/*
 * Software half-precision (binary16, "float16") arithmetic for the demo,
 * plus a 256-entry sine/cosine lookup table.
 *
 * Reconstructed from a whitespace-collapsed cgit (v1.2.3) rendering of:
 *
 *   commit  3f5ec7003075eb78e7e2a7b179cbcd3a6c9e294d
 *   From:   Alejandro Soto
 *   Date:   Tue, 21 Nov 2023 22:21:37 -0600
 *   Subject: demo: implement float16 support
 *
 *   demo/float16.c | 331 (new)    demo/float16.h | 40 (new)
 *   demo/main.c    |   8          demo/sin_cos.c | 263 (new)
 *
 * The sections below are marked with the file each one came from. The header
 * section comes first so that every prototype is visible before use; the
 * `#include "float16.h"` lines of the .c files are therefore not repeated.
 *
 * Values are passed around as `short` holding the raw binary16 bit pattern:
 * bit 15 sign, bits 14..10 biased exponent (bias 15), bits 9..0 mantissa.
 */

/* ======================================================================
 * demo/float16.h
 * https://github.com/artyom-beilis/float16/tree/master/include
 * ====================================================================== */

#ifndef FLOAT16_H
#define FLOAT16_H

#include <stdint.h>

/* binary16 bit patterns (written in decimal) of frequently used values. */
#define f16_tenth   11878
#define f16_fifth   12902
#define f16_third   13653
#define f16_half    14336
#define f16_one     15360
#define f16_two     16384
#define f16_three   16896
#define f16_five    17664
#define f16_ten     18688
#define f16_pi      16968
#define f16_half_pi 15944

short f16_add(short a,short b);
short f16_sub(short a,short b);
short f16_mul(short a,short b);
short f16_div(short a,short b);
short f16_neg(short a);
short f16_from_int(int32_t v);
int32_t f16_int(short v);

int f16_gte(short a,short b);
int f16_gt(short a,short b);
int f16_eq(short a,short b);
int f16_lte(short a,short b);
int f16_lt(short a,short b);
int f16_neq(short a,short b);

/* One table entry: cosine and sine of the same angle, as binary16 bits. */
extern const struct f16_sin_cos
{
    short cos, sin;
} f16_sin_cos[256];

#endif

/* ======================================================================
 * demo/float16.c
 * https://github.com/artyom-beilis/float16/blob/master/c_src/float16.c
 * ====================================================================== */

#define SIGN_MASK 0x8000
#define EXP_MASK 0x7C00
#define NAN_VALUE 0x7FFF
#define IS_ZERO(x) (((x) & 0x7FFF) == 0)
/* "Invalid" = exponent field all ones, i.e. infinity or NaN. */
#define IS_INVALID(x) (((x) & EXP_MASK) == EXP_MASK)
#define IS_NAN(x) (((x) & 0x7FFF) > 0x7C00)
#define IS_INF(x) ( ((x) & 0x7FFF) == 0x7C00)
/* 10-bit fraction with the implicit leading 1 (bit 10) added for normals. */
#define MANTISSA(x) (((x) & 1023) | (((x) & 0x7C00) == 0 ? 0 : 1024))
#define EXPONENT(x) (((x) & 0x7C00) >> 10)
/* Infinity carrying the sign bit of x. The argument is parenthesised so that
 * any expression can be passed safely. */
#define SIGNED_INF_VALUE(x) (((x) & SIGN_MASK) | 0x7C00)

/*
 * Subtraction: returns ain - bin.
 *
 * Operands with different signs are forwarded to f16_add() with the sign of
 * the second one flipped. Otherwise the magnitudes are handled shifted left
 * by one bit (the sign bit falls out of the 16-bit value), so the exponent
 * field is 0xF800 and the mantissa 11 bits wide inside this function.
 *
 * Returns 0x7FFF (NaN) if either operand is NaN or for inf - inf.
 */
short f16_sub(short ain,short bin)
{
    unsigned short a=ain;
    unsigned short b=bin;
    if(((a ^ b) & 0x8000) != 0)
        return f16_add(a,b ^ 0x8000);
    unsigned short sign = a & 0x8000;
    a = a << 1;
    b = b << 1;
    /* Keep the larger magnitude in a; swapping flips the result's sign. */
    if(a < b) {
        unsigned short x=a;
        a=b;
        b=x;
        sign ^= 0x8000;
    }
    unsigned short ax = a & 0xF800;
    unsigned short bx = b & 0xF800;
    if(a >=0xf800 || b>=0xf800) {
        /* NaN operand, or two infinities of the same sign. */
        if(a > 0xF800 || b > 0xF800 || a==b)
            return 0x7FFF;
        unsigned short res = sign | 0x7C00;
        if(a == 0xf800)
            return res;
        else
            return res ^ 0x8000;
    }
    int exp_diff = ax - bx;
    unsigned short exp_part = ax;
    if(exp_diff != 0) {
        /* Align b's mantissa to a's exponent. */
        int shift = exp_diff >> 11;
        if(bx != 0)
            b = ((b & 2047) | 2048) >> shift;
        else
            b >>= (shift - 1);
    }
    else {
        if(bx == 0) {
            /* Both subnormal: plain integer subtraction of the fractions. */
            unsigned short res = (a-b) >> 1;
            if(res == 0)
                return res;
            return res | sign;
        }
        else {
            b=(b & 2047) | 2048;
        }
    }
    unsigned short r=a - b;
    if((r & 0xF800) == exp_part) {
        /* No borrow out of the mantissa: exponent is unchanged. */
        return (r>>1) | sign;
    }
    unsigned short am = (a & 2047) | 2048;
    unsigned short new_m = am - b;
    if(new_m == 0)
        return 0;
    /* Renormalise: lower the exponent until the leading 1 is back in place
     * or the exponent reaches zero (subnormal result). */
    while(exp_part !=0 && !(new_m & (2048))) {
        exp_part-=0x800;
        if(exp_part!=0)
            new_m<<=1;
    }
    return (((new_m & 2047) | exp_part) >> 1) | sign;
}

/*
 * Addition: returns a + b.
 *
 * Operands with different signs are forwarded to f16_sub() with the sign of
 * the second one flipped. Returns 0x7FFF (NaN) if either operand is NaN and
 * a signed infinity if either operand is infinite or the sum overflows.
 */
short f16_add(short a,short b)
{
    if (((a ^ b) & 0x8000) != 0)
        return f16_sub(a,b ^ 0x8000);
    short sign = a & 0x8000;
    a &= 0x7FFF;
    b &= 0x7FFF;
    /* Keep the larger magnitude in a so exp_diff below is never negative
     * (this swap had been lost from the collapsed patch text). */
    if(a<b) {
        short x=a;
        a=b;
        b=x;
    }
    if(a >= 0x7C00 || b>=0x7C00) {
        if(a>0x7C00 || b>0x7C00)
            return 0x7FFF;
        return 0x7C00 | sign;
    }
    short ax = (a & 0x7C00);
    short bx = (b & 0x7C00);
    short exp_diff = ax - bx;
    short exp_part = ax;
    if(exp_diff != 0) {
        /* Align b's mantissa to a's exponent. */
        int shift = exp_diff >> 10;
        if(bx != 0)
            b = ((b & 1023) | 1024) >> shift;
        else
            b >>= (shift - 1);
    }
    else {
        if(bx == 0) {
            /* Both subnormal: the integer sum is already the encoding. */
            return (a + b) | sign;
        }
        else {
            b=(b & 1023) | 1024;
        }
    }
    short r=a+b;
    if ((r & 0x7C00) != exp_part) {
        /* Mantissa carried out: halve it and bump the exponent by one. */
        unsigned short am = (a & 1023) | 1024;
        unsigned short new_m = (am + b) >> 1;
        r =( exp_part + 0x400) | (1023 & new_m);
    }
    if((unsigned short)r >= 0x7C00u) {
        return sign | 0x7C00;
    }
    return r | sign;
}


/*
 * Multiplication: returns a * b.
 *
 * NaN operand or inf * 0 gives NAN_VALUE; inf * finite gives a signed
 * infinity; a zero operand gives 0 (the sign is not kept). Low bits of the
 * product are discarded by the shifts; exponent overflow gives a signed
 * infinity.
 */
short f16_mul(short a,short b)
{
    int sign = (a ^ b) & SIGN_MASK;

    if(IS_INVALID(a) || IS_INVALID(b)) {
        if(IS_NAN(a) || IS_NAN(b) || IS_ZERO(a) || IS_ZERO(b))
            return NAN_VALUE;
        return sign | 0x7C00;
    }

    if(IS_ZERO(a) || IS_ZERO(b))
        return 0;
    unsigned short m1 = MANTISSA(a);
    unsigned short m2 = MANTISSA(b);

    uint32_t v=m1;
    v*=m2;
    int ax = EXPONENT(a);
    int bx = EXPONENT(b);
    /* Subnormals (exponent field 0) use the same scale as exponent 1. */
    ax += (ax==0);
    bx += (bx==0);
    int new_exp = ax + bx - 15;

    if(v & ((uint32_t)1<<21)) {
        v >>= 11;
        new_exp++;
    }
    else if(v & ((uint32_t)1<<20)) {
        v >>= 10;
    }
    else { // denormal
        new_exp -= 10;
        while(v >= 2048) {
            v>>=1;
            new_exp++;
        }
    }
    if(new_exp <= 0) {
        /* Result is subnormal: shift the mantissa down into place. */
        v>>=(-new_exp + 1);
        new_exp = 0;
    }
    else if(new_exp >= 31) {
        return SIGNED_INF_VALUE(sign);
    }
    return (sign) | (new_exp << 10) | (v & 1023);
}

/*
 * Division: returns a / b.
 *
 * NaN operand, inf / inf or 0 / 0 gives 0x7FFF; inf / x and x / 0 give a
 * signed infinity; x / inf and 0 / x give 0. Exponent overflow gives a
 * signed infinity.
 */
short f16_div(short a,short b)
{
    short sign = (a ^ b) & SIGN_MASK;
    if(IS_NAN(a) || IS_NAN(b) || (IS_INVALID(a) && IS_INVALID(b)) || (IS_ZERO(a) && IS_ZERO(b)))
        return 0x7FFF;
    if(IS_INVALID(a) || IS_ZERO(b))
        return sign | 0x7C00;
    if(IS_INVALID(b))
        return 0;
    if(IS_ZERO(a))
        return 0;

    unsigned short m1 = MANTISSA(a);
    unsigned short m2 = MANTISSA(b);
    /* Pre-scale the dividend by 2^10 so the quotient keeps 10 fraction bits. */
    uint32_t m1_shifted = m1;
    m1_shifted <<= 10;
    uint32_t v= m1_shifted / m2;
    unsigned short rem = m1_shifted % m2;

    int ax = EXPONENT(a);
    int bx = EXPONENT(b);
    ax += (ax==0);
    bx += (bx==0);
    int new_exp = ax - bx + 15 ;

    if(v == 0 && rem==0)
        return 0;

    /* Normalise upward, continuing the long division one bit at a time. */
    while(v < 1024 && new_exp > 0) {
        v<<=1;
        rem<<=1;
        if(rem >= m2) {
            v++;
            rem -= m2;
        }
        new_exp--;
    }
    while(v >= 2048) {
        v>>=1;
        new_exp++;
    }

    if(new_exp <= 0) {
        v>>=(-new_exp + 1);
        new_exp = 0;
    }
    else if(new_exp >= 31) {
        return SIGNED_INF_VALUE(sign);
    }
    return sign | (v & 1023) | (new_exp << 10);
}

/* Negation: flips the sign bit and nothing else. */
short f16_neg(short v)
{
    return SIGN_MASK ^ v;
}

/*
 * Converts a 32-bit signed integer to binary16.
 *
 * Low bits that do not fit in the 11-bit mantissa are shifted out. Values
 * whose exponent would reach 31 become a signed infinity.
 */
short f16_from_int(int32_t sv)
{
    uint32_t v;
    int sig = 0;
    if(sv < 0) {
        /* Negate in unsigned arithmetic: `-sv` is signed overflow
         * (undefined behaviour) when sv is INT32_MIN. */
        v=(uint32_t)0 - (uint32_t)sv;
        sig=1;
    }
    else
        v=(uint32_t)sv;
    if(v==0)
        return 0;
    /* Exponent field 25 means the 11-bit mantissa is the integer itself. */
    int e=25;
    while(v >= 2048) {
        v>>=1;
        e++;
    }
    while(v<1024) {
        v<<=1;
        e--;
    }
    if(e>=31)
        return SIGNED_INF_VALUE(sig << 15);
    return (sig << 15) | (e << 10) | (v & 1023);
}

/*
 * Converts binary16 to a 32-bit integer by shifting the mantissa; fraction
 * bits are dropped from the magnitude before the sign is applied.
 * NOTE(review): infinities and NaNs get no special treatment here, and the
 * intermediate `value` is only 16 bits wide - confirm callers never pass them.
 */
int32_t f16_int(short a)
{
    unsigned short value = MANTISSA(a);
    short shift = EXPONENT(a) - 25;
    if(shift > 0)
        value <<= shift;
    else if(shift < 0)
        value >>= -shift;
    if(a & SIGN_MASK)
        return -(int32_t)(value);
    return value;
}

/* a >= b. Two zeros of either sign compare equal; any NaN gives 0. */
int f16_gte(short a,short b)
{
    if(IS_ZERO(a) && IS_ZERO(b))
        return 1;
    if(IS_NAN(a) || IS_NAN(b))
        return 0;
    if((a & SIGN_MASK) == 0) {
        if((b & SIGN_MASK) == SIGN_MASK)
            return 1;
        return a >= b;
    }
    else {
        if((b & SIGN_MASK) == 0)
            return 0;
        /* Both negative: the larger magnitude is the smaller value. */
        return (a & 0x7FFF) <= (b & 0x7FFF);
    }
}

/* a > b. Any NaN gives 0; +0 and -0 are not greater than each other. */
int f16_gt(short a,short b)
{
    if(IS_NAN(a) || IS_NAN(b))
        return 0;
    if(IS_ZERO(a) && IS_ZERO(b))
        return 0;
    if((a & SIGN_MASK) == 0) {
        if((b & SIGN_MASK) == SIGN_MASK)
            return 1;
        return a > b;
    }
    else {
        if((b & SIGN_MASK) == 0)
            return 0;
        return (a & 0x7FFF) < (b & 0x7FFF);
    }

}

/* a == b. Any NaN gives 0; +0 equals -0. */
int f16_eq(short a,short b)
{
    if(IS_NAN(a) || IS_NAN(b))
        return 0;
    if(IS_ZERO(a) && IS_ZERO(b))
        return 1;
    return a==b;
}

/* a <= b, expressed as b >= a. Any NaN gives 0. */
int f16_lte(short a,short b)
{
    if(IS_NAN(a) || IS_NAN(b))
        return 0;
    return f16_gte(b,a);
}

/* a < b, expressed as b > a. Any NaN gives 0. */
int f16_lt(short a,short b)
{
    if(IS_NAN(a) || IS_NAN(b))
        return 0;
    return f16_gt(b,a);
}

/* a != b: the exact complement of f16_eq(), so it is 1 when either is NaN. */
int f16_neq(short a,short b)
{
    return !f16_eq(a,b);
}

/* ======================================================================
 * demo/main.c  (hunk @@ -211,3 +211,11 @@, appended after reset())
 *
 * The patch also adds the function below to demo/main.c. It calls print(),
 * halt_cpu() and this_cpu, which are defined in that file outside this view,
 * so it is reproduced here for reference only and is not compiled as part
 * of this unit:
 *
 *     // Required by libgcc
 *     int raise(int sig)
 *     {
 *         print("raise(%d) called, aborting", sig);
 *         halt_cpu(this_cpu->num);
 *         while (1);
 *     }
 * ====================================================================== */

/* ======================================================================
 * demo/sin_cos.c
 * ====================================================================== */

/* The index is a byte n, where the angle is n * pi / 128. This covers from
 * 0 up to just before 2*pi. The data type is float16.
 *
 * Each entry is { cos, sin }. Initialisers of 0x8000 and above (negative
 * values) rely on the implementation's int-to-short conversion keeping the
 * low 16 bits.
 */
const struct f16_sin_cos f16_sin_cos[256] = {
    /* n = 0 .. 63 */
    { 0x3c00, 0x0000 }, { 0x3bff, 0x2648 }, { 0x3bfe, 0x2a48 }, { 0x3bfa, 0x2cb5 },
    { 0x3bf6, 0x2e46 }, { 0x3bf1, 0x2fd6 }, { 0x3bea, 0x30b2 }, { 0x3be2, 0x3179 },
    { 0x3bd9, 0x323e }, { 0x3bce, 0x3303 }, { 0x3bc3, 0x33c6 }, { 0x3bb6, 0x3444 },
    { 0x3ba8, 0x34a5 }, { 0x3b99, 0x3505 }, { 0x3b88, 0x3564 }, { 0x3b77, 0x35c2 },
    { 0x3b64, 0x361f }, { 0x3b50, 0x367c }, { 0x3b3b, 0x36d7 }, { 0x3b25, 0x3732 },
    { 0x3b0e, 0x378b }, { 0x3af6, 0x37e3 }, { 0x3add, 0x381d }, { 0x3ac2, 0x3848 },
    { 0x3aa7, 0x3872 }, { 0x3a8a, 0x389b }, { 0x3a6d, 0x38c4 }, { 0x3a4f, 0x38ec },
    { 0x3a2f, 0x3913 }, { 0x3a0f, 0x393a }, { 0x39ed, 0x395f }, { 0x39cb, 0x3984 },
    { 0x39a8, 0x39a8 }, { 0x3984, 0x39cb }, { 0x395f, 0x39ed }, { 0x393a, 0x3a0f },
    { 0x3913, 0x3a2f }, { 0x38ec, 0x3a4f }, { 0x38c4, 0x3a6d }, { 0x389b, 0x3a8a },
    { 0x3872, 0x3aa7 }, { 0x3848, 0x3ac2 }, { 0x381d, 0x3add }, { 0x37e3, 0x3af6 },
    { 0x378b, 0x3b0e }, { 0x3732, 0x3b25 }, { 0x36d7, 0x3b3b }, { 0x367c, 0x3b50 },
    { 0x361f, 0x3b64 }, { 0x35c2, 0x3b77 }, { 0x3564, 0x3b88 }, { 0x3505, 0x3b99 },
    { 0x34a5, 0x3ba8 }, { 0x3444, 0x3bb6 }, { 0x33c6, 0x3bc3 }, { 0x3303, 0x3bce },
    { 0x323e, 0x3bd9 }, { 0x3179, 0x3be2 }, { 0x30b2, 0x3bea }, { 0x2fd6, 0x3bf1 },
    { 0x2e46, 0x3bf6 }, { 0x2cb5, 0x3bfa }, { 0x2a48, 0x3bfe }, { 0x2648, 0x3bff },
    /* n = 64 .. 127 */
    { 0x0000, 0x3c00 }, { 0xa648, 0x3bff }, { 0xaa48, 0x3bfe }, { 0xacb5, 0x3bfa },
    { 0xae46, 0x3bf6 }, { 0xafd6, 0x3bf1 }, { 0xb0b2, 0x3bea }, { 0xb179, 0x3be2 },
    { 0xb23e, 0x3bd9 }, { 0xb303, 0x3bce }, { 0xb3c6, 0x3bc3 }, { 0xb444, 0x3bb6 },
    { 0xb4a5, 0x3ba8 }, { 0xb505, 0x3b99 }, { 0xb564, 0x3b88 }, { 0xb5c2, 0x3b77 },
    { 0xb61f, 0x3b64 }, { 0xb67c, 0x3b50 }, { 0xb6d7, 0x3b3b }, { 0xb732, 0x3b25 },
    { 0xb78b, 0x3b0e }, { 0xb7e3, 0x3af6 }, { 0xb81d, 0x3add }, { 0xb848, 0x3ac2 },
    { 0xb872, 0x3aa7 }, { 0xb89b, 0x3a8a }, { 0xb8c4, 0x3a6d }, { 0xb8ec, 0x3a4f },
    { 0xb913, 0x3a2f }, { 0xb93a, 0x3a0f }, { 0xb95f, 0x39ed }, { 0xb984, 0x39cb },
    { 0xb9a8, 0x39a8 }, { 0xb9cb, 0x3984 }, { 0xb9ed, 0x395f }, { 0xba0f, 0x393a },
    { 0xba2f, 0x3913 }, { 0xba4f, 0x38ec }, { 0xba6d, 0x38c4 }, { 0xba8a, 0x389b },
    { 0xbaa7, 0x3872 }, { 0xbac2, 0x3848 }, { 0xbadd, 0x381d }, { 0xbaf6, 0x37e3 },
    { 0xbb0e, 0x378b }, { 0xbb25, 0x3732 }, { 0xbb3b, 0x36d7 }, { 0xbb50, 0x367c },
    { 0xbb64, 0x361f }, { 0xbb77, 0x35c2 }, { 0xbb88, 0x3564 }, { 0xbb99, 0x3505 },
    { 0xbba8, 0x34a5 }, { 0xbbb6, 0x3444 }, { 0xbbc3, 0x33c6 }, { 0xbbce, 0x3303 },
    { 0xbbd9, 0x323e }, { 0xbbe2, 0x3179 }, { 0xbbea, 0x30b2 }, { 0xbbf1, 0x2fd6 },
    { 0xbbf6, 0x2e46 }, { 0xbbfa, 0x2cb5 }, { 0xbbfe, 0x2a48 }, { 0xbbff, 0x2648 },
    /* n = 128 .. 191 */
    { 0xbc00, 0x0000 }, { 0xbbff, 0xa648 }, { 0xbbfe, 0xaa48 }, { 0xbbfa, 0xacb5 },
    { 0xbbf6, 0xae46 }, { 0xbbf1, 0xafd6 }, { 0xbbea, 0xb0b2 }, { 0xbbe2, 0xb179 },
    { 0xbbd9, 0xb23e }, { 0xbbce, 0xb303 }, { 0xbbc3, 0xb3c6 }, { 0xbbb6, 0xb444 },
    { 0xbba8, 0xb4a5 }, { 0xbb99, 0xb505 }, { 0xbb88, 0xb564 }, { 0xbb77, 0xb5c2 },
    { 0xbb64, 0xb61f }, { 0xbb50, 0xb67c }, { 0xbb3b, 0xb6d7 }, { 0xbb25, 0xb732 },
    { 0xbb0e, 0xb78b }, { 0xbaf6, 0xb7e3 }, { 0xbadd, 0xb81d }, { 0xbac2, 0xb848 },
    { 0xbaa7, 0xb872 }, { 0xba8a, 0xb89b }, { 0xba6d, 0xb8c4 }, { 0xba4f, 0xb8ec },
    { 0xba2f, 0xb913 }, { 0xba0f, 0xb93a }, { 0xb9ed, 0xb95f }, { 0xb9cb, 0xb984 },
    { 0xb9a8, 0xb9a8 }, { 0xb984, 0xb9cb }, { 0xb95f, 0xb9ed }, { 0xb93a, 0xba0f },
    { 0xb913, 0xba2f }, { 0xb8ec, 0xba4f }, { 0xb8c4, 0xba6d }, { 0xb89b, 0xba8a },
    { 0xb872, 0xbaa7 }, { 0xb848, 0xbac2 }, { 0xb81d, 0xbadd }, { 0xb7e3, 0xbaf6 },
    { 0xb78b, 0xbb0e }, { 0xb732, 0xbb25 }, { 0xb6d7, 0xbb3b }, { 0xb67c, 0xbb50 },
    { 0xb61f, 0xbb64 }, { 0xb5c2, 0xbb77 }, { 0xb564, 0xbb88 }, { 0xb505, 0xbb99 },
    { 0xb4a5, 0xbba8 }, { 0xb444, 0xbbb6 }, { 0xb3c6, 0xbbc3 }, { 0xb303, 0xbbce },
    { 0xb23e, 0xbbd9 }, { 0xb179, 0xbbe2 }, { 0xb0b2, 0xbbea }, { 0xafd6, 0xbbf1 },
    { 0xae46, 0xbbf6 }, { 0xacb5, 0xbbfa }, { 0xaa48, 0xbbfe }, { 0xa648, 0xbbff },
    /* n = 192 .. 255 */
    { 0x8000, 0xbc00 }, { 0x2648, 0xbbff }, { 0x2a48, 0xbbfe }, { 0x2cb5, 0xbbfa },
    { 0x2e46, 0xbbf6 }, { 0x2fd6, 0xbbf1 }, { 0x30b2, 0xbbea }, { 0x3179, 0xbbe2 },
    { 0x323e, 0xbbd9 }, { 0x3303, 0xbbce }, { 0x33c6, 0xbbc3 }, { 0x3444, 0xbbb6 },
    { 0x34a5, 0xbba8 }, { 0x3505, 0xbb99 }, { 0x3564, 0xbb88 }, { 0x35c2, 0xbb77 },
    { 0x361f, 0xbb64 }, { 0x367c, 0xbb50 }, { 0x36d7, 0xbb3b }, { 0x3732, 0xbb25 },
    { 0x378b, 0xbb0e }, { 0x37e3, 0xbaf6 }, { 0x381d, 0xbadd }, { 0x3848, 0xbac2 },
    { 0x3872, 0xbaa7 }, { 0x389b, 0xba8a }, { 0x38c4, 0xba6d }, { 0x38ec, 0xba4f },
    { 0x3913, 0xba2f }, { 0x393a, 0xba0f }, { 0x395f, 0xb9ed }, { 0x3984, 0xb9cb },
    { 0x39a8, 0xb9a8 }, { 0x39cb, 0xb984 }, { 0x39ed, 0xb95f }, { 0x3a0f, 0xb93a },
    { 0x3a2f, 0xb913 }, { 0x3a4f, 0xb8ec }, { 0x3a6d, 0xb8c4 }, { 0x3a8a, 0xb89b },
    { 0x3aa7, 0xb872 }, { 0x3ac2, 0xb848 }, { 0x3add, 0xb81d }, { 0x3af6, 0xb7e3 },
    { 0x3b0e, 0xb78b }, { 0x3b25, 0xb732 }, { 0x3b3b, 0xb6d7 }, { 0x3b50, 0xb67c },
    { 0x3b64, 0xb61f }, { 0x3b77, 0xb5c2 }, { 0x3b88, 0xb564 }, { 0x3b99, 0xb505 },
    { 0x3ba8, 0xb4a5 }, { 0x3bb6, 0xb444 }, { 0x3bc3, 0xb3c6 }, { 0x3bce, 0xb303 },
    { 0x3bd9, 0xb23e }, { 0x3be2, 0xb179 }, { 0x3bea, 0xb0b2 }, { 0x3bf1, 0xafd6 },
    { 0x3bf6, 0xae46 }, { 0x3bfa, 0xacb5 }, { 0x3bfe, 0xaa48 }, { 0x3bff, 0xa648 },
};