diff options
| author | Alejandro Soto <alejandro@34project.org> | 2023-11-21 22:21:37 -0600 |
|---|---|---|
| committer | Alejandro Soto <alejandro@34project.org> | 2023-11-21 22:21:41 -0600 |
| commit | 3f5ec7003075eb78e7e2a7b179cbcd3a6c9e294d (patch) | |
| tree | 45e5aeb39b02198e92eab7ed5a167289d31a02aa /demo/float16.c | |
| parent | b943b9970564be0e4e8782b0f748912339e1009a (diff) | |
demo: implement float16 support
Diffstat (limited to 'demo/float16.c')
| -rw-r--r-- | demo/float16.c | 331 |
1 files changed, 331 insertions, 0 deletions
diff --git a/demo/float16.c b/demo/float16.c new file mode 100644 index 0000000..13e123a --- /dev/null +++ b/demo/float16.c @@ -0,0 +1,331 @@ +// https://github.com/artyom-beilis/float16/blob/master/c_src/float16.c + +#include "float16.h" +#include <stdint-gcc.h> +#define SIGN_MASK 0x8000 +#define EXP_MASK 0x7C00 +#define NAN_VALUE 0x7FFF +#define IS_ZERO(x) (((x) & 0x7FFF) == 0) +#define IS_INVALID(x) (((x) & EXP_MASK) == EXP_MASK) +#define IS_NAN(x) (((x) & 0x7FFF) > 0x7C00) +#define IS_INF(x) ( ((x) & 0x7FFF) == 0x7C00) +#define MANTISSA(x) (((x) & 1023) | (((x) & 0x7C00) == 0 ? 0 : 1024)) +#define EXPONENT(x) (((x) & 0x7C00) >> 10) +#define SIGNED_INF_VALUE(x) ((x & SIGN_MASK) | 0x7C00) + +short f16_sub(short ain,short bin) +{ + unsigned short a=ain; + unsigned short b=bin; + if(((a ^ b) & 0x8000) != 0) + return f16_add(a,b ^ 0x8000); + unsigned short sign = a & 0x8000; + a = a << 1; + b = b << 1; + if(a < b) { + unsigned short x=a; + a=b; + b=x; + sign ^= 0x8000; + } + unsigned short ax = a & 0xF800; + unsigned short bx = b & 0xF800; + if(a >=0xf800 || b>=0xf800) { + if(a > 0xF800 || b > 0xF800 || a==b) + return 0x7FFF; + unsigned short res = sign | 0x7C00; + if(a == 0xf800) + return res; + else + return res ^ 0x8000; + } + int exp_diff = ax - bx; + unsigned short exp_part = ax; + if(exp_diff != 0) { + int shift = exp_diff >> 11; + if(bx != 0) + b = ((b & 2047) | 2048) >> shift; + else + b >>= (shift - 1); + } + else { + if(bx == 0) { + unsigned short res = (a-b) >> 1; + if(res == 0) + return res; + return res | sign; + } + else { + b=(b & 2047) | 2048; + } + } + unsigned short r=a - b; + if((r & 0xF800) == exp_part) { + return (r>>1) | sign; + } + unsigned short am = (a & 2047) | 2048; + unsigned short new_m = am - b; + if(new_m == 0) + return 0; + while(exp_part !=0 && !(new_m & (2048))) { + exp_part-=0x800; + if(exp_part!=0) + new_m<<=1; + } + return (((new_m & 2047) | exp_part) >> 1) | sign; +} + +short f16_add(short a,short b) +{ + if (((a ^ b) & 0x8000) != 0) + return f16_sub(a,b ^ 0x8000); + short sign = a & 0x8000; + a &= 0x7FFF; + b &= 0x7FFF; + if(a<b) { + short x=a; + a=b; + b=x; + } + if(a >= 0x7C00 || b>=0x7C00) { + if(a>0x7C00 || b>0x7C00) + return 0x7FFF; + return 0x7C00 | sign; + } + short ax = (a & 0x7C00); + short bx = (b & 0x7C00); + short exp_diff = ax - bx; + short exp_part = ax; + if(exp_diff != 0) { + int shift = exp_diff >> 10; + if(bx != 0) + b = ((b & 1023) | 1024) >> shift; + else + b >>= (shift - 1); + } + else { + if(bx == 0) { + return (a + b) | sign; + } + else { + b=(b & 1023) | 1024; + } + } + short r=a+b; + if ((r & 0x7C00) != exp_part) { + unsigned short am = (a & 1023) | 1024; + unsigned short new_m = (am + b) >> 1; + r =( exp_part + 0x400) | (1023 & new_m); + } + if((unsigned short)r >= 0x7C00u) { + return sign | 0x7C00; + } + return r | sign; +} + + +short f16_mul(short a,short b) +{ + int sign = (a ^ b) & SIGN_MASK; + + if(IS_INVALID(a) || IS_INVALID(b)) { + if(IS_NAN(a) || IS_NAN(b) || IS_ZERO(a) || IS_ZERO(b)) + return NAN_VALUE; + return sign | 0x7C00; + } + + if(IS_ZERO(a) || IS_ZERO(b)) + return 0; + unsigned short m1 = MANTISSA(a); + unsigned short m2 = MANTISSA(b); + + uint32_t v=m1; + v*=m2; + int ax = EXPONENT(a); + int bx = EXPONENT(b); + ax += (ax==0); + bx += (bx==0); + int new_exp = ax + bx - 15; + + if(v & ((uint32_t)1<<21)) { + v >>= 11; + new_exp++; + } + else if(v & ((uint32_t)1<<20)) { + v >>= 10; + } + else { // denormal + new_exp -= 10; + while(v >= 2048) { + v>>=1; + new_exp++; + } + } + if(new_exp <= 0) { + v>>=(-new_exp + 1); + new_exp = 0; + } + else if(new_exp >= 31) { + return SIGNED_INF_VALUE(sign); + } + return (sign) | (new_exp << 10) | (v & 1023); +} + +short f16_div(short a,short b) +{ + short sign = (a ^ b) & SIGN_MASK; + if(IS_NAN(a) || IS_NAN(b) || (IS_INVALID(a) && IS_INVALID(b)) || (IS_ZERO(a) && IS_ZERO(b))) + return 0x7FFF; + if(IS_INVALID(a) || IS_ZERO(b)) + return sign | 0x7C00; + if(IS_INVALID(b)) + return 0; + if(IS_ZERO(a)) + return 0; + + unsigned short m1 = MANTISSA(a); + unsigned short m2 = MANTISSA(b); + uint32_t m1_shifted = m1; + m1_shifted <<= 10; + uint32_t v= m1_shifted / m2; + unsigned short rem = m1_shifted % m2; + + int ax = EXPONENT(a); + int bx = EXPONENT(b); + ax += (ax==0); + bx += (bx==0); + int new_exp = ax - bx + 15 ; + + if(v == 0 && rem==0) + return 0; + + while(v < 1024 && new_exp > 0) { + v<<=1; + rem<<=1; + if(rem >= m2) { + v++; + rem -= m2; + } + new_exp--; + } + while(v >= 2048) { + v>>=1; + new_exp++; + } + + if(new_exp <= 0) { + v>>=(-new_exp + 1); + new_exp = 0; + } + else if(new_exp >= 31) { + return SIGNED_INF_VALUE(sign); + } + return sign | (v & 1023) | (new_exp << 10); +} + +short f16_neg(short v) +{ + return SIGN_MASK ^ v; +} +short f16_from_int(int32_t sv) +{ + uint32_t v; + int sig = 0; + if(sv < 0) { + v=-sv; + sig=1; + } + else + v=sv; + if(v==0) + return 0; + int e=25; + while(v >= 2048) { + v>>=1; + e++; + } + while(v<1024) { + v<<=1; + e--; + } + if(e>=31) + return SIGNED_INF_VALUE(sig << 15); + return (sig << 15) | (e << 10) | (v & 1023); +} +int32_t f16_int(short a) +{ + unsigned short value = MANTISSA(a); + short shift = EXPONENT(a) - 25; + if(shift > 0) + value <<= shift; + else if(shift < 0) + value >>= -shift; + if(a & SIGN_MASK) + return -(int32_t)(value); + return value; +} + +int f16_gte(short a,short b) +{ + if(IS_ZERO(a) && IS_ZERO(b)) + return 1; + if(IS_NAN(a) || IS_NAN(b)) + return 0; + if((a & SIGN_MASK) == 0) { + if((b & SIGN_MASK) == SIGN_MASK) + return 1; + return a >= b; + } + else { + if((b & SIGN_MASK) == 0) + return 0; + return (a & 0x7FFF) <= (b & 0x7FFF); + } +} + +int f16_gt(short a,short b) +{ + if(IS_NAN(a) || IS_NAN(b)) + return 0; + if(IS_ZERO(a) && IS_ZERO(b)) + return 0; + if((a & SIGN_MASK) == 0) { + if((b & SIGN_MASK) == SIGN_MASK) + return 1; + return a > b; + } + else { + if((b & SIGN_MASK) == 0) + return 0; + return (a & 0x7FFF) < (b & 0x7FFF); + } + +} +int f16_eq(short a,short b) +{ + if(IS_NAN(a) || IS_NAN(b)) + return 0; + if(IS_ZERO(a) && IS_ZERO(b)) + return 1; + return a==b; +} + +int f16_lte(short a,short b) +{ + if(IS_NAN(a) || IS_NAN(b)) + return 0; + return f16_gte(b,a); +} + +int f16_lt(short a,short b) +{ + if(IS_NAN(a) || IS_NAN(b)) + return 0; + return f16_gt(b,a); +} +int f16_neq(short a,short b) +{ + return !f16_eq(a,b); +} + + |
