summaryrefslogtreecommitdiff
path: root/demo
diff options
context:
space:
mode:
authorAlejandro Soto <alejandro@34project.org>2023-11-21 22:21:37 -0600
committerAlejandro Soto <alejandro@34project.org>2023-11-21 22:21:41 -0600
commit3f5ec7003075eb78e7e2a7b179cbcd3a6c9e294d (patch)
tree45e5aeb39b02198e92eab7ed5a167289d31a02aa /demo
parentb943b9970564be0e4e8782b0f748912339e1009a (diff)
demo: implement float16 support
Diffstat (limited to 'demo')
-rw-r--r--demo/float16.c331
-rw-r--r--demo/float16.h40
-rw-r--r--demo/main.c8
-rw-r--r--demo/sin_cos.c263
4 files changed, 642 insertions, 0 deletions
diff --git a/demo/float16.c b/demo/float16.c
new file mode 100644
index 0000000..13e123a
--- /dev/null
+++ b/demo/float16.c
@@ -0,0 +1,331 @@
+// https://github.com/artyom-beilis/float16/blob/master/c_src/float16.c
+
+#include "float16.h"
+#include <stdint-gcc.h>
+/* Bit-field helpers for a float16 (IEEE 754 binary16) value held in 16 bits:
+ * bit 15 is the sign, bits 14..10 the biased exponent, bits 9..0 the mantissa.
+ * Every macro parenthesizes its argument so that expressions such as
+ * `a | b` or `sig << 15` can be passed safely.
+ */
+#define SIGN_MASK 0x8000 /* sign bit */
+#define EXP_MASK 0x7C00 /* exponent field */
+#define NAN_VALUE 0x7FFF /* NaN bit pattern used as a result value */
+/* True for +0 and -0. */
+#define IS_ZERO(x) (((x) & 0x7FFF) == 0)
+/* True when the exponent field is all ones, i.e. the value is Inf or NaN. */
+#define IS_INVALID(x) (((x) & EXP_MASK) == EXP_MASK)
+/* True when the exponent is all ones and the mantissa is non-zero. */
+#define IS_NAN(x) (((x) & 0x7FFF) > 0x7C00)
+/* True when the exponent is all ones and the mantissa is zero. */
+#define IS_INF(x) (((x) & 0x7FFF) == 0x7C00)
+/* 10 stored mantissa bits, plus the implicit leading 1 (1024) unless the
+ * exponent field is zero. */
+#define MANTISSA(x) (((x) & 1023) | (((x) & 0x7C00) == 0 ? 0 : 1024))
+/* Biased exponent field as a number in 0..31. */
+#define EXPONENT(x) (((x) & 0x7C00) >> 10)
+/* Infinity carrying the sign bit of x. */
+#define SIGNED_INF_VALUE(x) (((x) & SIGN_MASK) | 0x7C00)
+/* Subtract two float16 bit patterns and return the bit pattern of ain - bin.
+ * Operands with different signs are forwarded to f16_add with bin negated.
+ * Otherwise both values are shifted left by one, which drops the sign bit and
+ * leaves the exponent in bits 15..11 and the mantissa in bits 10..1, and the
+ * smaller magnitude is subtracted from the larger one. Bits shifted out while
+ * aligning the smaller operand are discarded; no rounding step is applied.
+ * Returns 0x7FFF (NaN) for a NaN operand or for Inf - Inf.
+ */
+short f16_sub(short ain,short bin)
+{
+ unsigned short a=ain;
+ unsigned short b=bin;
+ if(((a ^ b) & 0x8000) != 0) // signs differ: a - b == a + (-b)
+ return f16_add(a,b ^ 0x8000);
+ unsigned short sign = a & 0x8000;
+ a = a << 1; // drop the sign bit; the exponent now sits in bits 15..11
+ b = b << 1;
+ if(a < b) { // keep the larger magnitude in a; swapping flips the result sign
+ unsigned short x=a;
+ a=b;
+ b=x;
+ sign ^= 0x8000;
+ }
+ unsigned short ax = a & 0xF800; // exponent fields in the shifted layout
+ unsigned short bx = b & 0xF800;
+ if(a >=0xf800 || b>=0xf800) { // exponent field all ones: Inf or NaN
+ if(a > 0xF800 || b > 0xF800 || a==b) // any NaN, or Inf - Inf
+ return 0x7FFF;
+ unsigned short res = sign | 0x7C00; // infinity with the computed sign
+ if(a == 0xf800)
+ return res;
+ else
+ return res ^ 0x8000;
+ }
+ int exp_diff = ax - bx;
+ unsigned short exp_part = ax; // the result starts with a's exponent
+ if(exp_diff != 0) {
+ int shift = exp_diff >> 11; // exponent difference as a plain count
+ if(bx != 0)
+ b = ((b & 2047) | 2048) >> shift; // normal b: restore the implicit 1, align to a
+ else
+ b >>= (shift - 1); // subnormal b: no implicit 1, aligned with one step less
+ }
+ else {
+ if(bx == 0) { // both exponent fields are zero: plain integer subtraction
+ unsigned short res = (a-b) >> 1; // undo the initial left shift
+ if(res == 0)
+ return res; // an exact zero is returned without a sign bit
+ return res | sign;
+ }
+ else {
+ b=(b & 2047) | 2048; // same exponent: only restore b's implicit 1
+ }
+ }
+ unsigned short r=a - b;
+ if((r & 0xF800) == exp_part) { // exponent field untouched: r is already the result
+ return (r>>1) | sign;
+ }
+ unsigned short am = (a & 2047) | 2048; // a's mantissa with the implicit 1
+ unsigned short new_m = am - b;
+ if(new_m == 0)
+ return 0;
+ while(exp_part !=0 && !(new_m & (2048))) { // renormalize until bit 11 is set or the exponent reaches 0
+ exp_part-=0x800;
+ if(exp_part!=0)
+ new_m<<=1;
+ }
+ return (((new_m & 2047) | exp_part) >> 1) | sign; // drop the implicit 1, undo the shift, attach the sign
+}
+/* Add two float16 bit patterns and return the bit pattern of a + b.
+ * Operands with different signs are forwarded to f16_sub with b negated.
+ * Otherwise the magnitudes are added after aligning the smaller operand to
+ * the exponent of the larger one. Bits shifted out during alignment or
+ * renormalization are discarded; no rounding step is applied.
+ * Returns 0x7FFF (NaN) for a NaN operand and a signed infinity on overflow.
+ */
+short f16_add(short a,short b)
+{
+ if (((a ^ b) & 0x8000) != 0) // signs differ: a + b == a - (-b)
+ return f16_sub(a,b ^ 0x8000);
+ short sign = a & 0x8000; // both operands share this sign
+ a &= 0x7FFF; // work on magnitudes only
+ b &= 0x7FFF;
+ if(a<b) { // keep the larger magnitude in a
+ short x=a;
+ a=b;
+ b=x;
+ }
+ if(a >= 0x7C00 || b>=0x7C00) { // exponent field all ones: Inf or NaN
+ if(a>0x7C00 || b>0x7C00) // any NaN operand gives NaN
+ return 0x7FFF;
+ return 0x7C00 | sign; // infinity keeps the common sign
+ }
+ short ax = (a & 0x7C00); // exponent fields
+ short bx = (b & 0x7C00);
+ short exp_diff = ax - bx;
+ short exp_part = ax; // the result starts with a's exponent
+ if(exp_diff != 0) {
+ int shift = exp_diff >> 10; // exponent difference as a plain count
+ if(bx != 0)
+ b = ((b & 1023) | 1024) >> shift; // normal b: restore the implicit 1, align to a
+ else
+ b >>= (shift - 1); // subnormal b: no implicit 1, aligned with one step less
+ }
+ else {
+ if(bx == 0) {
+ return (a + b) | sign; // both exponent fields zero: integer add, a carry lands in bit 10
+ }
+ else {
+ b=(b & 1023) | 1024; // same exponent: only restore b's implicit 1
+ }
+ }
+ short r=a+b;
+ if ((r & 0x7C00) != exp_part) { // the sum spilled into the exponent field
+ unsigned short am = (a & 1023) | 1024; // a's mantissa with the implicit 1
+ unsigned short new_m = (am + b) >> 1; // halve the sum; its lowest bit is dropped
+ r =( exp_part + 0x400) | (1023 & new_m); // exponent + 1, implicit 1 removed
+ }
+ if((unsigned short)r >= 0x7C00u) { // exponent reached all ones: overflow to infinity
+ return sign | 0x7C00;
+ }
+ return r | sign;
+}
+
+/* Multiply two float16 bit patterns and return the bit pattern of a * b.
+ * The two significands are multiplied as integers and the biased exponents
+ * are added; low product bits are shifted out without rounding.
+ * Returns NAN_VALUE for a NaN operand or for Inf * 0, a signed infinity for
+ * Inf operands or overflow, and 0 (without a sign bit) when an operand is zero.
+ */
+short f16_mul(short a,short b)
+{
+ int sign = (a ^ b) & SIGN_MASK; // result sign is the XOR of the operand signs
+
+ if(IS_INVALID(a) || IS_INVALID(b)) { // at least one operand is Inf or NaN
+ if(IS_NAN(a) || IS_NAN(b) || IS_ZERO(a) || IS_ZERO(b)) // NaN operand, or Inf * 0
+ return NAN_VALUE;
+ return sign | 0x7C00;
+ }
+
+ if(IS_ZERO(a) || IS_ZERO(b))
+ return 0;
+ unsigned short m1 = MANTISSA(a); // significands, with the implicit 1 for normal values
+ unsigned short m2 = MANTISSA(b);
+
+ uint32_t v=m1;
+ v*=m2; // product of two values below 2048 fits in 22 bits
+ int ax = EXPONENT(a);
+ int bx = EXPONENT(b);
+ ax += (ax==0); // an exponent field of 0 is treated as exponent 1
+ bx += (bx==0);
+ int new_exp = ax + bx - 15; // remove one copy of the bias
+
+ if(v & ((uint32_t)1<<21)) { // bit 21 set: shift down by 11 and bump the exponent
+ v >>= 11;
+ new_exp++;
+ }
+ else if(v & ((uint32_t)1<<20)) { // bit 20 is the leading bit: shift down by 10
+ v >>= 10;
+ }
+ else { // denormal
+ new_exp -= 10; // keep v unshifted and account for the 10 bits in the exponent
+ while(v >= 2048) { // reduce v to at most 11 bits
+ v>>=1;
+ new_exp++;
+ }
+ }
+ if(new_exp <= 0) { // too small for a normal value: shift into the exponent-0 encoding
+ v>>=(-new_exp + 1);
+ new_exp = 0;
+ }
+ else if(new_exp >= 31) { // exponent does not fit: overflow to infinity
+ return SIGNED_INF_VALUE(sign);
+ }
+ return (sign) | (new_exp << 10) | (v & 1023); // the mask drops the implicit 1
+}
+/* Divide two float16 bit patterns and return the bit pattern of a / b.
+ * The significand of a is scaled by 2^10 and divided by the significand of b
+ * as integers; further quotient bits are produced one at a time while the
+ * quotient is below 1024. Remaining bits are discarded without rounding.
+ * Returns 0x7FFF (NaN) for a NaN operand, Inf / Inf or 0 / 0, a signed
+ * infinity for Inf / x, x / 0 or overflow, and 0 (without a sign bit) for
+ * x / Inf and 0 / x.
+ */
+short f16_div(short a,short b)
+{
+ short sign = (a ^ b) & SIGN_MASK; // result sign is the XOR of the operand signs
+ if(IS_NAN(a) || IS_NAN(b) || (IS_INVALID(a) && IS_INVALID(b)) || (IS_ZERO(a) && IS_ZERO(b)))
+ return 0x7FFF;
+ if(IS_INVALID(a) || IS_ZERO(b)) // NaN was excluded above, so a is Inf here, or b is zero
+ return sign | 0x7C00;
+ if(IS_INVALID(b)) // finite / Inf
+ return 0;
+ if(IS_ZERO(a))
+ return 0;
+
+ unsigned short m1 = MANTISSA(a); // significands, with the implicit 1 for normal values
+ unsigned short m2 = MANTISSA(b);
+ uint32_t m1_shifted = m1;
+ m1_shifted <<= 10; // scale the dividend so the quotient carries 10 fraction bits
+ uint32_t v= m1_shifted / m2;
+ unsigned short rem = m1_shifted % m2;
+
+ int ax = EXPONENT(a);
+ int bx = EXPONENT(b);
+ ax += (ax==0); // an exponent field of 0 is treated as exponent 1
+ bx += (bx==0);
+ int new_exp = ax - bx + 15 ; // re-apply the bias removed by the subtraction
+
+ if(v == 0 && rem==0)
+ return 0;
+
+ while(v < 1024 && new_exp > 0) { // long division: one more quotient bit per exponent step
+ v<<=1;
+ rem<<=1;
+ if(rem >= m2) {
+ v++;
+ rem -= m2;
+ }
+ new_exp--;
+ }
+ while(v >= 2048) { // quotient wider than 11 bits: shift down and raise the exponent
+ v>>=1;
+ new_exp++;
+ }
+
+ if(new_exp <= 0) { // too small for a normal value: shift into the exponent-0 encoding
+ v>>=(-new_exp + 1);
+ new_exp = 0;
+ }
+ else if(new_exp >= 31) { // exponent does not fit: overflow to infinity
+ return SIGNED_INF_VALUE(sign);
+ }
+ return sign | (v & 1023) | (new_exp << 10); // the mask drops the implicit 1
+}
+
+/* Negate a float16 bit pattern by toggling its sign bit; all other bits are
+ * passed through unchanged. */
+short f16_neg(short v)
+{
+    short negated = v ^ SIGN_MASK;
+
+    return negated;
+}
+/* Convert a signed 32-bit integer to a float16 bit pattern.
+ * The magnitude is shifted until it occupies exactly 11 bits (1024..2047)
+ * while the exponent is adjusted to match; low bits of magnitudes above 2047
+ * are shifted out without rounding. Returns 0 for 0 and a signed infinity
+ * when the exponent would reach 31.
+ */
+short f16_from_int(int32_t sv)
+{
+    int sig = (sv < 0);
+    /* Take the magnitude in unsigned arithmetic: negating INT32_MIN as a
+     * signed value overflows, which is undefined behavior. */
+    uint32_t v = sig ? (0u - (uint32_t)sv) : (uint32_t)sv;
+
+    if(v==0)
+        return 0;
+    /* Biased exponent 25 is the one at which an 11-bit significand equals
+     * the integer value itself. */
+    int e=25;
+    while(v >= 2048) {
+        v>>=1;
+        e++;
+    }
+    while(v<1024) {
+        v<<=1;
+        e--;
+    }
+    if(e>=31)
+        return SIGNED_INF_VALUE(sig << 15);
+    /* The mask drops the implicit leading 1 of the significand. */
+    return (sig << 15) | (e << 10) | (v & 1023);
+}
+/* Convert a float16 bit pattern to a signed integer.
+ * The significand is shifted by the distance between the exponent field and
+ * 25; fraction bits shifted out are dropped, and the sign is applied last.
+ * There is no special case for Inf or NaN, and the shifted value is kept in
+ * 16 bits.
+ */
+int32_t f16_int(short a)
+{
+    unsigned short magnitude = MANTISSA(a);
+    short exp_offset = EXPONENT(a) - 25;
+
+    if (exp_offset < 0) {
+        magnitude >>= -exp_offset;
+    } else if (exp_offset > 0) {
+        magnitude <<= exp_offset;
+    }
+
+    int32_t result = magnitude;
+    return (a & SIGN_MASK) ? -result : result;
+}
+
+/* Return 1 when a >= b, else 0. +0 and -0 compare as equal; any NaN operand
+ * makes the result 0. */
+int f16_gte(short a,short b)
+{
+    int a_negative = (a & SIGN_MASK) != 0;
+    int b_negative = (b & SIGN_MASK) != 0;
+
+    if (IS_ZERO(a) && IS_ZERO(b))
+        return 1;
+    if (IS_NAN(a) || IS_NAN(b))
+        return 0;
+
+    /* Different signs: a is the larger one exactly when b is negative. */
+    if (a_negative != b_negative)
+        return b_negative;
+
+    /* Both negative: the smaller magnitude is the larger value. */
+    if (a_negative)
+        return (a & 0x7FFF) <= (b & 0x7FFF);
+
+    /* Both non-negative: the bit patterns order like the values. */
+    return a >= b;
+}
+
+/* Return 1 when a > b, else 0. +0 and -0 compare as equal; any NaN operand
+ * makes the result 0. */
+int f16_gt(short a,short b)
+{
+    int a_negative = (a & SIGN_MASK) != 0;
+    int b_negative = (b & SIGN_MASK) != 0;
+
+    if (IS_NAN(a) || IS_NAN(b))
+        return 0;
+    if (IS_ZERO(a) && IS_ZERO(b))
+        return 0;
+
+    /* Different signs: a is the larger one exactly when b is negative. */
+    if (a_negative != b_negative)
+        return b_negative;
+
+    /* Both negative: the smaller magnitude is the larger value. */
+    if (a_negative)
+        return (a & 0x7FFF) < (b & 0x7FFF);
+
+    /* Both non-negative: the bit patterns order like the values. */
+    return a > b;
+}
+/* Return 1 when a == b, else 0. NaN never equals anything; +0 equals -0;
+ * every other pair is equal only when the bit patterns match. */
+int f16_eq(short a,short b)
+{
+    if (IS_NAN(a) || IS_NAN(b))
+        return 0;
+
+    return (IS_ZERO(a) && IS_ZERO(b)) || a == b;
+}
+
+/* Return the result of a <= b, computed as f16_gte(b, a); 0 when either
+ * operand is NaN. */
+int f16_lte(short a,short b)
+{
+    int has_nan = IS_NAN(a) || IS_NAN(b);
+
+    return has_nan ? 0 : f16_gte(b, a);
+}
+
+/* Return the result of a < b, computed as f16_gt(b, a); 0 when either
+ * operand is NaN. */
+int f16_lt(short a,short b)
+{
+    int has_nan = IS_NAN(a) || IS_NAN(b);
+
+    return has_nan ? 0 : f16_gt(b, a);
+}
+/* Logical negation of f16_eq: 1 when f16_eq(a, b) returns 0, else 0. */
+int f16_neq(short a,short b)
+{
+    return f16_eq(a, b) ? 0 : 1;
+}
+
+
diff --git a/demo/float16.h b/demo/float16.h
new file mode 100644
index 0000000..17cb1d0
--- /dev/null
+++ b/demo/float16.h
@@ -0,0 +1,40 @@
+// https://github.com/artyom-beilis/float16/tree/master/include
+
+#ifndef FLOAT16_H
+#define FLOAT16_H
+
+#include <stdint-gcc.h>
+/* Frequently used constants, written as the decimal value of their float16
+ * bit pattern (for example f16_one is 15360, which is 0x3C00).
+ */
+#define f16_tenth 11878 // 0x2E66, approximately 0.1
+#define f16_fifth 12902 // 0x3266, approximately 0.2
+#define f16_third 13653 // 0x3555, approximately 1/3
+#define f16_half 14336 // 0x3800, 0.5
+#define f16_one 15360 // 0x3C00, 1.0
+#define f16_two 16384 // 0x4000, 2.0
+#define f16_three 16896 // 0x4200, 3.0
+#define f16_five 17664 // 0x4500, 5.0
+#define f16_ten 18688 // 0x4900, 10.0
+#define f16_pi 16968 // 0x4248, approximately 3.14
+#define f16_half_pi 15944 // 0x3E48, approximately 1.57
+/* Arithmetic and integer conversion. Every short argument and return value
+ * carries a float16 bit pattern, not a numeric short.
+ */
+short f16_add(short a,short b); // a + b
+short f16_sub(short a,short b); // a - b
+short f16_mul(short a,short b); // a * b
+short f16_div(short a,short b); // a / b
+short f16_neg(short a); // -a (toggles the sign bit)
+short f16_from_int(int32_t v); // integer to float16
+int32_t f16_int(short v); // float16 to integer, fraction dropped
+/* Comparisons. Each returns 1 when the relation holds and 0 otherwise,
+ * except that f16_neq is the plain negation of f16_eq.
+ */
+int f16_gte(short a,short b); // a >= b
+int f16_gt(short a,short b); // a > b
+int f16_eq(short a,short b); // a == b
+int f16_lte(short a,short b); // a <= b
+int f16_lt(short a,short b); // a < b
+int f16_neq(short a,short b); // !(a == b)
+/* Cosine/sine lookup table with 256 entries; the definition in sin_cos.c
+ * documents how the index maps to an angle.
+ */
+extern const struct f16_sin_cos
+{
+ short cos, sin;
+} f16_sin_cos[256];
+
+#endif
diff --git a/demo/main.c b/demo/main.c
index 858b50e..bc95579 100644
--- a/demo/main.c
+++ b/demo/main.c
@@ -211,3 +211,11 @@ void reset(void)
else
ap_main();
}
+
+// Required by libgcc.
+// Prints the signal number, passes this CPU's number to halt_cpu() and then
+// spins forever, so control never returns to the caller.
+int raise(int sig)
+{
+    print("raise(%d) called, aborting", sig);
+    halt_cpu(this_cpu->num);
+    for (;;) {
+        /* spin */
+    }
+}
diff --git a/demo/sin_cos.c b/demo/sin_cos.c
new file mode 100644
index 0000000..d3adb00
--- /dev/null
+++ b/demo/sin_cos.c
@@ -0,0 +1,263 @@
+#include "float16.h"
+
+/* The index is a byte n, where the angle is n pi / 128. This covers
+ * from 0 up to just before 2pi. The data type is float16.
+ * Each entry is { cos, sin }, matching the member order in float16.h.
+ */
+const struct f16_sin_cos f16_sin_cos[256] = {
+ { 0x3c00, 0x0000 }, // n = 0: angle 0
+ { 0x3bff, 0x2648 },
+ { 0x3bfe, 0x2a48 },
+ { 0x3bfa, 0x2cb5 },
+ { 0x3bf6, 0x2e46 },
+ { 0x3bf1, 0x2fd6 },
+ { 0x3bea, 0x30b2 },
+ { 0x3be2, 0x3179 },
+ { 0x3bd9, 0x323e },
+ { 0x3bce, 0x3303 },
+ { 0x3bc3, 0x33c6 },
+ { 0x3bb6, 0x3444 },
+ { 0x3ba8, 0x34a5 },
+ { 0x3b99, 0x3505 },
+ { 0x3b88, 0x3564 },
+ { 0x3b77, 0x35c2 },
+ { 0x3b64, 0x361f },
+ { 0x3b50, 0x367c },
+ { 0x3b3b, 0x36d7 },
+ { 0x3b25, 0x3732 },
+ { 0x3b0e, 0x378b },
+ { 0x3af6, 0x37e3 },
+ { 0x3add, 0x381d },
+ { 0x3ac2, 0x3848 },
+ { 0x3aa7, 0x3872 },
+ { 0x3a8a, 0x389b },
+ { 0x3a6d, 0x38c4 },
+ { 0x3a4f, 0x38ec },
+ { 0x3a2f, 0x3913 },
+ { 0x3a0f, 0x393a },
+ { 0x39ed, 0x395f },
+ { 0x39cb, 0x3984 },
+ { 0x39a8, 0x39a8 },
+ { 0x3984, 0x39cb },
+ { 0x395f, 0x39ed },
+ { 0x393a, 0x3a0f },
+ { 0x3913, 0x3a2f },
+ { 0x38ec, 0x3a4f },
+ { 0x38c4, 0x3a6d },
+ { 0x389b, 0x3a8a },
+ { 0x3872, 0x3aa7 },
+ { 0x3848, 0x3ac2 },
+ { 0x381d, 0x3add },
+ { 0x37e3, 0x3af6 },
+ { 0x378b, 0x3b0e },
+ { 0x3732, 0x3b25 },
+ { 0x36d7, 0x3b3b },
+ { 0x367c, 0x3b50 },
+ { 0x361f, 0x3b64 },
+ { 0x35c2, 0x3b77 },
+ { 0x3564, 0x3b88 },
+ { 0x3505, 0x3b99 },
+ { 0x34a5, 0x3ba8 },
+ { 0x3444, 0x3bb6 },
+ { 0x33c6, 0x3bc3 },
+ { 0x3303, 0x3bce },
+ { 0x323e, 0x3bd9 },
+ { 0x3179, 0x3be2 },
+ { 0x30b2, 0x3bea },
+ { 0x2fd6, 0x3bf1 },
+ { 0x2e46, 0x3bf6 },
+ { 0x2cb5, 0x3bfa },
+ { 0x2a48, 0x3bfe },
+ { 0x2648, 0x3bff },
+ { 0x0000, 0x3c00 }, // n = 64: angle pi/2
+ { 0xa648, 0x3bff },
+ { 0xaa48, 0x3bfe },
+ { 0xacb5, 0x3bfa },
+ { 0xae46, 0x3bf6 },
+ { 0xafd6, 0x3bf1 },
+ { 0xb0b2, 0x3bea },
+ { 0xb179, 0x3be2 },
+ { 0xb23e, 0x3bd9 },
+ { 0xb303, 0x3bce },
+ { 0xb3c6, 0x3bc3 },
+ { 0xb444, 0x3bb6 },
+ { 0xb4a5, 0x3ba8 },
+ { 0xb505, 0x3b99 },
+ { 0xb564, 0x3b88 },
+ { 0xb5c2, 0x3b77 },
+ { 0xb61f, 0x3b64 },
+ { 0xb67c, 0x3b50 },
+ { 0xb6d7, 0x3b3b },
+ { 0xb732, 0x3b25 },
+ { 0xb78b, 0x3b0e },
+ { 0xb7e3, 0x3af6 },
+ { 0xb81d, 0x3add },
+ { 0xb848, 0x3ac2 },
+ { 0xb872, 0x3aa7 },
+ { 0xb89b, 0x3a8a },
+ { 0xb8c4, 0x3a6d },
+ { 0xb8ec, 0x3a4f },
+ { 0xb913, 0x3a2f },
+ { 0xb93a, 0x3a0f },
+ { 0xb95f, 0x39ed },
+ { 0xb984, 0x39cb },
+ { 0xb9a8, 0x39a8 },
+ { 0xb9cb, 0x3984 },
+ { 0xb9ed, 0x395f },
+ { 0xba0f, 0x393a },
+ { 0xba2f, 0x3913 },
+ { 0xba4f, 0x38ec },
+ { 0xba6d, 0x38c4 },
+ { 0xba8a, 0x389b },
+ { 0xbaa7, 0x3872 },
+ { 0xbac2, 0x3848 },
+ { 0xbadd, 0x381d },
+ { 0xbaf6, 0x37e3 },
+ { 0xbb0e, 0x378b },
+ { 0xbb25, 0x3732 },
+ { 0xbb3b, 0x36d7 },
+ { 0xbb50, 0x367c },
+ { 0xbb64, 0x361f },
+ { 0xbb77, 0x35c2 },
+ { 0xbb88, 0x3564 },
+ { 0xbb99, 0x3505 },
+ { 0xbba8, 0x34a5 },
+ { 0xbbb6, 0x3444 },
+ { 0xbbc3, 0x33c6 },
+ { 0xbbce, 0x3303 },
+ { 0xbbd9, 0x323e },
+ { 0xbbe2, 0x3179 },
+ { 0xbbea, 0x30b2 },
+ { 0xbbf1, 0x2fd6 },
+ { 0xbbf6, 0x2e46 },
+ { 0xbbfa, 0x2cb5 },
+ { 0xbbfe, 0x2a48 },
+ { 0xbbff, 0x2648 },
+ { 0xbc00, 0x0000 }, // n = 128: angle pi
+ { 0xbbff, 0xa648 },
+ { 0xbbfe, 0xaa48 },
+ { 0xbbfa, 0xacb5 },
+ { 0xbbf6, 0xae46 },
+ { 0xbbf1, 0xafd6 },
+ { 0xbbea, 0xb0b2 },
+ { 0xbbe2, 0xb179 },
+ { 0xbbd9, 0xb23e },
+ { 0xbbce, 0xb303 },
+ { 0xbbc3, 0xb3c6 },
+ { 0xbbb6, 0xb444 },
+ { 0xbba8, 0xb4a5 },
+ { 0xbb99, 0xb505 },
+ { 0xbb88, 0xb564 },
+ { 0xbb77, 0xb5c2 },
+ { 0xbb64, 0xb61f },
+ { 0xbb50, 0xb67c },
+ { 0xbb3b, 0xb6d7 },
+ { 0xbb25, 0xb732 },
+ { 0xbb0e, 0xb78b },
+ { 0xbaf6, 0xb7e3 },
+ { 0xbadd, 0xb81d },
+ { 0xbac2, 0xb848 },
+ { 0xbaa7, 0xb872 },
+ { 0xba8a, 0xb89b },
+ { 0xba6d, 0xb8c4 },
+ { 0xba4f, 0xb8ec },
+ { 0xba2f, 0xb913 },
+ { 0xba0f, 0xb93a },
+ { 0xb9ed, 0xb95f },
+ { 0xb9cb, 0xb984 },
+ { 0xb9a8, 0xb9a8 },
+ { 0xb984, 0xb9cb },
+ { 0xb95f, 0xb9ed },
+ { 0xb93a, 0xba0f },
+ { 0xb913, 0xba2f },
+ { 0xb8ec, 0xba4f },
+ { 0xb8c4, 0xba6d },
+ { 0xb89b, 0xba8a },
+ { 0xb872, 0xbaa7 },
+ { 0xb848, 0xbac2 },
+ { 0xb81d, 0xbadd },
+ { 0xb7e3, 0xbaf6 },
+ { 0xb78b, 0xbb0e },
+ { 0xb732, 0xbb25 },
+ { 0xb6d7, 0xbb3b },
+ { 0xb67c, 0xbb50 },
+ { 0xb61f, 0xbb64 },
+ { 0xb5c2, 0xbb77 },
+ { 0xb564, 0xbb88 },
+ { 0xb505, 0xbb99 },
+ { 0xb4a5, 0xbba8 },
+ { 0xb444, 0xbbb6 },
+ { 0xb3c6, 0xbbc3 },
+ { 0xb303, 0xbbce },
+ { 0xb23e, 0xbbd9 },
+ { 0xb179, 0xbbe2 },
+ { 0xb0b2, 0xbbea },
+ { 0xafd6, 0xbbf1 },
+ { 0xae46, 0xbbf6 },
+ { 0xacb5, 0xbbfa },
+ { 0xaa48, 0xbbfe },
+ { 0xa648, 0xbbff },
+ { 0x8000, 0xbc00 }, // n = 192: angle 3pi/2; the cosine is stored with the sign bit set (-0)
+ { 0x2648, 0xbbff },
+ { 0x2a48, 0xbbfe },
+ { 0x2cb5, 0xbbfa },
+ { 0x2e46, 0xbbf6 },
+ { 0x2fd6, 0xbbf1 },
+ { 0x30b2, 0xbbea },
+ { 0x3179, 0xbbe2 },
+ { 0x323e, 0xbbd9 },
+ { 0x3303, 0xbbce },
+ { 0x33c6, 0xbbc3 },
+ { 0x3444, 0xbbb6 },
+ { 0x34a5, 0xbba8 },
+ { 0x3505, 0xbb99 },
+ { 0x3564, 0xbb88 },
+ { 0x35c2, 0xbb77 },
+ { 0x361f, 0xbb64 },
+ { 0x367c, 0xbb50 },
+ { 0x36d7, 0xbb3b },
+ { 0x3732, 0xbb25 },
+ { 0x378b, 0xbb0e },
+ { 0x37e3, 0xbaf6 },
+ { 0x381d, 0xbadd },
+ { 0x3848, 0xbac2 },
+ { 0x3872, 0xbaa7 },
+ { 0x389b, 0xba8a },
+ { 0x38c4, 0xba6d },
+ { 0x38ec, 0xba4f },
+ { 0x3913, 0xba2f },
+ { 0x393a, 0xba0f },
+ { 0x395f, 0xb9ed },
+ { 0x3984, 0xb9cb },
+ { 0x39a8, 0xb9a8 },
+ { 0x39cb, 0xb984 },
+ { 0x39ed, 0xb95f },
+ { 0x3a0f, 0xb93a },
+ { 0x3a2f, 0xb913 },
+ { 0x3a4f, 0xb8ec },
+ { 0x3a6d, 0xb8c4 },
+ { 0x3a8a, 0xb89b },
+ { 0x3aa7, 0xb872 },
+ { 0x3ac2, 0xb848 },
+ { 0x3add, 0xb81d },
+ { 0x3af6, 0xb7e3 },
+ { 0x3b0e, 0xb78b },
+ { 0x3b25, 0xb732 },
+ { 0x3b3b, 0xb6d7 },
+ { 0x3b50, 0xb67c },
+ { 0x3b64, 0xb61f },
+ { 0x3b77, 0xb5c2 },
+ { 0x3b88, 0xb564 },
+ { 0x3b99, 0xb505 },
+ { 0x3ba8, 0xb4a5 },
+ { 0x3bb6, 0xb444 },
+ { 0x3bc3, 0xb3c6 },
+ { 0x3bce, 0xb303 },
+ { 0x3bd9, 0xb23e },
+ { 0x3be2, 0xb179 },
+ { 0x3bea, 0xb0b2 },
+ { 0x3bf1, 0xafd6 },
+ { 0x3bf6, 0xae46 },
+ { 0x3bfa, 0xacb5 },
+ { 0x3bfe, 0xaa48 },
+ { 0x3bff, 0xa648 },
+};