summary | refs | log | tree | commit | diff
path: root/rtl/fpu
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- rtl/fpu/empty.cc 0
-rw-r--r-- rtl/fpu/float/fp_cmp.sv 81
-rw-r--r-- rtl/fpu/float/fp_cvt.sv 286
-rw-r--r-- rtl/fpu/float/fp_exe.sv 215
-rw-r--r-- rtl/fpu/float/fp_ext.sv 122
-rw-r--r-- rtl/fpu/float/fp_fdiv.sv 822
-rw-r--r-- rtl/fpu/float/fp_fma.sv 263
-rw-r--r-- rtl/fpu/float/fp_mac.sv 21
-rw-r--r-- rtl/fpu/float/fp_max.sv 129
-rw-r--r-- rtl/fpu/float/fp_rnd.sv 194
-rw-r--r-- rtl/fpu/float/fp_sgnj.sv 47
-rw-r--r-- rtl/fpu/float/fp_unit.sv 191
-rw-r--r-- rtl/fpu/float/fp_wire.sv 607
-rw-r--r-- rtl/fpu/lzc/lzc_128.sv 64
-rw-r--r-- rtl/fpu/lzc/lzc_16.sv 49
-rw-r--r-- rtl/fpu/lzc/lzc_256.sv 69
-rw-r--r-- rtl/fpu/lzc/lzc_32.sv 54
-rw-r--r-- rtl/fpu/lzc/lzc_4.sv 33
-rw-r--r-- rtl/fpu/lzc/lzc_64.sv 59
-rw-r--r-- rtl/fpu/lzc/lzc_8.sv 44
-rw-r--r-- rtl/fpu/lzc/lzc_wire.sv 17
-rw-r--r-- rtl/fpu/mod.mk 15
22 files changed, 3382 insertions, 0 deletions
diff --git a/rtl/fpu/empty.cc b/rtl/fpu/empty.cc
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/rtl/fpu/empty.cc
diff --git a/rtl/fpu/float/fp_cmp.sv b/rtl/fpu/float/fp_cmp.sv
new file mode 100644
index 0000000..98c30bb
--- /dev/null
+++ b/rtl/fpu/float/fp_cmp.sv
@@ -0,0 +1,81 @@
+import fp_wire::*;
+
+module fp_cmp (
+  input fp_cmp_in_type fp_cmp_i,
+  output fp_cmp_out_type fp_cmp_o
+);
+  // Combinational compare of two 65-bit operands (bit 64 = sign, bits 63:0 compared unsigned).
+  // fp_cmp_i.rm picks the predicate: 2 = feq, 1 = flt, 0 = fle; other values give result 0.
+  // Class vectors: bit 8 = signalling NaN, bit 9 = quiet NaN, bits 3 / 4 = negative / positive zero.
+  logic [64:0] lhs, rhs;
+  logic [9:0] lhs_class, rhs_class;
+  logic [2:0] sel;
+  logic any_snan, any_qnan, both_zero, sign_differs;
+  logic mag_lt, mag_le;
+  logic [63:0] cmp_result;
+  logic [4:0] cmp_flags;
+
+  always_comb begin
+    lhs = fp_cmp_i.data1;
+    rhs = fp_cmp_i.data2;
+    sel = fp_cmp_i.rm;
+    lhs_class = fp_cmp_i.class1;
+    rhs_class = fp_cmp_i.class2;
+
+    any_snan = lhs_class[8] | rhs_class[8];
+    any_qnan = lhs_class[9] | rhs_class[9];
+    both_zero = (lhs_class[3] | lhs_class[4]) & (rhs_class[3] | rhs_class[4]);
+    sign_differs = lhs[64] ^ rhs[64];
+
+    mag_lt = 1'b0;
+    mag_le = 1'b0;
+    cmp_result = 64'h0;
+    cmp_flags = 5'h0;
+    if (sel <= 3'd2) begin  // magnitude compare is only evaluated for the three valid selectors
+      mag_lt = (lhs[63:0] < rhs[63:0]);
+      mag_le = (lhs[63:0] <= rhs[63:0]);
+    end
+
+    case (sel)
+      3'd2: begin  // feq
+        if (any_snan) begin
+          cmp_flags[4] = 1'b1;
+        end else if (any_qnan) begin
+          cmp_flags[0] = 1'b0;  // quiet NaN: result stays 0 and no flag is raised
+        end else if (both_zero || (lhs == rhs)) begin
+          cmp_result[0] = 1'b1;
+        end
+      end
+      3'd1: begin  // flt
+        if (any_snan | any_qnan) begin
+          cmp_flags[4] = 1'b1;
+        end else if (both_zero) begin
+          cmp_result[0] = 1'b0;
+        end else if (sign_differs) begin
+          cmp_result[0] = lhs[64];
+        end else if (lhs[64] == 1'b1) begin
+          cmp_result[0] = ~mag_le;  // both negative: larger magnitude is the smaller value
+        end else begin
+          cmp_result[0] = mag_lt;
+        end
+      end
+      3'd0: begin  // fle
+        if (any_snan | any_qnan) begin
+          cmp_flags[4] = 1'b1;
+        end else if (both_zero) begin
+          cmp_result[0] = 1'b1;
+        end else if (sign_differs) begin
+          cmp_result[0] = lhs[64];
+        end else if (lhs[64] == 1'b0) begin
+          cmp_result[0] = mag_le;
+        end else begin
+          cmp_result[0] = ~mag_lt;  // both negative: magnitude order is reversed
+        end
+      end
+      default: ;
+    endcase
+    fp_cmp_o.result = cmp_result;
+    fp_cmp_o.flags = cmp_flags;
+  end
+
+endmodule
diff --git a/rtl/fpu/float/fp_cvt.sv b/rtl/fpu/float/fp_cvt.sv
new file mode 100644
index 0000000..46efc1f
--- /dev/null
+++ b/rtl/fpu/float/fp_cvt.sv
@@ -0,0 +1,286 @@
+import lzc_wire::*;
+import fp_wire::*;
+
+module fp_cvt ( // Combinational conversions: float->float (f2f), float->int (f2i), int->float (i2f). f2f and i2f only pre-normalise and emit an fp_rnd record; f2i rounds and range-checks locally.
+ input fp_cvt_f2f_in_type fp_cvt_f2f_i,
+ output fp_cvt_f2f_out_type fp_cvt_f2f_o,
+ input fp_cvt_f2i_in_type fp_cvt_f2i_i,
+ output fp_cvt_f2i_out_type fp_cvt_f2i_o,
+ input fp_cvt_i2f_in_type fp_cvt_i2f_i,
+ output fp_cvt_i2f_out_type fp_cvt_i2f_o,
+ input lzc_64_out_type lzc_o, // result of the external leading-zero counter (used by i2f only)
+ output lzc_64_in_type lzc_i // operand handed to the external leading-zero counter
+);
+
+ fp_cvt_f2f_var_type v_f2f;
+ fp_cvt_f2i_var_type v_f2i;
+ fp_cvt_i2f_var_type v_i2f;
+
+ always_comb begin // ---- f2f: re-bias the exponent for the selected fmt and pre-shift for rounding ----
+
+ v_f2f.data = fp_cvt_f2f_i.data;
+ v_f2f.fmt = fp_cvt_f2f_i.fmt; // NOTE(review): presumably the destination format - confirm against fp_exe
+ v_f2f.rm = fp_cvt_f2f_i.rm;
+ v_f2f.classification = fp_cvt_f2f_i.classification;
+
+ v_f2f.snan = v_f2f.classification[8];
+ v_f2f.qnan = v_f2f.classification[9];
+ v_f2f.dbz = 0;
+ v_f2f.infs = v_f2f.classification[0] | v_f2f.classification[7]; // either infinity class
+ v_f2f.zero = v_f2f.classification[3] | v_f2f.classification[4]; // either zero class
+
+ v_f2f.exponent_cvt = v_f2f.data[63:52];
+ v_f2f.mantissa_cvt = {2'h1, v_f2f.data[51:0], 26'h0}; // 80 bits: hidden one at bit 78, fraction below it
+
+ v_f2f.exponent_bias = 1920; // 1920 / 1024 equal the offsets 12'h780 / 12'h400 that fp_ext adds when widening
+ if (v_f2f.fmt == 1) begin
+ v_f2f.exponent_bias = 1024;
+ end
+
+ v_f2f.sign_rnd = v_f2f.data[64];
+ v_f2f.exponent_rnd = {2'h0, v_f2f.exponent_cvt} - {3'h0, v_f2f.exponent_bias};
+
+ v_f2f.counter_cvt = 0;
+ if ($signed(v_f2f.exponent_rnd) <= 0) begin // non-positive exponent: shift the mantissa right instead and clamp the exponent to 0
+ v_f2f.counter_cvt = 63; // shift amount used when the exponent is -63 or lower
+ if ($signed(v_f2f.exponent_rnd) > -63) begin
+ v_f2f.counter_cvt = 14'h1 - v_f2f.exponent_rnd;
+ end
+ v_f2f.exponent_rnd = 0;
+ end
+
+ v_f2f.mantissa_cvt = v_f2f.mantissa_cvt >> v_f2f.counter_cvt[5:0];
+
+ v_f2f.mantissa_rnd = {29'h0, v_f2f.mantissa_cvt[79:55]}; // default (fmt != 1): keep the top 25 bits
+ v_f2f.grs = {v_f2f.mantissa_cvt[54:53], |v_f2f.mantissa_cvt[52:0]}; // next two bits plus OR of everything below
+ if (v_f2f.fmt == 1) begin
+ v_f2f.mantissa_rnd = v_f2f.mantissa_cvt[79:26]; // fmt == 1: keep the top 54 bits
+ v_f2f.grs = {v_f2f.mantissa_cvt[25:24], |v_f2f.mantissa_cvt[23:0]};
+ end
+
+ fp_cvt_f2f_o.fp_rnd.sig = v_f2f.sign_rnd;
+ fp_cvt_f2f_o.fp_rnd.expo = v_f2f.exponent_rnd;
+ fp_cvt_f2f_o.fp_rnd.mant = v_f2f.mantissa_rnd;
+ fp_cvt_f2f_o.fp_rnd.rema = 2'h0;
+ fp_cvt_f2f_o.fp_rnd.fmt = v_f2f.fmt;
+ fp_cvt_f2f_o.fp_rnd.rm = v_f2f.rm;
+ fp_cvt_f2f_o.fp_rnd.grs = v_f2f.grs;
+ fp_cvt_f2f_o.fp_rnd.snan = v_f2f.snan;
+ fp_cvt_f2f_o.fp_rnd.qnan = v_f2f.qnan;
+ fp_cvt_f2f_o.fp_rnd.dbz = v_f2f.dbz;
+ fp_cvt_f2f_o.fp_rnd.infs = v_f2f.infs;
+ fp_cvt_f2f_o.fp_rnd.zero = v_f2f.zero;
+ fp_cvt_f2f_o.fp_rnd.diff = 1'h0;
+
+ end
+
+ always_comb begin // ---- f2i: align, round to integer, range-check; op selects 0 = 32-bit signed, 1 = 32-bit unsigned, 2 = 64-bit signed, 3 = 64-bit unsigned ----
+
+ v_f2i.data = fp_cvt_f2i_i.data;
+ v_f2i.op = fp_cvt_f2i_i.op.fcvt_op;
+ v_f2i.rm = fp_cvt_f2i_i.rm;
+ v_f2i.classification = fp_cvt_f2i_i.classification;
+
+ v_f2i.flags = 0;
+ v_f2i.result = 0;
+
+ v_f2i.snan = v_f2i.classification[8];
+ v_f2i.qnan = v_f2i.classification[9];
+ v_f2i.infs = v_f2i.classification[0] | v_f2i.classification[7];
+ v_f2i.zero = 0;
+
+ if (v_f2i.op == 0) begin // per-op upper limit for exponent_cvt; anything above it is flagged out of range below
+ v_f2i.exponent_bias = 34;
+ end else if (v_f2i.op == 1) begin
+ v_f2i.exponent_bias = 35;
+ end else if (v_f2i.op == 2) begin
+ v_f2i.exponent_bias = 66;
+ end else begin
+ v_f2i.exponent_bias = 67;
+ end
+
+ v_f2i.sign_cvt = v_f2i.data[64];
+ v_f2i.exponent_cvt = v_f2i.data[63:52] - 13'd2044; // left-shift amount: an exponent field of 2047 gives 3, moving the hidden one from bit 52 to bit 55 (integer LSB)
+ v_f2i.mantissa_cvt = {68'h1, v_f2i.data[51:0]}; // hidden one at bit 52
+
+ if ((v_f2i.classification[3] | v_f2i.classification[4]) == 1) begin
+ v_f2i.mantissa_cvt[52] = 0; // zero input: clear the hidden one
+ end
+
+ v_f2i.oor = 0;
+
+ if ($signed(v_f2i.exponent_cvt) > $signed({5'h0, v_f2i.exponent_bias})) begin
+ v_f2i.oor = 1;
+ end else if ($signed(v_f2i.exponent_cvt) > 0) begin
+ v_f2i.mantissa_cvt = v_f2i.mantissa_cvt << v_f2i.exponent_cvt; // non-positive shift amounts leave the value entirely below bit 55
+ end
+
+ v_f2i.mantissa_uint = v_f2i.mantissa_cvt[119:55]; // integer part (65 bits)
+
+ v_f2i.grs = {v_f2i.mantissa_cvt[54:53], |v_f2i.mantissa_cvt[52:0]}; // two bits below the integer LSB plus OR of the rest
+ v_f2i.odd = v_f2i.mantissa_uint[0] | |v_f2i.grs[1:0]; // tie-break term for rne: integer LSB set, or anything below grs[2]
+
+ v_f2i.flags[0] = |v_f2i.grs; // set when any discarded bit is non-zero (presumably the inexact flag)
+
+ v_f2i.rnded = 0;
+ if (v_f2i.rm == 0) begin //rne
+ if (v_f2i.grs[2] & v_f2i.odd) begin
+ v_f2i.rnded = 1;
+ end
+ end else if (v_f2i.rm == 2) begin //rdn
+ if (v_f2i.sign_cvt & v_f2i.flags[0]) begin
+ v_f2i.rnded = 1;
+ end
+ end else if (v_f2i.rm == 3) begin //rup
+ if (~v_f2i.sign_cvt & v_f2i.flags[0]) begin
+ v_f2i.rnded = 1;
+ end
+ end else if (v_f2i.rm == 4) begin //rmm
+ if (v_f2i.grs[2] & v_f2i.flags[0]) begin
+ v_f2i.rnded = 1;
+ end
+ end
+
+ v_f2i.mantissa_uint = v_f2i.mantissa_uint + {64'h0, v_f2i.rnded}; // apply the rounding increment to the magnitude
+
+ v_f2i.or_1 = v_f2i.mantissa_uint[64];
+ v_f2i.or_2 = v_f2i.mantissa_uint[63];
+ v_f2i.or_3 = |v_f2i.mantissa_uint[62:32];
+ v_f2i.or_4 = v_f2i.mantissa_uint[31];
+ v_f2i.or_5 = |v_f2i.mantissa_uint[30:0];
+
+ v_f2i.zero = v_f2i.or_1 | v_f2i.or_2 | v_f2i.or_3 | v_f2i.or_4 | v_f2i.or_5; // despite the name: 1 when the rounded magnitude is NON-zero
+
+ v_f2i.oor_64u = v_f2i.or_1; // magnitude needs more than 64 bits
+ v_f2i.oor_64s = v_f2i.or_1;
+ v_f2i.oor_32u = v_f2i.or_1 | v_f2i.or_2 | v_f2i.or_3; // magnitude needs more than 32 bits
+ v_f2i.oor_32s = v_f2i.or_1 | v_f2i.or_2 | v_f2i.or_3;
+
+ if (v_f2i.sign_cvt) begin
+ if (v_f2i.op == 0) begin
+ v_f2i.oor_32s = v_f2i.oor_32s | (v_f2i.or_4 & v_f2i.or_5); // negative: magnitude above 2^31 (bit 31 plus any lower bit)
+ end else if (v_f2i.op == 1) begin
+ v_f2i.oor = v_f2i.oor | v_f2i.zero; // negative input to an unsigned target: any non-zero magnitude is out of range
+ end else if (v_f2i.op == 2) begin
+ v_f2i.oor_64s = v_f2i.oor_64s | (v_f2i.or_2 & (v_f2i.or_3 | v_f2i.or_4 | v_f2i.or_5)); // negative: magnitude above 2^63
+ end else if (v_f2i.op == 3) begin
+ v_f2i.oor = v_f2i.oor | v_f2i.zero;
+ end
+ end else begin
+ v_f2i.oor_64s = v_f2i.oor_64s | v_f2i.or_2; // positive signed targets: bit 63 / bit 31 must be clear
+ v_f2i.oor_32s = v_f2i.oor_32s | v_f2i.or_4;
+ end
+
+ v_f2i.oor_64u = (v_f2i.op == 3) & (v_f2i.oor_64u | v_f2i.oor | v_f2i.infs | v_f2i.snan | v_f2i.qnan); // keep only the flag of the selected op; infinities and NaNs always count as out of range
+ v_f2i.oor_64s = (v_f2i.op == 2) & (v_f2i.oor_64s | v_f2i.oor | v_f2i.infs | v_f2i.snan | v_f2i.qnan);
+ v_f2i.oor_32u = (v_f2i.op == 1) & (v_f2i.oor_32u | v_f2i.oor | v_f2i.infs | v_f2i.snan | v_f2i.qnan);
+ v_f2i.oor_32s = (v_f2i.op == 0) & (v_f2i.oor_32s | v_f2i.oor | v_f2i.infs | v_f2i.snan | v_f2i.qnan);
+
+ if (v_f2i.sign_cvt) begin
+ v_f2i.mantissa_uint = -v_f2i.mantissa_uint; // two's complement for negative inputs
+ end
+
+ if (v_f2i.op == 0) begin
+ v_f2i.result = {32'h0, v_f2i.mantissa_uint[31:0]}; // 32-bit results are zero-extended here, not sign-extended
+ if (v_f2i.oor_32s) begin
+ v_f2i.result = 64'h0000000080000000; // NOTE(review): the out-of-range constants here and below ignore the input sign; RISC-V specifies sign-dependent saturation, while these match SoftFloat's 8086-SSE conventions - confirm which is intended
+ v_f2i.flags = 5'b10000; // only bit 4 set; the flags[0] value computed above is dropped
+ end
+ end else if (v_f2i.op == 1) begin
+ v_f2i.result = {32'h0, v_f2i.mantissa_uint[31:0]};
+ if (v_f2i.oor_32u) begin
+ v_f2i.result = 64'h00000000FFFFFFFF;
+ v_f2i.flags = 5'b10000;
+ end
+ end else if (v_f2i.op == 2) begin
+ v_f2i.result = v_f2i.mantissa_uint[63:0];
+ if (v_f2i.oor_64s) begin
+ v_f2i.result = 64'h8000000000000000;
+ v_f2i.flags = 5'b10000;
+ end
+ end else if (v_f2i.op == 3) begin
+ v_f2i.result = v_f2i.mantissa_uint[63:0];
+ if (v_f2i.oor_64u) begin
+ v_f2i.result = 64'hFFFFFFFFFFFFFFFF;
+ v_f2i.flags = 5'b10000;
+ end
+ end
+
+ fp_cvt_f2i_o.result = v_f2i.result;
+ fp_cvt_f2i_o.flags = v_f2i.flags;
+
+ end
+
+ always_comb begin // ---- i2f: take the magnitude, normalise with the external LZC, emit an fp_rnd record; op[1] selects 64-bit input, op 0 / 2 are the signed variants ----
+
+ v_i2f.data = fp_cvt_i2f_i.data;
+ v_i2f.op = fp_cvt_i2f_i.op.fcvt_op;
+ v_i2f.fmt = fp_cvt_i2f_i.fmt;
+ v_i2f.rm = fp_cvt_i2f_i.rm;
+
+ v_i2f.snan = 0;
+ v_i2f.qnan = 0;
+ v_i2f.dbz = 0;
+ v_i2f.infs = 0;
+ v_i2f.zero = 0;
+
+ v_i2f.exponent_bias = 127; // bias added to the result exponent: 127 unless fmt == 1, then 1023
+ if (v_i2f.fmt == 1) begin
+ v_i2f.exponent_bias = 1023;
+ end
+
+ v_i2f.sign_uint = 0; // ops 1 and 3 never set a sign, i.e. the input is treated as unsigned
+ if (v_i2f.op == 0) begin
+ v_i2f.sign_uint = v_i2f.data[31];
+ end else if (v_i2f.op == 2) begin
+ v_i2f.sign_uint = v_i2f.data[63];
+ end
+
+ if (v_i2f.sign_uint) begin
+ v_i2f.data = -v_i2f.data; // work on the magnitude; the sign travels separately in sign_rnd
+ end
+
+ v_i2f.mantissa_uint = 64'hFFFFFFFFFFFFFFFF; // placeholder: one of the two branches below always overwrites it
+ v_i2f.exponent_uint = 0;
+ if (!v_i2f.op[1]) begin
+ v_i2f.mantissa_uint = {v_i2f.data[31:0], 32'h0}; // 32-bit input: left-align into the 64-bit field
+ v_i2f.exponent_uint = 31; // exponent of the top bit for a 32-bit input
+ end else if (v_i2f.op[1]) begin
+ v_i2f.mantissa_uint = v_i2f.data[63:0];
+ v_i2f.exponent_uint = 63; // exponent of the top bit for a 64-bit input
+ end
+
+ v_i2f.zero = ~|v_i2f.mantissa_uint;
+
+ lzc_i.a = v_i2f.mantissa_uint;
+ v_i2f.counter_uint = ~lzc_o.c; // NOTE(review): assumes ~lzc_o.c is the number of leading zeros - confirm against the lzc_64 module
+
+ v_i2f.mantissa_uint = v_i2f.mantissa_uint << v_i2f.counter_uint; // normalise: shift left by counter_uint
+
+ v_i2f.sign_rnd = v_i2f.sign_uint;
+ v_i2f.exponent_rnd = {8'h0,v_i2f.exponent_uint} + {4'h0,v_i2f.exponent_bias} - {8'h0,v_i2f.counter_uint}; // top-bit exponent + bias - normalisation shift
+
+ v_i2f.mantissa_rnd = {30'h0, v_i2f.mantissa_uint[63:40]}; // default (fmt != 1): top 24 bits
+ v_i2f.grs = {v_i2f.mantissa_uint[39:38], |v_i2f.mantissa_uint[37:0]};
+ if (v_i2f.fmt == 1) begin
+ v_i2f.mantissa_rnd = {1'h0, v_i2f.mantissa_uint[63:11]}; // fmt == 1: top 53 bits
+ v_i2f.grs = {v_i2f.mantissa_uint[10:9], |v_i2f.mantissa_uint[8:0]};
+ end
+
+ fp_cvt_i2f_o.fp_rnd.sig = v_i2f.sign_rnd;
+ fp_cvt_i2f_o.fp_rnd.expo = v_i2f.exponent_rnd;
+ fp_cvt_i2f_o.fp_rnd.mant = v_i2f.mantissa_rnd;
+ fp_cvt_i2f_o.fp_rnd.rema = 2'h0;
+ fp_cvt_i2f_o.fp_rnd.fmt = v_i2f.fmt;
+ fp_cvt_i2f_o.fp_rnd.rm = v_i2f.rm;
+ fp_cvt_i2f_o.fp_rnd.grs = v_i2f.grs;
+ fp_cvt_i2f_o.fp_rnd.snan = v_i2f.snan;
+ fp_cvt_i2f_o.fp_rnd.qnan = v_i2f.qnan;
+ fp_cvt_i2f_o.fp_rnd.dbz = v_i2f.dbz;
+ fp_cvt_i2f_o.fp_rnd.infs = v_i2f.infs;
+ fp_cvt_i2f_o.fp_rnd.zero = v_i2f.zero;
+ fp_cvt_i2f_o.fp_rnd.diff = 1'h0;
+
+ end
+
+endmodule
diff --git a/rtl/fpu/float/fp_exe.sv b/rtl/fpu/float/fp_exe.sv
new file mode 100644
index 0000000..f61038a
--- /dev/null
+++ b/rtl/fpu/float/fp_exe.sv
@@ -0,0 +1,215 @@
+import fp_wire::*;
+
+module fp_exe (
+  input fp_exe_in_type fp_exe_i,
+  output fp_exe_out_type fp_exe_o,
+  input fp_ext_out_type fp_ext1_o,
+  output fp_ext_in_type fp_ext1_i,
+  input fp_ext_out_type fp_ext2_o,
+  output fp_ext_in_type fp_ext2_i,
+  input fp_ext_out_type fp_ext3_o,
+  output fp_ext_in_type fp_ext3_i,
+  input fp_cmp_out_type fp_cmp_o,
+  output fp_cmp_in_type fp_cmp_i,
+  input fp_max_out_type fp_max_o,
+  output fp_max_in_type fp_max_i,
+  input fp_sgnj_out_type fp_sgnj_o,
+  output fp_sgnj_in_type fp_sgnj_i,
+  input fp_cvt_f2f_out_type fp_cvt_f2f_o,
+  output fp_cvt_f2f_in_type fp_cvt_f2f_i,
+  input fp_cvt_f2i_out_type fp_cvt_f2i_o,
+  output fp_cvt_f2i_in_type fp_cvt_f2i_i,
+  input fp_cvt_i2f_out_type fp_cvt_i2f_o,
+  output fp_cvt_i2f_in_type fp_cvt_i2f_i,
+  input fp_fma_out_type fp_fma_o,
+  output fp_fma_in_type fp_fma_i,
+  input fp_fdiv_out_type fp_fdiv_o,
+  output fp_fdiv_in_type fp_fdiv_i,
+  input fp_rnd_out_type fp_rnd_o,
+  output fp_rnd_in_type fp_rnd_i
+);
+
+  // Execute-stage glue (purely combinational): fans the enable-gated operands out to every FP
+  // sub-unit, selects the fp_rnd request forwarded on fp_rnd_i, and multiplexes result / flags /
+  // ready back onto fp_exe_o. The sub-units themselves are not instantiated in this module.
+  logic [63:0] data1;
+  logic [63:0] data2;
+  logic [63:0] data3;
+  fp_operation_type op;
+  logic [1:0] fmt;
+  logic [2:0] rm;
+
+  logic [63:0] result;
+  logic [4:0] flags;
+  logic ready;
+
+  logic [1:0] fmt_ext;  // format selector handed to the three fp_ext decoders
+
+  logic [64:0] extend1;
+  logic [64:0] extend2;
+  logic [64:0] extend3;
+
+  logic [9:0] class1;
+  logic [9:0] class2;
+  logic [9:0] class3;
+
+  fp_rnd_in_type fp_rnd;
+
+  always_comb begin
+
+    if (fp_exe_i.enable) begin
+      data1 = fp_exe_i.data1;
+      data2 = fp_exe_i.data2;
+      data3 = fp_exe_i.data3;
+      op = fp_exe_i.op;
+      fmt = fp_exe_i.fmt;
+      rm = fp_exe_i.rm;
+    end else begin  // disabled: present all-zero operands and no operation to the sub-units
+      data1 = 0;
+      data2 = 0;
+      data3 = 0;
+      op = 0;
+      fmt = 0;
+      rm = 0;
+    end
+
+    result = 0;
+    flags = 0;
+    ready = fp_exe_i.enable;  // default: single-pass operations answer in the same evaluation
+
+    if (op.fcvt_f2f) begin
+      fmt_ext = fp_exe_i.op.fcvt_op;  // f2f: decode with op.fcvt_op (presumably the source format) instead of fmt
+    end else begin
+      fmt_ext = fp_exe_i.fmt;  // note: taken from the ungated input, not from the gated fmt above
+    end
+
+    fp_ext1_i.data = data1;
+    fp_ext1_i.fmt = fmt_ext;
+    fp_ext2_i.data = data2;
+    fp_ext2_i.fmt = fmt_ext;
+    fp_ext3_i.data = data3;
+    fp_ext3_i.fmt = fmt_ext;
+
+    extend1 = fp_ext1_o.result;
+    extend2 = fp_ext2_o.result;
+    extend3 = fp_ext3_o.result;
+
+    class1 = fp_ext1_o.classification;
+    class2 = fp_ext2_o.classification;
+    class3 = fp_ext3_o.classification;
+
+    fp_cmp_i.data1 = extend1;
+    fp_cmp_i.data2 = extend2;
+    fp_cmp_i.rm = rm;
+    fp_cmp_i.class1 = class1;
+    fp_cmp_i.class2 = class2;
+
+    fp_max_i.data1 = data1;
+    fp_max_i.data2 = data2;
+    fp_max_i.ext1 = extend1;
+    fp_max_i.ext2 = extend2;
+    fp_max_i.fmt = fmt;
+    fp_max_i.rm = rm;
+    fp_max_i.class1 = class1;
+    fp_max_i.class2 = class2;
+
+    fp_sgnj_i.data1 = data1;
+    fp_sgnj_i.data2 = data2;
+    fp_sgnj_i.fmt = fmt;
+    fp_sgnj_i.rm = rm;
+
+    fp_fma_i.data1 = extend1;
+    fp_fma_i.data2 = extend2;
+    fp_fma_i.data3 = extend3;
+    fp_fma_i.fmt = fmt;
+    fp_fma_i.rm = rm;
+    fp_fma_i.op = op;
+    fp_fma_i.class1 = class1;
+    fp_fma_i.class2 = class2;
+    fp_fma_i.class3 = class3;
+
+    fp_fdiv_i.data1 = extend1;
+    fp_fdiv_i.data2 = extend2;
+    fp_fdiv_i.fmt = fmt;
+    fp_fdiv_i.rm = rm;
+    fp_fdiv_i.op = op;
+    fp_fdiv_i.class1 = class1;
+    fp_fdiv_i.class2 = class2;
+
+    fp_cvt_i2f_i.data = data1;  // integer source: raw register value, not the extended form
+    fp_cvt_i2f_i.op = op;
+    fp_cvt_i2f_i.fmt = fmt;
+    fp_cvt_i2f_i.rm = rm;
+
+    fp_cvt_f2f_i.data = extend1;
+    fp_cvt_f2f_i.fmt = fmt;
+    fp_cvt_f2f_i.rm = rm;
+    fp_cvt_f2f_i.classification = class1;
+
+    fp_cvt_f2i_i.data = extend1;
+    fp_cvt_f2i_i.op = op;
+    fp_cvt_f2i_i.rm = rm;
+    fp_cvt_f2i_i.classification = class1;
+
+    fp_rnd = init_fp_rnd_in;  // idle request unless one of the producers below is selected
+
+    if (fp_fma_o.ready) begin  // priority: finished fma, then finished fdiv, then the conversions
+      fp_rnd = fp_fma_o.fp_rnd;
+    end else if (fp_fdiv_o.ready) begin
+      fp_rnd = fp_fdiv_o.fp_rnd;
+    end else if (op.fcvt_f2f) begin
+      fp_rnd = fp_cvt_f2f_o.fp_rnd;
+    end else if (op.fcvt_i2f) begin
+      fp_rnd = fp_cvt_i2f_o.fp_rnd;
+    end
+
+    fp_rnd_i = fp_rnd;
+
+    if (fp_fma_o.ready) begin
+      result = fp_rnd_o.result;
+      flags = fp_rnd_o.flags;
+      ready = 1;
+    end else if (fp_fdiv_o.ready) begin
+      result = fp_rnd_o.result;
+      flags = fp_rnd_o.flags;
+      ready = 1;
+    end else if (op.fmadd | op.fmsub | op.fnmadd | op.fnmsub | op.fadd | op.fsub | op.fmul) begin
+      ready = 0;  // fma-class op issued but fp_fma_o.ready not yet seen: hold ready low
+    end else if (op.fdiv | op.fsqrt) begin
+      ready = 0;  // divider / square root still busy
+    end else if (op.fcmp) begin
+      result = fp_cmp_o.result;
+      flags = fp_cmp_o.flags;
+    end else if (op.fsgnj) begin
+      result = fp_sgnj_o.result;
+      flags = 0;
+    end else if (op.fmax) begin
+      result = fp_max_o.result;
+      flags = fp_max_o.flags;
+    end else if (op.fclass) begin
+      result = {54'h0, class1};  // 10-bit class vector zero-extended to 64 bits
+      flags = 0;
+    end else if (op.fmv_f2i) begin
+      result = data1;  // moves pass the raw operand through unchanged
+      flags = 0;
+    end else if (op.fmv_i2f) begin
+      result = data1;
+      flags = 0;
+    end else if (op.fcvt_f2f) begin
+      result = fp_rnd_o.result;
+      flags = fp_rnd_o.flags;
+    end else if (op.fcvt_i2f) begin
+      result = fp_rnd_o.result;
+      flags = fp_rnd_o.flags;
+    end else if (op.fcvt_f2i) begin
+      result = fp_cvt_f2i_o.result;  // f2i rounds internally and bypasses the rounder
+      flags = fp_cvt_f2i_o.flags;
+    end
+
+    fp_exe_o.result = result;
+    fp_exe_o.flags = flags;
+    fp_exe_o.ready = ready;
+
+  end
+
+endmodule
diff --git a/rtl/fpu/float/fp_ext.sv b/rtl/fpu/float/fp_ext.sv
new file mode 100644
index 0000000..5a825a2
--- /dev/null
+++ b/rtl/fpu/float/fp_ext.sv
@@ -0,0 +1,122 @@
+import lzc_wire::*;
+import fp_wire::*;
+
+module fp_ext (
+  input fp_ext_in_type fp_ext_i,
+  output fp_ext_out_type fp_ext_o,
+  input lzc_64_out_type lzc_o,
+  output lzc_64_in_type lzc_i
+);
+  // Expands a raw single (fmt == 0) or double (fmt == 1) encoding into the 65-bit layout
+  // {sign, 12-bit exponent, 52-bit fraction} and produces a one-hot 10-bit class vector.
+  // Finite exponents are re-biased (+12'h780 for single, +12'h400 for double), all-ones
+  // exponents become 12'hFFF, and subnormals are normalised with the external LZC result.
+  // Class bits: 0 -inf, 1 -normal, 2 -subnormal, 3 -zero, 4 +zero, 5 +subnormal,
+  // 6 +normal, 7 +inf, 8 NaN with result bit 51 clear, 9 NaN with result bit 51 set.
+
+  logic [63:0] raw;
+  logic [1:0] fmt_sel;
+  logic is_single;
+
+  logic [63:0] lzc_operand;
+  logic [5:0] shamt;
+  logic [64:0] extended;
+  logic [9:0] fclass;
+  logic frac_is_zero;
+  logic exp_is_zero;
+  logic exp_is_ones;
+
+  always_comb begin
+
+    raw = fp_ext_i.data;
+    fmt_sel = fp_ext_i.fmt;
+    is_single = (fmt_sel == 0);
+
+    extended = 0;
+    fclass = 0;
+
+    // Field decode. The fraction sits below one zero pad bit and is padded with ones on the
+    // right, so the LZC operand is never all-zero. Any fmt other than 0 is decoded as double.
+    if (is_single) begin
+      lzc_operand = {1'h0, raw[22:0], 40'hFFFFFFFFFF};
+      exp_is_zero = ~|raw[30:23];
+      exp_is_ones = &raw[30:23];
+      frac_is_zero = ~|raw[22:0];
+    end else begin
+      lzc_operand = {1'h0, raw[51:0], 11'h7FF};
+      exp_is_zero = ~|raw[62:52];
+      exp_is_ones = &raw[62:52];
+      frac_is_zero = ~|raw[51:0];
+    end
+
+    lzc_i.a = lzc_operand;
+    shamt = ~lzc_o.c;
+
+    if (is_single) begin
+      extended[64] = raw[31];
+      extended[28:0] = 0;
+      if (exp_is_ones) begin
+        extended[63:52] = 12'hFFF;
+        extended[51:29] = raw[22:0];
+      end else if (!exp_is_zero) begin
+        extended[63:52] = {4'h0, raw[30:23]} + 12'h780;
+        extended[51:29] = raw[22:0];
+      end else if (shamt < 24) begin
+        extended[63:52] = 12'h781 - {6'h0, shamt};
+        extended[51:29] = (raw[22:0] << shamt);
+      end
+    end else if (fmt_sel == 1) begin
+      extended[64] = raw[63];
+      if (exp_is_ones) begin
+        extended[63:52] = 12'hFFF;
+        extended[51:0] = raw[51:0];
+      end else if (!exp_is_zero) begin
+        extended[63:52] = {1'h0, raw[62:52]} + 12'h400;
+        extended[51:0] = raw[51:0];
+      end else if (shamt < 53) begin
+        extended[63:52] = 12'h401 - {6'h0, shamt};
+        extended[51:0] = (raw[51:0] << shamt);
+      end
+    end
+
+    // One-hot class: settle the category first, then let the sign bit pick the class bit.
+    if (exp_is_ones) begin
+      if (frac_is_zero) begin
+        if (extended[64]) begin
+          fclass[0] = 1;
+        end else begin
+          fclass[7] = 1;
+        end
+      end else if (extended[51] == 0) begin
+        fclass[8] = 1;
+      end else begin
+        fclass[9] = 1;
+      end
+    end else if (exp_is_zero) begin
+      if (frac_is_zero == 1) begin
+        if (extended[64]) begin
+          fclass[3] = 1;
+        end else begin
+          fclass[4] = 1;
+        end
+      end else begin
+        if (extended[64]) begin
+          fclass[2] = 1;
+        end else begin
+          fclass[5] = 1;
+        end
+      end
+    end else begin
+      if (extended[64]) begin
+        fclass[1] = 1;
+      end else begin
+        fclass[6] = 1;
+      end
+    end
+
+    fp_ext_o.result = extended;
+    fp_ext_o.classification = fclass;
+
+  end
+
+endmodule
diff --git a/rtl/fpu/float/fp_fdiv.sv b/rtl/fpu/float/fp_fdiv.sv
new file mode 100644
index 0000000..a275d19
--- /dev/null
+++ b/rtl/fpu/float/fp_fdiv.sv
@@ -0,0 +1,822 @@
+import fp_wire::*;
+
+module fp_fdiv #(
+ parameter PERFORMANCE = 0
+) (
+ input reset,
+ input clock,
+ input fp_fdiv_in_type fp_fdiv_i,
+ output fp_fdiv_out_type fp_fdiv_o,
+ input fp_mac_out_type fp_mac_o,
+ output fp_mac_in_type fp_mac_i
+);
+
+ fp_fdiv_reg_functional_type r;
+ fp_fdiv_reg_functional_type rin;
+
+ fp_fdiv_reg_functional_type v;
+
+ fp_fdiv_reg_fixed_type r_fix;
+ fp_fdiv_reg_fixed_type rin_fix;
+
+ fp_fdiv_reg_fixed_type v_fix;
+
+ localparam logic [7:0] reciprocal_lut[0:127] = '{
+ 8'b00000000,
+ 8'b11111110,
+ 8'b11111100,
+ 8'b11111010,
+ 8'b11111000,
+ 8'b11110110,
+ 8'b11110100,
+ 8'b11110010,
+ 8'b11110000,
+ 8'b11101111,
+ 8'b11101101,
+ 8'b11101011,
+ 8'b11101010,
+ 8'b11101000,
+ 8'b11100110,
+ 8'b11100101,
+ 8'b11100011,
+ 8'b11100001,
+ 8'b11100000,
+ 8'b11011110,
+ 8'b11011101,
+ 8'b11011011,
+ 8'b11011010,
+ 8'b11011001,
+ 8'b11010111,
+ 8'b11010110,
+ 8'b11010100,
+ 8'b11010011,
+ 8'b11010010,
+ 8'b11010000,
+ 8'b11001111,
+ 8'b11001110,
+ 8'b11001100,
+ 8'b11001011,
+ 8'b11001010,
+ 8'b11001001,
+ 8'b11000111,
+ 8'b11000110,
+ 8'b11000101,
+ 8'b11000100,
+ 8'b11000011,
+ 8'b11000001,
+ 8'b11000000,
+ 8'b10111111,
+ 8'b10111110,
+ 8'b10111101,
+ 8'b10111100,
+ 8'b10111011,
+ 8'b10111010,
+ 8'b10111001,
+ 8'b10111000,
+ 8'b10110111,
+ 8'b10110110,
+ 8'b10110101,
+ 8'b10110100,
+ 8'b10110011,
+ 8'b10110010,
+ 8'b10110001,
+ 8'b10110000,
+ 8'b10101111,
+ 8'b10101110,
+ 8'b10101101,
+ 8'b10101100,
+ 8'b10101011,
+ 8'b10101010,
+ 8'b10101001,
+ 8'b10101000,
+ 8'b10101000,
+ 8'b10100111,
+ 8'b10100110,
+ 8'b10100101,
+ 8'b10100100,
+ 8'b10100011,
+ 8'b10100011,
+ 8'b10100010,
+ 8'b10100001,
+ 8'b10100000,
+ 8'b10011111,
+ 8'b10011111,
+ 8'b10011110,
+ 8'b10011101,
+ 8'b10011100,
+ 8'b10011100,
+ 8'b10011011,
+ 8'b10011010,
+ 8'b10011001,
+ 8'b10011001,
+ 8'b10011000,
+ 8'b10010111,
+ 8'b10010111,
+ 8'b10010110,
+ 8'b10010101,
+ 8'b10010100,
+ 8'b10010100,
+ 8'b10010011,
+ 8'b10010010,
+ 8'b10010010,
+ 8'b10010001,
+ 8'b10010000,
+ 8'b10010000,
+ 8'b10001111,
+ 8'b10001111,
+ 8'b10001110,
+ 8'b10001101,
+ 8'b10001101,
+ 8'b10001100,
+ 8'b10001100,
+ 8'b10001011,
+ 8'b10001010,
+ 8'b10001010,
+ 8'b10001001,
+ 8'b10001001,
+ 8'b10001000,
+ 8'b10000111,
+ 8'b10000111,
+ 8'b10000110,
+ 8'b10000110,
+ 8'b10000101,
+ 8'b10000101,
+ 8'b10000100,
+ 8'b10000100,
+ 8'b10000011,
+ 8'b10000011,
+ 8'b10000010,
+ 8'b10000010,
+ 8'b10000001,
+ 8'b10000001,
+ 8'b10000000
+ };
+
+ localparam logic [7:0] reciprocal_root_lut[0:95] = '{
+ 8'b10110101,
+ 8'b10110010,
+ 8'b10101111,
+ 8'b10101101,
+ 8'b10101010,
+ 8'b10101000,
+ 8'b10100110,
+ 8'b10100011,
+ 8'b10100001,
+ 8'b10011111,
+ 8'b10011110,
+ 8'b10011100,
+ 8'b10011010,
+ 8'b10011000,
+ 8'b10010110,
+ 8'b10010101,
+ 8'b10010011,
+ 8'b10010010,
+ 8'b10010000,
+ 8'b10001111,
+ 8'b10001110,
+ 8'b10001100,
+ 8'b10001011,
+ 8'b10001010,
+ 8'b10001000,
+ 8'b10000111,
+ 8'b10000110,
+ 8'b10000101,
+ 8'b10000100,
+ 8'b10000011,
+ 8'b10000010,
+ 8'b10000001,
+ 8'b10000000,
+ 8'b01111111,
+ 8'b01111110,
+ 8'b01111101,
+ 8'b01111100,
+ 8'b01111011,
+ 8'b01111010,
+ 8'b01111001,
+ 8'b01111000,
+ 8'b01110111,
+ 8'b01110111,
+ 8'b01110110,
+ 8'b01110101,
+ 8'b01110100,
+ 8'b01110011,
+ 8'b01110011,
+ 8'b01110010,
+ 8'b01110001,
+ 8'b01110001,
+ 8'b01110000,
+ 8'b01101111,
+ 8'b01101111,
+ 8'b01101110,
+ 8'b01101101,
+ 8'b01101101,
+ 8'b01101100,
+ 8'b01101011,
+ 8'b01101011,
+ 8'b01101010,
+ 8'b01101010,
+ 8'b01101001,
+ 8'b01101001,
+ 8'b01101000,
+ 8'b01100111,
+ 8'b01100111,
+ 8'b01100110,
+ 8'b01100110,
+ 8'b01100101,
+ 8'b01100101,
+ 8'b01100100,
+ 8'b01100100,
+ 8'b01100011,
+ 8'b01100011,
+ 8'b01100010,
+ 8'b01100010,
+ 8'b01100010,
+ 8'b01100001,
+ 8'b01100001,
+ 8'b01100000,
+ 8'b01100000,
+ 8'b01011111,
+ 8'b01011111,
+ 8'b01011111,
+ 8'b01011110,
+ 8'b01011110,
+ 8'b01011101,
+ 8'b01011101,
+ 8'b01011101,
+ 8'b01011100,
+ 8'b01011100,
+ 8'b01011011,
+ 8'b01011011,
+ 8'b01011011,
+ 8'b01011010
+ };
+
+ generate
+
+ if (PERFORMANCE == 1) begin
+
+ always_comb begin
+
+ v = r;
+
+ if (r.state == 0) begin
+ if (fp_fdiv_i.op.fdiv) begin
+ v.state = 1;
+ end
+ if (fp_fdiv_i.op.fsqrt) begin
+ v.state = 2;
+ end
+ v.istate = 0;
+ v.ready = 0;
+ end else if (r.state == 1) begin
+ if (v.istate == 10) begin
+ v.state = 3;
+ end
+ v.istate = v.istate + 6'd1;
+ v.ready = 0;
+ end else if (r.state == 2) begin
+ if (v.istate == 13) begin
+ v.state = 3;
+ end
+ v.istate = v.istate + 6'd1;
+ v.ready = 0;
+ end else if (r.state == 3) begin
+ v.state = 4;
+ v.ready = 0;
+ end else begin
+ v.state = 0;
+ v.ready = 1;
+ end
+
+ if (r.state == 0) begin
+ v.a = fp_fdiv_i.data1;
+ v.b = fp_fdiv_i.data2;
+ v.class_a = fp_fdiv_i.class1;
+ v.class_b = fp_fdiv_i.class2;
+ v.fmt = fp_fdiv_i.fmt;
+ v.rm = fp_fdiv_i.rm;
+ v.snan = 0;
+ v.qnan = 0;
+ v.dbz = 0;
+ v.infs = 0;
+ v.zero = 0;
+
+ if (fp_fdiv_i.op.fsqrt) begin
+ v.b = 65'h07FF0000000000000;
+ v.class_b = 0;
+ end
+
+ if (v.class_a[8] | v.class_b[8]) begin
+ v.snan = 1;
+ end else if ((v.class_a[3] | v.class_a[4]) & (v.class_b[3] | v.class_b[4])) begin
+ v.snan = 1;
+ end else if ((v.class_a[0] | v.class_a[7]) & (v.class_b[0] | v.class_b[7])) begin
+ v.snan = 1;
+ end else if (v.class_a[9] | v.class_b[9]) begin
+ v.qnan = 1;
+ end
+
+ if ((v.class_a[0] | v.class_a[7]) & (v.class_b[1] | v.class_b[2] | v.class_b[3] | v.class_b[4] | v.class_b[5] | v.class_b[6])) begin
+ v.infs = 1;
+ end else if ((v.class_b[3] | v.class_b[4]) & (v.class_a[1] | v.class_a[2] | v.class_a[5] | v.class_a[6])) begin
+ v.dbz = 1;
+ end
+
+ if ((v.class_a[3] | v.class_a[4]) | (v.class_b[0] | v.class_b[7])) begin
+ v.zero = 1;
+ end
+
+ if (fp_fdiv_i.op.fsqrt) begin
+ if (v.class_a[7]) begin
+ v.infs = 1;
+ end
+ if (v.class_a[0] | v.class_a[1] | v.class_a[2]) begin
+ v.snan = 1;
+ end
+ end
+
+ v.qa = {2'h1, v.a[51:0], 2'h0};
+ v.qb = {2'h1, v.b[51:0], 2'h0};
+
+ v.sign_fdiv = v.a[64] ^ v.b[64];
+ v.exponent_fdiv = {2'h0, v.a[63:52]} - {2'h0, v.b[63:52]};
+ v.y = {1'h0, ~|v.b[51:45], reciprocal_lut[$unsigned(v.b[51:45])], 46'h0};
+ v.op = 0;
+
+ if (fp_fdiv_i.op.fsqrt) begin
+ v.qa = {2'h1, v.a[51:0], 2'h0};
+ if (!v.a[52]) begin
+ v.qa = v.qa >> 1;
+ end
+ v.index = $unsigned(v.qa[54:48]) - 7'd32;
+ v.exponent_fdiv = ($signed({2'h0, v.a[63:52]}) + $signed(-14'd2045)) >>> 1;
+ v.y = {1'h0, reciprocal_root_lut[v.index], 47'h0};
+ v.op = 1;
+ end
+
+ fp_mac_i.a = 0;
+ fp_mac_i.b = 0;
+ fp_mac_i.c = 0;
+ fp_mac_i.op = 0;
+ end else if (r.state == 1) begin
+ if (r.istate == 0) begin
+ fp_mac_i.a = 56'h40000000000000;
+ fp_mac_i.b = v.qb;
+ fp_mac_i.c = v.y;
+ fp_mac_i.op = 1;
+ v.e0 = fp_mac_o.d[109:54];
+ end else if (r.istate == 1) begin
+ fp_mac_i.a = v.y;
+ fp_mac_i.b = v.y;
+ fp_mac_i.c = v.e0;
+ fp_mac_i.op = 0;
+ v.y0 = fp_mac_o.d[109:54];
+ end else if (r.istate == 2) begin
+ fp_mac_i.a = 56'h0;
+ fp_mac_i.b = v.e0;
+ fp_mac_i.c = v.e0;
+ fp_mac_i.op = 0;
+ v.e1 = fp_mac_o.d[109:54];
+ end else if (r.istate == 3) begin
+ fp_mac_i.a = v.y0;
+ fp_mac_i.b = v.y0;
+ fp_mac_i.c = v.e1;
+ fp_mac_i.op = 0;
+ v.y1 = fp_mac_o.d[109:54];
+ end else if (r.istate == 4) begin
+ fp_mac_i.a = 56'h0;
+ fp_mac_i.b = v.e1;
+ fp_mac_i.c = v.e1;
+ fp_mac_i.op = 0;
+ v.e2 = fp_mac_o.d[109:54];
+ end else if (r.istate == 5) begin
+ fp_mac_i.a = v.y1;
+ fp_mac_i.b = v.y1;
+ fp_mac_i.c = v.e2;
+ fp_mac_i.op = 0;
+ v.y2 = fp_mac_o.d[109:54];
+ end else if (r.istate == 6) begin
+ fp_mac_i.a = 56'h0;
+ fp_mac_i.b = v.qa;
+ fp_mac_i.c = v.y2;
+ fp_mac_i.op = 0;
+ v.q0 = fp_mac_o.d[109:54];
+ end else if (r.istate == 7) begin
+ fp_mac_i.a = v.qa;
+ fp_mac_i.b = v.qb;
+ fp_mac_i.c = v.q0;
+ fp_mac_i.op = 1;
+ v.r0 = fp_mac_o.d;
+ end else if (r.istate == 8) begin
+ fp_mac_i.a = v.q0;
+ fp_mac_i.b = v.r0[109:54];
+ fp_mac_i.c = v.y2;
+ fp_mac_i.op = 0;
+ v.q0 = fp_mac_o.d[109:54];
+ end else if (r.istate == 9) begin
+ fp_mac_i.a = v.qa;
+ fp_mac_i.b = v.qb;
+ fp_mac_i.c = v.q0;
+ fp_mac_i.op = 1;
+ v.r1 = fp_mac_o.d;
+ v.q1 = v.q0;
+ if ($signed(v.r1[109:54]) > 0) begin
+ v.q1 = v.q1 + 1;
+ end
+ end else if (r.istate == 10) begin
+ fp_mac_i.a = v.qa;
+ fp_mac_i.b = v.qb;
+ fp_mac_i.c = v.q1;
+ fp_mac_i.op = 1;
+ v.r0 = fp_mac_o.d;
+ if (v.r0[109:54] == 0) begin
+ v.q0 = v.q1;
+ v.r1 = v.r0;
+ end
+ end else begin
+ fp_mac_i.a = 0;
+ fp_mac_i.b = 0;
+ fp_mac_i.c = 0;
+ fp_mac_i.op = 0;
+ end
+ end else if (r.state == 2) begin
+ if (r.istate == 0) begin
+ fp_mac_i.a = 56'h0;
+ fp_mac_i.b = v.qa;
+ fp_mac_i.c = v.y;
+ fp_mac_i.op = 0;
+ v.y0 = fp_mac_o.d[109:54];
+ end else if (r.istate == 1) begin
+ fp_mac_i.a = 56'h0;
+ fp_mac_i.b = 56'h20000000000000;
+ fp_mac_i.c = v.y;
+ fp_mac_i.op = 0;
+ v.h0 = fp_mac_o.d[109:54];
+ end else if (r.istate == 2) begin
+ fp_mac_i.a = 56'h20000000000000;
+ fp_mac_i.b = v.h0;
+ fp_mac_i.c = v.y0;
+ fp_mac_i.op = 1;
+ v.e0 = fp_mac_o.d[109:54];
+ end else if (r.istate == 3) begin
+ fp_mac_i.a = v.y0;
+ fp_mac_i.b = v.y0;
+ fp_mac_i.c = v.e0;
+ fp_mac_i.op = 0;
+ v.y1 = fp_mac_o.d[109:54];
+ end else if (r.istate == 4) begin
+ fp_mac_i.a = v.h0;
+ fp_mac_i.b = v.h0;
+ fp_mac_i.c = v.e0;
+ fp_mac_i.op = 0;
+ v.h1 = fp_mac_o.d[109:54];
+ end else if (r.istate == 5) begin
+ fp_mac_i.a = 56'h20000000000000;
+ fp_mac_i.b = v.h1;
+ fp_mac_i.c = v.y1;
+ fp_mac_i.op = 1;
+ v.e1 = fp_mac_o.d[109:54];
+ end else if (r.istate == 6) begin
+ fp_mac_i.a = v.y1;
+ fp_mac_i.b = v.y1;
+ fp_mac_i.c = v.e1;
+ fp_mac_i.op = 0;
+ v.y2 = fp_mac_o.d[109:54];
+ end else if (r.istate == 7) begin
+ fp_mac_i.a = v.h1;
+ fp_mac_i.b = v.h1;
+ fp_mac_i.c = v.e1;
+ fp_mac_i.op = 0;
+ v.h2 = fp_mac_o.d[109:54];
+ end else if (r.istate == 8) begin
+ fp_mac_i.a = v.qa;
+ fp_mac_i.b = v.y2;
+ fp_mac_i.c = v.y2;
+ fp_mac_i.op = 1;
+ v.r0 = fp_mac_o.d;
+ end else if (r.istate == 9) begin
+ fp_mac_i.a = v.y2;
+ fp_mac_i.b = v.h2;
+ fp_mac_i.c = v.r0[109:54];
+ fp_mac_i.op = 0;
+ v.y3 = fp_mac_o.d[109:54];
+ end else if (r.istate == 10) begin
+ fp_mac_i.a = v.qa;
+ fp_mac_i.b = v.y3;
+ fp_mac_i.c = v.y3;
+ fp_mac_i.op = 1;
+ v.r0 = fp_mac_o.d;
+ end else if (r.istate == 11) begin
+ fp_mac_i.a = v.y3;
+ fp_mac_i.b = v.h2;
+ fp_mac_i.c = v.r0[109:54];
+ fp_mac_i.op = 0;
+ v.q0 = fp_mac_o.d[109:54];
+ end else if (r.istate == 12) begin
+ fp_mac_i.a = v.qa;
+ fp_mac_i.b = v.q0;
+ fp_mac_i.c = v.q0;
+ fp_mac_i.op = 1;
+ v.r1 = fp_mac_o.d;
+ v.q1 = v.q0;
+ if ($signed(v.r1[109:54]) > 0) begin
+ v.q1 = v.q1 + 1;
+ end
+ end else if (r.istate == 13) begin
+ fp_mac_i.a = v.qa;
+ fp_mac_i.b = v.q1;
+ fp_mac_i.c = v.q1;
+ fp_mac_i.op = 1;
+ v.r0 = fp_mac_o.d;
+ if (v.r0[109:54] == 0) begin
+ v.q0 = v.q1;
+ v.r1 = v.r0;
+ end
+ end else begin
+ fp_mac_i.a = 0;
+ fp_mac_i.b = 0;
+ fp_mac_i.c = 0;
+ fp_mac_i.op = 0;
+ end
+ end else if (r.state == 3) begin
+ fp_mac_i.a = 0;
+ fp_mac_i.b = 0;
+ fp_mac_i.c = 0;
+ fp_mac_i.op = 0;
+
+ v.mantissa_fdiv = {v.q0[54:0], 59'h0};
+
+ v.remainder_rnd = 2;
+ if ($signed(v.r1) > 0) begin
+ v.remainder_rnd = 1;
+ end else if (v.r1 == 0) begin
+ v.remainder_rnd = 0;
+ end
+
+ v.counter_fdiv = 0;
+ if (v.mantissa_fdiv[113] == 0) begin
+ v.mantissa_fdiv = {v.mantissa_fdiv[112:0], 1'h0};
+ v.counter_fdiv = 1;
+ end
+ if (v.op == 1) begin
+ v.counter_fdiv = 1;
+ if (v.mantissa_fdiv[113] == 0) begin
+ v.mantissa_fdiv = {v.mantissa_fdiv[112:0], 1'h0};
+ v.counter_fdiv = 0;
+ end
+ end
+
+ v.exponent_bias = 127;
+ if (v.fmt == 1) begin
+ v.exponent_bias = 1023;
+ end
+
+ v.sign_rnd = v.sign_fdiv;
+ v.exponent_rnd = v.exponent_fdiv + {3'h0, v.exponent_bias} - {12'h0, v.counter_fdiv};
+
+ v.counter_rnd = 0;
+ if ($signed(v.exponent_rnd) <= 0) begin
+ v.counter_rnd = 54;
+ if ($signed(v.exponent_rnd) > -54) begin
+ v.counter_rnd = 14'h1 - v.exponent_rnd;
+ end
+ v.exponent_rnd = 0;
+ end
+
+ v.mantissa_fdiv = v.mantissa_fdiv >> v.counter_rnd[5:0];
+
+ v.mantissa_rnd = {30'h0, v.mantissa_fdiv[113:90]};
+ v.grs = {v.mantissa_fdiv[89:88], |v.mantissa_fdiv[87:0]};
+ if (v.fmt == 1) begin
+ v.mantissa_rnd = {1'h0, v.mantissa_fdiv[113:61]};
+ v.grs = {v.mantissa_fdiv[60:59], |v.mantissa_fdiv[58:0]};
+ end
+
+ end else begin
+ fp_mac_i.a = 0;
+ fp_mac_i.b = 0;
+ fp_mac_i.c = 0;
+ fp_mac_i.op = 0;
+
+ end
+
+ fp_fdiv_o.fp_rnd.sig = v.sign_rnd;
+ fp_fdiv_o.fp_rnd.expo = v.exponent_rnd;
+ fp_fdiv_o.fp_rnd.mant = v.mantissa_rnd;
+ fp_fdiv_o.fp_rnd.rema = v.remainder_rnd;
+ fp_fdiv_o.fp_rnd.fmt = v.fmt;
+ fp_fdiv_o.fp_rnd.rm = v.rm;
+ fp_fdiv_o.fp_rnd.grs = v.grs;
+ fp_fdiv_o.fp_rnd.snan = v.snan;
+ fp_fdiv_o.fp_rnd.qnan = v.qnan;
+ fp_fdiv_o.fp_rnd.dbz = v.dbz;
+ fp_fdiv_o.fp_rnd.infs = v.infs;
+ fp_fdiv_o.fp_rnd.zero = v.zero;
+ fp_fdiv_o.fp_rnd.diff = 1'h0;
+ fp_fdiv_o.ready = v.ready;
+
+ rin = v;
+
+ end
+
+ always_ff @(posedge clock) begin
+ if (reset == 0) begin
+ r <= init_fp_fdiv_reg_functional;
+ end else begin
+ r <= rin;
+ end
+ end
+
+ end
+
+ if (PERFORMANCE == 0) begin
+
+ assign fp_mac_i.a = 0;
+ assign fp_mac_i.b = 0;
+ assign fp_mac_i.c = 0;
+ assign fp_mac_i.op = 0;
+
+ always_comb begin
+
+ v_fix = r_fix;
+
+ if (r_fix.state == 0) begin
+ if (fp_fdiv_i.op.fdiv) begin
+ v_fix.state = 1;
+ v_fix.istate = 54;
+ end
+ if (fp_fdiv_i.op.fsqrt) begin
+ v_fix.state = 1;
+ v_fix.istate = 53;
+ end
+ v_fix.ready = 0;
+ end else if (r_fix.state == 1) begin
+ if (v_fix.fmt == 0 & v_fix.istate == 29) begin
+ v_fix.state = 2;
+ end else if (v_fix.istate == 0) begin
+ v_fix.state = 2;
+ end else begin
+ v_fix.istate = v_fix.istate - 6'd1;
+ end
+ v_fix.ready = 0;
+ end else if (r_fix.state == 2) begin
+ v_fix.state = 3;
+ v_fix.ready = 0;
+ end else begin
+ v_fix.state = 0;
+ v_fix.ready = 1;
+ end
+
+ if (r_fix.state == 0) begin
+
+ v_fix.a = fp_fdiv_i.data1;
+ v_fix.b = fp_fdiv_i.data2;
+ v_fix.class_a = fp_fdiv_i.class1;
+ v_fix.class_b = fp_fdiv_i.class2;
+ v_fix.fmt = fp_fdiv_i.fmt;
+ v_fix.rm = fp_fdiv_i.rm;
+ v_fix.snan = 0;
+ v_fix.qnan = 0;
+ v_fix.dbz = 0;
+ v_fix.infs = 0;
+ v_fix.zero = 0;
+
+ if (fp_fdiv_i.op.fsqrt) begin
+ v_fix.b = 65'h07FF0000000000000;
+ v_fix.class_b = 0;
+ end
+
+ if (v_fix.class_a[8] | v_fix.class_b[8]) begin
+ v_fix.snan = 1;
+ end else if ((v_fix.class_a[3] | v_fix.class_a[4]) & (v_fix.class_b[3] | v_fix.class_b[4])) begin
+ v_fix.snan = 1;
+ end else if ((v_fix.class_a[0] | v_fix.class_a[7]) & (v_fix.class_b[0] | v_fix.class_b[7])) begin
+ v_fix.snan = 1;
+ end else if (v_fix.class_a[9] | v_fix.class_b[9]) begin
+ v_fix.qnan = 1;
+ end
+
+ if ((v_fix.class_a[0] | v_fix.class_a[7]) & (v_fix.class_b[1] | v_fix.class_b[2] | v_fix.class_b[3] | v_fix.class_b[4] | v_fix.class_b[5] | v_fix.class_b[6])) begin
+ v_fix.infs = 1;
+ end else if ((v_fix.class_b[3] | v_fix.class_b[4]) & (v_fix.class_a[1] | v_fix.class_a[2] | v_fix.class_a[5] | v_fix.class_a[6])) begin
+ v_fix.dbz = 1;
+ end
+
+ if ((v_fix.class_a[3] | v_fix.class_a[4]) | (v_fix.class_b[0] | v_fix.class_b[7])) begin
+ v_fix.zero = 1;
+ end
+
+ if (fp_fdiv_i.op.fsqrt) begin
+ if (v_fix.class_a[7]) begin
+ v_fix.infs = 1;
+ end
+ if (v_fix.class_a[0] | v_fix.class_a[1] | v_fix.class_a[2]) begin
+ v_fix.snan = 1;
+ end
+ end
+
+ v_fix.sign_fdiv = v_fix.a[64] ^ v_fix.b[64];
+
+ v_fix.exponent_fdiv = {2'h0, v_fix.a[63:52]} - {2'h0, v_fix.b[63:52]};
+ if (fp_fdiv_i.op.fsqrt) begin
+ v_fix.exponent_fdiv = ($signed({2'h0, v_fix.a[63:52]}) + $signed(-14'd2045)) >>> 1;
+ end
+
+ v_fix.q = 0;
+
+ v_fix.m = {4'h1, v_fix.b[51:0], 1'h0};
+ v_fix.r = {5'h1, v_fix.a[51:0]};
+ v_fix.op = 0;
+ if (fp_fdiv_i.op.fsqrt) begin
+ v_fix.m = 0;
+ if (v_fix.a[52] == 0) begin
+ v_fix.r = {v_fix.r[55:0], 1'h0};
+ end
+ v_fix.op = 1;
+ end
+
+ end else if (r_fix.state == 1) begin
+
+ if (v_fix.op == 1) begin
+ v_fix.m = {1'h0, v_fix.q, 1'h0};
+ v_fix.m[r_fix.istate] = 1;
+ end
+ v_fix.r = {v_fix.r[55:0], 1'h0};
+ v_fix.e = $signed(v_fix.r) - $signed(v_fix.m);
+ if (v_fix.e[56] == 0) begin
+ v_fix.q[r_fix.istate] = 1;
+ v_fix.r = v_fix.e;
+ end
+
+ end else if (r_fix.state == 2) begin
+
+ v_fix.mantissa_fdiv = {v_fix.q, v_fix.r[55:0], 54'h0};
+
+ v_fix.counter_fdiv = 0;
+ if (v_fix.mantissa_fdiv[164] == 0) begin
+ v_fix.counter_fdiv = 1;
+ end
+
+ v_fix.mantissa_fdiv = v_fix.mantissa_fdiv << v_fix.counter_fdiv;
+
+ v_fix.sign_rnd = v_fix.sign_fdiv;
+
+ v_fix.exponent_bias = 127;
+ if (v_fix.fmt == 1) begin
+ v_fix.exponent_bias = 1023;
+ end
+
+ v_fix.exponent_rnd = v_fix.exponent_fdiv + {3'h0,v_fix.exponent_bias} - {12'h0,v_fix.counter_fdiv};
+
+ v_fix.counter_rnd = 0;
+ if ($signed(v_fix.exponent_rnd) <= 0) begin
+ v_fix.counter_rnd = 54;
+ if ($signed(v_fix.exponent_rnd) > -54) begin
+ v_fix.counter_rnd = 14'h1 - v_fix.exponent_rnd;
+ end
+ v_fix.exponent_rnd = 0;
+ end
+
+ v_fix.mantissa_fdiv = v_fix.mantissa_fdiv >> v_fix.counter_rnd[5:0];
+
+ v_fix.mantissa_rnd = {30'h0, v_fix.mantissa_fdiv[164:141]};
+ v_fix.grs = {v_fix.mantissa_fdiv[140:139], |(v_fix.mantissa_fdiv[138:0])};
+ if (v_fix.fmt == 1) begin
+ v_fix.mantissa_rnd = {1'h0, v_fix.mantissa_fdiv[164:112]};
+ v_fix.grs = {v_fix.mantissa_fdiv[111:110], |(v_fix.mantissa_fdiv[109:0])};
+ end
+
+ end
+
+ fp_fdiv_o.fp_rnd.sig = v_fix.sign_rnd;
+ fp_fdiv_o.fp_rnd.expo = v_fix.exponent_rnd;
+ fp_fdiv_o.fp_rnd.mant = v_fix.mantissa_rnd;
+ fp_fdiv_o.fp_rnd.rema = 2'h0;
+ fp_fdiv_o.fp_rnd.fmt = v_fix.fmt;
+ fp_fdiv_o.fp_rnd.rm = v_fix.rm;
+ fp_fdiv_o.fp_rnd.grs = v_fix.grs;
+ fp_fdiv_o.fp_rnd.snan = v_fix.snan;
+ fp_fdiv_o.fp_rnd.qnan = v_fix.qnan;
+ fp_fdiv_o.fp_rnd.dbz = v_fix.dbz;
+ fp_fdiv_o.fp_rnd.infs = v_fix.infs;
+ fp_fdiv_o.fp_rnd.zero = v_fix.zero;
+ fp_fdiv_o.fp_rnd.diff = 1'h0;
+ fp_fdiv_o.ready = v_fix.ready;
+
+ rin_fix = v_fix;
+
+ end
+
+ always_ff @(posedge clock) begin
+ if (reset == 0) begin
+ r_fix <= init_fp_fdiv_reg_fixed;
+ end else begin
+ r_fix <= rin_fix;
+ end
+ end
+
+ end
+
+ endgenerate
+
+endmodule
diff --git a/rtl/fpu/float/fp_fma.sv b/rtl/fpu/float/fp_fma.sv
new file mode 100644
index 0000000..e9c6bda
--- /dev/null
+++ b/rtl/fpu/float/fp_fma.sv
@@ -0,0 +1,263 @@
+import lzc_wire::*;
+import fp_wire::*;
+
+module fp_fma (
+ input reset, // active-low synchronous reset: r_1/r_2 load their init constants on a clock edge while this is 0
+ input clock, // both pipeline registers update on the rising edge
+ input fp_fma_in_type fp_fma_i, // three operands, their class vectors, operation select, format and rounding mode
+ output fp_fma_out_type fp_fma_o, // registered pre-rounding bundle (fp_rnd) plus ready
+ input lzc_256_out_type lzc_o, // count returned by the leading-zero counter that lives outside this module
+ output lzc_256_in_type lzc_i // vector handed to that leading-zero counter
+);
+ // Two-register-stage multiply-add path. Stage 1 unpacks the operands, multiplies the mantissas and aligns addend and product; stage 2 adds, normalizes and extracts mantissa + guard/round/sticky for the rounding unit.
+ fp_fma_reg_type_1 r_1; // stage-1 pipeline register
+ fp_fma_reg_type_2 r_2; // stage-2 pipeline register
+
+ fp_fma_reg_type_1 rin_1; // next value of r_1
+ fp_fma_reg_type_2 rin_2; // next value of r_2
+
+ fp_fma_var_type_1 v_1; // combinational scratch variables of stage 1
+ fp_fma_var_type_2 v_2; // combinational scratch variables of stage 2
+
+ always_comb begin
+ // Stage 1: operand capture, special-case detection, mantissa multiply and alignment.
+ v_1.a = fp_fma_i.data1;
+ v_1.b = fp_fma_i.data2;
+ v_1.c = fp_fma_i.data3;
+ v_1.class_a = fp_fma_i.class1;
+ v_1.class_b = fp_fma_i.class2;
+ v_1.class_c = fp_fma_i.class3;
+ v_1.fmt = fp_fma_i.fmt;
+ v_1.rm = fp_fma_i.rm;
+ v_1.snan = 0;
+ v_1.qnan = 0;
+ v_1.dbz = 0;
+ v_1.infs = 0;
+ v_1.zero = 0;
+ v_1.ready = fp_fma_i.op.fmadd | fp_fma_i.op.fmsub | fp_fma_i.op.fnmsub | fp_fma_i.op.fnmadd | fp_fma_i.op.fadd | fp_fma_i.op.fsub | fp_fma_i.op.fmul; // set for every operation this unit serves
+ // fadd/fsub reuse the datapath as a*const +/- b: the second operand moves to the addend slot and the multiplier is replaced by a constant.
+ if (fp_fma_i.op.fadd | fp_fma_i.op.fsub) begin
+ v_1.c = v_1.b;
+ v_1.class_c = v_1.class_b;
+ v_1.b = 65'h07FF0000000000000; // exponent field 0x7FF (the 2047 offset subtracted in exponent_mul below) with zero fraction, so the product equals operand a
+ v_1.class_b = 10'h040; // only class bit 6 set -- presumably the "positive normal" class; confirm against the classifier
+ end
+ // fmul reuses the datapath as a*b + 0, where the zero addend carries the sign of the product.
+ if (fp_fma_i.op.fmul) begin
+ v_1.c = {v_1.a[64] ^ v_1.b[64], 64'h0000000000000000};
+ v_1.class_c = 0;
+ end
+ // Unpack sign / 12-bit exponent / 53-bit mantissa; the mantissa MSB is 1 whenever the exponent field is non-zero.
+ v_1.sign_a = v_1.a[64];
+ v_1.exponent_a = v_1.a[63:52];
+ v_1.mantissa_a = {|v_1.exponent_a, v_1.a[51:0]};
+
+ v_1.sign_b = v_1.b[64];
+ v_1.exponent_b = v_1.b[63:52];
+ v_1.mantissa_b = {|v_1.exponent_b, v_1.b[51:0]};
+
+ v_1.sign_c = v_1.c[64];
+ v_1.exponent_c = v_1.c[63:52];
+ v_1.mantissa_c = {|v_1.exponent_c, v_1.c[51:0]};
+ // Effective signs: the addend flips for fmsub/fnmadd/fsub, the product flips for fnmsub/fnmadd.
+ v_1.sign_add = v_1.sign_c ^ (fp_fma_i.op.fmsub | fp_fma_i.op.fnmadd | fp_fma_i.op.fsub);
+ v_1.sign_mul = (v_1.sign_a ^ v_1.sign_b) ^ (fp_fma_i.op.fnmsub | fp_fma_i.op.fnmadd);
+ // Special-case flags, evaluated as a priority chain. NOTE(review): class bit meanings are inferred from how the flags are named here (8 = signalling NaN, 9 = quiet NaN, 0/7 = infinities, 3/4 = zeros) -- confirm against fp_ext.
+ if (v_1.class_a[8] | v_1.class_b[8] | v_1.class_c[8]) begin
+ v_1.snan = 1;
+ end else if (((v_1.class_a[3] | v_1.class_a[4]) & (v_1.class_b[0] | v_1.class_b[7])) | ((v_1.class_b[3] | v_1.class_b[4]) & (v_1.class_a[0] | v_1.class_a[7]))) begin // one multiplicand in class 3/4 while the other is in class 0/7
+ v_1.snan = 1;
+ end else if (v_1.class_a[9] | v_1.class_b[9] | v_1.class_c[9]) begin
+ v_1.qnan = 1;
+ end else if (((v_1.class_a[0] | v_1.class_a[7]) | (v_1.class_b[0] | v_1.class_b[7])) & ((v_1.class_c[0] | v_1.class_c[7]) & (v_1.sign_add != v_1.sign_mul))) begin // class-0/7 product and class-0/7 addend with opposite effective signs
+ v_1.snan = 1;
+ end else if ((v_1.class_a[0] | v_1.class_a[7]) | (v_1.class_b[0] | v_1.class_b[7]) | (v_1.class_c[0] | v_1.class_c[7])) begin
+ v_1.infs = 1;
+ end
+ // Exponents widened to 14 bits; the product exponent is the sum of both operand exponents minus 2047.
+ v_1.exponent_add = $signed({2'h0, v_1.exponent_c});
+ v_1.exponent_mul = $signed({2'h0, v_1.exponent_a}) + $signed({2'h0, v_1.exponent_b}) - 14'd2047;
+ // An all-ones exponent field on the addend, or on either multiplicand, forces the corresponding exponent to 14'h0FFF.
+ if (&v_1.exponent_c) begin
+ v_1.exponent_add = 14'h0FFF;
+ end
+ if (&v_1.exponent_a | &v_1.exponent_b) begin
+ v_1.exponent_mul = 14'h0FFF;
+ end
+ // Place both mantissas into 164-bit lanes: the addend at [160:108], the 106-bit product at [161:56].
+ v_1.mantissa_add[163:161] = 0;
+ v_1.mantissa_add[160:108] = v_1.mantissa_c;
+ v_1.mantissa_add[107:0] = 0;
+ v_1.mantissa_mul[163:162] = 0;
+ v_1.mantissa_mul[161:56] = v_1.mantissa_a * v_1.mantissa_b;
+ v_1.mantissa_mul[55:0] = 0;
+ // Alignment: the lane with the smaller exponent is shifted right by the (capped) exponent difference.
+ v_1.exponent_dif = $signed(v_1.exponent_mul) - $signed(v_1.exponent_add);
+ v_1.counter_dif = 0;
+
+ v_1.exponent_neg = v_1.exponent_dif[13]; // sign bit of the difference: set when the addend exponent is the larger one
+
+ if (v_1.exponent_neg) begin // addend exponent larger: the product lane is shifted right by min(56, -dif)
+ v_1.counter_dif = 56;
+ if ($signed(v_1.exponent_dif) > -56) begin
+ v_1.counter_dif = -v_1.exponent_dif[6:0];
+ end
+ v_1.mantissa_l = v_1.mantissa_add;
+ v_1.mantissa_r = v_1.mantissa_mul;
+ end else begin // product exponent larger or equal: the addend lane is shifted right by min(108, dif)
+ v_1.counter_dif = 108;
+ if ($signed(v_1.exponent_dif) < 108) begin
+ v_1.counter_dif = v_1.exponent_dif[6:0];
+ end
+ v_1.mantissa_l = v_1.mantissa_mul;
+ v_1.mantissa_r = v_1.mantissa_add;
+ end
+
+ v_1.mantissa_r = v_1.mantissa_r >> v_1.counter_dif;
+ // Put the untouched and the shifted lane back under their original names.
+ if (v_1.exponent_neg) begin
+ v_1.mantissa_add = v_1.mantissa_l;
+ v_1.mantissa_mul = v_1.mantissa_r;
+ end else begin
+ v_1.mantissa_add = v_1.mantissa_r;
+ v_1.mantissa_mul = v_1.mantissa_l;
+ end
+ // Stage-1 register inputs.
+ rin_1.fmt = v_1.fmt;
+ rin_1.rm = v_1.rm;
+ rin_1.snan = v_1.snan;
+ rin_1.qnan = v_1.qnan;
+ rin_1.dbz = v_1.dbz;
+ rin_1.infs = v_1.infs;
+ rin_1.zero = v_1.zero;
+ rin_1.sign_mul = v_1.sign_mul;
+ rin_1.exponent_mul = v_1.exponent_mul;
+ rin_1.mantissa_mul = v_1.mantissa_mul;
+ rin_1.sign_add = v_1.sign_add;
+ rin_1.exponent_add = v_1.exponent_add;
+ rin_1.mantissa_add = v_1.mantissa_add;
+ rin_1.exponent_neg = v_1.exponent_neg;
+ rin_1.ready = v_1.ready;
+
+ end
+
+ always_comb begin
+ // Stage 2: starts from the registered stage-1 values, then adds, normalizes and extracts the rounding inputs.
+ v_2.fmt = r_1.fmt;
+ v_2.rm = r_1.rm;
+ v_2.snan = r_1.snan;
+ v_2.qnan = r_1.qnan;
+ v_2.dbz = r_1.dbz;
+ v_2.infs = r_1.infs;
+ v_2.zero = r_1.zero;
+ v_2.sign_mul = r_1.sign_mul;
+ v_2.exponent_mul = r_1.exponent_mul;
+ v_2.mantissa_mul = r_1.mantissa_mul;
+ v_2.sign_add = r_1.sign_add;
+ v_2.exponent_add = r_1.exponent_add;
+ v_2.mantissa_add = r_1.mantissa_add;
+ v_2.exponent_neg = r_1.exponent_neg;
+ v_2.ready = r_1.ready;
+ // The working exponent is that of the lane that was not shifted in stage 1.
+ if (v_2.exponent_neg) begin
+ v_2.exponent_mac = v_2.exponent_add;
+ end else begin
+ v_2.exponent_mac = v_2.exponent_mul;
+ end
+ // Negative terms are added in two's complement: invert here, the +1 is supplied as a carry-in term of the sum below.
+ if (v_2.sign_add) begin
+ v_2.mantissa_add = ~v_2.mantissa_add;
+ end
+ if (v_2.sign_mul) begin
+ v_2.mantissa_mul = ~v_2.mantissa_mul;
+ end
+
+ v_2.mantissa_mac = v_2.mantissa_add + v_2.mantissa_mul + {163'h0,v_2.sign_add} + {163'h0,v_2.sign_mul};
+ v_2.sign_mac = v_2.mantissa_mac[163]; // top bit of the 164-bit sum is taken as the sign
+
+ v_2.zero = ~|v_2.mantissa_mac; // all-zero sum; this replaces the zero flag copied from stage 1
+
+ if (v_2.zero) begin // zero sum: the sign is negative only when both terms were negative
+ v_2.sign_mac = v_2.sign_add & v_2.sign_mul;
+ end else if (v_2.sign_mac) begin // negative sum: convert to sign + magnitude
+ v_2.mantissa_mac = -v_2.mantissa_mac;
+ end
+
+ v_2.diff = v_2.sign_add ^ v_2.sign_mul; // set when the two terms had opposite effective signs
+ // Exponent offset removed below: 1918 unless fmt == 1, in which case 1022.
+ v_2.bias = 1918;
+ if (v_2.fmt == 1) begin
+ v_2.bias = 1022;
+ end
+ // Normalize: the sum without its sign bit is padded with ones to 256 bits for the counter, then shifted left by the inverted count.
+ lzc_i.a = {v_2.mantissa_mac[162:0], {93{1'b1}}};
+ v_2.counter_mac = ~lzc_o.c; // NOTE(review): lzc_256's output encoding is not visible here; the inversion suggests it reports the complement of the leading-zero count -- confirm
+ v_2.mantissa_mac = v_2.mantissa_mac << v_2.counter_mac;
+
+ v_2.sign_rnd = v_2.sign_mac;
+ v_2.exponent_rnd = v_2.exponent_mac - {3'h0, v_2.bias} - {6'h0, v_2.counter_mac};
+ // Non-positive exponent: shift the mantissa right by 1 - exponent (capped at 63) and force the exponent to 0.
+ v_2.counter_sub = 0;
+ if ($signed(v_2.exponent_rnd) <= 0) begin
+ v_2.counter_sub = 63;
+ if ($signed(v_2.exponent_rnd) > -63) begin
+ v_2.counter_sub = 14'h1 - v_2.exponent_rnd;
+ end
+ v_2.exponent_rnd = 0;
+ end
+
+ v_2.mantissa_mac = v_2.mantissa_mac >> v_2.counter_sub[5:0];
+ // Rounding inputs: 24 mantissa bits unless fmt == 1 (then 53), followed by guard, round and a sticky OR of everything below.
+ v_2.mantissa_rnd = {30'h0, v_2.mantissa_mac[162:139]};
+ v_2.grs = {v_2.mantissa_mac[138:137], |v_2.mantissa_mac[136:0]};
+ if (v_2.fmt == 1) begin
+ v_2.mantissa_rnd = {1'h0, v_2.mantissa_mac[162:110]};
+ v_2.grs = {v_2.mantissa_mac[109:108], |v_2.mantissa_mac[107:0]};
+ end
+ // Stage-2 register inputs.
+ rin_2.sign_rnd = v_2.sign_rnd;
+ rin_2.exponent_rnd = v_2.exponent_rnd;
+ rin_2.mantissa_rnd = v_2.mantissa_rnd;
+ rin_2.fmt = v_2.fmt;
+ rin_2.rm = v_2.rm;
+ rin_2.grs = v_2.grs;
+ rin_2.snan = v_2.snan;
+ rin_2.qnan = v_2.qnan;
+ rin_2.dbz = v_2.dbz;
+ rin_2.infs = v_2.infs;
+ rin_2.diff = v_2.diff;
+ rin_2.zero = v_2.zero;
+ rin_2.ready = v_2.ready;
+
+ end
+
+ always_comb begin
+ // Outputs come straight from the stage-2 register; rema is tied to 0 on this path.
+ fp_fma_o.fp_rnd.sig = r_2.sign_rnd;
+ fp_fma_o.fp_rnd.expo = r_2.exponent_rnd;
+ fp_fma_o.fp_rnd.mant = r_2.mantissa_rnd;
+ fp_fma_o.fp_rnd.rema = 2'h0;
+ fp_fma_o.fp_rnd.fmt = r_2.fmt;
+ fp_fma_o.fp_rnd.rm = r_2.rm;
+ fp_fma_o.fp_rnd.grs = r_2.grs;
+ fp_fma_o.fp_rnd.snan = r_2.snan;
+ fp_fma_o.fp_rnd.qnan = r_2.qnan;
+ fp_fma_o.fp_rnd.dbz = r_2.dbz;
+ fp_fma_o.fp_rnd.infs = r_2.infs;
+ fp_fma_o.fp_rnd.zero = r_2.zero;
+ fp_fma_o.fp_rnd.diff = r_2.diff;
+ fp_fma_o.ready = r_2.ready;
+
+ end
+ // Pipeline registers: both stages advance together on every rising clock edge.
+ always_ff @(posedge clock) begin
+ if (reset == 0) begin
+ r_1 <= init_fp_fma_reg_1;
+ r_2 <= init_fp_fma_reg_2;
+ end else begin
+ r_1 <= rin_1;
+ r_2 <= rin_2;
+ end
+ end
+
+endmodule
diff --git a/rtl/fpu/float/fp_mac.sv b/rtl/fpu/float/fp_mac.sv
new file mode 100644
index 0000000..a471e36
--- /dev/null
+++ b/rtl/fpu/float/fp_mac.sv
@@ -0,0 +1,21 @@
+import fp_wire::*;
+
+module fp_mac (
+  input reset,
+  input clock,
+  input fp_mac_in_type fp_mac_i,
+  output fp_mac_out_type fp_mac_o
+);
+
+  // Combinational multiply-accumulate on 110-bit values:
+  //   op == 0 : d = {a, 54'h0} + (b * c)
+  //   op != 0 : d = {a, 54'h0} - (b * c)
+  // b and c are multiplied as signed numbers and only the low 110 bits of
+  // the product take part in the sum. reset and clock are not used here.
+
+  logic [111:0] product;  // full signed product b * c
+  logic [109:0] addend;   // a placed in the upper 56 bits
+  logic [109:0] term;     // low product bits, negated when op is set
+
+  assign product = $signed(fp_mac_i.b) * $signed(fp_mac_i.c);
+  assign addend = {fp_mac_i.a, 54'h0};
+  assign term = fp_mac_i.op ? -product[109:0] : product[109:0];
+
+  assign fp_mac_o.d = addend + term;
+
+endmodule
diff --git a/rtl/fpu/float/fp_max.sv b/rtl/fpu/float/fp_max.sv
new file mode 100644
index 0000000..14b3ab6
--- /dev/null
+++ b/rtl/fpu/float/fp_max.sv
@@ -0,0 +1,129 @@
+import fp_wire::*;
+
+module fp_max (
+  input fp_max_in_type fp_max_i,
+  output fp_max_out_type fp_max_o
+);
+
+  // Floating-point minimum (rm == 0) and maximum (rm == 1).
+  //
+  // Inputs : data1/data2  raw operands, returned unchanged when selected
+  //          ext1/ext2    extended operands; bit 64 is the sign, bits 63:0
+  //                       are compared as an unsigned magnitude
+  //          class1/2     class vectors; this module treats bit 8 as
+  //                       signalling NaN and bit 9 as quiet NaN
+  //          fmt          0 selects the 32-bit canonical NaN pattern,
+  //                       anything else the 64-bit one
+  // Outputs: result       selected operand or canonical NaN
+  //          flags        bit 4 is set whenever an operand is a signalling NaN
+  //
+  // NaN rule: if both operands are NaN (in any signalling/quiet
+  // combination) the result is the canonical NaN; if exactly one is NaN the
+  // other operand is returned. The previous implementation returned the raw
+  // quiet-NaN operand when it was paired with a signalling NaN.
+  //
+  // Any other rm value leaves result and flags at 0.
+
+  logic [63:0] data1;
+  logic [63:0] data2;
+  logic [64:0] extend1;
+  logic [64:0] extend2;
+  logic [1:0] fmt;
+  logic [2:0] rm;
+  logic [9:0] class1;
+  logic [9:0] class2;
+
+  logic [63:0] nan;
+  logic comp;
+
+  logic is_nan1;       // operand 1 is a NaN of either kind
+  logic is_nan2;       // operand 2 is a NaN of either kind
+  logic first_is_min;  // operand 1 is the one fmin would return
+
+  logic [63:0] result;
+  logic [4:0] flags;
+
+  always_comb begin
+
+    data1 = fp_max_i.data1;
+    data2 = fp_max_i.data2;
+    extend1 = fp_max_i.ext1;
+    extend2 = fp_max_i.ext2;
+    fmt = fp_max_i.fmt;
+    rm = fp_max_i.rm;
+    class1 = fp_max_i.class1;
+    class2 = fp_max_i.class2;
+
+    result = 0;
+    flags = 0;
+
+    // Canonical NaN for the selected format.
+    nan = 64'h7ff8000000000000;
+    if (fmt == 0) begin
+      nan = 64'h000000007fc00000;
+    end
+
+    // Magnitude comparison: set when |operand 1| > |operand 2|.
+    comp = 0;
+    if (extend1[63:0] > extend2[63:0]) begin
+      comp = 1;
+    end
+
+    is_nan1 = class1[8] | class1[9];
+    is_nan2 = class2[8] | class2[9];
+
+    // Ordering of two non-NaN operands:
+    //  - different signs: the negative operand is the minimum
+    //  - both negative  : the larger magnitude is the minimum
+    //  - both positive  : the smaller (or equal) magnitude is the minimum
+    if ((extend1[64] ^ extend2[64]) == 1) begin
+      first_is_min = extend1[64];
+    end else if (extend1[64] == 1) begin
+      first_is_min = comp;
+    end else begin
+      first_is_min = ~comp;
+    end
+
+    if ((rm == 0) || (rm == 1)) begin
+      // A signalling NaN raises the invalid flag even when the result is
+      // the other, non-NaN operand.
+      flags[4] = class1[8] | class2[8];
+
+      if (is_nan1 & is_nan2) begin
+        result = nan;
+      end else if (is_nan1) begin
+        result = data2;
+      end else if (is_nan2) begin
+        result = data1;
+      end else if (rm == 0) begin
+        result = first_is_min ? data1 : data2;  // minimum
+      end else begin
+        result = first_is_min ? data2 : data1;  // maximum
+      end
+    end
+
+    fp_max_o.result = result;
+    fp_max_o.flags = flags;
+
+  end
+
+endmodule
diff --git a/rtl/fpu/float/fp_rnd.sv b/rtl/fpu/float/fp_rnd.sv
new file mode 100644
index 0000000..8f69e26
--- /dev/null
+++ b/rtl/fpu/float/fp_rnd.sv
@@ -0,0 +1,194 @@
+import fp_wire::*;
+
+module fp_rnd (
+ input fp_rnd_in_type fp_rnd_i, // sign, exponent, mantissa, guard/round/sticky, remainder code, format, rounding mode and special-case flags
+ output fp_rnd_out_type fp_rnd_o // packed 64-bit result and 5-bit flags
+);
+ // Combinational rounding and packing stage: applies the rounding mode to (mant, grs, rema), fixes up the exponent, derives the flags and assembles the result in the 32-bit layout (fmt 0, low word) or the 64-bit layout (fmt 1).
+ logic sig;
+ logic [13:0] expo;
+ logic [53:0] mant;
+ logic [1:0] rema;
+ logic [1:0] fmt;
+ logic [2:0] rm;
+ logic [2:0] grs;
+ logic snan;
+ logic qnan;
+ logic dbz;
+ logic infs;
+ logic zero;
+ logic diff;
+ // odd/rndup/rnddn/shift are internal rounding decisions; result/flags are the values driven onto fp_rnd_o.
+ logic odd;
+ logic rndup;
+ logic rnddn;
+ logic shift;
+ logic [63:0] result;
+ logic [4:0] flags;
+
+ always_comb begin
+ // Local working copies of the inputs (sig, expo and mant are modified below).
+ sig = fp_rnd_i.sig;
+ expo = fp_rnd_i.expo;
+ mant = fp_rnd_i.mant;
+ rema = fp_rnd_i.rema;
+ fmt = fp_rnd_i.fmt;
+ rm = fp_rnd_i.rm;
+ grs = fp_rnd_i.grs;
+ snan = fp_rnd_i.snan;
+ qnan = fp_rnd_i.qnan;
+ dbz = fp_rnd_i.dbz;
+ infs = fp_rnd_i.infs;
+ zero = fp_rnd_i.zero;
+ diff = fp_rnd_i.diff;
+
+ result = 0;
+ flags = 0;
+ // NOTE(review): flag bit usage below looks like the RISC-V fflags order (bit 4 NV, 3 DZ, 2 OF, 1 UF, 0 NX) -- confirm with the consumer of fp_rnd_o.flags.
+ odd = mant[0] | |grs[1:0] | (rema == 1); // condition under which rne rounds up once the guard bit is set
+ flags[0] = (rema != 0) | |grs; // something non-zero is being discarded
+ // Rounding-mode decode: rndup adds one unit to mant below; rnddn enables the clamp-to-largest-finite step further down.
+ rndup = 0;
+ rnddn = 0;
+ if (rm == 0) begin //rne
+ if (grs[2] & odd) begin
+ rndup = 1;
+ end
+ end else if (rm == 1) begin //rtz
+ rnddn = 1;
+ end else if (rm == 2) begin //rdn
+ if (sig & flags[0]) begin
+ rndup = 1;
+ end else if (~sig & zero & diff) begin // positive zero with diff set becomes negative zero in this mode
+ sig = ~sig;
+ end else if (~sig) begin
+ rnddn = 1;
+ end
+ end else if (rm == 3) begin //rup
+ if (~sig & flags[0]) begin
+ rndup = 1;
+ end else if (sig) begin
+ rnddn = 1;
+ end
+ end else if (rm == 4) begin //rmm
+ if (grs[2] & flags[0]) begin
+ rndup = 1;
+ end
+ end
+
+ //if (expo == 0) begin
+ // flags[1] = flags[0];
+ //end
+
+ mant = mant + {53'h0, rndup};
+ // A zero exponent whose incremented mantissa now has bit 23 (fmt 0) / bit 52 (fmt 1) set is promoted to exponent 1.
+ if (rndup == 1) begin
+ if (fmt == 0) begin
+ if (expo == 0) begin
+ if (mant[23]) begin
+ expo = 1;
+ end
+ end
+ end else if (fmt == 1) begin
+ if (expo == 0) begin
+ if (mant[52]) begin
+ expo = 1;
+ end
+ end
+ end
+ end
+ // With rnddn set, an exponent at or above 255 (fmt 0) / 2047 (fmt 1) is clamped to the largest finite encoding and the flags become 5'b00101.
+ if (rnddn == 1) begin
+ if (fmt == 0) begin
+ if (expo >= 255) begin
+ expo = 254;
+ mant = {31'b0, {23{1'b1}}};
+ flags = 5'b00101;
+ end
+ end else if (fmt == 1) begin
+ if (expo >= 2047) begin
+ expo = 2046;
+ mant = {2'b0, {52{1'b1}}};
+ flags = 5'b00101;
+ end
+ end
+ end
+ // Mantissa carry-out (bit 24 for fmt 0, bit 53 for fmt 1): bump the exponent and shift the mantissa right by one.
+ shift = 0;
+ if (fmt == 0) begin
+ if (mant[24]) begin
+ shift = 1;
+ end
+ end else if (fmt == 1) begin
+ if (mant[53]) begin
+ shift = 1;
+ end
+ end
+
+ expo = expo + {13'h0, shift};
+ mant = mant >> shift;
+ // With a zero exponent after rounding, flags[1] mirrors flags[0].
+ if (expo == 0) begin
+ flags[1] = flags[0];
+ end
+ // Rounded up to exponent 1 with an all-zero fraction: flags[1] is re-derived from the grs pattern, using different patterns for rm 2/3 than for the other modes.
+ if (rndup == 1) begin
+ if (expo == 1) begin
+ if (fmt == 0 && |mant[22:0] == 0) begin
+ flags[1] = rm == 2 || rm == 3 ? ((grs == 1) | (grs == 2) | (grs == 3) | (grs == 4)) : ((grs == 4) | (grs == 5));
+ end else if (fmt == 1 && |mant[51:0] == 0) begin
+ flags[1] = rm == 2 || rm == 3 ? ((grs == 1) | (grs == 2) | (grs == 3) | (grs == 4)) : ((grs == 4) | (grs == 5));
+ end
+ end
+ end
+ // Special cases replace the computed flags entirely (priority: snan, qnan, dbz, infs, zero).
+ if (snan) begin
+ flags = 5'b10000;
+ end else if (qnan) begin
+ flags = 5'b00000;
+ end else if (dbz) begin
+ flags = 5'b01000;
+ end else if (infs) begin
+ flags = 5'b00000;
+ end else if (zero) begin
+ flags = 5'b00000;
+ end
+ // Pack the result with the same priority; an exponent above 254 (fmt 0) / 2046 (fmt 1) yields an all-ones exponent with zero fraction and flags 5'b00101.
+ if (fmt == 0) begin
+ if (snan | qnan) begin
+ result = {32'h00000000, 1'h0, 8'hFF, 23'h400000};
+ end else if (dbz | infs) begin
+ result = {32'h00000000, sig, 8'hFF, 23'h000000};
+ end else if (zero) begin
+ result = {32'h00000000, sig, 8'h00, 23'h000000};
+ end else if (expo == 0) begin
+ result = {32'h00000000, sig, 8'h00, mant[22:0]};
+ end else if ($signed(expo) > 254) begin
+ flags = 5'b00101;
+ result = {32'h00000000, sig, 8'hFF, 23'h000000};
+ end else begin
+ result = {32'h00000000, sig, expo[7:0], mant[22:0]};
+ end
+ end else if (fmt == 1) begin
+ if (snan | qnan) begin
+ result = {1'h0, 11'h7FF, 52'h8000000000000};
+ end else if (dbz | infs) begin
+ result = {sig, 11'h7FF, 52'h0000000000000};
+ end else if (zero) begin
+ result = {sig, 11'h000, 52'h0000000000000};
+ end else if (expo == 0) begin
+ result = {sig, 11'h000, mant[51:0]};
+ end else if ($signed(expo) > 2046) begin
+ flags = 5'b00101;
+ result = {sig, 11'h7FF, 52'h0000000000000};
+ end else begin
+ result = {sig, expo[10:0], mant[51:0]};
+ end
+ end
+
+ fp_rnd_o.result = result;
+ fp_rnd_o.flags = flags;
+
+ end
+
+endmodule
diff --git a/rtl/fpu/float/fp_sgnj.sv b/rtl/fpu/float/fp_sgnj.sv
new file mode 100644
index 0000000..8703e4b
--- /dev/null
+++ b/rtl/fpu/float/fp_sgnj.sv
@@ -0,0 +1,47 @@
+import fp_wire::*;
+
+module fp_sgnj (
+  input fp_sgnj_in_type fp_sgnj_i,
+  output fp_sgnj_out_type fp_sgnj_o
+);
+
+  // Sign injection. The magnitude bits always come from data1; the sign
+  // bit is chosen by rm:
+  //   rm 0 : sign of data2
+  //   rm 1 : inverted sign of data2
+  //   rm 2 : sign of data1 XOR sign of data2
+  //   else : sign bit left at 0
+  // fmt 0 works on bits 31:0 (upper word stays 0), fmt 1 on bits 63:0,
+  // any other fmt produces 0.
+
+  logic [63:0] magnitude_src;
+  logic [63:0] sign_src;
+  logic [63:0] joined;
+
+  always_comb begin
+
+    magnitude_src = fp_sgnj_i.data1;
+    sign_src = fp_sgnj_i.data2;
+
+    joined = 0;
+
+    case (fp_sgnj_i.fmt)
+      2'd0: begin
+        joined[30:0] = magnitude_src[30:0];
+        case (fp_sgnj_i.rm)
+          3'd0: joined[31] = sign_src[31];
+          3'd1: joined[31] = ~sign_src[31];
+          3'd2: joined[31] = magnitude_src[31] ^ sign_src[31];
+          default: ;
+        endcase
+      end
+      2'd1: begin
+        joined[62:0] = magnitude_src[62:0];
+        case (fp_sgnj_i.rm)
+          3'd0: joined[63] = sign_src[63];
+          3'd1: joined[63] = ~sign_src[63];
+          3'd2: joined[63] = magnitude_src[63] ^ sign_src[63];
+          default: ;
+        endcase
+      end
+      default: ;
+    endcase
+
+    fp_sgnj_o.result = joined;
+
+  end
+
+endmodule
diff --git a/rtl/fpu/float/fp_unit.sv b/rtl/fpu/float/fp_unit.sv
new file mode 100644
index 0000000..71ddf14
--- /dev/null
+++ b/rtl/fpu/float/fp_unit.sv
@@ -0,0 +1,191 @@
+// https://github.com/taneroksuz/fpu-sp
+
+import lzc_wire::*;
+import fp_wire::*;
+
+module fp_unit (
+  input reset,
+  input clock,
+  input fp_unit_in_type fp_unit_i,
+  output fp_unit_out_type fp_unit_o
+);
+
+  // Structural top level of the floating-point unit: instantiates the
+  // leading-zero counters, the operand extension units, every arithmetic
+  // sub-unit and the rounding stage, and wires all of them to fp_exe,
+  // which is the only instance connected to the module ports.
+
+  // Leading-zero counter bundles: 1..3 serve the fp_ext instances,
+  // 4 serves fp_cvt, and the 256-bit one serves fp_fma.
+  lzc_64_in_type lzc1_64_i, lzc2_64_i, lzc3_64_i, lzc4_64_i;
+  lzc_64_out_type lzc1_64_o, lzc2_64_o, lzc3_64_o, lzc4_64_o;
+  lzc_256_in_type lzc_256_i;
+  lzc_256_out_type lzc_256_o;
+
+  // Operand extension bundles, one pair per operand.
+  fp_ext_in_type fp_ext1_i, fp_ext2_i, fp_ext3_i;
+  fp_ext_out_type fp_ext1_o, fp_ext2_o, fp_ext3_o;
+
+  // Compare, min/max, sign injection, fused multiply-add and rounding.
+  fp_cmp_in_type fp_cmp_i;
+  fp_cmp_out_type fp_cmp_o;
+  fp_max_in_type fp_max_i;
+  fp_max_out_type fp_max_o;
+  fp_sgnj_in_type fp_sgnj_i;
+  fp_sgnj_out_type fp_sgnj_o;
+  fp_fma_in_type fp_fma_i;
+  fp_fma_out_type fp_fma_o;
+  fp_rnd_in_type fp_rnd_i;
+  fp_rnd_out_type fp_rnd_o;
+
+  // Conversions.
+  fp_cvt_f2f_in_type fp_cvt_f2f_i;
+  fp_cvt_f2f_out_type fp_cvt_f2f_o;
+  fp_cvt_f2i_in_type fp_cvt_f2i_i;
+  fp_cvt_f2i_out_type fp_cvt_f2i_o;
+  fp_cvt_i2f_in_type fp_cvt_i2f_i;
+  fp_cvt_i2f_out_type fp_cvt_i2f_o;
+
+  // Divide / square root and the multiply-accumulate block it is wired to.
+  fp_mac_in_type fp_mac_i;
+  fp_mac_out_type fp_mac_o;
+  fp_fdiv_in_type fp_fdiv_i;
+  fp_fdiv_out_type fp_fdiv_o;
+
+  // ---------------------------------------------------------------------
+  // Leading-zero counters
+  // ---------------------------------------------------------------------
+  lzc_64 lzc_64_comp_1 (.a(lzc1_64_i.a), .c(lzc1_64_o.c), .v(lzc1_64_o.v));
+  lzc_64 lzc_64_comp_2 (.a(lzc2_64_i.a), .c(lzc2_64_o.c), .v(lzc2_64_o.v));
+  lzc_64 lzc_64_comp_3 (.a(lzc3_64_i.a), .c(lzc3_64_o.c), .v(lzc3_64_o.v));
+  lzc_64 lzc_64_comp_4 (.a(lzc4_64_i.a), .c(lzc4_64_o.c), .v(lzc4_64_o.v));
+
+  lzc_256 lzc_256_comp (.a(lzc_256_i.a), .c(lzc_256_o.c), .v(lzc_256_o.v));
+
+  // ---------------------------------------------------------------------
+  // Operand extension, each with its own 64-bit counter
+  // ---------------------------------------------------------------------
+  fp_ext fp_ext_comp_1 (
+    .lzc_i(lzc1_64_i),
+    .lzc_o(lzc1_64_o),
+    .fp_ext_i(fp_ext1_i),
+    .fp_ext_o(fp_ext1_o)
+  );
+
+  fp_ext fp_ext_comp_2 (
+    .lzc_i(lzc2_64_i),
+    .lzc_o(lzc2_64_o),
+    .fp_ext_i(fp_ext2_i),
+    .fp_ext_o(fp_ext2_o)
+  );
+
+  fp_ext fp_ext_comp_3 (
+    .lzc_i(lzc3_64_i),
+    .lzc_o(lzc3_64_o),
+    .fp_ext_i(fp_ext3_i),
+    .fp_ext_o(fp_ext3_o)
+  );
+
+  // ---------------------------------------------------------------------
+  // Sub-units without clock or reset
+  // ---------------------------------------------------------------------
+  fp_cmp fp_cmp_comp (.fp_cmp_i(fp_cmp_i), .fp_cmp_o(fp_cmp_o));
+
+  fp_max fp_max_comp (.fp_max_i(fp_max_i), .fp_max_o(fp_max_o));
+
+  fp_sgnj fp_sgnj_comp (.fp_sgnj_i(fp_sgnj_i), .fp_sgnj_o(fp_sgnj_o));
+
+  fp_rnd fp_rnd_comp (.fp_rnd_i(fp_rnd_i), .fp_rnd_o(fp_rnd_o));
+
+  fp_cvt fp_cvt_comp (
+    .lzc_i(lzc4_64_i),
+    .lzc_o(lzc4_64_o),
+    .fp_cvt_f2f_i(fp_cvt_f2f_i),
+    .fp_cvt_f2f_o(fp_cvt_f2f_o),
+    .fp_cvt_f2i_i(fp_cvt_f2i_i),
+    .fp_cvt_f2i_o(fp_cvt_f2i_o),
+    .fp_cvt_i2f_i(fp_cvt_i2f_i),
+    .fp_cvt_i2f_o(fp_cvt_i2f_o)
+  );
+
+  // ---------------------------------------------------------------------
+  // Clocked sub-units
+  // ---------------------------------------------------------------------
+  fp_fma fp_fma_comp (
+    .clock(clock),
+    .reset(reset),
+    .fp_fma_i(fp_fma_i),
+    .fp_fma_o(fp_fma_o),
+    .lzc_i(lzc_256_i),
+    .lzc_o(lzc_256_o)
+  );
+
+  // fp_fdiv and fp_mac share the fp_mac_i / fp_mac_o pair.
+  fp_fdiv fp_fdiv_comp (
+    .clock(clock),
+    .reset(reset),
+    .fp_fdiv_i(fp_fdiv_i),
+    .fp_fdiv_o(fp_fdiv_o),
+    .fp_mac_i(fp_mac_i),
+    .fp_mac_o(fp_mac_o)
+  );
+
+  fp_mac fp_mac_comp (
+    .clock(clock),
+    .reset(reset),
+    .fp_mac_i(fp_mac_i),
+    .fp_mac_o(fp_mac_o)
+  );
+
+  // ---------------------------------------------------------------------
+  // Execute unit: connected to every sub-unit bundle and to the ports
+  // ---------------------------------------------------------------------
+  fp_exe fp_exe_comp (
+    .fp_exe_i(fp_unit_i.fp_exe_i),
+    .fp_exe_o(fp_unit_o.fp_exe_o),
+    .fp_ext1_i(fp_ext1_i),
+    .fp_ext1_o(fp_ext1_o),
+    .fp_ext2_i(fp_ext2_i),
+    .fp_ext2_o(fp_ext2_o),
+    .fp_ext3_i(fp_ext3_i),
+    .fp_ext3_o(fp_ext3_o),
+    .fp_cmp_i(fp_cmp_i),
+    .fp_cmp_o(fp_cmp_o),
+    .fp_max_i(fp_max_i),
+    .fp_max_o(fp_max_o),
+    .fp_sgnj_i(fp_sgnj_i),
+    .fp_sgnj_o(fp_sgnj_o),
+    .fp_cvt_f2f_i(fp_cvt_f2f_i),
+    .fp_cvt_f2f_o(fp_cvt_f2f_o),
+    .fp_cvt_f2i_i(fp_cvt_f2i_i),
+    .fp_cvt_f2i_o(fp_cvt_f2i_o),
+    .fp_cvt_i2f_i(fp_cvt_i2f_i),
+    .fp_cvt_i2f_o(fp_cvt_i2f_o),
+    .fp_fma_i(fp_fma_i),
+    .fp_fma_o(fp_fma_o),
+    .fp_fdiv_i(fp_fdiv_i),
+    .fp_fdiv_o(fp_fdiv_o),
+    .fp_rnd_i(fp_rnd_i),
+    .fp_rnd_o(fp_rnd_o)
+  );
+
+endmodule
diff --git a/rtl/fpu/float/fp_wire.sv b/rtl/fpu/float/fp_wire.sv
new file mode 100644
index 0000000..be5ebed
--- /dev/null
+++ b/rtl/fpu/float/fp_wire.sv
@@ -0,0 +1,607 @@
+package fp_wire;
+
+ typedef struct packed {
+ logic fmadd;
+ logic fmsub;
+ logic fnmadd;
+ logic fnmsub;
+ logic fadd;
+ logic fsub;
+ logic fmul;
+ logic fdiv;
+ logic fsqrt;
+ logic fsgnj;
+ logic fcmp;
+ logic fmax;
+ logic fclass;
+ logic fmv_i2f;
+ logic fmv_f2i;
+ logic fcvt_f2f;
+ logic fcvt_i2f;
+ logic fcvt_f2i;
+ logic [1:0] fcvt_op;
+ } fp_operation_type;
+
+ parameter fp_operation_type init_fp_operation = '{
+ fmadd : 0,
+ fmsub : 0,
+ fnmadd : 0,
+ fnmsub : 0,
+ fadd : 0,
+ fsub : 0,
+ fmul : 0,
+ fdiv : 0,
+ fsqrt : 0,
+ fsgnj : 0,
+ fcmp : 0,
+ fmax : 0,
+ fclass : 0,
+ fmv_i2f : 0,
+ fmv_f2i : 0,
+ fcvt_f2f : 0,
+ fcvt_i2f : 0,
+ fcvt_f2i : 0,
+ fcvt_op : 0
+ };
+
+ typedef struct packed {
+ logic [63:0] data1;
+ logic [63:0] data2;
+ logic [63:0] data3;
+ fp_operation_type op;
+ logic [1:0] fmt;
+ logic [2:0] rm;
+ logic enable;
+ } fp_exe_in_type;
+
+ typedef struct packed {
+ logic [63:0] result;
+ logic [4:0] flags;
+ logic ready;
+ } fp_exe_out_type;
+
+ typedef struct packed {
+ logic [64:0] data1;
+ logic [64:0] data2;
+ logic [2:0] rm;
+ logic [9:0] class1;
+ logic [9:0] class2;
+ } fp_cmp_in_type;
+
+ typedef struct packed {
+ logic [63:0] result;
+ logic [4:0] flags;
+ } fp_cmp_out_type;
+
+ typedef struct packed {
+ logic [63:0] data1;
+ logic [63:0] data2;
+ logic [64:0] ext1;
+ logic [64:0] ext2;
+ logic [1:0] fmt;
+ logic [2:0] rm;
+ logic [9:0] class1;
+ logic [9:0] class2;
+ } fp_max_in_type;
+
+ typedef struct packed {
+ logic [63:0] result;
+ logic [4:0] flags;
+ } fp_max_out_type;
+
+ typedef struct packed {
+ logic [63:0] data1;
+ logic [63:0] data2;
+ logic [1:0] fmt;
+ logic [2:0] rm;
+ } fp_sgnj_in_type;
+
+ typedef struct packed {logic [63:0] result;} fp_sgnj_out_type;
+
+ typedef struct packed {
+ logic [63:0] data;
+ logic [1:0] fmt;
+ } fp_ext_in_type;
+
+ typedef struct packed {
+ logic [64:0] result;
+ logic [9:0] classification;
+ } fp_ext_out_type;
+
+ typedef struct packed {fp_exe_in_type fp_exe_i;} fp_unit_in_type;
+
+ typedef struct packed {fp_exe_out_type fp_exe_o;} fp_unit_out_type;
+
+ typedef struct packed {
+ logic sig;
+ logic [13:0] expo;
+ logic [53:0] mant;
+ logic [1:0] rema;
+ logic [1:0] fmt;
+ logic [2:0] rm;
+ logic [2:0] grs;
+ logic snan;
+ logic qnan;
+ logic dbz;
+ logic infs;
+ logic zero;
+ logic diff;
+ } fp_rnd_in_type;
+
+ parameter fp_rnd_in_type init_fp_rnd_in = '{
+ sig : 0,
+ expo : 0,
+ mant : 0,
+ rema : 0,
+ fmt : 0,
+ rm : 0,
+ grs : 0,
+ snan : 0,
+ qnan : 0,
+ dbz : 0,
+ infs : 0,
+ zero : 0,
+ diff : 0
+ };
+
+ typedef struct packed {
+ logic [63:0] result;
+ logic [4:0] flags;
+ } fp_rnd_out_type;
+
+ typedef struct packed {
+ logic [64:0] data1;
+ logic [64:0] data2;
+ logic [64:0] data3;
+ logic [9:0] class1;
+ logic [9:0] class2;
+ logic [9:0] class3;
+ fp_operation_type op;
+ logic [1:0] fmt;
+ logic [2:0] rm;
+ } fp_fma_in_type;
+
+ typedef struct packed {
+ fp_rnd_in_type fp_rnd;
+ logic ready;
+ } fp_fma_out_type;
+
+ typedef struct packed {
+ logic [1:0] fmt;
+ logic [2:0] rm;
+ logic snan;
+ logic qnan;
+ logic dbz;
+ logic infs;
+ logic zero;
+ logic sign_mul;
+ logic [13:0] exponent_mul;
+ logic [163:0] mantissa_mul;
+ logic sign_add;
+ logic [13:0] exponent_add;
+ logic [163:0] mantissa_add;
+ logic exponent_neg;
+ logic ready;
+ } fp_fma_reg_type_1;
+
+ parameter fp_fma_reg_type_1 init_fp_fma_reg_1 = '{
+ fmt : 0,
+ rm : 0,
+ snan : 0,
+ qnan : 0,
+ dbz : 0,
+ infs : 0,
+ zero : 0,
+ sign_mul : 0,
+ exponent_mul : 0,
+ mantissa_mul : 0,
+ sign_add : 0,
+ exponent_add : 0,
+ mantissa_add : 0,
+ exponent_neg : 0,
+ ready : 0
+ };
+
+ typedef struct packed {
+ logic [64:0] a;
+ logic [64:0] b;
+ logic [64:0] c;
+ logic [9:0] class_a;
+ logic [9:0] class_b;
+ logic [9:0] class_c;
+ logic [1:0] fmt;
+ logic [2:0] rm;
+ logic snan;
+ logic qnan;
+ logic dbz;
+ logic infs;
+ logic zero;
+ logic sign_a;
+ logic [11:0] exponent_a;
+ logic [52:0] mantissa_a;
+ logic sign_b;
+ logic [11:0] exponent_b;
+ logic [52:0] mantissa_b;
+ logic sign_c;
+ logic [11:0] exponent_c;
+ logic [52:0] mantissa_c;
+ logic sign_mul;
+ logic [13:0] exponent_mul;
+ logic [163:0] mantissa_mul;
+ logic sign_add;
+ logic [13:0] exponent_add;
+ logic [163:0] mantissa_add;
+ logic [163:0] mantissa_l;
+ logic [163:0] mantissa_r;
+ logic [13:0] exponent_dif;
+ logic [6:0] counter_dif;
+ logic exponent_neg;
+ logic ready;
+ } fp_fma_var_type_1;
+
+ typedef struct packed {
+ logic sign_rnd;
+ logic [13:0] exponent_rnd;
+ logic [53:0] mantissa_rnd;
+ logic [1:0] fmt;
+ logic [2:0] rm;
+ logic [2:0] grs;
+ logic snan;
+ logic qnan;
+ logic dbz;
+ logic infs;
+ logic zero;
+ logic diff;
+ logic ready;
+ } fp_fma_reg_type_2;
+
+ parameter fp_fma_reg_type_2 init_fp_fma_reg_2 = '{
+ sign_rnd : 0,
+ exponent_rnd : 0,
+ mantissa_rnd : 0,
+ fmt : 0,
+ rm : 0,
+ grs : 0,
+ snan : 0,
+ qnan : 0,
+ dbz : 0,
+ infs : 0,
+ zero : 0,
+ diff : 0,
+ ready : 0
+ };
+
+ typedef struct packed {
+ logic [1:0] fmt;
+ logic [2:0] rm;
+ logic snan;
+ logic qnan;
+ logic dbz;
+ logic infs;
+ logic zero;
+ logic diff;
+ logic sign_mul;
+ logic [13:0] exponent_mul;
+ logic [163:0] mantissa_mul;
+ logic sign_add;
+ logic [13:0] exponent_add;
+ logic [163:0] mantissa_add;
+ logic exponent_neg;
+ logic sign_mac;
+ logic [13:0] exponent_mac;
+ logic [163:0] mantissa_mac;
+ logic [7:0] counter_mac;
+ logic [13:0] counter_sub;
+ logic [10:0] bias;
+ logic sign_rnd;
+ logic [13:0] exponent_rnd;
+ logic [53:0] mantissa_rnd;
+ logic [2:0] grs;
+ logic ready;
+ } fp_fma_var_type_2;
+
+ typedef struct packed {
+ logic [55:0] a;
+ logic [55:0] b;
+ logic [55:0] c;
+ logic op;
+ } fp_mac_in_type;
+
+ typedef struct packed {logic [109:0] d;} fp_mac_out_type;
+
+ typedef struct packed {
+ logic [64:0] data1;
+ logic [64:0] data2;
+ logic [9:0] class1;
+ logic [9:0] class2;
+ fp_operation_type op;
+ logic [1:0] fmt;
+ logic [2:0] rm;
+ } fp_fdiv_in_type;
+
+ typedef struct packed {
+ fp_rnd_in_type fp_rnd;
+ logic ready;
+ } fp_fdiv_out_type;
+
+ typedef struct packed {  // NOTE(review): presumably the state register of one fdiv implementation variant ("functional"); compare fp_fdiv_reg_fixed_type - confirm in fp_fdiv.sv
+ logic [2:0] state;  // state/istate: 3-bit and 6-bit values; presumably outer and inner sequencing state - confirm
+ logic [5:0] istate;
+ logic [1:0] fmt;
+ logic [2:0] rm;
+ logic [64:0] a;  // a/b and class_a/class_b have the widths of fp_fdiv_in_type's data1/data2 and class1/class2
+ logic [64:0] b;
+ logic [9:0] class_a;
+ logic [9:0] class_b;
+ logic snan;
+ logic qnan;
+ logic infs;
+ logic dbz;
+ logic zero;
+ logic op;  // a single bit here, whereas fp_fdiv_in_type.op is a fp_operation_type
+ logic [6:0] index;
+ logic [55:0] qa;  // qa..e2: fifteen 56-bit values, the operand width of fp_mac_in_type; presumably iteration intermediates - TODO confirm
+ logic [55:0] qb;
+ logic [55:0] q0;
+ logic [55:0] q1;
+ logic [55:0] y;
+ logic [55:0] y0;
+ logic [55:0] y1;
+ logic [55:0] y2;
+ logic [55:0] y3;
+ logic [55:0] h0;
+ logic [55:0] h1;
+ logic [55:0] h2;
+ logic [55:0] e0;
+ logic [55:0] e1;
+ logic [55:0] e2;
+ logic [109:0] r0;  // r0/r1: 110 bits, the width of fp_mac_out_type.d
+ logic [109:0] r1;
+ logic sign_fdiv;
+ logic [13:0] exponent_fdiv;
+ logic [113:0] mantissa_fdiv;  // 114 bits in this variant (165 bits in fp_fdiv_reg_fixed_type)
+ logic [1:0] counter_fdiv;
+ logic [10:0] exponent_bias;
+ logic sign_rnd;
+ logic [13:0] exponent_rnd;
+ logic [53:0] mantissa_rnd;
+ logic [1:0] remainder_rnd;
+ logic [13:0] counter_rnd;
+ logic [2:0] grs;
+ logic odd;
+ logic [63:0] result;
+ logic [4:0] flags;  // 5 bits, the same width as fp_cmp's flags vector
+ logic ready;
+ } fp_fdiv_reg_functional_type;
+
+ // All-zero value of fp_fdiv_reg_functional_type: every member of the record is set to 0.
+ parameter fp_fdiv_reg_functional_type init_fp_fdiv_reg_functional = '{default: 0};
+
+ typedef struct packed {  // NOTE(review): presumably the state register of the other fdiv implementation variant ("fixed"); shares most members with fp_fdiv_reg_functional_type - confirm in fp_fdiv.sv
+ logic [2:0] state;
+ logic [5:0] istate;
+ logic [1:0] fmt;
+ logic [2:0] rm;
+ logic [64:0] a;  // a/b and class_a/class_b have the widths of fp_fdiv_in_type's data1/data2 and class1/class2
+ logic [64:0] b;
+ logic [9:0] class_a;
+ logic [9:0] class_b;
+ logic snan;
+ logic qnan;
+ logic infs;
+ logic dbz;
+ logic zero;
+ logic op;  // a single bit here, whereas fp_fdiv_in_type.op is a fp_operation_type
+ logic [6:0] index;
+ logic [55:0] qa;
+ logic [55:0] qb;
+ logic [54:0] q;  // q (55 bits) and e/r/m (57 bits each) take the place of the q0..r1 members of the functional variant
+ logic [56:0] e;
+ logic [56:0] r;
+ logic [56:0] m;
+ logic sign_fdiv;
+ logic [13:0] exponent_fdiv;
+ logic [164:0] mantissa_fdiv;  // 165 bits in this variant (114 bits in fp_fdiv_reg_functional_type)
+ logic [1:0] counter_fdiv;
+ logic [10:0] exponent_bias;
+ logic sign_rnd;
+ logic [13:0] exponent_rnd;
+ logic [53:0] mantissa_rnd;
+ logic [1:0] remainder_rnd;
+ logic [13:0] counter_rnd;
+ logic [2:0] grs;
+ logic odd;
+ logic [63:0] result;
+ logic [4:0] flags;
+ logic ready;
+ } fp_fdiv_reg_fixed_type;
+
+ // All-zero value of fp_fdiv_reg_fixed_type: every member of the record is set to 0.
+ parameter fp_fdiv_reg_fixed_type init_fp_fdiv_reg_fixed = '{default: 0};
+
+ typedef struct packed {  // presumably the input of the float-to-float conversion in fp_cvt.sv - confirm
+ logic [64:0] data;  // 65-bit operand
+ logic [1:0] fmt;
+ logic [2:0] rm;
+ logic [9:0] classification;  // 10-bit classification vector for data
+ } fp_cvt_f2f_in_type;
+
+ typedef struct packed {fp_rnd_in_type fp_rnd;} fp_cvt_f2f_out_type;  // wraps one fp_rnd_in_type record (declared outside this excerpt)
+
+ typedef struct packed {  // NOTE(review): "var" suggests working values of the float-to-float conversion - confirm in fp_cvt.sv
+ logic [64:0] data;  // data..classification repeat the members of fp_cvt_f2f_in_type (same names and widths)
+ logic [1:0] fmt;
+ logic [2:0] rm;
+ logic [9:0] classification;
+ logic snan;
+ logic qnan;
+ logic dbz;
+ logic infs;
+ logic zero;
+ logic [13:0] counter_cvt;
+ logic [11:0] exponent_cvt;
+ logic [10:0] exponent_bias;
+ logic [79:0] mantissa_cvt;
+ logic sign_rnd;  // sign_rnd..grs: same names and widths as the _rnd/grs members of fp_fma_reg_type_2
+ logic [13:0] exponent_rnd;
+ logic [53:0] mantissa_rnd;
+ logic [2:0] grs;
+ } fp_cvt_f2f_var_type;
+
+ typedef struct packed {  // presumably the input of the float-to-integer conversion in fp_cvt.sv - confirm
+ logic [64:0] data;  // 65-bit operand
+ fp_operation_type op;  // operation record; type is declared outside this excerpt
+ logic [2:0] rm;
+ logic [9:0] classification;  // 10-bit classification vector for data
+ } fp_cvt_f2i_in_type;
+
+ typedef struct packed {  // 64-bit result plus a 5-bit flags vector
+ logic [63:0] result;
+ logic [4:0] flags;  // same width as the flags vector in fp_cmp; presumably exception flags - confirm bit assignment
+ } fp_cvt_f2i_out_type;
+
+ typedef struct packed {  // NOTE(review): "var" suggests working values of the float-to-integer conversion - confirm in fp_cvt.sv
+ logic [64:0] data;
+ logic [1:0] op;  // two bits here, whereas fp_cvt_f2i_in_type.op is a fp_operation_type
+ logic [2:0] rm;
+ logic [9:0] classification;
+ logic [63:0] result;  // result/flags: same names and widths as fp_cvt_f2i_out_type
+ logic [4:0] flags;
+ logic snan;
+ logic qnan;
+ logic infs;
+ logic zero;
+ logic sign_cvt;
+ logic [12:0] exponent_cvt;
+ logic [119:0] mantissa_cvt;
+ logic [7:0] exponent_bias;
+ logic [64:0] mantissa_uint;
+ logic [2:0] grs;
+ logic odd;
+ logic rnded;
+ logic oor;  // oor, or_1..or_5, oor_*: presumably out-of-range detection terms - TODO confirm
+ logic or_1;
+ logic or_2;
+ logic or_3;
+ logic or_4;
+ logic or_5;
+ logic oor_64u;  // presumably one out-of-range flag per target (64/32-bit, unsigned/signed) - confirm
+ logic oor_64s;
+ logic oor_32u;
+ logic oor_32s;
+ } fp_cvt_f2i_var_type;
+
+ typedef struct packed {  // presumably the input of the integer-to-float conversion in fp_cvt.sv - confirm
+ logic [63:0] data;  // 64-bit operand (one bit narrower than the 65-bit data of the f2f/f2i inputs)
+ fp_operation_type op;  // operation record; type is declared outside this excerpt
+ logic [1:0] fmt;
+ logic [2:0] rm;
+ } fp_cvt_i2f_in_type;
+
+ typedef struct packed {fp_rnd_in_type fp_rnd;} fp_cvt_i2f_out_type;  // wraps one fp_rnd_in_type record (declared outside this excerpt)
+
+ typedef struct packed {  // NOTE(review): "var" suggests working values of the integer-to-float conversion - confirm in fp_cvt.sv
+ logic [63:0] data;
+ logic [1:0] op;  // two bits here, whereas fp_cvt_i2f_in_type.op is a fp_operation_type
+ logic [1:0] fmt;
+ logic [2:0] rm;
+ logic snan;
+ logic qnan;
+ logic dbz;
+ logic infs;
+ logic zero;
+ logic sign_uint;
+ logic [5:0] exponent_uint;
+ logic [63:0] mantissa_uint;
+ logic [5:0] counter_uint;  // 6 bits, the same width as lzc_64's c output; presumably holds that module's result - confirm
+ logic [9:0] exponent_bias;
+ logic sign_rnd;  // sign_rnd..grs: same names and widths as the _rnd/grs members of fp_fma_reg_type_2
+ logic [13:0] exponent_rnd;
+ logic [53:0] mantissa_rnd;
+ logic [2:0] grs;
+ } fp_cvt_i2f_var_type;
+
+endpackage
diff --git a/rtl/fpu/lzc/lzc_128.sv b/rtl/fpu/lzc/lzc_128.sv
new file mode 100644
index 0000000..4ac6464
--- /dev/null
+++ b/rtl/fpu/lzc/lzc_128.sv
@@ -0,0 +1,64 @@
+// lzc_128: locates the most-significant set bit of a 128-bit vector by
+// combining the results of two lzc_64 halves.
+//   a - vector to scan
+//   c - index (0..127) of the highest 1 in a; all zeros when a == 0
+//   v - 1 when a contains at least one 1 (qualifies c)
+// When v is set, the number of leading zeros is the bitwise complement of c.
+module lzc_128 (
+  input [127:0] a,
+  output [6:0] c,
+  output v
+);
+
+  logic [5:0] z0;  // position reported for the lower half a[63:0]
+  logic [5:0] z1;  // position reported for the upper half a[127:64]
+
+  logic v0;  // lower half holds a 1
+  logic v1;  // upper half holds a 1
+
+  lzc_64 lzc_64_comp_0 (
+    .a(a[63:0]),
+    .c(z0),
+    .v(v0)
+  );
+
+  lzc_64 lzc_64_comp_1 (
+    .a(a[127:64]),
+    .c(z1),
+    .v(v1)
+  );
+
+  assign v = v1 | v0;
+
+  // The upper half wins whenever it holds a 1: v1 becomes the top index bit
+  // and masks out the lower half's position. z1 is zero when v1 is clear, so
+  // the OR then passes z0 through unchanged.
+  assign c = {v1, z1 | (z0 & {6{~v1}})};
+
+endmodule
diff --git a/rtl/fpu/lzc/lzc_16.sv b/rtl/fpu/lzc/lzc_16.sv
new file mode 100644
index 0000000..9992878
--- /dev/null
+++ b/rtl/fpu/lzc/lzc_16.sv
@@ -0,0 +1,49 @@
+// lzc_16: locates the most-significant set bit of a 16-bit vector by
+// combining the results of two lzc_8 halves.
+//   a - vector to scan
+//   c - index (0..15) of the highest 1 in a; all zeros when a == 0
+//   v - 1 when a contains at least one 1 (qualifies c)
+// When v is set, the number of leading zeros is the bitwise complement of c.
+module lzc_16 (
+  input [15:0] a,
+  output [3:0] c,
+  output v
+);
+
+  logic [2:0] z0;  // position reported for the lower half a[7:0]
+  logic [2:0] z1;  // position reported for the upper half a[15:8]
+
+  logic v0;  // lower half holds a 1
+  logic v1;  // upper half holds a 1
+
+  lzc_8 lzc_8_comp_0 (
+    .a(a[7:0]),
+    .c(z0),
+    .v(v0)
+  );
+
+  lzc_8 lzc_8_comp_1 (
+    .a(a[15:8]),
+    .c(z1),
+    .v(v1)
+  );
+
+  assign v = v1 | v0;
+
+  // The upper half wins whenever it holds a 1: v1 becomes the top index bit
+  // and masks out the lower half's position. z1 is zero when v1 is clear, so
+  // the OR then passes z0 through unchanged.
+  assign c = {v1, z1 | (z0 & {3{~v1}})};
+
+endmodule
diff --git a/rtl/fpu/lzc/lzc_256.sv b/rtl/fpu/lzc/lzc_256.sv
new file mode 100644
index 0000000..cb4ded6
--- /dev/null
+++ b/rtl/fpu/lzc/lzc_256.sv
@@ -0,0 +1,69 @@
+// lzc_256: locates the most-significant set bit of a 256-bit vector by
+// combining the results of two lzc_128 halves.
+//   a - vector to scan
+//   c - index (0..255) of the highest 1 in a; all zeros when a == 0
+//   v - 1 when a contains at least one 1 (qualifies c)
+// When v is set, the number of leading zeros is the bitwise complement of c.
+module lzc_256 (
+  input [255:0] a,
+  output [7:0] c,
+  output v
+);
+
+  logic [6:0] z0;  // position reported for the lower half a[127:0]
+  logic [6:0] z1;  // position reported for the upper half a[255:128]
+
+  logic v0;  // lower half holds a 1
+  logic v1;  // upper half holds a 1
+
+  lzc_128 lzc_128_comp_0 (
+    .a(a[127:0]),
+    .c(z0),
+    .v(v0)
+  );
+
+  lzc_128 lzc_128_comp_1 (
+    .a(a[255:128]),
+    .c(z1),
+    .v(v1)
+  );
+
+  assign v = v1 | v0;
+
+  // The upper half wins whenever it holds a 1: v1 becomes the top index bit
+  // and masks out the lower half's position. z1 is zero when v1 is clear, so
+  // the OR then passes z0 through unchanged.
+  assign c = {v1, z1 | (z0 & {7{~v1}})};
+
+endmodule
diff --git a/rtl/fpu/lzc/lzc_32.sv b/rtl/fpu/lzc/lzc_32.sv
new file mode 100644
index 0000000..5ce1fa9
--- /dev/null
+++ b/rtl/fpu/lzc/lzc_32.sv
@@ -0,0 +1,54 @@
+// lzc_32: locates the most-significant set bit of a 32-bit vector by
+// combining the results of two lzc_16 halves.
+//   a - vector to scan
+//   c - index (0..31) of the highest 1 in a; all zeros when a == 0
+//   v - 1 when a contains at least one 1 (qualifies c)
+// When v is set, the number of leading zeros is the bitwise complement of c.
+module lzc_32 (
+  input [31:0] a,
+  output [4:0] c,
+  output v
+);
+
+  logic [3:0] z0;  // position reported for the lower half a[15:0]
+  logic [3:0] z1;  // position reported for the upper half a[31:16]
+
+  logic v0;  // lower half holds a 1
+  logic v1;  // upper half holds a 1
+
+  lzc_16 lzc_16_comp_0 (
+    .a(a[15:0]),
+    .c(z0),
+    .v(v0)
+  );
+
+  lzc_16 lzc_16_comp_1 (
+    .a(a[31:16]),
+    .c(z1),
+    .v(v1)
+  );
+
+  assign v = v1 | v0;
+
+  // The upper half wins whenever it holds a 1: v1 becomes the top index bit
+  // and masks out the lower half's position. z1 is zero when v1 is clear, so
+  // the OR then passes z0 through unchanged.
+  assign c = {v1, z1 | (z0 & {4{~v1}})};
+
+endmodule
diff --git a/rtl/fpu/lzc/lzc_4.sv b/rtl/fpu/lzc/lzc_4.sv
new file mode 100644
index 0000000..0d5182c
--- /dev/null
+++ b/rtl/fpu/lzc/lzc_4.sv
@@ -0,0 +1,33 @@
+// lzc_4: 4-bit leaf cell of the lzc_* tree.
+//   a - vector to scan
+//   c - index (0..3) of the most-significant 1 in a; 2'b00 when a == 0
+//   v - 1 when a contains at least one 1 (qualifies c)
+module lzc_4 (
+  input [3:0] a,
+  output [1:0] c,
+  output v
+);
+
+  logic upper_any;  // a[3] or a[2] is set
+  logic lower_any;  // a[1] or a[0] is set
+
+  assign upper_any = |a[3:2];
+  assign lower_any = |a[1:0];
+
+  assign v = lower_any | upper_any;
+
+  // c[1] selects the bit pair holding the top 1; c[0] selects the bit inside
+  // that pair (a[3] in the upper pair, a[1] in the lower pair).
+  assign c = {upper_any, a[3] | ((~upper_any) & a[1])};
+
+endmodule
diff --git a/rtl/fpu/lzc/lzc_64.sv b/rtl/fpu/lzc/lzc_64.sv
new file mode 100644
index 0000000..b235add
--- /dev/null
+++ b/rtl/fpu/lzc/lzc_64.sv
@@ -0,0 +1,59 @@
+// lzc_64: locates the most-significant set bit of a 64-bit vector by
+// combining the results of two lzc_32 halves.
+//   a - vector to scan
+//   c - index (0..63) of the highest 1 in a; all zeros when a == 0
+//   v - 1 when a contains at least one 1 (qualifies c)
+// When v is set, the number of leading zeros is the bitwise complement of c.
+module lzc_64 (
+  input [63:0] a,
+  output [5:0] c,
+  output v
+);
+
+  logic [4:0] z0;  // position reported for the lower half a[31:0]
+  logic [4:0] z1;  // position reported for the upper half a[63:32]
+
+  logic v0;  // lower half holds a 1
+  logic v1;  // upper half holds a 1
+
+  lzc_32 lzc_32_comp_0 (
+    .a(a[31:0]),
+    .c(z0),
+    .v(v0)
+  );
+
+  lzc_32 lzc_32_comp_1 (
+    .a(a[63:32]),
+    .c(z1),
+    .v(v1)
+  );
+
+  assign v = v1 | v0;
+
+  // The upper half wins whenever it holds a 1: v1 becomes the top index bit
+  // and masks out the lower half's position. z1 is zero when v1 is clear, so
+  // the OR then passes z0 through unchanged.
+  assign c = {v1, z1 | (z0 & {5{~v1}})};
+
+endmodule
diff --git a/rtl/fpu/lzc/lzc_8.sv b/rtl/fpu/lzc/lzc_8.sv
new file mode 100644
index 0000000..562b79f
--- /dev/null
+++ b/rtl/fpu/lzc/lzc_8.sv
@@ -0,0 +1,44 @@
+// lzc_8: locates the most-significant set bit of an 8-bit vector by
+// combining the results of two lzc_4 halves.
+//   a - vector to scan
+//   c - index (0..7) of the highest 1 in a; all zeros when a == 0
+//   v - 1 when a contains at least one 1 (qualifies c)
+// When v is set, the number of leading zeros is the bitwise complement of c.
+module lzc_8 (
+  input [7:0] a,
+  output [2:0] c,
+  output v
+);
+
+  logic [1:0] z0;  // position reported for the lower half a[3:0]
+  logic [1:0] z1;  // position reported for the upper half a[7:4]
+
+  logic v0;  // lower half holds a 1
+  logic v1;  // upper half holds a 1
+
+  lzc_4 lzc_4_comp_0 (
+    .a(a[3:0]),
+    .c(z0),
+    .v(v0)
+  );
+
+  lzc_4 lzc_4_comp_1 (
+    .a(a[7:4]),
+    .c(z1),
+    .v(v1)
+  );
+
+  assign v = v1 | v0;
+
+  // The upper half wins whenever it holds a 1: v1 becomes the top index bit
+  // and masks out the lower half's position. z1 is zero when v1 is clear, so
+  // the OR then passes z0 through unchanged.
+  assign c = {v1, z1 | (z0 & {2{~v1}})};
+
+endmodule
diff --git a/rtl/fpu/lzc/lzc_wire.sv b/rtl/fpu/lzc/lzc_wire.sv
new file mode 100644
index 0000000..6277bfe
--- /dev/null
+++ b/rtl/fpu/lzc/lzc_wire.sv
@@ -0,0 +1,17 @@
+package lzc_wire;  // packed-struct bundles whose field widths match the ports of modules lzc_64 and lzc_256
+
+ typedef struct packed {logic [63:0] a;} lzc_64_in_type;  // same width as lzc_64's input a
+
+ typedef struct packed {  // same fields as lzc_64's outputs
+ logic [5:0] c;  // 6-bit position value
+ logic v;  // qualifier bit for c
+ } lzc_64_out_type;
+
+ typedef struct packed {logic [255:0] a;} lzc_256_in_type;  // same width as lzc_256's input a
+
+ typedef struct packed {  // same fields as lzc_256's outputs
+ logic [7:0] c;  // 8-bit position value
+ logic v;  // qualifier bit for c
+ } lzc_256_out_type;
+
+endpackage
diff --git a/rtl/fpu/mod.mk b/rtl/fpu/mod.mk
new file mode 100644
index 0000000..606c32c
--- /dev/null
+++ b/rtl/fpu/mod.mk
@@ -0,0 +1,15 @@
+# Build description for the FPU RTL: declares two cores, each configured by
+# the matching "define core/<name>" block below.
+cores := fp_unit fp_lzc
+
+# fp_lzc: sources come from the lzc directory, with lzc/lzc_wire.sv listed explicitly.
+define core/fp_lzc
+ $(this)/rtl_dirs := lzc
+ $(this)/rtl_files := lzc/lzc_wire.sv
+endef
+
+# fp_unit: depends on fp_lzc, uses fp_unit as the RTL top and takes its sources
+# from the float directory. vl_main points at the placeholder source that sits
+# next to this file, which is named empty.cc (not empty.cpp).
+define core/fp_unit
+ $(this)/deps := fp_lzc
+
+ $(this)/vl_main := empty.cc
+ $(this)/rtl_top := fp_unit
+ $(this)/rtl_dirs := float
+ $(this)/rtl_files := float/fp_wire.sv float/fp_unit.sv
+endef