From de9994dfb10d435725537a48fadde81fc38a6fc4 Mon Sep 17 00:00:00 2001
From: Alejandro Soto
Date: Mon, 4 Mar 2024 17:52:52 -0600
Subject: platform/wavelet3d: implement fadd

---
 platform/wavelet3d/gfx_clz.sv        |  75 ++++++++++++++++
 platform/wavelet3d/gfx_fadd_lane.sv  | 161 +++++++++++++++++++++++++++++++++++
 platform/wavelet3d/gfx_float_lane.sv |   2 +-
 platform/wavelet3d/gfx_pkg.sv        |   4 +
 4 files changed, 241 insertions(+), 1 deletion(-)
 create mode 100644 platform/wavelet3d/gfx_clz.sv
 create mode 100644 platform/wavelet3d/gfx_fadd_lane.sv

diff --git a/platform/wavelet3d/gfx_clz.sv b/platform/wavelet3d/gfx_clz.sv
new file mode 100644
index 0000000..8d6f100
--- /dev/null
+++ b/platform/wavelet3d/gfx_clz.sv
@@ -0,0 +1,75 @@
+/* Tree implementation of count leading zeros (CLZ).
+ * WIDTH must be a power of 2.
+ *
+ * Every tree level is registered: the 1-, 2- and 4-bit leaves take one
+ * cycle and each doubling of WIDTH above 4 adds one more. The result
+ * ranges from 0 to WIDTH (all-zero input), hence the extra output bit.
+ */
+module gfx_clz
+#(int WIDTH = 0)
+(
+	input  logic                  clk,
+
+	input  logic[WIDTH - 1:0]     value,
+	output logic[$clog2(WIDTH):0] clz
+);
+
+	genvar i;
+	generate
+		if (WIDTH <= 1) begin
+			always_ff @(posedge clk)
+				clz <= !value;
+		end else if (WIDTH == 2) begin
+			always_ff @(posedge clk)
+				unique case (value)
+					2'b00: clz <= 2'b10;
+					2'b01: clz <= 2'b01;
+					2'b10: clz <= 2'b00;
+					2'b11: clz <= 2'b00;
+				endcase
+		end else if (WIDTH == 4) begin
+			// Efficient on FPGAs with 4-LUTs
+			always_ff @(posedge clk)
+				if (value[3])
+					clz <= 3'b000;
+				else if (value[2])
+					clz <= 3'b001;
+				else if (value[1])
+					clz <= 3'b010;
+				else if (value[0])
+					clz <= 3'b011;
+				else
+					clz <= 3'b100;
+		end else begin
+			logic msb_right;
+			logic[$clog2(WIDTH) - 1:0] clz_left, clz_right;
+			logic[$clog2(WIDTH) - 2:0] tail_right;
+
+			assign {msb_right, tail_right} = clz_right;
+
+			gfx_clz #(WIDTH / 2) left
+			(
+				.clk(clk),
+				.clz(clz_left),
+				.value(value[WIDTH - 1:WIDTH / 2])
+			);
+
+			gfx_clz #(WIDTH / 2) right
+			(
+				.clk(clk),
+				.clz(clz_right),
+				.value(value[WIDTH / 2 - 1:0])
+			);
+
+			// Left half all zeros (top bit of clz_left set): the result is
+			// WIDTH / 2 + clz_right, which the concatenation below encodes
+			// without an adder. Otherwise the left half alone decides.
+			always_ff @(posedge clk)
+				if (clz_left[$clog2(WIDTH) - 1])
+					clz <= {msb_right, ~msb_right, tail_right};
+				else
+					clz <= {1'b0, clz_left};
+		end
+	endgenerate
+
+endmodule
diff --git a/platform/wavelet3d/gfx_fadd_lane.sv b/platform/wavelet3d/gfx_fadd_lane.sv
new file mode 100644
index 0000000..8eb0c7a
--- /dev/null
+++ b/platform/wavelet3d/gfx_fadd_lane.sv
@@ -0,0 +1,161 @@
+/* Pipelined floating-point adder lane computing q = a + b.
+ *
+ * Stage 0 orders the operands by magnitude, stages 1-2 align the smaller
+ * mantissa, stage 3 adds or subtracts, the next FADD_CLZ_STAGES stages
+ * count leading zeros and the final stage normalizes the result.
+ * slow_in travels with the data and is OR-ed with the exponent
+ * overflow flag to form q.slow.
+ */
+module gfx_fadd_lane
+(
+	input  logic              clk,
+
+	input  gfx::float_special a,
+	                          b,
+	input  logic              slow_in,
+
+	output gfx::float_round   q
+);
+
+	import gfx::*;
+
+	// We want to compute q = a + b. Curiously, that is more complicated than a * b.
+
+	typedef logic[$bits(float_mant_full) + 1:0] extended;
+	localparam bit[$clog2($bits(extended)):0] MAX_SHIFT = 1 << $clog2($bits(extended));
+
+	// Declared ahead of CLZ_EXTEND_BITS, whose value takes $bits(clz_shift)
+	logic[$clog2(MAX_SHIFT):0] clz_shift, exp_shift;
+
+	localparam int SHIFT_WIDTH = {{($bits(int) - $bits(MAX_SHIFT)){1'b0}}, MAX_SHIFT};
+	localparam int CLZ_EXTEND_BITS = $bits(float_exp) - $bits(clz_shift) + 1;
+
+	logic overflow, slow_0, slow_1, slow_2, slow_3, sticky, sticky_last;
+	extended shifted_min, sticky_mask, max_mant;
+	float_exp exp_delta;
+	float_round out;
+	float_special max_0, max_1, max_2, max_3, min_0, min_1, min_2, min_3;
+	logic[$bits(float_mant_full) + 2:0] add_sub, normalized;
+
+	struct packed
+	{
+		float_special max,
+		              min;
+		logic slow,
+		      sticky;
+		logic[$bits(add_sub) - 1:0] add_sub;
+	} clz_hold[FADD_CLZ_STAGES], clz_hold_out;
+
+	gfx_clz #(SHIFT_WIDTH) clz
+	(
+		.clk(clk),
+		.clz(clz_shift),
+		.value({add_sub, {(SHIFT_WIDTH - $bits(add_sub)){1'b0}}})
+	);
+
+	// Prepends ~exp_min (presumably the implicit leading one, absent for a
+	// minimum exponent) and appends two zero bits below the mantissa.
+	function extended extend_min(float_special in);
+		extend_min = {~in.exp_min, in.val.mant, 2'b00};
+	endfunction
+
+	assign max_mant = {~max_2.exp_min, max_2.val.mant, 2'b00};
+	assign exp_delta = max_0.val.exp - min_0.val.exp;
+	// clz_shift comes out of the registered gfx_clz tree, so it lags add_sub.
+	// Shift the copy delayed through clz_hold instead of the live register,
+	// which by then belongs to a later operation.
+	assign normalized = clz_hold_out.add_sub << clz_shift;
+	assign clz_hold_out = clz_hold[FADD_CLZ_STAGES - 1];
+
+	always_comb begin
+		q = out;
+		q.slow = out.slow || overflow;
+		q.sticky = out.sticky || sticky_last;
+	end
+
+	always_ff @(posedge clk) begin
+		/* Stage 0: sort so that abs(max) >= abs(min). Wikipedia says:
+		 *
+		 * A property of the single- and double-precision formats is that
+		 * their encoding allows one to easily sort them without using
+		 * floating-point hardware, as if the bits represented sign-magnitude
+		 * integers, although it is unclear whether this was a design
+		 * consideration (it seems noteworthy that the earlier IBM hexadecimal
+		 * floating-point representation also had this property for normalized
+		 * numbers).
+		 */
+		if ({b.val.exp, b.val.mant} > {a.val.exp, a.val.mant}) begin
+			min_0 <= a;
+			max_0 <= b;
+		end else begin
+			min_0 <= b;
+			max_0 <= a;
+		end
+
+		slow_0 <= slow_in;
+
+		// Stage 1: exp_shift amount
+
+		max_1 <= max_0;
+		min_1 <= min_0;
+		slow_1 <= slow_0;
+
+		exp_shift <= exp_delta[$bits(exp_shift) - 1:0];
+		// Later assignment wins: saturate at MAX_SHIFT, which already empties shifted_min
+		if (exp_delta > {{($bits(exp_delta) - $bits(MAX_SHIFT)){1'b0}}, MAX_SHIFT})
+			exp_shift <= MAX_SHIFT;
+
+		// Stage 2: shifts
+
+		max_2 <= max_1;
+		min_2 <= min_1;
+		slow_2 <= slow_1;
+
+		shifted_min <= extend_min(min_1) >> exp_shift;
+		sticky_mask <= {($bits(shifted_min)){1'b1}} << exp_shift;
+
+		// Stage 3: add/subtract and sticky
+
+		max_3 <= max_2;
+		min_3 <= min_2;
+		slow_3 <= slow_2;
+
+		// ~sticky_mask selects the low bits of min that the shift discarded
+		sticky <= |(extend_min(min_2) & ~sticky_mask);
+		if (max_2.val.sign ^ min_2.val.sign)
+			add_sub <= {1'b0, max_mant - shifted_min};
+		else
+			add_sub <= {1'b0, max_mant} + {1'b0, shifted_min};
+
+		// Stages 4-7: clz
+
+		clz_hold[0].max <= max_3;
+		clz_hold[0].min <= min_3;
+		clz_hold[0].slow <= slow_3;
+		clz_hold[0].sticky <= sticky;
+		clz_hold[0].add_sub <= add_sub;
+
+		for (int i = 1; i < FADD_CLZ_STAGES; ++i)
+			clz_hold[i] <= clz_hold[i - 1];
+
+		// Stage 8: normalization
+
+		out.slow <= clz_hold_out.slow;
+		out.sticky <= clz_hold_out.sticky;
+		out.normal.sign <= clz_hold_out.max.val.sign;
+
+		// Drop the top bit (the leading one once normalized); keep mantissa, guard, round, sticky
+		{out.normal.mant, out.guard, out.round, sticky_last} <=
+			normalized[$bits(normalized) - 2:$bits(normalized) - $bits(out.normal.mant) - 4];
+
+		// Top bit of clz_shift set: gfx_clz saw no set bit, so the result is zero
+		if (clz_shift[$bits(clz_shift) - 1]) begin
+			overflow <= 0;
+			out.normal.exp <= 0;
+		end else
+			// The extra top bit catches the exponent wrapping in either direction
+			{overflow, out.normal.exp} <=
+				{1'b0, clz_hold_out.max.val.exp} - {{CLZ_EXTEND_BITS{1'b0}}, clz_shift} + 1;
+	end
+
+endmodule
diff --git a/platform/wavelet3d/gfx_float_lane.sv b/platform/wavelet3d/gfx_float_lane.sv
index 4d214f6..f7b3ba1 100644
--- a/platform/wavelet3d/gfx_float_lane.sv
+++ b/platform/wavelet3d/gfx_float_lane.sv
@@ -25,7 +25,7 @@ module gfx_float_lane
 		is_special = in.exp_max | (in.exp_min & ~in.mant_zero);
 	endfunction
 
-	gfx_fmul_lane fmul
+	gfx_fadd_lane fmul
 	(
 		.clk(clk),
 		.a(a_special),
diff --git a/platform/wavelet3d/gfx_pkg.sv b/platform/wavelet3d/gfx_pkg.sv
index 27c1117..e108d7d 100644
--- a/platform/wavelet3d/gfx_pkg.sv
+++ b/platform/wavelet3d/gfx_pkg.sv
@@ -46,4 +46,8 @@ package gfx;
 		mant_zero;
 	} float_special;
 
+	/* gfx_clz levels for 32 bits: 4,4,4,4,4,4,4,4 -> 8,8,8,8 -> 16,16 -> 32
+	 */
+	localparam FADD_CLZ_STAGES = 4;
+
 endpackage
-- 
cgit v1.2.3