diff options
Diffstat (limited to 'platform/wavelet3d')
| -rw-r--r-- | platform/wavelet3d/gfx_fadd_lane.sv | 141 | ||||
| -rw-r--r-- | platform/wavelet3d/gfx_float_lane.sv | 53 | ||||
| -rw-r--r-- | platform/wavelet3d/gfx_fmul_lane.sv | 82 | ||||
| -rw-r--r-- | platform/wavelet3d/gfx_fpint_lane.sv | 379 | ||||
| -rw-r--r-- | platform/wavelet3d/gfx_pkg.sv | 24 | ||||
| -rw-r--r-- | platform/wavelet3d/gfx_round_lane.sv | 62 | ||||
| -rw-r--r-- | platform/wavelet3d/main.cpp | 50 | ||||
| -rw-r--r-- | platform/wavelet3d/mod.mk | 7 |
8 files changed, 452 insertions, 346 deletions
diff --git a/platform/wavelet3d/gfx_fadd_lane.sv b/platform/wavelet3d/gfx_fadd_lane.sv deleted file mode 100644 index 8eb0c7a..0000000 --- a/platform/wavelet3d/gfx_fadd_lane.sv +++ /dev/null @@ -1,141 +0,0 @@ -module gfx_fadd_lane -( - input logic clk, - - input gfx::float_special a, - b, - input logic slow_in, - - output gfx::float_round q -); - - import gfx::*; - - // Queremos calcular q = a + b. Curiosamente, eso es más complicado que a * b. - - typedef logic[$bits(float_mant_full) + 1:0] extended; - localparam bit[$clog2($bits(extended)):0] MAX_SHIFT = 1 << $clog2($bits(extended)); - - localparam int SHIFT_WIDTH = {{($bits(int) - $bits(MAX_SHIFT)){1'b0}}, MAX_SHIFT}; - localparam int CLZ_EXTEND_BITS = $bits(float_exp) - $bits(clz_shift) + 1; - - logic overflow, slow_0, slow_1, slow_2, slow_3, sticky, sticky_last; - extended shifted_min, sticky_mask, max_mant; - float_exp exp_delta; - float_round out; - float_special max_0, max_1, max_2, max_3, min_0, min_1, min_2, min_3; - logic[$clog2(MAX_SHIFT):0] clz_shift, exp_shift; - logic[$bits(float_mant_full) + 2:0] add_sub, normalized; - - struct packed - { - float_special max, - min; - logic slow, - sticky; - logic[$bits(add_sub) - 1:0] add_sub; - } clz_hold[FADD_CLZ_STAGES], clz_hold_out; - - gfx_clz #(SHIFT_WIDTH) clz - ( - .clk(clk), - .clz(clz_shift), - .value({add_sub, {(SHIFT_WIDTH - $bits(add_sub)){1'b0}}}) - ); - - function extended extend_min(float_special in); - extend_min = {~in.exp_min, in.val.mant, 2'b00}; - endfunction - - assign max_mant = {~max_2.exp_min, max_2.val.mant, 2'b00}; - assign exp_delta = max_0.val.exp - min_0.val.exp; - assign normalized = add_sub << clz_shift; - assign clz_hold_out = clz_hold[FADD_CLZ_STAGES - 1]; - - always_comb begin - q = out; - q.slow = out.slow || overflow; - q.sticky = out.sticky || sticky_last; - end - - always_ff @(posedge clk) begin - /* Stage 0: ordenar tal que abs(max) >= abs(min). Wiki dice: - * - * A property of the single- and double-precision formats is that - * their encoding allows one to easily sort them without using - * floating-point hardware, as if the bits represented sign-magnitude - * integers, although it is unclear whether this was a design - * consideration (it seems noteworthy that the earlier IBM hexadecimal - * floating-point representation also had this property for normalized - * numbers). - */ - if ({b.val.exp, b.val.mant} > {a.val.exp, a.val.mant}) begin - min_0 <= a; - max_0 <= b; - end else begin - min_0 <= b; - max_0 <= a; - end - - slow_0 <= slow_in; - - // Stage 1: exp_shift amount - - max_1 <= max_0; - min_1 <= min_0; - slow_1 <= slow_0; - - exp_shift <= exp_delta[$bits(exp_shift) - 1:0]; - if (exp_delta > {{($bits(exp_delta) - $bits(MAX_SHIFT)){1'b0}}, MAX_SHIFT}) - exp_shift <= MAX_SHIFT; - - // Stage 2: shifts - - max_2 <= max_1; - min_2 <= min_1; - slow_2 <= slow_1; - - shifted_min <= extend_min(min_1) >> exp_shift; - sticky_mask <= {($bits(shifted_min)){1'b1}} << exp_shift; - - // Stage 3: suma/resta y sticky - - max_3 <= max_2; - min_3 <= min_2; - slow_3 <= slow_2; - - sticky <= |(extend_min(min_2) & ~sticky_mask); - if (max_2.val.sign ^ min_2.val.sign) - add_sub <= {1'b0, max_mant - shifted_min}; - else - add_sub <= {1'b0, max_mant} + {1'b0, shifted_min}; - - // Stages 4-7: clz - - clz_hold[0].max <= max_3; - clz_hold[0].min <= min_3; - clz_hold[0].slow <= slow_3; - clz_hold[0].sticky <= sticky; - clz_hold[0].add_sub <= add_sub; - - for (int i = 1; i < FADD_CLZ_STAGES; ++i) - clz_hold[i] <= clz_hold[i - 1]; - - // Stage 8: normalización - - out.slow <= clz_hold_out.slow; - out.sticky <= clz_hold_out.sticky; - out.normal.sign <= clz_hold_out.max.val.sign; - - {out.normal.mant, out.guard, out.round, sticky_last} <= - normalized[$bits(normalized) - 2:$bits(normalized) - $bits(out.normal.mant) - 4]; - - if (clz_shift[$bits(clz_shift) - 1]) begin - overflow <= 0; - out.normal.exp <= 0; - end else - {overflow, out.normal.exp} <= - {1'b0, clz_hold_out.max.val.exp} - {{CLZ_EXTEND_BITS{1'b0}}, clz_shift} + 1; - end - -endmodule diff --git a/platform/wavelet3d/gfx_float_lane.sv b/platform/wavelet3d/gfx_float_lane.sv deleted file mode 100644 index f7b3ba1..0000000 --- a/platform/wavelet3d/gfx_float_lane.sv +++ /dev/null @@ -1,53 +0,0 @@ -module gfx_float_lane -( - input logic clk, - - input gfx::float a, - b, - - output gfx::float q -); - - import gfx::*; - - logic slow_fmul; - float_round q_fmul; - float_special a_special, b_special; - - function float_special front_flags(float in); - front_flags.val = in; - front_flags.exp_max = &in.exp; - front_flags.exp_min = ~|in.exp; - front_flags.mant_zero = ~|in.mant; - endfunction - - function logic is_special(float_special in); - is_special = in.exp_max | (in.exp_min & ~in.mant_zero); - endfunction - - gfx_fadd_lane fmul - ( - .clk(clk), - .a(a_special), - .b(b_special), - .q(q_fmul), - .slow_in(slow_fmul) - ); - - gfx_round_lane round - ( - .clk(clk), - .in(q_fmul), - .out(q) - ); - - always_comb begin - slow_fmul = is_special(a_special) | is_special(b_special); - end - - always_ff @(posedge clk) begin - a_special <= front_flags(a); - b_special <= front_flags(b); - end - -endmodule diff --git a/platform/wavelet3d/gfx_fmul_lane.sv b/platform/wavelet3d/gfx_fmul_lane.sv deleted file mode 100644 index 17a988e..0000000 --- a/platform/wavelet3d/gfx_fmul_lane.sv +++ /dev/null @@ -1,82 +0,0 @@ -module gfx_fmul_lane -( - input logic clk, - - input gfx::float_special a, - b, - input logic slow_in, - - output gfx::float_round q -); - - import gfx::*; - - /* Queremos calcular q = a * b. - * - * Donde a = (-1)^s * 1.m * 2^f, - * b = (-1)^t * 1.n * 2^g - * - * Entonces q = (-1)^(s + t) (1.m * 1.n) 2^(f + g) - * - * El producto es entre números >= 1.0 y < 2.0. En el peor caso: - * Mejor caso: 1.000... * 1.000... ~ 1.000... - * Peor caso: 1.999... * 1.999... ~ 3.999... = 2^1 * 1.999 - * - * Así que, si el producto es >= 2, hay que hacerle >> 1 a la mantisa - * y sumarle 1 al exponente para normalizar. - */ - - logic guard, lo_msb, lo_reduce, overflow_0, overflow_1, - round, sign, slow_0, slow_1, zero; - - float_exp exp; - float_round out; - float_mant_full hi; - logic[$bits(float_mant_full) - 3:0] lo; - - assign lo_msb = lo[$bits(lo) - 1]; - assign lo_reduce = |lo[$bits(lo) - 2:0]; - - always_comb begin - q = out; - q.slow = slow_1 | overflow_1; - end - - always_ff @(posedge clk) begin - // Stage 0: producto - - sign <= a.val.sign ^ b.val.sign; - zero <= a.exp_min | b.exp_min; - slow_0 <= slow_in; - - {overflow_0, exp} <= {1'b0, a.val.exp} + {1'b0, b.val.exp} - {1'b0, FLOAT_EXP_BIAS}; - {hi, guard, round, lo} <= full_mant(a.val.mant) * full_mant(b.val.mant); - - // Stage 1: normalización - - slow_1 <= slow_0 | overflow_0; - overflow_1 <= 0; - - out.slow <= 1'bx; // Ver 'q' - out.zero <= zero; - out.normal.sign <= sign; - - if (hi[$bits(hi) - 1]) begin - out.guard <= guard; - out.round <= round; - out.sticky <= lo_msb | lo_reduce; - out.normal.mant <= implicit_mant(hi); - {overflow_1, out.normal.exp} <= {1'b0, exp} + 1; - end else begin - /* Bit antes de msb es necesariamente 1, ya que los msb de - * ambos multiplicandos son 1. Ver assert en implicit_mant(). - */ - out.guard <= round; - out.round <= lo[$bits(lo) - 1]; - out.sticky <= lo_reduce; - out.normal.exp <= exp; - out.normal.mant <= implicit_mant({hi[$bits(hi) - 2:0], guard}); - end - end - -endmodule diff --git a/platform/wavelet3d/gfx_fpint_lane.sv b/platform/wavelet3d/gfx_fpint_lane.sv new file mode 100644 index 0000000..0010f06 --- /dev/null +++ b/platform/wavelet3d/gfx_fpint_lane.sv @@ -0,0 +1,379 @@ +module gfx_fpint_lane +( + input logic clk, + + input gfx::float a, + b, + + input logic mul_float_m1, + unit_b_m1, + float_a_1, + int_hi_a_1, + int_lo_a_1, + zero_flags_a_1, + zero_b_1, + copy_flags_2, + copy_flags_5, + enable_norm_6, + copy_flags_10, + copy_flags_11, + enable_round_11, + encode_special_13, + + output gfx::float q +); + + import gfx::*; + + /* Notas de implementación para floating-point. + * + * === PRODUCTO === + * Queremos calcular q = a * b. + * + * Donde a = (-1)^s * 1.m * 2^f, + * b = (-1)^t * 1.n * 2^g + * + * Entonces q = (-1)^(s + t) (1.m * 1.n) 2^(f + g) + * + * El producto es entre números >= 1.0 y < 2.0. En el peor caso: + * Mejor caso: 1.000... * 1.000... ~ 1.000... + * Peor caso: 1.999... * 1.999... ~ 3.999... = 2^1 * 1.999 + * + * Así que, si el producto es >= 2, hay que hacerle >> 1 a la mantisa + * y sumarle 1 al exponente para normalizar. + * + * + * === SUMA/RESTA === + * + * Queremos calcular q = a + b. Curiosamente, eso es más complicado que a * b. + * Hay que ajustar el exponente del menor entre a y b para que coincida + * con el del mayor (desnormalizando), realizar la operación y finalmente + * renormalizar. Se hace suma o resta dependiendo de relaciones de signos, + * no según la operación de entrada (eso último solo le hace xor al signo de b). + * Recordar aquí que IEEE 754 es una especie de signo-magnitud y no complemento. + * + * En el caso de una resta, el exponente normalizado puede ser mucho más + * pequeño que cualquiera de los exponentes de entrada. Necesitamos + * entonces de lǵoica CLZ (count leading zeros) para renormalizar. + */ + + logic exp_step, guard_0, guard_1, guard_2, guard_3, guard_4, guard_5, guard_10, + lo_msb, lo_reduce, overflow_0, overflow_1, overflow_10, overflow_12, + round_0, round_1, round_2, round_3, round_4, round_5, round_10, sign_0, + sign_10, sign_11, sign_12, slow_1, slow_2, slow_3, slow_4, slow_5, slow_10, + slow_11, slow_12, slow_in_1, slow_in_next, slow_out, sticky_1, sticky_2, + sticky_3, sticky_4, sticky_5, sticky_10, sticky_last, zero_1, zero_2, zero_3, + zero_4, zero_5, zero_10, zero_11, zero_12; + + float a_add, a_m1, a_mul, b_add, b_0, b_m1, b_mul, + max_2, max_3, max_4, max_5, min_2, min_3, min_4; + + float_class a_class_0, a_class_1, b_class_0, b_class_1, + max_class_2, max_class_3, min_class_2, min_class_3, min_class_4; + + word clz_in, product_hi, product_lo; + dword product; + float_exp exp, exp_11, exp_10, exp_12, exp_delta; + float_mant mant_10, mant_11, mant_12; + float_mant_full hi; + logic[$bits(float_mant_full) - 3:0] lo; + + typedef logic[$bits(float_mant_full) + 1:0] extended_mant; + localparam bit[$clog2($bits(extended_mant)):0] MAX_SHIFT = 1 << $clog2($bits(extended_mant)); + + localparam int SHIFT_WIDTH = {{($bits(int) - $bits(MAX_SHIFT)){1'b0}}, MAX_SHIFT}; + localparam int CLZ_EXTEND_BITS = $bits(float_exp) - $bits(clz_shift) + 1; + + typedef logic[$bits(float_mant_full) + 2:0] mant_sum; + + mant_sum add_sub, normalized; + extended_mant max_mant, min_mant, sticky_mask; + logic[$clog2(MAX_SHIFT):0] clz_shift, exp_shift; + + struct packed + { + float max; + logic guard, + round, + slow, + sticky, + zero; + mant_sum add_sub; + } clz_hold[FADD_CLZ_STAGES], clz_hold_out; + + gfx_clz #($bits(word)) clz + ( + .clk(clk), + .clz(clz_shift), + .value(clz_in) + ); + + function extended_mant extend_min_max(float in, float_class in_class); + extend_min_max = {~in_class.exp_min, in.mant, 2'b00}; + endfunction + + assign lo_msb = lo[$bits(lo) - 1]; + assign slow_out = &exp_12 || slow_12 || overflow_12; + assign exp_delta = max_2.exp - min_2.exp; + assign lo_reduce = |lo[$bits(lo) - 2:0]; + assign normalized = add_sub << clz_shift; + assign clz_hold_out = clz_hold[FADD_CLZ_STAGES - 1]; + assign slow_in_next = is_float_special(a_class_0) | is_float_special(b_class_0); + assign {product_hi, product_lo} = product; + assign {hi, guard_0, round_0, lo} = product[2 * $bits(float_mant_full) - 1:0]; + + always_comb begin + clz_in = {add_sub, {($bits(clz_in) - $bits(add_sub)){1'b0}}}; + if (~enable_norm_6) + clz_in[$bits(clz_in) - 1:$bits(clz_in) - 2] = 2'b01; + end + + always_ff @(posedge clk) begin + // Stage -1: + + a_m1 <= a; + b_m1 <= b; + a_mul <= a; + b_mul <= b; + + /* Nótese que el orden es sign-exp-mant. Esto coloca el 1. implícito + * en la posición correcta para multiplicar mantisas. + */ + if (mul_float_m1) begin + a_mul.exp <= 1; + b_mul.exp <= 1; + a_mul.sign <= 0; + b_mul.sign <= 0; + end + + // Genera un nop junto a lo anterior + if (unit_b_m1) begin + b_mul.exp <= 0; + b_mul.mant <= 1; + end + + // Stage 0: multiplicación de fp o enteros + + b_0 <= b_m1; + sign_0 <= a_m1.sign ^ b_m1.sign; + product <= a_mul * b_mul; + a_class_0 <= classify_float(a_m1); + b_class_0 <= classify_float(b_m1); + {overflow_0, exp} <= {1'b0, a_m1.exp} + {1'b0, b_m1.exp} - {1'b0, FLOAT_EXP_BIAS}; + + // Stage 1: normalización + + slow_in_1 <= slow_in_next; + overflow_1 <= 0; + + if (float_a_1) begin + slow_1 <= slow_in_next | (overflow_0 & ~a_class_0.exp_min & ~a_class_1.exp_min); + zero_1 <= a_class_0.exp_min | b_class_0.exp_min; + end else begin + slow_1 <= 0; + zero_1 <= 0; + end + + a_add.sign <= sign_0; + if (hi[$bits(hi) - 1]) begin + guard_1 <= guard_0; + round_1 <= round_0; + sticky_1 <= lo_msb | lo_reduce; + a_add.mant <= implicit_mant(hi); + {overflow_1, a_add.exp} <= {1'b0, exp} + 1; + end else begin + /* Bit antes de msb es necesariamente 1, ya que los msb de + * ambos multiplicandos son 1. Ver assert en implicit_mant(). + */ + guard_1 <= round_0; + round_1 <= lo[$bits(lo) - 1]; + sticky_1 <= lo_reduce; + a_add.exp <= exp; + a_add.mant <= implicit_mant({hi[$bits(hi) - 2:0], guard_0}); + end + + unique case (1'b1) + float_a_1: ; + + int_hi_a_1: + a_add <= product_hi; + + int_lo_a_1: + a_add <= product_lo; + endcase + + a_class_1 <= a_class_0; + if (zero_flags_a_1) + a_class_1 <= classify_float(0); + + if (zero_b_1) begin + b_add <= 0; + b_class_1 <= classify_float(0); + end else begin + b_add <= b_0; + b_class_1 <= b_class_0; + end + + /* Stage 2: ordenar tal que abs(max) >= abs(min). Wiki dice: + * + * A property of the single- and double-precision formats is that + * their encoding allows one to easily sort them without using + * floating-point hardware, as if the bits represented sign-magnitude + * integers, although it is unclear whether this was a design + * consideration (it seems noteworthy that the earlier IBM hexadecimal + * floating-point representation also had this property for normalized + * numbers). + */ + if ({b_add.exp, b_add.mant} > {a_add.exp, a_add.mant}) begin + max_2 <= b_add; + min_2 <= a_add; + max_class_2 <= b_class_1; + min_class_2 <= a_class_1; + end else begin + max_2 <= a_add; + min_2 <= b_add; + max_class_2 <= a_class_1; + min_class_2 <= b_class_1; + end + + guard_2 <= guard_1; + round_2 <= round_1; + sticky_2 <= sticky_1; + + if (copy_flags_2) begin + slow_2 <= slow_1 | overflow_1; + zero_2 <= zero_1; + end else begin + slow_2 <= slow_in_1; + zero_2 <= 0; + end + + // Stage 3: exp_shift amount + + max_3 <= max_2; + min_3 <= min_2; + slow_3 <= slow_2; + zero_3 <= zero_2; + guard_3 <= guard_2; + round_3 <= round_2; + sticky_3 <= sticky_2; + max_class_3 <= max_class_2; + min_class_3 <= min_class_2; + + exp_shift <= exp_delta[$bits(exp_shift) - 1:0]; + if (exp_delta > {{($bits(exp_delta) - $bits(MAX_SHIFT)){1'b0}}, MAX_SHIFT}) + exp_shift <= MAX_SHIFT; + + // Stage 4: shifts + + max_4 <= max_3; + min_4 <= min_3; + slow_4 <= slow_3; + zero_4 <= zero_3; + guard_4 <= guard_3; + round_4 <= round_3; + sticky_4 <= sticky_3; + min_class_4 <= min_class_3; + + max_mant <= extend_min_max(max_3, max_class_3); + min_mant <= extend_min_max(min_3, min_class_3) >> exp_shift; + sticky_mask <= {($bits(min_mant)){1'b1}} << exp_shift; + + // Stage 5: suma/resta y sticky + + max_5 <= max_4; + slow_5 <= slow_4; + zero_5 <= zero_4; + guard_5 <= guard_4; + round_5 <= round_4; + + if (copy_flags_5) + sticky_5 <= sticky_4; + else + sticky_5 <= |(extend_min_max(min_4, min_class_4) & ~sticky_mask); + + if (max_4.sign ^ min_4.sign) + add_sub <= {1'b0, max_mant - min_mant}; + else + add_sub <= {1'b0, max_mant} + {1'b0, min_mant}; + + // Stages 6-9: clz + + clz_hold[0].max <= max_5; + clz_hold[0].slow <= slow_5; + clz_hold[0].zero <= zero_5; + clz_hold[0].guard <= guard_5; + clz_hold[0].round <= round_5; + clz_hold[0].sticky <= sticky_5; + clz_hold[0].add_sub <= add_sub; + + for (int i = 1; i < FADD_CLZ_STAGES; ++i) + clz_hold[i] <= clz_hold[i - 1]; + + // Stage 10: normalización + + sign_10 <= clz_hold_out.max.sign; + slow_10 <= clz_hold_out.slow; + zero_10 <= clz_hold_out.zero; + sticky_10 <= clz_hold_out.sticky; + + {mant_10, guard_10, round_10, sticky_last} <= + normalized[$bits(normalized) - 2:$bits(normalized) - $bits(float_mant) - 4]; + + {overflow_10, exp_10} <= + {1'b0, clz_hold_out.max.exp} - {{CLZ_EXTEND_BITS{1'b0}}, clz_shift} + 1; + + if (clz_shift[$bits(clz_shift) - 1]) + zero_10 <= 1; + + if (copy_flags_10) begin + guard_10 <= clz_hold_out.guard; + round_10 <= clz_hold_out.round; + sticky_last <= 0; + overflow_10 <= 0; + end + + // Stage 11: redondeo + + exp_11 <= exp_10; + mant_11 <= mant_10; + sign_11 <= sign_10; + slow_11 <= slow_10 | (~copy_flags_11 & overflow_10 & ~zero_10); + zero_11 <= zero_10; + exp_step <= 0; + + // Este es el modo más común: round to nearest, ties to even + if (enable_round_11 & guard_10 & (round_10 | sticky_10 | sticky_last | mant_10[0])) + {exp_step, mant_11} <= {1'b0, mant_10} + 1; + + // Stage 12: ajuste de exponente por redondeo + + sign_12 <= sign_11; + slow_12 <= slow_11; + zero_12 <= zero_11; + mant_12 <= mant_11; + overflow_12 <= 0; + + if (exp_step) + {overflow_12, exp_12} <= {1'b0, exp_11} + 1; + else + exp_12 <= exp_11; + + // Stage 13: ceros y NaNs + + q.exp <= exp_12; + q.mant <= mant_12; + q.sign <= sign_12; + + if (encode_special_13) begin + if (slow_out) begin + q.exp <= FLOAT_EXP_MAX; + q.mant <= 1; + end else if (zero_12) begin + q.exp <= 0; + q.mant <= 0; + end + end + end + +endmodule diff --git a/platform/wavelet3d/gfx_pkg.sv b/platform/wavelet3d/gfx_pkg.sv index e108d7d..cfab6a5 100644 --- a/platform/wavelet3d/gfx_pkg.sv +++ b/platform/wavelet3d/gfx_pkg.sv @@ -1,9 +1,10 @@ package gfx; - typedef logic[31:0] float_word; + typedef logic[31:0] word; + typedef logic[63:0] dword; typedef logic[7:0] float_exp; - typedef logic[$bits(float_word) - $bits(float_exp) - 2:0] float_mant; + typedef logic[$bits(word) - $bits(float_exp) - 2:0] float_mant; typedef logic[$bits(float_mant):0] float_mant_full; // Incluye '1.' explícito localparam float_exp FLOAT_EXP_BIAS = (1 << ($bits(float_exp) - 1)) - 1; @@ -40,14 +41,29 @@ package gfx; typedef struct packed { - float val; logic exp_max, exp_min, mant_zero; - } float_special; + } float_class; + + function float_class classify_float(float in); + classify_float.exp_max = &in.exp; + classify_float.exp_min = ~|in.exp; + classify_float.mant_zero = ~|in.mant; + endfunction + + function logic is_float_special(float_class in); + is_float_special = in.exp_max | (in.exp_min & ~in.mant_zero); + endfunction /* -> 4,4,4,4,4,4,4,4 -> 8,8,8,8 -> 16,16 -> 32 */ localparam FADD_CLZ_STAGES = 4; + typedef struct packed + { + logic fadd, + fmul; + } arith_op; + endpackage diff --git a/platform/wavelet3d/gfx_round_lane.sv b/platform/wavelet3d/gfx_round_lane.sv deleted file mode 100644 index d0b0b03..0000000 --- a/platform/wavelet3d/gfx_round_lane.sv +++ /dev/null @@ -1,62 +0,0 @@ -module gfx_round_lane -( - input logic clk, - - input gfx::float_round in, - - output gfx::float out -); - - import gfx::*; - - logic exp_step, overflow, sign_0, sign_1, slow_0, slow_1, - slow_out, zero_0, zero_1; - - float_exp exp_0, exp_1; - float_mant mant_0, mant_1; - - assign slow_out = slow_1 || overflow || &exp_1; - - always_ff @(posedge clk) begin - // Stage 0: redondeo - - exp_0 <= in.normal.exp; - sign_0 <= in.normal.sign; - slow_0 <= in.slow; - zero_0 <= in.zero; - exp_step <= 0; - - // Este es el modo más común: round to nearest, ties to even - if (in.guard & (in.round | in.sticky | in.normal.mant[0])) - {exp_step, mant_0} <= {1'b0, in.normal.mant} + 1; - else - mant_0 <= in.normal.mant; - - sign_1 <= sign_0; - slow_1 <= slow_0; - zero_1 <= zero_0; - mant_1 <= mant_0; - overflow <= 0; - - if (exp_step) - {overflow, exp_1} <= {1'b0, exp_0} + 1; - else - exp_1 <= exp_0; - - // Stage 1: ceros y slow path - - out.sign <= sign_1; - - if (slow_out) begin - out.exp <= FLOAT_EXP_MAX; - out.mant <= 1; - end else if (zero_1) begin - out.exp <= 0; - out.mant <= 0; - end else begin - out.exp <= exp_1; - out.mant <= mant_1; - end - end - -endmodule diff --git a/platform/wavelet3d/main.cpp b/platform/wavelet3d/main.cpp index 1243dba..1bffb68 100644 --- a/platform/wavelet3d/main.cpp +++ b/platform/wavelet3d/main.cpp @@ -26,6 +26,54 @@ int main(int argc, char **argv) float a, b; std::cin >> a >> b; + // mul int + //top.mul_float_m1 = 0; + //top.unit_b_m1 = 0; + //top.float_a_1 = 0; + //top.int_hi_a_1 = 0; + //top.int_lo_a_1 = 1; + //top.zero_flags_a_1 = 1; + //top.zero_b_1 = 1; + //top.copy_flags_2 = 1; + //top.copy_flags_5 = 1; + //top.enable_norm_6 = 0; + //top.copy_flags_10 = 1; + //top.copy_flags_11 = 1; + //top.enable_round_11 = 0; + //top.encode_special_13 = 0; + + // mul fp + //top.mul_float_m1 = 1; + //top.unit_b_m1 = 0; + //top.float_a_1 = 1; + //top.int_hi_a_1 = 0; + //top.int_lo_a_1 = 0; + //top.zero_flags_a_1 = 0; + //top.zero_b_1 = 1; + //top.copy_flags_2 = 1; + //top.copy_flags_5 = 1; + //top.enable_norm_6 = 1; + //top.copy_flags_10 = 1; + //top.copy_flags_11 = 1; + //top.enable_round_11 = 1; + //top.encode_special_13 = 1; + + // suma/resta + top.mul_float_m1 = 0; + top.unit_b_m1 = 1; + top.float_a_1 = 0; + top.int_hi_a_1 = 0; + top.int_lo_a_1 = 1; + top.zero_flags_a_1 = 0; + top.zero_b_1 = 0; + top.copy_flags_2 = 0; + top.copy_flags_5 = 0; + top.enable_norm_6 = 1; + top.copy_flags_10 = 0; + top.copy_flags_11 = 0; + top.enable_round_11 = 1; + top.encode_special_13 = 1; + top.a = *reinterpret_cast<unsigned*>(&a); top.b = *reinterpret_cast<unsigned*>(&b); @@ -38,7 +86,7 @@ int main(int argc, char **argv) } unsigned q = top.q; - std::cout << a << " * " << b << " = " << *reinterpret_cast<float*>(&q) << '\n'; + std::cout << a << " * " << b << " = " << *reinterpret_cast<decltype(a)*>(&q) << '\n'; bool failed = Py_FinalizeEx() < 0; diff --git a/platform/wavelet3d/mod.mk b/platform/wavelet3d/mod.mk index a12392e..232d808 100644 --- a/platform/wavelet3d/mod.mk +++ b/platform/wavelet3d/mod.mk @@ -1,8 +1,9 @@ define core - $(this)/deps := dma_axi32 picorv32 + $(this)/deps := - $(this)/rtl_top := gfx_float_lane - $(this)/rtl_files := gfx_pkg.sv gfx_float_lane.sv gfx_fmul_lane.sv gfx_round_lane.sv + $(this)/rtl_top := gfx_fpint_lane + $(this)/rtl_dirs := . + $(this)/rtl_files := gfx_pkg.sv gfx_fpint_lane.sv $(this)/vl_main := main.cpp $(this)/vl_pkgconfig := python3-embed |
