diff options
| author | Alejandro Soto <alejandro@34project.org> | 2023-11-05 21:35:16 -0600 |
|---|---|---|
| committer | Alejandro Soto <alejandro@34project.org> | 2023-11-10 01:43:02 -0600 |
| commit | 5c982f38139cd1b0c5b590f67e99b1bcc1a32c9b (patch) | |
| tree | 8085b34356b79eac8f8a22bc0c484bddfd676b73 | |
| parent | d5de20fade70a0d454e3aa0087313ca715ff8759 (diff) | |
rtl/gfx: implement fixed-point FMA
| -rw-r--r-- | conspiracion.qsf | 5 | ||||
| -rw-r--r-- | gfx_hw.tcl | 2 | ||||
| -rw-r--r-- | rtl/gfx/gfx_defs.sv | 8 | ||||
| -rw-r--r-- | rtl/gfx/gfx_fixed_fma.sv | 72 | ||||
| -rw-r--r-- | rtl/gfx/gfx_fixed_fma_dot.sv | 48 |
5 files changed, 130 insertions, 5 deletions
diff --git a/conspiracion.qsf b/conspiracion.qsf index 926b9b9..031d664 100644 --- a/conspiracion.qsf +++ b/conspiracion.qsf @@ -323,6 +323,7 @@ set_global_assignment -name SIP_FILE ip/ip_fp_mul.sip set_global_assignment -name QIP_FILE ip/ip_fp_add.qip set_global_assignment -name SIP_FILE ip/ip_fp_add.sip -set_instance_assignment -name PARTITION_HIERARCHY root_partition -to | -section_id Top set_global_assignment -name QIP_FILE ip/ip_fp_inv.qip -set_global_assignment -name SIP_FILE ip/ip_fp_inv.sip
\ No newline at end of file +set_global_assignment -name SIP_FILE ip/ip_fp_inv.sip +set_global_assignment -name SIP_FILE ip/ip_dot2.sip +set_instance_assignment -name PARTITION_HIERARCHY root_partition -to | -section_id Top
\ No newline at end of file @@ -43,6 +43,8 @@ add_fileset_file gfx.sv SYSTEM_VERILOG PATH rtl/gfx/gfx.sv TOP_LEVEL_FILE add_fileset_file gfx_fp_add.sv SYSTEM_VERILOG PATH rtl/gfx/gfx_fp_add.sv add_fileset_file gfx_fp_inv.sv SYSTEM_VERILOG PATH rtl/gfx/gfx_fp_inv.sv add_fileset_file gfx_fp_mul.sv SYSTEM_VERILOG PATH rtl/gfx/gfx_fp_mul.sv +add_fileset_file gfx_fixed_fma.sv SYSTEM_VERILOG PATH rtl/gfx/gfx_fixed_fma.sv +add_fileset_file gfx_fixed_fma_dot.sv SYSTEM_VERILOG PATH rtl/gfx/gfx_fixed_fma_dot.sv add_fileset_file gfx_cmd.sv SYSTEM_VERILOG PATH rtl/gfx/gfx_cmd.sv add_fileset_file gfx_defs.sv SYSTEM_VERILOG PATH rtl/gfx/gfx_defs.sv add_fileset_file gfx_fold.sv SYSTEM_VERILOG PATH rtl/gfx/gfx_fold.sv diff --git a/rtl/gfx/gfx_defs.sv b/rtl/gfx/gfx_defs.sv index 1d2bd68..45e1a63 100644 --- a/rtl/gfx/gfx_defs.sv +++ b/rtl/gfx/gfx_defs.sv @@ -14,7 +14,6 @@ typedef logic[`FLOAT_BITS - 1:0] fp; typedef fp vec2[2]; typedef fp vec4[`FLOATS_PER_VEC]; -typedef fp[`FLOATS_PER_VEC - 1:0] vec4_packed; typedef vec4 mat4[`VECS_PER_MAT]; `define FP_UNIT 16'h3c00 @@ -48,10 +47,13 @@ typedef struct packed logic[9:0] r, g, b; } rgb30; -typedef logic signed[31:0] fixed; - `define FIXED_FRAC 16 +`define FIXED_FMA_STAGES 5 +`define FIXED_FMA_DOT_STAGES (2 * `FIXED_FMA_STAGES) + +typedef logic signed[31:0] fixed; + typedef struct packed { fixed x, y; diff --git a/rtl/gfx/gfx_fixed_fma.sv b/rtl/gfx/gfx_fixed_fma.sv new file mode 100644 index 0000000..e1dd6bb --- /dev/null +++ b/rtl/gfx/gfx_fixed_fma.sv @@ -0,0 +1,72 @@ +`include "gfx/gfx_defs.sv" + +/* Fixed-point a * b + c, designed to close timing easily + * on Cyclone V, where the DSP blocks support 18x18. 
The stages are: + * - fma_ab: products of the a_lo/hi and b_lo/hi permutations + * - fma_pp: recombination into partial FMAs + * - fma_lo: low half of the result and pre-carry high half + * - fma_hi: post-carry high half + * + * Note that this takes exactly the same work as a * b + * (see rtl/core/mul.sv in project 2 of arqui 1). NOTE(review): the partial products are zero-extended (unsigned) although fixed is declared signed; confirm the result is as intended for negative a or b. + */ +module gfx_fixed_fma +( + input logic clk, + + input fixed a, + b, + c, + input logic stall, + + output fixed q +); + + fixed a_ab, b_ab, c_ab, c_pp; + logic[1:0] carry; + logic[16:0] lo_left, lo_right; + logic[17:0] lo_with_carry; + logic[35:0] ab_ll, ab_lh, ab_hl, ab_hh; + + logic[15:0] a_lo, a_hi, b_lo, b_hi, ab_ll_hi, ab_ll_lo, ab_hl_hi, ab_hl_lo, + ab_lh_hi, ab_lh_lo, ab_hh_hi, ab_hh_lo, c_hi, c_lo, hi, hi_left, hi_right, lo; + + assign {a_hi, a_lo} = a_ab; + assign {b_hi, b_lo} = b_ab; + assign {c_hi, c_lo} = c_pp; + + assign {ab_ll_hi, ab_ll_lo} = ab_ll[31:0]; + assign {ab_lh_hi, ab_lh_lo} = ab_lh[31:0]; + assign {ab_hl_hi, ab_hl_lo} = ab_hl[31:0]; + assign {ab_hh_hi, ab_hh_lo} = ab_hh[31:0]; + + assign {carry, lo} = lo_with_carry; + + always @(posedge clk) + if (!stall) begin + a_ab <= a; + b_ab <= b; + c_ab <= c; + + /* Since the operands are small (16 bits), this is not synthesized, + * but is instead routed through the nearest DSP blocks (claim carried over from the original comment; confirm against the fitter report) + */ + ab_ll <= {2'd0, a_lo} * {2'd0, b_lo}; + ab_lh <= {2'd0, a_lo} * {2'd0, b_hi}; + ab_hl <= {2'd0, a_hi} * {2'd0, b_lo}; + ab_hh <= {2'd0, a_hi} * {2'd0, b_hi}; + + c_pp <= c_ab; + + hi_left <= ab_hh_lo + ab_lh_hi; + lo_left <= {1'd0, ab_lh_lo} + {1'd0, ab_hl_lo}; + hi_right <= ab_hl_hi + c_hi; + lo_right <= {1'd0, ab_ll_hi} + {1'd0, c_lo}; + + hi <= hi_left + hi_right; + lo_with_carry <= {1'd0, lo_left} + {1'd0, lo_right}; + + q <= {hi + {14'd0, carry}, lo}; + end + +endmodule diff --git a/rtl/gfx/gfx_fixed_fma_dot.sv b/rtl/gfx/gfx_fixed_fma_dot.sv new file mode 100644 index 0000000..2831d08 --- /dev/null +++ b/rtl/gfx/gfx_fixed_fma_dot.sv @@ -0,0 +1,48 @@ +`include 
"gfx/gfx_defs.sv" +/* Computes q = a1 * b1 + (a0 * b0 + c) by chaining two gfx_fixed_fma instances: fma0 produces q0 from a0, b0 and c, and fma1 adds a1 * b1 to q0. a1 and b1 pass through FIXED_FMA_STAGES hold registers before reaching fma1, the same number of register stages that separate the inputs of gfx_fixed_fma from its q; stall freezes the hold registers. */ +module gfx_fixed_fma_dot +( + input logic clk, + + input fixed a0, + b0, + a1, + b1, + c, + input logic stall, + + output fixed q +); + + fixed q0, a1_hold[`FIXED_FMA_STAGES], b1_hold[`FIXED_FMA_STAGES]; + + gfx_fixed_fma fma0 + ( + .a(a0), + .b(b0), + .q(q0), + .* + ); + + gfx_fixed_fma fma1 + ( + .a(a1_hold[`FIXED_FMA_STAGES - 1]), + .b(b1_hold[`FIXED_FMA_STAGES - 1]), + .c(q0), + .* + ); + + integer i; + + always_ff @(posedge clk) + if (!stall) begin + a1_hold[0] <= a1; + b1_hold[0] <= b1; + + for (i = 1; i < `FIXED_FMA_STAGES; ++i) begin + a1_hold[i] <= a1_hold[i - 1]; + b1_hold[i] <= b1_hold[i - 1]; + end + end + +endmodule |
