From a14fc04f3b9f5bcef941ea79c794532d7ca0e7fc Mon Sep 17 00:00:00 2001
From: Alejandro Soto
Date: Sat, 21 Oct 2023 15:37:45 -0600
Subject: ip: downgrade to 16-bit floats

---
 rtl/gfx/fp_add.sv   |  1 +
 rtl/gfx/fp_mul.sv   |  1 +
 rtl/gfx/gfx.sv      |  9 ++++++---
 rtl/gfx/gfx_defs.sv | 14 ++++++++------
 4 files changed, 16 insertions(+), 9 deletions(-)

(limited to 'rtl/gfx')

diff --git a/rtl/gfx/fp_add.sv b/rtl/gfx/fp_add.sv
index 7f7e2c6..6cf4874 100644
--- a/rtl/gfx/fp_add.sv
+++ b/rtl/gfx/fp_add.sv
@@ -21,6 +21,7 @@ module fp_add
 `ifndef VERILATOR
 	ip_fp_add ip_add
 	(
+		.en(1),
 		.areset(0),
 		.*
 	);
diff --git a/rtl/gfx/fp_mul.sv b/rtl/gfx/fp_mul.sv
index c2f4e2a..c5aa56a 100644
--- a/rtl/gfx/fp_mul.sv
+++ b/rtl/gfx/fp_mul.sv
@@ -21,6 +21,7 @@ module fp_mul
 `ifndef VERILATOR
 	ip_fp_mul ip_mul
 	(
+		.en(1),
 		.areset(0),
 		.*
 	);
diff --git a/rtl/gfx/gfx.sv b/rtl/gfx/gfx.sv
index ab2bcce..ec2fb13 100644
--- a/rtl/gfx/gfx.sv
+++ b/rtl/gfx/gfx.sv
@@ -12,10 +12,13 @@ module gfx
 	output logic[31:0] cmd_readdata
 );
 
+	fp readdata, writedata;
 	mat4 a, b, q, hold_q;
 	logic start, done;
 
-	assign cmd_readdata = hold_q[cmd_address[3:2]][cmd_address[1:0]];
+	assign readdata = hold_q[cmd_address[3:2]][cmd_address[1:0]];
+	assign writedata = cmd_writedata[`FLOAT_BITS - 1:0];
+	assign cmd_readdata = {{($bits(cmd_readdata) - `FLOAT_BITS){1'b0}}, readdata};
 
 	mat_mat_mul mul
 	(
@@ -25,9 +28,9 @@ module gfx
 	always_ff @(posedge clk) begin
 		if (cmd_write) begin
 			if (cmd_address[4])
-				a[cmd_address[3:2]][cmd_address[1:0]] <= cmd_writedata;
+				a[cmd_address[3:2]][cmd_address[1:0]] <= writedata;
 			else
-				b[cmd_address[3:2]][cmd_address[1:0]] <= cmd_writedata;
+				b[cmd_address[3:2]][cmd_address[1:0]] <= writedata;
 		end
 
 		if (done)
diff --git a/rtl/gfx/gfx_defs.sv b/rtl/gfx/gfx_defs.sv
index e52f243..d01822a 100644
--- a/rtl/gfx/gfx_defs.sv
+++ b/rtl/gfx/gfx_defs.sv
@@ -1,14 +1,16 @@
 `ifndef GFX_DEFS_SV
 `define GFX_DEFS_SV
 
-`define FP_ADD_STAGES 13
-`define FP_MUL_STAGES 6
-
+`define FLOAT_BITS 16
 `define FLOATS_PER_VEC 4
 `define VECS_PER_MAT 4
 
-typedef logic[31:0] fp;
-typedef fp          vec4[`FLOATS_PER_VEC];
-typedef vec4        mat4[`VECS_PER_MAT];
+// 100 MHz target with float16, approximate rounding
+`define FP_ADD_STAGES 4 // ~325 LUTs
+`define FP_MUL_STAGES 3 // ~119 LUTs, ~1 DSP block
+
+typedef logic[`FLOAT_BITS - 1:0] fp;
+typedef fp                       vec4[`FLOATS_PER_VEC];
+typedef vec4                     mat4[`VECS_PER_MAT];
 
 `endif
-- 
cgit v1.2.3