From d84718bf7955a6bba03aa44938f0f140c1a6390d Mon Sep 17 00:00:00 2001
From: Alejandro Soto
Date: Sat, 21 Oct 2023 03:21:18 -0600
Subject: rtl/gfx: implement non-synthesizable matrix multiplier

---
 rtl/gfx/fp_add.sv          | 37 +++++++++++++++++++++++++++++++++++++
 rtl/gfx/fp_mul.sv          | 37 +++++++++++++++++++++++++++++++++++++
 rtl/gfx/gfx.sv             | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 rtl/gfx/gfx_defs.sv        | 18 ++++++++++++++++++
 rtl/gfx/horizontal_fold.sv | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 rtl/gfx/mat_mat_mul.sv     | 40 ++++++++++++++++++++++++++++++++++++++++
 rtl/gfx/mat_vec_mul.sv     | 37 +++++++++++++++++++++++++++++++++++++
 rtl/gfx/pipelined_flow.sv  | 35 +++++++++++++++++++++++++++++++++++
 rtl/gfx/vec_dot.sv         | 43 +++++++++++++++++++++++++++++++++++++++++++
 9 files changed, 361 insertions(+)
 create mode 100644 rtl/gfx/fp_add.sv
 create mode 100644 rtl/gfx/fp_mul.sv
 create mode 100644 rtl/gfx/gfx.sv
 create mode 100644 rtl/gfx/gfx_defs.sv
 create mode 100644 rtl/gfx/horizontal_fold.sv
 create mode 100644 rtl/gfx/mat_mat_mul.sv
 create mode 100644 rtl/gfx/mat_vec_mul.sv
 create mode 100644 rtl/gfx/pipelined_flow.sv
 create mode 100644 rtl/gfx/vec_dot.sv

diff --git a/rtl/gfx/fp_add.sv b/rtl/gfx/fp_add.sv
new file mode 100644
index 0000000..7f7e2c6
--- /dev/null
+++ b/rtl/gfx/fp_add.sv
@@ -0,0 +1,37 @@
+`include "gfx/gfx_defs.sv"
+
+// Floating-point adder: wraps the ip_fp_add core together with a valid
+// pipeline of FP_ADD_STAGES registers, so done is start delayed by
+// FP_ADD_STAGES clock cycles.
+// NOTE(review): presumably FP_ADD_STAGES equals the latency configured in
+// ip_fp_add -- confirm against the IP settings.
+// Under Verilator the core is not instantiated and q is left undriven.
+module fp_add
+(
+    input  logic clk,
+                 rst_n,
+
+    input  logic start,
+    input  fp    a,
+                 b,
+
+    output logic done,
+    output fp    q
+);
+
+    // Delays start by FP_ADD_STAGES cycles to produce done
+    pipelined_flow #(.STAGES(`FP_ADD_STAGES)) stages
+    (
+        .*
+    );
+
+`ifndef VERILATOR
+    ip_fp_add ip_add
+    (
+        // Core reset is tied off; remaining ports are bound by name
+        .areset(1'b0),
+        .*
+    );
+`endif
+
+endmodule
diff --git a/rtl/gfx/fp_mul.sv b/rtl/gfx/fp_mul.sv
new file mode 100644
index 0000000..c2f4e2a
--- /dev/null
+++ b/rtl/gfx/fp_mul.sv
@@ -0,0 +1,37 @@
+`include "gfx/gfx_defs.sv"
+
+// Floating-point multiplier: wraps the ip_fp_mul core together with a valid
+// pipeline of FP_MUL_STAGES registers, so done is start delayed by
+// FP_MUL_STAGES clock cycles.
+// NOTE(review): presumably FP_MUL_STAGES equals the latency configured in
+// ip_fp_mul -- confirm against the IP settings.
+// Under Verilator the core is not instantiated and q is left undriven.
+module fp_mul
+(
+    input  logic clk,
+                 rst_n,
+
+    input  logic start,
+    input  fp    a,
+                 b,
+
+    output logic done,
+    output fp    q
+);
+
+    // Delays start by FP_MUL_STAGES cycles to produce done
+    pipelined_flow #(.STAGES(`FP_MUL_STAGES)) stages
+    (
+        .*
+    );
+
+`ifndef VERILATOR
+    ip_fp_mul ip_mul
+    (
+        // Core reset is tied off; remaining ports are bound by name
+        .areset(1'b0),
+        .*
+    );
+`endif
+
+endmodule
diff --git a/rtl/gfx/gfx.sv b/rtl/gfx/gfx.sv
new file mode 100644
index 0000000..ab2bcce
--- /dev/null
+++ b/rtl/gfx/gfx.sv
@@ -0,0 +1,55 @@
+`include "gfx/gfx_defs.sv"
+
+// Command-mapped front end for the matrix multiplier.
+//
+// Writes: cmd_address[4] selects the operand (1 = a, 0 = b), [3:2] the vector
+// inside it and [1:0] the element inside that vector. A write to address
+// 5'b11111 stores a[3][3] and also sets start on that same clock edge,
+// which launches mat_mat_mul.
+// Reads: cmd_readdata is a combinational view of hold_q, the product captured
+// at the last done, indexed by cmd_address[3:0]; cmd_read is not used.
+module gfx
+(
+    input  logic       clk,
+                       rst_n,
+
+    input  logic[4:0]  cmd_address,
+    input  logic       cmd_read,
+                       cmd_write,
+    input  logic[31:0] cmd_writedata,
+    output logic[31:0] cmd_readdata
+);
+
+    mat4 a, b, q, hold_q;
+    logic start, done;
+
+    assign cmd_readdata = hold_q[cmd_address[3:2]][cmd_address[1:0]];
+
+    mat_mat_mul mul
+    (
+        .*
+    );
+
+    // Operand and result registers; none of them has a reset
+    always_ff @(posedge clk) begin
+        if (cmd_write) begin
+            if (cmd_address[4])
+                a[cmd_address[3:2]][cmd_address[1:0]] <= cmd_writedata;
+            else
+                b[cmd_address[3:2]][cmd_address[1:0]] <= cmd_writedata;
+        end
+
+        // Hold the product so reads stay stable until the next done
+        if (done)
+            hold_q <= q;
+    end
+
+    // start and a[3][3] are registered on the same edge, so the multiplier
+    // sees the trigger together with the freshly written element
+    always_ff @(posedge clk or negedge rst_n)
+        if (!rst_n)
+            start <= 1'b0;
+        else
+            start <= cmd_write && (cmd_address == 5'b11111);
+
+endmodule
diff --git a/rtl/gfx/gfx_defs.sv b/rtl/gfx/gfx_defs.sv
new file mode 100644
index 0000000..e52f243
--- /dev/null
+++ b/rtl/gfx/gfx_defs.sv
@@ -0,0 +1,18 @@
+`ifndef GFX_DEFS_SV
+`define GFX_DEFS_SV
+
+// Depth of the valid pipelines that accompany the floating-point cores
+`define FP_ADD_STAGES 13
+`define FP_MUL_STAGES 6
+
+// Matrix geometry: a mat4 is VECS_PER_MAT vectors of FLOATS_PER_VEC floats
+`define FLOATS_PER_VEC 4
+`define VECS_PER_MAT   4
+
+// NOTE(review): fp is presumably an IEEE 754 single-precision word; the
+// encoding is fixed by the ip_fp_* cores, not by this file -- confirm.
+typedef logic[31:0] fp;
+typedef fp          vec4[`FLOATS_PER_VEC];
+typedef vec4        mat4[`VECS_PER_MAT];
+
+`endif
diff --git a/rtl/gfx/horizontal_fold.sv b/rtl/gfx/horizontal_fold.sv
new file mode 100644
index 0000000..127150e
--- /dev/null
+++ b/rtl/gfx/horizontal_fold.sv
@@ -0,0 +1,59 @@
+`include "gfx/gfx_defs.sv"
+
+// Sums the N elements of vec with a balanced tree of fp_add instances.
+//
+// Each level splits vec in two halves, reduces them recursively and adds the
+// two partial sums; a single-element instance passes its input through.
+// Assumes N is a power of 2: N / 2 truncates, so other sizes do not
+// partition vec correctly.
+module horizontal_fold
+#(parameter N=1)
+(
+    input  logic clk,
+                 rst_n,
+
+    input  logic start,
+    input  fp    vec[N - 1:0],
+
+    output logic done,
+    output fp    q
+);
+
+    fp q_left, q_right;
+    logic halves_done;
+
+    generate
+        if (N > 1) begin
+            horizontal_fold #(.N(N / 2)) left
+            (
+                .q(q_left),
+                .vec(vec[N - 1:N / 2]),
+                .done(halves_done),
+                .*
+            );
+
+            // Same size and same start as left, so only left's done is used
+            // and this one is left unconnected
+            horizontal_fold #(.N(N / 2)) right
+            (
+                .q(q_right),
+                .vec(vec[N / 2 - 1:0]),
+                .done(),
+                .*
+            );
+
+            fp_add fold
+            (
+                .a(q_left),
+                .b(q_right),
+                .start(halves_done),
+                .*
+            );
+        end else begin
+            // Leaf: nothing to add, so the input is forwarded unregistered
+            assign q = vec[0];
+            assign done = start;
+        end
+    endgenerate
+
+endmodule
diff --git a/rtl/gfx/mat_mat_mul.sv b/rtl/gfx/mat_mat_mul.sv
new file mode 100644
index 0000000..aa5e769
--- /dev/null
+++ b/rtl/gfx/mat_mat_mul.sv
@@ -0,0 +1,40 @@
+`include "gfx/gfx_defs.sv"
+
+// Matrix product built from VECS_PER_MAT parallel mat_vec_mul units.
+//
+// Unit i multiplies a by the vector b[i] and drives q[i], so
+// q[i][j] = dot(a[j], b[i]).
+// NOTE(review): this reads a by rows and b/q by columns -- confirm the
+// storage convention expected by callers.
+module mat_mat_mul
+(
+    input  logic clk,
+                 rst_n,
+
+    input  logic start,
+    input  mat4  a,
+                 b,
+
+    output logic done,
+    output mat4  q
+);
+
+    logic dones[`VECS_PER_MAT];
+
+    // Every unit is identical and shares start, so unit 0 reports for all
+    assign done = dones[0];
+
+    genvar i;
+    generate
+        for (i = 0; i < `VECS_PER_MAT; ++i) begin: columns
+            mat_vec_mul column_i
+            (
+                .x(b[i]),
+                .q(q[i]),
+                .done(dones[i]),
+                .*
+            );
+        end
+    endgenerate
+
+endmodule
diff --git a/rtl/gfx/mat_vec_mul.sv b/rtl/gfx/mat_vec_mul.sv
new file mode 100644
index 0000000..43860c9
--- /dev/null
+++ b/rtl/gfx/mat_vec_mul.sv
@@ -0,0 +1,37 @@
+`include "gfx/gfx_defs.sv"
+
+// Matrix-vector product: q[i] = dot(a[i], x) for i in [0, FLOATS_PER_VEC),
+// with one vec_dot unit per output element.
+module mat_vec_mul
+(
+    input  logic clk,
+                 rst_n,
+
+    input  logic start,
+    input  mat4  a,
+    input  vec4  x,
+
+    output logic done,
+    output vec4  q
+);
+
+    logic dones[`FLOATS_PER_VEC];
+
+    // Every unit is identical and shares start, so unit 0 reports for all
+    assign done = dones[0];
+
+    genvar i;
+    generate
+        for (i = 0; i < `FLOATS_PER_VEC; ++i) begin: dots
+            vec_dot dot_i
+            (
+                .a(a[i]),
+                .b(x),
+                .q(q[i]),
+                .done(dones[i]),
+                .*
+            );
+        end
+    endgenerate
+
+endmodule
diff --git a/rtl/gfx/pipelined_flow.sv b/rtl/gfx/pipelined_flow.sv
new file mode 100644
index 0000000..1e3c1ce
--- /dev/null
+++ b/rtl/gfx/pipelined_flow.sv
@@ -0,0 +1,35 @@
+// Valid-bit shift register: done is start delayed by STAGES clock cycles.
+// The chain is cleared asynchronously while rst_n is low.
+// STAGES must be at least 1: valid has STAGES entries and done taps the last.
+module pipelined_flow
+#(parameter STAGES=1)
+(
+    input  logic clk,
+                 rst_n,
+
+    input  logic start,
+    output logic done
+);
+
+    logic valid[STAGES];
+
+    assign done = valid[STAGES - 1];
+
+    always_ff @(posedge clk or negedge rst_n)
+        if (!rst_n)
+            valid[0] <= 1'b0;
+        else
+            valid[0] <= start;
+
+    genvar i;
+    generate
+        for (i = 1; i < STAGES; ++i) begin: pipeline
+            always_ff @(posedge clk or negedge rst_n)
+                if (!rst_n)
+                    valid[i] <= 1'b0;
+                else
+                    valid[i] <= valid[i - 1];
+        end
+    endgenerate
+
+endmodule
diff --git a/rtl/gfx/vec_dot.sv b/rtl/gfx/vec_dot.sv
new file mode 100644
index 0000000..d984504
--- /dev/null
+++ b/rtl/gfx/vec_dot.sv
@@ -0,0 +1,43 @@
+`include "gfx/gfx_defs.sv"
+
+// Dot product of two vec4: FLOATS_PER_VEC parallel fp_mul units feed a
+// horizontal_fold adder tree that drives q and done.
+module vec_dot
+(
+    input  logic clk,
+                 rst_n,
+
+    input  logic start,
+    input  vec4  a,
+                 b,
+
+    output logic done,
+    output fp    q
+);
+
+    vec4 products;
+    logic dones[`FLOATS_PER_VEC];
+
+    // The multipliers share start, so the first one's done starts the sum
+    horizontal_fold #(.N(`FLOATS_PER_VEC)) fold
+    (
+        .start(dones[0]),
+        .vec(products),
+        .*
+    );
+
+    genvar i;
+    generate
+        for (i = 0; i < `FLOATS_PER_VEC; ++i) begin: entries
+            fp_mul entry_i
+            (
+                .a(a[i]),
+                .b(b[i]),
+                .done(dones[i]),
+                .q(products[i]),
+                .*
+            );
+        end
+    endgenerate
+
+endmodule
--
cgit v1.2.3