summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Alejandro Soto <alejandro@34project.org> 2023-10-22 00:16:40 -0600
committer: Alejandro Soto <alejandro@34project.org> 2023-10-22 00:16:50 -0600
commit: c1c1f1e823099c82d02e94827a64d7a0b223048e (patch)
tree: cc6466fad9a943bbde314feb151bbacadf5b338a
parent: a14fc04f3b9f5bcef941ea79c794532d7ca0e7fc (diff)
rtl/gfx: reimplement multiplier as a much smaller mat-vec pipeline
Diffstat (limited to '')
-rw-r--r--  gfx_hw.tcl                  3
-rw-r--r--  rtl/gfx/fold_flow.sv       55
-rw-r--r--  rtl/gfx/fp_add.sv          11
-rw-r--r--  rtl/gfx/fp_mul.sv          11
-rw-r--r--  rtl/gfx/gfx.sv              4
-rw-r--r--  rtl/gfx/gfx_defs.sv         7
-rw-r--r--  rtl/gfx/horizontal_fold.sv 68
-rw-r--r--  rtl/gfx/mat_mat_mul.sv     70
-rw-r--r--  rtl/gfx/mat_vec_mul.sv     29
-rw-r--r--  rtl/gfx/pipeline_flow.sv   40
-rw-r--r--  rtl/gfx/pipelined_flow.sv  26
-rw-r--r--  rtl/gfx/vec_dot.sv         15
12 files changed, 227 insertions, 112 deletions
diff --git a/gfx_hw.tcl b/gfx_hw.tcl
index f337d95..cff04b0 100644
--- a/gfx_hw.tcl
+++ b/gfx_hw.tcl
@@ -46,7 +46,8 @@ add_fileset_file gfx_defs.sv SYSTEM_VERILOG PATH rtl/gfx/gfx_defs.sv
add_fileset_file horizontal_fold.sv SYSTEM_VERILOG PATH rtl/gfx/horizontal_fold.sv
add_fileset_file mat_mat_mul.sv SYSTEM_VERILOG PATH rtl/gfx/mat_mat_mul.sv
add_fileset_file mat_vec_mul.sv SYSTEM_VERILOG PATH rtl/gfx/mat_vec_mul.sv
-add_fileset_file pipelined_flow.sv SYSTEM_VERILOG PATH rtl/gfx/pipelined_flow.sv
+add_fileset_file pipeline_flow.sv SYSTEM_VERILOG PATH rtl/gfx/pipeline_flow.sv
+add_fileset_file fold_flow.sv SYSTEM_VERILOG PATH rtl/gfx/fold_flow.sv
add_fileset_file vec_dot.sv SYSTEM_VERILOG PATH rtl/gfx/vec_dot.sv
diff --git a/rtl/gfx/fold_flow.sv b/rtl/gfx/fold_flow.sv
new file mode 100644
index 0000000..718786e
--- /dev/null
+++ b/rtl/gfx/fold_flow.sv
@@ -0,0 +1,55 @@
+`include "gfx/gfx_defs.sv"
+
+// Round tracker and handshake logic for a fold that reuses one pipelined
+// stage chain over several rounds (horizontal_fold drives its fp_add
+// operand muxes from `feedback` / `feedback_last`).
+//
+// `rounds` holds one 2-bit tag per `FP_ADD_STAGES stage:
+//   2'b00  no operation in this stage
+//   2'b01  first round
+//   2'b10  second round
+//   2'b11  final round; when it reaches the last stage out_valid rises
+//
+// Ports:
+//   clk, rst_n      clock and active-low asynchronous reset
+//   in_valid        upstream offers a new operation
+//   out_ready       downstream can take a result
+//   in_ready        a new operation may enter (not stalled, no feedback)
+//   out_valid       the last-stage tag equals `INDEX4_MAX
+//   stall           a result is pending and not taken; all tags hold
+//   feedback        the last-stage tag is 2'b01 or 2'b10
+//   feedback_last   the last-stage tag has bit 1 set
+module fold_flow
+(
+    input  logic clk,
+                 rst_n,
+
+    input  logic in_valid,
+                 out_ready,
+
+    output logic in_ready,
+                 out_valid,
+                 stall,
+                 feedback,
+                 feedback_last
+);
+
+    index4 rounds[`FP_ADD_STAGES], last_round;
+
+    assign stall = out_valid && !out_ready;
+    assign in_ready = !stall && !feedback;
+    assign out_valid = last_round == `INDEX4_MAX;
+
+    // XOR of the two tag bits is true exactly for 2'b01 and 2'b10.
+    assign feedback = last_round[1] ^ last_round[0];
+    assign feedback_last = last_round[1];
+
+    assign last_round = rounds[`FP_ADD_STAGES - 1];
+
+    // First stage: a tag leaving the last stage in a feedback round
+    // re-enters with the next round number; otherwise the slot is free
+    // and takes 2'b01 if a new operation is offered, else 2'b00.
+    always_ff @(posedge clk or negedge rst_n)
+        if (!rst_n)
+            rounds[0] <= `INDEX4_MIN;
+        else if (!stall)
+            unique case (last_round)
+                2'b01:
+                    rounds[0] <= 2'b10;
+
+                2'b10:
+                    rounds[0] <= 2'b11;
+
+                2'b00, 2'b11:
+                    rounds[0] <= {1'b0, in_valid};
+            endcase
+
+    // Remaining stages shift whenever the pipeline is not stalled, the
+    // same condition that advances rounds[0]. Gating them on in_ready
+    // would freeze them during feedback rounds (in_ready is low while
+    // feedback is high), so the last-stage tag could never leave the
+    // feedback states once it entered one.
+    genvar i;
+    generate
+        for (i = 1; i < `FP_ADD_STAGES; ++i) begin: pipeline
+            always_ff @(posedge clk or negedge rst_n)
+                if (!rst_n)
+                    rounds[i] <= `INDEX4_MIN;
+                else if (!stall)
+                    rounds[i] <= rounds[i - 1];
+        end
+    endgenerate
+
+endmodule
diff --git a/rtl/gfx/fp_add.sv b/rtl/gfx/fp_add.sv
index 6cf4874..fad4768 100644
--- a/rtl/gfx/fp_add.sv
+++ b/rtl/gfx/fp_add.sv
@@ -3,25 +3,18 @@
module fp_add
(
input logic clk,
- rst_n,
- input logic start,
input fp a,
b,
+ input logic stall,
- output logic done,
output fp q
);
- pipelined_flow #(.STAGES(`FP_ADD_STAGES)) stages
- (
- .*
- );
-
`ifndef VERILATOR
ip_fp_add ip_add
(
- .en(1),
+ .en(!stall),
.areset(0),
.*
);
diff --git a/rtl/gfx/fp_mul.sv b/rtl/gfx/fp_mul.sv
index c5aa56a..90d30fb 100644
--- a/rtl/gfx/fp_mul.sv
+++ b/rtl/gfx/fp_mul.sv
@@ -3,25 +3,18 @@
module fp_mul
(
input logic clk,
- rst_n,
- input logic start,
input fp a,
b,
+ input logic stall,
- output logic done,
output fp q
);
- pipelined_flow #(.STAGES(`FP_MUL_STAGES)) stages
- (
- .*
- );
-
`ifndef VERILATOR
ip_fp_mul ip_mul
(
- .en(1),
+ .en(!stall),
.areset(0),
.*
);
diff --git a/rtl/gfx/gfx.sv b/rtl/gfx/gfx.sv
index ec2fb13..233d285 100644
--- a/rtl/gfx/gfx.sv
+++ b/rtl/gfx/gfx.sv
@@ -22,6 +22,10 @@ module gfx
mat_mat_mul mul
(
+ .in_ready(),
+ .in_valid(start),
+ .out_ready(1),
+ .out_valid(done),
.*
);
diff --git a/rtl/gfx/gfx_defs.sv b/rtl/gfx/gfx_defs.sv
index d01822a..648f9e6 100644
--- a/rtl/gfx/gfx_defs.sv
+++ b/rtl/gfx/gfx_defs.sv
@@ -1,6 +1,7 @@
`ifndef GFX_DEFS_SV
`define GFX_DEFS_SV
+// Esto es arquitectural, no se puede ajustar sin cambiar otras cosas
`define FLOAT_BITS 16
`define FLOATS_PER_VEC 4
`define VECS_PER_MAT 4
@@ -10,7 +11,13 @@
`define FP_MUL_STAGES 3 // ~119 LUTs ~1 bloque DSP
typedef logic[`FLOAT_BITS - 1:0] fp;
+typedef fp vec2[2];
typedef fp vec4[`FLOATS_PER_VEC];
typedef vec4 mat4[`VECS_PER_MAT];
+typedef logic[1:0] index4;
+
+`define INDEX4_MIN 2'b00
+`define INDEX4_MAX 2'b11
+
`endif
diff --git a/rtl/gfx/horizontal_fold.sv b/rtl/gfx/horizontal_fold.sv
index 127150e..513e3b1 100644
--- a/rtl/gfx/horizontal_fold.sv
+++ b/rtl/gfx/horizontal_fold.sv
@@ -1,50 +1,44 @@
`include "gfx/gfx_defs.sv"
-// Asume que N es una potencia de 2
module horizontal_fold
-#(parameter N=1)
(
- input logic clk,
- rst_n,
+ input logic clk,
- input logic start,
- input fp vec[N - 1:0],
+ input vec4 vec,
+ input logic stall,
+ feedback,
+ feedback_last,
- output logic done,
- output fp q
+ output fp q
);
- fp q_left, q_right;
- logic halves_done;
+ vec2 feedback_vec, queued[`FP_ADD_STAGES];
+
+ assign feedback_vec = queued[`FP_ADD_STAGES - 1];
+
+ fp_add add
+ (
+ .a(feedback ? q : vec[0]),
+ .b(feedback ? feedback_vec[feedback_last] : vec[1]),
+ .*
+ );
+
+ always_ff @(posedge clk)
+ if (!stall) begin
+ if (feedback)
+ queued[0] <= feedback_vec;
+ else begin
+ queued[0][0] <= vec[2];
+ queued[0][1] <= vec[3];
+ end
+ end
+ genvar i;
generate
- if (N > 1) begin
- horizontal_fold #(.N(N / 2)) left
- (
- .q(q_left),
- .vec(vec[N - 1:N / 2]),
- .done(halves_done),
- .*
- );
-
- horizontal_fold #(.N(N / 2)) right
- (
- .q(q_right),
- .vec(vec[N / 2 - 1:0]),
- .done(),
- .*
- );
-
- fp_add fold
- (
- .a(q_left),
- .b(q_right),
- .start(halves_done),
- .*
- );
- end else begin
- assign q = vec[0];
- assign done = start;
+ for (i = 1; i < `FLOATS_PER_VEC; ++i) begin: stages
+ always_ff @(posedge clk)
+ if (!stall)
+ queued[i] <= queued[i - 1];
end
endgenerate
diff --git a/rtl/gfx/mat_mat_mul.sv b/rtl/gfx/mat_mat_mul.sv
index aa5e769..cf4bd35 100644
--- a/rtl/gfx/mat_mat_mul.sv
+++ b/rtl/gfx/mat_mat_mul.sv
@@ -5,29 +5,67 @@ module mat_mat_mul
input logic clk,
rst_n,
- input logic start,
input mat4 a,
b,
+ input logic in_valid,
+ out_ready,
- output logic done,
- output mat4 q
+ output mat4 q,
+ output logic in_ready,
+ out_valid
);
- logic dones[`VECS_PER_MAT];
+ mat4 a_hold, b_hold, q_hold, mul_b;
+ vec4 mul_q;
+ logic mul_in_ready, mul_in_valid, mul_out_ready, mul_out_valid;
+ index4 in_index, out_index;
- assign done = dones[0];
+ assign in_ready = mul_in_ready && in_index == `INDEX4_MIN;
+ assign out_valid = mul_out_valid && out_index == `INDEX4_MAX;
- genvar i;
- generate
- for (i = 0; i < `VECS_PER_MAT; ++i) begin: columns
- mat_vec_mul column_i
- (
- .x(b[i]),
- .q(q[i]),
- .done(dones[i]),
- .*
- );
+ assign mul_in_valid = in_valid || in_index != `INDEX4_MIN;
+ assign mul_out_ready = out_ready || out_index != `INDEX4_MAX;
+
+ mat_vec_mul mul
+ (
+ .a(in_index == `INDEX4_MIN ? a : a_hold),
+ .x(mul_b[in_index]),
+ .q(mul_q),
+ .in_ready(mul_in_ready),
+ .in_valid(mul_in_valid),
+ .out_ready(mul_out_ready),
+ .out_valid(mul_out_valid),
+ .*
+ );
+
+ always_comb begin
+ mul_b = b_hold;
+ mul_b[0] = b[0];
+
+ q = q_hold;
+ q[`VECS_PER_MAT - 1] = mul_q;
+ end
+
+ always_ff @(posedge clk or negedge rst_n)
+ if (!rst_n) begin
+ in_index <= `INDEX4_MIN;
+ out_index <= `INDEX4_MIN;
+ end else begin
+ if (mul_in_ready && mul_in_valid)
+ in_index <= in_index + 1;
+
+ if (mul_out_ready && mul_out_valid)
+ out_index <= out_index + 1;
+ end
+
+ always_ff @(posedge clk) begin
+ if (in_ready) begin
+ a_hold <= a;
+ b_hold <= b;
end
- endgenerate
+
+ if (mul_out_ready && mul_out_valid)
+ q_hold[out_index] <= mul_q;
+ end
endmodule
diff --git a/rtl/gfx/mat_vec_mul.sv b/rtl/gfx/mat_vec_mul.sv
index 43860c9..9f5dcae 100644
--- a/rtl/gfx/mat_vec_mul.sv
+++ b/rtl/gfx/mat_vec_mul.sv
@@ -5,27 +5,42 @@ module mat_vec_mul
input logic clk,
rst_n,
- input logic start,
input mat4 a,
input vec4 x,
+ input logic in_valid,
+ out_ready,
- output logic done,
- output vec4 q
+ output vec4 q,
+ output logic in_ready,
+ out_valid
);
- logic dones[`FLOATS_PER_VEC];
+ logic stall_mul, stall_fold, mul_ready, mul_valid, feedback, feedback_last;
- assign done = dones[0];
+ pipeline_flow #(.STAGES(`FP_MUL_STAGES)) mul
+ (
+ .stall(stall_mul),
+ .out_ready(mul_ready),
+ .out_valid(mul_valid),
+ .*
+ );
+
+ fold_flow fold
+ (
+ .stall(stall_fold),
+ .in_ready(mul_ready),
+ .in_valid(mul_valid),
+ .*
+ );
genvar i;
generate
- for (i = 0; i < `FLOATS_PER_VEC; ++i) begin: dots
+ for (i = 0; i < `VECS_PER_MAT; ++i) begin: dots
vec_dot dot_i
(
.a(a[i]),
.b(x),
.q(q[i]),
- .done(dones[i]),
.*
);
end
diff --git a/rtl/gfx/pipeline_flow.sv b/rtl/gfx/pipeline_flow.sv
new file mode 100644
index 0000000..2b9c891
--- /dev/null
+++ b/rtl/gfx/pipeline_flow.sv
@@ -0,0 +1,40 @@
+`include "gfx/gfx_defs.sv"
+
+// Valid-bit shift register with ready/valid handshaking.
+//
+// One valid bit per stage (STAGES of them) moves forward every cycle in
+// which in_ready is high. The chain holds while the last stage is valid
+// and out_ready is low; `stall` is the inverse of in_ready.
+//
+// Ports:
+//   clk, rst_n   clock and active-low asynchronous reset
+//   in_valid     valid bit presented to the first stage
+//   out_ready    downstream can take the last stage
+//   in_ready     the chain advances this cycle
+//   out_valid    valid bit of the last stage
+//   stall        the chain holds this cycle
+module pipeline_flow
+#(parameter STAGES=0)
+(
+    input  logic clk,
+    input  logic rst_n,
+
+    input  logic in_valid,
+    input  logic out_ready,
+
+    output logic in_ready,
+    output logic out_valid,
+    output logic stall
+);
+
+    logic valid[STAGES];
+
+    assign out_valid = valid[STAGES - 1];
+    assign in_ready  = out_ready || !out_valid;
+    assign stall     = !in_ready;
+
+    // Single clocked process: clear every stage on reset, otherwise
+    // shift the whole chain by one position when it may advance.
+    always_ff @(posedge clk or negedge rst_n)
+        if (!rst_n) begin
+            for (int s = 0; s < STAGES; ++s)
+                valid[s] <= 0;
+        end else if (in_ready) begin
+            valid[0] <= in_valid;
+
+            for (int s = 1; s < STAGES; ++s)
+                valid[s] <= valid[s - 1];
+        end
+
+endmodule
diff --git a/rtl/gfx/pipelined_flow.sv b/rtl/gfx/pipelined_flow.sv
deleted file mode 100644
index 1e3c1ce..0000000
--- a/rtl/gfx/pipelined_flow.sv
+++ /dev/null
@@ -1,26 +0,0 @@
-module pipelined_flow
-#(parameter STAGES=0)
-(
- input logic clk,
- rst_n,
-
- input logic start,
- output logic done
-);
-
- logic valid[STAGES];
-
- assign done = valid[STAGES - 1];
-
- always_ff @(posedge clk or negedge rst_n)
- valid[0] <= !rst_n ? 0 : start;
-
- genvar i;
- generate
- for (i = 1; i < STAGES; ++i) begin: pipeline
- always_ff @(posedge clk or negedge rst_n)
- valid[i] <= !rst_n ? 0 : valid[i - 1];
- end
- endgenerate
-
-endmodule
diff --git a/rtl/gfx/vec_dot.sv b/rtl/gfx/vec_dot.sv
index d984504..4e1fdee 100644
--- a/rtl/gfx/vec_dot.sv
+++ b/rtl/gfx/vec_dot.sv
@@ -3,23 +3,24 @@
module vec_dot
(
input logic clk,
- rst_n,
- input logic start,
+ input logic stall_mul,
+ stall_fold,
+ feedback,
+ feedback_last,
+
input vec4 a,
b,
- output logic done,
output fp q
);
vec4 products;
- logic dones[`FLOATS_PER_VEC];
- horizontal_fold #(.N(`FLOATS_PER_VEC)) fold
+ horizontal_fold fold
(
- .start(dones[0]),
.vec(products),
+ .stall(stall_fold),
.*
);
@@ -30,8 +31,8 @@ module vec_dot
(
.a(a[i]),
.b(b[i]),
- .done(dones[i]),
.q(products[i]),
+ .stall(stall_mul),
.*
);
end