From d5de20fade70a0d454e3aa0087313ca715ff8759 Mon Sep 17 00:00:00 2001 From: Alejandro Soto Date: Thu, 2 Nov 2023 22:19:26 -0600 Subject: rtl/gfx: rename modules --- rtl/gfx/fold_flow.sv | 61 ------------------------------ rtl/gfx/fp_add.sv | 40 -------------------- rtl/gfx/fp_inv.sv | 36 ------------------ rtl/gfx/fp_mul.sv | 40 -------------------- rtl/gfx/gfx_dot.sv | 49 ++++++++++++++++++++++++ rtl/gfx/gfx_fold.sv | 54 +++++++++++++++++++++++++++ rtl/gfx/gfx_fold_flow.sv | 61 ++++++++++++++++++++++++++++++ rtl/gfx/gfx_fp_add.sv | 40 ++++++++++++++++++++ rtl/gfx/gfx_fp_inv.sv | 36 ++++++++++++++++++ rtl/gfx/gfx_fp_mul.sv | 40 ++++++++++++++++++++ rtl/gfx/gfx_mat_mat.sv | 83 +++++++++++++++++++++++++++++++++++++++++ rtl/gfx/gfx_mat_vec.sv | 49 ++++++++++++++++++++++++ rtl/gfx/gfx_perspective.sv | 4 +- rtl/gfx/gfx_perspective_flow.sv | 2 +- rtl/gfx/gfx_pipeline_flow.sv | 40 ++++++++++++++++++++ rtl/gfx/gfx_scanout.sv | 4 +- rtl/gfx/gfx_skid_buf.sv | 20 ++++++++++ rtl/gfx/gfx_skid_flow.sv | 31 +++++++++++++++ rtl/gfx/gfx_transpose.sv | 17 +++++++++ rtl/gfx/horizontal_fold.sv | 54 --------------------------- rtl/gfx/mat_mat_mul.sv | 83 ----------------------------------------- rtl/gfx/mat_vec_mul.sv | 49 ------------------------ rtl/gfx/pipeline_flow.sv | 40 -------------------- rtl/gfx/skid_buf.sv | 20 ---------- rtl/gfx/skid_flow.sv | 31 --------------- rtl/gfx/transpose.sv | 17 --------- rtl/gfx/vec_dot.sv | 49 ------------------------ 27 files changed, 525 insertions(+), 525 deletions(-) delete mode 100644 rtl/gfx/fold_flow.sv delete mode 100644 rtl/gfx/fp_add.sv delete mode 100644 rtl/gfx/fp_inv.sv delete mode 100644 rtl/gfx/fp_mul.sv create mode 100644 rtl/gfx/gfx_dot.sv create mode 100644 rtl/gfx/gfx_fold.sv create mode 100644 rtl/gfx/gfx_fold_flow.sv create mode 100644 rtl/gfx/gfx_fp_add.sv create mode 100644 rtl/gfx/gfx_fp_inv.sv create mode 100644 rtl/gfx/gfx_fp_mul.sv create mode 100644 rtl/gfx/gfx_mat_mat.sv create mode 100644 rtl/gfx/gfx_mat_vec.sv create mode 100644 rtl/gfx/gfx_pipeline_flow.sv create mode 100644 rtl/gfx/gfx_skid_buf.sv create mode 100644 rtl/gfx/gfx_skid_flow.sv create mode 100644 rtl/gfx/gfx_transpose.sv delete mode 100644 rtl/gfx/horizontal_fold.sv delete mode 100644 rtl/gfx/mat_mat_mul.sv delete mode 100644 rtl/gfx/mat_vec_mul.sv delete mode 100644 rtl/gfx/pipeline_flow.sv delete mode 100644 rtl/gfx/skid_buf.sv delete mode 100644 rtl/gfx/skid_flow.sv delete mode 100644 rtl/gfx/transpose.sv delete mode 100644 rtl/gfx/vec_dot.sv (limited to 'rtl') diff --git a/rtl/gfx/fold_flow.sv b/rtl/gfx/fold_flow.sv deleted file mode 100644 index 2221976..0000000 --- a/rtl/gfx/fold_flow.sv +++ /dev/null @@ -1,61 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module fold_flow -( - input logic clk, - rst_n, - - input logic in_valid, - out_ready, - - output logic in_ready, - out_valid, - stall, - feedback, - feedback_last -); - - logic skid_ready; - index4 rounds[`FP_ADD_STAGES], last_round; - - assign in_ready = skid_ready && !feedback; - - assign feedback = last_round[1] ^ last_round[0]; - assign feedback_last = last_round[1]; - - assign last_round = rounds[`FP_ADD_STAGES - 1]; - - skid_flow skid - ( - .in_valid(last_round == `INDEX4_MAX), - .in_ready(skid_ready), - .* - ); - - always_ff @(posedge clk or negedge rst_n) - if (!rst_n) - rounds[0] <= `INDEX4_MIN; - else if (!stall) - unique case (last_round) - 2'b01: - rounds[0] <= 2'b10; - - 2'b10: - rounds[0] <= 2'b11; - - 2'b00, 2'b11: - rounds[0] <= {1'b0, in_valid}; - endcase - - genvar i; - generate - for (i = 1; i < `FP_ADD_STAGES; ++i) begin: pipeline - always_ff @(posedge clk or negedge rst_n) - if (!rst_n) - rounds[i] <= `INDEX4_MIN; - else if (!stall) - rounds[i] <= rounds[i - 1]; - end - endgenerate - -endmodule diff --git a/rtl/gfx/fp_add.sv b/rtl/gfx/fp_add.sv deleted file mode 100644 index b49a8aa..0000000 --- a/rtl/gfx/fp_add.sv +++ /dev/null @@ -1,40 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module fp_add -( - input logic clk, - - input fp a, - b, - input logic stall, - - output fp q -); - -`ifndef VERILATOR - ip_fp_add ip_add - ( - .en(!stall), - .areset(0), - .* - ); -`else - fp a_pipeline[`FP_ADD_STAGES - 1], b_pipeline[`FP_ADD_STAGES - 1]; - - integer i; - - always_ff @(posedge clk) - if (!stall) begin - a_pipeline[0] <= a; - b_pipeline[0] <= b; - - for (i = 1; i < `FP_ADD_STAGES - 1; ++i) begin - a_pipeline[i] <= a_pipeline[i - 1]; - b_pipeline[i] <= b_pipeline[i - 1]; - end - - q <= $c("taller::fp_add(", a_pipeline[`FP_ADD_STAGES - 2], ", ", b_pipeline[`FP_ADD_STAGES - 2], ")"); - end -`endif - -endmodule diff --git a/rtl/gfx/fp_inv.sv b/rtl/gfx/fp_inv.sv deleted file mode 100644 index d2bebdc..0000000 --- a/rtl/gfx/fp_inv.sv +++ /dev/null @@ -1,36 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module fp_inv -( - input logic clk, - - input fp a, - input logic stall, - - output fp q -); - -`ifndef VERILATOR - ip_fp_inv ip_inv - ( - .en(!stall), - .areset(0), - .* - ); -`else - fp pipeline[`FP_INV_STAGES - 1]; - - integer i; - - always_ff @(posedge clk) - if (!stall) begin - pipeline[0] <= a; - - for (i = 1; i < `FP_INV_STAGES - 1; ++i) - pipeline[i] <= pipeline[i - 1]; - - q <= $c("taller::fp_inv(", pipeline[`FP_INV_STAGES - 2], ")"); - end -`endif - -endmodule diff --git a/rtl/gfx/fp_mul.sv b/rtl/gfx/fp_mul.sv deleted file mode 100644 index fda4de2..0000000 --- a/rtl/gfx/fp_mul.sv +++ /dev/null @@ -1,40 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module fp_mul -( - input logic clk, - - input fp a, - b, - input logic stall, - - output fp q -); - -`ifndef VERILATOR - ip_fp_mul ip_mul - ( - .en(!stall), - .areset(0), - .* - ); -`else - fp a_pipeline[`FP_MUL_STAGES - 1], b_pipeline[`FP_MUL_STAGES - 1]; - - integer i; - - always_ff @(posedge clk) - if (!stall) begin - a_pipeline[0] <= a; - b_pipeline[0] <= b; - - for (i = 1; i < `FP_MUL_STAGES - 1; ++i) begin - a_pipeline[i] <= a_pipeline[i - 1]; - b_pipeline[i] <= b_pipeline[i - 1]; - end - - q <= $c("taller::fp_mul(", a_pipeline[`FP_MUL_STAGES - 2], ", ", b_pipeline[`FP_MUL_STAGES - 2], ")"); - end -`endif - -endmodule diff --git a/rtl/gfx/gfx_dot.sv b/rtl/gfx/gfx_dot.sv new file mode 100644 index 0000000..9c21c23 --- /dev/null +++ b/rtl/gfx/gfx_dot.sv @@ -0,0 +1,49 @@ +`include "gfx/gfx_defs.sv" + +module gfx_dot +( + input logic clk, + + input logic stall_mul, + stall_fold, + feedback, + feedback_last, + + input vec4 a, + b, + + output fp q +); + + vec4 products_fold, products_mul; + + gfx_fold fold + ( + .vec(products_fold), + .stall(stall_fold), + .* + ); + + genvar i; + generate + for (i = 0; i < `FLOATS_PER_VEC; ++i) begin: entries + gfx_fp_mul entry_i + ( + .a(a[i]), + .b(b[i]), + .q(products_mul[i]), + .stall(stall_mul), + .* + ); + + gfx_skid_buf #(.WIDTH($bits(fp))) skid_i + ( + .in(products_mul[i]), + .out(products_fold[i]), + .stall(stall_mul), + .* + ); + end + endgenerate + +endmodule diff --git a/rtl/gfx/gfx_fold.sv b/rtl/gfx/gfx_fold.sv new file mode 100644 index 0000000..616d868 --- /dev/null +++ b/rtl/gfx/gfx_fold.sv @@ -0,0 +1,54 @@ +`include "gfx/gfx_defs.sv" + +module gfx_fold +( + input logic clk, + + input vec4 vec, + input logic stall, + feedback, + feedback_last, + + output fp q +); + + fp q_add; + vec2 feedback_vec, queued[`FP_ADD_STAGES]; + + assign feedback_vec = queued[`FP_ADD_STAGES - 1]; + + gfx_fp_add add + ( + .a(feedback ? q_add : vec[0]), + .b(feedback ? feedback_vec[feedback_last] : vec[1]), + .q(q_add), + .* + ); + + gfx_skid_buf #(.WIDTH($bits(q))) skid + ( + .in(q_add), + .out(q), + .* + ); + + always_ff @(posedge clk) + if (!stall) begin + if (feedback) + queued[0] <= feedback_vec; + else begin + queued[0][0] <= vec[2]; + queued[0][1] <= vec[3]; + end + end + + genvar i; + generate + for (i = 1; i < `FP_ADD_STAGES; ++i) begin: stages + always_ff @(posedge clk) + if (!stall) + queued[i] <= queued[i - 1]; + end + endgenerate + +endmodule diff --git a/rtl/gfx/gfx_fold_flow.sv b/rtl/gfx/gfx_fold_flow.sv new file mode 100644 index 0000000..8f23b8f --- /dev/null +++ b/rtl/gfx/gfx_fold_flow.sv @@ -0,0 +1,61 @@ +`include "gfx/gfx_defs.sv" + +module gfx_fold_flow +( + input logic clk, + rst_n, + + input logic in_valid, + out_ready, + + output logic in_ready, + out_valid, + stall, + feedback, + feedback_last +); + + logic skid_ready; + index4 rounds[`FP_ADD_STAGES], last_round; + + assign in_ready = skid_ready && !feedback; + + assign feedback = last_round[1] ^ last_round[0]; + assign feedback_last = last_round[1]; + + assign last_round = rounds[`FP_ADD_STAGES - 1]; + + gfx_skid_flow skid + ( + .in_valid(last_round == `INDEX4_MAX), + .in_ready(skid_ready), + .* + ); + + always_ff @(posedge clk or negedge rst_n) + if (!rst_n) + rounds[0] <= `INDEX4_MIN; + else if (!stall) + unique case (last_round) + 2'b01: + rounds[0] <= 2'b10; + + 2'b10: + rounds[0] <= 2'b11; + + 2'b00, 2'b11: + rounds[0] <= {1'b0, in_valid}; + endcase + + genvar i; + generate + for (i = 1; i < `FP_ADD_STAGES; ++i) begin: pipeline + always_ff @(posedge clk or negedge rst_n) + if (!rst_n) + rounds[i] <= `INDEX4_MIN; + else if (!stall) + rounds[i] <= rounds[i - 1]; + end + endgenerate + +endmodule diff --git a/rtl/gfx/gfx_fp_add.sv b/rtl/gfx/gfx_fp_add.sv new file mode 100644 index 0000000..6ba7b1c --- /dev/null +++ b/rtl/gfx/gfx_fp_add.sv @@ -0,0 +1,40 @@ +`include "gfx/gfx_defs.sv" + +module gfx_fp_add +( + input logic clk, + + input fp a, + b, + input logic stall, + + output fp q +); + +`ifndef VERILATOR + ip_fp_add ip_add + ( + .en(!stall), + .areset(0), + .* + ); +`else + fp a_pipeline[`FP_ADD_STAGES - 1], b_pipeline[`FP_ADD_STAGES - 1]; + + integer i; + + always_ff @(posedge clk) + if (!stall) begin + a_pipeline[0] <= a; + b_pipeline[0] <= b; + + for (i = 1; i < `FP_ADD_STAGES - 1; ++i) begin + a_pipeline[i] <= a_pipeline[i - 1]; + b_pipeline[i] <= b_pipeline[i - 1]; + end + + q <= $c("taller::fp_add(", a_pipeline[`FP_ADD_STAGES - 2], ", ", b_pipeline[`FP_ADD_STAGES - 2], ")"); + end +`endif + +endmodule diff --git a/rtl/gfx/gfx_fp_inv.sv b/rtl/gfx/gfx_fp_inv.sv new file mode 100644 index 0000000..41b3ad5 --- /dev/null +++ b/rtl/gfx/gfx_fp_inv.sv @@ -0,0 +1,36 @@ +`include "gfx/gfx_defs.sv" + +module gfx_fp_inv +( + input logic clk, + + input fp a, + input logic stall, + + output fp q +); + +`ifndef VERILATOR + ip_fp_inv ip_inv + ( + .en(!stall), + .areset(0), + .* + ); +`else + fp pipeline[`FP_INV_STAGES - 1]; + + integer i; + + always_ff @(posedge clk) + if (!stall) begin + pipeline[0] <= a; + + for (i = 1; i < `FP_INV_STAGES - 1; ++i) + pipeline[i] <= pipeline[i - 1]; + + q <= $c("taller::fp_inv(", pipeline[`FP_INV_STAGES - 2], ")"); + end +`endif + +endmodule diff --git a/rtl/gfx/gfx_fp_mul.sv b/rtl/gfx/gfx_fp_mul.sv new file mode 100644 index 0000000..eb7d7d7 --- /dev/null +++ b/rtl/gfx/gfx_fp_mul.sv @@ -0,0 +1,40 @@ +`include "gfx/gfx_defs.sv" + +module gfx_fp_mul +( + input logic clk, + + input fp a, + b, + input logic stall, + + output fp q +); + +`ifndef VERILATOR + ip_fp_mul ip_mul + ( + .en(!stall), + .areset(0), + .* + ); +`else + fp a_pipeline[`FP_MUL_STAGES - 1], b_pipeline[`FP_MUL_STAGES - 1]; + + integer i; + + always_ff @(posedge clk) + if (!stall) begin + a_pipeline[0] <= a; + b_pipeline[0] <= b; + + for (i = 1; i < `FP_MUL_STAGES - 1; ++i) begin + a_pipeline[i] <= a_pipeline[i - 1]; + b_pipeline[i] <= b_pipeline[i - 1]; + end + + q <= $c("taller::fp_mul(", a_pipeline[`FP_MUL_STAGES - 2], ", ", b_pipeline[`FP_MUL_STAGES - 2], ")"); + end +`endif + +endmodule diff --git a/rtl/gfx/gfx_mat_mat.sv b/rtl/gfx/gfx_mat_mat.sv new file mode 100644 index 0000000..d03a648 --- /dev/null +++ b/rtl/gfx/gfx_mat_mat.sv @@ -0,0 +1,83 @@ +`include "gfx/gfx_defs.sv" + +module gfx_mat_mat +( + input logic clk, + rst_n, + + input mat4 a, + b, + input logic in_valid, + out_ready, + + output mat4 q, + output logic in_ready, + out_valid +); + + mat4 a_hold, b_hold, b_transpose, q_hold, q_transpose, mul_b; + vec4 mul_q; + logic mul_in_ready, mul_in_valid, mul_out_ready, mul_out_valid; + index4 in_index, out_index; + + assign in_ready = mul_in_ready && in_index == `INDEX4_MIN; + assign out_valid = mul_out_valid && out_index == `INDEX4_MAX; + + assign mul_in_valid = in_valid || in_index != `INDEX4_MIN; + assign mul_out_ready = out_ready || out_index != `INDEX4_MAX; + + gfx_transpose transpose_b + ( + .in(b), + .out(b_transpose) + ); + + gfx_mat_vec mul + ( + .a(in_index == `INDEX4_MIN ? a : a_hold), + .x(mul_b[in_index]), + .q(mul_q), + .in_ready(mul_in_ready), + .in_valid(mul_in_valid), + .out_ready(mul_out_ready), + .out_valid(mul_out_valid), + .* + ); + + gfx_transpose transpose_q + ( + .in(q_transpose), + .out(q) + ); + + always_comb begin + mul_b = b_hold; + mul_b[0] = b_transpose[0]; + + q_transpose = q_hold; + q_transpose[`VECS_PER_MAT - 1] = mul_q; + end + + always_ff @(posedge clk or negedge rst_n) + if (!rst_n) begin + in_index <= `INDEX4_MIN; + out_index <= `INDEX4_MIN; + end else begin + if (mul_in_ready && mul_in_valid) + in_index <= in_index + 1; + + if (mul_out_ready && mul_out_valid) + out_index <= out_index + 1; + end + + always_ff @(posedge clk) begin + if (in_ready) begin + a_hold <= a; + b_hold <= b_transpose; + end + + if (mul_out_ready && mul_out_valid) + q_hold[out_index] <= mul_q; + end + +endmodule diff --git a/rtl/gfx/gfx_mat_vec.sv b/rtl/gfx/gfx_mat_vec.sv new file mode 100644 index 0000000..4be4976 --- /dev/null +++ b/rtl/gfx/gfx_mat_vec.sv @@ -0,0 +1,49 @@ +`include "gfx/gfx_defs.sv" + +module gfx_mat_vec +( + input logic clk, + rst_n, + + input mat4 a, + input vec4 x, + input logic in_valid, + out_ready, + + output vec4 q, + output logic in_ready, + out_valid +); + + logic stall_mul, stall_fold, mul_ready, mul_valid, feedback, feedback_last; + + gfx_pipeline_flow #(.STAGES(`FP_MUL_STAGES)) mul + ( + .stall(stall_mul), + .out_ready(mul_ready), + .out_valid(mul_valid), + .* + ); + + gfx_fold_flow fold + ( + .stall(stall_fold), + .in_ready(mul_ready), + .in_valid(mul_valid), + .* + ); + + genvar i; + generate + for (i = 0; i < `VECS_PER_MAT; ++i) begin: dots + gfx_dot dot_i + ( + .a(a[i]), + .b(x), + .q(q[i]), + .* + ); + end + endgenerate + +endmodule diff --git a/rtl/gfx/gfx_perspective.sv b/rtl/gfx/gfx_perspective.sv index 3cc38b2..6af1724 100644 --- a/rtl/gfx/gfx_perspective.sv +++ b/rtl/gfx/gfx_perspective.sv @@ -19,7 +19,7 @@ module gfx_perspective assign w_inv = w_inv_pipes[`FP_MUL_STAGES - 1]; assign selected_w_inv = in_start ? next_w_inv : vertex_w_inv; - fp_inv inv + gfx_fp_inv inv ( .a(clip_attr.w), .q(next_w_inv), @@ -29,7 +29,7 @@ module gfx_perspective genvar i; generate for (i = 0; i < `FLOATS_PER_VEC; ++i) begin: divs - fp_mul div + gfx_fp_mul div ( .a(in_pipes[`FP_INV_STAGES - 1][i]), .b(selected_w_inv), diff --git a/rtl/gfx/gfx_perspective_flow.sv b/rtl/gfx/gfx_perspective_flow.sv index ecacc65..e94f584 100644 --- a/rtl/gfx/gfx_perspective_flow.sv +++ b/rtl/gfx/gfx_perspective_flow.sv @@ -25,7 +25,7 @@ module gfx_perspective_flow assign in_start = start_pipes[`FP_INV_STAGES - 1]; assign out_start = start_pipes[STAGES - 1]; - pipeline_flow #(.STAGES(STAGES)) flow + gfx_pipeline_flow #(.STAGES(STAGES)) flow ( .* ); diff --git a/rtl/gfx/gfx_pipeline_flow.sv b/rtl/gfx/gfx_pipeline_flow.sv new file mode 100644 index 0000000..9b3f22a --- /dev/null +++ b/rtl/gfx/gfx_pipeline_flow.sv @@ -0,0 +1,40 @@ +module gfx_pipeline_flow +#(parameter STAGES=0) +( + input logic clk, + rst_n, + + input logic in_valid, + out_ready, + + output logic in_ready, + out_valid, + stall +); + + logic[STAGES - 1:0] valid; + + gfx_skid_flow skid + ( + .in_valid(valid[STAGES - 1]), + .* + ); + + always_ff @(posedge clk or negedge rst_n) + if (!rst_n) + valid[0] <= 0; + else if (!stall) + valid[0] <= in_valid; + + genvar i; + generate + for (i = 1; i < STAGES; ++i) begin: pipeline + always_ff @(posedge clk or negedge rst_n) + if (!rst_n) + valid[i] <= 0; + else if (!stall) + valid[i] <= valid[i - 1]; + end + endgenerate + +endmodule diff --git a/rtl/gfx/gfx_scanout.sv b/rtl/gfx/gfx_scanout.sv index c549782..18b700d 100644 --- a/rtl/gfx/gfx_scanout.sv +++ b/rtl/gfx/gfx_scanout.sv @@ -73,7 +73,7 @@ module gfx_scanout genvar i; generate for (i = 0; i < `GFX_SCAN_STAGES; ++i) begin: stages - pipeline_flow #(.STAGES(1)) fb_flow + gfx_pipeline_flow #(.STAGES(1)) fb_flow ( .stall(fb_stalls[i]), .in_ready(fb_ready[i]), @@ -83,7 +83,7 @@ module gfx_scanout .* ); - pipeline_flow #(.STAGES(1)) src_flow + gfx_pipeline_flow #(.STAGES(1)) src_flow ( .stall(src_stalls[i]), .in_ready(src_ready[i]), diff --git a/rtl/gfx/gfx_skid_buf.sv b/rtl/gfx/gfx_skid_buf.sv new file mode 100644 index 0000000..fae5717 --- /dev/null +++ b/rtl/gfx/gfx_skid_buf.sv @@ -0,0 +1,20 @@ +module gfx_skid_buf +#(parameter WIDTH=0) +( + input logic clk, + + input logic[WIDTH - 1:0] in, + input logic stall, + + output logic[WIDTH - 1:0] out +); + + logic[WIDTH - 1:0] skid; + + assign out = stall ? skid : in; + + always_ff @(posedge clk) + if (!stall) + skid <= in; + +endmodule diff --git a/rtl/gfx/gfx_skid_flow.sv b/rtl/gfx/gfx_skid_flow.sv new file mode 100644 index 0000000..c5e3b4a --- /dev/null +++ b/rtl/gfx/gfx_skid_flow.sv @@ -0,0 +1,31 @@ +module gfx_skid_flow +( + input logic clk, + rst_n, + + input logic in_valid, + out_ready, + + output logic in_ready, + out_valid, + stall +); + + logic was_ready, was_valid; + + assign stall = !in_ready; + assign in_ready = was_ready || !was_valid; + assign out_valid = in_valid || stall; + + always_ff @(posedge clk or negedge rst_n) + if (!rst_n) begin + was_ready <= 0; + was_valid <= 0; + end else begin + was_ready <= out_ready; + + if (!stall) + was_valid <= in_valid; + end + +endmodule diff --git a/rtl/gfx/gfx_transpose.sv b/rtl/gfx/gfx_transpose.sv new file mode 100644 index 0000000..03ecf2d --- /dev/null +++ b/rtl/gfx/gfx_transpose.sv @@ -0,0 +1,17 @@ +`include "gfx/gfx_defs.sv" + +module gfx_transpose +( + input mat4 in, + output mat4 out +); + + integer i, j; + + // Esto no tiene costo en hardware, es un renombramiento de señales + always_comb + for (i = 0; i < `VECS_PER_MAT; ++i) + for (j = 0; j < `FLOATS_PER_VEC; ++j) + out[i][j] = in[j][i]; + +endmodule diff --git a/rtl/gfx/horizontal_fold.sv b/rtl/gfx/horizontal_fold.sv deleted file mode 100644 index f244b55..0000000 --- a/rtl/gfx/horizontal_fold.sv +++ /dev/null @@ -1,54 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module horizontal_fold -( - input logic clk, - - input vec4 vec, - input logic stall, - feedback, - feedback_last, - - output fp q -); - - fp q_add; - vec2 feedback_vec, queued[`FP_ADD_STAGES]; - - assign feedback_vec = queued[`FP_ADD_STAGES - 1]; - - fp_add add - ( - .a(feedback ? q_add : vec[0]), - .b(feedback ? feedback_vec[feedback_last] : vec[1]), - .q(q_add), - .* - ); - - skid_buf #(.WIDTH($bits(q))) skid - ( - .in(q_add), - .out(q), - .* - ); - - always_ff @(posedge clk) - if (!stall) begin - if (feedback) - queued[0] <= feedback_vec; - else begin - queued[0][0] <= vec[2]; - queued[0][1] <= vec[3]; - end - end - - genvar i; - generate - for (i = 1; i < `FP_ADD_STAGES; ++i) begin: stages - always_ff @(posedge clk) - if (!stall) - queued[i] <= queued[i - 1]; - end - endgenerate - -endmodule diff --git a/rtl/gfx/mat_mat_mul.sv b/rtl/gfx/mat_mat_mul.sv deleted file mode 100644 index 85ff7d6..0000000 --- a/rtl/gfx/mat_mat_mul.sv +++ /dev/null @@ -1,83 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module mat_mat_mul -( - input logic clk, - rst_n, - - input mat4 a, - b, - input logic in_valid, - out_ready, - - output mat4 q, - output logic in_ready, - out_valid -); - - mat4 a_hold, b_hold, b_transpose, q_hold, q_transpose, mul_b; - vec4 mul_q; - logic mul_in_ready, mul_in_valid, mul_out_ready, mul_out_valid; - index4 in_index, out_index; - - assign in_ready = mul_in_ready && in_index == `INDEX4_MIN; - assign out_valid = mul_out_valid && out_index == `INDEX4_MAX; - - assign mul_in_valid = in_valid || in_index != `INDEX4_MIN; - assign mul_out_ready = out_ready || out_index != `INDEX4_MAX; - - transpose transpose_b - ( - .in(b), - .out(b_transpose) - ); - - mat_vec_mul mul - ( - .a(in_index == `INDEX4_MIN ? a : a_hold), - .x(mul_b[in_index]), - .q(mul_q), - .in_ready(mul_in_ready), - .in_valid(mul_in_valid), - .out_ready(mul_out_ready), - .out_valid(mul_out_valid), - .* - ); - - transpose transpose_q - ( - .in(q_transpose), - .out(q) - ); - - always_comb begin - mul_b = b_hold; - mul_b[0] = b_transpose[0]; - - q_transpose = q_hold; - q_transpose[`VECS_PER_MAT - 1] = mul_q; - end - - always_ff @(posedge clk or negedge rst_n) - if (!rst_n) begin - in_index <= `INDEX4_MIN; - out_index <= `INDEX4_MIN; - end else begin - if (mul_in_ready && mul_in_valid) - in_index <= in_index + 1; - - if (mul_out_ready && mul_out_valid) - out_index <= out_index + 1; - end - - always_ff @(posedge clk) begin - if (in_ready) begin - a_hold <= a; - b_hold <= b_transpose; - end - - if (mul_out_ready && mul_out_valid) - q_hold[out_index] <= mul_q; - end - -endmodule diff --git a/rtl/gfx/mat_vec_mul.sv b/rtl/gfx/mat_vec_mul.sv deleted file mode 100644 index 9f5dcae..0000000 --- a/rtl/gfx/mat_vec_mul.sv +++ /dev/null @@ -1,49 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module mat_vec_mul -( - input logic clk, - rst_n, - - input mat4 a, - input vec4 x, - input logic in_valid, - out_ready, - - output vec4 q, - output logic in_ready, - out_valid -); - - logic stall_mul, stall_fold, mul_ready, mul_valid, feedback, feedback_last; - - pipeline_flow #(.STAGES(`FP_MUL_STAGES)) mul - ( - .stall(stall_mul), - .out_ready(mul_ready), - .out_valid(mul_valid), - .* - ); - - fold_flow fold - ( - .stall(stall_fold), - .in_ready(mul_ready), - .in_valid(mul_valid), - .* - ); - - genvar i; - generate - for (i = 0; i < `VECS_PER_MAT; ++i) begin: dots - vec_dot dot_i - ( - .a(a[i]), - .b(x), - .q(q[i]), - .* - ); - end - endgenerate - -endmodule diff --git a/rtl/gfx/pipeline_flow.sv b/rtl/gfx/pipeline_flow.sv deleted file mode 100644 index 64b5714..0000000 --- a/rtl/gfx/pipeline_flow.sv +++ /dev/null @@ -1,40 +0,0 @@ -module pipeline_flow -#(parameter STAGES=0) -( - input logic clk, - rst_n, - - input logic in_valid, - out_ready, - - output logic in_ready, - out_valid, - stall -); - - logic[STAGES - 1:0] valid; - - skid_flow skid - ( - .in_valid(valid[STAGES - 1]), - .* - ); - - always_ff @(posedge clk or negedge rst_n) - if (!rst_n) - valid[0] <= 0; - else if (!stall) - valid[0] <= in_valid; - - genvar i; - generate - for (i = 1; i < STAGES; ++i) begin: pipeline - always_ff @(posedge clk or negedge rst_n) - if (!rst_n) - valid[i] <= 0; - else if (!stall) - valid[i] <= valid[i - 1]; - end - endgenerate - -endmodule diff --git a/rtl/gfx/skid_buf.sv b/rtl/gfx/skid_buf.sv deleted file mode 100644 index 6e7ffbb..0000000 --- a/rtl/gfx/skid_buf.sv +++ /dev/null @@ -1,20 +0,0 @@ -module skid_buf -#(parameter WIDTH=0) -( - input logic clk, - - input logic[WIDTH - 1:0] in, - input logic stall, - - output logic[WIDTH - 1:0] out -); - - logic[WIDTH - 1:0] skid; - - assign out = stall ? skid : in; - - always_ff @(posedge clk) - if (!stall) - skid <= in; - -endmodule diff --git a/rtl/gfx/skid_flow.sv b/rtl/gfx/skid_flow.sv deleted file mode 100644 index 2b521e5..0000000 --- a/rtl/gfx/skid_flow.sv +++ /dev/null @@ -1,31 +0,0 @@ -module skid_flow -( - input logic clk, - rst_n, - - input logic in_valid, - out_ready, - - output logic in_ready, - out_valid, - stall -); - - logic was_ready, was_valid; - - assign stall = !in_ready; - assign in_ready = was_ready || !was_valid; - assign out_valid = in_valid || stall; - - always @(posedge clk or negedge rst_n) - if (!rst_n) begin - was_ready <= 0; - was_valid <= 0; - end else begin - was_ready <= out_ready; - - if (!stall) - was_valid <= in_valid; - end - -endmodule diff --git a/rtl/gfx/transpose.sv b/rtl/gfx/transpose.sv deleted file mode 100644 index 1df68d5..0000000 --- a/rtl/gfx/transpose.sv +++ /dev/null @@ -1,17 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module transpose -( - input mat4 in, - output mat4 out -); - - integer i, j; - - // Esto no tiene costo en hardware, es un renombramiento de señales - always_comb - for (i = 0; i < `VECS_PER_MAT; ++i) - for (j = 0; j < `FLOATS_PER_VEC; ++j) - out[i][j] = in[j][i]; - -endmodule diff --git a/rtl/gfx/vec_dot.sv b/rtl/gfx/vec_dot.sv deleted file mode 100644 index a386e6d..0000000 --- a/rtl/gfx/vec_dot.sv +++ /dev/null @@ -1,49 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module vec_dot -( - input logic clk, - - input logic stall_mul, - stall_fold, - feedback, - feedback_last, - - input vec4 a, - b, - - output fp q -); - - vec4 products_fold, products_mul; - - horizontal_fold fold - ( - .vec(products_fold), - .stall(stall_fold), - .* - ); - - genvar i; - generate - for (i = 0; i < `FLOATS_PER_VEC; ++i) begin: entries - fp_mul entry_i - ( - .a(a[i]), - .b(b[i]), - .q(products_mul[i]), - .stall(stall_mul), - .* - ); - - skid_buf #(.WIDTH($bits(fp))) skid_i - ( - .in(products_mul[i]), - .out(products_fold[i]), - .stall(stall_mul), - .* - ); - end - endgenerate - -endmodule -- cgit v1.2.3