summary | refs | log | tree | commit | diff
path: root/rtl
diff options
context:
space:
mode:
author: Alejandro Soto <alejandro@34project.org> 2023-11-21 17:14:58 -0600
committer: Alejandro Soto <alejandro@34project.org> 2023-11-21 18:03:15 -0600
commit: d2df92448a7aaaff9ae72f99bf4bcd00a6e55d8c (patch)
tree: 7426719821476739dce9092dbc7cb2425c9efa5b /rtl
parent: d076c33ffb6e3c0d96ee6b5dce0fcf48be8d3582 (diff)
rtl/gfx: implement SP issue
Diffstat (limited to 'rtl')
-rw-r--r-- rtl/gfx/gfx.sv 2
-rw-r--r-- rtl/gfx/gfx_defs.sv 7
-rw-r--r-- rtl/gfx/gfx_sp.sv 37
-rw-r--r-- rtl/gfx/gfx_sp_decode.sv 2
-rw-r--r-- rtl/gfx/gfx_sp_file.sv 2
-rw-r--r-- rtl/gfx/gfx_sp_isa.sv (renamed from rtl/gfx/gfx_isa.sv) 4
-rw-r--r-- rtl/gfx/gfx_sp_issue.sv 111
7 files changed, 141 insertions, 24 deletions
diff --git a/rtl/gfx/gfx.sv b/rtl/gfx/gfx.sv
index e88dc56..2398826 100644
--- a/rtl/gfx/gfx.sv
+++ b/rtl/gfx/gfx.sv
@@ -43,7 +43,7 @@ module gfx
.*
);
- logic batch_read, fetch_read, send_valid;
+ logic batch_read, fetch_read, running, send_valid;
lane_word send_data;
lane_mask send_mask;
vram_addr batch_address, fetch_address;
diff --git a/rtl/gfx/gfx_defs.sv b/rtl/gfx/gfx_defs.sv
index 70664d1..ec1ea8d 100644
--- a/rtl/gfx/gfx_defs.sv
+++ b/rtl/gfx/gfx_defs.sv
@@ -6,7 +6,7 @@
`define FLOATS_PER_VEC 4
`define VECS_PER_MAT 4
-// Target de 200MHz (reloj es 143MHz) con float16, rounding aproximado
+// Target de 200MHz (reloj es 143MHz) con float16, rounding (muy) aproximado
`define FP_ADD_STAGES 10 // ~401 LUTs
`define FP_MUL_STAGES 5 // ~144 LUTs ~1 bloque DSP
`define FP_INV_STAGES 3 // ~178 LUTs ~1 bloque DSP
@@ -218,7 +218,10 @@ typedef logic[`FLOATS_PER_VEC - 1:0] vec_mask;
typedef logic[`FLOATS_PER_VEC - 1:0][$clog2(`FLOATS_PER_VEC) - 1:0] swizzle_lanes;
-typedef logic[2:0] vreg_num;
+`define GFX_SP_REG_BITS 3
+`define GFX_SP_REG_COUNT (1 << `GFX_SP_REG_BITS)
+
+typedef logic[`GFX_SP_REG_BITS - 1:0] vreg_num;
typedef struct packed
{
diff --git a/rtl/gfx/gfx_sp.sv b/rtl/gfx/gfx_sp.sv
index 4adac7e..ce0f9ff 100644
--- a/rtl/gfx/gfx_sp.sv
+++ b/rtl/gfx/gfx_sp.sv
@@ -20,6 +20,7 @@ module gfx_sp
input logic program_start,
input cmd_word program_header_base,
program_header_size,
+ output logic running,
input logic send_ready,
output logic send_valid,
@@ -27,7 +28,7 @@ module gfx_sp
output lane_mask send_mask
);
- logic batch_start, clear_lanes, insn_valid, running;
+ logic batch_start, clear_lanes, insn_valid;
cmd_word batch_length;
insn_word insn;
vram_insn_addr batch_base;
@@ -47,6 +48,14 @@ module gfx_sp
.*
);
+ logic deco_ready, combiner_issue_valid, shuffler_issue_valid, stream_issue_valid;
+ vreg_num rd_a_reg, rd_b_reg;
+
+ gfx_sp_issue issue
+ (
+ .*
+ );
+
logic recv_valid;
lane_word recv_data;
lane_mask recv_mask;
@@ -60,43 +69,40 @@ module gfx_sp
.*
);
- logic shuffler_wb_valid;
+ logic shuffler_issue_ready, shuffler_wb_valid;
wb_op shuffler_wb;
gfx_sp_shuffler shuffler
(
.wb(shuffler_wb),
- .deco(),
- .in_ready(),
- .in_valid(0),
+ .in_ready(shuffler_issue_ready),
+ .in_valid(shuffler_issue_valid),
.wb_ready(shuffler_wb_ready),
.wb_valid(shuffler_wb_valid),
.*
);
- logic combiner_wb_valid;
+ logic combiner_issue_ready, combiner_wb_valid;
wb_op combiner_wb;
gfx_sp_combiner combiner
(
.wb(combiner_wb),
- .deco(),
- .in_ready(),
- .in_valid(0),
+ .in_ready(combiner_issue_ready),
+ .in_valid(combiner_issue_valid),
.wb_ready(combiner_wb_ready),
.wb_valid(combiner_wb_valid),
.*
);
- logic recv_ready, stream_wb_valid;
+ logic recv_ready, stream_issue_ready, stream_wb_valid;
wb_op stream_wb;
gfx_sp_stream stream
(
.wb(stream_wb),
- .deco(),
- .in_ready(),
- .in_valid(0),
+ .in_ready(stream_issue_ready),
+ .in_valid(stream_issue_valid),
.wb_ready(stream_wb_ready),
.wb_valid(stream_wb_valid),
.*
@@ -115,14 +121,11 @@ module gfx_sp
gfx_sp_regs regs
(
- .rd_a_reg(),
- .rd_b_reg(),
.rd_a_data(a),
.rd_b_data(b),
.*
);
- logic batch_end, deco_ready;
- assign deco_ready = 1;
+ logic batch_end;
endmodule
diff --git a/rtl/gfx/gfx_sp_decode.sv b/rtl/gfx/gfx_sp_decode.sv
index 41ce438..d54077d 100644
--- a/rtl/gfx/gfx_sp_decode.sv
+++ b/rtl/gfx/gfx_sp_decode.sv
@@ -1,5 +1,5 @@
-`include "gfx/gfx_isa.sv"
`include "gfx/gfx_defs.sv"
+`include "gfx/gfx_sp_isa.sv"
module gfx_sp_decode
(
diff --git a/rtl/gfx/gfx_sp_file.sv b/rtl/gfx/gfx_sp_file.sv
index 5dced6e..e98ee18 100644
--- a/rtl/gfx/gfx_sp_file.sv
+++ b/rtl/gfx/gfx_sp_file.sv
@@ -12,7 +12,7 @@ module gfx_sp_file
input vec4 wr_data
);
- vec4 file[1 << $bits(vreg_num)], hold_rd_data, hold_wr_data;
+ vec4 file[`GFX_SP_REG_COUNT], hold_rd_data, hold_wr_data;
logic hold_wr;
vreg_num hold_rd_reg, hold_wr_reg;
diff --git a/rtl/gfx/gfx_isa.sv b/rtl/gfx/gfx_sp_isa.sv
index 18a28e4..1420d95 100644
--- a/rtl/gfx/gfx_isa.sv
+++ b/rtl/gfx/gfx_sp_isa.sv
@@ -1,5 +1,5 @@
-`ifndef GFX_ISA_SV
-`define GFX_ISA_SV
+`ifndef GFX_SP_ISA_SV
+`define GFX_SP_ISA_SV
`include "gfx/gfx_defs.sv"
diff --git a/rtl/gfx/gfx_sp_issue.sv b/rtl/gfx/gfx_sp_issue.sv
new file mode 100644
index 0000000..6934e39
--- /dev/null
+++ b/rtl/gfx/gfx_sp_issue.sv
@@ -0,0 +1,111 @@
+`include "gfx/gfx_defs.sv"
+
+module gfx_sp_issue
+(
+    input  logic     clk,
+                     rst_n,   // asynchronous reset, active low
+    // Decoded instruction plus its valid/ready handshake.
+    input  insn_deco deco,
+    input  logic     deco_valid,
+    output logic     deco_ready,
+    // Source register numbers, driven straight from deco.
+    output vreg_num  rd_a_reg,
+                     rd_b_reg,
+    // One ready/valid pair per execution unit.
+    input  logic     stream_issue_ready,
+    output logic     stream_issue_valid,
+
+    input  logic     combiner_issue_ready,
+    output logic     combiner_issue_valid,
+
+    input  logic     shuffler_issue_ready,
+    output logic     shuffler_issue_valid,
+    // When wr is high, register wr_reg is marked as no longer busy.
+    input  logic     wr,
+    input  vreg_num  wr_reg
+);
+
+    /* This could be fully pipelined, but there was no time for it, and in
+     * any case it would make no difference given the very poor bandwidth.
+     */
+
+    logic data_hazard, rd_a_hazard, rd_b_hazard, wr_hazard, writing_a, writing_b, writing_dst,
+          busy[`GFX_SP_REG_COUNT];   // busy[r] is set while a write to r is outstanding
+    // Each instruction walks IDLE -> HAZARDS -> ISSUE -> WAIT -> IDLE.
+    enum int unsigned
+    {
+        IDLE,
+        HAZARDS,
+        ISSUE,
+        WAIT
+    } state;
+
+    assign rd_a_reg = deco.src_a;
+    assign rd_b_reg = deco.src_b;
+    // writing_* are the registered busy[] samples produced at the bottom of the module.
+    assign wr_hazard   = deco.writeback  && writing_dst;
+    assign rd_a_hazard = deco.read_src_a && writing_a;
+    assign rd_b_hazard = deco.read_src_b && writing_b;   // source B is gated by its own read flag
+    assign data_hazard = rd_a_hazard || rd_b_hazard || wr_hazard;
+    // High in any cycle where an asserted *_issue_valid meets its *_issue_ready.
+    assign deco_ready = (stream_issue_ready && stream_issue_valid)
+                     || (combiner_issue_ready && combiner_issue_valid)
+                     || (shuffler_issue_ready && shuffler_issue_valid);
+
+    always_ff @(posedge clk or negedge rst_n)
+        if (!rst_n) begin
+            state <= IDLE;
+
+            stream_issue_valid <= 0;
+            combiner_issue_valid <= 0;
+            shuffler_issue_valid <= 0;
+
+            for (integer i = 0; i < `GFX_SP_REG_COUNT; ++i)
+                busy[i] <= 0;
+        end else begin
+            unique case (state)
+                IDLE:
+                    if (deco_valid)
+                        state <= HAZARDS;
+                // Stay here while any needed source or the destination is still busy.
+                HAZARDS:
+                    if (!data_hazard) begin
+                        state <= ISSUE;
+                        if (deco.writeback)
+                            busy[deco.dst] <= 1;   // reserve the destination register
+                    end
+                // Raise valid towards every unit selected in deco.ex.
+                ISSUE: begin
+                    state <= WAIT;
+
+                    if (deco.ex.stream)
+                        stream_issue_valid <= 1;
+
+                    if (deco.ex.combiner)
+                        combiner_issue_valid <= 1;
+
+                    if (deco.ex.shuffler)
+                        shuffler_issue_valid <= 1;
+                end
+                // Hold the valids until one handshake completes, then drop all of them.
+                WAIT:
+                    if (deco_ready) begin
+                        state <= IDLE;
+
+                        stream_issue_valid <= 0;
+                        combiner_issue_valid <= 0;
+                        shuffler_issue_valid <= 0;
+                    end
+            endcase
+            // Placed after the case: a clear overrides a same-cycle set of the same entry.
+            if (wr)
+                busy[wr_reg] <= 0;
+        end
+    // Registered (non-reset) busy[] lookups for the registers named by the current deco.
+    always_ff @(posedge clk) begin
+        writing_a <= busy[deco.src_a];
+        writing_b <= busy[deco.src_b];
+        writing_dst <= busy[deco.dst];
+    end
+
+endmodule