summary | refs | log | tree | commit | diff
path: root/platform/wavelet3d/gfx_shader_back.sv
diff options
context:
space:
mode:
author: Alejandro Soto <alejandro@34project.org> 2024-05-04 23:58:08 -0600
committer: Alejandro Soto <alejandro@34project.org> 2024-05-05 14:19:15 -0600
commit: 4fdcb079663eccc71ed2c120f8279d6c364de9fd (patch)
tree: 1df076513ef031fa2a2f55d280e2edd09748cdd5 /platform/wavelet3d/gfx_shader_back.sv
parent: a7d92072c0bdc3a3e1c99de64f353e932846bc2a (diff)
platform/wavelet3d: implement shader writeback
Diffstat (limited to 'platform/wavelet3d/gfx_shader_back.sv')
-rw-r--r--platform/wavelet3d/gfx_shader_back.sv163
1 files changed, 152 insertions, 11 deletions
diff --git a/platform/wavelet3d/gfx_shader_back.sv b/platform/wavelet3d/gfx_shader_back.sv
index c7b2855..4929192 100644
--- a/platform/wavelet3d/gfx_shader_back.sv
+++ b/platform/wavelet3d/gfx_shader_back.sv
@@ -1,13 +1,15 @@
module gfx_shader_back
import gfx::*;
(
- input logic clk,
- rst_n,
+ input logic clk,
+ rst_n,
- gfx_front_back.back back,
+ gfx_front_back.back back,
- gfx_regfile_io.ab read_data,
- gfx_regfile_io.wb reg_wb
+ gfx_regfile_io.ab read_data,
+ gfx_regfile_io.wb reg_wb,
+
+ gfx_shader_setup.core setup
);
logic abort;
@@ -30,6 +32,7 @@ import gfx::*;
.rst_n,
.op(back.execute.p0),
.wb(p0_wb.tx),
+ .wave(back.execute.wave),
.abort,
.read_data,
.in_valid(back.dispatch.valid)
@@ -41,6 +44,7 @@ import gfx::*;
.rst_n,
.op(back.execute.p1),
.wb(p1_wb.tx),
+ .wave(back.execute.wave),
.in_shake(p1_shake.rx),
.read_data
);
@@ -51,6 +55,7 @@ import gfx::*;
.rst_n,
.op(back.execute.p2),
.wb(p2_wb.tx),
+ .wave(back.execute.wave),
.in_shake(p2_shake.rx),
.read_data
);
@@ -61,6 +66,7 @@ import gfx::*;
.rst_n,
.op(back.execute.p3),
.wb(p3_wb.tx),
+ .wave(back.execute.wave),
.in_shake(p3_shake.rx),
.read_data
);
@@ -81,7 +87,10 @@ import gfx::*;
.clk,
.rst_n,
.wb(out_wb.rx),
- .regs(reg_wb)
+ .regs(reg_wb),
+ .setup,
+ .loop_group(back.loop.group),
+ .loop_valid(back.loop.valid)
);
endmodule
@@ -170,6 +179,7 @@ module gfx_shader_writeback_arbiter2_prio
//TODO
assign a.ready = out.ready;
assign b.ready = 0;
+
assign out.dest = a.dest;
assign out.lanes = a.lanes;
assign out.group = a.group;
@@ -177,18 +187,149 @@ module gfx_shader_writeback_arbiter2_prio
assign out.scalar = a.scalar;
assign out.writeback = a.writeback;
+ assign out.mask = a.mask;
+ assign out.mask_update = a.mask_update;
+
+ assign out.pc_add = a.pc_add;
+ assign out.pc_inc = a.pc_inc;
+ assign out.pc_update = a.pc_update;
+
endmodule
module gfx_shader_writeback
+import gfx::*;
(
- input logic clk,
- rst_n,
+ input logic clk,
+ rst_n,
+
+ gfx_wb.rx wb,
+
+ gfx_regfile_io.wb regs,
- gfx_wb.rx wb,
+ output logic loop_valid,
+ output group_id loop_group,
- gfx_regfile_io.wb regs
+ gfx_shader_setup.core setup
);
-
+ struct
+ {
+ group_id group;
+ word lanes[SHADER_LANES];
+ pc_offset pc_add;
+ lane_mask mask;
+ vgpr_num vgpr;
+ logic pc_update,
+ mask_update,
+ vgpr_update;
+ } loop_hold[REGFILE_STAGES], loop_out;
+
+ logic loop_valid_hold[REGFILE_STAGES], loop_out_valid, mask_wb, scalar_wb,
+ setup_gpr, setup_mask, setup_submit;
+
+ assign wb.ready = 1;
+
+ assign loop_out = loop_hold[REGFILE_STAGES - 1];
+ assign loop_out_valid = loop_valid_hold[REGFILE_STAGES - 1];
+
+ assign loop_valid = loop_out_valid | setup_submit;
+
+ assign regs.pc_back_group = wb.group;
+ assign regs.mask_back_group = wb.group;
+
+ assign regs.pc_wb_write = (loop_out_valid & loop_out.pc_update) | setup_submit;
+ assign regs.mask_wb_write = mask_wb | setup_mask;
+ assign regs.sgpr_write.write = scalar_wb | setup_gpr;
+
+ assign regs.vgpr_write.vgpr = loop_out.vgpr;
+ assign regs.vgpr_write.group = loop_out.group;
+
+ assign mask_wb = loop_out_valid & loop_out.mask_update;
+ assign scalar_wb = wb.valid & wb.writeback & wb.scalar;
+
+ always_comb begin
+ loop_group = setup.write.group;
+ regs.pc_wb = setup.write.pc;
+ regs.pc_wb_group = setup.write.group;
+
+ if (loop_out_valid) begin
+ loop_group = loop_out.group;
+ regs.pc_wb = regs.pc_back + word_ptr'(loop_out.pc_add);
+ regs.pc_wb_group = loop_out.group;
+ end
+
+ regs.mask_wb = setup.write.mask;
+ regs.mask_wb_group = setup.write.group;
+
+ if (mask_wb) begin
+ regs.mask_wb = loop_out.mask;
+ regs.mask_wb_group = loop_out.group;
+ end
+
+ regs.sgpr_write.data = setup.write.gpr_value;
+ regs.sgpr_write.sgpr = setup.write.gpr.sgpr;
+ regs.sgpr_write.group = setup.write.group;
+
+ if (scalar_wb) begin
+ regs.sgpr_write.data = wb.lanes[0];
+ regs.sgpr_write.sgpr = wb.dest.sgpr;
+ regs.sgpr_write.group = wb.group;
+ end
+
+ for (int i = 0; i < SHADER_LANES; ++i)
+ regs.vgpr_write.data[i] = loop_out.lanes[i];
+
+ regs.vgpr_write.mask = regs.mask_back;
+ if (~loop_out_valid | ~loop_out.vgpr_update)
+ regs.vgpr_write.mask = '0;
+ end
+
+ always_ff @(posedge clk) begin
+ // Blocking assignments are used here because of a Verilator bug (see the lanes for-loop below)
+
+ for (int i = REGFILE_STAGES - 1; i > 0; --i)
+ loop_hold[i] = loop_hold[i - 1];
+
+ loop_hold[0].mask = wb.mask;
+ loop_hold[0].vgpr = wb.dest.vgpr.num;
+ loop_hold[0].group = wb.group;
+ loop_hold[0].pc_add = wb.pc_add;
+ loop_hold[0].pc_update = wb.pc_update;
+ loop_hold[0].mask_update = wb.mask_update;
+ loop_hold[0].vgpr_update = wb.writeback & ~wb.scalar;
+
+ // https://github.com/verilator/verilator/issues/4804
+ for (int i = 0; i < SHADER_LANES; ++i)
+ loop_hold[0].lanes[i] = wb.lanes[i];
+
+ if (wb.pc_inc)
+ loop_hold[0].pc_add = pc_offset'(1);
+ end
+
+ always_ff @(posedge clk or negedge rst_n)
+ if (~rst_n) begin
+ setup_gpr <= 0;
+ setup_mask <= 0;
+ setup_submit <= 0;
+
+ setup.set_done.gpr <= 0;
+ setup.set_done.mask <= 0;
+ setup.set_done.submit <= 0;
+
+ for (int i = 0; i < $size(loop_valid_hold); ++i)
+ loop_valid_hold[i] <= 0;
+ end else begin
+ setup_gpr <= (setup_gpr & scalar_wb) | setup.write.gpr_set;
+ setup_mask <= (setup_mask & mask_wb) | setup.write.mask_set;
+ setup_submit <= (setup_submit & loop_out_valid) | setup.write.pc_set;
+
+ setup.set_done.gpr <= setup_gpr & ~scalar_wb;
+ setup.set_done.mask <= setup_mask & ~mask_wb;
+ setup.set_done.submit <= setup_submit & ~loop_out_valid;
+
+ loop_valid_hold[0] <= wb.valid;
+ for (int i = 1; i < REGFILE_STAGES; ++i)
+ loop_valid_hold[i] <= loop_valid_hold[i - 1];
+ end
endmodule