summaryrefslogtreecommitdiff
path: root/rtl/gfx/gfx_shader_back.sv
diff options
context:
space:
mode:
authorAlejandro Soto <alejandro@34project.org>2024-05-05 17:38:55 -0600
committerAlejandro Soto <alejandro@34project.org>2024-05-05 18:12:08 -0600
commitca02833f22b08ceeeff501107371aa6667426115 (patch)
treef864c5fc238a292082d2096ce546270badce9f1d /rtl/gfx/gfx_shader_back.sv
parent081a8a3ba8bfe036f31da53f9c041a2caa30fce2 (diff)
rtl/gfx: rename platform/wavelet3d -> rtl/gfx
Diffstat (limited to 'rtl/gfx/gfx_shader_back.sv')
-rw-r--r--rtl/gfx/gfx_shader_back.sv335
1 files changed, 335 insertions, 0 deletions
diff --git a/rtl/gfx/gfx_shader_back.sv b/rtl/gfx/gfx_shader_back.sv
new file mode 100644
index 0000000..4929192
--- /dev/null
+++ b/rtl/gfx/gfx_shader_back.sv
@@ -0,0 +1,335 @@
+// Shader back end: instantiates the four execution pipes (p0..p3), the abort
+// generator that watches the p1..p3 handshakes, the 4-way writeback arbiter
+// and the final writeback stage. This module is structural only.
+module gfx_shader_back
+import gfx::*;
+(
+    input logic clk,
+                rst_n,
+
+    gfx_front_back.back back,
+
+    gfx_regfile_io.ab read_data,
+    gfx_regfile_io.wb reg_wb,
+
+    gfx_shader_setup.core setup
+);
+
+    // Handshake channels towards p1..p3. p0 has none: it takes in_valid and
+    // abort instead.
+    gfx_shake p1_shake(), p2_shake(), p3_shake();
+
+    // One writeback channel per pipe plus the arbitrated result.
+    gfx_wb p0_wb(), p1_wb(), p2_wb(), p3_wb(), out_wb();
+
+    // Output of p0_abort, consumed by p0 only.
+    logic abort;
+
+    // Observes (peek modport) the p1..p3 handshakes and derives abort.
+    gfx_shader_abort p0_abort
+    (
+        .clk(clk),
+        .p1(p1_shake.peek),
+        .p2(p2_shake.peek),
+        .p3(p3_shake.peek),
+        .abort(abort)
+    );
+
+    // Pipe 0: driven by dispatch.valid and cancellable through abort.
+    gfx_shader_fpint p0
+    (
+        .clk(clk),
+        .rst_n(rst_n),
+        .op(back.execute.p0),
+        .wave(back.execute.wave),
+        .in_valid(back.dispatch.valid),
+        .abort(abort),
+        .read_data(read_data),
+        .wb(p0_wb.tx)
+    );
+
+    // Pipe 1: handshaked input.
+    gfx_shader_mem p1
+    (
+        .clk(clk),
+        .rst_n(rst_n),
+        .op(back.execute.p1),
+        .wave(back.execute.wave),
+        .in_shake(p1_shake.rx),
+        .read_data(read_data),
+        .wb(p1_wb.tx)
+    );
+
+    // Pipe 2: handshaked input.
+    gfx_shader_sfu p2
+    (
+        .clk(clk),
+        .rst_n(rst_n),
+        .op(back.execute.p2),
+        .wave(back.execute.wave),
+        .in_shake(p2_shake.rx),
+        .read_data(read_data),
+        .wb(p2_wb.tx)
+    );
+
+    // Pipe 3: handshaked input.
+    gfx_shader_group p3
+    (
+        .clk(clk),
+        .rst_n(rst_n),
+        .op(back.execute.p3),
+        .wave(back.execute.wave),
+        .in_shake(p3_shake.rx),
+        .read_data(read_data),
+        .wb(p3_wb.tx)
+    );
+
+    // Merges the four per-pipe writeback channels into out_wb.
+    gfx_shader_writeback_arbiter4 writeback_arbiter
+    (
+        .clk(clk),
+        .rst_n(rst_n),
+        .p0(p0_wb.rx),
+        .p1(p1_wb.rx),
+        .p2(p2_wb.rx),
+        .p3(p3_wb.rx),
+        .out(out_wb.tx)
+    );
+
+    // Commits the arbitrated writeback to the register file, serves setup
+    // writes and drives the loop-back signals towards the front end.
+    gfx_shader_writeback writeback
+    (
+        .clk(clk),
+        .rst_n(rst_n),
+        .wb(out_wb.rx),
+        .regs(reg_wb),
+        .setup(setup),
+        .loop_valid(back.loop.valid),
+        .loop_group(back.loop.group)
+    );
+
+endmodule
+
+// Registers whether any of the three observed handshake channels completed a
+// transfer (valid and ready both high) on the current clock edge. The abort
+// output therefore lags the handshake by one cycle. There is no reset: abort
+// takes a defined value after the first clock edge with defined inputs.
+module gfx_shader_abort
+(
+    input logic clk,
+
+    gfx_shake.peek p1,
+                   p2,
+                   p3,
+
+    output logic abort
+);
+
+    // Per-channel "transfer accepted" terms.
+    logic p1_fire, p2_fire, p3_fire;
+
+    assign p1_fire = p1.valid & p1.ready;
+    assign p2_fire = p2.valid & p2.ready;
+    assign p3_fire = p3.valid & p3.ready;
+
+    always_ff @(posedge clk)
+        abort <= p1_fire | p2_fire | p3_fire;
+
+endmodule
+
+// 4-to-1 writeback arbiter built as a two-level tree of 2-input arbiters:
+// (p0, p1) and (p2, p3) are merged first, then the two intermediate channels
+// are merged into out. In every 2-input stage the 'a' input is the one
+// forwarded by the current gfx_shader_writeback_arbiter2_prio implementation.
+module gfx_shader_writeback_arbiter4
+(
+    input logic clk,
+                rst_n,
+
+    gfx_wb.rx p0,
+              p1,
+              p2,
+              p3,
+
+    gfx_wb.tx out
+);
+
+    // Outside reset, p0 and the arbiter output must never be back-pressured:
+    // both ready signals are checked to be high on every clock edge.
+    assert property (
+        @(posedge clk)
+        disable iff (~rst_n)
+
+        (p0.ready & out.ready)
+    );
+
+    // Intermediate channels between the first and second arbitration level.
+    gfx_wb p0_p1(), p2_p3();
+
+    gfx_shader_writeback_arbiter2_prio arbiter_p0_p1
+    (
+        .clk,
+        .rst_n,
+        .a(p0),
+        .b(p1),
+        .out(p0_p1.tx)
+    );
+
+    gfx_shader_writeback_arbiter2_prio arbiter_p2_p3
+    (
+        .clk,
+        .rst_n,
+        .a(p2),
+        .b(p3),
+        .out(p2_p3.tx)
+    );
+
+    // The second level consumes both intermediate channels, so it must see
+    // their rx modports. p2_p3.tx is already driven by arbiter_p2_p3 and does
+    // not match the gfx_wb.rx type of port b.
+    gfx_shader_writeback_arbiter2_prio arbiter_out
+    (
+        .clk,
+        .rst_n,
+        .a(p0_p1.rx),
+        .b(p2_p3.rx),
+        .out
+    );
+
+endmodule
+
+// 2-to-1 writeback arbiter.
+//
+// TODO: arbitration is not implemented yet. For now input 'a' is forwarded
+// unconditionally to 'out' and input 'b' is never accepted (b.ready is tied
+// low). clk and rst_n are currently unused.
+module gfx_shader_writeback_arbiter2_prio
+(
+    input logic clk,
+                rst_n,
+
+    gfx_wb.rx a,
+              b,
+
+    gfx_wb.tx out
+);
+
+    // Handshake: 'a' sees the downstream ready directly, 'b' is stalled.
+    assign out.valid = a.valid;
+    assign a.ready   = out.ready;
+    assign b.ready   = 1'b0;
+
+    // Register writeback payload.
+    assign out.group     = a.group;
+    assign out.dest      = a.dest;
+    assign out.lanes     = a.lanes;
+    assign out.scalar    = a.scalar;
+    assign out.writeback = a.writeback;
+
+    // Lane mask update.
+    assign out.mask_update = a.mask_update;
+    assign out.mask        = a.mask;
+
+    // Program counter update.
+    assign out.pc_update = a.pc_update;
+    assign out.pc_inc    = a.pc_inc;
+    assign out.pc_add    = a.pc_add;
+
+endmodule
+
+// Final writeback stage of the shader back end.
+//
+// Ports:
+//   wb         - arbitrated writeback channel; always accepted (ready = 1).
+//   regs       - register file write/back-read interface (PC, mask, SGPR, VGPR).
+//   loop_valid - high when loop_group is being handed back this cycle, either
+//                by a writeback leaving the hold pipeline or by a setup submit.
+//   loop_group - group being handed back.
+//   setup      - external setup interface: requests GPR / mask / PC writes and
+//                receives the matching set_done pulses.
+//
+// Scalar (SGPR) results are written straight from wb. Everything else (PC,
+// mask, VGPR) is delayed through a REGFILE_STAGES-deep hold pipeline and
+// written when it comes out the other end.
+// NOTE(review): pc_back / mask_back are requested for wb.group at the input
+// of the hold pipeline and consumed at its output; presumably REGFILE_STAGES
+// matches the register file read latency - confirm.
+//
+// Setup writes share the register file write ports with the shader's own
+// writebacks. A setup request stays pending while the port is busy with a
+// shader write and reports set_done once it has gone through.
+module gfx_shader_writeback
+import gfx::*;
+(
+    input logic clk,
+                rst_n,
+
+    gfx_wb.rx wb,
+
+    gfx_regfile_io.wb regs,
+
+    output logic loop_valid,
+    output group_id loop_group,
+
+    gfx_shader_setup.core setup
+);
+
+    // One entry of the hold pipeline.
+    struct
+    {
+        group_id  group;
+        word      lanes[SHADER_LANES];
+        pc_offset pc_add;
+        lane_mask mask;
+        vgpr_num  vgpr;
+        logic     pc_update,
+                  mask_update,
+                  vgpr_update;
+    } loop_hold[REGFILE_STAGES], loop_out;
+
+    logic loop_valid_hold[REGFILE_STAGES], loop_out_valid, mask_wb, scalar_wb,
+          setup_gpr, setup_mask, setup_submit;
+
+    // The writeback stage never back-pressures the arbiter.
+    assign wb.ready = 1;
+
+    // Oldest entry of the hold pipeline.
+    assign loop_out = loop_hold[REGFILE_STAGES - 1];
+    assign loop_out_valid = loop_valid_hold[REGFILE_STAGES - 1];
+
+    assign loop_valid = loop_out_valid | setup_submit;
+
+    // Back-read requests are issued for the group entering the pipeline.
+    assign regs.pc_back_group = wb.group;
+    assign regs.mask_back_group = wb.group;
+
+    // The PC data/group mux below selects the loop entry whenever
+    // loop_out_valid is high, and the pending setup submit is held in that
+    // case. The setup term must therefore only strobe the PC write when no
+    // loop entry is valid; otherwise a loop entry with pc_update == 0 would
+    // get a spurious PC write.
+    assign regs.pc_wb_write = (loop_out_valid & loop_out.pc_update)
+                            | (setup_submit & ~loop_out_valid);
+    assign regs.mask_wb_write = mask_wb | setup_mask;
+    assign regs.sgpr_write.write = scalar_wb | setup_gpr;
+
+    assign regs.vgpr_write.vgpr = loop_out.vgpr;
+    assign regs.vgpr_write.group = loop_out.group;
+
+    assign mask_wb = loop_out_valid & loop_out.mask_update;
+    assign scalar_wb = wb.valid & wb.writeback & wb.scalar;
+
+    // Write-port muxes: setup values are the default, shader writebacks
+    // override them when present.
+    always_comb begin
+        loop_group = setup.write.group;
+        regs.pc_wb = setup.write.pc;
+        regs.pc_wb_group = setup.write.group;
+
+        if (loop_out_valid) begin
+            loop_group = loop_out.group;
+            regs.pc_wb = regs.pc_back + word_ptr'(loop_out.pc_add);
+            regs.pc_wb_group = loop_out.group;
+        end
+
+        regs.mask_wb = setup.write.mask;
+        regs.mask_wb_group = setup.write.group;
+
+        if (mask_wb) begin
+            regs.mask_wb = loop_out.mask;
+            regs.mask_wb_group = loop_out.group;
+        end
+
+        regs.sgpr_write.data = setup.write.gpr_value;
+        regs.sgpr_write.sgpr = setup.write.gpr.sgpr;
+        regs.sgpr_write.group = setup.write.group;
+
+        if (scalar_wb) begin
+            // Scalar results are taken from lane 0.
+            regs.sgpr_write.data = wb.lanes[0];
+            regs.sgpr_write.sgpr = wb.dest.sgpr;
+            regs.sgpr_write.group = wb.group;
+        end
+
+        for (int i = 0; i < SHADER_LANES; ++i)
+            regs.vgpr_write.data[i] = loop_out.lanes[i];
+
+        // VGPR lanes are enabled by the mask read back from the register
+        // file; no lane is written unless a valid vector writeback is leaving
+        // the pipeline.
+        regs.vgpr_write.mask = regs.mask_back;
+        if (~loop_out_valid | ~loop_out.vgpr_update)
+            regs.vgpr_write.mask = '0;
+    end
+
+    // Hold pipeline payload (no reset; qualified by loop_valid_hold).
+    always_ff @(posedge clk) begin
+        // Blocking assignments because of a Verilator bug (see the lanes
+        // for-loop below). The shift runs from the last stage down so each
+        // stage still picks up the previous stage's old value.
+
+        for (int i = REGFILE_STAGES - 1; i > 0; --i)
+            loop_hold[i] = loop_hold[i - 1];
+
+        loop_hold[0].mask = wb.mask;
+        loop_hold[0].vgpr = wb.dest.vgpr.num;
+        loop_hold[0].group = wb.group;
+        loop_hold[0].pc_add = wb.pc_add;
+        loop_hold[0].pc_update = wb.pc_update;
+        loop_hold[0].mask_update = wb.mask_update;
+        loop_hold[0].vgpr_update = wb.writeback & ~wb.scalar;
+
+        // https://github.com/verilator/verilator/issues/4804
+        for (int i = 0; i < SHADER_LANES; ++i)
+            loop_hold[0].lanes[i] = wb.lanes[i];
+
+        // pc_inc overrides the offset with a plain +1.
+        if (wb.pc_inc)
+            loop_hold[0].pc_add = pc_offset'(1);
+    end
+
+    // Setup request tracking and hold pipeline valid bits.
+    always_ff @(posedge clk or negedge rst_n)
+        if (~rst_n) begin
+            setup_gpr <= 0;
+            setup_mask <= 0;
+            setup_submit <= 0;
+
+            setup.set_done.gpr <= 0;
+            setup.set_done.mask <= 0;
+            setup.set_done.submit <= 0;
+
+            for (int i = 0; i < $size(loop_valid_hold); ++i)
+                loop_valid_hold[i] <= 0;
+        end else begin
+            // A pending request is kept while a shader write occupies the
+            // corresponding port, and (re)armed by the *_set strobes.
+            setup_gpr <= (setup_gpr & scalar_wb) | setup.write.gpr_set;
+            setup_mask <= (setup_mask & mask_wb) | setup.write.mask_set;
+            setup_submit <= (setup_submit & loop_out_valid) | setup.write.pc_set;
+
+            // Done is reported for the cycle in which the pending request
+            // was not blocked.
+            setup.set_done.gpr <= setup_gpr & ~scalar_wb;
+            setup.set_done.mask <= setup_mask & ~mask_wb;
+            setup.set_done.submit <= setup_submit & ~loop_out_valid;
+
+            loop_valid_hold[0] <= wb.valid;
+            for (int i = 1; i < REGFILE_STAGES; ++i)
+                loop_valid_hold[i] <= loop_valid_hold[i - 1];
+        end
+
+endmodule