diff options
Diffstat (limited to '')
| -rw-r--r-- | platform/wavelet3d/gfx_shader_back.sv | 163 |
1 files changed, 152 insertions, 11 deletions
// Review notes for the whitespace-collapsed unified diff below; every code token is unchanged.
// - gfx_shader_back: gains a `gfx_shader_setup.core setup` port, passes `.wave(back.execute.wave)`
//   to the instances whose `.op` is back.execute.p0..p3, and connects `.setup`, `.loop_group`
//   and `.loop_valid` on the instance that receives `out_wb.rx`.
// - gfx_shader_writeback_arbiter2_prio: additionally forwards dest, mask, mask_update, pc_add,
//   pc_inc and pc_update from `a` to `out`.
// - gfx_shader_writeback: new body. wb fields are captured into loop_hold[0] and shifted through
//   REGFILE_STAGES entries; the last entry (loop_out) drives the PC, mask and VGPR writes, the
//   SGPR write is taken directly from wb, and setup.write.* supplies the default values that
//   those paths override.
// NOTE(review): regs.pc_wb_write is also raised by setup_submit, but the PC mux switches on
//   loop_out_valid alone; with loop_out_valid high and loop_out.pc_update low this appears to
//   write regs.pc_back + pc_add for loop_out.group - confirm that this is intended.
diff --git a/platform/wavelet3d/gfx_shader_back.sv b/platform/wavelet3d/gfx_shader_back.sv index c7b2855..4929192 100644 --- a/platform/wavelet3d/gfx_shader_back.sv +++ b/platform/wavelet3d/gfx_shader_back.sv @@ -1,13 +1,15 @@ module gfx_shader_back import gfx::*; ( - input logic clk, - rst_n, + input logic clk, + rst_n, - gfx_front_back.back back, + gfx_front_back.back back, - gfx_regfile_io.ab read_data, - gfx_regfile_io.wb reg_wb + gfx_regfile_io.ab read_data, + gfx_regfile_io.wb reg_wb, + + gfx_shader_setup.core setup ); logic abort; @@ -30,6 +32,7 @@ import gfx::*; .rst_n, .op(back.execute.p0), .wb(p0_wb.tx), + .wave(back.execute.wave), .abort, .read_data, .in_valid(back.dispatch.valid) @@ -41,6 +44,7 @@ import gfx::*; .rst_n, .op(back.execute.p1), .wb(p1_wb.tx), + .wave(back.execute.wave), .in_shake(p1_shake.rx), .read_data ); @@ -51,6 +55,7 @@ import gfx::*; .rst_n, .op(back.execute.p2), .wb(p2_wb.tx), + .wave(back.execute.wave), .in_shake(p2_shake.rx), .read_data ); @@ -61,6 +66,7 @@ import gfx::*; .rst_n, .op(back.execute.p3), .wb(p3_wb.tx), + .wave(back.execute.wave), .in_shake(p3_shake.rx), .read_data ); @@ -81,7 +87,10 @@ import gfx::*; .clk, .rst_n, .wb(out_wb.rx), - .regs(reg_wb) + .regs(reg_wb), + .setup, + .loop_group(back.loop.group), + .loop_valid(back.loop.valid) ); endmodule @@ -170,6 +179,7 @@ module gfx_shader_writeback_arbiter2_prio //TODO assign a.ready = out.ready; assign b.ready = 0; + assign out.dest = a.dest; assign out.lanes = a.lanes; assign out.group = a.group; @@ -177,18 +187,149 @@ module gfx_shader_writeback_arbiter2_prio assign out.scalar = a.scalar; assign out.writeback = a.writeback; + assign out.mask = a.mask; + assign out.mask_update = a.mask_update; + + assign out.pc_add = a.pc_add; + assign out.pc_inc = a.pc_inc; + assign out.pc_update = a.pc_update; + endmodule module gfx_shader_writeback +import gfx::*; ( - input logic clk, - rst_n, + input logic clk, + rst_n, + + gfx_wb.rx wb, + + gfx_regfile_io.wb regs, - gfx_wb.rx wb, + 
output logic loop_valid, + output group_id loop_group, - gfx_regfile_io.wb regs + gfx_shader_setup.core setup ); - + struct + { + group_id group; + word lanes[SHADER_LANES]; + pc_offset pc_add; + lane_mask mask; + vgpr_num vgpr; + logic pc_update, + mask_update, + vgpr_update; + } loop_hold[REGFILE_STAGES], loop_out; + + logic loop_valid_hold[REGFILE_STAGES], loop_out_valid, mask_wb, scalar_wb, + setup_gpr, setup_mask, setup_submit; + + assign wb.ready = 1; + + assign loop_out = loop_hold[REGFILE_STAGES - 1]; + assign loop_out_valid = loop_valid_hold[REGFILE_STAGES - 1]; + + assign loop_valid = loop_out_valid | setup_submit; + + assign regs.pc_back_group = wb.group; + assign regs.mask_back_group = wb.group; + + assign regs.pc_wb_write = (loop_out_valid & loop_out.pc_update) | setup_submit; + assign regs.mask_wb_write = mask_wb | setup_mask; + assign regs.sgpr_write.write = scalar_wb | setup_gpr; + + assign regs.vgpr_write.vgpr = loop_out.vgpr; + assign regs.vgpr_write.group = loop_out.group; + + assign mask_wb = loop_out_valid & loop_out.mask_update; + assign scalar_wb = wb.valid & wb.writeback & wb.scalar; + + always_comb begin + loop_group = setup.write.group; + regs.pc_wb = setup.write.pc; + regs.pc_wb_group = setup.write.group; + + if (loop_out_valid) begin + loop_group = loop_out.group; + regs.pc_wb = regs.pc_back + word_ptr'(loop_out.pc_add); + regs.pc_wb_group = loop_out.group; + end + + regs.mask_wb = setup.write.mask; + regs.mask_wb_group = setup.write.group; + + if (mask_wb) begin + regs.mask_wb = loop_out.mask; + regs.mask_wb_group = loop_out.group; + end + + regs.sgpr_write.data = setup.write.gpr_value; + regs.sgpr_write.sgpr = setup.write.gpr.sgpr; + regs.sgpr_write.group = setup.write.group; + + if (scalar_wb) begin + regs.sgpr_write.data = wb.lanes[0]; + regs.sgpr_write.sgpr = wb.dest.sgpr; + regs.sgpr_write.group = wb.group; + end + + for (int i = 0; i < SHADER_LANES; ++i) + regs.vgpr_write.data[i] = loop_out.lanes[i]; + + regs.vgpr_write.mask = 
regs.mask_back; + if (~loop_out_valid | ~loop_out.vgpr_update) + regs.vgpr_write.mask = '0; + end + + always_ff @(posedge clk) begin + // Blocking assignments because of a Verilator bug (see the lanes for-loop below) + + for (int i = REGFILE_STAGES - 1; i > 0; --i) + loop_hold[i] = loop_hold[i - 1]; + + loop_hold[0].mask = wb.mask; + loop_hold[0].vgpr = wb.dest.vgpr.num; + loop_hold[0].group = wb.group; + loop_hold[0].pc_add = wb.pc_add; + loop_hold[0].pc_update = wb.pc_update; + loop_hold[0].mask_update = wb.mask_update; + loop_hold[0].vgpr_update = wb.writeback & ~wb.scalar; + + // https://github.com/verilator/verilator/issues/4804 + for (int i = 0; i < SHADER_LANES; ++i) + loop_hold[0].lanes[i] = wb.lanes[i]; + + if (wb.pc_inc) + loop_hold[0].pc_add = pc_offset'(1); + end + + always_ff @(posedge clk or negedge rst_n) + if (~rst_n) begin + setup_gpr <= 0; + setup_mask <= 0; + setup_submit <= 0; + + setup.set_done.gpr <= 0; + setup.set_done.mask <= 0; + setup.set_done.submit <= 0; + + for (int i = 0; i < $size(loop_valid_hold); ++i) + loop_valid_hold[i] <= 0; + end else begin + setup_gpr <= (setup_gpr & scalar_wb) | setup.write.gpr_set; + setup_mask <= (setup_mask & mask_wb) | setup.write.mask_set; + setup_submit <= (setup_submit & loop_out_valid) | setup.write.pc_set; + + setup.set_done.gpr <= setup_gpr & ~scalar_wb; + setup.set_done.mask <= setup_mask & ~mask_wb; + setup.set_done.submit <= setup_submit & ~loop_out_valid; + + loop_valid_hold[0] <= wb.valid; + for (int i = 1; i < REGFILE_STAGES; ++i) + loop_valid_hold[i] <= loop_valid_hold[i - 1]; + end endmodule |
