diff options
| author | Alejandro Soto <alejandro@34project.org> | 2024-05-19 19:55:20 -0600 |
|---|---|---|
| committer | Alejandro Soto <alejandro@34project.org> | 2024-05-24 05:58:41 -0600 |
| commit | 1246f6ec28f37c8bf85dd6262928c92899a5a539 (patch) | |
| tree | 06047f1496fe61f5080eeb7ab5c3997d60271194 /rtl | |
| parent | 40bd702015f3a09f5c4d1ad30439b1ea186d7484 (diff) | |
rtl/gfx: implement memory unit
Diffstat (limited to 'rtl')
| -rw-r--r-- | rtl/gfx/gfx_isa.sv | 10 | ||||
| -rw-r--r-- | rtl/gfx/gfx_pkg.sv | 2 | ||||
| -rw-r--r-- | rtl/gfx/gfx_regfile_io.sv | 17 | ||||
| -rw-r--r-- | rtl/gfx/gfx_shader.sv | 4 | ||||
| -rw-r--r-- | rtl/gfx/gfx_shader_back.sv | 5 | ||||
| -rw-r--r-- | rtl/gfx/gfx_shader_front.sv | 43 | ||||
| -rw-r--r-- | rtl/gfx/gfx_shader_mem.sv | 270 | ||||
| -rw-r--r-- | rtl/gfx/gfx_shader_regs.sv | 4 | ||||
| -rw-r--r-- | rtl/gfx/gfx_top.sv | 3 |
9 files changed, 335 insertions, 23 deletions
diff --git a/rtl/gfx/gfx_isa.sv b/rtl/gfx/gfx_isa.sv index cc34156..f3ca66a 100644 --- a/rtl/gfx/gfx_isa.sv +++ b/rtl/gfx/gfx_isa.sv @@ -80,6 +80,16 @@ package gfx_isa; typedef struct packed { + xgpr_mode reg_mode; + dst_src_rr dst_src; + logic reg_rev; + logic[3:0] reserved; + logic load; + insn_class op_class; + } insn_mem; + + typedef struct packed + { xgpr_mode reg_mode; dst_src_rr dst_src; logic reg_rev; diff --git a/rtl/gfx/gfx_pkg.sv b/rtl/gfx/gfx_pkg.sv index 7072967..d20b678 100644 --- a/rtl/gfx/gfx_pkg.sv +++ b/rtl/gfx/gfx_pkg.sv @@ -103,7 +103,7 @@ package gfx; typedef struct packed { - logic todo; + logic load; } mem_op; typedef struct packed diff --git a/rtl/gfx/gfx_regfile_io.sv b/rtl/gfx/gfx_regfile_io.sv index 2459049..a3f0622 100644 --- a/rtl/gfx/gfx_regfile_io.sv +++ b/rtl/gfx/gfx_regfile_io.sv @@ -33,21 +33,26 @@ interface gfx_regfile_io; word data[SHADER_LANES]; } vgpr_write; + group_id mask_back_group, mask_exec_group, mask_wb_group, + pc_back_group, pc_front_group, pc_wb_group; + word a[SHADER_LANES], b[SHADER_LANES], sgpr_write_data, vgpr_write_data[SHADER_LANES]; logic mask_wb_write, pc_wb_write; word_ptr pc_back, pc_front, pc_wb; - group_id mask_back_group, mask_wb_group, pc_back_group, pc_front_group, pc_wb_group; - lane_mask mask_back, mask_wb; + lane_mask mask_back, mask_exec, mask_wb; modport ab ( input a, - b + b, + + mask_exec ); modport read ( - output op + output op, + mask_exec_group ); modport bind_ @@ -86,6 +91,7 @@ interface gfx_regfile_io; pc_back_group, pc_front_group, mask_back_group, + mask_exec_group, pc_wb, pc_wb_group, @@ -100,7 +106,8 @@ interface gfx_regfile_io; pc_back, pc_front, - mask_back + mask_back, + mask_exec ); endinterface diff --git a/rtl/gfx/gfx_shader.sv b/rtl/gfx/gfx_shader.sv index 8ff6edc..2c3d651 100644 --- a/rtl/gfx/gfx_shader.sv +++ b/rtl/gfx/gfx_shader.sv @@ -4,7 +4,8 @@ import gfx::*, gfx_shader_schedif_pkg::*; input logic clk, rst_n, - if_axib.m insn_mem, + if_axib.m data_mem, + insn_mem, if_axil.s sched ); @@ -55,6 +56,7 @@ import gfx::*, gfx_shader_schedif_pkg::*; .back(front_back.back), .setup(setup.core), .reg_wb(regfile.wb), + .data_mem, .read_data(regfile.ab) ); diff --git a/rtl/gfx/gfx_shader_back.sv b/rtl/gfx/gfx_shader_back.sv index f7c2349..97a2726 100644 --- a/rtl/gfx/gfx_shader_back.sv +++ b/rtl/gfx/gfx_shader_back.sv @@ -9,7 +9,9 @@ import gfx::*; gfx_regfile_io.ab read_data, gfx_regfile_io.wb reg_wb, - gfx_shader_setup.core setup + gfx_shader_setup.core setup, + + if_axib.m data_mem ); logic abort; @@ -44,6 +46,7 @@ import gfx::*; .rst_n, .op(back.execute.p1), .wb(p1_wb.tx), + .mem(data_mem), .wave(back.execute.wave), .in_shake(p1_shake.rx), .read_data diff --git a/rtl/gfx/gfx_shader_front.sv b/rtl/gfx/gfx_shader_front.sv index acdde78..543b534 100644 --- a/rtl/gfx/gfx_shader_front.sv +++ b/rtl/gfx/gfx_shader_front.sv @@ -93,6 +93,13 @@ import gfx::*; .writeback(p0_writeback) ); + gfx_shader_decode_mem p1_dec + ( + .clk, + .op(front.execute.p1), + .insn(port_dec_wave.insn) + ); + endmodule module gfx_shader_bind @@ -446,8 +453,8 @@ import gfx::*, gfx_isa::*; output front_reg_passthru passthru ); - // + 1 por next-cycle de read.op - localparam int PASSTHRU_DEPTH = REG_READ_STAGES + 1 - 2; + // + 1 por next-cycle de read.op, - 2 por resto de decode + localparam int PASSTHRU_DEPTH = REG_READ_STAGES + 1; localparam int HOLD_DEPTH = PASSTHRU_DEPTH - 2; logic reg_rev; @@ -456,6 +463,7 @@ import gfx::*, gfx_isa::*; front_reg_passthru passthru_hold[PASSTHRU_DEPTH]; assign passthru = passthru_hold[$size(passthru_hold) - 1]; + assign read.mask_exec_group = out_hold[PASSTHRU_DEPTH - REGFILE_STAGES - 1].group; assign reg_rev = in.insn.reg_rev; @@ -599,20 +607,16 @@ endmodule module gfx_shader_decode_fpint import gfx::*, gfx_isa::*; ( - input logic clk, + input logic clk, - input insn_any insn, - input logic writeback, + input insn_fpint insn, + input logic writeback, - output fpint_op op + output fpint_op op ); - insn_fpint as_fpint; - - assign as_fpint = insn; - always_ff @(posedge clk) begin - unique case (as_fpint.op) + unique case (insn.op) INSN_FPINT_MOV: begin op.setup_mul_float <= 0; op.setup_unit_b <= 1; @@ -710,7 +714,7 @@ import gfx::*, gfx_isa::*; op.mnorm_zero_flags <= 0; op.mnorm_zero_b <= 0; op.minmax_abs <= 0; - op.minmax_swap <= as_fpint.op == INSN_FPINT_FMIN; + op.minmax_swap <= insn.op == INSN_FPINT_FMIN; op.minmax_zero_min <= 1; op.minmax_copy_flags <= 1; op.shiftr_int_signed <= 0; @@ -754,3 +758,18 @@ import gfx::*, gfx_isa::*; end endmodule + +module gfx_shader_decode_mem +import gfx::*, gfx_isa::*; +( + input logic clk, + + input insn_mem insn, + + output mem_op op +); + + always_ff @(posedge clk) + op.load <= insn.load; + +endmodule diff --git a/rtl/gfx/gfx_shader_mem.sv b/rtl/gfx/gfx_shader_mem.sv index 72ab0a4..64e4516 100644 --- a/rtl/gfx/gfx_shader_mem.sv +++ b/rtl/gfx/gfx_shader_mem.sv @@ -11,9 +11,277 @@ import gfx::*; if_shake.rx in_shake, + if_axib.m mem, + gfx_wb.tx wb ); - word foo; + if_beats #($bits(group_id)) aw_pending(), b_return(); + if_beats #($bits(group_id) + $bits(vgpr_num)) ar_pending(), r_return(); + + logic ar_load, aw_load, b_queued, r_done, r_writeback, + w_load, w_shift, w_start, w_strobe; + + group_id b_return_group, r_return_group; + vgpr_num r_return_vgpr; + logic[$bits(group_id):0] b_add, b_count; + + assign mem.wstrb = {($bits(mem.wstrb)){w_strobe}}; + assign mem.bready = 1; + assign mem.rready = ~r_writeback | r_done; + + assign wb.mask = 'x; + assign wb.group = r_writeback ? r_return_group : b_return_group; + assign wb.valid = r_writeback ? r_return.rx.valid : b_return.rx.valid & b_queued; + assign wb.pc_add = 'x; + assign wb.pc_inc = 1; + assign wb.scalar = 0; + assign wb.dest.vgpr = r_return_vgpr; + assign wb.pc_update = 1; + assign wb.writeback = r_writeback; + assign wb.mask_update = 0; + + assign w_load = ~mem.wvalid | (mem.wlast & mem.wready); + assign w_shift = mem.wvalid & mem.wready; + assign w_start = in_shake.valid & ~op.load & aw_load & w_load; + + assign r_done = wb.ready & r_writeback & r_return.rx.valid; + assign b_queued = |b_count; + assign b_return_group = b_return.rx.data; + assign {r_return_group, r_return_vgpr} = r_return.rx.data; + + assign in_shake.ready = op.load ? ar_load : aw_load & w_load; + assign b_return.rx.ready = wb.ready & ~r_writeback & b_queued; + assign r_return.rx.ready = wb.ready & r_writeback; + assign ar_pending.tx.data = {wave.group, wave.dest.vgpr}; + assign aw_pending.tx.data = wave.group; + assign ar_pending.tx.valid = in_shake.valid & op.load & ar_load; + assign aw_pending.tx.valid = w_start; + + gfx_shader_mem_addr_channel ar_channel + ( + .clk, + .rst_n, + + .load(ar_load), + .load_mask(read_data.mask_exec), + .load_lanes(read_data.a), + .load_valid(in_shake.valid & op.load), + + .axid(mem.arid), + .axlen(mem.arlen), + .axaddr(mem.araddr), + .axsize(mem.arsize), + .axburst(mem.arburst), + .axready(mem.arready), + .axvalid(mem.arvalid) + ); + + gfx_shader_mem_addr_channel aw_channel + ( + .clk, + .rst_n, + + .load(aw_load), + .load_mask(read_data.mask_exec), + .load_lanes(read_data.a), + .load_valid(in_shake.valid & ~op.load & w_load), + + .axid(mem.awid), + .axlen(mem.awlen), + .axaddr(mem.awaddr), + .axsize(mem.awsize), + .axburst(mem.awburst), + .axready(mem.awready), + .axvalid(mem.awvalid) + ); + + gfx_shader_mem_piso_shift w_stream + ( + .clk, + .load(w_load), + .shift(w_shift), + .in_mask(read_data.mask_exec), + .in_lanes(read_data.b), + .out_data(mem.wdata), + .out_last(mem.wlast), + .out_enable(w_strobe) + ); + + gfx_shader_mem_sipo_shift r_stream + ( + .clk, + .rst_n, + .shift(mem.rready & mem.rvalid), + .in_data(mem.rdata), + .in_done(r_done), + .in_last(mem.rlast), + .out_lanes(wb.lanes), + .out_valid(r_writeback) + ); + + + gfx_fifo #(.WIDTH($bits(group_id) + $bits(vgpr_num)), .DEPTH(1 << $bits(group_id))) ar_to_r + ( + .clk, + .rst_n, + .in(ar_pending.rx), + .out(r_return.tx) + ); + + gfx_fifo #(.WIDTH($bits(group_id)), .DEPTH(1 << $bits(group_id))) aw_to_b + ( + .clk, + .rst_n, + .in(aw_pending.rx), + .out(b_return.tx) + ); + + always_comb + unique case ({mem.bvalid, b_return.rx.ready & b_return.rx.valid}) + 2'b00, 2'b11: + b_add = '0; + + 2'b01: + b_add = '1; + + 2'b10: + b_add = {{($bits(b_add) - 1){1'b0}}, 1'b1}; + endcase + + always_ff @(posedge clk or negedge rst_n) + if (~rst_n) begin + b_count <= '0; + mem.wvalid <= 0; + end else begin + b_count <= b_count + b_add; + mem.wvalid <= w_start | (mem.wvalid & ~(mem.wlast & mem.wready)); + + assert (ar_pending.tx.ready); + assert (aw_pending.tx.ready); + end + +endmodule + +module gfx_shader_mem_addr_channel +import gfx::*; +( + input logic clk, + rst_n, + + input word load_lanes[SHADER_LANES], + input lane_mask load_mask, + input logic load_valid, + output logic load, + + input logic axready, + output logic axvalid, + output word axaddr, + output logic[7:0] axid, + axlen, + output logic[2:0] axsize, + output logic[1:0] axburst +); + + logic active, shift, strobe; + + assign axid = '0; + assign axlen = ($bits(axlen))'(SHADER_LANES - 1); + assign axsize = 3'b010; // 4 bytes/beat + assign axburst = 2'b01; // Incremental mode + assign axvalid = active & strobe; + + assign load = ~active | (strobe & axready); + assign shift = active & ~strobe; + + gfx_shader_mem_piso_shift ax_stream + ( + .clk, + .load, + .shift, + .in_mask(load_mask), + .in_lanes(load_lanes), + .out_data(axaddr), + .out_last(), + .out_enable(strobe) + ); + + always_ff @(posedge clk or negedge rst_n) + if (~rst_n) + active <= 0; + else + active <= ~load | load_valid; + +endmodule + +module gfx_shader_mem_piso_shift +import gfx::*; +( + input logic clk, + + input logic load, + shift, + + input word in_lanes[SHADER_LANES], + input lane_mask in_mask, + + output word out_data, + output logic out_last, + out_enable +); + + word data[SHADER_LANES]; + lane_no count; + lane_mask mask; + + assign out_data = data[0]; + assign out_last = &count; + assign out_enable = mask[0]; + + always_ff @(posedge clk) + if (load) begin + data <= in_lanes; + mask <= in_mask; + count <= '0; + end else if (shift) begin + for (int i = 0; i < SHADER_LANES - 1; ++i) + data[i] <= data[i + 1]; + + mask <= mask >> 1; + count <= count + 1; + end + +endmodule + +module gfx_shader_mem_sipo_shift +import gfx::*; +( + input logic clk, + rst_n, + + input logic shift, + in_done, + in_last, + input word in_data, + + output word out_lanes[SHADER_LANES], + output logic out_valid +); + + always_ff @(posedge clk) + if (shift) begin + for (int i = 0; i < SHADER_LANES - 1; ++i) + out_lanes[i] <= out_lanes[i + 1]; + + out_lanes[SHADER_LANES - 1] <= in_data; + end + + always_ff @(posedge clk or negedge rst_n) + if (~rst_n) + out_valid <= 0; + else if (in_done) + out_valid <= 0; + else if (shift) + out_valid <= in_last; endmodule diff --git a/rtl/gfx/gfx_shader_regs.sv b/rtl/gfx/gfx_shader_regs.sv index 2b3451a..de52fe2 100644 --- a/rtl/gfx/gfx_shader_regs.sv +++ b/rtl/gfx/gfx_shader_regs.sv @@ -9,7 +9,7 @@ import gfx::*; // verilator tracing_off localparam PC_TABLE_PORTS = 2; - localparam MASK_TABLE_PORTS = 1; + localparam MASK_TABLE_PORTS = 2; word hold_imm[REGFILE_STAGES], imm_out, read_a_data_sgpr, read_b_data_scalar, read_b_data_sgpr, read_const, read_a_data_vgpr[SHADER_LANES], @@ -33,7 +33,9 @@ import gfx::*; assign pc_read_groups[1] = io.pc_front_group; assign io.mask_back = mask_read[0]; + assign io.mask_exec = mask_read[1]; assign mask_read_groups[0] = io.mask_back_group; + assign mask_read_groups[1] = io.mask_exec_group; assign imm_out = hold_imm[$size(hold_imm) - 1]; assign a_scalar_out = hold_a_scalar[$bits(hold_a_scalar) - 1]; diff --git a/rtl/gfx/gfx_top.sv b/rtl/gfx/gfx_top.sv index 93e3ad9..8b2506b 100644 --- a/rtl/gfx/gfx_top.sv +++ b/rtl/gfx/gfx_top.sv @@ -8,7 +8,7 @@ import gfx::*; if_axil.s host_ctrl ); - if_axib insn_mem(); + if_axib data_mem(), insn_mem(); if_axil bootrom_axi(), debug_axi(), host_ctrl_axi(), sched_axi(), shader_0_axi(); logic irq_host_ctrl; @@ -51,6 +51,7 @@ import gfx::*; .clk, .rst_n, .sched(shader_0_axi.s), + .data_mem(data_mem.m), .insn_mem(insn_mem.m) ); |
