diff options
Diffstat (limited to 'rtl/gfx/gfx_shader_mem.sv')
| -rw-r--r-- | rtl/gfx/gfx_shader_mem.sv | 270 |
1 files changed, 269 insertions, 1 deletions
diff --git a/rtl/gfx/gfx_shader_mem.sv b/rtl/gfx/gfx_shader_mem.sv index 72ab0a4..64e4516 100644 --- a/rtl/gfx/gfx_shader_mem.sv +++ b/rtl/gfx/gfx_shader_mem.sv @@ -11,9 +11,277 @@ import gfx::*; if_shake.rx in_shake, + if_axib.m mem, + gfx_wb.tx wb ); - word foo; + if_beats #($bits(group_id)) aw_pending(), b_return(); + if_beats #($bits(group_id) + $bits(vgpr_num)) ar_pending(), r_return(); + + logic ar_load, aw_load, b_queued, r_done, r_writeback, + w_load, w_shift, w_start, w_strobe; + + group_id b_return_group, r_return_group; + vgpr_num r_return_vgpr; + logic[$bits(group_id):0] b_add, b_count; + + assign mem.wstrb = {($bits(mem.wstrb)){w_strobe}}; + assign mem.bready = 1; + assign mem.rready = ~r_writeback | r_done; + + assign wb.mask = 'x; + assign wb.group = r_writeback ? r_return_group : b_return_group; + assign wb.valid = r_writeback ? r_return.rx.valid : b_return.rx.valid & b_queued; + assign wb.pc_add = 'x; + assign wb.pc_inc = 1; + assign wb.scalar = 0; + assign wb.dest.vgpr = r_return_vgpr; + assign wb.pc_update = 1; + assign wb.writeback = r_writeback; + assign wb.mask_update = 0; + + assign w_load = ~mem.wvalid | (mem.wlast & mem.wready); + assign w_shift = mem.wvalid & mem.wready; + assign w_start = in_shake.valid & ~op.load & aw_load & w_load; + + assign r_done = wb.ready & r_writeback & r_return.rx.valid; + assign b_queued = |b_count; + assign b_return_group = b_return.rx.data; + assign {r_return_group, r_return_vgpr} = r_return.rx.data; + + assign in_shake.ready = op.load ? ar_load : aw_load & w_load; + assign b_return.rx.ready = wb.ready & ~r_writeback & b_queued; + assign r_return.rx.ready = wb.ready & r_writeback; + assign ar_pending.tx.data = {wave.group, wave.dest.vgpr}; + assign aw_pending.tx.data = wave.group; + assign ar_pending.tx.valid = in_shake.valid & op.load & ar_load; + assign aw_pending.tx.valid = w_start; + + gfx_shader_mem_addr_channel ar_channel + ( + .clk, + .rst_n, + + .load(ar_load), + .load_mask(read_data.mask_exec), + .load_lanes(read_data.a), + .load_valid(in_shake.valid & op.load), + + .axid(mem.arid), + .axlen(mem.arlen), + .axaddr(mem.araddr), + .axsize(mem.arsize), + .axburst(mem.arburst), + .axready(mem.arready), + .axvalid(mem.arvalid) + ); + + gfx_shader_mem_addr_channel aw_channel + ( + .clk, + .rst_n, + + .load(aw_load), + .load_mask(read_data.mask_exec), + .load_lanes(read_data.a), + .load_valid(in_shake.valid & ~op.load & w_load), + + .axid(mem.awid), + .axlen(mem.awlen), + .axaddr(mem.awaddr), + .axsize(mem.awsize), + .axburst(mem.awburst), + .axready(mem.awready), + .axvalid(mem.awvalid) + ); + + gfx_shader_mem_piso_shift w_stream + ( + .clk, + .load(w_load), + .shift(w_shift), + .in_mask(read_data.mask_exec), + .in_lanes(read_data.b), + .out_data(mem.wdata), + .out_last(mem.wlast), + .out_enable(w_strobe) + ); + + gfx_shader_mem_sipo_shift r_stream + ( + .clk, + .rst_n, + .shift(mem.rready & mem.rvalid), + .in_data(mem.rdata), + .in_done(r_done), + .in_last(mem.rlast), + .out_lanes(wb.lanes), + .out_valid(r_writeback) + ); + + + gfx_fifo #(.WIDTH($bits(group_id) + $bits(vgpr_num)), .DEPTH(1 << $bits(group_id))) ar_to_r + ( + .clk, + .rst_n, + .in(ar_pending.rx), + .out(r_return.tx) + ); + + gfx_fifo #(.WIDTH($bits(group_id)), .DEPTH(1 << $bits(group_id))) aw_to_b + ( + .clk, + .rst_n, + .in(aw_pending.rx), + .out(b_return.tx) + ); + + always_comb + unique case ({mem.bvalid, b_return.rx.ready & b_return.rx.valid}) + 2'b00, 2'b11: + b_add = '0; + + 2'b01: + b_add = '1; + + 2'b10: + b_add = {{($bits(b_add) - 1){1'b0}}, 1'b1}; + endcase + + always_ff @(posedge clk or negedge rst_n) + if (~rst_n) begin + b_count <= '0; + mem.wvalid <= 0; + end else begin + b_count <= b_count + b_add; + mem.wvalid <= w_start | (mem.wvalid & ~(mem.wlast & mem.wready)); + + assert (ar_pending.tx.ready); + assert (aw_pending.tx.ready); + end + +endmodule + +module gfx_shader_mem_addr_channel +import gfx::*; +( + input logic clk, + rst_n, + + input word load_lanes[SHADER_LANES], + input lane_mask load_mask, + input logic load_valid, + output logic load, + + input logic axready, + output logic axvalid, + output word axaddr, + output logic[7:0] axid, + axlen, + output logic[2:0] axsize, + output logic[1:0] axburst +); + + logic active, shift, strobe; + + assign axid = '0; + assign axlen = ($bits(axlen))'(SHADER_LANES - 1); + assign axsize = 3'b010; // 4 bytes/beat + assign axburst = 2'b01; // Incremental mode + assign axvalid = active & strobe; + + assign load = ~active | (strobe & axready); + assign shift = active & ~strobe; + + gfx_shader_mem_piso_shift ax_stream + ( + .clk, + .load, + .shift, + .in_mask(load_mask), + .in_lanes(load_lanes), + .out_data(axaddr), + .out_last(), + .out_enable(strobe) + ); + + always_ff @(posedge clk or negedge rst_n) + if (~rst_n) + active <= 0; + else + active <= ~load | load_valid; + +endmodule + +module gfx_shader_mem_piso_shift +import gfx::*; +( + input logic clk, + + input logic load, + shift, + + input word in_lanes[SHADER_LANES], + input lane_mask in_mask, + + output word out_data, + output logic out_last, + out_enable +); + + word data[SHADER_LANES]; + lane_no count; + lane_mask mask; + + assign out_data = data[0]; + assign out_last = &count; + assign out_enable = mask[0]; + + always_ff @(posedge clk) + if (load) begin + data <= in_lanes; + mask <= in_mask; + count <= '0; + end else if (shift) begin + for (int i = 0; i < SHADER_LANES - 1; ++i) + data[i] <= data[i + 1]; + + mask <= mask >> 1; + count <= count + 1; + end + +endmodule + +module gfx_shader_mem_sipo_shift +import gfx::*; +( + input logic clk, + rst_n, + + input logic shift, + in_done, + in_last, + input word in_data, + + output word out_lanes[SHADER_LANES], + output logic out_valid +); + + always_ff @(posedge clk) + if (shift) begin + for (int i = 0; i < SHADER_LANES - 1; ++i) + out_lanes[i] <= out_lanes[i + 1]; + + out_lanes[SHADER_LANES - 1] <= in_data; + end + + always_ff @(posedge clk or negedge rst_n) + if (~rst_n) + out_valid <= 0; + else if (in_done) + out_valid <= 0; + else if (shift) + out_valid <= in_last; endmodule |
