summary | refs | log | tree | commit | diff
path: root/rtl/gfx/gfx_shader_mem.sv
diff options
context:
space:
mode:
Diffstat (limited to 'rtl/gfx/gfx_shader_mem.sv')
-rw-r--r-- rtl/gfx/gfx_shader_mem.sv 270
1 files changed, 269 insertions, 1 deletions
diff --git a/rtl/gfx/gfx_shader_mem.sv b/rtl/gfx/gfx_shader_mem.sv
index 72ab0a4..64e4516 100644
--- a/rtl/gfx/gfx_shader_mem.sv
+++ b/rtl/gfx/gfx_shader_mem.sv
@@ -11,9 +11,277 @@ import gfx::*;
if_shake.rx in_shake,
+ if_axib.m mem,
+
gfx_wb.tx wb
);
- word foo;
+ if_beats #($bits(group_id)) aw_pending(), b_return();
+ if_beats #($bits(group_id) + $bits(vgpr_num)) ar_pending(), r_return();
+
+ logic ar_load, aw_load, b_queued, r_done, r_writeback,
+ w_load, w_shift, w_start, w_strobe;
+
+ group_id b_return_group, r_return_group;
+ vgpr_num r_return_vgpr;
+ logic[$bits(group_id):0] b_add, b_count;
+
+ assign mem.wstrb = {($bits(mem.wstrb)){w_strobe}};
+ assign mem.bready = 1;
+ assign mem.rready = ~r_writeback | r_done;
+
+ assign wb.mask = 'x;
+ assign wb.group = r_writeback ? r_return_group : b_return_group;
+ assign wb.valid = r_writeback ? r_return.rx.valid : b_return.rx.valid & b_queued;
+ assign wb.pc_add = 'x;
+ assign wb.pc_inc = 1;
+ assign wb.scalar = 0;
+ assign wb.dest.vgpr = r_return_vgpr;
+ assign wb.pc_update = 1;
+ assign wb.writeback = r_writeback;
+ assign wb.mask_update = 0;
+
+ assign w_load = ~mem.wvalid | (mem.wlast & mem.wready);
+ assign w_shift = mem.wvalid & mem.wready;
+ assign w_start = in_shake.valid & ~op.load & aw_load & w_load;
+
+ assign r_done = wb.ready & r_writeback & r_return.rx.valid;
+ assign b_queued = |b_count;
+ assign b_return_group = b_return.rx.data;
+ assign {r_return_group, r_return_vgpr} = r_return.rx.data;
+
+ assign in_shake.ready = op.load ? ar_load : aw_load & w_load;
+ assign b_return.rx.ready = wb.ready & ~r_writeback & b_queued;
+ assign r_return.rx.ready = wb.ready & r_writeback;
+ assign ar_pending.tx.data = {wave.group, wave.dest.vgpr};
+ assign aw_pending.tx.data = wave.group;
+ assign ar_pending.tx.valid = in_shake.valid & op.load & ar_load;
+ assign aw_pending.tx.valid = w_start;
+
+ gfx_shader_mem_addr_channel ar_channel
+ (
+ .clk,
+ .rst_n,
+
+ .load(ar_load),
+ .load_mask(read_data.mask_exec),
+ .load_lanes(read_data.a),
+ .load_valid(in_shake.valid & op.load),
+
+ .axid(mem.arid),
+ .axlen(mem.arlen),
+ .axaddr(mem.araddr),
+ .axsize(mem.arsize),
+ .axburst(mem.arburst),
+ .axready(mem.arready),
+ .axvalid(mem.arvalid)
+ );
+
+ gfx_shader_mem_addr_channel aw_channel
+ (
+ .clk,
+ .rst_n,
+
+ .load(aw_load),
+ .load_mask(read_data.mask_exec),
+ .load_lanes(read_data.a),
+ .load_valid(in_shake.valid & ~op.load & w_load),
+
+ .axid(mem.awid),
+ .axlen(mem.awlen),
+ .axaddr(mem.awaddr),
+ .axsize(mem.awsize),
+ .axburst(mem.awburst),
+ .axready(mem.awready),
+ .axvalid(mem.awvalid)
+ );
+
+ gfx_shader_mem_piso_shift w_stream
+ (
+ .clk,
+ .load(w_load),
+ .shift(w_shift),
+ .in_mask(read_data.mask_exec),
+ .in_lanes(read_data.b),
+ .out_data(mem.wdata),
+ .out_last(mem.wlast),
+ .out_enable(w_strobe)
+ );
+
+ gfx_shader_mem_sipo_shift r_stream
+ (
+ .clk,
+ .rst_n,
+ .shift(mem.rready & mem.rvalid),
+ .in_data(mem.rdata),
+ .in_done(r_done),
+ .in_last(mem.rlast),
+ .out_lanes(wb.lanes),
+ .out_valid(r_writeback)
+ );
+
+
+ gfx_fifo #(.WIDTH($bits(group_id) + $bits(vgpr_num)), .DEPTH(1 << $bits(group_id))) ar_to_r
+ (
+ .clk,
+ .rst_n,
+ .in(ar_pending.rx),
+ .out(r_return.tx)
+ );
+
+ gfx_fifo #(.WIDTH($bits(group_id)), .DEPTH(1 << $bits(group_id))) aw_to_b
+ (
+ .clk,
+ .rst_n,
+ .in(aw_pending.rx),
+ .out(b_return.tx)
+ );
+
+ always_comb
+ unique case ({mem.bvalid, b_return.rx.ready & b_return.rx.valid})
+ 2'b00, 2'b11:
+ b_add = '0;
+
+ 2'b01:
+ b_add = '1;
+
+ 2'b10:
+ b_add = {{($bits(b_add) - 1){1'b0}}, 1'b1};
+ endcase
+
+ always_ff @(posedge clk or negedge rst_n)
+ if (~rst_n) begin
+ b_count <= '0;
+ mem.wvalid <= 0;
+ end else begin
+ b_count <= b_count + b_add;
+ mem.wvalid <= w_start | (mem.wvalid & ~(mem.wlast & mem.wready));
+
+ assert (ar_pending.tx.ready);
+ assert (aw_pending.tx.ready);
+ end
+
+endmodule
+
+module gfx_shader_mem_addr_channel // Drives one AXI address channel (instantiated for both AR and AW) from per-lane address words
+import gfx::*;
+(
+ input logic clk,
+ rst_n, // active-low asynchronous reset (only `active` is reset)
+// Request side: lanes and mask are captured by the internal shifter whenever `load` is high
+ input word load_lanes[SHADER_LANES], // one candidate address word per lane
+ input lane_mask load_mask, // per-lane enable; lanes with a clear bit are skipped
+ input logic load_valid, // a request is being offered on the load_* inputs
+ output logic load, // high when the channel captures load_* this cycle (idle, or current address accepted)
+// AXI address channel side
+ input logic axready,
+ output logic axvalid,
+ output word axaddr, // address of the first enabled lane still held in the shifter
+ output logic[7:0] axid, // tied to zero below
+ axlen, // tied to SHADER_LANES - 1 below
+ output logic[2:0] axsize,
+ output logic[1:0] axburst
+);
+// active: a captured request has not yet been accepted; strobe: mask bit of the lane at the shifter head
+ logic active, shift, strobe;
+// Constant burst attributes: every request uses the same ID, length, size and burst type
+ assign axid = '0;
+ assign axlen = ($bits(axlen))'(SHADER_LANES - 1); // AXI length field encodes beats - 1, i.e. SHADER_LANES beats
+ assign axsize = 3'b010; // 4 bytes/beat
+ assign axburst = 2'b01; // Incremental mode
+ assign axvalid = active & strobe; // present the address only once an enabled lane is at the head
+// NOTE(review): if load_mask is all zeros, strobe never rises, so load stays low and the channel never frees - confirm callers never issue an empty mask
+ assign load = ~active | (strobe & axready); // reload when idle or on the cycle the address handshake completes
+ assign shift = active & ~strobe; // skip over masked-off lanes until an enabled one reaches the head
+// Shifter holding the candidate addresses; out_last is unused because only the first enabled lane's address is issued
+ gfx_shader_mem_piso_shift ax_stream
+ (
+ .clk,
+ .load,
+ .shift,
+ .in_mask(load_mask),
+ .in_lanes(load_lanes),
+ .out_data(axaddr),
+ .out_last(),
+ .out_enable(strobe)
+ );
+// Busy flag: stays set while load is low; on a load cycle it takes the value of load_valid
+ always_ff @(posedge clk or negedge rst_n)
+ if (~rst_n)
+ active <= 0;
+ else
+ active <= ~load | load_valid;
+
+endmodule
+
+module gfx_shader_mem_piso_shift // Parallel-in/serial-out shifter: captures all lanes plus a mask, then exposes one lane at a time
+import gfx::*;
+(
+ input logic clk, // no reset: internal state is undefined until the first load
+
+ input logic load, // capture in_lanes/in_mask and clear the beat counter (takes priority over shift)
+ shift, // advance to the next lane; ignored while load is high
+
+ input word in_lanes[SHADER_LANES], // per-lane words to serialize, lane 0 first
+ input lane_mask in_mask, // per-lane enable bits, bit 0 belongs to lane 0
+
+ output word out_data, // word of the lane currently at the head
+ output logic out_last, // high when every bit of the shift counter is 1
+ out_enable // mask bit of the lane currently at the head
+);
+// Shift state: data/mask move toward index 0 in lockstep, count tracks the number of shifts since load
+ word data[SHADER_LANES];
+ lane_no count;
+ lane_mask mask;
+
+ assign out_data = data[0];
+ assign out_last = &count; // reduction AND: true only at the all-ones count value
+ assign out_enable = mask[0];
+
+ always_ff @(posedge clk)
+ if (load) begin
+ data <= in_lanes;
+ mask <= in_mask;
+ count <= '0;
+ end else if (shift) begin
+ for (int i = 0; i < SHADER_LANES - 1; ++i) // data[SHADER_LANES - 1] is not rewritten and keeps its old value
+ data[i] <= data[i + 1];
+
+ mask <= mask >> 1; // logical shift: zeros enter at the top, so drained lanes read as disabled
+ count <= count + 1;
+ end
+
+endmodule
+
+module gfx_shader_mem_sipo_shift // Serial-in/parallel-out shifter: collects one word per shift into a lane array and flags completion
+import gfx::*;
+(
+ input logic clk,
+ rst_n, // active-low asynchronous reset (only out_valid is reset)
+
+ input logic shift, // accept in_data this cycle
+ in_done, // consumer has taken out_lanes; clears out_valid (takes priority over shift)
+ in_last, // marks the word accepted with this shift as the final one
+ input word in_data,
+
+ output word out_lanes[SHADER_LANES], // collected words; not reset, so contents are undefined until shifted in
+ output logic out_valid // set after a shift that carried in_last, held until in_done
+);
+// Data path: new words enter at the highest index and earlier words move one step toward index 0
+ always_ff @(posedge clk)
+ if (shift) begin
+ for (int i = 0; i < SHADER_LANES - 1; ++i)
+ out_lanes[i] <= out_lanes[i + 1];
+
+ out_lanes[SHADER_LANES - 1] <= in_data;
+ end
+// Valid flag: follows in_last on every shift, so a non-final shift also clears it
+ always_ff @(posedge clk or negedge rst_n)
+ if (~rst_n)
+ out_valid <= 0;
+ else if (in_done)
+ out_valid <= 0;
+ else if (shift)
+ out_valid <= in_last;
endmodule