summaryrefslogtreecommitdiff
path: root/rtl/gfx
diff options
context:
space:
mode:
author Alejandro Soto <alejandro@34project.org> 2024-05-19 19:55:20 -0600
committer Alejandro Soto <alejandro@34project.org> 2024-05-24 05:58:41 -0600
commit 1246f6ec28f37c8bf85dd6262928c92899a5a539 (patch)
tree 06047f1496fe61f5080eeb7ab5c3997d60271194 /rtl/gfx
parent 40bd702015f3a09f5c4d1ad30439b1ea186d7484 (diff)
rtl/gfx: implement memory unit
Diffstat (limited to 'rtl/gfx')
-rw-r--r-- rtl/gfx/gfx_isa.sv 10
-rw-r--r-- rtl/gfx/gfx_pkg.sv 2
-rw-r--r-- rtl/gfx/gfx_regfile_io.sv 17
-rw-r--r-- rtl/gfx/gfx_shader.sv 4
-rw-r--r-- rtl/gfx/gfx_shader_back.sv 5
-rw-r--r-- rtl/gfx/gfx_shader_front.sv 43
-rw-r--r-- rtl/gfx/gfx_shader_mem.sv 270
-rw-r--r-- rtl/gfx/gfx_shader_regs.sv 4
-rw-r--r-- rtl/gfx/gfx_top.sv 3
9 files changed, 335 insertions, 23 deletions
diff --git a/rtl/gfx/gfx_isa.sv b/rtl/gfx/gfx_isa.sv
index cc34156..f3ca66a 100644
--- a/rtl/gfx/gfx_isa.sv
+++ b/rtl/gfx/gfx_isa.sv
@@ -80,6 +80,16 @@ package gfx_isa;
typedef struct packed
{
+ xgpr_mode reg_mode; // insn_mem: instruction view taken as the `insn` input of gfx_shader_decode_mem
+ dst_src_rr dst_src;
+ logic reg_rev;
+ logic[3:0] reserved; // 4 bits named as reserved; no reader is visible in this diff
+ logic load; // registered into mem_op.load by gfx_shader_decode_mem
+ insn_class op_class;
+ } insn_mem;
+
+ typedef struct packed
+ {
xgpr_mode reg_mode;
dst_src_rr dst_src;
logic reg_rev;
diff --git a/rtl/gfx/gfx_pkg.sv b/rtl/gfx/gfx_pkg.sv
index 7072967..d20b678 100644
--- a/rtl/gfx/gfx_pkg.sv
+++ b/rtl/gfx/gfx_pkg.sv
@@ -103,7 +103,7 @@ package gfx;
typedef struct packed
{
- logic todo;
+ logic load; // written by gfx_shader_decode_mem from insn_mem.load; presumably 1 = load, 0 = store - confirm against gfx_shader_mem
} mem_op;
typedef struct packed
diff --git a/rtl/gfx/gfx_regfile_io.sv b/rtl/gfx/gfx_regfile_io.sv
index 2459049..a3f0622 100644
--- a/rtl/gfx/gfx_regfile_io.sv
+++ b/rtl/gfx/gfx_regfile_io.sv
@@ -33,21 +33,26 @@ interface gfx_regfile_io;
word data[SHADER_LANES];
} vgpr_write;
+ group_id mask_back_group, mask_exec_group, mask_wb_group,
+ pc_back_group, pc_front_group, pc_wb_group;
+
word a[SHADER_LANES], b[SHADER_LANES], sgpr_write_data, vgpr_write_data[SHADER_LANES];
logic mask_wb_write, pc_wb_write;
word_ptr pc_back, pc_front, pc_wb;
- group_id mask_back_group, mask_wb_group, pc_back_group, pc_front_group, pc_wb_group;
- lane_mask mask_back, mask_wb;
+ lane_mask mask_back, mask_exec, mask_wb;
modport ab
(
input a,
- b
+ b,
+
+ mask_exec
);
modport read
(
- output op
+ output op,
+ mask_exec_group
);
modport bind_
@@ -86,6 +91,7 @@ interface gfx_regfile_io;
pc_back_group,
pc_front_group,
mask_back_group,
+ mask_exec_group,
pc_wb,
pc_wb_group,
@@ -100,7 +106,8 @@ interface gfx_regfile_io;
pc_back,
pc_front,
- mask_back
+ mask_back,
+ mask_exec
);
endinterface
diff --git a/rtl/gfx/gfx_shader.sv b/rtl/gfx/gfx_shader.sv
index 8ff6edc..2c3d651 100644
--- a/rtl/gfx/gfx_shader.sv
+++ b/rtl/gfx/gfx_shader.sv
@@ -4,7 +4,8 @@ import gfx::*, gfx_shader_schedif_pkg::*;
input logic clk,
rst_n,
- if_axib.m insn_mem,
+ if_axib.m data_mem,
+ insn_mem,
if_axil.s sched
);
@@ -55,6 +56,7 @@ import gfx::*, gfx_shader_schedif_pkg::*;
.back(front_back.back),
.setup(setup.core),
.reg_wb(regfile.wb),
+ .data_mem,
.read_data(regfile.ab)
);
diff --git a/rtl/gfx/gfx_shader_back.sv b/rtl/gfx/gfx_shader_back.sv
index f7c2349..97a2726 100644
--- a/rtl/gfx/gfx_shader_back.sv
+++ b/rtl/gfx/gfx_shader_back.sv
@@ -9,7 +9,9 @@ import gfx::*;
gfx_regfile_io.ab read_data,
gfx_regfile_io.wb reg_wb,
- gfx_shader_setup.core setup
+ gfx_shader_setup.core setup,
+
+ if_axib.m data_mem
);
logic abort;
@@ -44,6 +46,7 @@ import gfx::*;
.rst_n,
.op(back.execute.p1),
.wb(p1_wb.tx),
+ .mem(data_mem),
.wave(back.execute.wave),
.in_shake(p1_shake.rx),
.read_data
diff --git a/rtl/gfx/gfx_shader_front.sv b/rtl/gfx/gfx_shader_front.sv
index acdde78..543b534 100644
--- a/rtl/gfx/gfx_shader_front.sv
+++ b/rtl/gfx/gfx_shader_front.sv
@@ -93,6 +93,13 @@ import gfx::*;
.writeback(p0_writeback)
);
+ gfx_shader_decode_mem p1_dec
+ (
+ .clk,
+ .op(front.execute.p1),
+ .insn(port_dec_wave.insn)
+ );
+
endmodule
module gfx_shader_bind
@@ -446,8 +453,8 @@ import gfx::*, gfx_isa::*;
output front_reg_passthru passthru
);
- // + 1 por next-cycle de read.op
- localparam int PASSTHRU_DEPTH = REG_READ_STAGES + 1 - 2;
+ // + 1 for the next-cycle of read.op, - 2 for the rest of decode
+ localparam int PASSTHRU_DEPTH = REG_READ_STAGES + 1;
localparam int HOLD_DEPTH = PASSTHRU_DEPTH - 2;
logic reg_rev;
@@ -456,6 +463,7 @@ import gfx::*, gfx_isa::*;
front_reg_passthru passthru_hold[PASSTHRU_DEPTH];
assign passthru = passthru_hold[$size(passthru_hold) - 1];
+ assign read.mask_exec_group = out_hold[PASSTHRU_DEPTH - REGFILE_STAGES - 1].group;
assign reg_rev = in.insn.reg_rev;
@@ -599,20 +607,16 @@ endmodule
module gfx_shader_decode_fpint
import gfx::*, gfx_isa::*;
(
- input logic clk,
+ input logic clk,
- input insn_any insn,
- input logic writeback,
+ input insn_fpint insn,
+ input logic writeback,
- output fpint_op op
+ output fpint_op op
);
- insn_fpint as_fpint;
-
- assign as_fpint = insn;
-
always_ff @(posedge clk) begin
- unique case (as_fpint.op)
+ unique case (insn.op)
INSN_FPINT_MOV: begin
op.setup_mul_float <= 0;
op.setup_unit_b <= 1;
@@ -710,7 +714,7 @@ import gfx::*, gfx_isa::*;
op.mnorm_zero_flags <= 0;
op.mnorm_zero_b <= 0;
op.minmax_abs <= 0;
- op.minmax_swap <= as_fpint.op == INSN_FPINT_FMIN;
+ op.minmax_swap <= insn.op == INSN_FPINT_FMIN;
op.minmax_zero_min <= 1;
op.minmax_copy_flags <= 1;
op.shiftr_int_signed <= 0;
@@ -754,3 +758,18 @@ import gfx::*, gfx_isa::*;
end
endmodule
+
+module gfx_shader_decode_mem // Decoder for memory instructions: registers insn.load into the mem_op output
+import gfx::*, gfx_isa::*;
+(
+ input logic clk, // op is updated on the rising edge
+
+ input insn_mem insn, // instruction viewed through the insn_mem layout
+
+ output mem_op op // decoded operation, valid one clock after insn
+);
+
+ always_ff @(posedge clk) // no reset: op.load holds no defined value until the first clock edge
+ op.load <= insn.load; // the only field mem_op carries
+
+endmodule
diff --git a/rtl/gfx/gfx_shader_mem.sv b/rtl/gfx/gfx_shader_mem.sv
index 72ab0a4..64e4516 100644
--- a/rtl/gfx/gfx_shader_mem.sv
+++ b/rtl/gfx/gfx_shader_mem.sv
@@ -11,9 +11,277 @@ import gfx::*;
if_shake.rx in_shake,
+ if_axib.m mem,
+
gfx_wb.tx wb
);
- word foo;
+ if_beats #($bits(group_id)) aw_pending(), b_return();
+ if_beats #($bits(group_id) + $bits(vgpr_num)) ar_pending(), r_return();
+
+ logic ar_load, aw_load, b_queued, r_done, r_writeback,
+ w_load, w_shift, w_start, w_strobe;
+
+ group_id b_return_group, r_return_group;
+ vgpr_num r_return_vgpr;
+ logic[$bits(group_id):0] b_add, b_count;
+
+ assign mem.wstrb = {($bits(mem.wstrb)){w_strobe}};
+ assign mem.bready = 1;
+ assign mem.rready = ~r_writeback | r_done;
+
+ assign wb.mask = 'x;
+ assign wb.group = r_writeback ? r_return_group : b_return_group;
+ assign wb.valid = r_writeback ? r_return.rx.valid : b_return.rx.valid & b_queued;
+ assign wb.pc_add = 'x;
+ assign wb.pc_inc = 1;
+ assign wb.scalar = 0;
+ assign wb.dest.vgpr = r_return_vgpr;
+ assign wb.pc_update = 1;
+ assign wb.writeback = r_writeback;
+ assign wb.mask_update = 0;
+
+ assign w_load = ~mem.wvalid | (mem.wlast & mem.wready);
+ assign w_shift = mem.wvalid & mem.wready;
+ assign w_start = in_shake.valid & ~op.load & aw_load & w_load;
+
+ assign r_done = wb.ready & r_writeback & r_return.rx.valid;
+ assign b_queued = |b_count;
+ assign b_return_group = b_return.rx.data;
+ assign {r_return_group, r_return_vgpr} = r_return.rx.data;
+
+ assign in_shake.ready = op.load ? ar_load : aw_load & w_load;
+ assign b_return.rx.ready = wb.ready & ~r_writeback & b_queued;
+ assign r_return.rx.ready = wb.ready & r_writeback;
+ assign ar_pending.tx.data = {wave.group, wave.dest.vgpr};
+ assign aw_pending.tx.data = wave.group;
+ assign ar_pending.tx.valid = in_shake.valid & op.load & ar_load;
+ assign aw_pending.tx.valid = w_start;
+
+ gfx_shader_mem_addr_channel ar_channel
+ (
+ .clk,
+ .rst_n,
+
+ .load(ar_load),
+ .load_mask(read_data.mask_exec),
+ .load_lanes(read_data.a),
+ .load_valid(in_shake.valid & op.load),
+
+ .axid(mem.arid),
+ .axlen(mem.arlen),
+ .axaddr(mem.araddr),
+ .axsize(mem.arsize),
+ .axburst(mem.arburst),
+ .axready(mem.arready),
+ .axvalid(mem.arvalid)
+ );
+
+ gfx_shader_mem_addr_channel aw_channel
+ (
+ .clk,
+ .rst_n,
+
+ .load(aw_load),
+ .load_mask(read_data.mask_exec),
+ .load_lanes(read_data.a),
+ .load_valid(in_shake.valid & ~op.load & w_load),
+
+ .axid(mem.awid),
+ .axlen(mem.awlen),
+ .axaddr(mem.awaddr),
+ .axsize(mem.awsize),
+ .axburst(mem.awburst),
+ .axready(mem.awready),
+ .axvalid(mem.awvalid)
+ );
+
+ gfx_shader_mem_piso_shift w_stream
+ (
+ .clk,
+ .load(w_load),
+ .shift(w_shift),
+ .in_mask(read_data.mask_exec),
+ .in_lanes(read_data.b),
+ .out_data(mem.wdata),
+ .out_last(mem.wlast),
+ .out_enable(w_strobe)
+ );
+
+ gfx_shader_mem_sipo_shift r_stream
+ (
+ .clk,
+ .rst_n,
+ .shift(mem.rready & mem.rvalid),
+ .in_data(mem.rdata),
+ .in_done(r_done),
+ .in_last(mem.rlast),
+ .out_lanes(wb.lanes),
+ .out_valid(r_writeback)
+ );
+
+
+ gfx_fifo #(.WIDTH($bits(group_id) + $bits(vgpr_num)), .DEPTH(1 << $bits(group_id))) ar_to_r
+ (
+ .clk,
+ .rst_n,
+ .in(ar_pending.rx),
+ .out(r_return.tx)
+ );
+
+ gfx_fifo #(.WIDTH($bits(group_id)), .DEPTH(1 << $bits(group_id))) aw_to_b
+ (
+ .clk,
+ .rst_n,
+ .in(aw_pending.rx),
+ .out(b_return.tx)
+ );
+
+ always_comb
+ unique case ({mem.bvalid, b_return.rx.ready & b_return.rx.valid})
+ 2'b00, 2'b11:
+ b_add = '0;
+
+ 2'b01:
+ b_add = '1;
+
+ 2'b10:
+ b_add = {{($bits(b_add) - 1){1'b0}}, 1'b1};
+ endcase
+
+ always_ff @(posedge clk or negedge rst_n)
+ if (~rst_n) begin
+ b_count <= '0;
+ mem.wvalid <= 0;
+ end else begin
+ b_count <= b_count + b_add;
+ mem.wvalid <= w_start | (mem.wvalid & ~(mem.wlast & mem.wready));
+
+ assert (ar_pending.tx.ready);
+ assert (aw_pending.tx.ready);
+ end
+
+endmodule
+
+module gfx_shader_mem_addr_channel // Drives one address channel (ax* ports) from per-lane words and a lane mask
+import gfx::*;
+(
+ input logic clk,
+ rst_n, // asynchronous, active low; only clears `active`
+
+ input word load_lanes[SHADER_LANES], // per-lane words; the word that reaches position 0 is driven on axaddr
+ input lane_mask load_mask, // per-lane enable bits, shifted in step with load_lanes
+ input logic load_valid, // a request is being offered; it is taken in a cycle where `load` is 1
+ output logic load, // 1 when this channel captures load_lanes/load_mask this cycle
+
+ input logic axready,
+ output logic axvalid,
+ output word axaddr,
+ output logic[7:0] axid,
+ axlen,
+ output logic[2:0] axsize,
+ output logic[1:0] axburst
+);
+
+ logic active, shift, strobe; // active: a request is held; strobe: mask bit of the lane currently at position 0
+
+ assign axid = '0; // every transaction uses ID 0
+ assign axlen = ($bits(axlen))'(SHADER_LANES - 1); // constant; AXI encodes length as beats - 1, so presumably one beat per lane
+ assign axsize = 3'b010; // 4 bytes/beat
+ assign axburst = 2'b01; // Incremental mode
+ assign axvalid = active & strobe; // the address is only presented once an enabled lane is at position 0
+
+ assign load = ~active | (strobe & axready); // idle, or the held address is being accepted this cycle
+ assign shift = active & ~strobe; // skip over disabled lanes until an enabled one reaches position 0
+
+ gfx_shader_mem_piso_shift ax_stream // holds the lane words and mask; NOTE(review): an all-zero load_mask never raises strobe, so `active` stays set and `load` stays low - confirm callers never issue with an empty mask
+ (
+ .clk,
+ .load,
+ .shift,
+ .in_mask(load_mask),
+ .in_lanes(load_lanes),
+ .out_data(axaddr),
+ .out_last(), // unused: the channel reloads as soon as one address is accepted
+ .out_enable(strobe)
+ );
+
+ always_ff @(posedge clk or negedge rst_n)
+ if (~rst_n)
+ active <= 0;
+ else
+ active <= ~load | load_valid; // stay busy while a request is held; when loading, become busy only if load_valid
+
+endmodule
+
+module gfx_shader_mem_piso_shift // Parallel-in, serial-out shifter over SHADER_LANES words with a matching lane mask and a shift counter
+import gfx::*;
+(
+ input logic clk, // no reset port: state is only defined after the first load
+
+ input logic load, // capture in_lanes/in_mask and clear the counter; takes priority over shift
+ shift, // advance by one lane when load is 0
+
+ input word in_lanes[SHADER_LANES],
+ input lane_mask in_mask,
+
+ output word out_data, // word currently at position 0
+ output logic out_last, // 1 when every bit of the shift counter is set
+ out_enable // mask bit of the word currently at position 0
+);
+
+ word data[SHADER_LANES];
+ lane_no count; // number of shifts since the last load (wraps at the width of lane_no)
+ lane_mask mask;
+
+ assign out_data = data[0];
+ assign out_last = &count; // equals SHADER_LANES - 1 only if lane_no is exactly log2(SHADER_LANES) bits - TODO confirm in gfx_pkg
+ assign out_enable = mask[0];
+
+ always_ff @(posedge clk)
+ if (load) begin
+ data <= in_lanes;
+ mask <= in_mask;
+ count <= '0;
+ end else if (shift) begin
+ for (int i = 0; i < SHADER_LANES - 1; ++i) // the last element is not rewritten and keeps its previous value
+ data[i] <= data[i + 1];
+
+ mask <= mask >> 1; // zeros enter from the top, so shifted-out lanes read as disabled
+ count <= count + 1;
+ end
+
+endmodule
+
+module gfx_shader_mem_sipo_shift // Serial-in, parallel-out collector: gathers incoming words into out_lanes and flags when the last one arrived
+import gfx::*;
+(
+ input logic clk,
+ rst_n, // asynchronous, active low; only clears out_valid
+
+ input logic shift, // accept in_data this cycle
+ in_done, // consumer has taken out_lanes; clears out_valid
+ in_last, // sampled together with shift; marks the final word
+ input word in_data,
+
+ output word out_lanes[SHADER_LANES],
+ output logic out_valid
+);
+
+ always_ff @(posedge clk) // data path has no reset
+ if (shift) begin
+ for (int i = 0; i < SHADER_LANES - 1; ++i) // older words move towards index 0
+ out_lanes[i] <= out_lanes[i + 1];
+
+ out_lanes[SHADER_LANES - 1] <= in_data; // newest word enters at the highest index
+ end
+
+ always_ff @(posedge clk or negedge rst_n)
+ if (~rst_n)
+ out_valid <= 0;
+ else if (in_done) // in_done wins over a simultaneous shift
+ out_valid <= 0;
+ else if (shift)
+ out_valid <= in_last; // valid from the cycle after the last word was shifted in
endmodule
diff --git a/rtl/gfx/gfx_shader_regs.sv b/rtl/gfx/gfx_shader_regs.sv
index 2b3451a..de52fe2 100644
--- a/rtl/gfx/gfx_shader_regs.sv
+++ b/rtl/gfx/gfx_shader_regs.sv
@@ -9,7 +9,7 @@ import gfx::*;
// verilator tracing_off
localparam PC_TABLE_PORTS = 2;
- localparam MASK_TABLE_PORTS = 1;
+ localparam MASK_TABLE_PORTS = 2;
word hold_imm[REGFILE_STAGES], imm_out, read_a_data_sgpr, read_b_data_scalar,
read_b_data_sgpr, read_const, read_a_data_vgpr[SHADER_LANES],
@@ -33,7 +33,9 @@ import gfx::*;
assign pc_read_groups[1] = io.pc_front_group;
assign io.mask_back = mask_read[0];
+ assign io.mask_exec = mask_read[1];
assign mask_read_groups[0] = io.mask_back_group;
+ assign mask_read_groups[1] = io.mask_exec_group;
assign imm_out = hold_imm[$size(hold_imm) - 1];
assign a_scalar_out = hold_a_scalar[$bits(hold_a_scalar) - 1];
diff --git a/rtl/gfx/gfx_top.sv b/rtl/gfx/gfx_top.sv
index 93e3ad9..8b2506b 100644
--- a/rtl/gfx/gfx_top.sv
+++ b/rtl/gfx/gfx_top.sv
@@ -8,7 +8,7 @@ import gfx::*;
if_axil.s host_ctrl
);
- if_axib insn_mem();
+ if_axib data_mem(), insn_mem();
if_axil bootrom_axi(), debug_axi(), host_ctrl_axi(), sched_axi(), shader_0_axi();
logic irq_host_ctrl;
@@ -51,6 +51,7 @@ import gfx::*;
.clk,
.rst_n,
.sched(shader_0_axi.s),
+ .data_mem(data_mem.m),
.insn_mem(insn_mem.m)
);