summaryrefslogtreecommitdiff
path: root/rtl
diff options
context:
space:
mode:
authorAlejandro Soto <alejandro@34project.org>2023-11-19 20:23:17 -0600
committerAlejandro Soto <alejandro@34project.org>2023-11-20 16:46:29 -0600
commit2d3c392de957db2252f935a4f2eb3f9a76943966 (patch)
tree305cfb53ef33739055481b45bc9c5188b7cd4392 /rtl
parent33794ca29db5670bc140686ae6e6d3b7832ad406 (diff)
rtl/gfx: implement SP batch input stream
Diffstat (limited to 'rtl')
-rw-r--r--rtl/gfx/gfx.sv10
-rw-r--r--rtl/gfx/gfx_defs.sv17
-rw-r--r--rtl/gfx/gfx_sp.sv15
-rw-r--r--rtl/gfx/gfx_sp_batch.sv144
4 files changed, 181 insertions, 5 deletions
diff --git a/rtl/gfx/gfx.sv b/rtl/gfx/gfx.sv
index 5fa3f35..1da3550 100644
--- a/rtl/gfx/gfx.sv
+++ b/rtl/gfx/gfx.sv
@@ -35,8 +35,8 @@ module gfx
.*
);
- logic fetch_read;
- vram_addr fetch_address;
+ logic batch_read, fetch_read;
+ vram_addr batch_address, fetch_address;
gfx_sp sp
(
@@ -124,10 +124,10 @@ module gfx
.*
);
- logic fetch_readdatavalid, fb_readdatavalid,
- fetch_waitrequest, fb_waitrequest, rop_waitrequest;
+ logic batch_readdatavalid, fb_readdatavalid, fetch_readdatavalid,
+ batch_waitrequest, fb_waitrequest, fetch_waitrequest, rop_waitrequest;
- vram_word fetch_readdata, fb_readdata;
+ vram_word batch_readdata, fb_readdata, fetch_readdata;
gfx_mem mem
(
diff --git a/rtl/gfx/gfx_defs.sv b/rtl/gfx/gfx_defs.sv
index 0aa4055..08710a9 100644
--- a/rtl/gfx/gfx_defs.sv
+++ b/rtl/gfx/gfx_defs.sv
@@ -171,9 +171,14 @@ typedef logic[`GFX_MEM_WORD_ADDR_BITS - 1:0] vram_addr;
`define GFX_INSN_BITS 32
`define GFX_INSN_ADDR_BITS (`GFX_MEM_WORD_ADDR_BITS - $clog2(`GFX_INSN_BITS / `GFX_MEM_DATA_BITS))
`define GFX_INSN_SUBWORD_BITS (`GFX_MEM_ADDR_BITS - `GFX_INSN_ADDR_BITS)
+`define GFX_LANE_BITS $bits(mat4)
+`define GFX_LANE_ADDR_BITS (`GFX_MEM_WORD_ADDR_BITS - $clog2(`GFX_LANE_BITS / `GFX_MEM_DATA_BITS))
+`define GFX_LANE_SUBWORD_BITS (`GFX_MEM_ADDR_BITS - `GFX_LANE_ADDR_BITS)
typedef logic[`GFX_INSN_BITS - 1:0] insn_word;
+typedef logic[`GFX_LANE_BITS - 1:0] lane_word;
typedef logic[`GFX_INSN_ADDR_BITS - 1:0] vram_insn_addr;
+typedef logic[`GFX_LANE_ADDR_BITS - 1:0] vram_lane_addr;
typedef logic[5:0] cmd_addr;
typedef logic[31:0] cmd_word;
@@ -185,6 +190,18 @@ typedef struct packed
logic[`GFX_INSN_SUBWORD_BITS - 1:0] sub;
} cmd_insn_ptr;
+typedef struct packed // Lane pointer view of a command word; the three fields together span exactly $bits(cmd_word)
+{
+ logic[$bits(cmd_word) - $bits(vram_lane_addr) - `GFX_LANE_SUBWORD_BITS - 1:0] pad; // most-significant filler: whatever cmd_word bits remain after addr and sub
+ vram_lane_addr addr; // lane-granular address, `GFX_LANE_ADDR_BITS wide
+ logic[`GFX_LANE_SUBWORD_BITS - 1:0] sub; // least-significant bits, below lane granularity
+} cmd_lane_ptr;
+
`define GFX_FETCH_FIFO_DEPTH 8
+`define GFX_BATCH_FIFO_DEPTH 4
+`define GFX_SP_LANES `VECS_PER_MAT
+
+typedef logic[`GFX_SP_LANES - 1:0] lane_mask;
+
`endif
diff --git a/rtl/gfx/gfx_sp.sv b/rtl/gfx/gfx_sp.sv
index b0e36aa..9a638c3 100644
--- a/rtl/gfx/gfx_sp.sv
+++ b/rtl/gfx/gfx_sp.sv
@@ -5,6 +5,12 @@ module gfx_sp
input logic clk,
rst_n,
+ input logic batch_waitrequest,
+ batch_readdatavalid,
+ input vram_word batch_readdata,
+ output vram_addr batch_address,
+ output logic batch_read,
+
input logic fetch_waitrequest,
fetch_readdatavalid,
input vram_word fetch_readdata,
@@ -28,6 +34,15 @@ module gfx_sp
.*
);
+ // Batch input streamer. Its output side is not consumed yet: data, mask
+ // and valid are left unconnected and out_ready is tied high, so every
+ // entry it offers is accepted immediately. The remaining ports (clock,
+ // reset, batch_* memory port, ...) bind by name through .*
+ gfx_sp_batch batch
+ (
+ .out_data(),
+ .out_mask(),
+ // Sized literal: a bare 1 is 32 bits wide and would be implicitly
+ // truncated onto this 1-bit port (lint width warning).
+ .out_ready(1'b1),
+ .out_valid(),
+ .*
+ );
+
logic batch_end;
endmodule
diff --git a/rtl/gfx/gfx_sp_batch.sv b/rtl/gfx/gfx_sp_batch.sv
new file mode 100644
index 0000000..9c2c503
--- /dev/null
+++ b/rtl/gfx/gfx_sp_batch.sv
@@ -0,0 +1,144 @@
+`include "gfx/gfx_defs.sv"
+
+// gfx_sp_batch: streams the input lanes of one batch out of memory.
+//
+// On batch_start (sampled while idle) the module latches batch_base and
+// batch_length, then issues lane-wide reads on consecutive lane addresses
+// through gfx_sp_widener. Each returned lane word is pushed into lane_fifo
+// together with a lane mask and is offered on out_data/out_mask.
+//
+// batch_length is split into {block count, tail}: the low TAIL_BITS bits
+// are the tail, the rest is the number of fully populated lane words.
+// batch_length_block + 1 lane words are fetched in total: the full words
+// first (mask all ones), then one final word whose mask has only the low
+// `tail` bits set (an empty mask when tail is 0).
+//
+// Ports:
+//   clk, rst_n                  clock and active-low asynchronous reset
+//   batch_waitrequest/readdatavalid/readdata/address/read
+//                               word-wide memory read port
+//   batch_start                 starts a batch; only sampled in IDLE
+//   batch_base                  base address; only its upper
+//                               $bits(vram_lane_addr) bits are used
+//   batch_length                {block count, tail} as described above
+//   out_data, out_mask          lane word and mask at the FIFO head
+//   out_ready, out_valid        output handshake
+//
+// NOTE(review): out_valid and out_ready are not referenced by name on any
+// instance below; they are expected to bind to lane_fifo through .* --
+// gfx_fifo's port list is not visible here, confirm.
+module gfx_sp_batch
+(
+    input  logic          clk,
+                          rst_n,
+
+    input  logic          batch_waitrequest,
+                          batch_readdatavalid,
+    input  vram_word      batch_readdata,
+    output vram_addr      batch_address,
+    output logic          batch_read,
+
+    input  logic          batch_start,
+    input  vram_insn_addr batch_base,
+    input  cmd_word       batch_length,
+
+    output lane_word      out_data,
+    output lane_mask      out_mask,
+    input  logic          out_ready,
+    output logic          out_valid
+);
+
+    // TAIL_BITS:  bits needed to count the lanes of a partial word.
+    // BLOCK_BITS: remaining batch_length bits, counting full lane words.
+    localparam TAIL_BITS  = $clog2($bits(lane_mask)),
+               BLOCK_BITS = $bits(batch_length) - TAIL_BITS;
+
+    logic fifo_down, fifo_up, lane_read, lane_readdatavalid, lane_waitrequest;
+    lane_word lane_readdata;
+    vram_lane_addr aligned_batch_base, lane_address;
+    logic[TAIL_BITS - 1:0] batch_length_tail, read_tail;
+    // fetch_block_count: lane requests still to be issued after the current one.
+    // read_block_count:  lane responses still to arrive after the current one.
+    logic[BLOCK_BITS - 1:0] batch_length_block, fetch_block_count, read_block_count;
+    // Accepted lane requests whose data has not yet left through the output.
+    logic[$clog2(`GFX_BATCH_FIFO_DEPTH + 1) - 1:0] fifo_pending;
+
+    struct packed
+    {
+        lane_word data;
+        lane_mask mask;
+    } fifo_in, fifo_out;
+
+    enum int unsigned
+    {
+        IDLE,
+        STREAM
+    } state;
+
+    assign out_data = fifo_out.data;
+    assign out_mask = fifo_out.mask;
+
+    // fifo_up:   an entry leaves through the output handshake (frees a slot).
+    // fifo_down: a lane request is accepted by the bus (reserves a slot).
+    assign fifo_up = out_ready && out_valid;
+    assign fifo_down = lane_read && !lane_waitrequest;
+    assign fifo_in.data = lane_readdata;
+
+    assign {batch_length_block, batch_length_tail} = batch_length;
+    // Keep only the upper bits of batch_base, dropping the low bits that
+    // do not fit in a lane address.
+    assign aligned_batch_base = batch_base[
+        $bits(batch_base) - 1:$bits(batch_base) - $bits(vram_lane_addr)
+    ];
+
+    // NOTE(review): presumably turns each lane-wide read into word reads on
+    // the batch_* port and reassembles the result -- gfx_sp_widener is
+    // defined elsewhere, confirm.
+    gfx_sp_widener #(.WIDTH($bits(vram_lane_addr))) lane_bus
+    (
+        .wide_read(lane_read),
+        .wide_address(lane_address),
+        .wide_readdata(lane_readdata),
+        .wide_waitrequest(lane_waitrequest),
+        .wide_readdatavalid(lane_readdatavalid),
+        .word_read(batch_read),
+        .word_address(batch_address),
+        .word_readdata(batch_readdata),
+        .word_waitrequest(batch_waitrequest),
+        .word_readdatavalid(batch_readdatavalid),
+        .*
+    );
+
+    // in_ready is left open: overflow is avoided by throttling lane_read
+    // with fifo_pending instead of back-pressuring the response path.
+    gfx_fifo #(.WIDTH($bits(fifo_in)), .DEPTH(`GFX_BATCH_FIFO_DEPTH)) lane_fifo
+    (
+        .in(fifo_in),
+        .out(fifo_out),
+        .in_ready(),
+        .in_valid(lane_readdatavalid),
+        .*
+    );
+
+    // Mask attached to the lane word currently being returned.
+    always_comb begin
+        // Final word of the batch: only the low read_tail lanes are valid.
+        // Derived from the lane_mask width rather than hard-coded for 4 lanes.
+        fifo_in.mask = ~({$bits(lane_mask){1'b1}} << read_tail);
+
+        // Every word before the final one is fully populated.
+        if (read_block_count != 0)
+            fifo_in.mask = '1;
+    end
+
+    always_ff @(posedge clk or negedge rst_n)
+        if (!rst_n) begin
+            state <= IDLE;
+            lane_read <= 0;
+            fifo_pending <= 0;
+        end else begin
+            unique case (state)
+                IDLE:
+                    if (batch_start) begin
+                        state <= STREAM;
+                        lane_read <= 1;
+                    end
+
+                STREAM: begin
+                    // Re-evaluate the request strobe whenever no request is
+                    // stalled. The "- 1" leaves room for a request accepted
+                    // in this very cycle.
+                    if (!lane_read || !lane_waitrequest)
+                        lane_read <= fifo_pending < `GFX_BATCH_FIFO_DEPTH - 1;
+
+                    // The last request (fetch_block_count == 0) was accepted:
+                    // stop issuing reads and go idle. This later nonblocking
+                    // assignment overrides the throttle result above so that
+                    // no further reads are issued once idle.
+                    if (fifo_down && fetch_block_count == 0) begin
+                        state <= IDLE;
+                        lane_read <= 0;
+                    end
+                end
+            endcase
+
+            // Simultaneous accept and pop cancel out.
+            if (fifo_up && !fifo_down)
+                fifo_pending <= fifo_pending - 1;
+            else if (!fifo_up && fifo_down)
+                fifo_pending <= fifo_pending + 1;
+        end
+
+    // Datapath registers: no reset needed, they are loaded on batch_start.
+    always_ff @(posedge clk) begin
+        unique case (state)
+            IDLE:
+                if (batch_start) begin
+                    read_tail <= batch_length_tail;
+                    read_block_count <= batch_length_block;
+                    fetch_block_count <= batch_length_block;
+
+                    lane_address <= aligned_batch_base;
+                end
+
+            STREAM:
+                if (fifo_down) begin
+                    lane_address <= lane_address + 1;
+                    fetch_block_count <= fetch_block_count - 1;
+                end
+        endcase
+
+        // Placed after the case so that a response arriving in any state is
+        // counted (it takes priority over the load above).
+        if (lane_readdatavalid)
+            read_block_count <= read_block_count - 1;
+    end
+
+endmodule