diff options
| author | Alejandro Soto <alejandro@34project.org> | 2023-11-19 20:23:17 -0600 |
|---|---|---|
| committer | Alejandro Soto <alejandro@34project.org> | 2023-11-20 16:46:29 -0600 |
| commit | 2d3c392de957db2252f935a4f2eb3f9a76943966 (patch) | |
| tree | 305cfb53ef33739055481b45bc9c5188b7cd4392 /rtl/gfx | |
| parent | 33794ca29db5670bc140686ae6e6d3b7832ad406 (diff) | |
rtl/gfx: implement SP batch input stream
Diffstat (limited to '')
| -rw-r--r-- | rtl/gfx/gfx.sv | 10 | ||||
| -rw-r--r-- | rtl/gfx/gfx_defs.sv | 17 | ||||
| -rw-r--r-- | rtl/gfx/gfx_sp.sv | 15 | ||||
| -rw-r--r-- | rtl/gfx/gfx_sp_batch.sv | 144 |
4 files changed, 181 insertions, 5 deletions
diff --git a/rtl/gfx/gfx.sv b/rtl/gfx/gfx.sv index 5fa3f35..1da3550 100644 --- a/rtl/gfx/gfx.sv +++ b/rtl/gfx/gfx.sv @@ -35,8 +35,8 @@ module gfx .* ); - logic fetch_read; - vram_addr fetch_address; + logic batch_read, fetch_read; + vram_addr batch_address, fetch_address; gfx_sp sp ( @@ -124,10 +124,10 @@ module gfx .* ); - logic fetch_readdatavalid, fb_readdatavalid, - fetch_waitrequest, fb_waitrequest, rop_waitrequest; + logic batch_readdatavalid, fb_readdatavalid, fetch_readdatavalid, + batch_waitrequest, fb_waitrequest, fetch_waitrequest, rop_waitrequest; - vram_word fetch_readdata, fb_readdata; + vram_word batch_readdata, fb_readdata, fetch_readdata; gfx_mem mem ( diff --git a/rtl/gfx/gfx_defs.sv b/rtl/gfx/gfx_defs.sv index 0aa4055..08710a9 100644 --- a/rtl/gfx/gfx_defs.sv +++ b/rtl/gfx/gfx_defs.sv @@ -171,9 +171,14 @@ typedef logic[`GFX_MEM_WORD_ADDR_BITS - 1:0] vram_addr; `define GFX_INSN_BITS 32 `define GFX_INSN_ADDR_BITS (`GFX_MEM_WORD_ADDR_BITS - $clog2(`GFX_INSN_BITS / `GFX_MEM_DATA_BITS)) `define GFX_INSN_SUBWORD_BITS (`GFX_MEM_ADDR_BITS - `GFX_INSN_ADDR_BITS) +`define GFX_LANE_BITS $bits(mat4) +`define GFX_LANE_ADDR_BITS (`GFX_MEM_WORD_ADDR_BITS - $clog2(`GFX_LANE_BITS / `GFX_MEM_DATA_BITS)) +`define GFX_LANE_SUBWORD_BITS (`GFX_MEM_ADDR_BITS - `GFX_LANE_ADDR_BITS) typedef logic[`GFX_INSN_BITS - 1:0] insn_word; +typedef logic[`GFX_LANE_BITS - 1:0] lane_word; typedef logic[`GFX_INSN_ADDR_BITS - 1:0] vram_insn_addr; +typedef logic[`GFX_LANE_ADDR_BITS - 1:0] vram_lane_addr; typedef logic[5:0] cmd_addr; typedef logic[31:0] cmd_word; @@ -185,6 +190,18 @@ typedef struct packed logic[`GFX_INSN_SUBWORD_BITS - 1:0] sub; } cmd_insn_ptr; +typedef struct packed +{ + logic[$bits(cmd_word) - $bits(vram_lane_addr) - `GFX_LANE_SUBWORD_BITS - 1:0] pad; + vram_lane_addr addr; + logic[`GFX_LANE_SUBWORD_BITS - 1:0] sub; +} cmd_lane_ptr; + `define GFX_FETCH_FIFO_DEPTH 8 +`define GFX_BATCH_FIFO_DEPTH 4 +`define GFX_SP_LANES `VECS_PER_MAT + +typedef logic[`GFX_SP_LANES - 1:0] lane_mask; + `endif diff --git a/rtl/gfx/gfx_sp.sv b/rtl/gfx/gfx_sp.sv index b0e36aa..9a638c3 100644 --- a/rtl/gfx/gfx_sp.sv +++ b/rtl/gfx/gfx_sp.sv @@ -5,6 +5,12 @@ module gfx_sp input logic clk, rst_n, + input logic batch_waitrequest, + batch_readdatavalid, + input vram_word batch_readdata, + output vram_addr batch_address, + output logic batch_read, + input logic fetch_waitrequest, fetch_readdatavalid, input vram_word fetch_readdata, @@ -28,6 +34,15 @@ module gfx_sp .* ); + gfx_sp_batch batch + ( + .out_data(), + .out_mask(), + .out_ready(1), + .out_valid(), + .* + ); + logic batch_end; endmodule diff --git a/rtl/gfx/gfx_sp_batch.sv b/rtl/gfx/gfx_sp_batch.sv new file mode 100644 index 0000000..9c2c503 --- /dev/null +++ b/rtl/gfx/gfx_sp_batch.sv @@ -0,0 +1,144 @@ +`include "gfx/gfx_defs.sv" + +module gfx_sp_batch +( + input logic clk, + rst_n, + + input logic batch_waitrequest, + batch_readdatavalid, + input vram_word batch_readdata, + output vram_addr batch_address, + output logic batch_read, + + input logic batch_start, + input vram_insn_addr batch_base, + input cmd_word batch_length, + + output lane_word out_data, + output lane_mask out_mask, + input logic out_ready, + output logic out_valid +); + + localparam TAIL_BITS = $clog2($bits(lane_mask)), + BLOCK_BITS = $bits(batch_length) - TAIL_BITS; + + logic fifo_down, fifo_up, lane_read, lane_readdatavalid, lane_waitrequest; + lane_word lane_readdata; + vram_lane_addr aligned_batch_base, lane_address; + logic[TAIL_BITS - 1:0] batch_length_tail, read_tail; + logic[BLOCK_BITS - 1:0] batch_length_block, fetch_block_count, read_block_count; + logic[$clog2(`GFX_BATCH_FIFO_DEPTH + 1) - 1:0] fifo_pending; + + struct packed + { + lane_word data; + lane_mask mask; + } fifo_in, fifo_out; + + enum int unsigned + { + IDLE, + STREAM + } state; + + assign out_data = fifo_out.data; + assign out_mask = fifo_out.mask; + + assign fifo_up = out_ready && out_valid; + assign fifo_down = lane_read && !lane_waitrequest; + assign fifo_in.data = lane_readdata; + + assign {batch_length_block, batch_length_tail} = batch_length; + assign aligned_batch_base = batch_base[ + $bits(batch_base) - 1:$bits(batch_base) - $bits(vram_lane_addr) + ]; + + gfx_sp_widener #(.WIDTH($bits(vram_lane_addr))) lane_bus + ( + .wide_read(lane_read), + .wide_address(lane_address), + .wide_readdata(lane_readdata), + .wide_waitrequest(lane_waitrequest), + .wide_readdatavalid(lane_readdatavalid), + .word_read(batch_read), + .word_address(batch_address), + .word_readdata(batch_readdata), + .word_waitrequest(batch_waitrequest), + .word_readdatavalid(batch_readdatavalid), + .* + ); + + gfx_fifo #(.WIDTH($bits(fifo_in)), .DEPTH(`GFX_BATCH_FIFO_DEPTH)) lane_fifo + ( + .in(fifo_in), + .out(fifo_out), + .in_ready(), + .in_valid(lane_readdatavalid), + .* + ); + + always_comb begin + unique case (read_tail) + 2'b00: fifo_in.mask = 4'b0000; + 2'b01: fifo_in.mask = 4'b0001; + 2'b10: fifo_in.mask = 4'b0011; + 2'b11: fifo_in.mask = 4'b0111; + endcase + + if (read_block_count == 0) + fifo_in.mask = 4'b1111; + end + + always_ff @(posedge clk or negedge rst_n) + if (!rst_n) begin + state <= IDLE; + lane_read <= 0; + fifo_pending <= 0; + end else begin + unique case (state) + IDLE: + if (batch_start) begin + state <= STREAM; + lane_read <= 1; + end + + STREAM: begin + if (!lane_read || !lane_waitrequest) + lane_read <= fifo_pending < `GFX_BATCH_FIFO_DEPTH - 1; + + if (lane_read && !lane_waitrequest && read_block_count == 0) + state <= IDLE; + end + endcase + + if (fifo_up && !fifo_down) + fifo_pending <= fifo_pending - 1; + else if (!fifo_up && fifo_down) + fifo_pending <= fifo_pending + 1; + end + + always_ff @(posedge clk) begin + unique case (state) + IDLE: + if (batch_start) begin + read_tail <= batch_length_tail; + read_block_count <= batch_length_block; + fetch_block_count <= batch_length_block; + + lane_address <= aligned_batch_base; + end + + STREAM: + if (lane_read && !lane_waitrequest) begin + lane_address <= lane_address + 1; + fetch_block_count <= fetch_block_count - 1; + end + endcase + + if (lane_readdatavalid) + read_block_count <= read_block_count - 1; + end + +endmodule |
