summaryrefslogtreecommitdiff
path: root/rtl/gfx/gfx_sp_batch.sv
blob: b999219da82787b67d6b6aa86bf53f12ce321f59 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
`include "gfx/gfx_defs.sv"

// Batch fetcher.
//
// On batch_start (sampled while idle) this module issues a run of lane-wide
// reads starting at batch_base and pushes every returned word, together with
// a lane mask, into a FIFO that is drained through out_data/out_mask.
//
// batch_length is split into {block count, tail}: the low
// $clog2($bits(lane_mask)) bits are the tail, the remaining high bits are the
// block count. Exactly (block count + 1) lane reads are issued per batch. All
// words but the last carry a full mask; the last word carries a mask with only
// the lowest `tail` bits set (an all-zero mask when tail == 0).
//
// Ports:
//   clk, rst_n            - clock and active-low asynchronous reset
//   batch_waitrequest,
//   batch_readdatavalid,
//   batch_readdata,
//   batch_address,
//   batch_read            - word-wide read master towards memory
//   batch_start           - starts a batch; only honoured in the IDLE state
//   batch_base            - start address; the low `GFX_INSN_BITS_IN_LANE bits
//                           are dropped to form a lane address
//   batch_length          - batch length, split as described above
//   out_mask, out_data    - FIFO output: lane mask and lane-wide data
//   out_ready, out_valid  - not referenced explicitly in this module;
//                           presumably bound to lane_fifo's output handshake
//                           through `.*` - TODO confirm against gfx_fifo
module gfx_sp_batch
(
	input  logic          clk,
	                      rst_n,

	input  logic          batch_waitrequest,
	                      batch_readdatavalid,
	input  vram_word      batch_readdata,
	output vram_addr      batch_address,
	output logic          batch_read,

	input  logic          batch_start,
	input  vram_insn_addr batch_base,
	input  cmd_word       batch_length,

	output lane_mask      out_mask,
	output lane_word      out_data,
	input  logic          out_ready,
	output logic          out_valid
);

	// Width of the tail field (index within one lane group) and of the
	// remaining block-count field of batch_length
	localparam TAIL_BITS  = $clog2($bits(lane_mask)),
	           BLOCK_BITS = $bits(batch_length) - TAIL_BITS;

	// Mask with every bit set, sized from lane_mask so that the mask logic
	// below does not depend on a particular lane count
	localparam lane_mask ALL_LANES = '1;

	logic fifo_down_safe, lane_read, lane_readdatavalid, lane_waitrequest;
	lane_word lane_readdata;
	vram_lane_addr aligned_batch_base, lane_address;
	logic[TAIL_BITS - 1:0] batch_length_tail, read_tail;
	// fetch_block_count tracks reads still to be issued, read_block_count
	// tracks words still to be received. They are separate because read data
	// returns some time after the read is accepted.
	logic[BLOCK_BITS - 1:0] batch_length_block, fetch_block_count, read_block_count;

	// FIFO entry layout: one lane-wide data word plus its mask
	struct packed
	{
		lane_word data;
		lane_mask mask;
	} fifo_in, fifo_out;

	enum int unsigned
	{
		IDLE,
		STREAM
	} state;

	assign out_data = fifo_out.data;
	assign out_mask = fifo_out.mask;

	assign fifo_in.data = lane_readdata;

	assign {batch_length_block, batch_length_tail} = batch_length;
	// Drop the low address bits that select an instruction within a lane word
	assign aligned_batch_base = batch_base[`GFX_INSN_BITS_IN_LANE +: $bits(vram_lane_addr)];

	// Adapter between the lane-wide read port used here and the word-wide
	// batch_* port. clk/rst_n (and any other same-named ports) are bound by `.*`.
	// NOTE(review): presumably splits each lane read into several word reads
	// and reassembles the result - confirm against gfx_sp_widener.
	gfx_sp_widener #(.WIDTH($bits(vram_lane_addr))) lane_bus
	(
		.wide_read(lane_read),
		.wide_address(lane_address),
		.wide_readdata(lane_readdata),
		.wide_waitrequest(lane_waitrequest),
		.wide_readdatavalid(lane_readdatavalid),
		.word_read(batch_read),
		.word_address(batch_address),
		.word_readdata(batch_readdata),
		.word_waitrequest(batch_waitrequest),
		.word_readdatavalid(batch_readdatavalid),
		.*
	);

	// Output FIFO. in_ready is left unconnected: returned read data cannot be
	// back-pressured here, so reads are gated up front by fifo_down_safe instead.
	gfx_fifo #(.WIDTH($bits(fifo_in)), .DEPTH(`GFX_BATCH_FIFO_DEPTH)) lane_fifo
	(
		.in(fifo_in),
		.out(fifo_out),
		.in_ready(),
		.in_valid(lane_readdatavalid),
		.*
	);

	// `down` pulses once per accepted lane read; down_safe gates further reads.
	// NOTE(review): presumably this reserves a FIFO slot per in-flight read so
	// that lane_fifo can never overflow - confirm against gfx_fifo_overflow.
	gfx_fifo_overflow #(.DEPTH(`GFX_BATCH_FIFO_DEPTH)) overflow
	(
		.down(lane_read && !lane_waitrequest),
		.empty(),
		.down_safe(fifo_down_safe),
		.*
	);

	// Lane mask attached to the word currently arriving from memory
	always_comb begin
		// Last word of the batch (read_block_count == 0): only the lowest
		// read_tail bits are set, e.g. with four lanes 0 -> 0000, 1 -> 0001,
		// 2 -> 0011, 3 -> 0111
		fifo_in.mask = ~(ALL_LANES << read_tail);

		// Every word before the last one is a complete block. The full mask
		// must be selected while blocks remain, not once the count hits zero,
		// otherwise the tail mask would land on the full blocks.
		if (read_block_count != 0)
			fifo_in.mask = ALL_LANES;
	end

	// Control state: request issue and batch termination
	always_ff @(posedge clk or negedge rst_n)
		if (!rst_n) begin
			state <= IDLE;
			lane_read <= 0;
		end else unique case (state)
			IDLE:
				if (batch_start) begin
					state <= STREAM;
					lane_read <= 1;
				end

			STREAM: begin
				// Only change lane_read when no request is being stalled, so
				// that a pending request is held until it is accepted
				if (!lane_read || !lane_waitrequest)
					lane_read <= fifo_down_safe;

				// The read accepted while fetch_block_count == 0 is the final
				// one; this later assignment overrides the one above
				if (lane_read && !lane_waitrequest && fetch_block_count == 0) begin
					state <= IDLE;
					lane_read <= 0;
				end
			end
		endcase

	// Datapath registers (no reset): address, counters and latched tail
	always_ff @(posedge clk) begin
		unique case (state)
			IDLE:
				if (batch_start) begin
					read_tail <= batch_length_tail;
					read_block_count <= batch_length_block;
					fetch_block_count <= batch_length_block;

					lane_address <= aligned_batch_base;
				end

			STREAM:
				// Advance to the next lane word each time a read is accepted
				if (lane_read && !lane_waitrequest) begin
					lane_address <= lane_address + 1;
					fetch_block_count <= fetch_block_count - 1;
				end
		endcase

		// One word received. This statement comes after the case, so if data
		// arrives in the same cycle a new batch is accepted, the decrement
		// wins over the load above.
		// NOTE(review): presumably callers only start a batch after all data
		// of the previous one has returned - confirm.
		if (lane_readdatavalid)
			read_block_count <= read_block_count - 1;
	end

endmodule