diff options
| author | Alejandro Soto <alejandro@34project.org> | 2023-11-21 17:14:58 -0600 |
|---|---|---|
| committer | Alejandro Soto <alejandro@34project.org> | 2023-11-21 18:03:15 -0600 |
| commit | d2df92448a7aaaff9ae72f99bf4bcd00a6e55d8c (patch) | |
| tree | 7426719821476739dce9092dbc7cb2425c9efa5b /rtl/gfx | |
| parent | d076c33ffb6e3c0d96ee6b5dce0fcf48be8d3582 (diff) | |
rtl/gfx: implement SP issue
Diffstat (limited to '')
| -rw-r--r-- | rtl/gfx/gfx.sv | 2 | ||||
| -rw-r--r-- | rtl/gfx/gfx_defs.sv | 7 | ||||
| -rw-r--r-- | rtl/gfx/gfx_sp.sv | 37 | ||||
| -rw-r--r-- | rtl/gfx/gfx_sp_decode.sv | 2 | ||||
| -rw-r--r-- | rtl/gfx/gfx_sp_file.sv | 2 | ||||
| -rw-r--r-- | rtl/gfx/gfx_sp_isa.sv (renamed from rtl/gfx/gfx_isa.sv) | 4 | ||||
| -rw-r--r-- | rtl/gfx/gfx_sp_issue.sv | 111 |
7 files changed, 141 insertions, 24 deletions
diff --git a/rtl/gfx/gfx.sv b/rtl/gfx/gfx.sv index e88dc56..2398826 100644 --- a/rtl/gfx/gfx.sv +++ b/rtl/gfx/gfx.sv @@ -43,7 +43,7 @@ module gfx .* ); - logic batch_read, fetch_read, send_valid; + logic batch_read, fetch_read, running, send_valid; lane_word send_data; lane_mask send_mask; vram_addr batch_address, fetch_address; diff --git a/rtl/gfx/gfx_defs.sv b/rtl/gfx/gfx_defs.sv index 70664d1..ec1ea8d 100644 --- a/rtl/gfx/gfx_defs.sv +++ b/rtl/gfx/gfx_defs.sv @@ -6,7 +6,7 @@ `define FLOATS_PER_VEC 4 `define VECS_PER_MAT 4 -// Target de 200MHz (reloj es 143MHz) con float16, rounding aproximado +// Target de 200MHz (reloj es 143MHz) con float16, rounding (muy) aproximado `define FP_ADD_STAGES 10 // ~401 LUTs `define FP_MUL_STAGES 5 // ~144 LUTs ~1 bloque DSP `define FP_INV_STAGES 3 // ~178 LUTs ~1 bloque DSP @@ -218,7 +218,10 @@ typedef logic[`FLOATS_PER_VEC - 1:0] vec_mask; typedef logic[`FLOATS_PER_VEC - 1:0][$clog2(`FLOATS_PER_VEC) - 1:0] swizzle_lanes; -typedef logic[2:0] vreg_num; +`define GFX_SP_REG_BITS 3 +`define GFX_SP_REG_COUNT (1 << `GFX_SP_REG_BITS) + +typedef logic[`GFX_SP_REG_BITS - 1:0] vreg_num; typedef struct packed { diff --git a/rtl/gfx/gfx_sp.sv b/rtl/gfx/gfx_sp.sv index 4adac7e..ce0f9ff 100644 --- a/rtl/gfx/gfx_sp.sv +++ b/rtl/gfx/gfx_sp.sv @@ -20,6 +20,7 @@ module gfx_sp input logic program_start, input cmd_word program_header_base, program_header_size, + output logic running, input logic send_ready, output logic send_valid, @@ -27,7 +28,7 @@ module gfx_sp output lane_mask send_mask ); - logic batch_start, clear_lanes, insn_valid, running; + logic batch_start, clear_lanes, insn_valid; cmd_word batch_length; insn_word insn; vram_insn_addr batch_base; @@ -47,6 +48,14 @@ module gfx_sp .* ); + logic deco_ready, combiner_issue_valid, shuffler_issue_valid, stream_issue_valid; + vreg_num rd_a_reg, rd_b_reg; + + gfx_sp_issue issue + ( + .* + ); + logic recv_valid; lane_word recv_data; lane_mask recv_mask; @@ -60,43 +69,40 @@ module gfx_sp .* ); - 
logic shuffler_wb_valid; + logic shuffler_issue_ready, shuffler_wb_valid; wb_op shuffler_wb; gfx_sp_shuffler shuffler ( .wb(shuffler_wb), - .deco(), - .in_ready(), - .in_valid(0), + .in_ready(shuffler_issue_ready), + .in_valid(shuffler_issue_valid), .wb_ready(shuffler_wb_ready), .wb_valid(shuffler_wb_valid), .* ); - logic combiner_wb_valid; + logic combiner_issue_ready, combiner_wb_valid; wb_op combiner_wb; gfx_sp_combiner combiner ( .wb(combiner_wb), - .deco(), - .in_ready(), - .in_valid(0), + .in_ready(combiner_issue_ready), + .in_valid(combiner_issue_valid), .wb_ready(combiner_wb_ready), .wb_valid(combiner_wb_valid), .* ); - logic recv_ready, stream_wb_valid; + logic recv_ready, stream_issue_ready, stream_wb_valid; wb_op stream_wb; gfx_sp_stream stream ( .wb(stream_wb), - .deco(), - .in_ready(), - .in_valid(0), + .in_ready(stream_issue_ready), + .in_valid(stream_issue_valid), .wb_ready(stream_wb_ready), .wb_valid(stream_wb_valid), .* @@ -115,14 +121,11 @@ module gfx_sp gfx_sp_regs regs ( - .rd_a_reg(), - .rd_b_reg(), .rd_a_data(a), .rd_b_data(b), .* ); - logic batch_end, deco_ready; - assign deco_ready = 1; + logic batch_end; endmodule diff --git a/rtl/gfx/gfx_sp_decode.sv b/rtl/gfx/gfx_sp_decode.sv index 41ce438..d54077d 100644 --- a/rtl/gfx/gfx_sp_decode.sv +++ b/rtl/gfx/gfx_sp_decode.sv @@ -1,5 +1,5 @@ -`include "gfx/gfx_isa.sv" `include "gfx/gfx_defs.sv" +`include "gfx/gfx_sp_isa.sv" module gfx_sp_decode ( diff --git a/rtl/gfx/gfx_sp_file.sv b/rtl/gfx/gfx_sp_file.sv index 5dced6e..e98ee18 100644 --- a/rtl/gfx/gfx_sp_file.sv +++ b/rtl/gfx/gfx_sp_file.sv @@ -12,7 +12,7 @@ module gfx_sp_file input vec4 wr_data ); - vec4 file[1 << $bits(vreg_num)], hold_rd_data, hold_wr_data; + vec4 file[`GFX_SP_REG_COUNT], hold_rd_data, hold_wr_data; logic hold_wr; vreg_num hold_rd_reg, hold_wr_reg; diff --git a/rtl/gfx/gfx_isa.sv b/rtl/gfx/gfx_sp_isa.sv index 18a28e4..1420d95 100644 --- a/rtl/gfx/gfx_isa.sv +++ b/rtl/gfx/gfx_sp_isa.sv @@ -1,5 +1,5 @@ -`ifndef GFX_ISA_SV 
-`define GFX_ISA_SV
+`ifndef GFX_SP_ISA_SV
+`define GFX_SP_ISA_SV
 
 `include "gfx/gfx_defs.sv"
 
diff --git a/rtl/gfx/gfx_sp_issue.sv b/rtl/gfx/gfx_sp_issue.sv
new file mode 100644
index 0000000..6934e39
--- /dev/null
+++ b/rtl/gfx/gfx_sp_issue.sv
@@ -0,0 +1,111 @@
+`include "gfx/gfx_defs.sv"
+// Issue FSM: raises stream/combiner/shuffler *_issue_valid for one decoded instruction at a time.
+module gfx_sp_issue
+(
+	input  logic     clk,
+	                 rst_n,
+
+	input  insn_deco deco,
+	input  logic     deco_valid,
+	output logic     deco_ready,
+
+	output vreg_num  rd_a_reg,
+	                 rd_b_reg,
+
+	input  logic     stream_issue_ready,
+	output logic     stream_issue_valid,
+
+	input  logic     combiner_issue_ready,
+	output logic     combiner_issue_valid,
+
+	input  logic     shuffler_issue_ready,
+	output logic     shuffler_issue_valid,
+
+	input  logic     wr,
+	input  vreg_num  wr_reg
+);
+
+	/* This could be fully pipelined, but there was no time, and in any
+	 * case it would make no difference given the very poor bandwidth.
+	 */
+
+	logic data_hazard, rd_a_hazard, rd_b_hazard, wr_hazard, writing_a, writing_b, writing_dst,
+	      busy[`GFX_SP_REG_COUNT]; // busy[r]: set in HAZARDS for deco.dst, cleared by wr/wr_reg
+
+	enum int unsigned
+	{
+		IDLE,    // waits for deco_valid
+		HAZARDS, // stays here while data_hazard is set
+		ISSUE,   // raises the *_issue_valid flags selected by deco.ex
+		WAIT     // holds the valid flags until deco_ready
+	} state;
+
+	assign rd_a_reg = deco.src_a;
+	assign rd_b_reg = deco.src_b;
+	// NOTE(review): rd_b_hazard assumes insn_deco declares read_src_b next to read_src_a; confirm in gfx_defs.sv.
+	assign wr_hazard = deco.writeback && writing_dst;
+	assign rd_a_hazard = deco.read_src_a && writing_a;
+	assign rd_b_hazard = deco.read_src_b && writing_b; // was gated by read_src_a (copy-paste slip)
+	assign data_hazard = rd_a_hazard || rd_b_hazard || wr_hazard;
+	// High while any unit has both its ready input and its valid output asserted.
+	assign deco_ready = (stream_issue_ready && stream_issue_valid)
+	                 || (combiner_issue_ready && combiner_issue_valid)
+	                 || (shuffler_issue_ready && shuffler_issue_valid);
+
+	always_ff @(posedge clk or negedge rst_n)
+		if (!rst_n) begin
+			state <= IDLE;
+
+			stream_issue_valid <= 0;
+			combiner_issue_valid <= 0;
+			shuffler_issue_valid <= 0;
+
+			for (integer i = 0; i < `GFX_SP_REG_COUNT; ++i)
+				busy[i] <= 0;
+		end else begin
+			unique case (state)
+				IDLE:
+					if (deco_valid)
+						state <= HAZARDS;
+
+				HAZARDS:
+					if (!data_hazard) begin
+						state <= ISSUE;
+						if (deco.writeback)
+							busy[deco.dst] <= 1;
+					end
+
+				ISSUE: begin
+					state <= WAIT;
+
+					if (deco.ex.stream)
+						stream_issue_valid <= 1;
+
+					if (deco.ex.combiner)
+						combiner_issue_valid <= 1;
+
+					if (deco.ex.shuffler)
+						shuffler_issue_valid <= 1;
+				end
+
+				WAIT:
+					if (deco_ready) begin
+						state <= IDLE;
+
+						stream_issue_valid <= 0;
+						combiner_issue_valid <= 0;
+						shuffler_issue_valid <= 0;
+					end
+			endcase
+			// Comes after the case: if one register is set and cleared in the same cycle, this later assignment takes effect.
+			if (wr)
+				busy[wr_reg] <= 0;
+		end
+	// No reset on these flops: writing_* hold busy[] as sampled on the previous clock edge.
+	always_ff @(posedge clk) begin
+		writing_a <= busy[deco.src_a];
+		writing_b <= busy[deco.src_b];
+		writing_dst <= busy[deco.dst];
+	end
+
+endmodule |
