diff options
| author | Alejandro Soto <alejandro@34project.org> | 2024-05-02 21:03:05 -0600 |
|---|---|---|
| committer | Alejandro Soto <alejandro@34project.org> | 2024-05-02 21:03:17 -0600 |
| commit | 405c0287c80c34b0e9dfb9d9326b86d12433b4c4 (patch) | |
| tree | ef38368c911bae30ff9c528dcf4a8fbfbc227fa7 /platform | |
| parent | 50b71c7f0ea2574eb4802e1a12fe8b0920a4ca7f (diff) | |
platform/wavelet3d: implement shader cores
This commit contains over a month of intermittent work (I don't have
enough free time to do this the right way)
Diffstat (limited to 'platform')
24 files changed, 2056 insertions, 128 deletions
diff --git a/platform/wavelet3d/gfx_axib.sv b/platform/wavelet3d/gfx_axib.sv new file mode 100644 index 0000000..7b3cbdc --- /dev/null +++ b/platform/wavelet3d/gfx_axib.sv @@ -0,0 +1,81 @@ +// AXI4 con burst +interface gfx_axib; + + import gfx::word; + + logic awvalid, + awready; + logic[7:0] awlen; + logic[1:0] awburst; + word awaddr; + + logic wlast; + logic wvalid; + logic wready; + word wdata; + + logic bvalid; + logic bready; + + logic arvalid, + arready; + logic[7:0] arlen; + logic[1:0] arburst; + word araddr; + + logic rlast; + logic rvalid; + logic rready; + word rdata; + + modport m + ( + input awready, + wready, + bvalid, + arready, + rlast, + rvalid, + rdata, + + output awlen, + awburst, + awvalid, + awaddr, + wlast, + wvalid, + wdata, + bready, + arlen, + arburst, + arvalid, + araddr, + rready + ); + + modport s + ( + input awlen, + awburst, + awvalid, + awaddr, + wlast, + wvalid, + wdata, + bready, + arlen, + arburst, + arvalid, + araddr, + rready, + + output awready, + wready, + bvalid, + arready, + rlast, + rvalid, + rdata + ); + +endinterface diff --git a/platform/wavelet3d/gfx_axil.sv b/platform/wavelet3d/gfx_axil.sv index f86dfbf..c254e26 100644 --- a/platform/wavelet3d/gfx_axil.sv +++ b/platform/wavelet3d/gfx_axil.sv @@ -24,39 +24,38 @@ interface gfx_axil; modport m ( input awready, - wready, - bvalid, - arready, - rvalid, - rdata, + wready, + bvalid, + arready, + rvalid, + rdata, output awvalid, - awaddr, - wvalid, - wdata, - bready, - arvalid, - araddr, - rready + awaddr, + wvalid, + wdata, + bready, + arvalid, + araddr, + rready ); modport s ( input awvalid, - awaddr, - wvalid, - wdata, - bready, - arvalid, - araddr, - rready, + awaddr, + wvalid, + wdata, + bready, + arvalid, + araddr, + rready, output awready, - wready, - bvalid, - arready, - rvalid, - rdata - + wready, + bvalid, + arready, + rvalid, + rdata ); endinterface diff --git a/platform/wavelet3d/gfx_beats.sv b/platform/wavelet3d/gfx_beats.sv new file mode 100644 index 
0000000..fcbb091 --- /dev/null +++ b/platform/wavelet3d/gfx_beats.sv @@ -0,0 +1,29 @@ +interface gfx_beats +#(int WIDTH = $bits(gfx::word)); + + logic[WIDTH - 1:0] data; + logic ready; + logic valid; + + modport tx + ( + input ready, + output data, + valid + ); + + modport rx + ( + input data, + valid, + output ready + ); + + modport peek + ( + input data, + ready, + valid + ); + +endinterface diff --git a/platform/wavelet3d/gfx_fifo.sv b/platform/wavelet3d/gfx_fifo.sv new file mode 100644 index 0000000..7174e4d --- /dev/null +++ b/platform/wavelet3d/gfx_fifo.sv @@ -0,0 +1,102 @@ +module gfx_fifo +#(int WIDTH = 0, + int DEPTH = 0) +( + input logic clk, + rst_n, + + gfx_beats.rx in, + gfx_beats.tx out +); + + logic do_read, do_write, full_if_eq, in_stall, out_stall, + may_read, may_write, read, read_ok, write; + + logic[WIDTH - 1:0] fifo[DEPTH], read_data, write_data; + logic[$clog2(DEPTH) - 1:0] read_ptr, write_ptr; + + assign do_read = read & may_read; + assign do_write = write & may_write; + + always_comb begin + may_read = full_if_eq; + may_write = !full_if_eq; + + if (read) + may_write = 1; + + if (read_ptr != write_ptr) begin + may_read = 1; + may_write = 1; + end + end + + gfx_skid_flow in_flow + ( + .clk, + .rst_n, + .stall(in_stall), + .in_ready(in.ready), + .in_valid(in.valid), + .out_ready(may_write), + .out_valid(write) + ); + + gfx_skid_flow out_flow + ( + .clk, + .rst_n, + .stall(out_stall), + .in_ready(read), + .in_valid(read_ok), + .out_ready(out.ready), + .out_valid(out.valid) + ); + + gfx_skid_buf #(WIDTH) in_skid + ( + .clk, + .in(in.data), + .out(write_data), + .stall(in_stall) + ); + + gfx_skid_buf #(WIDTH) out_skid + ( + .clk, + .in(read_data), + .out(out.data), + .stall(out_stall) + ); + + always_ff @(posedge clk or negedge rst_n) + if (~rst_n) begin + read_ok <= 0; + read_ptr <= 0; + write_ptr <= 0; + full_if_eq <= 0; + end else begin + if (~out_stall) + read_ok <= read && may_read; + + if (do_read) + read_ptr <= read_ptr + 1; + + if 
(do_write) + write_ptr <= write_ptr + 1; + + if (do_read & ~do_write) + full_if_eq <= 0; + else if (~do_read & do_write) + full_if_eq <= 1; + end + + always_ff @(posedge clk) begin + if (~out_stall) + read_data <= fifo[read_ptr]; + + if (may_write) + fifo[write_ptr] <= write_data; + end + +endmodule diff --git a/platform/wavelet3d/gfx_fpint.sv b/platform/wavelet3d/gfx_fpint.sv index b3108a4..ae2fc28 100644 --- a/platform/wavelet3d/gfx_fpint.sv +++ b/platform/wavelet3d/gfx_fpint.sv @@ -1,72 +1,33 @@ module gfx_fpint +import gfx::*; ( - input logic clk, - rst_n, + input logic clk, + rst_n, - input gfx::word a[gfx::SHADER_LANES], - b[gfx::SHADER_LANES], - input logic in_valid, - setup_mul_float, - setup_unit_b, - mnorm_put_hi, - mnorm_put_lo, - mnorm_put_mul, - mnorm_zero_b, - mnorm_zero_flags, - minmax_abs, - minmax_swap, - minmax_zero_min, - minmax_copy_flags, - shiftr_int_signed, - addsub_copy_flags, - addsub_int_operand, - clz_force_nop, - shiftl_copy_flags, - round_copy_flags, - round_enable, - encode_enable, + input fpint_op op, + input logic abort, + in_valid, - output logic out_valid, - output gfx::word q[gfx::SHADER_LANES] -); + gfx_regfile_io.ab read_data, - import gfx::*; + gfx_wb.tx wb +); logic stage_valid[FPINT_STAGES]; - fpint_op op, stage_op[FPINT_STAGES]; + fpint_op stage_op[FPINT_STAGES]; assign stage_op[0] = op; assign stage_valid[0] = in_valid; - assign op.setup_mul_float = setup_mul_float; - assign op.setup_unit_b = setup_unit_b; - assign op.mnorm_put_hi = mnorm_put_hi; - assign op.mnorm_put_lo = mnorm_put_lo; - assign op.mnorm_put_mul = mnorm_put_mul; - assign op.mnorm_zero_b = mnorm_zero_b; - assign op.mnorm_zero_flags = mnorm_zero_flags; - assign op.minmax_abs = minmax_abs; - assign op.minmax_swap = minmax_swap; - assign op.minmax_zero_min = minmax_zero_min; - assign op.minmax_copy_flags = minmax_copy_flags; - assign op.shiftr_int_signed = shiftr_int_signed; - assign op.addsub_copy_flags = addsub_copy_flags; - assign op.addsub_int_operand = 
addsub_int_operand; - assign op.clz_force_nop = clz_force_nop; - assign op.shiftl_copy_flags = shiftl_copy_flags; - assign op.round_copy_flags = round_copy_flags; - assign op.round_enable = round_enable; - assign op.encode_enable = encode_enable; - genvar lane; generate for (lane = 0; lane < SHADER_LANES; ++lane) begin: lanes gfx_fpint_lane unit ( .clk(clk), - .a(a[lane]), - .b(b[lane]), - .q(q[lane]), + .a(read_data.a[lane]), + .b(read_data.b[lane]), + .q(wb.lanes[lane]), .mul_float_0(stage_op[0].setup_mul_float), .unit_b_0(stage_op[0].setup_unit_b), .put_hi_2(stage_op[2].mnorm_put_hi), @@ -94,11 +55,21 @@ module gfx_fpint for (int i = 1; i < FPINT_STAGES; ++i) stage_op[i] <= stage_op[i - 1]; - always_ff @(posedge clk or negedge rst_n) begin - for (int i = 1; i < FPINT_STAGES; ++i) - stage_valid[i] <= !rst_n ? 0 : stage_valid[i - 1]; + always_ff @(posedge clk or negedge rst_n) + if (~rst_n) begin + for (int i = 1; i < FPINT_STAGES; ++i) + stage_valid[i] <= 0; - out_valid <= !rst_n ? 0 : stage_valid[FPINT_STAGES - 1]; - end + wb.valid <= 0; + end else begin + for (int i = 1; i < FPINT_STAGES; ++i) + stage_valid[i] <= stage_valid[i - 1]; + + // Se levanta 1 ciclo luego que in_valid + if (abort) + stage_valid[2] <= 0; + + wb.valid <= stage_valid[FPINT_STAGES - 1]; + end endmodule diff --git a/platform/wavelet3d/gfx_front_back.sv b/platform/wavelet3d/gfx_front_back.sv new file mode 100644 index 0000000..890b734 --- /dev/null +++ b/platform/wavelet3d/gfx_front_back.sv @@ -0,0 +1,37 @@ +interface gfx_front_back +import gfx::*;; + + struct + { + group_id group; + fpint_op p0; + mem_op p1; + sfu_op p2; + group_op p3; + } execute; + + struct + { + logic valid; + group_id group; + } loop; + + shader_dispatch dispatch; + + modport front + ( + input loop, + + output execute, + dispatch + ); + + modport back + ( + input execute, + dispatch, + + output loop + ); + +endinterface diff --git a/platform/wavelet3d/gfx_isa.sv b/platform/wavelet3d/gfx_isa.sv new file mode 100644 index 
0000000..873e6ec --- /dev/null +++ b/platform/wavelet3d/gfx_isa.sv @@ -0,0 +1,82 @@ +package gfx_isa; + + typedef logic[3:0] sgpr_num; + typedef logic[2:0] vgpr_num; + + typedef union packed + { + sgpr_num sgpr; + + struct packed + { + logic[$bits(sgpr_num) - $bits(vgpr_num) - 1:0] reserved; + vgpr_num num; + } vgpr; + } xgpr_num; + + typedef struct packed + { + enum logic[1:0] + { + REGS_SVS = 2'b00, + REGS_SSS = 2'b01, + REGS_VVS = 2'b10, + REGS_VVV = 2'b11 + } reg_mode; + + union packed + { + struct packed + { + logic b_is_imm; + + union packed + { + logic[12:0] imm; + + struct packed + { + logic from_consts; + logic[7:0] reserved; + xgpr_num r; + } read; + } b; + + xgpr_num ra, + rd; + } rr; + } dst_src; + + logic reg_rev; + + union packed + { + struct packed + { + enum logic[4:0] + { + INSN_FPINT_MOV = 0, + INSN_FPINT_FMUL = 1, + INSN_FPINT_IMUL = 2, + INSN_FPINT_FADD = 3, + INSN_FPINT_RES4 = 4, + INSN_FPINT_FMAX = 5, + INSN_FPINT_RES6 = 6, + INSN_FPINT_FMIN = 7, + INSN_FPINT_RES8 = 8, + INSN_FPINT_FCVT = 9, + INSN_FPINT_RES[10:31] + } op; + } fpint; + } by_class; + + enum logic[1:0] + { + INSN_FPINT = 0, + INSN_MEM = 1, + INSN_SFU = 2, + INSN_GROUP = 3 + } insn_class; + } insn_word; + +endpackage diff --git a/platform/wavelet3d/gfx_pkg.sv b/platform/wavelet3d/gfx_pkg.sv index 3c4b747..42d3f05 100644 --- a/platform/wavelet3d/gfx_pkg.sv +++ b/platform/wavelet3d/gfx_pkg.sv @@ -2,15 +2,22 @@ package gfx; typedef logic[31:0] word; - localparam int SUBWORD_BITS = $clog2($bits(word)) - $clog2($bits(byte)); - localparam int BYTES_PER_WORD = 1 << SUBWORD_BITS; - typedef word uword; typedef logic signed[$bits(word) - 1:0] sword; typedef logic[$bits(word) / 2 - 1:0] uhword; typedef logic signed[$bits(word) / 2 - 1:0] shword; typedef logic[2 * $bits(word) - 1:0] udword; typedef logic signed[2 * $bits(word) - 1:0] sdword; + typedef logic signed[4 * $bits(word) - 1:0] qword; + typedef logic signed[8 * $bits(word) - 1:0] oword; + + localparam int SUBWORD_BITS = 
$clog2($bits(word)) - $clog2($bits(byte)); + localparam int BYTES_PER_WORD = 1 << SUBWORD_BITS; + + typedef logic[$bits(word) - SUBWORD_BITS - 1:0] word_ptr; + typedef logic[$bits(word_ptr) - 1 - 1:0] dword_ptr; + typedef logic[$bits(word_ptr) - 2 - 1:0] qword_ptr; + typedef logic[$bits(word_ptr) - 3 - 1:0] oword_ptr; typedef logic[7:0] float_exp; typedef logic[$bits(word) - $bits(float_exp) - 2:0] float_mant; @@ -99,7 +106,8 @@ package gfx; shiftl_copy_flags, round_copy_flags, round_enable, - encode_enable; + encode_enable, + writeback; } fpint_op; typedef struct packed @@ -228,6 +236,21 @@ package gfx; overflow; } fpint_rnorm_encode; + typedef struct packed + { + logic todo; + } mem_op; + + typedef struct packed + { + logic todo; + } sfu_op; + + typedef struct packed + { + logic todo; + } group_op; + // Q22.10 typedef logic[9:0] fixed_frac; typedef logic[$bits(word) - $bits(fixed_frac) - 1:0] fixed_int; @@ -345,6 +368,24 @@ package gfx; typedef logic[RASTER_SIZE - 1:0] lane_no; typedef logic[SHADER_LANES - 1:0] lane_mask; + typedef logic[5:0] group_id; + + localparam int REGFILE_STAGES = 3; + localparam int REG_READ_STAGES = 2 + REGFILE_STAGES + 1; + + typedef gfx_isa::sgpr_num sgpr_num; + typedef gfx_isa::vgpr_num vgpr_num; + typedef gfx_isa::xgpr_num xgpr_num; + + typedef struct packed + { + // No incluye p0 porque p0 no tiene señal ready + logic p1, + p2, + p3, + valid; + } shader_dispatch; + localparam int FIXED_MULADD_DEPTH = 5; localparam int FIXED_DOTADD_DEPTH = 2 * FIXED_MULADD_DEPTH; diff --git a/platform/wavelet3d/gfx_raster.sv b/platform/wavelet3d/gfx_raster.sv index 0e740dc..a57a672 100644 --- a/platform/wavelet3d/gfx_raster.sv +++ b/platform/wavelet3d/gfx_raster.sv @@ -261,7 +261,7 @@ module gfx_raster_bounds endcase if (in_state == IN_DIM_Y & next_dim) - assert(geometry.tlast); + assert (geometry.tlast); end always_ff @(posedge clk) begin diff --git a/platform/wavelet3d/gfx_regfile_io.sv b/platform/wavelet3d/gfx_regfile_io.sv new file mode 100644 
index 0000000..49dcd5c --- /dev/null +++ b/platform/wavelet3d/gfx_regfile_io.sv @@ -0,0 +1,76 @@ +interface gfx_regfile_io; + + import gfx::*; + + struct + { + group_id group; + sgpr_num a_sgpr, + b_sgpr; + vgpr_num a_vgpr, + b_vgpr; + logic[12:0] b_imm; + logic a_scalar, + b_scalar, + b_is_imm, + b_is_const, + scalar_rev; + } op; + + struct + { + logic write; + group_id group; + sgpr_num sgpr; + word data; + } sgpr_write; + + struct + { + lane_mask mask; + group_id group; + vgpr_num vgpr; + word data[SHADER_LANES]; + } vgpr_write; + + word a[SHADER_LANES], b[SHADER_LANES], sgpr_write_data, vgpr_write_data[SHADER_LANES]; + word_ptr pc_front; + group_id pc_front_group; + + modport ab + ( + input a, + b + ); + + modport read + ( + output op + ); + + modport bind_ + ( + input pc_front, + + output pc_front_group + ); + + modport wb + ( + output sgpr_write, + vgpr_write + ); + + modport regs + ( + input op, + sgpr_write, + vgpr_write, + pc_front_group, + + output a, + b, + pc_front + ); + +endinterface diff --git a/platform/wavelet3d/gfx_shader.sv b/platform/wavelet3d/gfx_shader.sv new file mode 100644 index 0000000..3be6ed4 --- /dev/null +++ b/platform/wavelet3d/gfx_shader.sv @@ -0,0 +1,54 @@ +module gfx_shader +import gfx::*; +import gfx_shader_schedif_pkg::*; +( + input logic clk, + rst_n, + + gfx_axib.m insn_mem, + + axi4lite_intf.slave sched +); + + gfx_shader_schedif__in_t schedif_in; + gfx_shader_schedif__out_t schedif_out; + + gfx_front_back front_back(); + gfx_regfile_io regfile(); + + gfx_shader_front frontend + ( + .clk, + .rst_n, + .front(front_back.front), + .reg_bind(regfile.bind_), + .reg_read(regfile.read), + .fetch_mem(insn_mem), + .icache_flush(schedif_out.CORE.IFLUSH.value) + ); + + gfx_shader_back backend + ( + .clk, + .rst_n, + .back(front_back.back), + .reg_wb(regfile.wb), + .read_data(regfile.ab) + ); + + gfx_shader_regs regs + ( + .clk, + .io(regfile) + ); + + gfx_shader_schedif schedif + ( + .clk, + .arst_n(rst_n), + .s_axil(sched), + 
.hwif_in(schedif_in), + .hwif_out(schedif_out) + ); + +endmodule diff --git a/platform/wavelet3d/gfx_shader_back.sv b/platform/wavelet3d/gfx_shader_back.sv new file mode 100644 index 0000000..bc7aee9 --- /dev/null +++ b/platform/wavelet3d/gfx_shader_back.sv @@ -0,0 +1,194 @@ +module gfx_shader_back +import gfx::*; +( + input logic clk, + rst_n, + + gfx_front_back.back back, + + gfx_regfile_io.ab read_data, + gfx_regfile_io.wb reg_wb +); + + logic abort; + + gfx_wb out_wb(), p0_wb(), p1_wb(), p2_wb(), p3_wb(); + gfx_shake p1_shake(), p2_shake(), p3_shake(); + + gfx_shader_abort p0_abort + ( + .clk, + .p1(p1_shake.peek), + .p2(p2_shake.peek), + .p3(p3_shake.peek), + .abort + ); + + gfx_fpint p0 + ( + .clk, + .rst_n, + .op(back.execute.p0), + .wb(p0_wb.tx), + .abort, + .read_data, + .in_valid(back.dispatch.valid) + ); + + gfx_shader_mem p1 + ( + .clk, + .rst_n, + .op(back.execute.p1), + .wb(p1_wb.tx), + .in_shake(p1_shake.rx), + .read_data + ); + + gfx_shader_sfu p2 + ( + .clk, + .rst_n, + .op(back.execute.p2), + .wb(p2_wb.tx), + .in_shake(p2_shake.rx), + .read_data + ); + + gfx_shader_group p3 + ( + .clk, + .rst_n, + .op(back.execute.p3), + .wb(p3_wb.tx), + .in_shake(p3_shake.rx), + .read_data + ); + + gfx_shader_writeback_arbiter4 writeback_arbiter + ( + .clk, + .rst_n, + .p0(p0_wb.rx), + .p1(p1_wb.rx), + .p2(p2_wb.rx), + .p3(p3_wb.rx), + .out(out_wb.tx) + ); + + gfx_shader_writeback writeback + ( + .clk, + .rst_n, + .wb(out_wb.rx), + .regs(reg_wb) + ); + +endmodule + +module gfx_shader_abort +( + input logic clk, + + gfx_shake.peek p1, + p2, + p3, + + output logic abort +); + + always_ff @(posedge clk) + abort <= + (p1.valid & p1.ready) + | (p2.valid & p2.ready) + | (p3.valid & p3.ready); + +endmodule + +module gfx_shader_writeback_arbiter4 +( + input logic clk, + rst_n, + + gfx_wb.rx p0, + p1, + p2, + p3, + + gfx_wb.tx out +); + + assert property ( + @(posedge clk) + disable iff (~rst_n) + + (p0.ready & out.ready) + ); + + gfx_wb p0_p1(), p2_p3(); + + 
gfx_shader_writeback_arbiter2_prio arbiter_p0_p1 + ( + .clk, + .rst_n, + .a(p0), + .b(p1), + .out(p0_p1.tx) + ); + + gfx_shader_writeback_arbiter2_prio arbiter_p2_p3 + ( + .clk, + .rst_n, + .a(p2), + .b(p3), + .out(p2_p3.tx) + ); + + gfx_shader_writeback_arbiter2_prio arbiter_out + ( + .clk, + .rst_n, + .a(p0_p1.rx), + .b(p2_p3.rx), // receive side of the p2/p3 link; its tx side is driven by arbiter_p2_p3 + .out + ); + +endmodule + +module gfx_shader_writeback_arbiter2_prio +( + input logic clk, + rst_n, + + gfx_wb.rx a, + b, + + gfx_wb.tx out +); + + //TODO: placeholder, a is forwarded unconditionally and b is never granted + assign a.ready = out.ready; + assign b.ready = 0; + assign out.dest = a.dest; + assign out.lanes = a.lanes; + assign out.group = a.group; + assign out.valid = a.valid; + assign out.scalar = a.scalar; + assign out.writeback = a.writeback; + +endmodule + +module gfx_shader_writeback +( + input logic clk, + rst_n, + + gfx_wb.rx wb, + + gfx_regfile_io.wb regs +); + + + +endmodule diff --git a/platform/wavelet3d/gfx_shader_front.sv b/platform/wavelet3d/gfx_shader_front.sv new file mode 100644 index 0000000..5ad0203 --- /dev/null +++ b/platform/wavelet3d/gfx_shader_front.sv @@ -0,0 +1,718 @@ +typedef struct +{ + logic valid, + retry; + gfx::group_id group; + gfx_isa::insn_word insn; +} shader_front_wave; + +typedef logic[4:0] icache_line_num; + +typedef logic[$bits(gfx::oword_ptr) - $bits(icache_line_num) - 1:0] icache_tag; + +typedef struct packed +{ + icache_tag tag; + icache_line_num line; +} icache_line_tag; + +typedef struct packed +{ + icache_line_tag line_tag; + logic[2:0] word_num; +} icache_ptr; + +module gfx_shader_front +import gfx::*; +( + input logic clk, + rst_n, + + gfx_axib.m fetch_mem, + + input logic icache_flush, + + gfx_regfile_io.read reg_read, + gfx_regfile_io.bind_ reg_bind, + + gfx_front_back.front front +); + + word fetch_insn, port_insn; + logic fetch_hit, p0_writeback; + shader_front_wave bind_wave, dec_wave, port_dec_wave; + + gfx_shader_bind bind_ + ( + .clk, + .rst_n, + .mem(fetch_mem), + .wave(bind_wave), + .regs(reg_bind), + .loop_valid(front.loop.valid), 
+ .loop_group(front.loop.group), + .icache_flush + ); + + gfx_shader_read_regs reg_dec + ( + .clk, + .rst_n, + .in(bind_wave), + .out(dec_wave), + .read(reg_read) + ); + + gfx_shader_decode_class class_dec + ( + .clk, + .rst_n, + .wave(dec_wave), + .out_group(front.execute.group), + .port_wave(port_dec_wave), + .dispatch(front.dispatch), + .p0_writeback + ); + + gfx_shader_decode_fpint p0_dec + ( + .clk, + .op(front.execute.p0), + .insn(port_dec_wave.insn), + .writeback(p0_writeback) + ); + +endmodule + +module gfx_shader_bind +import gfx::*; +( + input logic clk, + rst_n, + + gfx_axib.m mem, + + input logic icache_flush, + + input logic loop_valid, + input group_id loop_group, + + gfx_regfile_io.bind_ regs, + + output shader_front_wave wave +); + + localparam int ICACHE_STAGES = 6; + localparam int BIND_STAGES = REGFILE_STAGES + ICACHE_STAGES; + + gfx_beats #($bits(group_id)) runnable_in(), runnable_out(); + + logic ar_stall, request_ready, request_valid, valids[BIND_STAGES]; + group_id groups[BIND_STAGES]; + icache_line_tag araddr, request_addr; + + assign mem.bready = 0; + assign mem.wvalid = 0; + assign mem.awvalid = 0; + + assign mem.arlen = ($bits(mem.arlen))'($bits(oword) / $bits(word) - 1); + assign mem.araddr = {araddr, ($clog2($bits(oword)) - $clog2($bits(word)) + SUBWORD_BITS)'('0)}; + assign mem.arburst = 2'b01; // Incremental mode + + assign runnable_in.tx.data = loop_group; + assign runnable_in.tx.valid = loop_valid; + + assign regs.pc_front_group = runnable_out.rx.data; + assign runnable_out.rx.ready = 1; + + assign wave.group = groups[$size(groups) - 1]; + + gfx_skid_buf #($bits(araddr)) ar_skid + ( + .clk, + .in(request_addr), + .out(araddr), + .stall(ar_stall) + ); + + gfx_skid_flow ar_flow + ( + .clk, + .rst_n, + .stall(ar_stall), + .in_ready(request_ready), + .in_valid(request_valid), + .out_ready(mem.arready), + .out_valid(mem.arvalid) + ); + + //TODO: Podríamos quitar ~25 entries sin afectar throughput, latencia o correctitud + gfx_fifo 
#(.WIDTH($bits(group_id)), .DEPTH(1 << $bits(group_id))) runnable + ( + .clk, + .rst_n, + .in(runnable_in.rx), + .out(runnable_out.tx) + ); + + gfx_shader_bind_icache icache + ( + .clk, + .rst_n, + + .icache_flush, + .read_addr(regs.pc_front), + .read_valid(valids[REGFILE_STAGES - 1]), + + .request_addr, + .request_valid, + .request_ready, + + .fetch_data(mem.rdata), + .fetch_last(mem.rlast), + .fetch_valid(mem.rvalid), + .fetch_ready(mem.rready), + + .insn(wave.insn), + .insn_retry(wave.retry), + .insn_valid(wave.valid) + ); + + always_ff @(posedge clk) begin + groups[0] <= runnable_out.rx.data; + for (int i = 1; i < $size(groups); ++i) + groups[i] <= groups[i - 1]; + end + + always_ff @(posedge clk or negedge rst_n) + if (~rst_n) + for (int i = 0; i < $size(valids); ++i) + valids[i] <= 0; + else begin + valids[0] <= runnable_out.rx.valid; + for (int i = 1; i < $size(valids); ++i) + valids[i] <= valids[i - 1]; + end + +endmodule + +module gfx_shader_bind_icache +import gfx::*; +( + input logic clk, + rst_n, + + input logic icache_flush, + + input logic read_valid, + input icache_ptr read_addr, + + input logic fetch_last, + fetch_valid, + input word fetch_data, + output logic fetch_ready, + + input logic request_ready, + output logic request_valid, + output icache_line_tag request_addr, + + output logic insn_valid, + insn_retry, + output word insn +); + + // Dan Gisselquist limita a (1 << 3) bursts por defecto. 
+ // Ver LGMAXBURST en axixbar.v + localparam int PENDING_FIFO_DEPTH = 8; + + enum int unsigned + { + FLUSH, + RUN + } state; + + struct + { + logic valid, + accessed, + hit; + icache_tag tag; + oword data; + } cache[1 << $bits(icache_line_num)], read, read_hold; + + gfx_beats #($bits(icache_line_tag)) pending_in(), pending_out(); + + logic accessed_write, accessed_write_enable, burst, fetch_done, hit_write, + in_flush, hit_commit, hit_write_enable, retry_4, retry_5, rollback, + tag_hit, valid_1, valid_2, valid_3, valid_4, valid_5, valid_write, + valid_write_enable; + + icache_ptr read_addr_1, read_addr_2, read_addr_3, read_addr_4, read_addr_5; + icache_tag tag_write; + icache_line_num accessed_write_line, flush_ptr, hit_write_line, valid_write_line; + icache_line_tag pending_pop; + + oword data_write; + word[1:0] data_5; + word[7:0] fetch_shift; + qword[1:0] data_3; + udword[1:0] data_4; + + assign data_3 = read.data; + assign tag_hit = read.tag == read_addr_3.line_tag.tag; + assign fetch_ready = ~fetch_done; + assign pending_pop = pending_out.rx.data; + + assign request_addr = read_addr_4.line_tag; + assign request_valid = burst & pending_in.tx.ready; + assign pending_in.tx.data = read_addr_4.line_tag; + assign pending_in.tx.valid = burst & request_ready; + assign pending_out.rx.ready = fetch_done & ~hit_commit & ~rollback; + + gfx_fifo #(.WIDTH($bits(icache_line_tag)), .DEPTH(PENDING_FIFO_DEPTH)) pending + ( + .clk, + .rst_n, + .in(pending_in.rx), + .out(pending_out.tx) + ); + + always_comb + unique case (state) + FLUSH: in_flush = 1; + RUN: in_flush = 0; + endcase + + always_ff @(posedge clk or negedge rst_n) + if (~rst_n) begin + state <= FLUSH; + flush_ptr <= '0; + fetch_done <= 0; + + valid_1 <= 0; + valid_2 <= 0; + valid_3 <= 0; + valid_4 <= 0; + valid_5 <= 0; + + burst <= 0; + end else begin + unique case (state) + FLUSH: + if (~icache_flush & &flush_ptr) + state <= RUN; + + RUN: + if (icache_flush) + state <= FLUSH; + endcase + + flush_ptr <= flush_ptr + 
1; + if (icache_flush) + flush_ptr <= '0; + + if (fetch_done) + fetch_done <= hit_commit | ~pending_out.rx.valid | rollback; + else if (fetch_ready & fetch_valid) + fetch_done <= fetch_last; + + valid_1 <= read_valid; + valid_2 <= valid_1; + valid_3 <= valid_2; + valid_4 <= valid_3; + valid_5 <= valid_4; + + burst <= valid_3 & ~tag_hit & ~read.accessed & (~read.valid | read.hit); + end + + always_ff @(posedge clk) begin + tag_write <= pending_pop.tag; + data_write <= fetch_shift; + + valid_write <= 1; + valid_write_line <= pending_pop.line; + valid_write_enable <= fetch_done & ~hit_commit & pending_out.rx.valid & ~rollback; + + accessed_write <= 0; + accessed_write_enable <= 1; + + if (rollback) + accessed_write_line <= read_addr_5.line_tag.line; + else if (fetch_done & ~hit_commit & pending_out.rx.valid) + accessed_write_line <= pending_pop.line; + else begin + accessed_write <= 1; + accessed_write_line <= read_addr.line_tag.line; + accessed_write_enable <= read_valid; + end + + hit_write <= hit_commit; + if (hit_commit) begin + hit_write_line <= read_addr_4.line_tag.line; + hit_write_enable <= 1; + end else begin + hit_write_line <= pending_pop.line; + hit_write_enable <= fetch_done & pending_out.rx.valid & ~rollback; + end + + if (in_flush) begin + valid_write <= 0; + valid_write_line <= flush_ptr; + valid_write_enable <= 1; + + accessed_write <= 0; + accessed_write_line <= flush_ptr; + accessed_write_enable <= 1; + + hit_write <= 0; + hit_write_line <= flush_ptr; + hit_write_enable <= 1; + end + + if (valid_write_enable) begin + cache[valid_write_line].tag <= tag_write; + cache[valid_write_line].data <= data_write; + cache[valid_write_line].valid <= valid_write; + end + + if (accessed_write_enable) + cache[accessed_write_line].accessed <= accessed_write; + + if (hit_write_enable) + cache[hit_write_line].hit <= hit_write; + + read_addr_1 <= read_addr; + + read_hold <= cache[read_addr_1.line_tag.line]; + read_addr_2 <= read_addr_1; + + read <= read_hold; + 
read_addr_3 <= read_addr_2; + + data_4 <= data_3[read_addr_3.word_num[2]]; + retry_4 <= ~tag_hit | ~read.valid; + hit_commit <= valid_3 & tag_hit & read.valid; + read_addr_4 <= read_addr_3; + + data_5 <= data_4[read_addr_4.word_num[1]]; + retry_5 <= retry_4; + rollback <= burst & (~request_valid | ~pending_in.tx.valid); + read_addr_5 <= read_addr_4; + + insn <= data_5[read_addr_5.word_num[0]]; + insn_retry <= retry_5; + insn_valid <= valid_5; + + if (fetch_ready & fetch_valid) begin + fetch_shift[0] <= fetch_data; + for (int i = 1; i < $size(fetch_shift); ++i) + fetch_shift[i] <= fetch_shift[i - 1]; + end + end + +endmodule + /* gfx_shader_read_regs: registers the operand selection decoded from in.insn into read.op and delays the wave through a HOLD_DEPTH-deep shift register (hold / hold_valid) before presenting it on out. */ +module gfx_shader_read_regs +import gfx::*; +import gfx_isa::*; +( + input logic clk, + rst_n, + + input shader_front_wave in, + + gfx_regfile_io.read read, + + output shader_front_wave out +); + /* NOTE(review): HOLD_DEPTH presumably tracks the register-file read latency minus the stages outside this module; confirm against the other users of REG_READ_STAGES. */ + localparam int HOLD_DEPTH = REG_READ_STAGES + 1 - 2; + + logic reg_rev; + logic hold_valid[HOLD_DEPTH]; + shader_front_wave hold[HOLD_DEPTH]; + + assign reg_rev = in.insn.reg_rev; + /* out is the last held wave, with its valid bit replaced by the resettable hold_valid chain. */ + always_comb begin + out = hold[$size(hold) - 1]; + out.valid = hold_valid[$size(hold_valid) - 1]; + end + /* Payload pipeline and operand request; no reset needed because validity is tracked separately in hold_valid. */ + always_ff @(posedge clk) begin + hold[0] <= in; + + for (int i = 1; i < HOLD_DEPTH; ++i) + hold[i] <= hold[i - 1]; + + read.op.group <= in.group; + + read.op.b_imm <= in.insn.dst_src.rr.b.imm; + read.op.a_sgpr <= in.insn.dst_src.rr.ra.sgpr; + read.op.b_sgpr <= in.insn.dst_src.rr.b.read.r.sgpr; + read.op.a_vgpr <= in.insn.dst_src.rr.ra.vgpr.num; + read.op.b_vgpr <= in.insn.dst_src.rr.b.read.r.vgpr.num; + read.op.b_is_imm <= in.insn.dst_src.rr.b_is_imm; + read.op.b_is_const <= in.insn.dst_src.rr.b.read.from_consts; + read.op.scalar_rev <= reg_rev; + /* Choose which operands are scalar: the mixed modes pick one side through reg_rev, SSS forces both scalar, VVV forces both vector. */ + unique case (in.insn.reg_mode) + REGS_SVS, REGS_VVS: begin + read.op.a_scalar <= reg_rev; + read.op.b_scalar <= ~reg_rev; + end + + REGS_SSS: begin + read.op.a_scalar <= 1; + read.op.b_scalar <= 1; + end + + REGS_VVV: begin + read.op.a_scalar <= 0; + read.op.b_scalar <= 0; + end + endcase + end + /* Valid-bit pipeline with asynchronous active-low reset. */ + always_ff 
@(posedge clk or negedge rst_n) + if (~rst_n) + /* Clear every stage, stage 0 included: the loop used to start at 1, which left hold_valid[0] without a reset value and let an unknown bit shift towards out.valid after reset. */ + for (int i = 0; i < HOLD_DEPTH; ++i) + hold_valid[i] <= 0; + else begin + hold_valid[0] <= in.valid; + + for (int i = 1; i < HOLD_DEPTH; ++i) + hold_valid[i] <= hold_valid[i - 1]; + end + +endmodule + /* gfx_shader_decode_class: holds the incoming wave for one cycle and decodes its instruction class into the dispatch strobes (p1 = memory, p2 = SFU, p3 = group). p0_writeback is high when the held instruction is none of those classes and is not a retry. */ +module gfx_shader_decode_class +import gfx::*; +import gfx_isa::*; +( + input logic clk, + rst_n, + + input shader_front_wave wave, + output shader_front_wave port_wave, + output group_id out_group, + + output shader_dispatch dispatch, + output logic p0_writeback +); + + logic is_fsu, is_mem, is_group, hold_valid, retry; + shader_front_wave hold_wave; + + assign p0_writeback = ~(is_mem | is_fsu | is_group | retry); + /* port_wave is the held wave with its valid bit taken from the resettable hold_valid flop. */ + always_comb begin + port_wave = hold_wave; + port_wave.valid = hold_valid; + end + + always_ff @(posedge clk) begin + hold_wave <= wave; + out_group <= port_wave.group; + end + + always_ff @(posedge clk or negedge rst_n) + // Intentionally repetitive + if (~rst_n) begin + is_fsu <= 0; + is_mem <= 0; + is_group <= 0; + + retry <= 0; + hold_valid <= 0; + + dispatch <= '0; + end else begin + is_fsu <= 0; + is_mem <= 0; + is_group <= 0; + + retry <= wave.retry; + hold_valid <= wave.valid; + + unique case (wave.insn.insn_class) + INSN_FPINT: ; // p0 has no ready + INSN_MEM: is_mem <= 1; + INSN_SFU: is_fsu <= 1; + INSN_GROUP: is_group <= 1; + + default: + {is_mem, is_fsu, is_group} <= 'x; + endcase + /* Strobes follow the class flags decoded on the previous cycle; the later assignments below override them when the held wave is invalid or must be retried. */ + dispatch.p1 <= is_mem; + dispatch.p2 <= is_fsu; + dispatch.p3 <= is_group; + + if (~hold_valid | retry) begin + dispatch.p1 <= 0; + dispatch.p2 <= 0; + dispatch.p3 <= 0; + end + + dispatch.valid <= hold_valid; + end + +endmodule + /* gfx_shader_decode_fpint: registered lookup from the fpint opcode to the fpint_op control bundle; writeback is registered alongside it. */ +module gfx_shader_decode_fpint +import gfx::*; +import gfx_isa::*; +( + input logic clk, + + input insn_word insn, + input logic writeback, + + output fpint_op op +); + + always_ff @(posedge clk) begin + unique case (insn.by_class.fpint.op) + INSN_FPINT_MOV: begin + op.setup_mul_float <= 0; + op.setup_unit_b <= 1; + op.mnorm_put_hi <= 0; + op.mnorm_put_lo <= 1; + op.mnorm_put_mul 
<= 0; + op.mnorm_zero_flags <= 1; + op.mnorm_zero_b <= 1; + op.minmax_abs <= 1; + op.minmax_swap <= 0; + op.minmax_zero_min <= 0; + op.minmax_copy_flags <= 1; + op.shiftr_int_signed <= 0; + op.addsub_int_operand <= 0; + op.addsub_copy_flags <= 1; + op.clz_force_nop <= 1; + op.shiftl_copy_flags <= 1; + op.round_copy_flags <= 1; + op.round_enable <= 1; + op.encode_enable <= 1; + end + /* FMUL: the only entry in this table that sets setup_mul_float and mnorm_put_mul. */ + INSN_FPINT_FMUL: begin + op.setup_mul_float <= 1; + op.setup_unit_b <= 0; + op.mnorm_put_hi <= 0; + op.mnorm_put_lo <= 0; + op.mnorm_put_mul <= 1; + op.mnorm_zero_flags <= 0; + op.mnorm_zero_b <= 1; + op.minmax_abs <= 1; + op.minmax_swap <= 0; + op.minmax_zero_min <= 0; + op.minmax_copy_flags <= 1; + op.shiftr_int_signed <= 0; + op.addsub_int_operand <= 0; + op.addsub_copy_flags <= 1; + op.clz_force_nop <= 1; + op.shiftl_copy_flags <= 1; + op.round_copy_flags <= 1; + op.round_enable <= 1; + op.encode_enable <= 1; + end + /* IMUL: setup_mul_float and setup_unit_b are both 0, and round_enable / encode_enable are off. */ + INSN_FPINT_IMUL: begin + op.setup_mul_float <= 0; + op.setup_unit_b <= 0; + op.mnorm_put_hi <= 0; + op.mnorm_put_lo <= 1; + op.mnorm_put_mul <= 0; + op.mnorm_zero_flags <= 1; + op.mnorm_zero_b <= 1; + op.minmax_abs <= 1; + op.minmax_swap <= 0; + op.minmax_zero_min <= 0; + op.minmax_copy_flags <= 1; + op.shiftr_int_signed <= 0; + op.addsub_int_operand <= 0; + op.addsub_copy_flags <= 1; + op.clz_force_nop <= 1; + op.shiftl_copy_flags <= 1; + op.round_copy_flags <= 1; + op.round_enable <= 0; + op.encode_enable <= 0; + end + /* FADD: every *_copy_flags bit and clz_force_nop are cleared; mnorm_zero_b is 0 as well. */ + INSN_FPINT_FADD: begin + op.setup_mul_float <= 0; + op.setup_unit_b <= 1; + op.mnorm_put_hi <= 0; + op.mnorm_put_lo <= 1; + op.mnorm_put_mul <= 0; + op.mnorm_zero_flags <= 0; + op.mnorm_zero_b <= 0; + op.minmax_abs <= 1; + op.minmax_swap <= 0; + op.minmax_zero_min <= 0; + op.minmax_copy_flags <= 0; + op.shiftr_int_signed <= 0; + op.addsub_int_operand <= 0; + op.addsub_copy_flags <= 0; + op.clz_force_nop <= 0; + op.shiftl_copy_flags <= 0; + op.round_copy_flags <= 0; + op.round_enable <= 1; + op.encode_enable <= 1; + end + /* FMAX and FMIN share one entry; minmax_swap is set only when the opcode is FMIN. */ + INSN_FPINT_FMAX, 
INSN_FPINT_FMIN: begin + op.setup_mul_float <= 0; + op.setup_unit_b <= 1; + op.mnorm_put_hi <= 0; + op.mnorm_put_lo <= 1; + op.mnorm_put_mul <= 0; + op.mnorm_zero_flags <= 0; + op.mnorm_zero_b <= 0; + op.minmax_abs <= 0; + op.minmax_swap <= insn.by_class.fpint.op == INSN_FPINT_FMIN; + op.minmax_zero_min <= 1; + op.minmax_copy_flags <= 1; + op.shiftr_int_signed <= 0; + op.addsub_int_operand <= 0; + op.addsub_copy_flags <= 1; + op.clz_force_nop <= 1; + op.shiftl_copy_flags <= 1; + op.round_copy_flags <= 1; + op.round_enable <= 0; + op.encode_enable <= 0; + end + /* FCVT: the only entry here that sets shiftr_int_signed and addsub_int_operand. */ + INSN_FPINT_FCVT: begin + op.setup_mul_float <= 0; + op.setup_unit_b <= 1; + op.mnorm_put_hi <= 0; + op.mnorm_put_lo <= 1; + op.mnorm_put_mul <= 0; + op.mnorm_zero_flags <= 1; + op.mnorm_zero_b <= 1; + + op.minmax_abs <= 1; + op.minmax_swap <= 0; + op.minmax_zero_min <= 0; + op.minmax_copy_flags <= 0; + op.shiftr_int_signed <= 1; + op.addsub_int_operand <= 1; + op.addsub_copy_flags <= 1; + op.clz_force_nop <= 0; + op.shiftl_copy_flags <= 0; + op.round_copy_flags <= 0; + op.round_enable <= 1; + op.encode_enable <= 1; + end + /* Any other opcode drives the whole control bundle to 'x. */ + default: + op <= 'x; + endcase + /* Assigned last so it overrides the 'x from the default branch: writeback is always the registered input. */ + op.writeback <= writeback; + end + +endmodule diff --git a/platform/wavelet3d/gfx_shader_group.sv b/platform/wavelet3d/gfx_shader_group.sv new file mode 100644 index 0000000..7659bb9 --- /dev/null +++ b/platform/wavelet3d/gfx_shader_group.sv @@ -0,0 +1,16 @@ +module gfx_shader_group +import gfx::*; +( + input logic clk, + rst_n, + + input group_op op, + + gfx_regfile_io.ab read_data, + + gfx_shake.rx in_shake, + + gfx_wb.tx wb +); + /* Placeholder: only the port list exists, no logic is implemented in this module. */ +endmodule diff --git a/platform/wavelet3d/gfx_shader_mem.sv b/platform/wavelet3d/gfx_shader_mem.sv new file mode 100644 index 0000000..97561fb --- /dev/null +++ b/platform/wavelet3d/gfx_shader_mem.sv @@ -0,0 +1,16 @@ +module gfx_shader_mem +import gfx::*; +( + input logic clk, + rst_n, + + input mem_op op, + + gfx_regfile_io.ab read_data, + + gfx_shake.rx in_shake, + + gfx_wb.tx wb +); + /* Placeholder: only the port list exists, no logic is implemented in this module. */ +endmodule diff --git 
a/platform/wavelet3d/gfx_shader_regs.sv b/platform/wavelet3d/gfx_shader_regs.sv new file mode 100644 index 0000000..7ae2e14 --- /dev/null +++ b/platform/wavelet3d/gfx_shader_regs.sv @@ -0,0 +1,253 @@ +/* gfx_shader_regs: scalar and per-lane vector register files plus the constant ROM and PC table. Operand selectors from io.op are delayed in shift registers so they line up with the read data, then io.a / io.b are driven per lane from either the scalar path or the lane's VGPR read data. */ +module gfx_shader_regs +import gfx::*; +( + input logic clk, + + gfx_regfile_io.regs io +); + + // verilator tracing_off + + word hold_imm[REGFILE_STAGES], imm_out, read_a_data_sgpr, read_b_data_scalar, + read_b_data_sgpr, read_const, read_a_data_vgpr[SHADER_LANES], + read_b_data_vgpr[SHADER_LANES], sgpr_out_a, sgpr_out_b; + + logic a_scalar_out, b_is_const_out, b_is_imm_out, b_scalar_out, scalar_rev_out; + group_id hold_read_group_1, hold_read_group_2; + sgpr_num hold_read_a_sgpr; + vgpr_num hold_read_a_vgpr_1, hold_read_a_vgpr_2, hold_read_b_vgpr_1, hold_read_b_vgpr_2; + logic[REGFILE_STAGES - 1:0] hold_b_is_imm, hold_b_is_const; + logic[REGFILE_STAGES + 1 - 1:0] hold_scalar_rev; + logic[REGFILE_STAGES + 2 - 1:0] hold_a_scalar, hold_b_scalar; + /* Each *_out signal is the oldest entry of its delay line. */ + assign imm_out = hold_imm[$size(hold_imm) - 1]; + assign a_scalar_out = hold_a_scalar[$bits(hold_a_scalar) - 1]; + assign b_scalar_out = hold_b_scalar[$bits(hold_b_scalar) - 1]; + assign b_is_imm_out = hold_b_is_imm[$bits(hold_b_is_imm) - 1]; + assign b_is_const_out = hold_b_is_const[$bits(hold_b_is_const) - 1]; + assign scalar_rev_out = hold_scalar_rev[$bits(hold_scalar_rev) - 1]; + + gfx_shader_pc_table pcs + ( + .clk, + .read(io.pc_front), + .read_group(io.pc_front_group) + ); + + gfx_shader_consts consts + ( + .clk, + .num(io.op.b_sgpr), + .value(read_const) + ); + /* Scalar register file, addressed by {group, register number}. Port a is addressed one cycle later than port b (hold_read_* versus io.op.*). */ + gfx_shader_regfile #($bits(group_id) + $bits(sgpr_num)) sgprs + ( + .clk, + + .read_a_num({hold_read_group_1, hold_read_a_sgpr}), + .read_b_num({io.op.group, io.op.b_sgpr}), + .read_a_data(read_a_data_sgpr), + .read_b_data(read_b_data_sgpr), + + .write(io.sgpr_write.write), + .write_num({io.sgpr_write.group, io.sgpr_write.sgpr}), + .write_data(io.sgpr_write.data) + ); + /* One vector register file per lane; both ports are addressed two cycles after io.op and the write enable is the per-lane mask bit. */ + generate + for (genvar i = 0; i < SHADER_LANES; ++i) begin: vgprs + gfx_shader_regfile 
#($bits(group_id) + $bits(vgpr_num)) vgprs + ( + .clk, + + .read_a_num({hold_read_group_2, hold_read_a_vgpr_2}), + .read_b_num({hold_read_group_2, hold_read_b_vgpr_2}), + .read_a_data(read_a_data_vgpr[i]), + .read_b_data(read_b_data_vgpr[i]), + + .write(io.vgpr_write.mask[i]), + .write_num({io.vgpr_write.group, io.vgpr_write.vgpr}), + .write_data(io.vgpr_write.data[i]) + ); + end + endgenerate + + always_ff @(posedge clk) begin + /* The immediate is zero-extended to a full word before entering its delay line. */ + hold_imm[0] <= {{($bits(word) - $bits(io.op.b_imm)){1'b0}}, io.op.b_imm}; + hold_a_scalar[0] <= io.op.a_scalar; + hold_b_scalar[0] <= io.op.b_scalar; + hold_b_is_imm[0] <= io.op.b_is_imm; + hold_b_is_const[0] <= io.op.b_is_const; + hold_scalar_rev[0] <= io.op.scalar_rev; + + for (int i = 1; i < REGFILE_STAGES; ++i) begin + hold_imm[i] <= hold_imm[i - 1]; + hold_a_scalar[i] <= hold_a_scalar[i - 1]; + hold_b_scalar[i] <= hold_b_scalar[i - 1]; + hold_b_is_imm[i] <= hold_b_is_imm[i - 1]; + hold_b_is_const[i] <= hold_b_is_const[i - 1]; + hold_scalar_rev[i] <= hold_scalar_rev[i - 1]; + end + /* a_scalar / b_scalar are consumed two stages later than the immediate, scalar_rev one stage later, hence the extra delay entries. */ + for (int i = REGFILE_STAGES; i < REGFILE_STAGES + 2; ++i) begin + hold_a_scalar[i] <= hold_a_scalar[i - 1]; + hold_b_scalar[i] <= hold_b_scalar[i - 1]; + end + + hold_scalar_rev[REGFILE_STAGES] <= hold_scalar_rev[REGFILE_STAGES - 1]; + + hold_read_a_sgpr <= io.op.a_sgpr; + hold_read_group_1 <= io.op.group; + hold_read_group_2 <= hold_read_group_1; + + hold_read_a_vgpr_1 <= io.op.a_vgpr; + hold_read_a_vgpr_2 <= hold_read_a_vgpr_1; + + hold_read_b_vgpr_1 <= io.op.b_vgpr; + hold_read_b_vgpr_2 <= hold_read_b_vgpr_1; + /* Scalar operand b: immediate has priority over the constant ROM, which has priority over the SGPR read. */ + if (b_is_imm_out) + read_b_data_scalar <= imm_out; + else if (b_is_const_out) + read_b_data_scalar <= read_const; + else + read_b_data_scalar <= read_b_data_sgpr; + /* scalar_rev swaps the two scalar operands. */ + if (scalar_rev_out) begin + sgpr_out_a <= read_b_data_scalar; + sgpr_out_b <= read_a_data_sgpr; + end else begin + sgpr_out_a <= read_a_data_sgpr; + sgpr_out_b <= read_b_data_scalar; + end + + for (int i = 0; i < SHADER_LANES; ++i) begin + io.a[i] <= a_scalar_out ? 
sgpr_out_a : read_a_data_vgpr[i]; + /* Vector operand b: when both operands are vectors it comes from VGPR read port b. It used to come from port a unconditionally, so b silently equalled a and read_b_data_vgpr was never consumed. When only b is a vector (a is scalar) the port-a source is kept as before. NOTE(review): confirm that reversed mixed mode is meant to take its vector from the a field. */ + io.b[i] <= b_scalar_out ? sgpr_out_b : (a_scalar_out ? read_a_data_vgpr[i] : read_b_data_vgpr[i]); + end + end + +endmodule + /* gfx_shader_consts: registered constant ROM indexed by an SGPR number; the index and the data are each registered, then value is registered once more. Only entries 0 to 4 are initialised here. */ +module gfx_shader_consts +import gfx::*; +( + input logic clk, + + input sgpr_num num, + output word value +); + + word hold_out, rom[1 << $bits(sgpr_num)]; + sgpr_num hold_in; + + always_ff @(posedge clk) begin + value <= hold_out; + hold_in <= num; + hold_out <= rom[hold_in]; + end + + initial begin + rom[0] = 'hffff_ffff; // -1 + rom[1] = 'h7fff_ffff; // 2^31 - 1, useful for fp abs + rom[2] = 'h8000_0000; // 2^31, useful for fp neg + rom[3] = 'h3f80_0000; // +1.0 + rom[4] = 'hbf80_0000; // -1.0 + end + +endmodule + /* gfx_shader_regfile: two read ports and one write port, built from two gfx_shader_regfile_port copies that receive the same write. */ +module gfx_shader_regfile +import gfx::*; +#(int DEPTH_LOG = 0) +( + input logic clk, + + input logic[DEPTH_LOG - 1:0] read_a_num, + read_b_num, + output word read_a_data, + read_b_data, + + input logic write, + input logic[DEPTH_LOG - 1:0] write_num, + input word write_data +); + + gfx_shader_regfile_port #(DEPTH_LOG) a + ( + .clk, + .write, + .read_num(read_a_num), + .read_data(read_a_data), + .write_num, + .write_data + ); + + gfx_shader_regfile_port #(DEPTH_LOG) b + ( + .clk, + .write, + .read_num(read_b_num), + .read_data(read_b_data), + .write_num, + .write_data + ); + +endmodule + /* gfx_shader_regfile_port: one-read one-write memory of 2**DEPTH_LOG words. The read address, the memory output and read_data are each registered; write inputs are registered once before the memory is updated. */ +module gfx_shader_regfile_port +import gfx::*; +#(int DEPTH_LOG = 0) +( + input logic clk, + + input logic[DEPTH_LOG - 1:0] read_num, + output word read_data, + + input logic write, + input logic[DEPTH_LOG - 1:0] write_num, + input word write_data +); + + word file[1 << DEPTH_LOG], hold_read_data, hold_write_data; + logic hold_write; + logic[DEPTH_LOG - 1:0] hold_read_num, hold_write_num; + + // hold_write does not need rst_n because any initial write is harmless + + always_ff @(posedge clk) begin + hold_write <= write; + hold_read_num <= read_num; + hold_write_num <= write_num; + hold_write_data <= write_data; + + hold_read_data <= file[hold_read_num]; + if (hold_write) + file[hold_write_num] <= hold_write_data; + + read_data <= 
hold_read_data; + end + +endmodule + /* gfx_shader_pc_table: per-group program counter lookup with three register stages (read_group_hold, read_hold, read). */ +module gfx_shader_pc_table +import gfx::*; +( + input logic clk, + + input group_id read_group, + + output word_ptr read +); + /* NOTE(review): nothing in this module writes pcs[], so a read returns whatever the memory initially holds; presumably the write path is still to be added. */ + group_id read_group_hold; + word_ptr pcs[1 << $bits(group_id)], read_hold; + + always_ff @(posedge clk) begin + read <= read_hold; + read_hold <= pcs[read_group_hold]; + read_group_hold <= read_group; + end + +endmodule diff --git a/platform/wavelet3d/gfx_shader_schedif.rdl b/platform/wavelet3d/gfx_shader_schedif.rdl new file mode 100644 index 0000000..2ab31ac --- /dev/null +++ b/platform/wavelet3d/gfx_shader_schedif.rdl @@ -0,0 +1,74 @@ +addrmap gfx_shader_schedif { + name = "Scheduler<->core interface"; + /* Map-wide defaults: fields are hardware-readable and software-writable unless a register overrides them. */ + default hw = r; + default sw = w; + default regwidth = 32; + + reg { + name = "Shader core control register"; + + field { + desc = "Set this field to flush the instruction cache"; + + singlepulse; + } IFLUSH[0:0] = 0; + } CORE @ 0x0; + + reg { + name = "Wavefront setup control register"; + /* Defaults are inverted for this register (hardware writes, software reads); GROUP and XGPR override them back to software read/write. */ + default hw = w; + default sw = r; + default precedence = hw; + + field { + desc = "Wavefront group number"; + + hw = r; + sw = rw; + } GROUP[5:0]; + /* NOTE(review): the field is named XGPR and SETUP_GPR is described as an SGPR/VGPR write, yet the desc below mentions only SGPR; confirm which is intended. */ + field { + desc = "Destination SGPR number"; + + hw = r; + sw = rw; + } XGPR[11:8]; + + field { + desc = "PC table update done, group submitted"; + + rclr; + hwset; + } SUBMIT_DONE[16:16] = 0; + + field { + desc = "General-purpose register update done"; + + rclr; + hwset; + } GPR_DONE[17:17] = 0; + } SETUP_CTRL @ 0x4; + + reg { + name = "SGPR/VGPR write register"; + + field { + desc = "Value to write"; + + swmod; + } VALUE[31:0]; + } SETUP_GPR @ 0x8; + + reg { + name = "Group submit register"; + + field { + desc = "Initial group program counter, submits group on write"; + + swmod; + } PC[31:2]; + } SETUP_SUBMIT @ 0xc; +}; + diff --git a/platform/wavelet3d/gfx_shader_sfu.sv b/platform/wavelet3d/gfx_shader_sfu.sv new file mode 100644 index 0000000..614d5a1 --- /dev/null +++ b/platform/wavelet3d/gfx_shader_sfu.sv @@ -0,0 +1,16 @@ +module gfx_shader_sfu +import 
gfx::*; +( + input logic clk, + rst_n, + + input sfu_op op, + + gfx_regfile_io.ab read_data, + + gfx_shake.rx in_shake, + + gfx_wb.tx wb +); + /* Placeholder: only the port list exists, no logic is implemented in this module. */ +endmodule diff --git a/platform/wavelet3d/gfx_shake.sv b/platform/wavelet3d/gfx_shake.sv new file mode 100644 index 0000000..baae0c3 --- /dev/null +++ b/platform/wavelet3d/gfx_shake.sv @@ -0,0 +1,24 @@ +interface gfx_shake; + /* Ready/valid handshake pair: tx drives valid, rx drives ready, peek only observes both. */ + logic ready; + logic valid; + + modport tx + ( + input ready, + output valid + ); + + modport rx + ( + input valid, + output ready + ); + + modport peek + ( + input ready, + valid + ); + +endinterface diff --git a/platform/wavelet3d/gfx_skid_buf.sv b/platform/wavelet3d/gfx_skid_buf.sv new file mode 100644 index 0000000..e3e5247 --- /dev/null +++ b/platform/wavelet3d/gfx_skid_buf.sv @@ -0,0 +1,20 @@ +module gfx_skid_buf +#(int WIDTH = 0) +( + input logic clk, + + input logic[WIDTH - 1:0] in, + input logic stall, + + output logic[WIDTH - 1:0] out +); + /* Data path of a skid buffer: while stall is low, in passes straight to out and is also captured into skid; while stall is high, out shows the captured value. */ + logic[WIDTH - 1:0] skid; + + assign out = stall ? skid : in; + + always_ff @(posedge clk) + if (~stall) + skid <= in; + +endmodule diff --git a/platform/wavelet3d/gfx_skid_flow.sv b/platform/wavelet3d/gfx_skid_flow.sv new file mode 100644 index 0000000..7890ae3 --- /dev/null +++ b/platform/wavelet3d/gfx_skid_flow.sv @@ -0,0 +1,31 @@ +module gfx_skid_flow +( + input logic clk, + rst_n, + + input logic in_valid, + out_ready, + + output logic in_ready, + out_valid, + stall +); + /* Flow control for a skid buffer, presumably paired with gfx_skid_buf through the stall output. was_ready is out_ready delayed one cycle; was_valid is in_valid sampled on the last non-stalled cycle. */ + logic was_ready, was_valid; + /* Upstream is accepted when the previous cycle was ready or held nothing valid; otherwise the stage stalls and keeps out_valid high. */ + assign stall = ~in_ready; + assign in_ready = was_ready | ~was_valid; + assign out_valid = in_valid | stall; + + always_ff @(posedge clk or negedge rst_n) + if (~rst_n) begin + was_ready <= 0; + was_valid <= 0; + end else begin + was_ready <= out_ready; + + if (~stall) + was_valid <= in_valid; + end + +endmodule diff --git a/platform/wavelet3d/gfx_top.sv b/platform/wavelet3d/gfx_top.sv index 1a57b90..b6538d7 100644 --- a/platform/wavelet3d/gfx_top.sv +++ b/platform/wavelet3d/gfx_top.sv @@ -1,47 +1,56 @@ module gfx_top +import gfx::*; ( - input 
logic clk, - rst_n, - - input gfx::word a[gfx::SHADER_LANES], - b[gfx::SHADER_LANES], - input logic in_valid, - setup_mul_float, - setup_unit_b, - mnorm_put_hi, - mnorm_put_lo, - mnorm_put_mul, - mnorm_zero_b, - mnorm_zero_flags, - minmax_abs, - minmax_swap, - minmax_zero_min, - minmax_copy_flags, - shiftr_int_signed, - addsub_copy_flags, - addsub_int_operand, - clz_force_nop, - shiftl_copy_flags, - round_copy_flags, - round_enable, - encode_enable, - - output logic out_valid, - output gfx::word q[gfx::SHADER_LANES], - - input gfx::word geom_tdata, - input logic geom_tlast, - geom_tvalid, - output logic geom_tready, - - input logic raster_tready, - output logic raster_tlast, - raster_tvalid, - output gfx::word raster_tdata + input logic clk, + rst_n, + + input word a[SHADER_LANES], + b[SHADER_LANES], + input logic in_valid, + setup_mul_float, + setup_unit_b, + mnorm_put_hi, + mnorm_put_lo, + mnorm_put_mul, + mnorm_zero_b, + mnorm_zero_flags, + minmax_abs, + minmax_swap, + minmax_zero_min, + minmax_copy_flags, + shiftr_int_signed, + addsub_copy_flags, + addsub_int_operand, + clz_force_nop, + shiftl_copy_flags, + round_copy_flags, + round_enable, + encode_enable, + + output logic out_valid, + output word q[SHADER_LANES], + + input word geom_tdata, + input logic geom_tlast, + geom_tvalid, + output logic geom_tready, + + input logic raster_tready, + output logic raster_tlast, + raster_tvalid, + output word raster_tdata ); + gfx_wb fpint_wb(); + gfx_axib insn_mem(); gfx_axil sched_axi(); gfx_pkts geometry(), coverage(); + gfx_regfile_io fpint_io(); + + axi4lite_intf #(.ADDR_WIDTH(4)) core_sched(); + + assign q = fpint_wb.rx.lanes; + assign out_valid = fpint_wb.rx.valid; assign geometry.tx.tdata = geom_tdata; assign geometry.tx.tlast = geom_tlast; @@ -53,9 +62,40 @@ module gfx_top assign raster_tvalid = coverage.rx.tvalid; assign coverage.rx.tready = raster_tready; + fpint_op op; + assign op.writeback = 1; + assign op.setup_mul_float = setup_mul_float; + assign 
op.setup_unit_b = setup_unit_b; + assign op.mnorm_put_hi = mnorm_put_hi; + assign op.mnorm_put_lo = mnorm_put_lo; + assign op.mnorm_put_mul = mnorm_put_mul; + assign op.mnorm_zero_b = mnorm_zero_b; + assign op.mnorm_zero_flags = mnorm_zero_flags; + assign op.minmax_abs = minmax_abs; + assign op.minmax_swap = minmax_swap; + assign op.minmax_zero_min = minmax_zero_min; + assign op.minmax_copy_flags = minmax_copy_flags; + assign op.shiftr_int_signed = shiftr_int_signed; + assign op.addsub_copy_flags = addsub_copy_flags; + assign op.addsub_int_operand = addsub_int_operand; + assign op.clz_force_nop = clz_force_nop; + assign op.shiftl_copy_flags = shiftl_copy_flags; + assign op.round_copy_flags = round_copy_flags; + assign op.round_enable = round_enable; + assign op.encode_enable = encode_enable; + + assign fpint_io.regs.a = a; + assign fpint_io.regs.b = b; + gfx_fpint fpint ( - .* + .clk, + .rst_n, + .op, + .wb(fpint_wb.tx), + .abort(0), + .in_valid, + .read_data(fpint_io.ab) ); gfx_sched sched @@ -74,4 +114,12 @@ module gfx_top .coverage(coverage.tx) ); + gfx_shader shader + ( + .clk, + .rst_n, + .sched(core_sched.slave), + .insn_mem(insn_mem.m) + ); + endmodule diff --git a/platform/wavelet3d/gfx_wb.sv b/platform/wavelet3d/gfx_wb.sv new file mode 100644 index 0000000..cc25944 --- /dev/null +++ b/platform/wavelet3d/gfx_wb.sv @@ -0,0 +1,35 @@ +interface gfx_wb; + + import gfx::*; + + word lanes[SHADER_LANES]; + logic ready, scalar, valid, writeback; + group_id group; + xgpr_num dest; + + modport tx + ( + input ready, + + output dest, + group, + lanes, + valid, + scalar, + writeback + ); + + modport rx + ( + input dest, + group, + lanes, + valid, + scalar, + writeback, + + output ready + ); + + +endinterface diff --git a/platform/wavelet3d/mod.mk b/platform/wavelet3d/mod.mk index b1e51f8..153f9c7 100644 --- a/platform/wavelet3d/mod.mk +++ b/platform/wavelet3d/mod.mk @@ -1,10 +1,21 @@ +cores := gfx_shader_schedif + define core - $(this)/deps := axixbar picorv32 + 
$(this)/deps := axixbar fp_unit gfx_shader_schedif picorv32 $(this)/rtl_top := gfx_top $(this)/rtl_dirs := . - $(this)/rtl_files := gfx_pkg.sv gfx_top.sv + $(this)/rtl_files := gfx_isa.sv gfx_pkg.sv gfx_top.sv $(this)/vl_main := main.cpp $(this)/vl_pkgconfig := sdl2 endef + +define core/gfx_shader_schedif + $(this)/hooks := regblock + + $(this)/regblock_rdl := gfx_shader_schedif.rdl + $(this)/regblock_top := gfx_shader_schedif + $(this)/regblock_args := --default-reset arst_n + $(this)/regblock_cpuif := axi4-lite +endef |
