diff options
| author | Alejandro Soto <alejandro@34project.org> | 2024-05-05 17:38:55 -0600 |
|---|---|---|
| committer | Alejandro Soto <alejandro@34project.org> | 2024-05-05 18:12:08 -0600 |
| commit | ca02833f22b08ceeeff501107371aa6667426115 (patch) | |
| tree | f864c5fc238a292082d2096ce546270badce9f1d /platform | |
| parent | 081a8a3ba8bfe036f31da53f9c041a2caa30fce2 (diff) | |
rtl/gfx: rename platform/wavelet3d -> rtl/gfx
Diffstat (limited to 'platform')
37 files changed, 4 insertions, 5128 deletions
diff --git a/platform/wavelet3d/gfx_axib.sv b/platform/wavelet3d/gfx_axib.sv deleted file mode 100644 index 7b3cbdc..0000000 --- a/platform/wavelet3d/gfx_axib.sv +++ /dev/null @@ -1,81 +0,0 @@ -// AXI4 con burst -interface gfx_axib; - - import gfx::word; - - logic awvalid, - awready; - logic[7:0] awlen; - logic[1:0] awburst; - word awaddr; - - logic wlast; - logic wvalid; - logic wready; - word wdata; - - logic bvalid; - logic bready; - - logic arvalid, - arready; - logic[7:0] arlen; - logic[1:0] arburst; - word araddr; - - logic rlast; - logic rvalid; - logic rready; - word rdata; - - modport m - ( - input awready, - wready, - bvalid, - arready, - rlast, - rvalid, - rdata, - - output awlen, - awburst, - awvalid, - awaddr, - wlast, - wvalid, - wdata, - bready, - arlen, - arburst, - arvalid, - araddr, - rready - ); - - modport s - ( - input awlen, - awburst, - awvalid, - awaddr, - wlast, - wvalid, - wdata, - bready, - arlen, - arburst, - arvalid, - araddr, - rready, - - output awready, - wready, - bvalid, - arready, - rlast, - rvalid, - rdata - ); - -endinterface diff --git a/platform/wavelet3d/gfx_axil.sv b/platform/wavelet3d/gfx_axil.sv deleted file mode 100644 index c254e26..0000000 --- a/platform/wavelet3d/gfx_axil.sv +++ /dev/null @@ -1,61 +0,0 @@ -// AXI4-Lite, sin wstrb ni axprot -interface gfx_axil; - import gfx::*; - - logic awvalid; - logic awready; - word awaddr; - - logic wvalid; - logic wready; - word wdata; - - logic bvalid; - logic bready; - - logic arvalid; - logic arready; - word araddr; - - logic rvalid; - logic rready; - word rdata; - - modport m - ( - input awready, - wready, - bvalid, - arready, - rvalid, - rdata, - - output awvalid, - awaddr, - wvalid, - wdata, - bready, - arvalid, - araddr, - rready - ); - - modport s - ( - input awvalid, - awaddr, - wvalid, - wdata, - bready, - arvalid, - araddr, - rready, - - output awready, - wready, - bvalid, - arready, - rvalid, - rdata - ); -endinterface diff --git 
a/platform/wavelet3d/gfx_axil2regblock.sv b/platform/wavelet3d/gfx_axil2regblock.sv deleted file mode 100644 index 2449b05..0000000 --- a/platform/wavelet3d/gfx_axil2regblock.sv +++ /dev/null @@ -1,30 +0,0 @@ -module gfx_axil2regblock -( - gfx_axil.s axis, - axi4lite_intf.master axim -); - - assign axis.rdata = axim.RDATA; - assign axis.rvalid = axim.RVALID; - assign axis.bvalid = axim.BVALID; - assign axis.wready = axim.WREADY; - assign axis.arready = axim.ARREADY; - assign axis.awready = axim.AWREADY; - - assign axim.AWVALID = axis.awvalid; - assign axim.AWADDR = axis.awaddr[$bits(axim.AWADDR) - 1:0]; - assign axim.AWPROT = '0; - - assign axim.WVALID = axis.wvalid; - assign axim.WDATA = axis.wdata; - assign axim.WSTRB = '1; - - assign axim.BREADY = axis.bready; - - assign axim.ARVALID = axis.arvalid; - assign axim.ARADDR = axis.araddr[$bits(axim.ARADDR) - 1:0]; - assign axim.ARPROT = '0; - - assign axim.RREADY = axis.rready; - -endmodule diff --git a/platform/wavelet3d/gfx_beats.sv b/platform/wavelet3d/gfx_beats.sv deleted file mode 100644 index fcbb091..0000000 --- a/platform/wavelet3d/gfx_beats.sv +++ /dev/null @@ -1,29 +0,0 @@ -interface gfx_beats -#(int WIDTH = $bits(gfx::word)); - - logic[WIDTH - 1:0] data; - logic ready; - logic valid; - - modport tx - ( - input ready, - output data, - valid - ); - - modport rx - ( - input data, - valid, - output ready - ); - - modport peek - ( - input data, - ready, - valid - ); - -endinterface diff --git a/platform/wavelet3d/gfx_bootrom.sv b/platform/wavelet3d/gfx_bootrom.sv deleted file mode 100644 index 2c4581e..0000000 --- a/platform/wavelet3d/gfx_bootrom.sv +++ /dev/null @@ -1,66 +0,0 @@ -module gfx_bootrom -import gfx::*; -( - input logic clk, - rst_n, - - gfx_axil.s axis -); - - localparam ROM_WORDS_LOG = 8; - - enum int unsigned - { - WAIT, - READ, - RDATA, - READY - } state; - - word read, rom[1 << ROM_WORDS_LOG]; - logic[ROM_WORDS_LOG - 1:0] read_addr; - - assign axis.bvalid = 0; - assign axis.wready = 0; - 
assign axis.awready = 0; - - always_ff @(posedge clk or negedge rst_n) - if (~rst_n) begin - state <= WAIT; - axis.rvalid <= 0; - axis.arready <= 0; - end else begin - axis.arready <= 0; - - unique case (state) - WAIT: - if (axis.arvalid & ~axis.arready) - state <= READ; - - READ: - state <= RDATA; - - RDATA: begin - state <= READY; - axis.rvalid <= 1; - end - - READY: - if (axis.rready) begin - state <= WAIT; - axis.rvalid <= 0; - axis.arready <= 1; - end - endcase - end - - always_ff @(posedge clk) begin - read <= rom[read_addr]; - read_addr <= axis.araddr[$bits(read_addr) + SUBWORD_BITS - 1:SUBWORD_BITS]; - axis.rdata <= read; - end - - initial - $readmemh("gfx_bootrom.hex", rom); - -endmodule diff --git a/platform/wavelet3d/gfx_clz.sv b/platform/wavelet3d/gfx_clz.sv deleted file mode 100644 index 8d6f100..0000000 --- a/platform/wavelet3d/gfx_clz.sv +++ /dev/null @@ -1,68 +0,0 @@ -/* Implementación en árbol de count leading zeros (CLZ). - * WIDTH debe ser una potencia de 2. - */ -module gfx_clz -#(int WIDTH = 0) -( - input logic clk, - - input logic[WIDTH - 1:0] value, - output logic[$clog2(WIDTH):0] clz -); - - genvar i; - generate - if (WIDTH <= 1) begin - always_ff @(posedge clk) - clz <= !value; - end else if (WIDTH == 2) begin - always_ff @(posedge clk) - unique case (value) - 2'b00: clz <= 2'b10; - 2'b01: clz <= 2'b01; - 2'b10: clz <= 2'b00; - 2'b11: clz <= 2'b00; - endcase - end else if (WIDTH == 4) begin - // Eficiente en FPGAs con 4-LUTs - always_ff @(posedge clk) - if (value[3]) - clz <= 3'b000; - else if (value[2]) - clz <= 3'b001; - else if (value[1]) - clz <= 3'b010; - else if (value[0]) - clz <= 3'b011; - else - clz <= 3'b100; - end else begin - logic msb_right; - logic[$clog2(WIDTH) - 1:0] clz_left, clz_right; - logic[$clog2(WIDTH) - 2:0] tail_right; - - assign {msb_right, tail_right} = clz_right; - - gfx_clz #(WIDTH / 2) left - ( - .clk(clk), - .clz(clz_left), - .value(value[WIDTH - 1:WIDTH / 2]) - ); - - gfx_clz #(WIDTH / 2) right - ( - 
.clk(clk), - .clz(clz_right), - .value(value[WIDTH / 2 - 1:0]) - ); - - always_ff @(posedge clk) - if (clz_left[$clog2(WIDTH) - 1]) - clz <= {msb_right, ~msb_right, tail_right}; - else - clz <= {1'b0, clz_left}; - end - endgenerate - -endmodule diff --git a/platform/wavelet3d/gfx_ctz.sv b/platform/wavelet3d/gfx_ctz.sv deleted file mode 100644 index 2713f8a..0000000 --- a/platform/wavelet3d/gfx_ctz.sv +++ /dev/null @@ -1,18 +0,0 @@ -// Count trailing zeros (ctz), clz al revés -module gfx_ctz -#(int WIDTH = 0) -( - input logic clk, - - input logic[WIDTH - 1:0] value, - output logic[$clog2(WIDTH):0] ctz -); - - gfx_clz #(WIDTH) clz - ( - .clk, - .value({<<{value}}), - .clz(ctz) - ); - -endmodule diff --git a/platform/wavelet3d/gfx_fifo.sv b/platform/wavelet3d/gfx_fifo.sv deleted file mode 100644 index 7174e4d..0000000 --- a/platform/wavelet3d/gfx_fifo.sv +++ /dev/null @@ -1,102 +0,0 @@ -module gfx_fifo -#(int WIDTH = 0, - int DEPTH = 0) -( - input logic clk, - rst_n, - - gfx_beats.rx in, - gfx_beats.tx out -); - - logic do_read, do_write, full_if_eq, in_stall, out_stall, - may_read, may_write, read, read_ok, write; - - logic[WIDTH - 1:0] fifo[DEPTH], read_data, write_data; - logic[$clog2(DEPTH) - 1:0] read_ptr, write_ptr; - - assign do_read = read & may_read; - assign do_write = write & may_write; - - always_comb begin - may_read = full_if_eq; - may_write = !full_if_eq; - - if (read) - may_write = 1; - - if (read_ptr != write_ptr) begin - may_read = 1; - may_write = 1; - end - end - - gfx_skid_flow in_flow - ( - .clk, - .rst_n, - .stall(in_stall), - .in_ready(in.ready), - .in_valid(in.valid), - .out_ready(may_write), - .out_valid(write) - ); - - gfx_skid_flow out_flow - ( - .clk, - .rst_n, - .stall(out_stall), - .in_ready(read), - .in_valid(read_ok), - .out_ready(out.ready), - .out_valid(out.valid) - ); - - gfx_skid_buf #(WIDTH) in_skid - ( - .clk, - .in(in.data), - .out(write_data), - .stall(in_stall) - ); - - gfx_skid_buf #(WIDTH) out_skid - ( - .clk, - 
.in(read_data), - .out(out.data), - .stall(out_stall) - ); - - always_ff @(posedge clk or negedge rst_n) - if (~rst_n) begin - read_ok <= 0; - read_ptr <= 0; - write_ptr <= 0; - full_if_eq <= 0; - end else begin - if (~out_stall) - read_ok <= read && may_read; - - if (do_read) - read_ptr <= read_ptr + 1; - - if (do_write) - write_ptr <= write_ptr + 1; - - if (do_read & ~do_write) - full_if_eq <= 0; - else if (~do_read & do_write) - full_if_eq <= 1; - end - - always_ff @(posedge clk) begin - if (~out_stall) - read_data <= fifo[read_ptr]; - - if (may_write) - fifo[write_ptr] <= write_data; - end - -endmodule diff --git a/platform/wavelet3d/gfx_fixed_dotadd.sv b/platform/wavelet3d/gfx_fixed_dotadd.sv deleted file mode 100644 index fdd5ffd..0000000 --- a/platform/wavelet3d/gfx_fixed_dotadd.sv +++ /dev/null @@ -1,55 +0,0 @@ -module gfx_fixed_dotadd -( - input logic clk, - - input gfx::fixed a0, - b0, - a1, - b1, - c, - input logic stall, - - output gfx::fixed q -); - - import gfx::*; - - fixed q0, a1_hold, b1_hold; - - gfx_fixed_muladd muladd_0 - ( - .clk, - .a(a0), - .b(b0), - .c, - .q(q0), - .stall - ); - - gfx_pipes #(.WIDTH($bits(fixed)), .DEPTH(FIXED_MULADD_DEPTH)) a_pipes - ( - .clk, - .in(a1), - .out(a1_hold), - .stall - ); - - gfx_pipes #(.WIDTH($bits(fixed)), .DEPTH(FIXED_MULADD_DEPTH)) b_pipes - ( - .clk, - .in(b1), - .out(b1_hold), - .stall - ); - - gfx_fixed_muladd muladd_1 - ( - .clk, - .a(a1_hold), - .b(b1_hold), - .c(q0), - .q, - .stall - ); - -endmodule diff --git a/platform/wavelet3d/gfx_fixed_muladd.sv b/platform/wavelet3d/gfx_fixed_muladd.sv deleted file mode 100644 index 22b7247..0000000 --- a/platform/wavelet3d/gfx_fixed_muladd.sv +++ /dev/null @@ -1,77 +0,0 @@ -module gfx_fixed_muladd -( - input logic clk, - - input gfx::fixed a, - b, - c, - input logic stall, - - output gfx::fixed q -); - - import gfx::*; - -`ifndef VERILATOR - logic[2 * $bits(fixed) - $bits(fixed_frac) - 1:0] q_ext; - - assign q = q_ext[$bits(fixed) - 1:0]; - - lpm_mult mult - ( 
- .aclr(0), - .clock(clk), - .clken(!stall), - - .sum({c, {`FIXED_FRAC{1'b0}}}), - .dataa(a), - .datab(b), - .result(q_ext) - ); - - defparam - mult.lpm_widtha = $bits(fixed), - mult.lpm_widthb = $bits(fixed), - mult.lpm_widths = $bits(fixed) + $bits(fixed_frac), - /* Esto es crucial. No está documentado en ningún lado (aparte de un - * comentario en r/fpga). Si lpm_widthp < lpm_widtha + lpm_widthb, - * entonces result contiene los lpm_widthp bits más significativos - * del producto, no los menos significativos como tendría sentido. - */ - mult.lpm_widthp = 2 * $bits(fixed) - $bits(fixed_frac), - mult.lpm_representation = "SIGNED", - mult.lpm_pipeline = FIXED_MULADD_DEPTH; -`else - logic[$bits(fixed) + $bits(fixed_frac) - 1:0] q_ext; - - fixed a_hold, b_hold, c_hold; - - assign q = q_ext[$bits(fixed) + $bits(fixed_frac) - 1:$bits(fixed_frac)] + c_hold; - assign q_ext = a_hold * b_hold; - - gfx_pipes #(.WIDTH($bits(a)), .DEPTH(FIXED_MULADD_DEPTH)) a_pipes - ( - .clk, - .in(a), - .out(a_hold), - .stall - ); - - gfx_pipes #(.WIDTH($bits(b)), .DEPTH(FIXED_MULADD_DEPTH)) b_pipes - ( - .clk, - .in(b), - .out(b_hold), - .stall - ); - - gfx_pipes #(.WIDTH($bits(c)), .DEPTH(FIXED_MULADD_DEPTH)) c_pipes - ( - .clk, - .in(c), - .out(c_hold), - .stall - ); -`endif - -endmodule diff --git a/platform/wavelet3d/gfx_front_back.sv b/platform/wavelet3d/gfx_front_back.sv deleted file mode 100644 index b768532..0000000 --- a/platform/wavelet3d/gfx_front_back.sv +++ /dev/null @@ -1,37 +0,0 @@ -interface gfx_front_back -import gfx::*;; - - struct - { - wave_exec wave; - fpint_op p0; - mem_op p1; - sfu_op p2; - group_op p3; - } execute; - - struct - { - logic valid; - group_id group; - } loop; - - shader_dispatch dispatch; - - modport front - ( - input loop, - - output execute, - dispatch - ); - - modport back - ( - input execute, - dispatch, - - output loop - ); - -endinterface diff --git a/platform/wavelet3d/gfx_isa.sv b/platform/wavelet3d/gfx_isa.sv deleted file mode 100644 index 
7239478..0000000 --- a/platform/wavelet3d/gfx_isa.sv +++ /dev/null @@ -1,84 +0,0 @@ -package gfx_isa; - - typedef logic[3:0] sgpr_num; - typedef logic[2:0] vgpr_num; - - typedef logic signed[7:0] pc_offset; - - typedef union packed - { - sgpr_num sgpr; - - struct packed - { - logic[$bits(sgpr_num) - $bits(vgpr_num) - 1:0] reserved; - vgpr_num num; - } vgpr; - } xgpr_num; - - typedef struct packed - { - enum logic[1:0] - { - REGS_SVS = 2'b00, - REGS_SSS = 2'b01, - REGS_VVS = 2'b10, - REGS_VVV = 2'b11 - } reg_mode; - - union packed - { - struct packed - { - logic b_is_imm; - - union packed - { - logic[12:0] imm; - - struct packed - { - logic from_consts; - logic[7:0] reserved; - xgpr_num r; - } read; - } b; - - xgpr_num ra, - rd; - } rr; - } dst_src; - - logic reg_rev; - - union packed - { - struct packed - { - enum logic[4:0] - { - INSN_FPINT_MOV = 0, - INSN_FPINT_FMUL = 1, - INSN_FPINT_IMUL = 2, - INSN_FPINT_FADD = 3, - INSN_FPINT_RES4 = 4, - INSN_FPINT_FMAX = 5, - INSN_FPINT_RES6 = 6, - INSN_FPINT_FMIN = 7, - INSN_FPINT_RES8 = 8, - INSN_FPINT_FCVT = 9, - INSN_FPINT_RES[10:31] - } op; - } fpint; - } by_class; - - enum logic[1:0] - { - INSN_FPINT = 0, - INSN_MEM = 1, - INSN_SFU = 2, - INSN_GROUP = 3 - } insn_class; - } insn_word; - -endpackage diff --git a/platform/wavelet3d/gfx_pipes.sv b/platform/wavelet3d/gfx_pipes.sv deleted file mode 100644 index 2fa875a..0000000 --- a/platform/wavelet3d/gfx_pipes.sv +++ /dev/null @@ -1,24 +0,0 @@ -module gfx_pipes -#(int WIDTH=0, int DEPTH=0) -( - input logic clk, - - input logic[WIDTH - 1:0] in, - input logic stall, - - output logic[WIDTH - 1:0] out -); - - logic[WIDTH - 1:0] pipes[DEPTH]; - - assign out = pipes[DEPTH - 1]; - - always_ff @(posedge clk) - if (~stall) begin - pipes[0] <= in; - - for (integer i = 1; i < DEPTH; ++i) - pipes[i] <= pipes[i - 1]; - end - -endmodule diff --git a/platform/wavelet3d/gfx_pkg.sv b/platform/wavelet3d/gfx_pkg.sv deleted file mode 100644 index 7072967..0000000 --- 
a/platform/wavelet3d/gfx_pkg.sv +++ /dev/null @@ -1,271 +0,0 @@ -package gfx; - - typedef logic[31:0] word; - - typedef word uword; - typedef logic signed[$bits(word) - 1:0] sword; - typedef logic[$bits(word) / 2 - 1:0] uhword; - typedef logic signed[$bits(word) / 2 - 1:0] shword; - typedef logic[2 * $bits(word) - 1:0] udword; - typedef logic signed[2 * $bits(word) - 1:0] sdword; - typedef logic signed[4 * $bits(word) - 1:0] qword; - typedef logic signed[8 * $bits(word) - 1:0] oword; - - localparam int SUBWORD_BITS = $clog2($bits(word)) - $clog2($bits(byte)); - localparam int BYTES_PER_WORD = 1 << SUBWORD_BITS; - - typedef logic[$bits(word) - SUBWORD_BITS - 1:0] word_ptr; - typedef logic[$bits(word_ptr) - 1 - 1:0] dword_ptr; - typedef logic[$bits(word_ptr) - 2 - 1:0] qword_ptr; - typedef logic[$bits(word_ptr) - 3 - 1:0] oword_ptr; - - typedef logic[7:0] float_exp; - typedef logic[$bits(word) - $bits(float_exp) - 2:0] float_mant; - typedef logic[$bits(float_mant):0] float_mant_full; // Incluye '1.' 
explícito - typedef logic[$bits(float_mant_full) + 1:0] float_mant_ext; // Considera overflow - - localparam float_exp FLOAT_EXP_BIAS = (1 << ($bits(float_exp) - 1)) - 1; - localparam float_exp FLOAT_EXP_MAX = {($bits(float_exp)){1'b1}}; - - function float_mant_full full_mant(float_mant in); - full_mant = {1'b1, in}; - endfunction - - function float_mant implicit_mant(float_mant_full in); - assert (in[$bits(in) - 1]); - implicit_mant = in[$bits(in) - 2:0]; - endfunction - - typedef struct packed - { - logic sign; - float_exp exp; - float_mant mant; - } float; - - /* Explicación de guard, round, sticky: - * https://drilian.com/2023/01/10/floating-point-numbers-and-rounding/ - */ - typedef struct packed - { - float normal; - logic slow, - zero, - guard, - round, - sticky; - } float_round; - - typedef struct packed - { - logic exp_max, - exp_min, - mant_zero; - } float_class; - - function float_class classify_float(float in); - classify_float.exp_max = &in.exp; - classify_float.exp_min = ~|in.exp; - classify_float.mant_zero = ~|in.mant; - endfunction - - function logic is_float_special(float_class in); - is_float_special = in.exp_max | (in.exp_min & ~in.mant_zero); - endfunction - - function float_mant_ext float_prepare_round(float in, float_class in_class); - float_prepare_round = {~in_class.exp_min, in.mant, 2'b00}; - endfunction - - typedef struct packed - { - logic setup_mul_float, - setup_unit_b, - mnorm_put_hi, - mnorm_put_lo, - mnorm_put_mul, - mnorm_zero_b, - mnorm_zero_flags, - minmax_abs, - minmax_swap, - minmax_zero_min, - minmax_copy_flags, - shiftr_int_signed, - addsub_copy_flags, - addsub_int_operand, - clz_force_nop, - shiftl_copy_flags, - round_copy_flags, - round_enable, - encode_enable, - writeback; - } fpint_op; - - typedef struct packed - { - logic todo; - } mem_op; - - typedef struct packed - { - logic todo; - } sfu_op; - - typedef struct packed - { - logic todo; - } group_op; - - // Q22.10 - typedef logic[9:0] fixed_frac; - typedef 
logic[$bits(word) - $bits(fixed_frac) - 1:0] fixed_int; - - typedef struct packed signed - { - fixed_int fint; // 'int' es una keyword - fixed_frac frac; - } fixed; - - typedef struct packed - { - fixed x, - y; - } fixed_xy; - - typedef struct packed - { - fixed a, - b, - c; - } vtx_fixed; - - typedef struct packed - { - fixed_xy a, - b, - c; - } vtx_xy; - - localparam int RASTER_BITS = 2; - localparam int RASTER_SUB_BITS = 4; - localparam int RASTER_SIZE = 1 << RASTER_BITS; - localparam int RASTER_COARSE_FRAGS = RASTER_SIZE * RASTER_SIZE; - - typedef logic[RASTER_BITS - 1:0] raster_index; - - // Caso RASTER_BITS = 2: -> 4,4,4,4 -> 8,8-> 16 - localparam int RASTER_OUT_CLZ_DEPTH = 3; - - // Asume RASTER_BITS == 2, hay que ajustarlo si cambia - typedef struct packed - { - // Esto ahorra muchos flops - // - // offsets[0] = inc * 0 = 0 - // offsets[1] = inc * 1 = raster2_times1 - // offsets[2] = inc * 2 = raster2_times1 << 1 - // offsets[3] = inc * 3 = raster2_times3 - fixed raster2_times1, - raster2_times3; - } raster_offsets; - - function fixed raster_idx(raster_offsets offsets, raster_index idx); - unique case (idx) - RASTER_BITS'(0): - return '0; - - RASTER_BITS'(1): - return offsets.raster2_times1; - - RASTER_BITS'(2): - return offsets.raster2_times1 << 1; - - RASTER_BITS'(3): - return offsets.raster2_times3; - endcase - endfunction - - function raster_offsets make_raster_offsets(fixed inc); - make_raster_offsets.raster2_times1 = inc; - make_raster_offsets.raster2_times3 = inc + (inc << 1); - endfunction - - typedef struct packed - { - raster_offsets x, - y; - } raster_offsets_xy; - - typedef struct packed - { - logic[RASTER_SUB_BITS - 1:0] num; - logic[$bits(fixed_frac) - RASTER_SUB_BITS - 1:0] prec; - } raster_sub; - - localparam int RASTER_COARSE_DIM_BITS = $bits(fixed) - $bits(raster_index) - $bits(raster_sub); - - typedef logic signed[RASTER_COARSE_DIM_BITS - 1:0] raster_coarse_dim; - - typedef struct packed - { - raster_coarse_dim x, - y; - } 
raster_coarse_xy; - - typedef struct packed signed - { - raster_coarse_dim coarse; - raster_index fine; - raster_sub sub; - } raster_prec; - - typedef struct packed - { - raster_prec x, - y; - } raster_prec_xy; - - // Definir el número de lanes a partir de las dimensiones del - // rasterizer es una decisión crucial, el diseño entero depende de esto - - localparam int SHADER_LANES = RASTER_COARSE_FRAGS; - - typedef logic[RASTER_SIZE - 1:0] lane_no; - typedef logic[SHADER_LANES - 1:0] lane_mask; - - typedef logic[5:0] group_id; - - localparam int REGFILE_STAGES = 3; - localparam int REG_READ_STAGES = 2 + REGFILE_STAGES + 1; - - typedef gfx_isa::sgpr_num sgpr_num; - typedef gfx_isa::vgpr_num vgpr_num; - typedef gfx_isa::xgpr_num xgpr_num; - typedef gfx_isa::pc_offset pc_offset; - - typedef struct packed - { - // No incluye p0 porque p0 no tiene señal ready - logic p1, - p2, - p3, - valid; - } shader_dispatch; - - typedef struct - { - group_id group; - xgpr_num dest; - logic dest_scalar; - } wave_exec; - - localparam int FIXED_MULADD_DEPTH = 5; - localparam int FIXED_DOTADD_DEPTH = 2 * FIXED_MULADD_DEPTH; - - localparam word BOOTROM_BASE = 32'h0010_0000; - - localparam int SCHED_BRAM_WORDS = 2048; // 8KiB - - typedef word irq_lines; - -endpackage diff --git a/platform/wavelet3d/gfx_pkts.sv b/platform/wavelet3d/gfx_pkts.sv deleted file mode 100644 index 41399ce..0000000 --- a/platform/wavelet3d/gfx_pkts.sv +++ /dev/null @@ -1,29 +0,0 @@ -interface gfx_pkts -#(parameter int WIDTH = $bits(gfx::word)); - - import gfx::*; - - logic tlast; - logic tready; - logic tvalid; - logic[WIDTH - 1:0] tdata; - - modport tx - ( - input tready, - - output tdata, - tlast, - tvalid - ); - - modport rx - ( - input tdata, - tlast, - tvalid, - - output tready - ); - -endinterface diff --git a/platform/wavelet3d/gfx_raster.sv b/platform/wavelet3d/gfx_raster.sv deleted file mode 100644 index a57a672..0000000 --- a/platform/wavelet3d/gfx_raster.sv +++ /dev/null @@ -1,930 +0,0 @@ -module 
gfx_raster -( - input logic clk, - rst_n, - - gfx_pkts.rx geometry, - - gfx_pkts.tx coverage -); - - import gfx::*; - - gfx_raster_bounds setup_bounds - ( - .clk, - .rst_n, - - .geometry, - - .edges_ref(bounds_edges_ref), - .edges_vtx(bounds_edges_vtx), - .edges_span(bounds_edges_span), - .edges_ready(bounds_edges_ready), - .edges_valid(bounds_edges_valid), - .edges_geom_id(bounds_edges_geom_id) - ); - - word bounds_edges_geom_id; - logic bounds_edges_ready, bounds_edges_valid; - vtx_xy bounds_edges_vtx; - fixed_xy bounds_edges_ref; - raster_prec_xy bounds_edges_span; - - gfx_raster_edges setup_edges - ( - .clk, - .rst_n, - - .bounds_ref(bounds_edges_ref), - .bounds_vtx(bounds_edges_vtx), - .bounds_span(bounds_edges_span), - .bounds_ready(bounds_edges_ready), - .bounds_valid(bounds_edges_valid), - .bounds_geom_id(bounds_edges_geom_id), - - .coarse_ref(edges_coarse_ref), - .coarse_base(edges_coarse_base), - .coarse_span(edges_coarse_span), - .coarse_ready(edges_coarse_ready), - .coarse_valid(edges_coarse_valid), - .coarse_geom_id(edges_coarse_geom_id), - .coarse_offsets(edges_coarse_offsets) - ); - - word edges_coarse_geom_id; - fixed edges_coarse_base; - logic edges_coarse_ready, edges_coarse_valid; - fixed_xy edges_coarse_ref; - raster_prec_xy edges_coarse_span; - raster_offsets_xy edges_coarse_offsets; - - gfx_raster_coarse coarse - ( - .clk, - .rst_n, - - .edges_ref(edges_coarse_ref), - .edges_base(edges_coarse_base), - .edges_span(edges_coarse_span), - .edges_ready(edges_coarse_ready), - .edges_valid(edges_coarse_valid), - .edges_geom_id(edges_coarse_geom_id), - .edges_offsets(edges_coarse_offsets), - - .fine_ref(coarse_fine_ref), - .fine_ready(coarse_fine_ready), - .fine_valid(coarse_fine_valid), - .fine_corner(coarse_fine_corner), - .fine_geom_id(coarse_fine_geom_id), - .fine_offsets(coarse_fine_offsets) - ); - - word coarse_fine_geom_id; - fixed coarse_fine_corner; - logic coarse_fine_ready, coarse_fine_valid; - fixed_xy coarse_fine_ref; - raster_offsets_xy 
coarse_fine_offsets; - - gfx_raster_fine fine - ( - .clk, - .rst_n, - - .coarse_ref(coarse_fine_ref), - .coarse_ready(coarse_fine_ready), - .coarse_valid(coarse_fine_valid), - .coarse_corner(coarse_fine_corner), - .coarse_geom_id(coarse_fine_geom_id), - .coarse_offsets(coarse_fine_offsets), - - .coverage - ); - -endmodule - -module gfx_raster_bounds -( - input logic clk, - rst_n, - - gfx_pkts.rx geometry, - - input logic edges_ready, - output logic edges_valid, - output gfx::word edges_geom_id, - output gfx::fixed_xy edges_ref, - output gfx::raster_prec_xy edges_span, - output gfx::vtx_xy edges_vtx -); - - import gfx::*; - - enum int unsigned - { - IN_GEOM_ID, - IN_DIM_X, - IN_DIM_Y - } in_state; - - enum int unsigned - { - VTX_A, - VTX_B, - VTX_C - } vtx_state; - - logic a_lt_b, a_lt_c, b_lt_c, edges_handshake, geom_complete, geom_last, - geom_recv, in_vtx, next_dim, new_vtx; - - logic end_new_dim, end_valid, vtx_valid, lt_new_dim, lt_valid, minmax_new_dim, minmax_valid; - - fixed geom_data; - vtx_fixed dim_vtx, dim_vtx_x, dim_vtx_y; - raster_prec max, min; - - assign geom_recv = geometry.tready & geometry.tvalid; - assign edges_handshake = edges_valid & edges_ready; - - assign edges_vtx.a.x = dim_vtx_x.a; - assign edges_vtx.a.y = dim_vtx_y.a; - assign edges_vtx.b.x = dim_vtx_x.b; - assign edges_vtx.b.y = dim_vtx_y.b; - assign edges_vtx.c.x = dim_vtx_x.c; - assign edges_vtx.c.y = dim_vtx_y.c; - - assign geometry.tready = edges_handshake | ~geom_complete; - - always_comb begin - unique case (vtx_state) - VTX_C: next_dim = geom_recv; - default: next_dim = 0; - endcase - - unique case (in_state) - IN_DIM_Y: geom_last = next_dim; - default: geom_last = 0; - endcase - end - - always_ff @(posedge clk or negedge rst_n) - if (~rst_n) begin - in_state <= IN_GEOM_ID; - vtx_state <= VTX_A; - - in_vtx <= 0; - new_vtx <= 0; - geom_complete <= 0; - - lt_valid <= 0; - end_valid <= 0; - vtx_valid <= 0; - edges_valid <= 0; - minmax_valid <= 0; - - lt_new_dim <= 0; - end_new_dim <= 
0; - minmax_new_dim <= 0; - - edges_geom_id <= 'x; - end else begin - end_valid <= 0; - vtx_valid <= end_valid; - lt_valid <= vtx_valid; - minmax_valid <= lt_valid; - - if (~edges_valid | edges_ready) - edges_valid <= minmax_valid; - - geom_complete <= (geom_complete | geom_last) & ~edges_handshake; - - unique case (in_state) - IN_GEOM_ID: - if (geom_recv) begin - in_state <= IN_DIM_X; - - in_vtx <= 1; - edges_geom_id <= geometry.tdata; - end - - IN_DIM_X: - if (next_dim) - in_state <= IN_DIM_Y; - - IN_DIM_Y: - if (next_dim) begin - in_state <= IN_GEOM_ID; - - in_vtx <= 0; - end_valid <= 1; - end - endcase - - new_vtx <= 0; - - lt_new_dim <= 0; - minmax_new_dim <= lt_new_dim; - end_new_dim <= minmax_new_dim; - - unique case (vtx_state) - VTX_A: begin - if (in_vtx & geom_recv) begin - new_vtx <= 1; - vtx_state <= VTX_B; - end - - if (new_vtx) begin - dim_vtx.c <= geom_data; - lt_new_dim <= 1; - end - end - - VTX_B: begin - if (geom_recv) begin - new_vtx <= 1; - vtx_state <= VTX_C; - end - - if (new_vtx) - dim_vtx.a <= geom_data; - end - - VTX_C: begin - if (geom_recv) begin - new_vtx <= 1; - vtx_state <= VTX_A; - end - - if (new_vtx) - dim_vtx.b <= geom_data; - end - endcase - - if (in_state == IN_DIM_Y & next_dim) - assert (geometry.tlast); - end - - always_ff @(posedge clk) begin - geom_data <= geometry.tdata; - - a_lt_b <= $signed(dim_vtx.a) < $signed(dim_vtx.b); - a_lt_c <= $signed(dim_vtx.a) < $signed(dim_vtx.c); - b_lt_c <= $signed(dim_vtx.b) < $signed(dim_vtx.c); - - // Realmente no son 'x' o 'y' hasta cuando edges_valid = 1 - if (lt_new_dim) begin - dim_vtx_y <= dim_vtx; - dim_vtx_x <= dim_vtx_y; - end - - if (a_lt_b) begin - min <= a_lt_c ? dim_vtx_y.a : dim_vtx_y.c; - max <= b_lt_c ? dim_vtx_y.c : dim_vtx_y.b; - end else begin - min <= b_lt_c ? dim_vtx_y.b : dim_vtx_y.c; - max <= a_lt_c ? 
dim_vtx_y.c : dim_vtx_y.a; - end - - {min.fine, min.sub} <= '0; - {max.fine, max.sub} <= '0; - - if (end_new_dim) begin - edges_ref.y <= min; - edges_ref.x <= edges_ref.y; - - edges_span.y <= max - min; - edges_span.x <= edges_span.y; - end - end - -endmodule - -module gfx_raster_edges -( - input logic clk, - rst_n, - - input logic bounds_valid, - input gfx::word bounds_geom_id, - input gfx::fixed_xy bounds_ref, - input gfx::raster_prec_xy bounds_span, - input gfx::vtx_xy bounds_vtx, - output logic bounds_ready, - - input logic coarse_ready, - output logic coarse_valid, - output gfx::word coarse_geom_id, - output gfx::fixed_xy coarse_ref, - output gfx::raster_prec_xy coarse_span, - output gfx::fixed coarse_base, - output gfx::raster_offsets_xy coarse_offsets -); - - import gfx::*; - - enum int unsigned - { - EDGE_AB, - EDGE_BC, - EDGE_CA, - // EDGE_CA cumple doble función como OFFSETS_AB - OFFSETS_BC, - OFFSETS_CA, - OUT - } state; - - struct - { - fixed_xy cur, - delay1, - delay2; - } inc; - - logic coarse_handshake, coarse_stall, offsets_flow; - fixed_xy delta, p, q; - - // - 2 porque coarse valid va al final - logic[FIXED_DOTADD_DEPTH - 2:0] dotadd_valid; - - assign coarse_stall = coarse_valid & ~coarse_ready; - assign coarse_handshake = coarse_valid & coarse_ready; - - gfx_fixed_dotadd edge_base - ( - .clk, - .c(0), - .q(coarse_base), - .a0(delta.x), - .b0(inc.cur.x), - .a1(delta.y), - .b1(inc.cur.y), - .stall(coarse_stall) - ); - - always_comb - unique case (state) - OUT: offsets_flow = coarse_handshake; - default: offsets_flow = 1; - endcase - - always_ff @(posedge clk or negedge rst_n) - if (~rst_n) begin - state <= EDGE_AB; - - p <= 'x; - q <= 'x; - coarse_ref <= 'x; - coarse_geom_id <= 'x; - - bounds_ready <= 0; - coarse_valid <= 0; - - for (int i = 0; i < $bits(dotadd_valid) - 1; ++i) - dotadd_valid[i] <= 0; - end else begin - for (int i = 1; i < $bits(dotadd_valid); ++i) - dotadd_valid[i] <= dotadd_valid[i - 1]; - - if (~coarse_stall) - coarse_valid <= 
dotadd_valid[$bits(dotadd_valid) - 1]; - - bounds_ready <= 0; - dotadd_valid[0] <= 0; - - unique case (state) - EDGE_AB: begin - if (bounds_valid) - state <= EDGE_BC; - - coarse_ref <= bounds_ref; - coarse_span <= bounds_span; - coarse_geom_id <= bounds_geom_id; - - p <= bounds_vtx.a; - q <= bounds_vtx.b; - end - - EDGE_BC: begin - state <= EDGE_CA; - bounds_ready <= 1; - - p <= bounds_vtx.b; - q <= bounds_vtx.c; - end - - EDGE_CA: begin - state <= OFFSETS_BC; - - p <= bounds_vtx.c; - q <= bounds_vtx.a; - - // Esto ocurre justamente en un momento en que ab, bc, ca - // quedan todos en sus lugares correctos en la pipeline - dotadd_valid[0] <= 1; - end - - OFFSETS_BC: - state <= OFFSETS_CA; - - OFFSETS_CA: - state <= OUT; - - OUT: - if (coarse_handshake) - state <= EDGE_AB; - endcase - end - - always_ff @(posedge clk) begin - delta.x <= coarse_ref.x - q.x; - delta.y <= coarse_ref.y - q.y; - - inc.cur.x <= p.y - q.y; - inc.cur.y <= q.x - p.x; - - //TODO: top-left rule - if (offsets_flow) begin - inc.delay1 <= inc.cur; - inc.delay2 <= inc.delay1; - - coarse_offsets.x <= make_raster_offsets(inc.delay2.x); - coarse_offsets.y <= make_raster_offsets(inc.delay2.y); - end - end - -endmodule - -module gfx_raster_coarse -( - input logic clk, - rst_n, - - input logic edges_valid, - input gfx::word edges_geom_id, - input gfx::fixed_xy edges_ref, - input gfx::raster_prec_xy edges_span, - input gfx::fixed edges_base, - input gfx::raster_offsets_xy edges_offsets, - output logic edges_ready, - - input logic fine_ready, - output logic fine_valid, - output gfx::word fine_geom_id, - output gfx::fixed_xy fine_ref, - output gfx::fixed fine_corner, - output gfx::raster_offsets_xy fine_offsets -); - - import gfx::*; - - enum int unsigned - { - SETUP, - TEST_AB, - TEST_BC, - TEST_CA, - OUT - } state; - - struct - { - fixed cur, - next, - prev; - } corner, edge_fn, vertical; - - struct - { - raster_offsets_xy cur, - next, - prev; - } offsets; - - logic edges_recv, end_block, end_x, end_y, 
first_run, - mask, mask_reset, new_geom, test_flow, out_flow; - - fixed edge_test, reference_x, vertical_inc; - fixed_xy max_offset, min_offset, test_offset; - raster_coarse_xy stride; - raster_coarse_dim width; - raster_offsets_xy next_offsets; - - function fixed coarse_offset(raster_offsets offsets); - return raster_idx(offsets, RASTER_BITS'(1)) << RASTER_BITS; - endfunction - - assign end_x = stride.x == '0; - assign end_y = stride.y == '0; - assign end_block = end_x & end_y; - - assign edge_test = edge_fn.cur + test_offset.x + test_offset.y; - assign vertical_inc = vertical.cur + coarse_offset(offsets.cur.y); - - assign fine_corner = corner.cur; - assign fine_offsets = offsets.cur; // Vuelve a cur luego de 3 ciclos - - assign min_offset.x = raster_idx(next_offsets.x, RASTER_BITS'(0)); - assign min_offset.y = raster_idx(next_offsets.y, RASTER_BITS'(0)); - assign max_offset.x = raster_idx(next_offsets.x, RASTER_BITS'(RASTER_SIZE - 1)); - assign max_offset.y = raster_idx(next_offsets.y, RASTER_BITS'(RASTER_SIZE - 1)); - assign next_offsets = edges_recv ? 
edges_offsets : offsets.next; - - always_comb begin - unique case (state) - SETUP: new_geom = 1; - default: new_geom = 0; - endcase - - unique case (state) - TEST_AB: mask_reset = 1; - default: mask_reset = 0; - endcase - - unique case (state) - SETUP: edges_ready = 1; - default: edges_ready = 0; - endcase - - unique case (state) - SETUP: - edges_recv = 1; - - TEST_AB, TEST_BC: - edges_recv = first_run; - - default: - edges_recv = 0; - endcase - - unique case (state) - OUT: fine_valid = mask; - default: fine_valid = 0; - endcase - - unique case (state) - OUT: begin - out_flow = ~mask | fine_ready; - test_flow = 0; - end - - default: begin - out_flow = 0; - test_flow = 1; - end - endcase - end - - always_ff @(posedge clk or negedge rst_n) - if (~rst_n) begin - state <= SETUP; - first_run <= 1; - end else - unique case (state) - SETUP: - if (edges_valid) - state <= TEST_AB; - - TEST_AB: - state <= TEST_BC; - - TEST_BC: - state <= TEST_CA; - - TEST_CA: - state <= OUT; - - OUT: begin - first_run <= end_block; - if (out_flow) - state <= end_block ? 
SETUP : TEST_AB; - end - endcase - - always_ff @(posedge clk) begin - if (new_geom) begin - width <= edges_span.x.coarse; - stride.x <= edges_span.x.coarse; - stride.y <= edges_span.y.coarse; - reference_x <= edges_ref.x; - - fine_ref <= edges_ref; - fine_geom_id <= edges_geom_id; - end - - if (out_flow) begin - stride.x <= stride.x - 1; - fine_ref.x.fint <= fine_ref.x.fint + ($bits(fixed_int))'(RASTER_SIZE); - - if (end_x) begin - fine_ref.x <= reference_x; - fine_ref.y.fint <= fine_ref.y.fint + ($bits(fixed_int))'(RASTER_SIZE); - - stride.x <= width; - stride.y <= stride.y - 1; - end - end - - if (test_flow) begin - offsets.cur <= next_offsets; - offsets.next <= offsets.prev; - offsets.prev <= offsets.cur; - - vertical.cur <= vertical.next; - vertical.next <= vertical.prev; - vertical.prev <= vertical.cur; - - edge_fn.cur <= edge_fn.next; - edge_fn.next <= edge_fn.prev; - edge_fn.prev <= edge_fn.cur + coarse_offset(offsets.cur.x); - - if (end_x) begin - edge_fn.prev <= vertical_inc; - vertical.prev <= vertical_inc; - end - - corner.cur <= corner.next; - corner.next <= corner.prev; - corner.prev <= edge_fn.cur; - - if (coarse_offset(next_offsets.x) >= 'sd0) - test_offset.x <= max_offset.x; - else - test_offset.x <= min_offset.x; - - if (coarse_offset(next_offsets.y) >= 'sd0) - test_offset.y <= max_offset.y; - else - test_offset.y <= min_offset.y; - - mask <= (mask | mask_reset) & 1/*(edge_test >= 'sd0)*/; - end - - if (edges_recv) begin - edge_fn.cur <= edges_base; - vertical.cur <= edges_base; - end - end - -endmodule - -module gfx_raster_fine -( - input logic clk, - rst_n, - - input logic coarse_valid, - input gfx::word coarse_geom_id, - input gfx::fixed_xy coarse_ref, - input gfx::fixed coarse_corner, - input gfx::raster_offsets_xy coarse_offsets, - output logic coarse_ready, - - gfx_pkts.tx coverage -); - - import gfx::*; - - enum int unsigned - { - IN_C, - IN_A, - IN_B, - IN_MASK - } in_state; - - enum int unsigned - { - OUT_ACCEPT, - OUT_GEOM_ID, - OUT_POS, 
- OUT_MASK, - OUT_BARY_C, - OUT_BARY_A, - OUT_BARY_B - } out_state; - - struct - { - fixed cur, - next, - prev; - } corner; - - struct - { - raster_offsets_xy cur, - next, - prev; - } offsets; - - logic begin_bary, hold_block, in_valid, mask_in_clean, - mask_in_reset, new_block, out_last; - - word geom_id; - fixed bary_coord; - lane_no lane, lane_ctz, lane_hold; - fixed_xy block_ref; - lane_mask mask_in, mask, mask_ctz; - raster_index lane_x, lane_y; - logic[$bits(lane_ctz):0] ctz_count; - - function shword ref_half(raster_prec dim); - return dim.coarse[$bits(shword) - 1:0]; - endfunction - - assign lane_ctz = ctz_count[$bits(lane_ctz) - 1:0]; - assign in_valid = mask_in_clean & |mask_in; - assign out_last = ~|mask; - assign {lane_y, lane_x} = lane; - - // **IMPORTANTE**: Esto va a fallar a partir de RASTER_BITS >= 3, - // ya que la fsm asume que ctz termina en 3 ciclos o menos - - gfx_ctz #(RASTER_COARSE_FRAGS) ctz - ( - .clk, - .value(mask_ctz), - .ctz(ctz_count) - ); - - always_comb begin - unique case (out_state) - OUT_ACCEPT: new_block = 1; - default: new_block = 0; - endcase - - unique case (out_state) - OUT_ACCEPT: mask_ctz = mask_in; - default: mask_ctz = mask; - endcase - - unique case (out_state) - OUT_ACCEPT: coverage.tvalid = 0; - default: coverage.tvalid = 1; - endcase - - unique case (out_state) - OUT_MASK, OUT_BARY_B: - begin_bary = coverage.tready; - - default: - begin_bary = 0; - endcase - - unique case (out_state) - OUT_BARY_B: coverage.tlast = out_last; - default: coverage.tlast = 0; - endcase - - unique case (out_state) - OUT_GEOM_ID: - coverage.tdata = geom_id; - - OUT_POS: - coverage.tdata = {ref_half(coarse_ref.y), ref_half(block_ref.x)}; - - OUT_MASK: - coverage.tdata = {{($bits(word) - $bits(mask)){1'b0}}, mask}; - - OUT_BARY_C, OUT_BARY_A, OUT_BARY_B: - coverage.tdata = bary_coord; - - default: - coverage.tdata = 'x; - endcase - - unique case (out_state) - OUT_MASK: - lane = lane_ctz; - - default: - lane = lane_hold; - endcase - - unique 
case (in_state) - IN_C: coarse_ready = new_block; - default: coarse_ready = 0; - endcase - - unique case (in_state) - IN_C: hold_block = new_block; - IN_A: hold_block = 1; - IN_B: hold_block = 1; - IN_MASK: hold_block = 0; - endcase - - unique case (in_state) - IN_C: mask_in_reset = 1; - default: mask_in_reset = 0; - endcase - - unique case (in_state) - IN_MASK: mask_in_clean = 1; - default: mask_in_clean = 0; - endcase - end - - always_ff @(posedge clk or negedge rst_n) - if (~rst_n) begin - in_state <= IN_C; - out_state <= OUT_ACCEPT; - end else begin - unique case (in_state) - IN_C: - if (coarse_valid & new_block) - in_state <= IN_A; - - IN_A: - in_state <= IN_B; - - IN_B: - in_state <= IN_MASK; - - IN_MASK: - in_state <= IN_C; - endcase - - unique case (out_state) - OUT_ACCEPT: - if (in_valid) - out_state <= OUT_GEOM_ID; - - OUT_GEOM_ID: - if (coverage.tready) - out_state <= OUT_POS; - - OUT_POS: - if (coverage.tready) - out_state <= OUT_MASK; - - OUT_MASK: - if (coverage.tready) - out_state <= OUT_BARY_C; - - OUT_BARY_C: - if (coverage.tready) - out_state <= OUT_BARY_A; - - OUT_BARY_A: - if (coverage.tready) - out_state <= OUT_BARY_B; - - OUT_BARY_B: - if (coverage.tready) - out_state <= out_last ? OUT_ACCEPT : OUT_BARY_C; - endcase - end - - always_ff @(posedge clk) begin - // Prueba paralela de signos, esto hace el heavy lifting de fine raster - // Nótese que muchos sumadores serán eliminados en síntesis - for (int i = 0; i < RASTER_SIZE; ++i) - for (int j = 0; j < RASTER_SIZE; ++j) - mask_in[i * RASTER_SIZE + j] <= - (mask_in[i * RASTER_SIZE + j] | mask_in_reset) - & (coarse_corner - + raster_idx(coarse_offsets.y, RASTER_BITS'(i)) - + raster_idx(coarse_offsets.x, RASTER_BITS'(j)) - >= 'sd0); - - // Recalculamos las coordenadas baricéntricas de cada fragmento que - // no haya sido descartado. La razón de esto es evitar almacenar y - // luego multiplexar las coordenadas de un bloque entero (48 words). 
- if (coverage.tready) - bary_coord <= corner.next - + raster_idx(offsets.next.y, RASTER_BITS'(lane_y)) - + raster_idx(offsets.next.x, RASTER_BITS'(lane_x)); - - if (new_block & mask_in_reset) begin - geom_id <= coarse_geom_id; - block_ref <= coarse_ref; - end - - // new_block = 0 => coverage.tvalid = 1 - if (new_block | coverage.tready) begin - corner.cur <= corner.next; - corner.next <= corner.prev; - corner.prev <= corner.cur; - - offsets.cur <= offsets.next; - offsets.next <= offsets.prev; - offsets.prev <= offsets.cur; - end - - if (hold_block) begin - // Para prev en vez de cur para que los primeros valores queden en - // cur justamente al llegar a OUT_BARY_C - corner.prev <= coarse_corner; - offsets.prev <= coarse_offsets; - end - - if (new_block) - mask <= mask_in; - - if (begin_bary) begin - mask <= mask & (mask - 1); - lane_hold <= lane_ctz; - end - end - -endmodule diff --git a/platform/wavelet3d/gfx_regfile_io.sv b/platform/wavelet3d/gfx_regfile_io.sv deleted file mode 100644 index 2459049..0000000 --- a/platform/wavelet3d/gfx_regfile_io.sv +++ /dev/null @@ -1,106 +0,0 @@ -interface gfx_regfile_io; - - import gfx::*; - - struct - { - group_id group; - sgpr_num a_sgpr, - b_sgpr; - vgpr_num a_vgpr, - b_vgpr; - logic[12:0] b_imm; - logic a_scalar, - b_scalar, - b_is_imm, - b_is_const, - scalar_rev; - } op; - - struct - { - logic write; - group_id group; - sgpr_num sgpr; - word data; - } sgpr_write; - - struct - { - lane_mask mask; - group_id group; - vgpr_num vgpr; - word data[SHADER_LANES]; - } vgpr_write; - - word a[SHADER_LANES], b[SHADER_LANES], sgpr_write_data, vgpr_write_data[SHADER_LANES]; - logic mask_wb_write, pc_wb_write; - word_ptr pc_back, pc_front, pc_wb; - group_id mask_back_group, mask_wb_group, pc_back_group, pc_front_group, pc_wb_group; - lane_mask mask_back, mask_wb; - - modport ab - ( - input a, - b - ); - - modport read - ( - output op - ); - - modport bind_ - ( - input pc_front, - - output pc_front_group - ); - - modport wb - ( - input 
pc_back, - mask_back, - - output sgpr_write, - vgpr_write, - - pc_back_group, - mask_back_group, - - pc_wb, - pc_wb_group, - pc_wb_write, - - mask_wb, - mask_wb_group, - mask_wb_write - ); - - modport regs - ( - input op, - sgpr_write, - vgpr_write, - - pc_back_group, - pc_front_group, - mask_back_group, - - pc_wb, - pc_wb_group, - pc_wb_write, - - mask_wb, - mask_wb_group, - mask_wb_write, - - output a, - b, - - pc_back, - pc_front, - mask_back - ); - -endinterface diff --git a/platform/wavelet3d/gfx_rst_sync.sv b/platform/wavelet3d/gfx_rst_sync.sv deleted file mode 100644 index 2a8ea3b..0000000 --- a/platform/wavelet3d/gfx_rst_sync.sv +++ /dev/null @@ -1,13 +0,0 @@ -//FIXME: peligro -module gfx_rst_sync -( - input logic clk, - rst_n, - - output logic srst_n -); - - always_ff @(posedge clk or negedge rst_n) - srst_n <= ~rst_n ? 0 : 1; - -endmodule diff --git a/platform/wavelet3d/gfx_sched.sv b/platform/wavelet3d/gfx_sched.sv deleted file mode 100644 index b8b6b7e..0000000 --- a/platform/wavelet3d/gfx_sched.sv +++ /dev/null @@ -1,139 +0,0 @@ -module gfx_sched -import gfx::*; -( - input logic clk, - rst_n, - srst_n, - - gfx_axil.m axim, - - input irq_lines irq -); - - logic axi_ready, axi_valid, bram_ready, bram_read, bram_write, bram_write_next, - mem_instr, mem_la_read, mem_la_write, mem_ready, mem_valid, select_bram; - - word bram[SCHED_BRAM_WORDS]; - word axi_rdata, bram_rdata, mem_addr, mem_la_addr, mem_rdata, mem_wdata; - logic[$bits(word) / $bits(byte) - 1:0] mem_wstrb; - - logic[$clog2(SCHED_BRAM_WORDS) - 1:0] bram_addr; - - assign bram_addr = mem_addr[$bits(bram_addr) + SUBWORD_BITS - 1:SUBWORD_BITS]; - assign mem_ready = (axi_valid & axi_ready) | bram_ready; - assign mem_rdata = bram_ready ? 
bram_rdata : axi_rdata; - assign select_bram = ~|mem_la_addr[$bits(mem_la_addr) - 1:$bits(bram_addr) + SUBWORD_BITS]; - assign bram_write_next = mem_la_write & select_bram; - - defparam core.ENABLE_COUNTERS = 0; - defparam core.ENABLE_COUNTERS64 = 0; - defparam core.BARREL_SHIFTER = 1; - defparam core.COMPRESSED_ISA = 1; - defparam core.CATCH_MISALIGN = 0; - defparam core.CATCH_ILLINSN = 0; - defparam core.ENABLE_MUL = 1; - defparam core.ENABLE_DIV = 1; - defparam core.ENABLE_IRQ = 1; - defparam core.ENABLE_IRQ_QREGS = 0; - defparam core.ENABLE_IRQ_TIMER = 0; - defparam core.PROGADDR_RESET = BOOTROM_BASE; - - picorv32 core - ( - .clk, - .resetn(srst_n), - .trap(), - - .mem_valid, - .mem_instr, - .mem_ready, - - .mem_addr, - .mem_wdata, - .mem_wstrb, - .mem_rdata, - - .mem_la_read, - .mem_la_write, - .mem_la_addr, - .mem_la_wdata(), - .mem_la_wstrb(), - - .pcpi_valid(), - .pcpi_insn(), - .pcpi_rs1(), - .pcpi_rs2(), - .pcpi_wr(), - .pcpi_rd(), - .pcpi_wait(0), - .pcpi_ready(0), - - .irq, - .eoi(), - - .trace_valid(), - .trace_data() - ); - - picorv32_axi_adapter axi - ( - .clk, - .resetn(srst_n), - - .mem_axi_awvalid(axim.awvalid), - .mem_axi_awready(axim.awready), - .mem_axi_awaddr(axim.awaddr), - .mem_axi_awprot(), - - .mem_axi_wvalid(axim.wvalid), - .mem_axi_wready(axim.wready), - .mem_axi_wdata(axim.wdata), - .mem_axi_wstrb(), // Potenciales sorpresas - - .mem_axi_bvalid(axim.bvalid), - .mem_axi_bready(axim.bready), - - .mem_axi_arvalid(axim.arvalid), - .mem_axi_arready(axim.arready), - .mem_axi_araddr(axim.araddr), - .mem_axi_arprot(), - - .mem_axi_rvalid(axim.rvalid), - .mem_axi_rready(axim.rready), - .mem_axi_rdata(axim.rdata), - - .mem_valid(mem_valid & axi_valid), - .mem_instr, - .mem_ready(axi_ready), - .mem_addr, - .mem_wdata, - .mem_wstrb, - .mem_rdata(axi_rdata) - ); - - always_ff @(posedge clk) begin - if (bram_write) begin - for (int i = 0; i < $bits(mem_wstrb); ++i) - if (mem_wstrb[i]) - bram[bram_addr][i] <= mem_wdata[i]; - - bram_rdata <= 'x; - end 
else - bram_rdata <= bram[bram_addr]; - end - - - always_ff @(posedge clk or negedge rst_n) - if (~rst_n) begin - axi_valid <= 0; - bram_read <= 0; - bram_ready <= 0; - bram_write <= 0; - end else begin - axi_valid <= ~select_bram | (axi_valid & ~axi_ready); - bram_read <= mem_la_read & select_bram; - bram_write <= bram_write_next; - bram_ready <= bram_read | bram_write_next; - end - -endmodule diff --git a/platform/wavelet3d/gfx_shader.sv b/platform/wavelet3d/gfx_shader.sv deleted file mode 100644 index 322ffb5..0000000 --- a/platform/wavelet3d/gfx_shader.sv +++ /dev/null @@ -1,77 +0,0 @@ -module gfx_shader -import gfx::*; -import gfx_shader_schedif_pkg::*; -( - input logic clk, - rst_n, - - gfx_axib.m insn_mem, - - gfx_axil.s sched -); - - axi4lite_intf #(.ADDR_WIDTH(GFX_SHADER_SCHEDIF_MIN_ADDR_WIDTH)) regblock(); - - gfx_axil2regblock axil2regblock - ( - .axis(sched), - .axim(regblock.master) - ); - - gfx_shader_schedif__in_t schedif_in; - gfx_shader_schedif__out_t schedif_out; - - gfx_front_back front_back(); - gfx_regfile_io regfile(); - gfx_shader_setup setup(); - - assign schedif_in.SETUP_CTRL.GPR_DONE.hwset = setup.sched.set_done.gpr; - assign schedif_in.SETUP_CTRL.MASK_DONE.hwset = setup.sched.set_done.mask; - assign schedif_in.SETUP_CTRL.SUBMIT_DONE.hwset = setup.sched.set_done.submit; - - assign setup.sched.write.pc = schedif_out.SETUP_SUBMIT.PC.value; - assign setup.sched.write.gpr = schedif_out.SETUP_CTRL.XGPR.value; - assign setup.sched.write.mask = schedif_out.SETUP_MASK.MASK.value; - assign setup.sched.write.group = schedif_out.SETUP_CTRL.GROUP.value; - assign setup.sched.write.pc_set = schedif_out.SETUP_SUBMIT.PC.swmod; - assign setup.sched.write.gpr_set = schedif_out.SETUP_GPR.VALUE.swmod; - assign setup.sched.write.mask_set = schedif_out.SETUP_MASK.MASK.swmod; - assign setup.sched.write.gpr_value = schedif_out.SETUP_GPR.VALUE.value; - - gfx_shader_front frontend - ( - .clk, - .rst_n, - .front(front_back.front), - .reg_bind(regfile.bind_), - 
.reg_read(regfile.read), - .fetch_mem(insn_mem), - .icache_flush(schedif_out.CORE.IFLUSH.value) - ); - - gfx_shader_back backend - ( - .clk, - .rst_n, - .back(front_back.back), - .setup(setup.core), - .reg_wb(regfile.wb), - .read_data(regfile.ab) - ); - - gfx_shader_regs regs - ( - .clk, - .io(regfile.regs) - ); - - gfx_shader_schedif schedif - ( - .clk, - .arst_n(rst_n), - .s_axil(regblock.slave), - .hwif_in(schedif_in), - .hwif_out(schedif_out) - ); - -endmodule diff --git a/platform/wavelet3d/gfx_shader_back.sv b/platform/wavelet3d/gfx_shader_back.sv deleted file mode 100644 index 4929192..0000000 --- a/platform/wavelet3d/gfx_shader_back.sv +++ /dev/null @@ -1,335 +0,0 @@ -module gfx_shader_back -import gfx::*; -( - input logic clk, - rst_n, - - gfx_front_back.back back, - - gfx_regfile_io.ab read_data, - gfx_regfile_io.wb reg_wb, - - gfx_shader_setup.core setup -); - - logic abort; - - gfx_wb out_wb(), p0_wb(), p1_wb(), p2_wb(), p3_wb(); - gfx_shake p1_shake(), p2_shake(), p3_shake(); - - gfx_shader_abort p0_abort - ( - .clk, - .p1(p1_shake.peek), - .p2(p2_shake.peek), - .p3(p3_shake.peek), - .abort - ); - - gfx_shader_fpint p0 - ( - .clk, - .rst_n, - .op(back.execute.p0), - .wb(p0_wb.tx), - .wave(back.execute.wave), - .abort, - .read_data, - .in_valid(back.dispatch.valid) - ); - - gfx_shader_mem p1 - ( - .clk, - .rst_n, - .op(back.execute.p1), - .wb(p1_wb.tx), - .wave(back.execute.wave), - .in_shake(p1_shake.rx), - .read_data - ); - - gfx_shader_sfu p2 - ( - .clk, - .rst_n, - .op(back.execute.p2), - .wb(p2_wb.tx), - .wave(back.execute.wave), - .in_shake(p2_shake.rx), - .read_data - ); - - gfx_shader_group p3 - ( - .clk, - .rst_n, - .op(back.execute.p3), - .wb(p3_wb.tx), - .wave(back.execute.wave), - .in_shake(p3_shake.rx), - .read_data - ); - - gfx_shader_writeback_arbiter4 writeback_arbiter - ( - .clk, - .rst_n, - .p0(p0_wb.rx), - .p1(p1_wb.rx), - .p2(p2_wb.rx), - .p3(p3_wb.rx), - .out(out_wb.tx) - ); - - gfx_shader_writeback writeback - ( - .clk, - .rst_n, - 
.wb(out_wb.rx), - .regs(reg_wb), - .setup, - .loop_group(back.loop.group), - .loop_valid(back.loop.valid) - ); - -endmodule - -module gfx_shader_abort -( - input logic clk, - - gfx_shake.peek p1, - p2, - p3, - - output logic abort -); - - always_ff @(posedge clk) - abort <= - (p1.valid & p1.ready) - | (p2.valid & p2.ready) - | (p3.valid & p3.ready); - -endmodule - -module gfx_shader_writeback_arbiter4 -( - input logic clk, - rst_n, - - gfx_wb.rx p0, - p1, - p2, - p3, - - gfx_wb.tx out -); - - assert property ( - @(posedge clk) - disable iff (~rst_n) - - (p0.ready & out.ready) - ); - - gfx_wb p0_p1(), p2_p3(); - - gfx_shader_writeback_arbiter2_prio arbiter_p0_p1 - ( - .clk, - .rst_n, - .a(p0), - .b(p1), - .out(p0_p1.tx) - ); - - gfx_shader_writeback_arbiter2_prio arbiter_p2_p3 - ( - .clk, - .rst_n, - .a(p2), - .b(p3), - .out(p2_p3.tx) - ); - - gfx_shader_writeback_arbiter2_prio arbiter_out - ( - .clk, - .rst_n, - .a(p0_p1.rx), - .b(p2_p3.tx), - .out - ); - -endmodule - -module gfx_shader_writeback_arbiter2_prio -( - input logic clk, - rst_n, - - gfx_wb.rx a, - b, - - gfx_wb.tx out -); - - //TODO - assign a.ready = out.ready; - assign b.ready = 0; - - assign out.dest = a.dest; - assign out.lanes = a.lanes; - assign out.group = a.group; - assign out.valid = a.valid; - assign out.scalar = a.scalar; - assign out.writeback = a.writeback; - - assign out.mask = a.mask; - assign out.mask_update = a.mask_update; - - assign out.pc_add = a.pc_add; - assign out.pc_inc = a.pc_inc; - assign out.pc_update = a.pc_update; - -endmodule - -module gfx_shader_writeback -import gfx::*; -( - input logic clk, - rst_n, - - gfx_wb.rx wb, - - gfx_regfile_io.wb regs, - - output logic loop_valid, - output group_id loop_group, - - gfx_shader_setup.core setup -); - - struct - { - group_id group; - word lanes[SHADER_LANES]; - pc_offset pc_add; - lane_mask mask; - vgpr_num vgpr; - logic pc_update, - mask_update, - vgpr_update; - } loop_hold[REGFILE_STAGES], loop_out; - - logic 
loop_valid_hold[REGFILE_STAGES], loop_out_valid, mask_wb, scalar_wb, - setup_gpr, setup_mask, setup_submit; - - assign wb.ready = 1; - - assign loop_out = loop_hold[REGFILE_STAGES - 1]; - assign loop_out_valid = loop_valid_hold[REGFILE_STAGES - 1]; - - assign loop_valid = loop_out_valid | setup_submit; - - assign regs.pc_back_group = wb.group; - assign regs.mask_back_group = wb.group; - - assign regs.pc_wb_write = (loop_out_valid & loop_out.pc_update) | setup_submit; - assign regs.mask_wb_write = mask_wb | setup_mask; - assign regs.sgpr_write.write = scalar_wb | setup_gpr; - - assign regs.vgpr_write.vgpr = loop_out.vgpr; - assign regs.vgpr_write.group = loop_out.group; - - assign mask_wb = loop_out_valid & loop_out.mask_update; - assign scalar_wb = wb.valid & wb.writeback & wb.scalar; - - always_comb begin - loop_group = setup.write.group; - regs.pc_wb = setup.write.pc; - regs.pc_wb_group = setup.write.group; - - if (loop_out_valid) begin - loop_group = loop_out.group; - regs.pc_wb = regs.pc_back + word_ptr'(loop_out.pc_add); - regs.pc_wb_group = loop_out.group; - end - - regs.mask_wb = setup.write.mask; - regs.mask_wb_group = setup.write.group; - - if (mask_wb) begin - regs.mask_wb = loop_out.mask; - regs.mask_wb_group = loop_out.group; - end - - regs.sgpr_write.data = setup.write.gpr_value; - regs.sgpr_write.sgpr = setup.write.gpr.sgpr; - regs.sgpr_write.group = setup.write.group; - - if (scalar_wb) begin - regs.sgpr_write.data = wb.lanes[0]; - regs.sgpr_write.sgpr = wb.dest.sgpr; - regs.sgpr_write.group = wb.group; - end - - for (int i = 0; i < SHADER_LANES; ++i) - regs.vgpr_write.data[i] = loop_out.lanes[i]; - - regs.vgpr_write.mask = regs.mask_back; - if (~loop_out_valid | ~loop_out.vgpr_update) - regs.vgpr_write.mask = '0; - end - - always_ff @(posedge clk) begin - // Blocking assignments por bug de verilator (ver for de lanes abajo) - - for (int i = REGFILE_STAGES - 1; i > 0; --i) - loop_hold[i] = loop_hold[i - 1]; - - loop_hold[0].mask = wb.mask; - 
loop_hold[0].vgpr = wb.dest.vgpr.num; - loop_hold[0].group = wb.group; - loop_hold[0].pc_add = wb.pc_add; - loop_hold[0].pc_update = wb.pc_update; - loop_hold[0].mask_update = wb.mask_update; - loop_hold[0].vgpr_update = wb.writeback & ~wb.scalar; - - // https://github.com/verilator/verilator/issues/4804 - for (int i = 0; i < SHADER_LANES; ++i) - loop_hold[0].lanes[i] = wb.lanes[i]; - - if (wb.pc_inc) - loop_hold[0].pc_add = pc_offset'(1); - end - - always_ff @(posedge clk or negedge rst_n) - if (~rst_n) begin - setup_gpr <= 0; - setup_mask <= 0; - setup_submit <= 0; - - setup.set_done.gpr <= 0; - setup.set_done.mask <= 0; - setup.set_done.submit <= 0; - - for (int i = 0; i < $size(loop_valid_hold); ++i) - loop_valid_hold[i] <= 0; - end else begin - setup_gpr <= (setup_gpr & scalar_wb) | setup.write.gpr_set; - setup_mask <= (setup_mask & mask_wb) | setup.write.mask_set; - setup_submit <= (setup_submit & loop_out_valid) | setup.write.pc_set; - - setup.set_done.gpr <= setup_gpr & ~scalar_wb; - setup.set_done.mask <= setup_mask & ~mask_wb; - setup.set_done.submit <= setup_submit & ~loop_out_valid; - - loop_valid_hold[0] <= wb.valid; - for (int i = 1; i < REGFILE_STAGES; ++i) - loop_valid_hold[i] <= loop_valid_hold[i - 1]; - end - -endmodule diff --git a/platform/wavelet3d/gfx_shader_fpint.sv b/platform/wavelet3d/gfx_shader_fpint.sv deleted file mode 100644 index a418dcc..0000000 --- a/platform/wavelet3d/gfx_shader_fpint.sv +++ /dev/null @@ -1,932 +0,0 @@ -// -> 4,4,4,4,4,4,4,4 -> 8,8,8,8 -> 16,16 -> 32 -localparam int FPINT_CLZ_STAGES = 4; - -localparam bit[$clog2($bits(gfx::float_mant_ext)):0] FPINT_MAX_SHIFT - = 1 << $clog2($bits(gfx::float_mant_ext)); - -typedef logic[$clog2(FPINT_MAX_SHIFT):0] fpint_shift; - -/* Las 15 etapas son: - * - setup - * - mulclass - * - mnorm - * - minmax - * - expdiff - * - shiftr - * - addsub - * - clz0-clz3 - * - shiftl - * - round - * - rnorm - * - encode - */ - -typedef struct -{ - gfx::float a, - b, - a_mul, - b_mul; -} 
fpint_setup_mulclass; - -typedef struct -{ - gfx::float b; - gfx::float_exp exp; - gfx::float_class a_class, - b_class; - gfx::udword product; - logic sign, - overflow; -} fpint_mulclass_mnorm; - -typedef struct -{ - gfx::float a, - b; - gfx::float_class a_class, - b_class; - logic slow, - zero, - guard, - round, - sticky, - slow_in, - overflow; -} fpint_mnorm_minmax; - -typedef struct -{ - gfx::float max, - min; - gfx::float_class max_class, - min_class; - logic slow, - zero, - guard, - round, - sticky; -} fpint_minmax_expdiff; - -typedef struct -{ - gfx::float max, - min; - gfx::float_class max_class, - min_class; - fpint_shift exp_shift; - logic slow, - zero, - guard, - round, - sticky; -} fpint_expdiff_shiftr; - -typedef struct -{ - gfx::float max, - min; - gfx::float_class max_class, - min_class; - gfx::float_mant_ext max_mant, - min_mant, - sticky_mask; - logic slow, - zero, - guard, - round, - sticky, - int_sign; -} fpint_shiftr_addsub; - -typedef struct -{ - gfx::float max; - gfx::word add_sub; - logic slow, - zero, - guard, - round, - sticky; -} fpint_clz_hold; - -typedef fpint_clz_hold fpint_addsub_clz; - -typedef struct -{ - fpint_clz_hold hold; - fpint_shift shift; -} fpint_clz_shiftl; - -typedef struct -{ - gfx::float val; - logic slow, - zero, - guard, - round, - sticky, - overflow, - sticky_last; -} fpint_shiftl_round; - -typedef struct -{ - gfx::float val; - logic slow, - zero, - exp_step, - overflow; -} fpint_round_rnorm; - -typedef struct -{ - gfx::float val; - logic slow, - zero, - overflow; -} fpint_rnorm_encode; - -module gfx_shader_fpint -import gfx::*; -( - input logic clk, - rst_n, - - input fpint_op op, - input wave_exec wave, - input logic abort, - in_valid, - - gfx_regfile_io.ab read_data, - - gfx_wb.tx wb -); - - localparam int FPINT_STAGES = 7 + FPINT_CLZ_STAGES + 4; - - struct - { - fpint_op op; - wave_exec wave; - } stage[FPINT_STAGES]; - - logic stage_valid[FPINT_STAGES]; - - assign wb.dest = stage[FPINT_STAGES - 1].wave.dest; - 
assign wb.mask = 'x; - assign wb.group = stage[FPINT_STAGES - 1].wave.group; - assign wb.pc_add = 'x; - assign wb.pc_inc = 1; - assign wb.scalar = stage[FPINT_STAGES - 1].wave.dest_scalar; - assign wb.pc_update = wb.writeback; - assign wb.writeback = stage[FPINT_STAGES - 1].op.writeback; - assign wb.mask_update = 0; - - // Ojo: stage_valid[0], pero stage[0] no - assign stage_valid[0] = in_valid; - - genvar lane; - generate - for (lane = 0; lane < SHADER_LANES; ++lane) begin: lanes - gfx_shader_fpint_lane unit - ( - .clk(clk), - .a(read_data.a[lane]), - .b(read_data.b[lane]), - .q(wb.lanes[lane]), - .mul_float_0(op.setup_mul_float), - .unit_b_0(op.setup_unit_b), - .put_hi_2(stage[2 - 1].op.mnorm_put_hi), - .put_lo_2(stage[2 - 1].op.mnorm_put_lo), - .put_mul_2(stage[2 - 1].op.mnorm_put_mul), - .zero_b_2(stage[2 - 1].op.mnorm_zero_b), - .zero_flags_2(stage[2 - 1].op.mnorm_zero_flags), - .abs_3(stage[3 - 1].op.minmax_abs), - .swap_3(stage[3 - 1].op.minmax_swap), - .zero_min_3(stage[3 - 1].op.minmax_zero_min), - .copy_flags_3(stage[3 - 1].op.minmax_copy_flags), - .int_signed_5(stage[5 - 1].op.shiftr_int_signed), - .copy_flags_6(stage[6 - 1].op.addsub_copy_flags), - .int_operand_6(stage[6 - 1].op.addsub_int_operand), - .force_nop_7(stage[7 - 1].op.clz_force_nop), - .copy_flags_11(stage[11 - 1].op.shiftl_copy_flags), - .copy_flags_12(stage[12 - 1].op.round_copy_flags), - .enable_12(stage[12 - 1].op.round_enable), - .enable_14(stage[14 - 1].op.encode_enable) - ); - end - endgenerate - - always_ff @(posedge clk) begin - stage[0].op <= op; - stage[0].wave <= wave; - - for (int i = 1; i < FPINT_STAGES; ++i) - stage[i] <= stage[i - 1]; - end - - always_ff @(posedge clk or negedge rst_n) - if (~rst_n) begin - for (int i = 1; i < FPINT_STAGES; ++i) - stage_valid[i] <= 0; - - wb.valid <= 0; - end else begin - for (int i = 1; i < FPINT_STAGES; ++i) - stage_valid[i] <= stage_valid[i - 1]; - - // Se levanta 1 ciclo luego que in_valid - stage_valid[2] <= stage_valid[1] & ~abort; - - 
wb.valid <= stage_valid[FPINT_STAGES - 1]; - end - -endmodule - -module gfx_shader_fpint_lane -import gfx::*; -( - input logic clk, - - input word a, - b, - - input logic mul_float_0, - unit_b_0, - put_hi_2, - put_lo_2, - put_mul_2, - zero_b_2, - zero_flags_2, - abs_3, - swap_3, - zero_min_3, - copy_flags_3, - int_signed_5, - copy_flags_6, - int_operand_6, - force_nop_7, - copy_flags_11, - copy_flags_12, - enable_12, - enable_14, - - output word q -); - - /* Notas de implementación para floating-point - * - * === PRODUCTO === - * - * Queremos calcular q = a * b. - * - * Donde a = (-1)^s * 1.m * 2^f, - * b = (-1)^t * 1.n * 2^g - * - * Entonces q = (-1)^(s + t) (1.m * 1.n) 2^(f + g) - * - * El producto es entre números >= 1.0 y < 2.0. En el peor caso: - * Mejor caso: 1.000... * 1.000... ~ 1.000... - * Peor caso: 1.999... * 1.999... ~ 3.999... = 2^1 * 1.999 - * - * Así que, si el producto es >= 2, hay que hacerle >> 1 a la mantisa - * y sumarle 1 al exponente para normalizar. - * - * - * === SUMA/RESTA === - * - * Queremos calcular q = a + b. Curiosamente, eso es más complicado que a * b. - * Hay que ajustar el exponente del menor entre a y b para que coincida - * con el del mayor (desnormalizando), realizar la operación y finalmente - * renormalizar. Se hace suma o resta dependiendo de relaciones de signos, - * no según la operación de entrada (eso último solo le hace xor al signo de b). - * Recordar aquí que IEEE 754 es una especie de signo-magnitud y no complemento. - * - * En el caso de una resta, el exponente normalizado puede ser mucho más - * pequeño que cualquiera de los exponentes de entrada. Necesitamos - * entonces de lǵoica CLZ (count leading zeros) para renormalizar. - * - * - * === CONVERSIÓN INTEGER->FP === - * - * Esto simplemente usa el mismo datapath de fadd, con el abs del entero - * como entrada como entrada de clz. El exponente de referencia se fija - * en 30 (aludiendo al segundo msb de un entero de 32 bits). 
A partir de - * ese punto es idéntico a un fadd, las etapas de clz se encargan de ajustar - * el exponente. - */ - - fpint_setup_mulclass setup_mulclass; - fpint_mulclass_mnorm mulclass_mnorm; - fpint_mnorm_minmax mnorm_minmax; - fpint_minmax_expdiff minmax_expdiff; - fpint_expdiff_shiftr expdiff_shiftr; - fpint_shiftr_addsub shiftr_addsub; - fpint_addsub_clz addsub_clz; - fpint_clz_shiftl clz_shiftl; - fpint_shiftl_round shiftl_round; - fpint_round_rnorm round_rnorm; - fpint_rnorm_encode rnorm_encode; - - gfx_shader_fpint_setup stage_0 - ( - .clk(clk), - .a(a), - .b(b), - .out(setup_mulclass), - .unit_b(unit_b_0), - .mul_float(mul_float_0) - ); - - gfx_shader_fpint_mulclass stage_1 - ( - .clk(clk), - .in(setup_mulclass), - .out(mulclass_mnorm) - ); - - gfx_shader_fpint_mnorm stage_2 - ( - .clk(clk), - .in(mulclass_mnorm), - .out(mnorm_minmax), - .put_hi(put_hi_2), - .put_lo(put_lo_2), - .put_mul(put_mul_2), - .zero_b(zero_b_2), - .zero_flags(zero_flags_2) - ); - - gfx_shader_fpint_minmax stage_3 - ( - .clk(clk), - .in(mnorm_minmax), - .out(minmax_expdiff), - .abs(abs_3), - .swap(swap_3), - .zero_min(zero_min_3), - .copy_flags(copy_flags_3) - ); - - gfx_shader_fpint_expdiff stage_4 - ( - .clk(clk), - .in(minmax_expdiff), - .out(expdiff_shiftr) - ); - - gfx_shader_fpint_shiftr stage_5 - ( - .clk(clk), - .in(expdiff_shiftr), - .out(shiftr_addsub), - .int_signed(int_signed_5) - ); - - gfx_shader_fpint_addsub stage_6 - ( - .clk(clk), - .in(shiftr_addsub), - .out(addsub_clz), - .copy_flags(copy_flags_6), - .int_operand(int_operand_6) - ); - - gfx_shader_fpint_clz stage_7_8_9_10 - ( - .clk(clk), - .in(addsub_clz), - .out(clz_shiftl), - .force_nop(force_nop_7) - ); - - gfx_shader_fpint_shiftl stage_11 - ( - .clk(clk), - .in(clz_shiftl), - .out(shiftl_round), - .copy_flags(copy_flags_11) - ); - - gfx_shader_fpint_round stage_12 - ( - .clk(clk), - .in(shiftl_round), - .out(round_rnorm), - .enable(enable_12), - .copy_flags(copy_flags_12) - ); - - gfx_shader_fpint_rnorm 
stage_13 - ( - .clk(clk), - .in(round_rnorm), - .out(rnorm_encode) - ); - - gfx_shader_fpint_encode stage_14 - ( - .clk(clk), - .q(q), - .in(rnorm_encode), - .enable(enable_14) - ); - -endmodule - -// Stage 0: argumentos de mul -module gfx_shader_fpint_setup -import gfx::*; -( - input logic clk, - - input word a, - b, - input logic mul_float, - unit_b, - - output fpint_setup_mulclass out -); - - always_ff @(posedge clk) begin - out.a <= a; - out.b <= b; - out.a_mul <= a; - out.b_mul <= b; - - /* Nótese que el orden es sign-exp-mant. Esto coloca el '1.' implícito - * en la posición correcta para multiplicar las mantisas. - */ - if (mul_float) begin - out.a_mul.exp <= 1; - out.b_mul.exp <= 1; - out.a_mul.sign <= 0; - out.b_mul.sign <= 0; - end - - if (unit_b) begin - out.b_mul.exp <= 0; - out.b_mul.mant <= 1; - out.b_mul.sign <= 0; - end - end - -endmodule - -// Stage 1: multiplicación de fp o enteros -module gfx_shader_fpint_mulclass -import gfx::*; -( - input logic clk, - - input fpint_setup_mulclass in, - - output fpint_mulclass_mnorm out -); - - always_ff @(posedge clk) begin - out.b <= in.b; - out.sign <= in.a.sign ^ in.b.sign; - out.a_class <= classify_float(in.a); - out.b_class <= classify_float(in.b); - out.product <= in.a_mul * in.b_mul; - {out.overflow, out.exp} <= {1'b0, in.a.exp} + {1'b0, in.b.exp} - {1'b0, FLOAT_EXP_BIAS}; - end - -endmodule - -// Stage 2: normalización -module gfx_shader_fpint_mnorm -import gfx::*; -( - input logic clk, - - input fpint_mulclass_mnorm in, - input logic put_hi, - put_lo, - put_mul, - zero_b, - zero_flags, - - output fpint_mnorm_minmax out -); - - word product_hi, product_lo; - logic guard, lo_msb, lo_reduce, round, slow_in_next; - float_mant_full hi; - logic[$bits(float_mant_full) - 3:0] lo; - - assign lo_msb = lo[$bits(lo) - 1]; - assign lo_reduce = |lo[$bits(lo) - 2:0]; - assign slow_in_next = is_float_special(in.a_class) | is_float_special(in.b_class); - assign {product_hi, product_lo} = in.product; - assign {hi, 
guard, round, lo} = in.product[2 * $bits(float_mant_full) - 1:0]; - - always_ff @(posedge clk) begin - if (put_mul) begin - out.slow <= slow_in_next | (in.overflow & ~in.a_class.exp_min & ~in.b_class.exp_min); /* overflow is only reported when neither operand has exp_min set; either one sets out.zero below */ - out.zero <= in.a_class.exp_min | in.b_class.exp_min; - end else begin - out.slow <= 0; - out.zero <= 0; - end - - out.a.sign <= in.sign; - out.overflow <= 0; - - if (hi[$bits(hi) - 1]) begin - out.guard <= guard; - out.round <= round; - out.sticky <= lo_msb | lo_reduce; - out.a.mant <= implicit_mant(hi); - {out.overflow, out.a.exp} <= {1'b0, in.exp} + 1; - end else begin - /* The bit right below the msb is necessarily 1, since the msbs of - * both multiplicands are 1. See the assert in implicit_mant(). - */ - out.guard <= round; - out.round <= lo_msb; - out.sticky <= lo_reduce; - - out.a.exp <= in.exp; - out.a.mant <= implicit_mant({hi[$bits(hi) - 2:0], guard}); - end - - unique case (1'b1) - put_mul: ; - - put_hi: - out.a <= product_hi; - - put_lo: - out.a <= product_lo; - endcase - - out.a_class <= in.a_class; - out.slow_in <= slow_in_next; - - if (zero_flags) begin - out.a_class <= classify_float(0); - out.slow_in <= 0; - end - - if (zero_b) begin - out.b <= 0; - out.b_class <= classify_float(0); - end else begin - out.b <= in.b; - out.b_class <= in.b_class; - end - end - -endmodule - -// Stage 3: sort so that abs(max) >= abs(min) -module gfx_shader_fpint_minmax -import gfx::*; -( - input logic clk, - - input fpint_mnorm_minmax in, - input logic abs, - swap, - zero_min, - copy_flags, - - output fpint_minmax_expdiff out -); - - logic abs_b_gt_abs_a, b_gt_a; - - /* Wikipedia says: - * - * A property of the single- and double-precision formats is that - * their encoding allows one to easily sort them without using - * floating-point hardware, as if the bits represented sign-magnitude - * integers, although it is unclear whether this was a design - * consideration (it seems noteworthy that the earlier IBM hexadecimal - * floating-point representation also had this 
property for normalized - * numbers). - */ - assign abs_b_gt_abs_a = {in.b.exp, in.b.mant} > {in.a.exp, in.a.mant}; - - always_comb begin - unique case ({in.b.sign, in.a.sign}) - 2'b00: b_gt_a = abs_b_gt_abs_a; - 2'b01: b_gt_a = 1; - 2'b10: b_gt_a = 0; - 2'b11: b_gt_a = {in.a.exp, in.a.mant} > {in.b.exp, in.b.mant}; /* both negative: the larger magnitude is the smaller value */ - endcase - - if (abs) - b_gt_a = abs_b_gt_abs_a; - end - - always_ff @(posedge clk) begin - if (b_gt_a ^ swap) begin - out.max <= in.b; - out.min <= in.a; - out.max_class <= in.b_class; - out.min_class <= in.a_class; - end else begin - out.max <= in.a; - out.min <= in.b; - out.max_class <= in.a_class; - out.min_class <= in.b_class; - end - - if (zero_min) begin - out.min <= 0; - out.min_class <= classify_float(0); - end - - out.guard <= in.guard; - out.round <= in.round; - out.sticky <= in.sticky; - - if (copy_flags) begin - out.slow <= in.slow | in.overflow; - out.zero <= in.zero; - end else begin - out.slow <= in.slow_in; - out.zero <= 0; - end - end - -endmodule - -// Stage 4: exp_shift amount -module gfx_shader_fpint_expdiff -import gfx::*; -( - input logic clk, - - input fpint_minmax_expdiff in, - - output fpint_expdiff_shiftr out -); - - float_exp exp_delta; - - assign exp_delta = in.max.exp - in.min.exp; - - always_ff @(posedge clk) begin - out.max <= in.max; - out.min <= in.min; - out.slow <= in.slow; - out.zero <= in.zero; - out.guard <= in.guard; - out.round <= in.round; - out.sticky <= in.sticky; - out.max_class <= in.max_class; - out.min_class <= in.min_class; - - out.exp_shift <= exp_delta[$bits(out.exp_shift) - 1:0]; - if (exp_delta > {{($bits(exp_delta) - $bits(FPINT_MAX_SHIFT)){1'b0}}, FPINT_MAX_SHIFT}) - out.exp_shift <= FPINT_MAX_SHIFT; - end - -endmodule - -// Stage 5: shifts and abs(max) for signed integers -module gfx_shader_fpint_shiftr -import gfx::*; -( - input logic clk, - - input fpint_expdiff_shiftr in, - input logic int_signed, - - output fpint_shiftr_addsub out -); - - always_ff @(posedge clk) begin - out.min <= in.min; - out.slow <= in.slow; - out.zero 
<= in.zero; - out.guard <= in.guard; - out.round <= in.round; - out.sticky <= in.sticky; - out.min_class <= in.min_class; - - out.max_mant <= float_prepare_round(in.max, in.max_class); - out.min_mant <= float_prepare_round(in.min, in.min_class) >> in.exp_shift; - out.sticky_mask <= {($bits(out.min_mant)){1'b1}} << in.exp_shift; - - out.max <= in.max; - out.int_sign <= in.max[$bits(in.max) - 1]; - - if (int_signed & in.max[$bits(in.max) - 1]) - out.max <= -in.max; - end - -endmodule - -// Stage 6: suma de mantisas -module gfx_shader_fpint_addsub -import gfx::*; -( - input logic clk, - - input fpint_shiftr_addsub in, - input logic copy_flags, - int_operand, - - output fpint_addsub_clz out -); - - localparam int INT_SHIFT_REF = $bits(word) - 2; - - function word fp_add_sub_arg(float_mant_ext arg); - fp_add_sub_arg = {1'b0, arg, {($bits(fp_add_sub_arg) - $bits(arg) - 1){1'b0}}}; - endfunction - - always_ff @(posedge clk) begin - out.max <= in.max; - out.slow <= in.slow; - out.zero <= in.zero; - out.guard <= in.guard; - out.round <= in.round; - - if (int_operand) begin - out.max.exp <= FLOAT_EXP_BIAS + INT_SHIFT_REF[$bits(float_exp) - 1:0]; - out.max.sign <= in.int_sign; - end - - if (copy_flags) - out.sticky <= in.sticky; - else - out.sticky <= |(float_prepare_round(in.min, in.min_class) & ~in.sticky_mask); - - if (int_operand) - out.add_sub <= in.max; - else if (in.max.sign ^ in.min.sign) - out.add_sub <= fp_add_sub_arg(in.max_mant) - fp_add_sub_arg(in.min_mant); - else - out.add_sub <= fp_add_sub_arg(in.max_mant) + fp_add_sub_arg(in.min_mant); - end - -endmodule - -// Stages 7-10: encontrar el 1 más significativo -module gfx_shader_fpint_clz -import gfx::*; -( - input logic clk, - - input fpint_addsub_clz in, - input logic force_nop, - - output fpint_clz_shiftl out -); - - word clz_in; - fpint_clz_hold hold[FPINT_CLZ_STAGES]; - - assign out.hold = hold[FPINT_CLZ_STAGES - 1]; - - gfx_clz #($bits(word)) clz - ( - .clk(clk), - .clz(out.shift), - .value(clz_in) - ); - - 
always_comb begin - clz_in = in.add_sub; - if (force_nop) - clz_in[$bits(clz_in) - 1:$bits(clz_in) - 2] = 2'b01; - end - - always_ff @(posedge clk) begin - hold[0] <= in; - - for (int i = 1; i < FPINT_CLZ_STAGES; ++i) - hold[i] <= hold[i - 1]; - end - -endmodule - -// Stage 11: normalization -module gfx_shader_fpint_shiftl -import gfx::*; -( - input logic clk, - - input fpint_clz_shiftl in, - input logic copy_flags, - - output fpint_shiftl_round out -); - - localparam int CLZ_EXTEND_BITS = $bits(float_exp) - $bits(in.shift) + 1; - - word normalized; - - assign normalized = in.hold.add_sub << in.shift; - - always_ff @(posedge clk) begin - out.slow <= in.hold.slow; - out.zero <= in.hold.zero; - out.sticky <= in.hold.sticky; - out.val.sign <= in.hold.max.sign; - - {out.val.mant, out.guard, out.round, out.sticky_last} <= - normalized[$bits(normalized) - 2:$bits(normalized) - $bits(float_mant) - 4]; - - {out.overflow, out.val.exp} <= - {1'b0, in.hold.max.exp} - {{CLZ_EXTEND_BITS{1'b0}}, in.shift} + 1; - - if (in.shift[$bits(in.shift) - 1]) - out.zero <= 1; - - if (copy_flags) begin - out.guard <= in.hold.guard; - out.round <= in.hold.round; - out.overflow <= 0; - out.sticky_last <= 0; - end - end - -endmodule - -// Stage 12: rounding -module gfx_shader_fpint_round -import gfx::*; -( - input logic clk, - - input fpint_shiftl_round in, - input logic copy_flags, - enable, - - output fpint_round_rnorm out -); - - always_ff @(posedge clk) begin - out.val <= in.val; - out.slow <= in.slow | (~copy_flags & in.overflow & ~in.zero); - out.zero <= in.zero; - out.exp_step <= 0; - - // This is the most common rounding mode: round to nearest, ties to even - if (enable & in.guard & (in.round | in.sticky | in.sticky_last | in.val.mant[0])) - {out.exp_step, out.val.mant} <= {1'b0, in.val.mant} + 1; /* increment this cycle's input mantissa; out.val.mant still holds the previously registered value here */ - end - -endmodule - -// Stage 13: exponent adjustment after rounding -module gfx_shader_fpint_rnorm -import gfx::*; -( - input logic clk, - - input fpint_round_rnorm in, - - output 
fpint_rnorm_encode out -); - - always_ff @(posedge clk) begin - out.slow <= in.slow; - out.zero <= in.zero; - out.overflow <= 0; - out.val.mant <= in.val.mant; - out.val.sign <= in.val.sign; - - if (in.exp_step) - {out.overflow, out.val.exp} <= {1'b0, in.val.exp} + 1; - else - out.val.exp <= in.val.exp; - end - -endmodule - -// Stage 14: salida y codificación de ceros y NaNs -module gfx_shader_fpint_encode -import gfx::*; -( - input logic clk, - - input fpint_rnorm_encode in, - input logic enable, - - output float q -); - - always_ff @(posedge clk) begin - q <= in.val; - - if (enable) begin - if (&in.val.exp | in.slow | in.overflow) begin - q.exp <= FLOAT_EXP_MAX; - q.mant <= 1; - end else if (in.zero) begin - q.exp <= 0; - q.mant <= 0; - end - end - end - -endmodule diff --git a/platform/wavelet3d/gfx_shader_front.sv b/platform/wavelet3d/gfx_shader_front.sv deleted file mode 100644 index 52074fd..0000000 --- a/platform/wavelet3d/gfx_shader_front.sv +++ /dev/null @@ -1,746 +0,0 @@ -typedef struct -{ - logic valid, - retry; - gfx::group_id group; - gfx_isa::insn_word insn; -} front_wave; - -typedef struct -{ - gfx::xgpr_num dest; - logic dest_scalar; -} front_reg_passthru; - -typedef logic[4:0] icache_line_num; - -typedef logic[$bits(gfx::oword_ptr) - $bits(icache_line_num) - 1:0] icache_tag; - -typedef struct packed -{ - icache_tag tag; - icache_line_num line; -} icache_line_tag; - -typedef struct packed -{ - icache_line_tag line_tag; - logic[2:0] word_num; -} icache_ptr; - -module gfx_shader_front -import gfx::*; -( - input logic clk, - rst_n, - - gfx_axib.m fetch_mem, - - input logic icache_flush, - - gfx_regfile_io.read reg_read, - gfx_regfile_io.bind_ reg_bind, - - gfx_front_back.front front -); - - word fetch_insn, port_insn; - logic fetch_hit, p0_writeback; - front_wave bind_wave, dec_wave, port_dec_wave; - front_reg_passthru reg_passthru; - - assign front.execute.wave.dest = reg_passthru.dest; - assign front.execute.wave.dest_scalar = 
reg_passthru.dest_scalar; - - gfx_shader_bind bind_ - ( - .clk, - .rst_n, - .mem(fetch_mem), - .wave(bind_wave), - .regs(reg_bind), - .loop_valid(front.loop.valid), - .loop_group(front.loop.group), - .icache_flush - ); - - gfx_shader_read_regs reg_dec - ( - .clk, - .rst_n, - .in(bind_wave), - .out(dec_wave), - .read(reg_read), - .passthru(reg_passthru) - ); - - gfx_shader_decode_class class_dec - ( - .clk, - .rst_n, - .wave(dec_wave), - .out_group(front.execute.wave.group), - .port_wave(port_dec_wave), - .dispatch(front.dispatch), - .p0_writeback - ); - - gfx_shader_decode_fpint p0_dec - ( - .clk, - .op(front.execute.p0), - .insn(port_dec_wave.insn), - .writeback(p0_writeback) - ); - -endmodule - -module gfx_shader_bind -import gfx::*; -( - input logic clk, - rst_n, - - gfx_axib.m mem, - - input logic icache_flush, - - input logic loop_valid, - input group_id loop_group, - - gfx_regfile_io.bind_ regs, - - output front_wave wave -); - - localparam int ICACHE_STAGES = 6; - localparam int BIND_STAGES = REGFILE_STAGES + ICACHE_STAGES; - - gfx_beats #($bits(group_id)) runnable_in(), runnable_out(); - - logic ar_stall, request_ready, request_valid, valids[BIND_STAGES]; - group_id groups[BIND_STAGES]; - icache_line_tag araddr, request_addr; - - assign mem.bready = 0; - assign mem.wvalid = 0; - assign mem.awvalid = 0; - - assign mem.arlen = ($bits(mem.arlen))'($bits(oword) / $bits(word) - 1); - assign mem.araddr = {araddr, ($clog2($bits(oword)) - $clog2($bits(word)) + SUBWORD_BITS)'('0)}; - assign mem.arburst = 2'b01; // Incremental mode - - assign runnable_in.tx.data = loop_group; - assign runnable_in.tx.valid = loop_valid; - - assign regs.pc_front_group = runnable_out.rx.data; - assign runnable_out.rx.ready = 1; - - assign wave.group = groups[$size(groups) - 1]; - - gfx_skid_buf #($bits(araddr)) ar_skid - ( - .clk, - .in(request_addr), - .out(araddr), - .stall(ar_stall) - ); - - gfx_skid_flow ar_flow - ( - .clk, - .rst_n, - .stall(ar_stall), - .in_ready(request_ready), - 
.in_valid(request_valid), - .out_ready(mem.arready), - .out_valid(mem.arvalid) - ); - - //TODO: Podríamos quitar ~25 entries sin afectar throughput, latencia o correctitud - gfx_fifo #(.WIDTH($bits(group_id)), .DEPTH(1 << $bits(group_id))) runnable - ( - .clk, - .rst_n, - .in(runnable_in.rx), - .out(runnable_out.tx) - ); - - gfx_shader_bind_icache icache - ( - .clk, - .rst_n, - - .icache_flush, - .read_addr(regs.pc_front), - .read_valid(valids[REGFILE_STAGES - 1]), - - .request_addr, - .request_valid, - .request_ready, - - .fetch_data(mem.rdata), - .fetch_last(mem.rlast), - .fetch_valid(mem.rvalid), - .fetch_ready(mem.rready), - - .insn(wave.insn), - .insn_retry(wave.retry), - .insn_valid(wave.valid) - ); - - always_ff @(posedge clk) begin - groups[0] <= runnable_out.rx.data; - for (int i = 1; i < $size(groups); ++i) - groups[i] <= groups[i - 1]; - end - - always_ff @(posedge clk or negedge rst_n) - if (~rst_n) - for (int i = 0; i < $size(valids); ++i) - valids[i] <= 0; - else begin - valids[0] <= runnable_out.rx.valid; - for (int i = 1; i < $size(valids); ++i) - valids[i] <= valids[i - 1]; - end - -endmodule - -module gfx_shader_bind_icache -import gfx::*; -( - input logic clk, - rst_n, - - input logic icache_flush, - - input logic read_valid, - input icache_ptr read_addr, - - input logic fetch_last, - fetch_valid, - input word fetch_data, - output logic fetch_ready, - - input logic request_ready, - output logic request_valid, - output icache_line_tag request_addr, - - output logic insn_valid, - insn_retry, - output word insn -); - - // Dan Gisselquist limita a (1 << 3) bursts por defecto. 
- // Ver LGMAXBURST en axixbar.v - localparam int PENDING_FIFO_DEPTH = 8; - - enum int unsigned - { - FLUSH, - RUN - } state; - - struct - { - logic valid, - accessed, - hit; - icache_tag tag; - oword data; - } cache[1 << $bits(icache_line_num)], read, read_hold; - - gfx_beats #($bits(icache_line_tag)) pending_in(), pending_out(); - - logic accessed_write, accessed_write_enable, burst, fetch_done, hit_write, - in_flush, hit_commit, hit_write_enable, retry_4, retry_5, rollback, - tag_hit, valid_1, valid_2, valid_3, valid_4, valid_5, valid_write, - valid_write_enable; - - icache_ptr read_addr_1, read_addr_2, read_addr_3, read_addr_4, read_addr_5; - icache_tag tag_write; - icache_line_num accessed_write_line, flush_ptr, hit_write_line, valid_write_line; - icache_line_tag pending_pop; - - oword data_write; - word[1:0] data_5; - word[7:0] fetch_shift; - qword[1:0] data_3; - udword[1:0] data_4; - - assign data_3 = read.data; - assign tag_hit = read.tag == read_addr_3.line_tag.tag; - assign fetch_ready = ~fetch_done; - assign pending_pop = pending_out.rx.data; - - assign request_addr = read_addr_4.line_tag; - assign request_valid = burst & pending_in.tx.ready; - assign pending_in.tx.data = read_addr_4.line_tag; - assign pending_in.tx.valid = burst & request_ready; - assign pending_out.rx.ready = fetch_done & ~hit_commit & ~rollback; - - gfx_fifo #(.WIDTH($bits(icache_line_tag)), .DEPTH(PENDING_FIFO_DEPTH)) pending - ( - .clk, - .rst_n, - .in(pending_in.rx), - .out(pending_out.tx) - ); - - always_comb - unique case (state) - FLUSH: in_flush = 1; - RUN: in_flush = 0; - endcase - - always_ff @(posedge clk or negedge rst_n) - if (~rst_n) begin - state <= FLUSH; - flush_ptr <= '0; - fetch_done <= 0; - - valid_1 <= 0; - valid_2 <= 0; - valid_3 <= 0; - valid_4 <= 0; - valid_5 <= 0; - - burst <= 0; - end else begin - unique case (state) - FLUSH: - if (~icache_flush & &flush_ptr) - state <= RUN; - - RUN: - if (icache_flush) - state <= FLUSH; - endcase - - flush_ptr <= flush_ptr + 
1; - if (icache_flush) - flush_ptr <= '0; - - if (fetch_done) - fetch_done <= hit_commit | ~pending_out.rx.valid | rollback; - else if (fetch_ready & fetch_valid) - fetch_done <= fetch_last; - - valid_1 <= read_valid; - valid_2 <= valid_1; - valid_3 <= valid_2; - valid_4 <= valid_3; - valid_5 <= valid_4; - - burst <= valid_3 & ~tag_hit & ~read.accessed & (~read.valid | read.hit); - end - - always_ff @(posedge clk) begin - tag_write <= pending_pop.tag; - data_write <= fetch_shift; - - valid_write <= 1; - valid_write_line <= pending_pop.line; - valid_write_enable <= fetch_done & ~hit_commit & pending_out.rx.valid & ~rollback; - - accessed_write <= 0; - accessed_write_enable <= 1; - - if (rollback) - accessed_write_line <= read_addr_5.line_tag.line; - else if (fetch_done & ~hit_commit & pending_out.rx.valid) - accessed_write_line <= pending_pop.line; - else begin - accessed_write <= 1; - accessed_write_line <= read_addr.line_tag.line; - accessed_write_enable <= read_valid; - end - - hit_write <= hit_commit; - if (hit_commit) begin - hit_write_line <= read_addr_4.line_tag.line; - hit_write_enable <= 1; - end else begin - hit_write_line <= pending_pop.line; - hit_write_enable <= fetch_done & pending_out.rx.valid & ~rollback; - end - - if (in_flush) begin - valid_write <= 0; - valid_write_line <= flush_ptr; - valid_write_enable <= 1; - - accessed_write <= 0; - accessed_write_line <= flush_ptr; - accessed_write_enable <= 1; - - hit_write <= 0; - hit_write_line <= flush_ptr; - hit_write_enable <= 1; - end - - if (valid_write_enable) begin - cache[valid_write_line].tag <= tag_write; - cache[valid_write_line].data <= data_write; - cache[valid_write_line].valid <= valid_write; - end - - if (accessed_write_enable) - cache[accessed_write_line].accessed <= accessed_write; - - if (hit_write_enable) - cache[hit_write_line].hit <= hit_write; - - read_addr_1 <= read_addr; - - read_hold <= cache[read_addr_1.line_tag.line]; - read_addr_2 <= read_addr_1; - - read <= read_hold; - 
read_addr_3 <= read_addr_2; - - data_4 <= data_3[read_addr_3.word_num[2]]; - retry_4 <= ~tag_hit | ~read.valid; - hit_commit <= valid_3 & tag_hit & read.valid; - read_addr_4 <= read_addr_3; - - data_5 <= data_4[read_addr_4.word_num[1]]; - retry_5 <= retry_4; - rollback <= burst & (~request_valid | ~pending_in.tx.valid); - read_addr_5 <= read_addr_4; - - insn <= data_5[read_addr_5.word_num[0]]; - insn_retry <= retry_5; - insn_valid <= valid_5; - - if (fetch_ready & fetch_valid) begin - fetch_shift[0] <= fetch_data; - for (int i = 1; i < $size(fetch_shift); ++i) - fetch_shift[i] <= fetch_shift[i - 1]; - end - end - -endmodule - -module gfx_shader_read_regs -import gfx::*; -import gfx_isa::*; -( - input logic clk, - rst_n, - - input front_wave in, - - gfx_regfile_io.read read, - - output front_wave out, - output front_reg_passthru passthru -); - - // + 1 por next-cycle de read.op - localparam int PASSTHRU_DEPTH = REG_READ_STAGES + 1 - 2; - localparam int HOLD_DEPTH = PASSTHRU_DEPTH - 2; - - logic reg_rev; - logic valid[HOLD_DEPTH]; - front_wave out_hold[HOLD_DEPTH]; - front_reg_passthru passthru_hold[PASSTHRU_DEPTH]; - - assign passthru = passthru_hold[$size(passthru_hold) - 1]; - - assign reg_rev = in.insn.reg_rev; - - always_comb begin - out = out_hold[$size(out_hold) - 1]; - out.valid = valid[$size(valid) - 1]; - end - - always_ff @(posedge clk) begin - out_hold[0] <= in; - for (int i = 1; i < $size(out_hold); ++i) - out_hold[i] <= out_hold[i - 1]; - - passthru_hold[0].dest <= in.insn.dst_src.rr.rd; - unique case (in.insn.reg_mode) - REGS_SVS, REGS_SSS: - passthru_hold[0].dest_scalar <= 1; - - REGS_VVS, REGS_VVV: - passthru_hold[0].dest_scalar <= 0; - endcase - - for (int i = 1; i < $size(passthru_hold); ++i) - passthru_hold[i] <= passthru_hold[i - 1]; - - read.op.group <= in.group; - - read.op.b_imm <= in.insn.dst_src.rr.b.imm; - read.op.a_sgpr <= in.insn.dst_src.rr.ra.sgpr; - read.op.b_sgpr <= in.insn.dst_src.rr.b.read.r.sgpr; - read.op.a_vgpr <= 
in.insn.dst_src.rr.ra.vgpr.num; - read.op.b_vgpr <= in.insn.dst_src.rr.b.read.r.vgpr.num; - read.op.b_is_imm <= in.insn.dst_src.rr.b_is_imm; - read.op.b_is_const <= in.insn.dst_src.rr.b.read.from_consts; - read.op.scalar_rev <= reg_rev; - - unique case (in.insn.reg_mode) - REGS_SVS, REGS_VVS: begin - read.op.a_scalar <= reg_rev; - read.op.b_scalar <= ~reg_rev; - end - - REGS_SSS: begin - read.op.a_scalar <= 1; - read.op.b_scalar <= 1; - end - - REGS_VVV: begin - read.op.a_scalar <= 0; - read.op.b_scalar <= 0; - end - endcase - end - - always_ff @(posedge clk or negedge rst_n) - if (~rst_n) - for (int i = 0; i < HOLD_DEPTH; ++i) - valid[i] <= 0; - else begin - valid[0] <= in.valid; - - for (int i = 1; i < HOLD_DEPTH; ++i) - valid[i] <= valid[i - 1]; - end - -endmodule - -module gfx_shader_decode_class -import gfx::*; -import gfx_isa::*; -( - input logic clk, - rst_n, - - input front_wave wave, - output front_wave port_wave, - output group_id out_group, - - output shader_dispatch dispatch, - output logic p0_writeback -); - - logic is_fsu, is_mem, is_group, hold_valid, retry; - front_wave hold_wave; - - assign p0_writeback = ~(is_mem | is_fsu | is_group | retry); - - always_comb begin - port_wave = hold_wave; - port_wave.valid = hold_valid; - end - - always_ff @(posedge clk) begin - hold_wave <= wave; - out_group <= port_wave.group; - end - - always_ff @(posedge clk or negedge rst_n) - // Intencionalmente repetitivo - if (~rst_n) begin - is_fsu <= 0; - is_mem <= 0; - is_group <= 0; - - retry <= 0; - hold_valid <= 0; - - dispatch <= '0; - end else begin - is_fsu <= 0; - is_mem <= 0; - is_group <= 0; - - retry <= wave.retry; - hold_valid <= wave.valid; - - unique case (wave.insn.insn_class) - INSN_FPINT: ; // p0 no tiene ready - INSN_MEM: is_mem <= 1; - INSN_SFU: is_fsu <= 1; - INSN_GROUP: is_group <= 1; - - default: - {is_mem, is_fsu, is_group} <= 'x; - endcase - - dispatch.p1 <= is_mem; - dispatch.p2 <= is_fsu; - dispatch.p3 <= is_group; - - if (~hold_valid | retry) 
begin - dispatch.p1 <= 0; - dispatch.p2 <= 0; - dispatch.p3 <= 0; - end - - dispatch.valid <= hold_valid; - end - -endmodule - -module gfx_shader_decode_fpint -import gfx::*; -import gfx_isa::*; -( - input logic clk, - - input insn_word insn, - input logic writeback, - - output fpint_op op -); - - always_ff @(posedge clk) begin - unique case (insn.by_class.fpint.op) - INSN_FPINT_MOV: begin - op.setup_mul_float <= 0; - op.setup_unit_b <= 1; - op.mnorm_put_hi <= 0; - op.mnorm_put_lo <= 1; - op.mnorm_put_mul <= 0; - op.mnorm_zero_flags <= 1; - op.mnorm_zero_b <= 1; - op.minmax_abs <= 1; - op.minmax_swap <= 0; - op.minmax_zero_min <= 0; - op.minmax_copy_flags <= 1; - op.shiftr_int_signed <= 0; - op.addsub_int_operand <= 0; - op.addsub_copy_flags <= 1; - op.clz_force_nop <= 1; - op.shiftl_copy_flags <= 1; - op.round_copy_flags <= 1; - op.round_enable <= 1; - op.encode_enable <= 1; - end - - INSN_FPINT_FMUL: begin - op.setup_mul_float <= 1; - op.setup_unit_b <= 0; - op.mnorm_put_hi <= 0; - op.mnorm_put_lo <= 0; - op.mnorm_put_mul <= 1; - op.mnorm_zero_flags <= 0; - op.mnorm_zero_b <= 1; - op.minmax_abs <= 1; - op.minmax_swap <= 0; - op.minmax_zero_min <= 0; - op.minmax_copy_flags <= 1; - op.shiftr_int_signed <= 0; - op.addsub_int_operand <= 0; - op.addsub_copy_flags <= 1; - op.clz_force_nop <= 1; - op.shiftl_copy_flags <= 1; - op.round_copy_flags <= 1; - op.round_enable <= 1; - op.encode_enable <= 1; - end - - INSN_FPINT_IMUL: begin - op.setup_mul_float <= 0; - op.setup_unit_b <= 0; - op.mnorm_put_hi <= 0; - op.mnorm_put_lo <= 1; - op.mnorm_put_mul <= 0; - op.mnorm_zero_flags <= 1; - op.mnorm_zero_b <= 1; - op.minmax_abs <= 1; - op.minmax_swap <= 0; - op.minmax_zero_min <= 0; - op.minmax_copy_flags <= 1; - op.shiftr_int_signed <= 0; - op.addsub_int_operand <= 0; - op.addsub_copy_flags <= 1; - op.clz_force_nop <= 1; - op.shiftl_copy_flags <= 1; - op.round_copy_flags <= 1; - op.round_enable <= 0; - op.encode_enable <= 0; - end - - INSN_FPINT_FADD: begin - 
op.setup_mul_float <= 0; - op.setup_unit_b <= 1; - op.mnorm_put_hi <= 0; - op.mnorm_put_lo <= 1; - op.mnorm_put_mul <= 0; - op.mnorm_zero_flags <= 0; - op.mnorm_zero_b <= 0; - op.minmax_abs <= 1; - op.minmax_swap <= 0; - op.minmax_zero_min <= 0; - op.minmax_copy_flags <= 0; - op.shiftr_int_signed <= 0; - op.addsub_int_operand <= 0; - op.addsub_copy_flags <= 0; - op.clz_force_nop <= 0; - op.shiftl_copy_flags <= 0; - op.round_copy_flags <= 0; - op.round_enable <= 1; - op.encode_enable <= 1; - end - - INSN_FPINT_FMAX, INSN_FPINT_FMIN: begin - op.setup_mul_float <= 0; - op.setup_unit_b <= 1; - op.mnorm_put_hi <= 0; - op.mnorm_put_lo <= 1; - op.mnorm_put_mul <= 0; - op.mnorm_zero_flags <= 0; - op.mnorm_zero_b <= 0; - op.minmax_abs <= 0; - op.minmax_swap <= insn.by_class.fpint.op == INSN_FPINT_FMIN; - op.minmax_zero_min <= 1; - op.minmax_copy_flags <= 1; - op.shiftr_int_signed <= 0; - op.addsub_int_operand <= 0; - op.addsub_copy_flags <= 1; - op.clz_force_nop <= 1; - op.shiftl_copy_flags <= 1; - op.round_copy_flags <= 1; - op.round_enable <= 0; - op.encode_enable <= 0; - end - - INSN_FPINT_FCVT: begin - op.setup_mul_float <= 0; - op.setup_unit_b <= 1; - op.mnorm_put_hi <= 0; - op.mnorm_put_lo <= 1; - op.mnorm_put_mul <= 0; - op.mnorm_zero_flags <= 1; - op.mnorm_zero_b <= 1; - - op.minmax_abs <= 1; - op.minmax_swap <= 0; - op.minmax_zero_min <= 0; - op.minmax_copy_flags <= 0; - op.shiftr_int_signed <= 1; - op.addsub_int_operand <= 1; - op.addsub_copy_flags <= 1; - op.clz_force_nop <= 0; - op.shiftl_copy_flags <= 0; - op.round_copy_flags <= 0; - op.round_enable <= 1; - op.encode_enable <= 1; - end - - default: - op <= 'x; - endcase - - op.writeback <= writeback; - end - -endmodule diff --git a/platform/wavelet3d/gfx_shader_group.sv b/platform/wavelet3d/gfx_shader_group.sv deleted file mode 100644 index e668877..0000000 --- a/platform/wavelet3d/gfx_shader_group.sv +++ /dev/null @@ -1,17 +0,0 @@ -module gfx_shader_group -import gfx::*; -( - input logic clk, - rst_n, - - 
input group_op op, - input wave_exec wave, - - gfx_regfile_io.ab read_data, - - gfx_shake.rx in_shake, - - gfx_wb.tx wb -); - -endmodule diff --git a/platform/wavelet3d/gfx_shader_mem.sv b/platform/wavelet3d/gfx_shader_mem.sv deleted file mode 100644 index 403c9e4..0000000 --- a/platform/wavelet3d/gfx_shader_mem.sv +++ /dev/null @@ -1,17 +0,0 @@ -module gfx_shader_mem -import gfx::*; -( - input logic clk, - rst_n, - - input mem_op op, - input wave_exec wave, - - gfx_regfile_io.ab read_data, - - gfx_shake.rx in_shake, - - gfx_wb.tx wb -); - -endmodule diff --git a/platform/wavelet3d/gfx_shader_regs.sv b/platform/wavelet3d/gfx_shader_regs.sv deleted file mode 100644 index ef3a129..0000000 --- a/platform/wavelet3d/gfx_shader_regs.sv +++ /dev/null @@ -1,302 +0,0 @@ -module gfx_shader_regs -import gfx::*; -( - input logic clk, - - gfx_regfile_io.regs io -); - - // verilator tracing_off - - localparam PC_TABLE_PORTS = 2; - localparam MASK_TABLE_PORTS = 1; - - word hold_imm[REGFILE_STAGES], imm_out, read_a_data_sgpr, read_b_data_scalar, - read_b_data_sgpr, read_const, read_a_data_vgpr[SHADER_LANES], - read_b_data_vgpr[SHADER_LANES], sgpr_out_a, sgpr_out_b; - - group_id mask_read_groups[MASK_TABLE_PORTS], pc_read_groups[PC_TABLE_PORTS]; - word_ptr pc_read[PC_TABLE_PORTS]; - lane_mask mask_read[MASK_TABLE_PORTS]; - - logic a_scalar_out, b_is_const_out, b_is_imm_out, b_scalar_out, scalar_rev_out; - group_id hold_read_group_1, hold_read_group_2; - sgpr_num hold_read_a_sgpr; - vgpr_num hold_read_a_vgpr_1, hold_read_a_vgpr_2, hold_read_b_vgpr_1, hold_read_b_vgpr_2; - logic[REGFILE_STAGES - 1:0] hold_b_is_imm, hold_b_is_const; - logic[REGFILE_STAGES + 1 - 1:0] hold_scalar_rev; - logic[REGFILE_STAGES + 2 - 1:0] hold_a_scalar, hold_b_scalar; - - assign io.pc_back = pc_read[0]; - assign io.pc_front = pc_read[1]; - assign pc_read_groups[0] = io.pc_back_group; - assign pc_read_groups[1] = io.pc_front_group; - - assign io.mask_back = mask_read[0]; /* the mask table is read through mask_read_groups, not pc_read_groups */ - assign mask_read_groups[0] = 
io.mask_back_group; - - assign imm_out = hold_imm[$size(hold_imm) - 1]; - assign a_scalar_out = hold_a_scalar[$bits(hold_a_scalar) - 1]; - assign b_scalar_out = hold_b_scalar[$bits(hold_b_scalar) - 1]; - assign b_is_imm_out = hold_b_is_imm[$bits(hold_b_is_imm) - 1]; - assign b_is_const_out = hold_b_is_const[$bits(hold_b_is_const) - 1]; - assign scalar_rev_out = hold_scalar_rev[$bits(hold_scalar_rev) - 1]; - - gfx_shader_table #(.DATA_WIDTH($bits(word_ptr)), .READ_PORTS(PC_TABLE_PORTS)) pc_table - ( - .clk, - .read(pc_read), - .write(io.pc_wb), - .read_groups(pc_read_groups), - .write_group(io.pc_wb_group), - .write_enable(io.pc_wb_write) - ); - - gfx_shader_table #(.DATA_WIDTH($bits(lane_mask)), .READ_PORTS(MASK_TABLE_PORTS)) mask_table - ( - .clk, - .read(mask_read), - .write(io.mask_wb), - .read_groups(mask_read_groups), - .write_group(io.mask_wb_group), - .write_enable(io.mask_wb_write) - ); - - gfx_shader_consts consts - ( - .clk, - .num(io.op.b_sgpr), - .value(read_const) - ); - - gfx_shader_regfile #($bits(group_id) + $bits(sgpr_num)) sgprs - ( - .clk, - - .read_a_num({hold_read_group_1, hold_read_a_sgpr}), - .read_b_num({io.op.group, io.op.b_sgpr}), - .read_a_data(read_a_data_sgpr), - .read_b_data(read_b_data_sgpr), - - .write(io.sgpr_write.write), - .write_num({io.sgpr_write.group, io.sgpr_write.sgpr}), - .write_data(io.sgpr_write.data) - ); - - generate - for (genvar i = 0; i < SHADER_LANES; ++i) begin: vgprs - gfx_shader_regfile #($bits(group_id) + $bits(vgpr_num)) vgprs - ( - .clk, - - .read_a_num({hold_read_group_2, hold_read_a_vgpr_2}), - .read_b_num({hold_read_group_2, hold_read_b_vgpr_2}), - .read_a_data(read_a_data_vgpr[i]), - .read_b_data(read_b_data_vgpr[i]), - - .write(io.vgpr_write.mask[i]), - .write_num({io.vgpr_write.group, io.vgpr_write.vgpr}), - .write_data(io.vgpr_write.data[i]) - ); - end - endgenerate - - always_ff @(posedge clk) begin - hold_imm[0] <= {{($bits(word) - $bits(io.op.b_imm)){1'b0}}, io.op.b_imm}; - hold_a_scalar[0] <= 
io.op.a_scalar; - hold_b_scalar[0] <= io.op.b_scalar; - hold_b_is_imm[0] <= io.op.b_is_imm; - hold_b_is_const[0] <= io.op.b_is_const; - hold_scalar_rev[0] <= io.op.scalar_rev; - - for (int i = 1; i < REGFILE_STAGES; ++i) begin - hold_imm[i] <= hold_imm[i - 1]; - hold_a_scalar[i] <= hold_a_scalar[i - 1]; - hold_b_scalar[i] <= hold_b_scalar[i - 1]; - hold_b_is_imm[i] <= hold_b_is_imm[i - 1]; - hold_b_is_const[i] <= hold_b_is_const[i - 1]; - hold_scalar_rev[i] <= hold_scalar_rev[i - 1]; - end - - for (int i = REGFILE_STAGES; i < REGFILE_STAGES + 2; ++i) begin - hold_a_scalar[i] <= hold_a_scalar[i - 1]; - hold_b_scalar[i] <= hold_b_scalar[i - 1]; - end - - hold_scalar_rev[REGFILE_STAGES] <= hold_scalar_rev[REGFILE_STAGES - 1]; - - hold_read_a_sgpr <= io.op.a_sgpr; - hold_read_group_1 <= io.op.group; - hold_read_group_2 <= hold_read_group_1; - - hold_read_a_vgpr_1 <= io.op.a_vgpr; - hold_read_a_vgpr_2 <= hold_read_a_vgpr_1; - - hold_read_b_vgpr_1 <= io.op.b_vgpr; - hold_read_b_vgpr_2 <= hold_read_b_vgpr_1; - - if (b_is_imm_out) - read_b_data_scalar <= imm_out; - else if (b_is_const_out) - read_b_data_scalar <= read_const; - else - read_b_data_scalar <= read_b_data_sgpr; - - if (scalar_rev_out) begin - sgpr_out_a <= read_b_data_scalar; - sgpr_out_b <= read_a_data_sgpr; - end else begin - sgpr_out_a <= read_a_data_sgpr; - sgpr_out_b <= read_b_data_scalar; - end - - for (int i = 0; i < SHADER_LANES; ++i) begin - io.a[i] <= a_scalar_out ? sgpr_out_a : read_a_data_vgpr[i]; - io.b[i] <= b_scalar_out ? 
sgpr_out_b : read_b_data_vgpr[i]; /* operand B takes the B read port of the lane's VGPR file */ - end - end - -endmodule - -module gfx_shader_consts -import gfx::*; -( - input logic clk, - - input sgpr_num num, - output word value -); - - word hold_out, rom[1 << $bits(sgpr_num)]; - sgpr_num hold_in; - - always_ff @(posedge clk) begin - value <= hold_out; - hold_in <= num; - hold_out <= rom[hold_in]; - end - - initial begin - rom[0] = 'hffff_ffff; // -1 - rom[1] = 'h7fff_ffff; // 2^31 - 1, useful for fp abs - rom[2] = 'h8000_0000; // 2^31, useful for fp neg - rom[3] = 'h3f80_0000; // +1.0 - rom[4] = 'hbf80_0000; // -1.0 - end - -endmodule - -module gfx_shader_regfile -import gfx::*; -#(int DEPTH_LOG = 0) -( - input logic clk, - - input logic[DEPTH_LOG - 1:0] read_a_num, - read_b_num, - output word read_a_data, - read_b_data, - - input logic write, - input logic[DEPTH_LOG - 1:0] write_num, - input word write_data -); - - gfx_shader_regfile_port #(DEPTH_LOG) a - ( - .clk, - .write, - .read_num(read_a_num), - .read_data(read_a_data), - .write_num, - .write_data - ); - - gfx_shader_regfile_port #(DEPTH_LOG) b - ( - .clk, - .write, - .read_num(read_b_num), - .read_data(read_b_data), - .write_num, - .write_data - ); - -endmodule - -module gfx_shader_regfile_port -import gfx::*; -#(int DEPTH_LOG = 0) -( - input logic clk, - - input logic[DEPTH_LOG - 1:0] read_num, - output word read_data, - - input logic write, - input logic[DEPTH_LOG - 1:0] write_num, - input word write_data -); - - word file[1 << DEPTH_LOG], hold_read_data, hold_write_data; - logic hold_write; - logic[DEPTH_LOG - 1:0] hold_read_num, hold_write_num; - - // hold_write needs no rst_n because any initial write is harmless - - always_ff @(posedge clk) begin - hold_write <= write; - hold_read_num <= read_num; - hold_write_num <= write_num; - hold_write_data <= write_data; - - hold_read_data <= file[hold_read_num]; - if (hold_write) - file[hold_write_num] <= hold_write_data; - - read_data <= hold_read_data; - end - -endmodule - -module gfx_shader_table 
-import gfx::*; -#(int DATA_WIDTH = 0, - int READ_PORTS = 0) -( - input logic clk, - - input group_id write_group, - read_groups[READ_PORTS], - - input logic[DATA_WIDTH - 1:0] write, - input logic write_enable, - - output logic[DATA_WIDTH - 1:0] read[READ_PORTS] -); - - genvar i; - - generate - for (i = 0; i < READ_PORTS; ++i) begin: ports - logic write_enable_hold; - group_id read_group_hold, write_group_hold; - logic[DATA_WIDTH - 1:0] data[1 << $bits(group_id)], read_hold, write_hold; - - always_ff @(posedge clk) begin - write_hold <= write; - read_group_hold <= read_groups[i]; - write_group_hold <= write_group; - write_enable_hold <= write_enable; - - read_hold <= data[read_group_hold]; - - if (write_enable_hold) - data[write_group_hold] <= write_hold; - - read[i] <= read_hold; - end - end - endgenerate - -endmodule diff --git a/platform/wavelet3d/gfx_shader_schedif.rdl b/platform/wavelet3d/gfx_shader_schedif.rdl deleted file mode 100644 index c846da9..0000000 --- a/platform/wavelet3d/gfx_shader_schedif.rdl +++ /dev/null @@ -1,91 +0,0 @@ -addrmap gfx_shader_schedif { - name = "Scheduler<->core interface"; - - default hw = r; - default sw = w; - default regwidth = 32; - - reg { - name = "Shader core control register"; - - field { - desc = "Set this field to flush the instruction cache"; - - singlepulse; - } IFLUSH[0:0] = 0; - } CORE @ 0x00; - - reg { - name = "Wavefront setup control register"; - - default hw = na; - default sw = r; - default precedence = hw; - - field { - desc = "Wavefront group number"; - - hw = r; - sw = rw; - } GROUP[5:0]; - - field { - desc = "Destination SGPR number"; - - hw = r; - sw = rw; - } XGPR[11:8]; - - field { - desc = "PC table update done, group submitted"; - - rclr; - hwset; - } SUBMIT_DONE[16:16] = 0; - - field { - desc = "General-purpose register update done"; - - rclr; - hwset; - } GPR_DONE[17:17] = 0; - - field { - desc = "Lane mask update done"; - - rclr; - hwset; - } MASK_DONE[18:18] = 0; - } SETUP_CTRL @ 0x04; - - reg { - 
name = "SGPR/VGPR write register"; - - field { - desc = "Value to write"; - - swmod; - } VALUE[31:0]; - } SETUP_GPR @ 0x08; - - reg { - name = "Lane mask write register"; - - field { - desc = "Mask value to write"; - - swmod; - } MASK[15:0]; - } SETUP_MASK @ 0x0c; - - reg { - name = "Group submit register"; - - field { - desc = "Initial group program counter, submits group on write"; - - swmod; - } PC[31:2]; - } SETUP_SUBMIT @ 0x10; -}; - diff --git a/platform/wavelet3d/gfx_shader_setup.sv b/platform/wavelet3d/gfx_shader_setup.sv deleted file mode 100644 index f46fb66..0000000 --- a/platform/wavelet3d/gfx_shader_setup.sv +++ /dev/null @@ -1,37 +0,0 @@ -interface gfx_shader_setup -import gfx::*;; - - struct - { - group_id group; - word_ptr pc; - xgpr_num gpr; - word gpr_value; - lane_mask mask; - logic pc_set, - gpr_set, - mask_set; - } write; - - struct - { - logic gpr, - mask, - submit; - } set_done; - - modport core - ( - input write, - - output set_done - ); - - modport sched - ( - input set_done, - - output write - ); - -endinterface diff --git a/platform/wavelet3d/gfx_shader_sfu.sv b/platform/wavelet3d/gfx_shader_sfu.sv deleted file mode 100644 index d65e522..0000000 --- a/platform/wavelet3d/gfx_shader_sfu.sv +++ /dev/null @@ -1,17 +0,0 @@ -module gfx_shader_sfu -import gfx::*; -( - input logic clk, - rst_n, - - input sfu_op op, - input wave_exec wave, - - gfx_regfile_io.ab read_data, - - gfx_shake.rx in_shake, - - gfx_wb.tx wb -); - -endmodule diff --git a/platform/wavelet3d/gfx_shake.sv b/platform/wavelet3d/gfx_shake.sv deleted file mode 100644 index baae0c3..0000000 --- a/platform/wavelet3d/gfx_shake.sv +++ /dev/null @@ -1,24 +0,0 @@ -interface gfx_shake; - - logic ready; - logic valid; - - modport tx - ( - input ready, - output valid - ); - - modport rx - ( - input valid, - output ready - ); - - modport peek - ( - input ready, - valid - ); - -endinterface diff --git a/platform/wavelet3d/gfx_sim_debug.sv b/platform/wavelet3d/gfx_sim_debug.sv deleted file 
mode 100644 index 4b4622a..0000000 --- a/platform/wavelet3d/gfx_sim_debug.sv +++ /dev/null @@ -1,50 +0,0 @@ -module gfx_sim_debug -import gfx::*; -( - input logic clk, - rst_n, - - gfx_axil.s axis -); - - enum int unsigned - { - INPUT, - STALL - } state; - - assign axis.rvalid = 0; - assign axis.arready = 0; - assign axis.awready = 1; - - always_comb - unique case (state) - INPUT: begin - axis.wready = 1; - axis.bvalid = axis.wvalid; - end - - STALL: begin - axis.wready = 0; - axis.bvalid = 1; - end - endcase - - always_ff @(posedge clk or negedge rst_n) - if (~rst_n) - state <= INPUT; - else - unique case (state) - INPUT: - if (axis.wvalid) begin - $display("%c", axis.wdata[7:0]); - if (~axis.bready) - state <= STALL; - end - - STALL: - if (axis.bready) - state <= INPUT; - endcase - -endmodule diff --git a/platform/wavelet3d/gfx_skid_buf.sv b/platform/wavelet3d/gfx_skid_buf.sv deleted file mode 100644 index e3e5247..0000000 --- a/platform/wavelet3d/gfx_skid_buf.sv +++ /dev/null @@ -1,20 +0,0 @@ -module gfx_skid_buf -#(int WIDTH = 0) -( - input logic clk, - - input logic[WIDTH - 1:0] in, - input logic stall, - - output logic[WIDTH - 1:0] out -); - - logic[WIDTH - 1:0] skid; - - assign out = stall ? 
skid : in; - - always_ff @(posedge clk) - if (~stall) - skid <= in; - -endmodule diff --git a/platform/wavelet3d/gfx_skid_flow.sv b/platform/wavelet3d/gfx_skid_flow.sv deleted file mode 100644 index 7890ae3..0000000 --- a/platform/wavelet3d/gfx_skid_flow.sv +++ /dev/null @@ -1,31 +0,0 @@ -module gfx_skid_flow -( - input logic clk, - rst_n, - - input logic in_valid, - out_ready, - - output logic in_ready, - out_valid, - stall -); - - logic was_ready, was_valid; - - assign stall = ~in_ready; - assign in_ready = was_ready | ~was_valid; - assign out_valid = in_valid | stall; - - always_ff @(posedge clk or negedge rst_n) - if (~rst_n) begin - was_ready <= 0; - was_valid <= 0; - end else begin - was_ready <= out_ready; - - if (~stall) - was_valid <= in_valid; - end - -endmodule diff --git a/platform/wavelet3d/gfx_wb.sv b/platform/wavelet3d/gfx_wb.sv deleted file mode 100644 index 20c7c64..0000000 --- a/platform/wavelet3d/gfx_wb.sv +++ /dev/null @@ -1,51 +0,0 @@ -interface gfx_wb; - - import gfx::*; - - word lanes[SHADER_LANES]; - logic mask_update, pc_inc, pc_update, ready, scalar, valid, writeback; - group_id group; - xgpr_num dest; - lane_mask mask; - pc_offset pc_add; - - modport tx - ( - input ready, - - output dest, - group, - lanes, - valid, - scalar, - writeback, - - mask, - mask_update, - - pc_add, - pc_inc, - pc_update - ); - - modport rx - ( - input dest, - group, - lanes, - valid, - scalar, - writeback, - - mask, - mask_update, - - pc_add, - pc_inc, - pc_update, - - output ready - ); - - -endinterface diff --git a/platform/wavelet3d/gfx_xbar_sched.sv b/platform/wavelet3d/gfx_xbar_sched.sv deleted file mode 100644 index 95e4afb..0000000 --- a/platform/wavelet3d/gfx_xbar_sched.sv +++ /dev/null @@ -1,146 +0,0 @@ -module gfx_xbar_sched -import gfx::*; -( - input logic clk, - srst_n, - - gfx_axil.s sched, - - gfx_axil.m debug, - gfx_axil.m bootrom, - gfx_axil.m shader_0 -); - - localparam word BOOTROM_MASK = 32'hfff0_0000; - localparam word DEBUG_BASE = 
32'h0020_0000; - localparam word DEBUG_MASK = 32'hfff0_0000; - localparam word SHADER_0_BASE = 32'h0100_0000; - localparam word SHADER_0_MASK = 32'hfff0_0000; - - defparam xbar.NM = 1; - defparam xbar.NS = 3; - defparam xbar.OPT_LOWPOWER = 0; - - defparam xbar.SLAVE_ADDR = { - SHADER_0_BASE, - DEBUG_BASE, - BOOTROM_BASE - }; - - defparam xbar.SLAVE_MASK = { - SHADER_0_MASK, - DEBUG_MASK, - BOOTROM_MASK - }; - - axilxbar xbar - ( - .S_AXI_ACLK(clk), - .S_AXI_ARESETN(srst_n), - - .S_AXI_AWVALID(sched.awvalid), - .S_AXI_AWREADY(sched.awready), - .S_AXI_AWADDR(sched.awaddr), - .S_AXI_AWPROT('0), - - .S_AXI_WVALID(sched.wvalid), - .S_AXI_WREADY(sched.wready), - .S_AXI_WDATA(sched.wdata), - .S_AXI_WSTRB('1), - - .S_AXI_BVALID(sched.bvalid), - .S_AXI_BREADY(sched.bready), - .S_AXI_BRESP(), - - .S_AXI_ARVALID(sched.arvalid), - .S_AXI_ARREADY(sched.arready), - .S_AXI_ARADDR(sched.araddr), - .S_AXI_ARPROT('0), - - .S_AXI_RVALID(sched.rvalid), - .S_AXI_RREADY(sched.rready), - .S_AXI_RDATA(sched.rdata), - .S_AXI_RRESP(), - - .M_AXI_AWADDR({ - shader_0.awaddr, - debug.awaddr, - bootrom.awaddr - }), - .M_AXI_AWPROT(), - .M_AXI_AWVALID({ - shader_0.awvalid, - debug.awvalid, - bootrom.awvalid - }), - .M_AXI_AWREADY({ - shader_0.awready, - debug.awready, - bootrom.awready - }), - - .M_AXI_WDATA({ - shader_0.wdata, - debug.wdata, - bootrom.wdata - }), - .M_AXI_WSTRB(), - .M_AXI_WVALID({ - shader_0.wvalid, - debug.wvalid, - bootrom.wvalid - }), - .M_AXI_WREADY({ - shader_0.wready, - debug.wready, - bootrom.wready - }), - - .M_AXI_BRESP('0), - .M_AXI_BVALID({ - shader_0.bvalid, - debug.bvalid, - bootrom.bvalid - }), - .M_AXI_BREADY({ - shader_0.bready, - debug.bready, - bootrom.bready - }), - - .M_AXI_ARADDR({ - shader_0.araddr, - debug.araddr, - bootrom.araddr - }), - .M_AXI_ARPROT(), - .M_AXI_ARVALID({ - shader_0.arvalid, - debug.arvalid, - bootrom.arvalid - }), - .M_AXI_ARREADY({ - shader_0.arready, - debug.arready, - bootrom.arready - }), - - .M_AXI_RDATA({ - shader_0.rdata, - 
debug.rdata, - bootrom.rdata - }), - .M_AXI_RRESP('0), - .M_AXI_RVALID({ - shader_0.rvalid, - debug.rvalid, - bootrom.rvalid - }), - .M_AXI_RREADY({ - shader_0.rready, - debug.rready, - bootrom.rready - }) - ); - -endmodule diff --git a/platform/wavelet3d/mod.mk b/platform/wavelet3d/mod.mk index 153f9c7..16c6cfc 100644 --- a/platform/wavelet3d/mod.mk +++ b/platform/wavelet3d/mod.mk @@ -1,21 +1,10 @@ -cores := gfx_shader_schedif - define core - $(this)/deps := axixbar fp_unit gfx_shader_schedif picorv32 + $(this)/deps := gfx - $(this)/rtl_top := gfx_top + $(this)/rtl_top := w3d_top $(this)/rtl_dirs := . - $(this)/rtl_files := gfx_isa.sv gfx_pkg.sv gfx_top.sv + $(this)/rtl_files := w3d_top.sv $(this)/vl_main := main.cpp $(this)/vl_pkgconfig := sdl2 endef - -define core/gfx_shader_schedif - $(this)/hooks := regblock - - $(this)/regblock_rdl := gfx_shader_schedif.rdl - $(this)/regblock_top := gfx_shader_schedif - $(this)/regblock_args := --default-reset arst_n - $(this)/regblock_cpuif := axi4-lite -endef diff --git a/platform/wavelet3d/gfx_top.sv b/platform/wavelet3d/w3d_top.sv index 41ff7f4..34ecb52 100644 --- a/platform/wavelet3d/gfx_top.sv +++ b/platform/wavelet3d/w3d_top.sv @@ -1,4 +1,4 @@ -module gfx_top +module w3d_top import gfx::*; ( input logic clk, |
