summaryrefslogtreecommitdiff
path: root/platform
diff options
context:
space:
mode:
Diffstat (limited to 'platform')
-rw-r--r--platform/wavelet3d/gfx_axib.sv81
-rw-r--r--platform/wavelet3d/gfx_axil.sv61
-rw-r--r--platform/wavelet3d/gfx_axil2regblock.sv30
-rw-r--r--platform/wavelet3d/gfx_beats.sv29
-rw-r--r--platform/wavelet3d/gfx_bootrom.sv66
-rw-r--r--platform/wavelet3d/gfx_clz.sv68
-rw-r--r--platform/wavelet3d/gfx_ctz.sv18
-rw-r--r--platform/wavelet3d/gfx_fifo.sv102
-rw-r--r--platform/wavelet3d/gfx_fixed_dotadd.sv55
-rw-r--r--platform/wavelet3d/gfx_fixed_muladd.sv77
-rw-r--r--platform/wavelet3d/gfx_front_back.sv37
-rw-r--r--platform/wavelet3d/gfx_isa.sv84
-rw-r--r--platform/wavelet3d/gfx_pipes.sv24
-rw-r--r--platform/wavelet3d/gfx_pkg.sv271
-rw-r--r--platform/wavelet3d/gfx_pkts.sv29
-rw-r--r--platform/wavelet3d/gfx_raster.sv930
-rw-r--r--platform/wavelet3d/gfx_regfile_io.sv106
-rw-r--r--platform/wavelet3d/gfx_rst_sync.sv13
-rw-r--r--platform/wavelet3d/gfx_sched.sv139
-rw-r--r--platform/wavelet3d/gfx_shader.sv77
-rw-r--r--platform/wavelet3d/gfx_shader_back.sv335
-rw-r--r--platform/wavelet3d/gfx_shader_fpint.sv932
-rw-r--r--platform/wavelet3d/gfx_shader_front.sv746
-rw-r--r--platform/wavelet3d/gfx_shader_group.sv17
-rw-r--r--platform/wavelet3d/gfx_shader_mem.sv17
-rw-r--r--platform/wavelet3d/gfx_shader_regs.sv302
-rw-r--r--platform/wavelet3d/gfx_shader_schedif.rdl91
-rw-r--r--platform/wavelet3d/gfx_shader_setup.sv37
-rw-r--r--platform/wavelet3d/gfx_shader_sfu.sv17
-rw-r--r--platform/wavelet3d/gfx_shake.sv24
-rw-r--r--platform/wavelet3d/gfx_sim_debug.sv50
-rw-r--r--platform/wavelet3d/gfx_skid_buf.sv20
-rw-r--r--platform/wavelet3d/gfx_skid_flow.sv31
-rw-r--r--platform/wavelet3d/gfx_wb.sv51
-rw-r--r--platform/wavelet3d/gfx_xbar_sched.sv146
-rw-r--r--platform/wavelet3d/mod.mk17
-rw-r--r--platform/wavelet3d/w3d_top.sv (renamed from platform/wavelet3d/gfx_top.sv)2
37 files changed, 4 insertions, 5128 deletions
diff --git a/platform/wavelet3d/gfx_axib.sv b/platform/wavelet3d/gfx_axib.sv
deleted file mode 100644
index 7b3cbdc..0000000
--- a/platform/wavelet3d/gfx_axib.sv
+++ /dev/null
@@ -1,81 +0,0 @@
-// AXI4 with burst support; only address, length, burst type, data, last and handshake signals are declared
-interface gfx_axib;
-
- import gfx::word;
-
- logic awvalid, // Write address channel (AW)
- awready;
- logic[7:0] awlen;
- logic[1:0] awburst;
- word awaddr;
-
- logic wlast; // Write data channel (W)
- logic wvalid;
- logic wready;
- word wdata;
-
- logic bvalid; // Write response channel (B): handshake only, no response code is declared
- logic bready;
-
- logic arvalid, // Read address channel (AR)
- arready;
- logic[7:0] arlen;
- logic[1:0] arburst;
- word araddr;
-
- logic rlast; // Read data channel (R)
- logic rvalid;
- logic rready;
- word rdata;
-
- modport m // Master view: samples ready/response/read-data signals, drives everything else
- (
- input awready,
- wready,
- bvalid,
- arready,
- rlast,
- rvalid,
- rdata,
-
- output awlen,
- awburst,
- awvalid,
- awaddr,
- wlast,
- wvalid,
- wdata,
- bready,
- arlen,
- arburst,
- arvalid,
- araddr,
- rready
- );
-
- modport s // Slave view: exact mirror of modport m
- (
- input awlen,
- awburst,
- awvalid,
- awaddr,
- wlast,
- wvalid,
- wdata,
- bready,
- arlen,
- arburst,
- arvalid,
- araddr,
- rready,
-
- output awready,
- wready,
- bvalid,
- arready,
- rlast,
- rvalid,
- rdata
- );
-
-endinterface
diff --git a/platform/wavelet3d/gfx_axil.sv b/platform/wavelet3d/gfx_axil.sv
deleted file mode 100644
index c254e26..0000000
--- a/platform/wavelet3d/gfx_axil.sv
+++ /dev/null
@@ -1,61 +0,0 @@
-// AXI4-Lite, without wstrb or axprot
-interface gfx_axil;
- import gfx::*;
-
- logic awvalid; // Write address channel (AW)
- logic awready;
- word awaddr;
-
- logic wvalid; // Write data channel (W)
- logic wready;
- word wdata;
-
- logic bvalid; // Write response channel (B): handshake only, no response code is declared
- logic bready;
-
- logic arvalid; // Read address channel (AR)
- logic arready;
- word araddr;
-
- logic rvalid; // Read data channel (R)
- logic rready;
- word rdata;
-
- modport m // Master view: samples ready/response/read-data signals, drives everything else
- (
- input awready,
- wready,
- bvalid,
- arready,
- rvalid,
- rdata,
-
- output awvalid,
- awaddr,
- wvalid,
- wdata,
- bready,
- arvalid,
- araddr,
- rready
- );
-
- modport s // Slave view: exact mirror of modport m
- (
- input awvalid,
- awaddr,
- wvalid,
- wdata,
- bready,
- arvalid,
- araddr,
- rready,
-
- output awready,
- wready,
- bvalid,
- arready,
- rvalid,
- rdata
- );
-endinterface
diff --git a/platform/wavelet3d/gfx_axil2regblock.sv b/platform/wavelet3d/gfx_axil2regblock.sv
deleted file mode 100644
index 2449b05..0000000
--- a/platform/wavelet3d/gfx_axil2regblock.sv
+++ /dev/null
@@ -1,30 +0,0 @@
-module gfx_axil2regblock // Purely combinational bridge from a gfx_axil slave port to an axi4lite_intf master port
-(
- gfx_axil.s axis,
- axi4lite_intf.master axim
-);
-
- assign axis.rdata = axim.RDATA; // Responses and ready signals flow back unchanged
- assign axis.rvalid = axim.RVALID;
- assign axis.bvalid = axim.BVALID;
- assign axis.wready = axim.WREADY;
- assign axis.arready = axim.ARREADY;
- assign axis.awready = axim.AWREADY;
-
- assign axim.AWVALID = axis.awvalid;
- assign axim.AWADDR = axis.awaddr[$bits(axim.AWADDR) - 1:0]; // Keep only the low bits that fit the target address width
- assign axim.AWPROT = '0; // gfx_axil carries no axprot, so a constant zero is driven
-
- assign axim.WVALID = axis.wvalid;
- assign axim.WDATA = axis.wdata;
- assign axim.WSTRB = '1; // gfx_axil carries no wstrb, so every byte lane is always enabled
-
- assign axim.BREADY = axis.bready;
-
- assign axim.ARVALID = axis.arvalid;
- assign axim.ARADDR = axis.araddr[$bits(axim.ARADDR) - 1:0]; // Keep only the low bits that fit the target address width
- assign axim.ARPROT = '0; // gfx_axil carries no axprot, so a constant zero is driven
-
- assign axim.RREADY = axis.rready;
-
-endmodule
diff --git a/platform/wavelet3d/gfx_beats.sv b/platform/wavelet3d/gfx_beats.sv
deleted file mode 100644
index fcbb091..0000000
--- a/platform/wavelet3d/gfx_beats.sv
+++ /dev/null
@@ -1,29 +0,0 @@
-interface gfx_beats // Ready/valid stream carrying WIDTH-bit data beats
-#(int WIDTH = $bits(gfx::word));
-
- logic[WIDTH - 1:0] data;
- logic ready;
- logic valid;
-
- modport tx // Producer side: drives data/valid, samples ready
- (
- input ready,
- output data,
- valid
- );
-
- modport rx // Consumer side: samples data/valid, drives ready
- (
- input data,
- valid,
- output ready
- );
-
- modport peek // Passive observer: every signal is an input
- (
- input data,
- ready,
- valid
- );
-
-endinterface
diff --git a/platform/wavelet3d/gfx_bootrom.sv b/platform/wavelet3d/gfx_bootrom.sv
deleted file mode 100644
index 2c4581e..0000000
--- a/platform/wavelet3d/gfx_bootrom.sv
+++ /dev/null
@@ -1,66 +0,0 @@
-module gfx_bootrom // Read-only ROM behind a gfx_axil slave port; contents come from gfx_bootrom.hex
-import gfx::*;
-(
- input logic clk,
- rst_n,
-
- gfx_axil.s axis
-);
-
- localparam ROM_WORDS_LOG = 8; // ROM holds 1 << 8 = 256 words
-
- enum int unsigned
- {
- WAIT, // Idle until a read address is presented
- READ, // Wait state while the registered address indexes the ROM
- RDATA, // Wait state while the ROM word moves into axis.rdata; raises rvalid
- READY // Hold rvalid until the master asserts rready
- } state;
-
- word read, rom[1 << ROM_WORDS_LOG];
- logic[ROM_WORDS_LOG - 1:0] read_addr;
-
- assign axis.bvalid = 0; // The write channels are never accepted or answered
- assign axis.wready = 0;
- assign axis.awready = 0;
-
- always_ff @(posedge clk or negedge rst_n)
- if (~rst_n) begin
- state <= WAIT;
- axis.rvalid <= 0;
- axis.arready <= 0;
- end else begin
- axis.arready <= 0; // Default: arready is a one-cycle pulse set only in READY
-
- unique case (state)
- WAIT:
- if (axis.arvalid & ~axis.arready) // Skip the cycle in which the previous address is being acknowledged
- state <= READ;
-
- READ:
- state <= RDATA;
-
- RDATA: begin
- state <= READY;
- axis.rvalid <= 1;
- end
-
- READY:
- if (axis.rready) begin
- state <= WAIT;
- axis.rvalid <= 0;
- axis.arready <= 1; // The address is acknowledged only once the data has been taken
- end
- endcase
- end
-
- always_ff @(posedge clk) begin // Free-running three-register read path: araddr -> read_addr -> read -> rdata
- read <= rom[read_addr];
- read_addr <= axis.araddr[$bits(read_addr) + SUBWORD_BITS - 1:SUBWORD_BITS]; // Word index: the byte-offset bits are dropped
- axis.rdata <= read;
- end
-
- initial
- $readmemh("gfx_bootrom.hex", rom);
-
-endmodule
diff --git a/platform/wavelet3d/gfx_clz.sv b/platform/wavelet3d/gfx_clz.sv
deleted file mode 100644
index 8d6f100..0000000
--- a/platform/wavelet3d/gfx_clz.sv
+++ /dev/null
@@ -1,68 +0,0 @@
-/* Tree implementation of count leading zeros (CLZ).
- * WIDTH must be a power of 2.
- */
-module gfx_clz
-#(int WIDTH = 0)
-(
- input logic clk,
-
- input logic[WIDTH - 1:0] value,
- output logic[$clog2(WIDTH):0] clz // One extra bit so the value WIDTH (all zeros) is representable
-);
-
- genvar i;
- generate
- if (WIDTH <= 1) begin // Leaf: 1 when the single bit is zero
- always_ff @(posedge clk)
- clz <= !value;
- end else if (WIDTH == 2) begin // Leaf: 2-bit lookup
- always_ff @(posedge clk)
- unique case (value)
- 2'b00: clz <= 2'b10;
- 2'b01: clz <= 2'b01;
- 2'b10: clz <= 2'b00;
- 2'b11: clz <= 2'b00;
- endcase
- end else if (WIDTH == 4) begin // Leaf: 4-bit priority chain
- // Efficient on FPGAs with 4-LUTs
- always_ff @(posedge clk)
- if (value[3])
- clz <= 3'b000;
- else if (value[2])
- clz <= 3'b001;
- else if (value[1])
- clz <= 3'b010;
- else if (value[0])
- clz <= 3'b011;
- else
- clz <= 3'b100;
- end else begin // Recursive case: combine the counts of the two halves
- logic msb_right;
- logic[$clog2(WIDTH) - 1:0] clz_left, clz_right;
- logic[$clog2(WIDTH) - 2:0] tail_right;
-
- assign {msb_right, tail_right} = clz_right; // msb_right is set when the right half is all zeros
-
- gfx_clz #(WIDTH / 2) left
- (
- .clk(clk),
- .clz(clz_left),
- .value(value[WIDTH - 1:WIDTH / 2])
- );
-
- gfx_clz #(WIDTH / 2) right
- (
- .clk(clk),
- .clz(clz_right),
- .value(value[WIDTH / 2 - 1:0])
- );
-
- always_ff @(posedge clk) // Every tree level is registered, so latency grows by one cycle per level
- if (clz_left[$clog2(WIDTH) - 1]) // Left half is all zeros: result is WIDTH / 2 + clz_right
- clz <= {msb_right, ~msb_right, tail_right};
- else
- clz <= {1'b0, clz_left};
- end
- endgenerate
-
-endmodule
diff --git a/platform/wavelet3d/gfx_ctz.sv b/platform/wavelet3d/gfx_ctz.sv
deleted file mode 100644
index 2713f8a..0000000
--- a/platform/wavelet3d/gfx_ctz.sv
+++ /dev/null
@@ -1,18 +0,0 @@
-// Count trailing zeros (ctz): gfx_clz applied to the bit-reversed input
-module gfx_ctz
-#(int WIDTH = 0)
-(
- input logic clk,
-
- input logic[WIDTH - 1:0] value,
- output logic[$clog2(WIDTH):0] ctz
-);
-
- gfx_clz #(WIDTH) clz
- (
- .clk,
- .value({<<{value}}), // Streaming operator reverses the bit order
- .clz(ctz)
- );
-
-endmodule
diff --git a/platform/wavelet3d/gfx_fifo.sv b/platform/wavelet3d/gfx_fifo.sv
deleted file mode 100644
index 7174e4d..0000000
--- a/platform/wavelet3d/gfx_fifo.sv
+++ /dev/null
@@ -1,102 +0,0 @@
-module gfx_fifo // Ready/valid FIFO of DEPTH entries of WIDTH bits, with skid stages on both ports
-#(int WIDTH = 0,
- int DEPTH = 0)
-(
- input logic clk,
- rst_n,
-
- gfx_beats.rx in,
- gfx_beats.tx out
-);
-
- logic do_read, do_write, full_if_eq, in_stall, out_stall,
- may_read, may_write, read, read_ok, write;
-
- logic[WIDTH - 1:0] fifo[DEPTH], read_data, write_data;
- logic[$clog2(DEPTH) - 1:0] read_ptr, write_ptr; // NOTE(review): pointers wrap by overflow, which presumably assumes DEPTH is a power of 2 - confirm
-
- assign do_read = read & may_read; // A pop really happens this cycle
- assign do_write = write & may_write; // A push really happens this cycle
-
- always_comb begin
- may_read = full_if_eq; // Equal pointers: readable only when that means "full"
- may_write = !full_if_eq; // Equal pointers: writable only when that means "empty"
-
- if (read)
- may_write = 1; // A read request in the same cycle also allows a write
-
- if (read_ptr != write_ptr) begin // Pointers differ: both operations are allowed
- may_read = 1;
- may_write = 1;
- end
- end
-
- gfx_skid_flow in_flow // Handshake control for the input skid stage (module not visible in this chunk)
- (
- .clk,
- .rst_n,
- .stall(in_stall),
- .in_ready(in.ready),
- .in_valid(in.valid),
- .out_ready(may_write),
- .out_valid(write)
- );
-
- gfx_skid_flow out_flow // Handshake control for the output skid stage
- (
- .clk,
- .rst_n,
- .stall(out_stall),
- .in_ready(read),
- .in_valid(read_ok),
- .out_ready(out.ready),
- .out_valid(out.valid)
- );
-
- gfx_skid_buf #(WIDTH) in_skid // Data path matching in_flow
- (
- .clk,
- .in(in.data),
- .out(write_data),
- .stall(in_stall)
- );
-
- gfx_skid_buf #(WIDTH) out_skid // Data path matching out_flow
- (
- .clk,
- .in(read_data),
- .out(out.data),
- .stall(out_stall)
- );
-
- always_ff @(posedge clk or negedge rst_n)
- if (~rst_n) begin
- read_ok <= 0;
- read_ptr <= 0;
- write_ptr <= 0;
- full_if_eq <= 0; // Equal pointers after reset mean "empty"
- end else begin
- if (~out_stall)
- read_ok <= read && may_read; // Valid flag that travels with read_data
-
- if (do_read)
- read_ptr <= read_ptr + 1;
-
- if (do_write)
- write_ptr <= write_ptr + 1;
-
- if (do_read & ~do_write) // Last operation was a lone read: equal pointers now mean empty
- full_if_eq <= 0;
- else if (~do_read & do_write) // Last operation was a lone write: equal pointers now mean full
- full_if_eq <= 1;
- end
-
- always_ff @(posedge clk) begin
- if (~out_stall)
- read_data <= fifo[read_ptr];
-
- if (may_write) // Stored even without a write request; the slot only counts once write_ptr advances
- fifo[write_ptr] <= write_data;
- end
-
-endmodule
diff --git a/platform/wavelet3d/gfx_fixed_dotadd.sv b/platform/wavelet3d/gfx_fixed_dotadd.sv
deleted file mode 100644
index fdd5ffd..0000000
--- a/platform/wavelet3d/gfx_fixed_dotadd.sv
+++ /dev/null
@@ -1,55 +0,0 @@
-module gfx_fixed_dotadd // Two chained gfx_fixed_muladd stages: the (a0, b0, c) result feeds c of the (a1, b1) stage
-(
- input logic clk,
-
- input gfx::fixed a0,
- b0,
- a1,
- b1,
- c,
- input logic stall, // Shared by both multiply-add stages and both delay lines
-
- output gfx::fixed q
-);
-
- import gfx::*;
-
- fixed q0, a1_hold, b1_hold;
-
- gfx_fixed_muladd muladd_0 // First stage: operands a0, b0, c
- (
- .clk,
- .a(a0),
- .b(b0),
- .c,
- .q(q0),
- .stall
- );
-
- gfx_pipes #(.WIDTH($bits(fixed)), .DEPTH(FIXED_MULADD_DEPTH)) a_pipes // Delay a1 so it lines up with q0
- (
- .clk,
- .in(a1),
- .out(a1_hold),
- .stall
- );
-
- gfx_pipes #(.WIDTH($bits(fixed)), .DEPTH(FIXED_MULADD_DEPTH)) b_pipes // Delay b1 so it lines up with q0
- (
- .clk,
- .in(b1),
- .out(b1_hold),
- .stall
- );
-
- gfx_fixed_muladd muladd_1 // Second stage: delayed a1, b1 with q0 as the addend
- (
- .clk,
- .a(a1_hold),
- .b(b1_hold),
- .c(q0),
- .q,
- .stall
- );
-
-endmodule
diff --git a/platform/wavelet3d/gfx_fixed_muladd.sv b/platform/wavelet3d/gfx_fixed_muladd.sv
deleted file mode 100644
index 22b7247..0000000
--- a/platform/wavelet3d/gfx_fixed_muladd.sv
+++ /dev/null
@@ -1,77 +0,0 @@
-module gfx_fixed_muladd // Pipelined fixed-point multiply-add on gfx::fixed operands a, b, c
-(
- input logic clk,
-
- input gfx::fixed a,
- b,
- c,
- input logic stall, // Freezes the pipeline while high
-
- output gfx::fixed q
-);
-
- import gfx::*;
-
-`ifndef VERILATOR
- logic[2 * $bits(fixed) - $bits(fixed_frac) - 1:0] q_ext;
-
- assign q = q_ext[$bits(fixed) - 1:0]; // Low word of the truncated product-plus-sum
-
- lpm_mult mult // Vendor multiplier primitive; it is not defined in this file
- (
- .aclr(0),
- .clock(clk),
- .clken(!stall),
-
- .sum({c, {`FIXED_FRAC{1'b0}}}), // c padded with zero bits on the right; the FIXED_FRAC macro is defined elsewhere
- .dataa(a),
- .datab(b),
- .result(q_ext)
- );
-
- defparam
- mult.lpm_widtha = $bits(fixed),
- mult.lpm_widthb = $bits(fixed),
- mult.lpm_widths = $bits(fixed) + $bits(fixed_frac),
- /* This is crucial. It is not documented anywhere (apart from a
- * comment on r/fpga). If lpm_widthp < lpm_widtha + lpm_widthb,
- * then result holds the lpm_widthp most significant bits of the
- * product, not the least significant ones as would make sense.
- */
- mult.lpm_widthp = 2 * $bits(fixed) - $bits(fixed_frac),
- mult.lpm_representation = "SIGNED",
- mult.lpm_pipeline = FIXED_MULADD_DEPTH;
-`else
- logic[$bits(fixed) + $bits(fixed_frac) - 1:0] q_ext; // Simulation model used under Verilator
-
- fixed a_hold, b_hold, c_hold;
-
- assign q = q_ext[$bits(fixed) + $bits(fixed_frac) - 1:$bits(fixed_frac)] + c_hold; // Drop the extra fraction bits of the product, then add c
- assign q_ext = a_hold * b_hold;
-
- gfx_pipes #(.WIDTH($bits(a)), .DEPTH(FIXED_MULADD_DEPTH)) a_pipes // Operands are delayed by FIXED_MULADD_DEPTH to match lpm_pipeline
- (
- .clk,
- .in(a),
- .out(a_hold),
- .stall
- );
-
- gfx_pipes #(.WIDTH($bits(b)), .DEPTH(FIXED_MULADD_DEPTH)) b_pipes
- (
- .clk,
- .in(b),
- .out(b_hold),
- .stall
- );
-
- gfx_pipes #(.WIDTH($bits(c)), .DEPTH(FIXED_MULADD_DEPTH)) c_pipes
- (
- .clk,
- .in(c),
- .out(c_hold),
- .stall
- );
-`endif
-
-endmodule
diff --git a/platform/wavelet3d/gfx_front_back.sv b/platform/wavelet3d/gfx_front_back.sv
deleted file mode 100644
index b768532..0000000
--- a/platform/wavelet3d/gfx_front_back.sv
+++ /dev/null
@@ -1,37 +0,0 @@
-interface gfx_front_back // Signals exchanged between the "front" and "back" modports declared below
-import gfx::*;; // First ';' ends the package import, second ';' ends the interface header
-
- struct
- {
- wave_exec wave;
- fpint_op p0; // One operation descriptor per port p0..p3
- mem_op p1;
- sfu_op p2;
- group_op p3;
- } execute;
-
- struct
- {
- logic valid;
- group_id group;
- } loop;
-
- shader_dispatch dispatch;
-
- modport front // Drives execute and dispatch, samples loop
- (
- input loop,
-
- output execute,
- dispatch
- );
-
- modport back // Samples execute and dispatch, drives loop
- (
- input execute,
- dispatch,
-
- output loop
- );
-
-endinterface
diff --git a/platform/wavelet3d/gfx_isa.sv b/platform/wavelet3d/gfx_isa.sv
deleted file mode 100644
index 7239478..0000000
--- a/platform/wavelet3d/gfx_isa.sv
+++ /dev/null
@@ -1,84 +0,0 @@
-package gfx_isa; // Instruction word layout and register-number types
-
- typedef logic[3:0] sgpr_num; // 4-bit SGPR number
- typedef logic[2:0] vgpr_num; // 3-bit VGPR number
-
- typedef logic signed[7:0] pc_offset;
-
- typedef union packed // Register number viewed either as an SGPR or as a padded VGPR
- {
- sgpr_num sgpr;
-
- struct packed
- {
- logic[$bits(sgpr_num) - $bits(vgpr_num) - 1:0] reserved; // Pads the VGPR view up to the SGPR width
- vgpr_num num;
- } vgpr;
- } xgpr_num;
-
- typedef struct packed // Field widths add up to 32 bits: 2 + 22 + 1 + 5 + 2
- {
- enum logic[1:0]
- {
- REGS_SVS = 2'b00,
- REGS_SSS = 2'b01,
- REGS_VVS = 2'b10,
- REGS_VVV = 2'b11
- } reg_mode;
-
- union packed
- {
- struct packed
- {
- logic b_is_imm;
-
- union packed // Operand b: 13-bit immediate or a register/constant read descriptor
- {
- logic[12:0] imm;
-
- struct packed
- {
- logic from_consts;
- logic[7:0] reserved;
- xgpr_num r;
- } read;
- } b;
-
- xgpr_num ra,
- rd;
- } rr;
- } dst_src;
-
- logic reg_rev;
-
- union packed // Per-class opcode field; only the fpint class is defined so far
- {
- struct packed
- {
- enum logic[4:0]
- {
- INSN_FPINT_MOV = 0,
- INSN_FPINT_FMUL = 1,
- INSN_FPINT_IMUL = 2,
- INSN_FPINT_FADD = 3,
- INSN_FPINT_RES4 = 4,
- INSN_FPINT_FMAX = 5,
- INSN_FPINT_RES6 = 6,
- INSN_FPINT_FMIN = 7,
- INSN_FPINT_RES8 = 8,
- INSN_FPINT_FCVT = 9,
- INSN_FPINT_RES[10:31] // Range form: generates INSN_FPINT_RES10 .. INSN_FPINT_RES31
- } op;
- } fpint;
- } by_class;
-
- enum logic[1:0] // Occupies the least significant bits of the packed word
- {
- INSN_FPINT = 0,
- INSN_MEM = 1,
- INSN_SFU = 2,
- INSN_GROUP = 3
- } insn_class;
- } insn_word;
-
-endpackage
diff --git a/platform/wavelet3d/gfx_pipes.sv b/platform/wavelet3d/gfx_pipes.sv
deleted file mode 100644
index 2fa875a..0000000
--- a/platform/wavelet3d/gfx_pipes.sv
+++ /dev/null
@@ -1,24 +0,0 @@
-module gfx_pipes // Delay line: out is in delayed by DEPTH unstalled clock cycles
-#(int WIDTH=0, int DEPTH=0)
-(
- input logic clk,
-
- input logic[WIDTH - 1:0] in,
- input logic stall, // While high, every stage holds its value
-
- output logic[WIDTH - 1:0] out
-);
-
- logic[WIDTH - 1:0] pipes[DEPTH]; // No reset: contents are undefined until DEPTH values have been shifted in
-
- assign out = pipes[DEPTH - 1]; // Requires DEPTH >= 1
-
- always_ff @(posedge clk)
- if (~stall) begin
- pipes[0] <= in;
-
- for (integer i = 1; i < DEPTH; ++i)
- pipes[i] <= pipes[i - 1];
- end
-
-endmodule
diff --git a/platform/wavelet3d/gfx_pkg.sv b/platform/wavelet3d/gfx_pkg.sv
deleted file mode 100644
index 7072967..0000000
--- a/platform/wavelet3d/gfx_pkg.sv
+++ /dev/null
@@ -1,271 +0,0 @@
-package gfx; // Shared types, constants and helper functions used by the gfx_* sources
-
- typedef logic[31:0] word;
-
- typedef word uword;
- typedef logic signed[$bits(word) - 1:0] sword;
- typedef logic[$bits(word) / 2 - 1:0] uhword; // Half word
- typedef logic signed[$bits(word) / 2 - 1:0] shword;
- typedef logic[2 * $bits(word) - 1:0] udword; // Double word
- typedef logic signed[2 * $bits(word) - 1:0] sdword;
- typedef logic signed[4 * $bits(word) - 1:0] qword; // Four words wide, declared signed
- typedef logic signed[8 * $bits(word) - 1:0] oword; // Eight words wide, declared signed
-
- localparam int SUBWORD_BITS = $clog2($bits(word)) - $clog2($bits(byte)); // Byte-offset bits inside a word: 5 - 3 = 2
- localparam int BYTES_PER_WORD = 1 << SUBWORD_BITS; // 4
-
- typedef logic[$bits(word) - SUBWORD_BITS - 1:0] word_ptr; // Address with the byte-offset bits removed
- typedef logic[$bits(word_ptr) - 1 - 1:0] dword_ptr; // Each wider pointer type is one bit narrower
- typedef logic[$bits(word_ptr) - 2 - 1:0] qword_ptr;
- typedef logic[$bits(word_ptr) - 3 - 1:0] oword_ptr;
-
- typedef logic[7:0] float_exp;
- typedef logic[$bits(word) - $bits(float_exp) - 2:0] float_mant; // 23 bits: word minus exponent minus sign
- typedef logic[$bits(float_mant):0] float_mant_full; // Includes the explicit '1.'
- typedef logic[$bits(float_mant_full) + 1:0] float_mant_ext; // Accounts for overflow
-
- localparam float_exp FLOAT_EXP_BIAS = (1 << ($bits(float_exp) - 1)) - 1; // 127
- localparam float_exp FLOAT_EXP_MAX = {($bits(float_exp)){1'b1}}; // All ones
-
- function float_mant_full full_mant(float_mant in); // Prepends the leading 1 to a stored mantissa
- full_mant = {1'b1, in};
- endfunction
-
- function float_mant implicit_mant(float_mant_full in); // Drops the leading bit, asserting that it is set
- assert (in[$bits(in) - 1]);
- implicit_mant = in[$bits(in) - 2:0];
- endfunction
-
- typedef struct packed
- {
- logic sign;
- float_exp exp;
- float_mant mant;
- } float;
-
- /* Explanation of guard, round, sticky:
- * https://drilian.com/2023/01/10/floating-point-numbers-and-rounding/
- */
- typedef struct packed
- {
- float normal;
- logic slow,
- zero,
- guard,
- round,
- sticky;
- } float_round;
-
- typedef struct packed
- {
- logic exp_max,
- exp_min,
- mant_zero;
- } float_class;
-
- function float_class classify_float(float in);
- classify_float.exp_max = &in.exp; // Exponent is all ones
- classify_float.exp_min = ~|in.exp; // Exponent is all zeros
- classify_float.mant_zero = ~|in.mant; // Mantissa is all zeros
- endfunction
-
- function logic is_float_special(float_class in); // All-ones exponent, or zero exponent with a non-zero mantissa
- is_float_special = in.exp_max | (in.exp_min & ~in.mant_zero);
- endfunction
-
- function float_mant_ext float_prepare_round(float in, float_class in_class); // Leading bit is 0 only for a zero exponent; two zero bits are appended
- float_prepare_round = {~in_class.exp_min, in.mant, 2'b00};
- endfunction
-
- typedef struct packed // Control bits for the fpint pipeline; their consumers are outside this chunk
- {
- logic setup_mul_float,
- setup_unit_b,
- mnorm_put_hi,
- mnorm_put_lo,
- mnorm_put_mul,
- mnorm_zero_b,
- mnorm_zero_flags,
- minmax_abs,
- minmax_swap,
- minmax_zero_min,
- minmax_copy_flags,
- shiftr_int_signed,
- addsub_copy_flags,
- addsub_int_operand,
- clz_force_nop,
- shiftl_copy_flags,
- round_copy_flags,
- round_enable,
- encode_enable,
- writeback;
- } fpint_op;
-
- typedef struct packed // Placeholder
- {
- logic todo;
- } mem_op;
-
- typedef struct packed // Placeholder
- {
- logic todo;
- } sfu_op;
-
- typedef struct packed // Placeholder
- {
- logic todo;
- } group_op;
-
- // Q22.10
- typedef logic[9:0] fixed_frac;
- typedef logic[$bits(word) - $bits(fixed_frac) - 1:0] fixed_int;
-
- typedef struct packed signed
- {
- fixed_int fint; // 'int' is a keyword
- fixed_frac frac;
- } fixed;
-
- typedef struct packed
- {
- fixed x,
- y;
- } fixed_xy;
-
- typedef struct packed // One scalar per triangle vertex a, b, c
- {
- fixed a,
- b,
- c;
- } vtx_fixed;
-
- typedef struct packed // One (x, y) pair per triangle vertex a, b, c
- {
- fixed_xy a,
- b,
- c;
- } vtx_xy;
-
- localparam int RASTER_BITS = 2;
- localparam int RASTER_SUB_BITS = 4;
- localparam int RASTER_SIZE = 1 << RASTER_BITS; // 4
- localparam int RASTER_COARSE_FRAGS = RASTER_SIZE * RASTER_SIZE; // 16
-
- typedef logic[RASTER_BITS - 1:0] raster_index;
-
- // Case RASTER_BITS = 2: -> 4,4,4,4 -> 8,8 -> 16
- localparam int RASTER_OUT_CLZ_DEPTH = 3;
-
- // Assumes RASTER_BITS == 2; must be adjusted if that changes
- typedef struct packed
- {
- // This saves a lot of flops
- //
- // offsets[0] = inc * 0 = 0
- // offsets[1] = inc * 1 = raster2_times1
- // offsets[2] = inc * 2 = raster2_times1 << 1
- // offsets[3] = inc * 3 = raster2_times3
- fixed raster2_times1,
- raster2_times3;
- } raster_offsets;
-
- function fixed raster_idx(raster_offsets offsets, raster_index idx); // Returns inc * idx rebuilt from the two stored multiples
- unique case (idx)
- RASTER_BITS'(0):
- return '0;
-
- RASTER_BITS'(1):
- return offsets.raster2_times1;
-
- RASTER_BITS'(2):
- return offsets.raster2_times1 << 1;
-
- RASTER_BITS'(3):
- return offsets.raster2_times3;
- endcase
- endfunction
-
- function raster_offsets make_raster_offsets(fixed inc); // Stores inc and 3 * inc
- make_raster_offsets.raster2_times1 = inc;
- make_raster_offsets.raster2_times3 = inc + (inc << 1);
- endfunction
-
- typedef struct packed
- {
- raster_offsets x,
- y;
- } raster_offsets_xy;
-
- typedef struct packed // Splits the 10 fraction bits into 4 + 6
- {
- logic[RASTER_SUB_BITS - 1:0] num;
- logic[$bits(fixed_frac) - RASTER_SUB_BITS - 1:0] prec;
- } raster_sub;
-
- localparam int RASTER_COARSE_DIM_BITS = $bits(fixed) - $bits(raster_index) - $bits(raster_sub); // 32 - 2 - 10 = 20
-
- typedef logic signed[RASTER_COARSE_DIM_BITS - 1:0] raster_coarse_dim;
-
- typedef struct packed
- {
- raster_coarse_dim x,
- y;
- } raster_coarse_xy;
-
- typedef struct packed signed // Same total width as fixed, split into coarse / fine / sub fields
- {
- raster_coarse_dim coarse;
- raster_index fine;
- raster_sub sub;
- } raster_prec;
-
- typedef struct packed
- {
- raster_prec x,
- y;
- } raster_prec_xy;
-
- // Deriving the number of lanes from the rasterizer dimensions is a
- // crucial decision; the entire design depends on it
-
- localparam int SHADER_LANES = RASTER_COARSE_FRAGS;
-
- typedef logic[RASTER_SIZE - 1:0] lane_no; // NOTE(review): RASTER_SIZE bits equals $clog2(SHADER_LANES) only while RASTER_BITS == 2 - confirm intent
- typedef logic[SHADER_LANES - 1:0] lane_mask; // One bit per lane
-
- typedef logic[5:0] group_id;
-
- localparam int REGFILE_STAGES = 3;
- localparam int REG_READ_STAGES = 2 + REGFILE_STAGES + 1;
-
- typedef gfx_isa::sgpr_num sgpr_num; // Re-exported from gfx_isa
- typedef gfx_isa::vgpr_num vgpr_num;
- typedef gfx_isa::xgpr_num xgpr_num;
- typedef gfx_isa::pc_offset pc_offset;
-
- typedef struct packed
- {
- // Does not include p0 because p0 has no ready signal
- logic p1,
- p2,
- p3,
- valid;
- } shader_dispatch;
-
- typedef struct // Unpacked struct
- {
- group_id group;
- xgpr_num dest;
- logic dest_scalar;
- } wave_exec;
-
- localparam int FIXED_MULADD_DEPTH = 5; // Pipeline depth used by gfx_fixed_muladd
- localparam int FIXED_DOTADD_DEPTH = 2 * FIXED_MULADD_DEPTH; // Two chained multiply-add stages
-
- localparam word BOOTROM_BASE = 32'h0010_0000;
-
- localparam int SCHED_BRAM_WORDS = 2048; // 8KiB
-
- typedef word irq_lines;
-
-endpackage
diff --git a/platform/wavelet3d/gfx_pkts.sv b/platform/wavelet3d/gfx_pkts.sv
deleted file mode 100644
index 41399ce..0000000
--- a/platform/wavelet3d/gfx_pkts.sv
+++ /dev/null
@@ -1,29 +0,0 @@
-interface gfx_pkts // Ready/valid packet stream: WIDTH-bit tdata plus a tlast marker
-#(parameter int WIDTH = $bits(gfx::word));
-
- import gfx::*;
-
- logic tlast;
- logic tready;
- logic tvalid;
- logic[WIDTH - 1:0] tdata;
-
- modport tx // Producer side: drives tdata/tlast/tvalid, samples tready
- (
- input tready,
-
- output tdata,
- tlast,
- tvalid
- );
-
- modport rx // Consumer side: samples tdata/tlast/tvalid, drives tready
- (
- input tdata,
- tlast,
- tvalid,
-
- output tready
- );
-
-endinterface
diff --git a/platform/wavelet3d/gfx_raster.sv b/platform/wavelet3d/gfx_raster.sv
deleted file mode 100644
index a57a672..0000000
--- a/platform/wavelet3d/gfx_raster.sv
+++ /dev/null
@@ -1,930 +0,0 @@
-module gfx_raster // Rasterizer top level: chains the bounds -> edges -> coarse -> fine stages
-(
- input logic clk,
- rst_n,
-
- gfx_pkts.rx geometry,
-
- gfx_pkts.tx coverage
-);
-
- import gfx::*;
-
- word bounds_edges_geom_id; // bounds -> edges link, declared before its first use in a port connection
- logic bounds_edges_ready, bounds_edges_valid;
- vtx_xy bounds_edges_vtx;
- fixed_xy bounds_edges_ref;
- raster_prec_xy bounds_edges_span;
-
- gfx_raster_bounds setup_bounds // Consumes geometry packets and produces vertices, reference point and span
- (
- .clk,
- .rst_n,
-
- .geometry,
-
- .edges_ref(bounds_edges_ref),
- .edges_vtx(bounds_edges_vtx),
- .edges_span(bounds_edges_span),
- .edges_ready(bounds_edges_ready),
- .edges_valid(bounds_edges_valid),
- .edges_geom_id(bounds_edges_geom_id)
- );
-
- word edges_coarse_geom_id; // edges -> coarse link
- fixed edges_coarse_base;
- logic edges_coarse_ready, edges_coarse_valid;
- fixed_xy edges_coarse_ref;
- raster_prec_xy edges_coarse_span;
- raster_offsets_xy edges_coarse_offsets;
-
- gfx_raster_edges setup_edges
- (
- .clk,
- .rst_n,
-
- .bounds_ref(bounds_edges_ref),
- .bounds_vtx(bounds_edges_vtx),
- .bounds_span(bounds_edges_span),
- .bounds_ready(bounds_edges_ready),
- .bounds_valid(bounds_edges_valid),
- .bounds_geom_id(bounds_edges_geom_id),
-
- .coarse_ref(edges_coarse_ref),
- .coarse_base(edges_coarse_base),
- .coarse_span(edges_coarse_span),
- .coarse_ready(edges_coarse_ready),
- .coarse_valid(edges_coarse_valid),
- .coarse_geom_id(edges_coarse_geom_id),
- .coarse_offsets(edges_coarse_offsets)
- );
-
- word coarse_fine_geom_id; // coarse -> fine link
- fixed coarse_fine_corner;
- logic coarse_fine_ready, coarse_fine_valid;
- fixed_xy coarse_fine_ref;
- raster_offsets_xy coarse_fine_offsets;
-
- gfx_raster_coarse coarse
- (
- .clk,
- .rst_n,
-
- .edges_ref(edges_coarse_ref),
- .edges_base(edges_coarse_base),
- .edges_span(edges_coarse_span),
- .edges_ready(edges_coarse_ready),
- .edges_valid(edges_coarse_valid),
- .edges_geom_id(edges_coarse_geom_id),
- .edges_offsets(edges_coarse_offsets),
-
- .fine_ref(coarse_fine_ref),
- .fine_ready(coarse_fine_ready),
- .fine_valid(coarse_fine_valid),
- .fine_corner(coarse_fine_corner),
- .fine_geom_id(coarse_fine_geom_id),
- .fine_offsets(coarse_fine_offsets)
- );
-
- gfx_raster_fine fine // Last stage: drives the coverage packet stream
- (
- .clk,
- .rst_n,
-
- .coarse_ref(coarse_fine_ref),
- .coarse_ready(coarse_fine_ready),
- .coarse_valid(coarse_fine_valid),
- .coarse_corner(coarse_fine_corner),
- .coarse_geom_id(coarse_fine_geom_id),
- .coarse_offsets(coarse_fine_offsets),
-
- .coverage
- );
-
-endmodule
-
-module gfx_raster_bounds // Receives one triangle per geometry packet and outputs its vertices, per-axis minimum and span
-(
- input logic clk,
- rst_n,
-
- gfx_pkts.rx geometry,
-
- input logic edges_ready,
- output logic edges_valid,
- output gfx::word edges_geom_id,
- output gfx::fixed_xy edges_ref,
- output gfx::raster_prec_xy edges_span,
- output gfx::vtx_xy edges_vtx
-);
-
- import gfx::*;
-
- enum int unsigned // Packet layout: one geometry id word, then three X words, then three Y words
- {
- IN_GEOM_ID,
- IN_DIM_X,
- IN_DIM_Y
- } in_state;
-
- enum int unsigned // Which vertex the next received word belongs to
- {
- VTX_A,
- VTX_B,
- VTX_C
- } vtx_state;
-
- logic a_lt_b, a_lt_c, b_lt_c, edges_handshake, geom_complete, geom_last,
- geom_recv, in_vtx, next_dim, new_vtx;
-
- logic end_new_dim, end_valid, vtx_valid, lt_new_dim, lt_valid, minmax_new_dim, minmax_valid;
-
- fixed geom_data;
- vtx_fixed dim_vtx, dim_vtx_x, dim_vtx_y;
- raster_prec max, min;
-
- assign geom_recv = geometry.tready & geometry.tvalid; // A word is accepted this cycle
- assign edges_handshake = edges_valid & edges_ready; // The result is taken downstream this cycle
-
- assign edges_vtx.a.x = dim_vtx_x.a;
- assign edges_vtx.a.y = dim_vtx_y.a;
- assign edges_vtx.b.x = dim_vtx_x.b;
- assign edges_vtx.b.y = dim_vtx_y.b;
- assign edges_vtx.c.x = dim_vtx_x.c;
- assign edges_vtx.c.y = dim_vtx_y.c;
-
- assign geometry.tready = edges_handshake | ~geom_complete; // Stop accepting input once a full triangle is pending
-
- always_comb begin
- unique case (vtx_state)
- VTX_C: next_dim = geom_recv; // Third vertex word of a dimension has arrived
- default: next_dim = 0;
- endcase
-
- unique case (in_state)
- IN_DIM_Y: geom_last = next_dim; // Last word of the whole triangle
- default: geom_last = 0;
- endcase
- end
-
- always_ff @(posedge clk or negedge rst_n)
- if (~rst_n) begin
- in_state <= IN_GEOM_ID;
- vtx_state <= VTX_A;
-
- in_vtx <= 0;
- new_vtx <= 0;
- geom_complete <= 0;
-
- lt_valid <= 0;
- end_valid <= 0;
- vtx_valid <= 0;
- edges_valid <= 0;
- minmax_valid <= 0;
-
- lt_new_dim <= 0;
- end_new_dim <= 0;
- minmax_new_dim <= 0;
-
- edges_geom_id <= 'x;
- end else begin
- end_valid <= 0; // Valid pulse walks end -> vtx -> lt -> minmax -> edges_valid
- vtx_valid <= end_valid;
- lt_valid <= vtx_valid;
- minmax_valid <= lt_valid;
-
- if (~edges_valid | edges_ready) // Hold edges_valid while the output is not being taken
- edges_valid <= minmax_valid;
-
- geom_complete <= (geom_complete | geom_last) & ~edges_handshake; // Set by the last word, cleared by the output handshake
-
- unique case (in_state)
- IN_GEOM_ID:
- if (geom_recv) begin
- in_state <= IN_DIM_X;
-
- in_vtx <= 1;
- edges_geom_id <= geometry.tdata;
- end
-
- IN_DIM_X:
- if (next_dim)
- in_state <= IN_DIM_Y;
-
- IN_DIM_Y:
- if (next_dim) begin
- in_state <= IN_GEOM_ID;
-
- in_vtx <= 0;
- end_valid <= 1;
- end
- endcase
-
- new_vtx <= 0; // One-cycle pulse following each accepted vertex word
-
- lt_new_dim <= 0; // New-dimension pulse walks lt -> minmax -> end
- minmax_new_dim <= lt_new_dim;
- end_new_dim <= minmax_new_dim;
-
- unique case (vtx_state)
- VTX_A: begin
- if (in_vtx & geom_recv) begin
- new_vtx <= 1;
- vtx_state <= VTX_B;
- end
-
- if (new_vtx) begin // Word received in VTX_C is stored one cycle later, after the state has advanced
- dim_vtx.c <= geom_data;
- lt_new_dim <= 1;
- end
- end
-
- VTX_B: begin
- if (geom_recv) begin
- new_vtx <= 1;
- vtx_state <= VTX_C;
- end
-
- if (new_vtx) // Word received in VTX_A
- dim_vtx.a <= geom_data;
- end
-
- VTX_C: begin
- if (geom_recv) begin
- new_vtx <= 1;
- vtx_state <= VTX_A;
- end
-
- if (new_vtx) // Word received in VTX_B
- dim_vtx.b <= geom_data;
- end
- endcase
-
- if (in_state == IN_DIM_Y & next_dim)
- assert (geometry.tlast); // The final word of a triangle must carry tlast
- end
-
- always_ff @(posedge clk) begin
- geom_data <= geometry.tdata; // Registered copy consumed together with new_vtx
-
- a_lt_b <= $signed(dim_vtx.a) < $signed(dim_vtx.b);
- a_lt_c <= $signed(dim_vtx.a) < $signed(dim_vtx.c);
- b_lt_c <= $signed(dim_vtx.b) < $signed(dim_vtx.c);
-
- // These are not really 'x' or 'y' until edges_valid = 1
- if (lt_new_dim) begin
- dim_vtx_y <= dim_vtx;
- dim_vtx_x <= dim_vtx_y;
- end
-
- if (a_lt_b) begin // Pick min and max of the three values from the registered comparisons
- min <= a_lt_c ? dim_vtx_y.a : dim_vtx_y.c;
- max <= b_lt_c ? dim_vtx_y.c : dim_vtx_y.b;
- end else begin
- min <= b_lt_c ? dim_vtx_y.b : dim_vtx_y.c;
- max <= a_lt_c ? dim_vtx_y.c : dim_vtx_y.a;
- end
-
- {min.fine, min.sub} <= '0; // Later assignment wins: only the coarse field of min/max is kept
- {max.fine, max.sub} <= '0;
-
- if (end_new_dim) begin // Shift the newest result into .y and the previous one into .x
- edges_ref.y <= min;
- edges_ref.x <= edges_ref.y;
-
- edges_span.y <= max - min;
- edges_span.x <= edges_span.y;
- end
- end
-
-endmodule
-
-module gfx_raster_edges // Walks the three triangle edges, feeding edge increments and a dot-add base value to the coarse stage
-(
- input logic clk,
- rst_n,
-
- input logic bounds_valid,
- input gfx::word bounds_geom_id,
- input gfx::fixed_xy bounds_ref,
- input gfx::raster_prec_xy bounds_span,
- input gfx::vtx_xy bounds_vtx,
- output logic bounds_ready,
-
- input logic coarse_ready,
- output logic coarse_valid,
- output gfx::word coarse_geom_id,
- output gfx::fixed_xy coarse_ref,
- output gfx::raster_prec_xy coarse_span,
- output gfx::fixed coarse_base,
- output gfx::raster_offsets_xy coarse_offsets
-);
-
- import gfx::*;
-
- enum int unsigned
- {
- EDGE_AB,
- EDGE_BC,
- EDGE_CA,
- // EDGE_CA doubles as OFFSETS_AB
- OFFSETS_BC,
- OFFSETS_CA,
- OUT
- } state;
-
- struct // Edge increment plus two delayed copies that feed coarse_offsets
- {
- fixed_xy cur,
- delay1,
- delay2;
- } inc;
-
- logic coarse_handshake, coarse_stall, offsets_flow;
- fixed_xy delta, p, q;
-
- // - 2 because coarse_valid comes at the end
- logic[FIXED_DOTADD_DEPTH - 2:0] dotadd_valid;
-
- assign coarse_stall = coarse_valid & ~coarse_ready; // Output is valid but not being taken
- assign coarse_handshake = coarse_valid & coarse_ready;
-
- gfx_fixed_dotadd edge_base // coarse_base from delta.x * inc.cur.x and delta.y * inc.cur.y with a zero addend
- (
- .clk,
- .c(0),
- .q(coarse_base),
- .a0(delta.x),
- .b0(inc.cur.x),
- .a1(delta.y),
- .b1(inc.cur.y),
- .stall(coarse_stall)
- );
-
- always_comb
- unique case (state)
- OUT: offsets_flow = coarse_handshake; // In OUT the offsets pipeline advances only on a handshake
- default: offsets_flow = 1;
- endcase
-
- always_ff @(posedge clk or negedge rst_n)
- if (~rst_n) begin
- state <= EDGE_AB;
-
- p <= 'x;
- q <= 'x;
- coarse_ref <= 'x;
- coarse_geom_id <= 'x;
-
- bounds_ready <= 0;
- coarse_valid <= 0;
-
- for (int i = 0; i < $bits(dotadd_valid); ++i) // Reset every bit: the last one feeds coarse_valid on the first clock after reset
- dotadd_valid[i] <= 0;
- end else begin
- for (int i = 1; i < $bits(dotadd_valid); ++i) // Valid bit shifts along with the dot-add pipeline
- dotadd_valid[i] <= dotadd_valid[i - 1];
-
- if (~coarse_stall)
- coarse_valid <= dotadd_valid[$bits(dotadd_valid) - 1];
-
- bounds_ready <= 0; // Default: one-cycle pulse set in EDGE_BC
- dotadd_valid[0] <= 0; // Default: one-cycle pulse set in EDGE_CA
-
- unique case (state)
- EDGE_AB: begin
- if (bounds_valid)
- state <= EDGE_BC;
-
- coarse_ref <= bounds_ref; // Sampled every cycle while waiting in this state
- coarse_span <= bounds_span;
- coarse_geom_id <= bounds_geom_id;
-
- p <= bounds_vtx.a;
- q <= bounds_vtx.b;
- end
-
- EDGE_BC: begin
- state <= EDGE_CA;
- bounds_ready <= 1;
-
- p <= bounds_vtx.b;
- q <= bounds_vtx.c;
- end
-
- EDGE_CA: begin
- state <= OFFSETS_BC;
-
- p <= bounds_vtx.c;
- q <= bounds_vtx.a;
-
- // This happens exactly at the moment when ab, bc, ca
- // all sit in their correct places in the pipeline
- dotadd_valid[0] <= 1;
- end
-
- OFFSETS_BC:
- state <= OFFSETS_CA;
-
- OFFSETS_CA:
- state <= OUT;
-
- OUT:
- if (coarse_handshake)
- state <= EDGE_AB;
- endcase
- end
-
- always_ff @(posedge clk) begin
- delta.x <= coarse_ref.x - q.x; // Vector from edge endpoint q to the reference point
- delta.y <= coarse_ref.y - q.y;
-
- inc.cur.x <= p.y - q.y; // Per-axis increment for the edge from p to q
- inc.cur.y <= q.x - p.x;
-
- //TODO: top-left rule
- if (offsets_flow) begin
- inc.delay1 <= inc.cur;
- inc.delay2 <= inc.delay1;
-
- coarse_offsets.x <= make_raster_offsets(inc.delay2.x);
- coarse_offsets.y <= make_raster_offsets(inc.delay2.y);
- end
- end
-
-endmodule
-
-module gfx_raster_coarse
-(
- input logic clk,
- rst_n,
-
- input logic edges_valid,
- input gfx::word edges_geom_id,
- input gfx::fixed_xy edges_ref,
- input gfx::raster_prec_xy edges_span,
- input gfx::fixed edges_base,
- input gfx::raster_offsets_xy edges_offsets,
- output logic edges_ready,
-
- input logic fine_ready,
- output logic fine_valid,
- output gfx::word fine_geom_id,
- output gfx::fixed_xy fine_ref,
- output gfx::fixed fine_corner,
- output gfx::raster_offsets_xy fine_offsets
-);
-
- import gfx::*;
-
- enum int unsigned
- {
- SETUP,
- TEST_AB,
- TEST_BC,
- TEST_CA,
- OUT
- } state;
-
- struct
- {
- fixed cur,
- next,
- prev;
- } corner, edge_fn, vertical;
-
- struct
- {
- raster_offsets_xy cur,
- next,
- prev;
- } offsets;
-
- logic edges_recv, end_block, end_x, end_y, first_run,
- mask, mask_reset, new_geom, test_flow, out_flow;
-
- fixed edge_test, reference_x, vertical_inc;
- fixed_xy max_offset, min_offset, test_offset;
- raster_coarse_xy stride;
- raster_coarse_dim width;
- raster_offsets_xy next_offsets;
-
- function fixed coarse_offset(raster_offsets offsets);
- return raster_idx(offsets, RASTER_BITS'(1)) << RASTER_BITS;
- endfunction
-
- assign end_x = stride.x == '0;
- assign end_y = stride.y == '0;
- assign end_block = end_x & end_y;
-
- assign edge_test = edge_fn.cur + test_offset.x + test_offset.y;
- assign vertical_inc = vertical.cur + coarse_offset(offsets.cur.y);
-
- assign fine_corner = corner.cur;
- assign fine_offsets = offsets.cur; // Vuelve a cur luego de 3 ciclos
-
- assign min_offset.x = raster_idx(next_offsets.x, RASTER_BITS'(0));
- assign min_offset.y = raster_idx(next_offsets.y, RASTER_BITS'(0));
- assign max_offset.x = raster_idx(next_offsets.x, RASTER_BITS'(RASTER_SIZE - 1));
- assign max_offset.y = raster_idx(next_offsets.y, RASTER_BITS'(RASTER_SIZE - 1));
- assign next_offsets = edges_recv ? edges_offsets : offsets.next;
-
- // Combinational decode of the FSM state into control strobes.
- // Every strobe receives its idle value first; each state then lists only
- // the signals it overrides.
- always_comb begin
-     new_geom = 0;
-     mask_reset = 0;
-     edges_ready = 0;
-     edges_recv = 0;
-     fine_valid = 0;
-     out_flow = 0;
-     test_flow = 1;
-
-     unique case (state)
-         SETUP: begin
-             // Waiting for a new primitive: accept and latch the edge data
-             new_geom = 1;
-             edges_ready = 1;
-             edges_recv = 1;
-         end
-
-         TEST_AB: begin
-             // First edge of a block restarts the accumulated mask
-             mask_reset = 1;
-             edges_recv = first_run;
-         end
-
-         TEST_BC:
-             edges_recv = first_run;
-
-         OUT: begin
-             // The block is offered downstream only if it survived the tests;
-             // a rejected block (mask == 0) flows without waiting for fine_ready
-             fine_valid = mask;
-             out_flow = ~mask | fine_ready;
-             test_flow = 0;
-         end
-
-         default: ;
-     endcase
- end
-
- // State register of the coarse rasterizer.
- // SETUP waits for edges_valid, then TEST_AB -> TEST_BC -> TEST_CA -> OUT run
- // back to back. OUT holds until out_flow and then either returns to SETUP
- // (last block of the primitive) or restarts the tests for the next block.
- always_ff @(posedge clk or negedge rst_n)
-     if (~rst_n) begin
-         state <= SETUP;
-         first_run <= 1;
-     end else
-         unique case (state)
-             SETUP:
-                 if (edges_valid)
-                     state <= TEST_AB;
-
-             TEST_AB: state <= TEST_BC;
-             TEST_BC: state <= TEST_CA;
-             TEST_CA: state <= OUT;
-
-             OUT: begin
-                 // Updated on every OUT cycle, not only when out_flow is set
-                 first_run <= end_block;
-
-                 if (out_flow) begin
-                     if (end_block)
-                         state <= SETUP;
-                     else
-                         state <= TEST_AB;
-                 end
-             end
-         endcase
-
- // Datapath registers (no reset). Several groups below rely on the rule that
- // the last nonblocking assignment to a register in the same cycle wins, so
- // the statement order is significant.
- always_ff @(posedge clk) begin
- // While in SETUP (new_geom), keep sampling the incoming primitive
- if (new_geom) begin
- width <= edges_span.x.coarse;
- stride.x <= edges_span.x.coarse;
- stride.y <= edges_span.y.coarse;
- reference_x <= edges_ref.x;
-
- fine_ref <= edges_ref;
- fine_geom_id <= edges_geom_id;
- end
-
- // A block leaves the OUT state: advance the block position
- if (out_flow) begin
- stride.x <= stride.x - 1;
- fine_ref.x.fint <= fine_ref.x.fint + ($bits(fixed_int))'(RASTER_SIZE);
-
- // End of a row of blocks: these assignments override the two above,
- // rewinding x to the saved reference and stepping one block row down
- if (end_x) begin
- fine_ref.x <= reference_x;
- fine_ref.y.fint <= fine_ref.y.fint + ($bits(fixed_int))'(RASTER_SIZE);
-
- stride.x <= width;
- stride.y <= stride.y - 1;
- end
- end
-
- // Every non-OUT cycle rotates the three-entry (cur/next/prev) rings
- if (test_flow) begin
- offsets.cur <= next_offsets;
- offsets.next <= offsets.prev;
- offsets.prev <= offsets.cur;
-
- vertical.cur <= vertical.next;
- vertical.next <= vertical.prev;
- vertical.prev <= vertical.cur;
-
- // The entry leaving cur re-enters the ring stepped by one block in x
- edge_fn.cur <= edge_fn.next;
- edge_fn.next <= edge_fn.prev;
- edge_fn.prev <= edge_fn.cur + coarse_offset(offsets.cur.x);
-
- // At the end of a row the x step is replaced by the y step (vertical_inc)
- if (end_x) begin
- edge_fn.prev <= vertical_inc;
- vertical.prev <= vertical_inc;
- end
-
- corner.cur <= corner.next;
- corner.next <= corner.prev;
- corner.prev <= edge_fn.cur;
-
- // Pick the max or min in-block offset according to the sign of the
- // per-block step in each axis
- if (coarse_offset(next_offsets.x) >= 'sd0)
- test_offset.x <= max_offset.x;
- else
- test_offset.x <= min_offset.x;
-
- if (coarse_offset(next_offsets.y) >= 'sd0)
- test_offset.y <= max_offset.y;
- else
- test_offset.y <= min_offset.y;
-
- // NOTE(review): the edge_test comparison is commented out and replaced
- // by a constant 1, so this line never clears mask
- mask <= (mask | mask_reset) & 1/*(edge_test >= 'sd0)*/;
- end
-
- // Load of a fresh edge function; overrides the ring rotation of cur above
- if (edges_recv) begin
- edge_fn.cur <= edges_base;
- vertical.cur <= edges_base;
- end
- end
-
-endmodule
-
-module gfx_raster_fine
-(
- input logic clk,
- rst_n,
-
- input logic coarse_valid,
- input gfx::word coarse_geom_id,
- input gfx::fixed_xy coarse_ref,
- input gfx::fixed coarse_corner,
- input gfx::raster_offsets_xy coarse_offsets,
- output logic coarse_ready,
-
- gfx_pkts.tx coverage
-);
-
- import gfx::*;
-
- enum int unsigned
- {
- IN_C,
- IN_A,
- IN_B,
- IN_MASK
- } in_state;
-
- enum int unsigned
- {
- OUT_ACCEPT,
- OUT_GEOM_ID,
- OUT_POS,
- OUT_MASK,
- OUT_BARY_C,
- OUT_BARY_A,
- OUT_BARY_B
- } out_state;
-
- struct
- {
- fixed cur,
- next,
- prev;
- } corner;
-
- struct
- {
- raster_offsets_xy cur,
- next,
- prev;
- } offsets;
-
- logic begin_bary, hold_block, in_valid, mask_in_clean,
- mask_in_reset, new_block, out_last;
-
- word geom_id;
- fixed bary_coord;
- lane_no lane, lane_ctz, lane_hold;
- fixed_xy block_ref;
- lane_mask mask_in, mask, mask_ctz;
- raster_index lane_x, lane_y;
- logic[$bits(lane_ctz):0] ctz_count;
-
- function shword ref_half(raster_prec dim);
- return dim.coarse[$bits(shword) - 1:0];
- endfunction
-
- assign lane_ctz = ctz_count[$bits(lane_ctz) - 1:0];
- assign in_valid = mask_in_clean & |mask_in;
- assign out_last = ~|mask;
- assign {lane_y, lane_x} = lane;
-
- // **IMPORTANTE**: Esto va a fallar a partir de RASTER_BITS >= 3,
- // ya que la fsm asume que ctz termina en 3 ciclos o menos
-
- gfx_ctz #(RASTER_COARSE_FRAGS) ctz
- (
- .clk,
- .value(mask_ctz),
- .ctz(ctz_count)
- );
-
- // Combinational decode of both FSMs (out_state drives the coverage stream,
- // in_state drives the intake of the three per-edge values from coarse).
- always_comb begin
-     // A new block may be taken only while the output side is idle
-     unique case (out_state)
-         OUT_ACCEPT: new_block = 1;
-         default: new_block = 0;
-     endcase
-
-     // ctz scans the incoming mask while idle, the remaining mask otherwise
-     unique case (out_state)
-         OUT_ACCEPT: mask_ctz = mask_in;
-         default: mask_ctz = mask;
-     endcase
-
-     unique case (out_state)
-         OUT_ACCEPT: coverage.tvalid = 0;
-         default: coverage.tvalid = 1;
-     endcase
-
-     // Pulses when the next lane's barycentric triple is about to start
-     unique case (out_state)
-         OUT_MASK, OUT_BARY_B:
-             begin_bary = coverage.tready;
-
-         default:
-             begin_bary = 0;
-     endcase
-
-     unique case (out_state)
-         OUT_BARY_B: coverage.tlast = out_last;
-         default: coverage.tlast = 0;
-     endcase
-
-     unique case (out_state)
-         OUT_GEOM_ID:
-             coverage.tdata = geom_id;
-
-         // Both halves come from block_ref, which was latched together with
-         // geom_id when the block was accepted. The live coarse_ref input
-         // must not be used here: coarse_ready has already released it, so
-         // it may describe the following block by the time OUT_POS is sent.
-         OUT_POS:
-             coverage.tdata = {ref_half(block_ref.y), ref_half(block_ref.x)};
-
-         OUT_MASK:
-             coverage.tdata = {{($bits(word) - $bits(mask)){1'b0}}, mask};
-
-         OUT_BARY_C, OUT_BARY_A, OUT_BARY_B:
-             coverage.tdata = bary_coord;
-
-         default:
-             coverage.tdata = 'x;
-     endcase
-
-     // During OUT_MASK the ctz result is used directly; afterwards the
-     // registered copy (lane_hold) selects the lane
-     unique case (out_state)
-         OUT_MASK:
-             lane = lane_ctz;
-
-         default:
-             lane = lane_hold;
-     endcase
-
-     unique case (in_state)
-         IN_C: coarse_ready = new_block;
-         default: coarse_ready = 0;
-     endcase
-
-     // Capture coarse_corner/coarse_offsets on the three intake cycles
-     unique case (in_state)
-         IN_C: hold_block = new_block;
-         IN_A: hold_block = 1;
-         IN_B: hold_block = 1;
-         IN_MASK: hold_block = 0;
-     endcase
-
-     unique case (in_state)
-         IN_C: mask_in_reset = 1;
-         default: mask_in_reset = 0;
-     endcase
-
-     // mask_in has accumulated all three edge tests only in IN_MASK
-     unique case (in_state)
-         IN_MASK: mask_in_clean = 1;
-         default: mask_in_clean = 0;
-     endcase
- end
-
- // State registers of the intake FSM (in_state) and of the coverage-stream
- // FSM (out_state). Every out_state past OUT_ACCEPT advances only on
- // coverage.tready.
- always_ff @(posedge clk or negedge rst_n)
-     if (~rst_n) begin
-         in_state <= IN_C;
-         out_state <= OUT_ACCEPT;
-     end else begin
-         unique case (in_state)
-             IN_C:
-                 if (coarse_valid & new_block)
-                     in_state <= IN_A;
-
-             IN_A:    in_state <= IN_B;
-             IN_B:    in_state <= IN_MASK;
-             IN_MASK: in_state <= IN_C;
-         endcase
-
-         unique case (out_state)
-             OUT_ACCEPT:
-                 if (in_valid)
-                     out_state <= OUT_GEOM_ID;
-
-             OUT_GEOM_ID: if (coverage.tready) out_state <= OUT_POS;
-             OUT_POS:     if (coverage.tready) out_state <= OUT_MASK;
-             OUT_MASK:    if (coverage.tready) out_state <= OUT_BARY_C;
-             OUT_BARY_C:  if (coverage.tready) out_state <= OUT_BARY_A;
-             OUT_BARY_A:  if (coverage.tready) out_state <= OUT_BARY_B;
-
-             // Loop over the barycentric triple until no lanes remain
-             OUT_BARY_B:
-                 if (coverage.tready) begin
-                     if (out_last)
-                         out_state <= OUT_ACCEPT;
-                     else
-                         out_state <= OUT_BARY_C;
-                 end
-         endcase
-     end
-
- // Datapath registers (no reset). Later assignments in this block override
- // earlier ones to the same register, so statement order is significant.
- always_ff @(posedge clk) begin
- // Parallel sign test; this does the heavy lifting of the fine raster.
- // Note that many of the adders will be removed in synthesis.
- // Each bit is ANDed across cycles and restarted when mask_in_reset is set.
- for (int i = 0; i < RASTER_SIZE; ++i)
- for (int j = 0; j < RASTER_SIZE; ++j)
- mask_in[i * RASTER_SIZE + j] <=
- (mask_in[i * RASTER_SIZE + j] | mask_in_reset)
- & (coarse_corner
- + raster_idx(coarse_offsets.y, RASTER_BITS'(i))
- + raster_idx(coarse_offsets.x, RASTER_BITS'(j))
- >= 'sd0);
-
- // The barycentric coordinates are recomputed for every fragment that
- // was not discarded. The reason is to avoid storing and then
- // multiplexing the coordinates of a whole block (48 words).
- if (coverage.tready)
- bary_coord <= corner.next
- + raster_idx(offsets.next.y, RASTER_BITS'(lane_y))
- + raster_idx(offsets.next.x, RASTER_BITS'(lane_x));
-
- // Sampled on every idle IN_C cycle, so the values of the accepted block
- // are the ones that remain once intake starts
- if (new_block & mask_in_reset) begin
- geom_id <= coarse_geom_id;
- block_ref <= coarse_ref;
- end
-
- // new_block = 0 => coverage.tvalid = 1
- // Rotate the three-entry (cur/next/prev) rings
- if (new_block | coverage.tready) begin
- corner.cur <= corner.next;
- corner.next <= corner.prev;
- corner.prev <= corner.cur;
-
- offsets.cur <= offsets.next;
- offsets.next <= offsets.prev;
- offsets.prev <= offsets.cur;
- end
-
- if (hold_block) begin
- // Written to prev instead of cur so that the first values land in
- // cur exactly when OUT_BARY_C is reached
- corner.prev <= coarse_corner;
- offsets.prev <= coarse_offsets;
- end
-
- if (new_block)
- mask <= mask_in;
-
- // Clear the lowest set bit of the mask and remember the lane reported by ctz
- if (begin_bary) begin
- mask <= mask & (mask - 1);
- lane_hold <= lane_ctz;
- end
- end
-
-endmodule
diff --git a/platform/wavelet3d/gfx_regfile_io.sv b/platform/wavelet3d/gfx_regfile_io.sv
deleted file mode 100644
index 2459049..0000000
--- a/platform/wavelet3d/gfx_regfile_io.sv
+++ /dev/null
@@ -1,106 +0,0 @@
-// Register-file interface. Bundles the operand read request, the operand
-// read data, the SGPR/VGPR write ports and the per-group pc/mask accesses,
-// and exposes them through one modport per client.
-interface gfx_regfile_io;
-
- import gfx::*;
-
- // Operand read request (driven through modport read)
- struct
- {
- group_id group;
- sgpr_num a_sgpr,
- b_sgpr;
- vgpr_num a_vgpr,
- b_vgpr;
- logic[12:0] b_imm;
- logic a_scalar,
- b_scalar,
- b_is_imm,
- b_is_const,
- scalar_rev;
- } op;
-
- // Scalar register write port
- struct
- {
- logic write;
- group_id group;
- sgpr_num sgpr;
- word data;
- } sgpr_write;
-
- // Vector register write port; mask carries one bit per lane
- struct
- {
- lane_mask mask;
- group_id group;
- vgpr_num vgpr;
- word data[SHADER_LANES];
- } vgpr_write;
-
- // a/b: operand read data, one word per lane.
- // NOTE(review): sgpr_write_data and vgpr_write_data are not listed in any
- // modport below.
- word a[SHADER_LANES], b[SHADER_LANES], sgpr_write_data, vgpr_write_data[SHADER_LANES];
- logic mask_wb_write, pc_wb_write;
- word_ptr pc_back, pc_front, pc_wb;
- group_id mask_back_group, mask_wb_group, pc_back_group, pc_front_group, pc_wb_group;
- lane_mask mask_back, mask_wb;
-
- // Consumer of operand read data
- modport ab
- (
- input a,
- b
- );
-
- // Issuer of operand read requests
- modport read
- (
- output op
- );
-
- // Selects a group with pc_front_group and receives pc_front for it
- modport bind_
- (
- input pc_front,
-
- output pc_front_group
- );
-
- // Writeback side: drives the GPR write ports and the pc/mask write-back,
- // and reads pc_back/mask_back for the group it selects
- modport wb
- (
- input pc_back,
- mask_back,
-
- output sgpr_write,
- vgpr_write,
-
- pc_back_group,
- mask_back_group,
-
- pc_wb,
- pc_wb_group,
- pc_wb_write,
-
- mask_wb,
- mask_wb_group,
- mask_wb_write
- );
-
- // Register-file side: mirror image of the client modports above
- modport regs
- (
- input op,
- sgpr_write,
- vgpr_write,
-
- pc_back_group,
- pc_front_group,
- mask_back_group,
-
- pc_wb,
- pc_wb_group,
- pc_wb_write,
-
- mask_wb,
- mask_wb_group,
- mask_wb_write,
-
- output a,
- b,
-
- pc_back,
- pc_front,
- mask_back
- );
-
-endinterface
diff --git a/platform/wavelet3d/gfx_rst_sync.sv b/platform/wavelet3d/gfx_rst_sync.sv
deleted file mode 100644
index 2a8ea3b..0000000
--- a/platform/wavelet3d/gfx_rst_sync.sv
+++ /dev/null
@@ -1,13 +0,0 @@
-// Reset synchronizer.
-//
-// srst_n is asserted (driven low) asynchronously as soon as rst_n falls and
-// is released synchronously to clk. The release goes through two flops: the
-// first one may go metastable if rst_n rises close to a clock edge, and the
-// second one gives it a full cycle to settle before srst_n is seen by the
-// rest of the design. A single flop, as used previously, exposes that
-// metastable value directly on srst_n.
-//
-// Ports:
-//   clk     destination clock
-//   rst_n   asynchronous active-low reset
-//   srst_n  active-low reset, asynchronous assert / synchronous release
-//           (released two rising clk edges after rst_n goes high)
-module gfx_rst_sync
-(
- input logic clk,
-             rst_n,
-
- output logic srst_n
-);
-
- // First synchronizer stage; the only flop that samples the constant 1
- // right after the asynchronous release
- logic srst_meta_n;
-
- always_ff @(posedge clk or negedge rst_n)
-     if (~rst_n) begin
-         srst_meta_n <= 0;
-         srst_n <= 0;
-     end else begin
-         srst_meta_n <= 1;
-         srst_n <= srst_meta_n;
-     end
-
-endmodule
diff --git a/platform/wavelet3d/gfx_sched.sv b/platform/wavelet3d/gfx_sched.sv
deleted file mode 100644
index b8b6b7e..0000000
--- a/platform/wavelet3d/gfx_sched.sv
+++ /dev/null
@@ -1,139 +0,0 @@
-module gfx_sched
-import gfx::*;
-(
- input logic clk,
- rst_n,
- srst_n,
-
- gfx_axil.m axim,
-
- input irq_lines irq
-);
-
- logic axi_ready, axi_valid, bram_ready, bram_read, bram_write, bram_write_next,
- mem_instr, mem_la_read, mem_la_write, mem_ready, mem_valid, select_bram;
-
- word bram[SCHED_BRAM_WORDS];
- word axi_rdata, bram_rdata, mem_addr, mem_la_addr, mem_rdata, mem_wdata;
- logic[$bits(word) / $bits(byte) - 1:0] mem_wstrb;
-
- logic[$clog2(SCHED_BRAM_WORDS) - 1:0] bram_addr;
-
- assign bram_addr = mem_addr[$bits(bram_addr) + SUBWORD_BITS - 1:SUBWORD_BITS];
- assign mem_ready = (axi_valid & axi_ready) | bram_ready;
- assign mem_rdata = bram_ready ? bram_rdata : axi_rdata;
- assign select_bram = ~|mem_la_addr[$bits(mem_la_addr) - 1:$bits(bram_addr) + SUBWORD_BITS];
- assign bram_write_next = mem_la_write & select_bram;
-
- defparam core.ENABLE_COUNTERS = 0;
- defparam core.ENABLE_COUNTERS64 = 0;
- defparam core.BARREL_SHIFTER = 1;
- defparam core.COMPRESSED_ISA = 1;
- defparam core.CATCH_MISALIGN = 0;
- defparam core.CATCH_ILLINSN = 0;
- defparam core.ENABLE_MUL = 1;
- defparam core.ENABLE_DIV = 1;
- defparam core.ENABLE_IRQ = 1;
- defparam core.ENABLE_IRQ_QREGS = 0;
- defparam core.ENABLE_IRQ_TIMER = 0;
- defparam core.PROGADDR_RESET = BOOTROM_BASE;
-
- picorv32 core
- (
- .clk,
- .resetn(srst_n),
- .trap(),
-
- .mem_valid,
- .mem_instr,
- .mem_ready,
-
- .mem_addr,
- .mem_wdata,
- .mem_wstrb,
- .mem_rdata,
-
- .mem_la_read,
- .mem_la_write,
- .mem_la_addr,
- .mem_la_wdata(),
- .mem_la_wstrb(),
-
- .pcpi_valid(),
- .pcpi_insn(),
- .pcpi_rs1(),
- .pcpi_rs2(),
- .pcpi_wr(),
- .pcpi_rd(),
- .pcpi_wait(0),
- .pcpi_ready(0),
-
- .irq,
- .eoi(),
-
- .trace_valid(),
- .trace_data()
- );
-
- picorv32_axi_adapter axi
- (
- .clk,
- .resetn(srst_n),
-
- .mem_axi_awvalid(axim.awvalid),
- .mem_axi_awready(axim.awready),
- .mem_axi_awaddr(axim.awaddr),
- .mem_axi_awprot(),
-
- .mem_axi_wvalid(axim.wvalid),
- .mem_axi_wready(axim.wready),
- .mem_axi_wdata(axim.wdata),
- .mem_axi_wstrb(), // Potenciales sorpresas
-
- .mem_axi_bvalid(axim.bvalid),
- .mem_axi_bready(axim.bready),
-
- .mem_axi_arvalid(axim.arvalid),
- .mem_axi_arready(axim.arready),
- .mem_axi_araddr(axim.araddr),
- .mem_axi_arprot(),
-
- .mem_axi_rvalid(axim.rvalid),
- .mem_axi_rready(axim.rready),
- .mem_axi_rdata(axim.rdata),
-
- .mem_valid(mem_valid & axi_valid),
- .mem_instr,
- .mem_ready(axi_ready),
- .mem_addr,
- .mem_wdata,
- .mem_wstrb,
- .mem_rdata(axi_rdata)
- );
-
- // Scheduler BRAM port: byte-enabled write or registered read.
- //
- // mem_wstrb holds one strobe per byte of the word (its width is
- // $bits(word) / $bits(byte)), so strobe i must gate byte lane i of the
- // addressed word. Indexing the word with [i] alone would select a single
- // bit and leave everything above bit 3 unwritable.
- always_ff @(posedge clk) begin
-     if (bram_write) begin
-         for (int i = 0; i < $bits(mem_wstrb); ++i)
-             if (mem_wstrb[i])
-                 bram[bram_addr][i * $bits(byte) +: $bits(byte)]
-                     <= mem_wdata[i * $bits(byte) +: $bits(byte)];
-
-         // Read data is not meaningful on a write cycle
-         bram_rdata <= 'x;
-     end else
-         bram_rdata <= bram[bram_addr];
- end
-
-
- // Request tracking for the two memory targets (BRAM and AXI), derived
- // from the core's look-ahead (mem_la_*) signals.
- always_ff @(posedge clk or negedge rst_n)
- if (~rst_n) begin
- axi_valid <= 0;
- bram_read <= 0;
- bram_ready <= 0;
- bram_write <= 0;
- end else begin
- // Set whenever the look-ahead address decodes outside the BRAM range,
- // and held while an AXI access is still waiting for axi_ready
- axi_valid <= ~select_bram | (axi_valid & ~axi_ready);
- // Reads are acknowledged one cycle after bram_read; writes are
- // acknowledged in the same cycle in which bram_write is set
- bram_read <= mem_la_read & select_bram;
- bram_write <= bram_write_next;
- bram_ready <= bram_read | bram_write_next;
- end
-
-endmodule
diff --git a/platform/wavelet3d/gfx_shader.sv b/platform/wavelet3d/gfx_shader.sv
deleted file mode 100644
index 322ffb5..0000000
--- a/platform/wavelet3d/gfx_shader.sv
+++ /dev/null
@@ -1,77 +0,0 @@
-// Shader core top level: wires the front end, the back end and the register
-// file together and exposes the scheduler-facing register block.
-//
-// Ports:
-//   clk, rst_n  clock and active-low reset
-//   insn_mem    AXI master used by the front end to fetch instructions
-//   sched       AXI-Lite slave through which the register block is accessed
-module gfx_shader
-import gfx::*;
-import gfx_shader_schedif_pkg::*;
-(
- input logic clk,
- rst_n,
-
- gfx_axib.m insn_mem,
-
- gfx_axil.s sched
-);
-
- axi4lite_intf #(.ADDR_WIDTH(GFX_SHADER_SCHEDIF_MIN_ADDR_WIDTH)) regblock();
-
- // Adapts the gfx_axil port to the axi4lite_intf used by the register block
- gfx_axil2regblock axil2regblock
- (
- .axis(sched),
- .axim(regblock.master)
- );
-
- gfx_shader_schedif__in_t schedif_in;
- gfx_shader_schedif__out_t schedif_out;
-
- gfx_front_back front_back();
- gfx_regfile_io regfile();
- gfx_shader_setup setup();
-
- // Hardware -> register block: completion flags raised by the setup logic
- assign schedif_in.SETUP_CTRL.GPR_DONE.hwset = setup.sched.set_done.gpr;
- assign schedif_in.SETUP_CTRL.MASK_DONE.hwset = setup.sched.set_done.mask;
- assign schedif_in.SETUP_CTRL.SUBMIT_DONE.hwset = setup.sched.set_done.submit;
-
- // Register block -> hardware: setup values and their software-write strobes
- assign setup.sched.write.pc = schedif_out.SETUP_SUBMIT.PC.value;
- assign setup.sched.write.gpr = schedif_out.SETUP_CTRL.XGPR.value;
- assign setup.sched.write.mask = schedif_out.SETUP_MASK.MASK.value;
- assign setup.sched.write.group = schedif_out.SETUP_CTRL.GROUP.value;
- assign setup.sched.write.pc_set = schedif_out.SETUP_SUBMIT.PC.swmod;
- assign setup.sched.write.gpr_set = schedif_out.SETUP_GPR.VALUE.swmod;
- assign setup.sched.write.mask_set = schedif_out.SETUP_MASK.MASK.swmod;
- assign setup.sched.write.gpr_value = schedif_out.SETUP_GPR.VALUE.value;
-
- gfx_shader_front frontend
- (
- .clk,
- .rst_n,
- .front(front_back.front),
- .reg_bind(regfile.bind_),
- .reg_read(regfile.read),
- .fetch_mem(insn_mem),
- .icache_flush(schedif_out.CORE.IFLUSH.value)
- );
-
- gfx_shader_back backend
- (
- .clk,
- .rst_n,
- .back(front_back.back),
- .setup(setup.core),
- .reg_wb(regfile.wb),
- .read_data(regfile.ab)
- );
-
- // Register file; note that it takes no reset
- gfx_shader_regs regs
- (
- .clk,
- .io(regfile.regs)
- );
-
- gfx_shader_schedif schedif
- (
- .clk,
- .arst_n(rst_n),
- .s_axil(regblock.slave),
- .hwif_in(schedif_in),
- .hwif_out(schedif_out)
- );
-
-endmodule
diff --git a/platform/wavelet3d/gfx_shader_back.sv b/platform/wavelet3d/gfx_shader_back.sv
deleted file mode 100644
index 4929192..0000000
--- a/platform/wavelet3d/gfx_shader_back.sv
+++ /dev/null
@@ -1,335 +0,0 @@
-// Shader back end: four execution units (p0..p3), a writeback arbiter that
-// merges their results, and the writeback stage that drives the register file.
-//
-// Ports:
-//   clk, rst_n  clock and active-low reset
-//   back        back-end side of the front/back interface
-//   read_data   operand data read from the register file
-//   reg_wb      register-file writeback port
-//   setup       setup requests forwarded to the writeback stage
-module gfx_shader_back
-import gfx::*;
-(
- input logic clk,
- rst_n,
-
- gfx_front_back.back back,
-
- gfx_regfile_io.ab read_data,
- gfx_regfile_io.wb reg_wb,
-
- gfx_shader_setup.core setup
-);
-
- logic abort;
-
- gfx_wb out_wb(), p0_wb(), p1_wb(), p2_wb(), p3_wb();
- gfx_shake p1_shake(), p2_shake(), p3_shake();
-
- // Raises abort for p0 one cycle after any of p1..p3 completes a handshake
- gfx_shader_abort p0_abort
- (
- .clk,
- .p1(p1_shake.peek),
- .p2(p2_shake.peek),
- .p3(p3_shake.peek),
- .abort
- );
-
- // p0: fp/int pipeline; unlike p1..p3 it has no handshake and is gated by
- // in_valid and abort instead
- gfx_shader_fpint p0
- (
- .clk,
- .rst_n,
- .op(back.execute.p0),
- .wb(p0_wb.tx),
- .wave(back.execute.wave),
- .abort,
- .read_data,
- .in_valid(back.dispatch.valid)
- );
-
- gfx_shader_mem p1
- (
- .clk,
- .rst_n,
- .op(back.execute.p1),
- .wb(p1_wb.tx),
- .wave(back.execute.wave),
- .in_shake(p1_shake.rx),
- .read_data
- );
-
- gfx_shader_sfu p2
- (
- .clk,
- .rst_n,
- .op(back.execute.p2),
- .wb(p2_wb.tx),
- .wave(back.execute.wave),
- .in_shake(p2_shake.rx),
- .read_data
- );
-
- gfx_shader_group p3
- (
- .clk,
- .rst_n,
- .op(back.execute.p3),
- .wb(p3_wb.tx),
- .wave(back.execute.wave),
- .in_shake(p3_shake.rx),
- .read_data
- );
-
- gfx_shader_writeback_arbiter4 writeback_arbiter
- (
- .clk,
- .rst_n,
- .p0(p0_wb.rx),
- .p1(p1_wb.rx),
- .p2(p2_wb.rx),
- .p3(p3_wb.rx),
- .out(out_wb.tx)
- );
-
- gfx_shader_writeback writeback
- (
- .clk,
- .rst_n,
- .wb(out_wb.rx),
- .regs(reg_wb),
- .setup,
- .loop_group(back.loop.group),
- .loop_valid(back.loop.valid)
- );
-
-endmodule
-
-// Registers an abort pulse: abort is high in the cycle after any of the
-// three observed handshakes (p1, p2, p3) had both valid and ready set.
-module gfx_shader_abort
-(
- input logic clk,
-
- gfx_shake.peek p1,
-                p2,
-                p3,
-
- output logic abort
-);
-
- // One bit per observed port: the handshake completes this cycle
- logic p1_fire, p2_fire, p3_fire;
-
- assign p1_fire = p1.valid & p1.ready;
- assign p2_fire = p2.valid & p2.ready;
- assign p3_fire = p3.valid & p3.ready;
-
- always_ff @(posedge clk)
-     abort <= p1_fire | p2_fire | p3_fire;
-
-endmodule
-
-// Four-input writeback arbiter built as a two-level tree of
-// gfx_shader_writeback_arbiter2_prio cells: (p0, p1) and (p2, p3) are merged
-// first, and the two intermediate results are merged into out.
-//
-// Ports:
-//   clk, rst_n  clock and active-low reset
-//   p0..p3      writeback requests from the four execution units
-//   out         merged writeback stream
-module gfx_shader_writeback_arbiter4
-(
- input logic clk,
-             rst_n,
-
- gfx_wb.rx p0,
-           p1,
-           p2,
-           p3,
-
- gfx_wb.tx out
-);
-
- // p0 and the output are expected to be ready on every cycle out of reset
- assert property (
-     @(posedge clk)
-     disable iff (~rst_n)
-
-     (p0.ready & out.ready)
- );
-
- gfx_wb p0_p1(), p2_p3();
-
- gfx_shader_writeback_arbiter2_prio arbiter_p0_p1
- (
-     .clk,
-     .rst_n,
-     .a(p0),
-     .b(p1),
-     .out(p0_p1.tx)
- );
-
- gfx_shader_writeback_arbiter2_prio arbiter_p2_p3
- (
-     .clk,
-     .rst_n,
-     .a(p2),
-     .b(p3),
-     .out(p2_p3.tx)
- );
-
- // Both inputs of the final stage consume the receive side of the
- // intermediate links. The transmit side of p2_p3 is already driven by
- // arbiter_p2_p3, so it must not be bound to an input port here.
- gfx_shader_writeback_arbiter2_prio arbiter_out
- (
-     .clk,
-     .rst_n,
-     .a(p0_p1.rx),
-     .b(p2_p3.rx),
-     .out
- );
-
-endmodule
-
-// Two-input writeback arbiter cell.
-//
-// TODO: arbitration is not implemented yet. Input a is forwarded to out
-// unchanged and input b is never accepted (b.ready is tied low). clk and
-// rst_n are currently unused.
-module gfx_shader_writeback_arbiter2_prio
-(
- input logic clk,
-             rst_n,
-
- gfx_wb.rx a,
-           b,
-
- gfx_wb.tx out
-);
-
- // Back-pressure: only a sees the downstream ready
- assign b.ready = 0;
- assign a.ready = out.ready;
-
- // Handshake and destination
- assign out.valid = a.valid;
- assign out.group = a.group;
- assign out.dest = a.dest;
- assign out.scalar = a.scalar;
- assign out.writeback = a.writeback;
- assign out.lanes = a.lanes;
-
- // Execution-mask update
- assign out.mask_update = a.mask_update;
- assign out.mask = a.mask;
-
- // Program-counter update
- assign out.pc_update = a.pc_update;
- assign out.pc_inc = a.pc_inc;
- assign out.pc_add = a.pc_add;
-
-endmodule
-
-// Writeback stage. Scalar results are written to the SGPR port straight from
-// wb; everything else (vector data, pc and mask updates) is delayed through
-// a REGFILE_STAGES-deep pipeline (loop_hold) before being applied. Setup
-// requests share the pc, mask and SGPR write ports and are used whenever the
-// corresponding pipeline write is not active.
-//
-// Ports:
-//   clk, rst_n  clock and active-low reset
-//   wb          incoming writeback stream (always accepted)
-//   regs        register-file writeback port
-//   loop_valid  a group is being handed back (pipeline output or setup submit)
-//   loop_group  the group being handed back
-//   setup       setup write requests and their completion flags
-module gfx_shader_writeback
-import gfx::*;
-(
- input logic clk,
- rst_n,
-
- gfx_wb.rx wb,
-
- gfx_regfile_io.wb regs,
-
- output logic loop_valid,
- output group_id loop_group,
-
- gfx_shader_setup.core setup
-);
-
- // One pipeline entry per stage; loop_out is the last stage
- struct
- {
- group_id group;
- word lanes[SHADER_LANES];
- pc_offset pc_add;
- lane_mask mask;
- vgpr_num vgpr;
- logic pc_update,
- mask_update,
- vgpr_update;
- } loop_hold[REGFILE_STAGES], loop_out;
-
- logic loop_valid_hold[REGFILE_STAGES], loop_out_valid, mask_wb, scalar_wb,
- setup_gpr, setup_mask, setup_submit;
-
- assign wb.ready = 1;
-
- assign loop_out = loop_hold[REGFILE_STAGES - 1];
- assign loop_out_valid = loop_valid_hold[REGFILE_STAGES - 1];
-
- assign loop_valid = loop_out_valid | setup_submit;
-
- // pc_back/mask_back are looked up for the group entering the pipeline
- assign regs.pc_back_group = wb.group;
- assign regs.mask_back_group = wb.group;
-
- assign regs.pc_wb_write = (loop_out_valid & loop_out.pc_update) | setup_submit;
- assign regs.mask_wb_write = mask_wb | setup_mask;
- assign regs.sgpr_write.write = scalar_wb | setup_gpr;
-
- assign regs.vgpr_write.vgpr = loop_out.vgpr;
- assign regs.vgpr_write.group = loop_out.group;
-
- assign mask_wb = loop_out_valid & loop_out.mask_update;
- assign scalar_wb = wb.valid & wb.writeback & wb.scalar;
-
- // Write-port multiplexing: setup values are the default and the pipeline
- // (or, for SGPRs, the incoming wb) overrides them when it has a write
- always_comb begin
- loop_group = setup.write.group;
- regs.pc_wb = setup.write.pc;
- regs.pc_wb_group = setup.write.group;
-
- if (loop_out_valid) begin
- loop_group = loop_out.group;
- regs.pc_wb = regs.pc_back + word_ptr'(loop_out.pc_add);
- regs.pc_wb_group = loop_out.group;
- end
-
- regs.mask_wb = setup.write.mask;
- regs.mask_wb_group = setup.write.group;
-
- if (mask_wb) begin
- regs.mask_wb = loop_out.mask;
- regs.mask_wb_group = loop_out.group;
- end
-
- regs.sgpr_write.data = setup.write.gpr_value;
- regs.sgpr_write.sgpr = setup.write.gpr.sgpr;
- regs.sgpr_write.group = setup.write.group;
-
- if (scalar_wb) begin
- regs.sgpr_write.data = wb.lanes[0];
- regs.sgpr_write.sgpr = wb.dest.sgpr;
- regs.sgpr_write.group = wb.group;
- end
-
- for (int i = 0; i < SHADER_LANES; ++i)
- regs.vgpr_write.data[i] = loop_out.lanes[i];
-
- // Vector writes use the mask read back from the register file and are
- // suppressed entirely unless the pipeline output carries a VGPR update
- regs.vgpr_write.mask = regs.mask_back;
- if (~loop_out_valid | ~loop_out.vgpr_update)
- regs.vgpr_write.mask = '0;
- end
-
- always_ff @(posedge clk) begin
- // Blocking assignments because of a verilator bug (see the lanes loop below).
- // The shift runs from the last stage down so each stage takes the
- // previous value of its predecessor.
-
- for (int i = REGFILE_STAGES - 1; i > 0; --i)
- loop_hold[i] = loop_hold[i - 1];
-
- loop_hold[0].mask = wb.mask;
- loop_hold[0].vgpr = wb.dest.vgpr.num;
- loop_hold[0].group = wb.group;
- loop_hold[0].pc_add = wb.pc_add;
- loop_hold[0].pc_update = wb.pc_update;
- loop_hold[0].mask_update = wb.mask_update;
- loop_hold[0].vgpr_update = wb.writeback & ~wb.scalar;
-
- // https://github.com/verilator/verilator/issues/4804
- for (int i = 0; i < SHADER_LANES; ++i)
- loop_hold[0].lanes[i] = wb.lanes[i];
-
- // pc_inc replaces the requested offset with a fixed increment of 1
- if (wb.pc_inc)
- loop_hold[0].pc_add = pc_offset'(1);
- end
-
- always_ff @(posedge clk or negedge rst_n)
- if (~rst_n) begin
- setup_gpr <= 0;
- setup_mask <= 0;
- setup_submit <= 0;
-
- setup.set_done.gpr <= 0;
- setup.set_done.mask <= 0;
- setup.set_done.submit <= 0;
-
- for (int i = 0; i < $size(loop_valid_hold); ++i)
- loop_valid_hold[i] <= 0;
- end else begin
- // A setup request stays pending while the shared port is taken by a
- // regular write, and is reported done on the first cycle the port is free
- setup_gpr <= (setup_gpr & scalar_wb) | setup.write.gpr_set;
- setup_mask <= (setup_mask & mask_wb) | setup.write.mask_set;
- setup_submit <= (setup_submit & loop_out_valid) | setup.write.pc_set;
-
- setup.set_done.gpr <= setup_gpr & ~scalar_wb;
- setup.set_done.mask <= setup_mask & ~mask_wb;
- setup.set_done.submit <= setup_submit & ~loop_out_valid;
-
- loop_valid_hold[0] <= wb.valid;
- for (int i = 1; i < REGFILE_STAGES; ++i)
- loop_valid_hold[i] <= loop_valid_hold[i - 1];
- end
-
-endmodule
diff --git a/platform/wavelet3d/gfx_shader_fpint.sv b/platform/wavelet3d/gfx_shader_fpint.sv
deleted file mode 100644
index a418dcc..0000000
--- a/platform/wavelet3d/gfx_shader_fpint.sv
+++ /dev/null
@@ -1,932 +0,0 @@
-// -> 4,4,4,4,4,4,4,4 -> 8,8,8,8 -> 16,16 -> 32
-localparam int FPINT_CLZ_STAGES = 4;
-
-localparam bit[$clog2($bits(gfx::float_mant_ext)):0] FPINT_MAX_SHIFT
- = 1 << $clog2($bits(gfx::float_mant_ext));
-
-typedef logic[$clog2(FPINT_MAX_SHIFT):0] fpint_shift;
-
-/* The 15 stages are:
- * - setup
- * - mulclass
- * - mnorm
- * - minmax
- * - expdiff
- * - shiftr
- * - addsub
- * - clz0-clz3
- * - shiftl
- * - round
- * - rnorm
- * - encode
- */
-
-typedef struct
-{
- gfx::float a,
- b,
- a_mul,
- b_mul;
-} fpint_setup_mulclass;
-
-typedef struct
-{
- gfx::float b;
- gfx::float_exp exp;
- gfx::float_class a_class,
- b_class;
- gfx::udword product;
- logic sign,
- overflow;
-} fpint_mulclass_mnorm;
-
-typedef struct
-{
- gfx::float a,
- b;
- gfx::float_class a_class,
- b_class;
- logic slow,
- zero,
- guard,
- round,
- sticky,
- slow_in,
- overflow;
-} fpint_mnorm_minmax;
-
-typedef struct
-{
- gfx::float max,
- min;
- gfx::float_class max_class,
- min_class;
- logic slow,
- zero,
- guard,
- round,
- sticky;
-} fpint_minmax_expdiff;
-
-typedef struct
-{
- gfx::float max,
- min;
- gfx::float_class max_class,
- min_class;
- fpint_shift exp_shift;
- logic slow,
- zero,
- guard,
- round,
- sticky;
-} fpint_expdiff_shiftr;
-
-typedef struct
-{
- gfx::float max,
- min;
- gfx::float_class max_class,
- min_class;
- gfx::float_mant_ext max_mant,
- min_mant,
- sticky_mask;
- logic slow,
- zero,
- guard,
- round,
- sticky,
- int_sign;
-} fpint_shiftr_addsub;
-
-typedef struct
-{
- gfx::float max;
- gfx::word add_sub;
- logic slow,
- zero,
- guard,
- round,
- sticky;
-} fpint_clz_hold;
-
-typedef fpint_clz_hold fpint_addsub_clz;
-
-typedef struct
-{
- fpint_clz_hold hold;
- fpint_shift shift;
-} fpint_clz_shiftl;
-
-typedef struct
-{
- gfx::float val;
- logic slow,
- zero,
- guard,
- round,
- sticky,
- overflow,
- sticky_last;
-} fpint_shiftl_round;
-
-typedef struct
-{
- gfx::float val;
- logic slow,
- zero,
- exp_step,
- overflow;
-} fpint_round_rnorm;
-
-typedef struct
-{
- gfx::float val;
- logic slow,
- zero,
- overflow;
-} fpint_rnorm_encode;
-
-// Combined floating-point / integer pipeline. Instantiates one
-// gfx_shader_fpint_lane per shader lane and carries the operation and wave
-// metadata alongside the data through a FPINT_STAGES-deep shift register.
-//
-// Ports:
-//   clk, rst_n  clock and active-low reset
-//   op          per-stage control bits for the operation
-//   wave        wave metadata (destination, group) forwarded to writeback
-//   abort       cancels the operation currently held in stage 1
-//   in_valid    a new operation is presented on op/wave
-//   read_data   operands a/b, one word per lane
-//   wb          writeback request produced at the end of the pipeline
-module gfx_shader_fpint
-import gfx::*;
-(
- input logic clk,
- rst_n,
-
- input fpint_op op,
- input wave_exec wave,
- input logic abort,
- in_valid,
-
- gfx_regfile_io.ab read_data,
-
- gfx_wb.tx wb
-);
-
- // 7 stages before clz, the clz stages, and 4 stages after
- localparam int FPINT_STAGES = 7 + FPINT_CLZ_STAGES + 4;
-
- struct
- {
- fpint_op op;
- wave_exec wave;
- } stage[FPINT_STAGES];
-
- logic stage_valid[FPINT_STAGES];
-
- // This unit never updates the mask; the pc is stepped by one (pc_inc)
- // whenever the operation writes back
- assign wb.dest = stage[FPINT_STAGES - 1].wave.dest;
- assign wb.mask = 'x;
- assign wb.group = stage[FPINT_STAGES - 1].wave.group;
- assign wb.pc_add = 'x;
- assign wb.pc_inc = 1;
- assign wb.scalar = stage[FPINT_STAGES - 1].wave.dest_scalar;
- assign wb.pc_update = wb.writeback;
- assign wb.writeback = stage[FPINT_STAGES - 1].op.writeback;
- assign wb.mask_update = 0;
-
- // Careful: stage_valid[0] is combinational, but stage[0] is registered
- assign stage_valid[0] = in_valid;
-
- // Stage-N control inputs of the lane are taken from stage[N - 1], since
- // stage[] is one register behind stage_valid[]; stage-0 controls come
- // straight from op
- genvar lane;
- generate
- for (lane = 0; lane < SHADER_LANES; ++lane) begin: lanes
- gfx_shader_fpint_lane unit
- (
- .clk(clk),
- .a(read_data.a[lane]),
- .b(read_data.b[lane]),
- .q(wb.lanes[lane]),
- .mul_float_0(op.setup_mul_float),
- .unit_b_0(op.setup_unit_b),
- .put_hi_2(stage[2 - 1].op.mnorm_put_hi),
- .put_lo_2(stage[2 - 1].op.mnorm_put_lo),
- .put_mul_2(stage[2 - 1].op.mnorm_put_mul),
- .zero_b_2(stage[2 - 1].op.mnorm_zero_b),
- .zero_flags_2(stage[2 - 1].op.mnorm_zero_flags),
- .abs_3(stage[3 - 1].op.minmax_abs),
- .swap_3(stage[3 - 1].op.minmax_swap),
- .zero_min_3(stage[3 - 1].op.minmax_zero_min),
- .copy_flags_3(stage[3 - 1].op.minmax_copy_flags),
- .int_signed_5(stage[5 - 1].op.shiftr_int_signed),
- .copy_flags_6(stage[6 - 1].op.addsub_copy_flags),
- .int_operand_6(stage[6 - 1].op.addsub_int_operand),
- .force_nop_7(stage[7 - 1].op.clz_force_nop),
- .copy_flags_11(stage[11 - 1].op.shiftl_copy_flags),
- .copy_flags_12(stage[12 - 1].op.round_copy_flags),
- .enable_12(stage[12 - 1].op.round_enable),
- .enable_14(stage[14 - 1].op.encode_enable)
- );
- end
- endgenerate
-
- // Metadata shift register (no reset; validity is tracked separately)
- always_ff @(posedge clk) begin
- stage[0].op <= op;
- stage[0].wave <= wave;
-
- for (int i = 1; i < FPINT_STAGES; ++i)
- stage[i] <= stage[i - 1];
- end
-
- always_ff @(posedge clk or negedge rst_n)
- if (~rst_n) begin
- // Index 0 is skipped: it is driven by the continuous assignment above
- for (int i = 1; i < FPINT_STAGES; ++i)
- stage_valid[i] <= 0;
-
- wb.valid <= 0;
- end else begin
- for (int i = 1; i < FPINT_STAGES; ++i)
- stage_valid[i] <= stage_valid[i - 1];
-
- // abort is raised 1 cycle after in_valid, so it is applied on the
- // way into stage 2 (this overrides the loop's assignment for i = 2)
- stage_valid[2] <= stage_valid[1] & ~abort;
-
- wb.valid <= stage_valid[FPINT_STAGES - 1];
- end
-
-endmodule
-
-// One lane of the fp/int pipeline: a straight chain of the stage modules,
-// each consuming the struct produced by the previous one. The numeric
-// suffix of every control input names the stage that uses it.
-//
-// Ports:
-//   clk   clock
-//   a, b  operands
-//   *_N   control bit consumed by stage N
-//   q     result word
-module gfx_shader_fpint_lane
-import gfx::*;
-(
- input logic clk,
-
- input word a,
- b,
-
- input logic mul_float_0,
- unit_b_0,
- put_hi_2,
- put_lo_2,
- put_mul_2,
- zero_b_2,
- zero_flags_2,
- abs_3,
- swap_3,
- zero_min_3,
- copy_flags_3,
- int_signed_5,
- copy_flags_6,
- int_operand_6,
- force_nop_7,
- copy_flags_11,
- copy_flags_12,
- enable_12,
- enable_14,
-
- output word q
-);
-
- /* Implementation notes for floating-point
- *
- * === PRODUCT ===
- *
- * We want to compute q = a * b.
- *
- * Where a = (-1)^s * 1.m * 2^f,
- * b = (-1)^t * 1.n * 2^g
- *
- * Then q = (-1)^(s + t) (1.m * 1.n) 2^(f + g)
- *
- * The product is between numbers >= 1.0 and < 2.0. The extremes are:
- * Best case: 1.000... * 1.000... ~ 1.000...
- * Worst case: 1.999... * 1.999... ~ 3.999... = 2^1 * 1.999
- *
- * So, if the product is >= 2, the mantissa has to be shifted >> 1 and
- * the exponent incremented by 1 in order to normalize.
- *
- *
- * === ADD/SUBTRACT ===
- *
- * We want to compute q = a + b. Curiously, that is more complicated than a * b.
- * The exponent of the smaller of a and b has to be adjusted to match
- * that of the larger one (denormalizing), then the operation is performed
- * and finally the result is renormalized. Whether an addition or a
- * subtraction is done depends on the relation between the signs, not on the
- * input operation (the latter only xors the sign of b).
- * Remember here that IEEE 754 is a kind of sign-magnitude, not two's complement.
- *
- * In the case of a subtraction, the normalized exponent can be much
- * smaller than either of the input exponents. We therefore need
- * CLZ (count leading zeros) logic to renormalize.
- *
- *
- * === INTEGER->FP CONVERSION ===
- *
- * This simply uses the same datapath as fadd, with the abs of the integer
- * as the clz input. The reference exponent is fixed at 30 (referring to
- * the second msb of a 32-bit integer). From that point on it is identical
- * to an fadd; the clz stages take care of adjusting the exponent.
- */
-
- // Inter-stage registers, named <producer>_<consumer>
- fpint_setup_mulclass setup_mulclass;
- fpint_mulclass_mnorm mulclass_mnorm;
- fpint_mnorm_minmax mnorm_minmax;
- fpint_minmax_expdiff minmax_expdiff;
- fpint_expdiff_shiftr expdiff_shiftr;
- fpint_shiftr_addsub shiftr_addsub;
- fpint_addsub_clz addsub_clz;
- fpint_clz_shiftl clz_shiftl;
- fpint_shiftl_round shiftl_round;
- fpint_round_rnorm round_rnorm;
- fpint_rnorm_encode rnorm_encode;
-
- gfx_shader_fpint_setup stage_0
- (
- .clk(clk),
- .a(a),
- .b(b),
- .out(setup_mulclass),
- .unit_b(unit_b_0),
- .mul_float(mul_float_0)
- );
-
- gfx_shader_fpint_mulclass stage_1
- (
- .clk(clk),
- .in(setup_mulclass),
- .out(mulclass_mnorm)
- );
-
- gfx_shader_fpint_mnorm stage_2
- (
- .clk(clk),
- .in(mulclass_mnorm),
- .out(mnorm_minmax),
- .put_hi(put_hi_2),
- .put_lo(put_lo_2),
- .put_mul(put_mul_2),
- .zero_b(zero_b_2),
- .zero_flags(zero_flags_2)
- );
-
- gfx_shader_fpint_minmax stage_3
- (
- .clk(clk),
- .in(mnorm_minmax),
- .out(minmax_expdiff),
- .abs(abs_3),
- .swap(swap_3),
- .zero_min(zero_min_3),
- .copy_flags(copy_flags_3)
- );
-
- gfx_shader_fpint_expdiff stage_4
- (
- .clk(clk),
- .in(minmax_expdiff),
- .out(expdiff_shiftr)
- );
-
- gfx_shader_fpint_shiftr stage_5
- (
- .clk(clk),
- .in(expdiff_shiftr),
- .out(shiftr_addsub),
- .int_signed(int_signed_5)
- );
-
- gfx_shader_fpint_addsub stage_6
- (
- .clk(clk),
- .in(shiftr_addsub),
- .out(addsub_clz),
- .copy_flags(copy_flags_6),
- .int_operand(int_operand_6)
- );
-
- // A single instance covers the clz stages 7 to 10
- gfx_shader_fpint_clz stage_7_8_9_10
- (
- .clk(clk),
- .in(addsub_clz),
- .out(clz_shiftl),
- .force_nop(force_nop_7)
- );
-
- gfx_shader_fpint_shiftl stage_11
- (
- .clk(clk),
- .in(clz_shiftl),
- .out(shiftl_round),
- .copy_flags(copy_flags_11)
- );
-
- gfx_shader_fpint_round stage_12
- (
- .clk(clk),
- .in(shiftl_round),
- .out(round_rnorm),
- .enable(enable_12),
- .copy_flags(copy_flags_12)
- );
-
- gfx_shader_fpint_rnorm stage_13
- (
- .clk(clk),
- .in(round_rnorm),
- .out(rnorm_encode)
- );
-
- gfx_shader_fpint_encode stage_14
- (
- .clk(clk),
- .q(q),
- .in(rnorm_encode),
- .enable(enable_14)
- );
-
-endmodule
-
-// Stage 0: multiplier operand setup.
-//
-// Registers the raw operands (out.a, out.b) and derives the operands that
-// the next stage multiplies (out.a_mul, out.b_mul).
-//
-// Inputs:
-//   a, b       - operand words
-//   mul_float  - force exp = 1 and sign = 0 on both multiplier operands
-//   unit_b     - force out.b_mul to exp = 0, mant = 1, sign = 0
-// Output:
-//   out        - updated on every rising edge of clk
-module gfx_shader_fpint_setup
-import gfx::*;
-(
- input logic clk,
-
- input word a,
- b,
- input logic mul_float,
- unit_b,
-
- output fpint_setup_mulclass out
-);
-
- always_ff @(posedge clk) begin
- out.a <= a;
- out.b <= b;
- // Defaults: multiply the operands as they are. The field-level
- // assignments below override parts of these in the same clock edge.
- out.a_mul <= a;
- out.b_mul <= b;
-
- /* Note that the field order is sign-exp-mant. This places the implicit
- * '1.' in the right position for multiplying the mantissas.
- */
- if (mul_float) begin
- out.a_mul.exp <= 1;
- out.b_mul.exp <= 1;
- out.a_mul.sign <= 0;
- out.b_mul.sign <= 0;
- end
-
- // Written after the mul_float block, so unit_b wins for b_mul when both
- // controls are asserted.
- if (unit_b) begin
- out.b_mul.exp <= 0;
- out.b_mul.mant <= 1;
- out.b_mul.sign <= 0;
- end
- end
-
-endmodule
-
-// Stage 1: multiply the prepared operands and classify the raw ones.
-//
-// The same multiplier serves floating-point mantissas and integers; what it
-// multiplies was chosen by the previous stage (in.a_mul, in.b_mul).
-module gfx_shader_fpint_mulclass
-import gfx::*;
-(
- input logic clk,
-
- input fpint_setup_mulclass in,
-
- output fpint_mulclass_mnorm out
-);
-
- always_ff @(posedge clk) begin
- // Multiplier result
- out.product <= in.b_mul * in.a_mul;
-
- // Sign and exponent of a floating-point product. The extra top bit of
- // the exponent sum is kept in out.overflow.
- out.sign <= in.b.sign ^ in.a.sign;
- {out.overflow, out.exp} <= {1'b0, in.b.exp} + {1'b0, in.a.exp} - {1'b0, FLOAT_EXP_BIAS};
-
- // Operand classes of the unmodified inputs
- out.b_class <= classify_float(in.b);
- out.a_class <= classify_float(in.a);
-
- // b is forwarded untouched for the later stages
- out.b <= in.b;
- end
-
-endmodule
-
-// Stage 2: normalization of the product and selection of operand 'a' for
-// the following stages.
-//
-// Control inputs:
-//   put_mul    - out.a is the normalized floating-point product; also enables
-//                the slow/zero flags derived from the product
-//   put_hi     - out.a is the upper word of the raw product
-//   put_lo     - out.a is the lower word of the raw product
-//   zero_b     - replace operand b (and its class) by zero
-//   zero_flags - replace the class of a by that of zero and clear slow_in
-module gfx_shader_fpint_mnorm
-import gfx::*;
-(
- input logic clk,
-
- input fpint_mulclass_mnorm in,
- input logic put_hi,
- put_lo,
- put_mul,
- zero_b,
- zero_flags,
-
- output fpint_mnorm_minmax out
-);
-
- word product_hi, product_lo;
- logic guard, lo_msb, lo_reduce, round, slow_in_next;
- float_mant_full hi;
- logic[$bits(float_mant_full) - 3:0] lo;
-
- assign lo_msb = lo[$bits(lo) - 1];
- assign lo_reduce = |lo[$bits(lo) - 2:0];
- assign slow_in_next = is_float_special(in.a_class) | is_float_special(in.b_class);
- assign {product_hi, product_lo} = in.product;
- // Split the mantissa product into the kept mantissa, guard and round bits,
- // and the remaining low bits (which only contribute to sticky).
- assign {hi, guard, round, lo} = in.product[2 * $bits(float_mant_full) - 1:0];
-
- always_ff @(posedge clk) begin
- if (put_mul) begin
- // An exponent overflow only matters when neither operand has the
- // minimum exponent; otherwise the product is flagged as zero below.
- // Fix: the second term used to test in.a_class a second time, so
- // operand b was never taken into account.
- out.slow <= slow_in_next | (in.overflow & ~in.a_class.exp_min & ~in.b_class.exp_min);
- out.zero <= in.a_class.exp_min | in.b_class.exp_min;
- end else begin
- out.slow <= 0;
- out.zero <= 0;
- end
-
- out.a.sign <= in.sign;
- out.overflow <= 0;
-
- if (hi[$bits(hi) - 1]) begin
- // The product carried into the top bit: keep 'hi' and bump the exponent
- out.guard <= guard;
- out.round <= round;
- out.sticky <= lo_msb | lo_reduce;
- out.a.mant <= implicit_mant(hi);
- {out.overflow, out.a.exp} <= {1'b0, in.exp} + 1;
- end else begin
- /* The bit right below the msb is necessarily 1, since the msbs of
- * both multiplicands are 1. See the assert in implicit_mant().
- */
- out.guard <= round;
- out.round <= lo_msb;
- out.sticky <= lo_reduce;
-
- out.a.exp <= in.exp;
- out.a.mant <= implicit_mant({hi[$bits(hi) - 2:0], guard});
- end
-
- // Integer results overwrite the whole of out.a assembled above
- unique case (1'b1)
- put_mul: ;
-
- put_hi:
- out.a <= product_hi;
-
- put_lo:
- out.a <= product_lo;
- endcase
-
- out.a_class <= in.a_class;
- out.slow_in <= slow_in_next;
-
- if (zero_flags) begin
- out.a_class <= classify_float(0);
- out.slow_in <= 0;
- end
-
- if (zero_b) begin
- out.b <= 0;
- out.b_class <= classify_float(0);
- end else begin
- out.b <= in.b;
- out.b_class <= in.b_class;
- end
- end
-
-endmodule
-
-// Stage 3: order the operands into out.max / out.min.
-//
-// Control inputs:
-//   abs        - compare magnitudes only, so that abs(max) >= abs(min)
-//   swap       - invert the outcome of the comparison
-//   zero_min   - replace out.min (and its class) by zero
-//   copy_flags - take slow/zero from the multiplier path instead of from
-//                the operand classes
-module gfx_shader_fpint_minmax
-import gfx::*;
-(
- input logic clk,
-
- input fpint_mnorm_minmax in,
- input logic abs,
- swap,
- zero_min,
- copy_flags,
-
- output fpint_minmax_expdiff out
-);
-
- logic abs_a_gt_abs_b, abs_b_gt_abs_a, b_gt_a;
-
- /* Wikipedia says:
- *
- * A property of the single- and double-precision formats is that
- * their encoding allows one to easily sort them without using
- * floating-point hardware, as if the bits represented sign-magnitude
- * integers, although it is unclear whether this was a design
- * consideration (it seems noteworthy that the earlier IBM hexadecimal
- * floating-point representation also had this property for normalized
- * numbers).
- */
- assign abs_b_gt_abs_a = {in.b.exp, in.b.mant} > {in.a.exp, in.a.mant};
- assign abs_a_gt_abs_b = {in.a.exp, in.a.mant} > {in.b.exp, in.b.mant};
-
- always_comb begin
- // Sign-magnitude comparison, selector is {sign of b, sign of a}
- unique case ({in.b.sign, in.a.sign})
- // Both non-negative: the larger magnitude is the larger value
- 2'b00: b_gt_a = abs_b_gt_abs_a;
- // b non-negative, a negative
- 2'b01: b_gt_a = 1;
- // b negative, a non-negative
- 2'b10: b_gt_a = 0;
- // Both negative: the larger value is the one with the smaller
- // magnitude. Fix: this case used abs_b_gt_abs_a, which selected the
- // more negative operand as the maximum.
- 2'b11: b_gt_a = abs_a_gt_abs_b;
- endcase
-
- if (abs)
- b_gt_a = abs_b_gt_abs_a;
- end
-
- always_ff @(posedge clk) begin
- if (b_gt_a ^ swap) begin
- out.max <= in.b;
- out.min <= in.a;
- out.max_class <= in.b_class;
- out.min_class <= in.a_class;
- end else begin
- out.max <= in.a;
- out.min <= in.b;
- out.max_class <= in.a_class;
- out.min_class <= in.b_class;
- end
-
- // Overrides the selection above
- if (zero_min) begin
- out.min <= 0;
- out.min_class <= classify_float(0);
- end
-
- out.guard <= in.guard;
- out.round <= in.round;
- out.sticky <= in.sticky;
-
- if (copy_flags) begin
- out.slow <= in.slow | in.overflow;
- out.zero <= in.zero;
- end else begin
- out.slow <= in.slow_in;
- out.zero <= 0;
- end
- end
-
-endmodule
-
-// Stage 4: compute how far the smaller operand has to be shifted right.
-//
-// The shift amount is the exponent difference max - min, saturated at
-// FPINT_MAX_SHIFT. Everything else is forwarded unchanged.
-module gfx_shader_fpint_expdiff
-import gfx::*;
-(
- input logic clk,
-
- input fpint_minmax_expdiff in,
-
- output fpint_expdiff_shiftr out
-);
-
- float_exp exp_gap;
- logic saturate;
-
- assign exp_gap = in.max.exp - in.min.exp;
-
- // FPINT_MAX_SHIFT is zero-extended to the width of the exponent difference
- assign saturate =
- exp_gap > {{($bits(exp_gap) - $bits(FPINT_MAX_SHIFT)){1'b0}}, FPINT_MAX_SHIFT};
-
- always_ff @(posedge clk) begin
- // Operands and their classes
- out.max <= in.max;
- out.max_class <= in.max_class;
- out.min <= in.min;
- out.min_class <= in.min_class;
-
- // Flags
- out.slow <= in.slow;
- out.zero <= in.zero;
- out.guard <= in.guard;
- out.round <= in.round;
- out.sticky <= in.sticky;
-
- // Saturated shift amount
- if (saturate)
- out.exp_shift <= FPINT_MAX_SHIFT;
- else
- out.exp_shift <= exp_gap[$bits(out.exp_shift) - 1:0];
- end
-
-endmodule
-
-// Stage 5: align the smaller mantissa and take abs(max) for signed integers.
-//
-// int_signed: when set and the top bit of in.max is 1, out.max receives the
-// negated value. out.int_sign always records that top bit.
-module gfx_shader_fpint_shiftr
-import gfx::*;
-(
- input logic clk,
-
- input fpint_expdiff_shiftr in,
- input logic int_signed,
-
- output fpint_shiftr_addsub out
-);
-
- logic max_msb;
-
- assign max_msb = in.max[$bits(in.max) - 1];
-
- always_ff @(posedge clk) begin
- // Mantissas: min is shifted right by the exponent difference. The mask
- // has zeros in its exp_shift low positions; the next stage uses its
- // complement to collect the sticky bit.
- out.max_mant <= float_prepare_round(in.max, in.max_class);
- out.min_mant <= float_prepare_round(in.min, in.min_class) >> in.exp_shift;
- out.sticky_mask <= {($bits(out.min_mant)){1'b1}} << in.exp_shift;
-
- // Integer path
- out.int_sign <= max_msb;
- if (int_signed & max_msb)
- out.max <= -in.max;
- else
- out.max <= in.max;
-
- // Forwarded unchanged
- out.min <= in.min;
- out.min_class <= in.min_class;
- out.slow <= in.slow;
- out.zero <= in.zero;
- out.guard <= in.guard;
- out.round <= in.round;
- out.sticky <= in.sticky;
- end
-
-endmodule
-
-// Stage 6: add or subtract the aligned mantissas.
-//
-// Control inputs:
-//   int_operand - integer-to-float path: in.max goes to the normalizer as is,
-//                 the exponent is set to FLOAT_EXP_BIAS + INT_SHIFT_REF and
-//                 the sign is taken from in.int_sign
-//   copy_flags  - keep the incoming sticky bit instead of recomputing it from
-//                 the bits of min that the right shift dropped
-module gfx_shader_fpint_addsub
-import gfx::*;
-(
- input logic clk,
-
- input fpint_shiftr_addsub in,
- input logic copy_flags,
- int_operand,
-
- output fpint_addsub_clz out
-);
-
- // Position of the second msb of a word
- localparam int INT_SHIFT_REF = $bits(word) - 2;
-
- // Places an extended mantissa right below the top bit of a word and pads
- // the remaining low bits with zeros.
- function word fp_add_sub_arg(float_mant_ext arg);
- return {1'b0, arg, {($bits(word) - $bits(arg) - 1){1'b0}}};
- endfunction
-
- always_ff @(posedge clk) begin
- out.slow <= in.slow;
- out.zero <= in.zero;
- out.guard <= in.guard;
- out.round <= in.round;
-
- if (copy_flags)
- out.sticky <= in.sticky;
- else
- out.sticky <= |(float_prepare_round(in.min, in.min_class) & ~in.sticky_mask);
-
- // The field assignments in the integer branch override parts of this
- out.max <= in.max;
-
- if (int_operand) begin
- out.max.exp <= FLOAT_EXP_BIAS + INT_SHIFT_REF[$bits(float_exp) - 1:0];
- out.max.sign <= in.int_sign;
- out.add_sub <= in.max;
- end else if (in.max.sign != in.min.sign)
- // Opposite signs: subtract the smaller magnitude
- out.add_sub <= fp_add_sub_arg(in.max_mant) - fp_add_sub_arg(in.min_mant);
- else
- out.add_sub <= fp_add_sub_arg(in.max_mant) + fp_add_sub_arg(in.min_mant);
- end
-
-endmodule
-
-// Stages 7-10: find the most significant 1 of the adder result.
-//
-// The count itself is produced by a gfx_clz instance (out.shift). The stage
-// input is delayed in parallel through a FPINT_CLZ_STAGES-deep hold pipeline
-// and presented as out.hold.
-// NOTE(review): this presumably matches the latency of gfx_clz, which is
-// defined elsewhere - confirm.
-//
-// force_nop: overwrite the two top bits of the value being counted with 2'b01.
-module gfx_shader_fpint_clz
-import gfx::*;
-(
- input logic clk,
-
- input fpint_addsub_clz in,
- input logic force_nop,
-
- output fpint_clz_shiftl out
-);
-
- word clz_in;
- fpint_clz_hold hold[FPINT_CLZ_STAGES];
-
- // Oldest entry of the hold pipeline
- assign out.hold = hold[FPINT_CLZ_STAGES - 1];
-
- gfx_clz #($bits(word)) clz
- (
- .clk(clk),
- .clz(out.shift),
- .value(clz_in)
- );
-
- always_comb begin
- clz_in = in.add_sub;
- // Only the value seen by the counter is modified; the held copy of
- // in.add_sub is not.
- if (force_nop)
- clz_in[$bits(clz_in) - 1:$bits(clz_in) - 2] = 2'b01;
- end
-
- always_ff @(posedge clk) begin
- hold[0] <= in;
-
- for (int i = 1; i < FPINT_CLZ_STAGES; ++i)
- hold[i] <= hold[i - 1];
- end
-
-endmodule
-
-// Stage 11: normalization. Shifts the adder result left by the leading-zero
-// count and adjusts the exponent accordingly.
-//
-// copy_flags: take guard/round from the held flags instead of from the
-// shifted value, and clear overflow and sticky_last.
-module gfx_shader_fpint_shiftl
-import gfx::*;
-(
- input logic clk,
-
- input fpint_clz_shiftl in,
- input logic copy_flags,
-
- output fpint_shiftl_round out
-);
-
- // Number of zero bits needed to extend in.shift to the width of an
- // exponent plus one carry bit
- localparam int CLZ_EXTEND_BITS = $bits(float_exp) - $bits(in.shift) + 1;
-
- word normalized;
-
- assign normalized = in.hold.add_sub << in.shift;
-
- always_ff @(posedge clk) begin
- out.slow <= in.hold.slow;
- out.zero <= in.hold.zero;
- out.sticky <= in.hold.sticky;
- out.val.sign <= in.hold.max.sign;
-
- // The top bit of 'normalized' is skipped; mantissa, guard, round and
- // sticky_last are the bits right below it.
- {out.val.mant, out.guard, out.round, out.sticky_last} <=
- normalized[$bits(normalized) - 2:$bits(normalized) - $bits(float_mant) - 4];
-
- // exp = max.exp - shift + 1; the extra top bit lands in out.overflow
- {out.overflow, out.val.exp} <=
- {1'b0, in.hold.max.exp} - {{CLZ_EXTEND_BITS{1'b0}}, in.shift} + 1;
-
- // NOTE(review): presumably the top bit of the count is only set when the
- // whole word was zero - confirm against gfx_clz.
- if (in.shift[$bits(in.shift) - 1])
- out.zero <= 1;
-
- // Overrides the assignments above
- if (copy_flags) begin
- out.guard <= in.hold.guard;
- out.round <= in.hold.round;
- out.overflow <= 0;
- out.sticky_last <= 0;
- end
- end
-
-endmodule
-
-// Stage 12: rounding.
-//
-// Control inputs:
-//   enable     - perform rounding; when clear the mantissa passes unchanged
-//   copy_flags - ignore in.overflow when computing out.slow
-//
-// out.exp_step is the carry out of the mantissa increment; the next stage
-// uses it to bump the exponent.
-module gfx_shader_fpint_round
-import gfx::*;
-(
- input logic clk,
-
- input fpint_shiftl_round in,
- input logic copy_flags,
- enable,
-
- output fpint_round_rnorm out
-);
-
- always_ff @(posedge clk) begin
- out.val <= in.val;
- out.slow <= in.slow | (~copy_flags & in.overflow & ~in.zero);
- out.zero <= in.zero;
- out.exp_step <= 0;
-
- // This is the most usual rounding mode: round to nearest, ties to even.
- // Fix: the increment must be applied to the incoming mantissa. Reading
- // out.val.mant here returned the value registered on the previous clock
- // edge, i.e. the mantissa of an unrelated operation.
- if (enable & in.guard & (in.round | in.sticky | in.sticky_last | in.val.mant[0]))
- {out.exp_step, out.val.mant} <= {1'b0, in.val.mant} + 1;
- end
-
-endmodule
-
-// Stage 13: exponent adjustment after rounding.
-//
-// When the rounding increment carried out of the mantissa (in.exp_step) the
-// exponent is incremented; the carry of that increment is reported in
-// out.overflow.
-module gfx_shader_fpint_rnorm
-import gfx::*;
-(
- input logic clk,
-
- input fpint_round_rnorm in,
-
- output fpint_rnorm_encode out
-);
-
- always_ff @(posedge clk) begin
- // Value, field by field
- out.val.sign <= in.val.sign;
- out.val.exp <= in.val.exp;
- out.val.mant <= in.val.mant;
-
- // Flags
- out.zero <= in.zero;
- out.slow <= in.slow;
- out.overflow <= 0;
-
- // Overrides the default exponent and overflow assigned above
- if (in.exp_step)
- {out.overflow, out.val.exp} <= {1'b0, in.val.exp} + 1;
- end
-
-endmodule
-
-// Stage 14: output register and encoding of zeros and NaNs.
-//
-// With enable clear the value passes through untouched. With enable set:
-//   - an all-ones exponent, in.slow or in.overflow produce exponent
-//     FLOAT_EXP_MAX with mantissa 1
-//   - otherwise in.zero produces exponent 0 with mantissa 0
-// The sign bit is never modified.
-module gfx_shader_fpint_encode
-import gfx::*;
-(
- input logic clk,
-
- input fpint_rnorm_encode in,
- input logic enable,
-
- output float q
-);
-
- logic emit_nan, emit_zero;
-
- assign emit_nan = enable & ((&in.val.exp) | in.slow | in.overflow);
- assign emit_zero = enable & in.zero;
-
- always_ff @(posedge clk) begin
- q <= in.val;
-
- // NaN takes priority over zero
- if (emit_nan) begin
- q.exp <= FLOAT_EXP_MAX;
- q.mant <= 1;
- end else if (emit_zero) begin
- q.exp <= 0;
- q.mant <= 0;
- end
- end
-
-endmodule
diff --git a/platform/wavelet3d/gfx_shader_front.sv b/platform/wavelet3d/gfx_shader_front.sv
deleted file mode 100644
index 52074fd..0000000
--- a/platform/wavelet3d/gfx_shader_front.sv
+++ /dev/null
@@ -1,746 +0,0 @@
-// Instruction word travelling through the front-end together with the group
-// it belongs to. 'retry' is driven from the instruction cache's insn_retry
-// output.
-typedef struct
-{
- logic valid,
- retry;
- gfx::group_id group;
- gfx_isa::insn_word insn;
-} front_wave;
-
-// Destination register information that bypasses the register read stages.
-typedef struct
-{
- gfx::xgpr_num dest;
- logic dest_scalar;
-} front_reg_passthru;
-
-// Index of a line in the instruction cache (5 bits, i.e. 32 lines)
-typedef logic[4:0] icache_line_num;
-
-// Remaining upper bits of an oword pointer once the line index is removed
-typedef logic[$bits(gfx::oword_ptr) - $bits(icache_line_num) - 1:0] icache_tag;
-
-// Address of one cache line worth of instructions: {tag, line index}
-typedef struct packed
-{
- icache_tag tag;
- icache_line_num line;
-} icache_line_tag;
-
-// Address of a single instruction word: line address plus a 3-bit word
-// index within the line
-typedef struct packed
-{
- icache_line_tag line_tag;
- logic[2:0] word_num;
-} icache_ptr;
-
-// Shader front-end: chains instruction fetch (gfx_shader_bind), register
-// read setup (gfx_shader_read_regs), instruction class decode
-// (gfx_shader_decode_class) and decode of the p0 operation
-// (gfx_shader_decode_fpint).
-//
-// Ports:
-//   fetch_mem    - memory master used for instruction fetch
-//   icache_flush - passed to the instruction cache
-//   reg_read     - register file read requests
-//   reg_bind     - register file interface used by the fetch stage
-//   front        - interface towards the back-end
-module gfx_shader_front
-import gfx::*;
-(
- input logic clk,
- rst_n,
-
- gfx_axib.m fetch_mem,
-
- input logic icache_flush,
-
- gfx_regfile_io.read reg_read,
- gfx_regfile_io.bind_ reg_bind,
-
- gfx_front_back.front front
-);
-
- // NOTE(review): fetch_insn, port_insn and fetch_hit are declared but not
- // referenced anywhere in this module.
- word fetch_insn, port_insn;
- logic fetch_hit, p0_writeback;
- front_wave bind_wave, dec_wave, port_dec_wave;
- front_reg_passthru reg_passthru;
-
- // Destination register information goes straight to the execute interface
- assign front.execute.wave.dest = reg_passthru.dest;
- assign front.execute.wave.dest_scalar = reg_passthru.dest_scalar;
-
- // Fetch: picks a group and reads its next instruction
- gfx_shader_bind bind_
- (
- .clk,
- .rst_n,
- .mem(fetch_mem),
- .wave(bind_wave),
- .regs(reg_bind),
- .loop_valid(front.loop.valid),
- .loop_group(front.loop.group),
- .icache_flush
- );
-
- // Issues the register reads for the fetched instruction
- gfx_shader_read_regs reg_dec
- (
- .clk,
- .rst_n,
- .in(bind_wave),
- .out(dec_wave),
- .read(reg_read),
- .passthru(reg_passthru)
- );
-
- // Decodes the instruction class and generates the dispatch signals
- gfx_shader_decode_class class_dec
- (
- .clk,
- .rst_n,
- .wave(dec_wave),
- .out_group(front.execute.wave.group),
- .port_wave(port_dec_wave),
- .dispatch(front.dispatch),
- .p0_writeback
- );
-
- // Decodes the control word of the p0 operation
- gfx_shader_decode_fpint p0_dec
- (
- .clk,
- .op(front.execute.p0),
- .insn(port_dec_wave.insn),
- .writeback(p0_writeback)
- );
-
-endmodule
-
-// Fetch stage: takes runnable groups from a FIFO, presents each one to the
-// register file as regs.pc_front_group and reads the instruction at
-// regs.pc_front through the instruction cache.
-//
-// Ports:
-//   mem          - memory master; only the read channels are used
-//   icache_flush - passed to the instruction cache
-//   loop_valid,
-//   loop_group   - group to be pushed into the runnable FIFO
-//   regs         - register file interface providing the PC of the group
-//   wave         - fetched instruction together with its group
-module gfx_shader_bind
-import gfx::*;
-(
- input logic clk,
- rst_n,
-
- gfx_axib.m mem,
-
- input logic icache_flush,
-
- input logic loop_valid,
- input group_id loop_group,
-
- gfx_regfile_io.bind_ regs,
-
- output front_wave wave
-);
-
- localparam int ICACHE_STAGES = 6;
- // Depth of the group/valid shift registers that accompany a fetch
- localparam int BIND_STAGES = REGFILE_STAGES + ICACHE_STAGES;
-
- gfx_beats #($bits(group_id)) runnable_in(), runnable_out();
-
- logic ar_stall, request_ready, request_valid, valids[BIND_STAGES];
- group_id groups[BIND_STAGES];
- icache_line_tag araddr, request_addr;
-
- // The write channels are never used
- assign mem.bready = 0;
- assign mem.wvalid = 0;
- assign mem.awvalid = 0;
-
- // One burst reads a whole oword, one word per beat
- assign mem.arlen = ($bits(mem.arlen))'($bits(oword) / $bits(word) - 1);
- // The line address is extended with zeros in the low address bits
- assign mem.araddr = {araddr, ($clog2($bits(oword)) - $clog2($bits(word)) + SUBWORD_BITS)'('0)};
- assign mem.arburst = 2'b01; // Incremental mode
-
- assign runnable_in.tx.data = loop_group;
- assign runnable_in.tx.valid = loop_valid;
-
- // The FIFO output is always accepted
- assign regs.pc_front_group = runnable_out.rx.data;
- assign runnable_out.rx.ready = 1;
-
- // Group of the instruction leaving the cache
- assign wave.group = groups[$size(groups) - 1];
-
- gfx_skid_buf #($bits(araddr)) ar_skid
- (
- .clk,
- .in(request_addr),
- .out(araddr),
- .stall(ar_stall)
- );
-
- gfx_skid_flow ar_flow
- (
- .clk,
- .rst_n,
- .stall(ar_stall),
- .in_ready(request_ready),
- .in_valid(request_valid),
- .out_ready(mem.arready),
- .out_valid(mem.arvalid)
- );
-
- //TODO: We could remove ~25 entries without affecting throughput, latency or correctness
- gfx_fifo #(.WIDTH($bits(group_id)), .DEPTH(1 << $bits(group_id))) runnable
- (
- .clk,
- .rst_n,
- .in(runnable_in.rx),
- .out(runnable_out.tx)
- );
-
- gfx_shader_bind_icache icache
- (
- .clk,
- .rst_n,
-
- .icache_flush,
- .read_addr(regs.pc_front),
- .read_valid(valids[REGFILE_STAGES - 1]),
-
- .request_addr,
- .request_valid,
- .request_ready,
-
- .fetch_data(mem.rdata),
- .fetch_last(mem.rlast),
- .fetch_valid(mem.rvalid),
- .fetch_ready(mem.rready),
-
- .insn(wave.insn),
- .insn_retry(wave.retry),
- .insn_valid(wave.valid)
- );
-
- // Group shift register (not reset)
- always_ff @(posedge clk) begin
- groups[0] <= runnable_out.rx.data;
- for (int i = 1; i < $size(groups); ++i)
- groups[i] <= groups[i - 1];
- end
-
- // Valid shift register, cleared by reset
- always_ff @(posedge clk or negedge rst_n)
- if (~rst_n)
- for (int i = 0; i < $size(valids); ++i)
- valids[i] <= 0;
- else begin
- valids[0] <= runnable_out.rx.valid;
- for (int i = 1; i < $size(valids); ++i)
- valids[i] <= valids[i - 1];
- end
-
-endmodule
-
-// Instruction cache of the fetch stage.
-//
-// The cache holds 1 << $bits(icache_line_num) lines. Each line stores a tag,
-// one oword of data and three flags (valid, accessed, hit). A lookup is
-// pipelined: the suffix _1 .. _5 on a signal names the stage it belongs to.
-// A lookup that does not find its line reports insn_retry together with
-// insn_valid, and may start a burst read of the line.
-//
-// Ports:
-//   icache_flush  - restart the flush sequence that clears all line flags
-//   read_valid,
-//   read_addr     - lookup request
-//   fetch_*       - read data channel of the memory interface
-//   request_*     - line address requests towards the memory interface
-//   insn,
-//   insn_valid,
-//   insn_retry    - lookup result
-module gfx_shader_bind_icache
-import gfx::*;
-(
- input logic clk,
- rst_n,
-
- input logic icache_flush,
-
- input logic read_valid,
- input icache_ptr read_addr,
-
- input logic fetch_last,
- fetch_valid,
- input word fetch_data,
- output logic fetch_ready,
-
- input logic request_ready,
- output logic request_valid,
- output icache_line_tag request_addr,
-
- output logic insn_valid,
- insn_retry,
- output word insn
-);
-
- // Dan Gisselquist limits to (1 << 3) bursts by default.
- // See LGMAXBURST in axixbar.v
- localparam int PENDING_FIFO_DEPTH = 8;
-
- // FLUSH: flush_ptr walks over the lines, clearing their flags.
- // RUN: normal operation.
- enum int unsigned
- {
- FLUSH,
- RUN
- } state;
-
- // 'read_hold' and 'read' are two successive registers on the read port
- struct
- {
- logic valid,
- accessed,
- hit;
- icache_tag tag;
- oword data;
- } cache[1 << $bits(icache_line_num)], read, read_hold;
-
- // Line addresses of the bursts that have been requested and not yet
- // written into the cache
- gfx_beats #($bits(icache_line_tag)) pending_in(), pending_out();
-
- logic accessed_write, accessed_write_enable, burst, fetch_done, hit_write,
- in_flush, hit_commit, hit_write_enable, retry_4, retry_5, rollback,
- tag_hit, valid_1, valid_2, valid_3, valid_4, valid_5, valid_write,
- valid_write_enable;
-
- icache_ptr read_addr_1, read_addr_2, read_addr_3, read_addr_4, read_addr_5;
- icache_tag tag_write;
- icache_line_num accessed_write_line, flush_ptr, hit_write_line, valid_write_line;
- icache_line_tag pending_pop;
-
- oword data_write;
- word[1:0] data_5;
- word[7:0] fetch_shift;
- qword[1:0] data_3;
- udword[1:0] data_4;
-
- assign data_3 = read.data;
- assign tag_hit = read.tag == read_addr_3.line_tag.tag;
- // No more beats are accepted until the received line has been consumed
- assign fetch_ready = ~fetch_done;
- assign pending_pop = pending_out.rx.data;
-
- // A request is issued and recorded in the pending FIFO at the same time:
- // each side is qualified by the readiness of the other one.
- assign request_addr = read_addr_4.line_tag;
- assign request_valid = burst & pending_in.tx.ready;
- assign pending_in.tx.data = read_addr_4.line_tag;
- assign pending_in.tx.valid = burst & request_ready;
- // Pop the pending entry when its line is written. hit_commit and rollback
- // take priority over that write.
- assign pending_out.rx.ready = fetch_done & ~hit_commit & ~rollback;
-
- gfx_fifo #(.WIDTH($bits(icache_line_tag)), .DEPTH(PENDING_FIFO_DEPTH)) pending
- (
- .clk,
- .rst_n,
- .in(pending_in.rx),
- .out(pending_out.tx)
- );
-
- always_comb
- unique case (state)
- FLUSH: in_flush = 1;
- RUN: in_flush = 0;
- endcase
-
- always_ff @(posedge clk or negedge rst_n)
- if (~rst_n) begin
- state <= FLUSH;
- flush_ptr <= '0;
- fetch_done <= 0;
-
- valid_1 <= 0;
- valid_2 <= 0;
- valid_3 <= 0;
- valid_4 <= 0;
- valid_5 <= 0;
-
- burst <= 0;
- end else begin
- unique case (state)
- // Leave FLUSH once the pointer has reached the last line
- FLUSH:
- if (~icache_flush & &flush_ptr)
- state <= RUN;
-
- RUN:
- if (icache_flush)
- state <= FLUSH;
- endcase
-
- // Free-running counter, held at zero while a flush is requested
- flush_ptr <= flush_ptr + 1;
- if (icache_flush)
- flush_ptr <= '0;
-
- // fetch_done is set by the last beat of a burst. It stays set while
- // the line cannot be written (hit_commit, rollback) or while there is
- // no pending entry to pop.
- if (fetch_done)
- fetch_done <= hit_commit | ~pending_out.rx.valid | rollback;
- else if (fetch_ready & fetch_valid)
- fetch_done <= fetch_last;
-
- valid_1 <= read_valid;
- valid_2 <= valid_1;
- valid_3 <= valid_2;
- valid_4 <= valid_3;
- valid_5 <= valid_4;
-
- // Start a burst for a valid lookup whose tag does not match, unless
- // the line is marked as accessed or is valid without its hit flag.
- burst <= valid_3 & ~tag_hit & ~read.accessed & (~read.valid | read.hit);
- end
-
- always_ff @(posedge clk) begin
- // Write port for tag/data/valid: fills the line of the pending entry
- tag_write <= pending_pop.tag;
- data_write <= fetch_shift;
-
- valid_write <= 1;
- valid_write_line <= pending_pop.line;
- valid_write_enable <= fetch_done & ~hit_commit & pending_out.rx.valid & ~rollback;
-
- // Write port for the 'accessed' flag, in priority order:
- //   rollback - clear the flag of the line at stage 5
- //   fill     - clear the flag of the line being filled
- //   lookup   - set the flag of the line being looked up
- accessed_write <= 0;
- accessed_write_enable <= 1;
-
- if (rollback)
- accessed_write_line <= read_addr_5.line_tag.line;
- else if (fetch_done & ~hit_commit & pending_out.rx.valid)
- accessed_write_line <= pending_pop.line;
- else begin
- accessed_write <= 1;
- accessed_write_line <= read_addr.line_tag.line;
- accessed_write_enable <= read_valid;
- end
-
- // Write port for the 'hit' flag: set by a lookup that hit, cleared for
- // the line being filled
- hit_write <= hit_commit;
- if (hit_commit) begin
- hit_write_line <= read_addr_4.line_tag.line;
- hit_write_enable <= 1;
- end else begin
- hit_write_line <= pending_pop.line;
- hit_write_enable <= fetch_done & pending_out.rx.valid & ~rollback;
- end
-
- // While flushing, all three write ports clear the line at flush_ptr.
- // This overrides everything assigned above.
- if (in_flush) begin
- valid_write <= 0;
- valid_write_line <= flush_ptr;
- valid_write_enable <= 1;
-
- accessed_write <= 0;
- accessed_write_line <= flush_ptr;
- accessed_write_enable <= 1;
-
- hit_write <= 0;
- hit_write_line <= flush_ptr;
- hit_write_enable <= 1;
- end
-
- // The writes use the port values registered on the previous edge
- if (valid_write_enable) begin
- cache[valid_write_line].tag <= tag_write;
- cache[valid_write_line].data <= data_write;
- cache[valid_write_line].valid <= valid_write;
- end
-
- if (accessed_write_enable)
- cache[accessed_write_line].accessed <= accessed_write;
-
- if (hit_write_enable)
- cache[hit_write_line].hit <= hit_write;
-
- // Stage 1: register the lookup address
- read_addr_1 <= read_addr;
-
- // Stage 2: read the line
- read_hold <= cache[read_addr_1.line_tag.line];
- read_addr_2 <= read_addr_1;
-
- // Stage 3: second register on the read data; tag_hit is computed here
- read <= read_hold;
- read_addr_3 <= read_addr_2;
-
- // Stage 4: select one half of the line, decide hit or retry
- data_4 <= data_3[read_addr_3.word_num[2]];
- retry_4 <= ~tag_hit | ~read.valid;
- hit_commit <= valid_3 & tag_hit & read.valid;
- read_addr_4 <= read_addr_3;
-
- // Stage 5: select one half of the previous selection. 'rollback' marks
- // a burst that could not be both requested and recorded as pending.
- data_5 <= data_4[read_addr_4.word_num[1]];
- retry_5 <= retry_4;
- rollback <= burst & (~request_valid | ~pending_in.tx.valid);
- read_addr_5 <= read_addr_4;
-
- // Output: final word selection
- insn <= data_5[read_addr_5.word_num[0]];
- insn_retry <= retry_5;
- insn_valid <= valid_5;
-
- // Received beats enter at index 0 and move towards higher indices
- if (fetch_ready & fetch_valid) begin
- fetch_shift[0] <= fetch_data;
- for (int i = 1; i < $size(fetch_shift); ++i)
- fetch_shift[i] <= fetch_shift[i - 1];
- end
- end
-
-endmodule
-
-// Register read setup: translates the register fields of the fetched
-// instruction into a register file read request (read.op), and delays the
-// instruction and its destination register information.
-//
-// Ports:
-//   in       - fetched instruction
-//   read     - register file read request interface
-//   out      - 'in' delayed by HOLD_DEPTH cycles
-//   passthru - destination register information, delayed by PASSTHRU_DEPTH
-//              cycles
-module gfx_shader_read_regs
-import gfx::*;
-import gfx_isa::*;
-(
- input logic clk,
- rst_n,
-
- input front_wave in,
-
- gfx_regfile_io.read read,
-
- output front_wave out,
- output front_reg_passthru passthru
-);
-
- // + 1 for the next-cycle delay of read.op
- localparam int PASSTHRU_DEPTH = REG_READ_STAGES + 1 - 2;
- localparam int HOLD_DEPTH = PASSTHRU_DEPTH - 2;
-
- logic reg_rev;
- logic valid[HOLD_DEPTH];
- front_wave out_hold[HOLD_DEPTH];
- front_reg_passthru passthru_hold[PASSTHRU_DEPTH];
-
- assign passthru = passthru_hold[$size(passthru_hold) - 1];
-
- assign reg_rev = in.insn.reg_rev;
-
- // The valid bit comes from its own, resettable, shift register
- always_comb begin
- out = out_hold[$size(out_hold) - 1];
- out.valid = valid[$size(valid) - 1];
- end
-
- always_ff @(posedge clk) begin
- out_hold[0] <= in;
- for (int i = 1; i < $size(out_hold); ++i)
- out_hold[i] <= out_hold[i - 1];
-
- // The destination is scalar in the register modes REGS_SVS and REGS_SSS
- passthru_hold[0].dest <= in.insn.dst_src.rr.rd;
- unique case (in.insn.reg_mode)
- REGS_SVS, REGS_SSS:
- passthru_hold[0].dest_scalar <= 1;
-
- REGS_VVS, REGS_VVV:
- passthru_hold[0].dest_scalar <= 0;
- endcase
-
- for (int i = 1; i < $size(passthru_hold); ++i)
- passthru_hold[i] <= passthru_hold[i - 1];
-
- read.op.group <= in.group;
-
- // All possible interpretations of the source fields are forwarded; the
- // flags below tell the register file which ones apply.
- read.op.b_imm <= in.insn.dst_src.rr.b.imm;
- read.op.a_sgpr <= in.insn.dst_src.rr.ra.sgpr;
- read.op.b_sgpr <= in.insn.dst_src.rr.b.read.r.sgpr;
- read.op.a_vgpr <= in.insn.dst_src.rr.ra.vgpr.num;
- read.op.b_vgpr <= in.insn.dst_src.rr.b.read.r.vgpr.num;
- read.op.b_is_imm <= in.insn.dst_src.rr.b_is_imm;
- read.op.b_is_const <= in.insn.dst_src.rr.b.read.from_consts;
- read.op.scalar_rev <= reg_rev;
-
- unique case (in.insn.reg_mode)
- // Mixed modes: exactly one source is scalar, reg_rev selects which
- REGS_SVS, REGS_VVS: begin
- read.op.a_scalar <= reg_rev;
- read.op.b_scalar <= ~reg_rev;
- end
-
- REGS_SSS: begin
- read.op.a_scalar <= 1;
- read.op.b_scalar <= 1;
- end
-
- REGS_VVV: begin
- read.op.a_scalar <= 0;
- read.op.b_scalar <= 0;
- end
- endcase
- end
-
- always_ff @(posedge clk or negedge rst_n)
- if (~rst_n)
- for (int i = 0; i < HOLD_DEPTH; ++i)
- valid[i] <= 0;
- else begin
- valid[0] <= in.valid;
-
- for (int i = 1; i < HOLD_DEPTH; ++i)
- valid[i] <= valid[i - 1];
- end
-
-endmodule
-
-// Instruction class decode: determines which port an instruction is
-// dispatched to.
-//
-// Ports:
-//   wave         - instruction to classify
-//   port_wave    - 'wave' delayed by one cycle
-//   out_group    - group of port_wave, delayed by one further cycle
-//   dispatch     - per-port dispatch strobes (p1: INSN_MEM, p2: INSN_SFU,
-//                  p3: INSN_GROUP) and a valid flag
-//   p0_writeback - set when the instruction held in port_wave is neither of
-//                  the three classes above nor a retry
-module gfx_shader_decode_class
-import gfx::*;
-import gfx_isa::*;
-(
- input logic clk,
- rst_n,
-
- input front_wave wave,
- output front_wave port_wave,
- output group_id out_group,
-
- output shader_dispatch dispatch,
- output logic p0_writeback
-);
-
- // is_fsu is the flag for the INSN_SFU class
- logic is_fsu, is_mem, is_group, hold_valid, retry;
- front_wave hold_wave;
-
- assign p0_writeback = ~(is_mem | is_fsu | is_group | retry);
-
- always_comb begin
- port_wave = hold_wave;
- port_wave.valid = hold_valid;
- end
-
- always_ff @(posedge clk) begin
- hold_wave <= wave;
- out_group <= port_wave.group;
- end
-
- always_ff @(posedge clk or negedge rst_n)
- // Intentionally repetitive
- if (~rst_n) begin
- is_fsu <= 0;
- is_mem <= 0;
- is_group <= 0;
-
- retry <= 0;
- hold_valid <= 0;
-
- dispatch <= '0;
- end else begin
- is_fsu <= 0;
- is_mem <= 0;
- is_group <= 0;
-
- retry <= wave.retry;
- hold_valid <= wave.valid;
-
- unique case (wave.insn.insn_class)
- INSN_FPINT: ; // p0 has no ready
- INSN_MEM: is_mem <= 1;
- INSN_SFU: is_fsu <= 1;
- INSN_GROUP: is_group <= 1;
-
- default:
- {is_mem, is_fsu, is_group} <= 'x;
- endcase
-
- // The strobes are derived from the flags registered one cycle earlier,
- // i.e. they describe the instruction held in hold_wave.
- dispatch.p1 <= is_mem;
- dispatch.p2 <= is_fsu;
- dispatch.p3 <= is_group;
-
- // Nothing is dispatched for an invalid slot or for a retry
- if (~hold_valid | retry) begin
- dispatch.p1 <= 0;
- dispatch.p2 <= 0;
- dispatch.p3 <= 0;
- end
-
- dispatch.valid <= hold_valid;
- end
-
-endmodule
-
-// Decode of p0 (fp/int unit) instructions: translates the operation field of
-// the instruction into the per-stage control signals of the fp/int pipeline.
-//
-// Ports:
-//   insn      - instruction word
-//   writeback - copied into op.writeback
-//   op        - control word, updated on every rising edge of clk
-//
-// The table is intentionally written out in full: every operation assigns
-// every control signal. An unknown operation produces an all-x control word.
-module gfx_shader_decode_fpint
-import gfx::*;
-import gfx_isa::*;
-(
- input logic clk,
-
- input insn_word insn,
- input logic writeback,
-
- output fpint_op op
-);
-
- always_ff @(posedge clk) begin
- unique case (insn.by_class.fpint.op)
- // Move: a * 1 through the integer multiplier path, low word
- INSN_FPINT_MOV: begin
- op.setup_mul_float <= 0;
- op.setup_unit_b <= 1;
- op.mnorm_put_hi <= 0;
- op.mnorm_put_lo <= 1;
- op.mnorm_put_mul <= 0;
- op.mnorm_zero_flags <= 1;
- op.mnorm_zero_b <= 1;
- op.minmax_abs <= 1;
- op.minmax_swap <= 0;
- op.minmax_zero_min <= 0;
- op.minmax_copy_flags <= 1;
- op.shiftr_int_signed <= 0;
- op.addsub_int_operand <= 0;
- op.addsub_copy_flags <= 1;
- op.clz_force_nop <= 1;
- op.shiftl_copy_flags <= 1;
- op.round_copy_flags <= 1;
- // Fix: a move must deliver its operand bit for bit. With the copied
- // flags, guard/round/sticky are plain bits of the moved word, so
- // rounding could increment the value, and the encoder would rewrite
- // any value whose exponent field is all ones. Both are disabled, as
- // in the other pass-through operations (IMUL, FMAX, FMIN).
- op.round_enable <= 0;
- op.encode_enable <= 0;
- end
-
- // Floating-point multiply: normalized product, rounded and encoded
- INSN_FPINT_FMUL: begin
- op.setup_mul_float <= 1;
- op.setup_unit_b <= 0;
- op.mnorm_put_hi <= 0;
- op.mnorm_put_lo <= 0;
- op.mnorm_put_mul <= 1;
- op.mnorm_zero_flags <= 0;
- op.mnorm_zero_b <= 1;
- op.minmax_abs <= 1;
- op.minmax_swap <= 0;
- op.minmax_zero_min <= 0;
- op.minmax_copy_flags <= 1;
- op.shiftr_int_signed <= 0;
- op.addsub_int_operand <= 0;
- op.addsub_copy_flags <= 1;
- op.clz_force_nop <= 1;
- op.shiftl_copy_flags <= 1;
- op.round_copy_flags <= 1;
- op.round_enable <= 1;
- op.encode_enable <= 1;
- end
-
- // Integer multiply: low word of a * b, no rounding or encoding
- INSN_FPINT_IMUL: begin
- op.setup_mul_float <= 0;
- op.setup_unit_b <= 0;
- op.mnorm_put_hi <= 0;
- op.mnorm_put_lo <= 1;
- op.mnorm_put_mul <= 0;
- op.mnorm_zero_flags <= 1;
- op.mnorm_zero_b <= 1;
- op.minmax_abs <= 1;
- op.minmax_swap <= 0;
- op.minmax_zero_min <= 0;
- op.minmax_copy_flags <= 1;
- op.shiftr_int_signed <= 0;
- op.addsub_int_operand <= 0;
- op.addsub_copy_flags <= 1;
- op.clz_force_nop <= 1;
- op.shiftl_copy_flags <= 1;
- op.round_copy_flags <= 1;
- op.round_enable <= 0;
- op.encode_enable <= 0;
- end
-
- // Floating-point add: full adder path with renormalization
- INSN_FPINT_FADD: begin
- op.setup_mul_float <= 0;
- op.setup_unit_b <= 1;
- op.mnorm_put_hi <= 0;
- op.mnorm_put_lo <= 1;
- op.mnorm_put_mul <= 0;
- op.mnorm_zero_flags <= 0;
- op.mnorm_zero_b <= 0;
- op.minmax_abs <= 1;
- op.minmax_swap <= 0;
- op.minmax_zero_min <= 0;
- op.minmax_copy_flags <= 0;
- op.shiftr_int_signed <= 0;
- op.addsub_int_operand <= 0;
- op.addsub_copy_flags <= 0;
- op.clz_force_nop <= 0;
- op.shiftl_copy_flags <= 0;
- op.round_copy_flags <= 0;
- op.round_enable <= 1;
- op.encode_enable <= 1;
- end
-
- // Floating-point max/min: signed comparison, the losing operand is
- // zeroed; FMIN inverts the comparison
- INSN_FPINT_FMAX, INSN_FPINT_FMIN: begin
- op.setup_mul_float <= 0;
- op.setup_unit_b <= 1;
- op.mnorm_put_hi <= 0;
- op.mnorm_put_lo <= 1;
- op.mnorm_put_mul <= 0;
- op.mnorm_zero_flags <= 0;
- op.mnorm_zero_b <= 0;
- op.minmax_abs <= 0;
- op.minmax_swap <= insn.by_class.fpint.op == INSN_FPINT_FMIN;
- op.minmax_zero_min <= 1;
- op.minmax_copy_flags <= 1;
- op.shiftr_int_signed <= 0;
- op.addsub_int_operand <= 0;
- op.addsub_copy_flags <= 1;
- op.clz_force_nop <= 1;
- op.shiftl_copy_flags <= 1;
- op.round_copy_flags <= 1;
- op.round_enable <= 0;
- op.encode_enable <= 0;
- end
-
- // Signed integer to floating-point conversion
- INSN_FPINT_FCVT: begin
- op.setup_mul_float <= 0;
- op.setup_unit_b <= 1;
- op.mnorm_put_hi <= 0;
- op.mnorm_put_lo <= 1;
- op.mnorm_put_mul <= 0;
- op.mnorm_zero_flags <= 1;
- op.mnorm_zero_b <= 1;
-
- op.minmax_abs <= 1;
- op.minmax_swap <= 0;
- op.minmax_zero_min <= 0;
- op.minmax_copy_flags <= 0;
- op.shiftr_int_signed <= 1;
- op.addsub_int_operand <= 1;
- op.addsub_copy_flags <= 1;
- op.clz_force_nop <= 0;
- op.shiftl_copy_flags <= 0;
- op.round_copy_flags <= 0;
- op.round_enable <= 1;
- op.encode_enable <= 1;
- end
-
- default:
- op <= 'x;
- endcase
-
- // Assigned last so that it is also defined for unknown operations
- op.writeback <= writeback;
- end
-
-endmodule
diff --git a/platform/wavelet3d/gfx_shader_group.sv b/platform/wavelet3d/gfx_shader_group.sv
deleted file mode 100644
index e668877..0000000
--- a/platform/wavelet3d/gfx_shader_group.sv
+++ /dev/null
@@ -1,17 +0,0 @@
-// Execution unit for group operations. Placeholder: the module only declares
-// its ports and has no logic.
-module gfx_shader_group
-import gfx::*;
-(
- input logic clk,
- input logic rst_n,
-
- // Operation and the wave it belongs to
- input group_op op,
- input wave_exec wave,
-
- // Register file read data
- gfx_regfile_io.ab read_data,
-
- // Handshake on the input side
- gfx_shake.rx in_shake,
-
- // Writeback interface
- gfx_wb.tx wb
-);
-
-endmodule
diff --git a/platform/wavelet3d/gfx_shader_mem.sv b/platform/wavelet3d/gfx_shader_mem.sv
deleted file mode 100644
index 403c9e4..0000000
--- a/platform/wavelet3d/gfx_shader_mem.sv
+++ /dev/null
@@ -1,17 +0,0 @@
-// Execution unit for memory operations. Placeholder: the module only declares
-// its ports and has no logic.
-module gfx_shader_mem
-import gfx::*;
-(
- input logic clk,
- input logic rst_n,
-
- // Operation and the wave it belongs to
- input mem_op op,
- input wave_exec wave,
-
- // Register file read data
- gfx_regfile_io.ab read_data,
-
- // Handshake on the input side
- gfx_shake.rx in_shake,
-
- // Writeback interface
- gfx_wb.tx wb
-);
-
-endmodule
diff --git a/platform/wavelet3d/gfx_shader_regs.sv b/platform/wavelet3d/gfx_shader_regs.sv
deleted file mode 100644
index ef3a129..0000000
--- a/platform/wavelet3d/gfx_shader_regs.sv
+++ /dev/null
@@ -1,302 +0,0 @@
-// Shader register storage: per-group PC table, per-group lane-mask table,
-// a constant ROM, one scalar register file (SGPRs) and one vector register
-// file per lane (VGPRs). Operand selection (immediate / constant / SGPR,
-// scalar-vs-vector, optional A/B swap) is pipelined alongside the memories
-// and the selected operands are registered onto io.a[] / io.b[].
-//
-// Fixes relative to the previous revision:
-// * io.mask_back_group now drives mask_read_groups[0]; it used to be a
-// second driver on pc_read_groups[0], leaving the mask table read
-// address undriven.
-// * io.b[] now takes its vector operand from the B read port
-// (read_b_data_vgpr) instead of duplicating the A read port.
-module gfx_shader_regs
-import gfx::*;
-(
- input logic clk,
-
- gfx_regfile_io.regs io
-);
-
- // verilator tracing_off
-
- localparam PC_TABLE_PORTS = 2;
- localparam MASK_TABLE_PORTS = 1;
-
- word hold_imm[REGFILE_STAGES], imm_out, read_a_data_sgpr, read_b_data_scalar,
- read_b_data_sgpr, read_const, read_a_data_vgpr[SHADER_LANES],
- read_b_data_vgpr[SHADER_LANES], sgpr_out_a, sgpr_out_b;
-
- group_id mask_read_groups[MASK_TABLE_PORTS], pc_read_groups[PC_TABLE_PORTS];
- word_ptr pc_read[PC_TABLE_PORTS];
- lane_mask mask_read[MASK_TABLE_PORTS];
-
- logic a_scalar_out, b_is_const_out, b_is_imm_out, b_scalar_out, scalar_rev_out;
- group_id hold_read_group_1, hold_read_group_2;
- sgpr_num hold_read_a_sgpr;
- vgpr_num hold_read_a_vgpr_1, hold_read_a_vgpr_2, hold_read_b_vgpr_1, hold_read_b_vgpr_2;
- logic[REGFILE_STAGES - 1:0] hold_b_is_imm, hold_b_is_const;
- logic[REGFILE_STAGES + 1 - 1:0] hold_scalar_rev;
- logic[REGFILE_STAGES + 2 - 1:0] hold_a_scalar, hold_b_scalar;
-
- // PC table: read port 0 serves the back end, read port 1 the front end.
- assign io.pc_back = pc_read[0];
- assign io.pc_front = pc_read[1];
- assign pc_read_groups[0] = io.pc_back_group;
- assign pc_read_groups[1] = io.pc_front_group;
-
- // Lane-mask table: single read port, addressed by the back end.
- assign io.mask_back = mask_read[0];
- assign mask_read_groups[0] = io.mask_back_group;
-
- // Taps at the last stage of each control delay line.
- assign imm_out = hold_imm[$size(hold_imm) - 1];
- assign a_scalar_out = hold_a_scalar[$bits(hold_a_scalar) - 1];
- assign b_scalar_out = hold_b_scalar[$bits(hold_b_scalar) - 1];
- assign b_is_imm_out = hold_b_is_imm[$bits(hold_b_is_imm) - 1];
- assign b_is_const_out = hold_b_is_const[$bits(hold_b_is_const) - 1];
- assign scalar_rev_out = hold_scalar_rev[$bits(hold_scalar_rev) - 1];
-
- gfx_shader_table #(.DATA_WIDTH($bits(word_ptr)), .READ_PORTS(PC_TABLE_PORTS)) pc_table
- (
- .clk,
- .read(pc_read),
- .write(io.pc_wb),
- .read_groups(pc_read_groups),
- .write_group(io.pc_wb_group),
- .write_enable(io.pc_wb_write)
- );
-
- gfx_shader_table #(.DATA_WIDTH($bits(lane_mask)), .READ_PORTS(MASK_TABLE_PORTS)) mask_table
- (
- .clk,
- .read(mask_read),
- .write(io.mask_wb),
- .read_groups(mask_read_groups),
- .write_group(io.mask_wb_group),
- .write_enable(io.mask_wb_write)
- );
-
- // Constant ROM shares the B SGPR number as its index.
- gfx_shader_consts consts
- (
- .clk,
- .num(io.op.b_sgpr),
- .value(read_const)
- );
-
- // SGPR file, addressed by {group, register}. Port A is addressed one cycle
- // later than port B (it uses the held copies of group / a_sgpr); the B
- // result then spends one extra cycle in the read_b_data_scalar mux below.
- gfx_shader_regfile #($bits(group_id) + $bits(sgpr_num)) sgprs
- (
- .clk,
-
- .read_a_num({hold_read_group_1, hold_read_a_sgpr}),
- .read_b_num({io.op.group, io.op.b_sgpr}),
- .read_a_data(read_a_data_sgpr),
- .read_b_data(read_b_data_sgpr),
-
- .write(io.sgpr_write.write),
- .write_num({io.sgpr_write.group, io.sgpr_write.sgpr}),
- .write_data(io.sgpr_write.data)
- );
-
- // One VGPR file per lane. All lanes share the same (twice-delayed) read
- // addresses and the same write address; the per-lane write enable comes
- // from the write mask.
- generate
- for (genvar i = 0; i < SHADER_LANES; ++i) begin: vgprs
- gfx_shader_regfile #($bits(group_id) + $bits(vgpr_num)) vgprs
- (
- .clk,
-
- .read_a_num({hold_read_group_2, hold_read_a_vgpr_2}),
- .read_b_num({hold_read_group_2, hold_read_b_vgpr_2}),
- .read_a_data(read_a_data_vgpr[i]),
- .read_b_data(read_b_data_vgpr[i]),
-
- .write(io.vgpr_write.mask[i]),
- .write_num({io.vgpr_write.group, io.vgpr_write.vgpr}),
- .write_data(io.vgpr_write.data[i])
- );
- end
- endgenerate
-
- always_ff @(posedge clk) begin
- // Stage 0 of every delay line samples the incoming op; the immediate
- // is zero-extended to a full word.
- hold_imm[0] <= {{($bits(word) - $bits(io.op.b_imm)){1'b0}}, io.op.b_imm};
- hold_a_scalar[0] <= io.op.a_scalar;
- hold_b_scalar[0] <= io.op.b_scalar;
- hold_b_is_imm[0] <= io.op.b_is_imm;
- hold_b_is_const[0] <= io.op.b_is_const;
- hold_scalar_rev[0] <= io.op.scalar_rev;
-
- // Stages shared by all delay lines.
- for (int i = 1; i < REGFILE_STAGES; ++i) begin
- hold_imm[i] <= hold_imm[i - 1];
- hold_a_scalar[i] <= hold_a_scalar[i - 1];
- hold_b_scalar[i] <= hold_b_scalar[i - 1];
- hold_b_is_imm[i] <= hold_b_is_imm[i - 1];
- hold_b_is_const[i] <= hold_b_is_const[i - 1];
- hold_scalar_rev[i] <= hold_scalar_rev[i - 1];
- end
-
- // a_scalar / b_scalar are consumed two stages later (final operand mux).
- for (int i = REGFILE_STAGES; i < REGFILE_STAGES + 2; ++i) begin
- hold_a_scalar[i] <= hold_a_scalar[i - 1];
- hold_b_scalar[i] <= hold_b_scalar[i - 1];
- end
-
- // scalar_rev is consumed one stage later (A/B swap).
- hold_scalar_rev[REGFILE_STAGES] <= hold_scalar_rev[REGFILE_STAGES - 1];
-
- // Read-address delay registers.
- hold_read_a_sgpr <= io.op.a_sgpr;
- hold_read_group_1 <= io.op.group;
- hold_read_group_2 <= hold_read_group_1;
-
- hold_read_a_vgpr_1 <= io.op.a_vgpr;
- hold_read_a_vgpr_2 <= hold_read_a_vgpr_1;
-
- hold_read_b_vgpr_1 <= io.op.b_vgpr;
- hold_read_b_vgpr_2 <= hold_read_b_vgpr_1;
-
- // Scalar B source select: immediate has priority over constant, which
- // has priority over the SGPR read.
- if (b_is_imm_out)
- read_b_data_scalar <= imm_out;
- else if (b_is_const_out)
- read_b_data_scalar <= read_const;
- else
- read_b_data_scalar <= read_b_data_sgpr;
-
- // Optional swap of the two scalar operands.
- if (scalar_rev_out) begin
- sgpr_out_a <= read_b_data_scalar;
- sgpr_out_b <= read_a_data_sgpr;
- end else begin
- sgpr_out_a <= read_a_data_sgpr;
- sgpr_out_b <= read_b_data_scalar;
- end
-
- // Per-lane operand select: broadcast the scalar operand, or take the
- // lane's own VGPR read (port A for io.a, port B for io.b).
- for (int i = 0; i < SHADER_LANES; ++i) begin
- io.a[i] <= a_scalar_out ? sgpr_out_a : read_a_data_vgpr[i];
- io.b[i] <= b_scalar_out ? sgpr_out_b : read_b_data_vgpr[i];
- end
- end
-
-endmodule
-
-// Small constant ROM indexed by an SGPR number.
-// The index is registered, the ROM output is registered, and the result is
-// registered once more, so `value` follows `num` by three clock edges.
-// Only entries 0..4 are initialised below; the ROM is declared with
-// 1 << $bits(sgpr_num) entries.
-module gfx_shader_consts
-import gfx::*;
-(
- input logic clk,
-
- input sgpr_num num,
- output word value
-);
-
- word hold_out, rom[1 << $bits(sgpr_num)];
- sgpr_num hold_in;
-
- always_ff @(posedge clk) begin
- value <= hold_out;
- hold_in <= num;
- hold_out <= rom[hold_in];
- end
-
- initial begin
- rom[0] = 'hffff_ffff; // -1
- rom[1] = 'h7fff_ffff; // 2^31 - 1, useful for fp abs
- rom[2] = 'h8000_0000; // 2^31, useful for fp neg
- rom[3] = 'h3f80_0000; // +1.0
- rom[4] = 'hbf80_0000; // -1.0
- end
-
-endmodule
-
-// Register file with two read ports and one write port.
-// Implemented as two gfx_shader_regfile_port instances (`a` and `b`): each
-// serves one read port, and both receive the same write so their contents
-// stay identical. DEPTH_LOG is the address width in bits.
-module gfx_shader_regfile
-import gfx::*;
-#(int DEPTH_LOG = 0)
-(
- input logic clk,
-
- input logic[DEPTH_LOG - 1:0] read_a_num,
- read_b_num,
- output word read_a_data,
- read_b_data,
-
- input logic write,
- input logic[DEPTH_LOG - 1:0] write_num,
- input word write_data
-);
-
- // Copy serving read port A.
- gfx_shader_regfile_port #(.DEPTH_LOG(DEPTH_LOG)) a
- (
- .clk(clk),
- .read_num(read_a_num),
- .read_data(read_a_data),
- .write(write),
- .write_num(write_num),
- .write_data(write_data)
- );
-
- // Copy serving read port B.
- gfx_shader_regfile_port #(.DEPTH_LOG(DEPTH_LOG)) b
- (
- .clk(clk),
- .read_num(read_b_num),
- .read_data(read_b_data),
- .write(write),
- .write_num(write_num),
- .write_data(write_data)
- );
-
-endmodule
-
-// One read port / one write port memory of 1 << DEPTH_LOG words.
-// Read path: read_num is registered, the array output is registered, and the
-// result is registered again, so read_data follows read_num by three clock
-// edges. Write path: write, write_num and write_data are registered once and
-// the array is updated on the following edge.
-// Because the array read and the array write use non-blocking assignments in
-// the same block, a read that hits the address being written in the same
-// cycle returns the old contents.
-module gfx_shader_regfile_port
-import gfx::*;
-#(int DEPTH_LOG = 0)
-(
- input logic clk,
-
- input logic[DEPTH_LOG - 1:0] read_num,
- output word read_data,
-
- input logic write,
- input logic[DEPTH_LOG - 1:0] write_num,
- input word write_data
-);
-
- word file[1 << DEPTH_LOG], hold_read_data, hold_write_data;
- logic hold_write;
- logic[DEPTH_LOG - 1:0] hold_read_num, hold_write_num;
-
- // hold_write does not need rst_n because any initial write is harmless
-
- always_ff @(posedge clk) begin
- // Input registers.
- hold_write <= write;
- hold_read_num <= read_num;
- hold_write_num <= write_num;
- hold_write_data <= write_data;
-
- // Array access using the registered address / data.
- hold_read_data <= file[hold_read_num];
- if (hold_write)
- file[hold_write_num] <= hold_write_data;
-
- // Output register.
- read_data <= hold_read_data;
- end
-
-endmodule
-
-// Per-group table with one write port and READ_PORTS read ports.
-// Each read port gets its own copy of the storage array (declared inside the
-// generate loop); every copy receives the same write. Entries are DATA_WIDTH
-// bits wide and there are 1 << $bits(group_id) of them.
-// Timing per port: inputs are registered, the array is accessed on the next
-// edge, and the read result is registered once more before reaching read[i].
-module gfx_shader_table
-import gfx::*;
-#(int DATA_WIDTH = 0,
- int READ_PORTS = 0)
-(
- input logic clk,
-
- input group_id write_group,
- read_groups[READ_PORTS],
-
- input logic[DATA_WIDTH - 1:0] write,
- input logic write_enable,
-
- output logic[DATA_WIDTH - 1:0] read[READ_PORTS]
-);
-
- genvar i;
-
- generate
- for (i = 0; i < READ_PORTS; ++i) begin: ports
- logic write_enable_hold;
- group_id read_group_hold, write_group_hold;
- logic[DATA_WIDTH - 1:0] data[1 << $bits(group_id)], read_hold, write_hold;
-
- always_ff @(posedge clk) begin
- // Input registers.
- write_hold <= write;
- read_group_hold <= read_groups[i];
- write_group_hold <= write_group;
- write_enable_hold <= write_enable;
-
- // Array read with the registered index.
- read_hold <= data[read_group_hold];
-
- // Array write with the registered index / data / enable.
- if (write_enable_hold)
- data[write_group_hold] <= write_hold;
-
- // Output register.
- read[i] <= read_hold;
- end
- end
- endgenerate
-
-endmodule
diff --git a/platform/wavelet3d/gfx_shader_schedif.rdl b/platform/wavelet3d/gfx_shader_schedif.rdl
deleted file mode 100644
index c846da9..0000000
--- a/platform/wavelet3d/gfx_shader_schedif.rdl
+++ /dev/null
@@ -1,91 +0,0 @@
-// Register map with five 32-bit registers at offsets 0x00..0x10.
-// Map-level defaults: fields are hardware-readable and software write-only
-// unless a register or field overrides them.
-addrmap gfx_shader_schedif {
- name = "Scheduler<->core interface";
-
- default hw = r;
- default sw = w;
- default regwidth = 32;
-
- // 0x00: single self-clearing (singlepulse) control bit.
- reg {
- name = "Shader core control register";
-
- field {
- desc = "Set this field to flush the instruction cache";
-
- singlepulse;
- } IFLUSH[0:0] = 0;
- } CORE @ 0x00;
-
- // 0x04: target selection (GROUP, XGPR) plus three completion flags.
- // Register-level defaults make fields software read-only with no hardware
- // access; GROUP and XGPR override that to sw = rw / hw = r. The *_DONE
- // flags are set by hardware (hwset) and cleared when software reads (rclr).
- reg {
- name = "Wavefront setup control register";
-
- default hw = na;
- default sw = r;
- default precedence = hw;
-
- field {
- desc = "Wavefront group number";
-
- hw = r;
- sw = rw;
- } GROUP[5:0];
-
- // NOTE(review): desc says "SGPR" while the field is named XGPR and
- // SETUP_GPR below is described as "SGPR/VGPR" -- confirm which is meant.
- field {
- desc = "Destination SGPR number";
-
- hw = r;
- sw = rw;
- } XGPR[11:8];
-
- field {
- desc = "PC table update done, group submitted";
-
- rclr;
- hwset;
- } SUBMIT_DONE[16:16] = 0;
-
- field {
- desc = "General-purpose register update done";
-
- rclr;
- hwset;
- } GPR_DONE[17:17] = 0;
-
- field {
- desc = "Lane mask update done";
-
- rclr;
- hwset;
- } MASK_DONE[18:18] = 0;
- } SETUP_CTRL @ 0x04;
-
- // 0x08: full-word value; swmod gives hardware a strobe when software writes.
- reg {
- name = "SGPR/VGPR write register";
-
- field {
- desc = "Value to write";
-
- swmod;
- } VALUE[31:0];
- } SETUP_GPR @ 0x08;
-
- // 0x0c: 16-bit mask value with a software-write strobe (swmod).
- reg {
- name = "Lane mask write register";
-
- field {
- desc = "Mask value to write";
-
- swmod;
- } MASK[15:0];
- } SETUP_MASK @ 0x0c;
-
- // 0x10: PC occupies bits 31:2 (bits 1:0 are not part of the field);
- // swmod strobes on software write.
- reg {
- name = "Group submit register";
-
- field {
- desc = "Initial group program counter, submits group on write";
-
- swmod;
- } PC[31:2];
- } SETUP_SUBMIT @ 0x10;
-};
-
diff --git a/platform/wavelet3d/gfx_shader_setup.sv b/platform/wavelet3d/gfx_shader_setup.sv
deleted file mode 100644
index f46fb66..0000000
--- a/platform/wavelet3d/gfx_shader_setup.sv
+++ /dev/null
@@ -1,37 +0,0 @@
-// Interface carrying setup writes in one direction (`write`) and completion
-// flags in the other (`set_done`). No logic is defined here, only the two
-// signal bundles and the modports that fix their directions.
-// The first `;` below ends the package import, the second ends the
-// (port-less) interface header.
-interface gfx_shader_setup
-import gfx::*;;
-
- // Setup request: target group, values to write, and one strobe per target
- // (pc_set / gpr_set / mask_set).
- struct
- {
- group_id group;
- word_ptr pc;
- xgpr_num gpr;
- word gpr_value;
- lane_mask mask;
- logic pc_set,
- gpr_set,
- mask_set;
- } write;
-
- // One completion flag per kind of setup write.
- struct
- {
- logic gpr,
- mask,
- submit;
- } set_done;
-
- // Core side: receives `write`, drives `set_done`.
- modport core
- (
- input write,
-
- output set_done
- );
-
- // Scheduler side: drives `write`, receives `set_done`.
- modport sched
- (
- input set_done,
-
- output write
- );
-
-endinterface
diff --git a/platform/wavelet3d/gfx_shader_sfu.sv b/platform/wavelet3d/gfx_shader_sfu.sv
deleted file mode 100644
index d65e522..0000000
--- a/platform/wavelet3d/gfx_shader_sfu.sv
+++ /dev/null
@@ -1,17 +0,0 @@
-// Stub module: only the port list is declared, the body contains no logic.
-// All non-logic port types come from package gfx or from interfaces declared
-// in other files.
-module gfx_shader_sfu
-import gfx::*;
-(
- input logic clk,
- rst_n,
-
- input sfu_op op,
- input wave_exec wave,
-
- gfx_regfile_io.ab read_data, // `ab` modport of gfx_regfile_io
-
- gfx_shake.rx in_shake, // rx modport: valid is an input, ready is an output
-
- gfx_wb.tx wb // tx modport: ready is the only input, everything else is driven from here
-);
-
-endmodule
diff --git a/platform/wavelet3d/gfx_shake.sv b/platform/wavelet3d/gfx_shake.sv
deleted file mode 100644
index baae0c3..0000000
--- a/platform/wavelet3d/gfx_shake.sv
+++ /dev/null
@@ -1,24 +0,0 @@
-// Two-signal valid/ready handshake bundle with three views.
-interface gfx_shake;
-
- logic ready;
- logic valid;
-
- // Sender: drives valid, observes ready.
- modport tx
- (
- input ready,
- output valid
- );
-
- // Receiver: drives ready, observes valid.
- modport rx
- (
- input valid,
- output ready
- );
-
- // Observer: both signals are inputs.
- modport peek
- (
- input ready,
- valid
- );
-
-endinterface
diff --git a/platform/wavelet3d/gfx_sim_debug.sv b/platform/wavelet3d/gfx_sim_debug.sv
deleted file mode 100644
index 4b4622a..0000000
--- a/platform/wavelet3d/gfx_sim_debug.sv
+++ /dev/null
@@ -1,50 +0,0 @@
-// Simulation-only write sink: every accepted write-data beat prints the low
-// byte of wdata with $display. Reads are never answered (arready and rvalid
-// are tied to 0) and the write-address channel is always accepted without
-// being looked at (awready tied to 1).
-module gfx_sim_debug
-import gfx::*;
-(
- input logic clk,
- rst_n,
-
- gfx_axil.s axis
-);
-
- // INPUT: ready to take a data beat.
- // STALL: a beat was taken but its response has not been accepted yet.
- enum int unsigned
- {
- INPUT,
- STALL
- } state;
-
- assign axis.rvalid = 0;
- assign axis.arready = 0;
- assign axis.awready = 1;
-
- always_comb
- unique case (state)
- // Accept data and offer the response in the same cycle.
- INPUT: begin
- axis.wready = 1;
- axis.bvalid = axis.wvalid;
- end
-
- // Block further data, keep the pending response asserted.
- STALL: begin
- axis.wready = 0;
- axis.bvalid = 1;
- end
- endcase
-
- always_ff @(posedge clk or negedge rst_n)
- if (~rst_n)
- state <= INPUT;
- else
- unique case (state)
- INPUT:
- if (axis.wvalid) begin
- // NOTE(review): $display appends a newline after every
- // character; confirm that is intended rather than $write.
- $display("%c", axis.wdata[7:0]);
- // Response not taken this cycle: hold it in STALL.
- if (~axis.bready)
- state <= STALL;
- end
-
- STALL:
- if (axis.bready)
- state <= INPUT;
- endcase
-
-endmodule
diff --git a/platform/wavelet3d/gfx_skid_buf.sv b/platform/wavelet3d/gfx_skid_buf.sv
deleted file mode 100644
index e3e5247..0000000
--- a/platform/wavelet3d/gfx_skid_buf.sv
+++ /dev/null
@@ -1,20 +0,0 @@
-// Data half of a skid buffer, WIDTH bits wide.
-// While `stall` is low, `out` passes `in` straight through and `skid`
-// captures `in` on every clock edge. While `stall` is high, `skid` stops
-// updating and `out` presents the held value instead.
-module gfx_skid_buf
-#(int WIDTH = 0)
-(
- input logic clk,
-
- input logic[WIDTH - 1:0] in,
- input logic stall,
-
- output logic[WIDTH - 1:0] out
-);
-
- logic[WIDTH - 1:0] skid;
-
- assign out = stall ? skid : in;
-
- always_ff @(posedge clk)
- if (~stall)
- skid <= in;
-
-endmodule
diff --git a/platform/wavelet3d/gfx_skid_flow.sv b/platform/wavelet3d/gfx_skid_flow.sv
deleted file mode 100644
index 7890ae3..0000000
--- a/platform/wavelet3d/gfx_skid_flow.sv
+++ /dev/null
@@ -1,31 +0,0 @@
-// Handshake half of a skid buffer. It registers the downstream ready and the
-// upstream valid, and derives in_ready, out_valid and stall from them.
-// `stall` is the complement of in_ready.
-module gfx_skid_flow
-(
- input logic clk,
- rst_n,
-
- input logic in_valid,
- out_ready,
-
- output logic in_ready,
- out_valid,
- stall
-);
-
- // was_ready: out_ready as sampled on the previous clock edge.
- // was_valid: in_valid as sampled on the last non-stalled clock edge.
- logic was_ready, was_valid;
-
- assign stall = ~in_ready;
- // Upstream may send if downstream was ready last cycle, or if nothing
- // valid was captured.
- assign in_ready = was_ready | ~was_valid;
- // Output is valid for fresh input, and always while stalled.
- assign out_valid = in_valid | stall;
-
- always_ff @(posedge clk or negedge rst_n)
- if (~rst_n) begin
- was_ready <= 0;
- was_valid <= 0;
- end else begin
- was_ready <= out_ready;
-
- // Freeze was_valid while stalled.
- if (~stall)
- was_valid <= in_valid;
- end
-
-endmodule
diff --git a/platform/wavelet3d/gfx_wb.sv b/platform/wavelet3d/gfx_wb.sv
deleted file mode 100644
index 20c7c64..0000000
--- a/platform/wavelet3d/gfx_wb.sv
+++ /dev/null
@@ -1,51 +0,0 @@
-// Writeback interface: a valid/ready handshake plus a payload made of
-// per-lane data, destination register and group, an optional lane-mask
-// update and optional PC update fields. Only signals and modports are
-// declared here; their exact semantics are defined by the modules using them.
-interface gfx_wb;
-
- import gfx::*;
-
- word lanes[SHADER_LANES];
- logic mask_update, pc_inc, pc_update, ready, scalar, valid, writeback;
- group_id group;
- xgpr_num dest;
- lane_mask mask;
- pc_offset pc_add;
-
- // Producer: drives every signal except ready.
- modport tx
- (
- input ready,
-
- output dest,
- group,
- lanes,
- valid,
- scalar,
- writeback,
-
- mask,
- mask_update,
-
- pc_add,
- pc_inc,
- pc_update
- );
-
- // Consumer: observes every signal and drives only ready.
- modport rx
- (
- input dest,
- group,
- lanes,
- valid,
- scalar,
- writeback,
-
- mask,
- mask_update,
-
- pc_add,
- pc_inc,
- pc_update,
-
- output ready
- );
-
-
-endinterface
diff --git a/platform/wavelet3d/gfx_xbar_sched.sv b/platform/wavelet3d/gfx_xbar_sched.sv
deleted file mode 100644
index 95e4afb..0000000
--- a/platform/wavelet3d/gfx_xbar_sched.sv
+++ /dev/null
@@ -1,146 +0,0 @@
-// Wraps an `axilxbar` instance configured for one upstream port (`sched`)
-// and three downstream ports (`bootrom`, `debug`, `shader_0`).
-// In every concatenation below the order is {shader_0, debug, bootrom}, so
-// bootrom occupies the least-significant slice and shader_0 the
-// most-significant one; SLAVE_ADDR / SLAVE_MASK use the same order.
-// Protection, strobe and response signals are not carried by the gfx_axil
-// ports used here: upstream PROT inputs are tied to 0, upstream WSTRB to all
-// ones, downstream BRESP/RRESP inputs to 0, and the corresponding outputs
-// are left unconnected.
-module gfx_xbar_sched
-import gfx::*;
-(
- input logic clk,
- srst_n,
-
- gfx_axil.s sched,
-
- gfx_axil.m debug,
- gfx_axil.m bootrom,
- gfx_axil.m shader_0
-);
-
- // Address decode: each region is selected by the top 12 address bits.
- // NOTE(review): BOOTROM_BASE is used below but not declared in this
- // module; presumably it comes from package gfx -- confirm.
- localparam word BOOTROM_MASK = 32'hfff0_0000;
- localparam word DEBUG_BASE = 32'h0020_0000;
- localparam word DEBUG_MASK = 32'hfff0_0000;
- localparam word SHADER_0_BASE = 32'h0100_0000;
- localparam word SHADER_0_MASK = 32'hfff0_0000;
-
- defparam xbar.NM = 1;
- defparam xbar.NS = 3;
- defparam xbar.OPT_LOWPOWER = 0;
-
- defparam xbar.SLAVE_ADDR = {
- SHADER_0_BASE,
- DEBUG_BASE,
- BOOTROM_BASE
- };
-
- defparam xbar.SLAVE_MASK = {
- SHADER_0_MASK,
- DEBUG_MASK,
- BOOTROM_MASK
- };
-
- axilxbar xbar
- (
- .S_AXI_ACLK(clk),
- .S_AXI_ARESETN(srst_n),
-
- // Upstream write address channel.
- .S_AXI_AWVALID(sched.awvalid),
- .S_AXI_AWREADY(sched.awready),
- .S_AXI_AWADDR(sched.awaddr),
- .S_AXI_AWPROT('0),
-
- // Upstream write data channel (all byte lanes always enabled).
- .S_AXI_WVALID(sched.wvalid),
- .S_AXI_WREADY(sched.wready),
- .S_AXI_WDATA(sched.wdata),
- .S_AXI_WSTRB('1),
-
- // Upstream write response channel (response code discarded).
- .S_AXI_BVALID(sched.bvalid),
- .S_AXI_BREADY(sched.bready),
- .S_AXI_BRESP(),
-
- // Upstream read address channel.
- .S_AXI_ARVALID(sched.arvalid),
- .S_AXI_ARREADY(sched.arready),
- .S_AXI_ARADDR(sched.araddr),
- .S_AXI_ARPROT('0),
-
- // Upstream read data channel (response code discarded).
- .S_AXI_RVALID(sched.rvalid),
- .S_AXI_RREADY(sched.rready),
- .S_AXI_RDATA(sched.rdata),
- .S_AXI_RRESP(),
-
- // Downstream write address channels.
- .M_AXI_AWADDR({
- shader_0.awaddr,
- debug.awaddr,
- bootrom.awaddr
- }),
- .M_AXI_AWPROT(),
- .M_AXI_AWVALID({
- shader_0.awvalid,
- debug.awvalid,
- bootrom.awvalid
- }),
- .M_AXI_AWREADY({
- shader_0.awready,
- debug.awready,
- bootrom.awready
- }),
-
- // Downstream write data channels.
- .M_AXI_WDATA({
- shader_0.wdata,
- debug.wdata,
- bootrom.wdata
- }),
- .M_AXI_WSTRB(),
- .M_AXI_WVALID({
- shader_0.wvalid,
- debug.wvalid,
- bootrom.wvalid
- }),
- .M_AXI_WREADY({
- shader_0.wready,
- debug.wready,
- bootrom.wready
- }),
-
- // Downstream write response channels (response code forced to 0).
- .M_AXI_BRESP('0),
- .M_AXI_BVALID({
- shader_0.bvalid,
- debug.bvalid,
- bootrom.bvalid
- }),
- .M_AXI_BREADY({
- shader_0.bready,
- debug.bready,
- bootrom.bready
- }),
-
- // Downstream read address channels.
- .M_AXI_ARADDR({
- shader_0.araddr,
- debug.araddr,
- bootrom.araddr
- }),
- .M_AXI_ARPROT(),
- .M_AXI_ARVALID({
- shader_0.arvalid,
- debug.arvalid,
- bootrom.arvalid
- }),
- .M_AXI_ARREADY({
- shader_0.arready,
- debug.arready,
- bootrom.arready
- }),
-
- // Downstream read data channels (response code forced to 0).
- .M_AXI_RDATA({
- shader_0.rdata,
- debug.rdata,
- bootrom.rdata
- }),
- .M_AXI_RRESP('0),
- .M_AXI_RVALID({
- shader_0.rvalid,
- debug.rvalid,
- bootrom.rvalid
- }),
- .M_AXI_RREADY({
- shader_0.rready,
- debug.rready,
- bootrom.rready
- })
- );
-
-endmodule
diff --git a/platform/wavelet3d/mod.mk b/platform/wavelet3d/mod.mk
index 153f9c7..16c6cfc 100644
--- a/platform/wavelet3d/mod.mk
+++ b/platform/wavelet3d/mod.mk
@@ -1,21 +1,10 @@
-cores := gfx_shader_schedif
-
define core
- $(this)/deps := axixbar fp_unit gfx_shader_schedif picorv32
+ $(this)/deps := gfx
- $(this)/rtl_top := gfx_top
+ $(this)/rtl_top := w3d_top
$(this)/rtl_dirs := .
- $(this)/rtl_files := gfx_isa.sv gfx_pkg.sv gfx_top.sv
+ $(this)/rtl_files := w3d_top.sv
$(this)/vl_main := main.cpp
$(this)/vl_pkgconfig := sdl2
endef
-
-define core/gfx_shader_schedif
- $(this)/hooks := regblock
-
- $(this)/regblock_rdl := gfx_shader_schedif.rdl
- $(this)/regblock_top := gfx_shader_schedif
- $(this)/regblock_args := --default-reset arst_n
- $(this)/regblock_cpuif := axi4-lite
-endef
diff --git a/platform/wavelet3d/gfx_top.sv b/platform/wavelet3d/w3d_top.sv
index 41ff7f4..34ecb52 100644
--- a/platform/wavelet3d/gfx_top.sv
+++ b/platform/wavelet3d/w3d_top.sv
@@ -1,4 +1,4 @@
-module gfx_top
+module w3d_top
import gfx::*;
(
input logic clk,