summaryrefslogtreecommitdiff
path: root/platform
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--platform/wavelet3d/gfx_axib.sv81
-rw-r--r--platform/wavelet3d/gfx_axil.sv49
-rw-r--r--platform/wavelet3d/gfx_beats.sv29
-rw-r--r--platform/wavelet3d/gfx_fifo.sv102
-rw-r--r--platform/wavelet3d/gfx_fpint.sv85
-rw-r--r--platform/wavelet3d/gfx_front_back.sv37
-rw-r--r--platform/wavelet3d/gfx_isa.sv82
-rw-r--r--platform/wavelet3d/gfx_pkg.sv49
-rw-r--r--platform/wavelet3d/gfx_raster.sv2
-rw-r--r--platform/wavelet3d/gfx_regfile_io.sv76
-rw-r--r--platform/wavelet3d/gfx_shader.sv54
-rw-r--r--platform/wavelet3d/gfx_shader_back.sv194
-rw-r--r--platform/wavelet3d/gfx_shader_front.sv718
-rw-r--r--platform/wavelet3d/gfx_shader_group.sv16
-rw-r--r--platform/wavelet3d/gfx_shader_mem.sv16
-rw-r--r--platform/wavelet3d/gfx_shader_regs.sv253
-rw-r--r--platform/wavelet3d/gfx_shader_schedif.rdl74
-rw-r--r--platform/wavelet3d/gfx_shader_sfu.sv16
-rw-r--r--platform/wavelet3d/gfx_shake.sv24
-rw-r--r--platform/wavelet3d/gfx_skid_buf.sv20
-rw-r--r--platform/wavelet3d/gfx_skid_flow.sv31
-rw-r--r--platform/wavelet3d/gfx_top.sv126
-rw-r--r--platform/wavelet3d/gfx_wb.sv35
-rw-r--r--platform/wavelet3d/mod.mk15
24 files changed, 2056 insertions, 128 deletions
diff --git a/platform/wavelet3d/gfx_axib.sv b/platform/wavelet3d/gfx_axib.sv
new file mode 100644
index 0000000..7b3cbdc
--- /dev/null
+++ b/platform/wavelet3d/gfx_axib.sv
@@ -0,0 +1,81 @@
+// AXI4 with burst support
+interface gfx_axib;
+// Signal bundle with the five AXI-style channels (AW, W, B, AR, R); no strobe or response-code signals are declared here.
+ import gfx::word;
+
+ logic awvalid, // AW: write address channel handshake
+ awready;
+ logic[7:0] awlen; // AW: burst length field
+ logic[1:0] awburst; // AW: burst type field
+ word awaddr; // AW: word-wide address
+
+ logic wlast; // W: marks the final beat of a write burst
+ logic wvalid;
+ logic wready;
+ word wdata;
+
+ logic bvalid; // B: write response handshake only
+ logic bready;
+
+ logic arvalid, // AR: read address channel handshake
+ arready;
+ logic[7:0] arlen; // AR: burst length field
+ logic[1:0] arburst; // AR: burst type field
+ word araddr;
+
+ logic rlast; // R: marks the final beat of a read burst
+ logic rvalid;
+ logic rready;
+ word rdata;
+
+ modport m // requester side: drives addresses, write data, bready and rready
+ (
+ input awready,
+ wready,
+ bvalid,
+ arready,
+ rlast,
+ rvalid,
+ rdata,
+
+ output awlen,
+ awburst,
+ awvalid,
+ awaddr,
+ wlast,
+ wvalid,
+ wdata,
+ bready,
+ arlen,
+ arburst,
+ arvalid,
+ araddr,
+ rready
+ );
+
+ modport s // responder side: exact mirror of modport m
+ (
+ input awlen,
+ awburst,
+ awvalid,
+ awaddr,
+ wlast,
+ wvalid,
+ wdata,
+ bready,
+ arlen,
+ arburst,
+ arvalid,
+ araddr,
+ rready,
+
+ output awready,
+ wready,
+ bvalid,
+ arready,
+ rlast,
+ rvalid,
+ rdata
+ );
+
+endinterface
diff --git a/platform/wavelet3d/gfx_axil.sv b/platform/wavelet3d/gfx_axil.sv
index f86dfbf..c254e26 100644
--- a/platform/wavelet3d/gfx_axil.sv
+++ b/platform/wavelet3d/gfx_axil.sv
@@ -24,39 +24,38 @@ interface gfx_axil;
modport m
(
input awready,
- wready,
- bvalid,
- arready,
- rvalid,
- rdata,
+ wready,
+ bvalid,
+ arready,
+ rvalid,
+ rdata,
output awvalid,
- awaddr,
- wvalid,
- wdata,
- bready,
- arvalid,
- araddr,
- rready
+ awaddr,
+ wvalid,
+ wdata,
+ bready,
+ arvalid,
+ araddr,
+ rready
);
modport s
(
input awvalid,
- awaddr,
- wvalid,
- wdata,
- bready,
- arvalid,
- araddr,
- rready,
+ awaddr,
+ wvalid,
+ wdata,
+ bready,
+ arvalid,
+ araddr,
+ rready,
output awready,
- wready,
- bvalid,
- arready,
- rvalid,
- rdata
-
+ wready,
+ bvalid,
+ arready,
+ rvalid,
+ rdata
);
endinterface
diff --git a/platform/wavelet3d/gfx_beats.sv b/platform/wavelet3d/gfx_beats.sv
new file mode 100644
index 0000000..fcbb091
--- /dev/null
+++ b/platform/wavelet3d/gfx_beats.sv
@@ -0,0 +1,29 @@
+interface gfx_beats // valid/ready handshake bundle carrying WIDTH bits of data
+#(int WIDTH = $bits(gfx::word));
+
+ logic[WIDTH - 1:0] data;
+ logic ready;
+ logic valid;
+
+ modport tx // producer: drives data and valid, samples ready
+ (
+ input ready,
+ output data,
+ valid
+ );
+
+ modport rx // consumer: samples data and valid, drives ready
+ (
+ input data,
+ valid,
+ output ready
+ );
+
+ modport peek // passive observer: every signal is an input
+ (
+ input data,
+ ready,
+ valid
+ );
+
+endinterface
diff --git a/platform/wavelet3d/gfx_fifo.sv b/platform/wavelet3d/gfx_fifo.sv
new file mode 100644
index 0000000..7174e4d
--- /dev/null
+++ b/platform/wavelet3d/gfx_fifo.sv
@@ -0,0 +1,102 @@
+module gfx_fifo // FIFO between two gfx_beats ports, with skid stages on both the input and the output side
+#(int WIDTH = 0, // data width in bits; the 0 default is a placeholder the instantiator overrides
+ int DEPTH = 0) // number of entries; NOTE(review): pointers wrap by overflow, which presumably assumes a power of two - confirm
+(
+ input logic clk,
+ rst_n,
+
+ gfx_beats.rx in,
+ gfx_beats.tx out
+);
+
+ logic do_read, do_write, full_if_eq, in_stall, out_stall,
+ may_read, may_write, read, read_ok, write;
+
+ logic[WIDTH - 1:0] fifo[DEPTH], read_data, write_data;
+ logic[$clog2(DEPTH) - 1:0] read_ptr, write_ptr;
+
+ assign do_read = read & may_read; // a read is performed only when requested and allowed
+ assign do_write = write & may_write; // a write is performed only when requested and allowed
+
+ always_comb begin
+ may_read = full_if_eq; // equal pointers: readable only if that means "full"
+ may_write = !full_if_eq; // equal pointers: writable only if that means "empty"
+
+ if (read) // a read request also permits a write, even when full
+ may_write = 1;
+
+ if (read_ptr != write_ptr) begin // pointers differ: both directions are allowed
+ may_read = 1;
+ may_write = 1;
+ end
+ end
+
+ gfx_skid_flow in_flow // handshake control for the input side (in.valid/in.ready <-> write/may_write)
+ (
+ .clk,
+ .rst_n,
+ .stall(in_stall),
+ .in_ready(in.ready),
+ .in_valid(in.valid),
+ .out_ready(may_write),
+ .out_valid(write)
+ );
+
+ gfx_skid_flow out_flow // handshake control for the output side (read_ok/read <-> out.valid/out.ready)
+ (
+ .clk,
+ .rst_n,
+ .stall(out_stall),
+ .in_ready(read),
+ .in_valid(read_ok),
+ .out_ready(out.ready),
+ .out_valid(out.valid)
+ );
+
+ gfx_skid_buf #(WIDTH) in_skid // data path matching in_flow
+ (
+ .clk,
+ .in(in.data),
+ .out(write_data),
+ .stall(in_stall)
+ );
+
+ gfx_skid_buf #(WIDTH) out_skid // data path matching out_flow
+ (
+ .clk,
+ .in(read_data),
+ .out(out.data),
+ .stall(out_stall)
+ );
+
+ always_ff @(posedge clk or negedge rst_n)
+ if (~rst_n) begin
+ read_ok <= 0;
+ read_ptr <= 0;
+ write_ptr <= 0;
+ full_if_eq <= 0;
+ end else begin
+ if (~out_stall) // read_ok is the registered read strobe, held while the output is stalled
+ read_ok <= read && may_read;
+
+ if (do_read)
+ read_ptr <= read_ptr + 1;
+
+ if (do_write)
+ write_ptr <= write_ptr + 1;
+
+ if (do_read & ~do_write) // net removal: equal pointers now mean "empty"
+ full_if_eq <= 0;
+ else if (~do_read & do_write) // net insertion: equal pointers now mean "full"
+ full_if_eq <= 1;
+ end
+
+ always_ff @(posedge clk) begin // storage and read register carry no reset
+ if (~out_stall)
+ read_data <= fifo[read_ptr];
+
+ if (may_write) // gated by may_write only, not by write; write_ptr advances only on do_write
+ fifo[write_ptr] <= write_data;
+ end
+
+endmodule
diff --git a/platform/wavelet3d/gfx_fpint.sv b/platform/wavelet3d/gfx_fpint.sv
index b3108a4..ae2fc28 100644
--- a/platform/wavelet3d/gfx_fpint.sv
+++ b/platform/wavelet3d/gfx_fpint.sv
@@ -1,72 +1,33 @@
module gfx_fpint
+import gfx::*;
(
- input logic clk,
- rst_n,
+ input logic clk,
+ rst_n,
- input gfx::word a[gfx::SHADER_LANES],
- b[gfx::SHADER_LANES],
- input logic in_valid,
- setup_mul_float,
- setup_unit_b,
- mnorm_put_hi,
- mnorm_put_lo,
- mnorm_put_mul,
- mnorm_zero_b,
- mnorm_zero_flags,
- minmax_abs,
- minmax_swap,
- minmax_zero_min,
- minmax_copy_flags,
- shiftr_int_signed,
- addsub_copy_flags,
- addsub_int_operand,
- clz_force_nop,
- shiftl_copy_flags,
- round_copy_flags,
- round_enable,
- encode_enable,
+ input fpint_op op,
+ input logic abort,
+ in_valid,
- output logic out_valid,
- output gfx::word q[gfx::SHADER_LANES]
-);
+ gfx_regfile_io.ab read_data,
- import gfx::*;
+ gfx_wb.tx wb
+);
logic stage_valid[FPINT_STAGES];
- fpint_op op, stage_op[FPINT_STAGES];
+ fpint_op stage_op[FPINT_STAGES];
assign stage_op[0] = op;
assign stage_valid[0] = in_valid;
- assign op.setup_mul_float = setup_mul_float;
- assign op.setup_unit_b = setup_unit_b;
- assign op.mnorm_put_hi = mnorm_put_hi;
- assign op.mnorm_put_lo = mnorm_put_lo;
- assign op.mnorm_put_mul = mnorm_put_mul;
- assign op.mnorm_zero_b = mnorm_zero_b;
- assign op.mnorm_zero_flags = mnorm_zero_flags;
- assign op.minmax_abs = minmax_abs;
- assign op.minmax_swap = minmax_swap;
- assign op.minmax_zero_min = minmax_zero_min;
- assign op.minmax_copy_flags = minmax_copy_flags;
- assign op.shiftr_int_signed = shiftr_int_signed;
- assign op.addsub_copy_flags = addsub_copy_flags;
- assign op.addsub_int_operand = addsub_int_operand;
- assign op.clz_force_nop = clz_force_nop;
- assign op.shiftl_copy_flags = shiftl_copy_flags;
- assign op.round_copy_flags = round_copy_flags;
- assign op.round_enable = round_enable;
- assign op.encode_enable = encode_enable;
-
genvar lane;
generate
for (lane = 0; lane < SHADER_LANES; ++lane) begin: lanes
gfx_fpint_lane unit
(
.clk(clk),
- .a(a[lane]),
- .b(b[lane]),
- .q(q[lane]),
+ .a(read_data.a[lane]),
+ .b(read_data.b[lane]),
+ .q(wb.lanes[lane]),
.mul_float_0(stage_op[0].setup_mul_float),
.unit_b_0(stage_op[0].setup_unit_b),
.put_hi_2(stage_op[2].mnorm_put_hi),
@@ -94,11 +55,21 @@ module gfx_fpint
for (int i = 1; i < FPINT_STAGES; ++i)
stage_op[i] <= stage_op[i - 1];
- always_ff @(posedge clk or negedge rst_n) begin
- for (int i = 1; i < FPINT_STAGES; ++i)
- stage_valid[i] <= !rst_n ? 0 : stage_valid[i - 1];
+ always_ff @(posedge clk or negedge rst_n)
+ if (~rst_n) begin
+ for (int i = 1; i < FPINT_STAGES; ++i)
+ stage_valid[i] <= 0;
- out_valid <= !rst_n ? 0 : stage_valid[FPINT_STAGES - 1];
- end
+ wb.valid <= 0;
+ end else begin
+ for (int i = 1; i < FPINT_STAGES; ++i)
+ stage_valid[i] <= stage_valid[i - 1];
+
+ // Raised 1 cycle after in_valid
+ if (abort)
+ stage_valid[2] <= 0;
+
+ wb.valid <= stage_valid[FPINT_STAGES - 1];
+ end
endmodule
diff --git a/platform/wavelet3d/gfx_front_back.sv b/platform/wavelet3d/gfx_front_back.sv
new file mode 100644
index 0000000..890b734
--- /dev/null
+++ b/platform/wavelet3d/gfx_front_back.sv
@@ -0,0 +1,37 @@
+interface gfx_front_back // signals exchanged between the shader front end and back end
+import gfx::*;; // first ';' ends the package import, second ';' ends the interface header
+
+ struct // front -> back: group id plus one decoded operation per port p0..p3
+ {
+ group_id group;
+ fpint_op p0;
+ mem_op p1;
+ sfu_op p2;
+ group_op p3;
+ } execute;
+
+ struct // back -> front: a group id qualified by valid
+ {
+ logic valid;
+ group_id group;
+ } loop;
+
+ shader_dispatch dispatch; // front -> back: per-port dispatch flags plus valid
+
+ modport front // front end view: consumes loop, produces execute and dispatch
+ (
+ input loop,
+
+ output execute,
+ dispatch
+ );
+
+ modport back // back end view: mirror of modport front
+ (
+ input execute,
+ dispatch,
+
+ output loop
+ );
+
+endinterface
diff --git a/platform/wavelet3d/gfx_isa.sv b/platform/wavelet3d/gfx_isa.sv
new file mode 100644
index 0000000..873e6ec
--- /dev/null
+++ b/platform/wavelet3d/gfx_isa.sv
@@ -0,0 +1,82 @@
+package gfx_isa; // instruction word layout and register-number types
+
+ typedef logic[3:0] sgpr_num; // 4-bit SGPR number
+ typedef logic[2:0] vgpr_num; // 3-bit VGPR number
+
+ typedef union packed // one 4-bit register field, viewed either as an SGPR or as a VGPR number
+ {
+ sgpr_num sgpr;
+
+ struct packed
+ {
+ logic[$bits(sgpr_num) - $bits(vgpr_num) - 1:0] reserved; // pads the VGPR view up to the SGPR width
+ vgpr_num num;
+ } vgpr;
+ } xgpr_num;
+
+ typedef struct packed // full instruction word; the first member occupies the most significant bits
+ {
+ enum logic[1:0] // register mode; see gfx_shader_read_regs for how each value selects scalar operands
+ {
+ REGS_SVS = 2'b00,
+ REGS_SSS = 2'b01,
+ REGS_VVS = 2'b10,
+ REGS_VVV = 2'b11
+ } reg_mode;
+
+ union packed // operand fields; only the register-register form is defined so far
+ {
+ struct packed
+ {
+ logic b_is_imm; // selects between the two views of b below
+
+ union packed // 13 bits, either an immediate or a register/constant reference
+ {
+ logic[12:0] imm;
+
+ struct packed
+ {
+ logic from_consts;
+ logic[7:0] reserved;
+ xgpr_num r;
+ } read;
+ } b;
+
+ xgpr_num ra, // operand a register number
+ rd; // NOTE(review): presumably the destination register - confirm
+ } rr;
+ } dst_src;
+
+ logic reg_rev;
+
+ union packed // class-specific fields; only the FPINT opcode is defined so far
+ {
+ struct packed
+ {
+ enum logic[4:0]
+ {
+ INSN_FPINT_MOV = 0,
+ INSN_FPINT_FMUL = 1,
+ INSN_FPINT_IMUL = 2,
+ INSN_FPINT_FADD = 3,
+ INSN_FPINT_RES4 = 4,
+ INSN_FPINT_FMAX = 5,
+ INSN_FPINT_RES6 = 6,
+ INSN_FPINT_FMIN = 7,
+ INSN_FPINT_RES8 = 8,
+ INSN_FPINT_FCVT = 9,
+ INSN_FPINT_RES[10:31] // generates INSN_FPINT_RES10 .. INSN_FPINT_RES31
+ } op;
+ } fpint;
+ } by_class;
+
+ enum logic[1:0] // instruction class, held in the two least significant bits
+ {
+ INSN_FPINT = 0,
+ INSN_MEM = 1,
+ INSN_SFU = 2,
+ INSN_GROUP = 3
+ } insn_class;
+ } insn_word;
+
+endpackage
diff --git a/platform/wavelet3d/gfx_pkg.sv b/platform/wavelet3d/gfx_pkg.sv
index 3c4b747..42d3f05 100644
--- a/platform/wavelet3d/gfx_pkg.sv
+++ b/platform/wavelet3d/gfx_pkg.sv
@@ -2,15 +2,22 @@ package gfx;
typedef logic[31:0] word;
- localparam int SUBWORD_BITS = $clog2($bits(word)) - $clog2($bits(byte));
- localparam int BYTES_PER_WORD = 1 << SUBWORD_BITS;
-
typedef word uword;
typedef logic signed[$bits(word) - 1:0] sword;
typedef logic[$bits(word) / 2 - 1:0] uhword;
typedef logic signed[$bits(word) / 2 - 1:0] shword;
typedef logic[2 * $bits(word) - 1:0] udword;
typedef logic signed[2 * $bits(word) - 1:0] sdword;
+ typedef logic signed[4 * $bits(word) - 1:0] qword;
+ typedef logic signed[8 * $bits(word) - 1:0] oword;
+
+ localparam int SUBWORD_BITS = $clog2($bits(word)) - $clog2($bits(byte));
+ localparam int BYTES_PER_WORD = 1 << SUBWORD_BITS;
+
+ typedef logic[$bits(word) - SUBWORD_BITS - 1:0] word_ptr;
+ typedef logic[$bits(word_ptr) - 1 - 1:0] dword_ptr;
+ typedef logic[$bits(word_ptr) - 2 - 1:0] qword_ptr;
+ typedef logic[$bits(word_ptr) - 3 - 1:0] oword_ptr;
typedef logic[7:0] float_exp;
typedef logic[$bits(word) - $bits(float_exp) - 2:0] float_mant;
@@ -99,7 +106,8 @@ package gfx;
shiftl_copy_flags,
round_copy_flags,
round_enable,
- encode_enable;
+ encode_enable,
+ writeback;
} fpint_op;
typedef struct packed
@@ -228,6 +236,21 @@ package gfx;
overflow;
} fpint_rnorm_encode;
+ typedef struct packed
+ {
+ logic todo;
+ } mem_op;
+
+ typedef struct packed
+ {
+ logic todo;
+ } sfu_op;
+
+ typedef struct packed
+ {
+ logic todo;
+ } group_op;
+
// Q22.10
typedef logic[9:0] fixed_frac;
typedef logic[$bits(word) - $bits(fixed_frac) - 1:0] fixed_int;
@@ -345,6 +368,24 @@ package gfx;
typedef logic[RASTER_SIZE - 1:0] lane_no;
typedef logic[SHADER_LANES - 1:0] lane_mask;
+ typedef logic[5:0] group_id;
+
+ localparam int REGFILE_STAGES = 3;
+ localparam int REG_READ_STAGES = 2 + REGFILE_STAGES + 1;
+
+ typedef gfx_isa::sgpr_num sgpr_num;
+ typedef gfx_isa::vgpr_num vgpr_num;
+ typedef gfx_isa::xgpr_num xgpr_num;
+
+ typedef struct packed
+ {
+ // Does not include p0 because p0 has no ready signal
+ logic p1,
+ p2,
+ p3,
+ valid;
+ } shader_dispatch;
+
localparam int FIXED_MULADD_DEPTH = 5;
localparam int FIXED_DOTADD_DEPTH = 2 * FIXED_MULADD_DEPTH;
diff --git a/platform/wavelet3d/gfx_raster.sv b/platform/wavelet3d/gfx_raster.sv
index 0e740dc..a57a672 100644
--- a/platform/wavelet3d/gfx_raster.sv
+++ b/platform/wavelet3d/gfx_raster.sv
@@ -261,7 +261,7 @@ module gfx_raster_bounds
endcase
if (in_state == IN_DIM_Y & next_dim)
- assert(geometry.tlast);
+ assert (geometry.tlast);
end
always_ff @(posedge clk) begin
diff --git a/platform/wavelet3d/gfx_regfile_io.sv b/platform/wavelet3d/gfx_regfile_io.sv
new file mode 100644
index 0000000..49dcd5c
--- /dev/null
+++ b/platform/wavelet3d/gfx_regfile_io.sv
@@ -0,0 +1,76 @@
+interface gfx_regfile_io; // connects the register file to the front end (read requests, pc lookup) and back end (operands, writes)
+
+ import gfx::*;
+
+ struct // read request: register numbers and operand-selection flags for operands a and b
+ {
+ group_id group;
+ sgpr_num a_sgpr,
+ b_sgpr;
+ vgpr_num a_vgpr,
+ b_vgpr;
+ logic[12:0] b_imm;
+ logic a_scalar,
+ b_scalar,
+ b_is_imm,
+ b_is_const,
+ scalar_rev;
+ } op;
+
+ struct // scalar register write port
+ {
+ logic write;
+ group_id group;
+ sgpr_num sgpr;
+ word data;
+ } sgpr_write;
+
+ struct // vector register write port with a per-lane mask
+ {
+ lane_mask mask;
+ group_id group;
+ vgpr_num vgpr;
+ word data[SHADER_LANES];
+ } vgpr_write;
+
+ word a[SHADER_LANES], b[SHADER_LANES], sgpr_write_data, vgpr_write_data[SHADER_LANES]; // the *_write_data signals appear in no modport below
+ word_ptr pc_front;
+ group_id pc_front_group;
+
+ modport ab // operand consumer: receives per-lane a and b
+ (
+ input a,
+ b
+ );
+
+ modport read // read requester: drives op
+ (
+ output op
+ );
+
+ modport bind_ // pc lookup: drives pc_front_group, receives pc_front
+ (
+ input pc_front,
+
+ output pc_front_group
+ );
+
+ modport wb // writer: drives both write ports
+ (
+ output sgpr_write,
+ vgpr_write
+ );
+
+ modport regs // register file side: mirror of all the modports above
+ (
+ input op,
+ sgpr_write,
+ vgpr_write,
+ pc_front_group,
+
+ output a,
+ b,
+ pc_front
+ );
+
+endinterface
diff --git a/platform/wavelet3d/gfx_shader.sv b/platform/wavelet3d/gfx_shader.sv
new file mode 100644
index 0000000..3be6ed4
--- /dev/null
+++ b/platform/wavelet3d/gfx_shader.sv
@@ -0,0 +1,54 @@
+module gfx_shader // top of the shader core: wires front end, back end, register file and the scheduler register block
+import gfx::*;
+import gfx_shader_schedif_pkg::*;
+(
+ input logic clk,
+ rst_n,
+
+ gfx_axib.m insn_mem, // instruction fetch port
+
+ axi4lite_intf.slave sched // AXI4-Lite port of the scheduler register block
+);
+
+ gfx_shader_schedif__in_t schedif_in; // not driven anywhere in this module
+ gfx_shader_schedif__out_t schedif_out;
+
+ gfx_front_back front_back();
+ gfx_regfile_io regfile();
+
+ gfx_shader_front frontend
+ (
+ .clk,
+ .rst_n,
+ .front(front_back.front),
+ .reg_bind(regfile.bind_),
+ .reg_read(regfile.read),
+ .fetch_mem(insn_mem),
+ .icache_flush(schedif_out.CORE.IFLUSH.value) // flush request comes from a scheduler register field
+ );
+
+ gfx_shader_back backend
+ (
+ .clk,
+ .rst_n,
+ .back(front_back.back),
+ .reg_wb(regfile.wb),
+ .read_data(regfile.ab)
+ );
+
+ gfx_shader_regs regs // port io is declared as gfx_regfile_io.regs inside the module
+ (
+ .clk,
+ .io(regfile)
+ );
+
+ gfx_shader_schedif schedif
+ (
+ .clk,
+ .arst_n(rst_n),
+ .s_axil(sched),
+ .hwif_in(schedif_in),
+ .hwif_out(schedif_out)
+ );
+
+endmodule
diff --git a/platform/wavelet3d/gfx_shader_back.sv b/platform/wavelet3d/gfx_shader_back.sv
new file mode 100644
index 0000000..bc7aee9
--- /dev/null
+++ b/platform/wavelet3d/gfx_shader_back.sv
@@ -0,0 +1,194 @@
+module gfx_shader_back // back end: four execution ports p0..p3 feeding one writeback path through an arbiter
+import gfx::*;
+(
+ input logic clk,
+ rst_n,
+
+ gfx_front_back.back back,
+
+ gfx_regfile_io.ab read_data, // operands a/b, shared by all four ports
+ gfx_regfile_io.wb reg_wb // register write ports
+);
+
+ logic abort; // cancels an in-flight p0 operation (see gfx_shader_abort)
+
+ gfx_wb out_wb(), p0_wb(), p1_wb(), p2_wb(), p3_wb();
+ gfx_shake p1_shake(), p2_shake(), p3_shake(); // NOTE(review): only the .peek and .rx modports are connected in this module - confirm who drives the other side
+
+ gfx_shader_abort p0_abort
+ (
+ .clk,
+ .p1(p1_shake.peek),
+ .p2(p2_shake.peek),
+ .p3(p3_shake.peek),
+ .abort
+ );
+
+ gfx_fpint p0 // p0 has no handshake; it is started by dispatch.valid
+ (
+ .clk,
+ .rst_n,
+ .op(back.execute.p0),
+ .wb(p0_wb.tx),
+ .abort,
+ .read_data,
+ .in_valid(back.dispatch.valid)
+ );
+
+ gfx_shader_mem p1
+ (
+ .clk,
+ .rst_n,
+ .op(back.execute.p1),
+ .wb(p1_wb.tx),
+ .in_shake(p1_shake.rx),
+ .read_data
+ );
+
+ gfx_shader_sfu p2
+ (
+ .clk,
+ .rst_n,
+ .op(back.execute.p2),
+ .wb(p2_wb.tx),
+ .in_shake(p2_shake.rx),
+ .read_data
+ );
+
+ gfx_shader_group p3
+ (
+ .clk,
+ .rst_n,
+ .op(back.execute.p3),
+ .wb(p3_wb.tx),
+ .in_shake(p3_shake.rx),
+ .read_data
+ );
+
+ gfx_shader_writeback_arbiter4 writeback_arbiter // merges the four port results into out_wb
+ (
+ .clk,
+ .rst_n,
+ .p0(p0_wb.rx),
+ .p1(p1_wb.rx),
+ .p2(p2_wb.rx),
+ .p3(p3_wb.rx),
+ .out(out_wb.tx)
+ );
+
+ gfx_shader_writeback writeback
+ (
+ .clk,
+ .rst_n,
+ .wb(out_wb.rx),
+ .regs(reg_wb)
+ );
+
+endmodule
+
+module gfx_shader_abort // registers "some port among p1..p3 completed a valid/ready handshake this cycle"
+(
+ input logic clk,
+
+ gfx_shake.peek p1,
+ p2,
+ p3,
+
+ output logic abort // one cycle behind the observed handshakes; has no reset
+);
+
+ always_ff @(posedge clk)
+ abort <=
+ (p1.valid & p1.ready)
+ | (p2.valid & p2.ready)
+ | (p3.valid & p3.ready);
+
+endmodule
+
+module gfx_shader_writeback_arbiter4 // 4-to-1 writeback arbiter built as a two-level tree of 2-input priority arbiters
+(
+ input logic clk,
+ rst_n,
+
+ gfx_wb.rx p0, // the four producers; the assertion below requires p0 to always be ready
+ p1,
+ p2,
+ p3,
+
+ gfx_wb.tx out // merged writeback stream
+);
+
+ assert property ( // outside reset, p0.ready and out.ready must both be high on every clock
+ @(posedge clk)
+ disable iff (~rst_n)
+
+ (p0.ready & out.ready)
+ );
+
+ gfx_wb p0_p1(), p2_p3(); // intermediate links between the first and second tree level
+
+ gfx_shader_writeback_arbiter2_prio arbiter_p0_p1
+ (
+ .clk,
+ .rst_n,
+ .a(p0),
+ .b(p1),
+ .out(p0_p1.tx)
+ );
+
+ gfx_shader_writeback_arbiter2_prio arbiter_p2_p3
+ (
+ .clk,
+ .rst_n,
+ .a(p2),
+ .b(p3),
+ .out(p2_p3.tx)
+ );
+
+ gfx_shader_writeback_arbiter2_prio arbiter_out // second level: consumes both links through their rx side
+ (
+ .clk,
+ .rst_n,
+ .a(p0_p1.rx),
+ .b(p2_p3.rx),
+ .out
+ );
+
+endmodule
+
+module gfx_shader_writeback_arbiter2_prio // 2-input writeback arbiter; currently a placeholder that only forwards input a
+(
+ input logic clk,
+ rst_n,
+
+ gfx_wb.rx a,
+ b,
+
+ gfx_wb.tx out
+);
+
+ //TODO
+ assign a.ready = out.ready; // a is forwarded straight through
+ assign b.ready = 0; // b is never accepted in this placeholder
+ assign out.dest = a.dest;
+ assign out.lanes = a.lanes;
+ assign out.group = a.group;
+ assign out.valid = a.valid;
+ assign out.scalar = a.scalar;
+ assign out.writeback = a.writeback;
+
+endmodule
+
+module gfx_shader_writeback // placeholder between the arbitrated writeback stream and the register write ports
+(
+ input logic clk,
+ rst_n,
+
+ gfx_wb.rx wb,
+
+ gfx_regfile_io.wb regs
+);
+
+// No logic yet: the body is empty, so none of the outputs of wb or regs are driven here.
+
+endmodule
diff --git a/platform/wavelet3d/gfx_shader_front.sv b/platform/wavelet3d/gfx_shader_front.sv
new file mode 100644
index 0000000..5ad0203
--- /dev/null
+++ b/platform/wavelet3d/gfx_shader_front.sv
@@ -0,0 +1,718 @@
+typedef struct // one wave travelling down the front-end pipeline
+{
+ logic valid,
+ retry; // driven from the icache's insn_retry output in gfx_shader_bind
+ gfx::group_id group;
+ gfx_isa::insn_word insn;
+} shader_front_wave;
+
+typedef logic[4:0] icache_line_num; // cache line index; the cache array holds 1 << 5 lines
+
+typedef logic[$bits(gfx::oword_ptr) - $bits(icache_line_num) - 1:0] icache_tag; // remaining upper bits of an oword pointer
+
+typedef struct packed // oword-granular address split into tag and line index
+{
+ icache_tag tag;
+ icache_line_num line;
+} icache_line_tag;
+
+typedef struct packed // word-granular address: line tag plus a 3-bit word number within the line
+{
+ icache_line_tag line_tag;
+ logic[2:0] word_num;
+} icache_ptr;
+
+module gfx_shader_front // front end: bind/fetch -> register read request -> class decode -> fpint decode
+import gfx::*;
+(
+ input logic clk,
+ rst_n,
+
+ gfx_axib.m fetch_mem, // instruction fetch port
+
+ input logic icache_flush,
+
+ gfx_regfile_io.read reg_read,
+ gfx_regfile_io.bind_ reg_bind,
+
+ gfx_front_back.front front
+);
+
+ word fetch_insn, port_insn; // declared but not referenced below
+ logic fetch_hit, p0_writeback; // fetch_hit is declared but not referenced below
+ shader_front_wave bind_wave, dec_wave, port_dec_wave;
+
+ gfx_shader_bind bind_ // picks a group, looks up its pc and fetches the instruction
+ (
+ .clk,
+ .rst_n,
+ .mem(fetch_mem),
+ .wave(bind_wave),
+ .regs(reg_bind),
+ .loop_valid(front.loop.valid),
+ .loop_group(front.loop.group),
+ .icache_flush
+ );
+
+ gfx_shader_read_regs reg_dec // issues the register read and delays the wave alongside it
+ (
+ .clk,
+ .rst_n,
+ .in(bind_wave),
+ .out(dec_wave),
+ .read(reg_read)
+ );
+
+ gfx_shader_decode_class class_dec // decodes the instruction class into dispatch flags
+ (
+ .clk,
+ .rst_n,
+ .wave(dec_wave),
+ .out_group(front.execute.group),
+ .port_wave(port_dec_wave),
+ .dispatch(front.dispatch),
+ .p0_writeback
+ );
+
+ gfx_shader_decode_fpint p0_dec // decodes the fpint opcode into the p0 control bundle
+ (
+ .clk,
+ .op(front.execute.p0),
+ .insn(port_dec_wave.insn),
+ .writeback(p0_writeback)
+ );
+
+endmodule
+
+module gfx_shader_bind // pops runnable groups from a FIFO, looks up their pc and fetches the instruction through the icache
+import gfx::*;
+(
+ input logic clk,
+ rst_n,
+
+ gfx_axib.m mem, // only the read channels are used; the write channels are tied off below
+
+ input logic icache_flush,
+
+ input logic loop_valid, // pushes loop_group into the runnable FIFO
+ input group_id loop_group,
+
+ gfx_regfile_io.bind_ regs,
+
+ output shader_front_wave wave
+);
+
+ localparam int ICACHE_STAGES = 6;
+ localparam int BIND_STAGES = REGFILE_STAGES + ICACHE_STAGES; // length of the group/valid delay lines
+
+ gfx_beats #($bits(group_id)) runnable_in(), runnable_out();
+
+ logic ar_stall, request_ready, request_valid, valids[BIND_STAGES];
+ group_id groups[BIND_STAGES];
+ icache_line_tag araddr, request_addr;
+
+ assign mem.bready = 0;
+ assign mem.wvalid = 0;
+ assign mem.awvalid = 0;
+
+ assign mem.arlen = ($bits(mem.arlen))'($bits(oword) / $bits(word) - 1); // words per oword, minus one
+ assign mem.araddr = {araddr, ($clog2($bits(oword)) - $clog2($bits(word)) + SUBWORD_BITS)'('0)}; // line address padded with zero low bits
+ assign mem.arburst = 2'b01; // Incremental mode
+
+ assign runnable_in.tx.data = loop_group;
+ assign runnable_in.tx.valid = loop_valid; // runnable_in's ready is not consulted here
+
+ assign regs.pc_front_group = runnable_out.rx.data;
+ assign runnable_out.rx.ready = 1; // the FIFO output is always accepted
+
+ assign wave.group = groups[$size(groups) - 1]; // group id taken from the last delay-line stage
+
+ gfx_skid_buf #($bits(araddr)) ar_skid
+ (
+ .clk,
+ .in(request_addr),
+ .out(araddr),
+ .stall(ar_stall)
+ );
+
+ gfx_skid_flow ar_flow
+ (
+ .clk,
+ .rst_n,
+ .stall(ar_stall),
+ .in_ready(request_ready),
+ .in_valid(request_valid),
+ .out_ready(mem.arready),
+ .out_valid(mem.arvalid)
+ );
+
+ //TODO: We could drop ~25 entries without affecting throughput, latency or correctness
+ gfx_fifo #(.WIDTH($bits(group_id)), .DEPTH(1 << $bits(group_id))) runnable
+ (
+ .clk,
+ .rst_n,
+ .in(runnable_in.rx),
+ .out(runnable_out.tx)
+ );
+
+ gfx_shader_bind_icache icache
+ (
+ .clk,
+ .rst_n,
+
+ .icache_flush,
+ .read_addr(regs.pc_front),
+ .read_valid(valids[REGFILE_STAGES - 1]), // valid delayed by REGFILE_STAGES cycles
+
+ .request_addr,
+ .request_valid,
+ .request_ready,
+
+ .fetch_data(mem.rdata),
+ .fetch_last(mem.rlast),
+ .fetch_valid(mem.rvalid),
+ .fetch_ready(mem.rready),
+
+ .insn(wave.insn),
+ .insn_retry(wave.retry),
+ .insn_valid(wave.valid)
+ );
+
+ always_ff @(posedge clk) begin // group id delay line, no reset
+ groups[0] <= runnable_out.rx.data;
+ for (int i = 1; i < $size(groups); ++i)
+ groups[i] <= groups[i - 1];
+ end
+
+ always_ff @(posedge clk or negedge rst_n) // valid delay line, cleared on reset
+ if (~rst_n)
+ for (int i = 0; i < $size(valids); ++i)
+ valids[i] <= 0;
+ else begin
+ valids[0] <= runnable_out.rx.valid;
+ for (int i = 1; i < $size(valids); ++i)
+ valids[i] <= valids[i - 1];
+ end
+
+endmodule
+
+module gfx_shader_bind_icache // direct-indexed instruction cache with a 5-stage read pipeline and burst line fills
+import gfx::*;
+(
+ input logic clk,
+ rst_n,
+
+ input logic icache_flush, // restarts the flush sweep while asserted
+
+ input logic read_valid, // lookup request
+ input icache_ptr read_addr,
+
+ input logic fetch_last, // fill data beats arriving from memory
+ fetch_valid,
+ input word fetch_data,
+ output logic fetch_ready,
+
+ input logic request_ready, // fill request towards memory
+ output logic request_valid,
+ output icache_line_tag request_addr,
+
+ output logic insn_valid, // lookup result
+ insn_retry,
+ output word insn
+);
+
+ // Dan Gisselquist limits to (1 << 3) bursts by default.
+ // See LGMAXBURST in axixbar.v
+ localparam int PENDING_FIFO_DEPTH = 8;
+
+ enum int unsigned
+ {
+ FLUSH,
+ RUN
+ } state;
+
+ struct // per-line state; also the shape of the two read pipeline registers
+ {
+ logic valid,
+ accessed,
+ hit;
+ icache_tag tag;
+ oword data;
+ } cache[1 << $bits(icache_line_num)], read, read_hold;
+
+ gfx_beats #($bits(icache_line_tag)) pending_in(), pending_out(); // line tags of fills that were requested
+
+ logic accessed_write, accessed_write_enable, burst, fetch_done, hit_write,
+ in_flush, hit_commit, hit_write_enable, retry_4, retry_5, rollback,
+ tag_hit, valid_1, valid_2, valid_3, valid_4, valid_5, valid_write,
+ valid_write_enable;
+
+ icache_ptr read_addr_1, read_addr_2, read_addr_3, read_addr_4, read_addr_5;
+ icache_tag tag_write;
+ icache_line_num accessed_write_line, flush_ptr, hit_write_line, valid_write_line;
+ icache_line_tag pending_pop;
+
+ oword data_write;
+ word[1:0] data_5;
+ word[7:0] fetch_shift;
+ qword[1:0] data_3;
+ udword[1:0] data_4;
+
+ assign data_3 = read.data;
+ assign tag_hit = read.tag == read_addr_3.line_tag.tag; // tag compare at stage 3
+ assign fetch_ready = ~fetch_done; // beats are accepted until the current fill has completed
+ assign pending_pop = pending_out.rx.data;
+
+ assign request_addr = read_addr_4.line_tag;
+ assign request_valid = burst & pending_in.tx.ready; // memory request and FIFO push are each gated by the other's ready
+ assign pending_in.tx.data = read_addr_4.line_tag;
+ assign pending_in.tx.valid = burst & request_ready;
+ assign pending_out.rx.ready = fetch_done & ~hit_commit & ~rollback;
+
+ gfx_fifo #(.WIDTH($bits(icache_line_tag)), .DEPTH(PENDING_FIFO_DEPTH)) pending
+ (
+ .clk,
+ .rst_n,
+ .in(pending_in.rx),
+ .out(pending_out.tx)
+ );
+
+ always_comb
+ unique case (state)
+ FLUSH: in_flush = 1;
+ RUN: in_flush = 0;
+ endcase
+
+ always_ff @(posedge clk or negedge rst_n)
+ if (~rst_n) begin
+ state <= FLUSH; // the cache comes out of reset flushing
+ flush_ptr <= '0;
+ fetch_done <= 0;
+
+ valid_1 <= 0;
+ valid_2 <= 0;
+ valid_3 <= 0;
+ valid_4 <= 0;
+ valid_5 <= 0;
+
+ burst <= 0;
+ end else begin
+ unique case (state)
+ FLUSH:
+ if (~icache_flush & &flush_ptr) // leave FLUSH once flush_ptr is all ones and no flush is requested
+ state <= RUN;
+
+ RUN:
+ if (icache_flush)
+ state <= FLUSH;
+ endcase
+
+ flush_ptr <= flush_ptr + 1; // free-running counter, held at zero while icache_flush is high
+ if (icache_flush)
+ flush_ptr <= '0;
+
+ if (fetch_done) // stays set until the pending entry can be consumed
+ fetch_done <= hit_commit | ~pending_out.rx.valid | rollback;
+ else if (fetch_ready & fetch_valid) // set when the last beat of a fill is accepted
+ fetch_done <= fetch_last;
+
+ valid_1 <= read_valid;
+ valid_2 <= valid_1;
+ valid_3 <= valid_2;
+ valid_4 <= valid_3;
+ valid_5 <= valid_4;
+
+ burst <= valid_3 & ~tag_hit & ~read.accessed & (~read.valid | read.hit); // decides whether a tag mismatch starts a fill
+ end
+
+ always_ff @(posedge clk) begin
+ tag_write <= pending_pop.tag; // write-port registers; later assignments in this block override the defaults
+ data_write <= fetch_shift;
+
+ valid_write <= 1;
+ valid_write_line <= pending_pop.line;
+ valid_write_enable <= fetch_done & ~hit_commit & pending_out.rx.valid & ~rollback;
+
+ accessed_write <= 0;
+ accessed_write_enable <= 1;
+
+ if (rollback) // priority: rollback, then fill completion, then marking the incoming read's line
+ accessed_write_line <= read_addr_5.line_tag.line;
+ else if (fetch_done & ~hit_commit & pending_out.rx.valid)
+ accessed_write_line <= pending_pop.line;
+ else begin
+ accessed_write <= 1;
+ accessed_write_line <= read_addr.line_tag.line;
+ accessed_write_enable <= read_valid;
+ end
+
+ hit_write <= hit_commit;
+ if (hit_commit) begin
+ hit_write_line <= read_addr_4.line_tag.line;
+ hit_write_enable <= 1;
+ end else begin
+ hit_write_line <= pending_pop.line;
+ hit_write_enable <= fetch_done & pending_out.rx.valid & ~rollback;
+ end
+
+ if (in_flush) begin // flushing overrides everything above: clear all three bits of line flush_ptr
+ valid_write <= 0;
+ valid_write_line <= flush_ptr;
+ valid_write_enable <= 1;
+
+ accessed_write <= 0;
+ accessed_write_line <= flush_ptr;
+ accessed_write_enable <= 1;
+
+ hit_write <= 0;
+ hit_write_line <= flush_ptr;
+ hit_write_enable <= 1;
+ end
+
+ if (valid_write_enable) begin // the array writes use the values registered on the previous cycle
+ cache[valid_write_line].tag <= tag_write;
+ cache[valid_write_line].data <= data_write;
+ cache[valid_write_line].valid <= valid_write;
+ end
+
+ if (accessed_write_enable)
+ cache[accessed_write_line].accessed <= accessed_write;
+
+ if (hit_write_enable)
+ cache[hit_write_line].hit <= hit_write;
+
+ read_addr_1 <= read_addr; // stage 1: capture the address
+
+ read_hold <= cache[read_addr_1.line_tag.line]; // stage 2: read the line
+ read_addr_2 <= read_addr_1;
+
+ read <= read_hold; // stage 3: line available for the tag compare
+ read_addr_3 <= read_addr_2;
+
+ data_4 <= data_3[read_addr_3.word_num[2]]; // stage 4: select half of the line by word_num[2]
+ retry_4 <= ~tag_hit | ~read.valid;
+ hit_commit <= valid_3 & tag_hit & read.valid;
+ read_addr_4 <= read_addr_3;
+
+ data_5 <= data_4[read_addr_4.word_num[1]]; // stage 5: select a word pair by word_num[1]
+ retry_5 <= retry_4;
+ rollback <= burst & (~request_valid | ~pending_in.tx.valid); // the fill request was not accepted on both sides
+ read_addr_5 <= read_addr_4;
+
+ insn <= data_5[read_addr_5.word_num[0]]; // output stage: select the final word by word_num[0]
+ insn_retry <= retry_5;
+ insn_valid <= valid_5;
+
+ if (fetch_ready & fetch_valid) begin // NOTE(review): beats shift upward, so the first beat ends in element 7 after 8 beats while word_num 0 selects element 0 - confirm the intended word order
+ fetch_shift[0] <= fetch_data;
+ for (int i = 1; i < $size(fetch_shift); ++i)
+ fetch_shift[i] <= fetch_shift[i - 1];
+ end
+ end
+
+endmodule
+
+module gfx_shader_read_regs // issues the register-file read for a wave and delays the wave by HOLD_DEPTH cycles
+import gfx::*;
+import gfx_isa::*;
+(
+ input logic clk,
+ rst_n,
+
+ input shader_front_wave in, // wave coming from the bind/fetch stage
+
+ gfx_regfile_io.read read, // read request towards the register file
+
+ output shader_front_wave out // the same wave, HOLD_DEPTH cycles later
+);
+
+ localparam int HOLD_DEPTH = REG_READ_STAGES + 1 - 2;
+
+ logic reg_rev;
+ logic hold_valid[HOLD_DEPTH];
+ shader_front_wave hold[HOLD_DEPTH];
+
+ assign reg_rev = in.insn.reg_rev;
+
+ always_comb begin // the valid bit travels in its own resettable delay line and replaces hold[].valid
+ out = hold[$size(hold) - 1];
+ out.valid = hold_valid[$size(hold_valid) - 1];
+ end
+
+ always_ff @(posedge clk) begin // payload delay line and read request registers, no reset
+ hold[0] <= in;
+
+ for (int i = 1; i < HOLD_DEPTH; ++i)
+ hold[i] <= hold[i - 1];
+
+ read.op.group <= in.group;
+
+ read.op.b_imm <= in.insn.dst_src.rr.b.imm;
+ read.op.a_sgpr <= in.insn.dst_src.rr.ra.sgpr;
+ read.op.b_sgpr <= in.insn.dst_src.rr.b.read.r.sgpr;
+ read.op.a_vgpr <= in.insn.dst_src.rr.ra.vgpr.num;
+ read.op.b_vgpr <= in.insn.dst_src.rr.b.read.r.vgpr.num;
+ read.op.b_is_imm <= in.insn.dst_src.rr.b_is_imm;
+ read.op.b_is_const <= in.insn.dst_src.rr.b.read.from_consts;
+ read.op.scalar_rev <= reg_rev;
+
+ unique case (in.insn.reg_mode)
+ REGS_SVS, REGS_VVS: begin // mixed modes: exactly one operand is scalar, chosen by reg_rev
+ read.op.a_scalar <= reg_rev;
+ read.op.b_scalar <= ~reg_rev;
+ end
+
+ REGS_SSS: begin // both operands scalar
+ read.op.a_scalar <= 1;
+ read.op.b_scalar <= 1;
+ end
+
+ REGS_VVV: begin // both operands vector
+ read.op.a_scalar <= 0;
+ read.op.b_scalar <= 0;
+ end
+ endcase
+ end
+
+ always_ff @(posedge clk or negedge rst_n)
+ if (~rst_n)
+ for (int i = 0; i < HOLD_DEPTH; ++i) // clear every stage, including stage 0, which is also a register here
+ hold_valid[i] <= 0;
+ else begin
+ hold_valid[0] <= in.valid;
+
+ for (int i = 1; i < HOLD_DEPTH; ++i)
+ hold_valid[i] <= hold_valid[i - 1];
+ end
+
+endmodule
+
+module gfx_shader_decode_class // two-stage decode of the instruction class into per-port dispatch flags
+import gfx::*;
+import gfx_isa::*;
+(
+ input logic clk,
+ rst_n,
+
+ input shader_front_wave wave,
+ output shader_front_wave port_wave, // wave delayed by one cycle, for the per-port decoders
+ output group_id out_group, // group of port_wave, delayed by one more cycle
+
+ output shader_dispatch dispatch,
+ output logic p0_writeback // combinational: none of is_mem, is_fsu, is_group or retry is set
+);
+
+ logic is_fsu, is_mem, is_group, hold_valid, retry; // is_fsu is the flag set for INSN_SFU
+ shader_front_wave hold_wave;
+
+ assign p0_writeback = ~(is_mem | is_fsu | is_group | retry);
+
+ always_comb begin // valid comes from the resettable hold_valid, not from hold_wave
+ port_wave = hold_wave;
+ port_wave.valid = hold_valid;
+ end
+
+ always_ff @(posedge clk) begin
+ hold_wave <= wave;
+ out_group <= port_wave.group;
+ end
+
+ always_ff @(posedge clk or negedge rst_n)
+ // Intentionally repetitive
+ if (~rst_n) begin
+ is_fsu <= 0;
+ is_mem <= 0;
+ is_group <= 0;
+
+ retry <= 0;
+ hold_valid <= 0;
+
+ dispatch <= '0;
+ end else begin
+ is_fsu <= 0; // defaults; the case below sets at most one class flag
+ is_mem <= 0;
+ is_group <= 0;
+
+ retry <= wave.retry;
+ hold_valid <= wave.valid;
+
+ unique case (wave.insn.insn_class)
+ INSN_FPINT: ; // p0 has no ready signal
+ INSN_MEM: is_mem <= 1;
+ INSN_SFU: is_fsu <= 1;
+ INSN_GROUP: is_group <= 1;
+
+ default: // reached only when insn_class holds none of the four encodings
+ {is_mem, is_fsu, is_group} <= 'x;
+ endcase
+
+ dispatch.p1 <= is_mem; // second stage: built from the flags registered one cycle earlier
+ dispatch.p2 <= is_fsu;
+ dispatch.p3 <= is_group;
+
+ if (~hold_valid | retry) begin // invalid or retried waves dispatch to none of p1..p3
+ dispatch.p1 <= 0;
+ dispatch.p2 <= 0;
+ dispatch.p3 <= 0;
+ end
+
+ dispatch.valid <= hold_valid; // not gated by retry
+ end
+
+endmodule
+
+module gfx_shader_decode_fpint // registered lookup from the fpint opcode to the fpint_op control bundle
+import gfx::*;
+import gfx_isa::*;
+(
+ input logic clk,
+
+ input insn_word insn,
+ input logic writeback, // copied into op.writeback for every opcode
+
+ output fpint_op op // registered, no reset
+);
+
+ always_ff @(posedge clk) begin
+ unique case (insn.by_class.fpint.op)
+ INSN_FPINT_MOV: begin // control settings for MOV
+ op.setup_mul_float <= 0;
+ op.setup_unit_b <= 1;
+ op.mnorm_put_hi <= 0;
+ op.mnorm_put_lo <= 1;
+ op.mnorm_put_mul <= 0;
+ op.mnorm_zero_flags <= 1;
+ op.mnorm_zero_b <= 1;
+ op.minmax_abs <= 1;
+ op.minmax_swap <= 0;
+ op.minmax_zero_min <= 0;
+ op.minmax_copy_flags <= 1;
+ op.shiftr_int_signed <= 0;
+ op.addsub_int_operand <= 0;
+ op.addsub_copy_flags <= 1;
+ op.clz_force_nop <= 1;
+ op.shiftl_copy_flags <= 1;
+ op.round_copy_flags <= 1;
+ op.round_enable <= 1;
+ op.encode_enable <= 1;
+ end
+
+ INSN_FPINT_FMUL: begin // control settings for FMUL
+ op.setup_mul_float <= 1;
+ op.setup_unit_b <= 0;
+ op.mnorm_put_hi <= 0;
+ op.mnorm_put_lo <= 0;
+ op.mnorm_put_mul <= 1;
+ op.mnorm_zero_flags <= 0;
+ op.mnorm_zero_b <= 1;
+ op.minmax_abs <= 1;
+ op.minmax_swap <= 0;
+ op.minmax_zero_min <= 0;
+ op.minmax_copy_flags <= 1;
+ op.shiftr_int_signed <= 0;
+ op.addsub_int_operand <= 0;
+ op.addsub_copy_flags <= 1;
+ op.clz_force_nop <= 1;
+ op.shiftl_copy_flags <= 1;
+ op.round_copy_flags <= 1;
+ op.round_enable <= 1;
+ op.encode_enable <= 1;
+ end
+
+ INSN_FPINT_IMUL: begin // control settings for IMUL; rounding and encoding are disabled
+ op.setup_mul_float <= 0;
+ op.setup_unit_b <= 0;
+ op.mnorm_put_hi <= 0;
+ op.mnorm_put_lo <= 1;
+ op.mnorm_put_mul <= 0;
+ op.mnorm_zero_flags <= 1;
+ op.mnorm_zero_b <= 1;
+ op.minmax_abs <= 1;
+ op.minmax_swap <= 0;
+ op.minmax_zero_min <= 0;
+ op.minmax_copy_flags <= 1;
+ op.shiftr_int_signed <= 0;
+ op.addsub_int_operand <= 0;
+ op.addsub_copy_flags <= 1;
+ op.clz_force_nop <= 1;
+ op.shiftl_copy_flags <= 1;
+ op.round_copy_flags <= 1;
+ op.round_enable <= 0;
+ op.encode_enable <= 0;
+ end
+
+ INSN_FPINT_FADD: begin // control settings for FADD; none of the copy_flags or force_nop bypasses are set
+ op.setup_mul_float <= 0;
+ op.setup_unit_b <= 1;
+ op.mnorm_put_hi <= 0;
+ op.mnorm_put_lo <= 1;
+ op.mnorm_put_mul <= 0;
+ op.mnorm_zero_flags <= 0;
+ op.mnorm_zero_b <= 0;
+ op.minmax_abs <= 1;
+ op.minmax_swap <= 0;
+ op.minmax_zero_min <= 0;
+ op.minmax_copy_flags <= 0;
+ op.shiftr_int_signed <= 0;
+ op.addsub_int_operand <= 0;
+ op.addsub_copy_flags <= 0;
+ op.clz_force_nop <= 0;
+ op.shiftl_copy_flags <= 0;
+ op.round_copy_flags <= 0;
+ op.round_enable <= 1;
+ op.encode_enable <= 1;
+ end
+
+ INSN_FPINT_FMAX, INSN_FPINT_FMIN: begin // shared settings; only minmax_swap differs between the two
+ op.setup_mul_float <= 0;
+ op.setup_unit_b <= 1;
+ op.mnorm_put_hi <= 0;
+ op.mnorm_put_lo <= 1;
+ op.mnorm_put_mul <= 0;
+ op.mnorm_zero_flags <= 0;
+ op.mnorm_zero_b <= 0;
+ op.minmax_abs <= 0;
+ op.minmax_swap <= insn.by_class.fpint.op == INSN_FPINT_FMIN; // 1 for FMIN, 0 for FMAX
+ op.minmax_zero_min <= 1;
+ op.minmax_copy_flags <= 1;
+ op.shiftr_int_signed <= 0;
+ op.addsub_int_operand <= 0;
+ op.addsub_copy_flags <= 1;
+ op.clz_force_nop <= 1;
+ op.shiftl_copy_flags <= 1;
+ op.round_copy_flags <= 1;
+ op.round_enable <= 0;
+ op.encode_enable <= 0;
+ end
+
+ INSN_FPINT_FCVT: begin // control settings for FCVT; the only opcode setting the signed-shift and int-operand bits
+ op.setup_mul_float <= 0;
+ op.setup_unit_b <= 1;
+ op.mnorm_put_hi <= 0;
+ op.mnorm_put_lo <= 1;
+ op.mnorm_put_mul <= 0;
+ op.mnorm_zero_flags <= 1;
+ op.mnorm_zero_b <= 1;
+
+ op.minmax_abs <= 1;
+ op.minmax_swap <= 0;
+ op.minmax_zero_min <= 0;
+ op.minmax_copy_flags <= 0;
+ op.shiftr_int_signed <= 1;
+ op.addsub_int_operand <= 1;
+ op.addsub_copy_flags <= 1;
+ op.clz_force_nop <= 0;
+ op.shiftl_copy_flags <= 0;
+ op.round_copy_flags <= 0;
+ op.round_enable <= 1;
+ op.encode_enable <= 1;
+ end
+
+ default: // every other opcode value: all control bits become X
+ op <= 'x;
+ endcase
+
+ op.writeback <= writeback; // assigned last, so it also overrides the 'x from the default branch
+ end
+
+endmodule
diff --git a/platform/wavelet3d/gfx_shader_group.sv b/platform/wavelet3d/gfx_shader_group.sv
new file mode 100644
index 0000000..7659bb9
--- /dev/null
+++ b/platform/wavelet3d/gfx_shader_group.sv
@@ -0,0 +1,16 @@
+// Shader-core unit that receives a group_op.
+// Only the port list is declared so far: the module body is empty, so the
+// outputs reachable through in_shake and wb are not driven from here.
+module gfx_shader_group
+import gfx::*;
+(
+    input  logic      clk,
+    input  logic      rst_n,
+
+    // Operation descriptor for this unit.
+    input  group_op   op,
+
+    // Operand access through the 'ab' modport of gfx_regfile_io.
+    gfx_regfile_io.ab read_data,
+
+    // Handshake, receiver side: observes valid, drives ready.
+    gfx_shake.rx      in_shake,
+
+    // Writeback, producer side.
+    gfx_wb.tx         wb
+);
+
+endmodule
diff --git a/platform/wavelet3d/gfx_shader_mem.sv b/platform/wavelet3d/gfx_shader_mem.sv
new file mode 100644
index 0000000..97561fb
--- /dev/null
+++ b/platform/wavelet3d/gfx_shader_mem.sv
@@ -0,0 +1,16 @@
+// Shader-core unit that receives a mem_op.
+// Only the port list is declared so far: the module body is empty, so the
+// outputs reachable through in_shake and wb are not driven from here.
+module gfx_shader_mem
+import gfx::*;
+(
+    input  logic      clk,
+    input  logic      rst_n,
+
+    // Operation descriptor for this unit.
+    input  mem_op     op,
+
+    // Operand access through the 'ab' modport of gfx_regfile_io.
+    gfx_regfile_io.ab read_data,
+
+    // Handshake, receiver side: observes valid, drives ready.
+    gfx_shake.rx      in_shake,
+
+    // Writeback, producer side.
+    gfx_wb.tx         wb
+);
+
+endmodule
diff --git a/platform/wavelet3d/gfx_shader_regs.sv b/platform/wavelet3d/gfx_shader_regs.sv
new file mode 100644
index 0000000..7ae2e14
--- /dev/null
+++ b/platform/wavelet3d/gfx_shader_regs.sv
@@ -0,0 +1,253 @@
+// Shader register-file read stage.
+//
+// A read request arrives on io.op; a fixed number of clock edges later the
+// per-lane operands appear on io.a / io.b. Each operand is either the
+// per-lane VGPR value or a scalar value broadcast to every lane.
+//
+// Scalar operand B is selected among a zero-extended immediate, the constant
+// ROM and the SGPR file. scalar_rev swaps the two scalar operands before
+// they are broadcast.
+//
+// The module also instantiates the PC table (read side) and forwards the
+// io.sgpr_write / io.vgpr_write requests to the register files.
+//
+// NOTE(review): the hold_* delay chains are sized from REGFILE_STAGES so that
+// control bits meet the register data; this presumably equals the three-edge
+// read latency of gfx_shader_regfile_port -- confirm against gfx_pkg.
+module gfx_shader_regs
+import gfx::*;
+(
+    input logic clk,
+
+    gfx_regfile_io.regs io
+);
+
+    // verilator tracing_off
+
+    word hold_imm[REGFILE_STAGES], imm_out, read_a_data_sgpr, read_b_data_scalar,
+         read_b_data_sgpr, read_const, read_a_data_vgpr[SHADER_LANES],
+         read_b_data_vgpr[SHADER_LANES], sgpr_out_a, sgpr_out_b;
+
+    logic a_scalar_out, b_is_const_out, b_is_imm_out, b_scalar_out, scalar_rev_out;
+    group_id hold_read_group_1, hold_read_group_2;
+    sgpr_num hold_read_a_sgpr;
+    vgpr_num hold_read_a_vgpr_1, hold_read_a_vgpr_2, hold_read_b_vgpr_1, hold_read_b_vgpr_2;
+
+    // Control delay lines. The B-source selects are needed when the scalar
+    // B value is registered, scalar_rev one stage later (operand swap), and
+    // the a/b scalar selects one stage after that (final lane mux).
+    logic[REGFILE_STAGES - 1:0] hold_b_is_imm, hold_b_is_const;
+    logic[REGFILE_STAGES + 1 - 1:0] hold_scalar_rev;
+    logic[REGFILE_STAGES + 2 - 1:0] hold_a_scalar, hold_b_scalar;
+
+    // Taps at the end of each delay line.
+    assign imm_out = hold_imm[$size(hold_imm) - 1];
+    assign a_scalar_out = hold_a_scalar[$bits(hold_a_scalar) - 1];
+    assign b_scalar_out = hold_b_scalar[$bits(hold_b_scalar) - 1];
+    assign b_is_imm_out = hold_b_is_imm[$bits(hold_b_is_imm) - 1];
+    assign b_is_const_out = hold_b_is_const[$bits(hold_b_is_const) - 1];
+    assign scalar_rev_out = hold_scalar_rev[$bits(hold_scalar_rev) - 1];
+
+    gfx_shader_pc_table pcs
+    (
+        .clk,
+        .read(io.pc_front),
+        .read_group(io.pc_front_group)
+    );
+
+    // The constant ROM is indexed with the same field as SGPR read port B.
+    gfx_shader_consts consts
+    (
+        .clk,
+        .num(io.op.b_sgpr),
+        .value(read_const)
+    );
+
+    // Scalar registers, addressed by {group, sgpr}. Port B is addressed
+    // straight from io.op while port A is addressed one cycle later: the B
+    // value passes through one extra register (read_b_data_scalar) before
+    // both meet at the operand swap.
+    gfx_shader_regfile #($bits(group_id) + $bits(sgpr_num)) sgprs
+    (
+        .clk,
+
+        .read_a_num({hold_read_group_1, hold_read_a_sgpr}),
+        .read_b_num({io.op.group, io.op.b_sgpr}),
+        .read_a_data(read_a_data_sgpr),
+        .read_b_data(read_b_data_sgpr),
+
+        .write(io.sgpr_write.write),
+        .write_num({io.sgpr_write.group, io.sgpr_write.sgpr}),
+        .write_data(io.sgpr_write.data)
+    );
+
+    // One vector register file per lane, addressed by {group, vgpr}. The
+    // address is delayed two cycles so the data meets the scalar path, which
+    // has two extra registers (read_b_data_scalar and sgpr_out_*). Each lane
+    // has its own write enable taken from the write mask.
+    generate
+        for (genvar i = 0; i < SHADER_LANES; ++i) begin: vgprs
+            gfx_shader_regfile #($bits(group_id) + $bits(vgpr_num)) vgprs
+            (
+                .clk,
+
+                .read_a_num({hold_read_group_2, hold_read_a_vgpr_2}),
+                .read_b_num({hold_read_group_2, hold_read_b_vgpr_2}),
+                .read_a_data(read_a_data_vgpr[i]),
+                .read_b_data(read_b_data_vgpr[i]),
+
+                .write(io.vgpr_write.mask[i]),
+                .write_num({io.vgpr_write.group, io.vgpr_write.vgpr}),
+                .write_data(io.vgpr_write.data[i])
+            );
+        end
+    endgenerate
+
+    always_ff @(posedge clk) begin
+        // Head of every delay line; the immediate is zero-extended to a word.
+        hold_imm[0] <= {{($bits(word) - $bits(io.op.b_imm)){1'b0}}, io.op.b_imm};
+        hold_a_scalar[0] <= io.op.a_scalar;
+        hold_b_scalar[0] <= io.op.b_scalar;
+        hold_b_is_imm[0] <= io.op.b_is_imm;
+        hold_b_is_const[0] <= io.op.b_is_const;
+        hold_scalar_rev[0] <= io.op.scalar_rev;
+
+        // Stages shared by all delay lines.
+        for (int i = 1; i < REGFILE_STAGES; ++i) begin
+            hold_imm[i] <= hold_imm[i - 1];
+            hold_a_scalar[i] <= hold_a_scalar[i - 1];
+            hold_b_scalar[i] <= hold_b_scalar[i - 1];
+            hold_b_is_imm[i] <= hold_b_is_imm[i - 1];
+            hold_b_is_const[i] <= hold_b_is_const[i - 1];
+            hold_scalar_rev[i] <= hold_scalar_rev[i - 1];
+        end
+
+        // Two extra stages for the final scalar/vector selects.
+        for (int i = REGFILE_STAGES; i < REGFILE_STAGES + 2; ++i) begin
+            hold_a_scalar[i] <= hold_a_scalar[i - 1];
+            hold_b_scalar[i] <= hold_b_scalar[i - 1];
+        end
+
+        // One extra stage for the operand-swap select.
+        hold_scalar_rev[REGFILE_STAGES] <= hold_scalar_rev[REGFILE_STAGES - 1];
+
+        // Delayed register-file addresses.
+        hold_read_a_sgpr <= io.op.a_sgpr;
+        hold_read_group_1 <= io.op.group;
+        hold_read_group_2 <= hold_read_group_1;
+
+        hold_read_a_vgpr_1 <= io.op.a_vgpr;
+        hold_read_a_vgpr_2 <= hold_read_a_vgpr_1;
+
+        hold_read_b_vgpr_1 <= io.op.b_vgpr;
+        hold_read_b_vgpr_2 <= hold_read_b_vgpr_1;
+
+        // Scalar B source; the immediate takes priority over the constant
+        // ROM, which takes priority over the SGPR file.
+        if (b_is_imm_out)
+            read_b_data_scalar <= imm_out;
+        else if (b_is_const_out)
+            read_b_data_scalar <= read_const;
+        else
+            read_b_data_scalar <= read_b_data_sgpr;
+
+        // Optional swap of the two scalar operands.
+        if (scalar_rev_out) begin
+            sgpr_out_a <= read_b_data_scalar;
+            sgpr_out_b <= read_a_data_sgpr;
+        end else begin
+            sgpr_out_a <= read_a_data_sgpr;
+            sgpr_out_b <= read_b_data_scalar;
+        end
+
+        // Final per-lane mux: broadcast scalar or per-lane vector value.
+        // Operand B must take VGPR read port B; selecting port A here would
+        // make every vector B operand a copy of operand A.
+        for (int i = 0; i < SHADER_LANES; ++i) begin
+            io.a[i] <= a_scalar_out ? sgpr_out_a : read_a_data_vgpr[i];
+            io.b[i] <= b_scalar_out ? sgpr_out_b : read_b_data_vgpr[i];
+        end
+    end
+
+endmodule
+
+// Constant ROM indexed by an SGPR number.
+// 'value' follows 'num' after three clock edges: the index is registered,
+// the addressed ROM word is registered, then the output is registered.
+// Only entries 0 to 4 receive an initial value in this module.
+module gfx_shader_consts
+import gfx::*;
+(
+    input  logic    clk,
+
+    input  sgpr_num num,
+    output word     value
+);
+
+    localparam int ROM_WORDS = 1 << $bits(sgpr_num);
+
+    sgpr_num index_q;
+    word     rom[ROM_WORDS];
+    word     word_q;
+
+    always_ff @(posedge clk) begin
+        index_q <= num;
+        word_q  <= rom[index_q];
+        value   <= word_q;
+    end
+
+    initial begin
+        rom[0] = 'hffff_ffff; // -1
+        rom[1] = 'h7fff_ffff; // 2^31 - 1, useful for fp abs
+        rom[2] = 'h8000_0000; // 2^31, useful for fp neg
+        rom[3] = 'h3f80_0000; // +1.0
+        rom[4] = 'hbf80_0000; // -1.0
+    end
+
+endmodule
+
+// Register file with two read ports and one write port.
+// It is built from two gfx_shader_regfile_port instances that receive the
+// same write stream; instance 'a' serves read port A and instance 'b'
+// serves read port B.
+module gfx_shader_regfile
+import gfx::*;
+#(int DEPTH_LOG = 0)
+(
+    input  logic                  clk,
+
+    input  logic[DEPTH_LOG - 1:0] read_a_num,
+    input  logic[DEPTH_LOG - 1:0] read_b_num,
+    output word                   read_a_data,
+    output word                   read_b_data,
+
+    input  logic                  write,
+    input  logic[DEPTH_LOG - 1:0] write_num,
+    input  word                   write_data
+);
+
+    // Read port A.
+    gfx_shader_regfile_port #(.DEPTH_LOG(DEPTH_LOG)) a
+    (
+        .clk        (clk),
+        .read_num   (read_a_num),
+        .read_data  (read_a_data),
+        .write      (write),
+        .write_num  (write_num),
+        .write_data (write_data)
+    );
+
+    // Read port B.
+    gfx_shader_regfile_port #(.DEPTH_LOG(DEPTH_LOG)) b
+    (
+        .clk        (clk),
+        .read_num   (read_b_num),
+        .read_data  (read_b_data),
+        .write      (write),
+        .write_num  (write_num),
+        .write_data (write_data)
+    );
+
+endmodule
+
+// One read port and one write port over a private memory array.
+// Read:  read_num -> registered index -> registered array word -> read_data,
+//        i.e. three clock edges from index to data.
+// Write: write / write_num / write_data are registered once and committed to
+//        the array on the following edge.
+module gfx_shader_regfile_port
+import gfx::*;
+#(int DEPTH_LOG = 0)
+(
+    input  logic                  clk,
+
+    input  logic[DEPTH_LOG - 1:0] read_num,
+    output word                   read_data,
+
+    input  logic                  write,
+    input  logic[DEPTH_LOG - 1:0] write_num,
+    input  word                   write_data
+);
+
+    word                   file[1 << DEPTH_LOG];
+    word                   rd_word_q, wr_word_q;
+    logic                  wr_en_q;
+    logic[DEPTH_LOG - 1:0] rd_idx_q, wr_idx_q;
+
+    // wr_en_q needs no rst_n because any initial write is harmless
+
+    // Stage 1: register the read index and the write request.
+    always_ff @(posedge clk) begin
+        rd_idx_q  <= read_num;
+        wr_en_q   <= write;
+        wr_idx_q  <= write_num;
+        wr_word_q <= write_data;
+    end
+
+    // Stage 2: array access using the registered index / request.
+    always_ff @(posedge clk) begin
+        rd_word_q <= file[rd_idx_q];
+
+        if (wr_en_q)
+            file[wr_idx_q] <= wr_word_q;
+    end
+
+    // Stage 3: output register.
+    always_ff @(posedge clk)
+        read_data <= rd_word_q;
+
+endmodule
+
+// Per-group program-counter table, read side.
+// 'read' returns pcs[read_group] three clock edges after read_group is
+// presented: index register, array-word register, output register.
+// NOTE(review): nothing in this module writes 'pcs'; a write path is
+// presumably still to be added.
+module gfx_shader_pc_table
+import gfx::*;
+(
+    input  logic    clk,
+
+    input  group_id read_group,
+
+    output word_ptr read
+);
+
+    localparam int GROUPS = 1 << $bits(group_id);
+
+    group_id group_q;
+    word_ptr pcs[GROUPS];
+    word_ptr pc_q;
+
+    always_ff @(posedge clk) begin
+        group_q <= read_group;
+        pc_q    <= pcs[group_q];
+        read    <= pc_q;
+    end
+
+endmodule
diff --git a/platform/wavelet3d/gfx_shader_schedif.rdl b/platform/wavelet3d/gfx_shader_schedif.rdl
new file mode 100644
index 0000000..2ab31ac
--- /dev/null
+++ b/platform/wavelet3d/gfx_shader_schedif.rdl
@@ -0,0 +1,74 @@
+// Register map between the scheduler and the shader core.
+// Four 32-bit registers at byte offsets 0x0, 0x4, 0x8 and 0xc.
+addrmap gfx_shader_schedif {
+ name = "Scheduler<->core interface";
+
+ // Map-wide defaults: fields are written by software and read by hardware
+ // unless a register or field overrides this.
+ default hw = r;
+ default sw = w;
+ default regwidth = 32;
+
+ reg {
+ name = "Shader core control register";
+
+ field {
+ desc = "Set this field to flush the instruction cache";
+
+ // SystemRDL singlepulse: the field self-clears after being written.
+ singlepulse;
+ } IFLUSH[0:0] = 0;
+ } CORE @ 0x0;
+
+ reg {
+ name = "Wavefront setup control register";
+
+ // Register-local defaults: hardware-written status fields that software
+ // reads; hardware takes precedence when both update in the same cycle.
+ default hw = w;
+ default sw = r;
+ default precedence = hw;
+
+ field {
+ desc = "Wavefront group number";
+
+ // Overrides the register defaults: software read/write, hardware read.
+ hw = r;
+ sw = rw;
+ } GROUP[5:0];
+
+ // NOTE(review): desc mentions only SGPR while the field is named XGPR and
+ // SETUP_GPR is described as an SGPR/VGPR write register -- confirm wording.
+ field {
+ desc = "Destination SGPR number";
+
+ // Overrides the register defaults: software read/write, hardware read.
+ hw = r;
+ sw = rw;
+ } XGPR[11:8];
+
+ field {
+ desc = "PC table update done, group submitted";
+
+ // Set by hardware (hwset), cleared when software reads it (rclr).
+ rclr;
+ hwset;
+ } SUBMIT_DONE[16:16] = 0;
+
+ field {
+ desc = "General-purpose register update done";
+
+ // Set by hardware (hwset), cleared when software reads it (rclr).
+ rclr;
+ hwset;
+ } GPR_DONE[17:17] = 0;
+ } SETUP_CTRL @ 0x4;
+
+ reg {
+ name = "SGPR/VGPR write register";
+
+ field {
+ desc = "Value to write";
+
+ // swmod: hardware is notified when software modifies the field.
+ swmod;
+ } VALUE[31:0];
+ } SETUP_GPR @ 0x8;
+
+ reg {
+ name = "Group submit register";
+
+ field {
+ desc = "Initial group program counter, submits group on write";
+
+ // swmod: hardware is notified when software modifies the field.
+ // The field occupies bits 31:2 only; bits 1:0 are not part of it.
+ swmod;
+ } PC[31:2];
+ } SETUP_SUBMIT @ 0xc;
+};
+
diff --git a/platform/wavelet3d/gfx_shader_sfu.sv b/platform/wavelet3d/gfx_shader_sfu.sv
new file mode 100644
index 0000000..614d5a1
--- /dev/null
+++ b/platform/wavelet3d/gfx_shader_sfu.sv
@@ -0,0 +1,16 @@
+// Shader-core unit that receives an sfu_op.
+// Only the port list is declared so far: the module body is empty, so the
+// outputs reachable through in_shake and wb are not driven from here.
+module gfx_shader_sfu
+import gfx::*;
+(
+    input  logic      clk,
+    input  logic      rst_n,
+
+    // Operation descriptor for this unit.
+    input  sfu_op     op,
+
+    // Operand access through the 'ab' modport of gfx_regfile_io.
+    gfx_regfile_io.ab read_data,
+
+    // Handshake, receiver side: observes valid, drives ready.
+    gfx_shake.rx      in_shake,
+
+    // Writeback, producer side.
+    gfx_wb.tx         wb
+);
+
+endmodule
diff --git a/platform/wavelet3d/gfx_shake.sv b/platform/wavelet3d/gfx_shake.sv
new file mode 100644
index 0000000..baae0c3
--- /dev/null
+++ b/platform/wavelet3d/gfx_shake.sv
@@ -0,0 +1,24 @@
+// Minimal valid/ready handshake bundle with no payload.
+interface gfx_shake;
+
+    logic valid, ready;
+
+    // Sender: drives valid, observes ready.
+    modport tx (output valid, input ready);
+
+    // Receiver: observes valid, drives ready.
+    modport rx (input valid, output ready);
+
+    // Passive observer: both signals are inputs.
+    modport peek (input valid, ready);
+
+endinterface
diff --git a/platform/wavelet3d/gfx_skid_buf.sv b/platform/wavelet3d/gfx_skid_buf.sv
new file mode 100644
index 0000000..e3e5247
--- /dev/null
+++ b/platform/wavelet3d/gfx_skid_buf.sv
@@ -0,0 +1,20 @@
+// Data path of a skid buffer, WIDTH bits wide.
+// While 'stall' is low, 'in' passes combinationally to 'out' and is also
+// captured in a holding register. While 'stall' is high, 'out' shows the
+// held value and the register keeps its contents.
+// The holding register has no reset.
+module gfx_skid_buf
+#(int WIDTH = 0)
+(
+    input  logic              clk,
+
+    input  logic[WIDTH - 1:0] in,
+    input  logic              stall,
+
+    output logic[WIDTH - 1:0] out
+);
+
+    logic[WIDTH - 1:0] held;
+
+    // Capture the input only on cycles that are not stalled.
+    always_ff @(posedge clk)
+        if (!stall)
+            held <= in;
+
+    assign out = stall ? held : in;
+
+endmodule
diff --git a/platform/wavelet3d/gfx_skid_flow.sv b/platform/wavelet3d/gfx_skid_flow.sv
new file mode 100644
index 0000000..7890ae3
--- /dev/null
+++ b/platform/wavelet3d/gfx_skid_flow.sv
@@ -0,0 +1,31 @@
+// Flow control of a skid buffer (presumably paired with gfx_skid_buf, which
+// takes a matching 'stall' input -- confirm at the instantiation sites).
+//
+//   in_ready  : high if downstream was ready on the previous cycle, or if
+//               the last non-stalled cycle carried no valid input.
+//   stall     : inverse of in_ready.
+//   out_valid : high when the input is valid or while stalling.
+//
+// Both state bits are cleared by the asynchronous active-low reset.
+module gfx_skid_flow
+(
+    input  logic clk,
+    input  logic rst_n,
+
+    input  logic in_valid,
+    input  logic out_ready,
+
+    output logic in_ready,
+    output logic out_valid,
+    output logic stall
+);
+
+    logic ready_q, valid_q;
+
+    assign in_ready  = ready_q | ~valid_q;
+    assign stall     = ~in_ready;
+    assign out_valid = in_valid | stall;
+
+    // Downstream readiness, delayed by one cycle.
+    always_ff @(posedge clk or negedge rst_n)
+        if (!rst_n)
+            ready_q <= 0;
+        else
+            ready_q <= out_ready;
+
+    // Input validity, sampled only on cycles that are not stalled.
+    always_ff @(posedge clk or negedge rst_n)
+        if (!rst_n)
+            valid_q <= 0;
+        else if (in_ready)
+            valid_q <= in_valid;
+
+endmodule
diff --git a/platform/wavelet3d/gfx_top.sv b/platform/wavelet3d/gfx_top.sv
index 1a57b90..b6538d7 100644
--- a/platform/wavelet3d/gfx_top.sv
+++ b/platform/wavelet3d/gfx_top.sv
@@ -1,47 +1,56 @@
module gfx_top
+import gfx::*;
(
- input logic clk,
- rst_n,
-
- input gfx::word a[gfx::SHADER_LANES],
- b[gfx::SHADER_LANES],
- input logic in_valid,
- setup_mul_float,
- setup_unit_b,
- mnorm_put_hi,
- mnorm_put_lo,
- mnorm_put_mul,
- mnorm_zero_b,
- mnorm_zero_flags,
- minmax_abs,
- minmax_swap,
- minmax_zero_min,
- minmax_copy_flags,
- shiftr_int_signed,
- addsub_copy_flags,
- addsub_int_operand,
- clz_force_nop,
- shiftl_copy_flags,
- round_copy_flags,
- round_enable,
- encode_enable,
-
- output logic out_valid,
- output gfx::word q[gfx::SHADER_LANES],
-
- input gfx::word geom_tdata,
- input logic geom_tlast,
- geom_tvalid,
- output logic geom_tready,
-
- input logic raster_tready,
- output logic raster_tlast,
- raster_tvalid,
- output gfx::word raster_tdata
+ input logic clk,
+ rst_n,
+
+ input word a[SHADER_LANES],
+ b[SHADER_LANES],
+ input logic in_valid,
+ setup_mul_float,
+ setup_unit_b,
+ mnorm_put_hi,
+ mnorm_put_lo,
+ mnorm_put_mul,
+ mnorm_zero_b,
+ mnorm_zero_flags,
+ minmax_abs,
+ minmax_swap,
+ minmax_zero_min,
+ minmax_copy_flags,
+ shiftr_int_signed,
+ addsub_copy_flags,
+ addsub_int_operand,
+ clz_force_nop,
+ shiftl_copy_flags,
+ round_copy_flags,
+ round_enable,
+ encode_enable,
+
+ output logic out_valid,
+ output word q[SHADER_LANES],
+
+ input word geom_tdata,
+ input logic geom_tlast,
+ geom_tvalid,
+ output logic geom_tready,
+
+ input logic raster_tready,
+ output logic raster_tlast,
+ raster_tvalid,
+ output word raster_tdata
);
+ gfx_wb fpint_wb();
+ gfx_axib insn_mem();
gfx_axil sched_axi();
gfx_pkts geometry(), coverage();
+ gfx_regfile_io fpint_io();
+
+ axi4lite_intf #(.ADDR_WIDTH(4)) core_sched();
+
+ assign q = fpint_wb.rx.lanes;
+ assign out_valid = fpint_wb.rx.valid;
assign geometry.tx.tdata = geom_tdata;
assign geometry.tx.tlast = geom_tlast;
@@ -53,9 +62,40 @@ module gfx_top
assign raster_tvalid = coverage.rx.tvalid;
assign coverage.rx.tready = raster_tready;
+ fpint_op op;
+ assign op.writeback = 1;
+ assign op.setup_mul_float = setup_mul_float;
+ assign op.setup_unit_b = setup_unit_b;
+ assign op.mnorm_put_hi = mnorm_put_hi;
+ assign op.mnorm_put_lo = mnorm_put_lo;
+ assign op.mnorm_put_mul = mnorm_put_mul;
+ assign op.mnorm_zero_b = mnorm_zero_b;
+ assign op.mnorm_zero_flags = mnorm_zero_flags;
+ assign op.minmax_abs = minmax_abs;
+ assign op.minmax_swap = minmax_swap;
+ assign op.minmax_zero_min = minmax_zero_min;
+ assign op.minmax_copy_flags = minmax_copy_flags;
+ assign op.shiftr_int_signed = shiftr_int_signed;
+ assign op.addsub_copy_flags = addsub_copy_flags;
+ assign op.addsub_int_operand = addsub_int_operand;
+ assign op.clz_force_nop = clz_force_nop;
+ assign op.shiftl_copy_flags = shiftl_copy_flags;
+ assign op.round_copy_flags = round_copy_flags;
+ assign op.round_enable = round_enable;
+ assign op.encode_enable = encode_enable;
+
+ assign fpint_io.regs.a = a;
+ assign fpint_io.regs.b = b;
+
gfx_fpint fpint
(
- .*
+ .clk,
+ .rst_n,
+ .op,
+ .wb(fpint_wb.tx),
+ .abort(0),
+ .in_valid,
+ .read_data(fpint_io.ab)
);
gfx_sched sched
@@ -74,4 +114,12 @@ module gfx_top
.coverage(coverage.tx)
);
+ gfx_shader shader
+ (
+ .clk,
+ .rst_n,
+ .sched(core_sched.slave),
+ .insn_mem(insn_mem.m)
+ );
+
endmodule
diff --git a/platform/wavelet3d/gfx_wb.sv b/platform/wavelet3d/gfx_wb.sv
new file mode 100644
index 0000000..cc25944
--- /dev/null
+++ b/platform/wavelet3d/gfx_wb.sv
@@ -0,0 +1,35 @@
+// Writeback bundle: one data word per shader lane plus group/destination
+// tags, 'scalar' and 'writeback' flags, and a valid/ready pair.
+interface gfx_wb;
+
+    import gfx::*;
+
+    logic    valid, ready;
+    logic    scalar, writeback;
+    group_id group;
+    xgpr_num dest;
+    word     lanes[SHADER_LANES];
+
+    // Producer: drives everything except ready.
+    modport tx
+    (
+        output valid,
+               writeback,
+               scalar,
+               group,
+               dest,
+               lanes,
+
+        input  ready
+    );
+
+    // Consumer: observes everything except ready, which it drives.
+    modport rx
+    (
+        input  valid,
+               writeback,
+               scalar,
+               group,
+               dest,
+               lanes,
+
+        output ready
+    );
+
+endinterface
diff --git a/platform/wavelet3d/mod.mk b/platform/wavelet3d/mod.mk
index b1e51f8..153f9c7 100644
--- a/platform/wavelet3d/mod.mk
+++ b/platform/wavelet3d/mod.mk
@@ -1,10 +1,21 @@
+cores := gfx_shader_schedif
+
define core
- $(this)/deps := axixbar picorv32
+ $(this)/deps := axixbar fp_unit gfx_shader_schedif picorv32
$(this)/rtl_top := gfx_top
$(this)/rtl_dirs := .
- $(this)/rtl_files := gfx_pkg.sv gfx_top.sv
+ $(this)/rtl_files := gfx_isa.sv gfx_pkg.sv gfx_top.sv
$(this)/vl_main := main.cpp
$(this)/vl_pkgconfig := sdl2
endef
+
+define core/gfx_shader_schedif
+ $(this)/hooks := regblock
+
+ $(this)/regblock_rdl := gfx_shader_schedif.rdl
+ $(this)/regblock_top := gfx_shader_schedif
+ $(this)/regblock_args := --default-reset arst_n
+ $(this)/regblock_cpuif := axi4-lite
+endef