summaryrefslogtreecommitdiff
path: root/rtl/legacy_gfx
diff options
context:
space:
mode:
Diffstat (limited to 'rtl/legacy_gfx')
-rw-r--r--rtl/legacy_gfx/gfx.sv208
-rw-r--r--rtl/legacy_gfx/gfx_assembly.sv89
-rw-r--r--rtl/legacy_gfx/gfx_clear.sv70
-rw-r--r--rtl/legacy_gfx/gfx_cmd.sv125
-rw-r--r--rtl/legacy_gfx/gfx_defs.sv267
-rw-r--r--rtl/legacy_gfx/gfx_dot.sv49
-rw-r--r--rtl/legacy_gfx/gfx_fifo.sv98
-rw-r--r--rtl/legacy_gfx/gfx_fifo_overflow.sv34
-rw-r--r--rtl/legacy_gfx/gfx_fix_floats.sv49
-rw-r--r--rtl/legacy_gfx/gfx_fix_vertex.sv64
-rw-r--r--rtl/legacy_gfx/gfx_fixed_div.sv77
-rw-r--r--rtl/legacy_gfx/gfx_fixed_fma.sv73
-rw-r--r--rtl/legacy_gfx/gfx_fixed_fma_dot.sv49
-rw-r--r--rtl/legacy_gfx/gfx_flush_flow.sv45
-rw-r--r--rtl/legacy_gfx/gfx_fold.sv54
-rw-r--r--rtl/legacy_gfx/gfx_fold_flow.sv61
-rw-r--r--rtl/legacy_gfx/gfx_fp_add.sv41
-rw-r--r--rtl/legacy_gfx/gfx_fp_fix.sv34
-rw-r--r--rtl/legacy_gfx/gfx_fp_mul.sv41
-rw-r--r--rtl/legacy_gfx/gfx_frag.sv79
-rw-r--r--rtl/legacy_gfx/gfx_frag_addr.sv59
-rw-r--r--rtl/legacy_gfx/gfx_frag_bary.sv78
-rw-r--r--rtl/legacy_gfx/gfx_frag_shade.sv53
-rw-r--r--rtl/legacy_gfx/gfx_funnel.sv96
-rw-r--r--rtl/legacy_gfx/gfx_lerp.sv32
-rw-r--r--rtl/legacy_gfx/gfx_mask_sram.sv31
-rw-r--r--rtl/legacy_gfx/gfx_masks.sv68
-rw-r--r--rtl/legacy_gfx/gfx_mat_mat.sv83
-rw-r--r--rtl/legacy_gfx/gfx_mat_vec.sv49
-rw-r--r--rtl/legacy_gfx/gfx_mem.sv228
-rw-r--r--rtl/legacy_gfx/gfx_persp.sv58
-rw-r--r--rtl/legacy_gfx/gfx_persp_vertex.sv52
-rw-r--r--rtl/legacy_gfx/gfx_pipeline_flow.sv40
-rw-r--r--rtl/legacy_gfx/gfx_pipes.sv24
-rw-r--r--rtl/legacy_gfx/gfx_raster.sv131
-rw-r--r--rtl/legacy_gfx/gfx_raster_coarse.sv135
-rw-r--r--rtl/legacy_gfx/gfx_raster_fine.sv49
-rw-r--r--rtl/legacy_gfx/gfx_rop.sv85
-rw-r--r--rtl/legacy_gfx/gfx_scanout.sv138
-rw-r--r--rtl/legacy_gfx/gfx_scanout_dac.sv117
-rw-r--r--rtl/legacy_gfx/gfx_setup.sv190
-rw-r--r--rtl/legacy_gfx/gfx_setup_bounds.sv73
-rw-r--r--rtl/legacy_gfx/gfx_setup_edge.sv53
-rw-r--r--rtl/legacy_gfx/gfx_setup_offsets.sv44
-rw-r--r--rtl/legacy_gfx/gfx_skid_buf.sv20
-rw-r--r--rtl/legacy_gfx/gfx_skid_flow.sv31
-rw-r--r--rtl/legacy_gfx/gfx_sp.sv131
-rw-r--r--rtl/legacy_gfx/gfx_sp_batch.sv141
-rw-r--r--rtl/legacy_gfx/gfx_sp_combiner.sv63
-rw-r--r--rtl/legacy_gfx/gfx_sp_decode.sv116
-rw-r--r--rtl/legacy_gfx/gfx_sp_fetch.sv224
-rw-r--r--rtl/legacy_gfx/gfx_sp_file.sv32
-rw-r--r--rtl/legacy_gfx/gfx_sp_isa.sv23
-rw-r--r--rtl/legacy_gfx/gfx_sp_issue.sv111
-rw-r--r--rtl/legacy_gfx/gfx_sp_regs.sv39
-rw-r--r--rtl/legacy_gfx/gfx_sp_select.sv25
-rw-r--r--rtl/legacy_gfx/gfx_sp_shuffler.sv70
-rw-r--r--rtl/legacy_gfx/gfx_sp_stream.sv66
-rw-r--r--rtl/legacy_gfx/gfx_sp_swizzle.sv19
-rw-r--r--rtl/legacy_gfx/gfx_sp_widener.sv63
-rw-r--r--rtl/legacy_gfx/gfx_sp_writeback.sv65
-rw-r--r--rtl/legacy_gfx/gfx_transpose.sv17
-rw-r--r--rtl/legacy_gfx/mod.mk5
63 files changed, 4834 insertions, 0 deletions
diff --git a/rtl/legacy_gfx/gfx.sv b/rtl/legacy_gfx/gfx.sv
new file mode 100644
index 0000000..73e5dbf
--- /dev/null
+++ b/rtl/legacy_gfx/gfx.sv
@@ -0,0 +1,208 @@
+`include "gfx/gfx_defs.sv"
+
+/* Top level of the legacy graphics block.
+ *
+ * Instantiates the command interface, shader processor, primitive assembly,
+ * float-to-fixed conversion, perspective stage, rasterizer, mask storage,
+ * clear sequencer, funnel, fragment stage, ROP, memory arbiter and scanout.
+ * The geometry path is chained with explicit ready/valid connections:
+ * assembly -> fix -> perspective -> raster -> funnel -> frag_ -> rop.
+ * Everything else is wired by name through ".*".
+ *
+ * All internal signals are declared before the first instance. They used to
+ * be declared next to the stage that drives them, which meant that several
+ * ready signals (and names picked up by ".*") were referenced before their
+ * declaration; strict tools treat such a reference in a port connection as
+ * an implicit net and then reject the later declaration.
+ */
+module gfx
+(
+    input logic clk,
+                rst_n,
+
+    input cmd_addr cmd_address,
+    input logic cmd_read,
+                cmd_write,
+    input cmd_word cmd_writedata,
+    output cmd_word cmd_readdata,
+
+    input logic mem_waitrequest,
+                mem_readdatavalid,
+    input vram_word mem_readdata,
+    output vram_byte_addr mem_address,
+    output logic mem_read,
+                 mem_write,
+    output vram_word mem_writedata,
+
+    input vram_addr host_address,
+    input logic host_read,
+                host_write,
+    input vram_word host_writedata,
+    output logic host_waitrequest,
+                 host_readdatavalid,
+    output vram_word host_readdata,
+
+    input logic scan_ready,
+    output logic scan_valid,
+                 scan_endofpacket,
+                 scan_startofpacket,
+    output rgb30 scan_data
+);
+
+    // Command/register block
+    logic enable_clear, program_start, start_clear, swap_buffers;
+    rgb24 clear_color;
+    cmd_word fb_base_a, fb_base_b, program_header_base, program_header_size;
+
+    // Shader processor
+    logic batch_read, fetch_read, running, send_valid;
+    lane_word send_data;
+    lane_mask send_mask;
+    vram_addr batch_address, fetch_address;
+
+    // Primitive assembly
+    logic send_ready, assembly_valid;
+    fp_xyzw assembly_vertex_a, assembly_vertex_b, assembly_vertex_c;
+
+    // Float to fixed conversion
+    logic fix_ready, fix_valid;
+    raster_xyzw fix_vertex_a, fix_vertex_b, fix_vertex_c;
+
+    // Perspective stage
+    logic persp_ready, persp_valid;
+    raster_xyzw persp_vertex_a, persp_vertex_b, persp_vertex_c;
+
+    // Rasterizer
+    logic raster_ready;
+    fixed_tri raster_ws;
+    bary_lanes barys;
+    paint_lanes raster_valid;
+    frag_xy_lanes fragments;
+
+    // Mask storage
+    logic frag_mask, scan_mask;
+    vram_addr frag_base, scan_base;
+
+    // Clear sequencer
+    logic frag_mask_set, frag_mask_write, frag_wait;
+    linear_coord frag_mask_write_addr;
+
+    // Funnel
+    logic funnel_ready, funnel_valid;
+    frag_xy frag;
+    fixed_tri frag_bary, frag_ws;
+
+    // Fragment stage
+    logic frag_ready, frag_valid;
+    frag_paint frag_out;
+
+    // ROP
+    logic rop_mask_assert, rop_ready, rop_write;
+    vram_addr rop_address;
+    vram_word rop_writedata;
+    linear_coord rop_mask_addr;
+
+    // Memory arbiter
+    logic batch_readdatavalid, fb_readdatavalid, fetch_readdatavalid,
+          batch_waitrequest, fb_waitrequest, fetch_waitrequest, rop_waitrequest;
+
+    vram_word batch_readdata, fb_readdata, fetch_readdata;
+
+    // Scanout
+    logic fb_read, vsync;
+    vram_addr fb_address;
+    linear_coord scan_mask_addr;
+
+    gfx_cmd cmd
+    (
+        .*
+    );
+
+    gfx_sp sp
+    (
+        .*
+    );
+
+    gfx_assembly assembly
+    (
+        .out_ready(fix_ready),
+        .out_valid(assembly_valid),
+        .out_vertex_a(assembly_vertex_a),
+        .out_vertex_b(assembly_vertex_b),
+        .out_vertex_c(assembly_vertex_c),
+        .*
+    );
+
+    gfx_fix_floats fix
+    (
+        .in_ready(fix_ready),
+        .in_valid(assembly_valid),
+        .out_ready(persp_ready),
+        .out_valid(fix_valid),
+        .in_vertex_a(assembly_vertex_a),
+        .in_vertex_b(assembly_vertex_b),
+        .in_vertex_c(assembly_vertex_c),
+        .out_vertex_a(fix_vertex_a),
+        .out_vertex_b(fix_vertex_b),
+        .out_vertex_c(fix_vertex_c),
+        .*
+    );
+
+    gfx_persp perspective
+    (
+        .in_ready(persp_ready),
+        .in_valid(fix_valid),
+        .out_ready(raster_ready),
+        .out_valid(persp_valid),
+        .in_vertex_a(fix_vertex_a),
+        .in_vertex_b(fix_vertex_b),
+        .in_vertex_c(fix_vertex_c),
+        .out_vertex_a(persp_vertex_a),
+        .out_vertex_b(persp_vertex_b),
+        .out_vertex_c(persp_vertex_c),
+        .*
+    );
+
+    gfx_raster raster
+    (
+        .ws(raster_ws),
+        .in_ready(raster_ready),
+        .in_valid(persp_valid),
+        .out_ready(funnel_ready),
+        .out_valid(raster_valid),
+
+        .vertex_a(persp_vertex_a),
+        .vertex_b(persp_vertex_b),
+        .vertex_c(persp_vertex_c),
+
+        .*
+    );
+
+    gfx_masks masks
+    (
+        .frag_mask_read_addr(), // deliberately left unconnected, as before
+        .*
+    );
+
+    gfx_clear clear
+    (
+        .*
+    );
+
+    gfx_funnel funnel
+    (
+        .in_ready(funnel_ready),
+        .in_valid(raster_valid),
+        .out_ready(frag_ready),
+        .out_valid(funnel_valid),
+        .*
+    );
+
+    gfx_frag frag_
+    (
+        .out(frag_out),
+
+        .ws(frag_ws),
+        .bary(frag_bary),
+        .in_ready(frag_ready),
+        .in_valid(funnel_valid),
+        .out_ready(rop_ready),
+        .out_valid(frag_valid),
+        .*
+    );
+
+    gfx_rop rop
+    (
+        .in(frag_out),
+        .in_ready(rop_ready),
+        .in_valid(frag_valid),
+        .mask_addr(rop_mask_addr),
+        .mask_assert(rop_mask_assert),
+        .*
+    );
+
+    gfx_mem mem
+    (
+        .*
+    );
+
+    gfx_scanout scanout
+    (
+        .mask(scan_mask),
+        .mask_addr(scan_mask_addr),
+        .*
+    );
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_assembly.sv b/rtl/legacy_gfx/gfx_assembly.sv
new file mode 100644
index 0000000..1a909be
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_assembly.sv
@@ -0,0 +1,89 @@
+`include "gfx/gfx_defs.sv"
+/* Primitive assembly. Buffers SETS_PER_TRI lane words from the send
+ * interface, then presents one triangle per lane (selected by out_lane)
+ * on the out_* handshake. Only sets 0, 2 and 4 are read back, as vertices
+ * a, b and c; the odd sets are stored but never read in this module.
+ */
+module gfx_assembly
+(
+ input logic clk,
+ rst_n,
+
+ input lane_word send_data,
+ input lane_mask send_mask,
+ input logic send_valid,
+ output logic send_ready,
+
+ input logic out_ready,
+ output logic out_valid,
+ output fp_xyzw out_vertex_a,
+ out_vertex_b,
+ out_vertex_c
+);
+
+ localparam SETS_PER_TRI = 6; // lane words collected before assembling
+
+ mat4 sets[SETS_PER_TRI];
+ logic assemble_next, permit_out;
+ lane_mask current_mask, next_mask;
+ logic[1:0] out_lane;
+ logic[2:0] set_num;
+
+ enum int unsigned
+ {
+ GET_LANES,
+ ASSEMBLE
+ } state;
+
+ assign out_valid = permit_out && current_mask[out_lane]; // masked-out lanes never go valid
+ assign out_vertex_a = sets[0][out_lane];
+ assign out_vertex_b = sets[2][out_lane];
+ assign out_vertex_c = sets[4][out_lane];
+
+ assign next_mask = current_mask & send_mask; // a lane survives only if every send enabled it
+ assign assemble_next = !current_mask[out_lane] || out_ready; // skip masked lanes, else wait for downstream
+
+/* Control state. GET_LANES accepts one lane word per cycle while send_valid
+ * is high; ASSEMBLE walks out_lane over the four lanes.
+ *
+ * NOTE(review): set_num is not cleared when the last set is accepted (it
+ * becomes SETS_PER_TRI), and neither set_num nor current_mask is
+ * re-initialised when ASSEMBLE returns to GET_LANES; only an all-zero
+ * next_mask resets them. Presumably the sender relies on that - confirm,
+ * because otherwise sets[set_num] is indexed past its last entry.
+ */
+ always_ff @(posedge clk or negedge rst_n)
+ if (!rst_n) begin
+ state <= GET_LANES;
+ set_num <= 0;
+ out_lane <= 0;
+ permit_out <= 0;
+ send_ready <= 1;
+ current_mask <= {($bits(current_mask)){1'b1}};
+ end else unique case (state)
+ GET_LANES:
+ if (send_valid) begin
+ set_num <= set_num + 1;
+ current_mask <= next_mask;
+
+ if (set_num == SETS_PER_TRI - 1) begin // this send completes the group
+ state <= ASSEMBLE;
+ permit_out <= 1;
+ send_ready <= 0;
+ end
+
+ if (!(|next_mask)) begin // no lane left enabled: restart collection (overrides the branch above)
+ state <= GET_LANES;
+ set_num <= 0;
+ current_mask <= {($bits(current_mask)){1'b1}};
+ end
+ end
+
+ ASSEMBLE:
+ if (assemble_next) begin
+ out_lane <= out_lane + 1;
+ if (&out_lane) begin // last lane handled, out_lane wraps back to 0
+ state <= GET_LANES;
+ permit_out <= 0;
+ send_ready <= 1;
+ end
+ end
+ endcase
+
+ // Storage for the received lane words; no reset, written only in GET_LANES
+ always_ff @(posedge clk)
+ unique case (state)
+ GET_LANES:
+ if (send_valid)
+ sets[set_num] <= send_data;
+
+ ASSEMBLE: ;
+ endcase
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_clear.sv b/rtl/legacy_gfx/gfx_clear.sv
new file mode 100644
index 0000000..ae9a20c
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_clear.sv
@@ -0,0 +1,70 @@
+`include "gfx/gfx_defs.sv"
+
+/* Mask write sequencer.
+ *
+ * In the FRAG state, mask writes requested by the ROP (rop_mask_assert at
+ * rop_mask_addr) are forwarded one cycle later with frag_mask_set high.
+ * A start_clear request switches to the CLEAR state, where the write address
+ * counts from 0 up to `GFX_LINEAR_RES - 1 with frag_mask_set low.
+ * frag_wait is high on the start_clear cycle and during the whole clear.
+ */
+module gfx_clear
+(
+    input logic clk,
+                rst_n,
+
+    input logic start_clear,
+
+    input linear_coord rop_mask_addr,
+    input logic rop_mask_assert,
+    output logic frag_wait,
+
+    output logic frag_mask_set,
+                 frag_mask_write,
+    output linear_coord frag_mask_write_addr
+);
+
+    enum int unsigned
+    {
+        FRAG,
+        CLEAR
+    } state;
+
+    logic end_clear;
+
+    // The sweep is over once the last linear address is being written
+    assign end_clear = frag_mask_write_addr == `GFX_LINEAR_RES - 1;
+
+    // Stall fragments as soon as a clear is requested and until it finishes
+    assign frag_wait = (state == CLEAR) ? 1'b1 : start_clear;
+
+    // State and write strobe
+    always_ff @(posedge clk or negedge rst_n)
+        if (!rst_n) begin
+            state <= FRAG;
+            frag_mask_write <= 0;
+        end else if (state == FRAG) begin
+            if (start_clear) begin
+                state <= CLEAR;
+                frag_mask_write <= 1;
+            end else
+                frag_mask_write <= rop_mask_assert;
+        end else if (state == CLEAR && end_clear) begin
+            state <= FRAG;
+            frag_mask_write <= 0;
+        end
+
+    // Write address and set/clear polarity (not reset)
+    always_ff @(posedge clk)
+        if (state == FRAG) begin
+            frag_mask_set <= !start_clear;
+
+            if (start_clear)
+                frag_mask_write_addr <= 0;
+            else
+                frag_mask_write_addr <= rop_mask_addr;
+        end else if (state == CLEAR)
+            frag_mask_write_addr <= frag_mask_write_addr + 1;
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_cmd.sv b/rtl/legacy_gfx/gfx_cmd.sv
new file mode 100644
index 0000000..29b6e21
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_cmd.sv
@@ -0,0 +1,125 @@
+`include "gfx/gfx_defs.sv"
+
+/* Host command/register interface.
+ *
+ * Writes are decoded on cmd_address[2:0]:
+ *   - `GFX_CMD_REG_SCAN: latches enable_clear, swap_buffers and clear_color
+ *     into "next_*" shadow registers, and records a pending frame clear when
+ *     both start_frame and enable_clear are written as 1. The shadow values
+ *     become visible on the outputs at the next vsync; start_clear is then
+ *     pulsed for one cycle if a clear was pending.
+ *   - `GFX_CMD_REG_HEADER_BASE / `GFX_CMD_REG_HEADER_SIZE: store the written
+ *     word; writing the size also pulses program_start for one cycle.
+ *   - `GFX_CMD_REG_FB_BASE_A / `GFX_CMD_REG_FB_BASE_B: store the written word.
+ *
+ * cmd_readdata always returns the scan register layout built from the
+ * currently active values, regardless of cmd_address or cmd_read.
+ */
+module gfx_cmd
+(
+    input logic clk,
+                rst_n,
+
+    input cmd_addr cmd_address,
+    input logic cmd_read,
+                cmd_write,
+    input cmd_word cmd_writedata,
+    output cmd_word cmd_readdata,
+
+    input logic vsync,
+
+    output logic swap_buffers,
+                 enable_clear,
+                 start_clear,
+    output rgb24 clear_color,
+
+    output logic program_start,
+    output cmd_word program_header_base,
+                    program_header_size,
+
+    output cmd_word fb_base_a,
+                    fb_base_b
+);
+
+    rgb24 next_clear_color;
+    logic do_start_clear, next_start_clear, next_enable_clear, next_swap_buffers;
+
+    // Bit layout of the scan register, MSB first
+    struct packed
+    {
+        logic[4:0] mbz;
+        logic start_frame,
+              enable_clear,
+              swap_buffers;
+        rgb24 clear_color;
+    } readdata_scan, writedata_scan;
+
+    assign cmd_readdata = readdata_scan;
+
+    assign writedata_scan = cmd_writedata;
+    assign readdata_scan.mbz = 0;
+    // start_frame is only a write strobe. It used to be left undriven on the
+    // read side, which put an unknown bit on cmd_readdata; read it as zero.
+    assign readdata_scan.start_frame = 0;
+    assign readdata_scan.clear_color = clear_color;
+    assign readdata_scan.enable_clear = enable_clear;
+    assign readdata_scan.swap_buffers = swap_buffers;
+
+    // A clear is only requested when the frame start also enables clearing
+    assign do_start_clear = writedata_scan.start_frame && writedata_scan.enable_clear;
+
+    // Control registers with reset
+    always_ff @(posedge clk or negedge rst_n)
+        if (!rst_n) begin
+            start_clear <= 0;
+            enable_clear <= 0;
+            swap_buffers <= 0;
+
+            next_start_clear <= 0;
+            next_enable_clear <= 0;
+            next_swap_buffers <= 0;
+
+            program_start <= 0;
+
+            fb_base_a <= 0;
+            fb_base_b <= 0;
+        end else begin
+            // One-cycle pulses unless re-asserted below
+            start_clear <= 0;
+            program_start <= 0;
+
+            if (vsync) begin
+                // Shadow values take effect at the frame boundary
+                start_clear <= next_start_clear;
+                enable_clear <= next_enable_clear;
+                swap_buffers <= next_swap_buffers;
+
+                next_start_clear <= 0;
+            end
+
+            if (cmd_write)
+                unique case (cmd_address[2:0])
+                    `GFX_CMD_REG_ID: ;
+
+                    `GFX_CMD_REG_SCAN: begin
+                        next_enable_clear <= writedata_scan.enable_clear;
+                        next_swap_buffers <= writedata_scan.swap_buffers;
+
+                        // A pending clear is sticky until the next vsync
+                        if (!next_start_clear)
+                            next_start_clear <= do_start_clear;
+                    end
+
+                    `GFX_CMD_REG_HEADER_BASE: ;
+
+                    `GFX_CMD_REG_HEADER_SIZE:
+                        program_start <= 1;
+
+                    `GFX_CMD_REG_FB_BASE_A:
+                        fb_base_a <= cmd_writedata;
+
+                    `GFX_CMD_REG_FB_BASE_B:
+                        fb_base_b <= cmd_writedata;
+
+                    default: ;
+                endcase
+        end
+
+    // Data registers without reset
+    always_ff @(posedge clk) begin
+        if (vsync)
+            clear_color <= next_clear_color;
+
+        if (cmd_write)
+            unique case (cmd_address[2:0])
+                `GFX_CMD_REG_ID: ;
+
+                `GFX_CMD_REG_SCAN:
+                    next_clear_color <= writedata_scan.clear_color;
+
+                `GFX_CMD_REG_HEADER_BASE:
+                    program_header_base <= cmd_writedata;
+
+                `GFX_CMD_REG_HEADER_SIZE:
+                    program_header_size <= cmd_writedata;
+
+                default: ;
+            endcase
+    end
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_defs.sv b/rtl/legacy_gfx/gfx_defs.sv
new file mode 100644
index 0000000..1e7a335
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_defs.sv
@@ -0,0 +1,267 @@
+`ifndef GFX_DEFS_SV
+`define GFX_DEFS_SV
+/* Shared macros and packed types for the gfx_* modules: float and
+ * fixed-point formats, pipeline stage counts, raster/fragment types,
+ * memory geometry, command register map and shader-processor decode types.
+ */
+// This is architectural; it cannot be adjusted without changing other things
+`define FLOAT_BITS 16
+`define FLOATS_PER_VEC 4
+`define VECS_PER_MAT 4
+
+// 200MHz target (clock is 143MHz) with float16, (very) approximate rounding
+`define FP_ADD_STAGES 10 // ~401 LUTs
+`define FP_MUL_STAGES 5 // ~144 LUTs ~1 DSP block
+`define FP_FIX_STAGES 5 // ~313 LUTs
+
+typedef logic[`FLOAT_BITS - 1:0] fp;
+typedef fp[1:0] vec2;
+typedef fp[`FLOATS_PER_VEC - 1:0] vec4;
+typedef vec4[`VECS_PER_MAT - 1:0] mat4;
+
+`define FP_UNIT 16'h3c00
+
+typedef logic[1:0] index4;
+
+`define INDEX4_MIN 2'b00
+`define INDEX4_MAX 2'b11
+
+typedef logic[8:0] x_coord; // NOTE(review): 9 bits cannot index 640 columns; x_coord/y_coord widths look swapped - confirm against users
+typedef logic[9:0] y_coord;
+typedef logic[9:0] xy_coord;
+typedef logic[18:0] linear_coord;
+typedef logic[19:0] half_coord;
+
+`define GFX_X_RES 640
+`define GFX_Y_RES 480
+`define GFX_LINEAR_RES (`GFX_X_RES * `GFX_Y_RES)
+
+`define COLOR_CHANNELS 4
+
+typedef logic[7:0] color8;
+typedef logic[9:0] color10;
+
+typedef struct packed
+{
+ color8 r, g, b;
+} rgb24;
+
+typedef struct packed
+{
+ color10 r, g, b;
+} rgb30;
+
+typedef struct packed
+{
+ color8 a, r, g, b;
+} rgb32;
+
+`define FIXED_FRAC 16
+
+`define FIXED_DIV_PIPES 2
+`define FIXED_DIV_STAGES (`FIXED_DIV_PIPES + $bits(fixed) + `FIXED_FRAC)
+`define FIXED_FMA_STAGES 5
+`define FIXED_FMA_DOT_STAGES (2 * `FIXED_FMA_STAGES)
+`define LERP_STAGES `FIXED_FMA_DOT_STAGES
+
+typedef logic signed[31:0] fixed;
+typedef fixed[2:0] fixed_tri;
+
+`define EDGE_P0_TO_P1 0
+`define EDGE_P1_TO_P2 1
+`define EDGE_P2_TO_P0 2
+
+typedef struct packed
+{
+ fixed x, y;
+} raster_xy;
+
+typedef struct packed
+{
+ fixed z, w;
+} raster_zw;
+
+typedef struct packed
+{
+ raster_xy xy;
+ raster_zw zw;
+} raster_xyzw;
+
+typedef struct packed
+{
+ fp x, y, z, w;
+} fp_xyzw;
+
+typedef logic[8:0] coarse_dim;
+
+`define GFX_MASK_SRAM_STAGES 3
+`define GFX_MASK_STAGES (1 + `GFX_MASK_SRAM_STAGES + 1)
+
+`define GFX_SCANOUT_FIFO_DEPTH 16 // Adjustable
+
+`define GFX_SETUP_BOUNDS_STAGES 3
+`define GFX_SETUP_EDGE_STAGES (1 + `FIXED_FMA_DOT_STAGES)
+`define GFX_SETUP_OFFSETS_STAGES 2
+`define GFX_SETUP_STAGES (`GFX_SETUP_BOUNDS_STAGES \
+ + `GFX_SETUP_EDGE_STAGES \
+ + `GFX_SETUP_OFFSETS_STAGES)
+
+`define GFX_FINE_STAGES 2
+
+`define GFX_RASTER_BITS 1 // Used to be 2, but the FPGA cannot fit that much
+`define GFX_RASTER_SUB_BITS 4
+`define GFX_RASTER_PAD_BITS ($bits(fixed) - $bits(coarse_dim) - `FIXED_FRAC - `GFX_RASTER_BITS)
+`define GFX_RASTER_SIZE (1 << `GFX_RASTER_BITS)
+`define GFX_RASTER_OFFSETS (1 << (2 * `GFX_RASTER_BITS))
+
+typedef struct packed
+{
+ logic[`GFX_RASTER_SUB_BITS - 1:0] num;
+ logic[`FIXED_FRAC - `GFX_RASTER_SUB_BITS - 1:0] prec;
+} raster_sub;
+
+typedef struct packed
+{
+ logic sign;
+ logic[`GFX_RASTER_PAD_BITS - 1:0] padding;
+ logic[$bits(coarse_dim) - 2:0] coarse;
+ logic[`GFX_RASTER_BITS - 1:0] fine;
+ raster_sub sub;
+} raster_prec;
+
+typedef struct packed
+{
+ raster_prec x, y;
+} raster_xy_prec;
+
+typedef fixed[`GFX_RASTER_OFFSETS - 1:0] raster_offsets;
+typedef raster_offsets[2:0] raster_offsets_tri;
+
+`define GFX_FINE_LANES (`GFX_RASTER_SIZE * `GFX_RASTER_SIZE)
+
+typedef struct packed
+{
+ xy_coord x, y;
+} frag_xy;
+
+typedef frag_xy[`GFX_FINE_LANES - 1:0] frag_xy_lanes;
+typedef logic[`GFX_FINE_LANES - 1:0] paint_lanes;
+typedef fixed[`COLOR_CHANNELS - 1:0] color_lerp_lanes;
+typedef fixed_tri[`GFX_FINE_LANES - 1:0] bary_lanes;
+
+typedef struct packed
+{
+ linear_coord addr;
+ rgb32 color;
+} frag_paint;
+
+`define GFX_FRAG_ADDR_STAGES 3
+`define GFX_FRAG_BARY_STAGES (`FIXED_DIV_STAGES + 2 + `FIXED_DIV_STAGES)
+`define GFX_FRAG_SHADE_STAGES (`LERP_STAGES + 1)
+`define GFX_FRAG_STAGES (`GFX_FRAG_BARY_STAGES + `GFX_FRAG_SHADE_STAGES)
+
+`define GFX_MEM_WORD_ADDR_BITS 25
+`define GFX_MEM_DATA_BITS 16 // Nothing I can do about this
+`define GFX_MEM_SUBWORD_BITS ($clog2(`GFX_MEM_DATA_BITS / 8))
+`define GFX_MEM_ADDR_BITS (`GFX_MEM_WORD_ADDR_BITS + `GFX_MEM_SUBWORD_BITS)
+`define GFX_MEM_RESPONSE_DEPTH 2 // Tune this
+`define GFX_MEM_TRANS_DEPTH 4 // DO NOT TOUCH, see `GFX_MEM_MAX_PENDING_READS
+`define GFX_MEM_DISPATCH_DEPTH 8 // Note that platform.vram_0.s1.maximumPendingReadTransactions = 7
+
+// DO NOT TOUCH. This must match gfx_hw.tcl exactly
+`define GFX_VRAM_MAX_PENDING_READS 7 // platform.vram_0.s1.maximumPendingReadTransactions
+`define GFX_MEM_MAX_PENDING_READS (1 + `GFX_MEM_TRANS_DEPTH + 1 + `GFX_VRAM_MAX_PENDING_READS)
+
+typedef logic[`GFX_MEM_DATA_BITS - 1:0] vram_word;
+typedef logic[`GFX_MEM_ADDR_BITS - 1:0] vram_byte_addr;
+typedef logic[`GFX_MEM_WORD_ADDR_BITS - 1:0] vram_addr;
+
+`define GFX_INSN_BITS 32
+`define GFX_INSN_ADDR_BITS (`GFX_MEM_WORD_ADDR_BITS - $clog2(`GFX_INSN_BITS / `GFX_MEM_DATA_BITS))
+`define GFX_INSN_SUBWORD_BITS (`GFX_MEM_ADDR_BITS - `GFX_INSN_ADDR_BITS)
+`define GFX_LANE_BITS $bits(mat4)
+`define GFX_LANE_ADDR_BITS (`GFX_MEM_WORD_ADDR_BITS - $clog2(`GFX_LANE_BITS / `GFX_MEM_DATA_BITS))
+`define GFX_LANE_SUBWORD_BITS (`GFX_MEM_ADDR_BITS - `GFX_LANE_ADDR_BITS)
+`define GFX_INSN_BITS_IN_LANE (`GFX_LANE_SUBWORD_BITS - `GFX_INSN_SUBWORD_BITS)
+
+typedef logic[`GFX_INSN_BITS - 1:0] insn_word;
+typedef logic[`GFX_LANE_BITS - 1:0] lane_word;
+typedef logic[`GFX_INSN_ADDR_BITS - 1:0] vram_insn_addr;
+typedef logic[`GFX_LANE_ADDR_BITS - 1:0] vram_lane_addr;
+
+typedef logic[5:0] cmd_addr;
+typedef logic[31:0] cmd_word;
+
+`define GFX_CMD_REG_ID 3'b000
+`define GFX_CMD_REG_SCAN 3'b001
+`define GFX_CMD_REG_HEADER_BASE 3'b010
+`define GFX_CMD_REG_HEADER_SIZE 3'b011
+`define GFX_CMD_REG_FB_BASE_A 3'b100
+`define GFX_CMD_REG_FB_BASE_B 3'b101
+
+typedef struct packed
+{
+ logic[$bits(cmd_word) - $bits(vram_insn_addr) - `GFX_INSN_SUBWORD_BITS - 1:0] pad;
+ vram_insn_addr addr;
+ logic[`GFX_INSN_SUBWORD_BITS - 1:0] sub;
+} cmd_insn_ptr;
+
+typedef struct packed
+{
+ logic[$bits(cmd_word) - $bits(vram_lane_addr) - `GFX_LANE_SUBWORD_BITS - 1:0] pad;
+ vram_lane_addr addr;
+ logic[`GFX_LANE_SUBWORD_BITS - 1:0] sub;
+} cmd_lane_ptr;
+
+`define GFX_FETCH_FIFO_DEPTH 8
+
+`define GFX_BATCH_FIFO_DEPTH 4
+`define GFX_SP_LANES `VECS_PER_MAT
+
+typedef logic[`GFX_SP_LANES - 1:0] lane_mask;
+typedef logic[`FLOATS_PER_VEC - 1:0] vec_mask;
+
+typedef logic[`FLOATS_PER_VEC - 1:0][$clog2(`FLOATS_PER_VEC) - 1:0] swizzle_lanes;
+
+`define GFX_SP_REG_BITS 3
+`define GFX_SP_REG_COUNT (1 << `GFX_SP_REG_BITS)
+
+typedef logic[`GFX_SP_REG_BITS - 1:0] vreg_num;
+
+typedef struct packed
+{
+ logic stream,
+ combiner,
+ shuffler;
+} ex_units;
+
+typedef struct packed
+{
+ logic is_swizzle,
+ is_broadcast;
+ fp imm;
+ vec_mask select_mask;
+ swizzle_lanes swizzle_op;
+} shuffler_deco;
+
+typedef struct packed
+{
+ logic writeback,
+ read_src_a,
+ read_src_b,
+ clear_lanes;
+ vreg_num dst,
+ src_a,
+ src_b;
+ ex_units ex;
+ shuffler_deco shuffler;
+} insn_deco; // "insn_decode" already exists in core; this is confusing but was done for lack of time
+
+typedef struct packed
+{
+ vreg_num dst;
+ mat4 data;
+} wb_op;
+
+`define GFX_SP_COMBINER_FIFO_DEPTH 4 // TODO: optimize this
+
+`define GFX_SP_WB_STAGES 2
+
+`endif
diff --git a/rtl/legacy_gfx/gfx_dot.sv b/rtl/legacy_gfx/gfx_dot.sv
new file mode 100644
index 0000000..9c21c23
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_dot.sv
@@ -0,0 +1,49 @@
+`include "gfx/gfx_defs.sv"
+/* Dot product of two vec4 operands. Each component pair goes through its
+ * own gfx_fp_mul followed by a skid buffer; the four buffered products are
+ * then reduced to the single result q by gfx_fold. The multiplier side and
+ * the fold side are stalled independently (stall_mul / stall_fold), while
+ * feedback and feedback_last are passed to gfx_fold by name.
+ */
+module gfx_dot
+(
+ input logic clk,
+
+ input logic stall_mul,
+ stall_fold,
+ feedback,
+ feedback_last,
+
+ input vec4 a,
+ b,
+
+ output fp q
+);
+
+ vec4 products_fold, products_mul; // products after / before the skid buffers
+
+ gfx_fold fold
+ (
+ .vec(products_fold),
+ .stall(stall_fold),
+ .*
+ );
+
+ genvar i;
+ generate
+ for (i = 0; i < `FLOATS_PER_VEC; ++i) begin: entries
+ gfx_fp_mul entry_i
+ (
+ .a(a[i]),
+ .b(b[i]),
+ .q(products_mul[i]),
+ .stall(stall_mul),
+ .*
+ );
+
+ gfx_skid_buf #(.WIDTH($bits(fp))) skid_i
+ (
+ .in(products_mul[i]),
+ .out(products_fold[i]),
+ .stall(stall_mul),
+ .*
+ );
+ end
+ endgenerate
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_fifo.sv b/rtl/legacy_gfx/gfx_fifo.sv
new file mode 100644
index 0000000..e9fa8f5
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_fifo.sv
@@ -0,0 +1,98 @@
+module gfx_fifo // ready/valid FIFO with skid stages on both the write and the read side
+#(parameter WIDTH=0, DEPTH=0)
+(
+ input logic clk,
+ rst_n,
+
+ input logic[WIDTH - 1:0] in,
+ input logic in_valid,
+ output logic in_ready,
+
+ input logic out_ready,
+ output logic out_valid,
+ output logic[WIDTH - 1:0] out
+);
+/* Storage is the unpacked array fifo[DEPTH], addressed by read_ptr and
+ * write_ptr. Equal pointers mean either empty or full; full_if_eq records
+ * which one it is.
+ *
+ * NOTE(review): the pointers are $clog2(DEPTH) bits wide and wrap by plain
+ * overflow, so this presumably expects DEPTH to be a power of two - confirm.
+ */
+ logic do_read, do_write, full_if_eq, in_stall, out_stall,
+ may_read, may_write, read, read_ok, write;
+
+ logic[WIDTH - 1:0] fifo[DEPTH], read_data, write_data;
+ logic[$clog2(DEPTH) - 1:0] read_ptr, write_ptr;
+
+ assign do_read = read && may_read; // a read actually happens this cycle
+ assign do_write = write && may_write; // a write actually happens this cycle
+
+ always_comb begin
+ may_read = full_if_eq; // pointers equal: readable only when full
+ may_write = !full_if_eq; // pointers equal: writable only when empty
+
+ if (read)
+ may_write = 1; // a requested read makes room for a write in the same cycle
+
+ if (read_ptr != write_ptr) begin // neither empty nor full
+ may_read = 1;
+ may_write = 1;
+ end
+ end
+
+ gfx_skid_flow in_flow
+ (
+ .stall(in_stall),
+ .out_ready(may_write),
+ .out_valid(write),
+ .*
+ );
+
+ gfx_skid_flow out_flow
+ (
+ .stall(out_stall),
+ .in_ready(read),
+ .in_valid(read_ok),
+ .*
+ );
+
+ gfx_skid_buf #(.WIDTH(WIDTH)) in_skid
+ (
+ .out(write_data),
+ .stall(in_stall),
+ .*
+ );
+
+ gfx_skid_buf #(.WIDTH(WIDTH)) out_skid
+ (
+ .in(read_data),
+ .stall(out_stall),
+ .*
+ );
+
+ // Pointers, occupancy flag and the valid bit that accompanies read_data
+ always_ff @(posedge clk or negedge rst_n)
+ if (!rst_n) begin
+ read_ok <= 0;
+ read_ptr <= 0;
+ write_ptr <= 0;
+ full_if_eq <= 0;
+ end else begin
+ if (!out_stall)
+ read_ok <= read && may_read;
+
+ if (do_read)
+ read_ptr <= read_ptr + 1;
+
+ if (do_write)
+ write_ptr <= write_ptr + 1;
+
+ if (do_read && !do_write) // a lone read can only leave equal pointers empty
+ full_if_eq <= 0;
+ else if (!do_read && do_write) // a lone write can only leave equal pointers full
+ full_if_eq <= 1;
+ end
+
+ // Memory access (not reset)
+ always_ff @(posedge clk) begin
+ if (!out_stall)
+ read_data <= fifo[read_ptr];
+
+ if (may_write) // stored whenever there is room; write_ptr only advances on do_write
+ fifo[write_ptr] <= write_data;
+ end
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_fifo_overflow.sv b/rtl/legacy_gfx/gfx_fifo_overflow.sv
new file mode 100644
index 0000000..c9cb3de
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_fifo_overflow.sv
@@ -0,0 +1,34 @@
+`include "gfx/gfx_defs.sv"
+
+/* Occupancy counter used to guard a FIFO of DEPTH entries.
+ *
+ * "down" adds one outstanding entry; an accepted output transfer
+ * (out_ready && out_valid) removes one. When both happen in the same
+ * cycle the count is unchanged.
+ *
+ *   empty     - no entry is outstanding
+ *   down_safe - an entry is being removed this cycle, or the count is
+ *               below DEPTH - 1
+ */
+module gfx_fifo_overflow
+#(parameter DEPTH=0)
+(
+    input logic clk,
+                rst_n,
+
+    input logic down,
+                out_ready,
+                out_valid,
+
+    output logic empty,
+                 down_safe
+);
+
+    logic drained;
+    logic[$clog2(DEPTH + 1) - 1:0] occupancy;
+
+    assign drained = out_ready && out_valid;
+    assign empty = ~|occupancy;
+    assign down_safe = drained || occupancy < DEPTH - 1;
+
+    always_ff @(posedge clk or negedge rst_n)
+        if (!rst_n)
+            occupancy <= 0;
+        else
+            case ({drained, down})
+                2'b10: occupancy <= occupancy - 1;
+                2'b01: occupancy <= occupancy + 1;
+                default: ; // idle, or one in and one out
+            endcase
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_fix_floats.sv b/rtl/legacy_gfx/gfx_fix_floats.sv
new file mode 100644
index 0000000..fe3ab21
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_fix_floats.sv
@@ -0,0 +1,49 @@
+`include "gfx/gfx_defs.sv"
+/* Converts the three vertices of a triangle from floating point to the
+ * fixed-point raster format, one gfx_fix_vertex per vertex. The ready/valid
+ * handshake is handled by a gfx_pipeline_flow of `FP_FIX_STAGES + 1 stages;
+ * the shared stall signal is presumably driven by that instance through
+ * ".*" (its port list is not visible here).
+ */
+module gfx_fix_floats
+(
+ input logic clk,
+ rst_n,
+
+ input vec4 in_vertex_a,
+ in_vertex_b,
+ in_vertex_c,
+ input logic in_valid,
+ output logic in_ready,
+
+ input logic out_ready,
+ output logic out_valid,
+ output raster_xyzw out_vertex_a,
+ out_vertex_b,
+ out_vertex_c
+);
+
+ logic stall; // common stall for the three vertex converters
+
+ gfx_pipeline_flow #(.STAGES(`FP_FIX_STAGES + 1)) flow
+ (
+ .*
+ );
+
+ gfx_fix_vertex fix_a
+ (
+ .in_vertex(in_vertex_a),
+ .out_vertex(out_vertex_a),
+ .*
+ );
+
+ gfx_fix_vertex fix_b
+ (
+ .in_vertex(in_vertex_b),
+ .out_vertex(out_vertex_b),
+ .*
+ );
+
+ gfx_fix_vertex fix_c
+ (
+ .in_vertex(in_vertex_c),
+ .out_vertex(out_vertex_c),
+ .*
+ );
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_fix_vertex.sv b/rtl/legacy_gfx/gfx_fix_vertex.sv
new file mode 100644
index 0000000..728f3b6
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_fix_vertex.sv
@@ -0,0 +1,64 @@
+`include "gfx/gfx_defs.sv"
+/* Converts one vertex from four floats to four fixed-point values and
+ * scales x and y to half the screen resolution. Each component goes through
+ * gfx_fp_fix, then one register stage applies the scaling (z and w pass
+ * through unchanged), and a skid buffer per component drives out_vertex.
+ */
+module gfx_fix_vertex
+(
+ input logic clk,
+
+ input vec4 in_vertex,
+ input logic stall,
+
+ output raster_xyzw out_vertex
+);
+
+ fixed x, y;
+ raster_xyzw fixed_vertex, corrected;
+ fixed[`FLOATS_PER_VEC - 1:0] fixed_vals, corrected_vals, skid_vals;
+
+ // Packed array <-> struct views of the same bits
+ assign out_vertex = skid_vals;
+ assign fixed_vertex = fixed_vals;
+ assign corrected_vals = corrected;
+
+ assign x = fixed_vertex.xy.x;
+ assign y = fixed_vertex.xy.y;
+
+ genvar i;
+ generate
+ for (i = 0; i < `FLOATS_PER_VEC; ++i) begin: components
+ gfx_fp_fix fix
+ (
+ .in(in_vertex[i]),
+ .out(fixed_vals[i]),
+ .*
+ );
+
+ gfx_skid_buf #(.WIDTH($bits(fixed))) skid
+ (
+ .in(corrected_vals[i]),
+ .out(skid_vals[i]),
+ .*
+ );
+ end
+ endgenerate
+
+ // Scaling stage: multiplications by 320 and 240 done with shifts and adds
+ always_ff @(posedge clk)
+ if (!stall) begin
+ /* x * `GFX_X_RES / 2
+ * = x * 320
+ * = x * 64 * 5
+ * = (x * 5) << 6
+ * = (x * (4 + 1)) << 6
+ * = ((x << 2) + x) << 6
+ *
+ * y * `GFX_Y_RES / 2
+ * = y * 240
+ * = y * 16 * 15
+ * = (y * 15) << 4
+ * = (y * (16 - 1)) << 4
+ * = ((y << 4) - y) << 4
+ */
+ corrected.zw <= fixed_vertex.zw;
+ corrected.xy.x <= ((x << 2) + x) << 6;
+ corrected.xy.y <= ((y << 4) - y) << 4;
+ end
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_fixed_div.sv b/rtl/legacy_gfx/gfx_fixed_div.sv
new file mode 100644
index 0000000..e562072
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_fixed_div.sv
@@ -0,0 +1,77 @@
+`include "gfx/gfx_defs.sv"
+/* Pipelined signed fixed-point division. The numerator z is extended with
+ * `FIXED_FRAC zero bits on the right before dividing by d, and q is the low
+ * $bits(fixed) bits of the quotient.
+ *
+ * Without VERILATOR defined, the lpm_divide primitive is used with
+ * `FIXED_DIV_STAGES - `FIXED_DIV_PIPES pipeline stages and both operands are
+ * first delayed by `FIXED_DIV_PIPES stages. With VERILATOR defined, a plain
+ * "/" operator is applied to operands delayed by `FIXED_DIV_STAGES stages.
+ */
+module gfx_fixed_div
+(
+ input logic clk,
+
+ input fixed z,
+ d,
+ input logic stall,
+
+ output fixed q
+);
+
+ localparam DIV_BITS = `FIXED_FRAC + $bits(fixed); // width of the extended numerator and quotient
+
+ fixed d_hold, z_hold;
+ logic signed[DIV_BITS - 1:0] z_int, q_int;
+
+ assign q = q_int[$bits(q) - 1:0];
+ assign z_int = {z_hold, {`FIXED_FRAC{1'b0}}};
+
+`ifndef VERILATOR
+ lpm_divide div
+ (
+ .aclr(0),
+ .clock(clk),
+ .clken(!stall),
+ .numer(z_int),
+ .denom(d_hold),
+ .remain(),
+ .quotient(q_int)
+ );
+
+ defparam
+ div.lpm_widthn = DIV_BITS,
+ div.lpm_widthd = $bits(fixed),
+ div.lpm_nrepresentation = "SIGNED",
+ div.lpm_drepresentation = "SIGNED",
+ div.lpm_pipeline = `FIXED_DIV_STAGES - `FIXED_DIV_PIPES,
+ div.maximize_speed = 6;
+
+ gfx_pipes #(.WIDTH($bits(z)), .DEPTH(`FIXED_DIV_PIPES)) z_pipes
+ (
+ .in(z),
+ .out(z_hold),
+ .*
+ );
+
+ gfx_pipes #(.WIDTH($bits(d)), .DEPTH(`FIXED_DIV_PIPES)) d_pipes
+ (
+ .in(d),
+ .out(d_hold),
+ .*
+ );
+`else
+ logic signed[DIV_BITS - 1:0] d_int_hold, z_int_hold;
+
+ assign q_int = z_int_hold / d_int_hold;
+ assign z_hold = z; // no input delay here; the whole latency is applied to z_int below
+ assign d_int_hold = {{`FIXED_FRAC{d_hold[$bits(d_hold) - 1]}}, d_hold}; // sign-extend the divisor
+
+ gfx_pipes #(.WIDTH($bits(z_int)), .DEPTH(`FIXED_DIV_STAGES)) z_int_pipes
+ (
+ .in(z_int),
+ .out(z_int_hold),
+ .*
+ );
+
+ gfx_pipes #(.WIDTH($bits(d)), .DEPTH(`FIXED_DIV_STAGES)) d_pipes
+ (
+ .in(d),
+ .out(d_hold),
+ .*
+ );
+`endif
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_fixed_fma.sv b/rtl/legacy_gfx/gfx_fixed_fma.sv
new file mode 100644
index 0000000..ec26477
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_fixed_fma.sv
@@ -0,0 +1,73 @@
+`include "gfx/gfx_defs.sv"
+/* Pipelined signed fixed-point multiply-add: q = a * b + c, where the
+ * product is brought back to `FIXED_FRAC fractional bits.
+ *
+ * Without VERILATOR defined, the lpm_mult primitive is used with
+ * `FIXED_FMA_STAGES pipeline stages and c is fed to its sum input, shifted
+ * left by `FIXED_FRAC. With VERILATOR defined, the operands are delayed by
+ * `FIXED_FMA_STAGES stages and the arithmetic is combinational afterwards.
+ */
+module gfx_fixed_fma
+(
+ input logic clk,
+
+ input fixed a,
+ b,
+ c,
+ input logic stall,
+
+ output fixed q
+);
+
+`ifndef VERILATOR
+ logic[2 * $bits(fixed) - `FIXED_FRAC - 1:0] q_ext;
+ assign q = q_ext[$bits(fixed) - 1:0];
+
+ lpm_mult mult
+ (
+ .aclr(0),
+ .clock(clk),
+ .clken(!stall),
+
+ .sum({c, {`FIXED_FRAC{1'b0}}}),
+ .dataa(a),
+ .datab(b),
+ .result(q_ext)
+ );
+
+ defparam
+ mult.lpm_widtha = $bits(fixed),
+ mult.lpm_widthb = $bits(fixed),
+ mult.lpm_widths = $bits(fixed) + `FIXED_FRAC,
+ /* This is crucial. It is not documented anywhere (apart from a
+ * comment on r/fpga). If lpm_widthp < lpm_widtha + lpm_widthb,
+ * then result holds the lpm_widthp most significant bits of the
+ * product, not the least significant ones as would make sense.
+ */
+ mult.lpm_widthp = 2 * $bits(fixed) - `FIXED_FRAC,
+ mult.lpm_representation = "SIGNED",
+ mult.lpm_pipeline = `FIXED_FMA_STAGES;
+`else
+ logic[$bits(fixed) + `FIXED_FRAC - 1:0] q_ext;
+
+ fixed a_hold, b_hold, c_hold;
+
+ assign q = q_ext[$bits(fixed) + `FIXED_FRAC - 1:`FIXED_FRAC] + c_hold; // drop the extra fractional bits, then add c
+ assign q_ext = a_hold * b_hold;
+
+ gfx_pipes #(.WIDTH($bits(a)), .DEPTH(`FIXED_FMA_STAGES)) a_pipes
+ (
+ .in(a),
+ .out(a_hold),
+ .*
+ );
+
+ gfx_pipes #(.WIDTH($bits(b)), .DEPTH(`FIXED_FMA_STAGES)) b_pipes
+ (
+ .in(b),
+ .out(b_hold),
+ .*
+ );
+
+ gfx_pipes #(.WIDTH($bits(c)), .DEPTH(`FIXED_FMA_STAGES)) c_pipes
+ (
+ .in(c),
+ .out(c_hold),
+ .*
+ );
+`endif
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_fixed_fma_dot.sv b/rtl/legacy_gfx/gfx_fixed_fma_dot.sv
new file mode 100644
index 0000000..c19b49e
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_fixed_fma_dot.sv
@@ -0,0 +1,49 @@
+`include "gfx/gfx_defs.sv"
+/* Two chained gfx_fixed_fma units: q = a1 * b1 + (a0 * b0 + c).
+ * fma0 receives c by name through ".*" and produces q0; a1 and b1 are
+ * delayed by `FIXED_FMA_STAGES stages so they reach fma1 together with q0,
+ * and fma1 drives the module output q by name.
+ */
+module gfx_fixed_fma_dot
+(
+ input logic clk,
+
+ input fixed a0,
+ b0,
+ a1,
+ b1,
+ c,
+ input logic stall,
+
+ output fixed q
+);
+
+ fixed q0, a1_hold, b1_hold;
+
+ gfx_fixed_fma fma0
+ (
+ .a(a0),
+ .b(b0),
+ .q(q0),
+ .*
+ );
+
+ gfx_pipes #(.WIDTH($bits(fixed)), .DEPTH(`FIXED_FMA_STAGES)) a_pipes
+ (
+ .in(a1),
+ .out(a1_hold),
+ .*
+ );
+
+ gfx_pipes #(.WIDTH($bits(fixed)), .DEPTH(`FIXED_FMA_STAGES)) b_pipes
+ (
+ .in(b1),
+ .out(b1_hold),
+ .*
+ );
+
+ gfx_fixed_fma fma1
+ (
+ .a(a1_hold),
+ .b(b1_hold),
+ .c(q0),
+ .*
+ );
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_flush_flow.sv b/rtl/legacy_gfx/gfx_flush_flow.sv
new file mode 100644
index 0000000..a0e43d7
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_flush_flow.sv
@@ -0,0 +1,45 @@
+/* Valid tracking for a pipeline of STAGES stages that is flushed instead of
+ * stalled.
+ *
+ * in_valid enters a shift register that advances every cycle. The outcome of
+ * an output transfer is known one cycle late: if out_valid was high in the
+ * previous cycle, "commit" reports that out_ready was high as well and
+ * "flush" reports that it was not. While flush is high out_valid is forced
+ * low and every stage of the shift register is cleared.
+ */
+module gfx_flush_flow
+#(parameter STAGES=0)
+(
+    input logic clk,
+                rst_n,
+
+    input logic in_valid,
+                out_ready,
+
+    output logic out_valid,
+                 commit,
+                 flush
+);
+
+    logic prev_valid, prev_ready;
+    logic[STAGES - 1:0] valid;
+
+    assign commit = prev_valid && prev_ready;
+    assign flush = prev_valid && !prev_ready;
+    assign out_valid = valid[STAGES - 1] && !flush;
+
+    always_ff @(posedge clk or negedge rst_n)
+        if (!rst_n) begin
+            prev_ready <= 0;
+            prev_valid <= 0;
+            valid <= '0;
+        end else begin
+            // Remember what was offered and whether it was taken
+            prev_ready <= out_ready;
+            prev_valid <= out_valid;
+
+            if (!flush) begin
+                valid[0] <= in_valid;
+
+                for (integer i = 1; i < STAGES; ++i)
+                    valid[i] <= valid[i - 1];
+            end else
+                valid <= '0;
+        end
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_fold.sv b/rtl/legacy_gfx/gfx_fold.sv
new file mode 100644
index 0000000..616d868
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_fold.sv
@@ -0,0 +1,54 @@
+`include "gfx/gfx_defs.sv"
+/* Reduces a vec4 to one fp value with a single pipelined adder. Without
+ * feedback the adder receives vec[0] and vec[1], while vec[2] and vec[3] are
+ * pushed into a `FP_ADD_STAGES deep queue. With feedback the adder receives
+ * its own output together with the queued element selected by
+ * feedback_last, and the queue entry is recirculated. feedback and
+ * feedback_last are inputs driven from outside this module.
+ */
+module gfx_fold
+(
+ input logic clk,
+
+ input vec4 vec,
+ input logic stall,
+ feedback,
+ feedback_last,
+
+ output fp q
+);
+
+ fp q_add;
+ vec2 feedback_vec, queued[`FP_ADD_STAGES];
+
+ assign feedback_vec = queued[`FP_ADD_STAGES - 1]; // oldest queue entry
+
+ gfx_fp_add add
+ (
+ .a(feedback ? q_add : vec[0]),
+ .b(feedback ? feedback_vec[feedback_last] : vec[1]),
+ .q(q_add),
+ .*
+ );
+
+ gfx_skid_buf #(.WIDTH($bits(q))) skid
+ (
+ .in(q_add),
+ .out(q),
+ .*
+ );
+
+ // Queue head: recirculate on feedback, otherwise capture the upper half of vec
+ always_ff @(posedge clk)
+ if (!stall) begin
+ if (feedback)
+ queued[0] <= feedback_vec;
+ else begin
+ queued[0][0] <= vec[2];
+ queued[0][1] <= vec[3];
+ end
+ end
+
+ genvar i;
+ generate
+ for (i = 1; i < `FP_ADD_STAGES; ++i) begin: stages
+ always_ff @(posedge clk)
+ if (!stall)
+ queued[i] <= queued[i - 1];
+ end
+ endgenerate
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_fold_flow.sv b/rtl/legacy_gfx/gfx_fold_flow.sv
new file mode 100644
index 0000000..8f23b8f
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_fold_flow.sv
@@ -0,0 +1,61 @@
+`include "gfx/gfx_defs.sv"
+
+module gfx_fold_flow // Flow control for gfx_fold: tracks a 2-bit round code per adder stage and derives the feedback selects
+(
+ input logic clk,
+ rst_n,
+
+ input logic in_valid,
+ out_ready,
+
+ output logic in_ready,
+ out_valid,
+ stall,
+ feedback,
+ feedback_last
+);
+ // out_valid and stall are not assigned here; they bind by name (.*) to the gfx_skid_flow instance
+ logic skid_ready;
+ index4 rounds[`FP_ADD_STAGES], last_round; // rounds[i]: code of the operation currently in stage i
+
+ assign in_ready = skid_ready && !feedback; // no new input while the adder slot is taken by a feedback pass
+
+ assign feedback = last_round[1] ^ last_round[0]; // high for codes 2'b01 and 2'b10
+ assign feedback_last = last_round[1]; // high for codes 2'b10 and 2'b11
+
+ assign last_round = rounds[`FP_ADD_STAGES - 1]; // code leaving the last stage
+
+ gfx_skid_flow skid
+ (
+ .in_valid(last_round == `INDEX4_MAX),
+ .in_ready(skid_ready),
+ .*
+ );
+ // Code entering stage 0: 01 -> 10 -> 11 advances a running operation; from 00 or 11 a new one starts as 01 when in_valid
+ always_ff @(posedge clk or negedge rst_n)
+ if (!rst_n)
+ rounds[0] <= `INDEX4_MIN;
+ else if (!stall)
+ unique case (last_round)
+ 2'b01:
+ rounds[0] <= 2'b10;
+
+ 2'b10:
+ rounds[0] <= 2'b11;
+
+ 2'b00, 2'b11:
+ rounds[0] <= {1'b0, in_valid};
+ endcase
+ // The remaining stages shift the code along while not stalled
+ genvar i;
+ generate
+ for (i = 1; i < `FP_ADD_STAGES; ++i) begin: pipeline
+ always_ff @(posedge clk or negedge rst_n)
+ if (!rst_n)
+ rounds[i] <= `INDEX4_MIN;
+ else if (!stall)
+ rounds[i] <= rounds[i - 1];
+ end
+ endgenerate
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_fp_add.sv b/rtl/legacy_gfx/gfx_fp_add.sv
new file mode 100644
index 0000000..0b3058a
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_fp_add.sv
@@ -0,0 +1,41 @@
+`include "gfx/gfx_defs.sv"
+
+module gfx_fp_add // Pipelined floating-point adder wrapper: q is produced from a and b
+(
+ input logic clk,
+
+ input fp a,
+ b,
+ input logic stall, // high holds the pipeline (en = !stall on the IP, stall on the model's delay lines)
+
+ output fp q
+);
+ // Synthesis uses the ip_fp_add core; Verilator builds use a C++ model behind delay lines
+`ifndef VERILATOR
+ ip_fp_add ip_add
+ (
+ .en(!stall),
+ .areset(0), // reset input tied to constant 0
+ .* // clk, a, b and q bind by name, presumably matching the IP's port names
+ );
+`else
+ fp a_pop, b_pop;
+ // Verilator $c: the result is computed by taller::fp_add on the delayed operands
+ assign q = $c("taller::fp_add(", a_pop, ", ", b_pop, ")");
+ // Operands are delayed by `FP_ADD_STAGES, presumably to match the latency of the IP core
+ gfx_pipes #(.WIDTH($bits(a)), .DEPTH(`FP_ADD_STAGES)) a_pipes
+ (
+ .in(a),
+ .out(a_pop),
+ .*
+ );
+
+ gfx_pipes #(.WIDTH($bits(b)), .DEPTH(`FP_ADD_STAGES)) b_pipes
+ (
+ .in(b),
+ .out(b_pop),
+ .*
+ );
+`endif
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_fp_fix.sv b/rtl/legacy_gfx/gfx_fp_fix.sv
new file mode 100644
index 0000000..b38e0e3
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_fp_fix.sv
@@ -0,0 +1,34 @@
+`include "gfx/gfx_defs.sv"
+
+module gfx_fp_fix // Pipelined conversion wrapper from the fp type to the fixed type
+(
+ input logic clk,
+
+ input fp in,
+ input logic stall, // high holds the pipeline
+
+ output fixed out
+);
+ // Synthesis uses the ip_fp_fix core; Verilator builds use a C++ model behind a delay line
+`ifndef VERILATOR
+ ip_fp_fix ip_fix
+ (
+ .a(in),
+ .q(out),
+ .en(!stall),
+ .areset(0), // reset input tied to constant 0
+ .*
+ );
+`else
+ fp pop;
+ // Verilator $c: the result is computed by taller::fp_fix on the delayed input
+ assign out = $c("taller::fp_fix(", pop, ")");
+ // The input is delayed by `FP_FIX_STAGES, presumably to match the latency of the IP core
+ gfx_pipes #(.WIDTH($bits(in)), .DEPTH(`FP_FIX_STAGES)) pipes
+ (
+ .out(pop),
+ .* // binds in, stall and clk by name
+ );
+`endif
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_fp_mul.sv b/rtl/legacy_gfx/gfx_fp_mul.sv
new file mode 100644
index 0000000..7ff3c02
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_fp_mul.sv
@@ -0,0 +1,41 @@
+`include "gfx/gfx_defs.sv"
+
+module gfx_fp_mul // Pipelined floating-point multiplier wrapper: q is produced from a and b
+(
+ input logic clk,
+
+ input fp a,
+ b,
+ input logic stall, // high holds the pipeline (en = !stall on the IP, stall on the model's delay lines)
+
+ output fp q
+);
+ // Synthesis uses the ip_fp_mul core; Verilator builds use a C++ model behind delay lines
+`ifndef VERILATOR
+ ip_fp_mul ip_mul
+ (
+ .en(!stall),
+ .areset(0), // reset input tied to constant 0
+ .* // clk, a, b and q bind by name, presumably matching the IP's port names
+ );
+`else
+ fp a_pop, b_pop;
+ // Verilator $c: the result is computed by taller::fp_mul on the delayed operands
+ assign q = $c("taller::fp_mul(", a_pop, ", ", b_pop, ")");
+ // Operands are delayed by `FP_MUL_STAGES, presumably to match the latency of the IP core
+ gfx_pipes #(.WIDTH($bits(a)), .DEPTH(`FP_MUL_STAGES)) a_pipes
+ (
+ .in(a),
+ .out(a_pop),
+ .*
+ );
+
+ gfx_pipes #(.WIDTH($bits(b)), .DEPTH(`FP_MUL_STAGES)) b_pipes
+ (
+ .in(b),
+ .out(b_pop),
+ .*
+ );
+`endif
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_frag.sv b/rtl/legacy_gfx/gfx_frag.sv
new file mode 100644
index 0000000..d61de72
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_frag.sv
@@ -0,0 +1,79 @@
+`include "gfx/gfx_defs.sv"
+
+module gfx_frag // Fragment stage: turns a fragment position plus barycentric inputs into an address and a colour
+(
+ input logic clk,
+ rst_n,
+
+ input frag_xy frag,
+ input fixed_tri bary,
+ ws,
+ input logic in_valid,
+ output logic in_ready,
+
+ input logic out_ready,
+ output logic out_valid,
+ output frag_paint out
+);
+ // Sub-blocks below are wired almost entirely by name through .*, so local signal names must match their port names
+ logic stall;
+ frag_paint frag_out;
+ // Valid/ready tracking for the whole `GFX_FRAG_STAGES-deep stage
+ gfx_pipeline_flow #(.STAGES(`GFX_FRAG_STAGES)) addr_flow
+ (
+ .*
+ );
+
+ linear_coord linear;
+
+ gfx_frag_addr addr
+ (
+ .*
+ );
+ // The address is ready after `GFX_FRAG_ADDR_STAGES; the remaining stages only delay it to line up with the colour
+ localparam ADDR_WAIT_STAGES = `GFX_FRAG_STAGES - `GFX_FRAG_ADDR_STAGES;
+
+ gfx_pipes #(.WIDTH($bits(linear_coord)), .DEPTH(ADDR_WAIT_STAGES)) addr_pipes
+ (
+ .in(linear),
+ .out(frag_out.addr),
+ .*
+ );
+
+ fixed b1, b2;
+
+ gfx_frag_bary frag_bary
+ (
+ .*
+ );
+ // Hard-coded colours: argb0 is the base colour, the other two are differences from it (lane meaning presumably A,R,G,B from 3 down to 0 - confirm)
+ color_lerp_lanes argb0, argb1_argb0, argb2_argb0;
+
+ assign argb0[3] = 32'd0 << 8;
+ assign argb0[2] = 32'd255 << 8;
+ assign argb0[1] = 32'd0 << 8;
+ assign argb0[0] = 32'd0 << 8;
+
+ assign argb1_argb0[3] = 32'd0 << 8;
+ assign argb1_argb0[2] = (-32'sd255) << 8;
+ assign argb1_argb0[1] = 32'd255 << 8;
+ assign argb1_argb0[0] = 32'd0 << 8;
+
+ assign argb2_argb0[3] = 32'd0 << 8;
+ assign argb2_argb0[2] = (-32'sd255) << 8;
+ assign argb2_argb0[1] = 32'd0 << 8;
+ assign argb2_argb0[0] = 32'd255 << 8;
+
+ gfx_frag_shade shade
+ (
+ .color(frag_out.color),
+ .*
+ );
+ // Output register stage for the assembled {addr, color} record; out binds by name
+ gfx_skid_buf #(.WIDTH($bits(frag_out))) skid
+ (
+ .in(frag_out),
+ .*
+ );
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_frag_addr.sv b/rtl/legacy_gfx/gfx_frag_addr.sv
new file mode 100644
index 0000000..23bd315
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_frag_addr.sv
@@ -0,0 +1,59 @@
+`include "gfx/gfx_defs.sv"
+
+module gfx_frag_addr // Converts a signed fragment coordinate into a linear framebuffer index (three register stages)
+(
+ input logic clk,
+
+ input frag_xy frag,
+ input logic stall, // high holds every register of the pipeline
+
+ output linear_coord linear
+);
+
+ /* frag is expressed in a normalized range that is evenly distributed
+ * between positive and negative values. To obtain the linear address that
+ * corresponds to it, we must correct this so that the minimum is zero in
+ * each coordinate. After that,
+ *
+ * linear = y_corrected * `GFX_X_RES + x_corrected
+ *
+ * Fortunately, this does not need an FMA, as I will proceed to show:
+ *
+ * y * `GFX_X_RES + x
+ * = y * 640 + x
+ * = y * 128 * 5 + x
+ * = ((y * 5) << 7) + x
+ * = ((y * (4 + 1)) << 7) + x
+ * = (((y << 2) + y) << 7) + x
+ * = (y << 9) + (y << 7) + x
+ *
+ * To correct x ([-320, 319]), `GFX_X_RES / 2 is added to it.
+ *
+ * To correct y ([-240, 239]), it must be taken into account that
+ * linear addresses increase downwards, so:
+ * y_corrected = `GFX_Y_RES / 2 - 1 - y
+ */
+
+ localparam ZERO_PAD = $bits(linear_coord) - $bits(xy_coord); // zero bits needed to widen a coordinate to linear_coord
+
+ // These constants assume `GFX_X_RES == 640
+ localparam Y_SHIFT0 = 9, Y_SHIFT1 = 7;
+
+ xy_coord bias_x, bias_y;
+ linear_coord row_start, x_biased, x_hold, y_biased;
+
+ assign bias_x = `GFX_X_RES / 2;
+ assign bias_y = `GFX_Y_RES / 2 - 1;
+
+ always_ff @(posedge clk)
+ if (!stall) begin
+ x_biased <= {{ZERO_PAD{1'b0}}, frag.x + bias_x}; // stage 1: remove the sign bias, zero-extend
+ y_biased <= {{ZERO_PAD{1'b0}}, bias_y - frag.y}; // stage 1: flip and bias y
+
+ x_hold <= x_biased; // stage 2: x only waits for the row computation
+ row_start <= (y_biased << Y_SHIFT0) + (y_biased << Y_SHIFT1); // stage 2: y * 512 + y * 128
+
+ linear <= row_start + x_hold; // stage 3: final index
+ end
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_frag_bary.sv b/rtl/legacy_gfx/gfx_frag_bary.sv
new file mode 100644
index 0000000..4f4f452
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_frag_bary.sv
@@ -0,0 +1,78 @@
+`include "gfx/gfx_defs.sv"
+
+module gfx_frag_bary // Produces b1 and b2 from three edge values and three w values using gfx_fixed_div instances
+(
+ input logic clk,
+
+ input fixed_tri bary,
+ ws,
+ input logic stall,
+
+ output fixed b1,
+ b2
+);
+ // Presumably gfx_fixed_div computes q = z / d, making this b_i/w_i normalised by their sum - confirm against gfx_fixed_div
+ fixed area, b0_w0, b1_w1, b2_w2, b1_w1_b2_w2, hold_b0_w0, hold_b1_w1, hold_b2_w2;
+ fixed_tri bs_ws, orthographic_bs;
+
+ assign b0_w0 = bs_ws[0];
+ assign b1_w1 = bs_ws[1];
+ assign b2_w2 = bs_ws[2];
+ // Reorder the edge values so that entry i is paired with ws[i]
+ assign orthographic_bs[0] = bary[`EDGE_P1_TO_P2];
+ assign orthographic_bs[1] = bary[`EDGE_P2_TO_P0];
+ assign orthographic_bs[2] = bary[`EDGE_P0_TO_P1];
+
+ genvar i;
+ generate
+ for (i = 0; i < 3; ++i) begin: vertices
+ gfx_fixed_div div_b_w
+ (
+ .z(orthographic_bs[i]),
+ .d(ws[i]),
+ .q(bs_ws[i]),
+ .*
+ );
+ end
+ endgenerate
+ // area takes two register stages (see the always_ff below), so the numerators are delayed by the same amount
+ localparam AREA_STAGES = 2;
+
+ gfx_pipes #(.WIDTH($bits(fixed)), .DEPTH(AREA_STAGES)) b1_w1_pipes
+ (
+ .in(b1_w1),
+ .out(hold_b1_w1),
+ .*
+ );
+
+ gfx_pipes #(.WIDTH($bits(fixed)), .DEPTH(AREA_STAGES)) b2_w2_pipes
+ (
+ .in(b2_w2),
+ .out(hold_b2_w2),
+ .*
+ );
+
+ gfx_fixed_div norm_b1
+ (
+ .z(hold_b1_w1),
+ .d(area),
+ .q(b1),
+ .*
+ );
+
+ gfx_fixed_div norm_b2
+ (
+ .z(hold_b2_w2),
+ .d(area),
+ .q(b2),
+ .*
+ );
+ // Two-stage sum of the three quotients: (b1_w1 + b2_w2) first, then + b0_w0
+ always_ff @(posedge clk)
+ if (!stall) begin
+ area <= hold_b0_w0 + b1_w1_b2_w2;
+ hold_b0_w0 <= b0_w0;
+ b1_w1_b2_w2 <= b1_w1 + b2_w2;
+ end
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_frag_shade.sv b/rtl/legacy_gfx/gfx_frag_shade.sv
new file mode 100644
index 0000000..d2ad7ce
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_frag_shade.sv
@@ -0,0 +1,53 @@
+`include "gfx/gfx_defs.sv"
+
+module gfx_frag_shade // Interpolates each colour channel with gfx_lerp and clamps the result to the color8 range
+(
+ input logic clk,
+
+ input fixed b1,
+ b2,
+ input color_lerp_lanes argb0,
+ argb1_argb0,
+ argb2_argb0,
+ input logic stall,
+
+ output rgb32 color
+);
+ // Field view of a fixed value: the widths below add up to $bits(fixed)
+ struct packed
+ {
+ logic sign; // top bit
+ logic[$bits(fixed) - `FIXED_FRAC - 2:0] out_of_range; // remaining integer bits; any set bit means the value exceeds the channel range
+ color8 color; // upper fraction bits, used as the channel value
+ logic[`FIXED_FRAC - $bits(color8) - 1:0] sub; // lower fraction bits, discarded
+ } lerped[`COLOR_CHANNELS];
+
+ fixed channel_lerp[`COLOR_CHANNELS];
+ color8[`COLOR_CHANNELS - 1:0] out;
+
+ assign color = out;
+
+ genvar i;
+ generate
+ for (i = 0; i < `COLOR_CHANNELS; ++i) begin: channels
+ assign lerped[i] = channel_lerp[i]; // reinterpret the interpolated value through the struct
+
+ gfx_lerp lerp
+ (
+ .q(channel_lerp[i]),
+ .q0(argb0[i]),
+ .q1_q0(argb1_argb0[i]),
+ .q2_q0(argb2_argb0[i]),
+ .*
+ );
+ // The second assignment overrides the first: negative values clamp to all zeros, overflowing ones to all ones
+ always_ff @(posedge clk)
+ if (!stall) begin
+ out[i] <= lerped[i].color;
+ if (lerped[i].sign || |lerped[i].out_of_range)
+ out[i] <= {($bits(color8)){!lerped[i].sign}};
+ end
+ end
+ endgenerate
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_funnel.sv b/rtl/legacy_gfx/gfx_funnel.sv
new file mode 100644
index 0000000..4710111
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_funnel.sv
@@ -0,0 +1,96 @@
+`include "gfx/gfx_defs.sv"
+
+module gfx_funnel // Serialises the per-lane fragments of one rasterizer output into a stream of single fragments
+(
+ input logic clk,
+ rst_n,
+
+ input frag_xy_lanes fragments,
+ input bary_lanes barys,
+ input fixed_tri raster_ws,
+ input paint_lanes in_valid, // one valid bit per lane
+ output logic in_ready,
+
+ input logic out_ready,
+ output logic out_valid,
+ output frag_xy frag,
+ output fixed_tri frag_bary,
+ frag_ws
+);
+ // out_valid and stall are not assigned here; they bind by name (.*) to the skid_flow instance
+ logic skid_ready, stall, ready, valid;
+ frag_xy next_frag, out_frag;
+ fixed_tri next_bary, out_bary, out_ws, ws_hold;
+ bary_lanes barys_hold;
+ paint_lanes current, next; // current: lanes still pending; next: pending lanes after this cycle's pick
+ frag_xy_lanes fragments_hold;
+
+ assign ready = !(|next); // nothing remains pending after the lane picked this cycle
+ assign in_ready = skid_ready && ready;
+
+ gfx_skid_buf #(.WIDTH($bits(frag))) skid_frag
+ (
+ .in(out_frag),
+ .out(frag),
+ .*
+ );
+
+ gfx_skid_buf #(.WIDTH($bits(frag_bary))) skid_bary
+ (
+ .in(out_bary),
+ .out(frag_bary),
+ .*
+ );
+
+ gfx_skid_buf #(.WIDTH($bits(frag_ws))) skid_ws
+ (
+ .in(out_ws),
+ .out(frag_ws),
+ .*
+ );
+
+ gfx_skid_flow skid_flow
+ (
+ .in_ready(skid_ready),
+ .in_valid(valid),
+ .*
+ );
+ // Lane pick: later iterations overwrite earlier ones, so the highest-index pending lane is selected
+ always_comb begin
+ next = 0;
+ next_bary = {($bits(next_bary)){1'bx}}; // don't-care when no lane is pending
+ next_frag = {($bits(next_frag)){1'bx}};
+
+ for (integer i = 0; i < `GFX_FINE_LANES; ++i)
+ if (current[i]) begin
+ next = current;
+ next[i] = 0; // the picked lane is no longer pending
+
+ next_bary = barys_hold[i];
+ next_frag = fragments_hold[i];
+ end
+ end
+
+ always_ff @(posedge clk or negedge rst_n)
+ if (!rst_n) begin
+ valid <= 0;
+ current <= 0;
+ end else if (!stall) begin
+ valid <= |current; // an output is produced whenever some lane was pending
+ current <= ready ? in_valid : next; // reload from the input when drained, otherwise keep draining
+ end
+ // Data registers carry no reset; they are qualified by valid/current above
+ always_ff @(posedge clk)
+ if (!stall) begin
+ if (ready) begin
+ ws_hold <= raster_ws;
+ barys_hold <= barys;
+ fragments_hold <= fragments;
+ end
+
+ out_ws <= ws_hold; // the same ws triple accompanies every lane of one input
+ out_bary <= next_bary;
+ out_frag <= next_frag;
+ end
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_lerp.sv b/rtl/legacy_gfx/gfx_lerp.sv
new file mode 100644
index 0000000..42e4393
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_lerp.sv
@@ -0,0 +1,32 @@
+`include "gfx/gfx_defs.sv"
+
+module gfx_lerp // Thin wrapper that maps interpolation operands onto a gfx_fixed_fma_dot instance
+(
+ input logic clk,
+
+ input fixed b1,
+ b2,
+ q0,
+ q1_q0,
+ q2_q0,
+ input logic stall,
+
+ output fixed q
+);
+
+ /* Linear interpolation, made trivial.
+ *
+ * This is the key: https://fgiesen.wordpress.com/2013/02/06/the-barycentric-conspirac/
+ */
+ // Presumably q = q0 + b1 * q1_q0 + b2 * q2_q0, given the port names - confirm against gfx_fixed_fma_dot
+ gfx_fixed_fma_dot fma
+ (
+ .c(q0),
+ .a0(b1),
+ .b0(q1_q0),
+ .a1(b2),
+ .b1(q2_q0),
+ .* // remaining ports (q, clk, stall, if present on the submodule) bind by name
+ );
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_mask_sram.sv b/rtl/legacy_gfx/gfx_mask_sram.sv
new file mode 100644
index 0000000..730ee12
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_mask_sram.sv
@@ -0,0 +1,31 @@
+`include "gfx/gfx_defs.sv"
+
+module gfx_mask_sram // One-bit-wide memory with one registered read path and one registered write path
+(
+ input logic clk,
+
+ input logic set, // value to store
+ write, // write enable
+ input linear_coord write_addr,
+ read_addr,
+ output logic mask // read data, three clock edges after read_addr is presented
+);
+
+ logic mem[`GFX_LINEAR_RES];
+ logic mask_hold, write_hold, set_hold;
+ linear_coord read_addr_hold, write_addr_hold;
+ // All inputs are registered once before they reach the array; the read data is registered twice
+ always_ff @(posedge clk) begin
+ mask <= mask_hold;
+ mask_hold <= mem[read_addr_hold];
+ read_addr_hold <= read_addr;
+
+ set_hold <= set;
+ write_hold <= write;
+ write_addr_hold <= write_addr;
+
+ if (write_hold)
+ mem[write_addr_hold] <= set_hold; // the write lands on the edge after the one that sampled the inputs
+ end
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_masks.sv b/rtl/legacy_gfx/gfx_masks.sv
new file mode 100644
index 0000000..5182bd4
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_masks.sv
@@ -0,0 +1,68 @@
+`include "gfx/gfx_defs.sv"
+
+module gfx_masks // Two mask memories, A and B: swap_buffers decides which one serves the fragment side and which one scanout
+(
+ input logic clk,
+ rst_n,
+
+ input logic swap_buffers, // 1: fragment side uses A, scanout uses B; 0: the opposite
+ input cmd_word fb_base_a,
+ fb_base_b,
+
+ input linear_coord scan_mask_addr,
+ output logic scan_mask,
+
+ input logic frag_mask_write,
+ frag_mask_set,
+ input linear_coord frag_mask_read_addr,
+ frag_mask_write_addr,
+ output logic frag_mask,
+
+ output vram_addr frag_base,
+ scan_base
+);
+
+ logic mask_a, mask_b, frag_write_hold, frag_set_hold;
+ linear_coord scan_addr_hold, frag_write_addr_hold, frag_read_addr_hold;
+ // Only the memory currently assigned to the fragment side is written
+ gfx_mask_sram sram_a
+ (
+ .set(frag_set_hold),
+ .mask(mask_a),
+ .write(swap_buffers && frag_write_hold),
+ .read_addr(swap_buffers ? frag_read_addr_hold : scan_addr_hold),
+ .write_addr(frag_write_addr_hold),
+ .*
+ );
+
+ gfx_mask_sram sram_b
+ (
+ .set(frag_set_hold),
+ .mask(mask_b),
+ .write(!swap_buffers && frag_write_hold),
+ .read_addr(swap_buffers ? scan_addr_hold : frag_read_addr_hold),
+ .write_addr(frag_write_addr_hold),
+ .*
+ );
+ // Base addresses follow the same selection; bit 0 of the command word is dropped by the [$bits(vram_addr):1] slice
+ always_ff @(posedge clk or negedge rst_n)
+ if (!rst_n) begin
+ frag_base <= 0;
+ scan_base <= 0;
+ end else begin
+ frag_base <= swap_buffers ? fb_base_a[$bits(vram_addr):1] : fb_base_b[$bits(vram_addr):1];
+ scan_base <= swap_buffers ? fb_base_b[$bits(vram_addr):1] : fb_base_a[$bits(vram_addr):1];
+ end
+ // Extra register stage on every memory input and output (no reset)
+ always_ff @(posedge clk) begin
+ scan_mask <= swap_buffers ? mask_b : mask_a;
+ scan_addr_hold <= scan_mask_addr;
+
+ frag_mask <= swap_buffers ? mask_a : mask_b;
+ frag_set_hold <= frag_mask_set;
+ frag_write_hold <= frag_mask_write;
+ frag_read_addr_hold <= frag_mask_read_addr;
+ frag_write_addr_hold <= frag_mask_write_addr;
+ end
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_mat_mat.sv b/rtl/legacy_gfx/gfx_mat_mat.sv
new file mode 100644
index 0000000..d03a648
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_mat_mat.sv
@@ -0,0 +1,83 @@
+`include "gfx/gfx_defs.sv"
+
+module gfx_mat_mat // Matrix-matrix product built from repeated matrix-vector products on one shared gfx_mat_vec
+(
+ input logic clk,
+ rst_n,
+
+ input mat4 a,
+ b,
+ input logic in_valid,
+ out_ready,
+
+ output mat4 q,
+ output logic in_ready,
+ out_valid
+);
+
+ mat4 a_hold, b_hold, b_transpose, q_hold, q_transpose, mul_b;
+ vec4 mul_q;
+ logic mul_in_ready, mul_in_valid, mul_out_ready, mul_out_valid;
+ index4 in_index, out_index; // in_index: next vector to issue; out_index: next result to collect
+ // The external handshake only happens at the first issue (in_index MIN) and the last result (out_index MAX)
+ assign in_ready = mul_in_ready && in_index == `INDEX4_MIN;
+ assign out_valid = mul_out_valid && out_index == `INDEX4_MAX;
+ // In between, the multiplier is fed and drained internally without involving the caller
+ assign mul_in_valid = in_valid || in_index != `INDEX4_MIN;
+ assign mul_out_ready = out_ready || out_index != `INDEX4_MAX;
+
+ gfx_transpose transpose_b
+ (
+ .in(b),
+ .out(b_transpose)
+ );
+
+ gfx_mat_vec mul
+ (
+ .a(in_index == `INDEX4_MIN ? a : a_hold),
+ .x(mul_b[in_index]),
+ .q(mul_q),
+ .in_ready(mul_in_ready),
+ .in_valid(mul_in_valid),
+ .out_ready(mul_out_ready),
+ .out_valid(mul_out_valid),
+ .*
+ );
+
+ gfx_transpose transpose_q
+ (
+ .in(q_transpose),
+ .out(q)
+ );
+ // Bypasses: vector 0 comes straight from the input, and the last result goes straight to the output
+ always_comb begin
+ mul_b = b_hold;
+ mul_b[0] = b_transpose[0];
+
+ q_transpose = q_hold;
+ q_transpose[`VECS_PER_MAT - 1] = mul_q;
+ end
+ // Both counters advance on their handshake and rely on index4 wrapping back to the first value
+ always_ff @(posedge clk or negedge rst_n)
+ if (!rst_n) begin
+ in_index <= `INDEX4_MIN;
+ out_index <= `INDEX4_MIN;
+ end else begin
+ if (mul_in_ready && mul_in_valid)
+ in_index <= in_index + 1;
+
+ if (mul_out_ready && mul_out_valid)
+ out_index <= out_index + 1;
+ end
+
+ always_ff @(posedge clk) begin
+ if (in_ready) begin // captured whenever a new operation could start, even when in_valid is low
+ a_hold <= a;
+ b_hold <= b_transpose;
+ end
+
+ if (mul_out_ready && mul_out_valid)
+ q_hold[out_index] <= mul_q;
+ end
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_mat_vec.sv b/rtl/legacy_gfx/gfx_mat_vec.sv
new file mode 100644
index 0000000..4be4976
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_mat_vec.sv
@@ -0,0 +1,49 @@
+`include "gfx/gfx_defs.sv"
+
+module gfx_mat_vec // Matrix-vector product: one gfx_dot per matrix row, all sharing the same flow control
+(
+ input logic clk,
+ rst_n,
+
+ input mat4 a,
+ input vec4 x,
+ input logic in_valid,
+ out_ready,
+
+ output vec4 q,
+ output logic in_ready,
+ out_valid
+);
+ // The signal names below must match the gfx_dot port names, since the dots are wired through .*
+ logic stall_mul, stall_fold, mul_ready, mul_valid, feedback, feedback_last;
+ // Flow control for the multiply part (`FP_MUL_STAGES deep); in_valid and in_ready bind by name
+ gfx_pipeline_flow #(.STAGES(`FP_MUL_STAGES)) mul
+ (
+ .stall(stall_mul),
+ .out_ready(mul_ready),
+ .out_valid(mul_valid),
+ .*
+ );
+ // Flow control for the fold part; out_valid, out_ready, feedback and feedback_last bind by name
+ gfx_fold_flow fold
+ (
+ .stall(stall_fold),
+ .in_ready(mul_ready),
+ .in_valid(mul_valid),
+ .*
+ );
+
+ genvar i;
+ generate
+ for (i = 0; i < `VECS_PER_MAT; ++i) begin: dots
+ gfx_dot dot_i
+ (
+ .a(a[i]),
+ .b(x),
+ .q(q[i]),
+ .*
+ );
+ end
+ endgenerate
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_mem.sv b/rtl/legacy_gfx/gfx_mem.sv
new file mode 100644
index 0000000..fbca2fa
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_mem.sv
@@ -0,0 +1,228 @@
+`include "gfx/gfx_defs.sv"
+
+module gfx_mem // Arbitrates five requesters onto a single memory port and routes read data back to whoever asked
+(
+ input logic clk,
+ rst_n,
+
+ input logic mem_waitrequest,
+ mem_readdatavalid,
+ input vram_word mem_readdata,
+ output vram_byte_addr mem_address,
+ output logic mem_read,
+ mem_write,
+ output vram_word mem_writedata,
+
+ input vram_addr host_address,
+ input logic host_read,
+ host_write,
+ input vram_word host_writedata,
+ output logic host_waitrequest,
+ host_readdatavalid,
+ output vram_word host_readdata,
+
+ input logic rop_write,
+ input vram_word rop_writedata,
+ input vram_addr rop_address,
+ output logic rop_waitrequest,
+
+ input logic fb_read,
+ input vram_addr fb_address,
+ output logic fb_waitrequest,
+ fb_readdatavalid,
+ output vram_word fb_readdata,
+
+ input logic batch_read,
+ input vram_addr batch_address,
+ output logic batch_waitrequest,
+ batch_readdatavalid,
+ output vram_word batch_readdata,
+
+ input logic fetch_read,
+ input vram_addr fetch_address,
+ output logic fetch_waitrequest,
+ fetch_readdatavalid,
+ output vram_word fetch_readdata
+);
+
+ // This module is unacceptable; it has to be rewritten
+
+ logic mem_rw, trans_in_stall, trans_out_stall, in_ready, in_valid, skid_in_valid, out_ready,
+ any_readdatavalid, readdatavalid, dispatch_full, dispatch_put, mem_ready;
+
+ vram_word any_readdata, readdata;
+ logic[$clog2(`GFX_MEM_DISPATCH_DEPTH) - 1:0] next_put_ptr, pop_ptr, put_ptr;
+ // One flag per read-capable requester: records who issued a read
+ struct packed
+ {
+ logic fb,
+ host,
+ batch,
+ fetch;
+ } dispatch_in, dispatch_out, dispatch_buf[`GFX_MEM_DISPATCH_DEPTH];
+ // A transaction as it travels through the request pipeline; the waitrequest bits identify the granted requester
+ struct packed
+ {
+ vram_addr address;
+ logic write,
+ fb_waitrequest,
+ host_waitrequest,
+ batch_waitrequest,
+ fetch_waitrequest;
+ vram_word writedata;
+ } trans_in, trans_out, trans_in_skid, trans_out_skid;
+
+ assign mem_read = mem_rw && !trans_out_skid.write && !dispatch_full; // reads are held back while the dispatch queue is full
+ assign mem_write = mem_rw && trans_out_skid.write;
+ assign mem_address = {trans_out_skid.address, {`GFX_MEM_SUBWORD_BITS{1'b0}}}; // append zero bits below the word address
+ assign mem_writedata = trans_out_skid.writedata;
+ // Every requester sees the same data word; only the valid strobes differ
+ assign fb_readdata = any_readdata;
+ assign host_readdata = any_readdata;
+ assign batch_readdata = any_readdata;
+ assign fetch_readdata = any_readdata;
+
+ assign fb_readdatavalid = any_readdatavalid && dispatch_out.fb;
+ assign host_readdatavalid = any_readdatavalid && dispatch_out.host;
+ assign batch_readdatavalid = any_readdatavalid && dispatch_out.batch;
+ assign fetch_readdatavalid = any_readdatavalid && dispatch_out.fetch;
+ // A requester whose waitrequest was low when the transaction was accepted is the one that issued it
+ assign dispatch_in.fb = !trans_out_skid.fb_waitrequest;
+ assign dispatch_in.host = !trans_out_skid.host_waitrequest;
+ assign dispatch_in.batch = !trans_out_skid.batch_waitrequest;
+ assign dispatch_in.fetch = !trans_out_skid.fetch_waitrequest;
+
+ assign in_valid = rop_write || fb_read || batch_read || fetch_read || host_read || host_write;
+ assign mem_ready = !mem_waitrequest && (!dispatch_full || trans_out_skid.write); // writes need no dispatch slot
+ assign next_put_ptr = put_ptr + 1;
+ assign dispatch_put = mem_ready && mem_rw && !trans_out_skid.write; // a read was accepted by the memory
+ assign dispatch_full = next_put_ptr == pop_ptr; // circular queue that keeps one slot unused
+
+ /* Closing timing here is not that easy, because of the routing this
+ * module is necessarily subject to (e.g. VRAM and DAC sit at fixed
+ * positions on the edges of the FPGA and cannot be rearranged).
+ */
+
+ gfx_skid_buf #(.WIDTH($bits(trans_in))) in_skid
+ (
+ .in(trans_in),
+ .out(trans_in_skid),
+ .stall(trans_in_stall),
+ .*
+ );
+
+ gfx_skid_flow in_flow
+ (
+ .stall(trans_in_stall),
+ .out_ready(out_ready),
+ .out_valid(skid_in_valid),
+ .*
+ );
+
+ gfx_pipes #(.WIDTH($bits(trans_out)), .DEPTH(`GFX_MEM_TRANS_DEPTH)) out_pipes
+ (
+ .in(trans_in_skid),
+ .out(trans_out),
+ .stall(trans_out_stall),
+ .*
+ );
+
+ gfx_skid_buf #(.WIDTH($bits(trans_out))) out_skid
+ (
+ .in(trans_out),
+ .out(trans_out_skid),
+ .stall(trans_out_stall),
+ .*
+ );
+
+ gfx_pipeline_flow #(.STAGES(`GFX_MEM_TRANS_DEPTH)) out_flow
+ (
+ .stall(trans_out_stall),
+ .in_ready(out_ready),
+ .in_valid(skid_in_valid),
+ .out_ready(mem_ready),
+ .out_valid(mem_rw),
+ .*
+ );
+ // The response path is never stalled: stall is tied to 0 and out_ready to 1
+ gfx_pipes #(.WIDTH($bits(vram_word)), .DEPTH(`GFX_MEM_RESPONSE_DEPTH)) readdata_pipes
+ (
+ .in(mem_readdata),
+ .out(readdata),
+ .stall(0),
+ .*
+ );
+
+ gfx_pipeline_flow #(.STAGES(`GFX_MEM_RESPONSE_DEPTH)) readdata_flow
+ (
+ .stall(),
+ .in_ready(),
+ .in_valid(mem_readdatavalid),
+ .out_ready(1),
+ .out_valid(readdatavalid),
+ .*
+ );
+ // Fixed-priority arbitration: fb_read, then batch_read, then rop_write, then fetch_read, then the host
+ always_comb begin
+ fb_waitrequest = 1;
+ rop_waitrequest = 1;
+ host_waitrequest = 1;
+ batch_waitrequest = 1;
+ fetch_waitrequest = 1;
+
+ trans_in.write = 0;
+ trans_in.writedata = {($bits(trans_in.writedata)){1'bx}}; // don't-care for reads
+
+ if (fb_read) begin
+ fb_waitrequest = !in_ready;
+ trans_in.address = fb_address;
+ end else if (batch_read) begin
+ batch_waitrequest = !in_ready;
+ trans_in.address = batch_address;
+ end else if (rop_write) begin
+ rop_waitrequest = !in_ready;
+
+ trans_in.write = 1;
+ trans_in.address = rop_address;
+ trans_in.writedata = rop_writedata;
+ end else if (fetch_read) begin
+ fetch_waitrequest = !in_ready;
+ trans_in.address = fetch_address;
+ end else begin // host is the default, also taken when nobody is requesting
+ host_waitrequest = !in_ready;
+
+ trans_in.write = host_write;
+ trans_in.address = host_address;
+ trans_in.writedata = host_writedata;
+ end
+ // The grant travels with the transaction so the response can be routed later
+ trans_in.fb_waitrequest = fb_waitrequest;
+ trans_in.host_waitrequest = host_waitrequest;
+ trans_in.batch_waitrequest = batch_waitrequest;
+ trans_in.fetch_waitrequest = fetch_waitrequest;
+ end
+ // Dispatch queue pointers: put on every accepted read, pop on every returned word
+ always_ff @(posedge clk or negedge rst_n)
+ if (!rst_n) begin
+ pop_ptr <= 0;
+ put_ptr <= 0;
+ end else begin
+ if (readdatavalid)
+ pop_ptr <= pop_ptr + 1;
+
+ if (dispatch_put)
+ put_ptr <= next_put_ptr;
+ end
+ // NOTE(review): routing by queue order assumes the memory returns read data in request order - confirm
+
+ always_ff @(posedge clk) begin
+ any_readdata <= readdata;
+ any_readdatavalid <= readdatavalid;
+
+ dispatch_out <= dispatch_buf[pop_ptr]; // registered together with the data it belongs to
+
+ if (dispatch_put)
+ dispatch_buf[put_ptr] <= dispatch_in;
+ end
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_persp.sv b/rtl/legacy_gfx/gfx_persp.sv
new file mode 100644
index 0000000..243b5eb
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_persp.sv
@@ -0,0 +1,58 @@
+`include "gfx/gfx_defs.sv"
+
+module gfx_persp // Perspective stage for three vertices; currently a combinational pass-through
+(
+ input logic clk,
+ rst_n,
+
+ input raster_xyzw in_vertex_a,
+ in_vertex_b,
+ in_vertex_c,
+ input logic in_valid,
+ output logic in_ready,
+
+ input logic out_ready,
+ output logic out_valid,
+ output raster_xyzw out_vertex_a,
+ out_vertex_b,
+ out_vertex_c
+);
+
+ // Sorry, Ronald: the division is bypassed, so vertices and handshake signals pass straight through
+ assign in_ready = out_ready;
+ assign out_valid = in_valid;
+ assign out_vertex_a = in_vertex_a;
+ assign out_vertex_b = in_vertex_b;
+ assign out_vertex_c = in_vertex_c;
+ // The disabled implementation below instantiates one gfx_persp_vertex per vertex behind a gfx_pipeline_flow
+/*
+ logic stall;
+
+ gfx_pipeline_flow #(.STAGES(`FIXED_DIV_STAGES)) flow
+ (
+ .*
+ );
+
+ gfx_persp_vertex persp_a
+ (
+ .in_vertex(in_vertex_a),
+ .out_vertex(out_vertex_a),
+ .*
+ );
+
+ gfx_persp_vertex persp_b
+ (
+ .in_vertex(in_vertex_b),
+ .out_vertex(out_vertex_b),
+ .*
+ );
+
+ gfx_persp_vertex persp_c
+ (
+ .in_vertex(in_vertex_c),
+ .out_vertex(out_vertex_c),
+ .*
+ );
+*/
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_persp_vertex.sv b/rtl/legacy_gfx/gfx_persp_vertex.sv
new file mode 100644
index 0000000..f7434f0
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_persp_vertex.sv
@@ -0,0 +1,52 @@
+`include "gfx/gfx_defs.sv"
+
+module gfx_persp_vertex // Passes x and y of one vertex through gfx_fixed_div with w as the d operand; z and w are only delayed
+(
+ input logic clk,
+
+ input raster_xyzw in_vertex,
+ input logic stall,
+
+ output raster_xyzw out_vertex
+);
+ // Presumably gfx_fixed_div computes q = z / d, i.e. x / w and y / w - confirm against gfx_fixed_div
+ raster_xyzw skid_vertex;
+
+ gfx_fixed_div x_div
+ (
+ .z(in_vertex.xy.x),
+ .d(in_vertex.zw.w),
+ .q(skid_vertex.xy.x),
+ .*
+ );
+
+ gfx_fixed_div y_div
+ (
+ .z(in_vertex.xy.y),
+ .d(in_vertex.zw.w),
+ .q(skid_vertex.xy.y),
+ .*
+ );
+ // z and w are delayed by `FIXED_DIV_STAGES, presumably the divider latency, to stay aligned with x and y
+ gfx_pipes #(.WIDTH($bits(fixed)), .DEPTH(`FIXED_DIV_STAGES)) z_pipes
+ (
+ .in(in_vertex.zw.z),
+ .out(skid_vertex.zw.z),
+ .*
+ );
+
+ gfx_pipes #(.WIDTH($bits(fixed)), .DEPTH(`FIXED_DIV_STAGES)) w_pipes
+ (
+ .in(in_vertex.zw.w),
+ .out(skid_vertex.zw.w),
+ .*
+ );
+ // Output register stage for the reassembled vertex
+ gfx_skid_buf #(.WIDTH($bits(out_vertex))) vertex_skid
+ (
+ .in(skid_vertex),
+ .out(out_vertex),
+ .*
+ );
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_pipeline_flow.sv b/rtl/legacy_gfx/gfx_pipeline_flow.sv
new file mode 100644
index 0000000..9b3f22a
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_pipeline_flow.sv
@@ -0,0 +1,40 @@
+// Valid-bit shift register that accompanies a STAGES-deep stallable pipeline.
+// in_ready, out_valid and stall are not driven here; they bind by name (.*)
+// to the gfx_skid_flow instance fed by the last valid bit.
+module gfx_pipeline_flow
+#(parameter STAGES=0)
+(
+    input  logic clk,
+                 rst_n,
+
+    input  logic in_valid,
+                 out_ready,
+
+    output logic in_ready,
+                 out_valid,
+                 stall
+);
+
+    logic[STAGES - 1:0] valid;
+
+    gfx_skid_flow skid
+    (
+        .in_valid(valid[STAGES - 1]),
+        .*
+    );
+
+    // One process covers every stage: reset clears the bits, and each
+    // unstalled clock edge advances them by one position.
+    always_ff @(posedge clk or negedge rst_n)
+        if (!rst_n) begin
+            valid[0] <= 0;
+
+            for (integer stage = 1; stage < STAGES; ++stage)
+                valid[stage] <= 0;
+        end else if (!stall) begin
+            valid[0] <= in_valid;
+            for (integer stage = 1; stage < STAGES; ++stage)
+                valid[stage] <= valid[stage - 1];
+        end
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_pipes.sv b/rtl/legacy_gfx/gfx_pipes.sv
new file mode 100644
index 0000000..09b1d43
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_pipes.sv
@@ -0,0 +1,24 @@
+// DEPTH-stage register delay line, WIDTH bits wide. Registers hold while
+// stall is high; there is no reset.
+module gfx_pipes
+#(parameter WIDTH=0, DEPTH=0)
+(
+    input  logic              clk,
+    input  logic[WIDTH - 1:0] in,
+    input  logic              stall,
+    output logic[WIDTH - 1:0] out
+);
+
+    logic[WIDTH - 1:0] pipes[DEPTH];
+
+    // The newest value enters at index 0 and leaves from the last index.
+    assign out = pipes[DEPTH - 1];
+
+    always_ff @(posedge clk) begin
+        if (!stall)
+            pipes[0] <= in;
+        for (integer stage = DEPTH - 1; stage > 0; --stage)
+            if (!stall)
+                pipes[stage] <= pipes[stage - 1];
+    end
+endmodule
diff --git a/rtl/legacy_gfx/gfx_raster.sv b/rtl/legacy_gfx/gfx_raster.sv
new file mode 100644
index 0000000..cb03744
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_raster.sv
@@ -0,0 +1,131 @@
+`include "gfx/gfx_defs.sv"
+
+module gfx_raster
+(
+    input  logic         clk,
+                         rst_n,
+
+    input  raster_xyzw   vertex_a,
+                         vertex_b,
+                         vertex_c,
+    input  logic         in_valid,
+    output logic         in_ready,
+
+    output frag_xy_lanes fragments,
+    output bary_lanes    barys,
+    output fixed_tri     ws,
+    input  logic         out_ready,
+    output paint_lanes   out_valid
+);
+
+    // Rasterizer top level: triangle setup -> coarse walk -> one fine cell per
+    // tile position. Every internal signal is declared here, ahead of its
+    // first use, so no instance or process refers to an undeclared name.
+    logic setup_stall, setup_valid;
+    fixed_tri coarse_x_offsets, coarse_y_offsets, coarse_test_offsets, edge_refs;
+    raster_xy pos_ref;
+    coarse_dim span_x, span_y;
+    raster_offsets_tri offsets;
+    logic coarse_ready, coarse_valid;
+    fixed_tri coarse_corners;
+    raster_xy coarse_pos;
+    raster_offsets_tri fine_offsets;
+    logic fine_ready, fine_stall, fine_valid;
+    frag_xy fragment_ij[`GFX_RASTER_SIZE][`GFX_RASTER_SIZE];
+    fixed_tri barys_ij[`GFX_RASTER_SIZE][`GFX_RASTER_SIZE];
+    logic[`GFX_FINE_LANES - 1:0] paint_ij, skid_paint_ij;
+
+    //TODO: This is exactly the same issue as with offsets
+    assign ws[0] = vertex_a.zw.w;
+    assign ws[1] = vertex_b.zw.w;
+    assign ws[2] = vertex_c.zw.w;
+
+    gfx_pipeline_flow #(.STAGES(`GFX_SETUP_STAGES)) setup_flow
+    (
+        .stall(setup_stall),
+        .out_ready(coarse_ready),
+        .out_valid(setup_valid),
+        .*
+    );
+
+    gfx_setup setup
+    (
+        .stall(setup_stall),
+        .vertex_a(vertex_a.xy),
+        .vertex_b(vertex_b.xy),
+        .vertex_c(vertex_c.xy),
+        .*
+    );
+
+    gfx_raster_coarse coarse
+    (
+        .in_valid(setup_valid),
+        .in_ready(coarse_ready),
+        .out_ready(fine_ready),
+        .out_valid(coarse_valid),
+        .pos(coarse_pos),
+        .corners(coarse_corners),
+        .*
+    );
+
+    // A lane is valid only when the fine output is valid and that lane is painted.
+    always_comb
+        for (integer i = 0; i < `GFX_FINE_LANES; ++i)
+            out_valid[i] = fine_valid && skid_paint_ij[i];
+
+    gfx_pipeline_flow #(.STAGES(`GFX_FINE_STAGES)) fine_flow
+    (
+        .stall(fine_stall),
+        .in_ready(fine_ready),
+        .in_valid(coarse_valid),
+        .out_ready(out_ready || !(|skid_paint_ij)), // with no painted lane the output counts as ready regardless of out_ready
+        .out_valid(fine_valid),
+        .*
+    );
+
+    gfx_skid_buf #(.WIDTH(`GFX_FINE_LANES)) skid_paint
+    (
+        .in(paint_ij),
+        .out(skid_paint_ij),
+        .stall(fine_stall),
+        .*
+    );
+
+    genvar i, j; // the lane index of tile position (i, j) is j * `GFX_RASTER_SIZE + i
+    generate
+        for (i = 0; i < `GFX_RASTER_SIZE; ++i) begin: fine_x
+            for (j = 0; j < `GFX_RASTER_SIZE; ++j) begin: fine_y
+                gfx_raster_fine #(.X(i), .Y(j)) fine
+                (
+                    .stall(fine_stall),
+
+                    .pos(coarse_pos),
+                    .corners(coarse_corners),
+                    .offsets(fine_offsets),
+
+                    .barys(barys_ij[i][j]),
+                    .paint(paint_ij[j * `GFX_RASTER_SIZE + i]),
+                    .fragment(fragment_ij[i][j]),
+                    .*
+                );
+
+                gfx_skid_buf #(.WIDTH($bits(frag_xy))) skid_fragment
+                (
+                    .in(fragment_ij[i][j]),
+                    .out(fragments[j * `GFX_RASTER_SIZE + i]),
+                    .stall(fine_stall),
+                    .*
+                );
+
+                gfx_skid_buf #(.WIDTH($bits(fixed_tri))) skid_barys
+                (
+                    .in(barys_ij[i][j]),
+                    .out(barys[j * `GFX_RASTER_SIZE + i]),
+                    .stall(fine_stall),
+                    .*
+                );
+            end
+        end
+    endgenerate
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_raster_coarse.sv b/rtl/legacy_gfx/gfx_raster_coarse.sv
new file mode 100644
index 0000000..8db3fe9
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_raster_coarse.sv
@@ -0,0 +1,135 @@
+`include "gfx/gfx_defs.sv"
+
+module gfx_raster_coarse // Walks a span_x by span_y grid of positions and forwards those whose three edge tests pass
+(
+ input logic clk,
+ rst_n,
+
+ input raster_xy pos_ref,
+ input coarse_dim span_x,
+ span_y,
+ input raster_offsets_tri offsets,
+ input fixed_tri edge_refs,
+ coarse_x_offsets,
+ coarse_y_offsets,
+ coarse_test_offsets,
+
+ input logic in_valid,
+ output logic in_ready,
+
+ input logic out_ready,
+ output logic out_valid,
+
+ output raster_xy pos,
+ output fixed_tri corners,
+ output raster_offsets_tri fine_offsets
+);
+ // out_valid and stall are not assigned here; they bind by name (.*) to the skid_flow instance
+ fixed reference_x;
+ logic end_x, end_y, running, send, send_valid, skid_ready, stall;
+ raster_xy next_pos;
+ fixed_tri edge_fns, edge_tests, edge_vert, edge_vert_next; // edge_fns: value at the current position; edge_vert: value at the start of the current row
+ coarse_dim stride_x, stride_y, width;
+ logic[2:0] edge_signs;
+ raster_offsets_tri hold_offsets;
+
+ fixed_tri hold_coarse_x_offsets, hold_coarse_y_offsets, hold_coarse_test_offsets;
+
+ struct packed
+ {
+ raster_xy pos;
+ fixed_tri corners;
+ raster_offsets_tri fine_offsets;
+ } out, skid_out;
+
+ assign pos = skid_out.pos;
+ assign corners = skid_out.corners;
+ assign fine_offsets = skid_out.fine_offsets;
+
+ assign end_x = stride_x == 0; // last position of the current row
+ assign end_y = stride_y == 0; // last row
+
+ assign send = &edge_signs && send_valid; // forward only when all three edge tests are non-negative
+ assign in_ready = skid_ready && !running; // a new triangle is accepted only while idle
+
+ gfx_skid_buf #(.WIDTH($bits(out))) skid_buf
+ (
+ .in(out),
+ .out(skid_out),
+ .*
+ );
+
+ gfx_skid_flow skid_flow
+ (
+ .in_ready(skid_ready),
+ .in_valid(send),
+ .*
+ );
+
+ always_comb
+ for (integer i = 0; i < 3; ++i) begin
+ edge_tests[i] = edge_fns[i] + hold_coarse_test_offsets[i];
+ edge_vert_next[i] = edge_vert[i] + hold_coarse_y_offsets[i];
+ end
+
+ always_ff @(posedge clk or negedge rst_n)
+ if (!rst_n) begin
+ running <= 0;
+ send_valid <= 0;
+ end else if (!stall) begin
+ if (running)
+ running <= !end_x || !end_y; // stop after the last position of the last row
+ else
+ running <= in_ready && in_valid;
+
+ send_valid <= running; // out and edge_signs are registered from values of a running cycle
+ end
+ // In the block below later assignments override earlier ones: row wrap beats the default step, a new triangle beats both
+ always_ff @(posedge clk)
+ if (!stall) begin
+ out.pos <= next_pos;
+ out.corners <= edge_fns;
+ out.fine_offsets <= hold_offsets;
+
+ stride_x <= stride_x - 1;
+ next_pos.x <= next_pos.x + (1 << (`FIXED_FRAC + `GFX_RASTER_BITS)); // default: one step along x
+
+ if (end_x) begin
+ next_pos.x <= reference_x;
+ next_pos.y <= next_pos.y + (1 << (`FIXED_FRAC + `GFX_RASTER_BITS));
+
+ stride_x <= width;
+ stride_y <= stride_y - 1;
+ end
+
+ if (in_ready && in_valid) begin
+ next_pos <= pos_ref;
+ reference_x <= pos_ref.x;
+
+ width <= span_x;
+ stride_x <= span_x;
+ stride_y <= span_y;
+
+ hold_offsets <= offsets;
+ hold_coarse_x_offsets <= coarse_x_offsets;
+ hold_coarse_y_offsets <= coarse_y_offsets;
+ hold_coarse_test_offsets <= coarse_test_offsets;
+ end
+
+ for (integer i = 0; i < 3; ++i) begin
+ edge_fns[i] <= edge_fns[i] + hold_coarse_x_offsets[i]; // default: incremental update along x
+ if (end_x) begin
+ edge_fns[i] <= edge_vert_next[i];
+ edge_vert[i] <= edge_vert_next[i];
+ end
+
+ if (in_ready && in_valid) begin
+ edge_fns[i] <= edge_refs[i];
+ edge_vert[i] <= edge_refs[i];
+ end
+
+ edge_signs[i] <= !edge_tests[i][$bits(fixed) - 1]; // 1 when the top bit of the test value is clear
+ end
+ end
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_raster_fine.sv b/rtl/legacy_gfx/gfx_raster_fine.sv
new file mode 100644
index 0000000..da11b6f
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_raster_fine.sv
@@ -0,0 +1,49 @@
+`include "gfx/gfx_defs.sv"
+
+ // Fine rasterizer for the sample at column X, row Y of a coarse tile.
+ //
+ // Stage 1 adds this sample's per-edge offset to each tile corner value and
+ // builds the fragment coordinate; stage 2 forwards the edge values as
+ // `barys`, the coordinate as `fragment`, and raises `paint` when no edge
+ // value has its top bit set. All registers freeze while `stall` is high.
+ module gfx_raster_fine
+ #(parameter X=0, Y=0)
+ (
+     input  logic              clk,
+
+     input  raster_xy          pos,
+     input  fixed_tri          corners,
+     input  raster_offsets_tri offsets,
+     input  logic              stall,
+
+     output frag_xy            fragment,
+     output fixed_tri          barys,
+     output logic              paint
+ );
+
+     // Flat index of this sample inside each per-edge offset table
+     localparam SAMPLE = Y * `GFX_RASTER_SIZE + X;
+
+     raster_xy_prec pos_fields;
+     frag_xy        frag_q;
+     fixed_tri      edge_q, sample_offs;
+     logic[2:0]     negative;
+     logic[`GFX_RASTER_BITS - 1:0] sub_x, sub_y;
+
+     assign sub_x = X;
+     assign sub_y = Y;
+     assign pos_fields = pos;
+
+     always_comb
+         for (integer e = 0; e < 3; ++e) begin
+             sample_offs[e] = offsets[e][SAMPLE];
+             // Top bit of the registered edge value
+             negative[e] = edge_q[e][$bits(edge_q[0]) - 1];
+         end
+
+     always_ff @(posedge clk)
+         if (!stall) begin
+             // First stage: edge values and coordinate for this sample
+             for (integer e = 0; e < 3; ++e)
+                 edge_q[e] <= corners[e] + sample_offs[e];
+
+             frag_q.x <= {pos_fields.x.sign, pos_fields.x.coarse, sub_x};
+             frag_q.y <= {pos_fields.y.sign, pos_fields.y.coarse, sub_y};
+
+             // Second stage: registered outputs
+             fragment <= frag_q;
+             barys <= edge_q;
+             paint <= negative == 0;
+         end
+
+ endmodule
diff --git a/rtl/legacy_gfx/gfx_rop.sv b/rtl/legacy_gfx/gfx_rop.sv
new file mode 100644
index 0000000..3e6ef35
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_rop.sv
@@ -0,0 +1,85 @@
+`include "gfx/gfx_defs.sv"
+
+ // Raster output: writes one painted fragment to memory as two consecutive
+ // words (low word first, then high word) and flags the fragment's address on
+ // the mask port while the low word is being written.
+ //
+ // A fragment is latched whenever `in_ready` is high. A new fragment can be
+ // accepted in IDLE, or in the same cycle the high word is accepted by the bus.
+ module gfx_rop
+ (
+     input  logic        clk,
+                         rst_n,
+
+     input  vram_addr    frag_base,
+
+     input  frag_paint   in,
+     input  logic        in_valid,
+     output logic        in_ready,
+
+     input  logic        rop_waitrequest,
+     output logic        rop_write,
+     output vram_word    rop_writedata,
+     output vram_addr    rop_address,
+
+     output linear_coord mask_addr,
+     output logic        mask_assert
+ );
+
+     enum int unsigned
+     {
+         IDLE,
+         WRITE_LO,
+         WRITE_HI
+     } state;
+
+     logic      sel_hi, accepted;
+     vram_word  word_hi, word_lo;
+     frag_paint latched;
+
+     // The bus takes the current write when waitrequest is low
+     assign accepted = !rop_waitrequest;
+
+     assign {word_hi, word_lo} = latched.color;
+     assign rop_writedata = sel_hi ? word_hi : word_lo;
+     assign rop_address = frag_base + {5'd0, latched.addr, sel_hi};
+     assign mask_addr = latched.addr;
+
+     always_comb begin
+         in_ready = 0;
+         rop_write = 0;
+         mask_assert = 0;
+         sel_hi = 1'bx; // don't-care while no write is in flight
+
+         unique case (state)
+             IDLE:
+                 in_ready = 1;
+
+             WRITE_LO: begin
+                 rop_write = 1;
+                 mask_assert = 1;
+                 sel_hi = 0;
+             end
+
+             WRITE_HI: begin
+                 rop_write = 1;
+                 sel_hi = 1;
+                 in_ready = accepted;
+             end
+         endcase
+     end
+
+     always_ff @(posedge clk)
+         if (in_ready)
+             latched <= in;
+
+     always_ff @(posedge clk or negedge rst_n)
+         if (!rst_n)
+             state <= IDLE;
+         else unique case (state)
+             IDLE:
+                 if (in_valid)
+                     state <= WRITE_LO;
+
+             WRITE_LO:
+                 if (accepted)
+                     state <= WRITE_HI;
+
+             WRITE_HI:
+                 // Chain straight into the next fragment when one is waiting
+                 if (accepted)
+                     state <= in_valid ? WRITE_LO : IDLE;
+         endcase
+
+ endmodule
diff --git a/rtl/legacy_gfx/gfx_scanout.sv b/rtl/legacy_gfx/gfx_scanout.sv
new file mode 100644
index 0000000..a43d14c
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_scanout.sv
@@ -0,0 +1,138 @@
+`include "gfx/gfx_defs.sv"
+
+ // Scanout front end: walks half-word addresses over the frame, looks up the
+ // per-pixel mask, issues framebuffer reads only where the effective mask is
+ // set, and feeds a mask FIFO plus a framebuffer-data FIFO into gfx_scanout_dac.
+ //
+ // `commit` and `flush` are not driven in this module's own statements; they are
+ // presumably outputs of mask_flow connected through `.*` -- TODO confirm.
+ module gfx_scanout
+ (
+ input logic clk,
+ rst_n,
+
+ input logic enable_clear,
+ input rgb24 clear_color,
+ input vram_addr scan_base,
+
+ input logic mask,
+ output linear_coord mask_addr,
+
+ input logic fb_waitrequest,
+ fb_readdatavalid,
+ input vram_word fb_readdata,
+ output logic fb_read,
+ output vram_addr fb_address,
+
+ input logic scan_ready,
+ output logic scan_valid,
+ scan_endofpacket,
+ scan_startofpacket,
+ output rgb30 scan_data,
+
+ output logic vsync
+ );
+
+ logic commit, effective_mask, flush, mask_fifo_out, dac_ready,
+ fb_ready, mask_fifo_ready, fb_fifo_valid, mask_fifo_valid,
+ pop, put, put_mask, next_vsync, start_vsync, wait_vsync;
+
+ vram_word fb_fifo_out;
+ half_coord commit_addr, mask_in_addr, mask_out_addr, mask_hold_addr, max_addr;
+
+ // The mask is addressed with the top $bits(mask_addr) bits of the half-word address
+ assign mask_addr = mask_in_addr[$bits(mask_in_addr) - 1:$bits(mask_in_addr) - $bits(mask_addr)];
+ // Last half-word address of the frame: {X_RES * Y_RES - 1, 1'b1}
+ assign max_addr[0] = 1;
+ assign max_addr[$bits(max_addr) - 1:1] = `GFX_X_RES * `GFX_Y_RES - 1;
+
+ // The read channel is free when no read is pending or the pending one is accepted
+ assign fb_ready = !fb_read || !fb_waitrequest;
+ assign next_vsync = commit && start_vsync;
+ assign start_vsync = mask_hold_addr == max_addr;
+ // With clearing disabled every pixel is treated as masked-in (always read)
+ assign effective_mask = mask || !enable_clear;
+
+ gfx_flush_flow #(.STAGES(`GFX_MASK_STAGES)) mask_flow
+ (
+ .in_valid(!wait_vsync),
+ .out_ready(fb_ready && mask_fifo_ready && !next_vsync),
+ .out_valid(pop),
+ .*
+ );
+
+ // Delays the issued address by GFX_MASK_STAGES; never stalled
+ gfx_pipes #(.WIDTH($bits(mask_in_addr)), .DEPTH(`GFX_MASK_STAGES)) addr_pipes
+ (
+ .in(mask_in_addr),
+ .out(mask_out_addr),
+ .stall(0),
+ .*
+ );
+
+ /* These FIFOs must satisfy two properties to guarantee correctness:
+ *
+ * 1. mask_fifo.out_ready && mask_fifo.out_valid <=> scan.in_ready && scan.in_valid
+ * 2. fb_fifo.out_ready && fb_fifo.out_valid => scan.in_ready && scan.in_valid
+ *
+ * Note the asymmetry (<=> vs =>), which is due to mask_fifo.out
+ */
+
+ gfx_fifo #(.WIDTH($bits(effective_mask)), .DEPTH(`GFX_SCANOUT_FIFO_DEPTH)) mask_fifo
+ (
+ .in(put_mask),
+ .out(mask_fifo_out),
+ .in_ready(mask_fifo_ready),
+ .in_valid(put),
+ .out_ready(dac_ready && (!mask_fifo_out || fb_fifo_valid)),
+ .out_valid(mask_fifo_valid),
+ .*
+ );
+
+ // 2x to avoid a potential overflow when fb_read=1 but mask_fifo is full
+ gfx_fifo #(.WIDTH($bits(vram_word)), .DEPTH(2 * `GFX_SCANOUT_FIFO_DEPTH)) fb_fifo
+ (
+ .in(fb_readdata),
+ .out(fb_fifo_out),
+ .in_ready(), // readdatavalid does not support backpressure
+ .in_valid(fb_readdatavalid),
+ .out_ready(dac_ready && mask_fifo_valid && mask_fifo_out),
+ .out_valid(fb_fifo_valid),
+ .*
+ );
+
+ // The DAC consumes a mask entry, plus a framebuffer word when that entry is set
+ gfx_scanout_dac dac
+ (
+ .in_ready(dac_ready),
+ .in_valid(mask_fifo_valid && (!mask_fifo_out || fb_fifo_valid)),
+ .*
+ );
+
+ always_ff @(posedge clk or negedge rst_n)
+ if (!rst_n) begin
+ put <= 0;
+ fb_read <= 0;
+ wait_vsync <= 0;
+ commit_addr <= 0;
+ mask_in_addr <= 0;
+ end else begin
+ // Free-running address; rewound to the last committed one on flush or
+ // while waiting for vsync (the later assignment wins)
+ mask_in_addr <= mask_in_addr + 1;
+
+ if (flush || wait_vsync)
+ mask_in_addr <= commit_addr;
+
+ // Committing the last address of the frame restarts at 0 and waits for vsync
+ if (commit) begin
+ wait_vsync <= start_vsync;
+ commit_addr <= start_vsync ? 0 : mask_out_addr;
+ end
+
+ if (fb_ready)
+ fb_read <= mask_fifo_ready && pop && !next_vsync && effective_mask;
+
+ if (mask_fifo_ready)
+ put <= fb_ready && pop && !next_vsync;
+
+ // Placed last so a vsync pulse overrides a same-cycle commit
+ if (vsync)
+ wait_vsync <= 0;
+ end
+
+ always_ff @(posedge clk) begin
+ mask_hold_addr <= mask_out_addr;
+
+ if (fb_ready)
+ fb_address <= scan_base + {5'd0, mask_out_addr};
+
+ if (mask_fifo_ready)
+ put_mask <= effective_mask;
+ end
+
+ endmodule
diff --git a/rtl/legacy_gfx/gfx_scanout_dac.sv b/rtl/legacy_gfx/gfx_scanout_dac.sv
new file mode 100644
index 0000000..5c80d2b
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_scanout_dac.sv
@@ -0,0 +1,117 @@
+`include "gfx/gfx_defs.sv"
+
+ // Scanout back end: joins two consecutive accepted inputs into one pixel,
+ // substitutes clear_color when the latched mask bit is clear, widens each
+ // channel by two bits and streams the result with start/end-of-packet flags.
+ // `vsync` pulses for one cycle when an end-of-packet pixel is handed over.
+ //
+ // `in_ready` and `stall` are driven by the gfx_skid_flow instance via `.*`.
+ // NOTE(review): `enable_clear` and `next_addr` are not referenced by any
+ // statement in this module.
+ module gfx_scanout_dac
+ (
+ input logic clk,
+ rst_n,
+
+ input logic enable_clear,
+ input rgb24 clear_color,
+
+ input logic mask_fifo_out,
+ input vram_word fb_fifo_out,
+ input logic in_valid,
+ output logic in_ready,
+
+ input logic scan_ready,
+ output logic scan_valid,
+ scan_endofpacket,
+ scan_startofpacket,
+ output rgb30 scan_data,
+
+ output logic vsync
+ );
+
+ logic dac_valid, half, half_mask, stall, endofpacket, startofpacket;
+ rgb24 pixel;
+ rgb32 fifo_pixel;
+ vram_word msw, lsw;
+ half_coord next_addr;
+ linear_coord max_addr, pixel_addr;
+
+ struct packed
+ {
+ logic endofpacket,
+ startofpacket;
+ rgb30 pixel;
+ } skid_in, skid_out;
+
+ assign scan_data = skid_out.pixel;
+ assign scan_endofpacket = skid_out.endofpacket;
+ assign scan_startofpacket = skid_out.startofpacket;
+
+ // Address of the last pixel in the frame
+ assign max_addr = `GFX_X_RES * `GFX_Y_RES - 1;
+
+ assign fifo_pixel = {msw, lsw};
+ assign skid_in.endofpacket = endofpacket;
+ assign skid_in.startofpacket = startofpacket;
+
+ // Widens a channel by appending two copies of its bit 0
+ function color10 dac_color(color8 in);
+ dac_color = {in, {2{in[0]}}};
+ endfunction
+
+ assign skid_in.pixel.r = dac_color(pixel.r);
+ assign skid_in.pixel.g = dac_color(pixel.g);
+ assign skid_in.pixel.b = dac_color(pixel.b);
+
+ always_comb begin
+ // Drops fifo_pixel.a
+ pixel.r = fifo_pixel.r;
+ pixel.g = fifo_pixel.g;
+ pixel.b = fifo_pixel.b;
+
+ // Mask bit clear: show the clear color instead of the assembled pixel
+ if (!half_mask)
+ pixel = clear_color;
+ end
+
+ gfx_skid_flow flow
+ (
+ .in_valid(dac_valid),
+ .out_ready(scan_ready),
+ .out_valid(scan_valid),
+ .*
+ );
+
+ gfx_skid_buf #(.WIDTH($bits(skid_in))) skid
+ (
+ .in(skid_in),
+ .out(skid_out),
+ .*
+ );
+
+ always_ff @(posedge clk or negedge rst_n)
+ if (!rst_n) begin
+ half <= 0;
+ vsync <= 0;
+ dac_valid <= 0;
+ pixel_addr <= 0;
+ end else begin
+ // vsync defaults low; it follows end-of-packet when a pixel is taken
+ vsync <= 0;
+ if (in_ready && dac_valid) begin
+ vsync <= skid_in.endofpacket;
+ dac_valid <= 0;
+ end
+
+ // `half` toggles per accepted input; a pixel becomes valid on the second
+ // one. This assignment overrides the clear above in the same cycle.
+ if (in_ready && in_valid) begin
+ half <= !half;
+ dac_valid <= half;
+
+ // Advance the pixel counter once per completed pixel, wrapping at frame end
+ if (half) begin
+ pixel_addr <= pixel_addr + 1;
+ if (pixel_addr == max_addr)
+ pixel_addr <= 0;
+ end
+ end
+ end
+
+ always_ff @(posedge clk)
+ if (in_ready && in_valid) begin
+ // Shift register: the earlier word ends up in lsw, the later one in msw
+ lsw <= msw;
+ msw <= fb_fifo_out;
+ half_mask <= mask_fifo_out;
+
+ endofpacket <= pixel_addr == max_addr;
+ startofpacket <= pixel_addr == 0;
+ end
+
+ endmodule
diff --git a/rtl/legacy_gfx/gfx_setup.sv b/rtl/legacy_gfx/gfx_setup.sv
new file mode 100644
index 0000000..1213645
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_setup.sv
@@ -0,0 +1,190 @@
+`include "gfx/gfx_defs.sv"
+
+ // Triangle setup: from three vertices, produces the bounding-box reference
+ // position and spans (gfx_setup_bounds), and per edge: the edge value at the
+ // reference, the coarse x/y increments (fine increment << GFX_RASTER_BITS),
+ // the table of fine offsets, and the single offset used for the coarse test.
+ // All outputs pass through one skid buffer; everything is gated by `stall`.
+ //
+ // Edges are (a,b), (b,c), (c,a), using the vertices delayed by
+ // GFX_SETUP_BOUNDS_STAGES so they line up with the bounds reference.
+ module gfx_setup
+ (
+     input  logic              clk,
+
+     input  raster_xy          vertex_a,
+                               vertex_b,
+                               vertex_c,
+     input  logic              stall,
+
+     output raster_xy          pos_ref,
+     output coarse_dim         span_x,
+                               span_y,
+     output raster_offsets_tri offsets,
+     output fixed_tri          edge_refs,
+                               coarse_x_offsets,
+                               coarse_y_offsets,
+                               coarse_test_offsets
+ );
+
+     // FIXME FIXME FIXME: Top-left rule
+
+     fixed_tri edge_base, edge_inc_x, edge_inc_y, out_edge_refs, x_offsets, y_offsets, test_offsets;
+
+     raster_xy bounds_ref, hold_vertex_a, hold_vertex_b, hold_vertex_c, ps[3], qs[3], out_pos_ref;
+     coarse_dim bounds_span_x, bounds_span_y, out_span_x, out_span_y;
+     raster_offsets_tri out_offsets;
+
+     struct packed
+     {
+         raster_xy          pos_ref;
+         coarse_dim         span_x,
+                            span_y;
+         raster_offsets_tri offsets;
+         fixed_tri          edge_refs,
+                            coarse_x_offsets,
+                            coarse_y_offsets,
+                            coarse_test_offsets;
+     } out, skid_out;
+
+     gfx_skid_buf #(.WIDTH($bits(out))) skid
+     (
+         .in(out),
+         .out(skid_out),
+         .*
+     );
+
+     assign out.span_x = out_span_x;
+     assign out.span_y = out_span_y;
+     assign out.pos_ref = out_pos_ref;
+     assign out.offsets = out_offsets;
+     assign out.edge_refs = out_edge_refs;
+     assign out.coarse_x_offsets = x_offsets;
+     assign out.coarse_y_offsets = y_offsets;
+     assign out.coarse_test_offsets = test_offsets;
+
+     assign span_x = skid_out.span_x;
+     assign span_y = skid_out.span_y;
+     assign pos_ref = skid_out.pos_ref;
+     assign offsets = skid_out.offsets;
+     assign edge_refs = skid_out.edge_refs;
+     assign coarse_x_offsets = skid_out.coarse_x_offsets;
+     assign coarse_y_offsets = skid_out.coarse_y_offsets;
+     assign coarse_test_offsets = skid_out.coarse_test_offsets;
+
+     // Edge i runs from ps[i] to qs[i]
+     assign ps[0] = hold_vertex_a;
+     assign qs[0] = hold_vertex_b;
+
+     assign ps[1] = hold_vertex_b;
+     assign qs[1] = hold_vertex_c;
+
+     assign ps[2] = hold_vertex_c;
+     assign qs[2] = hold_vertex_a;
+
+     gfx_pipes #(.WIDTH($bits(vertex_a)), .DEPTH(`GFX_SETUP_BOUNDS_STAGES)) vertex_a_pipes
+     (
+         .in(vertex_a),
+         .out(hold_vertex_a),
+         .*
+     );
+
+     gfx_pipes #(.WIDTH($bits(vertex_b)), .DEPTH(`GFX_SETUP_BOUNDS_STAGES)) vertex_b_pipes
+     (
+         .in(vertex_b),
+         .out(hold_vertex_b),
+         .*
+     );
+
+     gfx_pipes #(.WIDTH($bits(vertex_c)), .DEPTH(`GFX_SETUP_BOUNDS_STAGES)) vertex_c_pipes
+     (
+         .in(vertex_c),
+         .out(hold_vertex_c),
+         .*
+     );
+
+     gfx_setup_bounds bounds
+     (
+         .span_x(bounds_span_x),
+         .span_y(bounds_span_y),
+         .reference(bounds_ref),
+         .*
+     );
+
+     // Bounds results wait for the edge and offset stages to finish
+     localparam POST_BOUNDS_DEPTH = `GFX_SETUP_EDGE_STAGES + `GFX_SETUP_OFFSETS_STAGES;
+
+     gfx_pipes #(.WIDTH($bits(pos_ref)), .DEPTH(POST_BOUNDS_DEPTH)) ref_pipes
+     (
+         .in(bounds_ref),
+         .out(out_pos_ref),
+         .*
+     );
+
+     gfx_pipes #(.WIDTH($bits(span_x)), .DEPTH(POST_BOUNDS_DEPTH)) span_x_pipes
+     (
+         .in(bounds_span_x),
+         .out(out_span_x),
+         .*
+     );
+
+     gfx_pipes #(.WIDTH($bits(span_y)), .DEPTH(POST_BOUNDS_DEPTH)) span_y_pipes
+     (
+         .in(bounds_span_y),
+         .out(out_span_y),
+         .*
+     );
+
+     // Coarse test offset: pick the fine offset of the tile corner the edge
+     // function grows towards, selected by the sign bits of the coarse x/y
+     // increments. The offset tables are indexed y * GFX_RASTER_SIZE + x,
+     // matching gfx_setup_offsets and gfx_raster_fine.
+     // Assumes GFX_RASTER_OFFSETS == GFX_RASTER_SIZE * GFX_RASTER_SIZE -- TODO
+     // confirm against gfx_defs.sv.
+     always_comb
+         for (integer i = 0; i < 3; ++i)
+             unique case ({x_offsets[i][$bits(fixed) - 1], y_offsets[i][$bits(fixed) - 1]})
+                 // x >= 0, y >= 0: corner (x = SIZE - 1, y = SIZE - 1)
+                 2'b00:
+                     test_offsets[i] = out_offsets[i][`GFX_RASTER_OFFSETS - 1];
+
+                 // x >= 0, y < 0: corner (x = SIZE - 1, y = 0)
+                 2'b01:
+                     test_offsets[i] = out_offsets[i][`GFX_RASTER_SIZE - 1];
+
+                 // x < 0, y >= 0: corner (x = 0, y = SIZE - 1), i.e. the first
+                 // entry of the last row. The previous index had an extra "- 1"
+                 // and selected (x = SIZE - 1, y = SIZE - 2) instead.
+                 2'b10:
+                     test_offsets[i] = out_offsets[i][`GFX_RASTER_OFFSETS - `GFX_RASTER_SIZE];
+
+                 // x < 0, y < 0: corner (x = 0, y = 0)
+                 2'b11:
+                     test_offsets[i] = out_offsets[i][0];
+             endcase
+
+     genvar i;
+     generate
+         for (i = 0; i < 3; ++i) begin: edges
+             gfx_setup_edge edge_fn
+             (
+                 .p(ps[i]),
+                 .q(qs[i]),
+                 .base(edge_base[i]),
+                 .inc_x(edge_inc_x[i]),
+                 .inc_y(edge_inc_y[i]),
+                 .origin(bounds_ref),
+                 .*
+             );
+
+             // The following pipes align the edge results with the fine offsets
+             gfx_pipes #(.WIDTH($bits(fixed)), .DEPTH(`GFX_SETUP_OFFSETS_STAGES)) base_pipes
+             (
+                 .in(edge_base[i]),
+                 .out(out_edge_refs[i]),
+                 .*
+             );
+
+             gfx_pipes #(.WIDTH($bits(fixed)), .DEPTH(`GFX_SETUP_OFFSETS_STAGES)) coarse_x_pipes
+             (
+                 .in(edge_inc_x[i] << `GFX_RASTER_BITS),
+                 .out(x_offsets[i]),
+                 .*
+             );
+
+             gfx_pipes #(.WIDTH($bits(fixed)), .DEPTH(`GFX_SETUP_OFFSETS_STAGES)) coarse_y_pipes
+             (
+                 .in(edge_inc_y[i] << `GFX_RASTER_BITS),
+                 .out(y_offsets[i]),
+                 .*
+             );
+
+             gfx_setup_offsets edge_offsets
+             (
+                 .inc_x(edge_inc_x[i]),
+                 .inc_y(edge_inc_y[i]),
+                 .offsets(out_offsets[i]),
+                 .*
+             );
+         end
+     endgenerate
+
+ endmodule
diff --git a/rtl/legacy_gfx/gfx_setup_bounds.sv b/rtl/legacy_gfx/gfx_setup_bounds.sv
new file mode 100644
index 0000000..b110438
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_setup_bounds.sv
@@ -0,0 +1,73 @@
+`include "gfx/gfx_defs.sv"
+
+ // Bounding box of a triangle in three registered stages:
+ //   1. latch the vertices and the pairwise "less than" flags per axis,
+ //   2. select the per-axis minimum and maximum from those flags,
+ //   3. emit the {sign, coarse} part of the minimum as the reference and the
+ //      difference of the {sign, coarse} parts as the span.
+ // The reference has zero sub/fine fields and padding replicated from the top
+ // bit of the coarse reference. All stages hold while `stall` is high.
+ module gfx_setup_bounds
+ (
+     input  logic      clk,
+
+     input  raster_xy  vertex_a,
+                       vertex_b,
+                       vertex_c,
+     input  logic      stall,
+
+     output raster_xy  reference,
+     output coarse_dim span_x,
+                       span_y
+ );
+
+     logic lt_x_ab, lt_x_ac, lt_x_bc, lt_y_ab, lt_y_ac, lt_y_bc;
+     raster_xy a_q, b_q, c_q, lo, hi;
+     coarse_dim ref_x, ref_y;
+     raster_xy_prec lo_fields, hi_fields, ref_fields;
+
+     assign lo_fields = lo;
+     assign hi_fields = hi;
+     assign reference = ref_fields;
+
+     // X half of the reference
+     assign {ref_fields.x.sign, ref_fields.x.coarse} = ref_x;
+     assign ref_fields.x.padding = {`GFX_RASTER_PAD_BITS{ref_x[$bits(ref_x) - 1]}};
+     assign ref_fields.x.fine = 0;
+     assign ref_fields.x.sub = 0;
+
+     // Y half of the reference
+     assign {ref_fields.y.sign, ref_fields.y.coarse} = ref_y;
+     assign ref_fields.y.padding = {`GFX_RASTER_PAD_BITS{ref_y[$bits(ref_y) - 1]}};
+     assign ref_fields.y.fine = 0;
+     assign ref_fields.y.sub = 0;
+
+     always_ff @(posedge clk)
+         if (!stall) begin
+             // Stage 1: vertices and pairwise comparisons
+             a_q <= vertex_a;
+             b_q <= vertex_b;
+             c_q <= vertex_c;
+
+             lt_x_ab <= vertex_a.x < vertex_b.x;
+             lt_x_ac <= vertex_a.x < vertex_c.x;
+             lt_x_bc <= vertex_b.x < vertex_c.x;
+
+             lt_y_ab <= vertex_a.y < vertex_b.y;
+             lt_y_ac <= vertex_a.y < vertex_c.y;
+             lt_y_bc <= vertex_b.y < vertex_c.y;
+
+             // Stage 2: min/max per axis
+             lo.x <= lt_x_ab ? (lt_x_ac ? a_q.x : c_q.x)
+                             : (lt_x_bc ? b_q.x : c_q.x);
+             hi.x <= lt_x_ab ? (lt_x_bc ? c_q.x : b_q.x)
+                             : (lt_x_ac ? c_q.x : a_q.x);
+
+             lo.y <= lt_y_ab ? (lt_y_ac ? a_q.y : c_q.y)
+                             : (lt_y_bc ? b_q.y : c_q.y);
+             hi.y <= lt_y_ab ? (lt_y_bc ? c_q.y : b_q.y)
+                             : (lt_y_ac ? c_q.y : a_q.y);
+
+             // Stage 3: coarse reference and spans
+             ref_x <= {lo_fields.x.sign, lo_fields.x.coarse};
+             ref_y <= {lo_fields.y.sign, lo_fields.y.coarse};
+
+             span_x <= {hi_fields.x.sign, hi_fields.x.coarse} - {lo_fields.x.sign, lo_fields.x.coarse};
+             span_y <= {hi_fields.y.sign, hi_fields.y.coarse} - {lo_fields.y.sign, lo_fields.y.coarse};
+         end
+
+ endmodule
diff --git a/rtl/legacy_gfx/gfx_setup_edge.sv b/rtl/legacy_gfx/gfx_setup_edge.sv
new file mode 100644
index 0000000..5d69a88
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_setup_edge.sv
@@ -0,0 +1,53 @@
+`include "gfx/gfx_defs.sv"
+
+ // Setup for one triangle edge from p to q.
+ //
+ // Registers the increments inc_x = p.y - q.y and inc_y = q.x - p.x and the
+ // vector from q to `origin`, then feeds both pairs to gfx_fixed_fma_dot with
+ // c = 0 to produce `base` (presumably delta_x * inc_x + delta_y * inc_y, the
+ // edge value at `origin` -- confirm against gfx_fixed_fma_dot).
+ // The increments are delayed by FIXED_FMA_DOT_STAGES, presumably so they
+ // leave together with `base`.
+ module gfx_setup_edge
+ (
+ input logic clk,
+
+ input raster_xy p,
+ q,
+ origin,
+ input logic stall,
+
+ output fixed base,
+ inc_x,
+ inc_y
+ );
+
+ fixed delta_x, delta_y, hold_inc_x, hold_inc_y;
+
+ gfx_pipes #(.WIDTH($bits(inc_x)), .DEPTH(`FIXED_FMA_DOT_STAGES)) inc_x_pipes
+ (
+ .in(hold_inc_x),
+ .out(inc_x),
+ .*
+ );
+
+ gfx_pipes #(.WIDTH($bits(inc_y)), .DEPTH(`FIXED_FMA_DOT_STAGES)) inc_y_pipes
+ (
+ .in(hold_inc_y),
+ .out(inc_y),
+ .*
+ );
+
+ gfx_fixed_fma_dot edge_base
+ (
+ .c(0),
+ .q(base),
+ .a0(delta_x),
+ .b0(hold_inc_x),
+ .a1(delta_y),
+ .b1(hold_inc_y),
+ .*
+ );
+
+ always_ff @(posedge clk)
+ if (!stall) begin
+ // Offset of the reference origin relative to q
+ delta_x <= origin.x - q.x;
+ delta_y <= origin.y - q.y;
+
+ // Per-unit change of the edge value along x and along y
+ hold_inc_x <= p.y - q.y;
+ hold_inc_y <= q.x - p.x;
+ end
+
+ endmodule
diff --git a/rtl/legacy_gfx/gfx_setup_offsets.sv b/rtl/legacy_gfx/gfx_setup_offsets.sv
new file mode 100644
index 0000000..aabd322
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_setup_offsets.sv
@@ -0,0 +1,44 @@
+`include "gfx/gfx_defs.sv"
+
+ // Builds the table of fine offsets for one edge.
+ //
+ // Entry i of `offsets` is (i % GFX_RASTER_SIZE) * inc_x +
+ // (i / GFX_RASTER_SIZE) * inc_y, computed in two registered stages: first the
+ // per-axis multiples, then every x/y combination. Both stages hold while
+ // `stall` is high.
+ module gfx_setup_offsets
+ (
+     input  logic          clk,
+
+     input  fixed          inc_x,
+                           inc_y,
+     input  logic          stall,
+
+     output raster_offsets offsets
+ );
+
+     fixed x_hold[`GFX_RASTER_SIZE], y_hold[`GFX_RASTER_SIZE],
+           x_multiples[`GFX_RASTER_SIZE], y_multiples[`GFX_RASTER_SIZE];
+
+     genvar i;
+     generate
+         for (i = 0; i < `GFX_RASTER_SIZE; ++i) begin: multiples
+             // Multiples by an elaboration-time constant, valid for any
+             // GFX_RASTER_SIZE: i = 0 gives zero and i = 1 gives the increment
+             // itself, exactly as the previous hand-written assignments did.
+             // Larger indices were previously left undriven.
+             assign x_multiples[i] = i * inc_x;
+             assign y_multiples[i] = i * inc_y;
+
+             always_ff @(posedge clk)
+                 if (!stall) begin
+                     x_hold[i] <= x_multiples[i];
+                     y_hold[i] <= y_multiples[i];
+                 end
+         end
+
+         // Table index i maps to column i % SIZE and row i / SIZE
+         for (i = 0; i < `GFX_RASTER_OFFSETS; ++i) begin: permutations
+             always_ff @(posedge clk)
+                 if (!stall)
+                     offsets[i] <= x_hold[i % `GFX_RASTER_SIZE] + y_hold[i / `GFX_RASTER_SIZE];
+         end
+     endgenerate
+
+ endmodule
diff --git a/rtl/legacy_gfx/gfx_skid_buf.sv b/rtl/legacy_gfx/gfx_skid_buf.sv
new file mode 100644
index 0000000..fae5717
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_skid_buf.sv
@@ -0,0 +1,20 @@
+ // Skid buffer data path: passes `in` straight through while not stalled and
+ // keeps a copy of it; while `stall` is high the output shows the last copy
+ // taken before the stall began.
+ module gfx_skid_buf
+ #(parameter WIDTH=0)
+ (
+     input  logic              clk,
+
+     input  logic[WIDTH - 1:0] in,
+     input  logic              stall,
+
+     output logic[WIDTH - 1:0] out
+ );
+
+     logic[WIDTH - 1:0] saved;
+
+     // Track the input until a stall freezes the copy
+     always_ff @(posedge clk)
+         if (!stall)
+             saved <= in;
+
+     always_comb
+         out = !stall ? in : saved;
+
+ endmodule
diff --git a/rtl/legacy_gfx/gfx_skid_flow.sv b/rtl/legacy_gfx/gfx_skid_flow.sv
new file mode 100644
index 0000000..c5e3b4a
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_skid_flow.sv
@@ -0,0 +1,31 @@
+ // Skid buffer control path (companion of gfx_skid_buf).
+ //
+ // Remembers last cycle's `out_ready` and whether a valid item was taken the
+ // last time the stage was not stalled. The stage stalls when it took a valid
+ // item and the consumer was not ready on the previous cycle; while stalled,
+ // `out_valid` stays high.
+ module gfx_skid_flow
+ (
+     input  logic clk,
+                  rst_n,
+
+     input  logic in_valid,
+                  out_ready,
+
+     output logic in_ready,
+                  out_valid,
+                  stall
+ );
+
+     logic ready_q, valid_q;
+
+     always_comb begin
+         in_ready = ready_q || !valid_q;
+         stall = !in_ready;
+         out_valid = in_valid || stall;
+     end
+
+     always_ff @(posedge clk or negedge rst_n)
+         if (!rst_n) begin
+             valid_q <= 0;
+             ready_q <= 0;
+         end else begin
+             // Only sample the input side while the stage is moving
+             if (!stall)
+                 valid_q <= in_valid;
+
+             ready_q <= out_ready;
+         end
+
+ endmodule
diff --git a/rtl/legacy_gfx/gfx_sp.sv b/rtl/legacy_gfx/gfx_sp.sv
new file mode 100644
index 0000000..ce0f9ff
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_sp.sv
@@ -0,0 +1,131 @@
+`include "gfx/gfx_defs.sv"
+
+ // Top level of the shader processor: wires together instruction fetch,
+ // decode, issue, the batch reader, the three execution units (shuffler,
+ // combiner, stream), writeback and the register file.
+ //
+ // Most connections are made by name through `.*`, so the local signal names
+ // below must keep matching the sub-module port names. Every signal is
+ // declared before the first instance that references it.
+ //
+ // NOTE(review): nothing visible in this module drives `batch_end`; it is
+ // presumably an output of one of the `.*`-connected instances -- confirm.
+ module gfx_sp
+ (
+     input  logic     clk,
+                      rst_n,
+
+     input  logic     batch_waitrequest,
+                      batch_readdatavalid,
+     input  vram_word batch_readdata,
+     output vram_addr batch_address,
+     output logic     batch_read,
+
+     input  logic     fetch_waitrequest,
+                      fetch_readdatavalid,
+     input  vram_word fetch_readdata,
+     output vram_addr fetch_address,
+     output logic     fetch_read,
+
+     input  logic     program_start,
+     input  cmd_word  program_header_base,
+                      program_header_size,
+     output logic     running,
+
+     input  logic     send_ready,
+     output logic     send_valid,
+     output lane_word send_data,
+     output lane_mask send_mask
+ );
+
+     // Fetch <-> decode, plus batch control
+     logic batch_start, batch_end, clear_lanes, insn_valid, insn_ready;
+     cmd_word batch_length;
+     insn_word insn;
+     vram_insn_addr batch_base;
+
+     // Decode <-> issue
+     logic deco_valid, deco_ready;
+     insn_deco deco;
+
+     // Issue <-> execution units and register file read ports
+     logic combiner_issue_valid, shuffler_issue_valid, stream_issue_valid;
+     logic combiner_issue_ready, shuffler_issue_ready, stream_issue_ready;
+     vreg_num rd_a_reg, rd_b_reg;
+     mat4 a, b;
+
+     // Batch reader -> stream unit
+     logic recv_valid, recv_ready;
+     lane_word recv_data;
+     lane_mask recv_mask;
+
+     // Execution units <-> writeback
+     logic combiner_wb_valid, shuffler_wb_valid, stream_wb_valid;
+     logic combiner_wb_ready, shuffler_wb_ready, stream_wb_ready;
+     wb_op combiner_wb, shuffler_wb, stream_wb;
+
+     // Writeback -> register file write port
+     mat4 wr_data;
+     logic wr;
+     vreg_num wr_reg;
+
+     gfx_sp_fetch fetch
+     (
+         .ready(insn_ready),
+         .valid(insn_valid),
+         .*
+     );
+
+     gfx_sp_decode decode
+     (
+         .*
+     );
+
+     gfx_sp_issue issue
+     (
+         .*
+     );
+
+     gfx_sp_batch batch
+     (
+         .out_data(recv_data),
+         .out_mask(recv_mask),
+         .out_ready(recv_ready),
+         .out_valid(recv_valid),
+         .*
+     );
+
+     gfx_sp_shuffler shuffler
+     (
+         .wb(shuffler_wb),
+         .in_ready(shuffler_issue_ready),
+         .in_valid(shuffler_issue_valid),
+         .wb_ready(shuffler_wb_ready),
+         .wb_valid(shuffler_wb_valid),
+         .*
+     );
+
+     gfx_sp_combiner combiner
+     (
+         .wb(combiner_wb),
+         .in_ready(combiner_issue_ready),
+         .in_valid(combiner_issue_valid),
+         .wb_ready(combiner_wb_ready),
+         .wb_valid(combiner_wb_valid),
+         .*
+     );
+
+     gfx_sp_stream stream
+     (
+         .wb(stream_wb),
+         .in_ready(stream_issue_ready),
+         .in_valid(stream_issue_valid),
+         .wb_ready(stream_wb_ready),
+         .wb_valid(stream_wb_valid),
+         .*
+     );
+
+     gfx_sp_writeback writeback
+     (
+         .*
+     );
+
+     gfx_sp_regs regs
+     (
+         .rd_a_data(a),
+         .rd_b_data(b),
+         .*
+     );
+
+ endmodule
diff --git a/rtl/legacy_gfx/gfx_sp_batch.sv b/rtl/legacy_gfx/gfx_sp_batch.sv
new file mode 100644
index 0000000..3d566ab
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_sp_batch.sv
@@ -0,0 +1,141 @@
+`include "gfx/gfx_defs.sv"
+
+ // Batch reader: on `batch_start`, streams lane-wide words starting at the
+ // lane-aligned `batch_base` into a FIFO, tagging each word with a lane mask.
+ //
+ // `batch_length` is split into a block count (upper bits) and a tail (low
+ // TAIL_BITS bits). Reads continue until a read is accepted while
+ // fetch_block_count is 0, i.e. batch_length_block + 1 lane reads are issued.
+ // Words returned while read_block_count is non-zero get a full mask; the
+ // remaining word gets a mask derived from the tail.
+ module gfx_sp_batch
+ (
+ input logic clk,
+ rst_n,
+
+ input logic batch_waitrequest,
+ batch_readdatavalid,
+ input vram_word batch_readdata,
+ output vram_addr batch_address,
+ output logic batch_read,
+
+ input logic batch_start,
+ input vram_insn_addr batch_base,
+ input cmd_word batch_length,
+
+ output lane_mask out_mask,
+ output lane_word out_data,
+ input logic out_ready,
+ output logic out_valid
+ );
+
+ localparam TAIL_BITS = $clog2($bits(lane_mask)),
+ BLOCK_BITS = $bits(batch_length) - TAIL_BITS;
+
+ logic fifo_down_safe, lane_read, lane_readdatavalid, lane_waitrequest;
+ lane_word lane_readdata;
+ vram_lane_addr aligned_batch_base, lane_address;
+ logic[TAIL_BITS - 1:0] batch_length_tail, read_tail;
+ logic[BLOCK_BITS - 1:0] batch_length_block, fetch_block_count, read_block_count;
+
+ struct packed
+ {
+ lane_word data;
+ lane_mask mask;
+ } fifo_in, fifo_out;
+
+ enum int unsigned
+ {
+ IDLE,
+ STREAM
+ } state;
+
+ assign out_data = fifo_out.data;
+ assign out_mask = fifo_out.mask;
+
+ assign fifo_in.data = lane_readdata;
+
+ assign {batch_length_block, batch_length_tail} = batch_length;
+ // Drop the low GFX_INSN_BITS_IN_LANE bits to get a lane-granular address
+ assign aligned_batch_base = batch_base[`GFX_INSN_BITS_IN_LANE +: $bits(vram_lane_addr)];
+
+ // Adapts lane-wide reads onto the word-wide batch_* memory port
+ gfx_sp_widener #(.WIDTH($bits(vram_lane_addr))) lane_bus
+ (
+ .wide_read(lane_read),
+ .wide_address(lane_address),
+ .wide_readdata(lane_readdata),
+ .wide_waitrequest(lane_waitrequest),
+ .wide_readdatavalid(lane_readdatavalid),
+ .word_read(batch_read),
+ .word_address(batch_address),
+ .word_readdata(batch_readdata),
+ .word_waitrequest(batch_waitrequest),
+ .word_readdatavalid(batch_readdatavalid),
+ .*
+ );
+
+ // in_ready is left open: returned data cannot be back-pressured, so reads are
+ // throttled through fifo_down_safe instead. out_ready/out_valid bind via `.*`.
+ gfx_fifo #(.WIDTH($bits(fifo_in)), .DEPTH(`GFX_BATCH_FIFO_DEPTH)) lane_fifo
+ (
+ .in(fifo_in),
+ .out(fifo_out),
+ .in_ready(),
+ .in_valid(lane_readdatavalid),
+ .*
+ );
+
+ // Notified of each accepted read; reports whether another one is safe
+ gfx_fifo_overflow #(.DEPTH(`GFX_BATCH_FIFO_DEPTH)) overflow
+ (
+ .down(lane_read && !lane_waitrequest),
+ .empty(),
+ .down_safe(fifo_down_safe),
+ .*
+ );
+
+ always_comb begin
+ // Tail mask: the low `read_tail` lanes are enabled.
+ // The literals hard-code a 2-bit tail and a 4-bit lane mask.
+ unique case (read_tail)
+ 2'b00: fifo_in.mask = 4'b0000;
+ 2'b01: fifo_in.mask = 4'b0001;
+ 2'b10: fifo_in.mask = 4'b0011;
+ 2'b11: fifo_in.mask = 4'b0111;
+ endcase
+
+ // Full blocks still outstanding: all lanes enabled
+ if (read_block_count != 0)
+ fifo_in.mask = 4'b1111;
+ end
+
+ always_ff @(posedge clk or negedge rst_n)
+ if (!rst_n) begin
+ state <= IDLE;
+ lane_read <= 0;
+ end else unique case (state)
+ IDLE:
+ if (batch_start) begin
+ state <= STREAM;
+ lane_read <= 1;
+ end
+
+ STREAM: begin
+ // Re-arm the read only when none is pending or the pending one was accepted
+ if (!lane_read || !lane_waitrequest)
+ lane_read <= fifo_down_safe;
+
+ // The last read was accepted: stop (overrides the assignment above)
+ if (lane_read && !lane_waitrequest && fetch_block_count == 0) begin
+ state <= IDLE;
+ lane_read <= 0;
+ end
+ end
+ endcase
+
+ always_ff @(posedge clk) begin
+ unique case (state)
+ IDLE:
+ if (batch_start) begin
+ read_tail <= batch_length_tail;
+ read_block_count <= batch_length_block;
+ fetch_block_count <= batch_length_block;
+
+ lane_address <= aligned_batch_base;
+ end
+
+ STREAM:
+ if (lane_read && !lane_waitrequest) begin
+ lane_address <= lane_address + 1;
+ fetch_block_count <= fetch_block_count - 1;
+ end
+ endcase
+
+ // NOTE(review): placed after the case, so this decrement wins over the
+ // batch_start load if data returns in that same cycle -- confirm it cannot.
+ if (lane_readdatavalid)
+ read_block_count <= read_block_count - 1;
+ end
+
+ endmodule
diff --git a/rtl/legacy_gfx/gfx_sp_combiner.sv b/rtl/legacy_gfx/gfx_sp_combiner.sv
new file mode 100644
index 0000000..900af00
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_sp_combiner.sv
@@ -0,0 +1,63 @@
+`include "gfx/gfx_defs.sv"
+
+ // Combiner execution unit: runs operands `a` and `b` through gfx_mat_mat and
+ // carries the destination register number alongside in a FIFO, then emits the
+ // pair as a writeback operation through a skid stage.
+ //
+ // The multiplier and the FIFO are kept in lock-step: an instruction is only
+ // accepted when both can take it, and a result is only popped when both have
+ // one and the skid stage is ready.
+ module gfx_sp_combiner
+ (
+ input logic clk,
+ rst_n,
+
+ input mat4 a,
+ b,
+ input insn_deco deco,
+ input logic in_valid,
+ output logic in_ready,
+
+ input logic wb_ready,
+ output logic wb_valid,
+ output wb_op wb
+ );
+
+ wb_op wb_out;
+ logic mul_ready, mul_valid, fifo_ready, fifo_valid, skid_ready, out_stall;
+
+ assign in_ready = mul_ready && fifo_ready;
+
+ // Operands `a` and `b` reach the multiplier through `.*`
+ gfx_mat_mat mul
+ (
+ .q(wb_out.data),
+ .in_ready(mul_ready),
+ .in_valid(in_valid && fifo_ready),
+ .out_ready(skid_ready && fifo_valid),
+ .out_valid(mul_valid),
+ .*
+ );
+
+ // Destination register numbers of the multiplications in flight
+ gfx_fifo #(.WIDTH($bits(vreg_num)), .DEPTH(`GFX_SP_COMBINER_FIFO_DEPTH)) depth
+ (
+ .in(deco.dst),
+ .out(wb_out.dst),
+ .in_ready(fifo_ready),
+ .in_valid(in_valid && mul_ready),
+ .out_ready(skid_ready && mul_valid),
+ .out_valid(fifo_valid),
+ .*
+ );
+
+ gfx_skid_flow out_flow
+ (
+ .stall(out_stall),
+ .in_ready(skid_ready),
+ .in_valid(fifo_valid && mul_valid),
+ .out_ready(wb_ready),
+ .out_valid(wb_valid),
+ .*
+ );
+
+ gfx_skid_buf #(.WIDTH($bits(wb))) out_skid
+ (
+ .in(wb_out),
+ .out(wb),
+ .stall(out_stall),
+ .*
+ );
+
+ endmodule
diff --git a/rtl/legacy_gfx/gfx_sp_decode.sv b/rtl/legacy_gfx/gfx_sp_decode.sv
new file mode 100644
index 0000000..d54077d
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_sp_decode.sv
@@ -0,0 +1,116 @@
+`include "gfx/gfx_defs.sv"
+`include "gfx/gfx_sp_isa.sv"
+
+ // Instruction decoder: matches `insn` against the GFX_INSN_OP_* patterns and
+ // fills an insn_deco record (writeback flag, which sources are read, target
+ // execution unit, shuffler mode), then registers it through a one-stage
+ // pipeline followed by a skid buffer.
+ //
+ // Register numbers, immediates and `clear_lanes` are copied unconditionally
+ // after the opcode match, so they are defined even on the default path.
+ module gfx_sp_decode
+ (
+ input logic clk,
+ rst_n,
+
+ input logic clear_lanes,
+ input insn_word insn,
+ input logic insn_valid,
+ output logic insn_ready,
+
+ output insn_deco deco,
+ input logic deco_ready,
+ output logic deco_valid
+ );
+
+ logic stall;
+ insn_deco deco_in, deco_out;
+
+ gfx_pipeline_flow #(.STAGES(1)) flow
+ (
+ .in_ready(insn_ready),
+ .in_valid(insn_valid),
+ .out_ready(deco_ready),
+ .out_valid(deco_valid),
+ .*
+ );
+
+ gfx_pipes #(.WIDTH($bits(deco)), .DEPTH(1)) pipe
+ (
+ .in(deco_in),
+ .out(deco_out),
+ .*
+ );
+
+ gfx_skid_buf #(.WIDTH($bits(deco))) skid
+ (
+ .in(deco_out),
+ .out(deco),
+ .*
+ );
+
+ always_comb begin
+ // Defaults: no writeback, no source reads, no unit selected
+ deco_in.writeback = 0;
+ deco_in.read_src_a = 0;
+ deco_in.read_src_b = 0;
+
+ deco_in.ex.stream = 0;
+ deco_in.ex.combiner = 0;
+ deco_in.ex.shuffler = 0;
+
+ // Shuffler mode is a don't-care unless a shuffler opcode sets it
+ deco_in.shuffler.is_swizzle = 1'bx;
+ deco_in.shuffler.is_broadcast = 1'bx;
+
+ unique casez (insn)
+ // Select: shuffler, reads both sources
+ `GFX_INSN_OP_SELECT: begin
+ deco_in.writeback = 1;
+ deco_in.read_src_a = 1;
+ deco_in.read_src_b = 1;
+
+ deco_in.ex.shuffler = 1;
+ deco_in.shuffler.is_swizzle = 0;
+ deco_in.shuffler.is_broadcast = 0;
+ end
+
+ // Swizzle: shuffler, reads source A only (is_broadcast stays don't-care)
+ `GFX_INSN_OP_SWIZZL: begin
+ deco_in.writeback = 1;
+ deco_in.read_src_a = 1;
+
+ deco_in.ex.shuffler = 1;
+ deco_in.shuffler.is_swizzle = 1;
+ end
+
+ // Broadcast: shuffler, no register sources
+ `GFX_INSN_OP_BROADC: begin
+ deco_in.writeback = 1;
+
+ deco_in.ex.shuffler = 1;
+ deco_in.shuffler.is_swizzle = 0;
+ deco_in.shuffler.is_broadcast = 1;
+ end
+
+ // Matrix-vector: combiner, reads both sources
+ `GFX_INSN_OP_MATVEC: begin
+ deco_in.writeback = 1;
+ deco_in.read_src_a = 1;
+ deco_in.read_src_b = 1;
+ deco_in.ex.combiner = 1;
+ end
+
+ // Send: stream unit, reads source A, no writeback
+ `GFX_INSN_OP_SEND: begin
+ deco_in.read_src_a = 1;
+ deco_in.ex.stream = 1;
+ end
+
+ // Receive: stream unit, writes back, no register sources
+ `GFX_INSN_OP_RECV: begin
+ deco_in.writeback = 1;
+ deco_in.ex.stream = 1;
+ end
+
+ default:
+ // This is playing with fire, but it is worth it to close timing
+ deco_in = {($bits(deco_in)){1'bx}};
+ endcase
+
+ // Field extraction does not depend on the opcode
+ deco_in.dst = insn `GFX_INSN_DST;
+ deco_in.src_a = insn `GFX_INSN_SRC_A;
+ deco_in.src_b = insn `GFX_INSN_SRC_B;
+ deco_in.clear_lanes = clear_lanes;
+
+ deco_in.shuffler.imm = insn `GFX_INSN_BROADC_IMM;
+ deco_in.shuffler.select_mask = insn `GFX_INSN_SELECT_MASK;
+ deco_in.shuffler.swizzle_op = insn `GFX_INSN_SWIZZL_LANES;
+ end
+
+ endmodule
diff --git a/rtl/legacy_gfx/gfx_sp_fetch.sv b/rtl/legacy_gfx/gfx_sp_fetch.sv
new file mode 100644
index 0000000..23fb20e
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_sp_fetch.sv
@@ -0,0 +1,224 @@
+`include "gfx/gfx_defs.sv"
+
+ // Instruction fetch unit.
+ //
+ // States:
+ //   IDLE   - waits for `program_start`.
+ //   HEADER - reads one ENTRY_SIZE-word header entry into `entry_data`
+ //            ([0] code base, [1] code length, [2] batch base, [3] batch
+ //            length), then pulses `batch_start`.
+ //   LOOP   - fetches the code repeatedly into the instruction FIFO, wrapping
+ //            to `code_base` after the pointer equals `code_length`, until
+ //            `batch_end` is seen with no read pending.
+ //   FLUSH  - waits for the FIFO to drain, then moves to the next header entry
+ //            or returns to IDLE when `header_count` is 0.
+ //
+ // `clear_lanes` accompanies the first instruction of every pass over the code.
+ module gfx_sp_fetch
+ (
+ input logic clk,
+ rst_n,
+
+ input logic fetch_waitrequest,
+ fetch_readdatavalid,
+ input vram_word fetch_readdata,
+ output vram_addr fetch_address,
+ output logic fetch_read,
+
+ input logic program_start,
+ input cmd_insn_ptr program_header_base,
+ input cmd_word program_header_size,
+ output logic running,
+
+ input logic batch_end,
+ output vram_insn_addr batch_base,
+ output logic batch_start,
+ output cmd_word batch_length,
+
+ input logic ready,
+ output logic valid,
+ output insn_word insn,
+ output logic clear_lanes
+ );
+
+ // Words per header entry
+ localparam ENTRY_SIZE = 4;
+
+ logic break_loop, entry_end, fifo_down_safe, fifo_empty, fifo_put,
+ header_continue, insn_read, insn_readdatavalid, insn_waitrequest;
+
+ cmd_word header_count;
+ insn_word code_length, code_read_ptr, code_fetch_ptr, insn_readdata, entry_data[ENTRY_SIZE];
+ vram_insn_addr code_base, insn_address, header_ptr;
+ logic[$clog2(ENTRY_SIZE - 1):0] entry_fetch_count, entry_read_count;
+
+ enum int unsigned
+ {
+ IDLE,
+ HEADER,
+ LOOP,
+ FLUSH
+ } state;
+
+ struct packed
+ {
+ insn_word insn;
+ logic clear_lanes;
+ } fifo_in, fifo_out;
+
+ assign insn = fifo_out.insn;
+ assign clear_lanes = fifo_out.clear_lanes;
+
+ assign entry_end = entry_read_count == ENTRY_SIZE - 1;
+ assign header_continue = header_count != 0;
+ // Leave LOOP only when no read is left waiting on the bus
+ assign break_loop = batch_end && (!insn_read || !insn_waitrequest);
+
+ // Converts a header word into an instruction-granular address
+ function vram_insn_addr base_from_word(insn_word in);
+ base_from_word = in[`GFX_INSN_SUBWORD_BITS +: $bits(vram_insn_addr)];
+ endfunction
+
+ assign code_base = base_from_word(entry_data[0]);
+ assign batch_base = base_from_word(entry_data[2]);
+ assign code_length = entry_data[1];
+ assign batch_length = entry_data[3];
+
+ // Adapts instruction-wide reads onto the word-wide fetch_* memory port
+ gfx_sp_widener #(.WIDTH($bits(vram_insn_addr))) insn_bus
+ (
+ .wide_read(insn_read),
+ .wide_address(insn_address),
+ .wide_readdata(insn_readdata),
+ .wide_waitrequest(insn_waitrequest),
+ .wide_readdatavalid(insn_readdatavalid),
+ .word_read(fetch_read),
+ .word_address(fetch_address),
+ .word_readdata(fetch_readdata),
+ .word_waitrequest(fetch_waitrequest),
+ .word_readdatavalid(fetch_readdatavalid),
+ .*
+ );
+
+ // in_ready is left open; reads are throttled through fifo_down_safe instead
+ gfx_fifo #(.WIDTH($bits(fifo_in)), .DEPTH(`GFX_FETCH_FIFO_DEPTH)) insn_fifo
+ (
+ .in(fifo_in),
+ .out(fifo_out),
+ .in_ready(),
+ .in_valid(fifo_put),
+ .out_ready(ready),
+ .out_valid(valid),
+ .*
+ );
+
+ gfx_fifo_overflow #(.DEPTH(`GFX_FETCH_FIFO_DEPTH)) overflow
+ (
+ .down(insn_read && !insn_waitrequest),
+ .empty(fifo_empty),
+ .down_safe(fifo_down_safe),
+ .out_ready(ready),
+ .out_valid(valid),
+ .*
+ );
+
+ // Control registers (reset to idle)
+ always_ff @(posedge clk or negedge rst_n)
+ if (!rst_n) begin
+ state <= IDLE;
+ running <= 0;
+ fifo_put <= 0;
+ insn_read <= 0;
+ batch_start <= 0;
+ end else unique case (state)
+ IDLE:
+ if (program_start) begin
+ state <= HEADER;
+ running <= 1;
+ insn_read <= 1;
+ end
+
+ HEADER: begin
+ // Keep reading until the last word of the entry has been requested
+ if (insn_read && !insn_waitrequest)
+ insn_read <= entry_fetch_count != ENTRY_SIZE - 1;
+
+ // Last word of the entry arrived: start the batch and the code loop
+ if (insn_readdatavalid && entry_end) begin
+ state <= LOOP;
+ insn_read <= 1;
+ batch_start <= 1;
+ end
+ end
+
+ LOOP: begin
+ fifo_put <= 0;
+ batch_start <= 0;
+
+ if (!insn_read || !insn_waitrequest)
+ insn_read <= fifo_down_safe;
+
+ // Overrides the re-arm above
+ if (break_loop) begin
+ state <= FLUSH;
+ insn_read <= 0;
+ end
+
+ // Data registered into fifo_in this cycle is pushed on the next one
+ if (insn_readdatavalid)
+ fifo_put <= 1;
+ end
+
+ FLUSH: begin
+ fifo_put <= 0;
+
+ if (fifo_empty) begin
+ state <= header_continue ? HEADER : IDLE;
+ running <= header_continue;
+ insn_read <= header_continue;
+ end
+ end
+ endcase
+
+ // Data-path registers (no reset)
+ always_ff @(posedge clk)
+ unique case (state)
+ IDLE:
+ if (program_start) begin
+ header_ptr <= program_header_base.addr;
+ header_count <= program_header_size;
+ insn_address <= program_header_base.addr;
+
+ entry_read_count <= 0;
+ entry_fetch_count <= 0;
+ end
+
+ HEADER: begin
+ code_read_ptr <= 0;
+ code_fetch_ptr <= 0;
+
+ // NOTE(review): unlike header_ptr below, this increment is not gated by
+ // insn_read, so it also advances while no read is being issued -- confirm
+ // this is intended.
+ if (!insn_waitrequest) begin
+ insn_address <= insn_address + 1;
+ entry_fetch_count <= entry_fetch_count + 1;
+ end
+
+ if (insn_read && !insn_waitrequest)
+ header_ptr <= header_ptr + 1;
+
+ if (insn_readdatavalid) begin
+ // NOTE(review): entry_read_count and entry_fetch_count are only reset
+ // in IDLE; check their values when HEADER is re-entered from FLUSH.
+ entry_read_count <= entry_read_count + 1;
+
+ // Shift register: the oldest word ends up in entry_data[0]
+ for (integer i = 0; i < ENTRY_SIZE - 1; ++i)
+ entry_data[i] <= entry_data[i + 1];
+
+ entry_data[ENTRY_SIZE - 1] <= insn_readdata;
+
+ // entry_data[1] is about to shift into slot 0, i.e. it is the code base
+ if (entry_end)
+ insn_address <= base_from_word(entry_data[1]);
+ end
+ end
+
+ LOOP: begin
+ if (insn_read && !insn_waitrequest) begin
+ insn_address <= insn_address + 1;
+ code_fetch_ptr <= code_fetch_ptr + 1;
+
+ // Wrap after the pointer reaches code_length (code_length + 1 words per pass)
+ if (code_fetch_ptr == code_length) begin
+ insn_address <= code_base;
+ code_fetch_ptr <= 0;
+ end
+ end
+
+ if (insn_readdatavalid) begin
+ fifo_in.insn <= insn_readdata;
+ // Flag the first instruction of each pass
+ fifo_in.clear_lanes <= code_read_ptr == 0;
+
+ code_read_ptr <= code_read_ptr + 1;
+ if (code_read_ptr == code_length)
+ code_read_ptr <= 0;
+ end
+ end
+
+ FLUSH:
+ if (fifo_empty) begin
+ // Resume at the header word following the entry just executed
+ header_count <= header_count - 1;
+ insn_address <= header_ptr;
+ end
+ endcase
+
+ endmodule
diff --git a/rtl/legacy_gfx/gfx_sp_file.sv b/rtl/legacy_gfx/gfx_sp_file.sv
new file mode 100644
index 0000000..e98ee18
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_sp_file.sv
@@ -0,0 +1,32 @@
+`include "gfx/gfx_defs.sv"
+
+// Single-lane register file with one read port and one write port.
+//
+// Both ports are pipelined:
+//   - read:  rd_reg is registered, the array is read into a second register,
+//            and that value is registered once more onto rd_data;
+//   - write: wr / wr_reg / wr_data are registered once before the array is
+//            updated.
+module gfx_sp_file
+(
+    input  logic    clk,
+
+    input  vreg_num rd_reg,
+    output vec4     rd_data,
+
+    input  logic    wr,
+    input  vreg_num wr_reg,
+    input  vec4     wr_data
+);
+
+    // Storage array, one vec4 per register.
+    vec4     file[`GFX_SP_REG_COUNT];
+
+    // Registered copy of the write request.
+    logic    wr_q;
+    vreg_num wr_reg_q;
+    vec4     wr_data_q;
+
+    // Read pipeline: registered address, then the word read from the array.
+    vreg_num rd_reg_q;
+    vec4     rd_data_q;
+
+    always_ff @(posedge clk) begin
+        // Write path: capture the request, commit the previously captured one.
+        wr_q      <= wr;
+        wr_reg_q  <= wr_reg;
+        wr_data_q <= wr_data;
+
+        if (wr_q) begin
+            file[wr_reg_q] <= wr_data_q;
+        end
+
+        // Read path: address register -> array read -> output register.
+        rd_reg_q  <= rd_reg;
+        rd_data_q <= file[rd_reg_q];
+        rd_data   <= rd_data_q;
+    end
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_sp_isa.sv b/rtl/legacy_gfx/gfx_sp_isa.sv
new file mode 100644
index 0000000..1420d95
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_sp_isa.sv
@@ -0,0 +1,23 @@
+// Instruction encodings and operand field positions for the SP instruction set.
+`ifndef GFX_SP_ISA_SV
+`define GFX_SP_ISA_SV
+
+`include "gfx/gfx_defs.sv"
+
+// 32-bit opcode patterns. Bits [7:0] carry a one-hot opcode, 'z' marks the bit
+// positions occupied by operand fields (see the slices below) and every other
+// bit is fixed at 0.
+// NOTE(review): the 'z' bits presumably act as wildcards in a casez / ==? match
+// in the decoder, which is not visible here -- confirm.
+//
+// Operand fields present in each pattern:
+//   SELECT: select mask [23:20], source B [18:16], source A [14:12], dst [10:8]
+//   SWIZZL: swizzle lanes [31:24], source A [14:12], dst [10:8]
+//   BROADC: immediate [31:16], dst [10:8]
+//   MATVEC: source B [18:16], source A [14:12], dst [10:8]
+//   SEND:   source A [14:12]
+//   RECV:   dst [10:8]
+`define GFX_INSN_OP_SELECT 32'b00000000_zzzz0zzz_0zzz0zzz_00000001
+`define GFX_INSN_OP_SWIZZL 32'bzzzzzzzz_00000000_0zzz0zzz_00000010
+`define GFX_INSN_OP_BROADC 32'bzzzzzzzz_zzzzzzzz_00000zzz_00000100
+`define GFX_INSN_OP_MATVEC 32'b00000000_00000zzz_0zzz0zzz_00001000
+`define GFX_INSN_OP_SEND 32'b00000000_00000000_0zzz0000_00010000
+`define GFX_INSN_OP_RECV 32'b00000000_00000000_00000zzz_00100000
+
+// Register operand slices, meant to be appended to an instruction word.
+`define GFX_INSN_DST [10:8]
+`define GFX_INSN_SRC_A [14:12]
+`define GFX_INSN_SRC_B [18:16]
+
+// Mask field of the SELECT encoding.
+`define GFX_INSN_SELECT_MASK [23:20]
+
+// Lane-selection field of the SWIZZL encoding.
+`define GFX_INSN_SWIZZL_LANES [31:24]
+
+// Immediate field of the BROADC encoding.
+`define GFX_INSN_BROADC_IMM [31:16]
+
+`endif
diff --git a/rtl/legacy_gfx/gfx_sp_issue.sv b/rtl/legacy_gfx/gfx_sp_issue.sv
new file mode 100644
index 0000000..6934e39
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_sp_issue.sv
@@ -0,0 +1,111 @@
+`include "gfx/gfx_defs.sv"
+
+// Issue stage of the SP.
+//
+// Takes one decoded instruction at a time, waits until none of the registers it
+// uses has a write in flight, then raises the issue-valid line of every
+// execution unit flagged in deco.ex and waits for one of them to accept.
+//
+// Ports:
+//   deco, deco_valid     decoded instruction and its valid flag
+//   deco_ready           high while an execution unit is accepting the issue
+//   rd_a_reg, rd_b_reg   register-file read addresses (deco.src_a, deco.src_b)
+//   *_issue_valid/ready  per-unit issue handshake (stream, combiner, shuffler)
+//   wr, wr_reg           write-back strobe and register; clears that busy bit
+module gfx_sp_issue
+(
+    input  logic     clk,
+                     rst_n,
+
+    input  insn_deco deco,
+    input  logic     deco_valid,
+    output logic     deco_ready,
+
+    output vreg_num  rd_a_reg,
+                     rd_b_reg,
+
+    input  logic     stream_issue_ready,
+    output logic     stream_issue_valid,
+
+    input  logic     combiner_issue_ready,
+    output logic     combiner_issue_valid,
+
+    input  logic     shuffler_issue_ready,
+    output logic     shuffler_issue_valid,
+
+    input  logic     wr,
+    input  vreg_num  wr_reg
+);
+
+    /* This could be fully pipelined, but there was no time for it, and in any
+     * case it would make no difference because of the terrible bandwidth.
+     */
+
+    logic data_hazard, rd_a_hazard, rd_b_hazard, wr_hazard, writing_a, writing_b, writing_dst,
+          busy[`GFX_SP_REG_COUNT];
+
+    enum int unsigned
+    {
+        IDLE,
+        HAZARDS,
+        ISSUE,
+        WAIT
+    } state;
+
+    assign rd_a_reg = deco.src_a;
+    assign rd_b_reg = deco.src_b;
+
+    // A register is a hazard only if the instruction really uses it in that
+    // role. Source B must be gated by its own read flag: gating it by
+    // read_src_a stalls on src_b for instructions that only read src_a.
+    // NOTE(review): assumes insn_deco declares read_src_b next to read_src_a --
+    // confirm in gfx_defs.sv.
+    assign wr_hazard = deco.writeback && writing_dst;
+    assign rd_a_hazard = deco.read_src_a && writing_a;
+    assign rd_b_hazard = deco.read_src_b && writing_b;
+    assign data_hazard = rd_a_hazard || rd_b_hazard || wr_hazard;
+
+    // The instruction is consumed as soon as any unit completes its handshake.
+    assign deco_ready = (stream_issue_ready && stream_issue_valid)
+                     || (combiner_issue_ready && combiner_issue_valid)
+                     || (shuffler_issue_ready && shuffler_issue_valid);
+
+    always_ff @(posedge clk or negedge rst_n)
+        if (!rst_n) begin
+            state <= IDLE;
+
+            stream_issue_valid <= 0;
+            combiner_issue_valid <= 0;
+            shuffler_issue_valid <= 0;
+
+            for (integer i = 0; i < `GFX_SP_REG_COUNT; ++i)
+                busy[i] <= 0;
+        end else begin
+            unique case (state)
+                // Wait for an instruction. The writing_* flags are registered,
+                // so this state also gives them a cycle to sample the busy bits.
+                IDLE:
+                    if (deco_valid)
+                        state <= HAZARDS;
+
+                // Hold here while any used register has a pending write, then
+                // reserve the destination.
+                HAZARDS:
+                    if (!data_hazard) begin
+                        state <= ISSUE;
+                        if (deco.writeback)
+                            busy[deco.dst] <= 1;
+                    end
+
+                // Offer the instruction to the selected execution unit(s).
+                ISSUE: begin
+                    state <= WAIT;
+
+                    if (deco.ex.stream)
+                        stream_issue_valid <= 1;
+
+                    if (deco.ex.combiner)
+                        combiner_issue_valid <= 1;
+
+                    if (deco.ex.shuffler)
+                        shuffler_issue_valid <= 1;
+                end
+
+                // Keep the valid lines up until a unit accepts.
+                WAIT:
+                    if (deco_ready) begin
+                        state <= IDLE;
+
+                        stream_issue_valid <= 0;
+                        combiner_issue_valid <= 0;
+                        shuffler_issue_valid <= 0;
+                    end
+            endcase
+
+            // Placed after the case so that, if the same register is reserved
+            // and written back in the same cycle, the clear is the assignment
+            // that takes effect.
+            if (wr)
+                busy[wr_reg] <= 0;
+        end
+
+    // Registered lookup of the busy bits for the current instruction's registers.
+    always_ff @(posedge clk) begin
+        writing_a <= busy[deco.src_a];
+        writing_b <= busy[deco.src_b];
+        writing_dst <= busy[deco.dst];
+    end
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_sp_regs.sv b/rtl/legacy_gfx/gfx_sp_regs.sv
new file mode 100644
index 0000000..68aaf06
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_sp_regs.sv
@@ -0,0 +1,39 @@
+`include "gfx/gfx_defs.sv"
+
+// Two-read-port, one-write-port vector register file.
+//
+// Each lane gets two gfx_sp_file instances: bank "a" serves read port A and
+// bank "b" serves read port B. Both banks receive every write.
+module gfx_sp_regs
+(
+    input  logic    clk,
+
+    input  vreg_num rd_a_reg,
+    output mat4     rd_a_data,
+
+    input  vreg_num rd_b_reg,
+    output mat4     rd_b_data,
+
+    input  logic    wr,
+    input  vreg_num wr_reg,
+    input  mat4     wr_data
+);
+
+    genvar lane;
+    generate
+        for (lane = 0; lane < `GFX_SP_LANES; ++lane) begin: lanes
+            // Bank read through port A.
+            gfx_sp_file a
+            (
+                .clk(clk),
+                .rd_reg(rd_a_reg),
+                .rd_data(rd_a_data[lane]),
+                .wr(wr),
+                .wr_reg(wr_reg),
+                .wr_data(wr_data[lane])
+            );
+
+            // Bank read through port B; written with the same data as bank a.
+            gfx_sp_file b
+            (
+                .clk(clk),
+                .rd_reg(rd_b_reg),
+                .rd_data(rd_b_data[lane]),
+                .wr(wr),
+                .wr_reg(wr_reg),
+                .wr_data(wr_data[lane])
+            );
+        end
+    endgenerate
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_sp_select.sv b/rtl/legacy_gfx/gfx_sp_select.sv
new file mode 100644
index 0000000..46b23c9
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_sp_select.sv
@@ -0,0 +1,25 @@
+`include "gfx/gfx_defs.sv"
+
+// Registered per-component select / broadcast stage for one vec4.
+//
+// While stall is low, every component of out is loaded with:
+//   - deco.imm   when deco.is_broadcast is set,
+//   - b[n]       when bit n of deco.select_mask is set,
+//   - a[n]       otherwise.
+// While stall is high, out keeps its value.
+module gfx_sp_select
+(
+    input  logic         clk,
+
+    input  vec4          a,
+                         b,
+    input  shuffler_deco deco,
+    input  logic         stall,
+
+    output vec4          out
+);
+
+    always_ff @(posedge clk) begin
+        for (int comp = 0; comp < `FLOATS_PER_VEC; ++comp) begin
+            if (!stall) begin
+                if (deco.is_broadcast) begin
+                    out[comp] <= deco.imm;
+                end else if (deco.select_mask[comp]) begin
+                    out[comp] <= b[comp];
+                end else begin
+                    out[comp] <= a[comp];
+                end
+            end
+        end
+    end
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_sp_shuffler.sv b/rtl/legacy_gfx/gfx_sp_shuffler.sv
new file mode 100644
index 0000000..b813d03
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_sp_shuffler.sv
@@ -0,0 +1,70 @@
+`include "gfx/gfx_defs.sv"
+
+// Shuffler execution unit: runs the select/broadcast and swizzle datapaths on
+// every lane in parallel and produces a write-back operation.
+//
+// Ports:
+//   a, b                operand matrices (one vec4 per lane)
+//   deco                decoded instruction; deco.shuffler and deco.dst are used
+//   in_valid, in_ready  issue-side handshake
+//   wb_valid, wb_ready  write-back handshake
+//   wb                  write-back operation (destination register and data)
+module gfx_sp_shuffler
+(
+ input logic clk,
+ rst_n,
+
+ input mat4 a,
+ b,
+ input insn_deco deco,
+ input logic in_valid,
+ output logic in_ready,
+
+ input logic wb_ready,
+ output logic wb_valid,
+ output wb_op wb
+);
+
+ mat4 select_out, swizzle_out;
+ wb_op wb_out;
+ logic stall, is_swizzle;
+ vreg_num hold_dst;
+
+ // Valid/ready bookkeeping for the two register stages below.
+ // NOTE(review): stall and in_ready are presumably driven by this instance
+ // through the implicit .* connections; its port list is not visible here.
+ gfx_pipeline_flow #(.STAGES(2)) flow
+ (
+ .out_ready(wb_ready),
+ .out_valid(wb_valid),
+ .*
+ );
+
+ // Buffers the registered result wb_out onto the wb output.
+ gfx_skid_buf #(.WIDTH($bits(wb))) skid
+ (
+ .in(wb_out),
+ .out(wb),
+ .*
+ );
+
+ genvar gen_i;
+ generate
+ for (gen_i = 0; gen_i < `GFX_SP_LANES; ++gen_i) begin: lanes
+ // First stage, select/broadcast path for this lane.
+ gfx_sp_select select
+ (
+ .a(a[gen_i]),
+ .b(b[gen_i]),
+ .out(select_out[gen_i]),
+ .deco(deco.shuffler),
+ .*
+ );
+
+ // First stage, swizzle path for this lane (operates on a only).
+ gfx_sp_swizzle swizzle
+ (
+ .in(a[gen_i]),
+ .out(swizzle_out[gen_i]),
+ .deco(deco.shuffler),
+ .*
+ );
+ end
+ endgenerate
+
+ always_ff @(posedge clk)
+ if (!stall) begin
+ // First stage: carry the destination and the operation kind along
+ // with the registered select/swizzle outputs.
+ hold_dst <= deco.dst;
+ is_swizzle <= deco.shuffler.is_swizzle;
+
+ // Second stage: pick the result of the path that was requested.
+ wb_out.dst <= hold_dst;
+ for (integer i = 0; i < `GFX_SP_LANES; ++i)
+ wb_out.data[i] <= is_swizzle ? swizzle_out[i] : select_out[i];
+ end
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_sp_stream.sv b/rtl/legacy_gfx/gfx_sp_stream.sv
new file mode 100644
index 0000000..7901028
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_sp_stream.sv
@@ -0,0 +1,66 @@
+`include "gfx/gfx_defs.sv"
+
+// Stream execution unit: handles one send or one receive at a time.
+//
+// An accepted instruction with deco.writeback set is a receive: the unit waits
+// for recv_valid and forwards recv_data to write-back. Otherwise it is a send:
+// the unit presents the captured operand on send_data until send_ready.
+//
+// Ports:
+//   a, deco             operand and decoded instruction
+//   in_valid, in_ready  issue-side handshake
+//   wb*                 write-back handshake and operation (receive only)
+//   recv_*              incoming stream: data, lane mask and handshake
+//   send_*              outgoing stream: data, lane mask and handshake
+module gfx_sp_stream
+(
+ input logic clk,
+ rst_n,
+
+ input mat4 a,
+ input insn_deco deco,
+ input logic in_valid,
+ output logic in_ready,
+
+ input logic wb_ready,
+ output logic wb_valid,
+ output wb_op wb,
+
+ input lane_word recv_data,
+ input lane_mask recv_mask,
+ input logic recv_valid,
+ output logic recv_ready,
+
+ input logic send_ready,
+ output logic send_valid,
+ output lane_word send_data,
+ output lane_mask send_mask
+);
+
+ // active: an operation is in flight; recv: that operation is a receive.
+ logic active, recv;
+ vreg_num wb_reg;
+
+ // A new instruction is accepted only while idle.
+ assign in_ready = !active;
+ // Incoming data is consumed only when write-back can take it.
+ assign recv_ready = active && recv && wb_ready;
+
+ // Received data is passed straight through to write-back.
+ assign wb_valid = active && recv && recv_valid;
+ assign send_valid = active && !recv;
+
+ assign wb.dst = wb_reg;
+ assign wb.data = recv_data;
+
+ always_ff @(posedge clk or negedge rst_n)
+ if (!rst_n) begin
+ active <= 0;
+ send_mask <= 0;
+ end else begin
+ // Start on a valid instruction; a send with an all-zero send_mask
+ // never becomes active.
+ if (!active)
+ active <= in_valid && (deco.writeback || |send_mask);
+ // Receive: finish once data is valid and write-back accepts it.
+ else if (recv)
+ active <= !wb_ready || !recv_valid;
+ // Send: finish once the receiver accepts the data.
+ else
+ active <= !send_ready;
+
+ // Every received mask narrows the set of lanes that will be sent.
+ if (recv_ready && recv_valid)
+ send_mask <= send_mask & recv_mask;
+
+ // An accepted instruction flagged clear_lanes re-enables all lanes.
+ // recv_ready requires active and in_ready requires !active, so this
+ // and the update above cannot fire in the same cycle.
+ if (in_ready && in_valid && deco.clear_lanes)
+ send_mask <= {($bits(send_mask)){1'b1}};
+ end
+
+ // While idle, keep tracking the instruction being offered so its kind,
+ // destination and operand are held once the unit becomes active.
+ always_ff @(posedge clk)
+ if (!active) begin
+ recv <= deco.writeback;
+ wb_reg <= deco.dst;
+ send_data <= a;
+ end
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_sp_swizzle.sv b/rtl/legacy_gfx/gfx_sp_swizzle.sv
new file mode 100644
index 0000000..d07d934
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_sp_swizzle.sv
@@ -0,0 +1,19 @@
+`include "gfx/gfx_defs.sv"
+
+// Registered swizzle stage for one vec4.
+//
+// While stall is low, component n of out is loaded with the component of in
+// selected by deco.swizzle_op[n]. While stall is high, out keeps its value.
+module gfx_sp_swizzle
+(
+    input  logic         clk,
+
+    input  vec4          in,
+    input  shuffler_deco deco,
+    input  logic         stall,
+
+    output vec4          out
+);
+
+    always_ff @(posedge clk) begin
+        for (int dst_idx = 0; dst_idx < `FLOATS_PER_VEC; ++dst_idx) begin
+            if (!stall) begin
+                out[dst_idx] <= in[deco.swizzle_op[dst_idx]];
+            end
+        end
+    end
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_sp_widener.sv b/rtl/legacy_gfx/gfx_sp_widener.sv
new file mode 100644
index 0000000..92101ca
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_sp_widener.sv
@@ -0,0 +1,63 @@
+`include "gfx/gfx_defs.sv"
+
+// Presents a wide read-only port on top of a narrow word-sized memory port.
+//
+// Each wide read is carried out as WIDE_SIZE consecutive narrow reads whose low
+// address bits are generated here; the returned words are collected and
+// delivered together on wide_readdata.
+//
+// Parameters:
+//   WIDTH  width in bits of wide_address (the upper part of a vram_addr)
+//
+// Ports:
+//   word_*  narrow side: address, read strobe, waitrequest, readdatavalid, data
+//   wide_*  wide side: same signals with a wider data bus
+//
+// NOTE(review): DATA_WIDTH is referenced in the port list before the localparam
+// that declares it; confirm every tool in the flow accepts this ordering.
+module gfx_sp_widener
+#(parameter WIDTH=0) // Quartus does not support 'parameter type'
+(
+ input logic clk,
+ rst_n,
+
+ input logic word_waitrequest,
+ word_readdatavalid,
+ input vram_word word_readdata,
+ output vram_addr word_address,
+ output logic word_read,
+
+ input logic wide_read,
+ input logic[WIDTH - 1:0] wide_address,
+ output logic wide_waitrequest,
+ wide_readdatavalid,
+
+ output logic[DATA_WIDTH - 1:0] wide_readdata
+);
+
+ // This module exists to pretend that the DE1-SoC has a wider SDRAM bus
+
+ // WIDE_BITS:  low address bits generated internally
+ // WIDE_SIZE:  narrow words per wide word
+ // DATA_WIDTH: width in bits of one wide word
+ localparam WIDE_BITS = $bits(vram_addr) - WIDTH,
+ WIDE_SIZE = 1 << WIDE_BITS,
+ DATA_WIDTH = $bits(vram_word) << WIDE_BITS;
+
+ // Words collected so far for the wide word being assembled.
+ vram_word shift_in[WIDE_SIZE];
+ // address_count: index of the narrow request being issued.
+ // read_count:    index of the narrow response being received.
+ logic[WIDE_BITS - 1:0] address_count, read_count;
+
+ assign word_read = wide_read;
+ assign word_address = {wide_address, address_count};
+ // The wide request is released only when the last narrow request (all
+ // address_count bits set) is accepted by the narrow side.
+ assign wide_waitrequest = word_waitrequest || !(&address_count);
+
+ // Flatten the collected words; shift_in[0] occupies the least significant
+ // slice of wide_readdata.
+ always_comb
+ for (integer i = 0; i < WIDE_SIZE; ++i)
+ wide_readdata[$bits(vram_word) * i +: $bits(vram_word)] = shift_in[i];
+
+ always_ff @(posedge clk or negedge rst_n)
+ if (!rst_n) begin
+ read_count <= 0;
+ address_count <= 0;
+ wide_readdatavalid <= 0;
+ end else begin
+ // Advance to the next narrow address on every accepted request.
+ if (word_read && !word_waitrequest)
+ address_count <= address_count + 1;
+
+ if (word_readdatavalid)
+ read_count <= read_count + 1;
+
+ // Registered, so it is high on the cycle after the last narrow word
+ // arrives, when shift_in already holds that word.
+ wide_readdatavalid <= word_readdatavalid && &read_count;
+ end
+
+ // Shift each returned word in at the top; after WIDE_SIZE words the first
+ // one received sits at index 0.
+ always_ff @(posedge clk)
+ if (word_readdatavalid) begin
+ for (integer i = 0; i < WIDE_SIZE - 1; ++i)
+ shift_in[i] <= shift_in[i + 1];
+
+ shift_in[WIDE_SIZE - 1] <= word_readdata;
+ end
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_sp_writeback.sv b/rtl/legacy_gfx/gfx_sp_writeback.sv
new file mode 100644
index 0000000..1195910
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_sp_writeback.sv
@@ -0,0 +1,65 @@
+`include "gfx/gfx_defs.sv"
+
+// Write-back arbiter: merges the write-back requests of the three execution
+// units into the single register-file write port.
+//
+// Fixed priority: stream, then shuffler, then combiner. The chosen operation
+// travels through GFX_SP_WB_STAGES pipeline stages before it appears on
+// wr / wr_reg / wr_data.
+//
+// Ports:
+//   *_wb, *_wb_valid, *_wb_ready  per-unit write-back operation and handshake
+//   wr, wr_reg, wr_data           register-file write strobe, register and data
+module gfx_sp_writeback
+(
+ input logic clk,
+ rst_n,
+
+ input wb_op stream_wb,
+ input logic stream_wb_valid,
+ output logic stream_wb_ready,
+
+ input wb_op combiner_wb,
+ input logic combiner_wb_valid,
+ output logic combiner_wb_ready,
+
+ input wb_op shuffler_wb,
+ input logic shuffler_wb_valid,
+ output logic shuffler_wb_ready,
+
+ output logic wr,
+ output vreg_num wr_reg,
+ output mat4 wr_data
+);
+
+ wb_op wb_in, wb_out;
+
+ assign wr_reg = wb_out.dst;
+ assign wr_data = wb_out.data;
+
+ // Tracks validity through the pipeline. The output side is always ready,
+ // and the stall / in_ready outputs are left unconnected.
+ gfx_pipeline_flow #(.STAGES(`GFX_SP_WB_STAGES)) flow
+ (
+ .stall(),
+ .in_ready(),
+ .in_valid(stream_wb_valid || combiner_wb_valid || shuffler_wb_valid),
+ .out_ready(1),
+ .out_valid(wr),
+ .*
+ );
+
+ // Data pipeline for the selected operation, with the same depth as the
+ // flow instance above; its stall input is tied low.
+ gfx_pipes #(.WIDTH($bits(wb_out)), .DEPTH(`GFX_SP_WB_STAGES)) pipes
+ (
+ .in(wb_in),
+ .out(wb_out),
+ .stall(0),
+ .*
+ );
+
+ // Priority arbitration. Exactly one ready line is high each cycle; when
+ // neither stream nor shuffler is valid the combiner is selected, and its
+ // ready is asserted even if combiner_wb_valid is low.
+ always_comb begin
+ stream_wb_ready = 0;
+ combiner_wb_ready = 0;
+ shuffler_wb_ready = 0;
+
+ if (stream_wb_valid) begin
+ wb_in = stream_wb;
+ stream_wb_ready = 1;
+ end else if (shuffler_wb_valid) begin
+ wb_in = shuffler_wb;
+ shuffler_wb_ready = 1;
+ end else begin
+ wb_in = combiner_wb;
+ combiner_wb_ready = 1;
+ end
+ end
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_transpose.sv b/rtl/legacy_gfx/gfx_transpose.sv
new file mode 100644
index 0000000..03ecf2d
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_transpose.sv
@@ -0,0 +1,17 @@
+`include "gfx/gfx_defs.sv"
+
+// Combinational matrix transpose: out[i][j] = in[j][i].
+module gfx_transpose
+(
+    input  mat4 in,
+    output mat4 out
+);
+
+    // This has no hardware cost; it is only a renaming of signals.
+    always_comb begin
+        for (int row = 0; row < `VECS_PER_MAT; ++row) begin
+            for (int col = 0; col < `FLOATS_PER_VEC; ++col) begin
+                out[row][col] = in[col][row];
+            end
+        end
+    end
+
+endmodule
diff --git a/rtl/legacy_gfx/mod.mk b/rtl/legacy_gfx/mod.mk
new file mode 100644
index 0000000..4e0f46d
--- /dev/null
+++ b/rtl/legacy_gfx/mod.mk
@@ -0,0 +1,5 @@
+# Build description for this directory, stored in the multi-line make variable
+# "core": it sets the dependency list (config), the RTL source directory (this
+# one) and the top-level RTL module (gfx).
+# NOTE(review): presumably expanded by the repository's build system with
+# $(this) bound to this module -- confirm against the top-level makefiles.
+define core
+ $(this)/deps := config
+ $(this)/rtl_dirs := .
+ $(this)/rtl_top := gfx
+endef