From 081a8a3ba8bfe036f31da53f9c041a2caa30fce2 Mon Sep 17 00:00:00 2001 From: Alejandro Soto Date: Sun, 5 May 2024 17:34:22 -0600 Subject: rtl/legacy_gfx: rename gfx -> legacy_gfx --- rtl/gfx/gfx.sv | 208 ---------------------------- rtl/gfx/gfx_assembly.sv | 89 ------------ rtl/gfx/gfx_clear.sv | 70 ---------- rtl/gfx/gfx_cmd.sv | 125 ----------------- rtl/gfx/gfx_defs.sv | 267 ------------------------------------ rtl/gfx/gfx_dot.sv | 49 ------- rtl/gfx/gfx_fifo.sv | 98 ------------- rtl/gfx/gfx_fifo_overflow.sv | 34 ----- rtl/gfx/gfx_fix_floats.sv | 49 ------- rtl/gfx/gfx_fix_vertex.sv | 64 --------- rtl/gfx/gfx_fixed_div.sv | 77 ----------- rtl/gfx/gfx_fixed_fma.sv | 73 ---------- rtl/gfx/gfx_fixed_fma_dot.sv | 49 ------- rtl/gfx/gfx_flush_flow.sv | 45 ------ rtl/gfx/gfx_fold.sv | 54 -------- rtl/gfx/gfx_fold_flow.sv | 61 -------- rtl/gfx/gfx_fp_add.sv | 41 ------ rtl/gfx/gfx_fp_fix.sv | 34 ----- rtl/gfx/gfx_fp_mul.sv | 41 ------ rtl/gfx/gfx_frag.sv | 79 ----------- rtl/gfx/gfx_frag_addr.sv | 59 -------- rtl/gfx/gfx_frag_bary.sv | 78 ----------- rtl/gfx/gfx_frag_shade.sv | 53 ------- rtl/gfx/gfx_funnel.sv | 96 ------------- rtl/gfx/gfx_lerp.sv | 32 ----- rtl/gfx/gfx_mask_sram.sv | 31 ----- rtl/gfx/gfx_masks.sv | 68 --------- rtl/gfx/gfx_mat_mat.sv | 83 ----------- rtl/gfx/gfx_mat_vec.sv | 49 ------- rtl/gfx/gfx_mem.sv | 228 ------------------------------ rtl/gfx/gfx_persp.sv | 58 -------- rtl/gfx/gfx_persp_vertex.sv | 52 ------- rtl/gfx/gfx_pipeline_flow.sv | 40 ------ rtl/gfx/gfx_pipes.sv | 24 ---- rtl/gfx/gfx_raster.sv | 131 ------------------ rtl/gfx/gfx_raster_coarse.sv | 135 ------------------ rtl/gfx/gfx_raster_fine.sv | 49 ------- rtl/gfx/gfx_rop.sv | 85 ------------ rtl/gfx/gfx_scanout.sv | 138 ------------------- rtl/gfx/gfx_scanout_dac.sv | 117 ---------------- rtl/gfx/gfx_setup.sv | 190 ------------------------- rtl/gfx/gfx_setup_bounds.sv | 73 ---------- rtl/gfx/gfx_setup_edge.sv | 53 ------- rtl/gfx/gfx_setup_offsets.sv | 44 ------ 
rtl/gfx/gfx_skid_buf.sv | 20 --- rtl/gfx/gfx_skid_flow.sv | 31 ----- rtl/gfx/gfx_sp.sv | 131 ------------------ rtl/gfx/gfx_sp_batch.sv | 141 ------------------- rtl/gfx/gfx_sp_combiner.sv | 63 --------- rtl/gfx/gfx_sp_decode.sv | 116 ---------------- rtl/gfx/gfx_sp_fetch.sv | 224 ------------------------------ rtl/gfx/gfx_sp_file.sv | 32 ----- rtl/gfx/gfx_sp_isa.sv | 23 ---- rtl/gfx/gfx_sp_issue.sv | 111 --------------- rtl/gfx/gfx_sp_regs.sv | 39 ------ rtl/gfx/gfx_sp_select.sv | 25 ---- rtl/gfx/gfx_sp_shuffler.sv | 70 ---------- rtl/gfx/gfx_sp_stream.sv | 66 --------- rtl/gfx/gfx_sp_swizzle.sv | 19 --- rtl/gfx/gfx_sp_widener.sv | 63 --------- rtl/gfx/gfx_sp_writeback.sv | 65 --------- rtl/gfx/gfx_transpose.sv | 17 --- rtl/gfx/mod.mk | 5 - rtl/legacy_gfx/gfx.sv | 208 ++++++++++++++++++++++++++++ rtl/legacy_gfx/gfx_assembly.sv | 89 ++++++++++++ rtl/legacy_gfx/gfx_clear.sv | 70 ++++++++++ rtl/legacy_gfx/gfx_cmd.sv | 125 +++++++++++++++++ rtl/legacy_gfx/gfx_defs.sv | 267 ++++++++++++++++++++++++++++++++++++ rtl/legacy_gfx/gfx_dot.sv | 49 +++++++ rtl/legacy_gfx/gfx_fifo.sv | 98 +++++++++++++ rtl/legacy_gfx/gfx_fifo_overflow.sv | 34 +++++ rtl/legacy_gfx/gfx_fix_floats.sv | 49 +++++++ rtl/legacy_gfx/gfx_fix_vertex.sv | 64 +++++++++ rtl/legacy_gfx/gfx_fixed_div.sv | 77 +++++++++++ rtl/legacy_gfx/gfx_fixed_fma.sv | 73 ++++++++++ rtl/legacy_gfx/gfx_fixed_fma_dot.sv | 49 +++++++ rtl/legacy_gfx/gfx_flush_flow.sv | 45 ++++++ rtl/legacy_gfx/gfx_fold.sv | 54 ++++++++ rtl/legacy_gfx/gfx_fold_flow.sv | 61 ++++++++ rtl/legacy_gfx/gfx_fp_add.sv | 41 ++++++ rtl/legacy_gfx/gfx_fp_fix.sv | 34 +++++ rtl/legacy_gfx/gfx_fp_mul.sv | 41 ++++++ rtl/legacy_gfx/gfx_frag.sv | 79 +++++++++++ rtl/legacy_gfx/gfx_frag_addr.sv | 59 ++++++++ rtl/legacy_gfx/gfx_frag_bary.sv | 78 +++++++++++ rtl/legacy_gfx/gfx_frag_shade.sv | 53 +++++++ rtl/legacy_gfx/gfx_funnel.sv | 96 +++++++++++++ rtl/legacy_gfx/gfx_lerp.sv | 32 +++++ rtl/legacy_gfx/gfx_mask_sram.sv | 31 +++++ rtl/legacy_gfx/gfx_masks.sv | 68 
+++++++++ rtl/legacy_gfx/gfx_mat_mat.sv | 83 +++++++++++ rtl/legacy_gfx/gfx_mat_vec.sv | 49 +++++++ rtl/legacy_gfx/gfx_mem.sv | 228 ++++++++++++++++++++++++++++++ rtl/legacy_gfx/gfx_persp.sv | 58 ++++++++ rtl/legacy_gfx/gfx_persp_vertex.sv | 52 +++++++ rtl/legacy_gfx/gfx_pipeline_flow.sv | 40 ++++++ rtl/legacy_gfx/gfx_pipes.sv | 24 ++++ rtl/legacy_gfx/gfx_raster.sv | 131 ++++++++++++++++++ rtl/legacy_gfx/gfx_raster_coarse.sv | 135 ++++++++++++++++++ rtl/legacy_gfx/gfx_raster_fine.sv | 49 +++++++ rtl/legacy_gfx/gfx_rop.sv | 85 ++++++++++++ rtl/legacy_gfx/gfx_scanout.sv | 138 +++++++++++++++++++ rtl/legacy_gfx/gfx_scanout_dac.sv | 117 ++++++++++++++++ rtl/legacy_gfx/gfx_setup.sv | 190 +++++++++++++++++++++++++ rtl/legacy_gfx/gfx_setup_bounds.sv | 73 ++++++++++ rtl/legacy_gfx/gfx_setup_edge.sv | 53 +++++++ rtl/legacy_gfx/gfx_setup_offsets.sv | 44 ++++++ rtl/legacy_gfx/gfx_skid_buf.sv | 20 +++ rtl/legacy_gfx/gfx_skid_flow.sv | 31 +++++ rtl/legacy_gfx/gfx_sp.sv | 131 ++++++++++++++++++ rtl/legacy_gfx/gfx_sp_batch.sv | 141 +++++++++++++++++++ rtl/legacy_gfx/gfx_sp_combiner.sv | 63 +++++++++ rtl/legacy_gfx/gfx_sp_decode.sv | 116 ++++++++++++++++ rtl/legacy_gfx/gfx_sp_fetch.sv | 224 ++++++++++++++++++++++++++++++ rtl/legacy_gfx/gfx_sp_file.sv | 32 +++++ rtl/legacy_gfx/gfx_sp_isa.sv | 23 ++++ rtl/legacy_gfx/gfx_sp_issue.sv | 111 +++++++++++++++ rtl/legacy_gfx/gfx_sp_regs.sv | 39 ++++++ rtl/legacy_gfx/gfx_sp_select.sv | 25 ++++ rtl/legacy_gfx/gfx_sp_shuffler.sv | 70 ++++++++++ rtl/legacy_gfx/gfx_sp_stream.sv | 66 +++++++++ rtl/legacy_gfx/gfx_sp_swizzle.sv | 19 +++ rtl/legacy_gfx/gfx_sp_widener.sv | 63 +++++++++ rtl/legacy_gfx/gfx_sp_writeback.sv | 65 +++++++++ rtl/legacy_gfx/gfx_transpose.sv | 17 +++ rtl/legacy_gfx/mod.mk | 5 + rtl/mod.mk | 2 +- rtl/top/mod.mk | 4 +- 128 files changed, 4837 insertions(+), 4837 deletions(-) delete mode 100644 rtl/gfx/gfx.sv delete mode 100644 rtl/gfx/gfx_assembly.sv delete mode 100644 rtl/gfx/gfx_clear.sv delete mode 100644 rtl/gfx/gfx_cmd.sv 
delete mode 100644 rtl/gfx/gfx_defs.sv delete mode 100644 rtl/gfx/gfx_dot.sv delete mode 100644 rtl/gfx/gfx_fifo.sv delete mode 100644 rtl/gfx/gfx_fifo_overflow.sv delete mode 100644 rtl/gfx/gfx_fix_floats.sv delete mode 100644 rtl/gfx/gfx_fix_vertex.sv delete mode 100644 rtl/gfx/gfx_fixed_div.sv delete mode 100644 rtl/gfx/gfx_fixed_fma.sv delete mode 100644 rtl/gfx/gfx_fixed_fma_dot.sv delete mode 100644 rtl/gfx/gfx_flush_flow.sv delete mode 100644 rtl/gfx/gfx_fold.sv delete mode 100644 rtl/gfx/gfx_fold_flow.sv delete mode 100644 rtl/gfx/gfx_fp_add.sv delete mode 100644 rtl/gfx/gfx_fp_fix.sv delete mode 100644 rtl/gfx/gfx_fp_mul.sv delete mode 100644 rtl/gfx/gfx_frag.sv delete mode 100644 rtl/gfx/gfx_frag_addr.sv delete mode 100644 rtl/gfx/gfx_frag_bary.sv delete mode 100644 rtl/gfx/gfx_frag_shade.sv delete mode 100644 rtl/gfx/gfx_funnel.sv delete mode 100644 rtl/gfx/gfx_lerp.sv delete mode 100644 rtl/gfx/gfx_mask_sram.sv delete mode 100644 rtl/gfx/gfx_masks.sv delete mode 100644 rtl/gfx/gfx_mat_mat.sv delete mode 100644 rtl/gfx/gfx_mat_vec.sv delete mode 100644 rtl/gfx/gfx_mem.sv delete mode 100644 rtl/gfx/gfx_persp.sv delete mode 100644 rtl/gfx/gfx_persp_vertex.sv delete mode 100644 rtl/gfx/gfx_pipeline_flow.sv delete mode 100644 rtl/gfx/gfx_pipes.sv delete mode 100644 rtl/gfx/gfx_raster.sv delete mode 100644 rtl/gfx/gfx_raster_coarse.sv delete mode 100644 rtl/gfx/gfx_raster_fine.sv delete mode 100644 rtl/gfx/gfx_rop.sv delete mode 100644 rtl/gfx/gfx_scanout.sv delete mode 100644 rtl/gfx/gfx_scanout_dac.sv delete mode 100644 rtl/gfx/gfx_setup.sv delete mode 100644 rtl/gfx/gfx_setup_bounds.sv delete mode 100644 rtl/gfx/gfx_setup_edge.sv delete mode 100644 rtl/gfx/gfx_setup_offsets.sv delete mode 100644 rtl/gfx/gfx_skid_buf.sv delete mode 100644 rtl/gfx/gfx_skid_flow.sv delete mode 100644 rtl/gfx/gfx_sp.sv delete mode 100644 rtl/gfx/gfx_sp_batch.sv delete mode 100644 rtl/gfx/gfx_sp_combiner.sv delete mode 100644 rtl/gfx/gfx_sp_decode.sv delete mode 100644 
rtl/gfx/gfx_sp_fetch.sv delete mode 100644 rtl/gfx/gfx_sp_file.sv delete mode 100644 rtl/gfx/gfx_sp_isa.sv delete mode 100644 rtl/gfx/gfx_sp_issue.sv delete mode 100644 rtl/gfx/gfx_sp_regs.sv delete mode 100644 rtl/gfx/gfx_sp_select.sv delete mode 100644 rtl/gfx/gfx_sp_shuffler.sv delete mode 100644 rtl/gfx/gfx_sp_stream.sv delete mode 100644 rtl/gfx/gfx_sp_swizzle.sv delete mode 100644 rtl/gfx/gfx_sp_widener.sv delete mode 100644 rtl/gfx/gfx_sp_writeback.sv delete mode 100644 rtl/gfx/gfx_transpose.sv delete mode 100644 rtl/gfx/mod.mk create mode 100644 rtl/legacy_gfx/gfx.sv create mode 100644 rtl/legacy_gfx/gfx_assembly.sv create mode 100644 rtl/legacy_gfx/gfx_clear.sv create mode 100644 rtl/legacy_gfx/gfx_cmd.sv create mode 100644 rtl/legacy_gfx/gfx_defs.sv create mode 100644 rtl/legacy_gfx/gfx_dot.sv create mode 100644 rtl/legacy_gfx/gfx_fifo.sv create mode 100644 rtl/legacy_gfx/gfx_fifo_overflow.sv create mode 100644 rtl/legacy_gfx/gfx_fix_floats.sv create mode 100644 rtl/legacy_gfx/gfx_fix_vertex.sv create mode 100644 rtl/legacy_gfx/gfx_fixed_div.sv create mode 100644 rtl/legacy_gfx/gfx_fixed_fma.sv create mode 100644 rtl/legacy_gfx/gfx_fixed_fma_dot.sv create mode 100644 rtl/legacy_gfx/gfx_flush_flow.sv create mode 100644 rtl/legacy_gfx/gfx_fold.sv create mode 100644 rtl/legacy_gfx/gfx_fold_flow.sv create mode 100644 rtl/legacy_gfx/gfx_fp_add.sv create mode 100644 rtl/legacy_gfx/gfx_fp_fix.sv create mode 100644 rtl/legacy_gfx/gfx_fp_mul.sv create mode 100644 rtl/legacy_gfx/gfx_frag.sv create mode 100644 rtl/legacy_gfx/gfx_frag_addr.sv create mode 100644 rtl/legacy_gfx/gfx_frag_bary.sv create mode 100644 rtl/legacy_gfx/gfx_frag_shade.sv create mode 100644 rtl/legacy_gfx/gfx_funnel.sv create mode 100644 rtl/legacy_gfx/gfx_lerp.sv create mode 100644 rtl/legacy_gfx/gfx_mask_sram.sv create mode 100644 rtl/legacy_gfx/gfx_masks.sv create mode 100644 rtl/legacy_gfx/gfx_mat_mat.sv create mode 100644 rtl/legacy_gfx/gfx_mat_vec.sv create mode 100644 
rtl/legacy_gfx/gfx_mem.sv create mode 100644 rtl/legacy_gfx/gfx_persp.sv create mode 100644 rtl/legacy_gfx/gfx_persp_vertex.sv create mode 100644 rtl/legacy_gfx/gfx_pipeline_flow.sv create mode 100644 rtl/legacy_gfx/gfx_pipes.sv create mode 100644 rtl/legacy_gfx/gfx_raster.sv create mode 100644 rtl/legacy_gfx/gfx_raster_coarse.sv create mode 100644 rtl/legacy_gfx/gfx_raster_fine.sv create mode 100644 rtl/legacy_gfx/gfx_rop.sv create mode 100644 rtl/legacy_gfx/gfx_scanout.sv create mode 100644 rtl/legacy_gfx/gfx_scanout_dac.sv create mode 100644 rtl/legacy_gfx/gfx_setup.sv create mode 100644 rtl/legacy_gfx/gfx_setup_bounds.sv create mode 100644 rtl/legacy_gfx/gfx_setup_edge.sv create mode 100644 rtl/legacy_gfx/gfx_setup_offsets.sv create mode 100644 rtl/legacy_gfx/gfx_skid_buf.sv create mode 100644 rtl/legacy_gfx/gfx_skid_flow.sv create mode 100644 rtl/legacy_gfx/gfx_sp.sv create mode 100644 rtl/legacy_gfx/gfx_sp_batch.sv create mode 100644 rtl/legacy_gfx/gfx_sp_combiner.sv create mode 100644 rtl/legacy_gfx/gfx_sp_decode.sv create mode 100644 rtl/legacy_gfx/gfx_sp_fetch.sv create mode 100644 rtl/legacy_gfx/gfx_sp_file.sv create mode 100644 rtl/legacy_gfx/gfx_sp_isa.sv create mode 100644 rtl/legacy_gfx/gfx_sp_issue.sv create mode 100644 rtl/legacy_gfx/gfx_sp_regs.sv create mode 100644 rtl/legacy_gfx/gfx_sp_select.sv create mode 100644 rtl/legacy_gfx/gfx_sp_shuffler.sv create mode 100644 rtl/legacy_gfx/gfx_sp_stream.sv create mode 100644 rtl/legacy_gfx/gfx_sp_swizzle.sv create mode 100644 rtl/legacy_gfx/gfx_sp_widener.sv create mode 100644 rtl/legacy_gfx/gfx_sp_writeback.sv create mode 100644 rtl/legacy_gfx/gfx_transpose.sv create mode 100644 rtl/legacy_gfx/mod.mk (limited to 'rtl') diff --git a/rtl/gfx/gfx.sv b/rtl/gfx/gfx.sv deleted file mode 100644 index 73e5dbf..0000000 --- a/rtl/gfx/gfx.sv +++ /dev/null @@ -1,208 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx -( - input logic clk, - rst_n, - - input cmd_addr cmd_address, - input logic cmd_read, - cmd_write, - 
input cmd_word cmd_writedata, - output cmd_word cmd_readdata, - - input logic mem_waitrequest, - mem_readdatavalid, - input vram_word mem_readdata, - output vram_byte_addr mem_address, - output logic mem_read, - mem_write, - output vram_word mem_writedata, - - input vram_addr host_address, - input logic host_read, - host_write, - input vram_word host_writedata, - output logic host_waitrequest, - host_readdatavalid, - output vram_word host_readdata, - - input logic scan_ready, - output logic scan_valid, - scan_endofpacket, - scan_startofpacket, - output rgb30 scan_data -); - - logic enable_clear, program_start, start_clear, swap_buffers; - rgb24 clear_color; - cmd_word fb_base_a, fb_base_b, program_header_base, program_header_size; - - gfx_cmd cmd - ( - .* - ); - - logic batch_read, fetch_read, running, send_valid; - lane_word send_data; - lane_mask send_mask; - vram_addr batch_address, fetch_address; - - gfx_sp sp - ( - .* - ); - - logic send_ready, assembly_valid; - fp_xyzw assembly_vertex_a, assembly_vertex_b, assembly_vertex_c; - - gfx_assembly assembly - ( - .out_ready(fix_ready), - .out_valid(assembly_valid), - .out_vertex_a(assembly_vertex_a), - .out_vertex_b(assembly_vertex_b), - .out_vertex_c(assembly_vertex_c), - .* - ); - - logic fix_ready, fix_valid; - raster_xyzw fix_vertex_a, fix_vertex_b, fix_vertex_c; - - gfx_fix_floats fix - ( - .in_ready(fix_ready), - .in_valid(assembly_valid), - .out_ready(persp_ready), - .out_valid(fix_valid), - .in_vertex_a(assembly_vertex_a), - .in_vertex_b(assembly_vertex_b), - .in_vertex_c(assembly_vertex_c), - .out_vertex_a(fix_vertex_a), - .out_vertex_b(fix_vertex_b), - .out_vertex_c(fix_vertex_c), - .* - ); - - logic persp_ready, persp_valid; - raster_xyzw persp_vertex_a, persp_vertex_b, persp_vertex_c; - - gfx_persp perspective - ( - .in_ready(persp_ready), - .in_valid(fix_valid), - .out_ready(raster_ready), - .out_valid(persp_valid), - .in_vertex_a(fix_vertex_a), - .in_vertex_b(fix_vertex_b), - 
.in_vertex_c(fix_vertex_c), - .out_vertex_a(persp_vertex_a), - .out_vertex_b(persp_vertex_b), - .out_vertex_c(persp_vertex_c), - .* - ); - - logic raster_ready; - fixed_tri raster_ws; - bary_lanes barys; - paint_lanes raster_valid; - frag_xy_lanes fragments; - - gfx_raster raster - ( - .ws(raster_ws), - .in_ready(raster_ready), - .in_valid(persp_valid), - .out_ready(funnel_ready), - .out_valid(raster_valid), - - .vertex_a(persp_vertex_a), - .vertex_b(persp_vertex_b), - .vertex_c(persp_vertex_c), - - .* - ); - - logic frag_mask, scan_mask; - vram_addr frag_base, scan_base; - - gfx_masks masks - ( - .frag_mask_read_addr(), - .* - ); - - logic frag_mask_set, frag_mask_write, frag_wait; - linear_coord frag_mask_write_addr; - - gfx_clear clear - ( - .* - ); - - logic funnel_ready, funnel_valid; - frag_xy frag; - fixed_tri frag_bary, frag_ws; - - gfx_funnel funnel - ( - .in_ready(funnel_ready), - .in_valid(raster_valid), - .out_ready(frag_ready), - .out_valid(funnel_valid), - .* - ); - - logic frag_ready, frag_valid; - frag_paint frag_out; - - gfx_frag frag_ - ( - .out(frag_out), - - .ws(frag_ws), - .bary(frag_bary), - .in_ready(frag_ready), - .in_valid(funnel_valid), - .out_ready(rop_ready), - .out_valid(frag_valid), - .* - ); - - logic rop_mask_assert, rop_ready, rop_write; - vram_addr rop_address; - vram_word rop_writedata; - linear_coord rop_mask_addr; - - gfx_rop rop - ( - .in(frag_out), - .in_ready(rop_ready), - .in_valid(frag_valid), - .mask_addr(rop_mask_addr), - .mask_assert(rop_mask_assert), - .* - ); - - logic batch_readdatavalid, fb_readdatavalid, fetch_readdatavalid, - batch_waitrequest, fb_waitrequest, fetch_waitrequest, rop_waitrequest; - - vram_word batch_readdata, fb_readdata, fetch_readdata; - - gfx_mem mem - ( - .* - ); - - logic fb_read, vsync; - vram_addr fb_address; - linear_coord scan_mask_addr; - - gfx_scanout scanout - ( - .mask(scan_mask), - .mask_addr(scan_mask_addr), - .* - ); - -endmodule diff --git a/rtl/gfx/gfx_assembly.sv 
b/rtl/gfx/gfx_assembly.sv deleted file mode 100644 index 1a909be..0000000 --- a/rtl/gfx/gfx_assembly.sv +++ /dev/null @@ -1,89 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_assembly -( - input logic clk, - rst_n, - - input lane_word send_data, - input lane_mask send_mask, - input logic send_valid, - output logic send_ready, - - input logic out_ready, - output logic out_valid, - output fp_xyzw out_vertex_a, - out_vertex_b, - out_vertex_c -); - - localparam SETS_PER_TRI = 6; - - mat4 sets[SETS_PER_TRI]; - logic assemble_next, permit_out; - lane_mask current_mask, next_mask; - logic[1:0] out_lane; - logic[2:0] set_num; - - enum int unsigned - { - GET_LANES, - ASSEMBLE - } state; - - assign out_valid = permit_out && current_mask[out_lane]; - assign out_vertex_a = sets[0][out_lane]; - assign out_vertex_b = sets[2][out_lane]; - assign out_vertex_c = sets[4][out_lane]; - - assign next_mask = current_mask & send_mask; - assign assemble_next = !current_mask[out_lane] || out_ready; - - always_ff @(posedge clk or negedge rst_n) - if (!rst_n) begin - state <= GET_LANES; - set_num <= 0; - out_lane <= 0; - permit_out <= 0; - send_ready <= 1; - current_mask <= {($bits(current_mask)){1'b1}}; - end else unique case (state) - GET_LANES: - if (send_valid) begin - set_num <= set_num + 1; - current_mask <= next_mask; - - if (set_num == SETS_PER_TRI - 1) begin - state <= ASSEMBLE; - permit_out <= 1; - send_ready <= 0; - end - - if (!(|next_mask)) begin - state <= GET_LANES; - set_num <= 0; - current_mask <= {($bits(current_mask)){1'b1}}; - end - end - - ASSEMBLE: - if (assemble_next) begin - out_lane <= out_lane + 1; - if (&out_lane) begin - state <= GET_LANES; - permit_out <= 0; - send_ready <= 1; - end - end - endcase - - always_ff @(posedge clk) - unique case (state) - GET_LANES: - if (send_valid) - sets[set_num] <= send_data; - - ASSEMBLE: ; - endcase - -endmodule diff --git a/rtl/gfx/gfx_clear.sv b/rtl/gfx/gfx_clear.sv deleted file mode 100644 index ae9a20c..0000000 --- 
a/rtl/gfx/gfx_clear.sv +++ /dev/null @@ -1,70 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_clear -( - input logic clk, - rst_n, - - input logic start_clear, - - input linear_coord rop_mask_addr, - input logic rop_mask_assert, - output logic frag_wait, - - output logic frag_mask_set, - frag_mask_write, - output linear_coord frag_mask_write_addr -); - - enum int unsigned - { - FRAG, - CLEAR - } state; - - logic end_clear; - - assign end_clear = frag_mask_write_addr == `GFX_LINEAR_RES - 1; - - always_comb - unique case (state) - FRAG: frag_wait = start_clear; - CLEAR: frag_wait = 1; - endcase - - always_ff @(posedge clk or negedge rst_n) - if (!rst_n) begin - state <= FRAG; - frag_mask_write <= 0; - end else unique case (state) - FRAG: begin - frag_mask_write <= rop_mask_assert; - - if (start_clear) begin - state <= CLEAR; - frag_mask_write <= 1; - end - end - - CLEAR: - if (end_clear) begin - state <= FRAG; - frag_mask_write <= 0; - end - endcase - - always_ff @(posedge clk) - unique case (state) - FRAG: begin - frag_mask_set <= !start_clear; - frag_mask_write_addr <= rop_mask_addr; - - if (start_clear) - frag_mask_write_addr <= 0; - end - - CLEAR: - frag_mask_write_addr <= frag_mask_write_addr + 1; - endcase - -endmodule diff --git a/rtl/gfx/gfx_cmd.sv b/rtl/gfx/gfx_cmd.sv deleted file mode 100644 index 29b6e21..0000000 --- a/rtl/gfx/gfx_cmd.sv +++ /dev/null @@ -1,125 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_cmd -( - input logic clk, - rst_n, - - input cmd_addr cmd_address, - input logic cmd_read, - cmd_write, - input cmd_word cmd_writedata, - output cmd_word cmd_readdata, - - input logic vsync, - - output logic swap_buffers, - enable_clear, - start_clear, - output rgb24 clear_color, - - output logic program_start, - output cmd_word program_header_base, - program_header_size, - - output cmd_word fb_base_a, - fb_base_b -); - - rgb24 next_clear_color; - logic do_start_clear, next_start_clear, next_enable_clear, next_swap_buffers; - - struct packed - { - 
logic[4:0] mbz; - logic start_frame, - enable_clear, - swap_buffers; - rgb24 clear_color; - } readdata_scan, writedata_scan; - - assign cmd_readdata = readdata_scan; - - assign writedata_scan = cmd_writedata; - assign readdata_scan.mbz = 0; - assign readdata_scan.clear_color = clear_color; - assign readdata_scan.enable_clear = enable_clear; - assign readdata_scan.swap_buffers = swap_buffers; - - assign do_start_clear = writedata_scan.start_frame && writedata_scan.enable_clear; - - always_ff @(posedge clk or negedge rst_n) - if (!rst_n) begin - start_clear <= 0; - enable_clear <= 0; - swap_buffers <= 0; - - next_start_clear <= 0; - next_enable_clear <= 0; - next_swap_buffers <= 0; - - program_start <= 0; - - fb_base_a <= 0; - fb_base_b <= 0; - end else begin - start_clear <= 0; - program_start <= 0; - - if (vsync) begin - start_clear <= next_start_clear; - enable_clear <= next_enable_clear; - swap_buffers <= next_swap_buffers; - - next_start_clear <= 0; - end - - if (cmd_write) - unique case (cmd_address[2:0]) - `GFX_CMD_REG_ID: ; - - `GFX_CMD_REG_SCAN: begin - next_enable_clear <= writedata_scan.enable_clear; - next_swap_buffers <= writedata_scan.swap_buffers; - - if (!next_start_clear) - next_start_clear <= do_start_clear; - end - - `GFX_CMD_REG_HEADER_BASE: ; - - `GFX_CMD_REG_HEADER_SIZE: - program_start <= 1; - - `GFX_CMD_REG_FB_BASE_A: - fb_base_a <= cmd_writedata; - - `GFX_CMD_REG_FB_BASE_B: - fb_base_b <= cmd_writedata; - - default: ; - endcase - end - - always_ff @(posedge clk) begin - if (vsync) - clear_color <= next_clear_color; - - if (cmd_write) - unique case (cmd_address[2:0]) - `GFX_CMD_REG_ID: ; - - `GFX_CMD_REG_SCAN: - next_clear_color <= writedata_scan.clear_color; - - `GFX_CMD_REG_HEADER_BASE: - program_header_base <= cmd_writedata; - - `GFX_CMD_REG_HEADER_SIZE: - program_header_size <= cmd_writedata; - - default: ; - endcase - end - -endmodule diff --git a/rtl/gfx/gfx_defs.sv b/rtl/gfx/gfx_defs.sv deleted file mode 100644 index 1e7a335..0000000 
--- a/rtl/gfx/gfx_defs.sv +++ /dev/null @@ -1,267 +0,0 @@ -`ifndef GFX_DEFS_SV -`define GFX_DEFS_SV - -// Esto es arquitectural, no se puede ajustar sin cambiar otras cosas -`define FLOAT_BITS 16 -`define FLOATS_PER_VEC 4 -`define VECS_PER_MAT 4 - -// Target de 200MHz (reloj es 143MHz) con float16, rounding (muy) aproximado -`define FP_ADD_STAGES 10 // ~401 LUTs -`define FP_MUL_STAGES 5 // ~144 LUTs ~1 bloque DSP -`define FP_FIX_STAGES 5 // ~313 LUTs - -typedef logic[`FLOAT_BITS - 1:0] fp; -typedef fp[1:0] vec2; -typedef fp[`FLOATS_PER_VEC - 1:0] vec4; -typedef vec4[`VECS_PER_MAT - 1:0] mat4; - -`define FP_UNIT 16'h3c00 - -typedef logic[1:0] index4; - -`define INDEX4_MIN 2'b00 -`define INDEX4_MAX 2'b11 - -typedef logic[8:0] x_coord; -typedef logic[9:0] y_coord; -typedef logic[9:0] xy_coord; -typedef logic[18:0] linear_coord; -typedef logic[19:0] half_coord; - -`define GFX_X_RES 640 -`define GFX_Y_RES 480 -`define GFX_LINEAR_RES (`GFX_X_RES * `GFX_Y_RES) - -`define COLOR_CHANNELS 4 - -typedef logic[7:0] color8; -typedef logic[9:0] color10; - -typedef struct packed -{ - color8 r, g, b; -} rgb24; - -typedef struct packed -{ - color10 r, g, b; -} rgb30; - -typedef struct packed -{ - color8 a, r, g, b; -} rgb32; - -`define FIXED_FRAC 16 - -`define FIXED_DIV_PIPES 2 -`define FIXED_DIV_STAGES (`FIXED_DIV_PIPES + $bits(fixed) + `FIXED_FRAC) -`define FIXED_FMA_STAGES 5 -`define FIXED_FMA_DOT_STAGES (2 * `FIXED_FMA_STAGES) -`define LERP_STAGES `FIXED_FMA_DOT_STAGES - -typedef logic signed[31:0] fixed; -typedef fixed[2:0] fixed_tri; - -`define EDGE_P0_TO_P1 0 -`define EDGE_P1_TO_P2 1 -`define EDGE_P2_TO_P0 2 - -typedef struct packed -{ - fixed x, y; -} raster_xy; - -typedef struct packed -{ - fixed z, w; -} raster_zw; - -typedef struct packed -{ - raster_xy xy; - raster_zw zw; -} raster_xyzw; - -typedef struct packed -{ - fp x, y, z, w; -} fp_xyzw; - -typedef logic[8:0] coarse_dim; - -`define GFX_MASK_SRAM_STAGES 3 -`define GFX_MASK_STAGES (1 + `GFX_MASK_SRAM_STAGES + 1) - 
-`define GFX_SCANOUT_FIFO_DEPTH 16 // Ajustable - -`define GFX_SETUP_BOUNDS_STAGES 3 -`define GFX_SETUP_EDGE_STAGES (1 + `FIXED_FMA_DOT_STAGES) -`define GFX_SETUP_OFFSETS_STAGES 2 -`define GFX_SETUP_STAGES (`GFX_SETUP_BOUNDS_STAGES \ - + `GFX_SETUP_EDGE_STAGES \ - + `GFX_SETUP_OFFSETS_STAGES) - -`define GFX_FINE_STAGES 2 - -`define GFX_RASTER_BITS 1 // Solía ser 2, pero la FPGA no da para tanto -`define GFX_RASTER_SUB_BITS 4 -`define GFX_RASTER_PAD_BITS ($bits(fixed) - $bits(coarse_dim) - `FIXED_FRAC - `GFX_RASTER_BITS) -`define GFX_RASTER_SIZE (1 << `GFX_RASTER_BITS) -`define GFX_RASTER_OFFSETS (1 << (2 * `GFX_RASTER_BITS)) - -typedef struct packed -{ - logic[`GFX_RASTER_SUB_BITS - 1:0] num; - logic[`FIXED_FRAC - `GFX_RASTER_SUB_BITS - 1:0] prec; -} raster_sub; - -typedef struct packed -{ - logic sign; - logic[`GFX_RASTER_PAD_BITS - 1:0] padding; - logic[$bits(coarse_dim) - 2:0] coarse; - logic[`GFX_RASTER_BITS - 1:0] fine; - raster_sub sub; -} raster_prec; - -typedef struct packed -{ - raster_prec x, y; -} raster_xy_prec; - -typedef fixed[`GFX_RASTER_OFFSETS - 1:0] raster_offsets; -typedef raster_offsets[2:0] raster_offsets_tri; - -`define GFX_FINE_LANES (`GFX_RASTER_SIZE * `GFX_RASTER_SIZE) - -typedef struct packed -{ - xy_coord x, y; -} frag_xy; - -typedef frag_xy[`GFX_FINE_LANES - 1:0] frag_xy_lanes; -typedef logic[`GFX_FINE_LANES - 1:0] paint_lanes; -typedef fixed[`COLOR_CHANNELS - 1:0] color_lerp_lanes; -typedef fixed_tri[`GFX_FINE_LANES - 1:0] bary_lanes; - -typedef struct packed -{ - linear_coord addr; - rgb32 color; -} frag_paint; - -`define GFX_FRAG_ADDR_STAGES 3 -`define GFX_FRAG_BARY_STAGES (`FIXED_DIV_STAGES + 2 + `FIXED_DIV_STAGES) -`define GFX_FRAG_SHADE_STAGES (`LERP_STAGES + 1) -`define GFX_FRAG_STAGES (`GFX_FRAG_BARY_STAGES + `GFX_FRAG_SHADE_STAGES) - -`define GFX_MEM_WORD_ADDR_BITS 25 -`define GFX_MEM_DATA_BITS 16 // No puedo hacer nada al respecto -`define GFX_MEM_SUBWORD_BITS ($clog2(`GFX_MEM_DATA_BITS / 8)) -`define GFX_MEM_ADDR_BITS 
(`GFX_MEM_WORD_ADDR_BITS + `GFX_MEM_SUBWORD_BITS) -`define GFX_MEM_RESPONSE_DEPTH 2 // Ajustar -`define GFX_MEM_TRANS_DEPTH 4 // NO TOCAR, ver `GFX_MEM_MAX_PENDING_READS -`define GFX_MEM_DISPATCH_DEPTH 8 // Nótese que platform.vram_0.s1.maximumPendingReadTransactions = 7 - -// NO TOCAR. Esto debe coincidir perfectamente con gfx_hw.tcl -`define GFX_VRAM_MAX_PENDING_READS 7 // platform.vram_0.s1.maximumPendingReadTransactions -`define GFX_MEM_MAX_PENDING_READS (1 + `GFX_MEM_TRANS_DEPTH + 1 + `GFX_VRAM_MAX_PENDING_READS) - -typedef logic[`GFX_MEM_DATA_BITS - 1:0] vram_word; -typedef logic[`GFX_MEM_ADDR_BITS - 1:0] vram_byte_addr; -typedef logic[`GFX_MEM_WORD_ADDR_BITS - 1:0] vram_addr; - -`define GFX_INSN_BITS 32 -`define GFX_INSN_ADDR_BITS (`GFX_MEM_WORD_ADDR_BITS - $clog2(`GFX_INSN_BITS / `GFX_MEM_DATA_BITS)) -`define GFX_INSN_SUBWORD_BITS (`GFX_MEM_ADDR_BITS - `GFX_INSN_ADDR_BITS) -`define GFX_LANE_BITS $bits(mat4) -`define GFX_LANE_ADDR_BITS (`GFX_MEM_WORD_ADDR_BITS - $clog2(`GFX_LANE_BITS / `GFX_MEM_DATA_BITS)) -`define GFX_LANE_SUBWORD_BITS (`GFX_MEM_ADDR_BITS - `GFX_LANE_ADDR_BITS) -`define GFX_INSN_BITS_IN_LANE (`GFX_LANE_SUBWORD_BITS - `GFX_INSN_SUBWORD_BITS) - -typedef logic[`GFX_INSN_BITS - 1:0] insn_word; -typedef logic[`GFX_LANE_BITS - 1:0] lane_word; -typedef logic[`GFX_INSN_ADDR_BITS - 1:0] vram_insn_addr; -typedef logic[`GFX_LANE_ADDR_BITS - 1:0] vram_lane_addr; - -typedef logic[5:0] cmd_addr; -typedef logic[31:0] cmd_word; - -`define GFX_CMD_REG_ID 3'b000 -`define GFX_CMD_REG_SCAN 3'b001 -`define GFX_CMD_REG_HEADER_BASE 3'b010 -`define GFX_CMD_REG_HEADER_SIZE 3'b011 -`define GFX_CMD_REG_FB_BASE_A 3'b100 -`define GFX_CMD_REG_FB_BASE_B 3'b101 - -typedef struct packed -{ - logic[$bits(cmd_word) - $bits(vram_insn_addr) - `GFX_INSN_SUBWORD_BITS - 1:0] pad; - vram_insn_addr addr; - logic[`GFX_INSN_SUBWORD_BITS - 1:0] sub; -} cmd_insn_ptr; - -typedef struct packed -{ - logic[$bits(cmd_word) - $bits(vram_lane_addr) - `GFX_LANE_SUBWORD_BITS - 1:0] pad; - 
vram_lane_addr addr; - logic[`GFX_LANE_SUBWORD_BITS - 1:0] sub; -} cmd_lane_ptr; - -`define GFX_FETCH_FIFO_DEPTH 8 - -`define GFX_BATCH_FIFO_DEPTH 4 -`define GFX_SP_LANES `VECS_PER_MAT - -typedef logic[`GFX_SP_LANES - 1:0] lane_mask; -typedef logic[`FLOATS_PER_VEC - 1:0] vec_mask; - -typedef logic[`FLOATS_PER_VEC - 1:0][$clog2(`FLOATS_PER_VEC) - 1:0] swizzle_lanes; - -`define GFX_SP_REG_BITS 3 -`define GFX_SP_REG_COUNT (1 << `GFX_SP_REG_BITS) - -typedef logic[`GFX_SP_REG_BITS - 1:0] vreg_num; - -typedef struct packed -{ - logic stream, - combiner, - shuffler; -} ex_units; - -typedef struct packed -{ - logic is_swizzle, - is_broadcast; - fp imm; - vec_mask select_mask; - swizzle_lanes swizzle_op; -} shuffler_deco; - -typedef struct packed -{ - logic writeback, - read_src_a, - read_src_b, - clear_lanes; - vreg_num dst, - src_a, - src_b; - ex_units ex; - shuffler_deco shuffler; -} insn_deco; // "insn_decode" ya existe en core, esto es confuso pero lo hice por tiempo - -typedef struct packed -{ - vreg_num dst; - mat4 data; -} wb_op; - -`define GFX_SP_COMBINER_FIFO_DEPTH 4 // TODO: optimizar esto - -`define GFX_SP_WB_STAGES 2 - -`endif diff --git a/rtl/gfx/gfx_dot.sv b/rtl/gfx/gfx_dot.sv deleted file mode 100644 index 9c21c23..0000000 --- a/rtl/gfx/gfx_dot.sv +++ /dev/null @@ -1,49 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_dot -( - input logic clk, - - input logic stall_mul, - stall_fold, - feedback, - feedback_last, - - input vec4 a, - b, - - output fp q -); - - vec4 products_fold, products_mul; - - gfx_fold fold - ( - .vec(products_fold), - .stall(stall_fold), - .* - ); - - genvar i; - generate - for (i = 0; i < `FLOATS_PER_VEC; ++i) begin: entries - gfx_fp_mul entry_i - ( - .a(a[i]), - .b(b[i]), - .q(products_mul[i]), - .stall(stall_mul), - .* - ); - - gfx_skid_buf #(.WIDTH($bits(fp))) skid_i - ( - .in(products_mul[i]), - .out(products_fold[i]), - .stall(stall_mul), - .* - ); - end - endgenerate - -endmodule diff --git a/rtl/gfx/gfx_fifo.sv 
b/rtl/gfx/gfx_fifo.sv deleted file mode 100644 index e9fa8f5..0000000 --- a/rtl/gfx/gfx_fifo.sv +++ /dev/null @@ -1,98 +0,0 @@ -module gfx_fifo -#(parameter WIDTH=0, DEPTH=0) -( - input logic clk, - rst_n, - - input logic[WIDTH - 1:0] in, - input logic in_valid, - output logic in_ready, - - input logic out_ready, - output logic out_valid, - output logic[WIDTH - 1:0] out -); - - logic do_read, do_write, full_if_eq, in_stall, out_stall, - may_read, may_write, read, read_ok, write; - - logic[WIDTH - 1:0] fifo[DEPTH], read_data, write_data; - logic[$clog2(DEPTH) - 1:0] read_ptr, write_ptr; - - assign do_read = read && may_read; - assign do_write = write && may_write; - - always_comb begin - may_read = full_if_eq; - may_write = !full_if_eq; - - if (read) - may_write = 1; - - if (read_ptr != write_ptr) begin - may_read = 1; - may_write = 1; - end - end - - gfx_skid_flow in_flow - ( - .stall(in_stall), - .out_ready(may_write), - .out_valid(write), - .* - ); - - gfx_skid_flow out_flow - ( - .stall(out_stall), - .in_ready(read), - .in_valid(read_ok), - .* - ); - - gfx_skid_buf #(.WIDTH(WIDTH)) in_skid - ( - .out(write_data), - .stall(in_stall), - .* - ); - - gfx_skid_buf #(.WIDTH(WIDTH)) out_skid - ( - .in(read_data), - .stall(out_stall), - .* - ); - - always_ff @(posedge clk or negedge rst_n) - if (!rst_n) begin - read_ok <= 0; - read_ptr <= 0; - write_ptr <= 0; - full_if_eq <= 0; - end else begin - if (!out_stall) - read_ok <= read && may_read; - - if (do_read) - read_ptr <= read_ptr + 1; - - if (do_write) - write_ptr <= write_ptr + 1; - - if (do_read && !do_write) - full_if_eq <= 0; - else if (!do_read && do_write) - full_if_eq <= 1; - end - - always_ff @(posedge clk) begin - if (!out_stall) - read_data <= fifo[read_ptr]; - - if (may_write) - fifo[write_ptr] <= write_data; - end - -endmodule diff --git a/rtl/gfx/gfx_fifo_overflow.sv b/rtl/gfx/gfx_fifo_overflow.sv deleted file mode 100644 index c9cb3de..0000000 --- a/rtl/gfx/gfx_fifo_overflow.sv +++ /dev/null @@ -1,34 
+0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_fifo_overflow -#(parameter DEPTH=0) -( - input logic clk, - rst_n, - - input logic down, - out_ready, - out_valid, - - output logic empty, - down_safe -); - - logic up; - logic[$clog2(DEPTH + 1) - 1:0] pending; - - assign up = out_ready && out_valid; - assign empty = pending == 0; - assign down_safe = up || pending < DEPTH - 1; - - always_ff @(posedge clk or negedge rst_n) - if (!rst_n) - pending <= 0; - else begin - if (up && !down) - pending <= pending - 1; - else if (!up && down) - pending <= pending + 1; - end - -endmodule diff --git a/rtl/gfx/gfx_fix_floats.sv b/rtl/gfx/gfx_fix_floats.sv deleted file mode 100644 index fe3ab21..0000000 --- a/rtl/gfx/gfx_fix_floats.sv +++ /dev/null @@ -1,49 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_fix_floats -( - input logic clk, - rst_n, - - input vec4 in_vertex_a, - in_vertex_b, - in_vertex_c, - input logic in_valid, - output logic in_ready, - - input logic out_ready, - output logic out_valid, - output raster_xyzw out_vertex_a, - out_vertex_b, - out_vertex_c -); - - logic stall; - - gfx_pipeline_flow #(.STAGES(`FP_FIX_STAGES + 1)) flow - ( - .* - ); - - gfx_fix_vertex fix_a - ( - .in_vertex(in_vertex_a), - .out_vertex(out_vertex_a), - .* - ); - - gfx_fix_vertex fix_b - ( - .in_vertex(in_vertex_b), - .out_vertex(out_vertex_b), - .* - ); - - gfx_fix_vertex fix_c - ( - .in_vertex(in_vertex_c), - .out_vertex(out_vertex_c), - .* - ); - -endmodule diff --git a/rtl/gfx/gfx_fix_vertex.sv b/rtl/gfx/gfx_fix_vertex.sv deleted file mode 100644 index 728f3b6..0000000 --- a/rtl/gfx/gfx_fix_vertex.sv +++ /dev/null @@ -1,64 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_fix_vertex -( - input logic clk, - - input vec4 in_vertex, - input logic stall, - - output raster_xyzw out_vertex -); - - fixed x, y; - raster_xyzw fixed_vertex, corrected; - fixed[`FLOATS_PER_VEC - 1:0] fixed_vals, corrected_vals, skid_vals; - - assign out_vertex = skid_vals; - assign fixed_vertex = fixed_vals; - 
assign corrected_vals = corrected; - - assign x = fixed_vertex.xy.x; - assign y = fixed_vertex.xy.y; - - genvar i; - generate - for (i = 0; i < `FLOATS_PER_VEC; ++i) begin: components - gfx_fp_fix fix - ( - .in(in_vertex[i]), - .out(fixed_vals[i]), - .* - ); - - gfx_skid_buf #(.WIDTH($bits(fixed))) skid - ( - .in(corrected_vals[i]), - .out(skid_vals[i]), - .* - ); - end - endgenerate - - always_ff @(posedge clk) - if (!stall) begin - /* x * `GFX_X_RES / 2 - * = x * 320 - * = x * 64 * 5 - * = (x * 5) << 6 - * = (x * (4 + 1)) << 6 - * = ((x << 2) + x) << 6 - * - * y * `GFX_Y_RES / 2 - * = y * 240 - * = y * 16 * 15 - * = (y * 15) << 4 - * = (y * (16 - 1)) << 4 - * = ((y << 4) - y) << 4 - */ - corrected.zw <= fixed_vertex.zw; - corrected.xy.x <= ((x << 2) + x) << 6; - corrected.xy.y <= ((y << 4) - y) << 4; - end - -endmodule diff --git a/rtl/gfx/gfx_fixed_div.sv b/rtl/gfx/gfx_fixed_div.sv deleted file mode 100644 index e562072..0000000 --- a/rtl/gfx/gfx_fixed_div.sv +++ /dev/null @@ -1,77 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_fixed_div -( - input logic clk, - - input fixed z, - d, - input logic stall, - - output fixed q -); - - localparam DIV_BITS = `FIXED_FRAC + $bits(fixed); - - fixed d_hold, z_hold; - logic signed[DIV_BITS - 1:0] z_int, q_int; - - assign q = q_int[$bits(q) - 1:0]; - assign z_int = {z_hold, {`FIXED_FRAC{1'b0}}}; - -`ifndef VERILATOR - lpm_divide div - ( - .aclr(0), - .clock(clk), - .clken(!stall), - .numer(z_int), - .denom(d_hold), - .remain(), - .quotient(q_int) - ); - - defparam - div.lpm_widthn = DIV_BITS, - div.lpm_widthd = $bits(fixed), - div.lpm_nrepresentation = "SIGNED", - div.lpm_drepresentation = "SIGNED", - div.lpm_pipeline = `FIXED_DIV_STAGES - `FIXED_DIV_PIPES, - div.maximize_speed = 6; - - gfx_pipes #(.WIDTH($bits(z)), .DEPTH(`FIXED_DIV_PIPES)) z_pipes - ( - .in(z), - .out(z_hold), - .* - ); - - gfx_pipes #(.WIDTH($bits(d)), .DEPTH(`FIXED_DIV_PIPES)) d_pipes - ( - .in(d), - .out(d_hold), - .* - ); -`else - logic 
signed[DIV_BITS - 1:0] d_int_hold, z_int_hold; - - assign q_int = z_int_hold / d_int_hold; - assign z_hold = z; - assign d_int_hold = {{`FIXED_FRAC{d_hold[$bits(d_hold) - 1]}}, d_hold}; - - gfx_pipes #(.WIDTH($bits(z_int)), .DEPTH(`FIXED_DIV_STAGES)) z_int_pipes - ( - .in(z_int), - .out(z_int_hold), - .* - ); - - gfx_pipes #(.WIDTH($bits(d)), .DEPTH(`FIXED_DIV_STAGES)) d_pipes - ( - .in(d), - .out(d_hold), - .* - ); -`endif - -endmodule diff --git a/rtl/gfx/gfx_fixed_fma.sv b/rtl/gfx/gfx_fixed_fma.sv deleted file mode 100644 index ec26477..0000000 --- a/rtl/gfx/gfx_fixed_fma.sv +++ /dev/null @@ -1,73 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_fixed_fma -( - input logic clk, - - input fixed a, - b, - c, - input logic stall, - - output fixed q -); - -`ifndef VERILATOR - logic[2 * $bits(fixed) - `FIXED_FRAC - 1:0] q_ext; - assign q = q_ext[$bits(fixed) - 1:0]; - - lpm_mult mult - ( - .aclr(0), - .clock(clk), - .clken(!stall), - - .sum({c, {`FIXED_FRAC{1'b0}}}), - .dataa(a), - .datab(b), - .result(q_ext) - ); - - defparam - mult.lpm_widtha = $bits(fixed), - mult.lpm_widthb = $bits(fixed), - mult.lpm_widths = $bits(fixed) + `FIXED_FRAC, - /* Esto es crucial. No está documentado en ningún lado (aparte de un - * comentario en r/fpga). Si lpm_widthp < lpm_widtha + lpm_widthb, - * entonces result contiene los lpm_widthp bits más significativos - * del producto, no los menos significativos como tendría sentido. 
- */ - mult.lpm_widthp = 2 * $bits(fixed) - `FIXED_FRAC, - mult.lpm_representation = "SIGNED", - mult.lpm_pipeline = `FIXED_FMA_STAGES; -`else - logic[$bits(fixed) + `FIXED_FRAC - 1:0] q_ext; - - fixed a_hold, b_hold, c_hold; - - assign q = q_ext[$bits(fixed) + `FIXED_FRAC - 1:`FIXED_FRAC] + c_hold; - assign q_ext = a_hold * b_hold; - - gfx_pipes #(.WIDTH($bits(a)), .DEPTH(`FIXED_FMA_STAGES)) a_pipes - ( - .in(a), - .out(a_hold), - .* - ); - - gfx_pipes #(.WIDTH($bits(b)), .DEPTH(`FIXED_FMA_STAGES)) b_pipes - ( - .in(b), - .out(b_hold), - .* - ); - - gfx_pipes #(.WIDTH($bits(c)), .DEPTH(`FIXED_FMA_STAGES)) c_pipes - ( - .in(c), - .out(c_hold), - .* - ); -`endif - -endmodule diff --git a/rtl/gfx/gfx_fixed_fma_dot.sv b/rtl/gfx/gfx_fixed_fma_dot.sv deleted file mode 100644 index c19b49e..0000000 --- a/rtl/gfx/gfx_fixed_fma_dot.sv +++ /dev/null @@ -1,49 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_fixed_fma_dot -( - input logic clk, - - input fixed a0, - b0, - a1, - b1, - c, - input logic stall, - - output fixed q -); - - fixed q0, a1_hold, b1_hold; - - gfx_fixed_fma fma0 - ( - .a(a0), - .b(b0), - .q(q0), - .* - ); - - gfx_pipes #(.WIDTH($bits(fixed)), .DEPTH(`FIXED_FMA_STAGES)) a_pipes - ( - .in(a1), - .out(a1_hold), - .* - ); - - gfx_pipes #(.WIDTH($bits(fixed)), .DEPTH(`FIXED_FMA_STAGES)) b_pipes - ( - .in(b1), - .out(b1_hold), - .* - ); - - gfx_fixed_fma fma1 - ( - .a(a1_hold), - .b(b1_hold), - .c(q0), - .* - ); - -endmodule diff --git a/rtl/gfx/gfx_flush_flow.sv b/rtl/gfx/gfx_flush_flow.sv deleted file mode 100644 index a0e43d7..0000000 --- a/rtl/gfx/gfx_flush_flow.sv +++ /dev/null @@ -1,45 +0,0 @@ -module gfx_flush_flow -#(parameter STAGES=0) -( - input logic clk, - rst_n, - - input logic in_valid, - out_ready, - - output logic out_valid, - commit, - flush -); - - logic was_valid, was_ready; - logic[STAGES - 1:0] valid; - - assign flush = was_valid && !was_ready; - assign commit = was_valid && was_ready; - assign out_valid = valid[STAGES - 1] && !flush; - - 
always_ff @(posedge clk or negedge rst_n) - if (!rst_n) begin - was_ready <= 0; - was_valid <= 0; - - for (integer i = 0; i < STAGES; ++i) - valid[i] <= 0; - end else begin - was_ready <= out_ready; - was_valid <= out_valid; - - if (!flush) - valid[0] <= in_valid; - else - valid[0] <= 0; - - for (integer i = 1; i < STAGES; ++i) - if (!flush) - valid[i] <= valid[i - 1]; - else - valid[i] <= 0; - end - -endmodule diff --git a/rtl/gfx/gfx_fold.sv b/rtl/gfx/gfx_fold.sv deleted file mode 100644 index 616d868..0000000 --- a/rtl/gfx/gfx_fold.sv +++ /dev/null @@ -1,54 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_fold -( - input logic clk, - - input vec4 vec, - input logic stall, - feedback, - feedback_last, - - output fp q -); - - fp q_add; - vec2 feedback_vec, queued[`FP_ADD_STAGES]; - - assign feedback_vec = queued[`FP_ADD_STAGES - 1]; - - gfx_fp_add add - ( - .a(feedback ? q_add : vec[0]), - .b(feedback ? feedback_vec[feedback_last] : vec[1]), - .q(q_add), - .* - ); - - gfx_skid_buf #(.WIDTH($bits(q))) skid - ( - .in(q_add), - .out(q), - .* - ); - - always_ff @(posedge clk) - if (!stall) begin - if (feedback) - queued[0] <= feedback_vec; - else begin - queued[0][0] <= vec[2]; - queued[0][1] <= vec[3]; - end - end - - genvar i; - generate - for (i = 1; i < `FP_ADD_STAGES; ++i) begin: stages - always_ff @(posedge clk) - if (!stall) - queued[i] <= queued[i - 1]; - end - endgenerate - -endmodule diff --git a/rtl/gfx/gfx_fold_flow.sv b/rtl/gfx/gfx_fold_flow.sv deleted file mode 100644 index 8f23b8f..0000000 --- a/rtl/gfx/gfx_fold_flow.sv +++ /dev/null @@ -1,61 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_fold_flow -( - input logic clk, - rst_n, - - input logic in_valid, - out_ready, - - output logic in_ready, - out_valid, - stall, - feedback, - feedback_last -); - - logic skid_ready; - index4 rounds[`FP_ADD_STAGES], last_round; - - assign in_ready = skid_ready && !feedback; - - assign feedback = last_round[1] ^ last_round[0]; - assign feedback_last = last_round[1]; 
- - assign last_round = rounds[`FP_ADD_STAGES - 1]; - - gfx_skid_flow skid - ( - .in_valid(last_round == `INDEX4_MAX), - .in_ready(skid_ready), - .* - ); - - always_ff @(posedge clk or negedge rst_n) - if (!rst_n) - rounds[0] <= `INDEX4_MIN; - else if (!stall) - unique case (last_round) - 2'b01: - rounds[0] <= 2'b10; - - 2'b10: - rounds[0] <= 2'b11; - - 2'b00, 2'b11: - rounds[0] <= {1'b0, in_valid}; - endcase - - genvar i; - generate - for (i = 1; i < `FP_ADD_STAGES; ++i) begin: pipeline - always_ff @(posedge clk or negedge rst_n) - if (!rst_n) - rounds[i] <= `INDEX4_MIN; - else if (!stall) - rounds[i] <= rounds[i - 1]; - end - endgenerate - -endmodule diff --git a/rtl/gfx/gfx_fp_add.sv b/rtl/gfx/gfx_fp_add.sv deleted file mode 100644 index 0b3058a..0000000 --- a/rtl/gfx/gfx_fp_add.sv +++ /dev/null @@ -1,41 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_fp_add -( - input logic clk, - - input fp a, - b, - input logic stall, - - output fp q -); - -`ifndef VERILATOR - ip_fp_add ip_add - ( - .en(!stall), - .areset(0), - .* - ); -`else - fp a_pop, b_pop; - - assign q = $c("taller::fp_add(", a_pop, ", ", b_pop, ")"); - - gfx_pipes #(.WIDTH($bits(a)), .DEPTH(`FP_ADD_STAGES)) a_pipes - ( - .in(a), - .out(a_pop), - .* - ); - - gfx_pipes #(.WIDTH($bits(b)), .DEPTH(`FP_ADD_STAGES)) b_pipes - ( - .in(b), - .out(b_pop), - .* - ); -`endif - -endmodule diff --git a/rtl/gfx/gfx_fp_fix.sv b/rtl/gfx/gfx_fp_fix.sv deleted file mode 100644 index b38e0e3..0000000 --- a/rtl/gfx/gfx_fp_fix.sv +++ /dev/null @@ -1,34 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_fp_fix -( - input logic clk, - - input fp in, - input logic stall, - - output fixed out -); - -`ifndef VERILATOR - ip_fp_fix ip_fix - ( - .a(in), - .q(out), - .en(!stall), - .areset(0), - .* - ); -`else - fp pop; - - assign out = $c("taller::fp_fix(", pop, ")"); - - gfx_pipes #(.WIDTH($bits(in)), .DEPTH(`FP_FIX_STAGES)) pipes - ( - .out(pop), - .* - ); -`endif - -endmodule diff --git a/rtl/gfx/gfx_fp_mul.sv 
b/rtl/gfx/gfx_fp_mul.sv deleted file mode 100644 index 7ff3c02..0000000 --- a/rtl/gfx/gfx_fp_mul.sv +++ /dev/null @@ -1,41 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_fp_mul -( - input logic clk, - - input fp a, - b, - input logic stall, - - output fp q -); - -`ifndef VERILATOR - ip_fp_mul ip_mul - ( - .en(!stall), - .areset(0), - .* - ); -`else - fp a_pop, b_pop; - - assign q = $c("taller::fp_mul(", a_pop, ", ", b_pop, ")"); - - gfx_pipes #(.WIDTH($bits(a)), .DEPTH(`FP_MUL_STAGES)) a_pipes - ( - .in(a), - .out(a_pop), - .* - ); - - gfx_pipes #(.WIDTH($bits(b)), .DEPTH(`FP_MUL_STAGES)) b_pipes - ( - .in(b), - .out(b_pop), - .* - ); -`endif - -endmodule diff --git a/rtl/gfx/gfx_frag.sv b/rtl/gfx/gfx_frag.sv deleted file mode 100644 index d61de72..0000000 --- a/rtl/gfx/gfx_frag.sv +++ /dev/null @@ -1,79 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_frag -( - input logic clk, - rst_n, - - input frag_xy frag, - input fixed_tri bary, - ws, - input logic in_valid, - output logic in_ready, - - input logic out_ready, - output logic out_valid, - output frag_paint out -); - - logic stall; - frag_paint frag_out; - - gfx_pipeline_flow #(.STAGES(`GFX_FRAG_STAGES)) addr_flow - ( - .* - ); - - linear_coord linear; - - gfx_frag_addr addr - ( - .* - ); - - localparam ADDR_WAIT_STAGES = `GFX_FRAG_STAGES - `GFX_FRAG_ADDR_STAGES; - - gfx_pipes #(.WIDTH($bits(linear_coord)), .DEPTH(ADDR_WAIT_STAGES)) addr_pipes - ( - .in(linear), - .out(frag_out.addr), - .* - ); - - fixed b1, b2; - - gfx_frag_bary frag_bary - ( - .* - ); - - color_lerp_lanes argb0, argb1_argb0, argb2_argb0; - - assign argb0[3] = 32'd0 << 8; - assign argb0[2] = 32'd255 << 8; - assign argb0[1] = 32'd0 << 8; - assign argb0[0] = 32'd0 << 8; - - assign argb1_argb0[3] = 32'd0 << 8; - assign argb1_argb0[2] = (-32'sd255) << 8; - assign argb1_argb0[1] = 32'd255 << 8; - assign argb1_argb0[0] = 32'd0 << 8; - - assign argb2_argb0[3] = 32'd0 << 8; - assign argb2_argb0[2] = (-32'sd255) << 8; - assign argb2_argb0[1] = 32'd0 
<< 8; - assign argb2_argb0[0] = 32'd255 << 8; - - gfx_frag_shade shade - ( - .color(frag_out.color), - .* - ); - - gfx_skid_buf #(.WIDTH($bits(frag_out))) skid - ( - .in(frag_out), - .* - ); - -endmodule diff --git a/rtl/gfx/gfx_frag_addr.sv b/rtl/gfx/gfx_frag_addr.sv deleted file mode 100644 index 23bd315..0000000 --- a/rtl/gfx/gfx_frag_addr.sv +++ /dev/null @@ -1,59 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_frag_addr -( - input logic clk, - - input frag_xy frag, - input logic stall, - - output linear_coord linear -); - - /* frag está expresado en un rango normalizado con igual distribución - * entre positivos y negativos. Para obtener la dirección lineal que le - * corresponde, debemos corregir esto para que el mínimo sea cero en - * cada coordenada. Luego de eso, - * - * linear = y_corregido * `GFX_X_RES + x_corregido - * - * Afortunadamente, esto no necesita una FMA, como procederé a demostrar: - * - * y * `GFX_X_RES + x - * = y * 640 + x - * = y * 128 * 5 + x - * = ((y * 5) << 7) + x - * = ((y * (4 + 1)) << 7) + x - * = (((y << 2) + y) << 7) + x - * = (y << 9) + (y << 7) + x - * - * Para corregir x ([-320, 319]) se le suma `GFX_RES_X / 2. 
- * - * Para corregir y ([-240, 239]) se debe tomar en cuenta que las - * direcciones lineales incrementan hacia abajo, así que: - * y_corregido = `GFX_RES_Y / 2 - 1 - y - */ - - localparam ZERO_PAD = $bits(linear_coord) - $bits(xy_coord); - - // Estas constantes asumen `GFX_X_RES == 640 - localparam Y_SHIFT0 = 9, Y_SHIFT1 = 7; - - xy_coord bias_x, bias_y; - linear_coord row_start, x_biased, x_hold, y_biased; - - assign bias_x = `GFX_X_RES / 2; - assign bias_y = `GFX_Y_RES / 2 - 1; - - always_ff @(posedge clk) - if (!stall) begin - x_biased <= {{ZERO_PAD{1'b0}}, frag.x + bias_x}; - y_biased <= {{ZERO_PAD{1'b0}}, bias_y - frag.y}; - - x_hold <= x_biased; - row_start <= (y_biased << Y_SHIFT0) + (y_biased << Y_SHIFT1); - - linear <= row_start + x_hold; - end - -endmodule diff --git a/rtl/gfx/gfx_frag_bary.sv b/rtl/gfx/gfx_frag_bary.sv deleted file mode 100644 index 4f4f452..0000000 --- a/rtl/gfx/gfx_frag_bary.sv +++ /dev/null @@ -1,78 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_frag_bary -( - input logic clk, - - input fixed_tri bary, - ws, - input logic stall, - - output fixed b1, - b2 -); - - fixed area, b0_w0, b1_w1, b2_w2, b1_w1_b2_w2, hold_b0_w0, hold_b1_w1, hold_b2_w2; - fixed_tri bs_ws, orthographic_bs; - - assign b0_w0 = bs_ws[0]; - assign b1_w1 = bs_ws[1]; - assign b2_w2 = bs_ws[2]; - - assign orthographic_bs[0] = bary[`EDGE_P1_TO_P2]; - assign orthographic_bs[1] = bary[`EDGE_P2_TO_P0]; - assign orthographic_bs[2] = bary[`EDGE_P0_TO_P1]; - - genvar i; - generate - for (i = 0; i < 3; ++i) begin: vertices - gfx_fixed_div div_b_w - ( - .z(orthographic_bs[i]), - .d(ws[i]), - .q(bs_ws[i]), - .* - ); - end - endgenerate - - localparam AREA_STAGES = 2; - - gfx_pipes #(.WIDTH($bits(fixed)), .DEPTH(AREA_STAGES)) b1_w1_pipes - ( - .in(b1_w1), - .out(hold_b1_w1), - .* - ); - - gfx_pipes #(.WIDTH($bits(fixed)), .DEPTH(AREA_STAGES)) b2_w2_pipes - ( - .in(b2_w2), - .out(hold_b2_w2), - .* - ); - - gfx_fixed_div norm_b1 - ( - .z(hold_b1_w1), - .d(area), - .q(b1), - .* 
- ); - - gfx_fixed_div norm_b2 - ( - .z(hold_b2_w2), - .d(area), - .q(b2), - .* - ); - - always_ff @(posedge clk) - if (!stall) begin - area <= hold_b0_w0 + b1_w1_b2_w2; - hold_b0_w0 <= b0_w0; - b1_w1_b2_w2 <= b1_w1 + b2_w2; - end - -endmodule diff --git a/rtl/gfx/gfx_frag_shade.sv b/rtl/gfx/gfx_frag_shade.sv deleted file mode 100644 index d2ad7ce..0000000 --- a/rtl/gfx/gfx_frag_shade.sv +++ /dev/null @@ -1,53 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_frag_shade -( - input logic clk, - - input fixed b1, - b2, - input color_lerp_lanes argb0, - argb1_argb0, - argb2_argb0, - input logic stall, - - output rgb32 color -); - - struct packed - { - logic sign; - logic[$bits(fixed) - `FIXED_FRAC - 2:0] out_of_range; - color8 color; - logic[`FIXED_FRAC - $bits(color8) - 1:0] sub; - } lerped[`COLOR_CHANNELS]; - - fixed channel_lerp[`COLOR_CHANNELS]; - color8[`COLOR_CHANNELS - 1:0] out; - - assign color = out; - - genvar i; - generate - for (i = 0; i < `COLOR_CHANNELS; ++i) begin: channels - assign lerped[i] = channel_lerp[i]; - - gfx_lerp lerp - ( - .q(channel_lerp[i]), - .q0(argb0[i]), - .q1_q0(argb1_argb0[i]), - .q2_q0(argb2_argb0[i]), - .* - ); - - always_ff @(posedge clk) - if (!stall) begin - out[i] <= lerped[i].color; - if (lerped[i].sign || |lerped[i].out_of_range) - out[i] <= {($bits(color8)){!lerped[i].sign}}; - end - end - endgenerate - -endmodule diff --git a/rtl/gfx/gfx_funnel.sv b/rtl/gfx/gfx_funnel.sv deleted file mode 100644 index 4710111..0000000 --- a/rtl/gfx/gfx_funnel.sv +++ /dev/null @@ -1,96 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_funnel -( - input logic clk, - rst_n, - - input frag_xy_lanes fragments, - input bary_lanes barys, - input fixed_tri raster_ws, - input paint_lanes in_valid, - output logic in_ready, - - input logic out_ready, - output logic out_valid, - output frag_xy frag, - output fixed_tri frag_bary, - frag_ws -); - - logic skid_ready, stall, ready, valid; - frag_xy next_frag, out_frag; - fixed_tri next_bary, out_bary, 
out_ws, ws_hold; - bary_lanes barys_hold; - paint_lanes current, next; - frag_xy_lanes fragments_hold; - - assign ready = !(|next); - assign in_ready = skid_ready && ready; - - gfx_skid_buf #(.WIDTH($bits(frag))) skid_frag - ( - .in(out_frag), - .out(frag), - .* - ); - - gfx_skid_buf #(.WIDTH($bits(frag_bary))) skid_bary - ( - .in(out_bary), - .out(frag_bary), - .* - ); - - gfx_skid_buf #(.WIDTH($bits(frag_ws))) skid_ws - ( - .in(out_ws), - .out(frag_ws), - .* - ); - - gfx_skid_flow skid_flow - ( - .in_ready(skid_ready), - .in_valid(valid), - .* - ); - - always_comb begin - next = 0; - next_bary = {($bits(next_bary)){1'bx}}; - next_frag = {($bits(next_frag)){1'bx}}; - - for (integer i = 0; i < `GFX_FINE_LANES; ++i) - if (current[i]) begin - next = current; - next[i] = 0; - - next_bary = barys_hold[i]; - next_frag = fragments_hold[i]; - end - end - - always_ff @(posedge clk or negedge rst_n) - if (!rst_n) begin - valid <= 0; - current <= 0; - end else if (!stall) begin - valid <= |current; - current <= ready ? in_valid : next; - end - - always_ff @(posedge clk) - if (!stall) begin - if (ready) begin - ws_hold <= raster_ws; - barys_hold <= barys; - fragments_hold <= fragments; - end - - out_ws <= ws_hold; - out_bary <= next_bary; - out_frag <= next_frag; - end - -endmodule diff --git a/rtl/gfx/gfx_lerp.sv b/rtl/gfx/gfx_lerp.sv deleted file mode 100644 index 42e4393..0000000 --- a/rtl/gfx/gfx_lerp.sv +++ /dev/null @@ -1,32 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_lerp -( - input logic clk, - - input fixed b1, - b2, - q0, - q1_q0, - q2_q0, - input logic stall, - - output fixed q -); - - /* Interpolación lineal, trivializada. 
- * - * Esta es la clave: https://fgiesen.wordpress.com/2013/02/06/the-barycentric-conspirac/ - */ - - gfx_fixed_fma_dot fma - ( - .c(q0), - .a0(b1), - .b0(q1_q0), - .a1(b2), - .b1(q2_q0), - .* - ); - -endmodule diff --git a/rtl/gfx/gfx_mask_sram.sv b/rtl/gfx/gfx_mask_sram.sv deleted file mode 100644 index 730ee12..0000000 --- a/rtl/gfx/gfx_mask_sram.sv +++ /dev/null @@ -1,31 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_mask_sram -( - input logic clk, - - input logic set, - write, - input linear_coord write_addr, - read_addr, - output logic mask -); - - logic mem[`GFX_LINEAR_RES]; - logic mask_hold, write_hold, set_hold; - linear_coord read_addr_hold, write_addr_hold; - - always_ff @(posedge clk) begin - mask <= mask_hold; - mask_hold <= mem[read_addr_hold]; - read_addr_hold <= read_addr; - - set_hold <= set; - write_hold <= write; - write_addr_hold <= write_addr; - - if (write_hold) - mem[write_addr_hold] <= set_hold; - end - -endmodule diff --git a/rtl/gfx/gfx_masks.sv b/rtl/gfx/gfx_masks.sv deleted file mode 100644 index 5182bd4..0000000 --- a/rtl/gfx/gfx_masks.sv +++ /dev/null @@ -1,68 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_masks -( - input logic clk, - rst_n, - - input logic swap_buffers, - input cmd_word fb_base_a, - fb_base_b, - - input linear_coord scan_mask_addr, - output logic scan_mask, - - input logic frag_mask_write, - frag_mask_set, - input linear_coord frag_mask_read_addr, - frag_mask_write_addr, - output logic frag_mask, - - output vram_addr frag_base, - scan_base -); - - logic mask_a, mask_b, frag_write_hold, frag_set_hold; - linear_coord scan_addr_hold, frag_write_addr_hold, frag_read_addr_hold; - - gfx_mask_sram sram_a - ( - .set(frag_set_hold), - .mask(mask_a), - .write(swap_buffers && frag_write_hold), - .read_addr(swap_buffers ? 
frag_read_addr_hold : scan_addr_hold), - .write_addr(frag_write_addr_hold), - .* - ); - - gfx_mask_sram sram_b - ( - .set(frag_set_hold), - .mask(mask_b), - .write(!swap_buffers && frag_write_hold), - .read_addr(swap_buffers ? scan_addr_hold : frag_read_addr_hold), - .write_addr(frag_write_addr_hold), - .* - ); - - always_ff @(posedge clk or negedge rst_n) - if (!rst_n) begin - frag_base <= 0; - scan_base <= 0; - end else begin - frag_base <= swap_buffers ? fb_base_a[$bits(vram_addr):1] : fb_base_b[$bits(vram_addr):1]; - scan_base <= swap_buffers ? fb_base_b[$bits(vram_addr):1] : fb_base_a[$bits(vram_addr):1]; - end - - always_ff @(posedge clk) begin - scan_mask <= swap_buffers ? mask_b : mask_a; - scan_addr_hold <= scan_mask_addr; - - frag_mask <= swap_buffers ? mask_a : mask_b; - frag_set_hold <= frag_mask_set; - frag_write_hold <= frag_mask_write; - frag_read_addr_hold <= frag_mask_read_addr; - frag_write_addr_hold <= frag_mask_write_addr; - end - -endmodule diff --git a/rtl/gfx/gfx_mat_mat.sv b/rtl/gfx/gfx_mat_mat.sv deleted file mode 100644 index d03a648..0000000 --- a/rtl/gfx/gfx_mat_mat.sv +++ /dev/null @@ -1,83 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_mat_mat -( - input logic clk, - rst_n, - - input mat4 a, - b, - input logic in_valid, - out_ready, - - output mat4 q, - output logic in_ready, - out_valid -); - - mat4 a_hold, b_hold, b_transpose, q_hold, q_transpose, mul_b; - vec4 mul_q; - logic mul_in_ready, mul_in_valid, mul_out_ready, mul_out_valid; - index4 in_index, out_index; - - assign in_ready = mul_in_ready && in_index == `INDEX4_MIN; - assign out_valid = mul_out_valid && out_index == `INDEX4_MAX; - - assign mul_in_valid = in_valid || in_index != `INDEX4_MIN; - assign mul_out_ready = out_ready || out_index != `INDEX4_MAX; - - gfx_transpose transpose_b - ( - .in(b), - .out(b_transpose) - ); - - gfx_mat_vec mul - ( - .a(in_index == `INDEX4_MIN ? 
a : a_hold), - .x(mul_b[in_index]), - .q(mul_q), - .in_ready(mul_in_ready), - .in_valid(mul_in_valid), - .out_ready(mul_out_ready), - .out_valid(mul_out_valid), - .* - ); - - gfx_transpose transpose_q - ( - .in(q_transpose), - .out(q) - ); - - always_comb begin - mul_b = b_hold; - mul_b[0] = b_transpose[0]; - - q_transpose = q_hold; - q_transpose[`VECS_PER_MAT - 1] = mul_q; - end - - always_ff @(posedge clk or negedge rst_n) - if (!rst_n) begin - in_index <= `INDEX4_MIN; - out_index <= `INDEX4_MIN; - end else begin - if (mul_in_ready && mul_in_valid) - in_index <= in_index + 1; - - if (mul_out_ready && mul_out_valid) - out_index <= out_index + 1; - end - - always_ff @(posedge clk) begin - if (in_ready) begin - a_hold <= a; - b_hold <= b_transpose; - end - - if (mul_out_ready && mul_out_valid) - q_hold[out_index] <= mul_q; - end - -endmodule diff --git a/rtl/gfx/gfx_mat_vec.sv b/rtl/gfx/gfx_mat_vec.sv deleted file mode 100644 index 4be4976..0000000 --- a/rtl/gfx/gfx_mat_vec.sv +++ /dev/null @@ -1,49 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_mat_vec -( - input logic clk, - rst_n, - - input mat4 a, - input vec4 x, - input logic in_valid, - out_ready, - - output vec4 q, - output logic in_ready, - out_valid -); - - logic stall_mul, stall_fold, mul_ready, mul_valid, feedback, feedback_last; - - gfx_pipeline_flow #(.STAGES(`FP_MUL_STAGES)) mul - ( - .stall(stall_mul), - .out_ready(mul_ready), - .out_valid(mul_valid), - .* - ); - - gfx_fold_flow fold - ( - .stall(stall_fold), - .in_ready(mul_ready), - .in_valid(mul_valid), - .* - ); - - genvar i; - generate - for (i = 0; i < `VECS_PER_MAT; ++i) begin: dots - gfx_dot dot_i - ( - .a(a[i]), - .b(x), - .q(q[i]), - .* - ); - end - endgenerate - -endmodule diff --git a/rtl/gfx/gfx_mem.sv b/rtl/gfx/gfx_mem.sv deleted file mode 100644 index fbca2fa..0000000 --- a/rtl/gfx/gfx_mem.sv +++ /dev/null @@ -1,228 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_mem -( - input logic clk, - rst_n, - - input logic mem_waitrequest, - 
mem_readdatavalid, - input vram_word mem_readdata, - output vram_byte_addr mem_address, - output logic mem_read, - mem_write, - output vram_word mem_writedata, - - input vram_addr host_address, - input logic host_read, - host_write, - input vram_word host_writedata, - output logic host_waitrequest, - host_readdatavalid, - output vram_word host_readdata, - - input logic rop_write, - input vram_word rop_writedata, - input vram_addr rop_address, - output logic rop_waitrequest, - - input logic fb_read, - input vram_addr fb_address, - output logic fb_waitrequest, - fb_readdatavalid, - output vram_word fb_readdata, - - input logic batch_read, - input vram_addr batch_address, - output logic batch_waitrequest, - batch_readdatavalid, - output vram_word batch_readdata, - - input logic fetch_read, - input vram_addr fetch_address, - output logic fetch_waitrequest, - fetch_readdatavalid, - output vram_word fetch_readdata -); - - // Este módulo es inaceptable, hay que reescribirlo - - logic mem_rw, trans_in_stall, trans_out_stall, in_ready, in_valid, skid_in_valid, out_ready, - any_readdatavalid, readdatavalid, dispatch_full, dispatch_put, mem_ready; - - vram_word any_readdata, readdata; - logic[$clog2(`GFX_MEM_DISPATCH_DEPTH) - 1:0] next_put_ptr, pop_ptr, put_ptr; - - struct packed - { - logic fb, - host, - batch, - fetch; - } dispatch_in, dispatch_out, dispatch_buf[`GFX_MEM_DISPATCH_DEPTH]; - - struct packed - { - vram_addr address; - logic write, - fb_waitrequest, - host_waitrequest, - batch_waitrequest, - fetch_waitrequest; - vram_word writedata; - } trans_in, trans_out, trans_in_skid, trans_out_skid; - - assign mem_read = mem_rw && !trans_out_skid.write && !dispatch_full; - assign mem_write = mem_rw && trans_out_skid.write; - assign mem_address = {trans_out_skid.address, {`GFX_MEM_SUBWORD_BITS{1'b0}}}; - assign mem_writedata = trans_out_skid.writedata; - - assign fb_readdata = any_readdata; - assign host_readdata = any_readdata; - assign batch_readdata = any_readdata; - 
assign fetch_readdata = any_readdata; - - assign fb_readdatavalid = any_readdatavalid && dispatch_out.fb; - assign host_readdatavalid = any_readdatavalid && dispatch_out.host; - assign batch_readdatavalid = any_readdatavalid && dispatch_out.batch; - assign fetch_readdatavalid = any_readdatavalid && dispatch_out.fetch; - - assign dispatch_in.fb = !trans_out_skid.fb_waitrequest; - assign dispatch_in.host = !trans_out_skid.host_waitrequest; - assign dispatch_in.batch = !trans_out_skid.batch_waitrequest; - assign dispatch_in.fetch = !trans_out_skid.fetch_waitrequest; - - assign in_valid = rop_write || fb_read || batch_read || fetch_read || host_read || host_write; - assign mem_ready = !mem_waitrequest && (!dispatch_full || trans_out_skid.write); - assign next_put_ptr = put_ptr + 1; - assign dispatch_put = mem_ready && mem_rw && !trans_out_skid.write; - assign dispatch_full = next_put_ptr == pop_ptr; - - /* Cerrar timing aquí no es tan fácil, debido al enrutamiento al el que - * necesariamente está sujeto este módulo (eg, VRAM y DAC están en - * posiciones fijas en los bordes de la FPGA y no pueden reacomodarse). 
- */ - - gfx_skid_buf #(.WIDTH($bits(trans_in))) in_skid - ( - .in(trans_in), - .out(trans_in_skid), - .stall(trans_in_stall), - .* - ); - - gfx_skid_flow in_flow - ( - .stall(trans_in_stall), - .out_ready(out_ready), - .out_valid(skid_in_valid), - .* - ); - - gfx_pipes #(.WIDTH($bits(trans_out)), .DEPTH(`GFX_MEM_TRANS_DEPTH)) out_pipes - ( - .in(trans_in_skid), - .out(trans_out), - .stall(trans_out_stall), - .* - ); - - gfx_skid_buf #(.WIDTH($bits(trans_out))) out_skid - ( - .in(trans_out), - .out(trans_out_skid), - .stall(trans_out_stall), - .* - ); - - gfx_pipeline_flow #(.STAGES(`GFX_MEM_TRANS_DEPTH)) out_flow - ( - .stall(trans_out_stall), - .in_ready(out_ready), - .in_valid(skid_in_valid), - .out_ready(mem_ready), - .out_valid(mem_rw), - .* - ); - - gfx_pipes #(.WIDTH($bits(vram_word)), .DEPTH(`GFX_MEM_RESPONSE_DEPTH)) readdata_pipes - ( - .in(mem_readdata), - .out(readdata), - .stall(0), - .* - ); - - gfx_pipeline_flow #(.STAGES(`GFX_MEM_RESPONSE_DEPTH)) readdata_flow - ( - .stall(), - .in_ready(), - .in_valid(mem_readdatavalid), - .out_ready(1), - .out_valid(readdatavalid), - .* - ); - - always_comb begin - fb_waitrequest = 1; - rop_waitrequest = 1; - host_waitrequest = 1; - batch_waitrequest = 1; - fetch_waitrequest = 1; - - trans_in.write = 0; - trans_in.writedata = {($bits(trans_in.writedata)){1'bx}}; - - if (fb_read) begin - fb_waitrequest = !in_ready; - trans_in.address = fb_address; - end else if (batch_read) begin - batch_waitrequest = !in_ready; - trans_in.address = batch_address; - end else if (rop_write) begin - rop_waitrequest = !in_ready; - - trans_in.write = 1; - trans_in.address = rop_address; - trans_in.writedata = rop_writedata; - end else if (fetch_read) begin - fetch_waitrequest = !in_ready; - trans_in.address = fetch_address; - end else begin - host_waitrequest = !in_ready; - - trans_in.write = host_write; - trans_in.address = host_address; - trans_in.writedata = host_writedata; - end - - trans_in.fb_waitrequest = fb_waitrequest; - 
trans_in.host_waitrequest = host_waitrequest; - trans_in.batch_waitrequest = batch_waitrequest; - trans_in.fetch_waitrequest = fetch_waitrequest; - end - - always_ff @(posedge clk or negedge rst_n) - if (!rst_n) begin - pop_ptr <= 0; - put_ptr <= 0; - end else begin - if (readdatavalid) - pop_ptr <= pop_ptr + 1; - - if (dispatch_put) - put_ptr <= next_put_ptr; - end - - - always_ff @(posedge clk) begin - any_readdata <= readdata; - any_readdatavalid <= readdatavalid; - - dispatch_out <= dispatch_buf[pop_ptr]; - - if (dispatch_put) - dispatch_buf[put_ptr] <= dispatch_in; - end - -endmodule diff --git a/rtl/gfx/gfx_persp.sv b/rtl/gfx/gfx_persp.sv deleted file mode 100644 index 243b5eb..0000000 --- a/rtl/gfx/gfx_persp.sv +++ /dev/null @@ -1,58 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_persp -( - input logic clk, - rst_n, - - input raster_xyzw in_vertex_a, - in_vertex_b, - in_vertex_c, - input logic in_valid, - output logic in_ready, - - input logic out_ready, - output logic out_valid, - output raster_xyzw out_vertex_a, - out_vertex_b, - out_vertex_c -); - - // Perdón Ronald - assign in_ready = out_ready; - assign out_valid = in_valid; - assign out_vertex_a = in_vertex_a; - assign out_vertex_b = in_vertex_b; - assign out_vertex_c = in_vertex_c; - -/* - logic stall; - - gfx_pipeline_flow #(.STAGES(`FIXED_DIV_STAGES)) flow - ( - .* - ); - - gfx_persp_vertex persp_a - ( - .in_vertex(in_vertex_a), - .out_vertex(out_vertex_a), - .* - ); - - gfx_persp_vertex persp_b - ( - .in_vertex(in_vertex_b), - .out_vertex(out_vertex_b), - .* - ); - - gfx_persp_vertex persp_c - ( - .in_vertex(in_vertex_c), - .out_vertex(out_vertex_c), - .* - ); -*/ - -endmodule diff --git a/rtl/gfx/gfx_persp_vertex.sv b/rtl/gfx/gfx_persp_vertex.sv deleted file mode 100644 index f7434f0..0000000 --- a/rtl/gfx/gfx_persp_vertex.sv +++ /dev/null @@ -1,52 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_persp_vertex -( - input logic clk, - - input raster_xyzw in_vertex, - input logic stall, - - 
output raster_xyzw out_vertex -); - - raster_xyzw skid_vertex; - - gfx_fixed_div x_div - ( - .z(in_vertex.xy.x), - .d(in_vertex.zw.w), - .q(skid_vertex.xy.x), - .* - ); - - gfx_fixed_div y_div - ( - .z(in_vertex.xy.y), - .d(in_vertex.zw.w), - .q(skid_vertex.xy.y), - .* - ); - - gfx_pipes #(.WIDTH($bits(fixed)), .DEPTH(`FIXED_DIV_STAGES)) z_pipes - ( - .in(in_vertex.zw.z), - .out(skid_vertex.zw.z), - .* - ); - - gfx_pipes #(.WIDTH($bits(fixed)), .DEPTH(`FIXED_DIV_STAGES)) w_pipes - ( - .in(in_vertex.zw.w), - .out(skid_vertex.zw.w), - .* - ); - - gfx_skid_buf #(.WIDTH($bits(out_vertex))) vertex_skid - ( - .in(skid_vertex), - .out(out_vertex), - .* - ); - -endmodule diff --git a/rtl/gfx/gfx_pipeline_flow.sv b/rtl/gfx/gfx_pipeline_flow.sv deleted file mode 100644 index 9b3f22a..0000000 --- a/rtl/gfx/gfx_pipeline_flow.sv +++ /dev/null @@ -1,40 +0,0 @@ -module gfx_pipeline_flow -#(parameter STAGES=0) -( - input logic clk, - rst_n, - - input logic in_valid, - out_ready, - - output logic in_ready, - out_valid, - stall -); - - logic[STAGES - 1:0] valid; - - gfx_skid_flow skid - ( - .in_valid(valid[STAGES - 1]), - .* - ); - - always_ff @(posedge clk or negedge rst_n) - if (!rst_n) - valid[0] <= 0; - else if (!stall) - valid[0] <= in_valid; - - genvar i; - generate - for (i = 1; i < STAGES; ++i) begin: pipeline - always_ff @(posedge clk or negedge rst_n) - if (!rst_n) - valid[i] <= 0; - else if (!stall) - valid[i] <= valid[i - 1]; - end - endgenerate - -endmodule diff --git a/rtl/gfx/gfx_pipes.sv b/rtl/gfx/gfx_pipes.sv deleted file mode 100644 index 09b1d43..0000000 --- a/rtl/gfx/gfx_pipes.sv +++ /dev/null @@ -1,24 +0,0 @@ -module gfx_pipes -#(parameter WIDTH=0, DEPTH=0) -( - input logic clk, - - input logic[WIDTH - 1:0] in, - input logic stall, - - output logic[WIDTH - 1:0] out -); - - logic[WIDTH - 1:0] pipes[DEPTH]; - - assign out = pipes[DEPTH - 1]; - - always_ff @(posedge clk) - if (!stall) begin - pipes[0] <= in; - - for (integer i = 1; i < DEPTH; ++i) - pipes[i] <= 
pipes[i - 1]; - end - -endmodule diff --git a/rtl/gfx/gfx_raster.sv b/rtl/gfx/gfx_raster.sv deleted file mode 100644 index cb03744..0000000 --- a/rtl/gfx/gfx_raster.sv +++ /dev/null @@ -1,131 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_raster -( - input logic clk, - rst_n, - - input raster_xyzw vertex_a, - vertex_b, - vertex_c, - input logic in_valid, - output logic in_ready, - - output frag_xy_lanes fragments, - output bary_lanes barys, - output fixed_tri ws, - input logic out_ready, - output paint_lanes out_valid -); - - //TODO: Es exactamente el mismo asunto que offsets - assign ws[0] = vertex_a.zw.w; - assign ws[1] = vertex_b.zw.w; - assign ws[2] = vertex_c.zw.w; - - logic setup_stall, setup_valid; - - gfx_pipeline_flow #(.STAGES(`GFX_SETUP_STAGES)) setup_flow - ( - .stall(setup_stall), - .out_ready(coarse_ready), - .out_valid(setup_valid), - .* - ); - - fixed_tri coarse_x_offsets, coarse_y_offsets, coarse_test_offsets, edge_refs; - raster_xy pos_ref; - coarse_dim span_x, span_y; - raster_offsets_tri offsets; - - gfx_setup setup - ( - .stall(setup_stall), - .vertex_a(vertex_a.xy), - .vertex_b(vertex_b.xy), - .vertex_c(vertex_c.xy), - .* - ); - - logic coarse_ready, coarse_valid; - fixed_tri coarse_corners; - raster_xy coarse_pos; - raster_offsets_tri fine_offsets; - - gfx_raster_coarse coarse - ( - .in_valid(setup_valid), - .in_ready(coarse_ready), - .out_ready(fine_ready), - .out_valid(coarse_valid), - .pos(coarse_pos), - .corners(coarse_corners), - .* - ); - - logic fine_ready, fine_stall, fine_valid; - - always_comb - for (integer i = 0; i < `GFX_FINE_LANES; ++i) - out_valid[i] = fine_valid && skid_paint_ij[i]; - - gfx_pipeline_flow #(.STAGES(`GFX_FINE_STAGES)) fine_flow - ( - .stall(fine_stall), - .in_ready(fine_ready), - .in_valid(coarse_valid), - .out_ready(out_ready || !(|skid_paint_ij)), - .out_valid(fine_valid), - .* - ); - - frag_xy fragment_ij[`GFX_RASTER_SIZE][`GFX_RASTER_SIZE]; - fixed_tri barys_ij[`GFX_RASTER_SIZE][`GFX_RASTER_SIZE]; - 
logic[`GFX_FINE_LANES - 1:0] paint_ij, skid_paint_ij; - - gfx_skid_buf #(.WIDTH(`GFX_FINE_LANES)) skid_paint - ( - .in(paint_ij), - .out(skid_paint_ij), - .stall(fine_stall), - .* - ); - - genvar i, j; - generate - for (i = 0; i < `GFX_RASTER_SIZE; ++i) begin: fine_x - for (j = 0; j < `GFX_RASTER_SIZE; ++j) begin: fine_y - gfx_raster_fine #(.X(i), .Y(j)) fine - ( - .stall(fine_stall), - - .pos(coarse_pos), - .corners(coarse_corners), - .offsets(fine_offsets), - - .barys(barys_ij[i][j]), - .paint(paint_ij[j * `GFX_RASTER_SIZE + i]), - .fragment(fragment_ij[i][j]), - .* - ); - - gfx_skid_buf #(.WIDTH($bits(frag_xy))) skid_fragment - ( - .in(fragment_ij[i][j]), - .out(fragments[j * `GFX_RASTER_SIZE + i]), - .stall(fine_stall), - .* - ); - - gfx_skid_buf #(.WIDTH($bits(fixed_tri))) skid_barys - ( - .in(barys_ij[i][j]), - .out(barys[j * `GFX_RASTER_SIZE + i]), - .stall(fine_stall), - .* - ); - end - end - endgenerate - -endmodule diff --git a/rtl/gfx/gfx_raster_coarse.sv b/rtl/gfx/gfx_raster_coarse.sv deleted file mode 100644 index 8db3fe9..0000000 --- a/rtl/gfx/gfx_raster_coarse.sv +++ /dev/null @@ -1,135 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_raster_coarse -( - input logic clk, - rst_n, - - input raster_xy pos_ref, - input coarse_dim span_x, - span_y, - input raster_offsets_tri offsets, - input fixed_tri edge_refs, - coarse_x_offsets, - coarse_y_offsets, - coarse_test_offsets, - - input logic in_valid, - output logic in_ready, - - input logic out_ready, - output logic out_valid, - - output raster_xy pos, - output fixed_tri corners, - output raster_offsets_tri fine_offsets -); - - fixed reference_x; - logic end_x, end_y, running, send, send_valid, skid_ready, stall; - raster_xy next_pos; - fixed_tri edge_fns, edge_tests, edge_vert, edge_vert_next; - coarse_dim stride_x, stride_y, width; - logic[2:0] edge_signs; - raster_offsets_tri hold_offsets; - - fixed_tri hold_coarse_x_offsets, hold_coarse_y_offsets, hold_coarse_test_offsets; - - struct packed - { - 
raster_xy pos; - fixed_tri corners; - raster_offsets_tri fine_offsets; - } out, skid_out; - - assign pos = skid_out.pos; - assign corners = skid_out.corners; - assign fine_offsets = skid_out.fine_offsets; - - assign end_x = stride_x == 0; - assign end_y = stride_y == 0; - - assign send = &edge_signs && send_valid; - assign in_ready = skid_ready && !running; - - gfx_skid_buf #(.WIDTH($bits(out))) skid_buf - ( - .in(out), - .out(skid_out), - .* - ); - - gfx_skid_flow skid_flow - ( - .in_ready(skid_ready), - .in_valid(send), - .* - ); - - always_comb - for (integer i = 0; i < 3; ++i) begin - edge_tests[i] = edge_fns[i] + hold_coarse_test_offsets[i]; - edge_vert_next[i] = edge_vert[i] + hold_coarse_y_offsets[i]; - end - - always_ff @(posedge clk or negedge rst_n) - if (!rst_n) begin - running <= 0; - send_valid <= 0; - end else if (!stall) begin - if (running) - running <= !end_x || !end_y; - else - running <= in_ready && in_valid; - - send_valid <= running; - end - - always_ff @(posedge clk) - if (!stall) begin - out.pos <= next_pos; - out.corners <= edge_fns; - out.fine_offsets <= hold_offsets; - - stride_x <= stride_x - 1; - next_pos.x <= next_pos.x + (1 << (`FIXED_FRAC + `GFX_RASTER_BITS)); - - if (end_x) begin - next_pos.x <= reference_x; - next_pos.y <= next_pos.y + (1 << (`FIXED_FRAC + `GFX_RASTER_BITS)); - - stride_x <= width; - stride_y <= stride_y - 1; - end - - if (in_ready && in_valid) begin - next_pos <= pos_ref; - reference_x <= pos_ref.x; - - width <= span_x; - stride_x <= span_x; - stride_y <= span_y; - - hold_offsets <= offsets; - hold_coarse_x_offsets <= coarse_x_offsets; - hold_coarse_y_offsets <= coarse_y_offsets; - hold_coarse_test_offsets <= coarse_test_offsets; - end - - for (integer i = 0; i < 3; ++i) begin - edge_fns[i] <= edge_fns[i] + hold_coarse_x_offsets[i]; - if (end_x) begin - edge_fns[i] <= edge_vert_next[i]; - edge_vert[i] <= edge_vert_next[i]; - end - - if (in_ready && in_valid) begin - edge_fns[i] <= edge_refs[i]; - edge_vert[i] <= 
edge_refs[i]; - end - - edge_signs[i] <= !edge_tests[i][$bits(fixed) - 1]; - end - end - -endmodule diff --git a/rtl/gfx/gfx_raster_fine.sv b/rtl/gfx/gfx_raster_fine.sv deleted file mode 100644 index da11b6f..0000000 --- a/rtl/gfx/gfx_raster_fine.sv +++ /dev/null @@ -1,49 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_raster_fine -#(parameter X=0, Y=0) -( - input logic clk, - - input raster_xy pos, - input fixed_tri corners, - input raster_offsets_tri offsets, - input logic stall, - - output frag_xy fragment, - output fixed_tri barys, - output logic paint -); - - localparam INDEX = Y * `GFX_RASTER_SIZE + X; - - frag_xy fragment_hold; - fixed_tri edges, per_edge_offsets; - logic[2:0] signs; - raster_xy_prec prec; - logic[`GFX_RASTER_BITS - 1:0] fine_x, fine_y; - - assign prec = pos; - assign fine_x = X; - assign fine_y = Y; - - always_comb - for (integer i = 0; i < 3; ++i) begin - signs[i] = edges[i][$bits(edges[0]) - 1]; - per_edge_offsets[i] = offsets[i][INDEX]; - end - - always_ff @(posedge clk) - if (!stall) begin - barys <= edges; - paint <= signs == 0; - - fragment <= fragment_hold; - fragment_hold.x <= {prec.x.sign, prec.x.coarse, fine_x}; - fragment_hold.y <= {prec.y.sign, prec.y.coarse, fine_y}; - - for (integer i = 0; i < 3; ++i) - edges[i] <= corners[i] + per_edge_offsets[i]; - end - -endmodule diff --git a/rtl/gfx/gfx_rop.sv b/rtl/gfx/gfx_rop.sv deleted file mode 100644 index 3e6ef35..0000000 --- a/rtl/gfx/gfx_rop.sv +++ /dev/null @@ -1,85 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_rop -( - input logic clk, - rst_n, - - input vram_addr frag_base, - - input frag_paint in, - input logic in_valid, - output logic in_ready, - - input logic rop_waitrequest, - output logic rop_write, - output vram_word rop_writedata, - output vram_addr rop_address, - - output linear_coord mask_addr, - output logic mask_assert -); - - enum int unsigned - { - IDLE, - WRITE_LO, - WRITE_HI - } state; - - logic hi; - vram_word color_hi, color_lo; - frag_paint hold; - - 
assign {color_hi, color_lo} = hold.color; - - assign mask_addr = hold.addr; - assign rop_address = frag_base + {5'd0, hold.addr, hi}; - assign rop_writedata = hi ? color_hi : color_lo; - - always_comb begin - hi = 1'bx; - in_ready = 0; - rop_write = 0; - mask_assert = 0; - - unique case (state) - IDLE: - in_ready = 1; - - WRITE_LO: begin - hi = 0; - rop_write = 1; - mask_assert = 1; - end - - WRITE_HI: begin - hi = 1; - in_ready = !rop_waitrequest; - rop_write = 1; - end - endcase - end - - always_ff @(posedge clk or negedge rst_n) - if (!rst_n) - state <= IDLE; - else unique case (state) - IDLE: - if (in_valid) - state <= WRITE_LO; - - WRITE_LO: - if (!rop_waitrequest) - state <= WRITE_HI; - - WRITE_HI: - if (!rop_waitrequest) - state <= in_valid ? WRITE_LO : IDLE; - endcase - - always_ff @(posedge clk) - if (in_ready) - hold <= in; - -endmodule diff --git a/rtl/gfx/gfx_scanout.sv b/rtl/gfx/gfx_scanout.sv deleted file mode 100644 index a43d14c..0000000 --- a/rtl/gfx/gfx_scanout.sv +++ /dev/null @@ -1,138 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_scanout -( - input logic clk, - rst_n, - - input logic enable_clear, - input rgb24 clear_color, - input vram_addr scan_base, - - input logic mask, - output linear_coord mask_addr, - - input logic fb_waitrequest, - fb_readdatavalid, - input vram_word fb_readdata, - output logic fb_read, - output vram_addr fb_address, - - input logic scan_ready, - output logic scan_valid, - scan_endofpacket, - scan_startofpacket, - output rgb30 scan_data, - - output logic vsync -); - - logic commit, effective_mask, flush, mask_fifo_out, dac_ready, - fb_ready, mask_fifo_ready, fb_fifo_valid, mask_fifo_valid, - pop, put, put_mask, next_vsync, start_vsync, wait_vsync; - - vram_word fb_fifo_out; - half_coord commit_addr, mask_in_addr, mask_out_addr, mask_hold_addr, max_addr; - - assign mask_addr = mask_in_addr[$bits(mask_in_addr) - 1:$bits(mask_in_addr) - $bits(mask_addr)]; - assign max_addr[0] = 1; - assign max_addr[$bits(max_addr) - 
1:1] = `GFX_X_RES * `GFX_Y_RES - 1; - - assign fb_ready = !fb_read || !fb_waitrequest; - assign next_vsync = commit && start_vsync; - assign start_vsync = mask_hold_addr == max_addr; - assign effective_mask = mask || !enable_clear; - - gfx_flush_flow #(.STAGES(`GFX_MASK_STAGES)) mask_flow - ( - .in_valid(!wait_vsync), - .out_ready(fb_ready && mask_fifo_ready && !next_vsync), - .out_valid(pop), - .* - ); - - gfx_pipes #(.WIDTH($bits(mask_in_addr)), .DEPTH(`GFX_MASK_STAGES)) addr_pipes - ( - .in(mask_in_addr), - .out(mask_out_addr), - .stall(0), - .* - ); - - /* Estas FIFOs deben cumplir dos propiedades para garantizar correctitud: - * - * 1. mask_fifo.out_ready && mask_fifo.out_valid <=> scan.in_ready && scan.in_valid - * 2. fb_fifo.out_ready && fb_fifo.out_valid => scan.in_ready && scan.in_valid - * - * Nótese la asimetría (<=> vs =>), debido a mask_fifo.out - */ - - gfx_fifo #(.WIDTH($bits(effective_mask)), .DEPTH(`GFX_SCANOUT_FIFO_DEPTH)) mask_fifo - ( - .in(put_mask), - .out(mask_fifo_out), - .in_ready(mask_fifo_ready), - .in_valid(put), - .out_ready(dac_ready && (!mask_fifo_out || fb_fifo_valid)), - .out_valid(mask_fifo_valid), - .* - ); - - // 2x para evitar potencial overflow cuando fb_read=1 pero mask_fifo está llena - gfx_fifo #(.WIDTH($bits(vram_word)), .DEPTH(2 * `GFX_SCANOUT_FIFO_DEPTH)) fb_fifo - ( - .in(fb_readdata), - .out(fb_fifo_out), - .in_ready(), // readdatavalid no soporta backpressure - .in_valid(fb_readdatavalid), - .out_ready(dac_ready && mask_fifo_valid && mask_fifo_out), - .out_valid(fb_fifo_valid), - .* - ); - - gfx_scanout_dac dac - ( - .in_ready(dac_ready), - .in_valid(mask_fifo_valid && (!mask_fifo_out || fb_fifo_valid)), - .* - ); - - always_ff @(posedge clk or negedge rst_n) - if (!rst_n) begin - put <= 0; - fb_read <= 0; - wait_vsync <= 0; - commit_addr <= 0; - mask_in_addr <= 0; - end else begin - mask_in_addr <= mask_in_addr + 1; - - if (flush || wait_vsync) - mask_in_addr <= commit_addr; - - if (commit) begin - wait_vsync <= 
start_vsync; - commit_addr <= start_vsync ? 0 : mask_out_addr; - end - - if (fb_ready) - fb_read <= mask_fifo_ready && pop && !next_vsync && effective_mask; - - if (mask_fifo_ready) - put <= fb_ready && pop && !next_vsync; - - if (vsync) - wait_vsync <= 0; - end - - always_ff @(posedge clk) begin - mask_hold_addr <= mask_out_addr; - - if (fb_ready) - fb_address <= scan_base + {5'd0, mask_out_addr}; - - if (mask_fifo_ready) - put_mask <= effective_mask; - end - -endmodule diff --git a/rtl/gfx/gfx_scanout_dac.sv b/rtl/gfx/gfx_scanout_dac.sv deleted file mode 100644 index 5c80d2b..0000000 --- a/rtl/gfx/gfx_scanout_dac.sv +++ /dev/null @@ -1,117 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_scanout_dac -( - input logic clk, - rst_n, - - input logic enable_clear, - input rgb24 clear_color, - - input logic mask_fifo_out, - input vram_word fb_fifo_out, - input logic in_valid, - output logic in_ready, - - input logic scan_ready, - output logic scan_valid, - scan_endofpacket, - scan_startofpacket, - output rgb30 scan_data, - - output logic vsync -); - - logic dac_valid, half, half_mask, stall, endofpacket, startofpacket; - rgb24 pixel; - rgb32 fifo_pixel; - vram_word msw, lsw; - half_coord next_addr; - linear_coord max_addr, pixel_addr; - - struct packed - { - logic endofpacket, - startofpacket; - rgb30 pixel; - } skid_in, skid_out; - - assign scan_data = skid_out.pixel; - assign scan_endofpacket = skid_out.endofpacket; - assign scan_startofpacket = skid_out.startofpacket; - - assign max_addr = `GFX_X_RES * `GFX_Y_RES - 1; - - assign fifo_pixel = {msw, lsw}; - assign skid_in.endofpacket = endofpacket; - assign skid_in.startofpacket = startofpacket; - - function color10 dac_color(color8 in); - dac_color = {in, {2{in[0]}}}; - endfunction - - assign skid_in.pixel.r = dac_color(pixel.r); - assign skid_in.pixel.g = dac_color(pixel.g); - assign skid_in.pixel.b = dac_color(pixel.b); - - always_comb begin - // Descarta fifo_pixel.a - pixel.r = fifo_pixel.r; - pixel.g = 
fifo_pixel.g; - pixel.b = fifo_pixel.b; - - if (!half_mask) - pixel = clear_color; - end - - gfx_skid_flow flow - ( - .in_valid(dac_valid), - .out_ready(scan_ready), - .out_valid(scan_valid), - .* - ); - - gfx_skid_buf #(.WIDTH($bits(skid_in))) skid - ( - .in(skid_in), - .out(skid_out), - .* - ); - - always_ff @(posedge clk or negedge rst_n) - if (!rst_n) begin - half <= 0; - vsync <= 0; - dac_valid <= 0; - pixel_addr <= 0; - end else begin - vsync <= 0; - if (in_ready && dac_valid) begin - vsync <= skid_in.endofpacket; - dac_valid <= 0; - end - - if (in_ready && in_valid) begin - half <= !half; - dac_valid <= half; - - if (half) begin - pixel_addr <= pixel_addr + 1; - if (pixel_addr == max_addr) - pixel_addr <= 0; - end - end - end - - always_ff @(posedge clk) - if (in_ready && in_valid) begin - lsw <= msw; - msw <= fb_fifo_out; - half_mask <= mask_fifo_out; - - endofpacket <= pixel_addr == max_addr; - startofpacket <= pixel_addr == 0; - end - -endmodule diff --git a/rtl/gfx/gfx_setup.sv b/rtl/gfx/gfx_setup.sv deleted file mode 100644 index 1213645..0000000 --- a/rtl/gfx/gfx_setup.sv +++ /dev/null @@ -1,190 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_setup -( - input logic clk, - - input raster_xy vertex_a, - vertex_b, - vertex_c, - input logic stall, - - output raster_xy pos_ref, - output coarse_dim span_x, - span_y, - output raster_offsets_tri offsets, - output fixed_tri edge_refs, - coarse_x_offsets, - coarse_y_offsets, - coarse_test_offsets -); - - // FIXME FIXME FIXME: Top-left rule - - fixed_tri edge_base, edge_inc_x, edge_inc_y, out_edge_refs, x_offsets, y_offsets, test_offsets; - - raster_xy bounds_ref, hold_vertex_a, hold_vertex_b, hold_vertex_c, ps[3], qs[3], out_pos_ref; - coarse_dim bounds_span_x, bounds_span_y, out_span_x, out_span_y; - raster_offsets_tri out_offsets; - - struct packed - { - raster_xy pos_ref; - coarse_dim span_x, - span_y; - raster_offsets_tri offsets; - fixed_tri edge_refs, - coarse_x_offsets, - coarse_y_offsets, - 
coarse_test_offsets; - } out, skid_out; - - gfx_skid_buf #(.WIDTH($bits(out))) skid - ( - .in(out), - .out(skid_out), - .* - ); - - assign out.span_x = out_span_x; - assign out.span_y = out_span_y; - assign out.pos_ref = out_pos_ref; - assign out.offsets = out_offsets; - assign out.edge_refs = out_edge_refs; - assign out.coarse_x_offsets = x_offsets; - assign out.coarse_y_offsets = y_offsets; - assign out.coarse_test_offsets = test_offsets; - - assign span_x = skid_out.span_x; - assign span_y = skid_out.span_y; - assign pos_ref = skid_out.pos_ref; - assign offsets = skid_out.offsets; - assign edge_refs = skid_out.edge_refs; - assign coarse_x_offsets = skid_out.coarse_x_offsets; - assign coarse_y_offsets = skid_out.coarse_y_offsets; - assign coarse_test_offsets = skid_out.coarse_test_offsets; - - assign ps[0] = hold_vertex_a; - assign qs[0] = hold_vertex_b; - - assign ps[1] = hold_vertex_b; - assign qs[1] = hold_vertex_c; - - assign ps[2] = hold_vertex_c; - assign qs[2] = hold_vertex_a; - - gfx_pipes #(.WIDTH($bits(vertex_a)), .DEPTH(`GFX_SETUP_BOUNDS_STAGES)) vertex_a_pipes - ( - .in(vertex_a), - .out(hold_vertex_a), - .* - ); - - gfx_pipes #(.WIDTH($bits(vertex_b)), .DEPTH(`GFX_SETUP_BOUNDS_STAGES)) vertex_b_pipes - ( - .in(vertex_b), - .out(hold_vertex_b), - .* - ); - - gfx_pipes #(.WIDTH($bits(vertex_c)), .DEPTH(`GFX_SETUP_BOUNDS_STAGES)) vertex_c_pipes - ( - .in(vertex_c), - .out(hold_vertex_c), - .* - ); - - gfx_setup_bounds bounds - ( - .span_x(bounds_span_x), - .span_y(bounds_span_y), - .reference(bounds_ref), - .* - ); - - localparam POST_BOUNDS_DEPTH = `GFX_SETUP_EDGE_STAGES + `GFX_SETUP_OFFSETS_STAGES; - - gfx_pipes #(.WIDTH($bits(pos_ref)), .DEPTH(POST_BOUNDS_DEPTH)) ref_pipes - ( - .in(bounds_ref), - .out(out_pos_ref), - .* - ); - - gfx_pipes #(.WIDTH($bits(span_x)), .DEPTH(POST_BOUNDS_DEPTH)) span_x_pipes - ( - .in(bounds_span_x), - .out(out_span_x), - .* - ); - - gfx_pipes #(.WIDTH($bits(span_y)), .DEPTH(POST_BOUNDS_DEPTH)) span_y_pipes - ( - 
.in(bounds_span_y), - .out(out_span_y), - .* - ); - - always_comb - for (integer i = 0; i < 3; ++i) - // Imaginárselo - unique case ({x_offsets[i][$bits(fixed) - 1], y_offsets[i][$bits(fixed) - 1]}) - 2'b00: - test_offsets[i] = out_offsets[i][`GFX_RASTER_OFFSETS - 1]; - - 2'b01: - test_offsets[i] = out_offsets[i][`GFX_RASTER_SIZE - 1]; - - 2'b10: - test_offsets[i] = out_offsets[i][`GFX_RASTER_OFFSETS - `GFX_RASTER_SIZE - 1]; - - 2'b11: - test_offsets[i] = out_offsets[i][0]; - endcase - - genvar i; - generate - for (i = 0; i < 3; ++i) begin: edges - gfx_setup_edge edge_fn - ( - .p(ps[i]), - .q(qs[i]), - .base(edge_base[i]), - .inc_x(edge_inc_x[i]), - .inc_y(edge_inc_y[i]), - .origin(bounds_ref), - .* - ); - - gfx_pipes #(.WIDTH($bits(fixed)), .DEPTH(`GFX_SETUP_OFFSETS_STAGES)) base_pipes - ( - .in(edge_base[i]), - .out(out_edge_refs[i]), - .* - ); - - gfx_pipes #(.WIDTH($bits(fixed)), .DEPTH(`GFX_SETUP_OFFSETS_STAGES)) coarse_x_pipes - ( - .in(edge_inc_x[i] << `GFX_RASTER_BITS), - .out(x_offsets[i]), - .* - ); - - gfx_pipes #(.WIDTH($bits(fixed)), .DEPTH(`GFX_SETUP_OFFSETS_STAGES)) coarse_y_pipes - ( - .in(edge_inc_y[i] << `GFX_RASTER_BITS), - .out(y_offsets[i]), - .* - ); - - gfx_setup_offsets edge_offsets - ( - .inc_x(edge_inc_x[i]), - .inc_y(edge_inc_y[i]), - .offsets(out_offsets[i]), - .* - ); - end - endgenerate - -endmodule diff --git a/rtl/gfx/gfx_setup_bounds.sv b/rtl/gfx/gfx_setup_bounds.sv deleted file mode 100644 index b110438..0000000 --- a/rtl/gfx/gfx_setup_bounds.sv +++ /dev/null @@ -1,73 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_setup_bounds -( - input logic clk, - - input raster_xy vertex_a, - vertex_b, - vertex_c, - input logic stall, - - output raster_xy reference, - output coarse_dim span_x, - span_y -); - - logic x_a_lt_b, x_a_lt_c, x_b_lt_c, y_a_lt_b, y_a_lt_c, y_b_lt_c; - raster_xy min, max, hold_a, hold_b, hold_c; - coarse_dim ref_x, ref_y; - raster_xy_prec min_prec, max_prec, ref_prec; - - assign min_prec = min; - assign max_prec = 
max; - assign reference = ref_prec; - - assign ref_prec.x.sub = 0; - assign ref_prec.x.fine = 0; - assign ref_prec.x.padding = {`GFX_RASTER_PAD_BITS{ref_x[$bits(ref_x) - 1]}}; - assign {ref_prec.x.sign, ref_prec.x.coarse} = ref_x; - - assign ref_prec.y.sub = 0; - assign ref_prec.y.fine = 0; - assign ref_prec.y.padding = {`GFX_RASTER_PAD_BITS{ref_y[$bits(ref_y) - 1]}}; - assign {ref_prec.y.sign, ref_prec.y.coarse} = ref_y; - - always_ff @(posedge clk) - if (!stall) begin - hold_a <= vertex_a; - hold_b <= vertex_b; - hold_c <= vertex_c; - - x_a_lt_b <= vertex_a.x < vertex_b.x; - x_a_lt_c <= vertex_a.x < vertex_c.x; - x_b_lt_c <= vertex_b.x < vertex_c.x; - - y_a_lt_b <= vertex_a.y < vertex_b.y; - y_a_lt_c <= vertex_a.y < vertex_c.y; - y_b_lt_c <= vertex_b.y < vertex_c.y; - - if (x_a_lt_b) begin - min.x <= x_a_lt_c ? hold_a.x : hold_c.x; - max.x <= x_b_lt_c ? hold_c.x : hold_b.x; - end else begin - min.x <= x_b_lt_c ? hold_b.x : hold_c.x; - max.x <= x_a_lt_c ? hold_c.x : hold_a.x; - end - - if (y_a_lt_b) begin - min.y <= y_a_lt_c ? hold_a.y : hold_c.y; - max.y <= y_b_lt_c ? hold_c.y : hold_b.y; - end else begin - min.y <= y_b_lt_c ? hold_b.y : hold_c.y; - max.y <= y_a_lt_c ? 
hold_c.y : hold_a.y; - end - - ref_x <= {min_prec.x.sign, min_prec.x.coarse}; - ref_y <= {min_prec.y.sign, min_prec.y.coarse}; - - span_x <= {max_prec.x.sign, max_prec.x.coarse} - {min_prec.x.sign, min_prec.x.coarse}; - span_y <= {max_prec.y.sign, max_prec.y.coarse} - {min_prec.y.sign, min_prec.y.coarse}; - end - -endmodule diff --git a/rtl/gfx/gfx_setup_edge.sv b/rtl/gfx/gfx_setup_edge.sv deleted file mode 100644 index 5d69a88..0000000 --- a/rtl/gfx/gfx_setup_edge.sv +++ /dev/null @@ -1,53 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_setup_edge -( - input logic clk, - - input raster_xy p, - q, - origin, - input logic stall, - - output fixed base, - inc_x, - inc_y -); - - fixed delta_x, delta_y, hold_inc_x, hold_inc_y; - - gfx_pipes #(.WIDTH($bits(inc_x)), .DEPTH(`FIXED_FMA_DOT_STAGES)) inc_x_pipes - ( - .in(hold_inc_x), - .out(inc_x), - .* - ); - - gfx_pipes #(.WIDTH($bits(inc_y)), .DEPTH(`FIXED_FMA_DOT_STAGES)) inc_y_pipes - ( - .in(hold_inc_y), - .out(inc_y), - .* - ); - - gfx_fixed_fma_dot edge_base - ( - .c(0), - .q(base), - .a0(delta_x), - .b0(hold_inc_x), - .a1(delta_y), - .b1(hold_inc_y), - .* - ); - - always_ff @(posedge clk) - if (!stall) begin - delta_x <= origin.x - q.x; - delta_y <= origin.y - q.y; - - hold_inc_x <= p.y - q.y; - hold_inc_y <= q.x - p.x; - end - -endmodule diff --git a/rtl/gfx/gfx_setup_offsets.sv b/rtl/gfx/gfx_setup_offsets.sv deleted file mode 100644 index aabd322..0000000 --- a/rtl/gfx/gfx_setup_offsets.sv +++ /dev/null @@ -1,44 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_setup_offsets -( - input logic clk, - - input fixed inc_x, - inc_y, - input logic stall, - - output raster_offsets offsets -); - - fixed x_hold[`GFX_RASTER_SIZE], y_hold[`GFX_RASTER_SIZE], - x_multiples[`GFX_RASTER_SIZE], y_multiples[`GFX_RASTER_SIZE]; - - // Asume GFX_RASTER_BITS == 2. 
Los ceros deberían optimizarse trivialmente - assign x_multiples[0] = 0; - assign y_multiples[0] = 0; - assign x_multiples[1] = inc_x; - assign y_multiples[1] = inc_y; - //assign x_multiples[2] = inc_x << 1; - //assign y_multiples[2] = inc_y << 1; - //assign x_multiples[3] = (inc_x << 1) + inc_x; - //assign y_multiples[3] = (inc_y << 1) + inc_y; - - genvar i; - generate - for (i = 0; i < `GFX_RASTER_SIZE; ++i) begin: multiples - always_ff @(posedge clk) - if (!stall) begin - x_hold[i] <= x_multiples[i]; - y_hold[i] <= y_multiples[i]; - end - end - - for (i = 0; i < `GFX_RASTER_OFFSETS; ++i) begin: permutations - always_ff @(posedge clk) - if (!stall) - offsets[i] <= x_hold[i % `GFX_RASTER_SIZE] + y_hold[i / `GFX_RASTER_SIZE]; - end - endgenerate - -endmodule diff --git a/rtl/gfx/gfx_skid_buf.sv b/rtl/gfx/gfx_skid_buf.sv deleted file mode 100644 index fae5717..0000000 --- a/rtl/gfx/gfx_skid_buf.sv +++ /dev/null @@ -1,20 +0,0 @@ -module gfx_skid_buf -#(parameter WIDTH=0) -( - input logic clk, - - input logic[WIDTH - 1:0] in, - input logic stall, - - output logic[WIDTH - 1:0] out -); - - logic[WIDTH - 1:0] skid; - - assign out = stall ? 
skid : in; - - always_ff @(posedge clk) - if (!stall) - skid <= in; - -endmodule diff --git a/rtl/gfx/gfx_skid_flow.sv b/rtl/gfx/gfx_skid_flow.sv deleted file mode 100644 index c5e3b4a..0000000 --- a/rtl/gfx/gfx_skid_flow.sv +++ /dev/null @@ -1,31 +0,0 @@ -module gfx_skid_flow -( - input logic clk, - rst_n, - - input logic in_valid, - out_ready, - - output logic in_ready, - out_valid, - stall -); - - logic was_ready, was_valid; - - assign stall = !in_ready; - assign in_ready = was_ready || !was_valid; - assign out_valid = in_valid || stall; - - always_ff @(posedge clk or negedge rst_n) - if (!rst_n) begin - was_ready <= 0; - was_valid <= 0; - end else begin - was_ready <= out_ready; - - if (!stall) - was_valid <= in_valid; - end - -endmodule diff --git a/rtl/gfx/gfx_sp.sv b/rtl/gfx/gfx_sp.sv deleted file mode 100644 index ce0f9ff..0000000 --- a/rtl/gfx/gfx_sp.sv +++ /dev/null @@ -1,131 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_sp -( - input logic clk, - rst_n, - - input logic batch_waitrequest, - batch_readdatavalid, - input vram_word batch_readdata, - output vram_addr batch_address, - output logic batch_read, - - input logic fetch_waitrequest, - fetch_readdatavalid, - input vram_word fetch_readdata, - output vram_addr fetch_address, - output logic fetch_read, - - input logic program_start, - input cmd_word program_header_base, - program_header_size, - output logic running, - - input logic send_ready, - output logic send_valid, - output lane_word send_data, - output lane_mask send_mask -); - - logic batch_start, clear_lanes, insn_valid; - cmd_word batch_length; - insn_word insn; - vram_insn_addr batch_base; - - gfx_sp_fetch fetch - ( - .ready(insn_ready), - .valid(insn_valid), - .* - ); - - logic deco_valid, insn_ready; - insn_deco deco; - - gfx_sp_decode decode - ( - .* - ); - - logic deco_ready, combiner_issue_valid, shuffler_issue_valid, stream_issue_valid; - vreg_num rd_a_reg, rd_b_reg; - - gfx_sp_issue issue - ( - .* - ); - - logic recv_valid; - 
lane_word recv_data; - lane_mask recv_mask; - - gfx_sp_batch batch - ( - .out_data(recv_data), - .out_mask(recv_mask), - .out_ready(recv_ready), - .out_valid(recv_valid), - .* - ); - - logic shuffler_issue_ready, shuffler_wb_valid; - wb_op shuffler_wb; - - gfx_sp_shuffler shuffler - ( - .wb(shuffler_wb), - .in_ready(shuffler_issue_ready), - .in_valid(shuffler_issue_valid), - .wb_ready(shuffler_wb_ready), - .wb_valid(shuffler_wb_valid), - .* - ); - - logic combiner_issue_ready, combiner_wb_valid; - wb_op combiner_wb; - - gfx_sp_combiner combiner - ( - .wb(combiner_wb), - .in_ready(combiner_issue_ready), - .in_valid(combiner_issue_valid), - .wb_ready(combiner_wb_ready), - .wb_valid(combiner_wb_valid), - .* - ); - - logic recv_ready, stream_issue_ready, stream_wb_valid; - wb_op stream_wb; - - gfx_sp_stream stream - ( - .wb(stream_wb), - .in_ready(stream_issue_ready), - .in_valid(stream_issue_valid), - .wb_ready(stream_wb_ready), - .wb_valid(stream_wb_valid), - .* - ); - - mat4 wr_data; - logic combiner_wb_ready, shuffler_wb_ready, stream_wb_ready, wr; - vreg_num wr_reg; - - gfx_sp_writeback writeback - ( - .* - ); - - mat4 a, b; - - gfx_sp_regs regs - ( - .rd_a_data(a), - .rd_b_data(b), - .* - ); - - logic batch_end; - -endmodule diff --git a/rtl/gfx/gfx_sp_batch.sv b/rtl/gfx/gfx_sp_batch.sv deleted file mode 100644 index 3d566ab..0000000 --- a/rtl/gfx/gfx_sp_batch.sv +++ /dev/null @@ -1,141 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_sp_batch -( - input logic clk, - rst_n, - - input logic batch_waitrequest, - batch_readdatavalid, - input vram_word batch_readdata, - output vram_addr batch_address, - output logic batch_read, - - input logic batch_start, - input vram_insn_addr batch_base, - input cmd_word batch_length, - - output lane_mask out_mask, - output lane_word out_data, - input logic out_ready, - output logic out_valid -); - - localparam TAIL_BITS = $clog2($bits(lane_mask)), - BLOCK_BITS = $bits(batch_length) - TAIL_BITS; - - logic fifo_down_safe, 
lane_read, lane_readdatavalid, lane_waitrequest; - lane_word lane_readdata; - vram_lane_addr aligned_batch_base, lane_address; - logic[TAIL_BITS - 1:0] batch_length_tail, read_tail; - logic[BLOCK_BITS - 1:0] batch_length_block, fetch_block_count, read_block_count; - - struct packed - { - lane_word data; - lane_mask mask; - } fifo_in, fifo_out; - - enum int unsigned - { - IDLE, - STREAM - } state; - - assign out_data = fifo_out.data; - assign out_mask = fifo_out.mask; - - assign fifo_in.data = lane_readdata; - - assign {batch_length_block, batch_length_tail} = batch_length; - assign aligned_batch_base = batch_base[`GFX_INSN_BITS_IN_LANE +: $bits(vram_lane_addr)]; - - gfx_sp_widener #(.WIDTH($bits(vram_lane_addr))) lane_bus - ( - .wide_read(lane_read), - .wide_address(lane_address), - .wide_readdata(lane_readdata), - .wide_waitrequest(lane_waitrequest), - .wide_readdatavalid(lane_readdatavalid), - .word_read(batch_read), - .word_address(batch_address), - .word_readdata(batch_readdata), - .word_waitrequest(batch_waitrequest), - .word_readdatavalid(batch_readdatavalid), - .* - ); - - gfx_fifo #(.WIDTH($bits(fifo_in)), .DEPTH(`GFX_BATCH_FIFO_DEPTH)) lane_fifo - ( - .in(fifo_in), - .out(fifo_out), - .in_ready(), - .in_valid(lane_readdatavalid), - .* - ); - - gfx_fifo_overflow #(.DEPTH(`GFX_BATCH_FIFO_DEPTH)) overflow - ( - .down(lane_read && !lane_waitrequest), - .empty(), - .down_safe(fifo_down_safe), - .* - ); - - always_comb begin - unique case (read_tail) - 2'b00: fifo_in.mask = 4'b0000; - 2'b01: fifo_in.mask = 4'b0001; - 2'b10: fifo_in.mask = 4'b0011; - 2'b11: fifo_in.mask = 4'b0111; - endcase - - if (read_block_count != 0) - fifo_in.mask = 4'b1111; - end - - always_ff @(posedge clk or negedge rst_n) - if (!rst_n) begin - state <= IDLE; - lane_read <= 0; - end else unique case (state) - IDLE: - if (batch_start) begin - state <= STREAM; - lane_read <= 1; - end - - STREAM: begin - if (!lane_read || !lane_waitrequest) - lane_read <= fifo_down_safe; - - if (lane_read && 
!lane_waitrequest && fetch_block_count == 0) begin - state <= IDLE; - lane_read <= 0; - end - end - endcase - - always_ff @(posedge clk) begin - unique case (state) - IDLE: - if (batch_start) begin - read_tail <= batch_length_tail; - read_block_count <= batch_length_block; - fetch_block_count <= batch_length_block; - - lane_address <= aligned_batch_base; - end - - STREAM: - if (lane_read && !lane_waitrequest) begin - lane_address <= lane_address + 1; - fetch_block_count <= fetch_block_count - 1; - end - endcase - - if (lane_readdatavalid) - read_block_count <= read_block_count - 1; - end - -endmodule diff --git a/rtl/gfx/gfx_sp_combiner.sv b/rtl/gfx/gfx_sp_combiner.sv deleted file mode 100644 index 900af00..0000000 --- a/rtl/gfx/gfx_sp_combiner.sv +++ /dev/null @@ -1,63 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_sp_combiner -( - input logic clk, - rst_n, - - input mat4 a, - b, - input insn_deco deco, - input logic in_valid, - output logic in_ready, - - input logic wb_ready, - output logic wb_valid, - output wb_op wb -); - - wb_op wb_out; - logic mul_ready, mul_valid, fifo_ready, fifo_valid, skid_ready, out_stall; - - assign in_ready = mul_ready && fifo_ready; - - gfx_mat_mat mul - ( - .q(wb_out.data), - .in_ready(mul_ready), - .in_valid(in_valid && fifo_ready), - .out_ready(skid_ready && fifo_valid), - .out_valid(mul_valid), - .* - ); - - gfx_fifo #(.WIDTH($bits(vreg_num)), .DEPTH(`GFX_SP_COMBINER_FIFO_DEPTH)) depth - ( - .in(deco.dst), - .out(wb_out.dst), - .in_ready(fifo_ready), - .in_valid(in_valid && mul_ready), - .out_ready(skid_ready && mul_valid), - .out_valid(fifo_valid), - .* - ); - - gfx_skid_flow out_flow - ( - .stall(out_stall), - .in_ready(skid_ready), - .in_valid(fifo_valid && mul_valid), - .out_ready(wb_ready), - .out_valid(wb_valid), - .* - ); - - gfx_skid_buf #(.WIDTH($bits(wb))) out_skid - ( - .in(wb_out), - .out(wb), - .stall(out_stall), - .* - ); - -endmodule diff --git a/rtl/gfx/gfx_sp_decode.sv b/rtl/gfx/gfx_sp_decode.sv deleted file 
mode 100644 index d54077d..0000000 --- a/rtl/gfx/gfx_sp_decode.sv +++ /dev/null @@ -1,116 +0,0 @@ -`include "gfx/gfx_defs.sv" -`include "gfx/gfx_sp_isa.sv" - -module gfx_sp_decode -( - input logic clk, - rst_n, - - input logic clear_lanes, - input insn_word insn, - input logic insn_valid, - output logic insn_ready, - - output insn_deco deco, - input logic deco_ready, - output logic deco_valid -); - - logic stall; - insn_deco deco_in, deco_out; - - gfx_pipeline_flow #(.STAGES(1)) flow - ( - .in_ready(insn_ready), - .in_valid(insn_valid), - .out_ready(deco_ready), - .out_valid(deco_valid), - .* - ); - - gfx_pipes #(.WIDTH($bits(deco)), .DEPTH(1)) pipe - ( - .in(deco_in), - .out(deco_out), - .* - ); - - gfx_skid_buf #(.WIDTH($bits(deco))) skid - ( - .in(deco_out), - .out(deco), - .* - ); - - always_comb begin - deco_in.writeback = 0; - deco_in.read_src_a = 0; - deco_in.read_src_b = 0; - - deco_in.ex.stream = 0; - deco_in.ex.combiner = 0; - deco_in.ex.shuffler = 0; - - deco_in.shuffler.is_swizzle = 1'bx; - deco_in.shuffler.is_broadcast = 1'bx; - - unique casez (insn) - `GFX_INSN_OP_SELECT: begin - deco_in.writeback = 1; - deco_in.read_src_a = 1; - deco_in.read_src_b = 1; - - deco_in.ex.shuffler = 1; - deco_in.shuffler.is_swizzle = 0; - deco_in.shuffler.is_broadcast = 0; - end - - `GFX_INSN_OP_SWIZZL: begin - deco_in.writeback = 1; - deco_in.read_src_a = 1; - - deco_in.ex.shuffler = 1; - deco_in.shuffler.is_swizzle = 1; - end - - `GFX_INSN_OP_BROADC: begin - deco_in.writeback = 1; - - deco_in.ex.shuffler = 1; - deco_in.shuffler.is_swizzle = 0; - deco_in.shuffler.is_broadcast = 1; - end - - `GFX_INSN_OP_MATVEC: begin - deco_in.writeback = 1; - deco_in.read_src_a = 1; - deco_in.read_src_b = 1; - deco_in.ex.combiner = 1; - end - - `GFX_INSN_OP_SEND: begin - deco_in.read_src_a = 1; - deco_in.ex.stream = 1; - end - - `GFX_INSN_OP_RECV: begin - deco_in.writeback = 1; - deco_in.ex.stream = 1; - end - - default: - // Esto es jugar con fuego, pero lo vale con tal de que cierre 
el timing - deco_in = {($bits(deco_in)){1'bx}}; - endcase - - deco_in.dst = insn `GFX_INSN_DST; - deco_in.src_a = insn `GFX_INSN_SRC_A; - deco_in.src_b = insn `GFX_INSN_SRC_B; - deco_in.clear_lanes = clear_lanes; - - deco_in.shuffler.imm = insn `GFX_INSN_BROADC_IMM; - deco_in.shuffler.select_mask = insn `GFX_INSN_SELECT_MASK; - deco_in.shuffler.swizzle_op = insn `GFX_INSN_SWIZZL_LANES; - end - -endmodule diff --git a/rtl/gfx/gfx_sp_fetch.sv b/rtl/gfx/gfx_sp_fetch.sv deleted file mode 100644 index 23fb20e..0000000 --- a/rtl/gfx/gfx_sp_fetch.sv +++ /dev/null @@ -1,224 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_sp_fetch -( - input logic clk, - rst_n, - - input logic fetch_waitrequest, - fetch_readdatavalid, - input vram_word fetch_readdata, - output vram_addr fetch_address, - output logic fetch_read, - - input logic program_start, - input cmd_insn_ptr program_header_base, - input cmd_word program_header_size, - output logic running, - - input logic batch_end, - output vram_insn_addr batch_base, - output logic batch_start, - output cmd_word batch_length, - - input logic ready, - output logic valid, - output insn_word insn, - output logic clear_lanes -); - - localparam ENTRY_SIZE = 4; - - logic break_loop, entry_end, fifo_down_safe, fifo_empty, fifo_put, - header_continue, insn_read, insn_readdatavalid, insn_waitrequest; - - cmd_word header_count; - insn_word code_length, code_read_ptr, code_fetch_ptr, insn_readdata, entry_data[ENTRY_SIZE]; - vram_insn_addr code_base, insn_address, header_ptr; - logic[$clog2(ENTRY_SIZE - 1):0] entry_fetch_count, entry_read_count; - - enum int unsigned - { - IDLE, - HEADER, - LOOP, - FLUSH - } state; - - struct packed - { - insn_word insn; - logic clear_lanes; - } fifo_in, fifo_out; - - assign insn = fifo_out.insn; - assign clear_lanes = fifo_out.clear_lanes; - - assign entry_end = entry_read_count == ENTRY_SIZE - 1; - assign header_continue = header_count != 0; - assign break_loop = batch_end && (!insn_read || !insn_waitrequest); 
- - function vram_insn_addr base_from_word(insn_word in); - base_from_word = in[`GFX_INSN_SUBWORD_BITS +: $bits(vram_insn_addr)]; - endfunction - - assign code_base = base_from_word(entry_data[0]); - assign batch_base = base_from_word(entry_data[2]); - assign code_length = entry_data[1]; - assign batch_length = entry_data[3]; - - gfx_sp_widener #(.WIDTH($bits(vram_insn_addr))) insn_bus - ( - .wide_read(insn_read), - .wide_address(insn_address), - .wide_readdata(insn_readdata), - .wide_waitrequest(insn_waitrequest), - .wide_readdatavalid(insn_readdatavalid), - .word_read(fetch_read), - .word_address(fetch_address), - .word_readdata(fetch_readdata), - .word_waitrequest(fetch_waitrequest), - .word_readdatavalid(fetch_readdatavalid), - .* - ); - - gfx_fifo #(.WIDTH($bits(fifo_in)), .DEPTH(`GFX_FETCH_FIFO_DEPTH)) insn_fifo - ( - .in(fifo_in), - .out(fifo_out), - .in_ready(), - .in_valid(fifo_put), - .out_ready(ready), - .out_valid(valid), - .* - ); - - gfx_fifo_overflow #(.DEPTH(`GFX_FETCH_FIFO_DEPTH)) overflow - ( - .down(insn_read && !insn_waitrequest), - .empty(fifo_empty), - .down_safe(fifo_down_safe), - .out_ready(ready), - .out_valid(valid), - .* - ); - - always_ff @(posedge clk or negedge rst_n) - if (!rst_n) begin - state <= IDLE; - running <= 0; - fifo_put <= 0; - insn_read <= 0; - batch_start <= 0; - end else unique case (state) - IDLE: - if (program_start) begin - state <= HEADER; - running <= 1; - insn_read <= 1; - end - - HEADER: begin - if (insn_read && !insn_waitrequest) - insn_read <= entry_fetch_count != ENTRY_SIZE - 1; - - if (insn_readdatavalid && entry_end) begin - state <= LOOP; - insn_read <= 1; - batch_start <= 1; - end - end - - LOOP: begin - fifo_put <= 0; - batch_start <= 0; - - if (!insn_read || !insn_waitrequest) - insn_read <= fifo_down_safe; - - if (break_loop) begin - state <= FLUSH; - insn_read <= 0; - end - - if (insn_readdatavalid) - fifo_put <= 1; - end - - FLUSH: begin - fifo_put <= 0; - - if (fifo_empty) begin - state <= 
header_continue ? HEADER : IDLE; - running <= header_continue; - insn_read <= header_continue; - end - end - endcase - - always_ff @(posedge clk) - unique case (state) - IDLE: - if (program_start) begin - header_ptr <= program_header_base.addr; - header_count <= program_header_size; - insn_address <= program_header_base.addr; - - entry_read_count <= 0; - entry_fetch_count <= 0; - end - - HEADER: begin - code_read_ptr <= 0; - code_fetch_ptr <= 0; - - if (!insn_waitrequest) begin - insn_address <= insn_address + 1; - entry_fetch_count <= entry_fetch_count + 1; - end - - if (insn_read && !insn_waitrequest) - header_ptr <= header_ptr + 1; - - if (insn_readdatavalid) begin - entry_read_count <= entry_read_count + 1; - - for (integer i = 0; i < ENTRY_SIZE - 1; ++i) - entry_data[i] <= entry_data[i + 1]; - - entry_data[ENTRY_SIZE - 1] <= insn_readdata; - - if (entry_end) - insn_address <= base_from_word(entry_data[1]); - end - end - - LOOP: begin - if (insn_read && !insn_waitrequest) begin - insn_address <= insn_address + 1; - code_fetch_ptr <= code_fetch_ptr + 1; - - if (code_fetch_ptr == code_length) begin - insn_address <= code_base; - code_fetch_ptr <= 0; - end - end - - if (insn_readdatavalid) begin - fifo_in.insn <= insn_readdata; - fifo_in.clear_lanes <= code_read_ptr == 0; - - code_read_ptr <= code_read_ptr + 1; - if (code_read_ptr == code_length) - code_read_ptr <= 0; - end - end - - FLUSH: - if (fifo_empty) begin - header_count <= header_count - 1; - insn_address <= header_ptr; - end - endcase - -endmodule diff --git a/rtl/gfx/gfx_sp_file.sv b/rtl/gfx/gfx_sp_file.sv deleted file mode 100644 index e98ee18..0000000 --- a/rtl/gfx/gfx_sp_file.sv +++ /dev/null @@ -1,32 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_sp_file -( - input logic clk, - - input vreg_num rd_reg, - output vec4 rd_data, - - input logic wr, - input vreg_num wr_reg, - input vec4 wr_data -); - - vec4 file[`GFX_SP_REG_COUNT], hold_rd_data, hold_wr_data; - logic hold_wr; - vreg_num hold_rd_reg, 
hold_wr_reg; - - always_ff @(posedge clk) begin - hold_wr <= wr; - hold_wr_reg <= wr_reg; - hold_wr_data <= wr_data; - - rd_data <= hold_rd_data; - hold_rd_reg <= rd_reg; - hold_rd_data <= file[hold_rd_reg]; - - if (hold_wr) - file[hold_wr_reg] <= hold_wr_data; - end - -endmodule diff --git a/rtl/gfx/gfx_sp_isa.sv b/rtl/gfx/gfx_sp_isa.sv deleted file mode 100644 index 1420d95..0000000 --- a/rtl/gfx/gfx_sp_isa.sv +++ /dev/null @@ -1,23 +0,0 @@ -`ifndef GFX_SP_ISA_SV -`define GFX_SP_ISA_SV - -`include "gfx/gfx_defs.sv" - -`define GFX_INSN_OP_SELECT 32'b00000000_zzzz0zzz_0zzz0zzz_00000001 -`define GFX_INSN_OP_SWIZZL 32'bzzzzzzzz_00000000_0zzz0zzz_00000010 -`define GFX_INSN_OP_BROADC 32'bzzzzzzzz_zzzzzzzz_00000zzz_00000100 -`define GFX_INSN_OP_MATVEC 32'b00000000_00000zzz_0zzz0zzz_00001000 -`define GFX_INSN_OP_SEND 32'b00000000_00000000_0zzz0000_00010000 -`define GFX_INSN_OP_RECV 32'b00000000_00000000_00000zzz_00100000 - -`define GFX_INSN_DST [10:8] -`define GFX_INSN_SRC_A [14:12] -`define GFX_INSN_SRC_B [18:16] - -`define GFX_INSN_SELECT_MASK [23:20] - -`define GFX_INSN_SWIZZL_LANES [31:24] - -`define GFX_INSN_BROADC_IMM [31:16] - -`endif diff --git a/rtl/gfx/gfx_sp_issue.sv b/rtl/gfx/gfx_sp_issue.sv deleted file mode 100644 index 6934e39..0000000 --- a/rtl/gfx/gfx_sp_issue.sv +++ /dev/null @@ -1,111 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_sp_issue -( - input logic clk, - rst_n, - - input insn_deco deco, - input logic deco_valid, - output logic deco_ready, - - output vreg_num rd_a_reg, - rd_b_reg, - - input logic stream_issue_ready, - output logic stream_issue_valid, - - input logic combiner_issue_ready, - output logic combiner_issue_valid, - - input logic shuffler_issue_ready, - output logic shuffler_issue_valid, - - input logic wr, - input vreg_num wr_reg -); - - /* Esto podría ser fully pipelined, pero no dio tiempo, y en - * todo caso no haría diferencia debido al pésimo ancho de banda. 
- */ - - logic data_hazard, rd_a_hazard, rd_b_hazard, wr_hazard, writing_a, writing_b, writing_dst, - busy[`GFX_SP_REG_COUNT]; - - enum int unsigned - { - IDLE, - HAZARDS, - ISSUE, - WAIT - } state; - - assign rd_a_reg = deco.src_a; - assign rd_b_reg = deco.src_b; - - assign wr_hazard = deco.writeback && writing_dst; - assign rd_a_hazard = deco.read_src_a && writing_a; - assign rd_b_hazard = deco.read_src_a && writing_b; - assign data_hazard = rd_a_hazard || rd_b_hazard || wr_hazard; - - assign deco_ready = (stream_issue_ready && stream_issue_valid) - || (combiner_issue_ready && combiner_issue_valid) - || (shuffler_issue_ready && shuffler_issue_valid); - - always_ff @(posedge clk or negedge rst_n) - if (!rst_n) begin - state <= IDLE; - - stream_issue_valid <= 0; - combiner_issue_valid <= 0; - shuffler_issue_valid <= 0; - - for (integer i = 0; i < `GFX_SP_REG_COUNT; ++i) - busy[i] <= 0; - end else begin - unique case (state) - IDLE: - if (deco_valid) - state <= HAZARDS; - - HAZARDS: - if (!data_hazard) begin - state <= ISSUE; - if (deco.writeback) - busy[deco.dst] <= 1; - end - - ISSUE: begin - state <= WAIT; - - if (deco.ex.stream) - stream_issue_valid <= 1; - - if (deco.ex.combiner) - combiner_issue_valid <= 1; - - if (deco.ex.shuffler) - shuffler_issue_valid <= 1; - end - - WAIT: - if (deco_ready) begin - state <= IDLE; - - stream_issue_valid <= 0; - combiner_issue_valid <= 0; - shuffler_issue_valid <= 0; - end - endcase - - if (wr) - busy[wr_reg] <= 0; - end - - always_ff @(posedge clk) begin - writing_a <= busy[deco.src_a]; - writing_b <= busy[deco.src_b]; - writing_dst <= busy[deco.dst]; - end - -endmodule diff --git a/rtl/gfx/gfx_sp_regs.sv b/rtl/gfx/gfx_sp_regs.sv deleted file mode 100644 index 68aaf06..0000000 --- a/rtl/gfx/gfx_sp_regs.sv +++ /dev/null @@ -1,39 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_sp_regs -( - input logic clk, - - input vreg_num rd_a_reg, - output mat4 rd_a_data, - - input vreg_num rd_b_reg, - output mat4 rd_b_data, - - input 
logic wr, - input vreg_num wr_reg, - input mat4 wr_data -); - - genvar i; - generate - for (i = 0; i < `GFX_SP_LANES; ++i) begin: lanes - gfx_sp_file a - ( - .rd_reg(rd_a_reg), - .rd_data(rd_a_data[i]), - .wr_data(wr_data[i]), - .* - ); - - gfx_sp_file b - ( - .rd_reg(rd_b_reg), - .rd_data(rd_b_data[i]), - .wr_data(wr_data[i]), - .* - ); - end - endgenerate - -endmodule diff --git a/rtl/gfx/gfx_sp_select.sv b/rtl/gfx/gfx_sp_select.sv deleted file mode 100644 index 46b23c9..0000000 --- a/rtl/gfx/gfx_sp_select.sv +++ /dev/null @@ -1,25 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_sp_select -( - input logic clk, - - input vec4 a, - b, - input shuffler_deco deco, - input logic stall, - - output vec4 out -); - - always_ff @(posedge clk) - if (!stall) - for (integer i = 0; i < `FLOATS_PER_VEC; ++i) - if (deco.is_broadcast) - out[i] <= deco.imm; - else if (deco.select_mask[i]) - out[i] <= b[i]; - else - out[i] <= a[i]; - -endmodule diff --git a/rtl/gfx/gfx_sp_shuffler.sv b/rtl/gfx/gfx_sp_shuffler.sv deleted file mode 100644 index b813d03..0000000 --- a/rtl/gfx/gfx_sp_shuffler.sv +++ /dev/null @@ -1,70 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_sp_shuffler -( - input logic clk, - rst_n, - - input mat4 a, - b, - input insn_deco deco, - input logic in_valid, - output logic in_ready, - - input logic wb_ready, - output logic wb_valid, - output wb_op wb -); - - mat4 select_out, swizzle_out; - wb_op wb_out; - logic stall, is_swizzle; - vreg_num hold_dst; - - gfx_pipeline_flow #(.STAGES(2)) flow - ( - .out_ready(wb_ready), - .out_valid(wb_valid), - .* - ); - - gfx_skid_buf #(.WIDTH($bits(wb))) skid - ( - .in(wb_out), - .out(wb), - .* - ); - - genvar gen_i; - generate - for (gen_i = 0; gen_i < `GFX_SP_LANES; ++gen_i) begin: lanes - gfx_sp_select select - ( - .a(a[gen_i]), - .b(b[gen_i]), - .out(select_out[gen_i]), - .deco(deco.shuffler), - .* - ); - - gfx_sp_swizzle swizzle - ( - .in(a[gen_i]), - .out(swizzle_out[gen_i]), - .deco(deco.shuffler), - .* - ); - end - 
endgenerate - - always_ff @(posedge clk) - if (!stall) begin - hold_dst <= deco.dst; - is_swizzle <= deco.shuffler.is_swizzle; - - wb_out.dst <= hold_dst; - for (integer i = 0; i < `GFX_SP_LANES; ++i) - wb_out.data[i] <= is_swizzle ? swizzle_out[i] : select_out[i]; - end - -endmodule diff --git a/rtl/gfx/gfx_sp_stream.sv b/rtl/gfx/gfx_sp_stream.sv deleted file mode 100644 index 7901028..0000000 --- a/rtl/gfx/gfx_sp_stream.sv +++ /dev/null @@ -1,66 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_sp_stream -( - input logic clk, - rst_n, - - input mat4 a, - input insn_deco deco, - input logic in_valid, - output logic in_ready, - - input logic wb_ready, - output logic wb_valid, - output wb_op wb, - - input lane_word recv_data, - input lane_mask recv_mask, - input logic recv_valid, - output logic recv_ready, - - input logic send_ready, - output logic send_valid, - output lane_word send_data, - output lane_mask send_mask -); - - logic active, recv; - vreg_num wb_reg; - - assign in_ready = !active; - assign recv_ready = active && recv && wb_ready; - - assign wb_valid = active && recv && recv_valid; - assign send_valid = active && !recv; - - assign wb.dst = wb_reg; - assign wb.data = recv_data; - - always_ff @(posedge clk or negedge rst_n) - if (!rst_n) begin - active <= 0; - send_mask <= 0; - end else begin - if (!active) - active <= in_valid && (deco.writeback || |send_mask); - else if (recv) - active <= !wb_ready || !recv_valid; - else - active <= !send_ready; - - if (recv_ready && recv_valid) - send_mask <= send_mask & recv_mask; - - if (in_ready && in_valid && deco.clear_lanes) - send_mask <= {($bits(send_mask)){1'b1}}; - end - - always_ff @(posedge clk) - if (!active) begin - recv <= deco.writeback; - wb_reg <= deco.dst; - send_data <= a; - end - -endmodule diff --git a/rtl/gfx/gfx_sp_swizzle.sv b/rtl/gfx/gfx_sp_swizzle.sv deleted file mode 100644 index d07d934..0000000 --- a/rtl/gfx/gfx_sp_swizzle.sv +++ /dev/null @@ -1,19 +0,0 @@ -`include "gfx/gfx_defs.sv" - 
-module gfx_sp_swizzle -( - input logic clk, - - input vec4 in, - input shuffler_deco deco, - input logic stall, - - output vec4 out -); - - always_ff @(posedge clk) - if (!stall) - for (integer i = 0; i < `FLOATS_PER_VEC; ++i) - out[i] <= in[deco.swizzle_op[i]]; - -endmodule diff --git a/rtl/gfx/gfx_sp_widener.sv b/rtl/gfx/gfx_sp_widener.sv deleted file mode 100644 index 92101ca..0000000 --- a/rtl/gfx/gfx_sp_widener.sv +++ /dev/null @@ -1,63 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_sp_widener -#(parameter WIDTH=0) // Quartus no soporta 'parameter type' -( - input logic clk, - rst_n, - - input logic word_waitrequest, - word_readdatavalid, - input vram_word word_readdata, - output vram_addr word_address, - output logic word_read, - - input logic wide_read, - input logic[WIDTH - 1:0] wide_address, - output logic wide_waitrequest, - wide_readdatavalid, - - output logic[DATA_WIDTH - 1:0] wide_readdata -); - - // Este módulo existe para fingir que la DE1-SoC tiene un bus de SDRAM más ancho - - localparam WIDE_BITS = $bits(vram_addr) - WIDTH, - WIDE_SIZE = 1 << WIDE_BITS, - DATA_WIDTH = $bits(vram_word) << WIDE_BITS; - - vram_word shift_in[WIDE_SIZE]; - logic[WIDE_BITS - 1:0] address_count, read_count; - - assign word_read = wide_read; - assign word_address = {wide_address, address_count}; - assign wide_waitrequest = word_waitrequest || !(&address_count); - - always_comb - for (integer i = 0; i < WIDE_SIZE; ++i) - wide_readdata[$bits(vram_word) * i +: $bits(vram_word)] = shift_in[i]; - - always_ff @(posedge clk or negedge rst_n) - if (!rst_n) begin - read_count <= 0; - address_count <= 0; - wide_readdatavalid <= 0; - end else begin - if (word_read && !word_waitrequest) - address_count <= address_count + 1; - - if (word_readdatavalid) - read_count <= read_count + 1; - - wide_readdatavalid <= word_readdatavalid && &read_count; - end - - always_ff @(posedge clk) - if (word_readdatavalid) begin - for (integer i = 0; i < WIDE_SIZE - 1; ++i) - shift_in[i] <= 
shift_in[i + 1]; - - shift_in[WIDE_SIZE - 1] <= word_readdata; - end - -endmodule diff --git a/rtl/gfx/gfx_sp_writeback.sv b/rtl/gfx/gfx_sp_writeback.sv deleted file mode 100644 index 1195910..0000000 --- a/rtl/gfx/gfx_sp_writeback.sv +++ /dev/null @@ -1,65 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_sp_writeback -( - input logic clk, - rst_n, - - input wb_op stream_wb, - input logic stream_wb_valid, - output logic stream_wb_ready, - - input wb_op combiner_wb, - input logic combiner_wb_valid, - output logic combiner_wb_ready, - - input wb_op shuffler_wb, - input logic shuffler_wb_valid, - output logic shuffler_wb_ready, - - output logic wr, - output vreg_num wr_reg, - output mat4 wr_data -); - - wb_op wb_in, wb_out; - - assign wr_reg = wb_out.dst; - assign wr_data = wb_out.data; - - gfx_pipeline_flow #(.STAGES(`GFX_SP_WB_STAGES)) flow - ( - .stall(), - .in_ready(), - .in_valid(stream_wb_valid || combiner_wb_valid || shuffler_wb_valid), - .out_ready(1), - .out_valid(wr), - .* - ); - - gfx_pipes #(.WIDTH($bits(wb_out)), .DEPTH(`GFX_SP_WB_STAGES)) pipes - ( - .in(wb_in), - .out(wb_out), - .stall(0), - .* - ); - - always_comb begin - stream_wb_ready = 0; - combiner_wb_ready = 0; - shuffler_wb_ready = 0; - - if (stream_wb_valid) begin - wb_in = stream_wb; - stream_wb_ready = 1; - end else if (shuffler_wb_valid) begin - wb_in = shuffler_wb; - shuffler_wb_ready = 1; - end else begin - wb_in = combiner_wb; - combiner_wb_ready = 1; - end - end - -endmodule diff --git a/rtl/gfx/gfx_transpose.sv b/rtl/gfx/gfx_transpose.sv deleted file mode 100644 index 03ecf2d..0000000 --- a/rtl/gfx/gfx_transpose.sv +++ /dev/null @@ -1,17 +0,0 @@ -`include "gfx/gfx_defs.sv" - -module gfx_transpose -( - input mat4 in, - output mat4 out -); - - integer i, j; - - // Esto no tiene costo en hardware, es un renombramiento de señales - always_comb - for (i = 0; i < `VECS_PER_MAT; ++i) - for (j = 0; j < `FLOATS_PER_VEC; ++j) - out[i][j] = in[j][i]; - -endmodule diff --git a/rtl/gfx/mod.mk 
b/rtl/gfx/mod.mk deleted file mode 100644 index 4e0f46d..0000000 --- a/rtl/gfx/mod.mk +++ /dev/null @@ -1,5 +0,0 @@ -define core - $(this)/deps := config - $(this)/rtl_dirs := . - $(this)/rtl_top := gfx -endef diff --git a/rtl/legacy_gfx/gfx.sv b/rtl/legacy_gfx/gfx.sv new file mode 100644 index 0000000..73e5dbf --- /dev/null +++ b/rtl/legacy_gfx/gfx.sv @@ -0,0 +1,208 @@ +`include "gfx/gfx_defs.sv" + +module gfx +( + input logic clk, + rst_n, + + input cmd_addr cmd_address, + input logic cmd_read, + cmd_write, + input cmd_word cmd_writedata, + output cmd_word cmd_readdata, + + input logic mem_waitrequest, + mem_readdatavalid, + input vram_word mem_readdata, + output vram_byte_addr mem_address, + output logic mem_read, + mem_write, + output vram_word mem_writedata, + + input vram_addr host_address, + input logic host_read, + host_write, + input vram_word host_writedata, + output logic host_waitrequest, + host_readdatavalid, + output vram_word host_readdata, + + input logic scan_ready, + output logic scan_valid, + scan_endofpacket, + scan_startofpacket, + output rgb30 scan_data +); + + logic enable_clear, program_start, start_clear, swap_buffers; + rgb24 clear_color; + cmd_word fb_base_a, fb_base_b, program_header_base, program_header_size; + + gfx_cmd cmd + ( + .* + ); + + logic batch_read, fetch_read, running, send_valid; + lane_word send_data; + lane_mask send_mask; + vram_addr batch_address, fetch_address; + + gfx_sp sp + ( + .* + ); + + logic send_ready, assembly_valid; + fp_xyzw assembly_vertex_a, assembly_vertex_b, assembly_vertex_c; + + gfx_assembly assembly + ( + .out_ready(fix_ready), + .out_valid(assembly_valid), + .out_vertex_a(assembly_vertex_a), + .out_vertex_b(assembly_vertex_b), + .out_vertex_c(assembly_vertex_c), + .* + ); + + logic fix_ready, fix_valid; + raster_xyzw fix_vertex_a, fix_vertex_b, fix_vertex_c; + + gfx_fix_floats fix + ( + .in_ready(fix_ready), + .in_valid(assembly_valid), + .out_ready(persp_ready), + .out_valid(fix_valid), + 
.in_vertex_a(assembly_vertex_a), + .in_vertex_b(assembly_vertex_b), + .in_vertex_c(assembly_vertex_c), + .out_vertex_a(fix_vertex_a), + .out_vertex_b(fix_vertex_b), + .out_vertex_c(fix_vertex_c), + .* + ); + + logic persp_ready, persp_valid; + raster_xyzw persp_vertex_a, persp_vertex_b, persp_vertex_c; + + gfx_persp perspective + ( + .in_ready(persp_ready), + .in_valid(fix_valid), + .out_ready(raster_ready), + .out_valid(persp_valid), + .in_vertex_a(fix_vertex_a), + .in_vertex_b(fix_vertex_b), + .in_vertex_c(fix_vertex_c), + .out_vertex_a(persp_vertex_a), + .out_vertex_b(persp_vertex_b), + .out_vertex_c(persp_vertex_c), + .* + ); + + logic raster_ready; + fixed_tri raster_ws; + bary_lanes barys; + paint_lanes raster_valid; + frag_xy_lanes fragments; + + gfx_raster raster + ( + .ws(raster_ws), + .in_ready(raster_ready), + .in_valid(persp_valid), + .out_ready(funnel_ready), + .out_valid(raster_valid), + + .vertex_a(persp_vertex_a), + .vertex_b(persp_vertex_b), + .vertex_c(persp_vertex_c), + + .* + ); + + logic frag_mask, scan_mask; + vram_addr frag_base, scan_base; + + gfx_masks masks + ( + .frag_mask_read_addr(), + .* + ); + + logic frag_mask_set, frag_mask_write, frag_wait; + linear_coord frag_mask_write_addr; + + gfx_clear clear + ( + .* + ); + + logic funnel_ready, funnel_valid; + frag_xy frag; + fixed_tri frag_bary, frag_ws; + + gfx_funnel funnel + ( + .in_ready(funnel_ready), + .in_valid(raster_valid), + .out_ready(frag_ready), + .out_valid(funnel_valid), + .* + ); + + logic frag_ready, frag_valid; + frag_paint frag_out; + + gfx_frag frag_ + ( + .out(frag_out), + + .ws(frag_ws), + .bary(frag_bary), + .in_ready(frag_ready), + .in_valid(funnel_valid), + .out_ready(rop_ready), + .out_valid(frag_valid), + .* + ); + + logic rop_mask_assert, rop_ready, rop_write; + vram_addr rop_address; + vram_word rop_writedata; + linear_coord rop_mask_addr; + + gfx_rop rop + ( + .in(frag_out), + .in_ready(rop_ready), + .in_valid(frag_valid), + .mask_addr(rop_mask_addr), + 
.mask_assert(rop_mask_assert), + .* + ); + + logic batch_readdatavalid, fb_readdatavalid, fetch_readdatavalid, + batch_waitrequest, fb_waitrequest, fetch_waitrequest, rop_waitrequest; + + vram_word batch_readdata, fb_readdata, fetch_readdata; + + gfx_mem mem + ( + .* + ); + + logic fb_read, vsync; + vram_addr fb_address; + linear_coord scan_mask_addr; + + gfx_scanout scanout + ( + .mask(scan_mask), + .mask_addr(scan_mask_addr), + .* + ); + +endmodule diff --git a/rtl/legacy_gfx/gfx_assembly.sv b/rtl/legacy_gfx/gfx_assembly.sv new file mode 100644 index 0000000..1a909be --- /dev/null +++ b/rtl/legacy_gfx/gfx_assembly.sv @@ -0,0 +1,89 @@ +`include "gfx/gfx_defs.sv" + +module gfx_assembly +( + input logic clk, + rst_n, + + input lane_word send_data, + input lane_mask send_mask, + input logic send_valid, + output logic send_ready, + + input logic out_ready, + output logic out_valid, + output fp_xyzw out_vertex_a, + out_vertex_b, + out_vertex_c +); + + localparam SETS_PER_TRI = 6; + + mat4 sets[SETS_PER_TRI]; + logic assemble_next, permit_out; + lane_mask current_mask, next_mask; + logic[1:0] out_lane; + logic[2:0] set_num; + + enum int unsigned + { + GET_LANES, + ASSEMBLE + } state; + + assign out_valid = permit_out && current_mask[out_lane]; + assign out_vertex_a = sets[0][out_lane]; + assign out_vertex_b = sets[2][out_lane]; + assign out_vertex_c = sets[4][out_lane]; + + assign next_mask = current_mask & send_mask; + assign assemble_next = !current_mask[out_lane] || out_ready; + + always_ff @(posedge clk or negedge rst_n) + if (!rst_n) begin + state <= GET_LANES; + set_num <= 0; + out_lane <= 0; + permit_out <= 0; + send_ready <= 1; + current_mask <= {($bits(current_mask)){1'b1}}; + end else unique case (state) + GET_LANES: + if (send_valid) begin + set_num <= set_num + 1; + current_mask <= next_mask; + + if (set_num == SETS_PER_TRI - 1) begin + state <= ASSEMBLE; + permit_out <= 1; + send_ready <= 0; + end + + if (!(|next_mask)) begin + state <= GET_LANES; + set_num 
<= 0; + current_mask <= {($bits(current_mask)){1'b1}}; + end + end + + ASSEMBLE: + if (assemble_next) begin + out_lane <= out_lane + 1; + if (&out_lane) begin + state <= GET_LANES; + permit_out <= 0; + send_ready <= 1; + end + end + endcase + + always_ff @(posedge clk) + unique case (state) + GET_LANES: + if (send_valid) + sets[set_num] <= send_data; + + ASSEMBLE: ; + endcase + +endmodule diff --git a/rtl/legacy_gfx/gfx_clear.sv b/rtl/legacy_gfx/gfx_clear.sv new file mode 100644 index 0000000..ae9a20c --- /dev/null +++ b/rtl/legacy_gfx/gfx_clear.sv @@ -0,0 +1,70 @@ +`include "gfx/gfx_defs.sv" + +module gfx_clear +( + input logic clk, + rst_n, + + input logic start_clear, + + input linear_coord rop_mask_addr, + input logic rop_mask_assert, + output logic frag_wait, + + output logic frag_mask_set, + frag_mask_write, + output linear_coord frag_mask_write_addr +); + + enum int unsigned + { + FRAG, + CLEAR + } state; + + logic end_clear; + + assign end_clear = frag_mask_write_addr == `GFX_LINEAR_RES - 1; + + always_comb + unique case (state) + FRAG: frag_wait = start_clear; + CLEAR: frag_wait = 1; + endcase + + always_ff @(posedge clk or negedge rst_n) + if (!rst_n) begin + state <= FRAG; + frag_mask_write <= 0; + end else unique case (state) + FRAG: begin + frag_mask_write <= rop_mask_assert; + + if (start_clear) begin + state <= CLEAR; + frag_mask_write <= 1; + end + end + + CLEAR: + if (end_clear) begin + state <= FRAG; + frag_mask_write <= 0; + end + endcase + + always_ff @(posedge clk) + unique case (state) + FRAG: begin + frag_mask_set <= !start_clear; + frag_mask_write_addr <= rop_mask_addr; + + if (start_clear) + frag_mask_write_addr <= 0; + end + + CLEAR: + frag_mask_write_addr <= frag_mask_write_addr + 1; + endcase + +endmodule diff --git a/rtl/legacy_gfx/gfx_cmd.sv b/rtl/legacy_gfx/gfx_cmd.sv new file mode 100644 index 0000000..29b6e21 --- /dev/null +++ b/rtl/legacy_gfx/gfx_cmd.sv @@ -0,0 +1,125 @@ +`include "gfx/gfx_defs.sv" + +module gfx_cmd +( + input 
logic clk, + rst_n, + + input cmd_addr cmd_address, + input logic cmd_read, + cmd_write, + input cmd_word cmd_writedata, + output cmd_word cmd_readdata, + + input logic vsync, + + output logic swap_buffers, + enable_clear, + start_clear, + output rgb24 clear_color, + + output logic program_start, + output cmd_word program_header_base, + program_header_size, + + output cmd_word fb_base_a, + fb_base_b +); + + rgb24 next_clear_color; + logic do_start_clear, next_start_clear, next_enable_clear, next_swap_buffers; + + struct packed + { + logic[4:0] mbz; + logic start_frame, + enable_clear, + swap_buffers; + rgb24 clear_color; + } readdata_scan, writedata_scan; + + assign cmd_readdata = readdata_scan; + + assign writedata_scan = cmd_writedata; + assign readdata_scan.mbz = 0; + assign readdata_scan.clear_color = clear_color; + assign readdata_scan.enable_clear = enable_clear; + assign readdata_scan.swap_buffers = swap_buffers; + + assign do_start_clear = writedata_scan.start_frame && writedata_scan.enable_clear; + + always_ff @(posedge clk or negedge rst_n) + if (!rst_n) begin + start_clear <= 0; + enable_clear <= 0; + swap_buffers <= 0; + + next_start_clear <= 0; + next_enable_clear <= 0; + next_swap_buffers <= 0; + + program_start <= 0; + + fb_base_a <= 0; + fb_base_b <= 0; + end else begin + start_clear <= 0; + program_start <= 0; + + if (vsync) begin + start_clear <= next_start_clear; + enable_clear <= next_enable_clear; + swap_buffers <= next_swap_buffers; + + next_start_clear <= 0; + end + + if (cmd_write) + unique case (cmd_address[2:0]) + `GFX_CMD_REG_ID: ; + + `GFX_CMD_REG_SCAN: begin + next_enable_clear <= writedata_scan.enable_clear; + next_swap_buffers <= writedata_scan.swap_buffers; + + if (!next_start_clear) + next_start_clear <= do_start_clear; + end + + `GFX_CMD_REG_HEADER_BASE: ; + + `GFX_CMD_REG_HEADER_SIZE: + program_start <= 1; + + `GFX_CMD_REG_FB_BASE_A: + fb_base_a <= cmd_writedata; + + `GFX_CMD_REG_FB_BASE_B: + fb_base_b <= cmd_writedata; + + 
default: ; + endcase + end + + always_ff @(posedge clk) begin + if (vsync) + clear_color <= next_clear_color; + + if (cmd_write) + unique case (cmd_address[2:0]) + `GFX_CMD_REG_ID: ; + + `GFX_CMD_REG_SCAN: + next_clear_color <= writedata_scan.clear_color; + + `GFX_CMD_REG_HEADER_BASE: + program_header_base <= cmd_writedata; + + `GFX_CMD_REG_HEADER_SIZE: + program_header_size <= cmd_writedata; + + default: ; + endcase + end + +endmodule diff --git a/rtl/legacy_gfx/gfx_defs.sv b/rtl/legacy_gfx/gfx_defs.sv new file mode 100644 index 0000000..1e7a335 --- /dev/null +++ b/rtl/legacy_gfx/gfx_defs.sv @@ -0,0 +1,267 @@ +`ifndef GFX_DEFS_SV +`define GFX_DEFS_SV + +// Esto es arquitectural, no se puede ajustar sin cambiar otras cosas +`define FLOAT_BITS 16 +`define FLOATS_PER_VEC 4 +`define VECS_PER_MAT 4 + +// Target de 200MHz (reloj es 143MHz) con float16, rounding (muy) aproximado +`define FP_ADD_STAGES 10 // ~401 LUTs +`define FP_MUL_STAGES 5 // ~144 LUTs ~1 bloque DSP +`define FP_FIX_STAGES 5 // ~313 LUTs + +typedef logic[`FLOAT_BITS - 1:0] fp; +typedef fp[1:0] vec2; +typedef fp[`FLOATS_PER_VEC - 1:0] vec4; +typedef vec4[`VECS_PER_MAT - 1:0] mat4; + +`define FP_UNIT 16'h3c00 + +typedef logic[1:0] index4; + +`define INDEX4_MIN 2'b00 +`define INDEX4_MAX 2'b11 + +typedef logic[8:0] x_coord; +typedef logic[9:0] y_coord; +typedef logic[9:0] xy_coord; +typedef logic[18:0] linear_coord; +typedef logic[19:0] half_coord; + +`define GFX_X_RES 640 +`define GFX_Y_RES 480 +`define GFX_LINEAR_RES (`GFX_X_RES * `GFX_Y_RES) + +`define COLOR_CHANNELS 4 + +typedef logic[7:0] color8; +typedef logic[9:0] color10; + +typedef struct packed +{ + color8 r, g, b; +} rgb24; + +typedef struct packed +{ + color10 r, g, b; +} rgb30; + +typedef struct packed +{ + color8 a, r, g, b; +} rgb32; + +`define FIXED_FRAC 16 + +`define FIXED_DIV_PIPES 2 +`define FIXED_DIV_STAGES (`FIXED_DIV_PIPES + $bits(fixed) + `FIXED_FRAC) +`define FIXED_FMA_STAGES 5 +`define FIXED_FMA_DOT_STAGES (2 * `FIXED_FMA_STAGES) 
+`define LERP_STAGES `FIXED_FMA_DOT_STAGES + +typedef logic signed[31:0] fixed; +typedef fixed[2:0] fixed_tri; + +`define EDGE_P0_TO_P1 0 +`define EDGE_P1_TO_P2 1 +`define EDGE_P2_TO_P0 2 + +typedef struct packed +{ + fixed x, y; +} raster_xy; + +typedef struct packed +{ + fixed z, w; +} raster_zw; + +typedef struct packed +{ + raster_xy xy; + raster_zw zw; +} raster_xyzw; + +typedef struct packed +{ + fp x, y, z, w; +} fp_xyzw; + +typedef logic[8:0] coarse_dim; + +`define GFX_MASK_SRAM_STAGES 3 +`define GFX_MASK_STAGES (1 + `GFX_MASK_SRAM_STAGES + 1) + +`define GFX_SCANOUT_FIFO_DEPTH 16 // Ajustable + +`define GFX_SETUP_BOUNDS_STAGES 3 +`define GFX_SETUP_EDGE_STAGES (1 + `FIXED_FMA_DOT_STAGES) +`define GFX_SETUP_OFFSETS_STAGES 2 +`define GFX_SETUP_STAGES (`GFX_SETUP_BOUNDS_STAGES \ + + `GFX_SETUP_EDGE_STAGES \ + + `GFX_SETUP_OFFSETS_STAGES) + +`define GFX_FINE_STAGES 2 + +`define GFX_RASTER_BITS 1 // Solía ser 2, pero la FPGA no da para tanto +`define GFX_RASTER_SUB_BITS 4 +`define GFX_RASTER_PAD_BITS ($bits(fixed) - $bits(coarse_dim) - `FIXED_FRAC - `GFX_RASTER_BITS) +`define GFX_RASTER_SIZE (1 << `GFX_RASTER_BITS) +`define GFX_RASTER_OFFSETS (1 << (2 * `GFX_RASTER_BITS)) + +typedef struct packed +{ + logic[`GFX_RASTER_SUB_BITS - 1:0] num; + logic[`FIXED_FRAC - `GFX_RASTER_SUB_BITS - 1:0] prec; +} raster_sub; + +typedef struct packed +{ + logic sign; + logic[`GFX_RASTER_PAD_BITS - 1:0] padding; + logic[$bits(coarse_dim) - 2:0] coarse; + logic[`GFX_RASTER_BITS - 1:0] fine; + raster_sub sub; +} raster_prec; + +typedef struct packed +{ + raster_prec x, y; +} raster_xy_prec; + +typedef fixed[`GFX_RASTER_OFFSETS - 1:0] raster_offsets; +typedef raster_offsets[2:0] raster_offsets_tri; + +`define GFX_FINE_LANES (`GFX_RASTER_SIZE * `GFX_RASTER_SIZE) + +typedef struct packed +{ + xy_coord x, y; +} frag_xy; + +typedef frag_xy[`GFX_FINE_LANES - 1:0] frag_xy_lanes; +typedef logic[`GFX_FINE_LANES - 1:0] paint_lanes; +typedef fixed[`COLOR_CHANNELS - 1:0] color_lerp_lanes; 
+typedef fixed_tri[`GFX_FINE_LANES - 1:0] bary_lanes;
+
+typedef struct packed
+{
+	linear_coord addr;
+	rgb32 color;
+} frag_paint;
+
+`define GFX_FRAG_ADDR_STAGES 3
+`define GFX_FRAG_BARY_STAGES (`FIXED_DIV_STAGES + 2 + `FIXED_DIV_STAGES) // two dividers in series plus the 2-stage area sum in gfx_frag_bary
+`define GFX_FRAG_SHADE_STAGES (`LERP_STAGES + 1)
+`define GFX_FRAG_STAGES (`GFX_FRAG_BARY_STAGES + `GFX_FRAG_SHADE_STAGES)
+
+`define GFX_MEM_WORD_ADDR_BITS 25
+`define GFX_MEM_DATA_BITS 16 // Nothing I can do about this
+`define GFX_MEM_SUBWORD_BITS ($clog2(`GFX_MEM_DATA_BITS / 8))
+`define GFX_MEM_ADDR_BITS (`GFX_MEM_WORD_ADDR_BITS + `GFX_MEM_SUBWORD_BITS)
+`define GFX_MEM_RESPONSE_DEPTH 2 // To be tuned
+`define GFX_MEM_TRANS_DEPTH 4 // DO NOT TOUCH, see `GFX_MEM_MAX_PENDING_READS
+`define GFX_MEM_DISPATCH_DEPTH 8 // Note that platform.vram_0.s1.maximumPendingReadTransactions = 7
+
+// DO NOT TOUCH. This must match gfx_hw.tcl exactly
+`define GFX_VRAM_MAX_PENDING_READS 7 // platform.vram_0.s1.maximumPendingReadTransactions
+`define GFX_MEM_MAX_PENDING_READS (1 + `GFX_MEM_TRANS_DEPTH + 1 + `GFX_VRAM_MAX_PENDING_READS)
+
+typedef logic[`GFX_MEM_DATA_BITS - 1:0] vram_word;
+typedef logic[`GFX_MEM_ADDR_BITS - 1:0] vram_byte_addr;
+typedef logic[`GFX_MEM_WORD_ADDR_BITS - 1:0] vram_addr;
+
+`define GFX_INSN_BITS 32
+`define GFX_INSN_ADDR_BITS (`GFX_MEM_WORD_ADDR_BITS - $clog2(`GFX_INSN_BITS / `GFX_MEM_DATA_BITS))
+`define GFX_INSN_SUBWORD_BITS (`GFX_MEM_ADDR_BITS - `GFX_INSN_ADDR_BITS)
+`define GFX_LANE_BITS $bits(mat4)
+`define GFX_LANE_ADDR_BITS (`GFX_MEM_WORD_ADDR_BITS - $clog2(`GFX_LANE_BITS / `GFX_MEM_DATA_BITS))
+`define GFX_LANE_SUBWORD_BITS (`GFX_MEM_ADDR_BITS - `GFX_LANE_ADDR_BITS)
+`define GFX_INSN_BITS_IN_LANE (`GFX_LANE_SUBWORD_BITS - `GFX_INSN_SUBWORD_BITS)
+
+typedef logic[`GFX_INSN_BITS - 1:0] insn_word;
+typedef logic[`GFX_LANE_BITS - 1:0] lane_word;
+typedef logic[`GFX_INSN_ADDR_BITS - 1:0] vram_insn_addr;
+typedef logic[`GFX_LANE_ADDR_BITS - 1:0] vram_lane_addr;
+
+typedef logic[5:0] cmd_addr;
+typedef
logic[31:0] cmd_word;
+// Command register selectors; gfx_cmd matches them against cmd_address[2:0]
+`define GFX_CMD_REG_ID 3'b000
+`define GFX_CMD_REG_SCAN 3'b001
+`define GFX_CMD_REG_HEADER_BASE 3'b010
+`define GFX_CMD_REG_HEADER_SIZE 3'b011
+`define GFX_CMD_REG_FB_BASE_A 3'b100
+`define GFX_CMD_REG_FB_BASE_B 3'b101
+
+typedef struct packed
+{
+	logic[$bits(cmd_word) - $bits(vram_insn_addr) - `GFX_INSN_SUBWORD_BITS - 1:0] pad;
+	vram_insn_addr addr;
+	logic[`GFX_INSN_SUBWORD_BITS - 1:0] sub;
+} cmd_insn_ptr; // a cmd_word viewed as {pad, instruction address, sub-word bits}
+
+typedef struct packed
+{
+	logic[$bits(cmd_word) - $bits(vram_lane_addr) - `GFX_LANE_SUBWORD_BITS - 1:0] pad;
+	vram_lane_addr addr;
+	logic[`GFX_LANE_SUBWORD_BITS - 1:0] sub;
+} cmd_lane_ptr; // a cmd_word viewed as {pad, lane address, sub-word bits}
+
+`define GFX_FETCH_FIFO_DEPTH 8
+
+`define GFX_BATCH_FIFO_DEPTH 4
+`define GFX_SP_LANES `VECS_PER_MAT
+
+typedef logic[`GFX_SP_LANES - 1:0] lane_mask;
+typedef logic[`FLOATS_PER_VEC - 1:0] vec_mask;
+
+typedef logic[`FLOATS_PER_VEC - 1:0][$clog2(`FLOATS_PER_VEC) - 1:0] swizzle_lanes;
+
+`define GFX_SP_REG_BITS 3
+`define GFX_SP_REG_COUNT (1 << `GFX_SP_REG_BITS)
+
+typedef logic[`GFX_SP_REG_BITS - 1:0] vreg_num;
+
+typedef struct packed
+{
+	logic stream,
+	      combiner,
+	      shuffler;
+} ex_units;
+
+typedef struct packed
+{
+	logic is_swizzle,
+	      is_broadcast;
+	fp imm;
+	vec_mask select_mask;
+	swizzle_lanes swizzle_op;
+} shuffler_deco;
+
+typedef struct packed
+{
+	logic writeback,
+	      read_src_a,
+	      read_src_b,
+	      clear_lanes;
+	vreg_num dst,
+	         src_a,
+	         src_b;
+	ex_units ex;
+	shuffler_deco shuffler;
+} insn_deco; // "insn_decode" already exists in core; this is confusing, but I did it for lack of time
+
+typedef struct packed
+{
+	vreg_num dst;
+	mat4 data;
+} wb_op;
+
+`define GFX_SP_COMBINER_FIFO_DEPTH 4 // TODO: optimize this
+
+`define GFX_SP_WB_STAGES 2
+
+`endif
diff --git a/rtl/legacy_gfx/gfx_dot.sv b/rtl/legacy_gfx/gfx_dot.sv
new file mode 100644
index 0000000..9c21c23
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_dot.sv
@@ -0,0 +1,49 @@
+`include "gfx/gfx_defs.sv"
+// gfx_dot: one gfx_fp_mul per component of a and b, each product skid-buffered, then reduced to q by gfx_fold.
+module gfx_dot
+(
+	input logic clk,
+
+	input logic stall_mul,
+	            stall_fold,
+	            feedback,
+	            feedback_last,
+
+	input vec4 a,
+	           b,
+
+	output fp q
+);
+
+	vec4 products_fold, products_mul;
+
+	gfx_fold fold
+	(
+		.vec(products_fold),
+		.stall(stall_fold),
+		.*
+	);
+
+	genvar i;
+	generate
+		for (i = 0; i < `FLOATS_PER_VEC; ++i) begin: entries
+			gfx_fp_mul entry_i
+			(
+				.a(a[i]),
+				.b(b[i]),
+				.q(products_mul[i]),
+				.stall(stall_mul),
+				.*
+			);
+
+			gfx_skid_buf #(.WIDTH($bits(fp))) skid_i
+			(
+				.in(products_mul[i]),
+				.out(products_fold[i]),
+				.stall(stall_mul),
+				.*
+			);
+		end
+	endgenerate
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_fifo.sv b/rtl/legacy_gfx/gfx_fifo.sv
new file mode 100644
index 0000000..e9fa8f5
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_fifo.sv
@@ -0,0 +1,98 @@
+module gfx_fifo // valid/ready FIFO: DEPTH entries of WIDTH bits, with a skid stage on both the write and the read side
+#(parameter WIDTH=0, DEPTH=0)
+(
+	input logic clk,
+	            rst_n,
+
+	input logic[WIDTH - 1:0] in,
+	input logic in_valid,
+	output logic in_ready,
+
+	input logic out_ready,
+	output logic out_valid,
+	output logic[WIDTH - 1:0] out
+);
+
+	logic do_read, do_write, full_if_eq, in_stall, out_stall,
+	      may_read, may_write, read, read_ok, write;
+
+	logic[WIDTH - 1:0] fifo[DEPTH], read_data, write_data;
+	logic[$clog2(DEPTH) - 1:0] read_ptr, write_ptr; // NOTE(review): pointers wrap at 2**$clog2(DEPTH); DEPTH presumably must be a power of two
+
+	assign do_read = read && may_read;
+	assign do_write = write && may_write;
+	// Equal pointers mean either empty or full; full_if_eq tells which. A read in the same cycle also allows a write.
+	always_comb begin
+		may_read = full_if_eq;
+		may_write = !full_if_eq;
+
+		if (read)
+			may_write = 1;
+
+		if (read_ptr != write_ptr) begin
+			may_read = 1;
+			may_write = 1;
+		end
+	end
+
+	gfx_skid_flow in_flow
+	(
+		.stall(in_stall),
+		.out_ready(may_write),
+		.out_valid(write),
+		.*
+	);
+
+	gfx_skid_flow out_flow
+	(
+		.stall(out_stall),
+		.in_ready(read),
+		.in_valid(read_ok),
+		.*
+	);
+
+	gfx_skid_buf #(.WIDTH(WIDTH)) in_skid
+	(
+		.out(write_data),
+		.stall(in_stall),
+		.*
+	);
+
+	gfx_skid_buf #(.WIDTH(WIDTH)) out_skid
+	(
+		.in(read_data),
+		.stall(out_stall),
+		.*
+	);
+
+	always_ff @(posedge clk or negedge rst_n)
+		if (!rst_n) begin
+			read_ok <= 0;
+			read_ptr <= 0;
+			write_ptr <= 0;
+			full_if_eq <= 0;
+		end else begin
+			if
(!out_stall)
+				read_ok <= read && may_read;
+
+			if (do_read)
+				read_ptr <= read_ptr + 1;
+
+			if (do_write)
+				write_ptr <= write_ptr + 1;
+			// Remember whether the last net operation was a write (full) or a read (empty)
+			if (do_read && !do_write)
+				full_if_eq <= 0;
+			else if (!do_read && do_write)
+				full_if_eq <= 1;
+		end
+
+	always_ff @(posedge clk) begin
+		if (!out_stall)
+			read_data <= fifo[read_ptr];
+
+		if (may_write) // the slot is written even without `write`; write_ptr only advances on do_write
+			fifo[write_ptr] <= write_data;
+	end
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_fifo_overflow.sv b/rtl/legacy_gfx/gfx_fifo_overflow.sv
new file mode 100644
index 0000000..c9cb3de
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_fifo_overflow.sv
@@ -0,0 +1,34 @@
+`include "gfx/gfx_defs.sv"
+// gfx_fifo_overflow: occupancy counter. `down` increments pending, an out_ready && out_valid handshake decrements it.
+module gfx_fifo_overflow
+#(parameter DEPTH=0)
+(
+	input logic clk,
+	            rst_n,
+
+	input logic down,
+	            out_ready,
+	            out_valid,
+
+	output logic empty,
+	             down_safe
+);
+
+	logic up;
+	logic[$clog2(DEPTH + 1) - 1:0] pending;
+
+	assign up = out_ready && out_valid;
+	assign empty = pending == 0;
+	assign down_safe = up || pending < DEPTH - 1; // high while an entry is leaving or fewer than DEPTH - 1 are pending
+
+	always_ff @(posedge clk or negedge rst_n)
+		if (!rst_n)
+			pending <= 0;
+		else begin
+			if (up && !down)
+				pending <= pending - 1;
+			else if (!up && down)
+				pending <= pending + 1;
+		end
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_fix_floats.sv b/rtl/legacy_gfx/gfx_fix_floats.sv
new file mode 100644
index 0000000..fe3ab21
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_fix_floats.sv
@@ -0,0 +1,49 @@
+`include "gfx/gfx_defs.sv"
+// gfx_fix_floats: converts three vec4 vertices to raster_xyzw in lockstep, one gfx_fix_vertex each, under one gfx_pipeline_flow.
+module gfx_fix_floats
+(
+	input logic clk,
+	            rst_n,
+
+	input vec4 in_vertex_a,
+	           in_vertex_b,
+	           in_vertex_c,
+	input logic in_valid,
+	output logic in_ready,
+
+	input logic out_ready,
+	output logic out_valid,
+	output raster_xyzw out_vertex_a,
+	                   out_vertex_b,
+	                   out_vertex_c
+);
+
+	logic stall;
+
+	gfx_pipeline_flow #(.STAGES(`FP_FIX_STAGES + 1)) flow
+	(
+		.*
+	);
+
+	gfx_fix_vertex fix_a
+	(
+		.in_vertex(in_vertex_a),
+		.out_vertex(out_vertex_a),
+		.*
+	);
+
+	gfx_fix_vertex fix_b
+	(
+		.in_vertex(in_vertex_b),
+		.out_vertex(out_vertex_b),
+		.*
+	);
+
+	gfx_fix_vertex fix_c
+	(
+
.in_vertex(in_vertex_c),
+		.out_vertex(out_vertex_c),
+		.*
+	);
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_fix_vertex.sv b/rtl/legacy_gfx/gfx_fix_vertex.sv
new file mode 100644
index 0000000..728f3b6
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_fix_vertex.sv
@@ -0,0 +1,64 @@
+`include "gfx/gfx_defs.sv"
+// gfx_fix_vertex: gfx_fp_fix on every component, then x is scaled by 320 and y by 240 with shifts and adds; z and w pass through.
+module gfx_fix_vertex
+(
+	input logic clk,
+
+	input vec4 in_vertex,
+	input logic stall,
+
+	output raster_xyzw out_vertex
+);
+
+	fixed x, y;
+	raster_xyzw fixed_vertex, corrected;
+	fixed[`FLOATS_PER_VEC - 1:0] fixed_vals, corrected_vals, skid_vals;
+
+	assign out_vertex = skid_vals;
+	assign fixed_vertex = fixed_vals;
+	assign corrected_vals = corrected;
+
+	assign x = fixed_vertex.xy.x;
+	assign y = fixed_vertex.xy.y;
+
+	genvar i;
+	generate
+		for (i = 0; i < `FLOATS_PER_VEC; ++i) begin: components
+			gfx_fp_fix fix
+			(
+				.in(in_vertex[i]),
+				.out(fixed_vals[i]),
+				.*
+			);
+
+			gfx_skid_buf #(.WIDTH($bits(fixed))) skid
+			(
+				.in(corrected_vals[i]),
+				.out(skid_vals[i]),
+				.*
+			);
+		end
+	endgenerate
+
+	always_ff @(posedge clk)
+		if (!stall) begin
+			/* x * `GFX_X_RES / 2
+			 * = x * 320
+			 * = x * 64 * 5
+			 * = (x * 5) << 6
+			 * = (x * (4 + 1)) << 6
+			 * = ((x << 2) + x) << 6
+			 *
+			 * y * `GFX_Y_RES / 2
+			 * = y * 240
+			 * = y * 16 * 15
+			 * = (y * 15) << 4
+			 * = (y * (16 - 1)) << 4
+			 * = ((y << 4) - y) << 4
+			 */
+			corrected.zw <= fixed_vertex.zw;
+			corrected.xy.x <= ((x << 2) + x) << 6;
+			corrected.xy.y <= ((y << 4) - y) << 4;
+		end
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_fixed_div.sv b/rtl/legacy_gfx/gfx_fixed_div.sv
new file mode 100644
index 0000000..e562072
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_fixed_div.sv
@@ -0,0 +1,77 @@
+`include "gfx/gfx_defs.sv"
+// gfx_fixed_div: pipelined signed fixed-point division, q = (z << `FIXED_FRAC) / d, truncated to $bits(fixed) bits.
+module gfx_fixed_div
+(
+	input logic clk,
+
+	input fixed z,
+	            d,
+	input logic stall,
+
+	output fixed q
+);
+
+	localparam DIV_BITS = `FIXED_FRAC + $bits(fixed);
+
+	fixed d_hold, z_hold;
+	logic signed[DIV_BITS - 1:0] z_int, q_int;
+
+	assign q = q_int[$bits(q) - 1:0];
+	assign z_int = {z_hold,
{`FIXED_FRAC{1'b0}}};
+
+`ifndef VERILATOR
+	lpm_divide div
+	(
+		.aclr(0),
+		.clock(clk),
+		.clken(!stall),
+		.numer(z_int),
+		.denom(d_hold),
+		.remain(),
+		.quotient(q_int)
+	);
+
+	defparam
+		div.lpm_widthn = DIV_BITS,
+		div.lpm_widthd = $bits(fixed),
+		div.lpm_nrepresentation = "SIGNED",
+		div.lpm_drepresentation = "SIGNED",
+		div.lpm_pipeline = `FIXED_DIV_STAGES - `FIXED_DIV_PIPES,
+		div.maximize_speed = 6;
+
+	gfx_pipes #(.WIDTH($bits(z)), .DEPTH(`FIXED_DIV_PIPES)) z_pipes
+	(
+		.in(z),
+		.out(z_hold),
+		.*
+	);
+
+	gfx_pipes #(.WIDTH($bits(d)), .DEPTH(`FIXED_DIV_PIPES)) d_pipes
+	(
+		.in(d),
+		.out(d_hold),
+		.*
+	);
+`else
+	logic signed[DIV_BITS - 1:0] d_int_hold, z_int_hold;
+
+	assign q_int = z_int_hold / d_int_hold;
+	assign z_hold = z;
+	assign d_int_hold = {{`FIXED_FRAC{d_hold[$bits(d_hold) - 1]}}, d_hold}; // sign-extend d to DIV_BITS
+
+	gfx_pipes #(.WIDTH($bits(z_int)), .DEPTH(`FIXED_DIV_STAGES)) z_int_pipes
+	(
+		.in(z_int),
+		.out(z_int_hold),
+		.*
+	);
+
+	gfx_pipes #(.WIDTH($bits(d)), .DEPTH(`FIXED_DIV_STAGES)) d_pipes
+	(
+		.in(d),
+		.out(d_hold),
+		.*
+	);
+`endif
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_fixed_fma.sv b/rtl/legacy_gfx/gfx_fixed_fma.sv
new file mode 100644
index 0000000..ec26477
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_fixed_fma.sv
@@ -0,0 +1,73 @@
+`include "gfx/gfx_defs.sv"
+// gfx_fixed_fma: pipelined fixed-point multiply-add, q = ((a * b) >> `FIXED_FRAC) + c; lpm_mult in synthesis, behavioural model under Verilator.
+module gfx_fixed_fma
+(
+	input logic clk,
+
+	input fixed a,
+	            b,
+	            c,
+	input logic stall,
+
+	output fixed q
+);
+
+`ifndef VERILATOR
+	logic[2 * $bits(fixed) - `FIXED_FRAC - 1:0] q_ext;
+	assign q = q_ext[$bits(fixed) - 1:0];
+
+	lpm_mult mult
+	(
+		.aclr(0),
+		.clock(clk),
+		.clken(!stall),
+
+		.sum({c, {`FIXED_FRAC{1'b0}}}),
+		.dataa(a),
+		.datab(b),
+		.result(q_ext)
+	);
+
+	defparam
+		mult.lpm_widtha = $bits(fixed),
+		mult.lpm_widthb = $bits(fixed),
+		mult.lpm_widths = $bits(fixed) + `FIXED_FRAC,
+		/* This is crucial. It is not documented anywhere (apart from a
+		 * comment on r/fpga). If lpm_widthp < lpm_widtha + lpm_widthb,
+		 * then result holds the lpm_widthp most significant bits of the
+		 * product, not the least significant ones as would make sense.
+		 */
+		mult.lpm_widthp = 2 * $bits(fixed) - `FIXED_FRAC,
+		mult.lpm_representation = "SIGNED",
+		mult.lpm_pipeline = `FIXED_FMA_STAGES;
+`else
+	logic[$bits(fixed) + `FIXED_FRAC - 1:0] q_ext;
+
+	fixed a_hold, b_hold, c_hold;
+
+	assign q = q_ext[$bits(fixed) + `FIXED_FRAC - 1:`FIXED_FRAC] + c_hold;
+	assign q_ext = a_hold * b_hold;
+
+	gfx_pipes #(.WIDTH($bits(a)), .DEPTH(`FIXED_FMA_STAGES)) a_pipes
+	(
+		.in(a),
+		.out(a_hold),
+		.*
+	);
+
+	gfx_pipes #(.WIDTH($bits(b)), .DEPTH(`FIXED_FMA_STAGES)) b_pipes
+	(
+		.in(b),
+		.out(b_hold),
+		.*
+	);
+
+	gfx_pipes #(.WIDTH($bits(c)), .DEPTH(`FIXED_FMA_STAGES)) c_pipes
+	(
+		.in(c),
+		.out(c_hold),
+		.*
+	);
+`endif
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_fixed_fma_dot.sv b/rtl/legacy_gfx/gfx_fixed_fma_dot.sv
new file mode 100644
index 0000000..c19b49e
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_fixed_fma_dot.sv
@@ -0,0 +1,49 @@
+`include "gfx/gfx_defs.sv"
+// gfx_fixed_fma_dot: q = a0 * b0 + a1 * b1 + c from two chained gfx_fixed_fma; a1 and b1 are delayed to meet the first result.
+module gfx_fixed_fma_dot
+(
+	input logic clk,
+
+	input fixed a0,
+	            b0,
+	            a1,
+	            b1,
+	            c,
+	input logic stall,
+
+	output fixed q
+);
+
+	fixed q0, a1_hold, b1_hold;
+
+	gfx_fixed_fma fma0
+	(
+		.a(a0),
+		.b(b0),
+		.q(q0),
+		.*
+	);
+
+	gfx_pipes #(.WIDTH($bits(fixed)), .DEPTH(`FIXED_FMA_STAGES)) a_pipes
+	(
+		.in(a1),
+		.out(a1_hold),
+		.*
+	);
+
+	gfx_pipes #(.WIDTH($bits(fixed)), .DEPTH(`FIXED_FMA_STAGES)) b_pipes
+	(
+		.in(b1),
+		.out(b1_hold),
+		.*
+	);
+
+	gfx_fixed_fma fma1
+	(
+		.a(a1_hold),
+		.b(b1_hold),
+		.c(q0),
+		.*
+	);
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_flush_flow.sv b/rtl/legacy_gfx/gfx_flush_flow.sv
new file mode 100644
index 0000000..a0e43d7
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_flush_flow.sv
@@ -0,0 +1,45 @@
+module gfx_flush_flow // STAGES-deep valid-bit pipeline, cleared when the previous cycle's out_valid was not accepted
+#(parameter STAGES=0)
+(
+	input logic clk,
+	            rst_n,
+
+	input logic in_valid,
+	            out_ready,
+
+	output logic out_valid,
+	             commit,
+
flush
+);
+
+	logic was_valid, was_ready;
+	logic[STAGES - 1:0] valid;
+
+	assign flush = was_valid && !was_ready; // last cycle's output was offered but not taken
+	assign commit = was_valid && was_ready; // last cycle's output was offered and taken
+	assign out_valid = valid[STAGES - 1] && !flush;
+
+	always_ff @(posedge clk or negedge rst_n)
+		if (!rst_n) begin
+			was_ready <= 0;
+			was_valid <= 0;
+
+			for (integer i = 0; i < STAGES; ++i)
+				valid[i] <= 0;
+		end else begin
+			was_ready <= out_ready;
+			was_valid <= out_valid;
+
+			if (!flush)
+				valid[0] <= in_valid;
+			else
+				valid[0] <= 0;
+
+			for (integer i = 1; i < STAGES; ++i)
+				if (!flush)
+					valid[i] <= valid[i - 1];
+				else
+					valid[i] <= 0;
+		end
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_fold.sv b/rtl/legacy_gfx/gfx_fold.sv
new file mode 100644
index 0000000..616d868
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_fold.sv
@@ -0,0 +1,54 @@
+`include "gfx/gfx_defs.sv"
+// gfx_fold: reduces vec with a single gfx_fp_add: vec[0] + vec[1] first; vec[2] and vec[3] wait in `queued` and are added on feedback rounds.
+module gfx_fold
+(
+	input logic clk,
+
+	input vec4 vec,
+	input logic stall,
+	            feedback,
+	            feedback_last,
+
+	output fp q
+);
+
+	fp q_add;
+	vec2 feedback_vec, queued[`FP_ADD_STAGES];
+
+	assign feedback_vec = queued[`FP_ADD_STAGES - 1];
+
+	gfx_fp_add add
+	(
+		.a(feedback ? q_add : vec[0]),
+		.b(feedback ?
feedback_vec[feedback_last] : vec[1]),
+		.q(q_add),
+		.*
+	);
+
+	gfx_skid_buf #(.WIDTH($bits(q))) skid
+	(
+		.in(q_add),
+		.out(q),
+		.*
+	);
+
+	always_ff @(posedge clk)
+		if (!stall) begin
+			if (feedback)
+				queued[0] <= feedback_vec;
+			else begin
+				queued[0][0] <= vec[2];
+				queued[0][1] <= vec[3];
+			end
+		end
+
+	genvar i;
+	generate
+		for (i = 1; i < `FP_ADD_STAGES; ++i) begin: stages
+			always_ff @(posedge clk)
+				if (!stall)
+					queued[i] <= queued[i - 1];
+		end
+	endgenerate
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_fold_flow.sv b/rtl/legacy_gfx/gfx_fold_flow.sv
new file mode 100644
index 0000000..8f23b8f
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_fold_flow.sv
@@ -0,0 +1,61 @@
+`include "gfx/gfx_defs.sv"
+// gfx_fold_flow: a 2-bit round tag per adder pipeline slot: 00 idle, 01 new input, then 10, then 11 = finished (offered downstream).
+module gfx_fold_flow
+(
+	input logic clk,
+	            rst_n,
+
+	input logic in_valid,
+	            out_ready,
+
+	output logic in_ready,
+	             out_valid,
+	             stall,
+	             feedback,
+	             feedback_last
+);
+
+	logic skid_ready;
+	index4 rounds[`FP_ADD_STAGES], last_round;
+
+	assign in_ready = skid_ready && !feedback;
+
+	assign feedback = last_round[1] ^ last_round[0]; // true for tags 01 and 10
+	assign feedback_last = last_round[1];
+
+	assign last_round = rounds[`FP_ADD_STAGES - 1];
+
+	gfx_skid_flow skid
+	(
+		.in_valid(last_round == `INDEX4_MAX),
+		.in_ready(skid_ready),
+		.*
+	);
+
+	always_ff @(posedge clk or negedge rst_n)
+		if (!rst_n)
+			rounds[0] <= `INDEX4_MIN;
+		else if (!stall)
+			unique case (last_round)
+				2'b01:
+					rounds[0] <= 2'b10;
+
+				2'b10:
+					rounds[0] <= 2'b11;
+
+				2'b00, 2'b11:
+					rounds[0] <= {1'b0, in_valid};
+			endcase
+
+	genvar i;
+	generate
+		for (i = 1; i < `FP_ADD_STAGES; ++i) begin: pipeline
+			always_ff @(posedge clk or negedge rst_n)
+				if (!rst_n)
+					rounds[i] <= `INDEX4_MIN;
+				else if (!stall)
+					rounds[i] <= rounds[i - 1];
+		end
+	endgenerate
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_fp_add.sv b/rtl/legacy_gfx/gfx_fp_add.sv
new file mode 100644
index 0000000..0b3058a
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_fp_add.sv
@@ -0,0 +1,41 @@
+`include "gfx/gfx_defs.sv"
+// gfx_fp_add: floating-point adder; the ip_fp_add core in synthesis, a C++ model behind `FP_ADD_STAGES pipes under Verilator.
+module gfx_fp_add
+(
+
input logic clk,
+
+	input fp a,
+	         b,
+	input logic stall,
+
+	output fp q
+);
+
+`ifndef VERILATOR
+	ip_fp_add ip_add
+	(
+		.en(!stall),
+		.areset(0),
+		.*
+	);
+`else
+	fp a_pop, b_pop;
+
+	assign q = $c("taller::fp_add(", a_pop, ", ", b_pop, ")");
+
+	gfx_pipes #(.WIDTH($bits(a)), .DEPTH(`FP_ADD_STAGES)) a_pipes
+	(
+		.in(a),
+		.out(a_pop),
+		.*
+	);
+
+	gfx_pipes #(.WIDTH($bits(b)), .DEPTH(`FP_ADD_STAGES)) b_pipes
+	(
+		.in(b),
+		.out(b_pop),
+		.*
+	);
+`endif
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_fp_fix.sv b/rtl/legacy_gfx/gfx_fp_fix.sv
new file mode 100644
index 0000000..b38e0e3
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_fp_fix.sv
@@ -0,0 +1,34 @@
+`include "gfx/gfx_defs.sv"
+// gfx_fp_fix: float-to-fixed conversion; the ip_fp_fix core in synthesis, a C++ model behind `FP_FIX_STAGES pipes under Verilator.
+module gfx_fp_fix
+(
+	input logic clk,
+
+	input fp in,
+	input logic stall,
+
+	output fixed out
+);
+
+`ifndef VERILATOR
+	ip_fp_fix ip_fix
+	(
+		.a(in),
+		.q(out),
+		.en(!stall),
+		.areset(0),
+		.*
+	);
+`else
+	fp pop;
+
+	assign out = $c("taller::fp_fix(", pop, ")");
+
+	gfx_pipes #(.WIDTH($bits(in)), .DEPTH(`FP_FIX_STAGES)) pipes
+	(
+		.out(pop),
+		.*
+	);
+`endif
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_fp_mul.sv b/rtl/legacy_gfx/gfx_fp_mul.sv
new file mode 100644
index 0000000..7ff3c02
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_fp_mul.sv
@@ -0,0 +1,41 @@
+`include "gfx/gfx_defs.sv"
+// gfx_fp_mul: floating-point multiplier; the ip_fp_mul core in synthesis, a C++ model behind `FP_MUL_STAGES pipes under Verilator.
+module gfx_fp_mul
+(
+	input logic clk,
+
+	input fp a,
+	         b,
+	input logic stall,
+
+	output fp q
+);
+
+`ifndef VERILATOR
+	ip_fp_mul ip_mul
+	(
+		.en(!stall),
+		.areset(0),
+		.*
+	);
+`else
+	fp a_pop, b_pop;
+
+	assign q = $c("taller::fp_mul(", a_pop, ", ", b_pop, ")");
+
+	gfx_pipes #(.WIDTH($bits(a)), .DEPTH(`FP_MUL_STAGES)) a_pipes
+	(
+		.in(a),
+		.out(a_pop),
+		.*
+	);
+
+	gfx_pipes #(.WIDTH($bits(b)), .DEPTH(`FP_MUL_STAGES)) b_pipes
+	(
+		.in(b),
+		.out(b_pop),
+		.*
+	);
+`endif
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_frag.sv b/rtl/legacy_gfx/gfx_frag.sv
new file mode 100644
index 0000000..d61de72
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_frag.sv
@@ -0,0 +1,79 @@
+`include "gfx/gfx_defs.sv"
+// gfx_frag: per-fragment stage producing a frag_paint: linear address (gfx_frag_addr) plus colour (gfx_frag_bary -> gfx_frag_shade).
+module gfx_frag
+(
+	input logic clk,
+	            rst_n,
+
+	input frag_xy frag,
+	input fixed_tri bary,
+	                ws,
+	input logic in_valid,
+	output logic in_ready,
+
+	input logic out_ready,
+	output logic out_valid,
+	output frag_paint out
+);
+
+	logic stall;
+	frag_paint frag_out;
+
+	gfx_pipeline_flow #(.STAGES(`GFX_FRAG_STAGES)) addr_flow
+	(
+		.*
+	);
+
+	linear_coord linear;
+
+	gfx_frag_addr addr
+	(
+		.*
+	);
+	// The address is ready after `GFX_FRAG_ADDR_STAGES; delay it so it leaves together with the colour
+	localparam ADDR_WAIT_STAGES = `GFX_FRAG_STAGES - `GFX_FRAG_ADDR_STAGES;
+
+	gfx_pipes #(.WIDTH($bits(linear_coord)), .DEPTH(ADDR_WAIT_STAGES)) addr_pipes
+	(
+		.in(linear),
+		.out(frag_out.addr),
+		.*
+	);
+
+	fixed b1, b2;
+
+	gfx_frag_bary frag_bary
+	(
+		.*
+	);
+	// Hard-coded colours, lanes 3..0 = a, r, g, b (rgb32 order): base is red, the two deltas lead to green and blue
+	color_lerp_lanes argb0, argb1_argb0, argb2_argb0;
+
+	assign argb0[3] = 32'd0 << 8;
+	assign argb0[2] = 32'd255 << 8;
+	assign argb0[1] = 32'd0 << 8;
+	assign argb0[0] = 32'd0 << 8;
+
+	assign argb1_argb0[3] = 32'd0 << 8;
+	assign argb1_argb0[2] = (-32'sd255) << 8;
+	assign argb1_argb0[1] = 32'd255 << 8;
+	assign argb1_argb0[0] = 32'd0 << 8;
+
+	assign argb2_argb0[3] = 32'd0 << 8;
+	assign argb2_argb0[2] = (-32'sd255) << 8;
+	assign argb2_argb0[1] = 32'd0 << 8;
+	assign argb2_argb0[0] = 32'd255 << 8;
+
+	gfx_frag_shade shade
+	(
+		.color(frag_out.color),
+		.*
+	);
+
+	gfx_skid_buf #(.WIDTH($bits(frag_out))) skid
+	(
+		.in(frag_out),
+		.*
+	);
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_frag_addr.sv b/rtl/legacy_gfx/gfx_frag_addr.sv
new file mode 100644
index 0000000..23bd315
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_frag_addr.sv
@@ -0,0 +1,59 @@
+`include "gfx/gfx_defs.sv"
+// gfx_frag_addr: maps a centre-origin fragment coordinate to a linear framebuffer address in three register stages.
+module gfx_frag_addr
+(
+	input logic clk,
+
+	input frag_xy frag,
+	input logic stall,
+
+	output linear_coord linear
+);
+
+	/* frag is expressed in a normalized range split evenly between
+	 * positive and negative values. To obtain the linear address that
+	 * corresponds to it, we must correct this so that the minimum is
+	 * zero in each coordinate. After that,
+	 *
+	 * linear = y_corrected * `GFX_X_RES + x_corrected
+	 *
+	 * Fortunately, this does not need an FMA, as I will proceed to show:
+	 *
+	 * y * `GFX_X_RES + x
+	 * = y * 640 + x
+	 * = y * 128 * 5 + x
+	 * = ((y * 5) << 7) + x
+	 * = ((y * (4 + 1)) << 7) + x
+	 * = (((y << 2) + y) << 7) + x
+	 * = (y << 9) + (y << 7) + x
+	 *
+	 * To correct x ([-320, 319]), `GFX_X_RES / 2 is added to it.
+	 *
+	 * To correct y ([-240, 239]), keep in mind that linear
+	 * addresses grow downwards, so:
+	 * y_corrected = `GFX_Y_RES / 2 - 1 - y
+	 */
+
+	localparam ZERO_PAD = $bits(linear_coord) - $bits(xy_coord);
+
+	// These constants assume `GFX_X_RES == 640
+	localparam Y_SHIFT0 = 9, Y_SHIFT1 = 7;
+
+	xy_coord bias_x, bias_y;
+	linear_coord row_start, x_biased, x_hold, y_biased;
+
+	assign bias_x = `GFX_X_RES / 2;
+	assign bias_y = `GFX_Y_RES / 2 - 1;
+
+	always_ff @(posedge clk)
+		if (!stall) begin
+			x_biased <= {{ZERO_PAD{1'b0}}, frag.x + bias_x};
+			y_biased <= {{ZERO_PAD{1'b0}}, bias_y - frag.y};
+
+			x_hold <= x_biased;
+			row_start <= (y_biased << Y_SHIFT0) + (y_biased << Y_SHIFT1);
+
+			linear <= row_start + x_hold;
+		end
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_frag_bary.sv b/rtl/legacy_gfx/gfx_frag_bary.sv
new file mode 100644
index 0000000..4f4f452
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_frag_bary.sv
@@ -0,0 +1,78 @@
+`include "gfx/gfx_defs.sv"
+// gfx_frag_bary: divides each edge value by its w, sums the three quotients into `area`, and outputs b1, b2 as quotients 1 and 2 over that sum.
+module gfx_frag_bary
+(
+	input logic clk,
+
+	input fixed_tri bary,
+	                ws,
+	input logic stall,
+
+	output fixed b1,
+	             b2
+);
+
+	fixed area, b0_w0, b1_w1, b2_w2, b1_w1_b2_w2, hold_b0_w0, hold_b1_w1, hold_b2_w2;
+	fixed_tri bs_ws, orthographic_bs;
+
+	assign b0_w0 = bs_ws[0];
+	assign b1_w1 = bs_ws[1];
+	assign b2_w2 = bs_ws[2];
+	// Weight i is taken from the edge that does not touch vertex i
+	assign orthographic_bs[0] = bary[`EDGE_P1_TO_P2];
+	assign orthographic_bs[1] = bary[`EDGE_P2_TO_P0];
+	assign orthographic_bs[2] = bary[`EDGE_P0_TO_P1];
+
+	genvar i;
+	generate
+		for (i = 0; i < 3; ++i) begin: vertices
+			gfx_fixed_div div_b_w
+			(
+
.z(orthographic_bs[i]),
+				.d(ws[i]),
+				.q(bs_ws[i]),
+				.*
+			);
+		end
+	endgenerate
+
+	localparam AREA_STAGES = 2; // matches the two register stages that build `area` below
+
+	gfx_pipes #(.WIDTH($bits(fixed)), .DEPTH(AREA_STAGES)) b1_w1_pipes
+	(
+		.in(b1_w1),
+		.out(hold_b1_w1),
+		.*
+	);
+
+	gfx_pipes #(.WIDTH($bits(fixed)), .DEPTH(AREA_STAGES)) b2_w2_pipes
+	(
+		.in(b2_w2),
+		.out(hold_b2_w2),
+		.*
+	);
+
+	gfx_fixed_div norm_b1
+	(
+		.z(hold_b1_w1),
+		.d(area),
+		.q(b1),
+		.*
+	);
+
+	gfx_fixed_div norm_b2
+	(
+		.z(hold_b2_w2),
+		.d(area),
+		.q(b2),
+		.*
+	);
+	// area = b0/w0 + b1/w1 + b2/w2, summed over two register stages
+	always_ff @(posedge clk)
+		if (!stall) begin
+			area <= hold_b0_w0 + b1_w1_b2_w2;
+			hold_b0_w0 <= b0_w0;
+			b1_w1_b2_w2 <= b1_w1 + b2_w2;
+		end
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_frag_shade.sv b/rtl/legacy_gfx/gfx_frag_shade.sv
new file mode 100644
index 0000000..d2ad7ce
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_frag_shade.sv
@@ -0,0 +1,53 @@
+`include "gfx/gfx_defs.sv"
+// gfx_frag_shade: interpolates each colour channel with gfx_lerp and saturates the fixed-point result to 8 bits.
+module gfx_frag_shade
+(
+	input logic clk,
+
+	input fixed b1,
+	            b2,
+	input color_lerp_lanes argb0,
+	                       argb1_argb0,
+	                       argb2_argb0,
+	input logic stall,
+
+	output rgb32 color
+);
+
+	struct packed
+	{
+		logic sign;
+		logic[$bits(fixed) - `FIXED_FRAC - 2:0] out_of_range;
+		color8 color;
+		logic[`FIXED_FRAC - $bits(color8) - 1:0] sub;
+	} lerped[`COLOR_CHANNELS]; // bit-field view of each interpolated `fixed` value
+
+	fixed channel_lerp[`COLOR_CHANNELS];
+	color8[`COLOR_CHANNELS - 1:0] out;
+
+	assign color = out;
+
+	genvar i;
+	generate
+		for (i = 0; i < `COLOR_CHANNELS; ++i) begin: channels
+			assign lerped[i] = channel_lerp[i];
+
+			gfx_lerp lerp
+			(
+				.q(channel_lerp[i]),
+				.q0(argb0[i]),
+				.q1_q0(argb1_argb0[i]),
+				.q2_q0(argb2_argb0[i]),
+				.*
+			);
+
+			always_ff @(posedge clk)
+				if (!stall) begin
+					out[i] <= lerped[i].color;
+					if (lerped[i].sign || |lerped[i].out_of_range) // clamp: all zeros if negative, all ones if too large
+						out[i] <= {($bits(color8)){!lerped[i].sign}};
+				end
+		end
+	endgenerate
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_funnel.sv b/rtl/legacy_gfx/gfx_funnel.sv
new file mode 100644
index 0000000..4710111
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_funnel.sv
@@ -0,0
+1,96 @@
+`include "gfx/gfx_defs.sv"
+// gfx_funnel: turns one multi-lane input (in_valid is a per-lane mask) into a stream of single fragments, one lane per unstalled cycle.
+module gfx_funnel
+(
+	input logic clk,
+	            rst_n,
+
+	input frag_xy_lanes fragments,
+	input bary_lanes barys,
+	input fixed_tri raster_ws,
+	input paint_lanes in_valid,
+	output logic in_ready,
+
+	input logic out_ready,
+	output logic out_valid,
+	output frag_xy frag,
+	output fixed_tri frag_bary,
+	                 frag_ws
+);
+
+	logic skid_ready, stall, ready, valid;
+	frag_xy next_frag, out_frag;
+	fixed_tri next_bary, out_bary, out_ws, ws_hold;
+	bary_lanes barys_hold;
+	paint_lanes current, next;
+	frag_xy_lanes fragments_hold;
+
+	assign ready = !(|next); // no lanes left after the current pick, so a new input can be latched
+	assign in_ready = skid_ready && ready;
+
+	gfx_skid_buf #(.WIDTH($bits(frag))) skid_frag
+	(
+		.in(out_frag),
+		.out(frag),
+		.*
+	);
+
+	gfx_skid_buf #(.WIDTH($bits(frag_bary))) skid_bary
+	(
+		.in(out_bary),
+		.out(frag_bary),
+		.*
+	);
+
+	gfx_skid_buf #(.WIDTH($bits(frag_ws))) skid_ws
+	(
+		.in(out_ws),
+		.out(frag_ws),
+		.*
+	);
+
+	gfx_skid_flow skid_flow
+	(
+		.in_ready(skid_ready),
+		.in_valid(valid),
+		.*
+	);
+	// Pick one pending lane (the highest set index wins, later iterations override) and clear it in `next`
+	always_comb begin
+		next = 0;
+		next_bary = {($bits(next_bary)){1'bx}};
+		next_frag = {($bits(next_frag)){1'bx}};
+
+		for (integer i = 0; i < `GFX_FINE_LANES; ++i)
+			if (current[i]) begin
+				next = current;
+				next[i] = 0;
+
+				next_bary = barys_hold[i];
+				next_frag = fragments_hold[i];
+			end
+	end
+
+	always_ff @(posedge clk or negedge rst_n)
+		if (!rst_n) begin
+			valid <= 0;
+			current <= 0;
+		end else if (!stall) begin
+			valid <= |current;
+			current <= ready ?
in_valid : next;
+		end
+
+	always_ff @(posedge clk)
+		if (!stall) begin
+			if (ready) begin
+				ws_hold <= raster_ws;
+				barys_hold <= barys;
+				fragments_hold <= fragments;
+			end
+
+			out_ws <= ws_hold;
+			out_bary <= next_bary;
+			out_frag <= next_frag;
+		end
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_lerp.sv b/rtl/legacy_gfx/gfx_lerp.sv
new file mode 100644
index 0000000..42e4393
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_lerp.sv
@@ -0,0 +1,32 @@
+`include "gfx/gfx_defs.sv"
+// gfx_lerp: q = q0 + b1 * q1_q0 + b2 * q2_q0, computed by one gfx_fixed_fma_dot.
+module gfx_lerp
+(
+	input logic clk,
+
+	input fixed b1,
+	            b2,
+	            q0,
+	            q1_q0,
+	            q2_q0,
+	input logic stall,
+
+	output fixed q
+);
+
+	/* Linear interpolation, trivialized.
+	 *
+	 * This is the key: https://fgiesen.wordpress.com/2013/02/06/the-barycentric-conspirac/
+	 */
+
+	gfx_fixed_fma_dot fma
+	(
+		.c(q0),
+		.a0(b1),
+		.b0(q1_q0),
+		.a1(b2),
+		.b1(q2_q0),
+		.*
+	);
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_mask_sram.sv b/rtl/legacy_gfx/gfx_mask_sram.sv
new file mode 100644
index 0000000..730ee12
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_mask_sram.sv
@@ -0,0 +1,31 @@
+`include "gfx/gfx_defs.sv"
+// gfx_mask_sram: one bit per pixel (`GFX_LINEAR_RES entries); registered address, data and output give a three-clock read latency.
+module gfx_mask_sram
+(
+	input logic clk,
+
+	input logic set,
+	            write,
+	input linear_coord write_addr,
+	                   read_addr,
+	output logic mask
+);
+
+	logic mem[`GFX_LINEAR_RES];
+	logic mask_hold, write_hold, set_hold;
+	linear_coord read_addr_hold, write_addr_hold;
+
+	always_ff @(posedge clk) begin
+		mask <= mask_hold;
+		mask_hold <= mem[read_addr_hold];
+		read_addr_hold <= read_addr;
+
+		set_hold <= set;
+		write_hold <= write;
+		write_addr_hold <= write_addr;
+
+		if (write_hold)
+			mem[write_addr_hold] <= set_hold;
+	end
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_masks.sv b/rtl/legacy_gfx/gfx_masks.sv
new file mode 100644
index 0000000..5182bd4
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_masks.sv
@@ -0,0 +1,68 @@
+`include "gfx/gfx_defs.sv"
+// gfx_masks: two gfx_mask_sram instances; swap_buffers selects which serves the fragment side and which the scanout side.
+module gfx_masks
+(
+	input logic clk,
+	            rst_n,
+
+	input logic swap_buffers,
+	input cmd_word fb_base_a,
+	               fb_base_b,
+
+	input linear_coord scan_mask_addr,
+
output logic scan_mask,
+
+	input logic frag_mask_write,
+	            frag_mask_set,
+	input linear_coord frag_mask_read_addr,
+	                   frag_mask_write_addr,
+	output logic frag_mask,
+
+	output vram_addr frag_base,
+	                 scan_base
+);
+
+	logic mask_a, mask_b, frag_write_hold, frag_set_hold;
+	linear_coord scan_addr_hold, frag_write_addr_hold, frag_read_addr_hold;
+	// With swap_buffers set, sram_a takes fragment reads and writes while sram_b serves scanout; cleared, the roles swap
+	gfx_mask_sram sram_a
+	(
+		.set(frag_set_hold),
+		.mask(mask_a),
+		.write(swap_buffers && frag_write_hold),
+		.read_addr(swap_buffers ? frag_read_addr_hold : scan_addr_hold),
+		.write_addr(frag_write_addr_hold),
+		.*
+	);
+
+	gfx_mask_sram sram_b
+	(
+		.set(frag_set_hold),
+		.mask(mask_b),
+		.write(!swap_buffers && frag_write_hold),
+		.read_addr(swap_buffers ? scan_addr_hold : frag_read_addr_hold),
+		.write_addr(frag_write_addr_hold),
+		.*
+	);
+	// NOTE(review): bit 0 of fb_base_* is dropped below; presumably a byte-to-16-bit-word address conversion, confirm
+	always_ff @(posedge clk or negedge rst_n)
+		if (!rst_n) begin
+			frag_base <= 0;
+			scan_base <= 0;
+		end else begin
+			frag_base <= swap_buffers ? fb_base_a[$bits(vram_addr):1] : fb_base_b[$bits(vram_addr):1];
+			scan_base <= swap_buffers ? fb_base_b[$bits(vram_addr):1] : fb_base_a[$bits(vram_addr):1];
+		end
+
+	always_ff @(posedge clk) begin
+		scan_mask <= swap_buffers ? mask_b : mask_a;
+		scan_addr_hold <= scan_mask_addr;
+
+		frag_mask <= swap_buffers ?
mask_a : mask_b;
+		frag_set_hold <= frag_mask_set;
+		frag_write_hold <= frag_mask_write;
+		frag_read_addr_hold <= frag_mask_read_addr;
+		frag_write_addr_hold <= frag_mask_write_addr;
+	end
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_mat_mat.sv b/rtl/legacy_gfx/gfx_mat_mat.sv
new file mode 100644
index 0000000..d03a648
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_mat_mat.sv
@@ -0,0 +1,83 @@
+`include "gfx/gfx_defs.sv"
+// gfx_mat_mat: matrix product on one gfx_mat_vec: the four entries of gfx_transpose(b) go through one per beat and the results are transposed into q.
+module gfx_mat_mat
+(
+	input logic clk,
+	            rst_n,
+
+	input mat4 a,
+	           b,
+	input logic in_valid,
+	            out_ready,
+
+	output mat4 q,
+	output logic in_ready,
+	             out_valid
+);
+
+	mat4 a_hold, b_hold, b_transpose, q_hold, q_transpose, mul_b;
+	vec4 mul_q;
+	logic mul_in_ready, mul_in_valid, mul_out_ready, mul_out_valid;
+	index4 in_index, out_index;
+	// A new matrix pair is accepted only on beat 0; the result is offered only on the last beat
+	assign in_ready = mul_in_ready && in_index == `INDEX4_MIN;
+	assign out_valid = mul_out_valid && out_index == `INDEX4_MAX;
+
+	assign mul_in_valid = in_valid || in_index != `INDEX4_MIN;
+	assign mul_out_ready = out_ready || out_index != `INDEX4_MAX;
+
+	gfx_transpose transpose_b
+	(
+		.in(b),
+		.out(b_transpose)
+	);
+
+	gfx_mat_vec mul
+	(
+		.a(in_index == `INDEX4_MIN ?
a : a_hold),
+		.x(mul_b[in_index]),
+		.q(mul_q),
+		.in_ready(mul_in_ready),
+		.in_valid(mul_in_valid),
+		.out_ready(mul_out_ready),
+		.out_valid(mul_out_valid),
+		.*
+	);
+
+	gfx_transpose transpose_q
+	(
+		.in(q_transpose),
+		.out(q)
+	);
+	// Beat 0 uses the live input and the last beat uses the live multiplier output; the other beats use held copies
+	always_comb begin
+		mul_b = b_hold;
+		mul_b[0] = b_transpose[0];
+
+		q_transpose = q_hold;
+		q_transpose[`VECS_PER_MAT - 1] = mul_q;
+	end
+
+	always_ff @(posedge clk or negedge rst_n)
+		if (!rst_n) begin
+			in_index <= `INDEX4_MIN;
+			out_index <= `INDEX4_MIN;
+		end else begin
+			if (mul_in_ready && mul_in_valid)
+				in_index <= in_index + 1;
+
+			if (mul_out_ready && mul_out_valid)
+				out_index <= out_index + 1;
+		end
+
+	always_ff @(posedge clk) begin
+		if (in_ready) begin
+			a_hold <= a;
+			b_hold <= b_transpose;
+		end
+
+		if (mul_out_ready && mul_out_valid)
+			q_hold[out_index] <= mul_q;
+	end
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_mat_vec.sv b/rtl/legacy_gfx/gfx_mat_vec.sv
new file mode 100644
index 0000000..4be4976
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_mat_vec.sv
@@ -0,0 +1,49 @@
+`include "gfx/gfx_defs.sv"
+// gfx_mat_vec: q[i] = dot(a[i], x), one gfx_dot per entry of a, all sharing one gfx_pipeline_flow (multiply) and one gfx_fold_flow (sum).
+module gfx_mat_vec
+(
+	input logic clk,
+	            rst_n,
+
+	input mat4 a,
+	input vec4 x,
+	input logic in_valid,
+	            out_ready,
+
+	output vec4 q,
+	output logic in_ready,
+	             out_valid
+);
+
+	logic stall_mul, stall_fold, mul_ready, mul_valid, feedback, feedback_last;
+
+	gfx_pipeline_flow #(.STAGES(`FP_MUL_STAGES)) mul
+	(
+		.stall(stall_mul),
+		.out_ready(mul_ready),
+		.out_valid(mul_valid),
+		.*
+	);
+
+	gfx_fold_flow fold
+	(
+		.stall(stall_fold),
+		.in_ready(mul_ready),
+		.in_valid(mul_valid),
+		.*
+	);
+
+	genvar i;
+	generate
+		for (i = 0; i < `VECS_PER_MAT; ++i) begin: dots
+			gfx_dot dot_i
+			(
+				.a(a[i]),
+				.b(x),
+				.q(q[i]),
+				.*
+			);
+		end
+	endgenerate
+
+endmodule
diff --git a/rtl/legacy_gfx/gfx_mem.sv b/rtl/legacy_gfx/gfx_mem.sv
new file mode 100644
index 0000000..fbca2fa
--- /dev/null
+++ b/rtl/legacy_gfx/gfx_mem.sv
@@ -0,0 +1,228 @@
+`include "gfx/gfx_defs.sv"
+
+module gfx_mem
+(
+	input logic clk,
+	            rst_n,
+ + input logic mem_waitrequest, + mem_readdatavalid, + input vram_word mem_readdata, + output vram_byte_addr mem_address, + output logic mem_read, + mem_write, + output vram_word mem_writedata, + + input vram_addr host_address, + input logic host_read, + host_write, + input vram_word host_writedata, + output logic host_waitrequest, + host_readdatavalid, + output vram_word host_readdata, + + input logic rop_write, + input vram_word rop_writedata, + input vram_addr rop_address, + output logic rop_waitrequest, + + input logic fb_read, + input vram_addr fb_address, + output logic fb_waitrequest, + fb_readdatavalid, + output vram_word fb_readdata, + + input logic batch_read, + input vram_addr batch_address, + output logic batch_waitrequest, + batch_readdatavalid, + output vram_word batch_readdata, + + input logic fetch_read, + input vram_addr fetch_address, + output logic fetch_waitrequest, + fetch_readdatavalid, + output vram_word fetch_readdata +); + + // Este módulo es inaceptable, hay que reescribirlo + + logic mem_rw, trans_in_stall, trans_out_stall, in_ready, in_valid, skid_in_valid, out_ready, + any_readdatavalid, readdatavalid, dispatch_full, dispatch_put, mem_ready; + + vram_word any_readdata, readdata; + logic[$clog2(`GFX_MEM_DISPATCH_DEPTH) - 1:0] next_put_ptr, pop_ptr, put_ptr; + + struct packed + { + logic fb, + host, + batch, + fetch; + } dispatch_in, dispatch_out, dispatch_buf[`GFX_MEM_DISPATCH_DEPTH]; + + struct packed + { + vram_addr address; + logic write, + fb_waitrequest, + host_waitrequest, + batch_waitrequest, + fetch_waitrequest; + vram_word writedata; + } trans_in, trans_out, trans_in_skid, trans_out_skid; + + assign mem_read = mem_rw && !trans_out_skid.write && !dispatch_full; + assign mem_write = mem_rw && trans_out_skid.write; + assign mem_address = {trans_out_skid.address, {`GFX_MEM_SUBWORD_BITS{1'b0}}}; + assign mem_writedata = trans_out_skid.writedata; + + assign fb_readdata = any_readdata; + assign host_readdata = any_readdata; + assign 
batch_readdata = any_readdata; + assign fetch_readdata = any_readdata; + + assign fb_readdatavalid = any_readdatavalid && dispatch_out.fb; + assign host_readdatavalid = any_readdatavalid && dispatch_out.host; + assign batch_readdatavalid = any_readdatavalid && dispatch_out.batch; + assign fetch_readdatavalid = any_readdatavalid && dispatch_out.fetch; + + assign dispatch_in.fb = !trans_out_skid.fb_waitrequest; + assign dispatch_in.host = !trans_out_skid.host_waitrequest; + assign dispatch_in.batch = !trans_out_skid.batch_waitrequest; + assign dispatch_in.fetch = !trans_out_skid.fetch_waitrequest; + + assign in_valid = rop_write || fb_read || batch_read || fetch_read || host_read || host_write; + assign mem_ready = !mem_waitrequest && (!dispatch_full || trans_out_skid.write); + assign next_put_ptr = put_ptr + 1; + assign dispatch_put = mem_ready && mem_rw && !trans_out_skid.write; + assign dispatch_full = next_put_ptr == pop_ptr; + + /* Cerrar timing aquí no es tan fácil, debido al enrutamiento al el que + * necesariamente está sujeto este módulo (eg, VRAM y DAC están en + * posiciones fijas en los bordes de la FPGA y no pueden reacomodarse). 
+ */ + + gfx_skid_buf #(.WIDTH($bits(trans_in))) in_skid + ( + .in(trans_in), + .out(trans_in_skid), + .stall(trans_in_stall), + .* + ); + + gfx_skid_flow in_flow + ( + .stall(trans_in_stall), + .out_ready(out_ready), + .out_valid(skid_in_valid), + .* + ); + + gfx_pipes #(.WIDTH($bits(trans_out)), .DEPTH(`GFX_MEM_TRANS_DEPTH)) out_pipes + ( + .in(trans_in_skid), + .out(trans_out), + .stall(trans_out_stall), + .* + ); + + gfx_skid_buf #(.WIDTH($bits(trans_out))) out_skid + ( + .in(trans_out), + .out(trans_out_skid), + .stall(trans_out_stall), + .* + ); + + gfx_pipeline_flow #(.STAGES(`GFX_MEM_TRANS_DEPTH)) out_flow + ( + .stall(trans_out_stall), + .in_ready(out_ready), + .in_valid(skid_in_valid), + .out_ready(mem_ready), + .out_valid(mem_rw), + .* + ); + + gfx_pipes #(.WIDTH($bits(vram_word)), .DEPTH(`GFX_MEM_RESPONSE_DEPTH)) readdata_pipes + ( + .in(mem_readdata), + .out(readdata), + .stall(0), + .* + ); + + gfx_pipeline_flow #(.STAGES(`GFX_MEM_RESPONSE_DEPTH)) readdata_flow + ( + .stall(), + .in_ready(), + .in_valid(mem_readdatavalid), + .out_ready(1), + .out_valid(readdatavalid), + .* + ); + + always_comb begin + fb_waitrequest = 1; + rop_waitrequest = 1; + host_waitrequest = 1; + batch_waitrequest = 1; + fetch_waitrequest = 1; + + trans_in.write = 0; + trans_in.writedata = {($bits(trans_in.writedata)){1'bx}}; + + if (fb_read) begin + fb_waitrequest = !in_ready; + trans_in.address = fb_address; + end else if (batch_read) begin + batch_waitrequest = !in_ready; + trans_in.address = batch_address; + end else if (rop_write) begin + rop_waitrequest = !in_ready; + + trans_in.write = 1; + trans_in.address = rop_address; + trans_in.writedata = rop_writedata; + end else if (fetch_read) begin + fetch_waitrequest = !in_ready; + trans_in.address = fetch_address; + end else begin + host_waitrequest = !in_ready; + + trans_in.write = host_write; + trans_in.address = host_address; + trans_in.writedata = host_writedata; + end + + trans_in.fb_waitrequest = fb_waitrequest; + 
// Perspective stage -- currently a bypass.
//
// The three vertices and the valid/ready handshake are forwarded unchanged
// and combinationally; clk and rst_n are unused in this form. The pipelined
// implementation (one gfx_persp_vertex per vertex behind a
// `FIXED_DIV_STAGES-deep gfx_pipeline_flow) is kept below, disabled.
module gfx_persp
(
	input  logic       clk,
	                   rst_n,

	input  raster_xyzw in_vertex_a,
	                   in_vertex_b,
	                   in_vertex_c,
	input  logic       in_valid,
	output logic       in_ready,

	input  logic       out_ready,
	output logic       out_valid,
	output raster_xyzw out_vertex_a,
	                   out_vertex_b,
	                   out_vertex_c
);

	// Sorry, Ronald
	always_comb begin
		out_vertex_a = in_vertex_a;
		out_vertex_b = in_vertex_b;
		out_vertex_c = in_vertex_c;

		out_valid = in_valid;
		in_ready  = out_ready;
	end

/* Disabled pipelined implementation:

	logic stall;

	gfx_pipeline_flow #(.STAGES(`FIXED_DIV_STAGES)) flow
	(
		.*
	);

	gfx_persp_vertex persp_a
	(
		.in_vertex(in_vertex_a),
		.out_vertex(out_vertex_a),
		.*
	);

	gfx_persp_vertex persp_b
	(
		.in_vertex(in_vertex_b),
		.out_vertex(out_vertex_b),
		.*
	);

	gfx_persp_vertex persp_c
	(
		.in_vertex(in_vertex_c),
		.out_vertex(out_vertex_c),
		.*
	);
*/

endmodule
// Valid-bit tracker for a STAGES-deep data pipeline.
//
// in_valid is shifted into valid[0] and advances one position per clock
// while stall is low. The last bit, valid[STAGES - 1], is handed to a
// gfx_skid_flow, which produces in_ready, out_valid and stall (those three,
// plus clk, rst_n and out_ready, are bound through .*).
//
// STAGES must be overridden with a value >= 1: with the default of 0,
// valid[STAGES - 1] selects a bit that nothing in this module drives.
module gfx_pipeline_flow
#(parameter STAGES=0)
(
	input  logic clk,
	             rst_n,

	input  logic in_valid,
	             out_ready,

	output logic in_ready,
	             out_valid,
	             stall
);

	logic[STAGES - 1:0] valid;

	// Single shift register process; bit 0 is always present, the rest
	// exist only for STAGES > 1.
	always_ff @(posedge clk or negedge rst_n)
		if (!rst_n) begin
			valid[0] <= 0;

			for (int n = 1; n < STAGES; ++n)
				valid[n] <= 0;
		end else if (!stall) begin
			valid[0] <= in_valid;

			for (int n = 1; n < STAGES; ++n)
				valid[n] <= valid[n - 1];
		end

	gfx_skid_flow skid
	(
		.in_valid(valid[STAGES - 1]),
		.*
	);

endmodule
// Triangle rasterizer: setup -> coarse traversal -> per-lane fine test.
//
//   * gfx_setup / setup_flow   (`GFX_SETUP_STAGES deep) take the xy part of
//     the three vertices and produce the reference position, spans, edge
//     references and offsets consumed by the coarse stage.
//   * gfx_raster_coarse emits one (coarse_pos, coarse_corners, fine_offsets)
//     tuple per block it decides to send.
//   * A `GFX_RASTER_SIZE x `GFX_RASTER_SIZE grid of gfx_raster_fine units
//     evaluates each block; unit (i, j) drives lane j * `GFX_RASTER_SIZE + i
//     of fragments, barys and the paint mask.
//
// out_valid[lane] is raised only for lanes whose paint bit is set. A block
// with no painted lane is drained without waiting for out_ready.
module gfx_raster
(
	input  logic         clk,
	                     rst_n,

	input  raster_xyzw   vertex_a,
	                     vertex_b,
	                     vertex_c,
	input  logic         in_valid,
	output logic         in_ready,

	output frag_xy_lanes fragments,
	output bary_lanes    barys,
	output fixed_tri     ws,
	input  logic         out_ready,
	output paint_lanes   out_valid
);

	// Every internal signal is declared before its first use. Previously
	// coarse_ready, fine_ready and skid_paint_ij (among others) were
	// referenced in port connections and in an always_comb ahead of their
	// declarations; strict tools infer an implicit net at the port
	// connection and then reject the later declaration.

	// Setup stage
	logic setup_stall, setup_valid;
	fixed_tri coarse_x_offsets, coarse_y_offsets, coarse_test_offsets, edge_refs;
	raster_xy pos_ref;
	coarse_dim span_x, span_y;
	raster_offsets_tri offsets;

	// Coarse stage
	logic coarse_ready, coarse_valid;
	fixed_tri coarse_corners;
	raster_xy coarse_pos;
	raster_offsets_tri fine_offsets;

	// Fine stage
	logic fine_ready, fine_stall, fine_valid;
	frag_xy fragment_ij[`GFX_RASTER_SIZE][`GFX_RASTER_SIZE];
	fixed_tri barys_ij[`GFX_RASTER_SIZE][`GFX_RASTER_SIZE];
	logic[`GFX_FINE_LANES - 1:0] paint_ij, skid_paint_ij;

	// TODO: exactly the same issue as with offsets.
	// ws is forwarded combinationally from the vertex inputs, with no
	// pipeline registers in this module.
	// NOTE(review): it is therefore presumably not aligned with the
	// pipelined outputs -- confirm.
	assign ws[0] = vertex_a.zw.w;
	assign ws[1] = vertex_b.zw.w;
	assign ws[2] = vertex_c.zw.w;

	gfx_pipeline_flow #(.STAGES(`GFX_SETUP_STAGES)) setup_flow
	(
		.stall(setup_stall),
		.out_ready(coarse_ready),
		.out_valid(setup_valid),
		.*
	);

	// Remaining ports (setup outputs such as pos_ref, span_x, offsets, ...)
	// are bound by name through .*
	gfx_setup setup
	(
		.stall(setup_stall),
		.vertex_a(vertex_a.xy),
		.vertex_b(vertex_b.xy),
		.vertex_c(vertex_c.xy),
		.*
	);

	gfx_raster_coarse coarse
	(
		.in_valid(setup_valid),
		.in_ready(coarse_ready),
		.out_ready(fine_ready),
		.out_valid(coarse_valid),
		.pos(coarse_pos),
		.corners(coarse_corners),
		.*
	);

	// Per-lane valid: the block is valid and this lane is painted
	always_comb
		for (int lane = 0; lane < `GFX_FINE_LANES; ++lane)
			out_valid[lane] = fine_valid && skid_paint_ij[lane];

	gfx_pipeline_flow #(.STAGES(`GFX_FINE_STAGES)) fine_flow
	(
		.stall(fine_stall),
		.in_ready(fine_ready),
		.in_valid(coarse_valid),
		// Blocks without any painted lane are consumed immediately
		.out_ready(out_ready || !(|skid_paint_ij)),
		.out_valid(fine_valid),
		.*
	);

	gfx_skid_buf #(.WIDTH(`GFX_FINE_LANES)) skid_paint
	(
		.in(paint_ij),
		.out(skid_paint_ij),
		.stall(fine_stall),
		.*
	);

	genvar i, j;
	generate
		for (i = 0; i < `GFX_RASTER_SIZE; ++i) begin: fine_x
			for (j = 0; j < `GFX_RASTER_SIZE; ++j) begin: fine_y
				gfx_raster_fine #(.X(i), .Y(j)) fine
				(
					.stall(fine_stall),

					.pos(coarse_pos),
					.corners(coarse_corners),
					.offsets(fine_offsets),

					.barys(barys_ij[i][j]),
					.paint(paint_ij[j * `GFX_RASTER_SIZE + i]),
					.fragment(fragment_ij[i][j]),
					.*
				);

				gfx_skid_buf #(.WIDTH($bits(frag_xy))) skid_fragment
				(
					.in(fragment_ij[i][j]),
					.out(fragments[j * `GFX_RASTER_SIZE + i]),
					.stall(fine_stall),
					.*
				);

				gfx_skid_buf #(.WIDTH($bits(fixed_tri))) skid_barys
				(
					.in(barys_ij[i][j]),
					.out(barys[j * `GFX_RASTER_SIZE + i]),
					.stall(fine_stall),
					.*
				);
			end
		end
	endgenerate

endmodule
hold_offsets; + + fixed_tri hold_coarse_x_offsets, hold_coarse_y_offsets, hold_coarse_test_offsets; + + struct packed + { + raster_xy pos; + fixed_tri corners; + raster_offsets_tri fine_offsets; + } out, skid_out; + + assign pos = skid_out.pos; + assign corners = skid_out.corners; + assign fine_offsets = skid_out.fine_offsets; + + assign end_x = stride_x == 0; + assign end_y = stride_y == 0; + + assign send = &edge_signs && send_valid; + assign in_ready = skid_ready && !running; + + gfx_skid_buf #(.WIDTH($bits(out))) skid_buf + ( + .in(out), + .out(skid_out), + .* + ); + + gfx_skid_flow skid_flow + ( + .in_ready(skid_ready), + .in_valid(send), + .* + ); + + always_comb + for (integer i = 0; i < 3; ++i) begin + edge_tests[i] = edge_fns[i] + hold_coarse_test_offsets[i]; + edge_vert_next[i] = edge_vert[i] + hold_coarse_y_offsets[i]; + end + + always_ff @(posedge clk or negedge rst_n) + if (!rst_n) begin + running <= 0; + send_valid <= 0; + end else if (!stall) begin + if (running) + running <= !end_x || !end_y; + else + running <= in_ready && in_valid; + + send_valid <= running; + end + + always_ff @(posedge clk) + if (!stall) begin + out.pos <= next_pos; + out.corners <= edge_fns; + out.fine_offsets <= hold_offsets; + + stride_x <= stride_x - 1; + next_pos.x <= next_pos.x + (1 << (`FIXED_FRAC + `GFX_RASTER_BITS)); + + if (end_x) begin + next_pos.x <= reference_x; + next_pos.y <= next_pos.y + (1 << (`FIXED_FRAC + `GFX_RASTER_BITS)); + + stride_x <= width; + stride_y <= stride_y - 1; + end + + if (in_ready && in_valid) begin + next_pos <= pos_ref; + reference_x <= pos_ref.x; + + width <= span_x; + stride_x <= span_x; + stride_y <= span_y; + + hold_offsets <= offsets; + hold_coarse_x_offsets <= coarse_x_offsets; + hold_coarse_y_offsets <= coarse_y_offsets; + hold_coarse_test_offsets <= coarse_test_offsets; + end + + for (integer i = 0; i < 3; ++i) begin + edge_fns[i] <= edge_fns[i] + hold_coarse_x_offsets[i]; + if (end_x) begin + edge_fns[i] <= edge_vert_next[i]; + 
// Fine rasterizer unit for position (X, Y) inside a raster block.
//
// Two register stages, both frozen while stall is high:
//   1. edges[e] <= corners[e] + offsets[e][LANE], and the fragment
//      coordinate is assembled from the sign and coarse fields of pos with
//      the constant X / Y placed in the low `GFX_RASTER_BITS bits.
//   2. barys <= edges, fragment <= stage-1 coordinate, and paint is set
//      when none of the three edge values has its top bit set.
module gfx_raster_fine
#(parameter X=0, Y=0)
(
	input  logic              clk,

	input  raster_xy          pos,
	input  fixed_tri          corners,
	input  raster_offsets_tri offsets,
	input  logic              stall,

	output frag_xy            fragment,
	output fixed_tri          barys,
	output logic              paint
);

	// Position of this unit in the per-edge offset tables
	localparam LANE = Y * `GFX_RASTER_SIZE + X;

	logic[`GFX_RASTER_BITS - 1:0] fine_x, fine_y;
	raster_xy_prec prec;
	fixed_tri per_edge_offsets, edges;
	logic[2:0] signs;
	frag_xy fragment_hold;

	assign fine_y = Y;
	assign fine_x = X;

	// Field-wise view of the incoming position
	assign prec = pos;

	always_comb
		for (int e = 0; e < 3; ++e) begin
			per_edge_offsets[e] = offsets[e][LANE];
			signs[e] = edges[e][$bits(edges[0]) - 1];
		end

	always_ff @(posedge clk)
		if (!stall) begin
			// Stage 1
			for (int e = 0; e < 3; ++e)
				edges[e] <= corners[e] + per_edge_offsets[e];

			fragment_hold.y <= {prec.y.sign, prec.y.coarse, fine_y};
			fragment_hold.x <= {prec.x.sign, prec.x.coarse, fine_x};

			// Stage 2
			fragment <= fragment_hold;
			barys <= edges;
			paint <= ~|signs;
		end

endmodule
// Scanout front end: walks the mask memory, fetches framebuffer words for
// masked positions and feeds gfx_scanout_dac.
//
// mask_in_addr counts up every clock and is rewound to commit_addr on flush
// or while waiting for vsync. Its upper $bits(mask_addr) bits address the
// mask memory. The address comes back `GFX_MASK_STAGES clocks later as
// mask_out_addr and is used to form fb_address (scan_base + address).
// A framebuffer read is issued only when effective_mask is set, i.e. when
// the mask bit is set or clearing is disabled.
//
// NOTE(review): commit and flush are not driven anywhere in this module, so
// they are presumably outputs of gfx_flush_flow bound through .* -- confirm.
// For that reason no local signal has been renamed.
module gfx_scanout
(
	input  logic        clk,
	                    rst_n,

	input  logic        enable_clear,
	input  rgb24        clear_color,
	input  vram_addr    scan_base,

	input  logic        mask,
	output linear_coord mask_addr,

	input  logic        fb_waitrequest,
	                    fb_readdatavalid,
	input  vram_word    fb_readdata,
	output logic        fb_read,
	output vram_addr    fb_address,

	input  logic        scan_ready,
	output logic        scan_valid,
	                    scan_endofpacket,
	                    scan_startofpacket,
	output rgb30        scan_data,

	output logic        vsync
);

	logic commit, effective_mask, flush, mask_fifo_out, dac_ready,
	      fb_ready, mask_fifo_ready, fb_fifo_valid, mask_fifo_valid,
	      pop, put, put_mask, next_vsync, start_vsync, wait_vsync;

	vram_word fb_fifo_out;
	half_coord commit_addr, mask_in_addr, mask_out_addr, mask_hold_addr, max_addr;

	always_comb begin
		// Upper bits of the running address select the mask entry
		mask_addr = mask_in_addr[$bits(mask_in_addr) - 1:$bits(mask_in_addr) - $bits(mask_addr)];

		// Last half-word address of the frame
		max_addr[0] = 1;
		max_addr[$bits(max_addr) - 1:1] = `GFX_X_RES * `GFX_Y_RES - 1;

		// The read port can take a new request when idle or accepted
		fb_ready = !fb_read || !fb_waitrequest;

		effective_mask = mask || !enable_clear;
		start_vsync = mask_hold_addr == max_addr;
		next_vsync = commit && start_vsync;
	end

	gfx_flush_flow #(.STAGES(`GFX_MASK_STAGES)) mask_flow
	(
		.in_valid(!wait_vsync),
		.out_ready(fb_ready && mask_fifo_ready && !next_vsync),
		.out_valid(pop),
		.*
	);

	gfx_pipes #(.WIDTH($bits(mask_in_addr)), .DEPTH(`GFX_MASK_STAGES)) addr_pipes
	(
		.in(mask_in_addr),
		.out(mask_out_addr),
		.stall(1'b0),
		.*
	);

	/* These FIFOs must satisfy two properties to guarantee correctness:
	 *
	 * 1. mask_fifo.out_ready && mask_fifo.out_valid <=> scan.in_ready && scan.in_valid
	 * 2. fb_fifo.out_ready && fb_fifo.out_valid => scan.in_ready && scan.in_valid
	 *
	 * Note the asymmetry (<=> vs =>), which is due to mask_fifo.out
	 */

	gfx_fifo #(.WIDTH($bits(effective_mask)), .DEPTH(`GFX_SCANOUT_FIFO_DEPTH)) mask_fifo
	(
		.in(put_mask),
		.out(mask_fifo_out),
		.in_ready(mask_fifo_ready),
		.in_valid(put),
		.out_ready(dac_ready && (!mask_fifo_out || fb_fifo_valid)),
		.out_valid(mask_fifo_valid),
		.*
	);

	// 2x depth to avoid a potential overflow when fb_read=1 but mask_fifo is full
	gfx_fifo #(.WIDTH($bits(vram_word)), .DEPTH(2 * `GFX_SCANOUT_FIFO_DEPTH)) fb_fifo
	(
		.in(fb_readdata),
		.out(fb_fifo_out),
		.in_ready(), // readdatavalid does not support backpressure
		.in_valid(fb_readdatavalid),
		.out_ready(dac_ready && mask_fifo_valid && mask_fifo_out),
		.out_valid(fb_fifo_valid),
		.*
	);

	gfx_scanout_dac dac
	(
		.in_ready(dac_ready),
		.in_valid(mask_fifo_valid && (!mask_fifo_out || fb_fifo_valid)),
		.*
	);

	always_ff @(posedge clk or negedge rst_n)
		if (!rst_n) begin
			mask_in_addr <= 0;
			commit_addr <= 0;
			wait_vsync <= 0;
			fb_read <= 0;
			put <= 0;
		end else begin
			// Rewind to the last committed address, otherwise keep counting
			if (flush || wait_vsync)
				mask_in_addr <= commit_addr;
			else
				mask_in_addr <= mask_in_addr + 1;

			if (commit)
				commit_addr <= start_vsync ? 0 : mask_out_addr;

			// vsync clears the wait flag and has priority over commit
			if (vsync)
				wait_vsync <= 0;
			else if (commit)
				wait_vsync <= start_vsync;

			if (fb_ready)
				fb_read <= mask_fifo_ready && pop && !next_vsync && effective_mask;

			if (mask_fifo_ready)
				put <= fb_ready && pop && !next_vsync;
		end

	// Datapath registers, no reset
	always_ff @(posedge clk) begin
		mask_hold_addr <= mask_out_addr;

		if (mask_fifo_ready)
			put_mask <= effective_mask;

		if (fb_ready)
			fb_address <= scan_base + {5'd0, mask_out_addr};
	end

endmodule
// Triangle setup.
//
// From the three xy vertices this module produces, behind a skid buffer:
//   pos_ref, span_x, span_y   bounding-box reference and spans, taken from
//                             gfx_setup_bounds and delayed to line up with
//                             the edge results
//   edge_refs[i]              base value of edge i (gfx_setup_edge .base)
//   coarse_x/y_offsets[i]     per-edge increments shifted left by
//                             `GFX_RASTER_BITS
//   offsets[i]                per-position offsets for edge i, from
//                             gfx_setup_offsets
//   coarse_test_offsets[i]    the entry of offsets[i] used by the coarse
//                             block test (see the case statement below)
//
// Edge i runs from ps[i] to qs[i]: a->b, b->c, c->a.
module gfx_setup
(
	input  logic              clk,

	input  raster_xy          vertex_a,
	                          vertex_b,
	                          vertex_c,
	input  logic              stall,

	output raster_xy          pos_ref,
	output coarse_dim         span_x,
	                          span_y,
	output raster_offsets_tri offsets,
	output fixed_tri          edge_refs,
	                          coarse_x_offsets,
	                          coarse_y_offsets,
	                          coarse_test_offsets
);

	// FIXME FIXME FIXME: Top-left rule

	fixed_tri edge_base, edge_inc_x, edge_inc_y, out_edge_refs, x_offsets, y_offsets, test_offsets;

	raster_xy bounds_ref, hold_vertex_a, hold_vertex_b, hold_vertex_c, ps[3], qs[3], out_pos_ref;
	coarse_dim bounds_span_x, bounds_span_y, out_span_x, out_span_y;
	raster_offsets_tri out_offsets;

	struct packed
	{
		raster_xy          pos_ref;
		coarse_dim         span_x,
		                   span_y;
		raster_offsets_tri offsets;
		fixed_tri          edge_refs,
		                   coarse_x_offsets,
		                   coarse_y_offsets,
		                   coarse_test_offsets;
	} out, skid_out;

	gfx_skid_buf #(.WIDTH($bits(out))) skid
	(
		.in(out),
		.out(skid_out),
		.*
	);

	assign out.span_x = out_span_x;
	assign out.span_y = out_span_y;
	assign out.pos_ref = out_pos_ref;
	assign out.offsets = out_offsets;
	assign out.edge_refs = out_edge_refs;
	assign out.coarse_x_offsets = x_offsets;
	assign out.coarse_y_offsets = y_offsets;
	assign out.coarse_test_offsets = test_offsets;

	assign span_x = skid_out.span_x;
	assign span_y = skid_out.span_y;
	assign pos_ref = skid_out.pos_ref;
	assign offsets = skid_out.offsets;
	assign edge_refs = skid_out.edge_refs;
	assign coarse_x_offsets = skid_out.coarse_x_offsets;
	assign coarse_y_offsets = skid_out.coarse_y_offsets;
	assign coarse_test_offsets = skid_out.coarse_test_offsets;

	assign ps[0] = hold_vertex_a;
	assign qs[0] = hold_vertex_b;

	assign ps[1] = hold_vertex_b;
	assign qs[1] = hold_vertex_c;

	assign ps[2] = hold_vertex_c;
	assign qs[2] = hold_vertex_a;

	// Vertices are delayed by the bounds latency so that the edge units see
	// them together with bounds_ref
	gfx_pipes #(.WIDTH($bits(vertex_a)), .DEPTH(`GFX_SETUP_BOUNDS_STAGES)) vertex_a_pipes
	(
		.in(vertex_a),
		.out(hold_vertex_a),
		.*
	);

	gfx_pipes #(.WIDTH($bits(vertex_b)), .DEPTH(`GFX_SETUP_BOUNDS_STAGES)) vertex_b_pipes
	(
		.in(vertex_b),
		.out(hold_vertex_b),
		.*
	);

	gfx_pipes #(.WIDTH($bits(vertex_c)), .DEPTH(`GFX_SETUP_BOUNDS_STAGES)) vertex_c_pipes
	(
		.in(vertex_c),
		.out(hold_vertex_c),
		.*
	);

	// vertex_a/b/c, clk and stall are bound through .*
	gfx_setup_bounds bounds
	(
		.span_x(bounds_span_x),
		.span_y(bounds_span_y),
		.reference(bounds_ref),
		.*
	);

	localparam POST_BOUNDS_DEPTH = `GFX_SETUP_EDGE_STAGES + `GFX_SETUP_OFFSETS_STAGES;

	gfx_pipes #(.WIDTH($bits(pos_ref)), .DEPTH(POST_BOUNDS_DEPTH)) ref_pipes
	(
		.in(bounds_ref),
		.out(out_pos_ref),
		.*
	);

	gfx_pipes #(.WIDTH($bits(span_x)), .DEPTH(POST_BOUNDS_DEPTH)) span_x_pipes
	(
		.in(bounds_span_x),
		.out(out_span_x),
		.*
	);

	gfx_pipes #(.WIDTH($bits(span_y)), .DEPTH(POST_BOUNDS_DEPTH)) span_y_pipes
	(
		.in(bounds_span_y),
		.out(out_span_y),
		.*
	);

	// Coarse test corner selection.
	//
	// offsets[i][n] holds the offset of edge i at x = n % `GFX_RASTER_SIZE,
	// y = n / `GFX_RASTER_SIZE (this is how gfx_setup_offsets fills the
	// table). For each edge, pick the block corner in the direction in
	// which the edge value grows: the far side in x when the x increment is
	// non-negative, the near side when it is negative, and likewise in y.
	//
	//   {x<0, y<0}   corner (x, y)             index
	//   2'b00        (SIZE - 1, SIZE - 1)      OFFSETS - 1
	//   2'b01        (SIZE - 1, 0)             SIZE - 1
	//   2'b10        (0,        SIZE - 1)      OFFSETS - SIZE
	//   2'b11        (0,        0)             0
	//
	// The 2'b10 entry used to be OFFSETS - SIZE - 1, which is not a corner
	// (it is x = SIZE - 1, y = SIZE - 2) and did not match the other three
	// cases.
	always_comb
		for (integer i = 0; i < 3; ++i)
			unique case ({x_offsets[i][$bits(fixed) - 1], y_offsets[i][$bits(fixed) - 1]})
				2'b00:
					test_offsets[i] = out_offsets[i][`GFX_RASTER_OFFSETS - 1];

				2'b01:
					test_offsets[i] = out_offsets[i][`GFX_RASTER_SIZE - 1];

				2'b10:
					test_offsets[i] = out_offsets[i][`GFX_RASTER_OFFSETS - `GFX_RASTER_SIZE];

				2'b11:
					test_offsets[i] = out_offsets[i][0];
			endcase

	genvar i;
	generate
		for (i = 0; i < 3; ++i) begin: edges
			gfx_setup_edge edge_fn
			(
				.p(ps[i]),
				.q(qs[i]),
				.base(edge_base[i]),
				.inc_x(edge_inc_x[i]),
				.inc_y(edge_inc_y[i]),
				.origin(bounds_ref),
				.*
			);

			// Base and coarse increments are delayed by the latency of
			// gfx_setup_offsets so that all outputs stay aligned
			gfx_pipes #(.WIDTH($bits(fixed)), .DEPTH(`GFX_SETUP_OFFSETS_STAGES)) base_pipes
			(
				.in(edge_base[i]),
				.out(out_edge_refs[i]),
				.*
			);

			gfx_pipes #(.WIDTH($bits(fixed)), .DEPTH(`GFX_SETUP_OFFSETS_STAGES)) coarse_x_pipes
			(
				.in(edge_inc_x[i] << `GFX_RASTER_BITS),
				.out(x_offsets[i]),
				.*
			);

			gfx_pipes #(.WIDTH($bits(fixed)), .DEPTH(`GFX_SETUP_OFFSETS_STAGES)) coarse_y_pipes
			(
				.in(edge_inc_y[i] << `GFX_RASTER_BITS),
				.out(y_offsets[i]),
				.*
			);

			gfx_setup_offsets edge_offsets
			(
				.inc_x(edge_inc_x[i]),
				.inc_y(edge_inc_y[i]),
				.offsets(out_offsets[i]),
				.*
			);
		end
	endgenerate

endmodule
x_a_lt_b, x_a_lt_c, x_b_lt_c, y_a_lt_b, y_a_lt_c, y_b_lt_c; + raster_xy min, max, hold_a, hold_b, hold_c; + coarse_dim ref_x, ref_y; + raster_xy_prec min_prec, max_prec, ref_prec; + + assign min_prec = min; + assign max_prec = max; + assign reference = ref_prec; + + assign ref_prec.x.sub = 0; + assign ref_prec.x.fine = 0; + assign ref_prec.x.padding = {`GFX_RASTER_PAD_BITS{ref_x[$bits(ref_x) - 1]}}; + assign {ref_prec.x.sign, ref_prec.x.coarse} = ref_x; + + assign ref_prec.y.sub = 0; + assign ref_prec.y.fine = 0; + assign ref_prec.y.padding = {`GFX_RASTER_PAD_BITS{ref_y[$bits(ref_y) - 1]}}; + assign {ref_prec.y.sign, ref_prec.y.coarse} = ref_y; + + always_ff @(posedge clk) + if (!stall) begin + hold_a <= vertex_a; + hold_b <= vertex_b; + hold_c <= vertex_c; + + x_a_lt_b <= vertex_a.x < vertex_b.x; + x_a_lt_c <= vertex_a.x < vertex_c.x; + x_b_lt_c <= vertex_b.x < vertex_c.x; + + y_a_lt_b <= vertex_a.y < vertex_b.y; + y_a_lt_c <= vertex_a.y < vertex_c.y; + y_b_lt_c <= vertex_b.y < vertex_c.y; + + if (x_a_lt_b) begin + min.x <= x_a_lt_c ? hold_a.x : hold_c.x; + max.x <= x_b_lt_c ? hold_c.x : hold_b.x; + end else begin + min.x <= x_b_lt_c ? hold_b.x : hold_c.x; + max.x <= x_a_lt_c ? hold_c.x : hold_a.x; + end + + if (y_a_lt_b) begin + min.y <= y_a_lt_c ? hold_a.y : hold_c.y; + max.y <= y_b_lt_c ? hold_c.y : hold_b.y; + end else begin + min.y <= y_b_lt_c ? hold_b.y : hold_c.y; + max.y <= y_a_lt_c ? 
// Per-position edge offsets for one triangle edge.
//
// Two register stages, both frozen while stall is high:
//   1. x_hold[k] <= k * inc_x and y_hold[k] <= k * inc_y for every
//      k in [0, `GFX_RASTER_SIZE)
//   2. offsets[n] <= x_hold[n % `GFX_RASTER_SIZE] + y_hold[n / `GFX_RASTER_SIZE]
//      for every n in [0, `GFX_RASTER_OFFSETS)
//
// The multiples are built for whatever `GFX_RASTER_SIZE is. Previously only
// entries 0 and 1 were driven (the assignments for 2 and 3 were commented
// out), which left the higher entries undriven for any size above 2.
// Entries 0 and 1 keep exactly the values they had: 0 and inc.
module gfx_setup_offsets
(
	input  logic          clk,

	input  fixed          inc_x,
	                      inc_y,
	input  logic          stall,

	output raster_offsets offsets
);

	fixed x_hold[`GFX_RASTER_SIZE], y_hold[`GFX_RASTER_SIZE],
	      x_multiples[`GFX_RASTER_SIZE], y_multiples[`GFX_RASTER_SIZE];

	genvar i;
	generate
		for (i = 0; i < `GFX_RASTER_SIZE; ++i) begin: multiples
			// k * inc as a chain of additions; the zero entry and the
			// "0 + inc" entry are trivially simplified by synthesis
			if (i == 0) begin: zero
				assign x_multiples[i] = 0;
				assign y_multiples[i] = 0;
			end else begin: step
				assign x_multiples[i] = x_multiples[i - 1] + inc_x;
				assign y_multiples[i] = y_multiples[i - 1] + inc_y;
			end

			always_ff @(posedge clk)
				if (!stall) begin
					x_hold[i] <= x_multiples[i];
					y_hold[i] <= y_multiples[i];
				end
		end

		// Entry n combines column n % SIZE with row n / SIZE
		for (i = 0; i < `GFX_RASTER_OFFSETS; ++i) begin: permutations
			always_ff @(posedge clk)
				if (!stall)
					offsets[i] <= x_hold[i % `GFX_RASTER_SIZE] + y_hold[i / `GFX_RASTER_SIZE];
		end
	endgenerate

endmodule
skid : in; + + always_ff @(posedge clk) + if (!stall) + skid <= in; + +endmodule diff --git a/rtl/legacy_gfx/gfx_skid_flow.sv b/rtl/legacy_gfx/gfx_skid_flow.sv new file mode 100644 index 0000000..c5e3b4a --- /dev/null +++ b/rtl/legacy_gfx/gfx_skid_flow.sv @@ -0,0 +1,31 @@ +module gfx_skid_flow +( + input logic clk, + rst_n, + + input logic in_valid, + out_ready, + + output logic in_ready, + out_valid, + stall +); + + logic was_ready, was_valid; + + assign stall = !in_ready; + assign in_ready = was_ready || !was_valid; + assign out_valid = in_valid || stall; + + always_ff @(posedge clk or negedge rst_n) + if (!rst_n) begin + was_ready <= 0; + was_valid <= 0; + end else begin + was_ready <= out_ready; + + if (!stall) + was_valid <= in_valid; + end + +endmodule diff --git a/rtl/legacy_gfx/gfx_sp.sv b/rtl/legacy_gfx/gfx_sp.sv new file mode 100644 index 0000000..ce0f9ff --- /dev/null +++ b/rtl/legacy_gfx/gfx_sp.sv @@ -0,0 +1,131 @@ +`include "gfx/gfx_defs.sv" + +module gfx_sp +( + input logic clk, + rst_n, + + input logic batch_waitrequest, + batch_readdatavalid, + input vram_word batch_readdata, + output vram_addr batch_address, + output logic batch_read, + + input logic fetch_waitrequest, + fetch_readdatavalid, + input vram_word fetch_readdata, + output vram_addr fetch_address, + output logic fetch_read, + + input logic program_start, + input cmd_word program_header_base, + program_header_size, + output logic running, + + input logic send_ready, + output logic send_valid, + output lane_word send_data, + output lane_mask send_mask +); + + logic batch_start, clear_lanes, insn_valid; + cmd_word batch_length; + insn_word insn; + vram_insn_addr batch_base; + + gfx_sp_fetch fetch + ( + .ready(insn_ready), + .valid(insn_valid), + .* + ); + + logic deco_valid, insn_ready; + insn_deco deco; + + gfx_sp_decode decode + ( + .* + ); + + logic deco_ready, combiner_issue_valid, shuffler_issue_valid, stream_issue_valid; + vreg_num rd_a_reg, rd_b_reg; + + gfx_sp_issue issue + ( + .* + 
); + + logic recv_valid; + lane_word recv_data; + lane_mask recv_mask; + + gfx_sp_batch batch + ( + .out_data(recv_data), + .out_mask(recv_mask), + .out_ready(recv_ready), + .out_valid(recv_valid), + .* + ); + + logic shuffler_issue_ready, shuffler_wb_valid; + wb_op shuffler_wb; + + gfx_sp_shuffler shuffler + ( + .wb(shuffler_wb), + .in_ready(shuffler_issue_ready), + .in_valid(shuffler_issue_valid), + .wb_ready(shuffler_wb_ready), + .wb_valid(shuffler_wb_valid), + .* + ); + + logic combiner_issue_ready, combiner_wb_valid; + wb_op combiner_wb; + + gfx_sp_combiner combiner + ( + .wb(combiner_wb), + .in_ready(combiner_issue_ready), + .in_valid(combiner_issue_valid), + .wb_ready(combiner_wb_ready), + .wb_valid(combiner_wb_valid), + .* + ); + + logic recv_ready, stream_issue_ready, stream_wb_valid; + wb_op stream_wb; + + gfx_sp_stream stream + ( + .wb(stream_wb), + .in_ready(stream_issue_ready), + .in_valid(stream_issue_valid), + .wb_ready(stream_wb_ready), + .wb_valid(stream_wb_valid), + .* + ); + + mat4 wr_data; + logic combiner_wb_ready, shuffler_wb_ready, stream_wb_ready, wr; + vreg_num wr_reg; + + gfx_sp_writeback writeback + ( + .* + ); + + mat4 a, b; + + gfx_sp_regs regs + ( + .rd_a_data(a), + .rd_b_data(b), + .* + ); + + logic batch_end; + +endmodule diff --git a/rtl/legacy_gfx/gfx_sp_batch.sv b/rtl/legacy_gfx/gfx_sp_batch.sv new file mode 100644 index 0000000..3d566ab --- /dev/null +++ b/rtl/legacy_gfx/gfx_sp_batch.sv @@ -0,0 +1,141 @@ +`include "gfx/gfx_defs.sv" + +module gfx_sp_batch +( + input logic clk, + rst_n, + + input logic batch_waitrequest, + batch_readdatavalid, + input vram_word batch_readdata, + output vram_addr batch_address, + output logic batch_read, + + input logic batch_start, + input vram_insn_addr batch_base, + input cmd_word batch_length, + + output lane_mask out_mask, + output lane_word out_data, + input logic out_ready, + output logic out_valid +); + + localparam TAIL_BITS = $clog2($bits(lane_mask)), + BLOCK_BITS = $bits(batch_length) - 
TAIL_BITS; + + logic fifo_down_safe, lane_read, lane_readdatavalid, lane_waitrequest; + lane_word lane_readdata; + vram_lane_addr aligned_batch_base, lane_address; + logic[TAIL_BITS - 1:0] batch_length_tail, read_tail; + logic[BLOCK_BITS - 1:0] batch_length_block, fetch_block_count, read_block_count; + + struct packed + { + lane_word data; + lane_mask mask; + } fifo_in, fifo_out; + + enum int unsigned + { + IDLE, + STREAM + } state; + + assign out_data = fifo_out.data; + assign out_mask = fifo_out.mask; + + assign fifo_in.data = lane_readdata; + + assign {batch_length_block, batch_length_tail} = batch_length; + assign aligned_batch_base = batch_base[`GFX_INSN_BITS_IN_LANE +: $bits(vram_lane_addr)]; + + gfx_sp_widener #(.WIDTH($bits(vram_lane_addr))) lane_bus + ( + .wide_read(lane_read), + .wide_address(lane_address), + .wide_readdata(lane_readdata), + .wide_waitrequest(lane_waitrequest), + .wide_readdatavalid(lane_readdatavalid), + .word_read(batch_read), + .word_address(batch_address), + .word_readdata(batch_readdata), + .word_waitrequest(batch_waitrequest), + .word_readdatavalid(batch_readdatavalid), + .* + ); + + gfx_fifo #(.WIDTH($bits(fifo_in)), .DEPTH(`GFX_BATCH_FIFO_DEPTH)) lane_fifo + ( + .in(fifo_in), + .out(fifo_out), + .in_ready(), + .in_valid(lane_readdatavalid), + .* + ); + + gfx_fifo_overflow #(.DEPTH(`GFX_BATCH_FIFO_DEPTH)) overflow + ( + .down(lane_read && !lane_waitrequest), + .empty(), + .down_safe(fifo_down_safe), + .* + ); + + always_comb begin + unique case (read_tail) + 2'b00: fifo_in.mask = 4'b0000; + 2'b01: fifo_in.mask = 4'b0001; + 2'b10: fifo_in.mask = 4'b0011; + 2'b11: fifo_in.mask = 4'b0111; + endcase + + if (read_block_count != 0) + fifo_in.mask = 4'b1111; + end + + always_ff @(posedge clk or negedge rst_n) + if (!rst_n) begin + state <= IDLE; + lane_read <= 0; + end else unique case (state) + IDLE: + if (batch_start) begin + state <= STREAM; + lane_read <= 1; + end + + STREAM: begin + if (!lane_read || !lane_waitrequest) + lane_read <= 
fifo_down_safe; + + if (lane_read && !lane_waitrequest && fetch_block_count == 0) begin + state <= IDLE; + lane_read <= 0; + end + end + endcase + + always_ff @(posedge clk) begin + unique case (state) + IDLE: + if (batch_start) begin + read_tail <= batch_length_tail; + read_block_count <= batch_length_block; + fetch_block_count <= batch_length_block; + + lane_address <= aligned_batch_base; + end + + STREAM: + if (lane_read && !lane_waitrequest) begin + lane_address <= lane_address + 1; + fetch_block_count <= fetch_block_count - 1; + end + endcase + + if (lane_readdatavalid) + read_block_count <= read_block_count - 1; + end + +endmodule diff --git a/rtl/legacy_gfx/gfx_sp_combiner.sv b/rtl/legacy_gfx/gfx_sp_combiner.sv new file mode 100644 index 0000000..900af00 --- /dev/null +++ b/rtl/legacy_gfx/gfx_sp_combiner.sv @@ -0,0 +1,63 @@ +`include "gfx/gfx_defs.sv" + +module gfx_sp_combiner +( + input logic clk, + rst_n, + + input mat4 a, + b, + input insn_deco deco, + input logic in_valid, + output logic in_ready, + + input logic wb_ready, + output logic wb_valid, + output wb_op wb +); + + wb_op wb_out; + logic mul_ready, mul_valid, fifo_ready, fifo_valid, skid_ready, out_stall; + + assign in_ready = mul_ready && fifo_ready; + + gfx_mat_mat mul + ( + .q(wb_out.data), + .in_ready(mul_ready), + .in_valid(in_valid && fifo_ready), + .out_ready(skid_ready && fifo_valid), + .out_valid(mul_valid), + .* + ); + + gfx_fifo #(.WIDTH($bits(vreg_num)), .DEPTH(`GFX_SP_COMBINER_FIFO_DEPTH)) depth + ( + .in(deco.dst), + .out(wb_out.dst), + .in_ready(fifo_ready), + .in_valid(in_valid && mul_ready), + .out_ready(skid_ready && mul_valid), + .out_valid(fifo_valid), + .* + ); + + gfx_skid_flow out_flow + ( + .stall(out_stall), + .in_ready(skid_ready), + .in_valid(fifo_valid && mul_valid), + .out_ready(wb_ready), + .out_valid(wb_valid), + .* + ); + + gfx_skid_buf #(.WIDTH($bits(wb))) out_skid + ( + .in(wb_out), + .out(wb), + .stall(out_stall), + .* + ); + +endmodule diff --git 
a/rtl/legacy_gfx/gfx_sp_decode.sv b/rtl/legacy_gfx/gfx_sp_decode.sv new file mode 100644 index 0000000..d54077d --- /dev/null +++ b/rtl/legacy_gfx/gfx_sp_decode.sv @@ -0,0 +1,116 @@ +`include "gfx/gfx_defs.sv" +`include "gfx/gfx_sp_isa.sv" + +module gfx_sp_decode +( + input logic clk, + rst_n, + + input logic clear_lanes, + input insn_word insn, + input logic insn_valid, + output logic insn_ready, + + output insn_deco deco, + input logic deco_ready, + output logic deco_valid +); + + logic stall; + insn_deco deco_in, deco_out; + + gfx_pipeline_flow #(.STAGES(1)) flow + ( + .in_ready(insn_ready), + .in_valid(insn_valid), + .out_ready(deco_ready), + .out_valid(deco_valid), + .* + ); + + gfx_pipes #(.WIDTH($bits(deco)), .DEPTH(1)) pipe + ( + .in(deco_in), + .out(deco_out), + .* + ); + + gfx_skid_buf #(.WIDTH($bits(deco))) skid + ( + .in(deco_out), + .out(deco), + .* + ); + + always_comb begin + deco_in.writeback = 0; + deco_in.read_src_a = 0; + deco_in.read_src_b = 0; + + deco_in.ex.stream = 0; + deco_in.ex.combiner = 0; + deco_in.ex.shuffler = 0; + + deco_in.shuffler.is_swizzle = 1'bx; + deco_in.shuffler.is_broadcast = 1'bx; + + unique casez (insn) + `GFX_INSN_OP_SELECT: begin + deco_in.writeback = 1; + deco_in.read_src_a = 1; + deco_in.read_src_b = 1; + + deco_in.ex.shuffler = 1; + deco_in.shuffler.is_swizzle = 0; + deco_in.shuffler.is_broadcast = 0; + end + + `GFX_INSN_OP_SWIZZL: begin + deco_in.writeback = 1; + deco_in.read_src_a = 1; + + deco_in.ex.shuffler = 1; + deco_in.shuffler.is_swizzle = 1; + end + + `GFX_INSN_OP_BROADC: begin + deco_in.writeback = 1; + + deco_in.ex.shuffler = 1; + deco_in.shuffler.is_swizzle = 0; + deco_in.shuffler.is_broadcast = 1; + end + + `GFX_INSN_OP_MATVEC: begin + deco_in.writeback = 1; + deco_in.read_src_a = 1; + deco_in.read_src_b = 1; + deco_in.ex.combiner = 1; + end + + `GFX_INSN_OP_SEND: begin + deco_in.read_src_a = 1; + deco_in.ex.stream = 1; + end + + `GFX_INSN_OP_RECV: begin + deco_in.writeback = 1; + deco_in.ex.stream = 1; +
end + + default: + // This is playing with fire, but it is worth it as long as timing closes + deco_in = {($bits(deco_in)){1'bx}}; + endcase + + deco_in.dst = insn `GFX_INSN_DST; + deco_in.src_a = insn `GFX_INSN_SRC_A; + deco_in.src_b = insn `GFX_INSN_SRC_B; + deco_in.clear_lanes = clear_lanes; + + deco_in.shuffler.imm = insn `GFX_INSN_BROADC_IMM; + deco_in.shuffler.select_mask = insn `GFX_INSN_SELECT_MASK; + deco_in.shuffler.swizzle_op = insn `GFX_INSN_SWIZZL_LANES; + end + +endmodule diff --git a/rtl/legacy_gfx/gfx_sp_fetch.sv b/rtl/legacy_gfx/gfx_sp_fetch.sv new file mode 100644 index 0000000..23fb20e --- /dev/null +++ b/rtl/legacy_gfx/gfx_sp_fetch.sv @@ -0,0 +1,224 @@ +`include "gfx/gfx_defs.sv" + +module gfx_sp_fetch +( + input logic clk, + rst_n, + + input logic fetch_waitrequest, + fetch_readdatavalid, + input vram_word fetch_readdata, + output vram_addr fetch_address, + output logic fetch_read, + + input logic program_start, + input cmd_insn_ptr program_header_base, + input cmd_word program_header_size, + output logic running, + + input logic batch_end, + output vram_insn_addr batch_base, + output logic batch_start, + output cmd_word batch_length, + + input logic ready, + output logic valid, + output insn_word insn, + output logic clear_lanes +); + + localparam ENTRY_SIZE = 4; + + logic break_loop, entry_end, fifo_down_safe, fifo_empty, fifo_put, + header_continue, insn_read, insn_readdatavalid, insn_waitrequest; + + cmd_word header_count; + insn_word code_length, code_read_ptr, code_fetch_ptr, insn_readdata, entry_data[ENTRY_SIZE]; + vram_insn_addr code_base, insn_address, header_ptr; + logic[$clog2(ENTRY_SIZE - 1):0] entry_fetch_count, entry_read_count; + + enum int unsigned + { + IDLE, + HEADER, + LOOP, + FLUSH + } state; + + struct packed + { + insn_word insn; + logic clear_lanes; + } fifo_in, fifo_out; + + assign insn = fifo_out.insn; + assign clear_lanes = fifo_out.clear_lanes; + + assign entry_end = entry_read_count == ENTRY_SIZE - 1; + assign
header_continue = header_count != 0; + assign break_loop = batch_end && (!insn_read || !insn_waitrequest); + + function vram_insn_addr base_from_word(insn_word in); + base_from_word = in[`GFX_INSN_SUBWORD_BITS +: $bits(vram_insn_addr)]; + endfunction + + assign code_base = base_from_word(entry_data[0]); + assign batch_base = base_from_word(entry_data[2]); + assign code_length = entry_data[1]; + assign batch_length = entry_data[3]; + + gfx_sp_widener #(.WIDTH($bits(vram_insn_addr))) insn_bus + ( + .wide_read(insn_read), + .wide_address(insn_address), + .wide_readdata(insn_readdata), + .wide_waitrequest(insn_waitrequest), + .wide_readdatavalid(insn_readdatavalid), + .word_read(fetch_read), + .word_address(fetch_address), + .word_readdata(fetch_readdata), + .word_waitrequest(fetch_waitrequest), + .word_readdatavalid(fetch_readdatavalid), + .* + ); + + gfx_fifo #(.WIDTH($bits(fifo_in)), .DEPTH(`GFX_FETCH_FIFO_DEPTH)) insn_fifo + ( + .in(fifo_in), + .out(fifo_out), + .in_ready(), + .in_valid(fifo_put), + .out_ready(ready), + .out_valid(valid), + .* + ); + + gfx_fifo_overflow #(.DEPTH(`GFX_FETCH_FIFO_DEPTH)) overflow + ( + .down(insn_read && !insn_waitrequest), + .empty(fifo_empty), + .down_safe(fifo_down_safe), + .out_ready(ready), + .out_valid(valid), + .* + ); + + always_ff @(posedge clk or negedge rst_n) + if (!rst_n) begin + state <= IDLE; + running <= 0; + fifo_put <= 0; + insn_read <= 0; + batch_start <= 0; + end else unique case (state) + IDLE: + if (program_start) begin + state <= HEADER; + running <= 1; + insn_read <= 1; + end + + HEADER: begin + if (insn_read && !insn_waitrequest) + insn_read <= entry_fetch_count != ENTRY_SIZE - 1; + + if (insn_readdatavalid && entry_end) begin + state <= LOOP; + insn_read <= 1; + batch_start <= 1; + end + end + + LOOP: begin + fifo_put <= 0; + batch_start <= 0; + + if (!insn_read || !insn_waitrequest) + insn_read <= fifo_down_safe; + + if (break_loop) begin + state <= FLUSH; + insn_read <= 0; + end + + if (insn_readdatavalid) 
+ fifo_put <= 1; + end + + FLUSH: begin + fifo_put <= 0; + + if (fifo_empty) begin + state <= header_continue ? HEADER : IDLE; + running <= header_continue; + insn_read <= header_continue; + end + end + endcase + + always_ff @(posedge clk) + unique case (state) + IDLE: + if (program_start) begin + header_ptr <= program_header_base.addr; + header_count <= program_header_size; + insn_address <= program_header_base.addr; + + entry_read_count <= 0; + entry_fetch_count <= 0; + end + + HEADER: begin + code_read_ptr <= 0; + code_fetch_ptr <= 0; + + if (!insn_waitrequest) begin + insn_address <= insn_address + 1; + entry_fetch_count <= entry_fetch_count + 1; + end + + if (insn_read && !insn_waitrequest) + header_ptr <= header_ptr + 1; + + if (insn_readdatavalid) begin + entry_read_count <= entry_read_count + 1; + + for (integer i = 0; i < ENTRY_SIZE - 1; ++i) + entry_data[i] <= entry_data[i + 1]; + + entry_data[ENTRY_SIZE - 1] <= insn_readdata; + + if (entry_end) + insn_address <= base_from_word(entry_data[1]); + end + end + + LOOP: begin + if (insn_read && !insn_waitrequest) begin + insn_address <= insn_address + 1; + code_fetch_ptr <= code_fetch_ptr + 1; + + if (code_fetch_ptr == code_length) begin + insn_address <= code_base; + code_fetch_ptr <= 0; + end + end + + if (insn_readdatavalid) begin + fifo_in.insn <= insn_readdata; + fifo_in.clear_lanes <= code_read_ptr == 0; + + code_read_ptr <= code_read_ptr + 1; + if (code_read_ptr == code_length) + code_read_ptr <= 0; + end + end + + FLUSH: + if (fifo_empty) begin + header_count <= header_count - 1; + insn_address <= header_ptr; + end + endcase + +endmodule diff --git a/rtl/legacy_gfx/gfx_sp_file.sv b/rtl/legacy_gfx/gfx_sp_file.sv new file mode 100644 index 0000000..e98ee18 --- /dev/null +++ b/rtl/legacy_gfx/gfx_sp_file.sv @@ -0,0 +1,32 @@ +`include "gfx/gfx_defs.sv" + +module gfx_sp_file +( + input logic clk, + + input vreg_num rd_reg, + output vec4 rd_data, + + input logic wr, + input vreg_num wr_reg, + input vec4 
wr_data +); + + vec4 file[`GFX_SP_REG_COUNT], hold_rd_data, hold_wr_data; + logic hold_wr; + vreg_num hold_rd_reg, hold_wr_reg; + + always_ff @(posedge clk) begin + hold_wr <= wr; + hold_wr_reg <= wr_reg; + hold_wr_data <= wr_data; + + rd_data <= hold_rd_data; + hold_rd_reg <= rd_reg; + hold_rd_data <= file[hold_rd_reg]; + + if (hold_wr) + file[hold_wr_reg] <= hold_wr_data; + end + +endmodule diff --git a/rtl/legacy_gfx/gfx_sp_isa.sv b/rtl/legacy_gfx/gfx_sp_isa.sv new file mode 100644 index 0000000..1420d95 --- /dev/null +++ b/rtl/legacy_gfx/gfx_sp_isa.sv @@ -0,0 +1,23 @@ +`ifndef GFX_SP_ISA_SV +`define GFX_SP_ISA_SV + +`include "gfx/gfx_defs.sv" + +`define GFX_INSN_OP_SELECT 32'b00000000_zzzz0zzz_0zzz0zzz_00000001 +`define GFX_INSN_OP_SWIZZL 32'bzzzzzzzz_00000000_0zzz0zzz_00000010 +`define GFX_INSN_OP_BROADC 32'bzzzzzzzz_zzzzzzzz_00000zzz_00000100 +`define GFX_INSN_OP_MATVEC 32'b00000000_00000zzz_0zzz0zzz_00001000 +`define GFX_INSN_OP_SEND 32'b00000000_00000000_0zzz0000_00010000 +`define GFX_INSN_OP_RECV 32'b00000000_00000000_00000zzz_00100000 + +`define GFX_INSN_DST [10:8] +`define GFX_INSN_SRC_A [14:12] +`define GFX_INSN_SRC_B [18:16] + +`define GFX_INSN_SELECT_MASK [23:20] + +`define GFX_INSN_SWIZZL_LANES [31:24] + +`define GFX_INSN_BROADC_IMM [31:16] + +`endif diff --git a/rtl/legacy_gfx/gfx_sp_issue.sv b/rtl/legacy_gfx/gfx_sp_issue.sv new file mode 100644 index 0000000..6934e39 --- /dev/null +++ b/rtl/legacy_gfx/gfx_sp_issue.sv @@ -0,0 +1,111 @@ +`include "gfx/gfx_defs.sv" + +module gfx_sp_issue +( + input logic clk, + rst_n, + + input insn_deco deco, + input logic deco_valid, + output logic deco_ready, + + output vreg_num rd_a_reg, + rd_b_reg, + + input logic stream_issue_ready, + output logic stream_issue_valid, + + input logic combiner_issue_ready, + output logic combiner_issue_valid, + + input logic shuffler_issue_ready, + output logic shuffler_issue_valid, + + input logic wr, + input vreg_num wr_reg +); + + /* This could be fully pipelined, but there was no
time, and in + * any case it would make no difference given the terrible bandwidth. + */ + + logic data_hazard, rd_a_hazard, rd_b_hazard, wr_hazard, writing_a, writing_b, writing_dst, + busy[`GFX_SP_REG_COUNT]; + + enum int unsigned + { + IDLE, + HAZARDS, + ISSUE, + WAIT + } state; + + assign rd_a_reg = deco.src_a; + assign rd_b_reg = deco.src_b; + + assign wr_hazard = deco.writeback && writing_dst; + assign rd_a_hazard = deco.read_src_a && writing_a; + assign rd_b_hazard = deco.read_src_b && writing_b; // gated by read_src_b (was read_src_a): src_b only stalls instructions that actually read it + assign data_hazard = rd_a_hazard || rd_b_hazard || wr_hazard; + + assign deco_ready = (stream_issue_ready && stream_issue_valid) + || (combiner_issue_ready && combiner_issue_valid) + || (shuffler_issue_ready && shuffler_issue_valid); + + always_ff @(posedge clk or negedge rst_n) + if (!rst_n) begin + state <= IDLE; + + stream_issue_valid <= 0; + combiner_issue_valid <= 0; + shuffler_issue_valid <= 0; + + for (integer i = 0; i < `GFX_SP_REG_COUNT; ++i) + busy[i] <= 0; + end else begin + unique case (state) + IDLE: + if (deco_valid) + state <= HAZARDS; + + HAZARDS: + if (!data_hazard) begin + state <= ISSUE; + if (deco.writeback) + busy[deco.dst] <= 1; + end + + ISSUE: begin + state <= WAIT; + + if (deco.ex.stream) + stream_issue_valid <= 1; + + if (deco.ex.combiner) + combiner_issue_valid <= 1; + + if (deco.ex.shuffler) + shuffler_issue_valid <= 1; + end + + WAIT: + if (deco_ready) begin + state <= IDLE; + + stream_issue_valid <= 0; + combiner_issue_valid <= 0; + shuffler_issue_valid <= 0; + end + endcase + + if (wr) + busy[wr_reg] <= 0; + end + + always_ff @(posedge clk) begin + writing_a <= busy[deco.src_a]; + writing_b <= busy[deco.src_b]; + writing_dst <= busy[deco.dst]; + end + +endmodule diff --git a/rtl/legacy_gfx/gfx_sp_regs.sv b/rtl/legacy_gfx/gfx_sp_regs.sv new file mode 100644 index 0000000..68aaf06 --- /dev/null +++ b/rtl/legacy_gfx/gfx_sp_regs.sv @@ -0,0 +1,39 @@ +`include "gfx/gfx_defs.sv" + +module gfx_sp_regs +( + input logic clk, + + input vreg_num
rd_a_reg, + output mat4 rd_a_data, + + input vreg_num rd_b_reg, + output mat4 rd_b_data, + + input logic wr, + input vreg_num wr_reg, + input mat4 wr_data +); + + genvar i; + generate + for (i = 0; i < `GFX_SP_LANES; ++i) begin: lanes + gfx_sp_file a + ( + .rd_reg(rd_a_reg), + .rd_data(rd_a_data[i]), + .wr_data(wr_data[i]), + .* + ); + + gfx_sp_file b + ( + .rd_reg(rd_b_reg), + .rd_data(rd_b_data[i]), + .wr_data(wr_data[i]), + .* + ); + end + endgenerate + +endmodule diff --git a/rtl/legacy_gfx/gfx_sp_select.sv b/rtl/legacy_gfx/gfx_sp_select.sv new file mode 100644 index 0000000..46b23c9 --- /dev/null +++ b/rtl/legacy_gfx/gfx_sp_select.sv @@ -0,0 +1,25 @@ +`include "gfx/gfx_defs.sv" + +module gfx_sp_select +( + input logic clk, + + input vec4 a, + b, + input shuffler_deco deco, + input logic stall, + + output vec4 out +); + + always_ff @(posedge clk) + if (!stall) + for (integer i = 0; i < `FLOATS_PER_VEC; ++i) + if (deco.is_broadcast) + out[i] <= deco.imm; + else if (deco.select_mask[i]) + out[i] <= b[i]; + else + out[i] <= a[i]; + +endmodule diff --git a/rtl/legacy_gfx/gfx_sp_shuffler.sv b/rtl/legacy_gfx/gfx_sp_shuffler.sv new file mode 100644 index 0000000..b813d03 --- /dev/null +++ b/rtl/legacy_gfx/gfx_sp_shuffler.sv @@ -0,0 +1,70 @@ +`include "gfx/gfx_defs.sv" + +module gfx_sp_shuffler +( + input logic clk, + rst_n, + + input mat4 a, + b, + input insn_deco deco, + input logic in_valid, + output logic in_ready, + + input logic wb_ready, + output logic wb_valid, + output wb_op wb +); + + mat4 select_out, swizzle_out; + wb_op wb_out; + logic stall, is_swizzle; + vreg_num hold_dst; + + gfx_pipeline_flow #(.STAGES(2)) flow + ( + .out_ready(wb_ready), + .out_valid(wb_valid), + .* + ); + + gfx_skid_buf #(.WIDTH($bits(wb))) skid + ( + .in(wb_out), + .out(wb), + .* + ); + + genvar gen_i; + generate + for (gen_i = 0; gen_i < `GFX_SP_LANES; ++gen_i) begin: lanes + gfx_sp_select select + ( + .a(a[gen_i]), + .b(b[gen_i]), + .out(select_out[gen_i]), + .deco(deco.shuffler), 
+ .* + ); + + gfx_sp_swizzle swizzle + ( + .in(a[gen_i]), + .out(swizzle_out[gen_i]), + .deco(deco.shuffler), + .* + ); + end + endgenerate + + always_ff @(posedge clk) + if (!stall) begin + hold_dst <= deco.dst; + is_swizzle <= deco.shuffler.is_swizzle; + + wb_out.dst <= hold_dst; + for (integer i = 0; i < `GFX_SP_LANES; ++i) + wb_out.data[i] <= is_swizzle ? swizzle_out[i] : select_out[i]; + end + +endmodule diff --git a/rtl/legacy_gfx/gfx_sp_stream.sv b/rtl/legacy_gfx/gfx_sp_stream.sv new file mode 100644 index 0000000..7901028 --- /dev/null +++ b/rtl/legacy_gfx/gfx_sp_stream.sv @@ -0,0 +1,66 @@ +`include "gfx/gfx_defs.sv" + +module gfx_sp_stream +( + input logic clk, + rst_n, + + input mat4 a, + input insn_deco deco, + input logic in_valid, + output logic in_ready, + + input logic wb_ready, + output logic wb_valid, + output wb_op wb, + + input lane_word recv_data, + input lane_mask recv_mask, + input logic recv_valid, + output logic recv_ready, + + input logic send_ready, + output logic send_valid, + output lane_word send_data, + output lane_mask send_mask +); + + logic active, recv; + vreg_num wb_reg; + + assign in_ready = !active; + assign recv_ready = active && recv && wb_ready; + + assign wb_valid = active && recv && recv_valid; + assign send_valid = active && !recv; + + assign wb.dst = wb_reg; + assign wb.data = recv_data; + + always_ff @(posedge clk or negedge rst_n) + if (!rst_n) begin + active <= 0; + send_mask <= 0; + end else begin + if (!active) + active <= in_valid && (deco.writeback || |send_mask); + else if (recv) + active <= !wb_ready || !recv_valid; + else + active <= !send_ready; + + if (recv_ready && recv_valid) + send_mask <= send_mask & recv_mask; + + if (in_ready && in_valid && deco.clear_lanes) + send_mask <= {($bits(send_mask)){1'b1}}; + end + + always_ff @(posedge clk) + if (!active) begin + recv <= deco.writeback; + wb_reg <= deco.dst; + send_data <= a; + end + +endmodule diff --git a/rtl/legacy_gfx/gfx_sp_swizzle.sv 
b/rtl/legacy_gfx/gfx_sp_swizzle.sv new file mode 100644 index 0000000..d07d934 --- /dev/null +++ b/rtl/legacy_gfx/gfx_sp_swizzle.sv @@ -0,0 +1,19 @@ +`include "gfx/gfx_defs.sv" + +module gfx_sp_swizzle +( + input logic clk, + + input vec4 in, + input shuffler_deco deco, + input logic stall, + + output vec4 out +); + + always_ff @(posedge clk) + if (!stall) + for (integer i = 0; i < `FLOATS_PER_VEC; ++i) + out[i] <= in[deco.swizzle_op[i]]; + +endmodule diff --git a/rtl/legacy_gfx/gfx_sp_widener.sv b/rtl/legacy_gfx/gfx_sp_widener.sv new file mode 100644 index 0000000..92101ca --- /dev/null +++ b/rtl/legacy_gfx/gfx_sp_widener.sv @@ -0,0 +1,63 @@ +`include "gfx/gfx_defs.sv" + +module gfx_sp_widener +#(parameter WIDTH=0) // Quartus does not support 'parameter type' +( + input logic clk, + rst_n, + + input logic word_waitrequest, + word_readdatavalid, + input vram_word word_readdata, + output vram_addr word_address, + output logic word_read, + + input logic wide_read, + input logic[WIDTH - 1:0] wide_address, + output logic wide_waitrequest, + wide_readdatavalid, + + output logic[DATA_WIDTH - 1:0] wide_readdata +); + + // This module exists to pretend that the DE1-SoC has a wider SDRAM bus + + localparam WIDE_BITS = $bits(vram_addr) - WIDTH, + WIDE_SIZE = 1 << WIDE_BITS, + DATA_WIDTH = $bits(vram_word) << WIDE_BITS; + + vram_word shift_in[WIDE_SIZE]; + logic[WIDE_BITS - 1:0] address_count, read_count; + + assign word_read = wide_read; + assign word_address = {wide_address, address_count}; + assign wide_waitrequest = word_waitrequest || !(&address_count); + + always_comb + for (integer i = 0; i < WIDE_SIZE; ++i) + wide_readdata[$bits(vram_word) * i +: $bits(vram_word)] = shift_in[i]; + + always_ff @(posedge clk or negedge rst_n) + if (!rst_n) begin + read_count <= 0; + address_count <= 0; + wide_readdatavalid <= 0; + end else begin + if (word_read && !word_waitrequest) + address_count <= address_count + 1; + + if (word_readdatavalid) + read_count <= read_count + 1; + +
wide_readdatavalid <= word_readdatavalid && &read_count; + end + + always_ff @(posedge clk) + if (word_readdatavalid) begin + for (integer i = 0; i < WIDE_SIZE - 1; ++i) + shift_in[i] <= shift_in[i + 1]; + + shift_in[WIDE_SIZE - 1] <= word_readdata; + end + +endmodule diff --git a/rtl/legacy_gfx/gfx_sp_writeback.sv b/rtl/legacy_gfx/gfx_sp_writeback.sv new file mode 100644 index 0000000..1195910 --- /dev/null +++ b/rtl/legacy_gfx/gfx_sp_writeback.sv @@ -0,0 +1,65 @@ +`include "gfx/gfx_defs.sv" + +module gfx_sp_writeback +( + input logic clk, + rst_n, + + input wb_op stream_wb, + input logic stream_wb_valid, + output logic stream_wb_ready, + + input wb_op combiner_wb, + input logic combiner_wb_valid, + output logic combiner_wb_ready, + + input wb_op shuffler_wb, + input logic shuffler_wb_valid, + output logic shuffler_wb_ready, + + output logic wr, + output vreg_num wr_reg, + output mat4 wr_data +); + + wb_op wb_in, wb_out; + + assign wr_reg = wb_out.dst; + assign wr_data = wb_out.data; + + gfx_pipeline_flow #(.STAGES(`GFX_SP_WB_STAGES)) flow + ( + .stall(), + .in_ready(), + .in_valid(stream_wb_valid || combiner_wb_valid || shuffler_wb_valid), + .out_ready(1), + .out_valid(wr), + .* + ); + + gfx_pipes #(.WIDTH($bits(wb_out)), .DEPTH(`GFX_SP_WB_STAGES)) pipes + ( + .in(wb_in), + .out(wb_out), + .stall(0), + .* + ); + + always_comb begin + stream_wb_ready = 0; + combiner_wb_ready = 0; + shuffler_wb_ready = 0; + + if (stream_wb_valid) begin + wb_in = stream_wb; + stream_wb_ready = 1; + end else if (shuffler_wb_valid) begin + wb_in = shuffler_wb; + shuffler_wb_ready = 1; + end else begin + wb_in = combiner_wb; + combiner_wb_ready = 1; + end + end + +endmodule diff --git a/rtl/legacy_gfx/gfx_transpose.sv b/rtl/legacy_gfx/gfx_transpose.sv new file mode 100644 index 0000000..03ecf2d --- /dev/null +++ b/rtl/legacy_gfx/gfx_transpose.sv @@ -0,0 +1,17 @@ +`include "gfx/gfx_defs.sv" + +module gfx_transpose +( + input mat4 in, + output mat4 out +); + + integer i, j; + + // This
has no hardware cost; it is just a renaming of signals + always_comb + for (i = 0; i < `VECS_PER_MAT; ++i) + for (j = 0; j < `FLOATS_PER_VEC; ++j) + out[i][j] = in[j][i]; + +endmodule diff --git a/rtl/legacy_gfx/mod.mk b/rtl/legacy_gfx/mod.mk new file mode 100644 index 0000000..4e0f46d --- /dev/null +++ b/rtl/legacy_gfx/mod.mk @@ -0,0 +1,5 @@ +define core + $(this)/deps := config + $(this)/rtl_dirs := . + $(this)/rtl_top := gfx +endef diff --git a/rtl/mod.mk b/rtl/mod.mk index 740118d..081d3a3 100644 --- a/rtl/mod.mk +++ b/rtl/mod.mk @@ -1,5 +1,5 @@ cores := config debounce intc -subdirs := cache core dma_axi32 fpu gfx perf picorv32 pkt_switch smp top wb2axip +subdirs := cache core dma_axi32 fpu legacy_gfx perf picorv32 pkt_switch smp top wb2axip define core/config $(this)/rtl_include_dirs := . diff --git a/rtl/top/mod.mk b/rtl/top/mod.mk index 2adc352..299c621 100644 --- a/rtl/top/mod.mk +++ b/rtl/top/mod.mk @@ -22,7 +22,7 @@ endef define core/test_fb $(this)/targets := test - $(this)/deps := gfx + $(this)/deps := legacy_gfx $(this)/rtl_files := test_fb.sv $(this)/rtl_top := test_fb $(this)/cocotb_paths := ../../.. @@ -31,7 +31,7 @@ endef define core/test_fifo $(this)/targets := test - $(this)/deps := gfx + $(this)/deps := legacy_gfx $(this)/rtl_files := test_fifo.sv $(this)/rtl_top := test_fifo $(this)/cocotb_paths := ../../.. -- cgit v1.2.3