diff options
Diffstat (limited to '')
| -rw-r--r-- | rtl/cache/cache.sv | 98 | ||||
| -rw-r--r-- | rtl/cache/control.sv | 364 | ||||
| -rw-r--r-- | rtl/cache/defs.sv | 64 | ||||
| -rw-r--r-- | rtl/cache/offsets.sv | 68 | ||||
| -rw-r--r-- | rtl/cache/routing.sv | 107 | ||||
| -rw-r--r-- | rtl/cache/sram.sv | 49 | ||||
| -rw-r--r-- | rtl/core/uarch.sv | 7 |
7 files changed, 756 insertions, 1 deletions
diff --git a/rtl/cache/cache.sv b/rtl/cache/cache.sv
new file mode 100644
index 0000000..6efeb28
--- /dev/null
+++ b/rtl/cache/cache.sv
@@ -0,0 +1,98 @@
+`include "cache/defs.sv"
+// Cache top level: wires together the SRAM, the controller, the core/memory routing and the word<->line offset logic.
+module cache
+#(parameter TOKEN_AT_RESET=0)       // forwarded unchanged to cache_control
+(
+    input  logic      clk,
+                      rst_n,
+
+    input  word       core_address,
+    input  logic      core_read,
+                      core_write,
+    input  word       core_writedata,
+    input  word_be    core_byteenable,
+    output logic      core_waitrequest,
+    output word       core_readdata,
+
+    //TODO
+    //input  /*TODO*/ dbg_address,
+    input  logic      dbg_read,
+                      dbg_write,
+    input  word       dbg_writedata,
+    output logic      dbg_waitrequest,
+    output word       dbg_readdata,
+
+    input  logic      mem_waitrequest,
+    input  line       mem_readdata,
+    output word       mem_address,
+    output logic      mem_read,
+                      mem_write,
+    output line       mem_writedata,
+    output line_be    mem_byteenable,
+
+    input  logic      in_data_valid,
+    input  ring_req   in_data,
+    output logic      in_data_ready,
+
+    input  logic      out_data_ready,
+    output ring_req   out_data,
+    output logic      out_data_valid,
+
+    input  ring_token in_token,
+    input  logic      in_token_valid,
+
+    output ring_token out_token,
+    output logic      out_token_valid
+);
+
+    //TODO: the debug port is not implemented; it stalls forever and dbg_readdata is not driven here
+    assign dbg_waitrequest = 1;
+
+    // Internal signals are declared before the instances that reference them (by name or through .*)
+    logic      write_data, write_state;
+    line       data_wr, data_rd;
+    addr_tag   tag_wr, tag_rd;
+    line_state state_wr, state_rd;
+    addr_index index_rd, index_wr;
+    // Memory and core handshake between cache_control and cache_routing
+    word  cache_mem_address;
+    line  cache_mem_writedata;
+    logic cache_core_waitrequest, cache_mem_waitrequest, cache_mem_read, cache_mem_write;
+    // Core-side signals: cacheable strobes, address fields and line-wide data
+    line        core_readdata_line;
+    logic       cache_core_read, cache_core_write;
+    addr_tag    core_tag;
+    addr_index  core_index;
+    addr_offset core_offset;
+    line        core_writedata_line, core_data_wr;
+    line_be     core_byteenable_line;
+
+    cache_sram sram
+    (
+        .*
+    );
+
+    cache_control #(.TOKEN_AT_RESET(TOKEN_AT_RESET)) control
+    (
+        .core_read(cache_core_read),
+        .core_write(cache_core_write),
+        .core_waitrequest(cache_core_waitrequest),
+        .mem_waitrequest(cache_mem_waitrequest),
+        .mem_address(cache_mem_address),
+        .mem_writedata(cache_mem_writedata),
+        .mem_read(cache_mem_read),
+        .mem_write(cache_mem_write),
+        .*
+    );
+
+    cache_routing routing
+    (
+        .*
+    );
+
+    cache_offsets offsets
+    (
+        .*
+    );
+
+endmodule
diff --git a/rtl/cache/control.sv b/rtl/cache/control.sv
new file mode 100644
index 0000000..f551477
--- /dev/null
+++ b/rtl/cache/control.sv
@@ -0,0 +1,364 @@
+`include "cache/defs.sv"
+// Cache controller: FSM (ACCEPT/CORE/SNOOP/REPLY) that serves core requests, handles ring requests and starts line reads/writebacks.
+module cache_control
+#(parameter TOKEN_AT_RESET=0)       // reset value of out_token_valid
+(
+    input  logic      clk,
+                      rst_n,
+
+    input  addr_tag   core_tag,
+    input  addr_index core_index,
+    input  logic      core_read,
+                      core_write,
+    input  line       core_data_wr,
+    output logic      core_waitrequest,
+
+    input  ring_req   in_data,
+    input  logic      in_data_valid,
+    output logic      in_data_ready,
+
+    input  logic      out_data_ready,
+    output ring_req   out_data,
+    output logic      out_data_valid,
+
+    input  ring_token in_token,
+    input  logic      in_token_valid,
+
+    output ring_token out_token,
+    output logic      out_token_valid,
+
+    input  addr_tag   tag_rd,
+    input  line       data_rd,
+    input  line_state state_rd,
+
+    output addr_index index_rd,
+                      index_wr,
+    output logic      write_data,
+                      write_state,
+    output addr_tag   tag_wr,
+    output line       data_wr,
+    output line_state state_wr,
+
+    input  logic      mem_waitrequest,
+    input  line       mem_readdata,
+    output word       mem_address,
+    output logic      mem_read,
+                      mem_write,
+    output line       mem_writedata
+);
+
+    enum int unsigned
+    {
+        ACCEPT,
+        CORE,
+        SNOOP,
+        REPLY
+    } state, next_state;
+
+    logic accept_snoop, in_hold_valid, last_hop, lock_line, locked, may_send,
+          may_send_if_token_held, mem_begin, mem_end, mem_end_read, mem_wait,
+          out_stall, reply_wait, replace, retry, send, send_inval, send_read,
+          snoop_hit, set_reply, unlock_line, writeback;
+
+    ring_req in_hold, send_data, fwd_data, stall_data, out_data_next;
+
+    addr_tag   mem_tag;
+    addr_index mem_index;
+
+    assign mem_end = (mem_read || mem_write) && !mem_waitrequest;  // in-flight access completes this cycle
+    assign mem_wait = (mem_read || mem_write) && mem_waitrequest;  // in-flight access is still stalled
+    assign mem_address = {3'b000, mem_tag, mem_index, 4'b0000};    // io bits zero, line-aligned
+    assign mem_end_read = mem_read && !mem_waitrequest;
+    assign reply_wait = 0; // NOTE(review): had no driver in this module; tied low so ACCEPT->CORE is never X. Confirm intended source.
+    /* Unlocking the line only once the core's request has finished guarantees
+     * forward progress of the whole system, in lockstep in the worst case,
+     * despite retries (heavy contention of writes to INVALID or SHARED lines
+     * will never leave two or more nodes deadlocked).
+     */
+    assign unlock_line = !core_waitrequest;
+
+    assign replace = tag_rd != core_tag && state_rd != INVALID;   // a different, valid line occupies this index
+    assign last_hop = in_hold.ttl == `TTL_END;
+    assign snoop_hit = tag_rd == in_hold.tag;
+    assign accept_snoop = in_hold_valid && !last_hop && (in_hold.inval || !in_hold.reply);
+
+    assign may_send = may_send_if_token_held && in_token_valid;
+    assign may_send_if_token_held   // no valid token entry names the line the core is addressing
+        =  (!in_token.e2.valid || in_token.e2.index != core_index || in_token.e2.tag != core_tag)
+        && (!in_token.e1.valid || in_token.e1.index != core_index || in_token.e1.tag != core_tag)
+        && (!in_token.e0.valid || in_token.e0.index != core_index || in_token.e0.tag != core_tag);
+
+    assign out_data = out_stall ? stall_data : out_data_next;      // replay the saved request while stalled
+    assign out_data_next = send ? send_data : fwd_data;
+    assign out_data_valid = out_stall || send || (in_hold_valid && !last_hop);
+
+    assign send_data.tag = core_tag;
+    assign send_data.ttl = `TTL_MAX;
+    assign send_data.data = fwd_data.data; // This avoids a lot of muxes
+    assign send_data.read = send_read;
+    assign send_data.index = core_index;
+    assign send_data.inval = send_inval;
+    assign send_data.reply = 0;
+
+    always_comb begin
+        tag_wr = core_tag;
+        data_wr = core_data_wr;
+        index_rd = core_index;
+
+        state_wr = INVALID;
+        write_data = 0;
+        write_state = 0;
+
+        mem_begin = 0;
+        writeback = 0;
+
+        send = 0;
+        send_read = 0;
+        send_inval = 0;
+
+        set_reply = 0;
+        core_waitrequest = 1;   // the core stays stalled unless a CORE case below releases it
+
+        in_data_ready = !in_hold_valid;     // by default only refill an empty holding register
+
+        unique case (state)
+            ACCEPT: begin
+                if (last_hop && !in_hold.read)  // expired non-read request: let the next one overwrite it
+                    in_data_ready = 1;
+
+                if (accept_snoop)               // present the snooped index to the SRAM read port
+                    index_rd = in_hold.index;
+            end
+
+            CORE:
+                if (replace) begin              // invalidate the resident line, writing it back if MODIFIED
+                    state_wr = INVALID;
+                    write_state = 1;
+
+                    if (state_rd == MODIFIED)
+                        writeback = 1;
+                end else unique case ({state_rd, core_write})
+                    {INVALID, 1'b0}: begin      // read miss: ring request with read set
+                        send = 1;
+                        send_read = 1;
+                    end
+
+                    {INVALID, 1'b1}: begin      // write miss: ring request with read and inval set
+                        send = 1;
+                        send_read = 1;
+                        send_inval = 1;
+                    end
+
+                    {SHARED, 1'b0}:
+                        core_waitrequest = 0;
+
+                    {SHARED, 1'b1}: begin
+                        /* We do not assert write_data because the write will
+                         * be retried later, when the state is EXCLUSIVE.
+                         *
+                         * Note that this is the same reason why we do not
+                         * go straight to MODIFIED.
+                         */
+                        state_wr = EXCLUSIVE;
+                        write_state = 1;
+
+                        send = 1;
+                        send_inval = 1;
+                    end
+
+                    {EXCLUSIVE, 1'b0}:
+                        core_waitrequest = 0;
+
+                    {EXCLUSIVE, 1'b1}: begin
+                        state_wr = MODIFIED;
+                        write_data = 1;
+                        write_state = 1;
+                        core_waitrequest = 0;
+                    end
+
+                    {MODIFIED, 1'b0}:
+                        core_waitrequest = 0;
+
+                    {MODIFIED, 1'b1}: begin
+                        write_data = 1;
+                        core_waitrequest = 0;
+                    end
+                endcase
+
+            SNOOP: begin
+                index_rd = in_hold.index;
+                in_data_ready = 1;
+
+                if (snoop_hit) begin
+                    if (in_hold.read) begin
+                        set_reply = 1;          // answer with our data unless the line turns out INVALID
+
+                        unique case (state_rd)
+                            INVALID:
+                                set_reply = 0;
+
+                            SHARED: ;
+
+                            EXCLUSIVE: begin
+                                state_wr = SHARED;
+                                write_state = 1;
+                            end
+
+                            MODIFIED: begin
+                                state_wr = SHARED;
+                                write_state = 1;
+
+                                writeback = 1;
+                            end
+                        endcase
+                    end
+
+                    if (in_hold.inval) begin    // assigned last, so INVALID overrides the SHARED written above
+                        state_wr = INVALID;
+                        write_state = 1;
+                    end
+                end
+            end
+
+            REPLY: begin
+                in_data_ready = 1;
+
+                if (in_hold.reply) begin        // reply flag set: fill from the ring data as SHARED
+                    data_wr = in_hold.data;
+                    state_wr = SHARED;
+                    write_data = 1;
+                    write_state = 1;
+                end else                        // reply flag clear: start a memory access instead
+                    mem_begin = 1;
+            end
+
+            default: ;
+        endcase
+
+        if (writeback)
+            mem_begin = 1;
+
+        // Bus collisions
+        retry = (mem_end_read && (write_data || write_state)) || (mem_wait && mem_begin);
+
+        // Note the difference from an assign: send may still change further down
+        lock_line = send;
+
+        if (send && !may_send && !locked)
+            retry = 1;
+
+        if (retry) begin                        // undo this cycle's side effects and keep everyone waiting
+            send = 0;
+            mem_begin = 0;
+            write_data = 0;
+            write_state = 0;
+
+            in_data_ready = !in_hold_valid;
+            core_waitrequest = 1;
+        end
+
+        index_wr = index_rd;
+        if (mem_end_read) begin                 // a completing memory read takes the write port: fill as EXCLUSIVE
+            tag_wr = mem_tag;
+            index_wr = mem_index;
+
+            data_wr = mem_readdata;
+            state_wr = EXCLUSIVE;
+
+            write_data = 1;
+            write_state = 1;
+        end
+    end
+
+    always_comb begin                           // forwarded copy of the held request, TTL decremented
+        fwd_data = in_hold;
+        fwd_data.ttl = in_hold.ttl - 2'b1;
+
+        if (set_reply) begin
+            fwd_data.data = data_rd;
+            fwd_data.reply = 1;
+        end
+    end
+
+    always_comb begin                           // every state other than ACCEPT lasts one cycle
+        next_state = ACCEPT;
+
+        unique case (state)
+            ACCEPT: begin
+                if (accept_snoop)
+                    next_state = SNOOP;
+                else if (in_hold_valid && last_hop && in_hold.read)
+                    next_state = REPLY;
+                else if ((core_read || core_write) && !reply_wait
+                      && (!locked || (may_send && !unlock_line)))
+                    next_state = CORE;
+
+                if (out_stall && !out_data_ready)   // stay put while the output is still blocked
+                    next_state = ACCEPT;
+            end
+
+            default: ;
+        endcase
+    end
+
+    always_ff @(posedge clk or negedge rst_n)
+        state <= !rst_n ? ACCEPT : next_state;
+
+    always_ff @(posedge clk or negedge rst_n)
+        if (!rst_n) begin
+            out_token <= {($bits(out_token)){1'b0}};
+            out_token_valid <= TOKEN_AT_RESET;
+
+            in_hold_valid <= 0;
+            out_stall <= 0;
+
+            locked <= 0;
+
+            mem_read <= 0;
+            mem_write <= 0;
+        end else begin
+            out_token.e0.tag <= core_tag;       // e0 is rewritten every cycle with this node's own entry
+            out_token.e0.index <= core_index;
+            out_token.e0.valid <= may_send_if_token_held && (send || locked) && !unlock_line;
+
+            out_token.e2 <= in_token.e1;        // incoming entries shift one slot: e0 -> e1 -> e2
+            out_token.e1 <= in_token.e0;
+            out_token_valid <= in_token_valid;
+
+            if (in_data_ready)
+                in_hold_valid <= in_data_valid;
+
+            out_stall <= out_data_valid && !out_data_ready;
+
+            if (lock_line)
+                locked <= 1;
+
+            if (unlock_line)                    // later assignment: unlock wins over lock in the same cycle
+                locked <= 0;
+
+            if (mem_end) begin
+                mem_read <= 0;
+                mem_write <= 0;
+            end
+
+            if (mem_begin) begin                // later assignment: a new access wins over mem_end
+                mem_read <= !writeback;
+                mem_write <= writeback;
+            end
+        end
+
+    always_ff @(posedge clk) begin              // data registers without reset
+        if (in_data_ready)
+            in_hold <= in_data;
+
+        if (!out_stall)
+            stall_data <= out_data_next;
+
+        if (mem_begin) begin
+            mem_tag <= writeback ? tag_rd : core_tag;
+            mem_index <= index_wr;
+            mem_writedata <= data_rd;
+        end
+    end
+
+endmodule
diff --git a/rtl/cache/defs.sv b/rtl/cache/defs.sv
new file mode 100644
index 0000000..2566058
--- /dev/null
+++ b/rtl/cache/defs.sv
@@ -0,0 +1,64 @@
+`ifndef CACHE_DEFS_SV
+`define CACHE_DEFS_SV
+// Shared cache types: byte-enable and line vectors, address fields, line states, ring requests and tokens.
+typedef logic[3:0]   word_be;
+typedef logic[15:0]  line_be;
+typedef logic[127:0] line;
+
+// Clashes with the typedef in core/uarch.sv
+`ifndef WORD_DEFINED
+typedef logic[31:0] word;
+`define WORD_DEFINED
+`endif
+
+/* We have 512MiB of SDRAM; the rest of the address space is I/O (uncached). We
+ * use 512 direct-mapped lines of 16 bytes each. The core only performs
+ * aligned accesses. Therefore, every 32-bit address consists of:
+ * - 2 bits that are always 0 (translated to byteenable by the core)
+ * - 2 offset bits (since the cache's addressable unit is the word)
+ * - 9 index bits
+ * - 16 tag bits
+ * - 3 bits that are == 0 if cached, != 0 if uncached
+ */
+typedef logic[1:0]  addr_mbz;
+typedef logic[1:0]  addr_offset;
+typedef logic[8:0]  addr_index;
+typedef logic[15:0] addr_tag;
+typedef logic[2:0]  addr_io_region;
+typedef logic[26:0] addr_cacheable;
+
+typedef enum logic[1:0]
+{
+    INVALID,
+    SHARED,
+    EXCLUSIVE,
+    MODIFIED
+} line_state;
+
+typedef struct packed
+{
+    logic[1:0] ttl;     // set to `TTL_MAX when sent, decremented on each forward
+    logic      read,
+               inval,
+               reply;
+    addr_tag   tag;
+    addr_index index;
+    line       data;
+} ring_req;
+
+`define TTL_END 2'b00
+`define TTL_MAX 2'b11
+
+typedef struct packed
+{
+    logic      valid;
+    addr_tag   tag;
+    addr_index index;
+} token_lock;
+
+typedef struct packed
+{
+    token_lock e2, e1, e0;
+} ring_token;
+
+`endif
diff --git a/rtl/cache/offsets.sv b/rtl/cache/offsets.sv
new file mode 100644
index 0000000..a933d1c
--- /dev/null
+++ b/rtl/cache/offsets.sv
@@ -0,0 +1,68 @@
+`include "cache/defs.sv"
+// Converts between the core's 32-bit word and the 128-bit line: write data, byte enables/masks and read-word selection.
+module cache_offsets
+(
+    input  addr_offset core_offset,
+    input  word_be     core_byteenable,
+    input  word        core_writedata,
+    input  line        core_readdata_line,
+                       data_rd,
+
+    output line        core_data_wr,
+                       core_writedata_line,
+    output word        core_readdata,
+    output line_be     core_byteenable_line
+);
+
+    line    line_mask;
+    word    be_extend, mask3, mask2, mask1, mask0;
+    word_be be3, be2, be1, be0;
+
+    assign core_writedata_line = {4{core_writedata}};       // same word replicated in all four slots
+    assign core_byteenable_line = {be3, be2, be1, be0};
+
+    assign be_extend = {{8{core_byteenable[3]}}, {8{core_byteenable[2]}},
+                        {8{core_byteenable[1]}}, {8{core_byteenable[0]}}};  // one byte-enable bit per 8 data bits
+
+    assign line_mask = {mask3, mask2, mask1, mask0};
+    assign core_data_wr = (core_writedata_line & line_mask) | (data_rd & ~line_mask);  // merge enabled bytes into the line read
+
+    always_comb begin
+        mask3 = 0;
+        mask2 = 0;
+        mask1 = 0;
+        mask0 = 0;
+
+        be3 = 0;
+        be2 = 0;
+        be1 = 0;
+        be0 = 0;
+
+        unique case (core_offset)   // only the selected word slot gets the enables and mask
+            2'b00: begin
+                be0 = core_byteenable;
+                mask0 = be_extend;
+                core_readdata = core_readdata_line[31:0];
+            end
+
+            2'b01: begin
+                be1 = core_byteenable;
+                mask1 = be_extend;
+                core_readdata = core_readdata_line[63:32];
+            end
+
+            2'b10: begin
+                be2 = core_byteenable;
+                mask2 = be_extend;
+                core_readdata = core_readdata_line[95:64];
+            end
+
+            2'b11: begin
+                be3 = core_byteenable;
+                mask3 = be_extend;
+                core_readdata = core_readdata_line[127:96];
+            end
+        endcase
+    end
+
+endmodule
diff --git a/rtl/cache/routing.sv b/rtl/cache/routing.sv
new file mode 100644
index 0000000..4857e08
--- /dev/null
+++ b/rtl/cache/routing.sv
@@ -0,0 +1,107 @@
+`include "cache/defs.sv"
+// Splits the core address into fields and shares the memory port between cache accesses and uncached (bypass) core accesses.
+module cache_routing
+(
+    input  logic       clk,
+                       rst_n,
+
+    input  word        core_address,
+    input  logic       core_read,
+                       core_write,
+    input  line        core_writedata_line,
+    input  line_be     core_byteenable_line,
+    output logic       core_waitrequest,
+    output line        core_readdata_line,
+
+    output addr_tag    core_tag,
+    output addr_index  core_index,
+    output addr_offset core_offset,
+
+    input  line        data_rd,
+    input  logic       cache_core_waitrequest,
+    output logic       cache_core_read,
+                       cache_core_write,
+
+    input  word        cache_mem_address,
+    input  logic       cache_mem_read,
+                       cache_mem_write,
+    input  line        cache_mem_writedata,
+    output logic       cache_mem_waitrequest,
+
+    input  logic       mem_waitrequest,
+    input  line        mem_readdata,
+    output word        mem_address,
+    output logic       mem_read,
+                       mem_write,
+    output line        mem_writedata,
+    output line_be     mem_byteenable
+);
+
+    word           core_address_line;
+    logic          cached, cache_mem, transition;
+    addr_mbz       mbz;
+    addr_io_region io;
+
+    enum int unsigned
+    {
+        IDLE,
+        CACHE,
+        BYPASS
+    } state;
+
+    assign cached = io == 3'b000;       // top three address bits zero: cacheable
+    assign cache_mem = cache_mem_read || cache_mem_write;
+
+    assign {io, core_tag, core_index, core_offset, mbz} = core_address;
+    assign core_address_line = {io, core_tag, core_index, 4'b0000};    // core address with the low four bits cleared
+    assign core_readdata_line = cached ? data_rd : mem_readdata;
+
+    assign cache_core_read = core_read && cached;
+    assign cache_core_write = core_write && cached;
+
+    always_comb begin
+        transition = 0;
+        core_waitrequest = cache_core_waitrequest;
+        cache_mem_waitrequest = 1;      // the cache sees the memory as busy unless it owns the port
+
+        unique case (state)
+            IDLE:
+                transition = cache_mem || (!cached && (core_read || core_write));
+
+            CACHE:
+                cache_mem_waitrequest = mem_waitrequest;
+
+            BYPASS:
+                core_waitrequest = mem_waitrequest;
+        endcase
+    end
+
+    always_ff @(posedge clk or negedge rst_n)
+        if (!rst_n) begin
+            state <= IDLE;
+            mem_read <= 0;
+            mem_write <= 0;
+        end else unique case (state)
+            IDLE:
+                if (transition) begin   // a cache access takes priority over an uncached core access
+                    state <= cache_mem ? CACHE : BYPASS;
+                    mem_read <= cache_mem ? cache_mem_read : core_read;
+                    mem_write <= cache_mem ? cache_mem_write : core_write;
+                end
+
+            CACHE, BYPASS:
+                if (!mem_waitrequest) begin
+                    state <= IDLE;
+                    mem_read <= 0;
+                    mem_write <= 0;
+                end
+        endcase
+
+    always_ff @(posedge clk)            // address, data and enables are latched when an access is accepted
+        if (transition) begin
+            mem_address <= cache_mem ? cache_mem_address : core_address_line;
+            mem_writedata <= cache_mem ? cache_mem_writedata : core_writedata_line;
+            mem_byteenable <= cache_mem ? 16'hffff : core_byteenable_line;  // cache accesses enable all 16 bytes of the line (was 16'hff: low 8 only)
+        end
+
+endmodule
diff --git a/rtl/cache/sram.sv b/rtl/cache/sram.sv
new file mode 100644
index 0000000..2e6c6ce
--- /dev/null
+++ b/rtl/cache/sram.sv
@@ -0,0 +1,49 @@
+`include "cache/defs.sv"
+// Tag, data and state arrays sharing one write index (index_wr) and one registered read index (index_rd).
+module cache_sram
+(
+    input  logic      clk,
+                      rst_n,
+
+    input  addr_index index_rd,
+                      index_wr,
+    input  logic      write_data,
+                      write_state,
+    input  addr_tag   tag_wr,
+    input  line       data_wr,
+    input  line_state state_wr,
+
+    output addr_tag   tag_rd,
+    output line       data_rd,
+    output line_state state_rd
+);
+
+    // There is a myth about true dual-port RAMs with byte enables; I very much doubt it is real:
+    // https://www.intel.com/content/www/us/en/docs/programmable/683082/21-3/ram-with-byte-enable-signals.html
+
+    localparam DEPTH = 1 << $bits(addr_index);
+
+    line       data_file[DEPTH];
+    addr_tag   tag_file[DEPTH];
+    line_state state_file[DEPTH];
+
+    always_ff @(posedge clk) begin
+        if (write_data) begin           // tag and data are always written together
+            tag_file[index_wr] <= tag_wr;
+            data_file[index_wr] <= data_wr;
+        end
+
+        if (write_state)                // state has its own write enable
+            state_file[index_wr] <= state_wr;
+
+        tag_rd <= tag_file[index_rd];
+        data_rd <= data_file[index_rd];
+        state_rd <= state_file[index_rd];
+    end
+
+    //FIXME: rst_n for state_file?
+    initial
+        for (int i = 0; i < DEPTH; ++i)
+            state_file[i] = INVALID;
+
+endmodule
diff --git a/rtl/core/uarch.sv b/rtl/core/uarch.sv
index 82dc1dc..0226cbf 100644
--- a/rtl/core/uarch.sv
+++ b/rtl/core/uarch.sv
@@ -4,10 +4,15 @@
 // Decodifica como andeq r0, r0, r0
 `define NOP 32'd0
 
+// Clashes with the typedef in cache/defs.sv
+`ifndef WORD_DEFINED
+typedef logic[31:0] word;
+`define WORD_DEFINED
+`endif
+
 typedef logic[3:0] reg_num;
 typedef logic[2:0] cp_opcode;
 typedef logic[15:0] reg_list;
-typedef logic[31:0] word;
 typedef logic[63:0] dword;
 typedef logic[29:0] ptr;
 
