summaryrefslogtreecommitdiff
path: root/rtl
diff options
context:
space:
mode:
Diffstat (limited to 'rtl')
-rw-r--r--rtl/cache/cache.sv98
-rw-r--r--rtl/cache/control.sv364
-rw-r--r--rtl/cache/defs.sv64
-rw-r--r--rtl/cache/offsets.sv68
-rw-r--r--rtl/cache/routing.sv107
-rw-r--r--rtl/cache/sram.sv49
-rw-r--r--rtl/core/uarch.sv7
7 files changed, 756 insertions, 1 deletions
diff --git a/rtl/cache/cache.sv b/rtl/cache/cache.sv
new file mode 100644
index 0000000..6efeb28
--- /dev/null
+++ b/rtl/cache/cache.sv
@@ -0,0 +1,98 @@
+`include "cache/defs.sv"
+
+module cache // Cache top level: instantiates cache_sram, cache_control, cache_routing and cache_offsets
+#(parameter TOKEN_AT_RESET=0) // forwarded unchanged to cache_control
+(
+ input logic clk,
+ rst_n,
+
+ input word core_address,
+ input logic core_read,
+ core_write,
+ input word core_writedata,
+ input word_be core_byteenable,
+ output logic core_waitrequest,
+ output word core_readdata,
+
+ //TODO
+ //input /*TODO*/ dbg_address,
+ input logic dbg_read,
+ dbg_write,
+ input word dbg_writedata,
+ output logic dbg_waitrequest,
+ output word dbg_readdata, // NOTE(review): never driven in this module; the debug port is still a stub
+
+ input logic mem_waitrequest,
+ input line mem_readdata,
+ output word mem_address,
+ output logic mem_read,
+ mem_write,
+ output line mem_writedata,
+ output line_be mem_byteenable,
+
+ input logic in_data_valid,
+ input ring_req in_data,
+ output logic in_data_ready,
+
+ input logic out_data_ready,
+ output ring_req out_data,
+ output logic out_data_valid,
+
+ input ring_token in_token,
+ input logic in_token_valid,
+
+ output ring_token out_token,
+ output logic out_token_valid
+);
+
+ //TODO
+ assign dbg_waitrequest = 1; // debug port permanently stalls until it is implemented
+
+ logic write_data, write_state; // every net is declared before the first instance that references it
+ line data_wr, data_rd;
+ addr_tag tag_wr, tag_rd;
+ line_state state_wr, state_rd;
+ addr_index index_rd, index_wr;
+
+ word cache_mem_address;
+ line cache_mem_writedata;
+ logic cache_core_waitrequest, cache_mem_waitrequest, cache_mem_read, cache_mem_write;
+
+ line core_readdata_line;
+ logic cache_core_read, cache_core_write; // must precede the control instance: an undeclared name in a port list becomes an implicit net
+ addr_tag core_tag;
+ addr_index core_index;
+ addr_offset core_offset;
+
+ line core_writedata_line, core_data_wr;
+ line_be core_byteenable_line;
+
+ cache_sram sram // tag, data and state arrays
+ (
+ .*
+ );
+
+ cache_control #(.TOKEN_AT_RESET(TOKEN_AT_RESET)) control
+ (
+ .core_read(cache_core_read), // control only sees core requests that routing classified as cached
+ .core_write(cache_core_write),
+ .core_waitrequest(cache_core_waitrequest),
+ .mem_waitrequest(cache_mem_waitrequest), // control's memory port goes through routing, not straight to mem_*
+ .mem_address(cache_mem_address),
+ .mem_writedata(cache_mem_writedata),
+ .mem_read(cache_mem_read),
+ .mem_write(cache_mem_write),
+ .*
+ );
+
+ cache_routing routing // drives the external mem_* port for either control or uncached core accesses
+ (
+ .*
+ );
+
+ cache_offsets offsets // word <-> line lane selection and byte-enable merging
+ (
+ .*
+ );
+
+endmodule
diff --git a/rtl/cache/control.sv b/rtl/cache/control.sv
new file mode 100644
index 0000000..f551477
--- /dev/null
+++ b/rtl/cache/control.sv
@@ -0,0 +1,364 @@
+`include "cache/defs.sv"
+
+module cache_control // Coherence controller: line-state FSM, ring request/token handling and memory fill/writeback sequencing
+#(parameter TOKEN_AT_RESET=0) // reset value of out_token_valid
+(
+ input logic clk,
+ rst_n,
+
+ input addr_tag core_tag,
+ input addr_index core_index,
+ input logic core_read,
+ core_write,
+ input line core_data_wr,
+ output logic core_waitrequest,
+
+ input ring_req in_data,
+ input logic in_data_valid,
+ output logic in_data_ready,
+
+ input logic out_data_ready,
+ output ring_req out_data,
+ output logic out_data_valid,
+
+ input ring_token in_token,
+ input logic in_token_valid,
+
+ output ring_token out_token,
+ output logic out_token_valid,
+
+ input addr_tag tag_rd,
+ input line data_rd,
+ input line_state state_rd,
+
+ output addr_index index_rd,
+ index_wr,
+ output logic write_data,
+ write_state,
+ output addr_tag tag_wr,
+ output line data_wr,
+ output line_state state_wr,
+
+ input logic mem_waitrequest,
+ input line mem_readdata,
+ output word mem_address,
+ output logic mem_read,
+ mem_write,
+ output line mem_writedata
+);
+
+ enum int unsigned
+ {
+ ACCEPT, // arbitration: selects SNOOP, REPLY or CORE for the next cycle
+ CORE, // serve the core request against the line read at core_index
+ SNOOP, // check the held ring request (in_hold) against the local line
+ REPLY // handle a held read request whose ttl reached TTL_END
+ } state, next_state;
+
+ logic accept_snoop, in_hold_valid, last_hop, lock_line, locked, may_send,
+ may_send_if_token_held, mem_begin, mem_end, mem_end_read, mem_wait,
+ out_stall, reply_wait, replace, retry, send, send_inval, send_read,
+ snoop_hit, set_reply, unlock_line, writeback; // NOTE(review): reply_wait is read in the next-state logic but has no driver in this module
+
+ ring_req in_hold, send_data, fwd_data, stall_data, out_data_next;
+
+ addr_tag mem_tag;
+ addr_index mem_index;
+
+ assign mem_end = (mem_read || mem_write) && !mem_waitrequest; // outstanding access with waitrequest deasserted
+ assign mem_wait = (mem_read || mem_write) && mem_waitrequest; // outstanding access still stalled
+ assign mem_address = {3'b000, mem_tag, mem_index, 4'b0000}; // line-aligned: zero region bits, tag, index, 4 zero low bits
+ assign mem_end_read = mem_read && !mem_waitrequest;
+
+ /* Not unlocking the line until the core's request completes guarantees
+ * forward progress of the whole system, in lockstep in the worst possible
+ * case, despite retries (heavy contention of writes to INVALID
+ * or SHARED lines will never leave two or more nodes deadlocked).
+ */
+ assign unlock_line = !core_waitrequest;
+
+ assign replace = tag_rd != core_tag && state_rd != INVALID; // slot holds a non-INVALID line with a different tag
+ assign last_hop = in_hold.ttl == `TTL_END;
+ assign snoop_hit = tag_rd == in_hold.tag; // tag comparison only; the line state is examined in SNOOP
+ assign accept_snoop = in_hold_valid && !last_hop && (in_hold.inval || !in_hold.reply);
+
+ assign may_send = may_send_if_token_held && in_token_valid;
+ assign may_send_if_token_held // true when no valid token entry names (core_index, core_tag)
+ = (!in_token.e2.valid || in_token.e2.index != core_index || in_token.e2.tag != core_tag)
+ && (!in_token.e1.valid || in_token.e1.index != core_index || in_token.e1.tag != core_tag)
+ && (!in_token.e0.valid || in_token.e0.index != core_index || in_token.e0.tag != core_tag);
+
+ assign out_data = out_stall ? stall_data : out_data_next; // replay the saved request while stalled
+ assign out_data_next = send ? send_data : fwd_data; // a locally originated request wins over forwarding
+ assign out_data_valid = out_stall || send || (in_hold_valid && !last_hop);
+
+ assign send_data.tag = core_tag;
+ assign send_data.ttl = `TTL_MAX;
+ assign send_data.data = fwd_data.data; // This avoids a lot of muxes
+ assign send_data.read = send_read;
+ assign send_data.index = core_index;
+ assign send_data.inval = send_inval;
+ assign send_data.reply = 0;
+
+ always_comb begin
+ tag_wr = core_tag; // defaults; the per-state logic below overrides them
+ data_wr = core_data_wr;
+ index_rd = core_index;
+
+ state_wr = INVALID;
+ write_data = 0;
+ write_state = 0;
+
+ mem_begin = 0;
+ writeback = 0;
+
+ send = 0;
+ send_read = 0;
+ send_inval = 0;
+
+ set_reply = 0;
+ core_waitrequest = 1;
+
+ in_data_ready = !in_hold_valid; // by default accept a ring request only when the hold register is empty
+
+ unique case (state)
+ ACCEPT: begin
+ if (last_hop && !in_hold.read)
+ in_data_ready = 1;
+
+ if (accept_snoop)
+ index_rd = in_hold.index; // read the snooped index now so it is available in SNOOP
+ end
+
+ CORE:
+ if (replace) begin
+ state_wr = INVALID; // evict: invalidate the slot, writing back first if dirty
+ write_state = 1;
+
+ if (state_rd == MODIFIED)
+ writeback = 1;
+ end else unique case ({state_rd, core_write})
+ {INVALID, 1'b0}: begin
+ send = 1;
+ send_read = 1;
+ end
+
+ {INVALID, 1'b1}: begin
+ send = 1;
+ send_read = 1;
+ send_inval = 1;
+ end
+
+ {SHARED, 1'b0}:
+ core_waitrequest = 0;
+
+ {SHARED, 1'b1}: begin
+ /* We do not assert write_data since the write will be
+ * retried later, when the state will be EXCLUSIVE.
+ *
+ * Note that this is the same reason why we do not
+ * go straight to MODIFIED.
+ */
+ state_wr = EXCLUSIVE;
+ write_state = 1;
+
+ send = 1;
+ send_inval = 1;
+ end
+
+ {EXCLUSIVE, 1'b0}:
+ core_waitrequest = 0;
+
+ {EXCLUSIVE, 1'b1}: begin
+ state_wr = MODIFIED;
+ write_data = 1;
+ write_state = 1;
+ core_waitrequest = 0;
+ end
+
+ {MODIFIED, 1'b0}:
+ core_waitrequest = 0;
+
+ {MODIFIED, 1'b1}: begin
+ write_data = 1;
+ core_waitrequest = 0;
+ end
+ endcase
+
+ SNOOP: begin
+ index_rd = in_hold.index;
+ in_data_ready = 1;
+
+ if (snoop_hit) begin
+ if (in_hold.read) begin
+ set_reply = 1; // supply data unless the local line turns out to be INVALID
+
+ unique case (state_rd)
+ INVALID:
+ set_reply = 0;
+
+ SHARED: ;
+
+ EXCLUSIVE: begin
+ state_wr = SHARED;
+ write_state = 1;
+ end
+
+ MODIFIED: begin
+ state_wr = SHARED;
+ write_state = 1;
+
+ writeback = 1;
+ end
+ endcase
+ end
+
+ if (in_hold.inval) begin
+ state_wr = INVALID; // assigned after the read handling above, so invalidation overrides the downgrade to SHARED
+ write_state = 1;
+ end
+ end
+ end
+
+ REPLY: begin
+ in_data_ready = 1;
+
+ if (in_hold.reply) begin
+ data_wr = in_hold.data; // the request carries data: install it as SHARED
+ state_wr = SHARED;
+ write_data = 1;
+ write_state = 1;
+ end else
+ mem_begin = 1; // no reply data: start a memory read (writeback is 0 here)
+ end
+
+ default: ;
+ endcase
+
+ if (writeback)
+ mem_begin = 1;
+
+ // Bus collisions
+ retry = (mem_end_read && (write_data || write_state)) || (mem_wait && mem_begin);
+
+ // Note the difference from an assign, since send may change further down
+ lock_line = send;
+
+ if (send && !may_send && !locked) // cannot send: may_send is false and the lock is not held yet
+ retry = 1;
+
+ if (retry) begin // cancel this cycle's side effects and keep the core waiting
+ send = 0;
+ mem_begin = 0;
+ write_data = 0;
+ write_state = 0;
+
+ in_data_ready = !in_hold_valid;
+ core_waitrequest = 1;
+ end
+
+ index_wr = index_rd;
+ if (mem_end_read) begin // a completing memory read installs the line as EXCLUSIVE, overriding everything above
+ tag_wr = mem_tag;
+ index_wr = mem_index;
+
+ data_wr = mem_readdata;
+ state_wr = EXCLUSIVE;
+
+ write_data = 1;
+ write_state = 1;
+ end
+ end
+
+ always_comb begin
+ fwd_data = in_hold; // forwarded request: same fields with ttl decremented
+ fwd_data.ttl = in_hold.ttl - 2'b1;
+
+ if (set_reply) begin
+ fwd_data.data = data_rd; // attach the local line contents and mark the request as answered
+ fwd_data.reply = 1;
+ end
+ end
+
+ always_comb begin
+ next_state = ACCEPT; // every non-ACCEPT state lasts one cycle and returns to ACCEPT
+
+ unique case (state)
+ ACCEPT: begin
+ if (accept_snoop) // priority order: snoop, then reply, then core request
+ next_state = SNOOP;
+ else if (in_hold_valid && last_hop && in_hold.read)
+ next_state = REPLY;
+ else if ((core_read || core_write) && !reply_wait
+ && (!locked || (may_send && !unlock_line)))
+ next_state = CORE;
+
+ if (out_stall && !out_data_ready) // stay in ACCEPT while the output is still blocked
+ next_state = ACCEPT;
+ end
+
+ default: ;
+ endcase
+ end
+
+ always_ff @(posedge clk or negedge rst_n)
+ state <= !rst_n ? ACCEPT : next_state;
+
+ always_ff @(posedge clk or negedge rst_n)
+ if (!rst_n) begin
+ out_token <= {($bits(out_token)){1'b0}};
+ out_token_valid <= TOKEN_AT_RESET;
+
+ in_hold_valid <= 0;
+ out_stall <= 0;
+
+ locked <= 0;
+
+ mem_read <= 0;
+ mem_write <= 0;
+ end else begin
+ out_token.e0.tag <= core_tag; // e0 is this node's own entry, describing the core's current line
+ out_token.e0.index <= core_index;
+ out_token.e0.valid <= may_send_if_token_held && (send || locked) && !unlock_line;
+
+ out_token.e2 <= in_token.e1; // shift incoming entries one slot: e0 -> e1 -> e2 (incoming e2 is dropped)
+ out_token.e1 <= in_token.e0;
+ out_token_valid <= in_token_valid;
+
+ if (in_data_ready)
+ in_hold_valid <= in_data_valid;
+
+ out_stall <= out_data_valid && !out_data_ready; // output not accepted: stall_data is replayed next cycle
+
+ if (lock_line)
+ locked <= 1;
+
+ if (unlock_line) // later assignment: unlock wins if both fire in the same cycle
+ locked <= 0;
+
+ if (mem_end) begin
+ mem_read <= 0;
+ mem_write <= 0;
+ end
+
+ if (mem_begin) begin // later assignment: a new access wins over mem_end in the same cycle
+ mem_read <= !writeback;
+ mem_write <= writeback;
+ end
+ end
+
+ always_ff @(posedge clk) begin // datapath registers, no reset
+ if (in_data_ready)
+ in_hold <= in_data;
+
+ if (!out_stall)
+ stall_data <= out_data_next; // keep a copy of what is being sent in case the output stalls
+
+ if (mem_begin) begin
+ mem_tag <= writeback ? tag_rd : core_tag; // a writeback targets the stored line's tag, a read targets the core's tag
+ mem_index <= index_wr;
+ mem_writedata <= data_rd;
+ end
+ end
+
+endmodule
diff --git a/rtl/cache/defs.sv b/rtl/cache/defs.sv
new file mode 100644
index 0000000..2566058
--- /dev/null
+++ b/rtl/cache/defs.sv
@@ -0,0 +1,64 @@
+`ifndef CACHE_DEFS_SV
+`define CACHE_DEFS_SV
+
+typedef logic[3:0] word_be; // one enable bit per byte of a 32-bit word
+typedef logic[15:0] line_be; // one enable bit per byte of a 128-bit line
+typedef logic[127:0] line;
+
+// Clashes with the typedef in core/uarch.sv
+`ifndef WORD_DEFINED
+typedef logic[31:0] word;
+`define WORD_DEFINED
+`endif
+
+/* We have 512MiB of SDRAM; the rest of the address space is I/O (uncached). We use
+* 512 direct-mapped lines of 16 bytes each. The core only performs
+* aligned operations. Therefore, each 32-bit address consists of:
+ * - 2 bits that are always 0 (translated to byteenable by the core)
+ * - 2 bits of offset (since for the cache the addressable unit is the word)
+ * - 9 bits of index
+ * - 16 bits of tag
+ * - 3 bits that are == 0 if cached, != 0 if uncached
+ */
+typedef logic[1:0] addr_mbz;
+typedef logic[1:0] addr_offset;
+typedef logic[8:0] addr_index;
+typedef logic[15:0] addr_tag;
+typedef logic[2:0] addr_io_region;
+typedef logic[26:0] addr_cacheable; // 27 bits = tag (16) + index (9) + offset (2)
+
+typedef enum logic[1:0]
+{
+ INVALID,
+ SHARED,
+ EXCLUSIVE,
+ MODIFIED
+} line_state;
+
+typedef struct packed
+{
+ logic[1:0] ttl; // decremented each time cache_control forwards the request
+ logic read,
+ inval,
+ reply;
+ addr_tag tag;
+ addr_index index;
+ line data;
+} ring_req;
+
+`define TTL_END 2'b00 // ttl value that cache_control treats as the last hop
+`define TTL_MAX 2'b11 // ttl assigned to requests originated by cache_control
+
+typedef struct packed
+{
+ logic valid;
+ addr_tag tag;
+ addr_index index;
+} token_lock;
+
+typedef struct packed
+{
+ token_lock e2, e1, e0;
+} ring_token;
+
+`endif
diff --git a/rtl/cache/offsets.sv b/rtl/cache/offsets.sv
new file mode 100644
index 0000000..a933d1c
--- /dev/null
+++ b/rtl/cache/offsets.sv
@@ -0,0 +1,68 @@
+`include "cache/defs.sv"
+
+module cache_offsets // Combinational word <-> line adapter: lane selection by core_offset and byte-enable merging
+(
+ input addr_offset core_offset,
+ input word_be core_byteenable,
+ input word core_writedata,
+ input line core_readdata_line,
+ data_rd,
+
+ output line core_data_wr,
+ core_writedata_line,
+ output word core_readdata,
+ output line_be core_byteenable_line
+);
+
+ line line_mask;
+ word be_extend, mask3, mask2, mask1, mask0;
+ word_be be3, be2, be1, be0;
+
+ assign core_writedata_line = {4{core_writedata}}; // same word replicated in all four lanes; masks/byte enables pick the lane
+ assign core_byteenable_line = {be3, be2, be1, be0};
+
+ assign be_extend = {{8{core_byteenable[3]}}, {8{core_byteenable[2]}}, // each byte-enable bit widened to an 8-bit mask
+ {8{core_byteenable[1]}}, {8{core_byteenable[0]}}};
+
+ assign line_mask = {mask3, mask2, mask1, mask0};
+ assign core_data_wr = (core_writedata_line & line_mask) | (data_rd & ~line_mask); // enabled bytes come from the core, the rest from the line read
+
+ always_comb begin
+ mask3 = 0; // all lanes disabled by default; exactly one is enabled below
+ mask2 = 0;
+ mask1 = 0;
+ mask0 = 0;
+
+ be3 = 0;
+ be2 = 0;
+ be1 = 0;
+ be0 = 0;
+
+ unique case (core_offset) // core_offset selects one of the four 32-bit lanes
+ 2'b00: begin
+ be0 = core_byteenable;
+ mask0 = be_extend;
+ core_readdata = core_readdata_line[31:0];
+ end
+
+ 2'b01: begin
+ be1 = core_byteenable;
+ mask1 = be_extend;
+ core_readdata = core_readdata_line[63:32];
+ end
+
+ 2'b10: begin
+ be2 = core_byteenable;
+ mask2 = be_extend;
+ core_readdata = core_readdata_line[95:64];
+ end
+
+ 2'b11: begin
+ be3 = core_byteenable;
+ mask3 = be_extend;
+ core_readdata = core_readdata_line[127:96];
+ end
+ endcase
+ end
+
+endmodule
diff --git a/rtl/cache/routing.sv b/rtl/cache/routing.sv
new file mode 100644
index 0000000..4857e08
--- /dev/null
+++ b/rtl/cache/routing.sv
@@ -0,0 +1,107 @@
+`include "cache/defs.sv"
+
+module cache_routing // Shares the external mem_* port between cache_control accesses and uncached core accesses
+(
+ input logic clk,
+ rst_n,
+
+ input word core_address,
+ input logic core_read,
+ core_write,
+ input line core_writedata_line,
+ input line_be core_byteenable_line,
+ output logic core_waitrequest,
+ output line core_readdata_line,
+
+ output addr_tag core_tag,
+ output addr_index core_index,
+ output addr_offset core_offset,
+
+ input line data_rd,
+ input logic cache_core_waitrequest,
+ output logic cache_core_read,
+ cache_core_write,
+
+ input word cache_mem_address,
+ input logic cache_mem_read,
+ cache_mem_write,
+ input line cache_mem_writedata,
+ output logic cache_mem_waitrequest,
+
+ input logic mem_waitrequest,
+ input line mem_readdata,
+ output word mem_address,
+ output logic mem_read,
+ mem_write,
+ output line mem_writedata,
+ output line_be mem_byteenable
+);
+
+ word core_address_line;
+ logic cached, cache_mem, transition;
+ addr_mbz mbz;
+ addr_io_region io;
+
+ enum int unsigned
+ {
+ IDLE, // no memory access in flight
+ CACHE, // memory port carries an access issued by cache_control
+ BYPASS // memory port carries an uncached access issued by the core
+ } state;
+
+ assign cached = io == 3'b000; // top three address bits zero => cacheable region
+ assign cache_mem = cache_mem_read || cache_mem_write;
+
+ assign {io, core_tag, core_index, core_offset, mbz} = core_address; // split the address into its fields
+ assign core_address_line = {io, core_tag, core_index, 4'b0000}; // core address aligned down to a 16-byte line
+ assign core_readdata_line = cached ? data_rd : mem_readdata;
+
+ assign cache_core_read = core_read && cached; // cache_control only sees cacheable requests
+ assign cache_core_write = core_write && cached;
+
+ always_comb begin
+ transition = 0;
+ core_waitrequest = cache_core_waitrequest; // outside BYPASS the core is paced by cache_control
+ cache_mem_waitrequest = 1; // cache_control stalls unless its access owns the port
+
+ unique case (state)
+ IDLE:
+ transition = cache_mem || (!cached && (core_read || core_write));
+
+ CACHE:
+ cache_mem_waitrequest = mem_waitrequest;
+
+ BYPASS:
+ core_waitrequest = mem_waitrequest;
+ endcase
+ end
+
+ always_ff @(posedge clk or negedge rst_n)
+ if (!rst_n) begin
+ state <= IDLE;
+ mem_read <= 0;
+ mem_write <= 0;
+ end else unique case (state)
+ IDLE:
+ if (transition) begin
+ state <= cache_mem ? CACHE : BYPASS; // cache_control accesses take priority over bypass
+ mem_read <= cache_mem ? cache_mem_read : core_read;
+ mem_write <= cache_mem ? cache_mem_write : core_write;
+ end
+
+ CACHE, BYPASS:
+ if (!mem_waitrequest) begin // access accepted: release the port
+ state <= IDLE;
+ mem_read <= 0;
+ mem_write <= 0;
+ end
+ endcase
+
+ always_ff @(posedge clk) // address/data/byte enables are captured when the access starts
+ if (transition) begin
+ mem_address <= cache_mem ? cache_mem_address : core_address_line;
+ mem_writedata <= cache_mem ? cache_mem_writedata : core_writedata_line;
+ mem_byteenable <= cache_mem ? 16'hffff : core_byteenable_line; // cache accesses cover the whole 16-byte line, so all 16 enables are set
+ end
+
+endmodule
diff --git a/rtl/cache/sram.sv b/rtl/cache/sram.sv
new file mode 100644
index 0000000..2e6c6ce
--- /dev/null
+++ b/rtl/cache/sram.sv
@@ -0,0 +1,49 @@
+`include "cache/defs.sv"
+
+module cache_sram // Tag, data and state arrays with one registered read port and one write port
+(
+ input logic clk,
+ rst_n,
+
+ input addr_index index_rd,
+ index_wr,
+ input logic write_data,
+ write_state,
+ input addr_tag tag_wr,
+ input line data_wr,
+ input line_state state_wr,
+
+ output addr_tag tag_rd,
+ output line data_rd,
+ output line_state state_rd
+);
+
+ // There is a myth about true dual-port RAMs with byte enables; I very much doubt it is real:
+ // https://www.intel.com/content/www/us/en/docs/programmable/683082/21-3/ram-with-byte-enable-signals.html
+
+ localparam DEPTH = 1 << $bits(addr_index); // one entry per possible index value
+
+ line data_file[DEPTH];
+ addr_tag tag_file[DEPTH];
+ line_state state_file[DEPTH];
+
+ always_ff @(posedge clk) begin
+ if (write_data) begin // tag and data are always written together
+ tag_file[index_wr] <= tag_wr;
+ data_file[index_wr] <= data_wr;
+ end
+
+ if (write_state) // state has its own enable, independent of write_data
+ state_file[index_wr] <= state_wr;
+
+ tag_rd <= tag_file[index_rd]; // registered read: outputs update on the clock edge after index_rd is presented
+ data_rd <= data_file[index_rd];
+ state_rd <= state_file[index_rd];
+ end
+
+ //FIXME: rst_n for state_file?
+ initial
+ for (int i = 0; i < DEPTH; ++i)
+ state_file[i] = INVALID; // rst_n is not used; lines start INVALID only through this initial block
+
+endmodule
diff --git a/rtl/core/uarch.sv b/rtl/core/uarch.sv
index 82dc1dc..0226cbf 100644
--- a/rtl/core/uarch.sv
+++ b/rtl/core/uarch.sv
@@ -4,10 +4,15 @@
// Decodes as andeq r0, r0, r0
`define NOP 32'd0
+// Clashes with the typedef in cache/defs.sv
+`ifndef WORD_DEFINED
+typedef logic[31:0] word;
+`define WORD_DEFINED
+`endif
+
typedef logic[3:0] reg_num;
typedef logic[2:0] cp_opcode;
typedef logic[15:0] reg_list;
-typedef logic[31:0] word;
typedef logic[63:0] dword;
typedef logic[29:0] ptr;