diff options
| author | Alejandro Soto <alejandro@34project.org> | 2023-09-25 05:14:25 -0600 |
|---|---|---|
| committer | Alejandro Soto <alejandro@34project.org> | 2023-09-25 05:14:25 -0600 |
| commit | d18a37a740db37707e5266e5ca6a8fd956737197 (patch) | |
| tree | 3e2d6ea6d354d8cac722f864f5b8c51fd881e2a9 | |
| parent | 27978501a87c5bb7a9fd78e376e8f6772cad009e (diff) | |
tb: implement cache ring
| -rw-r--r-- | tb/avalon.hpp | 25 | ||||
| -rw-r--r-- | tb/avalon.impl.hpp | 95 | ||||
| -rw-r--r-- | tb/platform.sv | 208 | ||||
| -rw-r--r-- | tb/sim/tarea2.py | 2 | ||||
| -rw-r--r-- | tb/top/conspiracion.cpp | 63 |
5 files changed, 296 insertions, 97 deletions
diff --git a/tb/avalon.hpp b/tb/avalon.hpp index 40bfe9a..fd87c31 100644 --- a/tb/avalon.hpp +++ b/tb/avalon.hpp @@ -7,11 +7,14 @@ #include <stdexcept> #include <vector> +#include <verilated.h> + namespace taller::avalon { union line { - __int128 qword; + __int128 qword; + VlWide<4> verilated; struct { @@ -22,8 +25,26 @@ namespace taller::avalon { std::uint32_t words[4]; }; + + line() noexcept = default; + + inline line(VlWide<4> verilated) noexcept + : verilated(verilated) + {} + + inline operator VlWide<4>() const noexcept + { + return this->verilated; + } + + inline bool operator==(VlWide<4> verilated) const noexcept + { + return this->verilated == verilated; + } }; + static_assert(sizeof(line) == 16); + class slave { public: @@ -202,7 +223,7 @@ namespace taller::avalon std::vector<binding> devices; interrupt_controller *root_intc = nullptr; std::uint32_t avl_address = 0; - std::uint32_t avl_writedata = 0; + line avl_writedata; unsigned avl_byteenable = 0; bool avl_read = false; bool avl_write = false; diff --git a/tb/avalon.impl.hpp b/tb/avalon.impl.hpp index 992f949..5dd9822 100644 --- a/tb/avalon.impl.hpp +++ b/tb/avalon.impl.hpp @@ -33,22 +33,19 @@ namespace taller::avalon template<class Platform> bool interconnect<Platform>::tick(bool clk) noexcept { - if(!plat.reset_reset_n) - { + if (!plat.reset_reset_n) { active = nullptr; plat.avl_irq = 0; avl_read = false; avl_write = false; avl_address = 0; - avl_writedata = 0; avl_byteenable = 0; return true; } - if(active) - { + if (active) { assert(avl_address == plat.avl_address); assert(avl_read == plat.avl_read); assert(avl_write == plat.avl_write); @@ -56,21 +53,16 @@ namespace taller::avalon assert(avl_byteenable == plat.avl_byteenable); } - if(!clk) - { + if (!clk) { tick_falling(); return true; - } else if(!active) - { + } else if (!active) assert(!avl_read || !avl_write); - } - try - { + try { tick_rising(); return true; - } catch(const avl_bus_error&) - { + } catch(const avl_bus_error&) { return false; } } @@ -78,48 +70,36 @@ namespace taller::avalon template<class Platform> void interconnect<Platform>::tick_rising() { - for(auto &binding : devices) - { + for (auto &binding : devices) binding.dev.tick(); - } - if(root_intc) - { + if (root_intc) plat.avl_irq = root_intc->irq(); - } - if(!active) - { + if (!active) { avl_address = plat.avl_address; avl_read = plat.avl_read; avl_write = plat.avl_write; avl_writedata = plat.avl_writedata; avl_byteenable = plat.avl_byteenable; - if(!avl_read && !avl_write) - { + if (!avl_read && !avl_write) return; - } - for(auto &binding : devices) - { - if((avl_address & binding.mask) == binding.base) - { + for (auto &binding : devices) + if ((avl_address & binding.mask) == binding.base) { active = &binding.dev; break; } - } - if(!active) [[unlikely]] - { + if (!active) [[unlikely]] { bail(); const char *op = avl_read ? "read" : "write"; fprintf(stderr, "[avl] attempt to %s memory hole at 0x%08x\n", op, avl_address); throw avl_bus_error{"memory hole addressed"}; - } else if(avl_address & active->word_mask()) [[unlikely]] - { + } else if(avl_address & 0b1111) [[unlikely]] { bail(); fprintf(stderr, "[avl] unaligned address: 0x%08x\n", avl_address); @@ -127,35 +107,28 @@ namespace taller::avalon } } - auto pos = (avl_address & ~active->address_mask()) >> active->word_bits(); + auto pos = (avl_address & ~active->address_mask()) >> 4; - if(avl_read) - { - std::uint32_t readdata; - plat.avl_waitrequest = !active->read(pos, readdata); + if (avl_read) { + line readdata; + plat.avl_waitrequest = !active->read_line(pos, readdata); plat.avl_readdata = readdata; - } else if(avl_write) - { - plat.avl_waitrequest = !active->write(pos, avl_writedata, avl_byteenable); - } + } else if (avl_write) + plat.avl_waitrequest = !active->write_line(pos, avl_writedata, avl_byteenable); } template<class Platform> void interconnect<Platform>::tick_falling() noexcept { - if(!plat.avl_waitrequest) - { + if (!plat.avl_waitrequest) active = nullptr; - } } template<class Platform> void interconnect<Platform>::bail() noexcept { - for(auto &binding : devices) - { + for (auto &binding : devices) binding.dev.bail(); - } } template<class Platform> @@ -164,17 +137,13 @@ namespace taller::avalon std::uint32_t avl_address = addr << 2; auto *dev = resolve_external(avl_address); - if(!dev) - { + if (!dev) return false; - } auto pos = (avl_address & ~dev->address_mask()) >> dev->word_bits(); - while(!dev->read(pos, word)) - { + while (!dev->read(pos, word)) continue; - } return true; } @@ -185,17 +154,13 @@ namespace taller::avalon std::uint32_t avl_address = addr << 2; auto *dev = resolve_external(avl_address); - if(!dev) - { + if (!dev) return false; - } auto pos = (avl_address & ~dev->address_mask()) >> dev->word_bits(); - while(!dev->write(pos, writedata, 0b1111)) - { + while (!dev->write(pos, writedata, 0b1111)) continue; - } return true; } @@ -203,13 +168,9 @@ namespace taller::avalon template<class Platform> slave* interconnect<Platform>::resolve_external(std::uint32_t avl_address) { - for(auto &binding : devices) - { - if((avl_address & binding.mask) == binding.base) - { + for (auto &binding : devices) + if ((avl_address & binding.mask) == binding.base) return &binding.dev; - } - } fprintf(stderr, "[avl] attempt to access hole at 0x%08x\n", avl_address); return nullptr; diff --git a/tb/platform.sv b/tb/platform.sv index 2dade78..570ee4e 100644 --- a/tb/platform.sv +++ b/tb/platform.sv @@ -1,3 +1,5 @@ +`include "cache/defs.sv" + module platform ( input wire clk_clk, // clk.clk @@ -52,14 +54,22 @@ module platform output wire [7:0] vga_dac_B // .B ); - logic[31:0] avl_address /*verilator public*/; - logic avl_read /*verilator public*/; - logic avl_write /*verilator public*/; - logic avl_irq /*verilator public_flat_rw @(negedge clk_clk)*/; - logic[31:0] avl_readdata /*verilator public_flat_rw @(negedge clk_clk)*/; - logic[31:0] avl_writedata /*verilator public*/; - logic avl_waitrequest /*verilator public_flat_rw @(negedge clk_clk)*/; - logic[3:0] avl_byteenable /*verilator public*/; + logic[31:0] avl_address /*verilator public*/; + logic avl_read /*verilator public*/; + logic avl_write /*verilator public*/; + logic avl_irq /*verilator public_flat_rw @(negedge clk_clk)*/; + logic[127:0] avl_readdata /*verilator public_flat_rw @(negedge clk_clk)*/; + logic[127:0] avl_writedata /*verilator public*/; + logic avl_waitrequest /*verilator public_flat_rw @(negedge clk_clk)*/; + logic[15:0] avl_byteenable /*verilator public*/; + + logic[31:0] core_avl_address; + logic core_avl_read; + logic core_avl_write; + logic[31:0] core_avl_readdata; + logic[31:0] core_avl_writedata; + logic core_avl_waitrequest; + logic[3:0] core_avl_byteenable; bus_master master_0 ( @@ -75,6 +85,13 @@ module platform .cpu_clk(master_0_core_cpu_clk), .cpu_rst_n(master_0_core_cpu_rst_n), .irq(master_0_core_irq), + .avl_address(core_avl_address), + .avl_read(core_avl_read), + .avl_write(core_avl_write), + .avl_readdata(core_avl_readdata), + .avl_writedata(core_avl_writedata), + .avl_waitrequest(core_avl_waitrequest), + .avl_byteenable(core_avl_byteenable), .* ); @@ -83,4 +100,179 @@ module platform .* ); + ring_req data_0, data_1, data_2, data_3; + ring_token token_0, token_1, token_2, token_3; + + logic data_valid_0, data_valid_1, data_valid_2, data_valid_3, + data_ready_0, data_ready_1, data_ready_2, data_ready_3, + token_valid_0, token_valid_1, token_valid_2, token_valid_3; + + cache #(.TOKEN_AT_RESET(0)) c0 + ( + .clk(clk_clk), + .rst_n(reset_reset_n), + .core_address(core_avl_address[31:2]), + .core_read(core_avl_read), + .core_write(core_avl_write), + .core_writedata(core_avl_writedata), + .core_byteenable(core_avl_byteenable), + .core_waitrequest(core_avl_waitrequest), + .core_readdata(core_avl_readdata), + + //.dbg_address(), + .dbg_read(0), + .dbg_write(0), + .dbg_writedata(), + .dbg_waitrequest(), + .dbg_readdata(), + + .mem_waitrequest(avl_waitrequest), + .mem_readdata(avl_readdata), + .mem_address(avl_address), + .mem_read(avl_read), + .mem_write(avl_write), + .mem_writedata(avl_writedata), + .mem_byteenable(avl_byteenable), + + .in_data_valid(data_valid_3), + .in_data(data_3), + .in_data_ready(data_ready_0), + + .out_data_valid(data_valid_0), + .out_data(data_0), + .out_data_ready(data_ready_1), + + .in_token(token_3), + .in_token_valid(token_valid_3), + + .out_token(token_0), + .out_token_valid(token_valid_0) + ); + + cache #(.TOKEN_AT_RESET(0)) c1 + ( + .clk(clk_clk), + .rst_n(reset_reset_n), + .core_address(), + .core_read(0), + .core_write(0), + .core_writedata(), + .core_byteenable(), + .core_waitrequest(), + .core_readdata(), + + //.dbg_address(), + .dbg_read(0), + .dbg_write(0), + .dbg_writedata(), + .dbg_waitrequest(), + .dbg_readdata(), + + .mem_waitrequest(1), + .mem_readdata(), + .mem_address(), + .mem_read(), + .mem_write(), + .mem_writedata(), + .mem_byteenable(), + + .in_data_valid(data_valid_0), + .in_data(data_0), + .in_data_ready(data_ready_1), + + .out_data_valid(data_valid_1), + .out_data(data_1), + .out_data_ready(data_ready_2), + + .in_token(token_0), + .in_token_valid(token_valid_0), + + .out_token(token_1), + .out_token_valid(token_valid_1) + ); + + cache #(.TOKEN_AT_RESET(0)) c2 + ( + .clk(clk_clk), + .rst_n(reset_reset_n), + .core_address(), + .core_read(0), + .core_write(0), + .core_writedata(), + .core_byteenable(), + .core_waitrequest(), + .core_readdata(), + + //.dbg_address(), + .dbg_read(0), + .dbg_write(0), + .dbg_writedata(), + .dbg_waitrequest(), + .dbg_readdata(), + + .mem_waitrequest(1), + .mem_readdata(), + .mem_address(), + .mem_read(), + .mem_write(), + .mem_writedata(), + .mem_byteenable(), + + .in_data_valid(data_valid_1), + .in_data(data_1), + .in_data_ready(data_ready_2), + + .out_data_valid(data_valid_2), + .out_data(data_2), + .out_data_ready(data_ready_3), + + .in_token(token_1), + .in_token_valid(token_valid_1), + + .out_token(token_2), + .out_token_valid(token_valid_2) + ); + + cache #(.TOKEN_AT_RESET(1)) c3 + ( + .clk(clk_clk), + .rst_n(reset_reset_n), + .core_address(), + .core_read(0), + .core_write(0), + .core_writedata(), + .core_byteenable(), + .core_waitrequest(), + .core_readdata(), + + //.dbg_address(), + .dbg_read(0), + .dbg_write(0), + .dbg_writedata(), + .dbg_waitrequest(), + .dbg_readdata(), + + .mem_waitrequest(1), + .mem_readdata(), + .mem_address(), + .mem_read(), + .mem_write(), + .mem_writedata(), + .mem_byteenable(), + + .in_data_valid(data_valid_2), + .in_data(data_2), + .in_data_ready(data_ready_3), + + .out_data_valid(data_valid_3), + .out_data(data_3), + .out_data_ready(data_ready_0), + + .in_token(token_2), + .in_token_valid(token_valid_2), + + .out_token(token_3), + .out_token_valid(token_valid_3) + ); + endmodule diff --git a/tb/sim/tarea2.py b/tb/sim/tarea2.py index 921ea6f..49aade8 100644 --- a/tb/sim/tarea2.py +++ b/tb/sim/tarea2.py @@ -1,5 +1,7 @@ N = 20 +cycles = 1024 + 512 + mem_dumps = [range(0x100, 0x108), range(0x200, 0x200 + 4 * N)] def init(): diff --git a/tb/top/conspiracion.cpp b/tb/top/conspiracion.cpp index ea71dbe..b243c89 100644 --- a/tb/top/conspiracion.cpp +++ b/tb/top/conspiracion.cpp @@ -31,6 +31,9 @@ #include "Vconspiracion_core_psr.h" #include "Vconspiracion_core_regs.h" #include "Vconspiracion_core_reg_file.h" +#include "Vconspiracion_cache.h" +#include "Vconspiracion_cache__T1.h" +#include "Vconspiracion_cache_sram.h" #include "../args.hxx" @@ -312,14 +315,14 @@ int main(int argc, char **argv) null vram_null(0x3800'0000, 64 << 20, 2); window vram_window(vram, 0x0000'0000); + Vconspiracion_platform &plat = *top.conspiracion->plat; display<Vconspiracion_vga_domain> vga ( - *top.conspiracion->plat->vga, - 0x3800'0000, 25'175'000, 50'000'000 + *plat.vga, 0x3800'0000, 25'175'000, 50'000'000 ); - interconnect<Vconspiracion_platform> avl(*top.conspiracion->plat); - interconnect<Vconspiracion_vga_domain> avl_vga(*top.conspiracion->plat->vga); + interconnect<Vconspiracion_platform> avl(plat); + //interconnect<Vconspiracion_vga_domain> avl_vga(plat->vga); std::vector<const_map> consts; for(const auto &init : *const_) @@ -346,7 +349,7 @@ int main(int argc, char **argv) } else if(enable_accurate_video) { avl.attach(vram); - avl_vga.attach(vram_window); + //avl_vga.attach(vram_window); } else { avl.attach(vram_null); @@ -407,10 +410,10 @@ int main(int argc, char **argv) if(enable_accurate_video) { - if(!avl_vga.tick(top.clk_clk)) + /*if(!avl_vga.tick(top.clk_clk)) { failed = true; - } + }*/ vga.signal_tick(top.clk_clk); } @@ -469,6 +472,34 @@ int main(int argc, char **argv) std::fputs("=== end-regs ===\n", ctrl); }; + Vconspiracion_cache_sram *const caches[] = { + plat.c0->sram, + plat.c1->sram, + plat.c2->sram, + plat.c3->sram + }; + + auto dump_coherent = [&](std::uint32_t addr, std::uint32_t &data) + { + bool ok = avl.dump(addr, data); + if (!ok || (ok >> 29)) + return ok; + + unsigned tag = (addr >> 11) & ((1 << 16) - 1); + unsigned index = (addr >> 2) & ((1 << 9) - 1); + + for (std::size_t i = 0; i < sizeof caches / sizeof caches[0]; ++i) { + const auto *cache = caches[i]; + + if (cache->state_file[index] != 0b00 && cache->tag_file[index] == tag) { + line line_data = cache->data_file[index]; + data = line_data.words[addr & 0b11]; + } + } + + return true; + }; + auto pagewalk = [&](std::uint32_t &addr) { if(!core.mmu->mmu_enable) @@ -479,13 +510,10 @@ int main(int argc, char **argv) std::uint32_t ttbr = core.mmu->mmu_ttbr; std::uint32_t entry; - if(!avl.dump(ttbr << 12 | addr >> 18, entry)) - { + if (!dump_coherent(ttbr << 12 | addr >> 18, entry)) return false; - } - switch(entry & 0b11) - { + switch (entry & 0b11) { case 0b01: break; @@ -498,13 +526,10 @@ int main(int argc, char **argv) } std::uint32_t entryaddr = (entry & ~((1 << 10) - 1)) >> 2 | ((addr >> 10) & ((1 << 8) - 1)); - if(!avl.dump(entryaddr, entry)) - { + if (!dump_coherent(entryaddr, entry)) return false; - } - switch(entry & 0b11) - { + switch (entry & 0b11) { case 0b01: addr = (entry & ~((1 << 16) - 1)) >> 2 | (addr & ((1 << 14) - 1)); return true; @@ -536,10 +561,8 @@ int main(int argc, char **argv) } std::uint32_t word; - if(!avl.dump(at, word)) - { + if (!dump_coherent(at, word)) break; - } word = (word & 0xff) << 24 | ((word >> 8) & 0xff) << 16 |
