diff options
| author | Alejandro Soto <alejandro@34project.org> | 2023-10-05 16:26:26 -0600 |
|---|---|---|
| committer | Alejandro Soto <alejandro@34project.org> | 2023-10-05 17:16:55 -0600 |
| commit | d406720cecd7328f595255e65b6fd6b6814cefe4 (patch) | |
| tree | c085f1b4a2d30df0c08cd1efcbf3503ee40c5354 /rtl | |
| parent | dc705df98037bfd9db8efded025651b676e87754 (diff) | |
rtl/perf: implement performance unit
Diffstat (limited to 'rtl')
| -rw-r--r-- | rtl/cache/cache_control.sv | 2 | ||||
| -rw-r--r-- | rtl/cache/defs.sv | 12 | ||||
| -rw-r--r-- | rtl/cache/routing.sv | 2 | ||||
| -rw-r--r-- | rtl/core/uarch.sv | 1 | ||||
| -rw-r--r-- | rtl/perf/link.sv | 181 | ||||
| -rw-r--r-- | rtl/perf/perf_monitor.sv | 286 | ||||
| -rw-r--r-- | rtl/perf/snoop.sv | 129 |
7 files changed, 611 insertions, 2 deletions
diff --git a/rtl/cache/cache_control.sv b/rtl/cache/cache_control.sv index b31b6a8..64b4ce1 100644 --- a/rtl/cache/cache_control.sv +++ b/rtl/cache/cache_control.sv @@ -80,7 +80,7 @@ module cache_control assign mem_end = (mem_read || mem_write) && !mem_waitrequest; assign mem_wait = (mem_read || mem_write) && mem_waitrequest; - assign mem_address = {3'b000, mem_tag, mem_index, 4'b0000}; + assign mem_address = {`IO_CACHED, mem_tag, mem_index, 4'b0000}; assign mem_read_end = mem_read && !mem_waitrequest; /* Desbloquear la línea hasta que la request del core termine garantiza diff --git a/rtl/cache/defs.sv b/rtl/cache/defs.sv index 24ab9ea..2c7550f 100644 --- a/rtl/cache/defs.sv +++ b/rtl/cache/defs.sv @@ -7,11 +7,13 @@ typedef logic[15:0] line_be; // Tamaño de una línea de cache typedef logic[127:0] line; +typedef logic[27:0] line_ptr; // Choca con typedef en core/uarch.sv `ifndef WORD_DEFINED typedef logic[29:0] ptr; typedef logic[31:0] word; +typedef logic[15:0] hword; `define WORD_DEFINED `endif @@ -36,6 +38,8 @@ typedef logic[12:0] addr_tag; typedef logic[2:0] addr_io_region; typedef logic[26:0] addr_cacheable; +`define IO_CACHED 3'b000 + typedef struct packed { addr_io_region io; @@ -68,6 +72,14 @@ typedef struct packed line data; } ring_req; +typedef struct packed +{ + logic[1:0] ttl; + logic read, + inval, + reply; +} perf_sample; + `define TTL_END 2'b00 `define TTL_MAX 2'b11 diff --git a/rtl/cache/routing.sv b/rtl/cache/routing.sv index 78f1be0..4119a7f 100644 --- a/rtl/cache/routing.sv +++ b/rtl/cache/routing.sv @@ -63,7 +63,7 @@ module cache_routing * Entonces si los bits de IO son distintos de 0, se sabe que no es * una dirección cached */ - assign cached = io == 3'b000; + assign cached = io == `IO_CACHED; // Se afirma si cache quiere hacer un read o write de memoria assign cache_mem = cache_mem_read || cache_mem_write; diff --git a/rtl/core/uarch.sv b/rtl/core/uarch.sv index a474274..4510cb3 100644 --- a/rtl/core/uarch.sv +++ b/rtl/core/uarch.sv @@ -8,6 
+8,7 @@ `ifndef WORD_DEFINED typedef logic[29:0] ptr; typedef logic[31:0] word; +typedef logic[15:0] hword; `define WORD_DEFINED `endif

// ===========================================================================
// rtl/perf/link.sv (new file)
// ===========================================================================
`include "cache/defs.sv"

/*
 * perf_link: performance counters for one ring node.
 *
 * Instantiates perf_snoop, which passes the ring/memory traffic through
 * untouched and exposes a two-cycle-delayed copy of the control signals
 * (snoop_*). All counters below are driven exclusively from that delayed
 * copy, so every event is classified with signals sampled on the same cycle.
 *
 * Register map (word index on `address`, read through `readdata`):
 *   0x0  reads             cached memory reads completed
 *   0x1  writes            cached memory writes completed
 *   0x2  {max_read_cycles,  min_read_cycles}
 *   0x3  {max_write_cycles, min_write_cycles}
 *   0x4  ring_reads        right-side messages with read && !inval, ttl == TTL_MAX
 *   0x5  ring_invals       right-side messages with !read && inval, ttl == TTL_MAX
 *   0x6  ring_read_invals  right-side messages with read && inval,  ttl == TTL_MAX
 *   0x7  ring_replies      left-side messages with reply set, ttl == TTL_END
 *   0x8  ring_forwards     right-side messages with ttl != TTL_MAX
 *   0x9  {max_ring_cycles, min_ring_cycles}
 *   0xA  io_reads          non-cached memory reads completed
 *   0xB  io_writes         non-cached memory writes completed
 *   0xC-0xF alias 0x8-0xB (only address[1:0] is decoded when address[3] is set)
 *
 * A min_* value of 0 means "no sample recorded yet".
 * Asserting `clear` zeroes every counter and min/max register; the running
 * cycle counters (mem_cycles_hold, ring_cycles) are left alone.
 *
 * NOTE(review): presumably TTL_MAX marks a message freshly injected by the
 * local node and TTL_END one that has gone all the way around the ring, so
 * ring_cycles measures round-trip latency - confirm against the ring logic.
 */
module perf_link
(
	input  logic      clk,
	                  rst_n,

	input  logic      in_left_valid,
	input  ring_req   in_left,
	output logic      in_left_ready,

	input  logic      in_right_valid,
	input  ring_req   in_right,
	// Only observed here (perf_snoop takes it as an input and nothing in
	// this module drives it), so it must be an input: the actual driver is
	// the in_left_ready output of the neighbouring link.
	input  logic      in_right_ready,

	input  logic      out_left_ready,
	output ring_req   out_left,
	output logic      out_left_valid,

	input  line_ptr   local_address,
	input  logic      local_read,
	                  local_write,
	input  line       local_writedata,
	input  line_be    local_byteenable,
	output logic      local_waitrequest,
	output line       local_readdata,

	input  logic      mem_waitrequest,
	input  line       mem_readdata,
	output word       mem_address,
	output logic      mem_read,
	                  mem_write,
	output line       mem_writedata,
	output line_be    mem_byteenable,

	input  logic      clear,
	input  logic[3:0] address,
	output word       readdata
);

	logic snoop_left_ready, snoop_left_valid, snoop_right_ready, snoop_right_valid,
	      snoop_read, snoop_write, snoop_waitrequest, cached;

	// A memory access is in flight / completes on this (delayed) cycle
	logic mem_active, mem_done;

	addr_bits snoop_addr_bits;
	perf_sample snoop_left, snoop_right;

	word reads, writes, ring_reads, ring_invals, ring_read_invals, ring_replies, ring_forwards,
	     io_reads, io_writes, snoop_address;

	hword mem_cycles, mem_cycles_hold, ring_cycles, min_ring_cycles, max_ring_cycles,
	      min_read_cycles, max_read_cycles, min_write_cycles, max_write_cycles;

	// Pass-through plus registered sampling; every port is bound by name
	perf_snoop snoop
	(
		.*
	);

	assign snoop_addr_bits = snoop_address;
	assign cached = snoop_addr_bits.io == `IO_CACHED;

	// mem_cycles_hold counts the wait cycles already elapsed; mem_cycles
	// additionally includes the current one.
	assign mem_cycles = mem_cycles_hold + 1;

	// Use the registered strobes so they line up with snoop_address (and
	// therefore with `cached`), which is itself two cycles old.
	assign mem_active = snoop_read || snoop_write;
	assign mem_done = mem_active && !snoop_waitrequest;

	// Register read mux
	always_comb
		if (!address[3]) unique case (address[2:0])
			3'b000: readdata = reads;
			3'b001: readdata = writes;
			3'b010: readdata = {max_read_cycles, min_read_cycles};
			3'b011: readdata = {max_write_cycles, min_write_cycles};
			3'b100: readdata = ring_reads;
			3'b101: readdata = ring_invals;
			3'b110: readdata = ring_read_invals;
			3'b111: readdata = ring_replies;
		endcase else unique case (address[1:0])
			2'b00: readdata = ring_forwards;
			2'b01: readdata = {max_ring_cycles, min_ring_cycles};
			2'b10: readdata = io_reads;
			2'b11: readdata = io_writes;
		endcase

	always @(posedge clk or negedge rst_n)
		if (!rst_n) begin
			reads <= 0;
			writes <= 0;
			io_reads <= 0;
			io_writes <= 0;

			min_ring_cycles <= 0;
			max_ring_cycles <= 0;
			min_read_cycles <= 0;
			max_read_cycles <= 0;
			min_write_cycles <= 0;
			max_write_cycles <= 0;

			ring_reads <= 0;
			ring_invals <= 0;
			ring_replies <= 0;
			ring_forwards <= 0;
			ring_read_invals <= 0;

			mem_cycles_hold <= 0;
			// Give the free-running counter a defined value out of reset
			ring_cycles <= 0;
		end else begin
			// Free-running; overridden below when a new message is injected
			ring_cycles <= ring_cycles + 1;

			if (mem_active)
				mem_cycles_hold <= mem_cycles;

			if (mem_done) begin
				// Later assignment wins: restart the count for the next access
				mem_cycles_hold <= 0;

				if (!cached) begin
					if (snoop_write)
						io_writes <= io_writes + 1;
					else
						io_reads <= io_reads + 1;
				end else if (snoop_write) begin
					writes <= writes + 1;

					// The comparisons use the registered mem_cycles_hold
					// (one less than mem_cycles) rather than the adder output
					if (min_write_cycles == 0 || mem_cycles_hold < min_write_cycles)
						min_write_cycles <= mem_cycles;

					if (mem_cycles_hold >= max_write_cycles)
						max_write_cycles <= mem_cycles;
				end else begin
					reads <= reads + 1;

					if (min_read_cycles == 0 || mem_cycles_hold < min_read_cycles)
						min_read_cycles <= mem_cycles;

					if (mem_cycles_hold >= max_read_cycles)
						max_read_cycles <= mem_cycles;
				end
			end

			// Left side: accepted message whose TTL has run out
			if (snoop_left_valid && snoop_left_ready && snoop_left.ttl == `TTL_END) begin
				if (snoop_left.reply)
					ring_replies <= ring_replies + 1;

				if (min_ring_cycles == 0 || ring_cycles < min_ring_cycles)
					min_ring_cycles <= ring_cycles;

				if (ring_cycles > max_ring_cycles)
					max_ring_cycles <= ring_cycles;
			end

			// Right side: accepted outgoing message
			if (snoop_right_valid && snoop_right_ready) begin
				if (snoop_right.ttl == `TTL_MAX) begin
					// Must stay after the free-running increment above
					ring_cycles <= 1;

					if (snoop_right.read && !snoop_right.inval)
						ring_reads <= ring_reads + 1;

					if (!snoop_right.read && snoop_right.inval)
						ring_invals <= ring_invals + 1;

					if (snoop_right.read && snoop_right.inval)
						ring_read_invals <= ring_read_invals + 1;
				end else
					ring_forwards <= ring_forwards + 1;
			end

			// Software clear has priority over any update made this cycle
			if (clear) begin
				reads <= 0;
				writes <= 0;
				io_reads <= 0;
				io_writes <= 0;

				min_ring_cycles <= 0;
				max_ring_cycles <= 0;
				min_read_cycles <= 0;
				max_read_cycles <= 0;
				min_write_cycles <= 0;
				max_write_cycles <= 0;

				ring_reads <= 0;
				ring_invals <= 0;
				ring_replies <= 0;
				ring_forwards <= 0;
				ring_read_invals <= 0;
			end
		end

endmodule

// ===========================================================================
// rtl/perf/perf_monitor.sv (new file)
// ===========================================================================
`include "cache/defs.sv"

/*
 * perf_monitor: four perf_link instances, one per ring node, behind a single
 * register window.
 *
 * Link N passes in_(N-1) through to out_N (link 0 takes in_3), observes in_N
 * as its right-hand side, and forwards local_N to mem_N.
 *
 * perf_address[5:4] selects the link and perf_address[3:0] the register
 * inside it (see perf_link). Any write to a link's window clears that link's
 * counters; the written data is ignored. perf_read is not used: perf_readdata
 * always reflects the currently addressed register.
 */
module perf_monitor
(
	input  logic      clk,
	                  rst_n,

	input  logic[5:0] perf_address,
	input  logic      perf_read,
	                  perf_write,
	input  word       perf_writedata, // Unused
	output word       perf_readdata,

	input  logic      in_0_valid,
	input  ring_req   in_0,
	output logic      in_0_ready,

	input  logic      in_1_valid,
	input  ring_req   in_1,
	output logic      in_1_ready,

	input  logic      in_2_valid,
	input  ring_req   in_2,
	output logic      in_2_ready,

	input  logic      in_3_valid,
	input  ring_req   in_3,
	output logic      in_3_ready,

	input  logic      out_0_ready,
	output ring_req   out_0,
	output logic      out_0_valid,

	input  logic      out_1_ready,
	output ring_req   out_1,
	output logic      out_1_valid,

	input  logic      out_2_ready,
	output ring_req   out_2,
	output logic      out_2_valid,

	input  logic      out_3_ready,
	output ring_req   out_3,
	output logic      out_3_valid,

	input  line_ptr   local_0_address,
	input  logic      local_0_read,
	                  local_0_write,
	input  line       local_0_writedata,
	input  line_be    local_0_byteenable,
	output logic      local_0_waitrequest,
	output line       local_0_readdata,

	input  line_ptr   local_1_address,
	input  logic      local_1_read,
	                  local_1_write,
	input  line       local_1_writedata,
	input  line_be    local_1_byteenable,
	output logic      local_1_waitrequest,
	output line       local_1_readdata,

	input  line_ptr   local_2_address,
	input  logic      local_2_read,
	                  local_2_write,
	input  line       local_2_writedata,
	input  line_be    local_2_byteenable,
	output logic      local_2_waitrequest,
	output line       local_2_readdata,

	input  line_ptr   local_3_address,
	input  logic      local_3_read,
	                  local_3_write,
	input  line       local_3_writedata,
	input  line_be    local_3_byteenable,
	output logic      local_3_waitrequest,
	output line       local_3_readdata,

	input  logic      mem_0_waitrequest,
	input  line       mem_0_readdata,
	output word       mem_0_address,
	output logic      mem_0_read,
	                  mem_0_write,
	output line       mem_0_writedata,
	output line_be    mem_0_byteenable,

	input  logic      mem_1_waitrequest,
	input  line       mem_1_readdata,
	output word       mem_1_address,
	output logic      mem_1_read,
	                  mem_1_write,
	output line       mem_1_writedata,
	output line_be    mem_1_byteenable,

	input  logic      mem_2_waitrequest,
	input  line       mem_2_readdata,
	output word       mem_2_address,
	output logic      mem_2_read,
	                  mem_2_write,
	output line       mem_2_writedata,
	output line_be    mem_2_byteenable,

	input  logic      mem_3_waitrequest,
	input  line       mem_3_readdata,
	output word       mem_3_address,
	output logic      mem_3_read,
	                  mem_3_write,
	output line       mem_3_writedata,
	output line_be    mem_3_byteenable
);

	word readdata_0, readdata_1, readdata_2, readdata_3;
	logic clear_0, clear_1, clear_2, clear_3;
	logic[3:0] address;

	// In every instance the trailing .* binds clk, rst_n and address

	perf_link link_0
	(
		.in_left_valid(in_3_valid),
		.in_left(in_3),
		.in_left_ready(in_3_ready),

		.in_right_valid(in_0_valid),
		.in_right(in_0),
		.in_right_ready(in_0_ready),

		.out_left_ready(out_0_ready),
		.out_left(out_0),
		.out_left_valid(out_0_valid),

		.local_address(local_0_address),
		.local_read(local_0_read),
		.local_write(local_0_write),
		.local_writedata(local_0_writedata),
		.local_byteenable(local_0_byteenable),
		.local_waitrequest(local_0_waitrequest),
		.local_readdata(local_0_readdata),

		.mem_waitrequest(mem_0_waitrequest),
		.mem_readdata(mem_0_readdata),
		.mem_address(mem_0_address),
		.mem_read(mem_0_read),
		.mem_write(mem_0_write),
		.mem_writedata(mem_0_writedata),
		.mem_byteenable(mem_0_byteenable),

		.clear(clear_0),
		.readdata(readdata_0),
		.*
	);

	perf_link link_1
	(
		.in_left_valid(in_0_valid),
		.in_left(in_0),
		.in_left_ready(in_0_ready),

		.in_right_valid(in_1_valid),
		.in_right(in_1),
		.in_right_ready(in_1_ready),

		.out_left_ready(out_1_ready),
		.out_left(out_1),
		.out_left_valid(out_1_valid),

		.local_address(local_1_address),
		.local_read(local_1_read),
		.local_write(local_1_write),
		.local_writedata(local_1_writedata),
		.local_byteenable(local_1_byteenable),
		.local_waitrequest(local_1_waitrequest),
		.local_readdata(local_1_readdata),

		.mem_waitrequest(mem_1_waitrequest),
		.mem_readdata(mem_1_readdata),
		.mem_address(mem_1_address),
		.mem_read(mem_1_read),
		.mem_write(mem_1_write),
		.mem_writedata(mem_1_writedata),
		.mem_byteenable(mem_1_byteenable),

		.clear(clear_1),
		.readdata(readdata_1),
		.*
	);

	perf_link link_2
	(
		.in_left_valid(in_1_valid),
		.in_left(in_1),
		.in_left_ready(in_1_ready),

		.in_right_valid(in_2_valid),
		.in_right(in_2),
		.in_right_ready(in_2_ready),

		.out_left_ready(out_2_ready),
		.out_left(out_2),
		.out_left_valid(out_2_valid),

		.local_address(local_2_address),
		.local_read(local_2_read),
		.local_write(local_2_write),
		.local_writedata(local_2_writedata),
		.local_byteenable(local_2_byteenable),
		.local_waitrequest(local_2_waitrequest),
		.local_readdata(local_2_readdata),

		.mem_waitrequest(mem_2_waitrequest),
		.mem_readdata(mem_2_readdata),
		.mem_address(mem_2_address),
		.mem_read(mem_2_read),
		.mem_write(mem_2_write),
		.mem_writedata(mem_2_writedata),
		.mem_byteenable(mem_2_byteenable),

		.clear(clear_2),
		.readdata(readdata_2),
		.*
	);

	perf_link link_3
	(
		.in_left_valid(in_2_valid),
		.in_left(in_2),
		.in_left_ready(in_2_ready),

		.in_right_valid(in_3_valid),
		.in_right(in_3),
		.in_right_ready(in_3_ready),

		.out_left_ready(out_3_ready),
		.out_left(out_3),
		.out_left_valid(out_3_valid),

		.local_address(local_3_address),
		.local_read(local_3_read),
		.local_write(local_3_write),
		.local_writedata(local_3_writedata),
		.local_byteenable(local_3_byteenable),
		.local_waitrequest(local_3_waitrequest),
		.local_readdata(local_3_readdata),

		.mem_waitrequest(mem_3_waitrequest),
		.mem_readdata(mem_3_readdata),
		.mem_address(mem_3_address),
		.mem_read(mem_3_read),
		.mem_write(mem_3_write),
		.mem_writedata(mem_3_writedata),
		.mem_byteenable(mem_3_byteenable),

		.clear(clear_3),
		.readdata(readdata_3),
		.*
	);

	// Register index inside the selected link
	assign address = perf_address[3:0];

	// Link select: route the read data and steer the clear strobe
	always_comb begin
		clear_0 = 0;
		clear_1 = 0;
		clear_2 = 0;
		clear_3 = 0;

		unique case (perf_address[5:4])
			2'b00: begin
				clear_0 = perf_write;
				perf_readdata = readdata_0;
			end

			2'b01: begin
				clear_1 = perf_write;
				perf_readdata = readdata_1;
			end

			2'b10: begin
				clear_2 = perf_write;
				perf_readdata = readdata_2;
			end

			2'b11: begin
				clear_3 = perf_write;
				perf_readdata = readdata_3;
			end
		endcase
	end

endmodule

// ===========================================================================
// rtl/perf/snoop.sv (new file)
// ===========================================================================
`include "cache/defs.sv"

/*
 * perf_snoop: transparent tap on one ring hop and one memory port.
 *
 * Functional path (purely combinational):
 *   - in_left is forwarded unchanged to out_left, with valid/ready passed
 *     straight through;
 *   - the local_* port is forwarded to mem_*; mem_address is local_address
 *     with four zero bits appended.
 *
 * Observation path: the handshake signals, the ttl/read/inval/reply fields of
 * both ring messages, the memory strobes and the memory address are delayed
 * by two register stages (hold_* then snoop_*) before being exported.
 * The control bits are reset to 0; the sampled message fields and the address
 * have no reset.
 */
module perf_snoop
(
	input  logic       clk,
	                   rst_n,

	input  logic       in_left_valid,
	input  ring_req    in_left,
	output logic       in_left_ready,

	input  logic       out_left_ready,
	output ring_req    out_left,
	output logic       out_left_valid,

	input  logic       in_right_valid,
	input  ring_req    in_right,
	input  logic       in_right_ready,

	input  line_ptr    local_address,
	input  logic       local_read,
	                   local_write,
	input  line        local_writedata,
	input  line_be     local_byteenable,
	output logic       local_waitrequest,
	output line        local_readdata,

	input  logic       mem_waitrequest,
	input  line        mem_readdata,
	output word        mem_address,
	output logic       mem_read,
	                   mem_write,
	output line        mem_writedata,
	output line_be     mem_byteenable,

	output logic       snoop_left_ready,
	                   snoop_left_valid,
	                   snoop_right_ready,
	                   snoop_right_valid,
	                   snoop_read,
	                   snoop_write,
	                   snoop_waitrequest,
	output word        snoop_address,
	output perf_sample snoop_left,
	                   snoop_right
);

	// First register stage; the snoop_* outputs are the second
	word hold_address;
	logic hold_left_ready, hold_left_valid, hold_right_ready, hold_right_valid,
	      hold_read, hold_write, hold_waitrequest;

	perf_sample hold_left, hold_right;

	// out_right is driven by the same lines because of the ring topology
	assign out_left = in_left;
	assign out_left_valid = in_left_valid;
	assign in_left_ready = out_left_ready;

	// Memory port pass-through
	assign mem_address = {local_address, 4'b0000};
	assign mem_read = local_read;
	assign mem_write = local_write;
	assign mem_writedata = local_writedata;
	assign mem_byteenable = local_byteenable;
	assign local_readdata = mem_readdata;
	assign local_waitrequest = mem_waitrequest;

	// Control bits: two-stage pipeline with asynchronous reset
	always @(posedge clk or negedge rst_n)
		if (!rst_n) begin
			hold_read <= 0;
			hold_write <= 0;
			hold_waitrequest <= 0;

			hold_left_ready <= 0;
			hold_left_valid <= 0;
			hold_right_ready <= 0;
			hold_right_valid <= 0;

			snoop_read <= 0;
			snoop_write <= 0;
			snoop_waitrequest <= 0;

			snoop_left_ready <= 0;
			snoop_left_valid <= 0;
			snoop_right_ready <= 0;
			snoop_right_valid <= 0;
		end else begin
			/* The idea here is to ease the fitter's job: perf_monitor
			 * samples the whole ring, so its area span is potentially
			 * large.
			 */

			hold_read <= mem_read;
			hold_write <= mem_write;
			hold_waitrequest <= mem_waitrequest;

			hold_left_ready <= in_left_ready;
			hold_left_valid <= in_left_valid;
			hold_right_ready <= in_right_ready;
			hold_right_valid <= in_right_valid;

			snoop_read <= hold_read;
			snoop_write <= hold_write;
			snoop_waitrequest <= hold_waitrequest;

			snoop_left_ready <= hold_left_ready;
			snoop_left_valid <= hold_left_valid;
			snoop_right_ready <= hold_right_ready;
			snoop_right_valid <= hold_right_valid;
		end

	// Payload: same two-stage delay, no reset (consumers qualify it with
	// the valid/ready and read/write bits above)
	always @(posedge clk) begin
		hold_left.ttl <= in_left.ttl;
		hold_left.read <= in_left.read;
		hold_left.inval <= in_left.inval;
		hold_left.reply <= in_left.reply;

		hold_right.ttl <= in_right.ttl;
		hold_right.read <= in_right.read;
		hold_right.inval <= in_right.inval;
		hold_right.reply <= in_right.reply;

		snoop_left <= hold_left;
		snoop_right <= hold_right;

		hold_address <= mem_address;
		snoop_address <= hold_address;
	end

endmodule
