summaryrefslogtreecommitdiff
path: root/rtl/perf
diff options
context:
space:
mode:
author Alejandro Soto <alejandro@34project.org> 2023-10-05 16:26:26 -0600
committer Alejandro Soto <alejandro@34project.org> 2023-10-05 17:16:55 -0600
commit d406720cecd7328f595255e65b6fd6b6814cefe4 (patch)
tree c085f1b4a2d30df0c08cd1efcbf3503ee40c5354 /rtl/perf
parent dc705df98037bfd9db8efded025651b676e87754 (diff)
rtl/perf: implement performance unit
Diffstat (limited to 'rtl/perf')
-rw-r--r-- rtl/perf/link.sv 181
-rw-r--r-- rtl/perf/perf_monitor.sv 286
-rw-r--r-- rtl/perf/snoop.sv 129
3 files changed, 596 insertions, 0 deletions
diff --git a/rtl/perf/link.sv b/rtl/perf/link.sv
new file mode 100644
index 0000000..0899399
--- /dev/null
+++ b/rtl/perf/link.sv
@@ -0,0 +1,181 @@
+`include "cache/defs.sv"
+
+// perf_link: performance counter bank for one node of the ring.
+//
+// The ring and memory ports are handed unchanged to an internal perf_snoop
+// instance, which forwards the traffic and returns registered copies of it
+// (the snoop_* signals). Every counter in this module is updated from those
+// registered copies only.
+//
+// Read-only register window (selected by `address`):
+//
+//   0x0  reads                      0x8  ring_forwards
+//   0x1  writes                     0x9  {max_ring_cycles, min_ring_cycles}
+//   0x2  {max, min} read cycles     0xA  io_reads
+//   0x3  {max, min} write cycles    0xB  io_writes
+//   0x4  ring_reads                 0xC-0xF alias 0x8-0xB
+//   0x5  ring_invals                         (address[2] is ignored there)
+//   0x6  ring_read_invals
+//   0x7  ring_replies
+//
+// For the packed min/max registers the maximum sits in the more significant
+// position. A minimum of 0 means "no sample yet".
+//
+// `clear` synchronously zeroes every event counter and every min/max
+// register; it does not touch the running cycle counters.
+module perf_link
+(
+    input  logic       clk,
+                       rst_n,
+
+    // Ring traffic arriving from the left neighbour (forwarded to out_left
+    // by perf_snoop).
+    input  logic       in_left_valid,
+    input  ring_req    in_left,
+    output logic       in_left_ready,
+
+    // Ring traffic on the right-hand side; only observed, never forwarded.
+    // NOTE(review): in_right_ready is declared as an output here but is not
+    // driven by this module; it is only passed to perf_snoop, which declares
+    // it as an input. Confirm the intended direction with the integrator.
+    input  logic       in_right_valid,
+    input  ring_req    in_right,
+    output logic       in_right_ready,
+
+    input  logic       out_left_ready,
+    output ring_req    out_left,
+    output logic       out_left_valid,
+
+    // Line-granular master side of the memory path.
+    input  line_ptr    local_address,
+    input  logic       local_read,
+                       local_write,
+    input  line        local_writedata,
+    input  line_be     local_byteenable,
+    output logic       local_waitrequest,
+    output line        local_readdata,
+
+    // Slave side of the memory path.
+    input  logic       mem_waitrequest,
+    input  line        mem_readdata,
+    output word        mem_address,
+    output logic       mem_read,
+                       mem_write,
+    output line        mem_writedata,
+    output line_be     mem_byteenable,
+
+    // Counter access.
+    input  logic       clear,
+    input  logic[3:0]  address,
+    output word        readdata
+);
+
+    logic snoop_left_ready, snoop_left_valid, snoop_right_ready, snoop_right_valid,
+          snoop_read, snoop_write, snoop_waitrequest, cached;
+
+    addr_bits snoop_addr_bits;
+    perf_sample snoop_left, snoop_right;
+
+    word reads, writes, ring_reads, ring_invals, ring_read_invals, ring_replies, ring_forwards,
+         io_reads, io_writes, snoop_address;
+
+    hword mem_cycles, mem_cycles_hold, ring_cycles, min_ring_cycles, max_ring_cycles,
+          min_read_cycles, max_read_cycles, min_write_cycles, max_write_cycles;
+
+    // All ports of perf_snoop share their names with signals of this module.
+    perf_snoop snoop
+    (
+        .*
+    );
+
+    // Decode the registered address so that `cached` refers to the same
+    // access as snoop_read / snoop_write / snoop_waitrequest.
+    assign snoop_addr_bits = snoop_address;
+    assign cached = snoop_addr_bits.io == `IO_CACHED;
+
+    // Length of the access in flight, counting the current cycle.
+    assign mem_cycles = mem_cycles_hold + 1;
+
+    // Register read mux.
+    always_comb
+        if (!address[3]) unique case (address[2:0])
+            3'b000: readdata = reads;
+            3'b001: readdata = writes;
+            3'b010: readdata = {max_read_cycles, min_read_cycles};
+            3'b011: readdata = {max_write_cycles, min_write_cycles};
+            3'b100: readdata = ring_reads;
+            3'b101: readdata = ring_invals;
+            3'b110: readdata = ring_read_invals;
+            3'b111: readdata = ring_replies;
+        endcase else unique case (address[1:0])
+            2'b00: readdata = ring_forwards;
+            2'b01: readdata = {max_ring_cycles, min_ring_cycles};
+            2'b10: readdata = io_reads;
+            2'b11: readdata = io_writes;
+        endcase
+
+    always_ff @(posedge clk or negedge rst_n)
+        if (!rst_n) begin
+            reads <= 0;
+            writes <= 0;
+            io_reads <= 0;
+            io_writes <= 0;
+
+            min_ring_cycles <= 0;
+            max_ring_cycles <= 0;
+            min_read_cycles <= 0;
+            max_read_cycles <= 0;
+            min_write_cycles <= 0;
+            max_write_cycles <= 0;
+
+            ring_reads <= 0;
+            ring_invals <= 0;
+            ring_replies <= 0;
+            ring_forwards <= 0;
+            ring_read_invals <= 0;
+
+            mem_cycles_hold <= 0;
+
+            // Fix: ring_cycles used to be left out of the reset, so it was
+            // undefined until the first TTL_MAX sample and could be latched
+            // into the ring min/max registers.
+            ring_cycles <= 0;
+        end else begin
+            // Free-running; restarted below when a TTL_MAX message is seen.
+            ring_cycles <= ring_cycles + 1;
+
+            // Fix: the memory counters are driven by the registered control
+            // signals from perf_snoop rather than the live mem_* ports, so
+            // that they are sampled in the same cycle as the registered
+            // address behind `cached`.
+            if (snoop_read || snoop_write)
+                mem_cycles_hold <= mem_cycles;
+
+            if ((snoop_read || snoop_write) && !snoop_waitrequest) begin
+                // Access accepted: restart the cycle count (this assignment
+                // overrides the increment above).
+                mem_cycles_hold <= 0;
+
+                if (!cached) begin
+                    if (snoop_write)
+                        io_writes <= io_writes + 1;
+                    else
+                        io_reads <= io_reads + 1;
+                end else if (snoop_write) begin
+                    writes <= writes + 1;
+
+                    // The comparisons use mem_cycles_hold (= mem_cycles - 1)
+                    // against values that were stored as mem_cycles.
+                    if (min_write_cycles == 0 || mem_cycles_hold < min_write_cycles)
+                        min_write_cycles <= mem_cycles;
+
+                    if (mem_cycles_hold >= max_write_cycles)
+                        max_write_cycles <= mem_cycles;
+                end else begin
+                    reads <= reads + 1;
+
+                    if (min_read_cycles == 0 || mem_cycles_hold < min_read_cycles)
+                        min_read_cycles <= mem_cycles;
+
+                    if (mem_cycles_hold >= max_read_cycles)
+                        max_read_cycles <= mem_cycles;
+                end
+            end
+
+            // Left side: a transferred message whose ttl equals `TTL_END
+            // closes a ring latency measurement.
+            if (snoop_left_valid && snoop_left_ready && snoop_left.ttl == `TTL_END) begin
+                if (snoop_left.reply)
+                    ring_replies <= ring_replies + 1;
+
+                if (min_ring_cycles == 0 || ring_cycles < min_ring_cycles)
+                    min_ring_cycles <= ring_cycles;
+
+                if (ring_cycles > max_ring_cycles)
+                    max_ring_cycles <= ring_cycles;
+            end
+
+            // Right side: a transferred message with ttl equal to `TTL_MAX
+            // restarts the latency count and is classified by its read/inval
+            // flags; any other ttl is counted as a forward.
+            if (snoop_right_valid && snoop_right_ready) begin
+                if (snoop_right.ttl == `TTL_MAX) begin
+                    ring_cycles <= 1;
+
+                    if (snoop_right.read && !snoop_right.inval)
+                        ring_reads <= ring_reads + 1;
+
+                    if (!snoop_right.read && snoop_right.inval)
+                        ring_invals <= ring_invals + 1;
+
+                    if (snoop_right.read && snoop_right.inval)
+                        ring_read_invals <= ring_read_invals + 1;
+                end else
+                    ring_forwards <= ring_forwards + 1;
+            end
+
+            // Placed last so that it wins over any update made above in the
+            // same cycle.
+            if (clear) begin
+                reads <= 0;
+                writes <= 0;
+                io_reads <= 0;
+                io_writes <= 0;
+
+                min_ring_cycles <= 0;
+                max_ring_cycles <= 0;
+                min_read_cycles <= 0;
+                max_read_cycles <= 0;
+                min_write_cycles <= 0;
+                max_write_cycles <= 0;
+
+                ring_reads <= 0;
+                ring_invals <= 0;
+                ring_replies <= 0;
+                ring_forwards <= 0;
+                ring_read_invals <= 0;
+            end
+        end
+
+endmodule
diff --git a/rtl/perf/perf_monitor.sv b/rtl/perf/perf_monitor.sv
new file mode 100644
index 0000000..2f38d94
--- /dev/null
+++ b/rtl/perf/perf_monitor.sv
@@ -0,0 +1,286 @@
+`include "cache/defs.sv"
+
+// perf_monitor: four perf_link counter banks behind one register window.
+//
+// perf_address[5:4] picks the link, perf_address[3:0] picks the register
+// inside that link. Any write to a link's window clears that link's
+// counters; the written data is ignored. perf_read is not used: the read
+// data is a purely combinational mux of the selected link.
+//
+// Ring wiring: link k observes ring port k on its right-hand side and ring
+// port (k - 1) mod 4 on its left-hand side, and drives out_k.
+module perf_monitor
+(
+    input  logic       clk,
+                       rst_n,
+
+    input  logic[5:0]  perf_address,
+    input  logic       perf_read,
+                       perf_write,
+    input  word        perf_writedata, // Unused
+    output word        perf_readdata,
+
+    input  logic       in_0_valid,
+    input  ring_req    in_0,
+    output logic       in_0_ready,
+
+    input  logic       in_1_valid,
+    input  ring_req    in_1,
+    output logic       in_1_ready,
+
+    input  logic       in_2_valid,
+    input  ring_req    in_2,
+    output logic       in_2_ready,
+
+    input  logic       in_3_valid,
+    input  ring_req    in_3,
+    output logic       in_3_ready,
+
+    input  logic       out_0_ready,
+    output ring_req    out_0,
+    output logic       out_0_valid,
+
+    input  logic       out_1_ready,
+    output ring_req    out_1,
+    output logic       out_1_valid,
+
+    input  logic       out_2_ready,
+    output ring_req    out_2,
+    output logic       out_2_valid,
+
+    input  logic       out_3_ready,
+    output ring_req    out_3,
+    output logic       out_3_valid,
+
+    input  line_ptr    local_0_address,
+    input  logic       local_0_read,
+                       local_0_write,
+    input  line        local_0_writedata,
+    input  line_be     local_0_byteenable,
+    output logic       local_0_waitrequest,
+    output line        local_0_readdata,
+
+    input  line_ptr    local_1_address,
+    input  logic       local_1_read,
+                       local_1_write,
+    input  line        local_1_writedata,
+    input  line_be     local_1_byteenable,
+    output logic       local_1_waitrequest,
+    output line        local_1_readdata,
+
+    input  line_ptr    local_2_address,
+    input  logic       local_2_read,
+                       local_2_write,
+    input  line        local_2_writedata,
+    input  line_be     local_2_byteenable,
+    output logic       local_2_waitrequest,
+    output line        local_2_readdata,
+
+    input  line_ptr    local_3_address,
+    input  logic       local_3_read,
+                       local_3_write,
+    input  line        local_3_writedata,
+    input  line_be     local_3_byteenable,
+    output logic       local_3_waitrequest,
+    output line        local_3_readdata,
+
+    input  logic       mem_0_waitrequest,
+    input  line        mem_0_readdata,
+    output word        mem_0_address,
+    output logic       mem_0_read,
+                       mem_0_write,
+    output line        mem_0_writedata,
+    output line_be     mem_0_byteenable,
+
+    input  logic       mem_1_waitrequest,
+    input  line        mem_1_readdata,
+    output word        mem_1_address,
+    output logic       mem_1_read,
+                       mem_1_write,
+    output line        mem_1_writedata,
+    output line_be     mem_1_byteenable,
+
+    input  logic       mem_2_waitrequest,
+    input  line        mem_2_readdata,
+    output word        mem_2_address,
+    output logic       mem_2_read,
+                       mem_2_write,
+    output line        mem_2_writedata,
+    output line_be     mem_2_byteenable,
+
+    input  logic       mem_3_waitrequest,
+    input  line        mem_3_readdata,
+    output word        mem_3_address,
+    output logic       mem_3_read,
+                       mem_3_write,
+    output line        mem_3_writedata,
+    output line_be     mem_3_byteenable
+);
+
+    // Address split: which link, and which register inside it.
+    logic[1:0] link_sel;
+    logic[3:0] reg_index;
+
+    // Per-link clear strobes (bit k belongs to link k) and read data.
+    logic[3:0] link_clear;
+    word       link_readdata_0, link_readdata_1, link_readdata_2, link_readdata_3;
+
+    assign link_sel  = perf_address[5:4];
+    assign reg_index = perf_address[3:0];
+
+    // A write anywhere in a link's window clears that link only.
+    always_comb begin
+        link_clear = 4'b0000;
+
+        unique case (link_sel)
+            2'b00: link_clear[0] = perf_write;
+            2'b01: link_clear[1] = perf_write;
+            2'b10: link_clear[2] = perf_write;
+            2'b11: link_clear[3] = perf_write;
+        endcase
+    end
+
+    // Read data comes from the selected link.
+    always_comb
+        unique case (link_sel)
+            2'b00: perf_readdata = link_readdata_0;
+            2'b01: perf_readdata = link_readdata_1;
+            2'b10: perf_readdata = link_readdata_2;
+            2'b11: perf_readdata = link_readdata_3;
+        endcase
+
+    // Link 0: left = ring port 3, right = ring port 0.
+    perf_link link_0
+    (
+        .clk(clk),
+        .rst_n(rst_n),
+
+        .in_left_valid(in_3_valid),
+        .in_left(in_3),
+        .in_left_ready(in_3_ready),
+
+        .in_right_valid(in_0_valid),
+        .in_right(in_0),
+        .in_right_ready(in_0_ready),
+
+        .out_left_ready(out_0_ready),
+        .out_left(out_0),
+        .out_left_valid(out_0_valid),
+
+        .local_address(local_0_address),
+        .local_read(local_0_read),
+        .local_write(local_0_write),
+        .local_writedata(local_0_writedata),
+        .local_byteenable(local_0_byteenable),
+        .local_waitrequest(local_0_waitrequest),
+        .local_readdata(local_0_readdata),
+
+        .mem_waitrequest(mem_0_waitrequest),
+        .mem_readdata(mem_0_readdata),
+        .mem_address(mem_0_address),
+        .mem_read(mem_0_read),
+        .mem_write(mem_0_write),
+        .mem_writedata(mem_0_writedata),
+        .mem_byteenable(mem_0_byteenable),
+
+        .clear(link_clear[0]),
+        .address(reg_index),
+        .readdata(link_readdata_0)
+    );
+
+    // Link 1: left = ring port 0, right = ring port 1.
+    perf_link link_1
+    (
+        .clk(clk),
+        .rst_n(rst_n),
+
+        .in_left_valid(in_0_valid),
+        .in_left(in_0),
+        .in_left_ready(in_0_ready),
+
+        .in_right_valid(in_1_valid),
+        .in_right(in_1),
+        .in_right_ready(in_1_ready),
+
+        .out_left_ready(out_1_ready),
+        .out_left(out_1),
+        .out_left_valid(out_1_valid),
+
+        .local_address(local_1_address),
+        .local_read(local_1_read),
+        .local_write(local_1_write),
+        .local_writedata(local_1_writedata),
+        .local_byteenable(local_1_byteenable),
+        .local_waitrequest(local_1_waitrequest),
+        .local_readdata(local_1_readdata),
+
+        .mem_waitrequest(mem_1_waitrequest),
+        .mem_readdata(mem_1_readdata),
+        .mem_address(mem_1_address),
+        .mem_read(mem_1_read),
+        .mem_write(mem_1_write),
+        .mem_writedata(mem_1_writedata),
+        .mem_byteenable(mem_1_byteenable),
+
+        .clear(link_clear[1]),
+        .address(reg_index),
+        .readdata(link_readdata_1)
+    );
+
+    // Link 2: left = ring port 1, right = ring port 2.
+    perf_link link_2
+    (
+        .clk(clk),
+        .rst_n(rst_n),
+
+        .in_left_valid(in_1_valid),
+        .in_left(in_1),
+        .in_left_ready(in_1_ready),
+
+        .in_right_valid(in_2_valid),
+        .in_right(in_2),
+        .in_right_ready(in_2_ready),
+
+        .out_left_ready(out_2_ready),
+        .out_left(out_2),
+        .out_left_valid(out_2_valid),
+
+        .local_address(local_2_address),
+        .local_read(local_2_read),
+        .local_write(local_2_write),
+        .local_writedata(local_2_writedata),
+        .local_byteenable(local_2_byteenable),
+        .local_waitrequest(local_2_waitrequest),
+        .local_readdata(local_2_readdata),
+
+        .mem_waitrequest(mem_2_waitrequest),
+        .mem_readdata(mem_2_readdata),
+        .mem_address(mem_2_address),
+        .mem_read(mem_2_read),
+        .mem_write(mem_2_write),
+        .mem_writedata(mem_2_writedata),
+        .mem_byteenable(mem_2_byteenable),
+
+        .clear(link_clear[2]),
+        .address(reg_index),
+        .readdata(link_readdata_2)
+    );
+
+    // Link 3: left = ring port 2, right = ring port 3.
+    perf_link link_3
+    (
+        .clk(clk),
+        .rst_n(rst_n),
+
+        .in_left_valid(in_2_valid),
+        .in_left(in_2),
+        .in_left_ready(in_2_ready),
+
+        .in_right_valid(in_3_valid),
+        .in_right(in_3),
+        .in_right_ready(in_3_ready),
+
+        .out_left_ready(out_3_ready),
+        .out_left(out_3),
+        .out_left_valid(out_3_valid),
+
+        .local_address(local_3_address),
+        .local_read(local_3_read),
+        .local_write(local_3_write),
+        .local_writedata(local_3_writedata),
+        .local_byteenable(local_3_byteenable),
+        .local_waitrequest(local_3_waitrequest),
+        .local_readdata(local_3_readdata),
+
+        .mem_waitrequest(mem_3_waitrequest),
+        .mem_readdata(mem_3_readdata),
+        .mem_address(mem_3_address),
+        .mem_read(mem_3_read),
+        .mem_write(mem_3_write),
+        .mem_writedata(mem_3_writedata),
+        .mem_byteenable(mem_3_byteenable),
+
+        .clear(link_clear[3]),
+        .address(reg_index),
+        .readdata(link_readdata_3)
+    );
+
+endmodule
diff --git a/rtl/perf/snoop.sv b/rtl/perf/snoop.sv
new file mode 100644
index 0000000..e98153e
--- /dev/null
+++ b/rtl/perf/snoop.sv
@@ -0,0 +1,129 @@
+`include "cache/defs.sv"
+
+// perf_snoop: transparent tap on one ring link and one memory port.
+//
+// Functional paths are purely combinational pass-throughs:
+//   * in_left  -> out_left (valid/data forward, ready backward);
+//   * local_*  -> mem_*    (the line pointer gets four zero bits appended
+//                           to form the memory address).
+//
+// In parallel, the handshake/control bits, the sampled message fields of
+// both ring sides and the memory address are copied through two register
+// stages and presented on the snoop_* outputs, i.e. two clock cycles after
+// they were observed.
+module perf_snoop
+(
+    input  logic        clk,
+                        rst_n,
+
+    input  logic        in_left_valid,
+    input  ring_req     in_left,
+    output logic        in_left_ready,
+
+    input  logic        out_left_ready,
+    output ring_req     out_left,
+    output logic        out_left_valid,
+
+    // Right side is observed only, so its ready is an input here.
+    input  logic        in_right_valid,
+    input  ring_req     in_right,
+    input  logic        in_right_ready,
+
+    input  line_ptr     local_address,
+    input  logic        local_read,
+                        local_write,
+    input  line         local_writedata,
+    input  line_be      local_byteenable,
+    output logic        local_waitrequest,
+    output line         local_readdata,
+
+    input  logic        mem_waitrequest,
+    input  line         mem_readdata,
+    output word         mem_address,
+    output logic        mem_read,
+                        mem_write,
+    output line         mem_writedata,
+    output line_be      mem_byteenable,
+
+    output logic        snoop_left_ready,
+                        snoop_left_valid,
+                        snoop_right_ready,
+                        snoop_right_valid,
+                        snoop_read,
+                        snoop_write,
+                        snoop_waitrequest,
+    output word         snoop_address,
+    output perf_sample  snoop_left,
+                        snoop_right
+);
+
+    // Number of single-bit control signals carried through the pipeline.
+    localparam int CTRL_BITS = 7;
+
+    // Control bits as seen this cycle and after the first register stage.
+    // Bit order, MSB first: read, write, waitrequest, left ready,
+    // left valid, right ready, right valid.
+    logic [CTRL_BITS-1:0] ctrl_now, ctrl_stage;
+
+    // First-stage registers for the data-like signals.
+    word        addr_stage;
+    perf_sample left_stage, right_stage;
+
+    // Ring pass-through. out_right is driven by the same lines because of
+    // the ring, so only the left side needs forwarding here.
+    assign out_left       = in_left;
+    assign out_left_valid = in_left_valid;
+    assign in_left_ready  = out_left_ready;
+
+    // Memory pass-through.
+    assign mem_address       = {local_address, 4'b0000};
+    assign mem_read          = local_read;
+    assign mem_write         = local_write;
+    assign mem_writedata     = local_writedata;
+    assign mem_byteenable    = local_byteenable;
+    assign local_waitrequest = mem_waitrequest;
+    assign local_readdata    = mem_readdata;
+
+    assign ctrl_now = {mem_read, mem_write, mem_waitrequest,
+                       in_left_ready, in_left_valid,
+                       in_right_ready, in_right_valid};
+
+    // Control pipeline (with reset). The double registering is meant to
+    // ease the fitter's job: perf_monitor samples the whole ring, so the
+    // area it spans is potentially large.
+    always_ff @(posedge clk or negedge rst_n)
+        if (!rst_n) begin
+            ctrl_stage <= {CTRL_BITS{1'b0}};
+
+            {snoop_read, snoop_write, snoop_waitrequest,
+             snoop_left_ready, snoop_left_valid,
+             snoop_right_ready, snoop_right_valid} <= {CTRL_BITS{1'b0}};
+        end else begin
+            ctrl_stage <= ctrl_now;
+
+            {snoop_read, snoop_write, snoop_waitrequest,
+             snoop_left_ready, snoop_left_valid,
+             snoop_right_ready, snoop_right_valid} <= ctrl_stage;
+        end
+
+    // Data pipeline (no reset). Only the four sampled fields of each ring
+    // message are captured.
+    always_ff @(posedge clk) begin
+        // Stage 1: capture.
+        left_stage.ttl    <= in_left.ttl;
+        left_stage.read   <= in_left.read;
+        left_stage.inval  <= in_left.inval;
+        left_stage.reply  <= in_left.reply;
+
+        right_stage.ttl   <= in_right.ttl;
+        right_stage.read  <= in_right.read;
+        right_stage.inval <= in_right.inval;
+        right_stage.reply <= in_right.reply;
+
+        addr_stage        <= mem_address;
+
+        // Stage 2: present to the observer.
+        snoop_left        <= left_stage;
+        snoop_right       <= right_stage;
+        snoop_address     <= addr_stage;
+    end
+
+endmodule