summaryrefslogtreecommitdiff
path: root/demo/perf.c
blob: 28a20c8911a87d95cf73cdb8a0cc0e343722c1c3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#include "demo.h"

/*
 * Per-cpu performance counter registers.
 *
 * PERF_BASE(n) yields the address of cpu n's register window: 0x30150000
 * combined with a 0x40-byte stride per cpu. The counters are words located
 * at fixed byte offsets (0, 4, 8, ... 44) inside that window.
 */
#define PERF_BASE(n)             (0x30150000 | (0x40 * (n)))

/* Read-only lvalue for the word at byte offset `off` in cpu n's window. */
#define PERF_COUNTER(n, off)     (*(const volatile unsigned *)(PERF_BASE(n) + (off)))

/* Writable lvalue for the first word of the window (same address as PERF_MEM_READS). */
#define PERF_CLEAR(n)            (*(volatile unsigned *)PERF_BASE(n))

/* Memory counters. */
#define PERF_MEM_READS(n)        PERF_COUNTER(n, 0)
#define PERF_MEM_WRITES(n)       PERF_COUNTER(n, 4)
#define PERF_MEM_READ_CYCLES(n)  PERF_COUNTER(n, 8)
#define PERF_MEM_WRITE_CYCLES(n) PERF_COUNTER(n, 12)

/* Ring counters. */
#define PERF_RING_READS(n)       PERF_COUNTER(n, 16)
#define PERF_RING_INVALS(n)      PERF_COUNTER(n, 20)
#define PERF_RING_READ_INVALS(n) PERF_COUNTER(n, 24)
#define PERF_RING_REPLIES(n)     PERF_COUNTER(n, 28)
#define PERF_RING_FORWARDS(n)    PERF_COUNTER(n, 32)
#define PERF_RING_CYCLES(n)      PERF_COUNTER(n, 36)

/* I/O counters. */
#define PERF_IO_READS(n)         PERF_COUNTER(n, 40)
#define PERF_IO_WRITES(n)        PERF_COUNTER(n, 44)

/*
 * Field layout of the *_CYCLES registers: the low 16 bits hold a minimum and
 * the high 16 bits hold a maximum (shift the masked value down by
 * PERF_MAX_SHIFT to obtain it).
 */
#define PERF_MIN_MASK  0x0000ffff
#define PERF_MAX_MASK  0xffff0000
#define PERF_MAX_SHIFT 16

/*
 * Print a snapshot of the performance counters belonging to `cpu`.
 *
 * A header line is printed first, then every counter register is read
 * exactly once before any of the values are reported. The three "cycles"
 * registers each carry two fields: a minimum in the bits selected by
 * PERF_MIN_MASK and a maximum in the bits selected by PERF_MAX_MASK.
 */
void perf_show(unsigned cpu)
{
	print("dumping performance counters for cpu%u", cpu);

	/* Raw register snapshot; the reads are volatile, so keep this order. */
	const unsigned misses = PERF_MEM_READS(cpu);
	const unsigned writebacks = PERF_MEM_WRITES(cpu);
	const unsigned rd_cycles = PERF_MEM_READ_CYCLES(cpu);
	const unsigned wr_cycles = PERF_MEM_WRITE_CYCLES(cpu);
	const unsigned sent_read = PERF_RING_READS(cpu);
	const unsigned sent_inval = PERF_RING_INVALS(cpu);
	const unsigned sent_read_inval = PERF_RING_READ_INVALS(cpu);
	const unsigned replies = PERF_RING_REPLIES(cpu);
	const unsigned forwards = PERF_RING_FORWARDS(cpu);
	const unsigned ring_packed = PERF_RING_CYCLES(cpu);
	const unsigned uncached_rd = PERF_IO_READS(cpu);
	const unsigned uncached_wr = PERF_IO_WRITES(cpu);

	/* "sends" is the sum of the three kinds of request this cpu sent. */
	print("requests:         sends=%u forwards=%u replies=%u",
	      sent_read + sent_inval + sent_read_inval, forwards, replies);
	print("requests sent:    read=%u inval=%u read_inval=%u",
	      sent_read, sent_inval, sent_read_inval);
	print("memory:           misses=%u writebacks=%u", misses, writebacks);
	print("uncached i/o:     reads=%u writes=%u", uncached_rd, uncached_wr);

	/* Unpack the min/max halves of each cycles register while printing. */
	print("ring cycles:      min=%u max=%u",
	      ring_packed & PERF_MIN_MASK,
	      (ring_packed & PERF_MAX_MASK) >> PERF_MAX_SHIFT);
	print("mem read cycles:  min=%u max=%u",
	      rd_cycles & PERF_MIN_MASK,
	      (rd_cycles & PERF_MAX_MASK) >> PERF_MAX_SHIFT);
	print("mem write cycles: min=%u max=%u",
	      wr_cycles & PERF_MIN_MASK,
	      (wr_cycles & PERF_MAX_MASK) >> PERF_MAX_SHIFT);
}

void perf_clear(unsigned cpu)
{
	PERF_CLEAR(cpu) = 0;
	print("cleared performance counters for cpu%u", cpu);
}