File srhd.c changed (mode: 100644) (index f688a2a..716ee13) |
5 |
5 |
#include <errno.h> |
#include <errno.h> |
6 |
6 |
#include <string.h> |
#include <string.h> |
7 |
7 |
#include <sys/ioctl.h> |
#include <sys/ioctl.h> |
|
8 |
|
#include <sys/mman.h> |
|
9 |
|
#include <sched.h> |
8 |
10 |
#include <linux/perf_event.h> |
#include <linux/perf_event.h> |
9 |
11 |
#include <asm/unistd.h> |
#include <asm/unistd.h> |
10 |
12 |
|
|
11 |
|
#define LOOPS (10 * 1000 * 1000) |
|
|
13 |
|
#define barrier() asm volatile ("" ::: "memory") |
12 |
14 |
|
|
13 |
|
static int k; |
|
|
15 |
|
#define MMAP_BUF_SIZE 16 /* in pages */ |
|
16 |
|
|
|
17 |
|
static unsigned long page_size; |
14 |
18 |
|
|
15 |
19 |
static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, |
static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, |
16 |
20 |
int cpu, int group_fd, unsigned long flags) |
int cpu, int group_fd, unsigned long flags) |
|
... |
... |
static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, |
18 |
22 |
return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags); |
return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags); |
19 |
23 |
} |
} |
20 |
24 |
|
|
21 |
|
/* |
|
22 |
|
* The heavy one: we flush the cache line before any access to i |
|
23 |
|
*/ |
|
24 |
|
static void test_function1(void) |
|
25 |
|
{ |
|
26 |
|
int i; |
|
27 |
|
|
|
28 |
|
for (i = 0; i < LOOPS; i++) { |
|
29 |
|
asm volatile("clflush (%0)" : : "r" (&i) : "memory"); |
|
30 |
|
} |
|
31 |
|
} |
|
32 |
|
|
|
33 |
|
/* |
|
34 |
|
* The optimized version (please note that k is not used |
|
35 |
|
*/ |
|
36 |
|
static void test_function2(void) |
|
37 |
|
{ |
|
38 |
|
int i; |
|
39 |
|
|
|
40 |
|
for (i = 0; i < LOOPS; i++) { |
|
41 |
|
asm volatile("clflush (%0)" : : "r" (&k) : "memory"); |
|
42 |
|
} |
|
43 |
|
} |
|
44 |
|
|
|
45 |
25 |
// TODO: investigate PERF_TYPE_BREAKPOINT |
// TODO: investigate PERF_TYPE_BREAKPOINT |
46 |
26 |
|
|
47 |
27 |
/* |
/* |
48 |
28 |
* Registers a counter |
* Registers a counter |
49 |
29 |
*/ |
*/ |
50 |
|
static int register_event(const __u32 type, __u64 config) |
|
|
30 |
|
static int register_event(const __u32 type, __u64 config, void **m) |
51 |
31 |
{ |
{ |
52 |
32 |
int fd; |
int fd; |
53 |
33 |
struct perf_event_attr attr; |
struct perf_event_attr attr; |
54 |
34 |
|
|
55 |
35 |
memset(&attr, 0, sizeof(struct perf_event_attr)); |
memset(&attr, 0, sizeof(struct perf_event_attr)); |
56 |
36 |
attr.size = sizeof(struct perf_event_attr); |
attr.size = sizeof(struct perf_event_attr); |
57 |
|
attr.sample_freq = 990; |
|
58 |
|
attr.freq = 1; |
|
|
37 |
|
attr.sample_type = PERF_SAMPLE_IP |
|
38 |
|
| PERF_SAMPLE_TID |
|
39 |
|
| PERF_SAMPLE_ADDR |
|
40 |
|
| PERF_SAMPLE_PERIOD; |
|
41 |
|
attr.sample_period = 1000 * 1000; |
59 |
42 |
attr.enable_on_exec = 1; |
attr.enable_on_exec = 1; |
60 |
43 |
attr.disabled = 1; |
attr.disabled = 1; |
61 |
44 |
attr.exclude_kernel = 1; |
attr.exclude_kernel = 1; |
62 |
|
attr.exclude_hv = 1; |
|
63 |
|
attr.wakeup_events = 100; |
|
|
45 |
|
//attr.exclude_hv = 1; |
64 |
46 |
|
|
65 |
47 |
attr.type = type; |
attr.type = type; |
66 |
48 |
attr.config = config; |
attr.config = config; |
67 |
49 |
|
|
68 |
|
fd = perf_event_open(&attr, 0, -1, -1, PERF_FLAG_FD_CLOEXEC); |
|
|
50 |
|
fd = perf_event_open(&attr, -1 /* pid */, 0 /* cpu */, |
|
51 |
|
-1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); |
69 |
52 |
if (fd == -1) { |
if (fd == -1) { |
70 |
53 |
fprintf(stderr, "Cannot call perf_event_open, config %llx\n", |
fprintf(stderr, "Cannot call perf_event_open, config %llx\n", |
71 |
54 |
attr.config); |
attr.config); |
72 |
55 |
exit(1); |
exit(1); |
73 |
56 |
} |
} |
74 |
57 |
|
|
|
58 |
|
*m = mmap(NULL, 1 + MMAP_BUF_SIZE * page_size, |
|
59 |
|
PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); |
|
60 |
|
if (!*m) { |
|
61 |
|
fprintf(stderr, "Cannot mmap (%s)!\n", strerror(errno)); |
|
62 |
|
exit(1); |
|
63 |
|
} |
|
64 |
|
|
75 |
65 |
return fd; |
return fd; |
76 |
66 |
} |
} |
77 |
67 |
|
|
78 |
68 |
/* |
/* |
79 |
|
* Print the couters |
|
|
69 |
|
* Checking if any process is on the wrong side |
|
70 |
|
* Returns the ?TODO |
80 |
71 |
*/ |
*/ |
81 |
|
static void print_event(int fd, const char *name) |
|
|
72 |
|
static int check(void *m) |
82 |
73 |
{ |
{ |
83 |
|
unsigned char buf[4096]; |
|
84 |
|
long long count; |
|
85 |
|
ssize_t n; |
|
86 |
|
|
|
87 |
|
n = read(fd, buf, sizeof(buf)); |
|
88 |
|
fprintf(stderr, "n[%s] = %zd\n", name, n); |
|
89 |
|
if (n <= 0) { |
|
90 |
|
fprintf(stderr, "Cannot read: %s!\n", strerror(errno)); |
|
91 |
|
exit(1); |
|
|
74 |
|
struct perf_event_mmap_page *mp; |
|
75 |
|
struct perf_event_header *h; |
|
76 |
|
void *p; |
|
77 |
|
unsigned long long *p_ip, *p_addr, *p_period; |
|
78 |
|
unsigned int *p_pid, *p_tid; |
|
79 |
|
unsigned int version, seq, index; |
|
80 |
|
unsigned long long count, head, tail, rest; |
|
81 |
|
|
|
82 |
|
mp = m; |
|
83 |
|
do { |
|
84 |
|
seq = mp->lock; |
|
85 |
|
barrier(); |
|
86 |
|
version = mp->version; |
|
87 |
|
index = mp->index; |
|
88 |
|
count = mp->offset; |
|
89 |
|
head = mp->data_head; |
|
90 |
|
tail = mp->data_tail; |
|
91 |
|
barrier(); |
|
92 |
|
} while (mp->lock != seq); |
|
93 |
|
|
|
94 |
|
head %= MMAP_BUF_SIZE * page_size; |
|
95 |
|
tail %= MMAP_BUF_SIZE * page_size; |
|
96 |
|
|
|
97 |
|
fprintf(stderr, "version=%u index=%u count=%llu" |
|
98 |
|
" head=%llu tail=%llu\n", |
|
99 |
|
version, index, count, head, tail); |
|
100 |
|
|
|
101 |
|
/* Dump rest of buffers */ |
|
102 |
|
p = m + page_size + tail; |
|
103 |
|
rest = head - tail; |
|
104 |
|
while (rest > 0) { |
|
105 |
|
h = p; |
|
106 |
|
fprintf(stderr, "\th: type=%u misc=%hu size=%hu rest=%llu\n", |
|
107 |
|
h->type, h->misc, h->size, rest); |
|
108 |
|
p += sizeof(struct perf_event_header); |
|
109 |
|
|
|
110 |
|
switch (h->type) { |
|
111 |
|
case PERF_RECORD_SAMPLE: |
|
112 |
|
p_ip = p; p += 8; |
|
113 |
|
p_pid = p; p += 4; |
|
114 |
|
p_tid = p; p += 4; |
|
115 |
|
p_addr = p; p += 8; |
|
116 |
|
p_period = p; p += 8; |
|
117 |
|
fprintf(stderr, "\tsample! ip=0x%llx pid=%u" |
|
118 |
|
" tid=%u addr=%llx period=%llu\n", |
|
119 |
|
*p_ip, *p_pid, *p_tid, *p_addr, *p_period); |
|
120 |
|
break; |
|
121 |
|
|
|
122 |
|
default: |
|
123 |
|
fprintf(stderr, "Unknown type %u\n", h->type); |
|
124 |
|
} |
|
125 |
|
rest -= h->size; |
92 |
126 |
} |
} |
93 |
|
memcpy(&count, buf, sizeof(long long)); |
|
94 |
|
printf("Count[%s]: %lld\n", name, count); |
|
|
127 |
|
mp->data_tail = head; |
|
128 |
|
|
|
129 |
|
return 0; |
95 |
130 |
} |
} |
96 |
131 |
|
|
97 |
132 |
int main(void) |
int main(void) |
98 |
133 |
{ |
{ |
|
134 |
|
struct sched_param param; |
99 |
135 |
int fd1, fd2, nr_cpus; |
int fd1, fd2, nr_cpus; |
|
136 |
|
void *m1, *m2; |
100 |
137 |
|
|
101 |
138 |
nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); |
nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); |
102 |
|
fprintf(stderr, "%d cpu(s)\n", nr_cpus); |
|
|
139 |
|
fprintf(stderr, "%d cpu(s) online.\n", nr_cpus); |
|
140 |
|
|
|
141 |
|
page_size = sysconf(_SC_PAGE_SIZE); |
|
142 |
|
fprintf(stderr, "page size is %lu\n", page_size); |
|
143 |
|
|
|
144 |
|
/* TODO: move to 99? */ |
|
145 |
|
param.sched_priority = 1; |
|
146 |
|
if (sched_setscheduler(0, SCHED_FIFO, &param)) { |
|
147 |
|
fprintf(stderr, "Cannot set fifo schedule (%s)!", strerror(errno)); |
|
148 |
|
return 1; |
|
149 |
|
} |
103 |
150 |
|
|
104 |
151 |
fd1 = register_event(PERF_TYPE_HW_CACHE, PERF_COUNT_HW_CACHE_LL |
fd1 = register_event(PERF_TYPE_HW_CACHE, PERF_COUNT_HW_CACHE_LL |
105 |
152 |
| (PERF_COUNT_HW_CACHE_OP_READ << 8) |
| (PERF_COUNT_HW_CACHE_OP_READ << 8) |
106 |
|
| (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16)); |
|
|
153 |
|
| (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16), &m1); |
107 |
154 |
fd2 = register_event(PERF_TYPE_HW_CACHE, PERF_COUNT_HW_CACHE_LL |
fd2 = register_event(PERF_TYPE_HW_CACHE, PERF_COUNT_HW_CACHE_LL |
108 |
155 |
| (PERF_COUNT_HW_CACHE_OP_READ << 8) |
| (PERF_COUNT_HW_CACHE_OP_READ << 8) |
109 |
|
| (PERF_COUNT_HW_CACHE_RESULT_MISS << 16)); |
|
|
156 |
|
| (PERF_COUNT_HW_CACHE_RESULT_MISS << 16), &m2); |
110 |
157 |
|
|
111 |
|
ioctl(fd1, PERF_EVENT_IOC_RESET, 0); |
|
|
158 |
|
//ioctl(fd1, PERF_EVENT_IOC_RESET, 0); |
112 |
159 |
ioctl(fd2, PERF_EVENT_IOC_RESET, 0); |
ioctl(fd2, PERF_EVENT_IOC_RESET, 0); |
113 |
|
ioctl(fd1, PERF_EVENT_IOC_ENABLE, 0); |
|
|
160 |
|
//ioctl(fd1, PERF_EVENT_IOC_ENABLE, 0); |
114 |
161 |
ioctl(fd2, PERF_EVENT_IOC_ENABLE, 0); |
ioctl(fd2, PERF_EVENT_IOC_ENABLE, 0); |
115 |
|
test_function1(); |
|
116 |
|
ioctl(fd1, PERF_EVENT_IOC_DISABLE, 0); |
|
117 |
|
ioctl(fd2, PERF_EVENT_IOC_DISABLE, 0); |
|
118 |
|
print_event(fd1, "llc-access"); |
|
119 |
|
print_event(fd2, "llc-miss"); |
|
120 |
162 |
|
|
121 |
|
ioctl(fd1, PERF_EVENT_IOC_RESET, 0); |
|
122 |
|
ioctl(fd2, PERF_EVENT_IOC_RESET, 0); |
|
123 |
|
ioctl(fd1, PERF_EVENT_IOC_ENABLE, 0); |
|
124 |
|
ioctl(fd2, PERF_EVENT_IOC_ENABLE, 0); |
|
125 |
|
test_function2(); |
|
|
163 |
|
while (1) { |
|
164 |
|
fprintf(stderr, "Checking...\n"); |
|
165 |
|
check(m2); |
|
166 |
|
usleep(1000 * 1000); |
|
167 |
|
} |
126 |
168 |
ioctl(fd1, PERF_EVENT_IOC_DISABLE, 0); |
ioctl(fd1, PERF_EVENT_IOC_DISABLE, 0); |
127 |
169 |
ioctl(fd2, PERF_EVENT_IOC_DISABLE, 0); |
ioctl(fd2, PERF_EVENT_IOC_DISABLE, 0); |
128 |
|
print_event(fd1, "llc-access"); |
|
129 |
|
print_event(fd2, "llc-miss"); |
|
|
170 |
|
|
|
171 |
|
munmap(m1, 0); /* TODO: 0 is correct? */ |
|
172 |
|
munmap(m2, 0); |
130 |
173 |
|
|
131 |
174 |
close(fd1); |
close(fd1); |
132 |
175 |
close(fd2); |
close(fd2); |