File srhd.c changed (mode: 100644) (index fb84fdd..f688a2a) |
8 |
8 |
#include <linux/perf_event.h> |
#include <linux/perf_event.h> |
9 |
9 |
#include <asm/unistd.h> |
#include <asm/unistd.h> |
10 |
10 |
|
|
|
11 |
|
#define LOOPS (10 * 1000 * 1000) |
|
12 |
|
|
|
13 |
|
static int k; |
|
14 |
|
|
11 |
15 |
static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, |
static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, |
12 |
16 |
int cpu, int group_fd, unsigned long flags) |
int cpu, int group_fd, unsigned long flags) |
13 |
17 |
{ |
{ |
14 |
18 |
return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags); |
return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags); |
15 |
19 |
} |
} |
16 |
20 |
|
|
17 |
|
static void test_function(void) |
|
|
21 |
|
/* |
|
22 |
|
* The heavy one: we flush the cache line before any access to i |
|
23 |
|
*/ |
|
24 |
|
static void test_function1(void) |
18 |
25 |
{ |
{ |
19 |
|
int i, j; |
|
|
26 |
|
int i; |
20 |
27 |
|
|
21 |
|
for (i = 0; i < 1000; i++) { |
|
22 |
|
asm volatile("clflush (%0);" : : "r" (&j) : "memory"); |
|
|
28 |
|
for (i = 0; i < LOOPS; i++) { |
|
29 |
|
asm volatile("clflush (%0)" : : "r" (&i) : "memory"); |
23 |
30 |
} |
} |
24 |
31 |
} |
} |
25 |
32 |
|
|
26 |
|
int main(void) |
|
|
33 |
|
/* |
|
34 |
|
* The optimized version (please note that k is not used) |
|
35 |
|
*/ |
|
36 |
|
static void test_function2(void) |
|
37 |
|
{ |
|
38 |
|
int i; |
|
39 |
|
|
|
40 |
|
for (i = 0; i < LOOPS; i++) { |
|
41 |
|
asm volatile("clflush (%0)" : : "r" (&k) : "memory"); |
|
42 |
|
} |
|
43 |
|
} |
|
44 |
|
|
|
45 |
|
// TODO: investigate PERF_TYPE_BREAKPOINT |
|
46 |
|
|
|
47 |
|
/* |
|
48 |
|
* Registers a counter |
|
49 |
|
*/ |
|
50 |
|
static int register_event(const __u32 type, __u64 config) |
27 |
51 |
{ |
{ |
28 |
|
struct perf_event_attr attr; |
|
29 |
|
long long count; |
|
30 |
52 |
int fd; |
int fd; |
31 |
|
ssize_t n; |
|
32 |
|
unsigned char buf[4096]; |
|
|
53 |
|
struct perf_event_attr attr; |
33 |
54 |
|
|
34 |
55 |
memset(&attr, 0, sizeof(struct perf_event_attr)); |
memset(&attr, 0, sizeof(struct perf_event_attr)); |
35 |
|
attr.type = PERF_TYPE_HARDWARE; |
|
36 |
56 |
attr.size = sizeof(struct perf_event_attr); |
attr.size = sizeof(struct perf_event_attr); |
37 |
|
attr.config = PERF_COUNT_HW_INSTRUCTIONS; /* CHECK: PERF_TYPE_HW_CACHE */ |
|
38 |
57 |
attr.sample_freq = 990; |
attr.sample_freq = 990; |
39 |
58 |
attr.freq = 1; |
attr.freq = 1; |
40 |
59 |
attr.enable_on_exec = 1; |
attr.enable_on_exec = 1; |
41 |
60 |
attr.disabled = 1; |
attr.disabled = 1; |
42 |
61 |
attr.exclude_kernel = 1; |
attr.exclude_kernel = 1; |
43 |
62 |
attr.exclude_hv = 1; |
attr.exclude_hv = 1; |
44 |
|
attr.wakeup_events = 16; |
|
|
63 |
|
attr.wakeup_events = 100; |
|
64 |
|
|
|
65 |
|
attr.type = type; |
|
66 |
|
attr.config = config; |
45 |
67 |
|
|
46 |
68 |
fd = perf_event_open(&attr, 0, -1, -1, PERF_FLAG_FD_CLOEXEC); |
fd = perf_event_open(&attr, 0, -1, -1, PERF_FLAG_FD_CLOEXEC); |
47 |
69 |
if (fd == -1) { |
if (fd == -1) { |
48 |
70 |
fprintf(stderr, "Cannot call perf_event_open, config %llx\n", |
fprintf(stderr, "Cannot call perf_event_open, config %llx\n", |
49 |
71 |
attr.config); |
attr.config); |
50 |
|
return EXIT_FAILURE; |
|
|
72 |
|
exit(1); |
51 |
73 |
} |
} |
52 |
74 |
|
|
53 |
|
ioctl(fd, PERF_EVENT_IOC_RESET, 0); |
|
54 |
|
ioctl(fd, PERF_EVENT_IOC_ENABLE, 0); |
|
55 |
|
test_function(); |
|
56 |
|
ioctl(fd, PERF_EVENT_IOC_DISABLE, 0); |
|
|
75 |
|
return fd; |
|
76 |
|
} |
|
77 |
|
|
|
78 |
|
/* |
|
79 |
|
* Print the counters |
|
80 |
|
*/ |
|
81 |
|
static void print_event(int fd, const char *name) |
|
82 |
|
{ |
|
83 |
|
unsigned char buf[4096]; |
|
84 |
|
long long count; |
|
85 |
|
ssize_t n; |
57 |
86 |
|
|
58 |
87 |
n = read(fd, buf, sizeof(buf)); |
n = read(fd, buf, sizeof(buf)); |
59 |
|
fprintf(stderr, "n = %zd\n", n); |
|
|
88 |
|
fprintf(stderr, "n[%s] = %zd\n", name, n); |
60 |
89 |
if (n <= 0) { |
if (n <= 0) { |
61 |
90 |
fprintf(stderr, "Cannot read: %s!\n", strerror(errno)); |
fprintf(stderr, "Cannot read: %s!\n", strerror(errno)); |
62 |
|
return EXIT_FAILURE; |
|
|
91 |
|
exit(1); |
63 |
92 |
} |
} |
64 |
|
|
|
65 |
93 |
memcpy(&count, buf, sizeof(long long)); |
memcpy(&count, buf, sizeof(long long)); |
66 |
|
printf("Instructions: %lld\n", count); |
|
|
94 |
|
printf("Count[%s]: %lld\n", name, count); |
|
95 |
|
} |
|
96 |
|
|
|
97 |
|
int main(void) |
|
98 |
|
{ |
|
99 |
|
int fd1, fd2, nr_cpus; |
|
100 |
|
|
|
101 |
|
nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); |
|
102 |
|
fprintf(stderr, "%d cpu(s)\n", nr_cpus); |
|
103 |
|
|
|
104 |
|
fd1 = register_event(PERF_TYPE_HW_CACHE, PERF_COUNT_HW_CACHE_LL |
|
105 |
|
| (PERF_COUNT_HW_CACHE_OP_READ << 8) |
|
106 |
|
| (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16)); |
|
107 |
|
fd2 = register_event(PERF_TYPE_HW_CACHE, PERF_COUNT_HW_CACHE_LL |
|
108 |
|
| (PERF_COUNT_HW_CACHE_OP_READ << 8) |
|
109 |
|
| (PERF_COUNT_HW_CACHE_RESULT_MISS << 16)); |
|
110 |
|
|
|
111 |
|
ioctl(fd1, PERF_EVENT_IOC_RESET, 0); |
|
112 |
|
ioctl(fd2, PERF_EVENT_IOC_RESET, 0); |
|
113 |
|
ioctl(fd1, PERF_EVENT_IOC_ENABLE, 0); |
|
114 |
|
ioctl(fd2, PERF_EVENT_IOC_ENABLE, 0); |
|
115 |
|
test_function1(); |
|
116 |
|
ioctl(fd1, PERF_EVENT_IOC_DISABLE, 0); |
|
117 |
|
ioctl(fd2, PERF_EVENT_IOC_DISABLE, 0); |
|
118 |
|
print_event(fd1, "llc-access"); |
|
119 |
|
print_event(fd2, "llc-miss"); |
|
120 |
|
|
|
121 |
|
ioctl(fd1, PERF_EVENT_IOC_RESET, 0); |
|
122 |
|
ioctl(fd2, PERF_EVENT_IOC_RESET, 0); |
|
123 |
|
ioctl(fd1, PERF_EVENT_IOC_ENABLE, 0); |
|
124 |
|
ioctl(fd2, PERF_EVENT_IOC_ENABLE, 0); |
|
125 |
|
test_function2(); |
|
126 |
|
ioctl(fd1, PERF_EVENT_IOC_DISABLE, 0); |
|
127 |
|
ioctl(fd2, PERF_EVENT_IOC_DISABLE, 0); |
|
128 |
|
print_event(fd1, "llc-access"); |
|
129 |
|
print_event(fd2, "llc-miss"); |
67 |
130 |
|
|
68 |
|
close(fd); |
|
|
131 |
|
close(fd1); |
|
132 |
|
close(fd2); |
69 |
133 |
|
|
70 |
134 |
return 0; |
return 0; |
71 |
135 |
} |
} |