/*
* Stop Row Hammer
* Copyright: Catalin(ux) M. BOIE
* After an idea found on Project Zero Google site
* (http://googleprojectzero.blogspot.ro/2015/03/exploiting-dram-rowhammer-bug-to-gain.html)
*
* TODO: investigate PERF_TYPE_BREAKPOINT
*/
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sched.h>
#include <linux/perf_event.h>
#include <asm/unistd.h>
#include <sys/epoll.h>
#include <signal.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
/*
 * Tunables.
 */
/* Misses-per-second score above which a pid gets suspended */
#define MISSES_SUSPEND_TRIGGER 20000
/*
 * Perf sample period: one sample is requested every MISS_TRIGGER events,
 * and every sample adds MISS_TRIGGER to the miss count of its pid.
 */
#define MISS_TRIGGER 10000
/* How long a suspended process is meant to stay stopped (in milliseconds) */
#define PENALTY_TIME 100
/* Size of the perf data area, in pages (one more page is mapped in front of it) */
#define MMAP_BUF_SIZE 1
/* Number of buckets in pid_hash */
#define PID_HASH_SIZE 128
/*
 * Compiler-only barrier: an empty asm statement with a "memory" clobber.
 * It emits no instruction; it only stops the compiler from caching or
 * reordering memory accesses across it.
 */
#define barrier() asm volatile ("" ::: "memory")
/*
 * This structure keeps track of one suspect pid.
 * Cells are chained per hash bucket through @next and, while the pid is
 * waiting to be resumed, through @next_in_queue as well.
 */
struct pid_cell
{
int pid; /* pid taken from the perf sample */
struct pid_cell *next; /* next cell in the same pid_hash bucket */
struct pid_cell *next_in_queue; /* used for suspend queue */
unsigned long misses; /* how many misses this process got (grows by MISS_TRIGGER per sample) */
unsigned int suspends; /* how many suspends this pid got */
struct timeval start; /* start of the measuring window; tv_sec == 0 means "not started" */
struct timeval wake; /* moment when the pid should be resumed */
unsigned int uid; /* owner of the process; 0x0FFFFFFF until lookup_proc_info() ran */
char cmd[128]; /* command line with NULs turned into spaces; "?" until looked up */
};
/* Hash of suspect pids; the bucket is chosen as pid modulo PID_HASH_SIZE */
static struct pid_cell *pid_hash[PID_HASH_SIZE];
/* Queue that will store the entries that will need unblocking */
static struct pid_cell *queue_head, *queue_tail;
/* System page size, filled in by main() from sysconf(_SC_PAGE_SIZE) */
static unsigned long page_size;
/*
 * Returns t1 - t2 expressed in milliseconds.
 * The seconds and the microseconds parts are converted separately, each
 * one truncated toward zero, and then summed. The result is negative when
 * t1 is earlier than t2.
 */
static long time_diff(const struct timeval *t1, const struct timeval *t2)
{
	const long from_seconds = (t1->tv_sec - t2->tv_sec) * 1000;
	const long from_useconds = (t1->tv_usec - t2->tv_usec) / 1000;

	return from_seconds + from_useconds;
}
/*
 * Invokes the perf_event_open system call through syscall().
 * All arguments are handed over unchanged; the value returned by
 * syscall() is returned as is (callers in this file treat -1 as failure).
 */
static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
	int cpu, int group_fd, unsigned long flags)
{
	const long ret = syscall(__NR_perf_event_open, hw_event, pid, cpu,
		group_fd, flags);

	return ret;
}
/*
 * Fills in the owner and the command line of a suspect pid.
 *
 * The owner is the st_uid reported by stat() on /proc/<pid>/cmdline.
 * When that stat() fails nothing is changed. When the file cannot be
 * opened or read, @p->cmd is left alone but @p->uid is still updated.
 * At most sizeof(p->cmd) - 1 bytes are read; NUL bytes inside the data
 * are replaced by spaces so the result prints as one string.
 */
static void lookup_proc_info(struct pid_cell *p)
{
	char cmdline_path[128];
	struct stat st;
	int cmd_fd;

	snprintf(cmdline_path, sizeof(cmdline_path), "/proc/%d/cmdline", p->pid);
	if (stat(cmdline_path, &st) != 0)
		return;

	cmd_fd = open(cmdline_path, O_RDONLY);
	if (cmd_fd != -1) {
		ssize_t got = read(cmd_fd, p->cmd, sizeof(p->cmd) - 1);
		ssize_t k;

		/* Does nothing when the read failed or returned no data */
		for (k = 0; k < got; k++) {
			if (p->cmd[k] == '\0')
				p->cmd[k] = ' ';
		}
		if (got > 0)
			p->cmd[got] = '\0';

		close(cmd_fd);
	}

	p->uid = st.st_uid;
}
/*
 * Suspends a pid and queues it for a later resume.
 *
 * @now: current time, used as base for the wake-up moment
 * @p:   the suspect to stop
 *
 * Nothing is done when @p is this very process, when it belongs to root,
 * when its uid is not the hard-coded test uid (see TODO below), when it
 * is already waiting in the resume queue, or when SIGSTOP cannot be
 * delivered. Otherwise the process is stopped, @p->suspends is bumped,
 * @p->wake is set to @now + PENALTY_TIME milliseconds and @p is appended
 * to the resume queue.
 */
static void pid_suspend(const struct timeval *now, struct pid_cell *p)
{
	struct pid_cell *it;

	/* I will not block myself */
	if (p->pid == getpid())
		return;

	/* We do not block root processes */
	if (p->uid == 0)
		return;

	/* TODO: hard-coded uid; any other user is never suspended */
	if (p->uid != 503)
		return;

	/*
	 * Samples recorded before the stop took effect may ask for a second
	 * suspend. Linking the same cell twice would corrupt the queue (it
	 * may even end up pointing to itself), so an entry that is already
	 * queued is left alone.
	 */
	for (it = queue_head; it != NULL; it = it->next_in_queue) {
		if (it == p)
			return;
	}

	if (kill(p->pid, SIGSTOP) != 0) {
		/* Not stopped, so there is nothing to resume later */
		fprintf(stderr, "Cannot suspend pid: %s!\n", strerror(errno));
		return;
	}

	fprintf(stderr, "Suspended pid %d (uid %u) [%s]...\n",
		p->pid, p->uid, p->cmd);
	p->suspends++;

	/* PENALTY_TIME is in milliseconds, tv_usec is in microseconds */
	p->wake = *now;
	p->wake.tv_usec += PENALTY_TIME * 1000L;
	p->wake.tv_sec += p->wake.tv_usec / 1000000;
	p->wake.tv_usec %= 1000000;

	/* To be able to resume it, add it to the resume queue */
	p->next_in_queue = NULL; /* may still hold a link from an older round */
	if (queue_head == NULL)
		queue_head = p;
	else
		queue_tail->next_in_queue = p;
	queue_tail = p;
}
/*
 * Un-suspends a pid.
 * Sends SIGCONT (a failure is only reported on stderr) and then clears
 * the miss counter and the start of the measuring window, so the pid is
 * scored from scratch afterwards. The resume queue is not touched here.
 */
static void pid_unsuspend(struct pid_cell *p)
{
	static const struct timeval not_started = { 0, 0 };

	fprintf(stderr, "Un-suspend pid %d (uid %u) [%s]...\n",
		p->pid, p->uid, p->cmd);

	if (kill(p->pid, SIGCONT) != 0)
		fprintf(stderr, "Cannot un-suspend pid: %s!\n", strerror(errno));

	p->misses = 0;
	p->start = not_started;
}
/*
 * Accounts one perf sample for @pid and suspends the pid if needed.
 *
 * The first sample seen for a pid only creates its cell. Every further
 * sample adds MISS_TRIGGER misses and, once the measuring window is
 * started, computes the average misses per second since the window
 * start. A score above MISSES_SUSPEND_TRIGGER leads to pid_suspend().
 *
 * Allocation failure is silently ignored (the sample is dropped).
 */
static void pid_hash_add(const int pid)
{
	unsigned int i;
	struct pid_cell *p, *q;
	struct timeval now;
	float misses_per_second;
	int diff;

	/* Hash on the unsigned value so a negative pid cannot give a negative index */
	i = (unsigned int) pid % PID_HASH_SIZE;

	/* First, search for it */
	for (q = pid_hash[i]; q != NULL; q = q->next) {
		if (q->pid == pid)
			break;
	}

	gettimeofday(&now, NULL);

	if (q) {
		/* Found an old entry */
		q->misses += MISS_TRIGGER;

		/* Window was reset (see pid_unsuspend): restart it now */
		if (q->start.tv_sec == 0) {
			q->start = now;
			return;
		}

		diff = time_diff(&now, &q->start);
		if (diff < 0) {
			/* Clock went backwards: the window is meaningless, restart it */
			q->start = now;
			q->misses = 0;
			return;
		}
		/* Less than 1ms elapsed: count it as 1ms to avoid dividing by zero */
		if (diff == 0)
			diff = 1;

		/* Compute the number of misses per second */
		misses_per_second = q->misses;
		misses_per_second /= diff;
		misses_per_second *= 1000; /* ms -> s */

		/* its time to look-up proc info */
		if (q->uid == 0x0FFFFFFF)
			lookup_proc_info(q);

		if (strstr(q->cmd, "attack2"))
			fprintf(stderr, "attack2: [%s] diff=%dms misses=%lu mps=%.3f\n",
				q->cmd, diff, q->misses, misses_per_second);

		if (misses_per_second > MISSES_SUSPEND_TRIGGER) {
			fprintf(stderr, "pid %d [%s] has %.3f/s score (diff=%dms misses=%lu\n",
				q->pid, q->cmd, misses_per_second, diff, q->misses);
			pid_suspend(&now, q);
		}

		/* The pid is already tracked: never fall through to the add path */
		return;
	}

	/* Add it to the hash; calloc leaves suspends, wake and the links zeroed */
	p = calloc(1, sizeof(*p));
	if (!p)
		return;

	p->pid = pid;
	p->start = now;
	p->uid = 0x0FFFFFFF; /* "not looked up yet" marker */
	strcpy(p->cmd, "?");

	/* Append at the end of the bucket chain */
	if (pid_hash[i] == NULL) {
		pid_hash[i] = p;
	} else {
		q = pid_hash[i];
		while (q->next)
			q = q->next;
		q->next = p;
	}
}
/*
 * Registers a sampling counter on one cpu and maps its ring buffer.
 *
 * @cpu:    cpu to monitor (the event is opened with pid -1, i.e. not
 *          bound to a single task)
 * @type:   perf event type (attr.type)
 * @config: perf event config (attr.config)
 * @m:      out: address of the mapping, 1 + MMAP_BUF_SIZE pages long
 *
 * The sample period is MISS_TRIGGER and each sample carries ip, pid/tid,
 * addr, cpu and period. Kernel-side events are excluded and a wake-up is
 * requested for every sample. mmap/comm record generation is not asked
 * for (those attr bits stay zero).
 *
 * Returns the perf fd. On any failure a message is printed and the
 * process exits with status 1.
 */
static int register_event(const unsigned int cpu, const __u32 type,
	__u64 config, void **m)
{
	int fd, saved_errno;
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = type;
	attr.config = config;
	attr.sample_type = PERF_SAMPLE_IP
		| PERF_SAMPLE_TID
		| PERF_SAMPLE_ADDR
		| PERF_SAMPLE_PERIOD
		| PERF_SAMPLE_CPU;
	attr.sample_period = MISS_TRIGGER;
	attr.disabled = 0;
	attr.exclude_kernel = 1;
	attr.wakeup_events = 1;

	fd = perf_event_open(&attr, -1 /* pid */, cpu,
		-1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
	if (fd == -1) {
		/* Report errno too: EACCES, ENOENT, ... need different remedies */
		fprintf(stderr, "Cannot call perf_event_open, config %llx (%s)\n",
			(unsigned long long) attr.config, strerror(errno));
		exit(1);
	}

	*m = mmap(NULL, (1 + MMAP_BUF_SIZE) * page_size,
		PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	/* Keep errno safe: the fprintf below is allowed to change it */
	saved_errno = errno;
	fprintf(stderr, "cpu=%u fd=%d *m=%p\n", cpu, fd, *m);
	if (*m == MAP_FAILED) {
		fprintf(stderr, "Cannot mmap (%s)!\n", strerror(saved_errno));
		exit(1);
	}

	return fd;
}
/*
 * Increments q with @inc, taking in consideration the wrap around.
 *
 * @m:   start of the perf mapping (one header page followed by
 *       MMAP_BUF_SIZE data pages)
 * @q:   in/out: cursor inside the data pages
 * @inc: number of bytes to advance
 *
 * When the cursor reaches or passes the end of the data pages it
 * continues from the first data byte, keeping whatever distance it went
 * past the end.
 */
static inline void q_inc(void *m, void **q, const unsigned int inc)
{
	char *data_start = (char *) m + page_size;
	const size_t data_size = MMAP_BUF_SIZE * page_size;
	char *data_end = data_start + data_size;
	char *cur = *q;
	size_t room;

	/* Bytes left before the end; 0 when the cursor already sits on it */
	room = (cur < data_end) ? (size_t) (data_end - cur) : 0;

	if (inc >= room)
		*q = data_start + (inc - room) % data_size;
	else
		*q = cur + inc;
}
/*
 * Drains one perf ring buffer.
 *
 * @m: start of the mapping returned by register_event()
 *
 * Every record between data_tail and data_head is decoded. Samples are
 * passed, by pid, to pid_hash_add(); mmap/mmap2/comm/exit/fork records
 * are only printed; anything else is reported as unknown. At the end
 * data_tail is moved to the head that was read, which hands the space
 * back to the kernel. A record with an impossible size stops the
 * decoding and the rest of the buffer is dropped.
 *
 * Always returns 0.
 */
static int check(void *m)
{
	struct perf_event_mmap_page *mp = m;
	struct perf_event_header *h;
	void *p, *q;
	unsigned long long *p_addr, *p_len, *p_pgoff, *p_time;
	unsigned int *p_pid, *p_ppid, *p_tid, *p_ptid;
	unsigned int seq;
	unsigned long long head, tail, rest;
	char *filename, *comm;

	/* Retry until head and tail were read without mp->lock changing */
	do {
		seq = mp->lock;
		barrier();
		head = mp->data_head;
		tail = mp->data_tail;
		barrier();
	} while (mp->lock != seq);

	/* Dump rest of buffers */
	rest = head - tail;
	while (rest > 0) {
		p = (char *) m + page_size + tail % (MMAP_BUF_SIZE * page_size);
		h = p;

		/*
		 * A size of 0 would loop forever and a size above what is
		 * left would underflow @rest: give up on this buffer.
		 */
		if (h->size < sizeof(struct perf_event_header) || h->size > rest) {
			fprintf(stderr, "\tBad record size %hu (rest=%llu), dropping\n",
				h->size, rest);
			break;
		}

		/* Skip the header; it may be the last thing before the wrap */
		q = p;
		q_inc(m, &q, sizeof(struct perf_event_header));

		switch (h->type) {
		case PERF_RECORD_MMAP:
			p_pid = q; q_inc(m, &q, 4);
			p_tid = q; q_inc(m, &q, 4);
			p_addr = q; q_inc(m, &q, 8);
			p_len = q; q_inc(m, &q, 8);
			p_pgoff = q; q_inc(m, &q, 8);
			filename = q;
			fprintf(stderr, "\tmmap pid=%u tid=%u addr=%llx"
				" len=%llu pgoff=%llx filename=%s.\n",
				*p_pid, *p_tid, *p_addr,
				*p_len, *p_pgoff, filename);
			break;

		case PERF_RECORD_SAMPLE:
			/*
			 * Layout follows the sample_type set in register_event:
			 * ip(8) pid(4) tid(4) addr(8) cpu(4)+res(4) period(8).
			 * Only the pid is needed.
			 */
			q_inc(m, &q, 8); /* ip */
			p_pid = q;
			pid_hash_add(*p_pid);
			break;

		case PERF_RECORD_EXIT:
			p_pid = q; q_inc(m, &q, 4);
			p_ppid = q; q_inc(m, &q, 4);
			p_tid = q; q_inc(m, &q, 4);
			p_ptid = q; q_inc(m, &q, 4);
			p_time = q;
			fprintf(stderr, "\texit pid=%u ppid=%u tid=%u"
				" ptid=%u time=%llu\n",
				*p_pid, *p_ppid, *p_tid, *p_ptid,
				*p_time);
			break;

		case PERF_RECORD_COMM:
			p_pid = q; q_inc(m, &q, 4);
			p_tid = q; q_inc(m, &q, 4);
			comm = q;
			fprintf(stderr, "\tcomm pid=%u tid=%u comm=%s\n",
				*p_pid, *p_tid, comm);
			break;

		case PERF_RECORD_MMAP2:
			p_pid = q; q_inc(m, &q, 4);
			p_tid = q; q_inc(m, &q, 4);
			p_addr = q; q_inc(m, &q, 8);
			p_len = q; q_inc(m, &q, 8);
			p_pgoff = q; q_inc(m, &q, 8);
			q_inc(m, &q, 4); /* maj */
			q_inc(m, &q, 4); /* min */
			q_inc(m, &q, 8); /* ino */
			q_inc(m, &q, 8); /* ino_generation */
			q_inc(m, &q, 4); /* prot */
			q_inc(m, &q, 4); /* flags */
			/* The name follows the fixed fields (not the record start) */
			filename = q;
			fprintf(stderr, "\tmmap pid=%u tid=%u addr=%llx"
				" len=%llu pgoff=%llx filename=%s.\n",
				*p_pid, *p_tid, *p_addr,
				*p_len, *p_pgoff, filename);
			break;

		case PERF_RECORD_FORK:
			p_pid = q; q_inc(m, &q, 4);
			p_ppid = q; q_inc(m, &q, 4);
			p_tid = q; q_inc(m, &q, 4);
			p_ptid = q; q_inc(m, &q, 4);
			p_time = q;
			fprintf(stderr, "\tfork pid=%u ppid=%u tid=%u"
				" ptid=%u time=%llu\n",
				*p_pid, *p_ppid, *p_tid, *p_ptid,
				*p_time);
			break;

		default:
			fprintf(stderr, "\tUnknown type %u\n", h->type);
			break;
		}

		tail += h->size;
		rest -= h->size;
	}

	/* Finish all reads of the records before giving the space back */
	barrier();
	mp->data_tail = head;

	return 0;
}
int main(void)
{
struct sched_param param;
int fd1, fd2[2], nr_cpus, r, efd, i, fd;
void *m1, *m2[2];
struct epoll_event ee;
struct pid_cell *p, *prev;
struct timeval now;
int next_wake, diff;
nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
fprintf(stderr, "%d cpu(s) online.\n", nr_cpus);
page_size = sysconf(_SC_PAGE_SIZE);
fprintf(stderr, "page size is %lu\n", page_size);
memset(pid_hash, 0, sizeof(pid_hash));
/* TODO: move to 99? */
param.sched_priority = 1;
if (sched_setscheduler(0, SCHED_FIFO, ¶m)) {
fprintf(stderr, "Cannot set fifo schedule (%s)!", strerror(errno));
return 1;
}
efd = epoll_create1(EPOLL_CLOEXEC);
if (efd == -1) {
fprintf(stderr, "Cannot create epoll fd (%s)!\n",
strerror(errno));
return 1;
}
/*
fd1 = register_event(0, PERF_TYPE_HW_CACHE, PERF_COUNT_HW_CACHE_LL
| (PERF_COUNT_HW_CACHE_OP_READ << 8)
| (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16), &m1);
*/
for (i = 0; i < 2; i++) {
fd2[i] = register_event(i, PERF_TYPE_HW_CACHE, PERF_COUNT_HW_CACHE_LL
| (PERF_COUNT_HW_CACHE_OP_READ << 8)
| (PERF_COUNT_HW_CACHE_RESULT_MISS << 16), &m2[i]);
fprintf(stderr, "fd2[%d]=%d m2[%d]=%p\n",
i, fd2[i], i, m2[i]);
memset(&ee, 0, sizeof(struct epoll_event));
ee.events = EPOLLIN;
ee.data.fd = fd2[i];
r = epoll_ctl(efd, EPOLL_CTL_ADD, fd2[i], &ee);
if (r != 0) {
fprintf(stderr, "Canot add fd to epoll (%s)!\n",
strerror(errno));
return 1;
}
}
//ioctl(fd1, PERF_EVENT_IOC_RESET, 0);
//ioctl(fd2, PERF_EVENT_IOC_RESET, 0);
//ioctl(fd1, PERF_EVENT_IOC_ENABLE, 0);
//ioctl(fd2, PERF_EVENT_IOC_ENABLE, 0);
while (1) {
next_wake = -1;
if (queue_head) {
gettimeofday(&now, NULL);
next_wake = time_diff(&queue_head->wake, &now);
}
r = epoll_wait(efd, &ee, 1, next_wake);
if (r == -1) {
if (errno == EINTR)
continue;
fprintf(stderr, "Cannot wait (%s)!\n", strerror(errno));
return 1;
}
/* Process suspend queue */
gettimeofday(&now, NULL);
p = queue_head;
prev = NULL;
while (p) {
diff = time_diff(&p->wake, &now);
fprintf(stderr, "Q: p=%p diff=%ums\n", p, diff);
if (diff <= 0) {
pid_unsuspend(p);
if (prev)
prev->next_in_queue = p->next_in_queue;
else
queue_head = p->next_in_queue;
}
prev = p;
p = p->next_in_queue;
}
queue_tail = prev;
if (r == 0)
continue;
fd = ee.data.fd;
for (i = 0; i < 2; i++) {
if (fd == fd2[i])
break;
}
//fprintf(stderr, "Got an event fd=%d i=%d\n", fd, i);
check(m2[i]);
}
//ioctl(fd1, PERF_EVENT_IOC_DISABLE, 0);
//ioctl(fd2, PERF_EVENT_IOC_DISABLE, 0);
munmap(m1, 0); /* TODO: 0 is correct? */
munmap(m2, 0);
close(fd1);
for (i = 0; i < 2; i++)
close(fd2[i]);
close(efd);
return 0;
}