summaryrefslogtreecommitdiff
path: root/module/sysprof-module.c
diff options
context:
space:
mode:
Diffstat (limited to 'module/sysprof-module.c')
-rw-r--r--module/sysprof-module.c431
1 files changed, 431 insertions, 0 deletions
diff --git a/module/sysprof-module.c b/module/sysprof-module.c
new file mode 100644
index 0000000..c69d702
--- /dev/null
+++ b/module/sysprof-module.c
@@ -0,0 +1,431 @@
+/* -*- c-basic-offset: 8 -*- */
+
+/* Sysprof -- Sampling, systemwide CPU profiler
+ * Copyright 2004, Red Hat, Inc.
+ * Copyright 2004, 2005, Soeren Sandmann
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#include <linux/config.h>
+#ifdef CONFIG_SMP
+# define __SMP__
+#endif
+#include <asm/atomic.h>
+#include <linux/kernel.h> /* Needed for KERN_ALERT */
+#include <linux/module.h> /* Needed by all modules */
+#include <linux/sched.h>
+
+#include <linux/proc_fs.h>
+#include <asm/uaccess.h>
+#include <linux/poll.h>
+#include <linux/highmem.h>
+#include <linux/pagemap.h>
+
+#include "sysprof-module.h"
+
+#include <linux/version.h>
+#include <linux/kallsyms.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Soeren Sandmann (sandmann@daimi.au.dk)");
+
+#define SAMPLES_PER_SECOND (100)
+#define INTERVAL (HZ / SAMPLES_PER_SECOND)
+#define N_TRACES 256
+
+static SysprofStackTrace stack_traces[N_TRACES];
+static SysprofStackTrace * head = &stack_traces[0];
+static SysprofStackTrace * tail = &stack_traces[0];
+DECLARE_WAIT_QUEUE_HEAD (wait_for_trace);
+DECLARE_WAIT_QUEUE_HEAD (wait_for_exit);
+
+static int exiting;
+
+static void on_timer(unsigned long);
+static struct timer_list timer;
+
+typedef struct userspace_reader userspace_reader;
+struct userspace_reader
+{
+ struct task_struct *task;
+ unsigned long cache_address;
+ unsigned long *cache;
+};
+
+/* This function was mostly cutted and pasted from ptrace.c
+ */
+
+/* Access another process' address space.
+ * Source/target buffer must be kernel space,
+ * Do not walk the page table directly, use get_user_pages
+ */
+
+static struct mm_struct *
+get_mm (struct task_struct *tsk)
+{
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,9)
+ struct mm_struct *mm;
+
+ task_lock (tsk);
+ mm = tsk->mm;
+ task_unlock (tsk);
+
+ return mm;
+#else
+ return get_task_mm (tsk);
+#endif
+}
+
+static void
+put_mm (struct mm_struct *mm)
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9)
+ mmput(mm);
+#endif
+}
+
+static int
+x_access_process_vm (struct task_struct *tsk,
+ unsigned long addr,
+ void *buf, int len, int write)
+{
+ struct mm_struct *mm;
+ struct vm_area_struct *vma;
+ struct page *page;
+ void *old_buf = buf;
+
+ mm = get_mm (tsk);
+
+ if (!mm)
+ return 0;
+
+ down_read(&mm->mmap_sem);
+ /* ignore errors, just check how much was sucessfully transfered */
+ while (len)
+ {
+ int bytes, ret, offset;
+ void *maddr;
+
+ ret = get_user_pages(tsk, mm, addr, 1,
+ write, 1, &page, &vma);
+ if (ret <= 0) {
+ break;
+ }
+ bytes = len;
+ offset = addr & (PAGE_SIZE-1);
+ if (bytes > PAGE_SIZE-offset)
+ bytes = PAGE_SIZE-offset;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11)
+
+ flush_cache_page(vma, addr);
+
+#endif
+
+ maddr = kmap(page);
+ if (write) {
+ copy_to_user_page(vma, page, addr,
+ maddr + offset, buf, bytes);
+ set_page_dirty_lock(page);
+ } else {
+ copy_from_user_page(vma, page, addr,
+ buf, maddr + offset, bytes);
+ }
+ kunmap(page);
+ page_cache_release(page);
+ len -= bytes;
+ buf += bytes;
+ addr += bytes;
+ }
+ up_read(&mm->mmap_sem);
+
+ put_mm (mm);
+
+ return buf - old_buf;
+}
+
+static int
+init_userspace_reader (userspace_reader *reader,
+ struct task_struct *task)
+{
+ reader->task = task;
+ reader->cache = kmalloc (PAGE_SIZE, GFP_KERNEL);
+ if (!reader->cache)
+ return 0;
+ reader->cache_address = 0x0;
+ return 1;
+}
+
+static int
+page_readable (userspace_reader *reader, unsigned long address)
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION (2,6,11)
+ struct mm_struct *mm;
+ int result = 1;
+
+ mm = get_mm (reader->task);
+
+ if (!mm)
+ return 0;
+
+ if (!check_user_page_readable (reader->task->mm, address))
+ result = 0;
+
+ put_mm (mm);
+
+ return result;
+#endif
+
+ return 1;
+}
+
+static int
+read_user_space (userspace_reader *reader,
+ unsigned long address,
+ unsigned long *result)
+{
+ unsigned long cache_address = reader->cache_address;
+ int index, r;
+
+ if (!cache_address || cache_address != (address & PAGE_MASK))
+ {
+ if (!page_readable (reader, address))
+ return 0;
+
+ cache_address = address & PAGE_MASK;
+
+ r = x_access_process_vm (reader->task, cache_address,
+ reader->cache, PAGE_SIZE, 0);
+
+ if (r != PAGE_SIZE) {
+ return 0;
+ }
+
+ reader->cache_address = cache_address;
+ }
+
+ index = (address - cache_address) / sizeof (unsigned long);
+
+ *result = reader->cache[index];
+ return 1;
+}
+
+static void
+done_userspace_reader (userspace_reader *reader)
+{
+ kfree (reader->cache);
+}
+
+typedef struct StackFrame StackFrame;
+struct StackFrame {
+ unsigned long next;
+ unsigned long return_address;
+};
+
+static int
+read_frame (userspace_reader *reader, unsigned long addr, StackFrame *frame)
+{
+ if (!addr || !frame)
+ return 0;
+
+ frame->next = 0;
+ frame->return_address = 0;
+
+ if (!read_user_space (reader, addr, &(frame->next)))
+ return 0;
+
+ if (!read_user_space (reader, addr + 4, &(frame->return_address)))
+ return 0;
+
+ return 1;
+}
+
+static struct pt_regs *
+get_regs (struct task_struct *task)
+{
+ /* This seems to work in Linux 2.6.11 */
+ void *stack_top = (void *)task->thread.esp0;
+ return stack_top - sizeof(struct pt_regs);
+
+#if 0
+ /* This used to work, but doesn't anymore */
+ return ((struct pt_regs *) (THREAD_SIZE + (unsigned long) task->thread_info)) - 1;
+#endif
+
+#if 0
+ /* One might suspect that this would work, but it doesn't
+ */
+ return task_pt_regs (task);
+#endif
+}
+
+static void
+generate_stack_trace(struct task_struct *task,
+ SysprofStackTrace *trace)
+{
+#ifdef CONFIG_HIGHMEM
+# define START_OF_STACK 0xFF000000
+#else
+# define START_OF_STACK 0xBFFFFFFF
+#endif
+ struct pt_regs *regs = get_regs (task); // task->thread.regs;
+
+ StackFrame frame;
+ unsigned long addr;
+ userspace_reader reader;
+ int i;
+
+ memset(trace, 0, sizeof (SysprofStackTrace));
+
+ trace->pid = task->pid;
+ trace->truncated = 0;
+
+ trace->addresses[0] = (void *)regs->eip;
+
+ i = 1;
+
+ addr = regs->ebp;
+
+#if 0
+ printk (KERN_ALERT "ebp: %p\n", regs->ebp);
+#endif
+
+ if (init_userspace_reader (&reader, task))
+ {
+ while (i < SYSPROF_MAX_ADDRESSES &&
+ read_frame (&reader, addr, &frame) &&
+ addr < START_OF_STACK && addr >= regs->esp)
+ {
+ trace->addresses[i++] = (void *)frame.return_address;
+ addr = frame.next;
+ }
+
+ done_userspace_reader (&reader);
+ }
+
+ trace->n_addresses = i;
+ if (i == SYSPROF_MAX_ADDRESSES)
+ trace->truncated = 1;
+ else
+ trace->truncated = 0;
+}
+
+DECLARE_WAIT_QUEUE_HEAD (wait_to_be_scanned);
+
+static void
+do_generate (void *data)
+{
+ struct task_struct *task = data;
+
+ generate_stack_trace(task, head);
+
+ if (head++ == &stack_traces[N_TRACES - 1])
+ head = &stack_traces[0];
+
+ wake_up (&wait_for_trace);
+
+ /* If a task dies between the time we see it in on_timer and
+ * the time we get here, it will be leaked. If __put_task_struct9)
+ * was exported, then we could do this properly
+ */
+ atomic_dec (&(task)->usage);
+
+ mod_timer(&timer, jiffies + INTERVAL);
+}
+
+struct work_struct work;
+
+static void
+on_timer(unsigned long dong)
+{
+ if (current && current->state == TASK_RUNNING && current->pid != 0)
+ {
+ get_task_struct (current);
+
+ INIT_WORK (&work, do_generate, current);
+
+ schedule_work (&work);
+ }
+ else
+ {
+ mod_timer(&timer, jiffies + INTERVAL);
+ }
+}
+
+static int
+procfile_read(char *buffer,
+ char **buffer_location,
+ off_t offset,
+ int buffer_len,
+ int *eof,
+ void *data)
+{
+ if (head == tail)
+ return -EWOULDBLOCK;
+ wait_event_interruptible (wait_for_trace, head != tail);
+ *buffer_location = (char *)tail;
+ if (tail++ == &stack_traces[N_TRACES - 1])
+ tail = &stack_traces[0];
+ return sizeof (SysprofStackTrace);
+}
+
+struct proc_dir_entry *trace_proc_file;
+static unsigned int
+procfile_poll(struct file *filp, poll_table *poll_table)
+{
+ if (head != tail) {
+ return POLLIN | POLLRDNORM;
+ }
+ poll_wait(filp, &wait_for_trace, poll_table);
+ if (head != tail) {
+ return POLLIN | POLLRDNORM;
+ }
+ return 0;
+}
+
+int
+init_module(void)
+{
+ trace_proc_file =
+ create_proc_entry ("sysprof-trace", S_IFREG | S_IRUGO, &proc_root);
+
+ if (!trace_proc_file)
+ return 1;
+
+ trace_proc_file->read_proc = procfile_read;
+ trace_proc_file->proc_fops->poll = procfile_poll;
+ trace_proc_file->size = sizeof (SysprofStackTrace);
+
+ init_timer(&timer);
+ timer.function = on_timer;
+ mod_timer(&timer, jiffies + INTERVAL);
+
+ printk(KERN_ALERT "starting sysprof module\n");
+
+ return 0;
+}
+
+void
+cleanup_module(void)
+{
+ exiting = 1;
+
+ del_timer (&timer);
+
+ remove_proc_entry("sysprof-trace", &proc_root);
+
+ printk(KERN_ALERT "stopping sysprof module\n");
+}