diff options
Diffstat (limited to 'fs/proc')
-rw-r--r-- | fs/proc/array.c | 5 | ||||
-rw-r--r-- | fs/proc/base.c | 192 | ||||
-rw-r--r-- | fs/proc/generic.c | 13 | ||||
-rw-r--r-- | fs/proc/inode.c | 2 | ||||
-rw-r--r-- | fs/proc/task_mmu.c | 357 |
5 files changed, 467 insertions, 102 deletions
diff --git a/fs/proc/array.c b/fs/proc/array.c index 37668fe998ad..d88d518d30f6 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -159,6 +159,7 @@ static inline char * task_state(struct task_struct *p, char *buffer) { struct group_info *group_info; int g; + struct fdtable *fdt = NULL; read_lock(&tasklist_lock); buffer += sprintf(buffer, @@ -179,10 +180,12 @@ static inline char * task_state(struct task_struct *p, char *buffer) p->gid, p->egid, p->sgid, p->fsgid); read_unlock(&tasklist_lock); task_lock(p); + if (p->files) + fdt = files_fdtable(p->files); buffer += sprintf(buffer, "FDSize:\t%d\n" "Groups:\t", - p->files ? p->files->max_fds : 0); + fdt ? fdt->max_fds : 0); group_info = p->group_info; get_group_info(group_info); diff --git a/fs/proc/base.c b/fs/proc/base.c index 491f2d9f89ac..23db452ab428 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -11,6 +11,40 @@ * go into icache. We cache the reference to task_struct upon lookup too. * Eventually it should become a filesystem in its own. We don't use the * rest of procfs anymore. + * + * + * Changelog: + * 17-Jan-2005 + * Allan Bezerra + * Bruna Moreira <bruna.moreira@indt.org.br> + * Edjard Mota <edjard.mota@indt.org.br> + * Ilias Biris <ilias.biris@indt.org.br> + * Mauricio Lin <mauricio.lin@indt.org.br> + * + * Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT + * + * A new process specific entry (smaps) included in /proc. It shows the + * size of rss for each memory area. The maps entry lacks information + * about physical memory size (rss) for each mapped file, i.e., + * rss information for executables and library files. + * This additional information is useful for any tools that need to know + * about physical memory consumption for a process specific library. + * + * Changelog: + * 21-Feb-2005 + * Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT + * Pud inclusion in the page table walking. + * + * ChangeLog: + * 10-Mar-2005 + * 10LE Instituto Nokia de Tecnologia - INdT: + * A better way to walks through the page table as suggested by Hugh Dickins. + * + * Simo Piiroinen <simo.piiroinen@nokia.com>: + * Smaps information related to shared, private, clean and dirty pages. + * + * Paul Mundt <paul.mundt@nokia.com>: + * Overall revision about smaps. */ #include <asm/uaccess.h> @@ -28,6 +62,7 @@ #include <linux/namespace.h> #include <linux/mm.h> #include <linux/smp_lock.h> +#include <linux/rcupdate.h> #include <linux/kallsyms.h> #include <linux/mount.h> #include <linux/security.h> @@ -65,8 +100,10 @@ enum pid_directory_inos { PROC_TGID_STAT, PROC_TGID_STATM, PROC_TGID_MAPS, + PROC_TGID_NUMA_MAPS, PROC_TGID_MOUNTS, PROC_TGID_WCHAN, + PROC_TGID_SMAPS, #ifdef CONFIG_SCHEDSTATS PROC_TGID_SCHEDSTAT, #endif @@ -83,7 +120,6 @@ enum pid_directory_inos { #ifdef CONFIG_AUDITSYSCALL PROC_TGID_LOGINUID, #endif - PROC_TGID_FD_DIR, PROC_TGID_OOM_SCORE, PROC_TGID_OOM_ADJUST, PROC_TID_INO, @@ -102,8 +138,10 @@ enum pid_directory_inos { PROC_TID_STAT, PROC_TID_STATM, PROC_TID_MAPS, + PROC_TID_NUMA_MAPS, PROC_TID_MOUNTS, PROC_TID_WCHAN, + PROC_TID_SMAPS, #ifdef CONFIG_SCHEDSTATS PROC_TID_SCHEDSTAT, #endif @@ -120,9 +158,11 @@ enum pid_directory_inos { #ifdef CONFIG_AUDITSYSCALL PROC_TID_LOGINUID, #endif - PROC_TID_FD_DIR = 0x8000, /* 0x8000-0xffff */ PROC_TID_OOM_SCORE, PROC_TID_OOM_ADJUST, + + /* Add new entries before this */ + PROC_TID_FD_DIR = 0x8000, /* 0x8000-0xffff */ }; struct pid_entry { @@ -144,6 +184,9 @@ static struct pid_entry tgid_base_stuff[] = { E(PROC_TGID_STAT, "stat", S_IFREG|S_IRUGO), E(PROC_TGID_STATM, "statm", S_IFREG|S_IRUGO), E(PROC_TGID_MAPS, "maps", S_IFREG|S_IRUGO), +#ifdef CONFIG_NUMA + E(PROC_TGID_NUMA_MAPS, "numa_maps", S_IFREG|S_IRUGO), +#endif E(PROC_TGID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR), #ifdef CONFIG_SECCOMP E(PROC_TGID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR), @@ -152,6 +195,7 @@ static struct pid_entry tgid_base_stuff[] = { E(PROC_TGID_ROOT, "root", S_IFLNK|S_IRWXUGO), E(PROC_TGID_EXE, "exe", S_IFLNK|S_IRWXUGO), E(PROC_TGID_MOUNTS, "mounts", S_IFREG|S_IRUGO), + E(PROC_TGID_SMAPS, "smaps", S_IFREG|S_IRUGO), #ifdef CONFIG_SECURITY E(PROC_TGID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO), #endif @@ -180,6 +224,9 @@ static struct pid_entry tid_base_stuff[] = { E(PROC_TID_STAT, "stat", S_IFREG|S_IRUGO), E(PROC_TID_STATM, "statm", S_IFREG|S_IRUGO), E(PROC_TID_MAPS, "maps", S_IFREG|S_IRUGO), +#ifdef CONFIG_NUMA + E(PROC_TID_NUMA_MAPS, "numa_maps", S_IFREG|S_IRUGO), +#endif E(PROC_TID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR), #ifdef CONFIG_SECCOMP E(PROC_TID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR), @@ -188,6 +235,7 @@ static struct pid_entry tid_base_stuff[] = { E(PROC_TID_ROOT, "root", S_IFLNK|S_IRWXUGO), E(PROC_TID_EXE, "exe", S_IFLNK|S_IRWXUGO), E(PROC_TID_MOUNTS, "mounts", S_IFREG|S_IRUGO), + E(PROC_TID_SMAPS, "smaps", S_IFREG|S_IRUGO), #ifdef CONFIG_SECURITY E(PROC_TID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO), #endif @@ -236,30 +284,36 @@ static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsm files = get_files_struct(task); if (files) { - spin_lock(&files->file_lock); + rcu_read_lock(); file = fcheck_files(files, fd); if (file) { *mnt = mntget(file->f_vfsmnt); *dentry = dget(file->f_dentry); - spin_unlock(&files->file_lock); + rcu_read_unlock(); put_files_struct(files); return 0; } - spin_unlock(&files->file_lock); + rcu_read_unlock(); put_files_struct(files); } return -ENOENT; } -static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) +static struct fs_struct *get_fs_struct(struct task_struct *task) { struct fs_struct *fs; - int result = -ENOENT; - task_lock(proc_task(inode)); - fs = proc_task(inode)->fs; + task_lock(task); + fs = task->fs; if(fs) atomic_inc(&fs->count); - task_unlock(proc_task(inode)); + task_unlock(task); + return fs; +} + +static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) +{ + struct fs_struct *fs = get_fs_struct(proc_task(inode)); + int result = -ENOENT; if (fs) { read_lock(&fs->lock); *mnt = mntget(fs->pwdmnt); @@ -273,13 +327,8 @@ static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfs static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) { - struct fs_struct *fs; + struct fs_struct *fs = get_fs_struct(proc_task(inode)); int result = -ENOENT; - task_lock(proc_task(inode)); - fs = proc_task(inode)->fs; - if(fs) - atomic_inc(&fs->count); - task_unlock(proc_task(inode)); if (fs) { read_lock(&fs->lock); *mnt = mntget(fs->rootmnt); @@ -298,33 +347,6 @@ static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vf (task->state == TASK_STOPPED || task->state == TASK_TRACED) && \ security_ptrace(current,task) == 0)) -static int may_ptrace_attach(struct task_struct *task) -{ - int retval = 0; - - task_lock(task); - - if (!task->mm) - goto out; - if (((current->uid != task->euid) || - (current->uid != task->suid) || - (current->uid != task->uid) || - (current->gid != task->egid) || - (current->gid != task->sgid) || - (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE)) - goto out; - rmb(); - if (task->mm->dumpable != 1 && !capable(CAP_SYS_PTRACE)) - goto out; - if (security_ptrace(current, task)) - goto out; - - retval = 1; -out: - task_unlock(task); - return retval; -} - static int proc_pid_environ(struct task_struct *task, char * buffer) { int res = 0; @@ -334,7 +356,7 @@ static int proc_pid_environ(struct task_struct *task, char * buffer) if (len > PAGE_SIZE) len = PAGE_SIZE; res = access_process_vm(task, mm->env_start, buffer, len, 0); - if (!may_ptrace_attach(task)) + if (!ptrace_may_attach(task)) res = -ESRCH; mmput(mm); } @@ -515,6 +537,46 @@ static struct file_operations proc_maps_operations = { .release = seq_release, }; +#ifdef CONFIG_NUMA +extern struct seq_operations proc_pid_numa_maps_op; +static int numa_maps_open(struct inode *inode, struct file *file) +{ + struct task_struct *task = proc_task(inode); + int ret = seq_open(file, &proc_pid_numa_maps_op); + if (!ret) { + struct seq_file *m = file->private_data; + m->private = task; + } + return ret; +} + +static struct file_operations proc_numa_maps_operations = { + .open = numa_maps_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; +#endif + +extern struct seq_operations proc_pid_smaps_op; +static int smaps_open(struct inode *inode, struct file *file) +{ + struct task_struct *task = proc_task(inode); + int ret = seq_open(file, &proc_pid_smaps_op); + if (!ret) { + struct seq_file *m = file->private_data; + m->private = task; + } + return ret; +} + +static struct file_operations proc_smaps_operations = { + .open = smaps_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + extern struct seq_operations mounts_op; static int mounts_open(struct inode *inode, struct file *file) { @@ -597,7 +659,7 @@ static ssize_t mem_read(struct file * file, char __user * buf, int ret = -ESRCH; struct mm_struct *mm; - if (!MAY_PTRACE(task) || !may_ptrace_attach(task)) + if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) goto out; ret = -ENOMEM; @@ -623,7 +685,7 @@ static ssize_t mem_read(struct file * file, char __user * buf, this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; retval = access_process_vm(task, src, page, this_len, 0); - if (!retval || !MAY_PTRACE(task) || !may_ptrace_attach(task)) { + if (!retval || !MAY_PTRACE(task) || !ptrace_may_attach(task)) { if (!ret) ret = -EIO; break; @@ -661,7 +723,7 @@ static ssize_t mem_write(struct file * file, const char * buf, struct task_struct *task = proc_task(file->f_dentry->d_inode); unsigned long dst = *ppos; - if (!MAY_PTRACE(task) || !may_ptrace_attach(task)) + if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) return -ESRCH; page = (char *)__get_free_page(GFP_USER); @@ -978,6 +1040,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) int retval; char buf[NUMBUF]; struct files_struct * files; + struct fdtable *fdt; retval = -ENOENT; if (!pid_alive(p)) @@ -1000,15 +1063,16 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) files = get_files_struct(p); if (!files) goto out; - spin_lock(&files->file_lock); + rcu_read_lock(); + fdt = files_fdtable(files); for (fd = filp->f_pos-2; - fd < files->max_fds; + fd < fdt->max_fds; fd++, filp->f_pos++) { unsigned int i,j; if (!fcheck_files(files, fd)) continue; - spin_unlock(&files->file_lock); + rcu_read_unlock(); j = NUMBUF; i = fd; @@ -1020,12 +1084,12 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) ino = fake_ino(tid, PROC_TID_FD_DIR + fd); if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino, DT_LNK) < 0) { - spin_lock(&files->file_lock); + rcu_read_lock(); break; } - spin_lock(&files->file_lock); + rcu_read_lock(); } - spin_unlock(&files->file_lock); + rcu_read_unlock(); put_files_struct(files); } out: @@ -1200,9 +1264,9 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) files = get_files_struct(task); if (files) { - spin_lock(&files->file_lock); + rcu_read_lock(); if (fcheck_files(files, fd)) { - spin_unlock(&files->file_lock); + rcu_read_unlock(); put_files_struct(files); if (task_dumpable(task)) { inode->i_uid = task->euid; @@ -1214,7 +1278,7 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) security_task_to_inode(task, inode); return 1; } - spin_unlock(&files->file_lock); + rcu_read_unlock(); put_files_struct(files); } d_drop(dentry); @@ -1306,7 +1370,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, if (!files) goto out_unlock; inode->i_mode = S_IFLNK; - spin_lock(&files->file_lock); + rcu_read_lock(); file = fcheck_files(files, fd); if (!file) goto out_unlock2; @@ -1314,7 +1378,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, inode->i_mode |= S_IRUSR | S_IXUSR; if (file->f_mode & 2) inode->i_mode |= S_IWUSR | S_IXUSR; - spin_unlock(&files->file_lock); + rcu_read_unlock(); put_files_struct(files); inode->i_op = &proc_pid_link_inode_operations; inode->i_size = 64; @@ -1324,7 +1388,7 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, return NULL; out_unlock2: - spin_unlock(&files->file_lock); + rcu_read_unlock(); put_files_struct(files); out_unlock: iput(inode); @@ -1524,6 +1588,12 @@ static struct dentry *proc_pident_lookup(struct inode *dir, case PROC_TGID_MAPS: inode->i_fop = &proc_maps_operations; break; +#ifdef CONFIG_NUMA + case PROC_TID_NUMA_MAPS: + case PROC_TGID_NUMA_MAPS: + inode->i_fop = &proc_numa_maps_operations; + break; +#endif case PROC_TID_MEM: case PROC_TGID_MEM: inode->i_op = &proc_mem_inode_operations; @@ -1539,6 +1609,10 @@ static struct dentry *proc_pident_lookup(struct inode *dir, case PROC_TGID_MOUNTS: inode->i_fop = &proc_mounts_operations; break; + case PROC_TID_SMAPS: + case PROC_TGID_SMAPS: + inode->i_fop = &proc_smaps_operations; + break; #ifdef CONFIG_SECURITY case PROC_TID_ATTR: inode->i_nlink = 2; diff --git a/fs/proc/generic.c b/fs/proc/generic.c index abe8920313fb..8a8c34461d48 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -249,6 +249,18 @@ out: return error; } +static int proc_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat) +{ + struct inode *inode = dentry->d_inode; + struct proc_dir_entry *de = PROC_I(inode)->pde; + if (de && de->nlink) + inode->i_nlink = de->nlink; + + generic_fillattr(inode, stat); + return 0; +} + static struct inode_operations proc_file_inode_operations = { .setattr = proc_notify_change, }; @@ -475,6 +487,7 @@ static struct file_operations proc_dir_operations = { */ static struct inode_operations proc_dir_inode_operations = { .lookup = proc_lookup, + .getattr = proc_getattr, .setattr = proc_notify_change, }; diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 133c28685105..effa6c0c467a 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -60,6 +60,8 @@ static void proc_delete_inode(struct inode *inode) struct proc_dir_entry *de; struct task_struct *tsk; + truncate_inode_pages(&inode->i_data, 0); + /* Let go of any associated process */ tsk = PROC_I(inode)->task; if (tsk) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 28b4a0253a92..c7ef3e48e35b 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -2,8 +2,13 @@ #include <linux/hugetlb.h> #include <linux/mount.h> #include <linux/seq_file.h> +#include <linux/highmem.h> +#include <linux/pagemap.h> +#include <linux/mempolicy.h> + #include <asm/elf.h> #include <asm/uaccess.h> +#include <asm/tlbflush.h> #include "internal.h" char *task_mem(struct mm_struct *mm, char *buffer) @@ -87,49 +92,58 @@ static void pad_len_spaces(struct seq_file *m, int len) seq_printf(m, "%*c", len, ' '); } -static int show_map(struct seq_file *m, void *v) +struct mem_size_stats +{ + unsigned long resident; + unsigned long shared_clean; + unsigned long shared_dirty; + unsigned long private_clean; + unsigned long private_dirty; +}; + +static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss) { struct task_struct *task = m->private; - struct vm_area_struct *map = v; - struct mm_struct *mm = map->vm_mm; - struct file *file = map->vm_file; - int flags = map->vm_flags; + struct vm_area_struct *vma = v; + struct mm_struct *mm = vma->vm_mm; + struct file *file = vma->vm_file; + int flags = vma->vm_flags; unsigned long ino = 0; dev_t dev = 0; int len; if (file) { - struct inode *inode = map->vm_file->f_dentry->d_inode; + struct inode *inode = vma->vm_file->f_dentry->d_inode; dev = inode->i_sb->s_dev; ino = inode->i_ino; } seq_printf(m, "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n", - map->vm_start, - map->vm_end, + vma->vm_start, + vma->vm_end, flags & VM_READ ? 'r' : '-', flags & VM_WRITE ? 'w' : '-', flags & VM_EXEC ? 'x' : '-', flags & VM_MAYSHARE ? 's' : 'p', - map->vm_pgoff << PAGE_SHIFT, + vma->vm_pgoff << PAGE_SHIFT, MAJOR(dev), MINOR(dev), ino, &len); /* * Print the dentry name for named mappings, and a * special [heap] marker for the heap: */ - if (map->vm_file) { + if (file) { pad_len_spaces(m, len); - seq_path(m, file->f_vfsmnt, file->f_dentry, ""); + seq_path(m, file->f_vfsmnt, file->f_dentry, "\n"); } else { if (mm) { - if (map->vm_start <= mm->start_brk && - map->vm_end >= mm->brk) { + if (vma->vm_start <= mm->start_brk && + vma->vm_end >= mm->brk) { pad_len_spaces(m, len); seq_puts(m, "[heap]"); } else { - if (map->vm_start <= mm->start_stack && - map->vm_end >= mm->start_stack) { + if (vma->vm_start <= mm->start_stack && + vma->vm_end >= mm->start_stack) { pad_len_spaces(m, len); seq_puts(m, "[stack]"); @@ -141,24 +155,146 @@ static int show_map(struct seq_file *m, void *v) } } seq_putc(m, '\n'); - if (m->count < m->size) /* map is copied successfully */ - m->version = (map != get_gate_vma(task))? map->vm_start: 0; + + if (mss) + seq_printf(m, + "Size: %8lu kB\n" + "Rss: %8lu kB\n" + "Shared_Clean: %8lu kB\n" + "Shared_Dirty: %8lu kB\n" + "Private_Clean: %8lu kB\n" + "Private_Dirty: %8lu kB\n", + (vma->vm_end - vma->vm_start) >> 10, + mss->resident >> 10, + mss->shared_clean >> 10, + mss->shared_dirty >> 10, + mss->private_clean >> 10, + mss->private_dirty >> 10); + + if (m->count < m->size) /* vma is copied successfully */ + m->version = (vma != get_gate_vma(task))? vma->vm_start: 0; return 0; } +static int show_map(struct seq_file *m, void *v) +{ + return show_map_internal(m, v, 0); +} + +static void smaps_pte_range(struct vm_area_struct *vma, pmd_t *pmd, + unsigned long addr, unsigned long end, + struct mem_size_stats *mss) +{ + pte_t *pte, ptent; + unsigned long pfn; + struct page *page; + + pte = pte_offset_map(pmd, addr); + do { + ptent = *pte; + if (pte_none(ptent) || !pte_present(ptent)) + continue; + + mss->resident += PAGE_SIZE; + pfn = pte_pfn(ptent); + if (!pfn_valid(pfn)) + continue; + + page = pfn_to_page(pfn); + if (page_count(page) >= 2) { + if (pte_dirty(ptent)) + mss->shared_dirty += PAGE_SIZE; + else + mss->shared_clean += PAGE_SIZE; + } else { + if (pte_dirty(ptent)) + mss->private_dirty += PAGE_SIZE; + else + mss->private_clean += PAGE_SIZE; + } + } while (pte++, addr += PAGE_SIZE, addr != end); + pte_unmap(pte - 1); + cond_resched_lock(&vma->vm_mm->page_table_lock); +} + +static inline void smaps_pmd_range(struct vm_area_struct *vma, pud_t *pud, + unsigned long addr, unsigned long end, + struct mem_size_stats *mss) +{ + pmd_t *pmd; + unsigned long next; + + pmd = pmd_offset(pud, addr); + do { + next = pmd_addr_end(addr, end); + if (pmd_none_or_clear_bad(pmd)) + continue; + smaps_pte_range(vma, pmd, addr, next, mss); + } while (pmd++, addr = next, addr != end); +} + +static inline void smaps_pud_range(struct vm_area_struct *vma, pgd_t *pgd, + unsigned long addr, unsigned long end, + struct mem_size_stats *mss) +{ + pud_t *pud; + unsigned long next; + + pud = pud_offset(pgd, addr); + do { + next = pud_addr_end(addr, end); + if (pud_none_or_clear_bad(pud)) + continue; + smaps_pmd_range(vma, pud, addr, next, mss); + } while (pud++, addr = next, addr != end); +} + +static inline void smaps_pgd_range(struct vm_area_struct *vma, + unsigned long addr, unsigned long end, + struct mem_size_stats *mss) +{ + pgd_t *pgd; + unsigned long next; + + pgd = pgd_offset(vma->vm_mm, addr); + do { + next = pgd_addr_end(addr, end); + if (pgd_none_or_clear_bad(pgd)) + continue; + smaps_pud_range(vma, pgd, addr, next, mss); + } while (pgd++, addr = next, addr != end); +} + +static int show_smap(struct seq_file *m, void *v) +{ + struct vm_area_struct *vma = v; + struct mm_struct *mm = vma->vm_mm; + struct mem_size_stats mss; + + memset(&mss, 0, sizeof mss); + + if (mm) { + spin_lock(&mm->page_table_lock); + smaps_pgd_range(vma, vma->vm_start, vma->vm_end, &mss); + spin_unlock(&mm->page_table_lock); + } + + return show_map_internal(m, v, &mss); +} + static void *m_start(struct seq_file *m, loff_t *pos) { struct task_struct *task = m->private; unsigned long last_addr = m->version; struct mm_struct *mm; - struct vm_area_struct *map, *tail_map; + struct vm_area_struct *vma, *tail_vma; loff_t l = *pos; /* * We remember last_addr rather than next_addr to hit with * mmap_cache most of the time. We have zero last_addr at - * the begining and also after lseek. We will have -1 last_addr - * after the end of the maps. + * the beginning and also after lseek. We will have -1 last_addr + * after the end of the vmas. */ if (last_addr == -1UL) @@ -168,47 +304,47 @@ static void *m_start(struct seq_file *m, loff_t *pos) if (!mm) return NULL; - tail_map = get_gate_vma(task); + tail_vma = get_gate_vma(task); down_read(&mm->mmap_sem); /* Start with last addr hint */ - if (last_addr && (map = find_vma(mm, last_addr))) { - map = map->vm_next; + if (last_addr && (vma = find_vma(mm, last_addr))) { + vma = vma->vm_next; goto out; } /* - * Check the map index is within the range and do + * Check the vma index is within the range and do * sequential scan until m_index. */ - map = NULL; + vma = NULL; if ((unsigned long)l < mm->map_count) { - map = mm->mmap; - while (l-- && map) - map = map->vm_next; + vma = mm->mmap; + while (l-- && vma) + vma = vma->vm_next; goto out; } if (l != mm->map_count) - tail_map = NULL; /* After gate map */ + tail_vma = NULL; /* After gate vma */ out: - if (map) - return map; + if (vma) + return vma; - /* End of maps has reached */ - m->version = (tail_map != NULL)? 0: -1UL; + /* End of vmas has been reached */ + m->version = (tail_vma != NULL)? 0: -1UL; up_read(&mm->mmap_sem); mmput(mm); - return tail_map; + return tail_vma; } static void m_stop(struct seq_file *m, void *v) { struct task_struct *task = m->private; - struct vm_area_struct *map = v; - if (map && map != get_gate_vma(task)) { - struct mm_struct *mm = map->vm_mm; + struct vm_area_struct *vma = v; + if (vma && vma != get_gate_vma(task)) { + struct mm_struct *mm = vma->vm_mm; up_read(&mm->mmap_sem); mmput(mm); } @@ -217,14 +353,14 @@ static void m_stop(struct seq_file *m, void *v) static void *m_next(struct seq_file *m, void *v, loff_t *pos) { struct task_struct *task = m->private; - struct vm_area_struct *map = v; - struct vm_area_struct *tail_map = get_gate_vma(task); + struct vm_area_struct *vma = v; + struct vm_area_struct *tail_vma = get_gate_vma(task); (*pos)++; - if (map && (map != tail_map) && map->vm_next) - return map->vm_next; + if (vma && (vma != tail_vma) && vma->vm_next) + return vma->vm_next; m_stop(m, v); - return (map != tail_map)? tail_map: NULL; + return (vma != tail_vma)? tail_vma: NULL; } struct seq_operations proc_pid_maps_op = { @@ -233,3 +369,140 @@ struct seq_operations proc_pid_maps_op = { .stop = m_stop, .show = show_map }; + +struct seq_operations proc_pid_smaps_op = { + .start = m_start, + .next = m_next, + .stop = m_stop, + .show = show_smap +}; + +#ifdef CONFIG_NUMA + +struct numa_maps { + unsigned long pages; + unsigned long anon; + unsigned long mapped; + unsigned long mapcount_max; + unsigned long node[MAX_NUMNODES]; +}; + +/* + * Calculate numa node maps for a vma + */ +static struct numa_maps *get_numa_maps(const struct vm_area_struct *vma) +{ + struct page *page; + unsigned long vaddr; + struct mm_struct *mm = vma->vm_mm; + int i; + struct numa_maps *md = kmalloc(sizeof(struct numa_maps), GFP_KERNEL); + + if (!md) + return NULL; + md->pages = 0; + md->anon = 0; + md->mapped = 0; + md->mapcount_max = 0; + for_each_node(i) + md->node[i] =0; + + spin_lock(&mm->page_table_lock); + for (vaddr = vma->vm_start; vaddr < vma->vm_end; vaddr += PAGE_SIZE) { + page = follow_page(mm, vaddr, 0); + if (page) { + int count = page_mapcount(page); + + if (count) + md->mapped++; + if (count > md->mapcount_max) + md->mapcount_max = count; + md->pages++; + if (PageAnon(page)) + md->anon++; + md->node[page_to_nid(page)]++; + } + } + spin_unlock(&mm->page_table_lock); + return md; +} + +static int show_numa_map(struct seq_file *m, void *v) +{ + struct task_struct *task = m->private; + struct vm_area_struct *vma = v; + struct mempolicy *pol; + struct numa_maps *md; + struct zone **z; + int n; + int first; + + if (!vma->vm_mm) + return 0; + + md = get_numa_maps(vma); + if (!md) + return 0; + + seq_printf(m, "%08lx", vma->vm_start); + pol = get_vma_policy(task, vma, vma->vm_start); + /* Print policy */ + switch (pol->policy) { + case MPOL_PREFERRED: + seq_printf(m, " prefer=%d", pol->v.preferred_node); + break; + case MPOL_BIND: + seq_printf(m, " bind={"); + first = 1; + for (z = pol->v.zonelist->zones; *z; z++) { + + if (!first) + seq_putc(m, ','); + else + first = 0; + seq_printf(m, "%d/%s", (*z)->zone_pgdat->node_id, + (*z)->name); + } + seq_putc(m, '}'); + break; + case MPOL_INTERLEAVE: + seq_printf(m, " interleave={"); + first = 1; + for_each_node(n) { + if (test_bit(n, pol->v.nodes)) { + if (!first) + seq_putc(m,','); + else + first = 0; + seq_printf(m, "%d",n); + } + } + seq_putc(m, '}'); + break; + default: + seq_printf(m," default"); + break; + } + seq_printf(m, " MaxRef=%lu Pages=%lu Mapped=%lu", + md->mapcount_max, md->pages, md->mapped); + if (md->anon) + seq_printf(m," Anon=%lu",md->anon); + + for_each_online_node(n) { + if (md->node[n]) + seq_printf(m, " N%d=%lu", n, md->node[n]); + } + seq_putc(m, '\n'); + kfree(md); + if (m->count < m->size) /* vma is copied successfully */ + m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0; + return 0; +} + +struct seq_operations proc_pid_numa_maps_op = { + .start = m_start, + .next = m_next, + .stop = m_stop, + .show = show_numa_map +}; +#endif |