diff options
Diffstat (limited to 'kernel')
57 files changed, 284 insertions, 181 deletions
diff --git a/kernel/async.c b/kernel/async.c index 27235f5de198..15319d6c18fe 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -56,6 +56,7 @@ asynchronous and synchronous parts of the kernel. #include <linux/init.h> #include <linux/kthread.h> #include <linux/delay.h> +#include <linux/slab.h> #include <asm/atomic.h> static async_cookie_t next_cookie = 1; diff --git a/kernel/audit.c b/kernel/audit.c index 78f7f86aa238..c71bd26631a2 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -46,6 +46,7 @@ #include <asm/atomic.h> #include <linux/mm.h> #include <linux/module.h> +#include <linux/slab.h> #include <linux/err.h> #include <linux/kthread.h> diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index 028e85663f27..46a57b57a335 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -3,6 +3,7 @@ #include <linux/namei.h> #include <linux/mount.h> #include <linux/kthread.h> +#include <linux/slab.h> struct audit_tree; struct audit_chunk; diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index cc7e87936cbc..8df43696f4ba 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -27,6 +27,7 @@ #include <linux/namei.h> #include <linux/netlink.h> #include <linux/sched.h> +#include <linux/slab.h> #include <linux/inotify.h> #include <linux/security.h> #include "audit.h" diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index a70604047f3c..ce08041f578d 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -27,6 +27,7 @@ #include <linux/namei.h> #include <linux/netlink.h> #include <linux/sched.h> +#include <linux/slab.h> #include <linux/security.h> #include "audit.h" diff --git a/kernel/auditsc.c b/kernel/auditsc.c index f3a461c0970a..3828ad5fb8f1 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -49,6 +49,7 @@ #include <linux/namei.h> #include <linux/mm.h> #include <linux/module.h> +#include <linux/slab.h> #include <linux/mount.h> #include <linux/socket.h> #include <linux/mqueue.h> @@ -1893,7 +1894,7 @@ static int audit_inc_name_count(struct audit_context *context, { if (context->name_count >= AUDIT_NAMES) { if (inode) - printk(KERN_DEBUG "name_count maxed, losing inode data: " + printk(KERN_DEBUG "audit: name_count maxed, losing inode data: " "dev=%02x:%02x, inode=%lu\n", MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev), diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index 59e9ef6aab40..da5e13975531 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -15,6 +15,7 @@ */ #include <linux/module.h> +#include <linux/slab.h> #include <linux/cgroup.h> #include <linux/fs.h> #include <linux/uaccess.h> @@ -47,17 +48,20 @@ static inline struct freezer *task_freezer(struct task_struct *task) struct freezer, css); } -int cgroup_frozen(struct task_struct *task) +int cgroup_freezing_or_frozen(struct task_struct *task) { struct freezer *freezer; enum freezer_state state; task_lock(task); freezer = task_freezer(task); - state = freezer->state; + if (!freezer->css.cgroup->parent) + state = CGROUP_THAWED; /* root cgroup can't be frozen */ + else + state = freezer->state; task_unlock(task); - return state == CGROUP_FROZEN; + return (state == CGROUP_FREEZING) || (state == CGROUP_FROZEN); } /* diff --git a/kernel/compat.c b/kernel/compat.c index f6c204f07ea6..7f40e9275fd9 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -25,6 +25,7 @@ #include <linux/posix-timers.h> #include <linux/times.h> #include <linux/ptrace.h> +#include <linux/gfp.h> #include <asm/uaccess.h> diff --git a/kernel/cpu.c b/kernel/cpu.c index f8cced2692b3..25bba73b1be3 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -14,6 +14,7 @@ #include <linux/kthread.h> #include <linux/stop_machine.h> #include <linux/mutex.h> +#include <linux/gfp.h> #ifdef CONFIG_SMP /* Serializes the updates to cpu_online_mask, cpu_present_mask */ diff --git a/kernel/cred.c b/kernel/cred.c index 1b1129d0cce8..e1dbe9eef800 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -10,6 +10,7 @@ */ #include <linux/module.h> #include <linux/cred.h> +#include <linux/slab.h> #include <linux/sched.h> #include <linux/key.h> #include <linux/keyctl.h> diff --git a/kernel/exit.c b/kernel/exit.c index cce59cb5ee6a..7f2683a10ac4 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -953,7 +953,8 @@ NORET_TYPE void do_exit(long code) acct_update_integrals(tsk); /* sync mm's RSS info before statistics gathering */ - sync_mm_rss(tsk, tsk->mm); + if (tsk->mm) + sync_mm_rss(tsk, tsk->mm); group_dead = atomic_dec_and_test(&tsk->signal->live); if (group_dead) { hrtimer_cancel(&tsk->signal->real_timer); diff --git a/kernel/fork.c b/kernel/fork.c index d67f1dbfbe03..5d3592deaf71 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1052,6 +1052,9 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->prev_utime = cputime_zero; p->prev_stime = cputime_zero; #endif +#if defined(SPLIT_RSS_COUNTING) + memset(&p->rss_stat, 0, sizeof(p->rss_stat)); +#endif p->default_timer_slack_ns = current->timer_slack_ns; diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 398fda155f6e..704e488730a5 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -757,6 +757,16 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) if (new->flags & IRQF_ONESHOT) desc->status |= IRQ_ONESHOT; + /* + * Force MSI interrupts to run with interrupts + * disabled. The multi vector cards can cause stack + * overflows due to nested interrupts when enough of + * them are directed to a core and fire at the same + * time. + */ + if (desc->msi_desc) + new->flags |= IRQF_DISABLED; + if (!(desc->status & IRQ_NOAUTOEN)) { desc->depth = 0; desc->status &= ~IRQ_DISABLED; diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c index 963559dbd858..65d3845665ac 100644 --- a/kernel/irq/numa_migrate.c +++ b/kernel/irq/numa_migrate.c @@ -6,6 +6,7 @@ */ #include <linux/irq.h> +#include <linux/slab.h> #include <linux/module.h> #include <linux/random.h> #include <linux/interrupt.h> diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 6f50eccc79c0..7a6eb04ef6b5 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -7,6 +7,7 @@ */ #include <linux/irq.h> +#include <linux/gfp.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/interrupt.h> diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 8e5288a8a355..13aff293f4de 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -21,6 +21,7 @@ #include <linux/sched.h> /* for cond_resched */ #include <linux/mm.h> #include <linux/ctype.h> +#include <linux/slab.h> #include <asm/sections.h> diff --git a/kernel/kgdb.c b/kernel/kgdb.c index 761fdd2b3034..11f3515ca83f 100644 --- a/kernel/kgdb.c +++ b/kernel/kgdb.c @@ -69,9 +69,16 @@ struct kgdb_state { struct pt_regs *linux_regs; }; +/* Exception state values */ +#define DCPU_WANT_MASTER 0x1 /* Waiting to become a master kgdb cpu */ +#define DCPU_NEXT_MASTER 0x2 /* Transition from one master cpu to another */ +#define DCPU_IS_SLAVE 0x4 /* Slave cpu enter exception */ +#define DCPU_SSTEP 0x8 /* CPU is single stepping */ + static struct debuggerinfo_struct { void *debuggerinfo; struct task_struct *task; + int exception_state; } kgdb_info[NR_CPUS]; /** @@ -391,27 +398,22 @@ int kgdb_mem2hex(char *mem, char *buf, int count) /* * Copy the binary array pointed to by buf into mem. Fix $, #, and - * 0x7d escaped with 0x7d. Return a pointer to the character after - * the last byte written. + * 0x7d escaped with 0x7d. Return -EFAULT on failure or 0 on success. + * The input buf is overwitten with the result to write to mem. */ static int kgdb_ebin2mem(char *buf, char *mem, int count) { - int err = 0; - char c; + int size = 0; + char *c = buf; while (count-- > 0) { - c = *buf++; - if (c == 0x7d) - c = *buf++ ^ 0x20; - - err = probe_kernel_write(mem, &c, 1); - if (err) - break; - - mem++; + c[size] = *buf++; + if (c[size] == 0x7d) + c[size] = *buf++ ^ 0x20; + size++; } - return err; + return probe_kernel_write(mem, c, size); } /* @@ -563,49 +565,6 @@ static struct task_struct *getthread(struct pt_regs *regs, int tid) } /* - * CPU debug state control: - */ - -#ifdef CONFIG_SMP -static void kgdb_wait(struct pt_regs *regs) -{ - unsigned long flags; - int cpu; - - local_irq_save(flags); - cpu = raw_smp_processor_id(); - kgdb_info[cpu].debuggerinfo = regs; - kgdb_info[cpu].task = current; - /* - * Make sure the above info reaches the primary CPU before - * our cpu_in_kgdb[] flag setting does: - */ - smp_wmb(); - atomic_set(&cpu_in_kgdb[cpu], 1); - - /* Disable any cpu specific hw breakpoints */ - kgdb_disable_hw_debug(regs); - - /* Wait till primary CPU is done with debugging */ - while (atomic_read(&passive_cpu_wait[cpu])) - cpu_relax(); - - kgdb_info[cpu].debuggerinfo = NULL; - kgdb_info[cpu].task = NULL; - - /* fix up hardware debug registers on local cpu */ - if (arch_kgdb_ops.correct_hw_break) - arch_kgdb_ops.correct_hw_break(); - - /* Signal the primary CPU that we are done: */ - atomic_set(&cpu_in_kgdb[cpu], 0); - touch_softlockup_watchdog_sync(); - clocksource_touch_watchdog(); - local_irq_restore(flags); -} -#endif - -/* * Some architectures need cache flushes when we set/clear a * breakpoint: */ @@ -1400,34 +1359,13 @@ static int kgdb_reenter_check(struct kgdb_state *ks) return 1; } -/* - * kgdb_handle_exception() - main entry point from a kernel exception - * - * Locking hierarchy: - * interface locks, if any (begin_session) - * kgdb lock (kgdb_active) - */ -int -kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs) +static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs) { - struct kgdb_state kgdb_var; - struct kgdb_state *ks = &kgdb_var; unsigned long flags; int sstep_tries = 100; int error = 0; int i, cpu; - - ks->cpu = raw_smp_processor_id(); - ks->ex_vector = evector; - ks->signo = signo; - ks->ex_vector = evector; - ks->err_code = ecode; - ks->kgdb_usethreadid = 0; - ks->linux_regs = regs; - - if (kgdb_reenter_check(ks)) - return 0; /* Ouch, double exception ! */ - + int trace_on = 0; acquirelock: /* * Interrupts will be restored by the 'trap return' code, except when @@ -1435,13 +1373,43 @@ acquirelock: */ local_irq_save(flags); - cpu = raw_smp_processor_id(); + cpu = ks->cpu; + kgdb_info[cpu].debuggerinfo = regs; + kgdb_info[cpu].task = current; + /* + * Make sure the above info reaches the primary CPU before + * our cpu_in_kgdb[] flag setting does: + */ + atomic_inc(&cpu_in_kgdb[cpu]); /* - * Acquire the kgdb_active lock: + * CPU will loop if it is a slave or request to become a kgdb + * master cpu and acquire the kgdb_active lock: */ - while (atomic_cmpxchg(&kgdb_active, -1, cpu) != -1) + while (1) { + if (kgdb_info[cpu].exception_state & DCPU_WANT_MASTER) { + if (atomic_cmpxchg(&kgdb_active, -1, cpu) == cpu) + break; + } else if (kgdb_info[cpu].exception_state & DCPU_IS_SLAVE) { + if (!atomic_read(&passive_cpu_wait[cpu])) + goto return_normal; + } else { +return_normal: + /* Return to normal operation by executing any + * hw breakpoint fixup. + */ + if (arch_kgdb_ops.correct_hw_break) + arch_kgdb_ops.correct_hw_break(); + if (trace_on) + tracing_on(); + atomic_dec(&cpu_in_kgdb[cpu]); + touch_softlockup_watchdog_sync(); + clocksource_touch_watchdog(); + local_irq_restore(flags); + return 0; + } cpu_relax(); + } /* * For single stepping, try to only enter on the processor @@ -1475,9 +1443,6 @@ acquirelock: if (kgdb_io_ops->pre_exception) kgdb_io_ops->pre_exception(); - kgdb_info[ks->cpu].debuggerinfo = ks->linux_regs; - kgdb_info[ks->cpu].task = current; - kgdb_disable_hw_debug(ks->linux_regs); /* @@ -1486,15 +1451,9 @@ acquirelock: */ if (!kgdb_single_step) { for (i = 0; i < NR_CPUS; i++) - atomic_set(&passive_cpu_wait[i], 1); + atomic_inc(&passive_cpu_wait[i]); } - /* - * spin_lock code is good enough as a barrier so we don't - * need one here: - */ - atomic_set(&cpu_in_kgdb[ks->cpu], 1); - #ifdef CONFIG_SMP /* Signal the other CPUs to enter kgdb_wait() */ if ((!kgdb_single_step) && kgdb_do_roundup) @@ -1518,6 +1477,9 @@ acquirelock: kgdb_single_step = 0; kgdb_contthread = current; exception_level = 0; + trace_on = tracing_is_on(); + if (trace_on) + tracing_off(); /* Talk to debugger with gdbserial protocol */ error = gdb_serial_stub(ks); @@ -1526,13 +1488,11 @@ acquirelock: if (kgdb_io_ops->post_exception) kgdb_io_ops->post_exception(); - kgdb_info[ks->cpu].debuggerinfo = NULL; - kgdb_info[ks->cpu].task = NULL; - atomic_set(&cpu_in_kgdb[ks->cpu], 0); + atomic_dec(&cpu_in_kgdb[ks->cpu]); if (!kgdb_single_step) { for (i = NR_CPUS-1; i >= 0; i--) - atomic_set(&passive_cpu_wait[i], 0); + atomic_dec(&passive_cpu_wait[i]); /* * Wait till all the CPUs have quit * from the debugger. @@ -1551,6 +1511,8 @@ kgdb_restore: else kgdb_sstep_pid = 0; } + if (trace_on) + tracing_on(); /* Free kgdb_active */ atomic_set(&kgdb_active, -1); touch_softlockup_watchdog_sync(); @@ -1560,13 +1522,52 @@ kgdb_restore: return error; } +/* + * kgdb_handle_exception() - main entry point from a kernel exception + * + * Locking hierarchy: + * interface locks, if any (begin_session) + * kgdb lock (kgdb_active) + */ +int +kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs) +{ + struct kgdb_state kgdb_var; + struct kgdb_state *ks = &kgdb_var; + int ret; + + ks->cpu = raw_smp_processor_id(); + ks->ex_vector = evector; + ks->signo = signo; + ks->ex_vector = evector; + ks->err_code = ecode; + ks->kgdb_usethreadid = 0; + ks->linux_regs = regs; + + if (kgdb_reenter_check(ks)) + return 0; /* Ouch, double exception ! */ + kgdb_info[ks->cpu].exception_state |= DCPU_WANT_MASTER; + ret = kgdb_cpu_enter(ks, regs); + kgdb_info[ks->cpu].exception_state &= ~DCPU_WANT_MASTER; + return ret; +} + int kgdb_nmicallback(int cpu, void *regs) { #ifdef CONFIG_SMP + struct kgdb_state kgdb_var; + struct kgdb_state *ks = &kgdb_var; + + memset(ks, 0, sizeof(struct kgdb_state)); + ks->cpu = cpu; + ks->linux_regs = regs; + if (!atomic_read(&cpu_in_kgdb[cpu]) && - atomic_read(&kgdb_active) != cpu && - atomic_read(&cpu_in_kgdb[atomic_read(&kgdb_active)])) { - kgdb_wait((struct pt_regs *)regs); + atomic_read(&kgdb_active) != -1 && + atomic_read(&kgdb_active) != cpu) { + kgdb_info[cpu].exception_state |= DCPU_IS_SLAVE; + kgdb_cpu_enter(ks, regs); + kgdb_info[cpu].exception_state &= ~DCPU_IS_SLAVE; return 0; } #endif @@ -1742,11 +1743,11 @@ EXPORT_SYMBOL_GPL(kgdb_unregister_io_module); */ void kgdb_breakpoint(void) { - atomic_set(&kgdb_setting_breakpoint, 1); + atomic_inc(&kgdb_setting_breakpoint); wmb(); /* Sync point before breakpoint */ arch_kgdb_breakpoint(); wmb(); /* Sync point after breakpoint */ - atomic_set(&kgdb_setting_breakpoint, 0); + atomic_dec(&kgdb_setting_breakpoint); } EXPORT_SYMBOL_GPL(kgdb_breakpoint); diff --git a/kernel/latencytop.c b/kernel/latencytop.c index ca07c5c0c914..877fb306d415 100644 --- a/kernel/latencytop.c +++ b/kernel/latencytop.c @@ -56,7 +56,6 @@ #include <linux/module.h> #include <linux/sched.h> #include <linux/list.h> -#include <linux/slab.h> #include <linux/stacktrace.h> static DEFINE_SPINLOCK(latency_lock); diff --git a/kernel/lockdep.c b/kernel/lockdep.c index c927a549db2c..2594e1ce41cb 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -43,6 +43,7 @@ #include <linux/ftrace.h> #include <linux/stringify.h> #include <linux/bitops.h> +#include <linux/gfp.h> #include <asm/sections.h> @@ -582,9 +583,6 @@ static int static_obj(void *obj) unsigned long start = (unsigned long) &_stext, end = (unsigned long) &_end, addr = (unsigned long) obj; -#ifdef CONFIG_SMP - int i; -#endif /* * static variable? @@ -595,24 +593,16 @@ static int static_obj(void *obj) if (arch_is_kernel_data(addr)) return 1; -#ifdef CONFIG_SMP /* - * percpu var? + * in-kernel percpu var? */ - for_each_possible_cpu(i) { - start = (unsigned long) &__per_cpu_start + per_cpu_offset(i); - end = (unsigned long) &__per_cpu_start + PERCPU_ENOUGH_ROOM - + per_cpu_offset(i); - - if ((addr >= start) && (addr < end)) - return 1; - } -#endif + if (is_kernel_percpu_address(addr)) + return 1; /* - * module var? + * module static or percpu var? */ - return is_module_address(addr); + return is_module_address(addr) || is_module_percpu_address(addr); } /* diff --git a/kernel/module.c b/kernel/module.c index c968d3606dca..1016b75b026a 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -370,27 +370,33 @@ EXPORT_SYMBOL_GPL(find_module); #ifdef CONFIG_SMP -static void *percpu_modalloc(unsigned long size, unsigned long align, - const char *name) +static inline void __percpu *mod_percpu(struct module *mod) { - void *ptr; + return mod->percpu; +} +static int percpu_modalloc(struct module *mod, + unsigned long size, unsigned long align) +{ if (align > PAGE_SIZE) { printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n", - name, align, PAGE_SIZE); + mod->name, align, PAGE_SIZE); align = PAGE_SIZE; } - ptr = __alloc_reserved_percpu(size, align); - if (!ptr) + mod->percpu = __alloc_reserved_percpu(size, align); + if (!mod->percpu) { printk(KERN_WARNING "Could not allocate %lu bytes percpu data\n", size); - return ptr; + return -ENOMEM; + } + mod->percpu_size = size; + return 0; } -static void percpu_modfree(void *freeme) +static void percpu_modfree(struct module *mod) { - free_percpu(freeme); + free_percpu(mod->percpu); } static unsigned int find_pcpusec(Elf_Ehdr *hdr, @@ -400,24 +406,62 @@ static unsigned int find_pcpusec(Elf_Ehdr *hdr, return find_sec(hdr, sechdrs, secstrings, ".data.percpu"); } -static void percpu_modcopy(void *pcpudest, const void *from, unsigned long size) +static void percpu_modcopy(struct module *mod, + const void *from, unsigned long size) { int cpu; for_each_possible_cpu(cpu) - memcpy(pcpudest + per_cpu_offset(cpu), from, size); + memcpy(per_cpu_ptr(mod->percpu, cpu), from, size); +} + +/** + * is_module_percpu_address - test whether address is from module static percpu + * @addr: address to test + * + * Test whether @addr belongs to module static percpu area. + * + * RETURNS: + * %true if @addr is from module static percpu area + */ +bool is_module_percpu_address(unsigned long addr) +{ + struct module *mod; + unsigned int cpu; + + preempt_disable(); + + list_for_each_entry_rcu(mod, &modules, list) { + if (!mod->percpu_size) + continue; + for_each_possible_cpu(cpu) { + void *start = per_cpu_ptr(mod->percpu, cpu); + + if ((void *)addr >= start && + (void *)addr < start + mod->percpu_size) { + preempt_enable(); + return true; + } + } + } + + preempt_enable(); + return false; } #else /* ... !CONFIG_SMP */ -static inline void *percpu_modalloc(unsigned long size, unsigned long align, - const char *name) +static inline void __percpu *mod_percpu(struct module *mod) { return NULL; } -static inline void percpu_modfree(void *pcpuptr) +static inline int percpu_modalloc(struct module *mod, + unsigned long size, unsigned long align) +{ + return -ENOMEM; +} +static inline void percpu_modfree(struct module *mod) { - BUG(); } static inline unsigned int find_pcpusec(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, @@ -425,12 +469,16 @@ static inline unsigned int find_pcpusec(Elf_Ehdr *hdr, { return 0; } -static inline void percpu_modcopy(void *pcpudst, const void *src, - unsigned long size) +static inline void percpu_modcopy(struct module *mod, + const void *from, unsigned long size) { /* pcpusec should be 0, and size of that section should be 0. */ BUG_ON(size != 0); } +bool is_module_percpu_address(unsigned long addr) +{ + return false; +} #endif /* CONFIG_SMP */ @@ -473,11 +521,13 @@ static void module_unload_init(struct module *mod) int cpu; INIT_LIST_HEAD(&mod->modules_which_use_me); - for_each_possible_cpu(cpu) - per_cpu_ptr(mod->refptr, cpu)->count = 0; + for_each_possible_cpu(cpu) { + per_cpu_ptr(mod->refptr, cpu)->incs = 0; + per_cpu_ptr(mod->refptr, cpu)->decs = 0; + } /* Hold reference count during initialization. */ - __this_cpu_write(mod->refptr->count, 1); + __this_cpu_write(mod->refptr->incs, 1); /* Backwards compatibility macros put refcount during init. */ mod->waiter = current; } @@ -616,12 +666,28 @@ static int try_stop_module(struct module *mod, int flags, int *forced) unsigned int module_refcount(struct module *mod) { - unsigned int total = 0; + unsigned int incs = 0, decs = 0; int cpu; for_each_possible_cpu(cpu) - total += per_cpu_ptr(mod->refptr, cpu)->count; - return total; + decs += per_cpu_ptr(mod->refptr, cpu)->decs; + /* + * ensure the incs are added up after the decs. + * module_put ensures incs are visible before decs with smp_wmb. + * + * This 2-count scheme avoids the situation where the refcount + * for CPU0 is read, then CPU0 increments the module refcount, + * then CPU1 drops that refcount, then the refcount for CPU1 is + * read. We would record a decrement but not its corresponding + * increment so we would see a low count (disaster). + * + * Rare situation? But module_refcount can be preempted, and we + * might be tallying up 4096+ CPUs. So it is not impossible. + */ + smp_rmb(); + for_each_possible_cpu(cpu) + incs += per_cpu_ptr(mod->refptr, cpu)->incs; + return incs - decs; } EXPORT_SYMBOL(module_refcount); @@ -798,10 +864,11 @@ void module_put(struct module *module) { if (module) { preempt_disable(); - __this_cpu_dec(module->refptr->count); + smp_wmb(); /* see comment in module_refcount */ + __this_cpu_inc(module->refptr->decs); trace_module_put(module, _RET_IP_, - __this_cpu_read(module->refptr->count)); + __this_cpu_read(module->refptr->decs)); /* Maybe they're waiting for us to drop reference? */ if (unlikely(!module_is_live(module))) wake_up_process(module->waiter); @@ -1400,8 +1467,7 @@ static void free_module(struct module *mod) /* This may be NULL, but that's OK */ module_free(mod, mod->module_init); kfree(mod->args); - if (mod->percpu) - percpu_modfree(mod->percpu); + percpu_modfree(mod); #if defined(CONFIG_MODULE_UNLOAD) if (mod->refptr) free_percpu(mod->refptr); @@ -1520,7 +1586,7 @@ static int simplify_symbols(Elf_Shdr *sechdrs, default: /* Divert to percpu allocation if a percpu var. */ if (sym[i].st_shndx == pcpuindex) - secbase = (unsigned long)mod->percpu; + secbase = (unsigned long)mod_percpu(mod); else secbase = sechdrs[sym[i].st_shndx].sh_addr; sym[i].st_value += secbase; @@ -1954,7 +2020,7 @@ static noinline struct module *load_module(void __user *umod, unsigned int modindex, versindex, infoindex, pcpuindex; struct module *mod; long err = 0; - void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ + void *ptr = NULL; /* Stops spurious gcc warning */ unsigned long symoffs, stroffs, *strmap; mm_segment_t old_fs; @@ -2094,15 +2160,11 @@ static noinline struct module *load_module(void __user *umod, if (pcpuindex) { /* We have a special allocation for this section. */ - percpu = percpu_modalloc(sechdrs[pcpuindex].sh_size, - sechdrs[pcpuindex].sh_addralign, - mod->name); - if (!percpu) { - err = -ENOMEM; + err = percpu_modalloc(mod, sechdrs[pcpuindex].sh_size, + sechdrs[pcpuindex].sh_addralign); + if (err) goto free_mod; - } sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC; - mod->percpu = percpu; } /* Determine total sizes, and put offsets in sh_entsize. For now @@ -2317,7 +2379,7 @@ static noinline struct module *load_module(void __user *umod, sort_extable(mod->extable, mod->extable + mod->num_exentries); /* Finally, copy percpu area over. */ - percpu_modcopy(mod->percpu, (void *)sechdrs[pcpuindex].sh_addr, + percpu_modcopy(mod, (void *)sechdrs[pcpuindex].sh_addr, sechdrs[pcpuindex].sh_size); add_kallsyms(mod, sechdrs, hdr->e_shnum, symindex, strindex, @@ -2409,8 +2471,7 @@ static noinline struct module *load_module(void __user *umod, module_free(mod, mod->module_core); /* mod will be freed with core. Don't access it beyond this line! */ free_percpu: - if (percpu) - percpu_modfree(percpu); + percpu_modfree(mod); free_mod: kfree(args); kfree(strmap); diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 2ab67233ee8f..f74e6c00e26d 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -13,6 +13,7 @@ * Pavel Emelianov <xemul@openvz.org> */ +#include <linux/slab.h> #include <linux/module.h> #include <linux/nsproxy.h> #include <linux/init_task.h> diff --git a/kernel/padata.c b/kernel/padata.c index 93caf65ff57c..fd03513c7327 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -25,6 +25,7 @@ #include <linux/padata.h> #include <linux/mutex.h> #include <linux/sched.h> +#include <linux/slab.h> #include <linux/rcupdate.h> #define MAX_SEQ_NR INT_MAX - NR_CPUS diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 4aa50ff4efc0..fcf42dcd6089 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -15,6 +15,7 @@ #include <linux/smp.h> #include <linux/file.h> #include <linux/poll.h> +#include <linux/slab.h> #include <linux/sysfs.h> #include <linux/dcache.h> #include <linux/percpu.h> diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 79aac93acf99..a5aff94e1f0b 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -13,6 +13,7 @@ #include <linux/syscalls.h> #include <linux/err.h> #include <linux/acct.h> +#include <linux/slab.h> #define BITS_PER_PAGE (PAGE_SIZE*8) diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index da5288ec2392..aa9e916da4d5 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -22,6 +22,7 @@ #include <linux/console.h> #include <linux/cpu.h> #include <linux/freezer.h> +#include <linux/gfp.h> #include <scsi/scsi_scan.h> #include <asm/suspend.h> diff --git a/kernel/power/hibernate_nvs.c b/kernel/power/hibernate_nvs.c index 39ac698ef836..fdcad9ed5a7b 100644 --- a/kernel/power/hibernate_nvs.c +++ b/kernel/power/hibernate_nvs.c @@ -10,6 +10,7 @@ #include <linux/kernel.h> #include <linux/list.h> #include <linux/mm.h> +#include <linux/slab.h> #include <linux/suspend.h> /* diff --git a/kernel/power/process.c b/kernel/power/process.c index 5ade1bdcf366..71ae29052ab6 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -88,12 +88,11 @@ static int try_to_freeze_tasks(bool sig_only) printk(KERN_ERR "Freezing of tasks failed after %d.%02d seconds " "(%d tasks refusing to freeze):\n", elapsed_csecs / 100, elapsed_csecs % 100, todo); - show_state(); read_lock(&tasklist_lock); do_each_thread(g, p) { task_lock(p); if (freezing(p) && !freezer_should_skip(p)) - printk(KERN_ERR " %s\n", p->comm); + sched_show_task(p); cancel_freezing(p); task_unlock(p); } while_each_thread(g, p); @@ -145,7 +144,7 @@ static void thaw_tasks(bool nosig_only) if (nosig_only && should_send_signal(p)) continue; - if (cgroup_frozen(p)) + if (cgroup_freezing_or_frozen(p)) continue; thaw_process(p); diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 830cadecbdfc..be861c26dda7 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -26,6 +26,7 @@ #include <linux/console.h> #include <linux/highmem.h> #include <linux/list.h> +#include <linux/slab.h> #include <asm/uaccess.h> #include <asm/mmu_context.h> diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 44cce10b582d..56e7dbb8b996 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -15,6 +15,7 @@ #include <linux/console.h> #include <linux/cpu.h> #include <linux/syscalls.h> +#include <linux/gfp.h> #include "power.h" diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 1d575733d4e1..66824d71983a 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -23,6 +23,7 @@ #include <linux/swap.h> #include <linux/swapops.h> #include <linux/pm.h> +#include <linux/slab.h> #include "power.h" diff --git a/kernel/res_counter.c b/kernel/res_counter.c index bcdabf37c40b..c7eaa37a768b 100644 --- a/kernel/res_counter.c +++ b/kernel/res_counter.c @@ -10,7 +10,6 @@ #include <linux/types.h> #include <linux/parser.h> #include <linux/fs.h> -#include <linux/slab.h> #include <linux/res_counter.h> #include <linux/uaccess.h> #include <linux/mm.h> diff --git a/kernel/sched.c b/kernel/sched.c index 1038ca163890..8cafe3ff558f 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -71,6 +71,7 @@ #include <linux/debugfs.h> #include <linux/ctype.h> #include <linux/ftrace.h> +#include <linux/slab.h> #include <asm/tlb.h> #include <asm/irq_regs.h> @@ -5344,7 +5345,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) get_task_struct(mt); task_rq_unlock(rq, &flags); - wake_up_process(rq->migration_thread); + wake_up_process(mt); put_task_struct(mt); wait_for_completion(&req.done); tlb_migrate_finish(p->mm); diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c index fccf9fbb0d7b..e6871cb3fc83 100644 --- a/kernel/sched_cpupri.c +++ b/kernel/sched_cpupri.c @@ -27,6 +27,7 @@ * of the License. */ +#include <linux/gfp.h> #include "sched_cpupri.h" /* Convert between a 140 based task->prio, and our 102 based cpupri */ diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 67f95aada4b9..9b49db144037 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -518,8 +518,4 @@ void proc_sched_set_task(struct task_struct *p) p->se.nr_wakeups_idle = 0; p->sched_info.bkl_count = 0; #endif - p->se.sum_exec_runtime = 0; - p->se.prev_sum_exec_runtime = 0; - p->nvcsw = 0; - p->nivcsw = 0; } diff --git a/kernel/smp.c b/kernel/smp.c index 9867b6bfefce..3fc697336183 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -9,6 +9,7 @@ #include <linux/module.h> #include <linux/percpu.h> #include <linux/init.h> +#include <linux/gfp.h> #include <linux/smp.h> #include <linux/cpu.h> diff --git a/kernel/srcu.c b/kernel/srcu.c index bde4295774c8..2980da3fd509 100644 --- a/kernel/srcu.c +++ b/kernel/srcu.c @@ -30,7 +30,6 @@ #include <linux/preempt.h> #include <linux/rcupdate.h> #include <linux/sched.h> -#include <linux/slab.h> #include <linux/smp.h> #include <linux/srcu.h> diff --git a/kernel/sys.c b/kernel/sys.c index 8298878f4f71..6d1a7e0f9d5b 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -36,6 +36,7 @@ #include <linux/personality.h> #include <linux/ptrace.h> #include <linux/fs_struct.h> +#include <linux/gfp.h> #include <linux/compat.h> #include <linux/syscalls.h> diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index 8cd50d8f9bde..59030570f5ca 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c @@ -13,6 +13,7 @@ #include <linux/file.h> #include <linux/ctype.h> #include <linux/netdevice.h> +#include <linux/slab.h> #ifdef CONFIG_SYSCTL_SYSCALL diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 899ca51be5e8..11281d5792bd 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -22,6 +22,7 @@ #include <linux/delayacct.h> #include <linux/cpumask.h> #include <linux/percpu.h> +#include <linux/slab.h> #include <linux/cgroupstats.h> #include <linux/cgroup.h> #include <linux/fs.h> diff --git a/kernel/time.c b/kernel/time.c index 804798005d19..656dccfe1cbb 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -35,7 +35,6 @@ #include <linux/syscalls.h> #include <linux/security.h> #include <linux/fs.h> -#include <linux/slab.h> #include <linux/math64.h> #include <linux/ptrace.h> diff --git a/kernel/time/timecompare.c b/kernel/time/timecompare.c index 12f5c55090be..ac38fbb176cc 100644 --- a/kernel/time/timecompare.c +++ b/kernel/time/timecompare.c @@ -19,6 +19,7 @@ #include <linux/timecompare.h> #include <linux/module.h> +#include <linux/slab.h> #include <linux/math64.h> /* diff --git a/kernel/timer.c b/kernel/timer.c index fc965eae0e87..aeb6a54f2771 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -39,6 +39,7 @@ #include <linux/kallsyms.h> #include <linux/perf_event.h> #include <linux/sched.h> +#include <linux/slab.h> #include <asm/uaccess.h> #include <asm/unistd.h> diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 07f945a99430..b3bc91a3f510 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -21,6 +21,7 @@ #include <linux/percpu.h> #include <linux/init.h> #include <linux/mutex.h> +#include <linux/slab.h> #include <linux/debugfs.h> #include <linux/smp_lock.h> #include <linux/time.h> diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index d9062f5cc0c0..2404b59b3097 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -24,6 +24,7 @@ #include <linux/uaccess.h> #include <linux/ftrace.h> #include <linux/sysctl.h> +#include <linux/slab.h> #include <linux/ctype.h> #include <linux/list.h> #include <linux/hash.h> diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c index 9f4f565b01e6..a22582a06161 100644 --- a/kernel/trace/power-traces.c +++ b/kernel/trace/power-traces.c @@ -9,7 +9,6 @@ #include <linux/workqueue.h> #include <linux/sched.h> #include <linux/module.h> -#include <linux/slab.h> #define CREATE_TRACE_POINTS #include <trace/events/power.h> diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index d1187ef20caf..41ca394feb22 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -14,6 +14,7 @@ #include <linux/module.h> #include <linux/percpu.h> #include <linux/mutex.h> +#include <linux/slab.h> #include <linux/init.h> #include <linux/hash.h> #include <linux/list.h> @@ -1209,18 +1210,19 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages) for (i = 0; i < nr_pages; i++) { if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages))) - return; + goto out; p = cpu_buffer->pages->next; bpage = list_entry(p, struct buffer_page, list); list_del_init(&bpage->list); free_buffer_page(bpage); } if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages))) - return; + goto out; rb_reset_cpu(cpu_buffer); rb_check_pages(cpu_buffer); +out: spin_unlock_irq(&cpu_buffer->reader_lock); } @@ -1237,7 +1239,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer, for (i = 0; i < nr_pages; i++) { if (RB_WARN_ON(cpu_buffer, list_empty(pages))) - return; + goto out; p = pages->next; bpage = list_entry(p, struct buffer_page, list); list_del_init(&bpage->list); @@ -1246,6 +1248,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer, rb_reset_cpu(cpu_buffer); rb_check_pages(cpu_buffer); +out: spin_unlock_irq(&cpu_buffer->reader_lock); } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3ec2ee6f6560..44f916a04065 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -33,10 +33,10 @@ #include <linux/kdebug.h> #include <linux/string.h> #include <linux/rwsem.h> +#include <linux/slab.h> #include <linux/ctype.h> #include <linux/init.h> #include <linux/poll.h> -#include <linux/gfp.h> #include <linux/fs.h> #include "trace.h" diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c index 6fbfb8f417b9..9d589d8dcd1a 100644 --- a/kernel/trace/trace_clock.c +++ b/kernel/trace/trace_clock.c @@ -84,7 +84,7 @@ u64 notrace trace_clock_global(void) int this_cpu; u64 now; - raw_local_irq_save(flags); + local_irq_save(flags); this_cpu = raw_smp_processor_id(); now = cpu_clock(this_cpu); @@ -110,7 +110,7 @@ u64 notrace trace_clock_global(void) arch_spin_unlock(&trace_clock_struct.lock); out: - raw_local_irq_restore(flags); + local_irq_restore(flags); return now; } diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index beab8bf2f310..c697c7043349 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -15,6 +15,7 @@ #include <linux/uaccess.h> #include <linux/module.h> #include <linux/ctype.h> +#include <linux/slab.h> #include <linux/delay.h> #include <asm/setup.h> diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 4615f62a04f1..88c0b6dbd7fe 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -22,6 +22,7 @@ #include <linux/ctype.h> #include <linux/mutex.h> #include <linux/perf_event.h> +#include <linux/slab.h> #include "trace.h" #include "trace_output.h" diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index e6989d9b44da..9aed1a5cf553 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -9,6 +9,7 @@ #include <linux/debugfs.h> #include <linux/uaccess.h> #include <linux/ftrace.h> +#include <linux/slab.h> #include <linux/fs.h> #include "trace.h" diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index 94103cdcf9d8..d59cd6879477 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -23,6 +23,7 @@ #include <linux/debugfs.h> #include <linux/ftrace.h> #include <linux/module.h> +#include <linux/slab.h> #include <linux/fs.h> #include "trace_output.h" diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index 0acd834659ed..017fa376505d 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -9,6 +9,7 @@ #include <linux/kernel.h> #include <linux/mmiotrace.h> #include <linux/pci.h> +#include <linux/slab.h> #include <linux/time.h> #include <asm/atomic.h> diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index a7084e7c0427..1cc9858258b3 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -3,6 +3,7 @@ #include <linux/stringify.h> #include <linux/kthread.h> #include <linux/delay.h> +#include <linux/slab.h> static inline int trace_valid_entry(struct trace_entry *entry) { diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c index a4bb239eb987..96cffb269e73 100644 --- a/kernel/trace/trace_stat.c +++ b/kernel/trace/trace_stat.c @@ -10,6 +10,7 @@ #include <linux/list.h> +#include <linux/slab.h> #include <linux/rbtree.h> #include <linux/debugfs.h> #include "trace_stat.h" diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 33c2a5b769dc..4d6d711717f2 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -1,5 +1,6 @@ #include <trace/syscall.h> #include <trace/events/syscalls.h> +#include <linux/slab.h> #include <linux/kernel.h> #include <linux/ftrace.h> #include <linux/perf_event.h> diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c index 40cafb07dffd..cc2d2faa7d9e 100644 --- a/kernel/trace/trace_workqueue.c +++ b/kernel/trace/trace_workqueue.c @@ -9,6 +9,7 @@ #include <trace/events/workqueue.h> #include <linux/list.h> #include <linux/percpu.h> +#include <linux/slab.h> #include <linux/kref.h> #include "trace_stat.h" #include "trace.h" |