diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/bounds.c | 2 | ||||
-rw-r--r-- | kernel/bpf/core.c | 9 | ||||
-rw-r--r-- | kernel/bpf/verifier.c | 33 | ||||
-rw-r--r-- | kernel/cpu.c | 14 | ||||
-rw-r--r-- | kernel/dma/swiotlb.c | 1 | ||||
-rw-r--r-- | kernel/power/hibernate.c | 2 | ||||
-rw-r--r-- | kernel/profile.c | 43 | ||||
-rw-r--r-- | kernel/sched/fair.c | 34 | ||||
-rw-r--r-- | kernel/sched/isolation.c | 18 | ||||
-rw-r--r-- | kernel/softirq.c | 12 | ||||
-rw-r--r-- | kernel/trace/trace_events.c | 12 | ||||
-rw-r--r-- | kernel/trace/trace_probe.c | 2 | ||||
-rw-r--r-- | kernel/vmcore_info.c | 5 | ||||
-rw-r--r-- | kernel/workqueue.c | 19 |
14 files changed, 128 insertions, 78 deletions
diff --git a/kernel/bounds.c b/kernel/bounds.c index c5a9fcd2d622..29b2cd00df2c 100644 --- a/kernel/bounds.c +++ b/kernel/bounds.c @@ -19,7 +19,7 @@ int main(void) DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS); DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES); #ifdef CONFIG_SMP - DEFINE(NR_CPUS_BITS, bits_per(CONFIG_NR_CPUS)); + DEFINE(NR_CPUS_BITS, order_base_2(CONFIG_NR_CPUS)); #endif DEFINE(SPINLOCK_SIZE, sizeof(spinlock_t)); #ifdef CONFIG_LRU_GEN diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 696bc55de8e8..1ea5ce5bb599 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2942,6 +2942,15 @@ bool __weak bpf_jit_supports_arena(void) return false; } +u64 __weak bpf_arch_uaddress_limit(void) +{ +#if defined(CONFIG_64BIT) && defined(CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE) + return TASK_SIZE; +#else + return 0; +#endif +} + /* Return TRUE if the JIT backend satisfies the following two conditions: * 1) JIT backend supports atomic_xchg() on pointer-sized words. * 2) Under the specific arch, the implementation of xchg() is the same diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 98188379d5c7..cb7ad1f795e1 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -18289,8 +18289,7 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env) f = fdget(fd); map = __bpf_map_get(f); if (IS_ERR(map)) { - verbose(env, "fd %d is not pointing to valid bpf_map\n", - insn[0].imm); + verbose(env, "fd %d is not pointing to valid bpf_map\n", fd); return PTR_ERR(map); } @@ -19676,6 +19675,36 @@ static int do_misc_fixups(struct bpf_verifier_env *env) goto next_insn; } + /* Make it impossible to de-reference a userspace address */ + if (BPF_CLASS(insn->code) == BPF_LDX && + (BPF_MODE(insn->code) == BPF_PROBE_MEM || + BPF_MODE(insn->code) == BPF_PROBE_MEMSX)) { + struct bpf_insn *patch = &insn_buf[0]; + u64 uaddress_limit = bpf_arch_uaddress_limit(); + + if (!uaddress_limit) + goto next_insn; + + *patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg); + if (insn->off) + *patch++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_AX, insn->off); + *patch++ = BPF_ALU64_IMM(BPF_RSH, BPF_REG_AX, 32); + *patch++ = BPF_JMP_IMM(BPF_JLE, BPF_REG_AX, uaddress_limit >> 32, 2); + *patch++ = *insn; + *patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1); + *patch++ = BPF_MOV64_IMM(insn->dst_reg, 0); + + cnt = patch - insn_buf; + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; + goto next_insn; + } + /* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */ if (BPF_CLASS(insn->code) == BPF_LD && (BPF_MODE(insn->code) == BPF_ABS || diff --git a/kernel/cpu.c b/kernel/cpu.c index 07ad53b7f119..63447eb85dab 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -3196,6 +3196,7 @@ void __init boot_cpu_hotplug_init(void) this_cpu_write(cpuhp_state.target, CPUHP_ONLINE); } +#ifdef CONFIG_CPU_MITIGATIONS /* * These are used for a global "mitigations=" cmdline option for toggling * optional CPU mitigations. @@ -3206,9 +3207,7 @@ enum cpu_mitigations { CPU_MITIGATIONS_AUTO_NOSMT, }; -static enum cpu_mitigations cpu_mitigations __ro_after_init = - IS_ENABLED(CONFIG_SPECULATION_MITIGATIONS) ? CPU_MITIGATIONS_AUTO : - CPU_MITIGATIONS_OFF; +static enum cpu_mitigations cpu_mitigations __ro_after_init = CPU_MITIGATIONS_AUTO; static int __init mitigations_parse_cmdline(char *arg) { @@ -3224,7 +3223,6 @@ static int __init mitigations_parse_cmdline(char *arg) return 0; } -early_param("mitigations", mitigations_parse_cmdline); /* mitigations=off */ bool cpu_mitigations_off(void) @@ -3239,3 +3237,11 @@ bool cpu_mitigations_auto_nosmt(void) return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT; } EXPORT_SYMBOL_GPL(cpu_mitigations_auto_nosmt); +#else +static int __init mitigations_parse_cmdline(char *arg) +{ + pr_crit("Kernel compiled without mitigations, ignoring 'mitigations'; system may still be vulnerable\n"); + return 0; +} +#endif +early_param("mitigations", mitigations_parse_cmdline); diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index a5e0dfc44d24..0de66f0ff43a 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -1798,6 +1798,7 @@ static int rmem_swiotlb_device_init(struct reserved_mem *rmem, mem->for_alloc = true; #ifdef CONFIG_SWIOTLB_DYNAMIC spin_lock_init(&mem->lock); + INIT_LIST_HEAD_RCU(&mem->pools); #endif add_mem_pool(mem, pool); diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 43b1a82e800c..0a213f69a9e4 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -1361,7 +1361,7 @@ static int __init resume_setup(char *str) if (noresume) return 1; - strncpy(resume_file, str, 255); + strscpy(resume_file, str); return 1; } diff --git a/kernel/profile.c b/kernel/profile.c index 8a77769bc4b4..2b775cc5c28f 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -344,49 +344,6 @@ void profile_tick(int type) #include <linux/seq_file.h> #include <linux/uaccess.h> -static int prof_cpu_mask_proc_show(struct seq_file *m, void *v) -{ - seq_printf(m, "%*pb\n", cpumask_pr_args(prof_cpu_mask)); - return 0; -} - -static int prof_cpu_mask_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, prof_cpu_mask_proc_show, NULL); -} - -static ssize_t prof_cpu_mask_proc_write(struct file *file, - const char __user *buffer, size_t count, loff_t *pos) -{ - cpumask_var_t new_value; - int err; - - if (!zalloc_cpumask_var(&new_value, GFP_KERNEL)) - return -ENOMEM; - - err = cpumask_parse_user(buffer, count, new_value); - if (!err) { - cpumask_copy(prof_cpu_mask, new_value); - err = count; - } - free_cpumask_var(new_value); - return err; -} - -static const struct proc_ops prof_cpu_mask_proc_ops = { - .proc_open = prof_cpu_mask_proc_open, - .proc_read = seq_read, - .proc_lseek = seq_lseek, - .proc_release = single_release, - .proc_write = prof_cpu_mask_proc_write, -}; - -void create_prof_cpu_mask(void) -{ - /* create /proc/irq/prof_cpu_mask */ - proc_create("irq/prof_cpu_mask", 0600, NULL, &prof_cpu_mask_proc_ops); -} - /* * This function accesses profiling information. The returned data is * binary: the sampling step and the actual contents of the profile diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 03be0d1330a6..c62805dbd608 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -696,15 +696,21 @@ u64 avg_vruntime(struct cfs_rq *cfs_rq) * * XXX could add max_slice to the augmented data to track this. */ -static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se) +static s64 entity_lag(u64 avruntime, struct sched_entity *se) { - s64 lag, limit; + s64 vlag, limit; + + vlag = avruntime - se->vruntime; + limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se); + return clamp(vlag, -limit, limit); +} + +static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se) +{ SCHED_WARN_ON(!se->on_rq); - lag = avg_vruntime(cfs_rq) - se->vruntime; - limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se); - se->vlag = clamp(lag, -limit, limit); + se->vlag = entity_lag(avg_vruntime(cfs_rq), se); } /* @@ -3676,11 +3682,10 @@ static inline void dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { } #endif -static void reweight_eevdf(struct cfs_rq *cfs_rq, struct sched_entity *se, +static void reweight_eevdf(struct sched_entity *se, u64 avruntime, unsigned long weight) { unsigned long old_weight = se->load.weight; - u64 avruntime = avg_vruntime(cfs_rq); s64 vlag, vslice; /* @@ -3761,7 +3766,7 @@ static void reweight_eevdf(struct cfs_rq *cfs_rq, struct sched_entity *se, * = V - vl' */ if (avruntime != se->vruntime) { - vlag = (s64)(avruntime - se->vruntime); + vlag = entity_lag(avruntime, se); vlag = div_s64(vlag * old_weight, weight); se->vruntime = avruntime - vlag; } @@ -3787,25 +3792,26 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, unsigned long weight) { bool curr = cfs_rq->curr == se; + u64 avruntime; if (se->on_rq) { /* commit outstanding execution time */ - if (curr) - update_curr(cfs_rq); - else + update_curr(cfs_rq); + avruntime = avg_vruntime(cfs_rq); + if (!curr) __dequeue_entity(cfs_rq, se); update_load_sub(&cfs_rq->load, se->load.weight); } dequeue_load_avg(cfs_rq, se); - if (!se->on_rq) { + if (se->on_rq) { + reweight_eevdf(se, avruntime, weight); + } else { /* * Because we keep se->vlag = V - v_i, while: lag_i = w_i*(V - v_i), * we need to scale se->vlag when w_i changes. */ se->vlag = div_s64(se->vlag * se->load.weight, weight); - } else { - reweight_eevdf(cfs_rq, se, weight); } update_load_set(&se->load, weight); diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c index 373d42c707bc..5891e715f00d 100644 --- a/kernel/sched/isolation.c +++ b/kernel/sched/isolation.c @@ -46,7 +46,16 @@ int housekeeping_any_cpu(enum hk_type type) if (cpu < nr_cpu_ids) return cpu; - return cpumask_any_and(housekeeping.cpumasks[type], cpu_online_mask); + cpu = cpumask_any_and(housekeeping.cpumasks[type], cpu_online_mask); + if (likely(cpu < nr_cpu_ids)) + return cpu; + /* + * Unless we have another problem this can only happen + * at boot time before start_secondary() brings the 1st + * housekeeping CPU up. + */ + WARN_ON_ONCE(system_state == SYSTEM_RUNNING || + type != HK_TYPE_TIMER); } } return smp_processor_id(); @@ -109,6 +118,7 @@ static void __init housekeeping_setup_type(enum hk_type type, static int __init housekeeping_setup(char *str, unsigned long flags) { cpumask_var_t non_housekeeping_mask, housekeeping_staging; + unsigned int first_cpu; int err = 0; if ((flags & HK_FLAG_TICK) && !(housekeeping.flags & HK_FLAG_TICK)) { @@ -129,7 +139,8 @@ static int __init housekeeping_setup(char *str, unsigned long flags) cpumask_andnot(housekeeping_staging, cpu_possible_mask, non_housekeeping_mask); - if (!cpumask_intersects(cpu_present_mask, housekeeping_staging)) { + first_cpu = cpumask_first_and(cpu_present_mask, housekeeping_staging); + if (first_cpu >= nr_cpu_ids || first_cpu >= setup_max_cpus) { __cpumask_set_cpu(smp_processor_id(), housekeeping_staging); __cpumask_clear_cpu(smp_processor_id(), non_housekeeping_mask); if (!housekeeping.flags) { @@ -138,6 +149,9 @@ static int __init housekeeping_setup(char *str, unsigned long flags) } } + if (cpumask_empty(non_housekeeping_mask)) + goto free_housekeeping_staging; + if (!housekeeping.flags) { /* First setup call ("nohz_full=" or "isolcpus=") */ enum hk_type type; diff --git a/kernel/softirq.c b/kernel/softirq.c index b315b21fb28c..02582017759a 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -508,7 +508,7 @@ static inline bool lockdep_softirq_start(void) { return false; } static inline void lockdep_softirq_end(bool in_hardirq) { } #endif -asmlinkage __visible void __softirq_entry __do_softirq(void) +static void handle_softirqs(bool ksirqd) { unsigned long end = jiffies + MAX_SOFTIRQ_TIME; unsigned long old_flags = current->flags; @@ -563,8 +563,7 @@ restart: pending >>= softirq_bit; } - if (!IS_ENABLED(CONFIG_PREEMPT_RT) && - __this_cpu_read(ksoftirqd) == current) + if (!IS_ENABLED(CONFIG_PREEMPT_RT) && ksirqd) rcu_softirq_qs(); local_irq_disable(); @@ -584,6 +583,11 @@ restart: current_restore_flags(old_flags, PF_MEMALLOC); } +asmlinkage __visible void __softirq_entry __do_softirq(void) +{ + handle_softirqs(false); +} + /** * irq_enter_rcu - Enter an interrupt context with RCU watching */ @@ -921,7 +925,7 @@ static void run_ksoftirqd(unsigned int cpu) * We can safely run softirq on inline stack, as we are not deep * in the task stack here. */ - __do_softirq(); + handle_softirqs(true); ksoftirqd_run_end(); cond_resched(); return; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 52f75c36bbca..6ef29eba90ce 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -2552,6 +2552,14 @@ static int event_callback(const char *name, umode_t *mode, void **data, return 0; } +/* The file is incremented on creation and freeing the enable file decrements it */ +static void event_release(const char *name, void *data) +{ + struct trace_event_file *file = data; + + event_file_put(file); +} + static int event_create_dir(struct eventfs_inode *parent, struct trace_event_file *file) { @@ -2566,6 +2574,7 @@ event_create_dir(struct eventfs_inode *parent, struct trace_event_file *file) { .name = "enable", .callback = event_callback, + .release = event_release, }, { .name = "filter", @@ -2634,6 +2643,9 @@ event_create_dir(struct eventfs_inode *parent, struct trace_event_file *file) return ret; } + /* Gets decremented on freeing of the "enable" file */ + event_file_get(file); + return 0; } diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index dfe3ee6035ec..42bc0f362226 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -1466,7 +1466,7 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size, parg->fmt = kmalloc(len, GFP_KERNEL); if (!parg->fmt) { ret = -ENOMEM; - goto out; + goto fail; } snprintf(parg->fmt, len, "%s[%d]", parg->type->fmttype, parg->count); diff --git a/kernel/vmcore_info.c b/kernel/vmcore_info.c index f95516cd45bb..23c125c2e243 100644 --- a/kernel/vmcore_info.c +++ b/kernel/vmcore_info.c @@ -205,11 +205,10 @@ static int __init crash_save_vmcoreinfo_init(void) VMCOREINFO_NUMBER(PG_head_mask); #define PAGE_BUDDY_MAPCOUNT_VALUE (~PG_buddy) VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE); -#ifdef CONFIG_HUGETLB_PAGE - VMCOREINFO_NUMBER(PG_hugetlb); +#define PAGE_HUGETLB_MAPCOUNT_VALUE (~PG_hugetlb) + VMCOREINFO_NUMBER(PAGE_HUGETLB_MAPCOUNT_VALUE); #define PAGE_OFFLINE_MAPCOUNT_VALUE (~PG_offline) VMCOREINFO_NUMBER(PAGE_OFFLINE_MAPCOUNT_VALUE); -#endif #ifdef CONFIG_KALLSYMS VMCOREINFO_SYMBOL(kallsyms_names); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 0066c8f6c154..d2dbe099286b 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1277,8 +1277,12 @@ static bool kick_pool(struct worker_pool *pool) !cpumask_test_cpu(p->wake_cpu, pool->attrs->__pod_cpumask)) { struct work_struct *work = list_first_entry(&pool->worklist, struct work_struct, entry); - p->wake_cpu = cpumask_any_distribute(pool->attrs->__pod_cpumask); - get_work_pwq(work)->stats[PWQ_STAT_REPATRIATED]++; + int wake_cpu = cpumask_any_and_distribute(pool->attrs->__pod_cpumask, + cpu_online_mask); + if (wake_cpu < nr_cpu_ids) { + p->wake_cpu = wake_cpu; + get_work_pwq(work)->stats[PWQ_STAT_REPATRIATED]++; + } } #endif wake_up_process(p); @@ -1594,6 +1598,15 @@ static void wq_update_node_max_active(struct workqueue_struct *wq, int off_cpu) if (off_cpu >= 0) total_cpus--; + /* If all CPUs of the wq get offline, use the default values */ + if (unlikely(!total_cpus)) { + for_each_node(node) + wq_node_nr_active(wq, node)->max = min_active; + + wq_node_nr_active(wq, NUMA_NO_NODE)->max = max_active; + return; + } + for_each_node(node) { int node_cpus; @@ -1606,7 +1619,7 @@ static void wq_update_node_max_active(struct workqueue_struct *wq, int off_cpu) min_active, max_active); } - wq_node_nr_active(wq, NUMA_NO_NODE)->max = min_active; + wq_node_nr_active(wq, NUMA_NO_NODE)->max = max_active; } /** |