Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Kconfig.kexec                |   4
-rw-r--r--  kernel/bpf/arraymap.c               |  58
-rw-r--r--  kernel/bpf/core.c                   |  12
-rw-r--r--  kernel/bpf/memalloc.c               |   2
-rw-r--r--  kernel/cgroup/legacy_freezer.c      |   8
-rw-r--r--  kernel/crash_core.c                 |  10
-rw-r--r--  kernel/cred.c                       | 251
-rw-r--r--  kernel/events/core.c                |  69
-rw-r--r--  kernel/exit.c                       |   3
-rw-r--r--  kernel/freezer.c                    |   2
-rw-r--r--  kernel/kprobes.c                    |   4
-rw-r--r--  kernel/locking/osq_lock.c           |  37
-rw-r--r--  kernel/resource.c                   |   4
-rw-r--r--  kernel/sys_ni.c                     |  14
-rw-r--r--  kernel/time/posix-stubs.c           |  45
-rw-r--r--  kernel/trace/ftrace.c               | 100
-rw-r--r--  kernel/trace/rethook.c              |  23
-rw-r--r--  kernel/trace/ring_buffer.c          | 209
-rw-r--r--  kernel/trace/synth_event_gen_test.c |  11
-rw-r--r--  kernel/trace/trace.c                | 190
-rw-r--r--  kernel/trace/trace.h                |   1
-rw-r--r--  kernel/trace/trace_events_hist.c    |  12
-rw-r--r--  kernel/trace/trace_events_synth.c   |   4
-rw-r--r--  kernel/trace/trace_output.c         |   6
-rw-r--r--  kernel/workqueue.c                  |  22
25 files changed, 410 insertions, 691 deletions
diff --git a/kernel/Kconfig.kexec b/kernel/Kconfig.kexec
index 7aff28ded2f4..946dffa048b7 100644
--- a/kernel/Kconfig.kexec
+++ b/kernel/Kconfig.kexec
@@ -36,6 +36,8 @@ config KEXEC
config KEXEC_FILE
bool "Enable kexec file based system call"
depends on ARCH_SUPPORTS_KEXEC_FILE
+ select CRYPTO
+ select CRYPTO_SHA256
select KEXEC_CORE
help
This is new version of kexec system call. This system call is
@@ -94,10 +96,8 @@ config KEXEC_JUMP
config CRASH_DUMP
bool "kernel crash dumps"
depends on ARCH_SUPPORTS_CRASH_DUMP
- depends on ARCH_SUPPORTS_KEXEC
select CRASH_CORE
select KEXEC_CORE
- select KEXEC
help
Generate crash dump after being started by kexec.
This should be normally only set in special crash dump kernels
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 2058e89b5ddd..c85ff9162a5c 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -1012,11 +1012,16 @@ static void prog_array_map_poke_untrack(struct bpf_map *map,
mutex_unlock(&aux->poke_mutex);
}
+void __weak bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
+ struct bpf_prog *new, struct bpf_prog *old)
+{
+ WARN_ON_ONCE(1);
+}
+
static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
struct bpf_prog *old,
struct bpf_prog *new)
{
- u8 *old_addr, *new_addr, *old_bypass_addr;
struct prog_poke_elem *elem;
struct bpf_array_aux *aux;
@@ -1025,7 +1030,7 @@ static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
list_for_each_entry(elem, &aux->poke_progs, list) {
struct bpf_jit_poke_descriptor *poke;
- int i, ret;
+ int i;
for (i = 0; i < elem->aux->size_poke_tab; i++) {
poke = &elem->aux->poke_tab[i];
@@ -1044,21 +1049,10 @@ static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
* activated, so tail call updates can arrive from here
* while JIT is still finishing its final fixup for
* non-activated poke entries.
- * 3) On program teardown, the program's kallsym entry gets
- * removed out of RCU callback, but we can only untrack
- * from sleepable context, therefore bpf_arch_text_poke()
- * might not see that this is in BPF text section and
- * bails out with -EINVAL. As these are unreachable since
- * RCU grace period already passed, we simply skip them.
- * 4) Also programs reaching refcount of zero while patching
+ * 3) Also programs reaching refcount of zero while patching
* is in progress is okay since we're protected under
* poke_mutex and untrack the programs before the JIT
- * buffer is freed. When we're still in the middle of
- * patching and suddenly kallsyms entry of the program
- * gets evicted, we just skip the rest which is fine due
- * to point 3).
- * 5) Any other error happening below from bpf_arch_text_poke()
- * is a unexpected bug.
+ * buffer is freed.
*/
if (!READ_ONCE(poke->tailcall_target_stable))
continue;
@@ -1068,39 +1062,7 @@ static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
poke->tail_call.key != key)
continue;
- old_bypass_addr = old ? NULL : poke->bypass_addr;
- old_addr = old ? (u8 *)old->bpf_func + poke->adj_off : NULL;
- new_addr = new ? (u8 *)new->bpf_func + poke->adj_off : NULL;
-
- if (new) {
- ret = bpf_arch_text_poke(poke->tailcall_target,
- BPF_MOD_JUMP,
- old_addr, new_addr);
- BUG_ON(ret < 0 && ret != -EINVAL);
- if (!old) {
- ret = bpf_arch_text_poke(poke->tailcall_bypass,
- BPF_MOD_JUMP,
- poke->bypass_addr,
- NULL);
- BUG_ON(ret < 0 && ret != -EINVAL);
- }
- } else {
- ret = bpf_arch_text_poke(poke->tailcall_bypass,
- BPF_MOD_JUMP,
- old_bypass_addr,
- poke->bypass_addr);
- BUG_ON(ret < 0 && ret != -EINVAL);
- /* let other CPUs finish the execution of program
- * so that it will not possible to expose them
- * to invalid nop, stack unwind, nop state
- */
- if (!ret)
- synchronize_rcu();
- ret = bpf_arch_text_poke(poke->tailcall_target,
- BPF_MOD_JUMP,
- old_addr, NULL);
- BUG_ON(ret < 0 && ret != -EINVAL);
- }
+ bpf_arch_poke_desc_update(poke, new, old);
}
}
}
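
The hunk above replaces the open-coded bpf_arch_text_poke() sequence with a weak bpf_arch_poke_desc_update() hook that architectures can override. As a rough sketch only (reconstructed from the generic code removed above, not taken from an actual arch backend), a strong definition could look like this:

/*
 * Hypothetical strong definition of bpf_arch_poke_desc_update(),
 * reconstructed from the generic logic removed above; a real
 * architecture backend may differ.
 */
void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
			       struct bpf_prog *new, struct bpf_prog *old)
{
	u8 *old_bypass_addr = old ? NULL : poke->bypass_addr;
	u8 *old_addr = old ? (u8 *)old->bpf_func + poke->adj_off : NULL;
	u8 *new_addr = new ? (u8 *)new->bpf_func + poke->adj_off : NULL;
	int ret;

	if (new) {
		/* Redirect the tail call site to the new program. */
		ret = bpf_arch_text_poke(poke->tailcall_target, BPF_MOD_JUMP,
					 old_addr, new_addr);
		BUG_ON(ret < 0 && ret != -EINVAL);
		if (!old) {
			/* First target installed: disable the bypass jump. */
			ret = bpf_arch_text_poke(poke->tailcall_bypass,
						 BPF_MOD_JUMP,
						 poke->bypass_addr, NULL);
			BUG_ON(ret < 0 && ret != -EINVAL);
		}
	} else {
		/* Re-enable the bypass before removing the stale target. */
		ret = bpf_arch_text_poke(poke->tailcall_bypass, BPF_MOD_JUMP,
					 old_bypass_addr, poke->bypass_addr);
		BUG_ON(ret < 0 && ret != -EINVAL);
		/* Let in-flight executions drain before patching the target. */
		if (!ret)
			synchronize_rcu();
		ret = bpf_arch_text_poke(poke->tailcall_target, BPF_MOD_JUMP,
					 old_addr, NULL);
		BUG_ON(ret < 0 && ret != -EINVAL);
	}
}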
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index cd3afe57ece3..fe254ae035fe 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -371,14 +371,18 @@ static int bpf_adj_delta_to_imm(struct bpf_insn *insn, u32 pos, s32 end_old,
static int bpf_adj_delta_to_off(struct bpf_insn *insn, u32 pos, s32 end_old,
s32 end_new, s32 curr, const bool probe_pass)
{
- const s32 off_min = S16_MIN, off_max = S16_MAX;
+ s64 off_min, off_max, off;
s32 delta = end_new - end_old;
- s32 off;
- if (insn->code == (BPF_JMP32 | BPF_JA))
+ if (insn->code == (BPF_JMP32 | BPF_JA)) {
off = insn->imm;
- else
+ off_min = S32_MIN;
+ off_max = S32_MAX;
+ } else {
off = insn->off;
+ off_min = S16_MIN;
+ off_max = S16_MAX;
+ }
if (curr < pos && curr + off + 1 >= end_old)
off += delta;
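
For context on the widened bounds: a gotol instruction (BPF_JMP32 | BPF_JA) keeps its jump target in the 32-bit imm field, while every other jump uses the 16-bit off field, so the adjusted offset must be range-checked against different limits. A minimal sketch of that check as a standalone helper (hypothetical name; the patch keeps the logic inline in bpf_adj_delta_to_off()):

/* Illustration only: range check matching the hunk above. */
static bool bpf_jmp_offset_fits(const struct bpf_insn *insn, s64 off)
{
	if (insn->code == (BPF_JMP32 | BPF_JA))
		return off >= S32_MIN && off <= S32_MAX;	/* 32-bit imm */

	return off >= S16_MIN && off <= S16_MAX;		/* 16-bit off */
}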
diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index 63b909d277d4..6a51cfe4c2d6 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -978,6 +978,8 @@ void notrace *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags)
memcg = get_memcg(c);
old_memcg = set_active_memcg(memcg);
ret = __alloc(c, NUMA_NO_NODE, GFP_KERNEL | __GFP_NOWARN | __GFP_ACCOUNT);
+ if (ret)
+ *(struct bpf_mem_cache **)ret = c;
set_active_memcg(old_memcg);
mem_cgroup_put(memcg);
}
diff --git a/kernel/cgroup/legacy_freezer.c b/kernel/cgroup/legacy_freezer.c
index 122dacb3a443..66d1708042a7 100644
--- a/kernel/cgroup/legacy_freezer.c
+++ b/kernel/cgroup/legacy_freezer.c
@@ -66,9 +66,15 @@ static struct freezer *parent_freezer(struct freezer *freezer)
bool cgroup_freezing(struct task_struct *task)
{
bool ret;
+ unsigned int state;
rcu_read_lock();
- ret = task_freezer(task)->state & CGROUP_FREEZING;
+ /* Check if the cgroup is still FREEZING, but not FROZEN. The extra
+ * !FROZEN check is required, because the FREEZING bit is not cleared
+ * when the state FROZEN is reached.
+ */
+ state = task_freezer(task)->state;
+ ret = (state & CGROUP_FREEZING) && !(state & CGROUP_FROZEN);
rcu_read_unlock();
return ret;
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index efe87d501c8c..d4313b53837e 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -199,7 +199,7 @@ static __initdata char *suffix_tbl[] = {
* It returns 0 on success and -EINVAL on failure.
*/
static int __init parse_crashkernel_suffix(char *cmdline,
- unsigned long long *crash_size,
+ unsigned long long *crash_size,
const char *suffix)
{
char *cur = cmdline;
@@ -268,9 +268,9 @@ static int __init __parse_crashkernel(char *cmdline,
unsigned long long *crash_base,
const char *suffix)
{
- char *first_colon, *first_space;
- char *ck_cmdline;
- char *name = "crashkernel=";
+ char *first_colon, *first_space;
+ char *ck_cmdline;
+ char *name = "crashkernel=";
BUG_ON(!crash_size || !crash_base);
*crash_size = 0;
@@ -440,7 +440,7 @@ retry:
return;
}
- if ((crash_base > CRASH_ADDR_LOW_MAX) &&
+ if ((crash_base >= CRASH_ADDR_LOW_MAX) &&
crash_low_size && reserve_crashkernel_low(crash_low_size)) {
memblock_phys_free(crash_base, crash_size);
return;
diff --git a/kernel/cred.c b/kernel/cred.c
index 3c714cb31660..c033a201c808 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -43,10 +43,6 @@ static struct group_info init_groups = { .usage = REFCOUNT_INIT(2) };
*/
struct cred init_cred = {
.usage = ATOMIC_INIT(4),
-#ifdef CONFIG_DEBUG_CREDENTIALS
- .subscribers = ATOMIC_INIT(2),
- .magic = CRED_MAGIC,
-#endif
.uid = GLOBAL_ROOT_UID,
.gid = GLOBAL_ROOT_GID,
.suid = GLOBAL_ROOT_UID,
@@ -66,31 +62,6 @@ struct cred init_cred = {
.ucounts = &init_ucounts,
};
-static inline void set_cred_subscribers(struct cred *cred, int n)
-{
-#ifdef CONFIG_DEBUG_CREDENTIALS
- atomic_set(&cred->subscribers, n);
-#endif
-}
-
-static inline int read_cred_subscribers(const struct cred *cred)
-{
-#ifdef CONFIG_DEBUG_CREDENTIALS
- return atomic_read(&cred->subscribers);
-#else
- return 0;
-#endif
-}
-
-static inline void alter_cred_subscribers(const struct cred *_cred, int n)
-{
-#ifdef CONFIG_DEBUG_CREDENTIALS
- struct cred *cred = (struct cred *) _cred;
-
- atomic_add(n, &cred->subscribers);
-#endif
-}
-
/*
* The RCU callback to actually dispose of a set of credentials
*/
@@ -100,20 +71,9 @@ static void put_cred_rcu(struct rcu_head *rcu)
kdebug("put_cred_rcu(%p)", cred);
-#ifdef CONFIG_DEBUG_CREDENTIALS
- if (cred->magic != CRED_MAGIC_DEAD ||
- atomic_read(&cred->usage) != 0 ||
- read_cred_subscribers(cred) != 0)
- panic("CRED: put_cred_rcu() sees %p with"
- " mag %x, put %p, usage %d, subscr %d\n",
- cred, cred->magic, cred->put_addr,
- atomic_read(&cred->usage),
- read_cred_subscribers(cred));
-#else
- if (atomic_read(&cred->usage) != 0)
- panic("CRED: put_cred_rcu() sees %p with usage %d\n",
- cred, atomic_read(&cred->usage));
-#endif
+ if (atomic_long_read(&cred->usage) != 0)
+ panic("CRED: put_cred_rcu() sees %p with usage %ld\n",
+ cred, atomic_long_read(&cred->usage));
security_cred_free(cred);
key_put(cred->session_keyring);
@@ -137,16 +97,10 @@ static void put_cred_rcu(struct rcu_head *rcu)
*/
void __put_cred(struct cred *cred)
{
- kdebug("__put_cred(%p{%d,%d})", cred,
- atomic_read(&cred->usage),
- read_cred_subscribers(cred));
-
- BUG_ON(atomic_read(&cred->usage) != 0);
-#ifdef CONFIG_DEBUG_CREDENTIALS
- BUG_ON(read_cred_subscribers(cred) != 0);
- cred->magic = CRED_MAGIC_DEAD;
- cred->put_addr = __builtin_return_address(0);
-#endif
+ kdebug("__put_cred(%p{%ld})", cred,
+ atomic_long_read(&cred->usage));
+
+ BUG_ON(atomic_long_read(&cred->usage) != 0);
BUG_ON(cred == current->cred);
BUG_ON(cred == current->real_cred);
@@ -164,9 +118,8 @@ void exit_creds(struct task_struct *tsk)
{
struct cred *real_cred, *cred;
- kdebug("exit_creds(%u,%p,%p,{%d,%d})", tsk->pid, tsk->real_cred, tsk->cred,
- atomic_read(&tsk->cred->usage),
- read_cred_subscribers(tsk->cred));
+ kdebug("exit_creds(%u,%p,%p,{%ld})", tsk->pid, tsk->real_cred, tsk->cred,
+ atomic_long_read(&tsk->cred->usage));
real_cred = (struct cred *) tsk->real_cred;
tsk->real_cred = NULL;
@@ -174,15 +127,10 @@ void exit_creds(struct task_struct *tsk)
cred = (struct cred *) tsk->cred;
tsk->cred = NULL;
- validate_creds(cred);
if (real_cred == cred) {
- alter_cred_subscribers(cred, -2);
put_cred_many(cred, 2);
} else {
- validate_creds(real_cred);
- alter_cred_subscribers(real_cred, -1);
put_cred(real_cred);
- alter_cred_subscribers(cred, -1);
put_cred(cred);
}
@@ -230,10 +178,7 @@ struct cred *cred_alloc_blank(void)
if (!new)
return NULL;
- atomic_set(&new->usage, 1);
-#ifdef CONFIG_DEBUG_CREDENTIALS
- new->magic = CRED_MAGIC;
-#endif
+ atomic_long_set(&new->usage, 1);
if (security_cred_alloc_blank(new, GFP_KERNEL_ACCOUNT) < 0)
goto error;
@@ -264,8 +209,6 @@ struct cred *prepare_creds(void)
const struct cred *old;
struct cred *new;
- validate_process_creds();
-
new = kmem_cache_alloc(cred_jar, GFP_KERNEL);
if (!new)
return NULL;
@@ -276,8 +219,7 @@ struct cred *prepare_creds(void)
memcpy(new, old, sizeof(struct cred));
new->non_rcu = 0;
- atomic_set(&new->usage, 1);
- set_cred_subscribers(new, 0);
+ atomic_long_set(&new->usage, 1);
get_group_info(new->group_info);
get_uid(new->user);
get_user_ns(new->user_ns);
@@ -300,7 +242,6 @@ struct cred *prepare_creds(void)
if (security_prepare_creds(new, old, GFP_KERNEL_ACCOUNT) < 0)
goto error;
- validate_creds(new);
return new;
error:
@@ -362,10 +303,8 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
clone_flags & CLONE_THREAD
) {
p->real_cred = get_cred_many(p->cred, 2);
- alter_cred_subscribers(p->cred, 2);
- kdebug("share_creds(%p{%d,%d})",
- p->cred, atomic_read(&p->cred->usage),
- read_cred_subscribers(p->cred));
+ kdebug("share_creds(%p{%ld})",
+ p->cred, atomic_long_read(&p->cred->usage));
inc_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1);
return 0;
}
@@ -404,8 +343,6 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
p->cred = p->real_cred = get_cred(new);
inc_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1);
- alter_cred_subscribers(new, 2);
- validate_creds(new);
return 0;
error_put:
@@ -457,17 +394,11 @@ int commit_creds(struct cred *new)
struct task_struct *task = current;
const struct cred *old = task->real_cred;
- kdebug("commit_creds(%p{%d,%d})", new,
- atomic_read(&new->usage),
- read_cred_subscribers(new));
+ kdebug("commit_creds(%p{%ld})", new,
+ atomic_long_read(&new->usage));
BUG_ON(task->cred != old);
-#ifdef CONFIG_DEBUG_CREDENTIALS
- BUG_ON(read_cred_subscribers(old) < 2);
- validate_creds(old);
- validate_creds(new);
-#endif
- BUG_ON(atomic_read(&new->usage) < 1);
+ BUG_ON(atomic_long_read(&new->usage) < 1);
get_cred(new); /* we will require a ref for the subj creds too */
@@ -502,14 +433,12 @@ int commit_creds(struct cred *new)
* RLIMIT_NPROC limits on user->processes have already been checked
* in set_user().
*/
- alter_cred_subscribers(new, 2);
if (new->user != old->user || new->user_ns != old->user_ns)
inc_rlimit_ucounts(new->ucounts, UCOUNT_RLIMIT_NPROC, 1);
rcu_assign_pointer(task->real_cred, new);
rcu_assign_pointer(task->cred, new);
if (new->user != old->user || new->user_ns != old->user_ns)
dec_rlimit_ucounts(old->ucounts, UCOUNT_RLIMIT_NPROC, 1);
- alter_cred_subscribers(old, -2);
/* send notifications */
if (!uid_eq(new->uid, old->uid) ||
@@ -539,14 +468,10 @@ EXPORT_SYMBOL(commit_creds);
*/
void abort_creds(struct cred *new)
{
- kdebug("abort_creds(%p{%d,%d})", new,
- atomic_read(&new->usage),
- read_cred_subscribers(new));
+ kdebug("abort_creds(%p{%ld})", new,
+ atomic_long_read(&new->usage));
-#ifdef CONFIG_DEBUG_CREDENTIALS
- BUG_ON(read_cred_subscribers(new) != 0);
-#endif
- BUG_ON(atomic_read(&new->usage) < 1);
+ BUG_ON(atomic_long_read(&new->usage) < 1);
put_cred(new);
}
EXPORT_SYMBOL(abort_creds);
@@ -562,12 +487,8 @@ const struct cred *override_creds(const struct cred *new)
{
const struct cred *old = current->cred;
- kdebug("override_creds(%p{%d,%d})", new,
- atomic_read(&new->usage),
- read_cred_subscribers(new));
-
- validate_creds(old);
- validate_creds(new);
+ kdebug("override_creds(%p{%ld})", new,
+ atomic_long_read(&new->usage));
/*
* NOTE! This uses 'get_new_cred()' rather than 'get_cred()'.
@@ -576,18 +497,12 @@ const struct cred *override_creds(const struct cred *new)
* we are only installing the cred into the thread-synchronous
* '->cred' pointer, not the '->real_cred' pointer that is
* visible to other threads under RCU.
- *
- * Also note that we did validate_creds() manually, not depending
- * on the validation in 'get_cred()'.
*/
get_new_cred((struct cred *)new);
- alter_cred_subscribers(new, 1);
rcu_assign_pointer(current->cred, new);
- alter_cred_subscribers(old, -1);
- kdebug("override_creds() = %p{%d,%d}", old,
- atomic_read(&old->usage),
- read_cred_subscribers(old));
+ kdebug("override_creds() = %p{%ld}", old,
+ atomic_long_read(&old->usage));
return old;
}
EXPORT_SYMBOL(override_creds);
@@ -603,15 +518,10 @@ void revert_creds(const struct cred *old)
{
const struct cred *override = current->cred;
- kdebug("revert_creds(%p{%d,%d})", old,
- atomic_read(&old->usage),
- read_cred_subscribers(old));
+ kdebug("revert_creds(%p{%ld})", old,
+ atomic_long_read(&old->usage));
- validate_creds(old);
- validate_creds(override);
- alter_cred_subscribers(old, 1);
rcu_assign_pointer(current->cred, old);
- alter_cred_subscribers(override, -1);
put_cred(override);
}
EXPORT_SYMBOL(revert_creds);
@@ -731,12 +641,10 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon)
kdebug("prepare_kernel_cred() alloc %p", new);
old = get_task_cred(daemon);
- validate_creds(old);
*new = *old;
new->non_rcu = 0;
- atomic_set(&new->usage, 1);
- set_cred_subscribers(new, 0);
+ atomic_long_set(&new->usage, 1);
get_uid(new->user);
get_user_ns(new->user_ns);
get_group_info(new->group_info);
@@ -760,7 +668,6 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon)
goto error;
put_cred(old);
- validate_creds(new);
return new;
error:
@@ -825,109 +732,3 @@ int set_create_files_as(struct cred *new, struct inode *inode)
return security_kernel_create_files_as(new, inode);
}
EXPORT_SYMBOL(set_create_files_as);
-
-#ifdef CONFIG_DEBUG_CREDENTIALS
-
-bool creds_are_invalid(const struct cred *cred)
-{
- if (cred->magic != CRED_MAGIC)
- return true;
- return false;
-}
-EXPORT_SYMBOL(creds_are_invalid);
-
-/*
- * dump invalid credentials
- */
-static void dump_invalid_creds(const struct cred *cred, const char *label,
- const struct task_struct *tsk)
-{
- pr_err("%s credentials: %p %s%s%s\n",
- label, cred,
- cred == &init_cred ? "[init]" : "",
- cred == tsk->real_cred ? "[real]" : "",
- cred == tsk->cred ? "[eff]" : "");
- pr_err("->magic=%x, put_addr=%p\n",
- cred->magic, cred->put_addr);
- pr_err("->usage=%d, subscr=%d\n",
- atomic_read(&cred->usage),
- read_cred_subscribers(cred));
- pr_err("->*uid = { %d,%d,%d,%d }\n",
- from_kuid_munged(&init_user_ns, cred->uid),
- from_kuid_munged(&init_user_ns, cred->euid),
- from_kuid_munged(&init_user_ns, cred->suid),
- from_kuid_munged(&init_user_ns, cred->fsuid));
- pr_err("->*gid = { %d,%d,%d,%d }\n",
- from_kgid_munged(&init_user_ns, cred->gid),
- from_kgid_munged(&init_user_ns, cred->egid),
- from_kgid_munged(&init_user_ns, cred->sgid),
- from_kgid_munged(&init_user_ns, cred->fsgid));
-#ifdef CONFIG_SECURITY
- pr_err("->security is %p\n", cred->security);
- if ((unsigned long) cred->security >= PAGE_SIZE &&
- (((unsigned long) cred->security & 0xffffff00) !=
- (POISON_FREE << 24 | POISON_FREE << 16 | POISON_FREE << 8)))
- pr_err("->security {%x, %x}\n",
- ((u32*)cred->security)[0],
- ((u32*)cred->security)[1]);
-#endif
-}
-
-/*
- * report use of invalid credentials
- */
-void __noreturn __invalid_creds(const struct cred *cred, const char *file, unsigned line)
-{
- pr_err("Invalid credentials\n");
- pr_err("At %s:%u\n", file, line);
- dump_invalid_creds(cred, "Specified", current);
- BUG();
-}
-EXPORT_SYMBOL(__invalid_creds);
-
-/*
- * check the credentials on a process
- */
-void __validate_process_creds(struct task_struct *tsk,
- const char *file, unsigned line)
-{
- if (tsk->cred == tsk->real_cred) {
- if (unlikely(read_cred_subscribers(tsk->cred) < 2 ||
- creds_are_invalid(tsk->cred)))
- goto invalid_creds;
- } else {
- if (unlikely(read_cred_subscribers(tsk->real_cred) < 1 ||
- read_cred_subscribers(tsk->cred) < 1 ||
- creds_are_invalid(tsk->real_cred) ||
- creds_are_invalid(tsk->cred)))
- goto invalid_creds;
- }
- return;
-
-invalid_creds:
- pr_err("Invalid process credentials\n");
- pr_err("At %s:%u\n", file, line);
-
- dump_invalid_creds(tsk->real_cred, "Real", tsk);
- if (tsk->cred != tsk->real_cred)
- dump_invalid_creds(tsk->cred, "Effective", tsk);
- else
- pr_err("Effective creds == Real creds\n");
- BUG();
-}
-EXPORT_SYMBOL(__validate_process_creds);
-
-/*
- * check creds for do_exit()
- */
-void validate_creds_for_do_exit(struct task_struct *tsk)
-{
- kdebug("validate_creds_for_do_exit(%p,%p{%d,%d})",
- tsk->real_cred, tsk->cred,
- atomic_read(&tsk->cred->usage),
- read_cred_subscribers(tsk->cred));
-
- __validate_process_creds(tsk, __FILE__, __LINE__);
-}
-
-#endif /* CONFIG_DEBUG_CREDENTIALS */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index b704d83a28b2..9efd0d7775e7 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1814,31 +1814,34 @@ static inline void perf_event__state_init(struct perf_event *event)
PERF_EVENT_STATE_INACTIVE;
}
-static void __perf_event_read_size(struct perf_event *event, int nr_siblings)
+static int __perf_event_read_size(u64 read_format, int nr_siblings)
{
int entry = sizeof(u64); /* value */
int size = 0;
int nr = 1;
- if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
size += sizeof(u64);
- if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
size += sizeof(u64);
- if (event->attr.read_format & PERF_FORMAT_ID)
+ if (read_format & PERF_FORMAT_ID)
entry += sizeof(u64);
- if (event->attr.read_format & PERF_FORMAT_LOST)
+ if (read_format & PERF_FORMAT_LOST)
entry += sizeof(u64);
- if (event->attr.read_format & PERF_FORMAT_GROUP) {
+ if (read_format & PERF_FORMAT_GROUP) {
nr += nr_siblings;
size += sizeof(u64);
}
- size += entry * nr;
- event->read_size = size;
+ /*
+ * Since perf_event_validate_size() limits this to 16k and inhibits
+ * adding more siblings, this will never overflow.
+ */
+ return size + nr * entry;
}
static void __perf_event_header_size(struct perf_event *event, u64 sample_type)
@@ -1888,8 +1891,9 @@ static void __perf_event_header_size(struct perf_event *event, u64 sample_type)
*/
static void perf_event__header_size(struct perf_event *event)
{
- __perf_event_read_size(event,
- event->group_leader->nr_siblings);
+ event->read_size =
+ __perf_event_read_size(event->attr.read_format,
+ event->group_leader->nr_siblings);
__perf_event_header_size(event, event->attr.sample_type);
}
@@ -1920,23 +1924,44 @@ static void perf_event__id_header_size(struct perf_event *event)
event->id_header_size = size;
}
+/*
+ * Check that adding an event to the group does not result in anybody
+ * overflowing the 64k event limit imposed by the output buffer.
+ *
+ * Specifically, check that the read_size for the event does not exceed 16k,
+ * read_size being the one term that grows with groups size. Since read_size
+ * depends on per-event read_format, also (re)check the existing events.
+ *
+ * This leaves 48k for the constant size fields and things like callchains,
+ * branch stacks and register sets.
+ */
static bool perf_event_validate_size(struct perf_event *event)
{
- /*
- * The values computed here will be over-written when we actually
- * attach the event.
- */
- __perf_event_read_size(event, event->group_leader->nr_siblings + 1);
- __perf_event_header_size(event, event->attr.sample_type & ~PERF_SAMPLE_READ);
- perf_event__id_header_size(event);
+ struct perf_event *sibling, *group_leader = event->group_leader;
+
+ if (__perf_event_read_size(event->attr.read_format,
+ group_leader->nr_siblings + 1) > 16*1024)
+ return false;
+
+ if (__perf_event_read_size(group_leader->attr.read_format,
+ group_leader->nr_siblings + 1) > 16*1024)
+ return false;
/*
- * Sum the lot; should not exceed the 64k limit we have on records.
- * Conservative limit to allow for callchains and other variable fields.
+ * When creating a new group leader, group_leader->ctx is initialized
+ * after the size has been validated, but we cannot safely use
+ * for_each_sibling_event() until group_leader->ctx is set. A new group
+ * leader cannot have any siblings yet, so we can safely skip checking
+ * the non-existent siblings.
*/
- if (event->read_size + event->header_size +
- event->id_header_size + sizeof(struct perf_event_header) >= 16*1024)
- return false;
+ if (event == group_leader)
+ return true;
+
+ for_each_sibling_event(sibling, group_leader) {
+ if (__perf_event_read_size(sibling->attr.read_format,
+ group_leader->nr_siblings + 1) > 16*1024)
+ return false;
+ }
return true;
}
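
To make the 16k cap concrete, here is a worked example of the value __perf_event_read_size() computes; the read_format bits and sibling count are invented for illustration:

/*
 * read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID |
 *               PERF_FORMAT_TOTAL_TIME_ENABLED, nr_siblings = 2:
 *
 *   entry = 8 (value) + 8 (PERF_FORMAT_ID)               = 16 bytes/member
 *   size  = 8 (time_enabled) + 8 (nr, PERF_FORMAT_GROUP) = 16 bytes
 *   nr    = 1 leader + 2 siblings                        = 3 members
 *
 *   read_size = size + nr * entry = 16 + 3 * 16 = 64 bytes
 *
 * well under the 16*1024 limit that perf_event_validate_size() now
 * enforces for the new event, the group leader and each existing sibling.
 */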
diff --git a/kernel/exit.c b/kernel/exit.c
index ee9f43bed49a..aedc0832c9f4 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -824,8 +824,6 @@ void __noreturn do_exit(long code)
ptrace_event(PTRACE_EVENT_EXIT, code);
user_events_exit(tsk);
- validate_creds_for_do_exit(tsk);
-
io_uring_files_cancel();
exit_signals(tsk); /* sets PF_EXITING */
@@ -909,7 +907,6 @@ void __noreturn do_exit(long code)
if (tsk->task_frag.page)
put_page(tsk->task_frag.page);
- validate_creds_for_do_exit(tsk);
exit_task_stack_account(tsk);
check_stack_usage();
diff --git a/kernel/freezer.c b/kernel/freezer.c
index c450fa8b8b5e..759006a9a910 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -201,7 +201,7 @@ void __thaw_task(struct task_struct *p)
if (WARN_ON_ONCE(freezing(p)))
goto unlock;
- if (task_call_func(p, __restore_freezer_state, NULL))
+ if (!frozen(p) || task_call_func(p, __restore_freezer_state, NULL))
goto unlock;
wake_up_state(p, TASK_FROZEN);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 075a632e6c7c..d5a0ee40bf66 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -2252,7 +2252,7 @@ int register_kretprobe(struct kretprobe *rp)
rp->rph = NULL;
return -ENOMEM;
}
- rp->rph->rp = rp;
+ rcu_assign_pointer(rp->rph->rp, rp);
rp->nmissed = 0;
/* Establish function entry probe point */
ret = register_kprobe(&rp->kp);
@@ -2300,7 +2300,7 @@ void unregister_kretprobes(struct kretprobe **rps, int num)
#ifdef CONFIG_KRETPROBE_ON_RETHOOK
rethook_free(rps[i]->rh);
#else
- rps[i]->rph->rp = NULL;
+ rcu_assign_pointer(rps[i]->rph->rp, NULL);
#endif
}
mutex_unlock(&kprobe_mutex);
diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c
index d5610ad52b92..75a6f6133866 100644
--- a/kernel/locking/osq_lock.c
+++ b/kernel/locking/osq_lock.c
@@ -11,6 +11,13 @@
* called from interrupt context and we have preemption disabled while
* spinning.
*/
+
+struct optimistic_spin_node {
+ struct optimistic_spin_node *next, *prev;
+ int locked; /* 1 if lock acquired */
+ int cpu; /* encoded CPU # + 1 value */
+};
+
static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_node, osq_node);
/*
@@ -37,32 +44,28 @@ static inline struct optimistic_spin_node *decode_cpu(int encoded_cpu_val)
/*
* Get a stable @node->next pointer, either for unlock() or unqueue() purposes.
* Can return NULL in case we were the last queued and we updated @lock instead.
+ *
+ * If osq_lock() is being cancelled there must be a previous node
+ * and 'old_cpu' is its CPU #.
+ * For osq_unlock() there is never a previous node and old_cpu is
+ * set to OSQ_UNLOCKED_VAL.
*/
static inline struct optimistic_spin_node *
osq_wait_next(struct optimistic_spin_queue *lock,
struct optimistic_spin_node *node,
- struct optimistic_spin_node *prev)
+ int old_cpu)
{
- struct optimistic_spin_node *next = NULL;
int curr = encode_cpu(smp_processor_id());
- int old;
-
- /*
- * If there is a prev node in queue, then the 'old' value will be
- * the prev node's CPU #, else it's set to OSQ_UNLOCKED_VAL since if
- * we're currently last in queue, then the queue will then become empty.
- */
- old = prev ? prev->cpu : OSQ_UNLOCKED_VAL;
for (;;) {
if (atomic_read(&lock->tail) == curr &&
- atomic_cmpxchg_acquire(&lock->tail, curr, old) == curr) {
+ atomic_cmpxchg_acquire(&lock->tail, curr, old_cpu) == curr) {
/*
* We were the last queued, we moved @lock back. @prev
* will now observe @lock and will complete its
* unlock()/unqueue().
*/
- break;
+ return NULL;
}
/*
@@ -76,15 +79,15 @@ osq_wait_next(struct optimistic_spin_queue *lock,
* wait for a new @node->next from its Step-C.
*/
if (node->next) {
+ struct optimistic_spin_node *next;
+
next = xchg(&node->next, NULL);
if (next)
- break;
+ return next;
}
cpu_relax();
}
-
- return next;
}
bool osq_lock(struct optimistic_spin_queue *lock)
@@ -186,7 +189,7 @@ bool osq_lock(struct optimistic_spin_queue *lock)
* back to @prev.
*/
- next = osq_wait_next(lock, node, prev);
+ next = osq_wait_next(lock, node, prev->cpu);
if (!next)
return false;
@@ -226,7 +229,7 @@ void osq_unlock(struct optimistic_spin_queue *lock)
return;
}
- next = osq_wait_next(lock, node, NULL);
+ next = osq_wait_next(lock, node, OSQ_UNLOCKED_VAL);
if (next)
WRITE_ONCE(next->locked, 1);
}
diff --git a/kernel/resource.c b/kernel/resource.c
index 866ef3663a0b..91be1bc50b60 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -1844,8 +1844,8 @@ get_free_mem_region(struct device *dev, struct resource *base,
write_lock(&resource_lock);
for (addr = gfr_start(base, size, align, flags);
- gfr_continue(base, addr, size, flags);
- addr = gfr_next(addr, size, flags)) {
+ gfr_continue(base, addr, align, flags);
+ addr = gfr_next(addr, align, flags)) {
if (__region_intersects(base, addr, size, 0, IORES_DESC_NONE) !=
REGION_DISJOINT)
continue;
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index e1a6e3c675c0..9a846439b36a 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -201,6 +201,20 @@ COND_SYSCALL(recvmmsg_time32);
COND_SYSCALL_COMPAT(recvmmsg_time32);
COND_SYSCALL_COMPAT(recvmmsg_time64);
+/* Posix timer syscalls may be configured out */
+COND_SYSCALL(timer_create);
+COND_SYSCALL(timer_gettime);
+COND_SYSCALL(timer_getoverrun);
+COND_SYSCALL(timer_settime);
+COND_SYSCALL(timer_delete);
+COND_SYSCALL(clock_adjtime);
+COND_SYSCALL(getitimer);
+COND_SYSCALL(setitimer);
+COND_SYSCALL(alarm);
+COND_SYSCALL_COMPAT(timer_create);
+COND_SYSCALL_COMPAT(getitimer);
+COND_SYSCALL_COMPAT(setitimer);
+
/*
* Architecture specific syscalls: see further below
*/
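
These COND_SYSCALL() entries stand in for the SYS_NI()/COMPAT_SYS_NI() aliases removed from kernel/time/posix-stubs.c below. As a rough illustration of the mechanism (not the literal macro expansion, which differs with CONFIG_ARCH_HAS_SYSCALL_WRAPPER), each entry amounts to a weak stub returning -ENOSYS that the real syscall overrides when CONFIG_POSIX_TIMERS is enabled:

/* Illustration only; the actual COND_SYSCALL() expansion is arch dependent. */
asmlinkage long __weak sys_timer_getoverrun(timer_t timer_id)
{
	return -ENOSYS;	/* kernel built without CONFIG_POSIX_TIMERS */
}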
diff --git a/kernel/time/posix-stubs.c b/kernel/time/posix-stubs.c
index 828aeecbd1e8..9b6fcb8d85e7 100644
--- a/kernel/time/posix-stubs.c
+++ b/kernel/time/posix-stubs.c
@@ -17,40 +17,6 @@
#include <linux/time_namespace.h>
#include <linux/compat.h>
-#ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
-/* Architectures may override SYS_NI and COMPAT_SYS_NI */
-#include <asm/syscall_wrapper.h>
-#endif
-
-asmlinkage long sys_ni_posix_timers(void)
-{
- pr_err_once("process %d (%s) attempted a POSIX timer syscall "
- "while CONFIG_POSIX_TIMERS is not set\n",
- current->pid, current->comm);
- return -ENOSYS;
-}
-
-#ifndef SYS_NI
-#define SYS_NI(name) SYSCALL_ALIAS(sys_##name, sys_ni_posix_timers)
-#endif
-
-#ifndef COMPAT_SYS_NI
-#define COMPAT_SYS_NI(name) SYSCALL_ALIAS(compat_sys_##name, sys_ni_posix_timers)
-#endif
-
-SYS_NI(timer_create);
-SYS_NI(timer_gettime);
-SYS_NI(timer_getoverrun);
-SYS_NI(timer_settime);
-SYS_NI(timer_delete);
-SYS_NI(clock_adjtime);
-SYS_NI(getitimer);
-SYS_NI(setitimer);
-SYS_NI(clock_adjtime32);
-#ifdef __ARCH_WANT_SYS_ALARM
-SYS_NI(alarm);
-#endif
-
/*
* We preserve minimal support for CLOCK_REALTIME and CLOCK_MONOTONIC
* as it is easy to remain compatible with little code. CLOCK_BOOTTIME
@@ -158,18 +124,7 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags,
which_clock);
}
-#ifdef CONFIG_COMPAT
-COMPAT_SYS_NI(timer_create);
-#endif
-
-#if defined(CONFIG_COMPAT) || defined(CONFIG_ALPHA)
-COMPAT_SYS_NI(getitimer);
-COMPAT_SYS_NI(setitimer);
-#endif
-
#ifdef CONFIG_COMPAT_32BIT_TIME
-SYS_NI(timer_settime32);
-SYS_NI(timer_gettime32);
SYSCALL_DEFINE2(clock_settime32, const clockid_t, which_clock,
struct old_timespec32 __user *, tp)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 8de8bec5f366..b01ae7d36021 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1183,18 +1183,19 @@ static void __add_hash_entry(struct ftrace_hash *hash,
hash->count++;
}
-static int add_hash_entry(struct ftrace_hash *hash, unsigned long ip)
+static struct ftrace_func_entry *
+add_hash_entry(struct ftrace_hash *hash, unsigned long ip)
{
struct ftrace_func_entry *entry;
entry = kmalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
- return -ENOMEM;
+ return NULL;
entry->ip = ip;
__add_hash_entry(hash, entry);
- return 0;
+ return entry;
}
static void
@@ -1349,7 +1350,6 @@ alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash)
struct ftrace_func_entry *entry;
struct ftrace_hash *new_hash;
int size;
- int ret;
int i;
new_hash = alloc_ftrace_hash(size_bits);
@@ -1366,8 +1366,7 @@ alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash)
size = 1 << hash->size_bits;
for (i = 0; i < size; i++) {
hlist_for_each_entry(entry, &hash->buckets[i], hlist) {
- ret = add_hash_entry(new_hash, entry->ip);
- if (ret < 0)
+ if (add_hash_entry(new_hash, entry->ip) == NULL)
goto free_hash;
}
}
@@ -2536,7 +2535,7 @@ ftrace_find_unique_ops(struct dyn_ftrace *rec)
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
/* Protected by rcu_tasks for reading, and direct_mutex for writing */
-static struct ftrace_hash *direct_functions = EMPTY_HASH;
+static struct ftrace_hash __rcu *direct_functions = EMPTY_HASH;
static DEFINE_MUTEX(direct_mutex);
int ftrace_direct_func_count;
@@ -2555,39 +2554,6 @@ unsigned long ftrace_find_rec_direct(unsigned long ip)
return entry->direct;
}
-static struct ftrace_func_entry*
-ftrace_add_rec_direct(unsigned long ip, unsigned long addr,
- struct ftrace_hash **free_hash)
-{
- struct ftrace_func_entry *entry;
-
- if (ftrace_hash_empty(direct_functions) ||
- direct_functions->count > 2 * (1 << direct_functions->size_bits)) {
- struct ftrace_hash *new_hash;
- int size = ftrace_hash_empty(direct_functions) ? 0 :
- direct_functions->count + 1;
-
- if (size < 32)
- size = 32;
-
- new_hash = dup_hash(direct_functions, size);
- if (!new_hash)
- return NULL;
-
- *free_hash = direct_functions;
- direct_functions = new_hash;
- }
-
- entry = kmalloc(sizeof(*entry), GFP_KERNEL);
- if (!entry)
- return NULL;
-
- entry->ip = ip;
- entry->direct = addr;
- __add_hash_entry(direct_functions, entry);
- return entry;
-}
-
static void call_direct_funcs(unsigned long ip, unsigned long pip,
struct ftrace_ops *ops, struct ftrace_regs *fregs)
{
@@ -4223,8 +4189,8 @@ enter_record(struct ftrace_hash *hash, struct dyn_ftrace *rec, int clear_filter)
/* Do nothing if it exists */
if (entry)
return 0;
-
- ret = add_hash_entry(hash, rec->ip);
+ if (add_hash_entry(hash, rec->ip) == NULL)
+ ret = -ENOMEM;
}
return ret;
}
@@ -5266,7 +5232,8 @@ __ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove)
return 0;
}
- return add_hash_entry(hash, ip);
+ entry = add_hash_entry(hash, ip);
+ return entry ? 0 : -ENOMEM;
}
static int
@@ -5410,7 +5377,7 @@ static void remove_direct_functions_hash(struct ftrace_hash *hash, unsigned long
*/
int register_ftrace_direct(struct ftrace_ops *ops, unsigned long addr)
{
- struct ftrace_hash *hash, *free_hash = NULL;
+ struct ftrace_hash *hash, *new_hash = NULL, *free_hash = NULL;
struct ftrace_func_entry *entry, *new;
int err = -EBUSY, size, i;
@@ -5436,17 +5403,44 @@ int register_ftrace_direct(struct ftrace_ops *ops, unsigned long addr)
}
}
- /* ... and insert them to direct_functions hash. */
err = -ENOMEM;
+
+ /* Make a copy hash to place the new and the old entries in */
+ size = hash->count + direct_functions->count;
+ if (size > 32)
+ size = 32;
+ new_hash = alloc_ftrace_hash(fls(size));
+ if (!new_hash)
+ goto out_unlock;
+
+ /* Now copy over the existing direct entries */
+ size = 1 << direct_functions->size_bits;
+ for (i = 0; i < size; i++) {
+ hlist_for_each_entry(entry, &direct_functions->buckets[i], hlist) {
+ new = add_hash_entry(new_hash, entry->ip);
+ if (!new)
+ goto out_unlock;
+ new->direct = entry->direct;
+ }
+ }
+
+ /* ... and add the new entries */
+ size = 1 << hash->size_bits;
for (i = 0; i < size; i++) {
hlist_for_each_entry(entry, &hash->buckets[i], hlist) {
- new = ftrace_add_rec_direct(entry->ip, addr, &free_hash);
+ new = add_hash_entry(new_hash, entry->ip);
if (!new)
- goto out_remove;
+ goto out_unlock;
+ /* Update both the copy and the hash entry */
+ new->direct = addr;
entry->direct = addr;
}
}
+ free_hash = direct_functions;
+ rcu_assign_pointer(direct_functions, new_hash);
+ new_hash = NULL;
+
ops->func = call_direct_funcs;
ops->flags = MULTI_FLAGS;
ops->trampoline = FTRACE_REGS_ADDR;
@@ -5454,17 +5448,17 @@ int register_ftrace_direct(struct ftrace_ops *ops, unsigned long addr)
err = register_ftrace_function_nolock(ops);
- out_remove:
- if (err)
- remove_direct_functions_hash(hash, addr);
-
out_unlock:
mutex_unlock(&direct_mutex);
- if (free_hash) {
+ if (free_hash && free_hash != EMPTY_HASH) {
synchronize_rcu_tasks();
free_ftrace_hash(free_hash);
}
+
+ if (new_hash)
+ free_ftrace_hash(new_hash);
+
return err;
}
EXPORT_SYMBOL_GPL(register_ftrace_direct);
@@ -6309,7 +6303,7 @@ ftrace_graph_set_hash(struct ftrace_hash *hash, char *buffer)
if (entry)
continue;
- if (add_hash_entry(hash, rec->ip) < 0)
+ if (add_hash_entry(hash, rec->ip) == NULL)
goto out;
} else {
if (entry) {
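
With add_hash_entry() now returning the new entry (or NULL) rather than 0/-ENOMEM, callers that need to fill in per-entry data can use the returned pointer directly, as register_ftrace_direct() does above. A condensed sketch of that calling pattern (variable names hypothetical):

/* Sketch only: copy one direct-call mapping into a freshly built hash. */
struct ftrace_func_entry *new;

new = add_hash_entry(new_hash, entry->ip);
if (!new)
	return -ENOMEM;		/* caller frees new_hash on the error path */
new->direct = entry->direct;	/* no second lookup required */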
diff --git a/kernel/trace/rethook.c b/kernel/trace/rethook.c
index 6fd7d4ecbbc6..fa03094e9e69 100644
--- a/kernel/trace/rethook.c
+++ b/kernel/trace/rethook.c
@@ -48,7 +48,7 @@ static void rethook_free_rcu(struct rcu_head *head)
*/
void rethook_stop(struct rethook *rh)
{
- WRITE_ONCE(rh->handler, NULL);
+ rcu_assign_pointer(rh->handler, NULL);
}
/**
@@ -63,7 +63,7 @@ void rethook_stop(struct rethook *rh)
*/
void rethook_free(struct rethook *rh)
{
- WRITE_ONCE(rh->handler, NULL);
+ rethook_stop(rh);
call_rcu(&rh->rcu, rethook_free_rcu);
}
@@ -82,6 +82,12 @@ static int rethook_fini_pool(struct objpool_head *head, void *context)
return 0;
}
+static inline rethook_handler_t rethook_get_handler(struct rethook *rh)
+{
+ return (rethook_handler_t)rcu_dereference_check(rh->handler,
+ rcu_read_lock_any_held());
+}
+
/**
* rethook_alloc() - Allocate struct rethook.
* @data: a data to pass the @handler when hooking the return.
@@ -107,7 +113,7 @@ struct rethook *rethook_alloc(void *data, rethook_handler_t handler,
return ERR_PTR(-ENOMEM);
rh->data = data;
- rh->handler = handler;
+ rcu_assign_pointer(rh->handler, handler);
/* initialize the objpool for rethook nodes */
if (objpool_init(&rh->pool, num, size, GFP_KERNEL, rh,
@@ -135,9 +141,10 @@ static void free_rethook_node_rcu(struct rcu_head *head)
*/
void rethook_recycle(struct rethook_node *node)
{
- lockdep_assert_preemption_disabled();
+ rethook_handler_t handler;
- if (likely(READ_ONCE(node->rethook->handler)))
+ handler = rethook_get_handler(node->rethook);
+ if (likely(handler))
objpool_push(node, &node->rethook->pool);
else
call_rcu(&node->rcu, free_rethook_node_rcu);
@@ -153,9 +160,7 @@ NOKPROBE_SYMBOL(rethook_recycle);
*/
struct rethook_node *rethook_try_get(struct rethook *rh)
{
- rethook_handler_t handler = READ_ONCE(rh->handler);
-
- lockdep_assert_preemption_disabled();
+ rethook_handler_t handler = rethook_get_handler(rh);
/* Check whether @rh is going to be freed. */
if (unlikely(!handler))
@@ -300,7 +305,7 @@ unsigned long rethook_trampoline_handler(struct pt_regs *regs,
rhn = container_of(first, struct rethook_node, llist);
if (WARN_ON_ONCE(rhn->frame != frame))
break;
- handler = READ_ONCE(rhn->rethook->handler);
+ handler = rethook_get_handler(rhn->rethook);
if (handler)
handler(rhn, rhn->rethook->data,
correct_ret_addr, regs);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 43cc47d7faaf..9286f88fcd32 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -644,8 +644,8 @@ static inline bool __rb_time_read(rb_time_t *t, u64 *ret, unsigned long *cnt)
*cnt = rb_time_cnt(top);
- /* If top and bottom counts don't match, this interrupted a write */
- if (*cnt != rb_time_cnt(bottom))
+ /* If top, msb or bottom counts don't match, this interrupted a write */
+ if (*cnt != rb_time_cnt(msb) || *cnt != rb_time_cnt(bottom))
return false;
/* The shift to msb will lose its cnt bits */
@@ -700,44 +700,6 @@ rb_time_read_cmpxchg(local_t *l, unsigned long expect, unsigned long set)
return local_try_cmpxchg(l, &expect, set);
}
-static bool rb_time_cmpxchg(rb_time_t *t, u64 expect, u64 set)
-{
- unsigned long cnt, top, bottom, msb;
- unsigned long cnt2, top2, bottom2, msb2;
- u64 val;
-
- /* The cmpxchg always fails if it interrupted an update */
- if (!__rb_time_read(t, &val, &cnt2))
- return false;
-
- if (val != expect)
- return false;
-
- cnt = local_read(&t->cnt);
- if ((cnt & 3) != cnt2)
- return false;
-
- cnt2 = cnt + 1;
-
- rb_time_split(val, &top, &bottom, &msb);
- top = rb_time_val_cnt(top, cnt);
- bottom = rb_time_val_cnt(bottom, cnt);
-
- rb_time_split(set, &top2, &bottom2, &msb2);
- top2 = rb_time_val_cnt(top2, cnt2);
- bottom2 = rb_time_val_cnt(bottom2, cnt2);
-
- if (!rb_time_read_cmpxchg(&t->cnt, cnt, cnt2))
- return false;
- if (!rb_time_read_cmpxchg(&t->msb, msb, msb2))
- return false;
- if (!rb_time_read_cmpxchg(&t->top, top, top2))
- return false;
- if (!rb_time_read_cmpxchg(&t->bottom, bottom, bottom2))
- return false;
- return true;
-}
-
#else /* 64 bits */
/* local64_t always succeeds */
@@ -751,11 +713,6 @@ static void rb_time_set(rb_time_t *t, u64 val)
{
local64_set(&t->time, val);
}
-
-static bool rb_time_cmpxchg(rb_time_t *t, u64 expect, u64 set)
-{
- return local64_try_cmpxchg(&t->time, &expect, set);
-}
#endif
/*
@@ -924,9 +881,14 @@ static __always_inline bool full_hit(struct trace_buffer *buffer, int cpu, int f
if (!nr_pages || !full)
return true;
- dirty = ring_buffer_nr_dirty_pages(buffer, cpu);
+ /*
+ * Add one as dirty will never equal nr_pages, as the sub-buffer
+ * that the writer is on is not counted as dirty.
+ * This is needed if "buffer_percent" is set to 100.
+ */
+ dirty = ring_buffer_nr_dirty_pages(buffer, cpu) + 1;
- return (dirty * 100) > (full * nr_pages);
+ return (dirty * 100) >= (full * nr_pages);
}
/*
@@ -987,7 +949,8 @@ void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu)
/* make sure the waiters see the new index */
smp_wmb();
- rb_wake_up_waiters(&rbwork->work);
+ /* This can be called in any context */
+ irq_work_queue(&rbwork->work);
}
/**
@@ -1787,6 +1750,8 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
free_buffer_page(bpage);
}
+ free_page((unsigned long)cpu_buffer->free_page);
+
kfree(cpu_buffer);
}
@@ -2407,7 +2372,7 @@ rb_iter_head_event(struct ring_buffer_iter *iter)
*/
barrier();
- if ((iter->head + length) > commit || length > BUF_MAX_DATA_SIZE)
+ if ((iter->head + length) > commit || length > BUF_PAGE_SIZE)
/* Writer corrupted the read? */
goto reset;
@@ -2981,25 +2946,6 @@ static unsigned rb_calculate_event_length(unsigned length)
return length;
}
-static u64 rb_time_delta(struct ring_buffer_event *event)
-{
- switch (event->type_len) {
- case RINGBUF_TYPE_PADDING:
- return 0;
-
- case RINGBUF_TYPE_TIME_EXTEND:
- return rb_event_time_stamp(event);
-
- case RINGBUF_TYPE_TIME_STAMP:
- return 0;
-
- case RINGBUF_TYPE_DATA:
- return event->time_delta;
- default:
- return 0;
- }
-}
-
static inline bool
rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
struct ring_buffer_event *event)
@@ -3007,8 +2953,6 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
unsigned long new_index, old_index;
struct buffer_page *bpage;
unsigned long addr;
- u64 write_stamp;
- u64 delta;
new_index = rb_event_index(event);
old_index = new_index + rb_event_ts_length(event);
@@ -3017,41 +2961,34 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
bpage = READ_ONCE(cpu_buffer->tail_page);
- delta = rb_time_delta(event);
-
- if (!rb_time_read(&cpu_buffer->write_stamp, &write_stamp))
- return false;
-
- /* Make sure the write stamp is read before testing the location */
- barrier();
-
+ /*
+ * Make sure the tail_page is still the same and
+ * the next write location is the end of this event
+ */
if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) {
unsigned long write_mask =
local_read(&bpage->write) & ~RB_WRITE_MASK;
unsigned long event_length = rb_event_length(event);
- /* Something came in, can't discard */
- if (!rb_time_cmpxchg(&cpu_buffer->write_stamp,
- write_stamp, write_stamp - delta))
- return false;
-
/*
- * It's possible that the event time delta is zero
- * (has the same time stamp as the previous event)
- * in which case write_stamp and before_stamp could
- * be the same. In such a case, force before_stamp
- * to be different than write_stamp. It doesn't
- * matter what it is, as long as its different.
+	 * Force the before_stamp to be different than the write_stamp
+	 * so that the next event adds an absolute
+	 * value and does not rely on the saved write stamp, which
+	 * is now going to be bogus.
+ *
+ * By setting the before_stamp to zero, the next event
+ * is not going to use the write_stamp and will instead
+ * create an absolute timestamp. This means there's no
+	 * reason to update the write_stamp!
*/
- if (!delta)
- rb_time_set(&cpu_buffer->before_stamp, 0);
+ rb_time_set(&cpu_buffer->before_stamp, 0);
/*
* If an event were to come in now, it would see that the
* write_stamp and the before_stamp are different, and assume
* that this event just added itself before updating
* the write stamp. The interrupting event will fix the
- * write stamp for us, and use the before stamp as its delta.
+ * write stamp for us, and use an absolute timestamp.
*/
/*
@@ -3488,7 +3425,7 @@ static void check_buffer(struct ring_buffer_per_cpu *cpu_buffer,
return;
/*
- * If this interrupted another event,
+ * If this interrupted another event,
*/
if (atomic_inc_return(this_cpu_ptr(&checking)) != 1)
goto out;
@@ -3582,7 +3519,10 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
* absolute timestamp.
* Don't bother if this is the start of a new page (w == 0).
*/
- if (unlikely(!a_ok || !b_ok || (info->before != info->after && w))) {
+ if (!w) {
+ /* Use the sub-buffer timestamp */
+ info->delta = 0;
+ } else if (unlikely(!a_ok || !b_ok || info->before != info->after)) {
info->add_timestamp |= RB_ADD_STAMP_FORCE | RB_ADD_STAMP_EXTEND;
info->length += RB_LEN_TIME_EXTEND;
} else {
@@ -3605,26 +3545,19 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
/* See if we shot pass the end of this buffer page */
if (unlikely(write > BUF_PAGE_SIZE)) {
- /* before and after may now different, fix it up*/
- b_ok = rb_time_read(&cpu_buffer->before_stamp, &info->before);
- a_ok = rb_time_read(&cpu_buffer->write_stamp, &info->after);
- if (a_ok && b_ok && info->before != info->after)
- (void)rb_time_cmpxchg(&cpu_buffer->before_stamp,
- info->before, info->after);
- if (a_ok && b_ok)
- check_buffer(cpu_buffer, info, CHECK_FULL_PAGE);
+ check_buffer(cpu_buffer, info, CHECK_FULL_PAGE);
return rb_move_tail(cpu_buffer, tail, info);
}
if (likely(tail == w)) {
- u64 save_before;
- bool s_ok;
-
/* Nothing interrupted us between A and C */
/*D*/ rb_time_set(&cpu_buffer->write_stamp, info->ts);
- barrier();
- /*E*/ s_ok = rb_time_read(&cpu_buffer->before_stamp, &save_before);
- RB_WARN_ON(cpu_buffer, !s_ok);
+ /*
+ * If something came in between C and D, the write stamp
+ * may now not be in sync. But that's fine as the before_stamp
+ * will be different and then next event will just be forced
+ * to use an absolute timestamp.
+ */
if (likely(!(info->add_timestamp &
(RB_ADD_STAMP_FORCE | RB_ADD_STAMP_ABSOLUTE))))
/* This did not interrupt any time update */
@@ -3632,41 +3565,40 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
else
/* Just use full timestamp for interrupting event */
info->delta = info->ts;
- barrier();
check_buffer(cpu_buffer, info, tail);
- if (unlikely(info->ts != save_before)) {
- /* SLOW PATH - Interrupted between C and E */
-
- a_ok = rb_time_read(&cpu_buffer->write_stamp, &info->after);
- RB_WARN_ON(cpu_buffer, !a_ok);
-
- /* Write stamp must only go forward */
- if (save_before > info->after) {
- /*
- * We do not care about the result, only that
- * it gets updated atomically.
- */
- (void)rb_time_cmpxchg(&cpu_buffer->write_stamp,
- info->after, save_before);
- }
- }
} else {
u64 ts;
/* SLOW PATH - Interrupted between A and C */
- a_ok = rb_time_read(&cpu_buffer->write_stamp, &info->after);
- /* Was interrupted before here, write_stamp must be valid */
+
+ /* Save the old before_stamp */
+ a_ok = rb_time_read(&cpu_buffer->before_stamp, &info->before);
RB_WARN_ON(cpu_buffer, !a_ok);
+
+ /*
+ * Read a new timestamp and update the before_stamp to make
+ * the next event after this one force using an absolute
+ * timestamp. This is in case an interrupt were to come in
+ * between E and F.
+ */
ts = rb_time_stamp(cpu_buffer->buffer);
+ rb_time_set(&cpu_buffer->before_stamp, ts);
+
barrier();
- /*E*/ if (write == (local_read(&tail_page->write) & RB_WRITE_MASK) &&
- info->after < ts &&
- rb_time_cmpxchg(&cpu_buffer->write_stamp,
- info->after, ts)) {
- /* Nothing came after this event between C and E */
+ /*E*/ a_ok = rb_time_read(&cpu_buffer->write_stamp, &info->after);
+ /* Was interrupted before here, write_stamp must be valid */
+ RB_WARN_ON(cpu_buffer, !a_ok);
+ barrier();
+ /*F*/ if (write == (local_read(&tail_page->write) & RB_WRITE_MASK) &&
+ info->after == info->before && info->after < ts) {
+ /*
+ * Nothing came after this event between C and F, it is
+ * safe to use info->after for the delta as it
+ * matched info->before and is still valid.
+ */
info->delta = ts - info->after;
} else {
/*
- * Interrupted between C and E:
+ * Interrupted between C and F:
* Lost the previous events time stamp. Just set the
* delta to zero, and this will be the same time as
* the event this event interrupted. And the events that
@@ -3717,6 +3649,12 @@ rb_reserve_next_event(struct trace_buffer *buffer,
int nr_loops = 0;
int add_ts_default;
+ /* ring buffer does cmpxchg, make sure it is safe in NMI context */
+ if (!IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG) &&
+ (unlikely(in_nmi()))) {
+ return NULL;
+ }
+
rb_start_commit(cpu_buffer);
/* The commit page can not change after this */
@@ -3740,6 +3678,8 @@ rb_reserve_next_event(struct trace_buffer *buffer,
if (ring_buffer_time_stamp_abs(cpu_buffer->buffer)) {
add_ts_default = RB_ADD_STAMP_ABSOLUTE;
info.length += RB_LEN_TIME_EXTEND;
+ if (info.length > BUF_MAX_DATA_SIZE)
+ goto out_fail;
} else {
add_ts_default = RB_ADD_STAMP_NONE;
}
@@ -5121,7 +5061,8 @@ ring_buffer_read_prepare(struct trace_buffer *buffer, int cpu, gfp_t flags)
if (!iter)
return NULL;
- iter->event = kmalloc(BUF_MAX_DATA_SIZE, flags);
+ /* Holds the entire event: data and meta data */
+ iter->event = kmalloc(BUF_PAGE_SIZE, flags);
if (!iter->event) {
kfree(iter);
return NULL;
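
The full_hit() change near the top of this file is easiest to see with numbers; the following worked example uses invented values:

/*
 * nr_pages = 4, full = buffer_percent = 100. The sub-buffer the writer
 * is on never counts as dirty, so at most 3 pages can be dirty:
 *
 *   old test: dirty * 100       >  full * nr_pages  ->  300 >  400 (never true)
 *   new test: (dirty + 1) * 100 >= full * nr_pages  ->  400 >= 400 (true once
 *             every page except the writer's is dirty)
 *
 * so readers waiting for a 100% full buffer are now woken.
 */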
diff --git a/kernel/trace/synth_event_gen_test.c b/kernel/trace/synth_event_gen_test.c
index 8dfe85499d4a..354c2117be43 100644
--- a/kernel/trace/synth_event_gen_test.c
+++ b/kernel/trace/synth_event_gen_test.c
@@ -477,6 +477,17 @@ static int __init synth_event_gen_test_init(void)
ret = test_trace_synth_event();
WARN_ON(ret);
+
+ /* Disable when done */
+ trace_array_set_clr_event(gen_synth_test->tr,
+ "synthetic",
+ "gen_synth_test", false);
+ trace_array_set_clr_event(empty_synth_test->tr,
+ "synthetic",
+ "empty_synth_test", false);
+ trace_array_set_clr_event(create_synth_test->tr,
+ "synthetic",
+ "create_synth_test", false);
out:
return ret;
}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 9aebf904ff97..a0defe156b57 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1894,6 +1894,9 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
__update_max_tr(tr, tsk, cpu);
arch_spin_unlock(&tr->max_lock);
+
+ /* Any waiters on the old snapshot buffer need to wake up */
+ ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
}
/**
@@ -1945,12 +1948,23 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
static int wait_on_pipe(struct trace_iterator *iter, int full)
{
+ int ret;
+
/* Iterators are static, they should be filled or empty */
if (trace_buffer_iter(iter, iter->cpu_file))
return 0;
- return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
- full);
+ ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full);
+
+#ifdef CONFIG_TRACER_MAX_TRACE
+ /*
+ * Make sure this is still the snapshot buffer, as if a snapshot were
+ * to happen, this would now be the main buffer.
+ */
+ if (iter->snapshot)
+ iter->array_buffer = &iter->tr->max_buffer;
+#endif
+ return ret;
}
#ifdef CONFIG_FTRACE_STARTUP_TEST
@@ -2360,13 +2374,7 @@ int is_tracing_stopped(void)
return global_trace.stop_count;
}
-/**
- * tracing_start - quick start of the tracer
- *
- * If tracing is enabled but was stopped by tracing_stop,
- * this will start the tracer back up.
- */
-void tracing_start(void)
+static void tracing_start_tr(struct trace_array *tr)
{
struct trace_buffer *buffer;
unsigned long flags;
@@ -2374,119 +2382,83 @@ void tracing_start(void)
if (tracing_disabled)
return;
- raw_spin_lock_irqsave(&global_trace.start_lock, flags);
- if (--global_trace.stop_count) {
- if (global_trace.stop_count < 0) {
+ raw_spin_lock_irqsave(&tr->start_lock, flags);
+ if (--tr->stop_count) {
+ if (WARN_ON_ONCE(tr->stop_count < 0)) {
/* Someone screwed up their debugging */
- WARN_ON_ONCE(1);
- global_trace.stop_count = 0;
+ tr->stop_count = 0;
}
goto out;
}
/* Prevent the buffers from switching */
- arch_spin_lock(&global_trace.max_lock);
+ arch_spin_lock(&tr->max_lock);
- buffer = global_trace.array_buffer.buffer;
+ buffer = tr->array_buffer.buffer;
if (buffer)
ring_buffer_record_enable(buffer);
#ifdef CONFIG_TRACER_MAX_TRACE
- buffer = global_trace.max_buffer.buffer;
+ buffer = tr->max_buffer.buffer;
if (buffer)
ring_buffer_record_enable(buffer);
#endif
- arch_spin_unlock(&global_trace.max_lock);
-
- out:
- raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
-}
-
-static void tracing_start_tr(struct trace_array *tr)
-{
- struct trace_buffer *buffer;
- unsigned long flags;
-
- if (tracing_disabled)
- return;
-
- /* If global, we need to also start the max tracer */
- if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
- return tracing_start();
-
- raw_spin_lock_irqsave(&tr->start_lock, flags);
-
- if (--tr->stop_count) {
- if (tr->stop_count < 0) {
- /* Someone screwed up their debugging */
- WARN_ON_ONCE(1);
- tr->stop_count = 0;
- }
- goto out;
- }
-
- buffer = tr->array_buffer.buffer;
- if (buffer)
- ring_buffer_record_enable(buffer);
+ arch_spin_unlock(&tr->max_lock);
out:
raw_spin_unlock_irqrestore(&tr->start_lock, flags);
}
/**
- * tracing_stop - quick stop of the tracer
+ * tracing_start - quick start of the tracer
*
- * Light weight way to stop tracing. Use in conjunction with
- * tracing_start.
+ * If tracing is enabled but was stopped by tracing_stop,
+ * this will start the tracer back up.
*/
-void tracing_stop(void)
+void tracing_start(void)
+
+{
+ return tracing_start_tr(&global_trace);
+}
+
+static void tracing_stop_tr(struct trace_array *tr)
{
struct trace_buffer *buffer;
unsigned long flags;
- raw_spin_lock_irqsave(&global_trace.start_lock, flags);
- if (global_trace.stop_count++)
+ raw_spin_lock_irqsave(&tr->start_lock, flags);
+ if (tr->stop_count++)
goto out;
/* Prevent the buffers from switching */
- arch_spin_lock(&global_trace.max_lock);
+ arch_spin_lock(&tr->max_lock);
- buffer = global_trace.array_buffer.buffer;
+ buffer = tr->array_buffer.buffer;
if (buffer)
ring_buffer_record_disable(buffer);
#ifdef CONFIG_TRACER_MAX_TRACE
- buffer = global_trace.max_buffer.buffer;
+ buffer = tr->max_buffer.buffer;
if (buffer)
ring_buffer_record_disable(buffer);
#endif
- arch_spin_unlock(&global_trace.max_lock);
+ arch_spin_unlock(&tr->max_lock);
out:
- raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
+ raw_spin_unlock_irqrestore(&tr->start_lock, flags);
}
-static void tracing_stop_tr(struct trace_array *tr)
+/**
+ * tracing_stop - quick stop of the tracer
+ *
+ * Light weight way to stop tracing. Use in conjunction with
+ * tracing_start.
+ */
+void tracing_stop(void)
{
- struct trace_buffer *buffer;
- unsigned long flags;
-
- /* If global, we need to also stop the max tracer */
- if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
- return tracing_stop();
-
- raw_spin_lock_irqsave(&tr->start_lock, flags);
- if (tr->stop_count++)
- goto out;
-
- buffer = tr->array_buffer.buffer;
- if (buffer)
- ring_buffer_record_disable(buffer);
-
- out:
- raw_spin_unlock_irqrestore(&tr->start_lock, flags);
+ return tracing_stop_tr(&global_trace);
}
static int trace_save_cmdline(struct task_struct *tsk)
@@ -2770,8 +2742,11 @@ void trace_buffered_event_enable(void)
for_each_tracing_cpu(cpu) {
page = alloc_pages_node(cpu_to_node(cpu),
GFP_KERNEL | __GFP_NORETRY, 0);
- if (!page)
- goto failed;
+ /* This is just an optimization and can handle failures */
+ if (!page) {
+ pr_err("Failed to allocate event buffer\n");
+ break;
+ }
event = page_address(page);
memset(event, 0, sizeof(*event));
@@ -2785,10 +2760,6 @@ void trace_buffered_event_enable(void)
WARN_ON_ONCE(1);
preempt_enable();
}
-
- return;
- failed:
- trace_buffered_event_disable();
}
static void enable_trace_buffered_event(void *data)
@@ -2823,11 +2794,9 @@ void trace_buffered_event_disable(void)
if (--trace_buffered_event_ref)
return;
- preempt_disable();
/* For each CPU, set the buffer as used. */
- smp_call_function_many(tracing_buffer_mask,
- disable_trace_buffered_event, NULL, 1);
- preempt_enable();
+ on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
+ NULL, true);
/* Wait for all current users to finish */
synchronize_rcu();
@@ -2836,17 +2805,19 @@ void trace_buffered_event_disable(void)
free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
per_cpu(trace_buffered_event, cpu) = NULL;
}
+
/*
- * Make sure trace_buffered_event is NULL before clearing
- * trace_buffered_event_cnt.
+	 * Wait for all CPUs that could have started checking whether they can
+	 * use their event buffer only after the previous synchronize_rcu() and
+	 * that still read a valid pointer from trace_buffered_event. They must
+	 * not see a cleared trace_buffered_event_cnt, or they could wrongly
+	 * decide to use the pointed-to buffer, which is now freed.
*/
- smp_wmb();
+ synchronize_rcu();
- preempt_disable();
- /* Do the work on each cpu */
- smp_call_function_many(tracing_buffer_mask,
- enable_trace_buffered_event, NULL, 1);
- preempt_enable();
+ /* For each CPU, relinquish the buffer */
+ on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
+ true);
}
static struct trace_buffer *temp_buffer;
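The two synchronize_rcu() calls above implement a publish/unpublish ordering: readers are first told not to use the buffers, the pages are then freed and unpublished, and only after a second grace period is the "disabled" marker cleared again. A rough userspace analogue, assuming liburcu's default flavour (link with -lurcu, or -lurcu-memb depending on the liburcu version); shared_buf, buf_disabled, reader_use_buffer and teardown are invented names, and the single flag stands in for the per-CPU trace_buffered_event_cnt counter:

#include <stdio.h>
#include <stdlib.h>
#include <urcu.h>

static int *shared_buf;		/* stands in for a trace_buffered_event page */
static int buf_disabled;	/* stands in for trace_buffered_event_cnt */

/* Reader: only touches the buffer inside an RCU read-side section. */
static void reader_use_buffer(void)
{
	int *buf;

	rcu_read_lock();
	buf = rcu_dereference(shared_buf);
	if (buf && !__atomic_load_n(&buf_disabled, __ATOMIC_RELAXED))
		buf[0]++;
	rcu_read_unlock();
}

/* Teardown: same ordering as the reworked trace_buffered_event_disable(). */
static void teardown(void)
{
	int *old;

	/* 1) Mark the buffer unusable for every reader. */
	__atomic_store_n(&buf_disabled, 1, __ATOMIC_RELAXED);
	/* 2) Wait out readers that might have missed the store above. */
	synchronize_rcu();

	/* 3) Unpublish and free the buffer. */
	old = shared_buf;
	rcu_assign_pointer(shared_buf, NULL);
	free(old);

	/*
	 * 4) Readers that fetched the old pointer before step 3 still see
	 *    buf_disabled == 1, so they never touch the freed buffer.  Wait
	 *    them out before clearing the flag again.
	 */
	synchronize_rcu();
	__atomic_store_n(&buf_disabled, 0, __ATOMIC_RELAXED);
}

int main(void)
{
	rcu_register_thread();
	shared_buf = calloc(1, sizeof(*shared_buf));
	reader_use_buffer();
	teardown();
	rcu_unregister_thread();
	return 0;
}

The single-threaded main() only makes the sketch compile and run; the point is the ordering inside teardown(), which mirrors why the patch needs two grace periods rather than one.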
@@ -4765,7 +4736,11 @@ static int s_show(struct seq_file *m, void *v)
iter->leftover = ret;
} else {
- print_trace_line(iter);
+ ret = print_trace_line(iter);
+ if (ret == TRACE_TYPE_PARTIAL_LINE) {
+ iter->seq.full = 0;
+ trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
+ }
ret = trace_print_seq(m, &iter->seq);
/*
* If we overflow the seq_file buffer, then it will
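The new s_show() handling turns an oversized record into a short "[LINE TOO BIG]" marker instead of leaving the sequence buffer stuck in its overflow state. A minimal sketch of that pattern with an invented bounded writer (struct seq and seq_puts are stand-ins, not the kernel's trace_seq API):

#include <stdio.h>
#include <string.h>

struct seq {
	char buf[32];
	size_t len;
	int full;		/* set once a record did not fit */
};

static int seq_puts(struct seq *s, const char *str)
{
	size_t n = strlen(str);

	if (s->full || s->len + n >= sizeof(s->buf)) {
		s->full = 1;
		return 0;	/* analogue of TRACE_TYPE_PARTIAL_LINE */
	}
	memcpy(s->buf + s->len, str, n);
	s->len += n;
	s->buf[s->len] = '\0';
	return 1;
}

int main(void)
{
	struct seq s = { 0 };
	const char *lines[] = {
		"short line\n",
		"a line that is far too long to fit in the buffer\n",
	};

	for (size_t i = 0; i < sizeof(lines) / sizeof(lines[0]); i++) {
		if (!seq_puts(&s, lines[i])) {
			/*
			 * Mirror the patch: clear the overflow state and emit
			 * a short marker instead of silently dropping output.
			 */
			s.full = 0;
			seq_puts(&s, "[LINE TOO BIG]\n");
		}
	}
	fputs(s.buf, stdout);
	return 0;
}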
@@ -5007,6 +4982,12 @@ int tracing_release_file_tr(struct inode *inode, struct file *filp)
return 0;
}
+int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
+{
+ tracing_release_file_tr(inode, filp);
+ return single_release(inode, filp);
+}
+
static int tracing_mark_open(struct inode *inode, struct file *filp)
{
stream_open(inode, filp);
@@ -6387,13 +6368,15 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr,
if (!tr->array_buffer.buffer)
return 0;
+ /* Do not allow tracing while resizing ring buffer */
+ tracing_stop_tr(tr);
+
ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
if (ret < 0)
- return ret;
+ goto out_start;
#ifdef CONFIG_TRACER_MAX_TRACE
- if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
- !tr->current_trace->use_max_tr)
+ if (!tr->allocated_snapshot)
goto out;
ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
@@ -6418,7 +6401,7 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr,
WARN_ON(1);
tracing_disabled = 1;
}
- return ret;
+ goto out_start;
}
update_buffer_entries(&tr->max_buffer, cpu);
@@ -6427,7 +6410,8 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr,
#endif /* CONFIG_TRACER_MAX_TRACE */
update_buffer_entries(&tr->array_buffer, cpu);
-
+ out_start:
+ tracing_start_tr(tr);
return ret;
}
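The resize path above now brackets the whole operation with tracing_stop_tr()/tracing_start_tr(), and every error path funnels through the new out_start label so tracing is always re-enabled. A compact userspace sketch of that bracket-and-unwind shape (struct ring and ring_resize are invented for illustration):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct ring {
	unsigned char *data;
	size_t size;
	int recording;		/* stands in for the record-enable state */
};

/*
 * Recording is paused for the whole resize and unconditionally resumed on
 * the way out, mirroring the stop/start bracket added by the patch.
 */
static int ring_resize(struct ring *r, size_t new_size)
{
	unsigned char *tmp;
	int ret = 0;

	r->recording = 0;			/* tracing_stop_tr() */

	tmp = realloc(r->data, new_size);
	if (!tmp) {
		ret = -ENOMEM;
		goto out_start;
	}
	if (new_size > r->size)
		memset(tmp + r->size, 0, new_size - r->size);
	r->data = tmp;
	r->size = new_size;

out_start:
	r->recording = 1;			/* tracing_start_tr() */
	return ret;
}

int main(void)
{
	struct ring r = { .data = malloc(16), .size = 16, .recording = 1 };

	printf("resize: %d, recording: %d\n", ring_resize(&r, 64), r.recording);
	free(r.data);
	return 0;
}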
@@ -8547,7 +8531,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
wait_index = READ_ONCE(iter->wait_index);
- ret = wait_on_pipe(iter, iter->tr->buffer_percent);
+ ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
if (ret)
goto out;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index b7f4ea25a194..0489e72c8169 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -617,6 +617,7 @@ int tracing_open_generic(struct inode *inode, struct file *filp);
int tracing_open_generic_tr(struct inode *inode, struct file *filp);
int tracing_open_file_tr(struct inode *inode, struct file *filp);
int tracing_release_file_tr(struct inode *inode, struct file *filp);
+int tracing_single_release_file_tr(struct inode *inode, struct file *filp);
bool tracing_is_disabled(void);
bool tracer_tracing_is_on(struct trace_array *tr);
void tracer_tracing_on(struct trace_array *tr);
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 1abc07fba1b9..5ecf3c8bde20 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -5623,10 +5623,12 @@ static int event_hist_open(struct inode *inode, struct file *file)
{
int ret;
- ret = security_locked_down(LOCKDOWN_TRACEFS);
+ ret = tracing_open_file_tr(inode, file);
if (ret)
return ret;
+ /* Clear private_data to avoid warning in single_open() */
+ file->private_data = NULL;
return single_open(file, hist_show, file);
}
@@ -5634,7 +5636,7 @@ const struct file_operations event_hist_fops = {
.open = event_hist_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = single_release,
+ .release = tracing_single_release_file_tr,
};
#ifdef CONFIG_HIST_TRIGGERS_DEBUG
@@ -5900,10 +5902,12 @@ static int event_hist_debug_open(struct inode *inode, struct file *file)
{
int ret;
- ret = security_locked_down(LOCKDOWN_TRACEFS);
+ ret = tracing_open_file_tr(inode, file);
if (ret)
return ret;
+ /* Clear private_data to avoid warning in single_open() */
+ file->private_data = NULL;
return single_open(file, hist_debug_show, file);
}
@@ -5911,7 +5915,7 @@ const struct file_operations event_hist_debug_fops = {
.open = event_hist_debug_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = single_release,
+ .release = tracing_single_release_file_tr,
};
#endif
diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index 846e02c0fb59..e7af286af4f1 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -1137,7 +1137,7 @@ EXPORT_SYMBOL_GPL(synth_event_add_fields);
* @cmd: A pointer to the dynevent_cmd struct representing the new event
* @name: The name of the synthetic event
* @mod: The module creating the event, NULL if not created from a module
- * @args: Variable number of arg (pairs), one pair for each field
+ * @...: Variable number of arg (pairs), one pair for each field
*
* NOTE: Users normally won't want to call this function directly, but
* rather use the synth_event_gen_cmd_start() wrapper, which
@@ -1695,7 +1695,7 @@ __synth_event_trace_end(struct synth_event_trace_state *trace_state)
* synth_event_trace - Trace a synthetic event
* @file: The trace_event_file representing the synthetic event
* @n_vals: The number of values in vals
- * @args: Variable number of args containing the event values
+ * @...: Variable number of args containing the event values
*
* Trace a synthetic event using the values passed in the variable
* argument list.
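Both hunks above replace "@args:" with "@...:" because the functions take a literal "..." parameter. For reference, a made-up declaration showing the kernel-doc form these hunks switch to:

/**
 * log_event - record an event name followed by key/value pairs
 * @name: the event name
 * @n_pairs: number of key/value pairs that follow
 * @...: n_pairs pairs of (const char *key, long value)
 *
 * kernel-doc matches "@...:" against a literal "..." parameter; an
 * "@args:" line would not correspond to any parameter and would
 * trigger a warning.
 */
void log_event(const char *name, int n_pairs, ...);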
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index d8b302d01083..3e7fa44dc2b2 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -1587,11 +1587,12 @@ static enum print_line_t trace_print_print(struct trace_iterator *iter,
{
struct print_entry *field;
struct trace_seq *s = &iter->seq;
+ int max = iter->ent_size - offsetof(struct print_entry, buf);
trace_assign_type(field, iter->ent);
seq_print_ip_sym(s, field->ip, flags);
- trace_seq_printf(s, ": %s", field->buf);
+ trace_seq_printf(s, ": %.*s", max, field->buf);
return trace_handle_return(s);
}
@@ -1600,10 +1601,11 @@ static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags,
struct trace_event *event)
{
struct print_entry *field;
+ int max = iter->ent_size - offsetof(struct print_entry, buf);
trace_assign_type(field, iter->ent);
- trace_seq_printf(&iter->seq, "# %lx %s", field->ip, field->buf);
+ trace_seq_printf(&iter->seq, "# %lx %.*s", field->ip, max, field->buf);
return trace_handle_return(&iter->seq);
}
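Both print helpers now bound the output with "%.*s", using the entry size minus the offset of the text payload, so a record whose buffer is not NUL-terminated can no longer let the print run past the entry. A small standalone illustration (struct entry is a simplified stand-in for struct print_entry):

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Simplified stand-in for struct print_entry: a fixed header followed by a
 * text payload that is not guaranteed to be NUL-terminated.
 */
struct entry {
	unsigned long ip;
	char buf[];
};

int main(void)
{
	size_t ent_size = sizeof(struct entry) + 8;
	struct entry *ent = malloc(ent_size);
	int max;

	if (!ent)
		return 1;
	ent->ip = 0xc0ffee;
	memcpy(ent->buf, "12345678", 8);	/* fills the payload, no NUL byte */

	/*
	 * Bound the output to the payload actually contained in the record,
	 * the same way the patch derives max from ent_size - offsetof(...),
	 * so printf never reads past the entry even without a terminator.
	 */
	max = (int)(ent_size - offsetof(struct entry, buf));
	printf("# %lx %.*s\n", ent->ip, max, ent->buf);

	free(ent);
	return 0;
}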
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 6e578f576a6f..2989b57e154a 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1684,9 +1684,6 @@ static int wq_select_unbound_cpu(int cpu)
pr_warn_once("workqueue: round-robin CPU selection forced, expect performance impact\n");
}
- if (cpumask_empty(wq_unbound_cpumask))
- return cpu;
-
new_cpu = __this_cpu_read(wq_rr_cpu_last);
new_cpu = cpumask_next_and(new_cpu, wq_unbound_cpumask, cpu_online_mask);
if (unlikely(new_cpu >= nr_cpu_ids)) {
@@ -6515,6 +6512,17 @@ static inline void wq_watchdog_init(void) { }
#endif /* CONFIG_WQ_WATCHDOG */
+static void __init restrict_unbound_cpumask(const char *name, const struct cpumask *mask)
+{
+ if (!cpumask_intersects(wq_unbound_cpumask, mask)) {
+ pr_warn("workqueue: Restricting unbound_cpumask (%*pb) with %s (%*pb) leaves no CPU, ignoring\n",
+ cpumask_pr_args(wq_unbound_cpumask), name, cpumask_pr_args(mask));
+ return;
+ }
+
+ cpumask_and(wq_unbound_cpumask, wq_unbound_cpumask, mask);
+}
+
/**
* workqueue_init_early - early init for workqueue subsystem
*
@@ -6534,11 +6542,11 @@ void __init workqueue_init_early(void)
BUILD_BUG_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));
BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL));
- cpumask_copy(wq_unbound_cpumask, housekeeping_cpumask(HK_TYPE_WQ));
- cpumask_and(wq_unbound_cpumask, wq_unbound_cpumask, housekeeping_cpumask(HK_TYPE_DOMAIN));
-
+ cpumask_copy(wq_unbound_cpumask, cpu_possible_mask);
+ restrict_unbound_cpumask("HK_TYPE_WQ", housekeeping_cpumask(HK_TYPE_WQ));
+ restrict_unbound_cpumask("HK_TYPE_DOMAIN", housekeeping_cpumask(HK_TYPE_DOMAIN));
if (!cpumask_empty(&wq_cmdline_cpumask))
- cpumask_and(wq_unbound_cpumask, wq_unbound_cpumask, &wq_cmdline_cpumask);
+ restrict_unbound_cpumask("workqueue.unbound_cpus", &wq_cmdline_cpumask);
pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC);
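restrict_unbound_cpumask() only applies a restriction when the intersection is non-empty, so wq_unbound_cpumask (seeded from cpu_possible_mask) can never end up empty; that is also why the cpumask_empty() check could be dropped from wq_select_unbound_cpu() earlier in this diff. A toy userspace version of the same guard using plain 64-bit masks (restrict_mask and the sample masks are invented for illustration):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Apply a restriction only if it leaves at least one bit set, otherwise
 * warn and keep the current mask, just like restrict_unbound_cpumask().
 */
static void restrict_mask(uint64_t *unbound, const char *name, uint64_t mask)
{
	if (!(*unbound & mask)) {
		printf("restricting with %s (%#" PRIx64 ") leaves no CPU, ignoring\n",
		       name, mask);
		return;
	}
	*unbound &= mask;
}

int main(void)
{
	uint64_t unbound = 0xff;			/* "all possible CPUs" */

	restrict_mask(&unbound, "HK_TYPE_WQ", 0x0f);	/* ok: 0x0f remains */
	restrict_mask(&unbound, "cmdline", 0xf0);	/* would empty it: ignored */
	printf("final mask: %#" PRIx64 "\n", unbound);	/* prints 0xf */
	return 0;
}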