summaryrefslogtreecommitdiff
path: root/kernel/events
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/events')
-rw-r--r--kernel/events/core.c110
-rw-r--r--kernel/events/hw_breakpoint.c4
-rw-r--r--kernel/events/uprobes.c81
3 files changed, 164 insertions, 31 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 0463c1151bae..4f08b17d6426 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1103,7 +1103,7 @@ static void __perf_mux_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu)
cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * interval);
raw_spin_lock_init(&cpuctx->hrtimer_lock);
- hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
+ hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
timer->function = perf_mux_hrtimer_handler;
}
@@ -1121,7 +1121,7 @@ static int perf_mux_hrtimer_restart(struct perf_cpu_context *cpuctx)
if (!cpuctx->hrtimer_active) {
cpuctx->hrtimer_active = 1;
hrtimer_forward_now(timer, cpuctx->hrtimer_interval);
- hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED);
+ hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED_HARD);
}
raw_spin_unlock_irqrestore(&cpuctx->hrtimer_lock, flags);
@@ -1887,6 +1887,89 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
ctx->generation++;
}
+static int
+perf_aux_output_match(struct perf_event *event, struct perf_event *aux_event)
+{
+ if (!has_aux(aux_event))
+ return 0;
+
+ if (!event->pmu->aux_output_match)
+ return 0;
+
+ return event->pmu->aux_output_match(aux_event);
+}
+
+static void put_event(struct perf_event *event);
+static void event_sched_out(struct perf_event *event,
+ struct perf_cpu_context *cpuctx,
+ struct perf_event_context *ctx);
+
+static void perf_put_aux_event(struct perf_event *event)
+{
+ struct perf_event_context *ctx = event->ctx;
+ struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
+ struct perf_event *iter;
+
+ /*
+ * If event uses aux_event tear down the link
+ */
+ if (event->aux_event) {
+ iter = event->aux_event;
+ event->aux_event = NULL;
+ put_event(iter);
+ return;
+ }
+
+ /*
+ * If the event is an aux_event, tear down all links to
+ * it from other events.
+ */
+ for_each_sibling_event(iter, event->group_leader) {
+ if (iter->aux_event != event)
+ continue;
+
+ iter->aux_event = NULL;
+ put_event(event);
+
+ /*
+ * If it's ACTIVE, schedule it out and put it into ERROR
+ * state so that we don't try to schedule it again. Note
+ * that perf_event_enable() will clear the ERROR status.
+ */
+ event_sched_out(iter, cpuctx, ctx);
+ perf_event_set_state(event, PERF_EVENT_STATE_ERROR);
+ }
+}
+
+static int perf_get_aux_event(struct perf_event *event,
+ struct perf_event *group_leader)
+{
+ /*
+ * Our group leader must be an aux event if we want to be
+ * an aux_output. This way, the aux event will precede its
+ * aux_output events in the group, and therefore will always
+ * schedule first.
+ */
+ if (!group_leader)
+ return 0;
+
+ if (!perf_aux_output_match(event, group_leader))
+ return 0;
+
+ if (!atomic_long_inc_not_zero(&group_leader->refcount))
+ return 0;
+
+ /*
+ * Link aux_outputs to their aux event; this is undone in
+ * perf_group_detach() by perf_put_aux_event(). When the
+ * group in torn down, the aux_output events loose their
+ * link to the aux_event and can't schedule any more.
+ */
+ event->aux_event = group_leader;
+
+ return 1;
+}
+
static void perf_group_detach(struct perf_event *event)
{
struct perf_event *sibling, *tmp;
@@ -1902,6 +1985,8 @@ static void perf_group_detach(struct perf_event *event)
event->attach_state &= ~PERF_ATTACH_GROUP;
+ perf_put_aux_event(event);
+
/*
* If this is a sibling, remove it from its group.
*/
@@ -4089,10 +4174,8 @@ alloc_perf_context(struct pmu *pmu, struct task_struct *task)
return NULL;
__perf_event_init_context(ctx);
- if (task) {
- ctx->task = task;
- get_task_struct(task);
- }
+ if (task)
+ ctx->task = get_task_struct(task);
ctx->pmu = pmu;
return ctx;
@@ -9491,7 +9574,7 @@ static void perf_swevent_start_hrtimer(struct perf_event *event)
period = max_t(u64, 10000, hwc->sample_period);
}
hrtimer_start(&hwc->hrtimer, ns_to_ktime(period),
- HRTIMER_MODE_REL_PINNED);
+ HRTIMER_MODE_REL_PINNED_HARD);
}
static void perf_swevent_cancel_hrtimer(struct perf_event *event)
@@ -9513,7 +9596,7 @@ static void perf_swevent_init_hrtimer(struct perf_event *event)
if (!is_sampling_event(event))
return;
- hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
hwc->hrtimer.function = perf_swevent_hrtimer;
/*
@@ -10355,8 +10438,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
* and we cannot use the ctx information because we need the
* pmu before we get a ctx.
*/
- get_task_struct(task);
- event->hw.target = task;
+ event->hw.target = get_task_struct(task);
}
event->clock = &local_clock;
@@ -10426,6 +10508,12 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
goto err_ns;
}
+ if (event->attr.aux_output &&
+ !(pmu->capabilities & PERF_PMU_CAP_AUX_OUTPUT)) {
+ err = -EOPNOTSUPP;
+ goto err_pmu;
+ }
+
err = exclusive_event_init(event);
if (err)
goto err_pmu;
@@ -11082,6 +11170,8 @@ SYSCALL_DEFINE5(perf_event_open,
}
}
+ if (event->attr.aux_output && !perf_get_aux_event(event, group_leader))
+ goto err_locked;
/*
* Must be under the same ctx::mutex as perf_install_in_context(),
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index c5cd852fe86b..3cc8416ec844 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -413,7 +413,7 @@ static int hw_breakpoint_parse(struct perf_event *bp,
int register_perf_hw_breakpoint(struct perf_event *bp)
{
- struct arch_hw_breakpoint hw;
+ struct arch_hw_breakpoint hw = { };
int err;
err = reserve_bp_slot(bp);
@@ -461,7 +461,7 @@ int
modify_user_hw_breakpoint_check(struct perf_event *bp, struct perf_event_attr *attr,
bool check)
{
- struct arch_hw_breakpoint hw;
+ struct arch_hw_breakpoint hw = { };
int err;
err = hw_breakpoint_parse(bp, attr, &hw);
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 84fa00497c49..94d38a39d72e 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -26,6 +26,7 @@
#include <linux/percpu-rwsem.h>
#include <linux/task_work.h>
#include <linux/shmem_fs.h>
+#include <linux/khugepaged.h>
#include <linux/uprobes.h>
@@ -143,17 +144,19 @@ static loff_t vaddr_to_offset(struct vm_area_struct *vma, unsigned long vaddr)
*
* @vma: vma that holds the pte pointing to page
* @addr: address the old @page is mapped at
- * @page: the cowed page we are replacing by kpage
- * @kpage: the modified page we replace page by
+ * @old_page: the page we are replacing by new_page
+ * @new_page: the modified page we replace page by
*
- * Returns 0 on success, -EFAULT on failure.
+ * If @new_page is NULL, only unmap @old_page.
+ *
+ * Returns 0 on success, negative error code otherwise.
*/
static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
struct page *old_page, struct page *new_page)
{
struct mm_struct *mm = vma->vm_mm;
struct page_vma_mapped_walk pvmw = {
- .page = old_page,
+ .page = compound_head(old_page),
.vma = vma,
.address = addr,
};
@@ -164,12 +167,12 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm, addr,
addr + PAGE_SIZE);
- VM_BUG_ON_PAGE(PageTransHuge(old_page), old_page);
-
- err = mem_cgroup_try_charge(new_page, vma->vm_mm, GFP_KERNEL, &memcg,
- false);
- if (err)
- return err;
+ if (new_page) {
+ err = mem_cgroup_try_charge(new_page, vma->vm_mm, GFP_KERNEL,
+ &memcg, false);
+ if (err)
+ return err;
+ }
/* For try_to_free_swap() and munlock_vma_page() below */
lock_page(old_page);
@@ -177,15 +180,20 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
mmu_notifier_invalidate_range_start(&range);
err = -EAGAIN;
if (!page_vma_mapped_walk(&pvmw)) {
- mem_cgroup_cancel_charge(new_page, memcg, false);
+ if (new_page)
+ mem_cgroup_cancel_charge(new_page, memcg, false);
goto unlock;
}
VM_BUG_ON_PAGE(addr != pvmw.address, old_page);
- get_page(new_page);
- page_add_new_anon_rmap(new_page, vma, addr, false);
- mem_cgroup_commit_charge(new_page, memcg, false, false);
- lru_cache_add_active_or_unevictable(new_page, vma);
+ if (new_page) {
+ get_page(new_page);
+ page_add_new_anon_rmap(new_page, vma, addr, false);
+ mem_cgroup_commit_charge(new_page, memcg, false, false);
+ lru_cache_add_active_or_unevictable(new_page, vma);
+ } else
+ /* no new page, just dec_mm_counter for old_page */
+ dec_mm_counter(mm, MM_ANONPAGES);
if (!PageAnon(old_page)) {
dec_mm_counter(mm, mm_counter_file(old_page));
@@ -194,8 +202,9 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
flush_cache_page(vma, addr, pte_pfn(*pvmw.pte));
ptep_clear_flush_notify(vma, addr, pvmw.pte);
- set_pte_at_notify(mm, addr, pvmw.pte,
- mk_pte(new_page, vma->vm_page_prot));
+ if (new_page)
+ set_pte_at_notify(mm, addr, pvmw.pte,
+ mk_pte(new_page, vma->vm_page_prot));
page_remove_rmap(old_page, false);
if (!page_mapped(old_page))
@@ -464,6 +473,7 @@ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
struct page *old_page, *new_page;
struct vm_area_struct *vma;
int ret, is_register, ref_ctr_updated = 0;
+ bool orig_page_huge = false;
is_register = is_swbp_insn(&opcode);
uprobe = container_of(auprobe, struct uprobe, arch);
@@ -471,7 +481,7 @@ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
retry:
/* Read the page with vaddr into memory */
ret = get_user_pages_remote(NULL, mm, vaddr, 1,
- FOLL_FORCE | FOLL_SPLIT, &old_page, &vma, NULL);
+ FOLL_FORCE | FOLL_SPLIT_PMD, &old_page, &vma, NULL);
if (ret <= 0)
return ret;
@@ -488,6 +498,10 @@ retry:
ref_ctr_updated = 1;
}
+ ret = 0;
+ if (!is_register && !PageAnon(old_page))
+ goto put_old;
+
ret = anon_vma_prepare(vma);
if (ret)
goto put_old;
@@ -501,8 +515,33 @@ retry:
copy_highpage(new_page, old_page);
copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
+ if (!is_register) {
+ struct page *orig_page;
+ pgoff_t index;
+
+ VM_BUG_ON_PAGE(!PageAnon(old_page), old_page);
+
+ index = vaddr_to_offset(vma, vaddr & PAGE_MASK) >> PAGE_SHIFT;
+ orig_page = find_get_page(vma->vm_file->f_inode->i_mapping,
+ index);
+
+ if (orig_page) {
+ if (PageUptodate(orig_page) &&
+ pages_identical(new_page, orig_page)) {
+ /* let go new_page */
+ put_page(new_page);
+ new_page = NULL;
+
+ if (PageCompound(orig_page))
+ orig_page_huge = true;
+ }
+ put_page(orig_page);
+ }
+ }
+
ret = __replace_page(vma, vaddr, old_page, new_page);
- put_page(new_page);
+ if (new_page)
+ put_page(new_page);
put_old:
put_page(old_page);
@@ -513,6 +552,10 @@ put_old:
if (ret && is_register && ref_ctr_updated)
update_ref_ctr(uprobe, mm, -1);
+ /* try collapse pmd for compound page */
+ if (!ret && orig_page_huge)
+ collapse_pte_mapped_thp(mm, vaddr);
+
return ret;
}