diff options
Diffstat (limited to 'arch/csky/kernel/perf_event.c')
-rw-r--r-- | arch/csky/kernel/perf_event.c | 410 |
1 files changed, 375 insertions, 35 deletions
diff --git a/arch/csky/kernel/perf_event.c b/arch/csky/kernel/perf_event.c index 376c972f5f37..4c1a1934d76a 100644 --- a/arch/csky/kernel/perf_event.c +++ b/arch/csky/kernel/perf_event.c @@ -9,17 +9,44 @@ #include <linux/platform_device.h> #define CSKY_PMU_MAX_EVENTS 32 +#define DEFAULT_COUNT_WIDTH 48 + +#define HPCR "<0, 0x0>" /* PMU Control reg */ +#define HPSPR "<0, 0x1>" /* Start PC reg */ +#define HPEPR "<0, 0x2>" /* End PC reg */ +#define HPSIR "<0, 0x3>" /* Soft Counter reg */ +#define HPCNTENR "<0, 0x4>" /* Count Enable reg */ +#define HPINTENR "<0, 0x5>" /* Interrupt Enable reg */ +#define HPOFSR "<0, 0x6>" /* Interrupt Status reg */ + +/* The events for a given PMU register set. */ +struct pmu_hw_events { + /* + * The events that are active on the PMU for the given index. + */ + struct perf_event *events[CSKY_PMU_MAX_EVENTS]; -#define HPCR "<0, 0x0>" /* PMU Control reg */ -#define HPCNTENR "<0, 0x4>" /* Count Enable reg */ + /* + * A 1 bit for an index indicates that the counter is being used for + * an event. A 0 means that the counter can be used. + */ + unsigned long used_mask[BITS_TO_LONGS(CSKY_PMU_MAX_EVENTS)]; +}; static uint64_t (*hw_raw_read_mapping[CSKY_PMU_MAX_EVENTS])(void); static void (*hw_raw_write_mapping[CSKY_PMU_MAX_EVENTS])(uint64_t val); -struct csky_pmu_t { - struct pmu pmu; - uint32_t hpcr; +static struct csky_pmu_t { + struct pmu pmu; + struct pmu_hw_events __percpu *hw_events; + struct platform_device *plat_device; + uint32_t count_width; + uint32_t hpcr; + u64 max_period; } csky_pmu; +static int csky_pmu_irq; + +#define to_csky_pmu(p) (container_of(p, struct csky_pmu, pmu)) #define cprgr(reg) \ ({ \ @@ -701,6 +728,20 @@ static const int csky_pmu_hw_map[PERF_COUNT_HW_MAX] = { #define CACHE_OP_UNSUPPORTED 0xffff static const int csky_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(L1D)] = { +#ifdef CONFIG_CPU_CK810 + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0x5, + [C(RESULT_MISS)] = 0x6, + }, +#else [C(OP_READ)] = { [C(RESULT_ACCESS)] = 0x14, [C(RESULT_MISS)] = 0x15, @@ -710,9 +751,10 @@ static const int csky_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(RESULT_MISS)] = 0x17, }, [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = 0x5, - [C(RESULT_MISS)] = 0x6, + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, }, +#endif }, [C(L1I)] = { [C(OP_READ)] = { @@ -729,6 +771,20 @@ static const int csky_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { }, }, [C(LL)] = { +#ifdef CONFIG_CPU_CK810 + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0x7, + [C(RESULT_MISS)] = 0x8, + }, +#else [C(OP_READ)] = { [C(RESULT_ACCESS)] = 0x18, [C(RESULT_MISS)] = 0x19, @@ -738,29 +794,48 @@ static const int csky_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(RESULT_MISS)] = 0x1b, }, [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = 0x7, - [C(RESULT_MISS)] = 0x8, + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, }, +#endif }, [C(DTLB)] = { +#ifdef CONFIG_CPU_CK810 [C(OP_READ)] = { - [C(RESULT_ACCESS)] = 0x5, - [C(RESULT_MISS)] = 0xb, + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, }, [C(OP_WRITE)] = { [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, }, +#else + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x14, + [C(RESULT_MISS)] = 0xb, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = 0x16, + [C(RESULT_MISS)] = 0xb, + }, +#endif [C(OP_PREFETCH)] = { [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, }, }, [C(ITLB)] = { +#ifdef CONFIG_CPU_CK810 + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, +#else [C(OP_READ)] = { [C(RESULT_ACCESS)] = 0x3, [C(RESULT_MISS)] = 0xa, }, +#endif [C(OP_WRITE)] = { [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, @@ -800,11 +875,57 @@ static const int csky_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { }, }; +int csky_pmu_event_set_period(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + s64 left = local64_read(&hwc->period_left); + s64 period = hwc->sample_period; + int ret = 0; + + if (unlikely(left <= -period)) { + left = period; + local64_set(&hwc->period_left, left); + hwc->last_period = period; + ret = 1; + } + + if (unlikely(left <= 0)) { + left += period; + local64_set(&hwc->period_left, left); + hwc->last_period = period; + ret = 1; + } + + if (left > (s64)csky_pmu.max_period) + left = csky_pmu.max_period; + + /* + * The hw event starts counting from this event offset, + * mark it to be able to extract future "deltas": + */ + local64_set(&hwc->prev_count, (u64)(-left)); + + if (hw_raw_write_mapping[hwc->idx] != NULL) + hw_raw_write_mapping[hwc->idx]((u64)(-left) & + csky_pmu.max_period); + + cpwcr(HPOFSR, ~BIT(hwc->idx) & cprcr(HPOFSR)); + + perf_event_update_userpage(event); + + return ret; +} + static void csky_perf_event_update(struct perf_event *event, struct hw_perf_event *hwc) { uint64_t prev_raw_count = local64_read(&hwc->prev_count); - uint64_t new_raw_count = hw_raw_read_mapping[hwc->idx](); + /* + * Sign extend count value to 64bit, otherwise delta calculation + * would be incorrect when overflow occurs. + */ + uint64_t new_raw_count = sign_extend64( + hw_raw_read_mapping[hwc->idx](), csky_pmu.count_width - 1); int64_t delta = new_raw_count - prev_raw_count; /* @@ -816,6 +937,11 @@ static void csky_perf_event_update(struct perf_event *event, local64_sub(delta, &hwc->period_left); } +static void csky_pmu_reset(void *info) +{ + cpwcr(HPCR, BIT(31) | BIT(30) | BIT(1)); +} + static void csky_pmu_read(struct perf_event *event) { csky_perf_event_update(event, &event->hw); @@ -844,15 +970,6 @@ static int csky_pmu_event_init(struct perf_event *event) struct hw_perf_event *hwc = &event->hw; int ret; - if (event->attr.exclude_user) - csky_pmu.hpcr = BIT(2); - else if (event->attr.exclude_kernel) - csky_pmu.hpcr = BIT(3); - else - csky_pmu.hpcr = BIT(2) | BIT(3); - - csky_pmu.hpcr |= BIT(1) | BIT(0); - switch (event->attr.type) { case PERF_TYPE_HARDWARE: if (event->attr.config >= PERF_COUNT_HW_MAX) @@ -861,21 +978,32 @@ static int csky_pmu_event_init(struct perf_event *event) if (ret == HW_OP_UNSUPPORTED) return -ENOENT; hwc->idx = ret; - return 0; + break; case PERF_TYPE_HW_CACHE: ret = csky_pmu_cache_event(event->attr.config); if (ret == CACHE_OP_UNSUPPORTED) return -ENOENT; hwc->idx = ret; - return 0; + break; case PERF_TYPE_RAW: if (hw_raw_read_mapping[event->attr.config] == NULL) return -ENOENT; hwc->idx = event->attr.config; - return 0; + break; default: return -ENOENT; } + + if (event->attr.exclude_user) + csky_pmu.hpcr = BIT(2); + else if (event->attr.exclude_kernel) + csky_pmu.hpcr = BIT(3); + else + csky_pmu.hpcr = BIT(2) | BIT(3); + + csky_pmu.hpcr |= BIT(1) | BIT(0); + + return 0; } /* starts all counters */ @@ -892,6 +1020,7 @@ static void csky_pmu_disable(struct pmu *pmu) static void csky_pmu_start(struct perf_event *event, int flags) { + unsigned long flg; struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; @@ -903,16 +1032,34 @@ static void csky_pmu_start(struct perf_event *event, int flags) hwc->state = 0; + csky_pmu_event_set_period(event); + + local_irq_save(flg); + + cpwcr(HPINTENR, BIT(idx) | cprcr(HPINTENR)); cpwcr(HPCNTENR, BIT(idx) | cprcr(HPCNTENR)); + + local_irq_restore(flg); } -static void csky_pmu_stop(struct perf_event *event, int flags) +static void csky_pmu_stop_event(struct perf_event *event) { + unsigned long flg; struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; + local_irq_save(flg); + + cpwcr(HPINTENR, ~BIT(idx) & cprcr(HPINTENR)); + cpwcr(HPCNTENR, ~BIT(idx) & cprcr(HPCNTENR)); + + local_irq_restore(flg); +} + +static void csky_pmu_stop(struct perf_event *event, int flags) +{ if (!(event->hw.state & PERF_HES_STOPPED)) { - cpwcr(HPCNTENR, ~BIT(idx) & cprcr(HPCNTENR)); + csky_pmu_stop_event(event); event->hw.state |= PERF_HES_STOPPED; } @@ -925,22 +1072,26 @@ static void csky_pmu_stop(struct perf_event *event, int flags) static void csky_pmu_del(struct perf_event *event, int flags) { + struct pmu_hw_events *hw_events = this_cpu_ptr(csky_pmu.hw_events); + struct hw_perf_event *hwc = &event->hw; + csky_pmu_stop(event, PERF_EF_UPDATE); + hw_events->events[hwc->idx] = NULL; + perf_event_update_userpage(event); } /* allocate hardware counter and optionally start counting */ static int csky_pmu_add(struct perf_event *event, int flags) { + struct pmu_hw_events *hw_events = this_cpu_ptr(csky_pmu.hw_events); struct hw_perf_event *hwc = &event->hw; - local64_set(&hwc->prev_count, 0); - - if (hw_raw_write_mapping[hwc->idx] != NULL) - hw_raw_write_mapping[hwc->idx](0); + hw_events->events[hwc->idx] = event; hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + if (flags & PERF_EF_START) csky_pmu_start(event, PERF_EF_RELOAD); @@ -949,8 +1100,110 @@ static int csky_pmu_add(struct perf_event *event, int flags) return 0; } -int __init init_hw_perf_events(void) +static irqreturn_t csky_pmu_handle_irq(int irq_num, void *dev) +{ + struct perf_sample_data data; + struct pmu_hw_events *cpuc = this_cpu_ptr(csky_pmu.hw_events); + struct pt_regs *regs; + int idx; + + /* + * Did an overflow occur? + */ + if (!cprcr(HPOFSR)) + return IRQ_NONE; + + /* + * Handle the counter(s) overflow(s) + */ + regs = get_irq_regs(); + + csky_pmu_disable(&csky_pmu.pmu); + + for (idx = 0; idx < CSKY_PMU_MAX_EVENTS; ++idx) { + struct perf_event *event = cpuc->events[idx]; + struct hw_perf_event *hwc; + + /* Ignore if we don't have an event. */ + if (!event) + continue; + /* + * We have a single interrupt for all counters. Check that + * each counter has overflowed before we process it. + */ + if (!(cprcr(HPOFSR) & BIT(idx))) + continue; + + hwc = &event->hw; + csky_perf_event_update(event, &event->hw); + perf_sample_data_init(&data, 0, hwc->last_period); + csky_pmu_event_set_period(event); + + if (perf_event_overflow(event, &data, regs)) + csky_pmu_stop_event(event); + } + + csky_pmu_enable(&csky_pmu.pmu); + + /* + * Handle the pending perf events. + * + * Note: this call *must* be run with interrupts disabled. For + * platforms that can have the PMU interrupts raised as an NMI, this + * will not work. + */ + irq_work_run(); + + return IRQ_HANDLED; +} + +static int csky_pmu_request_irq(irq_handler_t handler) { + int err, irqs; + struct platform_device *pmu_device = csky_pmu.plat_device; + + if (!pmu_device) + return -ENODEV; + + irqs = min(pmu_device->num_resources, num_possible_cpus()); + if (irqs < 1) { + pr_err("no irqs for PMUs defined\n"); + return -ENODEV; + } + + csky_pmu_irq = platform_get_irq(pmu_device, 0); + if (csky_pmu_irq < 0) + return -ENODEV; + err = request_percpu_irq(csky_pmu_irq, handler, "csky-pmu", + this_cpu_ptr(csky_pmu.hw_events)); + if (err) { + pr_err("unable to request IRQ%d for CSKY PMU counters\n", + csky_pmu_irq); + return err; + } + + return 0; +} + +static void csky_pmu_free_irq(void) +{ + int irq; + struct platform_device *pmu_device = csky_pmu.plat_device; + + irq = platform_get_irq(pmu_device, 0); + if (irq >= 0) + free_percpu_irq(irq, this_cpu_ptr(csky_pmu.hw_events)); +} + +int init_hw_perf_events(void) +{ + csky_pmu.hw_events = alloc_percpu_gfp(struct pmu_hw_events, + GFP_KERNEL); + if (!csky_pmu.hw_events) { + pr_info("failed to allocate per-cpu PMU data.\n"); + return -ENOMEM; + } + csky_pmu.pmu = (struct pmu) { .pmu_enable = csky_pmu_enable, .pmu_disable = csky_pmu_disable, @@ -1022,10 +1275,97 @@ int __init init_hw_perf_events(void) hw_raw_write_mapping[0x1a] = csky_pmu_write_l2wac; hw_raw_write_mapping[0x1b] = csky_pmu_write_l2wmc; - csky_pmu.pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; + return 0; +} - cpwcr(HPCR, BIT(31) | BIT(30) | BIT(1)); +static int csky_pmu_starting_cpu(unsigned int cpu) +{ + enable_percpu_irq(csky_pmu_irq, 0); + return 0; +} - return perf_pmu_register(&csky_pmu.pmu, "cpu", PERF_TYPE_RAW); +static int csky_pmu_dying_cpu(unsigned int cpu) +{ + disable_percpu_irq(csky_pmu_irq); + return 0; } -arch_initcall(init_hw_perf_events); + +int csky_pmu_device_probe(struct platform_device *pdev, + const struct of_device_id *of_table) +{ + struct device_node *node = pdev->dev.of_node; + int ret; + + ret = init_hw_perf_events(); + if (ret) { + pr_notice("[perf] failed to probe PMU!\n"); + return ret; + } + + if (of_property_read_u32(node, "count-width", + &csky_pmu.count_width)) { + csky_pmu.count_width = DEFAULT_COUNT_WIDTH; + } + csky_pmu.max_period = BIT(csky_pmu.count_width) - 1; + + csky_pmu.plat_device = pdev; + + /* Ensure the PMU has sane values out of reset. */ + on_each_cpu(csky_pmu_reset, &csky_pmu, 1); + + ret = csky_pmu_request_irq(csky_pmu_handle_irq); + if (ret) { + csky_pmu.pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; + pr_notice("[perf] PMU request irq fail!\n"); + } + + ret = cpuhp_setup_state(CPUHP_AP_PERF_ONLINE, "AP_PERF_ONLINE", + csky_pmu_starting_cpu, + csky_pmu_dying_cpu); + if (ret) { + csky_pmu_free_irq(); + free_percpu(csky_pmu.hw_events); + return ret; + } + + ret = perf_pmu_register(&csky_pmu.pmu, "cpu", PERF_TYPE_RAW); + if (ret) { + csky_pmu_free_irq(); + free_percpu(csky_pmu.hw_events); + } + + return ret; +} + +const static struct of_device_id csky_pmu_of_device_ids[] = { + {.compatible = "csky,csky-pmu"}, + {}, +}; + +static int csky_pmu_dev_probe(struct platform_device *pdev) +{ + return csky_pmu_device_probe(pdev, csky_pmu_of_device_ids); +} + +static struct platform_driver csky_pmu_driver = { + .driver = { + .name = "csky-pmu", + .of_match_table = csky_pmu_of_device_ids, + }, + .probe = csky_pmu_dev_probe, +}; + +static int __init csky_pmu_probe(void) +{ + int ret; + + ret = platform_driver_register(&csky_pmu_driver); + if (ret) + pr_notice("[perf] PMU initialization failed\n"); + else + pr_notice("[perf] PMU initialization done\n"); + + return ret; +} + +device_initcall(csky_pmu_probe); |