diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-03-30 16:40:08 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-03-30 16:40:08 -0700 |
commit | 9b82f05f869a823d43ea4186f5f732f2924d3693 (patch) | |
tree | 6aaa625789d7d345d0694ebe20276f0b42e5a149 /include/linux | |
parent | 4b9fd8a829a1eec7442e38afff21d610604de56a (diff) | |
parent | 629b3df7ecb01fddfdf71cb5d3c563d143117c33 (diff) |
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar:
"The main changes in this cycle were:
Kernel side changes:
- A couple of x86/cpu cleanups and changes were grandfathered in due
to patch dependencies. These clean up the set of CPU model/family
matching macros with a consistent namespace and C99 initializer
style.
- A bunch of updates to various low level PMU drivers:
* AMD Family 19h L3 uncore PMU
* Intel Tiger Lake uncore support
* misc fixes to LBR TOS sampling
- optprobe fixes
- perf/cgroup: optimize cgroup event sched-in processing
- misc cleanups and fixes
Tooling side changes are to:
- perf {annotate,expr,record,report,stat,test}
- perl scripting
- libapi, libperf and libtraceevent
- vendor events on Intel and S390, ARM cs-etm
- Intel PT updates
- Documentation changes and updates to core facilities
- misc cleanups, fixes and other enhancements"
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (89 commits)
cpufreq/intel_pstate: Fix wrong macro conversion
x86/cpu: Cleanup the now unused CPU match macros
hwrng: via_rng: Convert to new X86 CPU match macros
crypto: Convert to new CPU match macros
ASoC: Intel: Convert to new X86 CPU match macros
powercap/intel_rapl: Convert to new X86 CPU match macros
PCI: intel-mid: Convert to new X86 CPU match macros
mmc: sdhci-acpi: Convert to new X86 CPU match macros
intel_idle: Convert to new X86 CPU match macros
extcon: axp288: Convert to new X86 CPU match macros
thermal: Convert to new X86 CPU match macros
hwmon: Convert to new X86 CPU match macros
platform/x86: Convert to new CPU match macros
EDAC: Convert to new X86 CPU match macros
cpufreq: Convert to new X86 CPU match macros
ACPI: Convert to new X86 CPU match macros
x86/platform: Convert to new CPU match macros
x86/kernel: Convert to new CPU match macros
x86/kvm: Convert to new CPU match macros
x86/perf/events: Convert to new CPU match macros
...
Diffstat (limited to 'include/linux')
-rw-r--r-- | include/linux/min_heap.h | 134 | ||||
-rw-r--r-- | include/linux/mod_devicetable.h | 4 | ||||
-rw-r--r-- | include/linux/perf_event.h | 19 |
3 files changed, 154 insertions, 3 deletions
diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h new file mode 100644 index 000000000000..44077837385f --- /dev/null +++ b/include/linux/min_heap.h @@ -0,0 +1,134 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_MIN_HEAP_H +#define _LINUX_MIN_HEAP_H + +#include <linux/bug.h> +#include <linux/string.h> +#include <linux/types.h> + +/** + * struct min_heap - Data structure to hold a min-heap. + * @data: Start of array holding the heap elements. + * @nr: Number of elements currently in the heap. + * @size: Maximum number of elements that can be held in current storage. + */ +struct min_heap { + void *data; + int nr; + int size; +}; + +/** + * struct min_heap_callbacks - Data/functions to customise the min_heap. + * @elem_size: The size of each element in bytes. + * @less: Partial order function for this heap. + * @swp: Swap elements function. + */ +struct min_heap_callbacks { + int elem_size; + bool (*less)(const void *lhs, const void *rhs); + void (*swp)(void *lhs, void *rhs); +}; + +/* Sift the element at pos down the heap. */ +static __always_inline +void min_heapify(struct min_heap *heap, int pos, + const struct min_heap_callbacks *func) +{ + void *left, *right, *parent, *smallest; + void *data = heap->data; + + for (;;) { + if (pos * 2 + 1 >= heap->nr) + break; + + left = data + ((pos * 2 + 1) * func->elem_size); + parent = data + (pos * func->elem_size); + smallest = parent; + if (func->less(left, smallest)) + smallest = left; + + if (pos * 2 + 2 < heap->nr) { + right = data + ((pos * 2 + 2) * func->elem_size); + if (func->less(right, smallest)) + smallest = right; + } + if (smallest == parent) + break; + func->swp(smallest, parent); + if (smallest == left) + pos = (pos * 2) + 1; + else + pos = (pos * 2) + 2; + } +} + +/* Floyd's approach to heapification that is O(nr). 
*/ +static __always_inline +void min_heapify_all(struct min_heap *heap, + const struct min_heap_callbacks *func) +{ + int i; + + for (i = heap->nr / 2; i >= 0; i--) + min_heapify(heap, i, func); +} + +/* Remove minimum element from the heap, O(log2(nr)). */ +static __always_inline +void min_heap_pop(struct min_heap *heap, + const struct min_heap_callbacks *func) +{ + void *data = heap->data; + + if (WARN_ONCE(heap->nr <= 0, "Popping an empty heap")) + return; + + /* Place last element at the root (position 0) and then sift down. */ + heap->nr--; + memcpy(data, data + (heap->nr * func->elem_size), func->elem_size); + min_heapify(heap, 0, func); +} + +/* + * Remove the minimum element and then push the given element. The + * implementation performs 1 sift (O(log2(nr))) and is therefore more + * efficient than a pop followed by a push that does 2. + */ +static __always_inline +void min_heap_pop_push(struct min_heap *heap, + const void *element, + const struct min_heap_callbacks *func) +{ + memcpy(heap->data, element, func->elem_size); + min_heapify(heap, 0, func); +} + +/* Push an element on to the heap, O(log2(nr)). */ +static __always_inline +void min_heap_push(struct min_heap *heap, const void *element, + const struct min_heap_callbacks *func) +{ + void *data = heap->data; + void *child, *parent; + int pos; + + if (WARN_ONCE(heap->nr >= heap->size, "Pushing on a full heap")) + return; + + /* Place at the end of data. */ + pos = heap->nr; + memcpy(data + (pos * func->elem_size), element, func->elem_size); + heap->nr++; + + /* Sift child at pos up. 
*/ + for (; pos > 0; pos = (pos - 1) / 2) { + child = data + (pos * func->elem_size); + parent = data + ((pos - 1) / 2) * func->elem_size; + if (func->less(parent, child)) + break; + func->swp(parent, child); + } +} + +#endif /* _LINUX_MIN_HEAP_H */ diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index e3596db077dc..f8b66d43acf6 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -667,9 +667,7 @@ struct x86_cpu_id { kernel_ulong_t driver_data; }; -#define X86_FEATURE_MATCH(x) \ - { X86_VENDOR_ANY, X86_FAMILY_ANY, X86_MODEL_ANY, x } - +/* Wild cards for x86_cpu_id::vendor, family, model and feature */ #define X86_VENDOR_ANY 0xffff #define X86_FAMILY_ANY 0 #define X86_MODEL_ANY 0 diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 547773f5894e..8768a39b5258 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -93,14 +93,26 @@ struct perf_raw_record { /* * branch stack layout: * nr: number of taken branches stored in entries[] + * hw_idx: The low level index of raw branch records + * for the most recent branch. + * -1ULL means invalid/unknown. * * Note that nr can vary from sample to sample * branches (to, from) are stored from most recent * to least recent, i.e., entries[0] contains the most * recent branch. + * The entries[] is an abstraction of raw branch records, + * which may not be stored in age order in HW, e.g. Intel LBR. + * The hw_idx is to expose the low level index of raw + * branch record for the most recent branch aka entries[0]. + * The hw_idx index is between -1 (unknown) and max depth, + * which can be retrieved in /sys/devices/cpu/caps/branches. + * For the architectures whose raw branch records are + * already stored in age order, the hw_idx should be 0. 
*/ struct perf_branch_stack { __u64 nr; + __u64 hw_idx; struct perf_branch_entry entries[0]; }; @@ -850,6 +862,13 @@ struct perf_cpu_context { int sched_cb_usage; int online; + /* + * Per-CPU storage for iterators used in visit_groups_merge. The default + * storage is of size 2 to hold the CPU and any CPU event iterators. + */ + int heap_size; + struct perf_event **heap; + struct perf_event *heap_default[2]; }; struct perf_output_handle { |