diff options
489 files changed, 14315 insertions, 7098 deletions
diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-events b/Documentation/ABI/testing/sysfs-bus-event_source-devices-events new file mode 100644 index 000000000000..0adeb524c0d4 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-events @@ -0,0 +1,62 @@ +What: /sys/devices/cpu/events/ + /sys/devices/cpu/events/branch-misses + /sys/devices/cpu/events/cache-references + /sys/devices/cpu/events/cache-misses + /sys/devices/cpu/events/stalled-cycles-frontend + /sys/devices/cpu/events/branch-instructions + /sys/devices/cpu/events/stalled-cycles-backend + /sys/devices/cpu/events/instructions + /sys/devices/cpu/events/cpu-cycles + +Date: 2013/01/08 + +Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org> + +Description: Generic performance monitoring events + + A collection of performance monitoring events that may be + supported by many/most CPUs. These events can be monitored + using the 'perf(1)' tool. + + The contents of each file would look like: + + event=0xNNNN + + where 'N' is a hex digit and the number '0xNNNN' shows the + "raw code" for the perf event identified by the file's + "basename". + + +What: /sys/devices/cpu/events/PM_LD_MISS_L1 + /sys/devices/cpu/events/PM_LD_REF_L1 + /sys/devices/cpu/events/PM_CYC + /sys/devices/cpu/events/PM_BRU_FIN + /sys/devices/cpu/events/PM_GCT_NOSLOT_CYC + /sys/devices/cpu/events/PM_BRU_MPRED + /sys/devices/cpu/events/PM_INST_CMPL + /sys/devices/cpu/events/PM_CMPLU_STALL + +Date: 2013/01/08 + +Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org> + Linux Powerpc mailing list <linuxppc-dev@ozlabs.org> + +Description: POWER-systems specific performance monitoring events + + A collection of performance monitoring events that may be + supported by the POWER CPU. These events can be monitored + using the 'perf(1)' tool. + + These events may not be supported by other CPUs. + + The contents of each file would look like: + + event=0xNNNN + + where 'N' is a hex digit and the number '0xNNNN' shows the + "raw code" for the perf event identified by the file's + "basename". + + Further, multiple terms like 'event=0xNNNN' can be specified + and separated with comma. All available terms are defined in + the /sys/bus/event_source/devices/<dev>/format file. diff --git a/Documentation/ABI/testing/sysfs-platform-ts5500 b/Documentation/ABI/testing/sysfs-platform-ts5500 new file mode 100644 index 000000000000..c88375a537a1 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-platform-ts5500 @@ -0,0 +1,47 @@ +What: /sys/devices/platform/ts5500/adc +Date: January 2013 +KernelVersion: 3.7 +Contact: "Savoir-faire Linux Inc." <kernel@savoirfairelinux.com> +Description: + Indicates the presence of an A/D Converter. If it is present, + it will display "1", otherwise "0". + +What: /sys/devices/platform/ts5500/ereset +Date: January 2013 +KernelVersion: 3.7 +Contact: "Savoir-faire Linux Inc." <kernel@savoirfairelinux.com> +Description: + Indicates the presence of an external reset. If it is present, + it will display "1", otherwise "0". + +What: /sys/devices/platform/ts5500/id +Date: January 2013 +KernelVersion: 3.7 +Contact: "Savoir-faire Linux Inc." <kernel@savoirfairelinux.com> +Description: + Product ID of the TS board. TS-5500 ID is 0x60. + +What: /sys/devices/platform/ts5500/jumpers +Date: January 2013 +KernelVersion: 3.7 +Contact: "Savoir-faire Linux Inc." <kernel@savoirfairelinux.com> +Description: + Bitfield showing the jumpers' state. If a jumper is present, + the corresponding bit is set. For instance, 0x0e means jumpers + 2, 3 and 4 are set. + +What: /sys/devices/platform/ts5500/rs485 +Date: January 2013 +KernelVersion: 3.7 +Contact: "Savoir-faire Linux Inc." <kernel@savoirfairelinux.com> +Description: + Indicates the presence of the RS485 option. If it is present, + it will display "1", otherwise "0". + +What: /sys/devices/platform/ts5500/sram +Date: January 2013 +KernelVersion: 3.7 +Contact: "Savoir-faire Linux Inc." <kernel@savoirfairelinux.com> +Description: + Indicates the presence of the SRAM option. If it is present, + it will display "1", otherwise "0". diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt index 53e6fca146d7..a09178086c30 100644 --- a/Documentation/PCI/MSI-HOWTO.txt +++ b/Documentation/PCI/MSI-HOWTO.txt @@ -127,15 +127,42 @@ on the number of vectors that can be allocated; pci_enable_msi_block() returns as soon as it finds any constraint that doesn't allow the call to succeed. -4.2.3 pci_disable_msi +4.2.3 pci_enable_msi_block_auto + +int pci_enable_msi_block_auto(struct pci_dev *dev, unsigned int *count) + +This variation on pci_enable_msi() call allows a device driver to request +the maximum possible number of MSIs. The MSI specification only allows +interrupts to be allocated in powers of two, up to a maximum of 2^5 (32). + +If this function returns a positive number, it indicates that it has +succeeded and the returned value is the number of allocated interrupts. In +this case, the function enables MSI on this device and updates dev->irq to +be the lowest of the new interrupts assigned to it. The other interrupts +assigned to the device are in the range dev->irq to dev->irq + returned +value - 1. + +If this function returns a negative number, it indicates an error and +the driver should not attempt to request any more MSI interrupts for +this device. + +If the device driver needs to know the number of interrupts the device +supports it can pass the pointer count where that number is stored. The +device driver must decide what action to take if pci_enable_msi_block_auto() +succeeds, but returns a value less than the number of interrupts supported. +If the device driver does not need to know the number of interrupts +supported, it can set the pointer count to NULL. + +4.2.4 pci_disable_msi void pci_disable_msi(struct pci_dev *dev) This function should be used to undo the effect of pci_enable_msi() or -pci_enable_msi_block(). Calling it restores dev->irq to the pin-based -interrupt number and frees the previously allocated message signaled -interrupt(s). The interrupt may subsequently be assigned to another -device, so drivers should not cache the value of dev->irq. +pci_enable_msi_block() or pci_enable_msi_block_auto(). Calling it restores +dev->irq to the pin-based interrupt number and frees the previously +allocated message signaled interrupt(s). The interrupt may subsequently be +assigned to another device, so drivers should not cache the value of +dev->irq. Before calling this function, a device driver must always call free_irq() on any interrupt for which it previously called request_irq(). diff --git a/Documentation/atomic_ops.txt b/Documentation/atomic_ops.txt index 27f2b21a9d5c..d9ca5be9b471 100644 --- a/Documentation/atomic_ops.txt +++ b/Documentation/atomic_ops.txt @@ -253,6 +253,8 @@ This performs an atomic exchange operation on the atomic variable v, setting the given new value. It returns the old value that the atomic variable v had just before the operation. +atomic_xchg requires explicit memory barriers around the operation. + int atomic_cmpxchg(atomic_t *v, int old, int new); This performs an atomic compare exchange operation on the atomic value v, diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index 3c4e1b3b80a1..fa5d8a9ae205 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -1685,6 +1685,7 @@ explicit lock operations, described later). These include: xchg(); cmpxchg(); + atomic_xchg(); atomic_cmpxchg(); atomic_inc_return(); atomic_dec_return(); diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt index 6f51fed45f2d..53d6a3c51d87 100644 --- a/Documentation/trace/ftrace.txt +++ b/Documentation/trace/ftrace.txt @@ -1842,6 +1842,89 @@ an error. # cat buffer_size_kb 85 +Snapshot +-------- +CONFIG_TRACER_SNAPSHOT makes a generic snapshot feature +available to all non latency tracers. (Latency tracers which +record max latency, such as "irqsoff" or "wakeup", can't use +this feature, since those are already using the snapshot +mechanism internally.) + +Snapshot preserves a current trace buffer at a particular point +in time without stopping tracing. Ftrace swaps the current +buffer with a spare buffer, and tracing continues in the new +current (=previous spare) buffer. + +The following debugfs files in "tracing" are related to this +feature: + + snapshot: + + This is used to take a snapshot and to read the output + of the snapshot. Echo 1 into this file to allocate a + spare buffer and to take a snapshot (swap), then read + the snapshot from this file in the same format as + "trace" (described above in the section "The File + System"). Both reads snapshot and tracing are executable + in parallel. When the spare buffer is allocated, echoing + 0 frees it, and echoing else (positive) values clear the + snapshot contents. + More details are shown in the table below. + + status\input | 0 | 1 | else | + --------------+------------+------------+------------+ + not allocated |(do nothing)| alloc+swap | EINVAL | + --------------+------------+------------+------------+ + allocated | free | swap | clear | + --------------+------------+------------+------------+ + +Here is an example of using the snapshot feature. + + # echo 1 > events/sched/enable + # echo 1 > snapshot + # cat snapshot +# tracer: nop +# +# entries-in-buffer/entries-written: 71/71 #P:8 +# +# _-----=> irqs-off +# / _----=> need-resched +# | / _---=> hardirq/softirq +# || / _--=> preempt-depth +# ||| / delay +# TASK-PID CPU# |||| TIMESTAMP FUNCTION +# | | | |||| | | + <idle>-0 [005] d... 2440.603828: sched_switch: prev_comm=swapper/5 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=snapshot-test-2 next_pid=2242 next_prio=120 + sleep-2242 [005] d... 2440.603846: sched_switch: prev_comm=snapshot-test-2 prev_pid=2242 prev_prio=120 prev_state=R ==> next_comm=kworker/5:1 next_pid=60 next_prio=120 +[...] + <idle>-0 [002] d... 2440.707230: sched_switch: prev_comm=swapper/2 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=snapshot-test-2 next_pid=2229 next_prio=120 + + # cat trace +# tracer: nop +# +# entries-in-buffer/entries-written: 77/77 #P:8 +# +# _-----=> irqs-off +# / _----=> need-resched +# | / _---=> hardirq/softirq +# || / _--=> preempt-depth +# ||| / delay +# TASK-PID CPU# |||| TIMESTAMP FUNCTION +# | | | |||| | | + <idle>-0 [007] d... 2440.707395: sched_switch: prev_comm=swapper/7 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=snapshot-test-2 next_pid=2243 next_prio=120 + snapshot-test-2-2229 [002] d... 2440.707438: sched_switch: prev_comm=snapshot-test-2 prev_pid=2229 prev_prio=120 prev_state=S ==> next_comm=swapper/2 next_pid=0 next_prio=120 +[...] + + +If you try to use this snapshot feature when current tracer is +one of the latency tracers, you will get the following results. + + # echo wakeup > current_tracer + # echo 1 > snapshot +bash: echo: write error: Device or resource busy + # cat snapshot +cat: snapshot: Device or resource busy + ----------- More details can be found in the source code, in the diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt index e540fd67f767..b443f1de0e5a 100644 --- a/Documentation/x86/boot.txt +++ b/Documentation/x86/boot.txt @@ -390,6 +390,7 @@ Protocol: 2.00+ F Special (0xFF = undefined) 10 Reserved 11 Minimal Linux Bootloader <http://sebastian-plotz.blogspot.de> + 12 OVMF UEFI virtualization stack Please contact <hpa@zytor.com> if you need a bootloader ID value assigned. diff --git a/MAINTAINERS b/MAINTAINERS index 35a56bcd5e75..526fb85f2f7e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1303,7 +1303,7 @@ F: include/linux/dmaengine.h F: include/linux/async_tx.h AT24 EEPROM DRIVER -M: Wolfram Sang <w.sang@pengutronix.de> +M: Wolfram Sang <wsa@the-dreams.de> L: linux-i2c@vger.kernel.org S: Maintained F: drivers/misc/eeprom/at24.c @@ -3757,12 +3757,11 @@ S: Maintained F: drivers/i2c/i2c-stub.c I2C SUBSYSTEM -M: Wolfram Sang <w.sang@pengutronix.de> +M: Wolfram Sang <wsa@the-dreams.de> M: "Ben Dooks (embedded platforms)" <ben-linux@fluff.org> L: linux-i2c@vger.kernel.org W: http://i2c.wiki.kernel.org/ -T: quilt kernel.org/pub/linux/kernel/people/jdelvare/linux-2.6/jdelvare-i2c/ -T: git git://git.pengutronix.de/git/wsa/linux.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux.git S: Maintained F: Documentation/i2c/ F: drivers/i2c/ @@ -5778,15 +5777,6 @@ L: linux-i2c@vger.kernel.org S: Maintained F: drivers/i2c/muxes/i2c-mux-pca9541.c -PCA9564/PCA9665 I2C BUS DRIVER -M: Wolfram Sang <w.sang@pengutronix.de> -L: linux-i2c@vger.kernel.org -S: Maintained -F: drivers/i2c/algos/i2c-algo-pca.c -F: drivers/i2c/busses/i2c-pca-* -F: include/linux/i2c-algo-pca.h -F: include/linux/i2c-pca-platform.h - PCDP - PRIMARY CONSOLE AND DEBUG PORT M: Khalid Aziz <khalid@gonehiking.org> S: Maintained @@ -6598,7 +6588,7 @@ F: drivers/dma/dw_dmac_regs.h F: drivers/dma/dw_dmac.c TIMEKEEPING, NTP -M: John Stultz <johnstul@us.ibm.com> +M: John Stultz <john.stultz@linaro.org> M: Thomas Gleixner <tglx@linutronix.de> T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/core S: Supported @@ -7543,6 +7533,11 @@ F: drivers/net/team/ F: include/linux/if_team.h F: include/uapi/linux/if_team.h +TECHNOLOGIC SYSTEMS TS-5500 PLATFORM SUPPORT +M: Savoir-faire Linux Inc. <kernel@savoirfairelinux.com> +S: Maintained +F: arch/x86/platform/ts5500/ + TECHNOTREND USB IR RECEIVER M: Sean Young <sean@mess.org> L: linux-media@vger.kernel.org @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 8 SUBLEVEL = 0 -EXTRAVERSION = -rc7 +EXTRAVERSION = NAME = Unicycling Gorilla # *DOCUMENTATION* @@ -165,7 +165,8 @@ export srctree objtree VPATH # then ARCH is assigned, getting whatever value it gets normally, and # SUBARCH is subsequently ignored. -SUBARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \ +SUBARCH := $(shell uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ \ + -e s/sun4u/sparc64/ \ -e s/arm.*/arm/ -e s/sa110/arm/ \ -e s/s390x/s390/ -e s/parisc64/parisc/ \ -e s/ppc.*/powerpc/ -e s/mips.*/mips/ \ diff --git a/arch/Kconfig b/arch/Kconfig index 7f8f281f2585..97fb7d0365d1 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -76,6 +76,15 @@ config OPTPROBES depends on KPROBES && HAVE_OPTPROBES depends on !PREEMPT +config KPROBES_ON_FTRACE + def_bool y + depends on KPROBES && HAVE_KPROBES_ON_FTRACE + depends on DYNAMIC_FTRACE_WITH_REGS + help + If function tracer is enabled and the arch supports full + passing of pt_regs to function tracing, then kprobes can + optimize on top of function tracing. + config UPROBES bool "Transparent user-space probes (EXPERIMENTAL)" depends on UPROBE_EVENT && PERF_EVENTS @@ -158,6 +167,9 @@ config HAVE_KRETPROBES config HAVE_OPTPROBES bool +config HAVE_KPROBES_ON_FTRACE + bool + config HAVE_NMI_WATCHDOG bool # diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 9d5904cc7712..9b504af2e966 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -5,7 +5,6 @@ config ALPHA select HAVE_IDE select HAVE_OPROFILE select HAVE_SYSCALL_WRAPPERS - select HAVE_IRQ_WORK select HAVE_PCSPKR_PLATFORM select HAVE_PERF_EVENTS select HAVE_DMA_ATTRS diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 14db93e4c8a8..dbc1760f418b 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -1139,6 +1139,7 @@ struct rusage32 { SYSCALL_DEFINE2(osf_getrusage, int, who, struct rusage32 __user *, ru) { struct rusage32 r; + cputime_t utime, stime; if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN) return -EINVAL; @@ -1146,8 +1147,9 @@ SYSCALL_DEFINE2(osf_getrusage, int, who, struct rusage32 __user *, ru) memset(&r, 0, sizeof(r)); switch (who) { case RUSAGE_SELF: - jiffies_to_timeval32(current->utime, &r.ru_utime); - jiffies_to_timeval32(current->stime, &r.ru_stime); + task_cputime(current, &utime, &stime); + jiffies_to_timeval32(utime, &r.ru_utime); + jiffies_to_timeval32(stime, &r.ru_stime); r.ru_minflt = current->min_flt; r.ru_majflt = current->maj_flt; break; diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 67874b82a4ed..9bbe760f2352 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -36,7 +36,6 @@ config ARM select HAVE_GENERIC_HARDIRQS select HAVE_HW_BREAKPOINT if (PERF_EVENTS && (CPU_V6 || CPU_V6K || CPU_V7)) select HAVE_IDE if PCI || ISA || PCMCIA - select HAVE_IRQ_WORK select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZMA select HAVE_KERNEL_LZO diff --git a/arch/arm/include/asm/smp_scu.h b/arch/arm/include/asm/smp_scu.h index 4eb6d005ffaa..86dff32a0737 100644 --- a/arch/arm/include/asm/smp_scu.h +++ b/arch/arm/include/asm/smp_scu.h @@ -7,8 +7,14 @@ #ifndef __ASSEMBLER__ unsigned int scu_get_core_count(void __iomem *); -void scu_enable(void __iomem *); int scu_power_mode(void __iomem *, unsigned int); + +#ifdef CONFIG_SMP +void scu_enable(void __iomem *scu_base); +#else +static inline void scu_enable(void __iomem *scu_base) {} +#endif + #endif #endif diff --git a/arch/arm/kernel/smp_scu.c b/arch/arm/kernel/smp_scu.c index b9f015e843d8..45eac87ed66a 100644 --- a/arch/arm/kernel/smp_scu.c +++ b/arch/arm/kernel/smp_scu.c @@ -75,7 +75,7 @@ void scu_enable(void __iomem *scu_base) int scu_power_mode(void __iomem *scu_base, unsigned int mode) { unsigned int val; - int cpu = cpu_logical_map(smp_processor_id()); + int cpu = MPIDR_AFFINITY_LEVEL(cpu_logical_map(smp_processor_id()), 0); if (mode > 3 || mode == 1 || cpu > 3) return -EINVAL; diff --git a/arch/arm/mach-highbank/highbank.c b/arch/arm/mach-highbank/highbank.c index 981dc1e1da51..e6c061282939 100644 --- a/arch/arm/mach-highbank/highbank.c +++ b/arch/arm/mach-highbank/highbank.c @@ -28,6 +28,7 @@ #include <asm/arch_timer.h> #include <asm/cacheflush.h> +#include <asm/cputype.h> #include <asm/smp_plat.h> #include <asm/smp_twd.h> #include <asm/hardware/arm_timer.h> @@ -59,7 +60,7 @@ static void __init highbank_scu_map_io(void) void highbank_set_cpu_jump(int cpu, void *jump_addr) { - cpu = cpu_logical_map(cpu); + cpu = MPIDR_AFFINITY_LEVEL(cpu_logical_map(cpu), 0); writel(virt_to_phys(jump_addr), HB_JUMP_TABLE_VIRT(cpu)); __cpuc_flush_dcache_area(HB_JUMP_TABLE_VIRT(cpu), 16); outer_clean_range(HB_JUMP_TABLE_PHYS(cpu), diff --git a/arch/arm/mach-highbank/sysregs.h b/arch/arm/mach-highbank/sysregs.h index 70af9d13fcef..5995df7f2622 100644 --- a/arch/arm/mach-highbank/sysregs.h +++ b/arch/arm/mach-highbank/sysregs.h @@ -37,7 +37,7 @@ extern void __iomem *sregs_base; static inline void highbank_set_core_pwr(void) { - int cpu = cpu_logical_map(smp_processor_id()); + int cpu = MPIDR_AFFINITY_LEVEL(cpu_logical_map(smp_processor_id()), 0); if (scu_base_addr) scu_power_mode(scu_base_addr, SCU_PM_POWEROFF); else @@ -46,7 +46,7 @@ static inline void highbank_set_core_pwr(void) static inline void highbank_clear_core_pwr(void) { - int cpu = cpu_logical_map(smp_processor_id()); + int cpu = MPIDR_AFFINITY_LEVEL(cpu_logical_map(smp_processor_id()), 0); if (scu_base_addr) scu_power_mode(scu_base_addr, SCU_PM_NORMAL); else diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index f8f362aafee9..75e915b72471 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -21,7 +21,6 @@ config ARM64 select HAVE_GENERIC_DMA_COHERENT select HAVE_GENERIC_HARDIRQS select HAVE_HW_BREAKPOINT if PERF_EVENTS - select HAVE_IRQ_WORK select HAVE_MEMBLOCK select HAVE_PERF_EVENTS select IRQ_DOMAIN diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig index b6f3ad5441c5..67e4aaad78f5 100644 --- a/arch/blackfin/Kconfig +++ b/arch/blackfin/Kconfig @@ -24,7 +24,6 @@ config BLACKFIN select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_TRACE_MCOUNT_TEST select HAVE_IDE - select HAVE_IRQ_WORK select HAVE_KERNEL_GZIP if RAMKERNEL select HAVE_KERNEL_BZIP2 if RAMKERNEL select HAVE_KERNEL_LZMA if RAMKERNEL @@ -38,7 +37,6 @@ config BLACKFIN select HAVE_GENERIC_HARDIRQS select GENERIC_ATOMIC64 select GENERIC_IRQ_PROBE - select IRQ_PER_CPU if SMP select USE_GENERIC_SMP_HELPERS if SMP select HAVE_NMI_WATCHDOG if NMI_WATCHDOG select GENERIC_SMP_IDLE_THREAD diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig index 9d262645f667..17df48fc8f44 100644 --- a/arch/frv/Kconfig +++ b/arch/frv/Kconfig @@ -3,7 +3,6 @@ config FRV default y select HAVE_IDE select HAVE_ARCH_TRACEHOOK - select HAVE_IRQ_WORK select HAVE_PERF_EVENTS select HAVE_UID16 select HAVE_GENERIC_HARDIRQS diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig index 0744f7d7b1fd..e4decc6b8947 100644 --- a/arch/hexagon/Kconfig +++ b/arch/hexagon/Kconfig @@ -12,9 +12,7 @@ config HEXAGON # select ARCH_WANT_OPTIONAL_GPIOLIB # select ARCH_REQUIRE_GPIOLIB # select HAVE_CLK - # select IRQ_PER_CPU # select GENERIC_PENDING_IRQ if SMP - select HAVE_IRQ_WORK select GENERIC_ATOMIC64 select HAVE_PERF_EVENTS select HAVE_GENERIC_HARDIRQS diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 3279646120e3..00c2e88f7755 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -29,7 +29,6 @@ config IA64 select ARCH_DISCARD_MEMBLOCK select GENERIC_IRQ_PROBE select GENERIC_PENDING_IRQ if SMP - select IRQ_PER_CPU select GENERIC_IRQ_SHOW select ARCH_WANT_OPTIONAL_GPIOLIB select ARCH_HAVE_NMI_SAFE_CMPXCHG diff --git a/arch/ia64/include/asm/cputime.h b/arch/ia64/include/asm/cputime.h index 7fcf7f08ab06..e2d3f5baf265 100644 --- a/arch/ia64/include/asm/cputime.h +++ b/arch/ia64/include/asm/cputime.h @@ -11,99 +11,19 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * - * If we have CONFIG_VIRT_CPU_ACCOUNTING, we measure cpu time in nsec. + * If we have CONFIG_VIRT_CPU_ACCOUNTING_NATIVE, we measure cpu time in nsec. * Otherwise we measure cpu time in jiffies using the generic definitions. */ #ifndef __IA64_CPUTIME_H #define __IA64_CPUTIME_H -#ifndef CONFIG_VIRT_CPU_ACCOUNTING -#include <asm-generic/cputime.h> +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE +# include <asm-generic/cputime.h> #else - -#include <linux/time.h> -#include <linux/jiffies.h> -#include <asm/processor.h> - -typedef u64 __nocast cputime_t; -typedef u64 __nocast cputime64_t; - -#define cputime_one_jiffy jiffies_to_cputime(1) - -/* - * Convert cputime <-> jiffies (HZ) - */ -#define cputime_to_jiffies(__ct) \ - ((__force u64)(__ct) / (NSEC_PER_SEC / HZ)) -#define jiffies_to_cputime(__jif) \ - (__force cputime_t)((__jif) * (NSEC_PER_SEC / HZ)) -#define cputime64_to_jiffies64(__ct) \ - ((__force u64)(__ct) / (NSEC_PER_SEC / HZ)) -#define jiffies64_to_cputime64(__jif) \ - (__force cputime64_t)((__jif) * (NSEC_PER_SEC / HZ)) - -/* - * Convert cputime <-> microseconds - */ -#define cputime_to_usecs(__ct) \ - ((__force u64)(__ct) / NSEC_PER_USEC) -#define usecs_to_cputime(__usecs) \ - (__force cputime_t)((__usecs) * NSEC_PER_USEC) -#define usecs_to_cputime64(__usecs) \ - (__force cputime64_t)((__usecs) * NSEC_PER_USEC) - -/* - * Convert cputime <-> seconds - */ -#define cputime_to_secs(__ct) \ - ((__force u64)(__ct) / NSEC_PER_SEC) -#define secs_to_cputime(__secs) \ - (__force cputime_t)((__secs) * NSEC_PER_SEC) - -/* - * Convert cputime <-> timespec (nsec) - */ -static inline cputime_t timespec_to_cputime(const struct timespec *val) -{ - u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_nsec; - return (__force cputime_t) ret; -} -static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val) -{ - val->tv_sec = (__force u64) ct / NSEC_PER_SEC; - val->tv_nsec = (__force u64) ct % NSEC_PER_SEC; -} - -/* - * Convert cputime <-> timeval (msec) - */ -static inline cputime_t timeval_to_cputime(struct timeval *val) -{ - u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_usec * NSEC_PER_USEC; - return (__force cputime_t) ret; -} -static inline void cputime_to_timeval(const cputime_t ct, struct timeval *val) -{ - val->tv_sec = (__force u64) ct / NSEC_PER_SEC; - val->tv_usec = ((__force u64) ct % NSEC_PER_SEC) / NSEC_PER_USEC; -} - -/* - * Convert cputime <-> clock (USER_HZ) - */ -#define cputime_to_clock_t(__ct) \ - ((__force u64)(__ct) / (NSEC_PER_SEC / USER_HZ)) -#define clock_t_to_cputime(__x) \ - (__force cputime_t)((__x) * (NSEC_PER_SEC / USER_HZ)) - -/* - * Convert cputime64 to clock. - */ -#define cputime64_to_clock_t(__ct) \ - cputime_to_clock_t((__force cputime_t)__ct) - +# include <asm/processor.h> +# include <asm-generic/cputime_nsecs.h> extern void arch_vtime_task_switch(struct task_struct *tsk); +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ -#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ #endif /* __IA64_CPUTIME_H */ diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h index ff2ae4136584..020d655ed082 100644 --- a/arch/ia64/include/asm/thread_info.h +++ b/arch/ia64/include/asm/thread_info.h @@ -31,7 +31,7 @@ struct thread_info { mm_segment_t addr_limit; /* user-level address space limit */ int preempt_count; /* 0=premptable, <0=BUG; will also serve as bh-counter */ struct restart_block restart_block; -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE __u64 ac_stamp; __u64 ac_leave; __u64 ac_stime; @@ -69,7 +69,7 @@ struct thread_info { #define task_stack_page(tsk) ((void *)(tsk)) #define __HAVE_THREAD_FUNCTIONS -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE #define setup_thread_stack(p, org) \ *task_thread_info(p) = *task_thread_info(org); \ task_thread_info(p)->ac_stime = 0; \ diff --git a/arch/ia64/include/asm/xen/minstate.h b/arch/ia64/include/asm/xen/minstate.h index c57fa910f2c9..00cf03e0cb82 100644 --- a/arch/ia64/include/asm/xen/minstate.h +++ b/arch/ia64/include/asm/xen/minstate.h @@ -1,5 +1,5 @@ -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE /* read ar.itc in advance, and use it before leaving bank 0 */ #define XEN_ACCOUNT_GET_STAMP \ MOV_FROM_ITC(pUStk, p6, r20, r2); diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c index a48bd9a9927b..46c9e3007315 100644 --- a/arch/ia64/kernel/asm-offsets.c +++ b/arch/ia64/kernel/asm-offsets.c @@ -41,7 +41,7 @@ void foo(void) DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count)); -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE DEFINE(TI_AC_STAMP, offsetof(struct thread_info, ac_stamp)); DEFINE(TI_AC_LEAVE, offsetof(struct thread_info, ac_leave)); DEFINE(TI_AC_STIME, offsetof(struct thread_info, ac_stime)); diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 6bfd8429ee0f..7a53530f22c2 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -724,7 +724,7 @@ GLOBAL_ENTRY(__paravirt_leave_syscall) #endif .global __paravirt_work_processed_syscall; __paravirt_work_processed_syscall: -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE adds r2=PT(LOADRS)+16,r12 MOV_FROM_ITC(pUStk, p9, r22, r19) // fetch time at leave adds r18=TI_FLAGS+IA64_TASK_SIZE,r13 @@ -762,7 +762,7 @@ __paravirt_work_processed_syscall: ld8 r29=[r2],16 // M0|1 load cr.ipsr ld8 r28=[r3],16 // M0|1 load cr.iip -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE (pUStk) add r14=TI_AC_LEAVE+IA64_TASK_SIZE,r13 ;; ld8 r30=[r2],16 // M0|1 load cr.ifs @@ -793,7 +793,7 @@ __paravirt_work_processed_syscall: ld8.fill r1=[r3],16 // M0|1 load r1 (pUStk) mov r17=1 // A ;; -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE (pUStk) st1 [r15]=r17 // M2|3 #else (pUStk) st1 [r14]=r17 // M2|3 @@ -813,7 +813,7 @@ __paravirt_work_processed_syscall: shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition COVER // B add current frame into dirty partition & set cr.ifs ;; -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE mov r19=ar.bsp // M2 get new backing store pointer st8 [r14]=r22 // M save time at leave mov f10=f0 // F clear f10 @@ -948,7 +948,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel) adds r16=PT(CR_IPSR)+16,r12 adds r17=PT(CR_IIP)+16,r12 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE .pred.rel.mutex pUStk,pKStk MOV_FROM_PSR(pKStk, r22, r29) // M2 read PSR now that interrupts are disabled MOV_FROM_ITC(pUStk, p9, r22, r29) // M fetch time at leave @@ -981,7 +981,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel) ;; ld8.fill r12=[r16],16 ld8.fill r13=[r17],16 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE (pUStk) adds r3=TI_AC_LEAVE+IA64_TASK_SIZE,r18 #else (pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18 @@ -989,7 +989,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel) ;; ld8 r20=[r16],16 // ar.fpsr ld8.fill r15=[r17],16 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE (pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18 // deferred #endif ;; @@ -997,7 +997,7 @@ GLOBAL_ENTRY(__paravirt_leave_kernel) ld8.fill r2=[r17] (pUStk) mov r17=1 ;; -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE // mmi_ : ld8 st1 shr;; mmi_ : st8 st1 shr;; // mib : mov add br -> mib : ld8 add br // bbb_ : br nop cover;; mbb_ : mov br cover;; diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S index e662f178b990..c4cd45d97749 100644 --- a/arch/ia64/kernel/fsys.S +++ b/arch/ia64/kernel/fsys.S @@ -529,7 +529,7 @@ GLOBAL_ENTRY(paravirt_fsys_bubble_down) nop.i 0 ;; mov ar.rsc=0 // M2 set enforced lazy mode, pl 0, LE, loadrs=0 -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE MOV_FROM_ITC(p0, p6, r30, r23) // M get cycle for accounting #else nop.m 0 @@ -555,7 +555,7 @@ GLOBAL_ENTRY(paravirt_fsys_bubble_down) cmp.ne pKStk,pUStk=r0,r0 // A set pKStk <- 0, pUStk <- 1 br.call.sptk.many b7=ia64_syscall_setup // B ;; -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE // mov.m r30=ar.itc is called in advance add r16=TI_AC_STAMP+IA64_TASK_SIZE,r2 add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r2 diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S index 4738ff7bd66a..9be4e497f3d3 100644 --- a/arch/ia64/kernel/head.S +++ b/arch/ia64/kernel/head.S @@ -1073,7 +1073,7 @@ END(ia64_native_sched_clock) sched_clock = ia64_native_sched_clock #endif -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE GLOBAL_ENTRY(cycle_to_cputime) alloc r16=ar.pfs,1,0,0,0 addl r8=THIS_CPU(ia64_cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0 @@ -1091,7 +1091,7 @@ GLOBAL_ENTRY(cycle_to_cputime) shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT br.ret.sptk.many rp END(cycle_to_cputime) -#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ #ifdef CONFIG_IA64_BRL_EMU diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S index fa25689fc453..689ffcaa284e 100644 --- a/arch/ia64/kernel/ivt.S +++ b/arch/ia64/kernel/ivt.S @@ -784,7 +784,7 @@ ENTRY(break_fault) (p8) adds r28=16,r28 // A switch cr.iip to next bundle (p9) adds r8=1,r8 // A increment ei to next slot -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE ;; mov b6=r30 // I0 setup syscall handler branch reg early #else @@ -801,7 +801,7 @@ ENTRY(break_fault) // /////////////////////////////////////////////////////////////////////// st1 [r16]=r0 // M2|3 clear current->thread.on_ustack flag -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE MOV_FROM_ITC(p0, p14, r30, r18) // M get cycle for accounting #else mov b6=r30 // I0 setup syscall handler branch reg early @@ -817,7 +817,7 @@ ENTRY(break_fault) cmp.eq p14,p0=r9,r0 // A are syscalls being traced/audited? br.call.sptk.many b7=ia64_syscall_setup // B 1: -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE // mov.m r30=ar.itc is called in advance, and r13 is current add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13 // A add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13 // A @@ -1043,7 +1043,7 @@ END(ia64_syscall_setup) DBG_FAULT(16) FAULT(16) -#if defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(__IA64_ASM_PARAVIRTUALIZED_NATIVE) +#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(__IA64_ASM_PARAVIRTUALIZED_NATIVE) /* * There is no particular reason for this code to be here, other than * that there happens to be space here that would go unused otherwise. diff --git a/arch/ia64/kernel/minstate.h b/arch/ia64/kernel/minstate.h index d56753a11636..cc82a7d744c9 100644 --- a/arch/ia64/kernel/minstate.h +++ b/arch/ia64/kernel/minstate.h @@ -4,7 +4,7 @@ #include "entry.h" #include "paravirt_inst.h" -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE /* read ar.itc in advance, and use it before leaving bank 0 */ #define ACCOUNT_GET_STAMP \ (pUStk) mov.m r20=ar.itc; diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 88a794536bc0..fbaac1afb844 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -77,7 +77,7 @@ static struct clocksource clocksource_itc = { }; static struct clocksource *itc_clocksource; -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE #include <linux/kernel_stat.h> @@ -136,13 +136,14 @@ void vtime_account_system(struct task_struct *tsk) account_system_time(tsk, 0, delta, delta); } +EXPORT_SYMBOL_GPL(vtime_account_system); void vtime_account_idle(struct task_struct *tsk) { account_idle_time(vtime_delta(tsk)); } -#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ static irqreturn_t timer_interrupt (int irq, void *dev_id) diff --git a/arch/m68k/include/asm/processor.h b/arch/m68k/include/asm/processor.h index ae700f49e51d..b0768a657920 100644 --- a/arch/m68k/include/asm/processor.h +++ b/arch/m68k/include/asm/processor.h @@ -130,7 +130,6 @@ extern int handle_kernel_fault(struct pt_regs *regs); #define start_thread(_regs, _pc, _usp) \ do { \ (_regs)->pc = (_pc); \ - ((struct switch_stack *)(_regs))[-1].a6 = 0; \ setframeformat(_regs); \ if (current->mm) \ (_regs)->d5 = current->mm->start_data; \ diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 2ac626ab9d43..9becc44d9d7a 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -4,7 +4,6 @@ config MIPS select HAVE_GENERIC_DMA_COHERENT select HAVE_IDE select HAVE_OPROFILE - select HAVE_IRQ_WORK select HAVE_PERF_EVENTS select PERF_USE_VMALLOC select HAVE_ARCH_KGDB @@ -2161,7 +2160,6 @@ source "mm/Kconfig" config SMP bool "Multi-Processing support" depends on SYS_SUPPORTS_SMP - select IRQ_PER_CPU select USE_GENERIC_SMP_HELPERS help This enables support for systems with more than one CPU. If you have diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index b77feffbadea..a32e34ecda9e 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -9,14 +9,12 @@ config PARISC select RTC_DRV_GENERIC select INIT_ALL_POSSIBLE select BUG - select HAVE_IRQ_WORK select HAVE_PERF_EVENTS select GENERIC_ATOMIC64 if !64BIT select HAVE_GENERIC_HARDIRQS select BROKEN_RODATA select GENERIC_IRQ_PROBE select GENERIC_PCI_IOMAP - select IRQ_PER_CPU select ARCH_HAVE_NMI_SAFE_CMPXCHG select GENERIC_SMP_IDLE_THREAD select GENERIC_STRNCPY_FROM_USER diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 17903f1f356b..561ccca7b1a7 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -118,14 +118,12 @@ config PPC select HAVE_SYSCALL_WRAPPERS if PPC64 select GENERIC_ATOMIC64 if PPC32 select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE - select HAVE_IRQ_WORK select HAVE_PERF_EVENTS select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_HW_BREAKPOINT if PERF_EVENTS && PPC_BOOK3S_64 select HAVE_GENERIC_HARDIRQS select ARCH_WANT_IPC_PARSE_VERSION select SPARSE_IRQ - select IRQ_PER_CPU select IRQ_DOMAIN select GENERIC_IRQ_SHOW select GENERIC_IRQ_SHOW_LEVEL diff --git a/arch/powerpc/configs/chroma_defconfig b/arch/powerpc/configs/chroma_defconfig index 29bb11ec6c64..4f35fc462385 100644 --- a/arch/powerpc/configs/chroma_defconfig +++ b/arch/powerpc/configs/chroma_defconfig @@ -1,6 +1,6 @@ CONFIG_PPC64=y CONFIG_PPC_BOOK3E_64=y -# CONFIG_VIRT_CPU_ACCOUNTING is not set +# CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set CONFIG_SMP=y CONFIG_NR_CPUS=256 CONFIG_EXPERIMENTAL=y diff --git a/arch/powerpc/configs/corenet64_smp_defconfig b/arch/powerpc/configs/corenet64_smp_defconfig index 88fa5c46f66f..f7df8362911f 100644 --- a/arch/powerpc/configs/corenet64_smp_defconfig +++ b/arch/powerpc/configs/corenet64_smp_defconfig @@ -1,6 +1,6 @@ CONFIG_PPC64=y CONFIG_PPC_BOOK3E_64=y -# CONFIG_VIRT_CPU_ACCOUNTING is not set +# CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set CONFIG_SMP=y CONFIG_NR_CPUS=2 CONFIG_EXPERIMENTAL=y diff --git a/arch/powerpc/configs/pasemi_defconfig b/arch/powerpc/configs/pasemi_defconfig index 840a2c2d0430..bcedeea0df89 100644 --- a/arch/powerpc/configs/pasemi_defconfig +++ b/arch/powerpc/configs/pasemi_defconfig @@ -1,6 +1,6 @@ CONFIG_PPC64=y CONFIG_ALTIVEC=y -# CONFIG_VIRT_CPU_ACCOUNTING is not set +# CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is not set CONFIG_SMP=y CONFIG_NR_CPUS=2 CONFIG_EXPERIMENTAL=y diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h index 483733bd06d4..607559ab271f 100644 --- a/arch/powerpc/include/asm/cputime.h +++ b/arch/powerpc/include/asm/cputime.h @@ -8,7 +8,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * - * If we have CONFIG_VIRT_CPU_ACCOUNTING, we measure cpu time in + * If we have CONFIG_VIRT_CPU_ACCOUNTING_NATIVE, we measure cpu time in * the same units as the timebase. Otherwise we measure cpu time * in jiffies using the generic definitions. */ @@ -16,7 +16,7 @@ #ifndef __POWERPC_CPUTIME_H #define __POWERPC_CPUTIME_H -#ifndef CONFIG_VIRT_CPU_ACCOUNTING +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE #include <asm-generic/cputime.h> #ifdef __KERNEL__ static inline void setup_cputime_one_jiffy(void) { } @@ -231,5 +231,5 @@ static inline cputime_t clock_t_to_cputime(const unsigned long clk) static inline void arch_vtime_task_switch(struct task_struct *tsk) { } #endif /* __KERNEL__ */ -#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ #endif /* __POWERPC_CPUTIME_H */ diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h index 531fe0c3108f..b1e7f2af1016 100644 --- a/arch/powerpc/include/asm/lppaca.h +++ b/arch/powerpc/include/asm/lppaca.h @@ -145,7 +145,7 @@ struct dtl_entry { extern struct kmem_cache *dtl_cache; /* - * When CONFIG_VIRT_CPU_ACCOUNTING = y, the cpu accounting code controls + * When CONFIG_VIRT_CPU_ACCOUNTING_NATIVE = y, the cpu accounting code controls * reading from the dispatch trace log. If other code wants to consume * DTL entries, it can set this pointer to a function that will get * called once for each DTL entry that gets processed. diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h index 9710be3a2d17..136bba62efa4 100644 --- a/arch/powerpc/include/asm/perf_event_server.h +++ b/arch/powerpc/include/asm/perf_event_server.h @@ -11,6 +11,7 @@ #include <linux/types.h> #include <asm/hw_irq.h> +#include <linux/device.h> #define MAX_HWEVENTS 8 #define MAX_EVENT_ALTERNATIVES 8 @@ -35,6 +36,7 @@ struct power_pmu { void (*disable_pmc)(unsigned int pmc, unsigned long mmcr[]); int (*limited_pmc_event)(u64 event_id); u32 flags; + const struct attribute_group **attr_groups; int n_generic; int *generic_events; int (*cache_events)[PERF_COUNT_HW_CACHE_MAX] @@ -109,3 +111,27 @@ extern unsigned long perf_instruction_pointer(struct pt_regs *regs); * If an event_id is not subject to the constraint expressed by a particular * field, then it will have 0 in both the mask and value for that field. */ + +extern ssize_t power_events_sysfs_show(struct device *dev, + struct device_attribute *attr, char *page); + +/* + * EVENT_VAR() is same as PMU_EVENT_VAR with a suffix. + * + * Having a suffix allows us to have aliases in sysfs - eg: the generic + * event 'cpu-cycles' can have two entries in sysfs: 'cpu-cycles' and + * 'PM_CYC' where the latter is the name by which the event is known in + * POWER CPU specification. + */ +#define EVENT_VAR(_id, _suffix) event_attr_##_id##_suffix +#define EVENT_PTR(_id, _suffix) &EVENT_VAR(_id, _suffix).attr.attr + +#define EVENT_ATTR(_name, _id, _suffix) \ + PMU_EVENT_ATTR(_name, EVENT_VAR(_id, _suffix), PME_PM_##_id, \ + power_events_sysfs_show) + +#define GENERIC_EVENT_ATTR(_name, _id) EVENT_ATTR(_name, _id, _g) +#define GENERIC_EVENT_PTR(_id) EVENT_PTR(_id, _g) + +#define POWER_EVENT_ATTR(_name, _id) EVENT_ATTR(PM_##_name, _id, _p) +#define POWER_EVENT_PTR(_id) EVENT_PTR(_id, _p) diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index ea2a86e8ff95..2d0e1f5d8339 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -24,7 +24,7 @@ * user_time and system_time fields in the paca. */ -#ifndef CONFIG_VIRT_CPU_ACCOUNTING +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE #define ACCOUNT_CPU_USER_ENTRY(ra, rb) #define ACCOUNT_CPU_USER_EXIT(ra, rb) #define ACCOUNT_STOLEN_TIME @@ -70,7 +70,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) #endif /* CONFIG_PPC_SPLPAR */ -#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ /* * Macros for storing registers into and loading registers from diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 3d990d3bd8ba..ac057013f9fd 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -94,7 +94,7 @@ system_call_common: addi r9,r1,STACK_FRAME_OVERHEAD ld r11,exception_marker@toc(r2) std r11,-16(r9) /* "regshere" marker */ -#if defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(CONFIG_PPC_SPLPAR) +#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC_SPLPAR) BEGIN_FW_FTR_SECTION beq 33f /* if from user, see if there are any DTL entries to process */ @@ -110,7 +110,7 @@ BEGIN_FW_FTR_SECTION addi r9,r1,STACK_FRAME_OVERHEAD 33: END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) -#endif /* CONFIG_VIRT_CPU_ACCOUNTING && CONFIG_PPC_SPLPAR */ +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE && CONFIG_PPC_SPLPAR */ /* * A syscall should always be called with interrupts enabled diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 127361e093f4..f77fa22754bc 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -143,7 +143,7 @@ EXPORT_SYMBOL_GPL(ppc_proc_freq); unsigned long ppc_tb_freq; EXPORT_SYMBOL_GPL(ppc_tb_freq); -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE /* * Factors for converting from cputime_t (timebase ticks) to * jiffies, microseconds, seconds, and clock_t (1/USER_HZ seconds). @@ -347,6 +347,7 @@ void vtime_account_system(struct task_struct *tsk) if (stolen) account_steal_time(stolen); } +EXPORT_SYMBOL_GPL(vtime_account_system); void vtime_account_idle(struct task_struct *tsk) { @@ -377,7 +378,7 @@ void vtime_account_user(struct task_struct *tsk) account_user_time(tsk, utime, utimescaled); } -#else /* ! CONFIG_VIRT_CPU_ACCOUNTING */ +#else /* ! CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ #define calc_cputime_factors() #endif @@ -668,7 +669,7 @@ int update_persistent_clock(struct timespec now) struct rtc_time tm; if (!ppc_md.set_rtc_time) - return 0; + return -ENODEV; to_tm(now.tv_sec + 1 + timezone_offset, &tm); tm.tm_year -= 1900; diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index aa2465e21f1a..fa476d50791f 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -1305,6 +1305,16 @@ static int power_pmu_event_idx(struct perf_event *event) return event->hw.idx; } +ssize_t power_events_sysfs_show(struct device *dev, + struct device_attribute *attr, char *page) +{ + struct perf_pmu_events_attr *pmu_attr; + + pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); + + return sprintf(page, "event=0x%02llx\n", pmu_attr->id); +} + struct pmu power_pmu = { .pmu_enable = power_pmu_enable, .pmu_disable = power_pmu_disable, @@ -1537,6 +1547,8 @@ int __cpuinit register_power_pmu(struct power_pmu *pmu) pr_info("%s performance monitor hardware support registered\n", pmu->name); + power_pmu.attr_groups = ppmu->attr_groups; + #ifdef MSR_HV /* * Use FCHV to ignore kernel events if MSR.HV is set. diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c index 2ee01e38d5e2..b554879bd31e 100644 --- a/arch/powerpc/perf/power7-pmu.c +++ b/arch/powerpc/perf/power7-pmu.c @@ -51,6 +51,18 @@ #define MMCR1_PMCSEL_MSK 0xff /* + * Power7 event codes. + */ +#define PME_PM_CYC 0x1e +#define PME_PM_GCT_NOSLOT_CYC 0x100f8 +#define PME_PM_CMPLU_STALL 0x4000a +#define PME_PM_INST_CMPL 0x2 +#define PME_PM_LD_REF_L1 0xc880 +#define PME_PM_LD_MISS_L1 0x400f0 +#define PME_PM_BRU_FIN 0x10068 +#define PME_PM_BRU_MPRED 0x400f6 + +/* * Layout of constraint bits: * 6666555555555544444444443333333333222222222211111111110000000000 * 3210987654321098765432109876543210987654321098765432109876543210 @@ -307,14 +319,14 @@ static void power7_disable_pmc(unsigned int pmc, unsigned long mmcr[]) } static int power7_generic_events[] = { - [PERF_COUNT_HW_CPU_CYCLES] = 0x1e, - [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x100f8, /* GCT_NOSLOT_CYC */ - [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x4000a, /* CMPLU_STALL */ - [PERF_COUNT_HW_INSTRUCTIONS] = 2, - [PERF_COUNT_HW_CACHE_REFERENCES] = 0xc880, /* LD_REF_L1_LSU*/ - [PERF_COUNT_HW_CACHE_MISSES] = 0x400f0, /* LD_MISS_L1 */ - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x10068, /* BRU_FIN */ - [PERF_COUNT_HW_BRANCH_MISSES] = 0x400f6, /* BR_MPRED */ + [PERF_COUNT_HW_CPU_CYCLES] = PME_PM_CYC, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = PME_PM_GCT_NOSLOT_CYC, + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = PME_PM_CMPLU_STALL, + [PERF_COUNT_HW_INSTRUCTIONS] = PME_PM_INST_CMPL, + [PERF_COUNT_HW_CACHE_REFERENCES] = PME_PM_LD_REF_L1, + [PERF_COUNT_HW_CACHE_MISSES] = PME_PM_LD_MISS_L1, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PME_PM_BRU_FIN, + [PERF_COUNT_HW_BRANCH_MISSES] = PME_PM_BRU_MPRED, }; #define C(x) PERF_COUNT_HW_CACHE_##x @@ -362,6 +374,57 @@ static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { }, }; + +GENERIC_EVENT_ATTR(cpu-cycles, CYC); +GENERIC_EVENT_ATTR(stalled-cycles-frontend, GCT_NOSLOT_CYC); +GENERIC_EVENT_ATTR(stalled-cycles-backend, CMPLU_STALL); +GENERIC_EVENT_ATTR(instructions, INST_CMPL); +GENERIC_EVENT_ATTR(cache-references, LD_REF_L1); +GENERIC_EVENT_ATTR(cache-misses, LD_MISS_L1); +GENERIC_EVENT_ATTR(branch-instructions, BRU_FIN); +GENERIC_EVENT_ATTR(branch-misses, BRU_MPRED); + +POWER_EVENT_ATTR(CYC, CYC); +POWER_EVENT_ATTR(GCT_NOSLOT_CYC, GCT_NOSLOT_CYC); +POWER_EVENT_ATTR(CMPLU_STALL, CMPLU_STALL); +POWER_EVENT_ATTR(INST_CMPL, INST_CMPL); +POWER_EVENT_ATTR(LD_REF_L1, LD_REF_L1); +POWER_EVENT_ATTR(LD_MISS_L1, LD_MISS_L1); +POWER_EVENT_ATTR(BRU_FIN, BRU_FIN) +POWER_EVENT_ATTR(BRU_MPRED, BRU_MPRED); + +static struct attribute *power7_events_attr[] = { + GENERIC_EVENT_PTR(CYC), + GENERIC_EVENT_PTR(GCT_NOSLOT_CYC), + GENERIC_EVENT_PTR(CMPLU_STALL), + GENERIC_EVENT_PTR(INST_CMPL), + GENERIC_EVENT_PTR(LD_REF_L1), + GENERIC_EVENT_PTR(LD_MISS_L1), + GENERIC_EVENT_PTR(BRU_FIN), + GENERIC_EVENT_PTR(BRU_MPRED), + + POWER_EVENT_PTR(CYC), + POWER_EVENT_PTR(GCT_NOSLOT_CYC), + POWER_EVENT_PTR(CMPLU_STALL), + POWER_EVENT_PTR(INST_CMPL), + POWER_EVENT_PTR(LD_REF_L1), + POWER_EVENT_PTR(LD_MISS_L1), + POWER_EVENT_PTR(BRU_FIN), + POWER_EVENT_PTR(BRU_MPRED), + NULL +}; + + +static struct attribute_group power7_pmu_events_group = { + .name = "events", + .attrs = power7_events_attr, +}; + +static const struct attribute_group *power7_pmu_attr_groups[] = { + &power7_pmu_events_group, + NULL, +}; + static struct power_pmu power7_pmu = { .name = "POWER7", .n_counter = 6, @@ -373,6 +436,7 @@ static struct power_pmu power7_pmu = { .get_alternatives = power7_get_alternatives, .disable_pmc = power7_disable_pmc, .flags = PPMU_ALT_SIPR, + .attr_groups = power7_pmu_attr_groups, .n_generic = ARRAY_SIZE(power7_generic_events), .generic_events = power7_generic_events, .cache_events = &power7_cache_events, diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 25db92a8e1cf..49318385d4fa 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -24,6 +24,7 @@ #include <linux/errno.h> #include <linux/sched.h> +#include <linux/sched/rt.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/slab.h> diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c index a7648543c59e..0cc0ac07a55d 100644 --- a/arch/powerpc/platforms/pseries/dtl.c +++ b/arch/powerpc/platforms/pseries/dtl.c @@ -57,7 +57,7 @@ static u8 dtl_event_mask = 0x7; */ static int dtl_buf_entries = N_DISPATCH_LOG; -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE struct dtl_ring { u64 write_index; struct dtl_entry *write_ptr; @@ -142,7 +142,7 @@ static u64 dtl_current_index(struct dtl *dtl) return per_cpu(dtl_rings, dtl->cpu).write_index; } -#else /* CONFIG_VIRT_CPU_ACCOUNTING */ +#else /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ static int dtl_start(struct dtl *dtl) { @@ -188,7 +188,7 @@ static u64 dtl_current_index(struct dtl *dtl) { return lppaca_of(dtl->cpu).dtl_idx; } -#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ static int dtl_enable(struct dtl *dtl) { diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index ca55882465d6..527e12c9573b 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -281,7 +281,7 @@ static struct notifier_block pci_dn_reconfig_nb = { struct kmem_cache *dtl_cache; -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE /* * Allocate space for the dispatch trace log for all possible cpus * and register the buffers with the hypervisor. This is used for @@ -332,12 +332,12 @@ static int alloc_dispatch_logs(void) return 0; } -#else /* !CONFIG_VIRT_CPU_ACCOUNTING */ +#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ static inline int alloc_dispatch_logs(void) { return 0; } -#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ static int alloc_dispatch_log_kmem_cache(void) { diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index b5ea38c25647..c15ba7d1be64 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -78,7 +78,6 @@ config S390 select HAVE_KVM if 64BIT select HAVE_ARCH_TRACEHOOK select INIT_ALL_POSSIBLE - select HAVE_IRQ_WORK select HAVE_PERF_EVENTS select ARCH_HAVE_NMI_SAFE_CMPXCHG select HAVE_DEBUG_KMEMLEAK diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index a5f4f5a1d24b..0aa98db8a80d 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -120,6 +120,9 @@ static int s390_next_ktime(ktime_t expires, nsecs = ktime_to_ns(ktime_add(timespec_to_ktime(ts), expires)); do_div(nsecs, 125); S390_lowcore.clock_comparator = sched_clock_base_cc + (nsecs << 9); + /* Program the maximum value if we have an overflow (== year 2042) */ + if (unlikely(S390_lowcore.clock_comparator < sched_clock_base_cc)) + S390_lowcore.clock_comparator = -1ULL; set_clock_comparator(S390_lowcore.clock_comparator); return 0; } diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index e84b8b68444a..ce9cc5aa2033 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -127,7 +127,7 @@ void vtime_account_user(struct task_struct *tsk) * Update process times based on virtual cpu times stored by entry.S * to the lowcore fields user_timer, system_timer & steal_clock. */ -void vtime_account(struct task_struct *tsk) +void vtime_account_irq_enter(struct task_struct *tsk) { struct thread_info *ti = task_thread_info(tsk); u64 timer, system; @@ -145,10 +145,10 @@ void vtime_account(struct task_struct *tsk) virt_timer_forward(system); } -EXPORT_SYMBOL_GPL(vtime_account); +EXPORT_SYMBOL_GPL(vtime_account_irq_enter); void vtime_account_system(struct task_struct *tsk) -__attribute__((alias("vtime_account"))); +__attribute__((alias("vtime_account_irq_enter"))); EXPORT_SYMBOL_GPL(vtime_account_system); void __kprobes vtime_stop_cpu(void) diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index babc2b826c5c..9c833c585871 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -11,7 +11,6 @@ config SUPERH select HAVE_ARCH_TRACEHOOK select HAVE_DMA_API_DEBUG select HAVE_DMA_ATTRS - select HAVE_IRQ_WORK select HAVE_PERF_EVENTS select HAVE_DEBUG_BUGVERBOSE select ARCH_HAVE_CUSTOM_GPIO_H @@ -91,9 +90,6 @@ config GENERIC_CSUM config GENERIC_HWEIGHT def_bool y -config IRQ_PER_CPU - def_bool y - config GENERIC_GPIO def_bool n diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 9f2edb5c5551..9bff3db17c8c 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -23,7 +23,6 @@ config SPARC select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select RTC_CLASS select RTC_DRV_M48T59 - select HAVE_IRQ_WORK select HAVE_DMA_ATTRS select HAVE_DMA_API_DEBUG select HAVE_ARCH_JUMP_LABEL @@ -61,6 +60,7 @@ config SPARC64 select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP select HAVE_SYSCALL_WRAPPERS + select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_DYNAMIC_FTRACE select HAVE_FTRACE_MCOUNT_RECORD select HAVE_SYSCALL_TRACEPOINTS diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 7870be0f5adc..08fcce90316b 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -71,7 +71,6 @@ #define PMD_PADDR _AC(0xfffffffe,UL) #define PMD_PADDR_SHIFT _AC(11,UL) -#ifdef CONFIG_TRANSPARENT_HUGEPAGE #define PMD_ISHUGE _AC(0x00000001,UL) /* This is the PMD layout when PMD_ISHUGE is set. With 4MB huge @@ -86,7 +85,6 @@ #define PMD_HUGE_ACCESSED _AC(0x00000080,UL) #define PMD_HUGE_EXEC _AC(0x00000040,UL) #define PMD_HUGE_SPLITTING _AC(0x00000020,UL) -#endif /* PGDs point to PMD tables which are 8K aligned. */ #define PGD_PADDR _AC(0xfffffffc,UL) @@ -628,6 +626,12 @@ static inline unsigned long pte_special(pte_t pte) return pte_val(pte) & _PAGE_SPECIAL; } +static inline int pmd_large(pmd_t pmd) +{ + return (pmd_val(pmd) & (PMD_ISHUGE | PMD_HUGE_PRESENT)) == + (PMD_ISHUGE | PMD_HUGE_PRESENT); +} + #ifdef CONFIG_TRANSPARENT_HUGEPAGE static inline int pmd_young(pmd_t pmd) { @@ -646,12 +650,6 @@ static inline unsigned long pmd_pfn(pmd_t pmd) return val >> (PAGE_SHIFT - PMD_PADDR_SHIFT); } -static inline int pmd_large(pmd_t pmd) -{ - return (pmd_val(pmd) & (PMD_ISHUGE | PMD_HUGE_PRESENT)) == - (PMD_ISHUGE | PMD_HUGE_PRESENT); -} - static inline int pmd_trans_splitting(pmd_t pmd) { return (pmd_val(pmd) & (PMD_ISHUGE|PMD_HUGE_SPLITTING)) == diff --git a/arch/sparc/kernel/sbus.c b/arch/sparc/kernel/sbus.c index 1271b3a27d4e..be5bdf93c767 100644 --- a/arch/sparc/kernel/sbus.c +++ b/arch/sparc/kernel/sbus.c @@ -554,10 +554,8 @@ static void __init sbus_iommu_init(struct platform_device *op) regs = pr->phys_addr; iommu = kzalloc(sizeof(*iommu), GFP_ATOMIC); - if (!iommu) - goto fatal_memory_error; strbuf = kzalloc(sizeof(*strbuf), GFP_ATOMIC); - if (!strbuf) + if (!iommu || !strbuf) goto fatal_memory_error; op->dev.archdata.iommu = iommu; @@ -656,6 +654,8 @@ static void __init sbus_iommu_init(struct platform_device *op) return; fatal_memory_error: + kfree(iommu); + kfree(strbuf); prom_printf("sbus_iommu_init: Fatal memory allocation error.\n"); } diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c index 42c55df3aec3..01ee23dd724d 100644 --- a/arch/sparc/mm/gup.c +++ b/arch/sparc/mm/gup.c @@ -66,6 +66,56 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, return 1; } +static int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, + unsigned long end, int write, struct page **pages, + int *nr) +{ + struct page *head, *page, *tail; + u32 mask; + int refs; + + mask = PMD_HUGE_PRESENT; + if (write) + mask |= PMD_HUGE_WRITE; + if ((pmd_val(pmd) & mask) != mask) + return 0; + + refs = 0; + head = pmd_page(pmd); + page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); + tail = page; + do { + VM_BUG_ON(compound_head(page) != head); + pages[*nr] = page; + (*nr)++; + page++; + refs++; + } while (addr += PAGE_SIZE, addr != end); + + if (!page_cache_add_speculative(head, refs)) { + *nr -= refs; + return 0; + } + + if (unlikely(pmd_val(pmd) != pmd_val(*pmdp))) { + *nr -= refs; + while (refs--) + put_page(head); + return 0; + } + + /* Any tail page need their mapcount reference taken before we + * return. + */ + while (refs--) { + if (PageTail(tail)) + get_huge_page_tail(tail); + tail++; + } + + return 1; +} + static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { @@ -77,9 +127,14 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, pmd_t pmd = *pmdp; next = pmd_addr_end(addr, end); - if (pmd_none(pmd)) + if (pmd_none(pmd) || pmd_trans_splitting(pmd)) return 0; - if (!gup_pte_range(pmd, addr, next, write, pages, nr)) + if (unlikely(pmd_large(pmd))) { + if (!gup_huge_pmd(pmdp, pmd, addr, next, + write, pages, nr)) + return 0; + } else if (!gup_pte_range(pmd, addr, next, write, + pages, nr)) return 0; } while (pmdp++, addr = next, addr != end); diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index 875d008828b8..1bb7ad4aeff4 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -140,6 +140,8 @@ config ARCH_DEFCONFIG source "init/Kconfig" +source "kernel/Kconfig.freezer" + menu "Tilera-specific configuration" config NR_CPUS diff --git a/arch/tile/include/asm/io.h b/arch/tile/include/asm/io.h index 2a9b293fece6..31672918064c 100644 --- a/arch/tile/include/asm/io.h +++ b/arch/tile/include/asm/io.h @@ -250,7 +250,9 @@ static inline void writeq(u64 val, unsigned long addr) #define iowrite32 writel #define iowrite64 writeq -static inline void memset_io(void *dst, int val, size_t len) +#if CHIP_HAS_MMIO() || defined(CONFIG_PCI) + +static inline void memset_io(volatile void *dst, int val, size_t len) { int x; BUG_ON((unsigned long)dst & 0x3); @@ -277,6 +279,8 @@ static inline void memcpy_toio(volatile void __iomem *dst, const void *src, writel(*(u32 *)(src + x), dst + x); } +#endif + /* * The Tile architecture does not support IOPORT, even with PCI. * Unfortunately we can't yet simply not declare these methods, diff --git a/arch/tile/include/asm/irqflags.h b/arch/tile/include/asm/irqflags.h index b4e96fef2cf8..241c0bb60b12 100644 --- a/arch/tile/include/asm/irqflags.h +++ b/arch/tile/include/asm/irqflags.h @@ -18,32 +18,20 @@ #include <arch/interrupts.h> #include <arch/chip.h> -#if !defined(__tilegx__) && defined(__ASSEMBLY__) - /* * The set of interrupts we want to allow when interrupts are nominally * disabled. The remainder are effectively "NMI" interrupts from * the point of view of the generic Linux code. Note that synchronous * interrupts (aka "non-queued") are not blocked by the mask in any case. */ -#if CHIP_HAS_AUX_PERF_COUNTERS() -#define LINUX_MASKABLE_INTERRUPTS_HI \ - (~(INT_MASK_HI(INT_PERF_COUNT) | INT_MASK_HI(INT_AUX_PERF_COUNT))) -#else -#define LINUX_MASKABLE_INTERRUPTS_HI \ - (~(INT_MASK_HI(INT_PERF_COUNT))) -#endif - -#else - -#if CHIP_HAS_AUX_PERF_COUNTERS() -#define LINUX_MASKABLE_INTERRUPTS \ - (~(INT_MASK(INT_PERF_COUNT) | INT_MASK(INT_AUX_PERF_COUNT))) -#else #define LINUX_MASKABLE_INTERRUPTS \ - (~(INT_MASK(INT_PERF_COUNT))) -#endif + (~((_AC(1,ULL) << INT_PERF_COUNT) | (_AC(1,ULL) << INT_AUX_PERF_COUNT))) +#if CHIP_HAS_SPLIT_INTR_MASK() +/* The same macro, but for the two 32-bit SPRs separately. */ +#define LINUX_MASKABLE_INTERRUPTS_LO (-1) +#define LINUX_MASKABLE_INTERRUPTS_HI \ + (~((1 << (INT_PERF_COUNT - 32)) | (1 << (INT_AUX_PERF_COUNT - 32)))) #endif #ifndef __ASSEMBLY__ @@ -126,7 +114,7 @@ * to know our current state. */ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask); -#define INITIAL_INTERRUPTS_ENABLED INT_MASK(INT_MEM_ERROR) +#define INITIAL_INTERRUPTS_ENABLED (1ULL << INT_MEM_ERROR) /* Disable interrupts. */ #define arch_local_irq_disable() \ @@ -165,7 +153,7 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask); /* Prevent the given interrupt from being enabled next time we enable irqs. */ #define arch_local_irq_mask(interrupt) \ - (__get_cpu_var(interrupts_enabled_mask) &= ~INT_MASK(interrupt)) + (__get_cpu_var(interrupts_enabled_mask) &= ~(1ULL << (interrupt))) /* Prevent the given interrupt from being enabled immediately. */ #define arch_local_irq_mask_now(interrupt) do { \ @@ -175,7 +163,7 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask); /* Allow the given interrupt to be enabled next time we enable irqs. */ #define arch_local_irq_unmask(interrupt) \ - (__get_cpu_var(interrupts_enabled_mask) |= INT_MASK(interrupt)) + (__get_cpu_var(interrupts_enabled_mask) |= (1ULL << (interrupt))) /* Allow the given interrupt to be enabled immediately, if !irqs_disabled. */ #define arch_local_irq_unmask_now(interrupt) do { \ @@ -250,7 +238,7 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask); /* Disable interrupts. */ #define IRQ_DISABLE(tmp0, tmp1) \ { \ - movei tmp0, -1; \ + movei tmp0, LINUX_MASKABLE_INTERRUPTS_LO; \ moveli tmp1, lo16(LINUX_MASKABLE_INTERRUPTS_HI) \ }; \ { \ diff --git a/arch/tile/include/uapi/arch/interrupts_32.h b/arch/tile/include/uapi/arch/interrupts_32.h index 96b5710505b6..2efe3f68b2d6 100644 --- a/arch/tile/include/uapi/arch/interrupts_32.h +++ b/arch/tile/include/uapi/arch/interrupts_32.h @@ -15,6 +15,7 @@ #ifndef __ARCH_INTERRUPTS_H__ #define __ARCH_INTERRUPTS_H__ +#ifndef __KERNEL__ /** Mask for an interrupt. */ /* Note: must handle breaking interrupts into high and low words manually. */ #define INT_MASK_LO(intno) (1 << (intno)) @@ -23,6 +24,7 @@ #ifndef __ASSEMBLER__ #define INT_MASK(intno) (1ULL << (intno)) #endif +#endif /** Where a given interrupt executes */ @@ -92,216 +94,216 @@ #ifndef __ASSEMBLER__ #define QUEUED_INTERRUPTS ( \ - INT_MASK(INT_MEM_ERROR) | \ - INT_MASK(INT_DMATLB_MISS) | \ - INT_MASK(INT_DMATLB_ACCESS) | \ - INT_MASK(INT_SNITLB_MISS) | \ - INT_MASK(INT_SN_NOTIFY) | \ - INT_MASK(INT_SN_FIREWALL) | \ - INT_MASK(INT_IDN_FIREWALL) | \ - INT_MASK(INT_UDN_FIREWALL) | \ - INT_MASK(INT_TILE_TIMER) | \ - INT_MASK(INT_IDN_TIMER) | \ - INT_MASK(INT_UDN_TIMER) | \ - INT_MASK(INT_DMA_NOTIFY) | \ - INT_MASK(INT_IDN_CA) | \ - INT_MASK(INT_UDN_CA) | \ - INT_MASK(INT_IDN_AVAIL) | \ - INT_MASK(INT_UDN_AVAIL) | \ - INT_MASK(INT_PERF_COUNT) | \ - INT_MASK(INT_INTCTRL_3) | \ - INT_MASK(INT_INTCTRL_2) | \ - INT_MASK(INT_INTCTRL_1) | \ - INT_MASK(INT_INTCTRL_0) | \ - INT_MASK(INT_BOOT_ACCESS) | \ - INT_MASK(INT_WORLD_ACCESS) | \ - INT_MASK(INT_I_ASID) | \ - INT_MASK(INT_D_ASID) | \ - INT_MASK(INT_DMA_ASID) | \ - INT_MASK(INT_SNI_ASID) | \ - INT_MASK(INT_DMA_CPL) | \ - INT_MASK(INT_SN_CPL) | \ - INT_MASK(INT_DOUBLE_FAULT) | \ - INT_MASK(INT_AUX_PERF_COUNT) | \ + (1ULL << INT_MEM_ERROR) | \ + (1ULL << INT_DMATLB_MISS) | \ + (1ULL << INT_DMATLB_ACCESS) | \ + (1ULL << INT_SNITLB_MISS) | \ + (1ULL << INT_SN_NOTIFY) | \ + (1ULL << INT_SN_FIREWALL) | \ + (1ULL << INT_IDN_FIREWALL) | \ + (1ULL << INT_UDN_FIREWALL) | \ + (1ULL << INT_TILE_TIMER) | \ + (1ULL << INT_IDN_TIMER) | \ + (1ULL << INT_UDN_TIMER) | \ + (1ULL << INT_DMA_NOTIFY) | \ + (1ULL << INT_IDN_CA) | \ + (1ULL << INT_UDN_CA) | \ + (1ULL << INT_IDN_AVAIL) | \ + (1ULL << INT_UDN_AVAIL) | \ + (1ULL << INT_PERF_COUNT) | \ + (1ULL << INT_INTCTRL_3) | \ + (1ULL << INT_INTCTRL_2) | \ + (1ULL << INT_INTCTRL_1) | \ + (1ULL << INT_INTCTRL_0) | \ + (1ULL << INT_BOOT_ACCESS) | \ + (1ULL << INT_WORLD_ACCESS) | \ + (1ULL << INT_I_ASID) | \ + (1ULL << INT_D_ASID) | \ + (1ULL << INT_DMA_ASID) | \ + (1ULL << INT_SNI_ASID) | \ + (1ULL << INT_DMA_CPL) | \ + (1ULL << INT_SN_CPL) | \ + (1ULL << INT_DOUBLE_FAULT) | \ + (1ULL << INT_AUX_PERF_COUNT) | \ 0) #define NONQUEUED_INTERRUPTS ( \ - INT_MASK(INT_ITLB_MISS) | \ - INT_MASK(INT_ILL) | \ - INT_MASK(INT_GPV) | \ - INT_MASK(INT_SN_ACCESS) | \ - INT_MASK(INT_IDN_ACCESS) | \ - INT_MASK(INT_UDN_ACCESS) | \ - INT_MASK(INT_IDN_REFILL) | \ - INT_MASK(INT_UDN_REFILL) | \ - INT_MASK(INT_IDN_COMPLETE) | \ - INT_MASK(INT_UDN_COMPLETE) | \ - INT_MASK(INT_SWINT_3) | \ - INT_MASK(INT_SWINT_2) | \ - INT_MASK(INT_SWINT_1) | \ - INT_MASK(INT_SWINT_0) | \ - INT_MASK(INT_UNALIGN_DATA) | \ - INT_MASK(INT_DTLB_MISS) | \ - INT_MASK(INT_DTLB_ACCESS) | \ - INT_MASK(INT_SN_STATIC_ACCESS) | \ + (1ULL << INT_ITLB_MISS) | \ + (1ULL << INT_ILL) | \ + (1ULL << INT_GPV) | \ + (1ULL << INT_SN_ACCESS) | \ + (1ULL << INT_IDN_ACCESS) | \ + (1ULL << INT_UDN_ACCESS) | \ + (1ULL << INT_IDN_REFILL) | \ + (1ULL << INT_UDN_REFILL) | \ + (1ULL << INT_IDN_COMPLETE) | \ + (1ULL << INT_UDN_COMPLETE) | \ + (1ULL << INT_SWINT_3) | \ + (1ULL << INT_SWINT_2) | \ + (1ULL << INT_SWINT_1) | \ + (1ULL << INT_SWINT_0) | \ + (1ULL << INT_UNALIGN_DATA) | \ + (1ULL << INT_DTLB_MISS) | \ + (1ULL << INT_DTLB_ACCESS) | \ + (1ULL << INT_SN_STATIC_ACCESS) | \ 0) #define CRITICAL_MASKED_INTERRUPTS ( \ - INT_MASK(INT_MEM_ERROR) | \ - INT_MASK(INT_DMATLB_MISS) | \ - INT_MASK(INT_DMATLB_ACCESS) | \ - INT_MASK(INT_SNITLB_MISS) | \ - INT_MASK(INT_SN_NOTIFY) | \ - INT_MASK(INT_SN_FIREWALL) | \ - INT_MASK(INT_IDN_FIREWALL) | \ - INT_MASK(INT_UDN_FIREWALL) | \ - INT_MASK(INT_TILE_TIMER) | \ - INT_MASK(INT_IDN_TIMER) | \ - INT_MASK(INT_UDN_TIMER) | \ - INT_MASK(INT_DMA_NOTIFY) | \ - INT_MASK(INT_IDN_CA) | \ - INT_MASK(INT_UDN_CA) | \ - INT_MASK(INT_IDN_AVAIL) | \ - INT_MASK(INT_UDN_AVAIL) | \ - INT_MASK(INT_PERF_COUNT) | \ - INT_MASK(INT_INTCTRL_3) | \ - INT_MASK(INT_INTCTRL_2) | \ - INT_MASK(INT_INTCTRL_1) | \ - INT_MASK(INT_INTCTRL_0) | \ - INT_MASK(INT_AUX_PERF_COUNT) | \ + (1ULL << INT_MEM_ERROR) | \ + (1ULL << INT_DMATLB_MISS) | \ + (1ULL << INT_DMATLB_ACCESS) | \ + (1ULL << INT_SNITLB_MISS) | \ + (1ULL << INT_SN_NOTIFY) | \ + (1ULL << INT_SN_FIREWALL) | \ + (1ULL << INT_IDN_FIREWALL) | \ + (1ULL << INT_UDN_FIREWALL) | \ + (1ULL << INT_TILE_TIMER) | \ + (1ULL << INT_IDN_TIMER) | \ + (1ULL << INT_UDN_TIMER) | \ + (1ULL << INT_DMA_NOTIFY) | \ + (1ULL << INT_IDN_CA) | \ + (1ULL << INT_UDN_CA) | \ + (1ULL << INT_IDN_AVAIL) | \ + (1ULL << INT_UDN_AVAIL) | \ + (1ULL << INT_PERF_COUNT) | \ + (1ULL << INT_INTCTRL_3) | \ + (1ULL << INT_INTCTRL_2) | \ + (1ULL << INT_INTCTRL_1) | \ + (1ULL << INT_INTCTRL_0) | \ + (1ULL << INT_AUX_PERF_COUNT) | \ 0) #define CRITICAL_UNMASKED_INTERRUPTS ( \ - INT_MASK(INT_ITLB_MISS) | \ - INT_MASK(INT_ILL) | \ - INT_MASK(INT_GPV) | \ - INT_MASK(INT_SN_ACCESS) | \ - INT_MASK(INT_IDN_ACCESS) | \ - INT_MASK(INT_UDN_ACCESS) | \ - INT_MASK(INT_IDN_REFILL) | \ - INT_MASK(INT_UDN_REFILL) | \ - INT_MASK(INT_IDN_COMPLETE) | \ - INT_MASK(INT_UDN_COMPLETE) | \ - INT_MASK(INT_SWINT_3) | \ - INT_MASK(INT_SWINT_2) | \ - INT_MASK(INT_SWINT_1) | \ - INT_MASK(INT_SWINT_0) | \ - INT_MASK(INT_UNALIGN_DATA) | \ - INT_MASK(INT_DTLB_MISS) | \ - INT_MASK(INT_DTLB_ACCESS) | \ - INT_MASK(INT_BOOT_ACCESS) | \ - INT_MASK(INT_WORLD_ACCESS) | \ - INT_MASK(INT_I_ASID) | \ - INT_MASK(INT_D_ASID) | \ - INT_MASK(INT_DMA_ASID) | \ - INT_MASK(INT_SNI_ASID) | \ - INT_MASK(INT_DMA_CPL) | \ - INT_MASK(INT_SN_CPL) | \ - INT_MASK(INT_DOUBLE_FAULT) | \ - INT_MASK(INT_SN_STATIC_ACCESS) | \ + (1ULL << INT_ITLB_MISS) | \ + (1ULL << INT_ILL) | \ + (1ULL << INT_GPV) | \ + (1ULL << INT_SN_ACCESS) | \ + (1ULL << INT_IDN_ACCESS) | \ + (1ULL << INT_UDN_ACCESS) | \ + (1ULL << INT_IDN_REFILL) | \ + (1ULL << INT_UDN_REFILL) | \ + (1ULL << INT_IDN_COMPLETE) | \ + (1ULL << INT_UDN_COMPLETE) | \ + (1ULL << INT_SWINT_3) | \ + (1ULL << INT_SWINT_2) | \ + (1ULL << INT_SWINT_1) | \ + (1ULL << INT_SWINT_0) | \ + (1ULL << INT_UNALIGN_DATA) | \ + (1ULL << INT_DTLB_MISS) | \ + (1ULL << INT_DTLB_ACCESS) | \ + (1ULL << INT_BOOT_ACCESS) | \ + (1ULL << INT_WORLD_ACCESS) | \ + (1ULL << INT_I_ASID) | \ + (1ULL << INT_D_ASID) | \ + (1ULL << INT_DMA_ASID) | \ + (1ULL << INT_SNI_ASID) | \ + (1ULL << INT_DMA_CPL) | \ + (1ULL << INT_SN_CPL) | \ + (1ULL << INT_DOUBLE_FAULT) | \ + (1ULL << INT_SN_STATIC_ACCESS) | \ 0) #define MASKABLE_INTERRUPTS ( \ - INT_MASK(INT_MEM_ERROR) | \ - INT_MASK(INT_IDN_REFILL) | \ - INT_MASK(INT_UDN_REFILL) | \ - INT_MASK(INT_IDN_COMPLETE) | \ - INT_MASK(INT_UDN_COMPLETE) | \ - INT_MASK(INT_DMATLB_MISS) | \ - INT_MASK(INT_DMATLB_ACCESS) | \ - INT_MASK(INT_SNITLB_MISS) | \ - INT_MASK(INT_SN_NOTIFY) | \ - INT_MASK(INT_SN_FIREWALL) | \ - INT_MASK(INT_IDN_FIREWALL) | \ - INT_MASK(INT_UDN_FIREWALL) | \ - INT_MASK(INT_TILE_TIMER) | \ - INT_MASK(INT_IDN_TIMER) | \ - INT_MASK(INT_UDN_TIMER) | \ - INT_MASK(INT_DMA_NOTIFY) | \ - INT_MASK(INT_IDN_CA) | \ - INT_MASK(INT_UDN_CA) | \ - INT_MASK(INT_IDN_AVAIL) | \ - INT_MASK(INT_UDN_AVAIL) | \ - INT_MASK(INT_PERF_COUNT) | \ - INT_MASK(INT_INTCTRL_3) | \ - INT_MASK(INT_INTCTRL_2) | \ - INT_MASK(INT_INTCTRL_1) | \ - INT_MASK(INT_INTCTRL_0) | \ - INT_MASK(INT_AUX_PERF_COUNT) | \ + (1ULL << INT_MEM_ERROR) | \ + (1ULL << INT_IDN_REFILL) | \ + (1ULL << INT_UDN_REFILL) | \ + (1ULL << INT_IDN_COMPLETE) | \ + (1ULL << INT_UDN_COMPLETE) | \ + (1ULL << INT_DMATLB_MISS) | \ + (1ULL << INT_DMATLB_ACCESS) | \ + (1ULL << INT_SNITLB_MISS) | \ + (1ULL << INT_SN_NOTIFY) | \ + (1ULL << INT_SN_FIREWALL) | \ + (1ULL << INT_IDN_FIREWALL) | \ + (1ULL << INT_UDN_FIREWALL) | \ + (1ULL << INT_TILE_TIMER) | \ + (1ULL << INT_IDN_TIMER) | \ + (1ULL << INT_UDN_TIMER) | \ + (1ULL << INT_DMA_NOTIFY) | \ + (1ULL << INT_IDN_CA) | \ + (1ULL << INT_UDN_CA) | \ + (1ULL << INT_IDN_AVAIL) | \ + (1ULL << INT_UDN_AVAIL) | \ + (1ULL << INT_PERF_COUNT) | \ + (1ULL << INT_INTCTRL_3) | \ + (1ULL << INT_INTCTRL_2) | \ + (1ULL << INT_INTCTRL_1) | \ + (1ULL << INT_INTCTRL_0) | \ + (1ULL << INT_AUX_PERF_COUNT) | \ 0) #define UNMASKABLE_INTERRUPTS ( \ - INT_MASK(INT_ITLB_MISS) | \ - INT_MASK(INT_ILL) | \ - INT_MASK(INT_GPV) | \ - INT_MASK(INT_SN_ACCESS) | \ - INT_MASK(INT_IDN_ACCESS) | \ - INT_MASK(INT_UDN_ACCESS) | \ - INT_MASK(INT_SWINT_3) | \ - INT_MASK(INT_SWINT_2) | \ - INT_MASK(INT_SWINT_1) | \ - INT_MASK(INT_SWINT_0) | \ - INT_MASK(INT_UNALIGN_DATA) | \ - INT_MASK(INT_DTLB_MISS) | \ - INT_MASK(INT_DTLB_ACCESS) | \ - INT_MASK(INT_BOOT_ACCESS) | \ - INT_MASK(INT_WORLD_ACCESS) | \ - INT_MASK(INT_I_ASID) | \ - INT_MASK(INT_D_ASID) | \ - INT_MASK(INT_DMA_ASID) | \ - INT_MASK(INT_SNI_ASID) | \ - INT_MASK(INT_DMA_CPL) | \ - INT_MASK(INT_SN_CPL) | \ - INT_MASK(INT_DOUBLE_FAULT) | \ - INT_MASK(INT_SN_STATIC_ACCESS) | \ + (1ULL << INT_ITLB_MISS) | \ + (1ULL << INT_ILL) | \ + (1ULL << INT_GPV) | \ + (1ULL << INT_SN_ACCESS) | \ + (1ULL << INT_IDN_ACCESS) | \ + (1ULL << INT_UDN_ACCESS) | \ + (1ULL << INT_SWINT_3) | \ + (1ULL << INT_SWINT_2) | \ + (1ULL << INT_SWINT_1) | \ + (1ULL << INT_SWINT_0) | \ + (1ULL << INT_UNALIGN_DATA) | \ + (1ULL << INT_DTLB_MISS) | \ + (1ULL << INT_DTLB_ACCESS) | \ + (1ULL << INT_BOOT_ACCESS) | \ + (1ULL << INT_WORLD_ACCESS) | \ + (1ULL << INT_I_ASID) | \ + (1ULL << INT_D_ASID) | \ + (1ULL << INT_DMA_ASID) | \ + (1ULL << INT_SNI_ASID) | \ + (1ULL << INT_DMA_CPL) | \ + (1ULL << INT_SN_CPL) | \ + (1ULL << INT_DOUBLE_FAULT) | \ + (1ULL << INT_SN_STATIC_ACCESS) | \ 0) #define SYNC_INTERRUPTS ( \ - INT_MASK(INT_ITLB_MISS) | \ - INT_MASK(INT_ILL) | \ - INT_MASK(INT_GPV) | \ - INT_MASK(INT_SN_ACCESS) | \ - INT_MASK(INT_IDN_ACCESS) | \ - INT_MASK(INT_UDN_ACCESS) | \ - INT_MASK(INT_IDN_REFILL) | \ - INT_MASK(INT_UDN_REFILL) | \ - INT_MASK(INT_IDN_COMPLETE) | \ - INT_MASK(INT_UDN_COMPLETE) | \ - INT_MASK(INT_SWINT_3) | \ - INT_MASK(INT_SWINT_2) | \ - INT_MASK(INT_SWINT_1) | \ - INT_MASK(INT_SWINT_0) | \ - INT_MASK(INT_UNALIGN_DATA) | \ - INT_MASK(INT_DTLB_MISS) | \ - INT_MASK(INT_DTLB_ACCESS) | \ - INT_MASK(INT_SN_STATIC_ACCESS) | \ + (1ULL << INT_ITLB_MISS) | \ + (1ULL << INT_ILL) | \ + (1ULL << INT_GPV) | \ + (1ULL << INT_SN_ACCESS) | \ + (1ULL << INT_IDN_ACCESS) | \ + (1ULL << INT_UDN_ACCESS) | \ + (1ULL << INT_IDN_REFILL) | \ + (1ULL << INT_UDN_REFILL) | \ + (1ULL << INT_IDN_COMPLETE) | \ + (1ULL << INT_UDN_COMPLETE) | \ + (1ULL << INT_SWINT_3) | \ + (1ULL << INT_SWINT_2) | \ + (1ULL << INT_SWINT_1) | \ + (1ULL << INT_SWINT_0) | \ + (1ULL << INT_UNALIGN_DATA) | \ + (1ULL << INT_DTLB_MISS) | \ + (1ULL << INT_DTLB_ACCESS) | \ + (1ULL << INT_SN_STATIC_ACCESS) | \ 0) #define NON_SYNC_INTERRUPTS ( \ - INT_MASK(INT_MEM_ERROR) | \ - INT_MASK(INT_DMATLB_MISS) | \ - INT_MASK(INT_DMATLB_ACCESS) | \ - INT_MASK(INT_SNITLB_MISS) | \ - INT_MASK(INT_SN_NOTIFY) | \ - INT_MASK(INT_SN_FIREWALL) | \ - INT_MASK(INT_IDN_FIREWALL) | \ - INT_MASK(INT_UDN_FIREWALL) | \ - INT_MASK(INT_TILE_TIMER) | \ - INT_MASK(INT_IDN_TIMER) | \ - INT_MASK(INT_UDN_TIMER) | \ - INT_MASK(INT_DMA_NOTIFY) | \ - INT_MASK(INT_IDN_CA) | \ - INT_MASK(INT_UDN_CA) | \ - INT_MASK(INT_IDN_AVAIL) | \ - INT_MASK(INT_UDN_AVAIL) | \ - INT_MASK(INT_PERF_COUNT) | \ - INT_MASK(INT_INTCTRL_3) | \ - INT_MASK(INT_INTCTRL_2) | \ - INT_MASK(INT_INTCTRL_1) | \ - INT_MASK(INT_INTCTRL_0) | \ - INT_MASK(INT_BOOT_ACCESS) | \ - INT_MASK(INT_WORLD_ACCESS) | \ - INT_MASK(INT_I_ASID) | \ - INT_MASK(INT_D_ASID) | \ - INT_MASK(INT_DMA_ASID) | \ - INT_MASK(INT_SNI_ASID) | \ - INT_MASK(INT_DMA_CPL) | \ - INT_MASK(INT_SN_CPL) | \ - INT_MASK(INT_DOUBLE_FAULT) | \ - INT_MASK(INT_AUX_PERF_COUNT) | \ + (1ULL << INT_MEM_ERROR) | \ + (1ULL << INT_DMATLB_MISS) | \ + (1ULL << INT_DMATLB_ACCESS) | \ + (1ULL << INT_SNITLB_MISS) | \ + (1ULL << INT_SN_NOTIFY) | \ + (1ULL << INT_SN_FIREWALL) | \ + (1ULL << INT_IDN_FIREWALL) | \ + (1ULL << INT_UDN_FIREWALL) | \ + (1ULL << INT_TILE_TIMER) | \ + (1ULL << INT_IDN_TIMER) | \ + (1ULL << INT_UDN_TIMER) | \ + (1ULL << INT_DMA_NOTIFY) | \ + (1ULL << INT_IDN_CA) | \ + (1ULL << INT_UDN_CA) | \ + (1ULL << INT_IDN_AVAIL) | \ + (1ULL << INT_UDN_AVAIL) | \ + (1ULL << INT_PERF_COUNT) | \ + (1ULL << INT_INTCTRL_3) | \ + (1ULL << INT_INTCTRL_2) | \ + (1ULL << INT_INTCTRL_1) | \ + (1ULL << INT_INTCTRL_0) | \ + (1ULL << INT_BOOT_ACCESS) | \ + (1ULL << INT_WORLD_ACCESS) | \ + (1ULL << INT_I_ASID) | \ + (1ULL << INT_D_ASID) | \ + (1ULL << INT_DMA_ASID) | \ + (1ULL << INT_SNI_ASID) | \ + (1ULL << INT_DMA_CPL) | \ + (1ULL << INT_SN_CPL) | \ + (1ULL << INT_DOUBLE_FAULT) | \ + (1ULL << INT_AUX_PERF_COUNT) | \ 0) #endif /* !__ASSEMBLER__ */ #endif /* !__ARCH_INTERRUPTS_H__ */ diff --git a/arch/tile/include/uapi/arch/interrupts_64.h b/arch/tile/include/uapi/arch/interrupts_64.h index 5bb58b2e4e6f..13c9f9182348 100644 --- a/arch/tile/include/uapi/arch/interrupts_64.h +++ b/arch/tile/include/uapi/arch/interrupts_64.h @@ -15,6 +15,7 @@ #ifndef __ARCH_INTERRUPTS_H__ #define __ARCH_INTERRUPTS_H__ +#ifndef __KERNEL__ /** Mask for an interrupt. */ #ifdef __ASSEMBLER__ /* Note: must handle breaking interrupts into high and low words manually. */ @@ -22,6 +23,7 @@ #else #define INT_MASK(intno) (1ULL << (intno)) #endif +#endif /** Where a given interrupt executes */ @@ -85,192 +87,192 @@ #ifndef __ASSEMBLER__ #define QUEUED_INTERRUPTS ( \ - INT_MASK(INT_MEM_ERROR) | \ - INT_MASK(INT_IDN_COMPLETE) | \ - INT_MASK(INT_UDN_COMPLETE) | \ - INT_MASK(INT_IDN_FIREWALL) | \ - INT_MASK(INT_UDN_FIREWALL) | \ - INT_MASK(INT_TILE_TIMER) | \ - INT_MASK(INT_AUX_TILE_TIMER) | \ - INT_MASK(INT_IDN_TIMER) | \ - INT_MASK(INT_UDN_TIMER) | \ - INT_MASK(INT_IDN_AVAIL) | \ - INT_MASK(INT_UDN_AVAIL) | \ - INT_MASK(INT_IPI_3) | \ - INT_MASK(INT_IPI_2) | \ - INT_MASK(INT_IPI_1) | \ - INT_MASK(INT_IPI_0) | \ - INT_MASK(INT_PERF_COUNT) | \ - INT_MASK(INT_AUX_PERF_COUNT) | \ - INT_MASK(INT_INTCTRL_3) | \ - INT_MASK(INT_INTCTRL_2) | \ - INT_MASK(INT_INTCTRL_1) | \ - INT_MASK(INT_INTCTRL_0) | \ - INT_MASK(INT_BOOT_ACCESS) | \ - INT_MASK(INT_WORLD_ACCESS) | \ - INT_MASK(INT_I_ASID) | \ - INT_MASK(INT_D_ASID) | \ - INT_MASK(INT_DOUBLE_FAULT) | \ + (1ULL << INT_MEM_ERROR) | \ + (1ULL << INT_IDN_COMPLETE) | \ + (1ULL << INT_UDN_COMPLETE) | \ + (1ULL << INT_IDN_FIREWALL) | \ + (1ULL << INT_UDN_FIREWALL) | \ + (1ULL << INT_TILE_TIMER) | \ + (1ULL << INT_AUX_TILE_TIMER) | \ + (1ULL << INT_IDN_TIMER) | \ + (1ULL << INT_UDN_TIMER) | \ + (1ULL << INT_IDN_AVAIL) | \ + (1ULL << INT_UDN_AVAIL) | \ + (1ULL << INT_IPI_3) | \ + (1ULL << INT_IPI_2) | \ + (1ULL << INT_IPI_1) | \ + (1ULL << INT_IPI_0) | \ + (1ULL << INT_PERF_COUNT) | \ + (1ULL << INT_AUX_PERF_COUNT) | \ + (1ULL << INT_INTCTRL_3) | \ + (1ULL << INT_INTCTRL_2) | \ + (1ULL << INT_INTCTRL_1) | \ + (1ULL << INT_INTCTRL_0) | \ + (1ULL << INT_BOOT_ACCESS) | \ + (1ULL << INT_WORLD_ACCESS) | \ + (1ULL << INT_I_ASID) | \ + (1ULL << INT_D_ASID) | \ + (1ULL << INT_DOUBLE_FAULT) | \ 0) #define NONQUEUED_INTERRUPTS ( \ - INT_MASK(INT_SINGLE_STEP_3) | \ - INT_MASK(INT_SINGLE_STEP_2) | \ - INT_MASK(INT_SINGLE_STEP_1) | \ - INT_MASK(INT_SINGLE_STEP_0) | \ - INT_MASK(INT_ITLB_MISS) | \ - INT_MASK(INT_ILL) | \ - INT_MASK(INT_GPV) | \ - INT_MASK(INT_IDN_ACCESS) | \ - INT_MASK(INT_UDN_ACCESS) | \ - INT_MASK(INT_SWINT_3) | \ - INT_MASK(INT_SWINT_2) | \ - INT_MASK(INT_SWINT_1) | \ - INT_MASK(INT_SWINT_0) | \ - INT_MASK(INT_ILL_TRANS) | \ - INT_MASK(INT_UNALIGN_DATA) | \ - INT_MASK(INT_DTLB_MISS) | \ - INT_MASK(INT_DTLB_ACCESS) | \ + (1ULL << INT_SINGLE_STEP_3) | \ + (1ULL << INT_SINGLE_STEP_2) | \ + (1ULL << INT_SINGLE_STEP_1) | \ + (1ULL << INT_SINGLE_STEP_0) | \ + (1ULL << INT_ITLB_MISS) | \ + (1ULL << INT_ILL) | \ + (1ULL << INT_GPV) | \ + (1ULL << INT_IDN_ACCESS) | \ + (1ULL << INT_UDN_ACCESS) | \ + (1ULL << INT_SWINT_3) | \ + (1ULL << INT_SWINT_2) | \ + (1ULL << INT_SWINT_1) | \ + (1ULL << INT_SWINT_0) | \ + (1ULL << INT_ILL_TRANS) | \ + (1ULL << INT_UNALIGN_DATA) | \ + (1ULL << INT_DTLB_MISS) | \ + (1ULL << INT_DTLB_ACCESS) | \ 0) #define CRITICAL_MASKED_INTERRUPTS ( \ - INT_MASK(INT_MEM_ERROR) | \ - INT_MASK(INT_SINGLE_STEP_3) | \ - INT_MASK(INT_SINGLE_STEP_2) | \ - INT_MASK(INT_SINGLE_STEP_1) | \ - INT_MASK(INT_SINGLE_STEP_0) | \ - INT_MASK(INT_IDN_COMPLETE) | \ - INT_MASK(INT_UDN_COMPLETE) | \ - INT_MASK(INT_IDN_FIREWALL) | \ - INT_MASK(INT_UDN_FIREWALL) | \ - INT_MASK(INT_TILE_TIMER) | \ - INT_MASK(INT_AUX_TILE_TIMER) | \ - INT_MASK(INT_IDN_TIMER) | \ - INT_MASK(INT_UDN_TIMER) | \ - INT_MASK(INT_IDN_AVAIL) | \ - INT_MASK(INT_UDN_AVAIL) | \ - INT_MASK(INT_IPI_3) | \ - INT_MASK(INT_IPI_2) | \ - INT_MASK(INT_IPI_1) | \ - INT_MASK(INT_IPI_0) | \ - INT_MASK(INT_PERF_COUNT) | \ - INT_MASK(INT_AUX_PERF_COUNT) | \ - INT_MASK(INT_INTCTRL_3) | \ - INT_MASK(INT_INTCTRL_2) | \ - INT_MASK(INT_INTCTRL_1) | \ - INT_MASK(INT_INTCTRL_0) | \ + (1ULL << INT_MEM_ERROR) | \ + (1ULL << INT_SINGLE_STEP_3) | \ + (1ULL << INT_SINGLE_STEP_2) | \ + (1ULL << INT_SINGLE_STEP_1) | \ + (1ULL << INT_SINGLE_STEP_0) | \ + (1ULL << INT_IDN_COMPLETE) | \ + (1ULL << INT_UDN_COMPLETE) | \ + (1ULL << INT_IDN_FIREWALL) | \ + (1ULL << INT_UDN_FIREWALL) | \ + (1ULL << INT_TILE_TIMER) | \ + (1ULL << INT_AUX_TILE_TIMER) | \ + (1ULL << INT_IDN_TIMER) | \ + (1ULL << INT_UDN_TIMER) | \ + (1ULL << INT_IDN_AVAIL) | \ + (1ULL << INT_UDN_AVAIL) | \ + (1ULL << INT_IPI_3) | \ + (1ULL << INT_IPI_2) | \ + (1ULL << INT_IPI_1) | \ + (1ULL << INT_IPI_0) | \ + (1ULL << INT_PERF_COUNT) | \ + (1ULL << INT_AUX_PERF_COUNT) | \ + (1ULL << INT_INTCTRL_3) | \ + (1ULL << INT_INTCTRL_2) | \ + (1ULL << INT_INTCTRL_1) | \ + (1ULL << INT_INTCTRL_0) | \ 0) #define CRITICAL_UNMASKED_INTERRUPTS ( \ - INT_MASK(INT_ITLB_MISS) | \ - INT_MASK(INT_ILL) | \ - INT_MASK(INT_GPV) | \ - INT_MASK(INT_IDN_ACCESS) | \ - INT_MASK(INT_UDN_ACCESS) | \ - INT_MASK(INT_SWINT_3) | \ - INT_MASK(INT_SWINT_2) | \ - INT_MASK(INT_SWINT_1) | \ - INT_MASK(INT_SWINT_0) | \ - INT_MASK(INT_ILL_TRANS) | \ - INT_MASK(INT_UNALIGN_DATA) | \ - INT_MASK(INT_DTLB_MISS) | \ - INT_MASK(INT_DTLB_ACCESS) | \ - INT_MASK(INT_BOOT_ACCESS) | \ - INT_MASK(INT_WORLD_ACCESS) | \ - INT_MASK(INT_I_ASID) | \ - INT_MASK(INT_D_ASID) | \ - INT_MASK(INT_DOUBLE_FAULT) | \ + (1ULL << INT_ITLB_MISS) | \ + (1ULL << INT_ILL) | \ + (1ULL << INT_GPV) | \ + (1ULL << INT_IDN_ACCESS) | \ + (1ULL << INT_UDN_ACCESS) | \ + (1ULL << INT_SWINT_3) | \ + (1ULL << INT_SWINT_2) | \ + (1ULL << INT_SWINT_1) | \ + (1ULL << INT_SWINT_0) | \ + (1ULL << INT_ILL_TRANS) | \ + (1ULL << INT_UNALIGN_DATA) | \ + (1ULL << INT_DTLB_MISS) | \ + (1ULL << INT_DTLB_ACCESS) | \ + (1ULL << INT_BOOT_ACCESS) | \ + (1ULL << INT_WORLD_ACCESS) | \ + (1ULL << INT_I_ASID) | \ + (1ULL << INT_D_ASID) | \ + (1ULL << INT_DOUBLE_FAULT) | \ 0) #define MASKABLE_INTERRUPTS ( \ - INT_MASK(INT_MEM_ERROR) | \ - INT_MASK(INT_SINGLE_STEP_3) | \ - INT_MASK(INT_SINGLE_STEP_2) | \ - INT_MASK(INT_SINGLE_STEP_1) | \ - INT_MASK(INT_SINGLE_STEP_0) | \ - INT_MASK(INT_IDN_COMPLETE) | \ - INT_MASK(INT_UDN_COMPLETE) | \ - INT_MASK(INT_IDN_FIREWALL) | \ - INT_MASK(INT_UDN_FIREWALL) | \ - INT_MASK(INT_TILE_TIMER) | \ - INT_MASK(INT_AUX_TILE_TIMER) | \ - INT_MASK(INT_IDN_TIMER) | \ - INT_MASK(INT_UDN_TIMER) | \ - INT_MASK(INT_IDN_AVAIL) | \ - INT_MASK(INT_UDN_AVAIL) | \ - INT_MASK(INT_IPI_3) | \ - INT_MASK(INT_IPI_2) | \ - INT_MASK(INT_IPI_1) | \ - INT_MASK(INT_IPI_0) | \ - INT_MASK(INT_PERF_COUNT) | \ - INT_MASK(INT_AUX_PERF_COUNT) | \ - INT_MASK(INT_INTCTRL_3) | \ - INT_MASK(INT_INTCTRL_2) | \ - INT_MASK(INT_INTCTRL_1) | \ - INT_MASK(INT_INTCTRL_0) | \ + (1ULL << INT_MEM_ERROR) | \ + (1ULL << INT_SINGLE_STEP_3) | \ + (1ULL << INT_SINGLE_STEP_2) | \ + (1ULL << INT_SINGLE_STEP_1) | \ + (1ULL << INT_SINGLE_STEP_0) | \ + (1ULL << INT_IDN_COMPLETE) | \ + (1ULL << INT_UDN_COMPLETE) | \ + (1ULL << INT_IDN_FIREWALL) | \ + (1ULL << INT_UDN_FIREWALL) | \ + (1ULL << INT_TILE_TIMER) | \ + (1ULL << INT_AUX_TILE_TIMER) | \ + (1ULL << INT_IDN_TIMER) | \ + (1ULL << INT_UDN_TIMER) | \ + (1ULL << INT_IDN_AVAIL) | \ + (1ULL << INT_UDN_AVAIL) | \ + (1ULL << INT_IPI_3) | \ + (1ULL << INT_IPI_2) | \ + (1ULL << INT_IPI_1) | \ + (1ULL << INT_IPI_0) | \ + (1ULL << INT_PERF_COUNT) | \ + (1ULL << INT_AUX_PERF_COUNT) | \ + (1ULL << INT_INTCTRL_3) | \ + (1ULL << INT_INTCTRL_2) | \ + (1ULL << INT_INTCTRL_1) | \ + (1ULL << INT_INTCTRL_0) | \ 0) #define UNMASKABLE_INTERRUPTS ( \ - INT_MASK(INT_ITLB_MISS) | \ - INT_MASK(INT_ILL) | \ - INT_MASK(INT_GPV) | \ - INT_MASK(INT_IDN_ACCESS) | \ - INT_MASK(INT_UDN_ACCESS) | \ - INT_MASK(INT_SWINT_3) | \ - INT_MASK(INT_SWINT_2) | \ - INT_MASK(INT_SWINT_1) | \ - INT_MASK(INT_SWINT_0) | \ - INT_MASK(INT_ILL_TRANS) | \ - INT_MASK(INT_UNALIGN_DATA) | \ - INT_MASK(INT_DTLB_MISS) | \ - INT_MASK(INT_DTLB_ACCESS) | \ - INT_MASK(INT_BOOT_ACCESS) | \ - INT_MASK(INT_WORLD_ACCESS) | \ - INT_MASK(INT_I_ASID) | \ - INT_MASK(INT_D_ASID) | \ - INT_MASK(INT_DOUBLE_FAULT) | \ + (1ULL << INT_ITLB_MISS) | \ + (1ULL << INT_ILL) | \ + (1ULL << INT_GPV) | \ + (1ULL << INT_IDN_ACCESS) | \ + (1ULL << INT_UDN_ACCESS) | \ + (1ULL << INT_SWINT_3) | \ + (1ULL << INT_SWINT_2) | \ + (1ULL << INT_SWINT_1) | \ + (1ULL << INT_SWINT_0) | \ + (1ULL << INT_ILL_TRANS) | \ + (1ULL << INT_UNALIGN_DATA) | \ + (1ULL << INT_DTLB_MISS) | \ + (1ULL << INT_DTLB_ACCESS) | \ + (1ULL << INT_BOOT_ACCESS) | \ + (1ULL << INT_WORLD_ACCESS) | \ + (1ULL << INT_I_ASID) | \ + (1ULL << INT_D_ASID) | \ + (1ULL << INT_DOUBLE_FAULT) | \ 0) #define SYNC_INTERRUPTS ( \ - INT_MASK(INT_SINGLE_STEP_3) | \ - INT_MASK(INT_SINGLE_STEP_2) | \ - INT_MASK(INT_SINGLE_STEP_1) | \ - INT_MASK(INT_SINGLE_STEP_0) | \ - INT_MASK(INT_IDN_COMPLETE) | \ - INT_MASK(INT_UDN_COMPLETE) | \ - INT_MASK(INT_ITLB_MISS) | \ - INT_MASK(INT_ILL) | \ - INT_MASK(INT_GPV) | \ - INT_MASK(INT_IDN_ACCESS) | \ - INT_MASK(INT_UDN_ACCESS) | \ - INT_MASK(INT_SWINT_3) | \ - INT_MASK(INT_SWINT_2) | \ - INT_MASK(INT_SWINT_1) | \ - INT_MASK(INT_SWINT_0) | \ - INT_MASK(INT_ILL_TRANS) | \ - INT_MASK(INT_UNALIGN_DATA) | \ - INT_MASK(INT_DTLB_MISS) | \ - INT_MASK(INT_DTLB_ACCESS) | \ + (1ULL << INT_SINGLE_STEP_3) | \ + (1ULL << INT_SINGLE_STEP_2) | \ + (1ULL << INT_SINGLE_STEP_1) | \ + (1ULL << INT_SINGLE_STEP_0) | \ + (1ULL << INT_IDN_COMPLETE) | \ + (1ULL << INT_UDN_COMPLETE) | \ + (1ULL << INT_ITLB_MISS) | \ + (1ULL << INT_ILL) | \ + (1ULL << INT_GPV) | \ + (1ULL << INT_IDN_ACCESS) | \ + (1ULL << INT_UDN_ACCESS) | \ + (1ULL << INT_SWINT_3) | \ + (1ULL << INT_SWINT_2) | \ + (1ULL << INT_SWINT_1) | \ + (1ULL << INT_SWINT_0) | \ + (1ULL << INT_ILL_TRANS) | \ + (1ULL << INT_UNALIGN_DATA) | \ + (1ULL << INT_DTLB_MISS) | \ + (1ULL << INT_DTLB_ACCESS) | \ 0) #define NON_SYNC_INTERRUPTS ( \ - INT_MASK(INT_MEM_ERROR) | \ - INT_MASK(INT_IDN_FIREWALL) | \ - INT_MASK(INT_UDN_FIREWALL) | \ - INT_MASK(INT_TILE_TIMER) | \ - INT_MASK(INT_AUX_TILE_TIMER) | \ - INT_MASK(INT_IDN_TIMER) | \ - INT_MASK(INT_UDN_TIMER) | \ - INT_MASK(INT_IDN_AVAIL) | \ - INT_MASK(INT_UDN_AVAIL) | \ - INT_MASK(INT_IPI_3) | \ - INT_MASK(INT_IPI_2) | \ - INT_MASK(INT_IPI_1) | \ - INT_MASK(INT_IPI_0) | \ - INT_MASK(INT_PERF_COUNT) | \ - INT_MASK(INT_AUX_PERF_COUNT) | \ - INT_MASK(INT_INTCTRL_3) | \ - INT_MASK(INT_INTCTRL_2) | \ - INT_MASK(INT_INTCTRL_1) | \ - INT_MASK(INT_INTCTRL_0) | \ - INT_MASK(INT_BOOT_ACCESS) | \ - INT_MASK(INT_WORLD_ACCESS) | \ - INT_MASK(INT_I_ASID) | \ - INT_MASK(INT_D_ASID) | \ - INT_MASK(INT_DOUBLE_FAULT) | \ + (1ULL << INT_MEM_ERROR) | \ + (1ULL << INT_IDN_FIREWALL) | \ + (1ULL << INT_UDN_FIREWALL) | \ + (1ULL << INT_TILE_TIMER) | \ + (1ULL << INT_AUX_TILE_TIMER) | \ + (1ULL << INT_IDN_TIMER) | \ + (1ULL << INT_UDN_TIMER) | \ + (1ULL << INT_IDN_AVAIL) | \ + (1ULL << INT_UDN_AVAIL) | \ + (1ULL << INT_IPI_3) | \ + (1ULL << INT_IPI_2) | \ + (1ULL << INT_IPI_1) | \ + (1ULL << INT_IPI_0) | \ + (1ULL << INT_PERF_COUNT) | \ + (1ULL << INT_AUX_PERF_COUNT) | \ + (1ULL << INT_INTCTRL_3) | \ + (1ULL << INT_INTCTRL_2) | \ + (1ULL << INT_INTCTRL_1) | \ + (1ULL << INT_INTCTRL_0) | \ + (1ULL << INT_BOOT_ACCESS) | \ + (1ULL << INT_WORLD_ACCESS) | \ + (1ULL << INT_I_ASID) | \ + (1ULL << INT_D_ASID) | \ + (1ULL << INT_DOUBLE_FAULT) | \ 0) #endif /* !__ASSEMBLER__ */ #endif /* !__ARCH_INTERRUPTS_H__ */ diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S index 54bc9a6678e8..4ea080902654 100644 --- a/arch/tile/kernel/intvec_64.S +++ b/arch/tile/kernel/intvec_64.S @@ -1035,7 +1035,9 @@ handle_syscall: /* Ensure that the syscall number is within the legal range. */ { moveli r20, hw2(sys_call_table) +#ifdef CONFIG_COMPAT blbs r30, .Lcompat_syscall +#endif } { cmpltu r21, TREG_SYSCALL_NR_NAME, r21 @@ -1093,6 +1095,7 @@ handle_syscall: j .Lresume_userspace /* jump into middle of interrupt_return */ } +#ifdef CONFIG_COMPAT .Lcompat_syscall: /* * Load the base of the compat syscall table in r20, and @@ -1117,6 +1120,7 @@ handle_syscall: { move r15, r4; addxi r4, r4, 0 } { move r16, r5; addxi r5, r5, 0 } j .Lload_syscall_pointer +#endif .Linvalid_syscall: /* Report an invalid syscall back to the user program */ diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index 0e5661e7d00d..caf93ae11793 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -159,7 +159,7 @@ static void save_arch_state(struct thread_struct *t); int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg, struct task_struct *p) { - struct pt_regs *childregs = task_pt_regs(p), *regs = current_pt_regs(); + struct pt_regs *childregs = task_pt_regs(p); unsigned long ksp; unsigned long *callee_regs; diff --git a/arch/tile/kernel/reboot.c b/arch/tile/kernel/reboot.c index baa3d905fee2..d1b5c913ae72 100644 --- a/arch/tile/kernel/reboot.c +++ b/arch/tile/kernel/reboot.c @@ -16,6 +16,7 @@ #include <linux/reboot.h> #include <linux/smp.h> #include <linux/pm.h> +#include <linux/export.h> #include <asm/page.h> #include <asm/setup.h> #include <hv/hypervisor.h> @@ -49,3 +50,4 @@ void machine_restart(char *cmd) /* No interesting distinction to be made here. */ void (*pm_power_off)(void) = NULL; +EXPORT_SYMBOL(pm_power_off); diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index 6a649a4462d3..d1e15f7b59c6 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -31,6 +31,7 @@ #include <linux/timex.h> #include <linux/hugetlb.h> #include <linux/start_kernel.h> +#include <linux/screen_info.h> #include <asm/setup.h> #include <asm/sections.h> #include <asm/cacheflush.h> @@ -49,6 +50,10 @@ static inline int ABS(int x) { return x >= 0 ? x : -x; } /* Chip information */ char chip_model[64] __write_once; +#ifdef CONFIG_VT +struct screen_info screen_info; +#endif + struct pglist_data node_data[MAX_NUMNODES] __read_mostly; EXPORT_SYMBOL(node_data); diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c index b2f44c28dda6..ed258b8ae320 100644 --- a/arch/tile/kernel/stack.c +++ b/arch/tile/kernel/stack.c @@ -112,7 +112,7 @@ static struct pt_regs *valid_fault_handler(struct KBacktraceIterator* kbt) p->pc, p->sp, p->ex1); p = NULL; } - if (!kbt->profile || (INT_MASK(p->faultnum) & QUEUED_INTERRUPTS) == 0) + if (!kbt->profile || ((1ULL << p->faultnum) & QUEUED_INTERRUPTS) == 0) return p; return NULL; } @@ -484,6 +484,7 @@ void save_stack_trace(struct stack_trace *trace) { save_stack_trace_tsk(NULL, trace); } +EXPORT_SYMBOL_GPL(save_stack_trace); #endif diff --git a/arch/tile/lib/cacheflush.c b/arch/tile/lib/cacheflush.c index db4fb89e12d8..8f8ad814b139 100644 --- a/arch/tile/lib/cacheflush.c +++ b/arch/tile/lib/cacheflush.c @@ -12,6 +12,7 @@ * more details. */ +#include <linux/export.h> #include <asm/page.h> #include <asm/cacheflush.h> #include <arch/icache.h> @@ -165,3 +166,4 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh) __insn_mtspr(SPR_DSTREAM_PF, old_dstream_pf); #endif } +EXPORT_SYMBOL_GPL(finv_buffer_remote); diff --git a/arch/tile/lib/cpumask.c b/arch/tile/lib/cpumask.c index fdc403614d12..75947edccb26 100644 --- a/arch/tile/lib/cpumask.c +++ b/arch/tile/lib/cpumask.c @@ -16,6 +16,7 @@ #include <linux/ctype.h> #include <linux/errno.h> #include <linux/smp.h> +#include <linux/export.h> /* * Allow cropping out bits beyond the end of the array. @@ -50,3 +51,4 @@ int bitmap_parselist_crop(const char *bp, unsigned long *maskp, int nmaskbits) } while (*bp != '\0' && *bp != '\n'); return 0; } +EXPORT_SYMBOL(bitmap_parselist_crop); diff --git a/arch/tile/lib/exports.c b/arch/tile/lib/exports.c index dd5f0a33fdaf..4385cb6fa00a 100644 --- a/arch/tile/lib/exports.c +++ b/arch/tile/lib/exports.c @@ -55,6 +55,8 @@ EXPORT_SYMBOL(hv_dev_poll_cancel); EXPORT_SYMBOL(hv_dev_close); EXPORT_SYMBOL(hv_sysconf); EXPORT_SYMBOL(hv_confstr); +EXPORT_SYMBOL(hv_get_rtc); +EXPORT_SYMBOL(hv_set_rtc); /* libgcc.a */ uint32_t __udivsi3(uint32_t dividend, uint32_t divisor); diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c index 5f7868dcd6d4..1ae911939a18 100644 --- a/arch/tile/mm/homecache.c +++ b/arch/tile/mm/homecache.c @@ -408,6 +408,7 @@ void homecache_change_page_home(struct page *page, int order, int home) __set_pte(ptep, pte_set_home(pteval, home)); } } +EXPORT_SYMBOL(homecache_change_page_home); struct page *homecache_alloc_pages(gfp_t gfp_mask, unsigned int order, int home) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 225543bf45a5..260857a53b87 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1,7 +1,7 @@ # Select 32 or 64 bit config 64BIT bool "64-bit kernel" if ARCH = "x86" - default ARCH = "x86_64" + default ARCH != "i386" ---help--- Say yes to build a 64-bit kernel - formerly known as x86_64 Say no to build a 32-bit kernel - formerly known as i386 @@ -28,7 +28,6 @@ config X86 select HAVE_OPROFILE select HAVE_PCSPKR_PLATFORM select HAVE_PERF_EVENTS - select HAVE_IRQ_WORK select HAVE_IOREMAP_PROT select HAVE_KPROBES select HAVE_MEMBLOCK @@ -40,10 +39,12 @@ config X86 select HAVE_DMA_CONTIGUOUS if !SWIOTLB select HAVE_KRETPROBES select HAVE_OPTPROBES + select HAVE_KPROBES_ON_FTRACE select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FENTRY if X86_64 select HAVE_C_RECORDMCOUNT select HAVE_DYNAMIC_FTRACE + select HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_GRAPH_FP_TEST @@ -106,6 +107,7 @@ config X86 select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC) select GENERIC_TIME_VSYSCALL if X86_64 select KTIME_SCALAR if X86_32 + select ALWAYS_USE_PERSISTENT_CLOCK select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER select HAVE_CONTEXT_TRACKING if X86_64 @@ -114,6 +116,7 @@ config X86 select MODULES_USE_ELF_RELA if X86_64 select CLONE_BACKWARDS if X86_32 select GENERIC_SIGALTSTACK + select ARCH_USE_BUILTIN_BSWAP config INSTRUCTION_DECODER def_bool y @@ -320,6 +323,10 @@ config X86_BIGSMP ---help--- This option is needed for the systems that have more than 8 CPUs +config GOLDFISH + def_bool y + depends on X86_GOLDFISH + if X86_32 config X86_EXTENDED_PLATFORM bool "Support for extended (non-PC) x86 platforms" @@ -402,6 +409,14 @@ config X86_UV # Following is an alphabetically sorted list of 32 bit extended platforms # Please maintain the alphabetic order if and when there are additions +config X86_GOLDFISH + bool "Goldfish (Virtual Platform)" + depends on X86_32 + ---help--- + Enable support for the Goldfish virtual platform used primarily + for Android development. Unless you are building for the Android + Goldfish emulator say N here. + config X86_INTEL_CE bool "CE4100 TV platform" depends on PCI @@ -2188,6 +2203,15 @@ config GEOS ---help--- This option enables system support for the Traverse Technologies GEOS. +config TS5500 + bool "Technologic Systems TS-5500 platform support" + depends on MELAN + select CHECK_SIGNATURE + select NEW_LEDS + select LEDS_CLASS + ---help--- + This option enables system support for the Technologic Systems TS-5500. + endif # X86_32 config AMD_NB diff --git a/arch/x86/Makefile b/arch/x86/Makefile index e71fc4279aab..5c477260294f 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -2,7 +2,11 @@ # select defconfig based on actual architecture ifeq ($(ARCH),x86) + ifeq ($(shell uname -m),x86_64) + KBUILD_DEFCONFIG := x86_64_defconfig + else KBUILD_DEFCONFIG := i386_defconfig + endif else KBUILD_DEFCONFIG := $(ARCH)_defconfig endif diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 88f7ff6da404..7cb56c6ca351 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -325,6 +325,8 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, { real_mode = rmode; + sanitize_boot_params(real_mode); + if (real_mode->screen_info.orig_video_mode == 7) { vidmem = (char *) 0xb0000; vidport = 0x3b4; diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 0e6dc0ee0eea..674019d8e235 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -18,6 +18,7 @@ #include <asm/page.h> #include <asm/boot.h> #include <asm/bootparam.h> +#include <asm/bootparam_utils.h> #define BOOT_BOOT_H #include "../ctype.h" diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 5598547281a7..94447086e551 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -1,3 +1,4 @@ +# CONFIG_64BIT is not set CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index b3341e9cd8fd..a54ee1d054d9 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -81,6 +81,23 @@ static inline struct amd_northbridge *node_to_amd_nb(int node) return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL; } +static inline u16 amd_get_node_id(struct pci_dev *pdev) +{ + struct pci_dev *misc; + int i; + + for (i = 0; i != amd_nb_num(); i++) { + misc = node_to_amd_nb(i)->misc; + + if (pci_domain_nr(misc->bus) == pci_domain_nr(pdev->bus) && + PCI_SLOT(misc->devfn) == PCI_SLOT(pdev->devfn)) + return i; + } + + WARN(1, "Unable to find AMD Northbridge id for %s\n", pci_name(pdev)); + return 0; +} + #else #define amd_nb_num(x) 0 diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h new file mode 100644 index 000000000000..5b5e9cb774b5 --- /dev/null +++ b/arch/x86/include/asm/bootparam_utils.h @@ -0,0 +1,38 @@ +#ifndef _ASM_X86_BOOTPARAM_UTILS_H +#define _ASM_X86_BOOTPARAM_UTILS_H + +#include <asm/bootparam.h> + +/* + * This file is included from multiple environments. Do not + * add completing #includes to make it standalone. + */ + +/* + * Deal with bootloaders which fail to initialize unknown fields in + * boot_params to zero. The list fields in this list are taken from + * analysis of kexec-tools; if other broken bootloaders initialize a + * different set of fields we will need to figure out how to disambiguate. + * + */ +static void sanitize_boot_params(struct boot_params *boot_params) +{ + if (boot_params->sentinel) { + /*fields in boot_params are not valid, clear them */ + memset(&boot_params->olpc_ofw_header, 0, + (char *)&boot_params->alt_mem_k - + (char *)&boot_params->olpc_ofw_header); + memset(&boot_params->kbd_status, 0, + (char *)&boot_params->hdr - + (char *)&boot_params->kbd_status); + memset(&boot_params->_pad7[0], 0, + (char *)&boot_params->edd_mbr_sig_buffer[0] - + (char *)&boot_params->_pad7[0]); + memset(&boot_params->_pad8[0], 0, + (char *)&boot_params->eddbuf[0] - + (char *)&boot_params->_pad8[0]); + memset(&boot_params->_pad9[0], 0, sizeof(boot_params->_pad9)); + } +} + +#endif /* _ASM_X86_BOOTPARAM_UTILS_H */ diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 2d9075e863a0..93fe929d1cee 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -167,6 +167,7 @@ #define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */ #define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */ #define X86_FEATURE_PERFCTR_CORE (6*32+23) /* core performance counter extensions */ +#define X86_FEATURE_PERFCTR_NB (6*32+24) /* NB performance counter extensions */ /* * Auxiliary flags: Linux defined - For features scattered in various @@ -309,6 +310,7 @@ extern const char * const x86_power_flags[32]; #define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) #define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ) #define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE) +#define cpu_has_perfctr_nb boot_cpu_has(X86_FEATURE_PERFCTR_NB) #define cpu_has_cx8 boot_cpu_has(X86_FEATURE_CX8) #define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16) #define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU) diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 9a25b522d377..86cb51e1ca96 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -44,7 +44,6 @@ #ifdef CONFIG_DYNAMIC_FTRACE #define ARCH_SUPPORTS_FTRACE_OPS 1 -#define ARCH_SUPPORTS_FTRACE_SAVE_REGS #endif #ifndef __ASSEMBLY__ diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h index 434e2106cc87..b18df579c0e9 100644 --- a/arch/x86/include/asm/hpet.h +++ b/arch/x86/include/asm/hpet.h @@ -80,9 +80,9 @@ extern void hpet_msi_write(struct hpet_dev *hdev, struct msi_msg *msg); extern void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg); #ifdef CONFIG_PCI_MSI -extern int arch_setup_hpet_msi(unsigned int irq, unsigned int id); +extern int default_setup_hpet_msi(unsigned int irq, unsigned int id); #else -static inline int arch_setup_hpet_msi(unsigned int irq, unsigned int id) +static inline int default_setup_hpet_msi(unsigned int irq, unsigned int id) { return -EINVAL; } @@ -111,6 +111,7 @@ extern void hpet_unregister_irq_handler(rtc_irq_handler handler); static inline int hpet_enable(void) { return 0; } static inline int is_hpet_enabled(void) { return 0; } #define hpet_readl(a) 0 +#define default_setup_hpet_msi NULL #endif #endif /* _ASM_X86_HPET_H */ diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index eb92a6ed2be7..10a78c3d3d5a 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -101,6 +101,7 @@ static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr, irq_attr->polarity = polarity; } +/* Intel specific interrupt remapping information */ struct irq_2_iommu { struct intel_iommu *iommu; u16 irte_index; @@ -108,6 +109,12 @@ struct irq_2_iommu { u8 irte_mask; }; +/* AMD specific interrupt remapping information */ +struct irq_2_irte { + u16 devid; /* Device ID for IRTE table */ + u16 index; /* Index into IRTE table*/ +}; + /* * This is performance-critical, we want to do it O(1) * @@ -120,7 +127,11 @@ struct irq_cfg { u8 vector; u8 move_in_progress : 1; #ifdef CONFIG_IRQ_REMAP - struct irq_2_iommu irq_2_iommu; + u8 remapped : 1; + union { + struct irq_2_iommu irq_2_iommu; + struct irq_2_irte irq_2_irte; + }; #endif }; diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h index b518c7509933..86095ed14135 100644 --- a/arch/x86/include/asm/hypervisor.h +++ b/arch/x86/include/asm/hypervisor.h @@ -25,6 +25,7 @@ extern void init_hypervisor(struct cpuinfo_x86 *c); extern void init_hypervisor_platform(void); +extern bool hypervisor_x2apic_available(void); /* * x86 hypervisor information @@ -41,6 +42,9 @@ struct hypervisor_x86 { /* Platform setup (run once per boot) */ void (*init_platform)(void); + + /* X2APIC detection (run once per boot) */ + bool (*x2apic_available)(void); }; extern const struct hypervisor_x86 *x86_hyper; @@ -51,13 +55,4 @@ extern const struct hypervisor_x86 x86_hyper_ms_hyperv; extern const struct hypervisor_x86 x86_hyper_xen_hvm; extern const struct hypervisor_x86 x86_hyper_kvm; -static inline bool hypervisor_x2apic_available(void) -{ - if (kvm_para_available()) - return true; - if (xen_x2apic_para_available()) - return true; - return false; -} - #endif diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 73d8c5398ea9..459e50a424d1 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -144,11 +144,24 @@ extern int timer_through_8259; (mp_irq_entries && !skip_ioapic_setup && io_apic_irqs) struct io_apic_irq_attr; +struct irq_cfg; extern int io_apic_set_pci_routing(struct device *dev, int irq, struct io_apic_irq_attr *irq_attr); void setup_IO_APIC_irq_extra(u32 gsi); extern void ioapic_insert_resources(void); +extern int native_setup_ioapic_entry(int, struct IO_APIC_route_entry *, + unsigned int, int, + struct io_apic_irq_attr *); +extern int native_setup_ioapic_entry(int, struct IO_APIC_route_entry *, + unsigned int, int, + struct io_apic_irq_attr *); +extern void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg); + +extern void native_compose_msi_msg(struct pci_dev *pdev, + unsigned int irq, unsigned int dest, + struct msi_msg *msg, u8 hpet_id); +extern void native_eoi_ioapic_pin(int apic, int pin, int vector); int io_apic_setup_irq_pin_once(unsigned int irq, int node, struct io_apic_irq_attr *attr); extern int save_ioapic_entries(void); @@ -179,6 +192,12 @@ extern void __init native_io_apic_init_mappings(void); extern unsigned int native_io_apic_read(unsigned int apic, unsigned int reg); extern void native_io_apic_write(unsigned int apic, unsigned int reg, unsigned int val); extern void native_io_apic_modify(unsigned int apic, unsigned int reg, unsigned int val); +extern void native_disable_io_apic(void); +extern void native_io_apic_print_entries(unsigned int apic, unsigned int nr_entries); +extern void intel_ir_io_apic_print_entries(unsigned int apic, unsigned int nr_entries); +extern int native_ioapic_set_affinity(struct irq_data *, + const struct cpumask *, + bool); static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) { @@ -193,6 +212,9 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned { x86_io_apic_ops.modify(apic, reg, value); } + +extern void io_apic_eoi(unsigned int apic, unsigned int vector); + #else /* !CONFIG_X86_IO_APIC */ #define io_apic_assign_pci_irqs 0 @@ -223,6 +245,12 @@ static inline void disable_ioapic_support(void) { } #define native_io_apic_read NULL #define native_io_apic_write NULL #define native_io_apic_modify NULL +#define native_disable_io_apic NULL +#define native_io_apic_print_entries NULL +#define native_ioapic_set_affinity NULL +#define native_setup_ioapic_entry NULL +#define native_compose_msi_msg NULL +#define native_eoi_ioapic_pin NULL #endif #endif /* _ASM_X86_IO_APIC_H */ diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 5fb9bbbd2f14..95fd3527f632 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -26,8 +26,6 @@ #ifdef CONFIG_IRQ_REMAP -extern int irq_remapping_enabled; - extern void setup_irq_remapping_ops(void); extern int irq_remapping_supported(void); extern int irq_remapping_prepare(void); @@ -40,21 +38,19 @@ extern int setup_ioapic_remapped_entry(int irq, unsigned int destination, int vector, struct io_apic_irq_attr *attr); -extern int set_remapped_irq_affinity(struct irq_data *data, - const struct cpumask *mask, - bool force); extern void free_remapped_irq(int irq); extern void compose_remapped_msi_msg(struct pci_dev *pdev, unsigned int irq, unsigned int dest, struct msi_msg *msg, u8 hpet_id); -extern int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec); -extern int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq, - int index, int sub_handle); extern int setup_hpet_msi_remapped(unsigned int irq, unsigned int id); +extern void panic_if_irq_remap(const char *msg); +extern bool setup_remapped_irq(int irq, + struct irq_cfg *cfg, + struct irq_chip *chip); -#else /* CONFIG_IRQ_REMAP */ +void irq_remap_modify_chip_defaults(struct irq_chip *chip); -#define irq_remapping_enabled 0 +#else /* CONFIG_IRQ_REMAP */ static inline void setup_irq_remapping_ops(void) { } static inline int irq_remapping_supported(void) { return 0; } @@ -71,30 +67,30 @@ static inline int setup_ioapic_remapped_entry(int irq, { return -ENODEV; } -static inline int set_remapped_irq_affinity(struct irq_data *data, - const struct cpumask *mask, - bool force) -{ - return 0; -} static inline void free_remapped_irq(int irq) { } static inline void compose_remapped_msi_msg(struct pci_dev *pdev, unsigned int irq, unsigned int dest, struct msi_msg *msg, u8 hpet_id) { } -static inline int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec) +static inline int setup_hpet_msi_remapped(unsigned int irq, unsigned int id) { return -ENODEV; } -static inline int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq, - int index, int sub_handle) + +static inline void panic_if_irq_remap(const char *msg) +{ +} + +static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip) { - return -ENODEV; } -static inline int setup_hpet_msi_remapped(unsigned int irq, unsigned int id) + +static inline bool setup_remapped_irq(int irq, + struct irq_cfg *cfg, + struct irq_chip *chip) { - return -ENODEV; + return false; } #endif /* CONFIG_IRQ_REMAP */ diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 1508e518c7e3..aac5fa62a86c 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -109,8 +109,8 @@ #define UV_BAU_MESSAGE 0xf5 -/* Xen vector callback to receive events in a HVM domain */ -#define XEN_HVM_EVTCHN_CALLBACK 0xf3 +/* Vector on which hypervisor callbacks will be delivered */ +#define HYPERVISOR_CALLBACK_VECTOR 0xf3 /* * Local APIC timer IRQ vector is on a different priority level, diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 5ed1f16187be..65231e173baf 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -85,13 +85,13 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, return ret; } -static inline int kvm_para_available(void) +static inline bool kvm_para_available(void) { unsigned int eax, ebx, ecx, edx; char signature[13]; if (boot_cpu_data.cpuid_level < 0) - return 0; /* So we don't blow up on old processors */ + return false; /* So we don't blow up on old processors */ if (cpu_has_hypervisor) { cpuid(KVM_CPUID_SIGNATURE, &eax, &ebx, &ecx, &edx); @@ -101,10 +101,10 @@ static inline int kvm_para_available(void) signature[12] = 0; if (strcmp(signature, "KVMKVMKVM") == 0) - return 1; + return true; } - return 0; + return false; } static inline unsigned int kvm_arch_para_features(void) diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 48142971b25d..79327e9483a3 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -27,20 +27,20 @@ #define __asmlinkage_protect0(ret) \ __asmlinkage_protect_n(ret) #define __asmlinkage_protect1(ret, arg1) \ - __asmlinkage_protect_n(ret, "g" (arg1)) + __asmlinkage_protect_n(ret, "m" (arg1)) #define __asmlinkage_protect2(ret, arg1, arg2) \ - __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2)) + __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2)) #define __asmlinkage_protect3(ret, arg1, arg2, arg3) \ - __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3)) + __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3)) #define __asmlinkage_protect4(ret, arg1, arg2, arg3, arg4) \ - __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3), \ - "g" (arg4)) + __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \ + "m" (arg4)) #define __asmlinkage_protect5(ret, arg1, arg2, arg3, arg4, arg5) \ - __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3), \ - "g" (arg4), "g" (arg5)) + __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \ + "m" (arg4), "m" (arg5)) #define __asmlinkage_protect6(ret, arg1, arg2, arg3, arg4, arg5, arg6) \ - __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3), \ - "g" (arg4), "g" (arg5), "g" (arg6)) + __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \ + "m" (arg4), "m" (arg5), "m" (arg6)) #endif /* CONFIG_X86_32 */ diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index ecdfee60ee4a..f4076af1f4ed 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -3,6 +3,90 @@ #include <uapi/asm/mce.h> +/* + * Machine Check support for x86 + */ + +/* MCG_CAP register defines */ +#define MCG_BANKCNT_MASK 0xff /* Number of Banks */ +#define MCG_CTL_P (1ULL<<8) /* MCG_CTL register available */ +#define MCG_EXT_P (1ULL<<9) /* Extended registers available */ +#define MCG_CMCI_P (1ULL<<10) /* CMCI supported */ +#define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */ +#define MCG_EXT_CNT_SHIFT 16 +#define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT) +#define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */ + +/* MCG_STATUS register defines */ +#define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */ +#define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */ +#define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */ + +/* MCi_STATUS register defines */ +#define MCI_STATUS_VAL (1ULL<<63) /* valid error */ +#define MCI_STATUS_OVER (1ULL<<62) /* previous errors lost */ +#define MCI_STATUS_UC (1ULL<<61) /* uncorrected error */ +#define MCI_STATUS_EN (1ULL<<60) /* error enabled */ +#define MCI_STATUS_MISCV (1ULL<<59) /* misc error reg. valid */ +#define MCI_STATUS_ADDRV (1ULL<<58) /* addr reg. valid */ +#define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */ +#define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */ +#define MCI_STATUS_AR (1ULL<<55) /* Action required */ +#define MCACOD 0xffff /* MCA Error Code */ + +/* Architecturally defined codes from SDM Vol. 3B Chapter 15 */ +#define MCACOD_SCRUB 0x00C0 /* 0xC0-0xCF Memory Scrubbing */ +#define MCACOD_SCRUBMSK 0xfff0 +#define MCACOD_L3WB 0x017A /* L3 Explicit Writeback */ +#define MCACOD_DATA 0x0134 /* Data Load */ +#define MCACOD_INSTR 0x0150 /* Instruction Fetch */ + +/* MCi_MISC register defines */ +#define MCI_MISC_ADDR_LSB(m) ((m) & 0x3f) +#define MCI_MISC_ADDR_MODE(m) (((m) >> 6) & 7) +#define MCI_MISC_ADDR_SEGOFF 0 /* segment offset */ +#define MCI_MISC_ADDR_LINEAR 1 /* linear address */ +#define MCI_MISC_ADDR_PHYS 2 /* physical address */ +#define MCI_MISC_ADDR_MEM 3 /* memory address */ +#define MCI_MISC_ADDR_GENERIC 7 /* generic */ + +/* CTL2 register defines */ +#define MCI_CTL2_CMCI_EN (1ULL << 30) +#define MCI_CTL2_CMCI_THRESHOLD_MASK 0x7fffULL + +#define MCJ_CTX_MASK 3 +#define MCJ_CTX(flags) ((flags) & MCJ_CTX_MASK) +#define MCJ_CTX_RANDOM 0 /* inject context: random */ +#define MCJ_CTX_PROCESS 0x1 /* inject context: process */ +#define MCJ_CTX_IRQ 0x2 /* inject context: IRQ */ +#define MCJ_NMI_BROADCAST 0x4 /* do NMI broadcasting */ +#define MCJ_EXCEPTION 0x8 /* raise as exception */ +#define MCJ_IRQ_BRAODCAST 0x10 /* do IRQ broadcasting */ + +#define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */ + +/* Software defined banks */ +#define MCE_EXTENDED_BANK 128 +#define MCE_THERMAL_BANK (MCE_EXTENDED_BANK + 0) +#define K8_MCE_THRESHOLD_BASE (MCE_EXTENDED_BANK + 1) + +#define MCE_LOG_LEN 32 +#define MCE_LOG_SIGNATURE "MACHINECHECK" + +/* + * This structure contains all data related to the MCE log. Also + * carries a signature to make it easier to find from external + * debugging tools. Each entry is only valid when its finished flag + * is set. + */ +struct mce_log { + char signature[12]; /* "MACHINECHECK" */ + unsigned len; /* = MCE_LOG_LEN */ + unsigned next; + unsigned flags; + unsigned recordlen; /* length of struct mce */ + struct mce entry[MCE_LOG_LEN]; +}; struct mca_config { bool dont_log_ce; diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 79ce5685ab64..c2934be2446a 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -11,4 +11,8 @@ struct ms_hyperv_info { extern struct ms_hyperv_info ms_hyperv; +void hyperv_callback_vector(void); +void hyperv_vector_handler(struct pt_regs *regs); +void hv_register_vmbus_handler(int irq, irq_handler_t handler); + #endif diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index dba7805176bf..c28fd02f4bf7 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -121,9 +121,12 @@ static inline void x86_restore_msi_irqs(struct pci_dev *dev, int irq) #define arch_teardown_msi_irq x86_teardown_msi_irq #define arch_restore_msi_irqs x86_restore_msi_irqs /* implemented in arch/x86/kernel/apic/io_apic. */ +struct msi_desc; int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); void native_teardown_msi_irq(unsigned int irq); void native_restore_msi_irqs(struct pci_dev *dev, int irq); +int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, + unsigned int irq_base, unsigned int irq_offset); /* default to the implementation in drivers/lib/msi.c */ #define HAVE_DEFAULT_MSI_TEARDOWN_IRQS #define HAVE_DEFAULT_MSI_RESTORE_IRQS diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 4fabcdf1cfa7..57cb63402213 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -29,8 +29,13 @@ #define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23) #define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL -#define AMD_PERFMON_EVENTSEL_GUESTONLY (1ULL << 40) -#define AMD_PERFMON_EVENTSEL_HOSTONLY (1ULL << 41) +#define AMD64_EVENTSEL_INT_CORE_ENABLE (1ULL << 36) +#define AMD64_EVENTSEL_GUESTONLY (1ULL << 40) +#define AMD64_EVENTSEL_HOSTONLY (1ULL << 41) + +#define AMD64_EVENTSEL_INT_CORE_SEL_SHIFT 37 +#define AMD64_EVENTSEL_INT_CORE_SEL_MASK \ + (0xFULL << AMD64_EVENTSEL_INT_CORE_SEL_SHIFT) #define AMD64_EVENTSEL_EVENT \ (ARCH_PERFMON_EVENTSEL_EVENT | (0x0FULL << 32)) @@ -46,8 +51,12 @@ #define AMD64_RAW_EVENT_MASK \ (X86_RAW_EVENT_MASK | \ AMD64_EVENTSEL_EVENT) +#define AMD64_RAW_EVENT_MASK_NB \ + (AMD64_EVENTSEL_EVENT | \ + ARCH_PERFMON_EVENTSEL_UMASK) #define AMD64_NUM_COUNTERS 4 #define AMD64_NUM_COUNTERS_CORE 6 +#define AMD64_NUM_COUNTERS_NB 4 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 5199db2923d3..fc304279b559 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -142,6 +142,11 @@ static inline unsigned long pmd_pfn(pmd_t pmd) return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT; } +static inline unsigned long pud_pfn(pud_t pud) +{ + return (pud_val(pud) & PTE_PFN_MASK) >> PAGE_SHIFT; +} + #define pte_page(pte) pfn_to_page(pte_pfn(pte)) static inline int pmd_large(pmd_t pte) @@ -781,6 +786,18 @@ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) memcpy(dst, src, count * sizeof(pgd_t)); } +/* + * The x86 doesn't have any external MMU info: the kernel page + * tables contain all the necessary information. + */ +static inline void update_mmu_cache(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ +} +static inline void update_mmu_cache_pmd(struct vm_area_struct *vma, + unsigned long addr, pmd_t *pmd) +{ +} #include <asm-generic/pgtable.h> #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index 8faa215a503e..9ee322103c6d 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -66,13 +66,6 @@ do { \ __flush_tlb_one((vaddr)); \ } while (0) -/* - * The i386 doesn't have any external MMU info: the kernel page - * tables contain all the necessary information. - */ -#define update_mmu_cache(vma, address, ptep) do { } while (0) -#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0) - #endif /* !__ASSEMBLY__ */ /* diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 47356f9df82e..615b0c78449f 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -142,9 +142,6 @@ static inline int pgd_large(pgd_t pgd) { return 0; } #define pte_offset_map(dir, address) pte_offset_kernel((dir), (address)) #define pte_unmap(pte) ((void)(pte))/* NOP */ -#define update_mmu_cache(vma, address, ptep) do { } while (0) -#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0) - /* Encode and de-code a swap entry */ #if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE #define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 888184b2fc85..cf500543f6ff 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -943,7 +943,7 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip, extern int get_tsc_mode(unsigned long adr); extern int set_tsc_mode(unsigned int val); -extern int amd_get_nb_id(int cpu); +extern u16 amd_get_nb_id(int cpu); struct aperfmperf { u64 aperf, mperf; diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h index 6c7fc25f2c34..5c6e4fb370f5 100644 --- a/arch/x86/include/asm/required-features.h +++ b/arch/x86/include/asm/required-features.h @@ -47,6 +47,12 @@ # define NEED_NOPL 0 #endif +#ifdef CONFIG_MATOM +# define NEED_MOVBE (1<<(X86_FEATURE_MOVBE & 31)) +#else +# define NEED_MOVBE 0 +#endif + #ifdef CONFIG_X86_64 #ifdef CONFIG_PARAVIRT /* Paravirtualized systems may not have PSE or PGE available */ @@ -80,7 +86,7 @@ #define REQUIRED_MASK2 0 #define REQUIRED_MASK3 (NEED_NOPL) -#define REQUIRED_MASK4 0 +#define REQUIRED_MASK4 (NEED_MOVBE) #define REQUIRED_MASK5 0 #define REQUIRED_MASK6 0 #define REQUIRED_MASK7 0 diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 57693498519c..7669941cc9d2 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -181,19 +181,38 @@ struct x86_platform_ops { }; struct pci_dev; +struct msi_msg; struct x86_msi_ops { int (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type); + void (*compose_msi_msg)(struct pci_dev *dev, unsigned int irq, + unsigned int dest, struct msi_msg *msg, + u8 hpet_id); void (*teardown_msi_irq)(unsigned int irq); void (*teardown_msi_irqs)(struct pci_dev *dev); void (*restore_msi_irqs)(struct pci_dev *dev, int irq); + int (*setup_hpet_msi)(unsigned int irq, unsigned int id); }; +struct IO_APIC_route_entry; +struct io_apic_irq_attr; +struct irq_data; +struct cpumask; + struct x86_io_apic_ops { - void (*init) (void); - unsigned int (*read) (unsigned int apic, unsigned int reg); - void (*write) (unsigned int apic, unsigned int reg, unsigned int value); - void (*modify)(unsigned int apic, unsigned int reg, unsigned int value); + void (*init) (void); + unsigned int (*read) (unsigned int apic, unsigned int reg); + void (*write) (unsigned int apic, unsigned int reg, unsigned int value); + void (*modify) (unsigned int apic, unsigned int reg, unsigned int value); + void (*disable)(void); + void (*print_entries)(unsigned int apic, unsigned int nr_entries); + int (*set_affinity)(struct irq_data *data, + const struct cpumask *mask, + bool force); + int (*setup_entry)(int irq, struct IO_APIC_route_entry *entry, + unsigned int destination, int vector, + struct io_apic_irq_attr *attr); + void (*eoi_ioapic_pin)(int apic, int pin, int vector); }; extern struct x86_init_ops x86_init; diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h index f8fde90bc45e..d8829751b3f8 100644 --- a/arch/x86/include/asm/xor.h +++ b/arch/x86/include/asm/xor.h @@ -1,10 +1,499 @@ #ifdef CONFIG_KMEMCHECK /* kmemcheck doesn't handle MMX/SSE/SSE2 instructions */ # include <asm-generic/xor.h> +#elif !defined(_ASM_X86_XOR_H) +#define _ASM_X86_XOR_H + +/* + * Optimized RAID-5 checksumming functions for SSE. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * You should have received a copy of the GNU General Public License + * (for example /usr/src/linux/COPYING); if not, write to the Free + * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/* + * Cache avoiding checksumming functions utilizing KNI instructions + * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo) + */ + +/* + * Based on + * High-speed RAID5 checksumming functions utilizing SSE instructions. + * Copyright (C) 1998 Ingo Molnar. + */ + +/* + * x86-64 changes / gcc fixes from Andi Kleen. + * Copyright 2002 Andi Kleen, SuSE Labs. + * + * This hasn't been optimized for the hammer yet, but there are likely + * no advantages to be gotten from x86-64 here anyways. + */ + +#include <asm/i387.h> + +#ifdef CONFIG_X86_32 +/* reduce register pressure */ +# define XOR_CONSTANT_CONSTRAINT "i" #else +# define XOR_CONSTANT_CONSTRAINT "re" +#endif + +#define OFFS(x) "16*("#x")" +#define PF_OFFS(x) "256+16*("#x")" +#define PF0(x) " prefetchnta "PF_OFFS(x)"(%[p1]) ;\n" +#define LD(x, y) " movaps "OFFS(x)"(%[p1]), %%xmm"#y" ;\n" +#define ST(x, y) " movaps %%xmm"#y", "OFFS(x)"(%[p1]) ;\n" +#define PF1(x) " prefetchnta "PF_OFFS(x)"(%[p2]) ;\n" +#define PF2(x) " prefetchnta "PF_OFFS(x)"(%[p3]) ;\n" +#define PF3(x) " prefetchnta "PF_OFFS(x)"(%[p4]) ;\n" +#define PF4(x) " prefetchnta "PF_OFFS(x)"(%[p5]) ;\n" +#define XO1(x, y) " xorps "OFFS(x)"(%[p2]), %%xmm"#y" ;\n" +#define XO2(x, y) " xorps "OFFS(x)"(%[p3]), %%xmm"#y" ;\n" +#define XO3(x, y) " xorps "OFFS(x)"(%[p4]), %%xmm"#y" ;\n" +#define XO4(x, y) " xorps "OFFS(x)"(%[p5]), %%xmm"#y" ;\n" +#define NOP(x) + +#define BLK64(pf, op, i) \ + pf(i) \ + op(i, 0) \ + op(i + 1, 1) \ + op(i + 2, 2) \ + op(i + 3, 3) + +static void +xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +{ + unsigned long lines = bytes >> 8; + + kernel_fpu_begin(); + + asm volatile( +#undef BLOCK +#define BLOCK(i) \ + LD(i, 0) \ + LD(i + 1, 1) \ + PF1(i) \ + PF1(i + 2) \ + LD(i + 2, 2) \ + LD(i + 3, 3) \ + PF0(i + 4) \ + PF0(i + 6) \ + XO1(i, 0) \ + XO1(i + 1, 1) \ + XO1(i + 2, 2) \ + XO1(i + 3, 3) \ + ST(i, 0) \ + ST(i + 1, 1) \ + ST(i + 2, 2) \ + ST(i + 3, 3) \ + + + PF0(0) + PF0(2) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " add %[inc], %[p1] ;\n" + " add %[inc], %[p2] ;\n" + " dec %[cnt] ;\n" + " jnz 1b ;\n" + : [cnt] "+r" (lines), + [p1] "+r" (p1), [p2] "+r" (p2) + : [inc] XOR_CONSTANT_CONSTRAINT (256UL) + : "memory"); + + kernel_fpu_end(); +} + +static void +xor_sse_2_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2) +{ + unsigned long lines = bytes >> 8; + + kernel_fpu_begin(); + + asm volatile( +#undef BLOCK +#define BLOCK(i) \ + BLK64(PF0, LD, i) \ + BLK64(PF1, XO1, i) \ + BLK64(NOP, ST, i) \ + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " add %[inc], %[p1] ;\n" + " add %[inc], %[p2] ;\n" + " dec %[cnt] ;\n" + " jnz 1b ;\n" + : [cnt] "+r" (lines), + [p1] "+r" (p1), [p2] "+r" (p2) + : [inc] XOR_CONSTANT_CONSTRAINT (256UL) + : "memory"); + + kernel_fpu_end(); +} + +static void +xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3) +{ + unsigned long lines = bytes >> 8; + + kernel_fpu_begin(); + + asm volatile( +#undef BLOCK +#define BLOCK(i) \ + PF1(i) \ + PF1(i + 2) \ + LD(i, 0) \ + LD(i + 1, 1) \ + LD(i + 2, 2) \ + LD(i + 3, 3) \ + PF2(i) \ + PF2(i + 2) \ + PF0(i + 4) \ + PF0(i + 6) \ + XO1(i, 0) \ + XO1(i + 1, 1) \ + XO1(i + 2, 2) \ + XO1(i + 3, 3) \ + XO2(i, 0) \ + XO2(i + 1, 1) \ + XO2(i + 2, 2) \ + XO2(i + 3, 3) \ + ST(i, 0) \ + ST(i + 1, 1) \ + ST(i + 2, 2) \ + ST(i + 3, 3) \ + + + PF0(0) + PF0(2) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " add %[inc], %[p1] ;\n" + " add %[inc], %[p2] ;\n" + " add %[inc], %[p3] ;\n" + " dec %[cnt] ;\n" + " jnz 1b ;\n" + : [cnt] "+r" (lines), + [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3) + : [inc] XOR_CONSTANT_CONSTRAINT (256UL) + : "memory"); + + kernel_fpu_end(); +} + +static void +xor_sse_3_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3) +{ + unsigned long lines = bytes >> 8; + + kernel_fpu_begin(); + + asm volatile( +#undef BLOCK +#define BLOCK(i) \ + BLK64(PF0, LD, i) \ + BLK64(PF1, XO1, i) \ + BLK64(PF2, XO2, i) \ + BLK64(NOP, ST, i) \ + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " add %[inc], %[p1] ;\n" + " add %[inc], %[p2] ;\n" + " add %[inc], %[p3] ;\n" + " dec %[cnt] ;\n" + " jnz 1b ;\n" + : [cnt] "+r" (lines), + [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3) + : [inc] XOR_CONSTANT_CONSTRAINT (256UL) + : "memory"); + + kernel_fpu_end(); +} + +static void +xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4) +{ + unsigned long lines = bytes >> 8; + + kernel_fpu_begin(); + + asm volatile( +#undef BLOCK +#define BLOCK(i) \ + PF1(i) \ + PF1(i + 2) \ + LD(i, 0) \ + LD(i + 1, 1) \ + LD(i + 2, 2) \ + LD(i + 3, 3) \ + PF2(i) \ + PF2(i + 2) \ + XO1(i, 0) \ + XO1(i + 1, 1) \ + XO1(i + 2, 2) \ + XO1(i + 3, 3) \ + PF3(i) \ + PF3(i + 2) \ + PF0(i + 4) \ + PF0(i + 6) \ + XO2(i, 0) \ + XO2(i + 1, 1) \ + XO2(i + 2, 2) \ + XO2(i + 3, 3) \ + XO3(i, 0) \ + XO3(i + 1, 1) \ + XO3(i + 2, 2) \ + XO3(i + 3, 3) \ + ST(i, 0) \ + ST(i + 1, 1) \ + ST(i + 2, 2) \ + ST(i + 3, 3) \ + + + PF0(0) + PF0(2) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " add %[inc], %[p1] ;\n" + " add %[inc], %[p2] ;\n" + " add %[inc], %[p3] ;\n" + " add %[inc], %[p4] ;\n" + " dec %[cnt] ;\n" + " jnz 1b ;\n" + : [cnt] "+r" (lines), [p1] "+r" (p1), + [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4) + : [inc] XOR_CONSTANT_CONSTRAINT (256UL) + : "memory"); + + kernel_fpu_end(); +} + +static void +xor_sse_4_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4) +{ + unsigned long lines = bytes >> 8; + + kernel_fpu_begin(); + + asm volatile( +#undef BLOCK +#define BLOCK(i) \ + BLK64(PF0, LD, i) \ + BLK64(PF1, XO1, i) \ + BLK64(PF2, XO2, i) \ + BLK64(PF3, XO3, i) \ + BLK64(NOP, ST, i) \ + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " add %[inc], %[p1] ;\n" + " add %[inc], %[p2] ;\n" + " add %[inc], %[p3] ;\n" + " add %[inc], %[p4] ;\n" + " dec %[cnt] ;\n" + " jnz 1b ;\n" + : [cnt] "+r" (lines), [p1] "+r" (p1), + [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4) + : [inc] XOR_CONSTANT_CONSTRAINT (256UL) + : "memory"); + + kernel_fpu_end(); +} + +static void +xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4, unsigned long *p5) +{ + unsigned long lines = bytes >> 8; + + kernel_fpu_begin(); + + asm volatile( +#undef BLOCK +#define BLOCK(i) \ + PF1(i) \ + PF1(i + 2) \ + LD(i, 0) \ + LD(i + 1, 1) \ + LD(i + 2, 2) \ + LD(i + 3, 3) \ + PF2(i) \ + PF2(i + 2) \ + XO1(i, 0) \ + XO1(i + 1, 1) \ + XO1(i + 2, 2) \ + XO1(i + 3, 3) \ + PF3(i) \ + PF3(i + 2) \ + XO2(i, 0) \ + XO2(i + 1, 1) \ + XO2(i + 2, 2) \ + XO2(i + 3, 3) \ + PF4(i) \ + PF4(i + 2) \ + PF0(i + 4) \ + PF0(i + 6) \ + XO3(i, 0) \ + XO3(i + 1, 1) \ + XO3(i + 2, 2) \ + XO3(i + 3, 3) \ + XO4(i, 0) \ + XO4(i + 1, 1) \ + XO4(i + 2, 2) \ + XO4(i + 3, 3) \ + ST(i, 0) \ + ST(i + 1, 1) \ + ST(i + 2, 2) \ + ST(i + 3, 3) \ + + + PF0(0) + PF0(2) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " add %[inc], %[p1] ;\n" + " add %[inc], %[p2] ;\n" + " add %[inc], %[p3] ;\n" + " add %[inc], %[p4] ;\n" + " add %[inc], %[p5] ;\n" + " dec %[cnt] ;\n" + " jnz 1b ;\n" + : [cnt] "+r" (lines), [p1] "+r" (p1), [p2] "+r" (p2), + [p3] "+r" (p3), [p4] "+r" (p4), [p5] "+r" (p5) + : [inc] XOR_CONSTANT_CONSTRAINT (256UL) + : "memory"); + + kernel_fpu_end(); +} + +static void +xor_sse_5_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4, unsigned long *p5) +{ + unsigned long lines = bytes >> 8; + + kernel_fpu_begin(); + + asm volatile( +#undef BLOCK +#define BLOCK(i) \ + BLK64(PF0, LD, i) \ + BLK64(PF1, XO1, i) \ + BLK64(PF2, XO2, i) \ + BLK64(PF3, XO3, i) \ + BLK64(PF4, XO4, i) \ + BLK64(NOP, ST, i) \ + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " add %[inc], %[p1] ;\n" + " add %[inc], %[p2] ;\n" + " add %[inc], %[p3] ;\n" + " add %[inc], %[p4] ;\n" + " add %[inc], %[p5] ;\n" + " dec %[cnt] ;\n" + " jnz 1b ;\n" + : [cnt] "+r" (lines), [p1] "+r" (p1), [p2] "+r" (p2), + [p3] "+r" (p3), [p4] "+r" (p4), [p5] "+r" (p5) + : [inc] XOR_CONSTANT_CONSTRAINT (256UL) + : "memory"); + + kernel_fpu_end(); +} + +static struct xor_block_template xor_block_sse_pf64 = { + .name = "prefetch64-sse", + .do_2 = xor_sse_2_pf64, + .do_3 = xor_sse_3_pf64, + .do_4 = xor_sse_4_pf64, + .do_5 = xor_sse_5_pf64, +}; + +#undef LD +#undef XO1 +#undef XO2 +#undef XO3 +#undef XO4 +#undef ST +#undef NOP +#undef BLK64 +#undef BLOCK + +#undef XOR_CONSTANT_CONSTRAINT + #ifdef CONFIG_X86_32 # include <asm/xor_32.h> #else # include <asm/xor_64.h> #endif -#endif + +#define XOR_SELECT_TEMPLATE(FASTEST) \ + AVX_SELECT(FASTEST) + +#endif /* _ASM_X86_XOR_H */ diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h index f79cb7ec0e06..ce05722e3c68 100644 --- a/arch/x86/include/asm/xor_32.h +++ b/arch/x86/include/asm/xor_32.h @@ -2,7 +2,7 @@ #define _ASM_X86_XOR_32_H /* - * Optimized RAID-5 checksumming functions for MMX and SSE. + * Optimized RAID-5 checksumming functions for MMX. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -529,290 +529,6 @@ static struct xor_block_template xor_block_p5_mmx = { .do_5 = xor_p5_mmx_5, }; -/* - * Cache avoiding checksumming functions utilizing KNI instructions - * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo) - */ - -#define OFFS(x) "16*("#x")" -#define PF_OFFS(x) "256+16*("#x")" -#define PF0(x) " prefetchnta "PF_OFFS(x)"(%1) ;\n" -#define LD(x, y) " movaps "OFFS(x)"(%1), %%xmm"#y" ;\n" -#define ST(x, y) " movaps %%xmm"#y", "OFFS(x)"(%1) ;\n" -#define PF1(x) " prefetchnta "PF_OFFS(x)"(%2) ;\n" -#define PF2(x) " prefetchnta "PF_OFFS(x)"(%3) ;\n" -#define PF3(x) " prefetchnta "PF_OFFS(x)"(%4) ;\n" -#define PF4(x) " prefetchnta "PF_OFFS(x)"(%5) ;\n" -#define PF5(x) " prefetchnta "PF_OFFS(x)"(%6) ;\n" -#define XO1(x, y) " xorps "OFFS(x)"(%2), %%xmm"#y" ;\n" -#define XO2(x, y) " xorps "OFFS(x)"(%3), %%xmm"#y" ;\n" -#define XO3(x, y) " xorps "OFFS(x)"(%4), %%xmm"#y" ;\n" -#define XO4(x, y) " xorps "OFFS(x)"(%5), %%xmm"#y" ;\n" -#define XO5(x, y) " xorps "OFFS(x)"(%6), %%xmm"#y" ;\n" - - -static void -xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) -{ - unsigned long lines = bytes >> 8; - - kernel_fpu_begin(); - - asm volatile( -#undef BLOCK -#define BLOCK(i) \ - LD(i, 0) \ - LD(i + 1, 1) \ - PF1(i) \ - PF1(i + 2) \ - LD(i + 2, 2) \ - LD(i + 3, 3) \ - PF0(i + 4) \ - PF0(i + 6) \ - XO1(i, 0) \ - XO1(i + 1, 1) \ - XO1(i + 2, 2) \ - XO1(i + 3, 3) \ - ST(i, 0) \ - ST(i + 1, 1) \ - ST(i + 2, 2) \ - ST(i + 3, 3) \ - - - PF0(0) - PF0(2) - - " .align 32 ;\n" - " 1: ;\n" - - BLOCK(0) - BLOCK(4) - BLOCK(8) - BLOCK(12) - - " addl $256, %1 ;\n" - " addl $256, %2 ;\n" - " decl %0 ;\n" - " jnz 1b ;\n" - : "+r" (lines), - "+r" (p1), "+r" (p2) - : - : "memory"); - - kernel_fpu_end(); -} - -static void -xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3) -{ - unsigned long lines = bytes >> 8; - - kernel_fpu_begin(); - - asm volatile( -#undef BLOCK -#define BLOCK(i) \ - PF1(i) \ - PF1(i + 2) \ - LD(i,0) \ - LD(i + 1, 1) \ - LD(i + 2, 2) \ - LD(i + 3, 3) \ - PF2(i) \ - PF2(i + 2) \ - PF0(i + 4) \ - PF0(i + 6) \ - XO1(i,0) \ - XO1(i + 1, 1) \ - XO1(i + 2, 2) \ - XO1(i + 3, 3) \ - XO2(i,0) \ - XO2(i + 1, 1) \ - XO2(i + 2, 2) \ - XO2(i + 3, 3) \ - ST(i,0) \ - ST(i + 1, 1) \ - ST(i + 2, 2) \ - ST(i + 3, 3) \ - - - PF0(0) - PF0(2) - - " .align 32 ;\n" - " 1: ;\n" - - BLOCK(0) - BLOCK(4) - BLOCK(8) - BLOCK(12) - - " addl $256, %1 ;\n" - " addl $256, %2 ;\n" - " addl $256, %3 ;\n" - " decl %0 ;\n" - " jnz 1b ;\n" - : "+r" (lines), - "+r" (p1), "+r"(p2), "+r"(p3) - : - : "memory" ); - - kernel_fpu_end(); -} - -static void -xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4) -{ - unsigned long lines = bytes >> 8; - - kernel_fpu_begin(); - - asm volatile( -#undef BLOCK -#define BLOCK(i) \ - PF1(i) \ - PF1(i + 2) \ - LD(i,0) \ - LD(i + 1, 1) \ - LD(i + 2, 2) \ - LD(i + 3, 3) \ - PF2(i) \ - PF2(i + 2) \ - XO1(i,0) \ - XO1(i + 1, 1) \ - XO1(i + 2, 2) \ - XO1(i + 3, 3) \ - PF3(i) \ - PF3(i + 2) \ - PF0(i + 4) \ - PF0(i + 6) \ - XO2(i,0) \ - XO2(i + 1, 1) \ - XO2(i + 2, 2) \ - XO2(i + 3, 3) \ - XO3(i,0) \ - XO3(i + 1, 1) \ - XO3(i + 2, 2) \ - XO3(i + 3, 3) \ - ST(i,0) \ - ST(i + 1, 1) \ - ST(i + 2, 2) \ - ST(i + 3, 3) \ - - - PF0(0) - PF0(2) - - " .align 32 ;\n" - " 1: ;\n" - - BLOCK(0) - BLOCK(4) - BLOCK(8) - BLOCK(12) - - " addl $256, %1 ;\n" - " addl $256, %2 ;\n" - " addl $256, %3 ;\n" - " addl $256, %4 ;\n" - " decl %0 ;\n" - " jnz 1b ;\n" - : "+r" (lines), - "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4) - : - : "memory" ); - - kernel_fpu_end(); -} - -static void -xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4, unsigned long *p5) -{ - unsigned long lines = bytes >> 8; - - kernel_fpu_begin(); - - /* Make sure GCC forgets anything it knows about p4 or p5, - such that it won't pass to the asm volatile below a - register that is shared with any other variable. That's - because we modify p4 and p5 there, but we can't mark them - as read/write, otherwise we'd overflow the 10-asm-operands - limit of GCC < 3.1. */ - asm("" : "+r" (p4), "+r" (p5)); - - asm volatile( -#undef BLOCK -#define BLOCK(i) \ - PF1(i) \ - PF1(i + 2) \ - LD(i,0) \ - LD(i + 1, 1) \ - LD(i + 2, 2) \ - LD(i + 3, 3) \ - PF2(i) \ - PF2(i + 2) \ - XO1(i,0) \ - XO1(i + 1, 1) \ - XO1(i + 2, 2) \ - XO1(i + 3, 3) \ - PF3(i) \ - PF3(i + 2) \ - XO2(i,0) \ - XO2(i + 1, 1) \ - XO2(i + 2, 2) \ - XO2(i + 3, 3) \ - PF4(i) \ - PF4(i + 2) \ - PF0(i + 4) \ - PF0(i + 6) \ - XO3(i,0) \ - XO3(i + 1, 1) \ - XO3(i + 2, 2) \ - XO3(i + 3, 3) \ - XO4(i,0) \ - XO4(i + 1, 1) \ - XO4(i + 2, 2) \ - XO4(i + 3, 3) \ - ST(i,0) \ - ST(i + 1, 1) \ - ST(i + 2, 2) \ - ST(i + 3, 3) \ - - - PF0(0) - PF0(2) - - " .align 32 ;\n" - " 1: ;\n" - - BLOCK(0) - BLOCK(4) - BLOCK(8) - BLOCK(12) - - " addl $256, %1 ;\n" - " addl $256, %2 ;\n" - " addl $256, %3 ;\n" - " addl $256, %4 ;\n" - " addl $256, %5 ;\n" - " decl %0 ;\n" - " jnz 1b ;\n" - : "+r" (lines), - "+r" (p1), "+r" (p2), "+r" (p3) - : "r" (p4), "r" (p5) - : "memory"); - - /* p4 and p5 were modified, and now the variables are dead. - Clobber them just to be sure nobody does something stupid - like assuming they have some legal value. */ - asm("" : "=r" (p4), "=r" (p5)); - - kernel_fpu_end(); -} - static struct xor_block_template xor_block_pIII_sse = { .name = "pIII_sse", .do_2 = xor_sse_2, @@ -827,26 +543,25 @@ static struct xor_block_template xor_block_pIII_sse = { /* Also try the generic routines. */ #include <asm-generic/xor.h> +/* We force the use of the SSE xor block because it can write around L2. + We may also be able to load into the L1 only depending on how the cpu + deals with a load to a line that is being prefetched. */ #undef XOR_TRY_TEMPLATES #define XOR_TRY_TEMPLATES \ do { \ - xor_speed(&xor_block_8regs); \ - xor_speed(&xor_block_8regs_p); \ - xor_speed(&xor_block_32regs); \ - xor_speed(&xor_block_32regs_p); \ AVX_XOR_SPEED; \ - if (cpu_has_xmm) \ + if (cpu_has_xmm) { \ xor_speed(&xor_block_pIII_sse); \ - if (cpu_has_mmx) { \ + xor_speed(&xor_block_sse_pf64); \ + } else if (cpu_has_mmx) { \ xor_speed(&xor_block_pII_mmx); \ xor_speed(&xor_block_p5_mmx); \ + } else { \ + xor_speed(&xor_block_8regs); \ + xor_speed(&xor_block_8regs_p); \ + xor_speed(&xor_block_32regs); \ + xor_speed(&xor_block_32regs_p); \ } \ } while (0) -/* We force the use of the SSE xor block because it can write around L2. - We may also be able to load into the L1 only depending on how the cpu - deals with a load to a line that is being prefetched. */ -#define XOR_SELECT_TEMPLATE(FASTEST) \ - AVX_SELECT(cpu_has_xmm ? &xor_block_pIII_sse : FASTEST) - #endif /* _ASM_X86_XOR_32_H */ diff --git a/arch/x86/include/asm/xor_64.h b/arch/x86/include/asm/xor_64.h index 87ac522c4af5..546f1e3b87cc 100644 --- a/arch/x86/include/asm/xor_64.h +++ b/arch/x86/include/asm/xor_64.h @@ -1,301 +1,6 @@ #ifndef _ASM_X86_XOR_64_H #define _ASM_X86_XOR_64_H -/* - * Optimized RAID-5 checksumming functions for MMX and SSE. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * You should have received a copy of the GNU General Public License - * (for example /usr/src/linux/COPYING); if not, write to the Free - * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - - -/* - * Cache avoiding checksumming functions utilizing KNI instructions - * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo) - */ - -/* - * Based on - * High-speed RAID5 checksumming functions utilizing SSE instructions. - * Copyright (C) 1998 Ingo Molnar. - */ - -/* - * x86-64 changes / gcc fixes from Andi Kleen. - * Copyright 2002 Andi Kleen, SuSE Labs. - * - * This hasn't been optimized for the hammer yet, but there are likely - * no advantages to be gotten from x86-64 here anyways. - */ - -#include <asm/i387.h> - -#define OFFS(x) "16*("#x")" -#define PF_OFFS(x) "256+16*("#x")" -#define PF0(x) " prefetchnta "PF_OFFS(x)"(%[p1]) ;\n" -#define LD(x, y) " movaps "OFFS(x)"(%[p1]), %%xmm"#y" ;\n" -#define ST(x, y) " movaps %%xmm"#y", "OFFS(x)"(%[p1]) ;\n" -#define PF1(x) " prefetchnta "PF_OFFS(x)"(%[p2]) ;\n" -#define PF2(x) " prefetchnta "PF_OFFS(x)"(%[p3]) ;\n" -#define PF3(x) " prefetchnta "PF_OFFS(x)"(%[p4]) ;\n" -#define PF4(x) " prefetchnta "PF_OFFS(x)"(%[p5]) ;\n" -#define PF5(x) " prefetchnta "PF_OFFS(x)"(%[p6]) ;\n" -#define XO1(x, y) " xorps "OFFS(x)"(%[p2]), %%xmm"#y" ;\n" -#define XO2(x, y) " xorps "OFFS(x)"(%[p3]), %%xmm"#y" ;\n" -#define XO3(x, y) " xorps "OFFS(x)"(%[p4]), %%xmm"#y" ;\n" -#define XO4(x, y) " xorps "OFFS(x)"(%[p5]), %%xmm"#y" ;\n" -#define XO5(x, y) " xorps "OFFS(x)"(%[p6]), %%xmm"#y" ;\n" - - -static void -xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) -{ - unsigned int lines = bytes >> 8; - - kernel_fpu_begin(); - - asm volatile( -#undef BLOCK -#define BLOCK(i) \ - LD(i, 0) \ - LD(i + 1, 1) \ - PF1(i) \ - PF1(i + 2) \ - LD(i + 2, 2) \ - LD(i + 3, 3) \ - PF0(i + 4) \ - PF0(i + 6) \ - XO1(i, 0) \ - XO1(i + 1, 1) \ - XO1(i + 2, 2) \ - XO1(i + 3, 3) \ - ST(i, 0) \ - ST(i + 1, 1) \ - ST(i + 2, 2) \ - ST(i + 3, 3) \ - - - PF0(0) - PF0(2) - - " .align 32 ;\n" - " 1: ;\n" - - BLOCK(0) - BLOCK(4) - BLOCK(8) - BLOCK(12) - - " addq %[inc], %[p1] ;\n" - " addq %[inc], %[p2] ;\n" - " decl %[cnt] ; jnz 1b" - : [p1] "+r" (p1), [p2] "+r" (p2), [cnt] "+r" (lines) - : [inc] "r" (256UL) - : "memory"); - - kernel_fpu_end(); -} - -static void -xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3) -{ - unsigned int lines = bytes >> 8; - - kernel_fpu_begin(); - asm volatile( -#undef BLOCK -#define BLOCK(i) \ - PF1(i) \ - PF1(i + 2) \ - LD(i, 0) \ - LD(i + 1, 1) \ - LD(i + 2, 2) \ - LD(i + 3, 3) \ - PF2(i) \ - PF2(i + 2) \ - PF0(i + 4) \ - PF0(i + 6) \ - XO1(i, 0) \ - XO1(i + 1, 1) \ - XO1(i + 2, 2) \ - XO1(i + 3, 3) \ - XO2(i, 0) \ - XO2(i + 1, 1) \ - XO2(i + 2, 2) \ - XO2(i + 3, 3) \ - ST(i, 0) \ - ST(i + 1, 1) \ - ST(i + 2, 2) \ - ST(i + 3, 3) \ - - - PF0(0) - PF0(2) - - " .align 32 ;\n" - " 1: ;\n" - - BLOCK(0) - BLOCK(4) - BLOCK(8) - BLOCK(12) - - " addq %[inc], %[p1] ;\n" - " addq %[inc], %[p2] ;\n" - " addq %[inc], %[p3] ;\n" - " decl %[cnt] ; jnz 1b" - : [cnt] "+r" (lines), - [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3) - : [inc] "r" (256UL) - : "memory"); - kernel_fpu_end(); -} - -static void -xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4) -{ - unsigned int lines = bytes >> 8; - - kernel_fpu_begin(); - - asm volatile( -#undef BLOCK -#define BLOCK(i) \ - PF1(i) \ - PF1(i + 2) \ - LD(i, 0) \ - LD(i + 1, 1) \ - LD(i + 2, 2) \ - LD(i + 3, 3) \ - PF2(i) \ - PF2(i + 2) \ - XO1(i, 0) \ - XO1(i + 1, 1) \ - XO1(i + 2, 2) \ - XO1(i + 3, 3) \ - PF3(i) \ - PF3(i + 2) \ - PF0(i + 4) \ - PF0(i + 6) \ - XO2(i, 0) \ - XO2(i + 1, 1) \ - XO2(i + 2, 2) \ - XO2(i + 3, 3) \ - XO3(i, 0) \ - XO3(i + 1, 1) \ - XO3(i + 2, 2) \ - XO3(i + 3, 3) \ - ST(i, 0) \ - ST(i + 1, 1) \ - ST(i + 2, 2) \ - ST(i + 3, 3) \ - - - PF0(0) - PF0(2) - - " .align 32 ;\n" - " 1: ;\n" - - BLOCK(0) - BLOCK(4) - BLOCK(8) - BLOCK(12) - - " addq %[inc], %[p1] ;\n" - " addq %[inc], %[p2] ;\n" - " addq %[inc], %[p3] ;\n" - " addq %[inc], %[p4] ;\n" - " decl %[cnt] ; jnz 1b" - : [cnt] "+c" (lines), - [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4) - : [inc] "r" (256UL) - : "memory" ); - - kernel_fpu_end(); -} - -static void -xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4, unsigned long *p5) -{ - unsigned int lines = bytes >> 8; - - kernel_fpu_begin(); - - asm volatile( -#undef BLOCK -#define BLOCK(i) \ - PF1(i) \ - PF1(i + 2) \ - LD(i, 0) \ - LD(i + 1, 1) \ - LD(i + 2, 2) \ - LD(i + 3, 3) \ - PF2(i) \ - PF2(i + 2) \ - XO1(i, 0) \ - XO1(i + 1, 1) \ - XO1(i + 2, 2) \ - XO1(i + 3, 3) \ - PF3(i) \ - PF3(i + 2) \ - XO2(i, 0) \ - XO2(i + 1, 1) \ - XO2(i + 2, 2) \ - XO2(i + 3, 3) \ - PF4(i) \ - PF4(i + 2) \ - PF0(i + 4) \ - PF0(i + 6) \ - XO3(i, 0) \ - XO3(i + 1, 1) \ - XO3(i + 2, 2) \ - XO3(i + 3, 3) \ - XO4(i, 0) \ - XO4(i + 1, 1) \ - XO4(i + 2, 2) \ - XO4(i + 3, 3) \ - ST(i, 0) \ - ST(i + 1, 1) \ - ST(i + 2, 2) \ - ST(i + 3, 3) \ - - - PF0(0) - PF0(2) - - " .align 32 ;\n" - " 1: ;\n" - - BLOCK(0) - BLOCK(4) - BLOCK(8) - BLOCK(12) - - " addq %[inc], %[p1] ;\n" - " addq %[inc], %[p2] ;\n" - " addq %[inc], %[p3] ;\n" - " addq %[inc], %[p4] ;\n" - " addq %[inc], %[p5] ;\n" - " decl %[cnt] ; jnz 1b" - : [cnt] "+c" (lines), - [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4), - [p5] "+r" (p5) - : [inc] "r" (256UL) - : "memory"); - - kernel_fpu_end(); -} - static struct xor_block_template xor_block_sse = { .name = "generic_sse", .do_2 = xor_sse_2, @@ -308,17 +13,15 @@ static struct xor_block_template xor_block_sse = { /* Also try the AVX routines */ #include <asm/xor_avx.h> +/* We force the use of the SSE xor block because it can write around L2. + We may also be able to load into the L1 only depending on how the cpu + deals with a load to a line that is being prefetched. */ #undef XOR_TRY_TEMPLATES #define XOR_TRY_TEMPLATES \ do { \ AVX_XOR_SPEED; \ + xor_speed(&xor_block_sse_pf64); \ xor_speed(&xor_block_sse); \ } while (0) -/* We force the use of the SSE xor block because it can write around L2. - We may also be able to load into the L1 only depending on how the cpu - deals with a load to a line that is being prefetched. */ -#define XOR_SELECT_TEMPLATE(FASTEST) \ - AVX_SELECT(&xor_block_sse) - #endif /* _ASM_X86_XOR_64_H */ diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h index 58c829871c31..a0eab85ce7b8 100644 --- a/arch/x86/include/uapi/asm/mce.h +++ b/arch/x86/include/uapi/asm/mce.h @@ -4,66 +4,6 @@ #include <linux/types.h> #include <asm/ioctls.h> -/* - * Machine Check support for x86 - */ - -/* MCG_CAP register defines */ -#define MCG_BANKCNT_MASK 0xff /* Number of Banks */ -#define MCG_CTL_P (1ULL<<8) /* MCG_CTL register available */ -#define MCG_EXT_P (1ULL<<9) /* Extended registers available */ -#define MCG_CMCI_P (1ULL<<10) /* CMCI supported */ -#define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */ -#define MCG_EXT_CNT_SHIFT 16 -#define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT) -#define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */ - -/* MCG_STATUS register defines */ -#define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */ -#define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */ -#define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */ - -/* MCi_STATUS register defines */ -#define MCI_STATUS_VAL (1ULL<<63) /* valid error */ -#define MCI_STATUS_OVER (1ULL<<62) /* previous errors lost */ -#define MCI_STATUS_UC (1ULL<<61) /* uncorrected error */ -#define MCI_STATUS_EN (1ULL<<60) /* error enabled */ -#define MCI_STATUS_MISCV (1ULL<<59) /* misc error reg. valid */ -#define MCI_STATUS_ADDRV (1ULL<<58) /* addr reg. valid */ -#define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */ -#define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */ -#define MCI_STATUS_AR (1ULL<<55) /* Action required */ -#define MCACOD 0xffff /* MCA Error Code */ - -/* Architecturally defined codes from SDM Vol. 3B Chapter 15 */ -#define MCACOD_SCRUB 0x00C0 /* 0xC0-0xCF Memory Scrubbing */ -#define MCACOD_SCRUBMSK 0xfff0 -#define MCACOD_L3WB 0x017A /* L3 Explicit Writeback */ -#define MCACOD_DATA 0x0134 /* Data Load */ -#define MCACOD_INSTR 0x0150 /* Instruction Fetch */ - -/* MCi_MISC register defines */ -#define MCI_MISC_ADDR_LSB(m) ((m) & 0x3f) -#define MCI_MISC_ADDR_MODE(m) (((m) >> 6) & 7) -#define MCI_MISC_ADDR_SEGOFF 0 /* segment offset */ -#define MCI_MISC_ADDR_LINEAR 1 /* linear address */ -#define MCI_MISC_ADDR_PHYS 2 /* physical address */ -#define MCI_MISC_ADDR_MEM 3 /* memory address */ -#define MCI_MISC_ADDR_GENERIC 7 /* generic */ - -/* CTL2 register defines */ -#define MCI_CTL2_CMCI_EN (1ULL << 30) -#define MCI_CTL2_CMCI_THRESHOLD_MASK 0x7fffULL - -#define MCJ_CTX_MASK 3 -#define MCJ_CTX(flags) ((flags) & MCJ_CTX_MASK) -#define MCJ_CTX_RANDOM 0 /* inject context: random */ -#define MCJ_CTX_PROCESS 0x1 /* inject context: process */ -#define MCJ_CTX_IRQ 0x2 /* inject context: IRQ */ -#define MCJ_NMI_BROADCAST 0x4 /* do NMI broadcasting */ -#define MCJ_EXCEPTION 0x8 /* raise as exception */ -#define MCJ_IRQ_BRAODCAST 0x10 /* do IRQ broadcasting */ - /* Fields are zero when not available */ struct mce { __u64 status; @@ -87,35 +27,8 @@ struct mce { __u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */ }; -/* - * This structure contains all data related to the MCE log. Also - * carries a signature to make it easier to find from external - * debugging tools. Each entry is only valid when its finished flag - * is set. - */ - -#define MCE_LOG_LEN 32 - -struct mce_log { - char signature[12]; /* "MACHINECHECK" */ - unsigned len; /* = MCE_LOG_LEN */ - unsigned next; - unsigned flags; - unsigned recordlen; /* length of struct mce */ - struct mce entry[MCE_LOG_LEN]; -}; - -#define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */ - -#define MCE_LOG_SIGNATURE "MACHINECHECK" - #define MCE_GET_RECORD_LEN _IOR('M', 1, int) #define MCE_GET_LOG_LEN _IOR('M', 2, int) #define MCE_GETCLEAR_FLAGS _IOR('M', 3, int) -/* Software defined banks */ -#define MCE_EXTENDED_BANK 128 -#define MCE_THERMAL_BANK MCE_EXTENDED_BANK + 0 -#define K8_MCE_THRESHOLD_BASE (MCE_EXTENDED_BANK + 1) - #endif /* _UAPI_ASM_X86_MCE_H */ diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index 433a59fb1a74..075a40255591 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h @@ -194,6 +194,8 @@ /* Fam 15h MSRs */ #define MSR_F15H_PERF_CTL 0xc0010200 #define MSR_F15H_PERF_CTR 0xc0010201 +#define MSR_F15H_NB_PERF_CTL 0xc0010240 +#define MSR_F15H_NB_PERF_CTR 0xc0010241 /* Fam 10h MSRs */ #define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058 diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 34e923a53762..ac3b3d002833 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -65,8 +65,7 @@ obj-$(CONFIG_X86_TSC) += trace_clock.o obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o -obj-$(CONFIG_KPROBES) += kprobes.o -obj-$(CONFIG_OPTPROBES) += kprobes-opt.o +obj-y += kprobes/ obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o obj-$(CONFIG_KGDB) += kgdb.o diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c index afdc3f756dea..c9876efecafb 100644 --- a/arch/x86/kernel/apb_timer.c +++ b/arch/x86/kernel/apb_timer.c @@ -240,7 +240,7 @@ static int apbt_cpuhp_notify(struct notifier_block *n, dw_apb_clockevent_pause(adev->timer); if (system_state == SYSTEM_RUNNING) { pr_debug("skipping APBT CPU %lu offline\n", cpu); - } else if (adev) { + } else { pr_debug("APBT clockevent for cpu %lu offline\n", cpu); dw_apb_clockevent_stop(adev->timer); } @@ -311,7 +311,6 @@ void __init apbt_time_init(void) #ifdef CONFIG_SMP int i; struct sfi_timer_table_entry *p_mtmr; - unsigned int percpu_timer; struct apbt_dev *adev; #endif @@ -346,13 +345,10 @@ void __init apbt_time_init(void) return; } pr_debug("%s: %d CPUs online\n", __func__, num_online_cpus()); - if (num_possible_cpus() <= sfi_mtimer_num) { - percpu_timer = 1; + if (num_possible_cpus() <= sfi_mtimer_num) apbt_num_timers_used = num_possible_cpus(); - } else { - percpu_timer = 0; + else apbt_num_timers_used = 1; - } pr_debug("%s: %d APB timers used\n", __func__, apbt_num_timers_used); /* here we set up per CPU timer data structure */ diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index b994cc84aa7e..a5b4dce1b7ac 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1477,8 +1477,7 @@ void __init bsp_end_local_APIC_setup(void) * Now that local APIC setup is completed for BP, configure the fault * handling for interrupt remapping. */ - if (irq_remapping_enabled) - irq_remap_enable_fault_handling(); + irq_remap_enable_fault_handling(); } @@ -2251,8 +2250,7 @@ static int lapic_suspend(void) local_irq_save(flags); disable_local_APIC(); - if (irq_remapping_enabled) - irq_remapping_disable(); + irq_remapping_disable(); local_irq_restore(flags); return 0; @@ -2268,16 +2266,15 @@ static void lapic_resume(void) return; local_irq_save(flags); - if (irq_remapping_enabled) { - /* - * IO-APIC and PIC have their own resume routines. - * We just mask them here to make sure the interrupt - * subsystem is completely quiet while we enable x2apic - * and interrupt-remapping. - */ - mask_ioapic_entries(); - legacy_pic->mask_all(); - } + + /* + * IO-APIC and PIC have their own resume routines. + * We just mask them here to make sure the interrupt + * subsystem is completely quiet while we enable x2apic + * and interrupt-remapping. + */ + mask_ioapic_entries(); + legacy_pic->mask_all(); if (x2apic_mode) enable_x2apic(); @@ -2320,8 +2317,7 @@ static void lapic_resume(void) apic_write(APIC_ESR, 0); apic_read(APIC_ESR); - if (irq_remapping_enabled) - irq_remapping_reenable(x2apic_mode); + irq_remapping_reenable(x2apic_mode); local_irq_restore(flags); } diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index b739d398bb29..9ed796ccc32c 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -68,22 +68,6 @@ #define for_each_irq_pin(entry, head) \ for (entry = head; entry; entry = entry->next) -#ifdef CONFIG_IRQ_REMAP -static void irq_remap_modify_chip_defaults(struct irq_chip *chip); -static inline bool irq_remapped(struct irq_cfg *cfg) -{ - return cfg->irq_2_iommu.iommu != NULL; -} -#else -static inline bool irq_remapped(struct irq_cfg *cfg) -{ - return false; -} -static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip) -{ -} -#endif - /* * Is the SiS APIC rmw bug present ? * -1 = don't know, 0 = no, 1 = yes @@ -300,9 +284,9 @@ static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node) return cfg; } -static int alloc_irq_from(unsigned int from, int node) +static int alloc_irqs_from(unsigned int from, unsigned int count, int node) { - return irq_alloc_desc_from(from, node); + return irq_alloc_descs_from(from, count, node); } static void free_irq_at(unsigned int at, struct irq_cfg *cfg) @@ -326,7 +310,7 @@ static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) + (mpc_ioapic_addr(idx) & ~PAGE_MASK); } -static inline void io_apic_eoi(unsigned int apic, unsigned int vector) +void io_apic_eoi(unsigned int apic, unsigned int vector) { struct io_apic __iomem *io_apic = io_apic_base(apic); writel(vector, &io_apic->eoi); @@ -573,19 +557,10 @@ static void unmask_ioapic_irq(struct irq_data *data) * Otherwise, we simulate the EOI message manually by changing the trigger * mode to edge and then back to level, with RTE being masked during this. */ -static void __eoi_ioapic_pin(int apic, int pin, int vector, struct irq_cfg *cfg) +void native_eoi_ioapic_pin(int apic, int pin, int vector) { if (mpc_ioapic_ver(apic) >= 0x20) { - /* - * Intr-remapping uses pin number as the virtual vector - * in the RTE. Actual vector is programmed in - * intr-remapping table entry. Hence for the io-apic - * EOI we use the pin number. - */ - if (cfg && irq_remapped(cfg)) - io_apic_eoi(apic, pin); - else - io_apic_eoi(apic, vector); + io_apic_eoi(apic, vector); } else { struct IO_APIC_route_entry entry, entry1; @@ -606,14 +581,15 @@ static void __eoi_ioapic_pin(int apic, int pin, int vector, struct irq_cfg *cfg) } } -static void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) +void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) { struct irq_pin_list *entry; unsigned long flags; raw_spin_lock_irqsave(&ioapic_lock, flags); for_each_irq_pin(entry, cfg->irq_2_pin) - __eoi_ioapic_pin(entry->apic, entry->pin, cfg->vector, cfg); + x86_io_apic_ops.eoi_ioapic_pin(entry->apic, entry->pin, + cfg->vector); raw_spin_unlock_irqrestore(&ioapic_lock, flags); } @@ -650,7 +626,7 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) } raw_spin_lock_irqsave(&ioapic_lock, flags); - __eoi_ioapic_pin(apic, pin, entry.vector, NULL); + x86_io_apic_ops.eoi_ioapic_pin(apic, pin, entry.vector); raw_spin_unlock_irqrestore(&ioapic_lock, flags); } @@ -1304,25 +1280,18 @@ static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg, fasteoi = false; } - if (irq_remapped(cfg)) { - irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); - irq_remap_modify_chip_defaults(chip); + if (setup_remapped_irq(irq, cfg, chip)) fasteoi = trigger != 0; - } hdl = fasteoi ? handle_fasteoi_irq : handle_edge_irq; irq_set_chip_and_handler_name(irq, chip, hdl, fasteoi ? "fasteoi" : "edge"); } -static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry, - unsigned int destination, int vector, - struct io_apic_irq_attr *attr) +int native_setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry, + unsigned int destination, int vector, + struct io_apic_irq_attr *attr) { - if (irq_remapping_enabled) - return setup_ioapic_remapped_entry(irq, entry, destination, - vector, attr); - memset(entry, 0, sizeof(*entry)); entry->delivery_mode = apic->irq_delivery_mode; @@ -1370,8 +1339,8 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg, attr->ioapic, mpc_ioapic_id(attr->ioapic), attr->ioapic_pin, cfg->vector, irq, attr->trigger, attr->polarity, dest); - if (setup_ioapic_entry(irq, &entry, dest, cfg->vector, attr)) { - pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n", + if (x86_io_apic_ops.setup_entry(irq, &entry, dest, cfg->vector, attr)) { + pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n", mpc_ioapic_id(attr->ioapic), attr->ioapic_pin); __clear_irq_vector(irq, cfg); @@ -1479,9 +1448,6 @@ static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx, struct IO_APIC_route_entry entry; unsigned int dest; - if (irq_remapping_enabled) - return; - memset(&entry, 0, sizeof(entry)); /* @@ -1513,9 +1479,63 @@ static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx, ioapic_write_entry(ioapic_idx, pin, entry); } -__apicdebuginit(void) print_IO_APIC(int ioapic_idx) +void native_io_apic_print_entries(unsigned int apic, unsigned int nr_entries) { int i; + + pr_debug(" NR Dst Mask Trig IRR Pol Stat Dmod Deli Vect:\n"); + + for (i = 0; i <= nr_entries; i++) { + struct IO_APIC_route_entry entry; + + entry = ioapic_read_entry(apic, i); + + pr_debug(" %02x %02X ", i, entry.dest); + pr_cont("%1d %1d %1d %1d %1d " + "%1d %1d %02X\n", + entry.mask, + entry.trigger, + entry.irr, + entry.polarity, + entry.delivery_status, + entry.dest_mode, + entry.delivery_mode, + entry.vector); + } +} + +void intel_ir_io_apic_print_entries(unsigned int apic, + unsigned int nr_entries) +{ + int i; + + pr_debug(" NR Indx Fmt Mask Trig IRR Pol Stat Indx2 Zero Vect:\n"); + + for (i = 0; i <= nr_entries; i++) { + struct IR_IO_APIC_route_entry *ir_entry; + struct IO_APIC_route_entry entry; + + entry = ioapic_read_entry(apic, i); + + ir_entry = (struct IR_IO_APIC_route_entry *)&entry; + + pr_debug(" %02x %04X ", i, ir_entry->index); + pr_cont("%1d %1d %1d %1d %1d " + "%1d %1d %X %02X\n", + ir_entry->format, + ir_entry->mask, + ir_entry->trigger, + ir_entry->irr, + ir_entry->polarity, + ir_entry->delivery_status, + ir_entry->index2, + ir_entry->zero, + ir_entry->vector); + } +} + +__apicdebuginit(void) print_IO_APIC(int ioapic_idx) +{ union IO_APIC_reg_00 reg_00; union IO_APIC_reg_01 reg_01; union IO_APIC_reg_02 reg_02; @@ -1568,58 +1588,7 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx) printk(KERN_DEBUG ".... IRQ redirection table:\n"); - if (irq_remapping_enabled) { - printk(KERN_DEBUG " NR Indx Fmt Mask Trig IRR" - " Pol Stat Indx2 Zero Vect:\n"); - } else { - printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol" - " Stat Dmod Deli Vect:\n"); - } - - for (i = 0; i <= reg_01.bits.entries; i++) { - if (irq_remapping_enabled) { - struct IO_APIC_route_entry entry; - struct IR_IO_APIC_route_entry *ir_entry; - - entry = ioapic_read_entry(ioapic_idx, i); - ir_entry = (struct IR_IO_APIC_route_entry *) &entry; - printk(KERN_DEBUG " %02x %04X ", - i, - ir_entry->index - ); - pr_cont("%1d %1d %1d %1d %1d " - "%1d %1d %X %02X\n", - ir_entry->format, - ir_entry->mask, - ir_entry->trigger, - ir_entry->irr, - ir_entry->polarity, - ir_entry->delivery_status, - ir_entry->index2, - ir_entry->zero, - ir_entry->vector - ); - } else { - struct IO_APIC_route_entry entry; - - entry = ioapic_read_entry(ioapic_idx, i); - printk(KERN_DEBUG " %02x %02X ", - i, - entry.dest - ); - pr_cont("%1d %1d %1d %1d %1d " - "%1d %1d %02X\n", - entry.mask, - entry.trigger, - entry.irr, - entry.polarity, - entry.delivery_status, - entry.dest_mode, - entry.delivery_mode, - entry.vector - ); - } - } + x86_io_apic_ops.print_entries(ioapic_idx, reg_01.bits.entries); } __apicdebuginit(void) print_IO_APICs(void) @@ -1921,30 +1890,14 @@ void __init enable_IO_APIC(void) clear_IO_APIC(); } -/* - * Not an __init, needed by the reboot code - */ -void disable_IO_APIC(void) +void native_disable_io_apic(void) { /* - * Clear the IO-APIC before rebooting: - */ - clear_IO_APIC(); - - if (!legacy_pic->nr_legacy_irqs) - return; - - /* * If the i8259 is routed through an IOAPIC * Put that IOAPIC in virtual wire mode * so legacy interrupts can be delivered. - * - * With interrupt-remapping, for now we will use virtual wire A mode, - * as virtual wire B is little complex (need to configure both - * IOAPIC RTE as well as interrupt-remapping table entry). - * As this gets called during crash dump, keep this simple for now. */ - if (ioapic_i8259.pin != -1 && !irq_remapping_enabled) { + if (ioapic_i8259.pin != -1) { struct IO_APIC_route_entry entry; memset(&entry, 0, sizeof(entry)); @@ -1964,12 +1917,25 @@ void disable_IO_APIC(void) ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry); } + if (cpu_has_apic || apic_from_smp_config()) + disconnect_bsp_APIC(ioapic_i8259.pin != -1); + +} + +/* + * Not an __init, needed by the reboot code + */ +void disable_IO_APIC(void) +{ /* - * Use virtual wire A mode when interrupt remapping is enabled. + * Clear the IO-APIC before rebooting: */ - if (cpu_has_apic || apic_from_smp_config()) - disconnect_bsp_APIC(!irq_remapping_enabled && - ioapic_i8259.pin != -1); + clear_IO_APIC(); + + if (!legacy_pic->nr_legacy_irqs) + return; + + x86_io_apic_ops.disable(); } #ifdef CONFIG_X86_32 @@ -2322,12 +2288,8 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq apic = entry->apic; pin = entry->pin; - /* - * With interrupt-remapping, destination information comes - * from interrupt-remapping table entry. - */ - if (!irq_remapped(cfg)) - io_apic_write(apic, 0x11 + pin*2, dest); + + io_apic_write(apic, 0x11 + pin*2, dest); reg = io_apic_read(apic, 0x10 + pin*2); reg &= ~IO_APIC_REDIR_VECTOR_MASK; reg |= vector; @@ -2369,9 +2331,10 @@ int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, return 0; } -static int -ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, - bool force) + +int native_ioapic_set_affinity(struct irq_data *data, + const struct cpumask *mask, + bool force) { unsigned int dest, irq = data->irq; unsigned long flags; @@ -2548,33 +2511,6 @@ static void ack_apic_level(struct irq_data *data) ioapic_irqd_unmask(data, cfg, masked); } -#ifdef CONFIG_IRQ_REMAP -static void ir_ack_apic_edge(struct irq_data *data) -{ - ack_APIC_irq(); -} - -static void ir_ack_apic_level(struct irq_data *data) -{ - ack_APIC_irq(); - eoi_ioapic_irq(data->irq, data->chip_data); -} - -static void ir_print_prefix(struct irq_data *data, struct seq_file *p) -{ - seq_printf(p, " IR-%s", data->chip->name); -} - -static void irq_remap_modify_chip_defaults(struct irq_chip *chip) -{ - chip->irq_print_chip = ir_print_prefix; - chip->irq_ack = ir_ack_apic_edge; - chip->irq_eoi = ir_ack_apic_level; - - chip->irq_set_affinity = set_remapped_irq_affinity; -} -#endif /* CONFIG_IRQ_REMAP */ - static struct irq_chip ioapic_chip __read_mostly = { .name = "IO-APIC", .irq_startup = startup_ioapic_irq, @@ -2582,7 +2518,7 @@ static struct irq_chip ioapic_chip __read_mostly = { .irq_unmask = unmask_ioapic_irq, .irq_ack = ack_apic_edge, .irq_eoi = ack_apic_level, - .irq_set_affinity = ioapic_set_affinity, + .irq_set_affinity = native_ioapic_set_affinity, .irq_retrigger = ioapic_retrigger_irq, }; @@ -2781,8 +2717,7 @@ static inline void __init check_timer(void) * 8259A. */ if (pin1 == -1) { - if (irq_remapping_enabled) - panic("BIOS bug: timer not connected to IO-APIC"); + panic_if_irq_remap("BIOS bug: timer not connected to IO-APIC"); pin1 = pin2; apic1 = apic2; no_pin1 = 1; @@ -2814,8 +2749,7 @@ static inline void __init check_timer(void) clear_IO_APIC_pin(0, pin1); goto out; } - if (irq_remapping_enabled) - panic("timer doesn't work through Interrupt-remapped IO-APIC"); + panic_if_irq_remap("timer doesn't work through Interrupt-remapped IO-APIC"); local_irq_disable(); clear_IO_APIC_pin(apic1, pin1); if (!no_pin1) @@ -2982,37 +2916,58 @@ device_initcall(ioapic_init_ops); /* * Dynamic irq allocate and deallocation */ -unsigned int create_irq_nr(unsigned int from, int node) +unsigned int __create_irqs(unsigned int from, unsigned int count, int node) { - struct irq_cfg *cfg; + struct irq_cfg **cfg; unsigned long flags; - unsigned int ret = 0; - int irq; + int irq, i; if (from < nr_irqs_gsi) from = nr_irqs_gsi; - irq = alloc_irq_from(from, node); - if (irq < 0) - return 0; - cfg = alloc_irq_cfg(irq, node); - if (!cfg) { - free_irq_at(irq, NULL); + cfg = kzalloc_node(count * sizeof(cfg[0]), GFP_KERNEL, node); + if (!cfg) return 0; + + irq = alloc_irqs_from(from, count, node); + if (irq < 0) + goto out_cfgs; + + for (i = 0; i < count; i++) { + cfg[i] = alloc_irq_cfg(irq + i, node); + if (!cfg[i]) + goto out_irqs; } raw_spin_lock_irqsave(&vector_lock, flags); - if (!__assign_irq_vector(irq, cfg, apic->target_cpus())) - ret = irq; + for (i = 0; i < count; i++) + if (__assign_irq_vector(irq + i, cfg[i], apic->target_cpus())) + goto out_vecs; raw_spin_unlock_irqrestore(&vector_lock, flags); - if (ret) { - irq_set_chip_data(irq, cfg); - irq_clear_status_flags(irq, IRQ_NOREQUEST); - } else { - free_irq_at(irq, cfg); + for (i = 0; i < count; i++) { + irq_set_chip_data(irq + i, cfg[i]); + irq_clear_status_flags(irq + i, IRQ_NOREQUEST); } - return ret; + + kfree(cfg); + return irq; + +out_vecs: + for (i--; i >= 0; i--) + __clear_irq_vector(irq + i, cfg[i]); + raw_spin_unlock_irqrestore(&vector_lock, flags); +out_irqs: + for (i = 0; i < count; i++) + free_irq_at(irq + i, cfg[i]); +out_cfgs: + kfree(cfg); + return 0; +} + +unsigned int create_irq_nr(unsigned int from, int node) +{ + return __create_irqs(from, 1, node); } int create_irq(void) @@ -3037,48 +2992,35 @@ void destroy_irq(unsigned int irq) irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE); - if (irq_remapped(cfg)) - free_remapped_irq(irq); + free_remapped_irq(irq); + raw_spin_lock_irqsave(&vector_lock, flags); __clear_irq_vector(irq, cfg); raw_spin_unlock_irqrestore(&vector_lock, flags); free_irq_at(irq, cfg); } +void destroy_irqs(unsigned int irq, unsigned int count) +{ + unsigned int i; + + for (i = 0; i < count; i++) + destroy_irq(irq + i); +} + /* * MSI message composition */ -#ifdef CONFIG_PCI_MSI -static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, - struct msi_msg *msg, u8 hpet_id) +void native_compose_msi_msg(struct pci_dev *pdev, + unsigned int irq, unsigned int dest, + struct msi_msg *msg, u8 hpet_id) { - struct irq_cfg *cfg; - int err; - unsigned dest; - - if (disable_apic) - return -ENXIO; - - cfg = irq_cfg(irq); - err = assign_irq_vector(irq, cfg, apic->target_cpus()); - if (err) - return err; + struct irq_cfg *cfg = irq_cfg(irq); - err = apic->cpu_mask_to_apicid_and(cfg->domain, - apic->target_cpus(), &dest); - if (err) - return err; - - if (irq_remapped(cfg)) { - compose_remapped_msi_msg(pdev, irq, dest, msg, hpet_id); - return err; - } + msg->address_hi = MSI_ADDR_BASE_HI; if (x2apic_enabled()) - msg->address_hi = MSI_ADDR_BASE_HI | - MSI_ADDR_EXT_DEST_ID(dest); - else - msg->address_hi = MSI_ADDR_BASE_HI; + msg->address_hi |= MSI_ADDR_EXT_DEST_ID(dest); msg->address_lo = MSI_ADDR_BASE_LO | @@ -3097,8 +3039,32 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, MSI_DATA_DELIVERY_FIXED: MSI_DATA_DELIVERY_LOWPRI) | MSI_DATA_VECTOR(cfg->vector); +} - return err; +#ifdef CONFIG_PCI_MSI +static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, + struct msi_msg *msg, u8 hpet_id) +{ + struct irq_cfg *cfg; + int err; + unsigned dest; + + if (disable_apic) + return -ENXIO; + + cfg = irq_cfg(irq); + err = assign_irq_vector(irq, cfg, apic->target_cpus()); + if (err) + return err; + + err = apic->cpu_mask_to_apicid_and(cfg->domain, + apic->target_cpus(), &dest); + if (err) + return err; + + x86_msi.compose_msi_msg(pdev, irq, dest, msg, hpet_id); + + return 0; } static int @@ -3136,23 +3102,28 @@ static struct irq_chip msi_chip = { .irq_retrigger = ioapic_retrigger_irq, }; -static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) +int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, + unsigned int irq_base, unsigned int irq_offset) { struct irq_chip *chip = &msi_chip; struct msi_msg msg; + unsigned int irq = irq_base + irq_offset; int ret; ret = msi_compose_msg(dev, irq, &msg, -1); if (ret < 0) return ret; - irq_set_msi_desc(irq, msidesc); - write_msi_msg(irq, &msg); + irq_set_msi_desc_off(irq_base, irq_offset, msidesc); - if (irq_remapped(irq_get_chip_data(irq))) { - irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); - irq_remap_modify_chip_defaults(chip); - } + /* + * MSI-X message is written per-IRQ, the offset is always 0. + * MSI message denotes a contiguous group of IRQs, written for 0th IRQ. + */ + if (!irq_offset) + write_msi_msg(irq, &msg); + + setup_remapped_irq(irq, irq_get_chip_data(irq), chip); irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); @@ -3163,46 +3134,26 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { - int node, ret, sub_handle, index = 0; unsigned int irq, irq_want; struct msi_desc *msidesc; + int node, ret; - /* x86 doesn't support multiple MSI yet */ + /* Multiple MSI vectors only supported with interrupt remapping */ if (type == PCI_CAP_ID_MSI && nvec > 1) return 1; node = dev_to_node(&dev->dev); irq_want = nr_irqs_gsi; - sub_handle = 0; list_for_each_entry(msidesc, &dev->msi_list, list) { irq = create_irq_nr(irq_want, node); if (irq == 0) - return -1; + return -ENOSPC; + irq_want = irq + 1; - if (!irq_remapping_enabled) - goto no_ir; - if (!sub_handle) { - /* - * allocate the consecutive block of IRTE's - * for 'nvec' - */ - index = msi_alloc_remapped_irq(dev, irq, nvec); - if (index < 0) { - ret = index; - goto error; - } - } else { - ret = msi_setup_remapped_irq(dev, irq, index, - sub_handle); - if (ret < 0) - goto error; - } -no_ir: - ret = setup_msi_irq(dev, msidesc, irq); + ret = setup_msi_irq(dev, msidesc, irq, 0); if (ret < 0) goto error; - sub_handle++; } return 0; @@ -3298,26 +3249,19 @@ static struct irq_chip hpet_msi_type = { .irq_retrigger = ioapic_retrigger_irq, }; -int arch_setup_hpet_msi(unsigned int irq, unsigned int id) +int default_setup_hpet_msi(unsigned int irq, unsigned int id) { struct irq_chip *chip = &hpet_msi_type; struct msi_msg msg; int ret; - if (irq_remapping_enabled) { - ret = setup_hpet_msi_remapped(irq, id); - if (ret) - return ret; - } - ret = msi_compose_msg(NULL, irq, &msg, id); if (ret < 0) return ret; hpet_msi_write(irq_get_handler_data(irq), &msg); irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); - if (irq_remapped(irq_get_chip_data(irq))) - irq_remap_modify_chip_defaults(chip); + setup_remapped_irq(irq, irq_get_chip_data(irq), chip); irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); return 0; @@ -3683,10 +3627,7 @@ void __init setup_ioapic_dest(void) else mask = apic->target_cpus(); - if (irq_remapping_enabled) - set_remapped_irq_affinity(idata, mask, false); - else - ioapic_set_affinity(idata, mask, false); + x86_io_apic_ops.set_affinity(idata, mask, false); } } diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c index cce91bf26676..7434d8556d09 100644 --- a/arch/x86/kernel/apic/ipi.c +++ b/arch/x86/kernel/apic/ipi.c @@ -106,7 +106,7 @@ void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector) unsigned long mask = cpumask_bits(cpumask)[0]; unsigned long flags; - if (WARN_ONCE(!mask, "empty IPI mask")) + if (!mask) return; local_irq_save(flags); diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index e03a1e180e81..562a76d433c8 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -20,18 +20,19 @@ static int set_x2apic_phys_mode(char *arg) } early_param("x2apic_phys", set_x2apic_phys_mode); -static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +static bool x2apic_fadt_phys(void) { - if (x2apic_phys) - return x2apic_enabled(); - else if ((acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) && - (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL) && - x2apic_enabled()) { + if ((acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) && + (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) { printk(KERN_DEBUG "System requires x2apic physical mode\n"); - return 1; + return true; } - else - return 0; + return false; +} + +static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + return x2apic_enabled() && (x2apic_phys || x2apic_fadt_phys()); } static void @@ -82,7 +83,7 @@ static void init_x2apic_ldr(void) static int x2apic_phys_probe(void) { - if (x2apic_mode && x2apic_phys) + if (x2apic_mode && (x2apic_phys || x2apic_fadt_phys())) return 1; return apic == &apic_x2apic_phys; diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index d65464e43503..8d7012b7f402 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -899,6 +899,7 @@ static void apm_cpu_idle(void) static int use_apm_idle; /* = 0 */ static unsigned int last_jiffies; /* = 0 */ static unsigned int last_stime; /* = 0 */ + cputime_t stime; int apm_idle_done = 0; unsigned int jiffies_since_last_check = jiffies - last_jiffies; @@ -906,23 +907,23 @@ static void apm_cpu_idle(void) WARN_ONCE(1, "deprecated apm_cpu_idle will be deleted in 2012"); recalc: + task_cputime(current, NULL, &stime); if (jiffies_since_last_check > IDLE_CALC_LIMIT) { use_apm_idle = 0; - last_jiffies = jiffies; - last_stime = current->stime; } else if (jiffies_since_last_check > idle_period) { unsigned int idle_percentage; - idle_percentage = current->stime - last_stime; + idle_percentage = stime - last_stime; idle_percentage *= 100; idle_percentage /= jiffies_since_last_check; use_apm_idle = (idle_percentage > idle_threshold); if (apm_info.forbid_idle) use_apm_idle = 0; - last_jiffies = jiffies; - last_stime = current->stime; } + last_jiffies = jiffies; + last_stime = stime; + bucket = IDLE_LEAKY_MAX; while (!need_resched()) { diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 15239fffd6fe..782c456eaa01 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -364,9 +364,9 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) #endif } -int amd_get_nb_id(int cpu) +u16 amd_get_nb_id(int cpu) { - int id = 0; + u16 id = 0; #ifdef CONFIG_SMP id = per_cpu(cpu_llc_id, cpu); #endif diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c index a8f8fa9769d6..1e7e84a02eba 100644 --- a/arch/x86/kernel/cpu/hypervisor.c +++ b/arch/x86/kernel/cpu/hypervisor.c @@ -79,3 +79,10 @@ void __init init_hypervisor_platform(void) if (x86_hyper->init_platform) x86_hyper->init_platform(); } + +bool __init hypervisor_x2apic_available(void) +{ + return x86_hyper && + x86_hyper->x2apic_available && + x86_hyper->x2apic_available(); +} diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 84c1309c4c0c..7c6f7d548c0f 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -1226,7 +1226,7 @@ static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = { .notifier_call = cacheinfo_cpu_callback, }; -static int __cpuinit cache_sysfs_init(void) +static int __init cache_sysfs_init(void) { int i; diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 0a630dd4b620..a7d26d83fb70 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -14,10 +14,15 @@ #include <linux/time.h> #include <linux/clocksource.h> #include <linux/module.h> +#include <linux/hardirq.h> +#include <linux/interrupt.h> #include <asm/processor.h> #include <asm/hypervisor.h> #include <asm/hyperv.h> #include <asm/mshyperv.h> +#include <asm/desc.h> +#include <asm/idle.h> +#include <asm/irq_regs.h> struct ms_hyperv_info ms_hyperv; EXPORT_SYMBOL_GPL(ms_hyperv); @@ -30,6 +35,13 @@ static bool __init ms_hyperv_platform(void) if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) return false; + /* + * Xen emulates Hyper-V to support enlightened Windows. + * Check to see first if we are on a Xen Hypervisor. + */ + if (xen_cpuid_base()) + return false; + cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS, &eax, &hyp_signature[0], &hyp_signature[1], &hyp_signature[2]); @@ -68,7 +80,14 @@ static void __init ms_hyperv_init_platform(void) printk(KERN_INFO "HyperV: features 0x%x, hints 0x%x\n", ms_hyperv.features, ms_hyperv.hints); - clocksource_register_hz(&hyperv_cs, NSEC_PER_SEC/100); + if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE) + clocksource_register_hz(&hyperv_cs, NSEC_PER_SEC/100); +#if IS_ENABLED(CONFIG_HYPERV) + /* + * Setup the IDT for hypervisor callback. + */ + alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector); +#endif } const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = { @@ -77,3 +96,36 @@ const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = { .init_platform = ms_hyperv_init_platform, }; EXPORT_SYMBOL(x86_hyper_ms_hyperv); + +#if IS_ENABLED(CONFIG_HYPERV) +static int vmbus_irq = -1; +static irq_handler_t vmbus_isr; + +void hv_register_vmbus_handler(int irq, irq_handler_t handler) +{ + vmbus_irq = irq; + vmbus_isr = handler; +} + +void hyperv_vector_handler(struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + struct irq_desc *desc; + + irq_enter(); + exit_idle(); + + desc = irq_to_desc(vmbus_irq); + + if (desc) + generic_handle_irq_desc(vmbus_irq, desc); + + irq_exit(); + set_irq_regs(old_regs); +} +#else +void hv_register_vmbus_handler(int irq, irq_handler_t handler) +{ +} +#endif +EXPORT_SYMBOL_GPL(hv_register_vmbus_handler); diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 6774c17a5576..bf0f01aea994 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -829,7 +829,7 @@ static inline void x86_assign_hw_event(struct perf_event *event, } else { hwc->config_base = x86_pmu_config_addr(hwc->idx); hwc->event_base = x86_pmu_event_addr(hwc->idx); - hwc->event_base_rdpmc = hwc->idx; + hwc->event_base_rdpmc = x86_pmu_rdpmc_index(hwc->idx); } } @@ -1310,11 +1310,6 @@ static struct attribute_group x86_pmu_format_group = { .attrs = NULL, }; -struct perf_pmu_events_attr { - struct device_attribute attr; - u64 id; -}; - /* * Remove all undefined events (x86_pmu.event_map(id) == 0) * out of events_attr attributes. @@ -1348,11 +1343,9 @@ static ssize_t events_sysfs_show(struct device *dev, struct device_attribute *at #define EVENT_VAR(_id) event_attr_##_id #define EVENT_PTR(_id) &event_attr_##_id.attr.attr -#define EVENT_ATTR(_name, _id) \ -static struct perf_pmu_events_attr EVENT_VAR(_id) = { \ - .attr = __ATTR(_name, 0444, events_sysfs_show, NULL), \ - .id = PERF_COUNT_HW_##_id, \ -}; +#define EVENT_ATTR(_name, _id) \ + PMU_EVENT_ATTR(_name, EVENT_VAR(_id), PERF_COUNT_HW_##_id, \ + events_sysfs_show) EVENT_ATTR(cpu-cycles, CPU_CYCLES ); EVENT_ATTR(instructions, INSTRUCTIONS ); diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 115c1ea97746..7f5c75c2afdd 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -325,6 +325,8 @@ struct x86_pmu { int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign); unsigned eventsel; unsigned perfctr; + int (*addr_offset)(int index, bool eventsel); + int (*rdpmc_index)(int index); u64 (*event_map)(int); int max_events; int num_counters; @@ -446,28 +448,21 @@ extern u64 __read_mostly hw_cache_extra_regs u64 x86_perf_event_update(struct perf_event *event); -static inline int x86_pmu_addr_offset(int index) +static inline unsigned int x86_pmu_config_addr(int index) { - int offset; - - /* offset = X86_FEATURE_PERFCTR_CORE ? index << 1 : index */ - alternative_io(ASM_NOP2, - "shll $1, %%eax", - X86_FEATURE_PERFCTR_CORE, - "=a" (offset), - "a" (index)); - - return offset; + return x86_pmu.eventsel + (x86_pmu.addr_offset ? + x86_pmu.addr_offset(index, true) : index); } -static inline unsigned int x86_pmu_config_addr(int index) +static inline unsigned int x86_pmu_event_addr(int index) { - return x86_pmu.eventsel + x86_pmu_addr_offset(index); + return x86_pmu.perfctr + (x86_pmu.addr_offset ? + x86_pmu.addr_offset(index, false) : index); } -static inline unsigned int x86_pmu_event_addr(int index) +static inline int x86_pmu_rdpmc_index(int index) { - return x86_pmu.perfctr + x86_pmu_addr_offset(index); + return x86_pmu.rdpmc_index ? x86_pmu.rdpmc_index(index) : index; } int x86_setup_perfctr(struct perf_event *event); diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index c93bc4e813a0..dfdab42aed27 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -132,21 +132,102 @@ static u64 amd_pmu_event_map(int hw_event) return amd_perfmon_event_map[hw_event]; } -static int amd_pmu_hw_config(struct perf_event *event) +static struct event_constraint *amd_nb_event_constraint; + +/* + * Previously calculated offsets + */ +static unsigned int event_offsets[X86_PMC_IDX_MAX] __read_mostly; +static unsigned int count_offsets[X86_PMC_IDX_MAX] __read_mostly; +static unsigned int rdpmc_indexes[X86_PMC_IDX_MAX] __read_mostly; + +/* + * Legacy CPUs: + * 4 counters starting at 0xc0010000 each offset by 1 + * + * CPUs with core performance counter extensions: + * 6 counters starting at 0xc0010200 each offset by 2 + * + * CPUs with north bridge performance counter extensions: + * 4 additional counters starting at 0xc0010240 each offset by 2 + * (indexed right above either one of the above core counters) + */ +static inline int amd_pmu_addr_offset(int index, bool eventsel) { - int ret; + int offset, first, base; - /* pass precise event sampling to ibs: */ - if (event->attr.precise_ip && get_ibs_caps()) - return -ENOENT; + if (!index) + return index; + + if (eventsel) + offset = event_offsets[index]; + else + offset = count_offsets[index]; + + if (offset) + return offset; + + if (amd_nb_event_constraint && + test_bit(index, amd_nb_event_constraint->idxmsk)) { + /* + * calculate the offset of NB counters with respect to + * base eventsel or perfctr + */ + + first = find_first_bit(amd_nb_event_constraint->idxmsk, + X86_PMC_IDX_MAX); + + if (eventsel) + base = MSR_F15H_NB_PERF_CTL - x86_pmu.eventsel; + else + base = MSR_F15H_NB_PERF_CTR - x86_pmu.perfctr; + + offset = base + ((index - first) << 1); + } else if (!cpu_has_perfctr_core) + offset = index; + else + offset = index << 1; + + if (eventsel) + event_offsets[index] = offset; + else + count_offsets[index] = offset; + + return offset; +} + +static inline int amd_pmu_rdpmc_index(int index) +{ + int ret, first; + + if (!index) + return index; + + ret = rdpmc_indexes[index]; - ret = x86_pmu_hw_config(event); if (ret) return ret; - if (has_branch_stack(event)) - return -EOPNOTSUPP; + if (amd_nb_event_constraint && + test_bit(index, amd_nb_event_constraint->idxmsk)) { + /* + * according to the mnual, ECX value of the NB counters is + * the index of the NB counter (0, 1, 2 or 3) plus 6 + */ + + first = find_first_bit(amd_nb_event_constraint->idxmsk, + X86_PMC_IDX_MAX); + ret = index - first + 6; + } else + ret = index; + + rdpmc_indexes[index] = ret; + + return ret; +} +static int amd_core_hw_config(struct perf_event *event) +{ if (event->attr.exclude_host && event->attr.exclude_guest) /* * When HO == GO == 1 the hardware treats that as GO == HO == 0 @@ -156,14 +237,37 @@ static int amd_pmu_hw_config(struct perf_event *event) event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR | ARCH_PERFMON_EVENTSEL_OS); else if (event->attr.exclude_host) - event->hw.config |= AMD_PERFMON_EVENTSEL_GUESTONLY; + event->hw.config |= AMD64_EVENTSEL_GUESTONLY; else if (event->attr.exclude_guest) - event->hw.config |= AMD_PERFMON_EVENTSEL_HOSTONLY; + event->hw.config |= AMD64_EVENTSEL_HOSTONLY; + + return 0; +} + +/* + * NB counters do not support the following event select bits: + * Host/Guest only + * Counter mask + * Invert counter mask + * Edge detect + * OS/User mode + */ +static int amd_nb_hw_config(struct perf_event *event) +{ + /* for NB, we only allow system wide counting mode */ + if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) + return -EINVAL; + + if (event->attr.exclude_user || event->attr.exclude_kernel || + event->attr.exclude_host || event->attr.exclude_guest) + return -EINVAL; - if (event->attr.type != PERF_TYPE_RAW) - return 0; + event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR | + ARCH_PERFMON_EVENTSEL_OS); - event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK; + if (event->hw.config & ~(AMD64_RAW_EVENT_MASK_NB | + ARCH_PERFMON_EVENTSEL_INT)) + return -EINVAL; return 0; } @@ -181,6 +285,11 @@ static inline int amd_is_nb_event(struct hw_perf_event *hwc) return (hwc->config & 0xe0) == 0xe0; } +static inline int amd_is_perfctr_nb_event(struct hw_perf_event *hwc) +{ + return amd_nb_event_constraint && amd_is_nb_event(hwc); +} + static inline int amd_has_nb(struct cpu_hw_events *cpuc) { struct amd_nb *nb = cpuc->amd_nb; @@ -188,20 +297,37 @@ static inline int amd_has_nb(struct cpu_hw_events *cpuc) return nb && nb->nb_id != -1; } -static void amd_put_event_constraints(struct cpu_hw_events *cpuc, - struct perf_event *event) +static int amd_pmu_hw_config(struct perf_event *event) +{ + int ret; + + /* pass precise event sampling to ibs: */ + if (event->attr.precise_ip && get_ibs_caps()) + return -ENOENT; + + if (has_branch_stack(event)) + return -EOPNOTSUPP; + + ret = x86_pmu_hw_config(event); + if (ret) + return ret; + + if (event->attr.type == PERF_TYPE_RAW) + event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK; + + if (amd_is_perfctr_nb_event(&event->hw)) + return amd_nb_hw_config(event); + + return amd_core_hw_config(event); +} + +static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc, + struct perf_event *event) { - struct hw_perf_event *hwc = &event->hw; struct amd_nb *nb = cpuc->amd_nb; int i; /* - * only care about NB events - */ - if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc))) - return; - - /* * need to scan whole list because event may not have * been assigned during scheduling * @@ -215,6 +341,19 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc, } } +static void amd_nb_interrupt_hw_config(struct hw_perf_event *hwc) +{ + int core_id = cpu_data(smp_processor_id()).cpu_core_id; + + /* deliver interrupts only to this core */ + if (hwc->config & ARCH_PERFMON_EVENTSEL_INT) { + hwc->config |= AMD64_EVENTSEL_INT_CORE_ENABLE; + hwc->config &= ~AMD64_EVENTSEL_INT_CORE_SEL_MASK; + hwc->config |= (u64)(core_id) << + AMD64_EVENTSEL_INT_CORE_SEL_SHIFT; + } +} + /* * AMD64 NorthBridge events need special treatment because * counter access needs to be synchronized across all cores @@ -247,24 +386,24 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc, * * Given that resources are allocated (cmpxchg), they must be * eventually freed for others to use. This is accomplished by - * calling amd_put_event_constraints(). + * calling __amd_put_nb_event_constraints() * * Non NB events are not impacted by this restriction. */ static struct event_constraint * -amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) +__amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, + struct event_constraint *c) { struct hw_perf_event *hwc = &event->hw; struct amd_nb *nb = cpuc->amd_nb; - struct perf_event *old = NULL; - int max = x86_pmu.num_counters; - int i, j, k = -1; + struct perf_event *old; + int idx, new = -1; - /* - * if not NB event or no NB, then no constraints - */ - if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc))) - return &unconstrained; + if (!c) + c = &unconstrained; + + if (cpuc->is_fake) + return c; /* * detect if already present, if so reuse @@ -276,48 +415,36 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) * because of successive calls to x86_schedule_events() from * hw_perf_group_sched_in() without hw_perf_enable() */ - for (i = 0; i < max; i++) { - /* - * keep track of first free slot - */ - if (k == -1 && !nb->owners[i]) - k = i; + for_each_set_bit(idx, c->idxmsk, x86_pmu.num_counters) { + if (new == -1 || hwc->idx == idx) + /* assign free slot, prefer hwc->idx */ + old = cmpxchg(nb->owners + idx, NULL, event); + else if (nb->owners[idx] == event) + /* event already present */ + old = event; + else + continue; + + if (old && old != event) + continue; + + /* reassign to this slot */ + if (new != -1) + cmpxchg(nb->owners + new, event, NULL); + new = idx; /* already present, reuse */ - if (nb->owners[i] == event) - goto done; - } - /* - * not present, so grab a new slot - * starting either at: - */ - if (hwc->idx != -1) { - /* previous assignment */ - i = hwc->idx; - } else if (k != -1) { - /* start from free slot found */ - i = k; - } else { - /* - * event not found, no slot found in - * first pass, try again from the - * beginning - */ - i = 0; - } - j = i; - do { - old = cmpxchg(nb->owners+i, NULL, event); - if (!old) + if (old == event) break; - if (++i == max) - i = 0; - } while (i != j); -done: - if (!old) - return &nb->event_constraints[i]; - - return &emptyconstraint; + } + + if (new == -1) + return &emptyconstraint; + + if (amd_is_perfctr_nb_event(hwc)) + amd_nb_interrupt_hw_config(hwc); + + return &nb->event_constraints[new]; } static struct amd_nb *amd_alloc_nb(int cpu) @@ -364,7 +491,7 @@ static void amd_pmu_cpu_starting(int cpu) struct amd_nb *nb; int i, nb_id; - cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY; + cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY; if (boot_cpu_data.x86_max_cores < 2) return; @@ -407,6 +534,26 @@ static void amd_pmu_cpu_dead(int cpu) } } +static struct event_constraint * +amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) +{ + /* + * if not NB event or no NB, then no constraints + */ + if (!(amd_has_nb(cpuc) && amd_is_nb_event(&event->hw))) + return &unconstrained; + + return __amd_get_nb_event_constraints(cpuc, event, + amd_nb_event_constraint); +} + +static void amd_put_event_constraints(struct cpu_hw_events *cpuc, + struct perf_event *event) +{ + if (amd_has_nb(cpuc) && amd_is_nb_event(&event->hw)) + __amd_put_nb_event_constraints(cpuc, event); +} + PMU_FORMAT_ATTR(event, "config:0-7,32-35"); PMU_FORMAT_ATTR(umask, "config:8-15" ); PMU_FORMAT_ATTR(edge, "config:18" ); @@ -496,6 +643,9 @@ static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0); static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0); +static struct event_constraint amd_NBPMC96 = EVENT_CONSTRAINT(0, 0x3C0, 0); +static struct event_constraint amd_NBPMC74 = EVENT_CONSTRAINT(0, 0xF0, 0); + static struct event_constraint * amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event) { @@ -561,8 +711,8 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *ev return &amd_f15_PMC20; } case AMD_EVENT_NB: - /* not yet implemented */ - return &emptyconstraint; + return __amd_get_nb_event_constraints(cpuc, event, + amd_nb_event_constraint); default: return &emptyconstraint; } @@ -587,6 +737,8 @@ static __initconst const struct x86_pmu amd_pmu = { .schedule_events = x86_schedule_events, .eventsel = MSR_K7_EVNTSEL0, .perfctr = MSR_K7_PERFCTR0, + .addr_offset = amd_pmu_addr_offset, + .rdpmc_index = amd_pmu_rdpmc_index, .event_map = amd_pmu_event_map, .max_events = ARRAY_SIZE(amd_perfmon_event_map), .num_counters = AMD64_NUM_COUNTERS, @@ -608,7 +760,7 @@ static __initconst const struct x86_pmu amd_pmu = { static int setup_event_constraints(void) { - if (boot_cpu_data.x86 >= 0x15) + if (boot_cpu_data.x86 == 0x15) x86_pmu.get_event_constraints = amd_get_event_constraints_f15h; return 0; } @@ -638,6 +790,23 @@ static int setup_perfctr_core(void) return 0; } +static int setup_perfctr_nb(void) +{ + if (!cpu_has_perfctr_nb) + return -ENODEV; + + x86_pmu.num_counters += AMD64_NUM_COUNTERS_NB; + + if (cpu_has_perfctr_core) + amd_nb_event_constraint = &amd_NBPMC96; + else + amd_nb_event_constraint = &amd_NBPMC74; + + printk(KERN_INFO "perf: AMD northbridge performance counters detected\n"); + + return 0; +} + __init int amd_pmu_init(void) { /* Performance-monitoring supported from K7 and later: */ @@ -648,6 +817,7 @@ __init int amd_pmu_init(void) setup_event_constraints(); setup_perfctr_core(); + setup_perfctr_nb(); /* Events are common for all AMDs */ memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, @@ -678,7 +848,7 @@ void amd_pmu_disable_virt(void) * SVM is disabled the Guest-only bits still gets set and the counter * will not count anything. */ - cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY; + cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY; /* Reload all events */ x86_pmu_disable_all(); diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index d22d0c4edcfd..03a36321ec54 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -33,6 +33,9 @@ #define VMWARE_PORT_CMD_GETVERSION 10 #define VMWARE_PORT_CMD_GETHZ 45 +#define VMWARE_PORT_CMD_GETVCPU_INFO 68 +#define VMWARE_PORT_CMD_LEGACY_X2APIC 3 +#define VMWARE_PORT_CMD_VCPU_RESERVED 31 #define VMWARE_PORT(cmd, eax, ebx, ecx, edx) \ __asm__("inl (%%dx)" : \ @@ -125,10 +128,20 @@ static void __cpuinit vmware_set_cpu_features(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE); } +/* Checks if hypervisor supports x2apic without VT-D interrupt remapping. */ +static bool __init vmware_legacy_x2apic_available(void) +{ + uint32_t eax, ebx, ecx, edx; + VMWARE_PORT(GETVCPU_INFO, eax, ebx, ecx, edx); + return (eax & (1 << VMWARE_PORT_CMD_VCPU_RESERVED)) == 0 && + (eax & (1 << VMWARE_PORT_CMD_LEGACY_X2APIC)) != 0; +} + const __refconst struct hypervisor_x86 x86_hyper_vmware = { .name = "VMware", .detect = vmware_platform, .set_cpu_features = vmware_set_cpu_features, .init_platform = vmware_platform_setup, + .x2apic_available = vmware_legacy_x2apic_available, }; EXPORT_SYMBOL(x86_hyper_vmware); diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 6ed91d9980e2..8831176aa5ef 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1091,11 +1091,18 @@ ENTRY(xen_failsafe_callback) _ASM_EXTABLE(4b,9b) ENDPROC(xen_failsafe_callback) -BUILD_INTERRUPT3(xen_hvm_callback_vector, XEN_HVM_EVTCHN_CALLBACK, +BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR, xen_evtchn_do_upcall) #endif /* CONFIG_XEN */ +#if IS_ENABLED(CONFIG_HYPERV) + +BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR, + hyperv_vector_handler) + +#endif /* CONFIG_HYPERV */ + #ifdef CONFIG_FUNCTION_TRACER #ifdef CONFIG_DYNAMIC_FTRACE diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index cb3c591339aa..048f2240f8e6 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1454,11 +1454,16 @@ ENTRY(xen_failsafe_callback) CFI_ENDPROC END(xen_failsafe_callback) -apicinterrupt XEN_HVM_EVTCHN_CALLBACK \ +apicinterrupt HYPERVISOR_CALLBACK_VECTOR \ xen_hvm_callback_vector xen_evtchn_do_upcall #endif /* CONFIG_XEN */ +#if IS_ENABLED(CONFIG_HYPERV) +apicinterrupt HYPERVISOR_CALLBACK_VECTOR \ + hyperv_callback_vector hyperv_vector_handler +#endif /* CONFIG_HYPERV */ + /* * Some functions should be protected against kprobes */ diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index c18f59d10101..6773c918b8cc 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -18,6 +18,7 @@ #include <asm/io_apic.h> #include <asm/bios_ebda.h> #include <asm/tlbflush.h> +#include <asm/bootparam_utils.h> static void __init i386_default_early_setup(void) { @@ -30,6 +31,8 @@ static void __init i386_default_early_setup(void) void __init i386_start_kernel(void) { + sanitize_boot_params(&boot_params); + memblock_reserve(__pa_symbol(&_text), __pa_symbol(&__bss_stop) - __pa_symbol(&_text)); diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 037df57a99ac..849fc9e63c2f 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -25,6 +25,7 @@ #include <asm/kdebug.h> #include <asm/e820.h> #include <asm/bios_ebda.h> +#include <asm/bootparam_utils.h> static void __init zap_identity_mappings(void) { @@ -46,6 +47,7 @@ static void __init copy_bootdata(char *real_mode_data) char * command_line; memcpy(&boot_params, real_mode_data, sizeof boot_params); + sanitize_boot_params(&boot_params); if (boot_params.hdr.cmd_line_ptr) { command_line = __va(boot_params.hdr.cmd_line_ptr); memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE); diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index c8932c79e78b..3c3f58a0808f 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -307,36 +307,45 @@ default_entry: movl %eax,%cr0 /* - * New page tables may be in 4Mbyte page mode and may - * be using the global pages. + * We want to start out with EFLAGS unambiguously cleared. Some BIOSes leave + * bits like NT set. This would confuse the debugger if this code is traced. So + * initialize them properly now before switching to protected mode. That means + * DF in particular (even though we have cleared it earlier after copying the + * command line) because GCC expects it. + */ + pushl $0 + popfl + +/* + * New page tables may be in 4Mbyte page mode and may be using the global pages. * - * NOTE! If we are on a 486 we may have no cr4 at all! - * Specifically, cr4 exists if and only if CPUID exists - * and has flags other than the FPU flag set. + * NOTE! If we are on a 486 we may have no cr4 at all! Specifically, cr4 exists + * if and only if CPUID exists and has flags other than the FPU flag set. */ + movl $-1,pa(X86_CPUID) # preset CPUID level movl $X86_EFLAGS_ID,%ecx pushl %ecx - popfl + popfl # set EFLAGS=ID pushfl - popl %eax - pushl $0 - popfl - pushfl - popl %edx - xorl %edx,%eax - testl %ecx,%eax - jz 6f # No ID flag = no CPUID = no CR4 + popl %eax # get EFLAGS + testl $X86_EFLAGS_ID,%eax # did EFLAGS.ID remained set? + jz enable_paging # hw disallowed setting of ID bit + # which means no CPUID and no CR4 + + xorl %eax,%eax + cpuid + movl %eax,pa(X86_CPUID) # save largest std CPUID function movl $1,%eax cpuid - andl $~1,%edx # Ignore CPUID.FPU - jz 6f # No flags or only CPUID.FPU = no CR4 + andl $~1,%edx # Ignore CPUID.FPU + jz enable_paging # No flags or only CPUID.FPU = no CR4 movl pa(mmu_cr4_features),%eax movl %eax,%cr4 testb $X86_CR4_PAE, %al # check if PAE is enabled - jz 6f + jz enable_paging /* Check if extended functions are implemented */ movl $0x80000000, %eax @@ -344,7 +353,7 @@ default_entry: /* Value must be in the range 0x80000001 to 0x8000ffff */ subl $0x80000001, %eax cmpl $(0x8000ffff-0x80000001), %eax - ja 6f + ja enable_paging /* Clear bogus XD_DISABLE bits */ call verify_cpu @@ -353,7 +362,7 @@ default_entry: cpuid /* Execute Disable bit supported? */ btl $(X86_FEATURE_NX & 31), %edx - jnc 6f + jnc enable_paging /* Setup EFER (Extended Feature Enable Register) */ movl $MSR_EFER, %ecx @@ -363,7 +372,7 @@ default_entry: /* Make changes effective */ wrmsr -6: +enable_paging: /* * Enable paging @@ -378,14 +387,6 @@ default_entry: addl $__PAGE_OFFSET, %esp /* - * Initialize eflags. Some BIOS's leave bits like NT set. This would - * confuse the debugger if this code is traced. - * XXX - best to initialize before switching to protected mode. - */ - pushl $0 - popfl - -/* * start system 32-bit setup. We need to re-do some of the things done * in 16-bit mode for the "real" operations. */ @@ -394,31 +395,11 @@ default_entry: jz 1f # Did we do this already? call *%eax 1: - -/* check if it is 486 or 386. */ + /* - * XXX - this does a lot of unnecessary setup. Alignment checks don't - * apply at our cpl of 0 and the stack ought to be aligned already, and - * we don't need to preserve eflags. + * Check if it is 486 */ - movl $-1,X86_CPUID # -1 for no CPUID initially - movb $3,X86 # at least 386 - pushfl # push EFLAGS - popl %eax # get EFLAGS - movl %eax,%ecx # save original EFLAGS - xorl $0x240000,%eax # flip AC and ID bits in EFLAGS - pushl %eax # copy to EFLAGS - popfl # set EFLAGS - pushfl # get new EFLAGS - popl %eax # put it in eax - xorl %ecx,%eax # change in flags - pushl %ecx # restore original EFLAGS - popfl - testl $0x40000,%eax # check if AC bit changed - je is386 - - movb $4,X86 # at least 486 - testl $0x200000,%eax # check if ID bit changed + cmpl $-1,X86_CPUID je is486 /* get vendor info */ @@ -444,11 +425,10 @@ default_entry: movb %cl,X86_MASK movl %edx,X86_CAPABILITY -is486: movl $0x50022,%ecx # set AM, WP, NE and MP - jmp 2f - -is386: movl $2,%ecx # set MP -2: movl %cr0,%eax +is486: + movb $4,X86 + movl $0x50022,%ecx # set AM, WP, NE and MP + movl %cr0,%eax andl $0x80000011,%eax # Save PG,PE,ET orl %ecx,%eax movl %eax,%cr0 @@ -473,7 +453,6 @@ is386: movl $2,%ecx # set MP xorl %eax,%eax # Clear LDT lldt %ax - cld # gcc2 wants the direction flag cleared at all times pushl $0 # fake return address for unwinder jmp *(initial_code) diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index e28670f9a589..da85a8e830a1 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -478,7 +478,7 @@ static int hpet_msi_next_event(unsigned long delta, static int hpet_setup_msi_irq(unsigned int irq) { - if (arch_setup_hpet_msi(irq, hpet_blockid)) { + if (x86_msi.setup_hpet_msi(irq, hpet_blockid)) { destroy_irq(irq); return -EINVAL; } diff --git a/arch/x86/kernel/kprobes/Makefile b/arch/x86/kernel/kprobes/Makefile new file mode 100644 index 000000000000..0d33169cc1a2 --- /dev/null +++ b/arch/x86/kernel/kprobes/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for kernel probes +# + +obj-$(CONFIG_KPROBES) += core.o +obj-$(CONFIG_OPTPROBES) += opt.o +obj-$(CONFIG_KPROBES_ON_FTRACE) += ftrace.o diff --git a/arch/x86/kernel/kprobes-common.h b/arch/x86/kernel/kprobes/common.h index 3230b68ef29a..2e9d4b5af036 100644 --- a/arch/x86/kernel/kprobes-common.h +++ b/arch/x86/kernel/kprobes/common.h @@ -99,4 +99,15 @@ static inline unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsig return addr; } #endif + +#ifdef CONFIG_KPROBES_ON_FTRACE +extern int skip_singlestep(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb); +#else +static inline int skip_singlestep(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) +{ + return 0; +} +#endif #endif diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes/core.c index 57916c0d3cf6..e124554598ee 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes/core.c @@ -58,7 +58,7 @@ #include <asm/insn.h> #include <asm/debugreg.h> -#include "kprobes-common.h" +#include "common.h" void jprobe_return_end(void); @@ -78,7 +78,7 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); * Groups, and some special opcodes can not boost. * This is non-const and volatile to keep gcc from statically * optimizing it out, as variable_test_bit makes gcc think only - * *(unsigned long*) is used. + * *(unsigned long*) is used. */ static volatile u32 twobyte_is_boostable[256 / 32] = { /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ @@ -117,7 +117,7 @@ static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op) struct __arch_relative_insn { u8 op; s32 raddr; - } __attribute__((packed)) *insn; + } __packed *insn; insn = (struct __arch_relative_insn *)from; insn->raddr = (s32)((long)(to) - ((long)(from) + 5)); @@ -541,23 +541,6 @@ reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb return 1; } -#ifdef KPROBES_CAN_USE_FTRACE -static void __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs, - struct kprobe_ctlblk *kcb) -{ - /* - * Emulate singlestep (and also recover regs->ip) - * as if there is a 5byte nop - */ - regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE; - if (unlikely(p->post_handler)) { - kcb->kprobe_status = KPROBE_HIT_SSDONE; - p->post_handler(p, regs, 0); - } - __this_cpu_write(current_kprobe, NULL); -} -#endif - /* * Interrupts are disabled on entry as trap3 is an interrupt gate and they * remain disabled throughout this function. @@ -616,13 +599,8 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) } else if (kprobe_running()) { p = __this_cpu_read(current_kprobe); if (p->break_handler && p->break_handler(p, regs)) { -#ifdef KPROBES_CAN_USE_FTRACE - if (kprobe_ftrace(p)) { - skip_singlestep(p, regs, kcb); - return 1; - } -#endif - setup_singlestep(p, regs, kcb, 0); + if (!skip_singlestep(p, regs, kcb)) + setup_singlestep(p, regs, kcb, 0); return 1; } } /* else: not a kprobe fault; let the kernel handle it */ @@ -1075,50 +1053,6 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) return 0; } -#ifdef KPROBES_CAN_USE_FTRACE -/* Ftrace callback handler for kprobes */ -void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *ops, struct pt_regs *regs) -{ - struct kprobe *p; - struct kprobe_ctlblk *kcb; - unsigned long flags; - - /* Disable irq for emulating a breakpoint and avoiding preempt */ - local_irq_save(flags); - - p = get_kprobe((kprobe_opcode_t *)ip); - if (unlikely(!p) || kprobe_disabled(p)) - goto end; - - kcb = get_kprobe_ctlblk(); - if (kprobe_running()) { - kprobes_inc_nmissed_count(p); - } else { - /* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */ - regs->ip = ip + sizeof(kprobe_opcode_t); - - __this_cpu_write(current_kprobe, p); - kcb->kprobe_status = KPROBE_HIT_ACTIVE; - if (!p->pre_handler || !p->pre_handler(p, regs)) - skip_singlestep(p, regs, kcb); - /* - * If pre_handler returns !0, it sets regs->ip and - * resets current kprobe. - */ - } -end: - local_irq_restore(flags); -} - -int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p) -{ - p->ainsn.insn = NULL; - p->ainsn.boostable = -1; - return 0; -} -#endif - int __init arch_init_kprobes(void) { return arch_init_optprobes(); diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c new file mode 100644 index 000000000000..23ef5c556f06 --- /dev/null +++ b/arch/x86/kernel/kprobes/ftrace.c @@ -0,0 +1,93 @@ +/* + * Dynamic Ftrace based Kprobes Optimization + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) Hitachi Ltd., 2012 + */ +#include <linux/kprobes.h> +#include <linux/ptrace.h> +#include <linux/hardirq.h> +#include <linux/preempt.h> +#include <linux/ftrace.h> + +#include "common.h" + +static int __skip_singlestep(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) +{ + /* + * Emulate singlestep (and also recover regs->ip) + * as if there is a 5byte nop + */ + regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE; + if (unlikely(p->post_handler)) { + kcb->kprobe_status = KPROBE_HIT_SSDONE; + p->post_handler(p, regs, 0); + } + __this_cpu_write(current_kprobe, NULL); + return 1; +} + +int __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) +{ + if (kprobe_ftrace(p)) + return __skip_singlestep(p, regs, kcb); + else + return 0; +} + +/* Ftrace callback handler for kprobes */ +void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *ops, struct pt_regs *regs) +{ + struct kprobe *p; + struct kprobe_ctlblk *kcb; + unsigned long flags; + + /* Disable irq for emulating a breakpoint and avoiding preempt */ + local_irq_save(flags); + + p = get_kprobe((kprobe_opcode_t *)ip); + if (unlikely(!p) || kprobe_disabled(p)) + goto end; + + kcb = get_kprobe_ctlblk(); + if (kprobe_running()) { + kprobes_inc_nmissed_count(p); + } else { + /* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */ + regs->ip = ip + sizeof(kprobe_opcode_t); + + __this_cpu_write(current_kprobe, p); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + if (!p->pre_handler || !p->pre_handler(p, regs)) + __skip_singlestep(p, regs, kcb); + /* + * If pre_handler returns !0, it sets regs->ip and + * resets current kprobe. + */ + } +end: + local_irq_restore(flags); +} + +int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p) +{ + p->ainsn.insn = NULL; + p->ainsn.boostable = -1; + return 0; +} diff --git a/arch/x86/kernel/kprobes-opt.c b/arch/x86/kernel/kprobes/opt.c index c5e410eed403..76dc6f095724 100644 --- a/arch/x86/kernel/kprobes-opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -37,7 +37,7 @@ #include <asm/insn.h> #include <asm/debugreg.h> -#include "kprobes-common.h" +#include "common.h" unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr) { diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 9c2bd8bd4b4c..2b44ea5f269d 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -505,6 +505,7 @@ static bool __init kvm_detect(void) const struct hypervisor_x86 x86_hyper_kvm __refconst = { .name = "KVM", .detect = kvm_detect, + .x2apic_available = kvm_para_available, }; EXPORT_SYMBOL_GPL(x86_hyper_kvm); diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index b629bbe0d9bd..29a8120e6fe8 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -22,7 +22,7 @@ #include <linux/perf_event.h> #include <linux/hw_breakpoint.h> #include <linux/rcupdate.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/context_tracking.h> #include <asm/uaccess.h> diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index 801602b5d745..2e8f3d3b5641 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c @@ -149,7 +149,6 @@ unsigned long mach_get_cmos_time(void) if (century) { century = bcd2bin(century); year += century * 100; - printk(KERN_INFO "Extended CMOS year: %d\n", century * 100); } else year += CMOS_YEARS_OFFS; diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index 97ef74b88e0f..dbded5aedb81 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -157,7 +157,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, if (flags & MAP_FIXED) return addr; - /* for MAP_32BIT mappings we force the legact mmap base */ + /* for MAP_32BIT mappings we force the legacy mmap base */ if (!test_thread_flag(TIF_ADDR32) && (flags & MAP_32BIT)) goto bottomup; diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 06ccb5073a3f..4b9ea101fe3b 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -623,7 +623,8 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) ns_now = __cycles_2_ns(tsc_now); if (cpu_khz) { - *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz; + *scale = ((NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR) + + cpu_khz / 2) / cpu_khz; *offset = ns_now - mult_frac(tsc_now, *scale, (1UL << CYC2NS_SCALE_FACTOR)); } diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index c71025b67462..0ba4cfb4f412 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -680,8 +680,10 @@ static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) if (auprobe->insn[i] == 0x66) continue; - if (auprobe->insn[i] == 0x90) + if (auprobe->insn[i] == 0x90) { + regs->ip += i + 1; return true; + } break; } diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 7a3d075a814a..d065d67c2672 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -19,6 +19,7 @@ #include <asm/time.h> #include <asm/irq.h> #include <asm/io_apic.h> +#include <asm/hpet.h> #include <asm/pat.h> #include <asm/tsc.h> #include <asm/iommu.h> @@ -111,15 +112,22 @@ struct x86_platform_ops x86_platform = { EXPORT_SYMBOL_GPL(x86_platform); struct x86_msi_ops x86_msi = { - .setup_msi_irqs = native_setup_msi_irqs, - .teardown_msi_irq = native_teardown_msi_irq, - .teardown_msi_irqs = default_teardown_msi_irqs, - .restore_msi_irqs = default_restore_msi_irqs, + .setup_msi_irqs = native_setup_msi_irqs, + .compose_msi_msg = native_compose_msi_msg, + .teardown_msi_irq = native_teardown_msi_irq, + .teardown_msi_irqs = default_teardown_msi_irqs, + .restore_msi_irqs = default_restore_msi_irqs, + .setup_hpet_msi = default_setup_hpet_msi, }; struct x86_io_apic_ops x86_io_apic_ops = { - .init = native_io_apic_init_mappings, - .read = native_io_apic_read, - .write = native_io_apic_write, - .modify = native_io_apic_modify, + .init = native_io_apic_init_mappings, + .read = native_io_apic_read, + .write = native_io_apic_write, + .modify = native_io_apic_modify, + .disable = native_disable_io_apic, + .print_entries = native_io_apic_print_entries, + .set_affinity = native_ioapic_set_affinity, + .setup_entry = native_setup_ioapic_entry, + .eoi_ioapic_pin = native_eoi_ioapic_pin, }; diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 027088f2f7dd..fb674fd3fc22 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -748,13 +748,15 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, return; } #endif + /* Kernel addresses are always protection faults: */ + if (address >= TASK_SIZE) + error_code |= PF_PROT; - if (unlikely(show_unhandled_signals)) + if (likely(show_unhandled_signals)) show_signal_msg(regs, error_code, address, tsk); - /* Kernel addresses are always protection faults: */ tsk->thread.cr2 = address; - tsk->thread.error_code = error_code | (address >= TASK_SIZE); + tsk->thread.error_code = error_code; tsk->thread.trap_nr = X86_TRAP_PF; force_sig_info_fault(SIGSEGV, si_code, address, tsk, 0); diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 2ead3c8a4c84..d6eeead43758 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -605,7 +605,7 @@ kernel_physical_mapping_init(unsigned long start, } if (pgd_changed) - sync_global_pgds(addr, end); + sync_global_pgds(addr, end - 1); __flush_tlb_all(); @@ -831,6 +831,9 @@ int kern_addr_valid(unsigned long addr) if (pud_none(*pud)) return 0; + if (pud_large(*pud)) + return pfn_valid(pud_pfn(*pud)); + pmd = pmd_offset(pud, addr); if (pmd_none(*pmd)) return 0; @@ -981,7 +984,7 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node) } } - sync_global_pgds((unsigned long)start_page, end); + sync_global_pgds((unsigned long)start_page, end - 1); return 0; } diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c index c80b9fb95734..8dabbed409ee 100644 --- a/arch/x86/mm/memtest.c +++ b/arch/x86/mm/memtest.c @@ -9,6 +9,7 @@ #include <linux/memblock.h> static u64 patterns[] __initdata = { + /* The first entry has to be 0 to leave memtest with zeroed memory */ 0, 0xffffffffffffffffULL, 0x5555555555555555ULL, @@ -110,15 +111,8 @@ void __init early_memtest(unsigned long start, unsigned long end) return; printk(KERN_INFO "early_memtest: # of tests: %d\n", memtest_pattern); - for (i = 0; i < memtest_pattern; i++) { + for (i = memtest_pattern-1; i < UINT_MAX; --i) { idx = i % ARRAY_SIZE(patterns); do_one_pass(patterns[idx], start, end); } - - if (idx > 0) { - printk(KERN_INFO "early_memtest: wipe out " - "test pattern from memory\n"); - /* additional test with pattern 0 will do this */ - do_one_pass(0, start, end); - } } diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c index 4ddf497ca65b..cdd0da9dd530 100644 --- a/arch/x86/mm/srat.c +++ b/arch/x86/mm/srat.c @@ -149,39 +149,40 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) int node, pxm; if (srat_disabled()) - return -1; - if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) { - bad_srat(); - return -1; - } + goto out_err; + if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) + goto out_err_bad_srat; if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0) - return -1; - + goto out_err; if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && !save_add_info()) - return -1; + goto out_err; + start = ma->base_address; end = start + ma->length; pxm = ma->proximity_domain; if (acpi_srat_revision <= 1) pxm &= 0xff; + node = setup_node(pxm); if (node < 0) { printk(KERN_ERR "SRAT: Too many proximity domains.\n"); - bad_srat(); - return -1; + goto out_err_bad_srat; } - if (numa_add_memblk(node, start, end) < 0) { - bad_srat(); - return -1; - } + if (numa_add_memblk(node, start, end) < 0) + goto out_err_bad_srat; node_set(node, numa_nodes_parsed); printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]\n", node, pxm, (unsigned long long) start, (unsigned long long) end - 1); + return 0; +out_err_bad_srat: + bad_srat(); +out_err: + return -1; } void __init acpi_numa_arch_fixup(void) {} diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 13a6b29e2e5d..282375f13c7e 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -335,7 +335,7 @@ static const struct file_operations fops_tlbflush = { .llseek = default_llseek, }; -static int __cpuinit create_tlb_flushall_shift(void) +static int __init create_tlb_flushall_shift(void) { debugfs_create_file("tlb_flushall_shift", S_IRUSR | S_IWUSR, arch_debugfs_dir, NULL, &fops_tlbflush); diff --git a/arch/x86/platform/Makefile b/arch/x86/platform/Makefile index 8d874396cb29..01e0231a113e 100644 --- a/arch/x86/platform/Makefile +++ b/arch/x86/platform/Makefile @@ -2,10 +2,12 @@ obj-y += ce4100/ obj-y += efi/ obj-y += geode/ +obj-y += goldfish/ obj-y += iris/ obj-y += mrst/ obj-y += olpc/ obj-y += scx200/ obj-y += sfi/ +obj-y += ts5500/ obj-y += visws/ obj-y += uv/ diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c index d9c1b95af17c..7145ec63c520 100644 --- a/arch/x86/platform/efi/efi-bgrt.c +++ b/arch/x86/platform/efi/efi-bgrt.c @@ -11,20 +11,21 @@ * published by the Free Software Foundation. */ #include <linux/kernel.h> +#include <linux/init.h> #include <linux/acpi.h> #include <linux/efi.h> #include <linux/efi-bgrt.h> struct acpi_table_bgrt *bgrt_tab; -void *bgrt_image; -size_t bgrt_image_size; +void *__initdata bgrt_image; +size_t __initdata bgrt_image_size; struct bmp_header { u16 id; u32 size; } __packed; -void efi_bgrt_init(void) +void __init efi_bgrt_init(void) { acpi_status status; void __iomem *image; diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 77cf0090c0a3..928bf837040a 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -87,7 +87,7 @@ EXPORT_SYMBOL(efi_enabled); static int __init setup_noefi(char *arg) { - clear_bit(EFI_BOOT, &x86_efi_facility); + clear_bit(EFI_RUNTIME_SERVICES, &x86_efi_facility); return 0; } early_param("noefi", setup_noefi); diff --git a/arch/x86/platform/goldfish/Makefile b/arch/x86/platform/goldfish/Makefile new file mode 100644 index 000000000000..f030b532fdf3 --- /dev/null +++ b/arch/x86/platform/goldfish/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_GOLDFISH) += goldfish.o diff --git a/arch/x86/platform/goldfish/goldfish.c b/arch/x86/platform/goldfish/goldfish.c new file mode 100644 index 000000000000..1693107a518e --- /dev/null +++ b/arch/x86/platform/goldfish/goldfish.c @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2007 Google, Inc. + * Copyright (C) 2011 Intel, Inc. + * Copyright (C) 2013 Intel, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/kernel.h> +#include <linux/irq.h> +#include <linux/platform_device.h> + +/* + * Where in virtual device memory the IO devices (timers, system controllers + * and so on) + */ + +#define GOLDFISH_PDEV_BUS_BASE (0xff001000) +#define GOLDFISH_PDEV_BUS_END (0xff7fffff) +#define GOLDFISH_PDEV_BUS_IRQ (4) + +#define GOLDFISH_TTY_BASE (0x2000) + +static struct resource goldfish_pdev_bus_resources[] = { + { + .start = GOLDFISH_PDEV_BUS_BASE, + .end = GOLDFISH_PDEV_BUS_END, + .flags = IORESOURCE_MEM, + }, + { + .start = GOLDFISH_PDEV_BUS_IRQ, + .end = GOLDFISH_PDEV_BUS_IRQ, + .flags = IORESOURCE_IRQ, + } +}; + +static int __init goldfish_init(void) +{ + platform_device_register_simple("goldfish_pdev_bus", -1, + goldfish_pdev_bus_resources, 2); + return 0; +} +device_initcall(goldfish_init); diff --git a/arch/x86/platform/sfi/sfi.c b/arch/x86/platform/sfi/sfi.c index 7785b72ecc3a..bcd1a703e3e6 100644 --- a/arch/x86/platform/sfi/sfi.c +++ b/arch/x86/platform/sfi/sfi.c @@ -35,7 +35,7 @@ static unsigned long sfi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; /* All CPUs enumerated by SFI must be present and enabled */ -static void __cpuinit mp_sfi_register_lapic(u8 id) +static void __init mp_sfi_register_lapic(u8 id) { if (MAX_LOCAL_APIC - id <= 0) { pr_warning("Processor #%d invalid (max %d)\n", diff --git a/arch/x86/platform/ts5500/Makefile b/arch/x86/platform/ts5500/Makefile new file mode 100644 index 000000000000..c54e348c96a7 --- /dev/null +++ b/arch/x86/platform/ts5500/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_TS5500) += ts5500.o diff --git a/arch/x86/platform/ts5500/ts5500.c b/arch/x86/platform/ts5500/ts5500.c new file mode 100644 index 000000000000..39febb214e8c --- /dev/null +++ b/arch/x86/platform/ts5500/ts5500.c @@ -0,0 +1,339 @@ +/* + * Technologic Systems TS-5500 Single Board Computer support + * + * Copyright (C) 2013 Savoir-faire Linux Inc. + * Vivien Didelot <vivien.didelot@savoirfairelinux.com> + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free Software + * Foundation; either version 2 of the License, or (at your option) any later + * version. + * + * + * This driver registers the Technologic Systems TS-5500 Single Board Computer + * (SBC) and its devices, and exposes information to userspace such as jumpers' + * state or available options. For further information about sysfs entries, see + * Documentation/ABI/testing/sysfs-platform-ts5500. + * + * This code actually supports the TS-5500 platform, but it may be extended to + * support similar Technologic Systems x86-based platforms, such as the TS-5600. + */ + +#include <linux/delay.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/leds.h> +#include <linux/module.h> +#include <linux/platform_data/gpio-ts5500.h> +#include <linux/platform_data/max197.h> +#include <linux/platform_device.h> +#include <linux/slab.h> + +/* Product code register */ +#define TS5500_PRODUCT_CODE_ADDR 0x74 +#define TS5500_PRODUCT_CODE 0x60 /* TS-5500 product code */ + +/* SRAM/RS-485/ADC options, and RS-485 RTS/Automatic RS-485 flags register */ +#define TS5500_SRAM_RS485_ADC_ADDR 0x75 +#define TS5500_SRAM BIT(0) /* SRAM option */ +#define TS5500_RS485 BIT(1) /* RS-485 option */ +#define TS5500_ADC BIT(2) /* A/D converter option */ +#define TS5500_RS485_RTS BIT(6) /* RTS for RS-485 */ +#define TS5500_RS485_AUTO BIT(7) /* Automatic RS-485 */ + +/* External Reset/Industrial Temperature Range options register */ +#define TS5500_ERESET_ITR_ADDR 0x76 +#define TS5500_ERESET BIT(0) /* External Reset option */ +#define TS5500_ITR BIT(1) /* Indust. Temp. Range option */ + +/* LED/Jumpers register */ +#define TS5500_LED_JP_ADDR 0x77 +#define TS5500_LED BIT(0) /* LED flag */ +#define TS5500_JP1 BIT(1) /* Automatic CMOS */ +#define TS5500_JP2 BIT(2) /* Enable Serial Console */ +#define TS5500_JP3 BIT(3) /* Write Enable Drive A */ +#define TS5500_JP4 BIT(4) /* Fast Console (115K baud) */ +#define TS5500_JP5 BIT(5) /* User Jumper */ +#define TS5500_JP6 BIT(6) /* Console on COM1 (req. JP2) */ +#define TS5500_JP7 BIT(7) /* Undocumented (Unused) */ + +/* A/D Converter registers */ +#define TS5500_ADC_CONV_BUSY_ADDR 0x195 /* Conversion state register */ +#define TS5500_ADC_CONV_BUSY BIT(0) +#define TS5500_ADC_CONV_INIT_LSB_ADDR 0x196 /* Start conv. / LSB register */ +#define TS5500_ADC_CONV_MSB_ADDR 0x197 /* MSB register */ +#define TS5500_ADC_CONV_DELAY 12 /* usec */ + +/** + * struct ts5500_sbc - TS-5500 board description + * @id: Board product ID. + * @sram: Flag for SRAM option. + * @rs485: Flag for RS-485 option. + * @adc: Flag for Analog/Digital converter option. + * @ereset: Flag for External Reset option. + * @itr: Flag for Industrial Temperature Range option. + * @jumpers: Bitfield for jumpers' state. + */ +struct ts5500_sbc { + int id; + bool sram; + bool rs485; + bool adc; + bool ereset; + bool itr; + u8 jumpers; +}; + +/* Board signatures in BIOS shadow RAM */ +static const struct { + const char * const string; + const ssize_t offset; +} ts5500_signatures[] __initdata = { + { "TS-5x00 AMD Elan", 0xb14 }, +}; + +static int __init ts5500_check_signature(void) +{ + void __iomem *bios; + int i, ret = -ENODEV; + + bios = ioremap(0xf0000, 0x10000); + if (!bios) + return -ENOMEM; + + for (i = 0; i < ARRAY_SIZE(ts5500_signatures); i++) { + if (check_signature(bios + ts5500_signatures[i].offset, + ts5500_signatures[i].string, + strlen(ts5500_signatures[i].string))) { + ret = 0; + break; + } + } + + iounmap(bios); + return ret; +} + +static int __init ts5500_detect_config(struct ts5500_sbc *sbc) +{ + u8 tmp; + int ret = 0; + + if (!request_region(TS5500_PRODUCT_CODE_ADDR, 4, "ts5500")) + return -EBUSY; + + tmp = inb(TS5500_PRODUCT_CODE_ADDR); + if (tmp != TS5500_PRODUCT_CODE) { + pr_err("This platform is not a TS-5500 (found ID 0x%x)\n", tmp); + ret = -ENODEV; + goto cleanup; + } + sbc->id = tmp; + + tmp = inb(TS5500_SRAM_RS485_ADC_ADDR); + sbc->sram = tmp & TS5500_SRAM; + sbc->rs485 = tmp & TS5500_RS485; + sbc->adc = tmp & TS5500_ADC; + + tmp = inb(TS5500_ERESET_ITR_ADDR); + sbc->ereset = tmp & TS5500_ERESET; + sbc->itr = tmp & TS5500_ITR; + + tmp = inb(TS5500_LED_JP_ADDR); + sbc->jumpers = tmp & ~TS5500_LED; + +cleanup: + release_region(TS5500_PRODUCT_CODE_ADDR, 4); + return ret; +} + +static ssize_t ts5500_show_id(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct ts5500_sbc *sbc = dev_get_drvdata(dev); + + return sprintf(buf, "0x%.2x\n", sbc->id); +} + +static ssize_t ts5500_show_jumpers(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ts5500_sbc *sbc = dev_get_drvdata(dev); + + return sprintf(buf, "0x%.2x\n", sbc->jumpers >> 1); +} + +#define TS5500_SHOW(field) \ + static ssize_t ts5500_show_##field(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ + { \ + struct ts5500_sbc *sbc = dev_get_drvdata(dev); \ + return sprintf(buf, "%d\n", sbc->field); \ + } + +TS5500_SHOW(sram) +TS5500_SHOW(rs485) +TS5500_SHOW(adc) +TS5500_SHOW(ereset) +TS5500_SHOW(itr) + +static DEVICE_ATTR(id, S_IRUGO, ts5500_show_id, NULL); +static DEVICE_ATTR(jumpers, S_IRUGO, ts5500_show_jumpers, NULL); +static DEVICE_ATTR(sram, S_IRUGO, ts5500_show_sram, NULL); +static DEVICE_ATTR(rs485, S_IRUGO, ts5500_show_rs485, NULL); +static DEVICE_ATTR(adc, S_IRUGO, ts5500_show_adc, NULL); +static DEVICE_ATTR(ereset, S_IRUGO, ts5500_show_ereset, NULL); +static DEVICE_ATTR(itr, S_IRUGO, ts5500_show_itr, NULL); + +static struct attribute *ts5500_attributes[] = { + &dev_attr_id.attr, + &dev_attr_jumpers.attr, + &dev_attr_sram.attr, + &dev_attr_rs485.attr, + &dev_attr_adc.attr, + &dev_attr_ereset.attr, + &dev_attr_itr.attr, + NULL +}; + +static const struct attribute_group ts5500_attr_group = { + .attrs = ts5500_attributes, +}; + +static struct resource ts5500_dio1_resource[] = { + DEFINE_RES_IRQ_NAMED(7, "DIO1 interrupt"), +}; + +static struct platform_device ts5500_dio1_pdev = { + .name = "ts5500-dio1", + .id = -1, + .resource = ts5500_dio1_resource, + .num_resources = 1, +}; + +static struct resource ts5500_dio2_resource[] = { + DEFINE_RES_IRQ_NAMED(6, "DIO2 interrupt"), +}; + +static struct platform_device ts5500_dio2_pdev = { + .name = "ts5500-dio2", + .id = -1, + .resource = ts5500_dio2_resource, + .num_resources = 1, +}; + +static void ts5500_led_set(struct led_classdev *led_cdev, + enum led_brightness brightness) +{ + outb(!!brightness, TS5500_LED_JP_ADDR); +} + +static enum led_brightness ts5500_led_get(struct led_classdev *led_cdev) +{ + return (inb(TS5500_LED_JP_ADDR) & TS5500_LED) ? LED_FULL : LED_OFF; +} + +static struct led_classdev ts5500_led_cdev = { + .name = "ts5500:green:", + .brightness_set = ts5500_led_set, + .brightness_get = ts5500_led_get, +}; + +static int ts5500_adc_convert(u8 ctrl) +{ + u8 lsb, msb; + + /* Start conversion (ensure the 3 MSB are set to 0) */ + outb(ctrl & 0x1f, TS5500_ADC_CONV_INIT_LSB_ADDR); + + /* + * The platform has CPLD logic driving the A/D converter. + * The conversion must complete within 11 microseconds, + * otherwise we have to re-initiate a conversion. + */ + udelay(TS5500_ADC_CONV_DELAY); + if (inb(TS5500_ADC_CONV_BUSY_ADDR) & TS5500_ADC_CONV_BUSY) + return -EBUSY; + + /* Read the raw data */ + lsb = inb(TS5500_ADC_CONV_INIT_LSB_ADDR); + msb = inb(TS5500_ADC_CONV_MSB_ADDR); + + return (msb << 8) | lsb; +} + +static struct max197_platform_data ts5500_adc_pdata = { + .convert = ts5500_adc_convert, +}; + +static struct platform_device ts5500_adc_pdev = { + .name = "max197", + .id = -1, + .dev = { + .platform_data = &ts5500_adc_pdata, + }, +}; + +static int __init ts5500_init(void) +{ + struct platform_device *pdev; + struct ts5500_sbc *sbc; + int err; + + /* + * There is no DMI available or PCI bridge subvendor info, + * only the BIOS provides a 16-bit identification call. + * It is safer to find a signature in the BIOS shadow RAM. + */ + err = ts5500_check_signature(); + if (err) + return err; + + pdev = platform_device_register_simple("ts5500", -1, NULL, 0); + if (IS_ERR(pdev)) + return PTR_ERR(pdev); + + sbc = devm_kzalloc(&pdev->dev, sizeof(struct ts5500_sbc), GFP_KERNEL); + if (!sbc) { + err = -ENOMEM; + goto error; + } + + err = ts5500_detect_config(sbc); + if (err) + goto error; + + platform_set_drvdata(pdev, sbc); + + err = sysfs_create_group(&pdev->dev.kobj, &ts5500_attr_group); + if (err) + goto error; + + ts5500_dio1_pdev.dev.parent = &pdev->dev; + if (platform_device_register(&ts5500_dio1_pdev)) + dev_warn(&pdev->dev, "DIO1 block registration failed\n"); + ts5500_dio2_pdev.dev.parent = &pdev->dev; + if (platform_device_register(&ts5500_dio2_pdev)) + dev_warn(&pdev->dev, "DIO2 block registration failed\n"); + + if (led_classdev_register(&pdev->dev, &ts5500_led_cdev)) + dev_warn(&pdev->dev, "LED registration failed\n"); + + if (sbc->adc) { + ts5500_adc_pdev.dev.parent = &pdev->dev; + if (platform_device_register(&ts5500_adc_pdev)) + dev_warn(&pdev->dev, "ADC registration failed\n"); + } + + return 0; +error: + platform_device_unregister(pdev); + return err; +} +device_initcall(ts5500_init); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Savoir-faire Linux Inc. <kernel@savoirfairelinux.com>"); +MODULE_DESCRIPTION("Technologic Systems TS-5500 platform driver"); diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index dbbdca5f508c..0f92173a12b6 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -1467,7 +1467,7 @@ static ssize_t ptc_proc_write(struct file *file, const char __user *user, } if (input_arg == 0) { - elements = sizeof(stat_description)/sizeof(*stat_description); + elements = ARRAY_SIZE(stat_description); printk(KERN_DEBUG "# cpu: cpu number\n"); printk(KERN_DEBUG "Sender statistics:\n"); for (i = 0; i < elements; i++) @@ -1508,7 +1508,7 @@ static int parse_tunables_write(struct bau_control *bcp, char *instr, char *q; int cnt = 0; int val; - int e = sizeof(tunables) / sizeof(*tunables); + int e = ARRAY_SIZE(tunables); p = instr + strspn(instr, WHITESPACE); q = p; diff --git a/arch/x86/um/fault.c b/arch/x86/um/fault.c index 8784ab30d91b..84ac7f7b0257 100644 --- a/arch/x86/um/fault.c +++ b/arch/x86/um/fault.c @@ -20,7 +20,7 @@ int arch_fixup(unsigned long address, struct uml_pt_regs *regs) const struct exception_table_entry *fixup; fixup = search_exception_tables(address); - if (fixup != 0) { + if (fixup) { UPT_IP(regs) = fixup->fixup; return 1; } diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 205ad328aa52..c74436e687bf 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -60,7 +60,7 @@ notrace static cycle_t vread_tsc(void) static notrace cycle_t vread_hpet(void) { - return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + 0xf0); + return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + HPET_COUNTER); } #ifdef CONFIG_PARAVIRT_CLOCK diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 138e5667409a..39928d16be3b 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1517,72 +1517,51 @@ asmlinkage void __init xen_start_kernel(void) #endif } -#ifdef CONFIG_XEN_PVHVM -#define HVM_SHARED_INFO_ADDR 0xFE700000UL -static struct shared_info *xen_hvm_shared_info; -static unsigned long xen_hvm_sip_phys; -static int xen_major, xen_minor; - -static void xen_hvm_connect_shared_info(unsigned long pfn) +void __ref xen_hvm_init_shared_info(void) { + int cpu; struct xen_add_to_physmap xatp; + static struct shared_info *shared_info_page = 0; + if (!shared_info_page) + shared_info_page = (struct shared_info *) + extend_brk(PAGE_SIZE, PAGE_SIZE); xatp.domid = DOMID_SELF; xatp.idx = 0; xatp.space = XENMAPSPACE_shared_info; - xatp.gpfn = pfn; + xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT; if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) BUG(); -} -static void __init xen_hvm_set_shared_info(struct shared_info *sip) -{ - int cpu; - - HYPERVISOR_shared_info = sip; + HYPERVISOR_shared_info = (struct shared_info *)shared_info_page; /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info * page, we use it in the event channel upcall and in some pvclock * related functions. We don't need the vcpu_info placement * optimizations because we don't use any pv_mmu or pv_irq op on - * HVM. */ - for_each_online_cpu(cpu) + * HVM. + * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is + * online but xen_hvm_init_shared_info is run at resume time too and + * in that case multiple vcpus might be online. */ + for_each_online_cpu(cpu) { per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; -} - -/* Reconnect the shared_info pfn to a (new) mfn */ -void xen_hvm_resume_shared_info(void) -{ - xen_hvm_connect_shared_info(xen_hvm_sip_phys >> PAGE_SHIFT); -} - -/* Xen tools prior to Xen 4 do not provide a E820_Reserved area for guest usage. - * On these old tools the shared info page will be placed in E820_Ram. - * Xen 4 provides a E820_Reserved area at 0xFC000000, and this code expects - * that nothing is mapped up to HVM_SHARED_INFO_ADDR. - * Xen 4.3+ provides an explicit 1MB area at HVM_SHARED_INFO_ADDR which is used - * here for the shared info page. */ -static void __init xen_hvm_init_shared_info(void) -{ - if (xen_major < 4) { - xen_hvm_shared_info = extend_brk(PAGE_SIZE, PAGE_SIZE); - xen_hvm_sip_phys = __pa(xen_hvm_shared_info); - } else { - xen_hvm_sip_phys = HVM_SHARED_INFO_ADDR; - set_fixmap(FIX_PARAVIRT_BOOTMAP, xen_hvm_sip_phys); - xen_hvm_shared_info = - (struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP); } - xen_hvm_connect_shared_info(xen_hvm_sip_phys >> PAGE_SHIFT); - xen_hvm_set_shared_info(xen_hvm_shared_info); } +#ifdef CONFIG_XEN_PVHVM static void __init init_hvm_pv_info(void) { - uint32_t ecx, edx, pages, msr, base; + int major, minor; + uint32_t eax, ebx, ecx, edx, pages, msr, base; u64 pfn; base = xen_cpuid_base(); + cpuid(base + 1, &eax, &ebx, &ecx, &edx); + + major = eax >> 16; + minor = eax & 0xffff; + printk(KERN_INFO "Xen version %d.%d.\n", major, minor); + cpuid(base + 2, &pages, &msr, &ecx, &edx); pfn = __pa(hypercall_page); @@ -1633,22 +1612,12 @@ static void __init xen_hvm_guest_init(void) static bool __init xen_hvm_platform(void) { - uint32_t eax, ebx, ecx, edx, base; - if (xen_pv_domain()) return false; - base = xen_cpuid_base(); - if (!base) + if (!xen_cpuid_base()) return false; - cpuid(base + 1, &eax, &ebx, &ecx, &edx); - - xen_major = eax >> 16; - xen_minor = eax & 0xffff; - - printk(KERN_INFO "Xen version %d.%d.\n", xen_major, xen_minor); - return true; } @@ -1668,6 +1637,7 @@ const struct hypervisor_x86 x86_hyper_xen_hvm __refconst = { .name = "Xen HVM", .detect = xen_hvm_platform, .init_platform = xen_hvm_guest_init, + .x2apic_available = xen_x2apic_para_available, }; EXPORT_SYMBOL(x86_hyper_xen_hvm); #endif diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index ae8a00c39de4..45329c8c226e 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -30,7 +30,7 @@ void xen_arch_hvm_post_suspend(int suspend_cancelled) { #ifdef CONFIG_XEN_PVHVM int cpu; - xen_hvm_resume_shared_info(); + xen_hvm_init_shared_info(); xen_callback_vector(); xen_unplug_emulated_devices(); if (xen_feature(XENFEAT_hvm_safe_pvclock)) { diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S index f9643fc50de5..33ca6e42a4ca 100644 --- a/arch/x86/xen/xen-asm_32.S +++ b/arch/x86/xen/xen-asm_32.S @@ -89,11 +89,11 @@ ENTRY(xen_iret) */ #ifdef CONFIG_SMP GET_THREAD_INFO(%eax) - movl TI_cpu(%eax), %eax - movl __per_cpu_offset(,%eax,4), %eax - mov xen_vcpu(%eax), %eax + movl %ss:TI_cpu(%eax), %eax + movl %ss:__per_cpu_offset(,%eax,4), %eax + mov %ss:xen_vcpu(%eax), %eax #else - movl xen_vcpu, %eax + movl %ss:xen_vcpu, %eax #endif /* check IF state we're restoring */ @@ -106,11 +106,11 @@ ENTRY(xen_iret) * resuming the code, so we don't have to be worried about * being preempted to another CPU. */ - setz XEN_vcpu_info_mask(%eax) + setz %ss:XEN_vcpu_info_mask(%eax) xen_iret_start_crit: /* check for unmasked and pending */ - cmpw $0x0001, XEN_vcpu_info_pending(%eax) + cmpw $0x0001, %ss:XEN_vcpu_info_pending(%eax) /* * If there's something pending, mask events again so we can @@ -118,7 +118,7 @@ xen_iret_start_crit: * touch XEN_vcpu_info_mask. */ jne 1f - movb $1, XEN_vcpu_info_mask(%eax) + movb $1, %ss:XEN_vcpu_info_mask(%eax) 1: popl %eax diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index d2e73d19d366..a95b41744ad0 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -40,7 +40,7 @@ void xen_enable_syscall(void); void xen_vcpu_restore(void); void xen_callback_vector(void); -void xen_hvm_resume_shared_info(void); +void xen_hvm_init_shared_info(void); void xen_unplug_emulated_devices(void); void __init xen_build_dynamic_phys_to_machine(void); diff --git a/block/blk-exec.c b/block/blk-exec.c index 74638ec234c8..c88202f973d9 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c @@ -5,6 +5,7 @@ #include <linux/module.h> #include <linux/bio.h> #include <linux/blkdev.h> +#include <linux/sched/sysctl.h> #include "blk.h" diff --git a/drivers/acpi/apei/cper.c b/drivers/acpi/apei/cper.c index e6defd86b424..1e5d8a40101e 100644 --- a/drivers/acpi/apei/cper.c +++ b/drivers/acpi/apei/cper.c @@ -29,6 +29,7 @@ #include <linux/time.h> #include <linux/cper.h> #include <linux/acpi.h> +#include <linux/pci.h> #include <linux/aer.h> /* @@ -249,6 +250,10 @@ static const char *cper_pcie_port_type_strs[] = { static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie, const struct acpi_hest_generic_data *gdata) { +#ifdef CONFIG_ACPI_APEI_PCIEAER + struct pci_dev *dev; +#endif + if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE) printk("%s""port_type: %d, %s\n", pfx, pcie->port_type, pcie->port_type < ARRAY_SIZE(cper_pcie_port_type_strs) ? @@ -281,10 +286,18 @@ static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie, "%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n", pfx, pcie->bridge.secondary_status, pcie->bridge.control); #ifdef CONFIG_ACPI_APEI_PCIEAER - if (pcie->validation_bits & CPER_PCIE_VALID_AER_INFO) { - struct aer_capability_regs *aer_regs = (void *)pcie->aer_info; - cper_print_aer(pfx, gdata->error_severity, aer_regs); + dev = pci_get_domain_bus_and_slot(pcie->device_id.segment, + pcie->device_id.bus, pcie->device_id.function); + if (!dev) { + pr_err("PCI AER Cannot get PCI device %04x:%02x:%02x.%d\n", + pcie->device_id.segment, pcie->device_id.bus, + pcie->device_id.slot, pcie->device_id.function); + return; } + if (pcie->validation_bits & CPER_PCIE_VALID_AER_INFO) + cper_print_aer(pfx, dev, gdata->error_severity, + (struct aer_capability_regs *) pcie->aer_info); + pci_dev_put(dev); #endif } diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 497912732566..495aeed26779 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -1061,6 +1061,86 @@ static inline void ahci_gtf_filter_workaround(struct ata_host *host) {} #endif +int ahci_init_interrupts(struct pci_dev *pdev, struct ahci_host_priv *hpriv) +{ + int rc; + unsigned int maxvec; + + if (!(hpriv->flags & AHCI_HFLAG_NO_MSI)) { + rc = pci_enable_msi_block_auto(pdev, &maxvec); + if (rc > 0) { + if ((rc == maxvec) || (rc == 1)) + return rc; + /* + * Assume that advantage of multipe MSIs is negated, + * so fallback to single MSI mode to save resources + */ + pci_disable_msi(pdev); + if (!pci_enable_msi(pdev)) + return 1; + } + } + + pci_intx(pdev, 1); + return 0; +} + +/** + * ahci_host_activate - start AHCI host, request IRQs and register it + * @host: target ATA host + * @irq: base IRQ number to request + * @n_msis: number of MSIs allocated for this host + * @irq_handler: irq_handler used when requesting IRQs + * @irq_flags: irq_flags used when requesting IRQs + * + * Similar to ata_host_activate, but requests IRQs according to AHCI-1.1 + * when multiple MSIs were allocated. That is one MSI per port, starting + * from @irq. + * + * LOCKING: + * Inherited from calling layer (may sleep). + * + * RETURNS: + * 0 on success, -errno otherwise. + */ +int ahci_host_activate(struct ata_host *host, int irq, unsigned int n_msis) +{ + int i, rc; + + /* Sharing Last Message among several ports is not supported */ + if (n_msis < host->n_ports) + return -EINVAL; + + rc = ata_host_start(host); + if (rc) + return rc; + + for (i = 0; i < host->n_ports; i++) { + rc = devm_request_threaded_irq(host->dev, + irq + i, ahci_hw_interrupt, ahci_thread_fn, IRQF_SHARED, + dev_driver_string(host->dev), host->ports[i]); + if (rc) + goto out_free_irqs; + } + + for (i = 0; i < host->n_ports; i++) + ata_port_desc(host->ports[i], "irq %d", irq + i); + + rc = ata_host_register(host, &ahci_sht); + if (rc) + goto out_free_all_irqs; + + return 0; + +out_free_all_irqs: + i = host->n_ports; +out_free_irqs: + for (i--; i >= 0; i--) + devm_free_irq(host->dev, irq + i, host->ports[i]); + + return rc; +} + static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { unsigned int board_id = ent->driver_data; @@ -1069,7 +1149,7 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) struct device *dev = &pdev->dev; struct ahci_host_priv *hpriv; struct ata_host *host; - int n_ports, i, rc; + int n_ports, n_msis, i, rc; int ahci_pci_bar = AHCI_PCI_BAR_STANDARD; VPRINTK("ENTER\n"); @@ -1156,11 +1236,12 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (ahci_sb600_enable_64bit(pdev)) hpriv->flags &= ~AHCI_HFLAG_32BIT_ONLY; - if ((hpriv->flags & AHCI_HFLAG_NO_MSI) || pci_enable_msi(pdev)) - pci_intx(pdev, 1); - hpriv->mmio = pcim_iomap_table(pdev)[ahci_pci_bar]; + n_msis = ahci_init_interrupts(pdev, hpriv); + if (n_msis > 1) + hpriv->flags |= AHCI_HFLAG_MULTI_MSI; + /* save initial config */ ahci_pci_save_initial_config(pdev, hpriv); @@ -1256,6 +1337,10 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) ahci_pci_print_info(host); pci_set_master(pdev); + + if (hpriv->flags & AHCI_HFLAG_MULTI_MSI) + return ahci_host_activate(host, pdev->irq, n_msis); + return ata_host_activate(host, pdev->irq, ahci_interrupt, IRQF_SHARED, &ahci_sht); } diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h index 9be471200a07..b830e6c9fe49 100644 --- a/drivers/ata/ahci.h +++ b/drivers/ata/ahci.h @@ -231,6 +231,7 @@ enum { AHCI_HFLAG_DELAY_ENGINE = (1 << 15), /* do not start engine on port start (wait until error-handling stage) */ + AHCI_HFLAG_MULTI_MSI = (1 << 16), /* multiple PCI MSIs */ /* ap->flags bits */ @@ -297,6 +298,8 @@ struct ahci_port_priv { unsigned int ncq_saw_d2h:1; unsigned int ncq_saw_dmas:1; unsigned int ncq_saw_sdb:1; + u32 intr_status; /* interrupts to handle */ + spinlock_t lock; /* protects parent ata_port */ u32 intr_mask; /* interrupts to enable */ bool fbs_supported; /* set iff FBS is supported */ bool fbs_enabled; /* set iff FBS is enabled */ @@ -359,7 +362,10 @@ void ahci_set_em_messages(struct ahci_host_priv *hpriv, struct ata_port_info *pi); int ahci_reset_em(struct ata_host *host); irqreturn_t ahci_interrupt(int irq, void *dev_instance); +irqreturn_t ahci_hw_interrupt(int irq, void *dev_instance); +irqreturn_t ahci_thread_fn(int irq, void *dev_instance); void ahci_print_info(struct ata_host *host, const char *scc_s); +int ahci_host_activate(struct ata_host *host, int irq, unsigned int n_msis); static inline void __iomem *__ahci_port_base(struct ata_host *host, unsigned int port_no) diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index 6cd7805e47ca..34c82167b962 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -1655,19 +1655,16 @@ static void ahci_error_intr(struct ata_port *ap, u32 irq_stat) ata_port_abort(ap); } -static void ahci_port_intr(struct ata_port *ap) +static void ahci_handle_port_interrupt(struct ata_port *ap, + void __iomem *port_mmio, u32 status) { - void __iomem *port_mmio = ahci_port_base(ap); struct ata_eh_info *ehi = &ap->link.eh_info; struct ahci_port_priv *pp = ap->private_data; struct ahci_host_priv *hpriv = ap->host->private_data; int resetting = !!(ap->pflags & ATA_PFLAG_RESETTING); - u32 status, qc_active = 0; + u32 qc_active = 0; int rc; - status = readl(port_mmio + PORT_IRQ_STAT); - writel(status, port_mmio + PORT_IRQ_STAT); - /* ignore BAD_PMP while resetting */ if (unlikely(resetting)) status &= ~PORT_IRQ_BAD_PMP; @@ -1743,6 +1740,107 @@ static void ahci_port_intr(struct ata_port *ap) } } +void ahci_port_intr(struct ata_port *ap) +{ + void __iomem *port_mmio = ahci_port_base(ap); + u32 status; + + status = readl(port_mmio + PORT_IRQ_STAT); + writel(status, port_mmio + PORT_IRQ_STAT); + + ahci_handle_port_interrupt(ap, port_mmio, status); +} + +irqreturn_t ahci_thread_fn(int irq, void *dev_instance) +{ + struct ata_port *ap = dev_instance; + struct ahci_port_priv *pp = ap->private_data; + void __iomem *port_mmio = ahci_port_base(ap); + unsigned long flags; + u32 status; + + spin_lock_irqsave(&ap->host->lock, flags); + status = pp->intr_status; + if (status) + pp->intr_status = 0; + spin_unlock_irqrestore(&ap->host->lock, flags); + + spin_lock_bh(ap->lock); + ahci_handle_port_interrupt(ap, port_mmio, status); + spin_unlock_bh(ap->lock); + + return IRQ_HANDLED; +} +EXPORT_SYMBOL_GPL(ahci_thread_fn); + +void ahci_hw_port_interrupt(struct ata_port *ap) +{ + void __iomem *port_mmio = ahci_port_base(ap); + struct ahci_port_priv *pp = ap->private_data; + u32 status; + + status = readl(port_mmio + PORT_IRQ_STAT); + writel(status, port_mmio + PORT_IRQ_STAT); + + pp->intr_status |= status; +} + +irqreturn_t ahci_hw_interrupt(int irq, void *dev_instance) +{ + struct ata_port *ap_this = dev_instance; + struct ahci_port_priv *pp = ap_this->private_data; + struct ata_host *host = ap_this->host; + struct ahci_host_priv *hpriv = host->private_data; + void __iomem *mmio = hpriv->mmio; + unsigned int i; + u32 irq_stat, irq_masked; + + VPRINTK("ENTER\n"); + + spin_lock(&host->lock); + + irq_stat = readl(mmio + HOST_IRQ_STAT); + + if (!irq_stat) { + u32 status = pp->intr_status; + + spin_unlock(&host->lock); + + VPRINTK("EXIT\n"); + + return status ? IRQ_WAKE_THREAD : IRQ_NONE; + } + + irq_masked = irq_stat & hpriv->port_map; + + for (i = 0; i < host->n_ports; i++) { + struct ata_port *ap; + + if (!(irq_masked & (1 << i))) + continue; + + ap = host->ports[i]; + if (ap) { + ahci_hw_port_interrupt(ap); + VPRINTK("port %u\n", i); + } else { + VPRINTK("port %u (no irq)\n", i); + if (ata_ratelimit()) + dev_warn(host->dev, + "interrupt on disabled port %u\n", i); + } + } + + writel(irq_stat, mmio + HOST_IRQ_STAT); + + spin_unlock(&host->lock); + + VPRINTK("EXIT\n"); + + return IRQ_WAKE_THREAD; +} +EXPORT_SYMBOL_GPL(ahci_hw_interrupt); + irqreturn_t ahci_interrupt(int irq, void *dev_instance) { struct ata_host *host = dev_instance; @@ -2196,6 +2294,14 @@ static int ahci_port_start(struct ata_port *ap) */ pp->intr_mask = DEF_PORT_IRQ; + /* + * Switch to per-port locking in case each port has its own MSI vector. + */ + if ((hpriv->flags & AHCI_HFLAG_MULTI_MSI)) { + spin_lock_init(&pp->lock); + ap->lock = &pp->lock; + } + ap->private_data = pp; /* engage engines, captain */ diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index 564156a8e572..5814deb6963d 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c @@ -461,7 +461,7 @@ static int generic_request(struct vdc_port *port, u8 op, void *buf, int len) int op_len, err; void *req_buf; - if (!(((u64)1 << ((u64)op - 1)) & port->operations)) + if (!(((u64)1 << (u64)op) & port->operations)) return -EOPNOTSUPP; switch (op) { diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index ad8bf2aa629d..2d3f8825e8b8 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -31,7 +31,7 @@ static struct ecc_settings **ecc_stngs; * *FIXME: Produce a better mapping/linearisation. */ -struct scrubrate { +static const struct scrubrate { u32 scrubval; /* bit pattern for scrub rate */ u32 bandwidth; /* bandwidth consumed (bytes/sec) */ } scrubrates[] = { @@ -239,7 +239,7 @@ static int amd64_get_scrub_rate(struct mem_ctl_info *mci) * DRAM base/limit associated with node_id */ static bool amd64_base_limit_match(struct amd64_pvt *pvt, u64 sys_addr, - unsigned nid) + u8 nid) { u64 addr; @@ -265,7 +265,7 @@ static struct mem_ctl_info *find_mc_by_sys_addr(struct mem_ctl_info *mci, u64 sys_addr) { struct amd64_pvt *pvt; - unsigned node_id; + u8 node_id; u32 intlv_en, bits; /* @@ -939,7 +939,8 @@ static u64 get_error_address(struct mce *m) struct amd64_pvt *pvt; u64 cc6_base, tmp_addr; u32 tmp; - u8 mce_nid, intlv_en; + u16 mce_nid; + u8 intlv_en; if ((addr & GENMASK(24, 47)) >> 24 != 0x00fdf7) return addr; @@ -979,10 +980,29 @@ static u64 get_error_address(struct mce *m) return addr; } +static struct pci_dev *pci_get_related_function(unsigned int vendor, + unsigned int device, + struct pci_dev *related) +{ + struct pci_dev *dev = NULL; + + while ((dev = pci_get_device(vendor, device, dev))) { + if (pci_domain_nr(dev->bus) == pci_domain_nr(related->bus) && + (dev->bus->number == related->bus->number) && + (PCI_SLOT(dev->devfn) == PCI_SLOT(related->devfn))) + break; + } + + return dev; +} + static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range) { + struct amd_northbridge *nb; + struct pci_dev *misc, *f1 = NULL; struct cpuinfo_x86 *c = &boot_cpu_data; int off = range << 3; + u32 llim; amd64_read_pci_cfg(pvt->F1, DRAM_BASE_LO + off, &pvt->ranges[range].base.lo); amd64_read_pci_cfg(pvt->F1, DRAM_LIMIT_LO + off, &pvt->ranges[range].lim.lo); @@ -996,30 +1016,32 @@ static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range) amd64_read_pci_cfg(pvt->F1, DRAM_BASE_HI + off, &pvt->ranges[range].base.hi); amd64_read_pci_cfg(pvt->F1, DRAM_LIMIT_HI + off, &pvt->ranges[range].lim.hi); - /* Factor in CC6 save area by reading dst node's limit reg */ - if (c->x86 == 0x15) { - struct pci_dev *f1 = NULL; - u8 nid = dram_dst_node(pvt, range); - u32 llim; + /* F15h: factor in CC6 save area by reading dst node's limit reg */ + if (c->x86 != 0x15) + return; - f1 = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0x18 + nid, 1)); - if (WARN_ON(!f1)) - return; + nb = node_to_amd_nb(dram_dst_node(pvt, range)); + if (WARN_ON(!nb)) + return; - amd64_read_pci_cfg(f1, DRAM_LOCAL_NODE_LIM, &llim); + misc = nb->misc; + f1 = pci_get_related_function(misc->vendor, PCI_DEVICE_ID_AMD_15H_NB_F1, misc); + if (WARN_ON(!f1)) + return; - pvt->ranges[range].lim.lo &= GENMASK(0, 15); + amd64_read_pci_cfg(f1, DRAM_LOCAL_NODE_LIM, &llim); - /* {[39:27],111b} */ - pvt->ranges[range].lim.lo |= ((llim & 0x1fff) << 3 | 0x7) << 16; + pvt->ranges[range].lim.lo &= GENMASK(0, 15); - pvt->ranges[range].lim.hi &= GENMASK(0, 7); + /* {[39:27],111b} */ + pvt->ranges[range].lim.lo |= ((llim & 0x1fff) << 3 | 0x7) << 16; - /* [47:40] */ - pvt->ranges[range].lim.hi |= llim >> 13; + pvt->ranges[range].lim.hi &= GENMASK(0, 7); - pci_dev_put(f1); - } + /* [47:40] */ + pvt->ranges[range].lim.hi |= llim >> 13; + + pci_dev_put(f1); } static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr, @@ -1305,7 +1327,7 @@ static u8 f1x_determine_channel(struct amd64_pvt *pvt, u64 sys_addr, } /* Convert the sys_addr to the normalized DCT address */ -static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, unsigned range, +static u64 f1x_get_norm_dct_addr(struct amd64_pvt *pvt, u8 range, u64 sys_addr, bool hi_rng, u32 dct_sel_base_addr) { @@ -1381,7 +1403,7 @@ static int f10_process_possible_spare(struct amd64_pvt *pvt, u8 dct, int csrow) * -EINVAL: NOT FOUND * 0..csrow = Chip-Select Row */ -static int f1x_lookup_addr_in_dct(u64 in_addr, u32 nid, u8 dct) +static int f1x_lookup_addr_in_dct(u64 in_addr, u8 nid, u8 dct) { struct mem_ctl_info *mci; struct amd64_pvt *pvt; @@ -1672,23 +1694,6 @@ static struct amd64_family_type amd64_family_types[] = { }, }; -static struct pci_dev *pci_get_related_function(unsigned int vendor, - unsigned int device, - struct pci_dev *related) -{ - struct pci_dev *dev = NULL; - - dev = pci_get_device(vendor, device, dev); - while (dev) { - if ((dev->bus->number == related->bus->number) && - (PCI_SLOT(dev->devfn) == PCI_SLOT(related->devfn))) - break; - dev = pci_get_device(vendor, device, dev); - } - - return dev; -} - /* * These are tables of eigenvectors (one per line) which can be used for the * construction of the syndrome tables. The modified syndrome search algorithm @@ -1696,7 +1701,7 @@ static struct pci_dev *pci_get_related_function(unsigned int vendor, * * Algorithm courtesy of Ross LaFetra from AMD. */ -static u16 x4_vectors[] = { +static const u16 x4_vectors[] = { 0x2f57, 0x1afe, 0x66cc, 0xdd88, 0x11eb, 0x3396, 0x7f4c, 0xeac8, 0x0001, 0x0002, 0x0004, 0x0008, @@ -1735,7 +1740,7 @@ static u16 x4_vectors[] = { 0x19a9, 0x2efe, 0xb5cc, 0x6f88, }; -static u16 x8_vectors[] = { +static const u16 x8_vectors[] = { 0x0145, 0x028a, 0x2374, 0x43c8, 0xa1f0, 0x0520, 0x0a40, 0x1480, 0x0211, 0x0422, 0x0844, 0x1088, 0x01b0, 0x44e0, 0x23c0, 0xed80, 0x1011, 0x0116, 0x022c, 0x0458, 0x08b0, 0x8c60, 0x2740, 0x4e80, @@ -1757,7 +1762,7 @@ static u16 x8_vectors[] = { 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000, 0x8000, }; -static int decode_syndrome(u16 syndrome, u16 *vectors, unsigned num_vecs, +static int decode_syndrome(u16 syndrome, const u16 *vectors, unsigned num_vecs, unsigned v_dim) { unsigned int i, err_sym; @@ -2181,7 +2186,7 @@ static int init_csrows(struct mem_ctl_info *mci) } /* get all cores on this DCT */ -static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, unsigned nid) +static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, u16 nid) { int cpu; @@ -2191,7 +2196,7 @@ static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, unsigned nid) } /* check MCG_CTL on all the cpus on this node */ -static bool amd64_nb_mce_bank_enabled_on_node(unsigned nid) +static bool amd64_nb_mce_bank_enabled_on_node(u16 nid) { cpumask_var_t mask; int cpu, nbe; @@ -2224,7 +2229,7 @@ out: return ret; } -static int toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on) +static int toggle_ecc_err_reporting(struct ecc_settings *s, u16 nid, bool on) { cpumask_var_t cmask; int cpu; @@ -2262,7 +2267,7 @@ static int toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on) return 0; } -static bool enable_ecc_error_reporting(struct ecc_settings *s, u8 nid, +static bool enable_ecc_error_reporting(struct ecc_settings *s, u16 nid, struct pci_dev *F3) { bool ret = true; @@ -2314,7 +2319,7 @@ static bool enable_ecc_error_reporting(struct ecc_settings *s, u8 nid, return ret; } -static void restore_ecc_error_reporting(struct ecc_settings *s, u8 nid, +static void restore_ecc_error_reporting(struct ecc_settings *s, u16 nid, struct pci_dev *F3) { u32 value, mask = 0x3; /* UECC/CECC enable */ @@ -2353,7 +2358,7 @@ static const char *ecc_msg = "'ecc_enable_override'.\n" " (Note that use of the override may cause unknown side effects.)\n"; -static bool ecc_enabled(struct pci_dev *F3, u8 nid) +static bool ecc_enabled(struct pci_dev *F3, u16 nid) { u32 value; u8 ecc_en = 0; @@ -2474,7 +2479,7 @@ static int amd64_init_one_instance(struct pci_dev *F2) struct mem_ctl_info *mci = NULL; struct edac_mc_layer layers[2]; int err = 0, ret; - u8 nid = get_node_id(F2); + u16 nid = amd_get_node_id(F2); ret = -ENOMEM; pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL); @@ -2566,7 +2571,7 @@ err_ret: static int amd64_probe_one_instance(struct pci_dev *pdev, const struct pci_device_id *mc_type) { - u8 nid = get_node_id(pdev); + u16 nid = amd_get_node_id(pdev); struct pci_dev *F3 = node_to_amd_nb(nid)->misc; struct ecc_settings *s; int ret = 0; @@ -2616,7 +2621,7 @@ static void amd64_remove_one_instance(struct pci_dev *pdev) { struct mem_ctl_info *mci; struct amd64_pvt *pvt; - u8 nid = get_node_id(pdev); + u16 nid = amd_get_node_id(pdev); struct pci_dev *F3 = node_to_amd_nb(nid)->misc; struct ecc_settings *s = ecc_stngs[nid]; diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index e864f407806c..35637d83f235 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -292,12 +292,6 @@ /* MSRs */ #define MSR_MCGCTL_NBE BIT(4) -/* AMD sets the first MC device at device ID 0x18. */ -static inline u8 get_node_id(struct pci_dev *pdev) -{ - return PCI_SLOT(pdev->devfn) - 0x18; -} - enum amd_families { K8_CPUS = 0, F10_CPUS, @@ -340,7 +334,7 @@ struct amd64_pvt { /* pci_device handles which we utilize */ struct pci_dev *F1, *F2, *F3; - unsigned mc_node_id; /* MC index of this MC node */ + u16 mc_node_id; /* MC index of this MC node */ int ext_model; /* extended model value of this node */ int channel_count; @@ -393,7 +387,7 @@ struct err_info { u32 offset; }; -static inline u64 get_dram_base(struct amd64_pvt *pvt, unsigned i) +static inline u64 get_dram_base(struct amd64_pvt *pvt, u8 i) { u64 addr = ((u64)pvt->ranges[i].base.lo & 0xffff0000) << 8; @@ -403,7 +397,7 @@ static inline u64 get_dram_base(struct amd64_pvt *pvt, unsigned i) return (((u64)pvt->ranges[i].base.hi & 0x000000ff) << 40) | addr; } -static inline u64 get_dram_limit(struct amd64_pvt *pvt, unsigned i) +static inline u64 get_dram_limit(struct amd64_pvt *pvt, u8 i) { u64 lim = (((u64)pvt->ranges[i].lim.lo & 0xffff0000) << 8) | 0x00ffffff; diff --git a/drivers/gpu/drm/nouveau/core/core/falcon.c b/drivers/gpu/drm/nouveau/core/core/falcon.c index 6b0843c33877..e05c15777588 100644 --- a/drivers/gpu/drm/nouveau/core/core/falcon.c +++ b/drivers/gpu/drm/nouveau/core/core/falcon.c @@ -73,8 +73,11 @@ _nouveau_falcon_init(struct nouveau_object *object) nv_debug(falcon, "data limit: %d\n", falcon->data.limit); /* wait for 'uc halted' to be signalled before continuing */ - if (falcon->secret) { - nv_wait(falcon, 0x008, 0x00000010, 0x00000010); + if (falcon->secret && falcon->version < 4) { + if (!falcon->version) + nv_wait(falcon, 0x008, 0x00000010, 0x00000010); + else + nv_wait(falcon, 0x180, 0x80000000, 0); nv_wo32(falcon, 0x004, 0x00000010); } diff --git a/drivers/gpu/drm/nouveau/core/core/subdev.c b/drivers/gpu/drm/nouveau/core/core/subdev.c index f74c30aa33a0..48f06378d3f9 100644 --- a/drivers/gpu/drm/nouveau/core/core/subdev.c +++ b/drivers/gpu/drm/nouveau/core/core/subdev.c @@ -99,7 +99,7 @@ nouveau_subdev_create_(struct nouveau_object *parent, if (ret) return ret; - mutex_init(&subdev->mutex); + __mutex_init(&subdev->mutex, subname, &oclass->lock_class_key); subdev->name = subname; if (parent) { diff --git a/drivers/gpu/drm/nouveau/core/include/core/object.h b/drivers/gpu/drm/nouveau/core/include/core/object.h index 5982935ee23a..106bb19fdd9a 100644 --- a/drivers/gpu/drm/nouveau/core/include/core/object.h +++ b/drivers/gpu/drm/nouveau/core/include/core/object.h @@ -50,10 +50,13 @@ int nouveau_object_fini(struct nouveau_object *, bool suspend); extern struct nouveau_ofuncs nouveau_object_ofuncs; +/* Don't allocate dynamically, because lockdep needs lock_class_keys to be in + * ".data". */ struct nouveau_oclass { u32 handle; - struct nouveau_ofuncs *ofuncs; - struct nouveau_omthds *omthds; + struct nouveau_ofuncs * const ofuncs; + struct nouveau_omthds * const omthds; + struct lock_class_key lock_class_key; }; #define nv_oclass(o) nv_object(o)->oclass diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/base.c b/drivers/gpu/drm/nouveau/core/subdev/fb/base.c index d6d16007ec1a..d62045f454b2 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/fb/base.c +++ b/drivers/gpu/drm/nouveau/core/subdev/fb/base.c @@ -86,8 +86,8 @@ nouveau_fb_preinit(struct nouveau_fb *pfb) return ret; } - if (!nouveau_mm_initialised(&pfb->tags) && tags) { - ret = nouveau_mm_init(&pfb->tags, 0, ++tags, 1); + if (!nouveau_mm_initialised(&pfb->tags)) { + ret = nouveau_mm_init(&pfb->tags, 0, tags ? ++tags : 0, 1); if (ret) return ret; } diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/nv50.c b/drivers/gpu/drm/nouveau/core/subdev/fb/nv50.c index 487cb8c6c204..eac236ed19b2 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/fb/nv50.c +++ b/drivers/gpu/drm/nouveau/core/subdev/fb/nv50.c @@ -99,7 +99,7 @@ nv50_fb_vram_init(struct nouveau_fb *pfb) struct nouveau_bios *bios = nouveau_bios(device); const u32 rsvd_head = ( 256 * 1024) >> 12; /* vga memory */ const u32 rsvd_tail = (1024 * 1024) >> 12; /* vbios etc */ - u32 size; + u32 size, tags = 0; int ret; pfb->ram.size = nv_rd32(pfb, 0x10020c); @@ -140,10 +140,11 @@ nv50_fb_vram_init(struct nouveau_fb *pfb) return ret; pfb->ram.ranks = (nv_rd32(pfb, 0x100200) & 0x4) ? 2 : 1; + tags = nv_rd32(pfb, 0x100320); break; } - return nv_rd32(pfb, 0x100320); + return tags; } static int diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 69d7b1d0b9d6..1699a9083a2f 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -28,6 +28,7 @@ */ #include <core/engine.h> +#include <linux/swiotlb.h> #include <subdev/fb.h> #include <subdev/vm.h> diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 8b090f1eb51d..5e7aef23825a 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -245,6 +245,8 @@ static int nouveau_drm_probe(struct pci_dev *pdev, return 0; } +static struct lock_class_key drm_client_lock_class_key; + static int nouveau_drm_load(struct drm_device *dev, unsigned long flags) { @@ -256,6 +258,7 @@ nouveau_drm_load(struct drm_device *dev, unsigned long flags) ret = nouveau_cli_create(pdev, "DRM", sizeof(*drm), (void**)&drm); if (ret) return ret; + lockdep_set_class(&drm->client.mutex, &drm_client_lock_class_key); dev->dev_private = drm; drm->dev = dev; diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c index 7a445666e71f..ee4cff534f10 100644 --- a/drivers/gpu/drm/radeon/evergreen_cs.c +++ b/drivers/gpu/drm/radeon/evergreen_cs.c @@ -2909,14 +2909,14 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) return -EINVAL; } if (tiled) { - dst_offset = ib[idx+1]; + dst_offset = radeon_get_ib_value(p, idx+1); dst_offset <<= 8; ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); p->idx += count + 7; } else { - dst_offset = ib[idx+1]; - dst_offset |= ((u64)(ib[idx+2] & 0xff)) << 32; + dst_offset = radeon_get_ib_value(p, idx+1); + dst_offset |= ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32; ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; @@ -2954,12 +2954,12 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n"); return -EINVAL; } - dst_offset = ib[idx+1]; + dst_offset = radeon_get_ib_value(p, idx+1); dst_offset <<= 8; - dst2_offset = ib[idx+2]; + dst2_offset = radeon_get_ib_value(p, idx+2); dst2_offset <<= 8; - src_offset = ib[idx+8]; - src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32; + src_offset = radeon_get_ib_value(p, idx+8); + src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32; if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { dev_warn(p->dev, "DMA L2T, frame to fields src buffer too small (%llu %lu)\n", src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); @@ -3014,12 +3014,12 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n"); return -EINVAL; } - dst_offset = ib[idx+1]; + dst_offset = radeon_get_ib_value(p, idx+1); dst_offset <<= 8; - dst2_offset = ib[idx+2]; + dst2_offset = radeon_get_ib_value(p, idx+2); dst2_offset <<= 8; - src_offset = ib[idx+8]; - src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32; + src_offset = radeon_get_ib_value(p, idx+8); + src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32; if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n", src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); @@ -3046,22 +3046,22 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) /* detile bit */ if (idx_value & (1 << 31)) { /* tiled src, linear dst */ - src_offset = ib[idx+1]; + src_offset = radeon_get_ib_value(p, idx+1); src_offset <<= 8; ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8); - dst_offset = ib[idx+7]; - dst_offset |= ((u64)(ib[idx+8] & 0xff)) << 32; + dst_offset = radeon_get_ib_value(p, idx+7); + dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32; ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; } else { /* linear src, tiled dst */ - src_offset = ib[idx+7]; - src_offset |= ((u64)(ib[idx+8] & 0xff)) << 32; + src_offset = radeon_get_ib_value(p, idx+7); + src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32; ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; - dst_offset = ib[idx+1]; + dst_offset = radeon_get_ib_value(p, idx+1); dst_offset <<= 8; ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); } @@ -3098,12 +3098,12 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n"); return -EINVAL; } - dst_offset = ib[idx+1]; + dst_offset = radeon_get_ib_value(p, idx+1); dst_offset <<= 8; - dst2_offset = ib[idx+2]; + dst2_offset = radeon_get_ib_value(p, idx+2); dst2_offset <<= 8; - src_offset = ib[idx+8]; - src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32; + src_offset = radeon_get_ib_value(p, idx+8); + src_offset |= ((u64)(radeon_get_ib_value(p, idx+9) & 0xff)) << 32; if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n", src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); @@ -3135,22 +3135,22 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) /* detile bit */ if (idx_value & (1 << 31)) { /* tiled src, linear dst */ - src_offset = ib[idx+1]; + src_offset = radeon_get_ib_value(p, idx+1); src_offset <<= 8; ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8); - dst_offset = ib[idx+7]; - dst_offset |= ((u64)(ib[idx+8] & 0xff)) << 32; + dst_offset = radeon_get_ib_value(p, idx+7); + dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32; ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; } else { /* linear src, tiled dst */ - src_offset = ib[idx+7]; - src_offset |= ((u64)(ib[idx+8] & 0xff)) << 32; + src_offset = radeon_get_ib_value(p, idx+7); + src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32; ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; - dst_offset = ib[idx+1]; + dst_offset = radeon_get_ib_value(p, idx+1); dst_offset <<= 8; ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); } @@ -3176,10 +3176,10 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) switch (misc) { case 0: /* L2L, byte */ - src_offset = ib[idx+2]; - src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32; - dst_offset = ib[idx+1]; - dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32; + src_offset = radeon_get_ib_value(p, idx+2); + src_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32; + dst_offset = radeon_get_ib_value(p, idx+1); + dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32; if ((src_offset + count) > radeon_bo_size(src_reloc->robj)) { dev_warn(p->dev, "DMA L2L, byte src buffer too small (%llu %lu)\n", src_offset + count, radeon_bo_size(src_reloc->robj)); @@ -3216,12 +3216,12 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) DRM_ERROR("bad L2L, dw, broadcast DMA_PACKET_COPY\n"); return -EINVAL; } - dst_offset = ib[idx+1]; - dst_offset |= ((u64)(ib[idx+4] & 0xff)) << 32; - dst2_offset = ib[idx+2]; - dst2_offset |= ((u64)(ib[idx+5] & 0xff)) << 32; - src_offset = ib[idx+3]; - src_offset |= ((u64)(ib[idx+6] & 0xff)) << 32; + dst_offset = radeon_get_ib_value(p, idx+1); + dst_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32; + dst2_offset = radeon_get_ib_value(p, idx+2); + dst2_offset |= ((u64)(radeon_get_ib_value(p, idx+5) & 0xff)) << 32; + src_offset = radeon_get_ib_value(p, idx+3); + src_offset |= ((u64)(radeon_get_ib_value(p, idx+6) & 0xff)) << 32; if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { dev_warn(p->dev, "DMA L2L, dw, broadcast src buffer too small (%llu %lu)\n", src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); @@ -3251,10 +3251,10 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) } } else { /* L2L, dw */ - src_offset = ib[idx+2]; - src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32; - dst_offset = ib[idx+1]; - dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32; + src_offset = radeon_get_ib_value(p, idx+2); + src_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32; + dst_offset = radeon_get_ib_value(p, idx+1); + dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32; if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) { dev_warn(p->dev, "DMA L2L, dw src buffer too small (%llu %lu)\n", src_offset + (count * 4), radeon_bo_size(src_reloc->robj)); @@ -3279,8 +3279,8 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p) DRM_ERROR("bad DMA_PACKET_CONSTANT_FILL\n"); return -EINVAL; } - dst_offset = ib[idx+1]; - dst_offset |= ((u64)(ib[idx+3] & 0x00ff0000)) << 16; + dst_offset = radeon_get_ib_value(p, idx+1); + dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0x00ff0000)) << 16; if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { dev_warn(p->dev, "DMA constant fill buffer too small (%llu %lu)\n", dst_offset, radeon_bo_size(dst_reloc->robj)); diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c index 69ec24ab8d63..9b2512bf1a46 100644 --- a/drivers/gpu/drm/radeon/r600_cs.c +++ b/drivers/gpu/drm/radeon/r600_cs.c @@ -2623,14 +2623,14 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p) return -EINVAL; } if (tiled) { - dst_offset = ib[idx+1]; + dst_offset = radeon_get_ib_value(p, idx+1); dst_offset <<= 8; ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); p->idx += count + 5; } else { - dst_offset = ib[idx+1]; - dst_offset |= ((u64)(ib[idx+2] & 0xff)) << 32; + dst_offset = radeon_get_ib_value(p, idx+1); + dst_offset |= ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32; ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; @@ -2658,32 +2658,32 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p) /* detile bit */ if (idx_value & (1 << 31)) { /* tiled src, linear dst */ - src_offset = ib[idx+1]; + src_offset = radeon_get_ib_value(p, idx+1); src_offset <<= 8; ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8); - dst_offset = ib[idx+5]; - dst_offset |= ((u64)(ib[idx+6] & 0xff)) << 32; + dst_offset = radeon_get_ib_value(p, idx+5); + dst_offset |= ((u64)(radeon_get_ib_value(p, idx+6) & 0xff)) << 32; ib[idx+5] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); ib[idx+6] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff; } else { /* linear src, tiled dst */ - src_offset = ib[idx+5]; - src_offset |= ((u64)(ib[idx+6] & 0xff)) << 32; + src_offset = radeon_get_ib_value(p, idx+5); + src_offset |= ((u64)(radeon_get_ib_value(p, idx+6) & 0xff)) << 32; ib[idx+5] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); ib[idx+6] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; - dst_offset = ib[idx+1]; + dst_offset = radeon_get_ib_value(p, idx+1); dst_offset <<= 8; ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8); } p->idx += 7; } else { if (p->family >= CHIP_RV770) { - src_offset = ib[idx+2]; - src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32; - dst_offset = ib[idx+1]; - dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32; + src_offset = radeon_get_ib_value(p, idx+2); + src_offset |= ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32; + dst_offset = radeon_get_ib_value(p, idx+1); + dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32; ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); @@ -2691,10 +2691,10 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p) ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff; p->idx += 5; } else { - src_offset = ib[idx+2]; - src_offset |= ((u64)(ib[idx+3] & 0xff)) << 32; - dst_offset = ib[idx+1]; - dst_offset |= ((u64)(ib[idx+3] & 0xff0000)) << 16; + src_offset = radeon_get_ib_value(p, idx+2); + src_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32; + dst_offset = radeon_get_ib_value(p, idx+1); + dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff0000)) << 16; ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc); ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc); @@ -2724,8 +2724,8 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p) DRM_ERROR("bad DMA_PACKET_WRITE\n"); return -EINVAL; } - dst_offset = ib[idx+1]; - dst_offset |= ((u64)(ib[idx+3] & 0x00ff0000)) << 16; + dst_offset = radeon_get_ib_value(p, idx+1); + dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0x00ff0000)) << 16; if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) { dev_warn(p->dev, "DMA constant fill buffer too small (%llu %lu)\n", dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj)); diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 1d8ff2f850ba..93f760e27a92 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -38,6 +38,7 @@ #include <drm/radeon_drm.h> #include <linux/seq_file.h> #include <linux/slab.h> +#include <linux/swiotlb.h> #include "radeon_reg.h" #include "radeon.h" diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig index b38ef6d8d049..64630f15f181 100644 --- a/drivers/hv/Kconfig +++ b/drivers/hv/Kconfig @@ -2,7 +2,7 @@ menu "Microsoft Hyper-V guest support" config HYPERV tristate "Microsoft Hyper-V client drivers" - depends on X86 && ACPI && PCI + depends on X86 && ACPI && PCI && X86_LOCAL_APIC help Select this option to run Linux as a Hyper-V client operating system. diff --git a/drivers/input/input.c b/drivers/input/input.c index ce01332f7b3a..c04469928925 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -1785,12 +1785,13 @@ static void devm_input_device_release(struct device *dev, void *res) * its driver (or binding fails). Once managed input device is allocated, * it is ready to be set up and registered in the same fashion as regular * input device. There are no special devm_input_device_[un]register() - * variants, regular ones work with both managed and unmanaged devices. + * variants, regular ones work with both managed and unmanaged devices, + * should you need them. In most cases however, managed input device need + * not be explicitly unregistered or freed. * * NOTE: the owner device is set up as parent of input device and users * should not override it. */ - struct input_dev *devm_input_allocate_device(struct device *dev) { struct input_dev *input; @@ -2004,6 +2005,17 @@ static void devm_input_device_unregister(struct device *dev, void *res) * Once device has been successfully registered it can be unregistered * with input_unregister_device(); input_free_device() should not be * called in this case. + * + * Note that this function is also used to register managed input devices + * (ones allocated with devm_input_allocate_device()). Such managed input + * devices need not be explicitly unregistered or freed, their tear down + * is controlled by the devres infrastructure. It is also worth noting + * that tear down of managed input devices is internally a 2-step process: + * registered managed input device is first unregistered, but stays in + * memory and can still handle input_event() calls (although events will + * not be delivered anywhere). The freeing of managed input device will + * happen later, when devres stack is unwound to the point where device + * allocation was made. */ int input_register_device(struct input_dev *dev) { diff --git a/drivers/input/joystick/analog.c b/drivers/input/joystick/analog.c index 358cd7ee905b..7cd74e29cbc8 100644 --- a/drivers/input/joystick/analog.c +++ b/drivers/input/joystick/analog.c @@ -162,7 +162,7 @@ static unsigned int get_time_pit(void) #define GET_TIME(x) do { x = get_cycles(); } while (0) #define DELTA(x,y) ((y)-(x)) #define TIME_NAME "PCC" -#elif defined(CONFIG_MN10300) +#elif defined(CONFIG_MN10300) || defined(CONFIG_TILE) #define GET_TIME(x) do { x = get_cycles(); } while (0) #define DELTA(x, y) ((x) - (y)) #define TIME_NAME "TSC" diff --git a/drivers/input/keyboard/lm8323.c b/drivers/input/keyboard/lm8323.c index 93c812662134..0de23f41b2d3 100644 --- a/drivers/input/keyboard/lm8323.c +++ b/drivers/input/keyboard/lm8323.c @@ -398,7 +398,7 @@ static irqreturn_t lm8323_irq(int irq, void *_lm) lm8323_configure(lm); } for (i = 0; i < LM8323_NUM_PWMS; i++) { - if (ints & (1 << (INT_PWM1 + i))) { + if (ints & (INT_PWM1 << i)) { dev_vdbg(&lm->client->dev, "pwm%d engine completed\n", i); pwm_done(&lm->pwm[i]); diff --git a/drivers/input/tablet/wacom_sys.c b/drivers/input/tablet/wacom_sys.c index f92d34f45a1c..aaf23aeae2ea 100644 --- a/drivers/input/tablet/wacom_sys.c +++ b/drivers/input/tablet/wacom_sys.c @@ -553,10 +553,10 @@ static int wacom_set_device_mode(struct usb_interface *intf, int report_id, int if (!rep_data) return error; - rep_data[0] = report_id; - rep_data[1] = mode; - do { + rep_data[0] = report_id; + rep_data[1] = mode; + error = wacom_set_report(intf, WAC_HID_FEATURE_REPORT, report_id, rep_data, length, 1); if (error >= 0) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index c1c74e030a58..d33eaaf783ad 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -4017,10 +4017,10 @@ static int alloc_irq_index(struct irq_cfg *cfg, u16 devid, int count) index -= count - 1; + cfg->remapped = 1; irte_info = &cfg->irq_2_iommu; irte_info->sub_handle = devid; irte_info->irte_index = index; - irte_info->iommu = (void *)cfg; goto out; } @@ -4127,9 +4127,9 @@ static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry, index = attr->ioapic_pin; /* Setup IRQ remapping info */ + cfg->remapped = 1; irte_info->sub_handle = devid; irte_info->irte_index = index; - irte_info->iommu = (void *)cfg; /* Setup IRTE for IOMMU */ irte.val = 0; @@ -4288,9 +4288,9 @@ static int msi_setup_irq(struct pci_dev *pdev, unsigned int irq, devid = get_device_id(&pdev->dev); irte_info = &cfg->irq_2_iommu; + cfg->remapped = 1; irte_info->sub_handle = devid; irte_info->irte_index = index + offset; - irte_info->iommu = (void *)cfg; return 0; } @@ -4314,9 +4314,9 @@ static int setup_hpet_msi(unsigned int irq, unsigned int id) if (index < 0) return index; + cfg->remapped = 1; irte_info->sub_handle = devid; irte_info->irte_index = index; - irte_info->iommu = (void *)cfg; return 0; } diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 86e2f4a62b9a..174bb654453d 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -41,6 +41,8 @@ #include <asm/irq_remapping.h> #include <asm/iommu_table.h> +#include "irq_remapping.h" + /* No locks are needed as DMA remapping hardware unit * list is constructed at boot time and hotplug of * these units are not supported by the architecture. diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index eca28014ef3e..43d5c8b8e7ad 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -46,6 +46,8 @@ #include <asm/cacheflush.h> #include <asm/iommu.h> +#include "irq_remapping.h" + #define ROOT_SIZE VTD_PAGE_SIZE #define CONTEXT_SIZE VTD_PAGE_SIZE diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index af8904de1d44..f3b8f23b5d8f 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -68,6 +68,7 @@ static int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) { struct ir_table *table = iommu->ir_table; struct irq_2_iommu *irq_iommu = irq_2_iommu(irq); + struct irq_cfg *cfg = irq_get_chip_data(irq); u16 index, start_index; unsigned int mask = 0; unsigned long flags; @@ -115,6 +116,7 @@ static int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) for (i = index; i < index + count; i++) table->base[i].present = 1; + cfg->remapped = 1; irq_iommu->iommu = iommu; irq_iommu->irte_index = index; irq_iommu->sub_handle = 0; @@ -155,6 +157,7 @@ static int map_irq_to_irte_handle(int irq, u16 *sub_handle) static int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle) { struct irq_2_iommu *irq_iommu = irq_2_iommu(irq); + struct irq_cfg *cfg = irq_get_chip_data(irq); unsigned long flags; if (!irq_iommu) @@ -162,6 +165,7 @@ static int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subha raw_spin_lock_irqsave(&irq_2_ir_lock, flags); + cfg->remapped = 1; irq_iommu->iommu = iommu; irq_iommu->irte_index = index; irq_iommu->sub_handle = subhandle; @@ -425,11 +429,22 @@ static void iommu_set_irq_remapping(struct intel_iommu *iommu, int mode) /* Enable interrupt-remapping */ iommu->gcmd |= DMA_GCMD_IRE; + iommu->gcmd &= ~DMA_GCMD_CFI; /* Block compatibility-format MSIs */ writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_IRES), sts); + /* + * With CFI clear in the Global Command register, we should be + * protected from dangerous (i.e. compatibility) interrupts + * regardless of x2apic status. Check just to be sure. + */ + if (sts & DMA_GSTS_CFIS) + WARN(1, KERN_WARNING + "Compatibility-format IRQs enabled despite intr remapping;\n" + "you are vulnerable to IRQ injection.\n"); + raw_spin_unlock_irqrestore(&iommu->register_lock, flags); } @@ -526,20 +541,24 @@ static int __init intel_irq_remapping_supported(void) static int __init intel_enable_irq_remapping(void) { struct dmar_drhd_unit *drhd; + bool x2apic_present; int setup = 0; int eim = 0; + x2apic_present = x2apic_supported(); + if (parse_ioapics_under_ir() != 1) { printk(KERN_INFO "Not enable interrupt remapping\n"); - return -1; + goto error; } - if (x2apic_supported()) { + if (x2apic_present) { eim = !dmar_x2apic_optout(); - WARN(!eim, KERN_WARNING - "Your BIOS is broken and requested that x2apic be disabled\n" - "This will leave your machine vulnerable to irq-injection attacks\n" - "Use 'intremap=no_x2apic_optout' to override BIOS request\n"); + if (!eim) + printk(KERN_WARNING + "Your BIOS is broken and requested that x2apic be disabled.\n" + "This will slightly decrease performance.\n" + "Use 'intremap=no_x2apic_optout' to override BIOS request.\n"); } for_each_drhd_unit(drhd) { @@ -578,7 +597,7 @@ static int __init intel_enable_irq_remapping(void) if (eim && !ecap_eim_support(iommu->ecap)) { printk(KERN_INFO "DRHD %Lx: EIM not supported by DRHD, " " ecap %Lx\n", drhd->reg_base_addr, iommu->ecap); - return -1; + goto error; } } @@ -594,7 +613,7 @@ static int __init intel_enable_irq_remapping(void) printk(KERN_ERR "DRHD %Lx: failed to enable queued, " " invalidation, ecap %Lx, ret %d\n", drhd->reg_base_addr, iommu->ecap, ret); - return -1; + goto error; } } @@ -617,6 +636,14 @@ static int __init intel_enable_irq_remapping(void) goto error; irq_remapping_enabled = 1; + + /* + * VT-d has a different layout for IO-APIC entries when + * interrupt remapping is enabled. So it needs a special routine + * to print IO-APIC entries for debugging purposes too. + */ + x86_io_apic_ops.print_entries = intel_ir_io_apic_print_entries; + pr_info("Enabled IRQ remapping in %s mode\n", eim ? "x2apic" : "xapic"); return eim ? IRQ_REMAP_X2APIC_MODE : IRQ_REMAP_XAPIC_MODE; @@ -625,6 +652,11 @@ error: /* * handle error condition gracefully here! */ + + if (x2apic_present) + WARN(1, KERN_WARNING + "Failed to enable irq remapping. You are vulnerable to irq-injection attacks.\n"); + return -1; } diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index faf85d6e33fe..d56f8c17c5fe 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -1,11 +1,18 @@ +#include <linux/seq_file.h> +#include <linux/cpumask.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/cpumask.h> #include <linux/errno.h> #include <linux/msi.h> +#include <linux/irq.h> +#include <linux/pci.h> #include <asm/hw_irq.h> #include <asm/irq_remapping.h> +#include <asm/processor.h> +#include <asm/x86_init.h> +#include <asm/apic.h> #include "irq_remapping.h" @@ -17,6 +24,152 @@ int no_x2apic_optout; static struct irq_remap_ops *remap_ops; +static int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec); +static int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq, + int index, int sub_handle); +static int set_remapped_irq_affinity(struct irq_data *data, + const struct cpumask *mask, + bool force); + +static bool irq_remapped(struct irq_cfg *cfg) +{ + return (cfg->remapped == 1); +} + +static void irq_remapping_disable_io_apic(void) +{ + /* + * With interrupt-remapping, for now we will use virtual wire A + * mode, as virtual wire B is little complex (need to configure + * both IOAPIC RTE as well as interrupt-remapping table entry). + * As this gets called during crash dump, keep this simple for + * now. + */ + if (cpu_has_apic || apic_from_smp_config()) + disconnect_bsp_APIC(0); +} + +static int do_setup_msi_irqs(struct pci_dev *dev, int nvec) +{ + int node, ret, sub_handle, index = 0; + unsigned int irq; + struct msi_desc *msidesc; + + nvec = __roundup_pow_of_two(nvec); + + WARN_ON(!list_is_singular(&dev->msi_list)); + msidesc = list_entry(dev->msi_list.next, struct msi_desc, list); + WARN_ON(msidesc->irq); + WARN_ON(msidesc->msi_attrib.multiple); + + node = dev_to_node(&dev->dev); + irq = __create_irqs(get_nr_irqs_gsi(), nvec, node); + if (irq == 0) + return -ENOSPC; + + msidesc->msi_attrib.multiple = ilog2(nvec); + for (sub_handle = 0; sub_handle < nvec; sub_handle++) { + if (!sub_handle) { + index = msi_alloc_remapped_irq(dev, irq, nvec); + if (index < 0) { + ret = index; + goto error; + } + } else { + ret = msi_setup_remapped_irq(dev, irq + sub_handle, + index, sub_handle); + if (ret < 0) + goto error; + } + ret = setup_msi_irq(dev, msidesc, irq, sub_handle); + if (ret < 0) + goto error; + } + return 0; + +error: + destroy_irqs(irq, nvec); + + /* + * Restore altered MSI descriptor fields and prevent just destroyed + * IRQs from tearing down again in default_teardown_msi_irqs() + */ + msidesc->irq = 0; + msidesc->msi_attrib.multiple = 0; + + return ret; +} + +static int do_setup_msix_irqs(struct pci_dev *dev, int nvec) +{ + int node, ret, sub_handle, index = 0; + struct msi_desc *msidesc; + unsigned int irq; + + node = dev_to_node(&dev->dev); + irq = get_nr_irqs_gsi(); + sub_handle = 0; + + list_for_each_entry(msidesc, &dev->msi_list, list) { + + irq = create_irq_nr(irq, node); + if (irq == 0) + return -1; + + if (sub_handle == 0) + ret = index = msi_alloc_remapped_irq(dev, irq, nvec); + else + ret = msi_setup_remapped_irq(dev, irq, index, sub_handle); + + if (ret < 0) + goto error; + + ret = setup_msi_irq(dev, msidesc, irq, 0); + if (ret < 0) + goto error; + + sub_handle += 1; + irq += 1; + } + + return 0; + +error: + destroy_irq(irq); + return ret; +} + +static int irq_remapping_setup_msi_irqs(struct pci_dev *dev, + int nvec, int type) +{ + if (type == PCI_CAP_ID_MSI) + return do_setup_msi_irqs(dev, nvec); + else + return do_setup_msix_irqs(dev, nvec); +} + +void eoi_ioapic_pin_remapped(int apic, int pin, int vector) +{ + /* + * Intr-remapping uses pin number as the virtual vector + * in the RTE. Actual vector is programmed in + * intr-remapping table entry. Hence for the io-apic + * EOI we use the pin number. + */ + io_apic_eoi(apic, pin); +} + +static void __init irq_remapping_modify_x86_ops(void) +{ + x86_io_apic_ops.disable = irq_remapping_disable_io_apic; + x86_io_apic_ops.set_affinity = set_remapped_irq_affinity; + x86_io_apic_ops.setup_entry = setup_ioapic_remapped_entry; + x86_io_apic_ops.eoi_ioapic_pin = eoi_ioapic_pin_remapped; + x86_msi.setup_msi_irqs = irq_remapping_setup_msi_irqs; + x86_msi.setup_hpet_msi = setup_hpet_msi_remapped; + x86_msi.compose_msi_msg = compose_remapped_msi_msg; +} + static __init int setup_nointremap(char *str) { disable_irq_remap = 1; @@ -79,15 +232,24 @@ int __init irq_remapping_prepare(void) int __init irq_remapping_enable(void) { + int ret; + if (!remap_ops || !remap_ops->enable) return -ENODEV; - return remap_ops->enable(); + ret = remap_ops->enable(); + + if (irq_remapping_enabled) + irq_remapping_modify_x86_ops(); + + return ret; } void irq_remapping_disable(void) { - if (!remap_ops || !remap_ops->disable) + if (!irq_remapping_enabled || + !remap_ops || + !remap_ops->disable) return; remap_ops->disable(); @@ -95,7 +257,9 @@ void irq_remapping_disable(void) int irq_remapping_reenable(int mode) { - if (!remap_ops || !remap_ops->reenable) + if (!irq_remapping_enabled || + !remap_ops || + !remap_ops->reenable) return 0; return remap_ops->reenable(mode); @@ -103,6 +267,9 @@ int irq_remapping_reenable(int mode) int __init irq_remap_enable_fault_handling(void) { + if (!irq_remapping_enabled) + return 0; + if (!remap_ops || !remap_ops->enable_faulting) return -ENODEV; @@ -133,23 +300,28 @@ int set_remapped_irq_affinity(struct irq_data *data, const struct cpumask *mask, void free_remapped_irq(int irq) { + struct irq_cfg *cfg = irq_get_chip_data(irq); + if (!remap_ops || !remap_ops->free_irq) return; - remap_ops->free_irq(irq); + if (irq_remapped(cfg)) + remap_ops->free_irq(irq); } void compose_remapped_msi_msg(struct pci_dev *pdev, unsigned int irq, unsigned int dest, struct msi_msg *msg, u8 hpet_id) { - if (!remap_ops || !remap_ops->compose_msi_msg) - return; + struct irq_cfg *cfg = irq_get_chip_data(irq); - remap_ops->compose_msi_msg(pdev, irq, dest, msg, hpet_id); + if (!irq_remapped(cfg)) + native_compose_msi_msg(pdev, irq, dest, msg, hpet_id); + else if (remap_ops && remap_ops->compose_msi_msg) + remap_ops->compose_msi_msg(pdev, irq, dest, msg, hpet_id); } -int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec) +static int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec) { if (!remap_ops || !remap_ops->msi_alloc_irq) return -ENODEV; @@ -157,8 +329,8 @@ int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec) return remap_ops->msi_alloc_irq(pdev, irq, nvec); } -int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq, - int index, int sub_handle) +static int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq, + int index, int sub_handle) { if (!remap_ops || !remap_ops->msi_setup_irq) return -ENODEV; @@ -173,3 +345,42 @@ int setup_hpet_msi_remapped(unsigned int irq, unsigned int id) return remap_ops->setup_hpet_msi(irq, id); } + +void panic_if_irq_remap(const char *msg) +{ + if (irq_remapping_enabled) + panic(msg); +} + +static void ir_ack_apic_edge(struct irq_data *data) +{ + ack_APIC_irq(); +} + +static void ir_ack_apic_level(struct irq_data *data) +{ + ack_APIC_irq(); + eoi_ioapic_irq(data->irq, data->chip_data); +} + +static void ir_print_prefix(struct irq_data *data, struct seq_file *p) +{ + seq_printf(p, " IR-%s", data->chip->name); +} + +void irq_remap_modify_chip_defaults(struct irq_chip *chip) +{ + chip->irq_print_chip = ir_print_prefix; + chip->irq_ack = ir_ack_apic_edge; + chip->irq_eoi = ir_ack_apic_level; + chip->irq_set_affinity = x86_io_apic_ops.set_affinity; +} + +bool setup_remapped_irq(int irq, struct irq_cfg *cfg, struct irq_chip *chip) +{ + if (!irq_remapped(cfg)) + return false; + irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); + irq_remap_modify_chip_defaults(chip); + return true; +} diff --git a/drivers/iommu/irq_remapping.h b/drivers/iommu/irq_remapping.h index 95363acb583f..ecb637670405 100644 --- a/drivers/iommu/irq_remapping.h +++ b/drivers/iommu/irq_remapping.h @@ -34,6 +34,7 @@ struct msi_msg; extern int disable_irq_remap; extern int disable_sourceid_checking; extern int no_x2apic_optout; +extern int irq_remapping_enabled; struct irq_remap_ops { /* Check whether Interrupt Remapping is supported */ diff --git a/drivers/isdn/mISDN/stack.c b/drivers/isdn/mISDN/stack.c index 5f21f629b7ae..deda591f70b9 100644 --- a/drivers/isdn/mISDN/stack.c +++ b/drivers/isdn/mISDN/stack.c @@ -18,6 +18,7 @@ #include <linux/slab.h> #include <linux/mISDNif.h> #include <linux/kthread.h> +#include <linux/sched.h> #include "core.h" static u_int *debug; @@ -202,6 +203,9 @@ static int mISDNStackd(void *data) { struct mISDNstack *st = data; +#ifdef MISDN_MSG_STATS + cputime_t utime, stime; +#endif int err = 0; sigfillset(¤t->blocked); @@ -303,9 +307,10 @@ mISDNStackd(void *data) "msg %d sleep %d stopped\n", dev_name(&st->dev->dev), st->msg_cnt, st->sleep_cnt, st->stopped_cnt); + task_cputime(st->thread, &utime, &stime); printk(KERN_DEBUG "mISDNStackd daemon for %s utime(%ld) stime(%ld)\n", - dev_name(&st->dev->dev), st->thread->utime, st->thread->stime); + dev_name(&st->dev->dev), utime, stime); printk(KERN_DEBUG "mISDNStackd daemon for %s nvcsw(%ld) nivcsw(%ld)\n", dev_name(&st->dev->dev), st->thread->nvcsw, st->thread->nivcsw); diff --git a/drivers/media/dvb-core/dvb_frontend.c b/drivers/media/dvb-core/dvb_frontend.c index 49d95040096a..0223ad255cb4 100644 --- a/drivers/media/dvb-core/dvb_frontend.c +++ b/drivers/media/dvb-core/dvb_frontend.c @@ -1820,7 +1820,7 @@ static int dvb_frontend_ioctl(struct file *file, struct dvb_frontend *fe = dvbdev->priv; struct dtv_frontend_properties *c = &fe->dtv_property_cache; struct dvb_frontend_private *fepriv = fe->frontend_priv; - int err = -ENOTTY; + int err = -EOPNOTSUPP; dev_dbg(fe->dvb->device, "%s: (%d)\n", __func__, _IOC_NR(cmd)); if (fepriv->exit != DVB_FE_NO_EXIT) @@ -1938,7 +1938,7 @@ static int dvb_frontend_ioctl_properties(struct file *file, } } else - err = -ENOTTY; + err = -EOPNOTSUPP; out: kfree(tvp); @@ -2071,7 +2071,7 @@ static int dvb_frontend_ioctl_legacy(struct file *file, struct dvb_frontend *fe = dvbdev->priv; struct dvb_frontend_private *fepriv = fe->frontend_priv; struct dtv_frontend_properties *c = &fe->dtv_property_cache; - int err = -ENOTTY; + int err = -EOPNOTSUPP; switch (cmd) { case FE_GET_INFO: { diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c index 56d3f697e0c7..0035c01660b6 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c @@ -21,7 +21,7 @@ #include "atl1c.h" -#define ATL1C_DRV_VERSION "1.0.1.0-NAPI" +#define ATL1C_DRV_VERSION "1.0.1.1-NAPI" char atl1c_driver_name[] = "atl1c"; char atl1c_driver_version[] = ATL1C_DRV_VERSION; @@ -1652,6 +1652,7 @@ static int atl1c_alloc_rx_buffer(struct atl1c_adapter *adapter) u16 num_alloc = 0; u16 rfd_next_to_use, next_next; struct atl1c_rx_free_desc *rfd_desc; + dma_addr_t mapping; next_next = rfd_next_to_use = rfd_ring->next_to_use; if (++next_next == rfd_ring->count) @@ -1678,9 +1679,18 @@ static int atl1c_alloc_rx_buffer(struct atl1c_adapter *adapter) ATL1C_SET_BUFFER_STATE(buffer_info, ATL1C_BUFFER_BUSY); buffer_info->skb = skb; buffer_info->length = adapter->rx_buffer_len; - buffer_info->dma = pci_map_single(pdev, vir_addr, + mapping = pci_map_single(pdev, vir_addr, buffer_info->length, PCI_DMA_FROMDEVICE); + if (unlikely(pci_dma_mapping_error(pdev, mapping))) { + dev_kfree_skb(skb); + buffer_info->skb = NULL; + buffer_info->length = 0; + ATL1C_SET_BUFFER_STATE(buffer_info, ATL1C_BUFFER_FREE); + netif_warn(adapter, rx_err, adapter->netdev, "RX pci_map_single failed"); + break; + } + buffer_info->dma = mapping; ATL1C_SET_PCIMAP_TYPE(buffer_info, ATL1C_PCIMAP_SINGLE, ATL1C_PCIMAP_FROMDEVICE); rfd_desc->buffer_addr = cpu_to_le64(buffer_info->dma); @@ -2015,7 +2025,29 @@ check_sum: return 0; } -static void atl1c_tx_map(struct atl1c_adapter *adapter, +static void atl1c_tx_rollback(struct atl1c_adapter *adpt, + struct atl1c_tpd_desc *first_tpd, + enum atl1c_trans_queue type) +{ + struct atl1c_tpd_ring *tpd_ring = &adpt->tpd_ring[type]; + struct atl1c_buffer *buffer_info; + struct atl1c_tpd_desc *tpd; + u16 first_index, index; + + first_index = first_tpd - (struct atl1c_tpd_desc *)tpd_ring->desc; + index = first_index; + while (index != tpd_ring->next_to_use) { + tpd = ATL1C_TPD_DESC(tpd_ring, index); + buffer_info = &tpd_ring->buffer_info[index]; + atl1c_clean_buffer(adpt->pdev, buffer_info, 0); + memset(tpd, 0, sizeof(struct atl1c_tpd_desc)); + if (++index == tpd_ring->count) + index = 0; + } + tpd_ring->next_to_use = first_index; +} + +static int atl1c_tx_map(struct atl1c_adapter *adapter, struct sk_buff *skb, struct atl1c_tpd_desc *tpd, enum atl1c_trans_queue type) { @@ -2040,7 +2072,10 @@ static void atl1c_tx_map(struct atl1c_adapter *adapter, buffer_info->length = map_len; buffer_info->dma = pci_map_single(adapter->pdev, skb->data, hdr_len, PCI_DMA_TODEVICE); - ATL1C_SET_BUFFER_STATE(buffer_info, ATL1C_BUFFER_BUSY); + if (unlikely(pci_dma_mapping_error(adapter->pdev, + buffer_info->dma))) + goto err_dma; + ATL1C_SET_PCIMAP_TYPE(buffer_info, ATL1C_PCIMAP_SINGLE, ATL1C_PCIMAP_TODEVICE); mapped_len += map_len; @@ -2062,6 +2097,10 @@ static void atl1c_tx_map(struct atl1c_adapter *adapter, buffer_info->dma = pci_map_single(adapter->pdev, skb->data + mapped_len, buffer_info->length, PCI_DMA_TODEVICE); + if (unlikely(pci_dma_mapping_error(adapter->pdev, + buffer_info->dma))) + goto err_dma; + ATL1C_SET_BUFFER_STATE(buffer_info, ATL1C_BUFFER_BUSY); ATL1C_SET_PCIMAP_TYPE(buffer_info, ATL1C_PCIMAP_SINGLE, ATL1C_PCIMAP_TODEVICE); @@ -2083,6 +2122,9 @@ static void atl1c_tx_map(struct atl1c_adapter *adapter, frag, 0, buffer_info->length, DMA_TO_DEVICE); + if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma)) + goto err_dma; + ATL1C_SET_BUFFER_STATE(buffer_info, ATL1C_BUFFER_BUSY); ATL1C_SET_PCIMAP_TYPE(buffer_info, ATL1C_PCIMAP_PAGE, ATL1C_PCIMAP_TODEVICE); @@ -2095,6 +2137,13 @@ static void atl1c_tx_map(struct atl1c_adapter *adapter, /* The last buffer info contain the skb address, so it will be free after unmap */ buffer_info->skb = skb; + + return 0; + +err_dma: + buffer_info->dma = 0; + buffer_info->length = 0; + return -1; } static void atl1c_tx_queue(struct atl1c_adapter *adapter, struct sk_buff *skb, @@ -2157,10 +2206,18 @@ static netdev_tx_t atl1c_xmit_frame(struct sk_buff *skb, if (skb_network_offset(skb) != ETH_HLEN) tpd->word1 |= 1 << TPD_ETH_TYPE_SHIFT; /* Ethernet frame */ - atl1c_tx_map(adapter, skb, tpd, type); - atl1c_tx_queue(adapter, skb, tpd, type); + if (atl1c_tx_map(adapter, skb, tpd, type) < 0) { + netif_info(adapter, tx_done, adapter->netdev, + "tx-skb droppted due to dma error\n"); + /* roll back tpd/buffer */ + atl1c_tx_rollback(adapter, tpd, type); + spin_unlock_irqrestore(&adapter->tx_lock, flags); + dev_kfree_skb(skb); + } else { + atl1c_tx_queue(adapter, skb, tpd, type); + spin_unlock_irqrestore(&adapter->tx_lock, flags); + } - spin_unlock_irqrestore(&adapter->tx_lock, flags); return NETDEV_TX_OK; } diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index f771ddfba646..a5edac8df67b 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -504,13 +504,11 @@ static int bnx2x_fill_frag_skb(struct bnx2x *bp, struct bnx2x_fastpath *fp, skb_shinfo(skb)->gso_size = bnx2x_set_lro_mss(bp, tpa_info->parsing_flags, len_on_bd); - /* set for GRO */ - if (fp->mode == TPA_MODE_GRO) - skb_shinfo(skb)->gso_type = - (GET_FLAG(tpa_info->parsing_flags, - PARSING_FLAGS_OVER_ETHERNET_PROTOCOL) == - PRS_FLAG_OVERETH_IPV6) ? - SKB_GSO_TCPV6 : SKB_GSO_TCPV4; + skb_shinfo(skb)->gso_type = + (GET_FLAG(tpa_info->parsing_flags, + PARSING_FLAGS_OVER_ETHERNET_PROTOCOL) == + PRS_FLAG_OVERETH_IPV6) ? + SKB_GSO_TCPV6 : SKB_GSO_TCPV4; } diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index a9b0830fb39d..b9d4bb9530e5 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -693,6 +693,11 @@ static int macb_poll(struct napi_struct *napi, int budget) * get notified when new packets arrive. */ macb_writel(bp, IER, MACB_RX_INT_FLAGS); + + /* Packets received while interrupts were disabled */ + status = macb_readl(bp, RSR); + if (unlikely(status)) + napi_reschedule(napi); } /* TODO: Handle errors */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 20a5af6d87d0..b3e3294cfe53 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1401,6 +1401,7 @@ static void ixgbe_set_rsc_gso_size(struct ixgbe_ring *ring, /* set gso_size to avoid messing up TCP MSS */ skb_shinfo(skb)->gso_size = DIV_ROUND_UP((skb->len - hdr_len), IXGBE_CB(skb)->append_cnt); + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; } static void ixgbe_update_rsc_stats(struct ixgbe_ring *rx_ring, diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c index 6f82812d0fab..09aa310b6194 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c @@ -986,8 +986,13 @@ qlcnic_process_lro(struct qlcnic_adapter *adapter, th->seq = htonl(seq_number); length = skb->len; - if (adapter->flags & QLCNIC_FW_LRO_MSS_CAP) + if (adapter->flags & QLCNIC_FW_LRO_MSS_CAP) { skb_shinfo(skb)->gso_size = qlcnic_get_lro_sts_mss(sts_data1); + if (skb->protocol == htons(ETH_P_IPV6)) + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; + else + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; + } if (vid != 0xffff) __vlan_hwaccel_put_tag(skb, vid); diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 11702324a071..998974f78742 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -450,7 +450,6 @@ enum rtl8168_registers { #define PWM_EN (1 << 22) #define RXDV_GATED_EN (1 << 19) #define EARLY_TALLY_EN (1 << 16) -#define FORCE_CLK (1 << 15) /* force clock request */ }; enum rtl_register_content { @@ -514,7 +513,6 @@ enum rtl_register_content { PMEnable = (1 << 0), /* Power Management Enable */ /* Config2 register p. 25 */ - ClkReqEn = (1 << 7), /* Clock Request Enable */ MSIEnable = (1 << 5), /* 8169 only. Reserved in the 8168. */ PCI_Clock_66MHz = 0x01, PCI_Clock_33MHz = 0x00, @@ -535,7 +533,6 @@ enum rtl_register_content { Spi_en = (1 << 3), LanWake = (1 << 1), /* LanWake enable/disable */ PMEStatus = (1 << 0), /* PME status can be reset by PCI RST# */ - ASPM_en = (1 << 0), /* ASPM enable */ /* TBICSR p.28 */ TBIReset = 0x80000000, @@ -684,7 +681,6 @@ enum features { RTL_FEATURE_WOL = (1 << 0), RTL_FEATURE_MSI = (1 << 1), RTL_FEATURE_GMII = (1 << 2), - RTL_FEATURE_FW_LOADED = (1 << 3), }; struct rtl8169_counters { @@ -2389,10 +2385,8 @@ static void rtl_apply_firmware(struct rtl8169_private *tp) struct rtl_fw *rtl_fw = tp->rtl_fw; /* TODO: release firmware once rtl_phy_write_fw signals failures. */ - if (!IS_ERR_OR_NULL(rtl_fw)) { + if (!IS_ERR_OR_NULL(rtl_fw)) rtl_phy_write_fw(tp, rtl_fw); - tp->features |= RTL_FEATURE_FW_LOADED; - } } static void rtl_apply_firmware_cond(struct rtl8169_private *tp, u8 reg, u16 val) @@ -2403,31 +2397,6 @@ static void rtl_apply_firmware_cond(struct rtl8169_private *tp, u8 reg, u16 val) rtl_apply_firmware(tp); } -static void r810x_aldps_disable(struct rtl8169_private *tp) -{ - rtl_writephy(tp, 0x1f, 0x0000); - rtl_writephy(tp, 0x18, 0x0310); - msleep(100); -} - -static void r810x_aldps_enable(struct rtl8169_private *tp) -{ - if (!(tp->features & RTL_FEATURE_FW_LOADED)) - return; - - rtl_writephy(tp, 0x1f, 0x0000); - rtl_writephy(tp, 0x18, 0x8310); -} - -static void r8168_aldps_enable_1(struct rtl8169_private *tp) -{ - if (!(tp->features & RTL_FEATURE_FW_LOADED)) - return; - - rtl_writephy(tp, 0x1f, 0x0000); - rtl_w1w0_phy(tp, 0x15, 0x1000, 0x0000); -} - static void rtl8169s_hw_phy_config(struct rtl8169_private *tp) { static const struct phy_reg phy_reg_init[] = { @@ -3218,8 +3187,6 @@ static void rtl8168e_2_hw_phy_config(struct rtl8169_private *tp) rtl_w1w0_phy(tp, 0x10, 0x0000, 0x0400); rtl_writephy(tp, 0x1f, 0x0000); - r8168_aldps_enable_1(tp); - /* Broken BIOS workaround: feed GigaMAC registers with MAC address. */ rtl_rar_exgmac_set(tp, tp->dev->dev_addr); } @@ -3294,8 +3261,6 @@ static void rtl8168f_1_hw_phy_config(struct rtl8169_private *tp) rtl_writephy(tp, 0x05, 0x8b85); rtl_w1w0_phy(tp, 0x06, 0x4000, 0x0000); rtl_writephy(tp, 0x1f, 0x0000); - - r8168_aldps_enable_1(tp); } static void rtl8168f_2_hw_phy_config(struct rtl8169_private *tp) @@ -3303,8 +3268,6 @@ static void rtl8168f_2_hw_phy_config(struct rtl8169_private *tp) rtl_apply_firmware(tp); rtl8168f_hw_phy_config(tp); - - r8168_aldps_enable_1(tp); } static void rtl8411_hw_phy_config(struct rtl8169_private *tp) @@ -3402,8 +3365,6 @@ static void rtl8411_hw_phy_config(struct rtl8169_private *tp) rtl_w1w0_phy(tp, 0x19, 0x0000, 0x0001); rtl_w1w0_phy(tp, 0x10, 0x0000, 0x0400); rtl_writephy(tp, 0x1f, 0x0000); - - r8168_aldps_enable_1(tp); } static void rtl8168g_1_hw_phy_config(struct rtl8169_private *tp) @@ -3489,19 +3450,21 @@ static void rtl8105e_hw_phy_config(struct rtl8169_private *tp) }; /* Disable ALDPS before ram code */ - r810x_aldps_disable(tp); + rtl_writephy(tp, 0x1f, 0x0000); + rtl_writephy(tp, 0x18, 0x0310); + msleep(100); rtl_apply_firmware(tp); rtl_writephy_batch(tp, phy_reg_init, ARRAY_SIZE(phy_reg_init)); - - r810x_aldps_enable(tp); } static void rtl8402_hw_phy_config(struct rtl8169_private *tp) { /* Disable ALDPS before setting firmware */ - r810x_aldps_disable(tp); + rtl_writephy(tp, 0x1f, 0x0000); + rtl_writephy(tp, 0x18, 0x0310); + msleep(20); rtl_apply_firmware(tp); @@ -3511,8 +3474,6 @@ static void rtl8402_hw_phy_config(struct rtl8169_private *tp) rtl_writephy(tp, 0x10, 0x401f); rtl_writephy(tp, 0x19, 0x7030); rtl_writephy(tp, 0x1f, 0x0000); - - r810x_aldps_enable(tp); } static void rtl8106e_hw_phy_config(struct rtl8169_private *tp) @@ -3525,7 +3486,9 @@ static void rtl8106e_hw_phy_config(struct rtl8169_private *tp) }; /* Disable ALDPS before ram code */ - r810x_aldps_disable(tp); + rtl_writephy(tp, 0x1f, 0x0000); + rtl_writephy(tp, 0x18, 0x0310); + msleep(100); rtl_apply_firmware(tp); @@ -3533,8 +3496,6 @@ static void rtl8106e_hw_phy_config(struct rtl8169_private *tp) rtl_writephy_batch(tp, phy_reg_init, ARRAY_SIZE(phy_reg_init)); rtl_eri_write(tp, 0x1d0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC); - - r810x_aldps_enable(tp); } static void rtl_hw_phy_config(struct net_device *dev) @@ -5051,6 +5012,8 @@ static void rtl_hw_start_8168e_2(struct rtl8169_private *tp) RTL_W8(MaxTxPacketSize, EarlySize); + rtl_disable_clock_request(pdev); + RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO); RTL_W8(MCU, RTL_R8(MCU) & ~NOW_IS_OOB); @@ -5059,8 +5022,7 @@ static void rtl_hw_start_8168e_2(struct rtl8169_private *tp) RTL_W8(DLLPR, RTL_R8(DLLPR) | PFM_EN); RTL_W32(MISC, RTL_R32(MISC) | PWM_EN); - RTL_W8(Config5, (RTL_R8(Config5) & ~Spi_en) | ASPM_en); - RTL_W8(Config2, RTL_R8(Config2) | ClkReqEn); + RTL_W8(Config5, RTL_R8(Config5) & ~Spi_en); } static void rtl_hw_start_8168f(struct rtl8169_private *tp) @@ -5085,12 +5047,13 @@ static void rtl_hw_start_8168f(struct rtl8169_private *tp) RTL_W8(MaxTxPacketSize, EarlySize); + rtl_disable_clock_request(pdev); + RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO); RTL_W8(MCU, RTL_R8(MCU) & ~NOW_IS_OOB); RTL_W8(DLLPR, RTL_R8(DLLPR) | PFM_EN); - RTL_W32(MISC, RTL_R32(MISC) | PWM_EN | FORCE_CLK); - RTL_W8(Config5, (RTL_R8(Config5) & ~Spi_en) | ASPM_en); - RTL_W8(Config2, RTL_R8(Config2) | ClkReqEn); + RTL_W32(MISC, RTL_R32(MISC) | PWM_EN); + RTL_W8(Config5, RTL_R8(Config5) & ~Spi_en); } static void rtl_hw_start_8168f_1(struct rtl8169_private *tp) @@ -5147,10 +5110,8 @@ static void rtl_hw_start_8168g_1(struct rtl8169_private *tp) rtl_w1w0_eri(tp, 0xdc, ERIAR_MASK_0001, 0x01, 0x00, ERIAR_EXGMAC); RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb); - RTL_W32(MISC, (RTL_R32(MISC) | FORCE_CLK) & ~RXDV_GATED_EN); + RTL_W32(MISC, RTL_R32(MISC) & ~RXDV_GATED_EN); RTL_W8(MaxTxPacketSize, EarlySize); - RTL_W8(Config5, RTL_R8(Config5) | ASPM_en); - RTL_W8(Config2, RTL_R8(Config2) | ClkReqEn); rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC); rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000, ERIAR_EXGMAC); @@ -5366,9 +5327,6 @@ static void rtl_hw_start_8105e_1(struct rtl8169_private *tp) RTL_W8(MCU, RTL_R8(MCU) | EN_NDP | EN_OOB_RESET); RTL_W8(DLLPR, RTL_R8(DLLPR) | PFM_EN); - RTL_W8(Config5, RTL_R8(Config5) | ASPM_en); - RTL_W8(Config2, RTL_R8(Config2) | ClkReqEn); - RTL_W32(MISC, RTL_R32(MISC) | FORCE_CLK); rtl_ephy_init(tp, e_info_8105e_1, ARRAY_SIZE(e_info_8105e_1)); } @@ -5394,9 +5352,6 @@ static void rtl_hw_start_8402(struct rtl8169_private *tp) RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO); RTL_W8(MCU, RTL_R8(MCU) & ~NOW_IS_OOB); - RTL_W8(Config5, RTL_R8(Config5) | ASPM_en); - RTL_W8(Config2, RTL_R8(Config2) | ClkReqEn); - RTL_W32(MISC, RTL_R32(MISC) | FORCE_CLK); rtl_ephy_init(tp, e_info_8402, ARRAY_SIZE(e_info_8402)); @@ -5418,10 +5373,7 @@ static void rtl_hw_start_8106(struct rtl8169_private *tp) /* Force LAN exit from ASPM if Rx/Tx are not idle */ RTL_W32(FuncEvent, RTL_R32(FuncEvent) | 0x002800); - RTL_W32(MISC, - (RTL_R32(MISC) | DISABLE_LAN_EN | FORCE_CLK) & ~EARLY_TALLY_EN); - RTL_W8(Config5, RTL_R8(Config5) | ASPM_en); - RTL_W8(Config2, RTL_R8(Config2) | ClkReqEn); + RTL_W32(MISC, (RTL_R32(MISC) | DISABLE_LAN_EN) & ~EARLY_TALLY_EN); RTL_W8(MCU, RTL_R8(MCU) | EN_NDP | EN_OOB_RESET); RTL_W8(DLLPR, RTL_R8(DLLPR) & ~PFM_EN); } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index f07c0612abf6..b75f4b286895 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -69,7 +69,7 @@ #undef STMMAC_XMIT_DEBUG /*#define STMMAC_XMIT_DEBUG*/ -#ifdef STMMAC_TX_DEBUG +#ifdef STMMAC_XMIT_DEBUG #define TX_DBG(fmt, args...) printk(fmt, ## args) #else #define TX_DBG(fmt, args...) do { } while (0) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index 0376a5e6b2bf..0b9829fe3eea 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -188,8 +188,6 @@ int stmmac_mdio_register(struct net_device *ndev) goto bus_register_fail; } - priv->mii = new_bus; - found = 0; for (addr = 0; addr < PHY_MAX_ADDR; addr++) { struct phy_device *phydev = new_bus->phy_map[addr]; @@ -237,8 +235,14 @@ int stmmac_mdio_register(struct net_device *ndev) } } - if (!found) + if (!found) { pr_warning("%s: No PHY found\n", ndev->name); + mdiobus_unregister(new_bus); + mdiobus_free(new_bus); + return -ENODEV; + } + + priv->mii = new_bus; return 0; diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index c8e05e27f38c..19d903598b0d 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -411,6 +411,7 @@ static const struct usb_device_id products[] = { }, /* 3. Combined interface devices matching on interface number */ + {QMI_FIXED_INTF(0x0408, 0xea42, 4)}, /* Yota / Megafon M100-1 */ {QMI_FIXED_INTF(0x12d1, 0x140c, 1)}, /* Huawei E173 */ {QMI_FIXED_INTF(0x19d2, 0x0002, 1)}, {QMI_FIXED_INTF(0x19d2, 0x0012, 1)}, diff --git a/drivers/net/wireless/mwl8k.c b/drivers/net/wireless/mwl8k.c index 83564d36e801..a00a03ea4ec9 100644 --- a/drivers/net/wireless/mwl8k.c +++ b/drivers/net/wireless/mwl8k.c @@ -318,20 +318,20 @@ struct mwl8k_sta { #define MWL8K_STA(_sta) ((struct mwl8k_sta *)&((_sta)->drv_priv)) static const struct ieee80211_channel mwl8k_channels_24[] = { - { .center_freq = 2412, .hw_value = 1, }, - { .center_freq = 2417, .hw_value = 2, }, - { .center_freq = 2422, .hw_value = 3, }, - { .center_freq = 2427, .hw_value = 4, }, - { .center_freq = 2432, .hw_value = 5, }, - { .center_freq = 2437, .hw_value = 6, }, - { .center_freq = 2442, .hw_value = 7, }, - { .center_freq = 2447, .hw_value = 8, }, - { .center_freq = 2452, .hw_value = 9, }, - { .center_freq = 2457, .hw_value = 10, }, - { .center_freq = 2462, .hw_value = 11, }, - { .center_freq = 2467, .hw_value = 12, }, - { .center_freq = 2472, .hw_value = 13, }, - { .center_freq = 2484, .hw_value = 14, }, + { .band = IEEE80211_BAND_2GHZ, .center_freq = 2412, .hw_value = 1, }, + { .band = IEEE80211_BAND_2GHZ, .center_freq = 2417, .hw_value = 2, }, + { .band = IEEE80211_BAND_2GHZ, .center_freq = 2422, .hw_value = 3, }, + { .band = IEEE80211_BAND_2GHZ, .center_freq = 2427, .hw_value = 4, }, + { .band = IEEE80211_BAND_2GHZ, .center_freq = 2432, .hw_value = 5, }, + { .band = IEEE80211_BAND_2GHZ, .center_freq = 2437, .hw_value = 6, }, + { .band = IEEE80211_BAND_2GHZ, .center_freq = 2442, .hw_value = 7, }, + { .band = IEEE80211_BAND_2GHZ, .center_freq = 2447, .hw_value = 8, }, + { .band = IEEE80211_BAND_2GHZ, .center_freq = 2452, .hw_value = 9, }, + { .band = IEEE80211_BAND_2GHZ, .center_freq = 2457, .hw_value = 10, }, + { .band = IEEE80211_BAND_2GHZ, .center_freq = 2462, .hw_value = 11, }, + { .band = IEEE80211_BAND_2GHZ, .center_freq = 2467, .hw_value = 12, }, + { .band = IEEE80211_BAND_2GHZ, .center_freq = 2472, .hw_value = 13, }, + { .band = IEEE80211_BAND_2GHZ, .center_freq = 2484, .hw_value = 14, }, }; static const struct ieee80211_rate mwl8k_rates_24[] = { @@ -352,10 +352,10 @@ static const struct ieee80211_rate mwl8k_rates_24[] = { }; static const struct ieee80211_channel mwl8k_channels_50[] = { - { .center_freq = 5180, .hw_value = 36, }, - { .center_freq = 5200, .hw_value = 40, }, - { .center_freq = 5220, .hw_value = 44, }, - { .center_freq = 5240, .hw_value = 48, }, + { .band = IEEE80211_BAND_5GHZ, .center_freq = 5180, .hw_value = 36, }, + { .band = IEEE80211_BAND_5GHZ, .center_freq = 5200, .hw_value = 40, }, + { .band = IEEE80211_BAND_5GHZ, .center_freq = 5220, .hw_value = 44, }, + { .band = IEEE80211_BAND_5GHZ, .center_freq = 5240, .hw_value = 48, }, }; static const struct ieee80211_rate mwl8k_rates_50[] = { diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 5099636a6e5f..00cc78c7aa04 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -845,6 +845,32 @@ int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec) } EXPORT_SYMBOL(pci_enable_msi_block); +int pci_enable_msi_block_auto(struct pci_dev *dev, unsigned int *maxvec) +{ + int ret, pos, nvec; + u16 msgctl; + + pos = pci_find_capability(dev, PCI_CAP_ID_MSI); + if (!pos) + return -EINVAL; + + pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl); + ret = 1 << ((msgctl & PCI_MSI_FLAGS_QMASK) >> 1); + + if (maxvec) + *maxvec = ret; + + do { + nvec = ret; + ret = pci_enable_msi_block(dev, nvec); + } while (ret > 0); + + if (ret < 0) + return ret; + return nvec; +} +EXPORT_SYMBOL(pci_enable_msi_block_auto); + void pci_msi_shutdown(struct pci_dev *dev) { struct msi_desc *desc; diff --git a/drivers/pci/pcie/aer/aerdrv_errprint.c b/drivers/pci/pcie/aer/aerdrv_errprint.c index 3ea51736f18d..5ab14251839d 100644 --- a/drivers/pci/pcie/aer/aerdrv_errprint.c +++ b/drivers/pci/pcie/aer/aerdrv_errprint.c @@ -23,6 +23,9 @@ #include "aerdrv.h" +#define CREATE_TRACE_POINTS +#include <trace/events/ras.h> + #define AER_AGENT_RECEIVER 0 #define AER_AGENT_REQUESTER 1 #define AER_AGENT_COMPLETER 2 @@ -121,12 +124,11 @@ static const char *aer_agent_string[] = { "Transmitter ID" }; -static void __aer_print_error(const char *prefix, +static void __aer_print_error(struct pci_dev *dev, struct aer_err_info *info) { int i, status; const char *errmsg = NULL; - status = (info->status & ~info->mask); for (i = 0; i < 32; i++) { @@ -141,26 +143,22 @@ static void __aer_print_error(const char *prefix, aer_uncorrectable_error_string[i] : NULL; if (errmsg) - printk("%s"" [%2d] %-22s%s\n", prefix, i, errmsg, + dev_err(&dev->dev, " [%2d] %-22s%s\n", i, errmsg, info->first_error == i ? " (First)" : ""); else - printk("%s"" [%2d] Unknown Error Bit%s\n", prefix, i, - info->first_error == i ? " (First)" : ""); + dev_err(&dev->dev, " [%2d] Unknown Error Bit%s\n", + i, info->first_error == i ? " (First)" : ""); } } void aer_print_error(struct pci_dev *dev, struct aer_err_info *info) { int id = ((dev->bus->number << 8) | dev->devfn); - char prefix[44]; - - snprintf(prefix, sizeof(prefix), "%s%s %s: ", - (info->severity == AER_CORRECTABLE) ? KERN_WARNING : KERN_ERR, - dev_driver_string(&dev->dev), dev_name(&dev->dev)); if (info->status == 0) { - printk("%s""PCIe Bus Error: severity=%s, type=Unaccessible, " - "id=%04x(Unregistered Agent ID)\n", prefix, + dev_err(&dev->dev, + "PCIe Bus Error: severity=%s, type=Unaccessible, " + "id=%04x(Unregistered Agent ID)\n", aer_error_severity_string[info->severity], id); } else { int layer, agent; @@ -168,22 +166,24 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info) layer = AER_GET_LAYER_ERROR(info->severity, info->status); agent = AER_GET_AGENT(info->severity, info->status); - printk("%s""PCIe Bus Error: severity=%s, type=%s, id=%04x(%s)\n", - prefix, aer_error_severity_string[info->severity], + dev_err(&dev->dev, + "PCIe Bus Error: severity=%s, type=%s, id=%04x(%s)\n", + aer_error_severity_string[info->severity], aer_error_layer[layer], id, aer_agent_string[agent]); - printk("%s"" device [%04x:%04x] error status/mask=%08x/%08x\n", - prefix, dev->vendor, dev->device, + dev_err(&dev->dev, + " device [%04x:%04x] error status/mask=%08x/%08x\n", + dev->vendor, dev->device, info->status, info->mask); - __aer_print_error(prefix, info); + __aer_print_error(dev, info); if (info->tlp_header_valid) { unsigned char *tlp = (unsigned char *) &info->tlp; - printk("%s"" TLP Header:" + dev_err(&dev->dev, " TLP Header:" " %02x%02x%02x%02x %02x%02x%02x%02x" " %02x%02x%02x%02x %02x%02x%02x%02x\n", - prefix, *(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp, + *(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp, *(tlp + 7), *(tlp + 6), *(tlp + 5), *(tlp + 4), *(tlp + 11), *(tlp + 10), *(tlp + 9), *(tlp + 8), *(tlp + 15), *(tlp + 14), @@ -192,8 +192,11 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info) } if (info->id && info->error_dev_num > 1 && info->id == id) - printk("%s"" Error of this Agent(%04x) is reported first\n", - prefix, id); + dev_err(&dev->dev, + " Error of this Agent(%04x) is reported first\n", + id); + trace_aer_event(dev_name(&dev->dev), (info->status & ~info->mask), + info->severity); } void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info) @@ -217,7 +220,7 @@ int cper_severity_to_aer(int cper_severity) } EXPORT_SYMBOL_GPL(cper_severity_to_aer); -void cper_print_aer(const char *prefix, int cper_severity, +void cper_print_aer(const char *prefix, struct pci_dev *dev, int cper_severity, struct aer_capability_regs *aer) { int aer_severity, layer, agent, status_strs_size, tlp_header_valid = 0; @@ -239,25 +242,27 @@ void cper_print_aer(const char *prefix, int cper_severity, } layer = AER_GET_LAYER_ERROR(aer_severity, status); agent = AER_GET_AGENT(aer_severity, status); - printk("%s""aer_status: 0x%08x, aer_mask: 0x%08x\n", - prefix, status, mask); + dev_err(&dev->dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n", + status, mask); cper_print_bits(prefix, status, status_strs, status_strs_size); - printk("%s""aer_layer=%s, aer_agent=%s\n", prefix, + dev_err(&dev->dev, "aer_layer=%s, aer_agent=%s\n", aer_error_layer[layer], aer_agent_string[agent]); if (aer_severity != AER_CORRECTABLE) - printk("%s""aer_uncor_severity: 0x%08x\n", - prefix, aer->uncor_severity); + dev_err(&dev->dev, "aer_uncor_severity: 0x%08x\n", + aer->uncor_severity); if (tlp_header_valid) { const unsigned char *tlp; tlp = (const unsigned char *)&aer->header_log; - printk("%s""aer_tlp_header:" + dev_err(&dev->dev, "aer_tlp_header:" " %02x%02x%02x%02x %02x%02x%02x%02x" " %02x%02x%02x%02x %02x%02x%02x%02x\n", - prefix, *(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp, + *(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp, *(tlp + 7), *(tlp + 6), *(tlp + 5), *(tlp + 4), *(tlp + 11), *(tlp + 10), *(tlp + 9), *(tlp + 8), *(tlp + 15), *(tlp + 14), *(tlp + 13), *(tlp + 12)); } + trace_aer_event(dev_name(&dev->dev), (status & ~mask), + aer_severity); } #endif diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c index 7c0fd9252e6f..84954a726a94 100644 --- a/drivers/pci/remove.c +++ b/drivers/pci/remove.c @@ -19,6 +19,8 @@ static void pci_free_resources(struct pci_dev *dev) static void pci_stop_dev(struct pci_dev *dev) { + pci_pme_active(dev, false); + if (dev->is_added) { pci_proc_detach_device(dev); pci_remove_sysfs_dev_files(dev); diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 923a9da9c829..5e44eaabf457 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -20,14 +20,24 @@ if RTC_CLASS config RTC_HCTOSYS bool "Set system time from RTC on startup and resume" default y + depends on !ALWAYS_USE_PERSISTENT_CLOCK help If you say yes here, the system time (wall clock) will be set using the value read from a specified RTC device. This is useful to avoid unnecessary fsck runs at boot time, and to network better. +config RTC_SYSTOHC + bool "Set the RTC time based on NTP synchronization" + default y + depends on !ALWAYS_USE_PERSISTENT_CLOCK + help + If you say yes here, the system time (wall clock) will be stored + in the RTC specified by RTC_HCTOSYS_DEVICE approximately every 11 + minutes if userspace reports synchronized NTP status. + config RTC_HCTOSYS_DEVICE string "RTC used to set the system time" - depends on RTC_HCTOSYS = y + depends on RTC_HCTOSYS = y || RTC_SYSTOHC = y default "rtc0" help The RTC device that will be used to (re)initialize the system diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile index 4418ef3f9ecc..ec2988b00a44 100644 --- a/drivers/rtc/Makefile +++ b/drivers/rtc/Makefile @@ -6,6 +6,7 @@ ccflags-$(CONFIG_RTC_DEBUG) := -DDEBUG obj-$(CONFIG_RTC_LIB) += rtc-lib.o obj-$(CONFIG_RTC_HCTOSYS) += hctosys.o +obj-$(CONFIG_RTC_SYSTOHC) += systohc.o obj-$(CONFIG_RTC_CLASS) += rtc-core.o rtc-core-y := class.o interface.o diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c index 5143629dedbd..26388f182594 100644 --- a/drivers/rtc/class.c +++ b/drivers/rtc/class.c @@ -50,6 +50,10 @@ static int rtc_suspend(struct device *dev, pm_message_t mesg) struct rtc_device *rtc = to_rtc_device(dev); struct rtc_time tm; struct timespec delta, delta_delta; + + if (has_persistent_clock()) + return 0; + if (strcmp(dev_name(&rtc->dev), CONFIG_RTC_HCTOSYS_DEVICE) != 0) return 0; @@ -88,6 +92,9 @@ static int rtc_resume(struct device *dev) struct timespec new_system, new_rtc; struct timespec sleep_time; + if (has_persistent_clock()) + return 0; + rtc_hctosys_ret = -ENODEV; if (strcmp(dev_name(&rtc->dev), CONFIG_RTC_HCTOSYS_DEVICE) != 0) return 0; diff --git a/drivers/rtc/rtc-pl031.c b/drivers/rtc/rtc-pl031.c index 10c1a3454e48..81c5077feff3 100644 --- a/drivers/rtc/rtc-pl031.c +++ b/drivers/rtc/rtc-pl031.c @@ -350,7 +350,9 @@ static int pl031_probe(struct amba_device *adev, const struct amba_id *id) /* Enable the clockwatch on ST Variants */ if (vendor->clockwatch) data |= RTC_CR_CWEN; - writel(data | RTC_CR_EN, ldata->base + RTC_CR); + else + data |= RTC_CR_EN; + writel(data, ldata->base + RTC_CR); /* * On ST PL031 variants, the RTC reset value does not provide correct diff --git a/drivers/rtc/systohc.c b/drivers/rtc/systohc.c new file mode 100644 index 000000000000..bf3e242ccc5c --- /dev/null +++ b/drivers/rtc/systohc.c @@ -0,0 +1,44 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + */ +#include <linux/rtc.h> +#include <linux/time.h> + +/** + * rtc_set_ntp_time - Save NTP synchronized time to the RTC + * @now: Current time of day + * + * Replacement for the NTP platform function update_persistent_clock + * that stores time for later retrieval by rtc_hctosys. + * + * Returns 0 on successful RTC update, -ENODEV if a RTC update is not + * possible at all, and various other -errno for specific temporary failure + * cases. + * + * If temporary failure is indicated the caller should try again 'soon' + */ +int rtc_set_ntp_time(struct timespec now) +{ + struct rtc_device *rtc; + struct rtc_time tm; + int err = -ENODEV; + + if (now.tv_nsec < (NSEC_PER_SEC >> 1)) + rtc_time_to_tm(now.tv_sec, &tm); + else + rtc_time_to_tm(now.tv_sec + 1, &tm); + + rtc = rtc_class_open(CONFIG_RTC_HCTOSYS_DEVICE); + if (rtc) { + /* rtc_hctosys exclusively uses UTC, so we call set_time here, + * not set_mmss. */ + if (rtc->ops && (rtc->ops->set_time || rtc->ops->set_mmss)) + err = rtc_set_time(rtc, &tm); + rtc_class_close(rtc); + } + + return err; +} diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 19ee901577da..3a6083b386a1 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -33,7 +33,7 @@ #include <linux/of_gpio.h> #include <linux/pm_runtime.h> #include <linux/export.h> -#include <linux/sched.h> +#include <linux/sched/rt.h> #include <linux/delay.h> #include <linux/kthread.h> #include <linux/ioport.h> diff --git a/drivers/staging/csr/bh.c b/drivers/staging/csr/bh.c index 1a1f5c79822a..7b133597e923 100644 --- a/drivers/staging/csr/bh.c +++ b/drivers/staging/csr/bh.c @@ -15,7 +15,7 @@ */ #include "csr_wifi_hip_unifi.h" #include "unifi_priv.h" - +#include <linux/sched/rt.h> /* * --------------------------------------------------------------------------- diff --git a/drivers/staging/csr/unifi_sme.c b/drivers/staging/csr/unifi_sme.c index 7c6c4138fc76..49395da34b7f 100644 --- a/drivers/staging/csr/unifi_sme.c +++ b/drivers/staging/csr/unifi_sme.c @@ -15,7 +15,7 @@ #include "unifi_priv.h" #include "csr_wifi_hip_unifi.h" #include "csr_wifi_hip_conversions.h" - +#include <linux/sched/rt.h> diff --git a/drivers/staging/iio/trigger/Kconfig b/drivers/staging/iio/trigger/Kconfig index 7d3207559265..d44d3ad26fa5 100644 --- a/drivers/staging/iio/trigger/Kconfig +++ b/drivers/staging/iio/trigger/Kconfig @@ -21,7 +21,6 @@ config IIO_GPIO_TRIGGER config IIO_SYSFS_TRIGGER tristate "SYSFS trigger" depends on SYSFS - depends on HAVE_IRQ_WORK select IRQ_WORK help Provides support for using SYSFS entry as IIO triggers. diff --git a/drivers/staging/omapdrm/Kconfig b/drivers/staging/omapdrm/Kconfig index b724a4131435..09f65dc3d2c8 100644 --- a/drivers/staging/omapdrm/Kconfig +++ b/drivers/staging/omapdrm/Kconfig @@ -3,8 +3,8 @@ config DRM_OMAP tristate "OMAP DRM" depends on DRM && !CONFIG_FB_OMAP2 depends on ARCH_OMAP2PLUS || ARCH_MULTIPLATFORM + depends on OMAP2_DSS select DRM_KMS_HELPER - select OMAP2_DSS select FB_SYS_FILLRECT select FB_SYS_COPYAREA select FB_SYS_IMAGEBLIT diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index b3c4a250ff86..40e5b3919e27 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -15,6 +15,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/sched.h> +#include <linux/sched/rt.h> #include <linux/interrupt.h> #include <linux/mm.h> #include <linux/fs.h> diff --git a/drivers/video/omap2/dss/dss_features.c b/drivers/video/omap2/dss/dss_features.c index 18688c12e30d..d7d66ef5cb58 100644 --- a/drivers/video/omap2/dss/dss_features.c +++ b/drivers/video/omap2/dss/dss_features.c @@ -538,6 +538,7 @@ static const enum dss_feat_id omap3630_dss_feat_list[] = { FEAT_ALPHA_FIXED_ZORDER, FEAT_FIFO_MERGE, FEAT_OMAP3_DSI_FIFO_BUG, + FEAT_DPI_USES_VDDS_DSI, }; static const enum dss_feat_id omap4430_es1_0_dss_feat_list[] = { diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 74d77dfa5f63..22f77c5f6012 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -1787,7 +1787,7 @@ void xen_callback_vector(void) int rc; uint64_t callback_via; if (xen_have_vector_callback) { - callback_via = HVM_CALLBACK_VECTOR(XEN_HVM_EVTCHN_CALLBACK); + callback_via = HVM_CALLBACK_VECTOR(HYPERVISOR_CALLBACK_VECTOR); rc = xen_set_callback_via(callback_via); if (rc) { printk(KERN_ERR "Request for Xen HVM callback vector" @@ -1798,8 +1798,9 @@ void xen_callback_vector(void) printk(KERN_INFO "Xen HVM callback vector for event delivery is " "enabled\n"); /* in the restore case the vector has already been allocated */ - if (!test_bit(XEN_HVM_EVTCHN_CALLBACK, used_vectors)) - alloc_intr_gate(XEN_HVM_EVTCHN_CALLBACK, xen_hvm_callback_vector); + if (!test_bit(HYPERVISOR_CALLBACK_VECTOR, used_vectors)) + alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, + xen_hvm_callback_vector); } } #else diff --git a/drivers/xen/pcpu.c b/drivers/xen/pcpu.c index 067fcfa1723e..5a27a4599a4a 100644 --- a/drivers/xen/pcpu.c +++ b/drivers/xen/pcpu.c @@ -278,8 +278,7 @@ static int sync_pcpu(uint32_t cpu, uint32_t *max_cpu) * Only those at cpu present map has its sys interface. */ if (info->flags & XEN_PCPU_FLAGS_INVALID) { - if (pcpu) - unregister_and_remove_pcpu(pcpu); + unregister_and_remove_pcpu(pcpu); return 0; } diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 0c42cdbabecf..49d0b43458b7 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -33,6 +33,7 @@ #include <linux/elf.h> #include <linux/utsname.h> #include <linux/coredump.h> +#include <linux/sched.h> #include <asm/uaccess.h> #include <asm/param.h> #include <asm/page.h> @@ -1320,8 +1321,11 @@ static void fill_prstatus(struct elf_prstatus *prstatus, cputime_to_timeval(cputime.utime, &prstatus->pr_utime); cputime_to_timeval(cputime.stime, &prstatus->pr_stime); } else { - cputime_to_timeval(p->utime, &prstatus->pr_utime); - cputime_to_timeval(p->stime, &prstatus->pr_stime); + cputime_t utime, stime; + + task_cputime(p, &utime, &stime); + cputime_to_timeval(utime, &prstatus->pr_utime); + cputime_to_timeval(stime, &prstatus->pr_stime); } cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime); cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime); diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index dc84732e554f..cb240dd3b402 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1375,8 +1375,11 @@ static void fill_prstatus(struct elf_prstatus *prstatus, cputime_to_timeval(cputime.utime, &prstatus->pr_utime); cputime_to_timeval(cputime.stime, &prstatus->pr_stime); } else { - cputime_to_timeval(p->utime, &prstatus->pr_utime); - cputime_to_timeval(p->stime, &prstatus->pr_stime); + cputime_t utime, stime; + + task_cputime(p, &utime, &stime); + cputime_to_timeval(utime, &prstatus->pr_utime); + cputime_to_timeval(stime, &prstatus->pr_stime); } cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime); cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime); diff --git a/fs/proc/array.c b/fs/proc/array.c index 6a91e6ffbcbd..f7ed9ee46eb9 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -449,7 +449,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, do { min_flt += t->min_flt; maj_flt += t->maj_flt; - gtime += t->gtime; + gtime += task_gtime(t); t = next_thread(t); } while (t != task); @@ -472,7 +472,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, min_flt = task->min_flt; maj_flt = task->maj_flt; task_cputime_adjusted(task, &utime, &stime); - gtime = task->gtime; + gtime = task_gtime(task); } /* scale priority and nice values from timeslices to -20..20 */ diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 7003e5266f25..288f068740f6 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -167,12 +167,16 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type, static size_t ramoops_write_kmsg_hdr(struct persistent_ram_zone *prz) { char *hdr; - struct timeval timestamp; + struct timespec timestamp; size_t len; - do_gettimeofday(×tamp); + /* Report zeroed timestamp if called before timekeeping has resumed. */ + if (__getnstimeofday(×tamp)) { + timestamp.tv_sec = 0; + timestamp.tv_nsec = 0; + } hdr = kasprintf(GFP_ATOMIC, RAMOOPS_KERNMSG_HDR "%lu.%lu\n", - (long)timestamp.tv_sec, (long)timestamp.tv_usec); + (long)timestamp.tv_sec, (long)(timestamp.tv_nsec / 1000)); WARN_ON_ONCE(!hdr); len = hdr ? strlen(hdr) : 0; persistent_ram_write(prz, hdr, len); diff --git a/fs/select.c b/fs/select.c index 2ef72d965036..8c1c96c27062 100644 --- a/fs/select.c +++ b/fs/select.c @@ -26,6 +26,7 @@ #include <linux/fs.h> #include <linux/rcupdate.h> #include <linux/hrtimer.h> +#include <linux/sched/rt.h> #include <asm/uaccess.h> diff --git a/include/asm-generic/cputime.h b/include/asm-generic/cputime.h index 9a62937c56ca..51969436b8b8 100644 --- a/include/asm-generic/cputime.h +++ b/include/asm-generic/cputime.h @@ -4,66 +4,12 @@ #include <linux/time.h> #include <linux/jiffies.h> -typedef unsigned long __nocast cputime_t; - -#define cputime_one_jiffy jiffies_to_cputime(1) -#define cputime_to_jiffies(__ct) (__force unsigned long)(__ct) -#define cputime_to_scaled(__ct) (__ct) -#define jiffies_to_cputime(__hz) (__force cputime_t)(__hz) - -typedef u64 __nocast cputime64_t; - -#define cputime64_to_jiffies64(__ct) (__force u64)(__ct) -#define jiffies64_to_cputime64(__jif) (__force cputime64_t)(__jif) - -#define nsecs_to_cputime64(__ct) \ - jiffies64_to_cputime64(nsecs_to_jiffies64(__ct)) - - -/* - * Convert cputime to microseconds and back. - */ -#define cputime_to_usecs(__ct) \ - jiffies_to_usecs(cputime_to_jiffies(__ct)) -#define usecs_to_cputime(__usec) \ - jiffies_to_cputime(usecs_to_jiffies(__usec)) -#define usecs_to_cputime64(__usec) \ - jiffies64_to_cputime64(nsecs_to_jiffies64((__usec) * 1000)) - -/* - * Convert cputime to seconds and back. - */ -#define cputime_to_secs(jif) (cputime_to_jiffies(jif) / HZ) -#define secs_to_cputime(sec) jiffies_to_cputime((sec) * HZ) - -/* - * Convert cputime to timespec and back. - */ -#define timespec_to_cputime(__val) \ - jiffies_to_cputime(timespec_to_jiffies(__val)) -#define cputime_to_timespec(__ct,__val) \ - jiffies_to_timespec(cputime_to_jiffies(__ct),__val) - -/* - * Convert cputime to timeval and back. - */ -#define timeval_to_cputime(__val) \ - jiffies_to_cputime(timeval_to_jiffies(__val)) -#define cputime_to_timeval(__ct,__val) \ - jiffies_to_timeval(cputime_to_jiffies(__ct),__val) - -/* - * Convert cputime to clock and back. - */ -#define cputime_to_clock_t(__ct) \ - jiffies_to_clock_t(cputime_to_jiffies(__ct)) -#define clock_t_to_cputime(__x) \ - jiffies_to_cputime(clock_t_to_jiffies(__x)) +#ifndef CONFIG_VIRT_CPU_ACCOUNTING +# include <asm-generic/cputime_jiffies.h> +#endif -/* - * Convert cputime64 to clock. - */ -#define cputime64_to_clock_t(__ct) \ - jiffies_64_to_clock_t(cputime64_to_jiffies64(__ct)) +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN +# include <asm-generic/cputime_nsecs.h> +#endif #endif diff --git a/include/asm-generic/cputime_jiffies.h b/include/asm-generic/cputime_jiffies.h new file mode 100644 index 000000000000..272ecba9f588 --- /dev/null +++ b/include/asm-generic/cputime_jiffies.h @@ -0,0 +1,72 @@ +#ifndef _ASM_GENERIC_CPUTIME_JIFFIES_H +#define _ASM_GENERIC_CPUTIME_JIFFIES_H + +typedef unsigned long __nocast cputime_t; + +#define cputime_one_jiffy jiffies_to_cputime(1) +#define cputime_to_jiffies(__ct) (__force unsigned long)(__ct) +#define cputime_to_scaled(__ct) (__ct) +#define jiffies_to_cputime(__hz) (__force cputime_t)(__hz) + +typedef u64 __nocast cputime64_t; + +#define cputime64_to_jiffies64(__ct) (__force u64)(__ct) +#define jiffies64_to_cputime64(__jif) (__force cputime64_t)(__jif) + + +/* + * Convert nanoseconds to cputime + */ +#define nsecs_to_cputime64(__nsec) \ + jiffies64_to_cputime64(nsecs_to_jiffies64(__nsec)) +#define nsecs_to_cputime(__nsec) \ + jiffies_to_cputime(nsecs_to_jiffies(__nsec)) + + +/* + * Convert cputime to microseconds and back. + */ +#define cputime_to_usecs(__ct) \ + jiffies_to_usecs(cputime_to_jiffies(__ct)) +#define usecs_to_cputime(__usec) \ + jiffies_to_cputime(usecs_to_jiffies(__usec)) +#define usecs_to_cputime64(__usec) \ + jiffies64_to_cputime64(nsecs_to_jiffies64((__usec) * 1000)) + +/* + * Convert cputime to seconds and back. + */ +#define cputime_to_secs(jif) (cputime_to_jiffies(jif) / HZ) +#define secs_to_cputime(sec) jiffies_to_cputime((sec) * HZ) + +/* + * Convert cputime to timespec and back. + */ +#define timespec_to_cputime(__val) \ + jiffies_to_cputime(timespec_to_jiffies(__val)) +#define cputime_to_timespec(__ct,__val) \ + jiffies_to_timespec(cputime_to_jiffies(__ct),__val) + +/* + * Convert cputime to timeval and back. + */ +#define timeval_to_cputime(__val) \ + jiffies_to_cputime(timeval_to_jiffies(__val)) +#define cputime_to_timeval(__ct,__val) \ + jiffies_to_timeval(cputime_to_jiffies(__ct),__val) + +/* + * Convert cputime to clock and back. + */ +#define cputime_to_clock_t(__ct) \ + jiffies_to_clock_t(cputime_to_jiffies(__ct)) +#define clock_t_to_cputime(__x) \ + jiffies_to_cputime(clock_t_to_jiffies(__x)) + +/* + * Convert cputime64 to clock. + */ +#define cputime64_to_clock_t(__ct) \ + jiffies_64_to_clock_t(cputime64_to_jiffies64(__ct)) + +#endif diff --git a/include/asm-generic/cputime_nsecs.h b/include/asm-generic/cputime_nsecs.h new file mode 100644 index 000000000000..b6485cafb7bd --- /dev/null +++ b/include/asm-generic/cputime_nsecs.h @@ -0,0 +1,104 @@ +/* + * Definitions for measuring cputime in nsecs resolution. + * + * Based on <arch/ia64/include/asm/cputime.h> + * + * Copyright (C) 2007 FUJITSU LIMITED + * Copyright (C) 2007 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#ifndef _ASM_GENERIC_CPUTIME_NSECS_H +#define _ASM_GENERIC_CPUTIME_NSECS_H + +typedef u64 __nocast cputime_t; +typedef u64 __nocast cputime64_t; + +#define cputime_one_jiffy jiffies_to_cputime(1) + +/* + * Convert cputime <-> jiffies (HZ) + */ +#define cputime_to_jiffies(__ct) \ + ((__force u64)(__ct) / (NSEC_PER_SEC / HZ)) +#define cputime_to_scaled(__ct) (__ct) +#define jiffies_to_cputime(__jif) \ + (__force cputime_t)((__jif) * (NSEC_PER_SEC / HZ)) +#define cputime64_to_jiffies64(__ct) \ + ((__force u64)(__ct) / (NSEC_PER_SEC / HZ)) +#define jiffies64_to_cputime64(__jif) \ + (__force cputime64_t)((__jif) * (NSEC_PER_SEC / HZ)) + + +/* + * Convert cputime <-> nanoseconds + */ +#define nsecs_to_cputime(__nsecs) ((__force u64)(__nsecs)) + + +/* + * Convert cputime <-> microseconds + */ +#define cputime_to_usecs(__ct) \ + ((__force u64)(__ct) / NSEC_PER_USEC) +#define usecs_to_cputime(__usecs) \ + (__force cputime_t)((__usecs) * NSEC_PER_USEC) +#define usecs_to_cputime64(__usecs) \ + (__force cputime64_t)((__usecs) * NSEC_PER_USEC) + +/* + * Convert cputime <-> seconds + */ +#define cputime_to_secs(__ct) \ + ((__force u64)(__ct) / NSEC_PER_SEC) +#define secs_to_cputime(__secs) \ + (__force cputime_t)((__secs) * NSEC_PER_SEC) + +/* + * Convert cputime <-> timespec (nsec) + */ +static inline cputime_t timespec_to_cputime(const struct timespec *val) +{ + u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_nsec; + return (__force cputime_t) ret; +} +static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val) +{ + val->tv_sec = (__force u64) ct / NSEC_PER_SEC; + val->tv_nsec = (__force u64) ct % NSEC_PER_SEC; +} + +/* + * Convert cputime <-> timeval (msec) + */ +static inline cputime_t timeval_to_cputime(struct timeval *val) +{ + u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_usec * NSEC_PER_USEC; + return (__force cputime_t) ret; +} +static inline void cputime_to_timeval(const cputime_t ct, struct timeval *val) +{ + val->tv_sec = (__force u64) ct / NSEC_PER_SEC; + val->tv_usec = ((__force u64) ct % NSEC_PER_SEC) / NSEC_PER_USEC; +} + +/* + * Convert cputime <-> clock (USER_HZ) + */ +#define cputime_to_clock_t(__ct) \ + ((__force u64)(__ct) / (NSEC_PER_SEC / USER_HZ)) +#define clock_t_to_cputime(__x) \ + (__force cputime_t)((__x) * (NSEC_PER_SEC / USER_HZ)) + +/* + * Convert cputime64 to clock. + */ +#define cputime64_to_clock_t(__ct) \ + cputime_to_clock_t((__force cputime_t)__ct) + +#endif diff --git a/include/linux/aer.h b/include/linux/aer.h index 544abdb2238c..ec10e1b24c1c 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -49,8 +49,8 @@ static inline int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev) } #endif -extern void cper_print_aer(const char *prefix, int cper_severity, - struct aer_capability_regs *aer); +extern void cper_print_aer(const char *prefix, struct pci_dev *dev, + int cper_severity, struct aer_capability_regs *aer); extern int cper_severity_to_aer(int cper_severity); extern void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn, int severity); diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 8a7096fcb01e..66346521cb65 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -161,6 +161,15 @@ clockevents_calc_mult_shift(struct clock_event_device *ce, u32 freq, u32 minsec) extern void clockevents_suspend(void); extern void clockevents_resume(void); +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST +#ifdef CONFIG_ARCH_HAS_TICK_BROADCAST +extern void tick_broadcast(const struct cpumask *mask); +#else +#define tick_broadcast NULL +#endif +extern int tick_receive_broadcast(void); +#endif + #ifdef CONFIG_GENERIC_CLOCKEVENTS extern void clockevents_notify(unsigned long reason, void *arg); #else diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index e24339ccb7f0..b28d161c1091 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -3,12 +3,40 @@ #ifdef CONFIG_CONTEXT_TRACKING #include <linux/sched.h> +#include <linux/percpu.h> + +struct context_tracking { + /* + * When active is false, probes are unset in order + * to minimize overhead: TIF flags are cleared + * and calls to user_enter/exit are ignored. This + * may be further optimized using static keys. + */ + bool active; + enum { + IN_KERNEL = 0, + IN_USER, + } state; +}; + +DECLARE_PER_CPU(struct context_tracking, context_tracking); + +static inline bool context_tracking_in_user(void) +{ + return __this_cpu_read(context_tracking.state) == IN_USER; +} + +static inline bool context_tracking_active(void) +{ + return __this_cpu_read(context_tracking.active); +} extern void user_enter(void); extern void user_exit(void); extern void context_tracking_task_switch(struct task_struct *prev, struct task_struct *next); #else +static inline bool context_tracking_in_user(void) { return false; } static inline void user_enter(void) { } static inline void user_exit(void) { } static inline void context_tracking_task_switch(struct task_struct *prev, diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 92691d85c320..e5ca8ef50e9b 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -74,7 +74,7 @@ typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip, * SAVE_REGS - The ftrace_ops wants regs saved at each function called * and passed to the callback. If this flag is set, but the * architecture does not support passing regs - * (ARCH_SUPPORTS_FTRACE_SAVE_REGS is not defined), then the + * (CONFIG_DYNAMIC_FTRACE_WITH_REGS is not defined), then the * ftrace_ops will fail to register, unless the next flag * is set. * SAVE_REGS_IF_SUPPORTED - This is the same as SAVE_REGS, but if the @@ -418,7 +418,7 @@ void ftrace_modify_all_code(int command); #endif #ifndef FTRACE_REGS_ADDR -#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS # define FTRACE_REGS_ADDR ((unsigned long)ftrace_regs_caller) #else # define FTRACE_REGS_ADDR FTRACE_ADDR @@ -480,7 +480,7 @@ extern int ftrace_make_nop(struct module *mod, */ extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr); -#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS /** * ftrace_modify_call - convert from one addr to another (no nop) * @rec: the mcount call site record diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index a3d489531d83..13a54d0bdfa8 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -49,7 +49,6 @@ struct trace_entry { unsigned char flags; unsigned char preempt_count; int pid; - int padding; }; #define FTRACE_MAX_EVENT \ @@ -84,6 +83,9 @@ struct trace_iterator { long idx; cpumask_var_t started; + + /* it's true when current open file is snapshot */ + bool snapshot; }; enum trace_iter_flags { @@ -272,7 +274,7 @@ extern int trace_define_field(struct ftrace_event_call *call, const char *type, extern int trace_add_event_call(struct ftrace_event_call *call); extern void trace_remove_event_call(struct ftrace_event_call *call); -#define is_signed_type(type) (((type)(-1)) < 0) +#define is_signed_type(type) (((type)(-1)) < (type)0) int trace_set_clr_event(const char *system, const char *event, int set); diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 624ef3f45c8e..29eb805ea4a6 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -153,7 +153,7 @@ extern void rcu_nmi_exit(void); */ #define __irq_enter() \ do { \ - vtime_account_irq_enter(current); \ + account_irq_enter_time(current); \ add_preempt_count(HARDIRQ_OFFSET); \ trace_hardirq_enter(); \ } while (0) @@ -169,7 +169,7 @@ extern void irq_enter(void); #define __irq_exit() \ do { \ trace_hardirq_exit(); \ - vtime_account_irq_exit(current); \ + account_irq_exit_time(current); \ sub_preempt_count(HARDIRQ_OFFSET); \ } while (0) @@ -180,10 +180,10 @@ extern void irq_exit(void); #define nmi_enter() \ do { \ + lockdep_off(); \ ftrace_nmi_enter(); \ BUG_ON(in_nmi()); \ add_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \ - lockdep_off(); \ rcu_nmi_enter(); \ trace_hardirq_enter(); \ } while (0) @@ -192,10 +192,10 @@ extern void irq_exit(void); do { \ trace_hardirq_exit(); \ rcu_nmi_exit(); \ - lockdep_on(); \ BUG_ON(!in_nmi()); \ sub_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \ ftrace_nmi_exit(); \ + lockdep_on(); \ } while (0) #endif /* LINUX_HARDIRQ_H */ diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 6d087c5f57f7..5cd0f0949927 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -10,7 +10,9 @@ #include <linux/pid_namespace.h> #include <linux/user_namespace.h> #include <linux/securebits.h> +#include <linux/seqlock.h> #include <net/net_namespace.h> +#include <linux/sched/rt.h> #ifdef CONFIG_SMP # define INIT_PUSHABLE_TASKS(tsk) \ @@ -141,6 +143,15 @@ extern struct task_group root_task_group; # define INIT_PERF_EVENTS(tsk) #endif +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN +# define INIT_VTIME(tsk) \ + .vtime_seqlock = __SEQLOCK_UNLOCKED(tsk.vtime_seqlock), \ + .vtime_snap = 0, \ + .vtime_snap_whence = VTIME_SYS, +#else +# define INIT_VTIME(tsk) +#endif + #define INIT_TASK_COMM "swapper" /* @@ -210,6 +221,7 @@ extern struct task_group root_task_group; INIT_TRACE_RECURSION \ INIT_TASK_RCU_PREEMPT(tsk) \ INIT_CPUSET_SEQ \ + INIT_VTIME(tsk) \ } diff --git a/include/linux/irq.h b/include/linux/irq.h index fdf2c4a238cc..bc4e06611958 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -509,8 +509,11 @@ static inline void irq_set_percpu_devid_flags(unsigned int irq) /* Handle dynamic irq creation and destruction */ extern unsigned int create_irq_nr(unsigned int irq_want, int node); +extern unsigned int __create_irqs(unsigned int from, unsigned int count, + int node); extern int create_irq(void); extern void destroy_irq(unsigned int irq); +extern void destroy_irqs(unsigned int irq, unsigned int count); /* * Dynamic irq helper functions. Obsolete. Use irq_alloc_desc* and @@ -528,6 +531,8 @@ extern int irq_set_handler_data(unsigned int irq, void *data); extern int irq_set_chip_data(unsigned int irq, void *data); extern int irq_set_irq_type(unsigned int irq, unsigned int type); extern int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry); +extern int irq_set_msi_desc_off(unsigned int irq_base, unsigned int irq_offset, + struct msi_desc *entry); extern struct irq_data *irq_get_irq_data(unsigned int irq); static inline struct irq_chip *irq_get_chip(unsigned int irq) @@ -590,6 +595,9 @@ int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, #define irq_alloc_desc_from(from, node) \ irq_alloc_descs(-1, from, 1, node) +#define irq_alloc_descs_from(from, cnt, node) \ + irq_alloc_descs(-1, from, cnt, node) + void irq_free_descs(unsigned int irq, unsigned int cnt); int irq_reserve_irqs(unsigned int from, unsigned int cnt); diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index 6a9e8f5399e2..f5dbce50466e 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -3,6 +3,20 @@ #include <linux/llist.h> +/* + * An entry can be in one of four states: + * + * free NULL, 0 -> {claimed} : free to be used + * claimed NULL, 3 -> {pending} : claimed to be enqueued + * pending next, 3 -> {busy} : queued, pending callback + * busy NULL, 2 -> {free, claimed} : callback in progress, can be claimed + */ + +#define IRQ_WORK_PENDING 1UL +#define IRQ_WORK_BUSY 2UL +#define IRQ_WORK_FLAGS 3UL +#define IRQ_WORK_LAZY 4UL /* Doesn't want IPI, wait for tick */ + struct irq_work { unsigned long flags; struct llist_node llnode; @@ -16,8 +30,14 @@ void init_irq_work(struct irq_work *work, void (*func)(struct irq_work *)) work->func = func; } -bool irq_work_queue(struct irq_work *work); +void irq_work_queue(struct irq_work *work); void irq_work_run(void); void irq_work_sync(struct irq_work *work); +#ifdef CONFIG_IRQ_WORK +bool irq_work_needs_cpu(void); +#else +static bool irq_work_needs_cpu(void) { return false; } +#endif + #endif /* _LINUX_IRQ_WORK_H */ diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 66b70780e910..ed5f6ed6eb77 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -127,7 +127,7 @@ extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t) extern void account_steal_time(cputime_t); extern void account_idle_time(cputime_t); -#ifdef CONFIG_VIRT_CPU_ACCOUNTING +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE static inline void account_process_tick(struct task_struct *tsk, int user) { vtime_account_user(tsk); diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 23755ba42abc..4b6ef4d33cc2 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -49,16 +49,6 @@ #define KPROBE_REENTER 0x00000004 #define KPROBE_HIT_SSDONE 0x00000008 -/* - * If function tracer is enabled and the arch supports full - * passing of pt_regs to function tracing, then kprobes can - * optimize on top of function tracing. - */ -#if defined(CONFIG_FUNCTION_TRACER) && defined(ARCH_SUPPORTS_FTRACE_SAVE_REGS) \ - && defined(ARCH_SUPPORTS_KPROBES_ON_FTRACE) -# define KPROBES_CAN_USE_FTRACE -#endif - /* Attach to insert probes on any functions which should be ignored*/ #define __kprobes __attribute__((__section__(".kprobes.text"))) @@ -316,7 +306,7 @@ extern int proc_kprobes_optimization_handler(struct ctl_table *table, #endif #endif /* CONFIG_OPTPROBES */ -#ifdef KPROBES_CAN_USE_FTRACE +#ifdef CONFIG_KPROBES_ON_FTRACE extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ops, struct pt_regs *regs); extern int arch_prepare_kprobe_ftrace(struct kprobe *p); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 2c497ab0d03d..b7996a768eb2 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -22,6 +22,7 @@ #include <linux/rcupdate.h> #include <linux/ratelimit.h> #include <linux/err.h> +#include <linux/irqflags.h> #include <asm/signal.h> #include <linux/kvm.h> @@ -740,15 +741,52 @@ static inline int kvm_deassign_device(struct kvm *kvm, } #endif /* CONFIG_IOMMU_API */ -static inline void kvm_guest_enter(void) +static inline void __guest_enter(void) { - BUG_ON(preemptible()); /* * This is running in ioctl context so we can avoid * the call to vtime_account() with its unnecessary idle check. */ - vtime_account_system_irqsafe(current); + vtime_account_system(current); current->flags |= PF_VCPU; +} + +static inline void __guest_exit(void) +{ + /* + * This is running in ioctl context so we can avoid + * the call to vtime_account() with its unnecessary idle check. + */ + vtime_account_system(current); + current->flags &= ~PF_VCPU; +} + +#ifdef CONFIG_CONTEXT_TRACKING +extern void guest_enter(void); +extern void guest_exit(void); + +#else /* !CONFIG_CONTEXT_TRACKING */ +static inline void guest_enter(void) +{ + __guest_enter(); +} + +static inline void guest_exit(void) +{ + __guest_exit(); +} +#endif /* !CONFIG_CONTEXT_TRACKING */ + +static inline void kvm_guest_enter(void) +{ + unsigned long flags; + + BUG_ON(preemptible()); + + local_irq_save(flags); + guest_enter(); + local_irq_restore(flags); + /* KVM does not hold any references to rcu protected data when it * switches CPU into a guest mode. In fact switching to a guest mode * is very similar to exiting to userspase from rcu point of view. In @@ -761,12 +799,11 @@ static inline void kvm_guest_enter(void) static inline void kvm_guest_exit(void) { - /* - * This is running in ioctl context so we can avoid - * the call to vtime_account() with its unnecessary idle check. - */ - vtime_account_system_irqsafe(current); - current->flags &= ~PF_VCPU; + unsigned long flags; + + local_irq_save(flags); + guest_exit(); + local_irq_restore(flags); } /* diff --git a/include/linux/pci.h b/include/linux/pci.h index 15472d691ee6..6fa4dd2a3b9e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1101,6 +1101,12 @@ static inline int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec) return -1; } +static inline int +pci_enable_msi_block_auto(struct pci_dev *dev, unsigned int *maxvec) +{ + return -1; +} + static inline void pci_msi_shutdown(struct pci_dev *dev) { } static inline void pci_disable_msi(struct pci_dev *dev) @@ -1132,6 +1138,7 @@ static inline int pci_msi_enabled(void) } #else extern int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec); +extern int pci_enable_msi_block_auto(struct pci_dev *dev, unsigned int *maxvec); extern void pci_msi_shutdown(struct pci_dev *dev); extern void pci_disable_msi(struct pci_dev *dev); extern int pci_msix_table_size(struct pci_dev *dev); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 6bfb2faa0b19..e47ee462c2f2 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -135,16 +135,21 @@ struct hw_perf_event { struct { /* software */ struct hrtimer hrtimer; }; + struct { /* tracepoint */ + struct task_struct *tp_target; + /* for tp_event->class */ + struct list_head tp_list; + }; #ifdef CONFIG_HAVE_HW_BREAKPOINT struct { /* breakpoint */ - struct arch_hw_breakpoint info; - struct list_head bp_list; /* * Crufty hack to avoid the chicken and egg * problem hw_breakpoint has with context * creation and event initalization. */ struct task_struct *bp_target; + struct arch_hw_breakpoint info; + struct list_head bp_list; }; #endif }; @@ -817,6 +822,17 @@ do { \ } while (0) +struct perf_pmu_events_attr { + struct device_attribute attr; + u64 id; +}; + +#define PMU_EVENT_ATTR(_name, _var, _id, _show) \ +static struct perf_pmu_events_attr _var = { \ + .attr = __ATTR(_name, 0444, _show, NULL), \ + .id = _id, \ +}; + #define PMU_FORMAT_ATTR(_name, _format) \ static ssize_t \ _name##_show(struct device *dev, \ diff --git a/include/linux/printk.h b/include/linux/printk.h index 9afc01e5a0a6..86c4b6294713 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -98,9 +98,6 @@ int no_printk(const char *fmt, ...) extern asmlinkage __printf(1, 2) void early_printk(const char *fmt, ...); -extern int printk_needs_cpu(int cpu); -extern void printk_tick(void); - #ifdef CONFIG_PRINTK asmlinkage __printf(5, 0) int vprintk_emit(int facility, int level, diff --git a/include/linux/profile.h b/include/linux/profile.h index a0fc32279fc0..21123902366d 100644 --- a/include/linux/profile.h +++ b/include/linux/profile.h @@ -82,9 +82,6 @@ int task_handoff_unregister(struct notifier_block * n); int profile_event_register(enum profile_type, struct notifier_block * n); int profile_event_unregister(enum profile_type, struct notifier_block * n); -int register_timer_hook(int (*hook)(struct pt_regs *)); -void unregister_timer_hook(int (*hook)(struct pt_regs *)); - struct pt_regs; #else @@ -135,16 +132,6 @@ static inline int profile_event_unregister(enum profile_type t, struct notifier_ #define profile_handoff_task(a) (0) #define profile_munmap(a) do { } while (0) -static inline int register_timer_hook(int (*hook)(struct pt_regs *)) -{ - return -ENOSYS; -} - -static inline void unregister_timer_hook(int (*hook)(struct pt_regs *)) -{ - return; -} - #endif /* CONFIG_PROFILING */ #endif /* _LINUX_PROFILE_H */ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 275aa3f1062d..b758ce17b309 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -53,7 +53,10 @@ extern int rcutorture_runnable; /* for sysctl */ extern void rcutorture_record_test_transition(void); extern void rcutorture_record_progress(unsigned long vernum); extern void do_trace_rcu_torture_read(char *rcutorturename, - struct rcu_head *rhp); + struct rcu_head *rhp, + unsigned long secs, + unsigned long c_old, + unsigned long c); #else static inline void rcutorture_record_test_transition(void) { @@ -63,9 +66,13 @@ static inline void rcutorture_record_progress(unsigned long vernum) } #ifdef CONFIG_RCU_TRACE extern void do_trace_rcu_torture_read(char *rcutorturename, - struct rcu_head *rhp); + struct rcu_head *rhp, + unsigned long secs, + unsigned long c_old, + unsigned long c); #else -#define do_trace_rcu_torture_read(rcutorturename, rhp) do { } while (0) +#define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \ + do { } while (0) #endif #endif @@ -749,7 +756,7 @@ static inline void rcu_preempt_sleep_check(void) * preemptible RCU implementations (TREE_PREEMPT_RCU and TINY_PREEMPT_RCU) * in CONFIG_PREEMPT kernel builds, RCU read-side critical sections may * be preempted, but explicit blocking is illegal. Finally, in preemptible - * RCU implementations in real-time (CONFIG_PREEMPT_RT) kernel builds, + * RCU implementations in real-time (with -rt patchset) kernel builds, * RCU read-side critical sections may be preempted and they may also * block, but only when acquiring spinlocks that are subject to priority * inheritance. diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 519777e3fa01..1342e69542f3 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -167,6 +167,7 @@ unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu); unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu); unsigned long ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu); unsigned long ring_buffer_dropped_events_cpu(struct ring_buffer *buffer, int cpu); +unsigned long ring_buffer_read_events_cpu(struct ring_buffer *buffer, int cpu); u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu); void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer, diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 9531845c419f..11d05f9fe8b6 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -138,6 +138,7 @@ extern void rtc_device_unregister(struct rtc_device *rtc); extern int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm); extern int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm); extern int rtc_set_mmss(struct rtc_device *rtc, unsigned long secs); +extern int rtc_set_ntp_time(struct timespec now); int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm); extern int rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alrm); diff --git a/include/linux/sched.h b/include/linux/sched.h index d2112477ff5e..33cc42130371 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -304,19 +304,6 @@ static inline void lockup_detector_init(void) } #endif -#ifdef CONFIG_DETECT_HUNG_TASK -extern unsigned int sysctl_hung_task_panic; -extern unsigned long sysctl_hung_task_check_count; -extern unsigned long sysctl_hung_task_timeout_secs; -extern unsigned long sysctl_hung_task_warnings; -extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos); -#else -/* Avoid need for ifdefs elsewhere in the code */ -enum { sysctl_hung_task_timeout_secs = 0 }; -#endif - /* Attach to any functions which should be ignored in wchan output. */ #define __sched __attribute__((__section__(".sched.text"))) @@ -338,23 +325,6 @@ extern int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner); struct nsproxy; struct user_namespace; -/* - * Default maximum number of active map areas, this limits the number of vmas - * per mm struct. Users can overwrite this number by sysctl but there is a - * problem. - * - * When a program's coredump is generated as ELF format, a section is created - * per a vma. In ELF, the number of sections is represented in unsigned short. - * This means the number of sections should be smaller than 65535 at coredump. - * Because the kernel adds some informative sections to a image of program at - * generating coredump, we need some margin. The number of extra sections is - * 1-3 now and depends on arch. We use "5" as safe margin, here. - */ -#define MAPCOUNT_ELF_CORE_MARGIN (5) -#define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN) - -extern int sysctl_max_map_count; - #include <linux/aio.h> #ifdef CONFIG_MMU @@ -1194,6 +1164,7 @@ struct sched_entity { /* rq "owned" by this entity/group: */ struct cfs_rq *my_q; #endif + /* * Load-tracking only depends on SMP, FAIR_GROUP_SCHED dependency below may be * removed when useful for applications beyond shares distribution (e.g. @@ -1208,6 +1179,7 @@ struct sched_entity { struct sched_rt_entity { struct list_head run_list; unsigned long timeout; + unsigned long watchdog_stamp; unsigned int time_slice; struct sched_rt_entity *back; @@ -1220,11 +1192,6 @@ struct sched_rt_entity { #endif }; -/* - * default timeslice is 100 msecs (used only for SCHED_RR tasks). - * Timeslices get refilled after they expire. - */ -#define RR_TIMESLICE (100 * HZ / 1000) struct rcu_node; @@ -1368,6 +1335,15 @@ struct task_struct { #ifndef CONFIG_VIRT_CPU_ACCOUNTING struct cputime prev_cputime; #endif +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN + seqlock_t vtime_seqlock; + unsigned long long vtime_snap; + enum { + VTIME_SLEEPING = 0, + VTIME_USER, + VTIME_SYS, + } vtime_snap_whence; +#endif unsigned long nvcsw, nivcsw; /* context switch counts */ struct timespec start_time; /* monotonic time */ struct timespec real_start_time; /* boot based time */ @@ -1622,37 +1598,6 @@ static inline void set_numabalancing_state(bool enabled) } #endif -/* - * Priority of a process goes from 0..MAX_PRIO-1, valid RT - * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH - * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority - * values are inverted: lower p->prio value means higher priority. - * - * The MAX_USER_RT_PRIO value allows the actual maximum - * RT priority to be separate from the value exported to - * user-space. This allows kernel threads to set their - * priority to a value higher than any user task. Note: - * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO. - */ - -#define MAX_USER_RT_PRIO 100 -#define MAX_RT_PRIO MAX_USER_RT_PRIO - -#define MAX_PRIO (MAX_RT_PRIO + 40) -#define DEFAULT_PRIO (MAX_RT_PRIO + 20) - -static inline int rt_prio(int prio) -{ - if (unlikely(prio < MAX_RT_PRIO)) - return 1; - return 0; -} - -static inline int rt_task(struct task_struct *p) -{ - return rt_prio(p->prio); -} - static inline struct pid *task_pid(struct task_struct *task) { return task->pids[PIDTYPE_PID].pid; @@ -1792,6 +1737,37 @@ static inline void put_task_struct(struct task_struct *t) __put_task_struct(t); } +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN +extern void task_cputime(struct task_struct *t, + cputime_t *utime, cputime_t *stime); +extern void task_cputime_scaled(struct task_struct *t, + cputime_t *utimescaled, cputime_t *stimescaled); +extern cputime_t task_gtime(struct task_struct *t); +#else +static inline void task_cputime(struct task_struct *t, + cputime_t *utime, cputime_t *stime) +{ + if (utime) + *utime = t->utime; + if (stime) + *stime = t->stime; +} + +static inline void task_cputime_scaled(struct task_struct *t, + cputime_t *utimescaled, + cputime_t *stimescaled) +{ + if (utimescaled) + *utimescaled = t->utimescaled; + if (stimescaled) + *stimescaled = t->stimescaled; +} + +static inline cputime_t task_gtime(struct task_struct *t) +{ + return t->gtime; +} +#endif extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st); extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st); @@ -2033,58 +2009,7 @@ extern void wake_up_idle_cpu(int cpu); static inline void wake_up_idle_cpu(int cpu) { } #endif -extern unsigned int sysctl_sched_latency; -extern unsigned int sysctl_sched_min_granularity; -extern unsigned int sysctl_sched_wakeup_granularity; -extern unsigned int sysctl_sched_child_runs_first; - -enum sched_tunable_scaling { - SCHED_TUNABLESCALING_NONE, - SCHED_TUNABLESCALING_LOG, - SCHED_TUNABLESCALING_LINEAR, - SCHED_TUNABLESCALING_END, -}; -extern enum sched_tunable_scaling sysctl_sched_tunable_scaling; - -extern unsigned int sysctl_numa_balancing_scan_delay; -extern unsigned int sysctl_numa_balancing_scan_period_min; -extern unsigned int sysctl_numa_balancing_scan_period_max; -extern unsigned int sysctl_numa_balancing_scan_period_reset; -extern unsigned int sysctl_numa_balancing_scan_size; -extern unsigned int sysctl_numa_balancing_settle_count; - -#ifdef CONFIG_SCHED_DEBUG -extern unsigned int sysctl_sched_migration_cost; -extern unsigned int sysctl_sched_nr_migrate; -extern unsigned int sysctl_sched_time_avg; -extern unsigned int sysctl_timer_migration; -extern unsigned int sysctl_sched_shares_window; - -int sched_proc_update_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *length, - loff_t *ppos); -#endif -#ifdef CONFIG_SCHED_DEBUG -static inline unsigned int get_sysctl_timer_migration(void) -{ - return sysctl_timer_migration; -} -#else -static inline unsigned int get_sysctl_timer_migration(void) -{ - return 1; -} -#endif -extern unsigned int sysctl_sched_rt_period; -extern int sysctl_sched_rt_runtime; - -int sched_rt_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); - #ifdef CONFIG_SCHED_AUTOGROUP -extern unsigned int sysctl_sched_autogroup_enabled; - extern void sched_autogroup_create_attach(struct task_struct *p); extern void sched_autogroup_detach(struct task_struct *p); extern void sched_autogroup_fork(struct signal_struct *sig); @@ -2100,30 +2025,6 @@ static inline void sched_autogroup_fork(struct signal_struct *sig) { } static inline void sched_autogroup_exit(struct signal_struct *sig) { } #endif -#ifdef CONFIG_CFS_BANDWIDTH -extern unsigned int sysctl_sched_cfs_bandwidth_slice; -#endif - -#ifdef CONFIG_RT_MUTEXES -extern int rt_mutex_getprio(struct task_struct *p); -extern void rt_mutex_setprio(struct task_struct *p, int prio); -extern void rt_mutex_adjust_pi(struct task_struct *p); -static inline bool tsk_is_pi_blocked(struct task_struct *tsk) -{ - return tsk->pi_blocked_on != NULL; -} -#else -static inline int rt_mutex_getprio(struct task_struct *p) -{ - return p->normal_prio; -} -# define rt_mutex_adjust_pi(p) do { } while (0) -static inline bool tsk_is_pi_blocked(struct task_struct *tsk) -{ - return false; -} -#endif - extern bool yield_to(struct task_struct *p, bool preempt); extern void set_user_nice(struct task_struct *p, long nice); extern int task_prio(const struct task_struct *p); @@ -2753,8 +2654,6 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask); extern long sched_getaffinity(pid_t pid, struct cpumask *mask); -extern void normalize_rt_tasks(void); - #ifdef CONFIG_CGROUP_SCHED extern struct task_group root_task_group; diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h new file mode 100644 index 000000000000..94e19ea28fc3 --- /dev/null +++ b/include/linux/sched/rt.h @@ -0,0 +1,58 @@ +#ifndef _SCHED_RT_H +#define _SCHED_RT_H + +/* + * Priority of a process goes from 0..MAX_PRIO-1, valid RT + * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH + * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority + * values are inverted: lower p->prio value means higher priority. + * + * The MAX_USER_RT_PRIO value allows the actual maximum + * RT priority to be separate from the value exported to + * user-space. This allows kernel threads to set their + * priority to a value higher than any user task. Note: + * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO. + */ + +#define MAX_USER_RT_PRIO 100 +#define MAX_RT_PRIO MAX_USER_RT_PRIO + +#define MAX_PRIO (MAX_RT_PRIO + 40) +#define DEFAULT_PRIO (MAX_RT_PRIO + 20) + +static inline int rt_prio(int prio) +{ + if (unlikely(prio < MAX_RT_PRIO)) + return 1; + return 0; +} + +static inline int rt_task(struct task_struct *p) +{ + return rt_prio(p->prio); +} + +#ifdef CONFIG_RT_MUTEXES +extern int rt_mutex_getprio(struct task_struct *p); +extern void rt_mutex_setprio(struct task_struct *p, int prio); +extern void rt_mutex_adjust_pi(struct task_struct *p); +static inline bool tsk_is_pi_blocked(struct task_struct *tsk) +{ + return tsk->pi_blocked_on != NULL; +} +#else +static inline int rt_mutex_getprio(struct task_struct *p) +{ + return p->normal_prio; +} +# define rt_mutex_adjust_pi(p) do { } while (0) +static inline bool tsk_is_pi_blocked(struct task_struct *tsk) +{ + return false; +} +#endif + +extern void normalize_rt_tasks(void); + + +#endif /* _SCHED_RT_H */ diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h new file mode 100644 index 000000000000..d2bb0ae979d0 --- /dev/null +++ b/include/linux/sched/sysctl.h @@ -0,0 +1,110 @@ +#ifndef _SCHED_SYSCTL_H +#define _SCHED_SYSCTL_H + +#ifdef CONFIG_DETECT_HUNG_TASK +extern unsigned int sysctl_hung_task_panic; +extern unsigned long sysctl_hung_task_check_count; +extern unsigned long sysctl_hung_task_timeout_secs; +extern unsigned long sysctl_hung_task_warnings; +extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos); +#else +/* Avoid need for ifdefs elsewhere in the code */ +enum { sysctl_hung_task_timeout_secs = 0 }; +#endif + +/* + * Default maximum number of active map areas, this limits the number of vmas + * per mm struct. Users can overwrite this number by sysctl but there is a + * problem. + * + * When a program's coredump is generated as ELF format, a section is created + * per a vma. In ELF, the number of sections is represented in unsigned short. + * This means the number of sections should be smaller than 65535 at coredump. + * Because the kernel adds some informative sections to a image of program at + * generating coredump, we need some margin. The number of extra sections is + * 1-3 now and depends on arch. We use "5" as safe margin, here. + */ +#define MAPCOUNT_ELF_CORE_MARGIN (5) +#define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN) + +extern int sysctl_max_map_count; + +extern unsigned int sysctl_sched_latency; +extern unsigned int sysctl_sched_min_granularity; +extern unsigned int sysctl_sched_wakeup_granularity; +extern unsigned int sysctl_sched_child_runs_first; + +enum sched_tunable_scaling { + SCHED_TUNABLESCALING_NONE, + SCHED_TUNABLESCALING_LOG, + SCHED_TUNABLESCALING_LINEAR, + SCHED_TUNABLESCALING_END, +}; +extern enum sched_tunable_scaling sysctl_sched_tunable_scaling; + +extern unsigned int sysctl_numa_balancing_scan_delay; +extern unsigned int sysctl_numa_balancing_scan_period_min; +extern unsigned int sysctl_numa_balancing_scan_period_max; +extern unsigned int sysctl_numa_balancing_scan_period_reset; +extern unsigned int sysctl_numa_balancing_scan_size; +extern unsigned int sysctl_numa_balancing_settle_count; + +#ifdef CONFIG_SCHED_DEBUG +extern unsigned int sysctl_sched_migration_cost; +extern unsigned int sysctl_sched_nr_migrate; +extern unsigned int sysctl_sched_time_avg; +extern unsigned int sysctl_timer_migration; +extern unsigned int sysctl_sched_shares_window; + +int sched_proc_update_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *length, + loff_t *ppos); +#endif +#ifdef CONFIG_SCHED_DEBUG +static inline unsigned int get_sysctl_timer_migration(void) +{ + return sysctl_timer_migration; +} +#else +static inline unsigned int get_sysctl_timer_migration(void) +{ + return 1; +} +#endif + +/* + * control realtime throttling: + * + * /proc/sys/kernel/sched_rt_period_us + * /proc/sys/kernel/sched_rt_runtime_us + */ +extern unsigned int sysctl_sched_rt_period; +extern int sysctl_sched_rt_runtime; + +#ifdef CONFIG_CFS_BANDWIDTH +extern unsigned int sysctl_sched_cfs_bandwidth_slice; +#endif + +#ifdef CONFIG_SCHED_AUTOGROUP +extern unsigned int sysctl_sched_autogroup_enabled; +#endif + +/* + * default timeslice is 100 msecs (used only for SCHED_RR tasks). + * Timeslices get refilled after they expire. + */ +#define RR_TIMESLICE (100 * HZ / 1000) + +extern int sched_rr_timeslice; + +extern int sched_rr_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos); + +extern int sched_rt_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos); + +#endif /* _SCHED_SYSCTL_H */ diff --git a/include/linux/smpboot.h b/include/linux/smpboot.h index e0106d8581d3..c65dee059913 100644 --- a/include/linux/smpboot.h +++ b/include/linux/smpboot.h @@ -14,6 +14,8 @@ struct smpboot_thread_data; * @thread_should_run: Check whether the thread should run or not. Called with * preemption disabled. * @thread_fn: The associated thread function + * @create: Optional setup function, called when the thread gets + * created (Not called from the thread context) * @setup: Optional setup function, called when the thread gets * operational the first time * @cleanup: Optional cleanup function, called when the thread @@ -22,6 +24,7 @@ struct smpboot_thread_data; * parked (cpu offline) * @unpark: Optional unpark function, called when the thread is * unparked (cpu online) + * @selfparking: Thread is not parked by the park function. * @thread_comm: The base name of the thread */ struct smp_hotplug_thread { @@ -29,10 +32,12 @@ struct smp_hotplug_thread { struct list_head list; int (*thread_should_run)(unsigned int cpu); void (*thread_fn)(unsigned int cpu); + void (*create)(unsigned int cpu); void (*setup)(unsigned int cpu); void (*cleanup)(unsigned int cpu, bool online); void (*park)(unsigned int cpu); void (*unpark)(unsigned int cpu); + bool selfparking; const char *thread_comm; }; diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 6eb691b08358..04f4121a23ae 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -151,30 +151,14 @@ void srcu_barrier(struct srcu_struct *sp); * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot * and while lockdep is disabled. * - * Note that if the CPU is in the idle loop from an RCU point of view - * (ie: that we are in the section between rcu_idle_enter() and - * rcu_idle_exit()) then srcu_read_lock_held() returns false even if - * the CPU did an srcu_read_lock(). The reason for this is that RCU - * ignores CPUs that are in such a section, considering these as in - * extended quiescent state, so such a CPU is effectively never in an - * RCU read-side critical section regardless of what RCU primitives it - * invokes. This state of affairs is required --- we need to keep an - * RCU-free window in idle where the CPU may possibly enter into low - * power mode. This way we can notice an extended quiescent state to - * other CPUs that started a grace period. Otherwise we would delay any - * grace period as long as we run in the idle task. - * - * Similarly, we avoid claiming an SRCU read lock held if the current - * CPU is offline. + * Note that SRCU is based on its own statemachine and it doesn't + * relies on normal RCU, it can be called from the CPU which + * is in the idle loop from an RCU point of view or offline. */ static inline int srcu_read_lock_held(struct srcu_struct *sp) { if (!debug_lockdep_rcu_enabled()) return 1; - if (rcu_is_cpu_idle()) - return 0; - if (!rcu_lockdep_current_cpu_online()) - return 0; return lock_is_held(&sp->dep_map); } @@ -236,8 +220,6 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp) int retval = __srcu_read_lock(sp); rcu_lock_acquire(&(sp)->dep_map); - rcu_lockdep_assert(!rcu_is_cpu_idle(), - "srcu_read_lock() used illegally while idle"); return retval; } @@ -251,8 +233,6 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp) static inline void srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp) { - rcu_lockdep_assert(!rcu_is_cpu_idle(), - "srcu_read_unlock() used illegally while idle"); rcu_lock_release(&(sp)->dep_map); __srcu_read_unlock(sp, idx); } diff --git a/include/linux/tick.h b/include/linux/tick.h index 1a6567b48492..553272e6af55 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -8,6 +8,8 @@ #include <linux/clockchips.h> #include <linux/irqflags.h> +#include <linux/percpu.h> +#include <linux/hrtimer.h> #ifdef CONFIG_GENERIC_CLOCKEVENTS @@ -122,13 +124,26 @@ static inline int tick_oneshot_mode_active(void) { return 0; } #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ # ifdef CONFIG_NO_HZ +DECLARE_PER_CPU(struct tick_sched, tick_cpu_sched); + +static inline int tick_nohz_tick_stopped(void) +{ + return __this_cpu_read(tick_cpu_sched.tick_stopped); +} + extern void tick_nohz_idle_enter(void); extern void tick_nohz_idle_exit(void); extern void tick_nohz_irq_exit(void); extern ktime_t tick_nohz_get_sleep_length(void); extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); -# else + +# else /* !CONFIG_NO_HZ */ +static inline int tick_nohz_tick_stopped(void) +{ + return 0; +} + static inline void tick_nohz_idle_enter(void) { } static inline void tick_nohz_idle_exit(void) { } diff --git a/include/linux/time.h b/include/linux/time.h index 4d358e9d10f1..a3ab6a814a9c 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -115,8 +115,20 @@ static inline bool timespec_valid_strict(const struct timespec *ts) return true; } +extern bool persistent_clock_exist; + +#ifdef ALWAYS_USE_PERSISTENT_CLOCK +#define has_persistent_clock() true +#else +static inline bool has_persistent_clock(void) +{ + return persistent_clock_exist; +} +#endif + extern void read_persistent_clock(struct timespec *ts); extern void read_boot_clock(struct timespec *ts); +extern int persistent_clock_is_local; extern int update_persistent_clock(struct timespec now); void timekeeping_init(void); extern int timekeeping_suspended; @@ -158,6 +170,7 @@ extern int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue); extern unsigned int alarm_setitimer(unsigned int seconds); extern int do_getitimer(int which, struct itimerval *value); +extern int __getnstimeofday(struct timespec *tv); extern void getnstimeofday(struct timespec *tv); extern void getrawmonotonic(struct timespec *ts); extern void getnstime_raw_and_real(struct timespec *ts_raw, diff --git a/include/linux/tsacct_kern.h b/include/linux/tsacct_kern.h index 44893e5ec8f7..3251965bf4cc 100644 --- a/include/linux/tsacct_kern.h +++ b/include/linux/tsacct_kern.h @@ -23,12 +23,15 @@ static inline void bacct_add_tsk(struct user_namespace *user_ns, #ifdef CONFIG_TASK_XACCT extern void xacct_add_tsk(struct taskstats *stats, struct task_struct *p); extern void acct_update_integrals(struct task_struct *tsk); +extern void acct_account_cputime(struct task_struct *tsk); extern void acct_clear_integrals(struct task_struct *tsk); #else static inline void xacct_add_tsk(struct taskstats *stats, struct task_struct *p) {} static inline void acct_update_integrals(struct task_struct *tsk) {} +static inline void acct_account_cputime(struct task_struct *tsk) +{} static inline void acct_clear_integrals(struct task_struct *tsk) {} #endif /* CONFIG_TASK_XACCT */ diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 4f628a6fc5b4..02b83db8e2c5 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -35,13 +35,20 @@ struct inode; # include <asm/uprobes.h> #endif +#define UPROBE_HANDLER_REMOVE 1 +#define UPROBE_HANDLER_MASK 1 + +enum uprobe_filter_ctx { + UPROBE_FILTER_REGISTER, + UPROBE_FILTER_UNREGISTER, + UPROBE_FILTER_MMAP, +}; + struct uprobe_consumer { int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs); - /* - * filter is optional; If a filter exists, handler is run - * if and only if filter returns true. - */ - bool (*filter)(struct uprobe_consumer *self, struct task_struct *task); + bool (*filter)(struct uprobe_consumer *self, + enum uprobe_filter_ctx ctx, + struct mm_struct *mm); struct uprobe_consumer *next; }; @@ -94,6 +101,7 @@ extern int __weak set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsign extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr); extern bool __weak is_swbp_insn(uprobe_opcode_t *insn); extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); +extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool); extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); extern int uprobe_mmap(struct vm_area_struct *vma); extern void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end); @@ -117,6 +125,11 @@ uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) { return -ENOSYS; } +static inline int +uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool add) +{ + return -ENOSYS; +} static inline void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) { diff --git a/include/linux/vtime.h b/include/linux/vtime.h index ae30ab58431a..71a5782d8c59 100644 --- a/include/linux/vtime.h +++ b/include/linux/vtime.h @@ -6,15 +6,46 @@ struct task_struct; #ifdef CONFIG_VIRT_CPU_ACCOUNTING extern void vtime_task_switch(struct task_struct *prev); extern void vtime_account_system(struct task_struct *tsk); -extern void vtime_account_system_irqsafe(struct task_struct *tsk); extern void vtime_account_idle(struct task_struct *tsk); extern void vtime_account_user(struct task_struct *tsk); -extern void vtime_account(struct task_struct *tsk); -#else +extern void vtime_account_irq_enter(struct task_struct *tsk); + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE +static inline bool vtime_accounting_enabled(void) { return true; } +#endif + +#else /* !CONFIG_VIRT_CPU_ACCOUNTING */ + static inline void vtime_task_switch(struct task_struct *prev) { } static inline void vtime_account_system(struct task_struct *tsk) { } -static inline void vtime_account_system_irqsafe(struct task_struct *tsk) { } -static inline void vtime_account(struct task_struct *tsk) { } +static inline void vtime_account_user(struct task_struct *tsk) { } +static inline void vtime_account_irq_enter(struct task_struct *tsk) { } +static inline bool vtime_accounting_enabled(void) { return false; } +#endif + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN +extern void arch_vtime_task_switch(struct task_struct *tsk); +extern void vtime_account_irq_exit(struct task_struct *tsk); +extern bool vtime_accounting_enabled(void); +extern void vtime_user_enter(struct task_struct *tsk); +static inline void vtime_user_exit(struct task_struct *tsk) +{ + vtime_account_user(tsk); +} +extern void vtime_guest_enter(struct task_struct *tsk); +extern void vtime_guest_exit(struct task_struct *tsk); +extern void vtime_init_idle(struct task_struct *tsk); +#else +static inline void vtime_account_irq_exit(struct task_struct *tsk) +{ + /* On hard|softirq exit we always account to hard|softirq cputime */ + vtime_account_system(tsk); +} +static inline void vtime_user_enter(struct task_struct *tsk) { } +static inline void vtime_user_exit(struct task_struct *tsk) { } +static inline void vtime_guest_enter(struct task_struct *tsk) { } +static inline void vtime_guest_exit(struct task_struct *tsk) { } +static inline void vtime_init_idle(struct task_struct *tsk) { } #endif #ifdef CONFIG_IRQ_TIME_ACCOUNTING @@ -23,25 +54,15 @@ extern void irqtime_account_irq(struct task_struct *tsk); static inline void irqtime_account_irq(struct task_struct *tsk) { } #endif -static inline void vtime_account_irq_enter(struct task_struct *tsk) +static inline void account_irq_enter_time(struct task_struct *tsk) { - /* - * Hardirq can interrupt idle task anytime. So we need vtime_account() - * that performs the idle check in CONFIG_VIRT_CPU_ACCOUNTING. - * Softirq can also interrupt idle task directly if it calls - * local_bh_enable(). Such case probably don't exist but we never know. - * Ksoftirqd is not concerned because idle time is flushed on context - * switch. Softirqs in the end of hardirqs are also not a problem because - * the idle time is flushed on hardirq time already. - */ - vtime_account(tsk); + vtime_account_irq_enter(tsk); irqtime_account_irq(tsk); } -static inline void vtime_account_irq_exit(struct task_struct *tsk) +static inline void account_irq_exit_time(struct task_struct *tsk) { - /* On hard|softirq exit we always account to hard|softirq cputime */ - vtime_account_system(tsk); + vtime_account_irq_exit(tsk); irqtime_account_irq(tsk); } diff --git a/include/trace/events/ras.h b/include/trace/events/ras.h new file mode 100644 index 000000000000..88b878383797 --- /dev/null +++ b/include/trace/events/ras.h @@ -0,0 +1,77 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM ras + +#if !defined(_TRACE_AER_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_AER_H + +#include <linux/tracepoint.h> +#include <linux/edac.h> + + +/* + * PCIe AER Trace event + * + * These events are generated when hardware detects a corrected or + * uncorrected event on a PCIe device. The event report has + * the following structure: + * + * char * dev_name - The name of the slot where the device resides + * ([domain:]bus:device.function). + * u32 status - Either the correctable or uncorrectable register + * indicating what error or errors have been seen + * u8 severity - error severity 0:NONFATAL 1:FATAL 2:CORRECTED + */ + +#define aer_correctable_errors \ + {BIT(0), "Receiver Error"}, \ + {BIT(6), "Bad TLP"}, \ + {BIT(7), "Bad DLLP"}, \ + {BIT(8), "RELAY_NUM Rollover"}, \ + {BIT(12), "Replay Timer Timeout"}, \ + {BIT(13), "Advisory Non-Fatal"} + +#define aer_uncorrectable_errors \ + {BIT(4), "Data Link Protocol"}, \ + {BIT(12), "Poisoned TLP"}, \ + {BIT(13), "Flow Control Protocol"}, \ + {BIT(14), "Completion Timeout"}, \ + {BIT(15), "Completer Abort"}, \ + {BIT(16), "Unexpected Completion"}, \ + {BIT(17), "Receiver Overflow"}, \ + {BIT(18), "Malformed TLP"}, \ + {BIT(19), "ECRC"}, \ + {BIT(20), "Unsupported Request"} + +TRACE_EVENT(aer_event, + TP_PROTO(const char *dev_name, + const u32 status, + const u8 severity), + + TP_ARGS(dev_name, status, severity), + + TP_STRUCT__entry( + __string( dev_name, dev_name ) + __field( u32, status ) + __field( u8, severity ) + ), + + TP_fast_assign( + __assign_str(dev_name, dev_name); + __entry->status = status; + __entry->severity = severity; + ), + + TP_printk("%s PCIe Bus Error: severity=%s, %s\n", + __get_str(dev_name), + __entry->severity == HW_EVENT_ERR_CORRECTED ? "Corrected" : + __entry->severity == HW_EVENT_ERR_FATAL ? + "Fatal" : "Uncorrected", + __entry->severity == HW_EVENT_ERR_CORRECTED ? + __print_flags(__entry->status, "|", aer_correctable_errors) : + __print_flags(__entry->status, "|", aer_uncorrectable_errors)) +); + +#endif /* _TRACE_AER_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index d4f559b1ec34..1918e832da4f 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -44,8 +44,10 @@ TRACE_EVENT(rcu_utilization, * of a new grace period or the end of an old grace period ("cpustart" * and "cpuend", respectively), a CPU passing through a quiescent * state ("cpuqs"), a CPU coming online or going offline ("cpuonl" - * and "cpuofl", respectively), and a CPU being kicked for being too - * long in dyntick-idle mode ("kick"). + * and "cpuofl", respectively), a CPU being kicked for being too + * long in dyntick-idle mode ("kick"), a CPU accelerating its new + * callbacks to RCU_NEXT_READY_TAIL ("AccReadyCB"), and a CPU + * accelerating its new callbacks to RCU_WAIT_TAIL ("AccWaitCB"). */ TRACE_EVENT(rcu_grace_period, @@ -393,7 +395,7 @@ TRACE_EVENT(rcu_kfree_callback, */ TRACE_EVENT(rcu_batch_start, - TP_PROTO(char *rcuname, long qlen_lazy, long qlen, int blimit), + TP_PROTO(char *rcuname, long qlen_lazy, long qlen, long blimit), TP_ARGS(rcuname, qlen_lazy, qlen, blimit), @@ -401,7 +403,7 @@ TRACE_EVENT(rcu_batch_start, __field(char *, rcuname) __field(long, qlen_lazy) __field(long, qlen) - __field(int, blimit) + __field(long, blimit) ), TP_fast_assign( @@ -411,7 +413,7 @@ TRACE_EVENT(rcu_batch_start, __entry->blimit = blimit; ), - TP_printk("%s CBs=%ld/%ld bl=%d", + TP_printk("%s CBs=%ld/%ld bl=%ld", __entry->rcuname, __entry->qlen_lazy, __entry->qlen, __entry->blimit) ); @@ -523,22 +525,30 @@ TRACE_EVENT(rcu_batch_end, */ TRACE_EVENT(rcu_torture_read, - TP_PROTO(char *rcutorturename, struct rcu_head *rhp), + TP_PROTO(char *rcutorturename, struct rcu_head *rhp, + unsigned long secs, unsigned long c_old, unsigned long c), - TP_ARGS(rcutorturename, rhp), + TP_ARGS(rcutorturename, rhp, secs, c_old, c), TP_STRUCT__entry( __field(char *, rcutorturename) __field(struct rcu_head *, rhp) + __field(unsigned long, secs) + __field(unsigned long, c_old) + __field(unsigned long, c) ), TP_fast_assign( __entry->rcutorturename = rcutorturename; __entry->rhp = rhp; + __entry->secs = secs; + __entry->c_old = c_old; + __entry->c = c; ), - TP_printk("%s torture read %p", - __entry->rcutorturename, __entry->rhp) + TP_printk("%s torture read %p %luus c: %lu %lu", + __entry->rcutorturename, __entry->rhp, + __entry->secs, __entry->c_old, __entry->c) ); /* @@ -608,7 +618,8 @@ TRACE_EVENT(rcu_barrier, #define trace_rcu_invoke_kfree_callback(rcuname, rhp, offset) do { } while (0) #define trace_rcu_batch_end(rcuname, callbacks_invoked, cb, nr, iit, risk) \ do { } while (0) -#define trace_rcu_torture_read(rcutorturename, rhp) do { } while (0) +#define trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \ + do { } while (0) #define trace_rcu_barrier(name, s, cpu, cnt, done) do { } while (0) #endif /* #else #ifdef CONFIG_RCU_TRACE */ diff --git a/include/uapi/linux/auto_fs.h b/include/uapi/linux/auto_fs.h index 77cdba9df274..bb991dfe134f 100644 --- a/include/uapi/linux/auto_fs.h +++ b/include/uapi/linux/auto_fs.h @@ -28,25 +28,16 @@ #define AUTOFS_MIN_PROTO_VERSION AUTOFS_PROTO_VERSION /* - * Architectures where both 32- and 64-bit binaries can be executed - * on 64-bit kernels need this. This keeps the structure format - * uniform, and makes sure the wait_queue_token isn't too big to be - * passed back down to the kernel. - * - * This assumes that on these architectures: - * mode 32 bit 64 bit - * ------------------------- - * int 32 bit 32 bit - * long 32 bit 64 bit - * - * If so, 32-bit user-space code should be backwards compatible. + * The wait_queue_token (autofs_wqt_t) is part of a structure which is passed + * back to the kernel via ioctl from userspace. On architectures where 32- and + * 64-bit userspace binaries can be executed it's important that the size of + * autofs_wqt_t stays constant between 32- and 64-bit Linux kernels so that we + * do not break the binary ABI interface by changing the structure size. */ - -#if defined(__sparc__) || defined(__mips__) || defined(__x86_64__) \ - || defined(__powerpc__) || defined(__s390__) -typedef unsigned int autofs_wqt_t; -#else +#if defined(__ia64__) || defined(__alpha__) /* pure 64bit architectures */ typedef unsigned long autofs_wqt_t; +#else +typedef unsigned int autofs_wqt_t; #endif /* Packet types */ diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 4f63c05d27c9..9fa9c622a7f4 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -579,7 +579,8 @@ enum perf_event_type { * { u32 size; * char data[size];}&& PERF_SAMPLE_RAW * - * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK + * { u64 nr; + * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK * * { u64 abi; # enum perf_sample_regs_abi * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER diff --git a/init/Kconfig b/init/Kconfig index be8b7f55312d..7000d9657402 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -20,12 +20,8 @@ config CONSTRUCTORS bool depends on !UML -config HAVE_IRQ_WORK - bool - config IRQ_WORK bool - depends on HAVE_IRQ_WORK config BUILDTIME_EXTABLE_SORT bool @@ -326,10 +322,13 @@ source "kernel/time/Kconfig" menu "CPU/Task time and stats accounting" +config VIRT_CPU_ACCOUNTING + bool + choice prompt "Cputime accounting" default TICK_CPU_ACCOUNTING if !PPC64 - default VIRT_CPU_ACCOUNTING if PPC64 + default VIRT_CPU_ACCOUNTING_NATIVE if PPC64 # Kind of a stub config for the pure tick based cputime accounting config TICK_CPU_ACCOUNTING @@ -342,9 +341,10 @@ config TICK_CPU_ACCOUNTING If unsure, say Y. -config VIRT_CPU_ACCOUNTING +config VIRT_CPU_ACCOUNTING_NATIVE bool "Deterministic task and CPU time accounting" depends on HAVE_VIRT_CPU_ACCOUNTING + select VIRT_CPU_ACCOUNTING help Select this option to enable more accurate task and CPU time accounting. This is done by reading a CPU counter on each @@ -354,6 +354,23 @@ config VIRT_CPU_ACCOUNTING this also enables accounting of stolen time on logically-partitioned systems. +config VIRT_CPU_ACCOUNTING_GEN + bool "Full dynticks CPU time accounting" + depends on HAVE_CONTEXT_TRACKING && 64BIT + select VIRT_CPU_ACCOUNTING + select CONTEXT_TRACKING + help + Select this option to enable task and CPU time accounting on full + dynticks systems. This accounting is implemented by watching every + kernel-user boundaries using the context tracking subsystem. + The accounting is thus performed at the expense of some significant + overhead. + + For now this is only useful if you are working on the full + dynticks subsystem development. + + If unsure, say N. + config IRQ_TIME_ACCOUNTING bool "Fine granularity task level IRQ time accounting" depends on HAVE_IRQ_TIME_ACCOUNTING @@ -453,7 +470,7 @@ config TREE_RCU config TREE_PREEMPT_RCU bool "Preemptible tree-based hierarchical RCU" - depends on PREEMPT && SMP + depends on PREEMPT help This option selects the RCU implementation that is designed for very large SMP systems with hundreds or @@ -461,6 +478,8 @@ config TREE_PREEMPT_RCU is also required. It also scales down nicely to smaller systems. + Select this option if you are unsure. + config TINY_RCU bool "UP-only small-memory-footprint RCU" depends on !PREEMPT && !SMP @@ -486,6 +505,14 @@ config PREEMPT_RCU This option enables preemptible-RCU code that is common between the TREE_PREEMPT_RCU and TINY_PREEMPT_RCU implementations. +config RCU_STALL_COMMON + def_bool ( TREE_RCU || TREE_PREEMPT_RCU || RCU_TRACE ) + help + This option enables RCU CPU stall code that is common between + the TINY and TREE variants of RCU. The purpose is to allow + the tiny variants to disable RCU CPU stall warnings, while + making these warnings mandatory for the tree variants. + config CONTEXT_TRACKING bool @@ -1263,6 +1290,7 @@ config HOTPLUG config PRINTK default y bool "Enable support for printk" if EXPERT + select IRQ_WORK help This option enables normal printk support. Removing it eliminates most of the message strings from the kernel image diff --git a/init/init_task.c b/init/init_task.c index 8b2f3996b035..ba0a7f362d9e 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -2,6 +2,8 @@ #include <linux/export.h> #include <linux/mqueue.h> #include <linux/sched.h> +#include <linux/sched/sysctl.h> +#include <linux/sched/rt.h> #include <linux/init.h> #include <linux/fs.h> #include <linux/mm.h> diff --git a/kernel/acct.c b/kernel/acct.c index 051e071a06e7..e8b1627ab9c7 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -566,6 +566,7 @@ out: void acct_collect(long exitcode, int group_dead) { struct pacct_struct *pacct = ¤t->signal->pacct; + cputime_t utime, stime; unsigned long vsize = 0; if (group_dead && current->mm) { @@ -593,8 +594,9 @@ void acct_collect(long exitcode, int group_dead) pacct->ac_flag |= ACORE; if (current->flags & PF_SIGNALED) pacct->ac_flag |= AXSIG; - pacct->ac_utime += current->utime; - pacct->ac_stime += current->stime; + task_cputime(current, &utime, &stime); + pacct->ac_utime += utime; + pacct->ac_stime += stime; pacct->ac_minflt += current->min_flt; pacct->ac_majflt += current->maj_flt; spin_unlock_irq(¤t->sighand->siglock); diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c index e0e07fd55508..65349f07b878 100644 --- a/kernel/context_tracking.c +++ b/kernel/context_tracking.c @@ -1,29 +1,41 @@ +/* + * Context tracking: Probe on high level context boundaries such as kernel + * and userspace. This includes syscalls and exceptions entry/exit. + * + * This is used by RCU to remove its dependency on the timer tick while a CPU + * runs in userspace. + * + * Started by Frederic Weisbecker: + * + * Copyright (C) 2012 Red Hat, Inc., Frederic Weisbecker <fweisbec@redhat.com> + * + * Many thanks to Gilad Ben-Yossef, Paul McKenney, Ingo Molnar, Andrew Morton, + * Steven Rostedt, Peter Zijlstra for suggestions and improvements. + * + */ + #include <linux/context_tracking.h> +#include <linux/kvm_host.h> #include <linux/rcupdate.h> #include <linux/sched.h> -#include <linux/percpu.h> #include <linux/hardirq.h> +#include <linux/export.h> -struct context_tracking { - /* - * When active is false, hooks are not set to - * minimize overhead: TIF flags are cleared - * and calls to user_enter/exit are ignored. This - * may be further optimized using static keys. - */ - bool active; - enum { - IN_KERNEL = 0, - IN_USER, - } state; -}; - -static DEFINE_PER_CPU(struct context_tracking, context_tracking) = { +DEFINE_PER_CPU(struct context_tracking, context_tracking) = { #ifdef CONFIG_CONTEXT_TRACKING_FORCE .active = true, #endif }; +/** + * user_enter - Inform the context tracking that the CPU is going to + * enter userspace mode. + * + * This function must be called right before we switch from the kernel + * to userspace, when it's guaranteed the remaining kernel instructions + * to execute won't use any RCU read side critical section because this + * function sets RCU in extended quiescent state. + */ void user_enter(void) { unsigned long flags; @@ -39,40 +51,90 @@ void user_enter(void) if (in_interrupt()) return; + /* Kernel threads aren't supposed to go to userspace */ WARN_ON_ONCE(!current->mm); local_irq_save(flags); if (__this_cpu_read(context_tracking.active) && __this_cpu_read(context_tracking.state) != IN_USER) { - __this_cpu_write(context_tracking.state, IN_USER); + /* + * At this stage, only low level arch entry code remains and + * then we'll run in userspace. We can assume there won't be + * any RCU read-side critical section until the next call to + * user_exit() or rcu_irq_enter(). Let's remove RCU's dependency + * on the tick. + */ + vtime_user_enter(current); rcu_user_enter(); + __this_cpu_write(context_tracking.state, IN_USER); } local_irq_restore(flags); } + +/** + * user_exit - Inform the context tracking that the CPU is + * exiting userspace mode and entering the kernel. + * + * This function must be called after we entered the kernel from userspace + * before any use of RCU read side critical section. This potentially include + * any high level kernel code like syscalls, exceptions, signal handling, etc... + * + * This call supports re-entrancy. This way it can be called from any exception + * handler without needing to know if we came from userspace or not. + */ void user_exit(void) { unsigned long flags; - /* - * Some contexts may involve an exception occuring in an irq, - * leading to that nesting: - * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit() - * This would mess up the dyntick_nesting count though. And rcu_irq_*() - * helpers are enough to protect RCU uses inside the exception. So - * just return immediately if we detect we are in an IRQ. - */ if (in_interrupt()) return; local_irq_save(flags); if (__this_cpu_read(context_tracking.state) == IN_USER) { - __this_cpu_write(context_tracking.state, IN_KERNEL); + /* + * We are going to run code that may use RCU. Inform + * RCU core about that (ie: we may need the tick again). + */ rcu_user_exit(); + vtime_user_exit(current); + __this_cpu_write(context_tracking.state, IN_KERNEL); } local_irq_restore(flags); } +void guest_enter(void) +{ + if (vtime_accounting_enabled()) + vtime_guest_enter(current); + else + __guest_enter(); +} +EXPORT_SYMBOL_GPL(guest_enter); + +void guest_exit(void) +{ + if (vtime_accounting_enabled()) + vtime_guest_exit(current); + else + __guest_exit(); +} +EXPORT_SYMBOL_GPL(guest_exit); + + +/** + * context_tracking_task_switch - context switch the syscall callbacks + * @prev: the task that is being switched out + * @next: the task that is being switched in + * + * The context tracking uses the syscall slow path to implement its user-kernel + * boundaries probes on syscalls. This way it doesn't impact the syscall fast + * path on CPUs that don't do context tracking. + * + * But we need to clear the flag on the previous task because it may later + * migrate to some CPU that doesn't do the context tracking. As such the TIF + * flag may not be desired there. + */ void context_tracking_task_switch(struct task_struct *prev, struct task_struct *next) { diff --git a/kernel/cpu.c b/kernel/cpu.c index 3046a503242c..b5e4ab2d427e 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -224,11 +224,13 @@ void clear_tasks_mm_cpumask(int cpu) static inline void check_for_tasks(int cpu) { struct task_struct *p; + cputime_t utime, stime; write_lock_irq(&tasklist_lock); for_each_process(p) { + task_cputime(p, &utime, &stime); if (task_cpu(p) == cpu && p->state == TASK_RUNNING && - (p->utime || p->stime)) + (utime || stime)) printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d " "(state = %ld, flags = %x)\n", p->comm, task_pid_nr(p), cpu, @@ -254,6 +256,8 @@ static int __ref take_cpu_down(void *_param) return err; cpu_notify(CPU_DYING | param->mod, param->hcpu); + /* Park the stopper thread */ + kthread_park(current); return 0; } diff --git a/kernel/delayacct.c b/kernel/delayacct.c index 418b3f7053aa..d473988c1d0b 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -106,6 +106,7 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) unsigned long long t2, t3; unsigned long flags; struct timespec ts; + cputime_t utime, stime, stimescaled, utimescaled; /* Though tsk->delays accessed later, early exit avoids * unnecessary returning of other data @@ -114,12 +115,14 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) goto done; tmp = (s64)d->cpu_run_real_total; - cputime_to_timespec(tsk->utime + tsk->stime, &ts); + task_cputime(tsk, &utime, &stime); + cputime_to_timespec(utime + stime, &ts); tmp += timespec_to_ns(&ts); d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp; tmp = (s64)d->cpu_scaled_run_real_total; - cputime_to_timespec(tsk->utimescaled + tsk->stimescaled, &ts); + task_cputime_scaled(tsk, &utimescaled, &stimescaled); + cputime_to_timespec(utimescaled + stimescaled, &ts); tmp += timespec_to_ns(&ts); d->cpu_scaled_run_real_total = (tmp < (s64)d->cpu_scaled_run_real_total) ? 0 : tmp; diff --git a/kernel/events/core.c b/kernel/events/core.c index 7b6646a8c067..5c75791d7269 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6171,11 +6171,14 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, if (task) { event->attach_state = PERF_ATTACH_TASK; + + if (attr->type == PERF_TYPE_TRACEPOINT) + event->hw.tp_target = task; #ifdef CONFIG_HAVE_HW_BREAKPOINT /* * hw_breakpoint is a bit difficult here.. */ - if (attr->type == PERF_TYPE_BREAKPOINT) + else if (attr->type == PERF_TYPE_BREAKPOINT) event->hw.bp_target = task; #endif } diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c index fe8a916507ed..a64f8aeb5c1f 100644 --- a/kernel/events/hw_breakpoint.c +++ b/kernel/events/hw_breakpoint.c @@ -676,7 +676,7 @@ int __init init_hw_breakpoint(void) err_alloc: for_each_possible_cpu(err_cpu) { for (i = 0; i < TYPE_MAX; i++) - kfree(per_cpu(nr_task_bp_pinned[i], cpu)); + kfree(per_cpu(nr_task_bp_pinned[i], err_cpu)); if (err_cpu == cpu) break; } diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index dea7acfbb071..a567c8c7ef31 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -27,6 +27,7 @@ #include <linux/pagemap.h> /* read_mapping_page */ #include <linux/slab.h> #include <linux/sched.h> +#include <linux/export.h> #include <linux/rmap.h> /* anon_vma_prepare */ #include <linux/mmu_notifier.h> /* set_pte_at_notify */ #include <linux/swap.h> /* try_to_free_swap */ @@ -41,58 +42,31 @@ #define MAX_UPROBE_XOL_SLOTS UINSNS_PER_PAGE static struct rb_root uprobes_tree = RB_ROOT; - -static DEFINE_SPINLOCK(uprobes_treelock); /* serialize rbtree access */ - -#define UPROBES_HASH_SZ 13 - /* - * We need separate register/unregister and mmap/munmap lock hashes because - * of mmap_sem nesting. - * - * uprobe_register() needs to install probes on (potentially) all processes - * and thus needs to acquire multiple mmap_sems (consequtively, not - * concurrently), whereas uprobe_mmap() is called while holding mmap_sem - * for the particular process doing the mmap. - * - * uprobe_register()->register_for_each_vma() needs to drop/acquire mmap_sem - * because of lock order against i_mmap_mutex. This means there's a hole in - * the register vma iteration where a mmap() can happen. - * - * Thus uprobe_register() can race with uprobe_mmap() and we can try and - * install a probe where one is already installed. + * allows us to skip the uprobe_mmap if there are no uprobe events active + * at this time. Probably a fine grained per inode count is better? */ +#define no_uprobe_events() RB_EMPTY_ROOT(&uprobes_tree) -/* serialize (un)register */ -static struct mutex uprobes_mutex[UPROBES_HASH_SZ]; - -#define uprobes_hash(v) (&uprobes_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ]) +static DEFINE_SPINLOCK(uprobes_treelock); /* serialize rbtree access */ +#define UPROBES_HASH_SZ 13 /* serialize uprobe->pending_list */ static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ]; #define uprobes_mmap_hash(v) (&uprobes_mmap_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ]) static struct percpu_rw_semaphore dup_mmap_sem; -/* - * uprobe_events allows us to skip the uprobe_mmap if there are no uprobe - * events active at this time. Probably a fine grained per inode count is - * better? - */ -static atomic_t uprobe_events = ATOMIC_INIT(0); - /* Have a copy of original instruction */ #define UPROBE_COPY_INSN 0 -/* Dont run handlers when first register/ last unregister in progress*/ -#define UPROBE_RUN_HANDLER 1 /* Can skip singlestep */ -#define UPROBE_SKIP_SSTEP 2 +#define UPROBE_SKIP_SSTEP 1 struct uprobe { struct rb_node rb_node; /* node in the rb tree */ atomic_t ref; + struct rw_semaphore register_rwsem; struct rw_semaphore consumer_rwsem; - struct mutex copy_mutex; /* TODO: kill me and UPROBE_COPY_INSN */ struct list_head pending_list; struct uprobe_consumer *consumers; struct inode *inode; /* Also hold a ref to inode */ @@ -430,9 +404,6 @@ static struct uprobe *insert_uprobe(struct uprobe *uprobe) u = __insert_uprobe(uprobe); spin_unlock(&uprobes_treelock); - /* For now assume that the instruction need not be single-stepped */ - __set_bit(UPROBE_SKIP_SSTEP, &uprobe->flags); - return u; } @@ -452,8 +423,10 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset) uprobe->inode = igrab(inode); uprobe->offset = offset; + init_rwsem(&uprobe->register_rwsem); init_rwsem(&uprobe->consumer_rwsem); - mutex_init(&uprobe->copy_mutex); + /* For now assume that the instruction need not be single-stepped */ + __set_bit(UPROBE_SKIP_SSTEP, &uprobe->flags); /* add to uprobes_tree, sorted on inode:offset */ cur_uprobe = insert_uprobe(uprobe); @@ -463,38 +436,17 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset) kfree(uprobe); uprobe = cur_uprobe; iput(inode); - } else { - atomic_inc(&uprobe_events); } return uprobe; } -static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs) -{ - struct uprobe_consumer *uc; - - if (!test_bit(UPROBE_RUN_HANDLER, &uprobe->flags)) - return; - - down_read(&uprobe->consumer_rwsem); - for (uc = uprobe->consumers; uc; uc = uc->next) { - if (!uc->filter || uc->filter(uc, current)) - uc->handler(uc, regs); - } - up_read(&uprobe->consumer_rwsem); -} - -/* Returns the previous consumer */ -static struct uprobe_consumer * -consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc) +static void consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc) { down_write(&uprobe->consumer_rwsem); uc->next = uprobe->consumers; uprobe->consumers = uc; up_write(&uprobe->consumer_rwsem); - - return uc->next; } /* @@ -588,7 +540,8 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file, if (test_bit(UPROBE_COPY_INSN, &uprobe->flags)) return ret; - mutex_lock(&uprobe->copy_mutex); + /* TODO: move this into _register, until then we abuse this sem. */ + down_write(&uprobe->consumer_rwsem); if (test_bit(UPROBE_COPY_INSN, &uprobe->flags)) goto out; @@ -612,7 +565,30 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file, set_bit(UPROBE_COPY_INSN, &uprobe->flags); out: - mutex_unlock(&uprobe->copy_mutex); + up_write(&uprobe->consumer_rwsem); + + return ret; +} + +static inline bool consumer_filter(struct uprobe_consumer *uc, + enum uprobe_filter_ctx ctx, struct mm_struct *mm) +{ + return !uc->filter || uc->filter(uc, ctx, mm); +} + +static bool filter_chain(struct uprobe *uprobe, + enum uprobe_filter_ctx ctx, struct mm_struct *mm) +{ + struct uprobe_consumer *uc; + bool ret = false; + + down_read(&uprobe->consumer_rwsem); + for (uc = uprobe->consumers; uc; uc = uc->next) { + ret = consumer_filter(uc, ctx, mm); + if (ret) + break; + } + up_read(&uprobe->consumer_rwsem); return ret; } @@ -624,16 +600,6 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, bool first_uprobe; int ret; - /* - * If probe is being deleted, unregister thread could be done with - * the vma-rmap-walk through. Adding a probe now can be fatal since - * nobody will be able to cleanup. Also we could be from fork or - * mremap path, where the probe might have already been inserted. - * Hence behave as if probe already existed. - */ - if (!uprobe->consumers) - return 0; - ret = prepare_uprobe(uprobe, vma->vm_file, mm, vaddr); if (ret) return ret; @@ -658,14 +624,14 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, static int remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vaddr) { - /* can happen if uprobe_register() fails */ - if (!test_bit(MMF_HAS_UPROBES, &mm->flags)) - return 0; - set_bit(MMF_RECALC_UPROBES, &mm->flags); return set_orig_insn(&uprobe->arch, mm, vaddr); } +static inline bool uprobe_is_active(struct uprobe *uprobe) +{ + return !RB_EMPTY_NODE(&uprobe->rb_node); +} /* * There could be threads that have already hit the breakpoint. They * will recheck the current insn and restart if find_uprobe() fails. @@ -673,12 +639,15 @@ remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vad */ static void delete_uprobe(struct uprobe *uprobe) { + if (WARN_ON(!uprobe_is_active(uprobe))) + return; + spin_lock(&uprobes_treelock); rb_erase(&uprobe->rb_node, &uprobes_tree); spin_unlock(&uprobes_treelock); + RB_CLEAR_NODE(&uprobe->rb_node); /* for uprobe_is_active() */ iput(uprobe->inode); put_uprobe(uprobe); - atomic_dec(&uprobe_events); } struct map_info { @@ -764,8 +733,10 @@ build_map_info(struct address_space *mapping, loff_t offset, bool is_register) return curr; } -static int register_for_each_vma(struct uprobe *uprobe, bool is_register) +static int +register_for_each_vma(struct uprobe *uprobe, struct uprobe_consumer *new) { + bool is_register = !!new; struct map_info *info; int err = 0; @@ -794,10 +765,16 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register) vaddr_to_offset(vma, info->vaddr) != uprobe->offset) goto unlock; - if (is_register) - err = install_breakpoint(uprobe, mm, vma, info->vaddr); - else - err |= remove_breakpoint(uprobe, mm, info->vaddr); + if (is_register) { + /* consult only the "caller", new consumer. */ + if (consumer_filter(new, + UPROBE_FILTER_REGISTER, mm)) + err = install_breakpoint(uprobe, mm, vma, info->vaddr); + } else if (test_bit(MMF_HAS_UPROBES, &mm->flags)) { + if (!filter_chain(uprobe, + UPROBE_FILTER_UNREGISTER, mm)) + err |= remove_breakpoint(uprobe, mm, info->vaddr); + } unlock: up_write(&mm->mmap_sem); @@ -810,17 +787,23 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register) return err; } -static int __uprobe_register(struct uprobe *uprobe) +static int __uprobe_register(struct uprobe *uprobe, struct uprobe_consumer *uc) { - return register_for_each_vma(uprobe, true); + consumer_add(uprobe, uc); + return register_for_each_vma(uprobe, uc); } -static void __uprobe_unregister(struct uprobe *uprobe) +static void __uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *uc) { - if (!register_for_each_vma(uprobe, false)) - delete_uprobe(uprobe); + int err; + + if (!consumer_del(uprobe, uc)) /* WARN? */ + return; + err = register_for_each_vma(uprobe, NULL); /* TODO : cant unregister? schedule a worker thread */ + if (!uprobe->consumers && !err) + delete_uprobe(uprobe); } /* @@ -845,31 +828,59 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer * struct uprobe *uprobe; int ret; - if (!inode || !uc || uc->next) - return -EINVAL; - + /* Racy, just to catch the obvious mistakes */ if (offset > i_size_read(inode)) return -EINVAL; - ret = 0; - mutex_lock(uprobes_hash(inode)); + retry: uprobe = alloc_uprobe(inode, offset); - - if (!uprobe) { - ret = -ENOMEM; - } else if (!consumer_add(uprobe, uc)) { - ret = __uprobe_register(uprobe); - if (ret) { - uprobe->consumers = NULL; - __uprobe_unregister(uprobe); - } else { - set_bit(UPROBE_RUN_HANDLER, &uprobe->flags); - } + if (!uprobe) + return -ENOMEM; + /* + * We can race with uprobe_unregister()->delete_uprobe(). + * Check uprobe_is_active() and retry if it is false. + */ + down_write(&uprobe->register_rwsem); + ret = -EAGAIN; + if (likely(uprobe_is_active(uprobe))) { + ret = __uprobe_register(uprobe, uc); + if (ret) + __uprobe_unregister(uprobe, uc); } + up_write(&uprobe->register_rwsem); + put_uprobe(uprobe); - mutex_unlock(uprobes_hash(inode)); - if (uprobe) - put_uprobe(uprobe); + if (unlikely(ret == -EAGAIN)) + goto retry; + return ret; +} +EXPORT_SYMBOL_GPL(uprobe_register); + +/* + * uprobe_apply - unregister a already registered probe. + * @inode: the file in which the probe has to be removed. + * @offset: offset from the start of the file. + * @uc: consumer which wants to add more or remove some breakpoints + * @add: add or remove the breakpoints + */ +int uprobe_apply(struct inode *inode, loff_t offset, + struct uprobe_consumer *uc, bool add) +{ + struct uprobe *uprobe; + struct uprobe_consumer *con; + int ret = -ENOENT; + + uprobe = find_uprobe(inode, offset); + if (!uprobe) + return ret; + + down_write(&uprobe->register_rwsem); + for (con = uprobe->consumers; con && con != uc ; con = con->next) + ; + if (con) + ret = register_for_each_vma(uprobe, add ? uc : NULL); + up_write(&uprobe->register_rwsem); + put_uprobe(uprobe); return ret; } @@ -884,25 +895,42 @@ void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consume { struct uprobe *uprobe; - if (!inode || !uc) - return; - uprobe = find_uprobe(inode, offset); if (!uprobe) return; - mutex_lock(uprobes_hash(inode)); + down_write(&uprobe->register_rwsem); + __uprobe_unregister(uprobe, uc); + up_write(&uprobe->register_rwsem); + put_uprobe(uprobe); +} +EXPORT_SYMBOL_GPL(uprobe_unregister); - if (consumer_del(uprobe, uc)) { - if (!uprobe->consumers) { - __uprobe_unregister(uprobe); - clear_bit(UPROBE_RUN_HANDLER, &uprobe->flags); - } +static int unapply_uprobe(struct uprobe *uprobe, struct mm_struct *mm) +{ + struct vm_area_struct *vma; + int err = 0; + + down_read(&mm->mmap_sem); + for (vma = mm->mmap; vma; vma = vma->vm_next) { + unsigned long vaddr; + loff_t offset; + + if (!valid_vma(vma, false) || + vma->vm_file->f_mapping->host != uprobe->inode) + continue; + + offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT; + if (uprobe->offset < offset || + uprobe->offset >= offset + vma->vm_end - vma->vm_start) + continue; + + vaddr = offset_to_vaddr(vma, uprobe->offset); + err |= remove_breakpoint(uprobe, mm, vaddr); } + up_read(&mm->mmap_sem); - mutex_unlock(uprobes_hash(inode)); - if (uprobe) - put_uprobe(uprobe); + return err; } static struct rb_node * @@ -979,7 +1007,7 @@ int uprobe_mmap(struct vm_area_struct *vma) struct uprobe *uprobe, *u; struct inode *inode; - if (!atomic_read(&uprobe_events) || !valid_vma(vma, true)) + if (no_uprobe_events() || !valid_vma(vma, true)) return 0; inode = vma->vm_file->f_mapping->host; @@ -988,9 +1016,14 @@ int uprobe_mmap(struct vm_area_struct *vma) mutex_lock(uprobes_mmap_hash(inode)); build_probe_list(inode, vma, vma->vm_start, vma->vm_end, &tmp_list); - + /* + * We can race with uprobe_unregister(), this uprobe can be already + * removed. But in this case filter_chain() must return false, all + * consumers have gone away. + */ list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) { - if (!fatal_signal_pending(current)) { + if (!fatal_signal_pending(current) && + filter_chain(uprobe, UPROBE_FILTER_MMAP, vma->vm_mm)) { unsigned long vaddr = offset_to_vaddr(vma, uprobe->offset); install_breakpoint(uprobe, vma->vm_mm, vma, vaddr); } @@ -1025,7 +1058,7 @@ vma_has_uprobes(struct vm_area_struct *vma, unsigned long start, unsigned long e */ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - if (!atomic_read(&uprobe_events) || !valid_vma(vma, false)) + if (no_uprobe_events() || !valid_vma(vma, false)) return; if (!atomic_read(&vma->vm_mm->mm_users)) /* called by mmput() ? */ @@ -1042,22 +1075,14 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon /* Slot allocation for XOL */ static int xol_add_vma(struct xol_area *area) { - struct mm_struct *mm; - int ret; - - area->page = alloc_page(GFP_HIGHUSER); - if (!area->page) - return -ENOMEM; - - ret = -EALREADY; - mm = current->mm; + struct mm_struct *mm = current->mm; + int ret = -EALREADY; down_write(&mm->mmap_sem); if (mm->uprobes_state.xol_area) goto fail; ret = -ENOMEM; - /* Try to map as high as possible, this is only a hint. */ area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, PAGE_SIZE, 0, 0); if (area->vaddr & ~PAGE_MASK) { @@ -1073,54 +1098,53 @@ static int xol_add_vma(struct xol_area *area) smp_wmb(); /* pairs with get_xol_area() */ mm->uprobes_state.xol_area = area; ret = 0; - -fail: + fail: up_write(&mm->mmap_sem); - if (ret) - __free_page(area->page); return ret; } -static struct xol_area *get_xol_area(struct mm_struct *mm) -{ - struct xol_area *area; - - area = mm->uprobes_state.xol_area; - smp_read_barrier_depends(); /* pairs with wmb in xol_add_vma() */ - - return area; -} - /* - * xol_alloc_area - Allocate process's xol_area. - * This area will be used for storing instructions for execution out of - * line. + * get_xol_area - Allocate process's xol_area if necessary. + * This area will be used for storing instructions for execution out of line. * * Returns the allocated area or NULL. */ -static struct xol_area *xol_alloc_area(void) +static struct xol_area *get_xol_area(void) { + struct mm_struct *mm = current->mm; struct xol_area *area; + area = mm->uprobes_state.xol_area; + if (area) + goto ret; + area = kzalloc(sizeof(*area), GFP_KERNEL); if (unlikely(!area)) - return NULL; + goto out; area->bitmap = kzalloc(BITS_TO_LONGS(UINSNS_PER_PAGE) * sizeof(long), GFP_KERNEL); - if (!area->bitmap) - goto fail; + goto free_area; + + area->page = alloc_page(GFP_HIGHUSER); + if (!area->page) + goto free_bitmap; init_waitqueue_head(&area->wq); if (!xol_add_vma(area)) return area; -fail: + __free_page(area->page); + free_bitmap: kfree(area->bitmap); + free_area: kfree(area); - - return get_xol_area(current->mm); + out: + area = mm->uprobes_state.xol_area; + ret: + smp_read_barrier_depends(); /* pairs with wmb in xol_add_vma() */ + return area; } /* @@ -1186,33 +1210,26 @@ static unsigned long xol_take_insn_slot(struct xol_area *area) } /* - * xol_get_insn_slot - If was not allocated a slot, then - * allocate a slot. + * xol_get_insn_slot - allocate a slot for xol. * Returns the allocated slot address or 0. */ -static unsigned long xol_get_insn_slot(struct uprobe *uprobe, unsigned long slot_addr) +static unsigned long xol_get_insn_slot(struct uprobe *uprobe) { struct xol_area *area; unsigned long offset; + unsigned long xol_vaddr; void *vaddr; - area = get_xol_area(current->mm); - if (!area) { - area = xol_alloc_area(); - if (!area) - return 0; - } - current->utask->xol_vaddr = xol_take_insn_slot(area); + area = get_xol_area(); + if (!area) + return 0; - /* - * Initialize the slot if xol_vaddr points to valid - * instruction slot. - */ - if (unlikely(!current->utask->xol_vaddr)) + xol_vaddr = xol_take_insn_slot(area); + if (unlikely(!xol_vaddr)) return 0; - current->utask->vaddr = slot_addr; - offset = current->utask->xol_vaddr & ~PAGE_MASK; + /* Initialize the slot */ + offset = xol_vaddr & ~PAGE_MASK; vaddr = kmap_atomic(area->page); memcpy(vaddr + offset, uprobe->arch.insn, MAX_UINSN_BYTES); kunmap_atomic(vaddr); @@ -1222,7 +1239,7 @@ static unsigned long xol_get_insn_slot(struct uprobe *uprobe, unsigned long slot */ flush_dcache_page(area->page); - return current->utask->xol_vaddr; + return xol_vaddr; } /* @@ -1240,8 +1257,7 @@ static void xol_free_insn_slot(struct task_struct *tsk) return; slot_addr = tsk->utask->xol_vaddr; - - if (unlikely(!slot_addr || IS_ERR_VALUE(slot_addr))) + if (unlikely(!slot_addr)) return; area = tsk->mm->uprobes_state.xol_area; @@ -1303,33 +1319,48 @@ void uprobe_copy_process(struct task_struct *t) } /* - * Allocate a uprobe_task object for the task. - * Called when the thread hits a breakpoint for the first time. + * Allocate a uprobe_task object for the task if if necessary. + * Called when the thread hits a breakpoint. * * Returns: * - pointer to new uprobe_task on success * - NULL otherwise */ -static struct uprobe_task *add_utask(void) +static struct uprobe_task *get_utask(void) { - struct uprobe_task *utask; - - utask = kzalloc(sizeof *utask, GFP_KERNEL); - if (unlikely(!utask)) - return NULL; - - current->utask = utask; - return utask; + if (!current->utask) + current->utask = kzalloc(sizeof(struct uprobe_task), GFP_KERNEL); + return current->utask; } /* Prepare to single-step probed instruction out of line. */ static int -pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long vaddr) +pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long bp_vaddr) { - if (xol_get_insn_slot(uprobe, vaddr) && !arch_uprobe_pre_xol(&uprobe->arch, regs)) - return 0; + struct uprobe_task *utask; + unsigned long xol_vaddr; + int err; + + utask = get_utask(); + if (!utask) + return -ENOMEM; + + xol_vaddr = xol_get_insn_slot(uprobe); + if (!xol_vaddr) + return -ENOMEM; + + utask->xol_vaddr = xol_vaddr; + utask->vaddr = bp_vaddr; + + err = arch_uprobe_pre_xol(&uprobe->arch, regs); + if (unlikely(err)) { + xol_free_insn_slot(current); + return err; + } - return -EFAULT; + utask->active_uprobe = uprobe; + utask->state = UTASK_SSTEP; + return 0; } /* @@ -1391,6 +1422,7 @@ static void mmf_recalc_uprobes(struct mm_struct *mm) * This is not strictly accurate, we can race with * uprobe_unregister() and see the already removed * uprobe if delete_uprobe() was not yet called. + * Or this uprobe can be filtered out. */ if (vma_has_uprobes(vma, vma->vm_start, vma->vm_end)) return; @@ -1452,13 +1484,33 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp) return uprobe; } +static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs) +{ + struct uprobe_consumer *uc; + int remove = UPROBE_HANDLER_REMOVE; + + down_read(&uprobe->register_rwsem); + for (uc = uprobe->consumers; uc; uc = uc->next) { + int rc = uc->handler(uc, regs); + + WARN(rc & ~UPROBE_HANDLER_MASK, + "bad rc=0x%x from %pf()\n", rc, uc->handler); + remove &= rc; + } + + if (remove && uprobe->consumers) { + WARN_ON(!uprobe_is_active(uprobe)); + unapply_uprobe(uprobe, current->mm); + } + up_read(&uprobe->register_rwsem); +} + /* * Run handler and ask thread to singlestep. * Ensure all non-fatal signals cannot interrupt thread while it singlesteps. */ static void handle_swbp(struct pt_regs *regs) { - struct uprobe_task *utask; struct uprobe *uprobe; unsigned long bp_vaddr; int uninitialized_var(is_swbp); @@ -1483,6 +1535,10 @@ static void handle_swbp(struct pt_regs *regs) } return; } + + /* change it in advance for ->handler() and restart */ + instruction_pointer_set(regs, bp_vaddr); + /* * TODO: move copy_insn/etc into _register and remove this hack. * After we hit the bp, _unregister + _register can install the @@ -1490,32 +1546,16 @@ static void handle_swbp(struct pt_regs *regs) */ smp_rmb(); /* pairs with wmb() in install_breakpoint() */ if (unlikely(!test_bit(UPROBE_COPY_INSN, &uprobe->flags))) - goto restart; - - utask = current->utask; - if (!utask) { - utask = add_utask(); - /* Cannot allocate; re-execute the instruction. */ - if (!utask) - goto restart; - } + goto out; handler_chain(uprobe, regs); if (can_skip_sstep(uprobe, regs)) goto out; - if (!pre_ssout(uprobe, regs, bp_vaddr)) { - utask->active_uprobe = uprobe; - utask->state = UTASK_SSTEP; + if (!pre_ssout(uprobe, regs, bp_vaddr)) return; - } -restart: - /* - * cannot singlestep; cannot skip instruction; - * re-execute the instruction. - */ - instruction_pointer_set(regs, bp_vaddr); + /* can_skip_sstep() succeeded, or restart if can't singlestep */ out: put_uprobe(uprobe); } @@ -1609,10 +1649,8 @@ static int __init init_uprobes(void) { int i; - for (i = 0; i < UPROBES_HASH_SZ; i++) { - mutex_init(&uprobes_mutex[i]); + for (i = 0; i < UPROBES_HASH_SZ; i++) mutex_init(&uprobes_mmap_mutex[i]); - } if (percpu_init_rwsem(&dup_mmap_sem)) return -ENOMEM; diff --git a/kernel/exit.c b/kernel/exit.c index b4df21937216..7dd20408707c 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -85,6 +85,7 @@ static void __exit_signal(struct task_struct *tsk) bool group_dead = thread_group_leader(tsk); struct sighand_struct *sighand; struct tty_struct *uninitialized_var(tty); + cputime_t utime, stime; sighand = rcu_dereference_check(tsk->sighand, lockdep_tasklist_lock_is_held()); @@ -123,9 +124,10 @@ static void __exit_signal(struct task_struct *tsk) * We won't ever get here for the group leader, since it * will have been the last reference on the signal_struct. */ - sig->utime += tsk->utime; - sig->stime += tsk->stime; - sig->gtime += tsk->gtime; + task_cputime(tsk, &utime, &stime); + sig->utime += utime; + sig->stime += stime; + sig->gtime += task_gtime(tsk); sig->min_flt += tsk->min_flt; sig->maj_flt += tsk->maj_flt; sig->nvcsw += tsk->nvcsw; @@ -1092,7 +1094,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) sig = p->signal; psig->cutime += tgutime + sig->cutime; psig->cstime += tgstime + sig->cstime; - psig->cgtime += p->gtime + sig->gtime + sig->cgtime; + psig->cgtime += task_gtime(p) + sig->gtime + sig->cgtime; psig->cmin_flt += p->min_flt + sig->min_flt + sig->cmin_flt; psig->cmaj_flt += diff --git a/kernel/fork.c b/kernel/fork.c index c535f33bbb9c..4133876d8cd2 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1233,6 +1233,12 @@ static struct task_struct *copy_process(unsigned long clone_flags, #ifndef CONFIG_VIRT_CPU_ACCOUNTING p->prev_cputime.utime = p->prev_cputime.stime = 0; #endif +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN + seqlock_init(&p->vtime_seqlock); + p->vtime_snap = 0; + p->vtime_snap_whence = VTIME_SLEEPING; +#endif + #if defined(SPLIT_RSS_COUNTING) memset(&p->rss_stat, 0, sizeof(p->rss_stat)); #endif diff --git a/kernel/futex.c b/kernel/futex.c index 19eb089ca003..9618b6e9fb36 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -60,6 +60,7 @@ #include <linux/pid.h> #include <linux/nsproxy.h> #include <linux/ptrace.h> +#include <linux/sched/rt.h> #include <asm/futex.h> diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 6db7a5ed52b5..cc47812d3feb 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -44,6 +44,8 @@ #include <linux/err.h> #include <linux/debugobjects.h> #include <linux/sched.h> +#include <linux/sched/sysctl.h> +#include <linux/sched/rt.h> #include <linux/timer.h> #include <asm/uaccess.h> @@ -640,21 +642,9 @@ static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) * and expiry check is done in the hrtimer_interrupt or in the softirq. */ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, - struct hrtimer_clock_base *base, - int wakeup) + struct hrtimer_clock_base *base) { - if (base->cpu_base->hres_active && hrtimer_reprogram(timer, base)) { - if (wakeup) { - raw_spin_unlock(&base->cpu_base->lock); - raise_softirq_irqoff(HRTIMER_SOFTIRQ); - raw_spin_lock(&base->cpu_base->lock); - } else - __raise_softirq_irqoff(HRTIMER_SOFTIRQ); - - return 1; - } - - return 0; + return base->cpu_base->hres_active && hrtimer_reprogram(timer, base); } static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base) @@ -735,8 +725,7 @@ static inline int hrtimer_switch_to_hres(void) { return 0; } static inline void hrtimer_force_reprogram(struct hrtimer_cpu_base *base, int skip_equal) { } static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, - struct hrtimer_clock_base *base, - int wakeup) + struct hrtimer_clock_base *base) { return 0; } @@ -995,8 +984,21 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, * * XXX send_remote_softirq() ? */ - if (leftmost && new_base->cpu_base == &__get_cpu_var(hrtimer_bases)) - hrtimer_enqueue_reprogram(timer, new_base, wakeup); + if (leftmost && new_base->cpu_base == &__get_cpu_var(hrtimer_bases) + && hrtimer_enqueue_reprogram(timer, new_base)) { + if (wakeup) { + /* + * We need to drop cpu_base->lock to avoid a + * lock ordering issue vs. rq->lock. + */ + raw_spin_unlock(&new_base->cpu_base->lock); + raise_softirq_irqoff(HRTIMER_SOFTIRQ); + local_irq_restore(flags); + return ret; + } else { + __raise_softirq_irqoff(HRTIMER_SOFTIRQ); + } + } unlock_hrtimer_base(timer, &flags); diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 3aca9f29d30e..cbd97ce0b000 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -90,27 +90,41 @@ int irq_set_handler_data(unsigned int irq, void *data) EXPORT_SYMBOL(irq_set_handler_data); /** - * irq_set_msi_desc - set MSI descriptor data for an irq - * @irq: Interrupt number - * @entry: Pointer to MSI descriptor data + * irq_set_msi_desc_off - set MSI descriptor data for an irq at offset + * @irq_base: Interrupt number base + * @irq_offset: Interrupt number offset + * @entry: Pointer to MSI descriptor data * - * Set the MSI descriptor entry for an irq + * Set the MSI descriptor entry for an irq at offset */ -int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry) +int irq_set_msi_desc_off(unsigned int irq_base, unsigned int irq_offset, + struct msi_desc *entry) { unsigned long flags; - struct irq_desc *desc = irq_get_desc_lock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL); + struct irq_desc *desc = irq_get_desc_lock(irq_base + irq_offset, &flags, IRQ_GET_DESC_CHECK_GLOBAL); if (!desc) return -EINVAL; desc->irq_data.msi_desc = entry; - if (entry) - entry->irq = irq; + if (entry && !irq_offset) + entry->irq = irq_base; irq_put_desc_unlock(desc, flags); return 0; } /** + * irq_set_msi_desc - set MSI descriptor data for an irq + * @irq: Interrupt number + * @entry: Pointer to MSI descriptor data + * + * Set the MSI descriptor entry for an irq + */ +int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry) +{ + return irq_set_msi_desc_off(irq, 0, entry); +} + +/** * irq_set_chip_data - set irq chip data for an irq * @irq: Interrupt number * @data: Pointer to chip specific data diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index e49a288fa479..fa17855ca65a 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -16,6 +16,7 @@ #include <linux/interrupt.h> #include <linux/slab.h> #include <linux/sched.h> +#include <linux/sched/rt.h> #include <linux/task_work.h> #include "internals.h" @@ -1524,6 +1525,7 @@ void enable_percpu_irq(unsigned int irq, unsigned int type) out: irq_put_desc_unlock(desc, flags); } +EXPORT_SYMBOL_GPL(enable_percpu_irq); void disable_percpu_irq(unsigned int irq) { @@ -1537,6 +1539,7 @@ void disable_percpu_irq(unsigned int irq) irq_percpu_disable(desc, cpu); irq_put_desc_unlock(desc, flags); } +EXPORT_SYMBOL_GPL(disable_percpu_irq); /* * Internal function to unregister a percpu irqaction. diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index 611cd6003c45..7b5f012bde9d 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c @@ -80,13 +80,11 @@ static int try_one_irq(int irq, struct irq_desc *desc, bool force) /* * All handlers must agree on IRQF_SHARED, so we test just the - * first. Check for action->next as well. + * first. */ action = desc->action; if (!action || !(action->flags & IRQF_SHARED) || - (action->flags & __IRQF_TIMER) || - (action->handler(irq, action->dev_id) == IRQ_HANDLED) || - !action->next) + (action->flags & __IRQF_TIMER)) goto out; /* Already running on another processor */ @@ -104,6 +102,7 @@ static int try_one_irq(int irq, struct irq_desc *desc, bool force) do { if (handle_irq_event(desc) == IRQ_HANDLED) ret = IRQ_HANDLED; + /* Make sure that there is still a valid action */ action = desc->action; } while ((desc->istate & IRQS_PENDING) && action); desc->istate &= ~IRQS_POLL_INPROGRESS; diff --git a/kernel/irq_work.c b/kernel/irq_work.c index 1588e3b2871b..55fcce6065cf 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -12,37 +12,36 @@ #include <linux/percpu.h> #include <linux/hardirq.h> #include <linux/irqflags.h> +#include <linux/sched.h> +#include <linux/tick.h> +#include <linux/cpu.h> +#include <linux/notifier.h> #include <asm/processor.h> -/* - * An entry can be in one of four states: - * - * free NULL, 0 -> {claimed} : free to be used - * claimed NULL, 3 -> {pending} : claimed to be enqueued - * pending next, 3 -> {busy} : queued, pending callback - * busy NULL, 2 -> {free, claimed} : callback in progress, can be claimed - */ - -#define IRQ_WORK_PENDING 1UL -#define IRQ_WORK_BUSY 2UL -#define IRQ_WORK_FLAGS 3UL static DEFINE_PER_CPU(struct llist_head, irq_work_list); +static DEFINE_PER_CPU(int, irq_work_raised); /* * Claim the entry so that no one else will poke at it. */ static bool irq_work_claim(struct irq_work *work) { - unsigned long flags, nflags; + unsigned long flags, oflags, nflags; + /* + * Start with our best wish as a premise but only trust any + * flag value after cmpxchg() result. + */ + flags = work->flags & ~IRQ_WORK_PENDING; for (;;) { - flags = work->flags; - if (flags & IRQ_WORK_PENDING) - return false; nflags = flags | IRQ_WORK_FLAGS; - if (cmpxchg(&work->flags, flags, nflags) == flags) + oflags = cmpxchg(&work->flags, flags, nflags); + if (oflags == flags) break; + if (oflags & IRQ_WORK_PENDING) + return false; + flags = oflags; cpu_relax(); } @@ -57,57 +56,69 @@ void __weak arch_irq_work_raise(void) } /* - * Queue the entry and raise the IPI if needed. + * Enqueue the irq_work @entry unless it's already pending + * somewhere. + * + * Can be re-enqueued while the callback is still in progress. */ -static void __irq_work_queue(struct irq_work *work) +void irq_work_queue(struct irq_work *work) { - bool empty; + /* Only queue if not already pending */ + if (!irq_work_claim(work)) + return; + /* Queue the entry and raise the IPI if needed. */ preempt_disable(); - empty = llist_add(&work->llnode, &__get_cpu_var(irq_work_list)); - /* The list was empty, raise self-interrupt to start processing. */ - if (empty) - arch_irq_work_raise(); + llist_add(&work->llnode, &__get_cpu_var(irq_work_list)); + + /* + * If the work is not "lazy" or the tick is stopped, raise the irq + * work interrupt (if supported by the arch), otherwise, just wait + * for the next tick. + */ + if (!(work->flags & IRQ_WORK_LAZY) || tick_nohz_tick_stopped()) { + if (!this_cpu_cmpxchg(irq_work_raised, 0, 1)) + arch_irq_work_raise(); + } preempt_enable(); } +EXPORT_SYMBOL_GPL(irq_work_queue); -/* - * Enqueue the irq_work @entry, returns true on success, failure when the - * @entry was already enqueued by someone else. - * - * Can be re-enqueued while the callback is still in progress. - */ -bool irq_work_queue(struct irq_work *work) +bool irq_work_needs_cpu(void) { - if (!irq_work_claim(work)) { - /* - * Already enqueued, can't do! - */ + struct llist_head *this_list; + + this_list = &__get_cpu_var(irq_work_list); + if (llist_empty(this_list)) return false; - } - __irq_work_queue(work); + /* All work should have been flushed before going offline */ + WARN_ON_ONCE(cpu_is_offline(smp_processor_id())); + return true; } -EXPORT_SYMBOL_GPL(irq_work_queue); -/* - * Run the irq_work entries on this cpu. Requires to be ran from hardirq - * context with local IRQs disabled. - */ -void irq_work_run(void) +static void __irq_work_run(void) { + unsigned long flags; struct irq_work *work; struct llist_head *this_list; struct llist_node *llnode; + + /* + * Reset the "raised" state right before we check the list because + * an NMI may enqueue after we find the list empty from the runner. + */ + __this_cpu_write(irq_work_raised, 0); + barrier(); + this_list = &__get_cpu_var(irq_work_list); if (llist_empty(this_list)) return; - BUG_ON(!in_irq()); BUG_ON(!irqs_disabled()); llnode = llist_del_all(this_list); @@ -119,16 +130,31 @@ void irq_work_run(void) /* * Clear the PENDING bit, after this point the @work * can be re-used. + * Make it immediately visible so that other CPUs trying + * to claim that work don't rely on us to handle their data + * while we are in the middle of the func. */ - work->flags = IRQ_WORK_BUSY; + flags = work->flags & ~IRQ_WORK_PENDING; + xchg(&work->flags, flags); + work->func(work); /* * Clear the BUSY bit and return to the free state if * no-one else claimed it meanwhile. */ - (void)cmpxchg(&work->flags, IRQ_WORK_BUSY, 0); + (void)cmpxchg(&work->flags, flags, flags & ~IRQ_WORK_BUSY); } } + +/* + * Run the irq_work entries on this cpu. Requires to be ran from hardirq + * context with local IRQs disabled. + */ +void irq_work_run(void) +{ + BUG_ON(!in_irq()); + __irq_work_run(); +} EXPORT_SYMBOL_GPL(irq_work_run); /* @@ -143,3 +169,35 @@ void irq_work_sync(struct irq_work *work) cpu_relax(); } EXPORT_SYMBOL_GPL(irq_work_sync); + +#ifdef CONFIG_HOTPLUG_CPU +static int irq_work_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + long cpu = (long)hcpu; + + switch (action) { + case CPU_DYING: + /* Called from stop_machine */ + if (WARN_ON_ONCE(cpu != smp_processor_id())) + break; + __irq_work_run(); + break; + default: + break; + } + return NOTIFY_OK; +} + +static struct notifier_block cpu_notify; + +static __init int irq_work_init_cpu_notifier(void) +{ + cpu_notify.notifier_call = irq_work_cpu_notify; + cpu_notify.priority = 0; + register_cpu_notifier(&cpu_notify); + return 0; +} +device_initcall(irq_work_init_cpu_notifier); + +#endif /* CONFIG_HOTPLUG_CPU */ diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 098f396aa409..f423c3ef4a82 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -919,7 +919,7 @@ static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p) } #endif /* CONFIG_OPTPROBES */ -#ifdef KPROBES_CAN_USE_FTRACE +#ifdef CONFIG_KPROBES_ON_FTRACE static struct ftrace_ops kprobe_ftrace_ops __read_mostly = { .func = kprobe_ftrace_handler, .flags = FTRACE_OPS_FL_SAVE_REGS, @@ -964,7 +964,7 @@ static void __kprobes disarm_kprobe_ftrace(struct kprobe *p) (unsigned long)p->addr, 1, 0); WARN(ret < 0, "Failed to disarm kprobe-ftrace at %p (%d)\n", p->addr, ret); } -#else /* !KPROBES_CAN_USE_FTRACE */ +#else /* !CONFIG_KPROBES_ON_FTRACE */ #define prepare_kprobe(p) arch_prepare_kprobe(p) #define arm_kprobe_ftrace(p) do {} while (0) #define disarm_kprobe_ftrace(p) do {} while (0) @@ -1414,12 +1414,12 @@ static __kprobes int check_kprobe_address_safe(struct kprobe *p, */ ftrace_addr = ftrace_location((unsigned long)p->addr); if (ftrace_addr) { -#ifdef KPROBES_CAN_USE_FTRACE +#ifdef CONFIG_KPROBES_ON_FTRACE /* Given address is not on the instruction boundary */ if ((unsigned long)p->addr != ftrace_addr) return -EILSEQ; p->flags |= KPROBE_FLAG_FTRACE; -#else /* !KPROBES_CAN_USE_FTRACE */ +#else /* !CONFIG_KPROBES_ON_FTRACE */ return -EINVAL; #endif } diff --git a/kernel/mutex.c b/kernel/mutex.c index a307cc9c9526..52f23011b6e0 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -19,6 +19,7 @@ */ #include <linux/mutex.h> #include <linux/sched.h> +#include <linux/sched/rt.h> #include <linux/export.h> #include <linux/spinlock.h> #include <linux/interrupt.h> diff --git a/kernel/pid.c b/kernel/pid.c index de9af600006f..f2c6a6825098 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -331,7 +331,7 @@ out: return pid; out_unlock: - spin_unlock(&pidmap_lock); + spin_unlock_irq(&pidmap_lock); out_free: while (++i <= ns->level) free_pidmap(pid->numbers + i); diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index a278cad1d5d6..8fd709c9bb58 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -155,11 +155,19 @@ static void bump_cpu_timer(struct k_itimer *timer, static inline cputime_t prof_ticks(struct task_struct *p) { - return p->utime + p->stime; + cputime_t utime, stime; + + task_cputime(p, &utime, &stime); + + return utime + stime; } static inline cputime_t virt_ticks(struct task_struct *p) { - return p->utime; + cputime_t utime; + + task_cputime(p, &utime, NULL); + + return utime; } static int @@ -471,18 +479,23 @@ static void cleanup_timers(struct list_head *head, */ void posix_cpu_timers_exit(struct task_struct *tsk) { + cputime_t utime, stime; + add_device_randomness((const void*) &tsk->se.sum_exec_runtime, sizeof(unsigned long long)); + task_cputime(tsk, &utime, &stime); cleanup_timers(tsk->cpu_timers, - tsk->utime, tsk->stime, tsk->se.sum_exec_runtime); + utime, stime, tsk->se.sum_exec_runtime); } void posix_cpu_timers_exit_group(struct task_struct *tsk) { struct signal_struct *const sig = tsk->signal; + cputime_t utime, stime; + task_cputime(tsk, &utime, &stime); cleanup_timers(tsk->signal->cpu_timers, - tsk->utime + sig->utime, tsk->stime + sig->stime, + utime + sig->utime, stime + sig->stime, tsk->se.sum_exec_runtime + sig->sum_sched_runtime); } @@ -1226,11 +1239,14 @@ static inline int task_cputime_expired(const struct task_cputime *sample, static inline int fastpath_timer_check(struct task_struct *tsk) { struct signal_struct *sig; + cputime_t utime, stime; + + task_cputime(tsk, &utime, &stime); if (!task_cputime_zero(&tsk->cputime_expires)) { struct task_cputime task_sample = { - .utime = tsk->utime, - .stime = tsk->stime, + .utime = utime, + .stime = stime, .sum_exec_runtime = tsk->se.sum_exec_runtime }; @@ -1401,8 +1417,10 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags, while (!signal_pending(current)) { if (timer.it.cpu.expires.sched == 0) { /* - * Our timer fired and was reset. + * Our timer fired and was reset, below + * deletion can not fail. */ + posix_cpu_timer_del(&timer); spin_unlock_irq(&timer.it_lock); return 0; } @@ -1420,9 +1438,26 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags, * We were interrupted by a signal. */ sample_to_timespec(which_clock, timer.it.cpu.expires, rqtp); - posix_cpu_timer_set(&timer, 0, &zero_it, it); + error = posix_cpu_timer_set(&timer, 0, &zero_it, it); + if (!error) { + /* + * Timer is now unarmed, deletion can not fail. + */ + posix_cpu_timer_del(&timer); + } spin_unlock_irq(&timer.it_lock); + while (error == TIMER_RETRY) { + /* + * We need to handle case when timer was or is in the + * middle of firing. In other cases we already freed + * resources. + */ + spin_lock_irq(&timer.it_lock); + error = posix_cpu_timer_del(&timer); + spin_unlock_irq(&timer.it_lock); + } + if ((it->it_value.tv_sec | it->it_value.tv_nsec) == 0) { /* * It actually did fire already. diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 69185ae6b701..10349d5f2ec3 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -997,7 +997,7 @@ SYSCALL_DEFINE2(clock_adjtime, const clockid_t, which_clock, err = kc->clock_adj(which_clock, &ktx); - if (!err && copy_to_user(utx, &ktx, sizeof(ktx))) + if (err >= 0 && copy_to_user(utx, &ktx, sizeof(ktx))) return -EFAULT; return err; diff --git a/kernel/printk.c b/kernel/printk.c index 267ce780abe8..f24633afa46a 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -42,6 +42,7 @@ #include <linux/notifier.h> #include <linux/rculist.h> #include <linux/poll.h> +#include <linux/irq_work.h> #include <asm/uaccess.h> @@ -1959,30 +1960,32 @@ int is_console_locked(void) static DEFINE_PER_CPU(int, printk_pending); static DEFINE_PER_CPU(char [PRINTK_BUF_SIZE], printk_sched_buf); -void printk_tick(void) +static void wake_up_klogd_work_func(struct irq_work *irq_work) { - if (__this_cpu_read(printk_pending)) { - int pending = __this_cpu_xchg(printk_pending, 0); - if (pending & PRINTK_PENDING_SCHED) { - char *buf = __get_cpu_var(printk_sched_buf); - printk(KERN_WARNING "[sched_delayed] %s", buf); - } - if (pending & PRINTK_PENDING_WAKEUP) - wake_up_interruptible(&log_wait); + int pending = __this_cpu_xchg(printk_pending, 0); + + if (pending & PRINTK_PENDING_SCHED) { + char *buf = __get_cpu_var(printk_sched_buf); + printk(KERN_WARNING "[sched_delayed] %s", buf); } -} -int printk_needs_cpu(int cpu) -{ - if (cpu_is_offline(cpu)) - printk_tick(); - return __this_cpu_read(printk_pending); + if (pending & PRINTK_PENDING_WAKEUP) + wake_up_interruptible(&log_wait); } +static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = { + .func = wake_up_klogd_work_func, + .flags = IRQ_WORK_LAZY, +}; + void wake_up_klogd(void) { - if (waitqueue_active(&log_wait)) + preempt_disable(); + if (waitqueue_active(&log_wait)) { this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP); + irq_work_queue(&__get_cpu_var(wake_up_klogd_work)); + } + preempt_enable(); } static void console_cont_flush(char *text, size_t size) @@ -2462,6 +2465,7 @@ int printk_sched(const char *fmt, ...) va_end(args); __this_cpu_or(printk_pending, PRINTK_PENDING_SCHED); + irq_work_queue(&__get_cpu_var(wake_up_klogd_work)); local_irq_restore(flags); return r; diff --git a/kernel/profile.c b/kernel/profile.c index 1f391819c42f..dc3384ee874e 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -37,9 +37,6 @@ struct profile_hit { #define NR_PROFILE_HIT (PAGE_SIZE/sizeof(struct profile_hit)) #define NR_PROFILE_GRP (NR_PROFILE_HIT/PROFILE_GRPSZ) -/* Oprofile timer tick hook */ -static int (*timer_hook)(struct pt_regs *) __read_mostly; - static atomic_t *prof_buffer; static unsigned long prof_len, prof_shift; @@ -208,25 +205,6 @@ int profile_event_unregister(enum profile_type type, struct notifier_block *n) } EXPORT_SYMBOL_GPL(profile_event_unregister); -int register_timer_hook(int (*hook)(struct pt_regs *)) -{ - if (timer_hook) - return -EBUSY; - timer_hook = hook; - return 0; -} -EXPORT_SYMBOL_GPL(register_timer_hook); - -void unregister_timer_hook(int (*hook)(struct pt_regs *)) -{ - WARN_ON(hook != timer_hook); - timer_hook = NULL; - /* make sure all CPUs see the NULL hook */ - synchronize_sched(); /* Allow ongoing interrupts to complete. */ -} -EXPORT_SYMBOL_GPL(unregister_timer_hook); - - #ifdef CONFIG_SMP /* * Each cpu has a pair of open-addressed hashtables for pending @@ -436,8 +414,6 @@ void profile_tick(int type) { struct pt_regs *regs = get_irq_regs(); - if (type == CPU_PROFILING && timer_hook) - timer_hook(regs); if (!user_mode(regs) && prof_cpu_mask != NULL && cpumask_test_cpu(smp_processor_id(), prof_cpu_mask)) profile_hit(type, (void *)profile_pc(regs)); diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 6cbeaae4406d..acbd28424d81 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -712,6 +712,12 @@ static int ptrace_regset(struct task_struct *task, int req, unsigned int type, kiov->iov_len, kiov->iov_base); } +/* + * This is declared in linux/regset.h and defined in machine-dependent + * code. We put the export here, near the primary machine-neutral use, + * to ensure no machine forgets it. + */ +EXPORT_SYMBOL_GPL(task_user_regset_view); #endif int ptrace_request(struct task_struct *child, long request, diff --git a/kernel/rcu.h b/kernel/rcu.h index 20dfba576c2b..7f8e7590e3e5 100644 --- a/kernel/rcu.h +++ b/kernel/rcu.h @@ -111,4 +111,11 @@ static inline bool __rcu_reclaim(char *rn, struct rcu_head *head) extern int rcu_expedited; +#ifdef CONFIG_RCU_STALL_COMMON + +extern int rcu_cpu_stall_suppress; +int rcu_jiffies_till_stall_check(void); + +#endif /* #ifdef CONFIG_RCU_STALL_COMMON */ + #endif /* __LINUX_RCU_H */ diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index a2cf76177b44..48ab70384a4c 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -404,11 +404,65 @@ EXPORT_SYMBOL_GPL(rcuhead_debug_descr); #endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */ #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) || defined(CONFIG_RCU_TRACE) -void do_trace_rcu_torture_read(char *rcutorturename, struct rcu_head *rhp) +void do_trace_rcu_torture_read(char *rcutorturename, struct rcu_head *rhp, + unsigned long secs, + unsigned long c_old, unsigned long c) { - trace_rcu_torture_read(rcutorturename, rhp); + trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c); } EXPORT_SYMBOL_GPL(do_trace_rcu_torture_read); #else -#define do_trace_rcu_torture_read(rcutorturename, rhp) do { } while (0) +#define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \ + do { } while (0) #endif + +#ifdef CONFIG_RCU_STALL_COMMON + +#ifdef CONFIG_PROVE_RCU +#define RCU_STALL_DELAY_DELTA (5 * HZ) +#else +#define RCU_STALL_DELAY_DELTA 0 +#endif + +int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */ +int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT; + +module_param(rcu_cpu_stall_suppress, int, 0644); +module_param(rcu_cpu_stall_timeout, int, 0644); + +int rcu_jiffies_till_stall_check(void) +{ + int till_stall_check = ACCESS_ONCE(rcu_cpu_stall_timeout); + + /* + * Limit check must be consistent with the Kconfig limits + * for CONFIG_RCU_CPU_STALL_TIMEOUT. + */ + if (till_stall_check < 3) { + ACCESS_ONCE(rcu_cpu_stall_timeout) = 3; + till_stall_check = 3; + } else if (till_stall_check > 300) { + ACCESS_ONCE(rcu_cpu_stall_timeout) = 300; + till_stall_check = 300; + } + return till_stall_check * HZ + RCU_STALL_DELAY_DELTA; +} + +static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr) +{ + rcu_cpu_stall_suppress = 1; + return NOTIFY_DONE; +} + +static struct notifier_block rcu_panic_block = { + .notifier_call = rcu_panic, +}; + +static int __init check_cpu_stall_init(void) +{ + atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block); + return 0; +} +early_initcall(check_cpu_stall_init); + +#endif /* #ifdef CONFIG_RCU_STALL_COMMON */ diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index e7dce58f9c2a..a0714a51b6d7 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -51,10 +51,10 @@ static void __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), struct rcu_ctrlblk *rcp); -#include "rcutiny_plugin.h" - static long long rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; +#include "rcutiny_plugin.h" + /* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcutree.c. */ static void rcu_idle_enter_common(long long newval) { @@ -193,7 +193,7 @@ EXPORT_SYMBOL(rcu_is_cpu_idle); * interrupts don't count, we must be running at the first interrupt * level. */ -int rcu_is_cpu_rrupt_from_idle(void) +static int rcu_is_cpu_rrupt_from_idle(void) { return rcu_dynticks_nesting <= 1; } @@ -205,6 +205,7 @@ int rcu_is_cpu_rrupt_from_idle(void) */ static int rcu_qsctr_help(struct rcu_ctrlblk *rcp) { + reset_cpu_stall_ticks(rcp); if (rcp->rcucblist != NULL && rcp->donetail != rcp->curtail) { rcp->donetail = rcp->curtail; @@ -251,6 +252,7 @@ void rcu_bh_qs(int cpu) */ void rcu_check_callbacks(int cpu, int user) { + check_cpu_stalls(); if (user || rcu_is_cpu_rrupt_from_idle()) rcu_sched_qs(cpu); else if (!in_softirq()) diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h index f85016a2309b..8a233002faeb 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcutiny_plugin.h @@ -33,6 +33,9 @@ struct rcu_ctrlblk { struct rcu_head **donetail; /* ->next pointer of last "done" CB. */ struct rcu_head **curtail; /* ->next pointer of last CB. */ RCU_TRACE(long qlen); /* Number of pending CBs. */ + RCU_TRACE(unsigned long gp_start); /* Start time for stalls. */ + RCU_TRACE(unsigned long ticks_this_gp); /* Statistic for stalls. */ + RCU_TRACE(unsigned long jiffies_stall); /* Jiffies at next stall. */ RCU_TRACE(char *name); /* Name of RCU type. */ }; @@ -54,6 +57,51 @@ int rcu_scheduler_active __read_mostly; EXPORT_SYMBOL_GPL(rcu_scheduler_active); #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ +#ifdef CONFIG_RCU_TRACE + +static void check_cpu_stall(struct rcu_ctrlblk *rcp) +{ + unsigned long j; + unsigned long js; + + if (rcu_cpu_stall_suppress) + return; + rcp->ticks_this_gp++; + j = jiffies; + js = rcp->jiffies_stall; + if (*rcp->curtail && ULONG_CMP_GE(j, js)) { + pr_err("INFO: %s stall on CPU (%lu ticks this GP) idle=%llx (t=%lu jiffies q=%ld)\n", + rcp->name, rcp->ticks_this_gp, rcu_dynticks_nesting, + jiffies - rcp->gp_start, rcp->qlen); + dump_stack(); + } + if (*rcp->curtail && ULONG_CMP_GE(j, js)) + rcp->jiffies_stall = jiffies + + 3 * rcu_jiffies_till_stall_check() + 3; + else if (ULONG_CMP_GE(j, js)) + rcp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check(); +} + +static void check_cpu_stall_preempt(void); + +#endif /* #ifdef CONFIG_RCU_TRACE */ + +static void reset_cpu_stall_ticks(struct rcu_ctrlblk *rcp) +{ +#ifdef CONFIG_RCU_TRACE + rcp->ticks_this_gp = 0; + rcp->gp_start = jiffies; + rcp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check(); +#endif /* #ifdef CONFIG_RCU_TRACE */ +} + +static void check_cpu_stalls(void) +{ + RCU_TRACE(check_cpu_stall(&rcu_bh_ctrlblk)); + RCU_TRACE(check_cpu_stall(&rcu_sched_ctrlblk)); + RCU_TRACE(check_cpu_stall_preempt()); +} + #ifdef CONFIG_TINY_PREEMPT_RCU #include <linux/delay.h> @@ -448,6 +496,7 @@ static void rcu_preempt_start_gp(void) /* Official start of GP. */ rcu_preempt_ctrlblk.gpnum++; RCU_TRACE(rcu_preempt_ctrlblk.n_grace_periods++); + reset_cpu_stall_ticks(&rcu_preempt_ctrlblk.rcb); /* Any blocked RCU readers block new GP. */ if (rcu_preempt_blocked_readers_any()) @@ -1054,4 +1103,11 @@ MODULE_AUTHOR("Paul E. McKenney"); MODULE_DESCRIPTION("Read-Copy Update tracing for tiny implementation"); MODULE_LICENSE("GPL"); +static void check_cpu_stall_preempt(void) +{ +#ifdef CONFIG_TINY_PREEMPT_RCU + check_cpu_stall(&rcu_preempt_ctrlblk.rcb); +#endif /* #ifdef CONFIG_TINY_PREEMPT_RCU */ +} + #endif /* #ifdef CONFIG_RCU_TRACE */ diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 31dea01c85fd..e1f3a8c96724 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -46,6 +46,7 @@ #include <linux/stat.h> #include <linux/srcu.h> #include <linux/slab.h> +#include <linux/trace_clock.h> #include <asm/byteorder.h> MODULE_LICENSE("GPL"); @@ -207,6 +208,20 @@ MODULE_PARM_DESC(rcutorture_runnable, "Start rcutorture at boot"); #define rcu_can_boost() 0 #endif /* #else #if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) */ +#ifdef CONFIG_RCU_TRACE +static u64 notrace rcu_trace_clock_local(void) +{ + u64 ts = trace_clock_local(); + unsigned long __maybe_unused ts_rem = do_div(ts, NSEC_PER_USEC); + return ts; +} +#else /* #ifdef CONFIG_RCU_TRACE */ +static u64 notrace rcu_trace_clock_local(void) +{ + return 0ULL; +} +#endif /* #else #ifdef CONFIG_RCU_TRACE */ + static unsigned long shutdown_time; /* jiffies to system shutdown. */ static unsigned long boost_starttime; /* jiffies of next boost test start. */ DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */ @@ -845,7 +860,7 @@ static int rcu_torture_boost(void *arg) /* Wait for the next test interval. */ oldstarttime = boost_starttime; while (ULONG_CMP_LT(jiffies, oldstarttime)) { - schedule_timeout_uninterruptible(1); + schedule_timeout_interruptible(oldstarttime - jiffies); rcu_stutter_wait("rcu_torture_boost"); if (kthread_should_stop() || fullstop != FULLSTOP_DONTSTOP) @@ -1028,7 +1043,6 @@ void rcutorture_trace_dump(void) return; if (atomic_xchg(&beenhere, 1) != 0) return; - do_trace_rcu_torture_read(cur_ops->name, (struct rcu_head *)~0UL); ftrace_dump(DUMP_ALL); } @@ -1042,13 +1056,16 @@ static void rcu_torture_timer(unsigned long unused) { int idx; int completed; + int completed_end; static DEFINE_RCU_RANDOM(rand); static DEFINE_SPINLOCK(rand_lock); struct rcu_torture *p; int pipe_count; + unsigned long long ts; idx = cur_ops->readlock(); completed = cur_ops->completed(); + ts = rcu_trace_clock_local(); p = rcu_dereference_check(rcu_torture_current, rcu_read_lock_bh_held() || rcu_read_lock_sched_held() || @@ -1058,7 +1075,6 @@ static void rcu_torture_timer(unsigned long unused) cur_ops->readunlock(idx); return; } - do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu); if (p->rtort_mbtest == 0) atomic_inc(&n_rcu_torture_mberror); spin_lock(&rand_lock); @@ -1071,10 +1087,14 @@ static void rcu_torture_timer(unsigned long unused) /* Should not happen, but... */ pipe_count = RCU_TORTURE_PIPE_LEN; } - if (pipe_count > 1) + completed_end = cur_ops->completed(); + if (pipe_count > 1) { + do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu, ts, + completed, completed_end); rcutorture_trace_dump(); + } __this_cpu_inc(rcu_torture_count[pipe_count]); - completed = cur_ops->completed() - completed; + completed = completed_end - completed; if (completed > RCU_TORTURE_PIPE_LEN) { /* Should not happen, but... */ completed = RCU_TORTURE_PIPE_LEN; @@ -1094,11 +1114,13 @@ static int rcu_torture_reader(void *arg) { int completed; + int completed_end; int idx; DEFINE_RCU_RANDOM(rand); struct rcu_torture *p; int pipe_count; struct timer_list t; + unsigned long long ts; VERBOSE_PRINTK_STRING("rcu_torture_reader task started"); set_user_nice(current, 19); @@ -1112,6 +1134,7 @@ rcu_torture_reader(void *arg) } idx = cur_ops->readlock(); completed = cur_ops->completed(); + ts = rcu_trace_clock_local(); p = rcu_dereference_check(rcu_torture_current, rcu_read_lock_bh_held() || rcu_read_lock_sched_held() || @@ -1122,7 +1145,6 @@ rcu_torture_reader(void *arg) schedule_timeout_interruptible(HZ); continue; } - do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu); if (p->rtort_mbtest == 0) atomic_inc(&n_rcu_torture_mberror); cur_ops->read_delay(&rand); @@ -1132,10 +1154,14 @@ rcu_torture_reader(void *arg) /* Should not happen, but... */ pipe_count = RCU_TORTURE_PIPE_LEN; } - if (pipe_count > 1) + completed_end = cur_ops->completed(); + if (pipe_count > 1) { + do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu, + ts, completed, completed_end); rcutorture_trace_dump(); + } __this_cpu_inc(rcu_torture_count[pipe_count]); - completed = cur_ops->completed() - completed; + completed = completed_end - completed; if (completed > RCU_TORTURE_PIPE_LEN) { /* Should not happen, but... */ completed = RCU_TORTURE_PIPE_LEN; @@ -1301,19 +1327,35 @@ static void rcu_torture_shuffle_tasks(void) set_cpus_allowed_ptr(reader_tasks[i], shuffle_tmp_mask); } - if (fakewriter_tasks) { for (i = 0; i < nfakewriters; i++) if (fakewriter_tasks[i]) set_cpus_allowed_ptr(fakewriter_tasks[i], shuffle_tmp_mask); } - if (writer_task) set_cpus_allowed_ptr(writer_task, shuffle_tmp_mask); - if (stats_task) set_cpus_allowed_ptr(stats_task, shuffle_tmp_mask); + if (stutter_task) + set_cpus_allowed_ptr(stutter_task, shuffle_tmp_mask); + if (fqs_task) + set_cpus_allowed_ptr(fqs_task, shuffle_tmp_mask); + if (shutdown_task) + set_cpus_allowed_ptr(shutdown_task, shuffle_tmp_mask); +#ifdef CONFIG_HOTPLUG_CPU + if (onoff_task) + set_cpus_allowed_ptr(onoff_task, shuffle_tmp_mask); +#endif /* #ifdef CONFIG_HOTPLUG_CPU */ + if (stall_task) + set_cpus_allowed_ptr(stall_task, shuffle_tmp_mask); + if (barrier_cbs_tasks) + for (i = 0; i < n_barrier_cbs; i++) + if (barrier_cbs_tasks[i]) + set_cpus_allowed_ptr(barrier_cbs_tasks[i], + shuffle_tmp_mask); + if (barrier_task) + set_cpus_allowed_ptr(barrier_task, shuffle_tmp_mask); if (rcu_idle_cpu == -1) rcu_idle_cpu = num_online_cpus() - 1; @@ -1749,7 +1791,7 @@ static int rcu_torture_barrier_init(void) barrier_cbs_wq = kzalloc(n_barrier_cbs * sizeof(barrier_cbs_wq[0]), GFP_KERNEL); - if (barrier_cbs_tasks == NULL || barrier_cbs_wq == 0) + if (barrier_cbs_tasks == NULL || !barrier_cbs_wq) return -ENOMEM; for (i = 0; i < n_barrier_cbs; i++) { init_waitqueue_head(&barrier_cbs_wq[i]); diff --git a/kernel/rcutree.c b/kernel/rcutree.c index e441b77b614e..5b8ad827fd86 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -105,7 +105,7 @@ int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */ * The rcu_scheduler_active variable transitions from zero to one just * before the first task is spawned. So when this variable is zero, RCU * can assume that there is but one task, allowing RCU to (for example) - * optimized synchronize_sched() to a simple barrier(). When this variable + * optimize synchronize_sched() to a simple barrier(). When this variable * is one, RCU must actually do all the hard work required to detect real * grace periods. This variable is also used to suppress boot-time false * positives from lockdep-RCU error checking. @@ -217,12 +217,6 @@ module_param(blimit, long, 0444); module_param(qhimark, long, 0444); module_param(qlowmark, long, 0444); -int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */ -int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT; - -module_param(rcu_cpu_stall_suppress, int, 0644); -module_param(rcu_cpu_stall_timeout, int, 0644); - static ulong jiffies_till_first_fqs = RCU_JIFFIES_TILL_FORCE_QS; static ulong jiffies_till_next_fqs = RCU_JIFFIES_TILL_FORCE_QS; @@ -305,17 +299,27 @@ cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp) } /* - * Does the current CPU require a yet-as-unscheduled grace period? + * Does the current CPU require a not-yet-started grace period? + * The caller must have disabled interrupts to prevent races with + * normal callback registry. */ static int cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp) { - struct rcu_head **ntp; + int i; - ntp = rdp->nxttail[RCU_DONE_TAIL + - (ACCESS_ONCE(rsp->completed) != rdp->completed)]; - return rdp->nxttail[RCU_DONE_TAIL] && ntp && *ntp && - !rcu_gp_in_progress(rsp); + if (rcu_gp_in_progress(rsp)) + return 0; /* No, a grace period is already in progress. */ + if (!rdp->nxttail[RCU_NEXT_TAIL]) + return 0; /* No, this is a no-CBs (or offline) CPU. */ + if (*rdp->nxttail[RCU_NEXT_READY_TAIL]) + return 1; /* Yes, this CPU has newly registered callbacks. */ + for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) + if (rdp->nxttail[i - 1] != rdp->nxttail[i] && + ULONG_CMP_LT(ACCESS_ONCE(rsp->completed), + rdp->nxtcompleted[i])) + return 1; /* Yes, CBs for future grace period. */ + return 0; /* No grace period needed. */ } /* @@ -336,7 +340,7 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp) static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval, bool user) { - trace_rcu_dyntick("Start", oldval, 0); + trace_rcu_dyntick("Start", oldval, rdtp->dynticks_nesting); if (!user && !is_idle_task(current)) { struct task_struct *idle = idle_task(smp_processor_id()); @@ -727,7 +731,7 @@ EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online); * interrupt from idle, return true. The caller must have at least * disabled preemption. */ -int rcu_is_cpu_rrupt_from_idle(void) +static int rcu_is_cpu_rrupt_from_idle(void) { return __get_cpu_var(rcu_dynticks).dynticks_nesting <= 1; } @@ -793,28 +797,10 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) return 0; } -static int jiffies_till_stall_check(void) -{ - int till_stall_check = ACCESS_ONCE(rcu_cpu_stall_timeout); - - /* - * Limit check must be consistent with the Kconfig limits - * for CONFIG_RCU_CPU_STALL_TIMEOUT. - */ - if (till_stall_check < 3) { - ACCESS_ONCE(rcu_cpu_stall_timeout) = 3; - till_stall_check = 3; - } else if (till_stall_check > 300) { - ACCESS_ONCE(rcu_cpu_stall_timeout) = 300; - till_stall_check = 300; - } - return till_stall_check * HZ + RCU_STALL_DELAY_DELTA; -} - static void record_gp_stall_check_time(struct rcu_state *rsp) { rsp->gp_start = jiffies; - rsp->jiffies_stall = jiffies + jiffies_till_stall_check(); + rsp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check(); } /* @@ -857,7 +843,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp) raw_spin_unlock_irqrestore(&rnp->lock, flags); return; } - rsp->jiffies_stall = jiffies + 3 * jiffies_till_stall_check() + 3; + rsp->jiffies_stall = jiffies + 3 * rcu_jiffies_till_stall_check() + 3; raw_spin_unlock_irqrestore(&rnp->lock, flags); /* @@ -935,7 +921,7 @@ static void print_cpu_stall(struct rcu_state *rsp) raw_spin_lock_irqsave(&rnp->lock, flags); if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall)) rsp->jiffies_stall = jiffies + - 3 * jiffies_till_stall_check() + 3; + 3 * rcu_jiffies_till_stall_check() + 3; raw_spin_unlock_irqrestore(&rnp->lock, flags); set_need_resched(); /* kick ourselves to get things going. */ @@ -966,12 +952,6 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) } } -static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr) -{ - rcu_cpu_stall_suppress = 1; - return NOTIFY_DONE; -} - /** * rcu_cpu_stall_reset - prevent further stall warnings in current grace period * @@ -989,15 +969,6 @@ void rcu_cpu_stall_reset(void) rsp->jiffies_stall = jiffies + ULONG_MAX / 2; } -static struct notifier_block rcu_panic_block = { - .notifier_call = rcu_panic, -}; - -static void __init check_cpu_stall_init(void) -{ - atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block); -} - /* * Update CPU-local rcu_data state to record the newly noticed grace period. * This is used both when we started the grace period and when we notice @@ -1071,6 +1042,145 @@ static void init_callback_list(struct rcu_data *rdp) } /* + * Determine the value that ->completed will have at the end of the + * next subsequent grace period. This is used to tag callbacks so that + * a CPU can invoke callbacks in a timely fashion even if that CPU has + * been dyntick-idle for an extended period with callbacks under the + * influence of RCU_FAST_NO_HZ. + * + * The caller must hold rnp->lock with interrupts disabled. + */ +static unsigned long rcu_cbs_completed(struct rcu_state *rsp, + struct rcu_node *rnp) +{ + /* + * If RCU is idle, we just wait for the next grace period. + * But we can only be sure that RCU is idle if we are looking + * at the root rcu_node structure -- otherwise, a new grace + * period might have started, but just not yet gotten around + * to initializing the current non-root rcu_node structure. + */ + if (rcu_get_root(rsp) == rnp && rnp->gpnum == rnp->completed) + return rnp->completed + 1; + + /* + * Otherwise, wait for a possible partial grace period and + * then the subsequent full grace period. + */ + return rnp->completed + 2; +} + +/* + * If there is room, assign a ->completed number to any callbacks on + * this CPU that have not already been assigned. Also accelerate any + * callbacks that were previously assigned a ->completed number that has + * since proven to be too conservative, which can happen if callbacks get + * assigned a ->completed number while RCU is idle, but with reference to + * a non-root rcu_node structure. This function is idempotent, so it does + * not hurt to call it repeatedly. + * + * The caller must hold rnp->lock with interrupts disabled. + */ +static void rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp, + struct rcu_data *rdp) +{ + unsigned long c; + int i; + + /* If the CPU has no callbacks, nothing to do. */ + if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL]) + return; + + /* + * Starting from the sublist containing the callbacks most + * recently assigned a ->completed number and working down, find the + * first sublist that is not assignable to an upcoming grace period. + * Such a sublist has something in it (first two tests) and has + * a ->completed number assigned that will complete sooner than + * the ->completed number for newly arrived callbacks (last test). + * + * The key point is that any later sublist can be assigned the + * same ->completed number as the newly arrived callbacks, which + * means that the callbacks in any of these later sublist can be + * grouped into a single sublist, whether or not they have already + * been assigned a ->completed number. + */ + c = rcu_cbs_completed(rsp, rnp); + for (i = RCU_NEXT_TAIL - 1; i > RCU_DONE_TAIL; i--) + if (rdp->nxttail[i] != rdp->nxttail[i - 1] && + !ULONG_CMP_GE(rdp->nxtcompleted[i], c)) + break; + + /* + * If there are no sublist for unassigned callbacks, leave. + * At the same time, advance "i" one sublist, so that "i" will + * index into the sublist where all the remaining callbacks should + * be grouped into. + */ + if (++i >= RCU_NEXT_TAIL) + return; + + /* + * Assign all subsequent callbacks' ->completed number to the next + * full grace period and group them all in the sublist initially + * indexed by "i". + */ + for (; i <= RCU_NEXT_TAIL; i++) { + rdp->nxttail[i] = rdp->nxttail[RCU_NEXT_TAIL]; + rdp->nxtcompleted[i] = c; + } + + /* Trace depending on how much we were able to accelerate. */ + if (!*rdp->nxttail[RCU_WAIT_TAIL]) + trace_rcu_grace_period(rsp->name, rdp->gpnum, "AccWaitCB"); + else + trace_rcu_grace_period(rsp->name, rdp->gpnum, "AccReadyCB"); +} + +/* + * Move any callbacks whose grace period has completed to the + * RCU_DONE_TAIL sublist, then compact the remaining sublists and + * assign ->completed numbers to any callbacks in the RCU_NEXT_TAIL + * sublist. This function is idempotent, so it does not hurt to + * invoke it repeatedly. As long as it is not invoked -too- often... + * + * The caller must hold rnp->lock with interrupts disabled. + */ +static void rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp, + struct rcu_data *rdp) +{ + int i, j; + + /* If the CPU has no callbacks, nothing to do. */ + if (!rdp->nxttail[RCU_NEXT_TAIL] || !*rdp->nxttail[RCU_DONE_TAIL]) + return; + + /* + * Find all callbacks whose ->completed numbers indicate that they + * are ready to invoke, and put them into the RCU_DONE_TAIL sublist. + */ + for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) { + if (ULONG_CMP_LT(rnp->completed, rdp->nxtcompleted[i])) + break; + rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[i]; + } + /* Clean up any sublist tail pointers that were misordered above. */ + for (j = RCU_WAIT_TAIL; j < i; j++) + rdp->nxttail[j] = rdp->nxttail[RCU_DONE_TAIL]; + + /* Copy down callbacks to fill in empty sublists. */ + for (j = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++, j++) { + if (rdp->nxttail[j] == rdp->nxttail[RCU_NEXT_TAIL]) + break; + rdp->nxttail[j] = rdp->nxttail[i]; + rdp->nxtcompleted[j] = rdp->nxtcompleted[i]; + } + + /* Classify any remaining callbacks. */ + rcu_accelerate_cbs(rsp, rnp, rdp); +} + +/* * Advance this CPU's callbacks, but only if the current grace period * has ended. This may be called only from the CPU to whom the rdp * belongs. In addition, the corresponding leaf rcu_node structure's @@ -1080,12 +1190,15 @@ static void __rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) { /* Did another grace period end? */ - if (rdp->completed != rnp->completed) { + if (rdp->completed == rnp->completed) { - /* Advance callbacks. No harm if list empty. */ - rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[RCU_WAIT_TAIL]; - rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_READY_TAIL]; - rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; + /* No, so just accelerate recent callbacks. */ + rcu_accelerate_cbs(rsp, rnp, rdp); + + } else { + + /* Advance callbacks. */ + rcu_advance_cbs(rsp, rnp, rdp); /* Remember that we saw this grace-period completion. */ rdp->completed = rnp->completed; @@ -1392,17 +1505,10 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) /* * Because there is no grace period in progress right now, * any callbacks we have up to this point will be satisfied - * by the next grace period. So promote all callbacks to be - * handled after the end of the next grace period. If the - * CPU is not yet aware of the end of the previous grace period, - * we need to allow for the callback advancement that will - * occur when it does become aware. Deadlock prevents us from - * making it aware at this point: We cannot acquire a leaf - * rcu_node ->lock while holding the root rcu_node ->lock. + * by the next grace period. So this is a good place to + * assign a grace period number to recently posted callbacks. */ - rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; - if (rdp->completed == rsp->completed) - rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; + rcu_accelerate_cbs(rsp, rnp, rdp); rsp->gp_flags = RCU_GP_FLAG_INIT; raw_spin_unlock(&rnp->lock); /* Interrupts remain disabled. */ @@ -1527,7 +1633,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp) * This GP can't end until cpu checks in, so all of our * callbacks can be processed during the next GP. */ - rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; + rcu_accelerate_cbs(rsp, rnp, rdp); rcu_report_qs_rnp(mask, rsp, rnp, flags); /* rlses rnp->lock */ } @@ -1779,7 +1885,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) long bl, count, count_lazy; int i; - /* If no callbacks are ready, just return.*/ + /* If no callbacks are ready, just return. */ if (!cpu_has_callbacks_ready_to_invoke(rdp)) { trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, 0); trace_rcu_batch_end(rsp->name, 0, !!ACCESS_ONCE(rdp->nxtlist), @@ -2008,19 +2114,19 @@ __rcu_process_callbacks(struct rcu_state *rsp) WARN_ON_ONCE(rdp->beenonline == 0); - /* - * Advance callbacks in response to end of earlier grace - * period that some other CPU ended. - */ + /* Handle the end of a grace period that some other CPU ended. */ rcu_process_gp_end(rsp, rdp); /* Update RCU state based on any recent quiescent states. */ rcu_check_quiescent_state(rsp, rdp); /* Does this CPU require a not-yet-started grace period? */ + local_irq_save(flags); if (cpu_needs_another_gp(rsp, rdp)) { - raw_spin_lock_irqsave(&rcu_get_root(rsp)->lock, flags); + raw_spin_lock(&rcu_get_root(rsp)->lock); /* irqs disabled. */ rcu_start_gp(rsp, flags); /* releases above lock */ + } else { + local_irq_restore(flags); } /* If there are callbacks ready, invoke them. */ @@ -2719,9 +2825,6 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) rdp->dynticks = &per_cpu(rcu_dynticks, cpu); WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE); WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1); -#ifdef CONFIG_RCU_USER_QS - WARN_ON_ONCE(rdp->dynticks->in_user); -#endif rdp->cpu = cpu; rdp->rsp = rsp; rcu_boot_init_nocb_percpu_data(rdp); @@ -2938,6 +3041,10 @@ static void __init rcu_init_one(struct rcu_state *rsp, BUILD_BUG_ON(MAX_RCU_LVLS > ARRAY_SIZE(buf)); /* Fix buf[] init! */ + /* Silence gcc 4.8 warning about array index out of range. */ + if (rcu_num_lvls > RCU_NUM_LVLS) + panic("rcu_init_one: rcu_num_lvls overflow"); + /* Initialize the level-tracking arrays. */ for (i = 0; i < rcu_num_lvls; i++) @@ -3074,7 +3181,6 @@ void __init rcu_init(void) cpu_notifier(rcu_cpu_notify, 0); for_each_online_cpu(cpu) rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu); - check_cpu_stall_init(); } #include "rcutree_plugin.h" diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 4b69291b093d..c896b5045d9d 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -102,10 +102,6 @@ struct rcu_dynticks { /* idle-period nonlazy_posted snapshot. */ int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */ #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */ -#ifdef CONFIG_RCU_USER_QS - bool ignore_user_qs; /* Treat userspace as extended QS or not */ - bool in_user; /* Is the CPU in userland from RCU POV? */ -#endif }; /* RCU's kthread states for tracing. */ @@ -282,6 +278,8 @@ struct rcu_data { */ struct rcu_head *nxtlist; struct rcu_head **nxttail[RCU_NEXT_SIZE]; + unsigned long nxtcompleted[RCU_NEXT_SIZE]; + /* grace periods for sublists. */ long qlen_lazy; /* # of lazy queued callbacks */ long qlen; /* # of queued callbacks, incl lazy */ long qlen_last_fqs_check; @@ -343,11 +341,6 @@ struct rcu_data { #define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */ -#ifdef CONFIG_PROVE_RCU -#define RCU_STALL_DELAY_DELTA (5 * HZ) -#else -#define RCU_STALL_DELAY_DELTA 0 -#endif #define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */ /* to take at least one */ /* scheduling clock irq */ diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c index 16502d3a71c8..13b243a323fa 100644 --- a/kernel/rtmutex-debug.c +++ b/kernel/rtmutex-debug.c @@ -17,6 +17,7 @@ * See rt.c in preempt-rt for proper credits and further information */ #include <linux/sched.h> +#include <linux/sched/rt.h> #include <linux/delay.h> #include <linux/export.h> #include <linux/spinlock.h> diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c index 98ec49475460..7890b10084a7 100644 --- a/kernel/rtmutex-tester.c +++ b/kernel/rtmutex-tester.c @@ -10,6 +10,7 @@ #include <linux/kthread.h> #include <linux/export.h> #include <linux/sched.h> +#include <linux/sched/rt.h> #include <linux/spinlock.h> #include <linux/timer.h> #include <linux/freezer.h> diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c index a242e691c993..1e09308bf2a1 100644 --- a/kernel/rtmutex.c +++ b/kernel/rtmutex.c @@ -13,6 +13,7 @@ #include <linux/spinlock.h> #include <linux/export.h> #include <linux/sched.h> +#include <linux/sched/rt.h> #include <linux/timer.h> #include "rtmutex_common.h" diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 26058d0bebba..4a88f1d51563 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4371,7 +4371,7 @@ bool __sched yield_to(struct task_struct *p, bool preempt) struct task_struct *curr = current; struct rq *rq, *p_rq; unsigned long flags; - bool yielded = 0; + int yielded = 0; local_irq_save(flags); rq = this_rq(); @@ -4667,6 +4667,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) */ idle->sched_class = &idle_sched_class; ftrace_graph_init_idle_task(idle, cpu); + vtime_init_idle(idle); #if defined(CONFIG_SMP) sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu); #endif @@ -7508,6 +7509,25 @@ static int sched_rt_global_constraints(void) } #endif /* CONFIG_RT_GROUP_SCHED */ +int sched_rr_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int ret; + static DEFINE_MUTEX(mutex); + + mutex_lock(&mutex); + ret = proc_dointvec(table, write, buffer, lenp, ppos); + /* make sure that internally we keep jiffies */ + /* also, writing zero resets timeslice to default */ + if (!ret && write) { + sched_rr_timeslice = sched_rr_timeslice <= 0 ? + RR_TIMESLICE : msecs_to_jiffies(sched_rr_timeslice); + } + mutex_unlock(&mutex); + return ret; +} + int sched_rt_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c index 23aa789c53ee..1095e878a46f 100644 --- a/kernel/sched/cpupri.c +++ b/kernel/sched/cpupri.c @@ -28,6 +28,8 @@ */ #include <linux/gfp.h> +#include <linux/sched.h> +#include <linux/sched/rt.h> #include "cpupri.h" /* Convert between a 140 based task->prio, and our 102 based cpupri */ diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 293b202fcf79..9857329ed280 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -3,6 +3,7 @@ #include <linux/tsacct_kern.h> #include <linux/kernel_stat.h> #include <linux/static_key.h> +#include <linux/context_tracking.h> #include "sched.h" @@ -163,7 +164,7 @@ void account_user_time(struct task_struct *p, cputime_t cputime, task_group_account_field(p, index, (__force u64) cputime); /* Account for user time used */ - acct_update_integrals(p); + acct_account_cputime(p); } /* @@ -213,7 +214,7 @@ void __account_system_time(struct task_struct *p, cputime_t cputime, task_group_account_field(p, index, (__force u64) cputime); /* Account for system time used */ - acct_update_integrals(p); + acct_account_cputime(p); } /* @@ -295,6 +296,7 @@ static __always_inline bool steal_account_process_tick(void) void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times) { struct signal_struct *sig = tsk->signal; + cputime_t utime, stime; struct task_struct *t; times->utime = sig->utime; @@ -308,16 +310,15 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times) t = tsk; do { - times->utime += t->utime; - times->stime += t->stime; + task_cputime(tsk, &utime, &stime); + times->utime += utime; + times->stime += stime; times->sum_exec_runtime += task_sched_runtime(t); } while_each_thread(tsk, t); out: rcu_read_unlock(); } -#ifndef CONFIG_VIRT_CPU_ACCOUNTING - #ifdef CONFIG_IRQ_TIME_ACCOUNTING /* * Account a tick to a process and cpustat @@ -382,11 +383,12 @@ static void irqtime_account_idle_ticks(int ticks) irqtime_account_process_tick(current, 0, rq); } #else /* CONFIG_IRQ_TIME_ACCOUNTING */ -static void irqtime_account_idle_ticks(int ticks) {} -static void irqtime_account_process_tick(struct task_struct *p, int user_tick, +static inline void irqtime_account_idle_ticks(int ticks) {} +static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick, struct rq *rq) {} #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE /* * Account a single tick of cpu time. * @p: the process that the cpu time gets accounted to @@ -397,6 +399,9 @@ void account_process_tick(struct task_struct *p, int user_tick) cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); struct rq *rq = this_rq(); + if (vtime_accounting_enabled()) + return; + if (sched_clock_irqtime) { irqtime_account_process_tick(p, user_tick, rq); return; @@ -438,8 +443,7 @@ void account_idle_ticks(unsigned long ticks) account_idle_time(jiffies_to_cputime(ticks)); } - -#endif +#endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ /* * Use precise platform statistics if available: @@ -461,25 +465,20 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime *st = cputime.stime; } -void vtime_account_system_irqsafe(struct task_struct *tsk) -{ - unsigned long flags; - - local_irq_save(flags); - vtime_account_system(tsk); - local_irq_restore(flags); -} -EXPORT_SYMBOL_GPL(vtime_account_system_irqsafe); - #ifndef __ARCH_HAS_VTIME_TASK_SWITCH void vtime_task_switch(struct task_struct *prev) { + if (!vtime_accounting_enabled()) + return; + if (is_idle_task(prev)) vtime_account_idle(prev); else vtime_account_system(prev); +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE vtime_account_user(prev); +#endif arch_vtime_task_switch(prev); } #endif @@ -493,27 +492,40 @@ void vtime_task_switch(struct task_struct *prev) * vtime_account(). */ #ifndef __ARCH_HAS_VTIME_ACCOUNT -void vtime_account(struct task_struct *tsk) +void vtime_account_irq_enter(struct task_struct *tsk) { - if (in_interrupt() || !is_idle_task(tsk)) - vtime_account_system(tsk); - else - vtime_account_idle(tsk); + if (!vtime_accounting_enabled()) + return; + + if (!in_interrupt()) { + /* + * If we interrupted user, context_tracking_in_user() + * is 1 because the context tracking don't hook + * on irq entry/exit. This way we know if + * we need to flush user time on kernel entry. + */ + if (context_tracking_in_user()) { + vtime_account_user(tsk); + return; + } + + if (is_idle_task(tsk)) { + vtime_account_idle(tsk); + return; + } + } + vtime_account_system(tsk); } -EXPORT_SYMBOL_GPL(vtime_account); +EXPORT_SYMBOL_GPL(vtime_account_irq_enter); #endif /* __ARCH_HAS_VTIME_ACCOUNT */ -#else - -#ifndef nsecs_to_cputime -# define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs) -#endif +#else /* !CONFIG_VIRT_CPU_ACCOUNTING */ -static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total) +static cputime_t scale_stime(cputime_t stime, cputime_t rtime, cputime_t total) { u64 temp = (__force u64) rtime; - temp *= (__force u64) utime; + temp *= (__force u64) stime; if (sizeof(cputime_t) == 4) temp = div_u64(temp, (__force u32) total); @@ -531,10 +543,10 @@ static void cputime_adjust(struct task_cputime *curr, struct cputime *prev, cputime_t *ut, cputime_t *st) { - cputime_t rtime, utime, total; + cputime_t rtime, stime, total; - utime = curr->utime; - total = utime + curr->stime; + stime = curr->stime; + total = stime + curr->utime; /* * Tick based cputime accounting depend on random scheduling @@ -549,17 +561,17 @@ static void cputime_adjust(struct task_cputime *curr, rtime = nsecs_to_cputime(curr->sum_exec_runtime); if (total) - utime = scale_utime(utime, rtime, total); + stime = scale_stime(stime, rtime, total); else - utime = rtime; + stime = rtime; /* * If the tick based count grows faster than the scheduler one, * the result of the scaling may go backward. * Let's enforce monotonicity. */ - prev->utime = max(prev->utime, utime); - prev->stime = max(prev->stime, rtime - prev->utime); + prev->stime = max(prev->stime, stime); + prev->utime = max(prev->utime, rtime - prev->stime); *ut = prev->utime; *st = prev->stime; @@ -568,11 +580,10 @@ static void cputime_adjust(struct task_cputime *curr, void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st) { struct task_cputime cputime = { - .utime = p->utime, - .stime = p->stime, .sum_exec_runtime = p->se.sum_exec_runtime, }; + task_cputime(p, &cputime.utime, &cputime.stime); cputime_adjust(&cputime, &p->prev_cputime, ut, st); } @@ -586,4 +597,221 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime thread_group_cputime(p, &cputime); cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st); } -#endif +#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */ + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN +static unsigned long long vtime_delta(struct task_struct *tsk) +{ + unsigned long long clock; + + clock = sched_clock(); + if (clock < tsk->vtime_snap) + return 0; + + return clock - tsk->vtime_snap; +} + +static cputime_t get_vtime_delta(struct task_struct *tsk) +{ + unsigned long long delta = vtime_delta(tsk); + + WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_SLEEPING); + tsk->vtime_snap += delta; + + /* CHECKME: always safe to convert nsecs to cputime? */ + return nsecs_to_cputime(delta); +} + +static void __vtime_account_system(struct task_struct *tsk) +{ + cputime_t delta_cpu = get_vtime_delta(tsk); + + account_system_time(tsk, irq_count(), delta_cpu, cputime_to_scaled(delta_cpu)); +} + +void vtime_account_system(struct task_struct *tsk) +{ + if (!vtime_accounting_enabled()) + return; + + write_seqlock(&tsk->vtime_seqlock); + __vtime_account_system(tsk); + write_sequnlock(&tsk->vtime_seqlock); +} + +void vtime_account_irq_exit(struct task_struct *tsk) +{ + if (!vtime_accounting_enabled()) + return; + + write_seqlock(&tsk->vtime_seqlock); + if (context_tracking_in_user()) + tsk->vtime_snap_whence = VTIME_USER; + __vtime_account_system(tsk); + write_sequnlock(&tsk->vtime_seqlock); +} + +void vtime_account_user(struct task_struct *tsk) +{ + cputime_t delta_cpu; + + if (!vtime_accounting_enabled()) + return; + + delta_cpu = get_vtime_delta(tsk); + + write_seqlock(&tsk->vtime_seqlock); + tsk->vtime_snap_whence = VTIME_SYS; + account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu)); + write_sequnlock(&tsk->vtime_seqlock); +} + +void vtime_user_enter(struct task_struct *tsk) +{ + if (!vtime_accounting_enabled()) + return; + + write_seqlock(&tsk->vtime_seqlock); + tsk->vtime_snap_whence = VTIME_USER; + __vtime_account_system(tsk); + write_sequnlock(&tsk->vtime_seqlock); +} + +void vtime_guest_enter(struct task_struct *tsk) +{ + write_seqlock(&tsk->vtime_seqlock); + __vtime_account_system(tsk); + current->flags |= PF_VCPU; + write_sequnlock(&tsk->vtime_seqlock); +} + +void vtime_guest_exit(struct task_struct *tsk) +{ + write_seqlock(&tsk->vtime_seqlock); + __vtime_account_system(tsk); + current->flags &= ~PF_VCPU; + write_sequnlock(&tsk->vtime_seqlock); +} + +void vtime_account_idle(struct task_struct *tsk) +{ + cputime_t delta_cpu = get_vtime_delta(tsk); + + account_idle_time(delta_cpu); +} + +bool vtime_accounting_enabled(void) +{ + return context_tracking_active(); +} + +void arch_vtime_task_switch(struct task_struct *prev) +{ + write_seqlock(&prev->vtime_seqlock); + prev->vtime_snap_whence = VTIME_SLEEPING; + write_sequnlock(&prev->vtime_seqlock); + + write_seqlock(¤t->vtime_seqlock); + current->vtime_snap_whence = VTIME_SYS; + current->vtime_snap = sched_clock(); + write_sequnlock(¤t->vtime_seqlock); +} + +void vtime_init_idle(struct task_struct *t) +{ + unsigned long flags; + + write_seqlock_irqsave(&t->vtime_seqlock, flags); + t->vtime_snap_whence = VTIME_SYS; + t->vtime_snap = sched_clock(); + write_sequnlock_irqrestore(&t->vtime_seqlock, flags); +} + +cputime_t task_gtime(struct task_struct *t) +{ + unsigned int seq; + cputime_t gtime; + + do { + seq = read_seqbegin(&t->vtime_seqlock); + + gtime = t->gtime; + if (t->flags & PF_VCPU) + gtime += vtime_delta(t); + + } while (read_seqretry(&t->vtime_seqlock, seq)); + + return gtime; +} + +/* + * Fetch cputime raw values from fields of task_struct and + * add up the pending nohz execution time since the last + * cputime snapshot. + */ +static void +fetch_task_cputime(struct task_struct *t, + cputime_t *u_dst, cputime_t *s_dst, + cputime_t *u_src, cputime_t *s_src, + cputime_t *udelta, cputime_t *sdelta) +{ + unsigned int seq; + unsigned long long delta; + + do { + *udelta = 0; + *sdelta = 0; + + seq = read_seqbegin(&t->vtime_seqlock); + + if (u_dst) + *u_dst = *u_src; + if (s_dst) + *s_dst = *s_src; + + /* Task is sleeping, nothing to add */ + if (t->vtime_snap_whence == VTIME_SLEEPING || + is_idle_task(t)) + continue; + + delta = vtime_delta(t); + + /* + * Task runs either in user or kernel space, add pending nohz time to + * the right place. + */ + if (t->vtime_snap_whence == VTIME_USER || t->flags & PF_VCPU) { + *udelta = delta; + } else { + if (t->vtime_snap_whence == VTIME_SYS) + *sdelta = delta; + } + } while (read_seqretry(&t->vtime_seqlock, seq)); +} + + +void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime) +{ + cputime_t udelta, sdelta; + + fetch_task_cputime(t, utime, stime, &t->utime, + &t->stime, &udelta, &sdelta); + if (utime) + *utime += udelta; + if (stime) + *stime += sdelta; +} + +void task_cputime_scaled(struct task_struct *t, + cputime_t *utimescaled, cputime_t *stimescaled) +{ + cputime_t udelta, sdelta; + + fetch_task_cputime(t, utimescaled, stimescaled, + &t->utimescaled, &t->stimescaled, &udelta, &sdelta); + if (utimescaled) + *utimescaled += cputime_to_scaled(udelta); + if (stimescaled) + *stimescaled += cputime_to_scaled(sdelta); +} +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */ diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 81fa53643409..7a33e5986fc5 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1680,9 +1680,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) } /* ensure we never gain time by being placed backwards. */ - vruntime = max_vruntime(se->vruntime, vruntime); - - se->vruntime = vruntime; + se->vruntime = max_vruntime(se->vruntime, vruntime); } static void check_enqueue_throttle(struct cfs_rq *cfs_rq); @@ -3254,25 +3252,18 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) */ static int select_idle_sibling(struct task_struct *p, int target) { - int cpu = smp_processor_id(); - int prev_cpu = task_cpu(p); struct sched_domain *sd; struct sched_group *sg; - int i; + int i = task_cpu(p); - /* - * If the task is going to be woken-up on this cpu and if it is - * already idle, then it is the right target. - */ - if (target == cpu && idle_cpu(cpu)) - return cpu; + if (idle_cpu(target)) + return target; /* - * If the task is going to be woken-up on the cpu where it previously - * ran and if it is currently idle, then it the right target. + * If the prevous cpu is cache affine and idle, don't be stupid. */ - if (target == prev_cpu && idle_cpu(prev_cpu)) - return prev_cpu; + if (i != target && cpus_share_cache(i, target) && idle_cpu(i)) + return i; /* * Otherwise, iterate the domains and find an elegible idle cpu. @@ -3286,7 +3277,7 @@ static int select_idle_sibling(struct task_struct *p, int target) goto next; for_each_cpu(i, sched_group_cpus(sg)) { - if (!idle_cpu(i)) + if (i == target || !idle_cpu(i)) goto next; } @@ -6101,7 +6092,7 @@ static unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task * idle runqueue: */ if (rq->cfs.load.weight) - rr_interval = NS_TO_JIFFIES(sched_slice(&rq->cfs, se)); + rr_interval = NS_TO_JIFFIES(sched_slice(cfs_rq_of(se), se)); return rr_interval; } diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 4f02b2847357..127a2c4cf4ab 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -7,6 +7,8 @@ #include <linux/slab.h> +int sched_rr_timeslice = RR_TIMESLICE; + static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun); struct rt_bandwidth def_rt_bandwidth; @@ -925,8 +927,8 @@ static void update_curr_rt(struct rq *rq) return; delta_exec = rq->clock_task - curr->se.exec_start; - if (unlikely((s64)delta_exec < 0)) - delta_exec = 0; + if (unlikely((s64)delta_exec <= 0)) + return; schedstat_set(curr->se.statistics.exec_max, max(curr->se.statistics.exec_max, delta_exec)); @@ -1427,8 +1429,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p) static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) { if (!task_running(rq, p) && - (cpu < 0 || cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) && - (p->nr_cpus_allowed > 1)) + cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) return 1; return 0; } @@ -1889,8 +1890,11 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p) * we may need to handle the pulling of RT tasks * now. */ - if (p->on_rq && !rq->rt.rt_nr_running) - pull_rt_task(rq); + if (!p->on_rq || rq->rt.rt_nr_running) + return; + + if (pull_rt_task(rq)) + resched_task(rq->curr); } void init_sched_rt_class(void) @@ -1985,7 +1989,11 @@ static void watchdog(struct rq *rq, struct task_struct *p) if (soft != RLIM_INFINITY) { unsigned long next; - p->rt.timeout++; + if (p->rt.watchdog_stamp != jiffies) { + p->rt.timeout++; + p->rt.watchdog_stamp = jiffies; + } + next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ); if (p->rt.timeout > next) p->cputime_expires.sched_exp = p->se.sum_exec_runtime; @@ -2010,7 +2018,7 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued) if (--p->rt.time_slice) return; - p->rt.time_slice = RR_TIMESLICE; + p->rt.time_slice = sched_rr_timeslice; /* * Requeue to the end of queue if we (and all of our ancestors) are the @@ -2041,7 +2049,7 @@ static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task) * Time slice is 0 for SCHED_FIFO tasks */ if (task->policy == SCHED_RR) - return RR_TIMESLICE; + return sched_rr_timeslice; else return 0; } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index fc886441436a..cc03cfdf469f 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1,5 +1,7 @@ #include <linux/sched.h> +#include <linux/sched/sysctl.h> +#include <linux/sched/rt.h> #include <linux/mutex.h> #include <linux/spinlock.h> #include <linux/stop_machine.h> diff --git a/kernel/signal.c b/kernel/signal.c index 3d09cf6cde75..7f82adbad480 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1632,6 +1632,7 @@ bool do_notify_parent(struct task_struct *tsk, int sig) unsigned long flags; struct sighand_struct *psig; bool autoreap = false; + cputime_t utime, stime; BUG_ON(sig == -1); @@ -1669,8 +1670,9 @@ bool do_notify_parent(struct task_struct *tsk, int sig) task_uid(tsk)); rcu_read_unlock(); - info.si_utime = cputime_to_clock_t(tsk->utime + tsk->signal->utime); - info.si_stime = cputime_to_clock_t(tsk->stime + tsk->signal->stime); + task_cputime(tsk, &utime, &stime); + info.si_utime = cputime_to_clock_t(utime + tsk->signal->utime); + info.si_stime = cputime_to_clock_t(stime + tsk->signal->stime); info.si_status = tsk->exit_code & 0x7f; if (tsk->exit_code & 0x80) @@ -1734,6 +1736,7 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, unsigned long flags; struct task_struct *parent; struct sighand_struct *sighand; + cputime_t utime, stime; if (for_ptracer) { parent = tsk->parent; @@ -1752,8 +1755,9 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, info.si_uid = from_kuid_munged(task_cred_xxx(parent, user_ns), task_uid(tsk)); rcu_read_unlock(); - info.si_utime = cputime_to_clock_t(tsk->utime); - info.si_stime = cputime_to_clock_t(tsk->stime); + task_cputime(tsk, &utime, &stime); + info.si_utime = cputime_to_clock_t(utime); + info.si_stime = cputime_to_clock_t(stime); info.si_code = why; switch (why) { diff --git a/kernel/smpboot.c b/kernel/smpboot.c index d6c5fc054242..d4abac261779 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -183,9 +183,10 @@ __smpboot_create_thread(struct smp_hotplug_thread *ht, unsigned int cpu) kfree(td); return PTR_ERR(tsk); } - get_task_struct(tsk); *per_cpu_ptr(ht->store, cpu) = tsk; + if (ht->create) + ht->create(cpu); return 0; } @@ -225,7 +226,7 @@ static void smpboot_park_thread(struct smp_hotplug_thread *ht, unsigned int cpu) { struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu); - if (tsk) + if (tsk && !ht->selfparking) kthread_park(tsk); } diff --git a/kernel/softirq.c b/kernel/softirq.c index ed567babe789..f5cc25f147a6 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -221,7 +221,7 @@ asmlinkage void __do_softirq(void) current->flags &= ~PF_MEMALLOC; pending = local_softirq_pending(); - vtime_account_irq_enter(current); + account_irq_enter_time(current); __local_bh_disable((unsigned long)__builtin_return_address(0), SOFTIRQ_OFFSET); @@ -272,7 +272,7 @@ restart: lockdep_softirq_exit(); - vtime_account_irq_exit(current); + account_irq_exit_time(current); __local_bh_enable(SOFTIRQ_OFFSET); tsk_restore_flags(current, old_flags, PF_MEMALLOC); } @@ -341,7 +341,7 @@ static inline void invoke_softirq(void) */ void irq_exit(void) { - vtime_account_irq_exit(current); + account_irq_exit_time(current); trace_hardirq_exit(); sub_preempt_count(IRQ_EXIT_OFFSET); if (!in_interrupt() && local_softirq_pending()) diff --git a/kernel/srcu.c b/kernel/srcu.c index 2b859828cdc3..01d5ccb8bfe3 100644 --- a/kernel/srcu.c +++ b/kernel/srcu.c @@ -282,12 +282,8 @@ static int srcu_readers_active(struct srcu_struct *sp) */ void cleanup_srcu_struct(struct srcu_struct *sp) { - int sum; - - sum = srcu_readers_active(sp); - WARN_ON(sum); /* Leakage unless caller handles error. */ - if (sum != 0) - return; + if (WARN_ON(srcu_readers_active(sp))) + return; /* Leakage unless caller handles error. */ free_percpu(sp->per_cpu_ref); sp->per_cpu_ref = NULL; } @@ -302,9 +298,8 @@ int __srcu_read_lock(struct srcu_struct *sp) { int idx; + idx = ACCESS_ONCE(sp->completed) & 0x1; preempt_disable(); - idx = rcu_dereference_index_check(sp->completed, - rcu_read_lock_sched_held()) & 0x1; ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->c[idx]) += 1; smp_mb(); /* B */ /* Avoid leaking the critical section. */ ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->seq[idx]) += 1; @@ -321,10 +316,8 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock); */ void __srcu_read_unlock(struct srcu_struct *sp, int idx) { - preempt_disable(); smp_mb(); /* C */ /* Avoid leaking the critical section. */ - ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->c[idx]) -= 1; - preempt_enable(); + this_cpu_dec(sp->per_cpu_ref->c[idx]); } EXPORT_SYMBOL_GPL(__srcu_read_unlock); @@ -423,6 +416,7 @@ static void __synchronize_srcu(struct srcu_struct *sp, int trycount) !lock_is_held(&rcu_sched_lock_map), "Illegal synchronize_srcu() in same-type SRCU (or RCU) read-side critical section"); + might_sleep(); init_completion(&rcu.completion); head->next = NULL; @@ -455,10 +449,12 @@ static void __synchronize_srcu(struct srcu_struct *sp, int trycount) * synchronize_srcu - wait for prior SRCU read-side critical-section completion * @sp: srcu_struct with which to synchronize. * - * Flip the completed counter, and wait for the old count to drain to zero. - * As with classic RCU, the updater must use some separate means of - * synchronizing concurrent updates. Can block; must be called from - * process context. + * Wait for the count to drain to zero of both indexes. To avoid the + * possible starvation of synchronize_srcu(), it waits for the count of + * the index=((->completed & 1) ^ 1) to drain to zero at first, + * and then flip the completed and wait for the count of the other index. + * + * Can block; must be called from process context. * * Note that it is illegal to call synchronize_srcu() from the corresponding * SRCU read-side critical section; doing so will result in deadlock. @@ -480,12 +476,11 @@ EXPORT_SYMBOL_GPL(synchronize_srcu); * Wait for an SRCU grace period to elapse, but be more aggressive about * spinning rather than blocking when waiting. * - * Note that it is illegal to call this function while holding any lock - * that is acquired by a CPU-hotplug notifier. It is also illegal to call - * synchronize_srcu_expedited() from the corresponding SRCU read-side - * critical section; doing so will result in deadlock. However, it is - * perfectly legal to call synchronize_srcu_expedited() on one srcu_struct - * from some other srcu_struct's read-side critical section, as long as + * Note that it is also illegal to call synchronize_srcu_expedited() + * from the corresponding SRCU read-side critical section; + * doing so will result in deadlock. However, it is perfectly legal + * to call synchronize_srcu_expedited() on one srcu_struct from some + * other srcu_struct's read-side critical section, as long as * the resulting graph of srcu_structs is acyclic. */ void synchronize_srcu_expedited(struct srcu_struct *sp) diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 2f194e965715..95d178c62d5a 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -18,7 +18,7 @@ #include <linux/stop_machine.h> #include <linux/interrupt.h> #include <linux/kallsyms.h> - +#include <linux/smpboot.h> #include <linux/atomic.h> /* @@ -37,10 +37,10 @@ struct cpu_stopper { spinlock_t lock; bool enabled; /* is this stopper enabled? */ struct list_head works; /* list of pending works */ - struct task_struct *thread; /* stopper thread */ }; static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper); +static DEFINE_PER_CPU(struct task_struct *, cpu_stopper_task); static bool stop_machine_initialized = false; static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo) @@ -62,16 +62,18 @@ static void cpu_stop_signal_done(struct cpu_stop_done *done, bool executed) } /* queue @work to @stopper. if offline, @work is completed immediately */ -static void cpu_stop_queue_work(struct cpu_stopper *stopper, - struct cpu_stop_work *work) +static void cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work) { + struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); + struct task_struct *p = per_cpu(cpu_stopper_task, cpu); + unsigned long flags; spin_lock_irqsave(&stopper->lock, flags); if (stopper->enabled) { list_add_tail(&work->list, &stopper->works); - wake_up_process(stopper->thread); + wake_up_process(p); } else cpu_stop_signal_done(work->done, false); @@ -108,7 +110,7 @@ int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg) struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done }; cpu_stop_init_done(&done, 1); - cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu), &work); + cpu_stop_queue_work(cpu, &work); wait_for_completion(&done.completion); return done.executed ? done.ret : -ENOENT; } @@ -130,7 +132,7 @@ void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg, struct cpu_stop_work *work_buf) { *work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, }; - cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu), work_buf); + cpu_stop_queue_work(cpu, work_buf); } /* static data for stop_cpus */ @@ -159,8 +161,7 @@ static void queue_stop_cpus_work(const struct cpumask *cpumask, */ preempt_disable(); for_each_cpu(cpu, cpumask) - cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu), - &per_cpu(stop_cpus_work, cpu)); + cpu_stop_queue_work(cpu, &per_cpu(stop_cpus_work, cpu)); preempt_enable(); } @@ -244,20 +245,25 @@ int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg) return ret; } -static int cpu_stopper_thread(void *data) +static int cpu_stop_should_run(unsigned int cpu) +{ + struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); + unsigned long flags; + int run; + + spin_lock_irqsave(&stopper->lock, flags); + run = !list_empty(&stopper->works); + spin_unlock_irqrestore(&stopper->lock, flags); + return run; +} + +static void cpu_stopper_thread(unsigned int cpu) { - struct cpu_stopper *stopper = data; + struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); struct cpu_stop_work *work; int ret; repeat: - set_current_state(TASK_INTERRUPTIBLE); /* mb paired w/ kthread_stop */ - - if (kthread_should_stop()) { - __set_current_state(TASK_RUNNING); - return 0; - } - work = NULL; spin_lock_irq(&stopper->lock); if (!list_empty(&stopper->works)) { @@ -273,8 +279,6 @@ repeat: struct cpu_stop_done *done = work->done; char ksym_buf[KSYM_NAME_LEN] __maybe_unused; - __set_current_state(TASK_RUNNING); - /* cpu stop callbacks are not allowed to sleep */ preempt_disable(); @@ -290,88 +294,55 @@ repeat: ksym_buf), arg); cpu_stop_signal_done(done, true); - } else - schedule(); - - goto repeat; + goto repeat; + } } extern void sched_set_stop_task(int cpu, struct task_struct *stop); -/* manage stopper for a cpu, mostly lifted from sched migration thread mgmt */ -static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb, - unsigned long action, void *hcpu) +static void cpu_stop_create(unsigned int cpu) +{ + sched_set_stop_task(cpu, per_cpu(cpu_stopper_task, cpu)); +} + +static void cpu_stop_park(unsigned int cpu) { - unsigned int cpu = (unsigned long)hcpu; struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); - struct task_struct *p; - - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_UP_PREPARE: - BUG_ON(stopper->thread || stopper->enabled || - !list_empty(&stopper->works)); - p = kthread_create_on_node(cpu_stopper_thread, - stopper, - cpu_to_node(cpu), - "migration/%d", cpu); - if (IS_ERR(p)) - return notifier_from_errno(PTR_ERR(p)); - get_task_struct(p); - kthread_bind(p, cpu); - sched_set_stop_task(cpu, p); - stopper->thread = p; - break; - - case CPU_ONLINE: - /* strictly unnecessary, as first user will wake it */ - wake_up_process(stopper->thread); - /* mark enabled */ - spin_lock_irq(&stopper->lock); - stopper->enabled = true; - spin_unlock_irq(&stopper->lock); - break; - -#ifdef CONFIG_HOTPLUG_CPU - case CPU_UP_CANCELED: - case CPU_POST_DEAD: - { - struct cpu_stop_work *work; - - sched_set_stop_task(cpu, NULL); - /* kill the stopper */ - kthread_stop(stopper->thread); - /* drain remaining works */ - spin_lock_irq(&stopper->lock); - list_for_each_entry(work, &stopper->works, list) - cpu_stop_signal_done(work->done, false); - stopper->enabled = false; - spin_unlock_irq(&stopper->lock); - /* release the stopper */ - put_task_struct(stopper->thread); - stopper->thread = NULL; - break; - } -#endif - } + struct cpu_stop_work *work; + unsigned long flags; - return NOTIFY_OK; + /* drain remaining works */ + spin_lock_irqsave(&stopper->lock, flags); + list_for_each_entry(work, &stopper->works, list) + cpu_stop_signal_done(work->done, false); + stopper->enabled = false; + spin_unlock_irqrestore(&stopper->lock, flags); } -/* - * Give it a higher priority so that cpu stopper is available to other - * cpu notifiers. It currently shares the same priority as sched - * migration_notifier. - */ -static struct notifier_block __cpuinitdata cpu_stop_cpu_notifier = { - .notifier_call = cpu_stop_cpu_callback, - .priority = 10, +static void cpu_stop_unpark(unsigned int cpu) +{ + struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); + + spin_lock_irq(&stopper->lock); + stopper->enabled = true; + spin_unlock_irq(&stopper->lock); +} + +static struct smp_hotplug_thread cpu_stop_threads = { + .store = &cpu_stopper_task, + .thread_should_run = cpu_stop_should_run, + .thread_fn = cpu_stopper_thread, + .thread_comm = "migration/%u", + .create = cpu_stop_create, + .setup = cpu_stop_unpark, + .park = cpu_stop_park, + .unpark = cpu_stop_unpark, + .selfparking = true, }; static int __init cpu_stop_init(void) { - void *bcpu = (void *)(long)smp_processor_id(); unsigned int cpu; - int err; for_each_possible_cpu(cpu) { struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); @@ -380,15 +351,8 @@ static int __init cpu_stop_init(void) INIT_LIST_HEAD(&stopper->works); } - /* start one for the boot cpu */ - err = cpu_stop_cpu_callback(&cpu_stop_cpu_notifier, CPU_UP_PREPARE, - bcpu); - BUG_ON(err != NOTIFY_OK); - cpu_stop_cpu_callback(&cpu_stop_cpu_notifier, CPU_ONLINE, bcpu); - register_cpu_notifier(&cpu_stop_cpu_notifier); - + BUG_ON(smpboot_register_percpu_thread(&cpu_stop_threads)); stop_machine_initialized = true; - return 0; } early_initcall(cpu_stop_init); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index c88878db491e..4fc9be955c71 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -61,6 +61,7 @@ #include <linux/kmod.h> #include <linux/capability.h> #include <linux/binfmts.h> +#include <linux/sched/sysctl.h> #include <asm/uaccess.h> #include <asm/processor.h> @@ -403,6 +404,13 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = sched_rt_handler, }, + { + .procname = "sched_rr_timeslice_ms", + .data = &sched_rr_timeslice, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = sched_rr_handler, + }, #ifdef CONFIG_SCHED_AUTOGROUP { .procname = "sched_autogroup_enabled", diff --git a/kernel/time.c b/kernel/time.c index d226c6a3fd28..c2a27dd93142 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -115,6 +115,12 @@ SYSCALL_DEFINE2(gettimeofday, struct timeval __user *, tv, } /* + * Indicates if there is an offset between the system clock and the hardware + * clock/persistent clock/rtc. + */ +int persistent_clock_is_local; + +/* * Adjust the time obtained from the CMOS to be UTC time instead of * local time. * @@ -135,6 +141,8 @@ static inline void warp_clock(void) struct timespec adjust; adjust = current_kernel_time(); + if (sys_tz.tz_minuteswest != 0) + persistent_clock_is_local = 1; adjust.tv_sec += sys_tz.tz_minuteswest * 60; do_settimeofday(&adjust); } diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 8601f0db1261..24510d84efd7 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -12,6 +12,11 @@ config CLOCKSOURCE_WATCHDOG config ARCH_CLOCKSOURCE_DATA bool +# Platforms has a persistent clock +config ALWAYS_USE_PERSISTENT_CLOCK + bool + default n + # Timekeeping vsyscall support config GENERIC_TIME_VSYSCALL bool @@ -38,6 +43,10 @@ config GENERIC_CLOCKEVENTS_BUILD default y depends on GENERIC_CLOCKEVENTS +# Architecture can handle broadcast in a driver-agnostic way +config ARCH_HAS_TICK_BROADCAST + bool + # Clockevents broadcasting infrastructure config GENERIC_CLOCKEVENTS_BROADCAST bool diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 24174b4d669b..b10a42bb0165 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -15,6 +15,7 @@ #include <linux/time.h> #include <linux/mm.h> #include <linux/module.h> +#include <linux/rtc.h> #include "tick-internal.h" @@ -483,8 +484,7 @@ out: return leap; } -#ifdef CONFIG_GENERIC_CMOS_UPDATE - +#if defined(CONFIG_GENERIC_CMOS_UPDATE) || defined(CONFIG_RTC_SYSTOHC) static void sync_cmos_clock(struct work_struct *work); static DECLARE_DELAYED_WORK(sync_cmos_work, sync_cmos_clock); @@ -510,14 +510,26 @@ static void sync_cmos_clock(struct work_struct *work) } getnstimeofday(&now); - if (abs(now.tv_nsec - (NSEC_PER_SEC / 2)) <= tick_nsec / 2) - fail = update_persistent_clock(now); + if (abs(now.tv_nsec - (NSEC_PER_SEC / 2)) <= tick_nsec / 2) { + struct timespec adjust = now; + + fail = -ENODEV; + if (persistent_clock_is_local) + adjust.tv_sec -= (sys_tz.tz_minuteswest * 60); +#ifdef CONFIG_GENERIC_CMOS_UPDATE + fail = update_persistent_clock(adjust); +#endif +#ifdef CONFIG_RTC_SYSTOHC + if (fail == -ENODEV) + fail = rtc_set_ntp_time(adjust); +#endif + } next.tv_nsec = (NSEC_PER_SEC / 2) - now.tv_nsec - (TICK_NSEC / 2); if (next.tv_nsec <= 0) next.tv_nsec += NSEC_PER_SEC; - if (!fail) + if (!fail || fail == -ENODEV) next.tv_sec = 659; else next.tv_sec = 0; diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index f113755695e2..2fb8cb88df8d 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -18,6 +18,7 @@ #include <linux/percpu.h> #include <linux/profile.h> #include <linux/sched.h> +#include <linux/smp.h> #include "tick-internal.h" @@ -86,6 +87,22 @@ int tick_is_broadcast_device(struct clock_event_device *dev) return (dev && tick_broadcast_device.evtdev == dev); } +static void err_broadcast(const struct cpumask *mask) +{ + pr_crit_once("Failed to broadcast timer tick. Some CPUs may be unresponsive.\n"); +} + +static void tick_device_setup_broadcast_func(struct clock_event_device *dev) +{ + if (!dev->broadcast) + dev->broadcast = tick_broadcast; + if (!dev->broadcast) { + pr_warn_once("%s depends on broadcast, but no broadcast function available\n", + dev->name); + dev->broadcast = err_broadcast; + } +} + /* * Check, if the device is disfunctional and a place holder, which * needs to be handled by the broadcast device. @@ -105,6 +122,7 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) */ if (!tick_device_is_functional(dev)) { dev->event_handler = tick_handle_periodic; + tick_device_setup_broadcast_func(dev); cpumask_set_cpu(cpu, tick_get_broadcast_mask()); tick_broadcast_start_periodic(tick_broadcast_device.evtdev); ret = 1; @@ -116,15 +134,33 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) */ if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) { int cpu = smp_processor_id(); - cpumask_clear_cpu(cpu, tick_get_broadcast_mask()); tick_broadcast_clear_oneshot(cpu); + } else { + tick_device_setup_broadcast_func(dev); } } raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); return ret; } +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST +int tick_receive_broadcast(void) +{ + struct tick_device *td = this_cpu_ptr(&tick_cpu_device); + struct clock_event_device *evt = td->evtdev; + + if (!evt) + return -ENODEV; + + if (!evt->event_handler) + return -EINVAL; + + evt->event_handler(evt); + return 0; +} +#endif + /* * Broadcast the event to the cpus, which are set in the mask (mangled). */ diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index d58e552d9fd1..314b9ee07edf 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -20,6 +20,7 @@ #include <linux/profile.h> #include <linux/sched.h> #include <linux/module.h> +#include <linux/irq_work.h> #include <asm/irq_regs.h> @@ -28,7 +29,7 @@ /* * Per cpu nohz control structure */ -static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched); +DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched); /* * The time, when the last jiffy update happened. Protected by jiffies_lock. @@ -331,8 +332,8 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, time_delta = timekeeping_max_deferment(); } while (read_seqretry(&jiffies_lock, seq)); - if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || printk_needs_cpu(cpu) || - arch_needs_cpu(cpu)) { + if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || + arch_needs_cpu(cpu) || irq_work_needs_cpu()) { next_jiffies = last_jiffies + 1; delta_jiffies = 1; } else { @@ -631,8 +632,11 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now) static void tick_nohz_account_idle_ticks(struct tick_sched *ts) { -#ifndef CONFIG_VIRT_CPU_ACCOUNTING +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE unsigned long ticks; + + if (vtime_accounting_enabled()) + return; /* * We stopped the tick in idle. Update process times would miss the * time we slept as update_process_times does only a 1 tick diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index cbc6acb0db3f..1e35515a875e 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -29,6 +29,9 @@ static struct timekeeper timekeeper; /* flag for if timekeeping is suspended */ int __read_mostly timekeeping_suspended; +/* Flag for if there is a persistent clock on this platform */ +bool __read_mostly persistent_clock_exist = false; + static inline void tk_normalize_xtime(struct timekeeper *tk) { while (tk->xtime_nsec >= ((u64)NSEC_PER_SEC << tk->shift)) { @@ -264,19 +267,18 @@ static void timekeeping_forward_now(struct timekeeper *tk) } /** - * getnstimeofday - Returns the time of day in a timespec + * __getnstimeofday - Returns the time of day in a timespec. * @ts: pointer to the timespec to be set * - * Returns the time of day in a timespec. + * Updates the time of day in the timespec. + * Returns 0 on success, or -ve when suspended (timespec will be undefined). */ -void getnstimeofday(struct timespec *ts) +int __getnstimeofday(struct timespec *ts) { struct timekeeper *tk = &timekeeper; unsigned long seq; s64 nsecs = 0; - WARN_ON(timekeeping_suspended); - do { seq = read_seqbegin(&tk->lock); @@ -287,6 +289,26 @@ void getnstimeofday(struct timespec *ts) ts->tv_nsec = 0; timespec_add_ns(ts, nsecs); + + /* + * Do not bail out early, in case there were callers still using + * the value, even in the face of the WARN_ON. + */ + if (unlikely(timekeeping_suspended)) + return -EAGAIN; + return 0; +} +EXPORT_SYMBOL(__getnstimeofday); + +/** + * getnstimeofday - Returns the time of day in a timespec. + * @ts: pointer to the timespec to be set + * + * Returns the time of day in a timespec (WARN if suspended). + */ +void getnstimeofday(struct timespec *ts) +{ + WARN_ON(__getnstimeofday(ts)); } EXPORT_SYMBOL(getnstimeofday); @@ -640,12 +662,14 @@ void __init timekeeping_init(void) struct timespec now, boot, tmp; read_persistent_clock(&now); + if (!timespec_valid_strict(&now)) { pr_warn("WARNING: Persistent clock returned invalid value!\n" " Check your CMOS/BIOS settings.\n"); now.tv_sec = 0; now.tv_nsec = 0; - } + } else if (now.tv_sec || now.tv_nsec) + persistent_clock_exist = true; read_boot_clock(&boot); if (!timespec_valid_strict(&boot)) { @@ -718,11 +742,12 @@ void timekeeping_inject_sleeptime(struct timespec *delta) { struct timekeeper *tk = &timekeeper; unsigned long flags; - struct timespec ts; - /* Make sure we don't set the clock twice */ - read_persistent_clock(&ts); - if (!(ts.tv_sec == 0 && ts.tv_nsec == 0)) + /* + * Make sure we don't set the clock twice, as timekeeping_resume() + * already did it + */ + if (has_persistent_clock()) return; write_seqlock_irqsave(&tk->lock, flags); diff --git a/kernel/timeconst.pl b/kernel/timeconst.pl index eb51d76e058a..3f42652a6a37 100644 --- a/kernel/timeconst.pl +++ b/kernel/timeconst.pl @@ -369,10 +369,8 @@ if ($hz eq '--can') { die "Usage: $0 HZ\n"; } - @val = @{$canned_values{$hz}}; - if (!defined(@val)) { - @val = compute_values($hz); - } + $cv = $canned_values{$hz}; + @val = defined($cv) ? @$cv : compute_values($hz); output($hz, @val); } exit 0; diff --git a/kernel/timer.c b/kernel/timer.c index 367d00858482..dbf7a78a1ef1 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -39,6 +39,7 @@ #include <linux/kallsyms.h> #include <linux/irq_work.h> #include <linux/sched.h> +#include <linux/sched/sysctl.h> #include <linux/slab.h> #include <asm/uaccess.h> @@ -1351,7 +1352,6 @@ void update_process_times(int user_tick) account_process_tick(p, user_tick); run_local_timers(); rcu_check_callbacks(cpu, user_tick); - printk_tick(); #ifdef CONFIG_IRQ_WORK if (in_irq()) irq_work_run(); diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 5d89335a485f..36567564e221 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -39,6 +39,9 @@ config HAVE_DYNAMIC_FTRACE help See Documentation/trace/ftrace-design.txt +config HAVE_DYNAMIC_FTRACE_WITH_REGS + bool + config HAVE_FTRACE_MCOUNT_RECORD bool help @@ -250,6 +253,16 @@ config FTRACE_SYSCALLS help Basic tracer to catch the syscall entry and exit events. +config TRACER_SNAPSHOT + bool "Create a snapshot trace buffer" + select TRACER_MAX_TRACE + help + Allow tracing users to take snapshot of the current buffer using the + ftrace interface, e.g.: + + echo 1 > /sys/kernel/debug/tracing/snapshot + cat snapshot + config TRACE_BRANCH_PROFILING bool select GENERIC_TRACER @@ -434,6 +447,11 @@ config DYNAMIC_FTRACE were made. If so, it runs stop_machine (stops all CPUS) and modifies the code to jump over the call to ftrace. +config DYNAMIC_FTRACE_WITH_REGS + def_bool y + depends on DYNAMIC_FTRACE + depends on HAVE_DYNAMIC_FTRACE_WITH_REGS + config FUNCTION_PROFILER bool "Kernel function profiler" depends on FUNCTION_TRACER diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index c0bd0308741c..71259e2b6b61 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -147,7 +147,7 @@ void __trace_note_message(struct blk_trace *bt, const char *fmt, ...) return; local_irq_save(flags); - buf = per_cpu_ptr(bt->msg_data, smp_processor_id()); + buf = this_cpu_ptr(bt->msg_data); va_start(args, fmt); n = vscnprintf(buf, BLK_TN_MAX_MSG, fmt, args); va_end(args); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 41473b4ad7a4..ce8c3d68292f 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -111,6 +111,26 @@ static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip); #define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops) #endif +/* + * Traverse the ftrace_global_list, invoking all entries. The reason that we + * can use rcu_dereference_raw() is that elements removed from this list + * are simply leaked, so there is no need to interact with a grace-period + * mechanism. The rcu_dereference_raw() calls are needed to handle + * concurrent insertions into the ftrace_global_list. + * + * Silly Alpha and silly pointer-speculation compiler optimizations! + */ +#define do_for_each_ftrace_op(op, list) \ + op = rcu_dereference_raw(list); \ + do + +/* + * Optimized for just a single item in the list (as that is the normal case). + */ +#define while_for_each_ftrace_op(op) \ + while (likely(op = rcu_dereference_raw((op)->next)) && \ + unlikely((op) != &ftrace_list_end)) + /** * ftrace_nr_registered_ops - return number of ops registered * @@ -132,29 +152,21 @@ int ftrace_nr_registered_ops(void) return cnt; } -/* - * Traverse the ftrace_global_list, invoking all entries. The reason that we - * can use rcu_dereference_raw() is that elements removed from this list - * are simply leaked, so there is no need to interact with a grace-period - * mechanism. The rcu_dereference_raw() calls are needed to handle - * concurrent insertions into the ftrace_global_list. - * - * Silly Alpha and silly pointer-speculation compiler optimizations! - */ static void ftrace_global_list_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct pt_regs *regs) { - if (unlikely(trace_recursion_test(TRACE_GLOBAL_BIT))) + int bit; + + bit = trace_test_and_set_recursion(TRACE_GLOBAL_START, TRACE_GLOBAL_MAX); + if (bit < 0) return; - trace_recursion_set(TRACE_GLOBAL_BIT); - op = rcu_dereference_raw(ftrace_global_list); /*see above*/ - while (op != &ftrace_list_end) { + do_for_each_ftrace_op(op, ftrace_global_list) { op->func(ip, parent_ip, op, regs); - op = rcu_dereference_raw(op->next); /*see above*/ - }; - trace_recursion_clear(TRACE_GLOBAL_BIT); + } while_for_each_ftrace_op(op); + + trace_clear_recursion(bit); } static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip, @@ -221,10 +233,24 @@ static void update_global_ops(void) * registered callers. */ if (ftrace_global_list == &ftrace_list_end || - ftrace_global_list->next == &ftrace_list_end) + ftrace_global_list->next == &ftrace_list_end) { func = ftrace_global_list->func; - else + /* + * As we are calling the function directly. + * If it does not have recursion protection, + * the function_trace_op needs to be updated + * accordingly. + */ + if (ftrace_global_list->flags & FTRACE_OPS_FL_RECURSION_SAFE) + global_ops.flags |= FTRACE_OPS_FL_RECURSION_SAFE; + else + global_ops.flags &= ~FTRACE_OPS_FL_RECURSION_SAFE; + } else { func = ftrace_global_list_func; + /* The list has its own recursion protection. */ + global_ops.flags |= FTRACE_OPS_FL_RECURSION_SAFE; + } + /* If we filter on pids, update to use the pid function */ if (!list_empty(&ftrace_pids)) { @@ -337,7 +363,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops) if ((ops->flags & FL_GLOBAL_CONTROL_MASK) == FL_GLOBAL_CONTROL_MASK) return -EINVAL; -#ifndef ARCH_SUPPORTS_FTRACE_SAVE_REGS +#ifndef CONFIG_DYNAMIC_FTRACE_WITH_REGS /* * If the ftrace_ops specifies SAVE_REGS, then it only can be used * if the arch supports it, or SAVE_REGS_IF_SUPPORTED is also set. @@ -4090,14 +4116,11 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip, */ preempt_disable_notrace(); trace_recursion_set(TRACE_CONTROL_BIT); - op = rcu_dereference_raw(ftrace_control_list); - while (op != &ftrace_list_end) { + do_for_each_ftrace_op(op, ftrace_control_list) { if (!ftrace_function_local_disabled(op) && ftrace_ops_test(op, ip)) op->func(ip, parent_ip, op, regs); - - op = rcu_dereference_raw(op->next); - }; + } while_for_each_ftrace_op(op); trace_recursion_clear(TRACE_CONTROL_BIT); preempt_enable_notrace(); } @@ -4112,27 +4135,26 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ignored, struct pt_regs *regs) { struct ftrace_ops *op; + int bit; if (function_trace_stop) return; - if (unlikely(trace_recursion_test(TRACE_INTERNAL_BIT))) + bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX); + if (bit < 0) return; - trace_recursion_set(TRACE_INTERNAL_BIT); /* * Some of the ops may be dynamically allocated, * they must be freed after a synchronize_sched(). */ preempt_disable_notrace(); - op = rcu_dereference_raw(ftrace_ops_list); - while (op != &ftrace_list_end) { + do_for_each_ftrace_op(op, ftrace_ops_list) { if (ftrace_ops_test(op, ip)) op->func(ip, parent_ip, op, regs); - op = rcu_dereference_raw(op->next); - }; + } while_for_each_ftrace_op(op); preempt_enable_notrace(); - trace_recursion_clear(TRACE_INTERNAL_BIT); + trace_clear_recursion(bit); } /* @@ -4143,8 +4165,8 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, * Archs are to support both the regs and ftrace_ops at the same time. * If they support ftrace_ops, it is assumed they support regs. * If call backs want to use regs, they must either check for regs - * being NULL, or ARCH_SUPPORTS_FTRACE_SAVE_REGS. - * Note, ARCH_SUPPORT_SAVE_REGS expects a full regs to be saved. + * being NULL, or CONFIG_DYNAMIC_FTRACE_WITH_REGS. + * Note, CONFIG_DYNAMIC_FTRACE_WITH_REGS expects a full regs to be saved. * An architecture can pass partial regs with ftrace_ops and still * set the ARCH_SUPPORT_FTARCE_OPS. */ diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index ce8514feedcd..7244acde77b0 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3,8 +3,10 @@ * * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com> */ +#include <linux/ftrace_event.h> #include <linux/ring_buffer.h> #include <linux/trace_clock.h> +#include <linux/trace_seq.h> #include <linux/spinlock.h> #include <linux/debugfs.h> #include <linux/uaccess.h> @@ -21,7 +23,6 @@ #include <linux/fs.h> #include <asm/local.h> -#include "trace.h" static void update_pages_handler(struct work_struct *work); @@ -2432,41 +2433,76 @@ rb_reserve_next_event(struct ring_buffer *buffer, #ifdef CONFIG_TRACING -#define TRACE_RECURSIVE_DEPTH 16 +/* + * The lock and unlock are done within a preempt disable section. + * The current_context per_cpu variable can only be modified + * by the current task between lock and unlock. But it can + * be modified more than once via an interrupt. To pass this + * information from the lock to the unlock without having to + * access the 'in_interrupt()' functions again (which do show + * a bit of overhead in something as critical as function tracing, + * we use a bitmask trick. + * + * bit 0 = NMI context + * bit 1 = IRQ context + * bit 2 = SoftIRQ context + * bit 3 = normal context. + * + * This works because this is the order of contexts that can + * preempt other contexts. A SoftIRQ never preempts an IRQ + * context. + * + * When the context is determined, the corresponding bit is + * checked and set (if it was set, then a recursion of that context + * happened). + * + * On unlock, we need to clear this bit. To do so, just subtract + * 1 from the current_context and AND it to itself. + * + * (binary) + * 101 - 1 = 100 + * 101 & 100 = 100 (clearing bit zero) + * + * 1010 - 1 = 1001 + * 1010 & 1001 = 1000 (clearing bit 1) + * + * The least significant bit can be cleared this way, and it + * just so happens that it is the same bit corresponding to + * the current context. + */ +static DEFINE_PER_CPU(unsigned int, current_context); -/* Keep this code out of the fast path cache */ -static noinline void trace_recursive_fail(void) +static __always_inline int trace_recursive_lock(void) { - /* Disable all tracing before we do anything else */ - tracing_off_permanent(); - - printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:" - "HC[%lu]:SC[%lu]:NMI[%lu]\n", - trace_recursion_buffer(), - hardirq_count() >> HARDIRQ_SHIFT, - softirq_count() >> SOFTIRQ_SHIFT, - in_nmi()); - - WARN_ON_ONCE(1); -} + unsigned int val = this_cpu_read(current_context); + int bit; -static inline int trace_recursive_lock(void) -{ - trace_recursion_inc(); + if (in_interrupt()) { + if (in_nmi()) + bit = 0; + else if (in_irq()) + bit = 1; + else + bit = 2; + } else + bit = 3; - if (likely(trace_recursion_buffer() < TRACE_RECURSIVE_DEPTH)) - return 0; + if (unlikely(val & (1 << bit))) + return 1; - trace_recursive_fail(); + val |= (1 << bit); + this_cpu_write(current_context, val); - return -1; + return 0; } -static inline void trace_recursive_unlock(void) +static __always_inline void trace_recursive_unlock(void) { - WARN_ON_ONCE(!trace_recursion_buffer()); + unsigned int val = this_cpu_read(current_context); - trace_recursion_dec(); + val--; + val &= this_cpu_read(current_context); + this_cpu_write(current_context, val); } #else @@ -3067,6 +3103,24 @@ ring_buffer_dropped_events_cpu(struct ring_buffer *buffer, int cpu) EXPORT_SYMBOL_GPL(ring_buffer_dropped_events_cpu); /** + * ring_buffer_read_events_cpu - get the number of events successfully read + * @buffer: The ring buffer + * @cpu: The per CPU buffer to get the number of events read + */ +unsigned long +ring_buffer_read_events_cpu(struct ring_buffer *buffer, int cpu) +{ + struct ring_buffer_per_cpu *cpu_buffer; + + if (!cpumask_test_cpu(cpu, buffer->cpumask)) + return 0; + + cpu_buffer = buffer->buffers[cpu]; + return cpu_buffer->read; +} +EXPORT_SYMBOL_GPL(ring_buffer_read_events_cpu); + +/** * ring_buffer_entries - get the number of entries in a buffer * @buffer: The ring buffer * @@ -3425,7 +3479,7 @@ static void rb_advance_iter(struct ring_buffer_iter *iter) /* check for end of page padding */ if ((iter->head >= rb_page_size(iter->head_page)) && (iter->head_page != cpu_buffer->commit_page)) - rb_advance_iter(iter); + rb_inc_iter(iter); } static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3c13e46d7d24..c2e2c2310374 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -39,6 +39,7 @@ #include <linux/poll.h> #include <linux/nmi.h> #include <linux/fs.h> +#include <linux/sched/rt.h> #include "trace.h" #include "trace_output.h" @@ -249,7 +250,7 @@ static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT; static struct tracer *trace_types __read_mostly; /* current_trace points to the tracer that is currently active */ -static struct tracer *current_trace __read_mostly; +static struct tracer *current_trace __read_mostly = &nop_trace; /* * trace_types_lock is used to protect the trace_types list. @@ -709,10 +710,13 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) return; WARN_ON_ONCE(!irqs_disabled()); - if (!current_trace->use_max_tr) { - WARN_ON_ONCE(1); + + if (!current_trace->allocated_snapshot) { + /* Only the nop tracer should hit this when disabling */ + WARN_ON_ONCE(current_trace != &nop_trace); return; } + arch_spin_lock(&ftrace_max_lock); tr->buffer = max_tr.buffer; @@ -739,10 +743,8 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) return; WARN_ON_ONCE(!irqs_disabled()); - if (!current_trace->use_max_tr) { - WARN_ON_ONCE(1); + if (WARN_ON_ONCE(!current_trace->allocated_snapshot)) return; - } arch_spin_lock(&ftrace_max_lock); @@ -862,10 +864,13 @@ int register_tracer(struct tracer *type) current_trace = type; - /* If we expanded the buffers, make sure the max is expanded too */ - if (ring_buffer_expanded && type->use_max_tr) - ring_buffer_resize(max_tr.buffer, trace_buf_size, - RING_BUFFER_ALL_CPUS); + if (type->use_max_tr) { + /* If we expanded the buffers, make sure the max is expanded too */ + if (ring_buffer_expanded) + ring_buffer_resize(max_tr.buffer, trace_buf_size, + RING_BUFFER_ALL_CPUS); + type->allocated_snapshot = true; + } /* the test is responsible for initializing and enabling */ pr_info("Testing tracer %s: ", type->name); @@ -881,10 +886,14 @@ int register_tracer(struct tracer *type) /* Only reset on passing, to avoid touching corrupted buffers */ tracing_reset_online_cpus(tr); - /* Shrink the max buffer again */ - if (ring_buffer_expanded && type->use_max_tr) - ring_buffer_resize(max_tr.buffer, 1, - RING_BUFFER_ALL_CPUS); + if (type->use_max_tr) { + type->allocated_snapshot = false; + + /* Shrink the max buffer again */ + if (ring_buffer_expanded) + ring_buffer_resize(max_tr.buffer, 1, + RING_BUFFER_ALL_CPUS); + } printk(KERN_CONT "PASSED\n"); } @@ -922,6 +931,9 @@ void tracing_reset(struct trace_array *tr, int cpu) { struct ring_buffer *buffer = tr->buffer; + if (!buffer) + return; + ring_buffer_record_disable(buffer); /* Make sure all commits have finished */ @@ -936,6 +948,9 @@ void tracing_reset_online_cpus(struct trace_array *tr) struct ring_buffer *buffer = tr->buffer; int cpu; + if (!buffer) + return; + ring_buffer_record_disable(buffer); /* Make sure all commits have finished */ @@ -1167,7 +1182,6 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, entry->preempt_count = pc & 0xff; entry->pid = (tsk) ? tsk->pid : 0; - entry->padding = 0; entry->flags = #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | @@ -1335,7 +1349,7 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer, */ preempt_disable_notrace(); - use_stack = ++__get_cpu_var(ftrace_stack_reserve); + use_stack = __this_cpu_inc_return(ftrace_stack_reserve); /* * We don't need any atomic variables, just a barrier. * If an interrupt comes in, we don't care, because it would @@ -1389,7 +1403,7 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer, out: /* Again, don't let gcc optimize things here */ barrier(); - __get_cpu_var(ftrace_stack_reserve)--; + __this_cpu_dec(ftrace_stack_reserve); preempt_enable_notrace(); } @@ -1517,7 +1531,6 @@ static struct trace_buffer_struct *trace_percpu_nmi_buffer; static char *get_trace_buf(void) { struct trace_buffer_struct *percpu_buffer; - struct trace_buffer_struct *buffer; /* * If we have allocated per cpu buffers, then we do not @@ -1535,9 +1548,7 @@ static char *get_trace_buf(void) if (!percpu_buffer) return NULL; - buffer = per_cpu_ptr(percpu_buffer, smp_processor_id()); - - return buffer->buffer; + return this_cpu_ptr(&percpu_buffer->buffer[0]); } static int alloc_percpu_trace_buffer(void) @@ -1942,21 +1953,27 @@ void tracing_iter_reset(struct trace_iterator *iter, int cpu) static void *s_start(struct seq_file *m, loff_t *pos) { struct trace_iterator *iter = m->private; - static struct tracer *old_tracer; int cpu_file = iter->cpu_file; void *p = NULL; loff_t l = 0; int cpu; - /* copy the tracer to avoid using a global lock all around */ + /* + * copy the tracer to avoid using a global lock all around. + * iter->trace is a copy of current_trace, the pointer to the + * name may be used instead of a strcmp(), as iter->trace->name + * will point to the same string as current_trace->name. + */ mutex_lock(&trace_types_lock); - if (unlikely(old_tracer != current_trace && current_trace)) { - old_tracer = current_trace; + if (unlikely(current_trace && iter->trace->name != current_trace->name)) *iter->trace = *current_trace; - } mutex_unlock(&trace_types_lock); - atomic_inc(&trace_record_cmdline_disabled); + if (iter->snapshot && iter->trace->use_max_tr) + return ERR_PTR(-EBUSY); + + if (!iter->snapshot) + atomic_inc(&trace_record_cmdline_disabled); if (*pos != iter->pos) { iter->ent = NULL; @@ -1995,7 +2012,11 @@ static void s_stop(struct seq_file *m, void *p) { struct trace_iterator *iter = m->private; - atomic_dec(&trace_record_cmdline_disabled); + if (iter->snapshot && iter->trace->use_max_tr) + return; + + if (!iter->snapshot) + atomic_dec(&trace_record_cmdline_disabled); trace_access_unlock(iter->cpu_file); trace_event_read_unlock(); } @@ -2080,8 +2101,7 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter) unsigned long total; const char *name = "preemption"; - if (type) - name = type->name; + name = type->name; get_total_entries(tr, &total, &entries); @@ -2430,7 +2450,7 @@ static const struct seq_operations tracer_seq_ops = { }; static struct trace_iterator * -__tracing_open(struct inode *inode, struct file *file) +__tracing_open(struct inode *inode, struct file *file, bool snapshot) { long cpu_file = (long) inode->i_private; struct trace_iterator *iter; @@ -2457,16 +2477,16 @@ __tracing_open(struct inode *inode, struct file *file) if (!iter->trace) goto fail; - if (current_trace) - *iter->trace = *current_trace; + *iter->trace = *current_trace; if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL)) goto fail; - if (current_trace && current_trace->print_max) + if (current_trace->print_max || snapshot) iter->tr = &max_tr; else iter->tr = &global_trace; + iter->snapshot = snapshot; iter->pos = -1; mutex_init(&iter->mutex); iter->cpu_file = cpu_file; @@ -2483,8 +2503,9 @@ __tracing_open(struct inode *inode, struct file *file) if (trace_clocks[trace_clock_id].in_ns) iter->iter_flags |= TRACE_FILE_TIME_IN_NS; - /* stop the trace while dumping */ - tracing_stop(); + /* stop the trace while dumping if we are not opening "snapshot" */ + if (!iter->snapshot) + tracing_stop(); if (iter->cpu_file == TRACE_PIPE_ALL_CPU) { for_each_tracing_cpu(cpu) { @@ -2547,8 +2568,9 @@ static int tracing_release(struct inode *inode, struct file *file) if (iter->trace && iter->trace->close) iter->trace->close(iter); - /* reenable tracing if it was previously enabled */ - tracing_start(); + if (!iter->snapshot) + /* reenable tracing if it was previously enabled */ + tracing_start(); mutex_unlock(&trace_types_lock); mutex_destroy(&iter->mutex); @@ -2576,7 +2598,7 @@ static int tracing_open(struct inode *inode, struct file *file) } if (file->f_mode & FMODE_READ) { - iter = __tracing_open(inode, file); + iter = __tracing_open(inode, file, false); if (IS_ERR(iter)) ret = PTR_ERR(iter); else if (trace_flags & TRACE_ITER_LATENCY_FMT) @@ -3014,10 +3036,7 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf, int r; mutex_lock(&trace_types_lock); - if (current_trace) - r = sprintf(buf, "%s\n", current_trace->name); - else - r = sprintf(buf, "\n"); + r = sprintf(buf, "%s\n", current_trace->name); mutex_unlock(&trace_types_lock); return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); @@ -3183,6 +3202,7 @@ static int tracing_set_tracer(const char *buf) static struct trace_option_dentry *topts; struct trace_array *tr = &global_trace; struct tracer *t; + bool had_max_tr; int ret = 0; mutex_lock(&trace_types_lock); @@ -3207,9 +3227,21 @@ static int tracing_set_tracer(const char *buf) goto out; trace_branch_disable(); - if (current_trace && current_trace->reset) + if (current_trace->reset) current_trace->reset(tr); - if (current_trace && current_trace->use_max_tr) { + + had_max_tr = current_trace->allocated_snapshot; + current_trace = &nop_trace; + + if (had_max_tr && !t->use_max_tr) { + /* + * We need to make sure that the update_max_tr sees that + * current_trace changed to nop_trace to keep it from + * swapping the buffers after we resize it. + * The update_max_tr is called from interrupts disabled + * so a synchronized_sched() is sufficient. + */ + synchronize_sched(); /* * We don't free the ring buffer. instead, resize it because * The max_tr ring buffer has some state (e.g. ring->clock) and @@ -3217,18 +3249,19 @@ static int tracing_set_tracer(const char *buf) */ ring_buffer_resize(max_tr.buffer, 1, RING_BUFFER_ALL_CPUS); set_buffer_entries(&max_tr, 1); + tracing_reset_online_cpus(&max_tr); + current_trace->allocated_snapshot = false; } destroy_trace_option_files(topts); - current_trace = &nop_trace; - topts = create_trace_option_files(t); - if (t->use_max_tr) { + if (t->use_max_tr && !had_max_tr) { /* we need to make per cpu buffer sizes equivalent */ ret = resize_buffer_duplicate_size(&max_tr, &global_trace, RING_BUFFER_ALL_CPUS); if (ret < 0) goto out; + t->allocated_snapshot = true; } if (t->init) { @@ -3336,8 +3369,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) ret = -ENOMEM; goto fail; } - if (current_trace) - *iter->trace = *current_trace; + *iter->trace = *current_trace; if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) { ret = -ENOMEM; @@ -3477,7 +3509,6 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_iterator *iter = filp->private_data; - static struct tracer *old_tracer; ssize_t sret; /* return any leftover data */ @@ -3489,10 +3520,8 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, /* copy the tracer to avoid using a global lock all around */ mutex_lock(&trace_types_lock); - if (unlikely(old_tracer != current_trace && current_trace)) { - old_tracer = current_trace; + if (unlikely(iter->trace->name != current_trace->name)) *iter->trace = *current_trace; - } mutex_unlock(&trace_types_lock); /* @@ -3648,7 +3677,6 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, .ops = &tracing_pipe_buf_ops, .spd_release = tracing_spd_release_pipe, }; - static struct tracer *old_tracer; ssize_t ret; size_t rem; unsigned int i; @@ -3658,10 +3686,8 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, /* copy the tracer to avoid using a global lock all around */ mutex_lock(&trace_types_lock); - if (unlikely(old_tracer != current_trace && current_trace)) { - old_tracer = current_trace; + if (unlikely(iter->trace->name != current_trace->name)) *iter->trace = *current_trace; - } mutex_unlock(&trace_types_lock); mutex_lock(&iter->mutex); @@ -4037,8 +4063,7 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf, * Reset the buffer so that it doesn't have incomparable timestamps. */ tracing_reset_online_cpus(&global_trace); - if (max_tr.buffer) - tracing_reset_online_cpus(&max_tr); + tracing_reset_online_cpus(&max_tr); mutex_unlock(&trace_types_lock); @@ -4054,6 +4079,87 @@ static int tracing_clock_open(struct inode *inode, struct file *file) return single_open(file, tracing_clock_show, NULL); } +#ifdef CONFIG_TRACER_SNAPSHOT +static int tracing_snapshot_open(struct inode *inode, struct file *file) +{ + struct trace_iterator *iter; + int ret = 0; + + if (file->f_mode & FMODE_READ) { + iter = __tracing_open(inode, file, true); + if (IS_ERR(iter)) + ret = PTR_ERR(iter); + } + return ret; +} + +static ssize_t +tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, + loff_t *ppos) +{ + unsigned long val; + int ret; + + ret = tracing_update_buffers(); + if (ret < 0) + return ret; + + ret = kstrtoul_from_user(ubuf, cnt, 10, &val); + if (ret) + return ret; + + mutex_lock(&trace_types_lock); + + if (current_trace->use_max_tr) { + ret = -EBUSY; + goto out; + } + + switch (val) { + case 0: + if (current_trace->allocated_snapshot) { + /* free spare buffer */ + ring_buffer_resize(max_tr.buffer, 1, + RING_BUFFER_ALL_CPUS); + set_buffer_entries(&max_tr, 1); + tracing_reset_online_cpus(&max_tr); + current_trace->allocated_snapshot = false; + } + break; + case 1: + if (!current_trace->allocated_snapshot) { + /* allocate spare buffer */ + ret = resize_buffer_duplicate_size(&max_tr, + &global_trace, RING_BUFFER_ALL_CPUS); + if (ret < 0) + break; + current_trace->allocated_snapshot = true; + } + + local_irq_disable(); + /* Now, we're going to swap */ + update_max_tr(&global_trace, current, smp_processor_id()); + local_irq_enable(); + break; + default: + if (current_trace->allocated_snapshot) + tracing_reset_online_cpus(&max_tr); + else + ret = -EINVAL; + break; + } + + if (ret >= 0) { + *ppos += cnt; + ret = cnt; + } +out: + mutex_unlock(&trace_types_lock); + return ret; +} +#endif /* CONFIG_TRACER_SNAPSHOT */ + + static const struct file_operations tracing_max_lat_fops = { .open = tracing_open_generic, .read = tracing_max_lat_read, @@ -4110,6 +4216,16 @@ static const struct file_operations trace_clock_fops = { .write = tracing_clock_write, }; +#ifdef CONFIG_TRACER_SNAPSHOT +static const struct file_operations snapshot_fops = { + .open = tracing_snapshot_open, + .read = seq_read, + .write = tracing_snapshot_write, + .llseek = tracing_seek, + .release = tracing_release, +}; +#endif /* CONFIG_TRACER_SNAPSHOT */ + struct ftrace_buffer_info { struct trace_array *tr; void *spare; @@ -4414,6 +4530,9 @@ tracing_stats_read(struct file *filp, char __user *ubuf, cnt = ring_buffer_dropped_events_cpu(tr->buffer, cpu); trace_seq_printf(s, "dropped events: %ld\n", cnt); + cnt = ring_buffer_read_events_cpu(tr->buffer, cpu); + trace_seq_printf(s, "read events: %ld\n", cnt); + count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len); kfree(s); @@ -4490,7 +4609,7 @@ struct dentry *tracing_init_dentry(void) static struct dentry *d_percpu; -struct dentry *tracing_dentry_percpu(void) +static struct dentry *tracing_dentry_percpu(void) { static int once; struct dentry *d_tracer; @@ -4906,6 +5025,11 @@ static __init int tracer_init_debugfs(void) &ftrace_update_tot_cnt, &tracing_dyn_info_fops); #endif +#ifdef CONFIG_TRACER_SNAPSHOT + trace_create_file("snapshot", 0644, d_tracer, + (void *) TRACE_PIPE_ALL_CPU, &snapshot_fops); +#endif + create_trace_options_dir(); for_each_tracing_cpu(cpu) @@ -5014,6 +5138,7 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode) if (disable_tracing) ftrace_kill(); + /* Simulate the iterator */ trace_init_global_iter(&iter); for_each_tracing_cpu(cpu) { @@ -5025,10 +5150,6 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode) /* don't look at user memory in panic mode */ trace_flags &= ~TRACE_ITER_SYM_USEROBJ; - /* Simulate the iterator */ - iter.tr = &global_trace; - iter.trace = current_trace; - switch (oops_dump_mode) { case DUMP_ALL: iter.cpu_file = TRACE_PIPE_ALL_CPU; @@ -5173,7 +5294,7 @@ __init static int tracer_alloc_buffers(void) init_irq_work(&trace_work_wakeup, trace_wake_up); register_tracer(&nop_trace); - current_trace = &nop_trace; + /* All seems OK, enable tracing */ tracing_disabled = 0; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index c75d7988902c..57d7e5397d56 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -287,20 +287,62 @@ struct tracer { struct tracer_flags *flags; bool print_max; bool use_max_tr; + bool allocated_snapshot; }; /* Only current can touch trace_recursion */ -#define trace_recursion_inc() do { (current)->trace_recursion++; } while (0) -#define trace_recursion_dec() do { (current)->trace_recursion--; } while (0) -/* Ring buffer has the 10 LSB bits to count */ -#define trace_recursion_buffer() ((current)->trace_recursion & 0x3ff) - -/* for function tracing recursion */ -#define TRACE_INTERNAL_BIT (1<<11) -#define TRACE_GLOBAL_BIT (1<<12) -#define TRACE_CONTROL_BIT (1<<13) +/* + * For function tracing recursion: + * The order of these bits are important. + * + * When function tracing occurs, the following steps are made: + * If arch does not support a ftrace feature: + * call internal function (uses INTERNAL bits) which calls... + * If callback is registered to the "global" list, the list + * function is called and recursion checks the GLOBAL bits. + * then this function calls... + * The function callback, which can use the FTRACE bits to + * check for recursion. + * + * Now if the arch does not suppport a feature, and it calls + * the global list function which calls the ftrace callback + * all three of these steps will do a recursion protection. + * There's no reason to do one if the previous caller already + * did. The recursion that we are protecting against will + * go through the same steps again. + * + * To prevent the multiple recursion checks, if a recursion + * bit is set that is higher than the MAX bit of the current + * check, then we know that the check was made by the previous + * caller, and we can skip the current check. + */ +enum { + TRACE_BUFFER_BIT, + TRACE_BUFFER_NMI_BIT, + TRACE_BUFFER_IRQ_BIT, + TRACE_BUFFER_SIRQ_BIT, + + /* Start of function recursion bits */ + TRACE_FTRACE_BIT, + TRACE_FTRACE_NMI_BIT, + TRACE_FTRACE_IRQ_BIT, + TRACE_FTRACE_SIRQ_BIT, + + /* GLOBAL_BITs must be greater than FTRACE_BITs */ + TRACE_GLOBAL_BIT, + TRACE_GLOBAL_NMI_BIT, + TRACE_GLOBAL_IRQ_BIT, + TRACE_GLOBAL_SIRQ_BIT, + + /* INTERNAL_BITs must be greater than GLOBAL_BITs */ + TRACE_INTERNAL_BIT, + TRACE_INTERNAL_NMI_BIT, + TRACE_INTERNAL_IRQ_BIT, + TRACE_INTERNAL_SIRQ_BIT, + + TRACE_CONTROL_BIT, /* * Abuse of the trace_recursion. @@ -309,11 +351,77 @@ struct tracer { * was called in irq context but we have irq tracing off. Since this * can only be modified by current, we can reuse trace_recursion. */ -#define TRACE_IRQ_BIT (1<<13) + TRACE_IRQ_BIT, +}; + +#define trace_recursion_set(bit) do { (current)->trace_recursion |= (1<<(bit)); } while (0) +#define trace_recursion_clear(bit) do { (current)->trace_recursion &= ~(1<<(bit)); } while (0) +#define trace_recursion_test(bit) ((current)->trace_recursion & (1<<(bit))) + +#define TRACE_CONTEXT_BITS 4 + +#define TRACE_FTRACE_START TRACE_FTRACE_BIT +#define TRACE_FTRACE_MAX ((1 << (TRACE_FTRACE_START + TRACE_CONTEXT_BITS)) - 1) + +#define TRACE_GLOBAL_START TRACE_GLOBAL_BIT +#define TRACE_GLOBAL_MAX ((1 << (TRACE_GLOBAL_START + TRACE_CONTEXT_BITS)) - 1) + +#define TRACE_LIST_START TRACE_INTERNAL_BIT +#define TRACE_LIST_MAX ((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1) + +#define TRACE_CONTEXT_MASK TRACE_LIST_MAX + +static __always_inline int trace_get_context_bit(void) +{ + int bit; -#define trace_recursion_set(bit) do { (current)->trace_recursion |= (bit); } while (0) -#define trace_recursion_clear(bit) do { (current)->trace_recursion &= ~(bit); } while (0) -#define trace_recursion_test(bit) ((current)->trace_recursion & (bit)) + if (in_interrupt()) { + if (in_nmi()) + bit = 0; + + else if (in_irq()) + bit = 1; + else + bit = 2; + } else + bit = 3; + + return bit; +} + +static __always_inline int trace_test_and_set_recursion(int start, int max) +{ + unsigned int val = current->trace_recursion; + int bit; + + /* A previous recursion check was made */ + if ((val & TRACE_CONTEXT_MASK) > max) + return 0; + + bit = trace_get_context_bit() + start; + if (unlikely(val & (1 << bit))) + return -1; + + val |= 1 << bit; + current->trace_recursion = val; + barrier(); + + return bit; +} + +static __always_inline void trace_clear_recursion(int bit) +{ + unsigned int val = current->trace_recursion; + + if (!bit) + return; + + bit = 1 << bit; + val &= ~bit; + + barrier(); + current->trace_recursion = val; +} #define TRACE_PIPE_ALL_CPU -1 diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c index 394783531cbb..aa8f5f48dae6 100644 --- a/kernel/trace/trace_clock.c +++ b/kernel/trace/trace_clock.c @@ -21,8 +21,6 @@ #include <linux/ktime.h> #include <linux/trace_clock.h> -#include "trace.h" - /* * trace_clock_local(): the simplest and least coherent tracing clock. * @@ -44,6 +42,7 @@ u64 notrace trace_clock_local(void) return clock; } +EXPORT_SYMBOL_GPL(trace_clock_local); /* * trace_clock(): 'between' trace clock. Not completely serialized, @@ -86,7 +85,7 @@ u64 notrace trace_clock_global(void) local_irq_save(flags); this_cpu = raw_smp_processor_id(); - now = cpu_clock(this_cpu); + now = sched_clock_cpu(this_cpu); /* * If in an NMI context then dont risk lockups and return the * cpu_clock() time: diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 880073d0b946..57e9b284250c 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -116,7 +116,6 @@ static int trace_define_common_fields(void) __common_field(unsigned char, flags); __common_field(unsigned char, preempt_count); __common_field(int, pid); - __common_field(int, padding); return ret; } diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 8e3ad8082ab7..601152523326 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -47,34 +47,6 @@ static void function_trace_start(struct trace_array *tr) tracing_reset_online_cpus(tr); } -static void -function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *pt_regs) -{ - struct trace_array *tr = func_trace; - struct trace_array_cpu *data; - unsigned long flags; - long disabled; - int cpu; - int pc; - - if (unlikely(!ftrace_function_enabled)) - return; - - pc = preempt_count(); - preempt_disable_notrace(); - local_save_flags(flags); - cpu = raw_smp_processor_id(); - data = tr->data[cpu]; - disabled = atomic_inc_return(&data->disabled); - - if (likely(disabled == 1)) - trace_function(tr, ip, parent_ip, flags, pc); - - atomic_dec(&data->disabled); - preempt_enable_notrace(); -} - /* Our option */ enum { TRACE_FUNC_OPT_STACK = 0x1, @@ -85,34 +57,34 @@ static struct tracer_flags func_flags; static void function_trace_call(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct pt_regs *pt_regs) - { struct trace_array *tr = func_trace; struct trace_array_cpu *data; unsigned long flags; - long disabled; + int bit; int cpu; int pc; if (unlikely(!ftrace_function_enabled)) return; - /* - * Need to use raw, since this must be called before the - * recursive protection is performed. - */ - local_irq_save(flags); - cpu = raw_smp_processor_id(); - data = tr->data[cpu]; - disabled = atomic_inc_return(&data->disabled); + pc = preempt_count(); + preempt_disable_notrace(); - if (likely(disabled == 1)) { - pc = preempt_count(); + bit = trace_test_and_set_recursion(TRACE_FTRACE_START, TRACE_FTRACE_MAX); + if (bit < 0) + goto out; + + cpu = smp_processor_id(); + data = tr->data[cpu]; + if (!atomic_read(&data->disabled)) { + local_save_flags(flags); trace_function(tr, ip, parent_ip, flags, pc); } + trace_clear_recursion(bit); - atomic_dec(&data->disabled); - local_irq_restore(flags); + out: + preempt_enable_notrace(); } static void @@ -185,11 +157,6 @@ static void tracing_start_function_trace(void) { ftrace_function_enabled = 0; - if (trace_flags & TRACE_ITER_PREEMPTONLY) - trace_ops.func = function_trace_call_preempt_only; - else - trace_ops.func = function_trace_call; - if (func_flags.val & TRACE_FUNC_OPT_STACK) register_ftrace_function(&trace_stack_ops); else diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 4edb4b74eb7e..39ada66389cc 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -47,6 +47,8 @@ struct fgraph_data { #define TRACE_GRAPH_PRINT_ABS_TIME 0x20 #define TRACE_GRAPH_PRINT_IRQS 0x40 +static unsigned int max_depth; + static struct tracer_opt trace_opts[] = { /* Display overruns? (for self-debug purpose) */ { TRACER_OPT(funcgraph-overrun, TRACE_GRAPH_PRINT_OVERRUN) }, @@ -189,10 +191,16 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer) ftrace_pop_return_trace(&trace, &ret, frame_pointer); trace.rettime = trace_clock_local(); - ftrace_graph_return(&trace); barrier(); current->curr_ret_stack--; + /* + * The trace should run after decrementing the ret counter + * in case an interrupt were to come in. We don't want to + * lose the interrupt if max_depth is set. + */ + ftrace_graph_return(&trace); + if (unlikely(!ret)) { ftrace_graph_stop(); WARN_ON(1); @@ -250,8 +258,9 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) return 0; /* trace it when it is-nested-in or is a function enabled. */ - if (!(trace->depth || ftrace_graph_addr(trace->func)) || - ftrace_graph_ignore_irqs()) + if ((!(trace->depth || ftrace_graph_addr(trace->func)) || + ftrace_graph_ignore_irqs()) || + (max_depth && trace->depth >= max_depth)) return 0; local_irq_save(flags); @@ -1457,6 +1466,59 @@ static struct tracer graph_trace __read_mostly = { #endif }; + +static ssize_t +graph_depth_write(struct file *filp, const char __user *ubuf, size_t cnt, + loff_t *ppos) +{ + unsigned long val; + int ret; + + ret = kstrtoul_from_user(ubuf, cnt, 10, &val); + if (ret) + return ret; + + max_depth = val; + + *ppos += cnt; + + return cnt; +} + +static ssize_t +graph_depth_read(struct file *filp, char __user *ubuf, size_t cnt, + loff_t *ppos) +{ + char buf[15]; /* More than enough to hold UINT_MAX + "\n"*/ + int n; + + n = sprintf(buf, "%d\n", max_depth); + + return simple_read_from_buffer(ubuf, cnt, ppos, buf, n); +} + +static const struct file_operations graph_depth_fops = { + .open = tracing_open_generic, + .write = graph_depth_write, + .read = graph_depth_read, + .llseek = generic_file_llseek, +}; + +static __init int init_graph_debugfs(void) +{ + struct dentry *d_tracer; + + d_tracer = tracing_init_dentry(); + if (!d_tracer) + return 0; + + trace_create_file("max_graph_depth", 0644, d_tracer, + NULL, &graph_depth_fops); + + return 0; +} +fs_initcall(init_graph_debugfs); + static __init int init_graph_trace(void) { max_bytes_for_cpu = snprintf(NULL, 0, "%d", nr_cpu_ids - 1); diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 933708677814..5c7e09d10d74 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -66,7 +66,6 @@ #define TP_FLAG_TRACE 1 #define TP_FLAG_PROFILE 2 #define TP_FLAG_REGISTERED 4 -#define TP_FLAG_UPROBE 8 /* data_rloc: data relative location, compatible with u32 */ diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 9fe45fcefca0..75aa97fbe1a1 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -15,8 +15,8 @@ #include <linux/kallsyms.h> #include <linux/uaccess.h> #include <linux/ftrace.h> +#include <linux/sched/rt.h> #include <trace/events/sched.h> - #include "trace.h" static struct trace_array *wakeup_trace; diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 47623169a815..51c819c12c29 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -415,7 +415,8 @@ static void trace_selftest_test_recursion_func(unsigned long ip, * The ftrace infrastructure should provide the recursion * protection. If not, this will crash the kernel! */ - trace_selftest_recursion_cnt++; + if (trace_selftest_recursion_cnt++ > 10) + return; DYN_FTRACE_TEST_NAME(); } @@ -452,7 +453,6 @@ trace_selftest_function_recursion(void) char *func_name; int len; int ret; - int cnt; /* The previous test PASSED */ pr_cont("PASSED\n"); @@ -510,19 +510,10 @@ trace_selftest_function_recursion(void) unregister_ftrace_function(&test_recsafe_probe); - /* - * If arch supports all ftrace features, and no other task - * was on the list, we should be fine. - */ - if (!ftrace_nr_registered_ops() && !FTRACE_FORCE_LIST_FUNC) - cnt = 2; /* Should have recursed */ - else - cnt = 1; - ret = -1; - if (trace_selftest_recursion_cnt != cnt) { - pr_cont("*callback not called expected %d times (%d)* ", - cnt, trace_selftest_recursion_cnt); + if (trace_selftest_recursion_cnt != 2) { + pr_cont("*callback not called expected 2 times (%d)* ", + trace_selftest_recursion_cnt); goto out; } @@ -568,7 +559,7 @@ trace_selftest_function_regs(void) int ret; int supported = 0; -#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS supported = 1; #endif diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 7609dd6714c2..5329e13e74a1 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -77,7 +77,7 @@ static struct syscall_metadata *syscall_nr_to_meta(int nr) return syscalls_metadata[nr]; } -enum print_line_t +static enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags, struct trace_event *event) { @@ -130,7 +130,7 @@ end: return TRACE_TYPE_HANDLED; } -enum print_line_t +static enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags, struct trace_event *event) { @@ -270,7 +270,7 @@ static int syscall_exit_define_fields(struct ftrace_event_call *call) return ret; } -void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id) +static void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id) { struct syscall_trace_enter *entry; struct syscall_metadata *sys_data; @@ -305,7 +305,7 @@ void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id) trace_current_buffer_unlock_commit(buffer, event, 0, 0); } -void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret) +static void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret) { struct syscall_trace_exit *entry; struct syscall_metadata *sys_data; @@ -337,7 +337,7 @@ void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret) trace_current_buffer_unlock_commit(buffer, event, 0, 0); } -int reg_event_syscall_enter(struct ftrace_event_call *call) +static int reg_event_syscall_enter(struct ftrace_event_call *call) { int ret = 0; int num; @@ -356,7 +356,7 @@ int reg_event_syscall_enter(struct ftrace_event_call *call) return ret; } -void unreg_event_syscall_enter(struct ftrace_event_call *call) +static void unreg_event_syscall_enter(struct ftrace_event_call *call) { int num; @@ -371,7 +371,7 @@ void unreg_event_syscall_enter(struct ftrace_event_call *call) mutex_unlock(&syscall_trace_lock); } -int reg_event_syscall_exit(struct ftrace_event_call *call) +static int reg_event_syscall_exit(struct ftrace_event_call *call) { int ret = 0; int num; @@ -390,7 +390,7 @@ int reg_event_syscall_exit(struct ftrace_event_call *call) return ret; } -void unreg_event_syscall_exit(struct ftrace_event_call *call) +static void unreg_event_syscall_exit(struct ftrace_event_call *call) { int num; @@ -459,7 +459,7 @@ unsigned long __init __weak arch_syscall_addr(int nr) return (unsigned long)sys_call_table[nr]; } -int __init init_ftrace_syscalls(void) +static int __init init_ftrace_syscalls(void) { struct syscall_metadata *meta; unsigned long addr; diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index c86e6d4f67fb..8dad2a92dee9 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -28,20 +28,21 @@ #define UPROBE_EVENT_SYSTEM "uprobes" +struct trace_uprobe_filter { + rwlock_t rwlock; + int nr_systemwide; + struct list_head perf_events; +}; + /* * uprobe event core functions */ -struct trace_uprobe; -struct uprobe_trace_consumer { - struct uprobe_consumer cons; - struct trace_uprobe *tu; -}; - struct trace_uprobe { struct list_head list; struct ftrace_event_class class; struct ftrace_event_call call; - struct uprobe_trace_consumer *consumer; + struct trace_uprobe_filter filter; + struct uprobe_consumer consumer; struct inode *inode; char *filename; unsigned long offset; @@ -64,6 +65,18 @@ static LIST_HEAD(uprobe_list); static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs); +static inline void init_trace_uprobe_filter(struct trace_uprobe_filter *filter) +{ + rwlock_init(&filter->rwlock); + filter->nr_systemwide = 0; + INIT_LIST_HEAD(&filter->perf_events); +} + +static inline bool uprobe_filter_is_empty(struct trace_uprobe_filter *filter) +{ + return !filter->nr_systemwide && list_empty(&filter->perf_events); +} + /* * Allocate new trace_uprobe and initialize it (including uprobes). */ @@ -92,6 +105,8 @@ alloc_trace_uprobe(const char *group, const char *event, int nargs) goto error; INIT_LIST_HEAD(&tu->list); + tu->consumer.handler = uprobe_dispatcher; + init_trace_uprobe_filter(&tu->filter); return tu; error: @@ -253,12 +268,18 @@ static int create_trace_uprobe(int argc, char **argv) if (ret) goto fail_address_parse; + inode = igrab(path.dentry->d_inode); + path_put(&path); + + if (!inode || !S_ISREG(inode->i_mode)) { + ret = -EINVAL; + goto fail_address_parse; + } + ret = kstrtoul(arg, 0, &offset); if (ret) goto fail_address_parse; - inode = igrab(path.dentry->d_inode); - argc -= 2; argv += 2; @@ -356,7 +377,7 @@ fail_address_parse: if (inode) iput(inode); - pr_info("Failed to parse address.\n"); + pr_info("Failed to parse address or file.\n"); return ret; } @@ -465,7 +486,7 @@ static const struct file_operations uprobe_profile_ops = { }; /* uprobe handler */ -static void uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs) +static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs) { struct uprobe_trace_entry_head *entry; struct ring_buffer_event *event; @@ -475,8 +496,6 @@ static void uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs) unsigned long irq_flags; struct ftrace_event_call *call = &tu->call; - tu->nhit++; - local_save_flags(irq_flags); pc = preempt_count(); @@ -485,16 +504,18 @@ static void uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs) event = trace_current_buffer_lock_reserve(&buffer, call->event.type, size, irq_flags, pc); if (!event) - return; + return 0; entry = ring_buffer_event_data(event); - entry->ip = uprobe_get_swbp_addr(task_pt_regs(current)); + entry->ip = instruction_pointer(task_pt_regs(current)); data = (u8 *)&entry[1]; for (i = 0; i < tu->nr_args; i++) call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset); if (!filter_current_check_discard(buffer, call, entry, event)) trace_buffer_unlock_commit(buffer, event, irq_flags, pc); + + return 0; } /* Event entry printers */ @@ -533,42 +554,43 @@ partial: return TRACE_TYPE_PARTIAL_LINE; } -static int probe_event_enable(struct trace_uprobe *tu, int flag) +static inline bool is_trace_uprobe_enabled(struct trace_uprobe *tu) { - struct uprobe_trace_consumer *utc; - int ret = 0; + return tu->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE); +} - if (!tu->inode || tu->consumer) - return -EINTR; +typedef bool (*filter_func_t)(struct uprobe_consumer *self, + enum uprobe_filter_ctx ctx, + struct mm_struct *mm); - utc = kzalloc(sizeof(struct uprobe_trace_consumer), GFP_KERNEL); - if (!utc) +static int +probe_event_enable(struct trace_uprobe *tu, int flag, filter_func_t filter) +{ + int ret = 0; + + if (is_trace_uprobe_enabled(tu)) return -EINTR; - utc->cons.handler = uprobe_dispatcher; - utc->cons.filter = NULL; - ret = uprobe_register(tu->inode, tu->offset, &utc->cons); - if (ret) { - kfree(utc); - return ret; - } + WARN_ON(!uprobe_filter_is_empty(&tu->filter)); tu->flags |= flag; - utc->tu = tu; - tu->consumer = utc; + tu->consumer.filter = filter; + ret = uprobe_register(tu->inode, tu->offset, &tu->consumer); + if (ret) + tu->flags &= ~flag; - return 0; + return ret; } static void probe_event_disable(struct trace_uprobe *tu, int flag) { - if (!tu->inode || !tu->consumer) + if (!is_trace_uprobe_enabled(tu)) return; - uprobe_unregister(tu->inode, tu->offset, &tu->consumer->cons); + WARN_ON(!uprobe_filter_is_empty(&tu->filter)); + + uprobe_unregister(tu->inode, tu->offset, &tu->consumer); tu->flags &= ~flag; - kfree(tu->consumer); - tu->consumer = NULL; } static int uprobe_event_define_fields(struct ftrace_event_call *event_call) @@ -642,8 +664,96 @@ static int set_print_fmt(struct trace_uprobe *tu) } #ifdef CONFIG_PERF_EVENTS +static bool +__uprobe_perf_filter(struct trace_uprobe_filter *filter, struct mm_struct *mm) +{ + struct perf_event *event; + + if (filter->nr_systemwide) + return true; + + list_for_each_entry(event, &filter->perf_events, hw.tp_list) { + if (event->hw.tp_target->mm == mm) + return true; + } + + return false; +} + +static inline bool +uprobe_filter_event(struct trace_uprobe *tu, struct perf_event *event) +{ + return __uprobe_perf_filter(&tu->filter, event->hw.tp_target->mm); +} + +static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event) +{ + bool done; + + write_lock(&tu->filter.rwlock); + if (event->hw.tp_target) { + /* + * event->parent != NULL means copy_process(), we can avoid + * uprobe_apply(). current->mm must be probed and we can rely + * on dup_mmap() which preserves the already installed bp's. + * + * attr.enable_on_exec means that exec/mmap will install the + * breakpoints we need. + */ + done = tu->filter.nr_systemwide || + event->parent || event->attr.enable_on_exec || + uprobe_filter_event(tu, event); + list_add(&event->hw.tp_list, &tu->filter.perf_events); + } else { + done = tu->filter.nr_systemwide; + tu->filter.nr_systemwide++; + } + write_unlock(&tu->filter.rwlock); + + if (!done) + uprobe_apply(tu->inode, tu->offset, &tu->consumer, true); + + return 0; +} + +static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event) +{ + bool done; + + write_lock(&tu->filter.rwlock); + if (event->hw.tp_target) { + list_del(&event->hw.tp_list); + done = tu->filter.nr_systemwide || + (event->hw.tp_target->flags & PF_EXITING) || + uprobe_filter_event(tu, event); + } else { + tu->filter.nr_systemwide--; + done = tu->filter.nr_systemwide; + } + write_unlock(&tu->filter.rwlock); + + if (!done) + uprobe_apply(tu->inode, tu->offset, &tu->consumer, false); + + return 0; +} + +static bool uprobe_perf_filter(struct uprobe_consumer *uc, + enum uprobe_filter_ctx ctx, struct mm_struct *mm) +{ + struct trace_uprobe *tu; + int ret; + + tu = container_of(uc, struct trace_uprobe, consumer); + read_lock(&tu->filter.rwlock); + ret = __uprobe_perf_filter(&tu->filter, mm); + read_unlock(&tu->filter.rwlock); + + return ret; +} + /* uprobe profile handler */ -static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs) +static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs) { struct ftrace_event_call *call = &tu->call; struct uprobe_trace_entry_head *entry; @@ -652,11 +762,14 @@ static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs) int size, __size, i; int rctx; + if (!uprobe_perf_filter(&tu->consumer, 0, current->mm)) + return UPROBE_HANDLER_REMOVE; + __size = sizeof(*entry) + tu->size; size = ALIGN(__size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough")) - return; + return 0; preempt_disable(); @@ -664,7 +777,7 @@ static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs) if (!entry) goto out; - entry->ip = uprobe_get_swbp_addr(task_pt_regs(current)); + entry->ip = instruction_pointer(task_pt_regs(current)); data = (u8 *)&entry[1]; for (i = 0; i < tu->nr_args; i++) call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset); @@ -674,6 +787,7 @@ static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs) out: preempt_enable(); + return 0; } #endif /* CONFIG_PERF_EVENTS */ @@ -684,7 +798,7 @@ int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type, switch (type) { case TRACE_REG_REGISTER: - return probe_event_enable(tu, TP_FLAG_TRACE); + return probe_event_enable(tu, TP_FLAG_TRACE, NULL); case TRACE_REG_UNREGISTER: probe_event_disable(tu, TP_FLAG_TRACE); @@ -692,11 +806,18 @@ int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type, #ifdef CONFIG_PERF_EVENTS case TRACE_REG_PERF_REGISTER: - return probe_event_enable(tu, TP_FLAG_PROFILE); + return probe_event_enable(tu, TP_FLAG_PROFILE, uprobe_perf_filter); case TRACE_REG_PERF_UNREGISTER: probe_event_disable(tu, TP_FLAG_PROFILE); return 0; + + case TRACE_REG_PERF_OPEN: + return uprobe_perf_open(tu, data); + + case TRACE_REG_PERF_CLOSE: + return uprobe_perf_close(tu, data); + #endif default: return 0; @@ -706,22 +827,20 @@ int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type, static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs) { - struct uprobe_trace_consumer *utc; struct trace_uprobe *tu; + int ret = 0; - utc = container_of(con, struct uprobe_trace_consumer, cons); - tu = utc->tu; - if (!tu || tu->consumer != utc) - return 0; + tu = container_of(con, struct trace_uprobe, consumer); + tu->nhit++; if (tu->flags & TP_FLAG_TRACE) - uprobe_trace_func(tu, regs); + ret |= uprobe_trace_func(tu, regs); #ifdef CONFIG_PERF_EVENTS if (tu->flags & TP_FLAG_PROFILE) - uprobe_perf_func(tu, regs); + ret |= uprobe_perf_func(tu, regs); #endif - return 0; + return ret; } static struct trace_event_functions uprobe_funcs = { diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 625df0b44690..a1dd9a1b1327 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -32,6 +32,7 @@ void bacct_add_tsk(struct user_namespace *user_ns, { const struct cred *tcred; struct timespec uptime, ts; + cputime_t utime, stime, utimescaled, stimescaled; u64 ac_etime; BUILD_BUG_ON(TS_COMM_LEN < TASK_COMM_LEN); @@ -65,10 +66,15 @@ void bacct_add_tsk(struct user_namespace *user_ns, stats->ac_ppid = pid_alive(tsk) ? task_tgid_nr_ns(rcu_dereference(tsk->real_parent), pid_ns) : 0; rcu_read_unlock(); - stats->ac_utime = cputime_to_usecs(tsk->utime); - stats->ac_stime = cputime_to_usecs(tsk->stime); - stats->ac_utimescaled = cputime_to_usecs(tsk->utimescaled); - stats->ac_stimescaled = cputime_to_usecs(tsk->stimescaled); + + task_cputime(tsk, &utime, &stime); + stats->ac_utime = cputime_to_usecs(utime); + stats->ac_stime = cputime_to_usecs(stime); + + task_cputime_scaled(tsk, &utimescaled, &stimescaled); + stats->ac_utimescaled = cputime_to_usecs(utimescaled); + stats->ac_stimescaled = cputime_to_usecs(stimescaled); + stats->ac_minflt = tsk->min_flt; stats->ac_majflt = tsk->maj_flt; @@ -115,11 +121,8 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p) #undef KB #undef MB -/** - * acct_update_integrals - update mm integral fields in task_struct - * @tsk: task_struct for accounting - */ -void acct_update_integrals(struct task_struct *tsk) +static void __acct_update_integrals(struct task_struct *tsk, + cputime_t utime, cputime_t stime) { if (likely(tsk->mm)) { cputime_t time, dtime; @@ -128,7 +131,7 @@ void acct_update_integrals(struct task_struct *tsk) u64 delta; local_irq_save(flags); - time = tsk->stime + tsk->utime; + time = stime + utime; dtime = time - tsk->acct_timexpd; jiffies_to_timeval(cputime_to_jiffies(dtime), &value); delta = value.tv_sec; @@ -145,6 +148,27 @@ void acct_update_integrals(struct task_struct *tsk) } /** + * acct_update_integrals - update mm integral fields in task_struct + * @tsk: task_struct for accounting + */ +void acct_update_integrals(struct task_struct *tsk) +{ + cputime_t utime, stime; + + task_cputime(tsk, &utime, &stime); + __acct_update_integrals(tsk, utime, stime); +} + +/** + * acct_account_cputime - update mm integral after cputime update + * @tsk: task_struct for accounting + */ +void acct_account_cputime(struct task_struct *tsk) +{ + __acct_update_integrals(tsk, tsk->utime, tsk->stime); +} + +/** * acct_clear_integrals - clear the mm integral fields in task_struct * @tsk: task_struct whose accounting fields are cleared */ diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 75a2ab3d0b02..27689422aa92 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -23,6 +23,7 @@ #include <linux/module.h> #include <linux/sysctl.h> #include <linux/smpboot.h> +#include <linux/sched/rt.h> #include <asm/irq_regs.h> #include <linux/kvm_para.h> diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 67604e599384..a1714c897e3f 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -605,61 +605,6 @@ config PROVE_LOCKING For more details, see Documentation/lockdep-design.txt. -config PROVE_RCU - bool "RCU debugging: prove RCU correctness" - depends on PROVE_LOCKING - default n - help - This feature enables lockdep extensions that check for correct - use of RCU APIs. This is currently under development. Say Y - if you want to debug RCU usage or help work on the PROVE_RCU - feature. - - Say N if you are unsure. - -config PROVE_RCU_REPEATEDLY - bool "RCU debugging: don't disable PROVE_RCU on first splat" - depends on PROVE_RCU - default n - help - By itself, PROVE_RCU will disable checking upon issuing the - first warning (or "splat"). This feature prevents such - disabling, allowing multiple RCU-lockdep warnings to be printed - on a single reboot. - - Say Y to allow multiple RCU-lockdep warnings per boot. - - Say N if you are unsure. - -config PROVE_RCU_DELAY - bool "RCU debugging: preemptible RCU race provocation" - depends on DEBUG_KERNEL && PREEMPT_RCU - default n - help - There is a class of races that involve an unlikely preemption - of __rcu_read_unlock() just after ->rcu_read_lock_nesting has - been set to INT_MIN. This feature inserts a delay at that - point to increase the probability of these races. - - Say Y to increase probability of preemption of __rcu_read_unlock(). - - Say N if you are unsure. - -config SPARSE_RCU_POINTER - bool "RCU debugging: sparse-based checks for pointer usage" - default n - help - This feature enables the __rcu sparse annotation for - RCU-protected pointers. This annotation will cause sparse - to flag any non-RCU used of annotated pointers. This can be - helpful when debugging RCU usage. Please note that this feature - is not intended to enforce code cleanliness; it is instead merely - a debugging aid. - - Say Y to make sparse flag questionable use of RCU-protected pointers - - Say N if you are unsure. - config LOCKDEP bool depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT @@ -937,6 +882,63 @@ config BOOT_PRINTK_DELAY BOOT_PRINTK_DELAY also may cause LOCKUP_DETECTOR to detect what it believes to be lockup conditions. +menu "RCU Debugging" + +config PROVE_RCU + bool "RCU debugging: prove RCU correctness" + depends on PROVE_LOCKING + default n + help + This feature enables lockdep extensions that check for correct + use of RCU APIs. This is currently under development. Say Y + if you want to debug RCU usage or help work on the PROVE_RCU + feature. + + Say N if you are unsure. + +config PROVE_RCU_REPEATEDLY + bool "RCU debugging: don't disable PROVE_RCU on first splat" + depends on PROVE_RCU + default n + help + By itself, PROVE_RCU will disable checking upon issuing the + first warning (or "splat"). This feature prevents such + disabling, allowing multiple RCU-lockdep warnings to be printed + on a single reboot. + + Say Y to allow multiple RCU-lockdep warnings per boot. + + Say N if you are unsure. + +config PROVE_RCU_DELAY + bool "RCU debugging: preemptible RCU race provocation" + depends on DEBUG_KERNEL && PREEMPT_RCU + default n + help + There is a class of races that involve an unlikely preemption + of __rcu_read_unlock() just after ->rcu_read_lock_nesting has + been set to INT_MIN. This feature inserts a delay at that + point to increase the probability of these races. + + Say Y to increase probability of preemption of __rcu_read_unlock(). + + Say N if you are unsure. + +config SPARSE_RCU_POINTER + bool "RCU debugging: sparse-based checks for pointer usage" + default n + help + This feature enables the __rcu sparse annotation for + RCU-protected pointers. This annotation will cause sparse + to flag any non-RCU used of annotated pointers. This can be + helpful when debugging RCU usage. Please note that this feature + is not intended to enforce code cleanliness; it is instead merely + a debugging aid. + + Say Y to make sparse flag questionable use of RCU-protected pointers + + Say N if you are unsure. + config RCU_TORTURE_TEST tristate "torture tests for RCU" depends on DEBUG_KERNEL @@ -970,7 +972,7 @@ config RCU_TORTURE_TEST_RUNNABLE config RCU_CPU_STALL_TIMEOUT int "RCU CPU stall timeout in seconds" - depends on TREE_RCU || TREE_PREEMPT_RCU + depends on RCU_STALL_COMMON range 3 300 default 21 help @@ -1008,6 +1010,7 @@ config RCU_CPU_STALL_INFO config RCU_TRACE bool "Enable tracing for RCU" depends on DEBUG_KERNEL + select TRACE_CLOCK help This option provides tracing in RCU which presents stats in debugfs for debugging RCU implementation. @@ -1015,6 +1018,8 @@ config RCU_TRACE Say Y here if you want to enable RCU tracing Say N if you are unsure. +endmenu # "RCU Debugging" + config KPROBES_SANITY_TEST bool "Kprobes sanity tests" depends on DEBUG_KERNEL diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 09255ec8159c..fbb60b103e64 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3030,7 +3030,9 @@ int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s, if (memcg) { s->memcg_params->memcg = memcg; s->memcg_params->root_cache = root_cache; - } + } else + s->memcg_params->is_root_cache = true; + return 0; } diff --git a/mm/mlock.c b/mm/mlock.c index f0b9ce572fc7..c9bd528b01d2 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -517,11 +517,11 @@ SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len) static int do_mlockall(int flags) { struct vm_area_struct * vma, * prev = NULL; - unsigned int def_flags = 0; if (flags & MCL_FUTURE) - def_flags = VM_LOCKED; - current->mm->def_flags = def_flags; + current->mm->def_flags |= VM_LOCKED; + else + current->mm->def_flags &= ~VM_LOCKED; if (flags == MCL_FUTURE) goto out; diff --git a/mm/mmap.c b/mm/mmap.c index d1e4124f3d0e..09da0b264982 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -32,6 +32,7 @@ #include <linux/khugepaged.h> #include <linux/uprobes.h> #include <linux/rbtree_augmented.h> +#include <linux/sched/sysctl.h> #include <asm/uaccess.h> #include <asm/cacheflush.h> diff --git a/mm/mremap.c b/mm/mremap.c index e1031e1f6a61..f9766f460299 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -19,6 +19,7 @@ #include <linux/security.h> #include <linux/syscalls.h> #include <linux/mmu_notifier.h> +#include <linux/sched/sysctl.h> #include <asm/uaccess.h> #include <asm/cacheflush.h> diff --git a/mm/nommu.c b/mm/nommu.c index 79c3cac87afa..b20db4e22263 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -29,6 +29,7 @@ #include <linux/security.h> #include <linux/syscalls.h> #include <linux/audit.h> +#include <linux/sched/sysctl.h> #include <asm/uaccess.h> #include <asm/tlb.h> diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 0713bfbf0954..66a0024becd9 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -35,6 +35,7 @@ #include <linux/buffer_head.h> /* __set_page_dirty_buffers */ #include <linux/pagevec.h> #include <linux/timer.h> +#include <linux/sched/rt.h> #include <trace/events/writeback.h> /* diff --git a/mm/page_alloc.c b/mm/page_alloc.c index df2022ff0c8a..d1107adf174a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -58,6 +58,7 @@ #include <linux/prefetch.h> #include <linux/migrate.h> #include <linux/page-debug-flags.h> +#include <linux/sched/rt.h> #include <asm/tlbflush.h> #include <asm/div64.h> @@ -773,6 +774,10 @@ void __init init_cma_reserved_pageblock(struct page *page) set_pageblock_migratetype(page, MIGRATE_CMA); __free_pages(page, pageblock_order); totalram_pages += pageblock_nr_pages; +#ifdef CONFIG_HIGHMEM + if (PageHighMem(page)) + totalhigh_pages += pageblock_nr_pages; +#endif } #endif @@ -4416,10 +4421,11 @@ static void __meminit calculate_node_totalpages(struct pglist_data *pgdat, * round what is now in bits to nearest long in bits, then return it in * bytes. */ -static unsigned long __init usemap_size(unsigned long zonesize) +static unsigned long __init usemap_size(unsigned long zone_start_pfn, unsigned long zonesize) { unsigned long usemapsize; + zonesize += zone_start_pfn & (pageblock_nr_pages-1); usemapsize = roundup(zonesize, pageblock_nr_pages); usemapsize = usemapsize >> pageblock_order; usemapsize *= NR_PAGEBLOCK_BITS; @@ -4429,17 +4435,19 @@ static unsigned long __init usemap_size(unsigned long zonesize) } static void __init setup_usemap(struct pglist_data *pgdat, - struct zone *zone, unsigned long zonesize) + struct zone *zone, + unsigned long zone_start_pfn, + unsigned long zonesize) { - unsigned long usemapsize = usemap_size(zonesize); + unsigned long usemapsize = usemap_size(zone_start_pfn, zonesize); zone->pageblock_flags = NULL; if (usemapsize) zone->pageblock_flags = alloc_bootmem_node_nopanic(pgdat, usemapsize); } #else -static inline void setup_usemap(struct pglist_data *pgdat, - struct zone *zone, unsigned long zonesize) {} +static inline void setup_usemap(struct pglist_data *pgdat, struct zone *zone, + unsigned long zone_start_pfn, unsigned long zonesize) {} #endif /* CONFIG_SPARSEMEM */ #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE @@ -4590,7 +4598,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat, continue; set_pageblock_order(); - setup_usemap(pgdat, zone, size); + setup_usemap(pgdat, zone, zone_start_pfn, size); ret = init_currently_empty_zone(zone, zone_start_pfn, size, MEMMAP_EARLY); BUG_ON(ret); diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 183f97a86bb2..553921511e4e 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -440,7 +440,7 @@ static bool batadv_is_orig_node_eligible(struct batadv_dat_candidate *res, /* this is an hash collision with the temporary selected node. Choose * the one with the lowest address */ - if ((tmp_max == max) && + if ((tmp_max == max) && max_orig_node && (batadv_compare_eth(candidate->orig, max_orig_node->orig) > 0)) goto out; diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c index 7f884e3fb955..8660ea3be705 100644 --- a/net/bridge/br_stp_bpdu.c +++ b/net/bridge/br_stp_bpdu.c @@ -16,6 +16,7 @@ #include <linux/etherdevice.h> #include <linux/llc.h> #include <linux/slab.h> +#include <linux/pkt_sched.h> #include <net/net_namespace.h> #include <net/llc.h> #include <net/llc_pdu.h> @@ -40,6 +41,7 @@ static void br_send_bpdu(struct net_bridge_port *p, skb->dev = p->dev; skb->protocol = htons(ETH_P_802_2); + skb->priority = TC_PRIO_CONTROL; skb_reserve(skb, LLC_RESERVE); memcpy(__skb_put(skb, length), data, length); diff --git a/net/core/datagram.c b/net/core/datagram.c index 0337e2b76862..368f9c3f9dc6 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -187,7 +187,7 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, skb_queue_walk(queue, skb) { *peeked = skb->peeked; if (flags & MSG_PEEK) { - if (*off >= skb->len) { + if (*off >= skb->len && skb->len) { *off -= skb->len; continue; } diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 9547a273b9e9..ded146b217f1 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -928,24 +928,25 @@ static void parp_redo(struct sk_buff *skb) static int arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { - struct arphdr *arp; + const struct arphdr *arp; + + if (dev->flags & IFF_NOARP || + skb->pkt_type == PACKET_OTHERHOST || + skb->pkt_type == PACKET_LOOPBACK) + goto freeskb; + + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + goto out_of_mem; /* ARP header, plus 2 device addresses, plus 2 IP addresses. */ if (!pskb_may_pull(skb, arp_hdr_len(dev))) goto freeskb; arp = arp_hdr(skb); - if (arp->ar_hln != dev->addr_len || - dev->flags & IFF_NOARP || - skb->pkt_type == PACKET_OTHERHOST || - skb->pkt_type == PACKET_LOOPBACK || - arp->ar_pln != 4) + if (arp->ar_hln != dev->addr_len || arp->ar_pln != 4) goto freeskb; - skb = skb_share_check(skb, GFP_ATOMIC); - if (skb == NULL) - goto out_of_mem; - memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb)); return NF_HOOK(NFPROTO_ARP, NF_ARP_IN, skb, dev, NULL, arp_process); diff --git a/net/ipv6/netfilter/ip6t_NPT.c b/net/ipv6/netfilter/ip6t_NPT.c index 7302b0b7b642..83acc1405a18 100644 --- a/net/ipv6/netfilter/ip6t_NPT.c +++ b/net/ipv6/netfilter/ip6t_NPT.c @@ -9,6 +9,7 @@ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/ipv6.h> +#include <net/ipv6.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> #include <linux/netfilter_ipv6/ip6t_NPT.h> @@ -18,11 +19,20 @@ static int ip6t_npt_checkentry(const struct xt_tgchk_param *par) { struct ip6t_npt_tginfo *npt = par->targinfo; __wsum src_sum = 0, dst_sum = 0; + struct in6_addr pfx; unsigned int i; if (npt->src_pfx_len > 64 || npt->dst_pfx_len > 64) return -EINVAL; + /* Ensure that LSB of prefix is zero */ + ipv6_addr_prefix(&pfx, &npt->src_pfx.in6, npt->src_pfx_len); + if (!ipv6_addr_equal(&pfx, &npt->src_pfx.in6)) + return -EINVAL; + ipv6_addr_prefix(&pfx, &npt->dst_pfx.in6, npt->dst_pfx_len); + if (!ipv6_addr_equal(&pfx, &npt->dst_pfx.in6)) + return -EINVAL; + for (i = 0; i < ARRAY_SIZE(npt->src_pfx.in6.s6_addr16); i++) { src_sum = csum_add(src_sum, (__force __wsum)npt->src_pfx.in6.s6_addr16[i]); @@ -30,7 +40,7 @@ static int ip6t_npt_checkentry(const struct xt_tgchk_param *par) (__force __wsum)npt->dst_pfx.in6.s6_addr16[i]); } - npt->adjustment = (__force __sum16) csum_sub(src_sum, dst_sum); + npt->adjustment = ~csum_fold(csum_sub(src_sum, dst_sum)); return 0; } @@ -51,7 +61,7 @@ static bool ip6t_npt_map_pfx(const struct ip6t_npt_tginfo *npt, idx = i / 32; addr->s6_addr32[idx] &= mask; - addr->s6_addr32[idx] |= npt->dst_pfx.in6.s6_addr32[idx]; + addr->s6_addr32[idx] |= ~mask & npt->dst_pfx.in6.s6_addr32[idx]; } if (pfx_len <= 48) @@ -66,8 +76,8 @@ static bool ip6t_npt_map_pfx(const struct ip6t_npt_tginfo *npt, return false; } - sum = (__force __sum16) csum_add((__force __wsum)addr->s6_addr16[idx], - npt->adjustment); + sum = ~csum_fold(csum_add(csum_unfold((__force __sum16)addr->s6_addr16[idx]), + csum_unfold(npt->adjustment))); if (sum == CSUM_MANGLED_0) sum = 0; *(__force __sum16 *)&addr->s6_addr16[idx] = sum; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 516fbc96feff..0479c64aa83c 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2004,7 +2004,8 @@ static int ieee80211_set_mcast_rate(struct wiphy *wiphy, struct net_device *dev, { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - memcpy(sdata->vif.bss_conf.mcast_rate, rate, sizeof(rate)); + memcpy(sdata->vif.bss_conf.mcast_rate, rate, + sizeof(int) * IEEE80211_NUM_BANDS); return 0; } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index a3552929a21d..5107248af7fb 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3400,6 +3400,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, ret = 0; +out: while (!cfg80211_chandef_usable(sdata->local->hw.wiphy, chandef, IEEE80211_CHAN_DISABLED)) { if (WARN_ON(chandef->width == NL80211_CHAN_WIDTH_20_NOHT)) { @@ -3408,14 +3409,13 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, goto out; } - ret = chandef_downgrade(chandef); + ret |= chandef_downgrade(chandef); } if (chandef->width != vht_chandef.width) sdata_info(sdata, - "local regulatory prevented using AP HT/VHT configuration, downgraded\n"); + "capabilities/regulatory prevented using AP HT/VHT configuration, downgraded\n"); -out: WARN_ON_ONCE(!cfg80211_chandef_valid(chandef)); return ret; } @@ -3529,8 +3529,11 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, */ ret = ieee80211_vif_use_channel(sdata, &chandef, IEEE80211_CHANCTX_SHARED); - while (ret && chandef.width != NL80211_CHAN_WIDTH_20_NOHT) + while (ret && chandef.width != NL80211_CHAN_WIDTH_20_NOHT) { ifmgd->flags |= chandef_downgrade(&chandef); + ret = ieee80211_vif_use_channel(sdata, &chandef, + IEEE80211_CHANCTX_SHARED); + } return ret; } diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 746048b13ef3..ae8ec6f27688 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -61,14 +61,27 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, return 1; } +static void sctp_nat_csum(struct sk_buff *skb, sctp_sctphdr_t *sctph, + unsigned int sctphoff) +{ + __u32 crc32; + struct sk_buff *iter; + + crc32 = sctp_start_cksum((__u8 *)sctph, skb_headlen(skb) - sctphoff); + skb_walk_frags(skb, iter) + crc32 = sctp_update_cksum((u8 *) iter->data, + skb_headlen(iter), crc32); + sctph->checksum = sctp_end_cksum(crc32); + + skb->ip_summed = CHECKSUM_UNNECESSARY; +} + static int sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp, struct ip_vs_iphdr *iph) { sctp_sctphdr_t *sctph; unsigned int sctphoff = iph->len; - struct sk_buff *iter; - __be32 crc32; #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6 && iph->fragoffs) @@ -92,13 +105,7 @@ sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, sctph = (void *) skb_network_header(skb) + sctphoff; sctph->source = cp->vport; - /* Calculate the checksum */ - crc32 = sctp_start_cksum((u8 *) sctph, skb_headlen(skb) - sctphoff); - skb_walk_frags(skb, iter) - crc32 = sctp_update_cksum((u8 *) iter->data, skb_headlen(iter), - crc32); - crc32 = sctp_end_cksum(crc32); - sctph->checksum = crc32; + sctp_nat_csum(skb, sctph, sctphoff); return 1; } @@ -109,8 +116,6 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, { sctp_sctphdr_t *sctph; unsigned int sctphoff = iph->len; - struct sk_buff *iter; - __be32 crc32; #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6 && iph->fragoffs) @@ -134,13 +139,7 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, sctph = (void *) skb_network_header(skb) + sctphoff; sctph->dest = cp->dport; - /* Calculate the checksum */ - crc32 = sctp_start_cksum((u8 *) sctph, skb_headlen(skb) - sctphoff); - skb_walk_frags(skb, iter) - crc32 = sctp_update_cksum((u8 *) iter->data, skb_headlen(iter), - crc32); - crc32 = sctp_end_cksum(crc32); - sctph->checksum = crc32; + sctp_nat_csum(skb, sctph, sctphoff); return 1; } diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index effa10c9e4e3..44fd10c539ac 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1795,6 +1795,8 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid) GFP_KERNEL); if (!tinfo->buf) goto outtinfo; + } else { + tinfo->buf = NULL; } tinfo->id = id; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 51561eafcb72..79e8ed4ac7ce 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1135,9 +1135,9 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg, memset(&opt, 0, sizeof(opt)); opt.rate.rate = cl->rate.rate_bps >> 3; - opt.buffer = cl->buffer; + opt.buffer = PSCHED_NS2TICKS(cl->buffer); opt.ceil.rate = cl->ceil.rate_bps >> 3; - opt.cbuffer = cl->cbuffer; + opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer); opt.quantum = cl->quantum; opt.prio = cl->prio; opt.level = cl->level; diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig index 7521d944c0fb..cf4852814e0c 100644 --- a/net/sctp/Kconfig +++ b/net/sctp/Kconfig @@ -3,8 +3,8 @@ # menuconfig IP_SCTP - tristate "The SCTP Protocol (EXPERIMENTAL)" - depends on INET && EXPERIMENTAL + tristate "The SCTP Protocol" + depends on INET depends on IPV6 || IPV6=n select CRYPTO select CRYPTO_HMAC diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index f3f0f4dc31dd..391a245d5203 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -326,9 +326,10 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, */ rcu_read_lock(); list_for_each_entry_rcu(laddr, &bp->address_list, list) { - if (!laddr->valid && laddr->state != SCTP_ADDR_SRC) + if (!laddr->valid) continue; - if ((laddr->a.sa.sa_family == AF_INET6) && + if ((laddr->state == SCTP_ADDR_SRC) && + (laddr->a.sa.sa_family == AF_INET6) && (scope <= sctp_scope(&laddr->a))) { bmatchlen = sctp_v6_addr_match_len(daddr, &laddr->a); if (!baddr || (matchlen < bmatchlen)) { diff --git a/samples/Kconfig b/samples/Kconfig index 7b6792a18c05..6181c2cc9ca0 100644 --- a/samples/Kconfig +++ b/samples/Kconfig @@ -5,12 +5,6 @@ menuconfig SAMPLES if SAMPLES -config SAMPLE_TRACEPOINTS - tristate "Build tracepoints examples -- loadable modules only" - depends on TRACEPOINTS && m - help - This build tracepoints example modules. - config SAMPLE_TRACE_EVENTS tristate "Build trace_events examples -- loadable modules only" depends on EVENT_TRACING && m diff --git a/samples/Makefile b/samples/Makefile index 5ef08bba96ce..1a60c62e2045 100644 --- a/samples/Makefile +++ b/samples/Makefile @@ -1,4 +1,4 @@ # Makefile for Linux samples code -obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ tracepoints/ trace_events/ \ +obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ trace_events/ \ hw_breakpoint/ kfifo/ kdb/ hidraw/ rpmsg/ seccomp/ diff --git a/samples/tracepoints/Makefile b/samples/tracepoints/Makefile deleted file mode 100644 index 36479ad9ae14..000000000000 --- a/samples/tracepoints/Makefile +++ /dev/null @@ -1,6 +0,0 @@ -# builds the tracepoint example kernel modules; -# then to use one (as root): insmod <module_name.ko> - -obj-$(CONFIG_SAMPLE_TRACEPOINTS) += tracepoint-sample.o -obj-$(CONFIG_SAMPLE_TRACEPOINTS) += tracepoint-probe-sample.o -obj-$(CONFIG_SAMPLE_TRACEPOINTS) += tracepoint-probe-sample2.o diff --git a/samples/tracepoints/tp-samples-trace.h b/samples/tracepoints/tp-samples-trace.h deleted file mode 100644 index 4d46be965961..000000000000 --- a/samples/tracepoints/tp-samples-trace.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef _TP_SAMPLES_TRACE_H -#define _TP_SAMPLES_TRACE_H - -#include <linux/proc_fs.h> /* for struct inode and struct file */ -#include <linux/tracepoint.h> - -DECLARE_TRACE(subsys_event, - TP_PROTO(struct inode *inode, struct file *file), - TP_ARGS(inode, file)); -DECLARE_TRACE_NOARGS(subsys_eventb); -#endif diff --git a/samples/tracepoints/tracepoint-probe-sample.c b/samples/tracepoints/tracepoint-probe-sample.c deleted file mode 100644 index 744c0b9652a7..000000000000 --- a/samples/tracepoints/tracepoint-probe-sample.c +++ /dev/null @@ -1,57 +0,0 @@ -/* - * tracepoint-probe-sample.c - * - * sample tracepoint probes. - */ - -#include <linux/module.h> -#include <linux/file.h> -#include <linux/dcache.h> -#include "tp-samples-trace.h" - -/* - * Here the caller only guarantees locking for struct file and struct inode. - * Locking must therefore be done in the probe to use the dentry. - */ -static void probe_subsys_event(void *ignore, - struct inode *inode, struct file *file) -{ - path_get(&file->f_path); - dget(file->f_path.dentry); - printk(KERN_INFO "Event is encountered with filename %s\n", - file->f_path.dentry->d_name.name); - dput(file->f_path.dentry); - path_put(&file->f_path); -} - -static void probe_subsys_eventb(void *ignore) -{ - printk(KERN_INFO "Event B is encountered\n"); -} - -static int __init tp_sample_trace_init(void) -{ - int ret; - - ret = register_trace_subsys_event(probe_subsys_event, NULL); - WARN_ON(ret); - ret = register_trace_subsys_eventb(probe_subsys_eventb, NULL); - WARN_ON(ret); - - return 0; -} - -module_init(tp_sample_trace_init); - -static void __exit tp_sample_trace_exit(void) -{ - unregister_trace_subsys_eventb(probe_subsys_eventb, NULL); - unregister_trace_subsys_event(probe_subsys_event, NULL); - tracepoint_synchronize_unregister(); -} - -module_exit(tp_sample_trace_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Mathieu Desnoyers"); -MODULE_DESCRIPTION("Tracepoint Probes Samples"); diff --git a/samples/tracepoints/tracepoint-probe-sample2.c b/samples/tracepoints/tracepoint-probe-sample2.c deleted file mode 100644 index 9fcf990e5d4b..000000000000 --- a/samples/tracepoints/tracepoint-probe-sample2.c +++ /dev/null @@ -1,44 +0,0 @@ -/* - * tracepoint-probe-sample2.c - * - * 2nd sample tracepoint probes. - */ - -#include <linux/module.h> -#include <linux/fs.h> -#include "tp-samples-trace.h" - -/* - * Here the caller only guarantees locking for struct file and struct inode. - * Locking must therefore be done in the probe to use the dentry. - */ -static void probe_subsys_event(void *ignore, - struct inode *inode, struct file *file) -{ - printk(KERN_INFO "Event is encountered with inode number %lu\n", - inode->i_ino); -} - -static int __init tp_sample_trace_init(void) -{ - int ret; - - ret = register_trace_subsys_event(probe_subsys_event, NULL); - WARN_ON(ret); - - return 0; -} - -module_init(tp_sample_trace_init); - -static void __exit tp_sample_trace_exit(void) -{ - unregister_trace_subsys_event(probe_subsys_event, NULL); - tracepoint_synchronize_unregister(); -} - -module_exit(tp_sample_trace_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Mathieu Desnoyers"); -MODULE_DESCRIPTION("Tracepoint Probes Samples"); diff --git a/samples/tracepoints/tracepoint-sample.c b/samples/tracepoints/tracepoint-sample.c deleted file mode 100644 index f4d89e008c32..000000000000 --- a/samples/tracepoints/tracepoint-sample.c +++ /dev/null @@ -1,57 +0,0 @@ -/* tracepoint-sample.c - * - * Executes a tracepoint when /proc/tracepoint-sample is opened. - * - * (C) Copyright 2007 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> - * - * This file is released under the GPLv2. - * See the file COPYING for more details. - */ - -#include <linux/module.h> -#include <linux/sched.h> -#include <linux/proc_fs.h> -#include "tp-samples-trace.h" - -DEFINE_TRACE(subsys_event); -DEFINE_TRACE(subsys_eventb); - -struct proc_dir_entry *pentry_sample; - -static int my_open(struct inode *inode, struct file *file) -{ - int i; - - trace_subsys_event(inode, file); - for (i = 0; i < 10; i++) - trace_subsys_eventb(); - return -EPERM; -} - -static const struct file_operations mark_ops = { - .open = my_open, - .llseek = noop_llseek, -}; - -static int __init sample_init(void) -{ - printk(KERN_ALERT "sample init\n"); - pentry_sample = proc_create("tracepoint-sample", 0444, NULL, - &mark_ops); - if (!pentry_sample) - return -EPERM; - return 0; -} - -static void __exit sample_exit(void) -{ - printk(KERN_ALERT "sample exit\n"); - remove_proc_entry("tracepoint-sample", NULL); -} - -module_init(sample_init) -module_exit(sample_exit) - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Mathieu Desnoyers"); -MODULE_DESCRIPTION("Tracepoint sample"); diff --git a/tools/Makefile b/tools/Makefile index 1f9a529fe544..798fa0ef048e 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -15,7 +15,7 @@ help: @echo ' x86_energy_perf_policy - Intel energy policy tool' @echo '' @echo 'You can do:' - @echo ' $$ make -C tools/<tool>_install' + @echo ' $$ make -C tools/ <tool>_install' @echo '' @echo ' from the kernel command line to build and install one of' @echo ' the tools above' diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 5a824e355d04..82b0606dcb8a 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -13,8 +13,7 @@ * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * License along with this program; if not, see <http://www.gnu.org/licenses> * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * @@ -1224,6 +1223,34 @@ static int field_is_long(struct format_field *field) return 0; } +static unsigned int type_size(const char *name) +{ + /* This covers all FIELD_IS_STRING types. */ + static struct { + const char *type; + unsigned int size; + } table[] = { + { "u8", 1 }, + { "u16", 2 }, + { "u32", 4 }, + { "u64", 8 }, + { "s8", 1 }, + { "s16", 2 }, + { "s32", 4 }, + { "s64", 8 }, + { "char", 1 }, + { }, + }; + int i; + + for (i = 0; table[i].type; i++) { + if (!strcmp(table[i].type, name)) + return table[i].size; + } + + return 0; +} + static int event_read_fields(struct event_format *event, struct format_field **fields) { struct format_field *field = NULL; @@ -1233,6 +1260,8 @@ static int event_read_fields(struct event_format *event, struct format_field **f int count = 0; do { + unsigned int size_dynamic = 0; + type = read_token(&token); if (type == EVENT_NEWLINE) { free_token(token); @@ -1391,6 +1420,7 @@ static int event_read_fields(struct event_format *event, struct format_field **f field->type = new_type; strcat(field->type, " "); strcat(field->type, field->name); + size_dynamic = type_size(field->name); free_token(field->name); strcat(field->type, brackets); field->name = token; @@ -1463,7 +1493,8 @@ static int event_read_fields(struct event_format *event, struct format_field **f if (read_expect_type(EVENT_ITEM, &token)) goto fail; - /* add signed type */ + if (strtoul(token, NULL, 0)) + field->flags |= FIELD_IS_SIGNED; free_token(token); if (read_expected(EVENT_OP, ";") < 0) @@ -1478,10 +1509,14 @@ static int event_read_fields(struct event_format *event, struct format_field **f if (field->flags & FIELD_IS_ARRAY) { if (field->arraylen) field->elementsize = field->size / field->arraylen; + else if (field->flags & FIELD_IS_DYNAMIC) + field->elementsize = size_dynamic; else if (field->flags & FIELD_IS_STRING) field->elementsize = 1; - else - field->elementsize = event->pevent->long_size; + else if (field->flags & FIELD_IS_LONG) + field->elementsize = event->pevent ? + event->pevent->long_size : + sizeof(long); } else field->elementsize = field->size; @@ -1785,6 +1820,8 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok) strcmp(token, "/") == 0 || strcmp(token, "<") == 0 || strcmp(token, ">") == 0 || + strcmp(token, "<=") == 0 || + strcmp(token, ">=") == 0 || strcmp(token, "==") == 0 || strcmp(token, "!=") == 0) { @@ -2481,7 +2518,7 @@ process_dynamic_array(struct event_format *event, struct print_arg *arg, char ** free_token(token); arg = alloc_arg(); - if (!field) { + if (!arg) { do_warning("%s: not enough memory!", __func__); *tok = NULL; return EVENT_ERROR; diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index 24a4bbabc5d5..7be7e89533e4 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -13,8 +13,7 @@ * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * License along with this program; if not, see <http://www.gnu.org/licenses> * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ diff --git a/tools/lib/traceevent/event-utils.h b/tools/lib/traceevent/event-utils.h index bc075006966e..e76c9acb92cd 100644 --- a/tools/lib/traceevent/event-utils.h +++ b/tools/lib/traceevent/event-utils.h @@ -13,8 +13,7 @@ * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * License along with this program; if not, see <http://www.gnu.org/licenses> * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c index 5ea4326ad11f..2500e75583fc 100644 --- a/tools/lib/traceevent/parse-filter.c +++ b/tools/lib/traceevent/parse-filter.c @@ -13,8 +13,7 @@ * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * License along with this program; if not, see <http://www.gnu.org/licenses> * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ diff --git a/tools/lib/traceevent/parse-utils.c b/tools/lib/traceevent/parse-utils.c index f023a133abb6..bba701cf10e6 100644 --- a/tools/lib/traceevent/parse-utils.c +++ b/tools/lib/traceevent/parse-utils.c @@ -1,3 +1,22 @@ +/* + * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; + * version 2.1 of the License (not later!) + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses> + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ #include <stdio.h> #include <stdlib.h> #include <string.h> diff --git a/tools/lib/traceevent/trace-seq.c b/tools/lib/traceevent/trace-seq.c index b1ccc923e8a5..a57db805136a 100644 --- a/tools/lib/traceevent/trace-seq.c +++ b/tools/lib/traceevent/trace-seq.c @@ -13,8 +13,7 @@ * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * License along with this program; if not, see <http://www.gnu.org/licenses> * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile index ef6d22e879eb..eb30044a922a 100644 --- a/tools/perf/Documentation/Makefile +++ b/tools/perf/Documentation/Makefile @@ -222,10 +222,14 @@ install-pdf: pdf #install-html: html # '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir) +ifneq ($(MAKECMDGOALS),clean) +ifneq ($(MAKECMDGOALS),tags) $(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE $(QUIET_SUBDIR0)../ $(QUIET_SUBDIR1) $(OUTPUT)PERF-VERSION-FILE -include $(OUTPUT)PERF-VERSION-FILE +endif +endif # # Determine "include::" file references in asciidoc files. diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt index c8ffd9fd5c6a..5ad07ef417f0 100644 --- a/tools/perf/Documentation/perf-annotate.txt +++ b/tools/perf/Documentation/perf-annotate.txt @@ -61,11 +61,13 @@ OPTIONS --stdio:: Use the stdio interface. ---tui:: Use the TUI interface Use of --tui requires a tty, if one is not +--tui:: Use the TUI interface. Use of --tui requires a tty, if one is not present, as when piping to other commands, the stdio interface is used. This interfaces starts by centering on the line with more samples, TAB/UNTAB cycles through the lines with more samples. +--gtk:: Use the GTK interface. + -C:: --cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can be provided as a comma-separated list with no space: 0,1. Ranges of @@ -88,6 +90,9 @@ OPTIONS --objdump=<path>:: Path to objdump binary. +--skip-missing:: + Skip symbols that cannot be annotated. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-report[1] diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt index c1057701a7dc..e9a8349a7172 100644 --- a/tools/perf/Documentation/perf-buildid-cache.txt +++ b/tools/perf/Documentation/perf-buildid-cache.txt @@ -24,6 +24,13 @@ OPTIONS -r:: --remove=:: Remove specified file from the cache. +-M:: +--missing=:: + List missing build ids in the cache for the specified file. +-u:: +--update:: + Update specified file of the cache. It can be used to update kallsyms + kernel dso to vmlinux in order to support annotation. -v:: --verbose:: Be more verbose. diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt index 194f37d635df..5b3123d5721f 100644 --- a/tools/perf/Documentation/perf-diff.txt +++ b/tools/perf/Documentation/perf-diff.txt @@ -22,10 +22,6 @@ specified perf.data files. OPTIONS ------- --M:: ---displacement:: - Show position displacement relative to baseline. - -D:: --dump-raw-trace:: Dump raw trace in ASCII. diff --git a/tools/perf/Documentation/perf-evlist.txt b/tools/perf/Documentation/perf-evlist.txt index 15217345c2fa..1ceb3700ffbb 100644 --- a/tools/perf/Documentation/perf-evlist.txt +++ b/tools/perf/Documentation/perf-evlist.txt @@ -28,6 +28,10 @@ OPTIONS --verbose=:: Show all fields. +-g:: +--group:: + Show event group information. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-list[1], diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index f4d91bebd59d..02284a0067f0 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -57,11 +57,44 @@ OPTIONS -s:: --sort=:: - Sort by key(s): pid, comm, dso, symbol, parent, srcline. + Sort histogram entries by given key(s) - multiple keys can be specified + in CSV format. Following sort keys are available: + pid, comm, dso, symbol, parent, cpu, srcline. + + Each key has following meaning: + + - comm: command (name) of the task which can be read via /proc/<pid>/comm + - pid: command and tid of the task + - dso: name of library or module executed at the time of sample + - symbol: name of function executed at the time of sample + - parent: name of function matched to the parent regex filter. Unmatched + entries are displayed as "[other]". + - cpu: cpu number the task ran at the time of sample + - srcline: filename and line number executed at the time of sample. The + DWARF debuggin info must be provided. + + By default, comm, dso and symbol keys are used. + (i.e. --sort comm,dso,symbol) + + If --branch-stack option is used, following sort keys are also + available: + dso_from, dso_to, symbol_from, symbol_to, mispredict. + + - dso_from: name of library or module branched from + - dso_to: name of library or module branched to + - symbol_from: name of function branched from + - symbol_to: name of function branched to + - mispredict: "N" for predicted branch, "Y" for mispredicted branch + + And default sort keys are changed to comm, dso_from, symbol_from, dso_to + and symbol_to, see '--branch-stack'. -p:: --parent=<regex>:: - regex filter to identify parent, see: '--sort parent' + A regex filter to identify parent. The parent is a caller of this + function and searched through the callchain, thus it requires callchain + information recorded. The pattern is in the exteneded regex format and + defaults to "\^sys_|^do_page_fault", see '--sort parent'. -x:: --exclude-other:: @@ -74,7 +107,6 @@ OPTIONS -t:: --field-separator=:: - Use a special separator character and don't pad with spaces, replacing all occurrences of this separator in symbol names (and other output) with a '.' character, that thus it's the only non valid separator. @@ -171,6 +203,9 @@ OPTIONS --objdump=<path>:: Path to objdump binary. +--group:: + Show event group information together. + SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-annotate[1] diff --git a/tools/perf/Documentation/perf-script-python.txt b/tools/perf/Documentation/perf-script-python.txt index a4027f221a53..9f1f054b8432 100644 --- a/tools/perf/Documentation/perf-script-python.txt +++ b/tools/perf/Documentation/perf-script-python.txt @@ -336,7 +336,6 @@ scripts listed by the 'perf script -l' command e.g.: ---- root@tropicana:~# perf script -l List of available trace scripts: - workqueue-stats workqueue stats (ins/exe/create/destroy) wakeup-latency system-wide min/max/avg wakeup latency rw-by-file <comm> r/w activity for a program, by file rw-by-pid system-wide r/w activity @@ -402,7 +401,6 @@ should show a new entry for your script: ---- root@tropicana:~# perf script -l List of available trace scripts: - workqueue-stats workqueue stats (ins/exe/create/destroy) wakeup-latency system-wide min/max/avg wakeup latency rw-by-file <comm> r/w activity for a program, by file rw-by-pid system-wide r/w activity diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index cf0c3107e06e..faf4f4feebcc 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -114,6 +114,17 @@ with it. --append may be used here. Examples: perf stat --repeat 10 --null --sync --pre 'make -s O=defconfig-build/clean' -- make -s -j64 O=defconfig-build/ bzImage +-I msecs:: +--interval-print msecs:: + Print count deltas every N milliseconds (minimum: 100ms) + example: perf stat -I 1000 -e cycles -a sleep 5 + +--aggr-socket:: +Aggregate counts per processor socket for system-wide mode measurements. This +is a useful mode to detect imbalance between sockets. To enable this mode, +use --aggr-socket in addition to -a. (system-wide). The output includes the +socket number and the number of online processors on that socket. This is +useful to gauge the amount of aggregation. EXAMPLES -------- diff --git a/tools/perf/Documentation/perf-test.txt b/tools/perf/Documentation/perf-test.txt index b24ac40fcd58..d1d3e5121f89 100644 --- a/tools/perf/Documentation/perf-test.txt +++ b/tools/perf/Documentation/perf-test.txt @@ -23,6 +23,10 @@ from 'perf test list'. OPTIONS ------- +-s:: +--skip:: + Tests to skip (comma separater numeric list). + -v:: --verbose:: Be more verbose. diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index 5b80d84d6b4a..a414bc95fd52 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -60,7 +60,7 @@ Default is to monitor all CPUS. -i:: --inherit:: - Child tasks inherit counters, only makes sens with -p option. + Child tasks do not inherit counters. -k <path>:: --vmlinux=<path>:: diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 8ab05e543ef4..a2108ca1cc17 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -47,10 +47,11 @@ include config/utilities.mak # backtrace post unwind. # # Define NO_BACKTRACE if you do not want stack backtrace debug feature +# +# Define NO_LIBNUMA if you do not want numa perf benchmark $(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE @$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT) --include $(OUTPUT)PERF-VERSION-FILE uname_M := $(shell uname -m 2>/dev/null || echo not) @@ -148,13 +149,25 @@ RM = rm -f MKDIR = mkdir FIND = find INSTALL = install +FLEX = flex +BISON= bison # sparse is architecture-neutral, which means that we need to tell it # explicitly what architecture to check for. Fix this up for yours.. SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__ +ifneq ($(MAKECMDGOALS),clean) +ifneq ($(MAKECMDGOALS),tags) -include config/feature-tests.mak +ifeq ($(call get-executable,$(FLEX)),) + dummy := $(error Error: $(FLEX) is missing on this system, please install it) +endif + +ifeq ($(call get-executable,$(BISON)),) + dummy := $(error Error: $(BISON) is missing on this system, please install it) +endif + ifeq ($(call try-cc,$(SOURCE_HELLO),$(CFLAGS) -Werror -fstack-protector-all,-fstack-protector-all),y) CFLAGS := $(CFLAGS) -fstack-protector-all endif @@ -206,6 +219,8 @@ ifeq ($(call try-cc,$(SOURCE_BIONIC),$(CFLAGS),bionic),y) EXTLIBS := $(filter-out -lpthread,$(EXTLIBS)) BASIC_CFLAGS += -I. endif +endif # MAKECMDGOALS != tags +endif # MAKECMDGOALS != clean # Guard against environment variables BUILTIN_OBJS = @@ -230,11 +245,19 @@ endif LIBTRACEEVENT = $(TE_PATH)libtraceevent.a TE_LIB := -L$(TE_PATH) -ltraceevent +export LIBTRACEEVENT + +# python extension build directories +PYTHON_EXTBUILD := $(OUTPUT)python_ext_build/ +PYTHON_EXTBUILD_LIB := $(PYTHON_EXTBUILD)lib/ +PYTHON_EXTBUILD_TMP := $(PYTHON_EXTBUILD)tmp/ +export PYTHON_EXTBUILD_LIB PYTHON_EXTBUILD_TMP + +python-clean := rm -rf $(PYTHON_EXTBUILD) $(OUTPUT)python/perf.so + PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources) PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py -export LIBTRACEEVENT - $(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(QUIET_GEN)CFLAGS='$(BASIC_CFLAGS)' $(PYTHON_WORD) util/setup.py \ --quiet build_ext; \ @@ -269,20 +292,17 @@ endif export PERL_PATH -FLEX = flex -BISON= bison - $(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c $(QUIET_FLEX)$(FLEX) --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) -t util/parse-events.l > $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c: util/parse-events.y - $(QUIET_BISON)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $(OUTPUT)util/parse-events-bison.c + $(QUIET_BISON)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $(OUTPUT)util/parse-events-bison.c -p parse_events_ $(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c $(QUIET_FLEX)$(FLEX) --header-file=$(OUTPUT)util/pmu-flex.h -t util/pmu.l > $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c: util/pmu.y - $(QUIET_BISON)$(BISON) -v util/pmu.y -d -o $(OUTPUT)util/pmu-bison.c + $(QUIET_BISON)$(BISON) -v util/pmu.y -d -o $(OUTPUT)util/pmu-bison.c -p perf_pmu_ $(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c $(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c @@ -378,8 +398,11 @@ LIB_H += util/rblist.h LIB_H += util/intlist.h LIB_H += util/perf_regs.h LIB_H += util/unwind.h -LIB_H += ui/helpline.h LIB_H += util/vdso.h +LIB_H += ui/helpline.h +LIB_H += ui/progress.h +LIB_H += ui/util.h +LIB_H += ui/ui.h LIB_OBJS += $(OUTPUT)util/abspath.o LIB_OBJS += $(OUTPUT)util/alias.o @@ -453,6 +476,7 @@ LIB_OBJS += $(OUTPUT)util/stat.o LIB_OBJS += $(OUTPUT)ui/setup.o LIB_OBJS += $(OUTPUT)ui/helpline.o LIB_OBJS += $(OUTPUT)ui/progress.o +LIB_OBJS += $(OUTPUT)ui/util.o LIB_OBJS += $(OUTPUT)ui/hist.o LIB_OBJS += $(OUTPUT)ui/stdio/hist.o @@ -471,7 +495,8 @@ LIB_OBJS += $(OUTPUT)tests/rdpmc.o LIB_OBJS += $(OUTPUT)tests/evsel-roundtrip-name.o LIB_OBJS += $(OUTPUT)tests/evsel-tp-sched.o LIB_OBJS += $(OUTPUT)tests/pmu.o -LIB_OBJS += $(OUTPUT)tests/util.o +LIB_OBJS += $(OUTPUT)tests/hists_link.o +LIB_OBJS += $(OUTPUT)tests/python-use.o BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o BUILTIN_OBJS += $(OUTPUT)builtin-bench.o @@ -510,14 +535,13 @@ PERFLIBS = $(LIB_FILE) $(LIBTRACEEVENT) # # Platform specific tweaks # +ifneq ($(MAKECMDGOALS),clean) +ifneq ($(MAKECMDGOALS),tags) # We choose to avoid "if .. else if .. else .. endif endif" # because maintaining the nesting to match is a pain. If # we had "elif" things would have been much nicer... --include config.mak.autogen --include config.mak - ifdef NO_LIBELF NO_DWARF := 1 NO_DEMANGLE := 1 @@ -557,6 +581,11 @@ else endif # SOURCE_LIBELF endif # NO_LIBELF +# There's only x86 (both 32 and 64) support for CFI unwind so far +ifneq ($(ARCH),x86) + NO_LIBUNWIND := 1 +endif + ifndef NO_LIBUNWIND # for linking with debug library, run like: # make DEBUG=1 LIBUNWIND_DIR=/opt/libunwind/ @@ -646,7 +675,6 @@ ifndef NO_NEWT LIB_OBJS += $(OUTPUT)ui/browsers/hists.o LIB_OBJS += $(OUTPUT)ui/browsers/map.o LIB_OBJS += $(OUTPUT)ui/browsers/scripts.o - LIB_OBJS += $(OUTPUT)ui/util.o LIB_OBJS += $(OUTPUT)ui/tui/setup.o LIB_OBJS += $(OUTPUT)ui/tui/util.o LIB_OBJS += $(OUTPUT)ui/tui/helpline.o @@ -655,9 +683,6 @@ ifndef NO_NEWT LIB_H += ui/browsers/map.h LIB_H += ui/keysyms.h LIB_H += ui/libslang.h - LIB_H += ui/progress.h - LIB_H += ui/util.h - LIB_H += ui/ui.h endif endif @@ -673,14 +698,12 @@ ifndef NO_GTK2 BASIC_CFLAGS += $(shell pkg-config --cflags gtk+-2.0 2>/dev/null) EXTLIBS += $(shell pkg-config --libs gtk+-2.0 2>/dev/null) LIB_OBJS += $(OUTPUT)ui/gtk/browser.o + LIB_OBJS += $(OUTPUT)ui/gtk/hists.o LIB_OBJS += $(OUTPUT)ui/gtk/setup.o LIB_OBJS += $(OUTPUT)ui/gtk/util.o LIB_OBJS += $(OUTPUT)ui/gtk/helpline.o LIB_OBJS += $(OUTPUT)ui/gtk/progress.o - # Make sure that it'd be included only once. - ifeq ($(findstring -DNEWT_SUPPORT,$(BASIC_CFLAGS)),) - LIB_OBJS += $(OUTPUT)ui/util.o - endif + LIB_OBJS += $(OUTPUT)ui/gtk/annotate.o endif endif @@ -707,7 +730,7 @@ disable-python = $(eval $(disable-python_code)) define disable-python_code BASIC_CFLAGS += -DNO_LIBPYTHON $(if $(1),$(warning No $(1) was found)) - $(warning Python support won't be built) + $(warning Python support will not be built) endef override PYTHON := \ @@ -715,19 +738,10 @@ override PYTHON := \ ifndef PYTHON $(call disable-python,python interpreter) - python-clean := else PYTHON_WORD := $(call shell-wordify,$(PYTHON)) - # python extension build directories - PYTHON_EXTBUILD := $(OUTPUT)python_ext_build/ - PYTHON_EXTBUILD_LIB := $(PYTHON_EXTBUILD)lib/ - PYTHON_EXTBUILD_TMP := $(PYTHON_EXTBUILD)tmp/ - export PYTHON_EXTBUILD_LIB PYTHON_EXTBUILD_TMP - - python-clean := rm -rf $(PYTHON_EXTBUILD) $(OUTPUT)python/perf.so - ifdef NO_LIBPYTHON $(call disable-python) else @@ -839,10 +853,24 @@ ifndef NO_BACKTRACE endif endif +ifndef NO_LIBNUMA + FLAGS_LIBNUMA = $(ALL_CFLAGS) $(ALL_LDFLAGS) -lnuma + ifneq ($(call try-cc,$(SOURCE_LIBNUMA),$(FLAGS_LIBNUMA),libnuma),y) + msg := $(warning No numa.h found, disables 'perf bench numa mem' benchmark, please install numa-libs-devel or libnuma-dev); + else + BASIC_CFLAGS += -DLIBNUMA_SUPPORT + BUILTIN_OBJS += $(OUTPUT)bench/numa.o + EXTLIBS += -lnuma + endif +endif + ifdef ASCIIDOC8 export ASCIIDOC8 endif +endif # MAKECMDGOALS != tags +endif # MAKECMDGOALS != clean + # Shell quote (do not use $(call) to accommodate ancient setups); ETC_PERFCONFIG_SQ = $(subst ','\'',$(ETC_PERFCONFIG)) @@ -884,7 +912,7 @@ strip: $(PROGRAMS) $(OUTPUT)perf $(STRIP) $(STRIP_OPTS) $(PROGRAMS) $(OUTPUT)perf $(OUTPUT)perf.o: perf.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -DPERF_VERSION='"$(PERF_VERSION)"' \ + $(QUIET_CC)$(CC) -include $(OUTPUT)PERF-VERSION-FILE \ '-DPERF_HTML_PATH="$(htmldir_SQ)"' \ $(ALL_CFLAGS) -c $(filter %.c,$^) -o $@ @@ -948,7 +976,13 @@ $(OUTPUT)util/exec_cmd.o: util/exec_cmd.c $(OUTPUT)PERF-CFLAGS $(OUTPUT)tests/attr.o: tests/attr.c $(OUTPUT)PERF-CFLAGS $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) \ - '-DBINDIR="$(bindir_SQ)"' \ + '-DBINDIR="$(bindir_SQ)"' -DPYTHON='"$(PYTHON_WORD)"' \ + $< + +$(OUTPUT)tests/python-use.o: tests/python-use.c $(OUTPUT)PERF-CFLAGS + $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) \ + -DPYTHONPATH='"$(OUTPUT)python"' \ + -DPYTHON='"$(PYTHON_WORD)"' \ $< $(OUTPUT)util/config.o: util/config.c $(OUTPUT)PERF-CFLAGS @@ -1099,7 +1133,7 @@ perfexec_instdir = $(prefix)/$(perfexecdir) endif perfexec_instdir_SQ = $(subst ','\'',$(perfexec_instdir)) -install: all try-install-man +install-bin: all $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)' $(INSTALL) $(OUTPUT)perf '$(DESTDIR_SQ)$(bindir_SQ)' $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace' @@ -1120,6 +1154,8 @@ install: all try-install-man $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr' $(INSTALL) tests/attr/* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr' +install: install-bin try-install-man + install-python_ext: $(PYTHON_WORD) util/setup.py --quiet install --root='/$(DESTDIR_SQ)' diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c index 3e975cb6232e..aacef07ebf31 100644 --- a/tools/perf/arch/common.c +++ b/tools/perf/arch/common.c @@ -155,6 +155,7 @@ static int perf_session_env__lookup_binutils_path(struct perf_session_env *env, if (lookup_path(buf)) goto out; free(buf); + buf = NULL; } if (!strcmp(arch, "arm")) diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index 8f89998eeaf4..a5223e6a7b43 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -1,6 +1,7 @@ #ifndef BENCH_H #define BENCH_H +extern int bench_numa(int argc, const char **argv, const char *prefix); extern int bench_sched_messaging(int argc, const char **argv, const char *prefix); extern int bench_sched_pipe(int argc, const char **argv, const char *prefix); extern int bench_mem_memcpy(int argc, const char **argv, diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c new file mode 100644 index 000000000000..30d1c3225b46 --- /dev/null +++ b/tools/perf/bench/numa.c @@ -0,0 +1,1731 @@ +/* + * numa.c + * + * numa: Simulate NUMA-sensitive workload and measure their NUMA performance + */ + +#include "../perf.h" +#include "../builtin.h" +#include "../util/util.h" +#include "../util/parse-options.h" + +#include "bench.h" + +#include <errno.h> +#include <sched.h> +#include <stdio.h> +#include <assert.h> +#include <malloc.h> +#include <signal.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <pthread.h> +#include <sys/mman.h> +#include <sys/time.h> +#include <sys/wait.h> +#include <sys/prctl.h> +#include <sys/types.h> + +#include <numa.h> +#include <numaif.h> + +/* + * Regular printout to the terminal, supressed if -q is specified: + */ +#define tprintf(x...) do { if (g && g->p.show_details >= 0) printf(x); } while (0) + +/* + * Debug printf: + */ +#define dprintf(x...) do { if (g && g->p.show_details >= 1) printf(x); } while (0) + +struct thread_data { + int curr_cpu; + cpu_set_t bind_cpumask; + int bind_node; + u8 *process_data; + int process_nr; + int thread_nr; + int task_nr; + unsigned int loops_done; + u64 val; + u64 runtime_ns; + pthread_mutex_t *process_lock; +}; + +/* Parameters set by options: */ + +struct params { + /* Startup synchronization: */ + bool serialize_startup; + + /* Task hierarchy: */ + int nr_proc; + int nr_threads; + + /* Working set sizes: */ + const char *mb_global_str; + const char *mb_proc_str; + const char *mb_proc_locked_str; + const char *mb_thread_str; + + double mb_global; + double mb_proc; + double mb_proc_locked; + double mb_thread; + + /* Access patterns to the working set: */ + bool data_reads; + bool data_writes; + bool data_backwards; + bool data_zero_memset; + bool data_rand_walk; + u32 nr_loops; + u32 nr_secs; + u32 sleep_usecs; + + /* Working set initialization: */ + bool init_zero; + bool init_random; + bool init_cpu0; + + /* Misc options: */ + int show_details; + int run_all; + int thp; + + long bytes_global; + long bytes_process; + long bytes_process_locked; + long bytes_thread; + + int nr_tasks; + bool show_quiet; + + bool show_convergence; + bool measure_convergence; + + int perturb_secs; + int nr_cpus; + int nr_nodes; + + /* Affinity options -C and -N: */ + char *cpu_list_str; + char *node_list_str; +}; + + +/* Global, read-writable area, accessible to all processes and threads: */ + +struct global_info { + u8 *data; + + pthread_mutex_t startup_mutex; + int nr_tasks_started; + + pthread_mutex_t startup_done_mutex; + + pthread_mutex_t start_work_mutex; + int nr_tasks_working; + + pthread_mutex_t stop_work_mutex; + u64 bytes_done; + + struct thread_data *threads; + + /* Convergence latency measurement: */ + bool all_converged; + bool stop_work; + + int print_once; + + struct params p; +}; + +static struct global_info *g = NULL; + +static int parse_cpus_opt(const struct option *opt, const char *arg, int unset); +static int parse_nodes_opt(const struct option *opt, const char *arg, int unset); + +struct params p0; + +static const struct option options[] = { + OPT_INTEGER('p', "nr_proc" , &p0.nr_proc, "number of processes"), + OPT_INTEGER('t', "nr_threads" , &p0.nr_threads, "number of threads per process"), + + OPT_STRING('G', "mb_global" , &p0.mb_global_str, "MB", "global memory (MBs)"), + OPT_STRING('P', "mb_proc" , &p0.mb_proc_str, "MB", "process memory (MBs)"), + OPT_STRING('L', "mb_proc_locked", &p0.mb_proc_locked_str,"MB", "process serialized/locked memory access (MBs), <= process_memory"), + OPT_STRING('T', "mb_thread" , &p0.mb_thread_str, "MB", "thread memory (MBs)"), + + OPT_UINTEGER('l', "nr_loops" , &p0.nr_loops, "max number of loops to run"), + OPT_UINTEGER('s', "nr_secs" , &p0.nr_secs, "max number of seconds to run"), + OPT_UINTEGER('u', "usleep" , &p0.sleep_usecs, "usecs to sleep per loop iteration"), + + OPT_BOOLEAN('R', "data_reads" , &p0.data_reads, "access the data via writes (can be mixed with -W)"), + OPT_BOOLEAN('W', "data_writes" , &p0.data_writes, "access the data via writes (can be mixed with -R)"), + OPT_BOOLEAN('B', "data_backwards", &p0.data_backwards, "access the data backwards as well"), + OPT_BOOLEAN('Z', "data_zero_memset", &p0.data_zero_memset,"access the data via glibc bzero only"), + OPT_BOOLEAN('r', "data_rand_walk", &p0.data_rand_walk, "access the data with random (32bit LFSR) walk"), + + + OPT_BOOLEAN('z', "init_zero" , &p0.init_zero, "bzero the initial allocations"), + OPT_BOOLEAN('I', "init_random" , &p0.init_random, "randomize the contents of the initial allocations"), + OPT_BOOLEAN('0', "init_cpu0" , &p0.init_cpu0, "do the initial allocations on CPU#0"), + OPT_INTEGER('x', "perturb_secs", &p0.perturb_secs, "perturb thread 0/0 every X secs, to test convergence stability"), + + OPT_INCR ('d', "show_details" , &p0.show_details, "Show details"), + OPT_INCR ('a', "all" , &p0.run_all, "Run all tests in the suite"), + OPT_INTEGER('H', "thp" , &p0.thp, "MADV_NOHUGEPAGE < 0 < MADV_HUGEPAGE"), + OPT_BOOLEAN('c', "show_convergence", &p0.show_convergence, "show convergence details"), + OPT_BOOLEAN('m', "measure_convergence", &p0.measure_convergence, "measure convergence latency"), + OPT_BOOLEAN('q', "quiet" , &p0.show_quiet, "bzero the initial allocations"), + OPT_BOOLEAN('S', "serialize-startup", &p0.serialize_startup,"serialize thread startup"), + + /* Special option string parsing callbacks: */ + OPT_CALLBACK('C', "cpus", NULL, "cpu[,cpu2,...cpuN]", + "bind the first N tasks to these specific cpus (the rest is unbound)", + parse_cpus_opt), + OPT_CALLBACK('M', "memnodes", NULL, "node[,node2,...nodeN]", + "bind the first N tasks to these specific memory nodes (the rest is unbound)", + parse_nodes_opt), + OPT_END() +}; + +static const char * const bench_numa_usage[] = { + "perf bench numa <options>", + NULL +}; + +static const char * const numa_usage[] = { + "perf bench numa mem [<options>]", + NULL +}; + +static cpu_set_t bind_to_cpu(int target_cpu) +{ + cpu_set_t orig_mask, mask; + int ret; + + ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask); + BUG_ON(ret); + + CPU_ZERO(&mask); + + if (target_cpu == -1) { + int cpu; + + for (cpu = 0; cpu < g->p.nr_cpus; cpu++) + CPU_SET(cpu, &mask); + } else { + BUG_ON(target_cpu < 0 || target_cpu >= g->p.nr_cpus); + CPU_SET(target_cpu, &mask); + } + + ret = sched_setaffinity(0, sizeof(mask), &mask); + BUG_ON(ret); + + return orig_mask; +} + +static cpu_set_t bind_to_node(int target_node) +{ + int cpus_per_node = g->p.nr_cpus/g->p.nr_nodes; + cpu_set_t orig_mask, mask; + int cpu; + int ret; + + BUG_ON(cpus_per_node*g->p.nr_nodes != g->p.nr_cpus); + BUG_ON(!cpus_per_node); + + ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask); + BUG_ON(ret); + + CPU_ZERO(&mask); + + if (target_node == -1) { + for (cpu = 0; cpu < g->p.nr_cpus; cpu++) + CPU_SET(cpu, &mask); + } else { + int cpu_start = (target_node + 0) * cpus_per_node; + int cpu_stop = (target_node + 1) * cpus_per_node; + + BUG_ON(cpu_stop > g->p.nr_cpus); + + for (cpu = cpu_start; cpu < cpu_stop; cpu++) + CPU_SET(cpu, &mask); + } + + ret = sched_setaffinity(0, sizeof(mask), &mask); + BUG_ON(ret); + + return orig_mask; +} + +static void bind_to_cpumask(cpu_set_t mask) +{ + int ret; + + ret = sched_setaffinity(0, sizeof(mask), &mask); + BUG_ON(ret); +} + +static void mempol_restore(void) +{ + int ret; + + ret = set_mempolicy(MPOL_DEFAULT, NULL, g->p.nr_nodes-1); + + BUG_ON(ret); +} + +static void bind_to_memnode(int node) +{ + unsigned long nodemask; + int ret; + + if (node == -1) + return; + + BUG_ON(g->p.nr_nodes > (int)sizeof(nodemask)); + nodemask = 1L << node; + + ret = set_mempolicy(MPOL_BIND, &nodemask, sizeof(nodemask)*8); + dprintf("binding to node %d, mask: %016lx => %d\n", node, nodemask, ret); + + BUG_ON(ret); +} + +#define HPSIZE (2*1024*1024) + +#define set_taskname(fmt...) \ +do { \ + char name[20]; \ + \ + snprintf(name, 20, fmt); \ + prctl(PR_SET_NAME, name); \ +} while (0) + +static u8 *alloc_data(ssize_t bytes0, int map_flags, + int init_zero, int init_cpu0, int thp, int init_random) +{ + cpu_set_t orig_mask; + ssize_t bytes; + u8 *buf; + int ret; + + if (!bytes0) + return NULL; + + /* Allocate and initialize all memory on CPU#0: */ + if (init_cpu0) { + orig_mask = bind_to_node(0); + bind_to_memnode(0); + } + + bytes = bytes0 + HPSIZE; + + buf = (void *)mmap(0, bytes, PROT_READ|PROT_WRITE, MAP_ANON|map_flags, -1, 0); + BUG_ON(buf == (void *)-1); + + if (map_flags == MAP_PRIVATE) { + if (thp > 0) { + ret = madvise(buf, bytes, MADV_HUGEPAGE); + if (ret && !g->print_once) { + g->print_once = 1; + printf("WARNING: Could not enable THP - do: 'echo madvise > /sys/kernel/mm/transparent_hugepage/enabled'\n"); + } + } + if (thp < 0) { + ret = madvise(buf, bytes, MADV_NOHUGEPAGE); + if (ret && !g->print_once) { + g->print_once = 1; + printf("WARNING: Could not disable THP: run a CONFIG_TRANSPARENT_HUGEPAGE kernel?\n"); + } + } + } + + if (init_zero) { + bzero(buf, bytes); + } else { + /* Initialize random contents, different in each word: */ + if (init_random) { + u64 *wbuf = (void *)buf; + long off = rand(); + long i; + + for (i = 0; i < bytes/8; i++) + wbuf[i] = i + off; + } + } + + /* Align to 2MB boundary: */ + buf = (void *)(((unsigned long)buf + HPSIZE-1) & ~(HPSIZE-1)); + + /* Restore affinity: */ + if (init_cpu0) { + bind_to_cpumask(orig_mask); + mempol_restore(); + } + + return buf; +} + +static void free_data(void *data, ssize_t bytes) +{ + int ret; + + if (!data) + return; + + ret = munmap(data, bytes); + BUG_ON(ret); +} + +/* + * Create a shared memory buffer that can be shared between processes, zeroed: + */ +static void * zalloc_shared_data(ssize_t bytes) +{ + return alloc_data(bytes, MAP_SHARED, 1, g->p.init_cpu0, g->p.thp, g->p.init_random); +} + +/* + * Create a shared memory buffer that can be shared between processes: + */ +static void * setup_shared_data(ssize_t bytes) +{ + return alloc_data(bytes, MAP_SHARED, 0, g->p.init_cpu0, g->p.thp, g->p.init_random); +} + +/* + * Allocate process-local memory - this will either be shared between + * threads of this process, or only be accessed by this thread: + */ +static void * setup_private_data(ssize_t bytes) +{ + return alloc_data(bytes, MAP_PRIVATE, 0, g->p.init_cpu0, g->p.thp, g->p.init_random); +} + +/* + * Return a process-shared (global) mutex: + */ +static void init_global_mutex(pthread_mutex_t *mutex) +{ + pthread_mutexattr_t attr; + + pthread_mutexattr_init(&attr); + pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED); + pthread_mutex_init(mutex, &attr); +} + +static int parse_cpu_list(const char *arg) +{ + p0.cpu_list_str = strdup(arg); + + dprintf("got CPU list: {%s}\n", p0.cpu_list_str); + + return 0; +} + +static void parse_setup_cpu_list(void) +{ + struct thread_data *td; + char *str0, *str; + int t; + + if (!g->p.cpu_list_str) + return; + + dprintf("g->p.nr_tasks: %d\n", g->p.nr_tasks); + + str0 = str = strdup(g->p.cpu_list_str); + t = 0; + + BUG_ON(!str); + + tprintf("# binding tasks to CPUs:\n"); + tprintf("# "); + + while (true) { + int bind_cpu, bind_cpu_0, bind_cpu_1; + char *tok, *tok_end, *tok_step, *tok_len, *tok_mul; + int bind_len; + int step; + int mul; + + tok = strsep(&str, ","); + if (!tok) + break; + + tok_end = strstr(tok, "-"); + + dprintf("\ntoken: {%s}, end: {%s}\n", tok, tok_end); + if (!tok_end) { + /* Single CPU specified: */ + bind_cpu_0 = bind_cpu_1 = atol(tok); + } else { + /* CPU range specified (for example: "5-11"): */ + bind_cpu_0 = atol(tok); + bind_cpu_1 = atol(tok_end + 1); + } + + step = 1; + tok_step = strstr(tok, "#"); + if (tok_step) { + step = atol(tok_step + 1); + BUG_ON(step <= 0 || step >= g->p.nr_cpus); + } + + /* + * Mask length. + * Eg: "--cpus 8_4-16#4" means: '--cpus 8_4,12_4,16_4', + * where the _4 means the next 4 CPUs are allowed. + */ + bind_len = 1; + tok_len = strstr(tok, "_"); + if (tok_len) { + bind_len = atol(tok_len + 1); + BUG_ON(bind_len <= 0 || bind_len > g->p.nr_cpus); + } + + /* Multiplicator shortcut, "0x8" is a shortcut for: "0,0,0,0,0,0,0,0" */ + mul = 1; + tok_mul = strstr(tok, "x"); + if (tok_mul) { + mul = atol(tok_mul + 1); + BUG_ON(mul <= 0); + } + + dprintf("CPUs: %d_%d-%d#%dx%d\n", bind_cpu_0, bind_len, bind_cpu_1, step, mul); + + BUG_ON(bind_cpu_0 < 0 || bind_cpu_0 >= g->p.nr_cpus); + BUG_ON(bind_cpu_1 < 0 || bind_cpu_1 >= g->p.nr_cpus); + BUG_ON(bind_cpu_0 > bind_cpu_1); + + for (bind_cpu = bind_cpu_0; bind_cpu <= bind_cpu_1; bind_cpu += step) { + int i; + + for (i = 0; i < mul; i++) { + int cpu; + + if (t >= g->p.nr_tasks) { + printf("\n# NOTE: ignoring bind CPUs starting at CPU#%d\n #", bind_cpu); + goto out; + } + td = g->threads + t; + + if (t) + tprintf(","); + if (bind_len > 1) { + tprintf("%2d/%d", bind_cpu, bind_len); + } else { + tprintf("%2d", bind_cpu); + } + + CPU_ZERO(&td->bind_cpumask); + for (cpu = bind_cpu; cpu < bind_cpu+bind_len; cpu++) { + BUG_ON(cpu < 0 || cpu >= g->p.nr_cpus); + CPU_SET(cpu, &td->bind_cpumask); + } + t++; + } + } + } +out: + + tprintf("\n"); + + if (t < g->p.nr_tasks) + printf("# NOTE: %d tasks bound, %d tasks unbound\n", t, g->p.nr_tasks - t); + + free(str0); +} + +static int parse_cpus_opt(const struct option *opt __maybe_unused, + const char *arg, int unset __maybe_unused) +{ + if (!arg) + return -1; + + return parse_cpu_list(arg); +} + +static int parse_node_list(const char *arg) +{ + p0.node_list_str = strdup(arg); + + dprintf("got NODE list: {%s}\n", p0.node_list_str); + + return 0; +} + +static void parse_setup_node_list(void) +{ + struct thread_data *td; + char *str0, *str; + int t; + + if (!g->p.node_list_str) + return; + + dprintf("g->p.nr_tasks: %d\n", g->p.nr_tasks); + + str0 = str = strdup(g->p.node_list_str); + t = 0; + + BUG_ON(!str); + + tprintf("# binding tasks to NODEs:\n"); + tprintf("# "); + + while (true) { + int bind_node, bind_node_0, bind_node_1; + char *tok, *tok_end, *tok_step, *tok_mul; + int step; + int mul; + + tok = strsep(&str, ","); + if (!tok) + break; + + tok_end = strstr(tok, "-"); + + dprintf("\ntoken: {%s}, end: {%s}\n", tok, tok_end); + if (!tok_end) { + /* Single NODE specified: */ + bind_node_0 = bind_node_1 = atol(tok); + } else { + /* NODE range specified (for example: "5-11"): */ + bind_node_0 = atol(tok); + bind_node_1 = atol(tok_end + 1); + } + + step = 1; + tok_step = strstr(tok, "#"); + if (tok_step) { + step = atol(tok_step + 1); + BUG_ON(step <= 0 || step >= g->p.nr_nodes); + } + + /* Multiplicator shortcut, "0x8" is a shortcut for: "0,0,0,0,0,0,0,0" */ + mul = 1; + tok_mul = strstr(tok, "x"); + if (tok_mul) { + mul = atol(tok_mul + 1); + BUG_ON(mul <= 0); + } + + dprintf("NODEs: %d-%d #%d\n", bind_node_0, bind_node_1, step); + + BUG_ON(bind_node_0 < 0 || bind_node_0 >= g->p.nr_nodes); + BUG_ON(bind_node_1 < 0 || bind_node_1 >= g->p.nr_nodes); + BUG_ON(bind_node_0 > bind_node_1); + + for (bind_node = bind_node_0; bind_node <= bind_node_1; bind_node += step) { + int i; + + for (i = 0; i < mul; i++) { + if (t >= g->p.nr_tasks) { + printf("\n# NOTE: ignoring bind NODEs starting at NODE#%d\n", bind_node); + goto out; + } + td = g->threads + t; + + if (!t) + tprintf(" %2d", bind_node); + else + tprintf(",%2d", bind_node); + + td->bind_node = bind_node; + t++; + } + } + } +out: + + tprintf("\n"); + + if (t < g->p.nr_tasks) + printf("# NOTE: %d tasks mem-bound, %d tasks unbound\n", t, g->p.nr_tasks - t); + + free(str0); +} + +static int parse_nodes_opt(const struct option *opt __maybe_unused, + const char *arg, int unset __maybe_unused) +{ + if (!arg) + return -1; + + return parse_node_list(arg); + + return 0; +} + +#define BIT(x) (1ul << x) + +static inline uint32_t lfsr_32(uint32_t lfsr) +{ + const uint32_t taps = BIT(1) | BIT(5) | BIT(6) | BIT(31); + return (lfsr>>1) ^ ((0x0u - (lfsr & 0x1u)) & taps); +} + +/* + * Make sure there's real data dependency to RAM (when read + * accesses are enabled), so the compiler, the CPU and the + * kernel (KSM, zero page, etc.) cannot optimize away RAM + * accesses: + */ +static inline u64 access_data(u64 *data __attribute__((unused)), u64 val) +{ + if (g->p.data_reads) + val += *data; + if (g->p.data_writes) + *data = val + 1; + return val; +} + +/* + * The worker process does two types of work, a forwards going + * loop and a backwards going loop. + * + * We do this so that on multiprocessor systems we do not create + * a 'train' of processing, with highly synchronized processes, + * skewing the whole benchmark. + */ +static u64 do_work(u8 *__data, long bytes, int nr, int nr_max, int loop, u64 val) +{ + long words = bytes/sizeof(u64); + u64 *data = (void *)__data; + long chunk_0, chunk_1; + u64 *d0, *d, *d1; + long off; + long i; + + BUG_ON(!data && words); + BUG_ON(data && !words); + + if (!data) + return val; + + /* Very simple memset() work variant: */ + if (g->p.data_zero_memset && !g->p.data_rand_walk) { + bzero(data, bytes); + return val; + } + + /* Spread out by PID/TID nr and by loop nr: */ + chunk_0 = words/nr_max; + chunk_1 = words/g->p.nr_loops; + off = nr*chunk_0 + loop*chunk_1; + + while (off >= words) + off -= words; + + if (g->p.data_rand_walk) { + u32 lfsr = nr + loop + val; + int j; + + for (i = 0; i < words/1024; i++) { + long start, end; + + lfsr = lfsr_32(lfsr); + + start = lfsr % words; + end = min(start + 1024, words-1); + + if (g->p.data_zero_memset) { + bzero(data + start, (end-start) * sizeof(u64)); + } else { + for (j = start; j < end; j++) + val = access_data(data + j, val); + } + } + } else if (!g->p.data_backwards || (nr + loop) & 1) { + + d0 = data + off; + d = data + off + 1; + d1 = data + words; + + /* Process data forwards: */ + for (;;) { + if (unlikely(d >= d1)) + d = data; + if (unlikely(d == d0)) + break; + + val = access_data(d, val); + + d++; + } + } else { + /* Process data backwards: */ + + d0 = data + off; + d = data + off - 1; + d1 = data + words; + + /* Process data forwards: */ + for (;;) { + if (unlikely(d < data)) + d = data + words-1; + if (unlikely(d == d0)) + break; + + val = access_data(d, val); + + d--; + } + } + + return val; +} + +static void update_curr_cpu(int task_nr, unsigned long bytes_worked) +{ + unsigned int cpu; + + cpu = sched_getcpu(); + + g->threads[task_nr].curr_cpu = cpu; + prctl(0, bytes_worked); +} + +#define MAX_NR_NODES 64 + +/* + * Count the number of nodes a process's threads + * are spread out on. + * + * A count of 1 means that the process is compressed + * to a single node. A count of g->p.nr_nodes means it's + * spread out on the whole system. + */ +static int count_process_nodes(int process_nr) +{ + char node_present[MAX_NR_NODES] = { 0, }; + int nodes; + int n, t; + + for (t = 0; t < g->p.nr_threads; t++) { + struct thread_data *td; + int task_nr; + int node; + + task_nr = process_nr*g->p.nr_threads + t; + td = g->threads + task_nr; + + node = numa_node_of_cpu(td->curr_cpu); + node_present[node] = 1; + } + + nodes = 0; + + for (n = 0; n < MAX_NR_NODES; n++) + nodes += node_present[n]; + + return nodes; +} + +/* + * Count the number of distinct process-threads a node contains. + * + * A count of 1 means that the node contains only a single + * process. If all nodes on the system contain at most one + * process then we are well-converged. + */ +static int count_node_processes(int node) +{ + int processes = 0; + int t, p; + + for (p = 0; p < g->p.nr_proc; p++) { + for (t = 0; t < g->p.nr_threads; t++) { + struct thread_data *td; + int task_nr; + int n; + + task_nr = p*g->p.nr_threads + t; + td = g->threads + task_nr; + + n = numa_node_of_cpu(td->curr_cpu); + if (n == node) { + processes++; + break; + } + } + } + + return processes; +} + +static void calc_convergence_compression(int *strong) +{ + unsigned int nodes_min, nodes_max; + int p; + + nodes_min = -1; + nodes_max = 0; + + for (p = 0; p < g->p.nr_proc; p++) { + unsigned int nodes = count_process_nodes(p); + + nodes_min = min(nodes, nodes_min); + nodes_max = max(nodes, nodes_max); + } + + /* Strong convergence: all threads compress on a single node: */ + if (nodes_min == 1 && nodes_max == 1) { + *strong = 1; + } else { + *strong = 0; + tprintf(" {%d-%d}", nodes_min, nodes_max); + } +} + +static void calc_convergence(double runtime_ns_max, double *convergence) +{ + unsigned int loops_done_min, loops_done_max; + int process_groups; + int nodes[MAX_NR_NODES]; + int distance; + int nr_min; + int nr_max; + int strong; + int sum; + int nr; + int node; + int cpu; + int t; + + if (!g->p.show_convergence && !g->p.measure_convergence) + return; + + for (node = 0; node < g->p.nr_nodes; node++) + nodes[node] = 0; + + loops_done_min = -1; + loops_done_max = 0; + + for (t = 0; t < g->p.nr_tasks; t++) { + struct thread_data *td = g->threads + t; + unsigned int loops_done; + + cpu = td->curr_cpu; + + /* Not all threads have written it yet: */ + if (cpu < 0) + continue; + + node = numa_node_of_cpu(cpu); + + nodes[node]++; + + loops_done = td->loops_done; + loops_done_min = min(loops_done, loops_done_min); + loops_done_max = max(loops_done, loops_done_max); + } + + nr_max = 0; + nr_min = g->p.nr_tasks; + sum = 0; + + for (node = 0; node < g->p.nr_nodes; node++) { + nr = nodes[node]; + nr_min = min(nr, nr_min); + nr_max = max(nr, nr_max); + sum += nr; + } + BUG_ON(nr_min > nr_max); + + BUG_ON(sum > g->p.nr_tasks); + + if (0 && (sum < g->p.nr_tasks)) + return; + + /* + * Count the number of distinct process groups present + * on nodes - when we are converged this will decrease + * to g->p.nr_proc: + */ + process_groups = 0; + + for (node = 0; node < g->p.nr_nodes; node++) { + int processes = count_node_processes(node); + + nr = nodes[node]; + tprintf(" %2d/%-2d", nr, processes); + + process_groups += processes; + } + + distance = nr_max - nr_min; + + tprintf(" [%2d/%-2d]", distance, process_groups); + + tprintf(" l:%3d-%-3d (%3d)", + loops_done_min, loops_done_max, loops_done_max-loops_done_min); + + if (loops_done_min && loops_done_max) { + double skew = 1.0 - (double)loops_done_min/loops_done_max; + + tprintf(" [%4.1f%%]", skew * 100.0); + } + + calc_convergence_compression(&strong); + + if (strong && process_groups == g->p.nr_proc) { + if (!*convergence) { + *convergence = runtime_ns_max; + tprintf(" (%6.1fs converged)\n", *convergence/1e9); + if (g->p.measure_convergence) { + g->all_converged = true; + g->stop_work = true; + } + } + } else { + if (*convergence) { + tprintf(" (%6.1fs de-converged)", runtime_ns_max/1e9); + *convergence = 0; + } + tprintf("\n"); + } +} + +static void show_summary(double runtime_ns_max, int l, double *convergence) +{ + tprintf("\r # %5.1f%% [%.1f mins]", + (double)(l+1)/g->p.nr_loops*100.0, runtime_ns_max/1e9 / 60.0); + + calc_convergence(runtime_ns_max, convergence); + + if (g->p.show_details >= 0) + fflush(stdout); +} + +static void *worker_thread(void *__tdata) +{ + struct thread_data *td = __tdata; + struct timeval start0, start, stop, diff; + int process_nr = td->process_nr; + int thread_nr = td->thread_nr; + unsigned long last_perturbance; + int task_nr = td->task_nr; + int details = g->p.show_details; + int first_task, last_task; + double convergence = 0; + u64 val = td->val; + double runtime_ns_max; + u8 *global_data; + u8 *process_data; + u8 *thread_data; + u64 bytes_done; + long work_done; + u32 l; + + bind_to_cpumask(td->bind_cpumask); + bind_to_memnode(td->bind_node); + + set_taskname("thread %d/%d", process_nr, thread_nr); + + global_data = g->data; + process_data = td->process_data; + thread_data = setup_private_data(g->p.bytes_thread); + + bytes_done = 0; + + last_task = 0; + if (process_nr == g->p.nr_proc-1 && thread_nr == g->p.nr_threads-1) + last_task = 1; + + first_task = 0; + if (process_nr == 0 && thread_nr == 0) + first_task = 1; + + if (details >= 2) { + printf("# thread %2d / %2d global mem: %p, process mem: %p, thread mem: %p\n", + process_nr, thread_nr, global_data, process_data, thread_data); + } + + if (g->p.serialize_startup) { + pthread_mutex_lock(&g->startup_mutex); + g->nr_tasks_started++; + pthread_mutex_unlock(&g->startup_mutex); + + /* Here we will wait for the main process to start us all at once: */ + pthread_mutex_lock(&g->start_work_mutex); + g->nr_tasks_working++; + + /* Last one wake the main process: */ + if (g->nr_tasks_working == g->p.nr_tasks) + pthread_mutex_unlock(&g->startup_done_mutex); + + pthread_mutex_unlock(&g->start_work_mutex); + } + + gettimeofday(&start0, NULL); + + start = stop = start0; + last_perturbance = start.tv_sec; + + for (l = 0; l < g->p.nr_loops; l++) { + start = stop; + + if (g->stop_work) + break; + + val += do_work(global_data, g->p.bytes_global, process_nr, g->p.nr_proc, l, val); + val += do_work(process_data, g->p.bytes_process, thread_nr, g->p.nr_threads, l, val); + val += do_work(thread_data, g->p.bytes_thread, 0, 1, l, val); + + if (g->p.sleep_usecs) { + pthread_mutex_lock(td->process_lock); + usleep(g->p.sleep_usecs); + pthread_mutex_unlock(td->process_lock); + } + /* + * Amount of work to be done under a process-global lock: + */ + if (g->p.bytes_process_locked) { + pthread_mutex_lock(td->process_lock); + val += do_work(process_data, g->p.bytes_process_locked, thread_nr, g->p.nr_threads, l, val); + pthread_mutex_unlock(td->process_lock); + } + + work_done = g->p.bytes_global + g->p.bytes_process + + g->p.bytes_process_locked + g->p.bytes_thread; + + update_curr_cpu(task_nr, work_done); + bytes_done += work_done; + + if (details < 0 && !g->p.perturb_secs && !g->p.measure_convergence && !g->p.nr_secs) + continue; + + td->loops_done = l; + + gettimeofday(&stop, NULL); + + /* Check whether our max runtime timed out: */ + if (g->p.nr_secs) { + timersub(&stop, &start0, &diff); + if (diff.tv_sec >= g->p.nr_secs) { + g->stop_work = true; + break; + } + } + + /* Update the summary at most once per second: */ + if (start.tv_sec == stop.tv_sec) + continue; + + /* + * Perturb the first task's equilibrium every g->p.perturb_secs seconds, + * by migrating to CPU#0: + */ + if (first_task && g->p.perturb_secs && (int)(stop.tv_sec - last_perturbance) >= g->p.perturb_secs) { + cpu_set_t orig_mask; + int target_cpu; + int this_cpu; + + last_perturbance = stop.tv_sec; + + /* + * Depending on where we are running, move into + * the other half of the system, to create some + * real disturbance: + */ + this_cpu = g->threads[task_nr].curr_cpu; + if (this_cpu < g->p.nr_cpus/2) + target_cpu = g->p.nr_cpus-1; + else + target_cpu = 0; + + orig_mask = bind_to_cpu(target_cpu); + + /* Here we are running on the target CPU already */ + if (details >= 1) + printf(" (injecting perturbalance, moved to CPU#%d)\n", target_cpu); + + bind_to_cpumask(orig_mask); + } + + if (details >= 3) { + timersub(&stop, &start, &diff); + runtime_ns_max = diff.tv_sec * 1000000000; + runtime_ns_max += diff.tv_usec * 1000; + + if (details >= 0) { + printf(" #%2d / %2d: %14.2lf nsecs/op [val: %016lx]\n", + process_nr, thread_nr, runtime_ns_max / bytes_done, val); + } + fflush(stdout); + } + if (!last_task) + continue; + + timersub(&stop, &start0, &diff); + runtime_ns_max = diff.tv_sec * 1000000000ULL; + runtime_ns_max += diff.tv_usec * 1000ULL; + + show_summary(runtime_ns_max, l, &convergence); + } + + gettimeofday(&stop, NULL); + timersub(&stop, &start0, &diff); + td->runtime_ns = diff.tv_sec * 1000000000ULL; + td->runtime_ns += diff.tv_usec * 1000ULL; + + free_data(thread_data, g->p.bytes_thread); + + pthread_mutex_lock(&g->stop_work_mutex); + g->bytes_done += bytes_done; + pthread_mutex_unlock(&g->stop_work_mutex); + + return NULL; +} + +/* + * A worker process starts a couple of threads: + */ +static void worker_process(int process_nr) +{ + pthread_mutex_t process_lock; + struct thread_data *td; + pthread_t *pthreads; + u8 *process_data; + int task_nr; + int ret; + int t; + + pthread_mutex_init(&process_lock, NULL); + set_taskname("process %d", process_nr); + + /* + * Pick up the memory policy and the CPU binding of our first thread, + * so that we initialize memory accordingly: + */ + task_nr = process_nr*g->p.nr_threads; + td = g->threads + task_nr; + + bind_to_memnode(td->bind_node); + bind_to_cpumask(td->bind_cpumask); + + pthreads = zalloc(g->p.nr_threads * sizeof(pthread_t)); + process_data = setup_private_data(g->p.bytes_process); + + if (g->p.show_details >= 3) { + printf(" # process %2d global mem: %p, process mem: %p\n", + process_nr, g->data, process_data); + } + + for (t = 0; t < g->p.nr_threads; t++) { + task_nr = process_nr*g->p.nr_threads + t; + td = g->threads + task_nr; + + td->process_data = process_data; + td->process_nr = process_nr; + td->thread_nr = t; + td->task_nr = task_nr; + td->val = rand(); + td->curr_cpu = -1; + td->process_lock = &process_lock; + + ret = pthread_create(pthreads + t, NULL, worker_thread, td); + BUG_ON(ret); + } + + for (t = 0; t < g->p.nr_threads; t++) { + ret = pthread_join(pthreads[t], NULL); + BUG_ON(ret); + } + + free_data(process_data, g->p.bytes_process); + free(pthreads); +} + +static void print_summary(void) +{ + if (g->p.show_details < 0) + return; + + printf("\n ###\n"); + printf(" # %d %s will execute (on %d nodes, %d CPUs):\n", + g->p.nr_tasks, g->p.nr_tasks == 1 ? "task" : "tasks", g->p.nr_nodes, g->p.nr_cpus); + printf(" # %5dx %5ldMB global shared mem operations\n", + g->p.nr_loops, g->p.bytes_global/1024/1024); + printf(" # %5dx %5ldMB process shared mem operations\n", + g->p.nr_loops, g->p.bytes_process/1024/1024); + printf(" # %5dx %5ldMB thread local mem operations\n", + g->p.nr_loops, g->p.bytes_thread/1024/1024); + + printf(" ###\n"); + + printf("\n ###\n"); fflush(stdout); +} + +static void init_thread_data(void) +{ + ssize_t size = sizeof(*g->threads)*g->p.nr_tasks; + int t; + + g->threads = zalloc_shared_data(size); + + for (t = 0; t < g->p.nr_tasks; t++) { + struct thread_data *td = g->threads + t; + int cpu; + + /* Allow all nodes by default: */ + td->bind_node = -1; + + /* Allow all CPUs by default: */ + CPU_ZERO(&td->bind_cpumask); + for (cpu = 0; cpu < g->p.nr_cpus; cpu++) + CPU_SET(cpu, &td->bind_cpumask); + } +} + +static void deinit_thread_data(void) +{ + ssize_t size = sizeof(*g->threads)*g->p.nr_tasks; + + free_data(g->threads, size); +} + +static int init(void) +{ + g = (void *)alloc_data(sizeof(*g), MAP_SHARED, 1, 0, 0 /* THP */, 0); + + /* Copy over options: */ + g->p = p0; + + g->p.nr_cpus = numa_num_configured_cpus(); + + g->p.nr_nodes = numa_max_node() + 1; + + /* char array in count_process_nodes(): */ + BUG_ON(g->p.nr_nodes > MAX_NR_NODES || g->p.nr_nodes < 0); + + if (g->p.show_quiet && !g->p.show_details) + g->p.show_details = -1; + + /* Some memory should be specified: */ + if (!g->p.mb_global_str && !g->p.mb_proc_str && !g->p.mb_thread_str) + return -1; + + if (g->p.mb_global_str) { + g->p.mb_global = atof(g->p.mb_global_str); + BUG_ON(g->p.mb_global < 0); + } + + if (g->p.mb_proc_str) { + g->p.mb_proc = atof(g->p.mb_proc_str); + BUG_ON(g->p.mb_proc < 0); + } + + if (g->p.mb_proc_locked_str) { + g->p.mb_proc_locked = atof(g->p.mb_proc_locked_str); + BUG_ON(g->p.mb_proc_locked < 0); + BUG_ON(g->p.mb_proc_locked > g->p.mb_proc); + } + + if (g->p.mb_thread_str) { + g->p.mb_thread = atof(g->p.mb_thread_str); + BUG_ON(g->p.mb_thread < 0); + } + + BUG_ON(g->p.nr_threads <= 0); + BUG_ON(g->p.nr_proc <= 0); + + g->p.nr_tasks = g->p.nr_proc*g->p.nr_threads; + + g->p.bytes_global = g->p.mb_global *1024L*1024L; + g->p.bytes_process = g->p.mb_proc *1024L*1024L; + g->p.bytes_process_locked = g->p.mb_proc_locked *1024L*1024L; + g->p.bytes_thread = g->p.mb_thread *1024L*1024L; + + g->data = setup_shared_data(g->p.bytes_global); + + /* Startup serialization: */ + init_global_mutex(&g->start_work_mutex); + init_global_mutex(&g->startup_mutex); + init_global_mutex(&g->startup_done_mutex); + init_global_mutex(&g->stop_work_mutex); + + init_thread_data(); + + tprintf("#\n"); + parse_setup_cpu_list(); + parse_setup_node_list(); + tprintf("#\n"); + + print_summary(); + + return 0; +} + +static void deinit(void) +{ + free_data(g->data, g->p.bytes_global); + g->data = NULL; + + deinit_thread_data(); + + free_data(g, sizeof(*g)); + g = NULL; +} + +/* + * Print a short or long result, depending on the verbosity setting: + */ +static void print_res(const char *name, double val, + const char *txt_unit, const char *txt_short, const char *txt_long) +{ + if (!name) + name = "main,"; + + if (g->p.show_quiet) + printf(" %-30s %15.3f, %-15s %s\n", name, val, txt_unit, txt_short); + else + printf(" %14.3f %s\n", val, txt_long); +} + +static int __bench_numa(const char *name) +{ + struct timeval start, stop, diff; + u64 runtime_ns_min, runtime_ns_sum; + pid_t *pids, pid, wpid; + double delta_runtime; + double runtime_avg; + double runtime_sec_max; + double runtime_sec_min; + int wait_stat; + double bytes; + int i, t; + + if (init()) + return -1; + + pids = zalloc(g->p.nr_proc * sizeof(*pids)); + pid = -1; + + /* All threads try to acquire it, this way we can wait for them to start up: */ + pthread_mutex_lock(&g->start_work_mutex); + + if (g->p.serialize_startup) { + tprintf(" #\n"); + tprintf(" # Startup synchronization: ..."); fflush(stdout); + } + + gettimeofday(&start, NULL); + + for (i = 0; i < g->p.nr_proc; i++) { + pid = fork(); + dprintf(" # process %2d: PID %d\n", i, pid); + + BUG_ON(pid < 0); + if (!pid) { + /* Child process: */ + worker_process(i); + + exit(0); + } + pids[i] = pid; + + } + /* Wait for all the threads to start up: */ + while (g->nr_tasks_started != g->p.nr_tasks) + usleep(1000); + + BUG_ON(g->nr_tasks_started != g->p.nr_tasks); + + if (g->p.serialize_startup) { + double startup_sec; + + pthread_mutex_lock(&g->startup_done_mutex); + + /* This will start all threads: */ + pthread_mutex_unlock(&g->start_work_mutex); + + /* This mutex is locked - the last started thread will wake us: */ + pthread_mutex_lock(&g->startup_done_mutex); + + gettimeofday(&stop, NULL); + + timersub(&stop, &start, &diff); + + startup_sec = diff.tv_sec * 1000000000.0; + startup_sec += diff.tv_usec * 1000.0; + startup_sec /= 1e9; + + tprintf(" threads initialized in %.6f seconds.\n", startup_sec); + tprintf(" #\n"); + + start = stop; + pthread_mutex_unlock(&g->startup_done_mutex); + } else { + gettimeofday(&start, NULL); + } + + /* Parent process: */ + + + for (i = 0; i < g->p.nr_proc; i++) { + wpid = waitpid(pids[i], &wait_stat, 0); + BUG_ON(wpid < 0); + BUG_ON(!WIFEXITED(wait_stat)); + + } + + runtime_ns_sum = 0; + runtime_ns_min = -1LL; + + for (t = 0; t < g->p.nr_tasks; t++) { + u64 thread_runtime_ns = g->threads[t].runtime_ns; + + runtime_ns_sum += thread_runtime_ns; + runtime_ns_min = min(thread_runtime_ns, runtime_ns_min); + } + + gettimeofday(&stop, NULL); + timersub(&stop, &start, &diff); + + BUG_ON(bench_format != BENCH_FORMAT_DEFAULT); + + tprintf("\n ###\n"); + tprintf("\n"); + + runtime_sec_max = diff.tv_sec * 1000000000.0; + runtime_sec_max += diff.tv_usec * 1000.0; + runtime_sec_max /= 1e9; + + runtime_sec_min = runtime_ns_min/1e9; + + bytes = g->bytes_done; + runtime_avg = (double)runtime_ns_sum / g->p.nr_tasks / 1e9; + + if (g->p.measure_convergence) { + print_res(name, runtime_sec_max, + "secs,", "NUMA-convergence-latency", "secs latency to NUMA-converge"); + } + + print_res(name, runtime_sec_max, + "secs,", "runtime-max/thread", "secs slowest (max) thread-runtime"); + + print_res(name, runtime_sec_min, + "secs,", "runtime-min/thread", "secs fastest (min) thread-runtime"); + + print_res(name, runtime_avg, + "secs,", "runtime-avg/thread", "secs average thread-runtime"); + + delta_runtime = (runtime_sec_max - runtime_sec_min)/2.0; + print_res(name, delta_runtime / runtime_sec_max * 100.0, + "%,", "spread-runtime/thread", "% difference between max/avg runtime"); + + print_res(name, bytes / g->p.nr_tasks / 1e9, + "GB,", "data/thread", "GB data processed, per thread"); + + print_res(name, bytes / 1e9, + "GB,", "data-total", "GB data processed, total"); + + print_res(name, runtime_sec_max * 1e9 / (bytes / g->p.nr_tasks), + "nsecs,", "runtime/byte/thread","nsecs/byte/thread runtime"); + + print_res(name, bytes / g->p.nr_tasks / 1e9 / runtime_sec_max, + "GB/sec,", "thread-speed", "GB/sec/thread speed"); + + print_res(name, bytes / runtime_sec_max / 1e9, + "GB/sec,", "total-speed", "GB/sec total speed"); + + free(pids); + + deinit(); + + return 0; +} + +#define MAX_ARGS 50 + +static int command_size(const char **argv) +{ + int size = 0; + + while (*argv) { + size++; + argv++; + } + + BUG_ON(size >= MAX_ARGS); + + return size; +} + +static void init_params(struct params *p, const char *name, int argc, const char **argv) +{ + int i; + + printf("\n # Running %s \"perf bench numa", name); + + for (i = 0; i < argc; i++) + printf(" %s", argv[i]); + + printf("\"\n"); + + memset(p, 0, sizeof(*p)); + + /* Initialize nonzero defaults: */ + + p->serialize_startup = 1; + p->data_reads = true; + p->data_writes = true; + p->data_backwards = true; + p->data_rand_walk = true; + p->nr_loops = -1; + p->init_random = true; +} + +static int run_bench_numa(const char *name, const char **argv) +{ + int argc = command_size(argv); + + init_params(&p0, name, argc, argv); + argc = parse_options(argc, argv, options, bench_numa_usage, 0); + if (argc) + goto err; + + if (__bench_numa(name)) + goto err; + + return 0; + +err: + usage_with_options(numa_usage, options); + return -1; +} + +#define OPT_BW_RAM "-s", "20", "-zZq", "--thp", " 1", "--no-data_rand_walk" +#define OPT_BW_RAM_NOTHP OPT_BW_RAM, "--thp", "-1" + +#define OPT_CONV "-s", "100", "-zZ0qcm", "--thp", " 1" +#define OPT_CONV_NOTHP OPT_CONV, "--thp", "-1" + +#define OPT_BW "-s", "20", "-zZ0q", "--thp", " 1" +#define OPT_BW_NOTHP OPT_BW, "--thp", "-1" + +/* + * The built-in test-suite executed by "perf bench numa -a". + * + * (A minimum of 4 nodes and 16 GB of RAM is recommended.) + */ +static const char *tests[][MAX_ARGS] = { + /* Basic single-stream NUMA bandwidth measurements: */ + { "RAM-bw-local,", "mem", "-p", "1", "-t", "1", "-P", "1024", + "-C" , "0", "-M", "0", OPT_BW_RAM }, + { "RAM-bw-local-NOTHP,", + "mem", "-p", "1", "-t", "1", "-P", "1024", + "-C" , "0", "-M", "0", OPT_BW_RAM_NOTHP }, + { "RAM-bw-remote,", "mem", "-p", "1", "-t", "1", "-P", "1024", + "-C" , "0", "-M", "1", OPT_BW_RAM }, + + /* 2-stream NUMA bandwidth measurements: */ + { "RAM-bw-local-2x,", "mem", "-p", "2", "-t", "1", "-P", "1024", + "-C", "0,2", "-M", "0x2", OPT_BW_RAM }, + { "RAM-bw-remote-2x,", "mem", "-p", "2", "-t", "1", "-P", "1024", + "-C", "0,2", "-M", "1x2", OPT_BW_RAM }, + + /* Cross-stream NUMA bandwidth measurement: */ + { "RAM-bw-cross,", "mem", "-p", "2", "-t", "1", "-P", "1024", + "-C", "0,8", "-M", "1,0", OPT_BW_RAM }, + + /* Convergence latency measurements: */ + { " 1x3-convergence,", "mem", "-p", "1", "-t", "3", "-P", "512", OPT_CONV }, + { " 1x4-convergence,", "mem", "-p", "1", "-t", "4", "-P", "512", OPT_CONV }, + { " 1x6-convergence,", "mem", "-p", "1", "-t", "6", "-P", "1020", OPT_CONV }, + { " 2x3-convergence,", "mem", "-p", "3", "-t", "3", "-P", "1020", OPT_CONV }, + { " 3x3-convergence,", "mem", "-p", "3", "-t", "3", "-P", "1020", OPT_CONV }, + { " 4x4-convergence,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_CONV }, + { " 4x4-convergence-NOTHP,", + "mem", "-p", "4", "-t", "4", "-P", "512", OPT_CONV_NOTHP }, + { " 4x6-convergence,", "mem", "-p", "4", "-t", "6", "-P", "1020", OPT_CONV }, + { " 4x8-convergence,", "mem", "-p", "4", "-t", "8", "-P", "512", OPT_CONV }, + { " 8x4-convergence,", "mem", "-p", "8", "-t", "4", "-P", "512", OPT_CONV }, + { " 8x4-convergence-NOTHP,", + "mem", "-p", "8", "-t", "4", "-P", "512", OPT_CONV_NOTHP }, + { " 3x1-convergence,", "mem", "-p", "3", "-t", "1", "-P", "512", OPT_CONV }, + { " 4x1-convergence,", "mem", "-p", "4", "-t", "1", "-P", "512", OPT_CONV }, + { " 8x1-convergence,", "mem", "-p", "8", "-t", "1", "-P", "512", OPT_CONV }, + { "16x1-convergence,", "mem", "-p", "16", "-t", "1", "-P", "256", OPT_CONV }, + { "32x1-convergence,", "mem", "-p", "32", "-t", "1", "-P", "128", OPT_CONV }, + + /* Various NUMA process/thread layout bandwidth measurements: */ + { " 2x1-bw-process,", "mem", "-p", "2", "-t", "1", "-P", "1024", OPT_BW }, + { " 3x1-bw-process,", "mem", "-p", "3", "-t", "1", "-P", "1024", OPT_BW }, + { " 4x1-bw-process,", "mem", "-p", "4", "-t", "1", "-P", "1024", OPT_BW }, + { " 8x1-bw-process,", "mem", "-p", "8", "-t", "1", "-P", " 512", OPT_BW }, + { " 8x1-bw-process-NOTHP,", + "mem", "-p", "8", "-t", "1", "-P", " 512", OPT_BW_NOTHP }, + { "16x1-bw-process,", "mem", "-p", "16", "-t", "1", "-P", "256", OPT_BW }, + + { " 4x1-bw-thread,", "mem", "-p", "1", "-t", "4", "-T", "256", OPT_BW }, + { " 8x1-bw-thread,", "mem", "-p", "1", "-t", "8", "-T", "256", OPT_BW }, + { "16x1-bw-thread,", "mem", "-p", "1", "-t", "16", "-T", "128", OPT_BW }, + { "32x1-bw-thread,", "mem", "-p", "1", "-t", "32", "-T", "64", OPT_BW }, + + { " 2x3-bw-thread,", "mem", "-p", "2", "-t", "3", "-P", "512", OPT_BW }, + { " 4x4-bw-thread,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_BW }, + { " 4x6-bw-thread,", "mem", "-p", "4", "-t", "6", "-P", "512", OPT_BW }, + { " 4x8-bw-thread,", "mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW }, + { " 4x8-bw-thread-NOTHP,", + "mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW_NOTHP }, + { " 3x3-bw-thread,", "mem", "-p", "3", "-t", "3", "-P", "512", OPT_BW }, + { " 5x5-bw-thread,", "mem", "-p", "5", "-t", "5", "-P", "512", OPT_BW }, + + { "2x16-bw-thread,", "mem", "-p", "2", "-t", "16", "-P", "512", OPT_BW }, + { "1x32-bw-thread,", "mem", "-p", "1", "-t", "32", "-P", "2048", OPT_BW }, + + { "numa02-bw,", "mem", "-p", "1", "-t", "32", "-T", "32", OPT_BW }, + { "numa02-bw-NOTHP,", "mem", "-p", "1", "-t", "32", "-T", "32", OPT_BW_NOTHP }, + { "numa01-bw-thread,", "mem", "-p", "2", "-t", "16", "-T", "192", OPT_BW }, + { "numa01-bw-thread-NOTHP,", + "mem", "-p", "2", "-t", "16", "-T", "192", OPT_BW_NOTHP }, +}; + +static int bench_all(void) +{ + int nr = ARRAY_SIZE(tests); + int ret; + int i; + + ret = system("echo ' #'; echo ' # Running test on: '$(uname -a); echo ' #'"); + BUG_ON(ret < 0); + + for (i = 0; i < nr; i++) { + if (run_bench_numa(tests[i][0], tests[i] + 1)) + return -1; + } + + printf("\n"); + + return 0; +} + +int bench_numa(int argc, const char **argv, const char *prefix __maybe_unused) +{ + init_params(&p0, "main,", argc, argv); + argc = parse_options(argc, argv, options, bench_numa_usage, 0); + if (argc) + goto err; + + if (p0.run_all) + return bench_all(); + + if (__bench_numa(NULL)) + goto err; + + return 0; + +err: + usage_with_options(numa_usage, options); + return -1; +} diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index dc870cf31b79..2e6961ea3184 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -34,9 +34,10 @@ struct perf_annotate { struct perf_tool tool; - bool force, use_tui, use_stdio; + bool force, use_tui, use_stdio, use_gtk; bool full_paths; bool print_line; + bool skip_missing; const char *sym_hist_filter; const char *cpu_list; DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); @@ -138,9 +139,22 @@ find_next: continue; } - if (use_browser > 0) { + if (use_browser == 2) { + int ret; + + ret = hist_entry__gtk_annotate(he, evidx, NULL); + if (!ret || !ann->skip_missing) + return; + + /* skip missing symbols */ + nd = rb_next(nd); + } else if (use_browser == 1) { key = hist_entry__tui_annotate(he, evidx, NULL); switch (key) { + case -1: + if (!ann->skip_missing) + return; + /* fall through */ case K_RIGHT: next = rb_next(nd); break; @@ -224,6 +238,10 @@ static int __cmd_annotate(struct perf_annotate *ann) ui__error("The %s file has no samples!\n", session->filename); goto out_delete; } + + if (use_browser == 2) + perf_gtk__show_annotations(); + out_delete: /* * Speed up the exit process, for large files this can @@ -270,6 +288,7 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused) "be more verbose (show symbol address, etc)"), OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), + OPT_BOOLEAN(0, "gtk", &annotate.use_gtk, "Use the GTK interface"), OPT_BOOLEAN(0, "tui", &annotate.use_tui, "Use the TUI interface"), OPT_BOOLEAN(0, "stdio", &annotate.use_stdio, "Use the stdio interface"), OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, @@ -280,6 +299,8 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused) "print matching source lines (may be slow)"), OPT_BOOLEAN('P', "full-paths", &annotate.full_paths, "Don't shorten the displayed pathnames"), + OPT_BOOLEAN(0, "skip-missing", &annotate.skip_missing, + "Skip symbols that cannot be annotated"), OPT_STRING('C', "cpu", &annotate.cpu_list, "cpu", "list of cpus to profile"), OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", "Look for files with symbols relative to this directory"), @@ -300,6 +321,8 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused) use_browser = 0; else if (annotate.use_tui) use_browser = 1; + else if (annotate.use_gtk) + use_browser = 2; setup_browser(true); @@ -309,7 +332,8 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused) if (symbol__init() < 0) return -1; - setup_sorting(annotate_usage, options); + if (setup_sorting() < 0) + usage_with_options(annotate_usage, options); if (argc) { /* diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index cae9a5fd2ecf..77298bf892b8 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -35,6 +35,18 @@ struct bench_suite { /* sentinel: easy for help */ #define suite_all { "all", "Test all benchmark suites", NULL } +#ifdef LIBNUMA_SUPPORT +static struct bench_suite numa_suites[] = { + { "mem", + "Benchmark for NUMA workloads", + bench_numa }, + suite_all, + { NULL, + NULL, + NULL } +}; +#endif + static struct bench_suite sched_suites[] = { { "messaging", "Benchmark for scheduler and IPC mechanisms", @@ -68,6 +80,11 @@ struct bench_subsys { }; static struct bench_subsys subsystems[] = { +#ifdef LIBNUMA_SUPPORT + { "numa", + "NUMA scheduling and MM behavior", + numa_suites }, +#endif { "sched", "scheduler and IPC mechanism", sched_suites }, @@ -159,6 +176,7 @@ static void all_suite(struct bench_subsys *subsys) /* FROM HERE */ printf("# Running %s/%s benchmark...\n", subsys->name, suites[i].name); + fflush(stdout); argv[1] = suites[i].name; suites[i].fn(1, argv, NULL); @@ -225,6 +243,7 @@ int cmd_bench(int argc, const char **argv, const char *prefix __maybe_unused) printf("# Running %s/%s benchmark...\n", subsystems[i].name, subsystems[i].suites[j].name); + fflush(stdout); status = subsystems[i].suites[j].fn(argc - 1, argv + 1, prefix); goto end; diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index fae8b250b2ca..c96c8fa38243 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -14,6 +14,7 @@ #include "util/parse-options.h" #include "util/strlist.h" #include "util/build-id.h" +#include "util/session.h" #include "util/symbol.h" static int build_id_cache__add_file(const char *filename, const char *debugdir) @@ -58,19 +59,89 @@ static int build_id_cache__remove_file(const char *filename, return err; } +static bool dso__missing_buildid_cache(struct dso *dso, int parm __maybe_unused) +{ + char filename[PATH_MAX]; + u8 build_id[BUILD_ID_SIZE]; + + if (dso__build_id_filename(dso, filename, sizeof(filename)) && + filename__read_build_id(filename, build_id, + sizeof(build_id)) != sizeof(build_id)) { + if (errno == ENOENT) + return false; + + pr_warning("Problems with %s file, consider removing it from the cache\n", + filename); + } else if (memcmp(dso->build_id, build_id, sizeof(dso->build_id))) { + pr_warning("Problems with %s file, consider removing it from the cache\n", + filename); + } + + return true; +} + +static int build_id_cache__fprintf_missing(const char *filename, bool force, FILE *fp) +{ + struct perf_session *session = perf_session__new(filename, O_RDONLY, + force, false, NULL); + if (session == NULL) + return -1; + + perf_session__fprintf_dsos_buildid(session, fp, dso__missing_buildid_cache, 0); + perf_session__delete(session); + + return 0; +} + +static int build_id_cache__update_file(const char *filename, + const char *debugdir) +{ + u8 build_id[BUILD_ID_SIZE]; + char sbuild_id[BUILD_ID_SIZE * 2 + 1]; + + int err; + + if (filename__read_build_id(filename, &build_id, sizeof(build_id)) < 0) { + pr_debug("Couldn't read a build-id in %s\n", filename); + return -1; + } + + build_id__sprintf(build_id, sizeof(build_id), sbuild_id); + err = build_id_cache__remove_s(sbuild_id, debugdir); + if (!err) { + err = build_id_cache__add_s(sbuild_id, debugdir, filename, + false, false); + } + if (verbose) + pr_info("Updating %s %s: %s\n", sbuild_id, filename, + err ? "FAIL" : "Ok"); + + return err; +} + int cmd_buildid_cache(int argc, const char **argv, const char *prefix __maybe_unused) { struct strlist *list; struct str_node *pos; + int ret = 0; + bool force = false; char debugdir[PATH_MAX]; char const *add_name_list_str = NULL, - *remove_name_list_str = NULL; + *remove_name_list_str = NULL, + *missing_filename = NULL, + *update_name_list_str = NULL; + const struct option buildid_cache_options[] = { OPT_STRING('a', "add", &add_name_list_str, "file list", "file(s) to add"), OPT_STRING('r', "remove", &remove_name_list_str, "file list", "file(s) to remove"), + OPT_STRING('M', "missing", &missing_filename, "file", + "to find missing build ids in the cache"), + OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), + OPT_STRING('u', "update", &update_name_list_str, "file list", + "file(s) to update"), OPT_INCR('v', "verbose", &verbose, "be more verbose"), OPT_END() }; @@ -125,5 +196,26 @@ int cmd_buildid_cache(int argc, const char **argv, } } - return 0; + if (missing_filename) + ret = build_id_cache__fprintf_missing(missing_filename, force, stdout); + + if (update_name_list_str) { + list = strlist__new(true, update_name_list_str); + if (list) { + strlist__for_each(pos, list) + if (build_id_cache__update_file(pos->s, debugdir)) { + if (errno == ENOENT) { + pr_debug("%s wasn't in the cache\n", + pos->s); + continue; + } + pr_warning("Couldn't update %s: %s\n", + pos->s, strerror(errno)); + } + + strlist__delete(list); + } + } + + return ret; } diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c index a82d99fec83e..e74366a13218 100644 --- a/tools/perf/builtin-buildid-list.c +++ b/tools/perf/builtin-buildid-list.c @@ -44,23 +44,26 @@ static int filename__fprintf_build_id(const char *name, FILE *fp) return fprintf(fp, "%s\n", sbuild_id); } +static bool dso__skip_buildid(struct dso *dso, int with_hits) +{ + return with_hits && !dso->hit; +} + static int perf_session__list_build_ids(bool force, bool with_hits) { struct perf_session *session; symbol__elf_init(); - - session = perf_session__new(input_name, O_RDONLY, force, false, - &build_id__mark_dso_hit_ops); - if (session == NULL) - return -1; - /* * See if this is an ELF file first: */ - if (filename__fprintf_build_id(session->filename, stdout)) + if (filename__fprintf_build_id(input_name, stdout)) goto out; + session = perf_session__new(input_name, O_RDONLY, force, false, + &build_id__mark_dso_hit_ops); + if (session == NULL) + return -1; /* * in pipe-mode, the only way to get the buildids is to parse * the record stream. Buildids are stored as RECORD_HEADER_BUILD_ID @@ -68,9 +71,9 @@ static int perf_session__list_build_ids(bool force, bool with_hits) if (with_hits || session->fd_pipe) perf_session__process_events(session, &build_id__mark_dso_hit_ops); - perf_session__fprintf_dsos_buildid(session, stdout, with_hits); -out: + perf_session__fprintf_dsos_buildid(session, stdout, dso__skip_buildid, with_hits); perf_session__delete(session); +out: return 0; } diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 93b852f8a5d5..d207a97a2db1 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -23,7 +23,6 @@ static char const *input_old = "perf.data.old", *input_new = "perf.data"; static char diff__default_sort_order[] = "dso,symbol"; static bool force; -static bool show_displacement; static bool show_period; static bool show_formula; static bool show_baseline_only; @@ -146,58 +145,47 @@ static int setup_compute(const struct option *opt, const char *str, return -EINVAL; } -static double get_period_percent(struct hist_entry *he, u64 period) +double perf_diff__period_percent(struct hist_entry *he, u64 period) { u64 total = he->hists->stats.total_period; return (period * 100.0) / total; } -double perf_diff__compute_delta(struct hist_entry *he) +double perf_diff__compute_delta(struct hist_entry *he, struct hist_entry *pair) { - struct hist_entry *pair = hist_entry__next_pair(he); - double new_percent = get_period_percent(he, he->stat.period); - double old_percent = pair ? get_period_percent(pair, pair->stat.period) : 0.0; + double new_percent = perf_diff__period_percent(he, he->stat.period); + double old_percent = perf_diff__period_percent(pair, pair->stat.period); he->diff.period_ratio_delta = new_percent - old_percent; he->diff.computed = true; return he->diff.period_ratio_delta; } -double perf_diff__compute_ratio(struct hist_entry *he) +double perf_diff__compute_ratio(struct hist_entry *he, struct hist_entry *pair) { - struct hist_entry *pair = hist_entry__next_pair(he); double new_period = he->stat.period; - double old_period = pair ? pair->stat.period : 0; + double old_period = pair->stat.period; he->diff.computed = true; - he->diff.period_ratio = pair ? (new_period / old_period) : 0; + he->diff.period_ratio = new_period / old_period; return he->diff.period_ratio; } -s64 perf_diff__compute_wdiff(struct hist_entry *he) +s64 perf_diff__compute_wdiff(struct hist_entry *he, struct hist_entry *pair) { - struct hist_entry *pair = hist_entry__next_pair(he); u64 new_period = he->stat.period; - u64 old_period = pair ? pair->stat.period : 0; + u64 old_period = pair->stat.period; he->diff.computed = true; - - if (!pair) - he->diff.wdiff = 0; - else - he->diff.wdiff = new_period * compute_wdiff_w2 - - old_period * compute_wdiff_w1; + he->diff.wdiff = new_period * compute_wdiff_w2 - + old_period * compute_wdiff_w1; return he->diff.wdiff; } -static int formula_delta(struct hist_entry *he, char *buf, size_t size) +static int formula_delta(struct hist_entry *he, struct hist_entry *pair, + char *buf, size_t size) { - struct hist_entry *pair = hist_entry__next_pair(he); - - if (!pair) - return -1; - return scnprintf(buf, size, "(%" PRIu64 " * 100 / %" PRIu64 ") - " "(%" PRIu64 " * 100 / %" PRIu64 ")", @@ -205,41 +193,36 @@ static int formula_delta(struct hist_entry *he, char *buf, size_t size) pair->stat.period, pair->hists->stats.total_period); } -static int formula_ratio(struct hist_entry *he, char *buf, size_t size) +static int formula_ratio(struct hist_entry *he, struct hist_entry *pair, + char *buf, size_t size) { - struct hist_entry *pair = hist_entry__next_pair(he); double new_period = he->stat.period; - double old_period = pair ? pair->stat.period : 0; - - if (!pair) - return -1; + double old_period = pair->stat.period; return scnprintf(buf, size, "%.0F / %.0F", new_period, old_period); } -static int formula_wdiff(struct hist_entry *he, char *buf, size_t size) +static int formula_wdiff(struct hist_entry *he, struct hist_entry *pair, + char *buf, size_t size) { - struct hist_entry *pair = hist_entry__next_pair(he); u64 new_period = he->stat.period; - u64 old_period = pair ? pair->stat.period : 0; - - if (!pair) - return -1; + u64 old_period = pair->stat.period; return scnprintf(buf, size, "(%" PRIu64 " * " "%" PRId64 ") - (%" PRIu64 " * " "%" PRId64 ")", new_period, compute_wdiff_w2, old_period, compute_wdiff_w1); } -int perf_diff__formula(char *buf, size_t size, struct hist_entry *he) +int perf_diff__formula(struct hist_entry *he, struct hist_entry *pair, + char *buf, size_t size) { switch (compute) { case COMPUTE_DELTA: - return formula_delta(he, buf, size); + return formula_delta(he, pair, buf, size); case COMPUTE_RATIO: - return formula_ratio(he, buf, size); + return formula_ratio(he, pair, buf, size); case COMPUTE_WEIGHTED_DIFF: - return formula_wdiff(he, buf, size); + return formula_wdiff(he, pair, buf, size); default: BUG_ON(1); } @@ -292,48 +275,6 @@ static struct perf_tool tool = { .ordering_requires_timestamps = true, }; -static void insert_hist_entry_by_name(struct rb_root *root, - struct hist_entry *he) -{ - struct rb_node **p = &root->rb_node; - struct rb_node *parent = NULL; - struct hist_entry *iter; - - while (*p != NULL) { - parent = *p; - iter = rb_entry(parent, struct hist_entry, rb_node); - if (hist_entry__cmp(he, iter) < 0) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - - rb_link_node(&he->rb_node, parent, p); - rb_insert_color(&he->rb_node, root); -} - -static void hists__name_resort(struct hists *self, bool sort) -{ - unsigned long position = 1; - struct rb_root tmp = RB_ROOT; - struct rb_node *next = rb_first(&self->entries); - - while (next != NULL) { - struct hist_entry *n = rb_entry(next, struct hist_entry, rb_node); - - next = rb_next(&n->rb_node); - n->position = position++; - - if (sort) { - rb_erase(&n->rb_node, &self->entries); - insert_hist_entry_by_name(&tmp, n); - } - } - - if (sort) - self->entries = tmp; -} - static struct perf_evsel *evsel_match(struct perf_evsel *evsel, struct perf_evlist *evlist) { @@ -346,34 +287,34 @@ static struct perf_evsel *evsel_match(struct perf_evsel *evsel, return NULL; } -static void perf_evlist__resort_hists(struct perf_evlist *evlist, bool name) +static void perf_evlist__collapse_resort(struct perf_evlist *evlist) { struct perf_evsel *evsel; list_for_each_entry(evsel, &evlist->entries, node) { struct hists *hists = &evsel->hists; - hists__output_resort(hists); - - /* - * The hists__name_resort only sets possition - * if name is false. - */ - if (name || ((!name) && show_displacement)) - hists__name_resort(hists, name); + hists__collapse_resort(hists); } } static void hists__baseline_only(struct hists *hists) { - struct rb_node *next = rb_first(&hists->entries); + struct rb_root *root; + struct rb_node *next; + if (sort__need_collapse) + root = &hists->entries_collapsed; + else + root = hists->entries_in; + + next = rb_first(root); while (next != NULL) { - struct hist_entry *he = rb_entry(next, struct hist_entry, rb_node); + struct hist_entry *he = rb_entry(next, struct hist_entry, rb_node_in); - next = rb_next(&he->rb_node); + next = rb_next(&he->rb_node_in); if (!hist_entry__next_pair(he)) { - rb_erase(&he->rb_node, &hists->entries); + rb_erase(&he->rb_node_in, root); hist_entry__free(he); } } @@ -385,18 +326,21 @@ static void hists__precompute(struct hists *hists) while (next != NULL) { struct hist_entry *he = rb_entry(next, struct hist_entry, rb_node); + struct hist_entry *pair = hist_entry__next_pair(he); next = rb_next(&he->rb_node); + if (!pair) + continue; switch (compute) { case COMPUTE_DELTA: - perf_diff__compute_delta(he); + perf_diff__compute_delta(he, pair); break; case COMPUTE_RATIO: - perf_diff__compute_ratio(he); + perf_diff__compute_ratio(he, pair); break; case COMPUTE_WEIGHTED_DIFF: - perf_diff__compute_wdiff(he); + perf_diff__compute_wdiff(he, pair); break; default: BUG_ON(1); @@ -470,19 +414,30 @@ static void insert_hist_entry_by_compute(struct rb_root *root, static void hists__compute_resort(struct hists *hists) { - struct rb_root tmp = RB_ROOT; - struct rb_node *next = rb_first(&hists->entries); + struct rb_root *root; + struct rb_node *next; + + if (sort__need_collapse) + root = &hists->entries_collapsed; + else + root = hists->entries_in; + + hists->entries = RB_ROOT; + next = rb_first(root); + + hists->nr_entries = 0; + hists->stats.total_period = 0; + hists__reset_col_len(hists); while (next != NULL) { - struct hist_entry *he = rb_entry(next, struct hist_entry, rb_node); + struct hist_entry *he; - next = rb_next(&he->rb_node); + he = rb_entry(next, struct hist_entry, rb_node_in); + next = rb_next(&he->rb_node_in); - rb_erase(&he->rb_node, &hists->entries); - insert_hist_entry_by_compute(&tmp, he, compute); + insert_hist_entry_by_compute(&hists->entries, he, compute); + hists__inc_nr_entries(hists, he); } - - hists->entries = tmp; } static void hists__process(struct hists *old, struct hists *new) @@ -497,6 +452,8 @@ static void hists__process(struct hists *old, struct hists *new) if (sort_compute) { hists__precompute(new); hists__compute_resort(new); + } else { + hists__output_resort(new); } hists__fprintf(new, true, 0, 0, stdout); @@ -528,8 +485,8 @@ static int __cmd_diff(void) evlist_old = older->evlist; evlist_new = newer->evlist; - perf_evlist__resort_hists(evlist_old, true); - perf_evlist__resort_hists(evlist_new, false); + perf_evlist__collapse_resort(evlist_old); + perf_evlist__collapse_resort(evlist_new); list_for_each_entry(evsel, &evlist_new->entries, node) { struct perf_evsel *evsel_old; @@ -562,8 +519,6 @@ static const char * const diff_usage[] = { static const struct option options[] = { OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"), - OPT_BOOLEAN('M', "displacement", &show_displacement, - "Show position displacement relative to baseline"), OPT_BOOLEAN('b', "baseline-only", &show_baseline_only, "Show only items with match in baseline"), OPT_CALLBACK('c', "compute", &compute, @@ -597,40 +552,32 @@ static const struct option options[] = { static void ui_init(void) { - perf_hpp__init(); - - /* No overhead column. */ - perf_hpp__column_enable(PERF_HPP__OVERHEAD, false); - /* - * Display baseline/delta/ratio/displacement/ + * Display baseline/delta/ratio * formula/periods columns. */ - perf_hpp__column_enable(PERF_HPP__BASELINE, true); + perf_hpp__column_enable(PERF_HPP__BASELINE); switch (compute) { case COMPUTE_DELTA: - perf_hpp__column_enable(PERF_HPP__DELTA, true); + perf_hpp__column_enable(PERF_HPP__DELTA); break; case COMPUTE_RATIO: - perf_hpp__column_enable(PERF_HPP__RATIO, true); + perf_hpp__column_enable(PERF_HPP__RATIO); break; case COMPUTE_WEIGHTED_DIFF: - perf_hpp__column_enable(PERF_HPP__WEIGHTED_DIFF, true); + perf_hpp__column_enable(PERF_HPP__WEIGHTED_DIFF); break; default: BUG_ON(1); }; - if (show_displacement) - perf_hpp__column_enable(PERF_HPP__DISPL, true); - if (show_formula) - perf_hpp__column_enable(PERF_HPP__FORMULA, true); + perf_hpp__column_enable(PERF_HPP__FORMULA); if (show_period) { - perf_hpp__column_enable(PERF_HPP__PERIOD, true); - perf_hpp__column_enable(PERF_HPP__PERIOD_BASELINE, true); + perf_hpp__column_enable(PERF_HPP__PERIOD); + perf_hpp__column_enable(PERF_HPP__PERIOD_BASELINE); } } @@ -658,7 +605,9 @@ int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused) ui_init(); - setup_sorting(diff_usage, options); + if (setup_sorting() < 0) + usage_with_options(diff_usage, options); + setup_pager(); sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", NULL); diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c index c20f1dcfb7e2..05bd9dfe875c 100644 --- a/tools/perf/builtin-evlist.c +++ b/tools/perf/builtin-evlist.c @@ -15,39 +15,6 @@ #include "util/parse-options.h" #include "util/session.h" -struct perf_attr_details { - bool freq; - bool verbose; -}; - -static int comma_printf(bool *first, const char *fmt, ...) -{ - va_list args; - int ret = 0; - - if (!*first) { - ret += printf(","); - } else { - ret += printf(":"); - *first = false; - } - - va_start(args, fmt); - ret += vprintf(fmt, args); - va_end(args); - return ret; -} - -static int __if_print(bool *first, const char *field, u64 value) -{ - if (value == 0) - return 0; - - return comma_printf(first, " %s: %" PRIu64, field, value); -} - -#define if_print(field) __if_print(&first, #field, pos->attr.field) - static int __cmd_evlist(const char *file_name, struct perf_attr_details *details) { struct perf_session *session; @@ -57,52 +24,8 @@ static int __cmd_evlist(const char *file_name, struct perf_attr_details *details if (session == NULL) return -ENOMEM; - list_for_each_entry(pos, &session->evlist->entries, node) { - bool first = true; - - printf("%s", perf_evsel__name(pos)); - - if (details->verbose || details->freq) { - comma_printf(&first, " sample_freq=%" PRIu64, - (u64)pos->attr.sample_freq); - } - - if (details->verbose) { - if_print(type); - if_print(config); - if_print(config1); - if_print(config2); - if_print(size); - if_print(sample_type); - if_print(read_format); - if_print(disabled); - if_print(inherit); - if_print(pinned); - if_print(exclusive); - if_print(exclude_user); - if_print(exclude_kernel); - if_print(exclude_hv); - if_print(exclude_idle); - if_print(mmap); - if_print(comm); - if_print(freq); - if_print(inherit_stat); - if_print(enable_on_exec); - if_print(task); - if_print(watermark); - if_print(precise_ip); - if_print(mmap_data); - if_print(sample_id_all); - if_print(exclude_host); - if_print(exclude_guest); - if_print(__reserved_1); - if_print(wakeup_events); - if_print(bp_type); - if_print(branch_sample_type); - } - - putchar('\n'); - } + list_for_each_entry(pos, &session->evlist->entries, node) + perf_evsel__fprintf(pos, details, stdout); perf_session__delete(session); return 0; @@ -116,6 +39,8 @@ int cmd_evlist(int argc, const char **argv, const char *prefix __maybe_unused) OPT_BOOLEAN('F', "freq", &details.freq, "Show the sample frequency"), OPT_BOOLEAN('v', "verbose", &details.verbose, "Show all event attr details"), + OPT_BOOLEAN('g', "group", &details.event_group, + "Show event group information"), OPT_END() }; const char * const evlist_usage[] = { @@ -127,5 +52,10 @@ int cmd_evlist(int argc, const char **argv, const char *prefix __maybe_unused) if (argc) usage_with_options(evlist_usage, options); + if (details.event_group && (details.verbose || details.freq)) { + pr_err("--group option is not compatible with other options\n"); + usage_with_options(evlist_usage, options); + } + return __cmd_evlist(input_name, &details); } diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 0b4b796167be..46878daca5cc 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -17,6 +17,7 @@ #include "util/debug.h" #include <linux/rbtree.h> +#include <linux/string.h> struct alloc_stat; typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *); @@ -340,7 +341,7 @@ static void __print_result(struct rb_root *root, struct perf_session *session, int n_lines, int is_caller) { struct rb_node *next; - struct machine *machine; + struct machine *machine = &session->machines.host; printf("%.102s\n", graph_dotted_line); printf(" %-34s |", is_caller ? "Callsite": "Alloc Ptr"); @@ -349,11 +350,6 @@ static void __print_result(struct rb_root *root, struct perf_session *session, next = rb_first(root); - machine = perf_session__find_host_machine(session); - if (!machine) { - pr_err("__print_result: couldn't find kernel information\n"); - return; - } while (next && n_lines--) { struct alloc_stat *data = rb_entry(next, struct alloc_stat, node); @@ -614,8 +610,7 @@ static struct sort_dimension *avail_sorts[] = { &pingpong_sort_dimension, }; -#define NUM_AVAIL_SORTS \ - (int)(sizeof(avail_sorts) / sizeof(struct sort_dimension *)) +#define NUM_AVAIL_SORTS ((int)ARRAY_SIZE(avail_sorts)) static int sort_dimension__add(const char *tok, struct list_head *list) { @@ -624,12 +619,11 @@ static int sort_dimension__add(const char *tok, struct list_head *list) for (i = 0; i < NUM_AVAIL_SORTS; i++) { if (!strcmp(avail_sorts[i]->name, tok)) { - sort = malloc(sizeof(*sort)); + sort = memdup(avail_sorts[i], sizeof(*avail_sorts[i])); if (!sort) { - pr_err("%s: malloc failed\n", __func__); + pr_err("%s: memdup failed\n", __func__); return -1; } - memcpy(sort, avail_sorts[i], sizeof(*sort)); list_add_tail(&sort->list, list); return 0; } diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index ca3f80ebc100..37a769d7f9fe 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -973,8 +973,7 @@ __cmd_buildid_list(const char *file_name, int argc, const char **argv) int cmd_kvm(int argc, const char **argv, const char *prefix __maybe_unused) { - const char *file_name; - + const char *file_name = NULL; const struct option kvm_options[] = { OPT_STRING('i', "input", &file_name, "file", "Input file name"), diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index f3151d3c70ce..774c90713a53 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -224,130 +224,28 @@ static bool perf_evlist__equal(struct perf_evlist *evlist, static int perf_record__open(struct perf_record *rec) { + char msg[512]; struct perf_evsel *pos; struct perf_evlist *evlist = rec->evlist; struct perf_session *session = rec->session; struct perf_record_opts *opts = &rec->opts; int rc = 0; - /* - * Set the evsel leader links before we configure attributes, - * since some might depend on this info. - */ - if (opts->group) - perf_evlist__set_leader(evlist); - - perf_evlist__config_attrs(evlist, opts); + perf_evlist__config(evlist, opts); list_for_each_entry(pos, &evlist->entries, node) { - struct perf_event_attr *attr = &pos->attr; - /* - * Check if parse_single_tracepoint_event has already asked for - * PERF_SAMPLE_TIME. - * - * XXX this is kludgy but short term fix for problems introduced by - * eac23d1c that broke 'perf script' by having different sample_types - * when using multiple tracepoint events when we use a perf binary - * that tries to use sample_id_all on an older kernel. - * - * We need to move counter creation to perf_session, support - * different sample_types, etc. - */ - bool time_needed = attr->sample_type & PERF_SAMPLE_TIME; - -fallback_missing_features: - if (opts->exclude_guest_missing) - attr->exclude_guest = attr->exclude_host = 0; -retry_sample_id: - attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1; try_again: if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) { - int err = errno; - - if (err == EPERM || err == EACCES) { - ui__error_paranoid(); - rc = -err; - goto out; - } else if (err == ENODEV && opts->target.cpu_list) { - pr_err("No such device - did you specify" - " an out-of-range profile CPU?\n"); - rc = -err; - goto out; - } else if (err == EINVAL) { - if (!opts->exclude_guest_missing && - (attr->exclude_guest || attr->exclude_host)) { - pr_debug("Old kernel, cannot exclude " - "guest or host samples.\n"); - opts->exclude_guest_missing = true; - goto fallback_missing_features; - } else if (!opts->sample_id_all_missing) { - /* - * Old kernel, no attr->sample_id_type_all field - */ - opts->sample_id_all_missing = true; - if (!opts->sample_time && !opts->raw_samples && !time_needed) - attr->sample_type &= ~PERF_SAMPLE_TIME; - - goto retry_sample_id; - } - } - - /* - * If it's cycles then fall back to hrtimer - * based cpu-clock-tick sw counter, which - * is always available even if no PMU support. - * - * PPC returns ENXIO until 2.6.37 (behavior changed - * with commit b0a873e). - */ - if ((err == ENOENT || err == ENXIO) - && attr->type == PERF_TYPE_HARDWARE - && attr->config == PERF_COUNT_HW_CPU_CYCLES) { - + if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) { if (verbose) - ui__warning("The cycles event is not supported, " - "trying to fall back to cpu-clock-ticks\n"); - attr->type = PERF_TYPE_SOFTWARE; - attr->config = PERF_COUNT_SW_CPU_CLOCK; - if (pos->name) { - free(pos->name); - pos->name = NULL; - } + ui__warning("%s\n", msg); goto try_again; } - if (err == ENOENT) { - ui__error("The %s event is not supported.\n", - perf_evsel__name(pos)); - rc = -err; - goto out; - } else if ((err == EOPNOTSUPP) && (attr->precise_ip)) { - ui__error("\'precise\' request may not be supported. " - "Try removing 'p' modifier\n"); - rc = -err; - goto out; - } - - printf("\n"); - error("sys_perf_event_open() syscall returned with %d " - "(%s) for event %s. /bin/dmesg may provide " - "additional information.\n", - err, strerror(err), perf_evsel__name(pos)); - -#if defined(__i386__) || defined(__x86_64__) - if (attr->type == PERF_TYPE_HARDWARE && - err == EOPNOTSUPP) { - pr_err("No hardware sampling interrupt available." - " No APIC? If so then you can boot the kernel" - " with the \"lapic\" boot parameter to" - " force-enable it.\n"); - rc = -err; - goto out; - } -#endif - - pr_err("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); - rc = -err; + rc = -errno; + perf_evsel__open_strerror(pos, &opts->target, + errno, msg, sizeof(msg)); + ui__error("%s\n", msg); goto out; } } @@ -430,10 +328,6 @@ static void perf_event__synthesize_guest_os(struct machine *machine, void *data) { int err; struct perf_tool *tool = data; - - if (machine__is_host(machine)) - return; - /* *As for guest kernel when processing subcommand record&report, *we arrange module mmap prior to guest kernel mmap and trigger @@ -592,6 +486,9 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) goto out_delete_session; } + if (!evsel_list->nr_groups) + perf_header__clear_feat(&session->header, HEADER_GROUP_DESC); + /* * perf_session__delete(session) will be called at perf_record__exit() */ @@ -618,12 +515,7 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) rec->post_processing_offset = lseek(output, 0, SEEK_CUR); - machine = perf_session__find_host_machine(session); - if (!machine) { - pr_err("Couldn't find native kernel information.\n"); - err = -1; - goto out_delete_session; - } + machine = &session->machines.host; if (opts->pipe_output) { err = perf_event__synthesize_attrs(tool, session, @@ -676,9 +568,10 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" "Check /proc/modules permission or run as root.\n"); - if (perf_guest) - perf_session__process_machines(session, tool, - perf_event__synthesize_guest_os); + if (perf_guest) { + machines__process_guests(&session->machines, + perf_event__synthesize_guest_os, tool); + } if (!opts->target.system_wide) err = perf_event__synthesize_thread_map(tool, evsel_list->threads, @@ -875,11 +768,10 @@ static int get_stack_size(char *str, unsigned long *_size) } #endif /* LIBUNWIND_SUPPORT */ -static int -parse_callchain_opt(const struct option *opt __maybe_unused, const char *arg, - int unset) +int record_parse_callchain_opt(const struct option *opt, + const char *arg, int unset) { - struct perf_record *rec = (struct perf_record *)opt->value; + struct perf_record_opts *opts = opt->value; char *tok, *name, *saveptr = NULL; char *buf; int ret = -1; @@ -905,7 +797,7 @@ parse_callchain_opt(const struct option *opt __maybe_unused, const char *arg, /* Framepointer style */ if (!strncmp(name, "fp", sizeof("fp"))) { if (!strtok_r(NULL, ",", &saveptr)) { - rec->opts.call_graph = CALLCHAIN_FP; + opts->call_graph = CALLCHAIN_FP; ret = 0; } else pr_err("callchain: No more arguments " @@ -918,20 +810,20 @@ parse_callchain_opt(const struct option *opt __maybe_unused, const char *arg, const unsigned long default_stack_dump_size = 8192; ret = 0; - rec->opts.call_graph = CALLCHAIN_DWARF; - rec->opts.stack_dump_size = default_stack_dump_size; + opts->call_graph = CALLCHAIN_DWARF; + opts->stack_dump_size = default_stack_dump_size; tok = strtok_r(NULL, ",", &saveptr); if (tok) { unsigned long size = 0; ret = get_stack_size(tok, &size); - rec->opts.stack_dump_size = size; + opts->stack_dump_size = size; } if (!ret) pr_debug("callchain: stack dump size %d\n", - rec->opts.stack_dump_size); + opts->stack_dump_size); #endif /* LIBUNWIND_SUPPORT */ } else { pr_err("callchain: Unknown -g option " @@ -944,7 +836,7 @@ parse_callchain_opt(const struct option *opt __maybe_unused, const char *arg, free(buf); if (!ret) - pr_debug("callchain: type %d\n", rec->opts.call_graph); + pr_debug("callchain: type %d\n", opts->call_graph); return ret; } @@ -982,9 +874,9 @@ static struct perf_record record = { #define CALLCHAIN_HELP "do call-graph (stack chain/backtrace) recording: " #ifdef LIBUNWIND_SUPPORT -static const char callchain_help[] = CALLCHAIN_HELP "[fp] dwarf"; +const char record_callchain_help[] = CALLCHAIN_HELP "[fp] dwarf"; #else -static const char callchain_help[] = CALLCHAIN_HELP "[fp]"; +const char record_callchain_help[] = CALLCHAIN_HELP "[fp]"; #endif /* @@ -1028,9 +920,9 @@ const struct option record_options[] = { "number of mmap data pages"), OPT_BOOLEAN(0, "group", &record.opts.group, "put the counters into a counter group"), - OPT_CALLBACK_DEFAULT('g', "call-graph", &record, "mode[,dump_size]", - callchain_help, &parse_callchain_opt, - "fp"), + OPT_CALLBACK_DEFAULT('g', "call-graph", &record.opts, + "mode[,dump_size]", record_callchain_help, + &record_parse_callchain_opt, "fp"), OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"), diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index fc251005dd3d..96b5a7fee4bb 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -8,6 +8,7 @@ #include "builtin.h" #include "util/util.h" +#include "util/cache.h" #include "util/annotate.h" #include "util/color.h" @@ -54,6 +55,16 @@ struct perf_report { DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); }; +static int perf_report_config(const char *var, const char *value, void *cb) +{ + if (!strcmp(var, "report.group")) { + symbol_conf.event_group = perf_config_bool(var, value); + return 0; + } + + return perf_default_config(var, value, cb); +} + static int perf_report__add_branch_hist_entry(struct perf_tool *tool, struct addr_location *al, struct perf_sample *sample, @@ -299,6 +310,21 @@ static size_t hists__fprintf_nr_sample_events(struct hists *self, char unit; unsigned long nr_samples = self->stats.nr_events[PERF_RECORD_SAMPLE]; u64 nr_events = self->stats.total_period; + struct perf_evsel *evsel = hists_to_evsel(self); + char buf[512]; + size_t size = sizeof(buf); + + if (symbol_conf.event_group && evsel->nr_members > 1) { + struct perf_evsel *pos; + + perf_evsel__group_desc(evsel, buf, size); + evname = buf; + + for_each_group_member(pos, evsel) { + nr_samples += pos->hists.stats.nr_events[PERF_RECORD_SAMPLE]; + nr_events += pos->hists.stats.total_period; + } + } nr_samples = convert_unit(nr_samples, &unit); ret = fprintf(fp, "# Samples: %lu%c", nr_samples, unit); @@ -319,6 +345,10 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist, struct hists *hists = &pos->hists; const char *evname = perf_evsel__name(pos); + if (symbol_conf.event_group && + !perf_evsel__is_group_leader(pos)) + continue; + hists__fprintf_nr_sample_events(hists, evname, stdout); hists__fprintf(hists, true, 0, 0, stdout); fprintf(stdout, "\n\n"); @@ -372,7 +402,7 @@ static int __cmd_report(struct perf_report *rep) if (ret) goto out_delete; - kernel_map = session->host_machine.vmlinux_maps[MAP__FUNCTION]; + kernel_map = session->machines.host.vmlinux_maps[MAP__FUNCTION]; kernel_kmap = map__kmap(kernel_map); if (kernel_map == NULL || (kernel_map->dso->hit && @@ -416,8 +446,16 @@ static int __cmd_report(struct perf_report *rep) hists->symbol_filter_str = rep->symbol_filter_str; hists__collapse_resort(hists); - hists__output_resort(hists); nr_samples += hists->stats.nr_events[PERF_RECORD_SAMPLE]; + + /* Non-group events are considered as leader */ + if (symbol_conf.event_group && + !perf_evsel__is_group_leader(pos)) { + struct hists *leader_hists = &pos->leader->hists; + + hists__match(leader_hists, hists); + hists__link(leader_hists, hists); + } } if (nr_samples == 0) { @@ -425,11 +463,22 @@ static int __cmd_report(struct perf_report *rep) goto out_delete; } + list_for_each_entry(pos, &session->evlist->entries, node) + hists__output_resort(&pos->hists); + if (use_browser > 0) { if (use_browser == 1) { - perf_evlist__tui_browse_hists(session->evlist, help, - NULL, - &session->header.env); + ret = perf_evlist__tui_browse_hists(session->evlist, + help, + NULL, + &session->header.env); + /* + * Usually "ret" is the last pressed key, and we only + * care if the key notifies us to switch data file. + */ + if (ret != K_SWITCH_INPUT_DATA) + ret = 0; + } else if (use_browser == 2) { perf_evlist__gtk_browse_hists(session->evlist, help, NULL); @@ -595,8 +644,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) OPT_BOOLEAN(0, "stdio", &report.use_stdio, "Use the stdio interface"), OPT_STRING('s', "sort", &sort_order, "key[,key2...]", - "sort by key(s): pid, comm, dso, symbol, parent, dso_to," - " dso_from, symbol_to, symbol_from, mispredict"), + "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline," + " dso_to, dso_from, symbol_to, symbol_from, mispredict"), OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization, "Show sample percentage for different cpu modes"), OPT_STRING('p', "parent", &parent_pattern, "regex", @@ -638,6 +687,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) "Specify disassembler style (e.g. -M intel for intel syntax)"), OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, "Show a column with the sum of periods"), + OPT_BOOLEAN(0, "group", &symbol_conf.event_group, + "Show event group information together"), OPT_CALLBACK_NOOPT('b', "branch-stack", &sort__branch_mode, "", "use branch records for histogram filling", parse_branch_mode), OPT_STRING(0, "objdump", &objdump_path, "path", @@ -645,6 +696,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) OPT_END() }; + perf_config(perf_report_config, NULL); + argc = parse_options(argc, argv, options, report_usage, 0); if (report.use_stdio) @@ -663,6 +716,16 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) else input_name = "perf.data"; } + + if (strcmp(input_name, "-") != 0) + setup_browser(true); + else { + use_browser = 0; + perf_hpp__column_enable(PERF_HPP__OVERHEAD); + perf_hpp__init(); + } + +repeat: session = perf_session__new(input_name, O_RDONLY, report.force, false, &report.tool); if (session == NULL) @@ -688,14 +751,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) } - if (strcmp(input_name, "-") != 0) - setup_browser(true); - else { - use_browser = 0; - perf_hpp__init(); - } - - setup_sorting(report_usage, options); + if (setup_sorting() < 0) + usage_with_options(report_usage, options); /* * Only in the newt browser we are doing integrated annotation, @@ -763,6 +820,12 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) } ret = __cmd_report(&report); + if (ret == K_SWITCH_INPUT_DATA) { + perf_session__delete(session); + goto repeat; + } else + ret = 0; + error: perf_session__delete(session); return ret; diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index cc28b85dabd5..138229439a93 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1475,9 +1475,9 @@ static int perf_sched__read_events(struct perf_sched *sched, bool destroy, goto out_delete; } - sched->nr_events = session->hists.stats.nr_events[0]; - sched->nr_lost_events = session->hists.stats.total_lost; - sched->nr_lost_chunks = session->hists.stats.nr_events[PERF_RECORD_LOST]; + sched->nr_events = session->stats.nr_events[0]; + sched->nr_lost_events = session->stats.total_lost; + sched->nr_lost_chunks = session->stats.nr_events[PERF_RECORD_LOST]; } if (destroy) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index b363e7b292b2..92d4658f56fb 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -692,7 +692,7 @@ static int parse_output_fields(const struct option *opt __maybe_unused, const char *arg, int unset __maybe_unused) { char *tok; - int i, imax = sizeof(all_output_options) / sizeof(struct output_option); + int i, imax = ARRAY_SIZE(all_output_options); int j; int rc = 0; char *str = strdup(arg); @@ -909,18 +909,6 @@ static const char *ends_with(const char *str, const char *suffix) return NULL; } -static char *ltrim(char *str) -{ - int len = strlen(str); - - while (len && isspace(*str)) { - len--; - str++; - } - - return str; -} - static int read_script_info(struct script_desc *desc, const char *filename) { char line[BUFSIZ], *p; @@ -1487,7 +1475,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) return -1; } - perf_session__fprintf_info(session, stdout, show_full_info); + if (!script_name && !generate_script_lang) + perf_session__fprintf_info(session, stdout, show_full_info); if (!no_callchain) symbol_conf.use_callchain = true; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index c247faca7127..99848761f573 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -65,6 +65,11 @@ #define CNTR_NOT_SUPPORTED "<not supported>" #define CNTR_NOT_COUNTED "<not counted>" +static void print_stat(int argc, const char **argv); +static void print_counter_aggr(struct perf_evsel *counter, char *prefix); +static void print_counter(struct perf_evsel *counter, char *prefix); +static void print_aggr_socket(char *prefix); + static struct perf_evlist *evsel_list; static struct perf_target target = { @@ -75,6 +80,7 @@ static int run_count = 1; static bool no_inherit = false; static bool scale = true; static bool no_aggr = false; +static bool aggr_socket = false; static pid_t child_pid = -1; static bool null_run = false; static int detailed_run = 0; @@ -87,6 +93,9 @@ static FILE *output = NULL; static const char *pre_cmd = NULL; static const char *post_cmd = NULL; static bool sync_run = false; +static unsigned int interval = 0; +static struct timespec ref_time; +static struct cpu_map *sock_map; static volatile int done = 0; @@ -94,6 +103,28 @@ struct perf_stat { struct stats res_stats[3]; }; +static inline void diff_timespec(struct timespec *r, struct timespec *a, + struct timespec *b) +{ + r->tv_sec = a->tv_sec - b->tv_sec; + if (a->tv_nsec < b->tv_nsec) { + r->tv_nsec = a->tv_nsec + 1000000000L - b->tv_nsec; + r->tv_sec--; + } else { + r->tv_nsec = a->tv_nsec - b->tv_nsec ; + } +} + +static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel) +{ + return (evsel->cpus && !target.cpu_list) ? evsel->cpus : evsel_list->cpus; +} + +static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel) +{ + return perf_evsel__cpus(evsel)->nr; +} + static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel) { evsel->priv = zalloc(sizeof(struct perf_stat)); @@ -106,14 +137,27 @@ static void perf_evsel__free_stat_priv(struct perf_evsel *evsel) evsel->priv = NULL; } -static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel) +static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel) { - return (evsel->cpus && !target.cpu_list) ? evsel->cpus : evsel_list->cpus; + void *addr; + size_t sz; + + sz = sizeof(*evsel->counts) + + (perf_evsel__nr_cpus(evsel) * sizeof(struct perf_counts_values)); + + addr = zalloc(sz); + if (!addr) + return -ENOMEM; + + evsel->prev_raw_counts = addr; + + return 0; } -static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel) +static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel) { - return perf_evsel__cpus(evsel)->nr; + free(evsel->prev_raw_counts); + evsel->prev_raw_counts = NULL; } static struct stats runtime_nsecs_stats[MAX_NR_CPUS]; @@ -132,8 +176,6 @@ static struct stats walltime_nsecs_stats; static int create_perf_stat_counter(struct perf_evsel *evsel) { struct perf_event_attr *attr = &evsel->attr; - bool exclude_guest_missing = false; - int ret; if (scale) attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | @@ -141,38 +183,16 @@ static int create_perf_stat_counter(struct perf_evsel *evsel) attr->inherit = !no_inherit; -retry: - if (exclude_guest_missing) - evsel->attr.exclude_guest = evsel->attr.exclude_host = 0; - - if (perf_target__has_cpu(&target)) { - ret = perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel)); - if (ret) - goto check_ret; - return 0; - } + if (perf_target__has_cpu(&target)) + return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel)); if (!perf_target__has_task(&target) && - !perf_evsel__is_group_member(evsel)) { + perf_evsel__is_group_leader(evsel)) { attr->disabled = 1; attr->enable_on_exec = 1; } - ret = perf_evsel__open_per_thread(evsel, evsel_list->threads); - if (!ret) - return 0; - /* fall through */ -check_ret: - if (ret && errno == EINVAL) { - if (!exclude_guest_missing && - (evsel->attr.exclude_guest || evsel->attr.exclude_host)) { - pr_debug("Old kernel, cannot exclude " - "guest or host samples.\n"); - exclude_guest_missing = true; - goto retry; - } - } - return ret; + return perf_evsel__open_per_thread(evsel, evsel_list->threads); } /* @@ -269,15 +289,79 @@ static int read_counter(struct perf_evsel *counter) return 0; } +static void print_interval(void) +{ + static int num_print_interval; + struct perf_evsel *counter; + struct perf_stat *ps; + struct timespec ts, rs; + char prefix[64]; + + if (no_aggr) { + list_for_each_entry(counter, &evsel_list->entries, node) { + ps = counter->priv; + memset(ps->res_stats, 0, sizeof(ps->res_stats)); + read_counter(counter); + } + } else { + list_for_each_entry(counter, &evsel_list->entries, node) { + ps = counter->priv; + memset(ps->res_stats, 0, sizeof(ps->res_stats)); + read_counter_aggr(counter); + } + } + clock_gettime(CLOCK_MONOTONIC, &ts); + diff_timespec(&rs, &ts, &ref_time); + sprintf(prefix, "%6lu.%09lu%s", rs.tv_sec, rs.tv_nsec, csv_sep); + + if (num_print_interval == 0 && !csv_output) { + if (aggr_socket) + fprintf(output, "# time socket cpus counts events\n"); + else if (no_aggr) + fprintf(output, "# time CPU counts events\n"); + else + fprintf(output, "# time counts events\n"); + } + + if (++num_print_interval == 25) + num_print_interval = 0; + + if (aggr_socket) + print_aggr_socket(prefix); + else if (no_aggr) { + list_for_each_entry(counter, &evsel_list->entries, node) + print_counter(counter, prefix); + } else { + list_for_each_entry(counter, &evsel_list->entries, node) + print_counter_aggr(counter, prefix); + } +} + static int __run_perf_stat(int argc __maybe_unused, const char **argv) { + char msg[512]; unsigned long long t0, t1; struct perf_evsel *counter; + struct timespec ts; int status = 0; int child_ready_pipe[2], go_pipe[2]; const bool forks = (argc > 0); char buf; + if (interval) { + ts.tv_sec = interval / 1000; + ts.tv_nsec = (interval % 1000) * 1000000; + } else { + ts.tv_sec = 1; + ts.tv_nsec = 0; + } + + if (aggr_socket + && cpu_map__build_socket_map(evsel_list->cpus, &sock_map)) { + perror("cannot build socket map"); + return -1; + } + if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) { perror("failed to create pipes"); return -1; @@ -348,20 +432,13 @@ static int __run_perf_stat(int argc __maybe_unused, const char **argv) continue; } - if (errno == EPERM || errno == EACCES) { - error("You may not have permission to collect %sstats.\n" - "\t Consider tweaking" - " /proc/sys/kernel/perf_event_paranoid or running as root.", - target.system_wide ? "system-wide " : ""); - } else { - error("open_counter returned with %d (%s). " - "/bin/dmesg may provide additional information.\n", - errno, strerror(errno)); - } + perf_evsel__open_strerror(counter, &target, + errno, msg, sizeof(msg)); + ui__error("%s\n", msg); + if (child_pid != -1) kill(child_pid, SIGTERM); - pr_err("Not all events could be opened.\n"); return -1; } counter->supported = true; @@ -377,14 +454,25 @@ static int __run_perf_stat(int argc __maybe_unused, const char **argv) * Enable counters and exec the command: */ t0 = rdclock(); + clock_gettime(CLOCK_MONOTONIC, &ref_time); if (forks) { close(go_pipe[1]); + if (interval) { + while (!waitpid(child_pid, &status, WNOHANG)) { + nanosleep(&ts, NULL); + print_interval(); + } + } wait(&status); if (WIFSIGNALED(status)) psignal(WTERMSIG(status), argv[0]); } else { - while(!done) sleep(1); + while (!done) { + nanosleep(&ts, NULL); + if (interval) + print_interval(); + } } t1 = rdclock(); @@ -454,13 +542,21 @@ static void print_noise(struct perf_evsel *evsel, double avg) print_noise_pct(stddev_stats(&ps->res_stats[0]), avg); } -static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg) +static void nsec_printout(int cpu, int nr, struct perf_evsel *evsel, double avg) { double msecs = avg / 1e6; char cpustr[16] = { '\0', }; const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-25s"; - if (no_aggr) + if (aggr_socket) + sprintf(cpustr, "S%*d%s%*d%s", + csv_output ? 0 : -5, + cpu, + csv_sep, + csv_output ? 0 : 4, + nr, + csv_sep); + else if (no_aggr) sprintf(cpustr, "CPU%*d%s", csv_output ? 0 : -4, perf_evsel__cpus(evsel)->map[cpu], csv_sep); @@ -470,7 +566,7 @@ static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg) if (evsel->cgrp) fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); - if (csv_output) + if (csv_output || interval) return; if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) @@ -659,7 +755,7 @@ static void print_ll_cache_misses(int cpu, fprintf(output, " of all LL-cache hits "); } -static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) +static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg) { double total, ratio = 0.0; char cpustr[16] = { '\0', }; @@ -672,7 +768,15 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) else fmt = "%s%18.0f%s%-25s"; - if (no_aggr) + if (aggr_socket) + sprintf(cpustr, "S%*d%s%*d%s", + csv_output ? 0 : -5, + cpu, + csv_sep, + csv_output ? 0 : 4, + nr, + csv_sep); + else if (no_aggr) sprintf(cpustr, "CPU%*d%s", csv_output ? 0 : -4, perf_evsel__cpus(evsel)->map[cpu], csv_sep); @@ -684,12 +788,11 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) if (evsel->cgrp) fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); - if (csv_output) + if (csv_output || interval) return; if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { total = avg_stats(&runtime_cycles_stats[cpu]); - if (total) ratio = avg / total; @@ -779,16 +882,83 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) } } +static void print_aggr_socket(char *prefix) +{ + struct perf_evsel *counter; + u64 ena, run, val; + int cpu, s, s2, sock, nr; + + if (!sock_map) + return; + + for (s = 0; s < sock_map->nr; s++) { + sock = cpu_map__socket(sock_map, s); + list_for_each_entry(counter, &evsel_list->entries, node) { + val = ena = run = 0; + nr = 0; + for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { + s2 = cpu_map__get_socket(evsel_list->cpus, cpu); + if (s2 != sock) + continue; + val += counter->counts->cpu[cpu].val; + ena += counter->counts->cpu[cpu].ena; + run += counter->counts->cpu[cpu].run; + nr++; + } + if (prefix) + fprintf(output, "%s", prefix); + + if (run == 0 || ena == 0) { + fprintf(output, "S%*d%s%*d%s%*s%s%*s", + csv_output ? 0 : -5, + s, + csv_sep, + csv_output ? 0 : 4, + nr, + csv_sep, + csv_output ? 0 : 18, + counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, + csv_sep, + csv_output ? 0 : -24, + perf_evsel__name(counter)); + if (counter->cgrp) + fprintf(output, "%s%s", + csv_sep, counter->cgrp->name); + + fputc('\n', output); + continue; + } + + if (nsec_counter(counter)) + nsec_printout(sock, nr, counter, val); + else + abs_printout(sock, nr, counter, val); + + if (!csv_output) { + print_noise(counter, 1.0); + + if (run != ena) + fprintf(output, " (%.2f%%)", + 100.0 * run / ena); + } + fputc('\n', output); + } + } +} + /* * Print out the results of a single counter: * aggregated counts in system-wide mode */ -static void print_counter_aggr(struct perf_evsel *counter) +static void print_counter_aggr(struct perf_evsel *counter, char *prefix) { struct perf_stat *ps = counter->priv; double avg = avg_stats(&ps->res_stats[0]); int scaled = counter->counts->scaled; + if (prefix) + fprintf(output, "%s", prefix); + if (scaled == -1) { fprintf(output, "%*s%s%*s", csv_output ? 0 : 18, @@ -805,9 +975,9 @@ static void print_counter_aggr(struct perf_evsel *counter) } if (nsec_counter(counter)) - nsec_printout(-1, counter, avg); + nsec_printout(-1, 0, counter, avg); else - abs_printout(-1, counter, avg); + abs_printout(-1, 0, counter, avg); print_noise(counter, avg); @@ -831,7 +1001,7 @@ static void print_counter_aggr(struct perf_evsel *counter) * Print out the results of a single counter: * does not use aggregated count in system-wide */ -static void print_counter(struct perf_evsel *counter) +static void print_counter(struct perf_evsel *counter, char *prefix) { u64 ena, run, val; int cpu; @@ -840,6 +1010,10 @@ static void print_counter(struct perf_evsel *counter) val = counter->counts->cpu[cpu].val; ena = counter->counts->cpu[cpu].ena; run = counter->counts->cpu[cpu].run; + + if (prefix) + fprintf(output, "%s", prefix); + if (run == 0 || ena == 0) { fprintf(output, "CPU%*d%s%*s%s%*s", csv_output ? 0 : -4, @@ -859,9 +1033,9 @@ static void print_counter(struct perf_evsel *counter) } if (nsec_counter(counter)) - nsec_printout(cpu, counter, val); + nsec_printout(cpu, 0, counter, val); else - abs_printout(cpu, counter, val); + abs_printout(cpu, 0, counter, val); if (!csv_output) { print_noise(counter, 1.0); @@ -899,12 +1073,14 @@ static void print_stat(int argc, const char **argv) fprintf(output, ":\n\n"); } - if (no_aggr) { + if (aggr_socket) + print_aggr_socket(NULL); + else if (no_aggr) { list_for_each_entry(counter, &evsel_list->entries, node) - print_counter(counter); + print_counter(counter, NULL); } else { list_for_each_entry(counter, &evsel_list->entries, node) - print_counter_aggr(counter); + print_counter_aggr(counter, NULL); } if (!csv_output) { @@ -925,7 +1101,7 @@ static volatile int signr = -1; static void skip_signal(int signo) { - if(child_pid == -1) + if ((child_pid == -1) || interval) done = 1; signr = signo; @@ -1145,6 +1321,9 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) "command to run prior to the measured command"), OPT_STRING(0, "post", &post_cmd, "command", "command to run after to the measured command"), + OPT_UINTEGER('I', "interval-print", &interval, + "print counts at regular interval in ms (>= 100)"), + OPT_BOOLEAN(0, "aggr-socket", &aggr_socket, "aggregate counts per processor socket"), OPT_END() }; const char * const stat_usage[] = { @@ -1231,6 +1410,14 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) usage_with_options(stat_usage, options); } + if (aggr_socket) { + if (!perf_target__has_cpu(&target)) { + fprintf(stderr, "--aggr-socket only available in system-wide mode (-a)\n"); + usage_with_options(stat_usage, options); + } + no_aggr = true; + } + if (add_default_attributes()) goto out; @@ -1245,12 +1432,23 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) usage_with_options(stat_usage, options); return -1; } + if (interval && interval < 100) { + pr_err("print interval must be >= 100ms\n"); + usage_with_options(stat_usage, options); + return -1; + } list_for_each_entry(pos, &evsel_list->entries, node) { if (perf_evsel__alloc_stat_priv(pos) < 0 || perf_evsel__alloc_counts(pos, perf_evsel__nr_cpus(pos)) < 0) goto out_free_fd; } + if (interval) { + list_for_each_entry(pos, &evsel_list->entries, node) { + if (perf_evsel__alloc_prev_raw_counts(pos) < 0) + goto out_free_fd; + } + } /* * We dont want to block the signals - that would cause @@ -1260,6 +1458,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) */ atexit(sig_atexit); signal(SIGINT, skip_signal); + signal(SIGCHLD, skip_signal); signal(SIGALRM, skip_signal); signal(SIGABRT, skip_signal); @@ -1272,11 +1471,14 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) status = run_perf_stat(argc, argv); } - if (status != -1) + if (status != -1 && !interval) print_stat(argc, argv); out_free_fd: - list_for_each_entry(pos, &evsel_list->entries, node) + list_for_each_entry(pos, &evsel_list->entries, node) { perf_evsel__free_stat_priv(pos); + perf_evsel__free_counts(pos); + perf_evsel__free_prev_raw_counts(pos); + } perf_evlist__delete_maps(evsel_list); out: perf_evlist__delete(evsel_list); diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index c9ff3950cd4b..72f6eb7b4173 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -68,27 +68,7 @@ #include <linux/unistd.h> #include <linux/types.h> -void get_term_dimensions(struct winsize *ws) -{ - char *s = getenv("LINES"); - - if (s != NULL) { - ws->ws_row = atoi(s); - s = getenv("COLUMNS"); - if (s != NULL) { - ws->ws_col = atoi(s); - if (ws->ws_row && ws->ws_col) - return; - } - } -#ifdef TIOCGWINSZ - if (ioctl(1, TIOCGWINSZ, ws) == 0 && - ws->ws_row && ws->ws_col) - return; -#endif - ws->ws_row = 25; - ws->ws_col = 80; -} +static volatile int done; static void perf_top__update_print_entries(struct perf_top *top) { @@ -453,8 +433,10 @@ static int perf_top__key_mapped(struct perf_top *top, int c) return 0; } -static void perf_top__handle_keypress(struct perf_top *top, int c) +static bool perf_top__handle_keypress(struct perf_top *top, int c) { + bool ret = true; + if (!perf_top__key_mapped(top, c)) { struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; struct termios tc, save; @@ -475,7 +457,7 @@ static void perf_top__handle_keypress(struct perf_top *top, int c) tcsetattr(0, TCSAFLUSH, &save); if (!perf_top__key_mapped(top, c)) - return; + return ret; } switch (c) { @@ -537,7 +519,8 @@ static void perf_top__handle_keypress(struct perf_top *top, int c) printf("exiting.\n"); if (top->dump_symtab) perf_session__fprintf_dsos(top->session, stderr); - exit(0); + ret = false; + break; case 's': perf_top__prompt_symbol(top, "Enter details symbol"); break; @@ -560,6 +543,8 @@ static void perf_top__handle_keypress(struct perf_top *top, int c) default: break; } + + return ret; } static void perf_top__sort_new_samples(void *arg) @@ -596,13 +581,12 @@ static void *display_thread_tui(void *arg) * via --uid. */ list_for_each_entry(pos, &top->evlist->entries, node) - pos->hists.uid_filter_str = top->target.uid_str; + pos->hists.uid_filter_str = top->record_opts.target.uid_str; perf_evlist__tui_browse_hists(top->evlist, help, &hbt, &top->session->header.env); - exit_browser(0); - exit(0); + done = 1; return NULL; } @@ -626,7 +610,7 @@ repeat: /* trash return*/ getc(stdin); - while (1) { + while (!done) { perf_top__print_sym_table(top); /* * Either timeout expired or we got an EINTR due to SIGWINCH, @@ -640,15 +624,14 @@ repeat: continue; /* Fall trhu */ default: - goto process_hotkey; + c = getc(stdin); + tcsetattr(0, TCSAFLUSH, &save); + + if (perf_top__handle_keypress(top, c)) + goto repeat; + done = 1; } } -process_hotkey: - c = getc(stdin); - tcsetattr(0, TCSAFLUSH, &save); - - perf_top__handle_keypress(top, c); - goto repeat; return NULL; } @@ -716,7 +699,7 @@ static void perf_event__process_sample(struct perf_tool *tool, static struct intlist *seen; if (!seen) - seen = intlist__new(); + seen = intlist__new(NULL); if (!intlist__has_entry(seen, event->ip.pid)) { pr_err("Can't find guest [%d]'s kernel information\n", @@ -727,8 +710,8 @@ static void perf_event__process_sample(struct perf_tool *tool, } if (!machine) { - pr_err("%u unprocessable samples recorded.", - top->session->hists.stats.nr_unprocessable_samples++); + pr_err("%u unprocessable samples recorded.\r", + top->session->stats.nr_unprocessable_samples++); return; } @@ -847,13 +830,13 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) ++top->us_samples; if (top->hide_user_symbols) continue; - machine = perf_session__find_host_machine(session); + machine = &session->machines.host; break; case PERF_RECORD_MISC_KERNEL: ++top->kernel_samples; if (top->hide_kernel_symbols) continue; - machine = perf_session__find_host_machine(session); + machine = &session->machines.host; break; case PERF_RECORD_MISC_GUEST_KERNEL: ++top->guest_kernel_samples; @@ -878,7 +861,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) hists__inc_nr_events(&evsel->hists, event->header.type); machine__process_event(machine, event); } else - ++session->hists.stats.nr_unknown_events; + ++session->stats.nr_unknown_events; } } @@ -890,123 +873,42 @@ static void perf_top__mmap_read(struct perf_top *top) perf_top__mmap_read_idx(top, i); } -static void perf_top__start_counters(struct perf_top *top) +static int perf_top__start_counters(struct perf_top *top) { + char msg[512]; struct perf_evsel *counter; struct perf_evlist *evlist = top->evlist; + struct perf_record_opts *opts = &top->record_opts; - if (top->group) - perf_evlist__set_leader(evlist); + perf_evlist__config(evlist, opts); list_for_each_entry(counter, &evlist->entries, node) { - struct perf_event_attr *attr = &counter->attr; - - attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; - - if (top->freq) { - attr->sample_type |= PERF_SAMPLE_PERIOD; - attr->freq = 1; - attr->sample_freq = top->freq; - } - - if (evlist->nr_entries > 1) { - attr->sample_type |= PERF_SAMPLE_ID; - attr->read_format |= PERF_FORMAT_ID; - } - - if (perf_target__has_cpu(&top->target)) - attr->sample_type |= PERF_SAMPLE_CPU; - - if (symbol_conf.use_callchain) - attr->sample_type |= PERF_SAMPLE_CALLCHAIN; - - attr->mmap = 1; - attr->comm = 1; - attr->inherit = top->inherit; -fallback_missing_features: - if (top->exclude_guest_missing) - attr->exclude_guest = attr->exclude_host = 0; -retry_sample_id: - attr->sample_id_all = top->sample_id_all_missing ? 0 : 1; try_again: if (perf_evsel__open(counter, top->evlist->cpus, top->evlist->threads) < 0) { - int err = errno; - - if (err == EPERM || err == EACCES) { - ui__error_paranoid(); - goto out_err; - } else if (err == EINVAL) { - if (!top->exclude_guest_missing && - (attr->exclude_guest || attr->exclude_host)) { - pr_debug("Old kernel, cannot exclude " - "guest or host samples.\n"); - top->exclude_guest_missing = true; - goto fallback_missing_features; - } else if (!top->sample_id_all_missing) { - /* - * Old kernel, no attr->sample_id_type_all field - */ - top->sample_id_all_missing = true; - goto retry_sample_id; - } - } - /* - * If it's cycles then fall back to hrtimer - * based cpu-clock-tick sw counter, which - * is always available even if no PMU support: - */ - if ((err == ENOENT || err == ENXIO) && - (attr->type == PERF_TYPE_HARDWARE) && - (attr->config == PERF_COUNT_HW_CPU_CYCLES)) { - + if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) { if (verbose) - ui__warning("Cycles event not supported,\n" - "trying to fall back to cpu-clock-ticks\n"); - - attr->type = PERF_TYPE_SOFTWARE; - attr->config = PERF_COUNT_SW_CPU_CLOCK; - if (counter->name) { - free(counter->name); - counter->name = NULL; - } + ui__warning("%s\n", msg); goto try_again; } - if (err == ENOENT) { - ui__error("The %s event is not supported.\n", - perf_evsel__name(counter)); - goto out_err; - } else if (err == EMFILE) { - ui__error("Too many events are opened.\n" - "Try again after reducing the number of events\n"); - goto out_err; - } else if ((err == EOPNOTSUPP) && (attr->precise_ip)) { - ui__error("\'precise\' request may not be supported. " - "Try removing 'p' modifier\n"); - goto out_err; - } - - ui__error("The sys_perf_event_open() syscall " - "returned with %d (%s). /bin/dmesg " - "may provide additional information.\n" - "No CONFIG_PERF_EVENTS=y kernel support " - "configured?\n", err, strerror(err)); + perf_evsel__open_strerror(counter, &opts->target, + errno, msg, sizeof(msg)); + ui__error("%s\n", msg); goto out_err; } } - if (perf_evlist__mmap(evlist, top->mmap_pages, false) < 0) { + if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) { ui__error("Failed to mmap with %d (%s)\n", errno, strerror(errno)); goto out_err; } - return; + return 0; out_err: - exit_browser(0); - exit(0); + return -1; } static int perf_top__setup_sample_type(struct perf_top *top) @@ -1016,7 +918,7 @@ static int perf_top__setup_sample_type(struct perf_top *top) ui__error("Selected -g but \"sym\" not present in --sort/-s."); return -EINVAL; } - } else if (!top->dont_use_callchains && callchain_param.mode != CHAIN_NONE) { + } else if (callchain_param.mode != CHAIN_NONE) { if (callchain_register_param(&callchain_param) < 0) { ui__error("Can't register callchain params.\n"); return -EINVAL; @@ -1028,6 +930,7 @@ static int perf_top__setup_sample_type(struct perf_top *top) static int __cmd_top(struct perf_top *top) { + struct perf_record_opts *opts = &top->record_opts; pthread_t thread; int ret; /* @@ -1042,26 +945,42 @@ static int __cmd_top(struct perf_top *top) if (ret) goto out_delete; - if (perf_target__has_task(&top->target)) + if (perf_target__has_task(&opts->target)) perf_event__synthesize_thread_map(&top->tool, top->evlist->threads, perf_event__process, - &top->session->host_machine); + &top->session->machines.host); else perf_event__synthesize_threads(&top->tool, perf_event__process, - &top->session->host_machine); - perf_top__start_counters(top); + &top->session->machines.host); + + ret = perf_top__start_counters(top); + if (ret) + goto out_delete; + top->session->evlist = top->evlist; perf_session__set_id_hdr_size(top->session); + /* + * When perf is starting the traced process, all the events (apart from + * group members) have enable_on_exec=1 set, so don't spoil it by + * prematurely enabling them. + * + * XXX 'top' still doesn't start workloads like record, trace, but should, + * so leave the check here. + */ + if (!perf_target__none(&opts->target)) + perf_evlist__enable(top->evlist); + /* Wait for a minimal set of events before starting the snapshot */ poll(top->evlist->pollfd, top->evlist->nr_fds, 100); perf_top__mmap_read(top); + ret = -1; if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui : display_thread), top)) { ui__error("Could not create display thread.\n"); - exit(-1); + goto out_delete; } if (top->realtime_prio) { @@ -1070,11 +989,11 @@ static int __cmd_top(struct perf_top *top) param.sched_priority = top->realtime_prio; if (sched_setscheduler(0, SCHED_FIFO, ¶m)) { ui__error("Could not set realtime priority.\n"); - exit(-1); + goto out_delete; } } - while (1) { + while (!done) { u64 hits = top->samples; perf_top__mmap_read(top); @@ -1083,126 +1002,67 @@ static int __cmd_top(struct perf_top *top) ret = poll(top->evlist->pollfd, top->evlist->nr_fds, 100); } + ret = 0; out_delete: perf_session__delete(top->session); top->session = NULL; - return 0; + return ret; } static int parse_callchain_opt(const struct option *opt, const char *arg, int unset) { - struct perf_top *top = (struct perf_top *)opt->value; - char *tok, *tok2; - char *endptr; - /* * --no-call-graph */ - if (unset) { - top->dont_use_callchains = true; + if (unset) return 0; - } symbol_conf.use_callchain = true; - if (!arg) - return 0; - - tok = strtok((char *)arg, ","); - if (!tok) - return -1; - - /* get the output mode */ - if (!strncmp(tok, "graph", strlen(arg))) - callchain_param.mode = CHAIN_GRAPH_ABS; - - else if (!strncmp(tok, "flat", strlen(arg))) - callchain_param.mode = CHAIN_FLAT; - - else if (!strncmp(tok, "fractal", strlen(arg))) - callchain_param.mode = CHAIN_GRAPH_REL; - - else if (!strncmp(tok, "none", strlen(arg))) { - callchain_param.mode = CHAIN_NONE; - symbol_conf.use_callchain = false; - - return 0; - } else - return -1; - - /* get the min percentage */ - tok = strtok(NULL, ","); - if (!tok) - goto setup; - - callchain_param.min_percent = strtod(tok, &endptr); - if (tok == endptr) - return -1; - - /* get the print limit */ - tok2 = strtok(NULL, ","); - if (!tok2) - goto setup; - - if (tok2[0] != 'c') { - callchain_param.print_limit = strtod(tok2, &endptr); - tok2 = strtok(NULL, ","); - if (!tok2) - goto setup; - } - - /* get the call chain order */ - if (!strcmp(tok2, "caller")) - callchain_param.order = ORDER_CALLER; - else if (!strcmp(tok2, "callee")) - callchain_param.order = ORDER_CALLEE; - else - return -1; -setup: - if (callchain_register_param(&callchain_param) < 0) { - fprintf(stderr, "Can't register callchain params\n"); - return -1; - } - return 0; + return record_parse_callchain_opt(opt, arg, unset); } int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) { - struct perf_evsel *pos; int status; char errbuf[BUFSIZ]; struct perf_top top = { .count_filter = 5, .delay_secs = 2, - .freq = 4000, /* 4 KHz */ - .mmap_pages = 128, - .sym_pcnt_filter = 5, - .target = { - .uses_mmap = true, + .record_opts = { + .mmap_pages = UINT_MAX, + .user_freq = UINT_MAX, + .user_interval = ULLONG_MAX, + .freq = 4000, /* 4 KHz */ + .target = { + .uses_mmap = true, + }, }, + .sym_pcnt_filter = 5, }; - char callchain_default_opt[] = "fractal,0.5,callee"; + struct perf_record_opts *opts = &top.record_opts; + struct perf_target *target = &opts->target; const struct option options[] = { OPT_CALLBACK('e', "event", &top.evlist, "event", "event selector. use 'perf list' to list available events", parse_events_option), - OPT_INTEGER('c', "count", &top.default_interval, - "event period to sample"), - OPT_STRING('p', "pid", &top.target.pid, "pid", + OPT_U64('c', "count", &opts->user_interval, "event period to sample"), + OPT_STRING('p', "pid", &target->pid, "pid", "profile events on existing process id"), - OPT_STRING('t', "tid", &top.target.tid, "tid", + OPT_STRING('t', "tid", &target->tid, "tid", "profile events on existing thread id"), - OPT_BOOLEAN('a', "all-cpus", &top.target.system_wide, + OPT_BOOLEAN('a', "all-cpus", &target->system_wide, "system-wide collection from all CPUs"), - OPT_STRING('C', "cpu", &top.target.cpu_list, "cpu", + OPT_STRING('C', "cpu", &target->cpu_list, "cpu", "list of cpus to monitor"), OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, "file", "vmlinux pathname"), OPT_BOOLEAN('K', "hide_kernel_symbols", &top.hide_kernel_symbols, "hide kernel symbols"), - OPT_UINTEGER('m', "mmap-pages", &top.mmap_pages, "number of mmap data pages"), + OPT_UINTEGER('m', "mmap-pages", &opts->mmap_pages, + "number of mmap data pages"), OPT_INTEGER('r', "realtime", &top.realtime_prio, "collect data with this RT SCHED_FIFO priority"), OPT_INTEGER('d', "delay", &top.delay_secs, @@ -1211,16 +1071,14 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) "dump the symbol table used for profiling"), OPT_INTEGER('f', "count-filter", &top.count_filter, "only display functions with more events than this"), - OPT_BOOLEAN('g', "group", &top.group, + OPT_BOOLEAN('g', "group", &opts->group, "put the counters into a counter group"), - OPT_BOOLEAN('i', "inherit", &top.inherit, - "child tasks inherit counters"), + OPT_BOOLEAN('i', "no-inherit", &opts->no_inherit, + "child tasks do not inherit counters"), OPT_STRING(0, "sym-annotate", &top.sym_filter, "symbol name", "symbol to annotate"), - OPT_BOOLEAN('z', "zero", &top.zero, - "zero history across updates"), - OPT_INTEGER('F', "freq", &top.freq, - "profile at this frequency"), + OPT_BOOLEAN('z', "zero", &top.zero, "zero history across updates"), + OPT_UINTEGER('F', "freq", &opts->user_freq, "profile at this frequency"), OPT_INTEGER('E', "entries", &top.print_entries, "display this many functions"), OPT_BOOLEAN('U', "hide_user_symbols", &top.hide_user_symbols, @@ -1233,10 +1091,9 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) "sort by key(s): pid, comm, dso, symbol, parent"), OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples, "Show a column with the number of samples"), - OPT_CALLBACK_DEFAULT('G', "call-graph", &top, "output_type,min_percent, call_order", - "Display callchains using output_type (graph, flat, fractal, or none), min percent threshold and callchain order. " - "Default: fractal,0.5,callee", &parse_callchain_opt, - callchain_default_opt), + OPT_CALLBACK_DEFAULT('G', "call-graph", &top.record_opts, + "mode[,dump_size]", record_callchain_help, + &parse_callchain_opt, "fp"), OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, "Show a column with the sum of periods"), OPT_STRING(0, "dsos", &symbol_conf.dso_list_str, "dso[,dso...]", @@ -1251,7 +1108,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) "Display raw encoding of assembly instructions (default)"), OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", "Specify disassembler style (e.g. -M intel for intel syntax)"), - OPT_STRING('u', "uid", &top.target.uid_str, "user", "user to profile"), + OPT_STRING('u', "uid", &target->uid_str, "user", "user to profile"), OPT_END() }; const char * const top_usage[] = { @@ -1272,7 +1129,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) if (sort_order == default_sort_order) sort_order = "dso,symbol"; - setup_sorting(top_usage, options); + if (setup_sorting() < 0) + usage_with_options(top_usage, options); if (top.use_stdio) use_browser = 0; @@ -1281,33 +1139,33 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) setup_browser(false); - status = perf_target__validate(&top.target); + status = perf_target__validate(target); if (status) { - perf_target__strerror(&top.target, status, errbuf, BUFSIZ); + perf_target__strerror(target, status, errbuf, BUFSIZ); ui__warning("%s", errbuf); } - status = perf_target__parse_uid(&top.target); + status = perf_target__parse_uid(target); if (status) { int saved_errno = errno; - perf_target__strerror(&top.target, status, errbuf, BUFSIZ); + perf_target__strerror(target, status, errbuf, BUFSIZ); ui__error("%s", errbuf); status = -saved_errno; goto out_delete_evlist; } - if (perf_target__none(&top.target)) - top.target.system_wide = true; + if (perf_target__none(target)) + target->system_wide = true; - if (perf_evlist__create_maps(top.evlist, &top.target) < 0) + if (perf_evlist__create_maps(top.evlist, target) < 0) usage_with_options(top_usage, options); if (!top.evlist->nr_entries && perf_evlist__add_default(top.evlist) < 0) { ui__error("Not enough memory for event selector list\n"); - return -ENOMEM; + goto out_delete_maps; } symbol_conf.nr_events = top.evlist->nr_entries; @@ -1315,24 +1173,22 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) if (top.delay_secs < 1) top.delay_secs = 1; + if (opts->user_interval != ULLONG_MAX) + opts->default_interval = opts->user_interval; + if (opts->user_freq != UINT_MAX) + opts->freq = opts->user_freq; + /* * User specified count overrides default frequency. */ - if (top.default_interval) - top.freq = 0; - else if (top.freq) { - top.default_interval = top.freq; + if (opts->default_interval) + opts->freq = 0; + else if (opts->freq) { + opts->default_interval = opts->freq; } else { ui__error("frequency and count are zero, aborting\n"); - exit(EXIT_FAILURE); - } - - list_for_each_entry(pos, &top.evlist->entries, node) { - /* - * Fill in the ones not specifically initialized via -c: - */ - if (!pos->attr.sample_period) - pos->attr.sample_period = top.default_interval; + status = -EINVAL; + goto out_delete_maps; } top.sym_evsel = perf_evlist__first(top.evlist); @@ -1365,6 +1221,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) status = __cmd_top(&top); +out_delete_maps: + perf_evlist__delete_maps(top.evlist); out_delete_evlist: perf_evlist__delete(top.evlist); diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 7932ffa29889..d222d7fc7e96 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -455,7 +455,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) goto out_delete_evlist; } - perf_evlist__config_attrs(evlist, &trace->opts); + perf_evlist__config(evlist, &trace->opts); signal(SIGCHLD, sig_handler); signal(SIGINT, sig_handler); diff --git a/tools/perf/config/feature-tests.mak b/tools/perf/config/feature-tests.mak index f5ac77485a4f..b4eabb44e381 100644 --- a/tools/perf/config/feature-tests.mak +++ b/tools/perf/config/feature-tests.mak @@ -225,3 +225,14 @@ int main(void) return on_exit(NULL, NULL); } endef + +define SOURCE_LIBNUMA +#include <numa.h> +#include <numaif.h> + +int main(void) +{ + numa_available(); + return 0; +} +endef
\ No newline at end of file diff --git a/tools/perf/config/utilities.mak b/tools/perf/config/utilities.mak index e5413125e6bb..8ef3bd30a549 100644 --- a/tools/perf/config/utilities.mak +++ b/tools/perf/config/utilities.mak @@ -13,7 +13,7 @@ newline := $(newline) # what should replace a newline when escaping # newlines; the default is a bizarre string. # -nl-escape = $(or $(1),m822df3020w6a44id34bt574ctac44eb9f4n) +nl-escape = $(if $(1),$(1),m822df3020w6a44id34bt574ctac44eb9f4n) # escape-nl # @@ -173,9 +173,9 @@ _ge-abspath = $(if $(is-executable),$(1)) # Usage: absolute-executable-path-or-empty = $(call get-executable-or-default,variable,default) # define get-executable-or-default -$(if $($(1)),$(call _ge_attempt,$($(1)),$(1)),$(call _ge_attempt,$(2))) +$(if $($(1)),$(call _ge_attempt,$($(1)),$(1)),$(call _ge_attempt,$(2),$(1))) endef -_ge_attempt = $(or $(get-executable),$(_gea_warn),$(call _gea_err,$(2))) +_ge_attempt = $(if $(get-executable),$(get-executable),$(_gea_warn)$(call _gea_err,$(2))) _gea_warn = $(warning The path '$(1)' is not executable.) _gea_err = $(if $(1),$(error Please set '$(1)' appropriately)) diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 0f661fbce6a8..095b88207cd3 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -328,14 +328,23 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv) if (S_ISFIFO(st.st_mode) || S_ISSOCK(st.st_mode)) return 0; + status = 1; /* Check for ENOSPC and EIO errors.. */ - if (fflush(stdout)) - die("write failure on standard output: %s", strerror(errno)); - if (ferror(stdout)) - die("unknown write failure on standard output"); - if (fclose(stdout)) - die("close failed on standard output: %s", strerror(errno)); - return 0; + if (fflush(stdout)) { + fprintf(stderr, "write failure on standard output: %s", strerror(errno)); + goto out; + } + if (ferror(stdout)) { + fprintf(stderr, "unknown write failure on standard output"); + goto out; + } + if (fclose(stdout)) { + fprintf(stderr, "close failed on standard output: %s", strerror(errno)); + goto out; + } + status = 0; +out: + return status; } static void handle_internal_command(int argc, const char **argv) @@ -467,7 +476,8 @@ int main(int argc, const char **argv) cmd += 5; argv[0] = cmd; handle_internal_command(argc, argv); - die("cannot handle %s internally", cmd); + fprintf(stderr, "cannot handle %s internally", cmd); + goto out; } /* Look for flags.. */ @@ -485,7 +495,7 @@ int main(int argc, const char **argv) printf("\n usage: %s\n\n", perf_usage_string); list_common_cmds_help(); printf("\n %s\n\n", perf_more_info_string); - exit(1); + goto out; } cmd = argv[0]; @@ -517,7 +527,7 @@ int main(int argc, const char **argv) fprintf(stderr, "Expansion of alias '%s' failed; " "'%s' is not a perf-command\n", cmd, argv[0]); - exit(1); + goto out; } if (!done_help) { cmd = argv[0] = help_unknown_cmd(cmd); @@ -528,6 +538,6 @@ int main(int argc, const char **argv) fprintf(stderr, "Failed to run command '%s': %s\n", cmd, strerror(errno)); - +out: return 1; } diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 2c340e7da458..c2206c87fc9f 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -1,10 +1,6 @@ #ifndef _PERF_PERF_H #define _PERF_PERF_H -struct winsize; - -void get_term_dimensions(struct winsize *ws); - #include <asm/unistd.h> #if defined(__i386__) @@ -107,32 +103,6 @@ void get_term_dimensions(struct winsize *ws); #include "util/types.h" #include <stdbool.h> -struct perf_mmap { - void *base; - int mask; - unsigned int prev; -}; - -static inline unsigned int perf_mmap__read_head(struct perf_mmap *mm) -{ - struct perf_event_mmap_page *pc = mm->base; - int head = pc->data_head; - rmb(); - return head; -} - -static inline void perf_mmap__write_tail(struct perf_mmap *md, - unsigned long tail) -{ - struct perf_event_mmap_page *pc = md->base; - - /* - * ensure all reads are done before we write the tail out. - */ - /* mb(); */ - pc->data_tail = tail; -} - /* * prctl(PR_TASK_PERF_EVENTS_DISABLE) will (cheaply) disable all * counters in the current task. @@ -237,8 +207,6 @@ struct perf_record_opts { bool raw_samples; bool sample_address; bool sample_time; - bool sample_id_all_missing; - bool exclude_guest_missing; bool period; unsigned int freq; unsigned int mmap_pages; diff --git a/tools/perf/scripts/perl/bin/workqueue-stats-record b/tools/perf/scripts/perl/bin/workqueue-stats-record deleted file mode 100644 index 8edda9078d5d..000000000000 --- a/tools/perf/scripts/perl/bin/workqueue-stats-record +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/bash -perf record -e workqueue:workqueue_creation -e workqueue:workqueue_destruction -e workqueue:workqueue_execution -e workqueue:workqueue_insertion $@ diff --git a/tools/perf/scripts/perl/bin/workqueue-stats-report b/tools/perf/scripts/perl/bin/workqueue-stats-report deleted file mode 100644 index 6d91411d248c..000000000000 --- a/tools/perf/scripts/perl/bin/workqueue-stats-report +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash -# description: workqueue stats (ins/exe/create/destroy) -perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/workqueue-stats.pl diff --git a/tools/perf/scripts/perl/rwtop.pl b/tools/perf/scripts/perl/rwtop.pl index 4bb3ecd33472..8b20787021c1 100644 --- a/tools/perf/scripts/perl/rwtop.pl +++ b/tools/perf/scripts/perl/rwtop.pl @@ -17,6 +17,7 @@ use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib"; use lib "./Perf-Trace-Util/lib"; use Perf::Trace::Core; use Perf::Trace::Util; +use POSIX qw/SIGALRM SA_RESTART/; my $default_interval = 3; my $nlines = 20; @@ -90,7 +91,10 @@ sub syscalls::sys_enter_write sub trace_begin { - $SIG{ALRM} = \&set_print_pending; + my $sa = POSIX::SigAction->new(\&set_print_pending); + $sa->flags(SA_RESTART); + $sa->safe(1); + POSIX::sigaction(SIGALRM, $sa) or die "Can't set SIGALRM handler: $!\n"; alarm 1; } diff --git a/tools/perf/scripts/perl/workqueue-stats.pl b/tools/perf/scripts/perl/workqueue-stats.pl deleted file mode 100644 index a8eaff5119e0..000000000000 --- a/tools/perf/scripts/perl/workqueue-stats.pl +++ /dev/null @@ -1,129 +0,0 @@ -#!/usr/bin/perl -w -# (c) 2009, Tom Zanussi <tzanussi@gmail.com> -# Licensed under the terms of the GNU GPL License version 2 - -# Displays workqueue stats -# -# Usage: -# -# perf record -c 1 -f -a -R -e workqueue:workqueue_creation -e -# workqueue:workqueue_destruction -e workqueue:workqueue_execution -# -e workqueue:workqueue_insertion -# -# perf script -p -s tools/perf/scripts/perl/workqueue-stats.pl - -use 5.010000; -use strict; -use warnings; - -use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib"; -use lib "./Perf-Trace-Util/lib"; -use Perf::Trace::Core; -use Perf::Trace::Util; - -my @cpus; - -sub workqueue::workqueue_destruction -{ - my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm, - $thread_comm, $thread_pid) = @_; - - $cpus[$common_cpu]{$thread_pid}{destroyed}++; - $cpus[$common_cpu]{$thread_pid}{comm} = $thread_comm; -} - -sub workqueue::workqueue_creation -{ - my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm, - $thread_comm, $thread_pid, $cpu) = @_; - - $cpus[$common_cpu]{$thread_pid}{created}++; - $cpus[$common_cpu]{$thread_pid}{comm} = $thread_comm; -} - -sub workqueue::workqueue_execution -{ - my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm, - $thread_comm, $thread_pid, $func) = @_; - - $cpus[$common_cpu]{$thread_pid}{executed}++; - $cpus[$common_cpu]{$thread_pid}{comm} = $thread_comm; -} - -sub workqueue::workqueue_insertion -{ - my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm, - $thread_comm, $thread_pid, $func) = @_; - - $cpus[$common_cpu]{$thread_pid}{inserted}++; - $cpus[$common_cpu]{$thread_pid}{comm} = $thread_comm; -} - -sub trace_end -{ - print "workqueue work stats:\n\n"; - my $cpu = 0; - printf("%3s %6s %6s\t%-20s\n", "cpu", "ins", "exec", "name"); - printf("%3s %6s %6s\t%-20s\n", "---", "---", "----", "----"); - foreach my $pidhash (@cpus) { - while ((my $pid, my $wqhash) = each %$pidhash) { - my $ins = $$wqhash{'inserted'} || 0; - my $exe = $$wqhash{'executed'} || 0; - my $comm = $$wqhash{'comm'} || ""; - if ($ins || $exe) { - printf("%3u %6u %6u\t%-20s\n", $cpu, $ins, $exe, $comm); - } - } - $cpu++; - } - - $cpu = 0; - print "\nworkqueue lifecycle stats:\n\n"; - printf("%3s %6s %6s\t%-20s\n", "cpu", "created", "destroyed", "name"); - printf("%3s %6s %6s\t%-20s\n", "---", "-------", "---------", "----"); - foreach my $pidhash (@cpus) { - while ((my $pid, my $wqhash) = each %$pidhash) { - my $created = $$wqhash{'created'} || 0; - my $destroyed = $$wqhash{'destroyed'} || 0; - my $comm = $$wqhash{'comm'} || ""; - if ($created || $destroyed) { - printf("%3u %6u %6u\t%-20s\n", $cpu, $created, $destroyed, - $comm); - } - } - $cpu++; - } - - print_unhandled(); -} - -my %unhandled; - -sub print_unhandled -{ - if ((scalar keys %unhandled) == 0) { - return; - } - - print "\nunhandled events:\n\n"; - - printf("%-40s %10s\n", "event", "count"); - printf("%-40s %10s\n", "----------------------------------------", - "-----------"); - - foreach my $event_name (keys %unhandled) { - printf("%-40s %10d\n", $event_name, $unhandled{$event_name}); - } -} - -sub trace_unhandled -{ - my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm) = @_; - - $unhandled{$event_name}++; -} diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c index 25638a986257..bdcceb886f77 100644 --- a/tools/perf/tests/attr.c +++ b/tools/perf/tests/attr.c @@ -19,6 +19,11 @@ * permissions. All the event text files are stored there. */ +/* + * Powerpc needs __SANE_USERSPACE_TYPES__ before <linux/types.h> to select + * 'int-ll64.h' and avoid compile warnings when printing __u64 with %llu. + */ +#define __SANE_USERSPACE_TYPES__ #include <stdlib.h> #include <stdio.h> #include <inttypes.h> @@ -33,8 +38,6 @@ extern int verbose; -bool test_attr__enabled; - static char *dir; void test_attr__init(void) @@ -146,7 +149,7 @@ static int run_dir(const char *d, const char *perf) { char cmd[3*PATH_MAX]; - snprintf(cmd, 3*PATH_MAX, "python %s/attr.py -d %s/attr/ -p %s %s", + snprintf(cmd, 3*PATH_MAX, PYTHON " %s/attr.py -d %s/attr/ -p %s %s", d, d, perf, verbose ? "-v" : ""); return system(cmd); diff --git a/tools/perf/tests/attr.py b/tools/perf/tests/attr.py index e702b82dcb86..2f629ca485bc 100644 --- a/tools/perf/tests/attr.py +++ b/tools/perf/tests/attr.py @@ -68,7 +68,7 @@ class Event(dict): self[key] = val def __init__(self, name, data, base): - log.info(" Event %s" % name); + log.debug(" Event %s" % name); self.name = name; self.group = '' self.add(base) @@ -97,6 +97,14 @@ class Event(dict): return False return True + def diff(self, other): + for t in Event.terms: + if not self.has_key(t) or not other.has_key(t): + continue + if not self.compare_data(self[t], other[t]): + log.warning("expected %s=%s, got %s" % (t, self[t], other[t])) + + # Test file description needs to have following sections: # [config] # - just single instance in file @@ -113,7 +121,7 @@ class Test(object): parser = ConfigParser.SafeConfigParser() parser.read(path) - log.warning("running '%s'" % path) + log.debug("running '%s'" % path) self.path = path self.test_dir = options.test_dir @@ -128,7 +136,7 @@ class Test(object): self.expect = {} self.result = {} - log.info(" loading expected events"); + log.debug(" loading expected events"); self.load_events(path, self.expect) def is_event(self, name): @@ -164,7 +172,7 @@ class Test(object): self.perf, self.command, tempdir, self.args) ret = os.WEXITSTATUS(os.system(cmd)) - log.info(" running '%s' ret %d " % (cmd, ret)) + log.warning(" running '%s' ret %d " % (cmd, ret)) if ret != int(self.ret): raise Unsup(self) @@ -172,7 +180,7 @@ class Test(object): def compare(self, expect, result): match = {} - log.info(" compare"); + log.debug(" compare"); # For each expected event find all matching # events in result. Fail if there's not any. @@ -187,10 +195,11 @@ class Test(object): else: log.debug(" ->FAIL"); - log.info(" match: [%s] matches %s" % (exp_name, str(exp_list))) + log.debug(" match: [%s] matches %s" % (exp_name, str(exp_list))) # we did not any matching event - fail if (not exp_list): + exp_event.diff(res_event) raise Fail(self, 'match failure'); match[exp_name] = exp_list @@ -208,10 +217,10 @@ class Test(object): if res_group not in match[group]: raise Fail(self, 'group failure') - log.info(" group: [%s] matches group leader %s" % + log.debug(" group: [%s] matches group leader %s" % (exp_name, str(match[group]))) - log.info(" matched") + log.debug(" matched") def resolve_groups(self, events): for name, event in events.items(): @@ -233,7 +242,7 @@ class Test(object): self.run_cmd(tempdir); # load events expectation for the test - log.info(" loading result events"); + log.debug(" loading result events"); for f in glob.glob(tempdir + '/event*'): self.load_events(f, self.result); diff --git a/tools/perf/tests/attr/base-record b/tools/perf/tests/attr/base-record index f1485d8e6a0b..5bc3880f7be5 100644 --- a/tools/perf/tests/attr/base-record +++ b/tools/perf/tests/attr/base-record @@ -7,7 +7,7 @@ size=96 config=0 sample_period=4000 sample_type=263 -read_format=7 +read_format=0 disabled=1 inherit=1 pinned=0 diff --git a/tools/perf/tests/attr/test-record-group b/tools/perf/tests/attr/test-record-group index a6599e9a19d3..57739cacdb2a 100644 --- a/tools/perf/tests/attr/test-record-group +++ b/tools/perf/tests/attr/test-record-group @@ -6,12 +6,14 @@ args = --group -e cycles,instructions kill >/dev/null 2>&1 fd=1 group_fd=-1 sample_type=327 +read_format=4 [event-2:base-record] fd=2 group_fd=1 config=1 sample_type=327 +read_format=4 mmap=0 comm=0 enable_on_exec=0 diff --git a/tools/perf/tests/attr/test-record-group1 b/tools/perf/tests/attr/test-record-group1 index 5a8359da38af..c5548d054aff 100644 --- a/tools/perf/tests/attr/test-record-group1 +++ b/tools/perf/tests/attr/test-record-group1 @@ -1,11 +1,12 @@ [config] command = record -args = -e '{cycles,instructions}' kill >/tmp/krava 2>&1 +args = -e '{cycles,instructions}' kill >/dev/null 2>&1 [event-1:base-record] fd=1 group_fd=-1 sample_type=327 +read_format=4 [event-2:base-record] fd=2 @@ -13,6 +14,7 @@ group_fd=1 type=0 config=1 sample_type=327 +read_format=4 mmap=0 comm=0 enable_on_exec=0 diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 186f67535494..acb98e0e39f2 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -4,6 +4,7 @@ * Builtin regression testing command: ever growing number of sanity tests */ #include "builtin.h" +#include "intlist.h" #include "tests.h" #include "debug.h" #include "color.h" @@ -69,6 +70,14 @@ static struct test { .func = test__attr, }, { + .desc = "Test matching and linking mutliple hists", + .func = test__hists_link, + }, + { + .desc = "Try 'use perf' in python, checking link problems", + .func = test__python_use, + }, + { .func = NULL, }, }; @@ -97,7 +106,7 @@ static bool perf_test__matches(int curr, int argc, const char *argv[]) return false; } -static int __cmd_test(int argc, const char *argv[]) +static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist) { int i = 0; int width = 0; @@ -118,13 +127,28 @@ static int __cmd_test(int argc, const char *argv[]) continue; pr_info("%2d: %-*s:", i, width, tests[curr].desc); + + if (intlist__find(skiplist, i)) { + color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (user override)\n"); + continue; + } + pr_debug("\n--- start ---\n"); err = tests[curr].func(); pr_debug("---- end ----\n%s:", tests[curr].desc); - if (err) - color_fprintf(stderr, PERF_COLOR_RED, " FAILED!\n"); - else + + switch (err) { + case TEST_OK: pr_info(" Ok\n"); + break; + case TEST_SKIP: + color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip\n"); + break; + case TEST_FAIL: + default: + color_fprintf(stderr, PERF_COLOR_RED, " FAILED!\n"); + break; + } } return 0; @@ -152,11 +176,14 @@ int cmd_test(int argc, const char **argv, const char *prefix __maybe_unused) "perf test [<options>] [{list <test-name-fragment>|[<test-name-fragments>|<test-numbers>]}]", NULL, }; + const char *skip = NULL; const struct option test_options[] = { + OPT_STRING('s', "skip", &skip, "tests", "tests to skip"), OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"), OPT_END() }; + struct intlist *skiplist = NULL; argc = parse_options(argc, argv, test_options, test_usage, 0); if (argc >= 1 && !strcmp(argv[0], "list")) @@ -169,5 +196,8 @@ int cmd_test(int argc, const char **argv, const char *prefix __maybe_unused) if (symbol__init() < 0) return -1; - return __cmd_test(argc, argv); + if (skip != NULL) + skiplist = intlist__new(skip); + + return __cmd_test(argc, argv, skiplist); } diff --git a/tools/perf/tests/evsel-roundtrip-name.c b/tools/perf/tests/evsel-roundtrip-name.c index e61fc828a158..0fd99a9adb91 100644 --- a/tools/perf/tests/evsel-roundtrip-name.c +++ b/tools/perf/tests/evsel-roundtrip-name.c @@ -22,7 +22,7 @@ static int perf_evsel__roundtrip_cache_name_test(void) for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) { __perf_evsel__hw_cache_type_op_res_name(type, op, i, name, sizeof(name)); - err = parse_events(evlist, name, 0); + err = parse_events(evlist, name); if (err) ret = err; } @@ -70,7 +70,7 @@ static int __perf_evsel__name_array_test(const char *names[], int nr_names) return -ENOMEM; for (i = 0; i < nr_names; ++i) { - err = parse_events(evlist, names[i], 0); + err = parse_events(evlist, names[i]); if (err) { pr_debug("failed to parse event '%s', err %d\n", names[i], err); diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c new file mode 100644 index 000000000000..1be64a6c5daf --- /dev/null +++ b/tools/perf/tests/hists_link.c @@ -0,0 +1,500 @@ +#include "perf.h" +#include "tests.h" +#include "debug.h" +#include "symbol.h" +#include "sort.h" +#include "evsel.h" +#include "evlist.h" +#include "machine.h" +#include "thread.h" +#include "parse-events.h" + +static struct { + u32 pid; + const char *comm; +} fake_threads[] = { + { 100, "perf" }, + { 200, "perf" }, + { 300, "bash" }, +}; + +static struct { + u32 pid; + u64 start; + const char *filename; +} fake_mmap_info[] = { + { 100, 0x40000, "perf" }, + { 100, 0x50000, "libc" }, + { 100, 0xf0000, "[kernel]" }, + { 200, 0x40000, "perf" }, + { 200, 0x50000, "libc" }, + { 200, 0xf0000, "[kernel]" }, + { 300, 0x40000, "bash" }, + { 300, 0x50000, "libc" }, + { 300, 0xf0000, "[kernel]" }, +}; + +struct fake_sym { + u64 start; + u64 length; + const char *name; +}; + +static struct fake_sym perf_syms[] = { + { 700, 100, "main" }, + { 800, 100, "run_command" }, + { 900, 100, "cmd_record" }, +}; + +static struct fake_sym bash_syms[] = { + { 700, 100, "main" }, + { 800, 100, "xmalloc" }, + { 900, 100, "xfree" }, +}; + +static struct fake_sym libc_syms[] = { + { 700, 100, "malloc" }, + { 800, 100, "free" }, + { 900, 100, "realloc" }, +}; + +static struct fake_sym kernel_syms[] = { + { 700, 100, "schedule" }, + { 800, 100, "page_fault" }, + { 900, 100, "sys_perf_event_open" }, +}; + +static struct { + const char *dso_name; + struct fake_sym *syms; + size_t nr_syms; +} fake_symbols[] = { + { "perf", perf_syms, ARRAY_SIZE(perf_syms) }, + { "bash", bash_syms, ARRAY_SIZE(bash_syms) }, + { "libc", libc_syms, ARRAY_SIZE(libc_syms) }, + { "[kernel]", kernel_syms, ARRAY_SIZE(kernel_syms) }, +}; + +static struct machine *setup_fake_machine(struct machines *machines) +{ + struct machine *machine = machines__find(machines, HOST_KERNEL_ID); + size_t i; + + if (machine == NULL) { + pr_debug("Not enough memory for machine setup\n"); + return NULL; + } + + for (i = 0; i < ARRAY_SIZE(fake_threads); i++) { + struct thread *thread; + + thread = machine__findnew_thread(machine, fake_threads[i].pid); + if (thread == NULL) + goto out; + + thread__set_comm(thread, fake_threads[i].comm); + } + + for (i = 0; i < ARRAY_SIZE(fake_mmap_info); i++) { + union perf_event fake_mmap_event = { + .mmap = { + .header = { .misc = PERF_RECORD_MISC_USER, }, + .pid = fake_mmap_info[i].pid, + .start = fake_mmap_info[i].start, + .len = 0x1000ULL, + .pgoff = 0ULL, + }, + }; + + strcpy(fake_mmap_event.mmap.filename, + fake_mmap_info[i].filename); + + machine__process_mmap_event(machine, &fake_mmap_event); + } + + for (i = 0; i < ARRAY_SIZE(fake_symbols); i++) { + size_t k; + struct dso *dso; + + dso = __dsos__findnew(&machine->user_dsos, + fake_symbols[i].dso_name); + if (dso == NULL) + goto out; + + /* emulate dso__load() */ + dso__set_loaded(dso, MAP__FUNCTION); + + for (k = 0; k < fake_symbols[i].nr_syms; k++) { + struct symbol *sym; + struct fake_sym *fsym = &fake_symbols[i].syms[k]; + + sym = symbol__new(fsym->start, fsym->length, + STB_GLOBAL, fsym->name); + if (sym == NULL) + goto out; + + symbols__insert(&dso->symbols[MAP__FUNCTION], sym); + } + } + + return machine; + +out: + pr_debug("Not enough memory for machine setup\n"); + machine__delete_threads(machine); + machine__delete(machine); + return NULL; +} + +struct sample { + u32 pid; + u64 ip; + struct thread *thread; + struct map *map; + struct symbol *sym; +}; + +static struct sample fake_common_samples[] = { + /* perf [kernel] schedule() */ + { .pid = 100, .ip = 0xf0000 + 700, }, + /* perf [perf] main() */ + { .pid = 200, .ip = 0x40000 + 700, }, + /* perf [perf] cmd_record() */ + { .pid = 200, .ip = 0x40000 + 900, }, + /* bash [bash] xmalloc() */ + { .pid = 300, .ip = 0x40000 + 800, }, + /* bash [libc] malloc() */ + { .pid = 300, .ip = 0x50000 + 700, }, +}; + +static struct sample fake_samples[][5] = { + { + /* perf [perf] run_command() */ + { .pid = 100, .ip = 0x40000 + 800, }, + /* perf [libc] malloc() */ + { .pid = 100, .ip = 0x50000 + 700, }, + /* perf [kernel] page_fault() */ + { .pid = 100, .ip = 0xf0000 + 800, }, + /* perf [kernel] sys_perf_event_open() */ + { .pid = 200, .ip = 0xf0000 + 900, }, + /* bash [libc] free() */ + { .pid = 300, .ip = 0x50000 + 800, }, + }, + { + /* perf [libc] free() */ + { .pid = 200, .ip = 0x50000 + 800, }, + /* bash [libc] malloc() */ + { .pid = 300, .ip = 0x50000 + 700, }, /* will be merged */ + /* bash [bash] xfee() */ + { .pid = 300, .ip = 0x40000 + 900, }, + /* bash [libc] realloc() */ + { .pid = 300, .ip = 0x50000 + 900, }, + /* bash [kernel] page_fault() */ + { .pid = 300, .ip = 0xf0000 + 800, }, + }, +}; + +static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) +{ + struct perf_evsel *evsel; + struct addr_location al; + struct hist_entry *he; + struct perf_sample sample = { .cpu = 0, }; + size_t i = 0, k; + + /* + * each evsel will have 10 samples - 5 common and 5 distinct. + * However the second evsel also has a collapsed entry for + * "bash [libc] malloc" so total 9 entries will be in the tree. + */ + list_for_each_entry(evsel, &evlist->entries, node) { + for (k = 0; k < ARRAY_SIZE(fake_common_samples); k++) { + const union perf_event event = { + .ip = { + .header = { + .misc = PERF_RECORD_MISC_USER, + }, + .pid = fake_common_samples[k].pid, + .ip = fake_common_samples[k].ip, + }, + }; + + if (perf_event__preprocess_sample(&event, machine, &al, + &sample, 0) < 0) + goto out; + + he = __hists__add_entry(&evsel->hists, &al, NULL, 1); + if (he == NULL) + goto out; + + fake_common_samples[k].thread = al.thread; + fake_common_samples[k].map = al.map; + fake_common_samples[k].sym = al.sym; + } + + for (k = 0; k < ARRAY_SIZE(fake_samples[i]); k++) { + const union perf_event event = { + .ip = { + .header = { + .misc = PERF_RECORD_MISC_USER, + }, + .pid = fake_samples[i][k].pid, + .ip = fake_samples[i][k].ip, + }, + }; + + if (perf_event__preprocess_sample(&event, machine, &al, + &sample, 0) < 0) + goto out; + + he = __hists__add_entry(&evsel->hists, &al, NULL, 1); + if (he == NULL) + goto out; + + fake_samples[i][k].thread = al.thread; + fake_samples[i][k].map = al.map; + fake_samples[i][k].sym = al.sym; + } + i++; + } + + return 0; + +out: + pr_debug("Not enough memory for adding a hist entry\n"); + return -1; +} + +static int find_sample(struct sample *samples, size_t nr_samples, + struct thread *t, struct map *m, struct symbol *s) +{ + while (nr_samples--) { + if (samples->thread == t && samples->map == m && + samples->sym == s) + return 1; + samples++; + } + return 0; +} + +static int __validate_match(struct hists *hists) +{ + size_t count = 0; + struct rb_root *root; + struct rb_node *node; + + /* + * Only entries from fake_common_samples should have a pair. + */ + if (sort__need_collapse) + root = &hists->entries_collapsed; + else + root = hists->entries_in; + + node = rb_first(root); + while (node) { + struct hist_entry *he; + + he = rb_entry(node, struct hist_entry, rb_node_in); + + if (hist_entry__has_pairs(he)) { + if (find_sample(fake_common_samples, + ARRAY_SIZE(fake_common_samples), + he->thread, he->ms.map, he->ms.sym)) { + count++; + } else { + pr_debug("Can't find the matched entry\n"); + return -1; + } + } + + node = rb_next(node); + } + + if (count != ARRAY_SIZE(fake_common_samples)) { + pr_debug("Invalid count for matched entries: %zd of %zd\n", + count, ARRAY_SIZE(fake_common_samples)); + return -1; + } + + return 0; +} + +static int validate_match(struct hists *leader, struct hists *other) +{ + return __validate_match(leader) || __validate_match(other); +} + +static int __validate_link(struct hists *hists, int idx) +{ + size_t count = 0; + size_t count_pair = 0; + size_t count_dummy = 0; + struct rb_root *root; + struct rb_node *node; + + /* + * Leader hists (idx = 0) will have dummy entries from other, + * and some entries will have no pair. However every entry + * in other hists should have (dummy) pair. + */ + if (sort__need_collapse) + root = &hists->entries_collapsed; + else + root = hists->entries_in; + + node = rb_first(root); + while (node) { + struct hist_entry *he; + + he = rb_entry(node, struct hist_entry, rb_node_in); + + if (hist_entry__has_pairs(he)) { + if (!find_sample(fake_common_samples, + ARRAY_SIZE(fake_common_samples), + he->thread, he->ms.map, he->ms.sym) && + !find_sample(fake_samples[idx], + ARRAY_SIZE(fake_samples[idx]), + he->thread, he->ms.map, he->ms.sym)) { + count_dummy++; + } + count_pair++; + } else if (idx) { + pr_debug("A entry from the other hists should have pair\n"); + return -1; + } + + count++; + node = rb_next(node); + } + + /* + * Note that we have a entry collapsed in the other (idx = 1) hists. + */ + if (idx == 0) { + if (count_dummy != ARRAY_SIZE(fake_samples[1]) - 1) { + pr_debug("Invalid count of dummy entries: %zd of %zd\n", + count_dummy, ARRAY_SIZE(fake_samples[1]) - 1); + return -1; + } + if (count != count_pair + ARRAY_SIZE(fake_samples[0])) { + pr_debug("Invalid count of total leader entries: %zd of %zd\n", + count, count_pair + ARRAY_SIZE(fake_samples[0])); + return -1; + } + } else { + if (count != count_pair) { + pr_debug("Invalid count of total other entries: %zd of %zd\n", + count, count_pair); + return -1; + } + if (count_dummy > 0) { + pr_debug("Other hists should not have dummy entries: %zd\n", + count_dummy); + return -1; + } + } + + return 0; +} + +static int validate_link(struct hists *leader, struct hists *other) +{ + return __validate_link(leader, 0) || __validate_link(other, 1); +} + +static void print_hists(struct hists *hists) +{ + int i = 0; + struct rb_root *root; + struct rb_node *node; + + if (sort__need_collapse) + root = &hists->entries_collapsed; + else + root = hists->entries_in; + + pr_info("----- %s --------\n", __func__); + node = rb_first(root); + while (node) { + struct hist_entry *he; + + he = rb_entry(node, struct hist_entry, rb_node_in); + + pr_info("%2d: entry: %-8s [%-8s] %20s: period = %"PRIu64"\n", + i, he->thread->comm, he->ms.map->dso->short_name, + he->ms.sym->name, he->stat.period); + + i++; + node = rb_next(node); + } +} + +int test__hists_link(void) +{ + int err = -1; + struct machines machines; + struct machine *machine = NULL; + struct perf_evsel *evsel, *first; + struct perf_evlist *evlist = perf_evlist__new(NULL, NULL); + + if (evlist == NULL) + return -ENOMEM; + + err = parse_events(evlist, "cpu-clock"); + if (err) + goto out; + err = parse_events(evlist, "task-clock"); + if (err) + goto out; + + /* default sort order (comm,dso,sym) will be used */ + if (setup_sorting() < 0) + goto out; + + machines__init(&machines); + + /* setup threads/dso/map/symbols also */ + machine = setup_fake_machine(&machines); + if (!machine) + goto out; + + if (verbose > 1) + machine__fprintf(machine, stderr); + + /* process sample events */ + err = add_hist_entries(evlist, machine); + if (err < 0) + goto out; + + list_for_each_entry(evsel, &evlist->entries, node) { + hists__collapse_resort(&evsel->hists); + + if (verbose > 2) + print_hists(&evsel->hists); + } + + first = perf_evlist__first(evlist); + evsel = perf_evlist__last(evlist); + + /* match common entries */ + hists__match(&first->hists, &evsel->hists); + err = validate_match(&first->hists, &evsel->hists); + if (err) + goto out; + + /* link common and/or dummy entries */ + hists__link(&first->hists, &evsel->hists); + err = validate_link(&first->hists, &evsel->hists); + if (err) + goto out; + + err = 0; + +out: + /* tear down everything */ + perf_evlist__delete(evlist); + machines__exit(&machines); + + return err; +} diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index e1746811e14b..cdd50755af51 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -22,36 +22,16 @@ int test__basic_mmap(void) struct thread_map *threads; struct cpu_map *cpus; struct perf_evlist *evlist; - struct perf_event_attr attr = { - .type = PERF_TYPE_TRACEPOINT, - .read_format = PERF_FORMAT_ID, - .sample_type = PERF_SAMPLE_ID, - .watermark = 0, - }; cpu_set_t cpu_set; const char *syscall_names[] = { "getsid", "getppid", "getpgrp", "getpgid", }; pid_t (*syscalls[])(void) = { (void *)getsid, getppid, getpgrp, (void*)getpgid }; #define nsyscalls ARRAY_SIZE(syscall_names) - int ids[nsyscalls]; unsigned int nr_events[nsyscalls], expected_nr_events[nsyscalls], i, j; struct perf_evsel *evsels[nsyscalls], *evsel; - for (i = 0; i < nsyscalls; ++i) { - char name[64]; - - snprintf(name, sizeof(name), "sys_enter_%s", syscall_names[i]); - ids[i] = trace_event__id(name); - if (ids[i] < 0) { - pr_debug("Is debugfs mounted on /sys/kernel/debug?\n"); - return -1; - } - nr_events[i] = 0; - expected_nr_events[i] = random() % 257; - } - threads = thread_map__new(-1, getpid(), UINT_MAX); if (threads == NULL) { pr_debug("thread_map__new\n"); @@ -79,18 +59,19 @@ int test__basic_mmap(void) goto out_free_cpus; } - /* anonymous union fields, can't be initialized above */ - attr.wakeup_events = 1; - attr.sample_period = 1; - for (i = 0; i < nsyscalls; ++i) { - attr.config = ids[i]; - evsels[i] = perf_evsel__new(&attr, i); + char name[64]; + + snprintf(name, sizeof(name), "sys_enter_%s", syscall_names[i]); + evsels[i] = perf_evsel__newtp("syscalls", name, i); if (evsels[i] == NULL) { pr_debug("perf_evsel__new\n"); goto out_free_evlist; } + evsels[i]->attr.wakeup_events = 1; + perf_evsel__set_sample_id(evsels[i]); + perf_evlist__add(evlist, evsels[i]); if (perf_evsel__open(evsels[i], cpus, threads) < 0) { @@ -99,6 +80,9 @@ int test__basic_mmap(void) strerror(errno)); goto out_close_fd; } + + nr_events[i] = 0; + expected_nr_events[i] = 1 + rand() % 127; } if (perf_evlist__mmap(evlist, 128, true) < 0) { @@ -128,6 +112,7 @@ int test__basic_mmap(void) goto out_munmap; } + err = -1; evsel = perf_evlist__id2evsel(evlist, sample.id); if (evsel == NULL) { pr_debug("event with id %" PRIu64 @@ -137,16 +122,17 @@ int test__basic_mmap(void) nr_events[evsel->idx]++; } + err = 0; list_for_each_entry(evsel, &evlist->entries, node) { if (nr_events[evsel->idx] != expected_nr_events[evsel->idx]) { pr_debug("expected %d %s events, got %d\n", expected_nr_events[evsel->idx], perf_evsel__name(evsel), nr_events[evsel->idx]); + err = -1; goto out_munmap; } } - err = 0; out_munmap: perf_evlist__munmap(evlist); out_close_fd: diff --git a/tools/perf/tests/open-syscall-all-cpus.c b/tools/perf/tests/open-syscall-all-cpus.c index 31072aba0d54..b0657a9ccda6 100644 --- a/tools/perf/tests/open-syscall-all-cpus.c +++ b/tools/perf/tests/open-syscall-all-cpus.c @@ -7,20 +7,12 @@ int test__open_syscall_event_on_all_cpus(void) { int err = -1, fd, cpu; - struct thread_map *threads; struct cpu_map *cpus; struct perf_evsel *evsel; - struct perf_event_attr attr; unsigned int nr_open_calls = 111, i; cpu_set_t cpu_set; - int id = trace_event__id("sys_enter_open"); + struct thread_map *threads = thread_map__new(-1, getpid(), UINT_MAX); - if (id < 0) { - pr_debug("is debugfs mounted on /sys/kernel/debug?\n"); - return -1; - } - - threads = thread_map__new(-1, getpid(), UINT_MAX); if (threads == NULL) { pr_debug("thread_map__new\n"); return -1; @@ -32,15 +24,11 @@ int test__open_syscall_event_on_all_cpus(void) goto out_thread_map_delete; } - CPU_ZERO(&cpu_set); - memset(&attr, 0, sizeof(attr)); - attr.type = PERF_TYPE_TRACEPOINT; - attr.config = id; - evsel = perf_evsel__new(&attr, 0); + evsel = perf_evsel__newtp("syscalls", "sys_enter_open", 0); if (evsel == NULL) { - pr_debug("perf_evsel__new\n"); + pr_debug("is debugfs mounted on /sys/kernel/debug?\n"); goto out_thread_map_delete; } @@ -110,6 +98,7 @@ int test__open_syscall_event_on_all_cpus(void) } } + perf_evsel__free_counts(evsel); out_close_fd: perf_evsel__close_fd(evsel, 1, threads->nr); out_evsel_delete: diff --git a/tools/perf/tests/open-syscall.c b/tools/perf/tests/open-syscall.c index 98be8b518b4f..befc0671f95d 100644 --- a/tools/perf/tests/open-syscall.c +++ b/tools/perf/tests/open-syscall.c @@ -6,29 +6,18 @@ int test__open_syscall_event(void) { int err = -1, fd; - struct thread_map *threads; struct perf_evsel *evsel; - struct perf_event_attr attr; unsigned int nr_open_calls = 111, i; - int id = trace_event__id("sys_enter_open"); + struct thread_map *threads = thread_map__new(-1, getpid(), UINT_MAX); - if (id < 0) { - pr_debug("is debugfs mounted on /sys/kernel/debug?\n"); - return -1; - } - - threads = thread_map__new(-1, getpid(), UINT_MAX); if (threads == NULL) { pr_debug("thread_map__new\n"); return -1; } - memset(&attr, 0, sizeof(attr)); - attr.type = PERF_TYPE_TRACEPOINT; - attr.config = id; - evsel = perf_evsel__new(&attr, 0); + evsel = perf_evsel__newtp("syscalls", "sys_enter_open", 0); if (evsel == NULL) { - pr_debug("perf_evsel__new\n"); + pr_debug("is debugfs mounted on /sys/kernel/debug?\n"); goto out_thread_map_delete; } diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 32ee478905eb..c5636f36fe31 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -3,6 +3,7 @@ #include "evsel.h" #include "evlist.h" #include "sysfs.h" +#include "debugfs.h" #include "tests.h" #include <linux/hw_breakpoint.h> @@ -22,6 +23,7 @@ static int test__checkevent_tracepoint(struct perf_evlist *evlist) struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); + TEST_ASSERT_VAL("wrong number of groups", 0 == evlist->nr_groups); TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->attr.type); TEST_ASSERT_VAL("wrong sample_type", PERF_TP_SAMPLE_TYPE == evsel->attr.sample_type); @@ -34,6 +36,7 @@ static int test__checkevent_tracepoint_multi(struct perf_evlist *evlist) struct perf_evsel *evsel; TEST_ASSERT_VAL("wrong number of entries", evlist->nr_entries > 1); + TEST_ASSERT_VAL("wrong number of groups", 0 == evlist->nr_groups); list_for_each_entry(evsel, &evlist->entries, node) { TEST_ASSERT_VAL("wrong type", @@ -463,10 +466,10 @@ static int test__checkevent_pmu_events(struct perf_evlist *evlist) static int test__checkterms_simple(struct list_head *terms) { - struct parse_events__term *term; + struct parse_events_term *term; /* config=10 */ - term = list_entry(terms->next, struct parse_events__term, list); + term = list_entry(terms->next, struct parse_events_term, list); TEST_ASSERT_VAL("wrong type term", term->type_term == PARSE_EVENTS__TERM_TYPE_CONFIG); TEST_ASSERT_VAL("wrong type val", @@ -475,7 +478,7 @@ static int test__checkterms_simple(struct list_head *terms) TEST_ASSERT_VAL("wrong config", !term->config); /* config1 */ - term = list_entry(term->list.next, struct parse_events__term, list); + term = list_entry(term->list.next, struct parse_events_term, list); TEST_ASSERT_VAL("wrong type term", term->type_term == PARSE_EVENTS__TERM_TYPE_CONFIG1); TEST_ASSERT_VAL("wrong type val", @@ -484,7 +487,7 @@ static int test__checkterms_simple(struct list_head *terms) TEST_ASSERT_VAL("wrong config", !term->config); /* config2=3 */ - term = list_entry(term->list.next, struct parse_events__term, list); + term = list_entry(term->list.next, struct parse_events_term, list); TEST_ASSERT_VAL("wrong type term", term->type_term == PARSE_EVENTS__TERM_TYPE_CONFIG2); TEST_ASSERT_VAL("wrong type val", @@ -493,7 +496,7 @@ static int test__checkterms_simple(struct list_head *terms) TEST_ASSERT_VAL("wrong config", !term->config); /* umask=1*/ - term = list_entry(term->list.next, struct parse_events__term, list); + term = list_entry(term->list.next, struct parse_events_term, list); TEST_ASSERT_VAL("wrong type term", term->type_term == PARSE_EVENTS__TERM_TYPE_USER); TEST_ASSERT_VAL("wrong type val", @@ -509,6 +512,7 @@ static int test__group1(struct perf_evlist *evlist) struct perf_evsel *evsel, *leader; TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries); + TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups); /* instructions:k */ evsel = leader = perf_evlist__first(evlist); @@ -521,7 +525,9 @@ static int test__group1(struct perf_evlist *evlist) TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); - TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel)); + TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel)); + TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2); + TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0); /* cycles:upp */ evsel = perf_evsel__next(evsel); @@ -536,6 +542,7 @@ static int test__group1(struct perf_evlist *evlist) TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip == 2); TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1); return 0; } @@ -545,6 +552,7 @@ static int test__group2(struct perf_evlist *evlist) struct perf_evsel *evsel, *leader; TEST_ASSERT_VAL("wrong number of entries", 3 == evlist->nr_entries); + TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups); /* faults + :ku modifier */ evsel = leader = perf_evlist__first(evlist); @@ -557,7 +565,9 @@ static int test__group2(struct perf_evlist *evlist) TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); - TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel)); + TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel)); + TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2); + TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0); /* cache-references + :u modifier */ evsel = perf_evsel__next(evsel); @@ -567,10 +577,11 @@ static int test__group2(struct perf_evlist *evlist) TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); - TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1); /* cycles:k */ evsel = perf_evsel__next(evsel); @@ -583,7 +594,7 @@ static int test__group2(struct perf_evlist *evlist) TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); - TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel)); + TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel)); return 0; } @@ -593,6 +604,7 @@ static int test__group3(struct perf_evlist *evlist __maybe_unused) struct perf_evsel *evsel, *leader; TEST_ASSERT_VAL("wrong number of entries", 5 == evlist->nr_entries); + TEST_ASSERT_VAL("wrong number of groups", 2 == evlist->nr_groups); /* group1 syscalls:sys_enter_open:H */ evsel = leader = perf_evlist__first(evlist); @@ -606,9 +618,11 @@ static int test__group3(struct perf_evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); - TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel)); + TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel)); TEST_ASSERT_VAL("wrong group name", !strcmp(leader->group_name, "group1")); + TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2); + TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0); /* group1 cycles:kppp */ evsel = perf_evsel__next(evsel); @@ -624,6 +638,7 @@ static int test__group3(struct perf_evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip == 3); TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); TEST_ASSERT_VAL("wrong group name", !evsel->group_name); + TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1); /* group2 cycles + G modifier */ evsel = leader = perf_evsel__next(evsel); @@ -636,9 +651,11 @@ static int test__group3(struct perf_evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); - TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel)); + TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel)); TEST_ASSERT_VAL("wrong group name", !strcmp(leader->group_name, "group2")); + TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2); + TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0); /* group2 1:3 + G modifier */ evsel = perf_evsel__next(evsel); @@ -651,6 +668,7 @@ static int test__group3(struct perf_evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1); /* instructions:u */ evsel = perf_evsel__next(evsel); @@ -663,7 +681,7 @@ static int test__group3(struct perf_evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); - TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel)); + TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel)); return 0; } @@ -673,6 +691,7 @@ static int test__group4(struct perf_evlist *evlist __maybe_unused) struct perf_evsel *evsel, *leader; TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries); + TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups); /* cycles:u + p */ evsel = leader = perf_evlist__first(evlist); @@ -687,7 +706,9 @@ static int test__group4(struct perf_evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip == 1); TEST_ASSERT_VAL("wrong group name", !evsel->group_name); - TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel)); + TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel)); + TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2); + TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0); /* instructions:kp + p */ evsel = perf_evsel__next(evsel); @@ -702,6 +723,7 @@ static int test__group4(struct perf_evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip == 2); TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1); return 0; } @@ -711,6 +733,7 @@ static int test__group5(struct perf_evlist *evlist __maybe_unused) struct perf_evsel *evsel, *leader; TEST_ASSERT_VAL("wrong number of entries", 5 == evlist->nr_entries); + TEST_ASSERT_VAL("wrong number of groups", 2 == evlist->nr_groups); /* cycles + G */ evsel = leader = perf_evlist__first(evlist); @@ -724,7 +747,9 @@ static int test__group5(struct perf_evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); TEST_ASSERT_VAL("wrong group name", !evsel->group_name); - TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel)); + TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel)); + TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2); + TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0); /* instructions + G */ evsel = perf_evsel__next(evsel); @@ -738,6 +763,7 @@ static int test__group5(struct perf_evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1); /* cycles:G */ evsel = leader = perf_evsel__next(evsel); @@ -751,7 +777,9 @@ static int test__group5(struct perf_evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); TEST_ASSERT_VAL("wrong group name", !evsel->group_name); - TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel)); + TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel)); + TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2); + TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0); /* instructions:G */ evsel = perf_evsel__next(evsel); @@ -765,6 +793,7 @@ static int test__group5(struct perf_evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1); /* cycles */ evsel = perf_evsel__next(evsel); @@ -777,18 +806,235 @@ static int test__group5(struct perf_evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); - TEST_ASSERT_VAL("wrong leader", !perf_evsel__is_group_member(evsel)); + TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel)); + + return 0; +} + +static int test__group_gh1(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel, *leader; + + TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries); + TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups); + + /* cycles + :H group modifier */ + evsel = leader = perf_evlist__first(evlist); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong group name", !evsel->group_name); + TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel)); + TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2); + TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0); + + /* cache-misses:G + :H group modifier */ + evsel = perf_evsel__next(evsel); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_CACHE_MISSES == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1); + + return 0; +} + +static int test__group_gh2(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel, *leader; + + TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries); + TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups); + + /* cycles + :G group modifier */ + evsel = leader = perf_evlist__first(evlist); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong group name", !evsel->group_name); + TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel)); + TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2); + TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0); + + /* cache-misses:H + :G group modifier */ + evsel = perf_evsel__next(evsel); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_CACHE_MISSES == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1); + + return 0; +} + +static int test__group_gh3(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel, *leader; + + TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries); + TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups); + + /* cycles:G + :u group modifier */ + evsel = leader = perf_evlist__first(evlist); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong group name", !evsel->group_name); + TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel)); + TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2); + TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0); + + /* cache-misses:H + :u group modifier */ + evsel = perf_evsel__next(evsel); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_CACHE_MISSES == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1); + + return 0; +} + +static int test__group_gh4(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel, *leader; + + TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries); + TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups); + + /* cycles:G + :uG group modifier */ + evsel = leader = perf_evlist__first(evlist); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong group name", !evsel->group_name); + TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel)); + TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2); + TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0); + + /* cache-misses:H + :uG group modifier */ + evsel = perf_evsel__next(evsel); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_CACHE_MISSES == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1); return 0; } -struct test__event_st { +static int count_tracepoints(void) +{ + char events_path[PATH_MAX]; + struct dirent *events_ent; + DIR *events_dir; + int cnt = 0; + + scnprintf(events_path, PATH_MAX, "%s/tracing/events", + debugfs_find_mountpoint()); + + events_dir = opendir(events_path); + + TEST_ASSERT_VAL("Can't open events dir", events_dir); + + while ((events_ent = readdir(events_dir))) { + char sys_path[PATH_MAX]; + struct dirent *sys_ent; + DIR *sys_dir; + + if (!strcmp(events_ent->d_name, ".") + || !strcmp(events_ent->d_name, "..") + || !strcmp(events_ent->d_name, "enable") + || !strcmp(events_ent->d_name, "header_event") + || !strcmp(events_ent->d_name, "header_page")) + continue; + + scnprintf(sys_path, PATH_MAX, "%s/%s", + events_path, events_ent->d_name); + + sys_dir = opendir(sys_path); + TEST_ASSERT_VAL("Can't open sys dir", sys_dir); + + while ((sys_ent = readdir(sys_dir))) { + if (!strcmp(sys_ent->d_name, ".") + || !strcmp(sys_ent->d_name, "..") + || !strcmp(sys_ent->d_name, "enable") + || !strcmp(sys_ent->d_name, "filter")) + continue; + + cnt++; + } + + closedir(sys_dir); + } + + closedir(events_dir); + return cnt; +} + +static int test__all_tracepoints(struct perf_evlist *evlist) +{ + TEST_ASSERT_VAL("wrong events count", + count_tracepoints() == evlist->nr_entries); + + return test__checkevent_tracepoint_multi(evlist); +} + +struct evlist_test { const char *name; __u32 type; int (*check)(struct perf_evlist *evlist); }; -static struct test__event_st test__events[] = { +static struct evlist_test test__events[] = { [0] = { .name = "syscalls:sys_enter_open", .check = test__checkevent_tracepoint, @@ -921,9 +1167,29 @@ static struct test__event_st test__events[] = { .name = "{cycles,instructions}:G,{cycles:G,instructions:G},cycles", .check = test__group5, }, + [33] = { + .name = "*:*", + .check = test__all_tracepoints, + }, + [34] = { + .name = "{cycles,cache-misses:G}:H", + .check = test__group_gh1, + }, + [35] = { + .name = "{cycles,cache-misses:H}:G", + .check = test__group_gh2, + }, + [36] = { + .name = "{cycles:G,cache-misses:H}:u", + .check = test__group_gh3, + }, + [37] = { + .name = "{cycles:G,cache-misses:H}:uG", + .check = test__group_gh4, + }, }; -static struct test__event_st test__events_pmu[] = { +static struct evlist_test test__events_pmu[] = { [0] = { .name = "cpu/config=10,config1,config2=3,period=1000/u", .check = test__checkevent_pmu, @@ -934,20 +1200,20 @@ static struct test__event_st test__events_pmu[] = { }, }; -struct test__term { +struct terms_test { const char *str; __u32 type; int (*check)(struct list_head *terms); }; -static struct test__term test__terms[] = { +static struct terms_test test__terms[] = { [0] = { .str = "config=10,config1,config2=3,umask=1", .check = test__checkterms_simple, }, }; -static int test_event(struct test__event_st *e) +static int test_event(struct evlist_test *e) { struct perf_evlist *evlist; int ret; @@ -956,7 +1222,7 @@ static int test_event(struct test__event_st *e) if (evlist == NULL) return -ENOMEM; - ret = parse_events(evlist, e->name, 0); + ret = parse_events(evlist, e->name); if (ret) { pr_debug("failed to parse event '%s', err %d\n", e->name, ret); @@ -969,13 +1235,13 @@ static int test_event(struct test__event_st *e) return ret; } -static int test_events(struct test__event_st *events, unsigned cnt) +static int test_events(struct evlist_test *events, unsigned cnt) { int ret1, ret2 = 0; unsigned i; for (i = 0; i < cnt; i++) { - struct test__event_st *e = &events[i]; + struct evlist_test *e = &events[i]; pr_debug("running test %d '%s'\n", i, e->name); ret1 = test_event(e); @@ -986,7 +1252,7 @@ static int test_events(struct test__event_st *events, unsigned cnt) return ret2; } -static int test_term(struct test__term *t) +static int test_term(struct terms_test *t) { struct list_head *terms; int ret; @@ -1010,13 +1276,13 @@ static int test_term(struct test__term *t) return ret; } -static int test_terms(struct test__term *terms, unsigned cnt) +static int test_terms(struct terms_test *terms, unsigned cnt) { int ret = 0; unsigned i; for (i = 0; i < cnt; i++) { - struct test__term *t = &terms[i]; + struct terms_test *t = &terms[i]; pr_debug("running test %d '%s'\n", i, t->str); ret = test_term(t); @@ -1067,7 +1333,7 @@ static int test_pmu_events(void) while (!ret && (ent = readdir(dir))) { #define MAX_NAME 100 - struct test__event_st e; + struct evlist_test e; char name[MAX_NAME]; if (!strcmp(ent->d_name, ".") || diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index 70e0d4421df8..1e8e5128d0da 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -96,22 +96,22 @@ int test__PERF_RECORD(void) err = perf_evlist__prepare_workload(evlist, &opts, argv); if (err < 0) { pr_debug("Couldn't run the workload!\n"); - goto out_delete_evlist; + goto out_delete_maps; } /* * Config the evsels, setting attr->comm on the first one, etc. */ evsel = perf_evlist__first(evlist); - evsel->attr.sample_type |= PERF_SAMPLE_CPU; - evsel->attr.sample_type |= PERF_SAMPLE_TID; - evsel->attr.sample_type |= PERF_SAMPLE_TIME; - perf_evlist__config_attrs(evlist, &opts); + perf_evsel__set_sample_bit(evsel, CPU); + perf_evsel__set_sample_bit(evsel, TID); + perf_evsel__set_sample_bit(evsel, TIME); + perf_evlist__config(evlist, &opts); err = sched__get_first_possible_cpu(evlist->workload.pid, &cpu_mask); if (err < 0) { pr_debug("sched__get_first_possible_cpu: %s\n", strerror(errno)); - goto out_delete_evlist; + goto out_delete_maps; } cpu = err; @@ -121,7 +121,7 @@ int test__PERF_RECORD(void) */ if (sched_setaffinity(evlist->workload.pid, cpu_mask_size, &cpu_mask) < 0) { pr_debug("sched_setaffinity: %s\n", strerror(errno)); - goto out_delete_evlist; + goto out_delete_maps; } /* @@ -131,7 +131,7 @@ int test__PERF_RECORD(void) err = perf_evlist__open(evlist); if (err < 0) { pr_debug("perf_evlist__open: %s\n", strerror(errno)); - goto out_delete_evlist; + goto out_delete_maps; } /* @@ -142,7 +142,7 @@ int test__PERF_RECORD(void) err = perf_evlist__mmap(evlist, opts.mmap_pages, false); if (err < 0) { pr_debug("perf_evlist__mmap: %s\n", strerror(errno)); - goto out_delete_evlist; + goto out_delete_maps; } /* @@ -305,6 +305,8 @@ found_exit: } out_err: perf_evlist__munmap(evlist); +out_delete_maps: + perf_evlist__delete_maps(evlist); out_delete_evlist: perf_evlist__delete(evlist); out: diff --git a/tools/perf/tests/pmu.c b/tools/perf/tests/pmu.c index a5f379863b8f..12b322fa3475 100644 --- a/tools/perf/tests/pmu.c +++ b/tools/perf/tests/pmu.c @@ -19,10 +19,8 @@ static struct test_format { { "krava23", "config2:28-29,38\n", }, }; -#define TEST_FORMATS_CNT (sizeof(test_formats) / sizeof(struct test_format)) - /* Simulated users input. */ -static struct parse_events__term test_terms[] = { +static struct parse_events_term test_terms[] = { { .config = (char *) "krava01", .val.num = 15, @@ -78,7 +76,6 @@ static struct parse_events__term test_terms[] = { .type_term = PARSE_EVENTS__TERM_TYPE_USER, }, }; -#define TERMS_CNT (sizeof(test_terms) / sizeof(struct parse_events__term)) /* * Prepare format directory data, exported by kernel @@ -93,7 +90,7 @@ static char *test_format_dir_get(void) if (!mkdtemp(dir)) return NULL; - for (i = 0; i < TEST_FORMATS_CNT; i++) { + for (i = 0; i < ARRAY_SIZE(test_formats); i++) { static char name[PATH_MAX]; struct test_format *format = &test_formats[i]; FILE *file; @@ -130,14 +127,12 @@ static struct list_head *test_terms_list(void) static LIST_HEAD(terms); unsigned int i; - for (i = 0; i < TERMS_CNT; i++) + for (i = 0; i < ARRAY_SIZE(test_terms); i++) list_add_tail(&test_terms[i].list, &terms); return &terms; } -#undef TERMS_CNT - int test__pmu(void) { char *format = test_format_dir_get(); diff --git a/tools/perf/tests/python-use.c b/tools/perf/tests/python-use.c new file mode 100644 index 000000000000..7760277c6def --- /dev/null +++ b/tools/perf/tests/python-use.c @@ -0,0 +1,23 @@ +/* + * Just test if we can load the python binding. + */ + +#include <stdio.h> +#include <stdlib.h> +#include "tests.h" + +extern int verbose; + +int test__python_use(void) +{ + char *cmd; + int ret; + + if (asprintf(&cmd, "echo \"import sys ; sys.path.append('%s'); import perf\" | %s %s", + PYTHONPATH, PYTHON, verbose ? "" : "2> /dev/null") < 0) + return -1; + + ret = system(cmd) ? -1 : 0; + free(cmd); + return ret; +} diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index fc121edab016..5de0be1ff4b6 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -1,6 +1,12 @@ #ifndef TESTS_H #define TESTS_H +enum { + TEST_OK = 0, + TEST_FAIL = -1, + TEST_SKIP = -2, +}; + /* Tests */ int test__vmlinux_matches_kallsyms(void); int test__open_syscall_event(void); @@ -15,8 +21,7 @@ int test__pmu(void); int test__attr(void); int test__dso_data(void); int test__parse_events(void); - -/* Util */ -int trace_event__id(const char *evname); +int test__hists_link(void); +int test__python_use(void); #endif /* TESTS_H */ diff --git a/tools/perf/tests/util.c b/tools/perf/tests/util.c deleted file mode 100644 index 748f2e8f6961..000000000000 --- a/tools/perf/tests/util.c +++ /dev/null @@ -1,30 +0,0 @@ -#include <stdio.h> -#include <unistd.h> -#include <stdlib.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include "tests.h" -#include "debugfs.h" - -int trace_event__id(const char *evname) -{ - char *filename; - int err = -1, fd; - - if (asprintf(&filename, - "%s/syscalls/%s/id", - tracing_events_path, evname) < 0) - return -1; - - fd = open(filename, O_RDONLY); - if (fd >= 0) { - char id[16]; - if (read(fd, id, sizeof(id)) > 0) - err = atoi(id); - close(fd); - } - - free(filename); - return err; -} diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c index 0d1cdbee2f59..7b4c4d26d1ba 100644 --- a/tools/perf/tests/vmlinux-kallsyms.c +++ b/tools/perf/tests/vmlinux-kallsyms.c @@ -44,7 +44,7 @@ int test__vmlinux_matches_kallsyms(void) */ if (machine__create_kernel_maps(&kallsyms) < 0) { pr_debug("machine__create_kernel_maps "); - return -1; + goto out; } /* @@ -101,7 +101,8 @@ int test__vmlinux_matches_kallsyms(void) */ if (machine__load_vmlinux_path(&vmlinux, type, vmlinux_matches_kallsyms_filter) <= 0) { - pr_debug("machine__load_vmlinux_path "); + pr_debug("Couldn't find a vmlinux that matches the kernel running on this machine, skipping test\n"); + err = TEST_SKIP; goto out; } @@ -226,5 +227,7 @@ detour: map__fprintf(pos, stderr); } out: + machine__exit(&kallsyms); + machine__exit(&vmlinux); return err; } diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c index 4aeb7d5df939..809ea4632a34 100644 --- a/tools/perf/ui/browser.c +++ b/tools/perf/ui/browser.c @@ -273,6 +273,8 @@ void ui_browser__hide(struct ui_browser *browser __maybe_unused) { pthread_mutex_lock(&ui__lock); ui_helpline__pop(); + free(browser->helpline); + browser->helpline = NULL; pthread_mutex_unlock(&ui__lock); } @@ -471,7 +473,7 @@ unsigned int ui_browser__list_head_refresh(struct ui_browser *browser) return row; } -static struct ui_browser__colorset { +static struct ui_browser_colorset { const char *name, *fg, *bg; int colorset; } ui_browser__colorsets[] = { @@ -706,7 +708,7 @@ void ui_browser__init(void) perf_config(ui_browser__color_config, NULL); while (ui_browser__colorsets[i].name) { - struct ui_browser__colorset *c = &ui_browser__colorsets[i++]; + struct ui_browser_colorset *c = &ui_browser__colorsets[i++]; sltt_set_color(c->colorset, c->name, c->fg, c->bg); } diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 5dab3ca96980..7dca1555c610 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -182,6 +182,16 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int ab->selection = dl; } +static bool disasm_line__is_valid_jump(struct disasm_line *dl, struct symbol *sym) +{ + if (!dl || !dl->ins || !ins__is_jump(dl->ins) + || !disasm_line__has_offset(dl) + || dl->ops.target.offset >= symbol__size(sym)) + return false; + + return true; +} + static void annotate_browser__draw_current_jump(struct ui_browser *browser) { struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); @@ -195,8 +205,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) if (strstr(sym->name, "@plt")) return; - if (!cursor || !cursor->ins || !ins__is_jump(cursor->ins) || - !disasm_line__has_offset(cursor)) + if (!disasm_line__is_valid_jump(cursor, sym)) return; target = ab->offsets[cursor->ops.target.offset]; @@ -788,17 +797,9 @@ static void annotate_browser__mark_jump_targets(struct annotate_browser *browser struct disasm_line *dl = browser->offsets[offset], *dlt; struct browser_disasm_line *bdlt; - if (!dl || !dl->ins || !ins__is_jump(dl->ins) || - !disasm_line__has_offset(dl)) + if (!disasm_line__is_valid_jump(dl, sym)) continue; - if (dl->ops.target.offset >= size) { - ui__error("jump to after symbol!\n" - "size: %zx, jump target: %" PRIx64, - size, dl->ops.target.offset); - continue; - } - dlt = browser->offsets[dl->ops.target.offset]; /* * FIXME: Oops, no jump target? Buggy disassembler? Or do we @@ -921,11 +922,11 @@ out_free_offsets: #define ANNOTATE_CFG(n) \ { .name = #n, .value = &annotate_browser__opts.n, } - + /* * Keep the entries sorted, they are bsearch'ed */ -static struct annotate__config { +static struct annotate_config { const char *name; bool *value; } annotate__configs[] = { @@ -939,7 +940,7 @@ static struct annotate__config { static int annotate_config__cmp(const void *name, const void *cfgp) { - const struct annotate__config *cfg = cfgp; + const struct annotate_config *cfg = cfgp; return strcmp(name, cfg->name); } @@ -947,7 +948,7 @@ static int annotate_config__cmp(const void *name, const void *cfgp) static int annotate__config(const char *var, const char *value, void *data __maybe_unused) { - struct annotate__config *cfg; + struct annotate_config *cfg; const char *name; if (prefixcmp(var, "annotate.") != 0) @@ -955,7 +956,7 @@ static int annotate__config(const char *var, const char *value, name = var + 9; cfg = bsearch(name, annotate__configs, ARRAY_SIZE(annotate__configs), - sizeof(struct annotate__config), annotate_config__cmp); + sizeof(struct annotate_config), annotate_config__cmp); if (cfg == NULL) return -1; diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index ccc4bd161420..aa22704047d6 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -567,26 +567,128 @@ static int hist_browser__show_callchain(struct hist_browser *browser, return row - first_row; } -#define HPP__COLOR_FN(_name, _field) \ -static int hist_browser__hpp_color_ ## _name(struct perf_hpp *hpp, \ - struct hist_entry *he) \ +struct hpp_arg { + struct ui_browser *b; + char folded_sign; + bool current_entry; +}; + +static int __hpp__color_callchain(struct hpp_arg *arg) +{ + if (!symbol_conf.use_callchain) + return 0; + + slsmg_printf("%c ", arg->folded_sign); + return 2; +} + +static int __hpp__color_fmt(struct perf_hpp *hpp, struct hist_entry *he, + u64 (*get_field)(struct hist_entry *), + int (*callchain_cb)(struct hpp_arg *)) +{ + int ret = 0; + double percent = 0.0; + struct hists *hists = he->hists; + struct hpp_arg *arg = hpp->ptr; + + if (hists->stats.total_period) + percent = 100.0 * get_field(he) / hists->stats.total_period; + + ui_browser__set_percent_color(arg->b, percent, arg->current_entry); + + if (callchain_cb) + ret += callchain_cb(arg); + + ret += scnprintf(hpp->buf, hpp->size, "%6.2f%%", percent); + slsmg_printf("%s", hpp->buf); + + if (symbol_conf.event_group) { + int prev_idx, idx_delta; + struct perf_evsel *evsel = hists_to_evsel(hists); + struct hist_entry *pair; + int nr_members = evsel->nr_members; + + if (nr_members <= 1) + goto out; + + prev_idx = perf_evsel__group_idx(evsel); + + list_for_each_entry(pair, &he->pairs.head, pairs.node) { + u64 period = get_field(pair); + u64 total = pair->hists->stats.total_period; + + if (!total) + continue; + + evsel = hists_to_evsel(pair->hists); + idx_delta = perf_evsel__group_idx(evsel) - prev_idx - 1; + + while (idx_delta--) { + /* + * zero-fill group members in the middle which + * have no sample + */ + ui_browser__set_percent_color(arg->b, 0.0, + arg->current_entry); + ret += scnprintf(hpp->buf, hpp->size, + " %6.2f%%", 0.0); + slsmg_printf("%s", hpp->buf); + } + + percent = 100.0 * period / total; + ui_browser__set_percent_color(arg->b, percent, + arg->current_entry); + ret += scnprintf(hpp->buf, hpp->size, + " %6.2f%%", percent); + slsmg_printf("%s", hpp->buf); + + prev_idx = perf_evsel__group_idx(evsel); + } + + idx_delta = nr_members - prev_idx - 1; + + while (idx_delta--) { + /* + * zero-fill group members at last which have no sample + */ + ui_browser__set_percent_color(arg->b, 0.0, + arg->current_entry); + ret += scnprintf(hpp->buf, hpp->size, + " %6.2f%%", 0.0); + slsmg_printf("%s", hpp->buf); + } + } +out: + if (!arg->current_entry || !arg->b->navkeypressed) + ui_browser__set_color(arg->b, HE_COLORSET_NORMAL); + + return ret; +} + +#define __HPP_COLOR_PERCENT_FN(_type, _field, _cb) \ +static u64 __hpp_get_##_field(struct hist_entry *he) \ +{ \ + return he->stat._field; \ +} \ + \ +static int hist_browser__hpp_color_##_type(struct perf_hpp *hpp, \ + struct hist_entry *he) \ { \ - struct hists *hists = he->hists; \ - double percent = 100.0 * he->stat._field / hists->stats.total_period; \ - *(double *)hpp->ptr = percent; \ - return scnprintf(hpp->buf, hpp->size, "%6.2f%%", percent); \ + return __hpp__color_fmt(hpp, he, __hpp_get_##_field, _cb); \ } -HPP__COLOR_FN(overhead, period) -HPP__COLOR_FN(overhead_sys, period_sys) -HPP__COLOR_FN(overhead_us, period_us) -HPP__COLOR_FN(overhead_guest_sys, period_guest_sys) -HPP__COLOR_FN(overhead_guest_us, period_guest_us) +__HPP_COLOR_PERCENT_FN(overhead, period, __hpp__color_callchain) +__HPP_COLOR_PERCENT_FN(overhead_sys, period_sys, NULL) +__HPP_COLOR_PERCENT_FN(overhead_us, period_us, NULL) +__HPP_COLOR_PERCENT_FN(overhead_guest_sys, period_guest_sys, NULL) +__HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us, NULL) -#undef HPP__COLOR_FN +#undef __HPP_COLOR_PERCENT_FN void hist_browser__init_hpp(void) { + perf_hpp__column_enable(PERF_HPP__OVERHEAD); + perf_hpp__init(); perf_hpp__format[PERF_HPP__OVERHEAD].color = @@ -606,13 +708,13 @@ static int hist_browser__show_entry(struct hist_browser *browser, unsigned short row) { char s[256]; - double percent; - int i, printed = 0; + int printed = 0; int width = browser->b.width; char folded_sign = ' '; bool current_entry = ui_browser__is_current_entry(&browser->b, row); off_t row_offset = entry->row_offset; bool first = true; + struct perf_hpp_fmt *fmt; if (current_entry) { browser->he_selection = entry; @@ -625,41 +727,30 @@ static int hist_browser__show_entry(struct hist_browser *browser, } if (row_offset == 0) { + struct hpp_arg arg = { + .b = &browser->b, + .folded_sign = folded_sign, + .current_entry = current_entry, + }; struct perf_hpp hpp = { .buf = s, .size = sizeof(s), + .ptr = &arg, }; ui_browser__gotorc(&browser->b, row, 0); - for (i = 0; i < PERF_HPP__MAX_INDEX; i++) { - if (!perf_hpp__format[i].cond) - continue; - + perf_hpp__for_each_format(fmt) { if (!first) { slsmg_printf(" "); width -= 2; } first = false; - if (perf_hpp__format[i].color) { - hpp.ptr = &percent; - /* It will set percent for us. See HPP__COLOR_FN above. */ - width -= perf_hpp__format[i].color(&hpp, entry); - - ui_browser__set_percent_color(&browser->b, percent, current_entry); - - if (i == PERF_HPP__OVERHEAD && symbol_conf.use_callchain) { - slsmg_printf("%c ", folded_sign); - width -= 2; - } - - slsmg_printf("%s", s); - - if (!current_entry || !browser->b.navkeypressed) - ui_browser__set_color(&browser->b, HE_COLORSET_NORMAL); + if (fmt->color) { + width -= fmt->color(&hpp, entry); } else { - width -= perf_hpp__format[i].entry(&hpp, entry); + width -= fmt->entry(&hpp, entry); slsmg_printf("%s", s); } } @@ -1098,6 +1189,21 @@ static int hists__browser_title(struct hists *hists, char *bf, size_t size, const struct thread *thread = hists->thread_filter; unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE]; u64 nr_events = hists->stats.total_period; + struct perf_evsel *evsel = hists_to_evsel(hists); + char buf[512]; + size_t buflen = sizeof(buf); + + if (symbol_conf.event_group && evsel->nr_members > 1) { + struct perf_evsel *pos; + + perf_evsel__group_desc(evsel, buf, buflen); + ev_name = buf; + + for_each_group_member(pos, evsel) { + nr_samples += pos->hists.stats.nr_events[PERF_RECORD_SAMPLE]; + nr_events += pos->hists.stats.total_period; + } + } nr_samples = convert_unit(nr_samples, &unit); printed = scnprintf(bf, size, @@ -1135,6 +1241,96 @@ static inline bool is_report_browser(void *timer) return timer == NULL; } +/* + * Only runtime switching of perf data file will make "input_name" point + * to a malloced buffer. So add "is_input_name_malloced" flag to decide + * whether we need to call free() for current "input_name" during the switch. + */ +static bool is_input_name_malloced = false; + +static int switch_data_file(void) +{ + char *pwd, *options[32], *abs_path[32], *tmp; + DIR *pwd_dir; + int nr_options = 0, choice = -1, ret = -1; + struct dirent *dent; + + pwd = getenv("PWD"); + if (!pwd) + return ret; + + pwd_dir = opendir(pwd); + if (!pwd_dir) + return ret; + + memset(options, 0, sizeof(options)); + memset(options, 0, sizeof(abs_path)); + + while ((dent = readdir(pwd_dir))) { + char path[PATH_MAX]; + u64 magic; + char *name = dent->d_name; + FILE *file; + + if (!(dent->d_type == DT_REG)) + continue; + + snprintf(path, sizeof(path), "%s/%s", pwd, name); + + file = fopen(path, "r"); + if (!file) + continue; + + if (fread(&magic, 1, 8, file) < 8) + goto close_file_and_continue; + + if (is_perf_magic(magic)) { + options[nr_options] = strdup(name); + if (!options[nr_options]) + goto close_file_and_continue; + + abs_path[nr_options] = strdup(path); + if (!abs_path[nr_options]) { + free(options[nr_options]); + ui__warning("Can't search all data files due to memory shortage.\n"); + fclose(file); + break; + } + + nr_options++; + } + +close_file_and_continue: + fclose(file); + if (nr_options >= 32) { + ui__warning("Too many perf data files in PWD!\n" + "Only the first 32 files will be listed.\n"); + break; + } + } + closedir(pwd_dir); + + if (nr_options) { + choice = ui__popup_menu(nr_options, options); + if (choice < nr_options && choice >= 0) { + tmp = strdup(abs_path[choice]); + if (tmp) { + if (is_input_name_malloced) + free((void *)input_name); + input_name = tmp; + is_input_name_malloced = true; + ret = 0; + } else + ui__warning("Data switch failed due to memory shortage!\n"); + } + } + + free_popup_options(options, nr_options); + free_popup_options(abs_path, nr_options); + return ret; +} + + static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, const char *helpline, const char *ev_name, bool left_exits, @@ -1169,7 +1365,8 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, int choice = 0, annotate = -2, zoom_dso = -2, zoom_thread = -2, annotate_f = -2, annotate_t = -2, browse_map = -2; - int scripts_comm = -2, scripts_symbol = -2, scripts_all = -2; + int scripts_comm = -2, scripts_symbol = -2, + scripts_all = -2, switch_data = -2; nr_options = 0; @@ -1226,6 +1423,10 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, if (is_report_browser(hbt)) goto do_scripts; continue; + case 's': + if (is_report_browser(hbt)) + goto do_data_switch; + continue; case K_F1: case 'h': case '?': @@ -1245,6 +1446,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, "d Zoom into current DSO\n" "t Zoom into current Thread\n" "r Run available scripts('perf report' only)\n" + "s Switch to another data file in PWD ('perf report' only)\n" "P Print histograms to perf.hist.N\n" "V Verbose (DSO names in callchains, etc)\n" "/ Filter symbol by name"); @@ -1352,6 +1554,9 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, if (asprintf(&options[nr_options], "Run scripts for all samples") > 0) scripts_all = nr_options++; + if (is_report_browser(hbt) && asprintf(&options[nr_options], + "Switch to another data file in PWD") > 0) + switch_data = nr_options++; add_exit_option: options[nr_options++] = (char *)"Exit"; retry_popup_menu: @@ -1462,6 +1667,16 @@ do_scripts: script_browse(script_opt); } + /* Switch to another data file */ + else if (choice == switch_data) { +do_data_switch: + if (!switch_data_file()) { + key = K_SWITCH_INPUT_DATA; + break; + } else + ui__warning("Won't switch the data files due to\n" + "no valid data file get selected!\n"); + } } out_free_stack: pstack__delete(fstack); @@ -1494,6 +1709,16 @@ static void perf_evsel_menu__write(struct ui_browser *browser, ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED : HE_COLORSET_NORMAL); + if (symbol_conf.event_group && evsel->nr_members > 1) { + struct perf_evsel *pos; + + ev_name = perf_evsel__group_name(evsel); + + for_each_group_member(pos, evsel) { + nr_events += pos->hists.stats.nr_events[PERF_RECORD_SAMPLE]; + } + } + nr_events = convert_unit(nr_events, &unit); printed = scnprintf(bf, sizeof(bf), "%lu%c%s%s", nr_events, unit, unit == ' ' ? "" : " ", ev_name); @@ -1578,6 +1803,7 @@ browse_hists: "Do you really want to exit?")) continue; /* Fall thru */ + case K_SWITCH_INPUT_DATA: case 'q': case CTRL('c'): goto out; @@ -1604,8 +1830,19 @@ out: return key; } +static bool filter_group_entries(struct ui_browser *self __maybe_unused, + void *entry) +{ + struct perf_evsel *evsel = list_entry(entry, struct perf_evsel, node); + + if (symbol_conf.event_group && !perf_evsel__is_group_leader(evsel)) + return true; + + return false; +} + static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist, - const char *help, + int nr_entries, const char *help, struct hist_browser_timer *hbt, struct perf_session_env *env) { @@ -1616,7 +1853,8 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist, .refresh = ui_browser__list_head_refresh, .seek = ui_browser__list_head_seek, .write = perf_evsel_menu__write, - .nr_entries = evlist->nr_entries, + .filter = filter_group_entries, + .nr_entries = nr_entries, .priv = evlist, }, .env = env, @@ -1632,20 +1870,37 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist, menu.b.width = line_len; } - return perf_evsel_menu__run(&menu, evlist->nr_entries, help, hbt); + return perf_evsel_menu__run(&menu, nr_entries, help, hbt); } int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help, struct hist_browser_timer *hbt, struct perf_session_env *env) { - if (evlist->nr_entries == 1) { + int nr_entries = evlist->nr_entries; + +single_entry: + if (nr_entries == 1) { struct perf_evsel *first = list_entry(evlist->entries.next, struct perf_evsel, node); const char *ev_name = perf_evsel__name(first); - return perf_evsel__hists_browse(first, evlist->nr_entries, help, + + return perf_evsel__hists_browse(first, nr_entries, help, ev_name, false, hbt, env); } - return __perf_evlist__tui_browse_hists(evlist, help, hbt, env); + if (symbol_conf.event_group) { + struct perf_evsel *pos; + + nr_entries = 0; + list_for_each_entry(pos, &evlist->entries, node) + if (perf_evsel__is_group_leader(pos)) + nr_entries++; + + if (nr_entries == 1) + goto single_entry; + } + + return __perf_evlist__tui_browse_hists(evlist, nr_entries, help, + hbt, env); } diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c new file mode 100644 index 000000000000..7d8dc581a545 --- /dev/null +++ b/tools/perf/ui/gtk/annotate.c @@ -0,0 +1,229 @@ +#include "gtk.h" +#include "util/debug.h" +#include "util/annotate.h" +#include "ui/helpline.h" + + +enum { + ANN_COL__PERCENT, + ANN_COL__OFFSET, + ANN_COL__LINE, + + MAX_ANN_COLS +}; + +static const char *const col_names[] = { + "Overhead", + "Offset", + "Line" +}; + +static int perf_gtk__get_percent(char *buf, size_t size, struct symbol *sym, + struct disasm_line *dl, int evidx) +{ + struct sym_hist *symhist; + double percent = 0.0; + const char *markup; + int ret = 0; + + strcpy(buf, ""); + + if (dl->offset == (s64) -1) + return 0; + + symhist = annotation__histogram(symbol__annotation(sym), evidx); + if (!symhist->addr[dl->offset]) + return 0; + + percent = 100.0 * symhist->addr[dl->offset] / symhist->sum; + + markup = perf_gtk__get_percent_color(percent); + if (markup) + ret += scnprintf(buf, size, "%s", markup); + ret += scnprintf(buf + ret, size - ret, "%6.2f%%", percent); + if (markup) + ret += scnprintf(buf + ret, size - ret, "</span>"); + + return ret; +} + +static int perf_gtk__get_offset(char *buf, size_t size, struct symbol *sym, + struct map *map, struct disasm_line *dl) +{ + u64 start = map__rip_2objdump(map, sym->start); + + strcpy(buf, ""); + + if (dl->offset == (s64) -1) + return 0; + + return scnprintf(buf, size, "%"PRIx64, start + dl->offset); +} + +static int perf_gtk__get_line(char *buf, size_t size, struct disasm_line *dl) +{ + int ret = 0; + char *line = g_markup_escape_text(dl->line, -1); + const char *markup = "<span fgcolor='gray'>"; + + strcpy(buf, ""); + + if (!line) + return 0; + + if (dl->offset != (s64) -1) + markup = NULL; + + if (markup) + ret += scnprintf(buf, size, "%s", markup); + ret += scnprintf(buf + ret, size - ret, "%s", line); + if (markup) + ret += scnprintf(buf + ret, size - ret, "</span>"); + + g_free(line); + return ret; +} + +static int perf_gtk__annotate_symbol(GtkWidget *window, struct symbol *sym, + struct map *map, int evidx, + struct hist_browser_timer *hbt __maybe_unused) +{ + struct disasm_line *pos, *n; + struct annotation *notes; + GType col_types[MAX_ANN_COLS]; + GtkCellRenderer *renderer; + GtkListStore *store; + GtkWidget *view; + int i; + char s[512]; + + notes = symbol__annotation(sym); + + for (i = 0; i < MAX_ANN_COLS; i++) { + col_types[i] = G_TYPE_STRING; + } + store = gtk_list_store_newv(MAX_ANN_COLS, col_types); + + view = gtk_tree_view_new(); + renderer = gtk_cell_renderer_text_new(); + + for (i = 0; i < MAX_ANN_COLS; i++) { + gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view), + -1, col_names[i], renderer, "markup", + i, NULL); + } + + gtk_tree_view_set_model(GTK_TREE_VIEW(view), GTK_TREE_MODEL(store)); + g_object_unref(GTK_TREE_MODEL(store)); + + list_for_each_entry(pos, ¬es->src->source, node) { + GtkTreeIter iter; + + gtk_list_store_append(store, &iter); + + if (perf_gtk__get_percent(s, sizeof(s), sym, pos, evidx)) + gtk_list_store_set(store, &iter, ANN_COL__PERCENT, s, -1); + if (perf_gtk__get_offset(s, sizeof(s), sym, map, pos)) + gtk_list_store_set(store, &iter, ANN_COL__OFFSET, s, -1); + if (perf_gtk__get_line(s, sizeof(s), pos)) + gtk_list_store_set(store, &iter, ANN_COL__LINE, s, -1); + } + + gtk_container_add(GTK_CONTAINER(window), view); + + list_for_each_entry_safe(pos, n, ¬es->src->source, node) { + list_del(&pos->node); + disasm_line__free(pos); + } + + return 0; +} + +int symbol__gtk_annotate(struct symbol *sym, struct map *map, int evidx, + struct hist_browser_timer *hbt) +{ + GtkWidget *window; + GtkWidget *notebook; + GtkWidget *scrolled_window; + GtkWidget *tab_label; + + if (map->dso->annotate_warned) + return -1; + + if (symbol__annotate(sym, map, 0) < 0) { + ui__error("%s", ui_helpline__current); + return -1; + } + + if (perf_gtk__is_active_context(pgctx)) { + window = pgctx->main_window; + notebook = pgctx->notebook; + } else { + GtkWidget *vbox; + GtkWidget *infobar; + GtkWidget *statbar; + + signal(SIGSEGV, perf_gtk__signal); + signal(SIGFPE, perf_gtk__signal); + signal(SIGINT, perf_gtk__signal); + signal(SIGQUIT, perf_gtk__signal); + signal(SIGTERM, perf_gtk__signal); + + window = gtk_window_new(GTK_WINDOW_TOPLEVEL); + gtk_window_set_title(GTK_WINDOW(window), "perf annotate"); + + g_signal_connect(window, "delete_event", gtk_main_quit, NULL); + + pgctx = perf_gtk__activate_context(window); + if (!pgctx) + return -1; + + vbox = gtk_vbox_new(FALSE, 0); + notebook = gtk_notebook_new(); + pgctx->notebook = notebook; + + gtk_box_pack_start(GTK_BOX(vbox), notebook, TRUE, TRUE, 0); + + infobar = perf_gtk__setup_info_bar(); + if (infobar) { + gtk_box_pack_start(GTK_BOX(vbox), infobar, + FALSE, FALSE, 0); + } + + statbar = perf_gtk__setup_statusbar(); + gtk_box_pack_start(GTK_BOX(vbox), statbar, FALSE, FALSE, 0); + + gtk_container_add(GTK_CONTAINER(window), vbox); + } + + scrolled_window = gtk_scrolled_window_new(NULL, NULL); + tab_label = gtk_label_new(sym->name); + + gtk_scrolled_window_set_policy(GTK_SCROLLED_WINDOW(scrolled_window), + GTK_POLICY_AUTOMATIC, + GTK_POLICY_AUTOMATIC); + + gtk_notebook_append_page(GTK_NOTEBOOK(notebook), scrolled_window, + tab_label); + + perf_gtk__annotate_symbol(scrolled_window, sym, map, evidx, hbt); + return 0; +} + +void perf_gtk__show_annotations(void) +{ + GtkWidget *window; + + if (!perf_gtk__is_active_context(pgctx)) + return; + + window = pgctx->main_window; + gtk_widget_show_all(window); + + perf_gtk__resize_window(window); + gtk_window_set_position(GTK_WINDOW(window), GTK_WIN_POS_CENTER); + + gtk_main(); + + perf_gtk__deactivate_context(&pgctx); +} diff --git a/tools/perf/ui/gtk/browser.c b/tools/perf/ui/gtk/browser.c index 253b6219a39e..c95012cdb438 100644 --- a/tools/perf/ui/gtk/browser.c +++ b/tools/perf/ui/gtk/browser.c @@ -8,15 +8,13 @@ #include <signal.h> -#define MAX_COLUMNS 32 - -static void perf_gtk__signal(int sig) +void perf_gtk__signal(int sig) { perf_gtk__exit(false); psignal(sig, "perf"); } -static void perf_gtk__resize_window(GtkWidget *window) +void perf_gtk__resize_window(GtkWidget *window) { GdkRectangle rect; GdkScreen *screen; @@ -36,7 +34,7 @@ static void perf_gtk__resize_window(GtkWidget *window) gtk_window_resize(GTK_WINDOW(window), width, height); } -static const char *perf_gtk__get_percent_color(double percent) +const char *perf_gtk__get_percent_color(double percent) { if (percent >= MIN_RED) return "<span fgcolor='red'>"; @@ -45,155 +43,8 @@ static const char *perf_gtk__get_percent_color(double percent) return NULL; } -#define HPP__COLOR_FN(_name, _field) \ -static int perf_gtk__hpp_color_ ## _name(struct perf_hpp *hpp, \ - struct hist_entry *he) \ -{ \ - struct hists *hists = he->hists; \ - double percent = 100.0 * he->stat._field / hists->stats.total_period; \ - const char *markup; \ - int ret = 0; \ - \ - markup = perf_gtk__get_percent_color(percent); \ - if (markup) \ - ret += scnprintf(hpp->buf, hpp->size, "%s", markup); \ - ret += scnprintf(hpp->buf + ret, hpp->size - ret, "%6.2f%%", percent); \ - if (markup) \ - ret += scnprintf(hpp->buf + ret, hpp->size - ret, "</span>"); \ - \ - return ret; \ -} - -HPP__COLOR_FN(overhead, period) -HPP__COLOR_FN(overhead_sys, period_sys) -HPP__COLOR_FN(overhead_us, period_us) -HPP__COLOR_FN(overhead_guest_sys, period_guest_sys) -HPP__COLOR_FN(overhead_guest_us, period_guest_us) - -#undef HPP__COLOR_FN - -void perf_gtk__init_hpp(void) -{ - perf_hpp__init(); - - perf_hpp__format[PERF_HPP__OVERHEAD].color = - perf_gtk__hpp_color_overhead; - perf_hpp__format[PERF_HPP__OVERHEAD_SYS].color = - perf_gtk__hpp_color_overhead_sys; - perf_hpp__format[PERF_HPP__OVERHEAD_US].color = - perf_gtk__hpp_color_overhead_us; - perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_SYS].color = - perf_gtk__hpp_color_overhead_guest_sys; - perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].color = - perf_gtk__hpp_color_overhead_guest_us; -} - -static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists) -{ - GType col_types[MAX_COLUMNS]; - GtkCellRenderer *renderer; - struct sort_entry *se; - GtkListStore *store; - struct rb_node *nd; - GtkWidget *view; - int i, col_idx; - int nr_cols; - char s[512]; - - struct perf_hpp hpp = { - .buf = s, - .size = sizeof(s), - }; - - nr_cols = 0; - - for (i = 0; i < PERF_HPP__MAX_INDEX; i++) { - if (!perf_hpp__format[i].cond) - continue; - - col_types[nr_cols++] = G_TYPE_STRING; - } - - list_for_each_entry(se, &hist_entry__sort_list, list) { - if (se->elide) - continue; - - col_types[nr_cols++] = G_TYPE_STRING; - } - - store = gtk_list_store_newv(nr_cols, col_types); - - view = gtk_tree_view_new(); - - renderer = gtk_cell_renderer_text_new(); - - col_idx = 0; - - for (i = 0; i < PERF_HPP__MAX_INDEX; i++) { - if (!perf_hpp__format[i].cond) - continue; - - perf_hpp__format[i].header(&hpp); - - gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view), - -1, s, - renderer, "markup", - col_idx++, NULL); - } - - list_for_each_entry(se, &hist_entry__sort_list, list) { - if (se->elide) - continue; - - gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view), - -1, se->se_header, - renderer, "text", - col_idx++, NULL); - } - - gtk_tree_view_set_model(GTK_TREE_VIEW(view), GTK_TREE_MODEL(store)); - - g_object_unref(GTK_TREE_MODEL(store)); - - for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { - struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); - GtkTreeIter iter; - - if (h->filtered) - continue; - - gtk_list_store_append(store, &iter); - - col_idx = 0; - - for (i = 0; i < PERF_HPP__MAX_INDEX; i++) { - if (!perf_hpp__format[i].cond) - continue; - - if (perf_hpp__format[i].color) - perf_hpp__format[i].color(&hpp, h); - else - perf_hpp__format[i].entry(&hpp, h); - - gtk_list_store_set(store, &iter, col_idx++, s, -1); - } - - list_for_each_entry(se, &hist_entry__sort_list, list) { - if (se->elide) - continue; - - se->se_snprintf(h, s, ARRAY_SIZE(s), - hists__col_len(hists, se->se_width_idx)); - - gtk_list_store_set(store, &iter, col_idx++, s, -1); - } - } - - gtk_container_add(GTK_CONTAINER(window), view); -} - #ifdef HAVE_GTK_INFO_BAR -static GtkWidget *perf_gtk__setup_info_bar(void) +GtkWidget *perf_gtk__setup_info_bar(void) { GtkWidget *info_bar; GtkWidget *label; @@ -220,7 +71,7 @@ static GtkWidget *perf_gtk__setup_info_bar(void) } #endif -static GtkWidget *perf_gtk__setup_statusbar(void) +GtkWidget *perf_gtk__setup_statusbar(void) { GtkWidget *stbar; unsigned ctxid; @@ -234,79 +85,3 @@ static GtkWidget *perf_gtk__setup_statusbar(void) return stbar; } - -int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist, - const char *help, - struct hist_browser_timer *hbt __maybe_unused) -{ - struct perf_evsel *pos; - GtkWidget *vbox; - GtkWidget *notebook; - GtkWidget *info_bar; - GtkWidget *statbar; - GtkWidget *window; - - signal(SIGSEGV, perf_gtk__signal); - signal(SIGFPE, perf_gtk__signal); - signal(SIGINT, perf_gtk__signal); - signal(SIGQUIT, perf_gtk__signal); - signal(SIGTERM, perf_gtk__signal); - - window = gtk_window_new(GTK_WINDOW_TOPLEVEL); - - gtk_window_set_title(GTK_WINDOW(window), "perf report"); - - g_signal_connect(window, "delete_event", gtk_main_quit, NULL); - - pgctx = perf_gtk__activate_context(window); - if (!pgctx) - return -1; - - vbox = gtk_vbox_new(FALSE, 0); - - notebook = gtk_notebook_new(); - - list_for_each_entry(pos, &evlist->entries, node) { - struct hists *hists = &pos->hists; - const char *evname = perf_evsel__name(pos); - GtkWidget *scrolled_window; - GtkWidget *tab_label; - - scrolled_window = gtk_scrolled_window_new(NULL, NULL); - - gtk_scrolled_window_set_policy(GTK_SCROLLED_WINDOW(scrolled_window), - GTK_POLICY_AUTOMATIC, - GTK_POLICY_AUTOMATIC); - - perf_gtk__show_hists(scrolled_window, hists); - - tab_label = gtk_label_new(evname); - - gtk_notebook_append_page(GTK_NOTEBOOK(notebook), scrolled_window, tab_label); - } - - gtk_box_pack_start(GTK_BOX(vbox), notebook, TRUE, TRUE, 0); - - info_bar = perf_gtk__setup_info_bar(); - if (info_bar) - gtk_box_pack_start(GTK_BOX(vbox), info_bar, FALSE, FALSE, 0); - - statbar = perf_gtk__setup_statusbar(); - gtk_box_pack_start(GTK_BOX(vbox), statbar, FALSE, FALSE, 0); - - gtk_container_add(GTK_CONTAINER(window), vbox); - - gtk_widget_show_all(window); - - perf_gtk__resize_window(window); - - gtk_window_set_position(GTK_WINDOW(window), GTK_WIN_POS_CENTER); - - ui_helpline__push(help); - - gtk_main(); - - perf_gtk__deactivate_context(&pgctx); - - return 0; -} diff --git a/tools/perf/ui/gtk/gtk.h b/tools/perf/ui/gtk/gtk.h index 856320e2cc05..3d96785ef155 100644 --- a/tools/perf/ui/gtk/gtk.h +++ b/tools/perf/ui/gtk/gtk.h @@ -10,6 +10,7 @@ struct perf_gtk_context { GtkWidget *main_window; + GtkWidget *notebook; #ifdef HAVE_GTK_INFO_BAR GtkWidget *info_bar; @@ -33,7 +34,14 @@ void perf_gtk__init_helpline(void); void perf_gtk__init_progress(void); void perf_gtk__init_hpp(void); -#ifndef HAVE_GTK_INFO_BAR +void perf_gtk__signal(int sig); +void perf_gtk__resize_window(GtkWidget *window); +const char *perf_gtk__get_percent_color(double percent); +GtkWidget *perf_gtk__setup_statusbar(void); + +#ifdef HAVE_GTK_INFO_BAR +GtkWidget *perf_gtk__setup_info_bar(void); +#else static inline GtkWidget *perf_gtk__setup_info_bar(void) { return NULL; diff --git a/tools/perf/ui/gtk/helpline.c b/tools/perf/ui/gtk/helpline.c index 5db4432ff12a..3388cbd12186 100644 --- a/tools/perf/ui/gtk/helpline.c +++ b/tools/perf/ui/gtk/helpline.c @@ -24,17 +24,7 @@ static void gtk_helpline_push(const char *msg) pgctx->statbar_ctx_id, msg); } -static struct ui_helpline gtk_helpline_fns = { - .pop = gtk_helpline_pop, - .push = gtk_helpline_push, -}; - -void perf_gtk__init_helpline(void) -{ - helpline_fns = >k_helpline_fns; -} - -int perf_gtk__show_helpline(const char *fmt, va_list ap) +static int gtk_helpline_show(const char *fmt, va_list ap) { int ret; char *ptr; @@ -54,3 +44,14 @@ int perf_gtk__show_helpline(const char *fmt, va_list ap) return ret; } + +static struct ui_helpline gtk_helpline_fns = { + .pop = gtk_helpline_pop, + .push = gtk_helpline_push, + .show = gtk_helpline_show, +}; + +void perf_gtk__init_helpline(void) +{ + helpline_fns = >k_helpline_fns; +} diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c new file mode 100644 index 000000000000..1e764a8ad259 --- /dev/null +++ b/tools/perf/ui/gtk/hists.c @@ -0,0 +1,312 @@ +#include "../evlist.h" +#include "../cache.h" +#include "../evsel.h" +#include "../sort.h" +#include "../hist.h" +#include "../helpline.h" +#include "gtk.h" + +#define MAX_COLUMNS 32 + +static int __percent_color_snprintf(char *buf, size_t size, double percent) +{ + int ret = 0; + const char *markup; + + markup = perf_gtk__get_percent_color(percent); + if (markup) + ret += scnprintf(buf, size, markup); + + ret += scnprintf(buf + ret, size - ret, " %6.2f%%", percent); + + if (markup) + ret += scnprintf(buf + ret, size - ret, "</span>"); + + return ret; +} + + +static int __hpp__color_fmt(struct perf_hpp *hpp, struct hist_entry *he, + u64 (*get_field)(struct hist_entry *)) +{ + int ret; + double percent = 0.0; + struct hists *hists = he->hists; + + if (hists->stats.total_period) + percent = 100.0 * get_field(he) / hists->stats.total_period; + + ret = __percent_color_snprintf(hpp->buf, hpp->size, percent); + + if (symbol_conf.event_group) { + int prev_idx, idx_delta; + struct perf_evsel *evsel = hists_to_evsel(hists); + struct hist_entry *pair; + int nr_members = evsel->nr_members; + + if (nr_members <= 1) + return ret; + + prev_idx = perf_evsel__group_idx(evsel); + + list_for_each_entry(pair, &he->pairs.head, pairs.node) { + u64 period = get_field(pair); + u64 total = pair->hists->stats.total_period; + + evsel = hists_to_evsel(pair->hists); + idx_delta = perf_evsel__group_idx(evsel) - prev_idx - 1; + + while (idx_delta--) { + /* + * zero-fill group members in the middle which + * have no sample + */ + ret += __percent_color_snprintf(hpp->buf + ret, + hpp->size - ret, + 0.0); + } + + percent = 100.0 * period / total; + ret += __percent_color_snprintf(hpp->buf + ret, + hpp->size - ret, + percent); + + prev_idx = perf_evsel__group_idx(evsel); + } + + idx_delta = nr_members - prev_idx - 1; + + while (idx_delta--) { + /* + * zero-fill group members at last which have no sample + */ + ret += __percent_color_snprintf(hpp->buf + ret, + hpp->size - ret, + 0.0); + } + } + return ret; +} + +#define __HPP_COLOR_PERCENT_FN(_type, _field) \ +static u64 he_get_##_field(struct hist_entry *he) \ +{ \ + return he->stat._field; \ +} \ + \ +static int perf_gtk__hpp_color_##_type(struct perf_hpp *hpp, \ + struct hist_entry *he) \ +{ \ + return __hpp__color_fmt(hpp, he, he_get_##_field); \ +} + +__HPP_COLOR_PERCENT_FN(overhead, period) +__HPP_COLOR_PERCENT_FN(overhead_sys, period_sys) +__HPP_COLOR_PERCENT_FN(overhead_us, period_us) +__HPP_COLOR_PERCENT_FN(overhead_guest_sys, period_guest_sys) +__HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us) + +#undef __HPP_COLOR_PERCENT_FN + + +void perf_gtk__init_hpp(void) +{ + perf_hpp__column_enable(PERF_HPP__OVERHEAD); + + perf_hpp__init(); + + perf_hpp__format[PERF_HPP__OVERHEAD].color = + perf_gtk__hpp_color_overhead; + perf_hpp__format[PERF_HPP__OVERHEAD_SYS].color = + perf_gtk__hpp_color_overhead_sys; + perf_hpp__format[PERF_HPP__OVERHEAD_US].color = + perf_gtk__hpp_color_overhead_us; + perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_SYS].color = + perf_gtk__hpp_color_overhead_guest_sys; + perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].color = + perf_gtk__hpp_color_overhead_guest_us; +} + +static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists) +{ + struct perf_hpp_fmt *fmt; + GType col_types[MAX_COLUMNS]; + GtkCellRenderer *renderer; + struct sort_entry *se; + GtkListStore *store; + struct rb_node *nd; + GtkWidget *view; + int col_idx; + int nr_cols; + char s[512]; + + struct perf_hpp hpp = { + .buf = s, + .size = sizeof(s), + .ptr = hists_to_evsel(hists), + }; + + nr_cols = 0; + + perf_hpp__for_each_format(fmt) + col_types[nr_cols++] = G_TYPE_STRING; + + list_for_each_entry(se, &hist_entry__sort_list, list) { + if (se->elide) + continue; + + col_types[nr_cols++] = G_TYPE_STRING; + } + + store = gtk_list_store_newv(nr_cols, col_types); + + view = gtk_tree_view_new(); + + renderer = gtk_cell_renderer_text_new(); + + col_idx = 0; + + perf_hpp__for_each_format(fmt) { + fmt->header(&hpp); + + gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view), + -1, ltrim(s), + renderer, "markup", + col_idx++, NULL); + } + + list_for_each_entry(se, &hist_entry__sort_list, list) { + if (se->elide) + continue; + + gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view), + -1, se->se_header, + renderer, "text", + col_idx++, NULL); + } + + gtk_tree_view_set_model(GTK_TREE_VIEW(view), GTK_TREE_MODEL(store)); + + g_object_unref(GTK_TREE_MODEL(store)); + + for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { + struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); + GtkTreeIter iter; + + if (h->filtered) + continue; + + gtk_list_store_append(store, &iter); + + col_idx = 0; + + perf_hpp__for_each_format(fmt) { + if (fmt->color) + fmt->color(&hpp, h); + else + fmt->entry(&hpp, h); + + gtk_list_store_set(store, &iter, col_idx++, s, -1); + } + + list_for_each_entry(se, &hist_entry__sort_list, list) { + if (se->elide) + continue; + + se->se_snprintf(h, s, ARRAY_SIZE(s), + hists__col_len(hists, se->se_width_idx)); + + gtk_list_store_set(store, &iter, col_idx++, s, -1); + } + } + + gtk_container_add(GTK_CONTAINER(window), view); +} + +int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist, + const char *help, + struct hist_browser_timer *hbt __maybe_unused) +{ + struct perf_evsel *pos; + GtkWidget *vbox; + GtkWidget *notebook; + GtkWidget *info_bar; + GtkWidget *statbar; + GtkWidget *window; + + signal(SIGSEGV, perf_gtk__signal); + signal(SIGFPE, perf_gtk__signal); + signal(SIGINT, perf_gtk__signal); + signal(SIGQUIT, perf_gtk__signal); + signal(SIGTERM, perf_gtk__signal); + + window = gtk_window_new(GTK_WINDOW_TOPLEVEL); + + gtk_window_set_title(GTK_WINDOW(window), "perf report"); + + g_signal_connect(window, "delete_event", gtk_main_quit, NULL); + + pgctx = perf_gtk__activate_context(window); + if (!pgctx) + return -1; + + vbox = gtk_vbox_new(FALSE, 0); + + notebook = gtk_notebook_new(); + + gtk_box_pack_start(GTK_BOX(vbox), notebook, TRUE, TRUE, 0); + + info_bar = perf_gtk__setup_info_bar(); + if (info_bar) + gtk_box_pack_start(GTK_BOX(vbox), info_bar, FALSE, FALSE, 0); + + statbar = perf_gtk__setup_statusbar(); + gtk_box_pack_start(GTK_BOX(vbox), statbar, FALSE, FALSE, 0); + + gtk_container_add(GTK_CONTAINER(window), vbox); + + list_for_each_entry(pos, &evlist->entries, node) { + struct hists *hists = &pos->hists; + const char *evname = perf_evsel__name(pos); + GtkWidget *scrolled_window; + GtkWidget *tab_label; + char buf[512]; + size_t size = sizeof(buf); + + if (symbol_conf.event_group) { + if (!perf_evsel__is_group_leader(pos)) + continue; + + if (pos->nr_members > 1) { + perf_evsel__group_desc(pos, buf, size); + evname = buf; + } + } + + scrolled_window = gtk_scrolled_window_new(NULL, NULL); + + gtk_scrolled_window_set_policy(GTK_SCROLLED_WINDOW(scrolled_window), + GTK_POLICY_AUTOMATIC, + GTK_POLICY_AUTOMATIC); + + perf_gtk__show_hists(scrolled_window, hists); + + tab_label = gtk_label_new(evname); + + gtk_notebook_append_page(GTK_NOTEBOOK(notebook), scrolled_window, tab_label); + } + + gtk_widget_show_all(window); + + perf_gtk__resize_window(window); + + gtk_window_set_position(GTK_WINDOW(window), GTK_WIN_POS_CENTER); + + ui_helpline__push(help); + + gtk_main(); + + perf_gtk__deactivate_context(&pgctx); + + return 0; +} diff --git a/tools/perf/ui/helpline.c b/tools/perf/ui/helpline.c index a49bcf3c190b..700fb3cfa1c7 100644 --- a/tools/perf/ui/helpline.c +++ b/tools/perf/ui/helpline.c @@ -16,9 +16,16 @@ static void nop_helpline__push(const char *msg __maybe_unused) { } +static int nop_helpline__show(const char *fmt __maybe_unused, + va_list ap __maybe_unused) +{ + return 0; +} + static struct ui_helpline default_helpline_fns = { .pop = nop_helpline__pop, .push = nop_helpline__push, + .show = nop_helpline__show, }; struct ui_helpline *helpline_fns = &default_helpline_fns; @@ -59,3 +66,8 @@ void ui_helpline__puts(const char *msg) ui_helpline__pop(); ui_helpline__push(msg); } + +int ui_helpline__vshow(const char *fmt, va_list ap) +{ + return helpline_fns->show(fmt, ap); +} diff --git a/tools/perf/ui/helpline.h b/tools/perf/ui/helpline.h index baa28a4d16b9..46181f4fc07e 100644 --- a/tools/perf/ui/helpline.h +++ b/tools/perf/ui/helpline.h @@ -9,6 +9,7 @@ struct ui_helpline { void (*pop)(void); void (*push)(const char *msg); + int (*show)(const char *fmt, va_list ap); }; extern struct ui_helpline *helpline_fns; @@ -20,28 +21,9 @@ void ui_helpline__push(const char *msg); void ui_helpline__vpush(const char *fmt, va_list ap); void ui_helpline__fpush(const char *fmt, ...); void ui_helpline__puts(const char *msg); +int ui_helpline__vshow(const char *fmt, va_list ap); extern char ui_helpline__current[512]; - -#ifdef NEWT_SUPPORT extern char ui_helpline__last_msg[]; -int ui_helpline__show_help(const char *format, va_list ap); -#else -static inline int ui_helpline__show_help(const char *format __maybe_unused, - va_list ap __maybe_unused) -{ - return 0; -} -#endif /* NEWT_SUPPORT */ - -#ifdef GTK2_SUPPORT -int perf_gtk__show_helpline(const char *format, va_list ap); -#else -static inline int perf_gtk__show_helpline(const char *format __maybe_unused, - va_list ap __maybe_unused) -{ - return 0; -} -#endif /* GTK2_SUPPORT */ #endif /* _PERF_UI_HELPLINE_H_ */ diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index aa84130024d5..d671e63aa351 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -3,151 +3,163 @@ #include "../util/hist.h" #include "../util/util.h" #include "../util/sort.h" - +#include "../util/evsel.h" /* hist period print (hpp) functions */ -static int hpp__header_overhead(struct perf_hpp *hpp) -{ - return scnprintf(hpp->buf, hpp->size, "Overhead"); -} - -static int hpp__width_overhead(struct perf_hpp *hpp __maybe_unused) -{ - return 8; -} - -static int hpp__color_overhead(struct perf_hpp *hpp, struct hist_entry *he) -{ - struct hists *hists = he->hists; - double percent = 100.0 * he->stat.period / hists->stats.total_period; - return percent_color_snprintf(hpp->buf, hpp->size, " %6.2f%%", percent); -} +typedef int (*hpp_snprint_fn)(char *buf, size_t size, const char *fmt, ...); -static int hpp__entry_overhead(struct perf_hpp *hpp, struct hist_entry *he) +static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he, + u64 (*get_field)(struct hist_entry *), + const char *fmt, hpp_snprint_fn print_fn, + bool fmt_percent) { + int ret; struct hists *hists = he->hists; - double percent = 100.0 * he->stat.period / hists->stats.total_period; - const char *fmt = symbol_conf.field_sep ? "%.2f" : " %6.2f%%"; - - return scnprintf(hpp->buf, hpp->size, fmt, percent); -} -static int hpp__header_overhead_sys(struct perf_hpp *hpp) -{ - const char *fmt = symbol_conf.field_sep ? "%s" : "%7s"; - - return scnprintf(hpp->buf, hpp->size, fmt, "sys"); -} + if (fmt_percent) { + double percent = 0.0; -static int hpp__width_overhead_sys(struct perf_hpp *hpp __maybe_unused) -{ - return 7; -} + if (hists->stats.total_period) + percent = 100.0 * get_field(he) / + hists->stats.total_period; -static int hpp__color_overhead_sys(struct perf_hpp *hpp, struct hist_entry *he) -{ - struct hists *hists = he->hists; - double percent = 100.0 * he->stat.period_sys / hists->stats.total_period; + ret = print_fn(hpp->buf, hpp->size, fmt, percent); + } else + ret = print_fn(hpp->buf, hpp->size, fmt, get_field(he)); - return percent_color_snprintf(hpp->buf, hpp->size, "%6.2f%%", percent); -} + if (symbol_conf.event_group) { + int prev_idx, idx_delta; + struct perf_evsel *evsel = hists_to_evsel(hists); + struct hist_entry *pair; + int nr_members = evsel->nr_members; -static int hpp__entry_overhead_sys(struct perf_hpp *hpp, struct hist_entry *he) -{ - struct hists *hists = he->hists; - double percent = 100.0 * he->stat.period_sys / hists->stats.total_period; - const char *fmt = symbol_conf.field_sep ? "%.2f" : "%6.2f%%"; + if (nr_members <= 1) + return ret; - return scnprintf(hpp->buf, hpp->size, fmt, percent); -} + prev_idx = perf_evsel__group_idx(evsel); -static int hpp__header_overhead_us(struct perf_hpp *hpp) -{ - const char *fmt = symbol_conf.field_sep ? "%s" : "%7s"; + list_for_each_entry(pair, &he->pairs.head, pairs.node) { + u64 period = get_field(pair); + u64 total = pair->hists->stats.total_period; - return scnprintf(hpp->buf, hpp->size, fmt, "user"); -} + if (!total) + continue; -static int hpp__width_overhead_us(struct perf_hpp *hpp __maybe_unused) -{ - return 7; -} + evsel = hists_to_evsel(pair->hists); + idx_delta = perf_evsel__group_idx(evsel) - prev_idx - 1; -static int hpp__color_overhead_us(struct perf_hpp *hpp, struct hist_entry *he) -{ - struct hists *hists = he->hists; - double percent = 100.0 * he->stat.period_us / hists->stats.total_period; + while (idx_delta--) { + /* + * zero-fill group members in the middle which + * have no sample + */ + ret += print_fn(hpp->buf + ret, hpp->size - ret, + fmt, 0); + } - return percent_color_snprintf(hpp->buf, hpp->size, "%6.2f%%", percent); -} + if (fmt_percent) + ret += print_fn(hpp->buf + ret, hpp->size - ret, + fmt, 100.0 * period / total); + else + ret += print_fn(hpp->buf + ret, hpp->size - ret, + fmt, period); -static int hpp__entry_overhead_us(struct perf_hpp *hpp, struct hist_entry *he) -{ - struct hists *hists = he->hists; - double percent = 100.0 * he->stat.period_us / hists->stats.total_period; - const char *fmt = symbol_conf.field_sep ? "%.2f" : "%6.2f%%"; - - return scnprintf(hpp->buf, hpp->size, fmt, percent); -} - -static int hpp__header_overhead_guest_sys(struct perf_hpp *hpp) -{ - return scnprintf(hpp->buf, hpp->size, "guest sys"); -} - -static int hpp__width_overhead_guest_sys(struct perf_hpp *hpp __maybe_unused) -{ - return 9; -} - -static int hpp__color_overhead_guest_sys(struct perf_hpp *hpp, - struct hist_entry *he) -{ - struct hists *hists = he->hists; - double percent = 100.0 * he->stat.period_guest_sys / hists->stats.total_period; - - return percent_color_snprintf(hpp->buf, hpp->size, " %6.2f%% ", percent); -} - -static int hpp__entry_overhead_guest_sys(struct perf_hpp *hpp, - struct hist_entry *he) -{ - struct hists *hists = he->hists; - double percent = 100.0 * he->stat.period_guest_sys / hists->stats.total_period; - const char *fmt = symbol_conf.field_sep ? "%.2f" : " %6.2f%% "; - - return scnprintf(hpp->buf, hpp->size, fmt, percent); -} - -static int hpp__header_overhead_guest_us(struct perf_hpp *hpp) -{ - return scnprintf(hpp->buf, hpp->size, "guest usr"); -} + prev_idx = perf_evsel__group_idx(evsel); + } -static int hpp__width_overhead_guest_us(struct perf_hpp *hpp __maybe_unused) -{ - return 9; -} + idx_delta = nr_members - prev_idx - 1; -static int hpp__color_overhead_guest_us(struct perf_hpp *hpp, - struct hist_entry *he) -{ - struct hists *hists = he->hists; - double percent = 100.0 * he->stat.period_guest_us / hists->stats.total_period; - - return percent_color_snprintf(hpp->buf, hpp->size, " %6.2f%% ", percent); + while (idx_delta--) { + /* + * zero-fill group members at last which have no sample + */ + ret += print_fn(hpp->buf + ret, hpp->size - ret, + fmt, 0); + } + } + return ret; } -static int hpp__entry_overhead_guest_us(struct perf_hpp *hpp, - struct hist_entry *he) -{ - struct hists *hists = he->hists; - double percent = 100.0 * he->stat.period_guest_us / hists->stats.total_period; - const char *fmt = symbol_conf.field_sep ? "%.2f" : " %6.2f%% "; +#define __HPP_HEADER_FN(_type, _str, _min_width, _unit_width) \ +static int hpp__header_##_type(struct perf_hpp *hpp) \ +{ \ + int len = _min_width; \ + \ + if (symbol_conf.event_group) { \ + struct perf_evsel *evsel = hpp->ptr; \ + \ + len = max(len, evsel->nr_members * _unit_width); \ + } \ + return scnprintf(hpp->buf, hpp->size, "%*s", len, _str); \ +} + +#define __HPP_WIDTH_FN(_type, _min_width, _unit_width) \ +static int hpp__width_##_type(struct perf_hpp *hpp __maybe_unused) \ +{ \ + int len = _min_width; \ + \ + if (symbol_conf.event_group) { \ + struct perf_evsel *evsel = hpp->ptr; \ + \ + len = max(len, evsel->nr_members * _unit_width); \ + } \ + return len; \ +} + +#define __HPP_COLOR_PERCENT_FN(_type, _field) \ +static u64 he_get_##_field(struct hist_entry *he) \ +{ \ + return he->stat._field; \ +} \ + \ +static int hpp__color_##_type(struct perf_hpp *hpp, struct hist_entry *he) \ +{ \ + return __hpp__fmt(hpp, he, he_get_##_field, " %6.2f%%", \ + (hpp_snprint_fn)percent_color_snprintf, true); \ +} + +#define __HPP_ENTRY_PERCENT_FN(_type, _field) \ +static int hpp__entry_##_type(struct perf_hpp *hpp, struct hist_entry *he) \ +{ \ + const char *fmt = symbol_conf.field_sep ? " %.2f" : " %6.2f%%"; \ + return __hpp__fmt(hpp, he, he_get_##_field, fmt, \ + scnprintf, true); \ +} + +#define __HPP_ENTRY_RAW_FN(_type, _field) \ +static u64 he_get_raw_##_field(struct hist_entry *he) \ +{ \ + return he->stat._field; \ +} \ + \ +static int hpp__entry_##_type(struct perf_hpp *hpp, struct hist_entry *he) \ +{ \ + const char *fmt = symbol_conf.field_sep ? " %"PRIu64 : " %11"PRIu64; \ + return __hpp__fmt(hpp, he, he_get_raw_##_field, fmt, scnprintf, false); \ +} + +#define HPP_PERCENT_FNS(_type, _str, _field, _min_width, _unit_width) \ +__HPP_HEADER_FN(_type, _str, _min_width, _unit_width) \ +__HPP_WIDTH_FN(_type, _min_width, _unit_width) \ +__HPP_COLOR_PERCENT_FN(_type, _field) \ +__HPP_ENTRY_PERCENT_FN(_type, _field) + +#define HPP_RAW_FNS(_type, _str, _field, _min_width, _unit_width) \ +__HPP_HEADER_FN(_type, _str, _min_width, _unit_width) \ +__HPP_WIDTH_FN(_type, _min_width, _unit_width) \ +__HPP_ENTRY_RAW_FN(_type, _field) + + +HPP_PERCENT_FNS(overhead, "Overhead", period, 8, 8) +HPP_PERCENT_FNS(overhead_sys, "sys", period_sys, 8, 8) +HPP_PERCENT_FNS(overhead_us, "usr", period_us, 8, 8) +HPP_PERCENT_FNS(overhead_guest_sys, "guest sys", period_guest_sys, 9, 8) +HPP_PERCENT_FNS(overhead_guest_us, "guest usr", period_guest_us, 9, 8) + +HPP_RAW_FNS(samples, "Samples", nr_events, 12, 12) +HPP_RAW_FNS(period, "Period", period, 12, 12) - return scnprintf(hpp->buf, hpp->size, fmt, percent); -} static int hpp__header_baseline(struct perf_hpp *hpp) { @@ -179,7 +191,7 @@ static int hpp__color_baseline(struct perf_hpp *hpp, struct hist_entry *he) { double percent = baseline_percent(he); - if (hist_entry__has_pairs(he)) + if (hist_entry__has_pairs(he) || symbol_conf.field_sep) return percent_color_snprintf(hpp->buf, hpp->size, " %6.2f%%", percent); else return scnprintf(hpp->buf, hpp->size, " "); @@ -196,44 +208,6 @@ static int hpp__entry_baseline(struct perf_hpp *hpp, struct hist_entry *he) return scnprintf(hpp->buf, hpp->size, " "); } -static int hpp__header_samples(struct perf_hpp *hpp) -{ - const char *fmt = symbol_conf.field_sep ? "%s" : "%11s"; - - return scnprintf(hpp->buf, hpp->size, fmt, "Samples"); -} - -static int hpp__width_samples(struct perf_hpp *hpp __maybe_unused) -{ - return 11; -} - -static int hpp__entry_samples(struct perf_hpp *hpp, struct hist_entry *he) -{ - const char *fmt = symbol_conf.field_sep ? "%" PRIu64 : "%11" PRIu64; - - return scnprintf(hpp->buf, hpp->size, fmt, he->stat.nr_events); -} - -static int hpp__header_period(struct perf_hpp *hpp) -{ - const char *fmt = symbol_conf.field_sep ? "%s" : "%12s"; - - return scnprintf(hpp->buf, hpp->size, fmt, "Period"); -} - -static int hpp__width_period(struct perf_hpp *hpp __maybe_unused) -{ - return 12; -} - -static int hpp__entry_period(struct perf_hpp *hpp, struct hist_entry *he) -{ - const char *fmt = symbol_conf.field_sep ? "%" PRIu64 : "%12" PRIu64; - - return scnprintf(hpp->buf, hpp->size, fmt, he->stat.period); -} - static int hpp__header_period_baseline(struct perf_hpp *hpp) { const char *fmt = symbol_conf.field_sep ? "%s" : "%12s"; @@ -254,6 +228,7 @@ static int hpp__entry_period_baseline(struct perf_hpp *hpp, struct hist_entry *h return scnprintf(hpp->buf, hpp->size, fmt, period); } + static int hpp__header_delta(struct perf_hpp *hpp) { const char *fmt = symbol_conf.field_sep ? "%s" : "%7s"; @@ -268,14 +243,18 @@ static int hpp__width_delta(struct perf_hpp *hpp __maybe_unused) static int hpp__entry_delta(struct perf_hpp *hpp, struct hist_entry *he) { + struct hist_entry *pair = hist_entry__next_pair(he); const char *fmt = symbol_conf.field_sep ? "%s" : "%7.7s"; char buf[32] = " "; - double diff; + double diff = 0.0; - if (he->diff.computed) - diff = he->diff.period_ratio_delta; - else - diff = perf_diff__compute_delta(he); + if (pair) { + if (he->diff.computed) + diff = he->diff.period_ratio_delta; + else + diff = perf_diff__compute_delta(he, pair); + } else + diff = perf_diff__period_percent(he, he->stat.period); if (fabs(diff) >= 0.01) scnprintf(buf, sizeof(buf), "%+4.2F%%", diff); @@ -297,14 +276,17 @@ static int hpp__width_ratio(struct perf_hpp *hpp __maybe_unused) static int hpp__entry_ratio(struct perf_hpp *hpp, struct hist_entry *he) { + struct hist_entry *pair = hist_entry__next_pair(he); const char *fmt = symbol_conf.field_sep ? "%s" : "%14s"; char buf[32] = " "; - double ratio; + double ratio = 0.0; - if (he->diff.computed) - ratio = he->diff.period_ratio; - else - ratio = perf_diff__compute_ratio(he); + if (pair) { + if (he->diff.computed) + ratio = he->diff.period_ratio; + else + ratio = perf_diff__compute_ratio(he, pair); + } if (ratio > 0.0) scnprintf(buf, sizeof(buf), "%+14.6F", ratio); @@ -326,14 +308,17 @@ static int hpp__width_wdiff(struct perf_hpp *hpp __maybe_unused) static int hpp__entry_wdiff(struct perf_hpp *hpp, struct hist_entry *he) { + struct hist_entry *pair = hist_entry__next_pair(he); const char *fmt = symbol_conf.field_sep ? "%s" : "%14s"; char buf[32] = " "; - s64 wdiff; + s64 wdiff = 0; - if (he->diff.computed) - wdiff = he->diff.wdiff; - else - wdiff = perf_diff__compute_wdiff(he); + if (pair) { + if (he->diff.computed) + wdiff = he->diff.wdiff; + else + wdiff = perf_diff__compute_wdiff(he, pair); + } if (wdiff != 0) scnprintf(buf, sizeof(buf), "%14ld", wdiff); @@ -341,30 +326,6 @@ static int hpp__entry_wdiff(struct perf_hpp *hpp, struct hist_entry *he) return scnprintf(hpp->buf, hpp->size, fmt, buf); } -static int hpp__header_displ(struct perf_hpp *hpp) -{ - return scnprintf(hpp->buf, hpp->size, "Displ."); -} - -static int hpp__width_displ(struct perf_hpp *hpp __maybe_unused) -{ - return 6; -} - -static int hpp__entry_displ(struct perf_hpp *hpp, - struct hist_entry *he) -{ - struct hist_entry *pair = hist_entry__next_pair(he); - long displacement = pair ? pair->position - he->position : 0; - const char *fmt = symbol_conf.field_sep ? "%s" : "%6.6s"; - char buf[32] = " "; - - if (displacement) - scnprintf(buf, sizeof(buf), "%+4ld", displacement); - - return scnprintf(hpp->buf, hpp->size, fmt, buf); -} - static int hpp__header_formula(struct perf_hpp *hpp) { const char *fmt = symbol_conf.field_sep ? "%s" : "%70s"; @@ -379,67 +340,91 @@ static int hpp__width_formula(struct perf_hpp *hpp __maybe_unused) static int hpp__entry_formula(struct perf_hpp *hpp, struct hist_entry *he) { + struct hist_entry *pair = hist_entry__next_pair(he); const char *fmt = symbol_conf.field_sep ? "%s" : "%-70s"; char buf[96] = " "; - perf_diff__formula(buf, sizeof(buf), he); + if (pair) + perf_diff__formula(he, pair, buf, sizeof(buf)); + return scnprintf(hpp->buf, hpp->size, fmt, buf); } -#define HPP__COLOR_PRINT_FNS(_name) \ - .header = hpp__header_ ## _name, \ - .width = hpp__width_ ## _name, \ - .color = hpp__color_ ## _name, \ - .entry = hpp__entry_ ## _name +#define HPP__COLOR_PRINT_FNS(_name) \ + { \ + .header = hpp__header_ ## _name, \ + .width = hpp__width_ ## _name, \ + .color = hpp__color_ ## _name, \ + .entry = hpp__entry_ ## _name \ + } -#define HPP__PRINT_FNS(_name) \ - .header = hpp__header_ ## _name, \ - .width = hpp__width_ ## _name, \ - .entry = hpp__entry_ ## _name +#define HPP__PRINT_FNS(_name) \ + { \ + .header = hpp__header_ ## _name, \ + .width = hpp__width_ ## _name, \ + .entry = hpp__entry_ ## _name \ + } struct perf_hpp_fmt perf_hpp__format[] = { - { .cond = false, HPP__COLOR_PRINT_FNS(baseline) }, - { .cond = true, HPP__COLOR_PRINT_FNS(overhead) }, - { .cond = false, HPP__COLOR_PRINT_FNS(overhead_sys) }, - { .cond = false, HPP__COLOR_PRINT_FNS(overhead_us) }, - { .cond = false, HPP__COLOR_PRINT_FNS(overhead_guest_sys) }, - { .cond = false, HPP__COLOR_PRINT_FNS(overhead_guest_us) }, - { .cond = false, HPP__PRINT_FNS(samples) }, - { .cond = false, HPP__PRINT_FNS(period) }, - { .cond = false, HPP__PRINT_FNS(period_baseline) }, - { .cond = false, HPP__PRINT_FNS(delta) }, - { .cond = false, HPP__PRINT_FNS(ratio) }, - { .cond = false, HPP__PRINT_FNS(wdiff) }, - { .cond = false, HPP__PRINT_FNS(displ) }, - { .cond = false, HPP__PRINT_FNS(formula) } + HPP__COLOR_PRINT_FNS(baseline), + HPP__COLOR_PRINT_FNS(overhead), + HPP__COLOR_PRINT_FNS(overhead_sys), + HPP__COLOR_PRINT_FNS(overhead_us), + HPP__COLOR_PRINT_FNS(overhead_guest_sys), + HPP__COLOR_PRINT_FNS(overhead_guest_us), + HPP__PRINT_FNS(samples), + HPP__PRINT_FNS(period), + HPP__PRINT_FNS(period_baseline), + HPP__PRINT_FNS(delta), + HPP__PRINT_FNS(ratio), + HPP__PRINT_FNS(wdiff), + HPP__PRINT_FNS(formula) }; +LIST_HEAD(perf_hpp__list); + + #undef HPP__COLOR_PRINT_FNS #undef HPP__PRINT_FNS +#undef HPP_PERCENT_FNS +#undef HPP_RAW_FNS + +#undef __HPP_HEADER_FN +#undef __HPP_WIDTH_FN +#undef __HPP_COLOR_PERCENT_FN +#undef __HPP_ENTRY_PERCENT_FN +#undef __HPP_ENTRY_RAW_FN + + void perf_hpp__init(void) { if (symbol_conf.show_cpu_utilization) { - perf_hpp__format[PERF_HPP__OVERHEAD_SYS].cond = true; - perf_hpp__format[PERF_HPP__OVERHEAD_US].cond = true; + perf_hpp__column_enable(PERF_HPP__OVERHEAD_SYS); + perf_hpp__column_enable(PERF_HPP__OVERHEAD_US); if (perf_guest) { - perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_SYS].cond = true; - perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].cond = true; + perf_hpp__column_enable(PERF_HPP__OVERHEAD_GUEST_SYS); + perf_hpp__column_enable(PERF_HPP__OVERHEAD_GUEST_US); } } if (symbol_conf.show_nr_samples) - perf_hpp__format[PERF_HPP__SAMPLES].cond = true; + perf_hpp__column_enable(PERF_HPP__SAMPLES); if (symbol_conf.show_total_period) - perf_hpp__format[PERF_HPP__PERIOD].cond = true; + perf_hpp__column_enable(PERF_HPP__PERIOD); +} + +void perf_hpp__column_register(struct perf_hpp_fmt *format) +{ + list_add_tail(&format->list, &perf_hpp__list); } -void perf_hpp__column_enable(unsigned col, bool enable) +void perf_hpp__column_enable(unsigned col) { BUG_ON(col >= PERF_HPP__MAX_INDEX); - perf_hpp__format[col].cond = enable; + perf_hpp__column_register(&perf_hpp__format[col]); } static inline void advance_hpp(struct perf_hpp *hpp, int inc) @@ -452,27 +437,29 @@ int hist_entry__period_snprintf(struct perf_hpp *hpp, struct hist_entry *he, bool color) { const char *sep = symbol_conf.field_sep; + struct perf_hpp_fmt *fmt; char *start = hpp->buf; - int i, ret; + int ret; bool first = true; if (symbol_conf.exclude_other && !he->parent) return 0; - for (i = 0; i < PERF_HPP__MAX_INDEX; i++) { - if (!perf_hpp__format[i].cond) - continue; - + perf_hpp__for_each_format(fmt) { + /* + * If there's no field_sep, we still need + * to display initial ' '. + */ if (!sep || !first) { ret = scnprintf(hpp->buf, hpp->size, "%s", sep ?: " "); advance_hpp(hpp, ret); + } else first = false; - } - if (color && perf_hpp__format[i].color) - ret = perf_hpp__format[i].color(hpp, he); + if (color && fmt->color) + ret = fmt->color(hpp, he); else - ret = perf_hpp__format[i].entry(hpp, he); + ret = fmt->entry(hpp, he); advance_hpp(hpp, ret); } @@ -504,16 +491,18 @@ int hist_entry__sort_snprintf(struct hist_entry *he, char *s, size_t size, */ unsigned int hists__sort_list_width(struct hists *hists) { + struct perf_hpp_fmt *fmt; struct sort_entry *se; - int i, ret = 0; + int i = 0, ret = 0; + struct perf_hpp dummy_hpp = { + .ptr = hists_to_evsel(hists), + }; - for (i = 0; i < PERF_HPP__MAX_INDEX; i++) { - if (!perf_hpp__format[i].cond) - continue; + perf_hpp__for_each_format(fmt) { if (i) ret += 2; - ret += perf_hpp__format[i].width(NULL); + ret += fmt->width(&dummy_hpp); } list_for_each_entry(se, &hist_entry__sort_list, list) diff --git a/tools/perf/ui/keysyms.h b/tools/perf/ui/keysyms.h index 809eca5707fa..65092d576b4e 100644 --- a/tools/perf/ui/keysyms.h +++ b/tools/perf/ui/keysyms.h @@ -23,5 +23,6 @@ #define K_TIMER -1 #define K_ERROR -2 #define K_RESIZE -3 +#define K_SWITCH_INPUT_DATA -4 #endif /* _PERF_KEYSYMS_H_ */ diff --git a/tools/perf/ui/setup.c b/tools/perf/ui/setup.c index ebb4cc107876..ae6a789cb0f6 100644 --- a/tools/perf/ui/setup.c +++ b/tools/perf/ui/setup.c @@ -8,7 +8,7 @@ pthread_mutex_t ui__lock = PTHREAD_MUTEX_INITIALIZER; void setup_browser(bool fallback_to_pager) { - if (!isatty(1) || dump_trace) + if (use_browser < 2 && (!isatty(1) || dump_trace)) use_browser = 0; /* default to TUI */ @@ -30,6 +30,7 @@ void setup_browser(bool fallback_to_pager) if (fallback_to_pager) setup_pager(); + perf_hpp__column_enable(PERF_HPP__OVERHEAD); perf_hpp__init(); break; } diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index f0ee204f99bb..ff1f60cf442e 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -3,6 +3,7 @@ #include "../../util/util.h" #include "../../util/hist.h" #include "../../util/sort.h" +#include "../../util/evsel.h" static size_t callchain__fprintf_left_margin(FILE *fp, int left_margin) @@ -335,17 +336,19 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size, size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, int max_cols, FILE *fp) { + struct perf_hpp_fmt *fmt; struct sort_entry *se; struct rb_node *nd; size_t ret = 0; unsigned int width; const char *sep = symbol_conf.field_sep; const char *col_width = symbol_conf.col_width_list_str; - int idx, nr_rows = 0; + int nr_rows = 0; char bf[96]; struct perf_hpp dummy_hpp = { .buf = bf, .size = sizeof(bf), + .ptr = hists_to_evsel(hists), }; bool first = true; @@ -355,16 +358,14 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, goto print_entries; fprintf(fp, "# "); - for (idx = 0; idx < PERF_HPP__MAX_INDEX; idx++) { - if (!perf_hpp__format[idx].cond) - continue; + perf_hpp__for_each_format(fmt) { if (!first) fprintf(fp, "%s", sep ?: " "); else first = false; - perf_hpp__format[idx].header(&dummy_hpp); + fmt->header(&dummy_hpp); fprintf(fp, "%s", bf); } @@ -400,18 +401,16 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, first = true; fprintf(fp, "# "); - for (idx = 0; idx < PERF_HPP__MAX_INDEX; idx++) { - unsigned int i; - if (!perf_hpp__format[idx].cond) - continue; + perf_hpp__for_each_format(fmt) { + unsigned int i; if (!first) fprintf(fp, "%s", sep ?: " "); else first = false; - width = perf_hpp__format[idx].width(&dummy_hpp); + width = fmt->width(&dummy_hpp); for (i = 0; i < width; i++) fprintf(fp, "."); } @@ -462,7 +461,7 @@ out: return ret; } -size_t hists__fprintf_nr_events(struct hists *hists, FILE *fp) +size_t events_stats__fprintf(struct events_stats *stats, FILE *fp) { int i; size_t ret = 0; @@ -470,7 +469,7 @@ size_t hists__fprintf_nr_events(struct hists *hists, FILE *fp) for (i = 0; i < PERF_RECORD_HEADER_MAX; ++i) { const char *name; - if (hists->stats.nr_events[i] == 0) + if (stats->nr_events[i] == 0) continue; name = perf_event__name(i); @@ -478,7 +477,7 @@ size_t hists__fprintf_nr_events(struct hists *hists, FILE *fp) continue; ret += fprintf(fp, "%16s events: %10d\n", name, - hists->stats.nr_events[i]); + stats->nr_events[i]); } return ret; diff --git a/tools/perf/ui/tui/helpline.c b/tools/perf/ui/tui/helpline.c index 2884d2f41e33..1c8b9afd5d6e 100644 --- a/tools/perf/ui/tui/helpline.c +++ b/tools/perf/ui/tui/helpline.c @@ -8,6 +8,8 @@ #include "../ui.h" #include "../libslang.h" +char ui_helpline__last_msg[1024]; + static void tui_helpline__pop(void) { } @@ -23,20 +25,7 @@ static void tui_helpline__push(const char *msg) strncpy(ui_helpline__current, msg, sz)[sz - 1] = '\0'; } -struct ui_helpline tui_helpline_fns = { - .pop = tui_helpline__pop, - .push = tui_helpline__push, -}; - -void ui_helpline__init(void) -{ - helpline_fns = &tui_helpline_fns; - ui_helpline__puts(" "); -} - -char ui_helpline__last_msg[1024]; - -int ui_helpline__show_help(const char *format, va_list ap) +static int tui_helpline__show(const char *format, va_list ap) { int ret; static int backlog; @@ -55,3 +44,15 @@ int ui_helpline__show_help(const char *format, va_list ap) return ret; } + +struct ui_helpline tui_helpline_fns = { + .pop = tui_helpline__pop, + .push = tui_helpline__push, + .show = tui_helpline__show, +}; + +void ui_helpline__init(void) +{ + helpline_fns = &tui_helpline_fns; + ui_helpline__puts(" "); +} diff --git a/tools/perf/ui/util.c b/tools/perf/ui/util.c index 4f989774c8c6..e3e0a963d03a 100644 --- a/tools/perf/ui/util.c +++ b/tools/perf/ui/util.c @@ -52,7 +52,6 @@ int ui__warning(const char *format, ...) return ret; } - /** * perf_error__register - Register error logging functions * @eops: The pointer to error logging function struct diff --git a/tools/perf/util/PERF-VERSION-GEN b/tools/perf/util/PERF-VERSION-GEN index 6aa34e5afdcf..055fef34b6f6 100755 --- a/tools/perf/util/PERF-VERSION-GEN +++ b/tools/perf/util/PERF-VERSION-GEN @@ -26,13 +26,13 @@ VN=$(expr "$VN" : v*'\(.*\)') if test -r $GVF then - VC=$(sed -e 's/^PERF_VERSION = //' <$GVF) + VC=$(sed -e 's/^#define PERF_VERSION "\(.*\)"/\1/' <$GVF) else VC=unset fi test "$VN" = "$VC" || { echo >&2 "PERF_VERSION = $VN" - echo "PERF_VERSION = $VN" >$GVF + echo "#define PERF_VERSION \"$VN\"" >$GVF } diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 07aaeea60000..d33fe937e6f1 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -809,7 +809,7 @@ fallback: pr_err("Can't annotate %s:\n\n" "No vmlinux file%s\nwas found in the path.\n\n" "Please use:\n\n" - " perf buildid-cache -av vmlinux\n\n" + " perf buildid-cache -vu vmlinux\n\n" "or:\n\n" " --vmlinux vmlinux\n", sym->name, build_id_msg ?: ""); diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 8eec94358a4a..c422440fe611 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -6,6 +6,7 @@ #include "types.h" #include "symbol.h" #include "hist.h" +#include "sort.h" #include <linux/list.h> #include <linux/rbtree.h> #include <pthread.h> @@ -154,6 +155,29 @@ static inline int symbol__tui_annotate(struct symbol *sym __maybe_unused, } #endif +#ifdef GTK2_SUPPORT +int symbol__gtk_annotate(struct symbol *sym, struct map *map, int evidx, + struct hist_browser_timer *hbt); + +static inline int hist_entry__gtk_annotate(struct hist_entry *he, int evidx, + struct hist_browser_timer *hbt) +{ + return symbol__gtk_annotate(he->ms.sym, he->ms.map, evidx, hbt); +} + +void perf_gtk__show_annotations(void); +#else +static inline int hist_entry__gtk_annotate(struct hist_entry *he __maybe_unused, + int evidx __maybe_unused, + struct hist_browser_timer *hbt + __maybe_unused) +{ + return 0; +} + +static inline void perf_gtk__show_annotations(void) {} +#endif + extern const char *disassembler_style; #endif /* __PERF_ANNOTATE_H */ diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index d3b3f5d82137..42b6a632fe7b 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -444,7 +444,7 @@ int callchain_cursor_append(struct callchain_cursor *cursor, struct callchain_cursor_node *node = *cursor->last; if (!node) { - node = calloc(sizeof(*node), 1); + node = calloc(1, sizeof(*node)); if (!node) return -ENOMEM; diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index eb340571e7d6..3ee9f67d5af0 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -143,4 +143,9 @@ static inline void callchain_cursor_advance(struct callchain_cursor *cursor) cursor->curr = cursor->curr->next; cursor->pos++; } + +struct option; + +int record_parse_callchain_opt(const struct option *opt, const char *arg, int unset); +extern const char record_callchain_help[]; #endif /* __PERF_CALLCHAIN_H */ diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 2b32ffa9ebdb..f817046e22b1 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -1,4 +1,5 @@ #include "util.h" +#include "sysfs.h" #include "../perf.h" #include "cpumap.h" #include <assert.h> @@ -201,3 +202,56 @@ void cpu_map__delete(struct cpu_map *map) { free(map); } + +int cpu_map__get_socket(struct cpu_map *map, int idx) +{ + FILE *fp; + const char *mnt; + char path[PATH_MAX]; + int cpu, ret; + + if (idx > map->nr) + return -1; + + cpu = map->map[idx]; + + mnt = sysfs_find_mountpoint(); + if (!mnt) + return -1; + + sprintf(path, + "%s/devices/system/cpu/cpu%d/topology/physical_package_id", + mnt, cpu); + + fp = fopen(path, "r"); + if (!fp) + return -1; + ret = fscanf(fp, "%d", &cpu); + fclose(fp); + return ret == 1 ? cpu : -1; +} + +int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp) +{ + struct cpu_map *sock; + int nr = cpus->nr; + int cpu, s1, s2; + + sock = calloc(1, sizeof(*sock) + nr * sizeof(int)); + if (!sock) + return -1; + + for (cpu = 0; cpu < nr; cpu++) { + s1 = cpu_map__get_socket(cpus, cpu); + for (s2 = 0; s2 < sock->nr; s2++) { + if (s1 == sock->map[s2]) + break; + } + if (s2 == sock->nr) { + sock->map[sock->nr] = s1; + sock->nr++; + } + } + *sockp = sock; + return 0; +} diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 2f68a3b8c285..161b00756a12 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -14,6 +14,15 @@ struct cpu_map *cpu_map__dummy_new(void); void cpu_map__delete(struct cpu_map *map); struct cpu_map *cpu_map__read(FILE *file); size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp); +int cpu_map__get_socket(struct cpu_map *map, int idx); +int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp); + +static inline int cpu_map__socket(struct cpu_map *sock, int s) +{ + if (!sock || s > sock->nr || s < 0) + return 0; + return sock->map[s]; +} static inline int cpu_map__nr(const struct cpu_map *map) { diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 03f830b48148..399e74c34c1a 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -23,10 +23,8 @@ int eprintf(int level, const char *fmt, ...) if (verbose >= level) { va_start(args, fmt); - if (use_browser == 1) - ret = ui_helpline__show_help(fmt, args); - else if (use_browser == 2) - ret = perf_gtk__show_helpline(fmt, args); + if (use_browser >= 1) + ui_helpline__vshow(fmt, args); else ret = vfprintf(stderr, fmt, args); va_end(args); @@ -49,28 +47,6 @@ int dump_printf(const char *fmt, ...) return ret; } -#if !defined(NEWT_SUPPORT) && !defined(GTK2_SUPPORT) -int ui__warning(const char *format, ...) -{ - va_list args; - - va_start(args, format); - vfprintf(stderr, format, args); - va_end(args); - return 0; -} -#endif - -int ui__error_paranoid(void) -{ - return ui__error("Permission error - are you root?\n" - "Consider tweaking /proc/sys/kernel/perf_event_paranoid:\n" - " -1 - Not paranoid at all\n" - " 0 - Disallow raw tracepoint access for unpriv\n" - " 1 - Disallow cpu events for unpriv\n" - " 2 - Disallow kernel profiling for unpriv\n"); -} - void trace_event(union perf_event *event) { unsigned char *raw_event = (void *)event; diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index 83e8d234af6b..efbd98805ad0 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -5,6 +5,8 @@ #include <stdbool.h> #include "event.h" #include "../ui/helpline.h" +#include "../ui/progress.h" +#include "../ui/util.h" extern int verbose; extern bool quiet, dump_trace; @@ -12,39 +14,7 @@ extern bool quiet, dump_trace; int dump_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2))); void trace_event(union perf_event *event); -struct ui_progress; -struct perf_error_ops; - -#if defined(NEWT_SUPPORT) || defined(GTK2_SUPPORT) - -#include "../ui/progress.h" int ui__error(const char *format, ...) __attribute__((format(printf, 1, 2))); -#include "../ui/util.h" - -#else - -static inline void ui_progress__update(u64 curr __maybe_unused, - u64 total __maybe_unused, - const char *title __maybe_unused) {} -static inline void ui_progress__finish(void) {} - -#define ui__error(format, arg...) ui__warning(format, ##arg) - -static inline int -perf_error__register(struct perf_error_ops *eops __maybe_unused) -{ - return 0; -} - -static inline int -perf_error__unregister(struct perf_error_ops *eops __maybe_unused) -{ - return 0; -} - -#endif /* NEWT_SUPPORT || GTK2_SUPPORT */ - int ui__warning(const char *format, ...) __attribute__((format(printf, 1, 2))); -int ui__error_paranoid(void); #endif /* __PERF_DEBUG_H */ diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index d6d9a465acdb..6f7d5a9d6b05 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -539,13 +539,13 @@ struct dso *__dsos__findnew(struct list_head *head, const char *name) } size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp, - bool with_hits) + bool (skip)(struct dso *dso, int parm), int parm) { struct dso *pos; size_t ret = 0; list_for_each_entry(pos, head, node) { - if (with_hits && !pos->hit) + if (skip && skip(pos, parm)) continue; ret += dso__fprintf_buildid(pos, fp); ret += fprintf(fp, " %s\n", pos->long_name); @@ -583,7 +583,7 @@ size_t dso__fprintf(struct dso *dso, enum map_type type, FILE *fp) if (dso->short_name != dso->long_name) ret += fprintf(fp, "%s, ", dso->long_name); ret += fprintf(fp, "%s, %sloaded, ", map_type__name[type], - dso->loaded ? "" : "NOT "); + dso__loaded(dso, type) ? "" : "NOT "); ret += dso__fprintf_buildid(dso, fp); ret += fprintf(fp, ")\n"); for (nd = rb_first(&dso->symbols[type]); nd; nd = rb_next(nd)) { diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index e03276940b99..450199ab51b5 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -138,7 +138,7 @@ struct dso *__dsos__findnew(struct list_head *head, const char *name); bool __dsos__read_build_ids(struct list_head *head, bool with_hits); size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp, - bool with_hits); + bool (skip)(struct dso *dso, int parm), int parm); size_t __dsos__fprintf(struct list_head *head, FILE *fp); size_t dso__fprintf_buildid(struct dso *dso, FILE *fp); diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 3cf2c3e0605f..5cd13d768cec 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -476,8 +476,10 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, } } - if (kallsyms__parse(filename, &args, find_symbol_cb) <= 0) + if (kallsyms__parse(filename, &args, find_symbol_cb) <= 0) { + free(event); return -ENOENT; + } map = machine->vmlinux_maps[MAP__FUNCTION]; size = snprintf(event->mmap.filename, sizeof(event->mmap.filename), diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 705293489e3c..bc4ad7977438 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -49,10 +49,16 @@ struct perf_evlist *perf_evlist__new(struct cpu_map *cpus, return evlist; } -void perf_evlist__config_attrs(struct perf_evlist *evlist, - struct perf_record_opts *opts) +void perf_evlist__config(struct perf_evlist *evlist, + struct perf_record_opts *opts) { struct perf_evsel *evsel; + /* + * Set the evsel leader links before we configure attributes, + * since some might depend on this info. + */ + if (opts->group) + perf_evlist__set_leader(evlist); if (evlist->cpus->map[0] < 0) opts->no_inherit = true; @@ -61,7 +67,7 @@ void perf_evlist__config_attrs(struct perf_evlist *evlist, perf_evsel__config(evsel, opts); if (evlist->nr_entries > 1) - evsel->attr.sample_type |= PERF_SAMPLE_ID; + perf_evsel__set_sample_id(evsel); } } @@ -111,18 +117,21 @@ void __perf_evlist__set_leader(struct list_head *list) struct perf_evsel *evsel, *leader; leader = list_entry(list->next, struct perf_evsel, node); - leader->leader = NULL; + evsel = list_entry(list->prev, struct perf_evsel, node); + + leader->nr_members = evsel->idx - leader->idx + 1; list_for_each_entry(evsel, list, node) { - if (evsel != leader) - evsel->leader = leader; + evsel->leader = leader; } } void perf_evlist__set_leader(struct perf_evlist *evlist) { - if (evlist->nr_entries) + if (evlist->nr_entries) { + evlist->nr_groups = evlist->nr_entries > 1 ? 1 : 0; __perf_evlist__set_leader(&evlist->entries); + } } int perf_evlist__add_default(struct perf_evlist *evlist) @@ -222,7 +231,7 @@ void perf_evlist__disable(struct perf_evlist *evlist) for (cpu = 0; cpu < evlist->cpus->nr; cpu++) { list_for_each_entry(pos, &evlist->entries, node) { - if (perf_evsel__is_group_member(pos)) + if (!perf_evsel__is_group_leader(pos)) continue; for (thread = 0; thread < evlist->threads->nr; thread++) ioctl(FD(pos, cpu, thread), @@ -238,7 +247,7 @@ void perf_evlist__enable(struct perf_evlist *evlist) for (cpu = 0; cpu < cpu_map__nr(evlist->cpus); cpu++) { list_for_each_entry(pos, &evlist->entries, node) { - if (perf_evsel__is_group_member(pos)) + if (!perf_evsel__is_group_leader(pos)) continue; for (thread = 0; thread < evlist->threads->nr; thread++) ioctl(FD(pos, cpu, thread), @@ -366,7 +375,7 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx) if ((old & md->mask) + size != ((old + size) & md->mask)) { unsigned int offset = old; unsigned int len = min(sizeof(*event), size), cpy; - void *dst = &evlist->event_copy; + void *dst = &md->event_copy; do { cpy = min(md->mask + 1 - (offset & md->mask), len); @@ -376,7 +385,7 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx) len -= cpy; } while (len); - event = &evlist->event_copy; + event = &md->event_copy; } old += size; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 56003f779e60..2dd07bd60b4f 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -17,10 +17,18 @@ struct perf_record_opts; #define PERF_EVLIST__HLIST_BITS 8 #define PERF_EVLIST__HLIST_SIZE (1 << PERF_EVLIST__HLIST_BITS) +struct perf_mmap { + void *base; + int mask; + unsigned int prev; + union perf_event event_copy; +}; + struct perf_evlist { struct list_head entries; struct hlist_head heads[PERF_EVLIST__HLIST_SIZE]; int nr_entries; + int nr_groups; int nr_fds; int nr_mmaps; int mmap_len; @@ -29,7 +37,6 @@ struct perf_evlist { pid_t pid; } workload; bool overwrite; - union perf_event event_copy; struct perf_mmap *mmap; struct pollfd *pollfd; struct thread_map *threads; @@ -76,8 +83,8 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *self, int idx); int perf_evlist__open(struct perf_evlist *evlist); -void perf_evlist__config_attrs(struct perf_evlist *evlist, - struct perf_record_opts *opts); +void perf_evlist__config(struct perf_evlist *evlist, + struct perf_record_opts *opts); int perf_evlist__prepare_workload(struct perf_evlist *evlist, struct perf_record_opts *opts, @@ -135,4 +142,25 @@ static inline struct perf_evsel *perf_evlist__last(struct perf_evlist *evlist) } size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp); + +static inline unsigned int perf_mmap__read_head(struct perf_mmap *mm) +{ + struct perf_event_mmap_page *pc = mm->base; + int head = pc->data_head; + rmb(); + return head; +} + +static inline void perf_mmap__write_tail(struct perf_mmap *md, + unsigned long tail) +{ + struct perf_event_mmap_page *pc = md->base; + + /* + * ensure all reads are done before we write the tail out. + */ + /* mb(); */ + pc->data_tail = tail; +} + #endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 1b16dd1edc8e..9c82f98f26de 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -22,6 +22,11 @@ #include <linux/perf_event.h> #include "perf_regs.h" +static struct { + bool sample_id_all; + bool exclude_guest; +} perf_missing_features; + #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) static int __perf_evsel__sample_size(u64 sample_type) @@ -50,11 +55,36 @@ void hists__init(struct hists *hists) pthread_mutex_init(&hists->lock, NULL); } +void __perf_evsel__set_sample_bit(struct perf_evsel *evsel, + enum perf_event_sample_format bit) +{ + if (!(evsel->attr.sample_type & bit)) { + evsel->attr.sample_type |= bit; + evsel->sample_size += sizeof(u64); + } +} + +void __perf_evsel__reset_sample_bit(struct perf_evsel *evsel, + enum perf_event_sample_format bit) +{ + if (evsel->attr.sample_type & bit) { + evsel->attr.sample_type &= ~bit; + evsel->sample_size -= sizeof(u64); + } +} + +void perf_evsel__set_sample_id(struct perf_evsel *evsel) +{ + perf_evsel__set_sample_bit(evsel, ID); + evsel->attr.read_format |= PERF_FORMAT_ID; +} + void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr, int idx) { evsel->idx = idx; evsel->attr = *attr; + evsel->leader = evsel; INIT_LIST_HEAD(&evsel->node); hists__init(&evsel->hists); evsel->sample_size = __perf_evsel__sample_size(attr->sample_type); @@ -404,6 +434,31 @@ const char *perf_evsel__name(struct perf_evsel *evsel) return evsel->name ?: "unknown"; } +const char *perf_evsel__group_name(struct perf_evsel *evsel) +{ + return evsel->group_name ?: "anon group"; +} + +int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size) +{ + int ret; + struct perf_evsel *pos; + const char *group_name = perf_evsel__group_name(evsel); + + ret = scnprintf(buf, size, "%s", group_name); + + ret += scnprintf(buf + ret, size - ret, " { %s", + perf_evsel__name(evsel)); + + for_each_group_member(pos, evsel) + ret += scnprintf(buf + ret, size - ret, ", %s", + perf_evsel__name(pos)); + + ret += scnprintf(buf + ret, size - ret, " }"); + + return ret; +} + /* * The enable_on_exec/disabled value strategy: * @@ -438,13 +493,11 @@ void perf_evsel__config(struct perf_evsel *evsel, struct perf_event_attr *attr = &evsel->attr; int track = !evsel->idx; /* only the first counter needs these */ - attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1; + attr->sample_id_all = perf_missing_features.sample_id_all ? 0 : 1; attr->inherit = !opts->no_inherit; - attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | - PERF_FORMAT_TOTAL_TIME_RUNNING | - PERF_FORMAT_ID; - attr->sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID; + perf_evsel__set_sample_bit(evsel, IP); + perf_evsel__set_sample_bit(evsel, TID); /* * We default some events to a 1 default interval. But keep @@ -453,7 +506,7 @@ void perf_evsel__config(struct perf_evsel *evsel, if (!attr->sample_period || (opts->user_freq != UINT_MAX && opts->user_interval != ULLONG_MAX)) { if (opts->freq) { - attr->sample_type |= PERF_SAMPLE_PERIOD; + perf_evsel__set_sample_bit(evsel, PERIOD); attr->freq = 1; attr->sample_freq = opts->freq; } else { @@ -468,16 +521,16 @@ void perf_evsel__config(struct perf_evsel *evsel, attr->inherit_stat = 1; if (opts->sample_address) { - attr->sample_type |= PERF_SAMPLE_ADDR; + perf_evsel__set_sample_bit(evsel, ADDR); attr->mmap_data = track; } if (opts->call_graph) { - attr->sample_type |= PERF_SAMPLE_CALLCHAIN; + perf_evsel__set_sample_bit(evsel, CALLCHAIN); if (opts->call_graph == CALLCHAIN_DWARF) { - attr->sample_type |= PERF_SAMPLE_REGS_USER | - PERF_SAMPLE_STACK_USER; + perf_evsel__set_sample_bit(evsel, REGS_USER); + perf_evsel__set_sample_bit(evsel, STACK_USER); attr->sample_regs_user = PERF_REGS_MASK; attr->sample_stack_user = opts->stack_dump_size; attr->exclude_callchain_user = 1; @@ -485,20 +538,20 @@ void perf_evsel__config(struct perf_evsel *evsel, } if (perf_target__has_cpu(&opts->target)) - attr->sample_type |= PERF_SAMPLE_CPU; + perf_evsel__set_sample_bit(evsel, CPU); if (opts->period) - attr->sample_type |= PERF_SAMPLE_PERIOD; + perf_evsel__set_sample_bit(evsel, PERIOD); - if (!opts->sample_id_all_missing && + if (!perf_missing_features.sample_id_all && (opts->sample_time || !opts->no_inherit || perf_target__has_cpu(&opts->target))) - attr->sample_type |= PERF_SAMPLE_TIME; + perf_evsel__set_sample_bit(evsel, TIME); if (opts->raw_samples) { - attr->sample_type |= PERF_SAMPLE_TIME; - attr->sample_type |= PERF_SAMPLE_RAW; - attr->sample_type |= PERF_SAMPLE_CPU; + perf_evsel__set_sample_bit(evsel, TIME); + perf_evsel__set_sample_bit(evsel, RAW); + perf_evsel__set_sample_bit(evsel, CPU); } if (opts->no_delay) { @@ -506,7 +559,7 @@ void perf_evsel__config(struct perf_evsel *evsel, attr->wakeup_events = 1; } if (opts->branch_stack) { - attr->sample_type |= PERF_SAMPLE_BRANCH_STACK; + perf_evsel__set_sample_bit(evsel, BRANCH_STACK); attr->branch_sample_type = opts->branch_stack; } @@ -519,14 +572,14 @@ void perf_evsel__config(struct perf_evsel *evsel, * Disabling only independent events or group leaders, * keeping group members enabled. */ - if (!perf_evsel__is_group_member(evsel)) + if (perf_evsel__is_group_leader(evsel)) attr->disabled = 1; /* * Setting enable_on_exec for independent events and * group leaders for traced executed by perf. */ - if (perf_target__none(&opts->target) && !perf_evsel__is_group_member(evsel)) + if (perf_target__none(&opts->target) && perf_evsel__is_group_leader(evsel)) attr->enable_on_exec = 1; } @@ -612,6 +665,11 @@ void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads) } } +void perf_evsel__free_counts(struct perf_evsel *evsel) +{ + free(evsel->counts); +} + void perf_evsel__exit(struct perf_evsel *evsel) { assert(list_empty(&evsel->node)); @@ -631,6 +689,28 @@ void perf_evsel__delete(struct perf_evsel *evsel) free(evsel); } +static inline void compute_deltas(struct perf_evsel *evsel, + int cpu, + struct perf_counts_values *count) +{ + struct perf_counts_values tmp; + + if (!evsel->prev_raw_counts) + return; + + if (cpu == -1) { + tmp = evsel->prev_raw_counts->aggr; + evsel->prev_raw_counts->aggr = *count; + } else { + tmp = evsel->prev_raw_counts->cpu[cpu]; + evsel->prev_raw_counts->cpu[cpu] = *count; + } + + count->val = count->val - tmp.val; + count->ena = count->ena - tmp.ena; + count->run = count->run - tmp.run; +} + int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, int cpu, int thread, bool scale) { @@ -646,6 +726,8 @@ int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0) return -errno; + compute_deltas(evsel, cpu, &count); + if (scale) { if (count.run == 0) count.val = 0; @@ -684,6 +766,8 @@ int __perf_evsel__read(struct perf_evsel *evsel, } } + compute_deltas(evsel, -1, aggr); + evsel->counts->scaled = 0; if (scale) { if (aggr->run == 0) { @@ -707,7 +791,7 @@ static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread) struct perf_evsel *leader = evsel->leader; int fd; - if (!perf_evsel__is_group_member(evsel)) + if (perf_evsel__is_group_leader(evsel)) return -1; /* @@ -738,6 +822,13 @@ static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, pid = evsel->cgrp->fd; } +fallback_missing_features: + if (perf_missing_features.exclude_guest) + evsel->attr.exclude_guest = evsel->attr.exclude_host = 0; +retry_sample_id: + if (perf_missing_features.sample_id_all) + evsel->attr.sample_id_all = 0; + for (cpu = 0; cpu < cpus->nr; cpu++) { for (thread = 0; thread < threads->nr; thread++) { @@ -754,13 +845,26 @@ static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, group_fd, flags); if (FD(evsel, cpu, thread) < 0) { err = -errno; - goto out_close; + goto try_fallback; } } } return 0; +try_fallback: + if (err != -EINVAL || cpu > 0 || thread > 0) + goto out_close; + + if (!perf_missing_features.exclude_guest && + (evsel->attr.exclude_guest || evsel->attr.exclude_host)) { + perf_missing_features.exclude_guest = true; + goto fallback_missing_features; + } else if (!perf_missing_features.sample_id_all) { + perf_missing_features.sample_id_all = true; + goto retry_sample_id; + } + out_close: do { while (--thread >= 0) { @@ -1205,3 +1309,225 @@ u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample, return 0; } + +static int comma_fprintf(FILE *fp, bool *first, const char *fmt, ...) +{ + va_list args; + int ret = 0; + + if (!*first) { + ret += fprintf(fp, ","); + } else { + ret += fprintf(fp, ":"); + *first = false; + } + + va_start(args, fmt); + ret += vfprintf(fp, fmt, args); + va_end(args); + return ret; +} + +static int __if_fprintf(FILE *fp, bool *first, const char *field, u64 value) +{ + if (value == 0) + return 0; + + return comma_fprintf(fp, first, " %s: %" PRIu64, field, value); +} + +#define if_print(field) printed += __if_fprintf(fp, &first, #field, evsel->attr.field) + +struct bit_names { + int bit; + const char *name; +}; + +static int bits__fprintf(FILE *fp, const char *field, u64 value, + struct bit_names *bits, bool *first) +{ + int i = 0, printed = comma_fprintf(fp, first, " %s: ", field); + bool first_bit = true; + + do { + if (value & bits[i].bit) { + printed += fprintf(fp, "%s%s", first_bit ? "" : "|", bits[i].name); + first_bit = false; + } + } while (bits[++i].name != NULL); + + return printed; +} + +static int sample_type__fprintf(FILE *fp, bool *first, u64 value) +{ +#define bit_name(n) { PERF_SAMPLE_##n, #n } + struct bit_names bits[] = { + bit_name(IP), bit_name(TID), bit_name(TIME), bit_name(ADDR), + bit_name(READ), bit_name(CALLCHAIN), bit_name(ID), bit_name(CPU), + bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW), + bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER), + { .name = NULL, } + }; +#undef bit_name + return bits__fprintf(fp, "sample_type", value, bits, first); +} + +static int read_format__fprintf(FILE *fp, bool *first, u64 value) +{ +#define bit_name(n) { PERF_FORMAT_##n, #n } + struct bit_names bits[] = { + bit_name(TOTAL_TIME_ENABLED), bit_name(TOTAL_TIME_RUNNING), + bit_name(ID), bit_name(GROUP), + { .name = NULL, } + }; +#undef bit_name + return bits__fprintf(fp, "read_format", value, bits, first); +} + +int perf_evsel__fprintf(struct perf_evsel *evsel, + struct perf_attr_details *details, FILE *fp) +{ + bool first = true; + int printed = 0; + + if (details->event_group) { + struct perf_evsel *pos; + + if (!perf_evsel__is_group_leader(evsel)) + return 0; + + if (evsel->nr_members > 1) + printed += fprintf(fp, "%s{", evsel->group_name ?: ""); + + printed += fprintf(fp, "%s", perf_evsel__name(evsel)); + for_each_group_member(pos, evsel) + printed += fprintf(fp, ",%s", perf_evsel__name(pos)); + + if (evsel->nr_members > 1) + printed += fprintf(fp, "}"); + goto out; + } + + printed += fprintf(fp, "%s", perf_evsel__name(evsel)); + + if (details->verbose || details->freq) { + printed += comma_fprintf(fp, &first, " sample_freq=%" PRIu64, + (u64)evsel->attr.sample_freq); + } + + if (details->verbose) { + if_print(type); + if_print(config); + if_print(config1); + if_print(config2); + if_print(size); + printed += sample_type__fprintf(fp, &first, evsel->attr.sample_type); + if (evsel->attr.read_format) + printed += read_format__fprintf(fp, &first, evsel->attr.read_format); + if_print(disabled); + if_print(inherit); + if_print(pinned); + if_print(exclusive); + if_print(exclude_user); + if_print(exclude_kernel); + if_print(exclude_hv); + if_print(exclude_idle); + if_print(mmap); + if_print(comm); + if_print(freq); + if_print(inherit_stat); + if_print(enable_on_exec); + if_print(task); + if_print(watermark); + if_print(precise_ip); + if_print(mmap_data); + if_print(sample_id_all); + if_print(exclude_host); + if_print(exclude_guest); + if_print(__reserved_1); + if_print(wakeup_events); + if_print(bp_type); + if_print(branch_sample_type); + } +out: + fputc('\n', fp); + return ++printed; +} + +bool perf_evsel__fallback(struct perf_evsel *evsel, int err, + char *msg, size_t msgsize) +{ + if ((err == ENOENT || err == ENXIO) && + evsel->attr.type == PERF_TYPE_HARDWARE && + evsel->attr.config == PERF_COUNT_HW_CPU_CYCLES) { + /* + * If it's cycles then fall back to hrtimer based + * cpu-clock-tick sw counter, which is always available even if + * no PMU support. + * + * PPC returns ENXIO until 2.6.37 (behavior changed with commit + * b0a873e). + */ + scnprintf(msg, msgsize, "%s", +"The cycles event is not supported, trying to fall back to cpu-clock-ticks"); + + evsel->attr.type = PERF_TYPE_SOFTWARE; + evsel->attr.config = PERF_COUNT_SW_CPU_CLOCK; + + free(evsel->name); + evsel->name = NULL; + return true; + } + + return false; +} + +int perf_evsel__open_strerror(struct perf_evsel *evsel, + struct perf_target *target, + int err, char *msg, size_t size) +{ + switch (err) { + case EPERM: + case EACCES: + return scnprintf(msg, size, "%s", + "You may not have permission to collect %sstats.\n" + "Consider tweaking /proc/sys/kernel/perf_event_paranoid:\n" + " -1 - Not paranoid at all\n" + " 0 - Disallow raw tracepoint access for unpriv\n" + " 1 - Disallow cpu events for unpriv\n" + " 2 - Disallow kernel profiling for unpriv", + target->system_wide ? "system-wide " : ""); + case ENOENT: + return scnprintf(msg, size, "The %s event is not supported.", + perf_evsel__name(evsel)); + case EMFILE: + return scnprintf(msg, size, "%s", + "Too many events are opened.\n" + "Try again after reducing the number of events."); + case ENODEV: + if (target->cpu_list) + return scnprintf(msg, size, "%s", + "No such device - did you specify an out-of-range profile CPU?\n"); + break; + case EOPNOTSUPP: + if (evsel->attr.precise_ip) + return scnprintf(msg, size, "%s", + "\'precise\' request may not be supported. Try removing 'p' modifier."); +#if defined(__i386__) || defined(__x86_64__) + if (evsel->attr.type == PERF_TYPE_HARDWARE) + return scnprintf(msg, size, "%s", + "No hardware sampling interrupt available.\n" + "No APIC? If so then you can boot the kernel with the \"lapic\" boot parameter to force-enable it."); +#endif + break; + default: + break; + } + + return scnprintf(msg, size, + "The sys_perf_event_open() syscall returned with %d (%s) for event (%s). \n" + "/bin/dmesg may provide additional information.\n" + "No CONFIG_PERF_EVENTS=y kernel support configured?\n", + err, strerror(err), perf_evsel__name(evsel)); +} diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 3d2b8017438c..52021c3087df 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -53,6 +53,7 @@ struct perf_evsel { struct xyarray *sample_id; u64 *id; struct perf_counts *counts; + struct perf_counts *prev_raw_counts; int idx; u32 ids; struct hists hists; @@ -73,10 +74,13 @@ struct perf_evsel { bool needs_swap; /* parse modifier helper */ int exclude_GH; + int nr_members; struct perf_evsel *leader; char *group_name; }; +#define hists_to_evsel(h) container_of(h, struct perf_evsel, hists) + struct cpu_map; struct thread_map; struct perf_evlist; @@ -110,14 +114,30 @@ extern const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX]; int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, char *bf, size_t size); const char *perf_evsel__name(struct perf_evsel *evsel); +const char *perf_evsel__group_name(struct perf_evsel *evsel); +int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size); int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads); int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads); int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus); void perf_evsel__free_fd(struct perf_evsel *evsel); void perf_evsel__free_id(struct perf_evsel *evsel); +void perf_evsel__free_counts(struct perf_evsel *evsel); void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads); +void __perf_evsel__set_sample_bit(struct perf_evsel *evsel, + enum perf_event_sample_format bit); +void __perf_evsel__reset_sample_bit(struct perf_evsel *evsel, + enum perf_event_sample_format bit); + +#define perf_evsel__set_sample_bit(evsel, bit) \ + __perf_evsel__set_sample_bit(evsel, PERF_SAMPLE_##bit) + +#define perf_evsel__reset_sample_bit(evsel, bit) \ + __perf_evsel__reset_sample_bit(evsel, PERF_SAMPLE_##bit) + +void perf_evsel__set_sample_id(struct perf_evsel *evsel); + int perf_evsel__set_filter(struct perf_evsel *evsel, int ncpus, int nthreads, const char *filter); @@ -226,8 +246,34 @@ static inline struct perf_evsel *perf_evsel__next(struct perf_evsel *evsel) return list_entry(evsel->node.next, struct perf_evsel, node); } -static inline bool perf_evsel__is_group_member(const struct perf_evsel *evsel) +static inline bool perf_evsel__is_group_leader(const struct perf_evsel *evsel) +{ + return evsel->leader == evsel; +} + +struct perf_attr_details { + bool freq; + bool verbose; + bool event_group; +}; + +int perf_evsel__fprintf(struct perf_evsel *evsel, + struct perf_attr_details *details, FILE *fp); + +bool perf_evsel__fallback(struct perf_evsel *evsel, int err, + char *msg, size_t msgsize); +int perf_evsel__open_strerror(struct perf_evsel *evsel, + struct perf_target *target, + int err, char *msg, size_t size); + +static inline int perf_evsel__group_idx(struct perf_evsel *evsel) { - return evsel->leader != NULL; + return evsel->idx - evsel->leader->idx; } + +#define for_each_group_member(_evsel, _leader) \ +for ((_evsel) = list_entry((_leader)->node.next, struct perf_evsel, node); \ + (_evsel) && (_evsel)->leader == (_leader); \ + (_evsel) = list_entry((_evsel)->node.next, struct perf_evsel, node)) + #endif /* __PERF_EVSEL_H */ diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index b7da4634a047..f4bfd79ef6a7 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -148,7 +148,7 @@ static char *do_read_string(int fd, struct perf_header *ph) u32 len; char *buf; - sz = read(fd, &len, sizeof(len)); + sz = readn(fd, &len, sizeof(len)); if (sz < (ssize_t)sizeof(len)) return NULL; @@ -159,7 +159,7 @@ static char *do_read_string(int fd, struct perf_header *ph) if (!buf) return NULL; - ret = read(fd, buf, len); + ret = readn(fd, buf, len); if (ret == (ssize_t)len) { /* * strings are padded by zeroes @@ -287,12 +287,12 @@ static int dsos__write_buildid_table(struct perf_header *header, int fd) struct perf_session *session = container_of(header, struct perf_session, header); struct rb_node *nd; - int err = machine__write_buildid_table(&session->host_machine, fd); + int err = machine__write_buildid_table(&session->machines.host, fd); if (err) return err; - for (nd = rb_first(&session->machines); nd; nd = rb_next(nd)) { + for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) { struct machine *pos = rb_entry(nd, struct machine, rb_node); err = machine__write_buildid_table(pos, fd); if (err) @@ -313,7 +313,8 @@ int build_id_cache__add_s(const char *sbuild_id, const char *debugdir, if (is_kallsyms) { if (symbol_conf.kptr_restrict) { pr_debug("Not caching a kptr_restrict'ed /proc/kallsyms\n"); - return 0; + err = 0; + goto out_free; } realname = (char *) name; } else @@ -448,9 +449,9 @@ static int perf_session__cache_build_ids(struct perf_session *session) if (mkdir(debugdir, 0755) != 0 && errno != EEXIST) return -1; - ret = machine__cache_build_ids(&session->host_machine, debugdir); + ret = machine__cache_build_ids(&session->machines.host, debugdir); - for (nd = rb_first(&session->machines); nd; nd = rb_next(nd)) { + for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) { struct machine *pos = rb_entry(nd, struct machine, rb_node); ret |= machine__cache_build_ids(pos, debugdir); } @@ -467,9 +468,9 @@ static bool machine__read_build_ids(struct machine *machine, bool with_hits) static bool perf_session__read_build_ids(struct perf_session *session, bool with_hits) { struct rb_node *nd; - bool ret = machine__read_build_ids(&session->host_machine, with_hits); + bool ret = machine__read_build_ids(&session->machines.host, with_hits); - for (nd = rb_first(&session->machines); nd; nd = rb_next(nd)) { + for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) { struct machine *pos = rb_entry(nd, struct machine, rb_node); ret |= machine__read_build_ids(pos, with_hits); } @@ -954,6 +955,7 @@ static int write_topo_node(int fd, int node) } fclose(fp); + fp = NULL; ret = do_write(fd, &mem_total, sizeof(u64)); if (ret) @@ -980,7 +982,8 @@ static int write_topo_node(int fd, int node) ret = do_write_string(fd, buf); done: free(buf); - fclose(fp); + if (fp) + fclose(fp); return ret; } @@ -1051,16 +1054,25 @@ static int write_pmu_mappings(int fd, struct perf_header *h __maybe_unused, struct perf_pmu *pmu = NULL; off_t offset = lseek(fd, 0, SEEK_CUR); __u32 pmu_num = 0; + int ret; /* write real pmu_num later */ - do_write(fd, &pmu_num, sizeof(pmu_num)); + ret = do_write(fd, &pmu_num, sizeof(pmu_num)); + if (ret < 0) + return ret; while ((pmu = perf_pmu__scan(pmu))) { if (!pmu->name) continue; pmu_num++; - do_write(fd, &pmu->type, sizeof(pmu->type)); - do_write_string(fd, pmu->name); + + ret = do_write(fd, &pmu->type, sizeof(pmu->type)); + if (ret < 0) + return ret; + + ret = do_write_string(fd, pmu->name); + if (ret < 0) + return ret; } if (pwrite(fd, &pmu_num, sizeof(pmu_num), offset) != sizeof(pmu_num)) { @@ -1073,6 +1085,52 @@ static int write_pmu_mappings(int fd, struct perf_header *h __maybe_unused, } /* + * File format: + * + * struct group_descs { + * u32 nr_groups; + * struct group_desc { + * char name[]; + * u32 leader_idx; + * u32 nr_members; + * }[nr_groups]; + * }; + */ +static int write_group_desc(int fd, struct perf_header *h __maybe_unused, + struct perf_evlist *evlist) +{ + u32 nr_groups = evlist->nr_groups; + struct perf_evsel *evsel; + int ret; + + ret = do_write(fd, &nr_groups, sizeof(nr_groups)); + if (ret < 0) + return ret; + + list_for_each_entry(evsel, &evlist->entries, node) { + if (perf_evsel__is_group_leader(evsel) && + evsel->nr_members > 1) { + const char *name = evsel->group_name ?: "{anon_group}"; + u32 leader_idx = evsel->idx; + u32 nr_members = evsel->nr_members; + + ret = do_write_string(fd, name); + if (ret < 0) + return ret; + + ret = do_write(fd, &leader_idx, sizeof(leader_idx)); + if (ret < 0) + return ret; + + ret = do_write(fd, &nr_members, sizeof(nr_members)); + if (ret < 0) + return ret; + } + } + return 0; +} + +/* * default get_cpuid(): nothing gets recorded * actual implementation must be in arch/$(ARCH)/util/header.c */ @@ -1209,14 +1267,14 @@ read_event_desc(struct perf_header *ph, int fd) size_t msz; /* number of events */ - ret = read(fd, &nre, sizeof(nre)); + ret = readn(fd, &nre, sizeof(nre)); if (ret != (ssize_t)sizeof(nre)) goto error; if (ph->needs_swap) nre = bswap_32(nre); - ret = read(fd, &sz, sizeof(sz)); + ret = readn(fd, &sz, sizeof(sz)); if (ret != (ssize_t)sizeof(sz)) goto error; @@ -1244,7 +1302,7 @@ read_event_desc(struct perf_header *ph, int fd) * must read entire on-file attr struct to * sync up with layout. */ - ret = read(fd, buf, sz); + ret = readn(fd, buf, sz); if (ret != (ssize_t)sz) goto error; @@ -1253,7 +1311,7 @@ read_event_desc(struct perf_header *ph, int fd) memcpy(&evsel->attr, buf, msz); - ret = read(fd, &nr, sizeof(nr)); + ret = readn(fd, &nr, sizeof(nr)); if (ret != (ssize_t)sizeof(nr)) goto error; @@ -1274,7 +1332,7 @@ read_event_desc(struct perf_header *ph, int fd) evsel->id = id; for (j = 0 ; j < nr; j++) { - ret = read(fd, id, sizeof(*id)); + ret = readn(fd, id, sizeof(*id)); if (ret != (ssize_t)sizeof(*id)) goto error; if (ph->needs_swap) @@ -1435,6 +1493,31 @@ error: fprintf(fp, "# pmu mappings: unable to read\n"); } +static void print_group_desc(struct perf_header *ph, int fd __maybe_unused, + FILE *fp) +{ + struct perf_session *session; + struct perf_evsel *evsel; + u32 nr = 0; + + session = container_of(ph, struct perf_session, header); + + list_for_each_entry(evsel, &session->evlist->entries, node) { + if (perf_evsel__is_group_leader(evsel) && + evsel->nr_members > 1) { + fprintf(fp, "# group: %s{%s", evsel->group_name ?: "", + perf_evsel__name(evsel)); + + nr = evsel->nr_members - 1; + } else if (nr) { + fprintf(fp, ",%s", perf_evsel__name(evsel)); + + if (--nr == 0) + fprintf(fp, "}\n"); + } + } +} + static int __event_process_build_id(struct build_id_event *bev, char *filename, struct perf_session *session) @@ -1506,14 +1589,14 @@ static int perf_header__read_build_ids_abi_quirk(struct perf_header *header, while (offset < limit) { ssize_t len; - if (read(input, &old_bev, sizeof(old_bev)) != sizeof(old_bev)) + if (readn(input, &old_bev, sizeof(old_bev)) != sizeof(old_bev)) return -1; if (header->needs_swap) perf_event_header__bswap(&old_bev.header); len = old_bev.header.size - sizeof(old_bev); - if (read(input, filename, len) != len) + if (readn(input, filename, len) != len) return -1; bev.header = old_bev.header; @@ -1548,14 +1631,14 @@ static int perf_header__read_build_ids(struct perf_header *header, while (offset < limit) { ssize_t len; - if (read(input, &bev, sizeof(bev)) != sizeof(bev)) + if (readn(input, &bev, sizeof(bev)) != sizeof(bev)) goto out; if (header->needs_swap) perf_event_header__bswap(&bev.header); len = bev.header.size - sizeof(bev); - if (read(input, filename, len) != len) + if (readn(input, filename, len) != len) goto out; /* * The a1645ce1 changeset: @@ -1641,7 +1724,7 @@ static int process_nrcpus(struct perf_file_section *section __maybe_unused, size_t ret; u32 nr; - ret = read(fd, &nr, sizeof(nr)); + ret = readn(fd, &nr, sizeof(nr)); if (ret != sizeof(nr)) return -1; @@ -1650,7 +1733,7 @@ static int process_nrcpus(struct perf_file_section *section __maybe_unused, ph->env.nr_cpus_online = nr; - ret = read(fd, &nr, sizeof(nr)); + ret = readn(fd, &nr, sizeof(nr)); if (ret != sizeof(nr)) return -1; @@ -1684,7 +1767,7 @@ static int process_total_mem(struct perf_file_section *section __maybe_unused, uint64_t mem; size_t ret; - ret = read(fd, &mem, sizeof(mem)); + ret = readn(fd, &mem, sizeof(mem)); if (ret != sizeof(mem)) return -1; @@ -1756,7 +1839,7 @@ static int process_cmdline(struct perf_file_section *section __maybe_unused, u32 nr, i; struct strbuf sb; - ret = read(fd, &nr, sizeof(nr)); + ret = readn(fd, &nr, sizeof(nr)); if (ret != sizeof(nr)) return -1; @@ -1792,7 +1875,7 @@ static int process_cpu_topology(struct perf_file_section *section __maybe_unused char *str; struct strbuf sb; - ret = read(fd, &nr, sizeof(nr)); + ret = readn(fd, &nr, sizeof(nr)); if (ret != sizeof(nr)) return -1; @@ -1813,7 +1896,7 @@ static int process_cpu_topology(struct perf_file_section *section __maybe_unused } ph->env.sibling_cores = strbuf_detach(&sb, NULL); - ret = read(fd, &nr, sizeof(nr)); + ret = readn(fd, &nr, sizeof(nr)); if (ret != sizeof(nr)) return -1; @@ -1850,7 +1933,7 @@ static int process_numa_topology(struct perf_file_section *section __maybe_unuse struct strbuf sb; /* nr nodes */ - ret = read(fd, &nr, sizeof(nr)); + ret = readn(fd, &nr, sizeof(nr)); if (ret != sizeof(nr)) goto error; @@ -1862,15 +1945,15 @@ static int process_numa_topology(struct perf_file_section *section __maybe_unuse for (i = 0; i < nr; i++) { /* node number */ - ret = read(fd, &node, sizeof(node)); + ret = readn(fd, &node, sizeof(node)); if (ret != sizeof(node)) goto error; - ret = read(fd, &mem_total, sizeof(u64)); + ret = readn(fd, &mem_total, sizeof(u64)); if (ret != sizeof(u64)) goto error; - ret = read(fd, &mem_free, sizeof(u64)); + ret = readn(fd, &mem_free, sizeof(u64)); if (ret != sizeof(u64)) goto error; @@ -1909,7 +1992,7 @@ static int process_pmu_mappings(struct perf_file_section *section __maybe_unused u32 type; struct strbuf sb; - ret = read(fd, &pmu_num, sizeof(pmu_num)); + ret = readn(fd, &pmu_num, sizeof(pmu_num)); if (ret != sizeof(pmu_num)) return -1; @@ -1925,7 +2008,7 @@ static int process_pmu_mappings(struct perf_file_section *section __maybe_unused strbuf_init(&sb, 128); while (pmu_num) { - if (read(fd, &type, sizeof(type)) != sizeof(type)) + if (readn(fd, &type, sizeof(type)) != sizeof(type)) goto error; if (ph->needs_swap) type = bswap_32(type); @@ -1949,6 +2032,98 @@ error: return -1; } +static int process_group_desc(struct perf_file_section *section __maybe_unused, + struct perf_header *ph, int fd, + void *data __maybe_unused) +{ + size_t ret = -1; + u32 i, nr, nr_groups; + struct perf_session *session; + struct perf_evsel *evsel, *leader = NULL; + struct group_desc { + char *name; + u32 leader_idx; + u32 nr_members; + } *desc; + + if (readn(fd, &nr_groups, sizeof(nr_groups)) != sizeof(nr_groups)) + return -1; + + if (ph->needs_swap) + nr_groups = bswap_32(nr_groups); + + ph->env.nr_groups = nr_groups; + if (!nr_groups) { + pr_debug("group desc not available\n"); + return 0; + } + + desc = calloc(nr_groups, sizeof(*desc)); + if (!desc) + return -1; + + for (i = 0; i < nr_groups; i++) { + desc[i].name = do_read_string(fd, ph); + if (!desc[i].name) + goto out_free; + + if (readn(fd, &desc[i].leader_idx, sizeof(u32)) != sizeof(u32)) + goto out_free; + + if (readn(fd, &desc[i].nr_members, sizeof(u32)) != sizeof(u32)) + goto out_free; + + if (ph->needs_swap) { + desc[i].leader_idx = bswap_32(desc[i].leader_idx); + desc[i].nr_members = bswap_32(desc[i].nr_members); + } + } + + /* + * Rebuild group relationship based on the group_desc + */ + session = container_of(ph, struct perf_session, header); + session->evlist->nr_groups = nr_groups; + + i = nr = 0; + list_for_each_entry(evsel, &session->evlist->entries, node) { + if (evsel->idx == (int) desc[i].leader_idx) { + evsel->leader = evsel; + /* {anon_group} is a dummy name */ + if (strcmp(desc[i].name, "{anon_group}")) + evsel->group_name = desc[i].name; + evsel->nr_members = desc[i].nr_members; + + if (i >= nr_groups || nr > 0) { + pr_debug("invalid group desc\n"); + goto out_free; + } + + leader = evsel; + nr = evsel->nr_members - 1; + i++; + } else if (nr) { + /* This is a group member */ + evsel->leader = leader; + + nr--; + } + } + + if (i != nr_groups || nr != 0) { + pr_debug("invalid group desc\n"); + goto out_free; + } + + ret = 0; +out_free: + while ((int) --i >= 0) + free(desc[i].name); + free(desc); + + return ret; +} + struct feature_ops { int (*write)(int fd, struct perf_header *h, struct perf_evlist *evlist); void (*print)(struct perf_header *h, int fd, FILE *fp); @@ -1988,6 +2163,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { FEAT_OPF(HEADER_NUMA_TOPOLOGY, numa_topology), FEAT_OPA(HEADER_BRANCH_STACK, branch_stack), FEAT_OPP(HEADER_PMU_MAPPINGS, pmu_mappings), + FEAT_OPP(HEADER_GROUP_DESC, group_desc), }; struct header_print_data { @@ -2077,7 +2253,7 @@ static int perf_header__adds_write(struct perf_header *header, if (!nr_sections) return 0; - feat_sec = p = calloc(sizeof(*feat_sec), nr_sections); + feat_sec = p = calloc(nr_sections, sizeof(*feat_sec)); if (feat_sec == NULL) return -ENOMEM; @@ -2249,7 +2425,7 @@ int perf_header__process_sections(struct perf_header *header, int fd, if (!nr_sections) return 0; - feat_sec = sec = calloc(sizeof(*feat_sec), nr_sections); + feat_sec = sec = calloc(nr_sections, sizeof(*feat_sec)); if (!feat_sec) return -1; @@ -2912,16 +3088,22 @@ int perf_event__process_tracing_data(union perf_event *event, session->repipe); padding = PERF_ALIGN(size_read, sizeof(u64)) - size_read; - if (read(session->fd, buf, padding) < 0) - die("reading input file"); + if (readn(session->fd, buf, padding) < 0) { + pr_err("%s: reading input file", __func__); + return -1; + } if (session->repipe) { int retw = write(STDOUT_FILENO, buf, padding); - if (retw <= 0 || retw != padding) - die("repiping tracing data padding"); + if (retw <= 0 || retw != padding) { + pr_err("%s: repiping tracing data padding", __func__); + return -1; + } } - if (size_read + padding != size) - die("tracing data size mismatch"); + if (size_read + padding != size) { + pr_err("%s: tracing data size mismatch", __func__); + return -1; + } perf_evlist__prepare_tracepoint_events(session->evlist, session->pevent); diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 20f0344accb1..c9fc55cada6d 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -29,6 +29,7 @@ enum { HEADER_NUMA_TOPOLOGY, HEADER_BRANCH_STACK, HEADER_PMU_MAPPINGS, + HEADER_GROUP_DESC, HEADER_LAST_FEATURE, HEADER_FEAT_BITS = 256, }; @@ -79,6 +80,7 @@ struct perf_session_env { char *numa_nodes; int nr_pmu_mappings; char *pmu_mappings; + int nr_groups; }; struct perf_header { diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index cb17e2a8c6ed..f855941bebea 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -4,6 +4,7 @@ #include "hist.h" #include "session.h" #include "sort.h" +#include "evsel.h" #include <math.h> static bool hists__filter_entry_by_dso(struct hists *hists, @@ -82,6 +83,9 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) hists__new_col_len(hists, HISTC_DSO, len); } + if (h->parent) + hists__new_col_len(hists, HISTC_PARENT, h->parent->namelen); + if (h->branch_info) { int symlen; /* @@ -242,6 +246,14 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template) if (he->ms.map) he->ms.map->referenced = true; + + if (he->branch_info) { + if (he->branch_info->from.map) + he->branch_info->from.map->referenced = true; + if (he->branch_info->to.map) + he->branch_info->to.map->referenced = true; + } + if (symbol_conf.use_callchain) callchain_init(he->callchain); @@ -251,7 +263,7 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template) return he; } -static void hists__inc_nr_entries(struct hists *hists, struct hist_entry *h) +void hists__inc_nr_entries(struct hists *hists, struct hist_entry *h) { if (!h->filtered) { hists__calc_col_len(hists, h); @@ -285,7 +297,13 @@ static struct hist_entry *add_hist_entry(struct hists *hists, parent = *p; he = rb_entry(parent, struct hist_entry, rb_node_in); - cmp = hist_entry__cmp(entry, he); + /* + * Make sure that it receives arguments in a same order as + * hist_entry__collapse() so that we can use an appropriate + * function when searching an entry regardless which sort + * keys were used. + */ + cmp = hist_entry__cmp(he, entry); if (!cmp) { he_stat__add_period(&he->stat, period); @@ -523,6 +541,62 @@ void hists__collapse_resort_threaded(struct hists *hists) * reverse the map, sort on period. */ +static int period_cmp(u64 period_a, u64 period_b) +{ + if (period_a > period_b) + return 1; + if (period_a < period_b) + return -1; + return 0; +} + +static int hist_entry__sort_on_period(struct hist_entry *a, + struct hist_entry *b) +{ + int ret; + int i, nr_members; + struct perf_evsel *evsel; + struct hist_entry *pair; + u64 *periods_a, *periods_b; + + ret = period_cmp(a->stat.period, b->stat.period); + if (ret || !symbol_conf.event_group) + return ret; + + evsel = hists_to_evsel(a->hists); + nr_members = evsel->nr_members; + if (nr_members <= 1) + return ret; + + periods_a = zalloc(sizeof(periods_a) * nr_members); + periods_b = zalloc(sizeof(periods_b) * nr_members); + + if (!periods_a || !periods_b) + goto out; + + list_for_each_entry(pair, &a->pairs.head, pairs.node) { + evsel = hists_to_evsel(pair->hists); + periods_a[perf_evsel__group_idx(evsel)] = pair->stat.period; + } + + list_for_each_entry(pair, &b->pairs.head, pairs.node) { + evsel = hists_to_evsel(pair->hists); + periods_b[perf_evsel__group_idx(evsel)] = pair->stat.period; + } + + for (i = 1; i < nr_members; i++) { + ret = period_cmp(periods_a[i], periods_b[i]); + if (ret) + break; + } + +out: + free(periods_a); + free(periods_b); + + return ret; +} + static void __hists__insert_output_entry(struct rb_root *entries, struct hist_entry *he, u64 min_callchain_hits) @@ -539,7 +613,7 @@ static void __hists__insert_output_entry(struct rb_root *entries, parent = *p; iter = rb_entry(parent, struct hist_entry, rb_node); - if (he->stat.period > iter->stat.period) + if (hist_entry__sort_on_period(he, iter) > 0) p = &(*p)->rb_left; else p = &(*p)->rb_right; @@ -711,25 +785,38 @@ int hist_entry__annotate(struct hist_entry *he, size_t privsize) return symbol__annotate(he->ms.sym, he->ms.map, privsize); } +void events_stats__inc(struct events_stats *stats, u32 type) +{ + ++stats->nr_events[0]; + ++stats->nr_events[type]; +} + void hists__inc_nr_events(struct hists *hists, u32 type) { - ++hists->stats.nr_events[0]; - ++hists->stats.nr_events[type]; + events_stats__inc(&hists->stats, type); } static struct hist_entry *hists__add_dummy_entry(struct hists *hists, struct hist_entry *pair) { - struct rb_node **p = &hists->entries.rb_node; + struct rb_root *root; + struct rb_node **p; struct rb_node *parent = NULL; struct hist_entry *he; int cmp; + if (sort__need_collapse) + root = &hists->entries_collapsed; + else + root = hists->entries_in; + + p = &root->rb_node; + while (*p != NULL) { parent = *p; - he = rb_entry(parent, struct hist_entry, rb_node); + he = rb_entry(parent, struct hist_entry, rb_node_in); - cmp = hist_entry__cmp(pair, he); + cmp = hist_entry__collapse(he, pair); if (!cmp) goto out; @@ -744,8 +831,8 @@ static struct hist_entry *hists__add_dummy_entry(struct hists *hists, if (he) { memset(&he->stat, 0, sizeof(he->stat)); he->hists = hists; - rb_link_node(&he->rb_node, parent, p); - rb_insert_color(&he->rb_node, &hists->entries); + rb_link_node(&he->rb_node_in, parent, p); + rb_insert_color(&he->rb_node_in, root); hists__inc_nr_entries(hists, he); } out: @@ -755,11 +842,16 @@ out: static struct hist_entry *hists__find_entry(struct hists *hists, struct hist_entry *he) { - struct rb_node *n = hists->entries.rb_node; + struct rb_node *n; + + if (sort__need_collapse) + n = hists->entries_collapsed.rb_node; + else + n = hists->entries_in->rb_node; while (n) { - struct hist_entry *iter = rb_entry(n, struct hist_entry, rb_node); - int64_t cmp = hist_entry__cmp(he, iter); + struct hist_entry *iter = rb_entry(n, struct hist_entry, rb_node_in); + int64_t cmp = hist_entry__collapse(iter, he); if (cmp < 0) n = n->rb_left; @@ -777,15 +869,21 @@ static struct hist_entry *hists__find_entry(struct hists *hists, */ void hists__match(struct hists *leader, struct hists *other) { + struct rb_root *root; struct rb_node *nd; struct hist_entry *pos, *pair; - for (nd = rb_first(&leader->entries); nd; nd = rb_next(nd)) { - pos = rb_entry(nd, struct hist_entry, rb_node); + if (sort__need_collapse) + root = &leader->entries_collapsed; + else + root = leader->entries_in; + + for (nd = rb_first(root); nd; nd = rb_next(nd)) { + pos = rb_entry(nd, struct hist_entry, rb_node_in); pair = hists__find_entry(other, pos); if (pair) - hist__entry_add_pair(pos, pair); + hist_entry__add_pair(pair, pos); } } @@ -796,17 +894,23 @@ void hists__match(struct hists *leader, struct hists *other) */ int hists__link(struct hists *leader, struct hists *other) { + struct rb_root *root; struct rb_node *nd; struct hist_entry *pos, *pair; - for (nd = rb_first(&other->entries); nd; nd = rb_next(nd)) { - pos = rb_entry(nd, struct hist_entry, rb_node); + if (sort__need_collapse) + root = &other->entries_collapsed; + else + root = other->entries_in; + + for (nd = rb_first(root); nd; nd = rb_next(nd)) { + pos = rb_entry(nd, struct hist_entry, rb_node_in); if (!hist_entry__has_pairs(pos)) { pair = hists__add_dummy_entry(leader, pos); if (pair == NULL) return -1; - hist__entry_add_pair(pair, pos); + hist_entry__add_pair(pos, pair); } } diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 8b091a51e4a2..38624686ee9a 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -96,8 +96,10 @@ void hists__decay_entries_threaded(struct hists *hists, bool zap_user, bool zap_kernel); void hists__output_recalc_col_len(struct hists *hists, int max_rows); +void hists__inc_nr_entries(struct hists *hists, struct hist_entry *h); void hists__inc_nr_events(struct hists *self, u32 type); -size_t hists__fprintf_nr_events(struct hists *self, FILE *fp); +void events_stats__inc(struct events_stats *stats, u32 type); +size_t events_stats__fprintf(struct events_stats *stats, FILE *fp); size_t hists__fprintf(struct hists *self, bool show_header, int max_rows, int max_cols, FILE *fp); @@ -126,13 +128,19 @@ struct perf_hpp { }; struct perf_hpp_fmt { - bool cond; int (*header)(struct perf_hpp *hpp); int (*width)(struct perf_hpp *hpp); int (*color)(struct perf_hpp *hpp, struct hist_entry *he); int (*entry)(struct perf_hpp *hpp, struct hist_entry *he); + + struct list_head list; }; +extern struct list_head perf_hpp__list; + +#define perf_hpp__for_each_format(format) \ + list_for_each_entry(format, &perf_hpp__list, list) + extern struct perf_hpp_fmt perf_hpp__format[]; enum { @@ -148,14 +156,14 @@ enum { PERF_HPP__DELTA, PERF_HPP__RATIO, PERF_HPP__WEIGHTED_DIFF, - PERF_HPP__DISPL, PERF_HPP__FORMULA, PERF_HPP__MAX_INDEX }; void perf_hpp__init(void); -void perf_hpp__column_enable(unsigned col, bool enable); +void perf_hpp__column_register(struct perf_hpp_fmt *format); +void perf_hpp__column_enable(unsigned col); int hist_entry__period_snprintf(struct perf_hpp *hpp, struct hist_entry *he, bool color); @@ -219,8 +227,10 @@ int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist __maybe_unused, unsigned int hists__sort_list_width(struct hists *self); -double perf_diff__compute_delta(struct hist_entry *he); -double perf_diff__compute_ratio(struct hist_entry *he); -s64 perf_diff__compute_wdiff(struct hist_entry *he); -int perf_diff__formula(char *buf, size_t size, struct hist_entry *he); +double perf_diff__compute_delta(struct hist_entry *he, struct hist_entry *pair); +double perf_diff__compute_ratio(struct hist_entry *he, struct hist_entry *pair); +s64 perf_diff__compute_wdiff(struct hist_entry *he, struct hist_entry *pair); +int perf_diff__formula(struct hist_entry *he, struct hist_entry *pair, + char *buf, size_t size); +double perf_diff__period_percent(struct hist_entry *he, u64 period); #endif /* __PERF_HIST_H */ diff --git a/tools/perf/util/include/linux/bitops.h b/tools/perf/util/include/linux/bitops.h index a55d8cf083c9..45cf10a562bd 100644 --- a/tools/perf/util/include/linux/bitops.h +++ b/tools/perf/util/include/linux/bitops.h @@ -14,6 +14,7 @@ #define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long)) #define BITS_TO_U64(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u64)) #define BITS_TO_U32(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u32)) +#define BITS_TO_BYTES(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE) #define for_each_set_bit(bit, addr, size) \ for ((bit) = find_first_bit((addr), (size)); \ diff --git a/tools/perf/util/intlist.c b/tools/perf/util/intlist.c index 9d0740024ba8..11a8d86f7fea 100644 --- a/tools/perf/util/intlist.c +++ b/tools/perf/util/intlist.c @@ -59,16 +59,40 @@ void intlist__remove(struct intlist *ilist, struct int_node *node) struct int_node *intlist__find(struct intlist *ilist, int i) { - struct int_node *node = NULL; - struct rb_node *rb_node = rblist__find(&ilist->rblist, (void *)((long)i)); + struct int_node *node; + struct rb_node *rb_node; + if (ilist == NULL) + return NULL; + + node = NULL; + rb_node = rblist__find(&ilist->rblist, (void *)((long)i)); if (rb_node) node = container_of(rb_node, struct int_node, rb_node); return node; } -struct intlist *intlist__new(void) +static int intlist__parse_list(struct intlist *ilist, const char *s) +{ + char *sep; + int err; + + do { + long value = strtol(s, &sep, 10); + err = -EINVAL; + if (*sep != ',' && *sep != '\0') + break; + err = intlist__add(ilist, value); + if (err) + break; + s = sep + 1; + } while (*sep != '\0'); + + return err; +} + +struct intlist *intlist__new(const char *slist) { struct intlist *ilist = malloc(sizeof(*ilist)); @@ -77,9 +101,15 @@ struct intlist *intlist__new(void) ilist->rblist.node_cmp = intlist__node_cmp; ilist->rblist.node_new = intlist__node_new; ilist->rblist.node_delete = intlist__node_delete; + + if (slist && intlist__parse_list(ilist, slist)) + goto out_delete; } return ilist; +out_delete: + intlist__delete(ilist); + return NULL; } void intlist__delete(struct intlist *ilist) diff --git a/tools/perf/util/intlist.h b/tools/perf/util/intlist.h index 6d63ab90db50..62351dad848f 100644 --- a/tools/perf/util/intlist.h +++ b/tools/perf/util/intlist.h @@ -15,7 +15,7 @@ struct intlist { struct rblist rblist; }; -struct intlist *intlist__new(void); +struct intlist *intlist__new(const char *slist); void intlist__delete(struct intlist *ilist); void intlist__remove(struct intlist *ilist, struct int_node *in); diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 1f09d0581e6b..efdb38e65a92 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1,10 +1,15 @@ +#include "callchain.h" #include "debug.h" #include "event.h" +#include "evsel.h" +#include "hist.h" #include "machine.h" #include "map.h" +#include "sort.h" #include "strlist.h" #include "thread.h" #include <stdbool.h> +#include "unwind.h" int machine__init(struct machine *machine, const char *root_dir, pid_t pid) { @@ -48,6 +53,29 @@ static void dsos__delete(struct list_head *dsos) } } +void machine__delete_dead_threads(struct machine *machine) +{ + struct thread *n, *t; + + list_for_each_entry_safe(t, n, &machine->dead_threads, node) { + list_del(&t->node); + thread__delete(t); + } +} + +void machine__delete_threads(struct machine *machine) +{ + struct rb_node *nd = rb_first(&machine->threads); + + while (nd) { + struct thread *t = rb_entry(nd, struct thread, rb_node); + + rb_erase(&t->rb_node, &machine->threads); + nd = rb_next(nd); + thread__delete(t); + } +} + void machine__exit(struct machine *machine) { map_groups__exit(&machine->kmaps); @@ -63,10 +91,22 @@ void machine__delete(struct machine *machine) free(machine); } -struct machine *machines__add(struct rb_root *machines, pid_t pid, +void machines__init(struct machines *machines) +{ + machine__init(&machines->host, "", HOST_KERNEL_ID); + machines->guests = RB_ROOT; +} + +void machines__exit(struct machines *machines) +{ + machine__exit(&machines->host); + /* XXX exit guest */ +} + +struct machine *machines__add(struct machines *machines, pid_t pid, const char *root_dir) { - struct rb_node **p = &machines->rb_node; + struct rb_node **p = &machines->guests.rb_node; struct rb_node *parent = NULL; struct machine *pos, *machine = malloc(sizeof(*machine)); @@ -88,18 +128,21 @@ struct machine *machines__add(struct rb_root *machines, pid_t pid, } rb_link_node(&machine->rb_node, parent, p); - rb_insert_color(&machine->rb_node, machines); + rb_insert_color(&machine->rb_node, &machines->guests); return machine; } -struct machine *machines__find(struct rb_root *machines, pid_t pid) +struct machine *machines__find(struct machines *machines, pid_t pid) { - struct rb_node **p = &machines->rb_node; + struct rb_node **p = &machines->guests.rb_node; struct rb_node *parent = NULL; struct machine *machine; struct machine *default_machine = NULL; + if (pid == HOST_KERNEL_ID) + return &machines->host; + while (*p != NULL) { parent = *p; machine = rb_entry(parent, struct machine, rb_node); @@ -116,7 +159,7 @@ struct machine *machines__find(struct rb_root *machines, pid_t pid) return default_machine; } -struct machine *machines__findnew(struct rb_root *machines, pid_t pid) +struct machine *machines__findnew(struct machines *machines, pid_t pid) { char path[PATH_MAX]; const char *root_dir = ""; @@ -150,12 +193,12 @@ out: return machine; } -void machines__process(struct rb_root *machines, - machine__process_t process, void *data) +void machines__process_guests(struct machines *machines, + machine__process_t process, void *data) { struct rb_node *nd; - for (nd = rb_first(machines); nd; nd = rb_next(nd)) { + for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) { struct machine *pos = rb_entry(nd, struct machine, rb_node); process(pos, data); } @@ -175,12 +218,14 @@ char *machine__mmap_name(struct machine *machine, char *bf, size_t size) return bf; } -void machines__set_id_hdr_size(struct rb_root *machines, u16 id_hdr_size) +void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size) { struct rb_node *node; struct machine *machine; - for (node = rb_first(machines); node; node = rb_next(node)) { + machines->host.id_hdr_size = id_hdr_size; + + for (node = rb_first(&machines->guests); node; node = rb_next(node)) { machine = rb_entry(node, struct machine, rb_node); machine->id_hdr_size = id_hdr_size; } @@ -264,6 +309,537 @@ int machine__process_lost_event(struct machine *machine __maybe_unused, return 0; } +struct map *machine__new_module(struct machine *machine, u64 start, + const char *filename) +{ + struct map *map; + struct dso *dso = __dsos__findnew(&machine->kernel_dsos, filename); + + if (dso == NULL) + return NULL; + + map = map__new2(start, dso, MAP__FUNCTION); + if (map == NULL) + return NULL; + + if (machine__is_host(machine)) + dso->symtab_type = DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE; + else + dso->symtab_type = DSO_BINARY_TYPE__GUEST_KMODULE; + map_groups__insert(&machine->kmaps, map); + return map; +} + +size_t machines__fprintf_dsos(struct machines *machines, FILE *fp) +{ + struct rb_node *nd; + size_t ret = __dsos__fprintf(&machines->host.kernel_dsos, fp) + + __dsos__fprintf(&machines->host.user_dsos, fp); + + for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) { + struct machine *pos = rb_entry(nd, struct machine, rb_node); + ret += __dsos__fprintf(&pos->kernel_dsos, fp); + ret += __dsos__fprintf(&pos->user_dsos, fp); + } + + return ret; +} + +size_t machine__fprintf_dsos_buildid(struct machine *machine, FILE *fp, + bool (skip)(struct dso *dso, int parm), int parm) +{ + return __dsos__fprintf_buildid(&machine->kernel_dsos, fp, skip, parm) + + __dsos__fprintf_buildid(&machine->user_dsos, fp, skip, parm); +} + +size_t machines__fprintf_dsos_buildid(struct machines *machines, FILE *fp, + bool (skip)(struct dso *dso, int parm), int parm) +{ + struct rb_node *nd; + size_t ret = machine__fprintf_dsos_buildid(&machines->host, fp, skip, parm); + + for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) { + struct machine *pos = rb_entry(nd, struct machine, rb_node); + ret += machine__fprintf_dsos_buildid(pos, fp, skip, parm); + } + return ret; +} + +size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp) +{ + int i; + size_t printed = 0; + struct dso *kdso = machine->vmlinux_maps[MAP__FUNCTION]->dso; + + if (kdso->has_build_id) { + char filename[PATH_MAX]; + if (dso__build_id_filename(kdso, filename, sizeof(filename))) + printed += fprintf(fp, "[0] %s\n", filename); + } + + for (i = 0; i < vmlinux_path__nr_entries; ++i) + printed += fprintf(fp, "[%d] %s\n", + i + kdso->has_build_id, vmlinux_path[i]); + + return printed; +} + +size_t machine__fprintf(struct machine *machine, FILE *fp) +{ + size_t ret = 0; + struct rb_node *nd; + + for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) { + struct thread *pos = rb_entry(nd, struct thread, rb_node); + + ret += thread__fprintf(pos, fp); + } + + return ret; +} + +static struct dso *machine__get_kernel(struct machine *machine) +{ + const char *vmlinux_name = NULL; + struct dso *kernel; + + if (machine__is_host(machine)) { + vmlinux_name = symbol_conf.vmlinux_name; + if (!vmlinux_name) + vmlinux_name = "[kernel.kallsyms]"; + + kernel = dso__kernel_findnew(machine, vmlinux_name, + "[kernel]", + DSO_TYPE_KERNEL); + } else { + char bf[PATH_MAX]; + + if (machine__is_default_guest(machine)) + vmlinux_name = symbol_conf.default_guest_vmlinux_name; + if (!vmlinux_name) + vmlinux_name = machine__mmap_name(machine, bf, + sizeof(bf)); + + kernel = dso__kernel_findnew(machine, vmlinux_name, + "[guest.kernel]", + DSO_TYPE_GUEST_KERNEL); + } + + if (kernel != NULL && (!kernel->has_build_id)) + dso__read_running_kernel_build_id(kernel, machine); + + return kernel; +} + +struct process_args { + u64 start; +}; + +static int symbol__in_kernel(void *arg, const char *name, + char type __maybe_unused, u64 start) +{ + struct process_args *args = arg; + + if (strchr(name, '[')) + return 0; + + args->start = start; + return 1; +} + +/* Figure out the start address of kernel map from /proc/kallsyms */ +static u64 machine__get_kernel_start_addr(struct machine *machine) +{ + const char *filename; + char path[PATH_MAX]; + struct process_args args; + + if (machine__is_host(machine)) { + filename = "/proc/kallsyms"; + } else { + if (machine__is_default_guest(machine)) + filename = (char *)symbol_conf.default_guest_kallsyms; + else { + sprintf(path, "%s/proc/kallsyms", machine->root_dir); + filename = path; + } + } + + if (symbol__restricted_filename(filename, "/proc/kallsyms")) + return 0; + + if (kallsyms__parse(filename, &args, symbol__in_kernel) <= 0) + return 0; + + return args.start; +} + +int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel) +{ + enum map_type type; + u64 start = machine__get_kernel_start_addr(machine); + + for (type = 0; type < MAP__NR_TYPES; ++type) { + struct kmap *kmap; + + machine->vmlinux_maps[type] = map__new2(start, kernel, type); + if (machine->vmlinux_maps[type] == NULL) + return -1; + + machine->vmlinux_maps[type]->map_ip = + machine->vmlinux_maps[type]->unmap_ip = + identity__map_ip; + kmap = map__kmap(machine->vmlinux_maps[type]); + kmap->kmaps = &machine->kmaps; + map_groups__insert(&machine->kmaps, + machine->vmlinux_maps[type]); + } + + return 0; +} + +void machine__destroy_kernel_maps(struct machine *machine) +{ + enum map_type type; + + for (type = 0; type < MAP__NR_TYPES; ++type) { + struct kmap *kmap; + + if (machine->vmlinux_maps[type] == NULL) + continue; + + kmap = map__kmap(machine->vmlinux_maps[type]); + map_groups__remove(&machine->kmaps, + machine->vmlinux_maps[type]); + if (kmap->ref_reloc_sym) { + /* + * ref_reloc_sym is shared among all maps, so free just + * on one of them. + */ + if (type == MAP__FUNCTION) { + free((char *)kmap->ref_reloc_sym->name); + kmap->ref_reloc_sym->name = NULL; + free(kmap->ref_reloc_sym); + } + kmap->ref_reloc_sym = NULL; + } + + map__delete(machine->vmlinux_maps[type]); + machine->vmlinux_maps[type] = NULL; + } +} + +int machines__create_guest_kernel_maps(struct machines *machines) +{ + int ret = 0; + struct dirent **namelist = NULL; + int i, items = 0; + char path[PATH_MAX]; + pid_t pid; + char *endp; + + if (symbol_conf.default_guest_vmlinux_name || + symbol_conf.default_guest_modules || + symbol_conf.default_guest_kallsyms) { + machines__create_kernel_maps(machines, DEFAULT_GUEST_KERNEL_ID); + } + + if (symbol_conf.guestmount) { + items = scandir(symbol_conf.guestmount, &namelist, NULL, NULL); + if (items <= 0) + return -ENOENT; + for (i = 0; i < items; i++) { + if (!isdigit(namelist[i]->d_name[0])) { + /* Filter out . and .. */ + continue; + } + pid = (pid_t)strtol(namelist[i]->d_name, &endp, 10); + if ((*endp != '\0') || + (endp == namelist[i]->d_name) || + (errno == ERANGE)) { + pr_debug("invalid directory (%s). Skipping.\n", + namelist[i]->d_name); + continue; + } + sprintf(path, "%s/%s/proc/kallsyms", + symbol_conf.guestmount, + namelist[i]->d_name); + ret = access(path, R_OK); + if (ret) { + pr_debug("Can't access file %s\n", path); + goto failure; + } + machines__create_kernel_maps(machines, pid); + } +failure: + free(namelist); + } + + return ret; +} + +void machines__destroy_kernel_maps(struct machines *machines) +{ + struct rb_node *next = rb_first(&machines->guests); + + machine__destroy_kernel_maps(&machines->host); + + while (next) { + struct machine *pos = rb_entry(next, struct machine, rb_node); + + next = rb_next(&pos->rb_node); + rb_erase(&pos->rb_node, &machines->guests); + machine__delete(pos); + } +} + +int machines__create_kernel_maps(struct machines *machines, pid_t pid) +{ + struct machine *machine = machines__findnew(machines, pid); + + if (machine == NULL) + return -1; + + return machine__create_kernel_maps(machine); +} + +int machine__load_kallsyms(struct machine *machine, const char *filename, + enum map_type type, symbol_filter_t filter) +{ + struct map *map = machine->vmlinux_maps[type]; + int ret = dso__load_kallsyms(map->dso, filename, map, filter); + + if (ret > 0) { + dso__set_loaded(map->dso, type); + /* + * Since /proc/kallsyms will have multiple sessions for the + * kernel, with modules between them, fixup the end of all + * sections. + */ + __map_groups__fixup_end(&machine->kmaps, type); + } + + return ret; +} + +int machine__load_vmlinux_path(struct machine *machine, enum map_type type, + symbol_filter_t filter) +{ + struct map *map = machine->vmlinux_maps[type]; + int ret = dso__load_vmlinux_path(map->dso, map, filter); + + if (ret > 0) { + dso__set_loaded(map->dso, type); + map__reloc_vmlinux(map); + } + + return ret; +} + +static void map_groups__fixup_end(struct map_groups *mg) +{ + int i; + for (i = 0; i < MAP__NR_TYPES; ++i) + __map_groups__fixup_end(mg, i); +} + +static char *get_kernel_version(const char *root_dir) +{ + char version[PATH_MAX]; + FILE *file; + char *name, *tmp; + const char *prefix = "Linux version "; + + sprintf(version, "%s/proc/version", root_dir); + file = fopen(version, "r"); + if (!file) + return NULL; + + version[0] = '\0'; + tmp = fgets(version, sizeof(version), file); + fclose(file); + + name = strstr(version, prefix); + if (!name) + return NULL; + name += strlen(prefix); + tmp = strchr(name, ' '); + if (tmp) + *tmp = '\0'; + + return strdup(name); +} + +static int map_groups__set_modules_path_dir(struct map_groups *mg, + const char *dir_name) +{ + struct dirent *dent; + DIR *dir = opendir(dir_name); + int ret = 0; + + if (!dir) { + pr_debug("%s: cannot open %s dir\n", __func__, dir_name); + return -1; + } + + while ((dent = readdir(dir)) != NULL) { + char path[PATH_MAX]; + struct stat st; + + /*sshfs might return bad dent->d_type, so we have to stat*/ + snprintf(path, sizeof(path), "%s/%s", dir_name, dent->d_name); + if (stat(path, &st)) + continue; + + if (S_ISDIR(st.st_mode)) { + if (!strcmp(dent->d_name, ".") || + !strcmp(dent->d_name, "..")) + continue; + + ret = map_groups__set_modules_path_dir(mg, path); + if (ret < 0) + goto out; + } else { + char *dot = strrchr(dent->d_name, '.'), + dso_name[PATH_MAX]; + struct map *map; + char *long_name; + + if (dot == NULL || strcmp(dot, ".ko")) + continue; + snprintf(dso_name, sizeof(dso_name), "[%.*s]", + (int)(dot - dent->d_name), dent->d_name); + + strxfrchar(dso_name, '-', '_'); + map = map_groups__find_by_name(mg, MAP__FUNCTION, + dso_name); + if (map == NULL) + continue; + + long_name = strdup(path); + if (long_name == NULL) { + ret = -1; + goto out; + } + dso__set_long_name(map->dso, long_name); + map->dso->lname_alloc = 1; + dso__kernel_module_get_build_id(map->dso, ""); + } + } + +out: + closedir(dir); + return ret; +} + +static int machine__set_modules_path(struct machine *machine) +{ + char *version; + char modules_path[PATH_MAX]; + + version = get_kernel_version(machine->root_dir); + if (!version) + return -1; + + snprintf(modules_path, sizeof(modules_path), "%s/lib/modules/%s/kernel", + machine->root_dir, version); + free(version); + + return map_groups__set_modules_path_dir(&machine->kmaps, modules_path); +} + +static int machine__create_modules(struct machine *machine) +{ + char *line = NULL; + size_t n; + FILE *file; + struct map *map; + const char *modules; + char path[PATH_MAX]; + + if (machine__is_default_guest(machine)) + modules = symbol_conf.default_guest_modules; + else { + sprintf(path, "%s/proc/modules", machine->root_dir); + modules = path; + } + + if (symbol__restricted_filename(path, "/proc/modules")) + return -1; + + file = fopen(modules, "r"); + if (file == NULL) + return -1; + + while (!feof(file)) { + char name[PATH_MAX]; + u64 start; + char *sep; + int line_len; + + line_len = getline(&line, &n, file); + if (line_len < 0) + break; + + if (!line) + goto out_failure; + + line[--line_len] = '\0'; /* \n */ + + sep = strrchr(line, 'x'); + if (sep == NULL) + continue; + + hex2u64(sep + 1, &start); + + sep = strchr(line, ' '); + if (sep == NULL) + continue; + + *sep = '\0'; + + snprintf(name, sizeof(name), "[%s]", line); + map = machine__new_module(machine, start, name); + if (map == NULL) + goto out_delete_line; + dso__kernel_module_get_build_id(map->dso, machine->root_dir); + } + + free(line); + fclose(file); + + return machine__set_modules_path(machine); + +out_delete_line: + free(line); +out_failure: + return -1; +} + +int machine__create_kernel_maps(struct machine *machine) +{ + struct dso *kernel = machine__get_kernel(machine); + + if (kernel == NULL || + __machine__create_kernel_maps(machine, kernel) < 0) + return -1; + + if (symbol_conf.use_modules && machine__create_modules(machine) < 0) { + if (machine__is_host(machine)) + pr_debug("Problems creating module maps, " + "continuing anyway...\n"); + else + pr_debug("Problems creating module maps for guest %d, " + "continuing anyway...\n", machine->pid); + } + + /* + * Now that we have all the maps created, just set the ->end of them: + */ + map_groups__fixup_end(&machine->kmaps); + return 0; +} + static void machine__set_kernel_mmap_len(struct machine *machine, union perf_event *event) { @@ -462,3 +1038,189 @@ int machine__process_event(struct machine *machine, union perf_event *event) return ret; } + +void machine__remove_thread(struct machine *machine, struct thread *th) +{ + machine->last_match = NULL; + rb_erase(&th->rb_node, &machine->threads); + /* + * We may have references to this thread, for instance in some hist_entry + * instances, so just move them to a separate list. + */ + list_add_tail(&th->node, &machine->dead_threads); +} + +static bool symbol__match_parent_regex(struct symbol *sym) +{ + if (sym->name && !regexec(&parent_regex, sym->name, 0, NULL, 0)) + return 1; + + return 0; +} + +static const u8 cpumodes[] = { + PERF_RECORD_MISC_USER, + PERF_RECORD_MISC_KERNEL, + PERF_RECORD_MISC_GUEST_USER, + PERF_RECORD_MISC_GUEST_KERNEL +}; +#define NCPUMODES (sizeof(cpumodes)/sizeof(u8)) + +static void ip__resolve_ams(struct machine *machine, struct thread *thread, + struct addr_map_symbol *ams, + u64 ip) +{ + struct addr_location al; + size_t i; + u8 m; + + memset(&al, 0, sizeof(al)); + + for (i = 0; i < NCPUMODES; i++) { + m = cpumodes[i]; + /* + * We cannot use the header.misc hint to determine whether a + * branch stack address is user, kernel, guest, hypervisor. + * Branches may straddle the kernel/user/hypervisor boundaries. + * Thus, we have to try consecutively until we find a match + * or else, the symbol is unknown + */ + thread__find_addr_location(thread, machine, m, MAP__FUNCTION, + ip, &al, NULL); + if (al.sym) + goto found; + } +found: + ams->addr = ip; + ams->al_addr = al.addr; + ams->sym = al.sym; + ams->map = al.map; +} + +struct branch_info *machine__resolve_bstack(struct machine *machine, + struct thread *thr, + struct branch_stack *bs) +{ + struct branch_info *bi; + unsigned int i; + + bi = calloc(bs->nr, sizeof(struct branch_info)); + if (!bi) + return NULL; + + for (i = 0; i < bs->nr; i++) { + ip__resolve_ams(machine, thr, &bi[i].to, bs->entries[i].to); + ip__resolve_ams(machine, thr, &bi[i].from, bs->entries[i].from); + bi[i].flags = bs->entries[i].flags; + } + return bi; +} + +static int machine__resolve_callchain_sample(struct machine *machine, + struct thread *thread, + struct ip_callchain *chain, + struct symbol **parent) + +{ + u8 cpumode = PERF_RECORD_MISC_USER; + unsigned int i; + int err; + + callchain_cursor_reset(&callchain_cursor); + + if (chain->nr > PERF_MAX_STACK_DEPTH) { + pr_warning("corrupted callchain. skipping...\n"); + return 0; + } + + for (i = 0; i < chain->nr; i++) { + u64 ip; + struct addr_location al; + + if (callchain_param.order == ORDER_CALLEE) + ip = chain->ips[i]; + else + ip = chain->ips[chain->nr - i - 1]; + + if (ip >= PERF_CONTEXT_MAX) { + switch (ip) { + case PERF_CONTEXT_HV: + cpumode = PERF_RECORD_MISC_HYPERVISOR; + break; + case PERF_CONTEXT_KERNEL: + cpumode = PERF_RECORD_MISC_KERNEL; + break; + case PERF_CONTEXT_USER: + cpumode = PERF_RECORD_MISC_USER; + break; + default: + pr_debug("invalid callchain context: " + "%"PRId64"\n", (s64) ip); + /* + * It seems the callchain is corrupted. + * Discard all. + */ + callchain_cursor_reset(&callchain_cursor); + return 0; + } + continue; + } + + al.filtered = false; + thread__find_addr_location(thread, machine, cpumode, + MAP__FUNCTION, ip, &al, NULL); + if (al.sym != NULL) { + if (sort__has_parent && !*parent && + symbol__match_parent_regex(al.sym)) + *parent = al.sym; + if (!symbol_conf.use_callchain) + break; + } + + err = callchain_cursor_append(&callchain_cursor, + ip, al.map, al.sym); + if (err) + return err; + } + + return 0; +} + +static int unwind_entry(struct unwind_entry *entry, void *arg) +{ + struct callchain_cursor *cursor = arg; + return callchain_cursor_append(cursor, entry->ip, + entry->map, entry->sym); +} + +int machine__resolve_callchain(struct machine *machine, + struct perf_evsel *evsel, + struct thread *thread, + struct perf_sample *sample, + struct symbol **parent) + +{ + int ret; + + callchain_cursor_reset(&callchain_cursor); + + ret = machine__resolve_callchain_sample(machine, thread, + sample->callchain, parent); + if (ret) + return ret; + + /* Can we do dwarf post unwind? */ + if (!((evsel->attr.sample_type & PERF_SAMPLE_REGS_USER) && + (evsel->attr.sample_type & PERF_SAMPLE_STACK_USER))) + return 0; + + /* Bail out if nothing was captured. */ + if ((!sample->user_regs.regs) || + (!sample->user_stack.size)) + return 0; + + return unwind__get_entries(unwind_entry, &callchain_cursor, machine, + thread, evsel->attr.sample_regs_user, + sample); + +} diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index b7cde7467d55..5ac5892f2326 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -47,23 +47,32 @@ int machine__process_event(struct machine *machine, union perf_event *event); typedef void (*machine__process_t)(struct machine *machine, void *data); -void machines__process(struct rb_root *machines, - machine__process_t process, void *data); +struct machines { + struct machine host; + struct rb_root guests; +}; + +void machines__init(struct machines *machines); +void machines__exit(struct machines *machines); -struct machine *machines__add(struct rb_root *machines, pid_t pid, +void machines__process_guests(struct machines *machines, + machine__process_t process, void *data); + +struct machine *machines__add(struct machines *machines, pid_t pid, const char *root_dir); -struct machine *machines__find_host(struct rb_root *machines); -struct machine *machines__find(struct rb_root *machines, pid_t pid); -struct machine *machines__findnew(struct rb_root *machines, pid_t pid); +struct machine *machines__find_host(struct machines *machines); +struct machine *machines__find(struct machines *machines, pid_t pid); +struct machine *machines__findnew(struct machines *machines, pid_t pid); -void machines__set_id_hdr_size(struct rb_root *machines, u16 id_hdr_size); +void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size); char *machine__mmap_name(struct machine *machine, char *bf, size_t size); int machine__init(struct machine *machine, const char *root_dir, pid_t pid); void machine__exit(struct machine *machine); +void machine__delete_dead_threads(struct machine *machine); +void machine__delete_threads(struct machine *machine); void machine__delete(struct machine *machine); - struct branch_info *machine__resolve_bstack(struct machine *machine, struct thread *thread, struct branch_stack *bs); @@ -129,19 +138,19 @@ int machine__load_kallsyms(struct machine *machine, const char *filename, int machine__load_vmlinux_path(struct machine *machine, enum map_type type, symbol_filter_t filter); -size_t machine__fprintf_dsos_buildid(struct machine *machine, - FILE *fp, bool with_hits); -size_t machines__fprintf_dsos(struct rb_root *machines, FILE *fp); -size_t machines__fprintf_dsos_buildid(struct rb_root *machines, - FILE *fp, bool with_hits); +size_t machine__fprintf_dsos_buildid(struct machine *machine, FILE *fp, + bool (skip)(struct dso *dso, int parm), int parm); +size_t machines__fprintf_dsos(struct machines *machines, FILE *fp); +size_t machines__fprintf_dsos_buildid(struct machines *machines, FILE *fp, + bool (skip)(struct dso *dso, int parm), int parm); void machine__destroy_kernel_maps(struct machine *machine); int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel); int machine__create_kernel_maps(struct machine *machine); -int machines__create_kernel_maps(struct rb_root *machines, pid_t pid); -int machines__create_guest_kernel_maps(struct rb_root *machines); -void machines__destroy_guest_kernel_maps(struct rb_root *machines); +int machines__create_kernel_maps(struct machines *machines, pid_t pid); +int machines__create_guest_kernel_maps(struct machines *machines); +void machines__destroy_kernel_maps(struct machines *machines); size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp); diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 0328d45c4f2a..6fcb9de62340 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -11,6 +11,7 @@ #include "strlist.h" #include "vdso.h" #include "build-id.h" +#include <linux/string.h> const char *map_type__name[MAP__NR_TYPES] = { [MAP__FUNCTION] = "Functions", @@ -19,7 +20,8 @@ const char *map_type__name[MAP__NR_TYPES] = { static inline int is_anon_memory(const char *filename) { - return strcmp(filename, "//anon") == 0; + return !strcmp(filename, "//anon") || + !strcmp(filename, "/anon_hugepage (deleted)"); } static inline int is_no_dso_memory(const char *filename) @@ -28,29 +30,29 @@ static inline int is_no_dso_memory(const char *filename) !strcmp(filename, "[heap]"); } -void map__init(struct map *self, enum map_type type, +void map__init(struct map *map, enum map_type type, u64 start, u64 end, u64 pgoff, struct dso *dso) { - self->type = type; - self->start = start; - self->end = end; - self->pgoff = pgoff; - self->dso = dso; - self->map_ip = map__map_ip; - self->unmap_ip = map__unmap_ip; - RB_CLEAR_NODE(&self->rb_node); - self->groups = NULL; - self->referenced = false; - self->erange_warned = false; + map->type = type; + map->start = start; + map->end = end; + map->pgoff = pgoff; + map->dso = dso; + map->map_ip = map__map_ip; + map->unmap_ip = map__unmap_ip; + RB_CLEAR_NODE(&map->rb_node); + map->groups = NULL; + map->referenced = false; + map->erange_warned = false; } struct map *map__new(struct list_head *dsos__list, u64 start, u64 len, u64 pgoff, u32 pid, char *filename, enum map_type type) { - struct map *self = malloc(sizeof(*self)); + struct map *map = malloc(sizeof(*map)); - if (self != NULL) { + if (map != NULL) { char newfilename[PATH_MAX]; struct dso *dso; int anon, no_dso, vdso; @@ -73,10 +75,10 @@ struct map *map__new(struct list_head *dsos__list, u64 start, u64 len, if (dso == NULL) goto out_delete; - map__init(self, type, start, start + len, pgoff, dso); + map__init(map, type, start, start + len, pgoff, dso); if (anon || no_dso) { - self->map_ip = self->unmap_ip = identity__map_ip; + map->map_ip = map->unmap_ip = identity__map_ip; /* * Set memory without DSO as loaded. All map__find_* @@ -84,12 +86,12 @@ struct map *map__new(struct list_head *dsos__list, u64 start, u64 len, * unnecessary map__load warning. */ if (no_dso) - dso__set_loaded(dso, self->type); + dso__set_loaded(dso, map->type); } } - return self; + return map; out_delete: - free(self); + free(map); return NULL; } @@ -112,48 +114,48 @@ struct map *map__new2(u64 start, struct dso *dso, enum map_type type) return map; } -void map__delete(struct map *self) +void map__delete(struct map *map) { - free(self); + free(map); } -void map__fixup_start(struct map *self) +void map__fixup_start(struct map *map) { - struct rb_root *symbols = &self->dso->symbols[self->type]; + struct rb_root *symbols = &map->dso->symbols[map->type]; struct rb_node *nd = rb_first(symbols); if (nd != NULL) { struct symbol *sym = rb_entry(nd, struct symbol, rb_node); - self->start = sym->start; + map->start = sym->start; } } -void map__fixup_end(struct map *self) +void map__fixup_end(struct map *map) { - struct rb_root *symbols = &self->dso->symbols[self->type]; + struct rb_root *symbols = &map->dso->symbols[map->type]; struct rb_node *nd = rb_last(symbols); if (nd != NULL) { struct symbol *sym = rb_entry(nd, struct symbol, rb_node); - self->end = sym->end; + map->end = sym->end; } } #define DSO__DELETED "(deleted)" -int map__load(struct map *self, symbol_filter_t filter) +int map__load(struct map *map, symbol_filter_t filter) { - const char *name = self->dso->long_name; + const char *name = map->dso->long_name; int nr; - if (dso__loaded(self->dso, self->type)) + if (dso__loaded(map->dso, map->type)) return 0; - nr = dso__load(self->dso, self, filter); + nr = dso__load(map->dso, map, filter); if (nr < 0) { - if (self->dso->has_build_id) { + if (map->dso->has_build_id) { char sbuild_id[BUILD_ID_SIZE * 2 + 1]; - build_id__sprintf(self->dso->build_id, - sizeof(self->dso->build_id), + build_id__sprintf(map->dso->build_id, + sizeof(map->dso->build_id), sbuild_id); pr_warning("%s with build id %s not found", name, sbuild_id); @@ -183,43 +185,36 @@ int map__load(struct map *self, symbol_filter_t filter) * Only applies to the kernel, as its symtabs aren't relative like the * module ones. */ - if (self->dso->kernel) - map__reloc_vmlinux(self); + if (map->dso->kernel) + map__reloc_vmlinux(map); return 0; } -struct symbol *map__find_symbol(struct map *self, u64 addr, +struct symbol *map__find_symbol(struct map *map, u64 addr, symbol_filter_t filter) { - if (map__load(self, filter) < 0) + if (map__load(map, filter) < 0) return NULL; - return dso__find_symbol(self->dso, self->type, addr); + return dso__find_symbol(map->dso, map->type, addr); } -struct symbol *map__find_symbol_by_name(struct map *self, const char *name, +struct symbol *map__find_symbol_by_name(struct map *map, const char *name, symbol_filter_t filter) { - if (map__load(self, filter) < 0) + if (map__load(map, filter) < 0) return NULL; - if (!dso__sorted_by_name(self->dso, self->type)) - dso__sort_by_name(self->dso, self->type); + if (!dso__sorted_by_name(map->dso, map->type)) + dso__sort_by_name(map->dso, map->type); - return dso__find_symbol_by_name(self->dso, self->type, name); + return dso__find_symbol_by_name(map->dso, map->type, name); } -struct map *map__clone(struct map *self) +struct map *map__clone(struct map *map) { - struct map *map = malloc(sizeof(*self)); - - if (!map) - return NULL; - - memcpy(map, self, sizeof(*self)); - - return map; + return memdup(map, sizeof(*map)); } int map__overlap(struct map *l, struct map *r) @@ -236,10 +231,10 @@ int map__overlap(struct map *l, struct map *r) return 0; } -size_t map__fprintf(struct map *self, FILE *fp) +size_t map__fprintf(struct map *map, FILE *fp) { return fprintf(fp, " %" PRIx64 "-%" PRIx64 " %" PRIx64 " %s\n", - self->start, self->end, self->pgoff, self->dso->name); + map->start, map->end, map->pgoff, map->dso->name); } size_t map__fprintf_dsoname(struct map *map, FILE *fp) @@ -527,9 +522,9 @@ static u64 map__reloc_unmap_ip(struct map *map, u64 ip) return ip - (s64)map->pgoff; } -void map__reloc_vmlinux(struct map *self) +void map__reloc_vmlinux(struct map *map) { - struct kmap *kmap = map__kmap(self); + struct kmap *kmap = map__kmap(map); s64 reloc; if (!kmap->ref_reloc_sym || !kmap->ref_reloc_sym->unrelocated_addr) @@ -541,9 +536,9 @@ void map__reloc_vmlinux(struct map *self) if (!reloc) return; - self->map_ip = map__reloc_map_ip; - self->unmap_ip = map__reloc_unmap_ip; - self->pgoff = reloc; + map->map_ip = map__reloc_map_ip; + map->unmap_ip = map__reloc_unmap_ip; + map->pgoff = reloc; } void maps__insert(struct rb_root *maps, struct map *map) @@ -566,9 +561,9 @@ void maps__insert(struct rb_root *maps, struct map *map) rb_insert_color(&map->rb_node, maps); } -void maps__remove(struct rb_root *self, struct map *map) +void maps__remove(struct rb_root *maps, struct map *map) { - rb_erase(&map->rb_node, self); + rb_erase(&map->rb_node, maps); } struct map *maps__find(struct rb_root *maps, u64 ip) diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index bcb39e2a6965..a887f2c9dfbb 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -57,9 +57,9 @@ struct map_groups { struct machine *machine; }; -static inline struct kmap *map__kmap(struct map *self) +static inline struct kmap *map__kmap(struct map *map) { - return (struct kmap *)(self + 1); + return (struct kmap *)(map + 1); } static inline u64 map__map_ip(struct map *map, u64 ip) @@ -85,27 +85,27 @@ struct symbol; typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym); -void map__init(struct map *self, enum map_type type, +void map__init(struct map *map, enum map_type type, u64 start, u64 end, u64 pgoff, struct dso *dso); struct map *map__new(struct list_head *dsos__list, u64 start, u64 len, u64 pgoff, u32 pid, char *filename, enum map_type type); struct map *map__new2(u64 start, struct dso *dso, enum map_type type); -void map__delete(struct map *self); -struct map *map__clone(struct map *self); +void map__delete(struct map *map); +struct map *map__clone(struct map *map); int map__overlap(struct map *l, struct map *r); -size_t map__fprintf(struct map *self, FILE *fp); +size_t map__fprintf(struct map *map, FILE *fp); size_t map__fprintf_dsoname(struct map *map, FILE *fp); -int map__load(struct map *self, symbol_filter_t filter); -struct symbol *map__find_symbol(struct map *self, +int map__load(struct map *map, symbol_filter_t filter); +struct symbol *map__find_symbol(struct map *map, u64 addr, symbol_filter_t filter); -struct symbol *map__find_symbol_by_name(struct map *self, const char *name, +struct symbol *map__find_symbol_by_name(struct map *map, const char *name, symbol_filter_t filter); -void map__fixup_start(struct map *self); -void map__fixup_end(struct map *self); +void map__fixup_start(struct map *map); +void map__fixup_end(struct map *map); -void map__reloc_vmlinux(struct map *self); +void map__reloc_vmlinux(struct map *map); size_t __map_groups__fprintf_maps(struct map_groups *mg, enum map_type type, int verbose, FILE *fp); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 2d8d53bec17e..c84f48cf9678 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -380,8 +380,8 @@ static int add_tracepoint(struct list_head **listp, int *idx, return 0; } -static int add_tracepoint_multi(struct list_head **list, int *idx, - char *sys_name, char *evt_name) +static int add_tracepoint_multi_event(struct list_head **list, int *idx, + char *sys_name, char *evt_name) { char evt_path[MAXPATHLEN]; struct dirent *evt_ent; @@ -408,6 +408,47 @@ static int add_tracepoint_multi(struct list_head **list, int *idx, ret = add_tracepoint(list, idx, sys_name, evt_ent->d_name); } + closedir(evt_dir); + return ret; +} + +static int add_tracepoint_event(struct list_head **list, int *idx, + char *sys_name, char *evt_name) +{ + return strpbrk(evt_name, "*?") ? + add_tracepoint_multi_event(list, idx, sys_name, evt_name) : + add_tracepoint(list, idx, sys_name, evt_name); +} + +static int add_tracepoint_multi_sys(struct list_head **list, int *idx, + char *sys_name, char *evt_name) +{ + struct dirent *events_ent; + DIR *events_dir; + int ret = 0; + + events_dir = opendir(tracing_events_path); + if (!events_dir) { + perror("Can't open event dir"); + return -1; + } + + while (!ret && (events_ent = readdir(events_dir))) { + if (!strcmp(events_ent->d_name, ".") + || !strcmp(events_ent->d_name, "..") + || !strcmp(events_ent->d_name, "enable") + || !strcmp(events_ent->d_name, "header_event") + || !strcmp(events_ent->d_name, "header_page")) + continue; + + if (!strglobmatch(events_ent->d_name, sys_name)) + continue; + + ret = add_tracepoint_event(list, idx, events_ent->d_name, + evt_name); + } + + closedir(events_dir); return ret; } @@ -420,9 +461,10 @@ int parse_events_add_tracepoint(struct list_head **list, int *idx, if (ret) return ret; - return strpbrk(event, "*?") ? - add_tracepoint_multi(list, idx, sys, event) : - add_tracepoint(list, idx, sys, event); + if (strpbrk(sys, "*?")) + return add_tracepoint_multi_sys(list, idx, sys, event); + else + return add_tracepoint_event(list, idx, sys, event); } static int @@ -492,7 +534,7 @@ int parse_events_add_breakpoint(struct list_head **list, int *idx, } static int config_term(struct perf_event_attr *attr, - struct parse_events__term *term) + struct parse_events_term *term) { #define CHECK_TYPE_VAL(type) \ do { \ @@ -537,7 +579,7 @@ do { \ static int config_attr(struct perf_event_attr *attr, struct list_head *head, int fail) { - struct parse_events__term *term; + struct parse_events_term *term; list_for_each_entry(term, head, list) if (config_term(attr, term) && fail) @@ -563,14 +605,14 @@ int parse_events_add_numeric(struct list_head **list, int *idx, return add_event(list, idx, &attr, NULL); } -static int parse_events__is_name_term(struct parse_events__term *term) +static int parse_events__is_name_term(struct parse_events_term *term) { return term->type_term == PARSE_EVENTS__TERM_TYPE_NAME; } static char *pmu_event_name(struct list_head *head_terms) { - struct parse_events__term *term; + struct parse_events_term *term; list_for_each_entry(term, head_terms, list) if (parse_events__is_name_term(term)) @@ -657,14 +699,6 @@ static int get_event_modifier(struct event_modifier *mod, char *str, int exclude = eu | ek | eh; int exclude_GH = evsel ? evsel->exclude_GH : 0; - /* - * We are here for group and 'GH' was not set as event - * modifier and whatever event/group modifier override - * default 'GH' setup. - */ - if (evsel && !exclude_GH) - eH = eG = 0; - memset(mod, 0, sizeof(*mod)); while (*str) { @@ -814,7 +848,7 @@ static int parse_events__scanner(const char *str, void *data, int start_token) */ int parse_events_terms(struct list_head *terms, const char *str) { - struct parse_events_data__terms data = { + struct parse_events_terms data = { .terms = NULL, }; int ret; @@ -830,10 +864,9 @@ int parse_events_terms(struct list_head *terms, const char *str) return ret; } -int parse_events(struct perf_evlist *evlist, const char *str, - int unset __maybe_unused) +int parse_events(struct perf_evlist *evlist, const char *str) { - struct parse_events_data__events data = { + struct parse_events_evlist data = { .list = LIST_HEAD_INIT(data.list), .idx = evlist->nr_entries, }; @@ -843,6 +876,7 @@ int parse_events(struct perf_evlist *evlist, const char *str, if (!ret) { int entries = data.idx - evlist->nr_entries; perf_evlist__splice_list_tail(evlist, &data.list, entries); + evlist->nr_groups += data.nr_groups; return 0; } @@ -858,7 +892,7 @@ int parse_events_option(const struct option *opt, const char *str, int unset __maybe_unused) { struct perf_evlist *evlist = *(struct perf_evlist **)opt->value; - int ret = parse_events(evlist, str, unset); + int ret = parse_events(evlist, str); if (ret) { fprintf(stderr, "invalid or unsupported event: '%s'\n", str); @@ -1121,16 +1155,16 @@ void print_events(const char *event_glob, bool name_only) print_tracepoint_events(NULL, NULL, name_only); } -int parse_events__is_hardcoded_term(struct parse_events__term *term) +int parse_events__is_hardcoded_term(struct parse_events_term *term) { return term->type_term != PARSE_EVENTS__TERM_TYPE_USER; } -static int new_term(struct parse_events__term **_term, int type_val, +static int new_term(struct parse_events_term **_term, int type_val, int type_term, char *config, char *str, u64 num) { - struct parse_events__term *term; + struct parse_events_term *term; term = zalloc(sizeof(*term)); if (!term) @@ -1156,21 +1190,21 @@ static int new_term(struct parse_events__term **_term, int type_val, return 0; } -int parse_events__term_num(struct parse_events__term **term, +int parse_events_term__num(struct parse_events_term **term, int type_term, char *config, u64 num) { return new_term(term, PARSE_EVENTS__TERM_TYPE_NUM, type_term, config, NULL, num); } -int parse_events__term_str(struct parse_events__term **term, +int parse_events_term__str(struct parse_events_term **term, int type_term, char *config, char *str) { return new_term(term, PARSE_EVENTS__TERM_TYPE_STR, type_term, config, str, 0); } -int parse_events__term_sym_hw(struct parse_events__term **term, +int parse_events_term__sym_hw(struct parse_events_term **term, char *config, unsigned idx) { struct event_symbol *sym; @@ -1188,8 +1222,8 @@ int parse_events__term_sym_hw(struct parse_events__term **term, (char *) "event", (char *) sym->symbol, 0); } -int parse_events__term_clone(struct parse_events__term **new, - struct parse_events__term *term) +int parse_events_term__clone(struct parse_events_term **new, + struct parse_events_term *term) { return new_term(new, term->type_val, term->type_term, term->config, term->val.str, term->val.num); @@ -1197,7 +1231,7 @@ int parse_events__term_clone(struct parse_events__term **new, void parse_events__free_terms(struct list_head *terms) { - struct parse_events__term *term, *h; + struct parse_events_term *term, *h; list_for_each_entry_safe(term, h, terms, list) free(term); diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index b7af80b8bdda..8a4859315fd9 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -29,8 +29,7 @@ const char *event_type(int type); extern int parse_events_option(const struct option *opt, const char *str, int unset); -extern int parse_events(struct perf_evlist *evlist, const char *str, - int unset); +extern int parse_events(struct perf_evlist *evlist, const char *str); extern int parse_events_terms(struct list_head *terms, const char *str); extern int parse_filter(const struct option *opt, const char *str, int unset); @@ -51,7 +50,7 @@ enum { PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE, }; -struct parse_events__term { +struct parse_events_term { char *config; union { char *str; @@ -62,24 +61,25 @@ struct parse_events__term { struct list_head list; }; -struct parse_events_data__events { +struct parse_events_evlist { struct list_head list; int idx; + int nr_groups; }; -struct parse_events_data__terms { +struct parse_events_terms { struct list_head *terms; }; -int parse_events__is_hardcoded_term(struct parse_events__term *term); -int parse_events__term_num(struct parse_events__term **_term, +int parse_events__is_hardcoded_term(struct parse_events_term *term); +int parse_events_term__num(struct parse_events_term **_term, int type_term, char *config, u64 num); -int parse_events__term_str(struct parse_events__term **_term, +int parse_events_term__str(struct parse_events_term **_term, int type_term, char *config, char *str); -int parse_events__term_sym_hw(struct parse_events__term **term, +int parse_events_term__sym_hw(struct parse_events_term **term, char *config, unsigned idx); -int parse_events__term_clone(struct parse_events__term **new, - struct parse_events__term *term); +int parse_events_term__clone(struct parse_events_term **new, + struct parse_events_term *term); void parse_events__free_terms(struct list_head *terms); int parse_events__modifier_event(struct list_head *list, char *str, bool add); int parse_events__modifier_group(struct list_head *list, char *event_mod); diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 0f9914ae6bac..afc44c18dfe1 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -1,5 +1,4 @@ %pure-parser -%name-prefix "parse_events_" %parse-param {void *_data} %parse-param {void *scanner} %lex-param {void* scanner} @@ -23,6 +22,14 @@ do { \ YYABORT; \ } while (0) +static inc_group_count(struct list_head *list, + struct parse_events_evlist *data) +{ + /* Count groups only have more than 1 members */ + if (!list_is_last(list->next, list)) + data->nr_groups++; +} + %} %token PE_START_EVENTS PE_START_TERMS @@ -68,7 +75,7 @@ do { \ char *str; u64 num; struct list_head *head; - struct parse_events__term *term; + struct parse_events_term *term; } %% @@ -79,7 +86,7 @@ PE_START_TERMS start_terms start_events: groups { - struct parse_events_data__events *data = _data; + struct parse_events_evlist *data = _data; parse_events_update_lists($1, &data->list); } @@ -123,6 +130,7 @@ PE_NAME '{' events '}' { struct list_head *list = $3; + inc_group_count(list, _data); parse_events__set_leader($1, list); $$ = list; } @@ -131,6 +139,7 @@ PE_NAME '{' events '}' { struct list_head *list = $2; + inc_group_count(list, _data); parse_events__set_leader(NULL, list); $$ = list; } @@ -186,7 +195,7 @@ event_def: event_pmu | event_pmu: PE_NAME '/' event_config '/' { - struct parse_events_data__events *data = _data; + struct parse_events_evlist *data = _data; struct list_head *list = NULL; ABORT_ON(parse_events_add_pmu(&list, &data->idx, $1, $3)); @@ -202,7 +211,7 @@ PE_VALUE_SYM_SW event_legacy_symbol: value_sym '/' event_config '/' { - struct parse_events_data__events *data = _data; + struct parse_events_evlist *data = _data; struct list_head *list = NULL; int type = $1 >> 16; int config = $1 & 255; @@ -215,7 +224,7 @@ value_sym '/' event_config '/' | value_sym sep_slash_dc { - struct parse_events_data__events *data = _data; + struct parse_events_evlist *data = _data; struct list_head *list = NULL; int type = $1 >> 16; int config = $1 & 255; @@ -228,7 +237,7 @@ value_sym sep_slash_dc event_legacy_cache: PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT '-' PE_NAME_CACHE_OP_RESULT { - struct parse_events_data__events *data = _data; + struct parse_events_evlist *data = _data; struct list_head *list = NULL; ABORT_ON(parse_events_add_cache(&list, &data->idx, $1, $3, $5)); @@ -237,7 +246,7 @@ PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT '-' PE_NAME_CACHE_OP_RESULT | PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT { - struct parse_events_data__events *data = _data; + struct parse_events_evlist *data = _data; struct list_head *list = NULL; ABORT_ON(parse_events_add_cache(&list, &data->idx, $1, $3, NULL)); @@ -246,7 +255,7 @@ PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT | PE_NAME_CACHE_TYPE { - struct parse_events_data__events *data = _data; + struct parse_events_evlist *data = _data; struct list_head *list = NULL; ABORT_ON(parse_events_add_cache(&list, &data->idx, $1, NULL, NULL)); @@ -256,7 +265,7 @@ PE_NAME_CACHE_TYPE event_legacy_mem: PE_PREFIX_MEM PE_VALUE ':' PE_MODIFIER_BP sep_dc { - struct parse_events_data__events *data = _data; + struct parse_events_evlist *data = _data; struct list_head *list = NULL; ABORT_ON(parse_events_add_breakpoint(&list, &data->idx, @@ -266,7 +275,7 @@ PE_PREFIX_MEM PE_VALUE ':' PE_MODIFIER_BP sep_dc | PE_PREFIX_MEM PE_VALUE sep_dc { - struct parse_events_data__events *data = _data; + struct parse_events_evlist *data = _data; struct list_head *list = NULL; ABORT_ON(parse_events_add_breakpoint(&list, &data->idx, @@ -277,7 +286,7 @@ PE_PREFIX_MEM PE_VALUE sep_dc event_legacy_tracepoint: PE_NAME ':' PE_NAME { - struct parse_events_data__events *data = _data; + struct parse_events_evlist *data = _data; struct list_head *list = NULL; ABORT_ON(parse_events_add_tracepoint(&list, &data->idx, $1, $3)); @@ -287,7 +296,7 @@ PE_NAME ':' PE_NAME event_legacy_numeric: PE_VALUE ':' PE_VALUE { - struct parse_events_data__events *data = _data; + struct parse_events_evlist *data = _data; struct list_head *list = NULL; ABORT_ON(parse_events_add_numeric(&list, &data->idx, (u32)$1, $3, NULL)); @@ -297,7 +306,7 @@ PE_VALUE ':' PE_VALUE event_legacy_raw: PE_RAW { - struct parse_events_data__events *data = _data; + struct parse_events_evlist *data = _data; struct list_head *list = NULL; ABORT_ON(parse_events_add_numeric(&list, &data->idx, @@ -307,7 +316,7 @@ PE_RAW start_terms: event_config { - struct parse_events_data__terms *data = _data; + struct parse_events_terms *data = _data; data->terms = $1; } @@ -315,7 +324,7 @@ event_config: event_config ',' event_term { struct list_head *head = $1; - struct parse_events__term *term = $3; + struct parse_events_term *term = $3; ABORT_ON(!head); list_add_tail(&term->list, head); @@ -325,7 +334,7 @@ event_config ',' event_term event_term { struct list_head *head = malloc(sizeof(*head)); - struct parse_events__term *term = $1; + struct parse_events_term *term = $1; ABORT_ON(!head); INIT_LIST_HEAD(head); @@ -336,70 +345,70 @@ event_term event_term: PE_NAME '=' PE_NAME { - struct parse_events__term *term; + struct parse_events_term *term; - ABORT_ON(parse_events__term_str(&term, PARSE_EVENTS__TERM_TYPE_USER, + ABORT_ON(parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER, $1, $3)); $$ = term; } | PE_NAME '=' PE_VALUE { - struct parse_events__term *term; + struct parse_events_term *term; - ABORT_ON(parse_events__term_num(&term, PARSE_EVENTS__TERM_TYPE_USER, + ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER, $1, $3)); $$ = term; } | PE_NAME '=' PE_VALUE_SYM_HW { - struct parse_events__term *term; + struct parse_events_term *term; int config = $3 & 255; - ABORT_ON(parse_events__term_sym_hw(&term, $1, config)); + ABORT_ON(parse_events_term__sym_hw(&term, $1, config)); $$ = term; } | PE_NAME { - struct parse_events__term *term; + struct parse_events_term *term; - ABORT_ON(parse_events__term_num(&term, PARSE_EVENTS__TERM_TYPE_USER, + ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER, $1, 1)); $$ = term; } | PE_VALUE_SYM_HW { - struct parse_events__term *term; + struct parse_events_term *term; int config = $1 & 255; - ABORT_ON(parse_events__term_sym_hw(&term, NULL, config)); + ABORT_ON(parse_events_term__sym_hw(&term, NULL, config)); $$ = term; } | PE_TERM '=' PE_NAME { - struct parse_events__term *term; + struct parse_events_term *term; - ABORT_ON(parse_events__term_str(&term, (int)$1, NULL, $3)); + ABORT_ON(parse_events_term__str(&term, (int)$1, NULL, $3)); $$ = term; } | PE_TERM '=' PE_VALUE { - struct parse_events__term *term; + struct parse_events_term *term; - ABORT_ON(parse_events__term_num(&term, (int)$1, NULL, $3)); + ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, $3)); $$ = term; } | PE_TERM { - struct parse_events__term *term; + struct parse_events_term *term; - ABORT_ON(parse_events__term_num(&term, (int)$1, NULL, 1)); + ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, 1)); $$ = term; } diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 9bdc60c6f138..4c6f9c490a8d 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1,4 +1,3 @@ - #include <linux/list.h> #include <sys/types.h> #include <sys/stat.h> @@ -11,6 +10,19 @@ #include "parse-events.h" #include "cpumap.h" +struct perf_pmu_alias { + char *name; + struct list_head terms; + struct list_head list; +}; + +struct perf_pmu_format { + char *name; + int value; + DECLARE_BITMAP(bits, PERF_PMU_FORMAT_BITS); + struct list_head list; +}; + #define EVENT_SOURCE_DEVICE_PATH "/bus/event_source/devices/" int perf_pmu_parse(struct list_head *list, char *name); @@ -85,7 +97,7 @@ static int pmu_format(char *name, struct list_head *format) static int perf_pmu__new_alias(struct list_head *list, char *name, FILE *file) { - struct perf_pmu__alias *alias; + struct perf_pmu_alias *alias; char buf[256]; int ret; @@ -172,15 +184,15 @@ static int pmu_aliases(char *name, struct list_head *head) return 0; } -static int pmu_alias_terms(struct perf_pmu__alias *alias, +static int pmu_alias_terms(struct perf_pmu_alias *alias, struct list_head *terms) { - struct parse_events__term *term, *clone; + struct parse_events_term *term, *clone; LIST_HEAD(list); int ret; list_for_each_entry(term, &alias->terms, list) { - ret = parse_events__term_clone(&clone, term); + ret = parse_events_term__clone(&clone, term); if (ret) { parse_events__free_terms(&list); return ret; @@ -360,10 +372,10 @@ struct perf_pmu *perf_pmu__find(char *name) return pmu_lookup(name); } -static struct perf_pmu__format* +static struct perf_pmu_format * pmu_find_format(struct list_head *formats, char *name) { - struct perf_pmu__format *format; + struct perf_pmu_format *format; list_for_each_entry(format, formats, list) if (!strcmp(format->name, name)) @@ -403,9 +415,9 @@ static __u64 pmu_format_value(unsigned long *format, __u64 value) */ static int pmu_config_term(struct list_head *formats, struct perf_event_attr *attr, - struct parse_events__term *term) + struct parse_events_term *term) { - struct perf_pmu__format *format; + struct perf_pmu_format *format; __u64 *vp; /* @@ -450,7 +462,7 @@ int perf_pmu__config_terms(struct list_head *formats, struct perf_event_attr *attr, struct list_head *head_terms) { - struct parse_events__term *term; + struct parse_events_term *term; list_for_each_entry(term, head_terms, list) if (pmu_config_term(formats, attr, term)) @@ -471,10 +483,10 @@ int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr, return perf_pmu__config_terms(&pmu->format, attr, head_terms); } -static struct perf_pmu__alias *pmu_find_alias(struct perf_pmu *pmu, - struct parse_events__term *term) +static struct perf_pmu_alias *pmu_find_alias(struct perf_pmu *pmu, + struct parse_events_term *term) { - struct perf_pmu__alias *alias; + struct perf_pmu_alias *alias; char *name; if (parse_events__is_hardcoded_term(term)) @@ -507,8 +519,8 @@ static struct perf_pmu__alias *pmu_find_alias(struct perf_pmu *pmu, */ int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms) { - struct parse_events__term *term, *h; - struct perf_pmu__alias *alias; + struct parse_events_term *term, *h; + struct perf_pmu_alias *alias; int ret; list_for_each_entry_safe(term, h, head_terms, list) { @@ -527,7 +539,7 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms) int perf_pmu__new_format(struct list_head *list, char *name, int config, unsigned long *bits) { - struct perf_pmu__format *format; + struct perf_pmu_format *format; format = zalloc(sizeof(*format)); if (!format) @@ -548,7 +560,7 @@ void perf_pmu__set_format(unsigned long *bits, long from, long to) if (!to) to = from; - memset(bits, 0, BITS_TO_LONGS(PERF_PMU_FORMAT_BITS)); + memset(bits, 0, BITS_TO_BYTES(PERF_PMU_FORMAT_BITS)); for (b = from; b <= to; b++) set_bit(b, bits); } diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index a313ed76a49a..32fe55b659fa 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -12,19 +12,6 @@ enum { #define PERF_PMU_FORMAT_BITS 64 -struct perf_pmu__format { - char *name; - int value; - DECLARE_BITMAP(bits, PERF_PMU_FORMAT_BITS); - struct list_head list; -}; - -struct perf_pmu__alias { - char *name; - struct list_head terms; - struct list_head list; -}; - struct perf_pmu { char *name; __u32 type; @@ -42,7 +29,7 @@ int perf_pmu__config_terms(struct list_head *formats, struct list_head *head_terms); int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms); struct list_head *perf_pmu__alias(struct perf_pmu *pmu, - struct list_head *head_terms); + struct list_head *head_terms); int perf_pmu_wrap(void); void perf_pmu_error(struct list_head *list, char *name, char const *msg); diff --git a/tools/perf/util/pmu.y b/tools/perf/util/pmu.y index ec898047ebb9..bfd7e8509869 100644 --- a/tools/perf/util/pmu.y +++ b/tools/perf/util/pmu.y @@ -1,5 +1,4 @@ -%name-prefix "perf_pmu_" %parse-param {struct list_head *format} %parse-param {char *name} diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 1daf5c14e751..be0329394d56 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -413,12 +413,12 @@ static int convert_variable_type(Dwarf_Die *vr_die, dwarf_diename(vr_die), dwarf_diename(&type)); return -EINVAL; } + if (die_get_real_type(&type, &type) == NULL) { + pr_warning("Failed to get a type" + " information.\n"); + return -ENOENT; + } if (ret == DW_TAG_pointer_type) { - if (die_get_real_type(&type, &type) == NULL) { - pr_warning("Failed to get a type" - " information.\n"); - return -ENOENT; - } while (*ref_ptr) ref_ptr = &(*ref_ptr)->next; /* Add new reference with offset +0 */ diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index c40c2d33199e..64536a993f4a 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -18,4 +18,5 @@ util/cgroup.c util/debugfs.c util/rblist.c util/strlist.c +util/sysfs.c ../../lib/rbtree.c diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index a2657fd96837..925e0c3e6d91 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -1045,3 +1045,12 @@ error: if (PyErr_Occurred()) PyErr_SetString(PyExc_ImportError, "perf: Init failed!"); } + +/* + * Dummy, to avoid dragging all the test_attr infrastructure in the python + * binding. + */ +void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu, + int fd, int group_fd, unsigned long flags) +{ +} diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index f80605eb1855..eacec859f299 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -292,6 +292,7 @@ static void perl_process_tracepoint(union perf_event *perf_event __maybe_unused, ns = nsecs - s * NSECS_PER_SEC; scripting_context->event_data = data; + scripting_context->pevent = evsel->tp_format->pevent; ENTER; SAVETMPS; diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 14683dfca2ee..e87aa5d9696b 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -265,6 +265,7 @@ static void python_process_tracepoint(union perf_event *perf_event ns = nsecs - s * NSECS_PER_SEC; scripting_context->event_data = data; + scripting_context->pevent = evsel->tp_format->pevent; context = PyCObject_FromVoidPtr(scripting_context, NULL); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index ce6f51162386..bd85280bb6e8 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -16,7 +16,6 @@ #include "cpumap.h" #include "event-parse.h" #include "perf_regs.h" -#include "unwind.h" #include "vdso.h" static int perf_session__open(struct perf_session *self, bool force) @@ -87,13 +86,12 @@ void perf_session__set_id_hdr_size(struct perf_session *session) { u16 id_hdr_size = perf_evlist__id_hdr_size(session->evlist); - session->host_machine.id_hdr_size = id_hdr_size; machines__set_id_hdr_size(&session->machines, id_hdr_size); } int perf_session__create_kernel_maps(struct perf_session *self) { - int ret = machine__create_kernel_maps(&self->host_machine); + int ret = machine__create_kernel_maps(&self->machines.host); if (ret >= 0) ret = machines__create_guest_kernel_maps(&self->machines); @@ -102,8 +100,7 @@ int perf_session__create_kernel_maps(struct perf_session *self) static void perf_session__destroy_kernel_maps(struct perf_session *self) { - machine__destroy_kernel_maps(&self->host_machine); - machines__destroy_guest_kernel_maps(&self->machines); + machines__destroy_kernel_maps(&self->machines); } struct perf_session *perf_session__new(const char *filename, int mode, @@ -128,22 +125,11 @@ struct perf_session *perf_session__new(const char *filename, int mode, goto out; memcpy(self->filename, filename, len); - /* - * On 64bit we can mmap the data file in one go. No need for tiny mmap - * slices. On 32bit we use 32MB. - */ -#if BITS_PER_LONG == 64 - self->mmap_window = ULLONG_MAX; -#else - self->mmap_window = 32 * 1024 * 1024ULL; -#endif - self->machines = RB_ROOT; self->repipe = repipe; INIT_LIST_HEAD(&self->ordered_samples.samples); INIT_LIST_HEAD(&self->ordered_samples.sample_cache); INIT_LIST_HEAD(&self->ordered_samples.to_free); - machine__init(&self->host_machine, "", HOST_KERNEL_ID); - hists__init(&self->hists); + machines__init(&self->machines); if (mode == O_RDONLY) { if (perf_session__open(self, force) < 0) @@ -171,37 +157,30 @@ out_delete: return NULL; } -static void machine__delete_dead_threads(struct machine *machine) -{ - struct thread *n, *t; - - list_for_each_entry_safe(t, n, &machine->dead_threads, node) { - list_del(&t->node); - thread__delete(t); - } -} - static void perf_session__delete_dead_threads(struct perf_session *session) { - machine__delete_dead_threads(&session->host_machine); + machine__delete_dead_threads(&session->machines.host); } -static void machine__delete_threads(struct machine *self) +static void perf_session__delete_threads(struct perf_session *session) { - struct rb_node *nd = rb_first(&self->threads); - - while (nd) { - struct thread *t = rb_entry(nd, struct thread, rb_node); - - rb_erase(&t->rb_node, &self->threads); - nd = rb_next(nd); - thread__delete(t); - } + machine__delete_threads(&session->machines.host); } -static void perf_session__delete_threads(struct perf_session *session) +static void perf_session_env__delete(struct perf_session_env *env) { - machine__delete_threads(&session->host_machine); + free(env->hostname); + free(env->os_release); + free(env->version); + free(env->arch); + free(env->cpu_desc); + free(env->cpuid); + + free(env->cmdline); + free(env->sibling_cores); + free(env->sibling_threads); + free(env->numa_nodes); + free(env->pmu_mappings); } void perf_session__delete(struct perf_session *self) @@ -209,198 +188,13 @@ void perf_session__delete(struct perf_session *self) perf_session__destroy_kernel_maps(self); perf_session__delete_dead_threads(self); perf_session__delete_threads(self); - machine__exit(&self->host_machine); + perf_session_env__delete(&self->header.env); + machines__exit(&self->machines); close(self->fd); free(self); vdso__exit(); } -void machine__remove_thread(struct machine *self, struct thread *th) -{ - self->last_match = NULL; - rb_erase(&th->rb_node, &self->threads); - /* - * We may have references to this thread, for instance in some hist_entry - * instances, so just move them to a separate list. - */ - list_add_tail(&th->node, &self->dead_threads); -} - -static bool symbol__match_parent_regex(struct symbol *sym) -{ - if (sym->name && !regexec(&parent_regex, sym->name, 0, NULL, 0)) - return 1; - - return 0; -} - -static const u8 cpumodes[] = { - PERF_RECORD_MISC_USER, - PERF_RECORD_MISC_KERNEL, - PERF_RECORD_MISC_GUEST_USER, - PERF_RECORD_MISC_GUEST_KERNEL -}; -#define NCPUMODES (sizeof(cpumodes)/sizeof(u8)) - -static void ip__resolve_ams(struct machine *self, struct thread *thread, - struct addr_map_symbol *ams, - u64 ip) -{ - struct addr_location al; - size_t i; - u8 m; - - memset(&al, 0, sizeof(al)); - - for (i = 0; i < NCPUMODES; i++) { - m = cpumodes[i]; - /* - * We cannot use the header.misc hint to determine whether a - * branch stack address is user, kernel, guest, hypervisor. - * Branches may straddle the kernel/user/hypervisor boundaries. - * Thus, we have to try consecutively until we find a match - * or else, the symbol is unknown - */ - thread__find_addr_location(thread, self, m, MAP__FUNCTION, - ip, &al, NULL); - if (al.sym) - goto found; - } -found: - ams->addr = ip; - ams->al_addr = al.addr; - ams->sym = al.sym; - ams->map = al.map; -} - -struct branch_info *machine__resolve_bstack(struct machine *self, - struct thread *thr, - struct branch_stack *bs) -{ - struct branch_info *bi; - unsigned int i; - - bi = calloc(bs->nr, sizeof(struct branch_info)); - if (!bi) - return NULL; - - for (i = 0; i < bs->nr; i++) { - ip__resolve_ams(self, thr, &bi[i].to, bs->entries[i].to); - ip__resolve_ams(self, thr, &bi[i].from, bs->entries[i].from); - bi[i].flags = bs->entries[i].flags; - } - return bi; -} - -static int machine__resolve_callchain_sample(struct machine *machine, - struct thread *thread, - struct ip_callchain *chain, - struct symbol **parent) - -{ - u8 cpumode = PERF_RECORD_MISC_USER; - unsigned int i; - int err; - - callchain_cursor_reset(&callchain_cursor); - - if (chain->nr > PERF_MAX_STACK_DEPTH) { - pr_warning("corrupted callchain. skipping...\n"); - return 0; - } - - for (i = 0; i < chain->nr; i++) { - u64 ip; - struct addr_location al; - - if (callchain_param.order == ORDER_CALLEE) - ip = chain->ips[i]; - else - ip = chain->ips[chain->nr - i - 1]; - - if (ip >= PERF_CONTEXT_MAX) { - switch (ip) { - case PERF_CONTEXT_HV: - cpumode = PERF_RECORD_MISC_HYPERVISOR; - break; - case PERF_CONTEXT_KERNEL: - cpumode = PERF_RECORD_MISC_KERNEL; - break; - case PERF_CONTEXT_USER: - cpumode = PERF_RECORD_MISC_USER; - break; - default: - pr_debug("invalid callchain context: " - "%"PRId64"\n", (s64) ip); - /* - * It seems the callchain is corrupted. - * Discard all. - */ - callchain_cursor_reset(&callchain_cursor); - return 0; - } - continue; - } - - al.filtered = false; - thread__find_addr_location(thread, machine, cpumode, - MAP__FUNCTION, ip, &al, NULL); - if (al.sym != NULL) { - if (sort__has_parent && !*parent && - symbol__match_parent_regex(al.sym)) - *parent = al.sym; - if (!symbol_conf.use_callchain) - break; - } - - err = callchain_cursor_append(&callchain_cursor, - ip, al.map, al.sym); - if (err) - return err; - } - - return 0; -} - -static int unwind_entry(struct unwind_entry *entry, void *arg) -{ - struct callchain_cursor *cursor = arg; - return callchain_cursor_append(cursor, entry->ip, - entry->map, entry->sym); -} - -int machine__resolve_callchain(struct machine *machine, - struct perf_evsel *evsel, - struct thread *thread, - struct perf_sample *sample, - struct symbol **parent) - -{ - int ret; - - callchain_cursor_reset(&callchain_cursor); - - ret = machine__resolve_callchain_sample(machine, thread, - sample->callchain, parent); - if (ret) - return ret; - - /* Can we do dwarf post unwind? */ - if (!((evsel->attr.sample_type & PERF_SAMPLE_REGS_USER) && - (evsel->attr.sample_type & PERF_SAMPLE_STACK_USER))) - return 0; - - /* Bail out if nothing was captured. */ - if ((!sample->user_regs.regs) || - (!sample->user_stack.size)) - return 0; - - return unwind__get_entries(unwind_entry, &callchain_cursor, machine, - thread, evsel->attr.sample_regs_user, - sample); - -} - static int process_event_synth_tracing_data_stub(union perf_event *event __maybe_unused, struct perf_session *session @@ -1027,7 +821,7 @@ static struct machine * return perf_session__findnew_machine(session, pid); } - return perf_session__find_host_machine(session); + return &session->machines.host; } static int perf_session_deliver_event(struct perf_session *session, @@ -1065,11 +859,11 @@ static int perf_session_deliver_event(struct perf_session *session, case PERF_RECORD_SAMPLE: dump_sample(evsel, event, sample); if (evsel == NULL) { - ++session->hists.stats.nr_unknown_id; + ++session->stats.nr_unknown_id; return 0; } if (machine == NULL) { - ++session->hists.stats.nr_unprocessable_samples; + ++session->stats.nr_unprocessable_samples; return 0; } return tool->sample(tool, event, sample, evsel, machine); @@ -1083,7 +877,7 @@ static int perf_session_deliver_event(struct perf_session *session, return tool->exit(tool, event, sample, machine); case PERF_RECORD_LOST: if (tool->lost == perf_event__process_lost) - session->hists.stats.total_lost += event->lost.lost; + session->stats.total_lost += event->lost.lost; return tool->lost(tool, event, sample, machine); case PERF_RECORD_READ: return tool->read(tool, event, sample, evsel, machine); @@ -1092,7 +886,7 @@ static int perf_session_deliver_event(struct perf_session *session, case PERF_RECORD_UNTHROTTLE: return tool->unthrottle(tool, event, sample, machine); default: - ++session->hists.stats.nr_unknown_events; + ++session->stats.nr_unknown_events; return -1; } } @@ -1106,8 +900,8 @@ static int perf_session__preprocess_sample(struct perf_session *session, if (!ip_callchain__valid(sample->callchain, event)) { pr_debug("call-chain problem with event, skipping it.\n"); - ++session->hists.stats.nr_invalid_chains; - session->hists.stats.total_invalid_chains += sample->period; + ++session->stats.nr_invalid_chains; + session->stats.total_invalid_chains += sample->period; return -EINVAL; } return 0; @@ -1165,7 +959,7 @@ static int perf_session__process_event(struct perf_session *session, if (event->header.type >= PERF_RECORD_HEADER_MAX) return -EINVAL; - hists__inc_nr_events(&session->hists, event->header.type); + events_stats__inc(&session->stats, event->header.type); if (event->header.type >= PERF_RECORD_USER_TYPE_START) return perf_session__process_user_event(session, event, tool, file_offset); @@ -1201,7 +995,7 @@ void perf_event_header__bswap(struct perf_event_header *self) struct thread *perf_session__findnew(struct perf_session *session, pid_t pid) { - return machine__findnew_thread(&session->host_machine, pid); + return machine__findnew_thread(&session->machines.host, pid); } static struct thread *perf_session__register_idle_thread(struct perf_session *self) @@ -1220,39 +1014,39 @@ static void perf_session__warn_about_errors(const struct perf_session *session, const struct perf_tool *tool) { if (tool->lost == perf_event__process_lost && - session->hists.stats.nr_events[PERF_RECORD_LOST] != 0) { + session->stats.nr_events[PERF_RECORD_LOST] != 0) { ui__warning("Processed %d events and lost %d chunks!\n\n" "Check IO/CPU overload!\n\n", - session->hists.stats.nr_events[0], - session->hists.stats.nr_events[PERF_RECORD_LOST]); + session->stats.nr_events[0], + session->stats.nr_events[PERF_RECORD_LOST]); } - if (session->hists.stats.nr_unknown_events != 0) { + if (session->stats.nr_unknown_events != 0) { ui__warning("Found %u unknown events!\n\n" "Is this an older tool processing a perf.data " "file generated by a more recent tool?\n\n" "If that is not the case, consider " "reporting to linux-kernel@vger.kernel.org.\n\n", - session->hists.stats.nr_unknown_events); + session->stats.nr_unknown_events); } - if (session->hists.stats.nr_unknown_id != 0) { + if (session->stats.nr_unknown_id != 0) { ui__warning("%u samples with id not present in the header\n", - session->hists.stats.nr_unknown_id); + session->stats.nr_unknown_id); } - if (session->hists.stats.nr_invalid_chains != 0) { + if (session->stats.nr_invalid_chains != 0) { ui__warning("Found invalid callchains!\n\n" "%u out of %u events were discarded for this reason.\n\n" "Consider reporting to linux-kernel@vger.kernel.org.\n\n", - session->hists.stats.nr_invalid_chains, - session->hists.stats.nr_events[PERF_RECORD_SAMPLE]); + session->stats.nr_invalid_chains, + session->stats.nr_events[PERF_RECORD_SAMPLE]); } - if (session->hists.stats.nr_unprocessable_samples != 0) { + if (session->stats.nr_unprocessable_samples != 0) { ui__warning("%u unprocessable samples recorded.\n" "Do you have a KVM guest running and not using 'perf kvm'?\n", - session->hists.stats.nr_unprocessable_samples); + session->stats.nr_unprocessable_samples); } } @@ -1369,6 +1163,18 @@ fetch_mmaped_event(struct perf_session *session, return event; } +/* + * On 64bit we can mmap the data file in one go. No need for tiny mmap + * slices. On 32bit we use 32MB. + */ +#if BITS_PER_LONG == 64 +#define MMAP_SIZE ULLONG_MAX +#define NUM_MMAPS 1 +#else +#define MMAP_SIZE (32 * 1024 * 1024ULL) +#define NUM_MMAPS 128 +#endif + int __perf_session__process_events(struct perf_session *session, u64 data_offset, u64 data_size, u64 file_size, struct perf_tool *tool) @@ -1376,7 +1182,7 @@ int __perf_session__process_events(struct perf_session *session, u64 head, page_offset, file_offset, file_pos, progress_next; int err, mmap_prot, mmap_flags, map_idx = 0; size_t mmap_size; - char *buf, *mmaps[8]; + char *buf, *mmaps[NUM_MMAPS]; union perf_event *event; uint32_t size; @@ -1391,7 +1197,7 @@ int __perf_session__process_events(struct perf_session *session, progress_next = file_size / 16; - mmap_size = session->mmap_window; + mmap_size = MMAP_SIZE; if (mmap_size > file_size) mmap_size = file_size; @@ -1526,16 +1332,13 @@ int maps__set_kallsyms_ref_reloc_sym(struct map **maps, size_t perf_session__fprintf_dsos(struct perf_session *self, FILE *fp) { - return __dsos__fprintf(&self->host_machine.kernel_dsos, fp) + - __dsos__fprintf(&self->host_machine.user_dsos, fp) + - machines__fprintf_dsos(&self->machines, fp); + return machines__fprintf_dsos(&self->machines, fp); } size_t perf_session__fprintf_dsos_buildid(struct perf_session *self, FILE *fp, - bool with_hits) + bool (skip)(struct dso *dso, int parm), int parm) { - size_t ret = machine__fprintf_dsos_buildid(&self->host_machine, fp, with_hits); - return ret + machines__fprintf_dsos_buildid(&self->machines, fp, with_hits); + return machines__fprintf_dsos_buildid(&self->machines, fp, skip, parm); } size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp) @@ -1543,11 +1346,11 @@ size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp) struct perf_evsel *pos; size_t ret = fprintf(fp, "Aggregated stats:\n"); - ret += hists__fprintf_nr_events(&session->hists, fp); + ret += events_stats__fprintf(&session->stats, fp); list_for_each_entry(pos, &session->evlist->entries, node) { ret += fprintf(fp, "%s stats:\n", perf_evsel__name(pos)); - ret += hists__fprintf_nr_events(&pos->hists, fp); + ret += events_stats__fprintf(&pos->hists.stats, fp); } return ret; @@ -1559,7 +1362,7 @@ size_t perf_session__fprintf(struct perf_session *session, FILE *fp) * FIXME: Here we have to actually print all the machines in this * session, not just the host... */ - return machine__fprintf(&session->host_machine, fp); + return machine__fprintf(&session->machines.host, fp); } void perf_session__remove_thread(struct perf_session *session, @@ -1568,10 +1371,10 @@ void perf_session__remove_thread(struct perf_session *session, /* * FIXME: This one makes no sense, we need to remove the thread from * the machine it belongs to, perf_session can have many machines, so - * doing it always on ->host_machine is wrong. Fix when auditing all + * doing it always on ->machines.host is wrong. Fix when auditing all * the 'perf kvm' code. */ - machine__remove_thread(&session->host_machine, th); + machine__remove_thread(&session->machines.host, th); } struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session, diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index cea133a6bdf1..b5c0847edfa9 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -30,16 +30,10 @@ struct ordered_samples { struct perf_session { struct perf_header header; unsigned long size; - unsigned long mmap_window; - struct machine host_machine; - struct rb_root machines; + struct machines machines; struct perf_evlist *evlist; struct pevent *pevent; - /* - * FIXME: Need to split this up further, we need global - * stats + per event stats. - */ - struct hists hists; + struct events_stats stats; int fd; bool fd_pipe; bool repipe; @@ -54,7 +48,7 @@ struct perf_tool; struct perf_session *perf_session__new(const char *filename, int mode, bool force, bool repipe, struct perf_tool *tool); -void perf_session__delete(struct perf_session *self); +void perf_session__delete(struct perf_session *session); void perf_event_header__bswap(struct perf_event_header *self); @@ -81,43 +75,24 @@ void perf_session__set_id_hdr_size(struct perf_session *session); void perf_session__remove_thread(struct perf_session *self, struct thread *th); static inline -struct machine *perf_session__find_host_machine(struct perf_session *self) -{ - return &self->host_machine; -} - -static inline struct machine *perf_session__find_machine(struct perf_session *self, pid_t pid) { - if (pid == HOST_KERNEL_ID) - return &self->host_machine; return machines__find(&self->machines, pid); } static inline struct machine *perf_session__findnew_machine(struct perf_session *self, pid_t pid) { - if (pid == HOST_KERNEL_ID) - return &self->host_machine; return machines__findnew(&self->machines, pid); } -static inline -void perf_session__process_machines(struct perf_session *self, - struct perf_tool *tool, - machine__process_t process) -{ - process(&self->host_machine, tool); - return machines__process(&self->machines, process, tool); -} - struct thread *perf_session__findnew(struct perf_session *self, pid_t pid); size_t perf_session__fprintf(struct perf_session *self, FILE *fp); size_t perf_session__fprintf_dsos(struct perf_session *self, FILE *fp); -size_t perf_session__fprintf_dsos_buildid(struct perf_session *self, - FILE *fp, bool with_hits); +size_t perf_session__fprintf_dsos_buildid(struct perf_session *session, FILE *fp, + bool (fn)(struct dso *dso, int parm), int parm); size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index cfd1c0feb32d..d41926cb9e3f 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -60,7 +60,7 @@ sort__thread_cmp(struct hist_entry *left, struct hist_entry *right) static int hist_entry__thread_snprintf(struct hist_entry *self, char *bf, size_t size, unsigned int width) { - return repsep_snprintf(bf, size, "%*s:%5d", width, + return repsep_snprintf(bf, size, "%*s:%5d", width - 6, self->thread->comm ?: "", self->thread->pid); } @@ -97,6 +97,16 @@ static int hist_entry__comm_snprintf(struct hist_entry *self, char *bf, return repsep_snprintf(bf, size, "%*s", width, self->thread->comm); } +struct sort_entry sort_comm = { + .se_header = "Command", + .se_cmp = sort__comm_cmp, + .se_collapse = sort__comm_collapse, + .se_snprintf = hist_entry__comm_snprintf, + .se_width_idx = HISTC_COMM, +}; + +/* --sort dso */ + static int64_t _sort__dso_cmp(struct map *map_l, struct map *map_r) { struct dso *dso_l = map_l ? map_l->dso : NULL; @@ -117,40 +127,12 @@ static int64_t _sort__dso_cmp(struct map *map_l, struct map *map_r) return strcmp(dso_name_l, dso_name_r); } -struct sort_entry sort_comm = { - .se_header = "Command", - .se_cmp = sort__comm_cmp, - .se_collapse = sort__comm_collapse, - .se_snprintf = hist_entry__comm_snprintf, - .se_width_idx = HISTC_COMM, -}; - -/* --sort dso */ - static int64_t sort__dso_cmp(struct hist_entry *left, struct hist_entry *right) { return _sort__dso_cmp(left->ms.map, right->ms.map); } - -static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r, - u64 ip_l, u64 ip_r) -{ - if (!sym_l || !sym_r) - return cmp_null(sym_l, sym_r); - - if (sym_l == sym_r) - return 0; - - if (sym_l) - ip_l = sym_l->start; - if (sym_r) - ip_r = sym_r->start; - - return (int64_t)(ip_r - ip_l); -} - static int _hist_entry__dso_snprintf(struct map *map, char *bf, size_t size, unsigned int width) { @@ -169,9 +151,43 @@ static int hist_entry__dso_snprintf(struct hist_entry *self, char *bf, return _hist_entry__dso_snprintf(self->ms.map, bf, size, width); } +struct sort_entry sort_dso = { + .se_header = "Shared Object", + .se_cmp = sort__dso_cmp, + .se_snprintf = hist_entry__dso_snprintf, + .se_width_idx = HISTC_DSO, +}; + +/* --sort symbol */ + +static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r) +{ + u64 ip_l, ip_r; + + if (!sym_l || !sym_r) + return cmp_null(sym_l, sym_r); + + if (sym_l == sym_r) + return 0; + + ip_l = sym_l->start; + ip_r = sym_r->start; + + return (int64_t)(ip_r - ip_l); +} + +static int64_t +sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) +{ + if (!left->ms.sym && !right->ms.sym) + return right->level - left->level; + + return _sort__sym_cmp(left->ms.sym, right->ms.sym); +} + static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym, u64 ip, char level, char *bf, size_t size, - unsigned int width __maybe_unused) + unsigned int width) { size_t ret = 0; @@ -197,43 +213,13 @@ static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym, return ret; } - -struct sort_entry sort_dso = { - .se_header = "Shared Object", - .se_cmp = sort__dso_cmp, - .se_snprintf = hist_entry__dso_snprintf, - .se_width_idx = HISTC_DSO, -}; - static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf, - size_t size, - unsigned int width __maybe_unused) + size_t size, unsigned int width) { return _hist_entry__sym_snprintf(self->ms.map, self->ms.sym, self->ip, self->level, bf, size, width); } -/* --sort symbol */ -static int64_t -sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) -{ - u64 ip_l, ip_r; - - if (!left->ms.sym && !right->ms.sym) - return right->level - left->level; - - if (!left->ms.sym || !right->ms.sym) - return cmp_null(left->ms.sym, right->ms.sym); - - if (left->ms.sym == right->ms.sym) - return 0; - - ip_l = left->ms.sym->start; - ip_r = right->ms.sym->start; - - return _sort__sym_cmp(left->ms.sym, right->ms.sym, ip_l, ip_r); -} - struct sort_entry sort_sym = { .se_header = "Symbol", .se_cmp = sort__sym_cmp, @@ -253,7 +239,7 @@ static int hist_entry__srcline_snprintf(struct hist_entry *self, char *bf, size_t size, unsigned int width __maybe_unused) { - FILE *fp; + FILE *fp = NULL; char cmd[PATH_MAX + 2], *path = self->srcline, *nl; size_t line_len; @@ -274,7 +260,6 @@ static int hist_entry__srcline_snprintf(struct hist_entry *self, char *bf, if (getline(&path, &line_len, fp) < 0 || !line_len) goto out_ip; - fclose(fp); self->srcline = strdup(path); if (self->srcline == NULL) goto out_ip; @@ -284,8 +269,12 @@ static int hist_entry__srcline_snprintf(struct hist_entry *self, char *bf, *nl = '\0'; path = self->srcline; out_path: + if (fp) + pclose(fp); return repsep_snprintf(bf, size, "%s", path); out_ip: + if (fp) + pclose(fp); return repsep_snprintf(bf, size, "%-#*llx", BITS_PER_LONG / 4, self->ip); } @@ -335,7 +324,7 @@ sort__cpu_cmp(struct hist_entry *left, struct hist_entry *right) static int hist_entry__cpu_snprintf(struct hist_entry *self, char *bf, size_t size, unsigned int width) { - return repsep_snprintf(bf, size, "%-*d", width, self->cpu); + return repsep_snprintf(bf, size, "%*d", width, self->cpu); } struct sort_entry sort_cpu = { @@ -345,6 +334,8 @@ struct sort_entry sort_cpu = { .se_width_idx = HISTC_CPU, }; +/* sort keys for branch stacks */ + static int64_t sort__dso_from_cmp(struct hist_entry *left, struct hist_entry *right) { @@ -359,13 +350,6 @@ static int hist_entry__dso_from_snprintf(struct hist_entry *self, char *bf, bf, size, width); } -struct sort_entry sort_dso_from = { - .se_header = "Source Shared Object", - .se_cmp = sort__dso_from_cmp, - .se_snprintf = hist_entry__dso_from_snprintf, - .se_width_idx = HISTC_DSO_FROM, -}; - static int64_t sort__dso_to_cmp(struct hist_entry *left, struct hist_entry *right) { @@ -389,8 +373,7 @@ sort__sym_from_cmp(struct hist_entry *left, struct hist_entry *right) if (!from_l->sym && !from_r->sym) return right->level - left->level; - return _sort__sym_cmp(from_l->sym, from_r->sym, from_l->addr, - from_r->addr); + return _sort__sym_cmp(from_l->sym, from_r->sym); } static int64_t @@ -402,12 +385,11 @@ sort__sym_to_cmp(struct hist_entry *left, struct hist_entry *right) if (!to_l->sym && !to_r->sym) return right->level - left->level; - return _sort__sym_cmp(to_l->sym, to_r->sym, to_l->addr, to_r->addr); + return _sort__sym_cmp(to_l->sym, to_r->sym); } static int hist_entry__sym_from_snprintf(struct hist_entry *self, char *bf, - size_t size, - unsigned int width __maybe_unused) + size_t size, unsigned int width) { struct addr_map_symbol *from = &self->branch_info->from; return _hist_entry__sym_snprintf(from->map, from->sym, from->addr, @@ -416,8 +398,7 @@ static int hist_entry__sym_from_snprintf(struct hist_entry *self, char *bf, } static int hist_entry__sym_to_snprintf(struct hist_entry *self, char *bf, - size_t size, - unsigned int width __maybe_unused) + size_t size, unsigned int width) { struct addr_map_symbol *to = &self->branch_info->to; return _hist_entry__sym_snprintf(to->map, to->sym, to->addr, @@ -425,6 +406,13 @@ static int hist_entry__sym_to_snprintf(struct hist_entry *self, char *bf, } +struct sort_entry sort_dso_from = { + .se_header = "Source Shared Object", + .se_cmp = sort__dso_from_cmp, + .se_snprintf = hist_entry__dso_from_snprintf, + .se_width_idx = HISTC_DSO_FROM, +}; + struct sort_entry sort_dso_to = { .se_header = "Target Shared Object", .se_cmp = sort__dso_to_cmp, @@ -484,30 +472,40 @@ struct sort_dimension { #define DIM(d, n, func) [d] = { .name = n, .entry = &(func) } -static struct sort_dimension sort_dimensions[] = { +static struct sort_dimension common_sort_dimensions[] = { DIM(SORT_PID, "pid", sort_thread), DIM(SORT_COMM, "comm", sort_comm), DIM(SORT_DSO, "dso", sort_dso), - DIM(SORT_DSO_FROM, "dso_from", sort_dso_from), - DIM(SORT_DSO_TO, "dso_to", sort_dso_to), DIM(SORT_SYM, "symbol", sort_sym), - DIM(SORT_SYM_TO, "symbol_from", sort_sym_from), - DIM(SORT_SYM_FROM, "symbol_to", sort_sym_to), DIM(SORT_PARENT, "parent", sort_parent), DIM(SORT_CPU, "cpu", sort_cpu), - DIM(SORT_MISPREDICT, "mispredict", sort_mispredict), DIM(SORT_SRCLINE, "srcline", sort_srcline), }; +#undef DIM + +#define DIM(d, n, func) [d - __SORT_BRANCH_STACK] = { .name = n, .entry = &(func) } + +static struct sort_dimension bstack_sort_dimensions[] = { + DIM(SORT_DSO_FROM, "dso_from", sort_dso_from), + DIM(SORT_DSO_TO, "dso_to", sort_dso_to), + DIM(SORT_SYM_FROM, "symbol_from", sort_sym_from), + DIM(SORT_SYM_TO, "symbol_to", sort_sym_to), + DIM(SORT_MISPREDICT, "mispredict", sort_mispredict), +}; + +#undef DIM + int sort_dimension__add(const char *tok) { unsigned int i; - for (i = 0; i < ARRAY_SIZE(sort_dimensions); i++) { - struct sort_dimension *sd = &sort_dimensions[i]; + for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) { + struct sort_dimension *sd = &common_sort_dimensions[i]; if (strncasecmp(tok, sd->name, strlen(tok))) continue; + if (sd->entry == &sort_parent) { int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED); if (ret) { @@ -518,9 +516,7 @@ int sort_dimension__add(const char *tok) return -EINVAL; } sort__has_parent = 1; - } else if (sd->entry == &sort_sym || - sd->entry == &sort_sym_from || - sd->entry == &sort_sym_to) { + } else if (sd->entry == &sort_sym) { sort__has_sym = 1; } @@ -530,52 +526,69 @@ int sort_dimension__add(const char *tok) if (sd->entry->se_collapse) sort__need_collapse = 1; - if (list_empty(&hist_entry__sort_list)) { - if (!strcmp(sd->name, "pid")) - sort__first_dimension = SORT_PID; - else if (!strcmp(sd->name, "comm")) - sort__first_dimension = SORT_COMM; - else if (!strcmp(sd->name, "dso")) - sort__first_dimension = SORT_DSO; - else if (!strcmp(sd->name, "symbol")) - sort__first_dimension = SORT_SYM; - else if (!strcmp(sd->name, "parent")) - sort__first_dimension = SORT_PARENT; - else if (!strcmp(sd->name, "cpu")) - sort__first_dimension = SORT_CPU; - else if (!strcmp(sd->name, "symbol_from")) - sort__first_dimension = SORT_SYM_FROM; - else if (!strcmp(sd->name, "symbol_to")) - sort__first_dimension = SORT_SYM_TO; - else if (!strcmp(sd->name, "dso_from")) - sort__first_dimension = SORT_DSO_FROM; - else if (!strcmp(sd->name, "dso_to")) - sort__first_dimension = SORT_DSO_TO; - else if (!strcmp(sd->name, "mispredict")) - sort__first_dimension = SORT_MISPREDICT; - } + if (list_empty(&hist_entry__sort_list)) + sort__first_dimension = i; list_add_tail(&sd->entry->list, &hist_entry__sort_list); sd->taken = 1; return 0; } + + for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) { + struct sort_dimension *sd = &bstack_sort_dimensions[i]; + + if (strncasecmp(tok, sd->name, strlen(tok))) + continue; + + if (sort__branch_mode != 1) + return -EINVAL; + + if (sd->entry == &sort_sym_from || sd->entry == &sort_sym_to) + sort__has_sym = 1; + + if (sd->taken) + return 0; + + if (sd->entry->se_collapse) + sort__need_collapse = 1; + + if (list_empty(&hist_entry__sort_list)) + sort__first_dimension = i + __SORT_BRANCH_STACK; + + list_add_tail(&sd->entry->list, &hist_entry__sort_list); + sd->taken = 1; + + return 0; + } + return -ESRCH; } -void setup_sorting(const char * const usagestr[], const struct option *opts) +int setup_sorting(void) { char *tmp, *tok, *str = strdup(sort_order); + int ret = 0; + + if (str == NULL) { + error("Not enough memory to setup sort keys"); + return -ENOMEM; + } for (tok = strtok_r(str, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) { - if (sort_dimension__add(tok) < 0) { + ret = sort_dimension__add(tok); + if (ret == -EINVAL) { + error("Invalid --sort key: `%s'", tok); + break; + } else if (ret == -ESRCH) { error("Unknown --sort key: `%s'", tok); - usage_with_options(usagestr, opts); + break; } } free(str); + return ret; } void sort_entry__setup_elide(struct sort_entry *self, struct strlist *list, diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index b4e8c3ba559d..b13e56f6ccbe 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -55,9 +55,6 @@ struct he_stat { struct hist_entry_diff { bool computed; - /* PERF_HPP__DISPL */ - int displacement; - /* PERF_HPP__DELTA */ double period_ratio_delta; @@ -118,25 +115,29 @@ static inline struct hist_entry *hist_entry__next_pair(struct hist_entry *he) return NULL; } -static inline void hist__entry_add_pair(struct hist_entry *he, +static inline void hist_entry__add_pair(struct hist_entry *he, struct hist_entry *pair) { list_add_tail(&he->pairs.head, &pair->pairs.node); } enum sort_type { + /* common sort keys */ SORT_PID, SORT_COMM, SORT_DSO, SORT_SYM, SORT_PARENT, SORT_CPU, - SORT_DSO_FROM, + SORT_SRCLINE, + + /* branch stack specific sort keys */ + __SORT_BRANCH_STACK, + SORT_DSO_FROM = __SORT_BRANCH_STACK, SORT_DSO_TO, SORT_SYM_FROM, SORT_SYM_TO, SORT_MISPREDICT, - SORT_SRCLINE, }; /* @@ -159,7 +160,7 @@ struct sort_entry { extern struct sort_entry sort_thread; extern struct list_head hist_entry__sort_list; -void setup_sorting(const char * const usagestr[], const struct option *opts); +int setup_sorting(void); extern int sort_dimension__add(const char *); void sort_entry__setup_elide(struct sort_entry *self, struct strlist *list, const char *list_name, FILE *fp); diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c index 346707df04b9..29c7b2cb2521 100644 --- a/tools/perf/util/string.c +++ b/tools/perf/util/string.c @@ -332,6 +332,24 @@ char *strxfrchar(char *s, char from, char to) } /** + * ltrim - Removes leading whitespace from @s. + * @s: The string to be stripped. + * + * Return pointer to the first non-whitespace character in @s. + */ +char *ltrim(char *s) +{ + int len = strlen(s); + + while (len && isspace(*s)) { + len--; + s++; + } + + return s; +} + +/** * rtrim - Removes trailing whitespace from @s. * @s: The string to be stripped. * diff --git a/tools/perf/util/strlist.c b/tools/perf/util/strlist.c index 155d8b7078a7..55433aa42c8f 100644 --- a/tools/perf/util/strlist.c +++ b/tools/perf/util/strlist.c @@ -35,11 +35,11 @@ out_delete: return NULL; } -static void str_node__delete(struct str_node *self, bool dupstr) +static void str_node__delete(struct str_node *snode, bool dupstr) { if (dupstr) - free((void *)self->s); - free(self); + free((void *)snode->s); + free(snode); } static @@ -59,12 +59,12 @@ static int strlist__node_cmp(struct rb_node *rb_node, const void *entry) return strcmp(snode->s, str); } -int strlist__add(struct strlist *self, const char *new_entry) +int strlist__add(struct strlist *slist, const char *new_entry) { - return rblist__add_node(&self->rblist, new_entry); + return rblist__add_node(&slist->rblist, new_entry); } -int strlist__load(struct strlist *self, const char *filename) +int strlist__load(struct strlist *slist, const char *filename) { char entry[1024]; int err; @@ -80,7 +80,7 @@ int strlist__load(struct strlist *self, const char *filename) continue; entry[len - 1] = '\0'; - err = strlist__add(self, entry); + err = strlist__add(slist, entry); if (err != 0) goto out; } @@ -107,56 +107,56 @@ struct str_node *strlist__find(struct strlist *slist, const char *entry) return snode; } -static int strlist__parse_list_entry(struct strlist *self, const char *s) +static int strlist__parse_list_entry(struct strlist *slist, const char *s) { if (strncmp(s, "file://", 7) == 0) - return strlist__load(self, s + 7); + return strlist__load(slist, s + 7); - return strlist__add(self, s); + return strlist__add(slist, s); } -int strlist__parse_list(struct strlist *self, const char *s) +int strlist__parse_list(struct strlist *slist, const char *s) { char *sep; int err; while ((sep = strchr(s, ',')) != NULL) { *sep = '\0'; - err = strlist__parse_list_entry(self, s); + err = strlist__parse_list_entry(slist, s); *sep = ','; if (err != 0) return err; s = sep + 1; } - return *s ? strlist__parse_list_entry(self, s) : 0; + return *s ? strlist__parse_list_entry(slist, s) : 0; } -struct strlist *strlist__new(bool dupstr, const char *slist) +struct strlist *strlist__new(bool dupstr, const char *list) { - struct strlist *self = malloc(sizeof(*self)); + struct strlist *slist = malloc(sizeof(*slist)); - if (self != NULL) { - rblist__init(&self->rblist); - self->rblist.node_cmp = strlist__node_cmp; - self->rblist.node_new = strlist__node_new; - self->rblist.node_delete = strlist__node_delete; + if (slist != NULL) { + rblist__init(&slist->rblist); + slist->rblist.node_cmp = strlist__node_cmp; + slist->rblist.node_new = strlist__node_new; + slist->rblist.node_delete = strlist__node_delete; - self->dupstr = dupstr; - if (slist && strlist__parse_list(self, slist) != 0) + slist->dupstr = dupstr; + if (slist && strlist__parse_list(slist, list) != 0) goto out_error; } - return self; + return slist; out_error: - free(self); + free(slist); return NULL; } -void strlist__delete(struct strlist *self) +void strlist__delete(struct strlist *slist) { - if (self != NULL) - rblist__delete(&self->rblist); + if (slist != NULL) + rblist__delete(&slist->rblist); } struct str_node *strlist__entry(const struct strlist *slist, unsigned int idx) diff --git a/tools/perf/util/strlist.h b/tools/perf/util/strlist.h index dd9f922ec67c..5c7f87069d9c 100644 --- a/tools/perf/util/strlist.h +++ b/tools/perf/util/strlist.h @@ -17,34 +17,34 @@ struct strlist { }; struct strlist *strlist__new(bool dupstr, const char *slist); -void strlist__delete(struct strlist *self); +void strlist__delete(struct strlist *slist); -void strlist__remove(struct strlist *self, struct str_node *sn); -int strlist__load(struct strlist *self, const char *filename); -int strlist__add(struct strlist *self, const char *str); +void strlist__remove(struct strlist *slist, struct str_node *sn); +int strlist__load(struct strlist *slist, const char *filename); +int strlist__add(struct strlist *slist, const char *str); -struct str_node *strlist__entry(const struct strlist *self, unsigned int idx); -struct str_node *strlist__find(struct strlist *self, const char *entry); +struct str_node *strlist__entry(const struct strlist *slist, unsigned int idx); +struct str_node *strlist__find(struct strlist *slist, const char *entry); -static inline bool strlist__has_entry(struct strlist *self, const char *entry) +static inline bool strlist__has_entry(struct strlist *slist, const char *entry) { - return strlist__find(self, entry) != NULL; + return strlist__find(slist, entry) != NULL; } -static inline bool strlist__empty(const struct strlist *self) +static inline bool strlist__empty(const struct strlist *slist) { - return rblist__empty(&self->rblist); + return rblist__empty(&slist->rblist); } -static inline unsigned int strlist__nr_entries(const struct strlist *self) +static inline unsigned int strlist__nr_entries(const struct strlist *slist) { - return rblist__nr_entries(&self->rblist); + return rblist__nr_entries(&slist->rblist); } /* For strlist iteration */ -static inline struct str_node *strlist__first(struct strlist *self) +static inline struct str_node *strlist__first(struct strlist *slist) { - struct rb_node *rn = rb_first(&self->rblist.entries); + struct rb_node *rn = rb_first(&slist->rblist.entries); return rn ? rb_entry(rn, struct str_node, rb_node) : NULL; } static inline struct str_node *strlist__next(struct str_node *sn) @@ -59,21 +59,21 @@ static inline struct str_node *strlist__next(struct str_node *sn) /** * strlist_for_each - iterate over a strlist * @pos: the &struct str_node to use as a loop cursor. - * @self: the &struct strlist for loop. + * @slist: the &struct strlist for loop. */ -#define strlist__for_each(pos, self) \ - for (pos = strlist__first(self); pos; pos = strlist__next(pos)) +#define strlist__for_each(pos, slist) \ + for (pos = strlist__first(slist); pos; pos = strlist__next(pos)) /** * strlist_for_each_safe - iterate over a strlist safe against removal of * str_node * @pos: the &struct str_node to use as a loop cursor. * @n: another &struct str_node to use as temporary storage. - * @self: the &struct strlist for loop. + * @slist: the &struct strlist for loop. */ -#define strlist__for_each_safe(pos, n, self) \ - for (pos = strlist__first(self), n = strlist__next(pos); pos;\ +#define strlist__for_each_safe(pos, n, slist) \ + for (pos = strlist__first(slist), n = strlist__next(pos); pos;\ pos = n, n = strlist__next(n)) -int strlist__parse_list(struct strlist *self, const char *s); +int strlist__parse_list(struct strlist *slist, const char *s); #endif /* __PERF_STRLIST_H */ diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index db0cc92cf2ea..54efcb5659ac 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -1,6 +1,3 @@ -#include <libelf.h> -#include <gelf.h> -#include <elf.h> #include <fcntl.h> #include <stdio.h> #include <errno.h> @@ -718,6 +715,17 @@ int dso__load_sym(struct dso *dso, struct map *map, sym.st_value); used_opd = true; } + /* + * When loading symbols in a data mapping, ABS symbols (which + * has a value of SHN_ABS in its st_shndx) failed at + * elf_getscn(). And it marks the loading as a failure so + * already loaded symbols cannot be fixed up. + * + * I'm not sure what should be done. Just ignore them for now. + * - Namhyung Kim + */ + if (sym.st_shndx == SHN_ABS) + continue; sec = elf_getscn(runtime_ss->elf, sym.st_shndx); if (!sec) diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index 259f8f2ea9c9..a7390cde63bc 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -1,6 +1,5 @@ #include "symbol.h" -#include <elf.h> #include <stdio.h> #include <fcntl.h> #include <string.h> diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 295f8d4feedf..e6432d85b43d 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -28,8 +28,8 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map, symbol_filter_t filter); static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map, symbol_filter_t filter); -static int vmlinux_path__nr_entries; -static char **vmlinux_path; +int vmlinux_path__nr_entries; +char **vmlinux_path; struct symbol_conf symbol_conf = { .exclude_other = true, @@ -202,13 +202,6 @@ void __map_groups__fixup_end(struct map_groups *mg, enum map_type type) curr->end = ~0ULL; } -static void map_groups__fixup_end(struct map_groups *mg) -{ - int i; - for (i = 0; i < MAP__NR_TYPES; ++i) - __map_groups__fixup_end(mg, i); -} - struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name) { size_t namelen = strlen(name) + 1; @@ -652,8 +645,8 @@ discard_symbol: rb_erase(&pos->rb_node, root); return count + moved; } -static bool symbol__restricted_filename(const char *filename, - const char *restricted_filename) +bool symbol__restricted_filename(const char *filename, + const char *restricted_filename) { bool restricted = false; @@ -775,10 +768,6 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) else machine = NULL; - name = malloc(PATH_MAX); - if (!name) - return -1; - dso->adjust_symbols = 0; if (strncmp(dso->name, "/tmp/perf-", 10) == 0) { @@ -802,6 +791,10 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) if (machine) root_dir = machine->root_dir; + name = malloc(PATH_MAX); + if (!name) + return -1; + /* Iterate over candidate debug images. * Keep track of "interesting" ones (those which have a symtab, dynsym, * and/or opd section) for processing. @@ -887,200 +880,6 @@ struct map *map_groups__find_by_name(struct map_groups *mg, return NULL; } -static int map_groups__set_modules_path_dir(struct map_groups *mg, - const char *dir_name) -{ - struct dirent *dent; - DIR *dir = opendir(dir_name); - int ret = 0; - - if (!dir) { - pr_debug("%s: cannot open %s dir\n", __func__, dir_name); - return -1; - } - - while ((dent = readdir(dir)) != NULL) { - char path[PATH_MAX]; - struct stat st; - - /*sshfs might return bad dent->d_type, so we have to stat*/ - snprintf(path, sizeof(path), "%s/%s", dir_name, dent->d_name); - if (stat(path, &st)) - continue; - - if (S_ISDIR(st.st_mode)) { - if (!strcmp(dent->d_name, ".") || - !strcmp(dent->d_name, "..")) - continue; - - ret = map_groups__set_modules_path_dir(mg, path); - if (ret < 0) - goto out; - } else { - char *dot = strrchr(dent->d_name, '.'), - dso_name[PATH_MAX]; - struct map *map; - char *long_name; - - if (dot == NULL || strcmp(dot, ".ko")) - continue; - snprintf(dso_name, sizeof(dso_name), "[%.*s]", - (int)(dot - dent->d_name), dent->d_name); - - strxfrchar(dso_name, '-', '_'); - map = map_groups__find_by_name(mg, MAP__FUNCTION, - dso_name); - if (map == NULL) - continue; - - long_name = strdup(path); - if (long_name == NULL) { - ret = -1; - goto out; - } - dso__set_long_name(map->dso, long_name); - map->dso->lname_alloc = 1; - dso__kernel_module_get_build_id(map->dso, ""); - } - } - -out: - closedir(dir); - return ret; -} - -static char *get_kernel_version(const char *root_dir) -{ - char version[PATH_MAX]; - FILE *file; - char *name, *tmp; - const char *prefix = "Linux version "; - - sprintf(version, "%s/proc/version", root_dir); - file = fopen(version, "r"); - if (!file) - return NULL; - - version[0] = '\0'; - tmp = fgets(version, sizeof(version), file); - fclose(file); - - name = strstr(version, prefix); - if (!name) - return NULL; - name += strlen(prefix); - tmp = strchr(name, ' '); - if (tmp) - *tmp = '\0'; - - return strdup(name); -} - -static int machine__set_modules_path(struct machine *machine) -{ - char *version; - char modules_path[PATH_MAX]; - - version = get_kernel_version(machine->root_dir); - if (!version) - return -1; - - snprintf(modules_path, sizeof(modules_path), "%s/lib/modules/%s/kernel", - machine->root_dir, version); - free(version); - - return map_groups__set_modules_path_dir(&machine->kmaps, modules_path); -} - -struct map *machine__new_module(struct machine *machine, u64 start, - const char *filename) -{ - struct map *map; - struct dso *dso = __dsos__findnew(&machine->kernel_dsos, filename); - - if (dso == NULL) - return NULL; - - map = map__new2(start, dso, MAP__FUNCTION); - if (map == NULL) - return NULL; - - if (machine__is_host(machine)) - dso->symtab_type = DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE; - else - dso->symtab_type = DSO_BINARY_TYPE__GUEST_KMODULE; - map_groups__insert(&machine->kmaps, map); - return map; -} - -static int machine__create_modules(struct machine *machine) -{ - char *line = NULL; - size_t n; - FILE *file; - struct map *map; - const char *modules; - char path[PATH_MAX]; - - if (machine__is_default_guest(machine)) - modules = symbol_conf.default_guest_modules; - else { - sprintf(path, "%s/proc/modules", machine->root_dir); - modules = path; - } - - if (symbol__restricted_filename(path, "/proc/modules")) - return -1; - - file = fopen(modules, "r"); - if (file == NULL) - return -1; - - while (!feof(file)) { - char name[PATH_MAX]; - u64 start; - char *sep; - int line_len; - - line_len = getline(&line, &n, file); - if (line_len < 0) - break; - - if (!line) - goto out_failure; - - line[--line_len] = '\0'; /* \n */ - - sep = strrchr(line, 'x'); - if (sep == NULL) - continue; - - hex2u64(sep + 1, &start); - - sep = strchr(line, ' '); - if (sep == NULL) - continue; - - *sep = '\0'; - - snprintf(name, sizeof(name), "[%s]", line); - map = machine__new_module(machine, start, name); - if (map == NULL) - goto out_delete_line; - dso__kernel_module_get_build_id(map->dso, machine->root_dir); - } - - free(line); - fclose(file); - - return machine__set_modules_path(machine); - -out_delete_line: - free(line); -out_failure: - return -1; -} - int dso__load_vmlinux(struct dso *dso, struct map *map, const char *vmlinux, symbol_filter_t filter) { @@ -1124,8 +923,10 @@ int dso__load_vmlinux_path(struct dso *dso, struct map *map, filename = dso__build_id_filename(dso, NULL, 0); if (filename != NULL) { err = dso__load_vmlinux(dso, map, filename, filter); - if (err > 0) + if (err > 0) { + dso->lname_alloc = 1; goto out; + } free(filename); } @@ -1133,6 +934,7 @@ int dso__load_vmlinux_path(struct dso *dso, struct map *map, err = dso__load_vmlinux(dso, map, vmlinux_path[i], filter); if (err > 0) { dso__set_long_name(dso, strdup(vmlinux_path[i])); + dso->lname_alloc = 1; break; } } @@ -1172,6 +974,7 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map, if (err > 0) { dso__set_long_name(dso, strdup(symbol_conf.vmlinux_name)); + dso->lname_alloc = 1; goto out_fixup; } return err; @@ -1300,195 +1103,6 @@ out_try_fixup: return err; } -size_t machines__fprintf_dsos(struct rb_root *machines, FILE *fp) -{ - struct rb_node *nd; - size_t ret = 0; - - for (nd = rb_first(machines); nd; nd = rb_next(nd)) { - struct machine *pos = rb_entry(nd, struct machine, rb_node); - ret += __dsos__fprintf(&pos->kernel_dsos, fp); - ret += __dsos__fprintf(&pos->user_dsos, fp); - } - - return ret; -} - -size_t machine__fprintf_dsos_buildid(struct machine *machine, FILE *fp, - bool with_hits) -{ - return __dsos__fprintf_buildid(&machine->kernel_dsos, fp, with_hits) + - __dsos__fprintf_buildid(&machine->user_dsos, fp, with_hits); -} - -size_t machines__fprintf_dsos_buildid(struct rb_root *machines, - FILE *fp, bool with_hits) -{ - struct rb_node *nd; - size_t ret = 0; - - for (nd = rb_first(machines); nd; nd = rb_next(nd)) { - struct machine *pos = rb_entry(nd, struct machine, rb_node); - ret += machine__fprintf_dsos_buildid(pos, fp, with_hits); - } - return ret; -} - -static struct dso *machine__get_kernel(struct machine *machine) -{ - const char *vmlinux_name = NULL; - struct dso *kernel; - - if (machine__is_host(machine)) { - vmlinux_name = symbol_conf.vmlinux_name; - if (!vmlinux_name) - vmlinux_name = "[kernel.kallsyms]"; - - kernel = dso__kernel_findnew(machine, vmlinux_name, - "[kernel]", - DSO_TYPE_KERNEL); - } else { - char bf[PATH_MAX]; - - if (machine__is_default_guest(machine)) - vmlinux_name = symbol_conf.default_guest_vmlinux_name; - if (!vmlinux_name) - vmlinux_name = machine__mmap_name(machine, bf, - sizeof(bf)); - - kernel = dso__kernel_findnew(machine, vmlinux_name, - "[guest.kernel]", - DSO_TYPE_GUEST_KERNEL); - } - - if (kernel != NULL && (!kernel->has_build_id)) - dso__read_running_kernel_build_id(kernel, machine); - - return kernel; -} - -struct process_args { - u64 start; -}; - -static int symbol__in_kernel(void *arg, const char *name, - char type __maybe_unused, u64 start) -{ - struct process_args *args = arg; - - if (strchr(name, '[')) - return 0; - - args->start = start; - return 1; -} - -/* Figure out the start address of kernel map from /proc/kallsyms */ -static u64 machine__get_kernel_start_addr(struct machine *machine) -{ - const char *filename; - char path[PATH_MAX]; - struct process_args args; - - if (machine__is_host(machine)) { - filename = "/proc/kallsyms"; - } else { - if (machine__is_default_guest(machine)) - filename = (char *)symbol_conf.default_guest_kallsyms; - else { - sprintf(path, "%s/proc/kallsyms", machine->root_dir); - filename = path; - } - } - - if (symbol__restricted_filename(filename, "/proc/kallsyms")) - return 0; - - if (kallsyms__parse(filename, &args, symbol__in_kernel) <= 0) - return 0; - - return args.start; -} - -int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel) -{ - enum map_type type; - u64 start = machine__get_kernel_start_addr(machine); - - for (type = 0; type < MAP__NR_TYPES; ++type) { - struct kmap *kmap; - - machine->vmlinux_maps[type] = map__new2(start, kernel, type); - if (machine->vmlinux_maps[type] == NULL) - return -1; - - machine->vmlinux_maps[type]->map_ip = - machine->vmlinux_maps[type]->unmap_ip = - identity__map_ip; - kmap = map__kmap(machine->vmlinux_maps[type]); - kmap->kmaps = &machine->kmaps; - map_groups__insert(&machine->kmaps, - machine->vmlinux_maps[type]); - } - - return 0; -} - -void machine__destroy_kernel_maps(struct machine *machine) -{ - enum map_type type; - - for (type = 0; type < MAP__NR_TYPES; ++type) { - struct kmap *kmap; - - if (machine->vmlinux_maps[type] == NULL) - continue; - - kmap = map__kmap(machine->vmlinux_maps[type]); - map_groups__remove(&machine->kmaps, - machine->vmlinux_maps[type]); - if (kmap->ref_reloc_sym) { - /* - * ref_reloc_sym is shared among all maps, so free just - * on one of them. - */ - if (type == MAP__FUNCTION) { - free((char *)kmap->ref_reloc_sym->name); - kmap->ref_reloc_sym->name = NULL; - free(kmap->ref_reloc_sym); - } - kmap->ref_reloc_sym = NULL; - } - - map__delete(machine->vmlinux_maps[type]); - machine->vmlinux_maps[type] = NULL; - } -} - -int machine__create_kernel_maps(struct machine *machine) -{ - struct dso *kernel = machine__get_kernel(machine); - - if (kernel == NULL || - __machine__create_kernel_maps(machine, kernel) < 0) - return -1; - - if (symbol_conf.use_modules && machine__create_modules(machine) < 0) { - if (machine__is_host(machine)) - pr_debug("Problems creating module maps, " - "continuing anyway...\n"); - else - pr_debug("Problems creating module maps for guest %d, " - "continuing anyway...\n", machine->pid); - } - - /* - * Now that we have all the maps created, just set the ->end of them: - */ - map_groups__fixup_end(&machine->kmaps); - return 0; -} - static void vmlinux_path__exit(void) { while (--vmlinux_path__nr_entries >= 0) { @@ -1549,25 +1163,6 @@ out_fail: return -1; } -size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp) -{ - int i; - size_t printed = 0; - struct dso *kdso = machine->vmlinux_maps[MAP__FUNCTION]->dso; - - if (kdso->has_build_id) { - char filename[PATH_MAX]; - if (dso__build_id_filename(kdso, filename, sizeof(filename))) - printed += fprintf(fp, "[0] %s\n", filename); - } - - for (i = 0; i < vmlinux_path__nr_entries; ++i) - printed += fprintf(fp, "[%d] %s\n", - i + kdso->has_build_id, vmlinux_path[i]); - - return printed; -} - static int setup_list(struct strlist **list, const char *list_str, const char *list_name) { @@ -1671,108 +1266,3 @@ void symbol__exit(void) symbol_conf.sym_list = symbol_conf.dso_list = symbol_conf.comm_list = NULL; symbol_conf.initialized = false; } - -int machines__create_kernel_maps(struct rb_root *machines, pid_t pid) -{ - struct machine *machine = machines__findnew(machines, pid); - - if (machine == NULL) - return -1; - - return machine__create_kernel_maps(machine); -} - -int machines__create_guest_kernel_maps(struct rb_root *machines) -{ - int ret = 0; - struct dirent **namelist = NULL; - int i, items = 0; - char path[PATH_MAX]; - pid_t pid; - char *endp; - - if (symbol_conf.default_guest_vmlinux_name || - symbol_conf.default_guest_modules || - symbol_conf.default_guest_kallsyms) { - machines__create_kernel_maps(machines, DEFAULT_GUEST_KERNEL_ID); - } - - if (symbol_conf.guestmount) { - items = scandir(symbol_conf.guestmount, &namelist, NULL, NULL); - if (items <= 0) - return -ENOENT; - for (i = 0; i < items; i++) { - if (!isdigit(namelist[i]->d_name[0])) { - /* Filter out . and .. */ - continue; - } - pid = (pid_t)strtol(namelist[i]->d_name, &endp, 10); - if ((*endp != '\0') || - (endp == namelist[i]->d_name) || - (errno == ERANGE)) { - pr_debug("invalid directory (%s). Skipping.\n", - namelist[i]->d_name); - continue; - } - sprintf(path, "%s/%s/proc/kallsyms", - symbol_conf.guestmount, - namelist[i]->d_name); - ret = access(path, R_OK); - if (ret) { - pr_debug("Can't access file %s\n", path); - goto failure; - } - machines__create_kernel_maps(machines, pid); - } -failure: - free(namelist); - } - - return ret; -} - -void machines__destroy_guest_kernel_maps(struct rb_root *machines) -{ - struct rb_node *next = rb_first(machines); - - while (next) { - struct machine *pos = rb_entry(next, struct machine, rb_node); - - next = rb_next(&pos->rb_node); - rb_erase(&pos->rb_node, machines); - machine__delete(pos); - } -} - -int machine__load_kallsyms(struct machine *machine, const char *filename, - enum map_type type, symbol_filter_t filter) -{ - struct map *map = machine->vmlinux_maps[type]; - int ret = dso__load_kallsyms(map->dso, filename, map, filter); - - if (ret > 0) { - dso__set_loaded(map->dso, type); - /* - * Since /proc/kallsyms will have multiple sessions for the - * kernel, with modules between them, fixup the end of all - * sections. - */ - __map_groups__fixup_end(&machine->kmaps, type); - } - - return ret; -} - -int machine__load_vmlinux_path(struct machine *machine, enum map_type type, - symbol_filter_t filter) -{ - struct map *map = machine->vmlinux_maps[type]; - int ret = dso__load_vmlinux_path(map->dso, map, filter); - - if (ret > 0) { - dso__set_loaded(map->dso, type); - map__reloc_vmlinux(map); - } - - return ret; -} diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index de68f98b236d..b62ca37c4b77 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -16,8 +16,8 @@ #ifdef LIBELF_SUPPORT #include <libelf.h> #include <gelf.h> -#include <elf.h> #endif +#include <elf.h> #include "dso.h" @@ -96,7 +96,8 @@ struct symbol_conf { initialized, kptr_restrict, annotate_asm_raw, - annotate_src; + annotate_src, + event_group; const char *vmlinux_name, *kallsyms_name, *source_prefix, @@ -120,6 +121,8 @@ struct symbol_conf { }; extern struct symbol_conf symbol_conf; +extern int vmlinux_path__nr_entries; +extern char **vmlinux_path; static inline void *symbol__priv(struct symbol *sym) { @@ -223,6 +226,8 @@ size_t symbol__fprintf_symname_offs(const struct symbol *sym, size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp); size_t symbol__fprintf(struct symbol *sym, FILE *fp); bool symbol_type__is_a(char symbol_type, enum map_type map_type); +bool symbol__restricted_filename(const char *filename, + const char *restricted_filename); int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, struct symsrc *runtime_ss, symbol_filter_t filter, diff --git a/tools/perf/util/sysfs.c b/tools/perf/util/sysfs.c index 48c6902e749f..f71e9eafe15a 100644 --- a/tools/perf/util/sysfs.c +++ b/tools/perf/util/sysfs.c @@ -8,7 +8,7 @@ static const char * const sysfs_known_mountpoints[] = { }; static int sysfs_found; -char sysfs_mountpoint[PATH_MAX]; +char sysfs_mountpoint[PATH_MAX + 1]; static int sysfs_valid_mountpoint(const char *sysfs) { diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index df59623ac763..632e40e5ceca 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -54,10 +54,10 @@ int thread__comm_len(struct thread *self) return self->comm_len; } -static size_t thread__fprintf(struct thread *self, FILE *fp) +size_t thread__fprintf(struct thread *thread, FILE *fp) { - return fprintf(fp, "Thread %d %s\n", self->pid, self->comm) + - map_groups__fprintf(&self->mg, verbose, fp); + return fprintf(fp, "Thread %d %s\n", thread->pid, thread->comm) + + map_groups__fprintf(&thread->mg, verbose, fp); } void thread__insert_map(struct thread *self, struct map *map) @@ -84,17 +84,3 @@ int thread__fork(struct thread *self, struct thread *parent) return -ENOMEM; return 0; } - -size_t machine__fprintf(struct machine *machine, FILE *fp) -{ - size_t ret = 0; - struct rb_node *nd; - - for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) { - struct thread *pos = rb_entry(nd, struct thread, rb_node); - - ret += thread__fprintf(pos, fp); - } - - return ret; -} diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index f2fa17caa7d5..5ad266403098 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -30,6 +30,7 @@ int thread__set_comm(struct thread *self, const char *comm); int thread__comm_len(struct thread *self); void thread__insert_map(struct thread *self, struct map *map); int thread__fork(struct thread *self, struct thread *parent); +size_t thread__fprintf(struct thread *thread, FILE *fp); static inline struct map *thread__find_map(struct thread *self, enum map_type type, u64 addr) diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c index 884dde9b9bc1..54d37a4753c5 100644 --- a/tools/perf/util/top.c +++ b/tools/perf/util/top.c @@ -26,6 +26,8 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) float samples_per_sec = top->samples / top->delay_secs; float ksamples_per_sec = top->kernel_samples / top->delay_secs; float esamples_percent = (100.0 * top->exact_samples) / top->samples; + struct perf_record_opts *opts = &top->record_opts; + struct perf_target *target = &opts->target; size_t ret = 0; if (!perf_guest) { @@ -61,31 +63,31 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) struct perf_evsel *first = perf_evlist__first(top->evlist); ret += SNPRINTF(bf + ret, size - ret, "%" PRIu64 "%s ", (uint64_t)first->attr.sample_period, - top->freq ? "Hz" : ""); + opts->freq ? "Hz" : ""); } ret += SNPRINTF(bf + ret, size - ret, "%s", perf_evsel__name(top->sym_evsel)); ret += SNPRINTF(bf + ret, size - ret, "], "); - if (top->target.pid) + if (target->pid) ret += SNPRINTF(bf + ret, size - ret, " (target_pid: %s", - top->target.pid); - else if (top->target.tid) + target->pid); + else if (target->tid) ret += SNPRINTF(bf + ret, size - ret, " (target_tid: %s", - top->target.tid); - else if (top->target.uid_str != NULL) + target->tid); + else if (target->uid_str != NULL) ret += SNPRINTF(bf + ret, size - ret, " (uid: %s", - top->target.uid_str); + target->uid_str); else ret += SNPRINTF(bf + ret, size - ret, " (all"); - if (top->target.cpu_list) + if (target->cpu_list) ret += SNPRINTF(bf + ret, size - ret, ", CPU%s: %s)", top->evlist->cpus->nr > 1 ? "s" : "", - top->target.cpu_list); + target->cpu_list); else { - if (top->target.tid) + if (target->tid) ret += SNPRINTF(bf + ret, size - ret, ")"); else ret += SNPRINTF(bf + ret, size - ret, ", %d CPU%s)", diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h index 86ff1b15059b..7ebf357dc9e1 100644 --- a/tools/perf/util/top.h +++ b/tools/perf/util/top.h @@ -14,7 +14,7 @@ struct perf_session; struct perf_top { struct perf_tool tool; struct perf_evlist *evlist; - struct perf_target target; + struct perf_record_opts record_opts; /* * Symbols will be added here in perf_event__process_sample and will * get out after decayed. @@ -24,24 +24,16 @@ struct perf_top { u64 exact_samples; u64 guest_us_samples, guest_kernel_samples; int print_entries, count_filter, delay_secs; - int freq; bool hide_kernel_symbols, hide_user_symbols, zero; bool use_tui, use_stdio; bool sort_has_symbols; - bool dont_use_callchains; bool kptr_restrict_warned; bool vmlinux_warned; - bool inherit; - bool group; - bool sample_id_all_missing; - bool exclude_guest_missing; bool dump_symtab; struct hist_entry *sym_filter_entry; struct perf_evsel *sym_evsel; struct perf_session *session; struct winsize winsize; - unsigned int mmap_pages; - int default_interval; int realtime_prio; int sym_pcnt_filter; const char *sym_filter; diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 5906e8426cc7..805d1f52c5b4 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -12,6 +12,8 @@ */ unsigned int page_size; +bool test_attr__enabled; + bool perf_host = true; bool perf_guest = false; @@ -218,3 +220,25 @@ void dump_stack(void) #else void dump_stack(void) {} #endif + +void get_term_dimensions(struct winsize *ws) +{ + char *s = getenv("LINES"); + + if (s != NULL) { + ws->ws_row = atoi(s); + s = getenv("COLUMNS"); + if (s != NULL) { + ws->ws_col = atoi(s); + if (ws->ws_row && ws->ws_col) + return; + } + } +#ifdef TIOCGWINSZ + if (ioctl(1, TIOCGWINSZ, ws) == 0 && + ws->ws_row && ws->ws_col) + return; +#endif + ws->ws_row = 25; + ws->ws_col = 80; +} diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index c2330918110c..09b4c26b71aa 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -265,10 +265,14 @@ bool is_power_of_2(unsigned long n) size_t hex_width(u64 v); int hex2u64(const char *ptr, u64 *val); +char *ltrim(char *s); char *rtrim(char *s); void dump_stack(void); extern unsigned int page_size; +struct winsize; +void get_term_dimensions(struct winsize *ws); + #endif |