diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-11-17 16:56:17 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-11-17 16:56:17 -0800 |
commit | fa7f578076a8814caa5371e9f4949e408140766d (patch) | |
tree | 8021be6f956e8a259523c394a01183797ce04422 | |
parent | 2dcd9c71c1ffa9a036e09047f60e08383bb0abb6 (diff) | |
parent | d1b069f5febc850556cf49e9bb9092d3179c5be5 (diff) |
Merge branch 'akpm' (patches from Andrew)
Merge more updates from Andrew Morton:
- a bit more MM
- procfs updates
- dynamic-debug fixes
- lib/ updates
- checkpatch
- epoll
- nilfs2
- signals
- rapidio
- PID management cleanup and optimization
- kcov updates
- sysvipc updates
- quite a few misc things all over the place
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (94 commits)
EXPERT Kconfig menu: fix broken EXPERT menu
include/asm-generic/topology.h: remove unused parent_node() macro
arch/tile/include/asm/topology.h: remove unused parent_node() macro
arch/sparc/include/asm/topology_64.h: remove unused parent_node() macro
arch/sh/include/asm/topology.h: remove unused parent_node() macro
arch/ia64/include/asm/topology.h: remove unused parent_node() macro
drivers/pcmcia/sa1111_badge4.c: avoid unused function warning
mm: add infrastructure for get_user_pages_fast() benchmarking
sysvipc: make get_maxid O(1) again
sysvipc: properly name ipc_addid() limit parameter
sysvipc: duplicate lock comments wrt ipc_addid()
sysvipc: unteach ids->next_id for !CHECKPOINT_RESTORE
initramfs: use time64_t timestamps
drivers/watchdog: make use of devm_register_reboot_notifier()
kernel/reboot.c: add devm_register_reboot_notifier()
kcov: update documentation
Makefile: support flag -fsanitizer-coverage=trace-cmp
kcov: support comparison operands collection
kcov: remove pointless current != NULL check
kernel/panic.c: add TAINT_AUX
...
115 files changed, 1892 insertions, 952 deletions
diff --git a/Documentation/admin-guide/dynamic-debug-howto.rst b/Documentation/admin-guide/dynamic-debug-howto.rst index 12278a926370..fdf72429f801 100644 --- a/Documentation/admin-guide/dynamic-debug-howto.rst +++ b/Documentation/admin-guide/dynamic-debug-howto.rst @@ -18,7 +18,7 @@ shortcut for ``print_hex_dump(KERN_DEBUG)``. For ``print_hex_dump_debug()``/``print_hex_dump_bytes()``, format string is its ``prefix_str`` argument, if it is constant string; or ``hexdump`` -in case ``prefix_str`` is build dynamically. +in case ``prefix_str`` is built dynamically. Dynamic debug has even more useful features: @@ -197,8 +197,8 @@ line line number matches the callsite line number exactly. A range of line numbers matches any callsite between the first and last line number inclusive. An empty first number means - the first line in the file, an empty line number means the - last number in the file. Examples:: + the first line in the file, an empty last line number means the + last line number in the file. Examples:: line 1603 // exactly line 1603 line 1600-1605 // the six lines from line 1600 to line 1605 diff --git a/Documentation/clearing-warn-once.txt b/Documentation/clearing-warn-once.txt new file mode 100644 index 000000000000..5b1f5d547be1 --- /dev/null +++ b/Documentation/clearing-warn-once.txt @@ -0,0 +1,7 @@ + +WARN_ONCE / WARN_ON_ONCE only print a warning once. + +echo 1 > /sys/kernel/debug/clear_warn_once + +clears the state and allows the warnings to print once again. +This can be useful after test suite runs to reproduce problems. diff --git a/Documentation/dev-tools/kcov.rst b/Documentation/dev-tools/kcov.rst index 44886c91e112..c2f6452e38ed 100644 --- a/Documentation/dev-tools/kcov.rst +++ b/Documentation/dev-tools/kcov.rst @@ -12,19 +12,30 @@ To achieve this goal it does not collect coverage in soft/hard interrupts and instrumentation of some inherently non-deterministic parts of kernel is disabled (e.g. scheduler, locking). -Usage ------ +kcov is also able to collect comparison operands from the instrumented code +(this feature currently requires that the kernel is compiled with clang). + +Prerequisites +------------- Configure the kernel with:: CONFIG_KCOV=y CONFIG_KCOV requires gcc built on revision 231296 or later. + +If the comparison operands need to be collected, set:: + + CONFIG_KCOV_ENABLE_COMPARISONS=y + Profiling data will only become accessible once debugfs has been mounted:: mount -t debugfs none /sys/kernel/debug -The following program demonstrates kcov usage from within a test program: +Coverage collection +------------------- +The following program demonstrates coverage collection from within a test +program using kcov: .. code-block:: c @@ -44,6 +55,9 @@ The following program demonstrates kcov usage from within a test program: #define KCOV_DISABLE _IO('c', 101) #define COVER_SIZE (64<<10) + #define KCOV_TRACE_PC 0 + #define KCOV_TRACE_CMP 1 + int main(int argc, char **argv) { int fd; @@ -64,7 +78,7 @@ The following program demonstrates kcov usage from within a test program: if ((void*)cover == MAP_FAILED) perror("mmap"), exit(1); /* Enable coverage collection on the current thread. */ - if (ioctl(fd, KCOV_ENABLE, 0)) + if (ioctl(fd, KCOV_ENABLE, KCOV_TRACE_PC)) perror("ioctl"), exit(1); /* Reset coverage from the tail of the ioctl() call. */ __atomic_store_n(&cover[0], 0, __ATOMIC_RELAXED); @@ -111,3 +125,80 @@ The interface is fine-grained to allow efficient forking of test processes. That is, a parent process opens /sys/kernel/debug/kcov, enables trace mode, mmaps coverage buffer and then forks child processes in a loop. Child processes only need to enable coverage (disable happens automatically on thread end). + +Comparison operands collection +------------------------------ +Comparison operands collection is similar to coverage collection: + +.. code-block:: c + + /* Same includes and defines as above. */ + + /* Number of 64-bit words per record. */ + #define KCOV_WORDS_PER_CMP 4 + + /* + * The format for the types of collected comparisons. + * + * Bit 0 shows whether one of the arguments is a compile-time constant. + * Bits 1 & 2 contain log2 of the argument size, up to 8 bytes. + */ + + #define KCOV_CMP_CONST (1 << 0) + #define KCOV_CMP_SIZE(n) ((n) << 1) + #define KCOV_CMP_MASK KCOV_CMP_SIZE(3) + + int main(int argc, char **argv) + { + int fd; + uint64_t *cover, type, arg1, arg2, is_const, size; + unsigned long n, i; + + fd = open("/sys/kernel/debug/kcov", O_RDWR); + if (fd == -1) + perror("open"), exit(1); + if (ioctl(fd, KCOV_INIT_TRACE, COVER_SIZE)) + perror("ioctl"), exit(1); + /* + * Note that the buffer pointer is of type uint64_t*, because all + * the comparison operands are promoted to uint64_t. + */ + cover = (uint64_t *)mmap(NULL, COVER_SIZE * sizeof(unsigned long), + PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if ((void*)cover == MAP_FAILED) + perror("mmap"), exit(1); + /* Note KCOV_TRACE_CMP instead of KCOV_TRACE_PC. */ + if (ioctl(fd, KCOV_ENABLE, KCOV_TRACE_CMP)) + perror("ioctl"), exit(1); + __atomic_store_n(&cover[0], 0, __ATOMIC_RELAXED); + read(-1, NULL, 0); + /* Read number of comparisons collected. */ + n = __atomic_load_n(&cover[0], __ATOMIC_RELAXED); + for (i = 0; i < n; i++) { + type = cover[i * KCOV_WORDS_PER_CMP + 1]; + /* arg1 and arg2 - operands of the comparison. */ + arg1 = cover[i * KCOV_WORDS_PER_CMP + 2]; + arg2 = cover[i * KCOV_WORDS_PER_CMP + 3]; + /* ip - caller address. */ + ip = cover[i * KCOV_WORDS_PER_CMP + 4]; + /* size of the operands. */ + size = 1 << ((type & KCOV_CMP_MASK) >> 1); + /* is_const - true if either operand is a compile-time constant.*/ + is_const = type & KCOV_CMP_CONST; + printf("ip: 0x%lx type: 0x%lx, arg1: 0x%lx, arg2: 0x%lx, " + "size: %lu, %s\n", + ip, type, arg1, arg2, size, + is_const ? "const" : "non-const"); + } + if (ioctl(fd, KCOV_DISABLE, 0)) + perror("ioctl"), exit(1); + /* Free resources. */ + if (munmap(cover, COVER_SIZE * sizeof(unsigned long))) + perror("munmap"), exit(1); + if (close(fd)) + perror("close"), exit(1); + return 0; + } + +Note that the kcov modes (coverage collection or comparison operands) are +mutually exclusive. diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index ec571b9bb18a..2a84bb334894 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -181,6 +181,7 @@ read the file /proc/PID/status: VmPTE: 20 kb VmSwap: 0 kB HugetlbPages: 0 kB + CoreDumping: 0 Threads: 1 SigQ: 0/28578 SigPnd: 0000000000000000 @@ -253,6 +254,8 @@ Table 1-2: Contents of the status files (as of 4.8) VmSwap amount of swap used by anonymous private data (shmem swap usage is not included) HugetlbPages size of hugetlb memory portions + CoreDumping process's memory is currently being dumped + (killing the process may lead to a corrupted core) Threads number of threads SigQ number of signals queued/max. number for queue SigPnd bitmap of pending signals for the thread diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 055c8b3e1018..b920423f88cb 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -818,7 +818,7 @@ tooling to work, you can do: swappiness This control is used to define how aggressive the kernel will swap -memory pages. Higher values will increase agressiveness, lower values +memory pages. Higher values will increase aggressiveness, lower values decrease the amount of swap. A value of 0 instructs the kernel not to initiate swap until the amount of free and file-backed pages is less than the high water mark in a zone. @@ -375,8 +375,6 @@ CFLAGS_KERNEL = AFLAGS_KERNEL = LDFLAGS_vmlinux = CFLAGS_GCOV := -fprofile-arcs -ftest-coverage -fno-tree-loop-im $(call cc-disable-warning,maybe-uninitialized,) -CFLAGS_KCOV := $(call cc-option,-fsanitize-coverage=trace-pc,) - # Use USERINCLUDE when you must reference the UAPI directories only. USERINCLUDE := \ @@ -659,6 +657,7 @@ ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC) $(KBUILD_CFLA KBUILD_AFLAGS += -DCC_HAVE_ASM_GOTO endif +include scripts/Makefile.kcov include scripts/Makefile.gcc-plugins ifdef CONFIG_READABLE_ASM diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h index 3ad8f6988363..82f9bf702804 100644 --- a/arch/ia64/include/asm/topology.h +++ b/arch/ia64/include/asm/topology.h @@ -34,13 +34,6 @@ &node_to_cpu_mask[node]) /* - * Returns the number of the node containing Node 'nid'. - * Not implemented here. Multi-level hierarchies detected with - * the help of node_distance(). - */ -#define parent_node(nid) (nid) - -/* * Determines the node for a given pci bus */ #define pcibus_to_node(bus) PCI_CONTROLLER(bus)->node diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c index f7693f49c573..f4db2168d1b8 100644 --- a/arch/ia64/kernel/asm-offsets.c +++ b/arch/ia64/kernel/asm-offsets.c @@ -31,8 +31,8 @@ void foo(void) DEFINE(SIGFRAME_SIZE, sizeof (struct sigframe)); DEFINE(UNW_FRAME_INFO_SIZE, sizeof (struct unw_frame_info)); - BUILD_BUG_ON(sizeof(struct upid) != 32); - DEFINE(IA64_UPID_SHIFT, 5); + BUILD_BUG_ON(sizeof(struct upid) != 16); + DEFINE(IA64_UPID_SHIFT, 4); BLANK(); diff --git a/arch/mn10300/mm/fault.c b/arch/mn10300/mm/fault.c index f23781d6bbb3..f0bfa1448744 100644 --- a/arch/mn10300/mm/fault.c +++ b/arch/mn10300/mm/fault.c @@ -60,7 +60,7 @@ void bust_spinlocks(int yes) void do_BUG(const char *file, int line) { bust_spinlocks(1); - printk(KERN_EMERG "------------[ cut here ]------------\n"); + printk(KERN_EMERG CUT_HERE); printk(KERN_EMERG "kernel BUG at %s:%d!\n", file, line); } diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 1fbb5da17dd2..e47761cdcb98 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -1093,7 +1093,7 @@ static int show_spu_loadavg(struct seq_file *s, void *private) LOAD_INT(c), LOAD_FRAC(c), count_active_contexts(), atomic_read(&nr_spu_contexts), - task_active_pid_ns(current)->last_pid); + idr_get_cursor(&task_active_pid_ns(current)->idr)); return 0; } diff --git a/arch/sh/boot/compressed/misc.c b/arch/sh/boot/compressed/misc.c index f2d9d3079d4e..627ce8e75e01 100644 --- a/arch/sh/boot/compressed/misc.c +++ b/arch/sh/boot/compressed/misc.c @@ -104,6 +104,18 @@ static void error(char *x) while(1); /* Halt */ } +unsigned long __stack_chk_guard; + +void __stack_chk_guard_setup(void) +{ + __stack_chk_guard = 0x000a0dff; +} + +void __stack_chk_fail(void) +{ + error("stack-protector: Kernel stack is corrupted\n"); +} + #ifdef CONFIG_SUPERH64 #define stackalign 8 #else @@ -118,6 +130,8 @@ void decompress_kernel(void) { unsigned long output_addr; + __stack_chk_guard_setup(); + #ifdef CONFIG_SUPERH64 output_addr = (CONFIG_MEMORY_START + 0x2000); #else diff --git a/arch/sh/include/asm/topology.h b/arch/sh/include/asm/topology.h index 9a32eb4098df..1db470e02456 100644 --- a/arch/sh/include/asm/topology.h +++ b/arch/sh/include/asm/topology.h @@ -5,7 +5,6 @@ #ifdef CONFIG_NUMA #define cpu_to_node(cpu) ((void)(cpu),0) -#define parent_node(node) ((void)(node),0) #define cpumask_of_node(node) ((void)node, cpu_online_mask) diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h index 3831b1911a19..34c628a22ea5 100644 --- a/arch/sparc/include/asm/topology_64.h +++ b/arch/sparc/include/asm/topology_64.h @@ -11,8 +11,6 @@ static inline int cpu_to_node(int cpu) return numa_cpu_lookup_table[cpu]; } -#define parent_node(node) (node) - #define cpumask_of_node(node) ((node) == -1 ? \ cpu_all_mask : \ &numa_cpumask_lookup_table[node]) diff --git a/arch/tile/include/asm/topology.h b/arch/tile/include/asm/topology.h index b11d5fcd2c41..635a0a4596f0 100644 --- a/arch/tile/include/asm/topology.h +++ b/arch/tile/include/asm/topology.h @@ -29,12 +29,6 @@ static inline int cpu_to_node(int cpu) return cpu_2_node[cpu]; } -/* - * Returns the number of the node containing Node 'node'. - * This architecture is flat, so it is a pretty simple function! - */ -#define parent_node(node) (node) - /* Returns a bitmask of CPUs on Node 'node'. */ static inline const struct cpumask *cpumask_of_node(int node) { diff --git a/drivers/misc/lkdtm_bugs.c b/drivers/misc/lkdtm_bugs.c index b0f7af872bb5..7eebbdfbcacd 100644 --- a/drivers/misc/lkdtm_bugs.c +++ b/drivers/misc/lkdtm_bugs.c @@ -63,9 +63,11 @@ void lkdtm_BUG(void) BUG(); } +static int warn_counter; + void lkdtm_WARNING(void) { - WARN_ON(1); + WARN(1, "Warning message trigger count: %d\n", warn_counter++); } void lkdtm_EXCEPTION(void) diff --git a/drivers/pcmcia/sa1111_badge4.c b/drivers/pcmcia/sa1111_badge4.c index 2f490930430d..93a5c7423d80 100644 --- a/drivers/pcmcia/sa1111_badge4.c +++ b/drivers/pcmcia/sa1111_badge4.c @@ -144,6 +144,7 @@ int pcmcia_badge4_init(struct sa1111_dev *dev) sa11xx_drv_pcmcia_add_one); } +#ifndef MODULE static int __init pcmv_setup(char *s) { int v[4]; @@ -158,3 +159,4 @@ static int __init pcmv_setup(char *s) } __setup("pcmv=", pcmv_setup); +#endif diff --git a/drivers/rapidio/devices/rio_mport_cdev.c b/drivers/rapidio/devices/rio_mport_cdev.c index 665d9e94a7e1..ec4bc1515f0d 100644 --- a/drivers/rapidio/devices/rio_mport_cdev.c +++ b/drivers/rapidio/devices/rio_mport_cdev.c @@ -959,9 +959,10 @@ rio_dma_transfer(struct file *filp, u32 transfer_mode, nents = dma_map_sg(chan->device->dev, req->sgt.sgl, req->sgt.nents, dir); - if (nents == -EFAULT) { + if (nents == 0) { rmcd_error("Failed to map SG list"); - return -EFAULT; + ret = -EFAULT; + goto err_pg; } ret = do_dma_request(req, xfer, sync, nents); diff --git a/drivers/rapidio/switches/idt_gen2.c b/drivers/rapidio/switches/idt_gen2.c index e67b923b1ca6..4931ed790428 100644 --- a/drivers/rapidio/switches/idt_gen2.c +++ b/drivers/rapidio/switches/idt_gen2.c @@ -458,7 +458,7 @@ static void idtg2_remove(struct rio_dev *rdev) idtg2_sysfs(rdev, false); } -static struct rio_device_id idtg2_id_table[] = { +static const struct rio_device_id idtg2_id_table[] = { {RIO_DEVICE(RIO_DID_IDTCPS1848, RIO_VID_IDT)}, {RIO_DEVICE(RIO_DID_IDTCPS1616, RIO_VID_IDT)}, {RIO_DEVICE(RIO_DID_IDTVPS1616, RIO_VID_IDT)}, diff --git a/drivers/rapidio/switches/idt_gen3.c b/drivers/rapidio/switches/idt_gen3.c index c5923a547bed..85a3908294d9 100644 --- a/drivers/rapidio/switches/idt_gen3.c +++ b/drivers/rapidio/switches/idt_gen3.c @@ -348,7 +348,7 @@ static void idtg3_shutdown(struct rio_dev *rdev) } } -static struct rio_device_id idtg3_id_table[] = { +static const struct rio_device_id idtg3_id_table[] = { {RIO_DEVICE(RIO_DID_IDTRXS1632, RIO_VID_IDT)}, {RIO_DEVICE(RIO_DID_IDTRXS2448, RIO_VID_IDT)}, { 0, } /* terminate list */ diff --git a/drivers/rapidio/switches/idtcps.c b/drivers/rapidio/switches/idtcps.c index 7fbb60d31796..4058ce2c76fa 100644 --- a/drivers/rapidio/switches/idtcps.c +++ b/drivers/rapidio/switches/idtcps.c @@ -168,7 +168,7 @@ static void idtcps_remove(struct rio_dev *rdev) spin_unlock(&rdev->rswitch->lock); } -static struct rio_device_id idtcps_id_table[] = { +static const struct rio_device_id idtcps_id_table[] = { {RIO_DEVICE(RIO_DID_IDTCPS6Q, RIO_VID_IDT)}, {RIO_DEVICE(RIO_DID_IDTCPS8, RIO_VID_IDT)}, {RIO_DEVICE(RIO_DID_IDTCPS10Q, RIO_VID_IDT)}, diff --git a/drivers/rapidio/switches/tsi568.c b/drivers/rapidio/switches/tsi568.c index 8a43561b9d17..1214628b7ded 100644 --- a/drivers/rapidio/switches/tsi568.c +++ b/drivers/rapidio/switches/tsi568.c @@ -169,7 +169,7 @@ static void tsi568_remove(struct rio_dev *rdev) spin_unlock(&rdev->rswitch->lock); } -static struct rio_device_id tsi568_id_table[] = { +static const struct rio_device_id tsi568_id_table[] = { {RIO_DEVICE(RIO_DID_TSI568, RIO_VID_TUNDRA)}, { 0, } /* terminate list */ }; diff --git a/drivers/rapidio/switches/tsi57x.c b/drivers/rapidio/switches/tsi57x.c index 2700d15f7584..9f063e214836 100644 --- a/drivers/rapidio/switches/tsi57x.c +++ b/drivers/rapidio/switches/tsi57x.c @@ -336,7 +336,7 @@ static void tsi57x_remove(struct rio_dev *rdev) spin_unlock(&rdev->rswitch->lock); } -static struct rio_device_id tsi57x_id_table[] = { +static const struct rio_device_id tsi57x_id_table[] = { {RIO_DEVICE(RIO_DID_TSI572, RIO_VID_TUNDRA)}, {RIO_DEVICE(RIO_DID_TSI574, RIO_VID_TUNDRA)}, {RIO_DEVICE(RIO_DID_TSI577, RIO_VID_TUNDRA)}, diff --git a/drivers/watchdog/watchdog_core.c b/drivers/watchdog/watchdog_core.c index 74265b2f806c..8a8d952f8df9 100644 --- a/drivers/watchdog/watchdog_core.c +++ b/drivers/watchdog/watchdog_core.c @@ -137,25 +137,6 @@ int watchdog_init_timeout(struct watchdog_device *wdd, } EXPORT_SYMBOL_GPL(watchdog_init_timeout); -static int watchdog_reboot_notifier(struct notifier_block *nb, - unsigned long code, void *data) -{ - struct watchdog_device *wdd = container_of(nb, struct watchdog_device, - reboot_nb); - - if (code == SYS_DOWN || code == SYS_HALT) { - if (watchdog_active(wdd)) { - int ret; - - ret = wdd->ops->stop(wdd); - if (ret) - return NOTIFY_BAD; - } - } - - return NOTIFY_DONE; -} - static int watchdog_restart_notifier(struct notifier_block *nb, unsigned long action, void *data) { @@ -244,19 +225,6 @@ static int __watchdog_register_device(struct watchdog_device *wdd) } } - if (test_bit(WDOG_STOP_ON_REBOOT, &wdd->status)) { - wdd->reboot_nb.notifier_call = watchdog_reboot_notifier; - - ret = register_reboot_notifier(&wdd->reboot_nb); - if (ret) { - pr_err("watchdog%d: Cannot register reboot notifier (%d)\n", - wdd->id, ret); - watchdog_dev_unregister(wdd); - ida_simple_remove(&watchdog_ida, wdd->id); - return ret; - } - } - if (wdd->ops->restart) { wdd->restart_nb.notifier_call = watchdog_restart_notifier; @@ -302,9 +270,6 @@ static void __watchdog_unregister_device(struct watchdog_device *wdd) if (wdd->ops->restart) unregister_restart_handler(&wdd->restart_nb); - if (test_bit(WDOG_STOP_ON_REBOOT, &wdd->status)) - unregister_reboot_notifier(&wdd->reboot_nb); - watchdog_dev_unregister(wdd); ida_simple_remove(&watchdog_ida, wdd->id); } diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c index 0826e663bd5a..1e971a50d7fb 100644 --- a/drivers/watchdog/watchdog_dev.c +++ b/drivers/watchdog/watchdog_dev.c @@ -42,6 +42,7 @@ #include <linux/miscdevice.h> /* For handling misc devices */ #include <linux/module.h> /* For module stuff/... */ #include <linux/mutex.h> /* For mutexes */ +#include <linux/reboot.h> /* For reboot notifier */ #include <linux/slab.h> /* For memory functions */ #include <linux/types.h> /* For standard types (like size_t) */ #include <linux/watchdog.h> /* For watchdog specific items */ @@ -1016,6 +1017,25 @@ static struct class watchdog_class = { .dev_groups = wdt_groups, }; +static int watchdog_reboot_notifier(struct notifier_block *nb, + unsigned long code, void *data) +{ + struct watchdog_device *wdd; + + wdd = container_of(nb, struct watchdog_device, reboot_nb); + if (code == SYS_DOWN || code == SYS_HALT) { + if (watchdog_active(wdd)) { + int ret; + + ret = wdd->ops->stop(wdd); + if (ret) + return NOTIFY_BAD; + } + } + + return NOTIFY_DONE; +} + /* * watchdog_dev_register: register a watchdog device * @wdd: watchdog device @@ -1049,6 +1069,18 @@ int watchdog_dev_register(struct watchdog_device *wdd) if (ret) { device_destroy(&watchdog_class, devno); watchdog_cdev_unregister(wdd); + return ret; + } + + if (test_bit(WDOG_STOP_ON_REBOOT, &wdd->status)) { + wdd->reboot_nb.notifier_call = watchdog_reboot_notifier; + + ret = devm_register_reboot_notifier(dev, &wdd->reboot_nb); + if (ret) { + pr_err("watchdog%d: Cannot register reboot notifier (%d)\n", + wdd->id, ret); + watchdog_dev_unregister(wdd); + } } return ret; diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 4ac49d038bf3..8fc41705c7cd 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -81,7 +81,8 @@ static int autofs4_write(struct autofs_sb_info *sbi, spin_unlock_irqrestore(¤t->sighand->siglock, flags); } - return (bytes > 0); + /* if 'wr' returned 0 (impossible) we assume -EIO (safe) */ + return bytes == 0 ? 0 : wr < 0 ? wr : -EIO; } static void autofs4_notify_daemon(struct autofs_sb_info *sbi, @@ -95,6 +96,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, } pkt; struct file *pipe = NULL; size_t pktsz; + int ret; pr_debug("wait id = 0x%08lx, name = %.*s, type=%d\n", (unsigned long) wq->wait_queue_token, @@ -169,7 +171,18 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, mutex_unlock(&sbi->wq_mutex); if (autofs4_write(sbi, pipe, &pkt, pktsz)) + switch (ret = autofs4_write(sbi, pipe, &pkt, pktsz)) { + case 0: + break; + case -ENOMEM: + case -ERESTARTSYS: + /* Just fail this one */ + autofs4_wait_release(sbi, wq->wait_queue_token, ret); + break; + default: autofs4_catatonic_mode(sbi); + break; + } fput(pipe); } diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 396a3c075fd4..afd548ebc328 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -276,12 +276,6 @@ static DEFINE_MUTEX(epmutex); /* Used to check for epoll file descriptor inclusion loops */ static struct nested_calls poll_loop_ncalls; -/* Used for safe wake up implementation */ -static struct nested_calls poll_safewake_ncalls; - -/* Used to call file's f_op->poll() under the nested calls boundaries */ -static struct nested_calls poll_readywalk_ncalls; - /* Slab cache used to allocate "struct epitem" */ static struct kmem_cache *epi_cache __read_mostly; @@ -551,40 +545,21 @@ out_unlock: * this special case of epoll. */ #ifdef CONFIG_DEBUG_LOCK_ALLOC -static inline void ep_wake_up_nested(wait_queue_head_t *wqueue, - unsigned long events, int subclass) + +static struct nested_calls poll_safewake_ncalls; + +static int ep_poll_wakeup_proc(void *priv, void *cookie, int call_nests) { unsigned long flags; + wait_queue_head_t *wqueue = (wait_queue_head_t *)cookie; - spin_lock_irqsave_nested(&wqueue->lock, flags, subclass); - wake_up_locked_poll(wqueue, events); + spin_lock_irqsave_nested(&wqueue->lock, flags, call_nests + 1); + wake_up_locked_poll(wqueue, POLLIN); spin_unlock_irqrestore(&wqueue->lock, flags); -} -#else -static inline void ep_wake_up_nested(wait_queue_head_t *wqueue, - unsigned long events, int subclass) -{ - wake_up_poll(wqueue, events); -} -#endif -static int ep_poll_wakeup_proc(void *priv, void *cookie, int call_nests) -{ - ep_wake_up_nested((wait_queue_head_t *) cookie, POLLIN, - 1 + call_nests); return 0; } -/* - * Perform a safe wake up of the poll wait list. The problem is that - * with the new callback'd wake up system, it is possible that the - * poll callback is reentered from inside the call to wake_up() done - * on the poll wait queue head. The rule is that we cannot reenter the - * wake up code from the same task more than EP_MAX_NESTS times, - * and we cannot reenter the same wait queue head at all. This will - * enable to have a hierarchy of epoll file descriptor of no more than - * EP_MAX_NESTS deep. - */ static void ep_poll_safewake(wait_queue_head_t *wq) { int this_cpu = get_cpu(); @@ -595,6 +570,15 @@ static void ep_poll_safewake(wait_queue_head_t *wq) put_cpu(); } +#else + +static void ep_poll_safewake(wait_queue_head_t *wq) +{ + wake_up_poll(wq, POLLIN); +} + +#endif + static void ep_remove_wait_queue(struct eppoll_entry *pwq) { wait_queue_head_t *whead; @@ -880,11 +864,33 @@ static int ep_eventpoll_release(struct inode *inode, struct file *file) return 0; } -static inline unsigned int ep_item_poll(struct epitem *epi, poll_table *pt) +static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head, + void *priv); +static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead, + poll_table *pt); + +/* + * Differs from ep_eventpoll_poll() in that internal callers already have + * the ep->mtx so we need to start from depth=1, such that mutex_lock_nested() + * is correctly annotated. + */ +static unsigned int ep_item_poll(struct epitem *epi, poll_table *pt, int depth) { + struct eventpoll *ep; + bool locked; + pt->_key = epi->event.events; + if (!is_file_epoll(epi->ffd.file)) + return epi->ffd.file->f_op->poll(epi->ffd.file, pt) & + epi->event.events; + + ep = epi->ffd.file->private_data; + poll_wait(epi->ffd.file, &ep->poll_wait, pt); + locked = pt && (pt->_qproc == ep_ptable_queue_proc); - return epi->ffd.file->f_op->poll(epi->ffd.file, pt) & epi->event.events; + return ep_scan_ready_list(epi->ffd.file->private_data, + ep_read_events_proc, &depth, depth, + locked) & epi->event.events; } static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head, @@ -892,13 +898,15 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head, { struct epitem *epi, *tmp; poll_table pt; + int depth = *(int *)priv; init_poll_funcptr(&pt, NULL); + depth++; list_for_each_entry_safe(epi, tmp, head, rdllink) { - if (ep_item_poll(epi, &pt)) + if (ep_item_poll(epi, &pt, depth)) { return POLLIN | POLLRDNORM; - else { + } else { /* * Item has been dropped into the ready list by the poll * callback, but it's not actually ready, as far as @@ -912,48 +920,20 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head, return 0; } -static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead, - poll_table *pt); - -struct readyevents_arg { - struct eventpoll *ep; - bool locked; -}; - -static int ep_poll_readyevents_proc(void *priv, void *cookie, int call_nests) -{ - struct readyevents_arg *arg = priv; - - return ep_scan_ready_list(arg->ep, ep_read_events_proc, NULL, - call_nests + 1, arg->locked); -} - static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait) { - int pollflags; struct eventpoll *ep = file->private_data; - struct readyevents_arg arg; - - /* - * During ep_insert() we already hold the ep->mtx for the tfile. - * Prevent re-aquisition. - */ - arg.locked = wait && (wait->_qproc == ep_ptable_queue_proc); - arg.ep = ep; + int depth = 0; /* Insert inside our poll wait queue */ poll_wait(file, &ep->poll_wait, wait); /* * Proceed to find out if wanted events are really available inside - * the ready list. This need to be done under ep_call_nested() - * supervision, since the call to f_op->poll() done on listed files - * could re-enter here. + * the ready list. */ - pollflags = ep_call_nested(&poll_readywalk_ncalls, EP_MAX_NESTS, - ep_poll_readyevents_proc, &arg, ep, current); - - return pollflags != -1 ? pollflags : 0; + return ep_scan_ready_list(ep, ep_read_events_proc, + &depth, depth, false); } #ifdef CONFIG_PROC_FS @@ -1472,7 +1452,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, * this operation completes, the poll callback can start hitting * the new item. */ - revents = ep_item_poll(epi, &epq.pt); + revents = ep_item_poll(epi, &epq.pt, 1); /* * We have to check if something went wrong during the poll wait queue @@ -1606,7 +1586,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even * Get current event bits. We can safely use the file* here because * its usage count has been increased by the caller of this function. */ - revents = ep_item_poll(epi, &pt); + revents = ep_item_poll(epi, &pt, 1); /* * If the item is "hot" and it is not registered inside the ready @@ -1674,7 +1654,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head, list_del_init(&epi->rdllink); - revents = ep_item_poll(epi, &pt); + revents = ep_item_poll(epi, &pt, 1); /* * If the event mask intersect the caller-requested one, @@ -2313,11 +2293,10 @@ static int __init eventpoll_init(void) */ ep_nested_calls_init(&poll_loop_ncalls); +#ifdef CONFIG_DEBUG_LOCK_ALLOC /* Initialize the structure used to perform safe poll wait head wake ups */ ep_nested_calls_init(&poll_safewake_ncalls); - - /* Initialize the structure used to perform file's f_op->poll() calls */ - ep_nested_calls_init(&poll_readywalk_ncalls); +#endif /* * We can have many thousands of epitems, so prevent this from @@ -2327,11 +2306,11 @@ static int __init eventpoll_init(void) /* Allocates slab cache used to allocate "struct epitem" items */ epi_cache = kmem_cache_create("eventpoll_epi", sizeof(struct epitem), - 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); + 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, NULL); /* Allocates slab cache used to allocate "struct eppoll_entry" */ pwq_cache = kmem_cache_create("eventpoll_pwq", - sizeof(struct eppoll_entry), 0, SLAB_PANIC, NULL); + sizeof(struct eppoll_entry), 0, SLAB_PANIC|SLAB_ACCOUNT, NULL); return 0; } diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 81cecbe6d7cf..b833ffeee1e1 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -291,7 +291,6 @@ static int fat_parse_long(struct inode *dir, loff_t *pos, } } parse_long: - slots = 0; ds = (struct msdos_dir_slot *)*de; id = ds->id; if (!(id & 0x40)) diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c index 8aec5e732abf..b63a4df7327b 100644 --- a/fs/hfs/bnode.c +++ b/fs/hfs/bnode.c @@ -98,13 +98,11 @@ void hfs_bnode_clear(struct hfs_bnode *node, int off, int len) void hfs_bnode_copy(struct hfs_bnode *dst_node, int dst, struct hfs_bnode *src_node, int src, int len) { - struct hfs_btree *tree; struct page *src_page, *dst_page; hfs_dbg(BNODE_MOD, "copybytes: %u,%u,%u\n", dst, src, len); if (!len) return; - tree = src_node->tree; src += src_node->page_offset; dst += dst_node->page_offset; src_page = src_node->page[0]; @@ -237,7 +235,6 @@ struct hfs_bnode *hfs_bnode_findhash(struct hfs_btree *tree, u32 cnid) static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid) { - struct super_block *sb; struct hfs_bnode *node, *node2; struct address_space *mapping; struct page *page; @@ -249,7 +246,6 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid) return NULL; } - sb = tree->inode->i_sb; size = sizeof(struct hfs_bnode) + tree->pages_per_bnode * sizeof(struct page *); node = kzalloc(size, GFP_KERNEL); diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c index d77015c3f22c..177fae4e6581 100644 --- a/fs/hfsplus/bnode.c +++ b/fs/hfsplus/bnode.c @@ -127,14 +127,12 @@ void hfs_bnode_clear(struct hfs_bnode *node, int off, int len) void hfs_bnode_copy(struct hfs_bnode *dst_node, int dst, struct hfs_bnode *src_node, int src, int len) { - struct hfs_btree *tree; struct page **src_page, **dst_page; int l; hfs_dbg(BNODE_MOD, "copybytes: %u,%u,%u\n", dst, src, len); if (!len) return; - tree = src_node->tree; src += src_node->page_offset; dst += dst_node->page_offset; src_page = src_node->page + (src >> PAGE_SHIFT); @@ -401,7 +399,6 @@ struct hfs_bnode *hfs_bnode_findhash(struct hfs_btree *tree, u32 cnid) static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid) { - struct super_block *sb; struct hfs_bnode *node, *node2; struct address_space *mapping; struct page *page; @@ -414,7 +411,6 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid) return NULL; } - sb = tree->inode->i_sb; size = sizeof(struct hfs_bnode) + tree->pages_per_bnode * sizeof(struct page *); node = kzalloc(size, GFP_KERNEL); diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index 515d13c196da..1a2894aa0194 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -150,7 +150,7 @@ static int nilfs_symlink(struct inode *dir, struct dentry *dentry, if (err) return err; - inode = nilfs_new_inode(dir, S_IFLNK | S_IRWXUGO); + inode = nilfs_new_inode(dir, S_IFLNK | 0777); err = PTR_ERR(inode); if (IS_ERR(inode)) goto out; diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index f65392fecb5c..f572538dcc4f 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -1954,8 +1954,6 @@ static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci, err, ii->vfs_inode.i_ino); return err; } - mark_buffer_dirty(ibh); - nilfs_mdt_mark_dirty(ifile); spin_lock(&nilfs->ns_inode_lock); if (likely(!ii->i_bh)) ii->i_bh = ibh; @@ -1964,6 +1962,10 @@ static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci, goto retry; } + // Always redirty the buffer to avoid race condition + mark_buffer_dirty(ii->i_bh); + nilfs_mdt_mark_dirty(ifile); + clear_bit(NILFS_I_QUEUED, &ii->i_state); set_bit(NILFS_I_BUSY, &ii->i_state); list_move_tail(&ii->i_dirty, &sci->sc_dirty_files); @@ -2400,11 +2402,11 @@ static int nilfs_segctor_construct(struct nilfs_sc_info *sci, int mode) return err; } -static void nilfs_construction_timeout(unsigned long data) +static void nilfs_construction_timeout(struct timer_list *t) { - struct task_struct *p = (struct task_struct *)data; + struct nilfs_sc_info *sci = from_timer(sci, t, sc_timer); - wake_up_process(p); + wake_up_process(sci->sc_timer_task); } static void @@ -2542,8 +2544,7 @@ static int nilfs_segctor_thread(void *arg) struct the_nilfs *nilfs = sci->sc_super->s_fs_info; int timeout = 0; - sci->sc_timer.data = (unsigned long)current; - sci->sc_timer.function = nilfs_construction_timeout; + sci->sc_timer_task = current; /* start sync. */ sci->sc_task = current; @@ -2674,7 +2675,7 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb, INIT_LIST_HEAD(&sci->sc_gc_inodes); INIT_LIST_HEAD(&sci->sc_iput_queue); INIT_WORK(&sci->sc_iput_work, nilfs_iput_work_func); - init_timer(&sci->sc_timer); + timer_setup(&sci->sc_timer, nilfs_construction_timeout, 0); sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT; sci->sc_mjcp_freq = HZ * NILFS_SC_DEFAULT_SR_FREQ; diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index 1060949d7dd2..84084a4d9b3e 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -180,6 +180,7 @@ struct nilfs_sc_info { unsigned long sc_watermark; struct timer_list sc_timer; + struct task_struct *sc_timer_task; struct task_struct *sc_task; }; diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index 1541a1e9221a..1341a41e7b43 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -630,22 +630,22 @@ void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum, } /** - * nilfs_sufile_truncate_range - truncate range of segment array - * @sufile: inode of segment usage file - * @start: start segment number (inclusive) - * @end: end segment number (inclusive) - * - * Return Value: On success, 0 is returned. On error, one of the - * following negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-EINVAL - Invalid number of segments specified - * - * %-EBUSY - Dirty or active segments are present in the range - */ + * nilfs_sufile_truncate_range - truncate range of segment array + * @sufile: inode of segment usage file + * @start: start segment number (inclusive) + * @end: end segment number (inclusive) + * + * Return Value: On success, 0 is returned. On error, one of the + * following negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-EINVAL - Invalid number of segments specified + * + * %-EBUSY - Dirty or active segments are present in the range + */ static int nilfs_sufile_truncate_range(struct inode *sufile, __u64 start, __u64 end) { diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 4fc018dfcfae..3ce20cd44a20 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -160,7 +160,6 @@ struct inode *nilfs_alloc_inode(struct super_block *sb) ii->i_bh = NULL; ii->i_state = 0; ii->i_cno = 0; - ii->vfs_inode.i_version = 1; nilfs_mapping_init(&ii->i_btnode_cache, &ii->vfs_inode); return &ii->vfs_inode; } diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 2dd75bf619ad..afebb5067cec 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -737,7 +737,7 @@ struct nilfs_root *nilfs_lookup_root(struct the_nilfs *nilfs, __u64 cno) } else if (cno > root->cno) { n = n->rb_right; } else { - atomic_inc(&root->count); + refcount_inc(&root->count); spin_unlock(&nilfs->ns_cptree_lock); return root; } @@ -776,7 +776,7 @@ nilfs_find_or_create_root(struct the_nilfs *nilfs, __u64 cno) } else if (cno > root->cno) { p = &(*p)->rb_right; } else { - atomic_inc(&root->count); + refcount_inc(&root->count); spin_unlock(&nilfs->ns_cptree_lock); kfree(new); return root; @@ -786,7 +786,7 @@ nilfs_find_or_create_root(struct the_nilfs *nilfs, __u64 cno) new->cno = cno; new->ifile = NULL; new->nilfs = nilfs; - atomic_set(&new->count, 1); + refcount_set(&new->count, 1); atomic64_set(&new->inodes_count, 0); atomic64_set(&new->blocks_count, 0); @@ -806,7 +806,7 @@ nilfs_find_or_create_root(struct the_nilfs *nilfs, __u64 cno) void nilfs_put_root(struct nilfs_root *root) { - if (atomic_dec_and_test(&root->count)) { + if (refcount_dec_and_test(&root->count)) { struct the_nilfs *nilfs = root->nilfs; nilfs_sysfs_delete_snapshot_group(root); diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index b305c6f033e7..883d732b0259 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -27,6 +27,7 @@ #include <linux/blkdev.h> #include <linux/backing-dev.h> #include <linux/slab.h> +#include <linux/refcount.h> struct nilfs_sc_info; struct nilfs_sysfs_dev_subgroups; @@ -246,7 +247,7 @@ struct nilfs_root { __u64 cno; struct rb_node rb_node; - atomic_t count; + refcount_t count; struct the_nilfs *nilfs; struct inode *ifile; @@ -299,7 +300,7 @@ void nilfs_swap_super_block(struct the_nilfs *); static inline void nilfs_get_root(struct nilfs_root *root) { - atomic_inc(&root->count); + refcount_inc(&root->count); } static inline int nilfs_valid_fs(struct the_nilfs *nilfs) diff --git a/fs/pipe.c b/fs/pipe.c index 349c9d56d4b3..6d98566201ef 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1018,13 +1018,19 @@ const struct file_operations pipefifo_fops = { /* * Currently we rely on the pipe array holding a power-of-2 number - * of pages. + * of pages. Returns 0 on error. */ -static inline unsigned int round_pipe_size(unsigned int size) +unsigned int round_pipe_size(unsigned int size) { unsigned long nr_pages; + if (size < pipe_min_size) + size = pipe_min_size; + nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; + if (nr_pages == 0) + return 0; + return roundup_pow_of_two(nr_pages) << PAGE_SHIFT; } @@ -1040,6 +1046,8 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) long ret = 0; size = round_pipe_size(arg); + if (size == 0) + return -EINVAL; nr_pages = size >> PAGE_SHIFT; if (!nr_pages) @@ -1117,20 +1125,13 @@ out_revert_acct: } /* - * This should work even if CONFIG_PROC_FS isn't set, as proc_dointvec_minmax + * This should work even if CONFIG_PROC_FS isn't set, as proc_dopipe_max_size * will return an error. */ int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf, size_t *lenp, loff_t *ppos) { - int ret; - - ret = proc_dointvec_minmax(table, write, buf, lenp, ppos); - if (ret < 0 || !write) - return ret; - - pipe_max_size = round_pipe_size(pipe_max_size); - return ret; + return proc_dopipe_max_size(table, write, buf, lenp, ppos); } /* diff --git a/fs/proc/Makefile b/fs/proc/Makefile index f7456c4e7d0f..ead487e80510 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile @@ -21,6 +21,7 @@ proc-y += loadavg.o proc-y += meminfo.o proc-y += stat.o proc-y += uptime.o +proc-y += util.o proc-y += version.o proc-y += softirqs.o proc-y += namespaces.o diff --git a/fs/proc/array.c b/fs/proc/array.c index 6f6fc1672ad1..79375fc115d2 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -366,6 +366,11 @@ static void task_cpus_allowed(struct seq_file *m, struct task_struct *task) cpumask_pr_args(&task->cpus_allowed)); } +static inline void task_core_dumping(struct seq_file *m, struct mm_struct *mm) +{ + seq_printf(m, "CoreDumping:\t%d\n", !!mm->core_state); +} + int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { @@ -376,6 +381,7 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, if (mm) { task_mem(m, mm); + task_core_dumping(m, mm); mmput(mm); } task_sig(m, task); diff --git a/fs/proc/internal.h b/fs/proc/internal.h index a34195e92b20..9aad373cf11d 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -103,28 +103,7 @@ static inline struct task_struct *get_proc_task(struct inode *inode) void task_dump_owner(struct task_struct *task, mode_t mode, kuid_t *ruid, kgid_t *rgid); -static inline unsigned name_to_int(const struct qstr *qstr) -{ - const char *name = qstr->name; - int len = qstr->len; - unsigned n = 0; - - if (len > 1 && *name == '0') - goto out; - while (len-- > 0) { - unsigned c = *name++ - '0'; - if (c > 9) - goto out; - if (n >= (~0U-9)/10) - goto out; - n *= 10; - n += c; - } - return n; -out: - return ~0U; -} - +unsigned name_to_int(const struct qstr *qstr); /* * Offset of the first process in the /proc root directory.. */ diff --git a/fs/proc/loadavg.c b/fs/proc/loadavg.c index 9bc5c58c00ee..a000d7547479 100644 --- a/fs/proc/loadavg.c +++ b/fs/proc/loadavg.c @@ -24,7 +24,7 @@ static int loadavg_proc_show(struct seq_file *m, void *v) LOAD_INT(avnrun[1]), LOAD_FRAC(avnrun[1]), LOAD_INT(avnrun[2]), LOAD_FRAC(avnrun[2]), nr_running(), nr_threads, - task_active_pid_ns(current)->last_pid); + idr_get_cursor(&task_active_pid_ns(current)->idr)); return 0; } diff --git a/fs/proc/util.c b/fs/proc/util.c new file mode 100644 index 000000000000..b161cfa0f9fa --- /dev/null +++ b/fs/proc/util.c @@ -0,0 +1,23 @@ +#include <linux/dcache.h> + +unsigned name_to_int(const struct qstr *qstr) +{ + const char *name = qstr->name; + int len = qstr->len; + unsigned n = 0; + + if (len > 1 && *name == '0') + goto out; + do { + unsigned c = *name++ - '0'; + if (c > 9) + goto out; + if (n >= (~0U-9)/10) + goto out; + n *= 10; + n += c; + } while (--len > 0); + return n; +out: + return ~0U; +} diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h index af2cc94a61bf..963b755d19b0 100644 --- a/include/asm-generic/bug.h +++ b/include/asm-generic/bug.h @@ -4,6 +4,8 @@ #include <linux/compiler.h> +#define CUT_HERE "------------[ cut here ]------------\n" + #ifdef CONFIG_GENERIC_BUG #define BUGFLAG_WARNING (1 << 0) #define BUGFLAG_ONCE (1 << 1) @@ -90,10 +92,11 @@ extern void warn_slowpath_null(const char *file, const int line); #define __WARN_printf_taint(taint, arg...) \ warn_slowpath_fmt_taint(__FILE__, __LINE__, taint, arg) #else +extern __printf(1, 2) void __warn_printk(const char *fmt, ...); #define __WARN() __WARN_TAINT(TAINT_WARN) -#define __WARN_printf(arg...) do { printk(arg); __WARN(); } while (0) +#define __WARN_printf(arg...) do { __warn_printk(arg); __WARN(); } while (0) #define __WARN_printf_taint(taint, arg...) \ - do { printk(arg); __WARN_TAINT(taint); } while (0) + do { __warn_printk(arg); __WARN_TAINT(taint); } while (0) #endif /* used internally by panic.c */ @@ -130,7 +133,7 @@ void __warn(const char *file, int line, void *caller, unsigned taint, #ifndef WARN_ON_ONCE #define WARN_ON_ONCE(condition) ({ \ - static bool __section(.data.unlikely) __warned; \ + static bool __section(.data.once) __warned; \ int __ret_warn_once = !!(condition); \ \ if (unlikely(__ret_warn_once && !__warned)) { \ @@ -142,7 +145,7 @@ void __warn(const char *file, int line, void *caller, unsigned taint, #endif #define WARN_ONCE(condition, format...) ({ \ - static bool __section(.data.unlikely) __warned; \ + static bool __section(.data.once) __warned; \ int __ret_warn_once = !!(condition); \ \ if (unlikely(__ret_warn_once && !__warned)) { \ @@ -153,7 +156,7 @@ void __warn(const char *file, int line, void *caller, unsigned taint, }) #define WARN_TAINT_ONCE(condition, taint, format...) ({ \ - static bool __section(.data.unlikely) __warned; \ + static bool __section(.data.once) __warned; \ int __ret_warn_once = !!(condition); \ \ if (unlikely(__ret_warn_once && !__warned)) { \ diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index 6d9576931084..03cc5f9bba71 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -44,6 +44,7 @@ extern char __entry_text_start[], __entry_text_end[]; extern char __start_rodata[], __end_rodata[]; extern char __irqentry_text_start[], __irqentry_text_end[]; extern char __softirqentry_text_start[], __softirqentry_text_end[]; +extern char __start_once[], __end_once[]; /* Start and end of .ctors section - used for constructor calls. */ extern char __ctors_start[], __ctors_end[]; diff --git a/include/asm-generic/topology.h b/include/asm-generic/topology.h index 5d2add1a6c96..238873739550 100644 --- a/include/asm-generic/topology.h +++ b/include/asm-generic/topology.h @@ -44,9 +44,6 @@ #define cpu_to_mem(cpu) ((void)(cpu),0) #endif -#ifndef parent_node -#define parent_node(node) ((void)(node),0) -#endif #ifndef cpumask_of_node #ifdef CONFIG_NEED_MULTIPLE_NODES #define cpumask_of_node(node) ((node) == 0 ? cpu_online_mask : cpu_none_mask) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index bdcd1caae092..ee8b707d9fa9 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -223,6 +223,9 @@ MEM_KEEP(init.data) \ MEM_KEEP(exit.data) \ *(.data.unlikely) \ + VMLINUX_SYMBOL(__start_once) = .; \ + *(.data.once) \ + VMLINUX_SYMBOL(__end_once) = .; \ STRUCT_ALIGN(); \ *(__tracepoints) \ /* implement dynamic printk debug */ \ diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h index f2deb71958b2..1030651f8309 100644 --- a/include/linux/bitfield.h +++ b/include/linux/bitfield.h @@ -15,7 +15,7 @@ #ifndef _LINUX_BITFIELD_H #define _LINUX_BITFIELD_H -#include <linux/bug.h> +#include <linux/build_bug.h> /* * Bitfield access macros diff --git a/include/linux/bug.h b/include/linux/bug.h index da4231c905c8..fe5916550da8 100644 --- a/include/linux/bug.h +++ b/include/linux/bug.h @@ -43,6 +43,8 @@ enum bug_trap_type report_bug(unsigned long bug_addr, struct pt_regs *regs); /* These are defined by the architecture */ int is_valid_bugaddr(unsigned long addr); +void generic_bug_clear_once(void); + #else /* !CONFIG_GENERIC_BUG */ static inline enum bug_trap_type report_bug(unsigned long bug_addr, @@ -51,6 +53,9 @@ static inline enum bug_trap_type report_bug(unsigned long bug_addr, return BUG_TRAP_TYPE_BUG; } + +static inline void generic_bug_clear_once(void) {} + #endif /* CONFIG_GENERIC_BUG */ /* diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index a06583e41f80..3b609edffa8f 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -16,3 +16,6 @@ * with any version that can compile the kernel */ #define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) + +#define randomized_struct_fields_start struct { +#define randomized_struct_fields_end }; diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h index 6dfec4d638df..872f930f1b06 100644 --- a/include/linux/genalloc.h +++ b/include/linux/genalloc.h @@ -32,6 +32,7 @@ #include <linux/types.h> #include <linux/spinlock_types.h> +#include <linux/atomic.h> struct device; struct device_node; @@ -71,7 +72,7 @@ struct gen_pool { */ struct gen_pool_chunk { struct list_head next_chunk; /* next chunk in pool */ - atomic_t avail; + atomic_long_t avail; phys_addr_t phys_addr; /* physical starting address of memory chunk */ unsigned long start_addr; /* start address of memory chunk */ unsigned long end_addr; /* end address of memory chunk (inclusive) */ diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 8062e6cc607c..6a532629c983 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -105,7 +105,6 @@ extern struct group_info init_groups; .numbers = { { \ .nr = 0, \ .ns = &init_pid_ns, \ - .pid_chain = { .next = NULL, .pprev = NULL }, \ }, } \ } diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h index d29e1e21bf3f..b1d861caca16 100644 --- a/include/linux/iopoll.h +++ b/include/linux/iopoll.h @@ -42,18 +42,21 @@ */ #define readx_poll_timeout(op, addr, val, cond, sleep_us, timeout_us) \ ({ \ - ktime_t timeout = ktime_add_us(ktime_get(), timeout_us); \ - might_sleep_if(sleep_us); \ + u64 __timeout_us = (timeout_us); \ + unsigned long __sleep_us = (sleep_us); \ + ktime_t __timeout = ktime_add_us(ktime_get(), __timeout_us); \ + might_sleep_if((__sleep_us) != 0); \ for (;;) { \ (val) = op(addr); \ if (cond) \ break; \ - if (timeout_us && ktime_compare(ktime_get(), timeout) > 0) { \ + if (__timeout_us && \ + ktime_compare(ktime_get(), __timeout) > 0) { \ (val) = op(addr); \ break; \ } \ - if (sleep_us) \ - usleep_range((sleep_us >> 2) + 1, sleep_us); \ + if (__sleep_us) \ + usleep_range((__sleep_us >> 2) + 1, __sleep_us); \ } \ (cond) ? 0 : -ETIMEDOUT; \ }) @@ -77,17 +80,20 @@ */ #define readx_poll_timeout_atomic(op, addr, val, cond, delay_us, timeout_us) \ ({ \ - ktime_t timeout = ktime_add_us(ktime_get(), timeout_us); \ + u64 __timeout_us = (timeout_us); \ + unsigned long __delay_us = (delay_us); \ + ktime_t __timeout = ktime_add_us(ktime_get(), __timeout_us); \ for (;;) { \ (val) = op(addr); \ if (cond) \ break; \ - if (timeout_us && ktime_compare(ktime_get(), timeout) > 0) { \ + if (__timeout_us && \ + ktime_compare(ktime_get(), __timeout) > 0) { \ (val) = op(addr); \ break; \ } \ - if (delay_us) \ - udelay(delay_us); \ + if (__delay_us) \ + udelay(__delay_us); \ } \ (cond) ? 0 : -ETIMEDOUT; \ }) diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index 474812abe773..b5630c8eb2f3 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -19,7 +19,10 @@ struct ipc_ids { bool tables_initialized; struct rw_semaphore rwsem; struct idr ipcs_idr; + int max_id; +#ifdef CONFIG_CHECKPOINT_RESTORE int next_id; +#endif struct rhashtable key_ht; }; diff --git a/include/linux/kcov.h b/include/linux/kcov.h index f5d8ce4f4f86..3ecf6f5e3a5f 100644 --- a/include/linux/kcov.h +++ b/include/linux/kcov.h @@ -8,19 +8,23 @@ struct task_struct; #ifdef CONFIG_KCOV -void kcov_task_init(struct task_struct *t); -void kcov_task_exit(struct task_struct *t); - enum kcov_mode { /* Coverage collection is not enabled yet. */ KCOV_MODE_DISABLED = 0, + /* KCOV was initialized, but tracing mode hasn't been chosen yet. */ + KCOV_MODE_INIT = 1, /* * Tracing coverage collection mode. * Covered PCs are collected in a per-task buffer. */ - KCOV_MODE_TRACE = 1, + KCOV_MODE_TRACE_PC = 2, + /* Collecting comparison operands mode. */ + KCOV_MODE_TRACE_CMP = 3, }; +void kcov_task_init(struct task_struct *t); +void kcov_task_exit(struct task_struct *t); + #else static inline void kcov_task_init(struct task_struct *t) {} diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 4b484ab9e163..ce51455e2adf 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -549,7 +549,8 @@ extern enum system_states { #define TAINT_UNSIGNED_MODULE 13 #define TAINT_SOFTLOCKUP 14 #define TAINT_LIVEPATCH 15 -#define TAINT_FLAGS_COUNT 16 +#define TAINT_AUX 16 +#define TAINT_FLAGS_COUNT 17 struct taint_flag { char c_true; /* character printed when tainted */ diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index 15cab3967d6d..1fbde8a880d9 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -104,9 +104,16 @@ extern nodemask_t _unused_nodemask_arg_; * * Can be used to provide arguments for '%*pb[l]' when printing a nodemask. */ -#define nodemask_pr_args(maskp) \ - ((maskp) != NULL) ? MAX_NUMNODES : 0, \ - ((maskp) != NULL) ? (maskp)->bits : NULL +#define nodemask_pr_args(maskp) __nodemask_pr_numnodes(maskp), \ + __nodemask_pr_bits(maskp) +static inline unsigned int __nodemask_pr_numnodes(const nodemask_t *m) +{ + return m ? MAX_NUMNODES : 0; +} +static inline const unsigned long *__nodemask_pr_bits(const nodemask_t *m) +{ + return m ? m->bits : NULL; +} /* * The inline keyword gives the compiler room to decide to inline, or diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h index e942558b3585..9132c5cb41f1 100644 --- a/include/linux/pageblock-flags.h +++ b/include/linux/pageblock-flags.h @@ -96,6 +96,17 @@ void set_pfnblock_flags_mask(struct page *page, #define set_pageblock_skip(page) \ set_pageblock_flags_group(page, 1, PB_migrate_skip, \ PB_migrate_skip) +#else +static inline bool get_pageblock_skip(struct page *page) +{ + return false; +} +static inline void clear_pageblock_skip(struct page *page) +{ +} +static inline void set_pageblock_skip(struct page *page) +{ +} #endif /* CONFIG_COMPACTION */ #endif /* PAGEBLOCK_FLAGS_H */ diff --git a/include/linux/pid.h b/include/linux/pid.h index dfd684ce0787..7633d55d9a24 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -51,10 +51,8 @@ enum pid_type */ struct upid { - /* Try to keep pid_chain in the same cacheline as nr for find_vpid */ int nr; struct pid_namespace *ns; - struct hlist_node pid_chain; }; struct pid diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index c78af6061644..49538b172483 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -10,15 +10,8 @@ #include <linux/nsproxy.h> #include <linux/kref.h> #include <linux/ns_common.h> +#include <linux/idr.h> -struct pidmap { - atomic_t nr_free; - void *page; -}; - -#define BITS_PER_PAGE (PAGE_SIZE * 8) -#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1) -#define PIDMAP_ENTRIES ((PID_MAX_LIMIT+BITS_PER_PAGE-1)/BITS_PER_PAGE) struct fs_pin; @@ -30,10 +23,9 @@ enum { /* definitions for pid_namespace's hide_pid field */ struct pid_namespace { struct kref kref; - struct pidmap pidmap[PIDMAP_ENTRIES]; + struct idr idr; struct rcu_head rcu; - int last_pid; - unsigned int nr_hashed; + unsigned int pid_allocated; struct task_struct *child_reaper; struct kmem_cache *pid_cachep; unsigned int level; @@ -57,7 +49,7 @@ struct pid_namespace { extern struct pid_namespace init_pid_ns; -#define PIDNS_HASH_ADDING (1U << 31) +#define PIDNS_ADDING (1U << 31) #ifdef CONFIG_PID_NS static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns) @@ -106,6 +98,6 @@ static inline int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd) extern struct pid_namespace *task_active_pid_ns(struct task_struct *tsk); void pidhash_init(void); -void pidmap_init(void); +void pid_idr_init(void); #endif /* _LINUX_PID_NS_H */ diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 6a80cfc63e0c..2dc5e9870fcd 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -191,5 +191,6 @@ long pipe_fcntl(struct file *, unsigned int, unsigned long arg); struct pipe_inode_info *get_pipe_info(struct file *file); int create_pipe_files(struct file **, int); +unsigned int round_pipe_size(unsigned int size); #endif diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 0ca448c1cb42..23a9c89c7ad9 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -22,7 +22,6 @@ #define _LINUX_RADIX_TREE_H #include <linux/bitops.h> -#include <linux/bug.h> #include <linux/kernel.h> #include <linux/list.h> #include <linux/preempt.h> diff --git a/include/linux/reboot.h b/include/linux/reboot.h index d03da0eb95ca..e63799a6e895 100644 --- a/include/linux/reboot.h +++ b/include/linux/reboot.h @@ -6,6 +6,8 @@ #include <linux/notifier.h> #include <uapi/linux/reboot.h> +struct device; + #define SYS_DOWN 0x0001 /* Notify of system down */ #define SYS_RESTART SYS_DOWN #define SYS_HALT 0x0002 /* Notify of system halt */ @@ -39,6 +41,8 @@ extern int reboot_force; extern int register_reboot_notifier(struct notifier_block *); extern int unregister_reboot_notifier(struct notifier_block *); +extern int devm_register_reboot_notifier(struct device *, struct notifier_block *); + extern int register_restart_handler(struct notifier_block *); extern int unregister_restart_handler(struct notifier_block *); extern void do_kernel_restart(char *cmd); diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index b769ecfcc3bd..992bc9948232 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -51,6 +51,9 @@ extern int proc_dointvec_minmax(struct ctl_table *, int, extern int proc_douintvec_minmax(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); +extern int proc_dopipe_max_size(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos); extern int proc_dointvec_jiffies(struct ctl_table *, int, void __user *, size_t *, loff_t *); extern int proc_dointvec_userhz_jiffies(struct ctl_table *, int, diff --git a/include/uapi/linux/kcov.h b/include/uapi/linux/kcov.h index 33eabbb8ada1..9529867717a8 100644 --- a/include/uapi/linux/kcov.h +++ b/include/uapi/linux/kcov.h @@ -8,4 +8,28 @@ #define KCOV_ENABLE _IO('c', 100) #define KCOV_DISABLE _IO('c', 101) +enum { + /* + * Tracing coverage collection mode. + * Covered PCs are collected in a per-task buffer. + * In new KCOV version the mode is chosen by calling + * ioctl(fd, KCOV_ENABLE, mode). In older versions the mode argument + * was supposed to be 0 in such a call. So, for reasons of backward + * compatibility, we have chosen the value KCOV_TRACE_PC to be 0. + */ + KCOV_TRACE_PC = 0, + /* Collecting comparison operands mode. */ + KCOV_TRACE_CMP = 1, +}; + +/* + * The format for the types of collected comparisons. + * + * Bit 0 shows whether one of the arguments is a compile-time constant. + * Bits 1 & 2 contain log2 of the argument size, up to 8 bytes. + */ +#define KCOV_CMP_CONST (1 << 0) +#define KCOV_CMP_SIZE(n) ((n) << 1) +#define KCOV_CMP_MASK KCOV_CMP_SIZE(3) + #endif /* _LINUX_KCOV_IOCTLS_H */ diff --git a/init/Kconfig b/init/Kconfig index 7d5a6fbac56a..2934249fba46 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -283,19 +283,6 @@ config CROSS_MEMORY_ATTACH to directly read from or write to another process' address space. See the man page for more details. -config FHANDLE - bool "open by fhandle syscalls" if EXPERT - select EXPORTFS - default y - help - If you say Y here, a user level program will be able to map - file names to handle and then later use the handle for - different file system operations. This is useful in implementing - userspace file servers, which now track files using handles instead - of names. The handle would remain the same even if file names - get renamed. Enables open_by_handle_at(2) and name_to_handle_at(2) - syscalls. - config USELIB bool "uselib syscall" def_bool ALPHA || M68K || SPARC || X86_32 || IA32_EMULATION @@ -883,18 +870,6 @@ config SOCK_CGROUP_DATA endif # CGROUPS -config CHECKPOINT_RESTORE - bool "Checkpoint/restore support" if EXPERT - select PROC_CHILDREN - default n - help - Enables additional kernel features in a sake of checkpoint/restore. - In particular it adds auxiliary prctl codes to setup process text, - data and heap segment sizes, and a few additional /proc filesystem - entries. - - If unsure, say N here. - menuconfig NAMESPACES bool "Namespaces support" if EXPERT depends on MULTIUSER @@ -1163,6 +1138,19 @@ config SYSCTL_SYSCALL If unsure say N here. +config FHANDLE + bool "open by fhandle syscalls" if EXPERT + select EXPORTFS + default y + help + If you say Y here, a user level program will be able to map + file names to handle and then later use the handle for + different file system operations. This is useful in implementing + userspace file servers, which now track files using handles instead + of names. The handle would remain the same even if file names + get renamed. Enables open_by_handle_at(2) and name_to_handle_at(2) + syscalls. + config POSIX_TIMERS bool "Posix Clocks & timers" if EXPERT default y @@ -1180,54 +1168,6 @@ config POSIX_TIMERS If unsure say y. -config KALLSYMS - bool "Load all symbols for debugging/ksymoops" if EXPERT - default y - help - Say Y here to let the kernel print out symbolic crash information and - symbolic stack backtraces. This increases the size of the kernel - somewhat, as all symbols have to be loaded into the kernel image. - -config KALLSYMS_ALL - bool "Include all symbols in kallsyms" - depends on DEBUG_KERNEL && KALLSYMS - help - Normally kallsyms only contains the symbols of functions for nicer - OOPS messages and backtraces (i.e., symbols from the text and inittext - sections). This is sufficient for most cases. And only in very rare - cases (e.g., when a debugger is used) all symbols are required (e.g., - names of variables from the data sections, etc). - - This option makes sure that all symbols are loaded into the kernel - image (i.e., symbols from all sections) in cost of increased kernel - size (depending on the kernel configuration, it may be 300KiB or - something like this). - - Say N unless you really need all symbols. - -config KALLSYMS_ABSOLUTE_PERCPU - bool - depends on KALLSYMS - default X86_64 && SMP - -config KALLSYMS_BASE_RELATIVE - bool - depends on KALLSYMS - default !IA64 && !(TILE && 64BIT) - help - Instead of emitting them as absolute values in the native word size, - emit the symbol references in the kallsyms table as 32-bit entries, - each containing a relative value in the range [base, base + U32_MAX] - or, when KALLSYMS_ABSOLUTE_PERCPU is in effect, each containing either - an absolute value in the range [0, S32_MAX] or a relative value in the - range [base, base + S32_MAX], where base is the lowest relative symbol - address encountered in the image. - - On 64-bit builds, this reduces the size of the address table by 50%, - but more importantly, it results in entries whose values are build - time constants, and no relocation pass is required at runtime to fix - up the entries based on the runtime load address of the kernel. - config PRINTK default y bool "Enable support for printk" if EXPERT @@ -1339,16 +1279,6 @@ config EVENTFD If unsure, say Y. -# syscall, maps, verifier -config BPF_SYSCALL - bool "Enable bpf() system call" - select ANON_INODES - select BPF - default n - help - Enable the bpf() system call that allows to manipulate eBPF - programs and maps via file descriptors. - config SHMEM bool "Use full shmem filesystem" if EXPERT default y @@ -1378,14 +1308,6 @@ config ADVISE_SYSCALLS applications use these syscalls, you can disable this option to save space. -config USERFAULTFD - bool "Enable userfaultfd() system call" - select ANON_INODES - depends on MMU - help - Enable the userfaultfd() system call that allows to intercept and - handle page faults in userland. - config MEMBARRIER bool "Enable membarrier() system call" if EXPERT default y @@ -1398,6 +1320,86 @@ config MEMBARRIER If unsure, say Y. +config CHECKPOINT_RESTORE + bool "Checkpoint/restore support" if EXPERT + select PROC_CHILDREN + default n + help + Enables additional kernel features in a sake of checkpoint/restore. + In particular it adds auxiliary prctl codes to setup process text, + data and heap segment sizes, and a few additional /proc filesystem + entries. + + If unsure, say N here. + +config KALLSYMS + bool "Load all symbols for debugging/ksymoops" if EXPERT + default y + help + Say Y here to let the kernel print out symbolic crash information and + symbolic stack backtraces. This increases the size of the kernel + somewhat, as all symbols have to be loaded into the kernel image. + +config KALLSYMS_ALL + bool "Include all symbols in kallsyms" + depends on DEBUG_KERNEL && KALLSYMS + help + Normally kallsyms only contains the symbols of functions for nicer + OOPS messages and backtraces (i.e., symbols from the text and inittext + sections). This is sufficient for most cases. And only in very rare + cases (e.g., when a debugger is used) all symbols are required (e.g., + names of variables from the data sections, etc). + + This option makes sure that all symbols are loaded into the kernel + image (i.e., symbols from all sections) in cost of increased kernel + size (depending on the kernel configuration, it may be 300KiB or + something like this). + + Say N unless you really need all symbols. + +config KALLSYMS_ABSOLUTE_PERCPU + bool + depends on KALLSYMS + default X86_64 && SMP + +config KALLSYMS_BASE_RELATIVE + bool + depends on KALLSYMS + default !IA64 && !(TILE && 64BIT) + help + Instead of emitting them as absolute values in the native word size, + emit the symbol references in the kallsyms table as 32-bit entries, + each containing a relative value in the range [base, base + U32_MAX] + or, when KALLSYMS_ABSOLUTE_PERCPU is in effect, each containing either + an absolute value in the range [0, S32_MAX] or a relative value in the + range [base, base + S32_MAX], where base is the lowest relative symbol + address encountered in the image. + + On 64-bit builds, this reduces the size of the address table by 50%, + but more importantly, it results in entries whose values are build + time constants, and no relocation pass is required at runtime to fix + up the entries based on the runtime load address of the kernel. + +# end of the "standard kernel features (expert users)" menu + +# syscall, maps, verifier +config BPF_SYSCALL + bool "Enable bpf() system call" + select ANON_INODES + select BPF + default n + help + Enable the bpf() system call that allows to manipulate eBPF + programs and maps via file descriptors. + +config USERFAULTFD + bool "Enable userfaultfd() system call" + select ANON_INODES + depends on MMU + help + Enable the userfaultfd() system call that allows to intercept and + handle page faults in userland. + config EMBEDDED bool "Embedded system" option allnoconfig_y diff --git a/init/initramfs.c b/init/initramfs.c index 7046feffef6b..7e99a0038942 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -109,7 +109,7 @@ static void __init free_hash(void) } } -static long __init do_utime(char *filename, time_t mtime) +static long __init do_utime(char *filename, time64_t mtime) { struct timespec64 t[2]; @@ -125,10 +125,10 @@ static __initdata LIST_HEAD(dir_list); struct dir_entry { struct list_head list; char *name; - time_t mtime; + time64_t mtime; }; -static void __init dir_add(const char *name, time_t mtime) +static void __init dir_add(const char *name, time64_t mtime) { struct dir_entry *de = kmalloc(sizeof(struct dir_entry), GFP_KERNEL); if (!de) @@ -150,7 +150,7 @@ static void __init dir_utime(void) } } -static __initdata time_t mtime; +static __initdata time64_t mtime; /* cpio header parsing */ @@ -177,7 +177,7 @@ static void __init parse_header(char *s) uid = parsed[2]; gid = parsed[3]; nlink = parsed[4]; - mtime = parsed[5]; + mtime = parsed[5]; /* breaks in y2106 */ body_len = parsed[6]; major = parsed[7]; minor = parsed[8]; diff --git a/init/main.c b/init/main.c index 859a786f7c0a..dfec3809e740 100644 --- a/init/main.c +++ b/init/main.c @@ -562,7 +562,6 @@ asmlinkage __visible void __init start_kernel(void) * kmem_cache_init() */ setup_log_buf(0); - pidhash_init(); vfs_caches_init_early(); sort_main_extable(); trap_init(); @@ -669,7 +668,7 @@ asmlinkage __visible void __init start_kernel(void) if (late_time_init) late_time_init(); calibrate_delay(); - pidmap_init(); + pid_idr_init(); anon_vma_init(); #ifdef CONFIG_X86 if (efi_enabled(EFI_RUNTIME_SERVICES)) diff --git a/init/version.c b/init/version.c index 5606341e9efd..bfb4e3f4955e 100644 --- a/init/version.c +++ b/init/version.c @@ -7,7 +7,7 @@ */ #include <generated/compile.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/uts.h> #include <linux/utsname.h> #include <generated/utsrelease.h> diff --git a/ipc/sem.c b/ipc/sem.c index a5cff0e109ab..87bd38f38dc3 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -515,6 +515,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params) sma->sem_nsems = nsems; sma->sem_ctime = ktime_get_real_seconds(); + /* ipc_addid() locks sma upon success. */ retval = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni); if (retval < 0) { call_rcu(&sma->sem_perm.rcu, sem_rcu_free); diff --git a/ipc/shm.c b/ipc/shm.c index 7733d768666d..7acda23430aa 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -601,6 +601,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) shp->shm_file = file; shp->shm_creator = current; + /* ipc_addid() locks shp upon success. */ error = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni); if (error < 0) goto no_id; diff --git a/ipc/util.c b/ipc/util.c index 79b30eee32cd..ff045fec8d83 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -116,13 +116,16 @@ int ipc_init_ids(struct ipc_ids *ids) int err; ids->in_use = 0; ids->seq = 0; - ids->next_id = -1; init_rwsem(&ids->rwsem); err = rhashtable_init(&ids->key_ht, &ipc_kht_params); if (err) return err; idr_init(&ids->ipcs_idr); ids->tables_initialized = true; + ids->max_id = -1; +#ifdef CONFIG_CHECKPOINT_RESTORE + ids->next_id = -1; +#endif return 0; } @@ -186,41 +189,51 @@ static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key) return NULL; } -/** - * ipc_get_maxid - get the last assigned id - * @ids: ipc identifier set - * - * Called with ipc_ids.rwsem held. +#ifdef CONFIG_CHECKPOINT_RESTORE +/* + * Specify desired id for next allocated IPC object. */ -int ipc_get_maxid(struct ipc_ids *ids) +#define ipc_idr_alloc(ids, new) \ + idr_alloc(&(ids)->ipcs_idr, (new), \ + (ids)->next_id < 0 ? 0 : ipcid_to_idx((ids)->next_id),\ + 0, GFP_NOWAIT) + +static inline int ipc_buildid(int id, struct ipc_ids *ids, + struct kern_ipc_perm *new) { - struct kern_ipc_perm *ipc; - int max_id = -1; - int total, id; + if (ids->next_id < 0) { /* default, behave as !CHECKPOINT_RESTORE */ + new->seq = ids->seq++; + if (ids->seq > IPCID_SEQ_MAX) + ids->seq = 0; + } else { + new->seq = ipcid_to_seqx(ids->next_id); + ids->next_id = -1; + } - if (ids->in_use == 0) - return -1; + return SEQ_MULTIPLIER * new->seq + id; +} - if (ids->in_use == IPCMNI) - return IPCMNI - 1; +#else +#define ipc_idr_alloc(ids, new) \ + idr_alloc(&(ids)->ipcs_idr, (new), 0, 0, GFP_NOWAIT) - /* Look for the last assigned id */ - total = 0; - for (id = 0; id < IPCMNI && total < ids->in_use; id++) { - ipc = idr_find(&ids->ipcs_idr, id); - if (ipc != NULL) { - max_id = id; - total++; - } - } - return max_id; +static inline int ipc_buildid(int id, struct ipc_ids *ids, + struct kern_ipc_perm *new) +{ + new->seq = ids->seq++; + if (ids->seq > IPCID_SEQ_MAX) + ids->seq = 0; + + return SEQ_MULTIPLIER * new->seq + id; } +#endif /* CONFIG_CHECKPOINT_RESTORE */ + /** * ipc_addid - add an ipc identifier * @ids: ipc identifier set * @new: new ipc permission set - * @size: limit for the number of used ids + * @limit: limit for the number of used ids * * Add an entry 'new' to the ipc ids idr. The permissions object is * initialised and the first free entry is set up and the id assigned @@ -229,17 +242,16 @@ int ipc_get_maxid(struct ipc_ids *ids) * * Called with writer ipc_ids.rwsem held. */ -int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int size) +int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int limit) { kuid_t euid; kgid_t egid; int id, err; - int next_id = ids->next_id; - if (size > IPCMNI) - size = IPCMNI; + if (limit > IPCMNI) + limit = IPCMNI; - if (!ids->tables_initialized || ids->in_use >= size) + if (!ids->tables_initialized || ids->in_use >= limit) return -ENOSPC; idr_preload(GFP_KERNEL); @@ -254,9 +266,7 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int size) new->cuid = new->uid = euid; new->gid = new->cgid = egid; - id = idr_alloc(&ids->ipcs_idr, new, - (next_id < 0) ? 0 : ipcid_to_idx(next_id), 0, - GFP_NOWAIT); + id = ipc_idr_alloc(ids, new); idr_preload_end(); if (id >= 0 && new->key != IPC_PRIVATE) { @@ -274,17 +284,11 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int size) } ids->in_use++; + if (id > ids->max_id) + ids->max_id = id; - if (next_id < 0) { - new->seq = ids->seq++; - if (ids->seq > IPCID_SEQ_MAX) - ids->seq = 0; - } else { - new->seq = ipcid_to_seqx(next_id); - ids->next_id = -1; - } + new->id = ipc_buildid(id, ids, new); - new->id = ipc_buildid(id, new->seq); return id; } @@ -429,6 +433,15 @@ void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp) ipc_kht_remove(ids, ipcp); ids->in_use--; ipcp->deleted = true; + + if (unlikely(lid == ids->max_id)) { + do { + lid--; + if (lid == -1) + break; + } while (!idr_find(&ids->ipcs_idr, lid)); + ids->max_id = lid; + } } /** diff --git a/ipc/util.h b/ipc/util.h index 579112d90016..89b8ec176fc4 100644 --- a/ipc/util.h +++ b/ipc/util.h @@ -13,6 +13,7 @@ #include <linux/unistd.h> #include <linux/err.h> +#include <linux/ipc_namespace.h> #define SEQ_MULTIPLIER (IPCMNI) @@ -99,9 +100,6 @@ void __init ipc_init_proc_interface(const char *path, const char *header, /* must be called with ids->rwsem acquired for writing */ int ipc_addid(struct ipc_ids *, struct kern_ipc_perm *, int); -/* must be called with ids->rwsem acquired for reading */ -int ipc_get_maxid(struct ipc_ids *); - /* must be called with both locks acquired. */ void ipc_rmid(struct ipc_ids *, struct kern_ipc_perm *); @@ -111,6 +109,23 @@ void ipc_set_key_private(struct ipc_ids *, struct kern_ipc_perm *); /* must be called with ipcp locked */ int ipcperms(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp, short flg); +/** + * ipc_get_maxid - get the last assigned id + * @ids: ipc identifier set + * + * Called with ipc_ids.rwsem held for reading. + */ +static inline int ipc_get_maxid(struct ipc_ids *ids) +{ + if (ids->in_use == 0) + return -1; + + if (ids->in_use == IPCMNI) + return IPCMNI - 1; + + return ids->max_id; +} + /* * For allocation that need to be freed by RCU. * Objects are reference counted, they start with reference count 1. @@ -146,11 +161,6 @@ extern struct msg_msg *load_msg(const void __user *src, size_t len); extern struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst); extern int store_msg(void __user *dest, struct msg_msg *msg, size_t len); -static inline int ipc_buildid(int id, int seq) -{ - return SEQ_MULTIPLIER * seq + id; -} - static inline int ipc_checkid(struct kern_ipc_perm *ipcp, int uid) { return uid / SEQ_MULTIPLIER != ipcp->seq; diff --git a/kernel/crash_core.c b/kernel/crash_core.c index 6db80fc0810b..b3663896278e 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -108,7 +108,8 @@ static int __init parse_crashkernel_mem(char *cmdline, return -EINVAL; } } - } + } else + pr_info("crashkernel size resulted in zero bytes\n"); return 0; } diff --git a/kernel/fork.c b/kernel/fork.c index 4e55eedba8d6..432eadf6b58c 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1871,7 +1871,7 @@ static __latent_entropy struct task_struct *copy_process( retval = -ERESTARTNOINTR; goto bad_fork_cancel_cgroup; } - if (unlikely(!(ns_of_pid(pid)->nr_hashed & PIDNS_HASH_ADDING))) { + if (unlikely(!(ns_of_pid(pid)->pid_allocated & PIDNS_ADDING))) { retval = -ENOMEM; goto bad_fork_cancel_cgroup; } diff --git a/kernel/kcov.c b/kernel/kcov.c index fc6af9e1308b..15f33faf4013 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -22,13 +22,21 @@ #include <linux/kcov.h> #include <asm/setup.h> +/* Number of 64-bit words written per one comparison: */ +#define KCOV_WORDS_PER_CMP 4 + /* * kcov descriptor (one per opened debugfs file). * State transitions of the descriptor: * - initial state after open() * - then there must be a single ioctl(KCOV_INIT_TRACE) call * - then, mmap() call (several calls are allowed but not useful) - * - then, repeated enable/disable for a task (only one task a time allowed) + * - then, ioctl(KCOV_ENABLE, arg), where arg is + * KCOV_TRACE_PC - to trace only the PCs + * or + * KCOV_TRACE_CMP - to trace only the comparison operands + * - then, ioctl(KCOV_DISABLE) to disable the task. + * Enabling/disabling ioctls can be repeated (only one task a time allowed). */ struct kcov { /* @@ -48,51 +56,176 @@ struct kcov { struct task_struct *t; }; -/* - * Entry point from instrumented code. - * This is called once per basic-block/edge. - */ -void notrace __sanitizer_cov_trace_pc(void) +static bool check_kcov_mode(enum kcov_mode needed_mode, struct task_struct *t) { - struct task_struct *t; enum kcov_mode mode; - t = current; /* * We are interested in code coverage as a function of a syscall inputs, * so we ignore code executed in interrupts. */ - if (!t || !in_task()) - return; + if (!in_task()) + return false; mode = READ_ONCE(t->kcov_mode); - if (mode == KCOV_MODE_TRACE) { - unsigned long *area; - unsigned long pos; - unsigned long ip = _RET_IP_; + /* + * There is some code that runs in interrupts but for which + * in_interrupt() returns false (e.g. preempt_schedule_irq()). + * READ_ONCE()/barrier() effectively provides load-acquire wrt + * interrupts, there are paired barrier()/WRITE_ONCE() in + * kcov_ioctl_locked(). + */ + barrier(); + return mode == needed_mode; +} +static unsigned long canonicalize_ip(unsigned long ip) +{ #ifdef CONFIG_RANDOMIZE_BASE - ip -= kaslr_offset(); + ip -= kaslr_offset(); #endif + return ip; +} - /* - * There is some code that runs in interrupts but for which - * in_interrupt() returns false (e.g. preempt_schedule_irq()). - * READ_ONCE()/barrier() effectively provides load-acquire wrt - * interrupts, there are paired barrier()/WRITE_ONCE() in - * kcov_ioctl_locked(). - */ - barrier(); - area = t->kcov_area; - /* The first word is number of subsequent PCs. */ - pos = READ_ONCE(area[0]) + 1; - if (likely(pos < t->kcov_size)) { - area[pos] = ip; - WRITE_ONCE(area[0], pos); - } +/* + * Entry point from instrumented code. + * This is called once per basic-block/edge. + */ +void notrace __sanitizer_cov_trace_pc(void) +{ + struct task_struct *t; + unsigned long *area; + unsigned long ip = canonicalize_ip(_RET_IP_); + unsigned long pos; + + t = current; + if (!check_kcov_mode(KCOV_MODE_TRACE_PC, t)) + return; + + area = t->kcov_area; + /* The first 64-bit word is the number of subsequent PCs. */ + pos = READ_ONCE(area[0]) + 1; + if (likely(pos < t->kcov_size)) { + area[pos] = ip; + WRITE_ONCE(area[0], pos); } } EXPORT_SYMBOL(__sanitizer_cov_trace_pc); +#ifdef CONFIG_KCOV_ENABLE_COMPARISONS +static void write_comp_data(u64 type, u64 arg1, u64 arg2, u64 ip) +{ + struct task_struct *t; + u64 *area; + u64 count, start_index, end_pos, max_pos; + + t = current; + if (!check_kcov_mode(KCOV_MODE_TRACE_CMP, t)) + return; + + ip = canonicalize_ip(ip); + + /* + * We write all comparison arguments and types as u64. + * The buffer was allocated for t->kcov_size unsigned longs. + */ + area = (u64 *)t->kcov_area; + max_pos = t->kcov_size * sizeof(unsigned long); + + count = READ_ONCE(area[0]); + + /* Every record is KCOV_WORDS_PER_CMP 64-bit words. */ + start_index = 1 + count * KCOV_WORDS_PER_CMP; + end_pos = (start_index + KCOV_WORDS_PER_CMP) * sizeof(u64); + if (likely(end_pos <= max_pos)) { + area[start_index] = type; + area[start_index + 1] = arg1; + area[start_index + 2] = arg2; + area[start_index + 3] = ip; + WRITE_ONCE(area[0], count + 1); + } +} + +void notrace __sanitizer_cov_trace_cmp1(u8 arg1, u8 arg2) +{ + write_comp_data(KCOV_CMP_SIZE(0), arg1, arg2, _RET_IP_); +} +EXPORT_SYMBOL(__sanitizer_cov_trace_cmp1); + +void notrace __sanitizer_cov_trace_cmp2(u16 arg1, u16 arg2) +{ + write_comp_data(KCOV_CMP_SIZE(1), arg1, arg2, _RET_IP_); +} +EXPORT_SYMBOL(__sanitizer_cov_trace_cmp2); + +void notrace __sanitizer_cov_trace_cmp4(u16 arg1, u16 arg2) +{ + write_comp_data(KCOV_CMP_SIZE(2), arg1, arg2, _RET_IP_); +} +EXPORT_SYMBOL(__sanitizer_cov_trace_cmp4); + +void notrace __sanitizer_cov_trace_cmp8(u64 arg1, u64 arg2) +{ + write_comp_data(KCOV_CMP_SIZE(3), arg1, arg2, _RET_IP_); +} +EXPORT_SYMBOL(__sanitizer_cov_trace_cmp8); + +void notrace __sanitizer_cov_trace_const_cmp1(u8 arg1, u8 arg2) +{ + write_comp_data(KCOV_CMP_SIZE(0) | KCOV_CMP_CONST, arg1, arg2, + _RET_IP_); +} +EXPORT_SYMBOL(__sanitizer_cov_trace_const_cmp1); + +void notrace __sanitizer_cov_trace_const_cmp2(u16 arg1, u16 arg2) +{ + write_comp_data(KCOV_CMP_SIZE(1) | KCOV_CMP_CONST, arg1, arg2, + _RET_IP_); +} +EXPORT_SYMBOL(__sanitizer_cov_trace_const_cmp2); + +void notrace __sanitizer_cov_trace_const_cmp4(u16 arg1, u16 arg2) +{ + write_comp_data(KCOV_CMP_SIZE(2) | KCOV_CMP_CONST, arg1, arg2, + _RET_IP_); +} +EXPORT_SYMBOL(__sanitizer_cov_trace_const_cmp4); + +void notrace __sanitizer_cov_trace_const_cmp8(u64 arg1, u64 arg2) +{ + write_comp_data(KCOV_CMP_SIZE(3) | KCOV_CMP_CONST, arg1, arg2, + _RET_IP_); +} +EXPORT_SYMBOL(__sanitizer_cov_trace_const_cmp8); + +void notrace __sanitizer_cov_trace_switch(u64 val, u64 *cases) +{ + u64 i; + u64 count = cases[0]; + u64 size = cases[1]; + u64 type = KCOV_CMP_CONST; + + switch (size) { + case 8: + type |= KCOV_CMP_SIZE(0); + break; + case 16: + type |= KCOV_CMP_SIZE(1); + break; + case 32: + type |= KCOV_CMP_SIZE(2); + break; + case 64: + type |= KCOV_CMP_SIZE(3); + break; + default: + return; + } + for (i = 0; i < count; i++) + write_comp_data(type, cases[i + 2], val, _RET_IP_); +} +EXPORT_SYMBOL(__sanitizer_cov_trace_switch); +#endif /* ifdef CONFIG_KCOV_ENABLE_COMPARISONS */ + static void kcov_get(struct kcov *kcov) { atomic_inc(&kcov->refcount); @@ -129,6 +262,7 @@ void kcov_task_exit(struct task_struct *t) /* Just to not leave dangling references behind. */ kcov_task_init(t); kcov->t = NULL; + kcov->mode = KCOV_MODE_INIT; spin_unlock(&kcov->lock); kcov_put(kcov); } @@ -147,7 +281,7 @@ static int kcov_mmap(struct file *filep, struct vm_area_struct *vma) spin_lock(&kcov->lock); size = kcov->size * sizeof(unsigned long); - if (kcov->mode == KCOV_MODE_DISABLED || vma->vm_pgoff != 0 || + if (kcov->mode != KCOV_MODE_INIT || vma->vm_pgoff != 0 || vma->vm_end - vma->vm_start != size) { res = -EINVAL; goto exit; @@ -176,6 +310,7 @@ static int kcov_open(struct inode *inode, struct file *filep) kcov = kzalloc(sizeof(*kcov), GFP_KERNEL); if (!kcov) return -ENOMEM; + kcov->mode = KCOV_MODE_DISABLED; atomic_set(&kcov->refcount, 1); spin_lock_init(&kcov->lock); filep->private_data = kcov; @@ -211,7 +346,7 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd, if (size < 2 || size > INT_MAX / sizeof(unsigned long)) return -EINVAL; kcov->size = size; - kcov->mode = KCOV_MODE_TRACE; + kcov->mode = KCOV_MODE_INIT; return 0; case KCOV_ENABLE: /* @@ -221,17 +356,25 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd, * at task exit or voluntary by KCOV_DISABLE. After that it can * be enabled for another task. */ - unused = arg; - if (unused != 0 || kcov->mode == KCOV_MODE_DISABLED || - kcov->area == NULL) + if (kcov->mode != KCOV_MODE_INIT || !kcov->area) return -EINVAL; if (kcov->t != NULL) return -EBUSY; + if (arg == KCOV_TRACE_PC) + kcov->mode = KCOV_MODE_TRACE_PC; + else if (arg == KCOV_TRACE_CMP) +#ifdef CONFIG_KCOV_ENABLE_COMPARISONS + kcov->mode = KCOV_MODE_TRACE_CMP; +#else + return -ENOTSUPP; +#endif + else + return -EINVAL; t = current; /* Cache in task struct for performance. */ t->kcov_size = kcov->size; t->kcov_area = kcov->area; - /* See comment in __sanitizer_cov_trace_pc(). */ + /* See comment in check_kcov_mode(). */ barrier(); WRITE_ONCE(t->kcov_mode, kcov->mode); t->kcov = kcov; @@ -249,6 +392,7 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd, return -EINVAL; kcov_task_init(t); kcov->t = NULL; + kcov->mode = KCOV_MODE_INIT; kcov_put(kcov); return 0; default: diff --git a/kernel/panic.c b/kernel/panic.c index bdd18afa19a4..2cfef408fec9 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -27,6 +27,8 @@ #include <linux/console.h> #include <linux/bug.h> #include <linux/ratelimit.h> +#include <linux/debugfs.h> +#include <asm/sections.h> #define PANIC_TIMER_STEP 100 #define PANIC_BLINK_SPD 18 @@ -322,6 +324,7 @@ const struct taint_flag taint_flags[TAINT_FLAGS_COUNT] = { { 'E', ' ', true }, /* TAINT_UNSIGNED_MODULE */ { 'L', ' ', false }, /* TAINT_SOFTLOCKUP */ { 'K', ' ', true }, /* TAINT_LIVEPATCH */ + { 'X', ' ', true }, /* TAINT_AUX */ }; /** @@ -343,6 +346,7 @@ const struct taint_flag taint_flags[TAINT_FLAGS_COUNT] = { * 'E' - Unsigned module has been loaded. * 'L' - A soft lockup has previously occurred. * 'K' - Kernel has been live patched. + * 'X' - Auxiliary taint, for distros' use. * * The string is overwritten by the next call to print_tainted(). */ @@ -518,7 +522,8 @@ void __warn(const char *file, int line, void *caller, unsigned taint, { disable_trace_on_warning(); - pr_warn("------------[ cut here ]------------\n"); + if (args) + pr_warn(CUT_HERE); if (file) pr_warn("WARNING: CPU: %d PID: %d at %s:%d %pS\n", @@ -582,9 +587,49 @@ EXPORT_SYMBOL(warn_slowpath_fmt_taint); void warn_slowpath_null(const char *file, int line) { + pr_warn(CUT_HERE); __warn(file, line, __builtin_return_address(0), TAINT_WARN, NULL, NULL); } EXPORT_SYMBOL(warn_slowpath_null); +#else +void __warn_printk(const char *fmt, ...) +{ + va_list args; + + pr_warn(CUT_HERE); + + va_start(args, fmt); + vprintk(fmt, args); + va_end(args); +} +EXPORT_SYMBOL(__warn_printk); +#endif + +#ifdef CONFIG_BUG + +/* Support resetting WARN*_ONCE state */ + +static int clear_warn_once_set(void *data, u64 val) +{ + generic_bug_clear_once(); + memset(__start_once, 0, __end_once - __start_once); + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(clear_warn_once_fops, + NULL, + clear_warn_once_set, + "%lld\n"); + +static __init int register_warn_debugfs(void) +{ + /* Don't care about failure */ + debugfs_create_file("clear_warn_once", 0200, NULL, + NULL, &clear_warn_once_fops); + return 0; +} + +device_initcall(register_warn_debugfs); #endif #ifdef CONFIG_CC_STACKPROTECTOR diff --git a/kernel/pid.c b/kernel/pid.c index 020dedbdf066..b13b624e2c49 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -39,11 +39,8 @@ #include <linux/proc_ns.h> #include <linux/proc_fs.h> #include <linux/sched/task.h> +#include <linux/idr.h> -#define pid_hashfn(nr, ns) \ - hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift) -static struct hlist_head *pid_hash; -static unsigned int pidhash_shift = 4; struct pid init_struct_pid = INIT_STRUCT_PID; int pid_max = PID_MAX_DEFAULT; @@ -53,15 +50,6 @@ int pid_max = PID_MAX_DEFAULT; int pid_max_min = RESERVED_PIDS + 1; int pid_max_max = PID_MAX_LIMIT; -static inline int mk_pid(struct pid_namespace *pid_ns, - struct pidmap *map, int off) -{ - return (map - pid_ns->pidmap)*BITS_PER_PAGE + off; -} - -#define find_next_offset(map, off) \ - find_next_zero_bit((map)->page, BITS_PER_PAGE, off) - /* * PID-map pages start out as NULL, they get allocated upon * first use and are never deallocated. This way a low pid_max @@ -70,11 +58,8 @@ static inline int mk_pid(struct pid_namespace *pid_ns, */ struct pid_namespace init_pid_ns = { .kref = KREF_INIT(2), - .pidmap = { - [ 0 ... PIDMAP_ENTRIES-1] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } - }, - .last_pid = 0, - .nr_hashed = PIDNS_HASH_ADDING, + .idr = IDR_INIT, + .pid_allocated = PIDNS_ADDING, .level = 0, .child_reaper = &init_task, .user_ns = &init_user_ns, @@ -101,138 +86,6 @@ EXPORT_SYMBOL_GPL(init_pid_ns); static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock); -static void free_pidmap(struct upid *upid) -{ - int nr = upid->nr; - struct pidmap *map = upid->ns->pidmap + nr / BITS_PER_PAGE; - int offset = nr & BITS_PER_PAGE_MASK; - - clear_bit(offset, map->page); - atomic_inc(&map->nr_free); -} - -/* - * If we started walking pids at 'base', is 'a' seen before 'b'? - */ -static int pid_before(int base, int a, int b) -{ - /* - * This is the same as saying - * - * (a - base + MAXUINT) % MAXUINT < (b - base + MAXUINT) % MAXUINT - * and that mapping orders 'a' and 'b' with respect to 'base'. - */ - return (unsigned)(a - base) < (unsigned)(b - base); -} - -/* - * We might be racing with someone else trying to set pid_ns->last_pid - * at the pid allocation time (there's also a sysctl for this, but racing - * with this one is OK, see comment in kernel/pid_namespace.c about it). - * We want the winner to have the "later" value, because if the - * "earlier" value prevails, then a pid may get reused immediately. - * - * Since pids rollover, it is not sufficient to just pick the bigger - * value. We have to consider where we started counting from. - * - * 'base' is the value of pid_ns->last_pid that we observed when - * we started looking for a pid. - * - * 'pid' is the pid that we eventually found. - */ -static void set_last_pid(struct pid_namespace *pid_ns, int base, int pid) -{ - int prev; - int last_write = base; - do { - prev = last_write; - last_write = cmpxchg(&pid_ns->last_pid, prev, pid); - } while ((prev != last_write) && (pid_before(base, last_write, pid))); -} - -static int alloc_pidmap(struct pid_namespace *pid_ns) -{ - int i, offset, max_scan, pid, last = pid_ns->last_pid; - struct pidmap *map; - - pid = last + 1; - if (pid >= pid_max) - pid = RESERVED_PIDS; - offset = pid & BITS_PER_PAGE_MASK; - map = &pid_ns->pidmap[pid/BITS_PER_PAGE]; - /* - * If last_pid points into the middle of the map->page we - * want to scan this bitmap block twice, the second time - * we start with offset == 0 (or RESERVED_PIDS). - */ - max_scan = DIV_ROUND_UP(pid_max, BITS_PER_PAGE) - !offset; - for (i = 0; i <= max_scan; ++i) { - if (unlikely(!map->page)) { - void *page = kzalloc(PAGE_SIZE, GFP_KERNEL); - /* - * Free the page if someone raced with us - * installing it: - */ - spin_lock_irq(&pidmap_lock); - if (!map->page) { - map->page = page; - page = NULL; - } - spin_unlock_irq(&pidmap_lock); - kfree(page); - if (unlikely(!map->page)) - return -ENOMEM; - } - if (likely(atomic_read(&map->nr_free))) { - for ( ; ; ) { - if (!test_and_set_bit(offset, map->page)) { - atomic_dec(&map->nr_free); - set_last_pid(pid_ns, last, pid); - return pid; - } - offset = find_next_offset(map, offset); - if (offset >= BITS_PER_PAGE) - break; - pid = mk_pid(pid_ns, map, offset); - if (pid >= pid_max) - break; - } - } - if (map < &pid_ns->pidmap[(pid_max-1)/BITS_PER_PAGE]) { - ++map; - offset = 0; - } else { - map = &pid_ns->pidmap[0]; - offset = RESERVED_PIDS; - if (unlikely(last == offset)) - break; - } - pid = mk_pid(pid_ns, map, offset); - } - return -EAGAIN; -} - -int next_pidmap(struct pid_namespace *pid_ns, unsigned int last) -{ - int offset; - struct pidmap *map, *end; - - if (last >= PID_MAX_LIMIT) - return -1; - - offset = (last + 1) & BITS_PER_PAGE_MASK; - map = &pid_ns->pidmap[(last + 1)/BITS_PER_PAGE]; - end = &pid_ns->pidmap[PIDMAP_ENTRIES]; - for (; map < end; map++, offset = 0) { - if (unlikely(!map->page)) - continue; - offset = find_next_bit((map)->page, BITS_PER_PAGE, offset); - if (offset < BITS_PER_PAGE) - return mk_pid(pid_ns, map, offset); - } - return -1; -} - void put_pid(struct pid *pid) { struct pid_namespace *ns; @@ -265,8 +118,7 @@ void free_pid(struct pid *pid) for (i = 0; i <= pid->level; i++) { struct upid *upid = pid->numbers + i; struct pid_namespace *ns = upid->ns; - hlist_del_rcu(&upid->pid_chain); - switch(--ns->nr_hashed) { + switch (--ns->pid_allocated) { case 2: case 1: /* When all that is left in the pid namespace @@ -275,21 +127,20 @@ void free_pid(struct pid *pid) */ wake_up_process(ns->child_reaper); break; - case PIDNS_HASH_ADDING: + case PIDNS_ADDING: /* Handle a fork failure of the first process */ WARN_ON(ns->child_reaper); - ns->nr_hashed = 0; + ns->pid_allocated = 0; /* fall through */ case 0: schedule_work(&ns->proc_work); break; } + + idr_remove(&ns->idr, upid->nr); } spin_unlock_irqrestore(&pidmap_lock, flags); - for (i = 0; i <= pid->level; i++) - free_pidmap(pid->numbers + i); - call_rcu(&pid->rcu, delayed_put_pid); } @@ -308,8 +159,29 @@ struct pid *alloc_pid(struct pid_namespace *ns) tmp = ns; pid->level = ns->level; + for (i = ns->level; i >= 0; i--) { - nr = alloc_pidmap(tmp); + int pid_min = 1; + + idr_preload(GFP_KERNEL); + spin_lock_irq(&pidmap_lock); + + /* + * init really needs pid 1, but after reaching the maximum + * wrap back to RESERVED_PIDS + */ + if (idr_get_cursor(&tmp->idr) > RESERVED_PIDS) + pid_min = RESERVED_PIDS; + + /* + * Store a null pointer so find_pid_ns does not find + * a partially initialized PID (see below). + */ + nr = idr_alloc_cyclic(&tmp->idr, NULL, pid_min, + pid_max, GFP_ATOMIC); + spin_unlock_irq(&pidmap_lock); + idr_preload_end(); + if (nr < 0) { retval = nr; goto out_free; @@ -334,12 +206,12 @@ struct pid *alloc_pid(struct pid_namespace *ns) upid = pid->numbers + ns->level; spin_lock_irq(&pidmap_lock); - if (!(ns->nr_hashed & PIDNS_HASH_ADDING)) + if (!(ns->pid_allocated & PIDNS_ADDING)) goto out_unlock; for ( ; upid >= pid->numbers; --upid) { - hlist_add_head_rcu(&upid->pid_chain, - &pid_hash[pid_hashfn(upid->nr, upid->ns)]); - upid->ns->nr_hashed++; + /* Make the PID visible to find_pid_ns. */ + idr_replace(&upid->ns->idr, pid, upid->nr); + upid->ns->pid_allocated++; } spin_unlock_irq(&pidmap_lock); @@ -350,8 +222,11 @@ out_unlock: put_pid_ns(ns); out_free: + spin_lock_irq(&pidmap_lock); while (++i <= ns->level) - free_pidmap(pid->numbers + i); + idr_remove(&ns->idr, (pid->numbers + i)->nr); + + spin_unlock_irq(&pidmap_lock); kmem_cache_free(ns->pid_cachep, pid); return ERR_PTR(retval); @@ -360,21 +235,13 @@ out_free: void disable_pid_allocation(struct pid_namespace *ns) { spin_lock_irq(&pidmap_lock); - ns->nr_hashed &= ~PIDNS_HASH_ADDING; + ns->pid_allocated &= ~PIDNS_ADDING; spin_unlock_irq(&pidmap_lock); } struct pid *find_pid_ns(int nr, struct pid_namespace *ns) { - struct upid *pnr; - - hlist_for_each_entry_rcu(pnr, - &pid_hash[pid_hashfn(nr, ns)], pid_chain) - if (pnr->nr == nr && pnr->ns == ns) - return container_of(pnr, struct pid, - numbers[ns->level]); - - return NULL; + return idr_find(&ns->idr, nr); } EXPORT_SYMBOL_GPL(find_pid_ns); @@ -530,6 +397,7 @@ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, if (type != PIDTYPE_PID) { if (type == __PIDTYPE_TGID) type = PIDTYPE_PID; + task = task->group_leader; } nr = pid_nr_ns(rcu_dereference(task->pids[type].pid), ns); @@ -553,35 +421,13 @@ EXPORT_SYMBOL_GPL(task_active_pid_ns); */ struct pid *find_ge_pid(int nr, struct pid_namespace *ns) { - struct pid *pid; - - do { - pid = find_pid_ns(nr, ns); - if (pid) - break; - nr = next_pidmap(ns, nr); - } while (nr > 0); - - return pid; -} - -/* - * The pid hash table is scaled according to the amount of memory in the - * machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or - * more. - */ -void __init pidhash_init(void) -{ - pid_hash = alloc_large_system_hash("PID", sizeof(*pid_hash), 0, 18, - HASH_EARLY | HASH_SMALL | HASH_ZERO, - &pidhash_shift, NULL, - 0, 4096); + return idr_get_next(&ns->idr, &nr); } -void __init pidmap_init(void) +void __init pid_idr_init(void) { /* Verify no one has done anything silly: */ - BUILD_BUG_ON(PID_MAX_LIMIT >= PIDNS_HASH_ADDING); + BUILD_BUG_ON(PID_MAX_LIMIT >= PIDNS_ADDING); /* bump default and minimum pid_max based on number of cpus */ pid_max = min(pid_max_max, max_t(int, pid_max, @@ -590,10 +436,7 @@ void __init pidmap_init(void) PIDS_PER_CPU_MIN * num_possible_cpus()); pr_info("pid_max: default: %u minimum: %u\n", pid_max, pid_max_min); - init_pid_ns.pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL); - /* Reserve PID 0. We never call free_pidmap(0) */ - set_bit(0, init_pid_ns.pidmap[0].page); - atomic_dec(&init_pid_ns.pidmap[0].nr_free); + idr_init(&init_pid_ns.idr); init_pid_ns.pid_cachep = KMEM_CACHE(pid, SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT); diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 4918314893bc..0b53eef7d34b 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -21,6 +21,7 @@ #include <linux/export.h> #include <linux/sched/task.h> #include <linux/sched/signal.h> +#include <linux/idr.h> struct pid_cache { int nr_ids; @@ -98,7 +99,6 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns struct pid_namespace *ns; unsigned int level = parent_pid_ns->level + 1; struct ucounts *ucounts; - int i; int err; err = -EINVAL; @@ -117,17 +117,15 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns if (ns == NULL) goto out_dec; - ns->pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL); - if (!ns->pidmap[0].page) - goto out_free; + idr_init(&ns->idr); ns->pid_cachep = create_pid_cachep(level + 1); if (ns->pid_cachep == NULL) - goto out_free_map; + goto out_free_idr; err = ns_alloc_inum(&ns->ns); if (err) - goto out_free_map; + goto out_free_idr; ns->ns.ops = &pidns_operations; kref_init(&ns->kref); @@ -135,20 +133,13 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns ns->parent = get_pid_ns(parent_pid_ns); ns->user_ns = get_user_ns(user_ns); ns->ucounts = ucounts; - ns->nr_hashed = PIDNS_HASH_ADDING; + ns->pid_allocated = PIDNS_ADDING; INIT_WORK(&ns->proc_work, proc_cleanup_work); - set_bit(0, ns->pidmap[0].page); - atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1); - - for (i = 1; i < PIDMAP_ENTRIES; i++) - atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE); - return ns; -out_free_map: - kfree(ns->pidmap[0].page); -out_free: +out_free_idr: + idr_destroy(&ns->idr); kmem_cache_free(pid_ns_cachep, ns); out_dec: dec_pid_namespaces(ucounts); @@ -168,11 +159,9 @@ static void delayed_free_pidns(struct rcu_head *p) static void destroy_pid_namespace(struct pid_namespace *ns) { - int i; - ns_free_inum(&ns->ns); - for (i = 0; i < PIDMAP_ENTRIES; i++) - kfree(ns->pidmap[i].page); + + idr_destroy(&ns->idr); call_rcu(&ns->rcu, delayed_free_pidns); } @@ -213,6 +202,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) int rc; struct task_struct *task, *me = current; int init_pids = thread_group_leader(me) ? 1 : 2; + struct pid *pid; /* Don't allow any more processes into the pid namespace */ disable_pid_allocation(pid_ns); @@ -239,20 +229,16 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) * maintain a tasklist for each pid namespace. * */ + rcu_read_lock(); read_lock(&tasklist_lock); - nr = next_pidmap(pid_ns, 1); - while (nr > 0) { - rcu_read_lock(); - - task = pid_task(find_vpid(nr), PIDTYPE_PID); + nr = 2; + idr_for_each_entry_continue(&pid_ns->idr, pid, nr) { + task = pid_task(pid, PIDTYPE_PID); if (task && !__fatal_signal_pending(task)) send_sig_info(SIGKILL, SEND_SIG_FORCED, task); - - rcu_read_unlock(); - - nr = next_pidmap(pid_ns, nr); } read_unlock(&tasklist_lock); + rcu_read_unlock(); /* * Reap the EXIT_ZOMBIE children we had before we ignored SIGCHLD. @@ -268,7 +254,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) * sys_wait4() above can't reap the EXIT_DEAD children but we do not * really care, we could reparent them to the global init. We could * exit and reap ->child_reaper even if it is not the last thread in - * this pid_ns, free_pid(nr_hashed == 0) calls proc_cleanup_work(), + * this pid_ns, free_pid(pid_allocated == 0) calls proc_cleanup_work(), * pid_ns can not go away until proc_kill_sb() drops the reference. * * But this ns can also have other tasks injected by setns()+fork(). @@ -282,7 +268,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) */ for (;;) { set_current_state(TASK_INTERRUPTIBLE); - if (pid_ns->nr_hashed == init_pids) + if (pid_ns->pid_allocated == init_pids) break; schedule(); } @@ -301,6 +287,7 @@ static int pid_ns_ctl_handler(struct ctl_table *table, int write, { struct pid_namespace *pid_ns = task_active_pid_ns(current); struct ctl_table tmp = *table; + int ret, next; if (write && !ns_capable(pid_ns->user_ns, CAP_SYS_ADMIN)) return -EPERM; @@ -311,8 +298,14 @@ static int pid_ns_ctl_handler(struct ctl_table *table, int write, * it should synchronize its usage with external means. */ - tmp.data = &pid_ns->last_pid; - return proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); + next = idr_get_cursor(&pid_ns->idr) - 1; + + tmp.data = &next; + ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); + if (!ret && write) + idr_set_cursor(&pid_ns->idr, next + 1); + + return ret; } extern int pid_max; diff --git a/kernel/reboot.c b/kernel/reboot.c index bd30a973fe94..e4ced883d8de 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c @@ -104,6 +104,33 @@ int unregister_reboot_notifier(struct notifier_block *nb) } EXPORT_SYMBOL(unregister_reboot_notifier); +static void devm_unregister_reboot_notifier(struct device *dev, void *res) +{ + WARN_ON(unregister_reboot_notifier(*(struct notifier_block **)res)); +} + +int devm_register_reboot_notifier(struct device *dev, struct notifier_block *nb) +{ + struct notifier_block **rcnb; + int ret; + + rcnb = devres_alloc(devm_unregister_reboot_notifier, + sizeof(*rcnb), GFP_KERNEL); + if (!rcnb) + return -ENOMEM; + + ret = register_reboot_notifier(nb); + if (!ret) { + *rcnb = nb; + devres_add(dev, rcnb); + } else { + devres_free(rcnb); + } + + return ret; +} +EXPORT_SYMBOL(devm_register_reboot_notifier); + /* * Notifier list for kernel code which wants to be called * to restart the system. diff --git a/kernel/signal.c b/kernel/signal.c index babb36d3d039..9558664bd9ec 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -78,7 +78,7 @@ static int sig_task_ignored(struct task_struct *t, int sig, bool force) handler = sig_handler(t, sig); if (unlikely(t->signal->flags & SIGNAL_UNKILLABLE) && - handler == SIG_DFL && !force) + handler == SIG_DFL && !(force && sig_kernel_only(sig))) return 1; return sig_handler_ignored(handler, sig); @@ -94,13 +94,15 @@ static int sig_ignored(struct task_struct *t, int sig, bool force) if (sigismember(&t->blocked, sig) || sigismember(&t->real_blocked, sig)) return 0; - if (!sig_task_ignored(t, sig, force)) - return 0; - /* - * Tracers may want to know about even ignored signals. + * Tracers may want to know about even ignored signal unless it + * is SIGKILL which can't be reported anyway but can be ignored + * by SIGNAL_UNKILLABLE task. */ - return !t->ptrace; + if (t->ptrace && sig != SIGKILL) + return 0; + + return sig_task_ignored(t, sig, force); } /* @@ -929,9 +931,9 @@ static void complete_signal(int sig, struct task_struct *p, int group) * then start taking the whole group down immediately. */ if (sig_fatal(p, sig) && - !(signal->flags & (SIGNAL_UNKILLABLE | SIGNAL_GROUP_EXIT)) && + !(signal->flags & SIGNAL_GROUP_EXIT) && !sigismember(&t->real_blocked, sig) && - (sig == SIGKILL || !t->ptrace)) { + (sig == SIGKILL || !p->ptrace)) { /* * This signal will be fatal to the whole group. */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 4a13a389e99b..557d46728577 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -66,6 +66,7 @@ #include <linux/kexec.h> #include <linux/bpf.h> #include <linux/mount.h> +#include <linux/pipe_fs_i.h> #include <linux/uaccess.h> #include <asm/processor.h> @@ -1816,7 +1817,7 @@ static struct ctl_table fs_table[] = { { .procname = "pipe-max-size", .data = &pipe_max_size, - .maxlen = sizeof(int), + .maxlen = sizeof(pipe_max_size), .mode = 0644, .proc_handler = &pipe_proc_fn, .extra1 = &pipe_min_size, @@ -2575,12 +2576,13 @@ static int do_proc_douintvec_minmax_conv(unsigned long *lvalp, if (write) { unsigned int val = *lvalp; + if (*lvalp > UINT_MAX) + return -EINVAL; + if ((param->min && *param->min > val) || (param->max && *param->max < val)) return -ERANGE; - if (*lvalp > UINT_MAX) - return -EINVAL; *valp = val; } else { unsigned int val = *valp; @@ -2620,6 +2622,48 @@ int proc_douintvec_minmax(struct ctl_table *table, int write, do_proc_douintvec_minmax_conv, ¶m); } +struct do_proc_dopipe_max_size_conv_param { + unsigned int *min; +}; + +static int do_proc_dopipe_max_size_conv(unsigned long *lvalp, + unsigned int *valp, + int write, void *data) +{ + struct do_proc_dopipe_max_size_conv_param *param = data; + + if (write) { + unsigned int val; + + if (*lvalp > UINT_MAX) + return -EINVAL; + + val = round_pipe_size(*lvalp); + if (val == 0) + return -EINVAL; + + if (param->min && *param->min > val) + return -ERANGE; + + *valp = val; + } else { + unsigned int val = *valp; + *lvalp = (unsigned long) val; + } + + return 0; +} + +int proc_dopipe_max_size(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + struct do_proc_dopipe_max_size_conv_param param = { + .min = (unsigned int *) table->extra1, + }; + return do_proc_douintvec(table, write, buffer, lenp, ppos, + do_proc_dopipe_max_size_conv, ¶m); +} + static void validate_coredump_safety(void) { #ifdef CONFIG_COREDUMP @@ -3083,14 +3127,12 @@ int proc_do_large_bitmap(struct ctl_table *table, int write, else bitmap_copy(bitmap, tmp_bitmap, bitmap_len); } - kfree(tmp_bitmap); *lenp -= left; *ppos += *lenp; - return 0; - } else { - kfree(tmp_bitmap); - return err; } + + kfree(tmp_bitmap); + return err; } #else /* CONFIG_PROC_SYSCTL */ @@ -3125,6 +3167,12 @@ int proc_douintvec_minmax(struct ctl_table *table, int write, return -ENOSYS; } +int proc_dopipe_max_size(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + return -ENOSYS; +} + int proc_dointvec_jiffies(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -3168,6 +3216,7 @@ EXPORT_SYMBOL(proc_douintvec); EXPORT_SYMBOL(proc_dointvec_jiffies); EXPORT_SYMBOL(proc_dointvec_minmax); EXPORT_SYMBOL_GPL(proc_douintvec_minmax); +EXPORT_SYMBOL_GPL(proc_dopipe_max_size); EXPORT_SYMBOL(proc_dointvec_userhz_jiffies); EXPORT_SYMBOL(proc_dointvec_ms_jiffies); EXPORT_SYMBOL(proc_dostring); diff --git a/kernel/umh.c b/kernel/umh.c index 6ff9905250ff..18e5fa4b0e71 100644 --- a/kernel/umh.c +++ b/kernel/umh.c @@ -537,14 +537,14 @@ static int proc_cap_handler(struct ctl_table *table, int write, /* * Drop everything not in the new_cap (but don't add things) */ - spin_lock(&umh_sysctl_lock); if (write) { + spin_lock(&umh_sysctl_lock); if (table->data == CAP_BSET) usermodehelper_bset = cap_intersect(usermodehelper_bset, new_cap); if (table->data == CAP_PI) usermodehelper_inheritable = cap_intersect(usermodehelper_inheritable, new_cap); + spin_unlock(&umh_sysctl_lock); } - spin_unlock(&umh_sysctl_lock); return 0; } diff --git a/lib/Kconfig b/lib/Kconfig index a2b6745324ab..368972f0db78 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -584,7 +584,7 @@ config PRIME_NUMBERS tristate config STRING_SELFTEST - bool "Test string functions" + tristate "Test string functions" endmenu diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 8ffd891857ab..947d3e2ed5c2 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -756,6 +756,16 @@ config KCOV For more details, see Documentation/dev-tools/kcov.rst. +config KCOV_ENABLE_COMPARISONS + bool "Enable comparison operands collection by KCOV" + depends on KCOV + default n + help + KCOV also exposes operands of every comparison in the instrumented + code along with operand sizes and PCs of the comparison instructions. + These operands can be used by fuzzing engines to improve the quality + of fuzzing coverage. + config KCOV_INSTRUMENT_ALL bool "Instrument all code by default" depends on KCOV @@ -1850,6 +1860,15 @@ config TEST_BPF If unsure, say N. +config TEST_FIND_BIT + tristate "Test find_bit functions" + default n + help + This builds the "test_find_bit" module that measure find_*_bit() + functions performance. + + If unsure, say N. + config TEST_FIRMWARE tristate "Test firmware loading via userspace interface" default n diff --git a/lib/Makefile b/lib/Makefile index 136a0b254564..d11c48ec8ffd 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -40,12 +40,14 @@ obj-y += bcd.o div64.o sort.o parser.o debug_locks.o random32.o \ bsearch.o find_bit.o llist.o memweight.o kfifo.o \ percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o \ once.o refcount.o usercopy.o errseq.o +obj-$(CONFIG_STRING_SELFTEST) += test_string.o obj-y += string_helpers.o obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o obj-y += hexdump.o obj-$(CONFIG_TEST_HEXDUMP) += test_hexdump.o obj-y += kstrtox.o obj-$(CONFIG_TEST_BPF) += test_bpf.o +obj-$(CONFIG_TEST_FIND_BIT) += test_find_bit.o obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o obj-$(CONFIG_TEST_SYSCTL) += test_sysctl.o obj-$(CONFIG_TEST_HASH) += test_hash.o test_siphash.o diff --git a/lib/bug.c b/lib/bug.c index 1e094408c893..c1b0fad31b10 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -186,7 +186,7 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs) return BUG_TRAP_TYPE_WARN; } - printk(KERN_DEFAULT "------------[ cut here ]------------\n"); + printk(KERN_DEFAULT CUT_HERE); if (file) pr_crit("kernel BUG at %s:%u!\n", file, line); @@ -196,3 +196,26 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs) return BUG_TRAP_TYPE_BUG; } + +static void clear_once_table(struct bug_entry *start, struct bug_entry *end) +{ + struct bug_entry *bug; + + for (bug = start; bug < end; bug++) + bug->flags &= ~BUGFLAG_DONE; +} + +void generic_bug_clear_once(void) +{ +#ifdef CONFIG_MODULES + struct module *mod; + + rcu_read_lock_sched(); + list_for_each_entry_rcu(mod, &module_bug_list, bug_list) + clear_once_table(mod->bug_table, + mod->bug_table + mod->num_bugs); + rcu_read_unlock_sched(); +#endif + + clear_once_table(__start___bug_table, __stop___bug_table); +} diff --git a/lib/dma-debug.c b/lib/dma-debug.c index ea4cc3dde4f1..1b34d210452c 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -1495,14 +1495,22 @@ void debug_dma_alloc_coherent(struct device *dev, size_t size, if (!entry) return; + /* handle vmalloc and linear addresses */ + if (!is_vmalloc_addr(virt) && !virt_to_page(virt)) + return; + entry->type = dma_debug_coherent; entry->dev = dev; - entry->pfn = page_to_pfn(virt_to_page(virt)); entry->offset = offset_in_page(virt); entry->size = size; entry->dev_addr = dma_addr; entry->direction = DMA_BIDIRECTIONAL; + if (is_vmalloc_addr(virt)) + entry->pfn = vmalloc_to_pfn(virt); + else + entry->pfn = page_to_pfn(virt_to_page(virt)); + add_dma_entry(entry); } EXPORT_SYMBOL(debug_dma_alloc_coherent); @@ -1513,13 +1521,21 @@ void debug_dma_free_coherent(struct device *dev, size_t size, struct dma_debug_entry ref = { .type = dma_debug_coherent, .dev = dev, - .pfn = page_to_pfn(virt_to_page(virt)), .offset = offset_in_page(virt), .dev_addr = addr, .size = size, .direction = DMA_BIDIRECTIONAL, }; + /* handle vmalloc and linear addresses */ + if (!is_vmalloc_addr(virt) && !virt_to_page(virt)) + return; + + if (is_vmalloc_addr(virt)) + ref.pfn = vmalloc_to_pfn(virt); + else + ref.pfn = page_to_pfn(virt_to_page(virt)); + if (unlikely(dma_debug_disabled())) return; diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index da796e2dc4f5..c7c96bc7654a 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -360,6 +360,10 @@ static int ddebug_parse_query(char *words[], int nwords, if (parse_lineno(last, &query->last_lineno) < 0) return -EINVAL; + /* special case for last lineno not specified */ + if (query->last_lineno == 0) + query->last_lineno = UINT_MAX; + if (query->last_lineno < query->first_lineno) { pr_err("last-line:%d < 1st-line:%d\n", query->last_lineno, diff --git a/lib/genalloc.c b/lib/genalloc.c index 144fe6b1a03e..ca06adc4f445 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c @@ -194,7 +194,7 @@ int gen_pool_add_virt(struct gen_pool *pool, unsigned long virt, phys_addr_t phy chunk->phys_addr = phys; chunk->start_addr = virt; chunk->end_addr = virt + size - 1; - atomic_set(&chunk->avail, size); + atomic_long_set(&chunk->avail, size); spin_lock(&pool->lock); list_add_rcu(&chunk->next_chunk, &pool->chunks); @@ -304,7 +304,7 @@ unsigned long gen_pool_alloc_algo(struct gen_pool *pool, size_t size, nbits = (size + (1UL << order) - 1) >> order; rcu_read_lock(); list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) { - if (size > atomic_read(&chunk->avail)) + if (size > atomic_long_read(&chunk->avail)) continue; start_bit = 0; @@ -324,7 +324,7 @@ retry: addr = chunk->start_addr + ((unsigned long)start_bit << order); size = nbits << order; - atomic_sub(size, &chunk->avail); + atomic_long_sub(size, &chunk->avail); break; } rcu_read_unlock(); @@ -390,7 +390,7 @@ void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) remain = bitmap_clear_ll(chunk->bits, start_bit, nbits); BUG_ON(remain); size = nbits << order; - atomic_add(size, &chunk->avail); + atomic_long_add(size, &chunk->avail); rcu_read_unlock(); return; } @@ -464,7 +464,7 @@ size_t gen_pool_avail(struct gen_pool *pool) rcu_read_lock(); list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) - avail += atomic_read(&chunk->avail); + avail += atomic_long_read(&chunk->avail); rcu_read_unlock(); return avail; } diff --git a/lib/int_sqrt.c b/lib/int_sqrt.c index db0b5aa071fc..e2d329099bf7 100644 --- a/lib/int_sqrt.c +++ b/lib/int_sqrt.c @@ -8,12 +8,13 @@ #include <linux/kernel.h> #include <linux/export.h> +#include <linux/bitops.h> /** - * int_sqrt - rough approximation to sqrt + * int_sqrt - computes the integer square root * @x: integer of which to calculate the sqrt * - * A very rough approximation to the sqrt() function. + * Computes: floor(sqrt(x)) */ unsigned long int_sqrt(unsigned long x) { @@ -22,7 +23,7 @@ unsigned long int_sqrt(unsigned long x) if (x <= 1) return x; - m = 1UL << (BITS_PER_LONG - 2); + m = 1UL << (__fls(x) & ~1UL); while (m != 0) { b = y + m; y >>= 1; diff --git a/lib/interval_tree_test.c b/lib/interval_tree_test.c index 0e343fd29570..835242e74aaa 100644 --- a/lib/interval_tree_test.c +++ b/lib/interval_tree_test.c @@ -11,10 +11,10 @@ MODULE_PARM_DESC(name, msg); __param(int, nnodes, 100, "Number of nodes in the interval tree"); -__param(int, perf_loops, 100000, "Number of iterations modifying the tree"); +__param(int, perf_loops, 1000, "Number of iterations modifying the tree"); __param(int, nsearches, 100, "Number of searches to the interval tree"); -__param(int, search_loops, 10000, "Number of iterations searching the tree"); +__param(int, search_loops, 1000, "Number of iterations searching the tree"); __param(bool, search_all, false, "Searches will iterate all nodes in the tree"); __param(uint, max_endpoint, ~0, "Largest value for the interval's endpoint"); diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c index 46e4c749e4eb..61a6b5aab07e 100644 --- a/lib/nmi_backtrace.c +++ b/lib/nmi_backtrace.c @@ -93,8 +93,8 @@ bool nmi_cpu_backtrace(struct pt_regs *regs) if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { arch_spin_lock(&lock); if (regs && cpu_in_idle(instruction_pointer(regs))) { - pr_warn("NMI backtrace for cpu %d skipped: idling at pc %#lx\n", - cpu, instruction_pointer(regs)); + pr_warn("NMI backtrace for cpu %d skipped: idling at %pS\n", + cpu, (void *)instruction_pointer(regs)); } else { pr_warn("NMI backtrace for cpu %d\n", cpu); if (regs) diff --git a/lib/rbtree_test.c b/lib/rbtree_test.c index 191a238e5a9d..7d36c1e27ff6 100644 --- a/lib/rbtree_test.c +++ b/lib/rbtree_test.c @@ -11,7 +11,7 @@ MODULE_PARM_DESC(name, msg); __param(int, nnodes, 100, "Number of nodes in the rb-tree"); -__param(int, perf_loops, 100000, "Number of iterations modifying the rb-tree"); +__param(int, perf_loops, 1000, "Number of iterations modifying the rb-tree"); __param(int, check_loops, 100, "Number of iterations modifying and verifying the rb-tree"); struct test_node { diff --git a/lib/string.c b/lib/string.c index 5e8d410a93df..64a9e33f1daa 100644 --- a/lib/string.c +++ b/lib/string.c @@ -1052,144 +1052,3 @@ void fortify_panic(const char *name) BUG(); } EXPORT_SYMBOL(fortify_panic); - -#ifdef CONFIG_STRING_SELFTEST -#include <linux/slab.h> -#include <linux/module.h> - -static __init int memset16_selftest(void) -{ - unsigned i, j, k; - u16 v, *p; - - p = kmalloc(256 * 2 * 2, GFP_KERNEL); - if (!p) - return -1; - - for (i = 0; i < 256; i++) { - for (j = 0; j < 256; j++) { - memset(p, 0xa1, 256 * 2 * sizeof(v)); - memset16(p + i, 0xb1b2, j); - for (k = 0; k < 512; k++) { - v = p[k]; - if (k < i) { - if (v != 0xa1a1) - goto fail; - } else if (k < i + j) { - if (v != 0xb1b2) - goto fail; - } else { - if (v != 0xa1a1) - goto fail; - } - } - } - } - -fail: - kfree(p); - if (i < 256) - return (i << 24) | (j << 16) | k; - return 0; -} - -static __init int memset32_selftest(void) -{ - unsigned i, j, k; - u32 v, *p; - - p = kmalloc(256 * 2 * 4, GFP_KERNEL); - if (!p) - return -1; - - for (i = 0; i < 256; i++) { - for (j = 0; j < 256; j++) { - memset(p, 0xa1, 256 * 2 * sizeof(v)); - memset32(p + i, 0xb1b2b3b4, j); - for (k = 0; k < 512; k++) { - v = p[k]; - if (k < i) { - if (v != 0xa1a1a1a1) - goto fail; - } else if (k < i + j) { - if (v != 0xb1b2b3b4) - goto fail; - } else { - if (v != 0xa1a1a1a1) - goto fail; - } - } - } - } - -fail: - kfree(p); - if (i < 256) - return (i << 24) | (j << 16) | k; - return 0; -} - -static __init int memset64_selftest(void) -{ - unsigned i, j, k; - u64 v, *p; - - p = kmalloc(256 * 2 * 8, GFP_KERNEL); - if (!p) - return -1; - - for (i = 0; i < 256; i++) { - for (j = 0; j < 256; j++) { - memset(p, 0xa1, 256 * 2 * sizeof(v)); - memset64(p + i, 0xb1b2b3b4b5b6b7b8ULL, j); - for (k = 0; k < 512; k++) { - v = p[k]; - if (k < i) { - if (v != 0xa1a1a1a1a1a1a1a1ULL) - goto fail; - } else if (k < i + j) { - if (v != 0xb1b2b3b4b5b6b7b8ULL) - goto fail; - } else { - if (v != 0xa1a1a1a1a1a1a1a1ULL) - goto fail; - } - } - } - } - -fail: - kfree(p); - if (i < 256) - return (i << 24) | (j << 16) | k; - return 0; -} - -static __init int string_selftest_init(void) -{ - int test, subtest; - - test = 1; - subtest = memset16_selftest(); - if (subtest) - goto fail; - - test = 2; - subtest = memset32_selftest(); - if (subtest) - goto fail; - - test = 3; - subtest = memset64_selftest(); - if (subtest) - goto fail; - - pr_info("String selftests succeeded\n"); - return 0; -fail: - pr_crit("String selftest failure %d.%08x\n", test, subtest); - return 0; -} - -module_init(string_selftest_init); -#endif /* CONFIG_STRING_SELFTEST */ diff --git a/lib/test_find_bit.c b/lib/test_find_bit.c new file mode 100644 index 000000000000..f4394a36f9aa --- /dev/null +++ b/lib/test_find_bit.c @@ -0,0 +1,144 @@ +/* + * Test for find_*_bit functions. + * + * Copyright (c) 2017 Cavium. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +/* + * find_bit functions are widely used in kernel, so the successful boot + * is good enough test for correctness. + * + * This test is focused on performance of traversing bitmaps. Two typical + * scenarios are reproduced: + * - randomly filled bitmap with approximately equal number of set and + * cleared bits; + * - sparse bitmap with few set bits at random positions. + */ + +#include <linux/bitops.h> +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/module.h> +#include <linux/printk.h> +#include <linux/random.h> + +#define BITMAP_LEN (4096UL * 8 * 10) +#define SPARSE 500 + +static DECLARE_BITMAP(bitmap, BITMAP_LEN) __initdata; + +/* + * This is Schlemiel the Painter's algorithm. It should be called after + * all other tests for the same bitmap because it sets all bits of bitmap to 1. + */ +static int __init test_find_first_bit(void *bitmap, unsigned long len) +{ + unsigned long i, cnt; + cycles_t cycles; + + cycles = get_cycles(); + for (cnt = i = 0; i < len; cnt++) { + i = find_first_bit(bitmap, len); + __clear_bit(i, bitmap); + } + cycles = get_cycles() - cycles; + pr_err("find_first_bit:\t\t%llu cycles,\t%ld iterations\n", + (u64)cycles, cnt); + + return 0; +} + +static int __init test_find_next_bit(const void *bitmap, unsigned long len) +{ + unsigned long i, cnt; + cycles_t cycles; + + cycles = get_cycles(); + for (cnt = i = 0; i < BITMAP_LEN; cnt++) + i = find_next_bit(bitmap, BITMAP_LEN, i) + 1; + cycles = get_cycles() - cycles; + pr_err("find_next_bit:\t\t%llu cycles,\t%ld iterations\n", + (u64)cycles, cnt); + + return 0; +} + +static int __init test_find_next_zero_bit(const void *bitmap, unsigned long len) +{ + unsigned long i, cnt; + cycles_t cycles; + + cycles = get_cycles(); + for (cnt = i = 0; i < BITMAP_LEN; cnt++) + i = find_next_zero_bit(bitmap, len, i) + 1; + cycles = get_cycles() - cycles; + pr_err("find_next_zero_bit:\t%llu cycles,\t%ld iterations\n", + (u64)cycles, cnt); + + return 0; +} + +static int __init test_find_last_bit(const void *bitmap, unsigned long len) +{ + unsigned long l, cnt = 0; + cycles_t cycles; + + cycles = get_cycles(); + do { + cnt++; + l = find_last_bit(bitmap, len); + if (l >= len) + break; + len = l; + } while (len); + cycles = get_cycles() - cycles; + pr_err("find_last_bit:\t\t%llu cycles,\t%ld iterations\n", + (u64)cycles, cnt); + + return 0; +} + +static int __init find_bit_test(void) +{ + unsigned long nbits = BITMAP_LEN / SPARSE; + + pr_err("\nStart testing find_bit() with random-filled bitmap\n"); + + get_random_bytes(bitmap, sizeof(bitmap)); + + test_find_next_bit(bitmap, BITMAP_LEN); + test_find_next_zero_bit(bitmap, BITMAP_LEN); + test_find_last_bit(bitmap, BITMAP_LEN); + test_find_first_bit(bitmap, BITMAP_LEN); + + pr_err("\nStart testing find_bit() with sparse bitmap\n"); + + bitmap_zero(bitmap, BITMAP_LEN); + + while (nbits--) + __set_bit(prandom_u32() % BITMAP_LEN, bitmap); + + test_find_next_bit(bitmap, BITMAP_LEN); + test_find_next_zero_bit(bitmap, BITMAP_LEN); + test_find_last_bit(bitmap, BITMAP_LEN); + test_find_first_bit(bitmap, BITMAP_LEN); + + return 0; +} +module_init(find_bit_test); + +static void __exit test_find_bit_cleanup(void) +{ +} +module_exit(test_find_bit_cleanup); + +MODULE_LICENSE("GPL"); diff --git a/lib/test_kasan.c b/lib/test_kasan.c index a25c9763fce1..ef1a3ac1397e 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -353,10 +353,9 @@ static noinline void __init memcg_accounted_kmem_cache(void) */ for (i = 0; i < 5; i++) { p = kmem_cache_alloc(cache, GFP_KERNEL); - if (!p) { - pr_err("Allocation failed\n"); + if (!p) goto free_cache; - } + kmem_cache_free(cache, p); msleep(100); } diff --git a/lib/test_kmod.c b/lib/test_kmod.c index fba78d25e825..337f408b4de6 100644 --- a/lib/test_kmod.c +++ b/lib/test_kmod.c @@ -783,10 +783,8 @@ static int kmod_config_sync_info(struct kmod_test_device *test_dev) free_test_dev_info(test_dev); test_dev->info = vzalloc(config->num_threads * sizeof(struct kmod_test_device_info)); - if (!test_dev->info) { - dev_err(test_dev->dev, "Cannot alloc test_dev info\n"); + if (!test_dev->info) return -ENOMEM; - } return 0; } @@ -1089,10 +1087,8 @@ static struct kmod_test_device *alloc_test_dev_kmod(int idx) struct miscdevice *misc_dev; test_dev = vzalloc(sizeof(struct kmod_test_device)); - if (!test_dev) { - pr_err("Cannot alloc test_dev\n"); + if (!test_dev) goto err_out; - } mutex_init(&test_dev->config_mutex); mutex_init(&test_dev->trigger_mutex); diff --git a/lib/test_list_sort.c b/lib/test_list_sort.c index 28e817387b04..5474f3f3e41d 100644 --- a/lib/test_list_sort.c +++ b/lib/test_list_sort.c @@ -76,17 +76,14 @@ static int __init list_sort_test(void) pr_debug("start testing list_sort()\n"); elts = kcalloc(TEST_LIST_LEN, sizeof(*elts), GFP_KERNEL); - if (!elts) { - pr_err("error: cannot allocate memory\n"); + if (!elts) return err; - } for (i = 0; i < TEST_LIST_LEN; i++) { el = kmalloc(sizeof(*el), GFP_KERNEL); - if (!el) { - pr_err("error: cannot allocate memory\n"); + if (!el) goto exit; - } + /* force some equivalencies */ el->value = prandom_u32() % (TEST_LIST_LEN / 3); el->serial = i; diff --git a/lib/test_string.c b/lib/test_string.c new file mode 100644 index 000000000000..0fcdb82dca86 --- /dev/null +++ b/lib/test_string.c @@ -0,0 +1,141 @@ +#include <linux/module.h> +#include <linux/printk.h> +#include <linux/slab.h> +#include <linux/string.h> + +static __init int memset16_selftest(void) +{ + unsigned i, j, k; + u16 v, *p; + + p = kmalloc(256 * 2 * 2, GFP_KERNEL); + if (!p) + return -1; + + for (i = 0; i < 256; i++) { + for (j = 0; j < 256; j++) { + memset(p, 0xa1, 256 * 2 * sizeof(v)); + memset16(p + i, 0xb1b2, j); + for (k = 0; k < 512; k++) { + v = p[k]; + if (k < i) { + if (v != 0xa1a1) + goto fail; + } else if (k < i + j) { + if (v != 0xb1b2) + goto fail; + } else { + if (v != 0xa1a1) + goto fail; + } + } + } + } + +fail: + kfree(p); + if (i < 256) + return (i << 24) | (j << 16) | k; + return 0; +} + +static __init int memset32_selftest(void) +{ + unsigned i, j, k; + u32 v, *p; + + p = kmalloc(256 * 2 * 4, GFP_KERNEL); + if (!p) + return -1; + + for (i = 0; i < 256; i++) { + for (j = 0; j < 256; j++) { + memset(p, 0xa1, 256 * 2 * sizeof(v)); + memset32(p + i, 0xb1b2b3b4, j); + for (k = 0; k < 512; k++) { + v = p[k]; + if (k < i) { + if (v != 0xa1a1a1a1) + goto fail; + } else if (k < i + j) { + if (v != 0xb1b2b3b4) + goto fail; + } else { + if (v != 0xa1a1a1a1) + goto fail; + } + } + } + } + +fail: + kfree(p); + if (i < 256) + return (i << 24) | (j << 16) | k; + return 0; +} + +static __init int memset64_selftest(void) +{ + unsigned i, j, k; + u64 v, *p; + + p = kmalloc(256 * 2 * 8, GFP_KERNEL); + if (!p) + return -1; + + for (i = 0; i < 256; i++) { + for (j = 0; j < 256; j++) { + memset(p, 0xa1, 256 * 2 * sizeof(v)); + memset64(p + i, 0xb1b2b3b4b5b6b7b8ULL, j); + for (k = 0; k < 512; k++) { + v = p[k]; + if (k < i) { + if (v != 0xa1a1a1a1a1a1a1a1ULL) + goto fail; + } else if (k < i + j) { + if (v != 0xb1b2b3b4b5b6b7b8ULL) + goto fail; + } else { + if (v != 0xa1a1a1a1a1a1a1a1ULL) + goto fail; + } + } + } + } + +fail: + kfree(p); + if (i < 256) + return (i << 24) | (j << 16) | k; + return 0; +} + +static __init int string_selftest_init(void) +{ + int test, subtest; + + test = 1; + subtest = memset16_selftest(); + if (subtest) + goto fail; + + test = 2; + subtest = memset32_selftest(); + if (subtest) + goto fail; + + test = 3; + subtest = memset64_selftest(); + if (subtest) + goto fail; + + pr_info("String selftests succeeded\n"); + return 0; +fail: + pr_crit("String selftest failure %d.%08x\n", test, subtest); + return 0; +} + +module_init(string_selftest_init); +MODULE_LICENSE("GPL v2"); diff --git a/mm/Kconfig b/mm/Kconfig index 9c4bdddd80c2..03ff7703d322 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -756,3 +756,12 @@ config PERCPU_STATS This feature collects and exposes statistics via debugfs. The information includes global and per chunk statistics, which can be used to help understand percpu memory usage. + +config GUP_BENCHMARK + bool "Enable infrastructure for get_user_pages_fast() benchmarking" + default n + help + Provides /sys/kernel/debug/gup_benchmark that helps with testing + performance of get_user_pages_fast(). + + See tools/testing/selftests/vm/gup_benchmark.c diff --git a/mm/Makefile b/mm/Makefile index e7ebd176fb93..e669f02c5a54 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -80,6 +80,7 @@ obj-$(CONFIG_PAGE_COUNTER) += page_counter.o obj-$(CONFIG_MEMCG) += memcontrol.o vmpressure.o obj-$(CONFIG_MEMCG_SWAP) += swap_cgroup.o obj-$(CONFIG_CGROUP_HUGETLB) += hugetlb_cgroup.o +obj-$(CONFIG_GUP_BENCHMARK) += gup_benchmark.o obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o diff --git a/mm/compaction.c b/mm/compaction.c index 85395dc6eb13..10cd757f1006 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -219,6 +219,24 @@ static void reset_cached_positions(struct zone *zone) } /* + * Compound pages of >= pageblock_order should consistenly be skipped until + * released. It is always pointless to compact pages of such order (if they are + * migratable), and the pageblocks they occupy cannot contain any free pages. + */ +static bool pageblock_skip_persistent(struct page *page) +{ + if (!PageCompound(page)) + return false; + + page = compound_head(page); + + if (compound_order(page) >= pageblock_order) + return true; + + return false; +} + +/* * This function is called to clear all cached information on pageblocks that * should be skipped for page isolation when the migrate and free page scanner * meet. @@ -242,6 +260,8 @@ static void __reset_isolation_suitable(struct zone *zone) continue; if (zone != page_zone(page)) continue; + if (pageblock_skip_persistent(page)) + continue; clear_pageblock_skip(page); } @@ -275,7 +295,7 @@ static void update_pageblock_skip(struct compact_control *cc, struct zone *zone = cc->zone; unsigned long pfn; - if (cc->ignore_skip_hint) + if (cc->no_set_skip_hint) return; if (!page) @@ -307,7 +327,12 @@ static inline bool isolation_suitable(struct compact_control *cc, return true; } -static void update_pageblock_skip(struct compact_control *cc, +static inline bool pageblock_skip_persistent(struct page *page) +{ + return false; +} + +static inline void update_pageblock_skip(struct compact_control *cc, struct page *page, unsigned long nr_isolated, bool migrate_scanner) { @@ -449,13 +474,12 @@ static unsigned long isolate_freepages_block(struct compact_control *cc, * and the only danger is skipping too much. */ if (PageCompound(page)) { - unsigned int comp_order = compound_order(page); + const unsigned int order = compound_order(page); - if (likely(comp_order < MAX_ORDER)) { - blockpfn += (1UL << comp_order) - 1; - cursor += (1UL << comp_order) - 1; + if (likely(order < MAX_ORDER)) { + blockpfn += (1UL << order) - 1; + cursor += (1UL << order) - 1; } - goto isolate_fail; } @@ -772,11 +796,10 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, * danger is skipping too much. */ if (PageCompound(page)) { - unsigned int comp_order = compound_order(page); - - if (likely(comp_order < MAX_ORDER)) - low_pfn += (1UL << comp_order) - 1; + const unsigned int order = compound_order(page); + if (likely(order < MAX_ORDER)) + low_pfn += (1UL << order) - 1; goto isolate_fail; } @@ -1928,9 +1951,8 @@ static void kcompactd_do_work(pg_data_t *pgdat) .total_free_scanned = 0, .classzone_idx = pgdat->kcompactd_classzone_idx, .mode = MIGRATE_SYNC_LIGHT, - .ignore_skip_hint = true, + .ignore_skip_hint = false, .gfp_mask = GFP_KERNEL, - }; trace_mm_compaction_kcompactd_wake(pgdat->node_id, cc.order, cc.classzone_idx); diff --git a/mm/gup_benchmark.c b/mm/gup_benchmark.c new file mode 100644 index 000000000000..5c8e2abeaa15 --- /dev/null +++ b/mm/gup_benchmark.c @@ -0,0 +1,100 @@ +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/uaccess.h> +#include <linux/ktime.h> +#include <linux/debugfs.h> + +#define GUP_FAST_BENCHMARK _IOWR('g', 1, struct gup_benchmark) + +struct gup_benchmark { + __u64 delta_usec; + __u64 addr; + __u64 size; + __u32 nr_pages_per_call; + __u32 flags; +}; + +static int __gup_benchmark_ioctl(unsigned int cmd, + struct gup_benchmark *gup) +{ + ktime_t start_time, end_time; + unsigned long i, nr, nr_pages, addr, next; + struct page **pages; + + nr_pages = gup->size / PAGE_SIZE; + pages = kvmalloc(sizeof(void *) * nr_pages, GFP_KERNEL); + if (!pages) + return -ENOMEM; + + i = 0; + nr = gup->nr_pages_per_call; + start_time = ktime_get(); + for (addr = gup->addr; addr < gup->addr + gup->size; addr = next) { + if (nr != gup->nr_pages_per_call) + break; + + next = addr + nr * PAGE_SIZE; + if (next > gup->addr + gup->size) { + next = gup->addr + gup->size; + nr = (next - addr) / PAGE_SIZE; + } + + nr = get_user_pages_fast(addr, nr, gup->flags & 1, pages + i); + i += nr; + } + end_time = ktime_get(); + + gup->delta_usec = ktime_us_delta(end_time, start_time); + gup->size = addr - gup->addr; + + for (i = 0; i < nr_pages; i++) { + if (!pages[i]) + break; + put_page(pages[i]); + } + + kvfree(pages); + return 0; +} + +static long gup_benchmark_ioctl(struct file *filep, unsigned int cmd, + unsigned long arg) +{ + struct gup_benchmark gup; + int ret; + + if (cmd != GUP_FAST_BENCHMARK) + return -EINVAL; + + if (copy_from_user(&gup, (void __user *)arg, sizeof(gup))) + return -EFAULT; + + ret = __gup_benchmark_ioctl(cmd, &gup); + if (ret) + return ret; + + if (copy_to_user((void __user *)arg, &gup, sizeof(gup))) + return -EFAULT; + + return 0; +} + +static const struct file_operations gup_benchmark_fops = { + .open = nonseekable_open, + .unlocked_ioctl = gup_benchmark_ioctl, +}; + +static int gup_benchmark_init(void) +{ + void *ret; + + ret = debugfs_create_file_unsafe("gup_benchmark", 0600, NULL, NULL, + &gup_benchmark_fops); + if (!ret) + pr_warn("Failed to create gup_benchmark in debugfs"); + + return 0; +} + +late_initcall(gup_benchmark_init); diff --git a/mm/internal.h b/mm/internal.h index 1df011f62480..e6bd35182dae 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -198,6 +198,7 @@ struct compact_control { const int classzone_idx; /* zone index of a direct compactor */ enum migrate_mode mode; /* Async or sync migration mode */ bool ignore_skip_hint; /* Scan blocks even if marked skip */ + bool no_set_skip_hint; /* Don't mark blocks for skipping */ bool ignore_block_suitable; /* Scan blocks considered unsuitable */ bool direct_compaction; /* False from kcompactd or /proc/... */ bool whole_zone; /* Whole zone should/has been scanned */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 55ded92f9809..d4096f4a5c1f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -7619,6 +7619,7 @@ int alloc_contig_range(unsigned long start, unsigned long end, .zone = page_zone(pfn_to_page(start)), .mode = MIGRATE_SYNC, .ignore_skip_hint = true, + .no_set_skip_hint = true, .gfp_mask = current_gfp_context(gfp_mask), }; INIT_LIST_HEAD(&cc.migratepages); diff --git a/mm/shmem.c b/mm/shmem.c index 1f97d77551c3..4aa9307feab0 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -3202,7 +3202,6 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s int len; struct inode *inode; struct page *page; - struct shmem_inode_info *info; len = strlen(symname) + 1; if (len > PAGE_SIZE) @@ -3222,7 +3221,6 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s error = 0; } - info = SHMEM_I(inode); inode->i_size = len-1; if (len <= SHORT_SYMLINK_LEN) { inode->i_link = kmemdup(symname, len, GFP_KERNEL); diff --git a/mm/z3fold.c b/mm/z3fold.c index b2ba2ba585f3..39e19125d6a0 100644 --- a/mm/z3fold.c +++ b/mm/z3fold.c @@ -404,8 +404,7 @@ static void do_compact_page(struct z3fold_header *zhdr, bool locked) WARN_ON(z3fold_page_trylock(zhdr)); else z3fold_page_lock(zhdr); - if (test_bit(PAGE_STALE, &page->private) || - !test_and_clear_bit(NEEDS_COMPACTING, &page->private)) { + if (WARN_ON(!test_and_clear_bit(NEEDS_COMPACTING, &page->private))) { z3fold_page_unlock(zhdr); return; } @@ -413,6 +412,11 @@ static void do_compact_page(struct z3fold_header *zhdr, bool locked) list_del_init(&zhdr->buddy); spin_unlock(&pool->lock); + if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) { + atomic64_dec(&pool->pages_nr); + return; + } + z3fold_compact_page(zhdr); unbuddied = get_cpu_ptr(pool->unbuddied); fchunks = num_free_chunks(zhdr); @@ -753,9 +757,11 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle) list_del_init(&zhdr->buddy); spin_unlock(&pool->lock); zhdr->cpu = -1; + kref_get(&zhdr->refcount); do_compact_page(zhdr, true); return; } + kref_get(&zhdr->refcount); queue_work_on(zhdr->cpu, pool->compact_wq, &zhdr->work); z3fold_page_unlock(zhdr); } diff --git a/scripts/Makefile.kcov b/scripts/Makefile.kcov new file mode 100644 index 000000000000..5cc72037e423 --- /dev/null +++ b/scripts/Makefile.kcov @@ -0,0 +1,7 @@ +ifdef CONFIG_KCOV +CFLAGS_KCOV := $(call cc-option,-fsanitize-coverage=trace-pc,) +ifeq ($(CONFIG_KCOV_ENABLE_COMPARISONS),y) +CFLAGS_KCOV += $(call cc-option,-fsanitize-coverage=trace-cmp,) +endif + +endif diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 8b80bac055e4..95cda3ecc66b 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -454,6 +454,7 @@ our $zero_initializer = qr{(?:(?:0[xX])?0+$Int_type?|NULL|false)\b}; our $logFunctions = qr{(?x: printk(?:_ratelimited|_once|_deferred_once|_deferred|)| (?:[a-z0-9]+_){1,2}(?:printk|emerg|alert|crit|err|warning|warn|notice|info|debug|dbg|vdbg|devel|cont|WARN)(?:_ratelimited|_once|)| + TP_printk| WARN(?:_RATELIMIT|_ONCE|)| panic| MODULE_[A-Z_]+| @@ -2900,8 +2901,9 @@ sub process { $line =~ /^\+\s*#\s*define\s+\w+\s+$String$/) { $msg_type = ""; - # EFI_GUID is another special case - } elsif ($line =~ /^\+.*\bEFI_GUID\s*\(/) { + # More special cases + } elsif ($line =~ /^\+.*\bEFI_GUID\s*\(/ || + $line =~ /^\+\s*(?:\w+)?\s*DEFINE_PER_CPU/) { $msg_type = ""; # Otherwise set the alternate message types @@ -3103,6 +3105,7 @@ sub process { $line =~ /^\+[a-z_]*init/ || $line =~ /^\+\s*(?:static\s+)?[A-Z_]*ATTR/ || $line =~ /^\+\s*DECLARE/ || + $line =~ /^\+\s*builtin_[\w_]*driver/ || $line =~ /^\+\s*__setup/)) { if (CHK("LINE_SPACING", "Please use a blank line after function/struct/union/enum declarations\n" . $hereprev) && @@ -3182,6 +3185,12 @@ sub process { # check we are in a valid C source file if not then ignore this hunk next if ($realfile !~ /\.(h|c)$/); +# check for unusual line ending [ or ( + if ($line =~ /^\+.*([\[\(])\s*$/) { + CHK("OPEN_ENDED_LINE", + "Lines should not end with a '$1'\n" . $herecurr); + } + # check if this appears to be the start function declaration, save the name if ($sline =~ /^\+\{\s*$/ && $prevline =~ /^\+(?:(?:(?:$Storage|$Inline)\s*)*\s*$Type\s*)?($Ident)\(/) { @@ -3829,28 +3838,10 @@ sub process { "Prefer printk_ratelimited or pr_<level>_ratelimited to printk_ratelimit\n" . $herecurr); } -# printk should use KERN_* levels. Note that follow on printk's on the -# same line do not need a level, so we use the current block context -# to try and find and validate the current printk. In summary the current -# printk includes all preceding printk's which have no newline on the end. -# we assume the first bad printk is the one to report. - if ($line =~ /\bprintk\((?!KERN_)\s*"/) { - my $ok = 0; - for (my $ln = $linenr - 1; $ln >= $first_line; $ln--) { - #print "CHECK<$lines[$ln - 1]\n"; - # we have a preceding printk if it ends - # with "\n" ignore it, else it is to blame - if ($lines[$ln - 1] =~ m{\bprintk\(}) { - if ($rawlines[$ln - 1] !~ m{\\n"}) { - $ok = 1; - } - last; - } - } - if ($ok == 0) { - WARN("PRINTK_WITHOUT_KERN_LEVEL", - "printk() should include KERN_ facility level\n" . $herecurr); - } +# printk should use KERN_* levels + if ($line =~ /\bprintk\s*\(\s*(?!KERN_[A-Z]+\b)/) { + WARN("PRINTK_WITHOUT_KERN_LEVEL", + "printk() should include KERN_<LEVEL> facility level\n" . $herecurr); } if ($line =~ /\bprintk\s*\(\s*KERN_([A-Z]+)/) { @@ -5957,7 +5948,7 @@ sub process { # check for function declarations that have arguments without identifier names if (defined $stat && - $stat =~ /^.\s*(?:extern\s+)?$Type\s*$Ident\s*\(\s*([^{]+)\s*\)\s*;/s && + $stat =~ /^.\s*(?:extern\s+)?$Type\s*(?:$Ident|\(\s*\*\s*$Ident\s*\))\s*\(\s*([^{]+)\s*\)\s*;/s && $1 ne "void") { my $args = trim($1); while ($args =~ m/\s*($Type\s*(?:$Ident|\(\s*\*\s*$Ident?\s*\)\s*$balanced_parens)?)/g) { @@ -6109,7 +6100,7 @@ sub process { next if ($fline =~ /^.[\s$;]*$/); $has_statement = 1; $count++; - $has_break = 1 if ($fline =~ /\bswitch\b|\b(?:break\s*;[\s$;]*$|return\b|goto\b|continue\b)/); + $has_break = 1 if ($fline =~ /\bswitch\b|\b(?:break\s*;[\s$;]*$|exit\s*\(\b|return\b|goto\b|continue\b)/); } if (!$has_break && $has_statement) { WARN("MISSING_BREAK", diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl index bc443201d3ef..99c96e86eccb 100755 --- a/scripts/get_maintainer.pl +++ b/scripts/get_maintainer.pl @@ -57,6 +57,7 @@ my $sections = 0; my $file_emails = 0; my $from_filename = 0; my $pattern_depth = 0; +my $self_test = undef; my $version = 0; my $help = 0; my $find_maintainer_files = 0; @@ -138,6 +139,7 @@ my %VCS_cmds_git = ( "subject_pattern" => "^GitSubject: (.*)", "stat_pattern" => "^(\\d+)\\t(\\d+)\\t\$file\$", "file_exists_cmd" => "git ls-files \$file", + "list_files_cmd" => "git ls-files \$file", ); my %VCS_cmds_hg = ( @@ -167,6 +169,7 @@ my %VCS_cmds_hg = ( "subject_pattern" => "^HgSubject: (.*)", "stat_pattern" => "^(\\d+)\t(\\d+)\t\$file\$", "file_exists_cmd" => "hg files \$file", + "list_files_cmd" => "hg manifest -R \$file", ); my $conf = which_conf(".get_maintainer.conf"); @@ -216,6 +219,14 @@ if (-f $ignore_file) { close($ignore); } +if ($#ARGV > 0) { + foreach (@ARGV) { + if ($_ =~ /^-{1,2}self-test(?:=|$)/) { + die "$P: using --self-test does not allow any other option or argument\n"; + } + } +} + if (!GetOptions( 'email!' => \$email, 'git!' => \$email_git, @@ -252,6 +263,7 @@ if (!GetOptions( 'fe|file-emails!' => \$file_emails, 'f|file' => \$from_filename, 'find-maintainer-files' => \$find_maintainer_files, + 'self-test:s' => \$self_test, 'v|version' => \$version, 'h|help|usage' => \$help, )) { @@ -268,6 +280,12 @@ if ($version != 0) { exit 0; } +if (defined $self_test) { + read_all_maintainer_files(); + self_test(); + exit 0; +} + if (-t STDIN && !@ARGV) { # We're talking to a terminal, but have no command line arguments. die "$P: missing patchfile or -f file - use --help if necessary\n"; @@ -311,14 +329,17 @@ if (!top_of_kernel_tree($lk_path)) { my @typevalue = (); my %keyword_hash; my @mfiles = (); +my @self_test_info = (); sub read_maintainer_file { my ($file) = @_; open (my $maint, '<', "$file") or die "$P: Can't open MAINTAINERS file '$file': $!\n"; + my $i = 1; while (<$maint>) { my $line = $_; + chomp $line; if ($line =~ m/^([A-Z]):\s*(.*)/) { my $type = $1; @@ -338,9 +359,12 @@ sub read_maintainer_file { } push(@typevalue, "$type:$value"); } elsif (!(/^\s*$/ || /^\s*\#/)) { - $line =~ s/\n$//g; push(@typevalue, $line); } + if (defined $self_test) { + push(@self_test_info, {file=>$file, linenr=>$i, line=>$line}); + } + $i++; } close($maint); } @@ -357,26 +381,30 @@ sub find_ignore_git { return grep { $_ !~ /^\.git$/; } @_; } -if (-d "${lk_path}MAINTAINERS") { - opendir(DIR, "${lk_path}MAINTAINERS") or die $!; - my @files = readdir(DIR); - closedir(DIR); - foreach my $file (@files) { - push(@mfiles, "${lk_path}MAINTAINERS/$file") if ($file !~ /^\./); +read_all_maintainer_files(); + +sub read_all_maintainer_files { + if (-d "${lk_path}MAINTAINERS") { + opendir(DIR, "${lk_path}MAINTAINERS") or die $!; + my @files = readdir(DIR); + closedir(DIR); + foreach my $file (@files) { + push(@mfiles, "${lk_path}MAINTAINERS/$file") if ($file !~ /^\./); + } } -} -if ($find_maintainer_files) { - find( { wanted => \&find_is_maintainer_file, - preprocess => \&find_ignore_git, - no_chdir => 1, - }, "${lk_path}"); -} else { - push(@mfiles, "${lk_path}MAINTAINERS") if -f "${lk_path}MAINTAINERS"; -} + if ($find_maintainer_files) { + find( { wanted => \&find_is_maintainer_file, + preprocess => \&find_ignore_git, + no_chdir => 1, + }, "${lk_path}"); + } else { + push(@mfiles, "${lk_path}MAINTAINERS") if -f "${lk_path}MAINTAINERS"; + } -foreach my $file (@mfiles) { - read_maintainer_file("$file"); + foreach my $file (@mfiles) { + read_maintainer_file("$file"); + } } # @@ -586,6 +614,135 @@ if ($web) { exit($exit); +sub self_test { + my @lsfiles = (); + my @good_links = (); + my @bad_links = (); + my @section_headers = (); + my $index = 0; + + @lsfiles = vcs_list_files($lk_path); + + for my $x (@self_test_info) { + $index++; + + ## Section header duplication and missing section content + if (($self_test eq "" || $self_test =~ /\bsections\b/) && + $x->{line} =~ /^\S[^:]/ && + defined $self_test_info[$index] && + $self_test_info[$index]->{line} =~ /^([A-Z]):\s*\S/) { + my $has_S = 0; + my $has_F = 0; + my $has_ML = 0; + my $status = ""; + if (grep(m@^\Q$x->{line}\E@, @section_headers)) { + print("$x->{file}:$x->{linenr}: warning: duplicate section header\t$x->{line}\n"); + } else { + push(@section_headers, $x->{line}); + } + my $nextline = $index; + while (defined $self_test_info[$nextline] && + $self_test_info[$nextline]->{line} =~ /^([A-Z]):\s*(\S.*)/) { + my $type = $1; + my $value = $2; + if ($type eq "S") { + $has_S = 1; + $status = $value; + } elsif ($type eq "F" || $type eq "N") { + $has_F = 1; + } elsif ($type eq "M" || $type eq "R" || $type eq "L") { + $has_ML = 1; + } + $nextline++; + } + if (!$has_ML && $status !~ /orphan|obsolete/i) { + print("$x->{file}:$x->{linenr}: warning: section without email address\t$x->{line}\n"); + } + if (!$has_S) { + print("$x->{file}:$x->{linenr}: warning: section without status \t$x->{line}\n"); + } + if (!$has_F) { + print("$x->{file}:$x->{linenr}: warning: section without file pattern\t$x->{line}\n"); + } + } + + next if ($x->{line} !~ /^([A-Z]):\s*(.*)/); + + my $type = $1; + my $value = $2; + + ## Filename pattern matching + if (($type eq "F" || $type eq "X") && + ($self_test eq "" || $self_test =~ /\bpatterns\b/)) { + $value =~ s@\.@\\\.@g; ##Convert . to \. + $value =~ s/\*/\.\*/g; ##Convert * to .* + $value =~ s/\?/\./g; ##Convert ? to . + ##if pattern is a directory and it lacks a trailing slash, add one + if ((-d $value)) { + $value =~ s@([^/])$@$1/@; + } + if (!grep(m@^$value@, @lsfiles)) { + print("$x->{file}:$x->{linenr}: warning: no file matches\t$x->{line}\n"); + } + + ## Link reachability + } elsif (($type eq "W" || $type eq "Q" || $type eq "B") && + $value =~ /^https?:/ && + ($self_test eq "" || $self_test =~ /\blinks\b/)) { + next if (grep(m@^\Q$value\E$@, @good_links)); + my $isbad = 0; + if (grep(m@^\Q$value\E$@, @bad_links)) { + $isbad = 1; + } else { + my $output = `wget --spider -q --no-check-certificate --timeout 10 --tries 1 $value`; + if ($? == 0) { + push(@good_links, $value); + } else { + push(@bad_links, $value); + $isbad = 1; + } + } + if ($isbad) { + print("$x->{file}:$x->{linenr}: warning: possible bad link\t$x->{line}\n"); + } + + ## SCM reachability + } elsif ($type eq "T" && + ($self_test eq "" || $self_test =~ /\bscm\b/)) { + next if (grep(m@^\Q$value\E$@, @good_links)); + my $isbad = 0; + if (grep(m@^\Q$value\E$@, @bad_links)) { + $isbad = 1; + } elsif ($value !~ /^(?:git|quilt|hg)\s+\S/) { + print("$x->{file}:$x->{linenr}: warning: malformed entry\t$x->{line}\n"); + } elsif ($value =~ /^git\s+(\S+)(\s+([^\(]+\S+))?/) { + my $url = $1; + my $branch = ""; + $branch = $3 if $3; + my $output = `git ls-remote --exit-code -h "$url" $branch > /dev/null 2>&1`; + if ($? == 0) { + push(@good_links, $value); + } else { + push(@bad_links, $value); + $isbad = 1; + } + } elsif ($value =~ /^(?:quilt|hg)\s+(https?:\S+)/) { + my $url = $1; + my $output = `wget --spider -q --no-check-certificate --timeout 10 --tries 1 $url`; + if ($? == 0) { + push(@good_links, $value); + } else { + push(@bad_links, $value); + $isbad = 1; + } + } + if ($isbad) { + print("$x->{file}:$x->{linenr}: warning: possible bad link\t$x->{line}\n"); + } + } + } +} + sub ignore_email_address { my ($address) = @_; @@ -863,6 +1020,7 @@ Other options: --sections => print all of the subsystem sections with pattern matches --letters => print all matching 'letter' types from all matching sections --mailmap => use .mailmap file (default: $email_use_mailmap) + --self-test => show potential issues with MAINTAINERS file content --version => show version --help => show this help information @@ -2192,6 +2350,23 @@ sub vcs_file_exists { return $exists; } +sub vcs_list_files { + my ($file) = @_; + + my @lsfiles = (); + + my $vcs_used = vcs_exists(); + return 0 if (!$vcs_used); + + my $cmd = $VCS_cmds{"list_files_cmd"}; + $cmd =~ s/(\$\w+)/$1/eeg; # interpolate $cmd + @lsfiles = &{$VCS_cmds{"execute_cmd"}}($cmd); + + return () if ($? != 0); + + return @lsfiles; +} + sub uniq { my (@parms) = @_; diff --git a/scripts/parse-maintainers.pl b/scripts/parse-maintainers.pl index 5dbd2faa2449..255cef1b098d 100644 --- a/scripts/parse-maintainers.pl +++ b/scripts/parse-maintainers.pl @@ -2,9 +2,44 @@ # SPDX-License-Identifier: GPL-2.0 use strict; +use Getopt::Long qw(:config no_auto_abbrev); + +my $input_file = "MAINTAINERS"; +my $output_file = "MAINTAINERS.new"; +my $output_section = "SECTION.new"; +my $help = 0; my $P = $0; +if (!GetOptions( + 'input=s' => \$input_file, + 'output=s' => \$output_file, + 'section=s' => \$output_section, + 'h|help|usage' => \$help, + )) { + die "$P: invalid argument - use --help if necessary\n"; +} + +if ($help != 0) { + usage(); + exit 0; +} + +sub usage { + print <<EOT; +usage: $P [options] <pattern matching regexes> + + --input => MAINTAINERS file to read (default: MAINTAINERS) + --output => sorted MAINTAINERS file to write (default: MAINTAINERS.new) + --section => new sorted MAINTAINERS file to write to (default: SECTION.new) + +If <pattern match regexes> exist, then the sections that match the +regexes are not written to the output file but are written to the +section file. + +EOT +} + # sort comparison functions sub by_category($$) { my ($a, $b) = @_; @@ -56,13 +91,20 @@ sub trim { sub alpha_output { my ($hashref, $filename) = (@_); + return if ! scalar(keys %$hashref); + open(my $file, '>', "$filename") or die "$P: $filename: open failed - $!\n"; + my $separator; foreach my $key (sort by_category keys %$hashref) { if ($key eq " ") { - chomp $$hashref{$key}; print $file $$hashref{$key}; } else { - print $file "\n" . $key . "\n"; + if (! defined $separator) { + $separator = "\n"; + } else { + print $file $separator; + } + print $file $key . "\n"; foreach my $pattern (sort by_pattern split('\n', %$hashref{$key})) { print $file ($pattern . "\n"); } @@ -112,7 +154,7 @@ sub file_input { my %hash; my %new_hash; -file_input(\%hash, "MAINTAINERS"); +file_input(\%hash, $input_file); foreach my $type (@ARGV) { foreach my $key (keys %hash) { @@ -123,7 +165,7 @@ foreach my $type (@ARGV) { } } -alpha_output(\%hash, "MAINTAINERS.new"); -alpha_output(\%new_hash, "SECTION.new"); +alpha_output(\%hash, $output_file); +alpha_output(\%new_hash, $output_section); exit(0); diff --git a/scripts/spelling.txt b/scripts/spelling.txt index aa0cc49ad1ad..9a058cff49d4 100644 --- a/scripts/spelling.txt +++ b/scripts/spelling.txt @@ -1187,6 +1187,10 @@ unknonw||unknown unknow||unknown unkown||unknown unneded||unneeded +unneccecary||unnecessary +unneccesary||unnecessary +unneccessary||unnecessary +unnecesary||unnecessary unneedingly||unnecessarily unnsupported||unsupported unmached||unmatched diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c index 7c214ceb9386..315df0a70265 100644 --- a/tools/lib/traceevent/parse-filter.c +++ b/tools/lib/traceevent/parse-filter.c @@ -436,13 +436,13 @@ create_arg_exp(enum filter_exp_type etype) return NULL; arg->type = FILTER_ARG_EXP; - arg->op.type = etype; + arg->exp.type = etype; return arg; } static struct filter_arg * -create_arg_cmp(enum filter_exp_type etype) +create_arg_cmp(enum filter_cmp_type ctype) { struct filter_arg *arg; @@ -452,7 +452,7 @@ create_arg_cmp(enum filter_exp_type etype) /* Use NUM and change if necessary */ arg->type = FILTER_ARG_NUM; - arg->op.type = etype; + arg->num.type = ctype; return arg; } diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index e49eca1915f8..7f45806bd863 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -18,6 +18,7 @@ TEST_GEN_FILES += transhuge-stress TEST_GEN_FILES += userfaultfd TEST_GEN_FILES += mlock-random-test TEST_GEN_FILES += virtual_address_range +TEST_GEN_FILES += gup_benchmark TEST_PROGS := run_vmtests diff --git a/tools/testing/selftests/vm/gup_benchmark.c b/tools/testing/selftests/vm/gup_benchmark.c new file mode 100644 index 000000000000..36df55132036 --- /dev/null +++ b/tools/testing/selftests/vm/gup_benchmark.c @@ -0,0 +1,91 @@ +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <sys/prctl.h> +#include <sys/stat.h> +#include <sys/types.h> + +#include <linux/types.h> + +#define MB (1UL << 20) +#define PAGE_SIZE sysconf(_SC_PAGESIZE) + +#define GUP_FAST_BENCHMARK _IOWR('g', 1, struct gup_benchmark) + +struct gup_benchmark { + __u64 delta_usec; + __u64 addr; + __u64 size; + __u32 nr_pages_per_call; + __u32 flags; +}; + +int main(int argc, char **argv) +{ + struct gup_benchmark gup; + unsigned long size = 128 * MB; + int i, fd, opt, nr_pages = 1, thp = -1, repeats = 1, write = 0; + char *p; + + while ((opt = getopt(argc, argv, "m:r:n:tT")) != -1) { + switch (opt) { + case 'm': + size = atoi(optarg) * MB; + break; + case 'r': + repeats = atoi(optarg); + break; + case 'n': + nr_pages = atoi(optarg); + break; + case 't': + thp = 1; + break; + case 'T': + thp = 0; + break; + case 'w': + write = 1; + default: + return -1; + } + } + + gup.nr_pages_per_call = nr_pages; + gup.flags = write; + + fd = open("/sys/kernel/debug/gup_benchmark", O_RDWR); + if (fd == -1) + perror("open"), exit(1); + + p = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (p == MAP_FAILED) + perror("mmap"), exit(1); + gup.addr = (unsigned long)p; + + if (thp == 1) + madvise(p, size, MADV_HUGEPAGE); + else if (thp == 0) + madvise(p, size, MADV_NOHUGEPAGE); + + for (; (unsigned long)p < gup.addr + size; p += PAGE_SIZE) + p[0] = 0; + + for (i = 0; i < repeats; i++) { + gup.size = size; + if (ioctl(fd, GUP_FAST_BENCHMARK, &gup)) + perror("ioctl"), exit(1); + + printf("Time: %lld us", gup.delta_usec); + if (gup.size != size) + printf(", truncated (size: %lld)", gup.size); + printf("\n"); + } + + return 0; +} |