diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-02-03 16:25:42 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-02-03 16:25:42 -0800 |
commit | 617aebe6a97efa539cc4b8a52adccd89596e6be0 (patch) | |
tree | 51c7753c940fd3727b8cc3e93553c57f89d1d9d2 | |
parent | 0771ad44a20bc512d1123bac728d3a89ea6febe6 (diff) | |
parent | e47e311843dece8073146f3606871280ee9beb87 (diff) |
Merge tag 'usercopy-v4.16-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux
Pull hardened usercopy whitelisting from Kees Cook:
"Currently, hardened usercopy performs dynamic bounds checking on slab
cache objects. This is good, but still leaves a lot of kernel memory
available to be copied to/from userspace in the face of bugs.
To further restrict what memory is available for copying, this creates
a way to whitelist specific areas of a given slab cache object for
copying to/from userspace, allowing much finer granularity of access
control.
Slab caches that are never exposed to userspace can declare no
whitelist for their objects, thereby keeping them unavailable to
userspace via dynamic copy operations. (Note, an implicit form of
whitelisting is the use of constant sizes in usercopy operations and
get_user()/put_user(); these bypass all hardened usercopy checks since
these sizes cannot change at runtime.)
This new check is WARN-by-default, so any mistakes can be found over
the next several releases without breaking anyone's system.
The series has roughly the following sections:
- remove %p and improve reporting with offset
- prepare infrastructure and whitelist kmalloc
- update VFS subsystem with whitelists
- update SCSI subsystem with whitelists
- update network subsystem with whitelists
- update process memory with whitelists
- update per-architecture thread_struct with whitelists
- update KVM with whitelists and fix ioctl bug
- mark all other allocations as not whitelisted
- update lkdtm for more sensible test overage"
* tag 'usercopy-v4.16-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux: (38 commits)
lkdtm: Update usercopy tests for whitelisting
usercopy: Restrict non-usercopy caches to size 0
kvm: x86: fix KVM_XEN_HVM_CONFIG ioctl
kvm: whitelist struct kvm_vcpu_arch
arm: Implement thread_struct whitelist for hardened usercopy
arm64: Implement thread_struct whitelist for hardened usercopy
x86: Implement thread_struct whitelist for hardened usercopy
fork: Provide usercopy whitelisting for task_struct
fork: Define usercopy region in thread_stack slab caches
fork: Define usercopy region in mm_struct slab caches
net: Restrict unwhitelisted proto caches to size 0
sctp: Copy struct sctp_sock.autoclose to userspace using put_user()
sctp: Define usercopy region in SCTP proto slab cache
caif: Define usercopy region in caif proto slab cache
ip: Define usercopy region in IP proto slab cache
net: Define usercopy region in struct proto slab cache
scsi: Define usercopy region in scsi_sense_cache slab cache
cifs: Define usercopy region in cifs_request slab cache
vxfs: Define usercopy region in vxfs_inode slab cache
ufs: Define usercopy region in ufs_inode_cache slab cache
...
45 files changed, 515 insertions, 215 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index d007b2a15b22..467dfa35bf96 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -245,6 +245,17 @@ config ARCH_TASK_STRUCT_ON_STACK config ARCH_TASK_STRUCT_ALLOCATOR bool +config HAVE_ARCH_THREAD_STRUCT_WHITELIST + bool + depends on !ARCH_TASK_STRUCT_ALLOCATOR + help + An architecture should select this to provide hardened usercopy + knowledge about what region of the thread_struct should be + whitelisted for copying to userspace. Normally this is only the + FPU registers. Specifically, arch_thread_struct_whitelist() + should be implemented. Without this, the entire thread_struct + field in task_struct will be left whitelisted. + # Select if arch has its private alloc_thread_stack() function config ARCH_THREAD_STACK_ALLOCATOR bool diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 4daddc53e7bc..7e3d53575486 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -51,6 +51,7 @@ config ARM select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU select HAVE_ARCH_MMAP_RND_BITS if MMU select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT) + select HAVE_ARCH_THREAD_STRUCT_WHITELIST select HAVE_ARCH_TRACEHOOK select HAVE_ARM_SMCCC if CPU_V7 select HAVE_EBPF_JIT if !CPU_ENDIAN_BE32 diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h index 338cbe0a18ef..1bf65b47808a 100644 --- a/arch/arm/include/asm/processor.h +++ b/arch/arm/include/asm/processor.h @@ -45,6 +45,16 @@ struct thread_struct { struct debug_info debug; }; +/* + * Everything usercopied to/from thread_struct is statically-sized, so + * no hardened usercopy whitelist is needed. + */ +static inline void arch_thread_struct_whitelist(unsigned long *offset, + unsigned long *size) +{ + *offset = *size = 0; +} + #define INIT_THREAD { } #define start_thread(regs,pc,sp) \ diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index b2b95f79c746..53612879fe56 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -91,6 +91,7 @@ config ARM64 select HAVE_ARCH_MMAP_RND_BITS select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT select HAVE_ARCH_SECCOMP_FILTER + select HAVE_ARCH_THREAD_STRUCT_WHITELIST select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_ARCH_VMAP_STACK diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index cee4ae25a5d1..6db43ebd648d 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -113,6 +113,16 @@ struct thread_struct { struct debug_info debug; /* debugging */ }; +/* + * Everything usercopied to/from thread_struct is statically-sized, so + * no hardened usercopy whitelist is needed. + */ +static inline void arch_thread_struct_whitelist(unsigned long *offset, + unsigned long *size) +{ + *offset = *size = 0; +} + #ifdef CONFIG_COMPAT #define task_user_tls(t) \ ({ \ diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 72d5149bcaa1..b0771ceabb4b 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -116,6 +116,7 @@ config X86 select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT select HAVE_ARCH_COMPAT_MMAP_BASES if MMU && COMPAT select HAVE_ARCH_SECCOMP_FILTER + select HAVE_ARCH_THREAD_STRUCT_WHITELIST select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64 diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index efbde088a718..6207f2f740b6 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -507,6 +507,14 @@ struct thread_struct { */ }; +/* Whitelist the FPU state from the task_struct for hardened usercopy. */ +static inline void arch_thread_struct_whitelist(unsigned long *offset, + unsigned long *size) +{ + *offset = offsetof(struct thread_struct, fpu.state); + *size = fpu_kernel_xstate_size; +} + /* * Thread-synchronous status. * diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c53298dfbf50..5040ff9de6ab 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4237,13 +4237,14 @@ set_identity_unlock: mutex_unlock(&kvm->lock); break; case KVM_XEN_HVM_CONFIG: { + struct kvm_xen_hvm_config xhc; r = -EFAULT; - if (copy_from_user(&kvm->arch.xen_hvm_config, argp, - sizeof(struct kvm_xen_hvm_config))) + if (copy_from_user(&xhc, argp, sizeof(xhc))) goto out; r = -EINVAL; - if (kvm->arch.xen_hvm_config.flags) + if (xhc.flags) goto out; + memcpy(&kvm->arch.xen_hvm_config, &xhc, sizeof(xhc)); r = 0; break; } diff --git a/drivers/misc/lkdtm.h b/drivers/misc/lkdtm.h index 687a0dbbe199..9e513dcfd809 100644 --- a/drivers/misc/lkdtm.h +++ b/drivers/misc/lkdtm.h @@ -76,8 +76,8 @@ void __init lkdtm_usercopy_init(void); void __exit lkdtm_usercopy_exit(void); void lkdtm_USERCOPY_HEAP_SIZE_TO(void); void lkdtm_USERCOPY_HEAP_SIZE_FROM(void); -void lkdtm_USERCOPY_HEAP_FLAG_TO(void); -void lkdtm_USERCOPY_HEAP_FLAG_FROM(void); +void lkdtm_USERCOPY_HEAP_WHITELIST_TO(void); +void lkdtm_USERCOPY_HEAP_WHITELIST_FROM(void); void lkdtm_USERCOPY_STACK_FRAME_TO(void); void lkdtm_USERCOPY_STACK_FRAME_FROM(void); void lkdtm_USERCOPY_STACK_BEYOND(void); diff --git a/drivers/misc/lkdtm_core.c b/drivers/misc/lkdtm_core.c index 4942da93d066..2154d1bfd18b 100644 --- a/drivers/misc/lkdtm_core.c +++ b/drivers/misc/lkdtm_core.c @@ -177,8 +177,8 @@ static const struct crashtype crashtypes[] = { CRASHTYPE(ATOMIC_TIMING), CRASHTYPE(USERCOPY_HEAP_SIZE_TO), CRASHTYPE(USERCOPY_HEAP_SIZE_FROM), - CRASHTYPE(USERCOPY_HEAP_FLAG_TO), - CRASHTYPE(USERCOPY_HEAP_FLAG_FROM), + CRASHTYPE(USERCOPY_HEAP_WHITELIST_TO), + CRASHTYPE(USERCOPY_HEAP_WHITELIST_FROM), CRASHTYPE(USERCOPY_STACK_FRAME_TO), CRASHTYPE(USERCOPY_STACK_FRAME_FROM), CRASHTYPE(USERCOPY_STACK_BEYOND), diff --git a/drivers/misc/lkdtm_usercopy.c b/drivers/misc/lkdtm_usercopy.c index a64372cc148d..9725aed305bb 100644 --- a/drivers/misc/lkdtm_usercopy.c +++ b/drivers/misc/lkdtm_usercopy.c @@ -20,7 +20,7 @@ */ static volatile size_t unconst = 0; static volatile size_t cache_size = 1024; -static struct kmem_cache *bad_cache; +static struct kmem_cache *whitelist_cache; static const unsigned char test_text[] = "This is a test.\n"; @@ -115,10 +115,16 @@ free_user: vm_munmap(user_addr, PAGE_SIZE); } +/* + * This checks for whole-object size validation with hardened usercopy, + * with or without usercopy whitelisting. + */ static void do_usercopy_heap_size(bool to_user) { unsigned long user_addr; unsigned char *one, *two; + void __user *test_user_addr; + void *test_kern_addr; size_t size = unconst + 1024; one = kmalloc(size, GFP_KERNEL); @@ -139,27 +145,30 @@ static void do_usercopy_heap_size(bool to_user) memset(one, 'A', size); memset(two, 'B', size); + test_user_addr = (void __user *)(user_addr + 16); + test_kern_addr = one + 16; + if (to_user) { pr_info("attempting good copy_to_user of correct size\n"); - if (copy_to_user((void __user *)user_addr, one, size)) { + if (copy_to_user(test_user_addr, test_kern_addr, size / 2)) { pr_warn("copy_to_user failed unexpectedly?!\n"); goto free_user; } pr_info("attempting bad copy_to_user of too large size\n"); - if (copy_to_user((void __user *)user_addr, one, 2 * size)) { + if (copy_to_user(test_user_addr, test_kern_addr, size)) { pr_warn("copy_to_user failed, but lacked Oops\n"); goto free_user; } } else { pr_info("attempting good copy_from_user of correct size\n"); - if (copy_from_user(one, (void __user *)user_addr, size)) { + if (copy_from_user(test_kern_addr, test_user_addr, size / 2)) { pr_warn("copy_from_user failed unexpectedly?!\n"); goto free_user; } pr_info("attempting bad copy_from_user of too large size\n"); - if (copy_from_user(one, (void __user *)user_addr, 2 * size)) { + if (copy_from_user(test_kern_addr, test_user_addr, size)) { pr_warn("copy_from_user failed, but lacked Oops\n"); goto free_user; } @@ -172,77 +181,79 @@ free_kernel: kfree(two); } -static void do_usercopy_heap_flag(bool to_user) +/* + * This checks for the specific whitelist window within an object. If this + * test passes, then do_usercopy_heap_size() tests will pass too. + */ +static void do_usercopy_heap_whitelist(bool to_user) { - unsigned long user_addr; - unsigned char *good_buf = NULL; - unsigned char *bad_buf = NULL; + unsigned long user_alloc; + unsigned char *buf = NULL; + unsigned char __user *user_addr; + size_t offset, size; /* Make sure cache was prepared. */ - if (!bad_cache) { + if (!whitelist_cache) { pr_warn("Failed to allocate kernel cache\n"); return; } /* - * Allocate one buffer from each cache (kmalloc will have the - * SLAB_USERCOPY flag already, but "bad_cache" won't). + * Allocate a buffer with a whitelisted window in the buffer. */ - good_buf = kmalloc(cache_size, GFP_KERNEL); - bad_buf = kmem_cache_alloc(bad_cache, GFP_KERNEL); - if (!good_buf || !bad_buf) { - pr_warn("Failed to allocate buffers from caches\n"); + buf = kmem_cache_alloc(whitelist_cache, GFP_KERNEL); + if (!buf) { + pr_warn("Failed to allocate buffer from whitelist cache\n"); goto free_alloc; } /* Allocate user memory we'll poke at. */ - user_addr = vm_mmap(NULL, 0, PAGE_SIZE, + user_alloc = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANONYMOUS | MAP_PRIVATE, 0); - if (user_addr >= TASK_SIZE) { + if (user_alloc >= TASK_SIZE) { pr_warn("Failed to allocate user memory\n"); goto free_alloc; } + user_addr = (void __user *)user_alloc; - memset(good_buf, 'A', cache_size); - memset(bad_buf, 'B', cache_size); + memset(buf, 'B', cache_size); + + /* Whitelisted window in buffer, from kmem_cache_create_usercopy. */ + offset = (cache_size / 4) + unconst; + size = (cache_size / 16) + unconst; if (to_user) { - pr_info("attempting good copy_to_user with SLAB_USERCOPY\n"); - if (copy_to_user((void __user *)user_addr, good_buf, - cache_size)) { + pr_info("attempting good copy_to_user inside whitelist\n"); + if (copy_to_user(user_addr, buf + offset, size)) { pr_warn("copy_to_user failed unexpectedly?!\n"); goto free_user; } - pr_info("attempting bad copy_to_user w/o SLAB_USERCOPY\n"); - if (copy_to_user((void __user *)user_addr, bad_buf, - cache_size)) { + pr_info("attempting bad copy_to_user outside whitelist\n"); + if (copy_to_user(user_addr, buf + offset - 1, size)) { pr_warn("copy_to_user failed, but lacked Oops\n"); goto free_user; } } else { - pr_info("attempting good copy_from_user with SLAB_USERCOPY\n"); - if (copy_from_user(good_buf, (void __user *)user_addr, - cache_size)) { + pr_info("attempting good copy_from_user inside whitelist\n"); + if (copy_from_user(buf + offset, user_addr, size)) { pr_warn("copy_from_user failed unexpectedly?!\n"); goto free_user; } - pr_info("attempting bad copy_from_user w/o SLAB_USERCOPY\n"); - if (copy_from_user(bad_buf, (void __user *)user_addr, - cache_size)) { + pr_info("attempting bad copy_from_user outside whitelist\n"); + if (copy_from_user(buf + offset - 1, user_addr, size)) { pr_warn("copy_from_user failed, but lacked Oops\n"); goto free_user; } } free_user: - vm_munmap(user_addr, PAGE_SIZE); + vm_munmap(user_alloc, PAGE_SIZE); free_alloc: - if (bad_buf) - kmem_cache_free(bad_cache, bad_buf); - kfree(good_buf); + if (buf) + kmem_cache_free(whitelist_cache, buf); } /* Callable tests. */ @@ -256,14 +267,14 @@ void lkdtm_USERCOPY_HEAP_SIZE_FROM(void) do_usercopy_heap_size(false); } -void lkdtm_USERCOPY_HEAP_FLAG_TO(void) +void lkdtm_USERCOPY_HEAP_WHITELIST_TO(void) { - do_usercopy_heap_flag(true); + do_usercopy_heap_whitelist(true); } -void lkdtm_USERCOPY_HEAP_FLAG_FROM(void) +void lkdtm_USERCOPY_HEAP_WHITELIST_FROM(void) { - do_usercopy_heap_flag(false); + do_usercopy_heap_whitelist(false); } void lkdtm_USERCOPY_STACK_FRAME_TO(void) @@ -314,11 +325,15 @@ free_user: void __init lkdtm_usercopy_init(void) { /* Prepare cache that lacks SLAB_USERCOPY flag. */ - bad_cache = kmem_cache_create("lkdtm-no-usercopy", cache_size, 0, - 0, NULL); + whitelist_cache = + kmem_cache_create_usercopy("lkdtm-usercopy", cache_size, + 0, 0, + cache_size / 4, + cache_size / 16, + NULL); } void __exit lkdtm_usercopy_exit(void) { - kmem_cache_destroy(bad_cache); + kmem_cache_destroy(whitelist_cache); } diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 976c936029cb..8efe4731ed89 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -79,14 +79,15 @@ int scsi_init_sense_cache(struct Scsi_Host *shost) if (shost->unchecked_isa_dma) { scsi_sense_isadma_cache = kmem_cache_create("scsi_sense_cache(DMA)", - SCSI_SENSE_BUFFERSIZE, 0, - SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA, NULL); + SCSI_SENSE_BUFFERSIZE, 0, + SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA, NULL); if (!scsi_sense_isadma_cache) ret = -ENOMEM; } else { scsi_sense_cache = - kmem_cache_create("scsi_sense_cache", - SCSI_SENSE_BUFFERSIZE, 0, SLAB_HWCACHE_ALIGN, NULL); + kmem_cache_create_usercopy("scsi_sense_cache", + SCSI_SENSE_BUFFERSIZE, 0, SLAB_HWCACHE_ALIGN, + 0, SCSI_SENSE_BUFFERSIZE, NULL); if (!scsi_sense_cache) ret = -ENOMEM; } diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index ee236231cafa..af2832aaeec5 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -444,11 +444,15 @@ unacquire_none: static int __init befs_init_inodecache(void) { - befs_inode_cachep = kmem_cache_create("befs_inode_cache", - sizeof (struct befs_inode_info), - 0, (SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD|SLAB_ACCOUNT), - init_once); + befs_inode_cachep = kmem_cache_create_usercopy("befs_inode_cache", + sizeof(struct befs_inode_info), 0, + (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD| + SLAB_ACCOUNT), + offsetof(struct befs_inode_info, + i_data.symlink), + sizeof_field(struct befs_inode_info, + i_data.symlink), + init_once); if (befs_inode_cachep == NULL) return -ENOMEM; diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index a7be591d8e18..32cdea67bbfd 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -1239,9 +1239,11 @@ cifs_init_request_bufs(void) cifs_dbg(VFS, "CIFSMaxBufSize %d 0x%x\n", CIFSMaxBufSize, CIFSMaxBufSize); */ - cifs_req_cachep = kmem_cache_create("cifs_request", + cifs_req_cachep = kmem_cache_create_usercopy("cifs_request", CIFSMaxBufSize + max_hdr_size, 0, - SLAB_HWCACHE_ALIGN, NULL); + SLAB_HWCACHE_ALIGN, 0, + CIFSMaxBufSize + max_hdr_size, + NULL); if (cifs_req_cachep == NULL) return -ENOMEM; @@ -1267,9 +1269,9 @@ cifs_init_request_bufs(void) more SMBs to use small buffer alloc and is still much more efficient to alloc 1 per page off the slab compared to 17K (5page) alloc of large cifs buffers even when page debugging is on */ - cifs_sm_req_cachep = kmem_cache_create("cifs_small_rq", + cifs_sm_req_cachep = kmem_cache_create_usercopy("cifs_small_rq", MAX_CIFS_SMALL_BUFFER_SIZE, 0, SLAB_HWCACHE_ALIGN, - NULL); + 0, MAX_CIFS_SMALL_BUFFER_SIZE, NULL); if (cifs_sm_req_cachep == NULL) { mempool_destroy(cifs_req_poolp); kmem_cache_destroy(cifs_req_cachep); diff --git a/fs/dcache.c b/fs/dcache.c index 5b207e0de459..cca2b377ff0a 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -3602,8 +3602,9 @@ static void __init dcache_init(void) * but it is probably not worth it because of the cache nature * of the dcache. */ - dentry_cache = KMEM_CACHE(dentry, - SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD|SLAB_ACCOUNT); + dentry_cache = KMEM_CACHE_USERCOPY(dentry, + SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD|SLAB_ACCOUNT, + d_iname); /* Hash may have been set up in dcache_init_early */ if (!hashdist) @@ -3641,8 +3642,8 @@ void __init vfs_caches_init_early(void) void __init vfs_caches_init(void) { - names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); + names_cachep = kmem_cache_create_usercopy("names_cache", PATH_MAX, 0, + SLAB_HWCACHE_ALIGN|SLAB_PANIC, 0, PATH_MAX, NULL); dcache_init(); inode_init(); diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 7e244093c0e5..179cd5c2f52a 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -193,10 +193,13 @@ static void exofs_init_once(void *foo) */ static int init_inodecache(void) { - exofs_inode_cachep = kmem_cache_create("exofs_inode_cache", + exofs_inode_cachep = kmem_cache_create_usercopy("exofs_inode_cache", sizeof(struct exofs_i_info), 0, SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | - SLAB_ACCOUNT, exofs_init_once); + SLAB_ACCOUNT, + offsetof(struct exofs_i_info, i_data), + sizeof_field(struct exofs_i_info, i_data), + exofs_init_once); if (exofs_inode_cachep == NULL) return -ENOMEM; return 0; diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 554c98b8a93a..db5f9daa7780 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -221,11 +221,13 @@ static void init_once(void *foo) static int __init init_inodecache(void) { - ext2_inode_cachep = kmem_cache_create("ext2_inode_cache", - sizeof(struct ext2_inode_info), - 0, (SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD|SLAB_ACCOUNT), - init_once); + ext2_inode_cachep = kmem_cache_create_usercopy("ext2_inode_cache", + sizeof(struct ext2_inode_info), 0, + (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD| + SLAB_ACCOUNT), + offsetof(struct ext2_inode_info, i_data), + sizeof_field(struct ext2_inode_info, i_data), + init_once); if (ext2_inode_cachep == NULL) return -ENOMEM; return 0; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 82eead1c8b84..899e6faf3381 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1038,11 +1038,13 @@ static void init_once(void *foo) static int __init init_inodecache(void) { - ext4_inode_cachep = kmem_cache_create("ext4_inode_cache", - sizeof(struct ext4_inode_info), - 0, (SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD|SLAB_ACCOUNT), - init_once); + ext4_inode_cachep = kmem_cache_create_usercopy("ext4_inode_cache", + sizeof(struct ext4_inode_info), 0, + (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD| + SLAB_ACCOUNT), + offsetof(struct ext4_inode_info, i_data), + sizeof_field(struct ext4_inode_info, i_data), + init_once); if (ext4_inode_cachep == NULL) return -ENOMEM; return 0; diff --git a/fs/fhandle.c b/fs/fhandle.c index 0ace128f5d23..0ee727485615 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c @@ -69,8 +69,7 @@ static long do_sys_name_to_handle(struct path *path, } else retval = 0; /* copy the mount id */ - if (copy_to_user(mnt_id, &real_mount(path->mnt)->mnt_id, - sizeof(*mnt_id)) || + if (put_user(real_mount(path->mnt)->mnt_id, mnt_id) || copy_to_user(ufh, handle, sizeof(struct file_handle) + handle_bytes)) retval = -EFAULT; diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index f989efa051a0..48b24bb50d02 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c @@ -332,9 +332,13 @@ vxfs_init(void) { int rv; - vxfs_inode_cachep = kmem_cache_create("vxfs_inode", + vxfs_inode_cachep = kmem_cache_create_usercopy("vxfs_inode", sizeof(struct vxfs_inode_info), 0, - SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL); + SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, + offsetof(struct vxfs_inode_info, vii_immed.vi_immed), + sizeof_field(struct vxfs_inode_info, + vii_immed.vi_immed), + NULL); if (!vxfs_inode_cachep) return -ENOMEM; rv = register_filesystem(&vxfs_fs_type); diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 90373aebfdca..1b9264fd54b6 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -965,9 +965,11 @@ static int __init init_jfs_fs(void) int rc; jfs_inode_cachep = - kmem_cache_create("jfs_ip", sizeof(struct jfs_inode_info), 0, - SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|SLAB_ACCOUNT, - init_once); + kmem_cache_create_usercopy("jfs_ip", sizeof(struct jfs_inode_info), + 0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|SLAB_ACCOUNT, + offsetof(struct jfs_inode_info, i_inline), + sizeof_field(struct jfs_inode_info, i_inline), + init_once); if (jfs_inode_cachep == NULL) return -ENOMEM; diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c index 36f1390b5ed7..62d49e53061c 100644 --- a/fs/orangefs/super.c +++ b/fs/orangefs/super.c @@ -610,11 +610,16 @@ void orangefs_kill_sb(struct super_block *sb) int orangefs_inode_cache_initialize(void) { - orangefs_inode_cache = kmem_cache_create("orangefs_inode_cache", - sizeof(struct orangefs_inode_s), - 0, - ORANGEFS_CACHE_CREATE_FLAGS, - orangefs_inode_cache_ctor); + orangefs_inode_cache = kmem_cache_create_usercopy( + "orangefs_inode_cache", + sizeof(struct orangefs_inode_s), + 0, + ORANGEFS_CACHE_CREATE_FLAGS, + offsetof(struct orangefs_inode_s, + link_target), + sizeof_field(struct orangefs_inode_s, + link_target), + orangefs_inode_cache_ctor); if (!orangefs_inode_cache) { gossip_err("Cannot create orangefs_inode_cache\n"); diff --git a/fs/ufs/super.c b/fs/ufs/super.c index b6ba80e05bff..8254b8b3690f 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1467,11 +1467,14 @@ static void init_once(void *foo) static int __init init_inodecache(void) { - ufs_inode_cachep = kmem_cache_create("ufs_inode_cache", - sizeof(struct ufs_inode_info), - 0, (SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD|SLAB_ACCOUNT), - init_once); + ufs_inode_cachep = kmem_cache_create_usercopy("ufs_inode_cache", + sizeof(struct ufs_inode_info), 0, + (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD| + SLAB_ACCOUNT), + offsetof(struct ufs_inode_info, i_u1.i_symlink), + sizeof_field(struct ufs_inode_info, + i_u1.i_symlink), + init_once); if (ufs_inode_cachep == NULL) return -ENOMEM; return 0; diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 05b8650f06f5..5be31eb7b266 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -104,6 +104,20 @@ extern int arch_task_struct_size __read_mostly; # define arch_task_struct_size (sizeof(struct task_struct)) #endif +#ifndef CONFIG_HAVE_ARCH_THREAD_STRUCT_WHITELIST +/* + * If an architecture has not declared a thread_struct whitelist we + * must assume something there may need to be copied to userspace. + */ +static inline void arch_thread_struct_whitelist(unsigned long *offset, + unsigned long *size) +{ + *offset = 0; + /* Handle dynamically sized thread_struct. */ + *size = arch_task_struct_size - offsetof(struct task_struct, thread); +} +#endif + #ifdef CONFIG_VMAP_STACK static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) { diff --git a/include/linux/slab.h b/include/linux/slab.h index 50697a1d6621..231abc8976c5 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -135,9 +135,15 @@ struct mem_cgroup; void __init kmem_cache_init(void); bool slab_is_available(void); -struct kmem_cache *kmem_cache_create(const char *, size_t, size_t, - slab_flags_t, - void (*)(void *)); +extern bool usercopy_fallback; + +struct kmem_cache *kmem_cache_create(const char *name, size_t size, + size_t align, slab_flags_t flags, + void (*ctor)(void *)); +struct kmem_cache *kmem_cache_create_usercopy(const char *name, + size_t size, size_t align, slab_flags_t flags, + size_t useroffset, size_t usersize, + void (*ctor)(void *)); void kmem_cache_destroy(struct kmem_cache *); int kmem_cache_shrink(struct kmem_cache *); @@ -153,9 +159,20 @@ void memcg_destroy_kmem_caches(struct mem_cgroup *); * f.e. add ____cacheline_aligned_in_smp to the struct declaration * then the objects will be properly aligned in SMP configurations. */ -#define KMEM_CACHE(__struct, __flags) kmem_cache_create(#__struct,\ - sizeof(struct __struct), __alignof__(struct __struct),\ - (__flags), NULL) +#define KMEM_CACHE(__struct, __flags) \ + kmem_cache_create(#__struct, sizeof(struct __struct), \ + __alignof__(struct __struct), (__flags), NULL) + +/* + * To whitelist a single field for copying to/from usercopy, use this + * macro instead for KMEM_CACHE() above. + */ +#define KMEM_CACHE_USERCOPY(__struct, __flags, __field) \ + kmem_cache_create_usercopy(#__struct, \ + sizeof(struct __struct), \ + __alignof__(struct __struct), (__flags), \ + offsetof(struct __struct, __field), \ + sizeof_field(struct __struct, __field), NULL) /* * Common kmalloc functions provided by all allocators @@ -167,15 +184,11 @@ void kzfree(const void *); size_t ksize(const void *); #ifdef CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR -const char *__check_heap_object(const void *ptr, unsigned long n, - struct page *page); +void __check_heap_object(const void *ptr, unsigned long n, struct page *page, + bool to_user); #else -static inline const char *__check_heap_object(const void *ptr, - unsigned long n, - struct page *page) -{ - return NULL; -} +static inline void __check_heap_object(const void *ptr, unsigned long n, + struct page *page, bool to_user) { } #endif /* diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index 072e46e9e1d5..7385547c04b1 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -85,6 +85,9 @@ struct kmem_cache { unsigned int *random_seq; #endif + size_t useroffset; /* Usercopy region offset */ + size_t usersize; /* Usercopy region size */ + struct kmem_cache_node *node[MAX_NUMNODES]; }; diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 0adae162dc8f..8ad99c47b19c 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -135,6 +135,9 @@ struct kmem_cache { struct kasan_cache kasan_info; #endif + size_t useroffset; /* Usercopy region offset */ + size_t usersize; /* Usercopy region size */ + struct kmem_cache_node *node[MAX_NUMNODES]; }; diff --git a/include/linux/stddef.h b/include/linux/stddef.h index 2181719fd907..998a4ba28eba 100644 --- a/include/linux/stddef.h +++ b/include/linux/stddef.h @@ -20,12 +20,20 @@ enum { #endif /** + * sizeof_field(TYPE, MEMBER) + * + * @TYPE: The structure containing the field of interest + * @MEMBER: The field to return the size of + */ +#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER)) + +/** * offsetofend(TYPE, MEMBER) * * @TYPE: The type of the structure * @MEMBER: The member within the structure to get the end offset of */ #define offsetofend(TYPE, MEMBER) \ - (offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER)) + (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER)) #endif diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 251e655d407f..efe79c1cdd47 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -273,4 +273,12 @@ extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count); #define unsafe_put_user(x, ptr, err) do { if (unlikely(__put_user(x, ptr))) goto err; } while (0) #endif +#ifdef CONFIG_HARDENED_USERCOPY +void usercopy_warn(const char *name, const char *detail, bool to_user, + unsigned long offset, unsigned long len); +void __noreturn usercopy_abort(const char *name, const char *detail, + bool to_user, unsigned long offset, + unsigned long len); +#endif + #endif /* __LINUX_UACCESS_H__ */ diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 02369e379d35..03e92dda1813 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -203,12 +203,17 @@ struct sctp_sock { /* Flags controlling Heartbeat, SACK delay, and Path MTU Discovery. */ __u32 param_flags; - struct sctp_initmsg initmsg; struct sctp_rtoinfo rtoinfo; struct sctp_paddrparams paddrparam; - struct sctp_event_subscribe subscribe; struct sctp_assocparams assocparams; + /* + * These two structures must be grouped together for the usercopy + * whitelist region. + */ + struct sctp_event_subscribe subscribe; + struct sctp_initmsg initmsg; + int user_frag; __u32 autoclose; diff --git a/include/net/sock.h b/include/net/sock.h index 63731289186a..169c92afcafa 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1109,6 +1109,8 @@ struct proto { struct kmem_cache *slab; unsigned int obj_size; slab_flags_t slab_flags; + size_t useroffset; /* Usercopy region offset */ + size_t usersize; /* Usercopy region size */ struct percpu_counter *orphan_count; diff --git a/kernel/fork.c b/kernel/fork.c index 5e6cf0dd031c..5c372c954f3b 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -283,8 +283,9 @@ static void free_thread_stack(struct task_struct *tsk) void thread_stack_cache_init(void) { - thread_stack_cache = kmem_cache_create("thread_stack", THREAD_SIZE, - THREAD_SIZE, 0, NULL); + thread_stack_cache = kmem_cache_create_usercopy("thread_stack", + THREAD_SIZE, THREAD_SIZE, 0, 0, + THREAD_SIZE, NULL); BUG_ON(thread_stack_cache == NULL); } # endif @@ -693,6 +694,21 @@ static void set_max_threads(unsigned int max_threads_suggested) int arch_task_struct_size __read_mostly; #endif +static void task_struct_whitelist(unsigned long *offset, unsigned long *size) +{ + /* Fetch thread_struct whitelist for the architecture. */ + arch_thread_struct_whitelist(offset, size); + + /* + * Handle zero-sized whitelist or empty thread_struct, otherwise + * adjust offset to position of thread_struct in task_struct. + */ + if (unlikely(*size == 0)) + *offset = 0; + else + *offset += offsetof(struct task_struct, thread); +} + void __init fork_init(void) { int i; @@ -701,11 +717,14 @@ void __init fork_init(void) #define ARCH_MIN_TASKALIGN 0 #endif int align = max_t(int, L1_CACHE_BYTES, ARCH_MIN_TASKALIGN); + unsigned long useroffset, usersize; /* create a slab on which task_structs can be allocated */ - task_struct_cachep = kmem_cache_create("task_struct", + task_struct_whitelist(&useroffset, &usersize); + task_struct_cachep = kmem_cache_create_usercopy("task_struct", arch_task_struct_size, align, - SLAB_PANIC|SLAB_ACCOUNT, NULL); + SLAB_PANIC|SLAB_ACCOUNT, + useroffset, usersize, NULL); #endif /* do the arch specific task caches init */ @@ -2248,9 +2267,11 @@ void __init proc_caches_init(void) * maximum number of CPU's we can ever have. The cpumask_allocation * is at the end of the structure, exactly for that reason. */ - mm_cachep = kmem_cache_create("mm_struct", + mm_cachep = kmem_cache_create_usercopy("mm_struct", sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, + offsetof(struct mm_struct, saved_auxv), + sizeof_field(struct mm_struct, saved_auxv), NULL); vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC|SLAB_ACCOUNT); mmap_init(); diff --git a/mm/slab.c b/mm/slab.c index 226906294183..cd86f15071ad 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1281,7 +1281,7 @@ void __init kmem_cache_init(void) create_boot_cache(kmem_cache, "kmem_cache", offsetof(struct kmem_cache, node) + nr_node_ids * sizeof(struct kmem_cache_node *), - SLAB_HWCACHE_ALIGN); + SLAB_HWCACHE_ALIGN, 0, 0); list_add(&kmem_cache->list, &slab_caches); slab_state = PARTIAL; @@ -1291,7 +1291,8 @@ void __init kmem_cache_init(void) */ kmalloc_caches[INDEX_NODE] = create_kmalloc_cache( kmalloc_info[INDEX_NODE].name, - kmalloc_size(INDEX_NODE), ARCH_KMALLOC_FLAGS); + kmalloc_size(INDEX_NODE), ARCH_KMALLOC_FLAGS, + 0, kmalloc_size(INDEX_NODE)); slab_state = PARTIAL_NODE; setup_kmalloc_cache_index_table(); @@ -4385,13 +4386,15 @@ module_init(slab_proc_init); #ifdef CONFIG_HARDENED_USERCOPY /* - * Rejects objects that are incorrectly sized. + * Rejects incorrectly sized objects and objects that are to be copied + * to/from userspace but do not fall entirely within the containing slab + * cache's usercopy region. * * Returns NULL if check passes, otherwise const char * to name of cache * to indicate an error. */ -const char *__check_heap_object(const void *ptr, unsigned long n, - struct page *page) +void __check_heap_object(const void *ptr, unsigned long n, struct page *page, + bool to_user) { struct kmem_cache *cachep; unsigned int objnr; @@ -4405,11 +4408,26 @@ const char *__check_heap_object(const void *ptr, unsigned long n, /* Find offset within object. */ offset = ptr - index_to_obj(cachep, page, objnr) - obj_offset(cachep); - /* Allow address range falling entirely within object size. */ - if (offset <= cachep->object_size && n <= cachep->object_size - offset) - return NULL; + /* Allow address range falling entirely within usercopy region. */ + if (offset >= cachep->useroffset && + offset - cachep->useroffset <= cachep->usersize && + n <= cachep->useroffset - offset + cachep->usersize) + return; + + /* + * If the copy is still within the allocated object, produce + * a warning instead of rejecting the copy. This is intended + * to be a temporary method to find any missing usercopy + * whitelists. + */ + if (usercopy_fallback && + offset <= cachep->object_size && + n <= cachep->object_size - offset) { + usercopy_warn("SLAB object", cachep->name, to_user, offset, n); + return; + } - return cachep->name; + usercopy_abort("SLAB object", cachep->name, to_user, offset, n); } #endif /* CONFIG_HARDENED_USERCOPY */ diff --git a/mm/slab.h b/mm/slab.h index e8e2095a6185..51813236e773 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -22,6 +22,8 @@ struct kmem_cache { unsigned int size; /* The aligned/padded/added on size */ unsigned int align; /* Alignment as calculated */ slab_flags_t flags; /* Active flags on the slab */ + size_t useroffset; /* Usercopy region offset */ + size_t usersize; /* Usercopy region size */ const char *name; /* Slab name for sysfs */ int refcount; /* Use counter */ void (*ctor)(void *); /* Called on object slot creation */ @@ -92,9 +94,11 @@ struct kmem_cache *kmalloc_slab(size_t, gfp_t); int __kmem_cache_create(struct kmem_cache *, slab_flags_t flags); extern struct kmem_cache *create_kmalloc_cache(const char *name, size_t size, - slab_flags_t flags); + slab_flags_t flags, size_t useroffset, + size_t usersize); extern void create_boot_cache(struct kmem_cache *, const char *name, - size_t size, slab_flags_t flags); + size_t size, slab_flags_t flags, size_t useroffset, + size_t usersize); int slab_unmergeable(struct kmem_cache *s); struct kmem_cache *find_mergeable(size_t size, size_t align, diff --git a/mm/slab_common.c b/mm/slab_common.c index deeddf95cdcf..10f127b2de7c 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -31,6 +31,14 @@ LIST_HEAD(slab_caches); DEFINE_MUTEX(slab_mutex); struct kmem_cache *kmem_cache; +#ifdef CONFIG_HARDENED_USERCOPY +bool usercopy_fallback __ro_after_init = + IS_ENABLED(CONFIG_HARDENED_USERCOPY_FALLBACK); +module_param(usercopy_fallback, bool, 0400); +MODULE_PARM_DESC(usercopy_fallback, + "WARN instead of reject usercopy whitelist violations"); +#endif + static LIST_HEAD(slab_caches_to_rcu_destroy); static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work); static DECLARE_WORK(slab_caches_to_rcu_destroy_work, @@ -310,6 +318,9 @@ int slab_unmergeable(struct kmem_cache *s) if (s->ctor) return 1; + if (s->usersize) + return 1; + /* * We may have set a slab to be unmergeable during bootstrap. */ @@ -368,12 +379,16 @@ struct kmem_cache *find_mergeable(size_t size, size_t align, static struct kmem_cache *create_cache(const char *name, size_t object_size, size_t size, size_t align, - slab_flags_t flags, void (*ctor)(void *), + slab_flags_t flags, size_t useroffset, + size_t usersize, void (*ctor)(void *), struct mem_cgroup *memcg, struct kmem_cache *root_cache) { struct kmem_cache *s; int err; + if (WARN_ON(useroffset + usersize > object_size)) + useroffset = usersize = 0; + err = -ENOMEM; s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL); if (!s) @@ -384,6 +399,8 @@ static struct kmem_cache *create_cache(const char *name, s->size = size; s->align = align; s->ctor = ctor; + s->useroffset = useroffset; + s->usersize = usersize; err = init_memcg_params(s, memcg, root_cache); if (err) @@ -408,11 +425,13 @@ out_free_cache: } /* - * kmem_cache_create - Create a cache. + * kmem_cache_create_usercopy - Create a cache. * @name: A string which is used in /proc/slabinfo to identify this cache. * @size: The size of objects to be created in this cache. * @align: The required alignment for the objects. * @flags: SLAB flags + * @useroffset: Usercopy region offset + * @usersize: Usercopy region size * @ctor: A constructor for the objects. * * Returns a ptr to the cache on success, NULL on failure. @@ -432,8 +451,9 @@ out_free_cache: * as davem. */ struct kmem_cache * -kmem_cache_create(const char *name, size_t size, size_t align, - slab_flags_t flags, void (*ctor)(void *)) +kmem_cache_create_usercopy(const char *name, size_t size, size_t align, + slab_flags_t flags, size_t useroffset, size_t usersize, + void (*ctor)(void *)) { struct kmem_cache *s = NULL; const char *cache_name; @@ -464,7 +484,13 @@ kmem_cache_create(const char *name, size_t size, size_t align, */ flags &= CACHE_CREATE_MASK; - s = __kmem_cache_alias(name, size, align, flags, ctor); + /* Fail closed on bad usersize of useroffset values. */ + if (WARN_ON(!usersize && useroffset) || + WARN_ON(size < usersize || size - usersize < useroffset)) + usersize = useroffset = 0; + + if (!usersize) + s = __kmem_cache_alias(name, size, align, flags, ctor); if (s) goto out_unlock; @@ -476,7 +502,7 @@ kmem_cache_create(const char *name, size_t size, size_t align, s = create_cache(cache_name, size, size, calculate_alignment(flags, align, size), - flags, ctor, NULL, NULL); + flags, useroffset, usersize, ctor, NULL, NULL); if (IS_ERR(s)) { err = PTR_ERR(s); kfree_const(cache_name); @@ -502,6 +528,15 @@ out_unlock: } return s; } +EXPORT_SYMBOL(kmem_cache_create_usercopy); + +struct kmem_cache * +kmem_cache_create(const char *name, size_t size, size_t align, + slab_flags_t flags, void (*ctor)(void *)) +{ + return kmem_cache_create_usercopy(name, size, align, flags, 0, 0, + ctor); +} EXPORT_SYMBOL(kmem_cache_create); static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work) @@ -614,6 +649,7 @@ void memcg_create_kmem_cache(struct mem_cgroup *memcg, s = create_cache(cache_name, root_cache->object_size, root_cache->size, root_cache->align, root_cache->flags & CACHE_CREATE_MASK, + root_cache->useroffset, root_cache->usersize, root_cache->ctor, memcg, root_cache); /* * If we could not create a memcg cache, do not complain, because @@ -881,13 +917,15 @@ bool slab_is_available(void) #ifndef CONFIG_SLOB /* Create a cache during boot when no slab services are available yet */ void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t size, - slab_flags_t flags) + slab_flags_t flags, size_t useroffset, size_t usersize) { int err; s->name = name; s->size = s->object_size = size; s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size); + s->useroffset = useroffset; + s->usersize = usersize; slab_init_memcg_params(s); @@ -901,14 +939,15 @@ void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t siz } struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size, - slab_flags_t flags) + slab_flags_t flags, size_t useroffset, + size_t usersize) { struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT); if (!s) panic("Out of memory when creating slab %s\n", name); - create_boot_cache(s, name, size, flags); + create_boot_cache(s, name, size, flags, useroffset, usersize); list_add(&s->list, &slab_caches); memcg_link_cache(s); s->refcount = 1; @@ -1062,7 +1101,8 @@ void __init setup_kmalloc_cache_index_table(void) static void __init new_kmalloc_cache(int idx, slab_flags_t flags) { kmalloc_caches[idx] = create_kmalloc_cache(kmalloc_info[idx].name, - kmalloc_info[idx].size, flags); + kmalloc_info[idx].size, flags, 0, + kmalloc_info[idx].size); } /* @@ -1103,7 +1143,7 @@ void __init create_kmalloc_caches(slab_flags_t flags) BUG_ON(!n); kmalloc_dma_caches[i] = create_kmalloc_cache(n, - size, SLAB_CACHE_DMA | flags); + size, SLAB_CACHE_DMA | flags, 0, 0); } } #endif diff --git a/mm/slub.c b/mm/slub.c index 693b7074bc53..cc71176c6eef 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3813,13 +3813,15 @@ EXPORT_SYMBOL(__kmalloc_node); #ifdef CONFIG_HARDENED_USERCOPY /* - * Rejects objects that are incorrectly sized. + * Rejects incorrectly sized objects and objects that are to be copied + * to/from userspace but do not fall entirely within the containing slab + * cache's usercopy region. * * Returns NULL if check passes, otherwise const char * to name of cache * to indicate an error. */ -const char *__check_heap_object(const void *ptr, unsigned long n, - struct page *page) +void __check_heap_object(const void *ptr, unsigned long n, struct page *page, + bool to_user) { struct kmem_cache *s; unsigned long offset; @@ -3827,11 +3829,11 @@ const char *__check_heap_object(const void *ptr, unsigned long n, /* Find object and usable object size. */ s = page->slab_cache; - object_size = slab_ksize(s); /* Reject impossible pointers. */ if (ptr < page_address(page)) - return s->name; + usercopy_abort("SLUB object not in SLUB page?!", NULL, + to_user, 0, n); /* Find offset within object. */ offset = (ptr - page_address(page)) % s->size; @@ -3839,15 +3841,31 @@ const char *__check_heap_object(const void *ptr, unsigned long n, /* Adjust for redzone and reject if within the redzone. */ if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE) { if (offset < s->red_left_pad) - return s->name; + usercopy_abort("SLUB object in left red zone", + s->name, to_user, offset, n); offset -= s->red_left_pad; } - /* Allow address range falling entirely within object size. */ - if (offset <= object_size && n <= object_size - offset) - return NULL; + /* Allow address range falling entirely within usercopy region. */ + if (offset >= s->useroffset && + offset - s->useroffset <= s->usersize && + n <= s->useroffset - offset + s->usersize) + return; + + /* + * If the copy is still within the allocated object, produce + * a warning instead of rejecting the copy. This is intended + * to be a temporary method to find any missing usercopy + * whitelists. + */ + object_size = slab_ksize(s); + if (usercopy_fallback && + offset <= object_size && n <= object_size - offset) { + usercopy_warn("SLUB object", s->name, to_user, offset, n); + return; + } - return s->name; + usercopy_abort("SLUB object", s->name, to_user, offset, n); } #endif /* CONFIG_HARDENED_USERCOPY */ @@ -4181,7 +4199,7 @@ void __init kmem_cache_init(void) kmem_cache = &boot_kmem_cache; create_boot_cache(kmem_cache_node, "kmem_cache_node", - sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN); + sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN, 0, 0); register_hotmemory_notifier(&slab_memory_callback_nb); @@ -4191,7 +4209,7 @@ void __init kmem_cache_init(void) create_boot_cache(kmem_cache, "kmem_cache", offsetof(struct kmem_cache, node) + nr_node_ids * sizeof(struct kmem_cache_node *), - SLAB_HWCACHE_ALIGN); + SLAB_HWCACHE_ALIGN, 0, 0); kmem_cache = bootstrap(&boot_kmem_cache); @@ -5061,6 +5079,12 @@ static ssize_t cache_dma_show(struct kmem_cache *s, char *buf) SLAB_ATTR_RO(cache_dma); #endif +static ssize_t usersize_show(struct kmem_cache *s, char *buf) +{ + return sprintf(buf, "%zu\n", s->usersize); +} +SLAB_ATTR_RO(usersize); + static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf) { return sprintf(buf, "%d\n", !!(s->flags & SLAB_TYPESAFE_BY_RCU)); @@ -5435,6 +5459,7 @@ static struct attribute *slab_attrs[] = { #ifdef CONFIG_FAILSLAB &failslab_attr.attr, #endif + &usersize_attr.attr, NULL }; diff --git a/mm/usercopy.c b/mm/usercopy.c index a9852b24715d..e9e9325f7638 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c @@ -58,12 +58,40 @@ static noinline int check_stack_object(const void *obj, unsigned long len) return GOOD_STACK; } -static void report_usercopy(const void *ptr, unsigned long len, - bool to_user, const char *type) +/* + * If these functions are reached, then CONFIG_HARDENED_USERCOPY has found + * an unexpected state during a copy_from_user() or copy_to_user() call. + * There are several checks being performed on the buffer by the + * __check_object_size() function. Normal stack buffer usage should never + * trip the checks, and kernel text addressing will always trip the check. + * For cache objects, it is checking that only the whitelisted range of + * bytes for a given cache is being accessed (via the cache's usersize and + * useroffset fields). To adjust a cache whitelist, use the usercopy-aware + * kmem_cache_create_usercopy() function to create the cache (and + * carefully audit the whitelist range). + */ +void usercopy_warn(const char *name, const char *detail, bool to_user, + unsigned long offset, unsigned long len) +{ + WARN_ONCE(1, "Bad or missing usercopy whitelist? Kernel memory %s attempt detected %s %s%s%s%s (offset %lu, size %lu)!\n", + to_user ? "exposure" : "overwrite", + to_user ? "from" : "to", + name ? : "unknown?!", + detail ? " '" : "", detail ? : "", detail ? "'" : "", + offset, len); +} + +void __noreturn usercopy_abort(const char *name, const char *detail, + bool to_user, unsigned long offset, + unsigned long len) { - pr_emerg("kernel memory %s attempt detected %s %p (%s) (%lu bytes)\n", - to_user ? "exposure" : "overwrite", - to_user ? "from" : "to", ptr, type ? : "unknown", len); + pr_emerg("Kernel memory %s attempt detected %s %s%s%s%s (offset %lu, size %lu)!\n", + to_user ? "exposure" : "overwrite", + to_user ? "from" : "to", + name ? : "unknown?!", + detail ? " '" : "", detail ? : "", detail ? "'" : "", + offset, len); + /* * For greater effect, it would be nice to do do_group_exit(), * but BUG() actually hooks all the lock-breaking and per-arch @@ -73,10 +101,10 @@ static void report_usercopy(const void *ptr, unsigned long len, } /* Returns true if any portion of [ptr,ptr+n) over laps with [low,high). */ -static bool overlaps(const void *ptr, unsigned long n, unsigned long low, - unsigned long high) +static bool overlaps(const unsigned long ptr, unsigned long n, + unsigned long low, unsigned long high) { - unsigned long check_low = (uintptr_t)ptr; + const unsigned long check_low = ptr; unsigned long check_high = check_low + n; /* Does not overlap if entirely above or entirely below. */ @@ -87,15 +115,15 @@ static bool overlaps(const void *ptr, unsigned long n, unsigned long low, } /* Is this address range in the kernel text area? */ -static inline const char *check_kernel_text_object(const void *ptr, - unsigned long n) +static inline void check_kernel_text_object(const unsigned long ptr, + unsigned long n, bool to_user) { unsigned long textlow = (unsigned long)_stext; unsigned long texthigh = (unsigned long)_etext; unsigned long textlow_linear, texthigh_linear; if (overlaps(ptr, n, textlow, texthigh)) - return "<kernel text>"; + usercopy_abort("kernel text", NULL, to_user, ptr - textlow, n); /* * Some architectures have virtual memory mappings with a secondary @@ -108,32 +136,30 @@ static inline const char *check_kernel_text_object(const void *ptr, textlow_linear = (unsigned long)lm_alias(textlow); /* No different mapping: we're done. */ if (textlow_linear == textlow) - return NULL; + return; /* Check the secondary mapping... */ texthigh_linear = (unsigned long)lm_alias(texthigh); if (overlaps(ptr, n, textlow_linear, texthigh_linear)) - return "<linear kernel text>"; - - return NULL; + usercopy_abort("linear kernel text", NULL, to_user, + ptr - textlow_linear, n); } -static inline const char *check_bogus_address(const void *ptr, unsigned long n) +static inline void check_bogus_address(const unsigned long ptr, unsigned long n, + bool to_user) { /* Reject if object wraps past end of memory. */ - if ((unsigned long)ptr + n < (unsigned long)ptr) - return "<wrapped address>"; + if (ptr + n < ptr) + usercopy_abort("wrapped address", NULL, to_user, 0, ptr + n); /* Reject if NULL or ZERO-allocation. */ if (ZERO_OR_NULL_PTR(ptr)) - return "<null>"; - - return NULL; + usercopy_abort("null address", NULL, to_user, ptr, n); } /* Checks for allocs that are marked in some way as spanning multiple pages. */ -static inline const char *check_page_span(const void *ptr, unsigned long n, - struct page *page, bool to_user) +static inline void check_page_span(const void *ptr, unsigned long n, + struct page *page, bool to_user) { #ifdef CONFIG_HARDENED_USERCOPY_PAGESPAN const void *end = ptr + n - 1; @@ -150,28 +176,28 @@ static inline const char *check_page_span(const void *ptr, unsigned long n, if (ptr >= (const void *)__start_rodata && end <= (const void *)__end_rodata) { if (!to_user) - return "<rodata>"; - return NULL; + usercopy_abort("rodata", NULL, to_user, 0, n); + return; } /* Allow kernel data region (if not marked as Reserved). */ if (ptr >= (const void *)_sdata && end <= (const void *)_edata) - return NULL; + return; /* Allow kernel bss region (if not marked as Reserved). */ if (ptr >= (const void *)__bss_start && end <= (const void *)__bss_stop) - return NULL; + return; /* Is the object wholly within one base page? */ if (likely(((unsigned long)ptr & (unsigned long)PAGE_MASK) == ((unsigned long)end & (unsigned long)PAGE_MASK))) - return NULL; + return; /* Allow if fully inside the same compound (__GFP_COMP) page. */ endpage = virt_to_head_page(end); if (likely(endpage == page)) - return NULL; + return; /* * Reject if range is entirely either Reserved (i.e. special or @@ -181,36 +207,37 @@ static inline const char *check_page_span(const void *ptr, unsigned long n, is_reserved = PageReserved(page); is_cma = is_migrate_cma_page(page); if (!is_reserved && !is_cma) - return "<spans multiple pages>"; + usercopy_abort("spans multiple pages", NULL, to_user, 0, n); for (ptr += PAGE_SIZE; ptr <= end; ptr += PAGE_SIZE) { page = virt_to_head_page(ptr); if (is_reserved && !PageReserved(page)) - return "<spans Reserved and non-Reserved pages>"; + usercopy_abort("spans Reserved and non-Reserved pages", + NULL, to_user, 0, n); if (is_cma && !is_migrate_cma_page(page)) - return "<spans CMA and non-CMA pages>"; + usercopy_abort("spans CMA and non-CMA pages", NULL, + to_user, 0, n); } #endif - - return NULL; } -static inline const char *check_heap_object(const void *ptr, unsigned long n, - bool to_user) +static inline void check_heap_object(const void *ptr, unsigned long n, + bool to_user) { struct page *page; if (!virt_addr_valid(ptr)) - return NULL; + return; page = virt_to_head_page(ptr); - /* Check slab allocator for flags and size. */ - if (PageSlab(page)) - return __check_heap_object(ptr, n, page); - - /* Verify object does not incorrectly span multiple pages. */ - return check_page_span(ptr, n, page, to_user); + if (PageSlab(page)) { + /* Check slab allocator for flags and size. */ + __check_heap_object(ptr, n, page, to_user); + } else { + /* Verify object does not incorrectly span multiple pages. */ + check_page_span(ptr, n, page, to_user); + } } /* @@ -221,21 +248,15 @@ static inline const char *check_heap_object(const void *ptr, unsigned long n, */ void __check_object_size(const void *ptr, unsigned long n, bool to_user) { - const char *err; - /* Skip all tests if size is zero. */ if (!n) return; /* Check for invalid addresses. */ - err = check_bogus_address(ptr, n); - if (err) - goto report; + check_bogus_address((const unsigned long)ptr, n, to_user); /* Check for bad heap object. */ - err = check_heap_object(ptr, n, to_user); - if (err) - goto report; + check_heap_object(ptr, n, to_user); /* Check for bad stack object. */ switch (check_stack_object(ptr, n)) { @@ -251,16 +272,10 @@ void __check_object_size(const void *ptr, unsigned long n, bool to_user) */ return; default: - err = "<process stack>"; - goto report; + usercopy_abort("process stack", NULL, to_user, 0, n); } /* Check for object in kernel to avoid text exposure. */ - err = check_kernel_text_object(ptr, n); - if (!err) - return; - -report: - report_usercopy(ptr, n, to_user, err); + check_kernel_text_object((const unsigned long)ptr, n, to_user); } EXPORT_SYMBOL(__check_object_size); diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 64048cec41e0..b109445a1df9 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -1032,6 +1032,8 @@ static int caif_create(struct net *net, struct socket *sock, int protocol, static struct proto prot = {.name = "PF_CAIF", .owner = THIS_MODULE, .obj_size = sizeof(struct caifsock), + .useroffset = offsetof(struct caifsock, conn_req.param), + .usersize = sizeof_field(struct caifsock, conn_req.param) }; if (!capable(CAP_SYS_ADMIN) && !capable(CAP_NET_ADMIN)) diff --git a/net/core/sock.c b/net/core/sock.c index e50e7b3f2223..b026e1717df4 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3191,8 +3191,10 @@ static int req_prot_init(const struct proto *prot) int proto_register(struct proto *prot, int alloc_slab) { if (alloc_slab) { - prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0, + prot->slab = kmem_cache_create_usercopy(prot->name, + prot->obj_size, 0, SLAB_HWCACHE_ALIGN | prot->slab_flags, + prot->useroffset, prot->usersize, NULL); if (prot->slab == NULL) { diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 7c509697ebc7..9b367fc48d7d 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -990,6 +990,8 @@ struct proto raw_prot = { .hash = raw_hash_sk, .unhash = raw_unhash_sk, .obj_size = sizeof(struct raw_sock), + .useroffset = offsetof(struct raw_sock, filter), + .usersize = sizeof_field(struct raw_sock, filter), .h.raw_hash = &raw_v4_hashinfo, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_raw_setsockopt, diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index ddda7eb3c623..4c25339b1984 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1272,6 +1272,8 @@ struct proto rawv6_prot = { .hash = raw_hash_sk, .unhash = raw_unhash_sk, .obj_size = sizeof(struct raw6_sock), + .useroffset = offsetof(struct raw6_sock, filter), + .usersize = sizeof_field(struct raw6_sock, filter), .h.raw_hash = &raw_v6_hashinfo, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_rawv6_setsockopt, diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 356e387f82e7..ebb8cb9eb0bd 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5053,7 +5053,7 @@ static int sctp_getsockopt_autoclose(struct sock *sk, int len, char __user *optv len = sizeof(int); if (put_user(len, optlen)) return -EFAULT; - if (copy_to_user(optval, &sctp_sk(sk)->autoclose, len)) + if (put_user(sctp_sk(sk)->autoclose, (int __user *)optval)) return -EFAULT; return 0; } @@ -8552,6 +8552,10 @@ struct proto sctp_prot = { .unhash = sctp_unhash, .get_port = sctp_get_port, .obj_size = sizeof(struct sctp_sock), + .useroffset = offsetof(struct sctp_sock, subscribe), + .usersize = offsetof(struct sctp_sock, initmsg) - + offsetof(struct sctp_sock, subscribe) + + sizeof_field(struct sctp_sock, initmsg), .sysctl_mem = sysctl_sctp_mem, .sysctl_rmem = sysctl_sctp_rmem, .sysctl_wmem = sysctl_sctp_wmem, @@ -8591,6 +8595,10 @@ struct proto sctpv6_prot = { .unhash = sctp_unhash, .get_port = sctp_get_port, .obj_size = sizeof(struct sctp6_sock), + .useroffset = offsetof(struct sctp6_sock, sctp.subscribe), + .usersize = offsetof(struct sctp6_sock, sctp.initmsg) - + offsetof(struct sctp6_sock, sctp.subscribe) + + sizeof_field(struct sctp6_sock, sctp.initmsg), .sysctl_mem = sysctl_sctp_mem, .sysctl_rmem = sysctl_sctp_rmem, .sysctl_wmem = sysctl_sctp_wmem, diff --git a/security/Kconfig b/security/Kconfig index 3709db95027f..c4302067a3ad 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -164,6 +164,20 @@ config HARDENED_USERCOPY or are part of the kernel text. This kills entire classes of heap overflow exploits and similar kernel memory exposures. +config HARDENED_USERCOPY_FALLBACK + bool "Allow usercopy whitelist violations to fallback to object size" + depends on HARDENED_USERCOPY + default y + help + This is a temporary option that allows missing usercopy whitelists + to be discovered via a WARN() to the kernel log, instead of + rejecting the copy, falling back to non-whitelisted hardened + usercopy that checks the slab allocation size instead of the + whitelist size. This option will be removed once it seems like + all missing usercopy whitelists have been identified and fixed. + Booting with "slab_common.usercopy_fallback=Y/N" can change + this setting. + config HARDENED_USERCOPY_PAGESPAN bool "Refuse to copy allocations that span multiple pages" depends on HARDENED_USERCOPY diff --git a/tools/objtool/check.c b/tools/objtool/check.c index f40d46e24bcc..524becf88e04 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -138,6 +138,7 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func, "__reiserfs_panic", "lbug_with_loc", "fortify_panic", + "usercopy_abort", }; if (func->bind == STB_WEAK) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 35db929f92f0..001085b611ad 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -4019,8 +4019,12 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, /* A kmem cache lets us meet the alignment requirements of fx_save. */ if (!vcpu_align) vcpu_align = __alignof__(struct kvm_vcpu); - kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size, vcpu_align, - SLAB_ACCOUNT, NULL); + kvm_vcpu_cache = + kmem_cache_create_usercopy("kvm_vcpu", vcpu_size, vcpu_align, + SLAB_ACCOUNT, + offsetof(struct kvm_vcpu, arch), + sizeof_field(struct kvm_vcpu, arch), + NULL); if (!kvm_vcpu_cache) { r = -ENOMEM; goto out_free_3; |