diff options
author | Stephen Rothwell <sfr@canb.auug.org.au> | 2014-12-19 19:00:52 +1100 |
---|---|---|
committer | Stephen Rothwell <sfr@canb.auug.org.au> | 2014-12-19 19:00:52 +1100 |
commit | d45262453122c02a058f1fa264ab6b485aab011c (patch) | |
tree | 7b490ec8f7cf5b6bab752aadd212761dc4489fcd | |
parent | 1927b0b7a5f4eea5e6e17fe0d07a6bd90a9d08d0 (diff) | |
parent | 350b61c87ab5b359da2269d02be4ca0fe5f81c97 (diff) |
Merge branch 'akpm-current/current'
Conflicts:
mm/fremap.c
174 files changed, 2846 insertions, 846 deletions
diff --git a/Documentation/devicetree/bindings/i2c/trivial-devices.txt b/Documentation/devicetree/bindings/i2c/trivial-devices.txt index 9f4e3824e71..4b95aca6e6d 100644 --- a/Documentation/devicetree/bindings/i2c/trivial-devices.txt +++ b/Documentation/devicetree/bindings/i2c/trivial-devices.txt @@ -58,7 +58,7 @@ fsl,sgtl5000 SGTL5000: Ultra Low-Power Audio Codec gmt,g751 G751: Digital Temperature Sensor and Thermal Watchdog with Two-Wire Interface infineon,slb9635tt Infineon SLB9635 (Soft-) I2C TPM (old protocol, max 100khz) infineon,slb9645tt Infineon SLB9645 I2C TPM (new protocol, max 400khz) -isl,isl12057 Intersil ISL12057 I2C RTC Chip +isl,isl12057 Intersil ISL12057 I2C RTC/Alarm Chip isil,isl29028 (deprecated, use isl) isl,isl29028 Intersil ISL29028 Ambient Light and Proximity Sensor maxim,ds1050 5 Bit Programmable, Pulse-Width Modulator diff --git a/Documentation/filesystems/vfat.txt b/Documentation/filesystems/vfat.txt index ce1126aceed..223c32171dc 100644 --- a/Documentation/filesystems/vfat.txt +++ b/Documentation/filesystems/vfat.txt @@ -180,6 +180,16 @@ dos1xfloppy -- If set, use a fallback default BIOS Parameter Block <bool>: 0,1,yes,no,true,false +LIMITATION +--------------------------------------------------------------------- +* The fallocated region of file is discarded at umount/evict time + when using fallocate with FALLOC_FL_KEEP_SIZE. + So, User should assume that fallocated region can be discarded at + last close if there is memory pressure resulting in eviction of + the inode from the memory. As a result, for any dependency on + the fallocated region, user should make sure to recheck fallocate + after reopening the file. + TODO ---------------------------------------------------------------------- * Need to get rid of the raw scanning stuff. 
Instead, always use diff --git a/Documentation/leds/leds-class.txt b/Documentation/leds/leds-class.txt index 79699c20076..62261c04060 100644 --- a/Documentation/leds/leds-class.txt +++ b/Documentation/leds/leds-class.txt @@ -2,9 +2,6 @@ LED handling under Linux ======================== -If you're reading this and thinking about keyboard leds, these are -handled by the input subsystem and the led class is *not* needed. - In its simplest form, the LED class just allows control of LEDs from userspace. LEDs appear in /sys/class/leds/. The maximum brightness of the LED is defined in max_brightness file. The brightness file will set the brightness diff --git a/Documentation/printk-formats.txt b/Documentation/printk-formats.txt index 5a615c14f75..8858db8f880 100644 --- a/Documentation/printk-formats.txt +++ b/Documentation/printk-formats.txt @@ -216,6 +216,12 @@ dentry names: equivalent of %s dentry->d_name.name we used to use, %pd<n> prints n last components. %pD does the same thing for struct file. +task_struct comm name: + + %pT + + For printing task_struct->comm. + struct va_format: %pV diff --git a/Documentation/vm/remap_file_pages.txt b/Documentation/vm/remap_file_pages.txt index 560e4363a55..f609142f406 100644 --- a/Documentation/vm/remap_file_pages.txt +++ b/Documentation/vm/remap_file_pages.txt @@ -18,10 +18,9 @@ on 32-bit systems to map files bigger than can linearly fit into 32-bit virtual address space. This use-case is not critical anymore since 64-bit systems are widely available. -The plan is to deprecate the syscall and replace it with an emulation. -The emulation will create new VMAs instead of nonlinear mappings. It's -going to work slower for rare users of remap_file_pages() but ABI is -preserved. +The syscall is deprecated and replaced it with an emulation now. The +emulation creates new VMAs instead of nonlinear mappings. It's going to +work slower for rare users of remap_file_pages() but ABI is preserved. 
One side effect of emulation (apart from performance) is that user can hit vm.max_map_count limit more easily due to additional VMAs. See comment for diff --git a/arch/alpha/include/asm/thread_info.h b/arch/alpha/include/asm/thread_info.h index 48bbea6898b..d5b98ab514b 100644 --- a/arch/alpha/include/asm/thread_info.h +++ b/arch/alpha/include/asm/thread_info.h @@ -27,8 +27,6 @@ struct thread_info { int bpt_nsaved; unsigned long bpt_addr[2]; /* breakpoint handling */ unsigned int bpt_insn[2]; - - struct restart_block restart_block; }; /* @@ -40,9 +38,6 @@ struct thread_info { .exec_domain = &default_exec_domain, \ .addr_limit = KERNEL_DS, \ .preempt_count = INIT_PREEMPT_COUNT, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ } #define init_thread_info (init_thread_union.thread_info) diff --git a/arch/alpha/include/uapi/asm/mman.h b/arch/alpha/include/uapi/asm/mman.h index 0086b472bc2..836fbd44f65 100644 --- a/arch/alpha/include/uapi/asm/mman.h +++ b/arch/alpha/include/uapi/asm/mman.h @@ -44,6 +44,7 @@ #define MADV_WILLNEED 3 /* will need these pages */ #define MADV_SPACEAVAIL 5 /* ensure resources are available */ #define MADV_DONTNEED 6 /* don't need these pages */ +#define MADV_FREE 7 /* free pages only if memory pressure */ /* common/generic parameters */ #define MADV_REMOVE 9 /* remove these pages & resources */ diff --git a/arch/alpha/kernel/signal.c b/arch/alpha/kernel/signal.c index 6cec2881acb..8dbfb15f174 100644 --- a/arch/alpha/kernel/signal.c +++ b/arch/alpha/kernel/signal.c @@ -150,7 +150,7 @@ restore_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs) struct switch_stack *sw = (struct switch_stack *)regs - 1; long i, err = __get_user(regs->pc, &sc->sc_pc); - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; sw->r26 = (unsigned long) ret_from_sys_call; diff --git a/arch/arc/include/asm/thread_info.h b/arch/arc/include/asm/thread_info.h index 
02bc5ec0fb2..1163a1838ac 100644 --- a/arch/arc/include/asm/thread_info.h +++ b/arch/arc/include/asm/thread_info.h @@ -46,7 +46,6 @@ struct thread_info { struct exec_domain *exec_domain;/* execution domain */ __u32 cpu; /* current CPU */ unsigned long thr_ptr; /* TLS ptr */ - struct restart_block restart_block; }; /* @@ -62,9 +61,6 @@ struct thread_info { .cpu = 0, \ .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ } #define init_thread_info (init_thread_union.thread_info) diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c index cb3142a2d40..114234e83ca 100644 --- a/arch/arc/kernel/signal.c +++ b/arch/arc/kernel/signal.c @@ -104,7 +104,7 @@ SYSCALL_DEFINE0(rt_sigreturn) struct pt_regs *regs = current_pt_regs(); /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; /* Since we stacked the signal on a word boundary, * then 'sp' should be word aligned here. 
If it's diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index a31ecdad4b5..54dc91486c1 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -249,6 +249,7 @@ PMD_BIT_FUNC(mkold, &= ~PMD_SECT_AF); PMD_BIT_FUNC(mksplitting, |= L_PMD_SECT_SPLITTING); PMD_BIT_FUNC(mkwrite, &= ~L_PMD_SECT_RDONLY); PMD_BIT_FUNC(mkdirty, |= L_PMD_SECT_DIRTY); +PMD_BIT_FUNC(mkclean, &= ~L_PMD_SECT_DIRTY); PMD_BIT_FUNC(mkyoung, |= PMD_SECT_AF); #define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT)) diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index d890e41f552..72812a1f3d1 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -68,7 +68,6 @@ struct thread_info { #ifdef CONFIG_ARM_THUMBEE unsigned long thumbee_state; /* ThumbEE Handler Base register */ #endif - struct restart_block restart_block; }; #define INIT_THREAD_INFO(tsk) \ @@ -81,9 +80,6 @@ struct thread_info { .cpu_domain = domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \ domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \ domain_val(DOMAIN_IO, DOMAIN_CLIENT), \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ } #define init_thread_info (init_thread_union.thread_info) diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 8aa6f1b87c9..023ac905e4c 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -191,7 +191,7 @@ asmlinkage int sys_sigreturn(struct pt_regs *regs) struct sigframe __user *frame; /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; /* * Since we stacked the signal on a 64-bit boundary, @@ -221,7 +221,7 @@ asmlinkage int sys_rt_sigreturn(struct pt_regs *regs) struct rt_sigframe __user *frame; /* Always make any pending restarted system calls return -EINTR */ - 
current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; /* * Since we stacked the signal on a 64-bit boundary, diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index df22314f57c..22e6157fe8d 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -281,10 +281,12 @@ void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address, #define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd)) #define pmd_young(pmd) pte_young(pmd_pte(pmd)) +#define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd)) #define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd))) #define pmd_mksplitting(pmd) pte_pmd(pte_mkspecial(pmd_pte(pmd))) #define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd))) #define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd))) +#define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd))) #define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd))) #define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd))) #define pmd_mknotpresent(pmd) (__pmd(pmd_val(pmd) & ~PMD_TYPE_MASK)) diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 459bf8e5320..702e1e6a0d8 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -48,7 +48,6 @@ struct thread_info { mm_segment_t addr_limit; /* address limit */ struct task_struct *task; /* main task structure */ struct exec_domain *exec_domain; /* execution domain */ - struct restart_block restart_block; int preempt_count; /* 0 => preemptable, <0 => bug */ int cpu; /* cpu */ }; @@ -60,9 +59,6 @@ struct thread_info { .flags = 0, \ .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ } #define init_thread_info (init_thread_union.thread_info) diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 6fa792137ed..660ccf9f752 100644 --- 
a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -131,7 +131,7 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) struct rt_sigframe __user *frame; /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; /* * Since we stacked the signal on a 128-bit boundary, then 'sp' should diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index 5a1ba6e80d4..64565c4ecbb 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -347,7 +347,7 @@ asmlinkage int compat_sys_sigreturn(struct pt_regs *regs) struct compat_sigframe __user *frame; /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; /* * Since we stacked the signal on a 64-bit boundary, @@ -381,7 +381,7 @@ asmlinkage int compat_sys_rt_sigreturn(struct pt_regs *regs) struct compat_rt_sigframe __user *frame; /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; /* * Since we stacked the signal on a 64-bit boundary, diff --git a/arch/avr32/include/asm/thread_info.h b/arch/avr32/include/asm/thread_info.h index a978f3fe7c2..d56afa99a51 100644 --- a/arch/avr32/include/asm/thread_info.h +++ b/arch/avr32/include/asm/thread_info.h @@ -30,7 +30,6 @@ struct thread_info { saved by debug handler when setting up trampoline */ - struct restart_block restart_block; __u8 supervisor_stack[0]; }; @@ -41,9 +40,6 @@ struct thread_info { .flags = 0, \ .cpu = 0, \ .preempt_count = INIT_PREEMPT_COUNT, \ - .restart_block = { \ - .fn = do_no_restart_syscall \ - } \ } #define init_thread_info (init_thread_union.thread_info) diff --git a/arch/avr32/kernel/asm-offsets.c 
b/arch/avr32/kernel/asm-offsets.c index d6a8193a1d2..e41c84516e5 100644 --- a/arch/avr32/kernel/asm-offsets.c +++ b/arch/avr32/kernel/asm-offsets.c @@ -18,7 +18,6 @@ void foo(void) OFFSET(TI_preempt_count, thread_info, preempt_count); OFFSET(TI_rar_saved, thread_info, rar_saved); OFFSET(TI_rsr_saved, thread_info, rsr_saved); - OFFSET(TI_restart_block, thread_info, restart_block); BLANK(); OFFSET(TSK_active_mm, task_struct, active_mm); BLANK(); diff --git a/arch/avr32/kernel/signal.c b/arch/avr32/kernel/signal.c index d309fbcc3bd..8f1c63b9b98 100644 --- a/arch/avr32/kernel/signal.c +++ b/arch/avr32/kernel/signal.c @@ -69,7 +69,7 @@ asmlinkage int sys_rt_sigreturn(struct pt_regs *regs) sigset_t set; /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; frame = (struct rt_sigframe __user *)regs->sp; pr_debug("SIG return: frame = %p\n", frame); diff --git a/arch/blackfin/include/asm/thread_info.h b/arch/blackfin/include/asm/thread_info.h index 55f473bdad3..57c3a8bd583 100644 --- a/arch/blackfin/include/asm/thread_info.h +++ b/arch/blackfin/include/asm/thread_info.h @@ -42,7 +42,6 @@ struct thread_info { int cpu; /* cpu we're on */ int preempt_count; /* 0 => preemptable, <0 => BUG */ mm_segment_t addr_limit; /* address limit */ - struct restart_block restart_block; #ifndef CONFIG_SMP struct l1_scratch_task_info l1_task_info; #endif @@ -58,9 +57,6 @@ struct thread_info { .flags = 0, \ .cpu = 0, \ .preempt_count = INIT_PREEMPT_COUNT, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ } #define init_thread_info (init_thread_union.thread_info) #define init_stack (init_thread_union.stack) diff --git a/arch/blackfin/kernel/signal.c b/arch/blackfin/kernel/signal.c index ef275571d88..f2a8b5493bd 100644 --- a/arch/blackfin/kernel/signal.c +++ b/arch/blackfin/kernel/signal.c @@ -44,7 +44,7 @@ rt_restore_sigcontext(struct pt_regs *regs, 
struct sigcontext __user *sc, int *p int err = 0; /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; #define RESTORE(x) err |= __get_user(regs->x, &sc->sc_##x) diff --git a/arch/c6x/include/asm/thread_info.h b/arch/c6x/include/asm/thread_info.h index d4e9ef87076..584e253f321 100644 --- a/arch/c6x/include/asm/thread_info.h +++ b/arch/c6x/include/asm/thread_info.h @@ -45,7 +45,6 @@ struct thread_info { int cpu; /* cpu we're on */ int preempt_count; /* 0 = preemptable, <0 = BUG */ mm_segment_t addr_limit; /* thread address space */ - struct restart_block restart_block; }; /* @@ -61,9 +60,6 @@ struct thread_info { .cpu = 0, \ .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ } #define init_thread_info (init_thread_union.thread_info) diff --git a/arch/c6x/kernel/signal.c b/arch/c6x/kernel/signal.c index fe68226f6c4..3c4bb5a5c38 100644 --- a/arch/c6x/kernel/signal.c +++ b/arch/c6x/kernel/signal.c @@ -68,7 +68,7 @@ asmlinkage int do_rt_sigreturn(struct pt_regs *regs) sigset_t set; /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; /* * Since we stacked the signal on a dword boundary, diff --git a/arch/cris/arch-v10/kernel/signal.c b/arch/cris/arch-v10/kernel/signal.c index 9b32d338838..74d7ba35120 100644 --- a/arch/cris/arch-v10/kernel/signal.c +++ b/arch/cris/arch-v10/kernel/signal.c @@ -67,7 +67,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) unsigned long old_usp; /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; /* restore the regs from &sc->regs (same as sc, 
since regs is first) * (sc is already checked for VERIFY_READ since the sigframe was diff --git a/arch/cris/arch-v32/kernel/signal.c b/arch/cris/arch-v32/kernel/signal.c index 78ce3b1c9bc..870e3e06931 100644 --- a/arch/cris/arch-v32/kernel/signal.c +++ b/arch/cris/arch-v32/kernel/signal.c @@ -59,7 +59,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) unsigned long old_usp; /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; /* * Restore the registers from &sc->regs. sc is already checked diff --git a/arch/cris/include/asm/thread_info.h b/arch/cris/include/asm/thread_info.h index 55dede18c03..7286db5ed90 100644 --- a/arch/cris/include/asm/thread_info.h +++ b/arch/cris/include/asm/thread_info.h @@ -38,7 +38,6 @@ struct thread_info { 0-0xBFFFFFFF for user-thead 0-0xFFFFFFFF for kernel-thread */ - struct restart_block restart_block; __u8 supervisor_stack[0]; }; @@ -56,9 +55,6 @@ struct thread_info { .cpu = 0, \ .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ } #define init_thread_info (init_thread_union.thread_info) diff --git a/arch/frv/include/asm/thread_info.h b/arch/frv/include/asm/thread_info.h index af29e17c018..6b917f1c295 100644 --- a/arch/frv/include/asm/thread_info.h +++ b/arch/frv/include/asm/thread_info.h @@ -41,7 +41,6 @@ struct thread_info { * 0-0xBFFFFFFF for user-thead * 0-0xFFFFFFFF for kernel-thread */ - struct restart_block restart_block; __u8 supervisor_stack[0]; }; @@ -65,9 +64,6 @@ struct thread_info { .cpu = 0, \ .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ } #define init_thread_info (init_thread_union.thread_info) diff --git a/arch/frv/kernel/asm-offsets.c b/arch/frv/kernel/asm-offsets.c index 9de96843a27..446e89d500c 100644 
--- a/arch/frv/kernel/asm-offsets.c +++ b/arch/frv/kernel/asm-offsets.c @@ -40,7 +40,6 @@ void foo(void) OFFSET(TI_CPU, thread_info, cpu); OFFSET(TI_PREEMPT_COUNT, thread_info, preempt_count); OFFSET(TI_ADDR_LIMIT, thread_info, addr_limit); - OFFSET(TI_RESTART_BLOCK, thread_info, restart_block); BLANK(); /* offsets into register file storage */ diff --git a/arch/frv/kernel/signal.c b/arch/frv/kernel/signal.c index dc3d59de087..336713ab474 100644 --- a/arch/frv/kernel/signal.c +++ b/arch/frv/kernel/signal.c @@ -62,7 +62,7 @@ static int restore_sigcontext(struct sigcontext __user *sc, int *_gr8) unsigned long tbr, psr; /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; tbr = user->i.tbr; psr = user->i.psr; diff --git a/arch/hexagon/include/asm/thread_info.h b/arch/hexagon/include/asm/thread_info.h index a59dad3b369..bacd3d6030c 100644 --- a/arch/hexagon/include/asm/thread_info.h +++ b/arch/hexagon/include/asm/thread_info.h @@ -56,7 +56,6 @@ struct thread_info { * used for syscalls somehow; * seems to have a function pointer and four arguments */ - struct restart_block restart_block; /* Points to the current pt_regs frame */ struct pt_regs *regs; /* @@ -83,9 +82,6 @@ struct thread_info { .cpu = 0, \ .preempt_count = 1, \ .addr_limit = KERNEL_DS, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ .sp = 0, \ .regs = NULL, \ } diff --git a/arch/hexagon/kernel/signal.c b/arch/hexagon/kernel/signal.c index eadd70e47e7..b039a624c17 100644 --- a/arch/hexagon/kernel/signal.c +++ b/arch/hexagon/kernel/signal.c @@ -239,7 +239,7 @@ asmlinkage int sys_rt_sigreturn(void) sigset_t blocked; /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; frame = (struct rt_sigframe __user *)pt_psp(regs); if 
(!access_ok(VERIFY_READ, frame, sizeof(*frame))) diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h index 5b17418b422..c16f21a068f 100644 --- a/arch/ia64/include/asm/thread_info.h +++ b/arch/ia64/include/asm/thread_info.h @@ -27,7 +27,6 @@ struct thread_info { __u32 status; /* Thread synchronous flags */ mm_segment_t addr_limit; /* user-level address space limit */ int preempt_count; /* 0=premptable, <0=BUG; will also serve as bh-counter */ - struct restart_block restart_block; #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE __u64 ac_stamp; __u64 ac_leave; @@ -46,9 +45,6 @@ struct thread_info { .cpu = 0, \ .addr_limit = KERNEL_DS, \ .preempt_count = INIT_PREEMPT_COUNT, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ } #ifndef ASM_OFFSETS_C diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c index 6d92170be45..b3a124da71e 100644 --- a/arch/ia64/kernel/signal.c +++ b/arch/ia64/kernel/signal.c @@ -46,7 +46,7 @@ restore_sigcontext (struct sigcontext __user *sc, struct sigscratch *scr) long err; /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; /* restore scratch that always needs gets updated during signal delivery: */ err = __get_user(flags, &sc->sc_flags); diff --git a/arch/m32r/include/asm/thread_info.h b/arch/m32r/include/asm/thread_info.h index 00171703402..32422d0211c 100644 --- a/arch/m32r/include/asm/thread_info.h +++ b/arch/m32r/include/asm/thread_info.h @@ -34,7 +34,6 @@ struct thread_info { 0-0xBFFFFFFF for user-thread 0-0xFFFFFFFF for kernel-thread */ - struct restart_block restart_block; __u8 supervisor_stack[0]; }; @@ -49,7 +48,6 @@ struct thread_info { #define TI_CPU 0x00000010 #define TI_PRE_COUNT 0x00000014 #define TI_ADDR_LIMIT 0x00000018 -#define TI_RESTART_BLOCK 0x000001C #endif @@ -68,9 +66,6 @@ struct thread_info { .cpu = 0, \ .preempt_count = 
INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ } #define init_thread_info (init_thread_union.thread_info) diff --git a/arch/m32r/kernel/signal.c b/arch/m32r/kernel/signal.c index 95408b8f130..7736c6660a1 100644 --- a/arch/m32r/kernel/signal.c +++ b/arch/m32r/kernel/signal.c @@ -48,7 +48,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, unsigned int err = 0; /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; #define COPY(x) err |= __get_user(regs->x, &sc->sc_##x) COPY(r4); diff --git a/arch/m68k/include/asm/thread_info.h b/arch/m68k/include/asm/thread_info.h index 21a4784ca5a..c54256e69e6 100644 --- a/arch/m68k/include/asm/thread_info.h +++ b/arch/m68k/include/asm/thread_info.h @@ -31,7 +31,6 @@ struct thread_info { int preempt_count; /* 0 => preemptable, <0 => BUG */ __u32 cpu; /* should always be 0 on m68k */ unsigned long tp_value; /* thread pointer */ - struct restart_block restart_block; }; #endif /* __ASSEMBLY__ */ @@ -41,9 +40,6 @@ struct thread_info { .exec_domain = &default_exec_domain, \ .addr_limit = KERNEL_DS, \ .preempt_count = INIT_PREEMPT_COUNT, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ } #define init_stack (init_thread_union.stack) diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c index 967a8b7e152..d7179281e74 100644 --- a/arch/m68k/kernel/signal.c +++ b/arch/m68k/kernel/signal.c @@ -655,7 +655,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *usc, void __u int err = 0; /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; /* get previous context */ if (copy_from_user(&context, usc, sizeof(context))) @@ -693,7 +693,7 @@ 
rt_restore_ucontext(struct pt_regs *regs, struct switch_stack *sw, int err; /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; err = __get_user(temp, &uc->uc_mcontext.version); if (temp != MCONTEXT_VERSION) diff --git a/arch/metag/include/asm/thread_info.h b/arch/metag/include/asm/thread_info.h index 47711336119..afb3ca4776d 100644 --- a/arch/metag/include/asm/thread_info.h +++ b/arch/metag/include/asm/thread_info.h @@ -35,9 +35,8 @@ struct thread_info { int preempt_count; /* 0 => preemptable, <0 => BUG */ mm_segment_t addr_limit; /* thread address space */ - struct restart_block restart_block; - u8 supervisor_stack[0]; + u8 supervisor_stack[0] __aligned(8); }; #else /* !__ASSEMBLY__ */ @@ -74,9 +73,6 @@ struct thread_info { .cpu = 0, \ .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ } #define init_thread_info (init_thread_union.thread_info) diff --git a/arch/metag/kernel/signal.c b/arch/metag/kernel/signal.c index 0d100d5c140..ce49d429c74 100644 --- a/arch/metag/kernel/signal.c +++ b/arch/metag/kernel/signal.c @@ -48,7 +48,7 @@ static int restore_sigcontext(struct pt_regs *regs, int err; /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; err = metag_gp_regs_copyin(regs, 0, sizeof(struct user_gp_regs), NULL, &sc->regs); diff --git a/arch/microblaze/include/asm/thread_info.h b/arch/microblaze/include/asm/thread_info.h index 8c9d36591a0..b699fbd7de4 100644 --- a/arch/microblaze/include/asm/thread_info.h +++ b/arch/microblaze/include/asm/thread_info.h @@ -71,7 +71,6 @@ struct thread_info { __u32 cpu; /* current CPU */ __s32 preempt_count; /* 0 => preemptable,< 0 => BUG*/ mm_segment_t addr_limit; /* thread address space */ 
- struct restart_block restart_block; struct cpu_context cpu_context; }; @@ -87,9 +86,6 @@ struct thread_info { .cpu = 0, \ .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ } #define init_thread_info (init_thread_union.thread_info) diff --git a/arch/microblaze/kernel/signal.c b/arch/microblaze/kernel/signal.c index 8955a3829cf..0245c27fa72 100644 --- a/arch/microblaze/kernel/signal.c +++ b/arch/microblaze/kernel/signal.c @@ -89,7 +89,7 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) int rval; /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) goto badframe; diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h index 99eea59604e..75a8c55d3dc 100644 --- a/arch/mips/include/asm/thread_info.h +++ b/arch/mips/include/asm/thread_info.h @@ -34,7 +34,6 @@ struct thread_info { * 0x7fffffff for user-thead * 0xffffffff for kernel-thread */ - struct restart_block restart_block; struct pt_regs *regs; }; @@ -49,9 +48,6 @@ struct thread_info { .cpu = 0, \ .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ } #define init_thread_info (init_thread_union.thread_info) diff --git a/arch/mips/include/uapi/asm/mman.h b/arch/mips/include/uapi/asm/mman.h index cfcb876cae6..106e741aa7e 100644 --- a/arch/mips/include/uapi/asm/mman.h +++ b/arch/mips/include/uapi/asm/mman.h @@ -67,6 +67,7 @@ #define MADV_SEQUENTIAL 2 /* expect sequential page references */ #define MADV_WILLNEED 3 /* will need these pages */ #define MADV_DONTNEED 4 /* don't need these pages */ +#define MADV_FREE 5 /* free pages only if memory pressure */ /* common parameters: try to keep these consistent across architectures */ #define 
MADV_REMOVE 9 /* remove these pages & resources */ diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c index b1d84bd4efb..3b2dfdb4865 100644 --- a/arch/mips/kernel/asm-offsets.c +++ b/arch/mips/kernel/asm-offsets.c @@ -98,7 +98,6 @@ void output_thread_info_defines(void) OFFSET(TI_CPU, thread_info, cpu); OFFSET(TI_PRE_COUNT, thread_info, preempt_count); OFFSET(TI_ADDR_LIMIT, thread_info, addr_limit); - OFFSET(TI_RESTART_BLOCK, thread_info, restart_block); OFFSET(TI_REGS, thread_info, regs); DEFINE(_THREAD_SIZE, THREAD_SIZE); DEFINE(_THREAD_MASK, THREAD_MASK); diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c index 545bf11bd2e..6a28c792d86 100644 --- a/arch/mips/kernel/signal.c +++ b/arch/mips/kernel/signal.c @@ -243,7 +243,7 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) int i; /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; err |= __get_user(regs->cp0_epc, &sc->sc_pc); diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c index d69179c0d49..19a7705f2a0 100644 --- a/arch/mips/kernel/signal32.c +++ b/arch/mips/kernel/signal32.c @@ -220,7 +220,7 @@ static int restore_sigcontext32(struct pt_regs *regs, int i; /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; err |= __get_user(regs->cp0_epc, &sc->sc_pc); err |= __get_user(regs->hi, &sc->sc_mdhi); diff --git a/arch/mn10300/include/asm/thread_info.h b/arch/mn10300/include/asm/thread_info.h index bf280eaccd3..c1c374f0ec1 100644 --- a/arch/mn10300/include/asm/thread_info.h +++ b/arch/mn10300/include/asm/thread_info.h @@ -50,7 +50,6 @@ struct thread_info { 0-0xBFFFFFFF for user-thead 0-0xFFFFFFFF for kernel-thread */ - struct restart_block restart_block; __u8 
supervisor_stack[0]; }; @@ -80,9 +79,6 @@ struct thread_info { .cpu = 0, \ .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ } #define init_thread_info (init_thread_union.thread_info) diff --git a/arch/mn10300/kernel/asm-offsets.c b/arch/mn10300/kernel/asm-offsets.c index 47b3bb0c04f..d780670cbaf 100644 --- a/arch/mn10300/kernel/asm-offsets.c +++ b/arch/mn10300/kernel/asm-offsets.c @@ -28,7 +28,6 @@ void foo(void) OFFSET(TI_cpu, thread_info, cpu); OFFSET(TI_preempt_count, thread_info, preempt_count); OFFSET(TI_addr_limit, thread_info, addr_limit); - OFFSET(TI_restart_block, thread_info, restart_block); BLANK(); OFFSET(REG_D0, pt_regs, d0); diff --git a/arch/mn10300/kernel/signal.c b/arch/mn10300/kernel/signal.c index a6c0858592c..8609845f12c 100644 --- a/arch/mn10300/kernel/signal.c +++ b/arch/mn10300/kernel/signal.c @@ -40,7 +40,7 @@ static int restore_sigcontext(struct pt_regs *regs, unsigned int err = 0; /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; if (is_using_fpu(current)) fpu_kill_state(current); diff --git a/arch/openrisc/include/asm/thread_info.h b/arch/openrisc/include/asm/thread_info.h index d797acc901e..875f0845a70 100644 --- a/arch/openrisc/include/asm/thread_info.h +++ b/arch/openrisc/include/asm/thread_info.h @@ -57,7 +57,6 @@ struct thread_info { 0-0x7FFFFFFF for user-thead 0-0xFFFFFFFF for kernel-thread */ - struct restart_block restart_block; __u8 supervisor_stack[0]; /* saved context data */ @@ -79,9 +78,6 @@ struct thread_info { .cpu = 0, \ .preempt_count = 1, \ .addr_limit = KERNEL_DS, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ .ksp = 0, \ } diff --git a/arch/openrisc/kernel/signal.c b/arch/openrisc/kernel/signal.c index 7d1b8235bf9..4112175bf80 100644 --- a/arch/openrisc/kernel/signal.c +++ 
b/arch/openrisc/kernel/signal.c @@ -46,7 +46,7 @@ static int restore_sigcontext(struct pt_regs *regs, int err = 0; /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; /* * Restore the regs from &sc->regs. diff --git a/arch/parisc/include/asm/thread_info.h b/arch/parisc/include/asm/thread_info.h index a8461183554..fb13e386556 100644 --- a/arch/parisc/include/asm/thread_info.h +++ b/arch/parisc/include/asm/thread_info.h @@ -14,7 +14,6 @@ struct thread_info { mm_segment_t addr_limit; /* user-level address space limit */ __u32 cpu; /* current CPU */ int preempt_count; /* 0=premptable, <0=BUG; will also serve as bh-counter */ - struct restart_block restart_block; }; #define INIT_THREAD_INFO(tsk) \ @@ -25,9 +24,6 @@ struct thread_info { .cpu = 0, \ .addr_limit = KERNEL_DS, \ .preempt_count = INIT_PREEMPT_COUNT, \ - .restart_block = { \ - .fn = do_no_restart_syscall \ - } \ } #define init_thread_info (init_thread_union.thread_info) diff --git a/arch/parisc/include/uapi/asm/mman.h b/arch/parisc/include/uapi/asm/mman.h index 294d251ca7b..6cb8db76fd4 100644 --- a/arch/parisc/include/uapi/asm/mman.h +++ b/arch/parisc/include/uapi/asm/mman.h @@ -40,6 +40,7 @@ #define MADV_SPACEAVAIL 5 /* insure that resources are reserved */ #define MADV_VPS_PURGE 6 /* Purge pages from VM page cache */ #define MADV_VPS_INHERIT 7 /* Inherit parents page size */ +#define MADV_FREE 8 /* free pages only if memory pressure */ /* common/generic parameters */ #define MADV_REMOVE 9 /* remove these pages & resources */ diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c index 012d4fa63d9..9b910a0251b 100644 --- a/arch/parisc/kernel/signal.c +++ b/arch/parisc/kernel/signal.c @@ -99,7 +99,7 @@ sys_rt_sigreturn(struct pt_regs *regs, int in_syscall) sigframe_size = PARISC_RT_SIGFRAME_SIZE32; #endif - current_thread_info()->restart_block.fn = 
do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; /* Unwind the user stack to get the rt_sigframe structure. */ frame = (struct rt_sigframe __user *) diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h index b9dcc936e2d..ddf4e29fbd3 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64.h +++ b/arch/powerpc/include/asm/pgtable-ppc64.h @@ -494,9 +494,11 @@ static inline pte_t *pmdp_ptep(pmd_t *pmd) #define pmd_pfn(pmd) pte_pfn(pmd_pte(pmd)) #define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd)) #define pmd_young(pmd) pte_young(pmd_pte(pmd)) +#define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd)) #define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd))) #define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd))) #define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd))) +#define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd))) #define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd))) #define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd))) diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index ebc4f165690..06596aa4359 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -43,7 +43,6 @@ struct thread_info { int cpu; /* cpu we're on */ int preempt_count; /* 0 => preemptable, <0 => BUG */ - struct restart_block restart_block; unsigned long local_flags; /* private flags for thread */ /* low level flags - has atomic operations done on it */ @@ -59,9 +58,6 @@ struct thread_info { .exec_domain = &default_exec_domain, \ .cpu = 0, \ .preempt_count = INIT_PREEMPT_COUNT, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ .flags = 0, \ } diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index b171001698f..d3a831ac0f9 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -1231,7 +1231,7 @@ long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int 
r8, int tm_restore = 0; #endif /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; rt_sf = (struct rt_sigframe __user *) (regs->gpr[1] + __SIGNAL_FRAMESIZE + 16); @@ -1504,7 +1504,7 @@ long sys_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, #endif /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; sf = (struct sigframe __user *)(regs->gpr[1] + __SIGNAL_FRAMESIZE); sc = &sf->sctx; diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 2cb0c94cafa..c7c24d2e2bd 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -666,7 +666,7 @@ int sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5, #endif /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; if (!access_ok(VERIFY_READ, uc, sizeof(*uc))) goto badframe; diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 4d62fd5b56e..ef1df718642 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -39,7 +39,6 @@ struct thread_info { unsigned long sys_call_table; /* System call table address */ unsigned int cpu; /* current CPU */ int preempt_count; /* 0 => preemptable, <0 => BUG */ - struct restart_block restart_block; unsigned int system_call; __u64 user_timer; __u64 system_timer; @@ -56,9 +55,6 @@ struct thread_info { .flags = 0, \ .cpu = 0, \ .preempt_count = INIT_PREEMPT_COUNT, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ } #define init_thread_info (init_thread_union.thread_info) diff --git a/arch/s390/kernel/compat_signal.c 
b/arch/s390/kernel/compat_signal.c index 34d5fa7b01b..bc1df12dd4f 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -209,7 +209,7 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs) int i; /* Alwys make any pending restarted system call return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; if (__copy_from_user(&user_sregs, &sregs->regs, sizeof(user_sregs))) return -EFAULT; diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 6a2ac257d98..b3ae6f70c6d 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -162,7 +162,7 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs) _sigregs user_sregs; /* Alwys make any pending restarted system call return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; if (__copy_from_user(&user_sregs, sregs, sizeof(user_sregs))) return -EFAULT; diff --git a/arch/score/include/asm/thread_info.h b/arch/score/include/asm/thread_info.h index 656b7ada932..33864fa2a8d 100644 --- a/arch/score/include/asm/thread_info.h +++ b/arch/score/include/asm/thread_info.h @@ -42,7 +42,6 @@ struct thread_info { * 0-0xFFFFFFFF for kernel-thread */ mm_segment_t addr_limit; - struct restart_block restart_block; struct pt_regs *regs; }; @@ -58,9 +57,6 @@ struct thread_info { .cpu = 0, \ .preempt_count = 1, \ .addr_limit = KERNEL_DS, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ } #define init_thread_info (init_thread_union.thread_info) diff --git a/arch/score/kernel/asm-offsets.c b/arch/score/kernel/asm-offsets.c index 57788f44c6f..b4d5214a7a7 100644 --- a/arch/score/kernel/asm-offsets.c +++ b/arch/score/kernel/asm-offsets.c @@ -106,7 +106,6 @@ void output_thread_info_defines(void) OFFSET(TI_CPU, thread_info, cpu); OFFSET(TI_PRE_COUNT, thread_info, 
preempt_count); OFFSET(TI_ADDR_LIMIT, thread_info, addr_limit); - OFFSET(TI_RESTART_BLOCK, thread_info, restart_block); OFFSET(TI_REGS, thread_info, regs); DEFINE(KERNEL_STACK_SIZE, THREAD_SIZE); DEFINE(KERNEL_STACK_MASK, THREAD_MASK); diff --git a/arch/score/kernel/signal.c b/arch/score/kernel/signal.c index 1651807774a..e381c8c4ff6 100644 --- a/arch/score/kernel/signal.c +++ b/arch/score/kernel/signal.c @@ -141,7 +141,7 @@ score_rt_sigreturn(struct pt_regs *regs) int sig; /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; frame = (struct rt_sigframe __user *) regs->regs[0]; if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) diff --git a/arch/sh/include/asm/thread_info.h b/arch/sh/include/asm/thread_info.h index ad27ffa65e2..657c0391962 100644 --- a/arch/sh/include/asm/thread_info.h +++ b/arch/sh/include/asm/thread_info.h @@ -33,7 +33,6 @@ struct thread_info { __u32 cpu; int preempt_count; /* 0 => preemptable, <0 => BUG */ mm_segment_t addr_limit; /* thread address space */ - struct restart_block restart_block; unsigned long previous_sp; /* sp of previous stack in case of nested IRQ stacks */ __u8 supervisor_stack[0]; @@ -63,9 +62,6 @@ struct thread_info { .cpu = 0, \ .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ } #define init_thread_info (init_thread_union.thread_info) diff --git a/arch/sh/kernel/asm-offsets.c b/arch/sh/kernel/asm-offsets.c index 08a2be775b6..542225fedb1 100644 --- a/arch/sh/kernel/asm-offsets.c +++ b/arch/sh/kernel/asm-offsets.c @@ -25,7 +25,6 @@ int main(void) DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count)); - DEFINE(TI_RESTART_BLOCK,offsetof(struct thread_info, restart_block)); DEFINE(TI_SIZE, 
sizeof(struct thread_info)); #ifdef CONFIG_HIBERNATION diff --git a/arch/sh/kernel/signal_32.c b/arch/sh/kernel/signal_32.c index 2f002b24fb9..0b34f2a704f 100644 --- a/arch/sh/kernel/signal_32.c +++ b/arch/sh/kernel/signal_32.c @@ -156,7 +156,7 @@ asmlinkage int sys_sigreturn(void) int r0; /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) goto badframe; @@ -186,7 +186,7 @@ asmlinkage int sys_rt_sigreturn(void) int r0; /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) goto badframe; diff --git a/arch/sh/kernel/signal_64.c b/arch/sh/kernel/signal_64.c index 897abe7b871..71993c6a7d9 100644 --- a/arch/sh/kernel/signal_64.c +++ b/arch/sh/kernel/signal_64.c @@ -260,7 +260,7 @@ asmlinkage int sys_sigreturn(unsigned long r2, unsigned long r3, long long ret; /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) goto badframe; @@ -294,7 +294,7 @@ asmlinkage int sys_rt_sigreturn(unsigned long r2, unsigned long r3, long long ret; /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) goto badframe; diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 1ff9e786416..e890921d5a7 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -731,6 +731,15 @@ static inline pmd_t 
pmd_mkdirty(pmd_t pmd) return __pmd(pte_val(pte)); } +static inline pmd_t pmd_mkclean(pmd_t pmd) +{ + pte_t pte = __pte(pmd_val(pmd)); + + pte = pte_mkclean(pte); + + return __pmd(pte_val(pte)); +} + static inline pmd_t pmd_mkyoung(pmd_t pmd) { pte_t pte = __pte(pmd_val(pmd)); diff --git a/arch/sparc/include/asm/thread_info_32.h b/arch/sparc/include/asm/thread_info_32.h index 025c98446b1..fd7bd0a440c 100644 --- a/arch/sparc/include/asm/thread_info_32.h +++ b/arch/sparc/include/asm/thread_info_32.h @@ -47,8 +47,6 @@ struct thread_info { struct reg_window32 reg_window[NSWINS]; /* align for ldd! */ unsigned long rwbuf_stkptrs[NSWINS]; unsigned long w_saved; - - struct restart_block restart_block; }; /* @@ -62,9 +60,6 @@ struct thread_info { .flags = 0, \ .cpu = 0, \ .preempt_count = INIT_PREEMPT_COUNT, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ } #define init_thread_info (init_thread_union.thread_info) @@ -103,7 +98,6 @@ register struct thread_info *current_thread_info_reg asm("g6"); #define TI_REG_WINDOW 0x30 #define TI_RWIN_SPTRS 0x230 #define TI_W_SAVED 0x250 -/* #define TI_RESTART_BLOCK 0x25n */ /* Nobody cares */ /* * thread information flag bit numbers diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h index 798f0279a4b..ff455164732 100644 --- a/arch/sparc/include/asm/thread_info_64.h +++ b/arch/sparc/include/asm/thread_info_64.h @@ -58,8 +58,6 @@ struct thread_info { unsigned long gsr[7]; unsigned long xfsr[7]; - struct restart_block restart_block; - struct pt_regs *kern_una_regs; unsigned int kern_una_insn; @@ -92,10 +90,9 @@ struct thread_info { #define TI_RWIN_SPTRS 0x000003c8 #define TI_GSR 0x00000400 #define TI_XFSR 0x00000438 -#define TI_RESTART_BLOCK 0x00000470 -#define TI_KUNA_REGS 0x000004a0 -#define TI_KUNA_INSN 0x000004a8 -#define TI_FPREGS 0x000004c0 +#define TI_KUNA_REGS 0x00000470 +#define TI_KUNA_INSN 0x00000478 +#define TI_FPREGS 0x00000480 /* We embed this in the uppermost byte 
of thread_info->flags */ #define FAULT_CODE_WRITE 0x01 /* Write access, implies D-TLB */ @@ -124,9 +121,6 @@ struct thread_info { .current_ds = ASI_P, \ .exec_domain = &default_exec_domain, \ .preempt_count = INIT_PREEMPT_COUNT, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ } #define init_thread_info (init_thread_union.thread_info) diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c index 62deba7be1a..4eed773a773 100644 --- a/arch/sparc/kernel/signal32.c +++ b/arch/sparc/kernel/signal32.c @@ -150,7 +150,7 @@ void do_sigreturn32(struct pt_regs *regs) int err, i; /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; synchronize_user_stack(); @@ -235,7 +235,7 @@ asmlinkage void do_rt_sigreturn32(struct pt_regs *regs) int err, i; /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; synchronize_user_stack(); regs->u_regs[UREG_FP] &= 0x00000000ffffffffUL; diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c index 9ee72fc8e0e..52aa5e4ce5e 100644 --- a/arch/sparc/kernel/signal_32.c +++ b/arch/sparc/kernel/signal_32.c @@ -70,7 +70,7 @@ asmlinkage void do_sigreturn(struct pt_regs *regs) int err; /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; synchronize_user_stack(); diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c index 1a699986803..d88beff47ba 100644 --- a/arch/sparc/kernel/signal_64.c +++ b/arch/sparc/kernel/signal_64.c @@ -254,7 +254,7 @@ void do_rt_sigreturn(struct pt_regs *regs) int err; /* Always make any pending restarted system calls return -EINTR */ - 
current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; synchronize_user_stack (); sf = (struct rt_signal_frame __user *) diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index 981a769b955..a27651e866e 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c @@ -2730,8 +2730,6 @@ void __init trap_init(void) TI_NEW_CHILD != offsetof(struct thread_info, new_child) || TI_CURRENT_DS != offsetof(struct thread_info, current_ds) || - TI_RESTART_BLOCK != offsetof(struct thread_info, - restart_block) || TI_KUNA_REGS != offsetof(struct thread_info, kern_una_regs) || TI_KUNA_INSN != offsetof(struct thread_info, diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h index 48e4fd0f38e..96c14c1430d 100644 --- a/arch/tile/include/asm/thread_info.h +++ b/arch/tile/include/asm/thread_info.h @@ -36,7 +36,6 @@ struct thread_info { mm_segment_t addr_limit; /* thread address space (KERNEL_DS or USER_DS) */ - struct restart_block restart_block; struct single_step_state *step_state; /* single step state (if non-zero) */ int align_ctl; /* controls unaligned access */ @@ -57,9 +56,6 @@ struct thread_info { .cpu = 0, \ .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ .step_state = NULL, \ .align_ctl = 0, \ } diff --git a/arch/tile/kernel/signal.c b/arch/tile/kernel/signal.c index bb0a9ce7ae2..8a524e332c1 100644 --- a/arch/tile/kernel/signal.c +++ b/arch/tile/kernel/signal.c @@ -48,7 +48,7 @@ int restore_sigcontext(struct pt_regs *regs, int err; /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; /* * Enforce that sigcontext is like pt_regs, and doesn't mess diff --git a/arch/um/include/asm/thread_info.h b/arch/um/include/asm/thread_info.h index 
1c5b2a83046..e04114c4fcd 100644 --- a/arch/um/include/asm/thread_info.h +++ b/arch/um/include/asm/thread_info.h @@ -22,7 +22,6 @@ struct thread_info { mm_segment_t addr_limit; /* thread address space: 0-0xBFFFFFFF for user 0-0xFFFFFFFF for kernel */ - struct restart_block restart_block; struct thread_info *real_thread; /* Points to non-IRQ stack */ }; @@ -34,9 +33,6 @@ struct thread_info { .cpu = 0, \ .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ .real_thread = NULL, \ } diff --git a/arch/unicore32/include/asm/thread_info.h b/arch/unicore32/include/asm/thread_info.h index af36d8eabdf..63e2839dfeb 100644 --- a/arch/unicore32/include/asm/thread_info.h +++ b/arch/unicore32/include/asm/thread_info.h @@ -79,7 +79,6 @@ struct thread_info { #ifdef CONFIG_UNICORE_FPU_F64 struct fp_state fpstate __attribute__((aligned(8))); #endif - struct restart_block restart_block; }; #define INIT_THREAD_INFO(tsk) \ @@ -89,9 +88,6 @@ struct thread_info { .flags = 0, \ .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ } #define init_thread_info (init_thread_union.thread_info) diff --git a/arch/unicore32/kernel/signal.c b/arch/unicore32/kernel/signal.c index 7c8fb7018dc..d329f85766c 100644 --- a/arch/unicore32/kernel/signal.c +++ b/arch/unicore32/kernel/signal.c @@ -105,7 +105,7 @@ asmlinkage int __sys_rt_sigreturn(struct pt_regs *regs) struct rt_sigframe __user *frame; /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; /* * Since we stacked the signal on a 64-bit boundary, diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index f9e181aaba9..d0165c9a293 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -169,7 +169,7 @@ static int 
ia32_restore_sigcontext(struct pt_regs *regs, u32 tmp; /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; get_user_try { /* diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index e8a5454acc9..5e105ca5de5 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -278,6 +278,11 @@ static inline pmd_t pmd_mkold(pmd_t pmd) return pmd_clear_flags(pmd, _PAGE_ACCESSED); } +static inline pmd_t pmd_mkclean(pmd_t pmd) +{ + return pmd_clear_flags(pmd, _PAGE_DIRTY); +} + static inline pmd_t pmd_wrprotect(pmd_t pmd) { return pmd_clear_flags(pmd, _PAGE_RW); diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 547e344a6dc..8550f2427d5 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -31,7 +31,6 @@ struct thread_info { __u32 cpu; /* current CPU */ int saved_preempt_count; mm_segment_t addr_limit; - struct restart_block restart_block; void __user *sysenter_return; unsigned int sig_on_uaccess_error:1; unsigned int uaccess_err:1; /* uaccess failed */ @@ -45,9 +44,6 @@ struct thread_info { .cpu = 0, \ .saved_preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ } #define init_thread_info (init_thread_union.thread_info) diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 485981059a4..47cc835cec5 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -334,6 +334,7 @@ void arch_crash_save_vmcoreinfo(void) #endif vmcoreinfo_append_str("KERNELOFFSET=%lx\n", (unsigned long)&_text - __START_KERNEL); + VMCOREINFO_PHYS_BASE(phys_base); } /* arch-dependent functionality related to kexec file-based syscall */ diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 
ed37a768d0f..0a62df4abcf 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -69,7 +69,7 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, unsigned int err = 0; /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; get_user_try { diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c index 79d824551c1..0c8c32bfd79 100644 --- a/arch/x86/um/signal.c +++ b/arch/x86/um/signal.c @@ -157,7 +157,7 @@ static int copy_sc_from_user(struct pt_regs *regs, int err, pid; /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; err = copy_from_user(&sc, from, sizeof(sc)); if (err) diff --git a/arch/xtensa/include/asm/thread_info.h b/arch/xtensa/include/asm/thread_info.h index 470153e8547..a9b5d3ba196 100644 --- a/arch/xtensa/include/asm/thread_info.h +++ b/arch/xtensa/include/asm/thread_info.h @@ -51,7 +51,6 @@ struct thread_info { __s32 preempt_count; /* 0 => preemptable,< 0 => BUG*/ mm_segment_t addr_limit; /* thread address space */ - struct restart_block restart_block; unsigned long cpenable; @@ -72,7 +71,6 @@ struct thread_info { #define TI_CPU 0x00000010 #define TI_PRE_COUNT 0x00000014 #define TI_ADDR_LIMIT 0x00000018 -#define TI_RESTART_BLOCK 0x000001C #endif @@ -90,9 +88,6 @@ struct thread_info { .cpu = 0, \ .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ } #define init_thread_info (init_thread_union.thread_info) diff --git a/arch/xtensa/include/uapi/asm/mman.h b/arch/xtensa/include/uapi/asm/mman.h index 201aec0e044..1b19f25bc56 100644 --- a/arch/xtensa/include/uapi/asm/mman.h +++ b/arch/xtensa/include/uapi/asm/mman.h @@ -80,6 +80,7 @@ #define MADV_SEQUENTIAL 2 /* expect sequential 
page references */ #define MADV_WILLNEED 3 /* will need these pages */ #define MADV_DONTNEED 4 /* don't need these pages */ +#define MADV_FREE 5 /* free pages only if memory pressure */ /* common parameters: try to keep these consistent across architectures */ #define MADV_REMOVE 9 /* remove these pages & resources */ diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c index 4612321c73c..3d733ba16f2 100644 --- a/arch/xtensa/kernel/signal.c +++ b/arch/xtensa/kernel/signal.c @@ -245,7 +245,7 @@ asmlinkage long xtensa_rt_sigreturn(long a0, long a1, long a2, long a3, int ret; /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; if (regs->depc > 64) panic("rt_sigreturn in double exception!\n"); diff --git a/block/genhd.c b/block/genhd.c index 0a536dc05f3..64600e911aa 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -850,7 +850,7 @@ static int show_partition(struct seq_file *seqf, void *v) char buf[BDEVNAME_SIZE]; /* Don't show non-partitionable removeable devices or empty devices */ - if (!get_capacity(sgp) || (!disk_max_parts(sgp) && + if (!get_capacity(sgp) || (!(disk_max_parts(sgp) > 1) && (sgp->flags & GENHD_FL_REMOVABLE))) return 0; if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO) diff --git a/drivers/input/Kconfig b/drivers/input/Kconfig index a11ff74a512..9eac8de9e8b 100644 --- a/drivers/input/Kconfig +++ b/drivers/input/Kconfig @@ -178,6 +178,15 @@ comment "Input Device Drivers" source "drivers/input/keyboard/Kconfig" +config INPUT_LEDS + bool "LED Support" + depends on LEDS_CLASS = INPUT || LEDS_CLASS = y + select LEDS_TRIGGERS + default y + help + This option enables support for LEDs on keyboards managed + by the input layer. 
+ source "drivers/input/mouse/Kconfig" source "drivers/input/joystick/Kconfig" diff --git a/drivers/input/Makefile b/drivers/input/Makefile index 5ca3f631497..2ab5f3336da 100644 --- a/drivers/input/Makefile +++ b/drivers/input/Makefile @@ -6,6 +6,9 @@ obj-$(CONFIG_INPUT) += input-core.o input-core-y := input.o input-compat.o input-mt.o ff-core.o +ifeq ($(CONFIG_INPUT_LEDS),y) +input-core-y += leds.o +endif obj-$(CONFIG_INPUT_FF_MEMLESS) += ff-memless.o obj-$(CONFIG_INPUT_POLLDEV) += input-polldev.o diff --git a/drivers/input/input.c b/drivers/input/input.c index 213e3a1903e..a1e609a6522 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -711,6 +711,9 @@ static void input_disconnect_device(struct input_dev *dev) handle->open = 0; spin_unlock_irq(&dev->event_lock); + + if (is_event_supported(EV_LED, dev->evbit, EV_MAX)) + input_led_disconnect(dev); } /** @@ -2141,6 +2144,9 @@ int input_register_device(struct input_dev *dev) list_add_tail(&dev->node, &input_dev_list); + if (is_event_supported(EV_LED, dev->evbit, EV_MAX)) + input_led_connect(dev); + list_for_each_entry(handler, &input_handler_list, node) input_attach_handler(dev, handler); @@ -2426,6 +2432,8 @@ static int __init input_init(void) goto fail2; } + input_led_init(); + return 0; fail2: input_proc_exit(); @@ -2435,6 +2443,7 @@ static int __init input_init(void) static void __exit input_exit(void) { + input_led_exit(); input_proc_exit(); unregister_chrdev_region(MKDEV(INPUT_MAJOR, 0), INPUT_MAX_CHAR_DEVICES); diff --git a/drivers/input/leds.c b/drivers/input/leds.c new file mode 100644 index 00000000000..193ee2425db --- /dev/null +++ b/drivers/input/leds.c @@ -0,0 +1,272 @@ +/* + * LED support for the input layer + * + * Copyright 2010-2014 Samuel Thibault <samuel.thibault@ens-lyon.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/leds.h> +#include <linux/input.h> + +/* + * Keyboard LEDs are propagated by default like the following example: + * + * VT keyboard numlock trigger + * -> vt::numl VT LED + * -> vt-numl VT trigger + * -> per-device inputX::numl LED + * + * Userland can however choose the trigger for the vt::numl LED, or + * independently choose the trigger for any inputx::numl LED. + * + * + * VT LED classes and triggers are registered on-demand according to + * existing LED devices + */ + +/* Handler for VT LEDs, just triggers the corresponding VT trigger. */ +static void vt_led_set(struct led_classdev *cdev, + enum led_brightness brightness); +static struct led_classdev vt_leds[LED_CNT] = { +#define DEFINE_INPUT_LED(vt_led, nam, deftrig) \ + [vt_led] = { \ + .name = "vt::"nam, \ + .max_brightness = 1, \ + .brightness_set = vt_led_set, \ + .default_trigger = deftrig, \ + } +/* Default triggers for the VT LEDs just correspond to the legacy + * usage. 
*/ + DEFINE_INPUT_LED(LED_NUML, "numl", "kbd-numlock"), + DEFINE_INPUT_LED(LED_CAPSL, "capsl", "kbd-capslock"), + DEFINE_INPUT_LED(LED_SCROLLL, "scrolll", "kbd-scrollock"), + DEFINE_INPUT_LED(LED_COMPOSE, "compose", NULL), + DEFINE_INPUT_LED(LED_KANA, "kana", "kbd-kanalock"), + DEFINE_INPUT_LED(LED_SLEEP, "sleep", NULL), + DEFINE_INPUT_LED(LED_SUSPEND, "suspend", NULL), + DEFINE_INPUT_LED(LED_MUTE, "mute", NULL), + DEFINE_INPUT_LED(LED_MISC, "misc", NULL), + DEFINE_INPUT_LED(LED_MAIL, "mail", NULL), + DEFINE_INPUT_LED(LED_CHARGING, "charging", NULL), +}; +static const char *const vt_led_names[LED_CNT] = { + [LED_NUML] = "numl", + [LED_CAPSL] = "capsl", + [LED_SCROLLL] = "scrolll", + [LED_COMPOSE] = "compose", + [LED_KANA] = "kana", + [LED_SLEEP] = "sleep", + [LED_SUSPEND] = "suspend", + [LED_MUTE] = "mute", + [LED_MISC] = "misc", + [LED_MAIL] = "mail", + [LED_CHARGING] = "charging", +}; +/* Handler for hotplug initialization */ +static void vt_led_trigger_activate(struct led_classdev *cdev); +/* VT triggers */ +static struct led_trigger vt_led_triggers[LED_CNT] = { +#define DEFINE_INPUT_LED_TRIGGER(vt_led, nam) \ + [vt_led] = { \ + .name = "vt-"nam, \ + .activate = vt_led_trigger_activate, \ + } + DEFINE_INPUT_LED_TRIGGER(LED_NUML, "numl"), + DEFINE_INPUT_LED_TRIGGER(LED_CAPSL, "capsl"), + DEFINE_INPUT_LED_TRIGGER(LED_SCROLLL, "scrolll"), + DEFINE_INPUT_LED_TRIGGER(LED_COMPOSE, "compose"), + DEFINE_INPUT_LED_TRIGGER(LED_KANA, "kana"), + DEFINE_INPUT_LED_TRIGGER(LED_SLEEP, "sleep"), + DEFINE_INPUT_LED_TRIGGER(LED_SUSPEND, "suspend"), + DEFINE_INPUT_LED_TRIGGER(LED_MUTE, "mute"), + DEFINE_INPUT_LED_TRIGGER(LED_MISC, "misc"), + DEFINE_INPUT_LED_TRIGGER(LED_MAIL, "mail"), + DEFINE_INPUT_LED_TRIGGER(LED_CHARGING, "charging"), +}; + +/* Lock for registration coherency */ +static DEFINE_MUTEX(vt_led_registered_lock); + +/* Which VT LED classes and triggers are registered */ +static unsigned long vt_led_registered[BITS_TO_LONGS(LED_CNT)]; + +/* Number of input devices 
having each LED */ +static int vt_led_references[LED_CNT]; + +static int vt_led_state[LED_CNT]; +static struct work_struct vt_led_work[LED_CNT]; + +static void vt_led_cb(struct work_struct *work) +{ + int led = work - vt_led_work; + + led_trigger_event(&vt_led_triggers[led], vt_led_state[led]); +} + +/* VT LED state change, tell the VT trigger. */ +static void vt_led_set(struct led_classdev *cdev, + enum led_brightness brightness) +{ + int led = cdev - vt_leds; + + vt_led_state[led] = !!brightness; + schedule_work(&vt_led_work[led]); +} + +/* LED state change for some keyboard, notify that keyboard. */ +static void perdevice_input_led_set(struct led_classdev *cdev, + enum led_brightness brightness) +{ + struct input_dev *dev; + struct led_classdev *leds; + int led; + + dev = cdev->dev->platform_data; + if (!dev) + /* Still initializing */ + return; + leds = dev->leds; + led = cdev - leds; + + input_event(dev, EV_LED, led, !!brightness); + input_event(dev, EV_SYN, SYN_REPORT, 0); +} + +/* Keyboard hotplug, initialize its LED status */ +static void vt_led_trigger_activate(struct led_classdev *cdev) +{ + struct led_trigger *trigger = cdev->trigger; + int led = trigger - vt_led_triggers; + + if (cdev->brightness_set) + cdev->brightness_set(cdev, vt_leds[led].brightness); +} + +/* Free led stuff from input device, used at abortion and disconnection. */ +static void input_led_delete(struct input_dev *dev) +{ + if (dev) { + struct led_classdev *leds = dev->leds; + if (leds) { + int i; + for (i = 0; i < LED_CNT; i++) + kfree(leds[i].name); + kfree(leds); + dev->leds = NULL; + } + } +} + +/* A new input device with potential LEDs to connect. 
*/ +int input_led_connect(struct input_dev *dev) +{ + int i, error = 0; + struct led_classdev *leds; + + dev->leds = leds = kcalloc(LED_CNT, sizeof(*leds), GFP_KERNEL); + if (!dev->leds) + return -ENOMEM; + + /* lazily register missing VT LEDs */ + mutex_lock(&vt_led_registered_lock); + for (i = 0; i < LED_CNT; i++) + if (vt_leds[i].name && test_bit(i, dev->ledbit)) { + if (!vt_led_references[i]) { + led_trigger_register(&vt_led_triggers[i]); + /* This keyboard is first to have led i, + * try to register it */ + if (!led_classdev_register(NULL, &vt_leds[i])) + vt_led_references[i] = 1; + else + led_trigger_unregister(&vt_led_triggers[i]); + } else + vt_led_references[i]++; + } + mutex_unlock(&vt_led_registered_lock); + + /* and register this device's LEDs */ + for (i = 0; i < LED_CNT; i++) + if (vt_leds[i].name && test_bit(i, dev->ledbit)) { + leds[i].name = kasprintf(GFP_KERNEL, "%s::%s", + dev_name(&dev->dev), + vt_led_names[i]); + if (!leds[i].name) { + error = -ENOMEM; + goto err; + } + leds[i].max_brightness = 1; + leds[i].brightness_set = perdevice_input_led_set; + leds[i].default_trigger = vt_led_triggers[i].name; + } + + /* No issue so far, we can register for real. */ + for (i = 0; i < LED_CNT; i++) + if (leds[i].name) { + led_classdev_register(&dev->dev, &leds[i]); + leds[i].dev->platform_data = dev; + perdevice_input_led_set(&leds[i], + vt_leds[i].brightness); + } + + return 0; + +err: + input_led_delete(dev); + return error; +} + +/* + * Disconnected input device. Clean it, and deregister now-useless VT LEDs + * and triggers. 
+ */ +void input_led_disconnect(struct input_dev *dev) +{ + int i; + struct led_classdev *leds = dev->leds; + + for (i = 0; i < LED_CNT; i++) + if (leds[i].name) + led_classdev_unregister(&leds[i]); + + input_led_delete(dev); + + mutex_lock(&vt_led_registered_lock); + for (i = 0; i < LED_CNT; i++) { + if (!vt_leds[i].name || !test_bit(i, dev->ledbit)) + continue; + + vt_led_references[i]--; + if (vt_led_references[i]) { + /* Still some devices needing it */ + continue; + } + + led_classdev_unregister(&vt_leds[i]); + led_trigger_unregister(&vt_led_triggers[i]); + clear_bit(i, vt_led_registered); + } + mutex_unlock(&vt_led_registered_lock); +} + +void __init input_led_init(void) +{ + unsigned i; + + for (i = 0; i < LED_CNT; i++) + INIT_WORK(&vt_led_work[i], vt_led_cb); +} + +void __exit input_led_exit(void) +{ + unsigned i; + + for (i = 0; i < LED_CNT; i++) + cancel_work_sync(&vt_led_work[i]); +} diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig index a6c3d2f153f..59f4228cc9c 100644 --- a/drivers/leds/Kconfig +++ b/drivers/leds/Kconfig @@ -11,9 +11,6 @@ menuconfig NEW_LEDS Say Y to enable Linux LED support. This allows control of supported LEDs from both userspace and optionally, by kernel events (triggers). - This is not related to standard keyboard LEDs which are controlled - via the input system. - if NEW_LEDS config LEDS_CLASS diff --git a/drivers/misc/ti-st/st_core.c b/drivers/misc/ti-st/st_core.c index 54be83d3efd..374480cb69b 100644 --- a/drivers/misc/ti-st/st_core.c +++ b/drivers/misc/ti-st/st_core.c @@ -343,7 +343,7 @@ void st_int_recv(void *disc_data, /* Unknow packet? 
*/ default: type = *ptr; - if (st_gdata->list[type] == NULL) { + if (type >= ST_MAX_CHANNELS || st_gdata->list[type] == NULL) { pr_err("chip/interface misbehavior dropping" " frame starting with 0x%02x", type); goto done; diff --git a/drivers/rtc/rtc-isl12057.c b/drivers/rtc/rtc-isl12057.c index 6e1fcfb5d7e..b94b6225576 100644 --- a/drivers/rtc/rtc-isl12057.c +++ b/drivers/rtc/rtc-isl12057.c @@ -1,5 +1,5 @@ /* - * rtc-isl12057 - Driver for Intersil ISL12057 I2C Real Time Clock + * rtc-isl12057 - Driver for Intersil ISL12057 I2C Real Time Clock / Alarm * * Copyright (C) 2013, Arnaud EBALARD <arno@natisbad.org> * @@ -79,8 +79,10 @@ #define ISL12057_MEM_MAP_LEN 0x10 struct isl12057_rtc_data { + struct rtc_device *rtc; struct regmap *regmap; struct mutex lock; + int irq; }; static void isl12057_rtc_regs_to_tm(struct rtc_time *tm, u8 *regs) @@ -160,14 +162,47 @@ static int isl12057_i2c_validate_chip(struct regmap *regmap) return 0; } -static int isl12057_rtc_read_time(struct device *dev, struct rtc_time *tm) +static int _isl12057_rtc_clear_alarm(struct device *dev) +{ + struct isl12057_rtc_data *data = dev_get_drvdata(dev); + int ret; + + ret = regmap_update_bits(data->regmap, ISL12057_REG_SR, + ISL12057_REG_SR_A1F, 0); + if (ret) + dev_err(dev, "%s: clearing alarm failed (%d)\n", __func__, ret); + + return ret; +} + +static int _isl12057_rtc_update_alarm(struct device *dev, int enable) +{ + struct isl12057_rtc_data *data = dev_get_drvdata(dev); + int ret; + + ret = regmap_update_bits(data->regmap, ISL12057_REG_INT, + ISL12057_REG_INT_A1IE, + enable ? ISL12057_REG_INT_A1IE : 0); + if (ret) + dev_err(dev, "%s: changing alarm interrupt flag failed (%d)\n", + __func__, ret); + + return ret; +} + +/* + * Note: as we only read from device and do not perform any update, there is + * no need for an equivalent function which would try and get driver's main + * lock. Here, it is safe for everyone if we just use regmap internal lock + * on the device when reading. 
+ */ +static int _isl12057_rtc_read_time(struct device *dev, struct rtc_time *tm) { struct isl12057_rtc_data *data = dev_get_drvdata(dev); u8 regs[ISL12057_RTC_SEC_LEN]; unsigned int sr; int ret; - mutex_lock(&data->lock); ret = regmap_read(data->regmap, ISL12057_REG_SR, &sr); if (ret) { dev_err(dev, "%s: unable to read oscillator status flag (%d)\n", @@ -187,8 +222,6 @@ static int isl12057_rtc_read_time(struct device *dev, struct rtc_time *tm) __func__, ret); out: - mutex_unlock(&data->lock); - if (ret) return ret; @@ -197,6 +230,168 @@ out: return rtc_valid_tm(tm); } +static int isl12057_rtc_update_alarm(struct device *dev, int enable) +{ + struct isl12057_rtc_data *data = dev_get_drvdata(dev); + int ret; + + mutex_lock(&data->lock); + ret = _isl12057_rtc_update_alarm(dev, enable); + mutex_unlock(&data->lock); + + return ret; +} + +static int isl12057_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm) +{ + struct isl12057_rtc_data *data = dev_get_drvdata(dev); + struct rtc_time rtc_tm, *alarm_tm = &alarm->time; + unsigned long rtc_secs, alarm_secs; + u8 regs[ISL12057_A1_SEC_LEN]; + unsigned int ir; + int ret; + + mutex_lock(&data->lock); + ret = regmap_bulk_read(data->regmap, ISL12057_REG_A1_SC, regs, + ISL12057_A1_SEC_LEN); + if (ret) { + dev_err(dev, "%s: reading alarm section failed (%d)\n", + __func__, ret); + goto err_unlock; + } + + alarm_tm->tm_sec = bcd2bin(regs[0] & 0x7f); + alarm_tm->tm_min = bcd2bin(regs[1] & 0x7f); + alarm_tm->tm_hour = bcd2bin(regs[2] & 0x3f); + alarm_tm->tm_mday = bcd2bin(regs[3] & 0x3f); + alarm_tm->tm_wday = -1; + + /* + * The alarm section does not store year/month. We use the ones in rtc + * section as a basis and increment month and then year if needed to get + * alarm after current time. 
+ */ + ret = _isl12057_rtc_read_time(dev, &rtc_tm); + if (ret) + goto err_unlock; + + alarm_tm->tm_year = rtc_tm.tm_year; + alarm_tm->tm_mon = rtc_tm.tm_mon; + + ret = rtc_tm_to_time(&rtc_tm, &rtc_secs); + if (ret) + goto err_unlock; + + ret = rtc_tm_to_time(alarm_tm, &alarm_secs); + if (ret) + goto err_unlock; + + if (alarm_secs < rtc_secs) { + if (alarm_tm->tm_mon == 11) { + alarm_tm->tm_mon = 0; + alarm_tm->tm_year += 1; + } else { + alarm_tm->tm_mon += 1; + } + } + + ret = regmap_read(data->regmap, ISL12057_REG_INT, &ir); + if (ret) { + dev_err(dev, "%s: reading alarm interrupt flag failed (%d)\n", + __func__, ret); + goto err_unlock; + } + + alarm->enabled = !!(ir & ISL12057_REG_INT_A1IE); + +err_unlock: + mutex_unlock(&data->lock); + + return ret; +} + +static int isl12057_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm) +{ + struct isl12057_rtc_data *data = dev_get_drvdata(dev); + struct rtc_time *alarm_tm = &alarm->time; + unsigned long rtc_secs, alarm_secs; + u8 regs[ISL12057_A1_SEC_LEN]; + struct rtc_time rtc_tm; + int ret, enable = 1; + + mutex_lock(&data->lock); + ret = _isl12057_rtc_read_time(dev, &rtc_tm); + if (ret) + goto err_unlock; + + ret = rtc_tm_to_time(&rtc_tm, &rtc_secs); + if (ret) + goto err_unlock; + + ret = rtc_tm_to_time(alarm_tm, &alarm_secs); + if (ret) + goto err_unlock; + + /* If alarm time is before current time, disable the alarm */ + if (!alarm->enabled || alarm_secs <= rtc_secs) { + enable = 0; + } else { + /* + * Chip only support alarms up to one month in the future. Let's + * return an error if we get something after that limit. + * Comparison is done by incrementing rtc_tm month field by one + * and checking alarm value is still below. 
+ */ + if (rtc_tm.tm_mon == 11) { /* handle year wrapping */ + rtc_tm.tm_mon = 0; + rtc_tm.tm_year += 1; + } else { + rtc_tm.tm_mon += 1; + } + + ret = rtc_tm_to_time(&rtc_tm, &rtc_secs); + if (ret) + goto err_unlock; + + if (alarm_secs > rtc_secs) { + dev_err(dev, "%s: max for alarm is one month (%d)\n", + __func__, ret); + ret = -EINVAL; + goto err_unlock; + } + } + + /* Disable the alarm before modifying it */ + ret = _isl12057_rtc_update_alarm(dev, 0); + if (ret < 0) { + dev_err(dev, "%s: unable to disable the alarm (%d)\n", + __func__, ret); + goto err_unlock; + } + + /* Program alarm registers */ + regs[0] = bin2bcd(alarm_tm->tm_sec) & 0x7f; + regs[1] = bin2bcd(alarm_tm->tm_min) & 0x7f; + regs[2] = bin2bcd(alarm_tm->tm_hour) & 0x3f; + regs[3] = bin2bcd(alarm_tm->tm_mday) & 0x3f; + + ret = regmap_bulk_write(data->regmap, ISL12057_REG_A1_SC, regs, + ISL12057_A1_SEC_LEN); + if (ret < 0) { + dev_err(dev, "%s: writing alarm section failed (%d)\n", + __func__, ret); + goto err_unlock; + } + + /* Enable or disable alarm */ + ret = _isl12057_rtc_update_alarm(dev, enable); + +err_unlock: + mutex_unlock(&data->lock); + + return ret; +} + static int isl12057_rtc_set_time(struct device *dev, struct rtc_time *tm) { struct isl12057_rtc_data *data = dev_get_drvdata(dev); @@ -262,9 +457,48 @@ static int isl12057_check_rtc_status(struct device *dev, struct regmap *regmap) return 0; } +static int isl12057_rtc_alarm_irq_enable(struct device *dev, + unsigned int enable) +{ + struct isl12057_rtc_data *rtc_data = dev_get_drvdata(dev); + int ret = -ENOTTY; + + if (rtc_data->irq) + ret = isl12057_rtc_update_alarm(dev, enable); + + return ret; +} + +static irqreturn_t isl12057_rtc_interrupt(int irq, void *data) +{ + struct i2c_client *client = data; + struct isl12057_rtc_data *rtc_data = dev_get_drvdata(&client->dev); + struct rtc_device *rtc = rtc_data->rtc; + int ret, handled = IRQ_NONE; + unsigned int sr; + + ret = regmap_read(rtc_data->regmap, ISL12057_REG_SR, &sr); + if (!ret && 
(sr & ISL12057_REG_SR_A1F)) { + dev_dbg(&client->dev, "RTC alarm!\n"); + + rtc_update_irq(rtc, 1, RTC_IRQF | RTC_AF); + + /* Acknowledge and disable the alarm */ + _isl12057_rtc_clear_alarm(&client->dev); + _isl12057_rtc_update_alarm(&client->dev, 0); + + handled = IRQ_HANDLED; + } + + return handled; +} + static const struct rtc_class_ops rtc_ops = { - .read_time = isl12057_rtc_read_time, + .read_time = _isl12057_rtc_read_time, .set_time = isl12057_rtc_set_time, + .read_alarm = isl12057_rtc_read_alarm, + .set_alarm = isl12057_rtc_set_alarm, + .alarm_irq_enable = isl12057_rtc_alarm_irq_enable, }; static struct regmap_config isl12057_rtc_regmap_config = { @@ -277,7 +511,6 @@ static int isl12057_probe(struct i2c_client *client, { struct device *dev = &client->dev; struct isl12057_rtc_data *data; - struct rtc_device *rtc; struct regmap *regmap; int ret; @@ -310,10 +543,79 @@ static int isl12057_probe(struct i2c_client *client, data->regmap = regmap; dev_set_drvdata(dev, data); - rtc = devm_rtc_device_register(dev, DRV_NAME, &rtc_ops, THIS_MODULE); - return PTR_ERR_OR_ZERO(rtc); + if (client->irq > 0) { + ret = devm_request_threaded_irq(dev, client->irq, NULL, + isl12057_rtc_interrupt, + IRQF_SHARED|IRQF_ONESHOT, + DRV_NAME, client); + if (!ret) + data->irq = client->irq; + else + dev_err(dev, "%s: irq %d unavailable (%d)\n", __func__, + client->irq, ret); + } + + /* + * This is needed to have 'wakealarm' sysfs entry available. One + * would expect the device to be marked as a wakeup source only + * when an IRQ pin of the RTC is routed to an interrupt line of the + * CPU. In practice, such an IRQ pin can be connected to a PMIC and + * this allows the device to be powered up when RTC alarm rings. 
+ */ + device_init_wakeup(dev, true); + + data->rtc = devm_rtc_device_register(dev, DRV_NAME, &rtc_ops, + THIS_MODULE); + ret = PTR_ERR_OR_ZERO(data->rtc); + if (ret) { + dev_err(dev, "%s: unable to register RTC device (%d)\n", + __func__, ret); + goto err; + } + + /* We cannot support UIE mode if we do not have an IRQ line */ + if (!data->irq) + data->rtc->uie_unsupported = 1; + +err: + return ret; } +static int isl12057_remove(struct i2c_client *client) +{ + struct isl12057_rtc_data *rtc_data = dev_get_drvdata(&client->dev); + + if (rtc_data->irq > 0) + device_init_wakeup(&client->dev, false); + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int isl12057_rtc_suspend(struct device *dev) +{ + struct isl12057_rtc_data *rtc_data = dev_get_drvdata(dev); + + if (device_may_wakeup(dev)) + return enable_irq_wake(rtc_data->irq); + + return 0; +} + +static int isl12057_rtc_resume(struct device *dev) +{ + struct isl12057_rtc_data *rtc_data = dev_get_drvdata(dev); + + if (device_may_wakeup(dev)) + return disable_irq_wake(rtc_data->irq); + + return 0; +} +#endif + +static SIMPLE_DEV_PM_OPS(isl12057_rtc_pm_ops, isl12057_rtc_suspend, + isl12057_rtc_resume); + #ifdef CONFIG_OF static const struct of_device_id isl12057_dt_match[] = { { .compatible = "isl,isl12057" }, @@ -331,13 +633,15 @@ static struct i2c_driver isl12057_driver = { .driver = { .name = DRV_NAME, .owner = THIS_MODULE, + .pm = &isl12057_rtc_pm_ops, .of_match_table = of_match_ptr(isl12057_dt_match), }, .probe = isl12057_probe, + .remove = isl12057_remove, .id_table = isl12057_id, }; module_i2c_driver(isl12057_driver); MODULE_AUTHOR("Arnaud EBALARD <arno@natisbad.org>"); -MODULE_DESCRIPTION("Intersil ISL12057 RTC driver"); +MODULE_DESCRIPTION("Intersil ISL12057 RTC/Alarm driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/tty/Kconfig b/drivers/tty/Kconfig index b24aa010f68..65cd80bf9ae 100644 --- a/drivers/tty/Kconfig +++ b/drivers/tty/Kconfig @@ -13,6 +13,10 @@ config VT bool "Virtual terminal" if EXPERT depends 
on !S390 && !UML select INPUT + select NEW_LEDS + select LEDS_CLASS + select LEDS_TRIGGERS + select INPUT_LEDS default y ---help--- If you say Y here, you will get support for terminal devices with diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c index 8a89f6e7715..acfb4c00e48 100644 --- a/drivers/tty/vt/keyboard.c +++ b/drivers/tty/vt/keyboard.c @@ -33,6 +33,7 @@ #include <linux/string.h> #include <linux/init.h> #include <linux/slab.h> +#include <linux/leds.h> #include <linux/kbd_kern.h> #include <linux/kbd_diacr.h> @@ -130,6 +131,7 @@ static char rep; /* flag telling character repeat */ static int shift_state = 0; static unsigned char ledstate = 0xff; /* undefined */ +static unsigned char lockstate = 0xff; /* undefined */ static unsigned char ledioctl; /* @@ -961,6 +963,41 @@ static void k_brl(struct vc_data *vc, unsigned char value, char up_flag) } } +/* We route VT keyboard "leds" through triggers */ +static void kbd_ledstate_trigger_activate(struct led_classdev *cdev); + +static struct led_trigger ledtrig_ledstate[] = { +#define DEFINE_LEDSTATE_TRIGGER(kbd_led, nam) \ + [kbd_led] = { \ + .name = nam, \ + .activate = kbd_ledstate_trigger_activate, \ + } + DEFINE_LEDSTATE_TRIGGER(VC_SCROLLOCK, "kbd-scrollock"), + DEFINE_LEDSTATE_TRIGGER(VC_NUMLOCK, "kbd-numlock"), + DEFINE_LEDSTATE_TRIGGER(VC_CAPSLOCK, "kbd-capslock"), + DEFINE_LEDSTATE_TRIGGER(VC_KANALOCK, "kbd-kanalock"), +#undef DEFINE_LEDSTATE_TRIGGER +}; + +static void kbd_lockstate_trigger_activate(struct led_classdev *cdev); + +static struct led_trigger ledtrig_lockstate[] = { +#define DEFINE_LOCKSTATE_TRIGGER(kbd_led, nam) \ + [kbd_led] = { \ + .name = nam, \ + .activate = kbd_lockstate_trigger_activate, \ + } + DEFINE_LOCKSTATE_TRIGGER(VC_SHIFTLOCK, "kbd-shiftlock"), + DEFINE_LOCKSTATE_TRIGGER(VC_ALTGRLOCK, "kbd-altgrlock"), + DEFINE_LOCKSTATE_TRIGGER(VC_CTRLLOCK, "kbd-ctrllock"), + DEFINE_LOCKSTATE_TRIGGER(VC_ALTLOCK, "kbd-altlock"), + DEFINE_LOCKSTATE_TRIGGER(VC_SHIFTLLOCK, 
"kbd-shiftllock"), + DEFINE_LOCKSTATE_TRIGGER(VC_SHIFTRLOCK, "kbd-shiftrlock"), + DEFINE_LOCKSTATE_TRIGGER(VC_CTRLLLOCK, "kbd-ctrlllock"), + DEFINE_LOCKSTATE_TRIGGER(VC_CTRLRLOCK, "kbd-ctrlrlock"), +#undef DEFINE_LOCKSTATE_TRIGGER +}; + /* * The leds display either (i) the status of NumLock, CapsLock, ScrollLock, * or (ii) whatever pattern of lights people want to show using KDSETLED, @@ -995,18 +1032,25 @@ static inline unsigned char getleds(void) return kb->ledflagstate; } -static int kbd_update_leds_helper(struct input_handle *handle, void *data) +/* Called on trigger connection, to set initial state */ +static void kbd_ledstate_trigger_activate(struct led_classdev *cdev) { - unsigned char leds = *(unsigned char *)data; + struct led_trigger *trigger = cdev->trigger; + int led = trigger - ledtrig_ledstate; - if (test_bit(EV_LED, handle->dev->evbit)) { - input_inject_event(handle, EV_LED, LED_SCROLLL, !!(leds & 0x01)); - input_inject_event(handle, EV_LED, LED_NUML, !!(leds & 0x02)); - input_inject_event(handle, EV_LED, LED_CAPSL, !!(leds & 0x04)); - input_inject_event(handle, EV_SYN, SYN_REPORT, 0); - } + tasklet_disable(&keyboard_tasklet); + led_trigger_event(trigger, ledstate & (1 << led) ? LED_FULL : LED_OFF); + tasklet_enable(&keyboard_tasklet); +} - return 0; +static void kbd_lockstate_trigger_activate(struct led_classdev *cdev) +{ + struct led_trigger *trigger = cdev->trigger; + int led = trigger - ledtrig_lockstate; + + tasklet_disable(&keyboard_tasklet); + led_trigger_event(trigger, lockstate & (1 << led) ? 
LED_FULL : LED_OFF); + tasklet_enable(&keyboard_tasklet); } /** @@ -1095,16 +1139,29 @@ static void kbd_bh(unsigned long dummy) { unsigned char leds; unsigned long flags; - + int i; + spin_lock_irqsave(&led_lock, flags); leds = getleds(); spin_unlock_irqrestore(&led_lock, flags); if (leds != ledstate) { - input_handler_for_each_handle(&kbd_handler, &leds, - kbd_update_leds_helper); + for (i = 0; i < ARRAY_SIZE(ledtrig_ledstate); i++) + if ((leds ^ ledstate) & (1 << i)) + led_trigger_event(&ledtrig_ledstate[i], + leds & (1 << i) + ? LED_FULL : LED_OFF); ledstate = leds; } + + if (kbd->lockstate != lockstate) { + for (i = 0; i < ARRAY_SIZE(ledtrig_lockstate); i++) + if ((kbd->lockstate ^ lockstate) & (1 << i)) + led_trigger_event(&ledtrig_lockstate[i], + kbd->lockstate & (1 << i) + ? LED_FULL : LED_OFF); + lockstate = kbd->lockstate; + } } DECLARE_TASKLET_DISABLED(keyboard_tasklet, kbd_bh, 0); @@ -1442,20 +1499,6 @@ static void kbd_disconnect(struct input_handle *handle) kfree(handle); } -/* - * Start keyboard handler on the new keyboard by refreshing LED state to - * match the rest of the system. 
- */ -static void kbd_start(struct input_handle *handle) -{ - tasklet_disable(&keyboard_tasklet); - - if (ledstate != 0xff) - kbd_update_leds_helper(handle, &ledstate); - - tasklet_enable(&keyboard_tasklet); -} - static const struct input_device_id kbd_ids[] = { { .flags = INPUT_DEVICE_ID_MATCH_EVBIT, @@ -1477,7 +1520,6 @@ static struct input_handler kbd_handler = { .match = kbd_match, .connect = kbd_connect, .disconnect = kbd_disconnect, - .start = kbd_start, .name = "kbd", .id_table = kbd_ids, }; @@ -1501,6 +1543,20 @@ int __init kbd_init(void) if (error) return error; + for (i = 0; i < ARRAY_SIZE(ledtrig_ledstate); i++) { + error = led_trigger_register(&ledtrig_ledstate[i]); + if (error) + pr_err("error %d while registering trigger %s\n", + error, ledtrig_ledstate[i].name); + } + + for (i = 0; i < ARRAY_SIZE(ledtrig_lockstate); i++) { + error = led_trigger_register(&ledtrig_lockstate[i]); + if (error) + pr_err("error %d while registering trigger %s\n", + error, ledtrig_lockstate[i].name); + } + tasklet_enable(&keyboard_tasklet); tasklet_schedule(&keyboard_tasklet); diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index a8bc47f75fa..5b6e9f24623 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -107,7 +107,10 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) } if (!journal) { - ret = generic_file_fsync(file, start, end, datasync); + if (test_opt(inode->i_sb, BARRIER)) + ret = generic_file_fsync(file, start, end, datasync); + else + ret = __generic_file_fsync(file, start, end, datasync); if (!ret && !hlist_empty(&inode->i_dentry)) ret = ext4_sync_parent(inode); goto out; diff --git a/fs/fat/cache.c b/fs/fat/cache.c index 91ad9e1c944..d3dd5baccdf 100644 --- a/fs/fat/cache.c +++ b/fs/fat/cache.c @@ -303,6 +303,29 @@ static int fat_bmap_cluster(struct inode *inode, int cluster) return dclus; } +int fat_get_mapped_cluster(struct inode *inode, sector_t sector, + sector_t last_block, + unsigned long *mapped_blocks, sector_t *bmap) +{ + 
struct super_block *sb = inode->i_sb; + struct msdos_sb_info *sbi = MSDOS_SB(sb); + int cluster, offset; + + cluster = sector >> (sbi->cluster_bits - sb->s_blocksize_bits); + offset = sector & (sbi->sec_per_clus - 1); + cluster = fat_bmap_cluster(inode, cluster); + if (cluster < 0) + return cluster; + else if (cluster) { + *bmap = fat_clus_to_blknr(sbi, cluster) + offset; + *mapped_blocks = sbi->sec_per_clus - offset; + if (*mapped_blocks > last_block - sector) + *mapped_blocks = last_block - sector; + } + + return 0; +} + int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys, unsigned long *mapped_blocks, int create) { @@ -311,7 +334,6 @@ int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys, const unsigned long blocksize = sb->s_blocksize; const unsigned char blocksize_bits = sb->s_blocksize_bits; sector_t last_block; - int cluster, offset; *phys = 0; *mapped_blocks = 0; @@ -338,16 +360,6 @@ int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys, return 0; } - cluster = sector >> (sbi->cluster_bits - sb->s_blocksize_bits); - offset = sector & (sbi->sec_per_clus - 1); - cluster = fat_bmap_cluster(inode, cluster); - if (cluster < 0) - return cluster; - else if (cluster) { - *phys = fat_clus_to_blknr(sbi, cluster) + offset; - *mapped_blocks = sbi->sec_per_clus - offset; - if (*mapped_blocks > last_block - sector) - *mapped_blocks = last_block - sector; - } - return 0; + return fat_get_mapped_cluster(inode, sector, last_block, mapped_blocks, + phys); } diff --git a/fs/fat/fat.h b/fs/fat/fat.h index 64e295e8ff3..e59eb6ece9a 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -288,6 +288,9 @@ static inline void fatwchar_to16(__u8 *dst, const wchar_t *src, size_t len) extern void fat_cache_inval_inode(struct inode *inode); extern int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus); +extern int fat_get_mapped_cluster(struct inode *inode, sector_t sector, + sector_t last_block, + unsigned long *mapped_blocks, 
sector_t *bmap); extern int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys, unsigned long *mapped_blocks, int create); @@ -387,6 +390,7 @@ static inline unsigned long fat_dir_hash(int logstart) { return hash_32(logstart, FAT_HASH_BITS); } +extern int fat_add_cluster(struct inode *inode); /* fat/misc.c */ extern __printf(3, 4) __cold diff --git a/fs/fat/file.c b/fs/fat/file.c index 8429c68e305..eac05ebd7c7 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -17,8 +17,12 @@ #include <linux/blkdev.h> #include <linux/fsnotify.h> #include <linux/security.h> +#include <linux/falloc.h> #include "fat.h" +static long fat_fallocate(struct file *file, int mode, + loff_t offset, loff_t len); + static int fat_ioctl_get_attributes(struct inode *inode, u32 __user *user_attr) { u32 attr; @@ -182,6 +186,7 @@ const struct file_operations fat_file_operations = { #endif .fsync = fat_file_fsync, .splice_read = generic_file_splice_read, + .fallocate = fat_fallocate, }; static int fat_cont_expand(struct inode *inode, loff_t size) @@ -220,6 +225,63 @@ out: return err; } +/* + * Preallocate space for a file. This implements fat's fallocate file + * operation, which gets called from sys_fallocate system call. User + * space requests len bytes at offset. If FALLOC_FL_KEEP_SIZE is set + * we just allocate clusters without zeroing them out. Otherwise we + * allocate and zero out clusters via an expanding truncate. + */ +static long fat_fallocate(struct file *file, int mode, + loff_t offset, loff_t len) +{ + int nr_cluster; /* Number of clusters to be allocated */ + loff_t mm_bytes; /* Number of bytes to be allocated for file */ + loff_t ondisksize; /* block aligned on-disk size in bytes*/ + struct inode *inode = file->f_mapping->host; + struct super_block *sb = inode->i_sb; + struct msdos_sb_info *sbi = MSDOS_SB(sb); + int err = 0; + + /* No support for hole punch or other fallocate flags. 
*/ + if (mode & ~FALLOC_FL_KEEP_SIZE) + return -EOPNOTSUPP; + + /* No support for dir */ + if (!S_ISREG(inode->i_mode)) + return -EOPNOTSUPP; + + mutex_lock(&inode->i_mutex); + if (mode & FALLOC_FL_KEEP_SIZE) { + ondisksize = inode->i_blocks << 9; + if ((offset + len) <= ondisksize) + goto error; + + /* First compute the number of clusters to be allocated */ + mm_bytes = offset + len - ondisksize; + nr_cluster = (mm_bytes + (sbi->cluster_size - 1)) >> + sbi->cluster_bits; + + /* Start the allocation.We are not zeroing out the clusters */ + while (nr_cluster-- > 0) { + err = fat_add_cluster(inode); + if (err) + goto error; + } + } else { + err = inode_newsize_ok(inode, (offset + len)); + if (err) + goto error; + + /* This is just an expanding truncate */ + err = fat_cont_expand(inode, (offset + len)); + } + +error: + mutex_unlock(&inode->i_mutex); + return err; +} + /* Free all clusters after the skip'th cluster. */ static int fat_free(struct inode *inode, int skip) { diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 7b41a2dcdd7..1956dae03c7 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -101,7 +101,7 @@ static struct fat_floppy_defaults { }, }; -static int fat_add_cluster(struct inode *inode) +int fat_add_cluster(struct inode *inode) { int err, cluster; @@ -123,7 +123,7 @@ static inline int __fat_get_block(struct inode *inode, sector_t iblock, struct super_block *sb = inode->i_sb; struct msdos_sb_info *sbi = MSDOS_SB(sb); unsigned long mapped_blocks; - sector_t phys; + sector_t phys, last_block; int err, offset; err = fat_bmap(inode, iblock, &phys, &mapped_blocks, create); @@ -143,8 +143,14 @@ static inline int __fat_get_block(struct inode *inode, sector_t iblock, return -EIO; } + last_block = inode->i_blocks >> (sb->s_blocksize_bits - 9); offset = (unsigned long)iblock & (sbi->sec_per_clus - 1); - if (!offset) { + /* + * allocate a cluster according to the following. 
+ * 1) no more available blocks + * 2) not part of fallocate region + */ + if (!offset && !(iblock < last_block)) { /* TODO: multiple cluster allocation would be desirable. */ err = fat_add_cluster(inode); if (err) @@ -282,13 +288,44 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb, return ret; } +static int fat_get_block_bmap(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create) +{ + struct super_block *sb = inode->i_sb; + unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits; + int err; + sector_t bmap, last_block; + unsigned long mapped_blocks; + + BUG_ON(create != 0); + + last_block = inode->i_blocks >> (sb->s_blocksize_bits - 9); + + if (iblock >= last_block) + return 0; + + err = fat_get_mapped_cluster(inode, iblock, last_block, &mapped_blocks, + &bmap); + if (err) + return err; + + if (bmap) { + map_bh(bh_result, sb, bmap); + max_blocks = min(mapped_blocks, max_blocks); + } + + bh_result->b_size = max_blocks << sb->s_blocksize_bits; + + return 0; +} + static sector_t _fat_bmap(struct address_space *mapping, sector_t block) { sector_t blocknr; /* fat_get_cluster() assumes the requested blocknr isn't truncated. */ down_read(&MSDOS_I(mapping->host)->truncate_lock); - blocknr = generic_block_bmap(mapping, block, fat_get_block); + blocknr = generic_block_bmap(mapping, block, fat_get_block_bmap); up_read(&MSDOS_I(mapping->host)->truncate_lock); return blocknr; @@ -562,12 +599,35 @@ out: EXPORT_SYMBOL_GPL(fat_build_inode); +static int __fat_write_inode(struct inode *inode, int wait); static void fat_evict_inode(struct inode *inode) { truncate_inode_pages_final(&inode->i_data); if (!inode->i_nlink) { inode->i_size = 0; fat_truncate_blocks(inode, 0); + } else { + /* Release unwritten fallocated blocks on inode eviction. 
*/ + if ((inode->i_blocks << 9) > + round_up(MSDOS_I(inode)->mmu_private, + inode->i_sb->s_blocksize)) { + int err; + + fat_truncate_blocks(inode, MSDOS_I(inode)->mmu_private); + /* Fallocate results in updating the i_start/iogstart + * for the zero byte file. So, make it return to + * original state during evict and commit it to avoid + * any corruption on the next access to the cluster + * chain for the file. + */ + err = __fat_write_inode(inode, inode_needs_sync(inode)); + if (err) { + fat_msg(inode->i_sb, KERN_WARNING, "Failed to " + "update on disk inode for unused fallocated " + "blocks, inode could be corrupted. Please run " + "fsck"); + } + } } invalidate_inode_buffers(inode); clear_inode(inode); diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c index 32602c667b4..7892e6fddb6 100644 --- a/fs/hfsplus/catalog.c +++ b/fs/hfsplus/catalog.c @@ -38,21 +38,30 @@ int hfsplus_cat_bin_cmp_key(const hfsplus_btree_key *k1, return hfsplus_strcmp(&k1->cat.name, &k2->cat.name); } -void hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *key, - u32 parent, struct qstr *str) +/* Generates key for catalog file/folders record. */ +int hfsplus_cat_build_key(struct super_block *sb, + hfsplus_btree_key *key, u32 parent, struct qstr *str) { - int len; + int len, err; key->cat.parent = cpu_to_be32(parent); - if (str) { - hfsplus_asc2uni(sb, &key->cat.name, HFSPLUS_MAX_STRLEN, - str->name, str->len); - len = be16_to_cpu(key->cat.name.length); - } else { - key->cat.name.length = 0; - len = 0; - } + err = hfsplus_asc2uni(sb, &key->cat.name, HFSPLUS_MAX_STRLEN, + str->name, str->len); + if (unlikely(err < 0)) + return err; + + len = be16_to_cpu(key->cat.name.length); key->key_len = cpu_to_be16(6 + 2 * len); + return 0; +} + +/* Generates key for catalog thread record. 
*/ +void hfsplus_cat_build_key_with_cnid(struct super_block *sb, + hfsplus_btree_key *key, u32 parent) +{ + key->cat.parent = cpu_to_be32(parent); + key->cat.name.length = 0; + key->key_len = cpu_to_be16(6); } static void hfsplus_cat_build_key_uni(hfsplus_btree_key *key, u32 parent, @@ -167,11 +176,16 @@ static int hfsplus_fill_cat_thread(struct super_block *sb, hfsplus_cat_entry *entry, int type, u32 parentid, struct qstr *str) { + int err; + entry->type = cpu_to_be16(type); entry->thread.reserved = 0; entry->thread.parentID = cpu_to_be32(parentid); - hfsplus_asc2uni(sb, &entry->thread.nodeName, HFSPLUS_MAX_STRLEN, + err = hfsplus_asc2uni(sb, &entry->thread.nodeName, HFSPLUS_MAX_STRLEN, str->name, str->len); + if (unlikely(err < 0)) + return err; + return 10 + be16_to_cpu(entry->thread.nodeName.length) * 2; } @@ -183,7 +197,7 @@ int hfsplus_find_cat(struct super_block *sb, u32 cnid, int err; u16 type; - hfsplus_cat_build_key(sb, fd->search_key, cnid, NULL); + hfsplus_cat_build_key_with_cnid(sb, fd->search_key, cnid); err = hfs_brec_read(fd, &tmp, sizeof(hfsplus_cat_entry)); if (err) return err; @@ -250,11 +264,16 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir, if (err) return err; - hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL); + hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid); entry_size = hfsplus_fill_cat_thread(sb, &entry, S_ISDIR(inode->i_mode) ? 
HFSPLUS_FOLDER_THREAD : HFSPLUS_FILE_THREAD, dir->i_ino, str); + if (unlikely(entry_size < 0)) { + err = entry_size; + goto err2; + } + err = hfs_brec_find(&fd, hfs_find_rec_by_key); if (err != -ENOENT) { if (!err) @@ -265,7 +284,10 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir, if (err) goto err2; - hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str); + err = hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str); + if (unlikely(err)) + goto err1; + entry_size = hfsplus_cat_build_record(&entry, cnid, inode); err = hfs_brec_find(&fd, hfs_find_rec_by_key); if (err != -ENOENT) { @@ -288,7 +310,7 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir, return 0; err1: - hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL); + hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid); if (!hfs_brec_find(&fd, hfs_find_rec_by_key)) hfs_brec_remove(&fd); err2: @@ -313,7 +335,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) if (!str) { int len; - hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL); + hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid); err = hfs_brec_find(&fd, hfs_find_rec_by_key); if (err) goto out; @@ -329,7 +351,9 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) off + 2, len); fd.search_key->key_len = cpu_to_be16(6 + len); } else - hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str); + err = hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str); + if (unlikely(err)) + goto out; err = hfs_brec_find(&fd, hfs_find_rec_by_key); if (err) @@ -360,7 +384,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) if (err) goto out; - hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL); + hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid); err = hfs_brec_find(&fd, hfs_find_rec_by_key); if (err) goto out; @@ -405,7 +429,11 @@ int hfsplus_rename_cat(u32 cnid, dst_fd = src_fd; /* find the old dir entry and read the data */ - hfsplus_cat_build_key(sb, 
src_fd.search_key, src_dir->i_ino, src_name); + err = hfsplus_cat_build_key(sb, src_fd.search_key, + src_dir->i_ino, src_name); + if (unlikely(err)) + goto out; + err = hfs_brec_find(&src_fd, hfs_find_rec_by_key); if (err) goto out; @@ -419,7 +447,11 @@ int hfsplus_rename_cat(u32 cnid, type = be16_to_cpu(entry.type); /* create new dir entry with the data from the old entry */ - hfsplus_cat_build_key(sb, dst_fd.search_key, dst_dir->i_ino, dst_name); + err = hfsplus_cat_build_key(sb, dst_fd.search_key, + dst_dir->i_ino, dst_name); + if (unlikely(err)) + goto out; + err = hfs_brec_find(&dst_fd, hfs_find_rec_by_key); if (err != -ENOENT) { if (!err) @@ -436,7 +468,11 @@ int hfsplus_rename_cat(u32 cnid, dst_dir->i_mtime = dst_dir->i_ctime = CURRENT_TIME_SEC; /* finally remove the old entry */ - hfsplus_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name); + err = hfsplus_cat_build_key(sb, src_fd.search_key, + src_dir->i_ino, src_name); + if (unlikely(err)) + goto out; + err = hfs_brec_find(&src_fd, hfs_find_rec_by_key); if (err) goto out; @@ -449,7 +485,7 @@ int hfsplus_rename_cat(u32 cnid, src_dir->i_mtime = src_dir->i_ctime = CURRENT_TIME_SEC; /* remove old thread entry */ - hfsplus_cat_build_key(sb, src_fd.search_key, cnid, NULL); + hfsplus_cat_build_key_with_cnid(sb, src_fd.search_key, cnid); err = hfs_brec_find(&src_fd, hfs_find_rec_by_key); if (err) goto out; @@ -459,9 +495,14 @@ int hfsplus_rename_cat(u32 cnid, goto out; /* create new thread entry */ - hfsplus_cat_build_key(sb, dst_fd.search_key, cnid, NULL); + hfsplus_cat_build_key_with_cnid(sb, dst_fd.search_key, cnid); entry_size = hfsplus_fill_cat_thread(sb, &entry, type, dst_dir->i_ino, dst_name); + if (unlikely(entry_size < 0)) { + err = entry_size; + goto out; + } + err = hfs_brec_find(&dst_fd, hfs_find_rec_by_key); if (err != -ENOENT) { if (!err) diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 610a3260bef..435bea231cc 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -44,7 +44,10 @@ 
static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry, err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); if (err) return ERR_PTR(err); - hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name); + err = hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, + &dentry->d_name); + if (unlikely(err < 0)) + goto fail; again: err = hfs_brec_read(&fd, &entry, sizeof(entry)); if (err) { @@ -97,9 +100,11 @@ again: be32_to_cpu(entry.file.permissions.dev); str.len = sprintf(name, "iNode%d", linkid); str.name = name; - hfsplus_cat_build_key(sb, fd.search_key, + err = hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_SB(sb)->hidden_dir->i_ino, &str); + if (unlikely(err < 0)) + goto fail; goto again; } } else if (!dentry->d_fsdata) @@ -145,7 +150,7 @@ static int hfsplus_readdir(struct file *file, struct dir_context *ctx) err = -ENOMEM; goto out; } - hfsplus_cat_build_key(sb, fd.search_key, inode->i_ino, NULL); + hfsplus_cat_build_key_with_cnid(sb, fd.search_key, inode->i_ino); err = hfs_brec_find(&fd, hfs_find_rec_by_key); if (err) goto out; diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index eb5e059f481..b0441d65fa5 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -443,8 +443,10 @@ int hfsplus_cat_case_cmp_key(const hfsplus_btree_key *k1, const hfsplus_btree_key *k2); int hfsplus_cat_bin_cmp_key(const hfsplus_btree_key *k1, const hfsplus_btree_key *k2); -void hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *key, +int hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *key, u32 parent, struct qstr *str); +void hfsplus_cat_build_key_with_cnid(struct super_block *sb, + hfsplus_btree_key *key, u32 parent); void hfsplus_cat_set_perms(struct inode *inode, struct hfsplus_perm *perms); int hfsplus_find_cat(struct super_block *sb, u32 cnid, struct hfs_find_data *fd); diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 4cf2024b87d..593af2fdcc2 100644 --- a/fs/hfsplus/super.c +++ 
b/fs/hfsplus/super.c @@ -515,7 +515,9 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) err = hfs_find_init(sbi->cat_tree, &fd); if (err) goto out_put_root; - hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_ROOT_CNID, &str); + err = hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_ROOT_CNID, &str); + if (unlikely(err < 0)) + goto out_put_root; if (!hfs_brec_read(&fd, &entry, sizeof(entry))) { hfs_find_exit(&fd); if (entry.type != cpu_to_be16(HFSPLUS_FOLDER)) diff --git a/fs/mpage.c b/fs/mpage.c index 3e79220baba..587c7ed4185 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -482,6 +482,7 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc, struct buffer_head map_bh; loff_t i_size = i_size_read(inode); int ret = 0; + int wr = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE); if (page_has_buffers(page)) { struct buffer_head *head = page_buffers(page); @@ -590,7 +591,7 @@ page_is_mapped: * This page will go to BIO. Do we need to send this BIO off first? 
*/ if (bio && mpd->last_block_in_bio != blocks[0] - 1) - bio = mpage_bio_submit(WRITE, bio); + bio = mpage_bio_submit(wr, bio); alloc_new: if (bio == NULL) { @@ -614,7 +615,7 @@ alloc_new: */ length = first_unmapped << blkbits; if (bio_add_page(bio, page, length, 0) < length) { - bio = mpage_bio_submit(WRITE, bio); + bio = mpage_bio_submit(wr, bio); goto alloc_new; } @@ -624,7 +625,7 @@ alloc_new: set_page_writeback(page); unlock_page(page); if (boundary || (first_unmapped != blocks_per_page)) { - bio = mpage_bio_submit(WRITE, bio); + bio = mpage_bio_submit(wr, bio); if (boundary_block) { write_boundary_block(boundary_bdev, boundary_block, 1 << blkbits); @@ -636,7 +637,7 @@ alloc_new: confused: if (bio) - bio = mpage_bio_submit(WRITE, bio); + bio = mpage_bio_submit(wr, bio); if (mpd->use_writepage) { ret = mapping->a_ops->writepage(page, wbc); @@ -692,8 +693,11 @@ mpage_writepages(struct address_space *mapping, }; ret = write_cache_pages(mapping, wbc, __mpage_writepage, &mpd); - if (mpd.bio) - mpage_bio_submit(WRITE, mpd.bio); + if (mpd.bio) { + int wr = (wbc->sync_mode == WB_SYNC_ALL ? + WRITE_SYNC : WRITE); + mpage_bio_submit(wr, mpd.bio); + } } blk_finish_plug(&plug); return ret; @@ -710,8 +714,11 @@ int mpage_writepage(struct page *page, get_block_t get_block, .use_writepage = 0, }; int ret = __mpage_writepage(page, wbc, &mpd); - if (mpd.bio) - mpage_bio_submit(WRITE, mpd.bio); + if (mpd.bio) { + int wr = (wbc->sync_mode == WB_SYNC_ALL ? 
+ WRITE_SYNC : WRITE); + mpage_bio_submit(wr, mpd.bio); + } return ret; } EXPORT_SYMBOL(mpage_writepage); diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index a93bf989225..fcae9ef1a32 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -5662,7 +5662,7 @@ int ocfs2_remove_btree_range(struct inode *inode, struct ocfs2_extent_tree *et, u32 cpos, u32 phys_cpos, u32 len, int flags, struct ocfs2_cached_dealloc_ctxt *dealloc, - u64 refcount_loc) + u64 refcount_loc, bool refcount_tree_locked) { int ret, credits = 0, extra_blocks = 0; u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); @@ -5676,11 +5676,13 @@ int ocfs2_remove_btree_range(struct inode *inode, BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)); - ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1, - &ref_tree, NULL); - if (ret) { - mlog_errno(ret); - goto bail; + if (!refcount_tree_locked) { + ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1, + &ref_tree, NULL); + if (ret) { + mlog_errno(ret); + goto bail; + } } ret = ocfs2_prepare_refcount_change_for_del(inode, @@ -7021,6 +7023,7 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb, u64 refcount_loc = le64_to_cpu(di->i_refcount_loc); struct ocfs2_extent_tree et; struct ocfs2_cached_dealloc_ctxt dealloc; + struct ocfs2_refcount_tree *ref_tree = NULL; ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh); ocfs2_init_dealloc_ctxt(&dealloc); @@ -7130,9 +7133,18 @@ start: phys_cpos = ocfs2_blocks_to_clusters(inode->i_sb, blkno); + if ((flags & OCFS2_EXT_REFCOUNTED) && trunc_len && !ref_tree) { + status = ocfs2_lock_refcount_tree(osb, refcount_loc, 1, + &ref_tree, NULL); + if (status) { + mlog_errno(status); + goto bail; + } + } + status = ocfs2_remove_btree_range(inode, &et, trunc_cpos, phys_cpos, trunc_len, flags, &dealloc, - refcount_loc); + refcount_loc, true); if (status < 0) { mlog_errno(status); goto bail; @@ -7147,6 +7159,8 @@ start: goto start; bail: + if (ref_tree) + ocfs2_unlock_refcount_tree(osb, 
ref_tree, 1); ocfs2_schedule_truncate_log_flush(osb, 1); diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h index ca381c58412..fb09b97db16 100644 --- a/fs/ocfs2/alloc.h +++ b/fs/ocfs2/alloc.h @@ -142,7 +142,7 @@ int ocfs2_remove_btree_range(struct inode *inode, struct ocfs2_extent_tree *et, u32 cpos, u32 phys_cpos, u32 len, int flags, struct ocfs2_cached_dealloc_ctxt *dealloc, - u64 refcount_loc); + u64 refcount_loc, bool refcount_tree_locked); int ocfs2_num_free_extents(struct ocfs2_super *osb, struct ocfs2_extent_tree *et); diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index d9f222987f2..0aca748e0a7 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -28,6 +28,7 @@ #include <linux/pipe_fs_i.h> #include <linux/mpage.h> #include <linux/quotaops.h> +#include <linux/blkdev.h> #include <cluster/masklog.h> @@ -47,6 +48,9 @@ #include "ocfs2_trace.h" #include "buffer_head_io.h" +#include "dir.h" +#include "namei.h" +#include "sysfile.h" static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) @@ -506,18 +510,21 @@ bail: * * called like this: dio->get_blocks(dio->inode, fs_startblk, * fs_count, map_bh, dio->rw == WRITE); - * - * Note that we never bother to allocate blocks here, and thus ignore the - * create argument. 
*/ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { int ret; + u32 cpos = 0; + int alloc_locked = 0; u64 p_blkno, inode_blocks, contig_blocks; unsigned int ext_flags; unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits; unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits; + unsigned long len = bh_result->b_size; + unsigned int clusters_to_alloc = 0; + + cpos = ocfs2_blocks_to_clusters(inode->i_sb, iblock); /* This function won't even be called if the request isn't all * nicely aligned and of the right size, so there's no need @@ -539,6 +546,40 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock, /* We should already CoW the refcounted extent in case of create. */ BUG_ON(create && (ext_flags & OCFS2_EXT_REFCOUNTED)); + /* allocate blocks if no p_blkno is found, and create == 1 */ + if (!p_blkno && create) { + ret = ocfs2_inode_lock(inode, NULL, 1); + if (ret < 0) { + mlog_errno(ret); + goto bail; + } + + alloc_locked = 1; + + /* fill hole, allocate blocks can't be larger than the size + * of the hole */ + clusters_to_alloc = ocfs2_clusters_for_bytes(inode->i_sb, len); + if (clusters_to_alloc > contig_blocks) + clusters_to_alloc = contig_blocks; + + /* allocate extent and insert them into the extent tree */ + ret = ocfs2_extend_allocation(inode, cpos, + clusters_to_alloc, 0); + if (ret < 0) { + mlog_errno(ret); + goto bail; + } + + ret = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno, + &contig_blocks, &ext_flags); + if (ret < 0) { + mlog(ML_ERROR, "get_blocks() failed iblock=%llu\n", + (unsigned long long)iblock); + ret = -EIO; + goto bail; + } + } + /* * get_more_blocks() expects us to describe a hole by clearing * the mapped bit on bh_result(). 
@@ -556,6 +597,8 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock, contig_blocks = max_blocks; bh_result->b_size = contig_blocks << blocksize_bits; bail: + if (alloc_locked) + ocfs2_inode_unlock(inode, 1); return ret; } @@ -597,6 +640,180 @@ static int ocfs2_releasepage(struct page *page, gfp_t wait) return try_to_free_buffers(page); } +static int ocfs2_is_overwrite(struct ocfs2_super *osb, + struct inode *inode , loff_t offset) +{ + int ret = 0; + u32 v_cpos = 0; + u32 p_cpos = 0; + unsigned int num_clusters = 0; + unsigned int ext_flags = 0; + + v_cpos = ocfs2_bytes_to_clusters(osb->sb, offset); + ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, + &num_clusters, &ext_flags); + if (ret < 0) { + mlog_errno(ret); + return ret; + } + + if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) + return 1; + + return 0; +} + +static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, + struct iov_iter *iter, + loff_t offset) +{ + ssize_t ret = 0; + ssize_t written = 0; + bool orphaned = false; + int is_overwrite = 0; + struct file *file = iocb->ki_filp; + struct inode *inode = file_inode(file)->i_mapping->host; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct buffer_head *di_bh = NULL; + size_t count = iter->count; + journal_t *journal = osb->journal->j_journal; + u32 zero_len; + int cluster_align; + loff_t final_size = offset + count; + int append_write = offset >= i_size_read(inode) ? 1 : 0; + unsigned int num_clusters = 0; + unsigned int ext_flags = 0; + + { + u64 o = offset; + + zero_len = do_div(o, 1 << osb->s_clustersize_bits); + cluster_align = !!zero_len; + } + + /* + * when final_size > inode->i_size, inode->i_size will be + * updated after direct write, so add the inode to orphan + * dir first. 
+ */ + if (final_size > i_size_read(inode)) { + ret = ocfs2_add_inode_to_orphan(osb, inode); + if (ret < 0) + goto out; + orphaned = true; + } + + if (append_write) { + ret = ocfs2_inode_lock(inode, &di_bh, 1); + if (ret < 0) { + mlog_errno(ret); + goto clean_orphan; + } + + if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) + ret = ocfs2_zero_extend(inode, di_bh, offset); + else + ret = ocfs2_extend_no_holes(inode, di_bh, offset, offset); + if (ret < 0) { + mlog_errno(ret); + ocfs2_inode_unlock(inode, 1); + brelse(di_bh); + goto clean_orphan; + } + + is_overwrite = ocfs2_is_overwrite(osb, inode, offset); + if (is_overwrite < 0) { + mlog_errno(is_overwrite); + ocfs2_inode_unlock(inode, 1); + brelse(di_bh); + goto clean_orphan; + } + + ocfs2_inode_unlock(inode, 1); + brelse(di_bh); + di_bh = NULL; + } + + written = __blockdev_direct_IO(WRITE, iocb, inode, inode->i_sb->s_bdev, + iter, offset, + ocfs2_direct_IO_get_blocks, + ocfs2_dio_end_io, NULL, 0); + if (unlikely(written < 0)) { + loff_t i_size = i_size_read(inode); + + if (offset + count > i_size) { + ret = ocfs2_inode_lock(inode, &di_bh, 1); + if (ret < 0) { + mlog_errno(ret); + goto clean_orphan; + } + + if (i_size == i_size_read(inode)) { + ret = ocfs2_truncate_file(inode, di_bh, + i_size); + if (ret < 0) { + if (ret != -ENOSPC) + mlog_errno(ret); + + ocfs2_inode_unlock(inode, 1); + brelse(di_bh); + goto clean_orphan; + } + } + + ocfs2_inode_unlock(inode, 1); + brelse(di_bh); + + ret = jbd2_journal_force_commit(journal); + if (ret < 0) + mlog_errno(ret); + } + } else if (append_write && !is_overwrite && !cluster_align) { + u32 p_cpos = 0; + u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, offset); + + ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, + &num_clusters, &ext_flags); + if (ret < 0) { + mlog_errno(ret); + goto clean_orphan; + } + + BUG_ON(!p_cpos || (ext_flags & OCFS2_EXT_UNWRITTEN)); + + ret = blkdev_issue_zeroout(osb->sb->s_bdev, + p_cpos << (osb->s_clustersize_bits - 9), + zero_len >> 9, + GFP_KERNEL); 
+ if (ret < 0) + mlog_errno(ret); + } + +clean_orphan: + if (orphaned) { + int tmp_ret; + int update_isize = written > 0 ? 1 : 0; + loff_t end = update_isize ? offset + written : 0; + tmp_ret = ocfs2_del_inode_from_orphan(osb, inode, + update_isize, end); + if (tmp_ret < 0) { + ret = tmp_ret; + goto out; + } + + tmp_ret = jbd2_journal_force_commit(journal); + if (tmp_ret < 0) { + ret = tmp_ret; + mlog_errno(tmp_ret); + } + } + +out: + if (ret >= 0) + ret = written; + return ret; +} + static ssize_t ocfs2_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, @@ -604,6 +821,9 @@ static ssize_t ocfs2_direct_IO(int rw, { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file)->i_mapping->host; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + int full_coherency = !(osb->s_mount_opt & + OCFS2_MOUNT_COHERENCY_BUFFERED); /* * Fallback to buffered I/O if we see an inode without @@ -612,14 +832,19 @@ static ssize_t ocfs2_direct_IO(int rw, if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) return 0; - /* Fallback to buffered I/O if we are appending. */ - if (i_size_read(inode) <= offset) + /* Fallback to buffered I/O if we are appending and + * concurrent O_DIRECT writes are allowed. 
+ */ + if (i_size_read(inode) <= offset && !full_coherency) return 0; - return __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, + if (rw == READ) + return __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iter, offset, ocfs2_direct_IO_get_blocks, ocfs2_dio_end_io, NULL, 0); + else + return ocfs2_direct_IO_write(iocb, iter, offset); } static void ocfs2_figure_cluster_boundaries(struct ocfs2_super *osb, @@ -894,7 +1119,7 @@ void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages) } } -static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc) +static void ocfs2_unlock_pages(struct ocfs2_write_ctxt *wc) { int i; @@ -915,7 +1140,11 @@ static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc) page_cache_release(wc->w_target_page); } ocfs2_unlock_and_free_pages(wc->w_pages, wc->w_num_pages); +} +static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc) +{ + ocfs2_unlock_pages(wc); brelse(wc->w_di_bh); kfree(wc); } @@ -1818,16 +2047,6 @@ try_again: if (ret) goto out_commit; } - /* - * We don't want this to fail in ocfs2_write_end(), so do it - * here. - */ - ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), wc->w_di_bh, - OCFS2_JOURNAL_ACCESS_WRITE); - if (ret) { - mlog_errno(ret); - goto out_quota; - } /* * Fill our page array first. 
That way we've grabbed enough so @@ -1978,7 +2197,7 @@ int ocfs2_write_end_nolock(struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata) { - int i; + int i, ret; unsigned from, to, start = pos & (PAGE_CACHE_SIZE - 1); struct inode *inode = mapping->host; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); @@ -2028,6 +2247,14 @@ int ocfs2_write_end_nolock(struct address_space *mapping, } } + ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), wc->w_di_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + copied = ret; + mlog_errno(ret); + goto out; + } + out_write_size: pos += copied; if (pos > i_size_read(inode)) { @@ -2042,11 +2269,20 @@ out_write_size: ocfs2_update_inode_fsync_trans(handle, inode, 1); ocfs2_journal_dirty(handle, wc->w_di_bh); +out: + /* unlock pages before dealloc since it needs acquiring j_trans_barrier + * lock, or it will cause a deadlock since journal commit threads holds + * this lock and will ask for the page lock when flushing the data. + * put it here to preserve the unlock order. 
+ */ + ocfs2_unlock_pages(wc); + ocfs2_commit_trans(osb, handle); ocfs2_run_deallocs(osb, &wc->w_dealloc); - ocfs2_free_write_ctxt(wc); + brelse(wc->w_di_bh); + kfree(wc); return copied; } diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 79d56dc981b..319e786175a 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -4479,7 +4479,7 @@ int ocfs2_dx_dir_truncate(struct inode *dir, struct buffer_head *di_bh) p_cpos = ocfs2_blocks_to_clusters(dir->i_sb, blkno); ret = ocfs2_remove_btree_range(dir, &et, cpos, p_cpos, clen, 0, - &dealloc, 0); + &dealloc, 0, false); if (ret) { mlog_errno(ret); goto out; diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c index b46278f9ae4..fd6bbbbd7d7 100644 --- a/fs/ocfs2/dlm/dlmast.c +++ b/fs/ocfs2/dlm/dlmast.c @@ -385,8 +385,12 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data, head = &res->granted; list_for_each_entry(lock, head, list) { - if (lock->ml.cookie == cookie) + /* if lock is found but unlock is pending ignore the bast */ + if (lock->ml.cookie == cookie) { + if (lock->unlock_pending) + break; goto do_ast; + } } mlog(0, "Got %sast for unknown lock! 
cookie=%u:%llu, name=%.*s, " diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 3689b359204..b26b476e1f0 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -498,16 +498,6 @@ static void dlm_lockres_release(struct kref *kref) mlog(0, "destroying lockres %.*s\n", res->lockname.len, res->lockname.name); - spin_lock(&dlm->track_lock); - if (!list_empty(&res->tracking)) - list_del_init(&res->tracking); - else { - mlog(ML_ERROR, "Resource %.*s not on the Tracking list\n", - res->lockname.len, res->lockname.name); - dlm_print_one_lock_resource(res); - } - spin_unlock(&dlm->track_lock); - atomic_dec(&dlm->res_cur_count); if (!hlist_unhashed(&res->hash_node) || @@ -695,14 +685,6 @@ void __dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm, res->inflight_assert_workers); } -static void dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res) -{ - spin_lock(&res->spinlock); - __dlm_lockres_grab_inflight_worker(dlm, res); - spin_unlock(&res->spinlock); -} - static void __dlm_lockres_drop_inflight_worker(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) { @@ -1646,6 +1628,7 @@ send_response: } mlog(0, "%u is the owner of %.*s, cleaning everyone else\n", dlm->node_num, res->lockname.len, res->lockname.name); + spin_lock(&res->spinlock); ret = dlm_dispatch_assert_master(dlm, res, 0, request->node_idx, DLM_ASSERT_MASTER_MLE_CLEANUP); if (ret < 0) { @@ -1653,7 +1636,8 @@ send_response: response = DLM_MASTER_RESP_ERROR; dlm_lockres_put(res); } else - dlm_lockres_grab_inflight_worker(dlm, res); + __dlm_lockres_grab_inflight_worker(dlm, res); + spin_unlock(&res->spinlock); } else { if (res) dlm_lockres_put(res); diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c index 69aac6f088a..2e5e6d5fffe 100644 --- a/fs/ocfs2/dlm/dlmthread.c +++ b/fs/ocfs2/dlm/dlmthread.c @@ -211,6 +211,16 @@ static void dlm_purge_lockres(struct dlm_ctxt *dlm, __dlm_unhash_lockres(dlm, res); + spin_lock(&dlm->track_lock); 
+ if (!list_empty(&res->tracking)) + list_del_init(&res->tracking); + else { + mlog(ML_ERROR, "Resource %.*s not on the Tracking list\n", + res->lockname.len, res->lockname.name); + __dlm_print_one_lock_resource(res); + } + spin_unlock(&dlm->track_lock); + /* lockres is not in the hash now. drop the flag and wake up * any processes waiting in dlm_get_lock_resource. */ if (!master) { diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 69fb9f75b08..38f3a562d77 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -295,7 +295,7 @@ out: return ret; } -static int ocfs2_set_inode_size(handle_t *handle, +int ocfs2_set_inode_size(handle_t *handle, struct inode *inode, struct buffer_head *fe_bh, u64 new_i_size) @@ -441,7 +441,7 @@ out: return status; } -static int ocfs2_truncate_file(struct inode *inode, +int ocfs2_truncate_file(struct inode *inode, struct buffer_head *di_bh, u64 new_i_size) { @@ -709,6 +709,13 @@ leave: return status; } +int ocfs2_extend_allocation(struct inode *inode, u32 logical_start, + u32 clusters_to_add, int mark_unwritten) +{ + return __ocfs2_extend_allocation(inode, logical_start, + clusters_to_add, mark_unwritten); +} + /* * While a write will already be ordering the data, a truncate will not. * Thus, we need to explicitly order the zeroed pages. @@ -1352,44 +1359,6 @@ out: return ret; } -/* - * Will look for holes and unwritten extents in the range starting at - * pos for count bytes (inclusive). 
- */ -static int ocfs2_check_range_for_holes(struct inode *inode, loff_t pos, - size_t count) -{ - int ret = 0; - unsigned int extent_flags; - u32 cpos, clusters, extent_len, phys_cpos; - struct super_block *sb = inode->i_sb; - - cpos = pos >> OCFS2_SB(sb)->s_clustersize_bits; - clusters = ocfs2_clusters_for_bytes(sb, pos + count) - cpos; - - while (clusters) { - ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, &extent_len, - &extent_flags); - if (ret < 0) { - mlog_errno(ret); - goto out; - } - - if (phys_cpos == 0 || (extent_flags & OCFS2_EXT_UNWRITTEN)) { - ret = 1; - break; - } - - if (extent_len > clusters) - extent_len = clusters; - - clusters -= extent_len; - cpos += extent_len; - } -out: - return ret; -} - static int ocfs2_write_remove_suid(struct inode *inode) { int ret; @@ -1803,7 +1772,7 @@ static int ocfs2_remove_inode_range(struct inode *inode, ret = ocfs2_remove_btree_range(inode, &et, trunc_cpos, phys_cpos, trunc_len, flags, - &dealloc, refcount_loc); + &dealloc, refcount_loc, false); if (ret < 0) { mlog_errno(ret); goto out; @@ -2109,6 +2078,9 @@ static int ocfs2_prepare_inode_for_write(struct file *file, struct dentry *dentry = file->f_path.dentry; struct inode *inode = dentry->d_inode; loff_t saved_pos = 0, end; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + int full_coherency = !(osb->s_mount_opt & + OCFS2_MOUNT_COHERENCY_BUFFERED); /* * We start with a read level meta lock and only jump to an ex @@ -2197,23 +2169,11 @@ static int ocfs2_prepare_inode_for_write(struct file *file, * one node could wind up truncating another * nodes writes. */ - if (end > i_size_read(inode)) { + if (end > i_size_read(inode) && !full_coherency) { *direct_io = 0; break; } - /* - * We don't fill holes during direct io, so - * check for them here. If any are found, the - * caller will have to retake some cluster - * locks and initiate the io as buffered. 
- */ - ret = ocfs2_check_range_for_holes(inode, saved_pos, count); - if (ret == 1) { - *direct_io = 0; - ret = 0; - } else if (ret < 0) - mlog_errno(ret); break; } @@ -2243,6 +2203,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, u32 old_clusters; struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); + struct address_space *mapping = file->f_mapping; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); int full_coherency = !(osb->s_mount_opt & OCFS2_MOUNT_COHERENCY_BUFFERED); @@ -2357,11 +2318,50 @@ relock: iov_iter_truncate(from, count); if (direct_io) { + loff_t endbyte; + ssize_t written_buffered; written = generic_file_direct_write(iocb, from, *ppos); - if (written < 0) { + if (written < 0 || written == count) { ret = written; goto out_dio; } + + /* + * direct-io write to a hole: fall through to buffered I/O + * for completing the rest of the request. + */ + count -= written; + written_buffered = generic_perform_write(file, from, *ppos); + /* + * If generic_file_buffered_write() returned a synchronous error + * then we want to return the number of bytes which were + * direct-written, or the error code if that was zero. Note + * that this differs from normal direct-io semantics, which + * will return -EFOO even if some bytes were written. + */ + if (written_buffered < 0) { + ret = written_buffered; + goto out; + } + + /* We need to ensure that the page cache pages are written to + * disk and invalidated to preserve the expected O_DIRECT + * semantics. 
+ */ + endbyte = *ppos + written_buffered - written - 1; + ret = filemap_write_and_wait_range(file->f_mapping, *ppos, + endbyte); + if (ret == 0) { + written = written_buffered; + invalidate_mapping_pages(mapping, + *ppos >> PAGE_CACHE_SHIFT, + endbyte >> PAGE_CACHE_SHIFT); + } else { + /* + * We don't know how much we wrote, so just return + * the number of bytes which were direct-written + */ + } } else { current->backing_dev_info = file->f_mapping->backing_dev_info; written = generic_perform_write(file, from, *ppos); diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h index 97bf761c9e7..e8c62f22215 100644 --- a/fs/ocfs2/file.h +++ b/fs/ocfs2/file.h @@ -51,13 +51,22 @@ int ocfs2_add_inode_data(struct ocfs2_super *osb, struct ocfs2_alloc_context *data_ac, struct ocfs2_alloc_context *meta_ac, enum ocfs2_alloc_restarted *reason_ret); +int ocfs2_set_inode_size(handle_t *handle, + struct inode *inode, + struct buffer_head *fe_bh, + u64 new_i_size); int ocfs2_simple_size_update(struct inode *inode, struct buffer_head *di_bh, u64 new_i_size); +int ocfs2_truncate_file(struct inode *inode, + struct buffer_head *di_bh, + u64 new_i_size); int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh, u64 new_i_size, u64 zero_to); int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh, loff_t zero_to); +int ocfs2_extend_allocation(struct inode *inode, u32 logical_start, + u32 clusters_to_add, int mark_unwritten); int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index c8b25de9efb..ae1a5e9f8f1 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -1191,17 +1191,9 @@ void ocfs2_evict_inode(struct inode *inode) int ocfs2_drop_inode(struct inode *inode) { struct ocfs2_inode_info *oi = OCFS2_I(inode); - int res; - trace_ocfs2_drop_inode((unsigned long long)oi->ip_blkno, inode->i_nlink, oi->ip_flags); - - if 
(oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED) - res = 1; - else - res = generic_drop_inode(inode); - - return res; + return 1; } /* diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h index ca3431ee7f2..5e86b247c82 100644 --- a/fs/ocfs2/inode.h +++ b/fs/ocfs2/inode.h @@ -81,6 +81,8 @@ struct ocfs2_inode_info tid_t i_sync_tid; tid_t i_datasync_tid; + wait_queue_head_t append_dio_wq; + struct dquot *i_dquot[MAXQUOTAS]; }; diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 4f502382180..e238bbc27a3 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -50,6 +50,8 @@ #include "sysfile.h" #include "uptodate.h" #include "quota.h" +#include "file.h" +#include "namei.h" #include "buffer_head_io.h" #include "ocfs2_trace.h" @@ -69,13 +71,15 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, static int ocfs2_trylock_journal(struct ocfs2_super *osb, int slot_num); static int ocfs2_recover_orphans(struct ocfs2_super *osb, - int slot); + int slot, + enum ocfs2_orphan_reco_type orphan_reco_type); static int ocfs2_commit_thread(void *arg); static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal, int slot_num, struct ocfs2_dinode *la_dinode, struct ocfs2_dinode *tl_dinode, - struct ocfs2_quota_recovery *qrec); + struct ocfs2_quota_recovery *qrec, + enum ocfs2_orphan_reco_type orphan_reco_type); static inline int ocfs2_wait_on_mount(struct ocfs2_super *osb) { @@ -149,7 +153,8 @@ int ocfs2_compute_replay_slots(struct ocfs2_super *osb) return 0; } -void ocfs2_queue_replay_slots(struct ocfs2_super *osb) +void ocfs2_queue_replay_slots(struct ocfs2_super *osb, + enum ocfs2_orphan_reco_type orphan_reco_type) { struct ocfs2_replay_map *replay_map = osb->replay_map; int i; @@ -163,7 +168,8 @@ void ocfs2_queue_replay_slots(struct ocfs2_super *osb) for (i = 0; i < replay_map->rm_slots; i++) if (replay_map->rm_replay_slots[i]) ocfs2_queue_recovery_completion(osb->journal, i, NULL, - NULL, NULL); + NULL, NULL, + orphan_reco_type); replay_map->rm_state = 
REPLAY_DONE; } @@ -1174,6 +1180,7 @@ struct ocfs2_la_recovery_item { struct ocfs2_dinode *lri_la_dinode; struct ocfs2_dinode *lri_tl_dinode; struct ocfs2_quota_recovery *lri_qrec; + enum ocfs2_orphan_reco_type lri_orphan_reco_type; }; /* Does the second half of the recovery process. By this point, the @@ -1195,6 +1202,7 @@ void ocfs2_complete_recovery(struct work_struct *work) struct ocfs2_dinode *la_dinode, *tl_dinode; struct ocfs2_la_recovery_item *item, *n; struct ocfs2_quota_recovery *qrec; + enum ocfs2_orphan_reco_type orphan_reco_type; LIST_HEAD(tmp_la_list); trace_ocfs2_complete_recovery( @@ -1212,6 +1220,7 @@ void ocfs2_complete_recovery(struct work_struct *work) la_dinode = item->lri_la_dinode; tl_dinode = item->lri_tl_dinode; qrec = item->lri_qrec; + orphan_reco_type = item->lri_orphan_reco_type; trace_ocfs2_complete_recovery_slot(item->lri_slot, la_dinode ? le64_to_cpu(la_dinode->i_blkno) : 0, @@ -1236,7 +1245,8 @@ void ocfs2_complete_recovery(struct work_struct *work) kfree(tl_dinode); } - ret = ocfs2_recover_orphans(osb, item->lri_slot); + ret = ocfs2_recover_orphans(osb, item->lri_slot, + orphan_reco_type); if (ret < 0) mlog_errno(ret); @@ -1261,7 +1271,8 @@ static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal, int slot_num, struct ocfs2_dinode *la_dinode, struct ocfs2_dinode *tl_dinode, - struct ocfs2_quota_recovery *qrec) + struct ocfs2_quota_recovery *qrec, + enum ocfs2_orphan_reco_type orphan_reco_type) { struct ocfs2_la_recovery_item *item; @@ -1285,6 +1296,7 @@ static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal, item->lri_slot = slot_num; item->lri_tl_dinode = tl_dinode; item->lri_qrec = qrec; + item->lri_orphan_reco_type = orphan_reco_type; spin_lock(&journal->j_lock); list_add_tail(&item->lri_list, &journal->j_la_cleanups); @@ -1304,7 +1316,8 @@ void ocfs2_complete_mount_recovery(struct ocfs2_super *osb) /* No need to queue up our truncate_log as regular cleanup will catch * that */ 
ocfs2_queue_recovery_completion(journal, osb->slot_num, - osb->local_alloc_copy, NULL, NULL); + osb->local_alloc_copy, NULL, NULL, + ORPHAN_NEED_TRUNCATE); ocfs2_schedule_truncate_log_flush(osb, 0); osb->local_alloc_copy = NULL; @@ -1312,7 +1325,7 @@ void ocfs2_complete_mount_recovery(struct ocfs2_super *osb) /* queue to recover orphan slots for all offline slots */ ocfs2_replay_map_set_state(osb, REPLAY_NEEDED); - ocfs2_queue_replay_slots(osb); + ocfs2_queue_replay_slots(osb, ORPHAN_NEED_TRUNCATE); ocfs2_free_replay_slots(osb); } @@ -1323,7 +1336,8 @@ void ocfs2_complete_quota_recovery(struct ocfs2_super *osb) osb->slot_num, NULL, NULL, - osb->quota_rec); + osb->quota_rec, + ORPHAN_NEED_TRUNCATE); osb->quota_rec = NULL; } } @@ -1360,7 +1374,7 @@ restart: /* queue recovery for our own slot */ ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL, - NULL, NULL); + NULL, NULL, ORPHAN_NO_NEED_TRUNCATE); spin_lock(&osb->osb_lock); while (rm->rm_used) { @@ -1419,13 +1433,14 @@ skip_recovery: continue; } ocfs2_queue_recovery_completion(osb->journal, rm_quota[i], - NULL, NULL, qrec); + NULL, NULL, qrec, + ORPHAN_NEED_TRUNCATE); } ocfs2_super_unlock(osb, 1); /* queue recovery for offline slots */ - ocfs2_queue_replay_slots(osb); + ocfs2_queue_replay_slots(osb, ORPHAN_NEED_TRUNCATE); bail: mutex_lock(&osb->recovery_lock); @@ -1712,7 +1727,7 @@ static int ocfs2_recover_node(struct ocfs2_super *osb, /* This will kfree the memory pointed to by la_copy and tl_copy */ ocfs2_queue_recovery_completion(osb->journal, slot_num, la_copy, - tl_copy, NULL); + tl_copy, NULL, ORPHAN_NEED_TRUNCATE); status = 0; done: @@ -1902,7 +1917,7 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb) for (i = 0; i < osb->max_slots; i++) ocfs2_queue_recovery_completion(osb->journal, i, NULL, NULL, - NULL); + NULL, ORPHAN_NO_NEED_TRUNCATE); /* * We queued a recovery on orphan slots, increment the sequence * number and update LVB so other node will skip the scan for a while @@ -2090,6 
+2105,39 @@ static void ocfs2_clear_recovering_orphan_dir(struct ocfs2_super *osb, ocfs2_node_map_clear_bit(osb, &osb->osb_recovering_orphan_dirs, slot); } +static int ocfs2_truncate_file_locked(struct inode *inode) +{ + struct buffer_head *di_bh = NULL; + int ret; + + ret = ocfs2_rw_lock(inode, 1); + if (ret < 0) { + mlog_errno(ret); + goto out; + } + + ret = ocfs2_inode_lock(inode, &di_bh, 1); + if (ret < 0) { + ocfs2_rw_unlock(inode, 1); + mlog_errno(ret); + goto out; + } + + ret = ocfs2_truncate_file(inode, di_bh, i_size_read(inode)); + if (ret < 0) { + if (ret != -ENOSPC) + mlog_errno(ret); + ret = -ENOSPC; + } + + ocfs2_inode_unlock(inode, 1); + ocfs2_rw_unlock(inode, 1); + brelse(di_bh); + +out: + return ret; +} + /* * Orphan recovery. Each mounted node has it's own orphan dir which we * must run during recovery. Our strategy here is to build a list of @@ -2109,7 +2157,8 @@ static void ocfs2_clear_recovering_orphan_dir(struct ocfs2_super *osb, * advertising our state to ocfs2_delete_inode(). */ static int ocfs2_recover_orphans(struct ocfs2_super *osb, - int slot) + int slot, + enum ocfs2_orphan_reco_type orphan_reco_type) { int ret = 0; struct inode *inode = NULL; @@ -2134,12 +2183,38 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb, iter = oi->ip_next_orphan; - spin_lock(&oi->ip_lock); - /* Set the proper information to get us going into - * ocfs2_delete_inode. */ - oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED; - spin_unlock(&oi->ip_lock); + /* + * We need to take and drop the inode lock to + * force read inode from disk. + */ + ret = ocfs2_inode_lock(inode, NULL, 0); + if (ret) { + mlog_errno(ret); + goto next; + } + ocfs2_inode_unlock(inode, 0); + + if (inode->i_nlink == 0) { + spin_lock(&oi->ip_lock); + /* Set the proper information to get us going into + * ocfs2_delete_inode. 
*/ + oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED; + spin_unlock(&oi->ip_lock); + } else if (orphan_reco_type == ORPHAN_NEED_TRUNCATE) { + ret = ocfs2_truncate_file_locked(inode); + if (ret) { + mlog_errno(ret); + goto next; + } + + ret = ocfs2_del_inode_from_orphan(osb, inode, 0, 0); + if (ret) + mlog_errno(ret); + + wake_up(&OCFS2_I(inode)->append_dio_wq); + } /* else if ORPHAN_NO_NEED_TRUNCATE, do nothing */ +next: iput(inode); inode = iter; diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 7f8cde94abf..f4cd3c3e9fb 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -472,6 +472,11 @@ static inline int ocfs2_unlink_credits(struct super_block *sb) * orphan dir index leaf */ #define OCFS2_DELETE_INODE_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 4) +/* dinode + orphan dir dinode + extent tree leaf block + orphan dir entry + + * orphan dir index root + orphan dir index leaf */ +#define OCFS2_INODE_ADD_TO_ORPHAN_CREDITS (2 * OCFS2_INODE_UPDATE_CREDITS + 4) +#define OCFS2_INODE_DEL_FROM_ORPHAN_CREDITS OCFS2_INODE_ADD_TO_ORPHAN_CREDITS + /* dinode update, old dir dinode update, new dir dinode update, old * dir dir entry, new dir dir entry, dir entry update for renaming * directory + target unlink + 3 x dir index leaves */ diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index b931e04e338..f15b560e1bb 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -81,6 +81,12 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb, char *name, struct ocfs2_dir_lookup_result *lookup); +static int ocfs2_dio_prepare_orphan_dir(struct ocfs2_super *osb, + struct inode **ret_orphan_dir, + u64 blkno, + char *name, + struct ocfs2_dir_lookup_result *lookup); + static int ocfs2_orphan_add(struct ocfs2_super *osb, handle_t *handle, struct inode *inode, @@ -89,6 +95,15 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb, struct ocfs2_dir_lookup_result *lookup, struct inode *orphan_dir_inode); +static int ocfs2_dio_orphan_add(struct ocfs2_super *osb, + handle_t 
*handle, + struct inode *inode, + struct buffer_head *fe_bh, + char *name, + struct ocfs2_dir_lookup_result *lookup, + struct inode *orphan_dir_inode, + bool orphaned); + static int ocfs2_create_symlink_data(struct ocfs2_super *osb, handle_t *handle, struct inode *inode, @@ -2137,6 +2152,51 @@ out: return ret; } +/** + * Copy from ocfs2_prepare_orphan_dir(). The difference: + * It will still lock orphan dir if entry exists. + * Caller must take care of -EEXIST and responsible for unlock. +*/ +static int ocfs2_dio_prepare_orphan_dir(struct ocfs2_super *osb, + struct inode **ret_orphan_dir, + u64 blkno, + char *name, + struct ocfs2_dir_lookup_result *lookup) +{ + struct inode *orphan_dir_inode = NULL; + struct buffer_head *orphan_dir_bh = NULL; + int ret = 0; + + ret = ocfs2_lookup_lock_orphan_dir(osb, &orphan_dir_inode, + &orphan_dir_bh); + if (ret < 0) { + mlog_errno(ret); + return ret; + } + + ret = __ocfs2_prepare_orphan_dir(orphan_dir_inode, orphan_dir_bh, + blkno, name, lookup); + if (ret < 0 && ret != -EEXIST) { + mlog_errno(ret); + goto out; + } + + *ret_orphan_dir = orphan_dir_inode; + +out: + brelse(orphan_dir_bh); + + if (ret && ret != -EEXIST) { + ocfs2_inode_unlock(orphan_dir_inode, 1); + mutex_unlock(&orphan_dir_inode->i_mutex); + iput(orphan_dir_inode); + } + + if (ret && ret != -EEXIST) + mlog_errno(ret); + return ret; +} + static int ocfs2_orphan_add(struct ocfs2_super *osb, handle_t *handle, struct inode *inode, @@ -2226,6 +2286,100 @@ leave: return status; } +/** + * Copy from ocfs2_orphan_add, the difference: + * 1. Do not add entry if already added. + * 2. Update di flags OCFS2_DIO_ORPHANED_FL and record the + * orphan slot. 
+*/ +static int ocfs2_dio_orphan_add(struct ocfs2_super *osb, + handle_t *handle, + struct inode *inode, + struct buffer_head *fe_bh, + char *name, + struct ocfs2_dir_lookup_result *lookup, + struct inode *orphan_dir_inode, + bool orphaned) +{ + struct buffer_head *orphan_dir_bh = NULL; + int status = 0; + struct ocfs2_dinode *orphan_fe; + struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data; + + trace_ocfs2_dio_orphan_add_begin( + (unsigned long long)OCFS2_I(inode)->ip_blkno); + + status = ocfs2_read_inode_block(orphan_dir_inode, &orphan_dir_bh); + if (status < 0) { + mlog_errno(status); + goto leave; + } + + status = ocfs2_journal_access_di(handle, + INODE_CACHE(orphan_dir_inode), + orphan_dir_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (status < 0) { + mlog_errno(status); + goto leave; + } + + /* + * We're going to journal the change of i_flags and i_dio_orphaned_slot. + * It's safe anyway, though some callers may duplicate the journaling. + * Journaling within the func just make the logic look more + * straightforward. + */ + status = ocfs2_journal_access_di(handle, + INODE_CACHE(inode), + fe_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (status < 0) { + mlog_errno(status); + goto leave; + } + + /* we're a cluster, and nlink can change on disk from + * underneath us... 
*/ + orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data; + if (S_ISDIR(inode->i_mode)) + ocfs2_add_links_count(orphan_fe, 1); + set_nlink(orphan_dir_inode, ocfs2_read_links_count(orphan_fe)); + ocfs2_journal_dirty(handle, orphan_dir_bh); + + /* It may already be orphaned by ocfs2_unlink/ocfs2_rename */ + if (!orphaned) { + status = __ocfs2_add_entry(handle, orphan_dir_inode, name, + OCFS2_ORPHAN_NAMELEN, inode, + OCFS2_I(inode)->ip_blkno, + orphan_dir_bh, lookup); + if (status < 0) { + mlog_errno(status); + goto rollback; + } + } + + /* Update flag OCFS2_DIO_ORPHANED_FL and record the orphan slot */ + fe->i_flags |= cpu_to_le32(OCFS2_DIO_ORPHANED_FL); + fe->i_dio_orphaned_slot = cpu_to_le16(osb->slot_num); + + ocfs2_journal_dirty(handle, fe_bh); + + trace_ocfs2_dio_orphan_add_end((unsigned long long)OCFS2_I(inode)->ip_blkno, + osb->slot_num); + +rollback: + if (status < 0) { + if (S_ISDIR(inode->i_mode)) + ocfs2_add_links_count(orphan_fe, -1); + set_nlink(orphan_dir_inode, ocfs2_read_links_count(orphan_fe)); + } + +leave: + brelse(orphan_dir_bh); + + return status; +} /* unlike orphan_add, we expect the orphan dir to already be locked here. 
*/ int ocfs2_orphan_del(struct ocfs2_super *osb, handle_t *handle, @@ -2500,6 +2654,200 @@ leave: return status; } +static int ocfs2_dio_orphan_recovered(struct inode *inode) +{ + int ret; + struct buffer_head *di_bh = NULL; + struct ocfs2_dinode *di = NULL; + + ret = ocfs2_inode_lock(inode, &di_bh, 1); + if (ret < 0) { + mlog_errno(ret); + return 0; + } + + di = (struct ocfs2_dinode *) di_bh->b_data; + ret = !(di->i_flags & cpu_to_le32(OCFS2_DIO_ORPHANED_FL)); + ocfs2_inode_unlock(inode, 1); + brelse(di_bh); + + return ret; +} + +int ocfs2_add_inode_to_orphan(struct ocfs2_super *osb, + struct inode *inode) +{ + char orphan_name[OCFS2_ORPHAN_NAMELEN + 1]; + struct inode *orphan_dir_inode = NULL; + struct ocfs2_dir_lookup_result orphan_insert = { NULL, }; + struct buffer_head *di_bh = NULL; + int status = 0; + handle_t *handle = NULL; + struct ocfs2_dinode *di = NULL; + bool orphaned = false; + +restart: + status = ocfs2_inode_lock(inode, &di_bh, 1); + if (status < 0) { + mlog_errno(status); + goto bail; + } + + di = (struct ocfs2_dinode *) di_bh->b_data; + /* + * Another append dio crashed? + * If so, wait for recovery first. 
+ */ + if (unlikely(di->i_flags & cpu_to_le32(OCFS2_DIO_ORPHANED_FL))) { + ocfs2_inode_unlock(inode, 1); + brelse(di_bh); + wait_event_interruptible(OCFS2_I(inode)->append_dio_wq, + ocfs2_dio_orphan_recovered(inode)); + goto restart; + } + + status = ocfs2_dio_prepare_orphan_dir(osb, &orphan_dir_inode, + OCFS2_I(inode)->ip_blkno, + orphan_name, + &orphan_insert); + if (status < 0 && status != -EEXIST) { + mlog_errno(status); + goto bail_unlock_inode; + } else if (status == -EEXIST) { + mlog(ML_NOTICE, "inode %llu already added to " + "orphan dir %llu.\n", + OCFS2_I(inode)->ip_blkno, + OCFS2_I(orphan_dir_inode)->ip_blkno); + if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL))) { + mlog_errno(status); + goto bail_unlock_orphan; + } + orphaned = true; + } + + handle = ocfs2_start_trans(osb, + OCFS2_INODE_ADD_TO_ORPHAN_CREDITS); + if (IS_ERR(handle)) { + status = PTR_ERR(handle); + goto bail_unlock_orphan; + } + + status = ocfs2_dio_orphan_add(osb, handle, inode, di_bh, orphan_name, + &orphan_insert, orphan_dir_inode, orphaned); + if (status) + mlog_errno(status); + + ocfs2_commit_trans(osb, handle); + +bail_unlock_orphan: + ocfs2_inode_unlock(orphan_dir_inode, 1); + mutex_unlock(&orphan_dir_inode->i_mutex); + iput(orphan_dir_inode); + + ocfs2_free_dir_lookup_result(&orphan_insert); + +bail_unlock_inode: + ocfs2_inode_unlock(inode, 1); + brelse(di_bh); + +bail: + return status; +} + +int ocfs2_del_inode_from_orphan(struct ocfs2_super *osb, + struct inode *inode, int update_isize, + loff_t end) +{ + struct inode *orphan_dir_inode = NULL; + struct buffer_head *orphan_dir_bh = NULL; + struct buffer_head *di_bh = NULL; + struct ocfs2_dinode *di = NULL; + handle_t *handle = NULL; + int status = 0; + + status = ocfs2_inode_lock(inode, &di_bh, 1); + if (status < 0) { + mlog_errno(status); + goto bail; + } + di = (struct ocfs2_dinode *) di_bh->b_data; + + orphan_dir_inode = ocfs2_get_system_file_inode(osb, + ORPHAN_DIR_SYSTEM_INODE, + le16_to_cpu(di->i_dio_orphaned_slot)); + 
if (!orphan_dir_inode) { + status = -EEXIST; + mlog_errno(status); + goto bail_unlock_inode; + } + + mutex_lock(&orphan_dir_inode->i_mutex); + status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1); + if (status < 0) { + mutex_unlock(&orphan_dir_inode->i_mutex); + iput(orphan_dir_inode); + mlog_errno(status); + goto bail_unlock_inode; + } + + handle = ocfs2_start_trans(osb, + OCFS2_INODE_DEL_FROM_ORPHAN_CREDITS); + if (IS_ERR(handle)) { + status = PTR_ERR(handle); + goto bail_unlock_orphan; + } + + BUG_ON(!(di->i_flags & cpu_to_le32(OCFS2_DIO_ORPHANED_FL))); + + /* Only delete entry if OCFS2_ORPHANED_FL not set, or + * there are two entries added */ + if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL)) || + (di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL) && + (di->i_orphaned_slot != di->i_dio_orphaned_slot))) { + status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, + inode, orphan_dir_bh); + if (status < 0) { + mlog_errno(status); + goto bail_commit; + } + } + + status = ocfs2_journal_access_di(handle, + INODE_CACHE(inode), + di_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (status < 0) { + mlog_errno(status); + goto bail_commit; + } + + di->i_flags &= ~cpu_to_le32(OCFS2_DIO_ORPHANED_FL); + di->i_dio_orphaned_slot = 0; + + if (update_isize) { + status = ocfs2_set_inode_size(handle, inode, di_bh, end); + if (status) + mlog_errno(status); + } else + ocfs2_journal_dirty(handle, di_bh); + +bail_commit: + ocfs2_commit_trans(osb, handle); + +bail_unlock_orphan: + ocfs2_inode_unlock(orphan_dir_inode, 1); + mutex_unlock(&orphan_dir_inode->i_mutex); + brelse(orphan_dir_bh); + iput(orphan_dir_inode); + +bail_unlock_inode: + ocfs2_inode_unlock(inode, 1); + brelse(di_bh); + +bail: + return status; +} + int ocfs2_mv_orphaned_inode_to_new(struct inode *dir, struct inode *inode, struct dentry *dentry) diff --git a/fs/ocfs2/namei.h b/fs/ocfs2/namei.h index e5d059d4f11..562554026a6 100644 --- a/fs/ocfs2/namei.h +++ b/fs/ocfs2/namei.h @@ -38,6 +38,11 @@ int 
ocfs2_orphan_del(struct ocfs2_super *osb, int ocfs2_create_inode_in_orphan(struct inode *dir, int mode, struct inode **new_inode); +int ocfs2_add_inode_to_orphan(struct ocfs2_super *osb, + struct inode *inode); +int ocfs2_del_inode_from_orphan(struct ocfs2_super *osb, + struct inode *inode, int update_isize, + loff_t end); int ocfs2_mv_orphaned_inode_to_new(struct inode *dir, struct inode *new_inode, struct dentry *new_dentry); diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 7d6b7d09045..131cc1cd856 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -209,6 +209,11 @@ struct ocfs2_lock_res { #endif }; +enum ocfs2_orphan_reco_type { + ORPHAN_NO_NEED_TRUNCATE = 0, + ORPHAN_NEED_TRUNCATE, +}; + enum ocfs2_orphan_scan_state { ORPHAN_SCAN_ACTIVE, ORPHAN_SCAN_INACTIVE @@ -724,6 +729,16 @@ static inline unsigned int ocfs2_clusters_for_bytes(struct super_block *sb, return clusters; } +static inline unsigned int ocfs2_bytes_to_clusters(struct super_block *sb, + u64 bytes) +{ + int cl_bits = OCFS2_SB(sb)->s_clustersize_bits; + unsigned int clusters; + + clusters = (unsigned int)(bytes >> cl_bits); + return clusters; +} + static inline u64 ocfs2_blocks_for_bytes(struct super_block *sb, u64 bytes) { diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 938387a10d5..e933ea2e976 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -229,6 +229,7 @@ #define OCFS2_CHAIN_FL (0x00000400) /* Chain allocator */ #define OCFS2_DEALLOC_FL (0x00000800) /* Truncate log */ #define OCFS2_QUOTA_FL (0x00001000) /* Quota file */ +#define OCFS2_DIO_ORPHANED_FL (0X00002000) /* On the orphan list especially for dio */ /* * Flags on ocfs2_dinode.i_dyn_features @@ -729,7 +730,9 @@ struct ocfs2_dinode { inode belongs to. 
Only valid if allocated from a discontiguous block group */ -/*A0*/ __le64 i_reserved2[3]; +/*A0*/ __le16 i_dio_orphaned_slot; /* only used for append dio write */ + __le16 i_reserved1[3]; + __le64 i_reserved2[2]; /*B8*/ union { __le64 i_pad1; /* Generic way to refer to this 64bit union */ diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h index 6cb019b7c6a..2d471cc879c 100644 --- a/fs/ocfs2/ocfs2_trace.h +++ b/fs/ocfs2/ocfs2_trace.h @@ -2373,6 +2373,9 @@ DEFINE_OCFS2_ULL_EVENT(ocfs2_orphan_add_begin); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_orphan_add_end); +DEFINE_OCFS2_ULL_EVENT(ocfs2_dio_orphan_add_begin); +DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_dio_orphan_add_end); + TRACE_EVENT(ocfs2_orphan_del, TP_PROTO(unsigned long long dir, const char *name, int namelen), TP_ARGS(dir, name, namelen), diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 83723179e1e..04ee6d16cb3 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1768,6 +1768,8 @@ static void ocfs2_inode_init_once(void *data) ocfs2_lock_res_init_once(&oi->ip_inode_lockres); ocfs2_lock_res_init_once(&oi->ip_open_lockres); + init_waitqueue_head(&oi->append_dio_wq); + ocfs2_metadata_cache_init(INODE_CACHE(&oi->vfs_inode), &ocfs2_inode_caching_ops); diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index aa1eee06420..d3ebf2e6185 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -12,6 +12,9 @@ #include <linux/vmstat.h> #include <linux/atomic.h> #include <linux/vmalloc.h> +#ifdef CONFIG_CMA +#include <linux/cma.h> +#endif #include <asm/page.h> #include <asm/pgtable.h> #include "internal.h" @@ -138,6 +141,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v) #ifdef CONFIG_TRANSPARENT_HUGEPAGE "AnonHugePages: %8lu kB\n" #endif +#ifdef CONFIG_CMA + "CmaTotal: %8lu kB\n" + "CmaFree: %8lu kB\n" +#endif , K(i.totalram), K(i.freeram), @@ -187,12 +194,16 @@ static int meminfo_proc_show(struct seq_file *m, void *v) vmi.used >> 10, vmi.largest_chunk >> 10 #ifdef CONFIG_MEMORY_FAILURE - 
,atomic_long_read(&num_poisoned_pages) << (PAGE_SHIFT - 10) + , atomic_long_read(&num_poisoned_pages) << (PAGE_SHIFT - 10) #endif #ifdef CONFIG_TRANSPARENT_HUGEPAGE - ,K(global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) * + , K(global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) * HPAGE_PMD_NR) #endif +#ifdef CONFIG_CMA + , K(totalcma_pages) + , K(global_page_state(NR_FREE_CMA_PAGES)) +#endif ); hugetlb_report_meminfo(m); diff --git a/fs/select.c b/fs/select.c index 467bb1cb3ea..f684c750e08 100644 --- a/fs/select.c +++ b/fs/select.c @@ -971,7 +971,7 @@ SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds, if (ret == -EINTR) { struct restart_block *restart_block; - restart_block = &current_thread_info()->restart_block; + restart_block = &current->restart_block; restart_block->fn = do_restart_poll; restart_block->poll.ufds = ufds; restart_block->poll.nfds = nfds; diff --git a/include/linux/cma.h b/include/linux/cma.h index a93438beb33..9384ba66e97 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -15,6 +15,7 @@ struct cma; +extern unsigned long totalcma_pages; extern phys_addr_t cma_get_base(struct cma *cma); extern unsigned long cma_get_size(struct cma *cma); diff --git a/include/linux/crc64_ecma.h b/include/linux/crc64_ecma.h new file mode 100644 index 00000000000..bba7a4d692b --- /dev/null +++ b/include/linux/crc64_ecma.h @@ -0,0 +1,56 @@ +/* + * Copyright 2013 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __CRC64_ECMA_H_ +#define __CRC64_ECMA_H_ + +#include <linux/types.h> + + +#define CRC64_DEFAULT_INITVAL 0xFFFFFFFFFFFFFFFFULL + + +/* + * crc64_ecma_seed - Initializes the CRC64 ECMA seed. + */ +u64 crc64_ecma_seed(void); + +/* + * crc64_ecma - Computes the 64 bit ECMA CRC. + * + * @pdata: pointer to the data to compute checksum for. + * @nbytes: number of bytes in data buffer. + * @seed: CRC seed. 
+ */ +u64 crc64_ecma(u8 const *pdata, u32 nbytes, u64 seed); + +#endif /* __CRC64_ECMA_H_ */ diff --git a/include/linux/fs.h b/include/linux/fs.h index f90c0282c11..86397708b7a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2481,8 +2481,12 @@ extern int sb_min_blocksize(struct super_block *, int); extern int generic_file_mmap(struct file *, struct vm_area_struct *); extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); -extern int generic_file_remap_pages(struct vm_area_struct *, unsigned long addr, - unsigned long size, pgoff_t pgoff); +static inline int generic_file_remap_pages(struct vm_area_struct *vma, + unsigned long addr, unsigned long size, pgoff_t pgoff) +{ + BUG(); + return 0; +} int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *); extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *); diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index ad9051bab26..07f736b18ff 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -19,6 +19,9 @@ extern struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd, unsigned int flags); +extern int madvise_free_huge_pmd(struct mmu_gather *tlb, + struct vm_area_struct *vma, + pmd_t *pmd, unsigned long addr); extern int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr); @@ -56,6 +59,7 @@ extern pmd_t *page_check_address_pmd(struct page *page, unsigned long address, enum page_check_address_pmd_flag flag, spinlock_t **ptl); +extern int pmd_freeable(pmd_t pmd); #define HPAGE_PMD_ORDER (HPAGE_PMD_SHIFT-PAGE_SHIFT) #define HPAGE_PMD_NR (1<<HPAGE_PMD_ORDER) diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 3037fc085e8..d3d43ecf148 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -193,6 +193,9 @@ extern struct 
task_group root_task_group; .nr_cpus_allowed= NR_CPUS, \ .mm = NULL, \ .active_mm = &init_mm, \ + .restart_block = { \ + .fn = do_no_restart_syscall, \ + }, \ .se = { \ .group_node = LIST_HEAD_INIT(tsk.se.group_node), \ }, \ diff --git a/include/linux/input.h b/include/linux/input.h index 82ce323b998..3b4c32f7312 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -79,6 +79,7 @@ struct input_value { * @led: reflects current state of device's LEDs * @snd: reflects current state of sound effects * @sw: reflects current state of device's switches + * @leds: leds objects for the device's LEDs * @open: this method is called when the very first user calls * input_open_device(). The driver must prepare the device * to start generating events (start polling thread, @@ -164,6 +165,8 @@ struct input_dev { unsigned long snd[BITS_TO_LONGS(SND_CNT)]; unsigned long sw[BITS_TO_LONGS(SW_CNT)]; + struct led_classdev *leds; + int (*open)(struct input_dev *dev); void (*close)(struct input_dev *dev); int (*flush)(struct input_dev *dev, struct file *file); @@ -531,4 +534,29 @@ int input_ff_erase(struct input_dev *dev, int effect_id, struct file *file); int input_ff_create_memless(struct input_dev *dev, void *data, int (*play_effect)(struct input_dev *, void *, struct ff_effect *)); +#ifdef CONFIG_INPUT_LEDS + +void input_led_init(void); +void input_led_exit(void); + +int input_led_connect(struct input_dev *dev); +void input_led_disconnect(struct input_dev *dev); + +#else + +static inline void input_led_init(void) { } + +static inline void input_led_exit(void) { } + +static inline int input_led_connect(struct input_dev *dev) +{ + return 0; +} + +static inline void input_led_disconnect(struct input_dev *dev) +{ +} + +#endif + #endif diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 9d957b7ae09..bee3c5b097d 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -258,6 +258,8 @@ unsigned long paddr_vmcoreinfo_note(void); 
vmcoreinfo_append_str("NUMBER(%s)=%ld\n", #name, (long)name) #define VMCOREINFO_CONFIG(name) \ vmcoreinfo_append_str("CONFIG_%s=y\n", #name) +#define VMCOREINFO_PHYS_BASE(value) \ + vmcoreinfo_append_str("PHYS_BASE=%lx\n", (unsigned long)value) extern struct kimage *kexec_image; extern struct kimage *kexec_crash_image; diff --git a/include/linux/oom.h b/include/linux/oom.h index 853698c721f..76200984d1e 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -85,11 +85,6 @@ static inline void oom_killer_enable(void) oom_killer_disabled = false; } -static inline bool oom_gfp_allowed(gfp_t gfp_mask) -{ - return (gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY); -} - extern struct task_struct *find_lock_task_mm(struct task_struct *p); static inline bool task_will_free_mem(struct task_struct *task) diff --git a/include/linux/rmap.h b/include/linux/rmap.h index c0c2bce6b0b..94d5bcacc83 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -75,6 +75,7 @@ enum ttu_flags { TTU_UNMAP = 1, /* unmap mode */ TTU_MIGRATION = 2, /* migration mode */ TTU_MUNLOCK = 4, /* munlock mode */ + TTU_FREE = 8, /* free mode */ TTU_IGNORE_MLOCK = (1 << 8), /* ignore mlock */ TTU_IGNORE_ACCESS = (1 << 9), /* don't age */ @@ -181,7 +182,8 @@ static inline void page_dup_rmap(struct page *page) * Called from mm/vmscan.c to handle paging out */ int page_referenced(struct page *, int is_locked, - struct mem_cgroup *memcg, unsigned long *vm_flags); + struct mem_cgroup *memcg, unsigned long *vm_flags, + int *is_pte_dirty); #define TTU_ACTION(x) ((x) & TTU_ACTION_MASK) @@ -260,9 +262,12 @@ int rmap_walk(struct page *page, struct rmap_walk_control *rwc); static inline int page_referenced(struct page *page, int is_locked, struct mem_cgroup *memcg, - unsigned long *vm_flags) + unsigned long *vm_flags, + int *is_pte_dirty) { *vm_flags = 0; + if (is_pte_dirty) + *is_pte_dirty = 0; return 0; } diff --git a/include/linux/sched.h b/include/linux/sched.h index 8db31ef98d2..22ee0d5d7f8 
100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1370,6 +1370,8 @@ struct task_struct { unsigned long atomic_flags; /* Flags needing atomic access. */ + struct restart_block restart_block; + pid_t pid; pid_t tgid; diff --git a/include/linux/string.h b/include/linux/string.h index 2e22a2e58f3..a0c6fd5fb5c 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -117,6 +117,7 @@ void *memchr_inv(const void *s, int c, size_t n); extern char *kstrdup(const char *s, gfp_t gfp); extern char *kstrndup(const char *s, size_t len, gfp_t gfp); +extern char *kstrimdup(const char *s, gfp_t gfp); extern void *kmemdup(const void *src, size_t len, gfp_t gfp); extern char **argv_split(gfp_t gfp, const char *str, int *argcp); diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 9246d32dc97..2b1cef88b82 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -25,6 +25,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, FOR_ALL_ZONES(PGALLOC), PGFREE, PGACTIVATE, PGDEACTIVATE, PGFAULT, PGMAJFAULT, + PGLAZYFREED, FOR_ALL_ZONES(PGREFILL), FOR_ALL_ZONES(PGSTEAL_KSWAPD), FOR_ALL_ZONES(PGSTEAL_DIRECT), diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h index ddc3b36f104..7a94102b7a0 100644 --- a/include/uapi/asm-generic/mman-common.h +++ b/include/uapi/asm-generic/mman-common.h @@ -34,6 +34,7 @@ #define MADV_SEQUENTIAL 2 /* expect sequential page references */ #define MADV_WILLNEED 3 /* will need these pages */ #define MADV_DONTNEED 4 /* don't need these pages */ +#define MADV_FREE 5 /* free pages only if memory pressure */ /* common parameters: try to keep these consistent across architectures */ #define MADV_REMOVE 9 /* remove these pages & resources */ diff --git a/init/Kconfig b/init/Kconfig index 0c90b97d1fa..d68d8b0780b 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1280,22 +1280,6 @@ source "usr/Kconfig" endif -config INIT_FALLBACK - bool "Fall back 
to defaults if init= parameter is bad" - default y - help - If enabled, the kernel will try the default init binaries if an - explicit request from the init= parameter fails. - - This can have unexpected effects. For example, booting - with init=/sbin/kiosk_app will run /sbin/init or even /bin/sh - if /sbin/kiosk_app cannot be executed. - - The default value of Y is consistent with historical behavior. - Selecting N is likely to be more appropriate for most uses, - especially on kiosks and on kernels that are intended to be - run under the control of a script. - config CC_OPTIMIZE_FOR_SIZE bool "Optimize for size" help diff --git a/init/main.c b/init/main.c index 61b993767db..cf954286596 100644 --- a/init/main.c +++ b/init/main.c @@ -966,13 +966,8 @@ static int __ref kernel_init(void *unused) ret = run_init_process(execute_command); if (!ret) return 0; -#ifndef CONFIG_INIT_FALLBACK panic("Requested init %s failed (error %d).", execute_command, ret); -#else - pr_err("Failed to execute %s (error %d). Attempting defaults...\n", - execute_command, ret); -#endif } if (!try_to_run_init_process("/sbin/init") || !try_to_run_init_process("/etc/init") || diff --git a/kernel/compat.c b/kernel/compat.c index ebb3c369d03..24f00610c57 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -276,8 +276,7 @@ COMPAT_SYSCALL_DEFINE2(nanosleep, struct compat_timespec __user *, rqtp, * core implementation decides to return random nonsense. 
*/ if (ret == -ERESTART_RESTARTBLOCK) { - struct restart_block *restart - = &current_thread_info()->restart_block; + struct restart_block *restart = &current->restart_block; restart->fn = compat_nanosleep_restart; restart->nanosleep.compat_rmtp = rmtp; @@ -860,7 +859,7 @@ COMPAT_SYSCALL_DEFINE4(clock_nanosleep, clockid_t, which_clock, int, flags, return -EFAULT; if (err == -ERESTART_RESTARTBLOCK) { - restart = &current_thread_info()->restart_block; + restart = &current->restart_block; restart->fn = compat_clock_nanosleep_restart; restart->nanosleep.compat_rmtp = rmtp; } diff --git a/kernel/exit.c b/kernel/exit.c index 1ea4369890a..6806c55475e 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1287,9 +1287,15 @@ static int wait_task_continued(struct wait_opts *wo, struct task_struct *p) static int wait_consider_task(struct wait_opts *wo, int ptrace, struct task_struct *p) { + /* + * We can race with wait_task_zombie() from another thread. + * Ensure that EXIT_ZOMBIE -> EXIT_DEAD/EXIT_TRACE transition + * can't confuse the checks below. + */ + int exit_state = ACCESS_ONCE(p->exit_state); int ret; - if (unlikely(p->exit_state == EXIT_DEAD)) + if (unlikely(exit_state == EXIT_DEAD)) return 0; ret = eligible_child(wo, p); @@ -1310,7 +1316,7 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace, return 0; } - if (unlikely(p->exit_state == EXIT_TRACE)) { + if (unlikely(exit_state == EXIT_TRACE)) { /* * ptrace == 0 means we are the natural parent. In this case * we should clear notask_error, debugger will notify us. @@ -1337,7 +1343,7 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace, } /* slay zombie? 
*/ - if (p->exit_state == EXIT_ZOMBIE) { + if (exit_state == EXIT_ZOMBIE) { /* we don't reap group leaders with subthreads */ if (!delay_group_leader(p)) { /* diff --git a/kernel/futex.c b/kernel/futex.c index 63678b573d6..f4d8a85641e 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2217,7 +2217,7 @@ retry: if (!abs_time) goto out; - restart = &current_thread_info()->restart_block; + restart = &current->restart_block; restart->fn = futex_wait_restart; restart->futex.uaddr = uaddr; restart->futex.val = val; diff --git a/kernel/signal.c b/kernel/signal.c index 16a30529525..33a52759cc0 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2501,7 +2501,7 @@ EXPORT_SYMBOL(unblock_all_signals); */ SYSCALL_DEFINE0(restart_syscall) { - struct restart_block *restart = &current_thread_info()->restart_block; + struct restart_block *restart = &current->restart_block; return restart->fn(restart); } diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index a7077d3ae52..1b001ed1edb 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -788,7 +788,7 @@ static int alarm_timer_nsleep(const clockid_t which_clock, int flags, goto out; } - restart = &current_thread_info()->restart_block; + restart = &current->restart_block; restart->fn = alarm_timer_nsleep_restart; restart->nanosleep.clockid = type; restart->nanosleep.expires = exp.tv64; diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 37e50aadd47..dbcec65d08c 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1591,7 +1591,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, goto out; } - restart = &current_thread_info()->restart_block; + restart = &current->restart_block; restart->fn = hrtimer_nanosleep_restart; restart->nanosleep.clockid = t.timer.base->clockid; restart->nanosleep.rmtp = rmtp; diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index a16b67859e2..0075da74abf 100644 --- 
a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ 
-1334,8 +1334,7 @@ static long posix_cpu_nsleep_restart(struct restart_block *restart_block); static int posix_cpu_nsleep(const clockid_t which_clock, int flags, struct timespec *rqtp, struct timespec __user *rmtp) { - struct restart_block *restart_block = - &current_thread_info()->restart_block; + struct restart_block *restart_block = &current->restart_block; struct itimerspec it; int error; diff --git a/lib/Kconfig b/lib/Kconfig index 54cf309a92a..2faf7b2de5b 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -180,6 +180,13 @@ config CRC8 when they need to do cyclic redundancy check according CRC8 algorithm. Module will be called crc8. +config CRC64_ECMA + tristate "CRC64 ECMA function" + help + This option provides CRC64 ECMA function. Drivers may select this + when they need to do cyclic redundancy check according to the CRC64 + ECMA algorithm. + config AUDIT_GENERIC bool depends on AUDIT && !AUDIT_ARCH diff --git a/lib/Makefile b/lib/Makefile index 3c3b30b9e02..f42838ac6f3 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -72,6 +72,7 @@ obj-$(CONFIG_CRC32) += crc32.o obj-$(CONFIG_CRC7) += crc7.o obj-$(CONFIG_LIBCRC32C) += libcrc32c.o obj-$(CONFIG_CRC8) += crc8.o +obj-$(CONFIG_CRC64_ECMA) += crc64_ecma.o obj-$(CONFIG_GENERIC_ALLOCATOR) += genalloc.o obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate/ diff --git a/lib/crc64_ecma.c b/lib/crc64_ecma.c new file mode 100644 index 00000000000..41629ea5a60 --- /dev/null +++ b/lib/crc64_ecma.c @@ -0,0 +1,341 @@ +/* + * Copyright 2013 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <linux/module.h> +#include <linux/crc64_ecma.h> + + +#define CRC64_BYTE_MASK 0xFF +#define CRC64_TABLE_SIZE 256 + + +struct crc64_table { + u64 seed; + u64 table[CRC64_TABLE_SIZE]; +}; + + +static struct crc64_table CRC64_ECMA_182 = { + CRC64_DEFAULT_INITVAL, + { + 0x0000000000000000ULL, + 0xb32e4cbe03a75f6fULL, + 0xf4843657a840a05bULL, + 0x47aa7ae9abe7ff34ULL, + 0x7bd0c384ff8f5e33ULL, + 0xc8fe8f3afc28015cULL, + 0x8f54f5d357cffe68ULL, + 0x3c7ab96d5468a107ULL, + 0xf7a18709ff1ebc66ULL, + 0x448fcbb7fcb9e309ULL, + 0x0325b15e575e1c3dULL, + 0xb00bfde054f94352ULL, + 0x8c71448d0091e255ULL, + 0x3f5f08330336bd3aULL, + 0x78f572daa8d1420eULL, + 0xcbdb3e64ab761d61ULL, + 0x7d9ba13851336649ULL, + 0xceb5ed8652943926ULL, + 0x891f976ff973c612ULL, + 0x3a31dbd1fad4997dULL, + 0x064b62bcaebc387aULL, + 0xb5652e02ad1b6715ULL, + 0xf2cf54eb06fc9821ULL, + 0x41e11855055bc74eULL, + 0x8a3a2631ae2dda2fULL, + 0x39146a8fad8a8540ULL, + 0x7ebe1066066d7a74ULL, + 0xcd905cd805ca251bULL, + 0xf1eae5b551a2841cULL, + 0x42c4a90b5205db73ULL, + 0x056ed3e2f9e22447ULL, + 0xb6409f5cfa457b28ULL, + 0xfb374270a266cc92ULL, + 0x48190ecea1c193fdULL, + 0x0fb374270a266cc9ULL, + 0xbc9d3899098133a6ULL, + 0x80e781f45de992a1ULL, + 0x33c9cd4a5e4ecdceULL, + 0x7463b7a3f5a932faULL, + 0xc74dfb1df60e6d95ULL, + 0x0c96c5795d7870f4ULL, + 0xbfb889c75edf2f9bULL, + 0xf812f32ef538d0afULL, + 0x4b3cbf90f69f8fc0ULL, + 0x774606fda2f72ec7ULL, + 0xc4684a43a15071a8ULL, + 0x83c230aa0ab78e9cULL, + 0x30ec7c140910d1f3ULL, + 0x86ace348f355aadbULL, + 0x3582aff6f0f2f5b4ULL, + 0x7228d51f5b150a80ULL, + 0xc10699a158b255efULL, + 0xfd7c20cc0cdaf4e8ULL, + 0x4e526c720f7dab87ULL, + 0x09f8169ba49a54b3ULL, + 0xbad65a25a73d0bdcULL, + 0x710d64410c4b16bdULL, + 0xc22328ff0fec49d2ULL, + 0x85895216a40bb6e6ULL, + 0x36a71ea8a7ace989ULL, + 0x0adda7c5f3c4488eULL, + 0xb9f3eb7bf06317e1ULL, + 0xfe5991925b84e8d5ULL, + 0x4d77dd2c5823b7baULL, + 0x64b62bcaebc387a1ULL, + 0xd7986774e864d8ceULL, + 0x90321d9d438327faULL, + 0x231c512340247895ULL, + 
0x1f66e84e144cd992ULL, + 0xac48a4f017eb86fdULL, + 0xebe2de19bc0c79c9ULL, + 0x58cc92a7bfab26a6ULL, + 0x9317acc314dd3bc7ULL, + 0x2039e07d177a64a8ULL, + 0x67939a94bc9d9b9cULL, + 0xd4bdd62abf3ac4f3ULL, + 0xe8c76f47eb5265f4ULL, + 0x5be923f9e8f53a9bULL, + 0x1c4359104312c5afULL, + 0xaf6d15ae40b59ac0ULL, + 0x192d8af2baf0e1e8ULL, + 0xaa03c64cb957be87ULL, + 0xeda9bca512b041b3ULL, + 0x5e87f01b11171edcULL, + 0x62fd4976457fbfdbULL, + 0xd1d305c846d8e0b4ULL, + 0x96797f21ed3f1f80ULL, + 0x2557339fee9840efULL, + 0xee8c0dfb45ee5d8eULL, + 0x5da24145464902e1ULL, + 0x1a083bacedaefdd5ULL, + 0xa9267712ee09a2baULL, + 0x955cce7fba6103bdULL, + 0x267282c1b9c65cd2ULL, + 0x61d8f8281221a3e6ULL, + 0xd2f6b4961186fc89ULL, + 0x9f8169ba49a54b33ULL, + 0x2caf25044a02145cULL, + 0x6b055fede1e5eb68ULL, + 0xd82b1353e242b407ULL, + 0xe451aa3eb62a1500ULL, + 0x577fe680b58d4a6fULL, + 0x10d59c691e6ab55bULL, + 0xa3fbd0d71dcdea34ULL, + 0x6820eeb3b6bbf755ULL, + 0xdb0ea20db51ca83aULL, + 0x9ca4d8e41efb570eULL, + 0x2f8a945a1d5c0861ULL, + 0x13f02d374934a966ULL, + 0xa0de61894a93f609ULL, + 0xe7741b60e174093dULL, + 0x545a57dee2d35652ULL, + 0xe21ac88218962d7aULL, + 0x5134843c1b317215ULL, + 0x169efed5b0d68d21ULL, + 0xa5b0b26bb371d24eULL, + 0x99ca0b06e7197349ULL, + 0x2ae447b8e4be2c26ULL, + 0x6d4e3d514f59d312ULL, + 0xde6071ef4cfe8c7dULL, + 0x15bb4f8be788911cULL, + 0xa6950335e42fce73ULL, + 0xe13f79dc4fc83147ULL, + 0x521135624c6f6e28ULL, + 0x6e6b8c0f1807cf2fULL, + 0xdd45c0b11ba09040ULL, + 0x9aefba58b0476f74ULL, + 0x29c1f6e6b3e0301bULL, + 0xc96c5795d7870f42ULL, + 0x7a421b2bd420502dULL, + 0x3de861c27fc7af19ULL, + 0x8ec62d7c7c60f076ULL, + 0xb2bc941128085171ULL, + 0x0192d8af2baf0e1eULL, + 0x4638a2468048f12aULL, + 0xf516eef883efae45ULL, + 0x3ecdd09c2899b324ULL, + 0x8de39c222b3eec4bULL, + 0xca49e6cb80d9137fULL, + 0x7967aa75837e4c10ULL, + 0x451d1318d716ed17ULL, + 0xf6335fa6d4b1b278ULL, + 0xb199254f7f564d4cULL, + 0x02b769f17cf11223ULL, + 0xb4f7f6ad86b4690bULL, + 0x07d9ba1385133664ULL, + 0x4073c0fa2ef4c950ULL, + 0xf35d8c442d53963fULL, + 
0xcf273529793b3738ULL, + 0x7c0979977a9c6857ULL, + 0x3ba3037ed17b9763ULL, + 0x888d4fc0d2dcc80cULL, + 0x435671a479aad56dULL, + 0xf0783d1a7a0d8a02ULL, + 0xb7d247f3d1ea7536ULL, + 0x04fc0b4dd24d2a59ULL, + 0x3886b22086258b5eULL, + 0x8ba8fe9e8582d431ULL, + 0xcc0284772e652b05ULL, + 0x7f2cc8c92dc2746aULL, + 0x325b15e575e1c3d0ULL, + 0x8175595b76469cbfULL, + 0xc6df23b2dda1638bULL, + 0x75f16f0cde063ce4ULL, + 0x498bd6618a6e9de3ULL, + 0xfaa59adf89c9c28cULL, + 0xbd0fe036222e3db8ULL, + 0x0e21ac88218962d7ULL, + 0xc5fa92ec8aff7fb6ULL, + 0x76d4de52895820d9ULL, + 0x317ea4bb22bfdfedULL, + 0x8250e80521188082ULL, + 0xbe2a516875702185ULL, + 0x0d041dd676d77eeaULL, + 0x4aae673fdd3081deULL, + 0xf9802b81de97deb1ULL, + 0x4fc0b4dd24d2a599ULL, + 0xfceef8632775faf6ULL, + 0xbb44828a8c9205c2ULL, + 0x086ace348f355aadULL, + 0x34107759db5dfbaaULL, + 0x873e3be7d8faa4c5ULL, + 0xc094410e731d5bf1ULL, + 0x73ba0db070ba049eULL, + 0xb86133d4dbcc19ffULL, + 0x0b4f7f6ad86b4690ULL, + 0x4ce50583738cb9a4ULL, + 0xffcb493d702be6cbULL, + 0xc3b1f050244347ccULL, + 0x709fbcee27e418a3ULL, + 0x3735c6078c03e797ULL, + 0x841b8ab98fa4b8f8ULL, + 0xadda7c5f3c4488e3ULL, + 0x1ef430e13fe3d78cULL, + 0x595e4a08940428b8ULL, + 0xea7006b697a377d7ULL, + 0xd60abfdbc3cbd6d0ULL, + 0x6524f365c06c89bfULL, + 0x228e898c6b8b768bULL, + 0x91a0c532682c29e4ULL, + 0x5a7bfb56c35a3485ULL, + 0xe955b7e8c0fd6beaULL, + 0xaeffcd016b1a94deULL, + 0x1dd181bf68bdcbb1ULL, + 0x21ab38d23cd56ab6ULL, + 0x9285746c3f7235d9ULL, + 0xd52f0e859495caedULL, + 0x6601423b97329582ULL, + 0xd041dd676d77eeaaULL, + 0x636f91d96ed0b1c5ULL, + 0x24c5eb30c5374ef1ULL, + 0x97eba78ec690119eULL, + 0xab911ee392f8b099ULL, + 0x18bf525d915feff6ULL, + 0x5f1528b43ab810c2ULL, + 0xec3b640a391f4fadULL, + 0x27e05a6e926952ccULL, + 0x94ce16d091ce0da3ULL, + 0xd3646c393a29f297ULL, + 0x604a2087398eadf8ULL, + 0x5c3099ea6de60cffULL, + 0xef1ed5546e415390ULL, + 0xa8b4afbdc5a6aca4ULL, + 0x1b9ae303c601f3cbULL, + 0x56ed3e2f9e224471ULL, + 0xe5c372919d851b1eULL, + 0xa26908783662e42aULL, + 0x114744c635c5bb45ULL, + 
0x2d3dfdab61ad1a42ULL, + 0x9e13b115620a452dULL, + 0xd9b9cbfcc9edba19ULL, + 0x6a978742ca4ae576ULL, + 0xa14cb926613cf817ULL, + 0x1262f598629ba778ULL, + 0x55c88f71c97c584cULL, + 0xe6e6c3cfcadb0723ULL, + 0xda9c7aa29eb3a624ULL, + 0x69b2361c9d14f94bULL, + 0x2e184cf536f3067fULL, + 0x9d36004b35545910ULL, + 0x2b769f17cf112238ULL, + 0x9858d3a9ccb67d57ULL, + 0xdff2a94067518263ULL, + 0x6cdce5fe64f6dd0cULL, + 0x50a65c93309e7c0bULL, + 0xe388102d33392364ULL, + 0xa4226ac498dedc50ULL, + 0x170c267a9b79833fULL, + 0xdcd7181e300f9e5eULL, + 0x6ff954a033a8c131ULL, + 0x28532e49984f3e05ULL, + 0x9b7d62f79be8616aULL, + 0xa707db9acf80c06dULL, + 0x14299724cc279f02ULL, + 0x5383edcd67c06036ULL, + 0xe0ada17364673f59ULL + } +}; + + +/* + * crc64_ecma_seed - Initializes the CRC64 ECMA seed. + */ +u64 crc64_ecma_seed(void) +{ + return CRC64_ECMA_182.seed; +} +EXPORT_SYMBOL(crc64_ecma_seed); + +/* + * crc64_ecma - Computes the 64 bit ECMA CRC. + * + * pdata: pointer to the data to compute checksum for. + * nbytes: number of bytes in data buffer. + * seed: CRC seed. 
+ */ +u64 crc64_ecma(u8 const *pdata, u32 nbytes, u64 seed) +{ + unsigned int i; + u64 crc = seed; + + for (i = 0; i < nbytes; i++) + crc = CRC64_ECMA_182.table[(crc ^ pdata[i]) & CRC64_BYTE_MASK] ^ + (crc >> 8); + + return crc; +} +EXPORT_SYMBOL(crc64_ecma); + +MODULE_DESCRIPTION("CRC64 ECMA function"); +MODULE_AUTHOR("Freescale Semiconductor Inc."); +MODULE_LICENSE("GPL"); diff --git a/lib/show_mem.c b/lib/show_mem.c index 5e256271b47..7de89f4a36c 100644 --- a/lib/show_mem.c +++ b/lib/show_mem.c @@ -8,6 +8,7 @@ #include <linux/mm.h> #include <linux/nmi.h> #include <linux/quicklist.h> +#include <linux/cma.h> void show_mem(unsigned int filter) { @@ -38,7 +39,12 @@ void show_mem(unsigned int filter) printk("%lu pages RAM\n", total); printk("%lu pages HighMem/MovableOnly\n", highmem); +#ifdef CONFIG_CMA + printk("%lu pages reserved\n", (reserved - totalcma_pages)); + printk("%lu pages cma reserved\n", totalcma_pages); +#else printk("%lu pages reserved\n", reserved); +#endif #ifdef CONFIG_QUICKLIST printk("%lu pages in pagetable cache\n", quicklist_total_size()); diff --git a/lib/vsprintf.c b/lib/vsprintf.c index ec337f64f52..d57551c031f 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1240,6 +1240,21 @@ char *address_val(char *buf, char *end, const void *addr, return number(buf, end, num, spec); } +static noinline_for_stack +char *comm_name(char *buf, char *end, struct task_struct *tsk, + struct printf_spec spec, const char *fmt) +{ + char name[TASK_COMM_LEN]; + + /* Caller can pass NULL instead of current. */ + if (!tsk) + tsk = current; + /* Not using get_task_comm() in case I'm in IRQ context. 
*/ + memcpy(name, tsk->comm, TASK_COMM_LEN); + name[sizeof(name) - 1] = '\0'; + return string(buf, end, name, spec); +} + int kptr_restrict __read_mostly; /* @@ -1318,6 +1333,7 @@ int kptr_restrict __read_mostly; * (default assumed to be phys_addr_t, passed by reference) * - 'd[234]' For a dentry name (optionally 2-4 last components) * - 'D[234]' Same as 'd' but for a struct file + * - 'T' task_struct->comm * * Note: The difference between 'S' and 'F' is that on ia64 and ppc64 * function pointers are really function descriptors, which contain a @@ -1329,7 +1345,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, { int default_width = 2 * sizeof(void *) + (spec.flags & SPECIAL ? 2 : 0); - if (!ptr && *fmt != 'K') { + if (!ptr && *fmt != 'K' && *fmt != 'T') { /* * Print (null) with the same width as a pointer so it makes * tabular output look nice. @@ -1459,6 +1475,8 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, return dentry_name(buf, end, ((const struct file *)ptr)->f_path.dentry, spec, fmt); + case 'T': + return comm_name(buf, end, ptr, spec, fmt); } spec.flags |= SMALL; if (spec.field_width == -1) { diff --git a/mm/Makefile b/mm/Makefile index 4bf586e6637..3548460ab7b 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -3,7 +3,7 @@ # mmu-y := nommu.o -mmu-$(CONFIG_MMU) := fremap.o gup.o highmem.o memory.o mincore.o \ +mmu-$(CONFIG_MMU) := gup.o highmem.o memory.o mincore.o \ mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \ vmalloc.o pagewalk.o pgtable-generic.o @@ -337,6 +337,7 @@ int __init cma_declare_contiguous(phys_addr_t base, if (ret) goto err; + totalcma_pages += (size / PAGE_SIZE); pr_info("Reserved %ld MiB at %pa\n", (unsigned long)size / SZ_1M, &base); return 0; diff --git a/mm/fremap.c b/mm/fremap.c deleted file mode 100644 index 2805d71cf47..00000000000 --- a/mm/fremap.c +++ /dev/null @@ -1,283 +0,0 @@ -/* - * linux/mm/fremap.c - * - * Explicit pagetable population and nonlinear (random) mappings support. 
- * - * started by Ingo Molnar, Copyright (C) 2002, 2003 - */ -#include <linux/export.h> -#include <linux/backing-dev.h> -#include <linux/mm.h> -#include <linux/swap.h> -#include <linux/file.h> -#include <linux/mman.h> -#include <linux/pagemap.h> -#include <linux/swapops.h> -#include <linux/rmap.h> -#include <linux/syscalls.h> -#include <linux/mmu_notifier.h> - -#include <asm/mmu_context.h> -#include <asm/cacheflush.h> -#include <asm/tlbflush.h> - -#include "internal.h" - -static int mm_counter(struct page *page) -{ - return PageAnon(page) ? MM_ANONPAGES : MM_FILEPAGES; -} - -static void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long addr, pte_t *ptep) -{ - pte_t pte = *ptep; - struct page *page; - swp_entry_t entry; - - if (pte_present(pte)) { - flush_cache_page(vma, addr, pte_pfn(pte)); - pte = ptep_clear_flush_notify(vma, addr, ptep); - page = vm_normal_page(vma, addr, pte); - if (page) { - if (pte_dirty(pte)) - set_page_dirty(page); - update_hiwater_rss(mm); - dec_mm_counter(mm, mm_counter(page)); - page_remove_rmap(page); - page_cache_release(page); - } - } else { /* zap_pte() is not called when pte_none() */ - if (!pte_file(pte)) { - update_hiwater_rss(mm); - entry = pte_to_swp_entry(pte); - if (non_swap_entry(entry)) { - if (is_migration_entry(entry)) { - page = migration_entry_to_page(entry); - dec_mm_counter(mm, mm_counter(page)); - } - } else { - free_swap_and_cache(entry); - dec_mm_counter(mm, MM_SWAPENTS); - } - } - pte_clear_not_present_full(mm, addr, ptep, 0); - } -} - -/* - * Install a file pte to a given virtual memory address, release any - * previously existing mapping. 
- */ -static int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long addr, unsigned long pgoff, pgprot_t prot) -{ - int err = -ENOMEM; - pte_t *pte, ptfile; - spinlock_t *ptl; - - pte = get_locked_pte(mm, addr, &ptl); - if (!pte) - goto out; - - ptfile = pgoff_to_pte(pgoff); - - if (!pte_none(*pte)) - zap_pte(mm, vma, addr, pte); - - set_pte_at(mm, addr, pte, pte_file_mksoft_dirty(ptfile)); - /* - * We don't need to run update_mmu_cache() here because the "file pte" - * being installed by install_file_pte() is not a real pte - it's a - * non-present entry (like a swap entry), noting what file offset should - * be mapped there when there's a fault (in a non-linear vma where - * that's not obvious). - */ - pte_unmap_unlock(pte, ptl); - err = 0; -out: - return err; -} - -int generic_file_remap_pages(struct vm_area_struct *vma, unsigned long addr, - unsigned long size, pgoff_t pgoff) -{ - struct mm_struct *mm = vma->vm_mm; - int err; - - do { - err = install_file_pte(mm, vma, addr, pgoff, vma->vm_page_prot); - if (err) - return err; - - size -= PAGE_SIZE; - addr += PAGE_SIZE; - pgoff++; - } while (size); - - return 0; -} -EXPORT_SYMBOL(generic_file_remap_pages); - -/** - * sys_remap_file_pages - remap arbitrary pages of an existing VM_SHARED vma - * @start: start of the remapped virtual memory range - * @size: size of the remapped virtual memory range - * @prot: new protection bits of the range (see NOTE) - * @pgoff: to-be-mapped page of the backing store file - * @flags: 0 or MAP_NONBLOCKED - the later will cause no IO. - * - * sys_remap_file_pages remaps arbitrary pages of an existing VM_SHARED vma - * (shared backing store file). - * - * This syscall works purely via pagetables, so it's the most efficient - * way to map the same (large) file into a given virtual window. Unlike - * mmap()/mremap() it does not create any new vmas. The new mappings are - * also safe across swapout. 
- * - * NOTE: the @prot parameter right now is ignored (but must be zero), - * and the vma's default protection is used. Arbitrary protections - * might be implemented in the future. - */ -SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, - unsigned long, prot, unsigned long, pgoff, unsigned long, flags) -{ - struct mm_struct *mm = current->mm; - struct address_space *mapping; - struct vm_area_struct *vma; - int err = -EINVAL; - int has_write_lock = 0; - vm_flags_t vm_flags = 0; - - pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. " - "See Documentation/vm/remap_file_pages.txt.\n", - current->comm, current->pid); - - if (prot) - return err; - /* - * Sanitize the syscall parameters: - */ - start = start & PAGE_MASK; - size = size & PAGE_MASK; - - /* Does the address range wrap, or is the span zero-sized? */ - if (start + size <= start) - return err; - - /* Does pgoff wrap? */ - if (pgoff + (size >> PAGE_SHIFT) < pgoff) - return err; - - /* Can we represent this offset inside this architecture's pte's? */ -#if PTE_FILE_MAX_BITS < BITS_PER_LONG - if (pgoff + (size >> PAGE_SHIFT) >= (1UL << PTE_FILE_MAX_BITS)) - return err; -#endif - - /* We need down_write() to change vma->vm_flags. */ - down_read(&mm->mmap_sem); - retry: - vma = find_vma(mm, start); - - /* - * Make sure the vma is shared, that it supports prefaulting, - * and that the remapped range is valid and fully within - * the single existing vma. - */ - if (!vma || !(vma->vm_flags & VM_SHARED)) - goto out; - - if (!vma->vm_ops || !vma->vm_ops->remap_pages) - goto out; - - if (start < vma->vm_start || start + size > vma->vm_end) - goto out; - - /* Must set VM_NONLINEAR before any pages are populated. */ - if (!(vma->vm_flags & VM_NONLINEAR)) { - /* - * vm_private_data is used as a swapout cursor - * in a VM_NONLINEAR vma. 
- */ - if (vma->vm_private_data) - goto out; - - /* Don't need a nonlinear mapping, exit success */ - if (pgoff == linear_page_index(vma, start)) { - err = 0; - goto out; - } - - if (!has_write_lock) { -get_write_lock: - up_read(&mm->mmap_sem); - down_write(&mm->mmap_sem); - has_write_lock = 1; - goto retry; - } - mapping = vma->vm_file->f_mapping; - /* - * page_mkclean doesn't work on nonlinear vmas, so if - * dirty pages need to be accounted, emulate with linear - * vmas. - */ - if (mapping_cap_account_dirty(mapping)) { - unsigned long addr; - struct file *file = get_file(vma->vm_file); - /* mmap_region may free vma; grab the info now */ - vm_flags = vma->vm_flags; - - addr = mmap_region(file, start, size, vm_flags, pgoff); - fput(file); - if (IS_ERR_VALUE(addr)) { - err = addr; - } else { - BUG_ON(addr != start); - err = 0; - } - goto out_freed; - } - i_mmap_lock_write(mapping); - flush_dcache_mmap_lock(mapping); - vma->vm_flags |= VM_NONLINEAR; - vma_interval_tree_remove(vma, &mapping->i_mmap); - vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear); - flush_dcache_mmap_unlock(mapping); - i_mmap_unlock_write(mapping); - } - - if (vma->vm_flags & VM_LOCKED) { - /* - * drop PG_Mlocked flag for over-mapped range - */ - if (!has_write_lock) - goto get_write_lock; - vm_flags = vma->vm_flags; - munlock_vma_pages_range(vma, start, start + size); - vma->vm_flags = vm_flags; - } - - mmu_notifier_invalidate_range_start(mm, start, start + size); - err = vma->vm_ops->remap_pages(vma, start, size, pgoff); - mmu_notifier_invalidate_range_end(mm, start, start + size); - - /* - * We can't clear VM_NONLINEAR because we'd have to do - * it after ->populate completes, and that would prevent - * downgrading the lock. (Locks can't be upgraded). 
- */ - -out: - if (vma) - vm_flags = vma->vm_flags; -out_freed: - if (likely(!has_write_lock)) - up_read(&mm->mmap_sem); - else - up_write(&mm->mmap_sem); - if (!err && ((vm_flags & VM_LOCKED) || !(flags & MAP_NONBLOCK))) - mm_populate(start, size); - - return err; -} diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 817a875f2b8..cf3b67bdf86 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1383,6 +1383,36 @@ out: return 0; } +int madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, + pmd_t *pmd, unsigned long addr) + +{ + spinlock_t *ptl; + struct mm_struct *mm = tlb->mm; + int ret = 1; + + if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { + struct page *page; + pmd_t orig_pmd; + + orig_pmd = pmdp_get_and_clear(mm, addr, pmd); + + /* No hugepage in swapcache */ + page = pmd_page(orig_pmd); + VM_BUG_ON_PAGE(PageSwapCache(page), page); + + orig_pmd = pmd_mkold(orig_pmd); + orig_pmd = pmd_mkclean(orig_pmd); + + set_pmd_at(mm, addr, pmd, orig_pmd); + tlb_remove_pmd_tlb_entry(tlb, pmd, addr); + spin_unlock(ptl); + ret = 0; + } + + return ret; +} + int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr) { @@ -1620,6 +1650,11 @@ unlock: return NULL; } +int pmd_freeable(pmd_t pmd) +{ + return !pmd_dirty(pmd); +} + static int __split_huge_page_splitting(struct page *page, struct vm_area_struct *vma, unsigned long address) diff --git a/mm/madvise.c b/mm/madvise.c index a271adc9328..6fc9b8298da 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -19,6 +19,14 @@ #include <linux/blkdev.h> #include <linux/swap.h> #include <linux/swapops.h> +#include <linux/mmu_notifier.h> + +#include <asm/tlb.h> + +struct madvise_free_private { + struct vm_area_struct *vma; + struct mmu_gather *tlb; +}; /* * Any behaviour which results in changes to the vma->vm_flags needs to @@ -31,6 +39,7 @@ static int madvise_need_mmap_write(int behavior) case MADV_REMOVE: case MADV_WILLNEED: case MADV_DONTNEED: + case MADV_FREE: return 
0; default: /* be safe, default to 1. list exceptions explicitly */ @@ -251,6 +260,138 @@ static long madvise_willneed(struct vm_area_struct *vma, return 0; } +static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr, + unsigned long end, struct mm_walk *walk) + +{ + struct madvise_free_private *fp = walk->private; + struct mmu_gather *tlb = fp->tlb; + struct mm_struct *mm = tlb->mm; + struct vm_area_struct *vma = fp->vma; + spinlock_t *ptl; + pte_t *pte, ptent; + struct page *page; + unsigned long next; + + next = pmd_addr_end(addr, end); + if (pmd_trans_huge(*pmd)) { + if (next - addr != HPAGE_PMD_SIZE) + split_huge_page_pmd(vma, addr, pmd); + else if (!madvise_free_huge_pmd(tlb, vma, pmd, addr)) + goto next; + /* fall through */ + } + + if (pmd_trans_unstable(pmd)) + return 0; + + pte = pte_offset_map_lock(mm, pmd, addr, &ptl); + arch_enter_lazy_mmu_mode(); + for (; addr != end; pte++, addr += PAGE_SIZE) { + ptent = *pte; + + if (!pte_present(ptent)) + continue; + + page = vm_normal_page(vma, addr, ptent); + if (!page) + continue; + + if (PageSwapCache(page)) { + if (!trylock_page(page)) + continue; + + if (!try_to_free_swap(page)) { + unlock_page(page); + continue; + } + + ClearPageDirty(page); + unlock_page(page); + } + + /* + * Some of architecture(ex, PPC) don't update TLB + * with set_pte_at and tlb_remove_tlb_entry so for + * the portability, remap the pte with old|clean + * after pte clearing. 
+ */ + ptent = ptep_get_and_clear_full(mm, addr, pte, + tlb->fullmm); + ptent = pte_mkold(ptent); + ptent = pte_mkclean(ptent); + set_pte_at(mm, addr, pte, ptent); + tlb_remove_tlb_entry(tlb, pte, addr); + } + arch_leave_lazy_mmu_mode(); + pte_unmap_unlock(pte - 1, ptl); +next: + cond_resched(); + return 0; +} + +static void madvise_free_page_range(struct mmu_gather *tlb, + struct vm_area_struct *vma, + unsigned long addr, unsigned long end) +{ + struct madvise_free_private fp = { + .vma = vma, + .tlb = tlb, + }; + + struct mm_walk free_walk = { + .pmd_entry = madvise_free_pte_range, + .mm = vma->vm_mm, + .private = &fp, + }; + + BUG_ON(addr >= end); + tlb_start_vma(tlb, vma); + walk_page_range(addr, end, &free_walk); + tlb_end_vma(tlb, vma); +} + +static int madvise_free_single_vma(struct vm_area_struct *vma, + unsigned long start_addr, unsigned long end_addr) +{ + unsigned long start, end; + struct mm_struct *mm = vma->vm_mm; + struct mmu_gather tlb; + + if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP)) + return -EINVAL; + + /* MADV_FREE works for only anon vma at the moment */ + if (vma->vm_file) + return -EINVAL; + + start = max(vma->vm_start, start_addr); + if (start >= vma->vm_end) + return -EINVAL; + end = min(vma->vm_end, end_addr); + if (end <= vma->vm_start) + return -EINVAL; + + lru_add_drain(); + tlb_gather_mmu(&tlb, mm, start, end); + update_hiwater_rss(mm); + + mmu_notifier_invalidate_range_start(mm, start, end); + madvise_free_page_range(&tlb, vma, start, end); + mmu_notifier_invalidate_range_end(mm, start, end); + tlb_finish_mmu(&tlb, start, end); + + return 0; +} + +static long madvise_free(struct vm_area_struct *vma, + struct vm_area_struct **prev, + unsigned long start, unsigned long end) +{ + *prev = vma; + return madvise_free_single_vma(vma, start, end); +} + /* * Application no longer needs these pages. If the pages are dirty, * it's OK to just throw them away. 
The app will be more careful about @@ -381,6 +522,14 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev, return madvise_remove(vma, prev, start, end); case MADV_WILLNEED: return madvise_willneed(vma, prev, start, end); + case MADV_FREE: + /* + * XXX: In this implementation, MADV_FREE works like + * MADV_DONTNEED on swapless system or full swap. + */ + if (get_nr_swap_pages() > 0) + return madvise_free(vma, prev, start, end); + /* passthrough */ case MADV_DONTNEED: return madvise_dontneed(vma, prev, start, end); default: @@ -400,6 +549,7 @@ madvise_behavior_valid(int behavior) case MADV_REMOVE: case MADV_WILLNEED: case MADV_DONTNEED: + case MADV_FREE: #ifdef CONFIG_KSM case MADV_MERGEABLE: case MADV_UNMERGEABLE: diff --git a/mm/memory.c b/mm/memory.c index ee5cfc18e22..33f7370cc09 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2966,6 +2966,8 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma, int dirtied = 0; int ret, tmp; + WARN_ON_ONCE(!rwsem_is_locked(&mm->mmap_sem)); + ret = __do_fault(vma, address, pgoff, flags, &fault_page); if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) return ret; @@ -2996,6 +2998,12 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma, if (set_page_dirty(fault_page)) dirtied = 1; + /* + * Take a local copy of the address_space - page.mapping may be zeroed + * by truncate after unlock_page(). The address_space itself remains + * pinned by vma->vm_file's reference. We rely on unlock_page()'s + * release semantics to prevent the compiler from undoing this copying. 
+ */ mapping = fault_page->mapping; unlock_page(fault_page); if ((dirtied || vma->vm_ops->page_mkwrite) && mapping) { diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 9fab10795be..b82b61e94bb 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1331,7 +1331,7 @@ int is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages) } /* - * Confirm all pages in a range [start, end) is belongs to the same zone. + * Confirm all pages in a range [start, end) belong to the same zone. */ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn) { @@ -1342,10 +1342,11 @@ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn) for (pfn = start_pfn; pfn < end_pfn; pfn += MAX_ORDER_NR_PAGES) { - i = 0; - /* This is just a CONFIG_HOLES_IN_ZONE check.*/ - while ((i < MAX_ORDER_NR_PAGES) && !pfn_valid_within(pfn + i)) - i++; + /* Find the first valid pfn in this pageblock */ + for (i = 0; i < MAX_ORDER_NR_PAGES; i++) { + if (pfn_valid(pfn + i)) + break; + } if (i == MAX_ORDER_NR_PAGES) continue; page = pfn_to_page(pfn + i); diff --git a/mm/mempolicy.c b/mm/mempolicy.c index c1b273f1837..0e0961b8c39 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -162,12 +162,6 @@ static const struct mempolicy_operations { enum mpol_rebind_step step); } mpol_ops[MPOL_MAX]; -/* Check that the nodemask contains at least one populated zone */ -static int is_valid_nodemask(const nodemask_t *nodemask) -{ - return nodes_intersects(*nodemask, node_states[N_MEMORY]); -} - static inline int mpol_store_user_nodemask(const struct mempolicy *pol) { return pol->flags & MPOL_MODE_FLAGS; @@ -202,7 +196,7 @@ static int mpol_new_preferred(struct mempolicy *pol, const nodemask_t *nodes) static int mpol_new_bind(struct mempolicy *pol, const nodemask_t *nodes) { - if (!is_valid_nodemask(nodes)) + if (nodes_empty(*nodes)) return -EINVAL; pol->v.nodes = *nodes; return 0; @@ -234,7 +228,7 @@ static int mpol_set_nodemask(struct mempolicy *pol, nodes = 
NULL; /* explicit local allocation */ else { if (pol->flags & MPOL_F_RELATIVE_NODES) - mpol_relative_nodemask(&nsc->mask2, nodes,&nsc->mask1); + mpol_relative_nodemask(&nsc->mask2, nodes, &nsc->mask1); else nodes_and(nsc->mask2, *nodes, nsc->mask1); diff --git a/mm/mmap.c b/mm/mmap.c index 7b36aa7cc89..e996cfd802b 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2629,6 +2629,75 @@ SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len) return vm_munmap(addr, len); } + +/* + * Emulation of deprecated remap_file_pages() syscall. + */ +SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, + unsigned long, prot, unsigned long, pgoff, unsigned long, flags) +{ + + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long populate = 0; + unsigned long ret = -EINVAL; + struct file *file; + + pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. " + "See Documentation/vm/remap_file_pages.txt.\n", + current->comm, current->pid); + + if (prot) + return ret; + start = start & PAGE_MASK; + size = size & PAGE_MASK; + + if (start + size <= start) + return ret; + + /* Does pgoff wrap? */ + if (pgoff + (size >> PAGE_SHIFT) < pgoff) + return ret; + + down_write(&mm->mmap_sem); + vma = find_vma(mm, start); + + if (!vma || !(vma->vm_flags & VM_SHARED)) + goto out; + + if (start < vma->vm_start || start + size > vma->vm_end) + goto out; + + if (pgoff == linear_page_index(vma, start)) { + ret = 0; + goto out; + } + + prot |= vma->vm_flags & VM_READ ? PROT_READ : 0; + prot |= vma->vm_flags & VM_WRITE ? PROT_WRITE : 0; + prot |= vma->vm_flags & VM_EXEC ? 
PROT_EXEC : 0; + + flags &= MAP_NONBLOCK; + flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE; + if (vma->vm_flags & VM_LOCKED) { + flags |= MAP_LOCKED; + /* drop PG_Mlocked flag for over-mapped range */ + munlock_vma_pages_range(vma, start, start + size); + } + + file = get_file(vma->vm_file); + ret = do_mmap_pgoff(vma->vm_file, start, size, + prot, flags, pgoff, &populate); + fput(file); +out: + up_write(&mm->mmap_sem); + if (populate) + mm_populate(ret, populate); + if (!IS_ERR_VALUE(ret)) + ret = 0; + return ret; +} + static inline void verify_mm_writelocked(struct mm_struct *mm) { #ifdef CONFIG_DEBUG_VM diff --git a/mm/nommu.c b/mm/nommu.c index b51eadf6d95..099cc72aa39 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1983,14 +1983,6 @@ void filemap_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf) } EXPORT_SYMBOL(filemap_map_pages); -int generic_file_remap_pages(struct vm_area_struct *vma, unsigned long addr, - unsigned long size, pgoff_t pgoff) -{ - BUG(); - return 0; -} -EXPORT_SYMBOL(generic_file_remap_pages); - static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, unsigned long addr, void *buf, int len, int write) { diff --git a/mm/page_alloc.c b/mm/page_alloc.c index fa974d87f60..1bb65e6f48d 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -111,6 +111,7 @@ static DEFINE_SPINLOCK(managed_page_count_lock); unsigned long totalram_pages __read_mostly; unsigned long totalreserve_pages __read_mostly; +unsigned long totalcma_pages __read_mostly; /* * When calculating the number of globally allowed dirty pages, there * is a certain number of per-zone reserves that should not be @@ -2331,12 +2332,21 @@ static inline struct page * __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist, enum zone_type high_zoneidx, nodemask_t *nodemask, struct zone *preferred_zone, - int classzone_idx, int migratetype) + int classzone_idx, int migratetype, unsigned long *did_some_progress) { struct page *page; - /* Acquire 
the per-zone oom lock for each zone */ + *did_some_progress = 0; + + if (oom_killer_disabled) + return NULL; + + /* + * Acquire the per-zone oom lock for each zone. If that + * fails, somebody else is making progress for us. + */ if (!oom_zonelist_trylock(zonelist, gfp_mask)) { + *did_some_progress = 1; schedule_timeout_uninterruptible(1); return NULL; } @@ -2362,12 +2372,18 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, goto out; if (!(gfp_mask & __GFP_NOFAIL)) { + /* Coredumps can quickly deplete all memory reserves */ + if (current->flags & PF_DUMPCORE) + goto out; /* The OOM killer will not help higher order allocs */ if (order > PAGE_ALLOC_COSTLY_ORDER) goto out; /* The OOM killer does not needlessly kill tasks for lowmem */ if (high_zoneidx < ZONE_NORMAL) goto out; + /* The OOM killer does not compensate for light reclaim */ + if (!(gfp_mask & __GFP_FS)) + goto out; /* * GFP_THISNODE contains __GFP_NORETRY and we never hit this. * Sanity check for bare calls of __GFP_THISNODE, not real OOM. 
@@ -2380,7 +2396,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, } /* Exhausted what can be done so it's blamo time */ out_of_memory(zonelist, gfp_mask, order, nodemask, false); - + *did_some_progress = 1; out: oom_zonelist_unlock(zonelist, gfp_mask); return page; @@ -2657,7 +2673,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, (gfp_mask & GFP_THISNODE) == GFP_THISNODE) goto nopage; -restart: if (!(gfp_mask & __GFP_NO_KSWAPD)) wake_all_kswapds(order, zonelist, high_zoneidx, preferred_zone, nodemask); @@ -2787,51 +2802,27 @@ rebalance: if (page) goto got_pg; - /* - * If we failed to make any progress reclaiming, then we are - * running out of options and have to consider going OOM - */ - if (!did_some_progress) { - if (oom_gfp_allowed(gfp_mask)) { - if (oom_killer_disabled) - goto nopage; - /* Coredumps can quickly deplete all memory reserves */ - if ((current->flags & PF_DUMPCORE) && - !(gfp_mask & __GFP_NOFAIL)) - goto nopage; - page = __alloc_pages_may_oom(gfp_mask, order, - zonelist, high_zoneidx, - nodemask, preferred_zone, - classzone_idx, migratetype); - if (page) - goto got_pg; - - if (!(gfp_mask & __GFP_NOFAIL)) { - /* - * The oom killer is not called for high-order - * allocations that may fail, so if no progress - * is being made, there are no other options and - * retrying is unlikely to help. - */ - if (order > PAGE_ALLOC_COSTLY_ORDER) - goto nopage; - /* - * The oom killer is not called for lowmem - * allocations to prevent needlessly killing - * innocent tasks. - */ - if (high_zoneidx < ZONE_NORMAL) - goto nopage; - } - - goto restart; - } - } - /* Check if we should retry the allocation */ pages_reclaimed += did_some_progress; if (should_alloc_retry(gfp_mask, order, did_some_progress, pages_reclaimed)) { + /* + * If we fail to make progress by freeing individual + * pages, but the allocation wants us to keep going, + * start OOM killing tasks. 
+ */ + if (!did_some_progress) { + page = __alloc_pages_may_oom(gfp_mask, order, zonelist, + high_zoneidx, nodemask, + preferred_zone, classzone_idx, + migratetype,&did_some_progress); + if (page) + goto got_pg; + if (!did_some_progress) { + BUG_ON(gfp_mask & __GFP_NOFAIL); + goto nopage; + } + } /* Wait for some write requests to complete then retry */ wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/50); goto rebalance; @@ -2876,6 +2867,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, unsigned int cpuset_mems_cookie; int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET|ALLOC_FAIR; int classzone_idx; + gfp_t mask; gfp_mask &= gfp_allowed_mask; @@ -2909,22 +2901,24 @@ retry_cpuset: classzone_idx = zonelist_zone_idx(preferred_zoneref); /* First allocation attempt */ - page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order, - zonelist, high_zoneidx, alloc_flags, - preferred_zone, classzone_idx, migratetype); + mask = gfp_mask|__GFP_HARDWALL; + page = get_page_from_freelist(mask, nodemask, order, zonelist, + high_zoneidx, alloc_flags, preferred_zone, + classzone_idx, migratetype); if (unlikely(!page)) { /* * Runtime PM, block IO and its error handling path * can deadlock because I/O on the device might not * complete. 
*/ - gfp_mask = memalloc_noio_flags(gfp_mask); - page = __alloc_pages_slowpath(gfp_mask, order, + mask = memalloc_noio_flags(gfp_mask); + + page = __alloc_pages_slowpath(mask, order, zonelist, high_zoneidx, nodemask, preferred_zone, classzone_idx, migratetype); } - trace_mm_page_alloc(page, order, gfp_mask, migratetype); + trace_mm_page_alloc(page, order, mask, migratetype); out: /* @@ -5586,7 +5580,7 @@ void __init mem_init_print_info(const char *str) pr_info("Memory: %luK/%luK available " "(%luK kernel code, %luK rwdata, %luK rodata, " - "%luK init, %luK bss, %luK reserved" + "%luK init, %luK bss, %luK reserved, %luK cma-reserved" #ifdef CONFIG_HIGHMEM ", %luK highmem" #endif @@ -5594,7 +5588,8 @@ void __init mem_init_print_info(const char *str) nr_free_pages() << (PAGE_SHIFT-10), physpages << (PAGE_SHIFT-10), codesize >> 10, datasize >> 10, rosize >> 10, (init_data_size + init_code_size) >> 10, bss_size >> 10, - (physpages - totalram_pages) << (PAGE_SHIFT-10), + (physpages - totalram_pages - totalcma_pages) << (PAGE_SHIFT-10), + totalcma_pages << (PAGE_SHIFT-10), #ifdef CONFIG_HIGHMEM totalhigh_pages << (PAGE_SHIFT-10), #endif diff --git a/mm/page_isolation.c b/mm/page_isolation.c index 72f5ac381ab..e5d9c527796 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -176,8 +176,11 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, undo: for (pfn = start_pfn; pfn < undo_pfn; - pfn += pageblock_nr_pages) - unset_migratetype_isolate(pfn_to_page(pfn), migratetype); + pfn += pageblock_nr_pages) { + page = __first_valid_page(pfn, pageblock_nr_pages); + if (page) + unset_migratetype_isolate(page, migratetype); + } return -EBUSY; } diff --git a/mm/rmap.c b/mm/rmap.c index c5bc241127b..b4047838da0 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -666,6 +666,7 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma) } struct page_referenced_arg { + int dirtied; int mapcount; int referenced; unsigned long vm_flags; @@ -680,6 
+681,7 @@ static int page_referenced_one(struct page *page, struct vm_area_struct *vma, struct mm_struct *mm = vma->vm_mm; spinlock_t *ptl; int referenced = 0; + int dirty = 0; struct page_referenced_arg *pra = arg; if (unlikely(PageTransHuge(page))) { @@ -703,6 +705,15 @@ static int page_referenced_one(struct page *page, struct vm_area_struct *vma, /* go ahead even if the pmd is pmd_trans_splitting() */ if (pmdp_clear_flush_young_notify(vma, address, pmd)) referenced++; + + /* + * Use pmd_freeable instead of raw pmd_dirty because in some + * of architecture, pmd_dirty is not defined unless + * CONFIG_TRANSPARENT_HUGEPAGE is enabled + */ + if (!pmd_freeable(*pmd)) + dirty++; + spin_unlock(ptl); } else { pte_t *pte; @@ -732,6 +743,10 @@ static int page_referenced_one(struct page *page, struct vm_area_struct *vma, if (likely(!(vma->vm_flags & VM_SEQ_READ))) referenced++; } + + if (pte_dirty(*pte)) + dirty++; + pte_unmap_unlock(pte, ptl); } @@ -740,6 +755,9 @@ static int page_referenced_one(struct page *page, struct vm_area_struct *vma, pra->vm_flags |= vma->vm_flags; } + if (dirty) + pra->dirtied++; + pra->mapcount--; if (!pra->mapcount) return SWAP_SUCCESS; /* To break the loop */ @@ -764,6 +782,7 @@ static bool invalid_page_referenced_vma(struct vm_area_struct *vma, void *arg) * @is_locked: caller holds lock on the page * @memcg: target memory cgroup * @vm_flags: collect encountered vma->vm_flags who actually referenced the page + * @is_pte_dirty: ptes which have marked dirty bit - used for lazyfree page * * Quick test_and_clear_referenced for all mappings to a page, * returns the number of ptes which referenced the page. 
@@ -771,7 +790,8 @@ static bool invalid_page_referenced_vma(struct vm_area_struct *vma, void *arg) int page_referenced(struct page *page, int is_locked, struct mem_cgroup *memcg, - unsigned long *vm_flags) + unsigned long *vm_flags, + int *is_pte_dirty) { int ret; int we_locked = 0; @@ -786,6 +806,9 @@ int page_referenced(struct page *page, }; *vm_flags = 0; + if (is_pte_dirty) + *is_pte_dirty = 0; + if (!page_mapped(page)) return 0; @@ -813,6 +836,9 @@ int page_referenced(struct page *page, if (we_locked) unlock_page(page); + if (is_pte_dirty) + *is_pte_dirty = pra.dirtied; + return pra.referenced; } @@ -1145,6 +1171,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, spinlock_t *ptl; int ret = SWAP_AGAIN; enum ttu_flags flags = (enum ttu_flags)arg; + int dirty = 0; pte = page_check_address(page, mm, address, &ptl, 0); if (!pte) @@ -1174,7 +1201,8 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, pteval = ptep_clear_flush(vma, address, pte); /* Move the dirty bit to the physical page now the pte is gone. */ - if (pte_dirty(pteval)) + dirty = pte_dirty(pteval); + if (dirty) set_page_dirty(page); /* Update high watermark before we lower rss */ @@ -1203,6 +1231,19 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, swp_entry_t entry = { .val = page_private(page) }; pte_t swp_pte; + if (flags & TTU_FREE) { + VM_BUG_ON_PAGE(PageSwapCache(page), page); + if (!dirty && !PageDirty(page)) { + /* It's a freeable page by MADV_FREE */ + dec_mm_counter(mm, MM_ANONPAGES); + goto discard; + } else { + set_pte_at(mm, address, pte, pteval); + ret = SWAP_FAIL; + goto out_unmap; + } + } + if (PageSwapCache(page)) { /* * Store the swap location in the pte. 
@@ -1244,6 +1285,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, } else dec_mm_counter(mm, MM_FILEPAGES); +discard: page_remove_rmap(page); page_cache_release(page); diff --git a/mm/util.c b/mm/util.c index fec39d4509a..d25558ba661 100644 --- a/mm/util.c +++ b/mm/util.c @@ -3,6 +3,7 @@ #include <linux/string.h> #include <linux/compiler.h> #include <linux/export.h> +#include <linux/ctype.h> #include <linux/err.h> #include <linux/sched.h> #include <linux/security.h> @@ -62,6 +63,35 @@ char *kstrndup(const char *s, size_t max, gfp_t gfp) EXPORT_SYMBOL(kstrndup); /** + * kstrimdup - Trim and copy a %NUL terminated string. + * @s: the string to trim and duplicate + * @gfp: the GFP mask used in the kmalloc() call when allocating memory + * + * Returns an address, which the caller must kfree, containing + * a duplicate of the passed string with leading and/or trailing + * whitespace (as defined by isspace) removed. + */ +char *kstrimdup(const char *s, gfp_t gfp) +{ + char *buf; + char *begin = skip_spaces(s); + size_t len = strlen(begin); + + while (len && isspace(begin[len - 1])) + len--; + + buf = kmalloc_track_caller(len + 1, gfp); + if (!buf) + return NULL; + + memcpy(buf, begin, len); + buf[len] = '\0'; + + return buf; +} +EXPORT_SYMBOL(kstrimdup); + +/** * kmemdup - duplicate region of memory * * @src: memory region to duplicate diff --git a/mm/vmscan.c b/mm/vmscan.c index bd9a72bc4a1..5e8772b2b9e 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -713,13 +713,17 @@ enum page_references { }; static enum page_references page_check_references(struct page *page, - struct scan_control *sc) + struct scan_control *sc, + bool *freeable) { int referenced_ptes, referenced_page; unsigned long vm_flags; + int pte_dirty; + + VM_BUG_ON_PAGE(!PageLocked(page), page); referenced_ptes = page_referenced(page, 1, sc->target_mem_cgroup, - &vm_flags); + &vm_flags, &pte_dirty); referenced_page = TestClearPageReferenced(page); /* @@ -760,6 +764,10 @@ static 
enum page_references page_check_references(struct page *page, return PAGEREF_KEEP; } + if (PageAnon(page) && !pte_dirty && !PageSwapCache(page) && + !PageDirty(page)) + *freeable = true; + /* Reclaim if clean, defer dirty pages to writeback */ if (referenced_page && !PageSwapBacked(page)) return PAGEREF_RECLAIM_CLEAN; @@ -828,6 +836,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, int may_enter_fs; enum page_references references = PAGEREF_RECLAIM_CLEAN; bool dirty, writeback; + bool freeable = false; cond_resched(); @@ -951,7 +960,8 @@ static unsigned long shrink_page_list(struct list_head *page_list, } if (!force_reclaim) - references = page_check_references(page, sc); + references = page_check_references(page, sc, + &freeable); switch (references) { case PAGEREF_ACTIVATE: @@ -968,22 +978,31 @@ static unsigned long shrink_page_list(struct list_head *page_list, * Try to allocate it some swap space here. */ if (PageAnon(page) && !PageSwapCache(page)) { - if (!(sc->gfp_mask & __GFP_IO)) - goto keep_locked; - if (!add_to_swap(page, page_list)) - goto activate_locked; - may_enter_fs = 1; - - /* Adding to swap updated mapping */ - mapping = page_mapping(page); + if (!freeable) { + if (!(sc->gfp_mask & __GFP_IO)) + goto keep_locked; + if (!add_to_swap(page, page_list)) + goto activate_locked; + may_enter_fs = 1; + /* Adding to swap updated mapping */ + mapping = page_mapping(page); + } else { + if (likely(!PageTransHuge(page))) + goto unmap; + /* try_to_unmap isn't aware of THP page */ + if (unlikely(split_huge_page_to_list(page, + page_list))) + goto keep_locked; + } } - +unmap: /* * The page is mapped into the page tables of one or more * processes. Try to unmap it here. */ - if (page_mapped(page) && mapping) { - switch (try_to_unmap(page, ttu_flags)) { + if (page_mapped(page) && (mapping || freeable)) { + switch (try_to_unmap(page, + freeable ? 
TTU_FREE : ttu_flags)) { case SWAP_FAIL: goto activate_locked; case SWAP_AGAIN: @@ -991,7 +1010,20 @@ static unsigned long shrink_page_list(struct list_head *page_list, case SWAP_MLOCK: goto cull_mlocked; case SWAP_SUCCESS: - ; /* try to free the page below */ + /* try to free the page below */ + if (!freeable) + break; + /* + * Freeable anon page doesn't have mapping + * due to skipping of swapcache so we free + * page in here rather than __remove_mapping. + */ + VM_BUG_ON_PAGE(PageSwapCache(page), page); + if (!page_freeze_refs(page, 1)) + goto keep_locked; + __clear_page_locked(page); + count_vm_event(PGLAZYFREED); + goto free_it; } } @@ -1731,7 +1763,7 @@ static void shrink_active_list(unsigned long nr_to_scan, } if (page_referenced(page, 0, sc->target_mem_cgroup, - &vm_flags)) { + &vm_flags, NULL)) { nr_rotated += hpage_nr_pages(page); /* * Identify referenced, file-backed active pages and diff --git a/mm/vmstat.c b/mm/vmstat.c index 1284f89fca0..5fba97d122a 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -816,6 +816,7 @@ const char * const vmstat_text[] = { "pgfault", "pgmajfault", + "pglazyfreed", TEXTS_FOR_ZONES("pgrefill") TEXTS_FOR_ZONES("pgsteal_kswapd") |