diff options
Diffstat (limited to 'arch/powerpc/kernel')
30 files changed, 1092 insertions, 397 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 23f8082d7bfa..811f441a125f 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -15,14 +15,10 @@ CFLAGS_btext.o += -fPIC endif CFLAGS_cputable.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) -CFLAGS_init.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) +CFLAGS_prom_init.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) CFLAGS_btext.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) CFLAGS_prom.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) -# -fstack-protector triggers protection checks in this code, -# but it is being used too early to link to meaningful stack_chk logic. -CFLAGS_prom_init.o += $(call cc-option, -fno-stack-protector) - ifdef CONFIG_FUNCTION_TRACER # Do not trace early boot code CFLAGS_REMOVE_cputable.o = -mno-sched-epilog $(CC_FLAGS_FTRACE) @@ -100,6 +96,7 @@ obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_BOOTX_TEXT) += btext.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_KPROBES) += kprobes.o +obj-$(CONFIG_OPTPROBES) += optprobes.o optprobes_head.o obj-$(CONFIG_UPROBES) += uprobes.o obj-$(CONFIG_PPC_UDBG_16550) += legacy_serial.o udbg_16550.o obj-$(CONFIG_STACKTRACE) += stacktrace.o diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index 8d58c61908f7..cbc7c42cdb74 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -204,7 +204,7 @@ static int emulate_dcbz(struct pt_regs *regs, unsigned char __user *addr) int i, size; #ifdef __powerpc64__ - size = ppc64_caches.dline_size; + size = ppc64_caches.l1d.block_size; #else size = L1_CACHE_BYTES; #endif diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 0601e6a7297c..f25239b3a06f 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -91,9 +91,6 @@ int main(void) DEFINE(TI_livepatch_sp, offsetof(struct thread_info, livepatch_sp)); #endif -#ifdef CONFIG_CC_STACKPROTECTOR - DEFINE(TSK_STACK_CANARY, offsetof(struct task_struct, stack_canary)); -#endif 
DEFINE(KSP, offsetof(struct thread_struct, ksp)); DEFINE(PT_REGS, offsetof(struct thread_struct, regs)); #ifdef CONFIG_BOOKE @@ -163,12 +160,12 @@ int main(void) DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); #ifdef CONFIG_PPC64 - DEFINE(DCACHEL1LINESIZE, offsetof(struct ppc64_caches, dline_size)); - DEFINE(DCACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_dline_size)); - DEFINE(DCACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, dlines_per_page)); - DEFINE(ICACHEL1LINESIZE, offsetof(struct ppc64_caches, iline_size)); - DEFINE(ICACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_iline_size)); - DEFINE(ICACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, ilines_per_page)); + DEFINE(DCACHEL1BLOCKSIZE, offsetof(struct ppc64_caches, l1d.block_size)); + DEFINE(DCACHEL1LOGBLOCKSIZE, offsetof(struct ppc64_caches, l1d.log_block_size)); + DEFINE(DCACHEL1BLOCKSPERPAGE, offsetof(struct ppc64_caches, l1d.blocks_per_page)); + DEFINE(ICACHEL1BLOCKSIZE, offsetof(struct ppc64_caches, l1i.block_size)); + DEFINE(ICACHEL1LOGBLOCKSIZE, offsetof(struct ppc64_caches, l1i.log_block_size)); + DEFINE(ICACHEL1BLOCKSPERPAGE, offsetof(struct ppc64_caches, l1i.blocks_per_page)); /* paca */ DEFINE(PACA_SIZE, sizeof(struct paca_struct)); DEFINE(PACAPACAINDEX, offsetof(struct paca_struct, paca_index)); @@ -252,9 +249,9 @@ int main(void) DEFINE(ACCOUNT_STARTTIME_USER, offsetof(struct paca_struct, accounting.starttime_user)); DEFINE(ACCOUNT_USER_TIME, - offsetof(struct paca_struct, accounting.user_time)); + offsetof(struct paca_struct, accounting.utime)); DEFINE(ACCOUNT_SYSTEM_TIME, - offsetof(struct paca_struct, accounting.system_time)); + offsetof(struct paca_struct, accounting.stime)); DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save)); DEFINE(PACA_NAPSTATELOST, offsetof(struct paca_struct, nap_state_lost)); DEFINE(PACA_SPRG_VDSO, offsetof(struct paca_struct, sprg_vdso)); @@ -265,9 +262,9 @@ int main(void) DEFINE(ACCOUNT_STARTTIME_USER, offsetof(struct thread_info, 
accounting.starttime_user)); DEFINE(ACCOUNT_USER_TIME, - offsetof(struct thread_info, accounting.user_time)); + offsetof(struct thread_info, accounting.utime)); DEFINE(ACCOUNT_SYSTEM_TIME, - offsetof(struct thread_info, accounting.system_time)); + offsetof(struct thread_info, accounting.stime)); #endif #endif /* CONFIG_PPC64 */ @@ -498,6 +495,7 @@ int main(void) DEFINE(KVM_NEED_FLUSH, offsetof(struct kvm, arch.need_tlb_flush.bits)); DEFINE(KVM_ENABLED_HCALLS, offsetof(struct kvm, arch.enabled_hcalls)); DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v)); + DEFINE(KVM_RADIX, offsetof(struct kvm, arch.radix)); DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr)); DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar)); DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr)); @@ -537,6 +535,7 @@ int main(void) DEFINE(VCPU_SLB_NR, offsetof(struct kvm_vcpu, arch.slb_nr)); DEFINE(VCPU_FAULT_DSISR, offsetof(struct kvm_vcpu, arch.fault_dsisr)); DEFINE(VCPU_FAULT_DAR, offsetof(struct kvm_vcpu, arch.fault_dar)); + DEFINE(VCPU_FAULT_GPA, offsetof(struct kvm_vcpu, arch.fault_gpa)); DEFINE(VCPU_INTR_MSR, offsetof(struct kvm_vcpu, arch.intr_msr)); DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap)); diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c index c6689f658b50..d0ea7860e02b 100644 --- a/arch/powerpc/kernel/dma-swiotlb.c +++ b/arch/powerpc/kernel/dma-swiotlb.c @@ -46,7 +46,7 @@ static u64 swiotlb_powerpc_get_required(struct device *dev) * map_page, and unmap_page on highmem, use normal dma_ops * for everything else. 
*/ -struct dma_map_ops swiotlb_dma_ops = { +const struct dma_map_ops swiotlb_dma_ops = { .alloc = __dma_direct_alloc_coherent, .free = __dma_direct_free_coherent, .mmap = dma_direct_mmap_coherent, diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index 6877e3fa95bb..41c749586bd2 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -33,7 +33,7 @@ static u64 __maybe_unused get_pfn_limit(struct device *dev) struct dev_archdata __maybe_unused *sd = &dev->archdata; #ifdef CONFIG_SWIOTLB - if (sd->max_direct_dma_addr && sd->dma_ops == &swiotlb_dma_ops) + if (sd->max_direct_dma_addr && dev->dma_ops == &swiotlb_dma_ops) pfn = min_t(u64, pfn, sd->max_direct_dma_addr >> PAGE_SHIFT); #endif @@ -274,7 +274,7 @@ static inline void dma_direct_sync_single(struct device *dev, } #endif -struct dma_map_ops dma_direct_ops = { +const struct dma_map_ops dma_direct_ops = { .alloc = dma_direct_alloc_coherent, .free = dma_direct_free_coherent, .mmap = dma_direct_mmap_coherent, @@ -316,7 +316,7 @@ EXPORT_SYMBOL(dma_set_coherent_mask); int __dma_set_mask(struct device *dev, u64 dma_mask) { - struct dma_map_ops *dma_ops = get_dma_ops(dev); + const struct dma_map_ops *dma_ops = get_dma_ops(dev); if ((dma_ops != NULL) && (dma_ops->set_dma_mask != NULL)) return dma_ops->set_dma_mask(dev, dma_mask); @@ -344,7 +344,7 @@ EXPORT_SYMBOL(dma_set_mask); u64 __dma_get_required_mask(struct device *dev) { - struct dma_map_ops *dma_ops = get_dma_ops(dev); + const struct dma_map_ops *dma_ops = get_dma_ops(dev); if (unlikely(dma_ops == NULL)) return 0; diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index d88573bdd090..b94887165a10 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -545,7 +545,7 @@ static void *eeh_pe_detach_dev(void *data, void *userdata) static void *__eeh_clear_pe_frozen_state(void *data, void *flag) { struct eeh_pe *pe = (struct eeh_pe *)data; - bool *clear_sw_state = flag; + bool 
clear_sw_state = *(bool *)flag; int i, rc = 1; for (i = 0; rc && i < 3; i++) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 5742dbdbee46..3841d749a430 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -674,11 +674,7 @@ BEGIN_FTR_SECTION mtspr SPRN_SPEFSCR,r0 /* restore SPEFSCR reg */ END_FTR_SECTION_IFSET(CPU_FTR_SPE) #endif /* CONFIG_SPE */ -#if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP) - lwz r0,TSK_STACK_CANARY(r2) - lis r4,__stack_chk_guard@ha - stw r0,__stack_chk_guard@l(r4) -#endif + lwz r0,_CCR(r1) mtcrf 0xFF,r0 /* r3-r12 are destroyed -- Cort */ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index d39d6118c6e9..857bf7c5b946 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -93,7 +93,7 @@ USE_FIXED_SECTION(real_vectors) __start_interrupts: /* No virt vectors corresponding with 0x0..0x100 */ -EXC_VIRT_NONE(0x4000, 0x4100) +EXC_VIRT_NONE(0x4000, 0x100) #ifdef CONFIG_PPC_P7_NAP @@ -114,15 +114,15 @@ EXC_VIRT_NONE(0x4000, 0x4100) #define IDLETEST NOTEST #endif -EXC_REAL_BEGIN(system_reset, 0x100, 0x200) +EXC_REAL_BEGIN(system_reset, 0x100, 0x100) SET_SCRATCH0(r13) GET_PACA(r13) clrrdi r13,r13,1 /* Last bit of HSPRG0 is set if waking from winkle */ EXCEPTION_PROLOG_PSERIES_PACA(PACA_EXGEN, system_reset_common, EXC_STD, IDLETEST, 0x100) -EXC_REAL_END(system_reset, 0x100, 0x200) -EXC_VIRT_NONE(0x4100, 0x4200) +EXC_REAL_END(system_reset, 0x100, 0x100) +EXC_VIRT_NONE(0x4100, 0x100) #ifdef CONFIG_PPC_P7_NAP EXC_COMMON_BEGIN(system_reset_idle_common) @@ -142,7 +142,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) lbz r0,HSTATE_HWTHREAD_REQ(r13) cmpwi r0,0 beq 1f - b kvm_start_guest + BRANCH_TO_KVM(r10, kvm_start_guest) 1: #endif @@ -166,7 +166,7 @@ TRAMP_REAL_BEGIN(system_reset_fwnmi) #endif /* CONFIG_PPC_PSERIES */ -EXC_REAL_BEGIN(machine_check, 0x200, 0x300) +EXC_REAL_BEGIN(machine_check, 0x200, 
0x100) /* This is moved out of line as it can be patched by FW, but * some code path might still want to branch into the original * vector @@ -186,8 +186,8 @@ BEGIN_FTR_SECTION FTR_SECTION_ELSE b machine_check_pSeries_0 ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) -EXC_REAL_END(machine_check, 0x200, 0x300) -EXC_VIRT_NONE(0x4200, 0x4300) +EXC_REAL_END(machine_check, 0x200, 0x100) +EXC_VIRT_NONE(0x4200, 0x100) TRAMP_REAL_BEGIN(machine_check_powernv_early) BEGIN_FTR_SECTION EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200) @@ -381,12 +381,12 @@ EXC_COMMON_BEGIN(machine_check_handle_early) lbz r3,PACA_THREAD_IDLE_STATE(r13) cmpwi r3,PNV_THREAD_NAP bgt 10f - IDLE_STATE_ENTER_SEQ(PPC_NAP) + IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP) /* No return */ 10: cmpwi r3,PNV_THREAD_SLEEP bgt 2f - IDLE_STATE_ENTER_SEQ(PPC_SLEEP) + IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP) /* No return */ 2: @@ -400,7 +400,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early) */ ori r13,r13,1 SET_PACA(r13) - IDLE_STATE_ENTER_SEQ(PPC_WINKLE) + IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE) /* No return */ 4: #endif @@ -483,8 +483,8 @@ EXC_COMMON_BEGIN(unrecover_mce) b 1b -EXC_REAL(data_access, 0x300, 0x380) -EXC_VIRT(data_access, 0x4300, 0x4380, 0x300) +EXC_REAL(data_access, 0x300, 0x80) +EXC_VIRT(data_access, 0x4300, 0x80, 0x300) TRAMP_KVM_SKIP(PACA_EXGEN, 0x300) EXC_COMMON_BEGIN(data_access_common) @@ -512,7 +512,7 @@ MMU_FTR_SECTION_ELSE ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) -EXC_REAL_BEGIN(data_access_slb, 0x380, 0x400) +EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80) SET_SCRATCH0(r13) EXCEPTION_PROLOG_0(PACA_EXSLB) EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x380) @@ -533,9 +533,9 @@ EXC_REAL_BEGIN(data_access_slb, 0x380, 0x400) mtctr r10 bctr #endif -EXC_REAL_END(data_access_slb, 0x380, 0x400) +EXC_REAL_END(data_access_slb, 0x380, 0x80) -EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x4400) +EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80) SET_SCRATCH0(r13) EXCEPTION_PROLOG_0(PACA_EXSLB) EXCEPTION_PROLOG_1(PACA_EXSLB, 
NOTEST, 0x380) @@ -556,12 +556,12 @@ EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x4400) mtctr r10 bctr #endif -EXC_VIRT_END(data_access_slb, 0x4380, 0x4400) +EXC_VIRT_END(data_access_slb, 0x4380, 0x80) TRAMP_KVM_SKIP(PACA_EXSLB, 0x380) -EXC_REAL(instruction_access, 0x400, 0x480) -EXC_VIRT(instruction_access, 0x4400, 0x4480, 0x400) +EXC_REAL(instruction_access, 0x400, 0x80) +EXC_VIRT(instruction_access, 0x4400, 0x80, 0x400) TRAMP_KVM(PACA_EXGEN, 0x400) EXC_COMMON_BEGIN(instruction_access_common) @@ -580,7 +580,7 @@ MMU_FTR_SECTION_ELSE ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) -EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x500) +EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x80) SET_SCRATCH0(r13) EXCEPTION_PROLOG_0(PACA_EXSLB) EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x480) @@ -596,9 +596,9 @@ EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x500) mtctr r10 bctr #endif -EXC_REAL_END(instruction_access_slb, 0x480, 0x500) +EXC_REAL_END(instruction_access_slb, 0x480, 0x80) -EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x4500) +EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80) SET_SCRATCH0(r13) EXCEPTION_PROLOG_0(PACA_EXSLB) EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x480) @@ -614,7 +614,7 @@ EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x4500) mtctr r10 bctr #endif -EXC_VIRT_END(instruction_access_slb, 0x4480, 0x4500) +EXC_VIRT_END(instruction_access_slb, 0x4480, 0x80) TRAMP_KVM(PACA_EXSLB, 0x480) @@ -711,23 +711,19 @@ EXC_COMMON_BEGIN(bad_addr_slb) bl slb_miss_bad_addr b ret_from_except -EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x600) +EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100) .globl hardware_interrupt_hv; hardware_interrupt_hv: BEGIN_FTR_SECTION _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, EXC_HV, SOFTEN_TEST_HV) -do_kvm_H0x500: - KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x502) FTR_SECTION_ELSE _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, EXC_STD, SOFTEN_TEST_PR) -do_kvm_0x500: - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x500) 
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) -EXC_REAL_END(hardware_interrupt, 0x500, 0x600) +EXC_REAL_END(hardware_interrupt, 0x500, 0x100) -EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x4600) +EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100) .globl hardware_interrupt_relon_hv; hardware_interrupt_relon_hv: BEGIN_FTR_SECTION @@ -735,13 +731,15 @@ hardware_interrupt_relon_hv: FTR_SECTION_ELSE _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, EXC_STD, SOFTEN_TEST_PR) ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) -EXC_VIRT_END(hardware_interrupt, 0x4500, 0x4600) +EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100) +TRAMP_KVM(PACA_EXGEN, 0x500) +TRAMP_KVM_HV(PACA_EXGEN, 0x500) EXC_COMMON_ASYNC(hardware_interrupt_common, 0x500, do_IRQ) -EXC_REAL(alignment, 0x600, 0x700) -EXC_VIRT(alignment, 0x4600, 0x4700, 0x600) +EXC_REAL(alignment, 0x600, 0x100) +EXC_VIRT(alignment, 0x4600, 0x100, 0x600) TRAMP_KVM(PACA_EXGEN, 0x600) EXC_COMMON_BEGIN(alignment_common) mfspr r10,SPRN_DAR @@ -760,8 +758,8 @@ EXC_COMMON_BEGIN(alignment_common) b ret_from_except -EXC_REAL(program_check, 0x700, 0x800) -EXC_VIRT(program_check, 0x4700, 0x4800, 0x700) +EXC_REAL(program_check, 0x700, 0x100) +EXC_VIRT(program_check, 0x4700, 0x100, 0x700) TRAMP_KVM(PACA_EXGEN, 0x700) EXC_COMMON_BEGIN(program_check_common) EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN) @@ -772,8 +770,8 @@ EXC_COMMON_BEGIN(program_check_common) b ret_from_except -EXC_REAL(fp_unavailable, 0x800, 0x900) -EXC_VIRT(fp_unavailable, 0x4800, 0x4900, 0x800) +EXC_REAL(fp_unavailable, 0x800, 0x100) +EXC_VIRT(fp_unavailable, 0x4800, 0x100, 0x800) TRAMP_KVM(PACA_EXGEN, 0x800) EXC_COMMON_BEGIN(fp_unavailable_common) EXCEPTION_PROLOG_COMMON(0x800, PACA_EXGEN) @@ -805,20 +803,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) #endif -EXC_REAL_MASKABLE(decrementer, 0x900, 0x980) -EXC_VIRT_MASKABLE(decrementer, 0x4900, 0x4980, 0x900) +EXC_REAL_MASKABLE(decrementer, 0x900, 0x80) +EXC_VIRT_MASKABLE(decrementer, 0x4900, 0x80, 0x900) 
TRAMP_KVM(PACA_EXGEN, 0x900) EXC_COMMON_ASYNC(decrementer_common, 0x900, timer_interrupt) -EXC_REAL_HV(hdecrementer, 0x980, 0xa00) -EXC_VIRT_HV(hdecrementer, 0x4980, 0x4a00, 0x980) +EXC_REAL_HV(hdecrementer, 0x980, 0x80) +EXC_VIRT_HV(hdecrementer, 0x4980, 0x80, 0x980) TRAMP_KVM_HV(PACA_EXGEN, 0x980) EXC_COMMON(hdecrementer_common, 0x980, hdec_interrupt) -EXC_REAL_MASKABLE(doorbell_super, 0xa00, 0xb00) -EXC_VIRT_MASKABLE(doorbell_super, 0x4a00, 0x4b00, 0xa00) +EXC_REAL_MASKABLE(doorbell_super, 0xa00, 0x100) +EXC_VIRT_MASKABLE(doorbell_super, 0x4a00, 0x100, 0xa00) TRAMP_KVM(PACA_EXGEN, 0xa00) #ifdef CONFIG_PPC_DOORBELL EXC_COMMON_ASYNC(doorbell_super_common, 0xa00, doorbell_exception) @@ -827,11 +825,36 @@ EXC_COMMON_ASYNC(doorbell_super_common, 0xa00, unknown_exception) #endif -EXC_REAL(trap_0b, 0xb00, 0xc00) -EXC_VIRT(trap_0b, 0x4b00, 0x4c00, 0xb00) +EXC_REAL(trap_0b, 0xb00, 0x100) +EXC_VIRT(trap_0b, 0x4b00, 0x100, 0xb00) TRAMP_KVM(PACA_EXGEN, 0xb00) EXC_COMMON(trap_0b_common, 0xb00, unknown_exception) +#ifdef CONFIG_KVM_BOOK3S_64_HANDLER + /* + * If CONFIG_KVM_BOOK3S_64_HANDLER is set, save the PPR (on systems + * that support it) before changing to HMT_MEDIUM. That allows the KVM + * code to save that value into the guest state (it is the guest's PPR + * value). Otherwise just change to HMT_MEDIUM as userspace has + * already saved the PPR. 
+ */ +#define SYSCALL_KVMTEST \ + SET_SCRATCH0(r13); \ + GET_PACA(r13); \ + std r9,PACA_EXGEN+EX_R9(r13); \ + OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR); \ + HMT_MEDIUM; \ + std r10,PACA_EXGEN+EX_R10(r13); \ + OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r9, CPU_FTR_HAS_PPR); \ + mfcr r9; \ + KVMTEST_PR(0xc00); \ + GET_SCRATCH0(r13) + +#else +#define SYSCALL_KVMTEST \ + HMT_MEDIUM +#endif + #define LOAD_SYSCALL_HANDLER(reg) \ __LOAD_HANDLER(reg, system_call_common) @@ -884,50 +907,30 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \ b system_call_common ; #endif -EXC_REAL_BEGIN(system_call, 0xc00, 0xd00) - /* - * If CONFIG_KVM_BOOK3S_64_HANDLER is set, save the PPR (on systems - * that support it) before changing to HMT_MEDIUM. That allows the KVM - * code to save that value into the guest state (it is the guest's PPR - * value). Otherwise just change to HMT_MEDIUM as userspace has - * already saved the PPR. - */ -#ifdef CONFIG_KVM_BOOK3S_64_HANDLER - SET_SCRATCH0(r13) - GET_PACA(r13) - std r9,PACA_EXGEN+EX_R9(r13) - OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR); - HMT_MEDIUM; - std r10,PACA_EXGEN+EX_R10(r13) - OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r9, CPU_FTR_HAS_PPR); - mfcr r9 - KVMTEST_PR(0xc00) - GET_SCRATCH0(r13) -#else - HMT_MEDIUM; -#endif +EXC_REAL_BEGIN(system_call, 0xc00, 0x100) + SYSCALL_KVMTEST SYSCALL_PSERIES_1 SYSCALL_PSERIES_2_RFID SYSCALL_PSERIES_3 -EXC_REAL_END(system_call, 0xc00, 0xd00) +EXC_REAL_END(system_call, 0xc00, 0x100) -EXC_VIRT_BEGIN(system_call, 0x4c00, 0x4d00) - HMT_MEDIUM +EXC_VIRT_BEGIN(system_call, 0x4c00, 0x100) + SYSCALL_KVMTEST SYSCALL_PSERIES_1 SYSCALL_PSERIES_2_DIRECT SYSCALL_PSERIES_3 -EXC_VIRT_END(system_call, 0x4c00, 0x4d00) +EXC_VIRT_END(system_call, 0x4c00, 0x100) TRAMP_KVM(PACA_EXGEN, 0xc00) -EXC_REAL(single_step, 0xd00, 0xe00) -EXC_VIRT(single_step, 0x4d00, 0x4e00, 0xd00) +EXC_REAL(single_step, 0xd00, 0x100) +EXC_VIRT(single_step, 0x4d00, 0x100, 0xd00) TRAMP_KVM(PACA_EXGEN, 0xd00) EXC_COMMON(single_step_common, 0xd00, 
single_step_exception) -EXC_REAL_OOL_HV(h_data_storage, 0xe00, 0xe20) -EXC_VIRT_NONE(0x4e00, 0x4e20) +EXC_REAL_OOL_HV(h_data_storage, 0xe00, 0x20) +EXC_VIRT_OOL_HV(h_data_storage, 0x4e00, 0x20, 0xe00) TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0xe00) EXC_COMMON_BEGIN(h_data_storage_common) mfspr r10,SPRN_HDAR @@ -942,14 +945,14 @@ EXC_COMMON_BEGIN(h_data_storage_common) b ret_from_except -EXC_REAL_OOL_HV(h_instr_storage, 0xe20, 0xe40) -EXC_VIRT_NONE(0x4e20, 0x4e40) +EXC_REAL_OOL_HV(h_instr_storage, 0xe20, 0x20) +EXC_VIRT_OOL_HV(h_instr_storage, 0x4e20, 0x20, 0xe20) TRAMP_KVM_HV(PACA_EXGEN, 0xe20) EXC_COMMON(h_instr_storage_common, 0xe20, unknown_exception) -EXC_REAL_OOL_HV(emulation_assist, 0xe40, 0xe60) -EXC_VIRT_OOL_HV(emulation_assist, 0x4e40, 0x4e60, 0xe40) +EXC_REAL_OOL_HV(emulation_assist, 0xe40, 0x20) +EXC_VIRT_OOL_HV(emulation_assist, 0x4e40, 0x20, 0xe40) TRAMP_KVM_HV(PACA_EXGEN, 0xe40) EXC_COMMON(emulation_assist_common, 0xe40, emulation_assist_interrupt) @@ -959,9 +962,9 @@ EXC_COMMON(emulation_assist_common, 0xe40, emulation_assist_interrupt) * first, and then eventaully from there to the trampoline to get into virtual * mode. */ -__EXC_REAL_OOL_HV_DIRECT(hmi_exception, 0xe60, 0xe80, hmi_exception_early) -__TRAMP_REAL_REAL_OOL_MASKABLE_HV(hmi_exception, 0xe60) -EXC_VIRT_NONE(0x4e60, 0x4e80) +__EXC_REAL_OOL_HV_DIRECT(hmi_exception, 0xe60, 0x20, hmi_exception_early) +__TRAMP_REAL_OOL_MASKABLE_HV(hmi_exception, 0xe60) +EXC_VIRT_NONE(0x4e60, 0x20) TRAMP_KVM_HV(PACA_EXGEN, 0xe60) TRAMP_REAL_BEGIN(hmi_exception_early) EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_HV, 0xe60) @@ -979,7 +982,7 @@ TRAMP_REAL_BEGIN(hmi_exception_early) EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN) EXCEPTION_PROLOG_COMMON_3(0xe60) addi r3,r1,STACK_FRAME_OVERHEAD - bl hmi_exception_realmode + BRANCH_LINK_TO_FAR(r4, hmi_exception_realmode) /* Windup the stack. 
*/ /* Move original HSRR0 and HSRR1 into the respective regs */ ld r9,_MSR(r1) @@ -1015,8 +1018,8 @@ hmi_exception_after_realmode: EXC_COMMON_ASYNC(hmi_exception_common, 0xe60, handle_hmi_exception) -EXC_REAL_OOL_MASKABLE_HV(h_doorbell, 0xe80, 0xea0) -EXC_VIRT_OOL_MASKABLE_HV(h_doorbell, 0x4e80, 0x4ea0, 0xe80) +EXC_REAL_OOL_MASKABLE_HV(h_doorbell, 0xe80, 0x20) +EXC_VIRT_OOL_MASKABLE_HV(h_doorbell, 0x4e80, 0x20, 0xe80) TRAMP_KVM_HV(PACA_EXGEN, 0xe80) #ifdef CONFIG_PPC_DOORBELL EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, doorbell_exception) @@ -1025,24 +1028,26 @@ EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, unknown_exception) #endif -EXC_REAL_OOL_MASKABLE_HV(h_virt_irq, 0xea0, 0xec0) -EXC_VIRT_OOL_MASKABLE_HV(h_virt_irq, 0x4ea0, 0x4ec0, 0xea0) +EXC_REAL_OOL_MASKABLE_HV(h_virt_irq, 0xea0, 0x20) +EXC_VIRT_OOL_MASKABLE_HV(h_virt_irq, 0x4ea0, 0x20, 0xea0) TRAMP_KVM_HV(PACA_EXGEN, 0xea0) EXC_COMMON_ASYNC(h_virt_irq_common, 0xea0, do_IRQ) -EXC_REAL_NONE(0xec0, 0xf00) -EXC_VIRT_NONE(0x4ec0, 0x4f00) +EXC_REAL_NONE(0xec0, 0x20) +EXC_VIRT_NONE(0x4ec0, 0x20) +EXC_REAL_NONE(0xee0, 0x20) +EXC_VIRT_NONE(0x4ee0, 0x20) -EXC_REAL_OOL(performance_monitor, 0xf00, 0xf20) -EXC_VIRT_OOL(performance_monitor, 0x4f00, 0x4f20, 0xf00) +EXC_REAL_OOL(performance_monitor, 0xf00, 0x20) +EXC_VIRT_OOL(performance_monitor, 0x4f00, 0x20, 0xf00) TRAMP_KVM(PACA_EXGEN, 0xf00) EXC_COMMON_ASYNC(performance_monitor_common, 0xf00, performance_monitor_exception) -EXC_REAL_OOL(altivec_unavailable, 0xf20, 0xf40) -EXC_VIRT_OOL(altivec_unavailable, 0x4f20, 0x4f40, 0xf20) +EXC_REAL_OOL(altivec_unavailable, 0xf20, 0x20) +EXC_VIRT_OOL(altivec_unavailable, 0x4f20, 0x20, 0xf20) TRAMP_KVM(PACA_EXGEN, 0xf20) EXC_COMMON_BEGIN(altivec_unavailable_common) EXCEPTION_PROLOG_COMMON(0xf20, PACA_EXGEN) @@ -1078,8 +1083,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) b ret_from_except -EXC_REAL_OOL(vsx_unavailable, 0xf40, 0xf60) -EXC_VIRT_OOL(vsx_unavailable, 0x4f40, 0x4f60, 0xf40) +EXC_REAL_OOL(vsx_unavailable, 0xf40, 0x20) 
+EXC_VIRT_OOL(vsx_unavailable, 0x4f40, 0x20, 0xf40) TRAMP_KVM(PACA_EXGEN, 0xf40) EXC_COMMON_BEGIN(vsx_unavailable_common) EXCEPTION_PROLOG_COMMON(0xf40, PACA_EXGEN) @@ -1114,41 +1119,50 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) b ret_from_except -EXC_REAL_OOL(facility_unavailable, 0xf60, 0xf80) -EXC_VIRT_OOL(facility_unavailable, 0x4f60, 0x4f80, 0xf60) +EXC_REAL_OOL(facility_unavailable, 0xf60, 0x20) +EXC_VIRT_OOL(facility_unavailable, 0x4f60, 0x20, 0xf60) TRAMP_KVM(PACA_EXGEN, 0xf60) EXC_COMMON(facility_unavailable_common, 0xf60, facility_unavailable_exception) -EXC_REAL_OOL_HV(h_facility_unavailable, 0xf80, 0xfa0) -EXC_VIRT_OOL_HV(h_facility_unavailable, 0x4f80, 0x4fa0, 0xf80) +EXC_REAL_OOL_HV(h_facility_unavailable, 0xf80, 0x20) +EXC_VIRT_OOL_HV(h_facility_unavailable, 0x4f80, 0x20, 0xf80) TRAMP_KVM_HV(PACA_EXGEN, 0xf80) EXC_COMMON(h_facility_unavailable_common, 0xf80, facility_unavailable_exception) -EXC_REAL_NONE(0xfa0, 0x1200) -EXC_VIRT_NONE(0x4fa0, 0x5200) +EXC_REAL_NONE(0xfa0, 0x20) +EXC_VIRT_NONE(0x4fa0, 0x20) +EXC_REAL_NONE(0xfc0, 0x20) +EXC_VIRT_NONE(0x4fc0, 0x20) +EXC_REAL_NONE(0xfe0, 0x20) +EXC_VIRT_NONE(0x4fe0, 0x20) + +EXC_REAL_NONE(0x1000, 0x100) +EXC_VIRT_NONE(0x5000, 0x100) +EXC_REAL_NONE(0x1100, 0x100) +EXC_VIRT_NONE(0x5100, 0x100) #ifdef CONFIG_CBE_RAS -EXC_REAL_HV(cbe_system_error, 0x1200, 0x1300) -EXC_VIRT_NONE(0x5200, 0x5300) +EXC_REAL_HV(cbe_system_error, 0x1200, 0x100) +EXC_VIRT_NONE(0x5200, 0x100) TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0x1200) EXC_COMMON(cbe_system_error_common, 0x1200, cbe_system_error_exception) #else /* CONFIG_CBE_RAS */ -EXC_REAL_NONE(0x1200, 0x1300) -EXC_VIRT_NONE(0x5200, 0x5300) +EXC_REAL_NONE(0x1200, 0x100) +EXC_VIRT_NONE(0x5200, 0x100) #endif -EXC_REAL(instruction_breakpoint, 0x1300, 0x1400) -EXC_VIRT(instruction_breakpoint, 0x5300, 0x5400, 0x1300) +EXC_REAL(instruction_breakpoint, 0x1300, 0x100) +EXC_VIRT(instruction_breakpoint, 0x5300, 0x100, 0x1300) TRAMP_KVM_SKIP(PACA_EXGEN, 0x1300) EXC_COMMON(instruction_breakpoint_common, 
0x1300, instruction_breakpoint_exception) -EXC_REAL_NONE(0x1400, 0x1500) -EXC_VIRT_NONE(0x5400, 0x5500) +EXC_REAL_NONE(0x1400, 0x100) +EXC_VIRT_NONE(0x5400, 0x100) -EXC_REAL_BEGIN(denorm_exception_hv, 0x1500, 0x1600) +EXC_REAL_BEGIN(denorm_exception_hv, 0x1500, 0x100) mtspr SPRN_SPRG_HSCRATCH0,r13 EXCEPTION_PROLOG_0(PACA_EXGEN) EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0x1500) @@ -1163,14 +1177,14 @@ EXC_REAL_BEGIN(denorm_exception_hv, 0x1500, 0x1600) KVMTEST_PR(0x1500) EXCEPTION_PROLOG_PSERIES_1(denorm_common, EXC_HV) -EXC_REAL_END(denorm_exception_hv, 0x1500, 0x1600) +EXC_REAL_END(denorm_exception_hv, 0x1500, 0x100) #ifdef CONFIG_PPC_DENORMALISATION -EXC_VIRT_BEGIN(denorm_exception, 0x5500, 0x5600) +EXC_VIRT_BEGIN(denorm_exception, 0x5500, 0x100) b exc_real_0x1500_denorm_exception_hv -EXC_VIRT_END(denorm_exception, 0x5500, 0x5600) +EXC_VIRT_END(denorm_exception, 0x5500, 0x100) #else -EXC_VIRT_NONE(0x5500, 0x5600) +EXC_VIRT_NONE(0x5500, 0x100) #endif TRAMP_KVM_SKIP(PACA_EXGEN, 0x1500) @@ -1243,18 +1257,18 @@ EXC_COMMON_HV(denorm_common, 0x1500, unknown_exception) #ifdef CONFIG_CBE_RAS -EXC_REAL_HV(cbe_maintenance, 0x1600, 0x1700) -EXC_VIRT_NONE(0x5600, 0x5700) +EXC_REAL_HV(cbe_maintenance, 0x1600, 0x100) +EXC_VIRT_NONE(0x5600, 0x100) TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0x1600) EXC_COMMON(cbe_maintenance_common, 0x1600, cbe_maintenance_exception) #else /* CONFIG_CBE_RAS */ -EXC_REAL_NONE(0x1600, 0x1700) -EXC_VIRT_NONE(0x5600, 0x5700) +EXC_REAL_NONE(0x1600, 0x100) +EXC_VIRT_NONE(0x5600, 0x100) #endif -EXC_REAL(altivec_assist, 0x1700, 0x1800) -EXC_VIRT(altivec_assist, 0x5700, 0x5800, 0x1700) +EXC_REAL(altivec_assist, 0x1700, 0x100) +EXC_VIRT(altivec_assist, 0x5700, 0x100, 0x1700) TRAMP_KVM(PACA_EXGEN, 0x1700) #ifdef CONFIG_ALTIVEC EXC_COMMON(altivec_assist_common, 0x1700, altivec_assist_exception) @@ -1264,13 +1278,13 @@ EXC_COMMON(altivec_assist_common, 0x1700, unknown_exception) #ifdef CONFIG_CBE_RAS -EXC_REAL_HV(cbe_thermal, 0x1800, 0x1900) -EXC_VIRT_NONE(0x5800, 0x5900) 
+EXC_REAL_HV(cbe_thermal, 0x1800, 0x100) +EXC_VIRT_NONE(0x5800, 0x100) TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0x1800) EXC_COMMON(cbe_thermal_common, 0x1800, cbe_thermal_exception) #else /* CONFIG_CBE_RAS */ -EXC_REAL_NONE(0x1800, 0x1900) -EXC_VIRT_NONE(0x5800, 0x5900) +EXC_REAL_NONE(0x1800, 0x100) +EXC_VIRT_NONE(0x5800, 0x100) #endif diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 8f0c7c5d93f2..8ff0dd4e77a7 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -406,12 +406,35 @@ static void register_fw_dump(struct fadump_mem_struct *fdm) void crash_fadump(struct pt_regs *regs, const char *str) { struct fadump_crash_info_header *fdh = NULL; + int old_cpu, this_cpu; if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr) return; + /* + * old_cpu == -1 means this is the first CPU which has come here, + * go ahead and trigger fadump. + * + * old_cpu != -1 means some other CPU is already on its way + * to trigger fadump, just keep looping here. + */ + this_cpu = smp_processor_id(); + old_cpu = cmpxchg(&crashing_cpu, -1, this_cpu); + + if (old_cpu != -1) { + /* + * We can't loop here indefinitely. Wait as long as fadump + * is in force. If we race with fadump un-registration this + * loop will break and then we go down to normal panic path + * and reboot. If fadump is in force the first crashing + * cpu will definitely trigger fadump.
+ */ + while (fw_dump.dump_registered) + cpu_relax(); + return; + } + fdh = __va(fw_dump.fadumphdr_addr); - crashing_cpu = smp_processor_id(); fdh->crashing_cpu = crashing_cpu; crash_save_vmcoreinfo(); diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 4d3aa05e28be..53cc9270aac8 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -228,8 +228,10 @@ int hw_breakpoint_handler(struct die_args *args) rcu_read_lock(); bp = __this_cpu_read(bp_per_reg); - if (!bp) + if (!bp) { + rc = NOTIFY_DONE; goto out; + } info = counter_arch_bp(bp); /* diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 72dac0b58061..5f61cc0349c0 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -40,9 +40,7 @@ #define _WORC GPR11 #define _PTCR GPR12 -#define PSSCR_HV_TEMPLATE PSSCR_ESL | PSSCR_EC | \ - PSSCR_PSLL_MASK | PSSCR_TR_MASK | \ - PSSCR_MTL_MASK +#define PSSCR_EC_ESL_MASK_SHIFTED (PSSCR_EC | PSSCR_ESL) >> 16 .text @@ -205,7 +203,7 @@ pnv_enter_arch207_idle_mode: stb r3,PACA_THREAD_IDLE_STATE(r13) cmpwi cr3,r3,PNV_THREAD_SLEEP bge cr3,2f - IDLE_STATE_ENTER_SEQ(PPC_NAP) + IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP) /* No return */ 2: /* Sleep or winkle */ @@ -239,7 +237,7 @@ pnv_fastsleep_workaround_at_entry: common_enter: /* common code for all the threads entering sleep or winkle */ bgt cr3,enter_winkle - IDLE_STATE_ENTER_SEQ(PPC_SLEEP) + IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP) fastsleep_workaround_at_entry: ori r15,r15,PNV_CORE_IDLE_LOCK_BIT @@ -250,7 +248,7 @@ fastsleep_workaround_at_entry: /* Fast sleep workaround */ li r3,1 li r4,1 - bl opal_rm_config_cpu_idle_state + bl opal_config_cpu_idle_state /* Clear Lock bit */ li r0,0 @@ -261,10 +259,10 @@ fastsleep_workaround_at_entry: enter_winkle: bl save_sprs_to_stack - IDLE_STATE_ENTER_SEQ(PPC_WINKLE) + IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE) /* - * r3 - requested stop state + * r3 - PSSCR value 
corresponding to the requested stop state. */ power_enter_stop: #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE @@ -274,13 +272,22 @@ power_enter_stop: stb r4,HSTATE_HWTHREAD_STATE(r13) #endif /* + * Check if we are executing the lite variant with ESL=EC=0 + */ + andis. r4,r3,PSSCR_EC_ESL_MASK_SHIFTED + clrldi r3,r3,60 /* r3 = Bits[60:63] = Requested Level (RL) */ + bne 1f + IDLE_STATE_ENTER_SEQ(PPC_STOP) + li r3,0 /* Since we didn't lose state, return 0 */ + b pnv_wakeup_noloss +/* * Check if the requested state is a deep idle state. */ - LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state) +1: LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state) ld r4,ADDROFF(pnv_first_deep_stop_state)(r5) cmpd r3,r4 bge 2f - IDLE_STATE_ENTER_SEQ(PPC_STOP) + IDLE_STATE_ENTER_SEQ_NORET(PPC_STOP) 2: /* * Entering deep idle state. @@ -302,7 +309,7 @@ lwarx_loop_stop: bl save_sprs_to_stack - IDLE_STATE_ENTER_SEQ(PPC_STOP) + IDLE_STATE_ENTER_SEQ_NORET(PPC_STOP) _GLOBAL(power7_idle) /* Now check if user or arch enabled NAP mode */ @@ -353,16 +360,17 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \ ld r3,ORIG_GPR3(r1); /* Restore original r3 */ \ 20: nop; - /* - * r3 - requested stop state + * r3 - The PSSCR value corresponding to the stop state. + * r4 - The PSSCR mask corresponding to the stop state. */ _GLOBAL(power9_idle_stop) - LOAD_REG_IMMEDIATE(r4, PSSCR_HV_TEMPLATE) - or r4,r4,r3 - mtspr SPRN_PSSCR, r4 - li r4, 1 + mfspr r5,SPRN_PSSCR + andc r5,r5,r4 + or r3,r3,r5 + mtspr SPRN_PSSCR,r3 LOAD_REG_ADDR(r5,power_enter_stop) + li r4,1 b pnv_powersave_common /* No return */ /* @@ -544,7 +552,7 @@ timebase_resync: */ ble cr3,clear_lock /* Time base re-sync */ - bl opal_rm_resync_timebase; + bl opal_resync_timebase; /* * If waking up from sleep, per core state is not lost, skip to * clear_lock.
@@ -633,7 +641,7 @@ hypervisor_state_restored: fastsleep_workaround_at_exit: li r3,1 li r4,0 - bl opal_rm_config_cpu_idle_state + bl opal_config_cpu_idle_state b timebase_resync /* diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c index 3963f0b68d52..a1854d1ded8b 100644 --- a/arch/powerpc/kernel/iomap.c +++ b/arch/powerpc/kernel/iomap.c @@ -8,6 +8,7 @@ #include <linux/export.h> #include <asm/io.h> #include <asm/pci-bridge.h> +#include <asm/isa-bridge.h> /* * Here comes the ppc64 implementation of the IOMAP diff --git a/arch/powerpc/kernel/isa-bridge.c b/arch/powerpc/kernel/isa-bridge.c index ae1316106e2b..bb6f8993412e 100644 --- a/arch/powerpc/kernel/isa-bridge.c +++ b/arch/powerpc/kernel/isa-bridge.c @@ -29,6 +29,7 @@ #include <asm/pci-bridge.h> #include <asm/machdep.h> #include <asm/ppc-pci.h> +#include <asm/isa-bridge.h> unsigned long isa_io_base; /* NULL if no ISA bus */ EXPORT_SYMBOL(isa_io_base); @@ -167,6 +168,97 @@ void __init isa_bridge_find_early(struct pci_controller *hose) } /** + * isa_bridge_init_non_pci - Find and map the ISA IO space early before + * main PCI discovery. 
This is optionally called by + * the arch code when adding PCI PHBs to get early + * access to ISA IO ports + */ +void __init isa_bridge_init_non_pci(struct device_node *np) +{ + const __be32 *ranges, *pbasep = NULL; + int rlen, i, rs; + u32 na, ns, pna; + u64 cbase, pbase, size = 0; + + /* If we already have an ISA bridge, bail off */ + if (isa_bridge_devnode != NULL) + return; + + pna = of_n_addr_cells(np); + if (of_property_read_u32(np, "#address-cells", &na) || + of_property_read_u32(np, "#size-cells", &ns)) { + pr_warn("ISA: Non-PCI bridge %s is missing address format\n", + np->full_name); + return; + } + + /* Check it's a supported address format */ + if (na != 2 || ns != 1) { + pr_warn("ISA: Non-PCI bridge %s has unsupported address format\n", + np->full_name); + return; + } + rs = na + ns + pna; + + /* Grab the ranges property */ + ranges = of_get_property(np, "ranges", &rlen); + if (ranges == NULL || rlen < rs) { + pr_warn("ISA: Non-PCI bridge %s has absent or invalid ranges\n", + np->full_name); + return; + } + + /* Parse it. We are only looking for IO space */ + for (i = 0; (i + rs - 1) < rlen; i += rs) { + if (be32_to_cpup(ranges + i) != 1) + continue; + cbase = be32_to_cpup(ranges + i + 1); + size = of_read_number(ranges + i + na + pna, ns); + pbasep = ranges + i + na; + break; + } + + /* Got something ? 
*/ + if (!size || !pbasep) { + pr_warn("ISA: Non-PCI bridge %s has no usable IO range\n", + np->full_name); + return; + } + + /* Align size and make sure it's cropped to 64K */ + size = PAGE_ALIGN(size); + if (size > 0x10000) + size = 0x10000; + + /* Map pbase */ + pbase = of_translate_address(np, pbasep); + if (pbase == OF_BAD_ADDR) { + pr_warn("ISA: Non-PCI bridge %s failed to translate IO base\n", + np->full_name); + return; + } + + /* We need page alignment */ + if ((cbase & ~PAGE_MASK) || (pbase & ~PAGE_MASK)) { + pr_warn("ISA: Non-PCI bridge %s has non aligned IO range\n", + np->full_name); + return; + } + + /* Got it */ + isa_bridge_devnode = np; + + /* Set the global ISA io base to indicate we have an ISA bridge + * and map it + */ + isa_io_base = ISA_IO_BASE; + __ioremap_at(pbase, (void *)ISA_IO_BASE, + size, pgprot_val(pgprot_noncached(__pgprot(0)))); + + pr_debug("ISA: Non-PCI bridge is %s\n", np->full_name); +} + +/** * isa_bridge_find_late - Find and map the ISA IO space upon discovery of * a new ISA bridge */ diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 735ff3d3f77d..fce05a38851c 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -285,6 +285,7 @@ asm(".global kretprobe_trampoline\n" ".type kretprobe_trampoline, @function\n" "kretprobe_trampoline:\n" "nop\n" + "blr\n" ".size kretprobe_trampoline, .-kretprobe_trampoline\n"); /* @@ -337,6 +338,13 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, kretprobe_assert(ri, orig_ret_address, trampoline_address); regs->nip = orig_ret_address; + /* + * Make LR point to the orig_ret_address. + * When the 'nop' inside the kretprobe_trampoline + * is optimized, we can do a 'blr' after executing the + * detour buffer code. 
+ */ + regs->link = orig_ret_address; reset_current_kprobe(); kretprobe_hash_unlock(current, &flags); @@ -467,15 +475,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) return 0; } -/* - * Wrapper routine to for handling exceptions. - */ -int __kprobes kprobe_exceptions_notify(struct notifier_block *self, - unsigned long val, void *data) -{ - return NOTIFY_DONE; -} - unsigned long arch_deref_entry_point(void *entry) { return ppc_global_function_entry(entry); diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index bc525ea0dc09..0694d20f85b6 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -233,7 +233,8 @@ static int __init add_legacy_isa_port(struct device_node *np, * * Note: Don't even try on P8 lpc, we know it's not directly mapped */ - if (!of_device_is_compatible(isa_brg, "ibm,power8-lpc")) { + if (!of_device_is_compatible(isa_brg, "ibm,power8-lpc") || + of_get_property(isa_brg, "ranges", NULL)) { taddr = of_translate_address(np, reg); if (taddr == OF_BAD_ADDR) taddr = 0; diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 32be2a844947..ae179cb1bb3c 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -80,12 +80,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) * each other. */ ld r10,PPC64_CACHES@toc(r2) - lwz r7,DCACHEL1LINESIZE(r10)/* Get cache line size */ + lwz r7,DCACHEL1BLOCKSIZE(r10)/* Get cache block size */ addi r5,r7,-1 andc r6,r3,r5 /* round low to line bdy */ subf r8,r6,r4 /* compute length */ add r8,r8,r5 /* ensure we get enough */ - lwz r9,DCACHEL1LOGLINESIZE(r10) /* Get log-2 of cache line size */ + lwz r9,DCACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of cache block size */ srw. r8,r8,r9 /* compute line count */ beqlr /* nothing to do? 
*/ mtctr r8 @@ -96,12 +96,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) /* Now invalidate the instruction cache */ - lwz r7,ICACHEL1LINESIZE(r10) /* Get Icache line size */ + lwz r7,ICACHEL1BLOCKSIZE(r10) /* Get Icache block size */ addi r5,r7,-1 andc r6,r3,r5 /* round low to line bdy */ subf r8,r6,r4 /* compute length */ add r8,r8,r5 - lwz r9,ICACHEL1LOGLINESIZE(r10) /* Get log-2 of Icache line size */ + lwz r9,ICACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of Icache block size */ srw. r8,r8,r9 /* compute line count */ beqlr /* nothing to do? */ mtctr r8 @@ -128,12 +128,12 @@ _GLOBAL(flush_dcache_range) * Different systems have different cache line sizes */ ld r10,PPC64_CACHES@toc(r2) - lwz r7,DCACHEL1LINESIZE(r10) /* Get dcache line size */ + lwz r7,DCACHEL1BLOCKSIZE(r10) /* Get dcache block size */ addi r5,r7,-1 andc r6,r3,r5 /* round low to line bdy */ subf r8,r6,r4 /* compute length */ add r8,r8,r5 /* ensure we get enough */ - lwz r9,DCACHEL1LOGLINESIZE(r10) /* Get log-2 of dcache line size */ + lwz r9,DCACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of dcache block size */ srw. r8,r8,r9 /* compute line count */ beqlr /* nothing to do? */ mtctr r8 @@ -156,12 +156,12 @@ EXPORT_SYMBOL(flush_dcache_range) */ _GLOBAL(flush_dcache_phys_range) ld r10,PPC64_CACHES@toc(r2) - lwz r7,DCACHEL1LINESIZE(r10) /* Get dcache line size */ + lwz r7,DCACHEL1BLOCKSIZE(r10) /* Get dcache block size */ addi r5,r7,-1 andc r6,r3,r5 /* round low to line bdy */ subf r8,r6,r4 /* compute length */ add r8,r8,r5 /* ensure we get enough */ - lwz r9,DCACHEL1LOGLINESIZE(r10) /* Get log-2 of dcache line size */ + lwz r9,DCACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of dcache block size */ srw. r8,r8,r9 /* compute line count */ beqlr /* nothing to do? 
*/ mfmsr r5 /* Disable MMU Data Relocation */ @@ -184,12 +184,12 @@ _GLOBAL(flush_dcache_phys_range) _GLOBAL(flush_inval_dcache_range) ld r10,PPC64_CACHES@toc(r2) - lwz r7,DCACHEL1LINESIZE(r10) /* Get dcache line size */ + lwz r7,DCACHEL1BLOCKSIZE(r10) /* Get dcache block size */ addi r5,r7,-1 andc r6,r3,r5 /* round low to line bdy */ subf r8,r6,r4 /* compute length */ add r8,r8,r5 /* ensure we get enough */ - lwz r9,DCACHEL1LOGLINESIZE(r10)/* Get log-2 of dcache line size */ + lwz r9,DCACHEL1LOGBLOCKSIZE(r10)/* Get log-2 of dcache block size */ srw. r8,r8,r9 /* compute line count */ beqlr /* nothing to do? */ sync @@ -225,8 +225,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) /* Flush the dcache */ ld r7,PPC64_CACHES@toc(r2) clrrdi r3,r3,PAGE_SHIFT /* Page align */ - lwz r4,DCACHEL1LINESPERPAGE(r7) /* Get # dcache lines per page */ - lwz r5,DCACHEL1LINESIZE(r7) /* Get dcache line size */ + lwz r4,DCACHEL1BLOCKSPERPAGE(r7) /* Get # dcache blocks per page */ + lwz r5,DCACHEL1BLOCKSIZE(r7) /* Get dcache block size */ mr r6,r3 mtctr r4 0: dcbst 0,r6 @@ -236,8 +236,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) /* Now invalidate the icache */ - lwz r4,ICACHEL1LINESPERPAGE(r7) /* Get # icache lines per page */ - lwz r5,ICACHEL1LINESIZE(r7) /* Get icache line size */ + lwz r4,ICACHEL1BLOCKSPERPAGE(r7) /* Get # icache blocks per page */ + lwz r5,ICACHEL1BLOCKSIZE(r7) /* Get icache block size */ mtctr r4 1: icbi 0,r3 add r3,r3,r5 diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index bb1807184bad..0b0f89685b67 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -286,14 +286,6 @@ static void dedotify_versions(struct modversion_info *vers, for (end = (void *)vers + size; vers < end; vers++) if (vers->name[0] == '.') { memmove(vers->name, vers->name+1, strlen(vers->name)); -#ifdef ARCH_RELOCATES_KCRCTAB - /* The TOC symbol has no CRC computed. 
To avoid CRC - * check failing, we must force it to the expected - * value (see CRC check in module.c). - */ - if (!strcmp(vers->name, "TOC.")) - vers->crc = -(unsigned long)reloc_start; -#endif } } diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c new file mode 100644 index 000000000000..2282bf4e63cd --- /dev/null +++ b/arch/powerpc/kernel/optprobes.c @@ -0,0 +1,347 @@ +/* + * Code for Kernel probes Jump optimization. + * + * Copyright 2017, Anju T, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/kprobes.h> +#include <linux/jump_label.h> +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/list.h> +#include <asm/kprobes.h> +#include <asm/ptrace.h> +#include <asm/cacheflush.h> +#include <asm/code-patching.h> +#include <asm/sstep.h> +#include <asm/ppc-opcode.h> + +#define TMPL_CALL_HDLR_IDX \ + (optprobe_template_call_handler - optprobe_template_entry) +#define TMPL_EMULATE_IDX \ + (optprobe_template_call_emulate - optprobe_template_entry) +#define TMPL_RET_IDX \ + (optprobe_template_ret - optprobe_template_entry) +#define TMPL_OP_IDX \ + (optprobe_template_op_address - optprobe_template_entry) +#define TMPL_INSN_IDX \ + (optprobe_template_insn - optprobe_template_entry) +#define TMPL_END_IDX \ + (optprobe_template_end - optprobe_template_entry) + +DEFINE_INSN_CACHE_OPS(ppc_optinsn); + +static bool insn_page_in_use; + +static void *__ppc_alloc_insn_page(void) +{ + if (insn_page_in_use) + return NULL; + insn_page_in_use = true; + return &optinsn_slot; +} + +static void __ppc_free_insn_page(void *page __maybe_unused) +{ + insn_page_in_use = false; +} + +struct kprobe_insn_cache kprobe_ppc_optinsn_slots = { + .mutex = __MUTEX_INITIALIZER(kprobe_ppc_optinsn_slots.mutex), + .pages = 
LIST_HEAD_INIT(kprobe_ppc_optinsn_slots.pages), + /* insn_size initialized later */ + .alloc = __ppc_alloc_insn_page, + .free = __ppc_free_insn_page, + .nr_garbage = 0, +}; + +/* + * Check if we can optimize this probe. Returns NIP post-emulation if this can + * be optimized and 0 otherwise. + */ +static unsigned long can_optimize(struct kprobe *p) +{ + struct pt_regs regs; + struct instruction_op op; + unsigned long nip = 0; + + /* + * kprobe placed for kretprobe during boot time + * has a 'nop' instruction, which can be emulated. + * So further checks can be skipped. + */ + if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline) + return (unsigned long)p->addr + sizeof(kprobe_opcode_t); + + /* + * We only support optimizing kernel addresses, but not + * module addresses. + * + * FIXME: Optimize kprobes placed in module addresses. + */ + if (!is_kernel_addr((unsigned long)p->addr)) + return 0; + + memset(&regs, 0, sizeof(struct pt_regs)); + regs.nip = (unsigned long)p->addr; + regs.trap = 0x0; + regs.msr = MSR_KERNEL; + + /* + * Kprobe placed in conditional branch instructions are + * not optimized, as we can't predict the nip prior with + * dummy pt_regs and can not ensure that the return branch + * from detour buffer falls in the range of address (i.e 32MB). + * A branch back from trampoline is set up in the detour buffer + * to the nip returned by the analyse_instr() here. + * + * Ensure that the instruction is not a conditional branch, + * and that can be emulated. 
+ */ + if (!is_conditional_branch(*p->ainsn.insn) && + analyse_instr(&op, &regs, *p->ainsn.insn)) + nip = regs.nip; + + return nip; +} + +static void optimized_callback(struct optimized_kprobe *op, + struct pt_regs *regs) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + unsigned long flags; + + /* This is possible if op is under delayed unoptimizing */ + if (kprobe_disabled(&op->kp)) + return; + + local_irq_save(flags); + hard_irq_disable(); + + if (kprobe_running()) { + kprobes_inc_nmissed_count(&op->kp); + } else { + __this_cpu_write(current_kprobe, &op->kp); + regs->nip = (unsigned long)op->kp.addr; + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + opt_pre_handler(&op->kp, regs); + __this_cpu_write(current_kprobe, NULL); + } + + /* + * No need for an explicit __hard_irq_enable() here. + * local_irq_restore() will re-enable interrupts, + * if they were hard disabled. + */ + local_irq_restore(flags); +} +NOKPROBE_SYMBOL(optimized_callback); + +void arch_remove_optimized_kprobe(struct optimized_kprobe *op) +{ + if (op->optinsn.insn) { + free_ppc_optinsn_slot(op->optinsn.insn, 1); + op->optinsn.insn = NULL; + } +} + +/* + * emulate_step() requires insn to be emulated as + * second parameter. Load register 'r4' with the + * instruction. + */ +void patch_imm32_load_insns(unsigned int val, kprobe_opcode_t *addr) +{ + /* addis r4,0,(insn)@h */ + *addr++ = PPC_INST_ADDIS | ___PPC_RT(4) | + ((val >> 16) & 0xffff); + + /* ori r4,r4,(insn)@l */ + *addr = PPC_INST_ORI | ___PPC_RA(4) | ___PPC_RS(4) | + (val & 0xffff); +} + +/* + * Generate instructions to load provided immediate 64-bit value + * to register 'r3' and patch these instructions at 'addr'. 
+ */ +void patch_imm64_load_insns(unsigned long val, kprobe_opcode_t *addr) +{ + /* lis r3,(op)@highest */ + *addr++ = PPC_INST_ADDIS | ___PPC_RT(3) | + ((val >> 48) & 0xffff); + + /* ori r3,r3,(op)@higher */ + *addr++ = PPC_INST_ORI | ___PPC_RA(3) | ___PPC_RS(3) | + ((val >> 32) & 0xffff); + + /* rldicr r3,r3,32,31 */ + *addr++ = PPC_INST_RLDICR | ___PPC_RA(3) | ___PPC_RS(3) | + __PPC_SH64(32) | __PPC_ME64(31); + + /* oris r3,r3,(op)@h */ + *addr++ = PPC_INST_ORIS | ___PPC_RA(3) | ___PPC_RS(3) | + ((val >> 16) & 0xffff); + + /* ori r3,r3,(op)@l */ + *addr = PPC_INST_ORI | ___PPC_RA(3) | ___PPC_RS(3) | + (val & 0xffff); +} + +int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) +{ + kprobe_opcode_t *buff, branch_op_callback, branch_emulate_step; + kprobe_opcode_t *op_callback_addr, *emulate_step_addr; + long b_offset; + unsigned long nip; + + kprobe_ppc_optinsn_slots.insn_size = MAX_OPTINSN_SIZE; + + nip = can_optimize(p); + if (!nip) + return -EILSEQ; + + /* Allocate instruction slot for detour buffer */ + buff = get_ppc_optinsn_slot(); + if (!buff) + return -ENOMEM; + + /* + * OPTPROBE uses 'b' instruction to branch to optinsn.insn. + * + * The target address has to be relatively nearby, to permit use + * of branch instruction in powerpc, because the address is specified + * in an immediate field in the instruction opcode itself, ie 24 bits + * in the opcode specify the address. Therefore the address should + * be within 32MB on either side of the current instruction. 
+ */ + b_offset = (unsigned long)buff - (unsigned long)p->addr; + if (!is_offset_in_branch_range(b_offset)) + goto error; + + /* Check if the return address is also within 32MB range */ + b_offset = (unsigned long)(buff + TMPL_RET_IDX) - + (unsigned long)nip; + if (!is_offset_in_branch_range(b_offset)) + goto error; + + /* Setup template */ + memcpy(buff, optprobe_template_entry, + TMPL_END_IDX * sizeof(kprobe_opcode_t)); + + /* + * Fixup the template with instructions to: + * 1. load the address of the actual probepoint + */ + patch_imm64_load_insns((unsigned long)op, buff + TMPL_OP_IDX); + + /* + * 2. branch to optimized_callback() and emulate_step() + */ + kprobe_lookup_name("optimized_callback", op_callback_addr); + kprobe_lookup_name("emulate_step", emulate_step_addr); + if (!op_callback_addr || !emulate_step_addr) { + WARN(1, "kprobe_lookup_name() failed\n"); + goto error; + } + + branch_op_callback = create_branch((unsigned int *)buff + TMPL_CALL_HDLR_IDX, + (unsigned long)op_callback_addr, + BRANCH_SET_LINK); + + branch_emulate_step = create_branch((unsigned int *)buff + TMPL_EMULATE_IDX, + (unsigned long)emulate_step_addr, + BRANCH_SET_LINK); + + if (!branch_op_callback || !branch_emulate_step) + goto error; + + buff[TMPL_CALL_HDLR_IDX] = branch_op_callback; + buff[TMPL_EMULATE_IDX] = branch_emulate_step; + + /* + * 3. load instruction to be emulated into relevant register, and + */ + patch_imm32_load_insns(*p->ainsn.insn, buff + TMPL_INSN_IDX); + + /* + * 4. 
branch back from trampoline + */ + buff[TMPL_RET_IDX] = create_branch((unsigned int *)buff + TMPL_RET_IDX, + (unsigned long)nip, 0); + + flush_icache_range((unsigned long)buff, + (unsigned long)(&buff[TMPL_END_IDX])); + + op->optinsn.insn = buff; + + return 0; + +error: + free_ppc_optinsn_slot(buff, 0); + return -ERANGE; + +} + +int arch_prepared_optinsn(struct arch_optimized_insn *optinsn) +{ + return optinsn->insn != NULL; +} + +/* + * On powerpc, Optprobes always replaces one instruction (4 bytes + * aligned and 4 bytes long). It is impossible to encounter another + * kprobe in this address range. So always return 0. + */ +int arch_check_optimized_kprobe(struct optimized_kprobe *op) +{ + return 0; +} + +void arch_optimize_kprobes(struct list_head *oplist) +{ + struct optimized_kprobe *op; + struct optimized_kprobe *tmp; + + list_for_each_entry_safe(op, tmp, oplist, list) { + /* + * Backup instructions which will be replaced + * by jump address + */ + memcpy(op->optinsn.copied_insn, op->kp.addr, + RELATIVEJUMP_SIZE); + patch_instruction(op->kp.addr, + create_branch((unsigned int *)op->kp.addr, + (unsigned long)op->optinsn.insn, 0)); + list_del_init(&op->list); + } +} + +void arch_unoptimize_kprobe(struct optimized_kprobe *op) +{ + arch_arm_kprobe(&op->kp); +} + +void arch_unoptimize_kprobes(struct list_head *oplist, + struct list_head *done_list) +{ + struct optimized_kprobe *op; + struct optimized_kprobe *tmp; + + list_for_each_entry_safe(op, tmp, oplist, list) { + arch_unoptimize_kprobe(op); + list_move(&op->list, done_list); + } +} + +int arch_within_optimized_kprobe(struct optimized_kprobe *op, + unsigned long addr) +{ + return ((unsigned long)op->kp.addr <= addr && + (unsigned long)op->kp.addr + RELATIVEJUMP_SIZE > addr); +} diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S new file mode 100644 index 000000000000..53e429b5a29d --- /dev/null +++ b/arch/powerpc/kernel/optprobes_head.S @@ -0,0 +1,135 @@ +/* + * Code to 
prepare detour buffer for optprobes in Kernel. + * + * Copyright 2017, Anju T, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <asm/ppc_asm.h> +#include <asm/ptrace.h> +#include <asm/asm-offsets.h> + +#define OPT_SLOT_SIZE 65536 + + .balign 4 + + /* + * Reserve an area to allocate slots for detour buffer. + * This is part of .text section (rather than vmalloc area) + * as this needs to be within 32MB of the probed address. + */ + .global optinsn_slot +optinsn_slot: + .space OPT_SLOT_SIZE + + /* + * Optprobe template: + * This template gets copied into one of the slots in optinsn_slot + * and gets fixed up with real optprobe structures et al. + */ + .global optprobe_template_entry +optprobe_template_entry: + /* Create an in-memory pt_regs */ + stdu r1,-INT_FRAME_SIZE(r1) + SAVE_GPR(0,r1) + /* Save the previous SP into stack */ + addi r0,r1,INT_FRAME_SIZE + std r0,GPR1(r1) + SAVE_10GPRS(2,r1) + SAVE_10GPRS(12,r1) + SAVE_10GPRS(22,r1) + /* Save SPRS */ + mfmsr r5 + std r5,_MSR(r1) + li r5,0x700 + std r5,_TRAP(r1) + li r5,0 + std r5,ORIG_GPR3(r1) + std r5,RESULT(r1) + mfctr r5 + std r5,_CTR(r1) + mflr r5 + std r5,_LINK(r1) + mfspr r5,SPRN_XER + std r5,_XER(r1) + mfcr r5 + std r5,_CCR(r1) + lbz r5,PACASOFTIRQEN(r13) + std r5,SOFTE(r1) + mfdar r5 + std r5,_DAR(r1) + mfdsisr r5 + std r5,_DSISR(r1) + + .global optprobe_template_op_address +optprobe_template_op_address: + /* + * Parameters to optimized_callback(): + * 1. optimized_kprobe structure in r3 + */ + nop + nop + nop + nop + nop + /* 2. pt_regs pointer in r4 */ + addi r4,r1,STACK_FRAME_OVERHEAD + + .global optprobe_template_call_handler +optprobe_template_call_handler: + /* Branch to optimized_callback() */ + nop + + /* + * Parameters for instruction emulation: + * 1. 
Pass SP in register r3. + */ + addi r3,r1,STACK_FRAME_OVERHEAD + + .global optprobe_template_insn +optprobe_template_insn: + /* 2, Pass instruction to be emulated in r4 */ + nop + nop + + .global optprobe_template_call_emulate +optprobe_template_call_emulate: + /* Branch to emulate_step() */ + nop + + /* + * All done. + * Now, restore the registers... + */ + ld r5,_MSR(r1) + mtmsr r5 + ld r5,_CTR(r1) + mtctr r5 + ld r5,_LINK(r1) + mtlr r5 + ld r5,_XER(r1) + mtxer r5 + ld r5,_CCR(r1) + mtcr r5 + ld r5,_DAR(r1) + mtdar r5 + ld r5,_DSISR(r1) + mtdsisr r5 + REST_GPR(0,r1) + REST_10GPRS(2,r1) + REST_10GPRS(12,r1) + REST_10GPRS(22,r1) + /* Restore the previous SP */ + addi r1,r1,INT_FRAME_SIZE + + .global optprobe_template_ret +optprobe_template_ret: + /* ... and jump back from trampoline */ + nop + + .global optprobe_template_end +optprobe_template_end: diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 74bec5498972..8e6fde8d28f3 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -25,6 +25,7 @@ #include <linux/of_address.h> #include <linux/of_pci.h> #include <linux/mm.h> +#include <linux/shmem_fs.h> #include <linux/list.h> #include <linux/syscalls.h> #include <linux/irq.h> @@ -59,14 +60,14 @@ resource_size_t isa_mem_base; EXPORT_SYMBOL(isa_mem_base); -static struct dma_map_ops *pci_dma_ops = &dma_direct_ops; +static const struct dma_map_ops *pci_dma_ops = &dma_direct_ops; -void set_pci_dma_ops(struct dma_map_ops *dma_ops) +void set_pci_dma_ops(const struct dma_map_ops *dma_ops) { pci_dma_ops = dma_ops; } -struct dma_map_ops *get_pci_dma_ops(void) +const struct dma_map_ops *get_pci_dma_ops(void) { return pci_dma_ops; } diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 04885cec24df..5dd056df0baa 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -64,12 +64,6 @@ #include <linux/kprobes.h> #include <linux/kdebug.h> -#ifdef 
CONFIG_CC_STACKPROTECTOR -#include <linux/stackprotector.h> -unsigned long __stack_chk_guard __read_mostly; -EXPORT_SYMBOL(__stack_chk_guard); -#endif - /* Transactional Memory debug */ #ifdef TM_DEBUG_SW #define TM_DEBUG(x...) printk(KERN_INFO x) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index ec47a939cbdd..616de028f7f8 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -649,6 +649,7 @@ static void __init early_cmdline_parse(void) struct option_vector1 { u8 byte1; u8 arch_versions; + u8 arch_versions3; } __packed; struct option_vector2 { @@ -691,6 +692,9 @@ struct option_vector5 { u8 reserved2; __be16 reserved3; u8 subprocessors; + u8 byte22; + u8 intarch; + u8 mmu; } __packed; struct option_vector6 { @@ -700,7 +704,7 @@ struct option_vector6 { } __packed; struct ibm_arch_vec { - struct { u32 mask, val; } pvrs[10]; + struct { u32 mask, val; } pvrs[12]; u8 num_vectors; @@ -750,6 +754,14 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = { .val = cpu_to_be32(0x004d0000), }, { + .mask = cpu_to_be32(0xffff0000), /* POWER9 */ + .val = cpu_to_be32(0x004e0000), + }, + { + .mask = cpu_to_be32(0xffffffff), /* all 3.00-compliant */ + .val = cpu_to_be32(0x0f000005), + }, + { .mask = cpu_to_be32(0xffffffff), /* all 2.07-compliant */ .val = cpu_to_be32(0x0f000004), }, @@ -774,6 +786,7 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = { .byte1 = 0, .arch_versions = OV1_PPC_2_00 | OV1_PPC_2_01 | OV1_PPC_2_02 | OV1_PPC_2_03 | OV1_PPC_2_04 | OV1_PPC_2_05 | OV1_PPC_2_06 | OV1_PPC_2_07, + .arch_versions3 = OV1_PPC_3_00, }, .vec2_len = VECTOR_LENGTH(sizeof(struct option_vector2)), @@ -826,7 +839,7 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = { 0, #endif .associativity = OV5_FEAT(OV5_TYPE1_AFFINITY) | OV5_FEAT(OV5_PRRN), - .bin_opts = 0, + .bin_opts = OV5_FEAT(OV5_RESIZE_HPT), .micro_checkpoint = 0, .reserved0 = 0, .max_cpus = cpu_to_be32(NR_CPUS), /* number of 
cores supported */ @@ -836,6 +849,9 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = { .reserved2 = 0, .reserved3 = 0, .subprocessors = 1, + .intarch = 0, + .mmu = OV5_FEAT(OV5_MMU_RADIX_300) | OV5_FEAT(OV5_MMU_HASH_300) | + OV5_FEAT(OV5_MMU_PROC_TBL) | OV5_FEAT(OV5_MMU_GTSE), }, /* option vector 6: IBM PAPR hints */ @@ -2834,6 +2850,9 @@ static void __init prom_find_boot_cpu(void) cpu_pkg = call_prom("instance-to-package", 1, 1, prom_cpu); + if (!PHANDLE_VALID(cpu_pkg)) + return; + prom_getprop(cpu_pkg, "reg", &rval, sizeof(rval)); prom.cpu = be32_to_cpu(rval); diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 112cc3b2ee1a..b8a4987f58cf 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -1145,31 +1145,29 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs) void __init rtas_initialize(void) { unsigned long rtas_region = RTAS_INSTANTIATE_MAX; + u32 base, size, entry; + int no_base, no_size, no_entry; /* Get RTAS dev node and fill up our "rtas" structure with infos * about it. */ rtas.dev = of_find_node_by_name(NULL, "rtas"); - if (rtas.dev) { - const __be32 *basep, *entryp, *sizep; - - basep = of_get_property(rtas.dev, "linux,rtas-base", NULL); - sizep = of_get_property(rtas.dev, "rtas-size", NULL); - if (basep != NULL && sizep != NULL) { - rtas.base = __be32_to_cpu(*basep); - rtas.size = __be32_to_cpu(*sizep); - entryp = of_get_property(rtas.dev, - "linux,rtas-entry", NULL); - if (entryp == NULL) /* Ugh */ - rtas.entry = rtas.base; - else - rtas.entry = __be32_to_cpu(*entryp); - } else - rtas.dev = NULL; - } if (!rtas.dev) return; + no_base = of_property_read_u32(rtas.dev, "linux,rtas-base", &base); + no_size = of_property_read_u32(rtas.dev, "rtas-size", &size); + if (no_base || no_size) { + of_node_put(rtas.dev); + rtas.dev = NULL; + return; + } + + rtas.base = base; + rtas.size = size; + no_entry = of_property_read_u32(rtas.dev, "linux,rtas-entry", &entry); + rtas.entry = no_entry ? 
rtas.base : entry; + /* If RTAS was found, allocate the RMO buffer for it and look for * the stop-self token if any */ diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index 2bf1f9b5b34b..3650732639ed 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -21,6 +21,7 @@ #include <linux/cpu.h> #include <linux/workqueue.h> #include <linux/slab.h> +#include <linux/topology.h> #include <linux/uaccess.h> #include <asm/io.h> @@ -282,6 +283,7 @@ static void prrn_work_fn(struct work_struct *work) * the RTAS event. */ pseries_devicetree_update(-prrn_update_scope); + arch_update_cpu_topology(); } static DECLARE_WORK(prrn_work, prrn_work_fn); @@ -434,7 +436,10 @@ static void do_event_scan(void) } if (error == 0) { - pSeries_log_error(logdata, ERR_TYPE_RTAS_LOG, 0); + if (rtas_error_type((struct rtas_error_log *)logdata) != + RTAS_TYPE_PRRN) + pSeries_log_error(logdata, ERR_TYPE_RTAS_LOG, + 0); handle_rtas_event((struct rtas_error_log *)logdata); } diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index f516ac508ae3..4697da895133 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -87,6 +87,15 @@ EXPORT_SYMBOL(machine_id); int boot_cpuid = -1; EXPORT_SYMBOL_GPL(boot_cpuid); +/* + * These are used in binfmt_elf.c to put aux entries on the stack + * for each elf executable being started. + */ +int dcache_bsize; +int icache_bsize; +int ucache_bsize; + + unsigned long klimit = (unsigned long) _end; /* diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 7fcf1f7f01c1..2f88f6cf1a42 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -59,14 +59,6 @@ EXPORT_SYMBOL(DMA_MODE_READ); EXPORT_SYMBOL(DMA_MODE_WRITE); /* - * These are used in binfmt_elf.c to put aux entries on the stack - * for each elf executable being started. 
- */ -int dcache_bsize; -int icache_bsize; -int ucache_bsize; - -/* * We're called here very early in the boot. * * Note that the kernel may be running at an address which is different diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 6824157e4d2e..b9855f1b290a 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -77,25 +77,18 @@ int spinning_secondaries; u64 ppc64_pft_size; -/* Pick defaults since we might want to patch instructions - * before we've read this from the device tree. - */ struct ppc64_caches ppc64_caches = { - .dline_size = 0x40, - .log_dline_size = 6, - .iline_size = 0x40, - .log_iline_size = 6 + .l1d = { + .block_size = 0x40, + .log_block_size = 6, + }, + .l1i = { + .block_size = 0x40, + .log_block_size = 6 + }, }; EXPORT_SYMBOL_GPL(ppc64_caches); -/* - * These are used in binfmt_elf.c to put aux entries on the stack - * for each elf executable being started. - */ -int dcache_bsize; -int icache_bsize; -int ucache_bsize; - #if defined(CONFIG_PPC_BOOK3E) && defined(CONFIG_SMP) void __init setup_tlb_core_data(void) { @@ -408,74 +401,135 @@ void smp_release_cpus(void) * cache informations about the CPU that will be used by cache flush * routines and/or provided to userland */ + +static void init_cache_info(struct ppc_cache_info *info, u32 size, u32 lsize, + u32 bsize, u32 sets) +{ + info->size = size; + info->sets = sets; + info->line_size = lsize; + info->block_size = bsize; + info->log_block_size = __ilog2(bsize); + info->blocks_per_page = PAGE_SIZE / bsize; + + if (sets == 0) + info->assoc = 0xffff; + else + info->assoc = size / (sets * lsize); +} + +static bool __init parse_cache_info(struct device_node *np, + bool icache, + struct ppc_cache_info *info) +{ + static const char *ipropnames[] __initdata = { + "i-cache-size", + "i-cache-sets", + "i-cache-block-size", + "i-cache-line-size", + }; + static const char *dpropnames[] __initdata = { + "d-cache-size", + "d-cache-sets", + 
"d-cache-block-size", + "d-cache-line-size", + }; + const char **propnames = icache ? ipropnames : dpropnames; + const __be32 *sizep, *lsizep, *bsizep, *setsp; + u32 size, lsize, bsize, sets; + bool success = true; + + size = 0; + sets = -1u; + lsize = bsize = cur_cpu_spec->dcache_bsize; + sizep = of_get_property(np, propnames[0], NULL); + if (sizep != NULL) + size = be32_to_cpu(*sizep); + setsp = of_get_property(np, propnames[1], NULL); + if (setsp != NULL) + sets = be32_to_cpu(*setsp); + bsizep = of_get_property(np, propnames[2], NULL); + lsizep = of_get_property(np, propnames[3], NULL); + if (bsizep == NULL) + bsizep = lsizep; + if (lsizep != NULL) + lsize = be32_to_cpu(*lsizep); + if (bsizep != NULL) + bsize = be32_to_cpu(*bsizep); + if (sizep == NULL || bsizep == NULL || lsizep == NULL) + success = false; + + /* + * OF is weird .. it represents fully associative caches + * as "1 way" which doesn't make much sense and doesn't + * leave room for direct mapped. We'll assume that 0 + * in OF means direct mapped for that reason. + */ + if (sets == 1) + sets = 0; + else if (sets == 0) + sets = 1; + + init_cache_info(info, size, lsize, bsize, sets); + + return success; +} + void __init initialize_cache_info(void) { - struct device_node *np; - unsigned long num_cpus = 0; + struct device_node *cpu = NULL, *l2, *l3 = NULL; + u32 pvr; DBG(" -> initialize_cache_info()\n"); - for_each_node_by_type(np, "cpu") { - num_cpus += 1; + /* + * All shipping POWER8 machines have a firmware bug that + * puts incorrect information in the device-tree. 
This will + * be (hopefully) fixed for future chips but for now hard + * code the values if we are running on one of these + */ + pvr = PVR_VER(mfspr(SPRN_PVR)); + if (pvr == PVR_POWER8 || pvr == PVR_POWER8E || + pvr == PVR_POWER8NVL) { + /* size lsize blk sets */ + init_cache_info(&ppc64_caches.l1i, 0x8000, 128, 128, 32); + init_cache_info(&ppc64_caches.l1d, 0x10000, 128, 128, 64); + init_cache_info(&ppc64_caches.l2, 0x80000, 128, 0, 512); + init_cache_info(&ppc64_caches.l3, 0x800000, 128, 0, 8192); + } else + cpu = of_find_node_by_type(NULL, "cpu"); + + /* + * We're assuming *all* of the CPUs have the same + * d-cache and i-cache sizes... -Peter + */ + if (cpu) { + if (!parse_cache_info(cpu, false, &ppc64_caches.l1d)) + DBG("Argh, can't find dcache properties !\n"); + + if (!parse_cache_info(cpu, true, &ppc64_caches.l1i)) + DBG("Argh, can't find icache properties !\n"); /* - * We're assuming *all* of the CPUs have the same - * d-cache and i-cache sizes... -Peter + * Try to find the L2 and L3 if any. Assume they are + * unified and use the D-side properties. */ - if (num_cpus == 1) { - const __be32 *sizep, *lsizep; - u32 size, lsize; - - size = 0; - lsize = cur_cpu_spec->dcache_bsize; - sizep = of_get_property(np, "d-cache-size", NULL); - if (sizep != NULL) - size = be32_to_cpu(*sizep); - lsizep = of_get_property(np, "d-cache-block-size", - NULL); - /* fallback if block size missing */ - if (lsizep == NULL) - lsizep = of_get_property(np, - "d-cache-line-size", - NULL); - if (lsizep != NULL) - lsize = be32_to_cpu(*lsizep); - if (sizep == NULL || lsizep == NULL) - DBG("Argh, can't find dcache properties ! 
" - "sizep: %p, lsizep: %p\n", sizep, lsizep); - - ppc64_caches.dsize = size; - ppc64_caches.dline_size = lsize; - ppc64_caches.log_dline_size = __ilog2(lsize); - ppc64_caches.dlines_per_page = PAGE_SIZE / lsize; - - size = 0; - lsize = cur_cpu_spec->icache_bsize; - sizep = of_get_property(np, "i-cache-size", NULL); - if (sizep != NULL) - size = be32_to_cpu(*sizep); - lsizep = of_get_property(np, "i-cache-block-size", - NULL); - if (lsizep == NULL) - lsizep = of_get_property(np, - "i-cache-line-size", - NULL); - if (lsizep != NULL) - lsize = be32_to_cpu(*lsizep); - if (sizep == NULL || lsizep == NULL) - DBG("Argh, can't find icache properties ! " - "sizep: %p, lsizep: %p\n", sizep, lsizep); - - ppc64_caches.isize = size; - ppc64_caches.iline_size = lsize; - ppc64_caches.log_iline_size = __ilog2(lsize); - ppc64_caches.ilines_per_page = PAGE_SIZE / lsize; + l2 = of_find_next_cache_node(cpu); + of_node_put(cpu); + if (l2) { + parse_cache_info(l2, false, &ppc64_caches.l2); + l3 = of_find_next_cache_node(l2); + of_node_put(l2); + } + if (l3) { + parse_cache_info(l3, false, &ppc64_caches.l3); + of_node_put(l3); } } /* For use by binfmt_elf */ - dcache_bsize = ppc64_caches.dline_size; - icache_bsize = ppc64_caches.iline_size; + dcache_bsize = ppc64_caches.l1d.block_size; + icache_bsize = ppc64_caches.l1i.block_size; DBG(" <- initialize_cache_info()\n"); } diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 893bd7f79be6..573fb3a461b5 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -707,7 +707,7 @@ void start_secondary(void *unused) unsigned int cpu = smp_processor_id(); int i, base; - atomic_inc(&init_mm.mm_count); + mmgrab(&init_mm); current->active_mm = &init_mm; smp_store_cpu_info(cpu); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index bc2e08d415fa..14e485525e31 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -57,6 +57,7 @@ #include <linux/clk-provider.h> #include 
<linux/suspend.h> #include <linux/rtc.h> +#include <linux/cputime.h> #include <asm/trace.h> #include <asm/io.h> @@ -72,7 +73,6 @@ #include <asm/smp.h> #include <asm/vdso_datapage.h> #include <asm/firmware.h> -#include <asm/cputime.h> #include <asm/asm-prototypes.h> /* powerpc clocksource/clockevent code */ @@ -152,20 +152,11 @@ EXPORT_SYMBOL_GPL(ppc_tb_freq); #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE /* - * Factors for converting from cputime_t (timebase ticks) to - * jiffies, microseconds, seconds, and clock_t (1/USER_HZ seconds). - * These are all stored as 0.64 fixed-point binary fractions. + * Factor for converting from cputime_t (timebase ticks) to + * microseconds. This is stored as 0.64 fixed-point binary fraction. */ -u64 __cputime_jiffies_factor; -EXPORT_SYMBOL(__cputime_jiffies_factor); u64 __cputime_usec_factor; EXPORT_SYMBOL(__cputime_usec_factor); -u64 __cputime_sec_factor; -EXPORT_SYMBOL(__cputime_sec_factor); -u64 __cputime_clockt_factor; -EXPORT_SYMBOL(__cputime_clockt_factor); - -cputime_t cputime_one_jiffy; #ifdef CONFIG_PPC_SPLPAR void (*dtl_consumer)(struct dtl_entry *, u64); @@ -181,14 +172,8 @@ static void calc_cputime_factors(void) { struct div_result res; - div128_by_32(HZ, 0, tb_ticks_per_sec, &res); - __cputime_jiffies_factor = res.result_low; div128_by_32(1000000, 0, tb_ticks_per_sec, &res); __cputime_usec_factor = res.result_low; - div128_by_32(1, 0, tb_ticks_per_sec, &res); - __cputime_sec_factor = res.result_low; - div128_by_32(USER_HZ, 0, tb_ticks_per_sec, &res); - __cputime_clockt_factor = res.result_low; } /* @@ -271,25 +256,19 @@ void accumulate_stolen_time(void) sst = scan_dispatch_log(acct->starttime_user); ust = scan_dispatch_log(acct->starttime); - acct->system_time -= sst; - acct->user_time -= ust; - local_paca->stolen_time += ust + sst; + acct->stime -= sst; + acct->utime -= ust; + acct->steal_time += ust + sst; local_paca->soft_enabled = save_soft_enabled; } static inline u64 calculate_stolen_time(u64 stop_tb) { - u64 stolen 
= 0; + if (get_paca()->dtl_ridx != be64_to_cpu(get_lppaca()->dtl_idx)) + return scan_dispatch_log(stop_tb); - if (get_paca()->dtl_ridx != be64_to_cpu(get_lppaca()->dtl_idx)) { - stolen = scan_dispatch_log(stop_tb); - get_paca()->accounting.system_time -= stolen; - } - - stolen += get_paca()->stolen_time; - get_paca()->stolen_time = 0; - return stolen; + return 0; } #else /* CONFIG_PPC_SPLPAR */ @@ -305,28 +284,27 @@ static inline u64 calculate_stolen_time(u64 stop_tb) * or soft irq state. */ static unsigned long vtime_delta(struct task_struct *tsk, - unsigned long *sys_scaled, - unsigned long *stolen) + unsigned long *stime_scaled, + unsigned long *steal_time) { unsigned long now, nowscaled, deltascaled; - unsigned long udelta, delta, user_scaled; + unsigned long stime; + unsigned long utime, utime_scaled; struct cpu_accounting_data *acct = get_accounting(tsk); WARN_ON_ONCE(!irqs_disabled()); now = mftb(); nowscaled = read_spurr(now); - acct->system_time += now - acct->starttime; + stime = now - acct->starttime; acct->starttime = now; deltascaled = nowscaled - acct->startspurr; acct->startspurr = nowscaled; - *stolen = calculate_stolen_time(now); + *steal_time = calculate_stolen_time(now); - delta = acct->system_time; - acct->system_time = 0; - udelta = acct->user_time - acct->utime_sspurr; - acct->utime_sspurr = acct->user_time; + utime = acct->utime - acct->utime_sspurr; + acct->utime_sspurr = acct->utime; /* * Because we don't read the SPURR on every kernel entry/exit, @@ -338,62 +316,105 @@ static unsigned long vtime_delta(struct task_struct *tsk, * the user ticks get saved up in paca->user_time_scaled to be * used by account_process_tick. 
*/ - *sys_scaled = delta; - user_scaled = udelta; - if (deltascaled != delta + udelta) { - if (udelta) { - *sys_scaled = deltascaled * delta / (delta + udelta); - user_scaled = deltascaled - *sys_scaled; + *stime_scaled = stime; + utime_scaled = utime; + if (deltascaled != stime + utime) { + if (utime) { + *stime_scaled = deltascaled * stime / (stime + utime); + utime_scaled = deltascaled - *stime_scaled; } else { - *sys_scaled = deltascaled; + *stime_scaled = deltascaled; } } - acct->user_time_scaled += user_scaled; + acct->utime_scaled += utime_scaled; - return delta; + return stime; } void vtime_account_system(struct task_struct *tsk) { - unsigned long delta, sys_scaled, stolen; + unsigned long stime, stime_scaled, steal_time; + struct cpu_accounting_data *acct = get_accounting(tsk); + + stime = vtime_delta(tsk, &stime_scaled, &steal_time); - delta = vtime_delta(tsk, &sys_scaled, &stolen); - account_system_time(tsk, 0, delta); - tsk->stimescaled += sys_scaled; - if (stolen) - account_steal_time(stolen); + stime -= min(stime, steal_time); + acct->steal_time += steal_time; + + if ((tsk->flags & PF_VCPU) && !irq_count()) { + acct->gtime += stime; + acct->utime_scaled += stime_scaled; + } else { + if (hardirq_count()) + acct->hardirq_time += stime; + else if (in_serving_softirq()) + acct->softirq_time += stime; + else + acct->stime += stime; + + acct->stime_scaled += stime_scaled; + } } EXPORT_SYMBOL_GPL(vtime_account_system); void vtime_account_idle(struct task_struct *tsk) { - unsigned long delta, sys_scaled, stolen; + unsigned long stime, stime_scaled, steal_time; + struct cpu_accounting_data *acct = get_accounting(tsk); - delta = vtime_delta(tsk, &sys_scaled, &stolen); - account_idle_time(delta + stolen); + stime = vtime_delta(tsk, &stime_scaled, &steal_time); + acct->idle_time += stime + steal_time; } /* - * Transfer the user time accumulated in the paca - * by the exception entry and exit code to the generic - * process user time records. 
+ * Account the whole cputime accumulated in the paca * Must be called with interrupts disabled. * Assumes that vtime_account_system/idle() has been called * recently (i.e. since the last entry from usermode) so that * get_paca()->user_time_scaled is up to date. */ -void vtime_account_user(struct task_struct *tsk) +void vtime_flush(struct task_struct *tsk) { - cputime_t utime, utimescaled; struct cpu_accounting_data *acct = get_accounting(tsk); - utime = acct->user_time; - utimescaled = acct->user_time_scaled; - acct->user_time = 0; - acct->user_time_scaled = 0; + if (acct->utime) + account_user_time(tsk, cputime_to_nsecs(acct->utime)); + + if (acct->utime_scaled) + tsk->utimescaled += cputime_to_nsecs(acct->utime_scaled); + + if (acct->gtime) + account_guest_time(tsk, cputime_to_nsecs(acct->gtime)); + + if (acct->steal_time) + account_steal_time(cputime_to_nsecs(acct->steal_time)); + + if (acct->idle_time) + account_idle_time(cputime_to_nsecs(acct->idle_time)); + + if (acct->stime) + account_system_index_time(tsk, cputime_to_nsecs(acct->stime), + CPUTIME_SYSTEM); + if (acct->stime_scaled) + tsk->stimescaled += cputime_to_nsecs(acct->stime_scaled); + + if (acct->hardirq_time) + account_system_index_time(tsk, cputime_to_nsecs(acct->hardirq_time), + CPUTIME_IRQ); + if (acct->softirq_time) + account_system_index_time(tsk, cputime_to_nsecs(acct->softirq_time), + CPUTIME_SOFTIRQ); + + acct->utime = 0; + acct->utime_scaled = 0; acct->utime_sspurr = 0; - account_user_time(tsk, utime); - tsk->utimescaled += utimescaled; + acct->gtime = 0; + acct->steal_time = 0; + acct->idle_time = 0; + acct->stime = 0; + acct->stime_scaled = 0; + acct->hardirq_time = 0; + acct->softirq_time = 0; } #ifdef CONFIG_PPC32 @@ -407,8 +428,7 @@ void arch_vtime_task_switch(struct task_struct *prev) struct cpu_accounting_data *acct = get_accounting(current); acct->starttime = get_accounting(prev)->starttime; - acct->system_time = 0; - acct->user_time = 0; + acct->startspurr = 
get_accounting(prev)->startspurr; } #endif /* CONFIG_PPC32 */ @@ -1018,7 +1038,6 @@ void __init time_init(void) tb_ticks_per_sec = ppc_tb_freq; tb_ticks_per_usec = ppc_tb_freq / 1000000; calc_cputime_factors(); - setup_cputime_one_jiffy(); /* * Compute scale factor for sched_clock. diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 4111d30badfa..22b01a3962f0 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -736,16 +736,14 @@ static int __init vdso_init(void) if (firmware_has_feature(FW_FEATURE_LPAR)) vdso_data->platform |= 1; vdso_data->physicalMemorySize = memblock_phys_mem_size(); - vdso_data->dcache_size = ppc64_caches.dsize; - vdso_data->dcache_line_size = ppc64_caches.dline_size; - vdso_data->icache_size = ppc64_caches.isize; - vdso_data->icache_line_size = ppc64_caches.iline_size; - - /* XXXOJN: Blocks should be added to ppc64_caches and used instead */ - vdso_data->dcache_block_size = ppc64_caches.dline_size; - vdso_data->icache_block_size = ppc64_caches.iline_size; - vdso_data->dcache_log_block_size = ppc64_caches.log_dline_size; - vdso_data->icache_log_block_size = ppc64_caches.log_iline_size; + vdso_data->dcache_size = ppc64_caches.l1d.size; + vdso_data->dcache_line_size = ppc64_caches.l1d.line_size; + vdso_data->icache_size = ppc64_caches.l1i.size; + vdso_data->icache_line_size = ppc64_caches.l1i.line_size; + vdso_data->dcache_block_size = ppc64_caches.l1d.block_size; + vdso_data->icache_block_size = ppc64_caches.l1i.block_size; + vdso_data->dcache_log_block_size = ppc64_caches.l1d.log_block_size; + vdso_data->icache_log_block_size = ppc64_caches.l1i.log_block_size; /* * Calculate the size of the 64 bits vDSO |