summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/Kconfig3
-rw-r--r--arch/alpha/kernel/asm-offsets.c6
-rw-r--r--arch/alpha/kernel/core_marvel.c6
-rw-r--r--arch/alpha/kernel/core_t2.c24
-rw-r--r--arch/alpha/kernel/core_titan.c34
-rw-r--r--arch/alpha/kernel/core_tsunami.c28
-rw-r--r--arch/alpha/kernel/module.c6
-rw-r--r--arch/alpha/kernel/pci.c6
-rw-r--r--arch/alpha/kernel/pci_iommu.c34
-rw-r--r--arch/alpha/kernel/smp.c4
-rw-r--r--arch/alpha/kernel/srm_env.c2
-rw-r--r--arch/alpha/kernel/sys_alcor.c2
-rw-r--r--arch/alpha/kernel/sys_marvel.c12
-rw-r--r--arch/alpha/kernel/sys_sable.c6
-rw-r--r--arch/alpha/kernel/sys_sio.c2
-rw-r--r--arch/alpha/kernel/traps.c5
-rw-r--r--arch/arm/Kconfig15
-rw-r--r--arch/arm/kernel/asm-offsets.c8
-rw-r--r--arch/arm/kernel/atags.c2
-rw-r--r--arch/arm/kernel/ecard.c56
-rw-r--r--arch/arm/mach-at91/at91sam9261_devices.c11
-rw-r--r--arch/arm/mach-at91/at91sam9rl_devices.c14
-rw-r--r--arch/arm/mach-davinci/clock.c6
-rw-r--r--arch/arm/mm/iomap.c4
-rw-r--r--arch/avr32/kernel/asm-offsets.c9
-rw-r--r--arch/avr32/kernel/setup.c2
-rw-r--r--arch/avr32/mm/tlb.c6
-rw-r--r--arch/blackfin/kernel/asm-offsets.c3
-rw-r--r--arch/blackfin/kernel/signal.c2
-rw-r--r--arch/cris/kernel/profile.c4
-rw-r--r--arch/cris/mm/init.c1
-rw-r--r--arch/frv/kernel/asm-offsets.c9
-rw-r--r--arch/frv/kernel/signal.c4
-rw-r--r--arch/frv/kernel/traps.c38
-rw-r--r--arch/frv/mb93090-mb00/pci-iomap.c4
-rw-r--r--arch/frv/mm/unaligned.c217
-rw-r--r--arch/h8300/kernel/asm-offsets.c6
-rw-r--r--arch/ia64/Kconfig9
-rw-r--r--arch/ia64/Makefile1
-rw-r--r--arch/ia64/hp/common/hwsw_iommu.c61
-rw-r--r--arch/ia64/hp/common/sba_iommu.c70
-rw-r--r--arch/ia64/kernel/asm-offsets.c7
-rw-r--r--arch/ia64/kernel/perfmon.c6
-rw-r--r--arch/ia64/kernel/salinfo.c10
-rw-r--r--arch/ia64/kvm/Kconfig49
-rw-r--r--arch/ia64/kvm/Makefile58
-rw-r--r--arch/ia64/kvm/asm-offsets.c251
-rw-r--r--arch/ia64/kvm/kvm-ia64.c1806
-rw-r--r--arch/ia64/kvm/kvm_fw.c500
-rw-r--r--arch/ia64/kvm/kvm_minstate.h273
-rw-r--r--arch/ia64/kvm/lapic.h25
-rw-r--r--arch/ia64/kvm/misc.h93
-rw-r--r--arch/ia64/kvm/mmio.c341
-rw-r--r--arch/ia64/kvm/optvfault.S918
-rw-r--r--arch/ia64/kvm/process.c970
-rw-r--r--arch/ia64/kvm/trampoline.S1038
-rw-r--r--arch/ia64/kvm/vcpu.c2163
-rw-r--r--arch/ia64/kvm/vcpu.h740
-rw-r--r--arch/ia64/kvm/vmm.c66
-rw-r--r--arch/ia64/kvm/vmm_ivt.S1424
-rw-r--r--arch/ia64/kvm/vti.h290
-rw-r--r--arch/ia64/kvm/vtlb.c636
-rw-r--r--arch/ia64/mm/init.c9
-rw-r--r--arch/ia64/sn/kernel/sn2/sn2_smp.c5
-rw-r--r--arch/ia64/sn/kernel/sn2/sn_proc_fs.c29
-rw-r--r--arch/ia64/sn/pci/pci_dma.c81
-rw-r--r--arch/m68k/kernel/asm-offsets.c4
-rw-r--r--arch/m68k/kernel/ints.c10
-rw-r--r--arch/m68k/mac/iop.c85
-rw-r--r--arch/m68k/mac/oss.c4
-rw-r--r--arch/m68k/mm/init.c1
-rw-r--r--arch/m68k/q40/q40ints.c2
-rw-r--r--arch/m68knommu/kernel/asm-offsets.c6
-rw-r--r--arch/mips/Kconfig38
-rw-r--r--arch/mips/Kconfig.debug10
-rw-r--r--arch/mips/Makefile12
-rw-r--r--arch/mips/au1000/common/cputable.c5
-rw-r--r--arch/mips/au1000/common/dbdma.c6
-rw-r--r--arch/mips/au1000/common/dbg_io.c6
-rw-r--r--arch/mips/au1000/common/dma.c5
-rw-r--r--arch/mips/au1000/common/gpio.c5
-rw-r--r--arch/mips/au1000/common/irq.c8
-rw-r--r--arch/mips/au1000/common/pci.c2
-rw-r--r--arch/mips/au1000/common/platform.c117
-rw-r--r--arch/mips/au1000/common/power.c11
-rw-r--r--arch/mips/au1000/common/prom.c2
-rw-r--r--arch/mips/au1000/common/puts.c1
-rw-r--r--arch/mips/au1000/common/reset.c8
-rw-r--r--arch/mips/au1000/common/setup.c11
-rw-r--r--arch/mips/au1000/common/sleeper.S2
-rw-r--r--arch/mips/au1000/common/time.c35
-rw-r--r--arch/mips/au1000/db1x00/board_setup.c15
-rw-r--r--arch/mips/au1000/db1x00/init.c5
-rw-r--r--arch/mips/au1000/db1x00/irqmap.c19
-rw-r--r--arch/mips/au1000/mtx-1/board_setup.c12
-rw-r--r--arch/mips/au1000/mtx-1/init.c6
-rw-r--r--arch/mips/au1000/mtx-1/irqmap.c19
-rw-r--r--arch/mips/au1000/mtx-1/platform.c1
-rw-r--r--arch/mips/au1000/pb1000/board_setup.c11
-rw-r--r--arch/mips/au1000/pb1000/init.c6
-rw-r--r--arch/mips/au1000/pb1000/irqmap.c18
-rw-r--r--arch/mips/au1000/pb1100/board_setup.c11
-rw-r--r--arch/mips/au1000/pb1100/init.c6
-rw-r--r--arch/mips/au1000/pb1100/irqmap.c19
-rw-r--r--arch/mips/au1000/pb1200/Makefile1
-rw-r--r--arch/mips/au1000/pb1200/board_setup.c20
-rw-r--r--arch/mips/au1000/pb1200/init.c6
-rw-r--r--arch/mips/au1000/pb1200/irqmap.c20
-rw-r--r--arch/mips/au1000/pb1200/platform.c84
-rw-r--r--arch/mips/au1000/pb1500/board_setup.c11
-rw-r--r--arch/mips/au1000/pb1500/init.c6
-rw-r--r--arch/mips/au1000/pb1500/irqmap.c19
-rw-r--r--arch/mips/au1000/pb1550/board_setup.c13
-rw-r--r--arch/mips/au1000/pb1550/init.c6
-rw-r--r--arch/mips/au1000/pb1550/irqmap.c19
-rw-r--r--arch/mips/au1000/xxs1500/board_setup.c11
-rw-r--r--arch/mips/au1000/xxs1500/init.c6
-rw-r--r--arch/mips/au1000/xxs1500/irqmap.c19
-rw-r--r--arch/mips/basler/excite/excite_procfs.c30
-rw-r--r--arch/mips/configs/mipssim_defconfig1
-rw-r--r--arch/mips/configs/pnx8550-jbs_defconfig1
-rw-r--r--arch/mips/configs/pnx8550-stb810_defconfig1
-rw-r--r--arch/mips/dec/time.c71
-rw-r--r--arch/mips/jmr3927/rbhma3100/setup.c11
-rw-r--r--arch/mips/kernel/Makefile8
-rw-r--r--arch/mips/kernel/asm-offsets.c478
-rw-r--r--arch/mips/kernel/cevt-ds1287.c129
-rw-r--r--arch/mips/kernel/cevt-gt641xx.c2
-rw-r--r--arch/mips/kernel/cpu-probe.c21
-rw-r--r--arch/mips/kernel/csrc-ioasic.c65
-rw-r--r--arch/mips/kernel/gpio_txx9.c87
-rw-r--r--arch/mips/kernel/irq-gic.c295
-rw-r--r--arch/mips/kernel/irq-msc01.c10
-rw-r--r--arch/mips/kernel/signal-common.h2
-rw-r--r--arch/mips/kernel/smp-cmp.c265
-rw-r--r--arch/mips/kernel/smp-mt.c143
-rw-r--r--arch/mips/kernel/smp.c4
-rw-r--r--arch/mips/kernel/smtc.c11
-rw-r--r--arch/mips/kernel/spram.c221
-rw-r--r--arch/mips/kernel/sync-r4k.c159
-rw-r--r--arch/mips/kernel/time.c5
-rw-r--r--arch/mips/kernel/traps.c213
-rw-r--r--arch/mips/lib/iomap-pci.c4
-rw-r--r--arch/mips/math-emu/ieee754dp.h2
-rw-r--r--arch/mips/math-emu/ieee754sp.h2
-rw-r--r--arch/mips/mips-boards/generic/Makefile1
-rw-r--r--arch/mips/mips-boards/generic/amon.c80
-rw-r--r--arch/mips/mips-boards/generic/init.c23
-rw-r--r--arch/mips/mips-boards/generic/memory.c4
-rw-r--r--arch/mips/mips-boards/generic/time.c31
-rw-r--r--arch/mips/mips-boards/malta/Makefile1
-rw-r--r--arch/mips/mips-boards/malta/malta_int.c354
-rw-r--r--arch/mips/mips-boards/malta/malta_setup.c10
-rw-r--r--arch/mips/mipssim/sim_setup.c6
-rw-r--r--arch/mips/mm/Makefile37
-rw-r--r--arch/mips/mm/c-r4k.c62
-rw-r--r--arch/mips/mm/cache.c31
-rw-r--r--arch/mips/mm/init.c11
-rw-r--r--arch/mips/mm/page.c684
-rw-r--r--arch/mips/mm/pg-r4k.c534
-rw-r--r--arch/mips/mm/pg-sb1.c302
-rw-r--r--arch/mips/mm/pgtable.c1
-rw-r--r--arch/mips/mm/tlb-r4k.c2
-rw-r--r--arch/mips/mm/uasm.c26
-rw-r--r--arch/mips/mm/uasm.h4
-rw-r--r--arch/mips/nxp/pnx8550/common/Makefile (renamed from arch/mips/philips/pnx8550/common/Makefile)0
-rw-r--r--arch/mips/nxp/pnx8550/common/gdb_hook.c (renamed from arch/mips/philips/pnx8550/common/gdb_hook.c)0
-rw-r--r--arch/mips/nxp/pnx8550/common/int.c (renamed from arch/mips/philips/pnx8550/common/int.c)0
-rw-r--r--arch/mips/nxp/pnx8550/common/pci.c (renamed from arch/mips/philips/pnx8550/common/pci.c)0
-rw-r--r--arch/mips/nxp/pnx8550/common/platform.c (renamed from arch/mips/philips/pnx8550/common/platform.c)2
-rw-r--r--arch/mips/nxp/pnx8550/common/proc.c (renamed from arch/mips/philips/pnx8550/common/proc.c)0
-rw-r--r--arch/mips/nxp/pnx8550/common/prom.c (renamed from arch/mips/philips/pnx8550/common/prom.c)0
-rw-r--r--arch/mips/nxp/pnx8550/common/reset.c (renamed from arch/mips/philips/pnx8550/common/reset.c)0
-rw-r--r--arch/mips/nxp/pnx8550/common/setup.c (renamed from arch/mips/philips/pnx8550/common/setup.c)0
-rw-r--r--arch/mips/nxp/pnx8550/common/time.c (renamed from arch/mips/philips/pnx8550/common/time.c)0
-rw-r--r--arch/mips/nxp/pnx8550/jbs/Makefile (renamed from arch/mips/philips/pnx8550/jbs/Makefile)2
-rw-r--r--arch/mips/nxp/pnx8550/jbs/board_setup.c (renamed from arch/mips/philips/pnx8550/jbs/board_setup.c)0
-rw-r--r--arch/mips/nxp/pnx8550/jbs/init.c (renamed from arch/mips/philips/pnx8550/jbs/init.c)2
-rw-r--r--arch/mips/nxp/pnx8550/jbs/irqmap.c (renamed from arch/mips/philips/pnx8550/jbs/irqmap.c)3
-rw-r--r--arch/mips/nxp/pnx8550/stb810/Makefile (renamed from arch/mips/philips/pnx8550/stb810/Makefile)2
-rw-r--r--arch/mips/nxp/pnx8550/stb810/board_setup.c (renamed from arch/mips/philips/pnx8550/stb810/board_setup.c)2
-rw-r--r--arch/mips/nxp/pnx8550/stb810/irqmap.c (renamed from arch/mips/philips/pnx8550/stb810/irqmap.c)3
-rw-r--r--arch/mips/nxp/pnx8550/stb810/prom_init.c (renamed from arch/mips/philips/pnx8550/stb810/prom_init.c)2
-rw-r--r--arch/mips/oprofile/common.c1
-rw-r--r--arch/mips/oprofile/op_impl.h1
-rw-r--r--arch/mips/oprofile/op_model_mipsxx.c39
-rw-r--r--arch/mips/pci/fixup-au1000.c5
-rw-r--r--arch/mips/pci/ops-pnx8550.c4
-rw-r--r--arch/mips/pmc-sierra/yosemite/setup.c3
-rw-r--r--arch/mips/sgi-ip32/ip32-reset.c2
-rw-r--r--arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_irq.c2
-rw-r--r--arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c2
-rw-r--r--arch/mips/tx4938/common/dbgio.c4
-rw-r--r--arch/mips/tx4938/common/prom.c11
-rw-r--r--arch/mips/tx4938/toshiba_rbtx4938/irq.c46
-rw-r--r--arch/mips/tx4938/toshiba_rbtx4938/setup.c199
-rw-r--r--arch/mips/vr41xx/common/init.c4
-rw-r--r--arch/mips/vr41xx/common/siu.c36
-rw-r--r--arch/mn10300/kernel/asm-offsets.c9
-rw-r--r--arch/mn10300/unit-asb2305/pci-iomap.c4
-rw-r--r--arch/parisc/kernel/asm-offsets.c6
-rw-r--r--arch/parisc/kernel/pci-dma.c7
-rw-r--r--arch/parisc/lib/iomap.c4
-rw-r--r--arch/parisc/mm/init.c11
-rw-r--r--arch/powerpc/Kconfig14
-rw-r--r--arch/powerpc/Kconfig.debug4
-rw-r--r--arch/powerpc/Makefile1
-rw-r--r--arch/powerpc/boot/dts/mpc8610_hpcd.dts12
-rw-r--r--arch/powerpc/boot/dts/mpc8641_hpcn.dts12
-rw-r--r--arch/powerpc/configs/g5_defconfig1
-rw-r--r--arch/powerpc/kernel/Makefile1
-rw-r--r--arch/powerpc/kernel/asm-offsets.c35
-rw-r--r--arch/powerpc/kernel/entry_32.S5
-rw-r--r--arch/powerpc/kernel/head_64.S11
-rw-r--r--arch/powerpc/kernel/irq.c10
-rw-r--r--arch/powerpc/kernel/lparcfg.c6
-rw-r--r--arch/powerpc/kernel/misc_32.S25
-rw-r--r--arch/powerpc/kernel/proc_ppc64.c5
-rw-r--r--arch/powerpc/kernel/process.c2
-rw-r--r--arch/powerpc/kernel/rio.c52
-rw-r--r--arch/powerpc/kernel/rtas-proc.c45
-rw-r--r--arch/powerpc/kernel/rtas_flash.c13
-rw-r--r--arch/powerpc/kernel/setup_32.c21
-rw-r--r--arch/powerpc/kvm/44x_tlb.c224
-rw-r--r--arch/powerpc/kvm/44x_tlb.h91
-rw-r--r--arch/powerpc/kvm/Kconfig42
-rw-r--r--arch/powerpc/kvm/Makefile15
-rw-r--r--arch/powerpc/kvm/booke_guest.c615
-rw-r--r--arch/powerpc/kvm/booke_host.c83
-rw-r--r--arch/powerpc/kvm/booke_interrupts.S436
-rw-r--r--arch/powerpc/kvm/emulate.c760
-rw-r--r--arch/powerpc/kvm/powerpc.c436
-rw-r--r--arch/powerpc/mm/mem.c39
-rw-r--r--arch/powerpc/platforms/86xx/Kconfig1
-rw-r--r--arch/powerpc/platforms/86xx/mpc8610_hpcd.c190
-rw-r--r--arch/powerpc/platforms/86xx/mpc86xx_hpcn.c1
-rw-r--r--arch/powerpc/platforms/cell/celleb_scc_pciex.c18
-rw-r--r--arch/powerpc/platforms/cell/spufs/sched.c3
-rw-r--r--arch/powerpc/platforms/cell/spufs/sputrace.c3
-rw-r--r--arch/powerpc/platforms/iseries/lpevents.c8
-rw-r--r--arch/powerpc/platforms/iseries/mf.c6
-rw-r--r--arch/powerpc/platforms/iseries/proc.c8
-rw-r--r--arch/powerpc/platforms/iseries/viopath.c7
-rw-r--r--arch/powerpc/platforms/powermac/Makefile5
-rw-r--r--arch/powerpc/platforms/powermac/setup.c3
-rw-r--r--arch/powerpc/platforms/pseries/Makefile1
-rw-r--r--arch/powerpc/platforms/pseries/eeh.c10
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-memory.c141
-rw-r--r--arch/powerpc/platforms/pseries/reconfig.c7
-rw-r--r--arch/powerpc/platforms/pseries/rtasd.c7
-rw-r--r--arch/powerpc/sysdev/axonram.c5
-rw-r--r--arch/powerpc/sysdev/fsl_rio.c711
-rw-r--r--arch/powerpc/sysdev/fsl_rio.h20
-rw-r--r--arch/powerpc/sysdev/fsl_soc.c41
-rw-r--r--arch/powerpc/sysdev/fsl_soc.h23
-rw-r--r--arch/ppc/kernel/asm-offsets.c7
-rw-r--r--arch/ppc/kernel/pci.c4
-rw-r--r--arch/ppc/platforms/sbc82xx.c2
-rw-r--r--arch/s390/Kconfig14
-rw-r--r--arch/s390/Makefile2
-rw-r--r--arch/s390/kernel/asm-offsets.c54
-rw-r--r--arch/s390/kernel/early.c4
-rw-r--r--arch/s390/kernel/irq.c2
-rw-r--r--arch/s390/kernel/setup.c14
-rw-r--r--arch/s390/kernel/vtime.c1
-rw-r--r--arch/s390/kvm/Kconfig46
-rw-r--r--arch/s390/kvm/Makefile14
-rw-r--r--arch/s390/kvm/diag.c67
-rw-r--r--arch/s390/kvm/gaccess.h274
-rw-r--r--arch/s390/kvm/intercept.c216
-rw-r--r--arch/s390/kvm/interrupt.c592
-rw-r--r--arch/s390/kvm/kvm-s390.c685
-rw-r--r--arch/s390/kvm/kvm-s390.h64
-rw-r--r--arch/s390/kvm/priv.c323
-rw-r--r--arch/s390/kvm/sie64a.S47
-rw-r--r--arch/s390/kvm/sigp.c288
-rw-r--r--arch/s390/mm/pgtable.c65
-rw-r--r--arch/sh/drivers/pci/pci.c4
-rw-r--r--arch/sh/kernel/asm-offsets.c7
-rw-r--r--arch/sh/kernel/irq.c2
-rw-r--r--arch/sh/mm/init.c9
-rw-r--r--arch/sparc/kernel/asm-offsets.c6
-rw-r--r--arch/sparc/kernel/process.c2
-rw-r--r--arch/sparc/kernel/signal.c260
-rw-r--r--arch/sparc/kernel/sys_sparc.c14
-rw-r--r--arch/sparc/lib/iomap.c4
-rw-r--r--arch/sparc64/Kconfig85
-rw-r--r--arch/sparc64/defconfig23
-rw-r--r--arch/sparc64/kernel/Makefile6
-rw-r--r--arch/sparc64/kernel/audit.c6
-rw-r--r--arch/sparc64/kernel/irq.c3
-rw-r--r--arch/sparc64/kernel/isa.c191
-rw-r--r--arch/sparc64/kernel/of_device.c6
-rw-r--r--arch/sparc64/kernel/pci.c2
-rw-r--r--arch/sparc64/kernel/process.c12
-rw-r--r--arch/sparc64/kernel/signal.c16
-rw-r--r--arch/sparc64/kernel/signal32.c272
-rw-r--r--arch/sparc64/kernel/sparc64_ksyms.c2
-rw-r--r--arch/sparc64/kernel/sys_sparc32.c11
-rw-r--r--arch/sparc64/lib/iomap.c4
-rw-r--r--arch/sparc64/mm/init.c16
-rw-r--r--arch/um/drivers/chan_kern.c15
-rw-r--r--arch/um/drivers/line.c2
-rw-r--r--arch/um/drivers/mcast_kern.c2
-rw-r--r--arch/um/drivers/mconsole_user.c2
-rw-r--r--arch/um/drivers/net_kern.c6
-rw-r--r--arch/um/drivers/port_user.c2
-rw-r--r--arch/um/drivers/slip_kern.c4
-rw-r--r--arch/um/drivers/stdio_console.c4
-rw-r--r--arch/um/drivers/ubd_kern.c385
-rw-r--r--arch/um/include/chan_kern.h2
-rw-r--r--arch/um/kernel/exitcode.c2
-rw-r--r--arch/um/kernel/process.c2
-rw-r--r--arch/um/kernel/time.c3
-rw-r--r--arch/um/kernel/um_arch.c7
-rw-r--r--arch/um/os-Linux/start_up.c14
-rw-r--r--arch/um/os-Linux/sys-i386/task_size.c12
-rw-r--r--arch/v850/kernel/asm-offsets.c7
-rw-r--r--arch/v850/kernel/rte_mb_a_pci.c4
-rw-r--r--arch/x86/Kconfig57
-rw-r--r--arch/x86/boot/edd.c10
-rw-r--r--arch/x86/kernel/Makefile4
-rw-r--r--arch/x86/kernel/apm_32.c3
-rw-r--r--arch/x86/kernel/asm-offsets_32.c9
-rw-r--r--arch/x86/kernel/asm-offsets_64.c9
-rw-r--r--arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c11
-rw-r--r--arch/x86/kernel/cpu/mtrr/if.c2
-rw-r--r--arch/x86/kernel/crash.c3
-rw-r--r--arch/x86/kernel/io_apic_32.c1
-rw-r--r--arch/x86/kernel/irq_32.c2
-rw-r--r--arch/x86/kernel/kvm.c248
-rw-r--r--arch/x86/kernel/kvmclock.c187
-rw-r--r--arch/x86/kernel/mfgpt_32.c8
-rw-r--r--arch/x86/kernel/olpc.c260
-rw-r--r--arch/x86/kernel/process.c117
-rw-r--r--arch/x86/kernel/process_32.c118
-rw-r--r--arch/x86/kernel/process_64.c123
-rw-r--r--arch/x86/kernel/reboot.c13
-rw-r--r--arch/x86/kernel/setup_32.c6
-rw-r--r--arch/x86/kernel/setup_64.c7
-rw-r--r--arch/x86/kernel/time_32.c1
-rw-r--r--arch/x86/kernel/vmlinux_64.lds.S6
-rw-r--r--arch/x86/kvm/Kconfig13
-rw-r--r--arch/x86/kvm/Makefile6
-rw-r--r--arch/x86/kvm/i8254.c611
-rw-r--r--arch/x86/kvm/i8254.h63
-rw-r--r--arch/x86/kvm/irq.c18
-rw-r--r--arch/x86/kvm/irq.h3
-rw-r--r--arch/x86/kvm/kvm_svm.h2
-rw-r--r--arch/x86/kvm/lapic.c35
-rw-r--r--arch/x86/kvm/mmu.c672
-rw-r--r--arch/x86/kvm/mmu.h6
-rw-r--r--arch/x86/kvm/paging_tmpl.h86
-rw-r--r--arch/x86/kvm/segment_descriptor.h29
-rw-r--r--arch/x86/kvm/svm.c352
-rw-r--r--arch/x86/kvm/svm.h3
-rw-r--r--arch/x86/kvm/tss.h59
-rw-r--r--arch/x86/kvm/vmx.c278
-rw-r--r--arch/x86/kvm/vmx.h10
-rw-r--r--arch/x86/kvm/x86.c897
-rw-r--r--arch/x86/kvm/x86_emulate.c285
-rw-r--r--arch/x86/mm/init_32.c36
-rw-r--r--arch/x86/mm/init_64.c9
-rw-r--r--arch/x86/mm/ioremap.c15
-rw-r--r--arch/x86/mm/pat.c9
-rw-r--r--arch/x86/pci/Makefile_321
-rw-r--r--arch/x86/pci/init.c4
-rw-r--r--arch/x86/pci/olpc.c313
-rw-r--r--arch/x86/pci/pci.h1
-rw-r--r--arch/x86/vdso/vdso.S10
-rw-r--r--arch/x86/xen/mmu.c4
-rw-r--r--arch/xtensa/kernel/asm-offsets.c3
372 files changed, 26423 insertions, 5251 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index 694c9af520bb..3ea332b009e5 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -36,3 +36,6 @@ config HAVE_KPROBES
config HAVE_KRETPROBES
def_bool n
+
+config HAVE_DMA_ATTRS
+ def_bool n
diff --git a/arch/alpha/kernel/asm-offsets.c b/arch/alpha/kernel/asm-offsets.c
index 6c56c754a0b5..4b18cd94d59d 100644
--- a/arch/alpha/kernel/asm-offsets.c
+++ b/arch/alpha/kernel/asm-offsets.c
@@ -8,13 +8,9 @@
#include <linux/stddef.h>
#include <linux/sched.h>
#include <linux/ptrace.h>
+#include <linux/kbuild.h>
#include <asm/io.h>
-#define DEFINE(sym, val) \
- asm volatile("\n->" #sym " %0 " #val : : "i" (val))
-
-#define BLANK() asm volatile("\n->" : : )
-
void foo(void)
{
DEFINE(TI_TASK, offsetof(struct thread_info, task));
diff --git a/arch/alpha/kernel/core_marvel.c b/arch/alpha/kernel/core_marvel.c
index f10d2eddd2c3..b04f1feb1dda 100644
--- a/arch/alpha/kernel/core_marvel.c
+++ b/arch/alpha/kernel/core_marvel.c
@@ -994,7 +994,7 @@ marvel_agp_configure(alpha_agp_info *agp)
* rate, but warn the user.
*/
printk("%s: unknown PLL setting RNGB=%lx (PLL6_CTL=%016lx)\n",
- __FUNCTION__, IO7_PLL_RNGB(agp_pll), agp_pll);
+ __func__, IO7_PLL_RNGB(agp_pll), agp_pll);
break;
}
@@ -1044,13 +1044,13 @@ marvel_agp_translate(alpha_agp_info *agp, dma_addr_t addr)
if (addr < agp->aperture.bus_base ||
addr >= agp->aperture.bus_base + agp->aperture.size) {
- printk("%s: addr out of range\n", __FUNCTION__);
+ printk("%s: addr out of range\n", __func__);
return -EINVAL;
}
pte = aper->arena->ptes[baddr >> PAGE_SHIFT];
if (!(pte & 1)) {
- printk("%s: pte not valid\n", __FUNCTION__);
+ printk("%s: pte not valid\n", __func__);
return -EINVAL;
}
return (pte >> 1) << PAGE_SHIFT;
diff --git a/arch/alpha/kernel/core_t2.c b/arch/alpha/kernel/core_t2.c
index f5ca5255eb06..c0750291b44a 100644
--- a/arch/alpha/kernel/core_t2.c
+++ b/arch/alpha/kernel/core_t2.c
@@ -336,10 +336,7 @@ t2_direct_map_window1(unsigned long base, unsigned long length)
#if DEBUG_PRINT_FINAL_SETTINGS
printk("%s: setting WBASE1=0x%lx WMASK1=0x%lx TBASE1=0x%lx\n",
- __FUNCTION__,
- *(vulp)T2_WBASE1,
- *(vulp)T2_WMASK1,
- *(vulp)T2_TBASE1);
+ __func__, *(vulp)T2_WBASE1, *(vulp)T2_WMASK1, *(vulp)T2_TBASE1);
#endif
}
@@ -366,10 +363,7 @@ t2_sg_map_window2(struct pci_controller *hose,
#if DEBUG_PRINT_FINAL_SETTINGS
printk("%s: setting WBASE2=0x%lx WMASK2=0x%lx TBASE2=0x%lx\n",
- __FUNCTION__,
- *(vulp)T2_WBASE2,
- *(vulp)T2_WMASK2,
- *(vulp)T2_TBASE2);
+ __func__, *(vulp)T2_WBASE2, *(vulp)T2_WMASK2, *(vulp)T2_TBASE2);
#endif
}
@@ -377,15 +371,15 @@ static void __init
t2_save_configuration(void)
{
#if DEBUG_PRINT_INITIAL_SETTINGS
- printk("%s: HAE_1 was 0x%lx\n", __FUNCTION__, srm_hae); /* HW is 0 */
- printk("%s: HAE_2 was 0x%lx\n", __FUNCTION__, *(vulp)T2_HAE_2);
- printk("%s: HAE_3 was 0x%lx\n", __FUNCTION__, *(vulp)T2_HAE_3);
- printk("%s: HAE_4 was 0x%lx\n", __FUNCTION__, *(vulp)T2_HAE_4);
- printk("%s: HBASE was 0x%lx\n", __FUNCTION__, *(vulp)T2_HBASE);
+ printk("%s: HAE_1 was 0x%lx\n", __func__, srm_hae); /* HW is 0 */
+ printk("%s: HAE_2 was 0x%lx\n", __func__, *(vulp)T2_HAE_2);
+ printk("%s: HAE_3 was 0x%lx\n", __func__, *(vulp)T2_HAE_3);
+ printk("%s: HAE_4 was 0x%lx\n", __func__, *(vulp)T2_HAE_4);
+ printk("%s: HBASE was 0x%lx\n", __func__, *(vulp)T2_HBASE);
- printk("%s: WBASE1=0x%lx WMASK1=0x%lx TBASE1=0x%lx\n", __FUNCTION__,
+ printk("%s: WBASE1=0x%lx WMASK1=0x%lx TBASE1=0x%lx\n", __func__,
*(vulp)T2_WBASE1, *(vulp)T2_WMASK1, *(vulp)T2_TBASE1);
- printk("%s: WBASE2=0x%lx WMASK2=0x%lx TBASE2=0x%lx\n", __FUNCTION__,
+ printk("%s: WBASE2=0x%lx WMASK2=0x%lx TBASE2=0x%lx\n", __func__,
*(vulp)T2_WBASE2, *(vulp)T2_WMASK2, *(vulp)T2_TBASE2);
#endif
diff --git a/arch/alpha/kernel/core_titan.c b/arch/alpha/kernel/core_titan.c
index 819326627b96..319fcb74611e 100644
--- a/arch/alpha/kernel/core_titan.c
+++ b/arch/alpha/kernel/core_titan.c
@@ -365,21 +365,21 @@ void __init
titan_init_arch(void)
{
#if 0
- printk("%s: titan_init_arch()\n", __FUNCTION__);
- printk("%s: CChip registers:\n", __FUNCTION__);
- printk("%s: CSR_CSC 0x%lx\n", __FUNCTION__, TITAN_cchip->csc.csr);
- printk("%s: CSR_MTR 0x%lx\n", __FUNCTION__, TITAN_cchip->mtr.csr);
- printk("%s: CSR_MISC 0x%lx\n", __FUNCTION__, TITAN_cchip->misc.csr);
- printk("%s: CSR_DIM0 0x%lx\n", __FUNCTION__, TITAN_cchip->dim0.csr);
- printk("%s: CSR_DIM1 0x%lx\n", __FUNCTION__, TITAN_cchip->dim1.csr);
- printk("%s: CSR_DIR0 0x%lx\n", __FUNCTION__, TITAN_cchip->dir0.csr);
- printk("%s: CSR_DIR1 0x%lx\n", __FUNCTION__, TITAN_cchip->dir1.csr);
- printk("%s: CSR_DRIR 0x%lx\n", __FUNCTION__, TITAN_cchip->drir.csr);
-
- printk("%s: DChip registers:\n", __FUNCTION__);
- printk("%s: CSR_DSC 0x%lx\n", __FUNCTION__, TITAN_dchip->dsc.csr);
- printk("%s: CSR_STR 0x%lx\n", __FUNCTION__, TITAN_dchip->str.csr);
- printk("%s: CSR_DREV 0x%lx\n", __FUNCTION__, TITAN_dchip->drev.csr);
+ printk("%s: titan_init_arch()\n", __func__);
+ printk("%s: CChip registers:\n", __func__);
+ printk("%s: CSR_CSC 0x%lx\n", __func__, TITAN_cchip->csc.csr);
+ printk("%s: CSR_MTR 0x%lx\n", __func__, TITAN_cchip->mtr.csr);
+ printk("%s: CSR_MISC 0x%lx\n", __func__, TITAN_cchip->misc.csr);
+ printk("%s: CSR_DIM0 0x%lx\n", __func__, TITAN_cchip->dim0.csr);
+ printk("%s: CSR_DIM1 0x%lx\n", __func__, TITAN_cchip->dim1.csr);
+ printk("%s: CSR_DIR0 0x%lx\n", __func__, TITAN_cchip->dir0.csr);
+ printk("%s: CSR_DIR1 0x%lx\n", __func__, TITAN_cchip->dir1.csr);
+ printk("%s: CSR_DRIR 0x%lx\n", __func__, TITAN_cchip->drir.csr);
+
+ printk("%s: DChip registers:\n", __func__);
+ printk("%s: CSR_DSC 0x%lx\n", __func__, TITAN_dchip->dsc.csr);
+ printk("%s: CSR_STR 0x%lx\n", __func__, TITAN_dchip->str.csr);
+ printk("%s: CSR_DREV 0x%lx\n", __func__, TITAN_dchip->drev.csr);
#endif
boot_cpuid = __hard_smp_processor_id();
@@ -700,13 +700,13 @@ titan_agp_translate(alpha_agp_info *agp, dma_addr_t addr)
if (addr < agp->aperture.bus_base ||
addr >= agp->aperture.bus_base + agp->aperture.size) {
- printk("%s: addr out of range\n", __FUNCTION__);
+ printk("%s: addr out of range\n", __func__);
return -EINVAL;
}
pte = aper->arena->ptes[baddr >> PAGE_SHIFT];
if (!(pte & 1)) {
- printk("%s: pte not valid\n", __FUNCTION__);
+ printk("%s: pte not valid\n", __func__);
return -EINVAL;
}
diff --git a/arch/alpha/kernel/core_tsunami.c b/arch/alpha/kernel/core_tsunami.c
index ef91e09590d4..5e7c28f92f19 100644
--- a/arch/alpha/kernel/core_tsunami.c
+++ b/arch/alpha/kernel/core_tsunami.c
@@ -241,8 +241,6 @@ tsunami_probe_write(volatile unsigned long *vaddr)
#define tsunami_probe_read(ADDR) 1
#endif /* NXM_MACHINE_CHECKS_ON_TSUNAMI */
-#define FN __FUNCTION__
-
static void __init
tsunami_init_one_pchip(tsunami_pchip *pchip, int index)
{
@@ -383,27 +381,27 @@ tsunami_init_arch(void)
/* NXMs just don't matter to Tsunami--unless they make it
choke completely. */
tmp = (unsigned long)(TSUNAMI_cchip - 1);
- printk("%s: probing bogus address: 0x%016lx\n", FN, bogus_addr);
+ printk("%s: probing bogus address: 0x%016lx\n", __func__, bogus_addr);
printk("\tprobe %s\n",
tsunami_probe_write((unsigned long *)bogus_addr)
? "succeeded" : "failed");
#endif /* NXM_MACHINE_CHECKS_ON_TSUNAMI */
#if 0
- printk("%s: CChip registers:\n", FN);
- printk("%s: CSR_CSC 0x%lx\n", FN, TSUNAMI_cchip->csc.csr);
- printk("%s: CSR_MTR 0x%lx\n", FN, TSUNAMI_cchip.mtr.csr);
- printk("%s: CSR_MISC 0x%lx\n", FN, TSUNAMI_cchip->misc.csr);
- printk("%s: CSR_DIM0 0x%lx\n", FN, TSUNAMI_cchip->dim0.csr);
- printk("%s: CSR_DIM1 0x%lx\n", FN, TSUNAMI_cchip->dim1.csr);
- printk("%s: CSR_DIR0 0x%lx\n", FN, TSUNAMI_cchip->dir0.csr);
- printk("%s: CSR_DIR1 0x%lx\n", FN, TSUNAMI_cchip->dir1.csr);
- printk("%s: CSR_DRIR 0x%lx\n", FN, TSUNAMI_cchip->drir.csr);
+ printk("%s: CChip registers:\n", __func__);
+ printk("%s: CSR_CSC 0x%lx\n", __func__, TSUNAMI_cchip->csc.csr);
+ printk("%s: CSR_MTR 0x%lx\n", __func__, TSUNAMI_cchip.mtr.csr);
+ printk("%s: CSR_MISC 0x%lx\n", __func__, TSUNAMI_cchip->misc.csr);
+ printk("%s: CSR_DIM0 0x%lx\n", __func__, TSUNAMI_cchip->dim0.csr);
+ printk("%s: CSR_DIM1 0x%lx\n", __func__, TSUNAMI_cchip->dim1.csr);
+ printk("%s: CSR_DIR0 0x%lx\n", __func__, TSUNAMI_cchip->dir0.csr);
+ printk("%s: CSR_DIR1 0x%lx\n", __func__, TSUNAMI_cchip->dir1.csr);
+ printk("%s: CSR_DRIR 0x%lx\n", __func__, TSUNAMI_cchip->drir.csr);
printk("%s: DChip registers:\n");
- printk("%s: CSR_DSC 0x%lx\n", FN, TSUNAMI_dchip->dsc.csr);
- printk("%s: CSR_STR 0x%lx\n", FN, TSUNAMI_dchip->str.csr);
- printk("%s: CSR_DREV 0x%lx\n", FN, TSUNAMI_dchip->drev.csr);
+ printk("%s: CSR_DSC 0x%lx\n", __func__, TSUNAMI_dchip->dsc.csr);
+ printk("%s: CSR_STR 0x%lx\n", __func__, TSUNAMI_dchip->str.csr);
+ printk("%s: CSR_DREV 0x%lx\n", __func__, TSUNAMI_dchip->drev.csr);
#endif
/* With multiple PCI busses, we play with I/O as physical addrs. */
ioport_resource.end = ~0UL;
diff --git a/arch/alpha/kernel/module.c b/arch/alpha/kernel/module.c
index 026ba9af6d6a..ebc3c894b5a2 100644
--- a/arch/alpha/kernel/module.c
+++ b/arch/alpha/kernel/module.c
@@ -120,6 +120,12 @@ module_frob_arch_sections(Elf64_Ehdr *hdr, Elf64_Shdr *sechdrs,
nsyms = symtab->sh_size / sizeof(Elf64_Sym);
chains = kcalloc(nsyms, sizeof(struct got_entry), GFP_KERNEL);
+ if (!chains) {
+ printk(KERN_ERR
+ "module %s: no memory for symbol chain buffer\n",
+ me->name);
+ return -ENOMEM;
+ }
got->sh_size = 0;
got->sh_addralign = 8;
diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c
index 78357798b6fd..36ab22a7ea12 100644
--- a/arch/alpha/kernel/pci.c
+++ b/arch/alpha/kernel/pci.c
@@ -208,7 +208,7 @@ pdev_save_srm_config(struct pci_dev *dev)
tmp = kmalloc(sizeof(*tmp), GFP_KERNEL);
if (!tmp) {
- printk(KERN_ERR "%s: kmalloc() failed!\n", __FUNCTION__);
+ printk(KERN_ERR "%s: kmalloc() failed!\n", __func__);
return;
}
tmp->next = srm_saved_configs;
@@ -514,8 +514,8 @@ sys_pciconfig_iobase(long which, unsigned long bus, unsigned long dfn)
void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
{
- unsigned long start = pci_resource_start(dev, bar);
- unsigned long len = pci_resource_len(dev, bar);
+ resource_size_t start = pci_resource_start(dev, bar);
+ resource_size_t len = pci_resource_len(dev, bar);
unsigned long flags = pci_resource_flags(dev, bar);
if (!len || !start)
diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c
index dd6e334ab9e1..2179c602032a 100644
--- a/arch/alpha/kernel/pci_iommu.c
+++ b/arch/alpha/kernel/pci_iommu.c
@@ -79,25 +79,21 @@ iommu_arena_new_node(int nid, struct pci_controller *hose, dma_addr_t base,
#ifdef CONFIG_DISCONTIGMEM
- if (!NODE_DATA(nid) ||
- (NULL == (arena = alloc_bootmem_node(NODE_DATA(nid),
- sizeof(*arena))))) {
- printk("%s: couldn't allocate arena from node %d\n"
- " falling back to system-wide allocation\n",
- __FUNCTION__, nid);
- arena = alloc_bootmem(sizeof(*arena));
- }
-
- if (!NODE_DATA(nid) ||
- (NULL == (arena->ptes = __alloc_bootmem_node(NODE_DATA(nid),
- mem_size,
- align,
- 0)))) {
- printk("%s: couldn't allocate arena ptes from node %d\n"
- " falling back to system-wide allocation\n",
- __FUNCTION__, nid);
- arena->ptes = __alloc_bootmem(mem_size, align, 0);
- }
+ arena = alloc_bootmem_node(NODE_DATA(nid), sizeof(*arena));
+ if (!NODE_DATA(nid) || !arena) {
+ printk("%s: couldn't allocate arena from node %d\n"
+ " falling back to system-wide allocation\n",
+ __func__, nid);
+ arena = alloc_bootmem(sizeof(*arena));
+ }
+
+ arena->ptes = __alloc_bootmem_node(NODE_DATA(nid), mem_size, align, 0);
+ if (!NODE_DATA(nid) || !arena->ptes) {
+ printk("%s: couldn't allocate arena ptes from node %d\n"
+ " falling back to system-wide allocation\n",
+ __func__, nid);
+ arena->ptes = __alloc_bootmem(mem_size, align, 0);
+ }
#else /* CONFIG_DISCONTIGMEM */
diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
index 63c2073401ee..2525692db0ab 100644
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -755,7 +755,7 @@ smp_call_function_on_cpu (void (*func) (void *info), void *info, int retry,
if (atomic_read(&data.unstarted_count) > 0) {
long start_time = jiffies;
printk(KERN_ERR "%s: initial timeout -- trying long wait\n",
- __FUNCTION__);
+ __func__);
timeout = jiffies + 30 * HZ;
while (atomic_read(&data.unstarted_count) > 0
&& time_before(jiffies, timeout))
@@ -764,7 +764,7 @@ smp_call_function_on_cpu (void (*func) (void *info), void *info, int retry,
long delta = jiffies - start_time;
printk(KERN_ERR
"%s: response %ld.%ld seconds into long wait\n",
- __FUNCTION__, delta / HZ,
+ __func__, delta / HZ,
(100 * (delta - ((delta / HZ) * HZ))) / HZ);
}
}
diff --git a/arch/alpha/kernel/srm_env.c b/arch/alpha/kernel/srm_env.c
index f7dd081d57ff..78ad7cd1bbd6 100644
--- a/arch/alpha/kernel/srm_env.c
+++ b/arch/alpha/kernel/srm_env.c
@@ -199,7 +199,7 @@ srm_env_init(void)
printk(KERN_INFO "%s: This Alpha system doesn't "
"know about SRM (or you've booted "
"SRM->MILO->Linux, which gets "
- "misdetected)...\n", __FUNCTION__);
+ "misdetected)...\n", __func__);
return -ENODEV;
}
diff --git a/arch/alpha/kernel/sys_alcor.c b/arch/alpha/kernel/sys_alcor.c
index d187d01d2a17..e53a1e1c2f21 100644
--- a/arch/alpha/kernel/sys_alcor.c
+++ b/arch/alpha/kernel/sys_alcor.c
@@ -259,7 +259,7 @@ alcor_init_pci(void)
if (dev && dev->devfn == PCI_DEVFN(6,0)) {
alpha_mv.sys.cia.gru_int_req_bits = XLT_GRU_INT_REQ_BITS;
printk(KERN_INFO "%s: Detected AS500 or XLT motherboard.\n",
- __FUNCTION__);
+ __func__);
}
pci_dev_put(dev);
}
diff --git a/arch/alpha/kernel/sys_marvel.c b/arch/alpha/kernel/sys_marvel.c
index 922143ea1cdb..828449cd2636 100644
--- a/arch/alpha/kernel/sys_marvel.c
+++ b/arch/alpha/kernel/sys_marvel.c
@@ -80,7 +80,7 @@ io7_get_irq_ctl(unsigned int irq, struct io7 **pio7)
if (!(io7 = marvel_find_io7(pid))) {
printk(KERN_ERR
"%s for nonexistent io7 -- vec %x, pid %d\n",
- __FUNCTION__, irq, pid);
+ __func__, irq, pid);
return NULL;
}
@@ -90,7 +90,7 @@ io7_get_irq_ctl(unsigned int irq, struct io7 **pio7)
if (irq >= 0x180) {
printk(KERN_ERR
"%s for invalid irq -- pid %d adjusted irq %x\n",
- __FUNCTION__, pid, irq);
+ __func__, pid, irq);
return NULL;
}
@@ -110,8 +110,8 @@ io7_enable_irq(unsigned int irq)
ctl = io7_get_irq_ctl(irq, &io7);
if (!ctl || !io7) {
- printk(KERN_ERR "%s: get_ctl failed for irq %x\n",
- __FUNCTION__, irq);
+ printk(KERN_ERR "%s: get_ctl failed for irq %x\n",
+ __func__, irq);
return;
}
@@ -130,8 +130,8 @@ io7_disable_irq(unsigned int irq)
ctl = io7_get_irq_ctl(irq, &io7);
if (!ctl || !io7) {
- printk(KERN_ERR "%s: get_ctl failed for irq %x\n",
- __FUNCTION__, irq);
+ printk(KERN_ERR "%s: get_ctl failed for irq %x\n",
+ __func__, irq);
return;
}
diff --git a/arch/alpha/kernel/sys_sable.c b/arch/alpha/kernel/sys_sable.c
index 906019cfa681..99a7f19da13a 100644
--- a/arch/alpha/kernel/sys_sable.c
+++ b/arch/alpha/kernel/sys_sable.c
@@ -454,7 +454,7 @@ sable_lynx_enable_irq(unsigned int irq)
spin_unlock(&sable_lynx_irq_lock);
#if 0
printk("%s: mask 0x%lx bit 0x%x irq 0x%x\n",
- __FUNCTION__, mask, bit, irq);
+ __func__, mask, bit, irq);
#endif
}
@@ -470,7 +470,7 @@ sable_lynx_disable_irq(unsigned int irq)
spin_unlock(&sable_lynx_irq_lock);
#if 0
printk("%s: mask 0x%lx bit 0x%x irq 0x%x\n",
- __FUNCTION__, mask, bit, irq);
+ __func__, mask, bit, irq);
#endif
}
@@ -524,7 +524,7 @@ sable_lynx_srm_device_interrupt(unsigned long vector)
irq = sable_lynx_irq_swizzle->mask_to_irq[bit];
#if 0
printk("%s: vector 0x%lx bit 0x%x irq 0x%x\n",
- __FUNCTION__, vector, bit, irq);
+ __func__, vector, bit, irq);
#endif
handle_irq(irq);
}
diff --git a/arch/alpha/kernel/sys_sio.c b/arch/alpha/kernel/sys_sio.c
index ee7b9009ebb4..d4327e461c22 100644
--- a/arch/alpha/kernel/sys_sio.c
+++ b/arch/alpha/kernel/sys_sio.c
@@ -89,7 +89,7 @@ sio_pci_route(void)
/* First, ALWAYS read and print the original setting. */
pci_bus_read_config_dword(pci_isa_hose->bus, PCI_DEVFN(7, 0), 0x60,
&orig_route_tab);
- printk("%s: PIRQ original 0x%x new 0x%x\n", __FUNCTION__,
+ printk("%s: PIRQ original 0x%x new 0x%x\n", __func__,
orig_route_tab, alpha_mv.sys.sio.route_tab);
#if defined(ALPHA_RESTORE_SRM_SETUP)
diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c
index 2dc7f9fed213..dc57790250d2 100644
--- a/arch/alpha/kernel/traps.c
+++ b/arch/alpha/kernel/traps.c
@@ -8,6 +8,7 @@
* This file initializes the trap entry points
*/
+#include <linux/jiffies.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/tty.h>
@@ -770,7 +771,7 @@ do_entUnaUser(void __user * va, unsigned long opcode,
unsigned long reg, struct pt_regs *regs)
{
static int cnt = 0;
- static long last_time = 0;
+ static unsigned long last_time;
unsigned long tmp1, tmp2, tmp3, tmp4;
unsigned long fake_reg, *reg_addr = &fake_reg;
@@ -781,7 +782,7 @@ do_entUnaUser(void __user * va, unsigned long opcode,
with the unaliged access. */
if (!test_thread_flag (TIF_UAC_NOPRINT)) {
- if (cnt >= 5 && jiffies - last_time > 5*HZ) {
+ if (cnt >= 5 && time_after(jiffies, last_time + 5 * HZ)) {
cnt = 0;
}
if (++cnt < 5) {
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index d8d253285a94..b786e68914d4 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -8,6 +8,7 @@ mainmenu "Linux Kernel Configuration"
config ARM
bool
default y
+ select HAVE_IDE
select RTC_LIB
select SYS_SUPPORTS_APM_EMULATION
select HAVE_OPROFILE
@@ -223,7 +224,6 @@ config ARCH_CLPS7500
select TIMER_ACORN
select ISA
select NO_IOPORT
- select HAVE_IDE
help
Support for the Cirrus Logic PS7500FE system-on-a-chip.
@@ -236,7 +236,6 @@ config ARCH_CO285
bool "Co-EBSA285"
select FOOTBRIDGE
select FOOTBRIDGE_ADDIN
- select HAVE_IDE
help
Support for Intel's EBSA285 companion chip.
@@ -262,7 +261,6 @@ config ARCH_EP93XX
config ARCH_FOOTBRIDGE
bool "FootBridge"
select FOOTBRIDGE
- select HAVE_IDE
help
Support for systems based on the DC21285 companion chip
("FootBridge"), such as the Simtec CATS and the Rebel NetWinder.
@@ -301,7 +299,6 @@ config ARCH_IOP32X
depends on MMU
select PLAT_IOP
select PCI
- select HAVE_IDE
help
Support for Intel's 80219 and IOP32X (XScale) family of
processors.
@@ -311,14 +308,12 @@ config ARCH_IOP33X
depends on MMU
select PLAT_IOP
select PCI
- select HAVE_IDE
help
Support for Intel's IOP33X (XScale) family of processors.
config ARCH_IXP23XX
bool "IXP23XX-based"
depends on MMU
- select HAVE_IDE
select PCI
help
Support for Intel's IXP23xx (XScale) family of processors.
@@ -336,14 +331,12 @@ config ARCH_IXP4XX
select GENERIC_GPIO
select GENERIC_TIME
select GENERIC_CLOCKEVENTS
- select HAVE_IDE
help
Support for Intel's IXP4XX (XScale) family of processors.
config ARCH_L7200
bool "LinkUp-L7200"
select FIQ
- select HAVE_IDE
help
Say Y here if you intend to run this kernel on a LinkUp Systems
L7200 Software Development Board which uses an ARM720T processor.
@@ -400,7 +393,6 @@ config ARCH_PXA
depends on MMU
select ARCH_MTD_XIP
select GENERIC_GPIO
- select HAVE_IDE
select HAVE_GPIO_LIB
select GENERIC_TIME
select GENERIC_CLOCKEVENTS
@@ -416,7 +408,6 @@ config ARCH_RPC
select ARCH_MAY_HAVE_PC_FDC
select ISA_DMA_API
select NO_IOPORT
- select HAVE_IDE
help
On the Acorn Risc-PC, Linux can support the internal IDE disk and
CD-ROM interface, serial and parallel port, and the floppy drive.
@@ -432,7 +423,6 @@ config ARCH_SA1100
select GENERIC_TIME
select GENERIC_CLOCKEVENTS
select TICK_ONESHOT
- select HAVE_IDE
select HAVE_GPIO_LIB
help
Support for StrongARM 11x0 based boards.
@@ -440,7 +430,6 @@ config ARCH_SA1100
config ARCH_S3C2410
bool "Samsung S3C2410, S3C2412, S3C2413, S3C2440, S3C2442, S3C2443"
select GENERIC_GPIO
- select HAVE_IDE
help
Samsung S3C2410X CPU based systems, such as the Simtec Electronics
BAST (<http://www.simtec.co.uk/products/EB110ITX/>), the IPAQ 1940 or
@@ -448,7 +437,6 @@ config ARCH_S3C2410
config ARCH_SHARK
bool "Shark"
- select HAVE_IDE
select ISA
select ISA_DMA
select PCI
@@ -458,7 +446,6 @@ config ARCH_SHARK
config ARCH_LH7A40X
bool "Sharp LH7A40X"
- select HAVE_IDE
help
Say Y here for systems based on one of the Sharp LH7A40X
System on a Chip processors. These CPUs include an ARM922T
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index 0a0d2479274b..4a881258bb17 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -16,6 +16,7 @@
#include <asm/thread_info.h>
#include <asm/memory.h>
#include <asm/procinfo.h>
+#include <linux/kbuild.h>
/*
* Make sure that the compiler and target are compatible.
@@ -35,13 +36,6 @@
#error Known good compilers: 3.3
#endif
-/* Use marker if you need to separate the values later */
-
-#define DEFINE(sym, val) \
- asm volatile("\n->" #sym " %0 " #val : : "i" (val))
-
-#define BLANK() asm volatile("\n->" : : )
-
int main(void)
{
DEFINE(TSK_ACTIVE_MM, offsetof(struct task_struct, active_mm));
diff --git a/arch/arm/kernel/atags.c b/arch/arm/kernel/atags.c
index e2e934c38080..64c420805e6f 100644
--- a/arch/arm/kernel/atags.c
+++ b/arch/arm/kernel/atags.c
@@ -35,7 +35,7 @@ create_proc_entries(void)
{
struct proc_dir_entry* tags_entry;
- tags_entry = create_proc_read_entry("atags", 0400, &proc_root, read_buffer, &tags_buffer);
+ tags_entry = create_proc_read_entry("atags", 0400, NULL, read_buffer, &tags_buffer);
if (!tags_entry)
return -ENOMEM;
diff --git a/arch/arm/kernel/ecard.c b/arch/arm/kernel/ecard.c
index f56d48c451ea..a53c0aba5c14 100644
--- a/arch/arm/kernel/ecard.c
+++ b/arch/arm/kernel/ecard.c
@@ -37,6 +37,7 @@
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
#include <linux/device.h>
#include <linux/init.h>
#include <linux/mutex.h>
@@ -723,17 +724,14 @@ unsigned int __ecard_address(ecard_t *ec, card_type_t type, card_speed_t speed)
return address;
}
-static int ecard_prints(char *buffer, ecard_t *ec)
+static int ecard_prints(struct seq_file *m, ecard_t *ec)
{
- char *start = buffer;
-
- buffer += sprintf(buffer, " %d: %s ", ec->slot_no,
- ec->easi ? "EASI" : " ");
+ seq_printf(m, " %d: %s ", ec->slot_no, ec->easi ? "EASI" : " ");
if (ec->cid.id == 0) {
struct in_chunk_dir incd;
- buffer += sprintf(buffer, "[%04X:%04X] ",
+ seq_printf(m, "[%04X:%04X] ",
ec->cid.manufacturer, ec->cid.product);
if (!ec->card_desc && ec->cid.cd &&
@@ -744,43 +742,43 @@ static int ecard_prints(char *buffer, ecard_t *ec)
strcpy((char *)ec->card_desc, incd.d.string);
}
- buffer += sprintf(buffer, "%s\n", ec->card_desc ? ec->card_desc : "*unknown*");
+ seq_printf(m, "%s\n", ec->card_desc ? ec->card_desc : "*unknown*");
} else
- buffer += sprintf(buffer, "Simple card %d\n", ec->cid.id);
+ seq_printf(m, "Simple card %d\n", ec->cid.id);
- return buffer - start;
+ return 0;
}
-static int get_ecard_dev_info(char *buf, char **start, off_t pos, int count)
+static int ecard_devices_proc_show(struct seq_file *m, void *v)
{
ecard_t *ec = cards;
- off_t at = 0;
- int len, cnt;
-
- cnt = 0;
- while (ec && count > cnt) {
- len = ecard_prints(buf, ec);
- at += len;
- if (at >= pos) {
- if (!*start) {
- *start = buf + (pos - (at - len));
- cnt = at - pos;
- } else
- cnt += len;
- buf += len;
- }
+
+ while (ec) {
+ ecard_prints(m, ec);
ec = ec->next;
}
- return (count > cnt) ? cnt : count;
+ return 0;
}
+static int ecard_devices_proc_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, ecard_devices_proc_show, NULL);
+}
+
+static const struct file_operations bus_ecard_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = ecard_devices_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
static struct proc_dir_entry *proc_bus_ecard_dir = NULL;
static void ecard_proc_init(void)
{
- proc_bus_ecard_dir = proc_mkdir("ecard", proc_bus);
- create_proc_info_entry("devices", 0, proc_bus_ecard_dir,
- get_ecard_dev_info);
+ proc_bus_ecard_dir = proc_mkdir("bus/ecard", NULL);
+ proc_create("devices", 0, proc_bus_ecard_dir, &bus_ecard_proc_fops);
}
#define ec_set_resource(ec,nr,st,sz) \
diff --git a/arch/arm/mach-at91/at91sam9261_devices.c b/arch/arm/mach-at91/at91sam9261_devices.c
index 37cd547855b1..728bb8f39441 100644
--- a/arch/arm/mach-at91/at91sam9261_devices.c
+++ b/arch/arm/mach-at91/at91sam9261_devices.c
@@ -539,6 +539,17 @@ void __init at91_add_device_lcdc(struct atmel_lcdfb_info *data)
at91_set_B_periph(AT91_PIN_PB28, 0); /* LCDD23 */
#endif
+ if (ARRAY_SIZE(lcdc_resources) > 2) {
+ void __iomem *fb;
+ struct resource *fb_res = &lcdc_resources[2];
+ size_t fb_len = fb_res->end - fb_res->start + 1;
+
+ fb = ioremap_writecombine(fb_res->start, fb_len);
+ if (fb) {
+ memset(fb, 0, fb_len);
+ iounmap(fb, fb_len);
+ }
+ }
lcdc_data = *data;
platform_device_register(&at91_lcdc_device);
}
diff --git a/arch/arm/mach-at91/at91sam9rl_devices.c b/arch/arm/mach-at91/at91sam9rl_devices.c
index dbb9a5fc2090..054689804e77 100644
--- a/arch/arm/mach-at91/at91sam9rl_devices.c
+++ b/arch/arm/mach-at91/at91sam9rl_devices.c
@@ -381,6 +381,20 @@ void __init at91_add_device_lcdc(struct atmel_lcdfb_info *data)
at91_set_B_periph(AT91_PIN_PC24, 0); /* LCDD22 */
at91_set_B_periph(AT91_PIN_PC25, 0); /* LCDD23 */
+#ifdef CONFIG_FB_INTSRAM
+ {
+ void __iomem *fb;
+ struct resource *fb_res = &lcdc_resources[2];
+ size_t fb_len = fb_res->end - fb_res->start + 1;
+
+ fb = ioremap_writecombine(fb_res->start, fb_len);
+ if (fb) {
+ memset(fb, 0, fb_len);
+ iounmap(fb, fb_len);
+ }
+ }
+#endif
+
lcdc_data = *data;
platform_device_register(&at91_lcdc_device);
}
diff --git a/arch/arm/mach-davinci/clock.c b/arch/arm/mach-davinci/clock.c
index 4143828a9684..c6b94f60e0b2 100644
--- a/arch/arm/mach-davinci/clock.c
+++ b/arch/arm/mach-davinci/clock.c
@@ -311,11 +311,7 @@ static const struct file_operations proc_davinci_ck_operations = {
static int __init davinci_ck_proc_init(void)
{
- struct proc_dir_entry *entry;
-
- entry = create_proc_entry("davinci_clocks", 0, NULL);
- if (entry)
- entry->proc_fops = &proc_davinci_ck_operations;
+ proc_create("davinci_clocks", 0, NULL, &proc_davinci_ck_operations);
return 0;
}
diff --git a/arch/arm/mm/iomap.c b/arch/arm/mm/iomap.c
index 62066f3020c8..7429f8c01015 100644
--- a/arch/arm/mm/iomap.c
+++ b/arch/arm/mm/iomap.c
@@ -26,8 +26,8 @@ EXPORT_SYMBOL(ioport_unmap);
#ifdef CONFIG_PCI
void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
{
- unsigned long start = pci_resource_start(dev, bar);
- unsigned long len = pci_resource_len(dev, bar);
+ resource_size_t start = pci_resource_start(dev, bar);
+ resource_size_t len = pci_resource_len(dev, bar);
unsigned long flags = pci_resource_flags(dev, bar);
if (!len || !start)
diff --git a/arch/avr32/kernel/asm-offsets.c b/arch/avr32/kernel/asm-offsets.c
index 078cd33f467b..e4796c67a831 100644
--- a/arch/avr32/kernel/asm-offsets.c
+++ b/arch/avr32/kernel/asm-offsets.c
@@ -5,14 +5,7 @@
*/
#include <linux/thread_info.h>
-
-#define DEFINE(sym, val) \
- asm volatile("\n->" #sym " %0 " #val : : "i" (val))
-
-#define BLANK() asm volatile("\n->" : : )
-
-#define OFFSET(sym, str, mem) \
- DEFINE(sym, offsetof(struct str, mem));
+#include <linux/kbuild.h>
void foo(void)
{
diff --git a/arch/avr32/kernel/setup.c b/arch/avr32/kernel/setup.c
index 2687b730e2d0..ce48c14f4349 100644
--- a/arch/avr32/kernel/setup.c
+++ b/arch/avr32/kernel/setup.c
@@ -274,6 +274,8 @@ static int __init early_parse_fbmem(char *p)
printk(KERN_WARNING
"Failed to allocate framebuffer memory\n");
fbmem_size = 0;
+ } else {
+ memset(__va(fbmem_start), 0, fbmem_size);
}
}
diff --git a/arch/avr32/mm/tlb.c b/arch/avr32/mm/tlb.c
index b835257a8fa3..cd12edbea9f2 100644
--- a/arch/avr32/mm/tlb.c
+++ b/arch/avr32/mm/tlb.c
@@ -369,11 +369,7 @@ static const struct file_operations proc_tlb_operations = {
static int __init proctlb_init(void)
{
- struct proc_dir_entry *entry;
-
- entry = create_proc_entry("tlb", 0, NULL);
- if (entry)
- entry->proc_fops = &proc_tlb_operations;
+ proc_create("tlb", 0, NULL, &proc_tlb_operations);
return 0;
}
late_initcall(proctlb_init);
diff --git a/arch/blackfin/kernel/asm-offsets.c b/arch/blackfin/kernel/asm-offsets.c
index b56b2741cdea..721f15f3cebf 100644
--- a/arch/blackfin/kernel/asm-offsets.c
+++ b/arch/blackfin/kernel/asm-offsets.c
@@ -34,8 +34,7 @@
#include <linux/hardirq.h>
#include <linux/irq.h>
#include <linux/thread_info.h>
-
-#define DEFINE(sym, val) asm volatile("\n->" #sym " %0 " #val : : "i" (val))
+#include <linux/kbuild.h>
int main(void)
{
diff --git a/arch/blackfin/kernel/signal.c b/arch/blackfin/kernel/signal.c
index d1fa24401dc6..cb9d883d493c 100644
--- a/arch/blackfin/kernel/signal.c
+++ b/arch/blackfin/kernel/signal.c
@@ -212,7 +212,7 @@ setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t * info,
/* Set up registers for signal handler */
wrusp((unsigned long)frame);
- if (get_personality & FDPIC_FUNCPTRS) {
+ if (current->personality & FDPIC_FUNCPTRS) {
struct fdpic_func_descriptor __user *funcptr =
(struct fdpic_func_descriptor *) ka->sa.sa_handler;
__get_user(regs->pc, &funcptr->text);
diff --git a/arch/cris/kernel/profile.c b/arch/cris/kernel/profile.c
index aad0a9e5991a..44f7b4f79476 100644
--- a/arch/cris/kernel/profile.c
+++ b/arch/cris/kernel/profile.c
@@ -75,9 +75,9 @@ __init init_cris_profile(void)
sample_buffer_pos = sample_buffer;
- entry = create_proc_entry("system_profile", S_IWUSR | S_IRUGO, NULL);
+ entry = proc_create("system_profile", S_IWUSR | S_IRUGO, NULL,
+ &cris_proc_profile_operations);
if (entry) {
- entry->proc_fops = &cris_proc_profile_operations;
entry->size = SAMPLE_BUFFER_SIZE;
}
prof_running = 1;
diff --git a/arch/cris/mm/init.c b/arch/cris/mm/init.c
index 4207a2b52750..5b06ffa15e34 100644
--- a/arch/cris/mm/init.c
+++ b/arch/cris/mm/init.c
@@ -27,7 +27,6 @@ show_mem(void)
printk("\nMem-info:\n");
show_free_areas();
- printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
i = max_mapnr;
while (i-- > 0) {
total++;
diff --git a/arch/frv/kernel/asm-offsets.c b/arch/frv/kernel/asm-offsets.c
index fbb19fc1af40..9de96843a278 100644
--- a/arch/frv/kernel/asm-offsets.c
+++ b/arch/frv/kernel/asm-offsets.c
@@ -7,15 +7,13 @@
#include <linux/sched.h>
#include <linux/signal.h>
#include <linux/personality.h>
+#include <linux/kbuild.h>
#include <asm/registers.h>
#include <asm/ucontext.h>
#include <asm/processor.h>
#include <asm/thread_info.h>
#include <asm/gdb-stub.h>
-#define DEFINE(sym, val) \
- asm volatile("\n->" #sym " %0 " #val : : "i" (val))
-
#define DEF_PTREG(sym, reg) \
asm volatile("\n->" #sym " %0 offsetof(struct pt_regs, " #reg ")" \
: : "i" (offsetof(struct pt_regs, reg)))
@@ -32,11 +30,6 @@
asm volatile("\n->" #sym " %0 offsetof(struct frv_frame0, " #reg ")" \
: : "i" (offsetof(struct frv_frame0, reg)))
-#define BLANK() asm volatile("\n->" : : )
-
-#define OFFSET(sym, str, mem) \
- DEFINE(sym, offsetof(struct str, mem));
-
void foo(void)
{
/* offsets into the thread_info structure */
diff --git a/arch/frv/kernel/signal.c b/arch/frv/kernel/signal.c
index d64bcaff54cd..3bdb368292a8 100644
--- a/arch/frv/kernel/signal.c
+++ b/arch/frv/kernel/signal.c
@@ -297,7 +297,7 @@ static int setup_frame(int sig, struct k_sigaction *ka, sigset_t *set)
__frame->lr = (unsigned long) &frame->retcode;
__frame->gr8 = sig;
- if (get_personality & FDPIC_FUNCPTRS) {
+ if (current->personality & FDPIC_FUNCPTRS) {
struct fdpic_func_descriptor __user *funcptr =
(struct fdpic_func_descriptor __user *) ka->sa.sa_handler;
__get_user(__frame->pc, &funcptr->text);
@@ -396,7 +396,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
__frame->gr8 = sig;
__frame->gr9 = (unsigned long) &frame->info;
- if (get_personality & FDPIC_FUNCPTRS) {
+ if (current->personality & FDPIC_FUNCPTRS) {
struct fdpic_func_descriptor __user *funcptr =
(struct fdpic_func_descriptor __user *) ka->sa.sa_handler;
__get_user(__frame->pc, &funcptr->text);
diff --git a/arch/frv/kernel/traps.c b/arch/frv/kernel/traps.c
index 7089c2428b3f..1d2dfe67d442 100644
--- a/arch/frv/kernel/traps.c
+++ b/arch/frv/kernel/traps.c
@@ -49,7 +49,7 @@ asmlinkage void insn_access_error(unsigned long esfr1, unsigned long epcr0, unsi
info.si_signo = SIGSEGV;
info.si_code = SEGV_ACCERR;
info.si_errno = 0;
- info.si_addr = (void *) ((epcr0 & EPCR0_V) ? (epcr0 & EPCR0_PC) : __frame->pc);
+ info.si_addr = (void __user *) ((epcr0 & EPCR0_V) ? (epcr0 & EPCR0_PC) : __frame->pc);
force_sig_info(info.si_signo, &info, current);
} /* end insn_access_error() */
@@ -73,7 +73,7 @@ asmlinkage void illegal_instruction(unsigned long esfr1, unsigned long epcr0, un
epcr0, esr0, esfr1);
info.si_errno = 0;
- info.si_addr = (void *) ((epcr0 & EPCR0_V) ? (epcr0 & EPCR0_PC) : __frame->pc);
+ info.si_addr = (void __user *) ((epcr0 & EPCR0_V) ? (epcr0 & EPCR0_PC) : __frame->pc);
switch (__frame->tbr & TBR_TT) {
case TBR_TT_ILLEGAL_INSTR:
@@ -111,7 +111,8 @@ asmlinkage void atomic_operation(unsigned long esfr1, unsigned long epcr0,
unsigned long esr0)
{
static DEFINE_SPINLOCK(atomic_op_lock);
- unsigned long x, y, z, *p;
+ unsigned long x, y, z;
+ unsigned long __user *p;
mm_segment_t oldfs;
siginfo_t info;
int ret;
@@ -128,7 +129,7 @@ asmlinkage void atomic_operation(unsigned long esfr1, unsigned long epcr0,
* u32 __atomic_user_cmpxchg32(u32 *ptr, u32 test, u32 new)
*/
case TBR_TT_ATOMIC_CMPXCHG32:
- p = (unsigned long *) __frame->gr8;
+ p = (unsigned long __user *) __frame->gr8;
x = __frame->gr9;
y = __frame->gr10;
@@ -158,7 +159,7 @@ asmlinkage void atomic_operation(unsigned long esfr1, unsigned long epcr0,
* u32 __atomic_kernel_xchg32(void *v, u32 new)
*/
case TBR_TT_ATOMIC_XCHG32:
- p = (unsigned long *) __frame->gr8;
+ p = (unsigned long __user *) __frame->gr8;
y = __frame->gr9;
for (;;) {
@@ -181,7 +182,7 @@ asmlinkage void atomic_operation(unsigned long esfr1, unsigned long epcr0,
* ulong __atomic_kernel_XOR_return(ulong i, ulong *v)
*/
case TBR_TT_ATOMIC_XOR:
- p = (unsigned long *) __frame->gr8;
+ p = (unsigned long __user *) __frame->gr8;
x = __frame->gr9;
for (;;) {
@@ -205,7 +206,7 @@ asmlinkage void atomic_operation(unsigned long esfr1, unsigned long epcr0,
* ulong __atomic_kernel_OR_return(ulong i, ulong *v)
*/
case TBR_TT_ATOMIC_OR:
- p = (unsigned long *) __frame->gr8;
+ p = (unsigned long __user *) __frame->gr8;
x = __frame->gr9;
for (;;) {
@@ -229,7 +230,7 @@ asmlinkage void atomic_operation(unsigned long esfr1, unsigned long epcr0,
* ulong __atomic_kernel_AND_return(ulong i, ulong *v)
*/
case TBR_TT_ATOMIC_AND:
- p = (unsigned long *) __frame->gr8;
+ p = (unsigned long __user *) __frame->gr8;
x = __frame->gr9;
for (;;) {
@@ -253,7 +254,7 @@ asmlinkage void atomic_operation(unsigned long esfr1, unsigned long epcr0,
* int __atomic_user_sub_return(atomic_t *v, int i)
*/
case TBR_TT_ATOMIC_SUB:
- p = (unsigned long *) __frame->gr8;
+ p = (unsigned long __user *) __frame->gr8;
x = __frame->gr9;
for (;;) {
@@ -277,7 +278,7 @@ asmlinkage void atomic_operation(unsigned long esfr1, unsigned long epcr0,
* int __atomic_user_add_return(atomic_t *v, int i)
*/
case TBR_TT_ATOMIC_ADD:
- p = (unsigned long *) __frame->gr8;
+ p = (unsigned long __user *) __frame->gr8;
x = __frame->gr9;
for (;;) {
@@ -322,7 +323,7 @@ error:
info.si_signo = SIGSEGV;
info.si_code = SEGV_ACCERR;
info.si_errno = 0;
- info.si_addr = (void *) __frame->pc;
+ info.si_addr = (void __user *) __frame->pc;
force_sig_info(info.si_signo, &info, current);
}
@@ -343,7 +344,7 @@ asmlinkage void media_exception(unsigned long msr0, unsigned long msr1)
info.si_signo = SIGFPE;
info.si_code = FPE_MDAOVF;
info.si_errno = 0;
- info.si_addr = (void *) __frame->pc;
+ info.si_addr = (void __user *) __frame->pc;
force_sig_info(info.si_signo, &info, current);
} /* end media_exception() */
@@ -361,11 +362,8 @@ asmlinkage void memory_access_exception(unsigned long esr0,
#ifdef CONFIG_MMU
unsigned long fixup;
- if ((esr0 & ESRx_EC) == ESRx_EC_DATA_ACCESS)
- if (handle_misalignment(esr0, ear0, epcr0) == 0)
- return;
-
- if ((fixup = search_exception_table(__frame->pc)) != 0) {
+ fixup = search_exception_table(__frame->pc);
+ if (fixup) {
__frame->pc = fixup;
return;
}
@@ -383,7 +381,7 @@ asmlinkage void memory_access_exception(unsigned long esr0,
info.si_addr = NULL;
if ((esr0 & (ESRx_VALID | ESR0_EAV)) == (ESRx_VALID | ESR0_EAV))
- info.si_addr = (void *) ear0;
+ info.si_addr = (void __user *) ear0;
force_sig_info(info.si_signo, &info, current);
@@ -412,7 +410,7 @@ asmlinkage void data_access_error(unsigned long esfr1, unsigned long esr15, unsi
info.si_signo = SIGSEGV;
info.si_code = SEGV_ACCERR;
info.si_errno = 0;
- info.si_addr = (void *)
+ info.si_addr = (void __user *)
(((esr15 & (ESRx_VALID|ESR15_EAV)) == (ESRx_VALID|ESR15_EAV)) ? ear15 : 0);
force_sig_info(info.si_signo, &info, current);
@@ -446,7 +444,7 @@ asmlinkage void division_exception(unsigned long esfr1, unsigned long esr0, unsi
info.si_signo = SIGFPE;
info.si_code = FPE_INTDIV;
info.si_errno = 0;
- info.si_addr = (void *) __frame->pc;
+ info.si_addr = (void __user *) __frame->pc;
force_sig_info(info.si_signo, &info, current);
} /* end division_exception() */
diff --git a/arch/frv/mb93090-mb00/pci-iomap.c b/arch/frv/mb93090-mb00/pci-iomap.c
index 068fa04bd527..35f6df28351e 100644
--- a/arch/frv/mb93090-mb00/pci-iomap.c
+++ b/arch/frv/mb93090-mb00/pci-iomap.c
@@ -13,8 +13,8 @@
void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
{
- unsigned long start = pci_resource_start(dev, bar);
- unsigned long len = pci_resource_len(dev, bar);
+ resource_size_t start = pci_resource_start(dev, bar);
+ resource_size_t len = pci_resource_len(dev, bar);
unsigned long flags = pci_resource_flags(dev, bar);
if (!len || !start)
diff --git a/arch/frv/mm/unaligned.c b/arch/frv/mm/unaligned.c
deleted file mode 100644
index 8f0375fc15a8..000000000000
--- a/arch/frv/mm/unaligned.c
+++ /dev/null
@@ -1,217 +0,0 @@
-/* unaligned.c: unalignment fixup handler for CPUs on which it is supported (FR451 only)
- *
- * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/sched.h>
-#include <linux/signal.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/types.h>
-#include <linux/user.h>
-#include <linux/string.h>
-#include <linux/linkage.h>
-#include <linux/init.h>
-
-#include <asm/setup.h>
-#include <asm/system.h>
-#include <asm/uaccess.h>
-
-#if 0
-#define kdebug(fmt, ...) printk("FDPIC "fmt"\n" ,##__VA_ARGS__ )
-#else
-#define kdebug(fmt, ...) do {} while(0)
-#endif
-
-#define _MA_SIGNED 0x01
-#define _MA_HALF 0x02
-#define _MA_WORD 0x04
-#define _MA_DWORD 0x08
-#define _MA_SZ_MASK 0x0e
-#define _MA_LOAD 0x10
-#define _MA_STORE 0x20
-#define _MA_UPDATE 0x40
-#define _MA_IMM 0x80
-
-#define _MA_LDxU _MA_LOAD | _MA_UPDATE
-#define _MA_LDxI _MA_LOAD | _MA_IMM
-#define _MA_STxU _MA_STORE | _MA_UPDATE
-#define _MA_STxI _MA_STORE | _MA_IMM
-
-static const uint8_t tbl_LDGRk_reg[0x40] = {
- [0x02] = _MA_LOAD | _MA_HALF | _MA_SIGNED, /* LDSH @(GRi,GRj),GRk */
- [0x03] = _MA_LOAD | _MA_HALF, /* LDUH @(GRi,GRj),GRk */
- [0x04] = _MA_LOAD | _MA_WORD, /* LD @(GRi,GRj),GRk */
- [0x05] = _MA_LOAD | _MA_DWORD, /* LDD @(GRi,GRj),GRk */
- [0x12] = _MA_LDxU | _MA_HALF | _MA_SIGNED, /* LDSHU @(GRi,GRj),GRk */
- [0x13] = _MA_LDxU | _MA_HALF, /* LDUHU @(GRi,GRj),GRk */
- [0x14] = _MA_LDxU | _MA_WORD, /* LDU @(GRi,GRj),GRk */
- [0x15] = _MA_LDxU | _MA_DWORD, /* LDDU @(GRi,GRj),GRk */
-};
-
-static const uint8_t tbl_STGRk_reg[0x40] = {
- [0x01] = _MA_STORE | _MA_HALF, /* STH @(GRi,GRj),GRk */
- [0x02] = _MA_STORE | _MA_WORD, /* ST @(GRi,GRj),GRk */
- [0x03] = _MA_STORE | _MA_DWORD, /* STD @(GRi,GRj),GRk */
- [0x11] = _MA_STxU | _MA_HALF, /* STHU @(GRi,GRj),GRk */
- [0x12] = _MA_STxU | _MA_WORD, /* STU @(GRi,GRj),GRk */
- [0x13] = _MA_STxU | _MA_DWORD, /* STDU @(GRi,GRj),GRk */
-};
-
-static const uint8_t tbl_LDSTGRk_imm[0x80] = {
- [0x31] = _MA_LDxI | _MA_HALF | _MA_SIGNED, /* LDSHI @(GRi,d12),GRk */
- [0x32] = _MA_LDxI | _MA_WORD, /* LDI @(GRi,d12),GRk */
- [0x33] = _MA_LDxI | _MA_DWORD, /* LDDI @(GRi,d12),GRk */
- [0x36] = _MA_LDxI | _MA_HALF, /* LDUHI @(GRi,d12),GRk */
- [0x51] = _MA_STxI | _MA_HALF, /* STHI @(GRi,d12),GRk */
- [0x52] = _MA_STxI | _MA_WORD, /* STI @(GRi,d12),GRk */
- [0x53] = _MA_STxI | _MA_DWORD, /* STDI @(GRi,d12),GRk */
-};
-
-
-/*****************************************************************************/
-/*
- * see if we can handle the exception by fixing up a misaligned memory access
- */
-int handle_misalignment(unsigned long esr0, unsigned long ear0, unsigned long epcr0)
-{
- unsigned long insn, addr, *greg;
- int GRi, GRj, GRk, D12, op;
-
- union {
- uint64_t _64;
- uint32_t _32[2];
- uint16_t _16;
- uint8_t _8[8];
- } x;
-
- if (!(esr0 & ESR0_EAV) || !(epcr0 & EPCR0_V) || !(ear0 & 7))
- return -EAGAIN;
-
- epcr0 &= EPCR0_PC;
-
- if (__frame->pc != epcr0) {
- kdebug("MISALIGN: Execution not halted on excepting instruction\n");
- BUG();
- }
-
- if (__get_user(insn, (unsigned long *) epcr0) < 0)
- return -EFAULT;
-
- /* determine the instruction type first */
- switch ((insn >> 18) & 0x7f) {
- case 0x2:
- /* LDx @(GRi,GRj),GRk */
- op = tbl_LDGRk_reg[(insn >> 6) & 0x3f];
- break;
-
- case 0x3:
- /* STx GRk,@(GRi,GRj) */
- op = tbl_STGRk_reg[(insn >> 6) & 0x3f];
- break;
-
- default:
- op = tbl_LDSTGRk_imm[(insn >> 18) & 0x7f];
- break;
- }
-
- if (!op)
- return -EAGAIN;
-
- kdebug("MISALIGN: pc=%08lx insn=%08lx ad=%08lx op=%02x\n", epcr0, insn, ear0, op);
-
- memset(&x, 0xba, 8);
-
- /* validate the instruction parameters */
- greg = (unsigned long *) &__frame->tbr;
-
- GRi = (insn >> 12) & 0x3f;
- GRk = (insn >> 25) & 0x3f;
-
- if (GRi > 31 || GRk > 31)
- return -ENOENT;
-
- if (op & _MA_DWORD && GRk & 1)
- return -EINVAL;
-
- if (op & _MA_IMM) {
- D12 = insn & 0xfff;
- asm ("slli %0,#20,%0 ! srai %0,#20,%0" : "=r"(D12) : "0"(D12)); /* sign extend */
- addr = (GRi ? greg[GRi] : 0) + D12;
- }
- else {
- GRj = (insn >> 0) & 0x3f;
- if (GRj > 31)
- return -ENOENT;
- addr = (GRi ? greg[GRi] : 0) + (GRj ? greg[GRj] : 0);
- }
-
- if (addr != ear0) {
- kdebug("MISALIGN: Calculated addr (%08lx) does not match EAR0 (%08lx)\n",
- addr, ear0);
- return -EFAULT;
- }
-
- /* check the address is okay */
- if (user_mode(__frame) && ___range_ok(ear0, 8) < 0)
- return -EFAULT;
-
- /* perform the memory op */
- if (op & _MA_STORE) {
- /* perform a store */
- x._32[0] = 0;
- if (GRk != 0) {
- if (op & _MA_HALF) {
- x._16 = greg[GRk];
- }
- else {
- x._32[0] = greg[GRk];
- }
- }
- if (op & _MA_DWORD)
- x._32[1] = greg[GRk + 1];
-
- kdebug("MISALIGN: Store GR%d { %08x:%08x } -> %08lx (%dB)\n",
- GRk, x._32[1], x._32[0], addr, op & _MA_SZ_MASK);
-
- if (__memcpy_user((void *) addr, &x, op & _MA_SZ_MASK) != 0)
- return -EFAULT;
- }
- else {
- /* perform a load */
- if (__memcpy_user(&x, (void *) addr, op & _MA_SZ_MASK) != 0)
- return -EFAULT;
-
- if (op & _MA_HALF) {
- if (op & _MA_SIGNED)
- asm ("slli %0,#16,%0 ! srai %0,#16,%0"
- : "=r"(x._32[0]) : "0"(x._16));
- else
- asm ("sethi #0,%0"
- : "=r"(x._32[0]) : "0"(x._16));
- }
-
- kdebug("MISALIGN: Load %08lx (%dB) -> GR%d, { %08x:%08x }\n",
- addr, op & _MA_SZ_MASK, GRk, x._32[1], x._32[0]);
-
- if (GRk != 0)
- greg[GRk] = x._32[0];
- if (op & _MA_DWORD)
- greg[GRk + 1] = x._32[1];
- }
-
- /* update the base pointer if required */
- if (op & _MA_UPDATE)
- greg[GRi] = addr;
-
- /* well... we've done that insn */
- __frame->pc = __frame->pc + 4;
-
- return 0;
-} /* end handle_misalignment() */
diff --git a/arch/h8300/kernel/asm-offsets.c b/arch/h8300/kernel/asm-offsets.c
index fc30b4fd0914..2042552e0871 100644
--- a/arch/h8300/kernel/asm-offsets.c
+++ b/arch/h8300/kernel/asm-offsets.c
@@ -13,15 +13,11 @@
#include <linux/kernel_stat.h>
#include <linux/ptrace.h>
#include <linux/hardirq.h>
+#include <linux/kbuild.h>
#include <asm/bootinfo.h>
#include <asm/irq.h>
#include <asm/ptrace.h>
-#define DEFINE(sym, val) \
- asm volatile("\n->" #sym " %0 " #val : : "i" (val))
-
-#define BLANK() asm volatile("\n->" : : )
-
int main(void)
{
/* offsets into the task struct */
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index cd13e138bd03..0df5f6f75edf 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -19,6 +19,8 @@ config IA64
select HAVE_OPROFILE
select HAVE_KPROBES
select HAVE_KRETPROBES
+ select HAVE_DMA_ATTRS
+ select HAVE_KVM
default y
help
The Itanium Processor Family is Intel's 64-bit successor to
@@ -46,6 +48,9 @@ config MMU
config SWIOTLB
bool
+config IOMMU_HELPER
+ bool
+
config GENERIC_LOCKBREAK
bool
default y
@@ -589,6 +594,8 @@ config MSPEC
source "fs/Kconfig"
+source "arch/ia64/kvm/Kconfig"
+
source "lib/Kconfig"
#
@@ -612,7 +619,7 @@ config IRQ_PER_CPU
default y
config IOMMU_HELPER
- def_bool (IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB || IA64_GENERIC)
+ def_bool (IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB || IA64_GENERIC || SWIOTLB)
source "arch/ia64/hp/sim/Kconfig"
diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile
index f1645c4f7039..ec4cca477f49 100644
--- a/arch/ia64/Makefile
+++ b/arch/ia64/Makefile
@@ -57,6 +57,7 @@ core-$(CONFIG_IA64_GENERIC) += arch/ia64/dig/
core-$(CONFIG_IA64_HP_ZX1) += arch/ia64/dig/
core-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/dig/
core-$(CONFIG_IA64_SGI_SN2) += arch/ia64/sn/
+core-$(CONFIG_KVM) += arch/ia64/kvm/
drivers-$(CONFIG_PCI) += arch/ia64/pci/
drivers-$(CONFIG_IA64_HP_SIM) += arch/ia64/hp/sim/
diff --git a/arch/ia64/hp/common/hwsw_iommu.c b/arch/ia64/hp/common/hwsw_iommu.c
index 8f6bcfe1dada..1c44ec2a1d58 100644
--- a/arch/ia64/hp/common/hwsw_iommu.c
+++ b/arch/ia64/hp/common/hwsw_iommu.c
@@ -20,10 +20,10 @@
extern int swiotlb_late_init_with_default_size (size_t size);
extern ia64_mv_dma_alloc_coherent swiotlb_alloc_coherent;
extern ia64_mv_dma_free_coherent swiotlb_free_coherent;
-extern ia64_mv_dma_map_single swiotlb_map_single;
-extern ia64_mv_dma_unmap_single swiotlb_unmap_single;
-extern ia64_mv_dma_map_sg swiotlb_map_sg;
-extern ia64_mv_dma_unmap_sg swiotlb_unmap_sg;
+extern ia64_mv_dma_map_single_attrs swiotlb_map_single_attrs;
+extern ia64_mv_dma_unmap_single_attrs swiotlb_unmap_single_attrs;
+extern ia64_mv_dma_map_sg_attrs swiotlb_map_sg_attrs;
+extern ia64_mv_dma_unmap_sg_attrs swiotlb_unmap_sg_attrs;
extern ia64_mv_dma_supported swiotlb_dma_supported;
extern ia64_mv_dma_mapping_error swiotlb_dma_mapping_error;
@@ -31,19 +31,19 @@ extern ia64_mv_dma_mapping_error swiotlb_dma_mapping_error;
extern ia64_mv_dma_alloc_coherent sba_alloc_coherent;
extern ia64_mv_dma_free_coherent sba_free_coherent;
-extern ia64_mv_dma_map_single sba_map_single;
-extern ia64_mv_dma_unmap_single sba_unmap_single;
-extern ia64_mv_dma_map_sg sba_map_sg;
-extern ia64_mv_dma_unmap_sg sba_unmap_sg;
+extern ia64_mv_dma_map_single_attrs sba_map_single_attrs;
+extern ia64_mv_dma_unmap_single_attrs sba_unmap_single_attrs;
+extern ia64_mv_dma_map_sg_attrs sba_map_sg_attrs;
+extern ia64_mv_dma_unmap_sg_attrs sba_unmap_sg_attrs;
extern ia64_mv_dma_supported sba_dma_supported;
extern ia64_mv_dma_mapping_error sba_dma_mapping_error;
#define hwiommu_alloc_coherent sba_alloc_coherent
#define hwiommu_free_coherent sba_free_coherent
-#define hwiommu_map_single sba_map_single
-#define hwiommu_unmap_single sba_unmap_single
-#define hwiommu_map_sg sba_map_sg
-#define hwiommu_unmap_sg sba_unmap_sg
+#define hwiommu_map_single_attrs sba_map_single_attrs
+#define hwiommu_unmap_single_attrs sba_unmap_single_attrs
+#define hwiommu_map_sg_attrs sba_map_sg_attrs
+#define hwiommu_unmap_sg_attrs sba_unmap_sg_attrs
#define hwiommu_dma_supported sba_dma_supported
#define hwiommu_dma_mapping_error sba_dma_mapping_error
#define hwiommu_sync_single_for_cpu machvec_dma_sync_single
@@ -98,41 +98,48 @@ hwsw_free_coherent (struct device *dev, size_t size, void *vaddr, dma_addr_t dma
}
dma_addr_t
-hwsw_map_single (struct device *dev, void *addr, size_t size, int dir)
+hwsw_map_single_attrs(struct device *dev, void *addr, size_t size, int dir,
+ struct dma_attrs *attrs)
{
if (use_swiotlb(dev))
- return swiotlb_map_single(dev, addr, size, dir);
+ return swiotlb_map_single_attrs(dev, addr, size, dir, attrs);
else
- return hwiommu_map_single(dev, addr, size, dir);
+ return hwiommu_map_single_attrs(dev, addr, size, dir, attrs);
}
+EXPORT_SYMBOL(hwsw_map_single_attrs);
void
-hwsw_unmap_single (struct device *dev, dma_addr_t iova, size_t size, int dir)
+hwsw_unmap_single_attrs(struct device *dev, dma_addr_t iova, size_t size,
+ int dir, struct dma_attrs *attrs)
{
if (use_swiotlb(dev))
- return swiotlb_unmap_single(dev, iova, size, dir);
+ return swiotlb_unmap_single_attrs(dev, iova, size, dir, attrs);
else
- return hwiommu_unmap_single(dev, iova, size, dir);
+ return hwiommu_unmap_single_attrs(dev, iova, size, dir, attrs);
}
-
+EXPORT_SYMBOL(hwsw_unmap_single_attrs);
int
-hwsw_map_sg (struct device *dev, struct scatterlist *sglist, int nents, int dir)
+hwsw_map_sg_attrs(struct device *dev, struct scatterlist *sglist, int nents,
+ int dir, struct dma_attrs *attrs)
{
if (use_swiotlb(dev))
- return swiotlb_map_sg(dev, sglist, nents, dir);
+ return swiotlb_map_sg_attrs(dev, sglist, nents, dir, attrs);
else
- return hwiommu_map_sg(dev, sglist, nents, dir);
+ return hwiommu_map_sg_attrs(dev, sglist, nents, dir, attrs);
}
+EXPORT_SYMBOL(hwsw_map_sg_attrs);
void
-hwsw_unmap_sg (struct device *dev, struct scatterlist *sglist, int nents, int dir)
+hwsw_unmap_sg_attrs(struct device *dev, struct scatterlist *sglist, int nents,
+ int dir, struct dma_attrs *attrs)
{
if (use_swiotlb(dev))
- return swiotlb_unmap_sg(dev, sglist, nents, dir);
+ return swiotlb_unmap_sg_attrs(dev, sglist, nents, dir, attrs);
else
- return hwiommu_unmap_sg(dev, sglist, nents, dir);
+ return hwiommu_unmap_sg_attrs(dev, sglist, nents, dir, attrs);
}
+EXPORT_SYMBOL(hwsw_unmap_sg_attrs);
void
hwsw_sync_single_for_cpu (struct device *dev, dma_addr_t addr, size_t size, int dir)
@@ -185,10 +192,6 @@ hwsw_dma_mapping_error (dma_addr_t dma_addr)
}
EXPORT_SYMBOL(hwsw_dma_mapping_error);
-EXPORT_SYMBOL(hwsw_map_single);
-EXPORT_SYMBOL(hwsw_unmap_single);
-EXPORT_SYMBOL(hwsw_map_sg);
-EXPORT_SYMBOL(hwsw_unmap_sg);
EXPORT_SYMBOL(hwsw_dma_supported);
EXPORT_SYMBOL(hwsw_alloc_coherent);
EXPORT_SYMBOL(hwsw_free_coherent);
diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
index 9409de5c9441..34421aed1e2a 100644
--- a/arch/ia64/hp/common/sba_iommu.c
+++ b/arch/ia64/hp/common/sba_iommu.c
@@ -899,16 +899,18 @@ sba_mark_invalid(struct ioc *ioc, dma_addr_t iova, size_t byte_cnt)
}
/**
- * sba_map_single - map one buffer and return IOVA for DMA
+ * sba_map_single_attrs - map one buffer and return IOVA for DMA
* @dev: instance of PCI owned by the driver that's asking.
* @addr: driver buffer to map.
* @size: number of bytes to map in driver buffer.
* @dir: R/W or both.
+ * @attrs: optional dma attributes
*
* See Documentation/DMA-mapping.txt
*/
dma_addr_t
-sba_map_single(struct device *dev, void *addr, size_t size, int dir)
+sba_map_single_attrs(struct device *dev, void *addr, size_t size, int dir,
+ struct dma_attrs *attrs)
{
struct ioc *ioc;
dma_addr_t iovp;
@@ -932,7 +934,8 @@ sba_map_single(struct device *dev, void *addr, size_t size, int dir)
** Device is bit capable of DMA'ing to the buffer...
** just return the PCI address of ptr
*/
- DBG_BYPASS("sba_map_single() bypass mask/addr: 0x%lx/0x%lx\n",
+ DBG_BYPASS("sba_map_single_attrs() bypass mask/addr: "
+ "0x%lx/0x%lx\n",
to_pci_dev(dev)->dma_mask, pci_addr);
return pci_addr;
}
@@ -953,7 +956,7 @@ sba_map_single(struct device *dev, void *addr, size_t size, int dir)
#ifdef ASSERT_PDIR_SANITY
spin_lock_irqsave(&ioc->res_lock, flags);
- if (sba_check_pdir(ioc,"Check before sba_map_single()"))
+ if (sba_check_pdir(ioc,"Check before sba_map_single_attrs()"))
panic("Sanity check failed");
spin_unlock_irqrestore(&ioc->res_lock, flags);
#endif
@@ -982,11 +985,12 @@ sba_map_single(struct device *dev, void *addr, size_t size, int dir)
/* form complete address */
#ifdef ASSERT_PDIR_SANITY
spin_lock_irqsave(&ioc->res_lock, flags);
- sba_check_pdir(ioc,"Check after sba_map_single()");
+ sba_check_pdir(ioc,"Check after sba_map_single_attrs()");
spin_unlock_irqrestore(&ioc->res_lock, flags);
#endif
return SBA_IOVA(ioc, iovp, offset);
}
+EXPORT_SYMBOL(sba_map_single_attrs);
#ifdef ENABLE_MARK_CLEAN
static SBA_INLINE void
@@ -1013,15 +1017,17 @@ sba_mark_clean(struct ioc *ioc, dma_addr_t iova, size_t size)
#endif
/**
- * sba_unmap_single - unmap one IOVA and free resources
+ * sba_unmap_single_attrs - unmap one IOVA and free resources
* @dev: instance of PCI owned by the driver that's asking.
* @iova: IOVA of driver buffer previously mapped.
* @size: number of bytes mapped in driver buffer.
* @dir: R/W or both.
+ * @attrs: optional dma attributes
*
* See Documentation/DMA-mapping.txt
*/
-void sba_unmap_single(struct device *dev, dma_addr_t iova, size_t size, int dir)
+void sba_unmap_single_attrs(struct device *dev, dma_addr_t iova, size_t size,
+ int dir, struct dma_attrs *attrs)
{
struct ioc *ioc;
#if DELAYED_RESOURCE_CNT > 0
@@ -1038,7 +1044,8 @@ void sba_unmap_single(struct device *dev, dma_addr_t iova, size_t size, int dir)
/*
** Address does not fall w/in IOVA, must be bypassing
*/
- DBG_BYPASS("sba_unmap_single() bypass addr: 0x%lx\n", iova);
+ DBG_BYPASS("sba_unmap_single_atttrs() bypass addr: 0x%lx\n",
+ iova);
#ifdef ENABLE_MARK_CLEAN
if (dir == DMA_FROM_DEVICE) {
@@ -1087,7 +1094,7 @@ void sba_unmap_single(struct device *dev, dma_addr_t iova, size_t size, int dir)
spin_unlock_irqrestore(&ioc->res_lock, flags);
#endif /* DELAYED_RESOURCE_CNT == 0 */
}
-
+EXPORT_SYMBOL(sba_unmap_single_attrs);
/**
* sba_alloc_coherent - allocate/map shared mem for DMA
@@ -1144,7 +1151,8 @@ sba_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, gfp
* If device can't bypass or bypass is disabled, pass the 32bit fake
* device to map single to get an iova mapping.
*/
- *dma_handle = sba_map_single(&ioc->sac_only_dev->dev, addr, size, 0);
+ *dma_handle = sba_map_single_attrs(&ioc->sac_only_dev->dev, addr,
+ size, 0, NULL);
return addr;
}
@@ -1161,7 +1169,7 @@ sba_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, gfp
*/
void sba_free_coherent (struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle)
{
- sba_unmap_single(dev, dma_handle, size, 0);
+ sba_unmap_single_attrs(dev, dma_handle, size, 0, NULL);
free_pages((unsigned long) vaddr, get_order(size));
}
@@ -1410,10 +1418,12 @@ sba_coalesce_chunks(struct ioc *ioc, struct device *dev,
* @sglist: array of buffer/length pairs
* @nents: number of entries in list
* @dir: R/W or both.
+ * @attrs: optional dma attributes
*
* See Documentation/DMA-mapping.txt
*/
-int sba_map_sg(struct device *dev, struct scatterlist *sglist, int nents, int dir)
+int sba_map_sg_attrs(struct device *dev, struct scatterlist *sglist, int nents,
+ int dir, struct dma_attrs *attrs)
{
struct ioc *ioc;
int coalesced, filled = 0;
@@ -1441,16 +1451,16 @@ int sba_map_sg(struct device *dev, struct scatterlist *sglist, int nents, int di
/* Fast path single entry scatterlists. */
if (nents == 1) {
sglist->dma_length = sglist->length;
- sglist->dma_address = sba_map_single(dev, sba_sg_address(sglist), sglist->length, dir);
+ sglist->dma_address = sba_map_single_attrs(dev, sba_sg_address(sglist), sglist->length, dir, attrs);
return 1;
}
#ifdef ASSERT_PDIR_SANITY
spin_lock_irqsave(&ioc->res_lock, flags);
- if (sba_check_pdir(ioc,"Check before sba_map_sg()"))
+ if (sba_check_pdir(ioc,"Check before sba_map_sg_attrs()"))
{
sba_dump_sg(ioc, sglist, nents);
- panic("Check before sba_map_sg()");
+ panic("Check before sba_map_sg_attrs()");
}
spin_unlock_irqrestore(&ioc->res_lock, flags);
#endif
@@ -1479,10 +1489,10 @@ int sba_map_sg(struct device *dev, struct scatterlist *sglist, int nents, int di
#ifdef ASSERT_PDIR_SANITY
spin_lock_irqsave(&ioc->res_lock, flags);
- if (sba_check_pdir(ioc,"Check after sba_map_sg()"))
+ if (sba_check_pdir(ioc,"Check after sba_map_sg_attrs()"))
{
sba_dump_sg(ioc, sglist, nents);
- panic("Check after sba_map_sg()\n");
+ panic("Check after sba_map_sg_attrs()\n");
}
spin_unlock_irqrestore(&ioc->res_lock, flags);
#endif
@@ -1492,18 +1502,20 @@ int sba_map_sg(struct device *dev, struct scatterlist *sglist, int nents, int di
return filled;
}
-
+EXPORT_SYMBOL(sba_map_sg_attrs);
/**
- * sba_unmap_sg - unmap Scatter/Gather list
+ * sba_unmap_sg_attrs - unmap Scatter/Gather list
* @dev: instance of PCI owned by the driver that's asking.
* @sglist: array of buffer/length pairs
* @nents: number of entries in list
* @dir: R/W or both.
+ * @attrs: optional dma attributes
*
* See Documentation/DMA-mapping.txt
*/
-void sba_unmap_sg (struct device *dev, struct scatterlist *sglist, int nents, int dir)
+void sba_unmap_sg_attrs(struct device *dev, struct scatterlist *sglist,
+ int nents, int dir, struct dma_attrs *attrs)
{
#ifdef ASSERT_PDIR_SANITY
struct ioc *ioc;
@@ -1518,13 +1530,14 @@ void sba_unmap_sg (struct device *dev, struct scatterlist *sglist, int nents, in
ASSERT(ioc);
spin_lock_irqsave(&ioc->res_lock, flags);
- sba_check_pdir(ioc,"Check before sba_unmap_sg()");
+ sba_check_pdir(ioc,"Check before sba_unmap_sg_attrs()");
spin_unlock_irqrestore(&ioc->res_lock, flags);
#endif
while (nents && sglist->dma_length) {
- sba_unmap_single(dev, sglist->dma_address, sglist->dma_length, dir);
+ sba_unmap_single_attrs(dev, sglist->dma_address,
+ sglist->dma_length, dir, attrs);
sglist = sg_next(sglist);
nents--;
}
@@ -1533,11 +1546,12 @@ void sba_unmap_sg (struct device *dev, struct scatterlist *sglist, int nents, in
#ifdef ASSERT_PDIR_SANITY
spin_lock_irqsave(&ioc->res_lock, flags);
- sba_check_pdir(ioc,"Check after sba_unmap_sg()");
+ sba_check_pdir(ioc,"Check after sba_unmap_sg_attrs()");
spin_unlock_irqrestore(&ioc->res_lock, flags);
#endif
}
+EXPORT_SYMBOL(sba_unmap_sg_attrs);
/**************************************************************
*
@@ -1918,15 +1932,13 @@ static const struct file_operations ioc_fops = {
static void __init
ioc_proc_init(void)
{
- struct proc_dir_entry *dir, *entry;
+ struct proc_dir_entry *dir;
dir = proc_mkdir("bus/mckinley", NULL);
if (!dir)
return;
- entry = create_proc_entry(ioc_list->name, 0, dir);
- if (entry)
- entry->proc_fops = &ioc_fops;
+ proc_create(ioc_list->name, 0, dir, &ioc_fops);
}
#endif
@@ -2166,10 +2178,6 @@ sba_page_override(char *str)
__setup("sbapagesize=",sba_page_override);
EXPORT_SYMBOL(sba_dma_mapping_error);
-EXPORT_SYMBOL(sba_map_single);
-EXPORT_SYMBOL(sba_unmap_single);
-EXPORT_SYMBOL(sba_map_sg);
-EXPORT_SYMBOL(sba_unmap_sg);
EXPORT_SYMBOL(sba_dma_supported);
EXPORT_SYMBOL(sba_alloc_coherent);
EXPORT_SYMBOL(sba_free_coherent);
diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c
index 230a6f92367f..c64a55af9b95 100644
--- a/arch/ia64/kernel/asm-offsets.c
+++ b/arch/ia64/kernel/asm-offsets.c
@@ -9,7 +9,7 @@
#include <linux/sched.h>
#include <linux/pid.h>
#include <linux/clocksource.h>
-
+#include <linux/kbuild.h>
#include <asm-ia64/processor.h>
#include <asm-ia64/ptrace.h>
#include <asm-ia64/siginfo.h>
@@ -19,11 +19,6 @@
#include "../kernel/sigframe.h"
#include "../kernel/fsyscall_gtod_data.h"
-#define DEFINE(sym, val) \
- asm volatile("\n->" #sym " %0 " #val : : "i" (val))
-
-#define BLANK() asm volatile("\n->" : : )
-
void foo(void)
{
DEFINE(IA64_TASK_SIZE, sizeof (struct task_struct));
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index c8e403752a0c..7fbb51e10bbe 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -6695,16 +6695,12 @@ pfm_init(void)
/*
* create /proc/perfmon (mostly for debugging purposes)
*/
- perfmon_dir = create_proc_entry("perfmon", S_IRUGO, NULL);
+ perfmon_dir = proc_create("perfmon", S_IRUGO, NULL, &pfm_proc_fops);
if (perfmon_dir == NULL) {
printk(KERN_ERR "perfmon: cannot create /proc entry, perfmon disabled\n");
pmu_conf = NULL;
return -1;
}
- /*
- * install customized file operations for /proc/perfmon entry
- */
- perfmon_dir->proc_fops = &pfm_proc_fops;
/*
* create /proc/sys/kernel/perfmon (for debugging purposes)
diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c
index b11bb50a197a..ecb9eb78d687 100644
--- a/arch/ia64/kernel/salinfo.c
+++ b/arch/ia64/kernel/salinfo.c
@@ -648,18 +648,16 @@ salinfo_init(void)
if (!dir)
continue;
- entry = create_proc_entry("event", S_IRUSR, dir);
+ entry = proc_create_data("event", S_IRUSR, dir,
+ &salinfo_event_fops, data);
if (!entry)
continue;
- entry->data = data;
- entry->proc_fops = &salinfo_event_fops;
*sdir++ = entry;
- entry = create_proc_entry("data", S_IRUSR | S_IWUSR, dir);
+ entry = proc_create_data("data", S_IRUSR | S_IWUSR, dir,
+ &salinfo_data_fops, data);
if (!entry)
continue;
- entry->data = data;
- entry->proc_fops = &salinfo_data_fops;
*sdir++ = entry;
/* we missed any events before now */
diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig
new file mode 100644
index 000000000000..7914e4828504
--- /dev/null
+++ b/arch/ia64/kvm/Kconfig
@@ -0,0 +1,49 @@
+#
+# KVM configuration
+#
+config HAVE_KVM
+ bool
+
+menuconfig VIRTUALIZATION
+ bool "Virtualization"
+ depends on HAVE_KVM || IA64
+ default y
+ ---help---
+ Say Y here to get to see options for using your Linux host to run other
+ operating systems inside virtual machines (guests).
+ This option alone does not add any kernel code.
+
+ If you say N, all options in this submenu will be skipped and disabled.
+
+if VIRTUALIZATION
+
+config KVM
+ tristate "Kernel-based Virtual Machine (KVM) support"
+ depends on HAVE_KVM && EXPERIMENTAL
+ select PREEMPT_NOTIFIERS
+ select ANON_INODES
+ ---help---
+ Support hosting fully virtualized guest machines using hardware
+ virtualization extensions. You will need a fairly recent
+ processor equipped with virtualization extensions. You will also
+ need to select one or more of the processor modules below.
+
+ This module provides access to the hardware capabilities through
+ a character device node named /dev/kvm.
+
+ To compile this as a module, choose M here: the module
+ will be called kvm.
+
+ If unsure, say N.
+
+config KVM_INTEL
+ tristate "KVM for Intel Itanium 2 processors support"
+ depends on KVM && m
+ ---help---
+ Provides support for KVM on Itanium 2 processors equipped with the VT
+ extensions.
+
+config KVM_TRACE
+ bool
+
+endif # VIRTUALIZATION
diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile
new file mode 100644
index 000000000000..52353397a1a4
--- /dev/null
+++ b/arch/ia64/kvm/Makefile
@@ -0,0 +1,58 @@
+#This Make file is to generate asm-offsets.h and build source.
+#
+
+#Generate asm-offsets.h for vmm module build
+offsets-file := asm-offsets.h
+
+always := $(offsets-file)
+targets := $(offsets-file)
+targets += arch/ia64/kvm/asm-offsets.s
+clean-files := $(addprefix $(objtree)/,$(targets) $(obj)/memcpy.S $(obj)/memset.S)
+
+# Default sed regexp - multiline due to syntax constraints
+define sed-y
+ "/^->/{s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; s:->::; p;}"
+endef
+
+quiet_cmd_offsets = GEN $@
+define cmd_offsets
+ (set -e; \
+ echo "#ifndef __ASM_KVM_OFFSETS_H__"; \
+ echo "#define __ASM_KVM_OFFSETS_H__"; \
+ echo "/*"; \
+ echo " * DO NOT MODIFY."; \
+ echo " *"; \
+ echo " * This file was generated by Makefile"; \
+ echo " *"; \
+ echo " */"; \
+ echo ""; \
+ sed -ne $(sed-y) $<; \
+ echo ""; \
+ echo "#endif" ) > $@
+endef
+# We use internal rules to avoid the "is up to date" message from make
+arch/ia64/kvm/asm-offsets.s: arch/ia64/kvm/asm-offsets.c
+ $(call if_changed_dep,cc_s_c)
+
+$(obj)/$(offsets-file): arch/ia64/kvm/asm-offsets.s
+ $(call cmd,offsets)
+
+#
+# Makefile for Kernel-based Virtual Machine module
+#
+
+EXTRA_CFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/
+EXTRA_AFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/
+
+common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o)
+
+kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o
+obj-$(CONFIG_KVM) += kvm.o
+
+FORCE : $(obj)/$(offsets-file)
+EXTRA_CFLAGS_vcpu.o += -mfixed-range=f2-f5,f12-f127
+kvm-intel-objs = vmm.o vmm_ivt.o trampoline.o vcpu.o optvfault.o mmio.o \
+ vtlb.o process.o
+#Add link memcpy and memset to avoid possible structure assignment error
+kvm-intel-objs += ../lib/memset.o ../lib/memcpy.o
+obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/ia64/kvm/asm-offsets.c b/arch/ia64/kvm/asm-offsets.c
new file mode 100644
index 000000000000..4e3dc13a619c
--- /dev/null
+++ b/arch/ia64/kvm/asm-offsets.c
@@ -0,0 +1,251 @@
+/*
+ * asm-offsets.c Generate definitions needed by assembly language modules.
+ * This code generates raw asm output which is post-processed
+ * to extract and format the required data.
+ *
+ * Anthony Xu <anthony.xu@intel.com>
+ * Xiantao Zhang <xiantao.zhang@intel.com>
+ * Copyright (c) 2007 Intel Corporation KVM support.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <linux/autoconf.h>
+#include <linux/kvm_host.h>
+
+#include "vcpu.h"
+
+#define task_struct kvm_vcpu
+
+#define DEFINE(sym, val) \
+ asm volatile("\n->" #sym " (%0) " #val : : "i" (val))
+
+#define BLANK() asm volatile("\n->" : :)
+
+#define OFFSET(_sym, _str, _mem) \
+ DEFINE(_sym, offsetof(_str, _mem));
+
+void foo(void)
+{
+ DEFINE(VMM_TASK_SIZE, sizeof(struct kvm_vcpu));
+ DEFINE(VMM_PT_REGS_SIZE, sizeof(struct kvm_pt_regs));
+
+ BLANK();
+
+ DEFINE(VMM_VCPU_META_RR0_OFFSET,
+ offsetof(struct kvm_vcpu, arch.metaphysical_rr0));
+ DEFINE(VMM_VCPU_META_SAVED_RR0_OFFSET,
+ offsetof(struct kvm_vcpu,
+ arch.metaphysical_saved_rr0));
+ DEFINE(VMM_VCPU_VRR0_OFFSET,
+ offsetof(struct kvm_vcpu, arch.vrr[0]));
+ DEFINE(VMM_VPD_IRR0_OFFSET,
+ offsetof(struct vpd, irr[0]));
+ DEFINE(VMM_VCPU_ITC_CHECK_OFFSET,
+ offsetof(struct kvm_vcpu, arch.itc_check));
+ DEFINE(VMM_VCPU_IRQ_CHECK_OFFSET,
+ offsetof(struct kvm_vcpu, arch.irq_check));
+ DEFINE(VMM_VPD_VHPI_OFFSET,
+ offsetof(struct vpd, vhpi));
+ DEFINE(VMM_VCPU_VSA_BASE_OFFSET,
+ offsetof(struct kvm_vcpu, arch.vsa_base));
+ DEFINE(VMM_VCPU_VPD_OFFSET,
+ offsetof(struct kvm_vcpu, arch.vpd));
+ DEFINE(VMM_VCPU_IRQ_CHECK,
+ offsetof(struct kvm_vcpu, arch.irq_check));
+ DEFINE(VMM_VCPU_TIMER_PENDING,
+ offsetof(struct kvm_vcpu, arch.timer_pending));
+ DEFINE(VMM_VCPU_META_SAVED_RR0_OFFSET,
+ offsetof(struct kvm_vcpu, arch.metaphysical_saved_rr0));
+ DEFINE(VMM_VCPU_MODE_FLAGS_OFFSET,
+ offsetof(struct kvm_vcpu, arch.mode_flags));
+ DEFINE(VMM_VCPU_ITC_OFS_OFFSET,
+ offsetof(struct kvm_vcpu, arch.itc_offset));
+ DEFINE(VMM_VCPU_LAST_ITC_OFFSET,
+ offsetof(struct kvm_vcpu, arch.last_itc));
+ DEFINE(VMM_VCPU_SAVED_GP_OFFSET,
+ offsetof(struct kvm_vcpu, arch.saved_gp));
+
+ BLANK();
+
+ DEFINE(VMM_PT_REGS_B6_OFFSET,
+ offsetof(struct kvm_pt_regs, b6));
+ DEFINE(VMM_PT_REGS_B7_OFFSET,
+ offsetof(struct kvm_pt_regs, b7));
+ DEFINE(VMM_PT_REGS_AR_CSD_OFFSET,
+ offsetof(struct kvm_pt_regs, ar_csd));
+ DEFINE(VMM_PT_REGS_AR_SSD_OFFSET,
+ offsetof(struct kvm_pt_regs, ar_ssd));
+ DEFINE(VMM_PT_REGS_R8_OFFSET,
+ offsetof(struct kvm_pt_regs, r8));
+ DEFINE(VMM_PT_REGS_R9_OFFSET,
+ offsetof(struct kvm_pt_regs, r9));
+ DEFINE(VMM_PT_REGS_R10_OFFSET,
+ offsetof(struct kvm_pt_regs, r10));
+ DEFINE(VMM_PT_REGS_R11_OFFSET,
+ offsetof(struct kvm_pt_regs, r11));
+ DEFINE(VMM_PT_REGS_CR_IPSR_OFFSET,
+ offsetof(struct kvm_pt_regs, cr_ipsr));
+ DEFINE(VMM_PT_REGS_CR_IIP_OFFSET,
+ offsetof(struct kvm_pt_regs, cr_iip));
+ DEFINE(VMM_PT_REGS_CR_IFS_OFFSET,
+ offsetof(struct kvm_pt_regs, cr_ifs));
+ DEFINE(VMM_PT_REGS_AR_UNAT_OFFSET,
+ offsetof(struct kvm_pt_regs, ar_unat));
+ DEFINE(VMM_PT_REGS_AR_PFS_OFFSET,
+ offsetof(struct kvm_pt_regs, ar_pfs));
+ DEFINE(VMM_PT_REGS_AR_RSC_OFFSET,
+ offsetof(struct kvm_pt_regs, ar_rsc));
+ DEFINE(VMM_PT_REGS_AR_RNAT_OFFSET,
+ offsetof(struct kvm_pt_regs, ar_rnat));
+
+ DEFINE(VMM_PT_REGS_AR_BSPSTORE_OFFSET,
+ offsetof(struct kvm_pt_regs, ar_bspstore));
+ DEFINE(VMM_PT_REGS_PR_OFFSET,
+ offsetof(struct kvm_pt_regs, pr));
+ DEFINE(VMM_PT_REGS_B0_OFFSET,
+ offsetof(struct kvm_pt_regs, b0));
+ DEFINE(VMM_PT_REGS_LOADRS_OFFSET,
+ offsetof(struct kvm_pt_regs, loadrs));
+ DEFINE(VMM_PT_REGS_R1_OFFSET,
+ offsetof(struct kvm_pt_regs, r1));
+ DEFINE(VMM_PT_REGS_R12_OFFSET,
+ offsetof(struct kvm_pt_regs, r12));
+ DEFINE(VMM_PT_REGS_R13_OFFSET,
+ offsetof(struct kvm_pt_regs, r13));
+ DEFINE(VMM_PT_REGS_AR_FPSR_OFFSET,
+ offsetof(struct kvm_pt_regs, ar_fpsr));
+ DEFINE(VMM_PT_REGS_R15_OFFSET,
+ offsetof(struct kvm_pt_regs, r15));
+ DEFINE(VMM_PT_REGS_R14_OFFSET,
+ offsetof(struct kvm_pt_regs, r14));
+ DEFINE(VMM_PT_REGS_R2_OFFSET,
+ offsetof(struct kvm_pt_regs, r2));
+ DEFINE(VMM_PT_REGS_R3_OFFSET,
+ offsetof(struct kvm_pt_regs, r3));
+ DEFINE(VMM_PT_REGS_R16_OFFSET,
+ offsetof(struct kvm_pt_regs, r16));
+ DEFINE(VMM_PT_REGS_R17_OFFSET,
+ offsetof(struct kvm_pt_regs, r17));
+ DEFINE(VMM_PT_REGS_R18_OFFSET,
+ offsetof(struct kvm_pt_regs, r18));
+ DEFINE(VMM_PT_REGS_R19_OFFSET,
+ offsetof(struct kvm_pt_regs, r19));
+ DEFINE(VMM_PT_REGS_R20_OFFSET,
+ offsetof(struct kvm_pt_regs, r20));
+ DEFINE(VMM_PT_REGS_R21_OFFSET,
+ offsetof(struct kvm_pt_regs, r21));
+ DEFINE(VMM_PT_REGS_R22_OFFSET,
+ offsetof(struct kvm_pt_regs, r22));
+ DEFINE(VMM_PT_REGS_R23_OFFSET,
+ offsetof(struct kvm_pt_regs, r23));
+ DEFINE(VMM_PT_REGS_R24_OFFSET,
+ offsetof(struct kvm_pt_regs, r24));
+ DEFINE(VMM_PT_REGS_R25_OFFSET,
+ offsetof(struct kvm_pt_regs, r25));
+ DEFINE(VMM_PT_REGS_R26_OFFSET,
+ offsetof(struct kvm_pt_regs, r26));
+ DEFINE(VMM_PT_REGS_R27_OFFSET,
+ offsetof(struct kvm_pt_regs, r27));
+ DEFINE(VMM_PT_REGS_R28_OFFSET,
+ offsetof(struct kvm_pt_regs, r28));
+ DEFINE(VMM_PT_REGS_R29_OFFSET,
+ offsetof(struct kvm_pt_regs, r29));
+ DEFINE(VMM_PT_REGS_R30_OFFSET,
+ offsetof(struct kvm_pt_regs, r30));
+ DEFINE(VMM_PT_REGS_R31_OFFSET,
+ offsetof(struct kvm_pt_regs, r31));
+ DEFINE(VMM_PT_REGS_AR_CCV_OFFSET,
+ offsetof(struct kvm_pt_regs, ar_ccv));
+ DEFINE(VMM_PT_REGS_F6_OFFSET,
+ offsetof(struct kvm_pt_regs, f6));
+ DEFINE(VMM_PT_REGS_F7_OFFSET,
+ offsetof(struct kvm_pt_regs, f7));
+ DEFINE(VMM_PT_REGS_F8_OFFSET,
+ offsetof(struct kvm_pt_regs, f8));
+ DEFINE(VMM_PT_REGS_F9_OFFSET,
+ offsetof(struct kvm_pt_regs, f9));
+ DEFINE(VMM_PT_REGS_F10_OFFSET,
+ offsetof(struct kvm_pt_regs, f10));
+ DEFINE(VMM_PT_REGS_F11_OFFSET,
+ offsetof(struct kvm_pt_regs, f11));
+ DEFINE(VMM_PT_REGS_R4_OFFSET,
+ offsetof(struct kvm_pt_regs, r4));
+ DEFINE(VMM_PT_REGS_R5_OFFSET,
+ offsetof(struct kvm_pt_regs, r5));
+ DEFINE(VMM_PT_REGS_R6_OFFSET,
+ offsetof(struct kvm_pt_regs, r6));
+ DEFINE(VMM_PT_REGS_R7_OFFSET,
+ offsetof(struct kvm_pt_regs, r7));
+ DEFINE(VMM_PT_REGS_EML_UNAT_OFFSET,
+ offsetof(struct kvm_pt_regs, eml_unat));
+ DEFINE(VMM_VCPU_IIPA_OFFSET,
+ offsetof(struct kvm_vcpu, arch.cr_iipa));
+ DEFINE(VMM_VCPU_OPCODE_OFFSET,
+ offsetof(struct kvm_vcpu, arch.opcode));
+ DEFINE(VMM_VCPU_CAUSE_OFFSET, offsetof(struct kvm_vcpu, arch.cause));
+ DEFINE(VMM_VCPU_ISR_OFFSET,
+ offsetof(struct kvm_vcpu, arch.cr_isr));
+ DEFINE(VMM_PT_REGS_R16_SLOT,
+ (((offsetof(struct kvm_pt_regs, r16)
+ - sizeof(struct kvm_pt_regs)) >> 3) & 0x3f));
+ DEFINE(VMM_VCPU_MODE_FLAGS_OFFSET,
+ offsetof(struct kvm_vcpu, arch.mode_flags));
+ DEFINE(VMM_VCPU_GP_OFFSET, offsetof(struct kvm_vcpu, arch.__gp));
+ BLANK();
+
+ DEFINE(VMM_VPD_BASE_OFFSET, offsetof(struct kvm_vcpu, arch.vpd));
+ DEFINE(VMM_VPD_VIFS_OFFSET, offsetof(struct vpd, ifs));
+ DEFINE(VMM_VLSAPIC_INSVC_BASE_OFFSET,
+ offsetof(struct kvm_vcpu, arch.insvc[0]));
+ DEFINE(VMM_VPD_VPTA_OFFSET, offsetof(struct vpd, pta));
+ DEFINE(VMM_VPD_VPSR_OFFSET, offsetof(struct vpd, vpsr));
+
+ DEFINE(VMM_CTX_R4_OFFSET, offsetof(union context, gr[4]));
+ DEFINE(VMM_CTX_R5_OFFSET, offsetof(union context, gr[5]));
+ DEFINE(VMM_CTX_R12_OFFSET, offsetof(union context, gr[12]));
+ DEFINE(VMM_CTX_R13_OFFSET, offsetof(union context, gr[13]));
+ DEFINE(VMM_CTX_KR0_OFFSET, offsetof(union context, ar[0]));
+ DEFINE(VMM_CTX_KR1_OFFSET, offsetof(union context, ar[1]));
+ DEFINE(VMM_CTX_B0_OFFSET, offsetof(union context, br[0]));
+ DEFINE(VMM_CTX_B1_OFFSET, offsetof(union context, br[1]));
+ DEFINE(VMM_CTX_B2_OFFSET, offsetof(union context, br[2]));
+ DEFINE(VMM_CTX_RR0_OFFSET, offsetof(union context, rr[0]));
+ DEFINE(VMM_CTX_RSC_OFFSET, offsetof(union context, ar[16]));
+ DEFINE(VMM_CTX_BSPSTORE_OFFSET, offsetof(union context, ar[18]));
+ DEFINE(VMM_CTX_RNAT_OFFSET, offsetof(union context, ar[19]));
+ DEFINE(VMM_CTX_FCR_OFFSET, offsetof(union context, ar[21]));
+ DEFINE(VMM_CTX_EFLAG_OFFSET, offsetof(union context, ar[24]));
+ DEFINE(VMM_CTX_CFLG_OFFSET, offsetof(union context, ar[27]));
+ DEFINE(VMM_CTX_FSR_OFFSET, offsetof(union context, ar[28]));
+ DEFINE(VMM_CTX_FIR_OFFSET, offsetof(union context, ar[29]));
+ DEFINE(VMM_CTX_FDR_OFFSET, offsetof(union context, ar[30]));
+ DEFINE(VMM_CTX_UNAT_OFFSET, offsetof(union context, ar[36]));
+ DEFINE(VMM_CTX_FPSR_OFFSET, offsetof(union context, ar[40]));
+ DEFINE(VMM_CTX_PFS_OFFSET, offsetof(union context, ar[64]));
+ DEFINE(VMM_CTX_LC_OFFSET, offsetof(union context, ar[65]));
+ DEFINE(VMM_CTX_DCR_OFFSET, offsetof(union context, cr[0]));
+ DEFINE(VMM_CTX_IVA_OFFSET, offsetof(union context, cr[2]));
+ DEFINE(VMM_CTX_PTA_OFFSET, offsetof(union context, cr[8]));
+ DEFINE(VMM_CTX_IBR0_OFFSET, offsetof(union context, ibr[0]));
+ DEFINE(VMM_CTX_DBR0_OFFSET, offsetof(union context, dbr[0]));
+ DEFINE(VMM_CTX_F2_OFFSET, offsetof(union context, fr[2]));
+ DEFINE(VMM_CTX_F3_OFFSET, offsetof(union context, fr[3]));
+ DEFINE(VMM_CTX_F32_OFFSET, offsetof(union context, fr[32]));
+ DEFINE(VMM_CTX_F33_OFFSET, offsetof(union context, fr[33]));
+ DEFINE(VMM_CTX_PKR0_OFFSET, offsetof(union context, pkr[0]));
+ DEFINE(VMM_CTX_PSR_OFFSET, offsetof(union context, psr));
+ BLANK();
+}
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
new file mode 100644
index 000000000000..6df073240135
--- /dev/null
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -0,0 +1,1806 @@
+
+/*
+ * kvm_ia64.c: Basic KVM suppport On Itanium series processors
+ *
+ *
+ * Copyright (C) 2007, Intel Corporation.
+ * Xiantao Zhang (xiantao.zhang@intel.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/percpu.h>
+#include <linux/gfp.h>
+#include <linux/fs.h>
+#include <linux/smp.h>
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+#include <linux/bitops.h>
+#include <linux/hrtimer.h>
+#include <linux/uaccess.h>
+
+#include <asm/pgtable.h>
+#include <asm/gcc_intrin.h>
+#include <asm/pal.h>
+#include <asm/cacheflush.h>
+#include <asm/div64.h>
+#include <asm/tlb.h>
+
+#include "misc.h"
+#include "vti.h"
+#include "iodev.h"
+#include "ioapic.h"
+#include "lapic.h"
+
+static unsigned long kvm_vmm_base;
+static unsigned long kvm_vsa_base;
+static unsigned long kvm_vm_buffer;
+static unsigned long kvm_vm_buffer_size;
+unsigned long kvm_vmm_gp;
+
+static long vp_env_info;
+
+static struct kvm_vmm_info *kvm_vmm_info;
+
+static DEFINE_PER_CPU(struct kvm_vcpu *, last_vcpu);
+
+struct kvm_stats_debugfs_item debugfs_entries[] = {
+ { NULL }
+};
+
+
+struct fdesc{
+ unsigned long ip;
+ unsigned long gp;
+};
+
+static void kvm_flush_icache(unsigned long start, unsigned long len)
+{
+ int l;
+
+ for (l = 0; l < (len + 32); l += 32)
+ ia64_fc(start + l);
+
+ ia64_sync_i();
+ ia64_srlz_i();
+}
+
+static void kvm_flush_tlb_all(void)
+{
+ unsigned long i, j, count0, count1, stride0, stride1, addr;
+ long flags;
+
+ addr = local_cpu_data->ptce_base;
+ count0 = local_cpu_data->ptce_count[0];
+ count1 = local_cpu_data->ptce_count[1];
+ stride0 = local_cpu_data->ptce_stride[0];
+ stride1 = local_cpu_data->ptce_stride[1];
+
+ local_irq_save(flags);
+ for (i = 0; i < count0; ++i) {
+ for (j = 0; j < count1; ++j) {
+ ia64_ptce(addr);
+ addr += stride1;
+ }
+ addr += stride0;
+ }
+ local_irq_restore(flags);
+ ia64_srlz_i(); /* srlz.i implies srlz.d */
+}
+
+long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler)
+{
+ struct ia64_pal_retval iprv;
+
+ PAL_CALL_STK(iprv, PAL_VP_CREATE, (u64)vpd, (u64)host_iva,
+ (u64)opt_handler);
+
+ return iprv.status;
+}
+
+static DEFINE_SPINLOCK(vp_lock);
+
+void kvm_arch_hardware_enable(void *garbage)
+{
+ long status;
+ long tmp_base;
+ unsigned long pte;
+ unsigned long saved_psr;
+ int slot;
+
+ pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base),
+ PAGE_KERNEL));
+ local_irq_save(saved_psr);
+ slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
+ if (slot < 0)
+ return;
+ local_irq_restore(saved_psr);
+
+ spin_lock(&vp_lock);
+ status = ia64_pal_vp_init_env(kvm_vsa_base ?
+ VP_INIT_ENV : VP_INIT_ENV_INITALIZE,
+ __pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, &tmp_base);
+ if (status != 0) {
+ printk(KERN_WARNING"kvm: Failed to Enable VT Support!!!!\n");
+ return ;
+ }
+
+ if (!kvm_vsa_base) {
+ kvm_vsa_base = tmp_base;
+ printk(KERN_INFO"kvm: kvm_vsa_base:0x%lx\n", kvm_vsa_base);
+ }
+ spin_unlock(&vp_lock);
+ ia64_ptr_entry(0x3, slot);
+}
+
+void kvm_arch_hardware_disable(void *garbage)
+{
+
+ long status;
+ int slot;
+ unsigned long pte;
+ unsigned long saved_psr;
+ unsigned long host_iva = ia64_getreg(_IA64_REG_CR_IVA);
+
+ pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base),
+ PAGE_KERNEL));
+
+ local_irq_save(saved_psr);
+ slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
+ if (slot < 0)
+ return;
+ local_irq_restore(saved_psr);
+
+ status = ia64_pal_vp_exit_env(host_iva);
+ if (status)
+ printk(KERN_DEBUG"kvm: Failed to disable VT support! :%ld\n",
+ status);
+ ia64_ptr_entry(0x3, slot);
+}
+
+void kvm_arch_check_processor_compat(void *rtn)
+{
+ *(int *)rtn = 0;
+}
+
+int kvm_dev_ioctl_check_extension(long ext)
+{
+
+ int r;
+
+ switch (ext) {
+ case KVM_CAP_IRQCHIP:
+ case KVM_CAP_USER_MEMORY:
+
+ r = 1;
+ break;
+ default:
+ r = 0;
+ }
+ return r;
+
+}
+
+static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
+ gpa_t addr)
+{
+ struct kvm_io_device *dev;
+
+ dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr);
+
+ return dev;
+}
+
+static int handle_vm_error(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+ kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
+ kvm_run->hw.hardware_exit_reason = 1;
+ return 0;
+}
+
+static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+ struct kvm_mmio_req *p;
+ struct kvm_io_device *mmio_dev;
+
+ p = kvm_get_vcpu_ioreq(vcpu);
+
+ if ((p->addr & PAGE_MASK) == IOAPIC_DEFAULT_BASE_ADDRESS)
+ goto mmio;
+ vcpu->mmio_needed = 1;
+ vcpu->mmio_phys_addr = kvm_run->mmio.phys_addr = p->addr;
+ vcpu->mmio_size = kvm_run->mmio.len = p->size;
+ vcpu->mmio_is_write = kvm_run->mmio.is_write = !p->dir;
+
+ if (vcpu->mmio_is_write)
+ memcpy(vcpu->mmio_data, &p->data, p->size);
+ memcpy(kvm_run->mmio.data, &p->data, p->size);
+ kvm_run->exit_reason = KVM_EXIT_MMIO;
+ return 0;
+mmio:
+ mmio_dev = vcpu_find_mmio_dev(vcpu, p->addr);
+ if (mmio_dev) {
+ if (!p->dir)
+ kvm_iodevice_write(mmio_dev, p->addr, p->size,
+ &p->data);
+ else
+ kvm_iodevice_read(mmio_dev, p->addr, p->size,
+ &p->data);
+
+ } else
+ printk(KERN_ERR"kvm: No iodevice found! addr:%lx\n", p->addr);
+ p->state = STATE_IORESP_READY;
+
+ return 1;
+}
+
+static int handle_pal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+ struct exit_ctl_data *p;
+
+ p = kvm_get_exit_data(vcpu);
+
+ if (p->exit_reason == EXIT_REASON_PAL_CALL)
+ return kvm_pal_emul(vcpu, kvm_run);
+ else {
+ kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
+ kvm_run->hw.hardware_exit_reason = 2;
+ return 0;
+ }
+}
+
+static int handle_sal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+ struct exit_ctl_data *p;
+
+ p = kvm_get_exit_data(vcpu);
+
+ if (p->exit_reason == EXIT_REASON_SAL_CALL) {
+ kvm_sal_emul(vcpu);
+ return 1;
+ } else {
+ kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
+ kvm_run->hw.hardware_exit_reason = 3;
+ return 0;
+ }
+
+}
+
+/*
+ * offset: address offset to IPI space.
+ * value: deliver value.
+ */
+static void vcpu_deliver_ipi(struct kvm_vcpu *vcpu, uint64_t dm,
+ uint64_t vector)
+{
+ switch (dm) {
+ case SAPIC_FIXED:
+ kvm_apic_set_irq(vcpu, vector, 0);
+ break;
+ case SAPIC_NMI:
+ kvm_apic_set_irq(vcpu, 2, 0);
+ break;
+ case SAPIC_EXTINT:
+ kvm_apic_set_irq(vcpu, 0, 0);
+ break;
+ case SAPIC_INIT:
+ case SAPIC_PMI:
+ default:
+ printk(KERN_ERR"kvm: Unimplemented Deliver reserved IPI!\n");
+ break;
+ }
+}
+
+static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id,
+ unsigned long eid)
+{
+ union ia64_lid lid;
+ int i;
+
+ for (i = 0; i < KVM_MAX_VCPUS; i++) {
+ if (kvm->vcpus[i]) {
+ lid.val = VCPU_LID(kvm->vcpus[i]);
+ if (lid.id == id && lid.eid == eid)
+ return kvm->vcpus[i];
+ }
+ }
+
+ return NULL;
+}
+
+static int handle_ipi(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+ struct exit_ctl_data *p = kvm_get_exit_data(vcpu);
+ struct kvm_vcpu *target_vcpu;
+ struct kvm_pt_regs *regs;
+ union ia64_ipi_a addr = p->u.ipi_data.addr;
+ union ia64_ipi_d data = p->u.ipi_data.data;
+
+ target_vcpu = lid_to_vcpu(vcpu->kvm, addr.id, addr.eid);
+ if (!target_vcpu)
+ return handle_vm_error(vcpu, kvm_run);
+
+ if (!target_vcpu->arch.launched) {
+ regs = vcpu_regs(target_vcpu);
+
+ regs->cr_iip = vcpu->kvm->arch.rdv_sal_data.boot_ip;
+ regs->r1 = vcpu->kvm->arch.rdv_sal_data.boot_gp;
+
+ target_vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+ if (waitqueue_active(&target_vcpu->wq))
+ wake_up_interruptible(&target_vcpu->wq);
+ } else {
+ vcpu_deliver_ipi(target_vcpu, data.dm, data.vector);
+ if (target_vcpu != vcpu)
+ kvm_vcpu_kick(target_vcpu);
+ }
+
+ return 1;
+}
+
+struct call_data {
+ struct kvm_ptc_g ptc_g_data;
+ struct kvm_vcpu *vcpu;
+};
+
+static void vcpu_global_purge(void *info)
+{
+ struct call_data *p = (struct call_data *)info;
+ struct kvm_vcpu *vcpu = p->vcpu;
+
+ if (test_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
+ return;
+
+ set_bit(KVM_REQ_PTC_G, &vcpu->requests);
+ if (vcpu->arch.ptc_g_count < MAX_PTC_G_NUM) {
+ vcpu->arch.ptc_g_data[vcpu->arch.ptc_g_count++] =
+ p->ptc_g_data;
+ } else {
+ clear_bit(KVM_REQ_PTC_G, &vcpu->requests);
+ vcpu->arch.ptc_g_count = 0;
+ set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests);
+ }
+}
+
+static int handle_global_purge(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+ struct exit_ctl_data *p = kvm_get_exit_data(vcpu);
+ struct kvm *kvm = vcpu->kvm;
+ struct call_data call_data;
+ int i;
+ call_data.ptc_g_data = p->u.ptc_g_data;
+
+ for (i = 0; i < KVM_MAX_VCPUS; i++) {
+ if (!kvm->vcpus[i] || kvm->vcpus[i]->arch.mp_state ==
+ KVM_MP_STATE_UNINITIALIZED ||
+ vcpu == kvm->vcpus[i])
+ continue;
+
+ if (waitqueue_active(&kvm->vcpus[i]->wq))
+ wake_up_interruptible(&kvm->vcpus[i]->wq);
+
+ if (kvm->vcpus[i]->cpu != -1) {
+ call_data.vcpu = kvm->vcpus[i];
+ smp_call_function_single(kvm->vcpus[i]->cpu,
+ vcpu_global_purge, &call_data, 0, 1);
+ } else
+ printk(KERN_WARNING"kvm: Uninit vcpu received ipi!\n");
+
+ }
+ return 1;
+}
+
+static int handle_switch_rr6(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+ return 1;
+}
+
+int kvm_emulate_halt(struct kvm_vcpu *vcpu)
+{
+
+ ktime_t kt;
+ long itc_diff;
+ unsigned long vcpu_now_itc;
+
+ unsigned long expires;
+ struct hrtimer *p_ht = &vcpu->arch.hlt_timer;
+ unsigned long cyc_per_usec = local_cpu_data->cyc_per_usec;
+ struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
+
+ vcpu_now_itc = ia64_getreg(_IA64_REG_AR_ITC) + vcpu->arch.itc_offset;
+
+ if (time_after(vcpu_now_itc, vpd->itm)) {
+ vcpu->arch.timer_check = 1;
+ return 1;
+ }
+ itc_diff = vpd->itm - vcpu_now_itc;
+ if (itc_diff < 0)
+ itc_diff = -itc_diff;
+
+ expires = div64_64(itc_diff, cyc_per_usec);
+ kt = ktime_set(0, 1000 * expires);
+ vcpu->arch.ht_active = 1;
+ hrtimer_start(p_ht, kt, HRTIMER_MODE_ABS);
+
+ if (irqchip_in_kernel(vcpu->kvm)) {
+ vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
+ kvm_vcpu_block(vcpu);
+ hrtimer_cancel(p_ht);
+ vcpu->arch.ht_active = 0;
+
+ if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
+ return -EINTR;
+ return 1;
+ } else {
+ printk(KERN_ERR"kvm: Unsupported userspace halt!");
+ return 0;
+ }
+}
+
+static int handle_vm_shutdown(struct kvm_vcpu *vcpu,
+ struct kvm_run *kvm_run)
+{
+ kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
+ return 0;
+}
+
+static int handle_external_interrupt(struct kvm_vcpu *vcpu,
+ struct kvm_run *kvm_run)
+{
+ return 1;
+}
+
+static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu,
+ struct kvm_run *kvm_run) = {
+ [EXIT_REASON_VM_PANIC] = handle_vm_error,
+ [EXIT_REASON_MMIO_INSTRUCTION] = handle_mmio,
+ [EXIT_REASON_PAL_CALL] = handle_pal_call,
+ [EXIT_REASON_SAL_CALL] = handle_sal_call,
+ [EXIT_REASON_SWITCH_RR6] = handle_switch_rr6,
+ [EXIT_REASON_VM_DESTROY] = handle_vm_shutdown,
+ [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt,
+ [EXIT_REASON_IPI] = handle_ipi,
+ [EXIT_REASON_PTC_G] = handle_global_purge,
+
+};
+
+static const int kvm_vti_max_exit_handlers =
+ sizeof(kvm_vti_exit_handlers)/sizeof(*kvm_vti_exit_handlers);
+
+static void kvm_prepare_guest_switch(struct kvm_vcpu *vcpu)
+{
+}
+
+static uint32_t kvm_get_exit_reason(struct kvm_vcpu *vcpu)
+{
+ struct exit_ctl_data *p_exit_data;
+
+ p_exit_data = kvm_get_exit_data(vcpu);
+ return p_exit_data->exit_reason;
+}
+
+/*
+ * The guest has exited. See if we can fix it or if we need userspace
+ * assistance.
+ */
+static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+{
+ u32 exit_reason = kvm_get_exit_reason(vcpu);
+ vcpu->arch.last_exit = exit_reason;
+
+ if (exit_reason < kvm_vti_max_exit_handlers
+ && kvm_vti_exit_handlers[exit_reason])
+ return kvm_vti_exit_handlers[exit_reason](vcpu, kvm_run);
+ else {
+ kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
+ kvm_run->hw.hardware_exit_reason = exit_reason;
+ }
+ return 0;
+}
+
+static inline void vti_set_rr6(unsigned long rr6)
+{
+ ia64_set_rr(RR6, rr6);
+ ia64_srlz_i();
+}
+
+static int kvm_insert_vmm_mapping(struct kvm_vcpu *vcpu)
+{
+ unsigned long pte;
+ struct kvm *kvm = vcpu->kvm;
+ int r;
+
+ /*Insert a pair of tr to map vmm*/
+ pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), PAGE_KERNEL));
+ r = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
+ if (r < 0)
+ goto out;
+ vcpu->arch.vmm_tr_slot = r;
+ /*Insert a pairt of tr to map data of vm*/
+ pte = pte_val(mk_pte_phys(__pa(kvm->arch.vm_base), PAGE_KERNEL));
+ r = ia64_itr_entry(0x3, KVM_VM_DATA_BASE,
+ pte, KVM_VM_DATA_SHIFT);
+ if (r < 0)
+ goto out;
+ vcpu->arch.vm_tr_slot = r;
+ r = 0;
+out:
+ return r;
+
+}
+
+static void kvm_purge_vmm_mapping(struct kvm_vcpu *vcpu)
+{
+
+ ia64_ptr_entry(0x3, vcpu->arch.vmm_tr_slot);
+ ia64_ptr_entry(0x3, vcpu->arch.vm_tr_slot);
+
+}
+
+static int kvm_vcpu_pre_transition(struct kvm_vcpu *vcpu)
+{
+ int cpu = smp_processor_id();
+
+ if (vcpu->arch.last_run_cpu != cpu ||
+ per_cpu(last_vcpu, cpu) != vcpu) {
+ per_cpu(last_vcpu, cpu) = vcpu;
+ vcpu->arch.last_run_cpu = cpu;
+ kvm_flush_tlb_all();
+ }
+
+ vcpu->arch.host_rr6 = ia64_get_rr(RR6);
+ vti_set_rr6(vcpu->arch.vmm_rr);
+ return kvm_insert_vmm_mapping(vcpu);
+}
+static void kvm_vcpu_post_transition(struct kvm_vcpu *vcpu)
+{
+ kvm_purge_vmm_mapping(vcpu);
+ vti_set_rr6(vcpu->arch.host_rr6);
+}
+
+static int vti_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+ union context *host_ctx, *guest_ctx;
+ int r;
+
+ /*Get host and guest context with guest address space.*/
+ host_ctx = kvm_get_host_context(vcpu);
+ guest_ctx = kvm_get_guest_context(vcpu);
+
+ r = kvm_vcpu_pre_transition(vcpu);
+ if (r < 0)
+ goto out;
+ kvm_vmm_info->tramp_entry(host_ctx, guest_ctx);
+ kvm_vcpu_post_transition(vcpu);
+ r = 0;
+out:
+ return r;
+}
+
+static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+ int r;
+
+again:
+ preempt_disable();
+
+ kvm_prepare_guest_switch(vcpu);
+ local_irq_disable();
+
+ if (signal_pending(current)) {
+ local_irq_enable();
+ preempt_enable();
+ r = -EINTR;
+ kvm_run->exit_reason = KVM_EXIT_INTR;
+ goto out;
+ }
+
+ vcpu->guest_mode = 1;
+ kvm_guest_enter();
+
+ r = vti_vcpu_run(vcpu, kvm_run);
+ if (r < 0) {
+ local_irq_enable();
+ preempt_enable();
+ kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+ goto out;
+ }
+
+ vcpu->arch.launched = 1;
+ vcpu->guest_mode = 0;
+ local_irq_enable();
+
+ /*
+ * We must have an instruction between local_irq_enable() and
+ * kvm_guest_exit(), so the timer interrupt isn't delayed by
+ * the interrupt shadow. The stat.exits increment will do nicely.
+ * But we need to prevent reordering, hence this barrier():
+ */
+ barrier();
+
+ kvm_guest_exit();
+
+ preempt_enable();
+
+ r = kvm_handle_exit(kvm_run, vcpu);
+
+ if (r > 0) {
+ if (!need_resched())
+ goto again;
+ }
+
+out:
+ if (r > 0) {
+ kvm_resched(vcpu);
+ goto again;
+ }
+
+ return r;
+}
+
+static void kvm_set_mmio_data(struct kvm_vcpu *vcpu)
+{
+ struct kvm_mmio_req *p = kvm_get_vcpu_ioreq(vcpu);
+
+ if (!vcpu->mmio_is_write)
+ memcpy(&p->data, vcpu->mmio_data, 8);
+ p->state = STATE_IORESP_READY;
+}
+
+int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+ int r;
+ sigset_t sigsaved;
+
+ vcpu_load(vcpu);
+
+ if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
+ kvm_vcpu_block(vcpu);
+ vcpu_put(vcpu);
+ return -EAGAIN;
+ }
+
+ if (vcpu->sigset_active)
+ sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
+
+ if (vcpu->mmio_needed) {
+ memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
+ kvm_set_mmio_data(vcpu);
+ vcpu->mmio_read_completed = 1;
+ vcpu->mmio_needed = 0;
+ }
+ r = __vcpu_run(vcpu, kvm_run);
+
+ if (vcpu->sigset_active)
+ sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+
+ vcpu_put(vcpu);
+ return r;
+}
+
+/*
+ * Allocate 16M memory for every vm to hold its specific data.
+ * Its memory map is defined in kvm_host.h.
+ */
+static struct kvm *kvm_alloc_kvm(void)
+{
+
+ struct kvm *kvm;
+ uint64_t vm_base;
+
+ vm_base = __get_free_pages(GFP_KERNEL, get_order(KVM_VM_DATA_SIZE));
+
+ if (!vm_base)
+ return ERR_PTR(-ENOMEM);
+ printk(KERN_DEBUG"kvm: VM data's base Address:0x%lx\n", vm_base);
+
+ /* Zero all pages before use! */
+ memset((void *)vm_base, 0, KVM_VM_DATA_SIZE);
+
+ kvm = (struct kvm *)(vm_base + KVM_VM_OFS);
+ kvm->arch.vm_base = vm_base;
+
+ return kvm;
+}
+
+struct kvm_io_range {
+ unsigned long start;
+ unsigned long size;
+ unsigned long type;
+};
+
+static const struct kvm_io_range io_ranges[] = {
+ {VGA_IO_START, VGA_IO_SIZE, GPFN_FRAME_BUFFER},
+ {MMIO_START, MMIO_SIZE, GPFN_LOW_MMIO},
+ {LEGACY_IO_START, LEGACY_IO_SIZE, GPFN_LEGACY_IO},
+ {IO_SAPIC_START, IO_SAPIC_SIZE, GPFN_IOSAPIC},
+ {PIB_START, PIB_SIZE, GPFN_PIB},
+};
+
+static void kvm_build_io_pmt(struct kvm *kvm)
+{
+ unsigned long i, j;
+
+ /* Mark I/O ranges */
+ for (i = 0; i < (sizeof(io_ranges) / sizeof(struct kvm_io_range));
+ i++) {
+ for (j = io_ranges[i].start;
+ j < io_ranges[i].start + io_ranges[i].size;
+ j += PAGE_SIZE)
+ kvm_set_pmt_entry(kvm, j >> PAGE_SHIFT,
+ io_ranges[i].type, 0);
+ }
+
+}
+
+/*Use unused rids to virtualize guest rid.*/
+#define GUEST_PHYSICAL_RR0 0x1739
+#define GUEST_PHYSICAL_RR4 0x2739
+#define VMM_INIT_RR 0x1660
+
+static void kvm_init_vm(struct kvm *kvm)
+{
+ long vm_base;
+
+ BUG_ON(!kvm);
+
+ kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0;
+ kvm->arch.metaphysical_rr4 = GUEST_PHYSICAL_RR4;
+ kvm->arch.vmm_init_rr = VMM_INIT_RR;
+
+ vm_base = kvm->arch.vm_base;
+ if (vm_base) {
+ kvm->arch.vhpt_base = vm_base + KVM_VHPT_OFS;
+ kvm->arch.vtlb_base = vm_base + KVM_VTLB_OFS;
+ kvm->arch.vpd_base = vm_base + KVM_VPD_OFS;
+ }
+
+ /*
+ *Fill P2M entries for MMIO/IO ranges
+ */
+ kvm_build_io_pmt(kvm);
+
+}
+
+struct kvm *kvm_arch_create_vm(void)
+{
+ struct kvm *kvm = kvm_alloc_kvm();
+
+ if (IS_ERR(kvm))
+ return ERR_PTR(-ENOMEM);
+ kvm_init_vm(kvm);
+
+ return kvm;
+
+}
+
+static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm,
+ struct kvm_irqchip *chip)
+{
+ int r;
+
+ r = 0;
+ switch (chip->chip_id) {
+ case KVM_IRQCHIP_IOAPIC:
+ memcpy(&chip->chip.ioapic, ioapic_irqchip(kvm),
+ sizeof(struct kvm_ioapic_state));
+ break;
+ default:
+ r = -EINVAL;
+ break;
+ }
+ return r;
+}
+
+static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
+{
+ int r;
+
+ r = 0;
+ switch (chip->chip_id) {
+ case KVM_IRQCHIP_IOAPIC:
+ memcpy(ioapic_irqchip(kvm),
+ &chip->chip.ioapic,
+ sizeof(struct kvm_ioapic_state));
+ break;
+ default:
+ r = -EINVAL;
+ break;
+ }
+ return r;
+}
+
+#define RESTORE_REGS(_x) vcpu->arch._x = regs->_x
+
+int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+ int i;
+ struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
+ int r;
+
+ vcpu_load(vcpu);
+
+ for (i = 0; i < 16; i++) {
+ vpd->vgr[i] = regs->vpd.vgr[i];
+ vpd->vbgr[i] = regs->vpd.vbgr[i];
+ }
+ for (i = 0; i < 128; i++)
+ vpd->vcr[i] = regs->vpd.vcr[i];
+ vpd->vhpi = regs->vpd.vhpi;
+ vpd->vnat = regs->vpd.vnat;
+ vpd->vbnat = regs->vpd.vbnat;
+ vpd->vpsr = regs->vpd.vpsr;
+
+ vpd->vpr = regs->vpd.vpr;
+
+ r = -EFAULT;
+ r = copy_from_user(&vcpu->arch.guest, regs->saved_guest,
+ sizeof(union context));
+ if (r)
+ goto out;
+ r = copy_from_user(vcpu + 1, regs->saved_stack +
+ sizeof(struct kvm_vcpu),
+ IA64_STK_OFFSET - sizeof(struct kvm_vcpu));
+ if (r)
+ goto out;
+ vcpu->arch.exit_data =
+ ((struct kvm_vcpu *)(regs->saved_stack))->arch.exit_data;
+
+ RESTORE_REGS(mp_state);
+ RESTORE_REGS(vmm_rr);
+ memcpy(vcpu->arch.itrs, regs->itrs, sizeof(struct thash_data) * NITRS);
+ memcpy(vcpu->arch.dtrs, regs->dtrs, sizeof(struct thash_data) * NDTRS);
+ RESTORE_REGS(itr_regions);
+ RESTORE_REGS(dtr_regions);
+ RESTORE_REGS(tc_regions);
+ RESTORE_REGS(irq_check);
+ RESTORE_REGS(itc_check);
+ RESTORE_REGS(timer_check);
+ RESTORE_REGS(timer_pending);
+ RESTORE_REGS(last_itc);
+ for (i = 0; i < 8; i++) {
+ vcpu->arch.vrr[i] = regs->vrr[i];
+ vcpu->arch.ibr[i] = regs->ibr[i];
+ vcpu->arch.dbr[i] = regs->dbr[i];
+ }
+ for (i = 0; i < 4; i++)
+ vcpu->arch.insvc[i] = regs->insvc[i];
+ RESTORE_REGS(xtp);
+ RESTORE_REGS(metaphysical_rr0);
+ RESTORE_REGS(metaphysical_rr4);
+ RESTORE_REGS(metaphysical_saved_rr0);
+ RESTORE_REGS(metaphysical_saved_rr4);
+ RESTORE_REGS(fp_psr);
+ RESTORE_REGS(saved_gp);
+
+ vcpu->arch.irq_new_pending = 1;
+ vcpu->arch.itc_offset = regs->saved_itc - ia64_getreg(_IA64_REG_AR_ITC);
+ set_bit(KVM_REQ_RESUME, &vcpu->requests);
+
+ vcpu_put(vcpu);
+ r = 0;
+out:
+ return r;
+}
+
+long kvm_arch_vm_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ struct kvm *kvm = filp->private_data;
+ void __user *argp = (void __user *)arg;
+ int r = -EINVAL;
+
+ switch (ioctl) {
+ case KVM_SET_MEMORY_REGION: {
+ struct kvm_memory_region kvm_mem;
+ struct kvm_userspace_memory_region kvm_userspace_mem;
+
+ r = -EFAULT;
+ if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem))
+ goto out;
+ kvm_userspace_mem.slot = kvm_mem.slot;
+ kvm_userspace_mem.flags = kvm_mem.flags;
+ kvm_userspace_mem.guest_phys_addr =
+ kvm_mem.guest_phys_addr;
+ kvm_userspace_mem.memory_size = kvm_mem.memory_size;
+ r = kvm_vm_ioctl_set_memory_region(kvm,
+ &kvm_userspace_mem, 0);
+ if (r)
+ goto out;
+ break;
+ }
+ case KVM_CREATE_IRQCHIP:
+ r = -EFAULT;
+ r = kvm_ioapic_init(kvm);
+ if (r)
+ goto out;
+ break;
+ case KVM_IRQ_LINE: {
+ struct kvm_irq_level irq_event;
+
+ r = -EFAULT;
+ if (copy_from_user(&irq_event, argp, sizeof irq_event))
+ goto out;
+ if (irqchip_in_kernel(kvm)) {
+ mutex_lock(&kvm->lock);
+ kvm_ioapic_set_irq(kvm->arch.vioapic,
+ irq_event.irq,
+ irq_event.level);
+ mutex_unlock(&kvm->lock);
+ r = 0;
+ }
+ break;
+ }
+ case KVM_GET_IRQCHIP: {
+ /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
+ struct kvm_irqchip chip;
+
+ r = -EFAULT;
+ if (copy_from_user(&chip, argp, sizeof chip))
+ goto out;
+ r = -ENXIO;
+ if (!irqchip_in_kernel(kvm))
+ goto out;
+ r = kvm_vm_ioctl_get_irqchip(kvm, &chip);
+ if (r)
+ goto out;
+ r = -EFAULT;
+ if (copy_to_user(argp, &chip, sizeof chip))
+ goto out;
+ r = 0;
+ break;
+ }
+ case KVM_SET_IRQCHIP: {
+ /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
+ struct kvm_irqchip chip;
+
+ r = -EFAULT;
+ if (copy_from_user(&chip, argp, sizeof chip))
+ goto out;
+ r = -ENXIO;
+ if (!irqchip_in_kernel(kvm))
+ goto out;
+ r = kvm_vm_ioctl_set_irqchip(kvm, &chip);
+ if (r)
+ goto out;
+ r = 0;
+ break;
+ }
+ default:
+ ;
+ }
+out:
+ return r;
+}
+
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ return -EINVAL;
+
+}
+int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
+ struct kvm_translation *tr)
+{
+
+ return -EINVAL;
+}
+
+static int kvm_alloc_vmm_area(void)
+{
+ if (!kvm_vmm_base && (kvm_vm_buffer_size < KVM_VM_BUFFER_SIZE)) {
+ kvm_vmm_base = __get_free_pages(GFP_KERNEL,
+ get_order(KVM_VMM_SIZE));
+ if (!kvm_vmm_base)
+ return -ENOMEM;
+
+ memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE);
+ kvm_vm_buffer = kvm_vmm_base + VMM_SIZE;
+
+ printk(KERN_DEBUG"kvm:VMM's Base Addr:0x%lx, vm_buffer:0x%lx\n",
+ kvm_vmm_base, kvm_vm_buffer);
+ }
+
+ return 0;
+}
+
+static void kvm_free_vmm_area(void)
+{
+ if (kvm_vmm_base) {
+ /*Zero this area before free to avoid bits leak!!*/
+ memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE);
+ free_pages(kvm_vmm_base, get_order(KVM_VMM_SIZE));
+ kvm_vmm_base = 0;
+ kvm_vm_buffer = 0;
+ kvm_vsa_base = 0;
+ }
+}
+
+/*
+ * Make sure that a cpu that is being hot-unplugged does not have any vcpus
+ * cached on it. Leave it as blank for IA64.
+ */
+void decache_vcpus_on_cpu(int cpu)
+{
+}
+
+static void vti_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+}
+
+static int vti_init_vpd(struct kvm_vcpu *vcpu)
+{
+ int i;
+ union cpuid3_t cpuid3;
+ struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
+
+ if (IS_ERR(vpd))
+ return PTR_ERR(vpd);
+
+ /* CPUID init */
+ for (i = 0; i < 5; i++)
+ vpd->vcpuid[i] = ia64_get_cpuid(i);
+
+ /* Limit the CPUID number to 5 */
+ cpuid3.value = vpd->vcpuid[3];
+ cpuid3.number = 4; /* 5 - 1 */
+ vpd->vcpuid[3] = cpuid3.value;
+
+ /*Set vac and vdc fields*/
+ vpd->vac.a_from_int_cr = 1;
+ vpd->vac.a_to_int_cr = 1;
+ vpd->vac.a_from_psr = 1;
+ vpd->vac.a_from_cpuid = 1;
+ vpd->vac.a_cover = 1;
+ vpd->vac.a_bsw = 1;
+ vpd->vac.a_int = 1;
+ vpd->vdc.d_vmsw = 1;
+
+ /*Set virtual buffer*/
+ vpd->virt_env_vaddr = KVM_VM_BUFFER_BASE;
+
+ return 0;
+}
+
+static int vti_create_vp(struct kvm_vcpu *vcpu)
+{
+ long ret;
+ struct vpd *vpd = vcpu->arch.vpd;
+ unsigned long vmm_ivt;
+
+ vmm_ivt = kvm_vmm_info->vmm_ivt;
+
+ printk(KERN_DEBUG "kvm: vcpu:%p,ivt: 0x%lx\n", vcpu, vmm_ivt);
+
+ ret = ia64_pal_vp_create((u64 *)vpd, (u64 *)vmm_ivt, 0);
+
+ if (ret) {
+ printk(KERN_ERR"kvm: ia64_pal_vp_create failed!\n");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static void init_ptce_info(struct kvm_vcpu *vcpu)
+{
+ ia64_ptce_info_t ptce = {0};
+
+ ia64_get_ptce(&ptce);
+ vcpu->arch.ptce_base = ptce.base;
+ vcpu->arch.ptce_count[0] = ptce.count[0];
+ vcpu->arch.ptce_count[1] = ptce.count[1];
+ vcpu->arch.ptce_stride[0] = ptce.stride[0];
+ vcpu->arch.ptce_stride[1] = ptce.stride[1];
+}
+
+static void kvm_migrate_hlt_timer(struct kvm_vcpu *vcpu)
+{
+ struct hrtimer *p_ht = &vcpu->arch.hlt_timer;
+
+ if (hrtimer_cancel(p_ht))
+ hrtimer_start(p_ht, p_ht->expires, HRTIMER_MODE_ABS);
+}
+
+static enum hrtimer_restart hlt_timer_fn(struct hrtimer *data)
+{
+ struct kvm_vcpu *vcpu;
+ wait_queue_head_t *q;
+
+ vcpu = container_of(data, struct kvm_vcpu, arch.hlt_timer);
+ if (vcpu->arch.mp_state != KVM_MP_STATE_HALTED)
+ goto out;
+
+ q = &vcpu->wq;
+ if (waitqueue_active(q)) {
+ vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+ wake_up_interruptible(q);
+ }
+out:
+ vcpu->arch.timer_check = 1;
+ return HRTIMER_NORESTART;
+}
+
+#define PALE_RESET_ENTRY 0x80000000ffffffb0UL
+
+int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu *v;
+ int r;
+ int i;
+ long itc_offset;
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+
+ union context *p_ctx = &vcpu->arch.guest;
+ struct kvm_vcpu *vmm_vcpu = to_guest(vcpu->kvm, vcpu);
+
+ /*Init vcpu context for first run.*/
+ if (IS_ERR(vmm_vcpu))
+ return PTR_ERR(vmm_vcpu);
+
+ if (vcpu->vcpu_id == 0) {
+ vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+
+ /*Set entry address for first run.*/
+ regs->cr_iip = PALE_RESET_ENTRY;
+
+ /*Initilize itc offset for vcpus*/
+ itc_offset = 0UL - ia64_getreg(_IA64_REG_AR_ITC);
+ for (i = 0; i < MAX_VCPU_NUM; i++) {
+ v = (struct kvm_vcpu *)((char *)vcpu + VCPU_SIZE * i);
+ v->arch.itc_offset = itc_offset;
+ v->arch.last_itc = 0;
+ }
+ } else
+ vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
+
+ r = -ENOMEM;
+ vcpu->arch.apic = kzalloc(sizeof(struct kvm_lapic), GFP_KERNEL);
+ if (!vcpu->arch.apic)
+ goto out;
+ vcpu->arch.apic->vcpu = vcpu;
+
+ p_ctx->gr[1] = 0;
+ p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + IA64_STK_OFFSET);
+ p_ctx->gr[13] = (unsigned long)vmm_vcpu;
+ p_ctx->psr = 0x1008522000UL;
+ p_ctx->ar[40] = FPSR_DEFAULT; /*fpsr*/
+ p_ctx->caller_unat = 0;
+ p_ctx->pr = 0x0;
+ p_ctx->ar[36] = 0x0; /*unat*/
+ p_ctx->ar[19] = 0x0; /*rnat*/
+ p_ctx->ar[18] = (unsigned long)vmm_vcpu +
+ ((sizeof(struct kvm_vcpu)+15) & ~15);
+ p_ctx->ar[64] = 0x0; /*pfs*/
+ p_ctx->cr[0] = 0x7e04UL;
+ p_ctx->cr[2] = (unsigned long)kvm_vmm_info->vmm_ivt;
+ p_ctx->cr[8] = 0x3c;
+
+ /*Initilize region register*/
+ p_ctx->rr[0] = 0x30;
+ p_ctx->rr[1] = 0x30;
+ p_ctx->rr[2] = 0x30;
+ p_ctx->rr[3] = 0x30;
+ p_ctx->rr[4] = 0x30;
+ p_ctx->rr[5] = 0x30;
+ p_ctx->rr[7] = 0x30;
+
+ /*Initilize branch register 0*/
+ p_ctx->br[0] = *(unsigned long *)kvm_vmm_info->vmm_entry;
+
+ vcpu->arch.vmm_rr = kvm->arch.vmm_init_rr;
+ vcpu->arch.metaphysical_rr0 = kvm->arch.metaphysical_rr0;
+ vcpu->arch.metaphysical_rr4 = kvm->arch.metaphysical_rr4;
+
+ hrtimer_init(&vcpu->arch.hlt_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ vcpu->arch.hlt_timer.function = hlt_timer_fn;
+
+ vcpu->arch.last_run_cpu = -1;
+ vcpu->arch.vpd = (struct vpd *)VPD_ADDR(vcpu->vcpu_id);
+ vcpu->arch.vsa_base = kvm_vsa_base;
+ vcpu->arch.__gp = kvm_vmm_gp;
+ vcpu->arch.dirty_log_lock_pa = __pa(&kvm->arch.dirty_log_lock);
+ vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_ADDR(vcpu->vcpu_id);
+ vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_ADDR(vcpu->vcpu_id);
+ init_ptce_info(vcpu);
+
+ r = 0;
+out:
+ return r;
+}
+
+static int vti_vcpu_setup(struct kvm_vcpu *vcpu, int id)
+{
+ unsigned long psr;
+ int r;
+
+ local_irq_save(psr);
+ r = kvm_insert_vmm_mapping(vcpu);
+ if (r)
+ goto fail;
+ r = kvm_vcpu_init(vcpu, vcpu->kvm, id);
+ if (r)
+ goto fail;
+
+ r = vti_init_vpd(vcpu);
+ if (r) {
+ printk(KERN_DEBUG"kvm: vpd init error!!\n");
+ goto uninit;
+ }
+
+ r = vti_create_vp(vcpu);
+ if (r)
+ goto uninit;
+
+ kvm_purge_vmm_mapping(vcpu);
+ local_irq_restore(psr);
+
+ return 0;
+uninit:
+ kvm_vcpu_uninit(vcpu);
+fail:
+ return r;
+}
+
+struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
+ unsigned int id)
+{
+ struct kvm_vcpu *vcpu;
+ unsigned long vm_base = kvm->arch.vm_base;
+ int r;
+ int cpu;
+
+ r = -ENOMEM;
+ if (!vm_base) {
+ printk(KERN_ERR"kvm: Create vcpu[%d] error!\n", id);
+ goto fail;
+ }
+ vcpu = (struct kvm_vcpu *)(vm_base + KVM_VCPU_OFS + VCPU_SIZE * id);
+ vcpu->kvm = kvm;
+
+ cpu = get_cpu();
+ vti_vcpu_load(vcpu, cpu);
+ r = vti_vcpu_setup(vcpu, id);
+ put_cpu();
+
+ if (r) {
+ printk(KERN_DEBUG"kvm: vcpu_setup error!!\n");
+ goto fail;
+ }
+
+ return vcpu;
+fail:
+ return ERR_PTR(r);
+}
+
+int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+ return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+ return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+ return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
+ struct kvm_debug_guest *dbg)
+{
+ return -EINVAL;
+}
+
+static void free_kvm(struct kvm *kvm)
+{
+ unsigned long vm_base = kvm->arch.vm_base;
+
+ if (vm_base) {
+ memset((void *)vm_base, 0, KVM_VM_DATA_SIZE);
+ free_pages(vm_base, get_order(KVM_VM_DATA_SIZE));
+ }
+
+}
+
+static void kvm_release_vm_pages(struct kvm *kvm)
+{
+ struct kvm_memory_slot *memslot;
+ int i, j;
+ unsigned long base_gfn;
+
+ for (i = 0; i < kvm->nmemslots; i++) {
+ memslot = &kvm->memslots[i];
+ base_gfn = memslot->base_gfn;
+
+ for (j = 0; j < memslot->npages; j++) {
+ if (memslot->rmap[j])
+ put_page((struct page *)memslot->rmap[j]);
+ }
+ }
+}
+
+void kvm_arch_destroy_vm(struct kvm *kvm)
+{
+ kfree(kvm->arch.vioapic);
+ kvm_release_vm_pages(kvm);
+ kvm_free_physmem(kvm);
+ free_kvm(kvm);
+}
+
+void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
+{
+}
+
+void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+ if (cpu != vcpu->cpu) {
+ vcpu->cpu = cpu;
+ if (vcpu->arch.ht_active)
+ kvm_migrate_hlt_timer(vcpu);
+ }
+}
+
+#define SAVE_REGS(_x) regs->_x = vcpu->arch._x
+
+int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+ int i;
+ int r;
+ struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
+ vcpu_load(vcpu);
+
+ for (i = 0; i < 16; i++) {
+ regs->vpd.vgr[i] = vpd->vgr[i];
+ regs->vpd.vbgr[i] = vpd->vbgr[i];
+ }
+ for (i = 0; i < 128; i++)
+ regs->vpd.vcr[i] = vpd->vcr[i];
+ regs->vpd.vhpi = vpd->vhpi;
+ regs->vpd.vnat = vpd->vnat;
+ regs->vpd.vbnat = vpd->vbnat;
+ regs->vpd.vpsr = vpd->vpsr;
+ regs->vpd.vpr = vpd->vpr;
+
+ r = -EFAULT;
+ r = copy_to_user(regs->saved_guest, &vcpu->arch.guest,
+ sizeof(union context));
+ if (r)
+ goto out;
+ r = copy_to_user(regs->saved_stack, (void *)vcpu, IA64_STK_OFFSET);
+ if (r)
+ goto out;
+ SAVE_REGS(mp_state);
+ SAVE_REGS(vmm_rr);
+ memcpy(regs->itrs, vcpu->arch.itrs, sizeof(struct thash_data) * NITRS);
+ memcpy(regs->dtrs, vcpu->arch.dtrs, sizeof(struct thash_data) * NDTRS);
+ SAVE_REGS(itr_regions);
+ SAVE_REGS(dtr_regions);
+ SAVE_REGS(tc_regions);
+ SAVE_REGS(irq_check);
+ SAVE_REGS(itc_check);
+ SAVE_REGS(timer_check);
+ SAVE_REGS(timer_pending);
+ SAVE_REGS(last_itc);
+ for (i = 0; i < 8; i++) {
+ regs->vrr[i] = vcpu->arch.vrr[i];
+ regs->ibr[i] = vcpu->arch.ibr[i];
+ regs->dbr[i] = vcpu->arch.dbr[i];
+ }
+ for (i = 0; i < 4; i++)
+ regs->insvc[i] = vcpu->arch.insvc[i];
+ regs->saved_itc = vcpu->arch.itc_offset + ia64_getreg(_IA64_REG_AR_ITC);
+ SAVE_REGS(xtp);
+ SAVE_REGS(metaphysical_rr0);
+ SAVE_REGS(metaphysical_rr4);
+ SAVE_REGS(metaphysical_saved_rr0);
+ SAVE_REGS(metaphysical_saved_rr4);
+ SAVE_REGS(fp_psr);
+ SAVE_REGS(saved_gp);
+ vcpu_put(vcpu);
+ r = 0;
+out:
+ return r;
+}
+
+void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
+{
+
+ hrtimer_cancel(&vcpu->arch.hlt_timer);
+ kfree(vcpu->arch.apic);
+}
+
+
+long kvm_arch_vcpu_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ return -EINVAL;
+}
+
+int kvm_arch_set_memory_region(struct kvm *kvm,
+ struct kvm_userspace_memory_region *mem,
+ struct kvm_memory_slot old,
+ int user_alloc)
+{
+ unsigned long i;
+ struct page *page;
+ int npages = mem->memory_size >> PAGE_SHIFT;
+ struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot];
+ unsigned long base_gfn = memslot->base_gfn;
+
+ for (i = 0; i < npages; i++) {
+ page = gfn_to_page(kvm, base_gfn + i);
+ kvm_set_pmt_entry(kvm, base_gfn + i,
+ page_to_pfn(page) << PAGE_SHIFT,
+ _PAGE_AR_RWX|_PAGE_MA_WB);
+ memslot->rmap[i] = (unsigned long)page;
+ }
+
+ return 0;
+}
+
+
+long kvm_arch_dev_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ return -EINVAL;
+}
+
+void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
+{
+ kvm_vcpu_uninit(vcpu);
+}
+
+static int vti_cpu_has_kvm_support(void)
+{
+ long avail = 1, status = 1, control = 1;
+ long ret;
+
+ ret = ia64_pal_proc_get_features(&avail, &status, &control, 0);
+ if (ret)
+ goto out;
+
+ if (!(avail & PAL_PROC_VM_BIT))
+ goto out;
+
+ printk(KERN_DEBUG"kvm: Hardware Supports VT\n");
+
+ ret = ia64_pal_vp_env_info(&kvm_vm_buffer_size, &vp_env_info);
+ if (ret)
+ goto out;
+ printk(KERN_DEBUG"kvm: VM Buffer Size:0x%lx\n", kvm_vm_buffer_size);
+
+ if (!(vp_env_info & VP_OPCODE)) {
+ printk(KERN_WARNING"kvm: No opcode ability on hardware, "
+ "vm_env_info:0x%lx\n", vp_env_info);
+ }
+
+ return 1;
+out:
+ return 0;
+}
+
+static int kvm_relocate_vmm(struct kvm_vmm_info *vmm_info,
+ struct module *module)
+{
+ unsigned long module_base;
+ unsigned long vmm_size;
+
+ unsigned long vmm_offset, func_offset, fdesc_offset;
+ struct fdesc *p_fdesc;
+
+ BUG_ON(!module);
+
+ if (!kvm_vmm_base) {
+ printk("kvm: kvm area hasn't been initilized yet!!\n");
+ return -EFAULT;
+ }
+
+ /*Calculate new position of relocated vmm module.*/
+ module_base = (unsigned long)module->module_core;
+ vmm_size = module->core_size;
+ if (unlikely(vmm_size > KVM_VMM_SIZE))
+ return -EFAULT;
+
+ memcpy((void *)kvm_vmm_base, (void *)module_base, vmm_size);
+ kvm_flush_icache(kvm_vmm_base, vmm_size);
+
+ /*Recalculate kvm_vmm_info based on new VMM*/
+ vmm_offset = vmm_info->vmm_ivt - module_base;
+ kvm_vmm_info->vmm_ivt = KVM_VMM_BASE + vmm_offset;
+ printk(KERN_DEBUG"kvm: Relocated VMM's IVT Base Addr:%lx\n",
+ kvm_vmm_info->vmm_ivt);
+
+ fdesc_offset = (unsigned long)vmm_info->vmm_entry - module_base;
+ kvm_vmm_info->vmm_entry = (kvm_vmm_entry *)(KVM_VMM_BASE +
+ fdesc_offset);
+ func_offset = *(unsigned long *)vmm_info->vmm_entry - module_base;
+ p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset);
+ p_fdesc->ip = KVM_VMM_BASE + func_offset;
+ p_fdesc->gp = KVM_VMM_BASE+(p_fdesc->gp - module_base);
+
+ printk(KERN_DEBUG"kvm: Relocated VMM's Init Entry Addr:%lx\n",
+ KVM_VMM_BASE+func_offset);
+
+ fdesc_offset = (unsigned long)vmm_info->tramp_entry - module_base;
+ kvm_vmm_info->tramp_entry = (kvm_tramp_entry *)(KVM_VMM_BASE +
+ fdesc_offset);
+ func_offset = *(unsigned long *)vmm_info->tramp_entry - module_base;
+ p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset);
+ p_fdesc->ip = KVM_VMM_BASE + func_offset;
+ p_fdesc->gp = KVM_VMM_BASE + (p_fdesc->gp - module_base);
+
+ kvm_vmm_gp = p_fdesc->gp;
+
+ printk(KERN_DEBUG"kvm: Relocated VMM's Entry IP:%p\n",
+ kvm_vmm_info->vmm_entry);
+ printk(KERN_DEBUG"kvm: Relocated VMM's Trampoline Entry IP:0x%lx\n",
+ KVM_VMM_BASE + func_offset);
+
+ return 0;
+}
+
+int kvm_arch_init(void *opaque)
+{
+ int r;
+ struct kvm_vmm_info *vmm_info = (struct kvm_vmm_info *)opaque;
+
+ if (!vti_cpu_has_kvm_support()) {
+ printk(KERN_ERR "kvm: No Hardware Virtualization Support!\n");
+ r = -EOPNOTSUPP;
+ goto out;
+ }
+
+ if (kvm_vmm_info) {
+ printk(KERN_ERR "kvm: Already loaded VMM module!\n");
+ r = -EEXIST;
+ goto out;
+ }
+
+ r = -ENOMEM;
+ kvm_vmm_info = kzalloc(sizeof(struct kvm_vmm_info), GFP_KERNEL);
+ if (!kvm_vmm_info)
+ goto out;
+
+ if (kvm_alloc_vmm_area())
+ goto out_free0;
+
+ r = kvm_relocate_vmm(vmm_info, vmm_info->module);
+ if (r)
+ goto out_free1;
+
+ return 0;
+
+out_free1:
+ kvm_free_vmm_area();
+out_free0:
+ kfree(kvm_vmm_info);
+out:
+ return r;
+}
+
+void kvm_arch_exit(void)
+{
+ kvm_free_vmm_area();
+ kfree(kvm_vmm_info);
+ kvm_vmm_info = NULL;
+}
+
+static int kvm_ia64_sync_dirty_log(struct kvm *kvm,
+ struct kvm_dirty_log *log)
+{
+ struct kvm_memory_slot *memslot;
+ int r, i;
+ long n, base;
+ unsigned long *dirty_bitmap = (unsigned long *)((void *)kvm - KVM_VM_OFS
+ + KVM_MEM_DIRTY_LOG_OFS);
+
+ r = -EINVAL;
+ if (log->slot >= KVM_MEMORY_SLOTS)
+ goto out;
+
+ memslot = &kvm->memslots[log->slot];
+ r = -ENOENT;
+ if (!memslot->dirty_bitmap)
+ goto out;
+
+ n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
+ base = memslot->base_gfn / BITS_PER_LONG;
+
+ for (i = 0; i < n/sizeof(long); ++i) {
+ memslot->dirty_bitmap[i] = dirty_bitmap[base + i];
+ dirty_bitmap[base + i] = 0;
+ }
+ r = 0;
+out:
+ return r;
+}
+
+int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
+ struct kvm_dirty_log *log)
+{
+ int r;
+ int n;
+ struct kvm_memory_slot *memslot;
+ int is_dirty = 0;
+
+ spin_lock(&kvm->arch.dirty_log_lock);
+
+ r = kvm_ia64_sync_dirty_log(kvm, log);
+ if (r)
+ goto out;
+
+ r = kvm_get_dirty_log(kvm, log, &is_dirty);
+ if (r)
+ goto out;
+
+ /* If nothing is dirty, don't bother messing with page tables. */
+ if (is_dirty) {
+ kvm_flush_remote_tlbs(kvm);
+ memslot = &kvm->memslots[log->slot];
+ n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
+ memset(memslot->dirty_bitmap, 0, n);
+ }
+ r = 0;
+out:
+ spin_unlock(&kvm->arch.dirty_log_lock);
+ return r;
+}
+
+int kvm_arch_hardware_setup(void)
+{
+ return 0;
+}
+
+void kvm_arch_hardware_unsetup(void)
+{
+}
+
+static void vcpu_kick_intr(void *info)
+{
+#ifdef DEBUG
+ struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info;
+ printk(KERN_DEBUG"vcpu_kick_intr %p \n", vcpu);
+#endif
+}
+
+void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
+{
+ int ipi_pcpu = vcpu->cpu;
+
+ if (waitqueue_active(&vcpu->wq))
+ wake_up_interruptible(&vcpu->wq);
+
+ if (vcpu->guest_mode)
+ smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0, 0);
+}
+
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig)
+{
+
+ struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
+
+ if (!test_and_set_bit(vec, &vpd->irr[0])) {
+ vcpu->arch.irq_new_pending = 1;
+ if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
+ kvm_vcpu_kick(vcpu);
+ else if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) {
+ vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+ if (waitqueue_active(&vcpu->wq))
+ wake_up_interruptible(&vcpu->wq);
+ }
+ return 1;
+ }
+ return 0;
+}
+
+int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest)
+{
+ return apic->vcpu->vcpu_id == dest;
+}
+
+int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
+{
+ return 0;
+}
+
+struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector,
+ unsigned long bitmap)
+{
+ struct kvm_vcpu *lvcpu = kvm->vcpus[0];
+ int i;
+
+ for (i = 1; i < KVM_MAX_VCPUS; i++) {
+ if (!kvm->vcpus[i])
+ continue;
+ if (lvcpu->arch.xtp > kvm->vcpus[i]->arch.xtp)
+ lvcpu = kvm->vcpus[i];
+ }
+
+ return lvcpu;
+}
+
+static int find_highest_bits(int *dat)
+{
+ u32 bits, bitnum;
+ int i;
+
+ /* loop for all 256 bits */
+ for (i = 7; i >= 0 ; i--) {
+ bits = dat[i];
+ if (bits) {
+ bitnum = fls(bits);
+ return i * 32 + bitnum - 1;
+ }
+ }
+
+ return -1;
+}
+
+int kvm_highest_pending_irq(struct kvm_vcpu *vcpu)
+{
+ struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
+
+ if (vpd->irr[0] & (1UL << NMI_VECTOR))
+ return NMI_VECTOR;
+ if (vpd->irr[0] & (1UL << ExtINT_VECTOR))
+ return ExtINT_VECTOR;
+
+ return find_highest_bits((int *)&vpd->irr[0]);
+}
+
+int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
+{
+ if (kvm_highest_pending_irq(vcpu) != -1)
+ return 1;
+ return 0;
+}
+
+int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
+{
+ return 0;
+}
+
+gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
+{
+ return gfn;
+}
+
+int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE;
+}
+
+int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
+ struct kvm_mp_state *mp_state)
+{
+ return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
+ struct kvm_mp_state *mp_state)
+{
+ return -EINVAL;
+}
diff --git a/arch/ia64/kvm/kvm_fw.c b/arch/ia64/kvm/kvm_fw.c
new file mode 100644
index 000000000000..091f936c4485
--- /dev/null
+++ b/arch/ia64/kvm/kvm_fw.c
@@ -0,0 +1,500 @@
+/*
+ * PAL/SAL call delegation
+ *
+ * Copyright (c) 2004 Li Susie <susie.li@intel.com>
+ * Copyright (c) 2005 Yu Ke <ke.yu@intel.com>
+ * Copyright (c) 2007 Xiantao Zhang <xiantao.zhang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/smp.h>
+
+#include "vti.h"
+#include "misc.h"
+
+#include <asm/pal.h>
+#include <asm/sal.h>
+#include <asm/tlb.h>
+
+/*
+ * Handy macros to make sure that the PAL return values start out
+ * as something meaningful.
+ */
+#define INIT_PAL_STATUS_UNIMPLEMENTED(x) \
+ { \
+ x.status = PAL_STATUS_UNIMPLEMENTED; \
+ x.v0 = 0; \
+ x.v1 = 0; \
+ x.v2 = 0; \
+ }
+
+#define INIT_PAL_STATUS_SUCCESS(x) \
+ { \
+ x.status = PAL_STATUS_SUCCESS; \
+ x.v0 = 0; \
+ x.v1 = 0; \
+ x.v2 = 0; \
+ }
+
+static void kvm_get_pal_call_data(struct kvm_vcpu *vcpu,
+ u64 *gr28, u64 *gr29, u64 *gr30, u64 *gr31) {
+ struct exit_ctl_data *p;
+
+ if (vcpu) {
+ p = &vcpu->arch.exit_data;
+ if (p->exit_reason == EXIT_REASON_PAL_CALL) {
+ *gr28 = p->u.pal_data.gr28;
+ *gr29 = p->u.pal_data.gr29;
+ *gr30 = p->u.pal_data.gr30;
+ *gr31 = p->u.pal_data.gr31;
+ return ;
+ }
+ }
+ printk(KERN_DEBUG"Failed to get vcpu pal data!!!\n");
+}
+
+static void set_pal_result(struct kvm_vcpu *vcpu,
+ struct ia64_pal_retval result) {
+
+ struct exit_ctl_data *p;
+
+ p = kvm_get_exit_data(vcpu);
+ if (p && p->exit_reason == EXIT_REASON_PAL_CALL) {
+ p->u.pal_data.ret = result;
+ return ;
+ }
+ INIT_PAL_STATUS_UNIMPLEMENTED(p->u.pal_data.ret);
+}
+
+static void set_sal_result(struct kvm_vcpu *vcpu,
+ struct sal_ret_values result) {
+ struct exit_ctl_data *p;
+
+ p = kvm_get_exit_data(vcpu);
+ if (p && p->exit_reason == EXIT_REASON_SAL_CALL) {
+ p->u.sal_data.ret = result;
+ return ;
+ }
+ printk(KERN_WARNING"Failed to set sal result!!\n");
+}
+
+struct cache_flush_args {
+ u64 cache_type;
+ u64 operation;
+ u64 progress;
+ long status;
+};
+
+cpumask_t cpu_cache_coherent_map;
+
+static void remote_pal_cache_flush(void *data)
+{
+ struct cache_flush_args *args = data;
+ long status;
+ u64 progress = args->progress;
+
+ status = ia64_pal_cache_flush(args->cache_type, args->operation,
+ &progress, NULL);
+ if (status != 0)
+ args->status = status;
+}
+
+static struct ia64_pal_retval pal_cache_flush(struct kvm_vcpu *vcpu)
+{
+ u64 gr28, gr29, gr30, gr31;
+ struct ia64_pal_retval result = {0, 0, 0, 0};
+ struct cache_flush_args args = {0, 0, 0, 0};
+ long psr;
+
+ gr28 = gr29 = gr30 = gr31 = 0;
+ kvm_get_pal_call_data(vcpu, &gr28, &gr29, &gr30, &gr31);
+
+ if (gr31 != 0)
+ printk(KERN_ERR"vcpu:%p called cache_flush error!\n", vcpu);
+
+ /* Always call Host Pal in int=1 */
+ gr30 &= ~PAL_CACHE_FLUSH_CHK_INTRS;
+ args.cache_type = gr29;
+ args.operation = gr30;
+ smp_call_function(remote_pal_cache_flush,
+ (void *)&args, 1, 1);
+ if (args.status != 0)
+ printk(KERN_ERR"pal_cache_flush error!,"
+ "status:0x%lx\n", args.status);
+ /*
+ * Call Host PAL cache flush
+ * Clear psr.ic when call PAL_CACHE_FLUSH
+ */
+ local_irq_save(psr);
+ result.status = ia64_pal_cache_flush(gr29, gr30, &result.v1,
+ &result.v0);
+ local_irq_restore(psr);
+ if (result.status != 0)
+ printk(KERN_ERR"vcpu:%p crashed due to cache_flush err:%ld"
+ "in1:%lx,in2:%lx\n",
+ vcpu, result.status, gr29, gr30);
+
+#if 0
+ if (gr29 == PAL_CACHE_TYPE_COHERENT) {
+ cpus_setall(vcpu->arch.cache_coherent_map);
+ cpu_clear(vcpu->cpu, vcpu->arch.cache_coherent_map);
+ cpus_setall(cpu_cache_coherent_map);
+ cpu_clear(vcpu->cpu, cpu_cache_coherent_map);
+ }
+#endif
+ return result;
+}
+
+struct ia64_pal_retval pal_cache_summary(struct kvm_vcpu *vcpu)
+{
+
+ struct ia64_pal_retval result;
+
+ PAL_CALL(result, PAL_CACHE_SUMMARY, 0, 0, 0);
+ return result;
+}
+
+static struct ia64_pal_retval pal_freq_base(struct kvm_vcpu *vcpu)
+{
+
+ struct ia64_pal_retval result;
+
+ PAL_CALL(result, PAL_FREQ_BASE, 0, 0, 0);
+
+ /*
+ * PAL_FREQ_BASE may not be implemented in some platforms,
+ * call SAL instead.
+ */
+ if (result.v0 == 0) {
+ result.status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM,
+ &result.v0,
+ &result.v1);
+ result.v2 = 0;
+ }
+
+ return result;
+}
+
+static struct ia64_pal_retval pal_freq_ratios(struct kvm_vcpu *vcpu)
+{
+
+ struct ia64_pal_retval result;
+
+ PAL_CALL(result, PAL_FREQ_RATIOS, 0, 0, 0);
+ return result;
+}
+
+static struct ia64_pal_retval pal_logical_to_physica(struct kvm_vcpu *vcpu)
+{
+ struct ia64_pal_retval result;
+
+ INIT_PAL_STATUS_UNIMPLEMENTED(result);
+ return result;
+}
+
+static struct ia64_pal_retval pal_platform_addr(struct kvm_vcpu *vcpu)
+{
+
+ struct ia64_pal_retval result;
+
+ INIT_PAL_STATUS_SUCCESS(result);
+ return result;
+}
+
+static struct ia64_pal_retval pal_proc_get_features(struct kvm_vcpu *vcpu)
+{
+
+ struct ia64_pal_retval result = {0, 0, 0, 0};
+ long in0, in1, in2, in3;
+
+ kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
+ result.status = ia64_pal_proc_get_features(&result.v0, &result.v1,
+ &result.v2, in2);
+
+ return result;
+}
+
+static struct ia64_pal_retval pal_cache_info(struct kvm_vcpu *vcpu)
+{
+
+ pal_cache_config_info_t ci;
+ long status;
+ unsigned long in0, in1, in2, in3, r9, r10;
+
+ kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
+ status = ia64_pal_cache_config_info(in1, in2, &ci);
+ r9 = ci.pcci_info_1.pcci1_data;
+ r10 = ci.pcci_info_2.pcci2_data;
+ return ((struct ia64_pal_retval){status, r9, r10, 0});
+}
+
+#define GUEST_IMPL_VA_MSB 59
+#define GUEST_RID_BITS 18
+
+static struct ia64_pal_retval pal_vm_summary(struct kvm_vcpu *vcpu)
+{
+
+ pal_vm_info_1_u_t vminfo1;
+ pal_vm_info_2_u_t vminfo2;
+ struct ia64_pal_retval result;
+
+ PAL_CALL(result, PAL_VM_SUMMARY, 0, 0, 0);
+ if (!result.status) {
+ vminfo1.pvi1_val = result.v0;
+ vminfo1.pal_vm_info_1_s.max_itr_entry = 8;
+ vminfo1.pal_vm_info_1_s.max_dtr_entry = 8;
+ result.v0 = vminfo1.pvi1_val;
+ vminfo2.pal_vm_info_2_s.impl_va_msb = GUEST_IMPL_VA_MSB;
+ vminfo2.pal_vm_info_2_s.rid_size = GUEST_RID_BITS;
+ result.v1 = vminfo2.pvi2_val;
+ }
+
+ return result;
+}
+
+static struct ia64_pal_retval pal_vm_info(struct kvm_vcpu *vcpu)
+{
+ struct ia64_pal_retval result;
+
+ INIT_PAL_STATUS_UNIMPLEMENTED(result);
+
+ return result;
+}
+
+static u64 kvm_get_pal_call_index(struct kvm_vcpu *vcpu)
+{
+ u64 index = 0;
+ struct exit_ctl_data *p;
+
+ p = kvm_get_exit_data(vcpu);
+ if (p && (p->exit_reason == EXIT_REASON_PAL_CALL))
+ index = p->u.pal_data.gr28;
+
+ return index;
+}
+
+int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+
+ u64 gr28;
+ struct ia64_pal_retval result;
+ int ret = 1;
+
+ gr28 = kvm_get_pal_call_index(vcpu);
+ /*printk("pal_call index:%lx\n",gr28);*/
+ switch (gr28) {
+ case PAL_CACHE_FLUSH:
+ result = pal_cache_flush(vcpu);
+ break;
+ case PAL_CACHE_SUMMARY:
+ result = pal_cache_summary(vcpu);
+ break;
+ case PAL_HALT_LIGHT:
+ {
+ vcpu->arch.timer_pending = 1;
+ INIT_PAL_STATUS_SUCCESS(result);
+ if (kvm_highest_pending_irq(vcpu) == -1)
+ ret = kvm_emulate_halt(vcpu);
+
+ }
+ break;
+
+ case PAL_FREQ_RATIOS:
+ result = pal_freq_ratios(vcpu);
+ break;
+
+ case PAL_FREQ_BASE:
+ result = pal_freq_base(vcpu);
+ break;
+
+ case PAL_LOGICAL_TO_PHYSICAL :
+ result = pal_logical_to_physica(vcpu);
+ break;
+
+ case PAL_VM_SUMMARY :
+ result = pal_vm_summary(vcpu);
+ break;
+
+ case PAL_VM_INFO :
+ result = pal_vm_info(vcpu);
+ break;
+ case PAL_PLATFORM_ADDR :
+ result = pal_platform_addr(vcpu);
+ break;
+ case PAL_CACHE_INFO:
+ result = pal_cache_info(vcpu);
+ break;
+ case PAL_PTCE_INFO:
+ INIT_PAL_STATUS_SUCCESS(result);
+ result.v1 = (1L << 32) | 1L;
+ break;
+ case PAL_VM_PAGE_SIZE:
+ result.status = ia64_pal_vm_page_size(&result.v0,
+ &result.v1);
+ break;
+ case PAL_RSE_INFO:
+ result.status = ia64_pal_rse_info(&result.v0,
+ (pal_hints_u_t *)&result.v1);
+ break;
+ case PAL_PROC_GET_FEATURES:
+ result = pal_proc_get_features(vcpu);
+ break;
+ case PAL_DEBUG_INFO:
+ result.status = ia64_pal_debug_info(&result.v0,
+ &result.v1);
+ break;
+ case PAL_VERSION:
+ result.status = ia64_pal_version(
+ (pal_version_u_t *)&result.v0,
+ (pal_version_u_t *)&result.v1);
+
+ break;
+ case PAL_FIXED_ADDR:
+ result.status = PAL_STATUS_SUCCESS;
+ result.v0 = vcpu->vcpu_id;
+ break;
+ default:
+ INIT_PAL_STATUS_UNIMPLEMENTED(result);
+ printk(KERN_WARNING"kvm: Unsupported pal call,"
+ " index:0x%lx\n", gr28);
+ }
+ set_pal_result(vcpu, result);
+ return ret;
+}
+
+static struct sal_ret_values sal_emulator(struct kvm *kvm,
+ long index, unsigned long in1,
+ unsigned long in2, unsigned long in3,
+ unsigned long in4, unsigned long in5,
+ unsigned long in6, unsigned long in7)
+{
+ unsigned long r9 = 0;
+ unsigned long r10 = 0;
+ long r11 = 0;
+ long status;
+
+ status = 0;
+ switch (index) {
+ case SAL_FREQ_BASE:
+ status = ia64_sal_freq_base(in1, &r9, &r10);
+ break;
+ case SAL_PCI_CONFIG_READ:
+ printk(KERN_WARNING"kvm: Not allowed to call here!"
+ " SAL_PCI_CONFIG_READ\n");
+ break;
+ case SAL_PCI_CONFIG_WRITE:
+ printk(KERN_WARNING"kvm: Not allowed to call here!"
+ " SAL_PCI_CONFIG_WRITE\n");
+ break;
+ case SAL_SET_VECTORS:
+ if (in1 == SAL_VECTOR_OS_BOOT_RENDEZ) {
+ if (in4 != 0 || in5 != 0 || in6 != 0 || in7 != 0) {
+ status = -2;
+ } else {
+ kvm->arch.rdv_sal_data.boot_ip = in2;
+ kvm->arch.rdv_sal_data.boot_gp = in3;
+ }
+ printk("Rendvous called! iip:%lx\n\n", in2);
+ } else
+ printk(KERN_WARNING"kvm: CALLED SAL_SET_VECTORS %lu."
+ "ignored...\n", in1);
+ break;
+ case SAL_GET_STATE_INFO:
+ /* No more info. */
+ status = -5;
+ r9 = 0;
+ break;
+ case SAL_GET_STATE_INFO_SIZE:
+ /* Return a dummy size. */
+ status = 0;
+ r9 = 128;
+ break;
+ case SAL_CLEAR_STATE_INFO:
+ /* Noop. */
+ break;
+ case SAL_MC_RENDEZ:
+ printk(KERN_WARNING
+ "kvm: called SAL_MC_RENDEZ. ignored...\n");
+ break;
+ case SAL_MC_SET_PARAMS:
+ printk(KERN_WARNING
+ "kvm: called SAL_MC_SET_PARAMS.ignored!\n");
+ break;
+ case SAL_CACHE_FLUSH:
+ if (1) {
+ /*Flush using SAL.
+ This method is faster but has a side
+ effect on other vcpu running on
+ this cpu. */
+ status = ia64_sal_cache_flush(in1);
+ } else {
+ /*Maybe need to implement the method
+ without side effect!*/
+ status = 0;
+ }
+ break;
+ case SAL_CACHE_INIT:
+ printk(KERN_WARNING
+ "kvm: called SAL_CACHE_INIT. ignored...\n");
+ break;
+ case SAL_UPDATE_PAL:
+ printk(KERN_WARNING
+ "kvm: CALLED SAL_UPDATE_PAL. ignored...\n");
+ break;
+ default:
+ printk(KERN_WARNING"kvm: called SAL_CALL with unknown index."
+ " index:%ld\n", index);
+ status = -1;
+ break;
+ }
+ return ((struct sal_ret_values) {status, r9, r10, r11});
+}
+
+static void kvm_get_sal_call_data(struct kvm_vcpu *vcpu, u64 *in0, u64 *in1,
+ u64 *in2, u64 *in3, u64 *in4, u64 *in5, u64 *in6, u64 *in7){
+
+ struct exit_ctl_data *p;
+
+ p = kvm_get_exit_data(vcpu);
+
+ if (p) {
+ if (p->exit_reason == EXIT_REASON_SAL_CALL) {
+ *in0 = p->u.sal_data.in0;
+ *in1 = p->u.sal_data.in1;
+ *in2 = p->u.sal_data.in2;
+ *in3 = p->u.sal_data.in3;
+ *in4 = p->u.sal_data.in4;
+ *in5 = p->u.sal_data.in5;
+ *in6 = p->u.sal_data.in6;
+ *in7 = p->u.sal_data.in7;
+ return ;
+ }
+ }
+ *in0 = 0;
+}
+
+void kvm_sal_emul(struct kvm_vcpu *vcpu)
+{
+
+ struct sal_ret_values result;
+ u64 index, in1, in2, in3, in4, in5, in6, in7;
+
+ kvm_get_sal_call_data(vcpu, &index, &in1, &in2,
+ &in3, &in4, &in5, &in6, &in7);
+ result = sal_emulator(vcpu->kvm, index, in1, in2, in3,
+ in4, in5, in6, in7);
+ set_sal_result(vcpu, result);
+}
diff --git a/arch/ia64/kvm/kvm_minstate.h b/arch/ia64/kvm/kvm_minstate.h
new file mode 100644
index 000000000000..13980d9b8bcf
--- /dev/null
+++ b/arch/ia64/kvm/kvm_minstate.h
@@ -0,0 +1,273 @@
+/*
+ * kvm_minstate.h: min save macros
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
+ * Xiantao Zhang (xiantao.zhang@intel.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+
+#include <asm/asmmacro.h>
+#include <asm/types.h>
+#include <asm/kregs.h>
+#include "asm-offsets.h"
+
+#define KVM_MINSTATE_START_SAVE_MIN \
+ mov ar.rsc = 0;/* set enforced lazy mode, pl 0, little-endian, loadrs=0 */\
+ ;; \
+ mov.m r28 = ar.rnat; \
+ addl r22 = VMM_RBS_OFFSET,r1; /* compute base of RBS */ \
+ ;; \
+ lfetch.fault.excl.nt1 [r22]; \
+ addl r1 = IA64_STK_OFFSET-VMM_PT_REGS_SIZE,r1; /* compute base of memory stack */ \
+ mov r23 = ar.bspstore; /* save ar.bspstore */ \
+ ;; \
+ mov ar.bspstore = r22; /* switch to kernel RBS */\
+ ;; \
+ mov r18 = ar.bsp; \
+ mov ar.rsc = 0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */
+
+
+
+#define KVM_MINSTATE_END_SAVE_MIN \
+ bsw.1; /* switch back to bank 1 (must be last in insn group) */\
+ ;;
+
+
+#define PAL_VSA_SYNC_READ \
+ /* begin to call pal vps sync_read */ \
+ add r25 = VMM_VPD_BASE_OFFSET, r21; \
+ adds r20 = VMM_VCPU_VSA_BASE_OFFSET, r21; /* entry point */ \
+ ;; \
+ ld8 r25 = [r25]; /* read vpd base */ \
+ ld8 r20 = [r20]; \
+ ;; \
+ add r20 = PAL_VPS_SYNC_READ,r20; \
+ ;; \
+{ .mii; \
+ nop 0x0; \
+ mov r24 = ip; \
+ mov b0 = r20; \
+ ;; \
+}; \
+{ .mmb; \
+ add r24 = 0x20, r24; \
+ nop 0x0; \
+ br.cond.sptk b0; /* call the service */ \
+ ;; \
+};
+
+
+
+#define KVM_MINSTATE_GET_CURRENT(reg) mov reg=r21
+
+/*
+ * KVM_DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
+ * the minimum state necessary that allows us to turn psr.ic back
+ * on.
+ *
+ * Assumed state upon entry:
+ * psr.ic: off
+ * r31: contains saved predicates (pr)
+ *
+ * Upon exit, the state is as follows:
+ * psr.ic: off
+ * r2 = points to &pt_regs.r16
+ * r8 = contents of ar.ccv
+ * r9 = contents of ar.csd
+ * r10 = contents of ar.ssd
+ * r11 = FPSR_DEFAULT
+ * r12 = kernel sp (kernel virtual address)
+ * r13 = points to current task_struct (kernel virtual address)
+ * p15 = TRUE if psr.i is set in cr.ipsr
+ * predicate registers (other than p2, p3, and p15), b6, r3, r14, r15:
+ * preserved
+ *
+ * Note that psr.ic is NOT turned on by this macro. This is so that
+ * we can pass interruption state as arguments to a handler.
+ */
+
+
+#define PT(f) (VMM_PT_REGS_##f##_OFFSET)
+
+#define KVM_DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA) \
+ KVM_MINSTATE_GET_CURRENT(r16); /* M (or M;;I) */ \
+ mov r27 = ar.rsc; /* M */ \
+ mov r20 = r1; /* A */ \
+ mov r25 = ar.unat; /* M */ \
+ mov r29 = cr.ipsr; /* M */ \
+ mov r26 = ar.pfs; /* I */ \
+ mov r18 = cr.isr; \
+ COVER; /* B;; (or nothing) */ \
+ ;; \
+ tbit.z p0,p15 = r29,IA64_PSR_I_BIT; \
+ mov r1 = r16; \
+/* mov r21=r16; */ \
+ /* switch from user to kernel RBS: */ \
+ ;; \
+ invala; /* M */ \
+ SAVE_IFS; \
+ ;; \
+ KVM_MINSTATE_START_SAVE_MIN \
+ adds r17 = 2*L1_CACHE_BYTES,r1;/* cache-line size */ \
+ adds r16 = PT(CR_IPSR),r1; \
+ ;; \
+ lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES; \
+ st8 [r16] = r29; /* save cr.ipsr */ \
+ ;; \
+ lfetch.fault.excl.nt1 [r17]; \
+ tbit.nz p15,p0 = r29,IA64_PSR_I_BIT; \
+ mov r29 = b0 \
+ ;; \
+ adds r16 = PT(R8),r1; /* initialize first base pointer */\
+ adds r17 = PT(R9),r1; /* initialize second base pointer */\
+ ;; \
+.mem.offset 0,0; st8.spill [r16] = r8,16; \
+.mem.offset 8,0; st8.spill [r17] = r9,16; \
+ ;; \
+.mem.offset 0,0; st8.spill [r16] = r10,24; \
+.mem.offset 8,0; st8.spill [r17] = r11,24; \
+ ;; \
+ mov r9 = cr.iip; /* M */ \
+ mov r10 = ar.fpsr; /* M */ \
+ ;; \
+ st8 [r16] = r9,16; /* save cr.iip */ \
+ st8 [r17] = r30,16; /* save cr.ifs */ \
+ sub r18 = r18,r22; /* r18=RSE.ndirty*8 */ \
+ ;; \
+ st8 [r16] = r25,16; /* save ar.unat */ \
+ st8 [r17] = r26,16; /* save ar.pfs */ \
+ shl r18 = r18,16; /* calu ar.rsc used for "loadrs" */\
+ ;; \
+ st8 [r16] = r27,16; /* save ar.rsc */ \
+ st8 [r17] = r28,16; /* save ar.rnat */ \
+ ;; /* avoid RAW on r16 & r17 */ \
+ st8 [r16] = r23,16; /* save ar.bspstore */ \
+ st8 [r17] = r31,16; /* save predicates */ \
+ ;; \
+ st8 [r16] = r29,16; /* save b0 */ \
+ st8 [r17] = r18,16; /* save ar.rsc value for "loadrs" */\
+ ;; \
+.mem.offset 0,0; st8.spill [r16] = r20,16;/* save original r1 */ \
+.mem.offset 8,0; st8.spill [r17] = r12,16; \
+ adds r12 = -16,r1; /* switch to kernel memory stack */ \
+ ;; \
+.mem.offset 0,0; st8.spill [r16] = r13,16; \
+.mem.offset 8,0; st8.spill [r17] = r10,16; /* save ar.fpsr */\
+ mov r13 = r21; /* establish `current' */ \
+ ;; \
+.mem.offset 0,0; st8.spill [r16] = r15,16; \
+.mem.offset 8,0; st8.spill [r17] = r14,16; \
+ ;; \
+.mem.offset 0,0; st8.spill [r16] = r2,16; \
+.mem.offset 8,0; st8.spill [r17] = r3,16; \
+ adds r2 = VMM_PT_REGS_R16_OFFSET,r1; \
+ ;; \
+ adds r16 = VMM_VCPU_IIPA_OFFSET,r13; \
+ adds r17 = VMM_VCPU_ISR_OFFSET,r13; \
+ mov r26 = cr.iipa; \
+ mov r27 = cr.isr; \
+ ;; \
+ st8 [r16] = r26; \
+ st8 [r17] = r27; \
+ ;; \
+ EXTRA; \
+ mov r8 = ar.ccv; \
+ mov r9 = ar.csd; \
+ mov r10 = ar.ssd; \
+ movl r11 = FPSR_DEFAULT; /* L-unit */ \
+ adds r17 = VMM_VCPU_GP_OFFSET,r13; \
+ ;; \
+ ld8 r1 = [r17];/* establish kernel global pointer */ \
+ ;; \
+ PAL_VSA_SYNC_READ \
+ KVM_MINSTATE_END_SAVE_MIN
+
+/*
+ * SAVE_REST saves the remainder of pt_regs (with psr.ic on).
+ *
+ * Assumed state upon entry:
+ * psr.ic: on
+ * r2: points to &pt_regs.f6
+ * r3: points to &pt_regs.f7
+ * r8: contents of ar.ccv
+ * r9: contents of ar.csd
+ * r10: contents of ar.ssd
+ * r11: FPSR_DEFAULT
+ *
+ * Registers r14 and r15 are guaranteed not to be touched by SAVE_REST.
+ */
+#define KVM_SAVE_REST \
+.mem.offset 0,0; st8.spill [r2] = r16,16; \
+.mem.offset 8,0; st8.spill [r3] = r17,16; \
+ ;; \
+.mem.offset 0,0; st8.spill [r2] = r18,16; \
+.mem.offset 8,0; st8.spill [r3] = r19,16; \
+ ;; \
+.mem.offset 0,0; st8.spill [r2] = r20,16; \
+.mem.offset 8,0; st8.spill [r3] = r21,16; \
+ mov r18=b6; \
+ ;; \
+.mem.offset 0,0; st8.spill [r2] = r22,16; \
+.mem.offset 8,0; st8.spill [r3] = r23,16; \
+ mov r19 = b7; \
+ ;; \
+.mem.offset 0,0; st8.spill [r2] = r24,16; \
+.mem.offset 8,0; st8.spill [r3] = r25,16; \
+ ;; \
+.mem.offset 0,0; st8.spill [r2] = r26,16; \
+.mem.offset 8,0; st8.spill [r3] = r27,16; \
+ ;; \
+.mem.offset 0,0; st8.spill [r2] = r28,16; \
+.mem.offset 8,0; st8.spill [r3] = r29,16; \
+ ;; \
+.mem.offset 0,0; st8.spill [r2] = r30,16; \
+.mem.offset 8,0; st8.spill [r3] = r31,32; \
+ ;; \
+ mov ar.fpsr = r11; \
+ st8 [r2] = r8,8; \
+ adds r24 = PT(B6)-PT(F7),r3; \
+ adds r25 = PT(B7)-PT(F7),r3; \
+ ;; \
+ st8 [r24] = r18,16; /* b6 */ \
+ st8 [r25] = r19,16; /* b7 */ \
+ adds r2 = PT(R4)-PT(F6),r2; \
+ adds r3 = PT(R5)-PT(F7),r3; \
+ ;; \
+ st8 [r24] = r9; /* ar.csd */ \
+ st8 [r25] = r10; /* ar.ssd */ \
+ ;; \
+ mov r18 = ar.unat; \
+ adds r19 = PT(EML_UNAT)-PT(R4),r2; \
+ ;; \
+ st8 [r19] = r18; /* eml_unat */ \
+
+
+#define KVM_SAVE_EXTRA \
+.mem.offset 0,0; st8.spill [r2] = r4,16; \
+.mem.offset 8,0; st8.spill [r3] = r5,16; \
+ ;; \
+.mem.offset 0,0; st8.spill [r2] = r6,16; \
+.mem.offset 8,0; st8.spill [r3] = r7; \
+ ;; \
+ mov r26 = ar.unat; \
+ ;; \
+ st8 [r2] = r26;/* eml_unat */ \
+
+#define KVM_SAVE_MIN_WITH_COVER KVM_DO_SAVE_MIN(cover, mov r30 = cr.ifs,)
+#define KVM_SAVE_MIN_WITH_COVER_R19 KVM_DO_SAVE_MIN(cover, mov r30 = cr.ifs, mov r15 = r19)
+#define KVM_SAVE_MIN KVM_DO_SAVE_MIN( , mov r30 = r0, )
diff --git a/arch/ia64/kvm/lapic.h b/arch/ia64/kvm/lapic.h
new file mode 100644
index 000000000000..6d6cbcb14893
--- /dev/null
+++ b/arch/ia64/kvm/lapic.h
@@ -0,0 +1,25 @@
+#ifndef __KVM_IA64_LAPIC_H
+#define __KVM_IA64_LAPIC_H
+
+#include <linux/kvm_host.h>
+
+/*
+ * vlsapic
+ */
+struct kvm_lapic{
+ struct kvm_vcpu *vcpu;
+ uint64_t insvc[4];
+ uint64_t vhpi;
+ uint8_t xtp;
+ uint8_t pal_init_pending;
+ uint8_t pad[2];
+};
+
+int kvm_create_lapic(struct kvm_vcpu *vcpu);
+void kvm_free_lapic(struct kvm_vcpu *vcpu);
+
+int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
+int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig);
+
+#endif
diff --git a/arch/ia64/kvm/misc.h b/arch/ia64/kvm/misc.h
new file mode 100644
index 000000000000..e585c4607344
--- /dev/null
+++ b/arch/ia64/kvm/misc.h
@@ -0,0 +1,93 @@
+#ifndef __KVM_IA64_MISC_H
+#define __KVM_IA64_MISC_H
+
+#include <linux/kvm_host.h>
+/*
+ * misc.h
+ * Copyright (C) 2007, Intel Corporation.
+ * Xiantao Zhang (xiantao.zhang@intel.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+/*
+ *Return p2m base address at host side!
+ */
+static inline uint64_t *kvm_host_get_pmt(struct kvm *kvm)
+{
+ return (uint64_t *)(kvm->arch.vm_base + KVM_P2M_OFS);
+}
+
+static inline void kvm_set_pmt_entry(struct kvm *kvm, gfn_t gfn,
+ u64 paddr, u64 mem_flags)
+{
+ uint64_t *pmt_base = kvm_host_get_pmt(kvm);
+ unsigned long pte;
+
+ pte = PAGE_ALIGN(paddr) | mem_flags;
+ pmt_base[gfn] = pte;
+}
+
+/*Function for translating host address to guest address*/
+
+static inline void *to_guest(struct kvm *kvm, void *addr)
+{
+ return (void *)((unsigned long)(addr) - kvm->arch.vm_base +
+ KVM_VM_DATA_BASE);
+}
+
+/*Function for translating guest address to host address*/
+
+static inline void *to_host(struct kvm *kvm, void *addr)
+{
+ return (void *)((unsigned long)addr - KVM_VM_DATA_BASE
+ + kvm->arch.vm_base);
+}
+
+/* Get host context of the vcpu */
+static inline union context *kvm_get_host_context(struct kvm_vcpu *vcpu)
+{
+ union context *ctx = &vcpu->arch.host;
+ return to_guest(vcpu->kvm, ctx);
+}
+
+/* Get guest context of the vcpu */
+static inline union context *kvm_get_guest_context(struct kvm_vcpu *vcpu)
+{
+ union context *ctx = &vcpu->arch.guest;
+ return to_guest(vcpu->kvm, ctx);
+}
+
+/* kvm get exit data from gvmm! */
+static inline struct exit_ctl_data *kvm_get_exit_data(struct kvm_vcpu *vcpu)
+{
+ return &vcpu->arch.exit_data;
+}
+
+/*kvm get vcpu ioreq for kvm module!*/
+static inline struct kvm_mmio_req *kvm_get_vcpu_ioreq(struct kvm_vcpu *vcpu)
+{
+ struct exit_ctl_data *p_ctl_data;
+
+ if (vcpu) {
+ p_ctl_data = kvm_get_exit_data(vcpu);
+ if (p_ctl_data->exit_reason == EXIT_REASON_MMIO_INSTRUCTION)
+ return &p_ctl_data->u.ioreq;
+ }
+
+ return NULL;
+}
+
+#endif
diff --git a/arch/ia64/kvm/mmio.c b/arch/ia64/kvm/mmio.c
new file mode 100644
index 000000000000..351bf70da463
--- /dev/null
+++ b/arch/ia64/kvm/mmio.c
@@ -0,0 +1,341 @@
+/*
+ * mmio.c: MMIO emulation components.
+ * Copyright (c) 2004, Intel Corporation.
+ * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
+ * Kun Tian (Kevin Tian) (Kevin.tian@intel.com)
+ *
+ * Copyright (c) 2007 Intel Corporation KVM support.
+ * Xuefei Xu (Anthony Xu) (anthony.xu@intel.com)
+ * Xiantao Zhang (xiantao.zhang@intel.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <linux/kvm_host.h>
+
+#include "vcpu.h"
+
+static void vlsapic_write_xtp(struct kvm_vcpu *v, uint8_t val)
+{
+ VLSAPIC_XTP(v) = val;
+}
+
+/*
+ * LSAPIC OFFSET
+ */
+#define PIB_LOW_HALF(ofst) !(ofst & (1 << 20))
+#define PIB_OFST_INTA 0x1E0000
+#define PIB_OFST_XTP 0x1E0008
+
+/*
+ * execute write IPI op.
+ */
+static void vlsapic_write_ipi(struct kvm_vcpu *vcpu,
+ uint64_t addr, uint64_t data)
+{
+ struct exit_ctl_data *p = &current_vcpu->arch.exit_data;
+ unsigned long psr;
+
+ local_irq_save(psr);
+
+ p->exit_reason = EXIT_REASON_IPI;
+ p->u.ipi_data.addr.val = addr;
+ p->u.ipi_data.data.val = data;
+ vmm_transition(current_vcpu);
+
+ local_irq_restore(psr);
+
+}
+
+void lsapic_write(struct kvm_vcpu *v, unsigned long addr,
+ unsigned long length, unsigned long val)
+{
+ addr &= (PIB_SIZE - 1);
+
+ switch (addr) {
+ case PIB_OFST_INTA:
+ /*panic_domain(NULL, "Undefined write on PIB INTA\n");*/
+ panic_vm(v);
+ break;
+ case PIB_OFST_XTP:
+ if (length == 1) {
+ vlsapic_write_xtp(v, val);
+ } else {
+ /*panic_domain(NULL,
+ "Undefined write on PIB XTP\n");*/
+ panic_vm(v);
+ }
+ break;
+ default:
+ if (PIB_LOW_HALF(addr)) {
+ /*lower half */
+ if (length != 8)
+ /*panic_domain(NULL,
+ "Can't LHF write with size %ld!\n",
+ length);*/
+ panic_vm(v);
+ else
+ vlsapic_write_ipi(v, addr, val);
+ } else { /* upper half
+ printk("IPI-UHF write %lx\n",addr);*/
+ panic_vm(v);
+ }
+ break;
+ }
+}
+
+unsigned long lsapic_read(struct kvm_vcpu *v, unsigned long addr,
+ unsigned long length)
+{
+ uint64_t result = 0;
+
+ addr &= (PIB_SIZE - 1);
+
+ switch (addr) {
+ case PIB_OFST_INTA:
+ if (length == 1) /* 1 byte load */
+ ; /* There is no i8259, there is no INTA access*/
+ else
+ /*panic_domain(NULL,"Undefined read on PIB INTA\n"); */
+ panic_vm(v);
+
+ break;
+ case PIB_OFST_XTP:
+ if (length == 1) {
+ result = VLSAPIC_XTP(v);
+ /* printk("read xtp %lx\n", result); */
+ } else {
+ /*panic_domain(NULL,
+ "Undefined read on PIB XTP\n");*/
+ panic_vm(v);
+ }
+ break;
+ default:
+ panic_vm(v);
+ break;
+ }
+ return result;
+}
+
+static void mmio_access(struct kvm_vcpu *vcpu, u64 src_pa, u64 *dest,
+ u16 s, int ma, int dir)
+{
+ unsigned long iot;
+ struct exit_ctl_data *p = &vcpu->arch.exit_data;
+ unsigned long psr;
+
+ iot = __gpfn_is_io(src_pa >> PAGE_SHIFT);
+
+ local_irq_save(psr);
+
+ /*Intercept the acces for PIB range*/
+ if (iot == GPFN_PIB) {
+ if (!dir)
+ lsapic_write(vcpu, src_pa, s, *dest);
+ else
+ *dest = lsapic_read(vcpu, src_pa, s);
+ goto out;
+ }
+ p->exit_reason = EXIT_REASON_MMIO_INSTRUCTION;
+ p->u.ioreq.addr = src_pa;
+ p->u.ioreq.size = s;
+ p->u.ioreq.dir = dir;
+ if (dir == IOREQ_WRITE)
+ p->u.ioreq.data = *dest;
+ p->u.ioreq.state = STATE_IOREQ_READY;
+ vmm_transition(vcpu);
+
+ if (p->u.ioreq.state == STATE_IORESP_READY) {
+ if (dir == IOREQ_READ)
+ *dest = p->u.ioreq.data;
+ } else
+ panic_vm(vcpu);
+out:
+ local_irq_restore(psr);
+ return ;
+}
+
+/*
+ dir 1: read 0:write
+ inst_type 0:integer 1:floating point
+ */
+#define SL_INTEGER 0 /* store/load interger*/
+#define SL_FLOATING 1 /* store/load floating*/
+
+void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma)
+{
+ struct kvm_pt_regs *regs;
+ IA64_BUNDLE bundle;
+ int slot, dir = 0;
+ int inst_type = -1;
+ u16 size = 0;
+ u64 data, slot1a, slot1b, temp, update_reg;
+ s32 imm;
+ INST64 inst;
+
+ regs = vcpu_regs(vcpu);
+
+ if (fetch_code(vcpu, regs->cr_iip, &bundle)) {
+ /* if fetch code fail, return and try again */
+ return;
+ }
+ slot = ((struct ia64_psr *)&(regs->cr_ipsr))->ri;
+ if (!slot)
+ inst.inst = bundle.slot0;
+ else if (slot == 1) {
+ slot1a = bundle.slot1a;
+ slot1b = bundle.slot1b;
+ inst.inst = slot1a + (slot1b << 18);
+ } else if (slot == 2)
+ inst.inst = bundle.slot2;
+
+ /* Integer Load/Store */
+ if (inst.M1.major == 4 && inst.M1.m == 0 && inst.M1.x == 0) {
+ inst_type = SL_INTEGER;
+ size = (inst.M1.x6 & 0x3);
+ if ((inst.M1.x6 >> 2) > 0xb) {
+ /*write*/
+ dir = IOREQ_WRITE;
+ data = vcpu_get_gr(vcpu, inst.M4.r2);
+ } else if ((inst.M1.x6 >> 2) < 0xb) {
+ /*read*/
+ dir = IOREQ_READ;
+ }
+ } else if (inst.M2.major == 4 && inst.M2.m == 1 && inst.M2.x == 0) {
+ /* Integer Load + Reg update */
+ inst_type = SL_INTEGER;
+ dir = IOREQ_READ;
+ size = (inst.M2.x6 & 0x3);
+ temp = vcpu_get_gr(vcpu, inst.M2.r3);
+ update_reg = vcpu_get_gr(vcpu, inst.M2.r2);
+ temp += update_reg;
+ vcpu_set_gr(vcpu, inst.M2.r3, temp, 0);
+ } else if (inst.M3.major == 5) {
+ /*Integer Load/Store + Imm update*/
+ inst_type = SL_INTEGER;
+ size = (inst.M3.x6&0x3);
+ if ((inst.M5.x6 >> 2) > 0xb) {
+ /*write*/
+ dir = IOREQ_WRITE;
+ data = vcpu_get_gr(vcpu, inst.M5.r2);
+ temp = vcpu_get_gr(vcpu, inst.M5.r3);
+ imm = (inst.M5.s << 31) | (inst.M5.i << 30) |
+ (inst.M5.imm7 << 23);
+ temp += imm >> 23;
+ vcpu_set_gr(vcpu, inst.M5.r3, temp, 0);
+
+ } else if ((inst.M3.x6 >> 2) < 0xb) {
+ /*read*/
+ dir = IOREQ_READ;
+ temp = vcpu_get_gr(vcpu, inst.M3.r3);
+ imm = (inst.M3.s << 31) | (inst.M3.i << 30) |
+ (inst.M3.imm7 << 23);
+ temp += imm >> 23;
+ vcpu_set_gr(vcpu, inst.M3.r3, temp, 0);
+
+ }
+ } else if (inst.M9.major == 6 && inst.M9.x6 == 0x3B
+ && inst.M9.m == 0 && inst.M9.x == 0) {
+ /* Floating-point spill*/
+ struct ia64_fpreg v;
+
+ inst_type = SL_FLOATING;
+ dir = IOREQ_WRITE;
+ vcpu_get_fpreg(vcpu, inst.M9.f2, &v);
+ /* Write high word. FIXME: this is a kludge! */
+ v.u.bits[1] &= 0x3ffff;
+ mmio_access(vcpu, padr + 8, &v.u.bits[1], 8, ma, IOREQ_WRITE);
+ data = v.u.bits[0];
+ size = 3;
+ } else if (inst.M10.major == 7 && inst.M10.x6 == 0x3B) {
+ /* Floating-point spill + Imm update */
+ struct ia64_fpreg v;
+
+ inst_type = SL_FLOATING;
+ dir = IOREQ_WRITE;
+ vcpu_get_fpreg(vcpu, inst.M10.f2, &v);
+ temp = vcpu_get_gr(vcpu, inst.M10.r3);
+ imm = (inst.M10.s << 31) | (inst.M10.i << 30) |
+ (inst.M10.imm7 << 23);
+ temp += imm >> 23;
+ vcpu_set_gr(vcpu, inst.M10.r3, temp, 0);
+
+ /* Write high word.FIXME: this is a kludge! */
+ v.u.bits[1] &= 0x3ffff;
+ mmio_access(vcpu, padr + 8, &v.u.bits[1], 8, ma, IOREQ_WRITE);
+ data = v.u.bits[0];
+ size = 3;
+ } else if (inst.M10.major == 7 && inst.M10.x6 == 0x31) {
+ /* Floating-point stf8 + Imm update */
+ struct ia64_fpreg v;
+ inst_type = SL_FLOATING;
+ dir = IOREQ_WRITE;
+ size = 3;
+ vcpu_get_fpreg(vcpu, inst.M10.f2, &v);
+ data = v.u.bits[0]; /* Significand. */
+ temp = vcpu_get_gr(vcpu, inst.M10.r3);
+ imm = (inst.M10.s << 31) | (inst.M10.i << 30) |
+ (inst.M10.imm7 << 23);
+ temp += imm >> 23;
+ vcpu_set_gr(vcpu, inst.M10.r3, temp, 0);
+ } else if (inst.M15.major == 7 && inst.M15.x6 >= 0x2c
+ && inst.M15.x6 <= 0x2f) {
+ temp = vcpu_get_gr(vcpu, inst.M15.r3);
+ imm = (inst.M15.s << 31) | (inst.M15.i << 30) |
+ (inst.M15.imm7 << 23);
+ temp += imm >> 23;
+ vcpu_set_gr(vcpu, inst.M15.r3, temp, 0);
+
+ vcpu_increment_iip(vcpu);
+ return;
+ } else if (inst.M12.major == 6 && inst.M12.m == 1
+ && inst.M12.x == 1 && inst.M12.x6 == 1) {
+ /* Floating-point Load Pair + Imm ldfp8 M12*/
+ struct ia64_fpreg v;
+
+ inst_type = SL_FLOATING;
+ dir = IOREQ_READ;
+ size = 8; /*ldfd*/
+ mmio_access(vcpu, padr, &data, size, ma, dir);
+ v.u.bits[0] = data;
+ v.u.bits[1] = 0x1003E;
+ vcpu_set_fpreg(vcpu, inst.M12.f1, &v);
+ padr += 8;
+ mmio_access(vcpu, padr, &data, size, ma, dir);
+ v.u.bits[0] = data;
+ v.u.bits[1] = 0x1003E;
+ vcpu_set_fpreg(vcpu, inst.M12.f2, &v);
+ padr += 8;
+ vcpu_set_gr(vcpu, inst.M12.r3, padr, 0);
+ vcpu_increment_iip(vcpu);
+ return;
+ } else {
+ inst_type = -1;
+ panic_vm(vcpu);
+ }
+
+ size = 1 << size;
+ if (dir == IOREQ_WRITE) {
+ mmio_access(vcpu, padr, &data, size, ma, dir);
+ } else {
+ mmio_access(vcpu, padr, &data, size, ma, dir);
+ if (inst_type == SL_INTEGER)
+ vcpu_set_gr(vcpu, inst.M1.r1, data, 0);
+ else
+ panic_vm(vcpu);
+
+ }
+ vcpu_increment_iip(vcpu);
+}
diff --git a/arch/ia64/kvm/optvfault.S b/arch/ia64/kvm/optvfault.S
new file mode 100644
index 000000000000..e4f15d641b22
--- /dev/null
+++ b/arch/ia64/kvm/optvfault.S
@@ -0,0 +1,918 @@
+/*
+ * arch/ia64/vmx/optvfault.S
+ * optimize virtualization fault handler
+ *
+ * Copyright (C) 2006 Intel Co
+ * Xuefei Xu (Anthony Xu) <anthony.xu@intel.com>
+ */
+
+#include <asm/asmmacro.h>
+#include <asm/processor.h>
+
+#include "vti.h"
+#include "asm-offsets.h"
+
+#define ACCE_MOV_FROM_AR
+#define ACCE_MOV_FROM_RR
+#define ACCE_MOV_TO_RR
+#define ACCE_RSM
+#define ACCE_SSM
+#define ACCE_MOV_TO_PSR
+#define ACCE_THASH
+
+//mov r1=ar3
+GLOBAL_ENTRY(kvm_asm_mov_from_ar)
+#ifndef ACCE_MOV_FROM_AR
+ br.many kvm_virtualization_fault_back
+#endif
+ add r18=VMM_VCPU_ITC_OFS_OFFSET, r21
+ add r16=VMM_VCPU_LAST_ITC_OFFSET,r21
+ extr.u r17=r25,6,7
+ ;;
+ ld8 r18=[r18]
+ mov r19=ar.itc
+ mov r24=b0
+ ;;
+ add r19=r19,r18
+ addl r20=@gprel(asm_mov_to_reg),gp
+ ;;
+ st8 [r16] = r19
+ adds r30=kvm_resume_to_guest-asm_mov_to_reg,r20
+ shladd r17=r17,4,r20
+ ;;
+ mov b0=r17
+ br.sptk.few b0
+ ;;
+END(kvm_asm_mov_from_ar)
+
+
+// mov r1=rr[r3]
+GLOBAL_ENTRY(kvm_asm_mov_from_rr)
+#ifndef ACCE_MOV_FROM_RR
+ br.many kvm_virtualization_fault_back
+#endif
+ extr.u r16=r25,20,7
+ extr.u r17=r25,6,7
+ addl r20=@gprel(asm_mov_from_reg),gp
+ ;;
+ adds r30=kvm_asm_mov_from_rr_back_1-asm_mov_from_reg,r20
+ shladd r16=r16,4,r20
+ mov r24=b0
+ ;;
+ add r27=VMM_VCPU_VRR0_OFFSET,r21
+ mov b0=r16
+ br.many b0
+ ;;
+kvm_asm_mov_from_rr_back_1:
+ adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20
+ adds r22=asm_mov_to_reg-asm_mov_from_reg,r20
+ shr.u r26=r19,61
+ ;;
+ shladd r17=r17,4,r22
+ shladd r27=r26,3,r27
+ ;;
+ ld8 r19=[r27]
+ mov b0=r17
+ br.many b0
+END(kvm_asm_mov_from_rr)
+
+
+// mov rr[r3]=r2
+GLOBAL_ENTRY(kvm_asm_mov_to_rr)
+#ifndef ACCE_MOV_TO_RR
+ br.many kvm_virtualization_fault_back
+#endif
+ extr.u r16=r25,20,7
+ extr.u r17=r25,13,7
+ addl r20=@gprel(asm_mov_from_reg),gp
+ ;;
+ adds r30=kvm_asm_mov_to_rr_back_1-asm_mov_from_reg,r20
+ shladd r16=r16,4,r20
+ mov r22=b0
+ ;;
+ add r27=VMM_VCPU_VRR0_OFFSET,r21
+ mov b0=r16
+ br.many b0
+ ;;
+kvm_asm_mov_to_rr_back_1:
+ adds r30=kvm_asm_mov_to_rr_back_2-asm_mov_from_reg,r20
+ shr.u r23=r19,61
+ shladd r17=r17,4,r20
+ ;;
+ //if rr6, go back
+ cmp.eq p6,p0=6,r23
+ mov b0=r22
+ (p6) br.cond.dpnt.many kvm_virtualization_fault_back
+ ;;
+ mov r28=r19
+ mov b0=r17
+ br.many b0
+kvm_asm_mov_to_rr_back_2:
+ adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20
+ shladd r27=r23,3,r27
+ ;; // vrr.rid<<4 |0xe
+ st8 [r27]=r19
+ mov b0=r30
+ ;;
+ extr.u r16=r19,8,26
+ extr.u r18 =r19,2,6
+ mov r17 =0xe
+ ;;
+ shladd r16 = r16, 4, r17
+ extr.u r19 =r19,0,8
+ ;;
+ shl r16 = r16,8
+ ;;
+ add r19 = r19, r16
+ ;; //set ve 1
+ dep r19=-1,r19,0,1
+ cmp.lt p6,p0=14,r18
+ ;;
+ (p6) mov r18=14
+ ;;
+ (p6) dep r19=r18,r19,2,6
+ ;;
+ cmp.eq p6,p0=0,r23
+ ;;
+ cmp.eq.or p6,p0=4,r23
+ ;;
+ adds r16=VMM_VCPU_MODE_FLAGS_OFFSET,r21
+ (p6) adds r17=VMM_VCPU_META_SAVED_RR0_OFFSET,r21
+ ;;
+ ld4 r16=[r16]
+ cmp.eq p7,p0=r0,r0
+ (p6) shladd r17=r23,1,r17
+ ;;
+ (p6) st8 [r17]=r19
+ (p6) tbit.nz p6,p7=r16,0
+ ;;
+ (p7) mov rr[r28]=r19
+ mov r24=r22
+ br.many b0
+END(kvm_asm_mov_to_rr)
+
+
+//rsm
+GLOBAL_ENTRY(kvm_asm_rsm)
+#ifndef ACCE_RSM
+ br.many kvm_virtualization_fault_back
+#endif
+ add r16=VMM_VPD_BASE_OFFSET,r21
+ extr.u r26=r25,6,21
+ extr.u r27=r25,31,2
+ ;;
+ ld8 r16=[r16]
+ extr.u r28=r25,36,1
+ dep r26=r27,r26,21,2
+ ;;
+ add r17=VPD_VPSR_START_OFFSET,r16
+ add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21
+ //r26 is imm24
+ dep r26=r28,r26,23,1
+ ;;
+ ld8 r18=[r17]
+ movl r28=IA64_PSR_IC+IA64_PSR_I+IA64_PSR_DT+IA64_PSR_SI
+ ld4 r23=[r22]
+ sub r27=-1,r26
+ mov r24=b0
+ ;;
+ mov r20=cr.ipsr
+ or r28=r27,r28
+ and r19=r18,r27
+ ;;
+ st8 [r17]=r19
+ and r20=r20,r28
+ /* Comment it out due to short of fp lazy alorgithm support
+ adds r27=IA64_VCPU_FP_PSR_OFFSET,r21
+ ;;
+ ld8 r27=[r27]
+ ;;
+ tbit.nz p8,p0= r27,IA64_PSR_DFH_BIT
+ ;;
+ (p8) dep r20=-1,r20,IA64_PSR_DFH_BIT,1
+ */
+ ;;
+ mov cr.ipsr=r20
+ tbit.nz p6,p0=r23,0
+ ;;
+ tbit.z.or p6,p0=r26,IA64_PSR_DT_BIT
+ (p6) br.dptk kvm_resume_to_guest
+ ;;
+ add r26=VMM_VCPU_META_RR0_OFFSET,r21
+ add r27=VMM_VCPU_META_RR0_OFFSET+8,r21
+ dep r23=-1,r23,0,1
+ ;;
+ ld8 r26=[r26]
+ ld8 r27=[r27]
+ st4 [r22]=r23
+ dep.z r28=4,61,3
+ ;;
+ mov rr[r0]=r26
+ ;;
+ mov rr[r28]=r27
+ ;;
+ srlz.d
+ br.many kvm_resume_to_guest
+END(kvm_asm_rsm)
+
+
+//ssm
+GLOBAL_ENTRY(kvm_asm_ssm)
+#ifndef ACCE_SSM
+ br.many kvm_virtualization_fault_back
+#endif
+ add r16=VMM_VPD_BASE_OFFSET,r21
+ extr.u r26=r25,6,21
+ extr.u r27=r25,31,2
+ ;;
+ ld8 r16=[r16]
+ extr.u r28=r25,36,1
+ dep r26=r27,r26,21,2
+ ;; //r26 is imm24
+ add r27=VPD_VPSR_START_OFFSET,r16
+ dep r26=r28,r26,23,1
+ ;; //r19 vpsr
+ ld8 r29=[r27]
+ mov r24=b0
+ ;;
+ add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21
+ mov r20=cr.ipsr
+ or r19=r29,r26
+ ;;
+ ld4 r23=[r22]
+ st8 [r27]=r19
+ or r20=r20,r26
+ ;;
+ mov cr.ipsr=r20
+ movl r28=IA64_PSR_DT+IA64_PSR_RT+IA64_PSR_IT
+ ;;
+ and r19=r28,r19
+ tbit.z p6,p0=r23,0
+ ;;
+ cmp.ne.or p6,p0=r28,r19
+ (p6) br.dptk kvm_asm_ssm_1
+ ;;
+ add r26=VMM_VCPU_META_SAVED_RR0_OFFSET,r21
+ add r27=VMM_VCPU_META_SAVED_RR0_OFFSET+8,r21
+ dep r23=0,r23,0,1
+ ;;
+ ld8 r26=[r26]
+ ld8 r27=[r27]
+ st4 [r22]=r23
+ dep.z r28=4,61,3
+ ;;
+ mov rr[r0]=r26
+ ;;
+ mov rr[r28]=r27
+ ;;
+ srlz.d
+ ;;
+kvm_asm_ssm_1:
+ tbit.nz p6,p0=r29,IA64_PSR_I_BIT
+ ;;
+ tbit.z.or p6,p0=r19,IA64_PSR_I_BIT
+ (p6) br.dptk kvm_resume_to_guest
+ ;;
+ add r29=VPD_VTPR_START_OFFSET,r16
+ add r30=VPD_VHPI_START_OFFSET,r16
+ ;;
+ ld8 r29=[r29]
+ ld8 r30=[r30]
+ ;;
+ extr.u r17=r29,4,4
+ extr.u r18=r29,16,1
+ ;;
+ dep r17=r18,r17,4,1
+ ;;
+ cmp.gt p6,p0=r30,r17
+ (p6) br.dpnt.few kvm_asm_dispatch_vexirq
+ br.many kvm_resume_to_guest
+END(kvm_asm_ssm)
+
+
+//mov psr.l=r2
+GLOBAL_ENTRY(kvm_asm_mov_to_psr)
+#ifndef ACCE_MOV_TO_PSR
+ br.many kvm_virtualization_fault_back
+#endif
+ add r16=VMM_VPD_BASE_OFFSET,r21
+ extr.u r26=r25,13,7 //r2
+ ;;
+ ld8 r16=[r16]
+ addl r20=@gprel(asm_mov_from_reg),gp
+ ;;
+ adds r30=kvm_asm_mov_to_psr_back-asm_mov_from_reg,r20
+ shladd r26=r26,4,r20
+ mov r24=b0
+ ;;
+ add r27=VPD_VPSR_START_OFFSET,r16
+ mov b0=r26
+ br.many b0
+ ;;
+kvm_asm_mov_to_psr_back:
+ ld8 r17=[r27]
+ add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21
+ dep r19=0,r19,32,32
+ ;;
+ ld4 r23=[r22]
+ dep r18=0,r17,0,32
+ ;;
+ add r30=r18,r19
+ movl r28=IA64_PSR_DT+IA64_PSR_RT+IA64_PSR_IT
+ ;;
+ st8 [r27]=r30
+ and r27=r28,r30
+ and r29=r28,r17
+ ;;
+ cmp.eq p5,p0=r29,r27
+ cmp.eq p6,p7=r28,r27
+ (p5) br.many kvm_asm_mov_to_psr_1
+ ;;
+ //virtual to physical
+ (p7) add r26=VMM_VCPU_META_RR0_OFFSET,r21
+ (p7) add r27=VMM_VCPU_META_RR0_OFFSET+8,r21
+ (p7) dep r23=-1,r23,0,1
+ ;;
+ //physical to virtual
+ (p6) add r26=VMM_VCPU_META_SAVED_RR0_OFFSET,r21
+ (p6) add r27=VMM_VCPU_META_SAVED_RR0_OFFSET+8,r21
+ (p6) dep r23=0,r23,0,1
+ ;;
+ ld8 r26=[r26]
+ ld8 r27=[r27]
+ st4 [r22]=r23
+ dep.z r28=4,61,3
+ ;;
+ mov rr[r0]=r26
+ ;;
+ mov rr[r28]=r27
+ ;;
+ srlz.d
+ ;;
+kvm_asm_mov_to_psr_1:
+ mov r20=cr.ipsr
+ movl r28=IA64_PSR_IC+IA64_PSR_I+IA64_PSR_DT+IA64_PSR_SI+IA64_PSR_RT
+ ;;
+ or r19=r19,r28
+ dep r20=0,r20,0,32
+ ;;
+ add r20=r19,r20
+ mov b0=r24
+ ;;
+ /* Comment it out due to short of fp lazy algorithm support
+ adds r27=IA64_VCPU_FP_PSR_OFFSET,r21
+ ;;
+ ld8 r27=[r27]
+ ;;
+ tbit.nz p8,p0=r27,IA64_PSR_DFH_BIT
+ ;;
+ (p8) dep r20=-1,r20,IA64_PSR_DFH_BIT,1
+ ;;
+ */
+ mov cr.ipsr=r20
+ cmp.ne p6,p0=r0,r0
+ ;;
+ tbit.nz.or p6,p0=r17,IA64_PSR_I_BIT
+ tbit.z.or p6,p0=r30,IA64_PSR_I_BIT
+ (p6) br.dpnt.few kvm_resume_to_guest
+ ;;
+ add r29=VPD_VTPR_START_OFFSET,r16
+ add r30=VPD_VHPI_START_OFFSET,r16
+ ;;
+ ld8 r29=[r29]
+ ld8 r30=[r30]
+ ;;
+ extr.u r17=r29,4,4
+ extr.u r18=r29,16,1
+ ;;
+ dep r17=r18,r17,4,1
+ ;;
+ cmp.gt p6,p0=r30,r17
+ (p6) br.dpnt.few kvm_asm_dispatch_vexirq
+ br.many kvm_resume_to_guest
+END(kvm_asm_mov_to_psr)
+
+
+ENTRY(kvm_asm_dispatch_vexirq)
+//increment iip
+ mov r16=cr.ipsr
+ ;;
+ extr.u r17=r16,IA64_PSR_RI_BIT,2
+ tbit.nz p6,p7=r16,IA64_PSR_RI_BIT+1
+ ;;
+ (p6) mov r18=cr.iip
+ (p6) mov r17=r0
+ (p7) add r17=1,r17
+ ;;
+ (p6) add r18=0x10,r18
+ dep r16=r17,r16,IA64_PSR_RI_BIT,2
+ ;;
+ (p6) mov cr.iip=r18
+ mov cr.ipsr=r16
+ mov r30 =1
+ br.many kvm_dispatch_vexirq
+END(kvm_asm_dispatch_vexirq)
+
+// thash
+// TODO: add support when pta.vf = 1
+GLOBAL_ENTRY(kvm_asm_thash)
+#ifndef ACCE_THASH
+ br.many kvm_virtualization_fault_back
+#endif
+ extr.u r17=r25,20,7 // get r3 from opcode in r25
+ extr.u r18=r25,6,7 // get r1 from opcode in r25
+ addl r20=@gprel(asm_mov_from_reg),gp
+ ;;
+ adds r30=kvm_asm_thash_back1-asm_mov_from_reg,r20
+ shladd r17=r17,4,r20 // get addr of MOVE_FROM_REG(r17)
+ adds r16=VMM_VPD_BASE_OFFSET,r21 // get vcpu.arch.priveregs
+ ;;
+ mov r24=b0
+ ;;
+ ld8 r16=[r16] // get VPD addr
+ mov b0=r17
+ br.many b0 // r19 return value
+ ;;
+kvm_asm_thash_back1:
+ shr.u r23=r19,61 // get RR number
+ adds r25=VMM_VCPU_VRR0_OFFSET,r21 // get vcpu->arch.vrr[0]'s addr
+ adds r16=VMM_VPD_VPTA_OFFSET,r16 // get vpta
+ ;;
+ shladd r27=r23,3,r25 // get vcpu->arch.vrr[r23]'s addr
+ ld8 r17=[r16] // get PTA
+ mov r26=1
+ ;;
+ extr.u r29=r17,2,6 // get pta.size
+ ld8 r25=[r27] // get vcpu->arch.vrr[r23]'s value
+ ;;
+ extr.u r25=r25,2,6 // get rr.ps
+ shl r22=r26,r29 // 1UL << pta.size
+ ;;
+ shr.u r23=r19,r25 // vaddr >> rr.ps
+ adds r26=3,r29 // pta.size + 3
+ shl r27=r17,3 // pta << 3
+ ;;
+ shl r23=r23,3 // (vaddr >> rr.ps) << 3
+ shr.u r27=r27,r26 // (pta << 3) >> (pta.size+3)
+ movl r16=7<<61
+ ;;
+ adds r22=-1,r22 // (1UL << pta.size) - 1
+ shl r27=r27,r29 // ((pta<<3)>>(pta.size+3))<<pta.size
+ and r19=r19,r16 // vaddr & VRN_MASK
+ ;;
+ and r22=r22,r23 // vhpt_offset
+ or r19=r19,r27 // (vadr&VRN_MASK)|(((pta<<3)>>(pta.size + 3))<<pta.size)
+ adds r26=asm_mov_to_reg-asm_mov_from_reg,r20
+ ;;
+ or r19=r19,r22 // calc pval
+ shladd r17=r18,4,r26
+ adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20
+ ;;
+ mov b0=r17
+ br.many b0
+END(kvm_asm_thash)
+
+#define MOV_TO_REG0 \
+{; \
+ nop.b 0x0; \
+ nop.b 0x0; \
+ nop.b 0x0; \
+ ;; \
+};
+
+
+#define MOV_TO_REG(n) \
+{; \
+ mov r##n##=r19; \
+ mov b0=r30; \
+ br.sptk.many b0; \
+ ;; \
+};
+
+
+#define MOV_FROM_REG(n) \
+{; \
+ mov r19=r##n##; \
+ mov b0=r30; \
+ br.sptk.many b0; \
+ ;; \
+};
+
+
+#define MOV_TO_BANK0_REG(n) \
+ENTRY_MIN_ALIGN(asm_mov_to_bank0_reg##n##); \
+{; \
+ mov r26=r2; \
+ mov r2=r19; \
+ bsw.1; \
+ ;; \
+}; \
+{; \
+ mov r##n##=r2; \
+ nop.b 0x0; \
+ bsw.0; \
+ ;; \
+}; \
+{; \
+ mov r2=r26; \
+ mov b0=r30; \
+ br.sptk.many b0; \
+ ;; \
+}; \
+END(asm_mov_to_bank0_reg##n##)
+
+
+#define MOV_FROM_BANK0_REG(n) \
+ENTRY_MIN_ALIGN(asm_mov_from_bank0_reg##n##); \
+{; \
+ mov r26=r2; \
+ nop.b 0x0; \
+ bsw.1; \
+ ;; \
+}; \
+{; \
+ mov r2=r##n##; \
+ nop.b 0x0; \
+ bsw.0; \
+ ;; \
+}; \
+{; \
+ mov r19=r2; \
+ mov r2=r26; \
+ mov b0=r30; \
+}; \
+{; \
+ nop.b 0x0; \
+ nop.b 0x0; \
+ br.sptk.many b0; \
+ ;; \
+}; \
+END(asm_mov_from_bank0_reg##n##)
+
+
+#define JMP_TO_MOV_TO_BANK0_REG(n) \
+{; \
+ nop.b 0x0; \
+ nop.b 0x0; \
+ br.sptk.many asm_mov_to_bank0_reg##n##; \
+ ;; \
+}
+
+
+#define JMP_TO_MOV_FROM_BANK0_REG(n) \
+{; \
+ nop.b 0x0; \
+ nop.b 0x0; \
+ br.sptk.many asm_mov_from_bank0_reg##n##; \
+ ;; \
+}
+
+
+MOV_FROM_BANK0_REG(16)
+MOV_FROM_BANK0_REG(17)
+MOV_FROM_BANK0_REG(18)
+MOV_FROM_BANK0_REG(19)
+MOV_FROM_BANK0_REG(20)
+MOV_FROM_BANK0_REG(21)
+MOV_FROM_BANK0_REG(22)
+MOV_FROM_BANK0_REG(23)
+MOV_FROM_BANK0_REG(24)
+MOV_FROM_BANK0_REG(25)
+MOV_FROM_BANK0_REG(26)
+MOV_FROM_BANK0_REG(27)
+MOV_FROM_BANK0_REG(28)
+MOV_FROM_BANK0_REG(29)
+MOV_FROM_BANK0_REG(30)
+MOV_FROM_BANK0_REG(31)
+
+
+// mov from reg table
+ENTRY(asm_mov_from_reg)
+ MOV_FROM_REG(0)
+ MOV_FROM_REG(1)
+ MOV_FROM_REG(2)
+ MOV_FROM_REG(3)
+ MOV_FROM_REG(4)
+ MOV_FROM_REG(5)
+ MOV_FROM_REG(6)
+ MOV_FROM_REG(7)
+ MOV_FROM_REG(8)
+ MOV_FROM_REG(9)
+ MOV_FROM_REG(10)
+ MOV_FROM_REG(11)
+ MOV_FROM_REG(12)
+ MOV_FROM_REG(13)
+ MOV_FROM_REG(14)
+ MOV_FROM_REG(15)
+ JMP_TO_MOV_FROM_BANK0_REG(16)
+ JMP_TO_MOV_FROM_BANK0_REG(17)
+ JMP_TO_MOV_FROM_BANK0_REG(18)
+ JMP_TO_MOV_FROM_BANK0_REG(19)
+ JMP_TO_MOV_FROM_BANK0_REG(20)
+ JMP_TO_MOV_FROM_BANK0_REG(21)
+ JMP_TO_MOV_FROM_BANK0_REG(22)
+ JMP_TO_MOV_FROM_BANK0_REG(23)
+ JMP_TO_MOV_FROM_BANK0_REG(24)
+ JMP_TO_MOV_FROM_BANK0_REG(25)
+ JMP_TO_MOV_FROM_BANK0_REG(26)
+ JMP_TO_MOV_FROM_BANK0_REG(27)
+ JMP_TO_MOV_FROM_BANK0_REG(28)
+ JMP_TO_MOV_FROM_BANK0_REG(29)
+ JMP_TO_MOV_FROM_BANK0_REG(30)
+ JMP_TO_MOV_FROM_BANK0_REG(31)
+ MOV_FROM_REG(32)
+ MOV_FROM_REG(33)
+ MOV_FROM_REG(34)
+ MOV_FROM_REG(35)
+ MOV_FROM_REG(36)
+ MOV_FROM_REG(37)
+ MOV_FROM_REG(38)
+ MOV_FROM_REG(39)
+ MOV_FROM_REG(40)
+ MOV_FROM_REG(41)
+ MOV_FROM_REG(42)
+ MOV_FROM_REG(43)
+ MOV_FROM_REG(44)
+ MOV_FROM_REG(45)
+ MOV_FROM_REG(46)
+ MOV_FROM_REG(47)
+ MOV_FROM_REG(48)
+ MOV_FROM_REG(49)
+ MOV_FROM_REG(50)
+ MOV_FROM_REG(51)
+ MOV_FROM_REG(52)
+ MOV_FROM_REG(53)
+ MOV_FROM_REG(54)
+ MOV_FROM_REG(55)
+ MOV_FROM_REG(56)
+ MOV_FROM_REG(57)
+ MOV_FROM_REG(58)
+ MOV_FROM_REG(59)
+ MOV_FROM_REG(60)
+ MOV_FROM_REG(61)
+ MOV_FROM_REG(62)
+ MOV_FROM_REG(63)
+ MOV_FROM_REG(64)
+ MOV_FROM_REG(65)
+ MOV_FROM_REG(66)
+ MOV_FROM_REG(67)
+ MOV_FROM_REG(68)
+ MOV_FROM_REG(69)
+ MOV_FROM_REG(70)
+ MOV_FROM_REG(71)
+ MOV_FROM_REG(72)
+ MOV_FROM_REG(73)
+ MOV_FROM_REG(74)
+ MOV_FROM_REG(75)
+ MOV_FROM_REG(76)
+ MOV_FROM_REG(77)
+ MOV_FROM_REG(78)
+ MOV_FROM_REG(79)
+ MOV_FROM_REG(80)
+ MOV_FROM_REG(81)
+ MOV_FROM_REG(82)
+ MOV_FROM_REG(83)
+ MOV_FROM_REG(84)
+ MOV_FROM_REG(85)
+ MOV_FROM_REG(86)
+ MOV_FROM_REG(87)
+ MOV_FROM_REG(88)
+ MOV_FROM_REG(89)
+ MOV_FROM_REG(90)
+ MOV_FROM_REG(91)
+ MOV_FROM_REG(92)
+ MOV_FROM_REG(93)
+ MOV_FROM_REG(94)
+ MOV_FROM_REG(95)
+ MOV_FROM_REG(96)
+ MOV_FROM_REG(97)
+ MOV_FROM_REG(98)
+ MOV_FROM_REG(99)
+ MOV_FROM_REG(100)
+ MOV_FROM_REG(101)
+ MOV_FROM_REG(102)
+ MOV_FROM_REG(103)
+ MOV_FROM_REG(104)
+ MOV_FROM_REG(105)
+ MOV_FROM_REG(106)
+ MOV_FROM_REG(107)
+ MOV_FROM_REG(108)
+ MOV_FROM_REG(109)
+ MOV_FROM_REG(110)
+ MOV_FROM_REG(111)
+ MOV_FROM_REG(112)
+ MOV_FROM_REG(113)
+ MOV_FROM_REG(114)
+ MOV_FROM_REG(115)
+ MOV_FROM_REG(116)
+ MOV_FROM_REG(117)
+ MOV_FROM_REG(118)
+ MOV_FROM_REG(119)
+ MOV_FROM_REG(120)
+ MOV_FROM_REG(121)
+ MOV_FROM_REG(122)
+ MOV_FROM_REG(123)
+ MOV_FROM_REG(124)
+ MOV_FROM_REG(125)
+ MOV_FROM_REG(126)
+ MOV_FROM_REG(127)
+END(asm_mov_from_reg)
+
+
+/* must be in bank 0
+ * parameter:
+ * r31: pr
+ * r24: b0
+ */
+ENTRY(kvm_resume_to_guest)
+ adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
+ ;;
+ ld8 r1 =[r16]
+ adds r20 = VMM_VCPU_VSA_BASE_OFFSET,r21
+ ;;
+ mov r16=cr.ipsr
+ ;;
+ ld8 r20 = [r20]
+ adds r19=VMM_VPD_BASE_OFFSET,r21
+ ;;
+ ld8 r25=[r19]
+ extr.u r17=r16,IA64_PSR_RI_BIT,2
+ tbit.nz p6,p7=r16,IA64_PSR_RI_BIT+1
+ ;;
+ (p6) mov r18=cr.iip
+ (p6) mov r17=r0
+ ;;
+ (p6) add r18=0x10,r18
+ (p7) add r17=1,r17
+ ;;
+ (p6) mov cr.iip=r18
+ dep r16=r17,r16,IA64_PSR_RI_BIT,2
+ ;;
+ mov cr.ipsr=r16
+ adds r19= VPD_VPSR_START_OFFSET,r25
+ add r28=PAL_VPS_RESUME_NORMAL,r20
+ add r29=PAL_VPS_RESUME_HANDLER,r20
+ ;;
+ ld8 r19=[r19]
+ mov b0=r29
+ cmp.ne p6,p7 = r0,r0
+ ;;
+ tbit.z p6,p7 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic
+ ;;
+ (p6) ld8 r26=[r25]
+ (p7) mov b0=r28
+ mov pr=r31,-2
+ br.sptk.many b0 // call pal service
+ ;;
+END(kvm_resume_to_guest)
+
+
+MOV_TO_BANK0_REG(16)
+MOV_TO_BANK0_REG(17)
+MOV_TO_BANK0_REG(18)
+MOV_TO_BANK0_REG(19)
+MOV_TO_BANK0_REG(20)
+MOV_TO_BANK0_REG(21)
+MOV_TO_BANK0_REG(22)
+MOV_TO_BANK0_REG(23)
+MOV_TO_BANK0_REG(24)
+MOV_TO_BANK0_REG(25)
+MOV_TO_BANK0_REG(26)
+MOV_TO_BANK0_REG(27)
+MOV_TO_BANK0_REG(28)
+MOV_TO_BANK0_REG(29)
+MOV_TO_BANK0_REG(30)
+MOV_TO_BANK0_REG(31)
+
+
+// mov to reg table
+ENTRY(asm_mov_to_reg)
+ MOV_TO_REG0
+ MOV_TO_REG(1)
+ MOV_TO_REG(2)
+ MOV_TO_REG(3)
+ MOV_TO_REG(4)
+ MOV_TO_REG(5)
+ MOV_TO_REG(6)
+ MOV_TO_REG(7)
+ MOV_TO_REG(8)
+ MOV_TO_REG(9)
+ MOV_TO_REG(10)
+ MOV_TO_REG(11)
+ MOV_TO_REG(12)
+ MOV_TO_REG(13)
+ MOV_TO_REG(14)
+ MOV_TO_REG(15)
+ JMP_TO_MOV_TO_BANK0_REG(16)
+ JMP_TO_MOV_TO_BANK0_REG(17)
+ JMP_TO_MOV_TO_BANK0_REG(18)
+ JMP_TO_MOV_TO_BANK0_REG(19)
+ JMP_TO_MOV_TO_BANK0_REG(20)
+ JMP_TO_MOV_TO_BANK0_REG(21)
+ JMP_TO_MOV_TO_BANK0_REG(22)
+ JMP_TO_MOV_TO_BANK0_REG(23)
+ JMP_TO_MOV_TO_BANK0_REG(24)
+ JMP_TO_MOV_TO_BANK0_REG(25)
+ JMP_TO_MOV_TO_BANK0_REG(26)
+ JMP_TO_MOV_TO_BANK0_REG(27)
+ JMP_TO_MOV_TO_BANK0_REG(28)
+ JMP_TO_MOV_TO_BANK0_REG(29)
+ JMP_TO_MOV_TO_BANK0_REG(30)
+ JMP_TO_MOV_TO_BANK0_REG(31)
+ MOV_TO_REG(32)
+ MOV_TO_REG(33)
+ MOV_TO_REG(34)
+ MOV_TO_REG(35)
+ MOV_TO_REG(36)
+ MOV_TO_REG(37)
+ MOV_TO_REG(38)
+ MOV_TO_REG(39)
+ MOV_TO_REG(40)
+ MOV_TO_REG(41)
+ MOV_TO_REG(42)
+ MOV_TO_REG(43)
+ MOV_TO_REG(44)
+ MOV_TO_REG(45)
+ MOV_TO_REG(46)
+ MOV_TO_REG(47)
+ MOV_TO_REG(48)
+ MOV_TO_REG(49)
+ MOV_TO_REG(50)
+ MOV_TO_REG(51)
+ MOV_TO_REG(52)
+ MOV_TO_REG(53)
+ MOV_TO_REG(54)
+ MOV_TO_REG(55)
+ MOV_TO_REG(56)
+ MOV_TO_REG(57)
+ MOV_TO_REG(58)
+ MOV_TO_REG(59)
+ MOV_TO_REG(60)
+ MOV_TO_REG(61)
+ MOV_TO_REG(62)
+ MOV_TO_REG(63)
+ MOV_TO_REG(64)
+ MOV_TO_REG(65)
+ MOV_TO_REG(66)
+ MOV_TO_REG(67)
+ MOV_TO_REG(68)
+ MOV_TO_REG(69)
+ MOV_TO_REG(70)
+ MOV_TO_REG(71)
+ MOV_TO_REG(72)
+ MOV_TO_REG(73)
+ MOV_TO_REG(74)
+ MOV_TO_REG(75)
+ MOV_TO_REG(76)
+ MOV_TO_REG(77)
+ MOV_TO_REG(78)
+ MOV_TO_REG(79)
+ MOV_TO_REG(80)
+ MOV_TO_REG(81)
+ MOV_TO_REG(82)
+ MOV_TO_REG(83)
+ MOV_TO_REG(84)
+ MOV_TO_REG(85)
+ MOV_TO_REG(86)
+ MOV_TO_REG(87)
+ MOV_TO_REG(88)
+ MOV_TO_REG(89)
+ MOV_TO_REG(90)
+ MOV_TO_REG(91)
+ MOV_TO_REG(92)
+ MOV_TO_REG(93)
+ MOV_TO_REG(94)
+ MOV_TO_REG(95)
+ MOV_TO_REG(96)
+ MOV_TO_REG(97)
+ MOV_TO_REG(98)
+ MOV_TO_REG(99)
+ MOV_TO_REG(100)
+ MOV_TO_REG(101)
+ MOV_TO_REG(102)
+ MOV_TO_REG(103)
+ MOV_TO_REG(104)
+ MOV_TO_REG(105)
+ MOV_TO_REG(106)
+ MOV_TO_REG(107)
+ MOV_TO_REG(108)
+ MOV_TO_REG(109)
+ MOV_TO_REG(110)
+ MOV_TO_REG(111)
+ MOV_TO_REG(112)
+ MOV_TO_REG(113)
+ MOV_TO_REG(114)
+ MOV_TO_REG(115)
+ MOV_TO_REG(116)
+ MOV_TO_REG(117)
+ MOV_TO_REG(118)
+ MOV_TO_REG(119)
+ MOV_TO_REG(120)
+ MOV_TO_REG(121)
+ MOV_TO_REG(122)
+ MOV_TO_REG(123)
+ MOV_TO_REG(124)
+ MOV_TO_REG(125)
+ MOV_TO_REG(126)
+ MOV_TO_REG(127)
+END(asm_mov_to_reg)
diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c
new file mode 100644
index 000000000000..5a33f7ed29a0
--- /dev/null
+++ b/arch/ia64/kvm/process.c
@@ -0,0 +1,970 @@
+/*
+ * process.c: handle interruption inject for guests.
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Shaofan Li (Susue Li) <susie.li@intel.com>
+ * Xiaoyan Feng (Fleming Feng) <fleming.feng@intel.com>
+ * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
+ * Xiantao Zhang (xiantao.zhang@intel.com)
+ */
+#include "vcpu.h"
+
+#include <asm/pal.h>
+#include <asm/sal.h>
+#include <asm/fpswa.h>
+#include <asm/kregs.h>
+#include <asm/tlb.h>
+
+fpswa_interface_t *vmm_fpswa_interface;
+
+#define IA64_VHPT_TRANS_VECTOR 0x0000
+#define IA64_INST_TLB_VECTOR 0x0400
+#define IA64_DATA_TLB_VECTOR 0x0800
+#define IA64_ALT_INST_TLB_VECTOR 0x0c00
+#define IA64_ALT_DATA_TLB_VECTOR 0x1000
+#define IA64_DATA_NESTED_TLB_VECTOR 0x1400
+#define IA64_INST_KEY_MISS_VECTOR 0x1800
+#define IA64_DATA_KEY_MISS_VECTOR 0x1c00
+#define IA64_DIRTY_BIT_VECTOR 0x2000
+#define IA64_INST_ACCESS_BIT_VECTOR 0x2400
+#define IA64_DATA_ACCESS_BIT_VECTOR 0x2800
+#define IA64_BREAK_VECTOR 0x2c00
+#define IA64_EXTINT_VECTOR 0x3000
+#define IA64_PAGE_NOT_PRESENT_VECTOR 0x5000
+#define IA64_KEY_PERMISSION_VECTOR 0x5100
+#define IA64_INST_ACCESS_RIGHTS_VECTOR 0x5200
+#define IA64_DATA_ACCESS_RIGHTS_VECTOR 0x5300
+#define IA64_GENEX_VECTOR 0x5400
+#define IA64_DISABLED_FPREG_VECTOR 0x5500
+#define IA64_NAT_CONSUMPTION_VECTOR 0x5600
+#define IA64_SPECULATION_VECTOR 0x5700 /* UNUSED */
+#define IA64_DEBUG_VECTOR 0x5900
+#define IA64_UNALIGNED_REF_VECTOR 0x5a00
+#define IA64_UNSUPPORTED_DATA_REF_VECTOR 0x5b00
+#define IA64_FP_FAULT_VECTOR 0x5c00
+#define IA64_FP_TRAP_VECTOR 0x5d00
+#define IA64_LOWERPRIV_TRANSFER_TRAP_VECTOR 0x5e00
+#define IA64_TAKEN_BRANCH_TRAP_VECTOR 0x5f00
+#define IA64_SINGLE_STEP_TRAP_VECTOR 0x6000
+
+/* SDM vol2 5.5 - IVA based interruption handling */
+#define INITIAL_PSR_VALUE_AT_INTERRUPTION (IA64_PSR_UP | IA64_PSR_MFL |\
+ IA64_PSR_MFH | IA64_PSR_PK | IA64_PSR_DT | \
+ IA64_PSR_RT | IA64_PSR_MC|IA64_PSR_IT)
+
+#define DOMN_PAL_REQUEST 0x110000
+#define DOMN_SAL_REQUEST 0x110001
+
+static u64 vec2off[68] = {0x0, 0x400, 0x800, 0xc00, 0x1000, 0x1400, 0x1800,
+ 0x1c00, 0x2000, 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00,
+ 0x4000, 0x4400, 0x4800, 0x4c00, 0x5000, 0x5100, 0x5200, 0x5300, 0x5400,
+ 0x5500, 0x5600, 0x5700, 0x5800, 0x5900, 0x5a00, 0x5b00, 0x5c00, 0x5d00,
+ 0x5e00, 0x5f00, 0x6000, 0x6100, 0x6200, 0x6300, 0x6400, 0x6500, 0x6600,
+ 0x6700, 0x6800, 0x6900, 0x6a00, 0x6b00, 0x6c00, 0x6d00, 0x6e00, 0x6f00,
+ 0x7000, 0x7100, 0x7200, 0x7300, 0x7400, 0x7500, 0x7600, 0x7700, 0x7800,
+ 0x7900, 0x7a00, 0x7b00, 0x7c00, 0x7d00, 0x7e00, 0x7f00
+};
+
+static void collect_interruption(struct kvm_vcpu *vcpu)
+{
+ u64 ipsr;
+ u64 vdcr;
+ u64 vifs;
+ unsigned long vpsr;
+ struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+
+ vpsr = vcpu_get_psr(vcpu);
+ vcpu_bsw0(vcpu);
+ if (vpsr & IA64_PSR_IC) {
+
+ /* Sync mpsr id/da/dd/ss/ed bits to vipsr
+ * since after guest do rfi, we still want these bits on in
+ * mpsr
+ */
+
+ ipsr = regs->cr_ipsr;
+ vpsr = vpsr | (ipsr & (IA64_PSR_ID | IA64_PSR_DA
+ | IA64_PSR_DD | IA64_PSR_SS
+ | IA64_PSR_ED));
+ vcpu_set_ipsr(vcpu, vpsr);
+
+ /* Currently, for trap, we do not advance IIP to next
+ * instruction. That's because we assume caller already
+ * set up IIP correctly
+ */
+
+ vcpu_set_iip(vcpu , regs->cr_iip);
+
+ /* set vifs.v to zero */
+ vifs = VCPU(vcpu, ifs);
+ vifs &= ~IA64_IFS_V;
+ vcpu_set_ifs(vcpu, vifs);
+
+ vcpu_set_iipa(vcpu, VMX(vcpu, cr_iipa));
+ }
+
+ vdcr = VCPU(vcpu, dcr);
+
+ /* Set guest psr
+ * up/mfl/mfh/pk/dt/rt/mc/it keeps unchanged
+ * be: set to the value of dcr.be
+ * pp: set to the value of dcr.pp
+ */
+ vpsr &= INITIAL_PSR_VALUE_AT_INTERRUPTION;
+ vpsr |= (vdcr & IA64_DCR_BE);
+
+ /* VDCR pp bit position is different from VPSR pp bit */
+ if (vdcr & IA64_DCR_PP) {
+ vpsr |= IA64_PSR_PP;
+ } else {
+ vpsr &= ~IA64_PSR_PP;;
+ }
+
+ vcpu_set_psr(vcpu, vpsr);
+
+}
+
+void inject_guest_interruption(struct kvm_vcpu *vcpu, u64 vec)
+{
+ u64 viva;
+ struct kvm_pt_regs *regs;
+ union ia64_isr pt_isr;
+
+ regs = vcpu_regs(vcpu);
+
+ /* clear cr.isr.ir (incomplete register frame)*/
+ pt_isr.val = VMX(vcpu, cr_isr);
+ pt_isr.ir = 0;
+ VMX(vcpu, cr_isr) = pt_isr.val;
+
+ collect_interruption(vcpu);
+
+ viva = vcpu_get_iva(vcpu);
+ regs->cr_iip = viva + vec;
+}
+
+static u64 vcpu_get_itir_on_fault(struct kvm_vcpu *vcpu, u64 ifa)
+{
+ union ia64_rr rr, rr1;
+
+ rr.val = vcpu_get_rr(vcpu, ifa);
+ rr1.val = 0;
+ rr1.ps = rr.ps;
+ rr1.rid = rr.rid;
+ return (rr1.val);
+}
+
+
+/*
+ * Set vIFA & vITIR & vIHA, when vPSR.ic =1
+ * Parameter:
+ * set_ifa: if true, set vIFA
+ * set_itir: if true, set vITIR
+ * set_iha: if true, set vIHA
+ */
+void set_ifa_itir_iha(struct kvm_vcpu *vcpu, u64 vadr,
+ int set_ifa, int set_itir, int set_iha)
+{
+ long vpsr;
+ u64 value;
+
+ vpsr = VCPU(vcpu, vpsr);
+ /* Vol2, Table 8-1 */
+ if (vpsr & IA64_PSR_IC) {
+ if (set_ifa)
+ vcpu_set_ifa(vcpu, vadr);
+ if (set_itir) {
+ value = vcpu_get_itir_on_fault(vcpu, vadr);
+ vcpu_set_itir(vcpu, value);
+ }
+
+ if (set_iha) {
+ value = vcpu_thash(vcpu, vadr);
+ vcpu_set_iha(vcpu, value);
+ }
+ }
+}
+
+/*
+ * Data TLB Fault
+ * @ Data TLB vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void dtlb_fault(struct kvm_vcpu *vcpu, u64 vadr)
+{
+ /* If vPSR.ic, IFA, ITIR, IHA */
+ set_ifa_itir_iha(vcpu, vadr, 1, 1, 1);
+ inject_guest_interruption(vcpu, IA64_DATA_TLB_VECTOR);
+}
+
+/*
+ * Instruction TLB Fault
+ * @ Instruction TLB vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void itlb_fault(struct kvm_vcpu *vcpu, u64 vadr)
+{
+ /* If vPSR.ic, IFA, ITIR, IHA */
+ set_ifa_itir_iha(vcpu, vadr, 1, 1, 1);
+ inject_guest_interruption(vcpu, IA64_INST_TLB_VECTOR);
+}
+
+
+
+/*
+ * Data Nested TLB Fault
+ * @ Data Nested TLB Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void nested_dtlb(struct kvm_vcpu *vcpu)
+{
+ inject_guest_interruption(vcpu, IA64_DATA_NESTED_TLB_VECTOR);
+}
+
+/*
+ * Alternate Data TLB Fault
+ * @ Alternate Data TLB vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void alt_dtlb(struct kvm_vcpu *vcpu, u64 vadr)
+{
+ set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
+ inject_guest_interruption(vcpu, IA64_ALT_DATA_TLB_VECTOR);
+}
+
+
+/*
+ * Data TLB Fault
+ * @ Data TLB vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void alt_itlb(struct kvm_vcpu *vcpu, u64 vadr)
+{
+ set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
+ inject_guest_interruption(vcpu, IA64_ALT_INST_TLB_VECTOR);
+}
+
+/* Deal with:
+ * VHPT Translation Vector
+ */
+static void _vhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
+{
+ /* If vPSR.ic, IFA, ITIR, IHA*/
+ set_ifa_itir_iha(vcpu, vadr, 1, 1, 1);
+ inject_guest_interruption(vcpu, IA64_VHPT_TRANS_VECTOR);
+
+
+}
+
+/*
+ * VHPT Instruction Fault
+ * @ VHPT Translation vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void ivhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
+{
+ _vhpt_fault(vcpu, vadr);
+}
+
+
+/*
+ * VHPT Data Fault
+ * @ VHPT Translation vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void dvhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
+{
+ _vhpt_fault(vcpu, vadr);
+}
+
+
+
+/*
+ * Deal with:
+ * General Exception vector
+ */
+void _general_exception(struct kvm_vcpu *vcpu)
+{
+ inject_guest_interruption(vcpu, IA64_GENEX_VECTOR);
+}
+
+
+/*
+ * Illegal Operation Fault
+ * @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void illegal_op(struct kvm_vcpu *vcpu)
+{
+ _general_exception(vcpu);
+}
+
+/*
+ * Illegal Dependency Fault
+ * @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void illegal_dep(struct kvm_vcpu *vcpu)
+{
+ _general_exception(vcpu);
+}
+
+/*
+ * Reserved Register/Field Fault
+ * @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void rsv_reg_field(struct kvm_vcpu *vcpu)
+{
+ _general_exception(vcpu);
+}
+/*
+ * Privileged Operation Fault
+ * @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+
+void privilege_op(struct kvm_vcpu *vcpu)
+{
+ _general_exception(vcpu);
+}
+
+/*
+ * Unimplement Data Address Fault
+ * @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void unimpl_daddr(struct kvm_vcpu *vcpu)
+{
+ _general_exception(vcpu);
+}
+
+/*
+ * Privileged Register Fault
+ * @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void privilege_reg(struct kvm_vcpu *vcpu)
+{
+ _general_exception(vcpu);
+}
+
+/* Deal with
+ * Nat consumption vector
+ * Parameter:
+ * vaddr: Optional, if t == REGISTER
+ */
+static void _nat_consumption_fault(struct kvm_vcpu *vcpu, u64 vadr,
+ enum tlb_miss_type t)
+{
+ /* If vPSR.ic && t == DATA/INST, IFA */
+ if (t == DATA || t == INSTRUCTION) {
+ /* IFA */
+ set_ifa_itir_iha(vcpu, vadr, 1, 0, 0);
+ }
+
+ inject_guest_interruption(vcpu, IA64_NAT_CONSUMPTION_VECTOR);
+}
+
+/*
+ * Instruction Nat Page Consumption Fault
+ * @ Nat Consumption Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void inat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr)
+{
+ _nat_consumption_fault(vcpu, vadr, INSTRUCTION);
+}
+
+/*
+ * Register Nat Consumption Fault
+ * @ Nat Consumption Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void rnat_consumption(struct kvm_vcpu *vcpu)
+{
+ _nat_consumption_fault(vcpu, 0, REGISTER);
+}
+
+/*
+ * Data Nat Page Consumption Fault
+ * @ Nat Consumption Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void dnat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr)
+{
+ _nat_consumption_fault(vcpu, vadr, DATA);
+}
+
+/* Deal with
+ * Page not present vector
+ */
+static void __page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
+{
+ /* If vPSR.ic, IFA, ITIR */
+ set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
+ inject_guest_interruption(vcpu, IA64_PAGE_NOT_PRESENT_VECTOR);
+}
+
+
+void data_page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
+{
+ __page_not_present(vcpu, vadr);
+}
+
+
+void inst_page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
+{
+ __page_not_present(vcpu, vadr);
+}
+
+
+/* Deal with
+ * Data access rights vector
+ */
+void data_access_rights(struct kvm_vcpu *vcpu, u64 vadr)
+{
+ /* If vPSR.ic, IFA, ITIR */
+ set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
+ inject_guest_interruption(vcpu, IA64_DATA_ACCESS_RIGHTS_VECTOR);
+}
+
+fpswa_ret_t vmm_fp_emulate(int fp_fault, void *bundle, unsigned long *ipsr,
+ unsigned long *fpsr, unsigned long *isr, unsigned long *pr,
+ unsigned long *ifs, struct kvm_pt_regs *regs)
+{
+ fp_state_t fp_state;
+ fpswa_ret_t ret;
+ struct kvm_vcpu *vcpu = current_vcpu;
+
+ uint64_t old_rr7 = ia64_get_rr(7UL<<61);
+
+ if (!vmm_fpswa_interface)
+ return (fpswa_ret_t) {-1, 0, 0, 0};
+
+ /*
+ * Just let fpswa driver to use hardware fp registers.
+ * No fp register is valid in memory.
+ */
+ memset(&fp_state, 0, sizeof(fp_state_t));
+
+ /*
+ * unsigned long (*EFI_FPSWA) (
+ * unsigned long trap_type,
+ * void *Bundle,
+ * unsigned long *pipsr,
+ * unsigned long *pfsr,
+ * unsigned long *pisr,
+ * unsigned long *ppreds,
+ * unsigned long *pifs,
+ * void *fp_state);
+ */
+ /*Call host fpswa interface directly to virtualize
+ *guest fpswa request!
+ */
+ ia64_set_rr(7UL << 61, vcpu->arch.host.rr[7]);
+ ia64_srlz_d();
+
+ ret = (*vmm_fpswa_interface->fpswa) (fp_fault, bundle,
+ ipsr, fpsr, isr, pr, ifs, &fp_state);
+ ia64_set_rr(7UL << 61, old_rr7);
+ ia64_srlz_d();
+ return ret;
+}
+
+/*
+ * Handle floating-point assist faults and traps for domain.
+ */
+unsigned long vmm_handle_fpu_swa(int fp_fault, struct kvm_pt_regs *regs,
+ unsigned long isr)
+{
+ struct kvm_vcpu *v = current_vcpu;
+ IA64_BUNDLE bundle;
+ unsigned long fault_ip;
+ fpswa_ret_t ret;
+
+ fault_ip = regs->cr_iip;
+ /*
+ * When the FP trap occurs, the trapping instruction is completed.
+ * If ipsr.ri == 0, there is the trapping instruction in previous
+ * bundle.
+ */
+ if (!fp_fault && (ia64_psr(regs)->ri == 0))
+ fault_ip -= 16;
+
+ if (fetch_code(v, fault_ip, &bundle))
+ return -EAGAIN;
+
+ if (!bundle.i64[0] && !bundle.i64[1])
+ return -EACCES;
+
+ ret = vmm_fp_emulate(fp_fault, &bundle, &regs->cr_ipsr, &regs->ar_fpsr,
+ &isr, &regs->pr, &regs->cr_ifs, regs);
+ return ret.status;
+}
+
+void reflect_interruption(u64 ifa, u64 isr, u64 iim,
+ u64 vec, struct kvm_pt_regs *regs)
+{
+ u64 vector;
+ int status ;
+ struct kvm_vcpu *vcpu = current_vcpu;
+ u64 vpsr = VCPU(vcpu, vpsr);
+
+ vector = vec2off[vec];
+
+ if (!(vpsr & IA64_PSR_IC) && (vector != IA64_DATA_NESTED_TLB_VECTOR)) {
+ panic_vm(vcpu);
+ return;
+ }
+
+ switch (vec) {
+ case 32: /*IA64_FP_FAULT_VECTOR*/
+ status = vmm_handle_fpu_swa(1, regs, isr);
+ if (!status) {
+ vcpu_increment_iip(vcpu);
+ return;
+ } else if (-EAGAIN == status)
+ return;
+ break;
+ case 33: /*IA64_FP_TRAP_VECTOR*/
+ status = vmm_handle_fpu_swa(0, regs, isr);
+ if (!status)
+ return ;
+ else if (-EAGAIN == status) {
+ vcpu_decrement_iip(vcpu);
+ return ;
+ }
+ break;
+ }
+
+ VCPU(vcpu, isr) = isr;
+ VCPU(vcpu, iipa) = regs->cr_iip;
+ if (vector == IA64_BREAK_VECTOR || vector == IA64_SPECULATION_VECTOR)
+ VCPU(vcpu, iim) = iim;
+ else
+ set_ifa_itir_iha(vcpu, ifa, 1, 1, 1);
+
+ inject_guest_interruption(vcpu, vector);
+}
+
+static void set_pal_call_data(struct kvm_vcpu *vcpu)
+{
+ struct exit_ctl_data *p = &vcpu->arch.exit_data;
+
+ /*FIXME:For static and stacked convention, firmware
+ * has put the parameters in gr28-gr31 before
+ * break to vmm !!*/
+
+ p->u.pal_data.gr28 = vcpu_get_gr(vcpu, 28);
+ p->u.pal_data.gr29 = vcpu_get_gr(vcpu, 29);
+ p->u.pal_data.gr30 = vcpu_get_gr(vcpu, 30);
+ p->u.pal_data.gr31 = vcpu_get_gr(vcpu, 31);
+ p->exit_reason = EXIT_REASON_PAL_CALL;
+}
+
+static void set_pal_call_result(struct kvm_vcpu *vcpu)
+{
+ struct exit_ctl_data *p = &vcpu->arch.exit_data;
+
+ if (p->exit_reason == EXIT_REASON_PAL_CALL) {
+ vcpu_set_gr(vcpu, 8, p->u.pal_data.ret.status, 0);
+ vcpu_set_gr(vcpu, 9, p->u.pal_data.ret.v0, 0);
+ vcpu_set_gr(vcpu, 10, p->u.pal_data.ret.v1, 0);
+ vcpu_set_gr(vcpu, 11, p->u.pal_data.ret.v2, 0);
+ } else
+ panic_vm(vcpu);
+}
+
+static void set_sal_call_data(struct kvm_vcpu *vcpu)
+{
+ struct exit_ctl_data *p = &vcpu->arch.exit_data;
+
+ p->u.sal_data.in0 = vcpu_get_gr(vcpu, 32);
+ p->u.sal_data.in1 = vcpu_get_gr(vcpu, 33);
+ p->u.sal_data.in2 = vcpu_get_gr(vcpu, 34);
+ p->u.sal_data.in3 = vcpu_get_gr(vcpu, 35);
+ p->u.sal_data.in4 = vcpu_get_gr(vcpu, 36);
+ p->u.sal_data.in5 = vcpu_get_gr(vcpu, 37);
+ p->u.sal_data.in6 = vcpu_get_gr(vcpu, 38);
+ p->u.sal_data.in7 = vcpu_get_gr(vcpu, 39);
+ p->exit_reason = EXIT_REASON_SAL_CALL;
+}
+
+static void set_sal_call_result(struct kvm_vcpu *vcpu)
+{
+ struct exit_ctl_data *p = &vcpu->arch.exit_data;
+
+ if (p->exit_reason == EXIT_REASON_SAL_CALL) {
+ vcpu_set_gr(vcpu, 8, p->u.sal_data.ret.r8, 0);
+ vcpu_set_gr(vcpu, 9, p->u.sal_data.ret.r9, 0);
+ vcpu_set_gr(vcpu, 10, p->u.sal_data.ret.r10, 0);
+ vcpu_set_gr(vcpu, 11, p->u.sal_data.ret.r11, 0);
+ } else
+ panic_vm(vcpu);
+}
+
+void kvm_ia64_handle_break(unsigned long ifa, struct kvm_pt_regs *regs,
+ unsigned long isr, unsigned long iim)
+{
+ struct kvm_vcpu *v = current_vcpu;
+
+ if (ia64_psr(regs)->cpl == 0) {
+ /* Allow hypercalls only when cpl = 0. */
+ if (iim == DOMN_PAL_REQUEST) {
+ set_pal_call_data(v);
+ vmm_transition(v);
+ set_pal_call_result(v);
+ vcpu_increment_iip(v);
+ return;
+ } else if (iim == DOMN_SAL_REQUEST) {
+ set_sal_call_data(v);
+ vmm_transition(v);
+ set_sal_call_result(v);
+ vcpu_increment_iip(v);
+ return;
+ }
+ }
+ reflect_interruption(ifa, isr, iim, 11, regs);
+}
+
+void check_pending_irq(struct kvm_vcpu *vcpu)
+{
+ int mask, h_pending, h_inservice;
+ u64 isr;
+ unsigned long vpsr;
+ struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+
+ h_pending = highest_pending_irq(vcpu);
+ if (h_pending == NULL_VECTOR) {
+ update_vhpi(vcpu, NULL_VECTOR);
+ return;
+ }
+ h_inservice = highest_inservice_irq(vcpu);
+
+ vpsr = VCPU(vcpu, vpsr);
+ mask = irq_masked(vcpu, h_pending, h_inservice);
+ if ((vpsr & IA64_PSR_I) && IRQ_NO_MASKED == mask) {
+ isr = vpsr & IA64_PSR_RI;
+ update_vhpi(vcpu, h_pending);
+ reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */
+ } else if (mask == IRQ_MASKED_BY_INSVC) {
+ if (VCPU(vcpu, vhpi))
+ update_vhpi(vcpu, NULL_VECTOR);
+ } else {
+ /* masked by vpsr.i or vtpr.*/
+ update_vhpi(vcpu, h_pending);
+ }
+}
+
+static void generate_exirq(struct kvm_vcpu *vcpu)
+{
+ unsigned vpsr;
+ uint64_t isr;
+
+ struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+
+ vpsr = VCPU(vcpu, vpsr);
+ isr = vpsr & IA64_PSR_RI;
+ if (!(vpsr & IA64_PSR_IC))
+ panic_vm(vcpu);
+ reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */
+}
+
+void vhpi_detection(struct kvm_vcpu *vcpu)
+{
+ uint64_t threshold, vhpi;
+ union ia64_tpr vtpr;
+ struct ia64_psr vpsr;
+
+ vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
+ vtpr.val = VCPU(vcpu, tpr);
+
+ threshold = ((!vpsr.i) << 5) | (vtpr.mmi << 4) | vtpr.mic;
+ vhpi = VCPU(vcpu, vhpi);
+ if (vhpi > threshold) {
+ /* interrupt actived*/
+ generate_exirq(vcpu);
+ }
+}
+
+
+void leave_hypervisor_tail(void)
+{
+ struct kvm_vcpu *v = current_vcpu;
+
+ if (VMX(v, timer_check)) {
+ VMX(v, timer_check) = 0;
+ if (VMX(v, itc_check)) {
+ if (vcpu_get_itc(v) > VCPU(v, itm)) {
+ if (!(VCPU(v, itv) & (1 << 16))) {
+ vcpu_pend_interrupt(v, VCPU(v, itv)
+ & 0xff);
+ VMX(v, itc_check) = 0;
+ } else {
+ v->arch.timer_pending = 1;
+ }
+ VMX(v, last_itc) = VCPU(v, itm) + 1;
+ }
+ }
+ }
+
+ rmb();
+ if (v->arch.irq_new_pending) {
+ v->arch.irq_new_pending = 0;
+ VMX(v, irq_check) = 0;
+ check_pending_irq(v);
+ return;
+ }
+ if (VMX(v, irq_check)) {
+ VMX(v, irq_check) = 0;
+ vhpi_detection(v);
+ }
+}
+
+
+static inline void handle_lds(struct kvm_pt_regs *regs)
+{
+ regs->cr_ipsr |= IA64_PSR_ED;
+}
+
+void physical_tlb_miss(struct kvm_vcpu *vcpu, unsigned long vadr, int type)
+{
+ unsigned long pte;
+ union ia64_rr rr;
+
+ rr.val = ia64_get_rr(vadr);
+ pte = vadr & _PAGE_PPN_MASK;
+ pte = pte | PHY_PAGE_WB;
+ thash_vhpt_insert(vcpu, pte, (u64)(rr.ps << 2), vadr, type);
+ return;
+}
+
+void kvm_page_fault(u64 vadr , u64 vec, struct kvm_pt_regs *regs)
+{
+ unsigned long vpsr;
+ int type;
+
+ u64 vhpt_adr, gppa, pteval, rr, itir;
+ union ia64_isr misr;
+ union ia64_pta vpta;
+ struct thash_data *data;
+ struct kvm_vcpu *v = current_vcpu;
+
+ vpsr = VCPU(v, vpsr);
+ misr.val = VMX(v, cr_isr);
+
+ type = vec;
+
+ if (is_physical_mode(v) && (!(vadr << 1 >> 62))) {
+ if (vec == 2) {
+ if (__gpfn_is_io((vadr << 1) >> (PAGE_SHIFT + 1))) {
+ emulate_io_inst(v, ((vadr << 1) >> 1), 4);
+ return;
+ }
+ }
+ physical_tlb_miss(v, vadr, type);
+ return;
+ }
+ data = vtlb_lookup(v, vadr, type);
+ if (data != 0) {
+ if (type == D_TLB) {
+ gppa = (vadr & ((1UL << data->ps) - 1))
+ + (data->ppn >> (data->ps - 12) << data->ps);
+ if (__gpfn_is_io(gppa >> PAGE_SHIFT)) {
+ if (data->pl >= ((regs->cr_ipsr >>
+ IA64_PSR_CPL0_BIT) & 3))
+ emulate_io_inst(v, gppa, data->ma);
+ else {
+ vcpu_set_isr(v, misr.val);
+ data_access_rights(v, vadr);
+ }
+ return ;
+ }
+ }
+ thash_vhpt_insert(v, data->page_flags, data->itir, vadr, type);
+
+ } else if (type == D_TLB) {
+ if (misr.sp) {
+ handle_lds(regs);
+ return;
+ }
+
+ rr = vcpu_get_rr(v, vadr);
+ itir = rr & (RR_RID_MASK | RR_PS_MASK);
+
+ if (!vhpt_enabled(v, vadr, misr.rs ? RSE_REF : DATA_REF)) {
+ if (vpsr & IA64_PSR_IC) {
+ vcpu_set_isr(v, misr.val);
+ alt_dtlb(v, vadr);
+ } else {
+ nested_dtlb(v);
+ }
+ return ;
+ }
+
+ vpta.val = vcpu_get_pta(v);
+ /* avoid recursively walking (short format) VHPT */
+
+ vhpt_adr = vcpu_thash(v, vadr);
+ if (!guest_vhpt_lookup(vhpt_adr, &pteval)) {
+ /* VHPT successfully read. */
+ if (!(pteval & _PAGE_P)) {
+ if (vpsr & IA64_PSR_IC) {
+ vcpu_set_isr(v, misr.val);
+ dtlb_fault(v, vadr);
+ } else {
+ nested_dtlb(v);
+ }
+ } else if ((pteval & _PAGE_MA_MASK) != _PAGE_MA_ST) {
+ thash_purge_and_insert(v, pteval, itir,
+ vadr, D_TLB);
+ } else if (vpsr & IA64_PSR_IC) {
+ vcpu_set_isr(v, misr.val);
+ dtlb_fault(v, vadr);
+ } else {
+ nested_dtlb(v);
+ }
+ } else {
+ /* Can't read VHPT. */
+ if (vpsr & IA64_PSR_IC) {
+ vcpu_set_isr(v, misr.val);
+ dvhpt_fault(v, vadr);
+ } else {
+ nested_dtlb(v);
+ }
+ }
+ } else if (type == I_TLB) {
+ if (!(vpsr & IA64_PSR_IC))
+ misr.ni = 1;
+ if (!vhpt_enabled(v, vadr, INST_REF)) {
+ vcpu_set_isr(v, misr.val);
+ alt_itlb(v, vadr);
+ return;
+ }
+
+ vpta.val = vcpu_get_pta(v);
+
+ vhpt_adr = vcpu_thash(v, vadr);
+ if (!guest_vhpt_lookup(vhpt_adr, &pteval)) {
+ /* VHPT successfully read. */
+ if (pteval & _PAGE_P) {
+ if ((pteval & _PAGE_MA_MASK) == _PAGE_MA_ST) {
+ vcpu_set_isr(v, misr.val);
+ itlb_fault(v, vadr);
+ return ;
+ }
+ rr = vcpu_get_rr(v, vadr);
+ itir = rr & (RR_RID_MASK | RR_PS_MASK);
+ thash_purge_and_insert(v, pteval, itir,
+ vadr, I_TLB);
+ } else {
+ vcpu_set_isr(v, misr.val);
+ inst_page_not_present(v, vadr);
+ }
+ } else {
+ vcpu_set_isr(v, misr.val);
+ ivhpt_fault(v, vadr);
+ }
+ }
+}
+
+void kvm_vexirq(struct kvm_vcpu *vcpu)
+{
+ u64 vpsr, isr;
+ struct kvm_pt_regs *regs;
+
+ regs = vcpu_regs(vcpu);
+ vpsr = VCPU(vcpu, vpsr);
+ isr = vpsr & IA64_PSR_RI;
+ reflect_interruption(0, isr, 0, 12, regs); /*EXT IRQ*/
+}
+
+void kvm_ia64_handle_irq(struct kvm_vcpu *v)
+{
+ struct exit_ctl_data *p = &v->arch.exit_data;
+ long psr;
+
+ local_irq_save(psr);
+ p->exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT;
+ vmm_transition(v);
+ local_irq_restore(psr);
+
+ VMX(v, timer_check) = 1;
+
+}
+
+static void ptc_ga_remote_func(struct kvm_vcpu *v, int pos)
+{
+ u64 oldrid, moldrid, oldpsbits, vaddr;
+ struct kvm_ptc_g *p = &v->arch.ptc_g_data[pos];
+ vaddr = p->vaddr;
+
+ oldrid = VMX(v, vrr[0]);
+ VMX(v, vrr[0]) = p->rr;
+ oldpsbits = VMX(v, psbits[0]);
+ VMX(v, psbits[0]) = VMX(v, psbits[REGION_NUMBER(vaddr)]);
+ moldrid = ia64_get_rr(0x0);
+ ia64_set_rr(0x0, vrrtomrr(p->rr));
+ ia64_srlz_d();
+
+ vaddr = PAGEALIGN(vaddr, p->ps);
+ thash_purge_entries_remote(v, vaddr, p->ps);
+
+ VMX(v, vrr[0]) = oldrid;
+ VMX(v, psbits[0]) = oldpsbits;
+ ia64_set_rr(0x0, moldrid);
+ ia64_dv_serialize_data();
+}
+
+static void vcpu_do_resume(struct kvm_vcpu *vcpu)
+{
+ /*Re-init VHPT and VTLB once from resume*/
+ vcpu->arch.vhpt.num = VHPT_NUM_ENTRIES;
+ thash_init(&vcpu->arch.vhpt, VHPT_SHIFT);
+ vcpu->arch.vtlb.num = VTLB_NUM_ENTRIES;
+ thash_init(&vcpu->arch.vtlb, VTLB_SHIFT);
+
+ ia64_set_pta(vcpu->arch.vhpt.pta.val);
+}
+
+static void kvm_do_resume_op(struct kvm_vcpu *vcpu)
+{
+ if (test_and_clear_bit(KVM_REQ_RESUME, &vcpu->requests)) {
+ vcpu_do_resume(vcpu);
+ return;
+ }
+
+ if (unlikely(test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))) {
+ thash_purge_all(vcpu);
+ return;
+ }
+
+ if (test_and_clear_bit(KVM_REQ_PTC_G, &vcpu->requests)) {
+ while (vcpu->arch.ptc_g_count > 0)
+ ptc_ga_remote_func(vcpu, --vcpu->arch.ptc_g_count);
+ }
+}
+
+void vmm_transition(struct kvm_vcpu *vcpu)
+{
+ ia64_call_vsa(PAL_VPS_SAVE, (unsigned long)vcpu->arch.vpd,
+ 0, 0, 0, 0, 0, 0);
+ vmm_trampoline(&vcpu->arch.guest, &vcpu->arch.host);
+ ia64_call_vsa(PAL_VPS_RESTORE, (unsigned long)vcpu->arch.vpd,
+ 0, 0, 0, 0, 0, 0);
+ kvm_do_resume_op(vcpu);
+}
diff --git a/arch/ia64/kvm/trampoline.S b/arch/ia64/kvm/trampoline.S
new file mode 100644
index 000000000000..30897d44d61e
--- /dev/null
+++ b/arch/ia64/kvm/trampoline.S
@@ -0,0 +1,1038 @@
+/* Save all processor states
+ *
+ * Copyright (c) 2007 Fleming Feng <fleming.feng@intel.com>
+ * Copyright (c) 2007 Anthony Xu <anthony.xu@intel.com>
+ */
+
+#include <asm/asmmacro.h>
+#include "asm-offsets.h"
+
+
+#define CTX(name) VMM_CTX_##name##_OFFSET
+
+ /*
+ * r32: context_t base address
+ */
+#define SAVE_BRANCH_REGS \
+ add r2 = CTX(B0),r32; \
+ add r3 = CTX(B1),r32; \
+ mov r16 = b0; \
+ mov r17 = b1; \
+ ;; \
+ st8 [r2]=r16,16; \
+ st8 [r3]=r17,16; \
+ ;; \
+ mov r16 = b2; \
+ mov r17 = b3; \
+ ;; \
+ st8 [r2]=r16,16; \
+ st8 [r3]=r17,16; \
+ ;; \
+ mov r16 = b4; \
+ mov r17 = b5; \
+ ;; \
+ st8 [r2]=r16; \
+ st8 [r3]=r17; \
+ ;;
+
+ /*
+ * r33: context_t base address
+ */
+#define RESTORE_BRANCH_REGS \
+ add r2 = CTX(B0),r33; \
+ add r3 = CTX(B1),r33; \
+ ;; \
+ ld8 r16=[r2],16; \
+ ld8 r17=[r3],16; \
+ ;; \
+ mov b0 = r16; \
+ mov b1 = r17; \
+ ;; \
+ ld8 r16=[r2],16; \
+ ld8 r17=[r3],16; \
+ ;; \
+ mov b2 = r16; \
+ mov b3 = r17; \
+ ;; \
+ ld8 r16=[r2]; \
+ ld8 r17=[r3]; \
+ ;; \
+ mov b4=r16; \
+ mov b5=r17; \
+ ;;
+
+
+ /*
+ * r32: context_t base address
+ * bsw == 1
+ * Save all bank1 general registers, r4 ~ r7
+ */
+#define SAVE_GENERAL_REGS \
+ add r2=CTX(R4),r32; \
+ add r3=CTX(R5),r32; \
+ ;; \
+.mem.offset 0,0; \
+ st8.spill [r2]=r4,16; \
+.mem.offset 8,0; \
+ st8.spill [r3]=r5,16; \
+ ;; \
+.mem.offset 0,0; \
+ st8.spill [r2]=r6,48; \
+.mem.offset 8,0; \
+ st8.spill [r3]=r7,48; \
+ ;; \
+.mem.offset 0,0; \
+ st8.spill [r2]=r12; \
+.mem.offset 8,0; \
+ st8.spill [r3]=r13; \
+ ;;
+
+ /*
+ * r33: context_t base address
+ * bsw == 1
+ */
+#define RESTORE_GENERAL_REGS \
+ add r2=CTX(R4),r33; \
+ add r3=CTX(R5),r33; \
+ ;; \
+ ld8.fill r4=[r2],16; \
+ ld8.fill r5=[r3],16; \
+ ;; \
+ ld8.fill r6=[r2],48; \
+ ld8.fill r7=[r3],48; \
+ ;; \
+ ld8.fill r12=[r2]; \
+ ld8.fill r13 =[r3]; \
+ ;;
+
+
+
+
+ /*
+ * r32: context_t base address
+ */
+#define SAVE_KERNEL_REGS \
+ add r2 = CTX(KR0),r32; \
+ add r3 = CTX(KR1),r32; \
+ mov r16 = ar.k0; \
+ mov r17 = ar.k1; \
+ ;; \
+ st8 [r2] = r16,16; \
+ st8 [r3] = r17,16; \
+ ;; \
+ mov r16 = ar.k2; \
+ mov r17 = ar.k3; \
+ ;; \
+ st8 [r2] = r16,16; \
+ st8 [r3] = r17,16; \
+ ;; \
+ mov r16 = ar.k4; \
+ mov r17 = ar.k5; \
+ ;; \
+ st8 [r2] = r16,16; \
+ st8 [r3] = r17,16; \
+ ;; \
+ mov r16 = ar.k6; \
+ mov r17 = ar.k7; \
+ ;; \
+ st8 [r2] = r16; \
+ st8 [r3] = r17; \
+ ;;
+
+
+
+ /*
+ * r33: context_t base address
+ */
+#define RESTORE_KERNEL_REGS \
+ add r2 = CTX(KR0),r33; \
+ add r3 = CTX(KR1),r33; \
+ ;; \
+ ld8 r16=[r2],16; \
+ ld8 r17=[r3],16; \
+ ;; \
+ mov ar.k0=r16; \
+ mov ar.k1=r17; \
+ ;; \
+ ld8 r16=[r2],16; \
+ ld8 r17=[r3],16; \
+ ;; \
+ mov ar.k2=r16; \
+ mov ar.k3=r17; \
+ ;; \
+ ld8 r16=[r2],16; \
+ ld8 r17=[r3],16; \
+ ;; \
+ mov ar.k4=r16; \
+ mov ar.k5=r17; \
+ ;; \
+ ld8 r16=[r2],16; \
+ ld8 r17=[r3],16; \
+ ;; \
+ mov ar.k6=r16; \
+ mov ar.k7=r17; \
+ ;;
+
+
+
+ /*
+ * r32: context_t base address
+ */
+#define SAVE_APP_REGS \
+ add r2 = CTX(BSPSTORE),r32; \
+ mov r16 = ar.bspstore; \
+ ;; \
+ st8 [r2] = r16,CTX(RNAT)-CTX(BSPSTORE);\
+ mov r16 = ar.rnat; \
+ ;; \
+ st8 [r2] = r16,CTX(FCR)-CTX(RNAT); \
+ mov r16 = ar.fcr; \
+ ;; \
+ st8 [r2] = r16,CTX(EFLAG)-CTX(FCR); \
+ mov r16 = ar.eflag; \
+ ;; \
+ st8 [r2] = r16,CTX(CFLG)-CTX(EFLAG); \
+ mov r16 = ar.cflg; \
+ ;; \
+ st8 [r2] = r16,CTX(FSR)-CTX(CFLG); \
+ mov r16 = ar.fsr; \
+ ;; \
+ st8 [r2] = r16,CTX(FIR)-CTX(FSR); \
+ mov r16 = ar.fir; \
+ ;; \
+ st8 [r2] = r16,CTX(FDR)-CTX(FIR); \
+ mov r16 = ar.fdr; \
+ ;; \
+ st8 [r2] = r16,CTX(UNAT)-CTX(FDR); \
+ mov r16 = ar.unat; \
+ ;; \
+ st8 [r2] = r16,CTX(FPSR)-CTX(UNAT); \
+ mov r16 = ar.fpsr; \
+ ;; \
+ st8 [r2] = r16,CTX(PFS)-CTX(FPSR); \
+ mov r16 = ar.pfs; \
+ ;; \
+ st8 [r2] = r16,CTX(LC)-CTX(PFS); \
+ mov r16 = ar.lc; \
+ ;; \
+ st8 [r2] = r16; \
+ ;;
+
+ /*
+ * r33: context_t base address
+ */
+#define RESTORE_APP_REGS \
+ add r2=CTX(BSPSTORE),r33; \
+ ;; \
+ ld8 r16=[r2],CTX(RNAT)-CTX(BSPSTORE); \
+ ;; \
+ mov ar.bspstore=r16; \
+ ld8 r16=[r2],CTX(FCR)-CTX(RNAT); \
+ ;; \
+ mov ar.rnat=r16; \
+ ld8 r16=[r2],CTX(EFLAG)-CTX(FCR); \
+ ;; \
+ mov ar.fcr=r16; \
+ ld8 r16=[r2],CTX(CFLG)-CTX(EFLAG); \
+ ;; \
+ mov ar.eflag=r16; \
+ ld8 r16=[r2],CTX(FSR)-CTX(CFLG); \
+ ;; \
+ mov ar.cflg=r16; \
+ ld8 r16=[r2],CTX(FIR)-CTX(FSR); \
+ ;; \
+ mov ar.fsr=r16; \
+ ld8 r16=[r2],CTX(FDR)-CTX(FIR); \
+ ;; \
+ mov ar.fir=r16; \
+ ld8 r16=[r2],CTX(UNAT)-CTX(FDR); \
+ ;; \
+ mov ar.fdr=r16; \
+ ld8 r16=[r2],CTX(FPSR)-CTX(UNAT); \
+ ;; \
+ mov ar.unat=r16; \
+ ld8 r16=[r2],CTX(PFS)-CTX(FPSR); \
+ ;; \
+ mov ar.fpsr=r16; \
+ ld8 r16=[r2],CTX(LC)-CTX(PFS); \
+ ;; \
+ mov ar.pfs=r16; \
+ ld8 r16=[r2]; \
+ ;; \
+ mov ar.lc=r16; \
+ ;;
+
+ /*
+ * r32: context_t base address
+ */
+#define SAVE_CTL_REGS \
+ add r2 = CTX(DCR),r32; \
+ mov r16 = cr.dcr; \
+ ;; \
+ st8 [r2] = r16,CTX(IVA)-CTX(DCR); \
+ ;; \
+ mov r16 = cr.iva; \
+ ;; \
+ st8 [r2] = r16,CTX(PTA)-CTX(IVA); \
+ ;; \
+ mov r16 = cr.pta; \
+ ;; \
+ st8 [r2] = r16 ; \
+ ;;
+
+ /*
+ * r33: context_t base address
+ */
+#define RESTORE_CTL_REGS \
+ add r2 = CTX(DCR),r33; \
+ ;; \
+ ld8 r16 = [r2],CTX(IVA)-CTX(DCR); \
+ ;; \
+ mov cr.dcr = r16; \
+ dv_serialize_data; \
+ ;; \
+ ld8 r16 = [r2],CTX(PTA)-CTX(IVA); \
+ ;; \
+ mov cr.iva = r16; \
+ dv_serialize_data; \
+ ;; \
+ ld8 r16 = [r2]; \
+ ;; \
+ mov cr.pta = r16; \
+ dv_serialize_data; \
+ ;;
+
+
+ /*
+ * r32: context_t base address
+ */
+#define SAVE_REGION_REGS \
+ add r2=CTX(RR0),r32; \
+ mov r16=rr[r0]; \
+ dep.z r18=1,61,3; \
+ ;; \
+ st8 [r2]=r16,8; \
+ mov r17=rr[r18]; \
+ dep.z r18=2,61,3; \
+ ;; \
+ st8 [r2]=r17,8; \
+ mov r16=rr[r18]; \
+ dep.z r18=3,61,3; \
+ ;; \
+ st8 [r2]=r16,8; \
+ mov r17=rr[r18]; \
+ dep.z r18=4,61,3; \
+ ;; \
+ st8 [r2]=r17,8; \
+ mov r16=rr[r18]; \
+ dep.z r18=5,61,3; \
+ ;; \
+ st8 [r2]=r16,8; \
+ mov r17=rr[r18]; \
+ dep.z r18=7,61,3; \
+ ;; \
+ st8 [r2]=r17,16; \
+ mov r16=rr[r18]; \
+ ;; \
+ st8 [r2]=r16,8; \
+ ;;
+
+ /*
+ * r33:context_t base address
+ */
+#define RESTORE_REGION_REGS \
+ add r2=CTX(RR0),r33;\
+ mov r18=r0; \
+ ;; \
+ ld8 r20=[r2],8; \
+ ;; /* rr0 */ \
+ ld8 r21=[r2],8; \
+ ;; /* rr1 */ \
+ ld8 r22=[r2],8; \
+ ;; /* rr2 */ \
+ ld8 r23=[r2],8; \
+ ;; /* rr3 */ \
+ ld8 r24=[r2],8; \
+ ;; /* rr4 */ \
+ ld8 r25=[r2],16; \
+ ;; /* rr5 */ \
+ ld8 r27=[r2]; \
+ ;; /* rr7 */ \
+ mov rr[r18]=r20; \
+ dep.z r18=1,61,3; \
+ ;; /* rr1 */ \
+ mov rr[r18]=r21; \
+ dep.z r18=2,61,3; \
+ ;; /* rr2 */ \
+ mov rr[r18]=r22; \
+ dep.z r18=3,61,3; \
+ ;; /* rr3 */ \
+ mov rr[r18]=r23; \
+ dep.z r18=4,61,3; \
+ ;; /* rr4 */ \
+ mov rr[r18]=r24; \
+ dep.z r18=5,61,3; \
+ ;; /* rr5 */ \
+ mov rr[r18]=r25; \
+ dep.z r18=7,61,3; \
+ ;; /* rr7 */ \
+ mov rr[r18]=r27; \
+ ;; \
+ srlz.i; \
+ ;;
+
+
+
+ /*
+ * r32: context_t base address
+ * r36~r39:scratch registers
+ */
+#define SAVE_DEBUG_REGS \
+ add r2=CTX(IBR0),r32; \
+ add r3=CTX(DBR0),r32; \
+ mov r16=ibr[r0]; \
+ mov r17=dbr[r0]; \
+ ;; \
+ st8 [r2]=r16,8; \
+ st8 [r3]=r17,8; \
+ add r18=1,r0; \
+ ;; \
+ mov r16=ibr[r18]; \
+ mov r17=dbr[r18]; \
+ ;; \
+ st8 [r2]=r16,8; \
+ st8 [r3]=r17,8; \
+ add r18=2,r0; \
+ ;; \
+ mov r16=ibr[r18]; \
+ mov r17=dbr[r18]; \
+ ;; \
+ st8 [r2]=r16,8; \
+ st8 [r3]=r17,8; \
+ add r18=2,r0; \
+ ;; \
+ mov r16=ibr[r18]; \
+ mov r17=dbr[r18]; \
+ ;; \
+ st8 [r2]=r16,8; \
+ st8 [r3]=r17,8; \
+ add r18=3,r0; \
+ ;; \
+ mov r16=ibr[r18]; \
+ mov r17=dbr[r18]; \
+ ;; \
+ st8 [r2]=r16,8; \
+ st8 [r3]=r17,8; \
+ add r18=4,r0; \
+ ;; \
+ mov r16=ibr[r18]; \
+ mov r17=dbr[r18]; \
+ ;; \
+ st8 [r2]=r16,8; \
+ st8 [r3]=r17,8; \
+ add r18=5,r0; \
+ ;; \
+ mov r16=ibr[r18]; \
+ mov r17=dbr[r18]; \
+ ;; \
+ st8 [r2]=r16,8; \
+ st8 [r3]=r17,8; \
+ add r18=6,r0; \
+ ;; \
+ mov r16=ibr[r18]; \
+ mov r17=dbr[r18]; \
+ ;; \
+ st8 [r2]=r16,8; \
+ st8 [r3]=r17,8; \
+ add r18=7,r0; \
+ ;; \
+ mov r16=ibr[r18]; \
+ mov r17=dbr[r18]; \
+ ;; \
+ st8 [r2]=r16,8; \
+ st8 [r3]=r17,8; \
+ ;;
+
+
+/*
+ * r33: point to context_t structure
+ * ar.lc are corrupted.
+ */
+#define RESTORE_DEBUG_REGS \
+ add r2=CTX(IBR0),r33; \
+ add r3=CTX(DBR0),r33; \
+ mov r16=7; \
+ mov r17=r0; \
+ ;; \
+ mov ar.lc = r16; \
+ ;; \
+1: \
+ ld8 r18=[r2],8; \
+ ld8 r19=[r3],8; \
+ ;; \
+ mov ibr[r17]=r18; \
+ mov dbr[r17]=r19; \
+ ;; \
+ srlz.i; \
+ ;; \
+ add r17=1,r17; \
+ br.cloop.sptk 1b; \
+ ;;
+
+
+ /*
+ * r32: context_t base address
+ */
+#define SAVE_FPU_LOW \
+ add r2=CTX(F2),r32; \
+ add r3=CTX(F3),r32; \
+ ;; \
+ stf.spill.nta [r2]=f2,32; \
+ stf.spill.nta [r3]=f3,32; \
+ ;; \
+ stf.spill.nta [r2]=f4,32; \
+ stf.spill.nta [r3]=f5,32; \
+ ;; \
+ stf.spill.nta [r2]=f6,32; \
+ stf.spill.nta [r3]=f7,32; \
+ ;; \
+ stf.spill.nta [r2]=f8,32; \
+ stf.spill.nta [r3]=f9,32; \
+ ;; \
+ stf.spill.nta [r2]=f10,32; \
+ stf.spill.nta [r3]=f11,32; \
+ ;; \
+ stf.spill.nta [r2]=f12,32; \
+ stf.spill.nta [r3]=f13,32; \
+ ;; \
+ stf.spill.nta [r2]=f14,32; \
+ stf.spill.nta [r3]=f15,32; \
+ ;; \
+ stf.spill.nta [r2]=f16,32; \
+ stf.spill.nta [r3]=f17,32; \
+ ;; \
+ stf.spill.nta [r2]=f18,32; \
+ stf.spill.nta [r3]=f19,32; \
+ ;; \
+ stf.spill.nta [r2]=f20,32; \
+ stf.spill.nta [r3]=f21,32; \
+ ;; \
+ stf.spill.nta [r2]=f22,32; \
+ stf.spill.nta [r3]=f23,32; \
+ ;; \
+ stf.spill.nta [r2]=f24,32; \
+ stf.spill.nta [r3]=f25,32; \
+ ;; \
+ stf.spill.nta [r2]=f26,32; \
+ stf.spill.nta [r3]=f27,32; \
+ ;; \
+ stf.spill.nta [r2]=f28,32; \
+ stf.spill.nta [r3]=f29,32; \
+ ;; \
+ stf.spill.nta [r2]=f30; \
+ stf.spill.nta [r3]=f31; \
+ ;;
+
+ /*
+ * r32: context_t base address
+ */
+#define SAVE_FPU_HIGH \
+ add r2=CTX(F32),r32; \
+ add r3=CTX(F33),r32; \
+ ;; \
+ stf.spill.nta [r2]=f32,32; \
+ stf.spill.nta [r3]=f33,32; \
+ ;; \
+ stf.spill.nta [r2]=f34,32; \
+ stf.spill.nta [r3]=f35,32; \
+ ;; \
+ stf.spill.nta [r2]=f36,32; \
+ stf.spill.nta [r3]=f37,32; \
+ ;; \
+ stf.spill.nta [r2]=f38,32; \
+ stf.spill.nta [r3]=f39,32; \
+ ;; \
+ stf.spill.nta [r2]=f40,32; \
+ stf.spill.nta [r3]=f41,32; \
+ ;; \
+ stf.spill.nta [r2]=f42,32; \
+ stf.spill.nta [r3]=f43,32; \
+ ;; \
+ stf.spill.nta [r2]=f44,32; \
+ stf.spill.nta [r3]=f45,32; \
+ ;; \
+ stf.spill.nta [r2]=f46,32; \
+ stf.spill.nta [r3]=f47,32; \
+ ;; \
+ stf.spill.nta [r2]=f48,32; \
+ stf.spill.nta [r3]=f49,32; \
+ ;; \
+ stf.spill.nta [r2]=f50,32; \
+ stf.spill.nta [r3]=f51,32; \
+ ;; \
+ stf.spill.nta [r2]=f52,32; \
+ stf.spill.nta [r3]=f53,32; \
+ ;; \
+ stf.spill.nta [r2]=f54,32; \
+ stf.spill.nta [r3]=f55,32; \
+ ;; \
+ stf.spill.nta [r2]=f56,32; \
+ stf.spill.nta [r3]=f57,32; \
+ ;; \
+ stf.spill.nta [r2]=f58,32; \
+ stf.spill.nta [r3]=f59,32; \
+ ;; \
+ stf.spill.nta [r2]=f60,32; \
+ stf.spill.nta [r3]=f61,32; \
+ ;; \
+ stf.spill.nta [r2]=f62,32; \
+ stf.spill.nta [r3]=f63,32; \
+ ;; \
+ stf.spill.nta [r2]=f64,32; \
+ stf.spill.nta [r3]=f65,32; \
+ ;; \
+ stf.spill.nta [r2]=f66,32; \
+ stf.spill.nta [r3]=f67,32; \
+ ;; \
+ stf.spill.nta [r2]=f68,32; \
+ stf.spill.nta [r3]=f69,32; \
+ ;; \
+ stf.spill.nta [r2]=f70,32; \
+ stf.spill.nta [r3]=f71,32; \
+ ;; \
+ stf.spill.nta [r2]=f72,32; \
+ stf.spill.nta [r3]=f73,32; \
+ ;; \
+ stf.spill.nta [r2]=f74,32; \
+ stf.spill.nta [r3]=f75,32; \
+ ;; \
+ stf.spill.nta [r2]=f76,32; \
+ stf.spill.nta [r3]=f77,32; \
+ ;; \
+ stf.spill.nta [r2]=f78,32; \
+ stf.spill.nta [r3]=f79,32; \
+ ;; \
+ stf.spill.nta [r2]=f80,32; \
+ stf.spill.nta [r3]=f81,32; \
+ ;; \
+ stf.spill.nta [r2]=f82,32; \
+ stf.spill.nta [r3]=f83,32; \
+ ;; \
+ stf.spill.nta [r2]=f84,32; \
+ stf.spill.nta [r3]=f85,32; \
+ ;; \
+ stf.spill.nta [r2]=f86,32; \
+ stf.spill.nta [r3]=f87,32; \
+ ;; \
+ stf.spill.nta [r2]=f88,32; \
+ stf.spill.nta [r3]=f89,32; \
+ ;; \
+ stf.spill.nta [r2]=f90,32; \
+ stf.spill.nta [r3]=f91,32; \
+ ;; \
+ stf.spill.nta [r2]=f92,32; \
+ stf.spill.nta [r3]=f93,32; \
+ ;; \
+ stf.spill.nta [r2]=f94,32; \
+ stf.spill.nta [r3]=f95,32; \
+ ;; \
+ stf.spill.nta [r2]=f96,32; \
+ stf.spill.nta [r3]=f97,32; \
+ ;; \
+ stf.spill.nta [r2]=f98,32; \
+ stf.spill.nta [r3]=f99,32; \
+ ;; \
+ stf.spill.nta [r2]=f100,32; \
+ stf.spill.nta [r3]=f101,32; \
+ ;; \
+ stf.spill.nta [r2]=f102,32; \
+ stf.spill.nta [r3]=f103,32; \
+ ;; \
+ stf.spill.nta [r2]=f104,32; \
+ stf.spill.nta [r3]=f105,32; \
+ ;; \
+ stf.spill.nta [r2]=f106,32; \
+ stf.spill.nta [r3]=f107,32; \
+ ;; \
+ stf.spill.nta [r2]=f108,32; \
+ stf.spill.nta [r3]=f109,32; \
+ ;; \
+ stf.spill.nta [r2]=f110,32; \
+ stf.spill.nta [r3]=f111,32; \
+ ;; \
+ stf.spill.nta [r2]=f112,32; \
+ stf.spill.nta [r3]=f113,32; \
+ ;; \
+ stf.spill.nta [r2]=f114,32; \
+ stf.spill.nta [r3]=f115,32; \
+ ;; \
+ stf.spill.nta [r2]=f116,32; \
+ stf.spill.nta [r3]=f117,32; \
+ ;; \
+ stf.spill.nta [r2]=f118,32; \
+ stf.spill.nta [r3]=f119,32; \
+ ;; \
+ stf.spill.nta [r2]=f120,32; \
+ stf.spill.nta [r3]=f121,32; \
+ ;; \
+ stf.spill.nta [r2]=f122,32; \
+ stf.spill.nta [r3]=f123,32; \
+ ;; \
+ stf.spill.nta [r2]=f124,32; \
+ stf.spill.nta [r3]=f125,32; \
+ ;; \
+ stf.spill.nta [r2]=f126; \
+ stf.spill.nta [r3]=f127; \
+ ;;
+
+ /*
+ * r33: point to context_t structure
+ */
+#define RESTORE_FPU_LOW \
+ add r2 = CTX(F2), r33; \
+ add r3 = CTX(F3), r33; \
+ ;; \
+ ldf.fill.nta f2 = [r2], 32; \
+ ldf.fill.nta f3 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f4 = [r2], 32; \
+ ldf.fill.nta f5 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f6 = [r2], 32; \
+ ldf.fill.nta f7 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f8 = [r2], 32; \
+ ldf.fill.nta f9 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f10 = [r2], 32; \
+ ldf.fill.nta f11 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f12 = [r2], 32; \
+ ldf.fill.nta f13 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f14 = [r2], 32; \
+ ldf.fill.nta f15 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f16 = [r2], 32; \
+ ldf.fill.nta f17 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f18 = [r2], 32; \
+ ldf.fill.nta f19 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f20 = [r2], 32; \
+ ldf.fill.nta f21 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f22 = [r2], 32; \
+ ldf.fill.nta f23 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f24 = [r2], 32; \
+ ldf.fill.nta f25 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f26 = [r2], 32; \
+ ldf.fill.nta f27 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f28 = [r2], 32; \
+ ldf.fill.nta f29 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f30 = [r2], 32; \
+ ldf.fill.nta f31 = [r3], 32; \
+ ;;
+
+
+
+ /*
+ * r33: point to context_t structure
+ */
+#define RESTORE_FPU_HIGH \
+ add r2 = CTX(F32), r33; \
+ add r3 = CTX(F33), r33; \
+ ;; \
+ ldf.fill.nta f32 = [r2], 32; \
+ ldf.fill.nta f33 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f34 = [r2], 32; \
+ ldf.fill.nta f35 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f36 = [r2], 32; \
+ ldf.fill.nta f37 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f38 = [r2], 32; \
+ ldf.fill.nta f39 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f40 = [r2], 32; \
+ ldf.fill.nta f41 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f42 = [r2], 32; \
+ ldf.fill.nta f43 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f44 = [r2], 32; \
+ ldf.fill.nta f45 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f46 = [r2], 32; \
+ ldf.fill.nta f47 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f48 = [r2], 32; \
+ ldf.fill.nta f49 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f50 = [r2], 32; \
+ ldf.fill.nta f51 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f52 = [r2], 32; \
+ ldf.fill.nta f53 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f54 = [r2], 32; \
+ ldf.fill.nta f55 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f56 = [r2], 32; \
+ ldf.fill.nta f57 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f58 = [r2], 32; \
+ ldf.fill.nta f59 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f60 = [r2], 32; \
+ ldf.fill.nta f61 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f62 = [r2], 32; \
+ ldf.fill.nta f63 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f64 = [r2], 32; \
+ ldf.fill.nta f65 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f66 = [r2], 32; \
+ ldf.fill.nta f67 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f68 = [r2], 32; \
+ ldf.fill.nta f69 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f70 = [r2], 32; \
+ ldf.fill.nta f71 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f72 = [r2], 32; \
+ ldf.fill.nta f73 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f74 = [r2], 32; \
+ ldf.fill.nta f75 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f76 = [r2], 32; \
+ ldf.fill.nta f77 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f78 = [r2], 32; \
+ ldf.fill.nta f79 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f80 = [r2], 32; \
+ ldf.fill.nta f81 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f82 = [r2], 32; \
+ ldf.fill.nta f83 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f84 = [r2], 32; \
+ ldf.fill.nta f85 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f86 = [r2], 32; \
+ ldf.fill.nta f87 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f88 = [r2], 32; \
+ ldf.fill.nta f89 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f90 = [r2], 32; \
+ ldf.fill.nta f91 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f92 = [r2], 32; \
+ ldf.fill.nta f93 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f94 = [r2], 32; \
+ ldf.fill.nta f95 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f96 = [r2], 32; \
+ ldf.fill.nta f97 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f98 = [r2], 32; \
+ ldf.fill.nta f99 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f100 = [r2], 32; \
+ ldf.fill.nta f101 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f102 = [r2], 32; \
+ ldf.fill.nta f103 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f104 = [r2], 32; \
+ ldf.fill.nta f105 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f106 = [r2], 32; \
+ ldf.fill.nta f107 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f108 = [r2], 32; \
+ ldf.fill.nta f109 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f110 = [r2], 32; \
+ ldf.fill.nta f111 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f112 = [r2], 32; \
+ ldf.fill.nta f113 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f114 = [r2], 32; \
+ ldf.fill.nta f115 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f116 = [r2], 32; \
+ ldf.fill.nta f117 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f118 = [r2], 32; \
+ ldf.fill.nta f119 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f120 = [r2], 32; \
+ ldf.fill.nta f121 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f122 = [r2], 32; \
+ ldf.fill.nta f123 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f124 = [r2], 32; \
+ ldf.fill.nta f125 = [r3], 32; \
+ ;; \
+ ldf.fill.nta f126 = [r2], 32; \
+ ldf.fill.nta f127 = [r3], 32; \
+ ;;
+
+ /*
+ * r32: context_t base address
+ */
+#define SAVE_PTK_REGS \
+ add r2=CTX(PKR0), r32; \
+ mov r16=7; \
+ ;; \
+ mov ar.lc=r16; \
+ mov r17=r0; \
+ ;; \
+1: \
+ mov r18=pkr[r17]; \
+ ;; \
+ srlz.i; \
+ ;; \
+ st8 [r2]=r18, 8; \
+ ;; \
+ add r17 =1,r17; \
+ ;; \
+ br.cloop.sptk 1b; \
+ ;;
+
+/*
+ * r33: point to context_t structure
+ * ar.lc are corrupted.
+ */
+#define RESTORE_PTK_REGS \
+ add r2=CTX(PKR0), r33; \
+ mov r16=7; \
+ ;; \
+ mov ar.lc=r16; \
+ mov r17=r0; \
+ ;; \
+1: \
+ ld8 r18=[r2], 8; \
+ ;; \
+ mov pkr[r17]=r18; \
+ ;; \
+ srlz.i; \
+ ;; \
+ add r17 =1,r17; \
+ ;; \
+ br.cloop.sptk 1b; \
+ ;;
+
+
+/*
+ * void vmm_trampoline( context_t * from,
+ * context_t * to)
+ *
+ * from: r32
+ * to: r33
+ * note: interrupt disabled before call this function.
+ */
+GLOBAL_ENTRY(vmm_trampoline)
+ mov r16 = psr
+ adds r2 = CTX(PSR), r32
+ ;;
+ st8 [r2] = r16, 8 // psr
+ mov r17 = pr
+ ;;
+ st8 [r2] = r17, 8 // pr
+ mov r18 = ar.unat
+ ;;
+ st8 [r2] = r18
+ mov r17 = ar.rsc
+ ;;
+ adds r2 = CTX(RSC),r32
+ ;;
+ st8 [r2]= r17
+ mov ar.rsc =0
+ flushrs
+ ;;
+ SAVE_GENERAL_REGS
+ ;;
+ SAVE_KERNEL_REGS
+ ;;
+ SAVE_APP_REGS
+ ;;
+ SAVE_BRANCH_REGS
+ ;;
+ SAVE_CTL_REGS
+ ;;
+ SAVE_REGION_REGS
+ ;;
+ //SAVE_DEBUG_REGS
+ ;;
+ rsm psr.dfl
+ ;;
+ srlz.d
+ ;;
+ SAVE_FPU_LOW
+ ;;
+ rsm psr.dfh
+ ;;
+ srlz.d
+ ;;
+ SAVE_FPU_HIGH
+ ;;
+ SAVE_PTK_REGS
+ ;;
+ RESTORE_PTK_REGS
+ ;;
+ RESTORE_FPU_HIGH
+ ;;
+ RESTORE_FPU_LOW
+ ;;
+ //RESTORE_DEBUG_REGS
+ ;;
+ RESTORE_REGION_REGS
+ ;;
+ RESTORE_CTL_REGS
+ ;;
+ RESTORE_BRANCH_REGS
+ ;;
+ RESTORE_APP_REGS
+ ;;
+ RESTORE_KERNEL_REGS
+ ;;
+ RESTORE_GENERAL_REGS
+ ;;
+ adds r2=CTX(PSR), r33
+ ;;
+ ld8 r16=[r2], 8 // psr
+ ;;
+ mov psr.l=r16
+ ;;
+ srlz.d
+ ;;
+ ld8 r16=[r2], 8 // pr
+ ;;
+ mov pr =r16,-1
+ ld8 r16=[r2] // unat
+ ;;
+ mov ar.unat=r16
+ ;;
+ adds r2=CTX(RSC),r33
+ ;;
+ ld8 r16 =[r2]
+ ;;
+ mov ar.rsc = r16
+ ;;
+ br.ret.sptk.few b0
+END(vmm_trampoline)
diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c
new file mode 100644
index 000000000000..e44027ce5667
--- /dev/null
+++ b/arch/ia64/kvm/vcpu.c
@@ -0,0 +1,2163 @@
+/*
+ * kvm_vcpu.c: handling all virtual cpu related thing.
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Shaofan Li (Susue Li) <susie.li@intel.com>
+ * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
+ * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
+ * Xiantao Zhang <xiantao.zhang@intel.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/types.h>
+
+#include <asm/processor.h>
+#include <asm/ia64regs.h>
+#include <asm/gcc_intrin.h>
+#include <asm/kregs.h>
+#include <asm/pgtable.h>
+#include <asm/tlb.h>
+
+#include "asm-offsets.h"
+#include "vcpu.h"
+
+/*
+ * Special notes:
+ * - Index by it/dt/rt sequence
+ * - Only existing mode transitions are allowed in this table
+ * - RSE is placed at lazy mode when emulating guest partial mode
+ * - If gva happens to be rr0 and rr4, only allowed case is identity
+ * mapping (gva=gpa), or panic! (How?)
+ */
+int mm_switch_table[8][8] = {
+ /* 2004/09/12(Kevin): Allow switch to self */
+ /*
+ * (it,dt,rt): (0,0,0) -> (1,1,1)
+ * This kind of transition usually occurs in the very early
+ * stage of Linux boot up procedure. Another case is in efi
+ * and pal calls. (see "arch/ia64/kernel/head.S")
+ *
+ * (it,dt,rt): (0,0,0) -> (0,1,1)
+ * This kind of transition is found when OSYa exits efi boot
+ * service. Due to gva = gpa in this case (Same region),
+ * data access can be satisfied though itlb entry for physical
+ * emulation is hit.
+ */
+ {SW_SELF, 0, 0, SW_NOP, 0, 0, 0, SW_P2V},
+ {0, 0, 0, 0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0, 0, 0, 0},
+ /*
+ * (it,dt,rt): (0,1,1) -> (1,1,1)
+ * This kind of transition is found in OSYa.
+ *
+ * (it,dt,rt): (0,1,1) -> (0,0,0)
+ * This kind of transition is found in OSYa
+ */
+ {SW_NOP, 0, 0, SW_SELF, 0, 0, 0, SW_P2V},
+ /* (1,0,0)->(1,1,1) */
+ {0, 0, 0, 0, 0, 0, 0, SW_P2V},
+ /*
+ * (it,dt,rt): (1,0,1) -> (1,1,1)
+ * This kind of transition usually occurs when Linux returns
+ * from the low level TLB miss handlers.
+ * (see "arch/ia64/kernel/ivt.S")
+ */
+ {0, 0, 0, 0, 0, SW_SELF, 0, SW_P2V},
+ {0, 0, 0, 0, 0, 0, 0, 0},
+ /*
+ * (it,dt,rt): (1,1,1) -> (1,0,1)
+ * This kind of transition usually occurs in Linux low level
+ * TLB miss handler. (see "arch/ia64/kernel/ivt.S")
+ *
+ * (it,dt,rt): (1,1,1) -> (0,0,0)
+ * This kind of transition usually occurs in pal and efi calls,
+ * which requires running in physical mode.
+ * (see "arch/ia64/kernel/head.S")
+ * (1,1,1)->(1,0,0)
+ */
+
+ {SW_V2P, 0, 0, 0, SW_V2P, SW_V2P, 0, SW_SELF},
+};
+
+void physical_mode_init(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.mode_flags = GUEST_IN_PHY;
+}
+
+void switch_to_physical_rid(struct kvm_vcpu *vcpu)
+{
+ unsigned long psr;
+
+ /* Save original virtual mode rr[0] and rr[4] */
+ psr = ia64_clear_ic();
+ ia64_set_rr(VRN0<<VRN_SHIFT, vcpu->arch.metaphysical_rr0);
+ ia64_srlz_d();
+ ia64_set_rr(VRN4<<VRN_SHIFT, vcpu->arch.metaphysical_rr4);
+ ia64_srlz_d();
+
+ ia64_set_psr(psr);
+ return;
+}
+
+
+void switch_to_virtual_rid(struct kvm_vcpu *vcpu)
+{
+ unsigned long psr;
+
+ psr = ia64_clear_ic();
+ ia64_set_rr(VRN0 << VRN_SHIFT, vcpu->arch.metaphysical_saved_rr0);
+ ia64_srlz_d();
+ ia64_set_rr(VRN4 << VRN_SHIFT, vcpu->arch.metaphysical_saved_rr4);
+ ia64_srlz_d();
+ ia64_set_psr(psr);
+ return;
+}
+
+static int mm_switch_action(struct ia64_psr opsr, struct ia64_psr npsr)
+{
+ return mm_switch_table[MODE_IND(opsr)][MODE_IND(npsr)];
+}
+
+void switch_mm_mode(struct kvm_vcpu *vcpu, struct ia64_psr old_psr,
+ struct ia64_psr new_psr)
+{
+ int act;
+ act = mm_switch_action(old_psr, new_psr);
+ switch (act) {
+ case SW_V2P:
+ /*printk("V -> P mode transition: (0x%lx -> 0x%lx)\n",
+ old_psr.val, new_psr.val);*/
+ switch_to_physical_rid(vcpu);
+ /*
+ * Set rse to enforced lazy, to prevent active rse
+ *save/restor when guest physical mode.
+ */
+ vcpu->arch.mode_flags |= GUEST_IN_PHY;
+ break;
+ case SW_P2V:
+ switch_to_virtual_rid(vcpu);
+ /*
+ * recover old mode which is saved when entering
+ * guest physical mode
+ */
+ vcpu->arch.mode_flags &= ~GUEST_IN_PHY;
+ break;
+ case SW_SELF:
+ break;
+ case SW_NOP:
+ break;
+ default:
+ /* Sanity check */
+ break;
+ }
+ return;
+}
+
+
+
+/*
+ * In physical mode, insert tc/tr for region 0 and 4 uses
+ * RID[0] and RID[4] which is for physical mode emulation.
+ * However what those inserted tc/tr wants is rid for
+ * virtual mode. So original virtual rid needs to be restored
+ * before insert.
+ *
+ * Operations which required such switch include:
+ * - insertions (itc.*, itr.*)
+ * - purges (ptc.* and ptr.*)
+ * - tpa
+ * - tak
+ * - thash?, ttag?
+ * All above needs actual virtual rid for destination entry.
+ */
+
+void check_mm_mode_switch(struct kvm_vcpu *vcpu, struct ia64_psr old_psr,
+ struct ia64_psr new_psr)
+{
+
+ if ((old_psr.dt != new_psr.dt)
+ || (old_psr.it != new_psr.it)
+ || (old_psr.rt != new_psr.rt))
+ switch_mm_mode(vcpu, old_psr, new_psr);
+
+ return;
+}
+
+
+/*
+ * In physical mode, insert tc/tr for region 0 and 4 uses
+ * RID[0] and RID[4] which is for physical mode emulation.
+ * However what those inserted tc/tr wants is rid for
+ * virtual mode. So original virtual rid needs to be restored
+ * before insert.
+ *
+ * Operations which required such switch include:
+ * - insertions (itc.*, itr.*)
+ * - purges (ptc.* and ptr.*)
+ * - tpa
+ * - tak
+ * - thash?, ttag?
+ * All above needs actual virtual rid for destination entry.
+ */
+
+void prepare_if_physical_mode(struct kvm_vcpu *vcpu)
+{
+ if (is_physical_mode(vcpu)) {
+ vcpu->arch.mode_flags |= GUEST_PHY_EMUL;
+ switch_to_virtual_rid(vcpu);
+ }
+ return;
+}
+
+/* Recover always follows prepare */
+void recover_if_physical_mode(struct kvm_vcpu *vcpu)
+{
+ if (is_physical_mode(vcpu))
+ switch_to_physical_rid(vcpu);
+ vcpu->arch.mode_flags &= ~GUEST_PHY_EMUL;
+ return;
+}
+
+#define RPT(x) ((u16) &((struct kvm_pt_regs *)0)->x)
+
+static u16 gr_info[32] = {
+ 0, /* r0 is read-only : WE SHOULD NEVER GET THIS */
+ RPT(r1), RPT(r2), RPT(r3),
+ RPT(r4), RPT(r5), RPT(r6), RPT(r7),
+ RPT(r8), RPT(r9), RPT(r10), RPT(r11),
+ RPT(r12), RPT(r13), RPT(r14), RPT(r15),
+ RPT(r16), RPT(r17), RPT(r18), RPT(r19),
+ RPT(r20), RPT(r21), RPT(r22), RPT(r23),
+ RPT(r24), RPT(r25), RPT(r26), RPT(r27),
+ RPT(r28), RPT(r29), RPT(r30), RPT(r31)
+};
+
+#define IA64_FIRST_STACKED_GR 32
+#define IA64_FIRST_ROTATING_FR 32
+
+static inline unsigned long
+rotate_reg(unsigned long sor, unsigned long rrb, unsigned long reg)
+{
+ reg += rrb;
+ if (reg >= sor)
+ reg -= sor;
+ return reg;
+}
+
+/*
+ * Return the (rotated) index for floating point register
+ * be in the REGNUM (REGNUM must range from 32-127,
+ * result is in the range from 0-95.
+ */
+static inline unsigned long fph_index(struct kvm_pt_regs *regs,
+ long regnum)
+{
+ unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f;
+ return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR));
+}
+
+
+/*
+ * The inverse of the above: given bspstore and the number of
+ * registers, calculate ar.bsp.
+ */
+static inline unsigned long *kvm_rse_skip_regs(unsigned long *addr,
+ long num_regs)
+{
+ long delta = ia64_rse_slot_num(addr) + num_regs;
+ int i = 0;
+
+ if (num_regs < 0)
+ delta -= 0x3e;
+ if (delta < 0) {
+ while (delta <= -0x3f) {
+ i--;
+ delta += 0x3f;
+ }
+ } else {
+ while (delta >= 0x3f) {
+ i++;
+ delta -= 0x3f;
+ }
+ }
+
+ return addr + num_regs + i;
+}
+
+static void get_rse_reg(struct kvm_pt_regs *regs, unsigned long r1,
+ unsigned long *val, int *nat)
+{
+ unsigned long *bsp, *addr, *rnat_addr, *bspstore;
+ unsigned long *kbs = (void *) current_vcpu + VMM_RBS_OFFSET;
+ unsigned long nat_mask;
+ unsigned long old_rsc, new_rsc;
+ long sof = (regs->cr_ifs) & 0x7f;
+ long sor = (((regs->cr_ifs >> 14) & 0xf) << 3);
+ long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
+ long ridx = r1 - 32;
+
+ if (ridx < sor)
+ ridx = rotate_reg(sor, rrb_gr, ridx);
+
+ old_rsc = ia64_getreg(_IA64_REG_AR_RSC);
+ new_rsc = old_rsc&(~(0x3));
+ ia64_setreg(_IA64_REG_AR_RSC, new_rsc);
+
+ bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
+ bsp = kbs + (regs->loadrs >> 19);
+
+ addr = kvm_rse_skip_regs(bsp, -sof + ridx);
+ nat_mask = 1UL << ia64_rse_slot_num(addr);
+ rnat_addr = ia64_rse_rnat_addr(addr);
+
+ if (addr >= bspstore) {
+ ia64_flushrs();
+ ia64_mf();
+ bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
+ }
+ *val = *addr;
+ if (nat) {
+ if (bspstore < rnat_addr)
+ *nat = (int)!!(ia64_getreg(_IA64_REG_AR_RNAT)
+ & nat_mask);
+ else
+ *nat = (int)!!((*rnat_addr) & nat_mask);
+ ia64_setreg(_IA64_REG_AR_RSC, old_rsc);
+ }
+}
+
+void set_rse_reg(struct kvm_pt_regs *regs, unsigned long r1,
+ unsigned long val, unsigned long nat)
+{
+ unsigned long *bsp, *bspstore, *addr, *rnat_addr;
+ unsigned long *kbs = (void *) current_vcpu + VMM_RBS_OFFSET;
+ unsigned long nat_mask;
+ unsigned long old_rsc, new_rsc, psr;
+ unsigned long rnat;
+ long sof = (regs->cr_ifs) & 0x7f;
+ long sor = (((regs->cr_ifs >> 14) & 0xf) << 3);
+ long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
+ long ridx = r1 - 32;
+
+ if (ridx < sor)
+ ridx = rotate_reg(sor, rrb_gr, ridx);
+
+ old_rsc = ia64_getreg(_IA64_REG_AR_RSC);
+ /* put RSC to lazy mode, and set loadrs 0 */
+ new_rsc = old_rsc & (~0x3fff0003);
+ ia64_setreg(_IA64_REG_AR_RSC, new_rsc);
+ bsp = kbs + (regs->loadrs >> 19); /* 16 + 3 */
+
+ addr = kvm_rse_skip_regs(bsp, -sof + ridx);
+ nat_mask = 1UL << ia64_rse_slot_num(addr);
+ rnat_addr = ia64_rse_rnat_addr(addr);
+
+ local_irq_save(psr);
+ bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
+ if (addr >= bspstore) {
+
+ ia64_flushrs();
+ ia64_mf();
+ *addr = val;
+ bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
+ rnat = ia64_getreg(_IA64_REG_AR_RNAT);
+ if (bspstore < rnat_addr)
+ rnat = rnat & (~nat_mask);
+ else
+ *rnat_addr = (*rnat_addr)&(~nat_mask);
+
+ ia64_mf();
+ ia64_loadrs();
+ ia64_setreg(_IA64_REG_AR_RNAT, rnat);
+ } else {
+ rnat = ia64_getreg(_IA64_REG_AR_RNAT);
+ *addr = val;
+ if (bspstore < rnat_addr)
+ rnat = rnat&(~nat_mask);
+ else
+ *rnat_addr = (*rnat_addr) & (~nat_mask);
+
+ ia64_setreg(_IA64_REG_AR_BSPSTORE, bspstore);
+ ia64_setreg(_IA64_REG_AR_RNAT, rnat);
+ }
+ local_irq_restore(psr);
+ ia64_setreg(_IA64_REG_AR_RSC, old_rsc);
+}
+
+void getreg(unsigned long regnum, unsigned long *val,
+ int *nat, struct kvm_pt_regs *regs)
+{
+ unsigned long addr, *unat;
+ if (regnum >= IA64_FIRST_STACKED_GR) {
+ get_rse_reg(regs, regnum, val, nat);
+ return;
+ }
+
+ /*
+ * Now look at registers in [0-31] range and init correct UNAT
+ */
+ addr = (unsigned long)regs;
+ unat = &regs->eml_unat;;
+
+ addr += gr_info[regnum];
+
+ *val = *(unsigned long *)addr;
+ /*
+ * do it only when requested
+ */
+ if (nat)
+ *nat = (*unat >> ((addr >> 3) & 0x3f)) & 0x1UL;
+}
+
+void setreg(unsigned long regnum, unsigned long val,
+ int nat, struct kvm_pt_regs *regs)
+{
+ unsigned long addr;
+ unsigned long bitmask;
+ unsigned long *unat;
+
+ /*
+ * First takes care of stacked registers
+ */
+ if (regnum >= IA64_FIRST_STACKED_GR) {
+ set_rse_reg(regs, regnum, val, nat);
+ return;
+ }
+
+ /*
+ * Now look at registers in [0-31] range and init correct UNAT
+ */
+ addr = (unsigned long)regs;
+ unat = &regs->eml_unat;
+ /*
+ * add offset from base of struct
+ * and do it !
+ */
+ addr += gr_info[regnum];
+
+ *(unsigned long *)addr = val;
+
+ /*
+ * We need to clear the corresponding UNAT bit to fully emulate the load
+ * UNAT bit_pos = GR[r3]{8:3} form EAS-2.4
+ */
+ bitmask = 1UL << ((addr >> 3) & 0x3f);
+ if (nat)
+ *unat |= bitmask;
+ else
+ *unat &= ~bitmask;
+
+}
+
+u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg)
+{
+ struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+ u64 val;
+
+ if (!reg)
+ return 0;
+ getreg(reg, &val, 0, regs);
+ return val;
+}
+
+void vcpu_set_gr(struct kvm_vcpu *vcpu, u64 reg, u64 value, int nat)
+{
+ struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+ long sof = (regs->cr_ifs) & 0x7f;
+
+ if (!reg)
+ return;
+ if (reg >= sof + 32)
+ return;
+ setreg(reg, value, nat, regs); /* FIXME: handle NATs later*/
+}
+
+void getfpreg(unsigned long regnum, struct ia64_fpreg *fpval,
+ struct kvm_pt_regs *regs)
+{
+ /* Take floating register rotation into consideration*/
+ if (regnum >= IA64_FIRST_ROTATING_FR)
+ regnum = IA64_FIRST_ROTATING_FR + fph_index(regs, regnum);
+#define CASE_FIXED_FP(reg) \
+ case (reg) : \
+ ia64_stf_spill(fpval, reg); \
+ break
+
+ switch (regnum) {
+ CASE_FIXED_FP(0);
+ CASE_FIXED_FP(1);
+ CASE_FIXED_FP(2);
+ CASE_FIXED_FP(3);
+ CASE_FIXED_FP(4);
+ CASE_FIXED_FP(5);
+
+ CASE_FIXED_FP(6);
+ CASE_FIXED_FP(7);
+ CASE_FIXED_FP(8);
+ CASE_FIXED_FP(9);
+ CASE_FIXED_FP(10);
+ CASE_FIXED_FP(11);
+
+ CASE_FIXED_FP(12);
+ CASE_FIXED_FP(13);
+ CASE_FIXED_FP(14);
+ CASE_FIXED_FP(15);
+ CASE_FIXED_FP(16);
+ CASE_FIXED_FP(17);
+ CASE_FIXED_FP(18);
+ CASE_FIXED_FP(19);
+ CASE_FIXED_FP(20);
+ CASE_FIXED_FP(21);
+ CASE_FIXED_FP(22);
+ CASE_FIXED_FP(23);
+ CASE_FIXED_FP(24);
+ CASE_FIXED_FP(25);
+ CASE_FIXED_FP(26);
+ CASE_FIXED_FP(27);
+ CASE_FIXED_FP(28);
+ CASE_FIXED_FP(29);
+ CASE_FIXED_FP(30);
+ CASE_FIXED_FP(31);
+ CASE_FIXED_FP(32);
+ CASE_FIXED_FP(33);
+ CASE_FIXED_FP(34);
+ CASE_FIXED_FP(35);
+ CASE_FIXED_FP(36);
+ CASE_FIXED_FP(37);
+ CASE_FIXED_FP(38);
+ CASE_FIXED_FP(39);
+ CASE_FIXED_FP(40);
+ CASE_FIXED_FP(41);
+ CASE_FIXED_FP(42);
+ CASE_FIXED_FP(43);
+ CASE_FIXED_FP(44);
+ CASE_FIXED_FP(45);
+ CASE_FIXED_FP(46);
+ CASE_FIXED_FP(47);
+ CASE_FIXED_FP(48);
+ CASE_FIXED_FP(49);
+ CASE_FIXED_FP(50);
+ CASE_FIXED_FP(51);
+ CASE_FIXED_FP(52);
+ CASE_FIXED_FP(53);
+ CASE_FIXED_FP(54);
+ CASE_FIXED_FP(55);
+ CASE_FIXED_FP(56);
+ CASE_FIXED_FP(57);
+ CASE_FIXED_FP(58);
+ CASE_FIXED_FP(59);
+ CASE_FIXED_FP(60);
+ CASE_FIXED_FP(61);
+ CASE_FIXED_FP(62);
+ CASE_FIXED_FP(63);
+ CASE_FIXED_FP(64);
+ CASE_FIXED_FP(65);
+ CASE_FIXED_FP(66);
+ CASE_FIXED_FP(67);
+ CASE_FIXED_FP(68);
+ CASE_FIXED_FP(69);
+ CASE_FIXED_FP(70);
+ CASE_FIXED_FP(71);
+ CASE_FIXED_FP(72);
+ CASE_FIXED_FP(73);
+ CASE_FIXED_FP(74);
+ CASE_FIXED_FP(75);
+ CASE_FIXED_FP(76);
+ CASE_FIXED_FP(77);
+ CASE_FIXED_FP(78);
+ CASE_FIXED_FP(79);
+ CASE_FIXED_FP(80);
+ CASE_FIXED_FP(81);
+ CASE_FIXED_FP(82);
+ CASE_FIXED_FP(83);
+ CASE_FIXED_FP(84);
+ CASE_FIXED_FP(85);
+ CASE_FIXED_FP(86);
+ CASE_FIXED_FP(87);
+ CASE_FIXED_FP(88);
+ CASE_FIXED_FP(89);
+ CASE_FIXED_FP(90);
+ CASE_FIXED_FP(91);
+ CASE_FIXED_FP(92);
+ CASE_FIXED_FP(93);
+ CASE_FIXED_FP(94);
+ CASE_FIXED_FP(95);
+ CASE_FIXED_FP(96);
+ CASE_FIXED_FP(97);
+ CASE_FIXED_FP(98);
+ CASE_FIXED_FP(99);
+ CASE_FIXED_FP(100);
+ CASE_FIXED_FP(101);
+ CASE_FIXED_FP(102);
+ CASE_FIXED_FP(103);
+ CASE_FIXED_FP(104);
+ CASE_FIXED_FP(105);
+ CASE_FIXED_FP(106);
+ CASE_FIXED_FP(107);
+ CASE_FIXED_FP(108);
+ CASE_FIXED_FP(109);
+ CASE_FIXED_FP(110);
+ CASE_FIXED_FP(111);
+ CASE_FIXED_FP(112);
+ CASE_FIXED_FP(113);
+ CASE_FIXED_FP(114);
+ CASE_FIXED_FP(115);
+ CASE_FIXED_FP(116);
+ CASE_FIXED_FP(117);
+ CASE_FIXED_FP(118);
+ CASE_FIXED_FP(119);
+ CASE_FIXED_FP(120);
+ CASE_FIXED_FP(121);
+ CASE_FIXED_FP(122);
+ CASE_FIXED_FP(123);
+ CASE_FIXED_FP(124);
+ CASE_FIXED_FP(125);
+ CASE_FIXED_FP(126);
+ CASE_FIXED_FP(127);
+ }
+#undef CASE_FIXED_FP
+}
+
+void setfpreg(unsigned long regnum, struct ia64_fpreg *fpval,
+ struct kvm_pt_regs *regs)
+{
+ /* Take floating register rotation into consideration*/
+ if (regnum >= IA64_FIRST_ROTATING_FR)
+ regnum = IA64_FIRST_ROTATING_FR + fph_index(regs, regnum);
+
+#define CASE_FIXED_FP(reg) \
+ case (reg) : \
+ ia64_ldf_fill(reg, fpval); \
+ break
+
+ switch (regnum) {
+ CASE_FIXED_FP(2);
+ CASE_FIXED_FP(3);
+ CASE_FIXED_FP(4);
+ CASE_FIXED_FP(5);
+
+ CASE_FIXED_FP(6);
+ CASE_FIXED_FP(7);
+ CASE_FIXED_FP(8);
+ CASE_FIXED_FP(9);
+ CASE_FIXED_FP(10);
+ CASE_FIXED_FP(11);
+
+ CASE_FIXED_FP(12);
+ CASE_FIXED_FP(13);
+ CASE_FIXED_FP(14);
+ CASE_FIXED_FP(15);
+ CASE_FIXED_FP(16);
+ CASE_FIXED_FP(17);
+ CASE_FIXED_FP(18);
+ CASE_FIXED_FP(19);
+ CASE_FIXED_FP(20);
+ CASE_FIXED_FP(21);
+ CASE_FIXED_FP(22);
+ CASE_FIXED_FP(23);
+ CASE_FIXED_FP(24);
+ CASE_FIXED_FP(25);
+ CASE_FIXED_FP(26);
+ CASE_FIXED_FP(27);
+ CASE_FIXED_FP(28);
+ CASE_FIXED_FP(29);
+ CASE_FIXED_FP(30);
+ CASE_FIXED_FP(31);
+ CASE_FIXED_FP(32);
+ CASE_FIXED_FP(33);
+ CASE_FIXED_FP(34);
+ CASE_FIXED_FP(35);
+ CASE_FIXED_FP(36);
+ CASE_FIXED_FP(37);
+ CASE_FIXED_FP(38);
+ CASE_FIXED_FP(39);
+ CASE_FIXED_FP(40);
+ CASE_FIXED_FP(41);
+ CASE_FIXED_FP(42);
+ CASE_FIXED_FP(43);
+ CASE_FIXED_FP(44);
+ CASE_FIXED_FP(45);
+ CASE_FIXED_FP(46);
+ CASE_FIXED_FP(47);
+ CASE_FIXED_FP(48);
+ CASE_FIXED_FP(49);
+ CASE_FIXED_FP(50);
+ CASE_FIXED_FP(51);
+ CASE_FIXED_FP(52);
+ CASE_FIXED_FP(53);
+ CASE_FIXED_FP(54);
+ CASE_FIXED_FP(55);
+ CASE_FIXED_FP(56);
+ CASE_FIXED_FP(57);
+ CASE_FIXED_FP(58);
+ CASE_FIXED_FP(59);
+ CASE_FIXED_FP(60);
+ CASE_FIXED_FP(61);
+ CASE_FIXED_FP(62);
+ CASE_FIXED_FP(63);
+ CASE_FIXED_FP(64);
+ CASE_FIXED_FP(65);
+ CASE_FIXED_FP(66);
+ CASE_FIXED_FP(67);
+ CASE_FIXED_FP(68);
+ CASE_FIXED_FP(69);
+ CASE_FIXED_FP(70);
+ CASE_FIXED_FP(71);
+ CASE_FIXED_FP(72);
+ CASE_FIXED_FP(73);
+ CASE_FIXED_FP(74);
+ CASE_FIXED_FP(75);
+ CASE_FIXED_FP(76);
+ CASE_FIXED_FP(77);
+ CASE_FIXED_FP(78);
+ CASE_FIXED_FP(79);
+ CASE_FIXED_FP(80);
+ CASE_FIXED_FP(81);
+ CASE_FIXED_FP(82);
+ CASE_FIXED_FP(83);
+ CASE_FIXED_FP(84);
+ CASE_FIXED_FP(85);
+ CASE_FIXED_FP(86);
+ CASE_FIXED_FP(87);
+ CASE_FIXED_FP(88);
+ CASE_FIXED_FP(89);
+ CASE_FIXED_FP(90);
+ CASE_FIXED_FP(91);
+ CASE_FIXED_FP(92);
+ CASE_FIXED_FP(93);
+ CASE_FIXED_FP(94);
+ CASE_FIXED_FP(95);
+ CASE_FIXED_FP(96);
+ CASE_FIXED_FP(97);
+ CASE_FIXED_FP(98);
+ CASE_FIXED_FP(99);
+ CASE_FIXED_FP(100);
+ CASE_FIXED_FP(101);
+ CASE_FIXED_FP(102);
+ CASE_FIXED_FP(103);
+ CASE_FIXED_FP(104);
+ CASE_FIXED_FP(105);
+ CASE_FIXED_FP(106);
+ CASE_FIXED_FP(107);
+ CASE_FIXED_FP(108);
+ CASE_FIXED_FP(109);
+ CASE_FIXED_FP(110);
+ CASE_FIXED_FP(111);
+ CASE_FIXED_FP(112);
+ CASE_FIXED_FP(113);
+ CASE_FIXED_FP(114);
+ CASE_FIXED_FP(115);
+ CASE_FIXED_FP(116);
+ CASE_FIXED_FP(117);
+ CASE_FIXED_FP(118);
+ CASE_FIXED_FP(119);
+ CASE_FIXED_FP(120);
+ CASE_FIXED_FP(121);
+ CASE_FIXED_FP(122);
+ CASE_FIXED_FP(123);
+ CASE_FIXED_FP(124);
+ CASE_FIXED_FP(125);
+ CASE_FIXED_FP(126);
+ CASE_FIXED_FP(127);
+ }
+}
+
+void vcpu_get_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
+ struct ia64_fpreg *val)
+{
+ struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+
+ getfpreg(reg, val, regs); /* FIXME: handle NATs later*/
+}
+
+void vcpu_set_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
+ struct ia64_fpreg *val)
+{
+ struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+
+ if (reg > 1)
+ setfpreg(reg, val, regs); /* FIXME: handle NATs later*/
+}
+
+/************************************************************************
+ * lsapic timer
+ ***********************************************************************/
+u64 vcpu_get_itc(struct kvm_vcpu *vcpu)
+{
+ unsigned long guest_itc;
+ guest_itc = VMX(vcpu, itc_offset) + ia64_getreg(_IA64_REG_AR_ITC);
+
+ if (guest_itc >= VMX(vcpu, last_itc)) {
+ VMX(vcpu, last_itc) = guest_itc;
+ return guest_itc;
+ } else
+ return VMX(vcpu, last_itc);
+}
+
+static inline void vcpu_set_itm(struct kvm_vcpu *vcpu, u64 val);
+static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val)
+{
+ struct kvm_vcpu *v;
+ int i;
+ long itc_offset = val - ia64_getreg(_IA64_REG_AR_ITC);
+ unsigned long vitv = VCPU(vcpu, itv);
+
+ if (vcpu->vcpu_id == 0) {
+ for (i = 0; i < MAX_VCPU_NUM; i++) {
+ v = (struct kvm_vcpu *)((char *)vcpu + VCPU_SIZE * i);
+ VMX(v, itc_offset) = itc_offset;
+ VMX(v, last_itc) = 0;
+ }
+ }
+ VMX(vcpu, last_itc) = 0;
+ if (VCPU(vcpu, itm) <= val) {
+ VMX(vcpu, itc_check) = 0;
+ vcpu_unpend_interrupt(vcpu, vitv);
+ } else {
+ VMX(vcpu, itc_check) = 1;
+ vcpu_set_itm(vcpu, VCPU(vcpu, itm));
+ }
+
+}
+
+static inline u64 vcpu_get_itm(struct kvm_vcpu *vcpu)
+{
+ return ((u64)VCPU(vcpu, itm));
+}
+
+static inline void vcpu_set_itm(struct kvm_vcpu *vcpu, u64 val)
+{
+ unsigned long vitv = VCPU(vcpu, itv);
+ VCPU(vcpu, itm) = val;
+
+ if (val > vcpu_get_itc(vcpu)) {
+ VMX(vcpu, itc_check) = 1;
+ vcpu_unpend_interrupt(vcpu, vitv);
+ VMX(vcpu, timer_pending) = 0;
+ } else
+ VMX(vcpu, itc_check) = 0;
+}
+
+#define ITV_VECTOR(itv) (itv&0xff)
+#define ITV_IRQ_MASK(itv) (itv&(1<<16))
+
+static inline void vcpu_set_itv(struct kvm_vcpu *vcpu, u64 val)
+{
+ VCPU(vcpu, itv) = val;
+ if (!ITV_IRQ_MASK(val) && vcpu->arch.timer_pending) {
+ vcpu_pend_interrupt(vcpu, ITV_VECTOR(val));
+ vcpu->arch.timer_pending = 0;
+ }
+}
+
+static inline void vcpu_set_eoi(struct kvm_vcpu *vcpu, u64 val)
+{
+ int vec;
+
+ vec = highest_inservice_irq(vcpu);
+ if (vec == NULL_VECTOR)
+ return;
+ VMX(vcpu, insvc[vec >> 6]) &= ~(1UL << (vec & 63));
+ VCPU(vcpu, eoi) = 0;
+ vcpu->arch.irq_new_pending = 1;
+
+}
+
+/* See Table 5-8 in SDM vol2 for the definition */
+int irq_masked(struct kvm_vcpu *vcpu, int h_pending, int h_inservice)
+{
+ union ia64_tpr vtpr;
+
+ vtpr.val = VCPU(vcpu, tpr);
+
+ if (h_inservice == NMI_VECTOR)
+ return IRQ_MASKED_BY_INSVC;
+
+ if (h_pending == NMI_VECTOR) {
+ /* Non Maskable Interrupt */
+ return IRQ_NO_MASKED;
+ }
+
+ if (h_inservice == ExtINT_VECTOR)
+ return IRQ_MASKED_BY_INSVC;
+
+ if (h_pending == ExtINT_VECTOR) {
+ if (vtpr.mmi) {
+ /* mask all external IRQ */
+ return IRQ_MASKED_BY_VTPR;
+ } else
+ return IRQ_NO_MASKED;
+ }
+
+ if (is_higher_irq(h_pending, h_inservice)) {
+ if (is_higher_class(h_pending, vtpr.mic + (vtpr.mmi << 4)))
+ return IRQ_NO_MASKED;
+ else
+ return IRQ_MASKED_BY_VTPR;
+ } else {
+ return IRQ_MASKED_BY_INSVC;
+ }
+}
+
+void vcpu_pend_interrupt(struct kvm_vcpu *vcpu, u8 vec)
+{
+ long spsr;
+ int ret;
+
+ local_irq_save(spsr);
+ ret = test_and_set_bit(vec, &VCPU(vcpu, irr[0]));
+ local_irq_restore(spsr);
+
+ vcpu->arch.irq_new_pending = 1;
+}
+
+void vcpu_unpend_interrupt(struct kvm_vcpu *vcpu, u8 vec)
+{
+ long spsr;
+ int ret;
+
+ local_irq_save(spsr);
+ ret = test_and_clear_bit(vec, &VCPU(vcpu, irr[0]));
+ local_irq_restore(spsr);
+ if (ret) {
+ vcpu->arch.irq_new_pending = 1;
+ wmb();
+ }
+}
+
+void update_vhpi(struct kvm_vcpu *vcpu, int vec)
+{
+ u64 vhpi;
+
+ if (vec == NULL_VECTOR)
+ vhpi = 0;
+ else if (vec == NMI_VECTOR)
+ vhpi = 32;
+ else if (vec == ExtINT_VECTOR)
+ vhpi = 16;
+ else
+ vhpi = vec >> 4;
+
+ VCPU(vcpu, vhpi) = vhpi;
+ if (VCPU(vcpu, vac).a_int)
+ ia64_call_vsa(PAL_VPS_SET_PENDING_INTERRUPT,
+ (u64)vcpu->arch.vpd, 0, 0, 0, 0, 0, 0);
+}
+
+u64 vcpu_get_ivr(struct kvm_vcpu *vcpu)
+{
+ int vec, h_inservice, mask;
+
+ vec = highest_pending_irq(vcpu);
+ h_inservice = highest_inservice_irq(vcpu);
+ mask = irq_masked(vcpu, vec, h_inservice);
+ if (vec == NULL_VECTOR || mask == IRQ_MASKED_BY_INSVC) {
+ if (VCPU(vcpu, vhpi))
+ update_vhpi(vcpu, NULL_VECTOR);
+ return IA64_SPURIOUS_INT_VECTOR;
+ }
+ if (mask == IRQ_MASKED_BY_VTPR) {
+ update_vhpi(vcpu, vec);
+ return IA64_SPURIOUS_INT_VECTOR;
+ }
+ VMX(vcpu, insvc[vec >> 6]) |= (1UL << (vec & 63));
+ vcpu_unpend_interrupt(vcpu, vec);
+ return (u64)vec;
+}
+
+/**************************************************************************
+ Privileged operation emulation routines
+ **************************************************************************/
+u64 vcpu_thash(struct kvm_vcpu *vcpu, u64 vadr)
+{
+ union ia64_pta vpta;
+ union ia64_rr vrr;
+ u64 pval;
+ u64 vhpt_offset;
+
+ vpta.val = vcpu_get_pta(vcpu);
+ vrr.val = vcpu_get_rr(vcpu, vadr);
+ vhpt_offset = ((vadr >> vrr.ps) << 3) & ((1UL << (vpta.size)) - 1);
+ if (vpta.vf) {
+ pval = ia64_call_vsa(PAL_VPS_THASH, vadr, vrr.val,
+ vpta.val, 0, 0, 0, 0);
+ } else {
+ pval = (vadr & VRN_MASK) | vhpt_offset |
+ (vpta.val << 3 >> (vpta.size + 3) << (vpta.size));
+ }
+ return pval;
+}
+
+u64 vcpu_ttag(struct kvm_vcpu *vcpu, u64 vadr)
+{
+ union ia64_rr vrr;
+ union ia64_pta vpta;
+ u64 pval;
+
+ vpta.val = vcpu_get_pta(vcpu);
+ vrr.val = vcpu_get_rr(vcpu, vadr);
+ if (vpta.vf) {
+ pval = ia64_call_vsa(PAL_VPS_TTAG, vadr, vrr.val,
+ 0, 0, 0, 0, 0);
+ } else
+ pval = 1;
+
+ return pval;
+}
+
+u64 vcpu_tak(struct kvm_vcpu *vcpu, u64 vadr)
+{
+ struct thash_data *data;
+ union ia64_pta vpta;
+ u64 key;
+
+ vpta.val = vcpu_get_pta(vcpu);
+ if (vpta.vf == 0) {
+ key = 1;
+ return key;
+ }
+ data = vtlb_lookup(vcpu, vadr, D_TLB);
+ if (!data || !data->p)
+ key = 1;
+ else
+ key = data->key;
+
+ return key;
+}
+
+
+
+void kvm_thash(struct kvm_vcpu *vcpu, INST64 inst)
+{
+ unsigned long thash, vadr;
+
+ vadr = vcpu_get_gr(vcpu, inst.M46.r3);
+ thash = vcpu_thash(vcpu, vadr);
+ vcpu_set_gr(vcpu, inst.M46.r1, thash, 0);
+}
+
+
+void kvm_ttag(struct kvm_vcpu *vcpu, INST64 inst)
+{
+ unsigned long tag, vadr;
+
+ vadr = vcpu_get_gr(vcpu, inst.M46.r3);
+ tag = vcpu_ttag(vcpu, vadr);
+ vcpu_set_gr(vcpu, inst.M46.r1, tag, 0);
+}
+
+int vcpu_tpa(struct kvm_vcpu *vcpu, u64 vadr, u64 *padr)
+{
+ struct thash_data *data;
+ union ia64_isr visr, pt_isr;
+ struct kvm_pt_regs *regs;
+ struct ia64_psr vpsr;
+
+ regs = vcpu_regs(vcpu);
+ pt_isr.val = VMX(vcpu, cr_isr);
+ visr.val = 0;
+ visr.ei = pt_isr.ei;
+ visr.ir = pt_isr.ir;
+ vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
+ visr.na = 1;
+
+ data = vhpt_lookup(vadr);
+ if (data) {
+ if (data->p == 0) {
+ vcpu_set_isr(vcpu, visr.val);
+ data_page_not_present(vcpu, vadr);
+ return IA64_FAULT;
+ } else if (data->ma == VA_MATTR_NATPAGE) {
+ vcpu_set_isr(vcpu, visr.val);
+ dnat_page_consumption(vcpu, vadr);
+ return IA64_FAULT;
+ } else {
+ *padr = (data->gpaddr >> data->ps << data->ps) |
+ (vadr & (PSIZE(data->ps) - 1));
+ return IA64_NO_FAULT;
+ }
+ }
+
+ data = vtlb_lookup(vcpu, vadr, D_TLB);
+ if (data) {
+ if (data->p == 0) {
+ vcpu_set_isr(vcpu, visr.val);
+ data_page_not_present(vcpu, vadr);
+ return IA64_FAULT;
+ } else if (data->ma == VA_MATTR_NATPAGE) {
+ vcpu_set_isr(vcpu, visr.val);
+ dnat_page_consumption(vcpu, vadr);
+ return IA64_FAULT;
+ } else{
+ *padr = ((data->ppn >> (data->ps - 12)) << data->ps)
+ | (vadr & (PSIZE(data->ps) - 1));
+ return IA64_NO_FAULT;
+ }
+ }
+ if (!vhpt_enabled(vcpu, vadr, NA_REF)) {
+ if (vpsr.ic) {
+ vcpu_set_isr(vcpu, visr.val);
+ alt_dtlb(vcpu, vadr);
+ return IA64_FAULT;
+ } else {
+ nested_dtlb(vcpu);
+ return IA64_FAULT;
+ }
+ } else {
+ if (vpsr.ic) {
+ vcpu_set_isr(vcpu, visr.val);
+ dvhpt_fault(vcpu, vadr);
+ return IA64_FAULT;
+ } else{
+ nested_dtlb(vcpu);
+ return IA64_FAULT;
+ }
+ }
+
+ return IA64_NO_FAULT;
+}
+
+
+int kvm_tpa(struct kvm_vcpu *vcpu, INST64 inst)
+{
+ unsigned long r1, r3;
+
+ r3 = vcpu_get_gr(vcpu, inst.M46.r3);
+
+ if (vcpu_tpa(vcpu, r3, &r1))
+ return IA64_FAULT;
+
+ vcpu_set_gr(vcpu, inst.M46.r1, r1, 0);
+ return(IA64_NO_FAULT);
+}
+
+void kvm_tak(struct kvm_vcpu *vcpu, INST64 inst)
+{
+ unsigned long r1, r3;
+
+ r3 = vcpu_get_gr(vcpu, inst.M46.r3);
+ r1 = vcpu_tak(vcpu, r3);
+ vcpu_set_gr(vcpu, inst.M46.r1, r1, 0);
+}
+
+
+/************************************
+ * Insert/Purge translation register/cache
+ ************************************/
+void vcpu_itc_i(struct kvm_vcpu *vcpu, u64 pte, u64 itir, u64 ifa)
+{
+ thash_purge_and_insert(vcpu, pte, itir, ifa, I_TLB);
+}
+
+void vcpu_itc_d(struct kvm_vcpu *vcpu, u64 pte, u64 itir, u64 ifa)
+{
+ thash_purge_and_insert(vcpu, pte, itir, ifa, D_TLB);
+}
+
+void vcpu_itr_i(struct kvm_vcpu *vcpu, u64 slot, u64 pte, u64 itir, u64 ifa)
+{
+ u64 ps, va, rid;
+ struct thash_data *p_itr;
+
+ ps = itir_ps(itir);
+ va = PAGEALIGN(ifa, ps);
+ pte &= ~PAGE_FLAGS_RV_MASK;
+ rid = vcpu_get_rr(vcpu, ifa);
+ rid = rid & RR_RID_MASK;
+ p_itr = (struct thash_data *)&vcpu->arch.itrs[slot];
+ vcpu_set_tr(p_itr, pte, itir, va, rid);
+ vcpu_quick_region_set(VMX(vcpu, itr_regions), va);
+}
+
+
+void vcpu_itr_d(struct kvm_vcpu *vcpu, u64 slot, u64 pte, u64 itir, u64 ifa)
+{
+ u64 gpfn;
+ u64 ps, va, rid;
+ struct thash_data *p_dtr;
+
+ ps = itir_ps(itir);
+ va = PAGEALIGN(ifa, ps);
+ pte &= ~PAGE_FLAGS_RV_MASK;
+
+ if (ps != _PAGE_SIZE_16M)
+ thash_purge_entries(vcpu, va, ps);
+ gpfn = (pte & _PAGE_PPN_MASK) >> PAGE_SHIFT;
+ if (__gpfn_is_io(gpfn))
+ pte |= VTLB_PTE_IO;
+ rid = vcpu_get_rr(vcpu, va);
+ rid = rid & RR_RID_MASK;
+ p_dtr = (struct thash_data *)&vcpu->arch.dtrs[slot];
+ vcpu_set_tr((struct thash_data *)&vcpu->arch.dtrs[slot],
+ pte, itir, va, rid);
+ vcpu_quick_region_set(VMX(vcpu, dtr_regions), va);
+}
+
+void vcpu_ptr_d(struct kvm_vcpu *vcpu, u64 ifa, u64 ps)
+{
+ int index;
+ u64 va;
+
+ va = PAGEALIGN(ifa, ps);
+ while ((index = vtr_find_overlap(vcpu, va, ps, D_TLB)) >= 0)
+ vcpu->arch.dtrs[index].page_flags = 0;
+
+ thash_purge_entries(vcpu, va, ps);
+}
+
+void vcpu_ptr_i(struct kvm_vcpu *vcpu, u64 ifa, u64 ps)
+{
+ int index;
+ u64 va;
+
+ va = PAGEALIGN(ifa, ps);
+ while ((index = vtr_find_overlap(vcpu, va, ps, I_TLB)) >= 0)
+ vcpu->arch.itrs[index].page_flags = 0;
+
+ thash_purge_entries(vcpu, va, ps);
+}
+
+void vcpu_ptc_l(struct kvm_vcpu *vcpu, u64 va, u64 ps)
+{
+ va = PAGEALIGN(va, ps);
+ thash_purge_entries(vcpu, va, ps);
+}
+
+void vcpu_ptc_e(struct kvm_vcpu *vcpu, u64 va)
+{
+ thash_purge_all(vcpu);
+}
+
+void vcpu_ptc_ga(struct kvm_vcpu *vcpu, u64 va, u64 ps)
+{
+ struct exit_ctl_data *p = &vcpu->arch.exit_data;
+ long psr;
+ local_irq_save(psr);
+ p->exit_reason = EXIT_REASON_PTC_G;
+
+ p->u.ptc_g_data.rr = vcpu_get_rr(vcpu, va);
+ p->u.ptc_g_data.vaddr = va;
+ p->u.ptc_g_data.ps = ps;
+ vmm_transition(vcpu);
+ /* Do Local Purge Here*/
+ vcpu_ptc_l(vcpu, va, ps);
+ local_irq_restore(psr);
+}
+
+
+void vcpu_ptc_g(struct kvm_vcpu *vcpu, u64 va, u64 ps)
+{
+ vcpu_ptc_ga(vcpu, va, ps);
+}
+
+void kvm_ptc_e(struct kvm_vcpu *vcpu, INST64 inst)
+{
+ unsigned long ifa;
+
+ ifa = vcpu_get_gr(vcpu, inst.M45.r3);
+ vcpu_ptc_e(vcpu, ifa);
+}
+
+void kvm_ptc_g(struct kvm_vcpu *vcpu, INST64 inst)
+{
+ unsigned long ifa, itir;
+
+ ifa = vcpu_get_gr(vcpu, inst.M45.r3);
+ itir = vcpu_get_gr(vcpu, inst.M45.r2);
+ vcpu_ptc_g(vcpu, ifa, itir_ps(itir));
+}
+
+void kvm_ptc_ga(struct kvm_vcpu *vcpu, INST64 inst)
+{
+ unsigned long ifa, itir;
+
+ ifa = vcpu_get_gr(vcpu, inst.M45.r3);
+ itir = vcpu_get_gr(vcpu, inst.M45.r2);
+ vcpu_ptc_ga(vcpu, ifa, itir_ps(itir));
+}
+
+void kvm_ptc_l(struct kvm_vcpu *vcpu, INST64 inst)
+{
+ unsigned long ifa, itir;
+
+ ifa = vcpu_get_gr(vcpu, inst.M45.r3);
+ itir = vcpu_get_gr(vcpu, inst.M45.r2);
+ vcpu_ptc_l(vcpu, ifa, itir_ps(itir));
+}
+
+void kvm_ptr_d(struct kvm_vcpu *vcpu, INST64 inst)
+{
+ unsigned long ifa, itir;
+
+ ifa = vcpu_get_gr(vcpu, inst.M45.r3);
+ itir = vcpu_get_gr(vcpu, inst.M45.r2);
+ vcpu_ptr_d(vcpu, ifa, itir_ps(itir));
+}
+
+void kvm_ptr_i(struct kvm_vcpu *vcpu, INST64 inst)
+{
+ unsigned long ifa, itir;
+
+ ifa = vcpu_get_gr(vcpu, inst.M45.r3);
+ itir = vcpu_get_gr(vcpu, inst.M45.r2);
+ vcpu_ptr_i(vcpu, ifa, itir_ps(itir));
+}
+
+void kvm_itr_d(struct kvm_vcpu *vcpu, INST64 inst)
+{
+ unsigned long itir, ifa, pte, slot;
+
+ slot = vcpu_get_gr(vcpu, inst.M45.r3);
+ pte = vcpu_get_gr(vcpu, inst.M45.r2);
+ itir = vcpu_get_itir(vcpu);
+ ifa = vcpu_get_ifa(vcpu);
+ vcpu_itr_d(vcpu, slot, pte, itir, ifa);
+}
+
+
+
+void kvm_itr_i(struct kvm_vcpu *vcpu, INST64 inst)
+{
+ unsigned long itir, ifa, pte, slot;
+
+ slot = vcpu_get_gr(vcpu, inst.M45.r3);
+ pte = vcpu_get_gr(vcpu, inst.M45.r2);
+ itir = vcpu_get_itir(vcpu);
+ ifa = vcpu_get_ifa(vcpu);
+ vcpu_itr_i(vcpu, slot, pte, itir, ifa);
+}
+
+void kvm_itc_d(struct kvm_vcpu *vcpu, INST64 inst)
+{
+ unsigned long itir, ifa, pte;
+
+ itir = vcpu_get_itir(vcpu);
+ ifa = vcpu_get_ifa(vcpu);
+ pte = vcpu_get_gr(vcpu, inst.M45.r2);
+ vcpu_itc_d(vcpu, pte, itir, ifa);
+}
+
+void kvm_itc_i(struct kvm_vcpu *vcpu, INST64 inst)
+{
+ unsigned long itir, ifa, pte;
+
+ itir = vcpu_get_itir(vcpu);
+ ifa = vcpu_get_ifa(vcpu);
+ pte = vcpu_get_gr(vcpu, inst.M45.r2);
+ vcpu_itc_i(vcpu, pte, itir, ifa);
+}
+
+/*************************************
+ * Moves to semi-privileged registers
+ *************************************/
+
+void kvm_mov_to_ar_imm(struct kvm_vcpu *vcpu, INST64 inst)
+{
+ unsigned long imm;
+
+ if (inst.M30.s)
+ imm = -inst.M30.imm;
+ else
+ imm = inst.M30.imm;
+
+ vcpu_set_itc(vcpu, imm);
+}
+
+void kvm_mov_to_ar_reg(struct kvm_vcpu *vcpu, INST64 inst)
+{
+ unsigned long r2;
+
+ r2 = vcpu_get_gr(vcpu, inst.M29.r2);
+ vcpu_set_itc(vcpu, r2);
+}
+
+
+void kvm_mov_from_ar_reg(struct kvm_vcpu *vcpu, INST64 inst)
+{
+ unsigned long r1;
+
+ r1 = vcpu_get_itc(vcpu);
+ vcpu_set_gr(vcpu, inst.M31.r1, r1, 0);
+}
+/**************************************************************************
+ struct kvm_vcpu*protection key register access routines
+ **************************************************************************/
+
+unsigned long vcpu_get_pkr(struct kvm_vcpu *vcpu, unsigned long reg)
+{
+ return ((unsigned long)ia64_get_pkr(reg));
+}
+
+void vcpu_set_pkr(struct kvm_vcpu *vcpu, unsigned long reg, unsigned long val)
+{
+ ia64_set_pkr(reg, val);
+}
+
+
+unsigned long vcpu_get_itir_on_fault(struct kvm_vcpu *vcpu, unsigned long ifa)
+{
+ union ia64_rr rr, rr1;
+
+ rr.val = vcpu_get_rr(vcpu, ifa);
+ rr1.val = 0;
+ rr1.ps = rr.ps;
+ rr1.rid = rr.rid;
+ return (rr1.val);
+}
+
+
+
+/********************************
+ * Moves to privileged registers
+ ********************************/
+unsigned long vcpu_set_rr(struct kvm_vcpu *vcpu, unsigned long reg,
+ unsigned long val)
+{
+ union ia64_rr oldrr, newrr;
+ unsigned long rrval;
+ struct exit_ctl_data *p = &vcpu->arch.exit_data;
+ unsigned long psr;
+
+ oldrr.val = vcpu_get_rr(vcpu, reg);
+ newrr.val = val;
+ vcpu->arch.vrr[reg >> VRN_SHIFT] = val;
+
+ switch ((unsigned long)(reg >> VRN_SHIFT)) {
+ case VRN6:
+ vcpu->arch.vmm_rr = vrrtomrr(val);
+ local_irq_save(psr);
+ p->exit_reason = EXIT_REASON_SWITCH_RR6;
+ vmm_transition(vcpu);
+ local_irq_restore(psr);
+ break;
+ case VRN4:
+ rrval = vrrtomrr(val);
+ vcpu->arch.metaphysical_saved_rr4 = rrval;
+ if (!is_physical_mode(vcpu))
+ ia64_set_rr(reg, rrval);
+ break;
+ case VRN0:
+ rrval = vrrtomrr(val);
+ vcpu->arch.metaphysical_saved_rr0 = rrval;
+ if (!is_physical_mode(vcpu))
+ ia64_set_rr(reg, rrval);
+ break;
+ default:
+ ia64_set_rr(reg, vrrtomrr(val));
+ break;
+ }
+
+ return (IA64_NO_FAULT);
+}
+
+
+
+void kvm_mov_to_rr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+ unsigned long r3, r2;
+
+ r3 = vcpu_get_gr(vcpu, inst.M42.r3);
+ r2 = vcpu_get_gr(vcpu, inst.M42.r2);
+ vcpu_set_rr(vcpu, r3, r2);
+}
+
+void kvm_mov_to_dbr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+}
+
+void kvm_mov_to_ibr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+}
+
+void kvm_mov_to_pmc(struct kvm_vcpu *vcpu, INST64 inst)
+{
+ unsigned long r3, r2;
+
+ r3 = vcpu_get_gr(vcpu, inst.M42.r3);
+ r2 = vcpu_get_gr(vcpu, inst.M42.r2);
+ vcpu_set_pmc(vcpu, r3, r2);
+}
+
+void kvm_mov_to_pmd(struct kvm_vcpu *vcpu, INST64 inst)
+{
+ unsigned long r3, r2;
+
+ r3 = vcpu_get_gr(vcpu, inst.M42.r3);
+ r2 = vcpu_get_gr(vcpu, inst.M42.r2);
+ vcpu_set_pmd(vcpu, r3, r2);
+}
+
+void kvm_mov_to_pkr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+ u64 r3, r2;
+
+ r3 = vcpu_get_gr(vcpu, inst.M42.r3);
+ r2 = vcpu_get_gr(vcpu, inst.M42.r2);
+ vcpu_set_pkr(vcpu, r3, r2);
+}
+
+
+
+void kvm_mov_from_rr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+ unsigned long r3, r1;
+
+ r3 = vcpu_get_gr(vcpu, inst.M43.r3);
+ r1 = vcpu_get_rr(vcpu, r3);
+ vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
+}
+
+void kvm_mov_from_pkr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+ unsigned long r3, r1;
+
+ r3 = vcpu_get_gr(vcpu, inst.M43.r3);
+ r1 = vcpu_get_pkr(vcpu, r3);
+ vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
+}
+
+void kvm_mov_from_dbr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+ unsigned long r3, r1;
+
+ r3 = vcpu_get_gr(vcpu, inst.M43.r3);
+ r1 = vcpu_get_dbr(vcpu, r3);
+ vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
+}
+
+void kvm_mov_from_ibr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+ unsigned long r3, r1;
+
+ r3 = vcpu_get_gr(vcpu, inst.M43.r3);
+ r1 = vcpu_get_ibr(vcpu, r3);
+ vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
+}
+
+void kvm_mov_from_pmc(struct kvm_vcpu *vcpu, INST64 inst)
+{
+ unsigned long r3, r1;
+
+ r3 = vcpu_get_gr(vcpu, inst.M43.r3);
+ r1 = vcpu_get_pmc(vcpu, r3);
+ vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
+}
+
+
+unsigned long vcpu_get_cpuid(struct kvm_vcpu *vcpu, unsigned long reg)
+{
+ /* FIXME: This could get called as a result of a rsvd-reg fault */
+ if (reg > (ia64_get_cpuid(3) & 0xff))
+ return 0;
+ else
+ return ia64_get_cpuid(reg);
+}
+
+void kvm_mov_from_cpuid(struct kvm_vcpu *vcpu, INST64 inst)
+{
+ unsigned long r3, r1;
+
+ r3 = vcpu_get_gr(vcpu, inst.M43.r3);
+ r1 = vcpu_get_cpuid(vcpu, r3);
+ vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
+}
+
+void vcpu_set_tpr(struct kvm_vcpu *vcpu, unsigned long val)
+{
+ VCPU(vcpu, tpr) = val;
+ vcpu->arch.irq_check = 1;
+}
+
+unsigned long kvm_mov_to_cr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+ unsigned long r2;
+
+ r2 = vcpu_get_gr(vcpu, inst.M32.r2);
+ VCPU(vcpu, vcr[inst.M32.cr3]) = r2;
+
+ switch (inst.M32.cr3) {
+ case 0:
+ vcpu_set_dcr(vcpu, r2);
+ break;
+ case 1:
+ vcpu_set_itm(vcpu, r2);
+ break;
+ case 66:
+ vcpu_set_tpr(vcpu, r2);
+ break;
+ case 67:
+ vcpu_set_eoi(vcpu, r2);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+
+unsigned long kvm_mov_from_cr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+ unsigned long tgt = inst.M33.r1;
+ unsigned long val;
+
+ switch (inst.M33.cr3) {
+ case 65:
+ val = vcpu_get_ivr(vcpu);
+ vcpu_set_gr(vcpu, tgt, val, 0);
+ break;
+
+ case 67:
+ vcpu_set_gr(vcpu, tgt, 0L, 0);
+ break;
+ default:
+ val = VCPU(vcpu, vcr[inst.M33.cr3]);
+ vcpu_set_gr(vcpu, tgt, val, 0);
+ break;
+ }
+
+ return 0;
+}
+
+
+
+void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val)
+{
+
+ unsigned long mask;
+ struct kvm_pt_regs *regs;
+ struct ia64_psr old_psr, new_psr;
+
+ old_psr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
+
+ regs = vcpu_regs(vcpu);
+ /* We only support guest as:
+ * vpsr.pk = 0
+ * vpsr.is = 0
+ * Otherwise panic
+ */
+ if (val & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM))
+ panic_vm(vcpu);
+
+ /*
+ * For those IA64_PSR bits: id/da/dd/ss/ed/ia
+ * Since these bits will become 0, after success execution of each
+ * instruction, we will change set them to mIA64_PSR
+ */
+ VCPU(vcpu, vpsr) = val
+ & (~(IA64_PSR_ID | IA64_PSR_DA | IA64_PSR_DD |
+ IA64_PSR_SS | IA64_PSR_ED | IA64_PSR_IA));
+
+ if (!old_psr.i && (val & IA64_PSR_I)) {
+ /* vpsr.i 0->1 */
+ vcpu->arch.irq_check = 1;
+ }
+ new_psr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
+
+ /*
+ * All vIA64_PSR bits shall go to mPSR (v->tf->tf_special.psr)
+ * , except for the following bits:
+ * ic/i/dt/si/rt/mc/it/bn/vm
+ */
+ mask = IA64_PSR_IC + IA64_PSR_I + IA64_PSR_DT + IA64_PSR_SI +
+ IA64_PSR_RT + IA64_PSR_MC + IA64_PSR_IT + IA64_PSR_BN +
+ IA64_PSR_VM;
+
+ regs->cr_ipsr = (regs->cr_ipsr & mask) | (val & (~mask));
+
+ check_mm_mode_switch(vcpu, old_psr, new_psr);
+
+ return ;
+}
+
+unsigned long vcpu_cover(struct kvm_vcpu *vcpu)
+{
+ struct ia64_psr vpsr;
+
+ struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+ vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
+
+ if (!vpsr.ic)
+ VCPU(vcpu, ifs) = regs->cr_ifs;
+ regs->cr_ifs = IA64_IFS_V;
+ return (IA64_NO_FAULT);
+}
+
+
+
+/**************************************************************************
+ VCPU banked general register access routines
+ **************************************************************************/
+#define vcpu_bsw0_unat(i, b0unat, b1unat, runat, VMM_PT_REGS_R16_SLOT) \
+ do { \
+ __asm__ __volatile__ ( \
+ ";;extr.u %0 = %3,%6,16;;\n" \
+ "dep %1 = %0, %1, 0, 16;;\n" \
+ "st8 [%4] = %1\n" \
+ "extr.u %0 = %2, 16, 16;;\n" \
+ "dep %3 = %0, %3, %6, 16;;\n" \
+ "st8 [%5] = %3\n" \
+ ::"r"(i), "r"(*b1unat), "r"(*b0unat), \
+ "r"(*runat), "r"(b1unat), "r"(runat), \
+ "i"(VMM_PT_REGS_R16_SLOT) : "memory"); \
+ } while (0)
+
+void vcpu_bsw0(struct kvm_vcpu *vcpu)
+{
+ unsigned long i;
+
+ struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+ unsigned long *r = &regs->r16;
+ unsigned long *b0 = &VCPU(vcpu, vbgr[0]);
+ unsigned long *b1 = &VCPU(vcpu, vgr[0]);
+ unsigned long *runat = &regs->eml_unat;
+ unsigned long *b0unat = &VCPU(vcpu, vbnat);
+ unsigned long *b1unat = &VCPU(vcpu, vnat);
+
+
+ if (VCPU(vcpu, vpsr) & IA64_PSR_BN) {
+ for (i = 0; i < 16; i++) {
+ *b1++ = *r;
+ *r++ = *b0++;
+ }
+ vcpu_bsw0_unat(i, b0unat, b1unat, runat,
+ VMM_PT_REGS_R16_SLOT);
+ VCPU(vcpu, vpsr) &= ~IA64_PSR_BN;
+ }
+}
+
+#define vcpu_bsw1_unat(i, b0unat, b1unat, runat, VMM_PT_REGS_R16_SLOT) \
+ do { \
+ __asm__ __volatile__ (";;extr.u %0 = %3, %6, 16;;\n" \
+ "dep %1 = %0, %1, 16, 16;;\n" \
+ "st8 [%4] = %1\n" \
+ "extr.u %0 = %2, 0, 16;;\n" \
+ "dep %3 = %0, %3, %6, 16;;\n" \
+ "st8 [%5] = %3\n" \
+ ::"r"(i), "r"(*b0unat), "r"(*b1unat), \
+ "r"(*runat), "r"(b0unat), "r"(runat), \
+ "i"(VMM_PT_REGS_R16_SLOT) : "memory"); \
+ } while (0)
+
+void vcpu_bsw1(struct kvm_vcpu *vcpu)
+{
+ unsigned long i;
+ struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+ unsigned long *r = &regs->r16;
+ unsigned long *b0 = &VCPU(vcpu, vbgr[0]);
+ unsigned long *b1 = &VCPU(vcpu, vgr[0]);
+ unsigned long *runat = &regs->eml_unat;
+ unsigned long *b0unat = &VCPU(vcpu, vbnat);
+ unsigned long *b1unat = &VCPU(vcpu, vnat);
+
+ if (!(VCPU(vcpu, vpsr) & IA64_PSR_BN)) {
+ for (i = 0; i < 16; i++) {
+ *b0++ = *r;
+ *r++ = *b1++;
+ }
+ vcpu_bsw1_unat(i, b0unat, b1unat, runat,
+ VMM_PT_REGS_R16_SLOT);
+ VCPU(vcpu, vpsr) |= IA64_PSR_BN;
+ }
+}
+
+
+
+
+void vcpu_rfi(struct kvm_vcpu *vcpu)
+{
+ unsigned long ifs, psr;
+ struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+
+ psr = VCPU(vcpu, ipsr);
+ if (psr & IA64_PSR_BN)
+ vcpu_bsw1(vcpu);
+ else
+ vcpu_bsw0(vcpu);
+ vcpu_set_psr(vcpu, psr);
+ ifs = VCPU(vcpu, ifs);
+ if (ifs >> 63)
+ regs->cr_ifs = ifs;
+ regs->cr_iip = VCPU(vcpu, iip);
+}
+
+
+/*
+ VPSR can't keep track of below bits of guest PSR
+ This function gets guest PSR
+ */
+
+unsigned long vcpu_get_psr(struct kvm_vcpu *vcpu)
+{
+ unsigned long mask;
+ struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+
+ mask = IA64_PSR_BE | IA64_PSR_UP | IA64_PSR_AC | IA64_PSR_MFL |
+ IA64_PSR_MFH | IA64_PSR_CPL | IA64_PSR_RI;
+ return (VCPU(vcpu, vpsr) & ~mask) | (regs->cr_ipsr & mask);
+}
+
+void kvm_rsm(struct kvm_vcpu *vcpu, INST64 inst)
+{
+ unsigned long vpsr;
+ unsigned long imm24 = (inst.M44.i<<23) | (inst.M44.i2<<21)
+ | inst.M44.imm;
+
+ vpsr = vcpu_get_psr(vcpu);
+ vpsr &= (~imm24);
+ vcpu_set_psr(vcpu, vpsr);
+}
+
+void kvm_ssm(struct kvm_vcpu *vcpu, INST64 inst)
+{
+ unsigned long vpsr;
+ unsigned long imm24 = (inst.M44.i << 23) | (inst.M44.i2 << 21)
+ | inst.M44.imm;
+
+ vpsr = vcpu_get_psr(vcpu);
+ vpsr |= imm24;
+ vcpu_set_psr(vcpu, vpsr);
+}
+
+/* Generate Mask
+ * Parameter:
+ * bit -- starting bit
+ * len -- how many bits
+ */
+#define MASK(bit,len) \
+({ \
+ __u64 ret; \
+ \
+ __asm __volatile("dep %0=-1, r0, %1, %2"\
+ : "=r" (ret): \
+ "M" (bit), \
+ "M" (len)); \
+ ret; \
+})
+
+void vcpu_set_psr_l(struct kvm_vcpu *vcpu, unsigned long val)
+{
+ val = (val & MASK(0, 32)) | (vcpu_get_psr(vcpu) & MASK(32, 32));
+ vcpu_set_psr(vcpu, val);
+}
+
+void kvm_mov_to_psr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+ unsigned long val;
+
+ val = vcpu_get_gr(vcpu, inst.M35.r2);
+ vcpu_set_psr_l(vcpu, val);
+}
+
+void kvm_mov_from_psr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+ unsigned long val;
+
+ val = vcpu_get_psr(vcpu);
+ val = (val & MASK(0, 32)) | (val & MASK(35, 2));
+ vcpu_set_gr(vcpu, inst.M33.r1, val, 0);
+}
+
+void vcpu_increment_iip(struct kvm_vcpu *vcpu)
+{
+ struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+ struct ia64_psr *ipsr = (struct ia64_psr *)&regs->cr_ipsr;
+ if (ipsr->ri == 2) {
+ ipsr->ri = 0;
+ regs->cr_iip += 16;
+ } else
+ ipsr->ri++;
+}
+
+void vcpu_decrement_iip(struct kvm_vcpu *vcpu)
+{
+ struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+ struct ia64_psr *ipsr = (struct ia64_psr *)&regs->cr_ipsr;
+
+ if (ipsr->ri == 0) {
+ ipsr->ri = 2;
+ regs->cr_iip -= 16;
+ } else
+ ipsr->ri--;
+}
+
+/** Emulate a privileged operation.
+ *
+ *
+ * @param vcpu virtual cpu
+ * @cause the reason cause virtualization fault
+ * @opcode the instruction code which cause virtualization fault
+ */
+
+void kvm_emulate(struct kvm_vcpu *vcpu, struct kvm_pt_regs *regs)
+{
+ unsigned long status, cause, opcode ;
+ INST64 inst;
+
+ status = IA64_NO_FAULT;
+ cause = VMX(vcpu, cause);
+ opcode = VMX(vcpu, opcode);
+ inst.inst = opcode;
+ /*
+ * Switch to actual virtual rid in rr0 and rr4,
+ * which is required by some tlb related instructions.
+ */
+ prepare_if_physical_mode(vcpu);
+
+ switch (cause) {
+ case EVENT_RSM:
+ kvm_rsm(vcpu, inst);
+ break;
+ case EVENT_SSM:
+ kvm_ssm(vcpu, inst);
+ break;
+ case EVENT_MOV_TO_PSR:
+ kvm_mov_to_psr(vcpu, inst);
+ break;
+ case EVENT_MOV_FROM_PSR:
+ kvm_mov_from_psr(vcpu, inst);
+ break;
+ case EVENT_MOV_FROM_CR:
+ kvm_mov_from_cr(vcpu, inst);
+ break;
+ case EVENT_MOV_TO_CR:
+ kvm_mov_to_cr(vcpu, inst);
+ break;
+ case EVENT_BSW_0:
+ vcpu_bsw0(vcpu);
+ break;
+ case EVENT_BSW_1:
+ vcpu_bsw1(vcpu);
+ break;
+ case EVENT_COVER:
+ vcpu_cover(vcpu);
+ break;
+ case EVENT_RFI:
+ vcpu_rfi(vcpu);
+ break;
+ case EVENT_ITR_D:
+ kvm_itr_d(vcpu, inst);
+ break;
+ case EVENT_ITR_I:
+ kvm_itr_i(vcpu, inst);
+ break;
+ case EVENT_PTR_D:
+ kvm_ptr_d(vcpu, inst);
+ break;
+ case EVENT_PTR_I:
+ kvm_ptr_i(vcpu, inst);
+ break;
+ case EVENT_ITC_D:
+ kvm_itc_d(vcpu, inst);
+ break;
+ case EVENT_ITC_I:
+ kvm_itc_i(vcpu, inst);
+ break;
+ case EVENT_PTC_L:
+ kvm_ptc_l(vcpu, inst);
+ break;
+ case EVENT_PTC_G:
+ kvm_ptc_g(vcpu, inst);
+ break;
+ case EVENT_PTC_GA:
+ kvm_ptc_ga(vcpu, inst);
+ break;
+ case EVENT_PTC_E:
+ kvm_ptc_e(vcpu, inst);
+ break;
+ case EVENT_MOV_TO_RR:
+ kvm_mov_to_rr(vcpu, inst);
+ break;
+ case EVENT_MOV_FROM_RR:
+ kvm_mov_from_rr(vcpu, inst);
+ break;
+ case EVENT_THASH:
+ kvm_thash(vcpu, inst);
+ break;
+ case EVENT_TTAG:
+ kvm_ttag(vcpu, inst);
+ break;
+ case EVENT_TPA:
+ status = kvm_tpa(vcpu, inst);
+ break;
+ case EVENT_TAK:
+ kvm_tak(vcpu, inst);
+ break;
+ case EVENT_MOV_TO_AR_IMM:
+ kvm_mov_to_ar_imm(vcpu, inst);
+ break;
+ case EVENT_MOV_TO_AR:
+ kvm_mov_to_ar_reg(vcpu, inst);
+ break;
+ case EVENT_MOV_FROM_AR:
+ kvm_mov_from_ar_reg(vcpu, inst);
+ break;
+ case EVENT_MOV_TO_DBR:
+ kvm_mov_to_dbr(vcpu, inst);
+ break;
+ case EVENT_MOV_TO_IBR:
+ kvm_mov_to_ibr(vcpu, inst);
+ break;
+ case EVENT_MOV_TO_PMC:
+ kvm_mov_to_pmc(vcpu, inst);
+ break;
+ case EVENT_MOV_TO_PMD:
+ kvm_mov_to_pmd(vcpu, inst);
+ break;
+ case EVENT_MOV_TO_PKR:
+ kvm_mov_to_pkr(vcpu, inst);
+ break;
+ case EVENT_MOV_FROM_DBR:
+ kvm_mov_from_dbr(vcpu, inst);
+ break;
+ case EVENT_MOV_FROM_IBR:
+ kvm_mov_from_ibr(vcpu, inst);
+ break;
+ case EVENT_MOV_FROM_PMC:
+ kvm_mov_from_pmc(vcpu, inst);
+ break;
+ case EVENT_MOV_FROM_PKR:
+ kvm_mov_from_pkr(vcpu, inst);
+ break;
+ case EVENT_MOV_FROM_CPUID:
+ kvm_mov_from_cpuid(vcpu, inst);
+ break;
+ case EVENT_VMSW:
+ status = IA64_FAULT;
+ break;
+ default:
+ break;
+ };
+ /*Assume all status is NO_FAULT ?*/
+ if (status == IA64_NO_FAULT && cause != EVENT_RFI)
+ vcpu_increment_iip(vcpu);
+
+ recover_if_physical_mode(vcpu);
+}
+
+void init_vcpu(struct kvm_vcpu *vcpu)
+{
+ int i;
+
+ vcpu->arch.mode_flags = GUEST_IN_PHY;
+ VMX(vcpu, vrr[0]) = 0x38;
+ VMX(vcpu, vrr[1]) = 0x38;
+ VMX(vcpu, vrr[2]) = 0x38;
+ VMX(vcpu, vrr[3]) = 0x38;
+ VMX(vcpu, vrr[4]) = 0x38;
+ VMX(vcpu, vrr[5]) = 0x38;
+ VMX(vcpu, vrr[6]) = 0x38;
+ VMX(vcpu, vrr[7]) = 0x38;
+ VCPU(vcpu, vpsr) = IA64_PSR_BN;
+ VCPU(vcpu, dcr) = 0;
+ /* pta.size must not be 0. The minimum is 15 (32k) */
+ VCPU(vcpu, pta) = 15 << 2;
+ VCPU(vcpu, itv) = 0x10000;
+ VCPU(vcpu, itm) = 0;
+ VMX(vcpu, last_itc) = 0;
+
+ VCPU(vcpu, lid) = VCPU_LID(vcpu);
+ VCPU(vcpu, ivr) = 0;
+ VCPU(vcpu, tpr) = 0x10000;
+ VCPU(vcpu, eoi) = 0;
+ VCPU(vcpu, irr[0]) = 0;
+ VCPU(vcpu, irr[1]) = 0;
+ VCPU(vcpu, irr[2]) = 0;
+ VCPU(vcpu, irr[3]) = 0;
+ VCPU(vcpu, pmv) = 0x10000;
+ VCPU(vcpu, cmcv) = 0x10000;
+ VCPU(vcpu, lrr0) = 0x10000; /* default reset value? */
+ VCPU(vcpu, lrr1) = 0x10000; /* default reset value? */
+ update_vhpi(vcpu, NULL_VECTOR);
+ VLSAPIC_XTP(vcpu) = 0x80; /* disabled */
+
+ for (i = 0; i < 4; i++)
+ VLSAPIC_INSVC(vcpu, i) = 0;
+}
+
+void kvm_init_all_rr(struct kvm_vcpu *vcpu)
+{
+ unsigned long psr;
+
+ local_irq_save(psr);
+
+ /* WARNING: not allow co-exist of both virtual mode and physical
+ * mode in same region
+ */
+
+ vcpu->arch.metaphysical_saved_rr0 = vrrtomrr(VMX(vcpu, vrr[VRN0]));
+ vcpu->arch.metaphysical_saved_rr4 = vrrtomrr(VMX(vcpu, vrr[VRN4]));
+
+ if (is_physical_mode(vcpu)) {
+ if (vcpu->arch.mode_flags & GUEST_PHY_EMUL)
+ panic_vm(vcpu);
+
+ ia64_set_rr((VRN0 << VRN_SHIFT), vcpu->arch.metaphysical_rr0);
+ ia64_dv_serialize_data();
+ ia64_set_rr((VRN4 << VRN_SHIFT), vcpu->arch.metaphysical_rr4);
+ ia64_dv_serialize_data();
+ } else {
+ ia64_set_rr((VRN0 << VRN_SHIFT),
+ vcpu->arch.metaphysical_saved_rr0);
+ ia64_dv_serialize_data();
+ ia64_set_rr((VRN4 << VRN_SHIFT),
+ vcpu->arch.metaphysical_saved_rr4);
+ ia64_dv_serialize_data();
+ }
+ ia64_set_rr((VRN1 << VRN_SHIFT),
+ vrrtomrr(VMX(vcpu, vrr[VRN1])));
+ ia64_dv_serialize_data();
+ ia64_set_rr((VRN2 << VRN_SHIFT),
+ vrrtomrr(VMX(vcpu, vrr[VRN2])));
+ ia64_dv_serialize_data();
+ ia64_set_rr((VRN3 << VRN_SHIFT),
+ vrrtomrr(VMX(vcpu, vrr[VRN3])));
+ ia64_dv_serialize_data();
+ ia64_set_rr((VRN5 << VRN_SHIFT),
+ vrrtomrr(VMX(vcpu, vrr[VRN5])));
+ ia64_dv_serialize_data();
+ ia64_set_rr((VRN7 << VRN_SHIFT),
+ vrrtomrr(VMX(vcpu, vrr[VRN7])));
+ ia64_dv_serialize_data();
+ ia64_srlz_d();
+ ia64_set_psr(psr);
+}
+
+int vmm_entry(void)
+{
+ struct kvm_vcpu *v;
+ v = current_vcpu;
+
+ ia64_call_vsa(PAL_VPS_RESTORE, (unsigned long)v->arch.vpd,
+ 0, 0, 0, 0, 0, 0);
+ kvm_init_vtlb(v);
+ kvm_init_vhpt(v);
+ init_vcpu(v);
+ kvm_init_all_rr(v);
+ vmm_reset_entry();
+
+ return 0;
+}
+
+void panic_vm(struct kvm_vcpu *v)
+{
+ struct exit_ctl_data *p = &v->arch.exit_data;
+
+ p->exit_reason = EXIT_REASON_VM_PANIC;
+ vmm_transition(v);
+ /*Never to return*/
+ while (1);
+}
diff --git a/arch/ia64/kvm/vcpu.h b/arch/ia64/kvm/vcpu.h
new file mode 100644
index 000000000000..b0fcfb62c49e
--- /dev/null
+++ b/arch/ia64/kvm/vcpu.h
@@ -0,0 +1,740 @@
+/*
+ * vcpu.h: vcpu routines
+ * Copyright (c) 2005, Intel Corporation.
+ * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
+ * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
+ *
+ * Copyright (c) 2007, Intel Corporation.
+ * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
+ * Xiantao Zhang (xiantao.zhang@intel.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+
+#ifndef __KVM_VCPU_H__
+#define __KVM_VCPU_H__
+
+#include <asm/types.h>
+#include <asm/fpu.h>
+#include <asm/processor.h>
+
+#ifndef __ASSEMBLY__
+#include "vti.h"
+
+#include <linux/kvm_host.h>
+#include <linux/spinlock.h>
+
+typedef unsigned long IA64_INST;
+
+typedef union U_IA64_BUNDLE {
+ unsigned long i64[2];
+ struct { unsigned long template:5, slot0:41, slot1a:18,
+ slot1b:23, slot2:41; };
+ /* NOTE: following doesn't work because bitfields can't cross natural
+ size boundaries
+ struct { unsigned long template:5, slot0:41, slot1:41, slot2:41; }; */
+} IA64_BUNDLE;
+
+typedef union U_INST64_A5 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, r1:7, imm7b:7, r3:2, imm5c:5,
+ imm9d:9, s:1, major:4; };
+} INST64_A5;
+
+typedef union U_INST64_B4 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, btype:3, un3:3, p:1, b2:3, un11:11, x6:6,
+ wh:2, d:1, un1:1, major:4; };
+} INST64_B4;
+
+typedef union U_INST64_B8 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, un21:21, x6:6, un4:4, major:4; };
+} INST64_B8;
+
+typedef union U_INST64_B9 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, imm20:20, :1, x6:6, :3, i:1, major:4; };
+} INST64_B9;
+
+typedef union U_INST64_I19 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, imm20:20, :1, x6:6, x3:3, i:1, major:4; };
+} INST64_I19;
+
+typedef union U_INST64_I26 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, :7, r2:7, ar3:7, x6:6, x3:3, :1, major:4; };
+} INST64_I26;
+
+typedef union U_INST64_I27 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, :7, imm:7, ar3:7, x6:6, x3:3, s:1, major:4; };
+} INST64_I27;
+
+typedef union U_INST64_I28 { /* not privileged (mov from AR) */
+ IA64_INST inst;
+ struct { unsigned long qp:6, r1:7, :7, ar3:7, x6:6, x3:3, :1, major:4; };
+} INST64_I28;
+
+typedef union U_INST64_M28 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, :14, r3:7, x6:6, x3:3, :1, major:4; };
+} INST64_M28;
+
+typedef union U_INST64_M29 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, :7, r2:7, ar3:7, x6:6, x3:3, :1, major:4; };
+} INST64_M29;
+
+typedef union U_INST64_M30 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, :7, imm:7, ar3:7, x4:4, x2:2,
+ x3:3, s:1, major:4; };
+} INST64_M30;
+
+typedef union U_INST64_M31 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, r1:7, :7, ar3:7, x6:6, x3:3, :1, major:4; };
+} INST64_M31;
+
+typedef union U_INST64_M32 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, :7, r2:7, cr3:7, x6:6, x3:3, :1, major:4; };
+} INST64_M32;
+
+typedef union U_INST64_M33 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, r1:7, :7, cr3:7, x6:6, x3:3, :1, major:4; };
+} INST64_M33;
+
+typedef union U_INST64_M35 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, :7, r2:7, :7, x6:6, x3:3, :1, major:4; };
+
+} INST64_M35;
+
+typedef union U_INST64_M36 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, r1:7, :14, x6:6, x3:3, :1, major:4; };
+} INST64_M36;
+
+typedef union U_INST64_M37 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, imm20a:20, :1, x4:4, x2:2, x3:3,
+ i:1, major:4; };
+} INST64_M37;
+
+typedef union U_INST64_M41 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, :7, r2:7, :7, x6:6, x3:3, :1, major:4; };
+} INST64_M41;
+
+typedef union U_INST64_M42 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, :7, r2:7, r3:7, x6:6, x3:3, :1, major:4; };
+} INST64_M42;
+
+typedef union U_INST64_M43 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, r1:7, :7, r3:7, x6:6, x3:3, :1, major:4; };
+} INST64_M43;
+
+typedef union U_INST64_M44 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, imm:21, x4:4, i2:2, x3:3, i:1, major:4; };
+} INST64_M44;
+
+typedef union U_INST64_M45 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, :7, r2:7, r3:7, x6:6, x3:3, :1, major:4; };
+} INST64_M45;
+
+typedef union U_INST64_M46 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, r1:7, un7:7, r3:7, x6:6,
+ x3:3, un1:1, major:4; };
+} INST64_M46;
+
+typedef union U_INST64_M47 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, un14:14, r3:7, x6:6, x3:3, un1:1, major:4; };
+} INST64_M47;
+
+typedef union U_INST64_M1{
+ IA64_INST inst;
+ struct { unsigned long qp:6, r1:7, un7:7, r3:7, x:1, hint:2,
+ x6:6, m:1, major:4; };
+} INST64_M1;
+
+typedef union U_INST64_M2{
+ IA64_INST inst;
+ struct { unsigned long qp:6, r1:7, r2:7, r3:7, x:1, hint:2,
+ x6:6, m:1, major:4; };
+} INST64_M2;
+
+typedef union U_INST64_M3{
+ IA64_INST inst;
+ struct { unsigned long qp:6, r1:7, imm7:7, r3:7, i:1, hint:2,
+ x6:6, s:1, major:4; };
+} INST64_M3;
+
+typedef union U_INST64_M4 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, un7:7, r2:7, r3:7, x:1, hint:2,
+ x6:6, m:1, major:4; };
+} INST64_M4;
+
+typedef union U_INST64_M5 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, imm7:7, r2:7, r3:7, i:1, hint:2,
+ x6:6, s:1, major:4; };
+} INST64_M5;
+
+typedef union U_INST64_M6 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, f1:7, un7:7, r3:7, x:1, hint:2,
+ x6:6, m:1, major:4; };
+} INST64_M6;
+
+typedef union U_INST64_M9 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, :7, f2:7, r3:7, x:1, hint:2,
+ x6:6, m:1, major:4; };
+} INST64_M9;
+
+typedef union U_INST64_M10 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, imm7:7, f2:7, r3:7, i:1, hint:2,
+ x6:6, s:1, major:4; };
+} INST64_M10;
+
+typedef union U_INST64_M12 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, f1:7, f2:7, r3:7, x:1, hint:2,
+ x6:6, m:1, major:4; };
+} INST64_M12;
+
+typedef union U_INST64_M15 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, :7, imm7:7, r3:7, i:1, hint:2,
+ x6:6, s:1, major:4; };
+} INST64_M15;
+
+typedef union U_INST64 {
+ IA64_INST inst;
+ struct { unsigned long :37, major:4; } generic;
+ INST64_A5 A5; /* used in build_hypercall_bundle only */
+ INST64_B4 B4; /* used in build_hypercall_bundle only */
+ INST64_B8 B8; /* rfi, bsw.[01] */
+ INST64_B9 B9; /* break.b */
+ INST64_I19 I19; /* used in build_hypercall_bundle only */
+ INST64_I26 I26; /* mov register to ar (I unit) */
+ INST64_I27 I27; /* mov immediate to ar (I unit) */
+ INST64_I28 I28; /* mov from ar (I unit) */
+ INST64_M1 M1; /* ld integer */
+ INST64_M2 M2;
+ INST64_M3 M3;
+ INST64_M4 M4; /* st integer */
+ INST64_M5 M5;
+ INST64_M6 M6; /* ldfd floating pointer */
+ INST64_M9 M9; /* stfd floating pointer */
+ INST64_M10 M10; /* stfd floating pointer */
+ INST64_M12 M12; /* ldfd pair floating pointer */
+ INST64_M15 M15; /* lfetch + imm update */
+ INST64_M28 M28; /* purge translation cache entry */
+ INST64_M29 M29; /* mov register to ar (M unit) */
+ INST64_M30 M30; /* mov immediate to ar (M unit) */
+ INST64_M31 M31; /* mov from ar (M unit) */
+ INST64_M32 M32; /* mov reg to cr */
+ INST64_M33 M33; /* mov from cr */
+ INST64_M35 M35; /* mov to psr */
+ INST64_M36 M36; /* mov from psr */
+ INST64_M37 M37; /* break.m */
+ INST64_M41 M41; /* translation cache insert */
+ INST64_M42 M42; /* mov to indirect reg/translation reg insert*/
+ INST64_M43 M43; /* mov from indirect reg */
+ INST64_M44 M44; /* set/reset system mask */
+ INST64_M45 M45; /* translation purge */
+ INST64_M46 M46; /* translation access (tpa,tak) */
+ INST64_M47 M47; /* purge translation entry */
+} INST64;
+
+#define MASK_41 ((unsigned long)0x1ffffffffff)
+
+/* Virtual address memory attributes encoding */
+#define VA_MATTR_WB 0x0
+#define VA_MATTR_UC 0x4
+#define VA_MATTR_UCE 0x5
+#define VA_MATTR_WC 0x6
+#define VA_MATTR_NATPAGE 0x7
+
+#define PMASK(size) (~((size) - 1))
+#define PSIZE(size) (1UL<<(size))
+#define CLEARLSB(ppn, nbits) (((ppn) >> (nbits)) << (nbits))
+#define PAGEALIGN(va, ps) CLEARLSB(va, ps)
+#define PAGE_FLAGS_RV_MASK (0x2|(0x3UL<<50)|(((1UL<<11)-1)<<53))
+#define _PAGE_MA_ST (0x1 << 2) /* is reserved for software use */
+
+#define ARCH_PAGE_SHIFT 12
+
+#define INVALID_TI_TAG (1UL << 63)
+
+#define VTLB_PTE_P_BIT 0
+#define VTLB_PTE_IO_BIT 60
+#define VTLB_PTE_IO (1UL<<VTLB_PTE_IO_BIT)
+#define VTLB_PTE_P (1UL<<VTLB_PTE_P_BIT)
+
+#define vcpu_quick_region_check(_tr_regions,_ifa) \
+ (_tr_regions & (1 << ((unsigned long)_ifa >> 61)))
+
+#define vcpu_quick_region_set(_tr_regions,_ifa) \
+ do {_tr_regions |= (1 << ((unsigned long)_ifa >> 61)); } while (0)
+
+static inline void vcpu_set_tr(struct thash_data *trp, u64 pte, u64 itir,
+ u64 va, u64 rid)
+{
+ trp->page_flags = pte;
+ trp->itir = itir;
+ trp->vadr = va;
+ trp->rid = rid;
+}
+
+extern u64 kvm_lookup_mpa(u64 gpfn);
+extern u64 kvm_gpa_to_mpa(u64 gpa);
+
+/* Return I/O type if trye */
+#define __gpfn_is_io(gpfn) \
+ ({ \
+ u64 pte, ret = 0; \
+ pte = kvm_lookup_mpa(gpfn); \
+ if (!(pte & GPFN_INV_MASK)) \
+ ret = pte & GPFN_IO_MASK; \
+ ret; \
+ })
+
+#endif
+
+#define IA64_NO_FAULT 0
+#define IA64_FAULT 1
+
+#define VMM_RBS_OFFSET ((VMM_TASK_SIZE + 15) & ~15)
+
+#define SW_BAD 0 /* Bad mode transitition */
+#define SW_V2P 1 /* Physical emulatino is activated */
+#define SW_P2V 2 /* Exit physical mode emulation */
+#define SW_SELF 3 /* No mode transition */
+#define SW_NOP 4 /* Mode transition, but without action required */
+
+#define GUEST_IN_PHY 0x1
+#define GUEST_PHY_EMUL 0x2
+
+#define current_vcpu ((struct kvm_vcpu *) ia64_getreg(_IA64_REG_TP))
+
+#define VRN_SHIFT 61
+#define VRN_MASK 0xe000000000000000
+#define VRN0 0x0UL
+#define VRN1 0x1UL
+#define VRN2 0x2UL
+#define VRN3 0x3UL
+#define VRN4 0x4UL
+#define VRN5 0x5UL
+#define VRN6 0x6UL
+#define VRN7 0x7UL
+
+#define IRQ_NO_MASKED 0
+#define IRQ_MASKED_BY_VTPR 1
+#define IRQ_MASKED_BY_INSVC 2 /* masked by inservice IRQ */
+
+#define PTA_BASE_SHIFT 15
+
+#define IA64_PSR_VM_BIT 46
+#define IA64_PSR_VM (__IA64_UL(1) << IA64_PSR_VM_BIT)
+
+/* Interruption Function State */
+#define IA64_IFS_V_BIT 63
+#define IA64_IFS_V (__IA64_UL(1) << IA64_IFS_V_BIT)
+
+#define PHY_PAGE_UC (_PAGE_A|_PAGE_D|_PAGE_P|_PAGE_MA_UC|_PAGE_AR_RWX)
+#define PHY_PAGE_WB (_PAGE_A|_PAGE_D|_PAGE_P|_PAGE_MA_WB|_PAGE_AR_RWX)
+
+#ifndef __ASSEMBLY__
+
+#include <asm/gcc_intrin.h>
+
+#define is_physical_mode(v) \
+ ((v->arch.mode_flags) & GUEST_IN_PHY)
+
+#define is_virtual_mode(v) \
+ (!is_physical_mode(v))
+
+#define MODE_IND(psr) \
+ (((psr).it << 2) + ((psr).dt << 1) + (psr).rt)
+
+#define _vmm_raw_spin_lock(x) \
+ do { \
+ __u32 *ia64_spinlock_ptr = (__u32 *) (x); \
+ __u64 ia64_spinlock_val; \
+ ia64_spinlock_val = ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0);\
+ if (unlikely(ia64_spinlock_val)) { \
+ do { \
+ while (*ia64_spinlock_ptr) \
+ ia64_barrier(); \
+ ia64_spinlock_val = \
+ ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0);\
+ } while (ia64_spinlock_val); \
+ } \
+ } while (0)
+
+#define _vmm_raw_spin_unlock(x) \
+ do { barrier(); \
+ ((spinlock_t *)x)->raw_lock.lock = 0; } \
+while (0)
+
+void vmm_spin_lock(spinlock_t *lock);
+void vmm_spin_unlock(spinlock_t *lock);
+enum {
+ I_TLB = 1,
+ D_TLB = 2
+};
+
+union kvm_va {
+ struct {
+ unsigned long off : 60; /* intra-region offset */
+ unsigned long reg : 4; /* region number */
+ } f;
+ unsigned long l;
+ void *p;
+};
+
+#define __kvm_pa(x) ({union kvm_va _v; _v.l = (long) (x); \
+ _v.f.reg = 0; _v.l; })
+#define __kvm_va(x) ({union kvm_va _v; _v.l = (long) (x); \
+ _v.f.reg = -1; _v.p; })
+
+#define _REGION_ID(x) ({union ia64_rr _v; _v.val = (long)(x); \
+ _v.rid; })
+#define _REGION_PAGE_SIZE(x) ({union ia64_rr _v; _v.val = (long)(x); \
+ _v.ps; })
+#define _REGION_HW_WALKER(x) ({union ia64_rr _v; _v.val = (long)(x); \
+ _v.ve; })
+
+enum vhpt_ref{ DATA_REF, NA_REF, INST_REF, RSE_REF };
+enum tlb_miss_type { INSTRUCTION, DATA, REGISTER };
+
+#define VCPU(_v, _x) ((_v)->arch.vpd->_x)
+#define VMX(_v, _x) ((_v)->arch._x)
+
+#define VLSAPIC_INSVC(vcpu, i) ((vcpu)->arch.insvc[i])
+#define VLSAPIC_XTP(_v) VMX(_v, xtp)
+
+static inline unsigned long itir_ps(unsigned long itir)
+{
+ return ((itir >> 2) & 0x3f);
+}
+
+
+/**************************************************************************
+ VCPU control register access routines
+ **************************************************************************/
+
+static inline u64 vcpu_get_itir(struct kvm_vcpu *vcpu)
+{
+ return ((u64)VCPU(vcpu, itir));
+}
+
+static inline void vcpu_set_itir(struct kvm_vcpu *vcpu, u64 val)
+{
+ VCPU(vcpu, itir) = val;
+}
+
+static inline u64 vcpu_get_ifa(struct kvm_vcpu *vcpu)
+{
+ return ((u64)VCPU(vcpu, ifa));
+}
+
+static inline void vcpu_set_ifa(struct kvm_vcpu *vcpu, u64 val)
+{
+ VCPU(vcpu, ifa) = val;
+}
+
+static inline u64 vcpu_get_iva(struct kvm_vcpu *vcpu)
+{
+ return ((u64)VCPU(vcpu, iva));
+}
+
+static inline u64 vcpu_get_pta(struct kvm_vcpu *vcpu)
+{
+ return ((u64)VCPU(vcpu, pta));
+}
+
+static inline u64 vcpu_get_lid(struct kvm_vcpu *vcpu)
+{
+ return ((u64)VCPU(vcpu, lid));
+}
+
+static inline u64 vcpu_get_tpr(struct kvm_vcpu *vcpu)
+{
+ return ((u64)VCPU(vcpu, tpr));
+}
+
+static inline u64 vcpu_get_eoi(struct kvm_vcpu *vcpu)
+{
+ return (0UL); /*reads of eoi always return 0 */
+}
+
+static inline u64 vcpu_get_irr0(struct kvm_vcpu *vcpu)
+{
+ return ((u64)VCPU(vcpu, irr[0]));
+}
+
+static inline u64 vcpu_get_irr1(struct kvm_vcpu *vcpu)
+{
+ return ((u64)VCPU(vcpu, irr[1]));
+}
+
+static inline u64 vcpu_get_irr2(struct kvm_vcpu *vcpu)
+{
+ return ((u64)VCPU(vcpu, irr[2]));
+}
+
+static inline u64 vcpu_get_irr3(struct kvm_vcpu *vcpu)
+{
+ return ((u64)VCPU(vcpu, irr[3]));
+}
+
+static inline void vcpu_set_dcr(struct kvm_vcpu *vcpu, u64 val)
+{
+ ia64_setreg(_IA64_REG_CR_DCR, val);
+}
+
+static inline void vcpu_set_isr(struct kvm_vcpu *vcpu, u64 val)
+{
+ VCPU(vcpu, isr) = val;
+}
+
+static inline void vcpu_set_lid(struct kvm_vcpu *vcpu, u64 val)
+{
+ VCPU(vcpu, lid) = val;
+}
+
+static inline void vcpu_set_ipsr(struct kvm_vcpu *vcpu, u64 val)
+{
+ VCPU(vcpu, ipsr) = val;
+}
+
+static inline void vcpu_set_iip(struct kvm_vcpu *vcpu, u64 val)
+{
+ VCPU(vcpu, iip) = val;
+}
+
+static inline void vcpu_set_ifs(struct kvm_vcpu *vcpu, u64 val)
+{
+ VCPU(vcpu, ifs) = val;
+}
+
+static inline void vcpu_set_iipa(struct kvm_vcpu *vcpu, u64 val)
+{
+ VCPU(vcpu, iipa) = val;
+}
+
+static inline void vcpu_set_iha(struct kvm_vcpu *vcpu, u64 val)
+{
+ VCPU(vcpu, iha) = val;
+}
+
+
+static inline u64 vcpu_get_rr(struct kvm_vcpu *vcpu, u64 reg)
+{
+ return vcpu->arch.vrr[reg>>61];
+}
+
+/**************************************************************************
+ VCPU debug breakpoint register access routines
+ **************************************************************************/
+
+static inline void vcpu_set_dbr(struct kvm_vcpu *vcpu, u64 reg, u64 val)
+{
+ __ia64_set_dbr(reg, val);
+}
+
+static inline void vcpu_set_ibr(struct kvm_vcpu *vcpu, u64 reg, u64 val)
+{
+ ia64_set_ibr(reg, val);
+}
+
+static inline u64 vcpu_get_dbr(struct kvm_vcpu *vcpu, u64 reg)
+{
+ return ((u64)__ia64_get_dbr(reg));
+}
+
+static inline u64 vcpu_get_ibr(struct kvm_vcpu *vcpu, u64 reg)
+{
+ return ((u64)ia64_get_ibr(reg));
+}
+
+/**************************************************************************
+ VCPU performance monitor register access routines
+ **************************************************************************/
+static inline void vcpu_set_pmc(struct kvm_vcpu *vcpu, u64 reg, u64 val)
+{
+ /* NOTE: Writes to unimplemented PMC registers are discarded */
+ ia64_set_pmc(reg, val);
+}
+
+static inline void vcpu_set_pmd(struct kvm_vcpu *vcpu, u64 reg, u64 val)
+{
+ /* NOTE: Writes to unimplemented PMD registers are discarded */
+ ia64_set_pmd(reg, val);
+}
+
+static inline u64 vcpu_get_pmc(struct kvm_vcpu *vcpu, u64 reg)
+{
+ /* NOTE: Reads from unimplemented PMC registers return zero */
+ return ((u64)ia64_get_pmc(reg));
+}
+
+static inline u64 vcpu_get_pmd(struct kvm_vcpu *vcpu, u64 reg)
+{
+ /* NOTE: Reads from unimplemented PMD registers return zero */
+ return ((u64)ia64_get_pmd(reg));
+}
+
+static inline unsigned long vrrtomrr(unsigned long val)
+{
+ union ia64_rr rr;
+ rr.val = val;
+ rr.rid = (rr.rid << 4) | 0xe;
+ if (rr.ps > PAGE_SHIFT)
+ rr.ps = PAGE_SHIFT;
+ rr.ve = 1;
+ return rr.val;
+}
+
+
+static inline int highest_bits(int *dat)
+{
+ u32 bits, bitnum;
+ int i;
+
+ /* loop for all 256 bits */
+ for (i = 7; i >= 0 ; i--) {
+ bits = dat[i];
+ if (bits) {
+ bitnum = fls(bits);
+ return i * 32 + bitnum - 1;
+ }
+ }
+ return NULL_VECTOR;
+}
+
+/*
+ * The pending irq is higher than the inservice one.
+ *
+ */
+static inline int is_higher_irq(int pending, int inservice)
+{
+ return ((pending > inservice)
+ || ((pending != NULL_VECTOR)
+ && (inservice == NULL_VECTOR)));
+}
+
+static inline int is_higher_class(int pending, int mic)
+{
+ return ((pending >> 4) > mic);
+}
+
+/*
+ * Return 0-255 for pending irq.
+ * NULL_VECTOR: when no pending.
+ */
+static inline int highest_pending_irq(struct kvm_vcpu *vcpu)
+{
+ if (VCPU(vcpu, irr[0]) & (1UL<<NMI_VECTOR))
+ return NMI_VECTOR;
+ if (VCPU(vcpu, irr[0]) & (1UL<<ExtINT_VECTOR))
+ return ExtINT_VECTOR;
+
+ return highest_bits((int *)&VCPU(vcpu, irr[0]));
+}
+
+static inline int highest_inservice_irq(struct kvm_vcpu *vcpu)
+{
+ if (VMX(vcpu, insvc[0]) & (1UL<<NMI_VECTOR))
+ return NMI_VECTOR;
+ if (VMX(vcpu, insvc[0]) & (1UL<<ExtINT_VECTOR))
+ return ExtINT_VECTOR;
+
+ return highest_bits((int *)&(VMX(vcpu, insvc[0])));
+}
+
+extern void vcpu_get_fpreg(struct kvm_vcpu *vcpu, u64 reg,
+ struct ia64_fpreg *val);
+extern void vcpu_set_fpreg(struct kvm_vcpu *vcpu, u64 reg,
+ struct ia64_fpreg *val);
+extern u64 vcpu_get_gr(struct kvm_vcpu *vcpu, u64 reg);
+extern void vcpu_set_gr(struct kvm_vcpu *vcpu, u64 reg, u64 val, int nat);
+extern u64 vcpu_get_psr(struct kvm_vcpu *vcpu);
+extern void vcpu_set_psr(struct kvm_vcpu *vcpu, u64 val);
+extern u64 vcpu_thash(struct kvm_vcpu *vcpu, u64 vadr);
+extern void vcpu_bsw0(struct kvm_vcpu *vcpu);
+extern void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte,
+ u64 itir, u64 va, int type);
+extern struct thash_data *vhpt_lookup(u64 va);
+extern u64 guest_vhpt_lookup(u64 iha, u64 *pte);
+extern void thash_purge_entries(struct kvm_vcpu *v, u64 va, u64 ps);
+extern void thash_purge_entries_remote(struct kvm_vcpu *v, u64 va, u64 ps);
+extern u64 translate_phy_pte(u64 *pte, u64 itir, u64 va);
+extern int thash_purge_and_insert(struct kvm_vcpu *v, u64 pte,
+ u64 itir, u64 ifa, int type);
+extern void thash_purge_all(struct kvm_vcpu *v);
+extern struct thash_data *vtlb_lookup(struct kvm_vcpu *v,
+ u64 va, int is_data);
+extern int vtr_find_overlap(struct kvm_vcpu *vcpu, u64 va,
+ u64 ps, int is_data);
+
+extern void vcpu_increment_iip(struct kvm_vcpu *v);
+extern void vcpu_decrement_iip(struct kvm_vcpu *vcpu);
+extern void vcpu_pend_interrupt(struct kvm_vcpu *vcpu, u8 vec);
+extern void vcpu_unpend_interrupt(struct kvm_vcpu *vcpu, u8 vec);
+extern void data_page_not_present(struct kvm_vcpu *vcpu, u64 vadr);
+extern void dnat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr);
+extern void alt_dtlb(struct kvm_vcpu *vcpu, u64 vadr);
+extern void nested_dtlb(struct kvm_vcpu *vcpu);
+extern void dvhpt_fault(struct kvm_vcpu *vcpu, u64 vadr);
+extern int vhpt_enabled(struct kvm_vcpu *vcpu, u64 vadr, enum vhpt_ref ref);
+
+extern void update_vhpi(struct kvm_vcpu *vcpu, int vec);
+extern int irq_masked(struct kvm_vcpu *vcpu, int h_pending, int h_inservice);
+
+extern int fetch_code(struct kvm_vcpu *vcpu, u64 gip, IA64_BUNDLE *pbundle);
+extern void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma);
+extern void vmm_transition(struct kvm_vcpu *vcpu);
+extern void vmm_trampoline(union context *from, union context *to);
+extern int vmm_entry(void);
+extern u64 vcpu_get_itc(struct kvm_vcpu *vcpu);
+
+extern void vmm_reset_entry(void);
+void kvm_init_vtlb(struct kvm_vcpu *v);
+void kvm_init_vhpt(struct kvm_vcpu *v);
+void thash_init(struct thash_cb *hcb, u64 sz);
+
+void panic_vm(struct kvm_vcpu *v);
+
+extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, u64 arg3,
+ u64 arg4, u64 arg5, u64 arg6, u64 arg7);
+#endif
+#endif /* __VCPU_H__ */
diff --git a/arch/ia64/kvm/vmm.c b/arch/ia64/kvm/vmm.c
new file mode 100644
index 000000000000..2275bf4e681a
--- /dev/null
+++ b/arch/ia64/kvm/vmm.c
@@ -0,0 +1,66 @@
+/*
+ * vmm.c: vmm module interface with kvm module
+ *
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * Xiantao Zhang (xiantao.zhang@intel.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+
+#include<linux/module.h>
+#include<asm/fpswa.h>
+
+#include "vcpu.h"
+
+MODULE_AUTHOR("Intel");
+MODULE_LICENSE("GPL");
+
+extern char kvm_ia64_ivt;
+extern fpswa_interface_t *vmm_fpswa_interface;
+
+struct kvm_vmm_info vmm_info = {
+ .module = THIS_MODULE,
+ .vmm_entry = vmm_entry,
+ .tramp_entry = vmm_trampoline,
+ .vmm_ivt = (unsigned long)&kvm_ia64_ivt,
+};
+
+static int __init kvm_vmm_init(void)
+{
+
+ vmm_fpswa_interface = fpswa_interface;
+
+ /*Register vmm data to kvm side*/
+ return kvm_init(&vmm_info, 1024, THIS_MODULE);
+}
+
+static void __exit kvm_vmm_exit(void)
+{
+ kvm_exit();
+ return ;
+}
+
+void vmm_spin_lock(spinlock_t *lock)
+{
+ _vmm_raw_spin_lock(lock);
+}
+
+void vmm_spin_unlock(spinlock_t *lock)
+{
+ _vmm_raw_spin_unlock(lock);
+}
+module_init(kvm_vmm_init)
+module_exit(kvm_vmm_exit)
diff --git a/arch/ia64/kvm/vmm_ivt.S b/arch/ia64/kvm/vmm_ivt.S
new file mode 100644
index 000000000000..3ee5f481c06d
--- /dev/null
+++ b/arch/ia64/kvm/vmm_ivt.S
@@ -0,0 +1,1424 @@
+/*
+ * /ia64/kvm_ivt.S
+ *
+ * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co
+ * Stephane Eranian <eranian@hpl.hp.com>
+ * David Mosberger <davidm@hpl.hp.com>
+ * Copyright (C) 2000, 2002-2003 Intel Co
+ * Asit Mallick <asit.k.mallick@intel.com>
+ * Suresh Siddha <suresh.b.siddha@intel.com>
+ * Kenneth Chen <kenneth.w.chen@intel.com>
+ * Fenghua Yu <fenghua.yu@intel.com>
+ *
+ *
+ * 00/08/23 Asit Mallick <asit.k.mallick@intel.com> TLB handling
+ * for SMP
+ * 00/12/20 David Mosberger-Tang <davidm@hpl.hp.com> DTLB/ITLB
+ * handler now uses virtual PT.
+ *
+ * 07/6/20 Xuefei Xu (Anthony Xu) (anthony.xu@intel.com)
+ * Supporting Intel virtualization architecture
+ *
+ */
+
+/*
+ * This file defines the interruption vector table used by the CPU.
+ * It does not include one entry per possible cause of interruption.
+ *
+ * The first 20 entries of the table contain 64 bundles each while the
+ * remaining 48 entries contain only 16 bundles each.
+ *
+ * The 64 bundles are used to allow inlining the whole handler for
+ * critical
+ * interruptions like TLB misses.
+ *
+ * For each entry, the comment is as follows:
+ *
+ * // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss
+ * (12,51)
+ * entry offset ----/ / / /
+ * /
+ * entry number ---------/ / /
+ * /
+ * size of the entry -------------/ /
+ * /
+ * vector name -------------------------------------/
+ * /
+ * interruptions triggering this vector
+ * ----------------------/
+ *
+ * The table is 32KB in size and must be aligned on 32KB
+ * boundary.
+ * (The CPU ignores the 15 lower bits of the address)
+ *
+ * Table is based upon EAS2.6 (Oct 1999)
+ */
+
+
+#include <asm/asmmacro.h>
+#include <asm/cache.h>
+#include <asm/pgtable.h>
+
+#include "asm-offsets.h"
+#include "vcpu.h"
+#include "kvm_minstate.h"
+#include "vti.h"
+
+#if 1
+# define PSR_DEFAULT_BITS psr.ac
+#else
+# define PSR_DEFAULT_BITS 0
+#endif
+
+
+#define KVM_FAULT(n) \
+ kvm_fault_##n:; \
+ mov r19=n;; \
+ br.sptk.many kvm_fault_##n; \
+ ;; \
+
+
+#define KVM_REFLECT(n) \
+ mov r31=pr; \
+ mov r19=n; /* prepare to save predicates */ \
+ mov r29=cr.ipsr; \
+ ;; \
+ tbit.z p6,p7=r29,IA64_PSR_VM_BIT; \
+(p7)br.sptk.many kvm_dispatch_reflection; \
+ br.sptk.many kvm_panic; \
+
+
+GLOBAL_ENTRY(kvm_panic)
+ br.sptk.many kvm_panic
+ ;;
+END(kvm_panic)
+
+
+
+
+
+ .section .text.ivt,"ax"
+
+ .align 32768 // align on 32KB boundary
+ .global kvm_ia64_ivt
+kvm_ia64_ivt:
+///////////////////////////////////////////////////////////////
+// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47)
+ENTRY(kvm_vhpt_miss)
+ KVM_FAULT(0)
+END(kvm_vhpt_miss)
+
+
+ .org kvm_ia64_ivt+0x400
+////////////////////////////////////////////////////////////////
+// 0x0400 Entry 1 (size 64 bundles) ITLB (21)
+ENTRY(kvm_itlb_miss)
+ mov r31 = pr
+ mov r29=cr.ipsr;
+ ;;
+ tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
+ (p6) br.sptk kvm_alt_itlb_miss
+ mov r19 = 1
+ br.sptk kvm_itlb_miss_dispatch
+ KVM_FAULT(1);
+END(kvm_itlb_miss)
+
+ .org kvm_ia64_ivt+0x0800
+//////////////////////////////////////////////////////////////////
+// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48)
+ENTRY(kvm_dtlb_miss)
+ mov r31 = pr
+ mov r29=cr.ipsr;
+ ;;
+ tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
+(p6)br.sptk kvm_alt_dtlb_miss
+ br.sptk kvm_dtlb_miss_dispatch
+END(kvm_dtlb_miss)
+
+ .org kvm_ia64_ivt+0x0c00
+////////////////////////////////////////////////////////////////////
+// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19)
+ENTRY(kvm_alt_itlb_miss)
+ mov r16=cr.ifa // get address that caused the TLB miss
+ ;;
+ movl r17=PAGE_KERNEL
+ mov r24=cr.ipsr
+ movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
+ ;;
+ and r19=r19,r16 // clear ed, reserved bits, and PTE control bits
+ ;;
+ or r19=r17,r19 // insert PTE control bits into r19
+ ;;
+ movl r20=IA64_GRANULE_SHIFT<<2
+ ;;
+ mov cr.itir=r20
+ ;;
+ itc.i r19 // insert the TLB entry
+ mov pr=r31,-1
+ rfi
+END(kvm_alt_itlb_miss)
+
+ .org kvm_ia64_ivt+0x1000
+/////////////////////////////////////////////////////////////////////
+// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46)
+ENTRY(kvm_alt_dtlb_miss)
+ mov r16=cr.ifa // get address that caused the TLB miss
+ ;;
+ movl r17=PAGE_KERNEL
+ movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
+ mov r24=cr.ipsr
+ ;;
+ and r19=r19,r16 // clear ed, reserved bits, and PTE control bits
+ ;;
+ or r19=r19,r17 // insert PTE control bits into r19
+ ;;
+ movl r20=IA64_GRANULE_SHIFT<<2
+ ;;
+ mov cr.itir=r20
+ ;;
+ itc.d r19 // insert the TLB entry
+ mov pr=r31,-1
+ rfi
+END(kvm_alt_dtlb_miss)
+
+ .org kvm_ia64_ivt+0x1400
+//////////////////////////////////////////////////////////////////////
+// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45)
+ENTRY(kvm_nested_dtlb_miss)
+ KVM_FAULT(5)
+END(kvm_nested_dtlb_miss)
+
+ .org kvm_ia64_ivt+0x1800
+/////////////////////////////////////////////////////////////////////
+// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24)
+ENTRY(kvm_ikey_miss)
+ KVM_REFLECT(6)
+END(kvm_ikey_miss)
+
+ .org kvm_ia64_ivt+0x1c00
+/////////////////////////////////////////////////////////////////////
+// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
+ENTRY(kvm_dkey_miss)
+ KVM_REFLECT(7)
+END(kvm_dkey_miss)
+
+ .org kvm_ia64_ivt+0x2000
+////////////////////////////////////////////////////////////////////
+// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54)
+ENTRY(kvm_dirty_bit)
+ KVM_REFLECT(8)
+END(kvm_dirty_bit)
+
+ .org kvm_ia64_ivt+0x2400
+////////////////////////////////////////////////////////////////////
+// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27)
+ENTRY(kvm_iaccess_bit)
+ KVM_REFLECT(9)
+END(kvm_iaccess_bit)
+
+ .org kvm_ia64_ivt+0x2800
+///////////////////////////////////////////////////////////////////
+// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55)
+ENTRY(kvm_daccess_bit)
+ KVM_REFLECT(10)
+END(kvm_daccess_bit)
+
+ .org kvm_ia64_ivt+0x2c00
+/////////////////////////////////////////////////////////////////
+// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33)
+ENTRY(kvm_break_fault)
+ mov r31=pr
+ mov r19=11
+ mov r29=cr.ipsr
+ ;;
+ KVM_SAVE_MIN_WITH_COVER_R19
+ ;;
+ alloc r14=ar.pfs,0,0,4,0 // now it's safe (must be first in insn group!)
+ mov out0=cr.ifa
+ mov out2=cr.isr // FIXME: pity to make this slow access twice
+ mov out3=cr.iim // FIXME: pity to make this slow access twice
+ adds r3=8,r2 // set up second base pointer
+ ;;
+ ssm psr.ic
+ ;;
+ srlz.i // guarantee that interruption collection is on
+ ;;
+ //(p15)ssm psr.i // restore psr.i
+ addl r14=@gprel(ia64_leave_hypervisor),gp
+ ;;
+ KVM_SAVE_REST
+ mov rp=r14
+ ;;
+ adds out1=16,sp
+ br.call.sptk.many b6=kvm_ia64_handle_break
+ ;;
+END(kvm_break_fault)
+
+ .org kvm_ia64_ivt+0x3000
+/////////////////////////////////////////////////////////////////
+// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4)
+ENTRY(kvm_interrupt)
+ mov r31=pr // prepare to save predicates
+ mov r19=12
+ mov r29=cr.ipsr
+ ;;
+ tbit.z p6,p7=r29,IA64_PSR_VM_BIT
+ tbit.z p0,p15=r29,IA64_PSR_I_BIT
+ ;;
+(p7) br.sptk kvm_dispatch_interrupt
+ ;;
+ mov r27=ar.rsc /* M */
+ mov r20=r1 /* A */
+ mov r25=ar.unat /* M */
+ mov r26=ar.pfs /* I */
+ mov r28=cr.iip /* M */
+ cover /* B (or nothing) */
+ ;;
+ mov r1=sp
+ ;;
+ invala /* M */
+ mov r30=cr.ifs
+ ;;
+ addl r1=-VMM_PT_REGS_SIZE,r1
+ ;;
+ adds r17=2*L1_CACHE_BYTES,r1 /* really: biggest cache-line size */
+ adds r16=PT(CR_IPSR),r1
+ ;;
+ lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES
+ st8 [r16]=r29 /* save cr.ipsr */
+ ;;
+ lfetch.fault.excl.nt1 [r17]
+ mov r29=b0
+ ;;
+ adds r16=PT(R8),r1 /* initialize first base pointer */
+ adds r17=PT(R9),r1 /* initialize second base pointer */
+ mov r18=r0 /* make sure r18 isn't NaT */
+ ;;
+.mem.offset 0,0; st8.spill [r16]=r8,16
+.mem.offset 8,0; st8.spill [r17]=r9,16
+ ;;
+.mem.offset 0,0; st8.spill [r16]=r10,24
+.mem.offset 8,0; st8.spill [r17]=r11,24
+ ;;
+ st8 [r16]=r28,16 /* save cr.iip */
+ st8 [r17]=r30,16 /* save cr.ifs */
+ mov r8=ar.fpsr /* M */
+ mov r9=ar.csd
+ mov r10=ar.ssd
+ movl r11=FPSR_DEFAULT /* L-unit */
+ ;;
+ st8 [r16]=r25,16 /* save ar.unat */
+ st8 [r17]=r26,16 /* save ar.pfs */
+ shl r18=r18,16 /* compute ar.rsc to be used for "loadrs" */
+ ;;
+ st8 [r16]=r27,16 /* save ar.rsc */
+ adds r17=16,r17 /* skip over ar_rnat field */
+ ;;
+ st8 [r17]=r31,16 /* save predicates */
+ adds r16=16,r16 /* skip over ar_bspstore field */
+ ;;
+ st8 [r16]=r29,16 /* save b0 */
+ st8 [r17]=r18,16 /* save ar.rsc value for "loadrs" */
+ ;;
+.mem.offset 0,0; st8.spill [r16]=r20,16 /* save original r1 */
+.mem.offset 8,0; st8.spill [r17]=r12,16
+ adds r12=-16,r1
+ /* switch to kernel memory stack (with 16 bytes of scratch) */
+ ;;
+.mem.offset 0,0; st8.spill [r16]=r13,16
+.mem.offset 8,0; st8.spill [r17]=r8,16 /* save ar.fpsr */
+ ;;
+.mem.offset 0,0; st8.spill [r16]=r15,16
+.mem.offset 8,0; st8.spill [r17]=r14,16
+ dep r14=-1,r0,60,4
+ ;;
+.mem.offset 0,0; st8.spill [r16]=r2,16
+.mem.offset 8,0; st8.spill [r17]=r3,16
+ adds r2=VMM_PT_REGS_R16_OFFSET,r1
+ adds r14 = VMM_VCPU_GP_OFFSET,r13
+ ;;
+ mov r8=ar.ccv
+ ld8 r14 = [r14]
+ ;;
+ mov r1=r14 /* establish kernel global pointer */
+ ;; \
+ bsw.1
+ ;;
+ alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group
+ mov out0=r13
+ ;;
+ ssm psr.ic
+ ;;
+ srlz.i
+ ;;
+ //(p15) ssm psr.i
+ adds r3=8,r2 // set up second base pointer for SAVE_REST
+ srlz.i // ensure everybody knows psr.ic is back on
+ ;;
+.mem.offset 0,0; st8.spill [r2]=r16,16
+.mem.offset 8,0; st8.spill [r3]=r17,16
+ ;;
+.mem.offset 0,0; st8.spill [r2]=r18,16
+.mem.offset 8,0; st8.spill [r3]=r19,16
+ ;;
+.mem.offset 0,0; st8.spill [r2]=r20,16
+.mem.offset 8,0; st8.spill [r3]=r21,16
+ mov r18=b6
+ ;;
+.mem.offset 0,0; st8.spill [r2]=r22,16
+.mem.offset 8,0; st8.spill [r3]=r23,16
+ mov r19=b7
+ ;;
+.mem.offset 0,0; st8.spill [r2]=r24,16
+.mem.offset 8,0; st8.spill [r3]=r25,16
+ ;;
+.mem.offset 0,0; st8.spill [r2]=r26,16
+.mem.offset 8,0; st8.spill [r3]=r27,16
+ ;;
+.mem.offset 0,0; st8.spill [r2]=r28,16
+.mem.offset 8,0; st8.spill [r3]=r29,16
+ ;;
+.mem.offset 0,0; st8.spill [r2]=r30,16
+.mem.offset 8,0; st8.spill [r3]=r31,32
+ ;;
+ mov ar.fpsr=r11 /* M-unit */
+ st8 [r2]=r8,8 /* ar.ccv */
+ adds r24=PT(B6)-PT(F7),r3
+ ;;
+ stf.spill [r2]=f6,32
+ stf.spill [r3]=f7,32
+ ;;
+ stf.spill [r2]=f8,32
+ stf.spill [r3]=f9,32
+ ;;
+ stf.spill [r2]=f10
+ stf.spill [r3]=f11
+ adds r25=PT(B7)-PT(F11),r3
+ ;;
+ st8 [r24]=r18,16 /* b6 */
+ st8 [r25]=r19,16 /* b7 */
+ ;;
+ st8 [r24]=r9 /* ar.csd */
+ st8 [r25]=r10 /* ar.ssd */
+ ;;
+ srlz.d // make sure we see the effect of cr.ivr
+ addl r14=@gprel(ia64_leave_nested),gp
+ ;;
+ mov rp=r14
+ br.call.sptk.many b6=kvm_ia64_handle_irq
+ ;;
+END(kvm_interrupt)
+
+ .global kvm_dispatch_vexirq
+ .org kvm_ia64_ivt+0x3400
+//////////////////////////////////////////////////////////////////////
+// 0x3400 Entry 13 (size 64 bundles) Reserved
+ENTRY(kvm_virtual_exirq)
+ mov r31=pr
+ mov r19=13
+ mov r30 =r0
+ ;;
+kvm_dispatch_vexirq:
+ cmp.eq p6,p0 = 1,r30
+ ;;
+(p6)add r29 = VMM_VCPU_SAVED_GP_OFFSET,r21
+ ;;
+(p6)ld8 r1 = [r29]
+ ;;
+ KVM_SAVE_MIN_WITH_COVER_R19
+ alloc r14=ar.pfs,0,0,1,0
+ mov out0=r13
+
+ ssm psr.ic
+ ;;
+ srlz.i // guarantee that interruption collection is on
+ ;;
+ //(p15) ssm psr.i // restore psr.i
+ adds r3=8,r2 // set up second base pointer
+ ;;
+ KVM_SAVE_REST
+ addl r14=@gprel(ia64_leave_hypervisor),gp
+ ;;
+ mov rp=r14
+ br.call.sptk.many b6=kvm_vexirq
+END(kvm_virtual_exirq)
+
+ .org kvm_ia64_ivt+0x3800
+/////////////////////////////////////////////////////////////////////
+// 0x3800 Entry 14 (size 64 bundles) Reserved
+ KVM_FAULT(14)
+ // this code segment is from 2.6.16.13
+
+
+ .org kvm_ia64_ivt+0x3c00
+///////////////////////////////////////////////////////////////////////
+// 0x3c00 Entry 15 (size 64 bundles) Reserved
+ KVM_FAULT(15)
+
+
+ .org kvm_ia64_ivt+0x4000
+///////////////////////////////////////////////////////////////////////
+// 0x4000 Entry 16 (size 64 bundles) Reserved
+ KVM_FAULT(16)
+
+ .org kvm_ia64_ivt+0x4400
+//////////////////////////////////////////////////////////////////////
+// 0x4400 Entry 17 (size 64 bundles) Reserved
+ KVM_FAULT(17)
+
+ .org kvm_ia64_ivt+0x4800
+//////////////////////////////////////////////////////////////////////
+// 0x4800 Entry 18 (size 64 bundles) Reserved
+ KVM_FAULT(18)
+
+ .org kvm_ia64_ivt+0x4c00
+//////////////////////////////////////////////////////////////////////
+// 0x4c00 Entry 19 (size 64 bundles) Reserved
+ KVM_FAULT(19)
+
+ .org kvm_ia64_ivt+0x5000
+//////////////////////////////////////////////////////////////////////
+// 0x5000 Entry 20 (size 16 bundles) Page Not Present
+ENTRY(kvm_page_not_present)
+ KVM_REFLECT(20)
+END(kvm_page_not_present)
+
+ .org kvm_ia64_ivt+0x5100
+///////////////////////////////////////////////////////////////////////
+// 0x5100 Entry 21 (size 16 bundles) Key Permission vector
+ENTRY(kvm_key_permission)
+ KVM_REFLECT(21)
+END(kvm_key_permission)
+
+ .org kvm_ia64_ivt+0x5200
+//////////////////////////////////////////////////////////////////////
+// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26)
+ENTRY(kvm_iaccess_rights)
+ KVM_REFLECT(22)
+END(kvm_iaccess_rights)
+
+ .org kvm_ia64_ivt+0x5300
+//////////////////////////////////////////////////////////////////////
+// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53)
+ENTRY(kvm_daccess_rights)
+ KVM_REFLECT(23)
+END(kvm_daccess_rights)
+
+ .org kvm_ia64_ivt+0x5400
+/////////////////////////////////////////////////////////////////////
+// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39)
+ENTRY(kvm_general_exception)
+ KVM_REFLECT(24)
+ KVM_FAULT(24)
+END(kvm_general_exception)
+
+ .org kvm_ia64_ivt+0x5500
+//////////////////////////////////////////////////////////////////////
+// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35)
+ENTRY(kvm_disabled_fp_reg)
+ KVM_REFLECT(25)
+END(kvm_disabled_fp_reg)
+
+ .org kvm_ia64_ivt+0x5600
+////////////////////////////////////////////////////////////////////
+// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50)
+ENTRY(kvm_nat_consumption)
+ KVM_REFLECT(26)
+END(kvm_nat_consumption)
+
+ .org kvm_ia64_ivt+0x5700
+/////////////////////////////////////////////////////////////////////
+// 0x5700 Entry 27 (size 16 bundles) Speculation (40)
+ENTRY(kvm_speculation_vector)
+ KVM_REFLECT(27)
+END(kvm_speculation_vector)
+
+ .org kvm_ia64_ivt+0x5800
+/////////////////////////////////////////////////////////////////////
+// 0x5800 Entry 28 (size 16 bundles) Reserved
+ KVM_FAULT(28)
+
+ .org kvm_ia64_ivt+0x5900
+///////////////////////////////////////////////////////////////////
+// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56)
+ENTRY(kvm_debug_vector)
+ KVM_FAULT(29)
+END(kvm_debug_vector)
+
+ .org kvm_ia64_ivt+0x5a00
+///////////////////////////////////////////////////////////////
+// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57)
+ENTRY(kvm_unaligned_access)
+ KVM_REFLECT(30)
+END(kvm_unaligned_access)
+
+ .org kvm_ia64_ivt+0x5b00
+//////////////////////////////////////////////////////////////////////
+// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57)
+ENTRY(kvm_unsupported_data_reference)
+ KVM_REFLECT(31)
+END(kvm_unsupported_data_reference)
+
+ .org kvm_ia64_ivt+0x5c00
+////////////////////////////////////////////////////////////////////
+// 0x5c00 Entry 32 (size 16 bundles) Floating Point FAULT (65)
+ENTRY(kvm_floating_point_fault)
+ KVM_REFLECT(32)
+END(kvm_floating_point_fault)
+
+ .org kvm_ia64_ivt+0x5d00
+/////////////////////////////////////////////////////////////////////
+// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66)
+ENTRY(kvm_floating_point_trap)
+ KVM_REFLECT(33)
+END(kvm_floating_point_trap)
+
+ .org kvm_ia64_ivt+0x5e00
+//////////////////////////////////////////////////////////////////////
+// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66)
+ENTRY(kvm_lower_privilege_trap)
+ KVM_REFLECT(34)
+END(kvm_lower_privilege_trap)
+
+ .org kvm_ia64_ivt+0x5f00
+//////////////////////////////////////////////////////////////////////
+// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68)
+ENTRY(kvm_taken_branch_trap)
+ KVM_REFLECT(35)
+END(kvm_taken_branch_trap)
+
+ .org kvm_ia64_ivt+0x6000
+////////////////////////////////////////////////////////////////////
+// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69)
+ENTRY(kvm_single_step_trap)
+ KVM_REFLECT(36)
+END(kvm_single_step_trap)
+ .global kvm_virtualization_fault_back
+ .org kvm_ia64_ivt+0x6100
+/////////////////////////////////////////////////////////////////////
+// 0x6100 Entry 37 (size 16 bundles) Virtualization Fault
+ENTRY(kvm_virtualization_fault)
+ mov r31=pr
+ adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
+ ;;
+ st8 [r16] = r1
+ adds r17 = VMM_VCPU_GP_OFFSET, r21
+ ;;
+ ld8 r1 = [r17]
+ cmp.eq p6,p0=EVENT_MOV_FROM_AR,r24
+ cmp.eq p7,p0=EVENT_MOV_FROM_RR,r24
+ cmp.eq p8,p0=EVENT_MOV_TO_RR,r24
+ cmp.eq p9,p0=EVENT_RSM,r24
+ cmp.eq p10,p0=EVENT_SSM,r24
+ cmp.eq p11,p0=EVENT_MOV_TO_PSR,r24
+ cmp.eq p12,p0=EVENT_THASH,r24
+ (p6) br.dptk.many kvm_asm_mov_from_ar
+ (p7) br.dptk.many kvm_asm_mov_from_rr
+ (p8) br.dptk.many kvm_asm_mov_to_rr
+ (p9) br.dptk.many kvm_asm_rsm
+ (p10) br.dptk.many kvm_asm_ssm
+ (p11) br.dptk.many kvm_asm_mov_to_psr
+ (p12) br.dptk.many kvm_asm_thash
+ ;;
+kvm_virtualization_fault_back:
+ adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
+ ;;
+ ld8 r1 = [r16]
+ ;;
+ mov r19=37
+ adds r16 = VMM_VCPU_CAUSE_OFFSET,r21
+ adds r17 = VMM_VCPU_OPCODE_OFFSET,r21
+ ;;
+ st8 [r16] = r24
+ st8 [r17] = r25
+ ;;
+ cmp.ne p6,p0=EVENT_RFI, r24
+ (p6) br.sptk kvm_dispatch_virtualization_fault
+ ;;
+ adds r18=VMM_VPD_BASE_OFFSET,r21
+ ;;
+ ld8 r18=[r18]
+ ;;
+ adds r18=VMM_VPD_VIFS_OFFSET,r18
+ ;;
+ ld8 r18=[r18]
+ ;;
+ tbit.z p6,p0=r18,63
+ (p6) br.sptk kvm_dispatch_virtualization_fault
+ ;;
+ //if vifs.v=1 desert current register frame
+ alloc r18=ar.pfs,0,0,0,0
+ br.sptk kvm_dispatch_virtualization_fault
+END(kvm_virtualization_fault)
+
+ .org kvm_ia64_ivt+0x6200
+//////////////////////////////////////////////////////////////
+// 0x6200 Entry 38 (size 16 bundles) Reserved
+ KVM_FAULT(38)
+
+ .org kvm_ia64_ivt+0x6300
+/////////////////////////////////////////////////////////////////
+// 0x6300 Entry 39 (size 16 bundles) Reserved
+ KVM_FAULT(39)
+
+ .org kvm_ia64_ivt+0x6400
+/////////////////////////////////////////////////////////////////
+// 0x6400 Entry 40 (size 16 bundles) Reserved
+ KVM_FAULT(40)
+
+ .org kvm_ia64_ivt+0x6500
+//////////////////////////////////////////////////////////////////
+// 0x6500 Entry 41 (size 16 bundles) Reserved
+ KVM_FAULT(41)
+
+ .org kvm_ia64_ivt+0x6600
+//////////////////////////////////////////////////////////////////
+// 0x6600 Entry 42 (size 16 bundles) Reserved
+ KVM_FAULT(42)
+
+ .org kvm_ia64_ivt+0x6700
+//////////////////////////////////////////////////////////////////
+// 0x6700 Entry 43 (size 16 bundles) Reserved
+ KVM_FAULT(43)
+
+ .org kvm_ia64_ivt+0x6800
+//////////////////////////////////////////////////////////////////
+// 0x6800 Entry 44 (size 16 bundles) Reserved
+ KVM_FAULT(44)
+
+ .org kvm_ia64_ivt+0x6900
+///////////////////////////////////////////////////////////////////
+// 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception
+//(17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77)
+ENTRY(kvm_ia32_exception)
+ KVM_FAULT(45)
+END(kvm_ia32_exception)
+
+ .org kvm_ia64_ivt+0x6a00
+////////////////////////////////////////////////////////////////////
+// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71)
+ENTRY(kvm_ia32_intercept)
+ KVM_FAULT(47)
+END(kvm_ia32_intercept)
+
+ .org kvm_ia64_ivt+0x6c00
+/////////////////////////////////////////////////////////////////////
+// 0x6c00 Entry 48 (size 16 bundles) Reserved
+ KVM_FAULT(48)
+
+ .org kvm_ia64_ivt+0x6d00
+//////////////////////////////////////////////////////////////////////
+// 0x6d00 Entry 49 (size 16 bundles) Reserved
+ KVM_FAULT(49)
+
+ .org kvm_ia64_ivt+0x6e00
+//////////////////////////////////////////////////////////////////////
+// 0x6e00 Entry 50 (size 16 bundles) Reserved
+ KVM_FAULT(50)
+
+ .org kvm_ia64_ivt+0x6f00
+/////////////////////////////////////////////////////////////////////
+// 0x6f00 Entry 51 (size 16 bundles) Reserved
+ KVM_FAULT(52)
+
+ .org kvm_ia64_ivt+0x7100
+////////////////////////////////////////////////////////////////////
+// 0x7100 Entry 53 (size 16 bundles) Reserved
+ KVM_FAULT(53)
+
+ .org kvm_ia64_ivt+0x7200
+/////////////////////////////////////////////////////////////////////
+// 0x7200 Entry 54 (size 16 bundles) Reserved
+ KVM_FAULT(54)
+
+ .org kvm_ia64_ivt+0x7300
+////////////////////////////////////////////////////////////////////
+// 0x7300 Entry 55 (size 16 bundles) Reserved
+ KVM_FAULT(55)
+
+ .org kvm_ia64_ivt+0x7400
+////////////////////////////////////////////////////////////////////
+// 0x7400 Entry 56 (size 16 bundles) Reserved
+ KVM_FAULT(56)
+
+ .org kvm_ia64_ivt+0x7500
+/////////////////////////////////////////////////////////////////////
+// 0x7500 Entry 57 (size 16 bundles) Reserved
+ KVM_FAULT(57)
+
+ .org kvm_ia64_ivt+0x7600
+/////////////////////////////////////////////////////////////////////
+// 0x7600 Entry 58 (size 16 bundles) Reserved
+ KVM_FAULT(58)
+
+ .org kvm_ia64_ivt+0x7700
+////////////////////////////////////////////////////////////////////
+// 0x7700 Entry 59 (size 16 bundles) Reserved
+ KVM_FAULT(59)
+
+ .org kvm_ia64_ivt+0x7800
+////////////////////////////////////////////////////////////////////
+// 0x7800 Entry 60 (size 16 bundles) Reserved
+ KVM_FAULT(60)
+
+ .org kvm_ia64_ivt+0x7900
+/////////////////////////////////////////////////////////////////////
+// 0x7900 Entry 61 (size 16 bundles) Reserved
+ KVM_FAULT(61)
+
+ .org kvm_ia64_ivt+0x7a00
+/////////////////////////////////////////////////////////////////////
+// 0x7a00 Entry 62 (size 16 bundles) Reserved
+ KVM_FAULT(62)
+
+ .org kvm_ia64_ivt+0x7b00
+/////////////////////////////////////////////////////////////////////
+// 0x7b00 Entry 63 (size 16 bundles) Reserved
+ KVM_FAULT(63)
+
+ .org kvm_ia64_ivt+0x7c00
+////////////////////////////////////////////////////////////////////
+// 0x7c00 Entry 64 (size 16 bundles) Reserved
+ KVM_FAULT(64)
+
+ .org kvm_ia64_ivt+0x7d00
+/////////////////////////////////////////////////////////////////////
+// 0x7d00 Entry 65 (size 16 bundles) Reserved
+ KVM_FAULT(65)
+
+ .org kvm_ia64_ivt+0x7e00
+/////////////////////////////////////////////////////////////////////
+// 0x7e00 Entry 66 (size 16 bundles) Reserved
+ KVM_FAULT(66)
+
+ .org kvm_ia64_ivt+0x7f00
+////////////////////////////////////////////////////////////////////
+// 0x7f00 Entry 67 (size 16 bundles) Reserved
+ KVM_FAULT(67)
+
+ .org kvm_ia64_ivt+0x8000
+// There is no particular reason for this code to be here, other than that
+// there happens to be space here that would go unused otherwise. If this
+// fault ever gets "unreserved", simply moved the following code to a more
+// suitable spot...
+
+
+ENTRY(kvm_dtlb_miss_dispatch)
+ mov r19 = 2
+ KVM_SAVE_MIN_WITH_COVER_R19
+ alloc r14=ar.pfs,0,0,3,0
+ mov out0=cr.ifa
+ mov out1=r15
+ adds r3=8,r2 // set up second base pointer
+ ;;
+ ssm psr.ic
+ ;;
+ srlz.i // guarantee that interruption collection is on
+ ;;
+ //(p15) ssm psr.i // restore psr.i
+ addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
+ ;;
+ KVM_SAVE_REST
+ KVM_SAVE_EXTRA
+ mov rp=r14
+ ;;
+ adds out2=16,r12
+ br.call.sptk.many b6=kvm_page_fault
+END(kvm_dtlb_miss_dispatch)
+
+ENTRY(kvm_itlb_miss_dispatch)
+
+ KVM_SAVE_MIN_WITH_COVER_R19
+ alloc r14=ar.pfs,0,0,3,0
+ mov out0=cr.ifa
+ mov out1=r15
+ adds r3=8,r2 // set up second base pointer
+ ;;
+ ssm psr.ic
+ ;;
+ srlz.i // guarantee that interruption collection is on
+ ;;
+ //(p15) ssm psr.i // restore psr.i
+ addl r14=@gprel(ia64_leave_hypervisor),gp
+ ;;
+ KVM_SAVE_REST
+ mov rp=r14
+ ;;
+ adds out2=16,r12
+ br.call.sptk.many b6=kvm_page_fault
+END(kvm_itlb_miss_dispatch)
+
+ENTRY(kvm_dispatch_reflection)
+ /*
+ * Input:
+ * psr.ic: off
+ * r19: intr type (offset into ivt, see ia64_int.h)
+ * r31: contains saved predicates (pr)
+ */
+ KVM_SAVE_MIN_WITH_COVER_R19
+ alloc r14=ar.pfs,0,0,5,0
+ mov out0=cr.ifa
+ mov out1=cr.isr
+ mov out2=cr.iim
+ mov out3=r15
+ adds r3=8,r2 // set up second base pointer
+ ;;
+ ssm psr.ic
+ ;;
+ srlz.i // guarantee that interruption collection is on
+ ;;
+ //(p15) ssm psr.i // restore psr.i
+ addl r14=@gprel(ia64_leave_hypervisor),gp
+ ;;
+ KVM_SAVE_REST
+ mov rp=r14
+ ;;
+ adds out4=16,r12
+ br.call.sptk.many b6=reflect_interruption
+END(kvm_dispatch_reflection)
+
+ENTRY(kvm_dispatch_virtualization_fault)
+ adds r16 = VMM_VCPU_CAUSE_OFFSET,r21
+ adds r17 = VMM_VCPU_OPCODE_OFFSET,r21
+ ;;
+ st8 [r16] = r24
+ st8 [r17] = r25
+ ;;
+ KVM_SAVE_MIN_WITH_COVER_R19
+ ;;
+ alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!)
+ mov out0=r13 //vcpu
+ adds r3=8,r2 // set up second base pointer
+ ;;
+ ssm psr.ic
+ ;;
+ srlz.i // guarantee that interruption collection is on
+ ;;
+ //(p15) ssm psr.i // restore psr.i
+ addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
+ ;;
+ KVM_SAVE_REST
+ KVM_SAVE_EXTRA
+ mov rp=r14
+ ;;
+ adds out1=16,sp //regs
+ br.call.sptk.many b6=kvm_emulate
+END(kvm_dispatch_virtualization_fault)
+
+
+ENTRY(kvm_dispatch_interrupt)
+ KVM_SAVE_MIN_WITH_COVER_R19 // uses r31; defines r2 and r3
+ ;;
+ alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group
+ //mov out0=cr.ivr // pass cr.ivr as first arg
+ adds r3=8,r2 // set up second base pointer for SAVE_REST
+ ;;
+ ssm psr.ic
+ ;;
+ srlz.i
+ ;;
+ //(p15) ssm psr.i
+ addl r14=@gprel(ia64_leave_hypervisor),gp
+ ;;
+ KVM_SAVE_REST
+ mov rp=r14
+ ;;
+ mov out0=r13 // pass pointer to pt_regs as second arg
+ br.call.sptk.many b6=kvm_ia64_handle_irq
+END(kvm_dispatch_interrupt)
+
+
+
+
+GLOBAL_ENTRY(ia64_leave_nested)
+ rsm psr.i
+ ;;
+ adds r21=PT(PR)+16,r12
+ ;;
+ lfetch [r21],PT(CR_IPSR)-PT(PR)
+ adds r2=PT(B6)+16,r12
+ adds r3=PT(R16)+16,r12
+ ;;
+ lfetch [r21]
+ ld8 r28=[r2],8 // load b6
+ adds r29=PT(R24)+16,r12
+
+ ld8.fill r16=[r3]
+ adds r3=PT(AR_CSD)-PT(R16),r3
+ adds r30=PT(AR_CCV)+16,r12
+ ;;
+ ld8.fill r24=[r29]
+ ld8 r15=[r30] // load ar.ccv
+ ;;
+ ld8 r29=[r2],16 // load b7
+ ld8 r30=[r3],16 // load ar.csd
+ ;;
+ ld8 r31=[r2],16 // load ar.ssd
+ ld8.fill r8=[r3],16
+ ;;
+ ld8.fill r9=[r2],16
+ ld8.fill r10=[r3],PT(R17)-PT(R10)
+ ;;
+ ld8.fill r11=[r2],PT(R18)-PT(R11)
+ ld8.fill r17=[r3],16
+ ;;
+ ld8.fill r18=[r2],16
+ ld8.fill r19=[r3],16
+ ;;
+ ld8.fill r20=[r2],16
+ ld8.fill r21=[r3],16
+ mov ar.csd=r30
+ mov ar.ssd=r31
+ ;;
+ rsm psr.i | psr.ic
+ // initiate turning off of interrupt and interruption collection
+ invala // invalidate ALAT
+ ;;
+ srlz.i
+ ;;
+ ld8.fill r22=[r2],24
+ ld8.fill r23=[r3],24
+ mov b6=r28
+ ;;
+ ld8.fill r25=[r2],16
+ ld8.fill r26=[r3],16
+ mov b7=r29
+ ;;
+ ld8.fill r27=[r2],16
+ ld8.fill r28=[r3],16
+ ;;
+ ld8.fill r29=[r2],16
+ ld8.fill r30=[r3],24
+ ;;
+ ld8.fill r31=[r2],PT(F9)-PT(R31)
+ adds r3=PT(F10)-PT(F6),r3
+ ;;
+ ldf.fill f9=[r2],PT(F6)-PT(F9)
+ ldf.fill f10=[r3],PT(F8)-PT(F10)
+ ;;
+ ldf.fill f6=[r2],PT(F7)-PT(F6)
+ ;;
+ ldf.fill f7=[r2],PT(F11)-PT(F7)
+ ldf.fill f8=[r3],32
+ ;;
+ srlz.i // ensure interruption collection is off
+ mov ar.ccv=r15
+ ;;
+ bsw.0 // switch back to bank 0 (no stop bit required beforehand...)
+ ;;
+ ldf.fill f11=[r2]
+// mov r18=r13
+// mov r21=r13
+ adds r16=PT(CR_IPSR)+16,r12
+ adds r17=PT(CR_IIP)+16,r12
+ ;;
+ ld8 r29=[r16],16 // load cr.ipsr
+ ld8 r28=[r17],16 // load cr.iip
+ ;;
+ ld8 r30=[r16],16 // load cr.ifs
+ ld8 r25=[r17],16 // load ar.unat
+ ;;
+ ld8 r26=[r16],16 // load ar.pfs
+ ld8 r27=[r17],16 // load ar.rsc
+ cmp.eq p9,p0=r0,r0
+ // set p9 to indicate that we should restore cr.ifs
+ ;;
+ ld8 r24=[r16],16 // load ar.rnat (may be garbage)
+ ld8 r23=[r17],16// load ar.bspstore (may be garbage)
+ ;;
+ ld8 r31=[r16],16 // load predicates
+ ld8 r22=[r17],16 // load b0
+ ;;
+ ld8 r19=[r16],16 // load ar.rsc value for "loadrs"
+ ld8.fill r1=[r17],16 // load r1
+ ;;
+ ld8.fill r12=[r16],16
+ ld8.fill r13=[r17],16
+ ;;
+ ld8 r20=[r16],16 // ar.fpsr
+ ld8.fill r15=[r17],16
+ ;;
+ ld8.fill r14=[r16],16
+ ld8.fill r2=[r17]
+ ;;
+ ld8.fill r3=[r16]
+ ;;
+ mov r16=ar.bsp // get existing backing store pointer
+ ;;
+ mov b0=r22
+ mov ar.pfs=r26
+ mov cr.ifs=r30
+ mov cr.ipsr=r29
+ mov ar.fpsr=r20
+ mov cr.iip=r28
+ ;;
+ mov ar.rsc=r27
+ mov ar.unat=r25
+ mov pr=r31,-1
+ rfi
+END(ia64_leave_nested)
+
+
+
+GLOBAL_ENTRY(ia64_leave_hypervisor_prepare)
+ /*
+ * work.need_resched etc. mustn't get changed
+ *by this CPU before it returns to
+ ;;
+ * user- or fsys-mode, hence we disable interrupts early on:
+ */
+ adds r2 = PT(R4)+16,r12
+ adds r3 = PT(R5)+16,r12
+ adds r8 = PT(EML_UNAT)+16,r12
+ ;;
+ ld8 r8 = [r8]
+ ;;
+ mov ar.unat=r8
+ ;;
+ ld8.fill r4=[r2],16 //load r4
+ ld8.fill r5=[r3],16 //load r5
+ ;;
+ ld8.fill r6=[r2] //load r6
+ ld8.fill r7=[r3] //load r7
+ ;;
+END(ia64_leave_hypervisor_prepare)
+//fall through
+GLOBAL_ENTRY(ia64_leave_hypervisor)
+ rsm psr.i
+ ;;
+ br.call.sptk.many b0=leave_hypervisor_tail
+ ;;
+ adds r20=PT(PR)+16,r12
+ adds r8=PT(EML_UNAT)+16,r12
+ ;;
+ ld8 r8=[r8]
+ ;;
+ mov ar.unat=r8
+ ;;
+ lfetch [r20],PT(CR_IPSR)-PT(PR)
+ adds r2 = PT(B6)+16,r12
+ adds r3 = PT(B7)+16,r12
+ ;;
+ lfetch [r20]
+ ;;
+ ld8 r24=[r2],16 /* B6 */
+ ld8 r25=[r3],16 /* B7 */
+ ;;
+ ld8 r26=[r2],16 /* ar_csd */
+ ld8 r27=[r3],16 /* ar_ssd */
+ mov b6 = r24
+ ;;
+ ld8.fill r8=[r2],16
+ ld8.fill r9=[r3],16
+ mov b7 = r25
+ ;;
+ mov ar.csd = r26
+ mov ar.ssd = r27
+ ;;
+ ld8.fill r10=[r2],PT(R15)-PT(R10)
+ ld8.fill r11=[r3],PT(R14)-PT(R11)
+ ;;
+ ld8.fill r15=[r2],PT(R16)-PT(R15)
+ ld8.fill r14=[r3],PT(R17)-PT(R14)
+ ;;
+ ld8.fill r16=[r2],16
+ ld8.fill r17=[r3],16
+ ;;
+ ld8.fill r18=[r2],16
+ ld8.fill r19=[r3],16
+ ;;
+ ld8.fill r20=[r2],16
+ ld8.fill r21=[r3],16
+ ;;
+ ld8.fill r22=[r2],16
+ ld8.fill r23=[r3],16
+ ;;
+ ld8.fill r24=[r2],16
+ ld8.fill r25=[r3],16
+ ;;
+ ld8.fill r26=[r2],16
+ ld8.fill r27=[r3],16
+ ;;
+ ld8.fill r28=[r2],16
+ ld8.fill r29=[r3],16
+ ;;
+ ld8.fill r30=[r2],PT(F6)-PT(R30)
+ ld8.fill r31=[r3],PT(F7)-PT(R31)
+ ;;
+ rsm psr.i | psr.ic
+ // initiate turning off of interrupt and interruption collection
+ invala // invalidate ALAT
+ ;;
+ srlz.i // ensure interruption collection is off
+ ;;
+ bsw.0
+ ;;
+ adds r16 = PT(CR_IPSR)+16,r12
+ adds r17 = PT(CR_IIP)+16,r12
+ mov r21=r13 // get current
+ ;;
+ ld8 r31=[r16],16 // load cr.ipsr
+ ld8 r30=[r17],16 // load cr.iip
+ ;;
+ ld8 r29=[r16],16 // load cr.ifs
+ ld8 r28=[r17],16 // load ar.unat
+ ;;
+ ld8 r27=[r16],16 // load ar.pfs
+ ld8 r26=[r17],16 // load ar.rsc
+ ;;
+ ld8 r25=[r16],16 // load ar.rnat
+ ld8 r24=[r17],16 // load ar.bspstore
+ ;;
+ ld8 r23=[r16],16 // load predicates
+ ld8 r22=[r17],16 // load b0
+ ;;
+ ld8 r20=[r16],16 // load ar.rsc value for "loadrs"
+ ld8.fill r1=[r17],16 //load r1
+ ;;
+ ld8.fill r12=[r16],16 //load r12
+ ld8.fill r13=[r17],PT(R2)-PT(R13) //load r13
+ ;;
+ ld8 r19=[r16],PT(R3)-PT(AR_FPSR) //load ar_fpsr
+ ld8.fill r2=[r17],PT(AR_CCV)-PT(R2) //load r2
+ ;;
+ ld8.fill r3=[r16] //load r3
+ ld8 r18=[r17] //load ar_ccv
+ ;;
+ mov ar.fpsr=r19
+ mov ar.ccv=r18
+ shr.u r18=r20,16
+ ;;
+kvm_rbs_switch:
+ mov r19=96
+
+kvm_dont_preserve_current_frame:
+/*
+ * To prevent leaking bits between the hypervisor and guest domain,
+ * we must clear the stacked registers in the "invalid" partition here.
+ * 5 registers/cycle on McKinley).
+ */
+# define pRecurse p6
+# define pReturn p7
+# define Nregs 14
+
+ alloc loc0=ar.pfs,2,Nregs-2,2,0
+ shr.u loc1=r18,9 // RNaTslots <= floor(dirtySize / (64*8))
+ sub r19=r19,r18 // r19 = (physStackedSize + 8) - dirtySize
+ ;;
+ mov ar.rsc=r20 // load ar.rsc to be used for "loadrs"
+ shladd in0=loc1,3,r19
+ mov in1=0
+ ;;
+ TEXT_ALIGN(32)
+kvm_rse_clear_invalid:
+ alloc loc0=ar.pfs,2,Nregs-2,2,0
+ cmp.lt pRecurse,p0=Nregs*8,in0
+ // if more than Nregs regs left to clear, (re)curse
+ add out0=-Nregs*8,in0
+ add out1=1,in1 // increment recursion count
+ mov loc1=0
+ mov loc2=0
+ ;;
+ mov loc3=0
+ mov loc4=0
+ mov loc5=0
+ mov loc6=0
+ mov loc7=0
+(pRecurse) br.call.dptk.few b0=kvm_rse_clear_invalid
+ ;;
+ mov loc8=0
+ mov loc9=0
+ cmp.ne pReturn,p0=r0,in1
+ // if recursion count != 0, we need to do a br.ret
+ mov loc10=0
+ mov loc11=0
+(pReturn) br.ret.dptk.many b0
+
+# undef pRecurse
+# undef pReturn
+
+// loadrs has already been shifted
+ alloc r16=ar.pfs,0,0,0,0 // drop current register frame
+ ;;
+ loadrs
+ ;;
+ mov ar.bspstore=r24
+ ;;
+ mov ar.unat=r28
+ mov ar.rnat=r25
+ mov ar.rsc=r26
+ ;;
+ mov cr.ipsr=r31
+ mov cr.iip=r30
+ mov cr.ifs=r29
+ mov ar.pfs=r27
+ adds r18=VMM_VPD_BASE_OFFSET,r21
+ ;;
+ ld8 r18=[r18] //vpd
+ adds r17=VMM_VCPU_ISR_OFFSET,r21
+ ;;
+ ld8 r17=[r17]
+ adds r19=VMM_VPD_VPSR_OFFSET,r18
+ ;;
+ ld8 r19=[r19] //vpsr
+ adds r20=VMM_VCPU_VSA_BASE_OFFSET,r21
+ ;;
+ ld8 r20=[r20]
+ ;;
+//vsa_sync_write_start
+ mov r25=r18
+ adds r16= VMM_VCPU_GP_OFFSET,r21
+ ;;
+ ld8 r16= [r16] // Put gp in r24
+ movl r24=@gprel(ia64_vmm_entry) // calculate return address
+ ;;
+ add r24=r24,r16
+ ;;
+ add r16=PAL_VPS_SYNC_WRITE,r20
+ ;;
+ mov b0=r16
+ br.cond.sptk b0 // call the service
+ ;;
+END(ia64_leave_hypervisor)
+// fall through
+GLOBAL_ENTRY(ia64_vmm_entry)
+/*
+ * must be at bank 0
+ * parameter:
+ * r17:cr.isr
+ * r18:vpd
+ * r19:vpsr
+ * r20:__vsa_base
+ * r22:b0
+ * r23:predicate
+ */
+ mov r24=r22
+ mov r25=r18
+ tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic
+ ;;
+ (p1) add r29=PAL_VPS_RESUME_NORMAL,r20
+ (p1) br.sptk.many ia64_vmm_entry_out
+ ;;
+ tbit.nz p1,p2 = r17,IA64_ISR_IR_BIT //p1=cr.isr.ir
+ ;;
+ (p1) add r29=PAL_VPS_RESUME_NORMAL,r20
+ (p2) add r29=PAL_VPS_RESUME_HANDLER,r20
+ (p2) ld8 r26=[r25]
+ ;;
+ia64_vmm_entry_out:
+ mov pr=r23,-2
+ mov b0=r29
+ ;;
+ br.cond.sptk b0 // call pal service
+END(ia64_vmm_entry)
+
+
+
+/*
+ * extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2,
+ * u64 arg3, u64 arg4, u64 arg5,
+ * u64 arg6, u64 arg7);
+ *
+ * XXX: The currently defined services use only 4 args at the max. The
+ * rest are not consumed.
+ */
+GLOBAL_ENTRY(ia64_call_vsa)
+ .regstk 4,4,0,0
+
+rpsave = loc0
+pfssave = loc1
+psrsave = loc2
+entry = loc3
+hostret = r24
+
+ alloc pfssave=ar.pfs,4,4,0,0
+ mov rpsave=rp
+ adds entry=VMM_VCPU_VSA_BASE_OFFSET, r13
+ ;;
+ ld8 entry=[entry]
+1: mov hostret=ip
+ mov r25=in1 // copy arguments
+ mov r26=in2
+ mov r27=in3
+ mov psrsave=psr
+ ;;
+ tbit.nz p6,p0=psrsave,14 // IA64_PSR_I
+ tbit.nz p7,p0=psrsave,13 // IA64_PSR_IC
+ ;;
+ add hostret=2f-1b,hostret // calculate return address
+ add entry=entry,in0
+ ;;
+ rsm psr.i | psr.ic
+ ;;
+ srlz.i
+ mov b6=entry
+ br.cond.sptk b6 // call the service
+2:
+ // Architectural sequence for enabling interrupts if necessary
+(p7) ssm psr.ic
+ ;;
+(p7) srlz.i
+ ;;
+//(p6) ssm psr.i
+ ;;
+ mov rp=rpsave
+ mov ar.pfs=pfssave
+ mov r8=r31
+ ;;
+ srlz.d
+ br.ret.sptk rp
+
+END(ia64_call_vsa)
+
+#define INIT_BSPSTORE ((4<<30)-(12<<20)-0x100)
+
+GLOBAL_ENTRY(vmm_reset_entry)
+ //set up ipsr, iip, vpd.vpsr, dcr
+ // For IPSR: it/dt/rt=1, i/ic=1, si=1, vm/bn=1
+ // For DCR: all bits 0
+ adds r14=-VMM_PT_REGS_SIZE, r12
+ ;;
+ movl r6=0x501008826000 // IPSR dt/rt/it:1;i/ic:1, si:1, vm/bn:1
+ movl r10=0x8000000000000000
+ adds r16=PT(CR_IIP), r14
+ adds r20=PT(R1), r14
+ ;;
+ rsm psr.ic | psr.i
+ ;;
+ srlz.i
+ ;;
+ bsw.0
+ ;;
+ mov r21 =r13
+ ;;
+ bsw.1
+ ;;
+ mov ar.rsc = 0
+ ;;
+ flushrs
+ ;;
+ mov ar.bspstore = 0
+ // clear BSPSTORE
+ ;;
+ mov cr.ipsr=r6
+ mov cr.ifs=r10
+ ld8 r4 = [r16] // Set init iip for first run.
+ ld8 r1 = [r20]
+ ;;
+ mov cr.iip=r4
+ ;;
+ adds r16=VMM_VPD_BASE_OFFSET,r13
+ adds r20=VMM_VCPU_VSA_BASE_OFFSET,r13
+ ;;
+ ld8 r18=[r16]
+ ld8 r20=[r20]
+ ;;
+ adds r19=VMM_VPD_VPSR_OFFSET,r18
+ ;;
+ ld8 r19=[r19]
+ mov r17=r0
+ mov r22=r0
+ mov r23=r0
+ br.cond.sptk ia64_vmm_entry
+ br.ret.sptk b0
+END(vmm_reset_entry)
diff --git a/arch/ia64/kvm/vti.h b/arch/ia64/kvm/vti.h
new file mode 100644
index 000000000000..f6c5617e16af
--- /dev/null
+++ b/arch/ia64/kvm/vti.h
@@ -0,0 +1,290 @@
+/*
+ * vti.h: prototype for generial vt related interface
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * Xuefei Xu (Anthony Xu) (anthony.xu@intel.com)
+ * Fred Yang (fred.yang@intel.com)
+ * Kun Tian (Kevin Tian) (kevin.tian@intel.com)
+ *
+ * Copyright (c) 2007, Intel Corporation.
+ * Zhang xiantao <xiantao.zhang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+#ifndef _KVM_VT_I_H
+#define _KVM_VT_I_H
+
+#ifndef __ASSEMBLY__
+#include <asm/page.h>
+
+#include <linux/kvm_host.h>
+
+/* define itr.i and itr.d in ia64_itr function */
+#define ITR 0x01
+#define DTR 0x02
+#define IaDTR 0x03
+
+#define IA64_TR_VMM 6 /*itr6, dtr6 : maps vmm code, vmbuffer*/
+#define IA64_TR_VM_DATA 7 /*dtr7 : maps current vm data*/
+
+#define RR6 (6UL<<61)
+#define RR7 (7UL<<61)
+
+
+/* config_options in pal_vp_init_env */
+#define VP_INITIALIZE 1UL
+#define VP_FR_PMC 1UL<<1
+#define VP_OPCODE 1UL<<8
+#define VP_CAUSE 1UL<<9
+#define VP_FW_ACC 1UL<<63
+
+/* init vp env with initializing vm_buffer */
+#define VP_INIT_ENV_INITALIZE (VP_INITIALIZE | VP_FR_PMC |\
+ VP_OPCODE | VP_CAUSE | VP_FW_ACC)
+/* init vp env without initializing vm_buffer */
+#define VP_INIT_ENV VP_FR_PMC | VP_OPCODE | VP_CAUSE | VP_FW_ACC
+
+#define PAL_VP_CREATE 265
+/* Stacked Virt. Initializes a new VPD for the operation of
+ * a new virtual processor in the virtual environment.
+ */
+#define PAL_VP_ENV_INFO 266
+/*Stacked Virt. Returns the parameters needed to enter a virtual environment.*/
+#define PAL_VP_EXIT_ENV 267
+/*Stacked Virt. Allows a logical processor to exit a virtual environment.*/
+#define PAL_VP_INIT_ENV 268
+/*Stacked Virt. Allows a logical processor to enter a virtual environment.*/
+#define PAL_VP_REGISTER 269
+/*Stacked Virt. Register a different host IVT for the virtual processor.*/
+#define PAL_VP_RESUME 270
+/* Renamed from PAL_VP_RESUME */
+#define PAL_VP_RESTORE 270
+/*Stacked Virt. Resumes virtual processor operation on the logical processor.*/
+#define PAL_VP_SUSPEND 271
+/* Renamed from PAL_VP_SUSPEND */
+#define PAL_VP_SAVE 271
+/* Stacked Virt. Suspends operation for the specified virtual processor on
+ * the logical processor.
+ */
+#define PAL_VP_TERMINATE 272
+/* Stacked Virt. Terminates operation for the specified virtual processor.*/
+
+union vac {
+ unsigned long value;
+ struct {
+ int a_int:1;
+ int a_from_int_cr:1;
+ int a_to_int_cr:1;
+ int a_from_psr:1;
+ int a_from_cpuid:1;
+ int a_cover:1;
+ int a_bsw:1;
+ long reserved:57;
+ };
+};
+
+union vdc {
+ unsigned long value;
+ struct {
+ int d_vmsw:1;
+ int d_extint:1;
+ int d_ibr_dbr:1;
+ int d_pmc:1;
+ int d_to_pmd:1;
+ int d_itm:1;
+ long reserved:58;
+ };
+};
+
+struct vpd {
+ union vac vac;
+ union vdc vdc;
+ unsigned long virt_env_vaddr;
+ unsigned long reserved1[29];
+ unsigned long vhpi;
+ unsigned long reserved2[95];
+ unsigned long vgr[16];
+ unsigned long vbgr[16];
+ unsigned long vnat;
+ unsigned long vbnat;
+ unsigned long vcpuid[5];
+ unsigned long reserved3[11];
+ unsigned long vpsr;
+ unsigned long vpr;
+ unsigned long reserved4[76];
+ union {
+ unsigned long vcr[128];
+ struct {
+ unsigned long dcr;
+ unsigned long itm;
+ unsigned long iva;
+ unsigned long rsv1[5];
+ unsigned long pta;
+ unsigned long rsv2[7];
+ unsigned long ipsr;
+ unsigned long isr;
+ unsigned long rsv3;
+ unsigned long iip;
+ unsigned long ifa;
+ unsigned long itir;
+ unsigned long iipa;
+ unsigned long ifs;
+ unsigned long iim;
+ unsigned long iha;
+ unsigned long rsv4[38];
+ unsigned long lid;
+ unsigned long ivr;
+ unsigned long tpr;
+ unsigned long eoi;
+ unsigned long irr[4];
+ unsigned long itv;
+ unsigned long pmv;
+ unsigned long cmcv;
+ unsigned long rsv5[5];
+ unsigned long lrr0;
+ unsigned long lrr1;
+ unsigned long rsv6[46];
+ };
+ };
+ unsigned long reserved5[128];
+ unsigned long reserved6[3456];
+ unsigned long vmm_avail[128];
+ unsigned long reserved7[4096];
+};
+
+#define PAL_PROC_VM_BIT (1UL << 40)
+#define PAL_PROC_VMSW_BIT (1UL << 54)
+
+static inline s64 ia64_pal_vp_env_info(u64 *buffer_size,
+ u64 *vp_env_info)
+{
+ struct ia64_pal_retval iprv;
+ PAL_CALL_STK(iprv, PAL_VP_ENV_INFO, 0, 0, 0);
+ *buffer_size = iprv.v0;
+ *vp_env_info = iprv.v1;
+ return iprv.status;
+}
+
+static inline s64 ia64_pal_vp_exit_env(u64 iva)
+{
+ struct ia64_pal_retval iprv;
+
+ PAL_CALL_STK(iprv, PAL_VP_EXIT_ENV, (u64)iva, 0, 0);
+ return iprv.status;
+}
+
+static inline s64 ia64_pal_vp_init_env(u64 config_options, u64 pbase_addr,
+ u64 vbase_addr, u64 *vsa_base)
+{
+ struct ia64_pal_retval iprv;
+
+ PAL_CALL_STK(iprv, PAL_VP_INIT_ENV, config_options, pbase_addr,
+ vbase_addr);
+ *vsa_base = iprv.v0;
+
+ return iprv.status;
+}
+
+static inline s64 ia64_pal_vp_restore(u64 *vpd, u64 pal_proc_vector)
+{
+ struct ia64_pal_retval iprv;
+
+ PAL_CALL_STK(iprv, PAL_VP_RESTORE, (u64)vpd, pal_proc_vector, 0);
+
+ return iprv.status;
+}
+
+static inline s64 ia64_pal_vp_save(u64 *vpd, u64 pal_proc_vector)
+{
+ struct ia64_pal_retval iprv;
+
+ PAL_CALL_STK(iprv, PAL_VP_SAVE, (u64)vpd, pal_proc_vector, 0);
+
+ return iprv.status;
+}
+
+#endif
+
+/*VPD field offset*/
+#define VPD_VAC_START_OFFSET 0
+#define VPD_VDC_START_OFFSET 8
+#define VPD_VHPI_START_OFFSET 256
+#define VPD_VGR_START_OFFSET 1024
+#define VPD_VBGR_START_OFFSET 1152
+#define VPD_VNAT_START_OFFSET 1280
+#define VPD_VBNAT_START_OFFSET 1288
+#define VPD_VCPUID_START_OFFSET 1296
+#define VPD_VPSR_START_OFFSET 1424
+#define VPD_VPR_START_OFFSET 1432
+#define VPD_VRSE_CFLE_START_OFFSET 1440
+#define VPD_VCR_START_OFFSET 2048
+#define VPD_VTPR_START_OFFSET 2576
+#define VPD_VRR_START_OFFSET 3072
+#define VPD_VMM_VAIL_START_OFFSET 31744
+
+/*Virtualization faults*/
+
+#define EVENT_MOV_TO_AR 1
+#define EVENT_MOV_TO_AR_IMM 2
+#define EVENT_MOV_FROM_AR 3
+#define EVENT_MOV_TO_CR 4
+#define EVENT_MOV_FROM_CR 5
+#define EVENT_MOV_TO_PSR 6
+#define EVENT_MOV_FROM_PSR 7
+#define EVENT_ITC_D 8
+#define EVENT_ITC_I 9
+#define EVENT_MOV_TO_RR 10
+#define EVENT_MOV_TO_DBR 11
+#define EVENT_MOV_TO_IBR 12
+#define EVENT_MOV_TO_PKR 13
+#define EVENT_MOV_TO_PMC 14
+#define EVENT_MOV_TO_PMD 15
+#define EVENT_ITR_D 16
+#define EVENT_ITR_I 17
+#define EVENT_MOV_FROM_RR 18
+#define EVENT_MOV_FROM_DBR 19
+#define EVENT_MOV_FROM_IBR 20
+#define EVENT_MOV_FROM_PKR 21
+#define EVENT_MOV_FROM_PMC 22
+#define EVENT_MOV_FROM_CPUID 23
+#define EVENT_SSM 24
+#define EVENT_RSM 25
+#define EVENT_PTC_L 26
+#define EVENT_PTC_G 27
+#define EVENT_PTC_GA 28
+#define EVENT_PTR_D 29
+#define EVENT_PTR_I 30
+#define EVENT_THASH 31
+#define EVENT_TTAG 32
+#define EVENT_TPA 33
+#define EVENT_TAK 34
+#define EVENT_PTC_E 35
+#define EVENT_COVER 36
+#define EVENT_RFI 37
+#define EVENT_BSW_0 38
+#define EVENT_BSW_1 39
+#define EVENT_VMSW 40
+
+/**PAL virtual services offsets */
+#define PAL_VPS_RESUME_NORMAL 0x0000
+#define PAL_VPS_RESUME_HANDLER 0x0400
+#define PAL_VPS_SYNC_READ 0x0800
+#define PAL_VPS_SYNC_WRITE 0x0c00
+#define PAL_VPS_SET_PENDING_INTERRUPT 0x1000
+#define PAL_VPS_THASH 0x1400
+#define PAL_VPS_TTAG 0x1800
+#define PAL_VPS_RESTORE 0x1c00
+#define PAL_VPS_SAVE 0x2000
+
+#endif/* _VT_I_H*/
diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c
new file mode 100644
index 000000000000..def4576d22b1
--- /dev/null
+++ b/arch/ia64/kvm/vtlb.c
@@ -0,0 +1,636 @@
+/*
+ * vtlb.c: guest virtual tlb handling module.
+ * Copyright (c) 2004, Intel Corporation.
+ * Yaozu Dong (Eddie Dong) <Eddie.dong@intel.com>
+ * Xuefei Xu (Anthony Xu) <anthony.xu@intel.com>
+ *
+ * Copyright (c) 2007, Intel Corporation.
+ * Xuefei Xu (Anthony Xu) <anthony.xu@intel.com>
+ * Xiantao Zhang <xiantao.zhang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include "vcpu.h"
+
+#include <linux/rwsem.h>
+
+#include <asm/tlb.h>
+
+/*
+ * Check to see if the address rid:va is translated by the TLB
+ */
+
+static int __is_tr_translated(struct thash_data *trp, u64 rid, u64 va)
+{
+ return ((trp->p) && (trp->rid == rid)
+ && ((va-trp->vadr) < PSIZE(trp->ps)));
+}
+
+/*
+ * Only for GUEST TR format.
+ */
+static int __is_tr_overlap(struct thash_data *trp, u64 rid, u64 sva, u64 eva)
+{
+ u64 sa1, ea1;
+
+ if (!trp->p || trp->rid != rid)
+ return 0;
+
+ sa1 = trp->vadr;
+ ea1 = sa1 + PSIZE(trp->ps) - 1;
+ eva -= 1;
+ if ((sva > ea1) || (sa1 > eva))
+ return 0;
+ else
+ return 1;
+
+}
+
+void machine_tlb_purge(u64 va, u64 ps)
+{
+ ia64_ptcl(va, ps << 2);
+}
+
+void local_flush_tlb_all(void)
+{
+ int i, j;
+ unsigned long flags, count0, count1;
+ unsigned long stride0, stride1, addr;
+
+ addr = current_vcpu->arch.ptce_base;
+ count0 = current_vcpu->arch.ptce_count[0];
+ count1 = current_vcpu->arch.ptce_count[1];
+ stride0 = current_vcpu->arch.ptce_stride[0];
+ stride1 = current_vcpu->arch.ptce_stride[1];
+
+ local_irq_save(flags);
+ for (i = 0; i < count0; ++i) {
+ for (j = 0; j < count1; ++j) {
+ ia64_ptce(addr);
+ addr += stride1;
+ }
+ addr += stride0;
+ }
+ local_irq_restore(flags);
+ ia64_srlz_i(); /* srlz.i implies srlz.d */
+}
+
+int vhpt_enabled(struct kvm_vcpu *vcpu, u64 vadr, enum vhpt_ref ref)
+{
+ union ia64_rr vrr;
+ union ia64_pta vpta;
+ struct ia64_psr vpsr;
+
+ vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
+ vrr.val = vcpu_get_rr(vcpu, vadr);
+ vpta.val = vcpu_get_pta(vcpu);
+
+ if (vrr.ve & vpta.ve) {
+ switch (ref) {
+ case DATA_REF:
+ case NA_REF:
+ return vpsr.dt;
+ case INST_REF:
+ return vpsr.dt && vpsr.it && vpsr.ic;
+ case RSE_REF:
+ return vpsr.dt && vpsr.rt;
+
+ }
+ }
+ return 0;
+}
+
+struct thash_data *vsa_thash(union ia64_pta vpta, u64 va, u64 vrr, u64 *tag)
+{
+ u64 index, pfn, rid, pfn_bits;
+
+ pfn_bits = vpta.size - 5 - 8;
+ pfn = REGION_OFFSET(va) >> _REGION_PAGE_SIZE(vrr);
+ rid = _REGION_ID(vrr);
+ index = ((rid & 0xff) << pfn_bits)|(pfn & ((1UL << pfn_bits) - 1));
+ *tag = ((rid >> 8) & 0xffff) | ((pfn >> pfn_bits) << 16);
+
+ return (struct thash_data *)((vpta.base << PTA_BASE_SHIFT) +
+ (index << 5));
+}
+
+struct thash_data *__vtr_lookup(struct kvm_vcpu *vcpu, u64 va, int type)
+{
+
+ struct thash_data *trp;
+ int i;
+ u64 rid;
+
+ rid = vcpu_get_rr(vcpu, va);
+ rid = rid & RR_RID_MASK;;
+ if (type == D_TLB) {
+ if (vcpu_quick_region_check(vcpu->arch.dtr_regions, va)) {
+ for (trp = (struct thash_data *)&vcpu->arch.dtrs, i = 0;
+ i < NDTRS; i++, trp++) {
+ if (__is_tr_translated(trp, rid, va))
+ return trp;
+ }
+ }
+ } else {
+ if (vcpu_quick_region_check(vcpu->arch.itr_regions, va)) {
+ for (trp = (struct thash_data *)&vcpu->arch.itrs, i = 0;
+ i < NITRS; i++, trp++) {
+ if (__is_tr_translated(trp, rid, va))
+ return trp;
+ }
+ }
+ }
+
+ return NULL;
+}
+
+static void vhpt_insert(u64 pte, u64 itir, u64 ifa, u64 gpte)
+{
+ union ia64_rr rr;
+ struct thash_data *head;
+ unsigned long ps, gpaddr;
+
+ ps = itir_ps(itir);
+
+ gpaddr = ((gpte & _PAGE_PPN_MASK) >> ps << ps) |
+ (ifa & ((1UL << ps) - 1));
+
+ rr.val = ia64_get_rr(ifa);
+ head = (struct thash_data *)ia64_thash(ifa);
+ head->etag = INVALID_TI_TAG;
+ ia64_mf();
+ head->page_flags = pte & ~PAGE_FLAGS_RV_MASK;
+ head->itir = rr.ps << 2;
+ head->etag = ia64_ttag(ifa);
+ head->gpaddr = gpaddr;
+}
+
+void mark_pages_dirty(struct kvm_vcpu *v, u64 pte, u64 ps)
+{
+ u64 i, dirty_pages = 1;
+ u64 base_gfn = (pte&_PAGE_PPN_MASK) >> PAGE_SHIFT;
+ spinlock_t *lock = __kvm_va(v->arch.dirty_log_lock_pa);
+ void *dirty_bitmap = (void *)v - (KVM_VCPU_OFS + v->vcpu_id * VCPU_SIZE)
+ + KVM_MEM_DIRTY_LOG_OFS;
+ dirty_pages <<= ps <= PAGE_SHIFT ? 0 : ps - PAGE_SHIFT;
+
+ vmm_spin_lock(lock);
+ for (i = 0; i < dirty_pages; i++) {
+ /* avoid RMW */
+ if (!test_bit(base_gfn + i, dirty_bitmap))
+ set_bit(base_gfn + i , dirty_bitmap);
+ }
+ vmm_spin_unlock(lock);
+}
+
+void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte, u64 itir, u64 va, int type)
+{
+ u64 phy_pte, psr;
+ union ia64_rr mrr;
+
+ mrr.val = ia64_get_rr(va);
+ phy_pte = translate_phy_pte(&pte, itir, va);
+
+ if (itir_ps(itir) >= mrr.ps) {
+ vhpt_insert(phy_pte, itir, va, pte);
+ } else {
+ phy_pte &= ~PAGE_FLAGS_RV_MASK;
+ psr = ia64_clear_ic();
+ ia64_itc(type, va, phy_pte, itir_ps(itir));
+ ia64_set_psr(psr);
+ }
+
+ if (!(pte&VTLB_PTE_IO))
+ mark_pages_dirty(v, pte, itir_ps(itir));
+}
+
+/*
+ * vhpt lookup
+ */
+struct thash_data *vhpt_lookup(u64 va)
+{
+ struct thash_data *head;
+ u64 tag;
+
+ head = (struct thash_data *)ia64_thash(va);
+ tag = ia64_ttag(va);
+ if (head->etag == tag)
+ return head;
+ return NULL;
+}
+
+u64 guest_vhpt_lookup(u64 iha, u64 *pte)
+{
+ u64 ret;
+ struct thash_data *data;
+
+ data = __vtr_lookup(current_vcpu, iha, D_TLB);
+ if (data != NULL)
+ thash_vhpt_insert(current_vcpu, data->page_flags,
+ data->itir, iha, D_TLB);
+
+ asm volatile ("rsm psr.ic|psr.i;;"
+ "srlz.d;;"
+ "ld8.s r9=[%1];;"
+ "tnat.nz p6,p7=r9;;"
+ "(p6) mov %0=1;"
+ "(p6) mov r9=r0;"
+ "(p7) extr.u r9=r9,0,53;;"
+ "(p7) mov %0=r0;"
+ "(p7) st8 [%2]=r9;;"
+ "ssm psr.ic;;"
+ "srlz.d;;"
+ /* "ssm psr.i;;" Once interrupts in vmm open, need fix*/
+ : "=r"(ret) : "r"(iha), "r"(pte):"memory");
+
+ return ret;
+}
+
+/*
+ * purge software guest tlb
+ */
+
+static void vtlb_purge(struct kvm_vcpu *v, u64 va, u64 ps)
+{
+ struct thash_data *cur;
+ u64 start, curadr, size, psbits, tag, rr_ps, num;
+ union ia64_rr vrr;
+ struct thash_cb *hcb = &v->arch.vtlb;
+
+ vrr.val = vcpu_get_rr(v, va);
+ psbits = VMX(v, psbits[(va >> 61)]);
+ start = va & ~((1UL << ps) - 1);
+ while (psbits) {
+ curadr = start;
+ rr_ps = __ffs(psbits);
+ psbits &= ~(1UL << rr_ps);
+ num = 1UL << ((ps < rr_ps) ? 0 : (ps - rr_ps));
+ size = PSIZE(rr_ps);
+ vrr.ps = rr_ps;
+ while (num) {
+ cur = vsa_thash(hcb->pta, curadr, vrr.val, &tag);
+ if (cur->etag == tag && cur->ps == rr_ps)
+ cur->etag = INVALID_TI_TAG;
+ curadr += size;
+ num--;
+ }
+ }
+}
+
+
+/*
+ * purge VHPT and machine TLB
+ */
+static void vhpt_purge(struct kvm_vcpu *v, u64 va, u64 ps)
+{
+ struct thash_data *cur;
+ u64 start, size, tag, num;
+ union ia64_rr rr;
+
+ start = va & ~((1UL << ps) - 1);
+ rr.val = ia64_get_rr(va);
+ size = PSIZE(rr.ps);
+ num = 1UL << ((ps < rr.ps) ? 0 : (ps - rr.ps));
+ while (num) {
+ cur = (struct thash_data *)ia64_thash(start);
+ tag = ia64_ttag(start);
+ if (cur->etag == tag)
+ cur->etag = INVALID_TI_TAG;
+ start += size;
+ num--;
+ }
+ machine_tlb_purge(va, ps);
+}
+
+/*
+ * Insert an entry into hash TLB or VHPT.
+ * NOTES:
+ * 1: When inserting VHPT to thash, "va" is a must covered
+ * address by the inserted machine VHPT entry.
+ * 2: The format of entry is always in TLB.
+ * 3: The caller need to make sure the new entry will not overlap
+ * with any existed entry.
+ */
+void vtlb_insert(struct kvm_vcpu *v, u64 pte, u64 itir, u64 va)
+{
+ struct thash_data *head;
+ union ia64_rr vrr;
+ u64 tag;
+ struct thash_cb *hcb = &v->arch.vtlb;
+
+ vrr.val = vcpu_get_rr(v, va);
+ vrr.ps = itir_ps(itir);
+ VMX(v, psbits[va >> 61]) |= (1UL << vrr.ps);
+ head = vsa_thash(hcb->pta, va, vrr.val, &tag);
+ head->page_flags = pte;
+ head->itir = itir;
+ head->etag = tag;
+}
+
+int vtr_find_overlap(struct kvm_vcpu *vcpu, u64 va, u64 ps, int type)
+{
+ struct thash_data *trp;
+ int i;
+ u64 end, rid;
+
+ rid = vcpu_get_rr(vcpu, va);
+ rid = rid & RR_RID_MASK;
+ end = va + PSIZE(ps);
+ if (type == D_TLB) {
+ if (vcpu_quick_region_check(vcpu->arch.dtr_regions, va)) {
+ for (trp = (struct thash_data *)&vcpu->arch.dtrs, i = 0;
+ i < NDTRS; i++, trp++) {
+ if (__is_tr_overlap(trp, rid, va, end))
+ return i;
+ }
+ }
+ } else {
+ if (vcpu_quick_region_check(vcpu->arch.itr_regions, va)) {
+ for (trp = (struct thash_data *)&vcpu->arch.itrs, i = 0;
+ i < NITRS; i++, trp++) {
+ if (__is_tr_overlap(trp, rid, va, end))
+ return i;
+ }
+ }
+ }
+ return -1;
+}
+
+/*
+ * Purge entries in VTLB and VHPT
+ */
+void thash_purge_entries(struct kvm_vcpu *v, u64 va, u64 ps)
+{
+ if (vcpu_quick_region_check(v->arch.tc_regions, va))
+ vtlb_purge(v, va, ps);
+ vhpt_purge(v, va, ps);
+}
+
+void thash_purge_entries_remote(struct kvm_vcpu *v, u64 va, u64 ps)
+{
+ u64 old_va = va;
+ va = REGION_OFFSET(va);
+ if (vcpu_quick_region_check(v->arch.tc_regions, old_va))
+ vtlb_purge(v, va, ps);
+ vhpt_purge(v, va, ps);
+}
+
+u64 translate_phy_pte(u64 *pte, u64 itir, u64 va)
+{
+ u64 ps, ps_mask, paddr, maddr;
+ union pte_flags phy_pte;
+
+ ps = itir_ps(itir);
+ ps_mask = ~((1UL << ps) - 1);
+ phy_pte.val = *pte;
+ paddr = *pte;
+ paddr = ((paddr & _PAGE_PPN_MASK) & ps_mask) | (va & ~ps_mask);
+ maddr = kvm_lookup_mpa(paddr >> PAGE_SHIFT);
+ if (maddr & GPFN_IO_MASK) {
+ *pte |= VTLB_PTE_IO;
+ return -1;
+ }
+ maddr = ((maddr & _PAGE_PPN_MASK) & PAGE_MASK) |
+ (paddr & ~PAGE_MASK);
+ phy_pte.ppn = maddr >> ARCH_PAGE_SHIFT;
+ return phy_pte.val;
+}
+
+/*
+ * Purge overlap TCs and then insert the new entry to emulate itc ops.
+ * Notes: Only TC entry can purge and insert.
+ * 1 indicates this is MMIO
+ */
+int thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir,
+ u64 ifa, int type)
+{
+ u64 ps;
+ u64 phy_pte;
+ union ia64_rr vrr, mrr;
+ int ret = 0;
+
+ ps = itir_ps(itir);
+ vrr.val = vcpu_get_rr(v, ifa);
+ mrr.val = ia64_get_rr(ifa);
+
+ phy_pte = translate_phy_pte(&pte, itir, ifa);
+
+ /* Ensure WB attribute if pte is related to a normal mem page,
+ * which is required by vga acceleration since qemu maps shared
+ * vram buffer with WB.
+ */
+ if (!(pte & VTLB_PTE_IO) && ((pte & _PAGE_MA_MASK) != _PAGE_MA_NAT)) {
+ pte &= ~_PAGE_MA_MASK;
+ phy_pte &= ~_PAGE_MA_MASK;
+ }
+
+ if (pte & VTLB_PTE_IO)
+ ret = 1;
+
+ vtlb_purge(v, ifa, ps);
+ vhpt_purge(v, ifa, ps);
+
+ if (ps == mrr.ps) {
+ if (!(pte&VTLB_PTE_IO)) {
+ vhpt_insert(phy_pte, itir, ifa, pte);
+ } else {
+ vtlb_insert(v, pte, itir, ifa);
+ vcpu_quick_region_set(VMX(v, tc_regions), ifa);
+ }
+ } else if (ps > mrr.ps) {
+ vtlb_insert(v, pte, itir, ifa);
+ vcpu_quick_region_set(VMX(v, tc_regions), ifa);
+ if (!(pte&VTLB_PTE_IO))
+ vhpt_insert(phy_pte, itir, ifa, pte);
+ } else {
+ u64 psr;
+ phy_pte &= ~PAGE_FLAGS_RV_MASK;
+ psr = ia64_clear_ic();
+ ia64_itc(type, ifa, phy_pte, ps);
+ ia64_set_psr(psr);
+ }
+ if (!(pte&VTLB_PTE_IO))
+ mark_pages_dirty(v, pte, ps);
+
+ return ret;
+}
+
+/*
+ * Purge all TCs or VHPT entries including those in Hash table.
+ *
+ */
+
+void thash_purge_all(struct kvm_vcpu *v)
+{
+ int i;
+ struct thash_data *head;
+ struct thash_cb *vtlb, *vhpt;
+ vtlb = &v->arch.vtlb;
+ vhpt = &v->arch.vhpt;
+
+ for (i = 0; i < 8; i++)
+ VMX(v, psbits[i]) = 0;
+
+ head = vtlb->hash;
+ for (i = 0; i < vtlb->num; i++) {
+ head->page_flags = 0;
+ head->etag = INVALID_TI_TAG;
+ head->itir = 0;
+ head->next = 0;
+ head++;
+ };
+
+ head = vhpt->hash;
+ for (i = 0; i < vhpt->num; i++) {
+ head->page_flags = 0;
+ head->etag = INVALID_TI_TAG;
+ head->itir = 0;
+ head->next = 0;
+ head++;
+ };
+
+ local_flush_tlb_all();
+}
+
+
+/*
+ * Lookup the hash table and its collision chain to find an entry
+ * covering this address rid:va or the entry.
+ *
+ * INPUT:
+ * in: TLB format for both VHPT & TLB.
+ */
+
+struct thash_data *vtlb_lookup(struct kvm_vcpu *v, u64 va, int is_data)
+{
+ struct thash_data *cch;
+ u64 psbits, ps, tag;
+ union ia64_rr vrr;
+
+ struct thash_cb *hcb = &v->arch.vtlb;
+
+ cch = __vtr_lookup(v, va, is_data);;
+ if (cch)
+ return cch;
+
+ if (vcpu_quick_region_check(v->arch.tc_regions, va) == 0)
+ return NULL;
+
+ psbits = VMX(v, psbits[(va >> 61)]);
+ vrr.val = vcpu_get_rr(v, va);
+ while (psbits) {
+ ps = __ffs(psbits);
+ psbits &= ~(1UL << ps);
+ vrr.ps = ps;
+ cch = vsa_thash(hcb->pta, va, vrr.val, &tag);
+ if (cch->etag == tag && cch->ps == ps)
+ return cch;
+ }
+
+ return NULL;
+}
+
+
+/*
+ * Initialize internal control data before service.
+ */
+void thash_init(struct thash_cb *hcb, u64 sz)
+{
+ int i;
+ struct thash_data *head;
+
+ hcb->pta.val = (unsigned long)hcb->hash;
+ hcb->pta.vf = 1;
+ hcb->pta.ve = 1;
+ hcb->pta.size = sz;
+ head = hcb->hash;
+ for (i = 0; i < hcb->num; i++) {
+ head->page_flags = 0;
+ head->itir = 0;
+ head->etag = INVALID_TI_TAG;
+ head->next = 0;
+ head++;
+ }
+}
+
+u64 kvm_lookup_mpa(u64 gpfn)
+{
+ u64 *base = (u64 *) KVM_P2M_BASE;
+ return *(base + gpfn);
+}
+
+u64 kvm_gpa_to_mpa(u64 gpa)
+{
+ u64 pte = kvm_lookup_mpa(gpa >> PAGE_SHIFT);
+ return (pte >> PAGE_SHIFT << PAGE_SHIFT) | (gpa & ~PAGE_MASK);
+}
+
+
+/*
+ * Fetch guest bundle code.
+ * INPUT:
+ * gip: guest ip
+ * pbundle: used to return fetched bundle.
+ */
+int fetch_code(struct kvm_vcpu *vcpu, u64 gip, IA64_BUNDLE *pbundle)
+{
+ u64 gpip = 0; /* guest physical IP*/
+ u64 *vpa;
+ struct thash_data *tlb;
+ u64 maddr;
+
+ if (!(VCPU(vcpu, vpsr) & IA64_PSR_IT)) {
+ /* I-side physical mode */
+ gpip = gip;
+ } else {
+ tlb = vtlb_lookup(vcpu, gip, I_TLB);
+ if (tlb)
+ gpip = (tlb->ppn >> (tlb->ps - 12) << tlb->ps) |
+ (gip & (PSIZE(tlb->ps) - 1));
+ }
+ if (gpip) {
+ maddr = kvm_gpa_to_mpa(gpip);
+ } else {
+ tlb = vhpt_lookup(gip);
+ if (tlb == NULL) {
+ ia64_ptcl(gip, ARCH_PAGE_SHIFT << 2);
+ return IA64_FAULT;
+ }
+ maddr = (tlb->ppn >> (tlb->ps - 12) << tlb->ps)
+ | (gip & (PSIZE(tlb->ps) - 1));
+ }
+ vpa = (u64 *)__kvm_va(maddr);
+
+ pbundle->i64[0] = *vpa++;
+ pbundle->i64[1] = *vpa;
+
+ return IA64_NO_FAULT;
+}
+
+
+void kvm_init_vhpt(struct kvm_vcpu *v)
+{
+ v->arch.vhpt.num = VHPT_NUM_ENTRIES;
+ thash_init(&v->arch.vhpt, VHPT_SHIFT);
+ ia64_set_pta(v->arch.vhpt.pta.val);
+ /*Enable VHPT here?*/
+}
+
+void kvm_init_vtlb(struct kvm_vcpu *v)
+{
+ v->arch.vtlb.num = VTLB_NUM_ENTRIES;
+ thash_init(&v->arch.vtlb, VTLB_SHIFT);
+}
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 5c1de53c8c1c..fc6c6636ffda 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -682,15 +682,6 @@ mem_init (void)
}
#ifdef CONFIG_MEMORY_HOTPLUG
-void online_page(struct page *page)
-{
- ClearPageReserved(page);
- init_page_count(page);
- __free_page(page);
- totalram_pages++;
- num_physpages++;
-}
-
int arch_add_memory(int nid, u64 start, u64 size)
{
pg_data_t *pgdat;
diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c
index dfc6bf1c7b41..49d3120415eb 100644
--- a/arch/ia64/sn/kernel/sn2/sn2_smp.c
+++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c
@@ -550,11 +550,12 @@ static int __init sn2_ptc_init(void)
if (!ia64_platform_is("sn2"))
return 0;
- if (!(proc_sn2_ptc = create_proc_entry(PTC_BASENAME, 0444, NULL))) {
+ proc_sn2_ptc = proc_create(PTC_BASENAME, 0444,
+ NULL, &proc_sn2_ptc_operations);
+ if (!&proc_sn2_ptc_operations) {
printk(KERN_ERR "unable to create %s proc entry", PTC_BASENAME);
return -EINVAL;
}
- proc_sn2_ptc->proc_fops = &proc_sn2_ptc_operations;
spin_lock_init(&sn2_global_ptc_lock);
return 0;
}
diff --git a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c
index 62b3e9a496ac..2526e5c783a4 100644
--- a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c
+++ b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c
@@ -139,30 +139,21 @@ static const struct file_operations proc_sn_topo_fops = {
void register_sn_procfs(void)
{
static struct proc_dir_entry *sgi_proc_dir = NULL;
- struct proc_dir_entry *pde;
BUG_ON(sgi_proc_dir != NULL);
if (!(sgi_proc_dir = proc_mkdir("sgi_sn", NULL)))
return;
- pde = create_proc_entry("partition_id", 0444, sgi_proc_dir);
- if (pde)
- pde->proc_fops = &proc_partition_id_fops;
- pde = create_proc_entry("system_serial_number", 0444, sgi_proc_dir);
- if (pde)
- pde->proc_fops = &proc_system_sn_fops;
- pde = create_proc_entry("licenseID", 0444, sgi_proc_dir);
- if (pde)
- pde->proc_fops = &proc_license_id_fops;
- pde = create_proc_entry("sn_force_interrupt", 0644, sgi_proc_dir);
- if (pde)
- pde->proc_fops = &proc_sn_force_intr_fops;
- pde = create_proc_entry("coherence_id", 0444, sgi_proc_dir);
- if (pde)
- pde->proc_fops = &proc_coherence_id_fops;
- pde = create_proc_entry("sn_topology", 0444, sgi_proc_dir);
- if (pde)
- pde->proc_fops = &proc_sn_topo_fops;
+ proc_create("partition_id", 0444, sgi_proc_dir,
+ &proc_partition_id_fops);
+ proc_create("system_serial_number", 0444, sgi_proc_dir,
+ &proc_system_sn_fops);
+ proc_create("licenseID", 0444, sgi_proc_dir, &proc_license_id_fops);
+ proc_create("sn_force_interrupt", 0644, sgi_proc_dir,
+ &proc_sn_force_intr_fops);
+ proc_create("coherence_id", 0444, sgi_proc_dir,
+ &proc_coherence_id_fops);
+ proc_create("sn_topology", 0444, sgi_proc_dir, &proc_sn_topo_fops);
}
#endif /* CONFIG_PROC_FS */
diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c
index 18b94b792d54..52175af299a0 100644
--- a/arch/ia64/sn/pci/pci_dma.c
+++ b/arch/ia64/sn/pci/pci_dma.c
@@ -10,6 +10,7 @@
*/
#include <linux/module.h>
+#include <linux/dma-attrs.h>
#include <asm/dma.h>
#include <asm/sn/intr.h>
#include <asm/sn/pcibus_provider_defs.h>
@@ -149,11 +150,12 @@ void sn_dma_free_coherent(struct device *dev, size_t size, void *cpu_addr,
EXPORT_SYMBOL(sn_dma_free_coherent);
/**
- * sn_dma_map_single - map a single page for DMA
+ * sn_dma_map_single_attrs - map a single page for DMA
* @dev: device to map for
* @cpu_addr: kernel virtual address of the region to map
* @size: size of the region
* @direction: DMA direction
+ * @attrs: optional dma attributes
*
* Map the region pointed to by @cpu_addr for DMA and return the
* DMA address.
@@ -163,42 +165,59 @@ EXPORT_SYMBOL(sn_dma_free_coherent);
* no way of saving the dmamap handle from the alloc to later free
* (which is pretty much unacceptable).
*
+ * mappings with the DMA_ATTR_WRITE_BARRIER get mapped with
+ * dma_map_consistent() so that writes force a flush of pending DMA.
+ * (See "SGI Altix Architecture Considerations for Linux Device Drivers",
+ * Document Number: 007-4763-001)
+ *
* TODO: simplify our interface;
* figure out how to save dmamap handle so can use two step.
*/
-dma_addr_t sn_dma_map_single(struct device *dev, void *cpu_addr, size_t size,
- int direction)
+dma_addr_t sn_dma_map_single_attrs(struct device *dev, void *cpu_addr,
+ size_t size, int direction,
+ struct dma_attrs *attrs)
{
dma_addr_t dma_addr;
unsigned long phys_addr;
struct pci_dev *pdev = to_pci_dev(dev);
struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev);
+ int dmabarr;
+
+ dmabarr = dma_get_attr(DMA_ATTR_WRITE_BARRIER, attrs);
BUG_ON(dev->bus != &pci_bus_type);
phys_addr = __pa(cpu_addr);
- dma_addr = provider->dma_map(pdev, phys_addr, size, SN_DMA_ADDR_PHYS);
+ if (dmabarr)
+ dma_addr = provider->dma_map_consistent(pdev, phys_addr,
+ size, SN_DMA_ADDR_PHYS);
+ else
+ dma_addr = provider->dma_map(pdev, phys_addr, size,
+ SN_DMA_ADDR_PHYS);
+
if (!dma_addr) {
printk(KERN_ERR "%s: out of ATEs\n", __func__);
return 0;
}
return dma_addr;
}
-EXPORT_SYMBOL(sn_dma_map_single);
+EXPORT_SYMBOL(sn_dma_map_single_attrs);
/**
- * sn_dma_unmap_single - unamp a DMA mapped page
+ * sn_dma_unmap_single_attrs - unamp a DMA mapped page
* @dev: device to sync
* @dma_addr: DMA address to sync
* @size: size of region
* @direction: DMA direction
+ * @attrs: optional dma attributes
*
* This routine is supposed to sync the DMA region specified
* by @dma_handle into the coherence domain. On SN, we're always cache
* coherent, so we just need to free any ATEs associated with this mapping.
*/
-void sn_dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
- int direction)
+void sn_dma_unmap_single_attrs(struct device *dev, dma_addr_t dma_addr,
+ size_t size, int direction,
+ struct dma_attrs *attrs)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev);
@@ -207,19 +226,21 @@ void sn_dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
provider->dma_unmap(pdev, dma_addr, direction);
}
-EXPORT_SYMBOL(sn_dma_unmap_single);
+EXPORT_SYMBOL(sn_dma_unmap_single_attrs);
/**
- * sn_dma_unmap_sg - unmap a DMA scatterlist
+ * sn_dma_unmap_sg_attrs - unmap a DMA scatterlist
* @dev: device to unmap
* @sg: scatterlist to unmap
* @nhwentries: number of scatterlist entries
* @direction: DMA direction
+ * @attrs: optional dma attributes
*
* Unmap a set of streaming mode DMA translations.
*/
-void sn_dma_unmap_sg(struct device *dev, struct scatterlist *sgl,
- int nhwentries, int direction)
+void sn_dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sgl,
+ int nhwentries, int direction,
+ struct dma_attrs *attrs)
{
int i;
struct pci_dev *pdev = to_pci_dev(dev);
@@ -234,25 +255,34 @@ void sn_dma_unmap_sg(struct device *dev, struct scatterlist *sgl,
sg->dma_length = 0;
}
}
-EXPORT_SYMBOL(sn_dma_unmap_sg);
+EXPORT_SYMBOL(sn_dma_unmap_sg_attrs);
/**
- * sn_dma_map_sg - map a scatterlist for DMA
+ * sn_dma_map_sg_attrs - map a scatterlist for DMA
* @dev: device to map for
* @sg: scatterlist to map
* @nhwentries: number of entries
* @direction: direction of the DMA transaction
+ * @attrs: optional dma attributes
+ *
+ * mappings with the DMA_ATTR_WRITE_BARRIER get mapped with
+ * dma_map_consistent() so that writes force a flush of pending DMA.
+ * (See "SGI Altix Architecture Considerations for Linux Device Drivers",
+ * Document Number: 007-4763-001)
*
* Maps each entry of @sg for DMA.
*/
-int sn_dma_map_sg(struct device *dev, struct scatterlist *sgl, int nhwentries,
- int direction)
+int sn_dma_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
+ int nhwentries, int direction, struct dma_attrs *attrs)
{
unsigned long phys_addr;
struct scatterlist *saved_sg = sgl, *sg;
struct pci_dev *pdev = to_pci_dev(dev);
struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev);
int i;
+ int dmabarr;
+
+ dmabarr = dma_get_attr(DMA_ATTR_WRITE_BARRIER, attrs);
BUG_ON(dev->bus != &pci_bus_type);
@@ -260,11 +290,19 @@ int sn_dma_map_sg(struct device *dev, struct scatterlist *sgl, int nhwentries,
* Setup a DMA address for each entry in the scatterlist.
*/
for_each_sg(sgl, sg, nhwentries, i) {
+ dma_addr_t dma_addr;
phys_addr = SG_ENT_PHYS_ADDRESS(sg);
- sg->dma_address = provider->dma_map(pdev,
- phys_addr, sg->length,
- SN_DMA_ADDR_PHYS);
+ if (dmabarr)
+ dma_addr = provider->dma_map_consistent(pdev,
+ phys_addr,
+ sg->length,
+ SN_DMA_ADDR_PHYS);
+ else
+ dma_addr = provider->dma_map(pdev, phys_addr,
+ sg->length,
+ SN_DMA_ADDR_PHYS);
+ sg->dma_address = dma_addr;
if (!sg->dma_address) {
printk(KERN_ERR "%s: out of ATEs\n", __func__);
@@ -272,7 +310,8 @@ int sn_dma_map_sg(struct device *dev, struct scatterlist *sgl, int nhwentries,
* Free any successfully allocated entries.
*/
if (i > 0)
- sn_dma_unmap_sg(dev, saved_sg, i, direction);
+ sn_dma_unmap_sg_attrs(dev, saved_sg, i,
+ direction, attrs);
return 0;
}
@@ -281,7 +320,7 @@ int sn_dma_map_sg(struct device *dev, struct scatterlist *sgl, int nhwentries,
return nhwentries;
}
-EXPORT_SYMBOL(sn_dma_map_sg);
+EXPORT_SYMBOL(sn_dma_map_sg_attrs);
void sn_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
size_t size, int direction)
diff --git a/arch/m68k/kernel/asm-offsets.c b/arch/m68k/kernel/asm-offsets.c
index 246a8820c223..b1f012f6c493 100644
--- a/arch/m68k/kernel/asm-offsets.c
+++ b/arch/m68k/kernel/asm-offsets.c
@@ -11,14 +11,12 @@
#include <linux/stddef.h>
#include <linux/sched.h>
#include <linux/kernel_stat.h>
+#include <linux/kbuild.h>
#include <asm/bootinfo.h>
#include <asm/irq.h>
#include <asm/amigahw.h>
#include <linux/font.h>
-#define DEFINE(sym, val) \
- asm volatile("\n->" #sym " %0 " #val : : "i" (val))
-
int main(void)
{
/* offsets into the task struct */
diff --git a/arch/m68k/kernel/ints.c b/arch/m68k/kernel/ints.c
index 2b412454cb41..ded7dd2f67b2 100644
--- a/arch/m68k/kernel/ints.c
+++ b/arch/m68k/kernel/ints.c
@@ -186,7 +186,7 @@ int setup_irq(unsigned int irq, struct irq_node *node)
if (irq >= NR_IRQS || !(contr = irq_controller[irq])) {
printk("%s: Incorrect IRQ %d from %s\n",
- __FUNCTION__, irq, node->devname);
+ __func__, irq, node->devname);
return -ENXIO;
}
@@ -249,7 +249,7 @@ void free_irq(unsigned int irq, void *dev_id)
unsigned long flags;
if (irq >= NR_IRQS || !(contr = irq_controller[irq])) {
- printk("%s: Incorrect IRQ %d\n", __FUNCTION__, irq);
+ printk("%s: Incorrect IRQ %d\n", __func__, irq);
return;
}
@@ -267,7 +267,7 @@ void free_irq(unsigned int irq, void *dev_id)
node->handler = NULL;
} else
printk("%s: Removing probably wrong IRQ %d\n",
- __FUNCTION__, irq);
+ __func__, irq);
if (!irq_list[irq]) {
if (contr->shutdown)
@@ -288,7 +288,7 @@ void enable_irq(unsigned int irq)
if (irq >= NR_IRQS || !(contr = irq_controller[irq])) {
printk("%s: Incorrect IRQ %d\n",
- __FUNCTION__, irq);
+ __func__, irq);
return;
}
@@ -312,7 +312,7 @@ void disable_irq(unsigned int irq)
if (irq >= NR_IRQS || !(contr = irq_controller[irq])) {
printk("%s: Incorrect IRQ %d\n",
- __FUNCTION__, irq);
+ __func__, irq);
return;
}
diff --git a/arch/m68k/mac/iop.c b/arch/m68k/mac/iop.c
index 5b2799eb96a6..326fb9978094 100644
--- a/arch/m68k/mac/iop.c
+++ b/arch/m68k/mac/iop.c
@@ -109,7 +109,6 @@
#include <linux/mm.h>
#include <linux/delay.h>
#include <linux/init.h>
-#include <linux/proc_fs.h>
#include <linux/interrupt.h>
#include <asm/bootinfo.h>
@@ -124,10 +123,6 @@
int iop_scc_present,iop_ism_present;
-#ifdef CONFIG_PROC_FS
-static int iop_get_proc_info(char *, char **, off_t, int);
-#endif /* CONFIG_PROC_FS */
-
/* structure for tracking channel listeners */
struct listener {
@@ -299,12 +294,6 @@ void __init iop_init(void)
iop_listeners[IOP_NUM_ISM][i].devname = NULL;
iop_listeners[IOP_NUM_ISM][i].handler = NULL;
}
-
-#if 0 /* Crashing in 2.4 now, not yet sure why. --jmt */
-#ifdef CONFIG_PROC_FS
- create_proc_info_entry("mac_iop", 0, &proc_root, iop_get_proc_info);
-#endif
-#endif
}
/*
@@ -637,77 +626,3 @@ irqreturn_t iop_ism_irq(int irq, void *dev_id)
}
return IRQ_HANDLED;
}
-
-#ifdef CONFIG_PROC_FS
-
-char *iop_chan_state(int state)
-{
- switch(state) {
- case IOP_MSG_IDLE : return "idle ";
- case IOP_MSG_NEW : return "new ";
- case IOP_MSG_RCVD : return "received ";
- case IOP_MSG_COMPLETE : return "completed ";
- default : return "unknown ";
- }
-}
-
-int iop_dump_one_iop(char *buf, int iop_num, char *iop_name)
-{
- int i,len = 0;
- volatile struct mac_iop *iop = iop_base[iop_num];
-
- len += sprintf(buf+len, "%s IOP channel states:\n\n", iop_name);
- len += sprintf(buf+len, "## send_state recv_state device\n");
- len += sprintf(buf+len, "------------------------------------------------\n");
- for (i = 0 ; i < NUM_IOP_CHAN ; i++) {
- len += sprintf(buf+len, "%2d %10s %10s %s\n", i,
- iop_chan_state(iop_readb(iop, IOP_ADDR_SEND_STATE+i)),
- iop_chan_state(iop_readb(iop, IOP_ADDR_RECV_STATE+i)),
- iop_listeners[iop_num][i].handler?
- iop_listeners[iop_num][i].devname : "");
-
- }
- len += sprintf(buf+len, "\n");
- return len;
-}
-
-static int iop_get_proc_info(char *buf, char **start, off_t pos, int count)
-{
- int len, cnt;
-
- cnt = 0;
- len = sprintf(buf, "IOPs detected:\n\n");
-
- if (iop_scc_present) {
- len += sprintf(buf+len, "SCC IOP (%p): status %02X\n",
- iop_base[IOP_NUM_SCC],
- (uint) iop_base[IOP_NUM_SCC]->status_ctrl);
- }
- if (iop_ism_present) {
- len += sprintf(buf+len, "ISM IOP (%p): status %02X\n\n",
- iop_base[IOP_NUM_ISM],
- (uint) iop_base[IOP_NUM_ISM]->status_ctrl);
- }
-
- if (iop_scc_present) {
- len += iop_dump_one_iop(buf+len, IOP_NUM_SCC, "SCC");
-
- }
-
- if (iop_ism_present) {
- len += iop_dump_one_iop(buf+len, IOP_NUM_ISM, "ISM");
-
- }
-
- if (len >= pos) {
- if (!*start) {
- *start = buf + pos;
- cnt = len - pos;
- } else {
- cnt += len;
- }
- }
- return (count > cnt) ? cnt : count;
-}
-
-#endif /* CONFIG_PROC_FS */
diff --git a/arch/m68k/mac/oss.c b/arch/m68k/mac/oss.c
index 50603d3dce84..3c943d2ec570 100644
--- a/arch/m68k/mac/oss.c
+++ b/arch/m68k/mac/oss.c
@@ -190,7 +190,7 @@ void oss_irq_enable(int irq) {
break;
#ifdef DEBUG_IRQUSE
default:
- printk("%s unknown irq %d\n",__FUNCTION__, irq);
+ printk("%s unknown irq %d\n", __func__, irq);
break;
#endif
}
@@ -230,7 +230,7 @@ void oss_irq_disable(int irq) {
break;
#ifdef DEBUG_IRQUSE
default:
- printk("%s unknown irq %d\n", __FUNCTION__, irq);
+ printk("%s unknown irq %d\n", __func__, irq);
break;
#endif
}
diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c
index f42caa79e4e8..a2bb01f59642 100644
--- a/arch/m68k/mm/init.c
+++ b/arch/m68k/mm/init.c
@@ -79,7 +79,6 @@ void show_mem(void)
printk("\nMem-info:\n");
show_free_areas();
- printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
for_each_online_pgdat(pgdat) {
for (i = 0; i < pgdat->node_spanned_pages; i++) {
struct page *page = pgdat->node_mem_map + i;
diff --git a/arch/m68k/q40/q40ints.c b/arch/m68k/q40/q40ints.c
index 46161cef08b9..9f0e3d59bf92 100644
--- a/arch/m68k/q40/q40ints.c
+++ b/arch/m68k/q40/q40ints.c
@@ -47,7 +47,7 @@ static int q40_irq_startup(unsigned int irq)
switch (irq) {
case 1: case 2: case 8: case 9:
case 11: case 12: case 13:
- printk("%s: ISA IRQ %d not implemented by HW\n", __FUNCTION__, irq);
+ printk("%s: ISA IRQ %d not implemented by HW\n", __func__, irq);
return -ENXIO;
}
return 0;
diff --git a/arch/m68knommu/kernel/asm-offsets.c b/arch/m68knommu/kernel/asm-offsets.c
index d97b89bae53c..fd0c685a7f11 100644
--- a/arch/m68knommu/kernel/asm-offsets.c
+++ b/arch/m68knommu/kernel/asm-offsets.c
@@ -13,15 +13,11 @@
#include <linux/kernel_stat.h>
#include <linux/ptrace.h>
#include <linux/hardirq.h>
+#include <linux/kbuild.h>
#include <asm/bootinfo.h>
#include <asm/irq.h>
#include <asm/thread_info.h>
-#define DEFINE(sym, val) \
- asm volatile("\n->" #sym " %0 " #val : : "i" (val))
-
-#define BLANK() asm volatile("\n->" : : )
-
int main(void)
{
/* offsets into the task struct */
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 8724ed3298d3..e5a7c5d96364 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -81,7 +81,9 @@ config MIPS_COBALT
config MACH_DECSTATION
bool "DECstations"
select BOOT_ELF32
+ select CEVT_DS1287
select CEVT_R4K
+ select CSRC_IOASIC
select CSRC_R4K
select CPU_DADDI_WORKAROUNDS if 64BIT
select CPU_R4000_WORKAROUNDS if 64BIT
@@ -221,6 +223,7 @@ config MIPS_MALTA
select DMA_NONCOHERENT
select GENERIC_ISA_DMA
select IRQ_CPU
+ select IRQ_GIC
select HW_HAS_PCI
select I8253
select I8259
@@ -309,12 +312,12 @@ config MACH_VR41XX
select GENERIC_HARDIRQS_NO__DO_IRQ
config PNX8550_JBS
- bool "Philips PNX8550 based JBS board"
+ bool "NXP PNX8550 based JBS board"
select PNX8550
select SYS_SUPPORTS_LITTLE_ENDIAN
config PNX8550_STB810
- bool "Philips PNX8550 based STB810 board"
+ bool "NXP PNX8550 based STB810 board"
select PNX8550
select SYS_SUPPORTS_LITTLE_ENDIAN
@@ -612,6 +615,7 @@ config TOSHIBA_JMR3927
select SYS_SUPPORTS_LITTLE_ENDIAN
select SYS_SUPPORTS_BIG_ENDIAN
select GENERIC_HARDIRQS_NO__DO_IRQ
+ select GPIO_TXX9
config TOSHIBA_RBTX4927
bool "Toshiba RBTX49[23]7 board"
@@ -653,7 +657,7 @@ config TOSHIBA_RBTX4938
select SYS_SUPPORTS_BIG_ENDIAN
select SYS_SUPPORTS_KGDB
select GENERIC_HARDIRQS_NO__DO_IRQ
- select GENERIC_GPIO
+ select GPIO_TXX9
help
This Toshiba board is based on the TX4938 processor. Say Y here to
support this machine type
@@ -767,6 +771,9 @@ config BOOT_RAW
config CEVT_BCM1480
bool
+config CEVT_DS1287
+ bool
+
config CEVT_GT641XX
bool
@@ -782,12 +789,20 @@ config CEVT_TXX9
config CSRC_BCM1480
bool
+config CSRC_IOASIC
+ bool
+
config CSRC_R4K
bool
config CSRC_SB1250
bool
+config GPIO_TXX9
+ select GENERIC_GPIO
+ select HAVE_GPIO_LIB
+ bool
+
config CFE
bool
@@ -840,6 +855,9 @@ config MIPS_NILE4
config MIPS_DISABLE_OBSOLETE_IDE
bool
+config SYNC_R4K
+ bool
+
config NO_IOPORT
def_bool n
@@ -909,6 +927,9 @@ config IRQ_TXX9
config IRQ_GT641XX
bool
+config IRQ_GIC
+ bool
+
config MIPS_BOARDS_GEN
bool
@@ -1811,6 +1832,17 @@ config NR_CPUS
performance should round up your number of processors to the next
power of two.
+config MIPS_CMP
+ bool "MIPS CMP framework support"
+ depends on SMP
+ select SYNC_R4K
+ select SYS_SUPPORTS_SCHED_SMT
+ select WEAK_ORDERING
+ default n
+ help
+ This is a placeholder option for the GCMP work. It will need to
+ be handled differently...
+
source "kernel/time/Kconfig"
#
diff --git a/arch/mips/Kconfig.debug b/arch/mips/Kconfig.debug
index fd7124c1b75a..f18cf92650e3 100644
--- a/arch/mips/Kconfig.debug
+++ b/arch/mips/Kconfig.debug
@@ -73,14 +73,4 @@ config RUNTIME_DEBUG
include/asm-mips/debug.h for debuging macros.
If unsure, say N.
-config MIPS_UNCACHED
- bool "Run uncached"
- depends on DEBUG_KERNEL && !SMP && !SGI_IP27
- help
- If you say Y here there kernel will disable all CPU caches. This will
- reduce the system's performance dramatically but can help finding
- otherwise hard to track bugs. It can also useful if you're doing
- hardware debugging with a logic analyzer and need to see all traffic
- on the bus.
-
endmenu
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index 1c62381f5c23..69648d01acc0 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -410,21 +410,21 @@ load-$(CONFIG_CASIO_E55) += 0xffffffff80004000
load-$(CONFIG_TANBAC_TB022X) += 0xffffffff80000000
#
-# Common Philips PNX8550
+# Common NXP PNX8550
#
-core-$(CONFIG_SOC_PNX8550) += arch/mips/philips/pnx8550/common/
+core-$(CONFIG_SOC_PNX8550) += arch/mips/nxp/pnx8550/common/
cflags-$(CONFIG_SOC_PNX8550) += -Iinclude/asm-mips/mach-pnx8550
#
-# Philips PNX8550 JBS board
+# NXP PNX8550 JBS board
#
-libs-$(CONFIG_PNX8550_JBS) += arch/mips/philips/pnx8550/jbs/
+libs-$(CONFIG_PNX8550_JBS) += arch/mips/nxp/pnx8550/jbs/
#cflags-$(CONFIG_PNX8550_JBS) += -Iinclude/asm-mips/mach-pnx8550
load-$(CONFIG_PNX8550_JBS) += 0xffffffff80060000
-# Philips PNX8550 STB810 board
+# NXP PNX8550 STB810 board
#
-libs-$(CONFIG_PNX8550_STB810) += arch/mips/philips/pnx8550/stb810/
+libs-$(CONFIG_PNX8550_STB810) += arch/mips/nxp/pnx8550/stb810/
load-$(CONFIG_PNX8550_STB810) += 0xffffffff80060000
# NEC EMMA2RH boards
diff --git a/arch/mips/au1000/common/cputable.c b/arch/mips/au1000/common/cputable.c
index 5c0d35d6e22a..8c93a05d7382 100644
--- a/arch/mips/au1000/common/cputable.c
+++ b/arch/mips/au1000/common/cputable.c
@@ -11,10 +11,7 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
-#include <linux/string.h>
-#include <linux/sched.h>
-#include <linux/threads.h>
-#include <linux/init.h>
+
#include <asm/mach-au1x00/au1000.h>
struct cpu_spec* cur_cpu_spec[NR_CPUS];
diff --git a/arch/mips/au1000/common/dbdma.c b/arch/mips/au1000/common/dbdma.c
index 57f17b41098d..53377dfc0640 100644
--- a/arch/mips/au1000/common/dbdma.c
+++ b/arch/mips/au1000/common/dbdma.c
@@ -31,18 +31,12 @@
*/
#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
-#include <linux/string.h>
-#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <asm/mach-au1x00/au1000.h>
#include <asm/mach-au1x00/au1xxx_dbdma.h>
-#include <asm/system.h>
-
#if defined(CONFIG_SOC_AU1550) || defined(CONFIG_SOC_AU1200)
diff --git a/arch/mips/au1000/common/dbg_io.c b/arch/mips/au1000/common/dbg_io.c
index 79e0b0a51ace..eae1bb2ca26e 100644
--- a/arch/mips/au1000/common/dbg_io.c
+++ b/arch/mips/au1000/common/dbg_io.c
@@ -1,5 +1,4 @@
-#include <asm/io.h>
#include <asm/mach-au1x00/au1000.h>
#ifdef CONFIG_KGDB
@@ -55,8 +54,7 @@ typedef unsigned int uint32;
#define UART16550_READ(y) (au_readl(DEBUG_BASE + y) & 0xff)
#define UART16550_WRITE(y, z) (au_writel(z&0xff, DEBUG_BASE + y))
-extern unsigned long get_au1x00_uart_baud_base(void);
-extern unsigned long cal_r4koff(void);
+extern unsigned long calc_clock(void);
void debugInit(uint32 baud, uint8 data, uint8 parity, uint8 stop)
{
@@ -64,7 +62,7 @@ void debugInit(uint32 baud, uint8 data, uint8 parity, uint8 stop)
if (UART16550_READ(UART_MOD_CNTRL) != 0x3) {
UART16550_WRITE(UART_MOD_CNTRL, 3);
}
- cal_r4koff();
+ calc_clock();
/* disable interrupts */
UART16550_WRITE(UART_IER, 0);
diff --git a/arch/mips/au1000/common/dma.c b/arch/mips/au1000/common/dma.c
index c78260d4e837..95f69ea146e9 100644
--- a/arch/mips/au1000/common/dma.c
+++ b/arch/mips/au1000/common/dma.c
@@ -33,12 +33,9 @@
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/errno.h>
-#include <linux/sched.h>
#include <linux/spinlock.h>
-#include <linux/string.h>
-#include <linux/delay.h>
#include <linux/interrupt.h>
-#include <asm/system.h>
+
#include <asm/mach-au1x00/au1000.h>
#include <asm/mach-au1x00/au1000_dma.h>
diff --git a/arch/mips/au1000/common/gpio.c b/arch/mips/au1000/common/gpio.c
index 0b658f1db4ce..525452589971 100644
--- a/arch/mips/au1000/common/gpio.c
+++ b/arch/mips/au1000/common/gpio.c
@@ -27,13 +27,8 @@
* others have a second one : GPIO2
*/
-#include <linux/init.h>
-#include <linux/io.h>
-#include <linux/types.h>
#include <linux/module.h>
-#include <asm/addrspace.h>
-
#include <asm/mach-au1x00/au1000.h>
#include <asm/gpio.h>
diff --git a/arch/mips/au1000/common/irq.c b/arch/mips/au1000/common/irq.c
index 3c7714f057ac..f0626992fd75 100644
--- a/arch/mips/au1000/common/irq.c
+++ b/arch/mips/au1000/common/irq.c
@@ -1,7 +1,6 @@
/*
- * Copyright 2001 MontaVista Software Inc.
- * Author: MontaVista Software, Inc.
- * ppopov@mvista.com or source@mvista.com
+ * Copyright 2001, 2007-2008 MontaVista Software Inc.
+ * Author: MontaVista Software, Inc. <source@mvista.com>
*
* Copyright (C) 2007 Ralf Baechle (ralf@linux-mips.org)
*
@@ -27,7 +26,6 @@
*/
#include <linux/bitops.h>
#include <linux/init.h>
-#include <linux/io.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
@@ -591,7 +589,7 @@ void __init arch_init_irq(void)
imp++;
}
- set_c0_status(ALLINTS);
+ set_c0_status(IE_IRQ0 | IE_IRQ1 | IE_IRQ2 | IE_IRQ3 | IE_IRQ4);
/* Board specific IRQ initialization.
*/
diff --git a/arch/mips/au1000/common/pci.c b/arch/mips/au1000/common/pci.c
index ce771487567d..7e966b31e3e1 100644
--- a/arch/mips/au1000/common/pci.c
+++ b/arch/mips/au1000/common/pci.c
@@ -30,7 +30,7 @@
* with this program; if not, write to the Free Software Foundation, Inc.,
* 675 Mass Ave, Cambridge, MA 02139, USA.
*/
-#include <linux/types.h>
+
#include <linux/pci.h>
#include <linux/kernel.h>
#include <linux/init.h>
diff --git a/arch/mips/au1000/common/platform.c b/arch/mips/au1000/common/platform.c
index 39d681265297..31d2a2270878 100644
--- a/arch/mips/au1000/common/platform.c
+++ b/arch/mips/au1000/common/platform.c
@@ -3,18 +3,65 @@
*
* Copyright 2004, Matt Porter <mporter@kernel.crashing.org>
*
+ * (C) Copyright Embedded Alley Solutions, Inc 2005
+ * Author: Pantelis Antoniou <pantelis@embeddedalley.com>
+ *
* This file is licensed under the terms of the GNU General Public
* License version 2. This program is licensed "as is" without any
* warranty of any kind, whether express or implied.
*/
-#include <linux/device.h>
+
#include <linux/platform_device.h>
-#include <linux/kernel.h>
+#include <linux/serial_8250.h>
#include <linux/init.h>
-#include <linux/resource.h>
#include <asm/mach-au1x00/au1xxx.h>
+#define PORT(_base, _irq) \
+ { \
+ .iobase = _base, \
+ .membase = (void __iomem *)_base,\
+ .mapbase = CPHYSADDR(_base), \
+ .irq = _irq, \
+ .regshift = 2, \
+ .iotype = UPIO_AU, \
+ .flags = UPF_SKIP_TEST \
+ }
+
+static struct plat_serial8250_port au1x00_uart_data[] = {
+#if defined(CONFIG_SERIAL_8250_AU1X00)
+#if defined(CONFIG_SOC_AU1000)
+ PORT(UART0_ADDR, AU1000_UART0_INT),
+ PORT(UART1_ADDR, AU1000_UART1_INT),
+ PORT(UART2_ADDR, AU1000_UART2_INT),
+ PORT(UART3_ADDR, AU1000_UART3_INT),
+#elif defined(CONFIG_SOC_AU1500)
+ PORT(UART0_ADDR, AU1500_UART0_INT),
+ PORT(UART3_ADDR, AU1500_UART3_INT),
+#elif defined(CONFIG_SOC_AU1100)
+ PORT(UART0_ADDR, AU1100_UART0_INT),
+ PORT(UART1_ADDR, AU1100_UART1_INT),
+ PORT(UART3_ADDR, AU1100_UART3_INT),
+#elif defined(CONFIG_SOC_AU1550)
+ PORT(UART0_ADDR, AU1550_UART0_INT),
+ PORT(UART1_ADDR, AU1550_UART1_INT),
+ PORT(UART3_ADDR, AU1550_UART3_INT),
+#elif defined(CONFIG_SOC_AU1200)
+ PORT(UART0_ADDR, AU1200_UART0_INT),
+ PORT(UART1_ADDR, AU1200_UART1_INT),
+#endif
+#endif /* CONFIG_SERIAL_8250_AU1X00 */
+ { },
+};
+
+static struct platform_device au1xx0_uart_device = {
+ .name = "serial8250",
+ .id = PLAT8250_DEV_AU1X00,
+ .dev = {
+ .platform_data = au1x00_uart_data,
+ },
+};
+
/* OHCI (USB full speed host controller) */
static struct resource au1xxx_usb_ohci_resources[] = {
[0] = {
@@ -186,19 +233,6 @@ static struct resource au1200_lcd_resources[] = {
}
};
-static struct resource au1200_ide0_resources[] = {
- [0] = {
- .start = AU1XXX_ATA_PHYS_ADDR,
- .end = AU1XXX_ATA_PHYS_ADDR + AU1XXX_ATA_PHYS_LEN - 1,
- .flags = IORESOURCE_MEM,
- },
- [1] = {
- .start = AU1XXX_ATA_INT,
- .end = AU1XXX_ATA_INT,
- .flags = IORESOURCE_IRQ,
- }
-};
-
static u64 au1200_lcd_dmamask = ~(u32)0;
static struct platform_device au1200_lcd_device = {
@@ -212,20 +246,6 @@ static struct platform_device au1200_lcd_device = {
.resource = au1200_lcd_resources,
};
-
-static u64 ide0_dmamask = ~(u32)0;
-
-static struct platform_device au1200_ide0_device = {
- .name = "au1200-ide",
- .id = 0,
- .dev = {
- .dma_mask = &ide0_dmamask,
- .coherent_dma_mask = 0xffffffff,
- },
- .num_resources = ARRAY_SIZE(au1200_ide0_resources),
- .resource = au1200_ide0_resources,
-};
-
static u64 au1xxx_mmc_dmamask = ~(u32)0;
static struct platform_device au1xxx_mmc_device = {
@@ -245,31 +265,6 @@ static struct platform_device au1x00_pcmcia_device = {
.id = 0,
};
-#ifdef CONFIG_MIPS_DB1200
-
-static struct resource smc91x_resources[] = {
- [0] = {
- .name = "smc91x-regs",
- .start = AU1XXX_SMC91111_PHYS_ADDR,
- .end = AU1XXX_SMC91111_PHYS_ADDR + 0xfffff,
- .flags = IORESOURCE_MEM,
- },
- [1] = {
- .start = AU1XXX_SMC91111_IRQ,
- .end = AU1XXX_SMC91111_IRQ,
- .flags = IORESOURCE_IRQ,
- },
-};
-
-static struct platform_device smc91x_device = {
- .name = "smc91x",
- .id = -1,
- .num_resources = ARRAY_SIZE(smc91x_resources),
- .resource = smc91x_resources,
-};
-
-#endif
-
/* All Alchemy demoboards with I2C have this #define in their headers */
#ifdef SMBUS_PSC_BASE
static struct resource pbdb_smbus_resources[] = {
@@ -289,6 +284,7 @@ static struct platform_device pbdb_smbus_device = {
#endif
static struct platform_device *au1xxx_platform_devices[] __initdata = {
+ &au1xx0_uart_device,
&au1xxx_usb_ohci_device,
&au1x00_pcmcia_device,
#ifdef CONFIG_FB_AU1100
@@ -299,12 +295,8 @@ static struct platform_device *au1xxx_platform_devices[] __initdata = {
&au1xxx_usb_gdt_device,
&au1xxx_usb_otg_device,
&au1200_lcd_device,
- &au1200_ide0_device,
&au1xxx_mmc_device,
#endif
-#ifdef CONFIG_MIPS_DB1200
- &smc91x_device,
-#endif
#ifdef SMBUS_PSC_BASE
&pbdb_smbus_device,
#endif
@@ -312,6 +304,13 @@ static struct platform_device *au1xxx_platform_devices[] __initdata = {
int __init au1xxx_platform_init(void)
{
+ unsigned int uartclk = get_au1x00_uart_baud_base() * 16;
+ int i;
+
+ /* Fill up uartclk. */
+ for (i = 0; au1x00_uart_data[i].flags ; i++)
+ au1x00_uart_data[i].uartclk = uartclk;
+
return platform_add_devices(au1xxx_platform_devices, ARRAY_SIZE(au1xxx_platform_devices));
}
diff --git a/arch/mips/au1000/common/power.c b/arch/mips/au1000/common/power.c
index 54047d69b820..812a5f8b7d26 100644
--- a/arch/mips/au1000/common/power.c
+++ b/arch/mips/au1000/common/power.c
@@ -29,17 +29,14 @@
* with this program; if not, write to the Free Software Foundation, Inc.,
* 675 Mass Ave, Cambridge, MA 02139, USA.
*/
+
#include <linux/init.h>
#include <linux/pm.h>
#include <linux/pm_legacy.h>
-#include <linux/slab.h>
#include <linux/sysctl.h>
#include <linux/jiffies.h>
-#include <asm/string.h>
#include <asm/uaccess.h>
-#include <asm/io.h>
-#include <asm/system.h>
#include <asm/cacheflush.h>
#include <asm/mach-au1x00/au1000.h>
@@ -47,17 +44,13 @@
#define DEBUG 1
#ifdef DEBUG
-# define DPRINTK(fmt, args...) printk("%s: " fmt, __FUNCTION__ , ## args)
+# define DPRINTK(fmt, args...) printk("%s: " fmt, __func__, ## args)
#else
# define DPRINTK(fmt, args...)
#endif
static void au1000_calibrate_delay(void);
-extern void set_au1x00_speed(unsigned int new_freq);
-extern unsigned int get_au1x00_speed(void);
-extern unsigned long get_au1x00_uart_baud_base(void);
-extern void set_au1x00_uart_baud_base(unsigned long new_baud_base);
extern unsigned long save_local_and_disable(int controller);
extern void restore_local_and_enable(int controller, unsigned long mask);
extern void local_enable_irq(unsigned int irq_nr);
diff --git a/arch/mips/au1000/common/prom.c b/arch/mips/au1000/common/prom.c
index 90d70695aa60..f10af829e4ec 100644
--- a/arch/mips/au1000/common/prom.c
+++ b/arch/mips/au1000/common/prom.c
@@ -33,8 +33,8 @@
* with this program; if not, write to the Free Software Foundation, Inc.,
* 675 Mass Ave, Cambridge, MA 02139, USA.
*/
+
#include <linux/module.h>
-#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/string.h>
diff --git a/arch/mips/au1000/common/puts.c b/arch/mips/au1000/common/puts.c
index 2705829cd466..e34c67e89293 100644
--- a/arch/mips/au1000/common/puts.c
+++ b/arch/mips/au1000/common/puts.c
@@ -28,7 +28,6 @@
* 675 Mass Ave, Cambridge, MA 02139, USA.
*/
-#include <linux/types.h>
#include <asm/mach-au1x00/au1000.h>
#define SERIAL_BASE UART_BASE
diff --git a/arch/mips/au1000/common/reset.c b/arch/mips/au1000/common/reset.c
index b8638d293cf9..60cec537c745 100644
--- a/arch/mips/au1000/common/reset.c
+++ b/arch/mips/au1000/common/reset.c
@@ -27,13 +27,7 @@
* with this program; if not, write to the Free Software Foundation, Inc.,
* 675 Mass Ave, Cambridge, MA 02139, USA.
*/
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <asm/io.h>
-#include <asm/pgtable.h>
-#include <asm/processor.h>
-#include <asm/reboot.h>
-#include <asm/system.h>
+
#include <asm/mach-au1x00/au1000.h>
extern int au_sleep(void);
diff --git a/arch/mips/au1000/common/setup.c b/arch/mips/au1000/common/setup.c
index 9e4ab80caab6..0e86f7a6b4a7 100644
--- a/arch/mips/au1000/common/setup.c
+++ b/arch/mips/au1000/common/setup.c
@@ -25,21 +25,14 @@
* with this program; if not, write to the Free Software Foundation, Inc.,
* 675 Mass Ave, Cambridge, MA 02139, USA.
*/
+
#include <linux/init.h>
-#include <linux/sched.h>
#include <linux/ioport.h>
-#include <linux/mm.h>
-#include <linux/delay.h>
-#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/pm.h>
-#include <asm/cpu.h>
-#include <asm/bootinfo.h>
-#include <asm/irq.h>
#include <asm/mipsregs.h>
#include <asm/reboot.h>
-#include <asm/pgtable.h>
#include <asm/time.h>
#include <au1000.h>
@@ -49,8 +42,6 @@ extern void __init board_setup(void);
extern void au1000_restart(char *);
extern void au1000_halt(void);
extern void au1000_power_off(void);
-extern void au1x_time_init(void);
-extern void au1x_timer_setup(struct irqaction *irq);
extern void set_cpuspec(void);
void __init plat_mem_setup(void)
diff --git a/arch/mips/au1000/common/sleeper.S b/arch/mips/au1000/common/sleeper.S
index 683d9da84b66..4b3cf021a454 100644
--- a/arch/mips/au1000/common/sleeper.S
+++ b/arch/mips/au1000/common/sleeper.S
@@ -9,9 +9,9 @@
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*/
+
#include <asm/asm.h>
#include <asm/mipsregs.h>
-#include <asm/addrspace.h>
#include <asm/regdef.h>
#include <asm/stackframe.h>
diff --git a/arch/mips/au1000/common/time.c b/arch/mips/au1000/common/time.c
index e122bbc6cd88..bdb6d73b26fb 100644
--- a/arch/mips/au1000/common/time.c
+++ b/arch/mips/au1000/common/time.c
@@ -1,6 +1,6 @@
/*
*
- * Copyright (C) 2001 MontaVista Software, ppopov@mvista.com
+ * Copyright (C) 2001, 2006, 2008 MontaVista Software, <source@mvista.com>
* Copied and modified Carsten Langgaard's time.c
*
* Carsten Langgaard, carstenl@mips.com
@@ -34,23 +34,13 @@
#include <linux/types.h>
#include <linux/init.h>
-#include <linux/kernel_stat.h>
-#include <linux/sched.h>
#include <linux/spinlock.h>
-#include <linux/hardirq.h>
-#include <asm/compiler.h>
#include <asm/mipsregs.h>
#include <asm/time.h>
-#include <asm/div64.h>
#include <asm/mach-au1x00/au1000.h>
-#include <linux/mc146818rtc.h>
-#include <linux/timex.h>
-
-static unsigned long r4k_offset; /* Amount to increment compare reg each time */
-static unsigned long r4k_cur; /* What counter should be at next timer irq */
-int no_au1xxx_32khz;
+static int no_au1xxx_32khz;
extern int allow_au1k_wait; /* default off for CP0 Counter */
#ifdef CONFIG_PM
@@ -184,7 +174,7 @@ wakeup_counter0_set(int ticks)
* "wait" is enabled, and we need to detect if the 32KHz isn't present
* but requested......got it? :-) -- Dan
*/
-unsigned long cal_r4koff(void)
+unsigned long calc_clock(void)
{
unsigned long cpu_speed;
unsigned long flags;
@@ -229,19 +219,13 @@ unsigned long cal_r4koff(void)
// Equation: Baudrate = CPU / (SD * 2 * CLKDIV * 16)
set_au1x00_uart_baud_base(cpu_speed / (2 * ((int)(au_readl(SYS_POWERCTRL)&0x03) + 2) * 16));
spin_unlock_irqrestore(&time_lock, flags);
- return (cpu_speed / HZ);
+ return cpu_speed;
}
void __init plat_time_init(void)
{
- unsigned int est_freq;
-
- printk("calculating r4koff... ");
- r4k_offset = cal_r4koff();
- printk("%08lx(%d)\n", r4k_offset, (int) r4k_offset);
+ unsigned int est_freq = calc_clock();
- //est_freq = 2*r4k_offset*HZ;
- est_freq = r4k_offset*HZ;
est_freq += 5000; /* round */
est_freq -= est_freq%10000;
printk("CPU frequency %d.%02d MHz\n", est_freq/1000000,
@@ -249,9 +233,6 @@ void __init plat_time_init(void)
set_au1x00_speed(est_freq);
set_au1x00_lcd_clock(); // program the LCD clock
- r4k_cur = (read_c0_count() + r4k_offset);
- write_c0_compare(r4k_cur);
-
#ifdef CONFIG_PM
/*
* setup counter 0, since it keeps ticking after a
@@ -265,12 +246,8 @@ void __init plat_time_init(void)
* Check to ensure we really have a 32KHz oscillator before
* we do this.
*/
- if (no_au1xxx_32khz) {
+ if (no_au1xxx_32khz)
printk("WARNING: no 32KHz clock found.\n");
-
- /* Ensure we get CPO_COUNTER interrupts. */
- set_c0_status(IE_IRQ5);
- }
else {
while (au_readl(SYS_COUNTER_CNTRL) & SYS_CNTRL_C0S);
au_writel(0, SYS_TOYWRITE);
diff --git a/arch/mips/au1000/db1x00/board_setup.c b/arch/mips/au1000/db1x00/board_setup.c
index 99eafeada518..b7dcbad5c586 100644
--- a/arch/mips/au1000/db1x00/board_setup.c
+++ b/arch/mips/au1000/db1x00/board_setup.c
@@ -27,20 +27,9 @@
* with this program; if not, write to the Free Software Foundation, Inc.,
* 675 Mass Ave, Cambridge, MA 02139, USA.
*/
+
#include <linux/init.h>
-#include <linux/sched.h>
-#include <linux/ioport.h>
-#include <linux/mm.h>
-#include <linux/console.h>
-#include <linux/mc146818rtc.h>
-#include <linux/delay.h>
-
-#include <asm/cpu.h>
-#include <asm/bootinfo.h>
-#include <asm/irq.h>
-#include <asm/mipsregs.h>
-#include <asm/reboot.h>
-#include <asm/pgtable.h>
+
#include <asm/mach-au1x00/au1000.h>
#include <asm/mach-db1x00/db1x00.h>
diff --git a/arch/mips/au1000/db1x00/init.c b/arch/mips/au1000/db1x00/init.c
index e822c123eab8..d3b967caf70c 100644
--- a/arch/mips/au1000/db1x00/init.c
+++ b/arch/mips/au1000/db1x00/init.c
@@ -28,13 +28,8 @@
*/
#include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/bootmem.h>
-#include <linux/string.h>
#include <linux/kernel.h>
-#include <asm/addrspace.h>
#include <asm/bootinfo.h>
#include <prom.h>
diff --git a/arch/mips/au1000/db1x00/irqmap.c b/arch/mips/au1000/db1x00/irqmap.c
index 09cea03411b0..eaa50c7b6341 100644
--- a/arch/mips/au1000/db1x00/irqmap.c
+++ b/arch/mips/au1000/db1x00/irqmap.c
@@ -25,26 +25,9 @@
* with this program; if not, write to the Free Software Foundation, Inc.,
* 675 Mass Ave, Cambridge, MA 02139, USA.
*/
-#include <linux/errno.h>
+
#include <linux/init.h>
-#include <linux/irq.h>
-#include <linux/kernel_stat.h>
-#include <linux/module.h>
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/types.h>
-#include <linux/interrupt.h>
-#include <linux/ioport.h>
-#include <linux/timex.h>
-#include <linux/slab.h>
-#include <linux/random.h>
-#include <linux/delay.h>
-#include <linux/bitops.h>
-#include <asm/bootinfo.h>
-#include <asm/io.h>
-#include <asm/mipsregs.h>
-#include <asm/system.h>
#include <asm/mach-au1x00/au1000.h>
#ifdef CONFIG_MIPS_DB1500
diff --git a/arch/mips/au1000/mtx-1/board_setup.c b/arch/mips/au1000/mtx-1/board_setup.c
index 310d5dff89fc..5736354829c6 100644
--- a/arch/mips/au1000/mtx-1/board_setup.c
+++ b/arch/mips/au1000/mtx-1/board_setup.c
@@ -28,19 +28,9 @@
* with this program; if not, write to the Free Software Foundation, Inc.,
* 675 Mass Ave, Cambridge, MA 02139, USA.
*/
+
#include <linux/init.h>
-#include <linux/sched.h>
-#include <linux/ioport.h>
-#include <linux/mm.h>
-#include <linux/console.h>
-#include <linux/delay.h>
-#include <asm/cpu.h>
-#include <asm/bootinfo.h>
-#include <asm/irq.h>
-#include <asm/mipsregs.h>
-#include <asm/reboot.h>
-#include <asm/pgtable.h>
#include <asm/mach-au1x00/au1000.h>
extern int (*board_pci_idsel)(unsigned int devsel, int assert);
diff --git a/arch/mips/au1000/mtx-1/init.c b/arch/mips/au1000/mtx-1/init.c
index e700fd312a24..c015cbce1cca 100644
--- a/arch/mips/au1000/mtx-1/init.c
+++ b/arch/mips/au1000/mtx-1/init.c
@@ -28,14 +28,10 @@
* with this program; if not, write to the Free Software Foundation, Inc.,
* 675 Mass Ave, Cambridge, MA 02139, USA.
*/
-#include <linux/string.h>
+
#include <linux/kernel.h>
-#include <linux/sched.h>
#include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/bootmem.h>
-#include <asm/addrspace.h>
#include <asm/bootinfo.h>
#include <prom.h>
diff --git a/arch/mips/au1000/mtx-1/irqmap.c b/arch/mips/au1000/mtx-1/irqmap.c
index 49c612aeddcf..78d70c42c9db 100644
--- a/arch/mips/au1000/mtx-1/irqmap.c
+++ b/arch/mips/au1000/mtx-1/irqmap.c
@@ -25,26 +25,9 @@
* with this program; if not, write to the Free Software Foundation, Inc.,
* 675 Mass Ave, Cambridge, MA 02139, USA.
*/
-#include <linux/errno.h>
+
#include <linux/init.h>
-#include <linux/irq.h>
-#include <linux/kernel_stat.h>
-#include <linux/module.h>
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/types.h>
-#include <linux/interrupt.h>
-#include <linux/ioport.h>
-#include <linux/timex.h>
-#include <linux/slab.h>
-#include <linux/random.h>
-#include <linux/delay.h>
-#include <linux/bitops.h>
-#include <asm/bootinfo.h>
-#include <asm/io.h>
-#include <asm/mipsregs.h>
-#include <asm/system.h>
#include <asm/mach-au1x00/au1000.h>
char irq_tab_alchemy[][5] __initdata = {
diff --git a/arch/mips/au1000/mtx-1/platform.c b/arch/mips/au1000/mtx-1/platform.c
index ce8637b3afa9..a7edbf0829ac 100644
--- a/arch/mips/au1000/mtx-1/platform.c
+++ b/arch/mips/au1000/mtx-1/platform.c
@@ -19,7 +19,6 @@
*/
#include <linux/init.h>
-#include <linux/types.h>
#include <linux/platform_device.h>
#include <linux/leds.h>
#include <linux/gpio_keys.h>
diff --git a/arch/mips/au1000/pb1000/board_setup.c b/arch/mips/au1000/pb1000/board_setup.c
index 5198c4f98b43..33f15acc1b17 100644
--- a/arch/mips/au1000/pb1000/board_setup.c
+++ b/arch/mips/au1000/pb1000/board_setup.c
@@ -23,19 +23,10 @@
* with this program; if not, write to the Free Software Foundation, Inc.,
* 675 Mass Ave, Cambridge, MA 02139, USA.
*/
+
#include <linux/init.h>
-#include <linux/sched.h>
-#include <linux/ioport.h>
-#include <linux/mm.h>
-#include <linux/console.h>
#include <linux/delay.h>
-#include <asm/cpu.h>
-#include <asm/bootinfo.h>
-#include <asm/irq.h>
-#include <asm/mipsregs.h>
-#include <asm/reboot.h>
-#include <asm/pgtable.h>
#include <asm/mach-au1x00/au1000.h>
#include <asm/mach-pb1x00/pb1000.h>
diff --git a/arch/mips/au1000/pb1000/init.c b/arch/mips/au1000/pb1000/init.c
index 2515b9fb24af..549447df71d6 100644
--- a/arch/mips/au1000/pb1000/init.c
+++ b/arch/mips/au1000/pb1000/init.c
@@ -26,14 +26,10 @@
* with this program; if not, write to the Free Software Foundation, Inc.,
* 675 Mass Ave, Cambridge, MA 02139, USA.
*/
+
#include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/bootmem.h>
-#include <linux/string.h>
#include <linux/kernel.h>
-#include <asm/addrspace.h>
#include <asm/bootinfo.h>
#include <prom.h>
diff --git a/arch/mips/au1000/pb1000/irqmap.c b/arch/mips/au1000/pb1000/irqmap.c
index 88e354508204..b3d56b0af321 100644
--- a/arch/mips/au1000/pb1000/irqmap.c
+++ b/arch/mips/au1000/pb1000/irqmap.c
@@ -25,26 +25,10 @@
* with this program; if not, write to the Free Software Foundation, Inc.,
* 675 Mass Ave, Cambridge, MA 02139, USA.
*/
-#include <linux/errno.h>
+
#include <linux/init.h>
-#include <linux/irq.h>
-#include <linux/kernel_stat.h>
-#include <linux/module.h>
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/types.h>
#include <linux/interrupt.h>
-#include <linux/ioport.h>
-#include <linux/timex.h>
-#include <linux/slab.h>
-#include <linux/random.h>
-#include <linux/delay.h>
-#include <linux/bitops.h>
-#include <asm/bootinfo.h>
-#include <asm/io.h>
-#include <asm/mipsregs.h>
-#include <asm/system.h>
#include <asm/mach-au1x00/au1000.h>
struct au1xxx_irqmap __initdata au1xxx_irq_map[] = {
diff --git a/arch/mips/au1000/pb1100/board_setup.c b/arch/mips/au1000/pb1100/board_setup.c
index 42874a6b31d1..656164c8e9ca 100644
--- a/arch/mips/au1000/pb1100/board_setup.c
+++ b/arch/mips/au1000/pb1100/board_setup.c
@@ -23,19 +23,10 @@
* with this program; if not, write to the Free Software Foundation, Inc.,
* 675 Mass Ave, Cambridge, MA 02139, USA.
*/
+
#include <linux/init.h>
-#include <linux/sched.h>
-#include <linux/ioport.h>
-#include <linux/mm.h>
-#include <linux/console.h>
#include <linux/delay.h>
-#include <asm/cpu.h>
-#include <asm/bootinfo.h>
-#include <asm/irq.h>
-#include <asm/mipsregs.h>
-#include <asm/reboot.h>
-#include <asm/pgtable.h>
#include <asm/mach-au1x00/au1000.h>
#include <asm/mach-pb1x00/pb1100.h>
diff --git a/arch/mips/au1000/pb1100/init.c b/arch/mips/au1000/pb1100/init.c
index 490c3801c275..c91344648ed3 100644
--- a/arch/mips/au1000/pb1100/init.c
+++ b/arch/mips/au1000/pb1100/init.c
@@ -27,14 +27,10 @@
* with this program; if not, write to the Free Software Foundation, Inc.,
* 675 Mass Ave, Cambridge, MA 02139, USA.
*/
+
#include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/bootmem.h>
-#include <linux/string.h>
#include <linux/kernel.h>
-#include <asm/addrspace.h>
#include <asm/bootinfo.h>
#include <prom.h>
diff --git a/arch/mips/au1000/pb1100/irqmap.c b/arch/mips/au1000/pb1100/irqmap.c
index 880456bf8c11..b5021e3d477f 100644
--- a/arch/mips/au1000/pb1100/irqmap.c
+++ b/arch/mips/au1000/pb1100/irqmap.c
@@ -25,26 +25,9 @@
* with this program; if not, write to the Free Software Foundation, Inc.,
* 675 Mass Ave, Cambridge, MA 02139, USA.
*/
-#include <linux/errno.h>
+
#include <linux/init.h>
-#include <linux/irq.h>
-#include <linux/kernel_stat.h>
-#include <linux/module.h>
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/types.h>
-#include <linux/interrupt.h>
-#include <linux/ioport.h>
-#include <linux/timex.h>
-#include <linux/slab.h>
-#include <linux/random.h>
-#include <linux/delay.h>
-#include <linux/bitops.h>
-#include <asm/bootinfo.h>
-#include <asm/io.h>
-#include <asm/mipsregs.h>
-#include <asm/system.h>
#include <asm/mach-au1x00/au1000.h>
struct au1xxx_irqmap __initdata au1xxx_irq_map[] = {
diff --git a/arch/mips/au1000/pb1200/Makefile b/arch/mips/au1000/pb1200/Makefile
index 970b1b1d5cda..4fe02ea65a60 100644
--- a/arch/mips/au1000/pb1200/Makefile
+++ b/arch/mips/au1000/pb1200/Makefile
@@ -3,5 +3,6 @@
#
lib-y := init.o board_setup.o irqmap.o
+obj-y += platform.o
EXTRA_CFLAGS += -Werror
diff --git a/arch/mips/au1000/pb1200/board_setup.c b/arch/mips/au1000/pb1200/board_setup.c
index b98bebfa87c6..4493a792cc4c 100644
--- a/arch/mips/au1000/pb1200/board_setup.c
+++ b/arch/mips/au1000/pb1200/board_setup.c
@@ -23,27 +23,11 @@
* with this program; if not, write to the Free Software Foundation, Inc.,
* 675 Mass Ave, Cambridge, MA 02139, USA.
*/
+
#include <linux/init.h>
#include <linux/sched.h>
-#include <linux/ioport.h>
-#include <linux/mm.h>
-#include <linux/console.h>
-#include <linux/mc146818rtc.h>
-#include <linux/delay.h>
-
-#if defined(CONFIG_BLK_DEV_IDE_AU1XXX)
-#include <linux/ide.h>
-#endif
-
-#include <asm/cpu.h>
-#include <asm/bootinfo.h>
-#include <asm/irq.h>
-#include <asm/mipsregs.h>
-#include <asm/reboot.h>
-#include <asm/pgtable.h>
#include <au1000.h>
-#include <au1xxx_dbdma.h>
#include <prom.h>
#ifdef CONFIG_MIPS_PB1200
@@ -52,8 +36,6 @@
#ifdef CONFIG_MIPS_DB1200
#include <asm/mach-db1x00/db1200.h>
-#define PB1200_ETH_INT DB1200_ETH_INT
-#define PB1200_IDE_INT DB1200_IDE_INT
#endif
extern void _board_init_irq(void);
diff --git a/arch/mips/au1000/pb1200/init.c b/arch/mips/au1000/pb1200/init.c
index 069ed45f04f2..72af5500660b 100644
--- a/arch/mips/au1000/pb1200/init.c
+++ b/arch/mips/au1000/pb1200/init.c
@@ -27,14 +27,10 @@
* with this program; if not, write to the Free Software Foundation, Inc.,
* 675 Mass Ave, Cambridge, MA 02139, USA.
*/
+
#include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/bootmem.h>
-#include <linux/string.h>
#include <linux/kernel.h>
-#include <asm/addrspace.h>
#include <asm/bootinfo.h>
#include <prom.h>
diff --git a/arch/mips/au1000/pb1200/irqmap.c b/arch/mips/au1000/pb1200/irqmap.c
index 8fcd0df86f93..e61eb8e0b76b 100644
--- a/arch/mips/au1000/pb1200/irqmap.c
+++ b/arch/mips/au1000/pb1200/irqmap.c
@@ -22,26 +22,10 @@
* with this program; if not, write to the Free Software Foundation, Inc.,
* 675 Mass Ave, Cambridge, MA 02139, USA.
*/
-#include <linux/errno.h>
+
#include <linux/init.h>
-#include <linux/irq.h>
-#include <linux/kernel_stat.h>
-#include <linux/module.h>
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/types.h>
#include <linux/interrupt.h>
-#include <linux/ioport.h>
-#include <linux/timex.h>
-#include <linux/slab.h>
-#include <linux/random.h>
-#include <linux/delay.h>
-#include <linux/bitops.h>
-
-#include <asm/bootinfo.h>
-#include <asm/io.h>
-#include <asm/mipsregs.h>
-#include <asm/system.h>
+
#include <asm/mach-au1x00/au1000.h>
#ifdef CONFIG_MIPS_PB1200
diff --git a/arch/mips/au1000/pb1200/platform.c b/arch/mips/au1000/pb1200/platform.c
new file mode 100644
index 000000000000..5930110b9b6d
--- /dev/null
+++ b/arch/mips/au1000/pb1200/platform.c
@@ -0,0 +1,84 @@
+/*
+ * Pb1200/DBAu1200 board platform device registration
+ *
+ * Copyright (C) 2008 MontaVista Software Inc. <source@mvista.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <linux/init.h>
+#include <linux/platform_device.h>
+
+#include <asm/mach-au1x00/au1xxx.h>
+
+static struct resource ide_resources[] = {
+ [0] = {
+ .start = IDE_PHYS_ADDR,
+ .end = IDE_PHYS_ADDR + IDE_PHYS_LEN - 1,
+ .flags = IORESOURCE_MEM
+ },
+ [1] = {
+ .start = IDE_INT,
+ .end = IDE_INT,
+ .flags = IORESOURCE_IRQ
+ }
+};
+
+static u64 ide_dmamask = ~(u32)0;
+
+static struct platform_device ide_device = {
+ .name = "au1200-ide",
+ .id = 0,
+ .dev = {
+ .dma_mask = &ide_dmamask,
+ .coherent_dma_mask = 0xffffffff,
+ },
+ .num_resources = ARRAY_SIZE(ide_resources),
+ .resource = ide_resources
+};
+
+static struct resource smc91c111_resources[] = {
+ [0] = {
+ .name = "smc91x-regs",
+ .start = SMC91C111_PHYS_ADDR,
+ .end = SMC91C111_PHYS_ADDR + 0xf,
+ .flags = IORESOURCE_MEM
+ },
+ [1] = {
+ .start = SMC91C111_INT,
+ .end = SMC91C111_INT,
+ .flags = IORESOURCE_IRQ
+ },
+};
+
+static struct platform_device smc91c111_device = {
+ .name = "smc91x",
+ .id = -1,
+ .num_resources = ARRAY_SIZE(smc91c111_resources),
+ .resource = smc91c111_resources
+};
+
+static struct platform_device *board_platform_devices[] __initdata = {
+ &ide_device,
+ &smc91c111_device
+};
+
+static int __init board_register_devices(void)
+{
+ return platform_add_devices(board_platform_devices,
+ ARRAY_SIZE(board_platform_devices));
+}
+
+arch_initcall(board_register_devices);
diff --git a/arch/mips/au1000/pb1500/board_setup.c b/arch/mips/au1000/pb1500/board_setup.c
index 5446836869d6..24c652e8ec4b 100644
--- a/arch/mips/au1000/pb1500/board_setup.c
+++ b/arch/mips/au1000/pb1500/board_setup.c
@@ -23,19 +23,10 @@
* with this program; if not, write to the Free Software Foundation, Inc.,
* 675 Mass Ave, Cambridge, MA 02139, USA.
*/
+
#include <linux/init.h>
-#include <linux/sched.h>
-#include <linux/ioport.h>
-#include <linux/mm.h>
-#include <linux/console.h>
#include <linux/delay.h>
-#include <asm/cpu.h>
-#include <asm/bootinfo.h>
-#include <asm/irq.h>
-#include <asm/mipsregs.h>
-#include <asm/reboot.h>
-#include <asm/pgtable.h>
#include <asm/mach-au1x00/au1000.h>
#include <asm/mach-pb1x00/pb1500.h>
diff --git a/arch/mips/au1000/pb1500/init.c b/arch/mips/au1000/pb1500/init.c
index db558c967048..488507c07db9 100644
--- a/arch/mips/au1000/pb1500/init.c
+++ b/arch/mips/au1000/pb1500/init.c
@@ -27,14 +27,10 @@
* with this program; if not, write to the Free Software Foundation, Inc.,
* 675 Mass Ave, Cambridge, MA 02139, USA.
*/
+
#include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/bootmem.h>
-#include <linux/string.h>
#include <linux/kernel.h>
-#include <asm/addrspace.h>
#include <asm/bootinfo.h>
#include <prom.h>
diff --git a/arch/mips/au1000/pb1500/irqmap.c b/arch/mips/au1000/pb1500/irqmap.c
index 810f695e24bb..4817ab44d07f 100644
--- a/arch/mips/au1000/pb1500/irqmap.c
+++ b/arch/mips/au1000/pb1500/irqmap.c
@@ -25,26 +25,9 @@
* with this program; if not, write to the Free Software Foundation, Inc.,
* 675 Mass Ave, Cambridge, MA 02139, USA.
*/
-#include <linux/errno.h>
+
#include <linux/init.h>
-#include <linux/irq.h>
-#include <linux/kernel_stat.h>
-#include <linux/module.h>
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/types.h>
-#include <linux/interrupt.h>
-#include <linux/ioport.h>
-#include <linux/timex.h>
-#include <linux/slab.h>
-#include <linux/random.h>
-#include <linux/delay.h>
-#include <linux/bitops.h>
-#include <asm/bootinfo.h>
-#include <asm/io.h>
-#include <asm/mipsregs.h>
-#include <asm/system.h>
#include <asm/mach-au1x00/au1000.h>
char irq_tab_alchemy[][5] __initdata = {
diff --git a/arch/mips/au1000/pb1550/board_setup.c b/arch/mips/au1000/pb1550/board_setup.c
index e3cfb0d73180..45d60872b565 100644
--- a/arch/mips/au1000/pb1550/board_setup.c
+++ b/arch/mips/au1000/pb1550/board_setup.c
@@ -27,20 +27,9 @@
* with this program; if not, write to the Free Software Foundation, Inc.,
* 675 Mass Ave, Cambridge, MA 02139, USA.
*/
+
#include <linux/init.h>
-#include <linux/sched.h>
-#include <linux/ioport.h>
-#include <linux/mm.h>
-#include <linux/console.h>
-#include <linux/mc146818rtc.h>
-#include <linux/delay.h>
-#include <asm/cpu.h>
-#include <asm/bootinfo.h>
-#include <asm/irq.h>
-#include <asm/mipsregs.h>
-#include <asm/reboot.h>
-#include <asm/pgtable.h>
#include <asm/mach-au1x00/au1000.h>
#include <asm/mach-pb1x00/pb1550.h>
diff --git a/arch/mips/au1000/pb1550/init.c b/arch/mips/au1000/pb1550/init.c
index b716363ea564..f6b2fc587980 100644
--- a/arch/mips/au1000/pb1550/init.c
+++ b/arch/mips/au1000/pb1550/init.c
@@ -27,14 +27,10 @@
* with this program; if not, write to the Free Software Foundation, Inc.,
* 675 Mass Ave, Cambridge, MA 02139, USA.
*/
+
#include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/bootmem.h>
-#include <linux/string.h>
#include <linux/kernel.h>
-#include <asm/addrspace.h>
#include <asm/bootinfo.h>
#include <prom.h>
diff --git a/arch/mips/au1000/pb1550/irqmap.c b/arch/mips/au1000/pb1550/irqmap.c
index 56becab28e5d..e1dac37af08a 100644
--- a/arch/mips/au1000/pb1550/irqmap.c
+++ b/arch/mips/au1000/pb1550/irqmap.c
@@ -25,26 +25,9 @@
* with this program; if not, write to the Free Software Foundation, Inc.,
* 675 Mass Ave, Cambridge, MA 02139, USA.
*/
-#include <linux/errno.h>
+
#include <linux/init.h>
-#include <linux/irq.h>
-#include <linux/kernel_stat.h>
-#include <linux/module.h>
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/types.h>
-#include <linux/interrupt.h>
-#include <linux/ioport.h>
-#include <linux/timex.h>
-#include <linux/slab.h>
-#include <linux/random.h>
-#include <linux/delay.h>
-#include <linux/bitops.h>
-#include <asm/bootinfo.h>
-#include <asm/io.h>
-#include <asm/mipsregs.h>
-#include <asm/system.h>
#include <asm/mach-au1x00/au1000.h>
char irq_tab_alchemy[][5] __initdata = {
diff --git a/arch/mips/au1000/xxs1500/board_setup.c b/arch/mips/au1000/xxs1500/board_setup.c
index b2e413e597a8..79d1798621bf 100644
--- a/arch/mips/au1000/xxs1500/board_setup.c
+++ b/arch/mips/au1000/xxs1500/board_setup.c
@@ -23,19 +23,10 @@
* with this program; if not, write to the Free Software Foundation, Inc.,
* 675 Mass Ave, Cambridge, MA 02139, USA.
*/
+
#include <linux/init.h>
-#include <linux/sched.h>
-#include <linux/ioport.h>
-#include <linux/mm.h>
-#include <linux/console.h>
#include <linux/delay.h>
-#include <asm/cpu.h>
-#include <asm/bootinfo.h>
-#include <asm/irq.h>
-#include <asm/mipsregs.h>
-#include <asm/reboot.h>
-#include <asm/pgtable.h>
#include <asm/mach-au1x00/au1000.h>
void board_reset(void)
diff --git a/arch/mips/au1000/xxs1500/init.c b/arch/mips/au1000/xxs1500/init.c
index 7e6878c1b0a5..24fc6e132dc0 100644
--- a/arch/mips/au1000/xxs1500/init.c
+++ b/arch/mips/au1000/xxs1500/init.c
@@ -26,14 +26,10 @@
* with this program; if not, write to the Free Software Foundation, Inc.,
* 675 Mass Ave, Cambridge, MA 02139, USA.
*/
+
#include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/bootmem.h>
-#include <linux/string.h>
#include <linux/kernel.h>
-#include <asm/addrspace.h>
#include <asm/bootinfo.h>
#include <prom.h>
diff --git a/arch/mips/au1000/xxs1500/irqmap.c b/arch/mips/au1000/xxs1500/irqmap.c
index a343da134334..dd6e3d1eb4d4 100644
--- a/arch/mips/au1000/xxs1500/irqmap.c
+++ b/arch/mips/au1000/xxs1500/irqmap.c
@@ -25,26 +25,9 @@
* with this program; if not, write to the Free Software Foundation, Inc.,
* 675 Mass Ave, Cambridge, MA 02139, USA.
*/
-#include <linux/errno.h>
+
#include <linux/init.h>
-#include <linux/irq.h>
-#include <linux/kernel_stat.h>
-#include <linux/module.h>
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/types.h>
-#include <linux/interrupt.h>
-#include <linux/ioport.h>
-#include <linux/timex.h>
-#include <linux/slab.h>
-#include <linux/random.h>
-#include <linux/delay.h>
-#include <linux/bitops.h>
-#include <asm/bootinfo.h>
-#include <asm/io.h>
-#include <asm/mipsregs.h>
-#include <asm/system.h>
#include <asm/mach-au1x00/au1000.h>
struct au1xxx_irqmap __initdata au1xxx_irq_map[] = {
diff --git a/arch/mips/basler/excite/excite_procfs.c b/arch/mips/basler/excite/excite_procfs.c
index 9ee67a95f6b9..08923e6825b5 100644
--- a/arch/mips/basler/excite/excite_procfs.c
+++ b/arch/mips/basler/excite/excite_procfs.c
@@ -18,8 +18,9 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
-
+#include <linux/module.h>
#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
#include <linux/stat.h>
#include <asm/page.h>
#include <asm/io.h>
@@ -28,14 +29,25 @@
#include <excite.h>
-static int excite_get_unit_id(char *buf, char **addr, off_t offs, int size)
+static int excite_unit_id_proc_show(struct seq_file *m, void *v)
{
- const int len = snprintf(buf, PAGE_SIZE, "%06x", unit_id);
- const int w = len - offs;
- *addr = buf + offs;
- return w < size ? w : size;
+ seq_printf(m, "%06x", unit_id);
+ return 0;
}
+static int excite_unit_id_proc_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, excite_unit_id_proc_show, NULL);
+}
+
+static const struct file_operations excite_unit_id_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = excite_unit_id_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
static int
excite_bootrom_read(char *page, char **start, off_t off, int count,
int *eof, void *data)
@@ -65,12 +77,12 @@ excite_bootrom_read(char *page, char **start, off_t off, int count,
void excite_procfs_init(void)
{
/* Create & populate /proc/excite */
- struct proc_dir_entry * const pdir = proc_mkdir("excite", &proc_root);
+ struct proc_dir_entry * const pdir = proc_mkdir("excite", NULL);
if (pdir) {
struct proc_dir_entry * e;
- e = create_proc_info_entry("unit_id", S_IRUGO, pdir,
- excite_get_unit_id);
+ e = proc_create("unit_id", S_IRUGO, pdir,
+ &excite_unit_id_proc_fops);
if (e) e->size = 6;
e = create_proc_read_entry("bootrom", S_IRUGO, pdir,
diff --git a/arch/mips/configs/mipssim_defconfig b/arch/mips/configs/mipssim_defconfig
index 6db0bdaefb27..4f6bce99d5cf 100644
--- a/arch/mips/configs/mipssim_defconfig
+++ b/arch/mips/configs/mipssim_defconfig
@@ -641,7 +641,6 @@ CONFIG_CROSSCOMPILE=y
CONFIG_CMDLINE="nfsroot=192.168.192.169:/u1/mipsel,timeo=20 ip=dhcp"
# CONFIG_DEBUG_STACK_USAGE is not set
# CONFIG_RUNTIME_DEBUG is not set
-# CONFIG_MIPS_UNCACHED is not set
#
# Security options
diff --git a/arch/mips/configs/pnx8550-jbs_defconfig b/arch/mips/configs/pnx8550-jbs_defconfig
index 518a60892b78..780c7fc24b82 100644
--- a/arch/mips/configs/pnx8550-jbs_defconfig
+++ b/arch/mips/configs/pnx8550-jbs_defconfig
@@ -1223,7 +1223,6 @@ CONFIG_CMDLINE="console=ttyS1,38400n8 kgdb=ttyS0 root=/dev/nfs ip=bootp"
# CONFIG_KGDB is not set
CONFIG_SYS_SUPPORTS_KGDB=y
# CONFIG_RUNTIME_DEBUG is not set
-# CONFIG_MIPS_UNCACHED is not set
#
# Security options
diff --git a/arch/mips/configs/pnx8550-stb810_defconfig b/arch/mips/configs/pnx8550-stb810_defconfig
index 68351eb81bc8..267f21ed1d0f 100644
--- a/arch/mips/configs/pnx8550-stb810_defconfig
+++ b/arch/mips/configs/pnx8550-stb810_defconfig
@@ -1213,7 +1213,6 @@ CONFIG_CMDLINE="console=ttyS1,38400n8 kgdb=ttyS0 root=/dev/nfs ip=bootp"
# CONFIG_KGDB is not set
CONFIG_SYS_SUPPORTS_KGDB=y
# CONFIG_RUNTIME_DEBUG is not set
-# CONFIG_MIPS_UNCACHED is not set
#
# Security options
diff --git a/arch/mips/dec/time.c b/arch/mips/dec/time.c
index 60349062595a..3965fda94a89 100644
--- a/arch/mips/dec/time.c
+++ b/arch/mips/dec/time.c
@@ -9,30 +9,15 @@
*
*/
#include <linux/bcd.h>
-#include <linux/errno.h>
#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/kernel.h>
#include <linux/mc146818rtc.h>
-#include <linux/mm.h>
-#include <linux/module.h>
#include <linux/param.h>
-#include <linux/sched.h>
-#include <linux/string.h>
-#include <linux/time.h>
-#include <linux/types.h>
-
-#include <asm/bootinfo.h>
-#include <asm/cpu.h>
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/mipsregs.h>
-#include <asm/sections.h>
-#include <asm/time.h>
+#include <asm/cpu-features.h>
+#include <asm/ds1287.h>
+#include <asm/time.h>
#include <asm/dec/interrupts.h>
#include <asm/dec/ioasic.h>
-#include <asm/dec/ioasic_addrs.h>
#include <asm/dec/machtype.h>
unsigned long read_persistent_clock(void)
@@ -139,42 +124,32 @@ int rtc_mips_set_mmss(unsigned long nowtime)
return retval;
}
-static int dec_timer_state(void)
+void __init plat_time_init(void)
{
- return (CMOS_READ(RTC_REG_C) & RTC_PF) != 0;
-}
+ u32 start, end;
+ int i = HZ / 10;
-static void dec_timer_ack(void)
-{
- CMOS_READ(RTC_REG_C); /* Ack the RTC interrupt. */
-}
-
-static cycle_t dec_ioasic_hpt_read(void)
-{
- /*
- * The free-running counter is 32-bit which is good for about
- * 2 minutes, 50 seconds at possible count rates of up to 25MHz.
- */
- return ioasic_read(IO_REG_FCTR);
-}
+ /* Set up the rate of periodic DS1287 interrupts. */
+ ds1287_set_base_clock(HZ);
+ if (cpu_has_counter) {
+ while (!ds1287_timer_state())
+ ;
-void __init plat_time_init(void)
-{
- mips_timer_ack = dec_timer_ack;
+ start = read_c0_count();
- if (!cpu_has_counter && IOASIC)
- /* For pre-R4k systems we use the I/O ASIC's counter. */
- clocksource_mips.read = dec_ioasic_hpt_read;
+ while (i--)
+ while (!ds1287_timer_state())
+ ;
- /* Set up the rate of periodic DS1287 interrupts. */
- CMOS_WRITE(RTC_REF_CLCK_32KHZ | (16 - __ffs(HZ)), RTC_REG_A);
-}
+ end = read_c0_count();
-void __init plat_timer_setup(struct irqaction *irq)
-{
- setup_irq(dec_interrupt[DEC_IRQ_RTC], irq);
+ mips_hpt_frequency = (end - start) * 10;
+ printk(KERN_INFO "MIPS counter frequency %dHz\n",
+ mips_hpt_frequency);
+ } else if (IOASIC)
+ /* For pre-R4k systems we use the I/O ASIC's counter. */
+ dec_ioasic_clocksource_init();
- /* Enable periodic DS1287 interrupts. */
- CMOS_WRITE(CMOS_READ(RTC_REG_B) | RTC_PIE, RTC_REG_B);
+ ds1287_clockevent_init(dec_interrupt[DEC_IRQ_RTC]);
}
diff --git a/arch/mips/jmr3927/rbhma3100/setup.c b/arch/mips/jmr3927/rbhma3100/setup.c
index c886d804d303..f39c444e42d4 100644
--- a/arch/mips/jmr3927/rbhma3100/setup.c
+++ b/arch/mips/jmr3927/rbhma3100/setup.c
@@ -36,11 +36,13 @@
#include <linux/pm.h>
#include <linux/platform_device.h>
#include <linux/clk.h>
+#include <linux/gpio.h>
#ifdef CONFIG_SERIAL_TXX9
#include <linux/serial_core.h>
#endif
#include <asm/txx9tmr.h>
+#include <asm/txx9pio.h>
#include <asm/reboot.h>
#include <asm/jmr3927/jmr3927.h>
#include <asm/mipsregs.h>
@@ -340,9 +342,12 @@ static void __init tx3927_setup(void)
/* PIO */
/* PIO[15:12] connected to LEDs */
- tx3927_pioptr->dir = 0x0000f000;
- tx3927_pioptr->maskcpu = 0;
- tx3927_pioptr->maskext = 0;
+ __raw_writel(0x0000f000, &tx3927_pioptr->dir);
+ __raw_writel(0, &tx3927_pioptr->maskcpu);
+ __raw_writel(0, &tx3927_pioptr->maskext);
+ txx9_gpio_init(TX3927_PIO_REG, 0, 16);
+ gpio_request(11, "dipsw1");
+ gpio_request(10, "dipsw2");
{
unsigned int conf;
diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
index 6fcdb6fda2e2..45545be3eb86 100644
--- a/arch/mips/kernel/Makefile
+++ b/arch/mips/kernel/Makefile
@@ -10,12 +10,15 @@ obj-y += cpu-probe.o branch.o entry.o genex.o irq.o process.o \
obj-$(CONFIG_CEVT_BCM1480) += cevt-bcm1480.o
obj-$(CONFIG_CEVT_R4K) += cevt-r4k.o
+obj-$(CONFIG_CEVT_DS1287) += cevt-ds1287.o
obj-$(CONFIG_CEVT_GT641XX) += cevt-gt641xx.o
obj-$(CONFIG_CEVT_SB1250) += cevt-sb1250.o
obj-$(CONFIG_CEVT_TXX9) += cevt-txx9.o
obj-$(CONFIG_CSRC_BCM1480) += csrc-bcm1480.o
+obj-$(CONFIG_CSRC_IOASIC) += csrc-ioasic.o
obj-$(CONFIG_CSRC_R4K) += csrc-r4k.o
obj-$(CONFIG_CSRC_SB1250) += csrc-sb1250.o
+obj-$(CONFIG_SYNC_R4K) += sync-r4k.o
binfmt_irix-objs := irixelf.o irixinv.o irixioctl.o irixsig.o \
irix5sys.o sysirix.o
@@ -50,6 +53,8 @@ obj-$(CONFIG_MIPS_MT) += mips-mt.o
obj-$(CONFIG_MIPS_MT_FPAFF) += mips-mt-fpaff.o
obj-$(CONFIG_MIPS_MT_SMTC) += smtc.o smtc-asm.o smtc-proc.o
obj-$(CONFIG_MIPS_MT_SMP) += smp-mt.o
+obj-$(CONFIG_MIPS_CMP) += smp-cmp.o
+obj-$(CONFIG_CPU_MIPSR2) += spram.o
obj-$(CONFIG_MIPS_APSP_KSPD) += kspd.o
obj-$(CONFIG_MIPS_VPE_LOADER) += vpe.o
@@ -62,6 +67,7 @@ obj-$(CONFIG_IRQ_CPU_RM9K) += irq-rm9000.o
obj-$(CONFIG_MIPS_BOARDS_GEN) += irq-msc01.o
obj-$(CONFIG_IRQ_TXX9) += irq_txx9.o
obj-$(CONFIG_IRQ_GT641XX) += irq-gt641xx.o
+obj-$(CONFIG_IRQ_GIC) += irq-gic.o
obj-$(CONFIG_32BIT) += scall32-o32.o
obj-$(CONFIG_64BIT) += scall64-64.o
@@ -77,6 +83,8 @@ obj-$(CONFIG_64BIT) += cpu-bugs64.o
obj-$(CONFIG_I8253) += i8253.o
+obj-$(CONFIG_GPIO_TXX9) += gpio_txx9.o
+
obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c
index ca136298acdc..72942226fcdd 100644
--- a/arch/mips/kernel/asm-offsets.c
+++ b/arch/mips/kernel/asm-offsets.c
@@ -13,327 +13,285 @@
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
-
+#include <linux/kbuild.h>
#include <asm/ptrace.h>
#include <asm/processor.h>
-#define text(t) __asm__("\n@@@" t)
-#define _offset(type, member) (&(((type *)NULL)->member))
-#define offset(string, ptr, member) \
- __asm__("\n@@@" string "%0" : : "i" (_offset(ptr, member)))
-#define constant(string, member) \
- __asm__("\n@@@" string "%X0" : : "ri" (member))
-#define size(string, size) \
- __asm__("\n@@@" string "%0" : : "i" (sizeof(size)))
-#define linefeed text("")
-
void output_ptreg_defines(void)
{
- text("/* MIPS pt_regs offsets. */");
- offset("#define PT_R0 ", struct pt_regs, regs[0]);
- offset("#define PT_R1 ", struct pt_regs, regs[1]);
- offset("#define PT_R2 ", struct pt_regs, regs[2]);
- offset("#define PT_R3 ", struct pt_regs, regs[3]);
- offset("#define PT_R4 ", struct pt_regs, regs[4]);
- offset("#define PT_R5 ", struct pt_regs, regs[5]);
- offset("#define PT_R6 ", struct pt_regs, regs[6]);
- offset("#define PT_R7 ", struct pt_regs, regs[7]);
- offset("#define PT_R8 ", struct pt_regs, regs[8]);
- offset("#define PT_R9 ", struct pt_regs, regs[9]);
- offset("#define PT_R10 ", struct pt_regs, regs[10]);
- offset("#define PT_R11 ", struct pt_regs, regs[11]);
- offset("#define PT_R12 ", struct pt_regs, regs[12]);
- offset("#define PT_R13 ", struct pt_regs, regs[13]);
- offset("#define PT_R14 ", struct pt_regs, regs[14]);
- offset("#define PT_R15 ", struct pt_regs, regs[15]);
- offset("#define PT_R16 ", struct pt_regs, regs[16]);
- offset("#define PT_R17 ", struct pt_regs, regs[17]);
- offset("#define PT_R18 ", struct pt_regs, regs[18]);
- offset("#define PT_R19 ", struct pt_regs, regs[19]);
- offset("#define PT_R20 ", struct pt_regs, regs[20]);
- offset("#define PT_R21 ", struct pt_regs, regs[21]);
- offset("#define PT_R22 ", struct pt_regs, regs[22]);
- offset("#define PT_R23 ", struct pt_regs, regs[23]);
- offset("#define PT_R24 ", struct pt_regs, regs[24]);
- offset("#define PT_R25 ", struct pt_regs, regs[25]);
- offset("#define PT_R26 ", struct pt_regs, regs[26]);
- offset("#define PT_R27 ", struct pt_regs, regs[27]);
- offset("#define PT_R28 ", struct pt_regs, regs[28]);
- offset("#define PT_R29 ", struct pt_regs, regs[29]);
- offset("#define PT_R30 ", struct pt_regs, regs[30]);
- offset("#define PT_R31 ", struct pt_regs, regs[31]);
- offset("#define PT_LO ", struct pt_regs, lo);
- offset("#define PT_HI ", struct pt_regs, hi);
+ COMMENT("MIPS pt_regs offsets.");
+ OFFSET(PT_R0, pt_regs, regs[0]);
+ OFFSET(PT_R1, pt_regs, regs[1]);
+ OFFSET(PT_R2, pt_regs, regs[2]);
+ OFFSET(PT_R3, pt_regs, regs[3]);
+ OFFSET(PT_R4, pt_regs, regs[4]);
+ OFFSET(PT_R5, pt_regs, regs[5]);
+ OFFSET(PT_R6, pt_regs, regs[6]);
+ OFFSET(PT_R7, pt_regs, regs[7]);
+ OFFSET(PT_R8, pt_regs, regs[8]);
+ OFFSET(PT_R9, pt_regs, regs[9]);
+ OFFSET(PT_R10, pt_regs, regs[10]);
+ OFFSET(PT_R11, pt_regs, regs[11]);
+ OFFSET(PT_R12, pt_regs, regs[12]);
+ OFFSET(PT_R13, pt_regs, regs[13]);
+ OFFSET(PT_R14, pt_regs, regs[14]);
+ OFFSET(PT_R15, pt_regs, regs[15]);
+ OFFSET(PT_R16, pt_regs, regs[16]);
+ OFFSET(PT_R17, pt_regs, regs[17]);
+ OFFSET(PT_R18, pt_regs, regs[18]);
+ OFFSET(PT_R19, pt_regs, regs[19]);
+ OFFSET(PT_R20, pt_regs, regs[20]);
+ OFFSET(PT_R21, pt_regs, regs[21]);
+ OFFSET(PT_R22, pt_regs, regs[22]);
+ OFFSET(PT_R23, pt_regs, regs[23]);
+ OFFSET(PT_R24, pt_regs, regs[24]);
+ OFFSET(PT_R25, pt_regs, regs[25]);
+ OFFSET(PT_R26, pt_regs, regs[26]);
+ OFFSET(PT_R27, pt_regs, regs[27]);
+ OFFSET(PT_R28, pt_regs, regs[28]);
+ OFFSET(PT_R29, pt_regs, regs[29]);
+ OFFSET(PT_R30, pt_regs, regs[30]);
+ OFFSET(PT_R31, pt_regs, regs[31]);
+ OFFSET(PT_LO, pt_regs, lo);
+ OFFSET(PT_HI, pt_regs, hi);
#ifdef CONFIG_CPU_HAS_SMARTMIPS
- offset("#define PT_ACX ", struct pt_regs, acx);
+ OFFSET(PT_ACX, pt_regs, acx);
#endif
- offset("#define PT_EPC ", struct pt_regs, cp0_epc);
- offset("#define PT_BVADDR ", struct pt_regs, cp0_badvaddr);
- offset("#define PT_STATUS ", struct pt_regs, cp0_status);
- offset("#define PT_CAUSE ", struct pt_regs, cp0_cause);
+ OFFSET(PT_EPC, pt_regs, cp0_epc);
+ OFFSET(PT_BVADDR, pt_regs, cp0_badvaddr);
+ OFFSET(PT_STATUS, pt_regs, cp0_status);
+ OFFSET(PT_CAUSE, pt_regs, cp0_cause);
#ifdef CONFIG_MIPS_MT_SMTC
- offset("#define PT_TCSTATUS ", struct pt_regs, cp0_tcstatus);
+ OFFSET(PT_TCSTATUS, pt_regs, cp0_tcstatus);
#endif /* CONFIG_MIPS_MT_SMTC */
- size("#define PT_SIZE ", struct pt_regs);
- linefeed;
+ DEFINE(PT_SIZE, sizeof(struct pt_regs));
+ BLANK();
}
void output_task_defines(void)
{
- text("/* MIPS task_struct offsets. */");
- offset("#define TASK_STATE ", struct task_struct, state);
- offset("#define TASK_THREAD_INFO ", struct task_struct, stack);
- offset("#define TASK_FLAGS ", struct task_struct, flags);
- offset("#define TASK_MM ", struct task_struct, mm);
- offset("#define TASK_PID ", struct task_struct, pid);
- size( "#define TASK_STRUCT_SIZE ", struct task_struct);
- linefeed;
+ COMMENT("MIPS task_struct offsets.");
+ OFFSET(TASK_STATE, task_struct, state);
+ OFFSET(TASK_THREAD_INFO, task_struct, stack);
+ OFFSET(TASK_FLAGS, task_struct, flags);
+ OFFSET(TASK_MM, task_struct, mm);
+ OFFSET(TASK_PID, task_struct, pid);
+ DEFINE(TASK_STRUCT_SIZE, sizeof(struct task_struct));
+ BLANK();
}
void output_thread_info_defines(void)
{
- text("/* MIPS thread_info offsets. */");
- offset("#define TI_TASK ", struct thread_info, task);
- offset("#define TI_EXEC_DOMAIN ", struct thread_info, exec_domain);
- offset("#define TI_FLAGS ", struct thread_info, flags);
- offset("#define TI_TP_VALUE ", struct thread_info, tp_value);
- offset("#define TI_CPU ", struct thread_info, cpu);
- offset("#define TI_PRE_COUNT ", struct thread_info, preempt_count);
- offset("#define TI_ADDR_LIMIT ", struct thread_info, addr_limit);
- offset("#define TI_RESTART_BLOCK ", struct thread_info, restart_block);
- offset("#define TI_REGS ", struct thread_info, regs);
- constant("#define _THREAD_SIZE ", THREAD_SIZE);
- constant("#define _THREAD_MASK ", THREAD_MASK);
- linefeed;
+ COMMENT("MIPS thread_info offsets.");
+ OFFSET(TI_TASK, thread_info, task);
+ OFFSET(TI_EXEC_DOMAIN, thread_info, exec_domain);
+ OFFSET(TI_FLAGS, thread_info, flags);
+ OFFSET(TI_TP_VALUE, thread_info, tp_value);
+ OFFSET(TI_CPU, thread_info, cpu);
+ OFFSET(TI_PRE_COUNT, thread_info, preempt_count);
+ OFFSET(TI_ADDR_LIMIT, thread_info, addr_limit);
+ OFFSET(TI_RESTART_BLOCK, thread_info, restart_block);
+ OFFSET(TI_REGS, thread_info, regs);
+ DEFINE(_THREAD_SIZE, THREAD_SIZE);
+ DEFINE(_THREAD_MASK, THREAD_MASK);
+ BLANK();
}
void output_thread_defines(void)
{
- text("/* MIPS specific thread_struct offsets. */");
- offset("#define THREAD_REG16 ", struct task_struct, thread.reg16);
- offset("#define THREAD_REG17 ", struct task_struct, thread.reg17);
- offset("#define THREAD_REG18 ", struct task_struct, thread.reg18);
- offset("#define THREAD_REG19 ", struct task_struct, thread.reg19);
- offset("#define THREAD_REG20 ", struct task_struct, thread.reg20);
- offset("#define THREAD_REG21 ", struct task_struct, thread.reg21);
- offset("#define THREAD_REG22 ", struct task_struct, thread.reg22);
- offset("#define THREAD_REG23 ", struct task_struct, thread.reg23);
- offset("#define THREAD_REG29 ", struct task_struct, thread.reg29);
- offset("#define THREAD_REG30 ", struct task_struct, thread.reg30);
- offset("#define THREAD_REG31 ", struct task_struct, thread.reg31);
- offset("#define THREAD_STATUS ", struct task_struct,
+ COMMENT("MIPS specific thread_struct offsets.");
+ OFFSET(THREAD_REG16, task_struct, thread.reg16);
+ OFFSET(THREAD_REG17, task_struct, thread.reg17);
+ OFFSET(THREAD_REG18, task_struct, thread.reg18);
+ OFFSET(THREAD_REG19, task_struct, thread.reg19);
+ OFFSET(THREAD_REG20, task_struct, thread.reg20);
+ OFFSET(THREAD_REG21, task_struct, thread.reg21);
+ OFFSET(THREAD_REG22, task_struct, thread.reg22);
+ OFFSET(THREAD_REG23, task_struct, thread.reg23);
+ OFFSET(THREAD_REG29, task_struct, thread.reg29);
+ OFFSET(THREAD_REG30, task_struct, thread.reg30);
+ OFFSET(THREAD_REG31, task_struct, thread.reg31);
+ OFFSET(THREAD_STATUS, task_struct,
thread.cp0_status);
- offset("#define THREAD_FPU ", struct task_struct, thread.fpu);
+ OFFSET(THREAD_FPU, task_struct, thread.fpu);
- offset("#define THREAD_BVADDR ", struct task_struct, \
+ OFFSET(THREAD_BVADDR, task_struct, \
thread.cp0_badvaddr);
- offset("#define THREAD_BUADDR ", struct task_struct, \
+ OFFSET(THREAD_BUADDR, task_struct, \
thread.cp0_baduaddr);
- offset("#define THREAD_ECODE ", struct task_struct, \
+ OFFSET(THREAD_ECODE, task_struct, \
thread.error_code);
- offset("#define THREAD_TRAPNO ", struct task_struct, thread.trap_no);
- offset("#define THREAD_TRAMP ", struct task_struct, \
+ OFFSET(THREAD_TRAPNO, task_struct, thread.trap_no);
+ OFFSET(THREAD_TRAMP, task_struct, \
thread.irix_trampoline);
- offset("#define THREAD_OLDCTX ", struct task_struct, \
+ OFFSET(THREAD_OLDCTX, task_struct, \
thread.irix_oldctx);
- linefeed;
+ BLANK();
}
void output_thread_fpu_defines(void)
{
- offset("#define THREAD_FPR0 ",
- struct task_struct, thread.fpu.fpr[0]);
- offset("#define THREAD_FPR1 ",
- struct task_struct, thread.fpu.fpr[1]);
- offset("#define THREAD_FPR2 ",
- struct task_struct, thread.fpu.fpr[2]);
- offset("#define THREAD_FPR3 ",
- struct task_struct, thread.fpu.fpr[3]);
- offset("#define THREAD_FPR4 ",
- struct task_struct, thread.fpu.fpr[4]);
- offset("#define THREAD_FPR5 ",
- struct task_struct, thread.fpu.fpr[5]);
- offset("#define THREAD_FPR6 ",
- struct task_struct, thread.fpu.fpr[6]);
- offset("#define THREAD_FPR7 ",
- struct task_struct, thread.fpu.fpr[7]);
- offset("#define THREAD_FPR8 ",
- struct task_struct, thread.fpu.fpr[8]);
- offset("#define THREAD_FPR9 ",
- struct task_struct, thread.fpu.fpr[9]);
- offset("#define THREAD_FPR10 ",
- struct task_struct, thread.fpu.fpr[10]);
- offset("#define THREAD_FPR11 ",
- struct task_struct, thread.fpu.fpr[11]);
- offset("#define THREAD_FPR12 ",
- struct task_struct, thread.fpu.fpr[12]);
- offset("#define THREAD_FPR13 ",
- struct task_struct, thread.fpu.fpr[13]);
- offset("#define THREAD_FPR14 ",
- struct task_struct, thread.fpu.fpr[14]);
- offset("#define THREAD_FPR15 ",
- struct task_struct, thread.fpu.fpr[15]);
- offset("#define THREAD_FPR16 ",
- struct task_struct, thread.fpu.fpr[16]);
- offset("#define THREAD_FPR17 ",
- struct task_struct, thread.fpu.fpr[17]);
- offset("#define THREAD_FPR18 ",
- struct task_struct, thread.fpu.fpr[18]);
- offset("#define THREAD_FPR19 ",
- struct task_struct, thread.fpu.fpr[19]);
- offset("#define THREAD_FPR20 ",
- struct task_struct, thread.fpu.fpr[20]);
- offset("#define THREAD_FPR21 ",
- struct task_struct, thread.fpu.fpr[21]);
- offset("#define THREAD_FPR22 ",
- struct task_struct, thread.fpu.fpr[22]);
- offset("#define THREAD_FPR23 ",
- struct task_struct, thread.fpu.fpr[23]);
- offset("#define THREAD_FPR24 ",
- struct task_struct, thread.fpu.fpr[24]);
- offset("#define THREAD_FPR25 ",
- struct task_struct, thread.fpu.fpr[25]);
- offset("#define THREAD_FPR26 ",
- struct task_struct, thread.fpu.fpr[26]);
- offset("#define THREAD_FPR27 ",
- struct task_struct, thread.fpu.fpr[27]);
- offset("#define THREAD_FPR28 ",
- struct task_struct, thread.fpu.fpr[28]);
- offset("#define THREAD_FPR29 ",
- struct task_struct, thread.fpu.fpr[29]);
- offset("#define THREAD_FPR30 ",
- struct task_struct, thread.fpu.fpr[30]);
- offset("#define THREAD_FPR31 ",
- struct task_struct, thread.fpu.fpr[31]);
+ OFFSET(THREAD_FPR0, task_struct, thread.fpu.fpr[0]);
+ OFFSET(THREAD_FPR1, task_struct, thread.fpu.fpr[1]);
+ OFFSET(THREAD_FPR2, task_struct, thread.fpu.fpr[2]);
+ OFFSET(THREAD_FPR3, task_struct, thread.fpu.fpr[3]);
+ OFFSET(THREAD_FPR4, task_struct, thread.fpu.fpr[4]);
+ OFFSET(THREAD_FPR5, task_struct, thread.fpu.fpr[5]);
+ OFFSET(THREAD_FPR6, task_struct, thread.fpu.fpr[6]);
+ OFFSET(THREAD_FPR7, task_struct, thread.fpu.fpr[7]);
+ OFFSET(THREAD_FPR8, task_struct, thread.fpu.fpr[8]);
+ OFFSET(THREAD_FPR9, task_struct, thread.fpu.fpr[9]);
+ OFFSET(THREAD_FPR10, task_struct, thread.fpu.fpr[10]);
+ OFFSET(THREAD_FPR11, task_struct, thread.fpu.fpr[11]);
+ OFFSET(THREAD_FPR12, task_struct, thread.fpu.fpr[12]);
+ OFFSET(THREAD_FPR13, task_struct, thread.fpu.fpr[13]);
+ OFFSET(THREAD_FPR14, task_struct, thread.fpu.fpr[14]);
+ OFFSET(THREAD_FPR15, task_struct, thread.fpu.fpr[15]);
+ OFFSET(THREAD_FPR16, task_struct, thread.fpu.fpr[16]);
+ OFFSET(THREAD_FPR17, task_struct, thread.fpu.fpr[17]);
+ OFFSET(THREAD_FPR18, task_struct, thread.fpu.fpr[18]);
+ OFFSET(THREAD_FPR19, task_struct, thread.fpu.fpr[19]);
+ OFFSET(THREAD_FPR20, task_struct, thread.fpu.fpr[20]);
+ OFFSET(THREAD_FPR21, task_struct, thread.fpu.fpr[21]);
+ OFFSET(THREAD_FPR22, task_struct, thread.fpu.fpr[22]);
+ OFFSET(THREAD_FPR23, task_struct, thread.fpu.fpr[23]);
+ OFFSET(THREAD_FPR24, task_struct, thread.fpu.fpr[24]);
+ OFFSET(THREAD_FPR25, task_struct, thread.fpu.fpr[25]);
+ OFFSET(THREAD_FPR26, task_struct, thread.fpu.fpr[26]);
+ OFFSET(THREAD_FPR27, task_struct, thread.fpu.fpr[27]);
+ OFFSET(THREAD_FPR28, task_struct, thread.fpu.fpr[28]);
+ OFFSET(THREAD_FPR29, task_struct, thread.fpu.fpr[29]);
+ OFFSET(THREAD_FPR30, task_struct, thread.fpu.fpr[30]);
+ OFFSET(THREAD_FPR31, task_struct, thread.fpu.fpr[31]);
- offset("#define THREAD_FCR31 ",
- struct task_struct, thread.fpu.fcr31);
- linefeed;
+ OFFSET(THREAD_FCR31, task_struct, thread.fpu.fcr31);
+ BLANK();
}
void output_mm_defines(void)
{
- text("/* Size of struct page */");
- size("#define STRUCT_PAGE_SIZE ", struct page);
- linefeed;
- text("/* Linux mm_struct offsets. */");
- offset("#define MM_USERS ", struct mm_struct, mm_users);
- offset("#define MM_PGD ", struct mm_struct, pgd);
- offset("#define MM_CONTEXT ", struct mm_struct, context);
- linefeed;
- constant("#define _PAGE_SIZE ", PAGE_SIZE);
- constant("#define _PAGE_SHIFT ", PAGE_SHIFT);
- linefeed;
- constant("#define _PGD_T_SIZE ", sizeof(pgd_t));
- constant("#define _PMD_T_SIZE ", sizeof(pmd_t));
- constant("#define _PTE_T_SIZE ", sizeof(pte_t));
- linefeed;
- constant("#define _PGD_T_LOG2 ", PGD_T_LOG2);
- constant("#define _PMD_T_LOG2 ", PMD_T_LOG2);
- constant("#define _PTE_T_LOG2 ", PTE_T_LOG2);
- linefeed;
- constant("#define _PGD_ORDER ", PGD_ORDER);
- constant("#define _PMD_ORDER ", PMD_ORDER);
- constant("#define _PTE_ORDER ", PTE_ORDER);
- linefeed;
- constant("#define _PMD_SHIFT ", PMD_SHIFT);
- constant("#define _PGDIR_SHIFT ", PGDIR_SHIFT);
- linefeed;
- constant("#define _PTRS_PER_PGD ", PTRS_PER_PGD);
- constant("#define _PTRS_PER_PMD ", PTRS_PER_PMD);
- constant("#define _PTRS_PER_PTE ", PTRS_PER_PTE);
- linefeed;
+ COMMENT("Size of struct page");
+ DEFINE(STRUCT_PAGE_SIZE, sizeof(struct page));
+ BLANK();
+ COMMENT("Linux mm_struct offsets.");
+ OFFSET(MM_USERS, mm_struct, mm_users);
+ OFFSET(MM_PGD, mm_struct, pgd);
+ OFFSET(MM_CONTEXT, mm_struct, context);
+ BLANK();
+ DEFINE(_PAGE_SIZE, PAGE_SIZE);
+ DEFINE(_PAGE_SHIFT, PAGE_SHIFT);
+ BLANK();
+ DEFINE(_PGD_T_SIZE, sizeof(pgd_t));
+ DEFINE(_PMD_T_SIZE, sizeof(pmd_t));
+ DEFINE(_PTE_T_SIZE, sizeof(pte_t));
+ BLANK();
+ DEFINE(_PGD_T_LOG2, PGD_T_LOG2);
+ DEFINE(_PMD_T_LOG2, PMD_T_LOG2);
+ DEFINE(_PTE_T_LOG2, PTE_T_LOG2);
+ BLANK();
+ DEFINE(_PGD_ORDER, PGD_ORDER);
+ DEFINE(_PMD_ORDER, PMD_ORDER);
+ DEFINE(_PTE_ORDER, PTE_ORDER);
+ BLANK();
+ DEFINE(_PMD_SHIFT, PMD_SHIFT);
+ DEFINE(_PGDIR_SHIFT, PGDIR_SHIFT);
+ BLANK();
+ DEFINE(_PTRS_PER_PGD, PTRS_PER_PGD);
+ DEFINE(_PTRS_PER_PMD, PTRS_PER_PMD);
+ DEFINE(_PTRS_PER_PTE, PTRS_PER_PTE);
+ BLANK();
}
#ifdef CONFIG_32BIT
void output_sc_defines(void)
{
- text("/* Linux sigcontext offsets. */");
- offset("#define SC_REGS ", struct sigcontext, sc_regs);
- offset("#define SC_FPREGS ", struct sigcontext, sc_fpregs);
- offset("#define SC_ACX ", struct sigcontext, sc_acx);
- offset("#define SC_MDHI ", struct sigcontext, sc_mdhi);
- offset("#define SC_MDLO ", struct sigcontext, sc_mdlo);
- offset("#define SC_PC ", struct sigcontext, sc_pc);
- offset("#define SC_FPC_CSR ", struct sigcontext, sc_fpc_csr);
- offset("#define SC_FPC_EIR ", struct sigcontext, sc_fpc_eir);
- offset("#define SC_HI1 ", struct sigcontext, sc_hi1);
- offset("#define SC_LO1 ", struct sigcontext, sc_lo1);
- offset("#define SC_HI2 ", struct sigcontext, sc_hi2);
- offset("#define SC_LO2 ", struct sigcontext, sc_lo2);
- offset("#define SC_HI3 ", struct sigcontext, sc_hi3);
- offset("#define SC_LO3 ", struct sigcontext, sc_lo3);
- linefeed;
+ COMMENT("Linux sigcontext offsets.");
+ OFFSET(SC_REGS, sigcontext, sc_regs);
+ OFFSET(SC_FPREGS, sigcontext, sc_fpregs);
+ OFFSET(SC_ACX, sigcontext, sc_acx);
+ OFFSET(SC_MDHI, sigcontext, sc_mdhi);
+ OFFSET(SC_MDLO, sigcontext, sc_mdlo);
+ OFFSET(SC_PC, sigcontext, sc_pc);
+ OFFSET(SC_FPC_CSR, sigcontext, sc_fpc_csr);
+ OFFSET(SC_FPC_EIR, sigcontext, sc_fpc_eir);
+ OFFSET(SC_HI1, sigcontext, sc_hi1);
+ OFFSET(SC_LO1, sigcontext, sc_lo1);
+ OFFSET(SC_HI2, sigcontext, sc_hi2);
+ OFFSET(SC_LO2, sigcontext, sc_lo2);
+ OFFSET(SC_HI3, sigcontext, sc_hi3);
+ OFFSET(SC_LO3, sigcontext, sc_lo3);
+ BLANK();
}
#endif
#ifdef CONFIG_64BIT
void output_sc_defines(void)
{
- text("/* Linux sigcontext offsets. */");
- offset("#define SC_REGS ", struct sigcontext, sc_regs);
- offset("#define SC_FPREGS ", struct sigcontext, sc_fpregs);
- offset("#define SC_MDHI ", struct sigcontext, sc_mdhi);
- offset("#define SC_MDLO ", struct sigcontext, sc_mdlo);
- offset("#define SC_PC ", struct sigcontext, sc_pc);
- offset("#define SC_FPC_CSR ", struct sigcontext, sc_fpc_csr);
- linefeed;
+ COMMENT("Linux sigcontext offsets.");
+ OFFSET(SC_REGS, sigcontext, sc_regs);
+ OFFSET(SC_FPREGS, sigcontext, sc_fpregs);
+ OFFSET(SC_MDHI, sigcontext, sc_mdhi);
+ OFFSET(SC_MDLO, sigcontext, sc_mdlo);
+ OFFSET(SC_PC, sigcontext, sc_pc);
+ OFFSET(SC_FPC_CSR, sigcontext, sc_fpc_csr);
+ BLANK();
}
#endif
#ifdef CONFIG_MIPS32_COMPAT
void output_sc32_defines(void)
{
- text("/* Linux 32-bit sigcontext offsets. */");
- offset("#define SC32_FPREGS ", struct sigcontext32, sc_fpregs);
- offset("#define SC32_FPC_CSR ", struct sigcontext32, sc_fpc_csr);
- offset("#define SC32_FPC_EIR ", struct sigcontext32, sc_fpc_eir);
- linefeed;
+ COMMENT("Linux 32-bit sigcontext offsets.");
+ OFFSET(SC32_FPREGS, sigcontext32, sc_fpregs);
+ OFFSET(SC32_FPC_CSR, sigcontext32, sc_fpc_csr);
+ OFFSET(SC32_FPC_EIR, sigcontext32, sc_fpc_eir);
+ BLANK();
}
#endif
void output_signal_defined(void)
{
- text("/* Linux signal numbers. */");
- constant("#define _SIGHUP ", SIGHUP);
- constant("#define _SIGINT ", SIGINT);
- constant("#define _SIGQUIT ", SIGQUIT);
- constant("#define _SIGILL ", SIGILL);
- constant("#define _SIGTRAP ", SIGTRAP);
- constant("#define _SIGIOT ", SIGIOT);
- constant("#define _SIGABRT ", SIGABRT);
- constant("#define _SIGEMT ", SIGEMT);
- constant("#define _SIGFPE ", SIGFPE);
- constant("#define _SIGKILL ", SIGKILL);
- constant("#define _SIGBUS ", SIGBUS);
- constant("#define _SIGSEGV ", SIGSEGV);
- constant("#define _SIGSYS ", SIGSYS);
- constant("#define _SIGPIPE ", SIGPIPE);
- constant("#define _SIGALRM ", SIGALRM);
- constant("#define _SIGTERM ", SIGTERM);
- constant("#define _SIGUSR1 ", SIGUSR1);
- constant("#define _SIGUSR2 ", SIGUSR2);
- constant("#define _SIGCHLD ", SIGCHLD);
- constant("#define _SIGPWR ", SIGPWR);
- constant("#define _SIGWINCH ", SIGWINCH);
- constant("#define _SIGURG ", SIGURG);
- constant("#define _SIGIO ", SIGIO);
- constant("#define _SIGSTOP ", SIGSTOP);
- constant("#define _SIGTSTP ", SIGTSTP);
- constant("#define _SIGCONT ", SIGCONT);
- constant("#define _SIGTTIN ", SIGTTIN);
- constant("#define _SIGTTOU ", SIGTTOU);
- constant("#define _SIGVTALRM ", SIGVTALRM);
- constant("#define _SIGPROF ", SIGPROF);
- constant("#define _SIGXCPU ", SIGXCPU);
- constant("#define _SIGXFSZ ", SIGXFSZ);
- linefeed;
+ COMMENT("Linux signal numbers.");
+ DEFINE(_SIGHUP, SIGHUP);
+ DEFINE(_SIGINT, SIGINT);
+ DEFINE(_SIGQUIT, SIGQUIT);
+ DEFINE(_SIGILL, SIGILL);
+ DEFINE(_SIGTRAP, SIGTRAP);
+ DEFINE(_SIGIOT, SIGIOT);
+ DEFINE(_SIGABRT, SIGABRT);
+ DEFINE(_SIGEMT, SIGEMT);
+ DEFINE(_SIGFPE, SIGFPE);
+ DEFINE(_SIGKILL, SIGKILL);
+ DEFINE(_SIGBUS, SIGBUS);
+ DEFINE(_SIGSEGV, SIGSEGV);
+ DEFINE(_SIGSYS, SIGSYS);
+ DEFINE(_SIGPIPE, SIGPIPE);
+ DEFINE(_SIGALRM, SIGALRM);
+ DEFINE(_SIGTERM, SIGTERM);
+ DEFINE(_SIGUSR1, SIGUSR1);
+ DEFINE(_SIGUSR2, SIGUSR2);
+ DEFINE(_SIGCHLD, SIGCHLD);
+ DEFINE(_SIGPWR, SIGPWR);
+ DEFINE(_SIGWINCH, SIGWINCH);
+ DEFINE(_SIGURG, SIGURG);
+ DEFINE(_SIGIO, SIGIO);
+ DEFINE(_SIGSTOP, SIGSTOP);
+ DEFINE(_SIGTSTP, SIGTSTP);
+ DEFINE(_SIGCONT, SIGCONT);
+ DEFINE(_SIGTTIN, SIGTTIN);
+ DEFINE(_SIGTTOU, SIGTTOU);
+ DEFINE(_SIGVTALRM, SIGVTALRM);
+ DEFINE(_SIGPROF, SIGPROF);
+ DEFINE(_SIGXCPU, SIGXCPU);
+ DEFINE(_SIGXFSZ, SIGXFSZ);
+ BLANK();
}
void output_irq_cpustat_t_defines(void)
{
- text("/* Linux irq_cpustat_t offsets. */");
- offset("#define IC_SOFTIRQ_PENDING ", irq_cpustat_t, __softirq_pending);
- size("#define IC_IRQ_CPUSTAT_T ", irq_cpustat_t);
- linefeed;
+ COMMENT("Linux irq_cpustat_t offsets.");
+ DEFINE(IC_SOFTIRQ_PENDING,
+ offsetof(irq_cpustat_t, __softirq_pending));
+ DEFINE(IC_IRQ_CPUSTAT_T, sizeof(irq_cpustat_t));
+ BLANK();
}
diff --git a/arch/mips/kernel/cevt-ds1287.c b/arch/mips/kernel/cevt-ds1287.c
new file mode 100644
index 000000000000..df4acb68bfb5
--- /dev/null
+++ b/arch/mips/kernel/cevt-ds1287.c
@@ -0,0 +1,129 @@
+/*
+ * DS1287 clockevent driver
+ *
+ * Copyright (C) 2008 Yoichi Yuasa <yoichi_yuasa@tripeaks.co.jp>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include <linux/clockchips.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/mc146818rtc.h>
+
+#include <asm/time.h>
+
+int ds1287_timer_state(void)
+{
+ return (CMOS_READ(RTC_REG_C) & RTC_PF) != 0;
+}
+
+int ds1287_set_base_clock(unsigned int hz)
+{
+ u8 rate;
+
+ switch (hz) {
+ case 128:
+ rate = 0x9;
+ break;
+ case 256:
+ rate = 0x8;
+ break;
+ case 1024:
+ rate = 0x6;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ CMOS_WRITE(RTC_REF_CLCK_32KHZ | rate, RTC_REG_A);
+
+ return 0;
+}
+
+static int ds1287_set_next_event(unsigned long delta,
+ struct clock_event_device *evt)
+{
+ return -EINVAL;
+}
+
+static void ds1287_set_mode(enum clock_event_mode mode,
+ struct clock_event_device *evt)
+{
+ u8 val;
+
+ spin_lock(&rtc_lock);
+
+ val = CMOS_READ(RTC_REG_B);
+
+ switch (mode) {
+ case CLOCK_EVT_MODE_PERIODIC:
+ val |= RTC_PIE;
+ break;
+ default:
+ val &= ~RTC_PIE;
+ break;
+ }
+
+ CMOS_WRITE(val, RTC_REG_B);
+
+ spin_unlock(&rtc_lock);
+}
+
+static void ds1287_event_handler(struct clock_event_device *dev)
+{
+}
+
+static struct clock_event_device ds1287_clockevent = {
+ .name = "ds1287",
+ .features = CLOCK_EVT_FEAT_PERIODIC,
+ .cpumask = CPU_MASK_CPU0,
+ .set_next_event = ds1287_set_next_event,
+ .set_mode = ds1287_set_mode,
+ .event_handler = ds1287_event_handler,
+};
+
+static irqreturn_t ds1287_interrupt(int irq, void *dev_id)
+{
+ struct clock_event_device *cd = &ds1287_clockevent;
+
+ /* Ack the RTC interrupt. */
+ CMOS_READ(RTC_REG_C);
+
+ cd->event_handler(cd);
+
+ return IRQ_HANDLED;
+}
+
+static struct irqaction ds1287_irqaction = {
+ .handler = ds1287_interrupt,
+ .flags = IRQF_DISABLED | IRQF_PERCPU,
+ .name = "ds1287",
+};
+
+int __init ds1287_clockevent_init(int irq)
+{
+ struct clock_event_device *cd;
+
+ cd = &ds1287_clockevent;
+ cd->rating = 100;
+ cd->irq = irq;
+ clockevent_set_clock(cd, 32768);
+ cd->max_delta_ns = clockevent_delta2ns(0x7fffffff, cd);
+ cd->min_delta_ns = clockevent_delta2ns(0x300, cd);
+
+ clockevents_register_device(&ds1287_clockevent);
+
+ return setup_irq(irq, &ds1287_irqaction);
+}
diff --git a/arch/mips/kernel/cevt-gt641xx.c b/arch/mips/kernel/cevt-gt641xx.c
index c36772631fe0..6e2f58520afb 100644
--- a/arch/mips/kernel/cevt-gt641xx.c
+++ b/arch/mips/kernel/cevt-gt641xx.c
@@ -25,8 +25,6 @@
#include <asm/gt64120.h>
#include <asm/time.h>
-#include <irq.h>
-
static DEFINE_SPINLOCK(gt641xx_timer_lock);
static unsigned int gt641xx_base_clock;
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index 89c3304cb93c..335a6ae3d594 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -169,6 +169,7 @@ static inline void check_wait(void)
case CPU_24K:
case CPU_34K:
+ case CPU_1004K:
cpu_wait = r4k_wait;
if (read_c0_config7() & MIPS_CONF7_WII)
cpu_wait = r4k_wait_irqoff;
@@ -675,6 +676,12 @@ static void __cpuinit decode_configs(struct cpuinfo_mips *c)
return;
}
+#ifdef CONFIG_CPU_MIPSR2
+extern void spram_config(void);
+#else
+static inline void spram_config(void) {}
+#endif
+
static inline void cpu_probe_mips(struct cpuinfo_mips *c)
{
decode_configs(c);
@@ -711,7 +718,12 @@ static inline void cpu_probe_mips(struct cpuinfo_mips *c)
case PRID_IMP_74K:
c->cputype = CPU_74K;
break;
+ case PRID_IMP_1004K:
+ c->cputype = CPU_1004K;
+ break;
}
+
+ spram_config();
}
static inline void cpu_probe_alchemy(struct cpuinfo_mips *c)
@@ -778,7 +790,7 @@ static inline void cpu_probe_sandcraft(struct cpuinfo_mips *c)
}
}
-static inline void cpu_probe_philips(struct cpuinfo_mips *c)
+static inline void cpu_probe_nxp(struct cpuinfo_mips *c)
{
decode_configs(c);
switch (c->processor_id & 0xff00) {
@@ -787,7 +799,7 @@ static inline void cpu_probe_philips(struct cpuinfo_mips *c)
c->isa_level = MIPS_CPU_ISA_M32R1;
break;
default:
- panic("Unknown Philips Core!"); /* REVISIT: die? */
+ panic("Unknown NXP Core!"); /* REVISIT: die? */
break;
}
}
@@ -876,6 +888,7 @@ static __cpuinit const char *cpu_to_name(struct cpuinfo_mips *c)
case CPU_24K: name = "MIPS 24K"; break;
case CPU_25KF: name = "MIPS 25Kf"; break;
case CPU_34K: name = "MIPS 34K"; break;
+ case CPU_1004K: name = "MIPS 1004K"; break;
case CPU_74K: name = "MIPS 74K"; break;
case CPU_VR4111: name = "NEC VR4111"; break;
case CPU_VR4121: name = "NEC VR4121"; break;
@@ -925,8 +938,8 @@ __cpuinit void cpu_probe(void)
case PRID_COMP_SANDCRAFT:
cpu_probe_sandcraft(c);
break;
- case PRID_COMP_PHILIPS:
- cpu_probe_philips(c);
+ case PRID_COMP_NXP:
+ cpu_probe_nxp(c);
break;
default:
c->cputype = CPU_UNKNOWN;
diff --git a/arch/mips/kernel/csrc-ioasic.c b/arch/mips/kernel/csrc-ioasic.c
new file mode 100644
index 000000000000..1d5f63cf8997
--- /dev/null
+++ b/arch/mips/kernel/csrc-ioasic.c
@@ -0,0 +1,65 @@
+/*
+ * DEC I/O ASIC's counter clocksource
+ *
+ * Copyright (C) 2008 Yoichi Yuasa <yoichi_yuasa@tripeaks.co.jp>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include <linux/clocksource.h>
+#include <linux/init.h>
+
+#include <asm/ds1287.h>
+#include <asm/time.h>
+#include <asm/dec/ioasic.h>
+#include <asm/dec/ioasic_addrs.h>
+
+static cycle_t dec_ioasic_hpt_read(void)
+{
+ return ioasic_read(IO_REG_FCTR);
+}
+
+static struct clocksource clocksource_dec = {
+ .name = "dec-ioasic",
+ .read = dec_ioasic_hpt_read,
+ .mask = CLOCKSOURCE_MASK(32),
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+void __init dec_ioasic_clocksource_init(void)
+{
+ unsigned int freq;
+ u32 start, end;
+ int i = HZ / 10;
+
+
+ while (!ds1287_timer_state())
+ ;
+
+ start = dec_ioasic_hpt_read();
+
+ while (i--)
+ while (!ds1287_timer_state())
+ ;
+
+ end = dec_ioasic_hpt_read();
+
+ freq = (end - start) * 10;
+ printk(KERN_INFO "I/O ASIC clock frequency %dHz\n", freq);
+
+ clocksource_dec.rating = 200 + freq / 10000000;
+ clocksource_set_clock(&clocksource_dec, freq);
+
+ clocksource_register(&clocksource_dec);
+}
diff --git a/arch/mips/kernel/gpio_txx9.c b/arch/mips/kernel/gpio_txx9.c
new file mode 100644
index 000000000000..b1436a857998
--- /dev/null
+++ b/arch/mips/kernel/gpio_txx9.c
@@ -0,0 +1,87 @@
+/*
+ * A gpio chip driver for TXx9 SoCs
+ *
+ * Copyright (C) 2008 Atsushi Nemoto <anemo@mba.ocn.ne.jp>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/gpio.h>
+#include <linux/errno.h>
+#include <linux/io.h>
+#include <asm/txx9pio.h>
+
+static DEFINE_SPINLOCK(txx9_gpio_lock);
+
+static struct txx9_pio_reg __iomem *txx9_pioptr;
+
+static int txx9_gpio_get(struct gpio_chip *chip, unsigned int offset)
+{
+ return __raw_readl(&txx9_pioptr->din) & (1 << offset);
+}
+
+static void txx9_gpio_set_raw(unsigned int offset, int value)
+{
+ u32 val;
+ val = __raw_readl(&txx9_pioptr->dout);
+ if (value)
+ val |= 1 << offset;
+ else
+ val &= ~(1 << offset);
+ __raw_writel(val, &txx9_pioptr->dout);
+}
+
+static void txx9_gpio_set(struct gpio_chip *chip, unsigned int offset,
+ int value)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&txx9_gpio_lock, flags);
+ txx9_gpio_set_raw(offset, value);
+ mmiowb();
+ spin_unlock_irqrestore(&txx9_gpio_lock, flags);
+}
+
+static int txx9_gpio_dir_in(struct gpio_chip *chip, unsigned int offset)
+{
+ spin_lock_irq(&txx9_gpio_lock);
+ __raw_writel(__raw_readl(&txx9_pioptr->dir) & ~(1 << offset),
+ &txx9_pioptr->dir);
+ mmiowb();
+ spin_unlock_irq(&txx9_gpio_lock);
+ return 0;
+}
+
+static int txx9_gpio_dir_out(struct gpio_chip *chip, unsigned int offset,
+ int value)
+{
+ spin_lock_irq(&txx9_gpio_lock);
+ txx9_gpio_set_raw(offset, value);
+ __raw_writel(__raw_readl(&txx9_pioptr->dir) | (1 << offset),
+ &txx9_pioptr->dir);
+ mmiowb();
+ spin_unlock_irq(&txx9_gpio_lock);
+ return 0;
+}
+
+static struct gpio_chip txx9_gpio_chip = {
+ .get = txx9_gpio_get,
+ .set = txx9_gpio_set,
+ .direction_input = txx9_gpio_dir_in,
+ .direction_output = txx9_gpio_dir_out,
+ .label = "TXx9",
+};
+
+int __init txx9_gpio_init(unsigned long baseaddr,
+ unsigned int base, unsigned int num)
+{
+ txx9_pioptr = ioremap(baseaddr, sizeof(struct txx9_pio_reg));
+ if (!txx9_pioptr)
+ return -ENODEV;
+ txx9_gpio_chip.base = base;
+ txx9_gpio_chip.ngpio = num;
+ return gpiochip_add(&txx9_gpio_chip);
+}
diff --git a/arch/mips/kernel/irq-gic.c b/arch/mips/kernel/irq-gic.c
new file mode 100644
index 000000000000..f0a4bb19e096
--- /dev/null
+++ b/arch/mips/kernel/irq-gic.c
@@ -0,0 +1,295 @@
+#undef DEBUG
+
+#include <linux/bitmap.h>
+#include <linux/init.h>
+
+#include <asm/io.h>
+#include <asm/gic.h>
+#include <asm/gcmpregs.h>
+#include <asm/mips-boards/maltaint.h>
+#include <asm/irq.h>
+#include <linux/hardirq.h>
+#include <asm-generic/bitops/find.h>
+
+
+static unsigned long _gic_base;
+static unsigned int _irqbase, _mapsize, numvpes, numintrs;
+static struct gic_intr_map *_intrmap;
+
+static struct gic_pcpu_mask pcpu_masks[NR_CPUS];
+static struct gic_pending_regs pending_regs[NR_CPUS];
+static struct gic_intrmask_regs intrmask_regs[NR_CPUS];
+
+#define gic_wedgeb2bok 0 /*
+ * Can GIC handle b2b writes to wedge register?
+ */
+#if gic_wedgeb2bok == 0
+static DEFINE_SPINLOCK(gic_wedgeb2b_lock);
+#endif
+
+void gic_send_ipi(unsigned int intr)
+{
+#if gic_wedgeb2bok == 0
+ unsigned long flags;
+#endif
+ pr_debug("CPU%d: %s status %08x\n", smp_processor_id(), __func__,
+ read_c0_status());
+ if (!gic_wedgeb2bok)
+ spin_lock_irqsave(&gic_wedgeb2b_lock, flags);
+ GICWRITE(GIC_REG(SHARED, GIC_SH_WEDGE), 0x80000000 | intr);
+ if (!gic_wedgeb2bok) {
+ (void) GIC_REG(SHARED, GIC_SH_CONFIG);
+ spin_unlock_irqrestore(&gic_wedgeb2b_lock, flags);
+ }
+}
+
+/* This is Malta specific and needs to be exported */
+static void vpe_local_setup(unsigned int numvpes)
+{
+ int i;
+ unsigned long timer_interrupt = 5, perf_interrupt = 5;
+ unsigned int vpe_ctl;
+
+ /*
+ * Setup the default performance counter timer interrupts
+ * for all VPEs
+ */
+ for (i = 0; i < numvpes; i++) {
+ GICWRITE(GIC_REG(VPE_LOCAL, GIC_VPE_OTHER_ADDR), i);
+
+ /* Are Interrupts locally routable? */
+ GICREAD(GIC_REG(VPE_OTHER, GIC_VPE_CTL), vpe_ctl);
+ if (vpe_ctl & GIC_VPE_CTL_TIMER_RTBL_MSK)
+ GICWRITE(GIC_REG(VPE_OTHER, GIC_VPE_TIMER_MAP),
+ GIC_MAP_TO_PIN_MSK | timer_interrupt);
+
+ if (vpe_ctl & GIC_VPE_CTL_PERFCNT_RTBL_MSK)
+ GICWRITE(GIC_REG(VPE_OTHER, GIC_VPE_PERFCTR_MAP),
+ GIC_MAP_TO_PIN_MSK | perf_interrupt);
+ }
+}
+
+unsigned int gic_get_int(void)
+{
+ unsigned int i;
+ unsigned long *pending, *intrmask, *pcpu_mask;
+ unsigned long *pending_abs, *intrmask_abs;
+
+ /* Get per-cpu bitmaps */
+ pending = pending_regs[smp_processor_id()].pending;
+ intrmask = intrmask_regs[smp_processor_id()].intrmask;
+ pcpu_mask = pcpu_masks[smp_processor_id()].pcpu_mask;
+
+ pending_abs = (unsigned long *) GIC_REG_ABS_ADDR(SHARED,
+ GIC_SH_PEND_31_0_OFS);
+ intrmask_abs = (unsigned long *) GIC_REG_ABS_ADDR(SHARED,
+ GIC_SH_MASK_31_0_OFS);
+
+ for (i = 0; i < BITS_TO_LONGS(GIC_NUM_INTRS); i++) {
+ GICREAD(*pending_abs, pending[i]);
+ GICREAD(*intrmask_abs, intrmask[i]);
+ pending_abs++;
+ intrmask_abs++;
+ }
+
+ bitmap_and(pending, pending, intrmask, GIC_NUM_INTRS);
+ bitmap_and(pending, pending, pcpu_mask, GIC_NUM_INTRS);
+
+ i = find_first_bit(pending, GIC_NUM_INTRS);
+
+ pr_debug("CPU%d: %s pend=%d\n", smp_processor_id(), __func__, i);
+
+ return i;
+}
+
+static unsigned int gic_irq_startup(unsigned int irq)
+{
+ pr_debug("CPU%d: %s: irq%d\n", smp_processor_id(), __func__, irq);
+ irq -= _irqbase;
+ /* FIXME: this is wrong for !GICISWORDLITTLEENDIAN */
+ GICWRITE(GIC_REG_ADDR(SHARED, (GIC_SH_SMASK_31_0_OFS + (irq / 32))),
+ 1 << (irq % 32));
+ return 0;
+}
+
+static void gic_irq_ack(unsigned int irq)
+{
+#if gic_wedgeb2bok == 0
+ unsigned long flags;
+#endif
+ pr_debug("CPU%d: %s: irq%d\n", smp_processor_id(), __func__, irq);
+ irq -= _irqbase;
+ GICWRITE(GIC_REG_ADDR(SHARED, (GIC_SH_RMASK_31_0_OFS + (irq / 32))),
+ 1 << (irq % 32));
+
+ if (_intrmap[irq].trigtype == GIC_TRIG_EDGE) {
+ if (!gic_wedgeb2bok)
+ spin_lock_irqsave(&gic_wedgeb2b_lock, flags);
+ GICWRITE(GIC_REG(SHARED, GIC_SH_WEDGE), irq);
+ if (!gic_wedgeb2bok) {
+ (void) GIC_REG(SHARED, GIC_SH_CONFIG);
+ spin_unlock_irqrestore(&gic_wedgeb2b_lock, flags);
+ }
+ }
+}
+
+static void gic_mask_irq(unsigned int irq)
+{
+ pr_debug("CPU%d: %s: irq%d\n", smp_processor_id(), __func__, irq);
+ irq -= _irqbase;
+ /* FIXME: this is wrong for !GICISWORDLITTLEENDIAN */
+ GICWRITE(GIC_REG_ADDR(SHARED, (GIC_SH_RMASK_31_0_OFS + (irq / 32))),
+ 1 << (irq % 32));
+}
+
+static void gic_unmask_irq(unsigned int irq)
+{
+ pr_debug("CPU%d: %s: irq%d\n", smp_processor_id(), __func__, irq);
+ irq -= _irqbase;
+ /* FIXME: this is wrong for !GICISWORDLITTLEENDIAN */
+ GICWRITE(GIC_REG_ADDR(SHARED, (GIC_SH_SMASK_31_0_OFS + (irq / 32))),
+ 1 << (irq % 32));
+}
+
+#ifdef CONFIG_SMP
+
+static DEFINE_SPINLOCK(gic_lock);
+
+static void gic_set_affinity(unsigned int irq, cpumask_t cpumask)
+{
+ cpumask_t tmp = CPU_MASK_NONE;
+ unsigned long flags;
+ int i;
+
+ pr_debug(KERN_DEBUG "%s called\n", __func__);
+ irq -= _irqbase;
+
+ cpus_and(tmp, cpumask, cpu_online_map);
+ if (cpus_empty(tmp))
+ return;
+
+ /* Assumption : cpumask refers to a single CPU */
+ spin_lock_irqsave(&gic_lock, flags);
+ for (;;) {
+ /* Re-route this IRQ */
+ GIC_SH_MAP_TO_VPE_SMASK(irq, first_cpu(tmp));
+
+ /*
+ * FIXME: assumption that _intrmap is ordered and has no holes
+ */
+
+ /* Update the intr_map */
+ _intrmap[irq].cpunum = first_cpu(tmp);
+
+ /* Update the pcpu_masks */
+ for (i = 0; i < NR_CPUS; i++)
+ clear_bit(irq, pcpu_masks[i].pcpu_mask);
+ set_bit(irq, pcpu_masks[first_cpu(tmp)].pcpu_mask);
+
+ }
+ irq_desc[irq].affinity = cpumask;
+ spin_unlock_irqrestore(&gic_lock, flags);
+
+}
+#endif
+
+static struct irq_chip gic_irq_controller = {
+ .name = "MIPS GIC",
+ .startup = gic_irq_startup,
+ .ack = gic_irq_ack,
+ .mask = gic_mask_irq,
+ .mask_ack = gic_mask_irq,
+ .unmask = gic_unmask_irq,
+ .eoi = gic_unmask_irq,
+#ifdef CONFIG_SMP
+ .set_affinity = gic_set_affinity,
+#endif
+};
+
+static void __init setup_intr(unsigned int intr, unsigned int cpu,
+ unsigned int pin, unsigned int polarity, unsigned int trigtype)
+{
+ /* Setup Intr to Pin mapping */
+ if (pin & GIC_MAP_TO_NMI_MSK) {
+ GICWRITE(GIC_REG_ADDR(SHARED, GIC_SH_MAP_TO_PIN(intr)), pin);
+ /* FIXME: hack to route NMI to all cpu's */
+ for (cpu = 0; cpu < NR_CPUS; cpu += 32) {
+ GICWRITE(GIC_REG_ADDR(SHARED,
+ GIC_SH_MAP_TO_VPE_REG_OFF(intr, cpu)),
+ 0xffffffff);
+ }
+ } else {
+ GICWRITE(GIC_REG_ADDR(SHARED, GIC_SH_MAP_TO_PIN(intr)),
+ GIC_MAP_TO_PIN_MSK | pin);
+ /* Setup Intr to CPU mapping */
+ GIC_SH_MAP_TO_VPE_SMASK(intr, cpu);
+ }
+
+ /* Setup Intr Polarity */
+ GIC_SET_POLARITY(intr, polarity);
+
+ /* Setup Intr Trigger Type */
+ GIC_SET_TRIGGER(intr, trigtype);
+
+ /* Init Intr Masks */
+ GIC_SET_INTR_MASK(intr, 0);
+}
+
+static void __init gic_basic_init(void)
+{
+ unsigned int i, cpu;
+
+ /* Setup defaults */
+ for (i = 0; i < GIC_NUM_INTRS; i++) {
+ GIC_SET_POLARITY(i, GIC_POL_POS);
+ GIC_SET_TRIGGER(i, GIC_TRIG_LEVEL);
+ GIC_SET_INTR_MASK(i, 0);
+ }
+
+ /* Setup specifics */
+ for (i = 0; i < _mapsize; i++) {
+ cpu = _intrmap[i].cpunum;
+ if (cpu == X)
+ continue;
+
+ setup_intr(_intrmap[i].intrnum,
+ _intrmap[i].cpunum,
+ _intrmap[i].pin,
+ _intrmap[i].polarity,
+ _intrmap[i].trigtype);
+ /* Initialise per-cpu Interrupt software masks */
+ if (_intrmap[i].ipiflag)
+ set_bit(_intrmap[i].intrnum, pcpu_masks[cpu].pcpu_mask);
+ }
+
+ vpe_local_setup(numvpes);
+
+ for (i = _irqbase; i < (_irqbase + numintrs); i++)
+ set_irq_chip(i, &gic_irq_controller);
+}
+
+void __init gic_init(unsigned long gic_base_addr,
+ unsigned long gic_addrspace_size,
+ struct gic_intr_map *intr_map, unsigned int intr_map_size,
+ unsigned int irqbase)
+{
+ unsigned int gicconfig;
+
+ _gic_base = (unsigned long) ioremap_nocache(gic_base_addr,
+ gic_addrspace_size);
+ _irqbase = irqbase;
+ _intrmap = intr_map;
+ _mapsize = intr_map_size;
+
+ GICREAD(GIC_REG(SHARED, GIC_SH_CONFIG), gicconfig);
+ numintrs = (gicconfig & GIC_SH_CONFIG_NUMINTRS_MSK) >>
+ GIC_SH_CONFIG_NUMINTRS_SHF;
+ numintrs = ((numintrs + 1) * 8);
+
+ numvpes = (gicconfig & GIC_SH_CONFIG_NUMVPES_MSK) >>
+ GIC_SH_CONFIG_NUMVPES_SHF;
+
+ pr_debug("%s called\n", __func__);
+
+ gic_basic_init();
+}
diff --git a/arch/mips/kernel/irq-msc01.c b/arch/mips/kernel/irq-msc01.c
index 4edc7e451d91..963c16d266ab 100644
--- a/arch/mips/kernel/irq-msc01.c
+++ b/arch/mips/kernel/irq-msc01.c
@@ -17,6 +17,7 @@
#include <asm/io.h>
#include <asm/irq.h>
#include <asm/msc01_ic.h>
+#include <asm/traps.h>
static unsigned long _icctrl_msc;
#define MSC01_IC_REG_BASE _icctrl_msc
@@ -98,14 +99,13 @@ void ll_msc_irq(void)
}
}
-void
-msc_bind_eic_interrupt(unsigned int irq, unsigned int set)
+static void msc_bind_eic_interrupt(int irq, int set)
{
MSCIC_WRITE(MSC01_IC_RAMW,
(irq<<MSC01_IC_RAMW_ADDR_SHF) | (set<<MSC01_IC_RAMW_DATA_SHF));
}
-struct irq_chip msc_levelirq_type = {
+static struct irq_chip msc_levelirq_type = {
.name = "SOC-it-Level",
.ack = level_mask_and_ack_msc_irq,
.mask = mask_msc_irq,
@@ -115,7 +115,7 @@ struct irq_chip msc_levelirq_type = {
.end = end_msc_irq,
};
-struct irq_chip msc_edgeirq_type = {
+static struct irq_chip msc_edgeirq_type = {
.name = "SOC-it-Edge",
.ack = edge_mask_and_ack_msc_irq,
.mask = mask_msc_irq,
@@ -128,8 +128,6 @@ struct irq_chip msc_edgeirq_type = {
void __init init_msc_irqs(unsigned long icubase, unsigned int irqbase, msc_irqmap_t *imp, int nirq)
{
- extern void (*board_bind_eic_interrupt)(unsigned int irq, unsigned int regset);
-
_icctrl_msc = (unsigned long) ioremap(icubase, 0x40000);
/* Reset interrupt controller - initialises all registers to 0 */
diff --git a/arch/mips/kernel/signal-common.h b/arch/mips/kernel/signal-common.h
index c0faabd52010..6c8e8c4246f7 100644
--- a/arch/mips/kernel/signal-common.h
+++ b/arch/mips/kernel/signal-common.h
@@ -14,7 +14,7 @@
/* #define DEBUG_SIG */
#ifdef DEBUG_SIG
-# define DEBUGP(fmt, args...) printk("%s: " fmt, __FUNCTION__ , ##args)
+# define DEBUGP(fmt, args...) printk("%s: " fmt, __func__, ##args)
#else
# define DEBUGP(fmt, args...)
#endif
diff --git a/arch/mips/kernel/smp-cmp.c b/arch/mips/kernel/smp-cmp.c
new file mode 100644
index 000000000000..ca476c4f62a5
--- /dev/null
+++ b/arch/mips/kernel/smp-cmp.c
@@ -0,0 +1,265 @@
+/*
+ * This program is free software; you can distribute it and/or modify it
+ * under the terms of the GNU General Public License (Version 2) as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Copyright (C) 2007 MIPS Technologies, Inc.
+ * Chris Dearman (chris@mips.com)
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/cpumask.h>
+#include <linux/interrupt.h>
+#include <linux/compiler.h>
+
+#include <asm/atomic.h>
+#include <asm/cacheflush.h>
+#include <asm/cpu.h>
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/hardirq.h>
+#include <asm/mmu_context.h>
+#include <asm/smp.h>
+#include <asm/time.h>
+#include <asm/mipsregs.h>
+#include <asm/mipsmtregs.h>
+#include <asm/mips_mt.h>
+
+/*
+ * Crude manipulation of the CPU masks to control which
+ * which CPU's are brought online during initialisation
+ *
+ * Beware... this needs to be called after CPU discovery
+ * but before CPU bringup
+ */
+static int __init allowcpus(char *str)
+{
+ cpumask_t cpu_allow_map;
+ char buf[256];
+ int len;
+
+ cpus_clear(cpu_allow_map);
+ if (cpulist_parse(str, cpu_allow_map) == 0) {
+ cpu_set(0, cpu_allow_map);
+ cpus_and(cpu_possible_map, cpu_possible_map, cpu_allow_map);
+ len = cpulist_scnprintf(buf, sizeof(buf)-1, cpu_possible_map);
+ buf[len] = '\0';
+ pr_debug("Allowable CPUs: %s\n", buf);
+ return 1;
+ } else
+ return 0;
+}
+__setup("allowcpus=", allowcpus);
+
+static void ipi_call_function(unsigned int cpu)
+{
+ unsigned int action = 0;
+
+ pr_debug("CPU%d: %s cpu %d status %08x\n",
+ smp_processor_id(), __func__, cpu, read_c0_status());
+
+ switch (cpu) {
+ case 0:
+ action = GIC_IPI_EXT_INTR_CALLFNC_VPE0;
+ break;
+ case 1:
+ action = GIC_IPI_EXT_INTR_CALLFNC_VPE1;
+ break;
+ case 2:
+ action = GIC_IPI_EXT_INTR_CALLFNC_VPE2;
+ break;
+ case 3:
+ action = GIC_IPI_EXT_INTR_CALLFNC_VPE3;
+ break;
+ }
+ gic_send_ipi(action);
+}
+
+
+static void ipi_resched(unsigned int cpu)
+{
+ unsigned int action = 0;
+
+ pr_debug("CPU%d: %s cpu %d status %08x\n",
+ smp_processor_id(), __func__, cpu, read_c0_status());
+
+ switch (cpu) {
+ case 0:
+ action = GIC_IPI_EXT_INTR_RESCHED_VPE0;
+ break;
+ case 1:
+ action = GIC_IPI_EXT_INTR_RESCHED_VPE1;
+ break;
+ case 2:
+ action = GIC_IPI_EXT_INTR_RESCHED_VPE2;
+ break;
+ case 3:
+ action = GIC_IPI_EXT_INTR_RESCHED_VPE3;
+ break;
+ }
+ gic_send_ipi(action);
+}
+
+/*
+ * FIXME: This isn't restricted to CMP
+ * The SMVP kernel could use GIC interrupts if available
+ */
+void cmp_send_ipi_single(int cpu, unsigned int action)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+
+ switch (action) {
+ case SMP_CALL_FUNCTION:
+ ipi_call_function(cpu);
+ break;
+
+ case SMP_RESCHEDULE_YOURSELF:
+ ipi_resched(cpu);
+ break;
+ }
+
+ local_irq_restore(flags);
+}
+
+static void cmp_send_ipi_mask(cpumask_t mask, unsigned int action)
+{
+ unsigned int i;
+
+ for_each_cpu_mask(i, mask)
+ cmp_send_ipi_single(i, action);
+}
+
+static void cmp_init_secondary(void)
+{
+ struct cpuinfo_mips *c = &current_cpu_data;
+
+ /* Assume GIC is present */
+ change_c0_status(ST0_IM, STATUSF_IP3 | STATUSF_IP4 | STATUSF_IP6 |
+ STATUSF_IP7);
+
+ /* Enable per-cpu interrupts: platform specific */
+
+ c->core = (read_c0_ebase() >> 1) & 0xff;
+#if defined(CONFIG_MIPS_MT_SMP) || defined(CONFIG_MIPS_MT_SMTC)
+ c->vpe_id = (read_c0_tcbind() >> TCBIND_CURVPE_SHIFT) & TCBIND_CURVPE;
+#endif
+#ifdef CONFIG_MIPS_MT_SMTC
+ c->tc_id = (read_c0_tcbind() >> TCBIND_CURTC_SHIFT) & TCBIND_CURTC;
+#endif
+}
+
+static void cmp_smp_finish(void)
+{
+ pr_debug("SMPCMP: CPU%d: %s\n", smp_processor_id(), __func__);
+
+ /* CDFIXME: remove this? */
+ write_c0_compare(read_c0_count() + (8 * mips_hpt_frequency / HZ));
+
+#ifdef CONFIG_MIPS_MT_FPAFF
+ /* If we have an FPU, enroll ourselves in the FPU-full mask */
+ if (cpu_has_fpu)
+ cpu_set(smp_processor_id(), mt_fpu_cpumask);
+#endif /* CONFIG_MIPS_MT_FPAFF */
+
+ local_irq_enable();
+}
+
+static void cmp_cpus_done(void)
+{
+ pr_debug("SMPCMP: CPU%d: %s\n", smp_processor_id(), __func__);
+}
+
+/*
+ * Setup the PC, SP, and GP of a secondary processor and start it running
+ * smp_bootstrap is the place to resume from
+ * __KSTK_TOS(idle) is apparently the stack pointer
+ * (unsigned long)idle->thread_info the gp
+ */
+static void cmp_boot_secondary(int cpu, struct task_struct *idle)
+{
+ struct thread_info *gp = task_thread_info(idle);
+ unsigned long sp = __KSTK_TOS(idle);
+ unsigned long pc = (unsigned long)&smp_bootstrap;
+ unsigned long a0 = 0;
+
+ pr_debug("SMPCMP: CPU%d: %s cpu %d\n", smp_processor_id(),
+ __func__, cpu);
+
+#if 0
+ /* Needed? */
+ flush_icache_range((unsigned long)gp,
+ (unsigned long)(gp + sizeof(struct thread_info)));
+#endif
+
+ amon_cpu_start(cpu, pc, sp, gp, a0);
+}
+
+/*
+ * Common setup before any secondaries are started
+ */
+void __init cmp_smp_setup(void)
+{
+ int i;
+ int ncpu = 0;
+
+ pr_debug("SMPCMP: CPU%d: %s\n", smp_processor_id(), __func__);
+
+#ifdef CONFIG_MIPS_MT_FPAFF
+ /* If we have an FPU, enroll ourselves in the FPU-full mask */
+ if (cpu_has_fpu)
+ cpu_set(0, mt_fpu_cpumask);
+#endif /* CONFIG_MIPS_MT_FPAFF */
+
+ for (i = 1; i < NR_CPUS; i++) {
+ if (amon_cpu_avail(i)) {
+ cpu_set(i, phys_cpu_present_map);
+ __cpu_number_map[i] = ++ncpu;
+ __cpu_logical_map[ncpu] = i;
+ }
+ }
+
+ if (cpu_has_mipsmt) {
+ unsigned int nvpe, mvpconf0 = read_c0_mvpconf0();
+
+ nvpe = ((mvpconf0 & MVPCONF0_PTC) >> MVPCONF0_PTC_SHIFT) + 1;
+ smp_num_siblings = nvpe;
+ }
+ pr_info("Detected %i available secondary CPU(s)\n", ncpu);
+}
+
+void __init cmp_prepare_cpus(unsigned int max_cpus)
+{
+ pr_debug("SMPCMP: CPU%d: %s max_cpus=%d\n",
+ smp_processor_id(), __func__, max_cpus);
+
+ /*
+ * FIXME: some of these options are per-system, some per-core and
+ * some per-cpu
+ */
+ mips_mt_set_cpuoptions();
+}
+
+struct plat_smp_ops cmp_smp_ops = {
+ .send_ipi_single = cmp_send_ipi_single,
+ .send_ipi_mask = cmp_send_ipi_mask,
+ .init_secondary = cmp_init_secondary,
+ .smp_finish = cmp_smp_finish,
+ .cpus_done = cmp_cpus_done,
+ .boot_secondary = cmp_boot_secondary,
+ .smp_setup = cmp_smp_setup,
+ .prepare_cpus = cmp_prepare_cpus,
+};
diff --git a/arch/mips/kernel/smp-mt.c b/arch/mips/kernel/smp-mt.c
index 89e6f6aa5166..87a1816c1f45 100644
--- a/arch/mips/kernel/smp-mt.c
+++ b/arch/mips/kernel/smp-mt.c
@@ -36,110 +36,7 @@
#include <asm/mipsmtregs.h>
#include <asm/mips_mt.h>
-#define MIPS_CPU_IPI_RESCHED_IRQ 0
-#define MIPS_CPU_IPI_CALL_IRQ 1
-
-static int cpu_ipi_resched_irq, cpu_ipi_call_irq;
-
-#if 0
-static void dump_mtregisters(int vpe, int tc)
-{
- printk("vpe %d tc %d\n", vpe, tc);
-
- settc(tc);
-
- printk(" c0 status 0x%lx\n", read_vpe_c0_status());
- printk(" vpecontrol 0x%lx\n", read_vpe_c0_vpecontrol());
- printk(" vpeconf0 0x%lx\n", read_vpe_c0_vpeconf0());
- printk(" tcstatus 0x%lx\n", read_tc_c0_tcstatus());
- printk(" tcrestart 0x%lx\n", read_tc_c0_tcrestart());
- printk(" tcbind 0x%lx\n", read_tc_c0_tcbind());
- printk(" tchalt 0x%lx\n", read_tc_c0_tchalt());
-}
-#endif
-
-void __init sanitize_tlb_entries(void)
-{
- int i, tlbsiz;
- unsigned long mvpconf0, ncpu;
-
- if (!cpu_has_mipsmt)
- return;
-
- /* Enable VPC */
- set_c0_mvpcontrol(MVPCONTROL_VPC);
-
- back_to_back_c0_hazard();
-
- /* Disable TLB sharing */
- clear_c0_mvpcontrol(MVPCONTROL_STLB);
-
- mvpconf0 = read_c0_mvpconf0();
-
- printk(KERN_INFO "MVPConf0 0x%lx TLBS %lx PTLBE %ld\n", mvpconf0,
- (mvpconf0 & MVPCONF0_TLBS) >> MVPCONF0_TLBS_SHIFT,
- (mvpconf0 & MVPCONF0_PTLBE) >> MVPCONF0_PTLBE_SHIFT);
-
- tlbsiz = (mvpconf0 & MVPCONF0_PTLBE) >> MVPCONF0_PTLBE_SHIFT;
- ncpu = ((mvpconf0 & MVPCONF0_PVPE) >> MVPCONF0_PVPE_SHIFT) + 1;
-
- printk(" tlbsiz %d ncpu %ld\n", tlbsiz, ncpu);
-
- if (tlbsiz > 0) {
- /* share them out across the vpe's */
- tlbsiz /= ncpu;
-
- printk(KERN_INFO "setting Config1.MMU_size to %d\n", tlbsiz);
-
- for (i = 0; i < ncpu; i++) {
- settc(i);
-
- if (i == 0)
- write_c0_config1((read_c0_config1() & ~(0x3f << 25)) | (tlbsiz << 25));
- else
- write_vpe_c0_config1((read_vpe_c0_config1() & ~(0x3f << 25)) |
- (tlbsiz << 25));
- }
- }
-
- clear_c0_mvpcontrol(MVPCONTROL_VPC);
-}
-
-static void ipi_resched_dispatch(void)
-{
- do_IRQ(MIPS_CPU_IRQ_BASE + MIPS_CPU_IPI_RESCHED_IRQ);
-}
-
-static void ipi_call_dispatch(void)
-{
- do_IRQ(MIPS_CPU_IRQ_BASE + MIPS_CPU_IPI_CALL_IRQ);
-}
-
-static irqreturn_t ipi_resched_interrupt(int irq, void *dev_id)
-{
- return IRQ_HANDLED;
-}
-
-static irqreturn_t ipi_call_interrupt(int irq, void *dev_id)
-{
- smp_call_function_interrupt();
-
- return IRQ_HANDLED;
-}
-
-static struct irqaction irq_resched = {
- .handler = ipi_resched_interrupt,
- .flags = IRQF_DISABLED|IRQF_PERCPU,
- .name = "IPI_resched"
-};
-
-static struct irqaction irq_call = {
- .handler = ipi_call_interrupt,
- .flags = IRQF_DISABLED|IRQF_PERCPU,
- .name = "IPI_call"
-};
-
-static void __init smp_copy_vpe_config(void)
+static void __init smvp_copy_vpe_config(void)
{
write_vpe_c0_status(
(read_c0_status() & ~(ST0_IM | ST0_IE | ST0_KSU)) | ST0_CU0);
@@ -156,7 +53,7 @@ static void __init smp_copy_vpe_config(void)
write_vpe_c0_count(read_c0_count());
}
-static unsigned int __init smp_vpe_init(unsigned int tc, unsigned int mvpconf0,
+static unsigned int __init smvp_vpe_init(unsigned int tc, unsigned int mvpconf0,
unsigned int ncpu)
{
if (tc > ((mvpconf0 & MVPCONF0_PVPE) >> MVPCONF0_PVPE_SHIFT))
@@ -182,12 +79,12 @@ static unsigned int __init smp_vpe_init(unsigned int tc, unsigned int mvpconf0,
write_vpe_c0_vpecontrol(read_vpe_c0_vpecontrol() & ~VPECONTROL_TE);
if (tc != 0)
- smp_copy_vpe_config();
+ smvp_copy_vpe_config();
return ncpu;
}
-static void __init smp_tc_init(unsigned int tc, unsigned int mvpconf0)
+static void __init smvp_tc_init(unsigned int tc, unsigned int mvpconf0)
{
unsigned long tmp;
@@ -254,15 +151,20 @@ static void vsmp_send_ipi_mask(cpumask_t mask, unsigned int action)
static void __cpuinit vsmp_init_secondary(void)
{
- /* Enable per-cpu interrupts */
+ extern int gic_present;
/* This is Malta specific: IPI,performance and timer inetrrupts */
- write_c0_status((read_c0_status() & ~ST0_IM ) |
- (STATUSF_IP0 | STATUSF_IP1 | STATUSF_IP6 | STATUSF_IP7));
+ if (gic_present)
+ change_c0_status(ST0_IM, STATUSF_IP3 | STATUSF_IP4 |
+ STATUSF_IP6 | STATUSF_IP7);
+ else
+ change_c0_status(ST0_IM, STATUSF_IP0 | STATUSF_IP1 |
+ STATUSF_IP6 | STATUSF_IP7);
}
static void __cpuinit vsmp_smp_finish(void)
{
+ /* CDFIXME: remove this? */
write_c0_compare(read_c0_count() + (8* mips_hpt_frequency/HZ));
#ifdef CONFIG_MIPS_MT_FPAFF
@@ -323,7 +225,7 @@ static void __cpuinit vsmp_boot_secondary(int cpu, struct task_struct *idle)
/*
* Common setup before any secondaries are started
* Make sure all CPU's are in a sensible state before we boot any of the
- * secondarys
+ * secondaries
*/
static void __init vsmp_smp_setup(void)
{
@@ -356,8 +258,8 @@ static void __init vsmp_smp_setup(void)
for (tc = 0; tc <= ntc; tc++) {
settc(tc);
- smp_tc_init(tc, mvpconf0);
- ncpu = smp_vpe_init(tc, mvpconf0, ncpu);
+ smvp_tc_init(tc, mvpconf0);
+ ncpu = smvp_vpe_init(tc, mvpconf0, ncpu);
}
/* Release config state */
@@ -371,21 +273,6 @@ static void __init vsmp_smp_setup(void)
static void __init vsmp_prepare_cpus(unsigned int max_cpus)
{
mips_mt_set_cpuoptions();
-
- /* set up ipi interrupts */
- if (cpu_has_vint) {
- set_vi_handler(MIPS_CPU_IPI_RESCHED_IRQ, ipi_resched_dispatch);
- set_vi_handler(MIPS_CPU_IPI_CALL_IRQ, ipi_call_dispatch);
- }
-
- cpu_ipi_resched_irq = MIPS_CPU_IRQ_BASE + MIPS_CPU_IPI_RESCHED_IRQ;
- cpu_ipi_call_irq = MIPS_CPU_IRQ_BASE + MIPS_CPU_IPI_CALL_IRQ;
-
- setup_irq(cpu_ipi_resched_irq, &irq_resched);
- setup_irq(cpu_ipi_call_irq, &irq_call);
-
- set_irq_handler(cpu_ipi_resched_irq, handle_percpu_irq);
- set_irq_handler(cpu_ipi_call_irq, handle_percpu_irq);
}
struct plat_smp_ops vsmp_smp_ops = {
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 9d41dab90a80..33780cc61ce9 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -35,6 +35,7 @@
#include <asm/atomic.h>
#include <asm/cpu.h>
#include <asm/processor.h>
+#include <asm/r4k-timer.h>
#include <asm/system.h>
#include <asm/mmu_context.h>
#include <asm/time.h>
@@ -125,6 +126,8 @@ asmlinkage __cpuinit void start_secondary(void)
cpu_set(cpu, cpu_callin_map);
+ synchronise_count_slave();
+
cpu_idle();
}
@@ -287,6 +290,7 @@ void smp_send_stop(void)
void __init smp_cpus_done(unsigned int max_cpus)
{
mp_ops->cpus_done();
+ synchronise_count_master();
}
/* called from main before smp_init() */
diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c
index b42e71c71119..3e863186cd22 100644
--- a/arch/mips/kernel/smtc.c
+++ b/arch/mips/kernel/smtc.c
@@ -174,14 +174,6 @@ static int clock_hang_reported[NR_CPUS];
#endif /* CONFIG_SMTC_IDLE_HOOK_DEBUG */
-/* Initialize shared TLB - the should probably migrate to smtc_setup_cpus() */
-
-void __init sanitize_tlb_entries(void)
-{
- printk("Deprecated sanitize_tlb_entries() invoked\n");
-}
-
-
/*
* Configure shared TLB - VPC configuration bit must be set by caller
*/
@@ -339,7 +331,8 @@ static void smtc_tc_setup(int vpe, int tc, int cpu)
/* In general, all TCs should have the same cpu_data indications */
memcpy(&cpu_data[cpu], &cpu_data[0], sizeof(struct cpuinfo_mips));
/* For 34Kf, start with TC/CPU 0 as sole owner of single FPU context */
- if (cpu_data[0].cputype == CPU_34K)
+ if (cpu_data[0].cputype == CPU_34K ||
+ cpu_data[0].cputype == CPU_1004K)
cpu_data[cpu].options &= ~MIPS_CPU_FPU;
cpu_data[cpu].vpe_id = vpe;
cpu_data[cpu].tc_id = tc;
diff --git a/arch/mips/kernel/spram.c b/arch/mips/kernel/spram.c
new file mode 100644
index 000000000000..6ddb507a87ef
--- /dev/null
+++ b/arch/mips/kernel/spram.c
@@ -0,0 +1,221 @@
+/*
+ * MIPS SPRAM support
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Copyright (C) 2007, 2008 MIPS Technologies, Inc.
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/ptrace.h>
+#include <linux/stddef.h>
+
+#include <asm/cpu.h>
+#include <asm/fpu.h>
+#include <asm/mipsregs.h>
+#include <asm/system.h>
+#include <asm/r4kcache.h>
+#include <asm/hazards.h>
+
+/*
+ * These definitions are correct for the 24K/34K/74K SPRAM sample
+ * implementation. The 4KS interpreted the tags differently...
+ */
+#define SPRAM_TAG0_ENABLE 0x00000080
+#define SPRAM_TAG0_PA_MASK 0xfffff000
+#define SPRAM_TAG1_SIZE_MASK 0xfffff000
+
+#define SPRAM_TAG_STRIDE 8
+
+#define ERRCTL_SPRAM (1 << 28)
+
+/* errctl access */
+#define read_c0_errctl(x) read_c0_ecc(x)
+#define write_c0_errctl(x) write_c0_ecc(x)
+
+/*
+ * Different semantics to the set_c0_* function built by __BUILD_SET_C0
+ */
+static __cpuinit unsigned int bis_c0_errctl(unsigned int set)
+{
+ unsigned int res;
+ res = read_c0_errctl();
+ write_c0_errctl(res | set);
+ return res;
+}
+
+static __cpuinit void ispram_store_tag(unsigned int offset, unsigned int data)
+{
+ unsigned int errctl;
+
+ /* enable SPRAM tag access */
+ errctl = bis_c0_errctl(ERRCTL_SPRAM);
+ ehb();
+
+ write_c0_taglo(data);
+ ehb();
+
+ cache_op(Index_Store_Tag_I, CKSEG0|offset);
+ ehb();
+
+ write_c0_errctl(errctl);
+ ehb();
+}
+
+
+static __cpuinit unsigned int ispram_load_tag(unsigned int offset)
+{
+ unsigned int data;
+ unsigned int errctl;
+
+ /* enable SPRAM tag access */
+ errctl = bis_c0_errctl(ERRCTL_SPRAM);
+ ehb();
+ cache_op(Index_Load_Tag_I, CKSEG0 | offset);
+ ehb();
+ data = read_c0_taglo();
+ ehb();
+ write_c0_errctl(errctl);
+ ehb();
+
+ return data;
+}
+
+static __cpuinit void dspram_store_tag(unsigned int offset, unsigned int data)
+{
+ unsigned int errctl;
+
+ /* enable SPRAM tag access */
+ errctl = bis_c0_errctl(ERRCTL_SPRAM);
+ ehb();
+ write_c0_dtaglo(data);
+ ehb();
+ cache_op(Index_Store_Tag_D, CKSEG0 | offset);
+ ehb();
+ write_c0_errctl(errctl);
+ ehb();
+}
+
+
+static __cpuinit unsigned int dspram_load_tag(unsigned int offset)
+{
+ unsigned int data;
+ unsigned int errctl;
+
+ errctl = bis_c0_errctl(ERRCTL_SPRAM);
+ ehb();
+ cache_op(Index_Load_Tag_D, CKSEG0 | offset);
+ ehb();
+ data = read_c0_dtaglo();
+ ehb();
+ write_c0_errctl(errctl);
+ ehb();
+
+ return data;
+}
+
+static __cpuinit void probe_spram(char *type,
+ unsigned int base,
+ unsigned int (*read)(unsigned int),
+ void (*write)(unsigned int, unsigned int))
+{
+ unsigned int firstsize = 0, lastsize = 0;
+ unsigned int firstpa = 0, lastpa = 0, pa = 0;
+ unsigned int offset = 0;
+ unsigned int size, tag0, tag1;
+ unsigned int enabled;
+ int i;
+
+ /*
+ * The limit is arbitrary but avoids the loop running away if
+ * the SPRAM tags are implemented differently
+ */
+
+ for (i = 0; i < 8; i++) {
+ tag0 = read(offset);
+ tag1 = read(offset+SPRAM_TAG_STRIDE);
+ pr_debug("DBG %s%d: tag0=%08x tag1=%08x\n",
+ type, i, tag0, tag1);
+
+ size = tag1 & SPRAM_TAG1_SIZE_MASK;
+
+ if (size == 0)
+ break;
+
+ if (i != 0) {
+ /* tags may repeat... */
+ if ((pa == firstpa && size == firstsize) ||
+ (pa == lastpa && size == lastsize))
+ break;
+ }
+
+ /* Align base with size */
+ base = (base + size - 1) & ~(size-1);
+
+ /* reprogram the base address base address and enable */
+ tag0 = (base & SPRAM_TAG0_PA_MASK) | SPRAM_TAG0_ENABLE;
+ write(offset, tag0);
+
+ base += size;
+
+ /* reread the tag */
+ tag0 = read(offset);
+ pa = tag0 & SPRAM_TAG0_PA_MASK;
+ enabled = tag0 & SPRAM_TAG0_ENABLE;
+
+ if (i == 0) {
+ firstpa = pa;
+ firstsize = size;
+ }
+
+ lastpa = pa;
+ lastsize = size;
+
+ if (strcmp(type, "DSPRAM") == 0) {
+ unsigned int *vp = (unsigned int *)(CKSEG1 | pa);
+ unsigned int v;
+#define TDAT 0x5a5aa5a5
+ vp[0] = TDAT;
+ vp[1] = ~TDAT;
+
+ mb();
+
+ v = vp[0];
+ if (v != TDAT)
+ printk(KERN_ERR "vp=%p wrote=%08x got=%08x\n",
+ vp, TDAT, v);
+ v = vp[1];
+ if (v != ~TDAT)
+ printk(KERN_ERR "vp=%p wrote=%08x got=%08x\n",
+ vp+1, ~TDAT, v);
+ }
+
+ pr_info("%s%d: PA=%08x,Size=%08x%s\n",
+ type, i, pa, size, enabled ? ",enabled" : "");
+ offset += 2 * SPRAM_TAG_STRIDE;
+ }
+}
+
+__cpuinit void spram_config(void)
+{
+ struct cpuinfo_mips *c = &current_cpu_data;
+ unsigned int config0;
+
+ switch (c->cputype) {
+ case CPU_24K:
+ case CPU_34K:
+ case CPU_74K:
+ config0 = read_c0_config();
+ /* FIXME: addresses are Malta specific */
+ if (config0 & (1<<24)) {
+ probe_spram("ISPRAM", 0x1c000000,
+ &ispram_load_tag, &ispram_store_tag);
+ }
+ if (config0 & (1<<23))
+ probe_spram("DSPRAM", 0x1c100000,
+ &dspram_load_tag, &dspram_store_tag);
+ }
+}
diff --git a/arch/mips/kernel/sync-r4k.c b/arch/mips/kernel/sync-r4k.c
new file mode 100644
index 000000000000..9021108eb9c1
--- /dev/null
+++ b/arch/mips/kernel/sync-r4k.c
@@ -0,0 +1,159 @@
+/*
+ * Count register synchronisation.
+ *
+ * All CPUs will have their count registers synchronised to the CPU0 expirelo
+ * value. This can cause a small timewarp for CPU0. All other CPU's should
+ * not have done anything significant (but they may have had interrupts
+ * enabled briefly - prom_smp_finish() should not be responsible for enabling
+ * interrupts...)
+ *
+ * FIXME: broken for SMTC
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/irqflags.h>
+#include <linux/r4k-timer.h>
+
+#include <asm/atomic.h>
+#include <asm/barrier.h>
+#include <asm/cpumask.h>
+#include <asm/mipsregs.h>
+
+static atomic_t __initdata count_start_flag = ATOMIC_INIT(0);
+static atomic_t __initdata count_count_start = ATOMIC_INIT(0);
+static atomic_t __initdata count_count_stop = ATOMIC_INIT(0);
+
+#define COUNTON 100
+#define NR_LOOPS 5
+
+void __init synchronise_count_master(void)
+{
+ int i;
+ unsigned long flags;
+ unsigned int initcount;
+ int nslaves;
+
+#ifdef CONFIG_MIPS_MT_SMTC
+ /*
+ * SMTC needs to synchronise per VPE, not per CPU
+ * ignore for now
+ */
+ return;
+#endif
+
+ pr_info("Checking COUNT synchronization across %u CPUs: ",
+ num_online_cpus());
+
+ local_irq_save(flags);
+
+ /*
+ * Notify the slaves that it's time to start
+ */
+ atomic_set(&count_start_flag, 1);
+ smp_wmb();
+
+ /* Count will be initialised to expirelo for all CPU's */
+ initcount = expirelo;
+
+ /*
+ * We loop a few times to get a primed instruction cache,
+ * then the last pass is more or less synchronised and
+ * the master and slaves each set their cycle counters to a known
+ * value all at once. This reduces the chance of having random offsets
+ * between the processors, and guarantees that the maximum
+ * delay between the cycle counters is never bigger than
+ * the latency of information-passing (cachelines) between
+ * two CPUs.
+ */
+
+ nslaves = num_online_cpus()-1;
+ for (i = 0; i < NR_LOOPS; i++) {
+ /* slaves loop on '!= ncpus' */
+ while (atomic_read(&count_count_start) != nslaves)
+ mb();
+ atomic_set(&count_count_stop, 0);
+ smp_wmb();
+
+ /* this lets the slaves write their count register */
+ atomic_inc(&count_count_start);
+
+ /*
+ * Everyone initialises count in the last loop:
+ */
+ if (i == NR_LOOPS-1)
+ write_c0_count(initcount);
+
+ /*
+ * Wait for all slaves to leave the synchronization point:
+ */
+ while (atomic_read(&count_count_stop) != nslaves)
+ mb();
+ atomic_set(&count_count_start, 0);
+ smp_wmb();
+ atomic_inc(&count_count_stop);
+ }
+ /* Arrange for an interrupt in a short while */
+ write_c0_compare(read_c0_count() + COUNTON);
+
+ local_irq_restore(flags);
+
+ /*
+ * i386 code reported the skew here, but the
+ * count registers were almost certainly out of sync
+ * so no point in alarming people
+ */
+ printk("done.\n");
+}
+
+void __init synchronise_count_slave(void)
+{
+ int i;
+ unsigned long flags;
+ unsigned int initcount;
+ int ncpus;
+
+#ifdef CONFIG_MIPS_MT_SMTC
+ /*
+ * SMTC needs to synchronise per VPE, not per CPU
+ * ignore for now
+ */
+ return;
+#endif
+
+ local_irq_save(flags);
+
+ /*
+ * Not every cpu is online at the time this gets called,
+ * so we first wait for the master to say everyone is ready
+ */
+
+ while (!atomic_read(&count_start_flag))
+ mb();
+
+ /* Count will be initialised to expirelo for all CPU's */
+ initcount = expirelo;
+
+ ncpus = num_online_cpus();
+ for (i = 0; i < NR_LOOPS; i++) {
+ atomic_inc(&count_count_start);
+ while (atomic_read(&count_count_start) != ncpus)
+ mb();
+
+ /*
+ * Everyone initialises count in the last loop:
+ */
+ if (i == NR_LOOPS-1)
+ write_c0_count(initcount);
+
+ atomic_inc(&count_count_stop);
+ while (atomic_read(&count_count_stop) != ncpus)
+ mb();
+ }
+ /* Arrange for an interrupt in a short while */
+ write_c0_compare(read_c0_count() + COUNTON);
+
+ local_irq_restore(flags);
+}
+#undef NR_LOOPS
+#endif
diff --git a/arch/mips/kernel/time.c b/arch/mips/kernel/time.c
index b45a7093ca2d..1f467d534642 100644
--- a/arch/mips/kernel/time.c
+++ b/arch/mips/kernel/time.c
@@ -38,7 +38,6 @@ int __weak rtc_mips_set_time(unsigned long sec)
{
return 0;
}
-EXPORT_SYMBOL(rtc_mips_set_time);
int __weak rtc_mips_set_mmss(unsigned long nowtime)
{
@@ -50,13 +49,11 @@ int update_persistent_clock(struct timespec now)
return rtc_mips_set_mmss(now.tv_sec);
}
-int null_perf_irq(void)
+static int null_perf_irq(void)
{
return 0;
}
-EXPORT_SYMBOL(null_perf_irq);
-
int (*perf_irq)(void) = null_perf_irq;
EXPORT_SYMBOL(perf_irq);
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 984c0d0a7b4d..cb8b0e2c7954 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -22,6 +22,7 @@
#include <linux/kallsyms.h>
#include <linux/bootmem.h>
#include <linux/interrupt.h>
+#include <linux/ptrace.h>
#include <asm/bootinfo.h>
#include <asm/branch.h>
@@ -80,19 +81,22 @@ void (*board_bind_eic_interrupt)(int irq, int regset);
static void show_raw_backtrace(unsigned long reg29)
{
- unsigned long *sp = (unsigned long *)reg29;
+ unsigned long *sp = (unsigned long *)(reg29 & ~3);
unsigned long addr;
printk("Call Trace:");
#ifdef CONFIG_KALLSYMS
printk("\n");
#endif
- while (!kstack_end(sp)) {
- addr = *sp++;
- if (__kernel_text_address(addr))
- print_ip_sym(addr);
+#define IS_KVA01(a) ((((unsigned int)a) & 0xc0000000) == 0x80000000)
+ if (IS_KVA01(sp)) {
+ while (!kstack_end(sp)) {
+ addr = *sp++;
+ if (__kernel_text_address(addr))
+ print_ip_sym(addr);
+ }
+ printk("\n");
}
- printk("\n");
}
#ifdef CONFIG_KALLSYMS
@@ -192,16 +196,19 @@ EXPORT_SYMBOL(dump_stack);
static void show_code(unsigned int __user *pc)
{
long i;
+ unsigned short __user *pc16 = NULL;
printk("\nCode:");
+ if ((unsigned long)pc & 1)
+ pc16 = (unsigned short __user *)((unsigned long)pc & ~1);
for(i = -3 ; i < 6 ; i++) {
unsigned int insn;
- if (__get_user(insn, pc + i)) {
+ if (pc16 ? __get_user(insn, pc16 + i) : __get_user(insn, pc + i)) {
printk(" (Bad address in epc)\n");
break;
}
- printk("%c%08x%c", (i?' ':'<'), insn, (i?' ':'>'));
+ printk("%c%0*x%c", (i?' ':'<'), pc16 ? 4 : 8, insn, (i?' ':'>'));
}
}
@@ -311,10 +318,21 @@ void show_regs(struct pt_regs *regs)
void show_registers(const struct pt_regs *regs)
{
+ const int field = 2 * sizeof(unsigned long);
+
__show_regs(regs);
print_modules();
- printk("Process %s (pid: %d, threadinfo=%p, task=%p)\n",
- current->comm, task_pid_nr(current), current_thread_info(), current);
+ printk("Process %s (pid: %d, threadinfo=%p, task=%p, tls=%0*lx)\n",
+ current->comm, current->pid, current_thread_info(), current,
+ field, current_thread_info()->tp_value);
+ if (cpu_has_userlocal) {
+ unsigned long tls;
+
+ tls = read_c0_userlocal();
+ if (tls != current_thread_info()->tp_value)
+ printk("*HwTLS: %0*lx\n", field, tls);
+ }
+
show_stacktrace(current, regs);
show_code((unsigned int __user *) regs->cp0_epc);
printk("\n");
@@ -657,35 +675,24 @@ asmlinkage void do_fpe(struct pt_regs *regs, unsigned long fcr31)
force_sig_info(SIGFPE, &info, current);
}
-asmlinkage void do_bp(struct pt_regs *regs)
+static void do_trap_or_bp(struct pt_regs *regs, unsigned int code,
+ const char *str)
{
- unsigned int opcode, bcode;
siginfo_t info;
-
- if (__get_user(opcode, (unsigned int __user *) exception_epc(regs)))
- goto out_sigsegv;
-
- /*
- * There is the ancient bug in the MIPS assemblers that the break
- * code starts left to bit 16 instead to bit 6 in the opcode.
- * Gas is bug-compatible, but not always, grrr...
- * We handle both cases with a simple heuristics. --macro
- */
- bcode = ((opcode >> 6) & ((1 << 20) - 1));
- if (bcode < (1 << 10))
- bcode <<= 10;
+ char b[40];
/*
- * (A short test says that IRIX 5.3 sends SIGTRAP for all break
- * insns, even for break codes that indicate arithmetic failures.
- * Weird ...)
+ * A short test says that IRIX 5.3 sends SIGTRAP for all trap
+ * insns, even for trap and break codes that indicate arithmetic
+ * failures. Weird ...
* But should we continue the brokenness??? --macro
*/
- switch (bcode) {
- case BRK_OVERFLOW << 10:
- case BRK_DIVZERO << 10:
- die_if_kernel("Break instruction in kernel code", regs);
- if (bcode == (BRK_DIVZERO << 10))
+ switch (code) {
+ case BRK_OVERFLOW:
+ case BRK_DIVZERO:
+ scnprintf(b, sizeof(b), "%s instruction in kernel code", str);
+ die_if_kernel(b, regs);
+ if (code == BRK_DIVZERO)
info.si_code = FPE_INTDIV;
else
info.si_code = FPE_INTOVF;
@@ -695,12 +702,34 @@ asmlinkage void do_bp(struct pt_regs *regs)
force_sig_info(SIGFPE, &info, current);
break;
case BRK_BUG:
- die("Kernel bug detected", regs);
+ die_if_kernel("Kernel bug detected", regs);
+ force_sig(SIGTRAP, current);
break;
default:
- die_if_kernel("Break instruction in kernel code", regs);
+ scnprintf(b, sizeof(b), "%s instruction in kernel code", str);
+ die_if_kernel(b, regs);
force_sig(SIGTRAP, current);
}
+}
+
+asmlinkage void do_bp(struct pt_regs *regs)
+{
+ unsigned int opcode, bcode;
+
+ if (__get_user(opcode, (unsigned int __user *) exception_epc(regs)))
+ goto out_sigsegv;
+
+ /*
+ * There is the ancient bug in the MIPS assemblers that the break
+ * code starts left to bit 16 instead to bit 6 in the opcode.
+ * Gas is bug-compatible, but not always, grrr...
+ * We handle both cases with a simple heuristics. --macro
+ */
+ bcode = ((opcode >> 6) & ((1 << 20) - 1));
+ if (bcode >= (1 << 10))
+ bcode >>= 10;
+
+ do_trap_or_bp(regs, bcode, "Break");
return;
out_sigsegv:
@@ -710,7 +739,6 @@ out_sigsegv:
asmlinkage void do_tr(struct pt_regs *regs)
{
unsigned int opcode, tcode = 0;
- siginfo_t info;
if (__get_user(opcode, (unsigned int __user *) exception_epc(regs)))
goto out_sigsegv;
@@ -719,32 +747,7 @@ asmlinkage void do_tr(struct pt_regs *regs)
if (!(opcode & OPCODE))
tcode = ((opcode >> 6) & ((1 << 10) - 1));
- /*
- * (A short test says that IRIX 5.3 sends SIGTRAP for all trap
- * insns, even for trap codes that indicate arithmetic failures.
- * Weird ...)
- * But should we continue the brokenness??? --macro
- */
- switch (tcode) {
- case BRK_OVERFLOW:
- case BRK_DIVZERO:
- die_if_kernel("Trap instruction in kernel code", regs);
- if (tcode == BRK_DIVZERO)
- info.si_code = FPE_INTDIV;
- else
- info.si_code = FPE_INTOVF;
- info.si_signo = SIGFPE;
- info.si_errno = 0;
- info.si_addr = (void __user *) regs->cp0_epc;
- force_sig_info(SIGFPE, &info, current);
- break;
- case BRK_BUG:
- die("Kernel bug detected", regs);
- break;
- default:
- die_if_kernel("Trap instruction in kernel code", regs);
- force_sig(SIGTRAP, current);
- }
+ do_trap_or_bp(regs, tcode, "Trap");
return;
out_sigsegv:
@@ -985,6 +988,21 @@ asmlinkage void do_reserved(struct pt_regs *regs)
(regs->cp0_cause & 0x7f) >> 2);
}
+static int __initdata l1parity = 1;
+static int __init nol1parity(char *s)
+{
+ l1parity = 0;
+ return 1;
+}
+__setup("nol1par", nol1parity);
+static int __initdata l2parity = 1;
+static int __init nol2parity(char *s)
+{
+ l2parity = 0;
+ return 1;
+}
+__setup("nol2par", nol2parity);
+
/*
* Some MIPS CPUs can enable/disable for cache parity detection, but do
* it different ways.
@@ -994,6 +1012,62 @@ static inline void parity_protection_init(void)
switch (current_cpu_type()) {
case CPU_24K:
case CPU_34K:
+ case CPU_74K:
+ case CPU_1004K:
+ {
+#define ERRCTL_PE 0x80000000
+#define ERRCTL_L2P 0x00800000
+ unsigned long errctl;
+ unsigned int l1parity_present, l2parity_present;
+
+ errctl = read_c0_ecc();
+ errctl &= ~(ERRCTL_PE|ERRCTL_L2P);
+
+ /* probe L1 parity support */
+ write_c0_ecc(errctl | ERRCTL_PE);
+ back_to_back_c0_hazard();
+ l1parity_present = (read_c0_ecc() & ERRCTL_PE);
+
+ /* probe L2 parity support */
+ write_c0_ecc(errctl|ERRCTL_L2P);
+ back_to_back_c0_hazard();
+ l2parity_present = (read_c0_ecc() & ERRCTL_L2P);
+
+ if (l1parity_present && l2parity_present) {
+ if (l1parity)
+ errctl |= ERRCTL_PE;
+ if (l1parity ^ l2parity)
+ errctl |= ERRCTL_L2P;
+ } else if (l1parity_present) {
+ if (l1parity)
+ errctl |= ERRCTL_PE;
+ } else if (l2parity_present) {
+ if (l2parity)
+ errctl |= ERRCTL_L2P;
+ } else {
+ /* No parity available */
+ }
+
+ printk(KERN_INFO "Writing ErrCtl register=%08lx\n", errctl);
+
+ write_c0_ecc(errctl);
+ back_to_back_c0_hazard();
+ errctl = read_c0_ecc();
+ printk(KERN_INFO "Readback ErrCtl register=%08lx\n", errctl);
+
+ if (l1parity_present)
+ printk(KERN_INFO "Cache parity protection %sabled\n",
+ (errctl & ERRCTL_PE) ? "en" : "dis");
+
+ if (l2parity_present) {
+ if (l1parity_present && l1parity)
+ errctl ^= ERRCTL_L2P;
+ printk(KERN_INFO "L2 cache parity protection %sabled\n",
+ (errctl & ERRCTL_L2P) ? "en" : "dis");
+ }
+ }
+ break;
+
case CPU_5KC:
write_c0_ecc(0x80000000);
back_to_back_c0_hazard();
@@ -1306,6 +1380,17 @@ int cp0_compare_irq;
int cp0_perfcount_irq;
EXPORT_SYMBOL_GPL(cp0_perfcount_irq);
+static int __cpuinitdata noulri;
+
+static int __init ulri_disable(char *s)
+{
+ pr_info("Disabling ulri\n");
+ noulri = 1;
+
+ return 1;
+}
+__setup("noulri", ulri_disable);
+
void __cpuinit per_cpu_trap_init(void)
{
unsigned int cpu = smp_processor_id();
@@ -1342,16 +1427,14 @@ void __cpuinit per_cpu_trap_init(void)
change_c0_status(ST0_CU|ST0_MX|ST0_RE|ST0_FR|ST0_BEV|ST0_TS|ST0_KX|ST0_SX|ST0_UX,
status_set);
-#ifdef CONFIG_CPU_MIPSR2
if (cpu_has_mips_r2) {
unsigned int enable = 0x0000000f;
- if (cpu_has_userlocal)
+ if (!noulri && cpu_has_userlocal)
enable |= (1 << 29);
write_c0_hwrena(enable);
}
-#endif
#ifdef CONFIG_MIPS_MT_SMTC
if (!secondaryTC) {
diff --git a/arch/mips/lib/iomap-pci.c b/arch/mips/lib/iomap-pci.c
index c11b2494bb6e..2ab899c4b4ce 100644
--- a/arch/mips/lib/iomap-pci.c
+++ b/arch/mips/lib/iomap-pci.c
@@ -45,8 +45,8 @@ static void __iomem *ioport_map_pci(struct pci_dev *dev,
*/
void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
{
- unsigned long start = pci_resource_start(dev, bar);
- unsigned long len = pci_resource_len(dev, bar);
+ resource_size_t start = pci_resource_start(dev, bar);
+ resource_size_t len = pci_resource_len(dev, bar);
unsigned long flags = pci_resource_flags(dev, bar);
if (!len || !start)
diff --git a/arch/mips/math-emu/ieee754dp.h b/arch/mips/math-emu/ieee754dp.h
index 8977eb585a37..762786538449 100644
--- a/arch/mips/math-emu/ieee754dp.h
+++ b/arch/mips/math-emu/ieee754dp.h
@@ -46,7 +46,7 @@
#define DPDNORMX DPDNORMx(xm, xe)
#define DPDNORMY DPDNORMx(ym, ye)
-static __inline ieee754dp builddp(int s, int bx, u64 m)
+static inline ieee754dp builddp(int s, int bx, u64 m)
{
ieee754dp r;
diff --git a/arch/mips/math-emu/ieee754sp.h b/arch/mips/math-emu/ieee754sp.h
index 9917c1e4d947..d9e3586b5bce 100644
--- a/arch/mips/math-emu/ieee754sp.h
+++ b/arch/mips/math-emu/ieee754sp.h
@@ -51,7 +51,7 @@
#define SPDNORMX SPDNORMx(xm, xe)
#define SPDNORMY SPDNORMx(ym, ye)
-static __inline ieee754sp buildsp(int s, int bx, unsigned m)
+static inline ieee754sp buildsp(int s, int bx, unsigned m)
{
ieee754sp r;
diff --git a/arch/mips/mips-boards/generic/Makefile b/arch/mips/mips-boards/generic/Makefile
index b31d8dfed1be..f7f87fc09d1e 100644
--- a/arch/mips/mips-boards/generic/Makefile
+++ b/arch/mips/mips-boards/generic/Makefile
@@ -20,6 +20,7 @@
obj-y := reset.o display.o init.o memory.o \
cmdline.o time.o
+obj-y += amon.o
obj-$(CONFIG_EARLY_PRINTK) += console.o
obj-$(CONFIG_PCI) += pci.o
diff --git a/arch/mips/mips-boards/generic/amon.c b/arch/mips/mips-boards/generic/amon.c
new file mode 100644
index 000000000000..b7633fda4180
--- /dev/null
+++ b/arch/mips/mips-boards/generic/amon.c
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2007 MIPS Technologies, Inc.
+ * All rights reserved.
+
+ * This program is free software; you can distribute it and/or modify it
+ * under the terms of the GNU General Public License (Version 2) as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Arbitrary Monitor interface
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+
+#include <asm-mips/addrspace.h>
+#include <asm-mips/mips-boards/launch.h>
+#include <asm-mips/mipsmtregs.h>
+
+int amon_cpu_avail(int cpu)
+{
+ struct cpulaunch *launch = (struct cpulaunch *)KSEG0ADDR(CPULAUNCH);
+
+ if (cpu < 0 || cpu >= NCPULAUNCH) {
+ pr_debug("avail: cpu%d is out of range\n", cpu);
+ return 0;
+ }
+
+ launch += cpu;
+ if (!(launch->flags & LAUNCH_FREADY)) {
+ pr_debug("avail: cpu%d is not ready\n", cpu);
+ return 0;
+ }
+ if (launch->flags & (LAUNCH_FGO|LAUNCH_FGONE)) {
+ pr_debug("avail: too late.. cpu%d is already gone\n", cpu);
+ return 0;
+ }
+
+ return 1;
+}
+
+void amon_cpu_start(int cpu,
+ unsigned long pc, unsigned long sp,
+ unsigned long gp, unsigned long a0)
+{
+ volatile struct cpulaunch *launch =
+ (struct cpulaunch *)KSEG0ADDR(CPULAUNCH);
+
+ if (!amon_cpu_avail(cpu))
+ return;
+ if (cpu == smp_processor_id()) {
+ pr_debug("launch: I am cpu%d!\n", cpu);
+ return;
+ }
+ launch += cpu;
+
+ pr_debug("launch: starting cpu%d\n", cpu);
+
+ launch->pc = pc;
+ launch->gp = gp;
+ launch->sp = sp;
+ launch->a0 = a0;
+
+ /* Make sure target sees parameters before the go bit */
+ smp_mb();
+
+ launch->flags |= LAUNCH_FGO;
+ while ((launch->flags & LAUNCH_FGONE) == 0)
+ ;
+ pr_debug("launch: cpu%d gone!\n", cpu);
+}
diff --git a/arch/mips/mips-boards/generic/init.c b/arch/mips/mips-boards/generic/init.c
index 1695dca5506b..83b9dc739203 100644
--- a/arch/mips/mips-boards/generic/init.c
+++ b/arch/mips/mips-boards/generic/init.c
@@ -226,7 +226,7 @@ void __init kgdb_config(void)
}
#endif
-void __init mips_nmi_setup(void)
+static void __init mips_nmi_setup(void)
{
void *base;
extern char except_vec_nmi;
@@ -238,7 +238,7 @@ void __init mips_nmi_setup(void)
flush_icache_range((unsigned long)base, (unsigned long)base + 0x80);
}
-void __init mips_ejtag_setup(void)
+static void __init mips_ejtag_setup(void)
{
void *base;
extern char except_vec_ejtag_debug;
@@ -295,15 +295,21 @@ void __init prom_init(void)
break;
case MIPS_REVISION_CORID_CORE_MSC:
case MIPS_REVISION_CORID_CORE_FPGA2:
- case MIPS_REVISION_CORID_CORE_FPGA3:
- case MIPS_REVISION_CORID_CORE_FPGA4:
case MIPS_REVISION_CORID_CORE_24K:
- case MIPS_REVISION_CORID_CORE_EMUL_MSC:
+ /*
+ * SOCit/ROCit support is essentially identical
+ * but make an attempt to distinguish them
+ */
mips_revision_sconid = MIPS_REVISION_SCON_SOCIT;
break;
+ case MIPS_REVISION_CORID_CORE_FPGA3:
+ case MIPS_REVISION_CORID_CORE_FPGA4:
+ case MIPS_REVISION_CORID_CORE_FPGA5:
+ case MIPS_REVISION_CORID_CORE_EMUL_MSC:
default:
- mips_display_message("CC Error");
- while (1); /* We die here... */
+ /* See above */
+ mips_revision_sconid = MIPS_REVISION_SCON_ROCIT;
+ break;
}
}
@@ -418,6 +424,9 @@ void __init prom_init(void)
#ifdef CONFIG_SERIAL_8250_CONSOLE
console_config();
#endif
+#ifdef CONFIG_MIPS_CMP
+ register_smp_ops(&cmp_smp_ops);
+#endif
#ifdef CONFIG_MIPS_MT_SMP
register_smp_ops(&vsmp_smp_ops);
#endif
diff --git a/arch/mips/mips-boards/generic/memory.c b/arch/mips/mips-boards/generic/memory.c
index dc272c188233..5e443bba5662 100644
--- a/arch/mips/mips-boards/generic/memory.c
+++ b/arch/mips/mips-boards/generic/memory.c
@@ -37,7 +37,7 @@ enum yamon_memtypes {
yamon_prom,
yamon_free,
};
-struct prom_pmemblock mdesc[PROM_MAX_PMEMBLOCKS];
+static struct prom_pmemblock mdesc[PROM_MAX_PMEMBLOCKS];
#ifdef DEBUG
static char *mtypes[3] = {
@@ -50,7 +50,7 @@ static char *mtypes[3] = {
/* determined physical memory size, not overridden by command line args */
unsigned long physical_memsize = 0L;
-struct prom_pmemblock * __init prom_getmdesc(void)
+static struct prom_pmemblock * __init prom_getmdesc(void)
{
char *memsize_str;
unsigned int memsize;
diff --git a/arch/mips/mips-boards/generic/time.c b/arch/mips/mips-boards/generic/time.c
index b50e0fc406ac..008fd82b5840 100644
--- a/arch/mips/mips-boards/generic/time.c
+++ b/arch/mips/mips-boards/generic/time.c
@@ -55,16 +55,36 @@
unsigned long cpu_khz;
static int mips_cpu_timer_irq;
+static int mips_cpu_perf_irq;
extern int cp0_perfcount_irq;
+DEFINE_PER_CPU(unsigned int, tickcount);
+#define tickcount_this_cpu __get_cpu_var(tickcount)
+static unsigned long ledbitmask;
+
static void mips_timer_dispatch(void)
{
+#if defined(CONFIG_MIPS_MALTA) || defined(CONFIG_MIPS_ATLAS)
+ /*
+ * Yes, this is very tacky, won't work as expected with SMTC and
+ * dyntick will break it,
+ * but it gives me a nice warm feeling during debug
+ */
+#define LEDBAR 0xbf000408
+ if (tickcount_this_cpu++ >= HZ) {
+ tickcount_this_cpu = 0;
+ change_bit(smp_processor_id(), &ledbitmask);
+ smp_wmb(); /* Make sure every one else sees the change */
+ /* This will pick up any recent changes made by other CPU's */
+ *(unsigned int *)LEDBAR = ledbitmask;
+ }
+#endif
do_IRQ(mips_cpu_timer_irq);
}
static void mips_perf_dispatch(void)
{
- do_IRQ(cp0_perfcount_irq);
+ do_IRQ(mips_cpu_perf_irq);
}
/*
@@ -127,21 +147,20 @@ unsigned long read_persistent_clock(void)
return mc146818_get_cmos_time();
}
-void __init plat_perf_setup(void)
+static void __init plat_perf_setup(void)
{
- cp0_perfcount_irq = -1;
-
#ifdef MSC01E_INT_BASE
if (cpu_has_veic) {
set_vi_handler(MSC01E_INT_PERFCTR, mips_perf_dispatch);
- cp0_perfcount_irq = MSC01E_INT_BASE + MSC01E_INT_PERFCTR;
+ mips_cpu_perf_irq = MSC01E_INT_BASE + MSC01E_INT_PERFCTR;
} else
#endif
if (cp0_perfcount_irq >= 0) {
if (cpu_has_vint)
set_vi_handler(cp0_perfcount_irq, mips_perf_dispatch);
+ mips_cpu_perf_irq = MIPS_CPU_IRQ_BASE + cp0_perfcount_irq;
#ifdef CONFIG_SMP
- set_irq_handler(cp0_perfcount_irq, handle_percpu_irq);
+ set_irq_handler(mips_cpu_perf_irq, handle_percpu_irq);
#endif
}
}
diff --git a/arch/mips/mips-boards/malta/Makefile b/arch/mips/mips-boards/malta/Makefile
index 931ca4600a63..8dc6e2ac4c03 100644
--- a/arch/mips/mips-boards/malta/Makefile
+++ b/arch/mips/mips-boards/malta/Makefile
@@ -22,6 +22,7 @@
obj-y := malta_int.o malta_platform.o malta_setup.o
obj-$(CONFIG_MTD) += malta_mtd.o
+# FIXME FIXME FIXME
obj-$(CONFIG_MIPS_MT_SMTC) += malta_smtc.o
EXTRA_CFLAGS += -Werror
diff --git a/arch/mips/mips-boards/malta/malta_int.c b/arch/mips/mips-boards/malta/malta_int.c
index dbe60eb55e29..8c495104b321 100644
--- a/arch/mips/mips-boards/malta/malta_int.c
+++ b/arch/mips/mips-boards/malta/malta_int.c
@@ -31,6 +31,7 @@
#include <linux/kernel.h>
#include <linux/random.h>
+#include <asm/traps.h>
#include <asm/i8259.h>
#include <asm/irq_cpu.h>
#include <asm/irq_regs.h>
@@ -41,6 +42,14 @@
#include <asm/mips-boards/generic.h>
#include <asm/mips-boards/msc01_pci.h>
#include <asm/msc01_ic.h>
+#include <asm/gic.h>
+#include <asm/gcmpregs.h>
+
+int gcmp_present = -1;
+int gic_present;
+static unsigned long _msc01_biu_base;
+static unsigned long _gcmp_base;
+static unsigned int ipi_map[NR_CPUS];
static DEFINE_SPINLOCK(mips_irq_lock);
@@ -121,6 +130,17 @@ static void malta_hw0_irqdispatch(void)
do_IRQ(MALTA_INT_BASE + irq);
}
+static void malta_ipi_irqdispatch(void)
+{
+ int irq;
+
+ irq = gic_get_int();
+ if (irq < 0)
+ return; /* interrupt has already been cleared */
+
+ do_IRQ(MIPS_GIC_IRQ_BASE + irq);
+}
+
static void corehi_irqdispatch(void)
{
unsigned int intedge, intsteer, pcicmd, pcibadaddr;
@@ -257,12 +277,61 @@ asmlinkage void plat_irq_dispatch(void)
if (irq == MIPSCPU_INT_I8259A)
malta_hw0_irqdispatch();
+ else if (gic_present && ((1 << irq) & ipi_map[smp_processor_id()]))
+ malta_ipi_irqdispatch();
else if (irq >= 0)
do_IRQ(MIPS_CPU_IRQ_BASE + irq);
else
spurious_interrupt();
}
+#ifdef CONFIG_MIPS_MT_SMP
+
+
+#define GIC_MIPS_CPU_IPI_RESCHED_IRQ 3
+#define GIC_MIPS_CPU_IPI_CALL_IRQ 4
+
+#define MIPS_CPU_IPI_RESCHED_IRQ 0 /* SW int 0 for resched */
+#define C_RESCHED C_SW0
+#define MIPS_CPU_IPI_CALL_IRQ 1 /* SW int 1 for resched */
+#define C_CALL C_SW1
+static int cpu_ipi_resched_irq, cpu_ipi_call_irq;
+
+static void ipi_resched_dispatch(void)
+{
+ do_IRQ(MIPS_CPU_IRQ_BASE + MIPS_CPU_IPI_RESCHED_IRQ);
+}
+
+static void ipi_call_dispatch(void)
+{
+ do_IRQ(MIPS_CPU_IRQ_BASE + MIPS_CPU_IPI_CALL_IRQ);
+}
+
+static irqreturn_t ipi_resched_interrupt(int irq, void *dev_id)
+{
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t ipi_call_interrupt(int irq, void *dev_id)
+{
+ smp_call_function_interrupt();
+
+ return IRQ_HANDLED;
+}
+
+static struct irqaction irq_resched = {
+ .handler = ipi_resched_interrupt,
+ .flags = IRQF_DISABLED|IRQF_PERCPU,
+ .name = "IPI_resched"
+};
+
+static struct irqaction irq_call = {
+ .handler = ipi_call_interrupt,
+ .flags = IRQF_DISABLED|IRQF_PERCPU,
+ .name = "IPI_call"
+};
+#endif /* CONFIG_MIPS_MT_SMP */
+
static struct irqaction i8259irq = {
.handler = no_action,
.name = "XT-PIC cascade"
@@ -273,13 +342,13 @@ static struct irqaction corehi_irqaction = {
.name = "CoreHi"
};
-msc_irqmap_t __initdata msc_irqmap[] = {
+static msc_irqmap_t __initdata msc_irqmap[] = {
{MSC01C_INT_TMR, MSC01_IRQ_EDGE, 0},
{MSC01C_INT_PCI, MSC01_IRQ_LEVEL, 0},
};
-int __initdata msc_nr_irqs = ARRAY_SIZE(msc_irqmap);
+static int __initdata msc_nr_irqs = ARRAY_SIZE(msc_irqmap);
-msc_irqmap_t __initdata msc_eicirqmap[] = {
+static msc_irqmap_t __initdata msc_eicirqmap[] = {
{MSC01E_INT_SW0, MSC01_IRQ_LEVEL, 0},
{MSC01E_INT_SW1, MSC01_IRQ_LEVEL, 0},
{MSC01E_INT_I8259A, MSC01_IRQ_LEVEL, 0},
@@ -291,15 +360,90 @@ msc_irqmap_t __initdata msc_eicirqmap[] = {
{MSC01E_INT_PERFCTR, MSC01_IRQ_LEVEL, 0},
{MSC01E_INT_CPUCTR, MSC01_IRQ_LEVEL, 0}
};
-int __initdata msc_nr_eicirqs = ARRAY_SIZE(msc_eicirqmap);
+
+static int __initdata msc_nr_eicirqs = ARRAY_SIZE(msc_eicirqmap);
+
+/*
+ * This GIC specific tabular array defines the association between External
+ * Interrupts and CPUs/Core Interrupts. The nature of the External
+ * Interrupts is also defined here - polarity/trigger.
+ */
+static struct gic_intr_map gic_intr_map[] = {
+ { GIC_EXT_INTR(0), X, X, X, X, 0 },
+ { GIC_EXT_INTR(1), X, X, X, X, 0 },
+ { GIC_EXT_INTR(2), X, X, X, X, 0 },
+ { GIC_EXT_INTR(3), 0, GIC_CPU_INT0, GIC_POL_POS, GIC_TRIG_LEVEL, 0 },
+ { GIC_EXT_INTR(4), 0, GIC_CPU_INT1, GIC_POL_POS, GIC_TRIG_LEVEL, 0 },
+ { GIC_EXT_INTR(5), 0, GIC_CPU_INT2, GIC_POL_POS, GIC_TRIG_LEVEL, 0 },
+ { GIC_EXT_INTR(6), 0, GIC_CPU_INT3, GIC_POL_POS, GIC_TRIG_LEVEL, 0 },
+ { GIC_EXT_INTR(7), 0, GIC_CPU_INT4, GIC_POL_POS, GIC_TRIG_LEVEL, 0 },
+ { GIC_EXT_INTR(8), 0, GIC_CPU_INT3, GIC_POL_POS, GIC_TRIG_LEVEL, 0 },
+ { GIC_EXT_INTR(9), 0, GIC_CPU_INT3, GIC_POL_POS, GIC_TRIG_LEVEL, 0 },
+ { GIC_EXT_INTR(10), X, X, X, X, 0 },
+ { GIC_EXT_INTR(11), X, X, X, X, 0 },
+ { GIC_EXT_INTR(12), 0, GIC_CPU_INT3, GIC_POL_POS, GIC_TRIG_LEVEL, 0 },
+ { GIC_EXT_INTR(13), 0, GIC_MAP_TO_NMI_MSK, GIC_POL_POS, GIC_TRIG_LEVEL, 0 },
+ { GIC_EXT_INTR(14), 0, GIC_MAP_TO_NMI_MSK, GIC_POL_POS, GIC_TRIG_LEVEL, 0 },
+ { GIC_EXT_INTR(15), X, X, X, X, 0 },
+ { GIC_EXT_INTR(16), 0, GIC_CPU_INT1, GIC_POL_POS, GIC_TRIG_EDGE, 1 },
+ { GIC_EXT_INTR(17), 0, GIC_CPU_INT2, GIC_POL_POS, GIC_TRIG_EDGE, 1 },
+ { GIC_EXT_INTR(18), 1, GIC_CPU_INT1, GIC_POL_POS, GIC_TRIG_EDGE, 1 },
+ { GIC_EXT_INTR(19), 1, GIC_CPU_INT2, GIC_POL_POS, GIC_TRIG_EDGE, 1 },
+ { GIC_EXT_INTR(20), 2, GIC_CPU_INT1, GIC_POL_POS, GIC_TRIG_EDGE, 1 },
+ { GIC_EXT_INTR(21), 2, GIC_CPU_INT2, GIC_POL_POS, GIC_TRIG_EDGE, 1 },
+ { GIC_EXT_INTR(22), 3, GIC_CPU_INT1, GIC_POL_POS, GIC_TRIG_EDGE, 1 },
+ { GIC_EXT_INTR(23), 3, GIC_CPU_INT2, GIC_POL_POS, GIC_TRIG_EDGE, 1 },
+};
+
+/*
+ * GCMP needs to be detected before any SMP initialisation
+ */
+int __init gcmp_probe(unsigned long addr, unsigned long size)
+{
+ if (gcmp_present >= 0)
+ return gcmp_present;
+
+ _gcmp_base = (unsigned long) ioremap_nocache(GCMP_BASE_ADDR, GCMP_ADDRSPACE_SZ);
+ _msc01_biu_base = (unsigned long) ioremap_nocache(MSC01_BIU_REG_BASE, MSC01_BIU_ADDRSPACE_SZ);
+ gcmp_present = (GCMPGCB(GCMPB) & GCMP_GCB_GCMPB_GCMPBASE_MSK) == GCMP_BASE_ADDR;
+
+ if (gcmp_present)
+ printk(KERN_DEBUG "GCMP present\n");
+ return gcmp_present;
+}
+
+void __init fill_ipi_map(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(gic_intr_map); i++) {
+ if (gic_intr_map[i].ipiflag && (gic_intr_map[i].cpunum != X))
+ ipi_map[gic_intr_map[i].cpunum] |=
+ (1 << (gic_intr_map[i].pin + 2));
+ }
+}
void __init arch_init_irq(void)
{
+ int gic_present, gcmp_present;
+
init_i8259_irqs();
if (!cpu_has_veic)
mips_cpu_irq_init();
+ gcmp_present = gcmp_probe(GCMP_BASE_ADDR, GCMP_ADDRSPACE_SZ);
+ if (gcmp_present) {
+ GCMPGCB(GICBA) = GIC_BASE_ADDR | GCMP_GCB_GICBA_EN_MSK;
+ gic_present = 1;
+ } else {
+ _msc01_biu_base = (unsigned long) ioremap_nocache(MSC01_BIU_REG_BASE, MSC01_BIU_ADDRSPACE_SZ);
+ gic_present = (REG(_msc01_biu_base, MSC01_SC_CFG) &
+ MSC01_SC_CFG_GICPRES_MSK) >> MSC01_SC_CFG_GICPRES_SHF;
+ }
+ if (gic_present)
+ printk(KERN_DEBUG "GIC present\n");
+
switch (mips_revision_sconid) {
case MIPS_REVISION_SCON_SOCIT:
case MIPS_REVISION_SCON_ROCIT:
@@ -360,4 +504,206 @@ void __init arch_init_irq(void)
setup_irq(MIPS_CPU_IRQ_BASE+MIPSCPU_INT_COREHI,
&corehi_irqaction);
}
+
+#if defined(CONFIG_MIPS_MT_SMP)
+ if (gic_present) {
+ /* FIXME */
+ int i;
+ struct {
+ unsigned int resched;
+ unsigned int call;
+ } ipiirq[] = {
+ {
+ .resched = GIC_IPI_EXT_INTR_RESCHED_VPE0,
+ .call = GIC_IPI_EXT_INTR_CALLFNC_VPE0},
+ {
+ .resched = GIC_IPI_EXT_INTR_RESCHED_VPE1,
+ .call = GIC_IPI_EXT_INTR_CALLFNC_VPE1
+ }, {
+ .resched = GIC_IPI_EXT_INTR_RESCHED_VPE2,
+ .call = GIC_IPI_EXT_INTR_CALLFNC_VPE2
+ }, {
+ .resched = GIC_IPI_EXT_INTR_RESCHED_VPE3,
+ .call = GIC_IPI_EXT_INTR_CALLFNC_VPE3
+ }
+ };
+#define NIPI (sizeof(ipiirq)/sizeof(ipiirq[0]))
+ fill_ipi_map();
+ gic_init(GIC_BASE_ADDR, GIC_ADDRSPACE_SZ, gic_intr_map, ARRAY_SIZE(gic_intr_map), MIPS_GIC_IRQ_BASE);
+ if (!gcmp_present) {
+ /* Enable the GIC */
+ i = REG(_msc01_biu_base, MSC01_SC_CFG);
+ REG(_msc01_biu_base, MSC01_SC_CFG) =
+ (i | (0x1 << MSC01_SC_CFG_GICENA_SHF));
+ pr_debug("GIC Enabled\n");
+ }
+
+ /* set up ipi interrupts */
+ if (cpu_has_vint) {
+ set_vi_handler(MIPSCPU_INT_IPI0, malta_ipi_irqdispatch);
+ set_vi_handler(MIPSCPU_INT_IPI1, malta_ipi_irqdispatch);
+ }
+ /* Argh.. this really needs sorting out.. */
+ printk("CPU%d: status register was %08x\n", smp_processor_id(), read_c0_status());
+ write_c0_status(read_c0_status() | STATUSF_IP3 | STATUSF_IP4);
+ printk("CPU%d: status register now %08x\n", smp_processor_id(), read_c0_status());
+ write_c0_status(0x1100dc00);
+ printk("CPU%d: status register frc %08x\n", smp_processor_id(), read_c0_status());
+ for (i = 0; i < NIPI; i++) {
+ setup_irq(MIPS_GIC_IRQ_BASE + ipiirq[i].resched, &irq_resched);
+ setup_irq(MIPS_GIC_IRQ_BASE + ipiirq[i].call, &irq_call);
+
+ set_irq_handler(MIPS_GIC_IRQ_BASE + ipiirq[i].resched, handle_percpu_irq);
+ set_irq_handler(MIPS_GIC_IRQ_BASE + ipiirq[i].call, handle_percpu_irq);
+ }
+ } else {
+ /* set up ipi interrupts */
+ if (cpu_has_veic) {
+ set_vi_handler (MSC01E_INT_SW0, ipi_resched_dispatch);
+ set_vi_handler (MSC01E_INT_SW1, ipi_call_dispatch);
+ cpu_ipi_resched_irq = MSC01E_INT_SW0;
+ cpu_ipi_call_irq = MSC01E_INT_SW1;
+ } else {
+ if (cpu_has_vint) {
+ set_vi_handler (MIPS_CPU_IPI_RESCHED_IRQ, ipi_resched_dispatch);
+ set_vi_handler (MIPS_CPU_IPI_CALL_IRQ, ipi_call_dispatch);
+ }
+ cpu_ipi_resched_irq = MIPS_CPU_IRQ_BASE + MIPS_CPU_IPI_RESCHED_IRQ;
+ cpu_ipi_call_irq = MIPS_CPU_IRQ_BASE + MIPS_CPU_IPI_CALL_IRQ;
+ }
+
+ setup_irq(cpu_ipi_resched_irq, &irq_resched);
+ setup_irq(cpu_ipi_call_irq, &irq_call);
+
+ set_irq_handler(cpu_ipi_resched_irq, handle_percpu_irq);
+ set_irq_handler(cpu_ipi_call_irq, handle_percpu_irq);
+ }
+#endif
+}
+
+void malta_be_init(void)
+{
+ if (gcmp_present) {
+ /* Could change CM error mask register */
+ }
+}
+
+
+static char *tr[8] = {
+ "mem", "gcr", "gic", "mmio",
+ "0x04", "0x05", "0x06", "0x07"
+};
+
+static char *mcmd[32] = {
+ [0x00] = "0x00",
+ [0x01] = "Legacy Write",
+ [0x02] = "Legacy Read",
+ [0x03] = "0x03",
+ [0x04] = "0x04",
+ [0x05] = "0x05",
+ [0x06] = "0x06",
+ [0x07] = "0x07",
+ [0x08] = "Coherent Read Own",
+ [0x09] = "Coherent Read Share",
+ [0x0a] = "Coherent Read Discard",
+ [0x0b] = "Coherent Ready Share Always",
+ [0x0c] = "Coherent Upgrade",
+ [0x0d] = "Coherent Writeback",
+ [0x0e] = "0x0e",
+ [0x0f] = "0x0f",
+ [0x10] = "Coherent Copyback",
+ [0x11] = "Coherent Copyback Invalidate",
+ [0x12] = "Coherent Invalidate",
+ [0x13] = "Coherent Write Invalidate",
+ [0x14] = "Coherent Completion Sync",
+ [0x15] = "0x15",
+ [0x16] = "0x16",
+ [0x17] = "0x17",
+ [0x18] = "0x18",
+ [0x19] = "0x19",
+ [0x1a] = "0x1a",
+ [0x1b] = "0x1b",
+ [0x1c] = "0x1c",
+ [0x1d] = "0x1d",
+ [0x1e] = "0x1e",
+ [0x1f] = "0x1f"
+};
+
+static char *core[8] = {
+ "Invalid/OK", "Invalid/Data",
+ "Shared/OK", "Shared/Data",
+ "Modified/OK", "Modified/Data",
+ "Exclusive/OK", "Exclusive/Data"
+};
+
+static char *causes[32] = {
+ "None", "GC_WR_ERR", "GC_RD_ERR", "COH_WR_ERR",
+ "COH_RD_ERR", "MMIO_WR_ERR", "MMIO_RD_ERR", "0x07",
+ "0x08", "0x09", "0x0a", "0x0b",
+ "0x0c", "0x0d", "0x0e", "0x0f",
+ "0x10", "0x11", "0x12", "0x13",
+ "0x14", "0x15", "0x16", "INTVN_WR_ERR",
+ "INTVN_RD_ERR", "0x19", "0x1a", "0x1b",
+ "0x1c", "0x1d", "0x1e", "0x1f"
+};
+
+int malta_be_handler(struct pt_regs *regs, int is_fixup)
+{
+ /* This duplicates the handling in do_be which seems wrong */
+ int retval = is_fixup ? MIPS_BE_FIXUP : MIPS_BE_FATAL;
+
+ if (gcmp_present) {
+ unsigned long cm_error = GCMPGCB(GCMEC);
+ unsigned long cm_addr = GCMPGCB(GCMEA);
+ unsigned long cm_other = GCMPGCB(GCMEO);
+ unsigned long cause, ocause;
+ char buf[256];
+
+ cause = (cm_error & GCMP_GCB_GMEC_ERROR_TYPE_MSK);
+ if (cause != 0) {
+ cause >>= GCMP_GCB_GMEC_ERROR_TYPE_SHF;
+ if (cause < 16) {
+ unsigned long cca_bits = (cm_error >> 15) & 7;
+ unsigned long tr_bits = (cm_error >> 12) & 7;
+ unsigned long mcmd_bits = (cm_error >> 7) & 0x1f;
+ unsigned long stag_bits = (cm_error >> 3) & 15;
+ unsigned long sport_bits = (cm_error >> 0) & 7;
+
+ snprintf(buf, sizeof(buf),
+ "CCA=%lu TR=%s MCmd=%s STag=%lu "
+ "SPort=%lu\n",
+ cca_bits, tr[tr_bits], mcmd[mcmd_bits],
+ stag_bits, sport_bits);
+ } else {
+ /* glob state & sresp together */
+ unsigned long c3_bits = (cm_error >> 18) & 7;
+ unsigned long c2_bits = (cm_error >> 15) & 7;
+ unsigned long c1_bits = (cm_error >> 12) & 7;
+ unsigned long c0_bits = (cm_error >> 9) & 7;
+ unsigned long sc_bit = (cm_error >> 8) & 1;
+ unsigned long mcmd_bits = (cm_error >> 3) & 0x1f;
+ unsigned long sport_bits = (cm_error >> 0) & 7;
+ snprintf(buf, sizeof(buf),
+ "C3=%s C2=%s C1=%s C0=%s SC=%s "
+ "MCmd=%s SPort=%lu\n",
+ core[c3_bits], core[c2_bits],
+ core[c1_bits], core[c0_bits],
+ sc_bit ? "True" : "False",
+ mcmd[mcmd_bits], sport_bits);
+ }
+
+ ocause = (cm_other & GCMP_GCB_GMEO_ERROR_2ND_MSK) >>
+ GCMP_GCB_GMEO_ERROR_2ND_SHF;
+
+ printk("CM_ERROR=%08lx %s <%s>\n", cm_error,
+ causes[cause], buf);
+ printk("CM_ADDR =%08lx\n", cm_addr);
+ printk("CM_OTHER=%08lx %s\n", cm_other, causes[ocause]);
+
+ /* reprime cause register */
+ GCMPGCB(GCMEC) = 0;
+ }
+ }
+
+ return retval;
}
diff --git a/arch/mips/mips-boards/malta/malta_setup.c b/arch/mips/mips-boards/malta/malta_setup.c
index 2cd8f5734b36..e7cad54936ca 100644
--- a/arch/mips/mips-boards/malta/malta_setup.c
+++ b/arch/mips/mips-boards/malta/malta_setup.c
@@ -1,7 +1,7 @@
/*
* Carsten Langgaard, carstenl@mips.com
* Copyright (C) 2000 MIPS Technologies, Inc. All rights reserved.
- * Copyright (C) Dmitri Vorobiev
+ * Copyright (C) 2008 Dmitri Vorobiev
*
* This program is free software; you can distribute it and/or modify it
* under the terms of the GNU General Public License (Version 2) as
@@ -36,7 +36,10 @@
#include <linux/console.h>
#endif
-struct resource standard_io_resources[] = {
+extern void malta_be_init(void);
+extern int malta_be_handler(struct pt_regs *regs, int is_fixup);
+
+static struct resource standard_io_resources[] = {
{
.name = "dma1",
.start = 0x00,
@@ -220,4 +223,7 @@ void __init plat_mem_setup(void)
screen_info_setup();
#endif
mips_reboot_setup();
+
+ board_be_init = malta_be_init;
+ board_be_handler = malta_be_handler;
}
diff --git a/arch/mips/mipssim/sim_setup.c b/arch/mips/mipssim/sim_setup.c
index d49fe73426b7..7c7148ef2646 100644
--- a/arch/mips/mipssim/sim_setup.c
+++ b/arch/mips/mipssim/sim_setup.c
@@ -39,9 +39,6 @@
static void __init serial_init(void);
unsigned int _isbonito = 0;
-extern void __init sanitize_tlb_entries(void);
-
-
const char *get_system_type(void)
{
return "MIPSsim";
@@ -55,9 +52,6 @@ void __init plat_mem_setup(void)
pr_info("Linux started...\n");
-#ifdef CONFIG_MIPS_MT_SMP
- sanitize_tlb_entries();
-#endif
}
extern struct plat_smp_ops ssmtc_smp_ops;
diff --git a/arch/mips/mm/Makefile b/arch/mips/mm/Makefile
index c6f832e0f41c..48731020ca0e 100644
--- a/arch/mips/mm/Makefile
+++ b/arch/mips/mm/Makefile
@@ -4,30 +4,29 @@
obj-y += cache.o dma-default.o extable.o fault.o \
init.o pgtable.o tlbex.o tlbex-fault.o \
- uasm.o
+ uasm.o page.o
obj-$(CONFIG_32BIT) += ioremap.o pgtable-32.o
obj-$(CONFIG_64BIT) += pgtable-64.o
obj-$(CONFIG_HIGHMEM) += highmem.o
-obj-$(CONFIG_CPU_LOONGSON2) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o
-obj-$(CONFIG_CPU_MIPS32) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o
-obj-$(CONFIG_CPU_MIPS64) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o
-obj-$(CONFIG_CPU_NEVADA) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o
-obj-$(CONFIG_CPU_R10000) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o
-obj-$(CONFIG_CPU_R3000) += c-r3k.o tlb-r3k.o pg-r4k.o
-obj-$(CONFIG_CPU_R4300) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o
-obj-$(CONFIG_CPU_R4X00) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o
-obj-$(CONFIG_CPU_R5000) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o
-obj-$(CONFIG_CPU_R5432) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o
-obj-$(CONFIG_CPU_R8000) += c-r4k.o cex-gen.o pg-r4k.o tlb-r8k.o
-obj-$(CONFIG_CPU_RM7000) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o
-obj-$(CONFIG_CPU_RM9000) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o
-obj-$(CONFIG_CPU_SB1) += c-r4k.o cerr-sb1.o cex-sb1.o pg-sb1.o \
- tlb-r4k.o
-obj-$(CONFIG_CPU_TX39XX) += c-tx39.o pg-r4k.o tlb-r3k.o
-obj-$(CONFIG_CPU_TX49XX) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o
-obj-$(CONFIG_CPU_VR41XX) += c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o
+obj-$(CONFIG_CPU_LOONGSON2) += c-r4k.o cex-gen.o tlb-r4k.o
+obj-$(CONFIG_CPU_MIPS32) += c-r4k.o cex-gen.o tlb-r4k.o
+obj-$(CONFIG_CPU_MIPS64) += c-r4k.o cex-gen.o tlb-r4k.o
+obj-$(CONFIG_CPU_NEVADA) += c-r4k.o cex-gen.o tlb-r4k.o
+obj-$(CONFIG_CPU_R10000) += c-r4k.o cex-gen.o tlb-r4k.o
+obj-$(CONFIG_CPU_R3000) += c-r3k.o tlb-r3k.o
+obj-$(CONFIG_CPU_R4300) += c-r4k.o cex-gen.o tlb-r4k.o
+obj-$(CONFIG_CPU_R4X00) += c-r4k.o cex-gen.o tlb-r4k.o
+obj-$(CONFIG_CPU_R5000) += c-r4k.o cex-gen.o tlb-r4k.o
+obj-$(CONFIG_CPU_R5432) += c-r4k.o cex-gen.o tlb-r4k.o
+obj-$(CONFIG_CPU_R8000) += c-r4k.o cex-gen.o tlb-r8k.o
+obj-$(CONFIG_CPU_RM7000) += c-r4k.o cex-gen.o tlb-r4k.o
+obj-$(CONFIG_CPU_RM9000) += c-r4k.o cex-gen.o tlb-r4k.o
+obj-$(CONFIG_CPU_SB1) += c-r4k.o cerr-sb1.o cex-sb1.o tlb-r4k.o
+obj-$(CONFIG_CPU_TX39XX) += c-tx39.o tlb-r3k.o
+obj-$(CONFIG_CPU_TX49XX) += c-r4k.o cex-gen.o tlb-r4k.o
+obj-$(CONFIG_CPU_VR41XX) += c-r4k.o cex-gen.o tlb-r4k.o
obj-$(CONFIG_IP22_CPU_SCACHE) += sc-ip22.o
obj-$(CONFIG_R5000_CPU_SCACHE) += sc-r5k.o
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index 77aefb4ebedd..643c8bcffff3 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -14,6 +14,7 @@
#include <linux/linkage.h>
#include <linux/sched.h>
#include <linux/mm.h>
+#include <linux/module.h>
#include <linux/bitops.h>
#include <asm/bcache.h>
@@ -53,6 +54,12 @@ static inline void r4k_on_each_cpu(void (*func) (void *info), void *info,
preempt_enable();
}
+#if defined(CONFIG_MIPS_CMP)
+#define cpu_has_safe_index_cacheops 0
+#else
+#define cpu_has_safe_index_cacheops 1
+#endif
+
/*
* Must die.
*/
@@ -481,6 +488,8 @@ static inline void local_r4k_flush_cache_page(void *args)
if (cpu_has_dc_aliases || (exec && !cpu_has_ic_fills_f_dc)) {
r4k_blast_dcache_page(addr);
+ if (exec && !cpu_icache_snoops_remote_store)
+ r4k_blast_scache_page(addr);
}
if (exec) {
if (vaddr && cpu_has_vtag_icache && mm == current->active_mm) {
@@ -583,7 +592,7 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size)
* subset property so we have to flush the primary caches
* explicitly
*/
- if (size >= dcache_size) {
+ if (cpu_has_safe_index_cacheops && size >= dcache_size) {
r4k_blast_dcache();
} else {
R4600_HIT_CACHEOP_WAR_IMPL;
@@ -606,7 +615,7 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size)
return;
}
- if (size >= dcache_size) {
+ if (cpu_has_safe_index_cacheops && size >= dcache_size) {
r4k_blast_dcache();
} else {
R4600_HIT_CACHEOP_WAR_IMPL;
@@ -968,6 +977,7 @@ static void __cpuinit probe_pcache(void)
case CPU_24K:
case CPU_34K:
case CPU_74K:
+ case CPU_1004K:
if ((read_c0_config7() & (1 << 16))) {
/* effectively physically indexed dcache,
thus no virtual aliases. */
@@ -1216,9 +1226,25 @@ void au1x00_fixup_config_od(void)
}
}
+static int __cpuinitdata cca = -1;
+
+static int __init cca_setup(char *str)
+{
+ get_option(&str, &cca);
+
+ return 1;
+}
+
+__setup("cca=", cca_setup);
+
static void __cpuinit coherency_setup(void)
{
- change_c0_config(CONF_CM_CMASK, CONF_CM_DEFAULT);
+ if (cca < 0 || cca > 7)
+ cca = read_c0_config() & CONF_CM_CMASK;
+ _page_cachable_default = cca << _CACHE_SHIFT;
+
+ pr_debug("Using cache attribute %d\n", cca);
+ change_c0_config(CONF_CM_CMASK, cca);
/*
* c0_status.cu=0 specifies that updates by the sc instruction use
@@ -1248,6 +1274,20 @@ static void __cpuinit coherency_setup(void)
}
}
+#if defined(CONFIG_DMA_NONCOHERENT)
+
+static int __cpuinitdata coherentio;
+
+static int __init setcoherentio(char *str)
+{
+ coherentio = 1;
+
+ return 1;
+}
+
+__setup("coherentio", setcoherentio);
+#endif
+
void __cpuinit r4k_cache_init(void)
{
extern void build_clear_page(void);
@@ -1307,14 +1347,22 @@ void __cpuinit r4k_cache_init(void)
flush_data_cache_page = r4k_flush_data_cache_page;
flush_icache_range = r4k_flush_icache_range;
-#ifdef CONFIG_DMA_NONCOHERENT
- _dma_cache_wback_inv = r4k_dma_cache_wback_inv;
- _dma_cache_wback = r4k_dma_cache_wback_inv;
- _dma_cache_inv = r4k_dma_cache_inv;
+#if defined(CONFIG_DMA_NONCOHERENT)
+ if (coherentio) {
+ _dma_cache_wback_inv = (void *)cache_noop;
+ _dma_cache_wback = (void *)cache_noop;
+ _dma_cache_inv = (void *)cache_noop;
+ } else {
+ _dma_cache_wback_inv = r4k_dma_cache_wback_inv;
+ _dma_cache_wback = r4k_dma_cache_wback_inv;
+ _dma_cache_inv = r4k_dma_cache_inv;
+ }
#endif
build_clear_page();
build_copy_page();
+#if !defined(CONFIG_MIPS_CMP)
local_r4k___flush_cache_all(NULL);
+#endif
coherency_setup();
}
diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c
index f5903679ee6a..034e8506f6ea 100644
--- a/arch/mips/mm/cache.c
+++ b/arch/mips/mm/cache.c
@@ -130,8 +130,28 @@ void __update_cache(struct vm_area_struct *vma, unsigned long address,
}
}
-static char cache_panic[] __cpuinitdata =
- "Yeee, unsupported cache architecture.";
+unsigned long _page_cachable_default;
+EXPORT_SYMBOL_GPL(_page_cachable_default);
+
+static inline void setup_protection_map(void)
+{
+ protection_map[0] = PAGE_NONE;
+ protection_map[1] = PAGE_READONLY;
+ protection_map[2] = PAGE_COPY;
+ protection_map[3] = PAGE_COPY;
+ protection_map[4] = PAGE_READONLY;
+ protection_map[5] = PAGE_READONLY;
+ protection_map[6] = PAGE_COPY;
+ protection_map[7] = PAGE_COPY;
+ protection_map[8] = PAGE_NONE;
+ protection_map[9] = PAGE_READONLY;
+ protection_map[10] = PAGE_SHARED;
+ protection_map[11] = PAGE_SHARED;
+ protection_map[12] = PAGE_READONLY;
+ protection_map[13] = PAGE_READONLY;
+ protection_map[14] = PAGE_SHARED;
+ protection_map[15] = PAGE_SHARED;
+}
void __devinit cpu_cache_init(void)
{
@@ -139,34 +159,29 @@ void __devinit cpu_cache_init(void)
extern void __weak r3k_cache_init(void);
r3k_cache_init();
- return;
}
if (cpu_has_6k_cache) {
extern void __weak r6k_cache_init(void);
r6k_cache_init();
- return;
}
if (cpu_has_4k_cache) {
extern void __weak r4k_cache_init(void);
r4k_cache_init();
- return;
}
if (cpu_has_8k_cache) {
extern void __weak r8k_cache_init(void);
r8k_cache_init();
- return;
}
if (cpu_has_tx39_cache) {
extern void __weak tx39_cache_init(void);
tx39_cache_init();
- return;
}
- panic(cache_panic);
+ setup_protection_map();
}
int __weak __uncached_access(struct file *file, unsigned long addr)
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index c7aed133d11d..ecd562d2c348 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -142,7 +142,7 @@ void *kmap_coherent(struct page *page, unsigned long addr)
#endif
vaddr = __fix_to_virt(FIX_CMAP_END - idx);
pte = mk_pte(page, PAGE_KERNEL);
-#if defined(CONFIG_64BIT_PHYS_ADDR) && defined(CONFIG_CPU_MIPS32_R1)
+#if defined(CONFIG_64BIT_PHYS_ADDR) && defined(CONFIG_CPU_MIPS32)
entrylo = pte.pte_high;
#else
entrylo = pte_val(pte) >> 6;
@@ -221,7 +221,7 @@ void copy_user_highpage(struct page *to, struct page *from,
copy_page(vto, vfrom);
kunmap_atomic(vfrom, KM_USER0);
}
- if (((vma->vm_flags & VM_EXEC) && !cpu_has_ic_fills_f_dc) ||
+ if ((!cpu_has_ic_fills_f_dc) ||
pages_do_alias((unsigned long)vto, vaddr & PAGE_MASK))
flush_data_cache_page((unsigned long)vto);
kunmap_atomic(vto, KM_USER1);
@@ -229,8 +229,6 @@ void copy_user_highpage(struct page *to, struct page *from,
smp_wmb();
}
-EXPORT_SYMBOL(copy_user_highpage);
-
void copy_to_user_page(struct vm_area_struct *vma,
struct page *page, unsigned long vaddr, void *dst, const void *src,
unsigned long len)
@@ -249,8 +247,6 @@ void copy_to_user_page(struct vm_area_struct *vma,
flush_cache_page(vma, vaddr, page_to_pfn(page));
}
-EXPORT_SYMBOL(copy_to_user_page);
-
void copy_from_user_page(struct vm_area_struct *vma,
struct page *page, unsigned long vaddr, void *dst, const void *src,
unsigned long len)
@@ -267,9 +263,6 @@ void copy_from_user_page(struct vm_area_struct *vma,
}
}
-EXPORT_SYMBOL(copy_from_user_page);
-
-
#ifdef CONFIG_HIGHMEM
unsigned long highstart_pfn, highend_pfn;
diff --git a/arch/mips/mm/page.c b/arch/mips/mm/page.c
new file mode 100644
index 000000000000..d827d6144369
--- /dev/null
+++ b/arch/mips/mm/page.c
@@ -0,0 +1,684 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2003, 04, 05 Ralf Baechle (ralf@linux-mips.org)
+ * Copyright (C) 2007 Maciej W. Rozycki
+ * Copyright (C) 2008 Thiemo Seufer
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+
+#include <asm/bugs.h>
+#include <asm/cacheops.h>
+#include <asm/inst.h>
+#include <asm/io.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/prefetch.h>
+#include <asm/system.h>
+#include <asm/bootinfo.h>
+#include <asm/mipsregs.h>
+#include <asm/mmu_context.h>
+#include <asm/cpu.h>
+#include <asm/war.h>
+
+#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
+#include <asm/sibyte/sb1250.h>
+#include <asm/sibyte/sb1250_regs.h>
+#include <asm/sibyte/sb1250_dma.h>
+#endif
+
+#include "uasm.h"
+
+/* Registers used in the assembled routines. */
+#define ZERO 0
+#define AT 2
+#define A0 4
+#define A1 5
+#define A2 6
+#define T0 8
+#define T1 9
+#define T2 10
+#define T3 11
+#define T9 25
+#define RA 31
+
+/* Handle labels (which must be positive integers). */
+enum label_id {
+ label_clear_nopref = 1,
+ label_clear_pref,
+ label_copy_nopref,
+ label_copy_pref_both,
+ label_copy_pref_store,
+};
+
+UASM_L_LA(_clear_nopref)
+UASM_L_LA(_clear_pref)
+UASM_L_LA(_copy_nopref)
+UASM_L_LA(_copy_pref_both)
+UASM_L_LA(_copy_pref_store)
+
+/* We need one branch and therefore one relocation per target label. */
+static struct uasm_label __cpuinitdata labels[5];
+static struct uasm_reloc __cpuinitdata relocs[5];
+
+#define cpu_is_r4600_v1_x() ((read_c0_prid() & 0xfffffff0) == 0x00002010)
+#define cpu_is_r4600_v2_x() ((read_c0_prid() & 0xfffffff0) == 0x00002020)
+
+/*
+ * Maximum sizes:
+ *
+ * R4000 128 bytes S-cache: 0x058 bytes
+ * R4600 v1.7: 0x05c bytes
+ * R4600 v2.0: 0x060 bytes
+ * With prefetching, 16 word strides 0x120 bytes
+ */
+
+static u32 clear_page_array[0x120 / 4];
+
+#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
+void clear_page_cpu(void *page) __attribute__((alias("clear_page_array")));
+#else
+void clear_page(void *page) __attribute__((alias("clear_page_array")));
+#endif
+
+EXPORT_SYMBOL(clear_page);
+
+/*
+ * Maximum sizes:
+ *
+ * R4000 128 bytes S-cache: 0x11c bytes
+ * R4600 v1.7: 0x080 bytes
+ * R4600 v2.0: 0x07c bytes
+ * With prefetching, 16 word strides 0x540 bytes
+ */
+static u32 copy_page_array[0x540 / 4];
+
+#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
+void
+copy_page_cpu(void *to, void *from) __attribute__((alias("copy_page_array")));
+#else
+void copy_page(void *to, void *from) __attribute__((alias("copy_page_array")));
+#endif
+
+EXPORT_SYMBOL(copy_page);
+
+
+static int pref_bias_clear_store __cpuinitdata;
+static int pref_bias_copy_load __cpuinitdata;
+static int pref_bias_copy_store __cpuinitdata;
+
+static u32 pref_src_mode __cpuinitdata;
+static u32 pref_dst_mode __cpuinitdata;
+
+static int clear_word_size __cpuinitdata;
+static int copy_word_size __cpuinitdata;
+
+static int half_clear_loop_size __cpuinitdata;
+static int half_copy_loop_size __cpuinitdata;
+
+static int cache_line_size __cpuinitdata;
+#define cache_line_mask() (cache_line_size - 1)
+
+static inline void __cpuinit
+pg_addiu(u32 **buf, unsigned int reg1, unsigned int reg2, unsigned int off)
+{
+ if (cpu_has_64bit_gp_regs && DADDI_WAR && r4k_daddiu_bug()) {
+ if (off > 0x7fff) {
+ uasm_i_lui(buf, T9, uasm_rel_hi(off));
+ uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off));
+ } else
+ uasm_i_addiu(buf, T9, ZERO, off);
+ uasm_i_daddu(buf, reg1, reg2, T9);
+ } else {
+ if (off > 0x7fff) {
+ uasm_i_lui(buf, T9, uasm_rel_hi(off));
+ uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off));
+ UASM_i_ADDU(buf, reg1, reg2, T9);
+ } else
+ UASM_i_ADDIU(buf, reg1, reg2, off);
+ }
+}
+
+static void __cpuinit set_prefetch_parameters(void)
+{
+ if (cpu_has_64bit_gp_regs || cpu_has_64bit_zero_reg)
+ clear_word_size = 8;
+ else
+ clear_word_size = 4;
+
+ if (cpu_has_64bit_gp_regs)
+ copy_word_size = 8;
+ else
+ copy_word_size = 4;
+
+ /*
+ * The pref's used here are using "streaming" hints, which cause the
+ * copied data to be kicked out of the cache sooner. A page copy often
+ * ends up copying a lot more data than is commonly used, so this seems
+ * to make sense in terms of reducing cache pollution, but I've no real
+ * performance data to back this up.
+ */
+ if (cpu_has_prefetch) {
+ /*
+ * XXX: Most prefetch bias values in here are based on
+ * guesswork.
+ */
+ cache_line_size = cpu_dcache_line_size();
+ switch (current_cpu_type()) {
+ case CPU_TX49XX:
+ /* TX49 supports only Pref_Load */
+ pref_bias_copy_load = 256;
+ break;
+
+ case CPU_RM9000:
+ /*
+ * As a workaround for erratum G105 which make the
+ * PrepareForStore hint unusable we fall back to
+ * StoreRetained on the RM9000. Once it is known which
+ * versions of the RM9000 we'll be able to condition-
+ * alize this.
+ */
+
+ case CPU_R10000:
+ case CPU_R12000:
+ case CPU_R14000:
+ /*
+ * Those values have been experimentally tuned for an
+ * Origin 200.
+ */
+ pref_bias_clear_store = 512;
+ pref_bias_copy_load = 256;
+ pref_bias_copy_store = 256;
+ pref_src_mode = Pref_LoadStreamed;
+ pref_dst_mode = Pref_StoreStreamed;
+ break;
+
+ case CPU_SB1:
+ case CPU_SB1A:
+ pref_bias_clear_store = 128;
+ pref_bias_copy_load = 128;
+ pref_bias_copy_store = 128;
+ /*
+ * SB1 pass1 Pref_LoadStreamed/Pref_StoreStreamed
+ * hints are broken.
+ */
+ if (current_cpu_type() == CPU_SB1 &&
+ (current_cpu_data.processor_id & 0xff) < 0x02) {
+ pref_src_mode = Pref_Load;
+ pref_dst_mode = Pref_Store;
+ } else {
+ pref_src_mode = Pref_LoadStreamed;
+ pref_dst_mode = Pref_StoreStreamed;
+ }
+ break;
+
+ default:
+ pref_bias_clear_store = 128;
+ pref_bias_copy_load = 256;
+ pref_bias_copy_store = 128;
+ pref_src_mode = Pref_LoadStreamed;
+ pref_dst_mode = Pref_PrepareForStore;
+ break;
+ }
+ } else {
+ if (cpu_has_cache_cdex_s)
+ cache_line_size = cpu_scache_line_size();
+ else if (cpu_has_cache_cdex_p)
+ cache_line_size = cpu_dcache_line_size();
+ }
+ /*
+ * Too much unrolling will overflow the available space in
+ * clear_space_array / copy_page_array. 8 words sounds generous,
+ * but a R4000 with 128 byte L2 line length can exceed even that.
+ */
+ half_clear_loop_size = min(8 * clear_word_size,
+ max(cache_line_size >> 1,
+ 4 * clear_word_size));
+ half_copy_loop_size = min(8 * copy_word_size,
+ max(cache_line_size >> 1,
+ 4 * copy_word_size));
+}
+
+static void __cpuinit build_clear_store(u32 **buf, int off)
+{
+ if (cpu_has_64bit_gp_regs || cpu_has_64bit_zero_reg) {
+ uasm_i_sd(buf, ZERO, off, A0);
+ } else {
+ uasm_i_sw(buf, ZERO, off, A0);
+ }
+}
+
+static inline void __cpuinit build_clear_pref(u32 **buf, int off)
+{
+ if (off & cache_line_mask())
+ return;
+
+ if (pref_bias_clear_store) {
+ uasm_i_pref(buf, pref_dst_mode, pref_bias_clear_store + off,
+ A0);
+ } else if (cpu_has_cache_cdex_s) {
+ uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0);
+ } else if (cpu_has_cache_cdex_p) {
+ if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) {
+ uasm_i_nop(buf);
+ uasm_i_nop(buf);
+ uasm_i_nop(buf);
+ uasm_i_nop(buf);
+ }
+
+ if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
+ uasm_i_lw(buf, ZERO, ZERO, AT);
+
+ uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0);
+ }
+}
+
+void __cpuinit build_clear_page(void)
+{
+ int off;
+ u32 *buf = (u32 *)&clear_page_array;
+ struct uasm_label *l = labels;
+ struct uasm_reloc *r = relocs;
+ int i;
+
+ memset(labels, 0, sizeof(labels));
+ memset(relocs, 0, sizeof(relocs));
+
+ set_prefetch_parameters();
+
+ /*
+ * This algorithm makes the following assumptions:
+ * - The prefetch bias is a multiple of 2 words.
+ * - The prefetch bias is less than one page.
+ */
+ BUG_ON(pref_bias_clear_store % (2 * clear_word_size));
+ BUG_ON(PAGE_SIZE < pref_bias_clear_store);
+
+ off = PAGE_SIZE - pref_bias_clear_store;
+ if (off > 0xffff || !pref_bias_clear_store)
+ pg_addiu(&buf, A2, A0, off);
+ else
+ uasm_i_ori(&buf, A2, A0, off);
+
+ if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
+ uasm_i_lui(&buf, AT, 0xa000);
+
+ off = min(8, pref_bias_clear_store / cache_line_size) *
+ cache_line_size;
+ while (off) {
+ build_clear_pref(&buf, -off);
+ off -= cache_line_size;
+ }
+ uasm_l_clear_pref(&l, buf);
+ do {
+ build_clear_pref(&buf, off);
+ build_clear_store(&buf, off);
+ off += clear_word_size;
+ } while (off < half_clear_loop_size);
+ pg_addiu(&buf, A0, A0, 2 * off);
+ off = -off;
+ do {
+ build_clear_pref(&buf, off);
+ if (off == -clear_word_size)
+ uasm_il_bne(&buf, &r, A0, A2, label_clear_pref);
+ build_clear_store(&buf, off);
+ off += clear_word_size;
+ } while (off < 0);
+
+ if (pref_bias_clear_store) {
+ pg_addiu(&buf, A2, A0, pref_bias_clear_store);
+ uasm_l_clear_nopref(&l, buf);
+ off = 0;
+ do {
+ build_clear_store(&buf, off);
+ off += clear_word_size;
+ } while (off < half_clear_loop_size);
+ pg_addiu(&buf, A0, A0, 2 * off);
+ off = -off;
+ do {
+ if (off == -clear_word_size)
+ uasm_il_bne(&buf, &r, A0, A2,
+ label_clear_nopref);
+ build_clear_store(&buf, off);
+ off += clear_word_size;
+ } while (off < 0);
+ }
+
+ uasm_i_jr(&buf, RA);
+ uasm_i_nop(&buf);
+
+ BUG_ON(buf > clear_page_array + ARRAY_SIZE(clear_page_array));
+
+ uasm_resolve_relocs(relocs, labels);
+
+ pr_debug("Synthesized clear page handler (%u instructions).\n",
+ (u32)(buf - clear_page_array));
+
+ pr_debug("\t.set push\n");
+ pr_debug("\t.set noreorder\n");
+ for (i = 0; i < (buf - clear_page_array); i++)
+ pr_debug("\t.word 0x%08x\n", clear_page_array[i]);
+ pr_debug("\t.set pop\n");
+}
+
+static void __cpuinit build_copy_load(u32 **buf, int reg, int off)
+{
+ if (cpu_has_64bit_gp_regs) {
+ uasm_i_ld(buf, reg, off, A1);
+ } else {
+ uasm_i_lw(buf, reg, off, A1);
+ }
+}
+
+static void __cpuinit build_copy_store(u32 **buf, int reg, int off)
+{
+ if (cpu_has_64bit_gp_regs) {
+ uasm_i_sd(buf, reg, off, A0);
+ } else {
+ uasm_i_sw(buf, reg, off, A0);
+ }
+}
+
+static inline void build_copy_load_pref(u32 **buf, int off)
+{
+ if (off & cache_line_mask())
+ return;
+
+ if (pref_bias_copy_load)
+ uasm_i_pref(buf, pref_src_mode, pref_bias_copy_load + off, A1);
+}
+
+static inline void build_copy_store_pref(u32 **buf, int off)
+{
+ if (off & cache_line_mask())
+ return;
+
+ if (pref_bias_copy_store) {
+ uasm_i_pref(buf, pref_dst_mode, pref_bias_copy_store + off,
+ A0);
+ } else if (cpu_has_cache_cdex_s) {
+ uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0);
+ } else if (cpu_has_cache_cdex_p) {
+ if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) {
+ uasm_i_nop(buf);
+ uasm_i_nop(buf);
+ uasm_i_nop(buf);
+ uasm_i_nop(buf);
+ }
+
+ if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
+ uasm_i_lw(buf, ZERO, ZERO, AT);
+
+ uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0);
+ }
+}
+
+void __cpuinit build_copy_page(void)
+{
+ int off;
+ u32 *buf = (u32 *)&copy_page_array;
+ struct uasm_label *l = labels;
+ struct uasm_reloc *r = relocs;
+ int i;
+
+ memset(labels, 0, sizeof(labels));
+ memset(relocs, 0, sizeof(relocs));
+
+ set_prefetch_parameters();
+
+ /*
+ * This algorithm makes the following assumptions:
+ * - All prefetch biases are multiples of 8 words.
+ * - The prefetch biases are less than one page.
+ * - The store prefetch bias isn't greater than the load
+ * prefetch bias.
+ */
+ BUG_ON(pref_bias_copy_load % (8 * copy_word_size));
+ BUG_ON(pref_bias_copy_store % (8 * copy_word_size));
+ BUG_ON(PAGE_SIZE < pref_bias_copy_load);
+ BUG_ON(pref_bias_copy_store > pref_bias_copy_load);
+
+ off = PAGE_SIZE - pref_bias_copy_load;
+ if (off > 0xffff || !pref_bias_copy_load)
+ pg_addiu(&buf, A2, A0, off);
+ else
+ uasm_i_ori(&buf, A2, A0, off);
+
+ if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
+ uasm_i_lui(&buf, AT, 0xa000);
+
+ off = min(8, pref_bias_copy_load / cache_line_size) * cache_line_size;
+ while (off) {
+ build_copy_load_pref(&buf, -off);
+ off -= cache_line_size;
+ }
+ off = min(8, pref_bias_copy_store / cache_line_size) * cache_line_size;
+ while (off) {
+ build_copy_store_pref(&buf, -off);
+ off -= cache_line_size;
+ }
+ uasm_l_copy_pref_both(&l, buf);
+ do {
+ build_copy_load_pref(&buf, off);
+ build_copy_load(&buf, T0, off);
+ build_copy_load_pref(&buf, off + copy_word_size);
+ build_copy_load(&buf, T1, off + copy_word_size);
+ build_copy_load_pref(&buf, off + 2 * copy_word_size);
+ build_copy_load(&buf, T2, off + 2 * copy_word_size);
+ build_copy_load_pref(&buf, off + 3 * copy_word_size);
+ build_copy_load(&buf, T3, off + 3 * copy_word_size);
+ build_copy_store_pref(&buf, off);
+ build_copy_store(&buf, T0, off);
+ build_copy_store_pref(&buf, off + copy_word_size);
+ build_copy_store(&buf, T1, off + copy_word_size);
+ build_copy_store_pref(&buf, off + 2 * copy_word_size);
+ build_copy_store(&buf, T2, off + 2 * copy_word_size);
+ build_copy_store_pref(&buf, off + 3 * copy_word_size);
+ build_copy_store(&buf, T3, off + 3 * copy_word_size);
+ off += 4 * copy_word_size;
+ } while (off < half_copy_loop_size);
+ pg_addiu(&buf, A1, A1, 2 * off);
+ pg_addiu(&buf, A0, A0, 2 * off);
+ off = -off;
+ do {
+ build_copy_load_pref(&buf, off);
+ build_copy_load(&buf, T0, off);
+ build_copy_load_pref(&buf, off + copy_word_size);
+ build_copy_load(&buf, T1, off + copy_word_size);
+ build_copy_load_pref(&buf, off + 2 * copy_word_size);
+ build_copy_load(&buf, T2, off + 2 * copy_word_size);
+ build_copy_load_pref(&buf, off + 3 * copy_word_size);
+ build_copy_load(&buf, T3, off + 3 * copy_word_size);
+ build_copy_store_pref(&buf, off);
+ build_copy_store(&buf, T0, off);
+ build_copy_store_pref(&buf, off + copy_word_size);
+ build_copy_store(&buf, T1, off + copy_word_size);
+ build_copy_store_pref(&buf, off + 2 * copy_word_size);
+ build_copy_store(&buf, T2, off + 2 * copy_word_size);
+ build_copy_store_pref(&buf, off + 3 * copy_word_size);
+ if (off == -(4 * copy_word_size))
+ uasm_il_bne(&buf, &r, A2, A0, label_copy_pref_both);
+ build_copy_store(&buf, T3, off + 3 * copy_word_size);
+ off += 4 * copy_word_size;
+ } while (off < 0);
+
+ if (pref_bias_copy_load - pref_bias_copy_store) {
+ pg_addiu(&buf, A2, A0,
+ pref_bias_copy_load - pref_bias_copy_store);
+ uasm_l_copy_pref_store(&l, buf);
+ off = 0;
+ do {
+ build_copy_load(&buf, T0, off);
+ build_copy_load(&buf, T1, off + copy_word_size);
+ build_copy_load(&buf, T2, off + 2 * copy_word_size);
+ build_copy_load(&buf, T3, off + 3 * copy_word_size);
+ build_copy_store_pref(&buf, off);
+ build_copy_store(&buf, T0, off);
+ build_copy_store_pref(&buf, off + copy_word_size);
+ build_copy_store(&buf, T1, off + copy_word_size);
+ build_copy_store_pref(&buf, off + 2 * copy_word_size);
+ build_copy_store(&buf, T2, off + 2 * copy_word_size);
+ build_copy_store_pref(&buf, off + 3 * copy_word_size);
+ build_copy_store(&buf, T3, off + 3 * copy_word_size);
+ off += 4 * copy_word_size;
+ } while (off < half_copy_loop_size);
+ pg_addiu(&buf, A1, A1, 2 * off);
+ pg_addiu(&buf, A0, A0, 2 * off);
+ off = -off;
+ do {
+ build_copy_load(&buf, T0, off);
+ build_copy_load(&buf, T1, off + copy_word_size);
+ build_copy_load(&buf, T2, off + 2 * copy_word_size);
+ build_copy_load(&buf, T3, off + 3 * copy_word_size);
+ build_copy_store_pref(&buf, off);
+ build_copy_store(&buf, T0, off);
+ build_copy_store_pref(&buf, off + copy_word_size);
+ build_copy_store(&buf, T1, off + copy_word_size);
+ build_copy_store_pref(&buf, off + 2 * copy_word_size);
+ build_copy_store(&buf, T2, off + 2 * copy_word_size);
+ build_copy_store_pref(&buf, off + 3 * copy_word_size);
+ if (off == -(4 * copy_word_size))
+ uasm_il_bne(&buf, &r, A2, A0,
+ label_copy_pref_store);
+ build_copy_store(&buf, T3, off + 3 * copy_word_size);
+ off += 4 * copy_word_size;
+ } while (off < 0);
+ }
+
+ if (pref_bias_copy_store) {
+ pg_addiu(&buf, A2, A0, pref_bias_copy_store);
+ uasm_l_copy_nopref(&l, buf);
+ off = 0;
+ do {
+ build_copy_load(&buf, T0, off);
+ build_copy_load(&buf, T1, off + copy_word_size);
+ build_copy_load(&buf, T2, off + 2 * copy_word_size);
+ build_copy_load(&buf, T3, off + 3 * copy_word_size);
+ build_copy_store(&buf, T0, off);
+ build_copy_store(&buf, T1, off + copy_word_size);
+ build_copy_store(&buf, T2, off + 2 * copy_word_size);
+ build_copy_store(&buf, T3, off + 3 * copy_word_size);
+ off += 4 * copy_word_size;
+ } while (off < half_copy_loop_size);
+ pg_addiu(&buf, A1, A1, 2 * off);
+ pg_addiu(&buf, A0, A0, 2 * off);
+ off = -off;
+ do {
+ build_copy_load(&buf, T0, off);
+ build_copy_load(&buf, T1, off + copy_word_size);
+ build_copy_load(&buf, T2, off + 2 * copy_word_size);
+ build_copy_load(&buf, T3, off + 3 * copy_word_size);
+ build_copy_store(&buf, T0, off);
+ build_copy_store(&buf, T1, off + copy_word_size);
+ build_copy_store(&buf, T2, off + 2 * copy_word_size);
+ if (off == -(4 * copy_word_size))
+ uasm_il_bne(&buf, &r, A2, A0,
+ label_copy_nopref);
+ build_copy_store(&buf, T3, off + 3 * copy_word_size);
+ off += 4 * copy_word_size;
+ } while (off < 0);
+ }
+
+ uasm_i_jr(&buf, RA);
+ uasm_i_nop(&buf);
+
+ BUG_ON(buf > copy_page_array + ARRAY_SIZE(copy_page_array));
+
+ uasm_resolve_relocs(relocs, labels);
+
+ pr_debug("Synthesized copy page handler (%u instructions).\n",
+ (u32)(buf - copy_page_array));
+
+ pr_debug("\t.set push\n");
+ pr_debug("\t.set noreorder\n");
+ for (i = 0; i < (buf - copy_page_array); i++)
+ pr_debug("\t.word 0x%08x\n", copy_page_array[i]);
+ pr_debug("\t.set pop\n");
+}
+
+#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
+
+/*
+ * Pad descriptors to cacheline, since each is exclusively owned by a
+ * particular CPU.
+ */
+struct dmadscr {
+ u64 dscr_a;
+ u64 dscr_b;
+ u64 pad_a;
+ u64 pad_b;
+} ____cacheline_aligned_in_smp page_descr[DM_NUM_CHANNELS];
+
+void sb1_dma_init(void)
+{
+ int i;
+
+ for (i = 0; i < DM_NUM_CHANNELS; i++) {
+ const u64 base_val = CPHYSADDR((unsigned long)&page_descr[i]) |
+ V_DM_DSCR_BASE_RINGSZ(1);
+ void *base_reg = IOADDR(A_DM_REGISTER(i, R_DM_DSCR_BASE));
+
+ __raw_writeq(base_val, base_reg);
+ __raw_writeq(base_val | M_DM_DSCR_BASE_RESET, base_reg);
+ __raw_writeq(base_val | M_DM_DSCR_BASE_ENABL, base_reg);
+ }
+}
+
+void clear_page(void *page)
+{
+ u64 to_phys = CPHYSADDR((unsigned long)page);
+ unsigned int cpu = smp_processor_id();
+
+ /* if the page is not in KSEG0, use old way */
+ if ((long)KSEGX((unsigned long)page) != (long)CKSEG0)
+ return clear_page_cpu(page);
+
+ page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_ZERO_MEM |
+ M_DM_DSCRA_L2C_DEST | M_DM_DSCRA_INTERRUPT;
+ page_descr[cpu].dscr_b = V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE);
+ __raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT)));
+
+ /*
+ * Don't really want to do it this way, but there's no
+ * reliable way to delay completion detection.
+ */
+ while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)))
+ & M_DM_DSCR_BASE_INTERRUPT))
+ ;
+ __raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
+}
+
+void copy_page(void *to, void *from)
+{
+ u64 from_phys = CPHYSADDR((unsigned long)from);
+ u64 to_phys = CPHYSADDR((unsigned long)to);
+ unsigned int cpu = smp_processor_id();
+
+ /* if any page is not in KSEG0, use old way */
+ if ((long)KSEGX((unsigned long)to) != (long)CKSEG0
+ || (long)KSEGX((unsigned long)from) != (long)CKSEG0)
+ return copy_page_cpu(to, from);
+
+ page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_L2C_DEST |
+ M_DM_DSCRA_INTERRUPT;
+ page_descr[cpu].dscr_b = from_phys | V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE);
+ __raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT)));
+
+ /*
+ * Don't really want to do it this way, but there's no
+ * reliable way to delay completion detection.
+ */
+ while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)))
+ & M_DM_DSCR_BASE_INTERRUPT))
+ ;
+ __raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
+}
+
+#endif /* CONFIG_SIBYTE_DMA_PAGEOPS */
diff --git a/arch/mips/mm/pg-r4k.c b/arch/mips/mm/pg-r4k.c
deleted file mode 100644
index 455dedb5b39e..000000000000
--- a/arch/mips/mm/pg-r4k.c
+++ /dev/null
@@ -1,534 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2003, 04, 05 Ralf Baechle (ralf@linux-mips.org)
- * Copyright (C) 2007 Maciej W. Rozycki
- */
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/proc_fs.h>
-
-#include <asm/bugs.h>
-#include <asm/cacheops.h>
-#include <asm/inst.h>
-#include <asm/io.h>
-#include <asm/page.h>
-#include <asm/pgtable.h>
-#include <asm/prefetch.h>
-#include <asm/system.h>
-#include <asm/bootinfo.h>
-#include <asm/mipsregs.h>
-#include <asm/mmu_context.h>
-#include <asm/cpu.h>
-#include <asm/war.h>
-
-#define half_scache_line_size() (cpu_scache_line_size() >> 1)
-#define cpu_is_r4600_v1_x() ((read_c0_prid() & 0xfffffff0) == 0x00002010)
-#define cpu_is_r4600_v2_x() ((read_c0_prid() & 0xfffffff0) == 0x00002020)
-
-
-/*
- * Maximum sizes:
- *
- * R4000 128 bytes S-cache: 0x58 bytes
- * R4600 v1.7: 0x5c bytes
- * R4600 v2.0: 0x60 bytes
- * With prefetching, 16 byte strides 0xa0 bytes
- */
-
-static unsigned int clear_page_array[0x130 / 4];
-
-void clear_page(void * page) __attribute__((alias("clear_page_array")));
-
-EXPORT_SYMBOL(clear_page);
-
-/*
- * Maximum sizes:
- *
- * R4000 128 bytes S-cache: 0x11c bytes
- * R4600 v1.7: 0x080 bytes
- * R4600 v2.0: 0x07c bytes
- * With prefetching, 16 byte strides 0x0b8 bytes
- */
-static unsigned int copy_page_array[0x148 / 4];
-
-void copy_page(void *to, void *from) __attribute__((alias("copy_page_array")));
-
-EXPORT_SYMBOL(copy_page);
-
-/*
- * This is suboptimal for 32-bit kernels; we assume that R10000 is only used
- * with 64-bit kernels. The prefetch offsets have been experimentally tuned
- * an Origin 200.
- */
-static int pref_offset_clear __cpuinitdata = 512;
-static int pref_offset_copy __cpuinitdata = 256;
-
-static unsigned int pref_src_mode __cpuinitdata;
-static unsigned int pref_dst_mode __cpuinitdata;
-
-static int load_offset __cpuinitdata;
-static int store_offset __cpuinitdata;
-
-static unsigned int __cpuinitdata *dest, *epc;
-
-static unsigned int instruction_pending;
-static union mips_instruction delayed_mi;
-
-static void __cpuinit emit_instruction(union mips_instruction mi)
-{
- if (instruction_pending)
- *epc++ = delayed_mi.word;
-
- instruction_pending = 1;
- delayed_mi = mi;
-}
-
-static inline void flush_delay_slot_or_nop(void)
-{
- if (instruction_pending) {
- *epc++ = delayed_mi.word;
- instruction_pending = 0;
- return;
- }
-
- *epc++ = 0;
-}
-
-static inline unsigned int *label(void)
-{
- if (instruction_pending) {
- *epc++ = delayed_mi.word;
- instruction_pending = 0;
- }
-
- return epc;
-}
-
-static inline void build_insn_word(unsigned int word)
-{
- union mips_instruction mi;
-
- mi.word = word;
-
- emit_instruction(mi);
-}
-
-static inline void build_nop(void)
-{
- build_insn_word(0); /* nop */
-}
-
-static inline void build_src_pref(int advance)
-{
- if (!(load_offset & (cpu_dcache_line_size() - 1)) && advance) {
- union mips_instruction mi;
-
- mi.i_format.opcode = pref_op;
- mi.i_format.rs = 5; /* $a1 */
- mi.i_format.rt = pref_src_mode;
- mi.i_format.simmediate = load_offset + advance;
-
- emit_instruction(mi);
- }
-}
-
-static inline void __build_load_reg(int reg)
-{
- union mips_instruction mi;
- unsigned int width;
-
- if (cpu_has_64bit_gp_regs) {
- mi.i_format.opcode = ld_op;
- width = 8;
- } else {
- mi.i_format.opcode = lw_op;
- width = 4;
- }
- mi.i_format.rs = 5; /* $a1 */
- mi.i_format.rt = reg; /* $reg */
- mi.i_format.simmediate = load_offset;
-
- load_offset += width;
- emit_instruction(mi);
-}
-
-static inline void build_load_reg(int reg)
-{
- if (cpu_has_prefetch)
- build_src_pref(pref_offset_copy);
-
- __build_load_reg(reg);
-}
-
-static inline void build_dst_pref(int advance)
-{
- if (!(store_offset & (cpu_dcache_line_size() - 1)) && advance) {
- union mips_instruction mi;
-
- mi.i_format.opcode = pref_op;
- mi.i_format.rs = 4; /* $a0 */
- mi.i_format.rt = pref_dst_mode;
- mi.i_format.simmediate = store_offset + advance;
-
- emit_instruction(mi);
- }
-}
-
-static inline void build_cdex_s(void)
-{
- union mips_instruction mi;
-
- if ((store_offset & (cpu_scache_line_size() - 1)))
- return;
-
- mi.c_format.opcode = cache_op;
- mi.c_format.rs = 4; /* $a0 */
- mi.c_format.c_op = 3; /* Create Dirty Exclusive */
- mi.c_format.cache = 3; /* Secondary Data Cache */
- mi.c_format.simmediate = store_offset;
-
- emit_instruction(mi);
-}
-
-static inline void build_cdex_p(void)
-{
- union mips_instruction mi;
-
- if (store_offset & (cpu_dcache_line_size() - 1))
- return;
-
- if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) {
- build_nop();
- build_nop();
- build_nop();
- build_nop();
- }
-
- if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
- build_insn_word(0x8c200000); /* lw $zero, ($at) */
-
- mi.c_format.opcode = cache_op;
- mi.c_format.rs = 4; /* $a0 */
- mi.c_format.c_op = 3; /* Create Dirty Exclusive */
- mi.c_format.cache = 1; /* Data Cache */
- mi.c_format.simmediate = store_offset;
-
- emit_instruction(mi);
-}
-
-static void __cpuinit __build_store_reg(int reg)
-{
- union mips_instruction mi;
- unsigned int width;
-
- if (cpu_has_64bit_gp_regs ||
- (cpu_has_64bit_zero_reg && reg == 0)) {
- mi.i_format.opcode = sd_op;
- width = 8;
- } else {
- mi.i_format.opcode = sw_op;
- width = 4;
- }
- mi.i_format.rs = 4; /* $a0 */
- mi.i_format.rt = reg; /* $reg */
- mi.i_format.simmediate = store_offset;
-
- store_offset += width;
- emit_instruction(mi);
-}
-
-static inline void build_store_reg(int reg)
-{
- int pref_off = cpu_has_prefetch ?
- (reg ? pref_offset_copy : pref_offset_clear) : 0;
- if (pref_off)
- build_dst_pref(pref_off);
- else if (cpu_has_cache_cdex_s)
- build_cdex_s();
- else if (cpu_has_cache_cdex_p)
- build_cdex_p();
-
- __build_store_reg(reg);
-}
-
-static inline void build_addiu_rt_rs(unsigned int rt, unsigned int rs,
- unsigned long offset)
-{
- union mips_instruction mi;
-
- BUG_ON(offset > 0x7fff);
-
- if (cpu_has_64bit_gp_regs && DADDI_WAR && r4k_daddiu_bug()) {
- mi.i_format.opcode = addiu_op;
- mi.i_format.rs = 0; /* $zero */
- mi.i_format.rt = 25; /* $t9 */
- mi.i_format.simmediate = offset;
- emit_instruction(mi);
-
- mi.r_format.opcode = spec_op;
- mi.r_format.rs = rs;
- mi.r_format.rt = 25; /* $t9 */
- mi.r_format.rd = rt;
- mi.r_format.re = 0;
- mi.r_format.func = daddu_op;
- } else {
- mi.i_format.opcode = cpu_has_64bit_gp_regs ?
- daddiu_op : addiu_op;
- mi.i_format.rs = rs;
- mi.i_format.rt = rt;
- mi.i_format.simmediate = offset;
- }
- emit_instruction(mi);
-}
-
-static inline void build_addiu_a2_a0(unsigned long offset)
-{
- build_addiu_rt_rs(6, 4, offset); /* $a2, $a0, offset */
-}
-
-static inline void build_addiu_a2(unsigned long offset)
-{
- build_addiu_rt_rs(6, 6, offset); /* $a2, $a2, offset */
-}
-
-static inline void build_addiu_a1(unsigned long offset)
-{
- build_addiu_rt_rs(5, 5, offset); /* $a1, $a1, offset */
-
- load_offset -= offset;
-}
-
-static inline void build_addiu_a0(unsigned long offset)
-{
- build_addiu_rt_rs(4, 4, offset); /* $a0, $a0, offset */
-
- store_offset -= offset;
-}
-
-static inline void build_bne(unsigned int *dest)
-{
- union mips_instruction mi;
-
- mi.i_format.opcode = bne_op;
- mi.i_format.rs = 6; /* $a2 */
- mi.i_format.rt = 4; /* $a0 */
- mi.i_format.simmediate = dest - epc - 1;
-
- *epc++ = mi.word;
- flush_delay_slot_or_nop();
-}
-
-static inline void build_jr_ra(void)
-{
- union mips_instruction mi;
-
- mi.r_format.opcode = spec_op;
- mi.r_format.rs = 31;
- mi.r_format.rt = 0;
- mi.r_format.rd = 0;
- mi.r_format.re = 0;
- mi.r_format.func = jr_op;
-
- *epc++ = mi.word;
- flush_delay_slot_or_nop();
-}
-
-void __cpuinit build_clear_page(void)
-{
- unsigned int loop_start;
- unsigned long off;
- int i;
-
- epc = (unsigned int *) &clear_page_array;
- instruction_pending = 0;
- store_offset = 0;
-
- if (cpu_has_prefetch) {
- switch (current_cpu_type()) {
- case CPU_TX49XX:
- /* TX49 supports only Pref_Load */
- pref_offset_clear = 0;
- pref_offset_copy = 0;
- break;
-
- case CPU_RM9000:
- /*
- * As a workaround for erratum G105 which make the
- * PrepareForStore hint unusable we fall back to
- * StoreRetained on the RM9000. Once it is known which
- * versions of the RM9000 we'll be able to condition-
- * alize this.
- */
-
- case CPU_R10000:
- case CPU_R12000:
- case CPU_R14000:
- pref_src_mode = Pref_LoadStreamed;
- pref_dst_mode = Pref_StoreStreamed;
- break;
-
- default:
- pref_src_mode = Pref_LoadStreamed;
- pref_dst_mode = Pref_PrepareForStore;
- break;
- }
- }
-
- off = PAGE_SIZE - (cpu_has_prefetch ? pref_offset_clear : 0);
- if (off > 0x7fff) {
- build_addiu_a2_a0(off >> 1);
- build_addiu_a2(off >> 1);
- } else
- build_addiu_a2_a0(off);
-
- if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
- build_insn_word(0x3c01a000); /* lui $at, 0xa000 */
-
-dest = label();
- do {
- build_store_reg(0);
- build_store_reg(0);
- build_store_reg(0);
- build_store_reg(0);
- } while (store_offset < half_scache_line_size());
- build_addiu_a0(2 * store_offset);
- loop_start = store_offset;
- do {
- build_store_reg(0);
- build_store_reg(0);
- build_store_reg(0);
- build_store_reg(0);
- } while ((store_offset - loop_start) < half_scache_line_size());
- build_bne(dest);
-
- if (cpu_has_prefetch && pref_offset_clear) {
- build_addiu_a2_a0(pref_offset_clear);
- dest = label();
- loop_start = store_offset;
- do {
- __build_store_reg(0);
- __build_store_reg(0);
- __build_store_reg(0);
- __build_store_reg(0);
- } while ((store_offset - loop_start) < half_scache_line_size());
- build_addiu_a0(2 * store_offset);
- loop_start = store_offset;
- do {
- __build_store_reg(0);
- __build_store_reg(0);
- __build_store_reg(0);
- __build_store_reg(0);
- } while ((store_offset - loop_start) < half_scache_line_size());
- build_bne(dest);
- }
-
- build_jr_ra();
-
- BUG_ON(epc > clear_page_array + ARRAY_SIZE(clear_page_array));
-
- pr_info("Synthesized clear page handler (%u instructions).\n",
- (unsigned int)(epc - clear_page_array));
-
- pr_debug("\t.set push\n");
- pr_debug("\t.set noreorder\n");
- for (i = 0; i < (epc - clear_page_array); i++)
- pr_debug("\t.word 0x%08x\n", clear_page_array[i]);
- pr_debug("\t.set pop\n");
-}
-
-void __cpuinit build_copy_page(void)
-{
- unsigned int loop_start;
- unsigned long off;
- int i;
-
- epc = (unsigned int *) &copy_page_array;
- store_offset = load_offset = 0;
- instruction_pending = 0;
-
- off = PAGE_SIZE - (cpu_has_prefetch ? pref_offset_copy : 0);
- if (off > 0x7fff) {
- build_addiu_a2_a0(off >> 1);
- build_addiu_a2(off >> 1);
- } else
- build_addiu_a2_a0(off);
-
- if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
- build_insn_word(0x3c01a000); /* lui $at, 0xa000 */
-
-dest = label();
- loop_start = store_offset;
- do {
- build_load_reg( 8);
- build_load_reg( 9);
- build_load_reg(10);
- build_load_reg(11);
- build_store_reg( 8);
- build_store_reg( 9);
- build_store_reg(10);
- build_store_reg(11);
- } while ((store_offset - loop_start) < half_scache_line_size());
- build_addiu_a0(2 * store_offset);
- build_addiu_a1(2 * load_offset);
- loop_start = store_offset;
- do {
- build_load_reg( 8);
- build_load_reg( 9);
- build_load_reg(10);
- build_load_reg(11);
- build_store_reg( 8);
- build_store_reg( 9);
- build_store_reg(10);
- build_store_reg(11);
- } while ((store_offset - loop_start) < half_scache_line_size());
- build_bne(dest);
-
- if (cpu_has_prefetch && pref_offset_copy) {
- build_addiu_a2_a0(pref_offset_copy);
- dest = label();
- loop_start = store_offset;
- do {
- __build_load_reg( 8);
- __build_load_reg( 9);
- __build_load_reg(10);
- __build_load_reg(11);
- __build_store_reg( 8);
- __build_store_reg( 9);
- __build_store_reg(10);
- __build_store_reg(11);
- } while ((store_offset - loop_start) < half_scache_line_size());
- build_addiu_a0(2 * store_offset);
- build_addiu_a1(2 * load_offset);
- loop_start = store_offset;
- do {
- __build_load_reg( 8);
- __build_load_reg( 9);
- __build_load_reg(10);
- __build_load_reg(11);
- __build_store_reg( 8);
- __build_store_reg( 9);
- __build_store_reg(10);
- __build_store_reg(11);
- } while ((store_offset - loop_start) < half_scache_line_size());
- build_bne(dest);
- }
-
- build_jr_ra();
-
- BUG_ON(epc > copy_page_array + ARRAY_SIZE(copy_page_array));
-
- pr_info("Synthesized copy page handler (%u instructions).\n",
- (unsigned int)(epc - copy_page_array));
-
- pr_debug("\t.set push\n");
- pr_debug("\t.set noreorder\n");
- for (i = 0; i < (epc - copy_page_array); i++)
- pr_debug("\t.word 0x%08x\n", copy_page_array[i]);
- pr_debug("\t.set pop\n");
-}
diff --git a/arch/mips/mm/pg-sb1.c b/arch/mips/mm/pg-sb1.c
deleted file mode 100644
index 49e289d05414..000000000000
--- a/arch/mips/mm/pg-sb1.c
+++ /dev/null
@@ -1,302 +0,0 @@
-/*
- * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com)
- * Copyright (C) 1997, 2001 Ralf Baechle (ralf@gnu.org)
- * Copyright (C) 2000 SiByte, Inc.
- * Copyright (C) 2005 Thiemo Seufer
- *
- * Written by Justin Carlson of SiByte, Inc.
- * and Kip Walker of Broadcom Corp.
- *
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- */
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/smp.h>
-
-#include <asm/io.h>
-#include <asm/sibyte/sb1250.h>
-#include <asm/sibyte/sb1250_regs.h>
-#include <asm/sibyte/sb1250_dma.h>
-
-#ifdef CONFIG_SB1_PASS_1_WORKAROUNDS
-#define SB1_PREF_LOAD_STREAMED_HINT "0"
-#define SB1_PREF_STORE_STREAMED_HINT "1"
-#else
-#define SB1_PREF_LOAD_STREAMED_HINT "4"
-#define SB1_PREF_STORE_STREAMED_HINT "5"
-#endif
-
-static inline void clear_page_cpu(void *page)
-{
- unsigned char *addr = (unsigned char *) page;
- unsigned char *end = addr + PAGE_SIZE;
-
- /*
- * JDCXXX - This should be bottlenecked by the write buffer, but these
- * things tend to be mildly unpredictable...should check this on the
- * performance model
- *
- * We prefetch 4 lines ahead. We're also "cheating" slightly here...
- * since we know we're on an SB1, we force the assembler to take
- * 64-bit operands to speed things up
- */
- __asm__ __volatile__(
- " .set push \n"
- " .set mips4 \n"
- " .set noreorder \n"
-#ifdef CONFIG_CPU_HAS_PREFETCH
- " daddiu %0, %0, 128 \n"
- " pref " SB1_PREF_STORE_STREAMED_HINT ", -128(%0) \n"
- /* Prefetch the first 4 lines */
- " pref " SB1_PREF_STORE_STREAMED_HINT ", -96(%0) \n"
- " pref " SB1_PREF_STORE_STREAMED_HINT ", -64(%0) \n"
- " pref " SB1_PREF_STORE_STREAMED_HINT ", -32(%0) \n"
- "1: sd $0, -128(%0) \n" /* Throw out a cacheline of 0's */
- " sd $0, -120(%0) \n"
- " sd $0, -112(%0) \n"
- " sd $0, -104(%0) \n"
- " daddiu %0, %0, 32 \n"
- " bnel %0, %1, 1b \n"
- " pref " SB1_PREF_STORE_STREAMED_HINT ", -32(%0) \n"
- " daddiu %0, %0, -128 \n"
-#endif
- " sd $0, 0(%0) \n" /* Throw out a cacheline of 0's */
- "1: sd $0, 8(%0) \n"
- " sd $0, 16(%0) \n"
- " sd $0, 24(%0) \n"
- " daddiu %0, %0, 32 \n"
- " bnel %0, %1, 1b \n"
- " sd $0, 0(%0) \n"
- " .set pop \n"
- : "+r" (addr)
- : "r" (end)
- : "memory");
-}
-
-static inline void copy_page_cpu(void *to, void *from)
-{
- unsigned char *src = (unsigned char *)from;
- unsigned char *dst = (unsigned char *)to;
- unsigned char *end = src + PAGE_SIZE;
-
- /*
- * The pref's used here are using "streaming" hints, which cause the
- * copied data to be kicked out of the cache sooner. A page copy often
- * ends up copying a lot more data than is commonly used, so this seems
- * to make sense in terms of reducing cache pollution, but I've no real
- * performance data to back this up
- */
- __asm__ __volatile__(
- " .set push \n"
- " .set mips4 \n"
- " .set noreorder \n"
-#ifdef CONFIG_CPU_HAS_PREFETCH
- " daddiu %0, %0, 128 \n"
- " daddiu %1, %1, 128 \n"
- " pref " SB1_PREF_LOAD_STREAMED_HINT ", -128(%0)\n"
- /* Prefetch the first 4 lines */
- " pref " SB1_PREF_STORE_STREAMED_HINT ", -128(%1)\n"
- " pref " SB1_PREF_LOAD_STREAMED_HINT ", -96(%0)\n"
- " pref " SB1_PREF_STORE_STREAMED_HINT ", -96(%1)\n"
- " pref " SB1_PREF_LOAD_STREAMED_HINT ", -64(%0)\n"
- " pref " SB1_PREF_STORE_STREAMED_HINT ", -64(%1)\n"
- " pref " SB1_PREF_LOAD_STREAMED_HINT ", -32(%0)\n"
- "1: pref " SB1_PREF_STORE_STREAMED_HINT ", -32(%1)\n"
-# ifdef CONFIG_64BIT
- " ld $8, -128(%0) \n" /* Block copy a cacheline */
- " ld $9, -120(%0) \n"
- " ld $10, -112(%0) \n"
- " ld $11, -104(%0) \n"
- " sd $8, -128(%1) \n"
- " sd $9, -120(%1) \n"
- " sd $10, -112(%1) \n"
- " sd $11, -104(%1) \n"
-# else
- " lw $2, -128(%0) \n" /* Block copy a cacheline */
- " lw $3, -124(%0) \n"
- " lw $6, -120(%0) \n"
- " lw $7, -116(%0) \n"
- " lw $8, -112(%0) \n"
- " lw $9, -108(%0) \n"
- " lw $10, -104(%0) \n"
- " lw $11, -100(%0) \n"
- " sw $2, -128(%1) \n"
- " sw $3, -124(%1) \n"
- " sw $6, -120(%1) \n"
- " sw $7, -116(%1) \n"
- " sw $8, -112(%1) \n"
- " sw $9, -108(%1) \n"
- " sw $10, -104(%1) \n"
- " sw $11, -100(%1) \n"
-# endif
- " daddiu %0, %0, 32 \n"
- " daddiu %1, %1, 32 \n"
- " bnel %0, %2, 1b \n"
- " pref " SB1_PREF_LOAD_STREAMED_HINT ", -32(%0)\n"
- " daddiu %0, %0, -128 \n"
- " daddiu %1, %1, -128 \n"
-#endif
-#ifdef CONFIG_64BIT
- " ld $8, 0(%0) \n" /* Block copy a cacheline */
- "1: ld $9, 8(%0) \n"
- " ld $10, 16(%0) \n"
- " ld $11, 24(%0) \n"
- " sd $8, 0(%1) \n"
- " sd $9, 8(%1) \n"
- " sd $10, 16(%1) \n"
- " sd $11, 24(%1) \n"
-#else
- " lw $2, 0(%0) \n" /* Block copy a cacheline */
- "1: lw $3, 4(%0) \n"
- " lw $6, 8(%0) \n"
- " lw $7, 12(%0) \n"
- " lw $8, 16(%0) \n"
- " lw $9, 20(%0) \n"
- " lw $10, 24(%0) \n"
- " lw $11, 28(%0) \n"
- " sw $2, 0(%1) \n"
- " sw $3, 4(%1) \n"
- " sw $6, 8(%1) \n"
- " sw $7, 12(%1) \n"
- " sw $8, 16(%1) \n"
- " sw $9, 20(%1) \n"
- " sw $10, 24(%1) \n"
- " sw $11, 28(%1) \n"
-#endif
- " daddiu %0, %0, 32 \n"
- " daddiu %1, %1, 32 \n"
- " bnel %0, %2, 1b \n"
-#ifdef CONFIG_64BIT
- " ld $8, 0(%0) \n"
-#else
- " lw $2, 0(%0) \n"
-#endif
- " .set pop \n"
- : "+r" (src), "+r" (dst)
- : "r" (end)
-#ifdef CONFIG_64BIT
- : "$8", "$9", "$10", "$11", "memory");
-#else
- : "$2", "$3", "$6", "$7", "$8", "$9", "$10", "$11", "memory");
-#endif
-}
-
-
-#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
-
-/*
- * Pad descriptors to cacheline, since each is exclusively owned by a
- * particular CPU.
- */
-typedef struct dmadscr_s {
- u64 dscr_a;
- u64 dscr_b;
- u64 pad_a;
- u64 pad_b;
-} dmadscr_t;
-
-static dmadscr_t page_descr[DM_NUM_CHANNELS]
- __attribute__((aligned(SMP_CACHE_BYTES)));
-
-void sb1_dma_init(void)
-{
- int i;
-
- for (i = 0; i < DM_NUM_CHANNELS; i++) {
- const u64 base_val = CPHYSADDR((unsigned long)&page_descr[i]) |
- V_DM_DSCR_BASE_RINGSZ(1);
- void *base_reg = IOADDR(A_DM_REGISTER(i, R_DM_DSCR_BASE));
-
- __raw_writeq(base_val, base_reg);
- __raw_writeq(base_val | M_DM_DSCR_BASE_RESET, base_reg);
- __raw_writeq(base_val | M_DM_DSCR_BASE_ENABL, base_reg);
- }
-}
-
-void clear_page(void *page)
-{
- u64 to_phys = CPHYSADDR((unsigned long)page);
- unsigned int cpu = smp_processor_id();
-
- /* if the page is not in KSEG0, use old way */
- if ((long)KSEGX((unsigned long)page) != (long)CKSEG0)
- return clear_page_cpu(page);
-
- page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_ZERO_MEM |
- M_DM_DSCRA_L2C_DEST | M_DM_DSCRA_INTERRUPT;
- page_descr[cpu].dscr_b = V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE);
- __raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT)));
-
- /*
- * Don't really want to do it this way, but there's no
- * reliable way to delay completion detection.
- */
- while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)))
- & M_DM_DSCR_BASE_INTERRUPT))
- ;
- __raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
-}
-
-void copy_page(void *to, void *from)
-{
- u64 from_phys = CPHYSADDR((unsigned long)from);
- u64 to_phys = CPHYSADDR((unsigned long)to);
- unsigned int cpu = smp_processor_id();
-
- /* if any page is not in KSEG0, use old way */
- if ((long)KSEGX((unsigned long)to) != (long)CKSEG0
- || (long)KSEGX((unsigned long)from) != (long)CKSEG0)
- return copy_page_cpu(to, from);
-
- page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_L2C_DEST |
- M_DM_DSCRA_INTERRUPT;
- page_descr[cpu].dscr_b = from_phys | V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE);
- __raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT)));
-
- /*
- * Don't really want to do it this way, but there's no
- * reliable way to delay completion detection.
- */
- while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)))
- & M_DM_DSCR_BASE_INTERRUPT))
- ;
- __raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
-}
-
-#else /* !CONFIG_SIBYTE_DMA_PAGEOPS */
-
-void clear_page(void *page)
-{
- return clear_page_cpu(page);
-}
-
-void copy_page(void *to, void *from)
-{
- return copy_page_cpu(to, from);
-}
-
-#endif /* !CONFIG_SIBYTE_DMA_PAGEOPS */
-
-EXPORT_SYMBOL(clear_page);
-EXPORT_SYMBOL(copy_page);
-
-void __cpuinit build_clear_page(void)
-{
-}
-
-void __cpuinit build_copy_page(void)
-{
-}
diff --git a/arch/mips/mm/pgtable.c b/arch/mips/mm/pgtable.c
index 57df1c38e303..7dfa579ab24c 100644
--- a/arch/mips/mm/pgtable.c
+++ b/arch/mips/mm/pgtable.c
@@ -12,7 +12,6 @@ void show_mem(void)
printk("Mem-info:\n");
show_free_areas();
- printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
pfn = max_mapnr;
while (pfn-- > 0) {
if (!pfn_valid(pfn))
diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c
index 63065d6e8063..5ce2fa745626 100644
--- a/arch/mips/mm/tlb-r4k.c
+++ b/arch/mips/mm/tlb-r4k.c
@@ -299,7 +299,7 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte)
idx = read_c0_index();
ptep = pte_offset_map(pmdp, address);
-#if defined(CONFIG_64BIT_PHYS_ADDR) && defined(CONFIG_CPU_MIPS32_R1)
+#if defined(CONFIG_64BIT_PHYS_ADDR) && defined(CONFIG_CPU_MIPS32)
write_c0_entrylo0(ptep->pte_high);
ptep++;
write_c0_entrylo1(ptep->pte_high);
diff --git a/arch/mips/mm/uasm.c b/arch/mips/mm/uasm.c
index 1a6f7704cc89..1655aa69e133 100644
--- a/arch/mips/mm/uasm.c
+++ b/arch/mips/mm/uasm.c
@@ -58,13 +58,13 @@ enum opcode {
insn_invalid,
insn_addu, insn_addiu, insn_and, insn_andi, insn_beq,
insn_beql, insn_bgez, insn_bgezl, insn_bltz, insn_bltzl,
- insn_bne, insn_daddu, insn_daddiu, insn_dmfc0, insn_dmtc0,
- insn_dsll, insn_dsll32, insn_dsra, insn_dsrl, insn_dsrl32,
- insn_dsubu, insn_eret, insn_j, insn_jal, insn_jr, insn_ld,
- insn_ll, insn_lld, insn_lui, insn_lw, insn_mfc0, insn_mtc0,
- insn_ori, insn_rfe, insn_sc, insn_scd, insn_sd, insn_sll,
- insn_sra, insn_srl, insn_subu, insn_sw, insn_tlbp, insn_tlbwi,
- insn_tlbwr, insn_xor, insn_xori
+ insn_bne, insn_cache, insn_daddu, insn_daddiu, insn_dmfc0,
+ insn_dmtc0, insn_dsll, insn_dsll32, insn_dsra, insn_dsrl,
+ insn_dsrl32, insn_dsubu, insn_eret, insn_j, insn_jal, insn_jr,
+ insn_ld, insn_ll, insn_lld, insn_lui, insn_lw, insn_mfc0,
+ insn_mtc0, insn_ori, insn_pref, insn_rfe, insn_sc, insn_scd,
+ insn_sd, insn_sll, insn_sra, insn_srl, insn_subu, insn_sw,
+ insn_tlbp, insn_tlbwi, insn_tlbwr, insn_xor, insn_xori
};
struct insn {
@@ -94,6 +94,7 @@ static struct insn insn_table[] __cpuinitdata = {
{ insn_bltz, M(bcond_op, 0, bltz_op, 0, 0, 0), RS | BIMM },
{ insn_bltzl, M(bcond_op, 0, bltzl_op, 0, 0, 0), RS | BIMM },
{ insn_bne, M(bne_op, 0, 0, 0, 0, 0), RS | RT | BIMM },
+ { insn_cache, M(cache_op, 0, 0, 0, 0, 0), RS | RT | SIMM },
{ insn_daddiu, M(daddiu_op, 0, 0, 0, 0, 0), RS | RT | SIMM },
{ insn_daddu, M(spec_op, 0, 0, 0, 0, daddu_op), RS | RT | RD },
{ insn_dmfc0, M(cop0_op, dmfc_op, 0, 0, 0, 0), RT | RD | SET},
@@ -116,6 +117,7 @@ static struct insn insn_table[] __cpuinitdata = {
{ insn_mfc0, M(cop0_op, mfc_op, 0, 0, 0, 0), RT | RD | SET},
{ insn_mtc0, M(cop0_op, mtc_op, 0, 0, 0, 0), RT | RD | SET},
{ insn_ori, M(ori_op, 0, 0, 0, 0, 0), RS | RT | UIMM },
+ { insn_pref, M(pref_op, 0, 0, 0, 0, 0), RS | RT | SIMM },
{ insn_rfe, M(cop0_op, cop_op, 0, 0, 0, rfe_op), 0 },
{ insn_sc, M(sc_op, 0, 0, 0, 0, 0), RS | RT | SIMM },
{ insn_scd, M(scd_op, 0, 0, 0, 0, 0), RS | RT | SIMM },
@@ -337,6 +339,7 @@ I_u1s2(_bgezl)
I_u1s2(_bltz)
I_u1s2(_bltzl)
I_u1u2s3(_bne)
+I_u2s3u1(_cache)
I_u1u2u3(_dmfc0)
I_u1u2u3(_dmtc0)
I_u2u1s3(_daddiu)
@@ -359,6 +362,7 @@ I_u2s3u1(_lw)
I_u1u2u3(_mfc0)
I_u1u2u3(_mtc0)
I_u2u1u3(_ori)
+I_u2s3u1(_pref)
I_0(_rfe)
I_u2s3u1(_sc)
I_u2s3u1(_scd)
@@ -555,6 +559,14 @@ uasm_il_beqzl(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid)
}
void __cpuinit
+uasm_il_bne(u32 **p, struct uasm_reloc **r, unsigned int reg1,
+ unsigned int reg2, int lid)
+{
+ uasm_r_mips_pc16(r, *p, lid);
+ uasm_i_bne(p, reg1, reg2, 0);
+}
+
+void __cpuinit
uasm_il_bnez(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid)
{
uasm_r_mips_pc16(r, *p, lid);
diff --git a/arch/mips/mm/uasm.h b/arch/mips/mm/uasm.h
index fe0574f6e77d..0d6a66f32030 100644
--- a/arch/mips/mm/uasm.h
+++ b/arch/mips/mm/uasm.h
@@ -55,6 +55,7 @@ Ip_u1s2(_bgezl);
Ip_u1s2(_bltz);
Ip_u1s2(_bltzl);
Ip_u1u2s3(_bne);
+Ip_u2s3u1(_cache);
Ip_u1u2u3(_dmfc0);
Ip_u1u2u3(_dmtc0);
Ip_u2u1s3(_daddiu);
@@ -77,6 +78,7 @@ Ip_u2s3u1(_lw);
Ip_u1u2u3(_mfc0);
Ip_u1u2u3(_mtc0);
Ip_u2u1u3(_ori);
+Ip_u2s3u1(_pref);
Ip_0(_rfe);
Ip_u2s3u1(_sc);
Ip_u2s3u1(_scd);
@@ -177,6 +179,8 @@ void uasm_il_bltz(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid);
void uasm_il_b(u32 **p, struct uasm_reloc **r, int lid);
void uasm_il_beqz(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid);
void uasm_il_beqzl(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid);
+void uasm_il_bne(u32 **p, struct uasm_reloc **r, unsigned int reg1,
+ unsigned int reg2, int lid);
void uasm_il_bnez(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid);
void uasm_il_bgezl(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid);
void uasm_il_bgez(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid);
diff --git a/arch/mips/philips/pnx8550/common/Makefile b/arch/mips/nxp/pnx8550/common/Makefile
index 31cc1a5cec3b..31cc1a5cec3b 100644
--- a/arch/mips/philips/pnx8550/common/Makefile
+++ b/arch/mips/nxp/pnx8550/common/Makefile
diff --git a/arch/mips/philips/pnx8550/common/gdb_hook.c b/arch/mips/nxp/pnx8550/common/gdb_hook.c
index ad4624f6d9bc..ad4624f6d9bc 100644
--- a/arch/mips/philips/pnx8550/common/gdb_hook.c
+++ b/arch/mips/nxp/pnx8550/common/gdb_hook.c
diff --git a/arch/mips/philips/pnx8550/common/int.c b/arch/mips/nxp/pnx8550/common/int.c
index aad03429a5e3..aad03429a5e3 100644
--- a/arch/mips/philips/pnx8550/common/int.c
+++ b/arch/mips/nxp/pnx8550/common/int.c
diff --git a/arch/mips/philips/pnx8550/common/pci.c b/arch/mips/nxp/pnx8550/common/pci.c
index eee4f3dfc410..eee4f3dfc410 100644
--- a/arch/mips/philips/pnx8550/common/pci.c
+++ b/arch/mips/nxp/pnx8550/common/pci.c
diff --git a/arch/mips/philips/pnx8550/common/platform.c b/arch/mips/nxp/pnx8550/common/platform.c
index c839436bd012..c7c763dbe588 100644
--- a/arch/mips/philips/pnx8550/common/platform.c
+++ b/arch/mips/nxp/pnx8550/common/platform.c
@@ -1,5 +1,5 @@
/*
- * Platform device support for Philips PNX8550 SoCs
+ * Platform device support for NXP PNX8550 SoCs
*
* Copyright 2005, Embedded Alley Solutions, Inc
*
diff --git a/arch/mips/philips/pnx8550/common/proc.c b/arch/mips/nxp/pnx8550/common/proc.c
index 18b125e3b65d..18b125e3b65d 100644
--- a/arch/mips/philips/pnx8550/common/proc.c
+++ b/arch/mips/nxp/pnx8550/common/proc.c
diff --git a/arch/mips/philips/pnx8550/common/prom.c b/arch/mips/nxp/pnx8550/common/prom.c
index 2f567452e7ac..2f567452e7ac 100644
--- a/arch/mips/philips/pnx8550/common/prom.c
+++ b/arch/mips/nxp/pnx8550/common/prom.c
diff --git a/arch/mips/philips/pnx8550/common/reset.c b/arch/mips/nxp/pnx8550/common/reset.c
index 7b2cbc5b2c7c..7b2cbc5b2c7c 100644
--- a/arch/mips/philips/pnx8550/common/reset.c
+++ b/arch/mips/nxp/pnx8550/common/reset.c
diff --git a/arch/mips/philips/pnx8550/common/setup.c b/arch/mips/nxp/pnx8550/common/setup.c
index 92d764c97701..92d764c97701 100644
--- a/arch/mips/philips/pnx8550/common/setup.c
+++ b/arch/mips/nxp/pnx8550/common/setup.c
diff --git a/arch/mips/philips/pnx8550/common/time.c b/arch/mips/nxp/pnx8550/common/time.c
index 62f495b57f93..62f495b57f93 100644
--- a/arch/mips/philips/pnx8550/common/time.c
+++ b/arch/mips/nxp/pnx8550/common/time.c
diff --git a/arch/mips/philips/pnx8550/jbs/Makefile b/arch/mips/nxp/pnx8550/jbs/Makefile
index e8228dbca8f6..ad6a8ca7d8ce 100644
--- a/arch/mips/philips/pnx8550/jbs/Makefile
+++ b/arch/mips/nxp/pnx8550/jbs/Makefile
@@ -1,4 +1,4 @@
-# Makefile for the Philips JBS Board.
+# Makefile for the NXP JBS Board.
lib-y := init.o board_setup.o irqmap.o
diff --git a/arch/mips/philips/pnx8550/jbs/board_setup.c b/arch/mips/nxp/pnx8550/jbs/board_setup.c
index f92826e0096d..f92826e0096d 100644
--- a/arch/mips/philips/pnx8550/jbs/board_setup.c
+++ b/arch/mips/nxp/pnx8550/jbs/board_setup.c
diff --git a/arch/mips/philips/pnx8550/jbs/init.c b/arch/mips/nxp/pnx8550/jbs/init.c
index 90b4d35f3ece..d59b4a4e5e8b 100644
--- a/arch/mips/philips/pnx8550/jbs/init.c
+++ b/arch/mips/nxp/pnx8550/jbs/init.c
@@ -40,7 +40,7 @@ extern char *prom_getenv(char *envname);
const char *get_system_type(void)
{
- return "Philips PNX8550/JBS";
+ return "NXP PNX8550/JBS";
}
void __init prom_init(void)
diff --git a/arch/mips/philips/pnx8550/jbs/irqmap.c b/arch/mips/nxp/pnx8550/jbs/irqmap.c
index 98c3429e6e50..7fc89842002c 100644
--- a/arch/mips/philips/pnx8550/jbs/irqmap.c
+++ b/arch/mips/nxp/pnx8550/jbs/irqmap.c
@@ -1,5 +1,5 @@
/*
- * Philips JBS board irqmap.
+ * NXP JBS board irqmap.
*
* Copyright 2005 Embedded Alley Solutions, Inc
* source@embeddealley.com
@@ -33,4 +33,3 @@ char pnx8550_irq_tab[][5] __initdata = {
[9] = { -1, PNX8550_INT_PCI_INTA, 0xff, 0xff, 0xff},
[17] = { -1, PNX8550_INT_PCI_INTA, 0xff, 0xff, 0xff},
};
-
diff --git a/arch/mips/philips/pnx8550/stb810/Makefile b/arch/mips/nxp/pnx8550/stb810/Makefile
index f14b592af398..ab91d72c5664 100644
--- a/arch/mips/philips/pnx8550/stb810/Makefile
+++ b/arch/mips/nxp/pnx8550/stb810/Makefile
@@ -1,4 +1,4 @@
-# Makefile for the Philips STB810 Board.
+# Makefile for the NXP STB810 Board.
lib-y := prom_init.o board_setup.o irqmap.o
diff --git a/arch/mips/philips/pnx8550/stb810/board_setup.c b/arch/mips/nxp/pnx8550/stb810/board_setup.c
index 345d71e53cf2..1282c27cfcb7 100644
--- a/arch/mips/philips/pnx8550/stb810/board_setup.c
+++ b/arch/mips/nxp/pnx8550/stb810/board_setup.c
@@ -1,7 +1,7 @@
/*
* STB810 specific board startup routines.
*
- * Based on the arch/mips/philips/pnx8550/jbs/board_setup.c
+ * Based on the arch/mips/nxp/pnx8550/jbs/board_setup.c
*
* Author: MontaVista Software, Inc.
* source@mvista.com
diff --git a/arch/mips/philips/pnx8550/stb810/irqmap.c b/arch/mips/nxp/pnx8550/stb810/irqmap.c
index 5ee11e19975e..8c034963ddcd 100644
--- a/arch/mips/philips/pnx8550/stb810/irqmap.c
+++ b/arch/mips/nxp/pnx8550/stb810/irqmap.c
@@ -1,5 +1,5 @@
/*
- * Philips STB810 board irqmap.
+ * NXP STB810 board irqmap.
*
* Author: MontaVista Software, Inc.
* source@mvista.com
@@ -20,4 +20,3 @@ char pnx8550_irq_tab[][5] __initdata = {
[9] = { -1, PNX8550_INT_PCI_INTA, 0xff, 0xff, 0xff},
[10] = { -1, PNX8550_INT_PCI_INTA, 0xff, 0xff, 0xff},
};
-
diff --git a/arch/mips/philips/pnx8550/stb810/prom_init.c b/arch/mips/nxp/pnx8550/stb810/prom_init.c
index 832dd60b0a7a..ca7f4ada0640 100644
--- a/arch/mips/philips/pnx8550/stb810/prom_init.c
+++ b/arch/mips/nxp/pnx8550/stb810/prom_init.c
@@ -28,7 +28,7 @@ extern char *prom_getenv(char *envname);
const char *get_system_type(void)
{
- return "Philips PNX8550/STB810";
+ return "NXP PNX8950/STB810";
}
void __init prom_init(void)
diff --git a/arch/mips/oprofile/common.c b/arch/mips/oprofile/common.c
index aa52aa146cea..b5f6f71b27bc 100644
--- a/arch/mips/oprofile/common.c
+++ b/arch/mips/oprofile/common.c
@@ -80,6 +80,7 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
case CPU_24K:
case CPU_25KF:
case CPU_34K:
+ case CPU_1004K:
case CPU_74K:
case CPU_SB1:
case CPU_SB1A:
diff --git a/arch/mips/oprofile/op_impl.h b/arch/mips/oprofile/op_impl.h
index fa6b4aae7523..2bfc17c30106 100644
--- a/arch/mips/oprofile/op_impl.h
+++ b/arch/mips/oprofile/op_impl.h
@@ -10,7 +10,6 @@
#ifndef OP_IMPL_H
#define OP_IMPL_H 1
-extern int null_perf_irq(void);
extern int (*perf_irq)(void);
/* Per-counter configuration as set via oprofilefs. */
diff --git a/arch/mips/oprofile/op_model_mipsxx.c b/arch/mips/oprofile/op_model_mipsxx.c
index ccbea229a0e6..da8cbb6899dc 100644
--- a/arch/mips/oprofile/op_model_mipsxx.c
+++ b/arch/mips/oprofile/op_model_mipsxx.c
@@ -31,9 +31,14 @@
#define M_COUNTER_OVERFLOW (1UL << 31)
+static int (*save_perf_irq)(void);
+
#ifdef CONFIG_MIPS_MT_SMP
-#define WHAT (M_TC_EN_VPE | M_PERFCTL_VPEID(smp_processor_id()))
-#define vpe_id() smp_processor_id()
+static int cpu_has_mipsmt_pertccounters;
+#define WHAT (M_TC_EN_VPE | \
+ M_PERFCTL_VPEID(cpu_data[smp_processor_id()].vpe_id))
+#define vpe_id() (cpu_has_mipsmt_pertccounters ? \
+ 0 : cpu_data[smp_processor_id()].vpe_id)
/*
* The number of bits to shift to convert between counters per core and
@@ -243,11 +248,11 @@ static inline int __n_counters(void)
{
if (!(read_c0_config1() & M_CONFIG1_PC))
return 0;
- if (!(r_c0_perfctrl0() & M_PERFCTL_MORE))
+ if (!(read_c0_perfctrl0() & M_PERFCTL_MORE))
return 1;
- if (!(r_c0_perfctrl1() & M_PERFCTL_MORE))
+ if (!(read_c0_perfctrl1() & M_PERFCTL_MORE))
return 2;
- if (!(r_c0_perfctrl2() & M_PERFCTL_MORE))
+ if (!(read_c0_perfctrl2() & M_PERFCTL_MORE))
return 3;
return 4;
@@ -274,8 +279,9 @@ static inline int n_counters(void)
return counters;
}
-static inline void reset_counters(int counters)
+static void reset_counters(void *arg)
{
+ int counters = (int)arg;
switch (counters) {
case 4:
w_c0_perfctrl3(0);
@@ -302,9 +308,12 @@ static int __init mipsxx_init(void)
return -ENODEV;
}
- reset_counters(counters);
-
- counters = counters_total_to_per_cpu(counters);
+#ifdef CONFIG_MIPS_MT_SMP
+ cpu_has_mipsmt_pertccounters = read_c0_config7() & (1<<19);
+ if (!cpu_has_mipsmt_pertccounters)
+ counters = counters_total_to_per_cpu(counters);
+#endif
+ on_each_cpu(reset_counters, (void *)counters, 0, 1);
op_model_mipsxx_ops.num_counters = counters;
switch (current_cpu_type()) {
@@ -320,6 +329,13 @@ static int __init mipsxx_init(void)
op_model_mipsxx_ops.cpu_type = "mips/25K";
break;
+ case CPU_1004K:
+#if 0
+ /* FIXME: report as 34K for now */
+ op_model_mipsxx_ops.cpu_type = "mips/1004K";
+ break;
+#endif
+
case CPU_34K:
op_model_mipsxx_ops.cpu_type = "mips/34K";
break;
@@ -355,6 +371,7 @@ static int __init mipsxx_init(void)
return -ENODEV;
}
+ save_perf_irq = perf_irq;
perf_irq = mipsxx_perfcount_handler;
return 0;
@@ -365,9 +382,9 @@ static void mipsxx_exit(void)
int counters = op_model_mipsxx_ops.num_counters;
counters = counters_per_cpu_to_total(counters);
- reset_counters(counters);
+ on_each_cpu(reset_counters, (void *)counters, 0, 1);
- perf_irq = null_perf_irq;
+ perf_irq = save_perf_irq;
}
struct op_mips_model op_model_mipsxx_ops = {
diff --git a/arch/mips/pci/fixup-au1000.c b/arch/mips/pci/fixup-au1000.c
index ca0276c8070a..00c36c9dbe0e 100644
--- a/arch/mips/pci/fixup-au1000.c
+++ b/arch/mips/pci/fixup-au1000.c
@@ -26,13 +26,10 @@
* with this program; if not, write to the Free Software Foundation, Inc.,
* 675 Mass Ave, Cambridge, MA 02139, USA.
*/
-#include <linux/types.h>
+
#include <linux/pci.h>
-#include <linux/kernel.h>
#include <linux/init.h>
-#include <asm/mach-au1x00/au1000.h>
-
extern char irq_tab_alchemy[][5];
int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
diff --git a/arch/mips/pci/ops-pnx8550.c b/arch/mips/pci/ops-pnx8550.c
index d61064652498..0e160d9f07c3 100644
--- a/arch/mips/pci/ops-pnx8550.c
+++ b/arch/mips/pci/ops-pnx8550.c
@@ -90,14 +90,14 @@ config_access(unsigned int pci_cmd, struct pci_bus *bus, unsigned int devfn, int
loops--;
if (loops == 0) {
- printk("%s : Arbiter Locked.\n", __FUNCTION__);
+ printk("%s : Arbiter Locked.\n", __func__);
}
}
clear_status();
if ((pci_cmd == PCI_CMD_IOR) || (pci_cmd == PCI_CMD_IOW)) {
printk("%s timeout (GPPM_CTRL=%X) ioaddr %lX pci_cmd %X\n",
- __FUNCTION__, inl(PCI_BASE | PCI_GPPM_CTRL), ioaddr,
+ __func__, inl(PCI_BASE | PCI_GPPM_CTRL), ioaddr,
pci_cmd);
}
diff --git a/arch/mips/pmc-sierra/yosemite/setup.c b/arch/mips/pmc-sierra/yosemite/setup.c
index 855977ca51cd..6537d90a25bb 100644
--- a/arch/mips/pmc-sierra/yosemite/setup.c
+++ b/arch/mips/pmc-sierra/yosemite/setup.c
@@ -143,9 +143,6 @@ void __init plat_time_init(void)
mips_hpt_frequency = 33000000 * 3 * 5;
}
-/* No other usable initialization hook than this ... */
-extern void (*late_time_init)(void);
-
unsigned long ocd_base;
EXPORT_SYMBOL(ocd_base);
diff --git a/arch/mips/sgi-ip32/ip32-reset.c b/arch/mips/sgi-ip32/ip32-reset.c
index 624bbdbff2a8..b6cab089561e 100644
--- a/arch/mips/sgi-ip32/ip32-reset.c
+++ b/arch/mips/sgi-ip32/ip32-reset.c
@@ -142,7 +142,7 @@ static irqreturn_t ip32_rtc_int(int irq, void *dev_id)
reg_c = CMOS_READ(RTC_INTR_FLAGS);
if (!(reg_c & RTC_IRQF)) {
printk(KERN_WARNING
- "%s: RTC IRQ without RTC_IRQF\n", __FUNCTION__);
+ "%s: RTC IRQ without RTC_IRQF\n", __func__);
}
/* Wait until interrupt goes away */
disable_irq(MACEISA_RTC_IRQ);
diff --git a/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_irq.c b/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_irq.c
index 3f808b629242..6d31f2a98abf 100644
--- a/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_irq.c
+++ b/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_irq.c
@@ -173,7 +173,7 @@ static const u32 toshiba_rbtx4927_irq_debug_flag =
{ \
char tmp[100]; \
sprintf( tmp, str ); \
- printk( "%s(%s:%u)::%s", __FUNCTION__, __FILE__, __LINE__, tmp ); \
+ printk( "%s(%s:%u)::%s", __func__, __FILE__, __LINE__, tmp ); \
}
#else
#define TOSHIBA_RBTX4927_IRQ_DPRINTK(flag, str...)
diff --git a/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c b/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c
index e466e5e711d8..2203c77b2ce2 100644
--- a/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c
+++ b/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c
@@ -93,7 +93,7 @@ static const u32 toshiba_rbtx4927_setup_debug_flag =
{ \
char tmp[100]; \
sprintf( tmp, str ); \
- printk( "%s(%s:%u)::%s", __FUNCTION__, __FILE__, __LINE__, tmp ); \
+ printk( "%s(%s:%u)::%s", __func__, __FILE__, __LINE__, tmp ); \
}
#else
#define TOSHIBA_RBTX4927_SETUP_DPRINTK(flag, str...)
diff --git a/arch/mips/tx4938/common/dbgio.c b/arch/mips/tx4938/common/dbgio.c
index bea59ff1842a..33b9c672a322 100644
--- a/arch/mips/tx4938/common/dbgio.c
+++ b/arch/mips/tx4938/common/dbgio.c
@@ -31,9 +31,7 @@
* Support for TX4938 in 2.6 - Hiroshi DOYU <Hiroshi_DOYU@montavista.co.jp>
*/
-#include <asm/mipsregs.h>
-#include <asm/system.h>
-#include <asm/tx4938/tx4938_mips.h>
+#include <linux/types>
extern u8 txx9_sio_kdbg_rd(void);
extern int txx9_sio_kdbg_wr( u8 ch );
diff --git a/arch/mips/tx4938/common/prom.c b/arch/mips/tx4938/common/prom.c
index 3189a65f7d7e..20baeaeba4cd 100644
--- a/arch/mips/tx4938/common/prom.c
+++ b/arch/mips/tx4938/common/prom.c
@@ -13,13 +13,8 @@
*/
#include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/bootmem.h>
-
-#include <asm/addrspace.h>
-#include <asm/bootinfo.h>
-#include <asm/tx4938/tx4938.h>
+#include <linux/types.h>
+#include <linux/io.h>
static unsigned int __init
tx4938_process_sdccr(u64 * addr)
@@ -35,7 +30,7 @@ tx4938_process_sdccr(u64 * addr)
unsigned int bc = 4;
unsigned int msize = 0;
- val = (*((vu64 *) (addr)));
+ val = ____raw_readq((void __iomem *)addr);
/* MVMCP -- need #defs for these bits masks */
sdccr_ce = ((val & (1 << 10)) >> 10);
diff --git a/arch/mips/tx4938/toshiba_rbtx4938/irq.c b/arch/mips/tx4938/toshiba_rbtx4938/irq.c
index f00185017e80..4d6a8dc46c76 100644
--- a/arch/mips/tx4938/toshiba_rbtx4938/irq.c
+++ b/arch/mips/tx4938/toshiba_rbtx4938/irq.c
@@ -67,24 +67,7 @@ IRQ Device
63 RBTX4938-IOC/07 SWINT
*/
#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/mm.h>
-#include <linux/swap.h>
-#include <linux/ioport.h>
-#include <linux/sched.h>
#include <linux/interrupt.h>
-#include <linux/pci.h>
-#include <linux/timex.h>
-#include <asm/bootinfo.h>
-#include <asm/page.h>
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/processor.h>
-#include <asm/reboot.h>
-#include <asm/time.h>
-#include <asm/wbflush.h>
-#include <linux/bootmem.h>
#include <asm/tx4938/rbtx4938.h>
static void toshiba_rbtx4938_irq_ioc_enable(unsigned int irq);
@@ -99,21 +82,16 @@ static struct irq_chip toshiba_rbtx4938_irq_ioc_type = {
.unmask = toshiba_rbtx4938_irq_ioc_enable,
};
-#define TOSHIBA_RBTX4938_IOC_INTR_ENAB 0xb7f02000
-#define TOSHIBA_RBTX4938_IOC_INTR_STAT 0xb7f0200a
-
int
toshiba_rbtx4938_irq_nested(int sw_irq)
{
u8 level3;
- level3 = reg_rd08(TOSHIBA_RBTX4938_IOC_INTR_STAT) & 0xff;
- if (level3) {
+ level3 = readb(rbtx4938_imstat_addr);
+ if (level3)
/* must use fls so onboard ATA has priority */
sw_irq = TOSHIBA_RBTX4938_IRQ_IOC_BEG + fls(level3) - 1;
- }
- wbflush();
return sw_irq;
}
@@ -144,25 +122,23 @@ toshiba_rbtx4938_irq_ioc_init(void)
static void
toshiba_rbtx4938_irq_ioc_enable(unsigned int irq)
{
- volatile unsigned char v;
+ unsigned char v;
- v = TX4938_RD08(TOSHIBA_RBTX4938_IOC_INTR_ENAB);
+ v = readb(rbtx4938_imask_addr);
v |= (1 << (irq - TOSHIBA_RBTX4938_IRQ_IOC_BEG));
- TX4938_WR08(TOSHIBA_RBTX4938_IOC_INTR_ENAB, v);
+ writeb(v, rbtx4938_imask_addr);
mmiowb();
- TX4938_RD08(TOSHIBA_RBTX4938_IOC_INTR_ENAB);
}
static void
toshiba_rbtx4938_irq_ioc_disable(unsigned int irq)
{
- volatile unsigned char v;
+ unsigned char v;
- v = TX4938_RD08(TOSHIBA_RBTX4938_IOC_INTR_ENAB);
+ v = readb(rbtx4938_imask_addr);
v &= ~(1 << (irq - TOSHIBA_RBTX4938_IRQ_IOC_BEG));
- TX4938_WR08(TOSHIBA_RBTX4938_IOC_INTR_ENAB, v);
+ writeb(v, rbtx4938_imask_addr);
mmiowb();
- TX4938_RD08(TOSHIBA_RBTX4938_IOC_INTR_ENAB);
}
void __init arch_init_irq(void)
@@ -174,14 +150,12 @@ void __init arch_init_irq(void)
/* all IRC interrupt mode are Low Active. */
/* mask all IOC interrupts */
- *rbtx4938_imask_ptr = 0;
+ writeb(0, rbtx4938_imask_addr);
/* clear SoftInt interrupts */
- *rbtx4938_softint_ptr = 0;
+ writeb(0, rbtx4938_softint_addr);
tx4938_irq_init();
toshiba_rbtx4938_irq_ioc_init();
/* Onboard 10M Ether: High Active */
set_irq_type(RBTX4938_IRQ_ETHER, IRQF_TRIGGER_HIGH);
-
- wbflush();
}
diff --git a/arch/mips/tx4938/toshiba_rbtx4938/setup.c b/arch/mips/tx4938/toshiba_rbtx4938/setup.c
index 61249f049cd6..3a3659e8633a 100644
--- a/arch/mips/tx4938/toshiba_rbtx4938/setup.c
+++ b/arch/mips/tx4938/toshiba_rbtx4938/setup.c
@@ -21,8 +21,8 @@
#include <linux/pm.h>
#include <linux/platform_device.h>
#include <linux/clk.h>
+#include <linux/gpio.h>
-#include <asm/wbflush.h>
#include <asm/reboot.h>
#include <asm/time.h>
#include <asm/txx9tmr.h>
@@ -34,7 +34,7 @@
#endif
#include <linux/spi/spi.h>
#include <asm/tx4938/spi.h>
-#include <asm/gpio.h>
+#include <asm/txx9pio.h>
extern char * __init prom_getcmdline(void);
static inline void tx4938_report_pcic_status1(struct tx4938_pcic_reg *pcicptr);
@@ -90,12 +90,11 @@ void rbtx4938_machine_restart(char *command)
local_irq_disable();
printk("Rebooting...");
- *rbtx4938_softresetlock_ptr = 1;
- *rbtx4938_sfvol_ptr = 1;
- *rbtx4938_softreset_ptr = 1;
- wbflush();
-
- while(1);
+ writeb(1, rbtx4938_softresetlock_addr);
+ writeb(1, rbtx4938_sfvol_addr);
+ writeb(1, rbtx4938_softreset_addr);
+ while(1)
+ ;
}
void __init
@@ -487,7 +486,7 @@ static int __init tx4938_pcibios_init(void)
}
/* Reset PCI Bus */
- *rbtx4938_pcireset_ptr = 0;
+ writeb(0, rbtx4938_pcireset_addr);
/* Reset PCIC */
tx4938_ccfgptr->clkctr |= TX4938_CLKCTR_PCIRST;
if (txboard_pci66_mode > 0)
@@ -495,8 +494,8 @@ static int __init tx4938_pcibios_init(void)
mdelay(10);
/* clear PCIC reset */
tx4938_ccfgptr->clkctr &= ~TX4938_CLKCTR_PCIRST;
- *rbtx4938_pcireset_ptr = 1;
- wbflush();
+ writeb(1, rbtx4938_pcireset_addr);
+ mmiowb();
tx4938_report_pcic_status1(tx4938_pcicptr);
tx4938_report_pciclk();
@@ -504,15 +503,15 @@ static int __init tx4938_pcibios_init(void)
if (txboard_pci66_mode == 0 &&
txboard_pci66_check(&tx4938_pci_controller[0], 0, 0)) {
/* Reset PCI Bus */
- *rbtx4938_pcireset_ptr = 0;
+ writeb(0, rbtx4938_pcireset_addr);
/* Reset PCIC */
tx4938_ccfgptr->clkctr |= TX4938_CLKCTR_PCIRST;
tx4938_pciclk66_setup();
mdelay(10);
/* clear PCIC reset */
tx4938_ccfgptr->clkctr &= ~TX4938_CLKCTR_PCIRST;
- *rbtx4938_pcireset_ptr = 1;
- wbflush();
+ writeb(1, rbtx4938_pcireset_addr);
+ mmiowb();
/* Reinitialize PCIC */
tx4938_report_pciclk();
tx4938_pcic_setup(tx4938_pcicptr, &tx4938_pci_controller[0], io_base[0], extarb);
@@ -615,9 +614,6 @@ static void __init rbtx4938_spi_setup(void)
{
/* set SPI_SEL */
tx4938_ccfgptr->pcfg |= TX4938_PCFG_SPI_SEL;
- /* chip selects for SPI devices */
- tx4938_pioptr->dout |= (1 << SEEPROM1_CS);
- tx4938_pioptr->dir |= (1 << SEEPROM1_CS);
}
static struct resource rbtx4938_fpga_resource;
@@ -776,12 +772,13 @@ void __init tx4938_board_setup(void)
txx9_tmr_init(TX4938_TMR_REG(i) & 0xfffffffffULL);
/* enable DMA */
- TX4938_WR64(0xff1fb150, TX4938_DMA_MCR_MSTEN);
- TX4938_WR64(0xff1fb950, TX4938_DMA_MCR_MSTEN);
+ for (i = 0; i < 2; i++)
+ ____raw_writeq(TX4938_DMA_MCR_MSTEN,
+ (void __iomem *)(TX4938_DMA_REG(i) + 0x50));
/* PIO */
- tx4938_pioptr->maskcpu = 0;
- tx4938_pioptr->maskext = 0;
+ __raw_writel(0, &tx4938_pioptr->maskcpu);
+ __raw_writel(0, &tx4938_pioptr->maskext);
/* TX4938 internal registers */
if (request_resource(&iomem_resource, &tx4938_reg_resource))
@@ -863,10 +860,6 @@ void __init plat_mem_setup(void)
if (txx9_master_clock == 0)
txx9_master_clock = 25000000; /* 25MHz */
tx4938_board_setup();
- /* setup serial stuff */
- TX4938_WR(0xff1ff314, 0x00000000); /* h/w flow control off */
- TX4938_WR(0xff1ff414, 0x00000000); /* h/w flow control off */
-
#ifndef CONFIG_PCI
set_io_port_base(RBTX4938_ETHER_BASE);
#endif
@@ -932,16 +925,16 @@ void __init plat_mem_setup(void)
pcfg = tx4938_ccfgptr->pcfg; /* updated */
/* fixup piosel */
if ((pcfg & (TX4938_PCFG_ATA_SEL | TX4938_PCFG_NDF_SEL)) ==
- TX4938_PCFG_ATA_SEL) {
- *rbtx4938_piosel_ptr = (*rbtx4938_piosel_ptr & 0x03) | 0x04;
- }
+ TX4938_PCFG_ATA_SEL)
+ writeb((readb(rbtx4938_piosel_addr) & 0x03) | 0x04,
+ rbtx4938_piosel_addr);
else if ((pcfg & (TX4938_PCFG_ATA_SEL | TX4938_PCFG_NDF_SEL)) ==
- TX4938_PCFG_NDF_SEL) {
- *rbtx4938_piosel_ptr = (*rbtx4938_piosel_ptr & 0x03) | 0x08;
- }
- else {
- *rbtx4938_piosel_ptr &= ~(0x08 | 0x04);
- }
+ TX4938_PCFG_NDF_SEL)
+ writeb((readb(rbtx4938_piosel_addr) & 0x03) | 0x08,
+ rbtx4938_piosel_addr);
+ else
+ writeb(readb(rbtx4938_piosel_addr) & ~(0x08 | 0x04),
+ rbtx4938_piosel_addr);
rbtx4938_fpga_resource.name = "FPGA Registers";
rbtx4938_fpga_resource.start = CPHYSADDR(RBTX4938_FPGA_REG_ADDR);
@@ -950,17 +943,14 @@ void __init plat_mem_setup(void)
if (request_resource(&iomem_resource, &rbtx4938_fpga_resource))
printk("request resource for fpga failed\n");
- /* disable all OnBoard I/O interrupts */
- *rbtx4938_imask_ptr = 0;
-
_machine_restart = rbtx4938_machine_restart;
_machine_halt = rbtx4938_machine_halt;
pm_power_off = rbtx4938_machine_power_off;
- *rbtx4938_led_ptr = 0xff;
- printk("RBTX4938 --- FPGA(Rev %02x)", *rbtx4938_fpga_rev_ptr);
- printk(" DIPSW:%02x,%02x\n",
- *rbtx4938_dipsw_ptr, *rbtx4938_bdipsw_ptr);
+ writeb(0xff, rbtx4938_led_addr);
+ printk(KERN_INFO "RBTX4938 --- FPGA(Rev %02x) DIPSW:%02x,%02x\n",
+ readb(rbtx4938_fpga_rev_addr),
+ readb(rbtx4938_dipsw_addr), readb(rbtx4938_bdipsw_addr));
}
static int __init rbtx4938_ne_init(void)
@@ -984,106 +974,48 @@ device_initcall(rbtx4938_ne_init);
/* GPIO support */
-static DEFINE_SPINLOCK(rbtx4938_spi_gpio_lock);
-
-static void rbtx4938_spi_gpio_set(unsigned gpio, int value)
+int gpio_to_irq(unsigned gpio)
{
- u8 val;
- unsigned long flags;
- gpio -= 16;
- spin_lock_irqsave(&rbtx4938_spi_gpio_lock, flags);
- val = *rbtx4938_spics_ptr;
- if (value)
- val |= 1 << gpio;
- else
- val &= ~(1 << gpio);
- *rbtx4938_spics_ptr = val;
- mmiowb();
- spin_unlock_irqrestore(&rbtx4938_spi_gpio_lock, flags);
+ return -EINVAL;
}
-static int rbtx4938_spi_gpio_dir_out(unsigned gpio, int value)
+int irq_to_gpio(unsigned irq)
{
- rbtx4938_spi_gpio_set(gpio, value);
- return 0;
+ return -EINVAL;
}
-static DEFINE_SPINLOCK(tx4938_gpio_lock);
-
-static int tx4938_gpio_get(unsigned gpio)
-{
- return tx4938_pioptr->din & (1 << gpio);
-}
+static DEFINE_SPINLOCK(rbtx4938_spi_gpio_lock);
-static void tx4938_gpio_set_raw(unsigned gpio, int value)
+static void rbtx4938_spi_gpio_set(struct gpio_chip *chip, unsigned int offset,
+ int value)
{
- u32 val;
- val = tx4938_pioptr->dout;
+ u8 val;
+ unsigned long flags;
+ spin_lock_irqsave(&rbtx4938_spi_gpio_lock, flags);
+ val = readb(rbtx4938_spics_addr);
if (value)
- val |= 1 << gpio;
+ val |= 1 << offset;
else
- val &= ~(1 << gpio);
- tx4938_pioptr->dout = val;
-}
-
-static void tx4938_gpio_set(unsigned gpio, int value)
-{
- unsigned long flags;
- spin_lock_irqsave(&tx4938_gpio_lock, flags);
- tx4938_gpio_set_raw(gpio, value);
- mmiowb();
- spin_unlock_irqrestore(&tx4938_gpio_lock, flags);
-}
-
-static int tx4938_gpio_dir_in(unsigned gpio)
-{
- spin_lock_irq(&tx4938_gpio_lock);
- tx4938_pioptr->dir &= ~(1 << gpio);
+ val &= ~(1 << offset);
+ writeb(val, rbtx4938_spics_addr);
mmiowb();
- spin_unlock_irq(&tx4938_gpio_lock);
- return 0;
-}
-
-static int tx4938_gpio_dir_out(unsigned int gpio, int value)
-{
- spin_lock_irq(&tx4938_gpio_lock);
- tx4938_gpio_set_raw(gpio, value);
- tx4938_pioptr->dir |= 1 << gpio;
- mmiowb();
- spin_unlock_irq(&tx4938_gpio_lock);
- return 0;
-}
-
-int gpio_direction_input(unsigned gpio)
-{
- if (gpio < 16)
- return tx4938_gpio_dir_in(gpio);
- return -EINVAL;
-}
-
-int gpio_direction_output(unsigned gpio, int value)
-{
- if (gpio < 16)
- return tx4938_gpio_dir_out(gpio, value);
- if (gpio < 16 + 3)
- return rbtx4938_spi_gpio_dir_out(gpio, value);
- return -EINVAL;
+ spin_unlock_irqrestore(&rbtx4938_spi_gpio_lock, flags);
}
-int gpio_get_value(unsigned gpio)
+static int rbtx4938_spi_gpio_dir_out(struct gpio_chip *chip,
+ unsigned int offset, int value)
{
- if (gpio < 16)
- return tx4938_gpio_get(gpio);
+ rbtx4938_spi_gpio_set(chip, offset, value);
return 0;
}
-void gpio_set_value(unsigned gpio, int value)
-{
- if (gpio < 16)
- tx4938_gpio_set(gpio, value);
- else
- rbtx4938_spi_gpio_set(gpio, value);
-}
+static struct gpio_chip rbtx4938_spi_gpio_chip = {
+ .set = rbtx4938_spi_gpio_set,
+ .direction_output = rbtx4938_spi_gpio_dir_out,
+ .label = "RBTX4938-SPICS",
+ .base = 16,
+ .ngpio = 3,
+};
/* SPI support */
@@ -1094,7 +1026,6 @@ static void __init txx9_spi_init(unsigned long base, int irq)
.start = base,
.end = base + 0x20 - 1,
.flags = IORESOURCE_MEM,
- .parent = &tx4938_reg_resource,
}, {
.start = irq,
.flags = IORESOURCE_IRQ,
@@ -1118,10 +1049,25 @@ static int __init rbtx4938_spi_init(void)
spi_eeprom_register(SEEPROM1_CS);
spi_eeprom_register(16 + SEEPROM2_CS);
spi_eeprom_register(16 + SEEPROM3_CS);
+ gpio_request(16 + SRTC_CS, "rtc-rs5c348");
+ gpio_direction_output(16 + SRTC_CS, 0);
+ gpio_request(SEEPROM1_CS, "seeprom1");
+ gpio_direction_output(SEEPROM1_CS, 1);
+ gpio_request(16 + SEEPROM2_CS, "seeprom2");
+ gpio_direction_output(16 + SEEPROM2_CS, 1);
+ gpio_request(16 + SEEPROM3_CS, "seeprom3");
+ gpio_direction_output(16 + SEEPROM3_CS, 1);
txx9_spi_init(TX4938_SPI_REG & 0xfffffffffULL, RBTX4938_IRQ_IRC_SPI);
return 0;
}
-arch_initcall(rbtx4938_spi_init);
+
+static int __init rbtx4938_arch_init(void)
+{
+ txx9_gpio_init(TX4938_PIO_REG & 0xfffffffffULL, 0, 16);
+ gpiochip_add(&rbtx4938_spi_gpio_chip);
+ return rbtx4938_spi_init();
+}
+arch_initcall(rbtx4938_arch_init);
/* Watchdog support */
@@ -1131,7 +1077,6 @@ static int __init txx9_wdt_init(unsigned long base)
.start = base,
.end = base + 0x100 - 1,
.flags = IORESOURCE_MEM,
- .parent = &tx4938_reg_resource,
};
struct platform_device *dev =
platform_device_register_simple("txx9wdt", -1, &res, 1);
diff --git a/arch/mips/vr41xx/common/init.c b/arch/mips/vr41xx/common/init.c
index 76d4b5ed3fc0..c64995342ba8 100644
--- a/arch/mips/vr41xx/common/init.c
+++ b/arch/mips/vr41xx/common/init.c
@@ -1,7 +1,7 @@
/*
* init.c, Common initialization routines for NEC VR4100 series.
*
- * Copyright (C) 2003-2005 Yoichi Yuasa <yoichi_yuasa@tripeaks.co.jp>
+ * Copyright (C) 2003-2008 Yoichi Yuasa <yoichi_yuasa@tripeaks.co.jp>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -53,6 +53,8 @@ void __init plat_time_init(void)
void __init plat_mem_setup(void)
{
iomem_resource_init();
+
+ vr41xx_siu_setup();
}
void __init prom_init(void)
diff --git a/arch/mips/vr41xx/common/siu.c b/arch/mips/vr41xx/common/siu.c
index b735f45b25f0..654dee6208be 100644
--- a/arch/mips/vr41xx/common/siu.c
+++ b/arch/mips/vr41xx/common/siu.c
@@ -1,7 +1,7 @@
/*
* NEC VR4100 series SIU platform device.
*
- * Copyright (C) 2007 Yoichi Yuasa <yoichi_yuasa@tripeaks.co.jp>
+ * Copyright (C) 2007-2008 Yoichi Yuasa <yoichi_yuasa@tripeaks.co.jp>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -118,3 +118,37 @@ err_free_device:
return retval;
}
device_initcall(vr41xx_siu_add);
+
+void __init vr41xx_siu_setup(void)
+{
+ struct uart_port port;
+ struct resource *res;
+ unsigned int *type;
+ int i;
+
+ switch (current_cpu_type()) {
+ case CPU_VR4111:
+ case CPU_VR4121:
+ type = siu_type1_ports;
+ res = siu_type1_resource;
+ break;
+ case CPU_VR4122:
+ case CPU_VR4131:
+ case CPU_VR4133:
+ type = siu_type2_ports;
+ res = siu_type2_resource;
+ break;
+ default:
+ return;
+ }
+
+ for (i = 0; i < SIU_PORTS_MAX; i++) {
+ port.line = i;
+ port.type = type[i];
+ if (port.type == PORT_UNKNOWN)
+ break;
+ port.mapbase = res[i].start;
+ port.membase = (unsigned char __iomem *)KSEG1ADDR(res[i].start);
+ vr41xx_siu_early_setup(&port);
+ }
+}
diff --git a/arch/mn10300/kernel/asm-offsets.c b/arch/mn10300/kernel/asm-offsets.c
index ee2d9f8af5ad..2646fcbd7d89 100644
--- a/arch/mn10300/kernel/asm-offsets.c
+++ b/arch/mn10300/kernel/asm-offsets.c
@@ -7,6 +7,7 @@
#include <linux/sched.h>
#include <linux/signal.h>
#include <linux/personality.h>
+#include <linux/kbuild.h>
#include <asm/ucontext.h>
#include <asm/processor.h>
#include <asm/thread_info.h>
@@ -14,14 +15,6 @@
#include "sigframe.h"
#include "mn10300-serial.h"
-#define DEFINE(sym, val) \
- asm volatile("\n->" #sym " %0 " #val : : "i" (val))
-
-#define BLANK() asm volatile("\n->")
-
-#define OFFSET(sym, str, mem) \
- DEFINE(sym, offsetof(struct str, mem));
-
void foo(void)
{
OFFSET(SIGCONTEXT_d0, sigcontext, d0);
diff --git a/arch/mn10300/unit-asb2305/pci-iomap.c b/arch/mn10300/unit-asb2305/pci-iomap.c
index dbceae4307da..c1a8d8f941fd 100644
--- a/arch/mn10300/unit-asb2305/pci-iomap.c
+++ b/arch/mn10300/unit-asb2305/pci-iomap.c
@@ -16,8 +16,8 @@
*/
void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
{
- unsigned long start = pci_resource_start(dev, bar);
- unsigned long len = pci_resource_len(dev, bar);
+ resource_size_t start = pci_resource_start(dev, bar);
+ resource_size_t len = pci_resource_len(dev, bar);
unsigned long flags = pci_resource_flags(dev, bar);
if (!len || !start)
diff --git a/arch/parisc/kernel/asm-offsets.c b/arch/parisc/kernel/asm-offsets.c
index eaa79bc14d94..3efc0b73e4ff 100644
--- a/arch/parisc/kernel/asm-offsets.c
+++ b/arch/parisc/kernel/asm-offsets.c
@@ -32,6 +32,7 @@
#include <linux/thread_info.h>
#include <linux/ptrace.h>
#include <linux/hardirq.h>
+#include <linux/kbuild.h>
#include <asm/pgtable.h>
#include <asm/ptrace.h>
@@ -39,11 +40,6 @@
#include <asm/pdc.h>
#include <asm/uaccess.h>
-#define DEFINE(sym, val) \
- asm volatile("\n->" #sym " %0 " #val : : "i" (val))
-
-#define BLANK() asm volatile("\n->" : : )
-
#ifdef CONFIG_64BIT
#define FRAME_SIZE 128
#else
diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c
index 9448d4e91142..ccd61b9567a6 100644
--- a/arch/parisc/kernel/pci-dma.c
+++ b/arch/parisc/kernel/pci-dma.c
@@ -397,10 +397,9 @@ pcxl_dma_init(void)
"pcxl_dma_init: Unable to create gsc /proc dir entry\n");
else {
struct proc_dir_entry* ent;
- ent = create_proc_entry("pcxl_dma", 0, proc_gsc_root);
- if (ent)
- ent->proc_fops = &proc_pcxl_dma_ops;
- else
+ ent = proc_create("pcxl_dma", 0, proc_gsc_root,
+ &proc_pcxl_dma_ops);
+ if (!ent)
printk(KERN_WARNING
"pci-dma.c: Unable to create pcxl_dma /proc entry.\n");
}
diff --git a/arch/parisc/lib/iomap.c b/arch/parisc/lib/iomap.c
index f4a811690ab3..9abed07db7fc 100644
--- a/arch/parisc/lib/iomap.c
+++ b/arch/parisc/lib/iomap.c
@@ -438,8 +438,8 @@ void ioport_unmap(void __iomem *addr)
/* Create a virtual mapping cookie for a PCI BAR (memory or IO) */
void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
{
- unsigned long start = pci_resource_start(dev, bar);
- unsigned long len = pci_resource_len(dev, bar);
+ resource_size_t start = pci_resource_start(dev, bar);
+ resource_size_t len = pci_resource_len(dev, bar);
unsigned long flags = pci_resource_flags(dev, bar);
if (!len || !start)
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index eb80f5e33d7d..1f012843150f 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -603,15 +603,18 @@ void show_mem(void)
#ifdef CONFIG_DISCONTIGMEM
{
struct zonelist *zl;
- int i, j, k;
+ int i, j;
for (i = 0; i < npmem_ranges; i++) {
+ zl = node_zonelist(i);
for (j = 0; j < MAX_NR_ZONES; j++) {
- zl = NODE_DATA(i)->node_zonelists + j;
+ struct zoneref *z;
+ struct zone *zone;
printk("Zone list for zone %d on node %d: ", j, i);
- for (k = 0; zl->zones[k] != NULL; k++)
- printk("[%d/%s] ", zone_to_nid(zl->zones[k]), zl->zones[k]->name);
+ for_each_zone_zonelist(zone, z, zl, j)
+ printk("[%d/%s] ", zone_to_nid(zone),
+ zone->name);
printk("\n");
}
}
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 20f45a8b87e3..3934e2659407 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -608,6 +608,19 @@ source "drivers/pcmcia/Kconfig"
source "drivers/pci/hotplug/Kconfig"
+config HAS_RAPIDIO
+ bool
+ default n
+
+config RAPIDIO
+ bool "RapidIO support"
+ depends on HAS_RAPIDIO
+ help
+ If you say Y here, the kernel will include drivers and
+ infrastructure code to support RapidIO interconnect devices.
+
+source "drivers/rapidio/Kconfig"
+
endmenu
menu "Advanced setup"
@@ -803,3 +816,4 @@ config PPC_CLOCK
config PPC_LIB_RHEAP
bool
+source "arch/powerpc/kvm/Kconfig"
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index a86d8d853214..a7d24e692bab 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -118,7 +118,6 @@ config XMON_DISASSEMBLY
config IRQSTACKS
bool "Use separate kernel stacks when processing interrupts"
- depends on PPC64
help
If you say Y here the kernel will use separate kernel stacks
for handling hard and soft interrupts. This can help avoid
@@ -151,6 +150,9 @@ config BOOTX_TEXT
config PPC_EARLY_DEBUG
bool "Early debugging (dangerous)"
+ # PPC_EARLY_DEBUG on 440 leaves AS=1 mappings above the TLB high water
+ # mark, which doesn't work with current 440 KVM.
+ depends on !KVM
help
Say Y to enable some early debugging facilities that may be available
for your processor/board combination. Those facilities are hacks
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index e2ec4a91ccef..9dcdc036cdf7 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -145,6 +145,7 @@ core-y += arch/powerpc/kernel/ \
arch/powerpc/platforms/
core-$(CONFIG_MATH_EMULATION) += arch/powerpc/math-emu/
core-$(CONFIG_XMON) += arch/powerpc/xmon/
+core-$(CONFIG_KVM) += arch/powerpc/kvm/
drivers-$(CONFIG_OPROFILE) += arch/powerpc/oprofile/
diff --git a/arch/powerpc/boot/dts/mpc8610_hpcd.dts b/arch/powerpc/boot/dts/mpc8610_hpcd.dts
index 16c947b8a721..1f2f1e0a5571 100644
--- a/arch/powerpc/boot/dts/mpc8610_hpcd.dts
+++ b/arch/powerpc/boot/dts/mpc8610_hpcd.dts
@@ -45,6 +45,11 @@
reg = <0x00000000 0x20000000>; // 512M at 0x0
};
+ board-control@e8000000 {
+ compatible = "fsl,fpga-pixis";
+ reg = <0xe8000000 32>; // pixis at 0xe8000000
+ };
+
soc@e0000000 {
#address-cells = <1>;
#size-cells = <1>;
@@ -104,6 +109,13 @@
interrupt-parent = <&mpic>;
};
+ display@2c000 {
+ compatible = "fsl,diu";
+ reg = <0x2c000 100>;
+ interrupts = <72 2>;
+ interrupt-parent = <&mpic>;
+ };
+
mpic: interrupt-controller@40000 {
clock-frequency = <0>;
interrupt-controller;
diff --git a/arch/powerpc/boot/dts/mpc8641_hpcn.dts b/arch/powerpc/boot/dts/mpc8641_hpcn.dts
index 7f9b999843ce..1e4bfe9cadb9 100644
--- a/arch/powerpc/boot/dts/mpc8641_hpcn.dts
+++ b/arch/powerpc/boot/dts/mpc8641_hpcn.dts
@@ -26,6 +26,7 @@
serial1 = &serial1;
pci0 = &pci0;
pci1 = &pci1;
+ rapidio0 = &rapidio0;
};
cpus {
@@ -500,4 +501,15 @@
0x0 0x00100000>;
};
};
+ rapidio0: rapidio@f80c0000 {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ compatible = "fsl,rapidio-delta";
+ reg = <0xf80c0000 0x20000>;
+ ranges = <0 0 0xc0000000 0 0x20000000>;
+ interrupt-parent = <&mpic>;
+ /* err_irq bell_outb_irq bell_inb_irq
+ msg1_tx_irq msg1_rx_irq msg2_tx_irq msg2_rx_irq */
+ interrupts = <48 2 49 2 50 2 53 2 54 2 55 2 56 2>;
+ };
};
diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig
index a20501f89474..88338a9f5e95 100644
--- a/arch/powerpc/configs/g5_defconfig
+++ b/arch/powerpc/configs/g5_defconfig
@@ -696,6 +696,7 @@ CONFIG_WINDFARM=y
CONFIG_WINDFARM_PM81=y
CONFIG_WINDFARM_PM91=y
CONFIG_WINDFARM_PM112=y
+CONFIG_WINDFARM_PM121=y
# CONFIG_PMAC_RACKMETER is not set
CONFIG_NETDEVICES=y
# CONFIG_NETDEVICES_MULTIQUEUE is not set
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 9177b21b1a95..d14cebf62bb0 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -73,7 +73,6 @@ pci64-$(CONFIG_PPC64) += pci_dn.o isa-bridge.o
obj-$(CONFIG_PCI) += pci_$(CONFIG_WORD_SIZE).o $(pci64-y) \
pci-common.o
obj-$(CONFIG_PCI_MSI) += msi.o
-obj-$(CONFIG_RAPIDIO) += rio.o
obj-$(CONFIG_KEXEC) += machine_kexec.o crash.o \
machine_kexec_$(CONFIG_WORD_SIZE).o
obj-$(CONFIG_AUDIT) += audit.o
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index adf1d09d726f..ec9228d687b0 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -23,10 +23,14 @@
#include <linux/mm.h>
#include <linux/suspend.h>
#include <linux/hrtimer.h>
+#ifdef CONFIG_KVM
+#include <linux/kvm_host.h>
+#endif
#ifdef CONFIG_PPC64
#include <linux/time.h>
#include <linux/hardirq.h>
#endif
+#include <linux/kbuild.h>
#include <asm/io.h>
#include <asm/page.h>
@@ -48,11 +52,6 @@
#include <asm/iseries/alpaca.h>
#endif
-#define DEFINE(sym, val) \
- asm volatile("\n->" #sym " %0 " #val : : "i" (val))
-
-#define BLANK() asm volatile("\n->" : : )
-
int main(void)
{
DEFINE(THREAD, offsetof(struct task_struct, thread));
@@ -64,6 +63,7 @@ int main(void)
#endif /* CONFIG_PPC64 */
DEFINE(KSP, offsetof(struct thread_struct, ksp));
+ DEFINE(KSP_LIMIT, offsetof(struct thread_struct, ksp_limit));
DEFINE(PT_REGS, offsetof(struct thread_struct, regs));
DEFINE(THREAD_FPEXC_MODE, offsetof(struct thread_struct, fpexc_mode));
DEFINE(THREAD_FPR0, offsetof(struct thread_struct, fpr[0]));
@@ -324,5 +324,30 @@ int main(void)
DEFINE(PGD_TABLE_SIZE, PGD_TABLE_SIZE);
+#ifdef CONFIG_KVM
+ DEFINE(TLBE_BYTES, sizeof(struct tlbe));
+
+ DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack));
+ DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
+ DEFINE(VCPU_HOST_TLB, offsetof(struct kvm_vcpu, arch.host_tlb));
+ DEFINE(VCPU_SHADOW_TLB, offsetof(struct kvm_vcpu, arch.shadow_tlb));
+ DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
+ DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr));
+ DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
+ DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer));
+ DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr));
+ DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc));
+ DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.msr));
+ DEFINE(VCPU_SPRG4, offsetof(struct kvm_vcpu, arch.sprg4));
+ DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5));
+ DEFINE(VCPU_SPRG6, offsetof(struct kvm_vcpu, arch.sprg6));
+ DEFINE(VCPU_SPRG7, offsetof(struct kvm_vcpu, arch.sprg7));
+ DEFINE(VCPU_PID, offsetof(struct kvm_vcpu, arch.pid));
+
+ DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));
+ DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear));
+ DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr));
+#endif
+
return 0;
}
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 84c868633068..0c8614d9875c 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -137,11 +137,12 @@ transfer_to_handler:
2: /* if from kernel, check interrupted DOZE/NAP mode and
* check for stack overflow
*/
- lwz r9,THREAD_INFO-THREAD(r12)
- cmplw r1,r9 /* if r1 <= current->thread_info */
+ lwz r9,KSP_LIMIT(r12)
+ cmplw r1,r9 /* if r1 <= ksp_limit */
ble- stack_ovf /* then the kernel stack overflowed */
5:
#ifdef CONFIG_6xx
+ rlwinm r9,r1,0,0,31-THREAD_SHIFT
tophys(r9,r9) /* check local flags */
lwz r12,TI_LOCAL_FLAGS(r9)
mtcrf 0x01,r12
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 215973a2c8d5..024805e1747d 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -239,6 +239,10 @@ instruction_access_slb_pSeries:
.globl system_call_pSeries
system_call_pSeries:
HMT_MEDIUM
+BEGIN_FTR_SECTION
+ cmpdi r0,0x1ebe
+ beq- 1f
+END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)
mr r9,r13
mfmsr r10
mfspr r13,SPRN_SPRG3
@@ -253,6 +257,13 @@ system_call_pSeries:
rfid
b . /* prevent speculative execution */
+/* Fast LE/BE switch system call */
+1: mfspr r12,SPRN_SRR1
+ xori r12,r12,MSR_LE
+ mtspr SPRN_SRR1,r12
+ rfid /* return to userspace */
+ b .
+
STD_EXCEPTION_PSERIES(0xd00, single_step)
STD_EXCEPTION_PSERIES(0xe00, trap_0e)
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 425616f92d18..2f73f705d564 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -307,6 +307,7 @@ void do_IRQ(struct pt_regs *regs)
if (curtp != irqtp) {
struct irq_desc *desc = irq_desc + irq;
void *handler = desc->handle_irq;
+ unsigned long saved_sp_limit = current->thread.ksp_limit;
if (handler == NULL)
handler = &__do_IRQ;
irqtp->task = curtp->task;
@@ -319,7 +320,10 @@ void do_IRQ(struct pt_regs *regs)
(irqtp->preempt_count & ~SOFTIRQ_MASK) |
(curtp->preempt_count & SOFTIRQ_MASK);
+ current->thread.ksp_limit = (unsigned long)irqtp +
+ _ALIGN_UP(sizeof(struct thread_info), 16);
call_handle_irq(irq, desc, irqtp, handler);
+ current->thread.ksp_limit = saved_sp_limit;
irqtp->task = NULL;
@@ -352,9 +356,7 @@ void __init init_IRQ(void)
{
if (ppc_md.init_IRQ)
ppc_md.init_IRQ();
-#ifdef CONFIG_PPC64
irq_ctx_init();
-#endif
}
@@ -383,11 +385,15 @@ void irq_ctx_init(void)
static inline void do_softirq_onstack(void)
{
struct thread_info *curtp, *irqtp;
+ unsigned long saved_sp_limit = current->thread.ksp_limit;
curtp = current_thread_info();
irqtp = softirq_ctx[smp_processor_id()];
irqtp->task = curtp->task;
+ current->thread.ksp_limit = (unsigned long)irqtp +
+ _ALIGN_UP(sizeof(struct thread_info), 16);
call_do_softirq(irqtp);
+ current->thread.ksp_limit = saved_sp_limit;
irqtp->task = NULL;
}
diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c
index 1ffacc698ffb..1e656b43ad7f 100644
--- a/arch/powerpc/kernel/lparcfg.c
+++ b/arch/powerpc/kernel/lparcfg.c
@@ -591,10 +591,8 @@ int __init lparcfg_init(void)
!firmware_has_feature(FW_FEATURE_ISERIES))
mode |= S_IWUSR;
- ent = create_proc_entry("ppc64/lparcfg", mode, NULL);
- if (ent) {
- ent->proc_fops = &lparcfg_fops;
- } else {
+ ent = proc_create("ppc64/lparcfg", mode, NULL, &lparcfg_fops);
+ if (!ent) {
printk(KERN_ERR "Failed to create ppc64/lparcfg\n");
return -EIO;
}
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 92ccc6fcc5b0..89aaaa6f3561 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -32,6 +32,31 @@
.text
+#ifdef CONFIG_IRQSTACKS
+_GLOBAL(call_do_softirq)
+ mflr r0
+ stw r0,4(r1)
+ stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3)
+ mr r1,r3
+ bl __do_softirq
+ lwz r1,0(r1)
+ lwz r0,4(r1)
+ mtlr r0
+ blr
+
+_GLOBAL(call_handle_irq)
+ mflr r0
+ stw r0,4(r1)
+ mtctr r6
+ stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r5)
+ mr r1,r5
+ bctrl
+ lwz r1,0(r1)
+ lwz r0,4(r1)
+ mtlr r0
+ blr
+#endif /* CONFIG_IRQSTACKS */
+
/*
* This returns the high 64 bits of the product of two 64-bit numbers.
*/
diff --git a/arch/powerpc/kernel/proc_ppc64.c b/arch/powerpc/kernel/proc_ppc64.c
index f78dfce1b771..c647ddef40dc 100644
--- a/arch/powerpc/kernel/proc_ppc64.c
+++ b/arch/powerpc/kernel/proc_ppc64.c
@@ -68,12 +68,11 @@ static int __init proc_ppc64_init(void)
{
struct proc_dir_entry *pde;
- pde = create_proc_entry("ppc64/systemcfg", S_IFREG|S_IRUGO, NULL);
+ pde = proc_create_data("ppc64/systemcfg", S_IFREG|S_IRUGO, NULL,
+ &page_map_fops, vdso_data);
if (!pde)
return 1;
- pde->data = vdso_data;
pde->size = PAGE_SIZE;
- pde->proc_fops = &page_map_fops;
return 0;
}
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 6caad17ea72e..7de41c3948ec 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -589,6 +589,8 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
kregs = (struct pt_regs *) sp;
sp -= STACK_FRAME_OVERHEAD;
p->thread.ksp = sp;
+ p->thread.ksp_limit = (unsigned long)task_stack_page(p) +
+ _ALIGN_UP(sizeof(struct thread_info), 16);
#ifdef CONFIG_PPC64
if (cpu_has_feature(CPU_FTR_SLB)) {
diff --git a/arch/powerpc/kernel/rio.c b/arch/powerpc/kernel/rio.c
deleted file mode 100644
index 29487fedfc76..000000000000
--- a/arch/powerpc/kernel/rio.c
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * RapidIO PPC32 support
- *
- * Copyright 2005 MontaVista Software, Inc.
- * Matt Porter <mporter@kernel.crashing.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/rio.h>
-
-#include <asm/rio.h>
-
-/**
- * platform_rio_init - Do platform specific RIO init
- *
- * Any platform specific initialization of RapdIO
- * hardware is done here as well as registration
- * of any active master ports in the system.
- */
-void __attribute__ ((weak))
- platform_rio_init(void)
-{
- printk(KERN_WARNING "RIO: No platform_rio_init() present\n");
-}
-
-/**
- * ppc_rio_init - Do PPC32 RIO init
- *
- * Calls platform-specific RIO init code and then calls
- * rio_init_mports() to initialize any master ports that
- * have been registered with the RIO subsystem.
- */
-static int __init ppc_rio_init(void)
-{
- printk(KERN_INFO "RIO: RapidIO init\n");
-
- /* Platform specific initialization */
- platform_rio_init();
-
- /* Enumerate all registered ports */
- rio_init_mports();
-
- return 0;
-}
-
-subsys_initcall(ppc_rio_init);
diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c
index f2e3bc714d76..f9c6abc84a94 100644
--- a/arch/powerpc/kernel/rtas-proc.c
+++ b/arch/powerpc/kernel/rtas-proc.c
@@ -255,8 +255,6 @@ static void check_location(struct seq_file *m, const char *c);
static int __init proc_rtas_init(void)
{
- struct proc_dir_entry *entry;
-
if (!machine_is(pseries))
return -ENODEV;
@@ -264,35 +262,20 @@ static int __init proc_rtas_init(void)
if (rtas_node == NULL)
return -ENODEV;
- entry = create_proc_entry("ppc64/rtas/progress", S_IRUGO|S_IWUSR, NULL);
- if (entry)
- entry->proc_fops = &ppc_rtas_progress_operations;
-
- entry = create_proc_entry("ppc64/rtas/clock", S_IRUGO|S_IWUSR, NULL);
- if (entry)
- entry->proc_fops = &ppc_rtas_clock_operations;
-
- entry = create_proc_entry("ppc64/rtas/poweron", S_IWUSR|S_IRUGO, NULL);
- if (entry)
- entry->proc_fops = &ppc_rtas_poweron_operations;
-
- entry = create_proc_entry("ppc64/rtas/sensors", S_IRUGO, NULL);
- if (entry)
- entry->proc_fops = &ppc_rtas_sensors_operations;
-
- entry = create_proc_entry("ppc64/rtas/frequency", S_IWUSR|S_IRUGO,
- NULL);
- if (entry)
- entry->proc_fops = &ppc_rtas_tone_freq_operations;
-
- entry = create_proc_entry("ppc64/rtas/volume", S_IWUSR|S_IRUGO, NULL);
- if (entry)
- entry->proc_fops = &ppc_rtas_tone_volume_operations;
-
- entry = create_proc_entry("ppc64/rtas/rmo_buffer", S_IRUSR, NULL);
- if (entry)
- entry->proc_fops = &ppc_rtas_rmo_buf_ops;
-
+ proc_create("ppc64/rtas/progress", S_IRUGO|S_IWUSR, NULL,
+ &ppc_rtas_progress_operations);
+ proc_create("ppc64/rtas/clock", S_IRUGO|S_IWUSR, NULL,
+ &ppc_rtas_clock_operations);
+ proc_create("ppc64/rtas/poweron", S_IWUSR|S_IRUGO, NULL,
+ &ppc_rtas_poweron_operations);
+ proc_create("ppc64/rtas/sensors", S_IRUGO, NULL,
+ &ppc_rtas_sensors_operations);
+ proc_create("ppc64/rtas/frequency", S_IWUSR|S_IRUGO, NULL,
+ &ppc_rtas_tone_freq_operations);
+ proc_create("ppc64/rtas/volume", S_IWUSR|S_IRUGO, NULL,
+ &ppc_rtas_tone_volume_operations);
+ proc_create("ppc64/rtas/rmo_buffer", S_IRUSR, NULL,
+ &ppc_rtas_rmo_buf_ops);
return 0;
}
diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c
index 627f126d1848..0a5e22b22729 100644
--- a/arch/powerpc/kernel/rtas_flash.c
+++ b/arch/powerpc/kernel/rtas_flash.c
@@ -704,18 +704,11 @@ static int initialize_flash_pde_data(const char *rtas_call_name,
static struct proc_dir_entry *create_flash_pde(const char *filename,
const struct file_operations *fops)
{
- struct proc_dir_entry *ent = NULL;
-
- ent = create_proc_entry(filename, S_IRUSR | S_IWUSR, NULL);
- if (ent != NULL) {
- ent->proc_fops = fops;
- ent->owner = THIS_MODULE;
- }
-
- return ent;
+ return proc_create(filename, S_IRUSR | S_IWUSR, NULL, fops);
}
static const struct file_operations rtas_flash_operations = {
+ .owner = THIS_MODULE,
.read = rtas_flash_read,
.write = rtas_flash_write,
.open = rtas_excl_open,
@@ -723,6 +716,7 @@ static const struct file_operations rtas_flash_operations = {
};
static const struct file_operations manage_flash_operations = {
+ .owner = THIS_MODULE,
.read = manage_flash_read,
.write = manage_flash_write,
.open = rtas_excl_open,
@@ -730,6 +724,7 @@ static const struct file_operations manage_flash_operations = {
};
static const struct file_operations validate_flash_operations = {
+ .owner = THIS_MODULE,
.read = validate_flash_read,
.write = validate_flash_write,
.open = rtas_excl_open,
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 36f6779c88d4..5112a4aa801d 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -16,6 +16,7 @@
#include <linux/root_dev.h>
#include <linux/cpu.h>
#include <linux/console.h>
+#include <linux/lmb.h>
#include <asm/io.h>
#include <asm/prom.h>
@@ -229,6 +230,24 @@ int __init ppc_init(void)
arch_initcall(ppc_init);
+#ifdef CONFIG_IRQSTACKS
+static void __init irqstack_early_init(void)
+{
+ unsigned int i;
+
+ /* interrupt stacks must be in lowmem, we get that for free on ppc32
+ * as the lmb is limited to lowmem by LMB_REAL_LIMIT */
+ for_each_possible_cpu(i) {
+ softirq_ctx[i] = (struct thread_info *)
+ __va(lmb_alloc(THREAD_SIZE, THREAD_SIZE));
+ hardirq_ctx[i] = (struct thread_info *)
+ __va(lmb_alloc(THREAD_SIZE, THREAD_SIZE));
+ }
+}
+#else
+#define irqstack_early_init()
+#endif
+
/* Warning, IO base is not yet inited */
void __init setup_arch(char **cmdline_p)
{
@@ -286,6 +305,8 @@ void __init setup_arch(char **cmdline_p)
init_mm.end_data = (unsigned long) _edata;
init_mm.brk = klimit;
+ irqstack_early_init();
+
/* set up the bootmem stuff with available memory */
do_init_bootmem();
if ( ppc_md.progress ) ppc_md.progress("setup_arch: bootmem", 0x3eab);
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
new file mode 100644
index 000000000000..f5d7a5eab96e
--- /dev/null
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -0,0 +1,224 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2007
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kvm_host.h>
+#include <linux/highmem.h>
+#include <asm/mmu-44x.h>
+#include <asm/kvm_ppc.h>
+
+#include "44x_tlb.h"
+
+#define PPC44x_TLB_USER_PERM_MASK (PPC44x_TLB_UX|PPC44x_TLB_UR|PPC44x_TLB_UW)
+#define PPC44x_TLB_SUPER_PERM_MASK (PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW)
+
+static unsigned int kvmppc_tlb_44x_pos;
+
+static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode)
+{
+ /* Mask off reserved bits. */
+ attrib &= PPC44x_TLB_PERM_MASK|PPC44x_TLB_ATTR_MASK;
+
+ if (!usermode) {
+ /* Guest is in supervisor mode, so we need to translate guest
+ * supervisor permissions into user permissions. */
+ attrib &= ~PPC44x_TLB_USER_PERM_MASK;
+ attrib |= (attrib & PPC44x_TLB_SUPER_PERM_MASK) << 3;
+ }
+
+ /* Make sure host can always access this memory. */
+ attrib |= PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW;
+
+ return attrib;
+}
+
+/* Search the guest TLB for a matching entry. */
+int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid,
+ unsigned int as)
+{
+ int i;
+
+ /* XXX Replace loop with fancy data structures. */
+ for (i = 0; i < PPC44x_TLB_SIZE; i++) {
+ struct tlbe *tlbe = &vcpu->arch.guest_tlb[i];
+ unsigned int tid;
+
+ if (eaddr < get_tlb_eaddr(tlbe))
+ continue;
+
+ if (eaddr > get_tlb_end(tlbe))
+ continue;
+
+ tid = get_tlb_tid(tlbe);
+ if (tid && (tid != pid))
+ continue;
+
+ if (!get_tlb_v(tlbe))
+ continue;
+
+ if (get_tlb_ts(tlbe) != as)
+ continue;
+
+ return i;
+ }
+
+ return -1;
+}
+
+struct tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu, gva_t eaddr)
+{
+ unsigned int as = !!(vcpu->arch.msr & MSR_IS);
+ unsigned int index;
+
+ index = kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
+ if (index == -1)
+ return NULL;
+ return &vcpu->arch.guest_tlb[index];
+}
+
+struct tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu, gva_t eaddr)
+{
+ unsigned int as = !!(vcpu->arch.msr & MSR_DS);
+ unsigned int index;
+
+ index = kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
+ if (index == -1)
+ return NULL;
+ return &vcpu->arch.guest_tlb[index];
+}
+
+static int kvmppc_44x_tlbe_is_writable(struct tlbe *tlbe)
+{
+ return tlbe->word2 & (PPC44x_TLB_SW|PPC44x_TLB_UW);
+}
+
+/* Must be called with mmap_sem locked for writing. */
+static void kvmppc_44x_shadow_release(struct kvm_vcpu *vcpu,
+ unsigned int index)
+{
+ struct tlbe *stlbe = &vcpu->arch.shadow_tlb[index];
+ struct page *page = vcpu->arch.shadow_pages[index];
+
+ kunmap(vcpu->arch.shadow_pages[index]);
+
+ if (get_tlb_v(stlbe)) {
+ if (kvmppc_44x_tlbe_is_writable(stlbe))
+ kvm_release_page_dirty(page);
+ else
+ kvm_release_page_clean(page);
+ }
+}
+
+/* Caller must ensure that the specified guest TLB entry is safe to insert into
+ * the shadow TLB. */
+void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid,
+ u32 flags)
+{
+ struct page *new_page;
+ struct tlbe *stlbe;
+ hpa_t hpaddr;
+ unsigned int victim;
+
+ /* Future optimization: don't overwrite the TLB entry containing the
+ * current PC (or stack?). */
+ victim = kvmppc_tlb_44x_pos++;
+ if (kvmppc_tlb_44x_pos > tlb_44x_hwater)
+ kvmppc_tlb_44x_pos = 0;
+ stlbe = &vcpu->arch.shadow_tlb[victim];
+
+ /* Get reference to new page. */
+ down_write(&current->mm->mmap_sem);
+ new_page = gfn_to_page(vcpu->kvm, gfn);
+ if (is_error_page(new_page)) {
+ printk(KERN_ERR "Couldn't get guest page!\n");
+ kvm_release_page_clean(new_page);
+ return;
+ }
+ hpaddr = page_to_phys(new_page);
+
+ /* Drop reference to old page. */
+ kvmppc_44x_shadow_release(vcpu, victim);
+ up_write(&current->mm->mmap_sem);
+
+ vcpu->arch.shadow_pages[victim] = new_page;
+
+ /* XXX Make sure (va, size) doesn't overlap any other
+ * entries. 440x6 user manual says the result would be
+ * "undefined." */
+
+ /* XXX what about AS? */
+
+ stlbe->tid = asid & 0xff;
+
+ /* Force TS=1 for all guest mappings. */
+ /* For now we hardcode 4KB mappings, but it will be important to
+ * use host large pages in the future. */
+ stlbe->word0 = (gvaddr & PAGE_MASK) | PPC44x_TLB_VALID | PPC44x_TLB_TS
+ | PPC44x_TLB_4K;
+
+ stlbe->word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf);
+ stlbe->word2 = kvmppc_44x_tlb_shadow_attrib(flags,
+ vcpu->arch.msr & MSR_PR);
+}
+
+void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, u64 eaddr, u64 asid)
+{
+ unsigned int pid = asid & 0xff;
+ int i;
+
+ /* XXX Replace loop with fancy data structures. */
+ down_write(&current->mm->mmap_sem);
+ for (i = 0; i <= tlb_44x_hwater; i++) {
+ struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i];
+ unsigned int tid;
+
+ if (!get_tlb_v(stlbe))
+ continue;
+
+ if (eaddr < get_tlb_eaddr(stlbe))
+ continue;
+
+ if (eaddr > get_tlb_end(stlbe))
+ continue;
+
+ tid = get_tlb_tid(stlbe);
+ if (tid && (tid != pid))
+ continue;
+
+ kvmppc_44x_shadow_release(vcpu, i);
+ stlbe->word0 = 0;
+ }
+ up_write(&current->mm->mmap_sem);
+}
+
+/* Invalidate all mappings, so that when they fault back in they will get the
+ * proper permission bits. */
+void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode)
+{
+ int i;
+
+ /* XXX Replace loop with fancy data structures. */
+ down_write(&current->mm->mmap_sem);
+ for (i = 0; i <= tlb_44x_hwater; i++) {
+ kvmppc_44x_shadow_release(vcpu, i);
+ vcpu->arch.shadow_tlb[i].word0 = 0;
+ }
+ up_write(&current->mm->mmap_sem);
+}
diff --git a/arch/powerpc/kvm/44x_tlb.h b/arch/powerpc/kvm/44x_tlb.h
new file mode 100644
index 000000000000..2ccd46b6f6b7
--- /dev/null
+++ b/arch/powerpc/kvm/44x_tlb.h
@@ -0,0 +1,91 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2007
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#ifndef __KVM_POWERPC_TLB_H__
+#define __KVM_POWERPC_TLB_H__
+
+#include <linux/kvm_host.h>
+#include <asm/mmu-44x.h>
+
+extern int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr,
+ unsigned int pid, unsigned int as);
+extern struct tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu, gva_t eaddr);
+extern struct tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu, gva_t eaddr);
+
+/* TLB helper functions */
+static inline unsigned int get_tlb_size(const struct tlbe *tlbe)
+{
+ return (tlbe->word0 >> 4) & 0xf;
+}
+
+static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe)
+{
+ return tlbe->word0 & 0xfffffc00;
+}
+
+static inline gva_t get_tlb_bytes(const struct tlbe *tlbe)
+{
+ unsigned int pgsize = get_tlb_size(tlbe);
+ return 1 << 10 << (pgsize << 1);
+}
+
+static inline gva_t get_tlb_end(const struct tlbe *tlbe)
+{
+ return get_tlb_eaddr(tlbe) + get_tlb_bytes(tlbe) - 1;
+}
+
+static inline u64 get_tlb_raddr(const struct tlbe *tlbe)
+{
+ u64 word1 = tlbe->word1;
+ return ((word1 & 0xf) << 32) | (word1 & 0xfffffc00);
+}
+
+static inline unsigned int get_tlb_tid(const struct tlbe *tlbe)
+{
+ return tlbe->tid & 0xff;
+}
+
+static inline unsigned int get_tlb_ts(const struct tlbe *tlbe)
+{
+ return (tlbe->word0 >> 8) & 0x1;
+}
+
+static inline unsigned int get_tlb_v(const struct tlbe *tlbe)
+{
+ return (tlbe->word0 >> 9) & 0x1;
+}
+
+static inline unsigned int get_mmucr_stid(const struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.mmucr & 0xff;
+}
+
+static inline unsigned int get_mmucr_sts(const struct kvm_vcpu *vcpu)
+{
+ return (vcpu->arch.mmucr >> 16) & 0x1;
+}
+
+static inline gpa_t tlb_xlate(struct tlbe *tlbe, gva_t eaddr)
+{
+ unsigned int pgmask = get_tlb_bytes(tlbe) - 1;
+
+ return get_tlb_raddr(tlbe) | (eaddr & pgmask);
+}
+
+#endif /* __KVM_POWERPC_TLB_H__ */
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
new file mode 100644
index 000000000000..6b076010213b
--- /dev/null
+++ b/arch/powerpc/kvm/Kconfig
@@ -0,0 +1,42 @@
+#
+# KVM configuration
+#
+
+menuconfig VIRTUALIZATION
+ bool "Virtualization"
+ ---help---
+ Say Y here to get to see options for using your Linux host to run
+ other operating systems inside virtual machines (guests).
+ This option alone does not add any kernel code.
+
+ If you say N, all options in this submenu will be skipped and
+ disabled.
+
+if VIRTUALIZATION
+
+config KVM
+ bool "Kernel-based Virtual Machine (KVM) support"
+ depends on 44x && EXPERIMENTAL
+ select PREEMPT_NOTIFIERS
+ select ANON_INODES
+ # We can only run on Book E hosts so far
+ select KVM_BOOKE_HOST
+ ---help---
+ Support hosting virtualized guest machines. You will also
+ need to select one or more of the processor modules below.
+
+ This module provides access to the hardware capabilities through
+ a character device node named /dev/kvm.
+
+ If unsure, say N.
+
+config KVM_BOOKE_HOST
+ bool "KVM host support for Book E PowerPC processors"
+ depends on KVM && 44x
+ ---help---
+ Provides host support for KVM on Book E PowerPC processors. Currently
+ this works on 440 processors only.
+
+source drivers/virtio/Kconfig
+
+endif # VIRTUALIZATION
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
new file mode 100644
index 000000000000..d0d358d367ec
--- /dev/null
+++ b/arch/powerpc/kvm/Makefile
@@ -0,0 +1,15 @@
+#
+# Makefile for Kernel-based Virtual Machine module
+#
+
+EXTRA_CFLAGS += -Ivirt/kvm -Iarch/powerpc/kvm
+
+common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o)
+
+kvm-objs := $(common-objs) powerpc.o emulate.o booke_guest.o
+obj-$(CONFIG_KVM) += kvm.o
+
+AFLAGS_booke_interrupts.o := -I$(obj)
+
+kvm-booke-host-objs := booke_host.o booke_interrupts.o 44x_tlb.o
+obj-$(CONFIG_KVM_BOOKE_HOST) += kvm-booke-host.o
diff --git a/arch/powerpc/kvm/booke_guest.c b/arch/powerpc/kvm/booke_guest.c
new file mode 100644
index 000000000000..6d9884a6884a
--- /dev/null
+++ b/arch/powerpc/kvm/booke_guest.c
@@ -0,0 +1,615 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2007
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+#include <asm/cputable.h>
+#include <asm/uaccess.h>
+#include <asm/kvm_ppc.h>
+
+#include "44x_tlb.h"
+
+#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
+#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
+
+struct kvm_stats_debugfs_item debugfs_entries[] = {
+ { "exits", VCPU_STAT(sum_exits) },
+ { "mmio", VCPU_STAT(mmio_exits) },
+ { "dcr", VCPU_STAT(dcr_exits) },
+ { "sig", VCPU_STAT(signal_exits) },
+ { "light", VCPU_STAT(light_exits) },
+ { "itlb_r", VCPU_STAT(itlb_real_miss_exits) },
+ { "itlb_v", VCPU_STAT(itlb_virt_miss_exits) },
+ { "dtlb_r", VCPU_STAT(dtlb_real_miss_exits) },
+ { "dtlb_v", VCPU_STAT(dtlb_virt_miss_exits) },
+ { "sysc", VCPU_STAT(syscall_exits) },
+ { "isi", VCPU_STAT(isi_exits) },
+ { "dsi", VCPU_STAT(dsi_exits) },
+ { "inst_emu", VCPU_STAT(emulated_inst_exits) },
+ { "dec", VCPU_STAT(dec_exits) },
+ { "ext_intr", VCPU_STAT(ext_intr_exits) },
+ { NULL }
+};
+
+static const u32 interrupt_msr_mask[16] = {
+ [BOOKE_INTERRUPT_CRITICAL] = MSR_ME,
+ [BOOKE_INTERRUPT_MACHINE_CHECK] = 0,
+ [BOOKE_INTERRUPT_DATA_STORAGE] = MSR_CE|MSR_ME|MSR_DE,
+ [BOOKE_INTERRUPT_INST_STORAGE] = MSR_CE|MSR_ME|MSR_DE,
+ [BOOKE_INTERRUPT_EXTERNAL] = MSR_CE|MSR_ME|MSR_DE,
+ [BOOKE_INTERRUPT_ALIGNMENT] = MSR_CE|MSR_ME|MSR_DE,
+ [BOOKE_INTERRUPT_PROGRAM] = MSR_CE|MSR_ME|MSR_DE,
+ [BOOKE_INTERRUPT_FP_UNAVAIL] = MSR_CE|MSR_ME|MSR_DE,
+ [BOOKE_INTERRUPT_SYSCALL] = MSR_CE|MSR_ME|MSR_DE,
+ [BOOKE_INTERRUPT_AP_UNAVAIL] = MSR_CE|MSR_ME|MSR_DE,
+ [BOOKE_INTERRUPT_DECREMENTER] = MSR_CE|MSR_ME|MSR_DE,
+ [BOOKE_INTERRUPT_FIT] = MSR_CE|MSR_ME|MSR_DE,
+ [BOOKE_INTERRUPT_WATCHDOG] = MSR_ME,
+ [BOOKE_INTERRUPT_DTLB_MISS] = MSR_CE|MSR_ME|MSR_DE,
+ [BOOKE_INTERRUPT_ITLB_MISS] = MSR_CE|MSR_ME|MSR_DE,
+ [BOOKE_INTERRUPT_DEBUG] = MSR_ME,
+};
+
+const unsigned char exception_priority[] = {
+ [BOOKE_INTERRUPT_DATA_STORAGE] = 0,
+ [BOOKE_INTERRUPT_INST_STORAGE] = 1,
+ [BOOKE_INTERRUPT_ALIGNMENT] = 2,
+ [BOOKE_INTERRUPT_PROGRAM] = 3,
+ [BOOKE_INTERRUPT_FP_UNAVAIL] = 4,
+ [BOOKE_INTERRUPT_SYSCALL] = 5,
+ [BOOKE_INTERRUPT_AP_UNAVAIL] = 6,
+ [BOOKE_INTERRUPT_DTLB_MISS] = 7,
+ [BOOKE_INTERRUPT_ITLB_MISS] = 8,
+ [BOOKE_INTERRUPT_MACHINE_CHECK] = 9,
+ [BOOKE_INTERRUPT_DEBUG] = 10,
+ [BOOKE_INTERRUPT_CRITICAL] = 11,
+ [BOOKE_INTERRUPT_WATCHDOG] = 12,
+ [BOOKE_INTERRUPT_EXTERNAL] = 13,
+ [BOOKE_INTERRUPT_FIT] = 14,
+ [BOOKE_INTERRUPT_DECREMENTER] = 15,
+};
+
+const unsigned char priority_exception[] = {
+ BOOKE_INTERRUPT_DATA_STORAGE,
+ BOOKE_INTERRUPT_INST_STORAGE,
+ BOOKE_INTERRUPT_ALIGNMENT,
+ BOOKE_INTERRUPT_PROGRAM,
+ BOOKE_INTERRUPT_FP_UNAVAIL,
+ BOOKE_INTERRUPT_SYSCALL,
+ BOOKE_INTERRUPT_AP_UNAVAIL,
+ BOOKE_INTERRUPT_DTLB_MISS,
+ BOOKE_INTERRUPT_ITLB_MISS,
+ BOOKE_INTERRUPT_MACHINE_CHECK,
+ BOOKE_INTERRUPT_DEBUG,
+ BOOKE_INTERRUPT_CRITICAL,
+ BOOKE_INTERRUPT_WATCHDOG,
+ BOOKE_INTERRUPT_EXTERNAL,
+ BOOKE_INTERRUPT_FIT,
+ BOOKE_INTERRUPT_DECREMENTER,
+};
+
+
+void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
+{
+ struct tlbe *tlbe;
+ int i;
+
+ printk("vcpu %d TLB dump:\n", vcpu->vcpu_id);
+ printk("| %2s | %3s | %8s | %8s | %8s |\n",
+ "nr", "tid", "word0", "word1", "word2");
+
+ for (i = 0; i < PPC44x_TLB_SIZE; i++) {
+ tlbe = &vcpu->arch.guest_tlb[i];
+ if (tlbe->word0 & PPC44x_TLB_VALID)
+ printk(" G%2d | %02X | %08X | %08X | %08X |\n",
+ i, tlbe->tid, tlbe->word0, tlbe->word1,
+ tlbe->word2);
+ }
+
+ for (i = 0; i < PPC44x_TLB_SIZE; i++) {
+ tlbe = &vcpu->arch.shadow_tlb[i];
+ if (tlbe->word0 & PPC44x_TLB_VALID)
+ printk(" S%2d | %02X | %08X | %08X | %08X |\n",
+ i, tlbe->tid, tlbe->word0, tlbe->word1,
+ tlbe->word2);
+ }
+}
+
+/* TODO: use vcpu_printf() */
+void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu)
+{
+ int i;
+
+ printk("pc: %08x msr: %08x\n", vcpu->arch.pc, vcpu->arch.msr);
+ printk("lr: %08x ctr: %08x\n", vcpu->arch.lr, vcpu->arch.ctr);
+ printk("srr0: %08x srr1: %08x\n", vcpu->arch.srr0, vcpu->arch.srr1);
+
+ printk("exceptions: %08lx\n", vcpu->arch.pending_exceptions);
+
+ for (i = 0; i < 32; i += 4) {
+ printk("gpr%02d: %08x %08x %08x %08x\n", i,
+ vcpu->arch.gpr[i],
+ vcpu->arch.gpr[i+1],
+ vcpu->arch.gpr[i+2],
+ vcpu->arch.gpr[i+3]);
+ }
+}
+
+/* Check if we are ready to deliver the interrupt */
+static int kvmppc_can_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt)
+{
+ int r;
+
+ switch (interrupt) {
+ case BOOKE_INTERRUPT_CRITICAL:
+ r = vcpu->arch.msr & MSR_CE;
+ break;
+ case BOOKE_INTERRUPT_MACHINE_CHECK:
+ r = vcpu->arch.msr & MSR_ME;
+ break;
+ case BOOKE_INTERRUPT_EXTERNAL:
+ r = vcpu->arch.msr & MSR_EE;
+ break;
+ case BOOKE_INTERRUPT_DECREMENTER:
+ r = vcpu->arch.msr & MSR_EE;
+ break;
+ case BOOKE_INTERRUPT_FIT:
+ r = vcpu->arch.msr & MSR_EE;
+ break;
+ case BOOKE_INTERRUPT_WATCHDOG:
+ r = vcpu->arch.msr & MSR_CE;
+ break;
+ case BOOKE_INTERRUPT_DEBUG:
+ r = vcpu->arch.msr & MSR_DE;
+ break;
+ default:
+ r = 1;
+ }
+
+ return r;
+}
+
+static void kvmppc_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt)
+{
+ switch (interrupt) {
+ case BOOKE_INTERRUPT_DECREMENTER:
+ vcpu->arch.tsr |= TSR_DIS;
+ break;
+ }
+
+ vcpu->arch.srr0 = vcpu->arch.pc;
+ vcpu->arch.srr1 = vcpu->arch.msr;
+ vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[interrupt];
+ kvmppc_set_msr(vcpu, vcpu->arch.msr & interrupt_msr_mask[interrupt]);
+}
+
+/* Check pending exceptions and deliver one, if possible. */
+void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu)
+{
+ unsigned long *pending = &vcpu->arch.pending_exceptions;
+ unsigned int exception;
+ unsigned int priority;
+
+ priority = find_first_bit(pending, BITS_PER_BYTE * sizeof(*pending));
+ while (priority <= BOOKE_MAX_INTERRUPT) {
+ exception = priority_exception[priority];
+ if (kvmppc_can_deliver_interrupt(vcpu, exception)) {
+ kvmppc_clear_exception(vcpu, exception);
+ kvmppc_deliver_interrupt(vcpu, exception);
+ break;
+ }
+
+ priority = find_next_bit(pending,
+ BITS_PER_BYTE * sizeof(*pending),
+ priority + 1);
+ }
+}
+
+static int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
+{
+ enum emulation_result er;
+ int r;
+
+ er = kvmppc_emulate_instruction(run, vcpu);
+ switch (er) {
+ case EMULATE_DONE:
+ /* Future optimization: only reload non-volatiles if they were
+ * actually modified. */
+ r = RESUME_GUEST_NV;
+ break;
+ case EMULATE_DO_MMIO:
+ run->exit_reason = KVM_EXIT_MMIO;
+ /* We must reload nonvolatiles because "update" load/store
+ * instructions modify register state. */
+ /* Future optimization: only reload non-volatiles if they were
+ * actually modified. */
+ r = RESUME_HOST_NV;
+ break;
+ case EMULATE_FAIL:
+ /* XXX Deliver Program interrupt to guest. */
+ printk(KERN_EMERG "%s: emulation failed (%08x)\n", __func__,
+ vcpu->arch.last_inst);
+ r = RESUME_HOST;
+ break;
+ default:
+ BUG();
+ }
+
+ return r;
+}
+
+/**
+ * kvmppc_handle_exit
+ *
+ * Return value is in the form (errcode<<2 | RESUME_FLAG_HOST | RESUME_FLAG_NV)
+ */
+int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned int exit_nr)
+{
+ enum emulation_result er;
+ int r = RESUME_HOST;
+
+ local_irq_enable();
+
+ run->exit_reason = KVM_EXIT_UNKNOWN;
+ run->ready_for_interrupt_injection = 1;
+
+ switch (exit_nr) {
+ case BOOKE_INTERRUPT_MACHINE_CHECK:
+ printk("MACHINE CHECK: %lx\n", mfspr(SPRN_MCSR));
+ kvmppc_dump_vcpu(vcpu);
+ r = RESUME_HOST;
+ break;
+
+ case BOOKE_INTERRUPT_EXTERNAL:
+ case BOOKE_INTERRUPT_DECREMENTER:
+ /* Since we switched IVPR back to the host's value, the host
+ * handled this interrupt the moment we enabled interrupts.
+ * Now we just offer it a chance to reschedule the guest. */
+
+ /* XXX At this point the TLB still holds our shadow TLB, so if
+ * we do reschedule the host will fault over it. Perhaps we
+ * should politely restore the host's entries to minimize
+ * misses before ceding control. */
+ if (need_resched())
+ cond_resched();
+ if (exit_nr == BOOKE_INTERRUPT_DECREMENTER)
+ vcpu->stat.dec_exits++;
+ else
+ vcpu->stat.ext_intr_exits++;
+ r = RESUME_GUEST;
+ break;
+
+ case BOOKE_INTERRUPT_PROGRAM:
+ if (vcpu->arch.msr & MSR_PR) {
+ /* Program traps generated by user-level software must be handled
+ * by the guest kernel. */
+ vcpu->arch.esr = vcpu->arch.fault_esr;
+ kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM);
+ r = RESUME_GUEST;
+ break;
+ }
+
+ er = kvmppc_emulate_instruction(run, vcpu);
+ switch (er) {
+ case EMULATE_DONE:
+ /* Future optimization: only reload non-volatiles if
+ * they were actually modified by emulation. */
+ vcpu->stat.emulated_inst_exits++;
+ r = RESUME_GUEST_NV;
+ break;
+ case EMULATE_DO_DCR:
+ run->exit_reason = KVM_EXIT_DCR;
+ r = RESUME_HOST;
+ break;
+ case EMULATE_FAIL:
+ /* XXX Deliver Program interrupt to guest. */
+ printk(KERN_CRIT "%s: emulation at %x failed (%08x)\n",
+ __func__, vcpu->arch.pc, vcpu->arch.last_inst);
+ /* For debugging, encode the failing instruction and
+ * report it to userspace. */
+ run->hw.hardware_exit_reason = ~0ULL << 32;
+ run->hw.hardware_exit_reason |= vcpu->arch.last_inst;
+ r = RESUME_HOST;
+ break;
+ default:
+ BUG();
+ }
+ break;
+
+ case BOOKE_INTERRUPT_DATA_STORAGE:
+ vcpu->arch.dear = vcpu->arch.fault_dear;
+ vcpu->arch.esr = vcpu->arch.fault_esr;
+ kvmppc_queue_exception(vcpu, exit_nr);
+ vcpu->stat.dsi_exits++;
+ r = RESUME_GUEST;
+ break;
+
+ case BOOKE_INTERRUPT_INST_STORAGE:
+ vcpu->arch.esr = vcpu->arch.fault_esr;
+ kvmppc_queue_exception(vcpu, exit_nr);
+ vcpu->stat.isi_exits++;
+ r = RESUME_GUEST;
+ break;
+
+ case BOOKE_INTERRUPT_SYSCALL:
+ kvmppc_queue_exception(vcpu, exit_nr);
+ vcpu->stat.syscall_exits++;
+ r = RESUME_GUEST;
+ break;
+
+ case BOOKE_INTERRUPT_DTLB_MISS: {
+ struct tlbe *gtlbe;
+ unsigned long eaddr = vcpu->arch.fault_dear;
+ gfn_t gfn;
+
+ /* Check the guest TLB. */
+ gtlbe = kvmppc_44x_dtlb_search(vcpu, eaddr);
+ if (!gtlbe) {
+ /* The guest didn't have a mapping for it. */
+ kvmppc_queue_exception(vcpu, exit_nr);
+ vcpu->arch.dear = vcpu->arch.fault_dear;
+ vcpu->arch.esr = vcpu->arch.fault_esr;
+ vcpu->stat.dtlb_real_miss_exits++;
+ r = RESUME_GUEST;
+ break;
+ }
+
+ vcpu->arch.paddr_accessed = tlb_xlate(gtlbe, eaddr);
+ gfn = vcpu->arch.paddr_accessed >> PAGE_SHIFT;
+
+ if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
+ /* The guest TLB had a mapping, but the shadow TLB
+ * didn't, and it is RAM. This could be because:
+ * a) the entry is mapping the host kernel, or
+ * b) the guest used a large mapping which we're faking
+ * Either way, we need to satisfy the fault without
+ * invoking the guest. */
+ kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid,
+ gtlbe->word2);
+ vcpu->stat.dtlb_virt_miss_exits++;
+ r = RESUME_GUEST;
+ } else {
+ /* Guest has mapped and accessed a page which is not
+ * actually RAM. */
+ r = kvmppc_emulate_mmio(run, vcpu);
+ }
+
+ break;
+ }
+
+ case BOOKE_INTERRUPT_ITLB_MISS: {
+ struct tlbe *gtlbe;
+ unsigned long eaddr = vcpu->arch.pc;
+ gfn_t gfn;
+
+ r = RESUME_GUEST;
+
+ /* Check the guest TLB. */
+ gtlbe = kvmppc_44x_itlb_search(vcpu, eaddr);
+ if (!gtlbe) {
+ /* The guest didn't have a mapping for it. */
+ kvmppc_queue_exception(vcpu, exit_nr);
+ vcpu->stat.itlb_real_miss_exits++;
+ break;
+ }
+
+ vcpu->stat.itlb_virt_miss_exits++;
+
+ gfn = tlb_xlate(gtlbe, eaddr) >> PAGE_SHIFT;
+
+ if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
+ /* The guest TLB had a mapping, but the shadow TLB
+ * didn't. This could be because:
+ * a) the entry is mapping the host kernel, or
+ * b) the guest used a large mapping which we're faking
+ * Either way, we need to satisfy the fault without
+ * invoking the guest. */
+ kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid,
+ gtlbe->word2);
+ } else {
+ /* Guest mapped and leaped at non-RAM! */
+ kvmppc_queue_exception(vcpu,
+ BOOKE_INTERRUPT_MACHINE_CHECK);
+ }
+
+ break;
+ }
+
+ default:
+ printk(KERN_EMERG "exit_nr %d\n", exit_nr);
+ BUG();
+ }
+
+ local_irq_disable();
+
+ kvmppc_check_and_deliver_interrupts(vcpu);
+
+ /* Do some exit accounting. */
+ vcpu->stat.sum_exits++;
+ if (!(r & RESUME_HOST)) {
+ /* To avoid clobbering exit_reason, only check for signals if
+ * we aren't already exiting to userspace for some other
+ * reason. */
+ if (signal_pending(current)) {
+ run->exit_reason = KVM_EXIT_INTR;
+ r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
+
+ vcpu->stat.signal_exits++;
+ } else {
+ vcpu->stat.light_exits++;
+ }
+ } else {
+ switch (run->exit_reason) {
+ case KVM_EXIT_MMIO:
+ vcpu->stat.mmio_exits++;
+ break;
+ case KVM_EXIT_DCR:
+ vcpu->stat.dcr_exits++;
+ break;
+ case KVM_EXIT_INTR:
+ vcpu->stat.signal_exits++;
+ break;
+ }
+ }
+
+ return r;
+}
+
+/* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */
+int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+ struct tlbe *tlbe = &vcpu->arch.guest_tlb[0];
+
+ tlbe->tid = 0;
+ tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID;
+ tlbe->word1 = 0;
+ tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR;
+
+ tlbe++;
+ tlbe->tid = 0;
+ tlbe->word0 = 0xef600000 | PPC44x_TLB_4K | PPC44x_TLB_VALID;
+ tlbe->word1 = 0xef600000;
+ tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR
+ | PPC44x_TLB_I | PPC44x_TLB_G;
+
+ vcpu->arch.pc = 0;
+ vcpu->arch.msr = 0;
+ vcpu->arch.gpr[1] = (16<<20) - 8; /* -8 for the callee-save LR slot */
+
+ /* Eye-catching number so we know if the guest takes an interrupt
+ * before it's programmed its own IVPR. */
+ vcpu->arch.ivpr = 0x55550000;
+
+ /* Since the guest can directly access the timebase, it must know the
+ * real timebase frequency. Accordingly, it must see the state of
+ * CCR1[TCS]. */
+ vcpu->arch.ccr1 = mfspr(SPRN_CCR1);
+
+ return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+ int i;
+
+ regs->pc = vcpu->arch.pc;
+ regs->cr = vcpu->arch.cr;
+ regs->ctr = vcpu->arch.ctr;
+ regs->lr = vcpu->arch.lr;
+ regs->xer = vcpu->arch.xer;
+ regs->msr = vcpu->arch.msr;
+ regs->srr0 = vcpu->arch.srr0;
+ regs->srr1 = vcpu->arch.srr1;
+ regs->pid = vcpu->arch.pid;
+ regs->sprg0 = vcpu->arch.sprg0;
+ regs->sprg1 = vcpu->arch.sprg1;
+ regs->sprg2 = vcpu->arch.sprg2;
+ regs->sprg3 = vcpu->arch.sprg3;
+ regs->sprg5 = vcpu->arch.sprg4;
+ regs->sprg6 = vcpu->arch.sprg5;
+ regs->sprg7 = vcpu->arch.sprg6;
+
+ for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
+ regs->gpr[i] = vcpu->arch.gpr[i];
+
+ return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+ int i;
+
+ vcpu->arch.pc = regs->pc;
+ vcpu->arch.cr = regs->cr;
+ vcpu->arch.ctr = regs->ctr;
+ vcpu->arch.lr = regs->lr;
+ vcpu->arch.xer = regs->xer;
+ vcpu->arch.msr = regs->msr;
+ vcpu->arch.srr0 = regs->srr0;
+ vcpu->arch.srr1 = regs->srr1;
+ vcpu->arch.sprg0 = regs->sprg0;
+ vcpu->arch.sprg1 = regs->sprg1;
+ vcpu->arch.sprg2 = regs->sprg2;
+ vcpu->arch.sprg3 = regs->sprg3;
+ vcpu->arch.sprg5 = regs->sprg4;
+ vcpu->arch.sprg6 = regs->sprg5;
+ vcpu->arch.sprg7 = regs->sprg6;
+
+ for (i = 0; i < ARRAY_SIZE(vcpu->arch.gpr); i++)
+ vcpu->arch.gpr[i] = regs->gpr[i];
+
+ return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ return -ENOTSUPP;
+}
+
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ return -ENOTSUPP;
+}
+
+int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+ return -ENOTSUPP;
+}
+
+int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+ return -ENOTSUPP;
+}
+
+/* 'linear_address' is actually an encoding of AS|PID|EADDR . */
+int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
+ struct kvm_translation *tr)
+{
+ struct tlbe *gtlbe;
+ int index;
+ gva_t eaddr;
+ u8 pid;
+ u8 as;
+
+ eaddr = tr->linear_address;
+ pid = (tr->linear_address >> 32) & 0xff;
+ as = (tr->linear_address >> 40) & 0x1;
+
+ index = kvmppc_44x_tlb_index(vcpu, eaddr, pid, as);
+ if (index == -1) {
+ tr->valid = 0;
+ return 0;
+ }
+
+ gtlbe = &vcpu->arch.guest_tlb[index];
+
+ tr->physical_address = tlb_xlate(gtlbe, eaddr);
+ /* XXX what does "writeable" and "usermode" even mean? */
+ tr->valid = 1;
+
+ return 0;
+}
diff --git a/arch/powerpc/kvm/booke_host.c b/arch/powerpc/kvm/booke_host.c
new file mode 100644
index 000000000000..b480341bc31e
--- /dev/null
+++ b/arch/powerpc/kvm/booke_host.c
@@ -0,0 +1,83 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/kvm_host.h>
+#include <linux/module.h>
+#include <asm/cacheflush.h>
+#include <asm/kvm_ppc.h>
+
+unsigned long kvmppc_booke_handlers;
+
+static int kvmppc_booke_init(void)
+{
+ unsigned long ivor[16];
+ unsigned long max_ivor = 0;
+ int i;
+
+ /* We install our own exception handlers by hijacking IVPR. IVPR must
+ * be 16-bit aligned, so we need a 64KB allocation. */
+ kvmppc_booke_handlers = __get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ VCPU_SIZE_ORDER);
+ if (!kvmppc_booke_handlers)
+ return -ENOMEM;
+
+ /* XXX make sure our handlers are smaller than Linux's */
+
+ /* Copy our interrupt handlers to match host IVORs. That way we don't
+ * have to swap the IVORs on every guest/host transition. */
+ ivor[0] = mfspr(SPRN_IVOR0);
+ ivor[1] = mfspr(SPRN_IVOR1);
+ ivor[2] = mfspr(SPRN_IVOR2);
+ ivor[3] = mfspr(SPRN_IVOR3);
+ ivor[4] = mfspr(SPRN_IVOR4);
+ ivor[5] = mfspr(SPRN_IVOR5);
+ ivor[6] = mfspr(SPRN_IVOR6);
+ ivor[7] = mfspr(SPRN_IVOR7);
+ ivor[8] = mfspr(SPRN_IVOR8);
+ ivor[9] = mfspr(SPRN_IVOR9);
+ ivor[10] = mfspr(SPRN_IVOR10);
+ ivor[11] = mfspr(SPRN_IVOR11);
+ ivor[12] = mfspr(SPRN_IVOR12);
+ ivor[13] = mfspr(SPRN_IVOR13);
+ ivor[14] = mfspr(SPRN_IVOR14);
+ ivor[15] = mfspr(SPRN_IVOR15);
+
+ for (i = 0; i < 16; i++) {
+ if (ivor[i] > max_ivor)
+ max_ivor = ivor[i];
+
+ memcpy((void *)kvmppc_booke_handlers + ivor[i],
+ kvmppc_handlers_start + i * kvmppc_handler_len,
+ kvmppc_handler_len);
+ }
+ flush_icache_range(kvmppc_booke_handlers,
+ kvmppc_booke_handlers + max_ivor + kvmppc_handler_len);
+
+ return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE);
+}
+
+static void __exit kvmppc_booke_exit(void)
+{
+ free_pages(kvmppc_booke_handlers, VCPU_SIZE_ORDER);
+ kvm_exit();
+}
+
+module_init(kvmppc_booke_init)
+module_exit(kvmppc_booke_exit)
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
new file mode 100644
index 000000000000..3b653b5309b8
--- /dev/null
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -0,0 +1,436 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2007
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#include <asm/ppc_asm.h>
+#include <asm/kvm_asm.h>
+#include <asm/reg.h>
+#include <asm/mmu-44x.h>
+#include <asm/page.h>
+#include <asm/asm-offsets.h>
+
+#define KVMPPC_MSR_MASK (MSR_CE|MSR_EE|MSR_PR|MSR_DE|MSR_ME|MSR_IS|MSR_DS)
+
+#define VCPU_GPR(n) (VCPU_GPRS + (n * 4))
+
+/* The host stack layout: */
+#define HOST_R1 0 /* Implied by stwu. */
+#define HOST_CALLEE_LR 4
+#define HOST_RUN 8
+/* r2 is special: it holds 'current', and it made nonvolatile in the
+ * kernel with the -ffixed-r2 gcc option. */
+#define HOST_R2 12
+#define HOST_NV_GPRS 16
+#define HOST_NV_GPR(n) (HOST_NV_GPRS + ((n - 14) * 4))
+#define HOST_MIN_STACK_SIZE (HOST_NV_GPR(31) + 4)
+#define HOST_STACK_SIZE (((HOST_MIN_STACK_SIZE + 15) / 16) * 16) /* Align. */
+#define HOST_STACK_LR (HOST_STACK_SIZE + 4) /* In caller stack frame. */
+
+#define NEED_INST_MASK ((1<<BOOKE_INTERRUPT_PROGRAM) | \
+ (1<<BOOKE_INTERRUPT_DTLB_MISS))
+
+#define NEED_DEAR_MASK ((1<<BOOKE_INTERRUPT_DATA_STORAGE) | \
+ (1<<BOOKE_INTERRUPT_DTLB_MISS))
+
+#define NEED_ESR_MASK ((1<<BOOKE_INTERRUPT_DATA_STORAGE) | \
+ (1<<BOOKE_INTERRUPT_INST_STORAGE) | \
+ (1<<BOOKE_INTERRUPT_PROGRAM) | \
+ (1<<BOOKE_INTERRUPT_DTLB_MISS))
+
+.macro KVM_HANDLER ivor_nr
+_GLOBAL(kvmppc_handler_\ivor_nr)
+ /* Get pointer to vcpu and record exit number. */
+ mtspr SPRN_SPRG0, r4
+ mfspr r4, SPRN_SPRG1
+ stw r5, VCPU_GPR(r5)(r4)
+ stw r6, VCPU_GPR(r6)(r4)
+ mfctr r5
+ lis r6, kvmppc_resume_host@h
+ stw r5, VCPU_CTR(r4)
+ li r5, \ivor_nr
+ ori r6, r6, kvmppc_resume_host@l
+ mtctr r6
+ bctr
+.endm
+
+_GLOBAL(kvmppc_handlers_start)
+KVM_HANDLER BOOKE_INTERRUPT_CRITICAL
+KVM_HANDLER BOOKE_INTERRUPT_MACHINE_CHECK
+KVM_HANDLER BOOKE_INTERRUPT_DATA_STORAGE
+KVM_HANDLER BOOKE_INTERRUPT_INST_STORAGE
+KVM_HANDLER BOOKE_INTERRUPT_EXTERNAL
+KVM_HANDLER BOOKE_INTERRUPT_ALIGNMENT
+KVM_HANDLER BOOKE_INTERRUPT_PROGRAM
+KVM_HANDLER BOOKE_INTERRUPT_FP_UNAVAIL
+KVM_HANDLER BOOKE_INTERRUPT_SYSCALL
+KVM_HANDLER BOOKE_INTERRUPT_AP_UNAVAIL
+KVM_HANDLER BOOKE_INTERRUPT_DECREMENTER
+KVM_HANDLER BOOKE_INTERRUPT_FIT
+KVM_HANDLER BOOKE_INTERRUPT_WATCHDOG
+KVM_HANDLER BOOKE_INTERRUPT_DTLB_MISS
+KVM_HANDLER BOOKE_INTERRUPT_ITLB_MISS
+KVM_HANDLER BOOKE_INTERRUPT_DEBUG
+
+_GLOBAL(kvmppc_handler_len)
+ .long kvmppc_handler_1 - kvmppc_handler_0
+
+
+/* Registers:
+ * SPRG0: guest r4
+ * r4: vcpu pointer
+ * r5: KVM exit number
+ */
+_GLOBAL(kvmppc_resume_host)
+ stw r3, VCPU_GPR(r3)(r4)
+ mfcr r3
+ stw r3, VCPU_CR(r4)
+ stw r7, VCPU_GPR(r7)(r4)
+ stw r8, VCPU_GPR(r8)(r4)
+ stw r9, VCPU_GPR(r9)(r4)
+
+ li r6, 1
+ slw r6, r6, r5
+
+ /* Save the faulting instruction and all GPRs for emulation. */
+ andi. r7, r6, NEED_INST_MASK
+ beq ..skip_inst_copy
+ mfspr r9, SPRN_SRR0
+ mfmsr r8
+ ori r7, r8, MSR_DS
+ mtmsr r7
+ isync
+ lwz r9, 0(r9)
+ mtmsr r8
+ isync
+ stw r9, VCPU_LAST_INST(r4)
+
+ stw r15, VCPU_GPR(r15)(r4)
+ stw r16, VCPU_GPR(r16)(r4)
+ stw r17, VCPU_GPR(r17)(r4)
+ stw r18, VCPU_GPR(r18)(r4)
+ stw r19, VCPU_GPR(r19)(r4)
+ stw r20, VCPU_GPR(r20)(r4)
+ stw r21, VCPU_GPR(r21)(r4)
+ stw r22, VCPU_GPR(r22)(r4)
+ stw r23, VCPU_GPR(r23)(r4)
+ stw r24, VCPU_GPR(r24)(r4)
+ stw r25, VCPU_GPR(r25)(r4)
+ stw r26, VCPU_GPR(r26)(r4)
+ stw r27, VCPU_GPR(r27)(r4)
+ stw r28, VCPU_GPR(r28)(r4)
+ stw r29, VCPU_GPR(r29)(r4)
+ stw r30, VCPU_GPR(r30)(r4)
+ stw r31, VCPU_GPR(r31)(r4)
+..skip_inst_copy:
+
+ /* Also grab DEAR and ESR before the host can clobber them. */
+
+ andi. r7, r6, NEED_DEAR_MASK
+ beq ..skip_dear
+ mfspr r9, SPRN_DEAR
+ stw r9, VCPU_FAULT_DEAR(r4)
+..skip_dear:
+
+ andi. r7, r6, NEED_ESR_MASK
+ beq ..skip_esr
+ mfspr r9, SPRN_ESR
+ stw r9, VCPU_FAULT_ESR(r4)
+..skip_esr:
+
+ /* Save remaining volatile guest register state to vcpu. */
+ stw r0, VCPU_GPR(r0)(r4)
+ stw r1, VCPU_GPR(r1)(r4)
+ stw r2, VCPU_GPR(r2)(r4)
+ stw r10, VCPU_GPR(r10)(r4)
+ stw r11, VCPU_GPR(r11)(r4)
+ stw r12, VCPU_GPR(r12)(r4)
+ stw r13, VCPU_GPR(r13)(r4)
+ stw r14, VCPU_GPR(r14)(r4) /* We need a NV GPR below. */
+ mflr r3
+ stw r3, VCPU_LR(r4)
+ mfxer r3
+ stw r3, VCPU_XER(r4)
+ mfspr r3, SPRN_SPRG0
+ stw r3, VCPU_GPR(r4)(r4)
+ mfspr r3, SPRN_SRR0
+ stw r3, VCPU_PC(r4)
+
+ /* Restore host stack pointer and PID before IVPR, since the host
+ * exception handlers use them. */
+ lwz r1, VCPU_HOST_STACK(r4)
+ lwz r3, VCPU_HOST_PID(r4)
+ mtspr SPRN_PID, r3
+
+ /* Restore host IVPR before re-enabling interrupts. We cheat and know
+ * that Linux IVPR is always 0xc0000000. */
+ lis r3, 0xc000
+ mtspr SPRN_IVPR, r3
+
+ /* Switch to kernel stack and jump to handler. */
+ LOAD_REG_ADDR(r3, kvmppc_handle_exit)
+ mtctr r3
+ lwz r3, HOST_RUN(r1)
+ lwz r2, HOST_R2(r1)
+ mr r14, r4 /* Save vcpu pointer. */
+
+ bctrl /* kvmppc_handle_exit() */
+
+ /* Restore vcpu pointer and the nonvolatiles we used. */
+ mr r4, r14
+ lwz r14, VCPU_GPR(r14)(r4)
+
+ /* Sometimes instruction emulation must restore complete GPR state. */
+ andi. r5, r3, RESUME_FLAG_NV
+ beq ..skip_nv_load
+ lwz r15, VCPU_GPR(r15)(r4)
+ lwz r16, VCPU_GPR(r16)(r4)
+ lwz r17, VCPU_GPR(r17)(r4)
+ lwz r18, VCPU_GPR(r18)(r4)
+ lwz r19, VCPU_GPR(r19)(r4)
+ lwz r20, VCPU_GPR(r20)(r4)
+ lwz r21, VCPU_GPR(r21)(r4)
+ lwz r22, VCPU_GPR(r22)(r4)
+ lwz r23, VCPU_GPR(r23)(r4)
+ lwz r24, VCPU_GPR(r24)(r4)
+ lwz r25, VCPU_GPR(r25)(r4)
+ lwz r26, VCPU_GPR(r26)(r4)
+ lwz r27, VCPU_GPR(r27)(r4)
+ lwz r28, VCPU_GPR(r28)(r4)
+ lwz r29, VCPU_GPR(r29)(r4)
+ lwz r30, VCPU_GPR(r30)(r4)
+ lwz r31, VCPU_GPR(r31)(r4)
+..skip_nv_load:
+
+ /* Should we return to the guest? */
+ andi. r5, r3, RESUME_FLAG_HOST
+ beq lightweight_exit
+
+ srawi r3, r3, 2 /* Shift -ERR back down. */
+
+heavyweight_exit:
+ /* Not returning to guest. */
+
+ /* We already saved guest volatile register state; now save the
+ * non-volatiles. */
+ stw r15, VCPU_GPR(r15)(r4)
+ stw r16, VCPU_GPR(r16)(r4)
+ stw r17, VCPU_GPR(r17)(r4)
+ stw r18, VCPU_GPR(r18)(r4)
+ stw r19, VCPU_GPR(r19)(r4)
+ stw r20, VCPU_GPR(r20)(r4)
+ stw r21, VCPU_GPR(r21)(r4)
+ stw r22, VCPU_GPR(r22)(r4)
+ stw r23, VCPU_GPR(r23)(r4)
+ stw r24, VCPU_GPR(r24)(r4)
+ stw r25, VCPU_GPR(r25)(r4)
+ stw r26, VCPU_GPR(r26)(r4)
+ stw r27, VCPU_GPR(r27)(r4)
+ stw r28, VCPU_GPR(r28)(r4)
+ stw r29, VCPU_GPR(r29)(r4)
+ stw r30, VCPU_GPR(r30)(r4)
+ stw r31, VCPU_GPR(r31)(r4)
+
+ /* Load host non-volatile register state from host stack. */
+ lwz r14, HOST_NV_GPR(r14)(r1)
+ lwz r15, HOST_NV_GPR(r15)(r1)
+ lwz r16, HOST_NV_GPR(r16)(r1)
+ lwz r17, HOST_NV_GPR(r17)(r1)
+ lwz r18, HOST_NV_GPR(r18)(r1)
+ lwz r19, HOST_NV_GPR(r19)(r1)
+ lwz r20, HOST_NV_GPR(r20)(r1)
+ lwz r21, HOST_NV_GPR(r21)(r1)
+ lwz r22, HOST_NV_GPR(r22)(r1)
+ lwz r23, HOST_NV_GPR(r23)(r1)
+ lwz r24, HOST_NV_GPR(r24)(r1)
+ lwz r25, HOST_NV_GPR(r25)(r1)
+ lwz r26, HOST_NV_GPR(r26)(r1)
+ lwz r27, HOST_NV_GPR(r27)(r1)
+ lwz r28, HOST_NV_GPR(r28)(r1)
+ lwz r29, HOST_NV_GPR(r29)(r1)
+ lwz r30, HOST_NV_GPR(r30)(r1)
+ lwz r31, HOST_NV_GPR(r31)(r1)
+
+ /* Return to kvm_vcpu_run(). */
+ lwz r4, HOST_STACK_LR(r1)
+ addi r1, r1, HOST_STACK_SIZE
+ mtlr r4
+ /* r3 still contains the return code from kvmppc_handle_exit(). */
+ blr
+
+
+/* Registers:
+ * r3: kvm_run pointer
+ * r4: vcpu pointer
+ */
+_GLOBAL(__kvmppc_vcpu_run)
+ stwu r1, -HOST_STACK_SIZE(r1)
+ stw r1, VCPU_HOST_STACK(r4) /* Save stack pointer to vcpu. */
+
+ /* Save host state to stack. */
+ stw r3, HOST_RUN(r1)
+ mflr r3
+ stw r3, HOST_STACK_LR(r1)
+
+ /* Save host non-volatile register state to stack. */
+ stw r14, HOST_NV_GPR(r14)(r1)
+ stw r15, HOST_NV_GPR(r15)(r1)
+ stw r16, HOST_NV_GPR(r16)(r1)
+ stw r17, HOST_NV_GPR(r17)(r1)
+ stw r18, HOST_NV_GPR(r18)(r1)
+ stw r19, HOST_NV_GPR(r19)(r1)
+ stw r20, HOST_NV_GPR(r20)(r1)
+ stw r21, HOST_NV_GPR(r21)(r1)
+ stw r22, HOST_NV_GPR(r22)(r1)
+ stw r23, HOST_NV_GPR(r23)(r1)
+ stw r24, HOST_NV_GPR(r24)(r1)
+ stw r25, HOST_NV_GPR(r25)(r1)
+ stw r26, HOST_NV_GPR(r26)(r1)
+ stw r27, HOST_NV_GPR(r27)(r1)
+ stw r28, HOST_NV_GPR(r28)(r1)
+ stw r29, HOST_NV_GPR(r29)(r1)
+ stw r30, HOST_NV_GPR(r30)(r1)
+ stw r31, HOST_NV_GPR(r31)(r1)
+
+ /* Load guest non-volatiles. */
+ lwz r14, VCPU_GPR(r14)(r4)
+ lwz r15, VCPU_GPR(r15)(r4)
+ lwz r16, VCPU_GPR(r16)(r4)
+ lwz r17, VCPU_GPR(r17)(r4)
+ lwz r18, VCPU_GPR(r18)(r4)
+ lwz r19, VCPU_GPR(r19)(r4)
+ lwz r20, VCPU_GPR(r20)(r4)
+ lwz r21, VCPU_GPR(r21)(r4)
+ lwz r22, VCPU_GPR(r22)(r4)
+ lwz r23, VCPU_GPR(r23)(r4)
+ lwz r24, VCPU_GPR(r24)(r4)
+ lwz r25, VCPU_GPR(r25)(r4)
+ lwz r26, VCPU_GPR(r26)(r4)
+ lwz r27, VCPU_GPR(r27)(r4)
+ lwz r28, VCPU_GPR(r28)(r4)
+ lwz r29, VCPU_GPR(r29)(r4)
+ lwz r30, VCPU_GPR(r30)(r4)
+ lwz r31, VCPU_GPR(r31)(r4)
+
+lightweight_exit:
+ stw r2, HOST_R2(r1)
+
+ mfspr r3, SPRN_PID
+ stw r3, VCPU_HOST_PID(r4)
+ lwz r3, VCPU_PID(r4)
+ mtspr SPRN_PID, r3
+
+ /* Prevent all TLB updates. */
+ mfmsr r5
+ lis r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@h
+ ori r6, r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@l
+ andc r6, r5, r6
+ mtmsr r6
+
+ /* Save the host's non-pinned TLB mappings, and load the guest mappings
+ * over them. Leave the host's "pinned" kernel mappings in place. */
+ /* XXX optimization: use generation count to avoid swapping unmodified
+ * entries. */
+ mfspr r10, SPRN_MMUCR /* Save host MMUCR. */
+ lis r8, tlb_44x_hwater@ha
+ lwz r8, tlb_44x_hwater@l(r8)
+ addi r3, r4, VCPU_HOST_TLB - 4
+ addi r9, r4, VCPU_SHADOW_TLB - 4
+ li r6, 0
+1:
+ /* Save host entry. */
+ tlbre r7, r6, PPC44x_TLB_PAGEID
+ mfspr r5, SPRN_MMUCR
+ stwu r5, 4(r3)
+ stwu r7, 4(r3)
+ tlbre r7, r6, PPC44x_TLB_XLAT
+ stwu r7, 4(r3)
+ tlbre r7, r6, PPC44x_TLB_ATTRIB
+ stwu r7, 4(r3)
+ /* Load guest entry. */
+ lwzu r7, 4(r9)
+ mtspr SPRN_MMUCR, r7
+ lwzu r7, 4(r9)
+ tlbwe r7, r6, PPC44x_TLB_PAGEID
+ lwzu r7, 4(r9)
+ tlbwe r7, r6, PPC44x_TLB_XLAT
+ lwzu r7, 4(r9)
+ tlbwe r7, r6, PPC44x_TLB_ATTRIB
+ /* Increment index. */
+ addi r6, r6, 1
+ cmpw r6, r8
+ blt 1b
+ mtspr SPRN_MMUCR, r10 /* Restore host MMUCR. */
+
+ iccci 0, 0 /* XXX hack */
+
+ /* Load some guest volatiles. */
+ lwz r0, VCPU_GPR(r0)(r4)
+ lwz r2, VCPU_GPR(r2)(r4)
+ lwz r9, VCPU_GPR(r9)(r4)
+ lwz r10, VCPU_GPR(r10)(r4)
+ lwz r11, VCPU_GPR(r11)(r4)
+ lwz r12, VCPU_GPR(r12)(r4)
+ lwz r13, VCPU_GPR(r13)(r4)
+ lwz r3, VCPU_LR(r4)
+ mtlr r3
+ lwz r3, VCPU_XER(r4)
+ mtxer r3
+
+ /* Switch the IVPR. XXX If we take a TLB miss after this we're screwed,
+ * so how do we make sure vcpu won't fault? */
+ lis r8, kvmppc_booke_handlers@ha
+ lwz r8, kvmppc_booke_handlers@l(r8)
+ mtspr SPRN_IVPR, r8
+
+ /* Save vcpu pointer for the exception handlers. */
+ mtspr SPRN_SPRG1, r4
+
+ /* Can't switch the stack pointer until after IVPR is switched,
+ * because host interrupt handlers would get confused. */
+ lwz r1, VCPU_GPR(r1)(r4)
+
+ /* XXX handle USPRG0 */
+ /* Host interrupt handlers may have clobbered these guest-readable
+ * SPRGs, so we need to reload them here with the guest's values. */
+ lwz r3, VCPU_SPRG4(r4)
+ mtspr SPRN_SPRG4, r3
+ lwz r3, VCPU_SPRG5(r4)
+ mtspr SPRN_SPRG5, r3
+ lwz r3, VCPU_SPRG6(r4)
+ mtspr SPRN_SPRG6, r3
+ lwz r3, VCPU_SPRG7(r4)
+ mtspr SPRN_SPRG7, r3
+
+ /* Finish loading guest volatiles and jump to guest. */
+ lwz r3, VCPU_CTR(r4)
+ mtctr r3
+ lwz r3, VCPU_CR(r4)
+ mtcr r3
+ lwz r5, VCPU_GPR(r5)(r4)
+ lwz r6, VCPU_GPR(r6)(r4)
+ lwz r7, VCPU_GPR(r7)(r4)
+ lwz r8, VCPU_GPR(r8)(r4)
+ lwz r3, VCPU_PC(r4)
+ mtsrr0 r3
+ lwz r3, VCPU_MSR(r4)
+ oris r3, r3, KVMPPC_MSR_MASK@h
+ ori r3, r3, KVMPPC_MSR_MASK@l
+ mtsrr1 r3
+ lwz r3, VCPU_GPR(r3)(r4)
+ lwz r4, VCPU_GPR(r4)(r4)
+ rfi
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
new file mode 100644
index 000000000000..a03fe0c80698
--- /dev/null
+++ b/arch/powerpc/kvm/emulate.c
@@ -0,0 +1,760 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2007
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#include <linux/jiffies.h>
+#include <linux/timer.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kvm_host.h>
+
+#include <asm/dcr.h>
+#include <asm/dcr-regs.h>
+#include <asm/time.h>
+#include <asm/byteorder.h>
+#include <asm/kvm_ppc.h>
+
+#include "44x_tlb.h"
+
+/* Instruction decoding */
+static inline unsigned int get_op(u32 inst)
+{
+ return inst >> 26;
+}
+
+static inline unsigned int get_xop(u32 inst)
+{
+ return (inst >> 1) & 0x3ff;
+}
+
+static inline unsigned int get_sprn(u32 inst)
+{
+ return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
+}
+
+static inline unsigned int get_dcrn(u32 inst)
+{
+ return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
+}
+
+static inline unsigned int get_rt(u32 inst)
+{
+ return (inst >> 21) & 0x1f;
+}
+
+static inline unsigned int get_rs(u32 inst)
+{
+ return (inst >> 21) & 0x1f;
+}
+
+static inline unsigned int get_ra(u32 inst)
+{
+ return (inst >> 16) & 0x1f;
+}
+
+static inline unsigned int get_rb(u32 inst)
+{
+ return (inst >> 11) & 0x1f;
+}
+
+static inline unsigned int get_rc(u32 inst)
+{
+ return inst & 0x1;
+}
+
+static inline unsigned int get_ws(u32 inst)
+{
+ return (inst >> 11) & 0x1f;
+}
+
+static inline unsigned int get_d(u32 inst)
+{
+ return inst & 0xffff;
+}
+
+static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
+ const struct tlbe *tlbe)
+{
+ gpa_t gpa;
+
+ if (!get_tlb_v(tlbe))
+ return 0;
+
+ /* Does it match current guest AS? */
+ /* XXX what about IS != DS? */
+ if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS))
+ return 0;
+
+ gpa = get_tlb_raddr(tlbe);
+ if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT))
+ /* Mapping is not for RAM. */
+ return 0;
+
+ return 1;
+}
+
+static int kvmppc_emul_tlbwe(struct kvm_vcpu *vcpu, u32 inst)
+{
+ u64 eaddr;
+ u64 raddr;
+ u64 asid;
+ u32 flags;
+ struct tlbe *tlbe;
+ unsigned int ra;
+ unsigned int rs;
+ unsigned int ws;
+ unsigned int index;
+
+ ra = get_ra(inst);
+ rs = get_rs(inst);
+ ws = get_ws(inst);
+
+ index = vcpu->arch.gpr[ra];
+ if (index > PPC44x_TLB_SIZE) {
+ printk("%s: index %d\n", __func__, index);
+ kvmppc_dump_vcpu(vcpu);
+ return EMULATE_FAIL;
+ }
+
+ tlbe = &vcpu->arch.guest_tlb[index];
+
+ /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
+ if (tlbe->word0 & PPC44x_TLB_VALID) {
+ eaddr = get_tlb_eaddr(tlbe);
+ asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid;
+ kvmppc_mmu_invalidate(vcpu, eaddr, asid);
+ }
+
+ switch (ws) {
+ case PPC44x_TLB_PAGEID:
+ tlbe->tid = vcpu->arch.mmucr & 0xff;
+ tlbe->word0 = vcpu->arch.gpr[rs];
+ break;
+
+ case PPC44x_TLB_XLAT:
+ tlbe->word1 = vcpu->arch.gpr[rs];
+ break;
+
+ case PPC44x_TLB_ATTRIB:
+ tlbe->word2 = vcpu->arch.gpr[rs];
+ break;
+
+ default:
+ return EMULATE_FAIL;
+ }
+
+ if (tlbe_is_host_safe(vcpu, tlbe)) {
+ eaddr = get_tlb_eaddr(tlbe);
+ raddr = get_tlb_raddr(tlbe);
+ asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid;
+ flags = tlbe->word2 & 0xffff;
+
+ /* Create a 4KB mapping on the host. If the guest wanted a
+ * large page, only the first 4KB is mapped here and the rest
+ * are mapped on the fly. */
+ kvmppc_mmu_map(vcpu, eaddr, raddr >> PAGE_SHIFT, asid, flags);
+ }
+
+ return EMULATE_DONE;
+}
+
+static void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->arch.tcr & TCR_DIE) {
+ /* The decrementer ticks at the same rate as the timebase, so
+ * that's how we convert the guest DEC value to the number of
+ * host ticks. */
+ unsigned long nr_jiffies;
+
+ nr_jiffies = vcpu->arch.dec / tb_ticks_per_jiffy;
+ mod_timer(&vcpu->arch.dec_timer,
+ get_jiffies_64() + nr_jiffies);
+ } else {
+ del_timer(&vcpu->arch.dec_timer);
+ }
+}
+
+static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.pc = vcpu->arch.srr0;
+ kvmppc_set_msr(vcpu, vcpu->arch.srr1);
+}
+
+/* XXX to do:
+ * lhax
+ * lhaux
+ * lswx
+ * lswi
+ * stswx
+ * stswi
+ * lha
+ * lhau
+ * lmw
+ * stmw
+ *
+ * XXX is_bigendian should depend on MMU mapping or MSR[LE]
+ */
+int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
+{
+ u32 inst = vcpu->arch.last_inst;
+ u32 ea;
+ int ra;
+ int rb;
+ int rc;
+ int rs;
+ int rt;
+ int sprn;
+ int dcrn;
+ enum emulation_result emulated = EMULATE_DONE;
+ int advance = 1;
+
+ switch (get_op(inst)) {
+ case 3: /* trap */
+ printk("trap!\n");
+ kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM);
+ advance = 0;
+ break;
+
+ case 19:
+ switch (get_xop(inst)) {
+ case 50: /* rfi */
+ kvmppc_emul_rfi(vcpu);
+ advance = 0;
+ break;
+
+ default:
+ emulated = EMULATE_FAIL;
+ break;
+ }
+ break;
+
+ case 31:
+ switch (get_xop(inst)) {
+
+ case 83: /* mfmsr */
+ rt = get_rt(inst);
+ vcpu->arch.gpr[rt] = vcpu->arch.msr;
+ break;
+
+ case 87: /* lbzx */
+ rt = get_rt(inst);
+ emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
+ break;
+
+ case 131: /* wrtee */
+ rs = get_rs(inst);
+ vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
+ | (vcpu->arch.gpr[rs] & MSR_EE);
+ break;
+
+ case 146: /* mtmsr */
+ rs = get_rs(inst);
+ kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]);
+ break;
+
+ case 163: /* wrteei */
+ vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
+ | (inst & MSR_EE);
+ break;
+
+ case 215: /* stbx */
+ rs = get_rs(inst);
+ emulated = kvmppc_handle_store(run, vcpu,
+ vcpu->arch.gpr[rs],
+ 1, 1);
+ break;
+
+ case 247: /* stbux */
+ rs = get_rs(inst);
+ ra = get_ra(inst);
+ rb = get_rb(inst);
+
+ ea = vcpu->arch.gpr[rb];
+ if (ra)
+ ea += vcpu->arch.gpr[ra];
+
+ emulated = kvmppc_handle_store(run, vcpu,
+ vcpu->arch.gpr[rs],
+ 1, 1);
+ vcpu->arch.gpr[rs] = ea;
+ break;
+
+ case 279: /* lhzx */
+ rt = get_rt(inst);
+ emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
+ break;
+
+ case 311: /* lhzux */
+ rt = get_rt(inst);
+ ra = get_ra(inst);
+ rb = get_rb(inst);
+
+ ea = vcpu->arch.gpr[rb];
+ if (ra)
+ ea += vcpu->arch.gpr[ra];
+
+ emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
+ vcpu->arch.gpr[ra] = ea;
+ break;
+
+ case 323: /* mfdcr */
+ dcrn = get_dcrn(inst);
+ rt = get_rt(inst);
+
+ /* The guest may access CPR0 registers to determine the timebase
+ * frequency, and it must know the real host frequency because it
+ * can directly access the timebase registers.
+ *
+ * It would be possible to emulate those accesses in userspace,
+ * but userspace can really only figure out the end frequency.
+ * We could decompose that into the factors that compute it, but
+ * that's tricky math, and it's easier to just report the real
+ * CPR0 values.
+ */
+ switch (dcrn) {
+ case DCRN_CPR0_CONFIG_ADDR:
+ vcpu->arch.gpr[rt] = vcpu->arch.cpr0_cfgaddr;
+ break;
+ case DCRN_CPR0_CONFIG_DATA:
+ local_irq_disable();
+ mtdcr(DCRN_CPR0_CONFIG_ADDR,
+ vcpu->arch.cpr0_cfgaddr);
+ vcpu->arch.gpr[rt] = mfdcr(DCRN_CPR0_CONFIG_DATA);
+ local_irq_enable();
+ break;
+ default:
+ run->dcr.dcrn = dcrn;
+ run->dcr.data = 0;
+ run->dcr.is_write = 0;
+ vcpu->arch.io_gpr = rt;
+ vcpu->arch.dcr_needed = 1;
+ emulated = EMULATE_DO_DCR;
+ }
+
+ break;
+
+ case 339: /* mfspr */
+ sprn = get_sprn(inst);
+ rt = get_rt(inst);
+
+ switch (sprn) {
+ case SPRN_SRR0:
+ vcpu->arch.gpr[rt] = vcpu->arch.srr0; break;
+ case SPRN_SRR1:
+ vcpu->arch.gpr[rt] = vcpu->arch.srr1; break;
+ case SPRN_MMUCR:
+ vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break;
+ case SPRN_PID:
+ vcpu->arch.gpr[rt] = vcpu->arch.pid; break;
+ case SPRN_IVPR:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break;
+ case SPRN_CCR0:
+ vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break;
+ case SPRN_CCR1:
+ vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break;
+ case SPRN_PVR:
+ vcpu->arch.gpr[rt] = vcpu->arch.pvr; break;
+ case SPRN_DEAR:
+ vcpu->arch.gpr[rt] = vcpu->arch.dear; break;
+ case SPRN_ESR:
+ vcpu->arch.gpr[rt] = vcpu->arch.esr; break;
+ case SPRN_DBCR0:
+ vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break;
+ case SPRN_DBCR1:
+ vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break;
+
+ /* Note: mftb and TBRL/TBWL are user-accessible, so
+ * the guest can always access the real TB anyways.
+ * In fact, we probably will never see these traps. */
+ case SPRN_TBWL:
+ vcpu->arch.gpr[rt] = mftbl(); break;
+ case SPRN_TBWU:
+ vcpu->arch.gpr[rt] = mftbu(); break;
+
+ case SPRN_SPRG0:
+ vcpu->arch.gpr[rt] = vcpu->arch.sprg0; break;
+ case SPRN_SPRG1:
+ vcpu->arch.gpr[rt] = vcpu->arch.sprg1; break;
+ case SPRN_SPRG2:
+ vcpu->arch.gpr[rt] = vcpu->arch.sprg2; break;
+ case SPRN_SPRG3:
+ vcpu->arch.gpr[rt] = vcpu->arch.sprg3; break;
+ /* Note: SPRG4-7 are user-readable, so we don't get
+ * a trap. */
+
+ case SPRN_IVOR0:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[0]; break;
+ case SPRN_IVOR1:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[1]; break;
+ case SPRN_IVOR2:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[2]; break;
+ case SPRN_IVOR3:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[3]; break;
+ case SPRN_IVOR4:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[4]; break;
+ case SPRN_IVOR5:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[5]; break;
+ case SPRN_IVOR6:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[6]; break;
+ case SPRN_IVOR7:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[7]; break;
+ case SPRN_IVOR8:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[8]; break;
+ case SPRN_IVOR9:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[9]; break;
+ case SPRN_IVOR10:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[10]; break;
+ case SPRN_IVOR11:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[11]; break;
+ case SPRN_IVOR12:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[12]; break;
+ case SPRN_IVOR13:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[13]; break;
+ case SPRN_IVOR14:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[14]; break;
+ case SPRN_IVOR15:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[15]; break;
+
+ default:
+ printk("mfspr: unknown spr %x\n", sprn);
+ vcpu->arch.gpr[rt] = 0;
+ break;
+ }
+ break;
+
+ case 407: /* sthx */
+ rs = get_rs(inst);
+ ra = get_ra(inst);
+ rb = get_rb(inst);
+
+ emulated = kvmppc_handle_store(run, vcpu,
+ vcpu->arch.gpr[rs],
+ 2, 1);
+ break;
+
+ case 439: /* sthux */
+ rs = get_rs(inst);
+ ra = get_ra(inst);
+ rb = get_rb(inst);
+
+ ea = vcpu->arch.gpr[rb];
+ if (ra)
+ ea += vcpu->arch.gpr[ra];
+
+ emulated = kvmppc_handle_store(run, vcpu,
+ vcpu->arch.gpr[rs],
+ 2, 1);
+ vcpu->arch.gpr[ra] = ea;
+ break;
+
+ case 451: /* mtdcr */
+ dcrn = get_dcrn(inst);
+ rs = get_rs(inst);
+
+ /* emulate some access in kernel */
+ switch (dcrn) {
+ case DCRN_CPR0_CONFIG_ADDR:
+ vcpu->arch.cpr0_cfgaddr = vcpu->arch.gpr[rs];
+ break;
+ default:
+ run->dcr.dcrn = dcrn;
+ run->dcr.data = vcpu->arch.gpr[rs];
+ run->dcr.is_write = 1;
+ vcpu->arch.dcr_needed = 1;
+ emulated = EMULATE_DO_DCR;
+ }
+
+ break;
+
+ case 467: /* mtspr */
+ sprn = get_sprn(inst);
+ rs = get_rs(inst);
+ switch (sprn) {
+ case SPRN_SRR0:
+ vcpu->arch.srr0 = vcpu->arch.gpr[rs]; break;
+ case SPRN_SRR1:
+ vcpu->arch.srr1 = vcpu->arch.gpr[rs]; break;
+ case SPRN_MMUCR:
+ vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break;
+ case SPRN_PID:
+ vcpu->arch.pid = vcpu->arch.gpr[rs]; break;
+ case SPRN_CCR0:
+ vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break;
+ case SPRN_CCR1:
+ vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break;
+ case SPRN_DEAR:
+ vcpu->arch.dear = vcpu->arch.gpr[rs]; break;
+ case SPRN_ESR:
+ vcpu->arch.esr = vcpu->arch.gpr[rs]; break;
+ case SPRN_DBCR0:
+ vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break;
+ case SPRN_DBCR1:
+ vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break;
+
+ /* XXX We need to context-switch the timebase for
+ * watchdog and FIT. */
+ case SPRN_TBWL: break;
+ case SPRN_TBWU: break;
+
+ case SPRN_DEC:
+ vcpu->arch.dec = vcpu->arch.gpr[rs];
+ kvmppc_emulate_dec(vcpu);
+ break;
+
+ case SPRN_TSR:
+ vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break;
+
+ case SPRN_TCR:
+ vcpu->arch.tcr = vcpu->arch.gpr[rs];
+ kvmppc_emulate_dec(vcpu);
+ break;
+
+ case SPRN_SPRG0:
+ vcpu->arch.sprg0 = vcpu->arch.gpr[rs]; break;
+ case SPRN_SPRG1:
+ vcpu->arch.sprg1 = vcpu->arch.gpr[rs]; break;
+ case SPRN_SPRG2:
+ vcpu->arch.sprg2 = vcpu->arch.gpr[rs]; break;
+ case SPRN_SPRG3:
+ vcpu->arch.sprg3 = vcpu->arch.gpr[rs]; break;
+
+ /* Note: SPRG4-7 are user-readable. These values are
+ * loaded into the real SPRGs when resuming the
+ * guest. */
+ case SPRN_SPRG4:
+ vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break;
+ case SPRN_SPRG5:
+ vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break;
+ case SPRN_SPRG6:
+ vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break;
+ case SPRN_SPRG7:
+ vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break;
+
+ case SPRN_IVPR:
+ vcpu->arch.ivpr = vcpu->arch.gpr[rs]; break;
+ case SPRN_IVOR0:
+ vcpu->arch.ivor[0] = vcpu->arch.gpr[rs]; break;
+ case SPRN_IVOR1:
+ vcpu->arch.ivor[1] = vcpu->arch.gpr[rs]; break;
+ case SPRN_IVOR2:
+ vcpu->arch.ivor[2] = vcpu->arch.gpr[rs]; break;
+ case SPRN_IVOR3:
+ vcpu->arch.ivor[3] = vcpu->arch.gpr[rs]; break;
+ case SPRN_IVOR4:
+ vcpu->arch.ivor[4] = vcpu->arch.gpr[rs]; break;
+ case SPRN_IVOR5:
+ vcpu->arch.ivor[5] = vcpu->arch.gpr[rs]; break;
+ case SPRN_IVOR6:
+ vcpu->arch.ivor[6] = vcpu->arch.gpr[rs]; break;
+ case SPRN_IVOR7:
+ vcpu->arch.ivor[7] = vcpu->arch.gpr[rs]; break;
+ case SPRN_IVOR8:
+ vcpu->arch.ivor[8] = vcpu->arch.gpr[rs]; break;
+ case SPRN_IVOR9:
+ vcpu->arch.ivor[9] = vcpu->arch.gpr[rs]; break;
+ case SPRN_IVOR10:
+ vcpu->arch.ivor[10] = vcpu->arch.gpr[rs]; break;
+ case SPRN_IVOR11:
+ vcpu->arch.ivor[11] = vcpu->arch.gpr[rs]; break;
+ case SPRN_IVOR12:
+ vcpu->arch.ivor[12] = vcpu->arch.gpr[rs]; break;
+ case SPRN_IVOR13:
+ vcpu->arch.ivor[13] = vcpu->arch.gpr[rs]; break;
+ case SPRN_IVOR14:
+ vcpu->arch.ivor[14] = vcpu->arch.gpr[rs]; break;
+ case SPRN_IVOR15:
+ vcpu->arch.ivor[15] = vcpu->arch.gpr[rs]; break;
+
+ default:
+ printk("mtspr: unknown spr %x\n", sprn);
+ emulated = EMULATE_FAIL;
+ break;
+ }
+ break;
+
+ case 470: /* dcbi */
+ /* Do nothing. The guest is performing dcbi because
+ * hardware DMA is not snooped by the dcache, but
+ * emulated DMA either goes through the dcache as
+ * normal writes, or the host kernel has handled dcache
+ * coherence. */
+ break;
+
+ case 534: /* lwbrx */
+ rt = get_rt(inst);
+ emulated = kvmppc_handle_load(run, vcpu, rt, 4, 0);
+ break;
+
+ case 566: /* tlbsync */
+ break;
+
+ case 662: /* stwbrx */
+ rs = get_rs(inst);
+ ra = get_ra(inst);
+ rb = get_rb(inst);
+
+ emulated = kvmppc_handle_store(run, vcpu,
+ vcpu->arch.gpr[rs],
+ 4, 0);
+ break;
+
+ case 978: /* tlbwe */
+ emulated = kvmppc_emul_tlbwe(vcpu, inst);
+ break;
+
+ case 914: { /* tlbsx */
+ int index;
+ unsigned int as = get_mmucr_sts(vcpu);
+ unsigned int pid = get_mmucr_stid(vcpu);
+
+ rt = get_rt(inst);
+ ra = get_ra(inst);
+ rb = get_rb(inst);
+ rc = get_rc(inst);
+
+ ea = vcpu->arch.gpr[rb];
+ if (ra)
+ ea += vcpu->arch.gpr[ra];
+
+ index = kvmppc_44x_tlb_index(vcpu, ea, pid, as);
+ if (rc) {
+ if (index < 0)
+ vcpu->arch.cr &= ~0x20000000;
+ else
+ vcpu->arch.cr |= 0x20000000;
+ }
+ vcpu->arch.gpr[rt] = index;
+
+ }
+ break;
+
+ case 790: /* lhbrx */
+ rt = get_rt(inst);
+ emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0);
+ break;
+
+ case 918: /* sthbrx */
+ rs = get_rs(inst);
+ ra = get_ra(inst);
+ rb = get_rb(inst);
+
+ emulated = kvmppc_handle_store(run, vcpu,
+ vcpu->arch.gpr[rs],
+ 2, 0);
+ break;
+
+ case 966: /* iccci */
+ break;
+
+ default:
+ printk("unknown: op %d xop %d\n", get_op(inst),
+ get_xop(inst));
+ emulated = EMULATE_FAIL;
+ break;
+ }
+ break;
+
+ case 32: /* lwz */
+ rt = get_rt(inst);
+ emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
+ break;
+
+ case 33: /* lwzu */
+ ra = get_ra(inst);
+ rt = get_rt(inst);
+ emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
+ vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
+ break;
+
+ case 34: /* lbz */
+ rt = get_rt(inst);
+ emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
+ break;
+
+ case 35: /* lbzu */
+ ra = get_ra(inst);
+ rt = get_rt(inst);
+ emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
+ vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
+ break;
+
+ case 36: /* stw */
+ rs = get_rs(inst);
+ emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
+ 4, 1);
+ break;
+
+ case 37: /* stwu */
+ ra = get_ra(inst);
+ rs = get_rs(inst);
+ emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
+ 4, 1);
+ vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
+ break;
+
+ case 38: /* stb */
+ rs = get_rs(inst);
+ emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
+ 1, 1);
+ break;
+
+ case 39: /* stbu */
+ ra = get_ra(inst);
+ rs = get_rs(inst);
+ emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
+ 1, 1);
+ vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
+ break;
+
+ case 40: /* lhz */
+ rt = get_rt(inst);
+ emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
+ break;
+
+ case 41: /* lhzu */
+ ra = get_ra(inst);
+ rt = get_rt(inst);
+ emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
+ vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
+ break;
+
+ case 44: /* sth */
+ rs = get_rs(inst);
+ emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
+ 2, 1);
+ break;
+
+ case 45: /* sthu */
+ ra = get_ra(inst);
+ rs = get_rs(inst);
+ emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
+ 2, 1);
+ vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
+ break;
+
+ default:
+ printk("unknown op %d\n", get_op(inst));
+ emulated = EMULATE_FAIL;
+ break;
+ }
+
+ if (advance)
+ vcpu->arch.pc += 4; /* Advance past emulated instruction. */
+
+ return emulated;
+}
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
new file mode 100644
index 000000000000..bad40bd2d3ac
--- /dev/null
+++ b/arch/powerpc/kvm/powerpc.c
@@ -0,0 +1,436 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2007
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+#include <asm/cputable.h>
+#include <asm/uaccess.h>
+#include <asm/kvm_ppc.h>
+
+
+gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
+{
+ return gfn;
+}
+
+int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
+{
+ /* XXX implement me */
+ return 0;
+}
+
+int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
+{
+ return 1;
+}
+
+
+int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
+{
+ enum emulation_result er;
+ int r;
+
+ er = kvmppc_emulate_instruction(run, vcpu);
+ switch (er) {
+ case EMULATE_DONE:
+ /* Future optimization: only reload non-volatiles if they were
+ * actually modified. */
+ r = RESUME_GUEST_NV;
+ break;
+ case EMULATE_DO_MMIO:
+ run->exit_reason = KVM_EXIT_MMIO;
+ /* We must reload nonvolatiles because "update" load/store
+ * instructions modify register state. */
+ /* Future optimization: only reload non-volatiles if they were
+ * actually modified. */
+ r = RESUME_HOST_NV;
+ break;
+ case EMULATE_FAIL:
+ /* XXX Deliver Program interrupt to guest. */
+ printk(KERN_EMERG "%s: emulation failed (%08x)\n", __func__,
+ vcpu->arch.last_inst);
+ r = RESUME_HOST;
+ break;
+ default:
+ BUG();
+ }
+
+ return r;
+}
+
+void kvm_arch_hardware_enable(void *garbage)
+{
+}
+
+void kvm_arch_hardware_disable(void *garbage)
+{
+}
+
+int kvm_arch_hardware_setup(void)
+{
+ return 0;
+}
+
+void kvm_arch_hardware_unsetup(void)
+{
+}
+
+void kvm_arch_check_processor_compat(void *rtn)
+{
+ int r;
+
+ if (strcmp(cur_cpu_spec->platform, "ppc440") == 0)
+ r = 0;
+ else
+ r = -ENOTSUPP;
+
+ *(int *)rtn = r;
+}
+
+struct kvm *kvm_arch_create_vm(void)
+{
+ struct kvm *kvm;
+
+ kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
+ if (!kvm)
+ return ERR_PTR(-ENOMEM);
+
+ return kvm;
+}
+
+static void kvmppc_free_vcpus(struct kvm *kvm)
+{
+ unsigned int i;
+
+ for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+ if (kvm->vcpus[i]) {
+ kvm_arch_vcpu_free(kvm->vcpus[i]);
+ kvm->vcpus[i] = NULL;
+ }
+ }
+}
+
+void kvm_arch_destroy_vm(struct kvm *kvm)
+{
+ kvmppc_free_vcpus(kvm);
+ kvm_free_physmem(kvm);
+ kfree(kvm);
+}
+
+int kvm_dev_ioctl_check_extension(long ext)
+{
+ int r;
+
+ switch (ext) {
+ case KVM_CAP_USER_MEMORY:
+ r = 1;
+ break;
+ default:
+ r = 0;
+ break;
+ }
+ return r;
+
+}
+
+long kvm_arch_dev_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ return -EINVAL;
+}
+
+int kvm_arch_set_memory_region(struct kvm *kvm,
+ struct kvm_userspace_memory_region *mem,
+ struct kvm_memory_slot old,
+ int user_alloc)
+{
+ return 0;
+}
+
+struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
+{
+ struct kvm_vcpu *vcpu;
+ int err;
+
+ vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+ if (!vcpu) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = kvm_vcpu_init(vcpu, kvm, id);
+ if (err)
+ goto free_vcpu;
+
+ return vcpu;
+
+free_vcpu:
+ kmem_cache_free(kvm_vcpu_cache, vcpu);
+out:
+ return ERR_PTR(err);
+}
+
+void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
+{
+ kvm_vcpu_uninit(vcpu);
+ kmem_cache_free(kvm_vcpu_cache, vcpu);
+}
+
+void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
+{
+ kvm_arch_vcpu_free(vcpu);
+}
+
+int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
+{
+ unsigned int priority = exception_priority[BOOKE_INTERRUPT_DECREMENTER];
+
+ return test_bit(priority, &vcpu->arch.pending_exceptions);
+}
+
+static void kvmppc_decrementer_func(unsigned long data)
+{
+ struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
+
+ kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_DECREMENTER);
+}
+
+int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
+{
+ setup_timer(&vcpu->arch.dec_timer, kvmppc_decrementer_func,
+ (unsigned long)vcpu);
+
+ return 0;
+}
+
+void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
+{
+}
+
+void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+}
+
+void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
+{
+}
+
+void decache_vcpus_on_cpu(int cpu)
+{
+}
+
+int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
+ struct kvm_debug_guest *dbg)
+{
+ return -ENOTSUPP;
+}
+
+static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu,
+ struct kvm_run *run)
+{
+ u32 *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
+ *gpr = run->dcr.data;
+}
+
+static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
+ struct kvm_run *run)
+{
+ u32 *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
+
+ if (run->mmio.len > sizeof(*gpr)) {
+ printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len);
+ return;
+ }
+
+ if (vcpu->arch.mmio_is_bigendian) {
+ switch (run->mmio.len) {
+ case 4: *gpr = *(u32 *)run->mmio.data; break;
+ case 2: *gpr = *(u16 *)run->mmio.data; break;
+ case 1: *gpr = *(u8 *)run->mmio.data; break;
+ }
+ } else {
+ /* Convert BE data from userland back to LE. */
+ switch (run->mmio.len) {
+ case 4: *gpr = ld_le32((u32 *)run->mmio.data); break;
+ case 2: *gpr = ld_le16((u16 *)run->mmio.data); break;
+ case 1: *gpr = *(u8 *)run->mmio.data; break;
+ }
+ }
+}
+
+int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned int rt, unsigned int bytes, int is_bigendian)
+{
+ if (bytes > sizeof(run->mmio.data)) {
+ printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__,
+ run->mmio.len);
+ }
+
+ run->mmio.phys_addr = vcpu->arch.paddr_accessed;
+ run->mmio.len = bytes;
+ run->mmio.is_write = 0;
+
+ vcpu->arch.io_gpr = rt;
+ vcpu->arch.mmio_is_bigendian = is_bigendian;
+ vcpu->mmio_needed = 1;
+ vcpu->mmio_is_write = 0;
+
+ return EMULATE_DO_MMIO;
+}
+
+int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ u32 val, unsigned int bytes, int is_bigendian)
+{
+ void *data = run->mmio.data;
+
+ if (bytes > sizeof(run->mmio.data)) {
+ printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__,
+ run->mmio.len);
+ }
+
+ run->mmio.phys_addr = vcpu->arch.paddr_accessed;
+ run->mmio.len = bytes;
+ run->mmio.is_write = 1;
+ vcpu->mmio_needed = 1;
+ vcpu->mmio_is_write = 1;
+
+ /* Store the value at the lowest bytes in 'data'. */
+ if (is_bigendian) {
+ switch (bytes) {
+ case 4: *(u32 *)data = val; break;
+ case 2: *(u16 *)data = val; break;
+ case 1: *(u8 *)data = val; break;
+ }
+ } else {
+ /* Store LE value into 'data'. */
+ switch (bytes) {
+ case 4: st_le32(data, val); break;
+ case 2: st_le16(data, val); break;
+ case 1: *(u8 *)data = val; break;
+ }
+ }
+
+ return EMULATE_DO_MMIO;
+}
+
+int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ int r;
+ sigset_t sigsaved;
+
+ if (vcpu->sigset_active)
+ sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
+
+ if (vcpu->mmio_needed) {
+ if (!vcpu->mmio_is_write)
+ kvmppc_complete_mmio_load(vcpu, run);
+ vcpu->mmio_needed = 0;
+ } else if (vcpu->arch.dcr_needed) {
+ if (!vcpu->arch.dcr_is_write)
+ kvmppc_complete_dcr_load(vcpu, run);
+ vcpu->arch.dcr_needed = 0;
+ }
+
+ kvmppc_check_and_deliver_interrupts(vcpu);
+
+ local_irq_disable();
+ kvm_guest_enter();
+ r = __kvmppc_vcpu_run(run, vcpu);
+ kvm_guest_exit();
+ local_irq_enable();
+
+ if (vcpu->sigset_active)
+ sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+
+ return r;
+}
+
+int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
+{
+ kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_EXTERNAL);
+ return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
+ struct kvm_mp_state *mp_state)
+{
+ return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
+ struct kvm_mp_state *mp_state)
+{
+ return -EINVAL;
+}
+
+long kvm_arch_vcpu_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ struct kvm_vcpu *vcpu = filp->private_data;
+ void __user *argp = (void __user *)arg;
+ long r;
+
+ switch (ioctl) {
+ case KVM_INTERRUPT: {
+ struct kvm_interrupt irq;
+ r = -EFAULT;
+ if (copy_from_user(&irq, argp, sizeof(irq)))
+ goto out;
+ r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
+ break;
+ }
+ default:
+ r = -EINVAL;
+ }
+
+out:
+ return r;
+}
+
+int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
+{
+ return -ENOTSUPP;
+}
+
+long kvm_arch_vm_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ long r;
+
+ switch (ioctl) {
+ default:
+ r = -EINVAL;
+ }
+
+ return r;
+}
+
+int kvm_arch_init(void *opaque)
+{
+ return 0;
+}
+
+void kvm_arch_exit(void)
+{
+}
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 5ccb579b81e4..f67e118116fa 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -110,15 +110,6 @@ EXPORT_SYMBOL(phys_mem_access_prot);
#ifdef CONFIG_MEMORY_HOTPLUG
-void online_page(struct page *page)
-{
- ClearPageReserved(page);
- init_page_count(page);
- __free_page(page);
- totalram_pages++;
- num_physpages++;
-}
-
#ifdef CONFIG_NUMA
int memory_add_physaddr_to_nid(u64 start)
{
@@ -163,19 +154,35 @@ out:
/*
* walk_memory_resource() needs to make sure there is no holes in a given
- * memory range. On PPC64, since this range comes from /sysfs, the range
- * is guaranteed to be valid, non-overlapping and can not contain any
- * holes. By the time we get here (memory add or remove), /proc/device-tree
- * is updated and correct. Only reason we need to check against device-tree
- * would be if we allow user-land to specify a memory range through a
- * system call/ioctl etc. instead of doing offline/online through /sysfs.
+ * memory range. PPC64 does not maintain the memory layout in /proc/iomem.
+ * Instead it maintains it in lmb.memory structures. Walk through the
+ * memory regions, find holes and callback for contiguous regions.
*/
int
walk_memory_resource(unsigned long start_pfn, unsigned long nr_pages, void *arg,
int (*func)(unsigned long, unsigned long, void *))
{
- return (*func)(start_pfn, nr_pages, arg);
+ struct lmb_property res;
+ unsigned long pfn, len;
+ u64 end;
+ int ret = -1;
+
+ res.base = (u64) start_pfn << PAGE_SHIFT;
+ res.size = (u64) nr_pages << PAGE_SHIFT;
+
+ end = res.base + res.size - 1;
+ while ((res.base < end) && (lmb_find(&res) >= 0)) {
+ pfn = (unsigned long)(res.base >> PAGE_SHIFT);
+ len = (unsigned long)(res.size >> PAGE_SHIFT);
+ ret = (*func)(pfn, len, arg);
+ if (ret)
+ break;
+ res.base += (res.size + 1);
+ res.size = (end - res.base + 1);
+ }
+ return ret;
}
+EXPORT_SYMBOL_GPL(walk_memory_resource);
#endif /* CONFIG_MEMORY_HOTPLUG */
diff --git a/arch/powerpc/platforms/86xx/Kconfig b/arch/powerpc/platforms/86xx/Kconfig
index 7442c58d44f5..053f49a1dcae 100644
--- a/arch/powerpc/platforms/86xx/Kconfig
+++ b/arch/powerpc/platforms/86xx/Kconfig
@@ -8,6 +8,7 @@ config MPC8641_HPCN
select PPC_I8259
select DEFAULT_UIMAGE
select FSL_ULI1575
+ select HAS_RAPIDIO
help
This option enables support for the MPC8641 HPCN board.
diff --git a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
index 18b8ebe930d5..5e1e8cf14e75 100644
--- a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
+++ b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
@@ -3,11 +3,12 @@
*
* Initial author: Xianghua Xiao <x.xiao@freescale.com>
* Recode: Jason Jin <jason.jin@freescale.com>
+ * York Sun <yorksun@freescale.com>
*
* Rewrite the interrupt routing. remove the 8259PIC support,
* All the integrated device in ULI use sideband interrupt.
*
- * Copyright 2007 Freescale Semiconductor Inc.
+ * Copyright 2008 Freescale Semiconductor Inc.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
@@ -38,6 +39,8 @@
#include <sysdev/fsl_pci.h>
#include <sysdev/fsl_soc.h>
+static unsigned char *pixis_bdcfg0, *pixis_arch;
+
static struct of_device_id __initdata mpc8610_ids[] = {
{ .compatible = "fsl,mpc8610-immr", },
{}
@@ -52,8 +55,7 @@ static int __init mpc8610_declare_of_platform_devices(void)
}
machine_device_initcall(mpc86xx_hpcd, mpc8610_declare_of_platform_devices);
-static void __init
-mpc86xx_hpcd_init_irq(void)
+static void __init mpc86xx_hpcd_init_irq(void)
{
struct mpic *mpic1;
struct device_node *np;
@@ -161,12 +163,159 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL, 0x5229, quirk_uli5229);
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, 0x5288, final_uli5288);
#endif /* CONFIG_PCI */
-static void __init
-mpc86xx_hpcd_setup_arch(void)
+#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
+
+static u32 get_busfreq(void)
{
-#ifdef CONFIG_PCI
- struct device_node *np;
+ struct device_node *node;
+
+ u32 fs_busfreq = 0;
+ node = of_find_node_by_type(NULL, "cpu");
+ if (node) {
+ unsigned int size;
+ const unsigned int *prop =
+ of_get_property(node, "bus-frequency", &size);
+ if (prop)
+ fs_busfreq = *prop;
+ of_node_put(node);
+ };
+ return fs_busfreq;
+}
+
+unsigned int mpc8610hpcd_get_pixel_format(unsigned int bits_per_pixel,
+ int monitor_port)
+{
+ static const unsigned long pixelformat[][3] = {
+ {0x88882317, 0x88083218, 0x65052119},
+ {0x88883316, 0x88082219, 0x65053118},
+ };
+ unsigned int pix_fmt, arch_monitor;
+
+ arch_monitor = ((*pixis_arch == 0x01) && (monitor_port == 0))? 0 : 1;
+ /* DVI port for board version 0x01 */
+
+ if (bits_per_pixel == 32)
+ pix_fmt = pixelformat[arch_monitor][0];
+ else if (bits_per_pixel == 24)
+ pix_fmt = pixelformat[arch_monitor][1];
+ else if (bits_per_pixel == 16)
+ pix_fmt = pixelformat[arch_monitor][2];
+ else
+ pix_fmt = pixelformat[1][0];
+
+ return pix_fmt;
+}
+
+void mpc8610hpcd_set_gamma_table(int monitor_port, char *gamma_table_base)
+{
+ int i;
+ if (monitor_port == 2) { /* dual link LVDS */
+ for (i = 0; i < 256*3; i++)
+ gamma_table_base[i] = (gamma_table_base[i] << 2) |
+ ((gamma_table_base[i] >> 6) & 0x03);
+ }
+}
+
+void mpc8610hpcd_set_monitor_port(int monitor_port)
+{
+ static const u8 bdcfg[] = {0xBD, 0xB5, 0xA5};
+ if (monitor_port < 3)
+ *pixis_bdcfg0 = bdcfg[monitor_port];
+}
+
+void mpc8610hpcd_set_pixel_clock(unsigned int pixclock)
+{
+ u32 __iomem *clkdvdr;
+ u32 temp;
+ /* variables for pixel clock calcs */
+ ulong bestval, bestfreq, speed_ccb, minpixclock, maxpixclock;
+ ulong pixval;
+ long err;
+ int i;
+
+ clkdvdr = ioremap(get_immrbase() + 0xe0800, sizeof(u32));
+ if (!clkdvdr) {
+ printk(KERN_ERR "Err: can't map clock divider register!\n");
+ return;
+ }
+
+ /* Pixel Clock configuration */
+ pr_debug("DIU: Bus Frequency = %d\n", get_busfreq());
+ speed_ccb = get_busfreq();
+
+ /* Calculate the pixel clock with the smallest error */
+ /* calculate the following in steps to avoid overflow */
+ pr_debug("DIU pixclock in ps - %d\n", pixclock);
+ temp = 1000000000/pixclock;
+ temp *= 1000;
+ pixclock = temp;
+ pr_debug("DIU pixclock freq - %u\n", pixclock);
+
+ temp = pixclock * 5 / 100;
+ pr_debug("deviation = %d\n", temp);
+ minpixclock = pixclock - temp;
+ maxpixclock = pixclock + temp;
+ pr_debug("DIU minpixclock - %lu\n", minpixclock);
+ pr_debug("DIU maxpixclock - %lu\n", maxpixclock);
+ pixval = speed_ccb/pixclock;
+ pr_debug("DIU pixval = %lu\n", pixval);
+
+ err = 100000000;
+ bestval = pixval;
+ pr_debug("DIU bestval = %lu\n", bestval);
+
+ bestfreq = 0;
+ for (i = -1; i <= 1; i++) {
+ temp = speed_ccb / ((pixval+i) + 1);
+ pr_debug("DIU test pixval i= %d, pixval=%lu, temp freq. = %u\n",
+ i, pixval, temp);
+ if ((temp < minpixclock) || (temp > maxpixclock))
+ pr_debug("DIU exceeds monitor range (%lu to %lu)\n",
+ minpixclock, maxpixclock);
+ else if (abs(temp - pixclock) < err) {
+ pr_debug("Entered the else if block %d\n", i);
+ err = abs(temp - pixclock);
+ bestval = pixval+i;
+ bestfreq = temp;
+ }
+ }
+
+ pr_debug("DIU chose = %lx\n", bestval);
+ pr_debug("DIU error = %ld\n NomPixClk ", err);
+ pr_debug("DIU: Best Freq = %lx\n", bestfreq);
+ /* Modify PXCLK in GUTS CLKDVDR */
+ pr_debug("DIU: Current value of CLKDVDR = 0x%08x\n", (*clkdvdr));
+ temp = (*clkdvdr) & 0x2000FFFF;
+ *clkdvdr = temp; /* turn off clock */
+ *clkdvdr = temp | 0x80000000 | (((bestval) & 0x1F) << 16);
+ pr_debug("DIU: Modified value of CLKDVDR = 0x%08x\n", (*clkdvdr));
+ iounmap(clkdvdr);
+}
+
+ssize_t mpc8610hpcd_show_monitor_port(int monitor_port, char *buf)
+{
+ return snprintf(buf, PAGE_SIZE,
+ "%c0 - DVI\n"
+ "%c1 - Single link LVDS\n"
+ "%c2 - Dual link LVDS\n",
+ monitor_port == 0 ? '*' : ' ',
+ monitor_port == 1 ? '*' : ' ',
+ monitor_port == 2 ? '*' : ' ');
+}
+
+int mpc8610hpcd_set_sysfs_monitor_port(int val)
+{
+ return val < 3 ? val : 0;
+}
+
#endif
+
+static void __init mpc86xx_hpcd_setup_arch(void)
+{
+ struct resource r;
+ struct device_node *np;
+ unsigned char *pixis;
+
if (ppc_md.progress)
ppc_md.progress("mpc86xx_hpcd_setup_arch()", 0);
@@ -183,6 +332,30 @@ mpc86xx_hpcd_setup_arch(void)
}
}
#endif
+#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
+ preallocate_diu_videomemory();
+ diu_ops.get_pixel_format = mpc8610hpcd_get_pixel_format;
+ diu_ops.set_gamma_table = mpc8610hpcd_set_gamma_table;
+ diu_ops.set_monitor_port = mpc8610hpcd_set_monitor_port;
+ diu_ops.set_pixel_clock = mpc8610hpcd_set_pixel_clock;
+ diu_ops.show_monitor_port = mpc8610hpcd_show_monitor_port;
+ diu_ops.set_sysfs_monitor_port = mpc8610hpcd_set_sysfs_monitor_port;
+#endif
+
+ np = of_find_compatible_node(NULL, NULL, "fsl,fpga-pixis");
+ if (np) {
+ of_address_to_resource(np, 0, &r);
+ of_node_put(np);
+ pixis = ioremap(r.start, 32);
+ if (!pixis) {
+ printk(KERN_ERR "Err: can't map FPGA cfg register!\n");
+ return;
+ }
+ pixis_bdcfg0 = pixis + 8;
+ pixis_arch = pixis + 1;
+ } else
+ printk(KERN_ERR "Err: "
+ "can't find device node 'fsl,fpga-pixis'\n");
printk("MPC86xx HPCD board from Freescale Semiconductor\n");
}
@@ -200,8 +373,7 @@ static int __init mpc86xx_hpcd_probe(void)
return 0;
}
-static long __init
-mpc86xx_time_init(void)
+static long __init mpc86xx_time_init(void)
{
unsigned int temp;
diff --git a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
index f947f555fd46..f13704aabbea 100644
--- a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
+++ b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
@@ -221,6 +221,7 @@ mpc86xx_time_init(void)
static __initdata struct of_device_id of_bus_ids[] = {
{ .compatible = "simple-bus", },
+ { .compatible = "fsl,rapidio-delta", },
{},
};
diff --git a/arch/powerpc/platforms/cell/celleb_scc_pciex.c b/arch/powerpc/platforms/cell/celleb_scc_pciex.c
index ab24d94baab6..31da84c458d2 100644
--- a/arch/powerpc/platforms/cell/celleb_scc_pciex.c
+++ b/arch/powerpc/platforms/cell/celleb_scc_pciex.c
@@ -36,8 +36,8 @@
#include "celleb_scc.h"
#include "celleb_pci.h"
-#define PEX_IN(base, off) in_be32((void *)(base) + (off))
-#define PEX_OUT(base, off, data) out_be32((void *)(base) + (off), (data))
+#define PEX_IN(base, off) in_be32((void __iomem *)(base) + (off))
+#define PEX_OUT(base, off, data) out_be32((void __iomem *)(base) + (off), (data))
static void scc_pciex_io_flush(struct iowa_bus *bus)
{
@@ -304,7 +304,7 @@ static int __init scc_pciex_iowa_init(struct iowa_bus *bus, void *data)
((((0x1 << (size))-1) << ((addr) & 0x3)) << PEXDCMND_BYTE_EN_SHIFT)
#define MK_PEXDCMND(cmd, addr, size) ((cmd) | MK_PEXDCMND_BYTE_EN(addr, size))
-static uint32_t config_read_pciex_dev(unsigned int *base,
+static uint32_t config_read_pciex_dev(unsigned int __iomem *base,
uint64_t bus_no, uint64_t dev_no, uint64_t func_no,
uint64_t off, uint64_t size)
{
@@ -320,7 +320,7 @@ static uint32_t config_read_pciex_dev(unsigned int *base,
return ret;
}
-static void config_write_pciex_dev(unsigned int *base, uint64_t bus_no,
+static void config_write_pciex_dev(unsigned int __iomem *base, uint64_t bus_no,
uint64_t dev_no, uint64_t func_no, uint64_t off, uint64_t size,
uint32_t data)
{
@@ -338,7 +338,7 @@ static void config_write_pciex_dev(unsigned int *base, uint64_t bus_no,
((((0x1 << (len)) - 1) << ((off) & 0x3)) << PEXCADRS_BYTE_EN_SHIFT)
#define MK_PEXCADRS(cmd, addr, size) \
((cmd) | MK_PEXCADRS_BYTE_EN(addr, size) | ((addr) & ~0x3))
-static uint32_t config_read_pciex_rc(unsigned int *base,
+static uint32_t config_read_pciex_rc(unsigned int __iomem *base,
uint32_t where, uint32_t size)
{
PEX_OUT(base, PEXCADRS, MK_PEXCADRS(PEXCADRS_CMD_READ, where, size));
@@ -346,7 +346,7 @@ static uint32_t config_read_pciex_rc(unsigned int *base,
>> ((where & (4 - size)) * 8)) & ((0x1 << (size * 8)) - 1);
}
-static void config_write_pciex_rc(unsigned int *base, uint32_t where,
+static void config_write_pciex_rc(unsigned int __iomem *base, uint32_t where,
uint32_t size, uint32_t val)
{
uint32_t data;
@@ -410,7 +410,7 @@ static struct pci_ops scc_pciex_pci_ops = {
scc_pciex_write_config,
};
-static void pciex_clear_intr_all(unsigned int *base)
+static void pciex_clear_intr_all(unsigned int __iomem *base)
{
PEX_OUT(base, PEXAERRSTS, 0xffffffff);
PEX_OUT(base, PEXPRERRSTS, 0xffffffff);
@@ -427,7 +427,7 @@ static void pciex_disable_intr_all(unsigned int *base)
}
#endif
-static void pciex_enable_intr_all(unsigned int *base)
+static void pciex_enable_intr_all(unsigned int __iomem *base)
{
PEX_OUT(base, PEXINTMASK, 0x0000e7f1);
PEX_OUT(base, PEXAERRMASK, 0x03ff01ff);
@@ -435,7 +435,7 @@ static void pciex_enable_intr_all(unsigned int *base)
PEX_OUT(base, PEXVDMASK, 0x00000001);
}
-static void pciex_check_status(unsigned int *base)
+static void pciex_check_status(unsigned int __iomem *base)
{
uint32_t err = 0;
uint32_t intsts, aerr, prerr, rcvcp, lenerr;
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index 00528ef84ad2..45dcd2693502 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -1063,10 +1063,9 @@ int __init spu_sched_init(void)
mod_timer(&spuloadavg_timer, 0);
- entry = create_proc_entry("spu_loadavg", 0, NULL);
+ entry = proc_create("spu_loadavg", 0, NULL, &spu_loadavg_fops);
if (!entry)
goto out_stop_kthread;
- entry->proc_fops = &spu_loadavg_fops;
pr_debug("spusched: tick: %d, min ticks: %d, default ticks: %d\n",
SPUSCHED_TICK, MIN_SPU_TIMESLICE, DEF_SPU_TIMESLICE);
diff --git a/arch/powerpc/platforms/cell/spufs/sputrace.c b/arch/powerpc/platforms/cell/spufs/sputrace.c
index 79aa773f3c99..aea5286f1245 100644
--- a/arch/powerpc/platforms/cell/spufs/sputrace.c
+++ b/arch/powerpc/platforms/cell/spufs/sputrace.c
@@ -201,10 +201,9 @@ static int __init sputrace_init(void)
if (!sputrace_log)
goto out;
- entry = create_proc_entry("sputrace", S_IRUSR, NULL);
+ entry = proc_create("sputrace", S_IRUSR, NULL, &sputrace_fops);
if (!entry)
goto out_free_log;
- entry->proc_fops = &sputrace_fops;
for (i = 0; i < ARRAY_SIZE(spu_probes); i++) {
struct spu_probe *p = &spu_probes[i];
diff --git a/arch/powerpc/platforms/iseries/lpevents.c b/arch/powerpc/platforms/iseries/lpevents.c
index e5b40e3e0082..b0f8a857ec02 100644
--- a/arch/powerpc/platforms/iseries/lpevents.c
+++ b/arch/powerpc/platforms/iseries/lpevents.c
@@ -330,15 +330,11 @@ static const struct file_operations proc_lpevents_operations = {
static int __init proc_lpevents_init(void)
{
- struct proc_dir_entry *e;
-
if (!firmware_has_feature(FW_FEATURE_ISERIES))
return 0;
- e = create_proc_entry("iSeries/lpevents", S_IFREG|S_IRUGO, NULL);
- if (e)
- e->proc_fops = &proc_lpevents_operations;
-
+ proc_create("iSeries/lpevents", S_IFREG|S_IRUGO, NULL,
+ &proc_lpevents_operations);
return 0;
}
__initcall(proc_lpevents_init);
diff --git a/arch/powerpc/platforms/iseries/mf.c b/arch/powerpc/platforms/iseries/mf.c
index c0f2433bc16e..1dc7295746da 100644
--- a/arch/powerpc/platforms/iseries/mf.c
+++ b/arch/powerpc/platforms/iseries/mf.c
@@ -1255,11 +1255,11 @@ static int __init mf_proc_init(void)
if (i == 3) /* no vmlinux entry for 'D' */
continue;
- ent = create_proc_entry("vmlinux", S_IFREG|S_IWUSR, mf);
+ ent = proc_create_data("vmlinux", S_IFREG|S_IWUSR, mf,
+ &proc_vmlinux_operations,
+ (void *)(long)i);
if (!ent)
return 1;
- ent->data = (void *)(long)i;
- ent->proc_fops = &proc_vmlinux_operations;
}
ent = create_proc_entry("side", S_IFREG|S_IRUSR|S_IWUSR, mf_proc_root);
diff --git a/arch/powerpc/platforms/iseries/proc.c b/arch/powerpc/platforms/iseries/proc.c
index f2cde4180204..91f4c6cd4b99 100644
--- a/arch/powerpc/platforms/iseries/proc.c
+++ b/arch/powerpc/platforms/iseries/proc.c
@@ -110,15 +110,11 @@ static const struct file_operations proc_titantod_operations = {
static int __init iseries_proc_init(void)
{
- struct proc_dir_entry *e;
-
if (!firmware_has_feature(FW_FEATURE_ISERIES))
return 0;
- e = create_proc_entry("iSeries/titanTod", S_IFREG|S_IRUGO, NULL);
- if (e)
- e->proc_fops = &proc_titantod_operations;
-
+ proc_create("iSeries/titanTod", S_IFREG|S_IRUGO, NULL,
+ &proc_titantod_operations);
return 0;
}
__initcall(iseries_proc_init);
diff --git a/arch/powerpc/platforms/iseries/viopath.c b/arch/powerpc/platforms/iseries/viopath.c
index df23331eb25c..49ff4dc422b7 100644
--- a/arch/powerpc/platforms/iseries/viopath.c
+++ b/arch/powerpc/platforms/iseries/viopath.c
@@ -180,15 +180,10 @@ static const struct file_operations proc_viopath_operations = {
static int __init vio_proc_init(void)
{
- struct proc_dir_entry *e;
-
if (!firmware_has_feature(FW_FEATURE_ISERIES))
return 0;
- e = create_proc_entry("iSeries/config", 0, NULL);
- if (e)
- e->proc_fops = &proc_viopath_operations;
-
+ proc_create("iSeries/config", 0, NULL, &proc_viopath_operations);
return 0;
}
__initcall(vio_proc_init);
diff --git a/arch/powerpc/platforms/powermac/Makefile b/arch/powerpc/platforms/powermac/Makefile
index 78093d7f97af..4d72c8f72159 100644
--- a/arch/powerpc/platforms/powermac/Makefile
+++ b/arch/powerpc/platforms/powermac/Makefile
@@ -6,7 +6,10 @@ obj-y += pic.o setup.o time.o feature.o pci.o \
obj-$(CONFIG_PMAC_BACKLIGHT) += backlight.o
obj-$(CONFIG_CPU_FREQ_PMAC) += cpufreq_32.o
obj-$(CONFIG_CPU_FREQ_PMAC64) += cpufreq_64.o
-obj-$(CONFIG_NVRAM) += nvram.o
+# CONFIG_NVRAM is an arch. independant tristate symbol, for pmac32 we really
+# need this to be a bool. Cheat here and pretend CONFIG_NVRAM=m is really
+# CONFIG_NVRAM=y
+obj-$(CONFIG_NVRAM:m=y) += nvram.o
# ppc64 pmac doesn't define CONFIG_NVRAM but needs nvram stuff
obj-$(CONFIG_PPC64) += nvram.o
obj-$(CONFIG_PPC32) += bootx_init.o
diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c
index bf44c5441a36..00bd0166d07f 100644
--- a/arch/powerpc/platforms/powermac/setup.c
+++ b/arch/powerpc/platforms/powermac/setup.c
@@ -337,7 +337,8 @@ static void __init pmac_setup_arch(void)
find_via_pmu();
smu_init();
-#if defined(CONFIG_NVRAM) || defined(CONFIG_PPC64)
+#if defined(CONFIG_NVRAM) || defined(CONFIG_NVRAM_MODULE) || \
+ defined(CONFIG_PPC64)
pmac_nvram_init();
#endif
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index bd2593ed28dd..554c6e42ef2a 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -18,6 +18,7 @@ obj-$(CONFIG_PCI) += pci.o pci_dlpar.o
obj-$(CONFIG_PCI_MSI) += msi.o
obj-$(CONFIG_HOTPLUG_CPU) += hotplug-cpu.o
+obj-$(CONFIG_MEMORY_HOTPLUG) += hotplug-memory.o
obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o
obj-$(CONFIG_HVCS) += hvcserver.o
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index a3fd56b186e6..6f544ba4b37f 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -1259,14 +1259,8 @@ static const struct file_operations proc_eeh_operations = {
static int __init eeh_init_proc(void)
{
- struct proc_dir_entry *e;
-
- if (machine_is(pseries)) {
- e = create_proc_entry("ppc64/eeh", 0, NULL);
- if (e)
- e->proc_fops = &proc_eeh_operations;
- }
-
+ if (machine_is(pseries))
+ proc_create("ppc64/eeh", 0, NULL, &proc_eeh_operations);
return 0;
}
__initcall(eeh_init_proc);
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
new file mode 100644
index 000000000000..3c5727dd5aa5
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -0,0 +1,141 @@
+/*
+ * pseries Memory Hotplug infrastructure.
+ *
+ * Copyright (C) 2008 Badari Pulavarty, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/of.h>
+#include <linux/lmb.h>
+#include <asm/firmware.h>
+#include <asm/machdep.h>
+#include <asm/pSeries_reconfig.h>
+
+static int pseries_remove_memory(struct device_node *np)
+{
+ const char *type;
+ const unsigned int *my_index;
+ const unsigned int *regs;
+ u64 start_pfn, start;
+ struct zone *zone;
+ int ret = -EINVAL;
+
+ /*
+ * Check to see if we are actually removing memory
+ */
+ type = of_get_property(np, "device_type", NULL);
+ if (type == NULL || strcmp(type, "memory") != 0)
+ return 0;
+
+ /*
+ * Find the memory index and size of the removing section
+ */
+ my_index = of_get_property(np, "ibm,my-drc-index", NULL);
+ if (!my_index)
+ return ret;
+
+ regs = of_get_property(np, "reg", NULL);
+ if (!regs)
+ return ret;
+
+ start_pfn = section_nr_to_pfn(*my_index & 0xffff);
+ zone = page_zone(pfn_to_page(start_pfn));
+
+ /*
+ * Remove section mappings and sysfs entries for the
+ * section of the memory we are removing.
+ *
+ * NOTE: Ideally, this should be done in generic code like
+ * remove_memory(). But remove_memory() gets called by writing
+ * to sysfs "state" file and we can't remove sysfs entries
+ * while writing to it. So we have to defer it to here.
+ */
+ ret = __remove_pages(zone, start_pfn, regs[3] >> PAGE_SHIFT);
+ if (ret)
+ return ret;
+
+ /*
+ * Update memory regions for memory remove
+ */
+ lmb_remove(start_pfn << PAGE_SHIFT, regs[3]);
+
+ /*
+ * Remove htab bolted mappings for this section of memory
+ */
+ start = (unsigned long)__va(start_pfn << PAGE_SHIFT);
+ ret = remove_section_mapping(start, start + regs[3]);
+ return ret;
+}
+
+static int pseries_add_memory(struct device_node *np)
+{
+ const char *type;
+ const unsigned int *my_index;
+ const unsigned int *regs;
+ u64 start_pfn;
+ int ret = -EINVAL;
+
+ /*
+ * Check to see if we are actually adding memory
+ */
+ type = of_get_property(np, "device_type", NULL);
+ if (type == NULL || strcmp(type, "memory") != 0)
+ return 0;
+
+ /*
+ * Find the memory index and size of the added section
+ */
+ my_index = of_get_property(np, "ibm,my-drc-index", NULL);
+ if (!my_index)
+ return ret;
+
+ regs = of_get_property(np, "reg", NULL);
+ if (!regs)
+ return ret;
+
+ start_pfn = section_nr_to_pfn(*my_index & 0xffff);
+
+ /*
+ * Update memory region to represent the memory add
+ */
+ lmb_add(start_pfn << PAGE_SHIFT, regs[3]);
+ return 0;
+}
+
+static int pseries_memory_notifier(struct notifier_block *nb,
+ unsigned long action, void *node)
+{
+ int err = NOTIFY_OK;
+
+ switch (action) {
+ case PSERIES_RECONFIG_ADD:
+ if (pseries_add_memory(node))
+ err = NOTIFY_BAD;
+ break;
+ case PSERIES_RECONFIG_REMOVE:
+ if (pseries_remove_memory(node))
+ err = NOTIFY_BAD;
+ break;
+ default:
+ err = NOTIFY_DONE;
+ break;
+ }
+ return err;
+}
+
+static struct notifier_block pseries_mem_nb = {
+ .notifier_call = pseries_memory_notifier,
+};
+
+static int __init pseries_memory_hotplug_init(void)
+{
+ if (firmware_has_feature(FW_FEATURE_LPAR))
+ pSeries_reconfig_notifier_register(&pseries_mem_nb);
+
+ return 0;
+}
+machine_device_initcall(pseries, pseries_memory_hotplug_init);
diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c
index ac75c10de278..75769aae41d5 100644
--- a/arch/powerpc/platforms/pseries/reconfig.c
+++ b/arch/powerpc/platforms/pseries/reconfig.c
@@ -512,12 +512,9 @@ static int proc_ppc64_create_ofdt(void)
if (!machine_is(pseries))
return 0;
- ent = create_proc_entry("ppc64/ofdt", S_IWUSR, NULL);
- if (ent) {
- ent->data = NULL;
+ ent = proc_create("ppc64/ofdt", S_IWUSR, NULL, &ofdt_fops);
+ if (ent)
ent->size = 0;
- ent->proc_fops = &ofdt_fops;
- }
return 0;
}
diff --git a/arch/powerpc/platforms/pseries/rtasd.c b/arch/powerpc/platforms/pseries/rtasd.c
index befadd4f9524..7d3e2b0bd4d2 100644
--- a/arch/powerpc/platforms/pseries/rtasd.c
+++ b/arch/powerpc/platforms/pseries/rtasd.c
@@ -468,10 +468,9 @@ static int __init rtas_init(void)
return -ENOMEM;
}
- entry = create_proc_entry("ppc64/rtas/error_log", S_IRUSR, NULL);
- if (entry)
- entry->proc_fops = &proc_rtas_log_operations;
- else
+ entry = proc_create("ppc64/rtas/error_log", S_IRUSR, NULL,
+ &proc_rtas_log_operations);
+ if (!entry)
printk(KERN_ERR "Failed to create error_log proc entry\n");
if (kernel_thread(rtasd, NULL, CLONE_FS) < 0)
diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index d359d6e92975..7f59188cd9a1 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -143,7 +143,7 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio)
*/
static int
axon_ram_direct_access(struct block_device *device, sector_t sector,
- unsigned long *data)
+ void **kaddr, unsigned long *pfn)
{
struct axon_ram_bank *bank = device->bd_disk->private_data;
loff_t offset;
@@ -154,7 +154,8 @@ axon_ram_direct_access(struct block_device *device, sector_t sector,
return -ERANGE;
}
- *data = bank->ph_addr + offset;
+ *kaddr = (void *)(bank->ph_addr + offset);
+ *pfn = virt_to_phys(kaddr) >> PAGE_SHIFT;
return 0;
}
diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c
index af2425e4655f..3d920376f58e 100644
--- a/arch/powerpc/sysdev/fsl_rio.c
+++ b/arch/powerpc/sysdev/fsl_rio.c
@@ -1,5 +1,8 @@
/*
- * MPC85xx RapidIO support
+ * Freescale MPC85xx/MPC86xx RapidIO support
+ *
+ * Copyright (C) 2007, 2008 Freescale Semiconductor, Inc.
+ * Zhang Wei <wei.zhang@freescale.com>
*
* Copyright 2005 MontaVista Software, Inc.
* Matt Porter <mporter@kernel.crashing.org>
@@ -17,12 +20,23 @@
#include <linux/interrupt.h>
#include <linux/rio.h>
#include <linux/rio_drv.h>
+#include <linux/of_platform.h>
+#include <linux/delay.h>
#include <asm/io.h>
-#define RIO_REGS_BASE (CCSRBAR + 0xc0000)
+/* RapidIO definition irq, which read from OF-tree */
+#define IRQ_RIO_BELL(m) (((struct rio_priv *)(m->priv))->bellirq)
+#define IRQ_RIO_TX(m) (((struct rio_priv *)(m->priv))->txirq)
+#define IRQ_RIO_RX(m) (((struct rio_priv *)(m->priv))->rxirq)
+
#define RIO_ATMU_REGS_OFFSET 0x10c00
-#define RIO_MSG_REGS_OFFSET 0x11000
+#define RIO_P_MSG_REGS_OFFSET 0x11000
+#define RIO_S_MSG_REGS_OFFSET 0x13000
+#define RIO_ESCSR 0x158
+#define RIO_CCSR 0x15c
+#define RIO_ISR_AACR 0x10120
+#define RIO_ISR_AACR_AA 0x1 /* Accept All ID */
#define RIO_MAINT_WIN_SIZE 0x400000
#define RIO_DBELL_WIN_SIZE 0x1000
@@ -50,18 +64,18 @@
#define DOORBELL_DSR_TE 0x00000080
#define DOORBELL_DSR_QFI 0x00000010
#define DOORBELL_DSR_DIQI 0x00000001
-#define DOORBELL_TID_OFFSET 0x03
-#define DOORBELL_SID_OFFSET 0x05
+#define DOORBELL_TID_OFFSET 0x02
+#define DOORBELL_SID_OFFSET 0x04
#define DOORBELL_INFO_OFFSET 0x06
#define DOORBELL_MESSAGE_SIZE 0x08
-#define DBELL_SID(x) (*(u8 *)(x + DOORBELL_SID_OFFSET))
-#define DBELL_TID(x) (*(u8 *)(x + DOORBELL_TID_OFFSET))
+#define DBELL_SID(x) (*(u16 *)(x + DOORBELL_SID_OFFSET))
+#define DBELL_TID(x) (*(u16 *)(x + DOORBELL_TID_OFFSET))
#define DBELL_INF(x) (*(u16 *)(x + DOORBELL_INFO_OFFSET))
struct rio_atmu_regs {
u32 rowtar;
- u32 pad1;
+ u32 rowtear;
u32 rowbar;
u32 pad2;
u32 rowar;
@@ -87,7 +101,15 @@ struct rio_msg_regs {
u32 ifqdpar;
u32 pad6;
u32 ifqepar;
- u32 pad7[250];
+ u32 pad7[226];
+ u32 odmr;
+ u32 odsr;
+ u32 res0[4];
+ u32 oddpr;
+ u32 oddatr;
+ u32 res1[3];
+ u32 odretcr;
+ u32 res2[12];
u32 dmr;
u32 dsr;
u32 pad8;
@@ -112,20 +134,12 @@ struct rio_tx_desc {
u32 res4;
};
-static u32 regs_win;
-static struct rio_atmu_regs *atmu_regs;
-static struct rio_atmu_regs *maint_atmu_regs;
-static struct rio_atmu_regs *dbell_atmu_regs;
-static u32 dbell_win;
-static u32 maint_win;
-static struct rio_msg_regs *msg_regs;
-
-static struct rio_dbell_ring {
+struct rio_dbell_ring {
void *virt;
dma_addr_t phys;
-} dbell_ring;
+};
-static struct rio_msg_tx_ring {
+struct rio_msg_tx_ring {
void *virt;
dma_addr_t phys;
void *virt_buffer[RIO_MAX_TX_RING_SIZE];
@@ -133,19 +147,35 @@ static struct rio_msg_tx_ring {
int tx_slot;
int size;
void *dev_id;
-} msg_tx_ring;
+};
-static struct rio_msg_rx_ring {
+struct rio_msg_rx_ring {
void *virt;
dma_addr_t phys;
void *virt_buffer[RIO_MAX_RX_RING_SIZE];
int rx_slot;
int size;
void *dev_id;
-} msg_rx_ring;
+};
+
+struct rio_priv {
+ void __iomem *regs_win;
+ struct rio_atmu_regs __iomem *atmu_regs;
+ struct rio_atmu_regs __iomem *maint_atmu_regs;
+ struct rio_atmu_regs __iomem *dbell_atmu_regs;
+ void __iomem *dbell_win;
+ void __iomem *maint_win;
+ struct rio_msg_regs __iomem *msg_regs;
+ struct rio_dbell_ring dbell_ring;
+ struct rio_msg_tx_ring msg_tx_ring;
+ struct rio_msg_rx_ring msg_rx_ring;
+ int bellirq;
+ int txirq;
+ int rxirq;
+};
/**
- * mpc85xx_rio_doorbell_send - Send a MPC85xx doorbell message
+ * fsl_rio_doorbell_send - Send a MPC85xx doorbell message
* @index: ID of RapidIO interface
* @destid: Destination ID of target device
* @data: 16-bit info field of RapidIO doorbell message
@@ -153,18 +183,34 @@ static struct rio_msg_rx_ring {
* Sends a MPC85xx doorbell message. Returns %0 on success or
* %-EINVAL on failure.
*/
-static int mpc85xx_rio_doorbell_send(int index, u16 destid, u16 data)
+static int fsl_rio_doorbell_send(struct rio_mport *mport,
+ int index, u16 destid, u16 data)
{
- pr_debug("mpc85xx_doorbell_send: index %d destid %4.4x data %4.4x\n",
+ struct rio_priv *priv = mport->priv;
+ pr_debug("fsl_doorbell_send: index %d destid %4.4x data %4.4x\n",
index, destid, data);
- out_be32((void *)&dbell_atmu_regs->rowtar, destid << 22);
- out_be16((void *)(dbell_win), data);
+ switch (mport->phy_type) {
+ case RIO_PHY_PARALLEL:
+ out_be32(&priv->dbell_atmu_regs->rowtar, destid << 22);
+ out_be16(priv->dbell_win, data);
+ break;
+ case RIO_PHY_SERIAL:
+ /* In the serial version silicons, such as MPC8548, MPC8641,
+ * below operations is must be.
+ */
+ out_be32(&priv->msg_regs->odmr, 0x00000000);
+ out_be32(&priv->msg_regs->odretcr, 0x00000004);
+ out_be32(&priv->msg_regs->oddpr, destid << 16);
+ out_be32(&priv->msg_regs->oddatr, data);
+ out_be32(&priv->msg_regs->odmr, 0x00000001);
+ break;
+ }
return 0;
}
/**
- * mpc85xx_local_config_read - Generate a MPC85xx local config space read
+ * fsl_local_config_read - Generate a MPC85xx local config space read
* @index: ID of RapdiIO interface
* @offset: Offset into configuration space
* @len: Length (in bytes) of the maintenance transaction
@@ -173,17 +219,19 @@ static int mpc85xx_rio_doorbell_send(int index, u16 destid, u16 data)
* Generates a MPC85xx local configuration space read. Returns %0 on
* success or %-EINVAL on failure.
*/
-static int mpc85xx_local_config_read(int index, u32 offset, int len, u32 * data)
+static int fsl_local_config_read(struct rio_mport *mport,
+ int index, u32 offset, int len, u32 *data)
{
- pr_debug("mpc85xx_local_config_read: index %d offset %8.8x\n", index,
+ struct rio_priv *priv = mport->priv;
+ pr_debug("fsl_local_config_read: index %d offset %8.8x\n", index,
offset);
- *data = in_be32((void *)(regs_win + offset));
+ *data = in_be32(priv->regs_win + offset);
return 0;
}
/**
- * mpc85xx_local_config_write - Generate a MPC85xx local config space write
+ * fsl_local_config_write - Generate a MPC85xx local config space write
* @index: ID of RapdiIO interface
* @offset: Offset into configuration space
* @len: Length (in bytes) of the maintenance transaction
@@ -192,18 +240,20 @@ static int mpc85xx_local_config_read(int index, u32 offset, int len, u32 * data)
* Generates a MPC85xx local configuration space write. Returns %0 on
* success or %-EINVAL on failure.
*/
-static int mpc85xx_local_config_write(int index, u32 offset, int len, u32 data)
+static int fsl_local_config_write(struct rio_mport *mport,
+ int index, u32 offset, int len, u32 data)
{
+ struct rio_priv *priv = mport->priv;
pr_debug
- ("mpc85xx_local_config_write: index %d offset %8.8x data %8.8x\n",
+ ("fsl_local_config_write: index %d offset %8.8x data %8.8x\n",
index, offset, data);
- out_be32((void *)(regs_win + offset), data);
+ out_be32(priv->regs_win + offset, data);
return 0;
}
/**
- * mpc85xx_rio_config_read - Generate a MPC85xx read maintenance transaction
+ * fsl_rio_config_read - Generate a MPC85xx read maintenance transaction
* @index: ID of RapdiIO interface
* @destid: Destination ID of transaction
* @hopcount: Number of hops to target device
@@ -215,18 +265,19 @@ static int mpc85xx_local_config_write(int index, u32 offset, int len, u32 data)
* success or %-EINVAL on failure.
*/
static int
-mpc85xx_rio_config_read(int index, u16 destid, u8 hopcount, u32 offset, int len,
- u32 * val)
+fsl_rio_config_read(struct rio_mport *mport, int index, u16 destid,
+ u8 hopcount, u32 offset, int len, u32 *val)
{
+ struct rio_priv *priv = mport->priv;
u8 *data;
pr_debug
- ("mpc85xx_rio_config_read: index %d destid %d hopcount %d offset %8.8x len %d\n",
+ ("fsl_rio_config_read: index %d destid %d hopcount %d offset %8.8x len %d\n",
index, destid, hopcount, offset, len);
- out_be32((void *)&maint_atmu_regs->rowtar,
+ out_be32(&priv->maint_atmu_regs->rowtar,
(destid << 22) | (hopcount << 12) | ((offset & ~0x3) >> 9));
- data = (u8 *) maint_win + offset;
+ data = (u8 *) priv->maint_win + offset;
switch (len) {
case 1:
*val = in_8((u8 *) data);
@@ -243,7 +294,7 @@ mpc85xx_rio_config_read(int index, u16 destid, u8 hopcount, u32 offset, int len,
}
/**
- * mpc85xx_rio_config_write - Generate a MPC85xx write maintenance transaction
+ * fsl_rio_config_write - Generate a MPC85xx write maintenance transaction
* @index: ID of RapdiIO interface
* @destid: Destination ID of transaction
* @hopcount: Number of hops to target device
@@ -255,17 +306,18 @@ mpc85xx_rio_config_read(int index, u16 destid, u8 hopcount, u32 offset, int len,
* success or %-EINVAL on failure.
*/
static int
-mpc85xx_rio_config_write(int index, u16 destid, u8 hopcount, u32 offset,
- int len, u32 val)
+fsl_rio_config_write(struct rio_mport *mport, int index, u16 destid,
+ u8 hopcount, u32 offset, int len, u32 val)
{
+ struct rio_priv *priv = mport->priv;
u8 *data;
pr_debug
- ("mpc85xx_rio_config_write: index %d destid %d hopcount %d offset %8.8x len %d val %8.8x\n",
+ ("fsl_rio_config_write: index %d destid %d hopcount %d offset %8.8x len %d val %8.8x\n",
index, destid, hopcount, offset, len, val);
- out_be32((void *)&maint_atmu_regs->rowtar,
+ out_be32(&priv->maint_atmu_regs->rowtar,
(destid << 22) | (hopcount << 12) | ((offset & ~0x3) >> 9));
- data = (u8 *) maint_win + offset;
+ data = (u8 *) priv->maint_win + offset;
switch (len) {
case 1:
out_8((u8 *) data, val);
@@ -296,9 +348,10 @@ int
rio_hw_add_outb_message(struct rio_mport *mport, struct rio_dev *rdev, int mbox,
void *buffer, size_t len)
{
+ struct rio_priv *priv = mport->priv;
u32 omr;
- struct rio_tx_desc *desc =
- (struct rio_tx_desc *)msg_tx_ring.virt + msg_tx_ring.tx_slot;
+ struct rio_tx_desc *desc = (struct rio_tx_desc *)priv->msg_tx_ring.virt
+ + priv->msg_tx_ring.tx_slot;
int ret = 0;
pr_debug
@@ -311,31 +364,43 @@ rio_hw_add_outb_message(struct rio_mport *mport, struct rio_dev *rdev, int mbox,
}
/* Copy and clear rest of buffer */
- memcpy(msg_tx_ring.virt_buffer[msg_tx_ring.tx_slot], buffer, len);
+ memcpy(priv->msg_tx_ring.virt_buffer[priv->msg_tx_ring.tx_slot], buffer,
+ len);
if (len < (RIO_MAX_MSG_SIZE - 4))
- memset((void *)((u32) msg_tx_ring.
- virt_buffer[msg_tx_ring.tx_slot] + len), 0,
- RIO_MAX_MSG_SIZE - len);
+ memset(priv->msg_tx_ring.virt_buffer[priv->msg_tx_ring.tx_slot]
+ + len, 0, RIO_MAX_MSG_SIZE - len);
- /* Set mbox field for message */
- desc->dport = mbox & 0x3;
+ switch (mport->phy_type) {
+ case RIO_PHY_PARALLEL:
+ /* Set mbox field for message */
+ desc->dport = mbox & 0x3;
- /* Enable EOMI interrupt, set priority, and set destid */
- desc->dattr = 0x28000000 | (rdev->destid << 2);
+ /* Enable EOMI interrupt, set priority, and set destid */
+ desc->dattr = 0x28000000 | (rdev->destid << 2);
+ break;
+ case RIO_PHY_SERIAL:
+ /* Set mbox field for message, and set destid */
+ desc->dport = (rdev->destid << 16) | (mbox & 0x3);
+
+ /* Enable EOMI interrupt and priority */
+ desc->dattr = 0x28000000;
+ break;
+ }
/* Set transfer size aligned to next power of 2 (in double words) */
desc->dwcnt = is_power_of_2(len) ? len : 1 << get_bitmask_order(len);
/* Set snooping and source buffer address */
- desc->saddr = 0x00000004 | msg_tx_ring.phys_buffer[msg_tx_ring.tx_slot];
+ desc->saddr = 0x00000004
+ | priv->msg_tx_ring.phys_buffer[priv->msg_tx_ring.tx_slot];
/* Increment enqueue pointer */
- omr = in_be32((void *)&msg_regs->omr);
- out_be32((void *)&msg_regs->omr, omr | RIO_MSG_OMR_MUI);
+ omr = in_be32(&priv->msg_regs->omr);
+ out_be32(&priv->msg_regs->omr, omr | RIO_MSG_OMR_MUI);
/* Go to next descriptor */
- if (++msg_tx_ring.tx_slot == msg_tx_ring.size)
- msg_tx_ring.tx_slot = 0;
+ if (++priv->msg_tx_ring.tx_slot == priv->msg_tx_ring.size)
+ priv->msg_tx_ring.tx_slot = 0;
out:
return ret;
@@ -344,7 +409,7 @@ rio_hw_add_outb_message(struct rio_mport *mport, struct rio_dev *rdev, int mbox,
EXPORT_SYMBOL_GPL(rio_hw_add_outb_message);
/**
- * mpc85xx_rio_tx_handler - MPC85xx outbound message interrupt handler
+ * fsl_rio_tx_handler - MPC85xx outbound message interrupt handler
* @irq: Linux interrupt number
* @dev_instance: Pointer to interrupt-specific data
*
@@ -352,32 +417,34 @@ EXPORT_SYMBOL_GPL(rio_hw_add_outb_message);
* mailbox event handler and acks the interrupt occurrence.
*/
static irqreturn_t
-mpc85xx_rio_tx_handler(int irq, void *dev_instance)
+fsl_rio_tx_handler(int irq, void *dev_instance)
{
int osr;
struct rio_mport *port = (struct rio_mport *)dev_instance;
+ struct rio_priv *priv = port->priv;
- osr = in_be32((void *)&msg_regs->osr);
+ osr = in_be32(&priv->msg_regs->osr);
if (osr & RIO_MSG_OSR_TE) {
pr_info("RIO: outbound message transmission error\n");
- out_be32((void *)&msg_regs->osr, RIO_MSG_OSR_TE);
+ out_be32(&priv->msg_regs->osr, RIO_MSG_OSR_TE);
goto out;
}
if (osr & RIO_MSG_OSR_QOI) {
pr_info("RIO: outbound message queue overflow\n");
- out_be32((void *)&msg_regs->osr, RIO_MSG_OSR_QOI);
+ out_be32(&priv->msg_regs->osr, RIO_MSG_OSR_QOI);
goto out;
}
if (osr & RIO_MSG_OSR_EOMI) {
- u32 dqp = in_be32((void *)&msg_regs->odqdpar);
- int slot = (dqp - msg_tx_ring.phys) >> 5;
- port->outb_msg[0].mcback(port, msg_tx_ring.dev_id, -1, slot);
+ u32 dqp = in_be32(&priv->msg_regs->odqdpar);
+ int slot = (dqp - priv->msg_tx_ring.phys) >> 5;
+ port->outb_msg[0].mcback(port, priv->msg_tx_ring.dev_id, -1,
+ slot);
/* Ack the end-of-message interrupt */
- out_be32((void *)&msg_regs->osr, RIO_MSG_OSR_EOMI);
+ out_be32(&priv->msg_regs->osr, RIO_MSG_OSR_EOMI);
}
out:
@@ -398,6 +465,7 @@ mpc85xx_rio_tx_handler(int irq, void *dev_instance)
int rio_open_outb_mbox(struct rio_mport *mport, void *dev_id, int mbox, int entries)
{
int i, j, rc = 0;
+ struct rio_priv *priv = mport->priv;
if ((entries < RIO_MIN_TX_RING_SIZE) ||
(entries > RIO_MAX_TX_RING_SIZE) || (!is_power_of_2(entries))) {
@@ -406,54 +474,53 @@ int rio_open_outb_mbox(struct rio_mport *mport, void *dev_id, int mbox, int entr
}
/* Initialize shadow copy ring */
- msg_tx_ring.dev_id = dev_id;
- msg_tx_ring.size = entries;
-
- for (i = 0; i < msg_tx_ring.size; i++) {
- if (!
- (msg_tx_ring.virt_buffer[i] =
- dma_alloc_coherent(NULL, RIO_MSG_BUFFER_SIZE,
- &msg_tx_ring.phys_buffer[i],
- GFP_KERNEL))) {
+ priv->msg_tx_ring.dev_id = dev_id;
+ priv->msg_tx_ring.size = entries;
+
+ for (i = 0; i < priv->msg_tx_ring.size; i++) {
+ priv->msg_tx_ring.virt_buffer[i] =
+ dma_alloc_coherent(NULL, RIO_MSG_BUFFER_SIZE,
+ &priv->msg_tx_ring.phys_buffer[i], GFP_KERNEL);
+ if (!priv->msg_tx_ring.virt_buffer[i]) {
rc = -ENOMEM;
- for (j = 0; j < msg_tx_ring.size; j++)
- if (msg_tx_ring.virt_buffer[j])
+ for (j = 0; j < priv->msg_tx_ring.size; j++)
+ if (priv->msg_tx_ring.virt_buffer[j])
dma_free_coherent(NULL,
- RIO_MSG_BUFFER_SIZE,
- msg_tx_ring.
- virt_buffer[j],
- msg_tx_ring.
- phys_buffer[j]);
+ RIO_MSG_BUFFER_SIZE,
+ priv->msg_tx_ring.
+ virt_buffer[j],
+ priv->msg_tx_ring.
+ phys_buffer[j]);
goto out;
}
}
/* Initialize outbound message descriptor ring */
- if (!(msg_tx_ring.virt = dma_alloc_coherent(NULL,
- msg_tx_ring.size *
- RIO_MSG_DESC_SIZE,
- &msg_tx_ring.phys,
- GFP_KERNEL))) {
+ priv->msg_tx_ring.virt = dma_alloc_coherent(NULL,
+ priv->msg_tx_ring.size * RIO_MSG_DESC_SIZE,
+ &priv->msg_tx_ring.phys, GFP_KERNEL);
+ if (!priv->msg_tx_ring.virt) {
rc = -ENOMEM;
goto out_dma;
}
- memset(msg_tx_ring.virt, 0, msg_tx_ring.size * RIO_MSG_DESC_SIZE);
- msg_tx_ring.tx_slot = 0;
+ memset(priv->msg_tx_ring.virt, 0,
+ priv->msg_tx_ring.size * RIO_MSG_DESC_SIZE);
+ priv->msg_tx_ring.tx_slot = 0;
/* Point dequeue/enqueue pointers at first entry in ring */
- out_be32((void *)&msg_regs->odqdpar, msg_tx_ring.phys);
- out_be32((void *)&msg_regs->odqepar, msg_tx_ring.phys);
+ out_be32(&priv->msg_regs->odqdpar, priv->msg_tx_ring.phys);
+ out_be32(&priv->msg_regs->odqepar, priv->msg_tx_ring.phys);
/* Configure for snooping */
- out_be32((void *)&msg_regs->osar, 0x00000004);
+ out_be32(&priv->msg_regs->osar, 0x00000004);
/* Clear interrupt status */
- out_be32((void *)&msg_regs->osr, 0x000000b3);
+ out_be32(&priv->msg_regs->osr, 0x000000b3);
/* Hook up outbound message handler */
- if ((rc =
- request_irq(MPC85xx_IRQ_RIO_TX, mpc85xx_rio_tx_handler, 0,
- "msg_tx", (void *)mport)) < 0)
+ rc = request_irq(IRQ_RIO_TX(mport), fsl_rio_tx_handler, 0,
+ "msg_tx", (void *)mport);
+ if (rc < 0)
goto out_irq;
/*
@@ -463,28 +530,28 @@ int rio_open_outb_mbox(struct rio_mport *mport, void *dev_id, int mbox, int entr
* Chaining mode
* Disable
*/
- out_be32((void *)&msg_regs->omr, 0x00100220);
+ out_be32(&priv->msg_regs->omr, 0x00100220);
/* Set number of entries */
- out_be32((void *)&msg_regs->omr,
- in_be32((void *)&msg_regs->omr) |
+ out_be32(&priv->msg_regs->omr,
+ in_be32(&priv->msg_regs->omr) |
((get_bitmask_order(entries) - 2) << 12));
/* Now enable the unit */
- out_be32((void *)&msg_regs->omr, in_be32((void *)&msg_regs->omr) | 0x1);
+ out_be32(&priv->msg_regs->omr, in_be32(&priv->msg_regs->omr) | 0x1);
out:
return rc;
out_irq:
- dma_free_coherent(NULL, msg_tx_ring.size * RIO_MSG_DESC_SIZE,
- msg_tx_ring.virt, msg_tx_ring.phys);
+ dma_free_coherent(NULL, priv->msg_tx_ring.size * RIO_MSG_DESC_SIZE,
+ priv->msg_tx_ring.virt, priv->msg_tx_ring.phys);
out_dma:
- for (i = 0; i < msg_tx_ring.size; i++)
+ for (i = 0; i < priv->msg_tx_ring.size; i++)
dma_free_coherent(NULL, RIO_MSG_BUFFER_SIZE,
- msg_tx_ring.virt_buffer[i],
- msg_tx_ring.phys_buffer[i]);
+ priv->msg_tx_ring.virt_buffer[i],
+ priv->msg_tx_ring.phys_buffer[i]);
return rc;
}
@@ -499,19 +566,20 @@ int rio_open_outb_mbox(struct rio_mport *mport, void *dev_id, int mbox, int entr
*/
void rio_close_outb_mbox(struct rio_mport *mport, int mbox)
{
+ struct rio_priv *priv = mport->priv;
/* Disable inbound message unit */
- out_be32((void *)&msg_regs->omr, 0);
+ out_be32(&priv->msg_regs->omr, 0);
/* Free ring */
- dma_free_coherent(NULL, msg_tx_ring.size * RIO_MSG_DESC_SIZE,
- msg_tx_ring.virt, msg_tx_ring.phys);
+ dma_free_coherent(NULL, priv->msg_tx_ring.size * RIO_MSG_DESC_SIZE,
+ priv->msg_tx_ring.virt, priv->msg_tx_ring.phys);
/* Free interrupt */
- free_irq(MPC85xx_IRQ_RIO_TX, (void *)mport);
+ free_irq(IRQ_RIO_TX(mport), (void *)mport);
}
/**
- * mpc85xx_rio_rx_handler - MPC85xx inbound message interrupt handler
+ * fsl_rio_rx_handler - MPC85xx inbound message interrupt handler
* @irq: Linux interrupt number
* @dev_instance: Pointer to interrupt-specific data
*
@@ -519,16 +587,17 @@ void rio_close_outb_mbox(struct rio_mport *mport, int mbox)
* mailbox event handler and acks the interrupt occurrence.
*/
static irqreturn_t
-mpc85xx_rio_rx_handler(int irq, void *dev_instance)
+fsl_rio_rx_handler(int irq, void *dev_instance)
{
int isr;
struct rio_mport *port = (struct rio_mport *)dev_instance;
+ struct rio_priv *priv = port->priv;
- isr = in_be32((void *)&msg_regs->isr);
+ isr = in_be32(&priv->msg_regs->isr);
if (isr & RIO_MSG_ISR_TE) {
pr_info("RIO: inbound message reception error\n");
- out_be32((void *)&msg_regs->isr, RIO_MSG_ISR_TE);
+ out_be32((void *)&priv->msg_regs->isr, RIO_MSG_ISR_TE);
goto out;
}
@@ -540,10 +609,10 @@ mpc85xx_rio_rx_handler(int irq, void *dev_instance)
* make the callback with an unknown/invalid mailbox number
* argument.
*/
- port->inb_msg[0].mcback(port, msg_rx_ring.dev_id, -1, -1);
+ port->inb_msg[0].mcback(port, priv->msg_rx_ring.dev_id, -1, -1);
/* Ack the queueing interrupt */
- out_be32((void *)&msg_regs->isr, RIO_MSG_ISR_DIQI);
+ out_be32(&priv->msg_regs->isr, RIO_MSG_ISR_DIQI);
}
out:
@@ -564,6 +633,7 @@ mpc85xx_rio_rx_handler(int irq, void *dev_instance)
int rio_open_inb_mbox(struct rio_mport *mport, void *dev_id, int mbox, int entries)
{
int i, rc = 0;
+ struct rio_priv *priv = mport->priv;
if ((entries < RIO_MIN_RX_RING_SIZE) ||
(entries > RIO_MAX_RX_RING_SIZE) || (!is_power_of_2(entries))) {
@@ -572,36 +642,35 @@ int rio_open_inb_mbox(struct rio_mport *mport, void *dev_id, int mbox, int entri
}
/* Initialize client buffer ring */
- msg_rx_ring.dev_id = dev_id;
- msg_rx_ring.size = entries;
- msg_rx_ring.rx_slot = 0;
- for (i = 0; i < msg_rx_ring.size; i++)
- msg_rx_ring.virt_buffer[i] = NULL;
+ priv->msg_rx_ring.dev_id = dev_id;
+ priv->msg_rx_ring.size = entries;
+ priv->msg_rx_ring.rx_slot = 0;
+ for (i = 0; i < priv->msg_rx_ring.size; i++)
+ priv->msg_rx_ring.virt_buffer[i] = NULL;
/* Initialize inbound message ring */
- if (!(msg_rx_ring.virt = dma_alloc_coherent(NULL,
- msg_rx_ring.size *
- RIO_MAX_MSG_SIZE,
- &msg_rx_ring.phys,
- GFP_KERNEL))) {
+ priv->msg_rx_ring.virt = dma_alloc_coherent(NULL,
+ priv->msg_rx_ring.size * RIO_MAX_MSG_SIZE,
+ &priv->msg_rx_ring.phys, GFP_KERNEL);
+ if (!priv->msg_rx_ring.virt) {
rc = -ENOMEM;
goto out;
}
/* Point dequeue/enqueue pointers at first entry in ring */
- out_be32((void *)&msg_regs->ifqdpar, (u32) msg_rx_ring.phys);
- out_be32((void *)&msg_regs->ifqepar, (u32) msg_rx_ring.phys);
+ out_be32(&priv->msg_regs->ifqdpar, (u32) priv->msg_rx_ring.phys);
+ out_be32(&priv->msg_regs->ifqepar, (u32) priv->msg_rx_ring.phys);
/* Clear interrupt status */
- out_be32((void *)&msg_regs->isr, 0x00000091);
+ out_be32(&priv->msg_regs->isr, 0x00000091);
/* Hook up inbound message handler */
- if ((rc =
- request_irq(MPC85xx_IRQ_RIO_RX, mpc85xx_rio_rx_handler, 0,
- "msg_rx", (void *)mport)) < 0) {
+ rc = request_irq(IRQ_RIO_RX(mport), fsl_rio_rx_handler, 0,
+ "msg_rx", (void *)mport);
+ if (rc < 0) {
dma_free_coherent(NULL, RIO_MSG_BUFFER_SIZE,
- msg_tx_ring.virt_buffer[i],
- msg_tx_ring.phys_buffer[i]);
+ priv->msg_tx_ring.virt_buffer[i],
+ priv->msg_tx_ring.phys_buffer[i]);
goto out;
}
@@ -612,15 +681,13 @@ int rio_open_inb_mbox(struct rio_mport *mport, void *dev_id, int mbox, int entri
* Unmask all interrupt sources
* Disable
*/
- out_be32((void *)&msg_regs->imr, 0x001b0060);
+ out_be32(&priv->msg_regs->imr, 0x001b0060);
/* Set number of queue entries */
- out_be32((void *)&msg_regs->imr,
- in_be32((void *)&msg_regs->imr) |
- ((get_bitmask_order(entries) - 2) << 12));
+ setbits32(&priv->msg_regs->imr, (get_bitmask_order(entries) - 2) << 12);
/* Now enable the unit */
- out_be32((void *)&msg_regs->imr, in_be32((void *)&msg_regs->imr) | 0x1);
+ setbits32(&priv->msg_regs->imr, 0x1);
out:
return rc;
@@ -636,15 +703,16 @@ int rio_open_inb_mbox(struct rio_mport *mport, void *dev_id, int mbox, int entri
*/
void rio_close_inb_mbox(struct rio_mport *mport, int mbox)
{
+ struct rio_priv *priv = mport->priv;
/* Disable inbound message unit */
- out_be32((void *)&msg_regs->imr, 0);
+ out_be32(&priv->msg_regs->imr, 0);
/* Free ring */
- dma_free_coherent(NULL, msg_rx_ring.size * RIO_MAX_MSG_SIZE,
- msg_rx_ring.virt, msg_rx_ring.phys);
+ dma_free_coherent(NULL, priv->msg_rx_ring.size * RIO_MAX_MSG_SIZE,
+ priv->msg_rx_ring.virt, priv->msg_rx_ring.phys);
/* Free interrupt */
- free_irq(MPC85xx_IRQ_RIO_RX, (void *)mport);
+ free_irq(IRQ_RIO_RX(mport), (void *)mport);
}
/**
@@ -659,21 +727,22 @@ void rio_close_inb_mbox(struct rio_mport *mport, int mbox)
int rio_hw_add_inb_buffer(struct rio_mport *mport, int mbox, void *buf)
{
int rc = 0;
+ struct rio_priv *priv = mport->priv;
pr_debug("RIO: rio_hw_add_inb_buffer(), msg_rx_ring.rx_slot %d\n",
- msg_rx_ring.rx_slot);
+ priv->msg_rx_ring.rx_slot);
- if (msg_rx_ring.virt_buffer[msg_rx_ring.rx_slot]) {
+ if (priv->msg_rx_ring.virt_buffer[priv->msg_rx_ring.rx_slot]) {
printk(KERN_ERR
"RIO: error adding inbound buffer %d, buffer exists\n",
- msg_rx_ring.rx_slot);
+ priv->msg_rx_ring.rx_slot);
rc = -EINVAL;
goto out;
}
- msg_rx_ring.virt_buffer[msg_rx_ring.rx_slot] = buf;
- if (++msg_rx_ring.rx_slot == msg_rx_ring.size)
- msg_rx_ring.rx_slot = 0;
+ priv->msg_rx_ring.virt_buffer[priv->msg_rx_ring.rx_slot] = buf;
+ if (++priv->msg_rx_ring.rx_slot == priv->msg_rx_ring.size)
+ priv->msg_rx_ring.rx_slot = 0;
out:
return rc;
@@ -691,20 +760,21 @@ EXPORT_SYMBOL_GPL(rio_hw_add_inb_buffer);
*/
void *rio_hw_get_inb_message(struct rio_mport *mport, int mbox)
{
- u32 imr;
+ struct rio_priv *priv = mport->priv;
u32 phys_buf, virt_buf;
void *buf = NULL;
int buf_idx;
- phys_buf = in_be32((void *)&msg_regs->ifqdpar);
+ phys_buf = in_be32(&priv->msg_regs->ifqdpar);
/* If no more messages, then bail out */
- if (phys_buf == in_be32((void *)&msg_regs->ifqepar))
+ if (phys_buf == in_be32(&priv->msg_regs->ifqepar))
goto out2;
- virt_buf = (u32) msg_rx_ring.virt + (phys_buf - msg_rx_ring.phys);
- buf_idx = (phys_buf - msg_rx_ring.phys) / RIO_MAX_MSG_SIZE;
- buf = msg_rx_ring.virt_buffer[buf_idx];
+ virt_buf = (u32) priv->msg_rx_ring.virt + (phys_buf
+ - priv->msg_rx_ring.phys);
+ buf_idx = (phys_buf - priv->msg_rx_ring.phys) / RIO_MAX_MSG_SIZE;
+ buf = priv->msg_rx_ring.virt_buffer[buf_idx];
if (!buf) {
printk(KERN_ERR
@@ -716,11 +786,10 @@ void *rio_hw_get_inb_message(struct rio_mport *mport, int mbox)
memcpy(buf, (void *)virt_buf, RIO_MAX_MSG_SIZE);
/* Clear the available buffer */
- msg_rx_ring.virt_buffer[buf_idx] = NULL;
+ priv->msg_rx_ring.virt_buffer[buf_idx] = NULL;
out1:
- imr = in_be32((void *)&msg_regs->imr);
- out_be32((void *)&msg_regs->imr, imr | RIO_MSG_IMR_MI);
+ setbits32(&priv->msg_regs->imr, RIO_MSG_IMR_MI);
out2:
return buf;
@@ -729,7 +798,7 @@ void *rio_hw_get_inb_message(struct rio_mport *mport, int mbox)
EXPORT_SYMBOL_GPL(rio_hw_get_inb_message);
/**
- * mpc85xx_rio_dbell_handler - MPC85xx doorbell interrupt handler
+ * fsl_rio_dbell_handler - MPC85xx doorbell interrupt handler
* @irq: Linux interrupt number
* @dev_instance: Pointer to interrupt-specific data
*
@@ -737,31 +806,31 @@ EXPORT_SYMBOL_GPL(rio_hw_get_inb_message);
* doorbell event handlers and executes a matching event handler.
*/
static irqreturn_t
-mpc85xx_rio_dbell_handler(int irq, void *dev_instance)
+fsl_rio_dbell_handler(int irq, void *dev_instance)
{
int dsr;
struct rio_mport *port = (struct rio_mport *)dev_instance;
+ struct rio_priv *priv = port->priv;
- dsr = in_be32((void *)&msg_regs->dsr);
+ dsr = in_be32(&priv->msg_regs->dsr);
if (dsr & DOORBELL_DSR_TE) {
pr_info("RIO: doorbell reception error\n");
- out_be32((void *)&msg_regs->dsr, DOORBELL_DSR_TE);
+ out_be32(&priv->msg_regs->dsr, DOORBELL_DSR_TE);
goto out;
}
if (dsr & DOORBELL_DSR_QFI) {
pr_info("RIO: doorbell queue full\n");
- out_be32((void *)&msg_regs->dsr, DOORBELL_DSR_QFI);
+ out_be32(&priv->msg_regs->dsr, DOORBELL_DSR_QFI);
goto out;
}
/* XXX Need to check/dispatch until queue empty */
if (dsr & DOORBELL_DSR_DIQI) {
u32 dmsg =
- (u32) dbell_ring.virt +
- (in_be32((void *)&msg_regs->dqdpar) & 0xfff);
- u32 dmr;
+ (u32) priv->dbell_ring.virt +
+ (in_be32(&priv->msg_regs->dqdpar) & 0xfff);
struct rio_dbell *dbell;
int found = 0;
@@ -784,9 +853,8 @@ mpc85xx_rio_dbell_handler(int irq, void *dev_instance)
("RIO: spurious doorbell, sid %2.2x tid %2.2x info %4.4x\n",
DBELL_SID(dmsg), DBELL_TID(dmsg), DBELL_INF(dmsg));
}
- dmr = in_be32((void *)&msg_regs->dmr);
- out_be32((void *)&msg_regs->dmr, dmr | DOORBELL_DMR_DI);
- out_be32((void *)&msg_regs->dsr, DOORBELL_DSR_DIQI);
+ setbits32(&priv->msg_regs->dmr, DOORBELL_DMR_DI);
+ out_be32(&priv->msg_regs->dsr, DOORBELL_DSR_DIQI);
}
out:
@@ -794,21 +862,22 @@ mpc85xx_rio_dbell_handler(int irq, void *dev_instance)
}
/**
- * mpc85xx_rio_doorbell_init - MPC85xx doorbell interface init
+ * fsl_rio_doorbell_init - MPC85xx doorbell interface init
* @mport: Master port implementing the inbound doorbell unit
*
* Initializes doorbell unit hardware and inbound DMA buffer
- * ring. Called from mpc85xx_rio_setup(). Returns %0 on success
+ * ring. Called from fsl_rio_setup(). Returns %0 on success
* or %-ENOMEM on failure.
*/
-static int mpc85xx_rio_doorbell_init(struct rio_mport *mport)
+static int fsl_rio_doorbell_init(struct rio_mport *mport)
{
+ struct rio_priv *priv = mport->priv;
int rc = 0;
/* Map outbound doorbell window immediately after maintenance window */
- if (!(dbell_win =
- (u32) ioremap(mport->iores.start + RIO_MAINT_WIN_SIZE,
- RIO_DBELL_WIN_SIZE))) {
+ priv->dbell_win = ioremap(mport->iores.start + RIO_MAINT_WIN_SIZE,
+ RIO_DBELL_WIN_SIZE);
+ if (!priv->dbell_win) {
printk(KERN_ERR
"RIO: unable to map outbound doorbell window\n");
rc = -ENOMEM;
@@ -816,37 +885,36 @@ static int mpc85xx_rio_doorbell_init(struct rio_mport *mport)
}
/* Initialize inbound doorbells */
- if (!(dbell_ring.virt = dma_alloc_coherent(NULL,
- 512 * DOORBELL_MESSAGE_SIZE,
- &dbell_ring.phys,
- GFP_KERNEL))) {
+ priv->dbell_ring.virt = dma_alloc_coherent(NULL, 512 *
+ DOORBELL_MESSAGE_SIZE, &priv->dbell_ring.phys, GFP_KERNEL);
+ if (!priv->dbell_ring.virt) {
printk(KERN_ERR "RIO: unable allocate inbound doorbell ring\n");
rc = -ENOMEM;
- iounmap((void *)dbell_win);
+ iounmap(priv->dbell_win);
goto out;
}
/* Point dequeue/enqueue pointers at first entry in ring */
- out_be32((void *)&msg_regs->dqdpar, (u32) dbell_ring.phys);
- out_be32((void *)&msg_regs->dqepar, (u32) dbell_ring.phys);
+ out_be32(&priv->msg_regs->dqdpar, (u32) priv->dbell_ring.phys);
+ out_be32(&priv->msg_regs->dqepar, (u32) priv->dbell_ring.phys);
/* Clear interrupt status */
- out_be32((void *)&msg_regs->dsr, 0x00000091);
+ out_be32(&priv->msg_regs->dsr, 0x00000091);
/* Hook up doorbell handler */
- if ((rc =
- request_irq(MPC85xx_IRQ_RIO_BELL, mpc85xx_rio_dbell_handler, 0,
- "dbell_rx", (void *)mport) < 0)) {
- iounmap((void *)dbell_win);
+ rc = request_irq(IRQ_RIO_BELL(mport), fsl_rio_dbell_handler, 0,
+ "dbell_rx", (void *)mport);
+ if (rc < 0) {
+ iounmap(priv->dbell_win);
dma_free_coherent(NULL, 512 * DOORBELL_MESSAGE_SIZE,
- dbell_ring.virt, dbell_ring.phys);
+ priv->dbell_ring.virt, priv->dbell_ring.phys);
printk(KERN_ERR
"MPC85xx RIO: unable to request inbound doorbell irq");
goto out;
}
/* Configure doorbells for snooping, 512 entries, and enable */
- out_be32((void *)&msg_regs->dmr, 0x00108161);
+ out_be32(&priv->msg_regs->dmr, 0x00108161);
out:
return rc;
@@ -854,7 +922,7 @@ static int mpc85xx_rio_doorbell_init(struct rio_mport *mport)
static char *cmdline = NULL;
-static int mpc85xx_rio_get_hdid(int index)
+static int fsl_rio_get_hdid(int index)
{
/* XXX Need to parse multiple entries in some format */
if (!cmdline)
@@ -863,7 +931,7 @@ static int mpc85xx_rio_get_hdid(int index)
return simple_strtol(cmdline, NULL, 0);
}
-static int mpc85xx_rio_get_cmdline(char *s)
+static int fsl_rio_get_cmdline(char *s)
{
if (!s)
return 0;
@@ -872,61 +940,266 @@ static int mpc85xx_rio_get_cmdline(char *s)
return 1;
}
-__setup("riohdid=", mpc85xx_rio_get_cmdline);
+__setup("riohdid=", fsl_rio_get_cmdline);
+
+static inline void fsl_rio_info(struct device *dev, u32 ccsr)
+{
+ const char *str;
+ if (ccsr & 1) {
+ /* Serial phy */
+ switch (ccsr >> 30) {
+ case 0:
+ str = "1";
+ break;
+ case 1:
+ str = "4";
+ break;
+ default:
+ str = "Unknown";
+ break;;
+ }
+ dev_info(dev, "Hardware port width: %s\n", str);
+
+ switch ((ccsr >> 27) & 7) {
+ case 0:
+ str = "Single-lane 0";
+ break;
+ case 1:
+ str = "Single-lane 2";
+ break;
+ case 2:
+ str = "Four-lane";
+ break;
+ default:
+ str = "Unknown";
+ break;
+ }
+ dev_info(dev, "Training connection status: %s\n", str);
+ } else {
+ /* Parallel phy */
+ if (!(ccsr & 0x80000000))
+ dev_info(dev, "Output port operating in 8-bit mode\n");
+ if (!(ccsr & 0x08000000))
+ dev_info(dev, "Input port operating in 8-bit mode\n");
+ }
+}
/**
- * mpc85xx_rio_setup - Setup MPC85xx RapidIO interface
- * @law_start: Starting physical address of RapidIO LAW
- * @law_size: Size of RapidIO LAW
+ * fsl_rio_setup - Setup MPC85xx RapidIO interface
+ * @fsl_rio_setup - Setup Freescale PowerPC RapidIO interface
*
* Initializes MPC85xx RapidIO hardware interface, configures
* master port with system-specific info, and registers the
* master port with the RapidIO subsystem.
*/
-void mpc85xx_rio_setup(int law_start, int law_size)
+int fsl_rio_setup(struct of_device *dev)
{
struct rio_ops *ops;
struct rio_mport *port;
+ struct rio_priv *priv;
+ int rc = 0;
+ const u32 *dt_range, *cell;
+ struct resource regs;
+ int rlen;
+ u32 ccsr;
+ u64 law_start, law_size;
+ int paw, aw, sw;
+
+ if (!dev->node) {
+ dev_err(&dev->dev, "Device OF-Node is NULL");
+ return -EFAULT;
+ }
+
+ rc = of_address_to_resource(dev->node, 0, &regs);
+ if (rc) {
+ dev_err(&dev->dev, "Can't get %s property 'reg'\n",
+ dev->node->full_name);
+ return -EFAULT;
+ }
+ dev_info(&dev->dev, "Of-device full name %s\n", dev->node->full_name);
+ dev_info(&dev->dev, "Regs start 0x%08x size 0x%08x\n", regs.start,
+ regs.end - regs.start + 1);
+
+ dt_range = of_get_property(dev->node, "ranges", &rlen);
+ if (!dt_range) {
+ dev_err(&dev->dev, "Can't get %s property 'ranges'\n",
+ dev->node->full_name);
+ return -EFAULT;
+ }
+
+ /* Get node address wide */
+ cell = of_get_property(dev->node, "#address-cells", NULL);
+ if (cell)
+ aw = *cell;
+ else
+ aw = of_n_addr_cells(dev->node);
+ /* Get node size wide */
+ cell = of_get_property(dev->node, "#size-cells", NULL);
+ if (cell)
+ sw = *cell;
+ else
+ sw = of_n_size_cells(dev->node);
+ /* Get parent address wide wide */
+ paw = of_n_addr_cells(dev->node);
+
+ law_start = of_read_number(dt_range + aw, paw);
+ law_size = of_read_number(dt_range + aw + paw, sw);
+
+ dev_info(&dev->dev, "LAW start 0x%016llx, size 0x%016llx.\n",
+ law_start, law_size);
ops = kmalloc(sizeof(struct rio_ops), GFP_KERNEL);
- ops->lcread = mpc85xx_local_config_read;
- ops->lcwrite = mpc85xx_local_config_write;
- ops->cread = mpc85xx_rio_config_read;
- ops->cwrite = mpc85xx_rio_config_write;
- ops->dsend = mpc85xx_rio_doorbell_send;
+ ops->lcread = fsl_local_config_read;
+ ops->lcwrite = fsl_local_config_write;
+ ops->cread = fsl_rio_config_read;
+ ops->cwrite = fsl_rio_config_write;
+ ops->dsend = fsl_rio_doorbell_send;
- port = kmalloc(sizeof(struct rio_mport), GFP_KERNEL);
+ port = kzalloc(sizeof(struct rio_mport), GFP_KERNEL);
port->id = 0;
port->index = 0;
+
+ priv = kzalloc(sizeof(struct rio_priv), GFP_KERNEL);
+ if (!priv) {
+ printk(KERN_ERR "Can't alloc memory for 'priv'\n");
+ rc = -ENOMEM;
+ goto err;
+ }
+
INIT_LIST_HEAD(&port->dbells);
port->iores.start = law_start;
port->iores.end = law_start + law_size;
port->iores.flags = IORESOURCE_MEM;
+ priv->bellirq = irq_of_parse_and_map(dev->node, 2);
+ priv->txirq = irq_of_parse_and_map(dev->node, 3);
+ priv->rxirq = irq_of_parse_and_map(dev->node, 4);
+ dev_info(&dev->dev, "bellirq: %d, txirq: %d, rxirq %d\n", priv->bellirq,
+ priv->txirq, priv->rxirq);
+
rio_init_dbell_res(&port->riores[RIO_DOORBELL_RESOURCE], 0, 0xffff);
rio_init_mbox_res(&port->riores[RIO_INB_MBOX_RESOURCE], 0, 0);
rio_init_mbox_res(&port->riores[RIO_OUTB_MBOX_RESOURCE], 0, 0);
strcpy(port->name, "RIO0 mport");
port->ops = ops;
- port->host_deviceid = mpc85xx_rio_get_hdid(port->id);
+ port->host_deviceid = fsl_rio_get_hdid(port->id);
+ port->priv = priv;
rio_register_mport(port);
- regs_win = (u32) ioremap(RIO_REGS_BASE, 0x20000);
- atmu_regs = (struct rio_atmu_regs *)(regs_win + RIO_ATMU_REGS_OFFSET);
- maint_atmu_regs = atmu_regs + 1;
- dbell_atmu_regs = atmu_regs + 2;
- msg_regs = (struct rio_msg_regs *)(regs_win + RIO_MSG_REGS_OFFSET);
+ priv->regs_win = ioremap(regs.start, regs.end - regs.start + 1);
+
+ /* Probe the master port phy type */
+ ccsr = in_be32(priv->regs_win + RIO_CCSR);
+ port->phy_type = (ccsr & 1) ? RIO_PHY_SERIAL : RIO_PHY_PARALLEL;
+ dev_info(&dev->dev, "RapidIO PHY type: %s\n",
+ (port->phy_type == RIO_PHY_PARALLEL) ? "parallel" :
+ ((port->phy_type == RIO_PHY_SERIAL) ? "serial" :
+ "unknown"));
+ /* Checking the port training status */
+ if (in_be32((priv->regs_win + RIO_ESCSR)) & 1) {
+ dev_err(&dev->dev, "Port is not ready. "
+ "Try to restart connection...\n");
+ switch (port->phy_type) {
+ case RIO_PHY_SERIAL:
+ /* Disable ports */
+ out_be32(priv->regs_win + RIO_CCSR, 0);
+ /* Set 1x lane */
+ setbits32(priv->regs_win + RIO_CCSR, 0x02000000);
+ /* Enable ports */
+ setbits32(priv->regs_win + RIO_CCSR, 0x00600000);
+ break;
+ case RIO_PHY_PARALLEL:
+ /* Disable ports */
+ out_be32(priv->regs_win + RIO_CCSR, 0x22000000);
+ /* Enable ports */
+ out_be32(priv->regs_win + RIO_CCSR, 0x44000000);
+ break;
+ }
+ msleep(100);
+ if (in_be32((priv->regs_win + RIO_ESCSR)) & 1) {
+ dev_err(&dev->dev, "Port restart failed.\n");
+ rc = -ENOLINK;
+ goto err;
+ }
+ dev_info(&dev->dev, "Port restart success!\n");
+ }
+ fsl_rio_info(&dev->dev, ccsr);
+
+ port->sys_size = (in_be32((priv->regs_win + RIO_PEF_CAR))
+ & RIO_PEF_CTLS) >> 4;
+ dev_info(&dev->dev, "RapidIO Common Transport System size: %d\n",
+ port->sys_size ? 65536 : 256);
+
+ priv->atmu_regs = (struct rio_atmu_regs *)(priv->regs_win
+ + RIO_ATMU_REGS_OFFSET);
+ priv->maint_atmu_regs = priv->atmu_regs + 1;
+ priv->dbell_atmu_regs = priv->atmu_regs + 2;
+ priv->msg_regs = (struct rio_msg_regs *)(priv->regs_win +
+ ((port->phy_type == RIO_PHY_SERIAL) ?
+ RIO_S_MSG_REGS_OFFSET : RIO_P_MSG_REGS_OFFSET));
+
+ /* Set to receive any dist ID for serial RapidIO controller. */
+ if (port->phy_type == RIO_PHY_SERIAL)
+ out_be32((priv->regs_win + RIO_ISR_AACR), RIO_ISR_AACR_AA);
/* Configure maintenance transaction window */
- out_be32((void *)&maint_atmu_regs->rowbar, 0x000c0000);
- out_be32((void *)&maint_atmu_regs->rowar, 0x80077015);
+ out_be32(&priv->maint_atmu_regs->rowbar, 0x000c0000);
+ out_be32(&priv->maint_atmu_regs->rowar, 0x80077015);
- maint_win = (u32) ioremap(law_start, RIO_MAINT_WIN_SIZE);
+ priv->maint_win = ioremap(law_start, RIO_MAINT_WIN_SIZE);
/* Configure outbound doorbell window */
- out_be32((void *)&dbell_atmu_regs->rowbar, 0x000c0400);
- out_be32((void *)&dbell_atmu_regs->rowar, 0x8004200b);
- mpc85xx_rio_doorbell_init(port);
+ out_be32(&priv->dbell_atmu_regs->rowbar, 0x000c0400);
+ out_be32(&priv->dbell_atmu_regs->rowar, 0x8004200b);
+ fsl_rio_doorbell_init(port);
+
+ return 0;
+err:
+ if (priv)
+ iounmap(priv->regs_win);
+ kfree(ops);
+ kfree(priv);
+ kfree(port);
+ return rc;
+}
+
+/* The probe function for RapidIO peer-to-peer network.
+ */
+static int __devinit fsl_of_rio_rpn_probe(struct of_device *dev,
+ const struct of_device_id *match)
+{
+ int rc;
+ printk(KERN_INFO "Setting up RapidIO peer-to-peer network %s\n",
+ dev->node->full_name);
+
+ rc = fsl_rio_setup(dev);
+ if (rc)
+ goto out;
+
+ /* Enumerate all registered ports */
+ rc = rio_init_mports();
+out:
+ return rc;
+};
+
+static const struct of_device_id fsl_of_rio_rpn_ids[] = {
+ {
+ .compatible = "fsl,rapidio-delta",
+ },
+ {},
+};
+
+static struct of_platform_driver fsl_of_rio_rpn_driver = {
+ .name = "fsl-of-rio",
+ .match_table = fsl_of_rio_rpn_ids,
+ .probe = fsl_of_rio_rpn_probe,
+};
+
+static __init int fsl_of_rio_rpn_init(void)
+{
+ return of_register_platform_driver(&fsl_of_rio_rpn_driver);
}
+
+subsys_initcall(fsl_of_rio_rpn_init);
diff --git a/arch/powerpc/sysdev/fsl_rio.h b/arch/powerpc/sysdev/fsl_rio.h
deleted file mode 100644
index 6d3ff30b1579..000000000000
--- a/arch/powerpc/sysdev/fsl_rio.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- * MPC85xx RapidIO definitions
- *
- * Copyright 2005 MontaVista Software, Inc.
- * Matt Porter <mporter@kernel.crashing.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-#ifndef __PPC_SYSLIB_PPC85XX_RIO_H
-#define __PPC_SYSLIB_PPC85XX_RIO_H
-
-#include <linux/init.h>
-
-extern void mpc85xx_rio_setup(int law_start, int law_size);
-
-#endif /* __PPC_SYSLIB_PPC85XX_RIO_H */
diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c
index 5c1b246aaccc..7b45670c7af3 100644
--- a/arch/powerpc/sysdev/fsl_soc.c
+++ b/arch/powerpc/sysdev/fsl_soc.c
@@ -892,3 +892,44 @@ void fsl_rstcr_restart(char *cmd)
while (1) ;
}
#endif
+
+#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
+struct platform_diu_data_ops diu_ops = {
+ .diu_size = 1280 * 1024 * 4, /* default one 1280x1024 buffer */
+};
+EXPORT_SYMBOL(diu_ops);
+
+int __init preallocate_diu_videomemory(void)
+{
+ pr_debug("diu_size=%lu\n", diu_ops.diu_size);
+
+ diu_ops.diu_mem = __alloc_bootmem(diu_ops.diu_size, 8, 0);
+ if (!diu_ops.diu_mem) {
+ printk(KERN_ERR "fsl-diu: cannot allocate %lu bytes\n",
+ diu_ops.diu_size);
+ return -ENOMEM;
+ }
+
+ pr_debug("diu_mem=%p\n", diu_ops.diu_mem);
+
+ rh_init(&diu_ops.diu_rh_info, 4096, ARRAY_SIZE(diu_ops.diu_rh_block),
+ diu_ops.diu_rh_block);
+ return rh_attach_region(&diu_ops.diu_rh_info,
+ (unsigned long) diu_ops.diu_mem,
+ diu_ops.diu_size);
+}
+
+static int __init early_parse_diufb(char *p)
+{
+ if (!p)
+ return 1;
+
+ diu_ops.diu_size = _ALIGN_UP(memparse(p, &p), 8);
+
+ pr_debug("diu_size=%lu\n", diu_ops.diu_size);
+
+ return 0;
+}
+early_param("diufb", early_parse_diufb);
+
+#endif
diff --git a/arch/powerpc/sysdev/fsl_soc.h b/arch/powerpc/sysdev/fsl_soc.h
index 74c4a9657b33..52c831fa1886 100644
--- a/arch/powerpc/sysdev/fsl_soc.h
+++ b/arch/powerpc/sysdev/fsl_soc.h
@@ -17,5 +17,28 @@ extern int fsl_spi_init(struct spi_board_info *board_infos,
void (*deactivate_cs)(u8 cs, u8 polarity));
extern void fsl_rstcr_restart(char *cmd);
+
+#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
+#include <linux/bootmem.h>
+#include <asm/rheap.h>
+struct platform_diu_data_ops {
+ rh_block_t diu_rh_block[16];
+ rh_info_t diu_rh_info;
+ unsigned long diu_size;
+ void *diu_mem;
+
+ unsigned int (*get_pixel_format) (unsigned int bits_per_pixel,
+ int monitor_port);
+ void (*set_gamma_table) (int monitor_port, char *gamma_table_base);
+ void (*set_monitor_port) (int monitor_port);
+ void (*set_pixel_clock) (unsigned int pixclock);
+ ssize_t (*show_monitor_port) (int monitor_port, char *buf);
+ int (*set_sysfs_monitor_port) (int val);
+};
+
+extern struct platform_diu_data_ops diu_ops;
+int __init preallocate_diu_videomemory(void);
+#endif
+
#endif
#endif
diff --git a/arch/ppc/kernel/asm-offsets.c b/arch/ppc/kernel/asm-offsets.c
index a51a17714231..8dcbdd6c2d2c 100644
--- a/arch/ppc/kernel/asm-offsets.c
+++ b/arch/ppc/kernel/asm-offsets.c
@@ -18,6 +18,8 @@
#include <linux/suspend.h>
#include <linux/mman.h>
#include <linux/mm.h>
+#include <linux/kbuild.h>
+
#include <asm/io.h>
#include <asm/page.h>
#include <asm/pgtable.h>
@@ -26,11 +28,6 @@
#include <asm/thread_info.h>
#include <asm/vdso_datapage.h>
-#define DEFINE(sym, val) \
- asm volatile("\n->" #sym " %0 " #val : : "i" (val))
-
-#define BLANK() asm volatile("\n->" : : )
-
int
main(void)
{
diff --git a/arch/ppc/kernel/pci.c b/arch/ppc/kernel/pci.c
index 50ce83f20adb..df3ef6db072c 100644
--- a/arch/ppc/kernel/pci.c
+++ b/arch/ppc/kernel/pci.c
@@ -1121,8 +1121,8 @@ void __init pci_init_resource(struct resource *res, resource_size_t start,
void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max)
{
- unsigned long start = pci_resource_start(dev, bar);
- unsigned long len = pci_resource_len(dev, bar);
+ resource_size_t start = pci_resource_start(dev, bar);
+ resource_size_t len = pci_resource_len(dev, bar);
unsigned long flags = pci_resource_flags(dev, bar);
if (!len)
diff --git a/arch/ppc/platforms/sbc82xx.c b/arch/ppc/platforms/sbc82xx.c
index 0df6aacb8237..24f6e0694ac1 100644
--- a/arch/ppc/platforms/sbc82xx.c
+++ b/arch/ppc/platforms/sbc82xx.c
@@ -30,8 +30,6 @@ static void (*callback_init_IRQ)(void);
extern unsigned char __res[sizeof(bd_t)];
-extern void (*late_time_init)(void);
-
#ifdef CONFIG_GEN_RTC
TODC_ALLOC();
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index f6a68e178fc5..8f5f02160ffc 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -62,6 +62,10 @@ config GENERIC_LOCKBREAK
default y
depends on SMP && PREEMPT
+config PGSTE
+ bool
+ default y if KVM
+
mainmenu "Linux Kernel Configuration"
config S390
@@ -69,6 +73,7 @@ config S390
select HAVE_OPROFILE
select HAVE_KPROBES
select HAVE_KRETPROBES
+ select HAVE_KVM if 64BIT
source "init/Kconfig"
@@ -515,6 +520,13 @@ config ZFCPDUMP
Select this option if you want to build an zfcpdump enabled kernel.
Refer to <file:Documentation/s390/zfcpdump.txt> for more details on this.
+config S390_GUEST
+bool "s390 guest support (EXPERIMENTAL)"
+ depends on 64BIT && EXPERIMENTAL
+ select VIRTIO
+ select VIRTIO_RING
+ help
+ Select this option if you want to run the kernel under s390 linux
endmenu
source "net/Kconfig"
@@ -536,3 +548,5 @@ source "security/Kconfig"
source "crypto/Kconfig"
source "lib/Kconfig"
+
+source "arch/s390/kvm/Kconfig"
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index f708be367b03..792a4e7743ce 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -87,7 +87,7 @@ LDFLAGS_vmlinux := -e start
head-y := arch/s390/kernel/head.o arch/s390/kernel/init_task.o
core-y += arch/s390/mm/ arch/s390/kernel/ arch/s390/crypto/ \
- arch/s390/appldata/ arch/s390/hypfs/
+ arch/s390/appldata/ arch/s390/hypfs/ arch/s390/kvm/
libs-y += arch/s390/lib/
drivers-y += drivers/s390/
drivers-$(CONFIG_MATHEMU) += arch/s390/math-emu/
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 1375f8a4469e..fa28ecae636b 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -5,44 +5,38 @@
*/
#include <linux/sched.h>
-
-/* Use marker if you need to separate the values later */
-
-#define DEFINE(sym, val, marker) \
- asm volatile("\n->" #sym " %0 " #val " " #marker : : "i" (val))
-
-#define BLANK() asm volatile("\n->" : : )
+#include <linux/kbuild.h>
int main(void)
{
- DEFINE(__THREAD_info, offsetof(struct task_struct, stack),);
- DEFINE(__THREAD_ksp, offsetof(struct task_struct, thread.ksp),);
- DEFINE(__THREAD_per, offsetof(struct task_struct, thread.per_info),);
+ DEFINE(__THREAD_info, offsetof(struct task_struct, stack));
+ DEFINE(__THREAD_ksp, offsetof(struct task_struct, thread.ksp));
+ DEFINE(__THREAD_per, offsetof(struct task_struct, thread.per_info));
DEFINE(__THREAD_mm_segment,
- offsetof(struct task_struct, thread.mm_segment),);
+ offsetof(struct task_struct, thread.mm_segment));
BLANK();
- DEFINE(__TASK_pid, offsetof(struct task_struct, pid),);
+ DEFINE(__TASK_pid, offsetof(struct task_struct, pid));
BLANK();
- DEFINE(__PER_atmid, offsetof(per_struct, lowcore.words.perc_atmid),);
- DEFINE(__PER_address, offsetof(per_struct, lowcore.words.address),);
- DEFINE(__PER_access_id, offsetof(per_struct, lowcore.words.access_id),);
+ DEFINE(__PER_atmid, offsetof(per_struct, lowcore.words.perc_atmid));
+ DEFINE(__PER_address, offsetof(per_struct, lowcore.words.address));
+ DEFINE(__PER_access_id, offsetof(per_struct, lowcore.words.access_id));
BLANK();
- DEFINE(__TI_task, offsetof(struct thread_info, task),);
- DEFINE(__TI_domain, offsetof(struct thread_info, exec_domain),);
- DEFINE(__TI_flags, offsetof(struct thread_info, flags),);
- DEFINE(__TI_cpu, offsetof(struct thread_info, cpu),);
- DEFINE(__TI_precount, offsetof(struct thread_info, preempt_count),);
+ DEFINE(__TI_task, offsetof(struct thread_info, task));
+ DEFINE(__TI_domain, offsetof(struct thread_info, exec_domain));
+ DEFINE(__TI_flags, offsetof(struct thread_info, flags));
+ DEFINE(__TI_cpu, offsetof(struct thread_info, cpu));
+ DEFINE(__TI_precount, offsetof(struct thread_info, preempt_count));
BLANK();
- DEFINE(__PT_ARGS, offsetof(struct pt_regs, args),);
- DEFINE(__PT_PSW, offsetof(struct pt_regs, psw),);
- DEFINE(__PT_GPRS, offsetof(struct pt_regs, gprs),);
- DEFINE(__PT_ORIG_GPR2, offsetof(struct pt_regs, orig_gpr2),);
- DEFINE(__PT_ILC, offsetof(struct pt_regs, ilc),);
- DEFINE(__PT_TRAP, offsetof(struct pt_regs, trap),);
- DEFINE(__PT_SIZE, sizeof(struct pt_regs),);
+ DEFINE(__PT_ARGS, offsetof(struct pt_regs, args));
+ DEFINE(__PT_PSW, offsetof(struct pt_regs, psw));
+ DEFINE(__PT_GPRS, offsetof(struct pt_regs, gprs));
+ DEFINE(__PT_ORIG_GPR2, offsetof(struct pt_regs, orig_gpr2));
+ DEFINE(__PT_ILC, offsetof(struct pt_regs, ilc));
+ DEFINE(__PT_TRAP, offsetof(struct pt_regs, trap));
+ DEFINE(__PT_SIZE, sizeof(struct pt_regs));
BLANK();
- DEFINE(__SF_BACKCHAIN, offsetof(struct stack_frame, back_chain),);
- DEFINE(__SF_GPRS, offsetof(struct stack_frame, gprs),);
- DEFINE(__SF_EMPTY, offsetof(struct stack_frame, empty1),);
+ DEFINE(__SF_BACKCHAIN, offsetof(struct stack_frame, back_chain));
+ DEFINE(__SF_GPRS, offsetof(struct stack_frame, gprs));
+ DEFINE(__SF_EMPTY, offsetof(struct stack_frame, empty1));
return 0;
}
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 540a67f979b6..68ec4083bf73 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -144,6 +144,10 @@ static noinline __init void detect_machine_type(void)
/* Running on a P/390 ? */
if (cpuinfo->cpu_id.machine == 0x7490)
machine_flags |= 4;
+
+ /* Running under KVM ? */
+ if (cpuinfo->cpu_id.version == 0xfe)
+ machine_flags |= 64;
}
#ifdef CONFIG_64BIT
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index c36d8123ca14..c59a86dca584 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -60,8 +60,6 @@ init_IRQ(void)
/*
* Switch to the asynchronous interrupt stack for softirq execution.
*/
-extern void __do_softirq(void);
-
asmlinkage void do_softirq(void)
{
unsigned long flags, old, new;
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 7141147e6b63..a9d18aafa5f4 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -316,7 +316,11 @@ static int __init early_parse_ipldelay(char *p)
early_param("ipldelay", early_parse_ipldelay);
#ifdef CONFIG_S390_SWITCH_AMODE
+#ifdef CONFIG_PGSTE
+unsigned int switch_amode = 1;
+#else
unsigned int switch_amode = 0;
+#endif
EXPORT_SYMBOL_GPL(switch_amode);
static void set_amode_and_uaccess(unsigned long user_amode,
@@ -797,9 +801,13 @@ setup_arch(char **cmdline_p)
"This machine has an IEEE fpu\n" :
"This machine has no IEEE fpu\n");
#else /* CONFIG_64BIT */
- printk((MACHINE_IS_VM) ?
- "We are running under VM (64 bit mode)\n" :
- "We are running native (64 bit mode)\n");
+ if (MACHINE_IS_VM)
+ printk("We are running under VM (64 bit mode)\n");
+ else if (MACHINE_IS_KVM) {
+ printk("We are running under KVM (64 bit mode)\n");
+ add_preferred_console("ttyS", 1, NULL);
+ } else
+ printk("We are running native (64 bit mode)\n");
#endif /* CONFIG_64BIT */
/* Save unparsed command line copy for /proc/cmdline */
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index c5f05b3fb2c3..ca90ee3f930e 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -110,6 +110,7 @@ void account_system_vtime(struct task_struct *tsk)
S390_lowcore.steal_clock -= cputime << 12;
account_system_time(tsk, 0, cputime);
}
+EXPORT_SYMBOL_GPL(account_system_vtime);
static inline void set_vtimer(__u64 expires)
{
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
new file mode 100644
index 000000000000..1761b74d639b
--- /dev/null
+++ b/arch/s390/kvm/Kconfig
@@ -0,0 +1,46 @@
+#
+# KVM configuration
+#
+config HAVE_KVM
+ bool
+
+menuconfig VIRTUALIZATION
+ bool "Virtualization"
+ default y
+ ---help---
+ Say Y here to get to see options for using your Linux host to run other
+ operating systems inside virtual machines (guests).
+ This option alone does not add any kernel code.
+
+ If you say N, all options in this submenu will be skipped and disabled.
+
+if VIRTUALIZATION
+
+config KVM
+ tristate "Kernel-based Virtual Machine (KVM) support"
+ depends on HAVE_KVM && EXPERIMENTAL
+ select PREEMPT_NOTIFIERS
+ select ANON_INODES
+ select S390_SWITCH_AMODE
+ select PREEMPT
+ ---help---
+ Support hosting paravirtualized guest machines using the SIE
+ virtualization capability on the mainframe. This should work
+ on any 64bit machine.
+
+ This module provides access to the hardware capabilities through
+ a character device node named /dev/kvm.
+
+ To compile this as a module, choose M here: the module
+ will be called kvm.
+
+ If unsure, say N.
+
+config KVM_TRACE
+ bool
+
+# OK, it's a little counter-intuitive to do this, but it puts it neatly under
+# the virtualization menu.
+source drivers/virtio/Kconfig
+
+endif # VIRTUALIZATION
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
new file mode 100644
index 000000000000..e5221ec0b8e3
--- /dev/null
+++ b/arch/s390/kvm/Makefile
@@ -0,0 +1,14 @@
+# Makefile for kernel virtual machines on s390
+#
+# Copyright IBM Corp. 2008
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License (version 2 only)
+# as published by the Free Software Foundation.
+
+common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o)
+
+EXTRA_CFLAGS += -Ivirt/kvm -Iarch/s390/kvm
+
+kvm-objs := $(common-objs) kvm-s390.o sie64a.o intercept.o interrupt.o priv.o sigp.o diag.o
+obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
new file mode 100644
index 000000000000..f639a152869f
--- /dev/null
+++ b/arch/s390/kvm/diag.c
@@ -0,0 +1,67 @@
+/*
+ * diag.c - handling diagnose instructions
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * Author(s): Carsten Otte <cotte@de.ibm.com>
+ * Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include "kvm-s390.h"
+
+static int __diag_time_slice_end(struct kvm_vcpu *vcpu)
+{
+ VCPU_EVENT(vcpu, 5, "%s", "diag time slice end");
+ vcpu->stat.diagnose_44++;
+ vcpu_put(vcpu);
+ schedule();
+ vcpu_load(vcpu);
+ return 0;
+}
+
+static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
+{
+ unsigned int reg = vcpu->arch.sie_block->ipa & 0xf;
+ unsigned long subcode = vcpu->arch.guest_gprs[reg] & 0xffff;
+
+ VCPU_EVENT(vcpu, 5, "diag ipl functions, subcode %lx", subcode);
+ switch (subcode) {
+ case 3:
+ vcpu->run->s390_reset_flags = KVM_S390_RESET_CLEAR;
+ break;
+ case 4:
+ vcpu->run->s390_reset_flags = 0;
+ break;
+ default:
+ return -ENOTSUPP;
+ }
+
+ atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
+ vcpu->run->s390_reset_flags |= KVM_S390_RESET_SUBSYSTEM;
+ vcpu->run->s390_reset_flags |= KVM_S390_RESET_IPL;
+ vcpu->run->s390_reset_flags |= KVM_S390_RESET_CPU_INIT;
+ vcpu->run->exit_reason = KVM_EXIT_S390_RESET;
+ VCPU_EVENT(vcpu, 3, "requesting userspace resets %lx",
+ vcpu->run->s390_reset_flags);
+ return -EREMOTE;
+}
+
+int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
+{
+ int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16;
+
+ switch (code) {
+ case 0x44:
+ return __diag_time_slice_end(vcpu);
+ case 0x308:
+ return __diag_ipl_functions(vcpu);
+ default:
+ return -ENOTSUPP;
+ }
+}
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
new file mode 100644
index 000000000000..4e0633c413f3
--- /dev/null
+++ b/arch/s390/kvm/gaccess.h
@@ -0,0 +1,274 @@
+/*
+ * gaccess.h - access guest memory
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * Author(s): Carsten Otte <cotte@de.ibm.com>
+ */
+
+#ifndef __KVM_S390_GACCESS_H
+#define __KVM_S390_GACCESS_H
+
+#include <linux/compiler.h>
+#include <linux/kvm_host.h>
+#include <asm/uaccess.h>
+
+static inline void __user *__guestaddr_to_user(struct kvm_vcpu *vcpu,
+ u64 guestaddr)
+{
+ u64 prefix = vcpu->arch.sie_block->prefix;
+ u64 origin = vcpu->kvm->arch.guest_origin;
+ u64 memsize = vcpu->kvm->arch.guest_memsize;
+
+ if (guestaddr < 2 * PAGE_SIZE)
+ guestaddr += prefix;
+ else if ((guestaddr >= prefix) && (guestaddr < prefix + 2 * PAGE_SIZE))
+ guestaddr -= prefix;
+
+ if (guestaddr > memsize)
+ return (void __user __force *) ERR_PTR(-EFAULT);
+
+ guestaddr += origin;
+
+ return (void __user *) guestaddr;
+}
+
+static inline int get_guest_u64(struct kvm_vcpu *vcpu, u64 guestaddr,
+ u64 *result)
+{
+ void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
+
+ BUG_ON(guestaddr & 7);
+
+ if (IS_ERR((void __force *) uptr))
+ return PTR_ERR((void __force *) uptr);
+
+ return get_user(*result, (u64 __user *) uptr);
+}
+
+static inline int get_guest_u32(struct kvm_vcpu *vcpu, u64 guestaddr,
+ u32 *result)
+{
+ void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
+
+ BUG_ON(guestaddr & 3);
+
+ if (IS_ERR((void __force *) uptr))
+ return PTR_ERR((void __force *) uptr);
+
+ return get_user(*result, (u32 __user *) uptr);
+}
+
+static inline int get_guest_u16(struct kvm_vcpu *vcpu, u64 guestaddr,
+ u16 *result)
+{
+ void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
+
+ BUG_ON(guestaddr & 1);
+
+ if (IS_ERR(uptr))
+ return PTR_ERR(uptr);
+
+ return get_user(*result, (u16 __user *) uptr);
+}
+
+static inline int get_guest_u8(struct kvm_vcpu *vcpu, u64 guestaddr,
+ u8 *result)
+{
+ void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
+
+ if (IS_ERR((void __force *) uptr))
+ return PTR_ERR((void __force *) uptr);
+
+ return get_user(*result, (u8 __user *) uptr);
+}
+
+static inline int put_guest_u64(struct kvm_vcpu *vcpu, u64 guestaddr,
+ u64 value)
+{
+ void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
+
+ BUG_ON(guestaddr & 7);
+
+ if (IS_ERR((void __force *) uptr))
+ return PTR_ERR((void __force *) uptr);
+
+ return put_user(value, (u64 __user *) uptr);
+}
+
+static inline int put_guest_u32(struct kvm_vcpu *vcpu, u64 guestaddr,
+ u32 value)
+{
+ void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
+
+ BUG_ON(guestaddr & 3);
+
+ if (IS_ERR((void __force *) uptr))
+ return PTR_ERR((void __force *) uptr);
+
+ return put_user(value, (u32 __user *) uptr);
+}
+
+static inline int put_guest_u16(struct kvm_vcpu *vcpu, u64 guestaddr,
+ u16 value)
+{
+ void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
+
+ BUG_ON(guestaddr & 1);
+
+ if (IS_ERR((void __force *) uptr))
+ return PTR_ERR((void __force *) uptr);
+
+ return put_user(value, (u16 __user *) uptr);
+}
+
+static inline int put_guest_u8(struct kvm_vcpu *vcpu, u64 guestaddr,
+ u8 value)
+{
+ void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
+
+ if (IS_ERR((void __force *) uptr))
+ return PTR_ERR((void __force *) uptr);
+
+ return put_user(value, (u8 __user *) uptr);
+}
+
+
+static inline int __copy_to_guest_slow(struct kvm_vcpu *vcpu, u64 guestdest,
+ const void *from, unsigned long n)
+{
+ int rc;
+ unsigned long i;
+ const u8 *data = from;
+
+ for (i = 0; i < n; i++) {
+ rc = put_guest_u8(vcpu, guestdest++, *(data++));
+ if (rc < 0)
+ return rc;
+ }
+ return 0;
+}
+
+static inline int copy_to_guest(struct kvm_vcpu *vcpu, u64 guestdest,
+ const void *from, unsigned long n)
+{
+ u64 prefix = vcpu->arch.sie_block->prefix;
+ u64 origin = vcpu->kvm->arch.guest_origin;
+ u64 memsize = vcpu->kvm->arch.guest_memsize;
+
+ if ((guestdest < 2 * PAGE_SIZE) && (guestdest + n > 2 * PAGE_SIZE))
+ goto slowpath;
+
+ if ((guestdest < prefix) && (guestdest + n > prefix))
+ goto slowpath;
+
+ if ((guestdest < prefix + 2 * PAGE_SIZE)
+ && (guestdest + n > prefix + 2 * PAGE_SIZE))
+ goto slowpath;
+
+ if (guestdest < 2 * PAGE_SIZE)
+ guestdest += prefix;
+ else if ((guestdest >= prefix) && (guestdest < prefix + 2 * PAGE_SIZE))
+ guestdest -= prefix;
+
+ if (guestdest + n > memsize)
+ return -EFAULT;
+
+ if (guestdest + n < guestdest)
+ return -EFAULT;
+
+ guestdest += origin;
+
+ return copy_to_user((void __user *) guestdest, from, n);
+slowpath:
+ return __copy_to_guest_slow(vcpu, guestdest, from, n);
+}
+
+static inline int __copy_from_guest_slow(struct kvm_vcpu *vcpu, void *to,
+ u64 guestsrc, unsigned long n)
+{
+ int rc;
+ unsigned long i;
+ u8 *data = to;
+
+ for (i = 0; i < n; i++) {
+ rc = get_guest_u8(vcpu, guestsrc++, data++);
+ if (rc < 0)
+ return rc;
+ }
+ return 0;
+}
+
+static inline int copy_from_guest(struct kvm_vcpu *vcpu, void *to,
+ u64 guestsrc, unsigned long n)
+{
+ u64 prefix = vcpu->arch.sie_block->prefix;
+ u64 origin = vcpu->kvm->arch.guest_origin;
+ u64 memsize = vcpu->kvm->arch.guest_memsize;
+
+ if ((guestsrc < 2 * PAGE_SIZE) && (guestsrc + n > 2 * PAGE_SIZE))
+ goto slowpath;
+
+ if ((guestsrc < prefix) && (guestsrc + n > prefix))
+ goto slowpath;
+
+ if ((guestsrc < prefix + 2 * PAGE_SIZE)
+ && (guestsrc + n > prefix + 2 * PAGE_SIZE))
+ goto slowpath;
+
+ if (guestsrc < 2 * PAGE_SIZE)
+ guestsrc += prefix;
+ else if ((guestsrc >= prefix) && (guestsrc < prefix + 2 * PAGE_SIZE))
+ guestsrc -= prefix;
+
+ if (guestsrc + n > memsize)
+ return -EFAULT;
+
+ if (guestsrc + n < guestsrc)
+ return -EFAULT;
+
+ guestsrc += origin;
+
+ return copy_from_user(to, (void __user *) guestsrc, n);
+slowpath:
+ return __copy_from_guest_slow(vcpu, to, guestsrc, n);
+}
+
+static inline int copy_to_guest_absolute(struct kvm_vcpu *vcpu, u64 guestdest,
+ const void *from, unsigned long n)
+{
+ u64 origin = vcpu->kvm->arch.guest_origin;
+ u64 memsize = vcpu->kvm->arch.guest_memsize;
+
+ if (guestdest + n > memsize)
+ return -EFAULT;
+
+ if (guestdest + n < guestdest)
+ return -EFAULT;
+
+ guestdest += origin;
+
+ return copy_to_user((void __user *) guestdest, from, n);
+}
+
+static inline int copy_from_guest_absolute(struct kvm_vcpu *vcpu, void *to,
+ u64 guestsrc, unsigned long n)
+{
+ u64 origin = vcpu->kvm->arch.guest_origin;
+ u64 memsize = vcpu->kvm->arch.guest_memsize;
+
+ if (guestsrc + n > memsize)
+ return -EFAULT;
+
+ if (guestsrc + n < guestsrc)
+ return -EFAULT;
+
+ guestsrc += origin;
+
+ return copy_from_user(to, (void __user *) guestsrc, n);
+}
+#endif
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
new file mode 100644
index 000000000000..349581a26103
--- /dev/null
+++ b/arch/s390/kvm/intercept.c
@@ -0,0 +1,216 @@
+/*
+ * intercept.c - in-kernel handling for sie intercepts
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * Author(s): Carsten Otte <cotte@de.ibm.com>
+ * Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/errno.h>
+#include <linux/pagemap.h>
+
+#include <asm/kvm_host.h>
+
+#include "kvm-s390.h"
+#include "gaccess.h"
+
+static int handle_lctg(struct kvm_vcpu *vcpu)
+{
+ int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+ int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
+ int base2 = vcpu->arch.sie_block->ipb >> 28;
+ int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) +
+ ((vcpu->arch.sie_block->ipb & 0xff00) << 4);
+ u64 useraddr;
+ int reg, rc;
+
+ vcpu->stat.instruction_lctg++;
+ if ((vcpu->arch.sie_block->ipb & 0xff) != 0x2f)
+ return -ENOTSUPP;
+
+ useraddr = disp2;
+ if (base2)
+ useraddr += vcpu->arch.guest_gprs[base2];
+
+ reg = reg1;
+
+ VCPU_EVENT(vcpu, 5, "lctg r1:%x, r3:%x,b2:%x,d2:%x", reg1, reg3, base2,
+ disp2);
+
+ do {
+ rc = get_guest_u64(vcpu, useraddr,
+ &vcpu->arch.sie_block->gcr[reg]);
+ if (rc == -EFAULT) {
+ kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ break;
+ }
+ useraddr += 8;
+ if (reg == reg3)
+ break;
+ reg = (reg + 1) % 16;
+ } while (1);
+ return 0;
+}
+
+static int handle_lctl(struct kvm_vcpu *vcpu)
+{
+ int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+ int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
+ int base2 = vcpu->arch.sie_block->ipb >> 28;
+ int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+ u64 useraddr;
+ u32 val = 0;
+ int reg, rc;
+
+ vcpu->stat.instruction_lctl++;
+
+ useraddr = disp2;
+ if (base2)
+ useraddr += vcpu->arch.guest_gprs[base2];
+
+ VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x,b2:%x,d2:%x", reg1, reg3, base2,
+ disp2);
+
+ reg = reg1;
+ do {
+ rc = get_guest_u32(vcpu, useraddr, &val);
+ if (rc == -EFAULT) {
+ kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ break;
+ }
+ vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul;
+ vcpu->arch.sie_block->gcr[reg] |= val;
+ useraddr += 4;
+ if (reg == reg3)
+ break;
+ reg = (reg + 1) % 16;
+ } while (1);
+ return 0;
+}
+
+static intercept_handler_t instruction_handlers[256] = {
+ [0x83] = kvm_s390_handle_diag,
+ [0xae] = kvm_s390_handle_sigp,
+ [0xb2] = kvm_s390_handle_priv,
+ [0xb7] = handle_lctl,
+ [0xeb] = handle_lctg,
+};
+
+static int handle_noop(struct kvm_vcpu *vcpu)
+{
+ switch (vcpu->arch.sie_block->icptcode) {
+ case 0x10:
+ vcpu->stat.exit_external_request++;
+ break;
+ case 0x14:
+ vcpu->stat.exit_external_interrupt++;
+ break;
+ default:
+ break; /* nothing */
+ }
+ return 0;
+}
+
+static int handle_stop(struct kvm_vcpu *vcpu)
+{
+ int rc;
+
+ vcpu->stat.exit_stop_request++;
+ atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
+ spin_lock_bh(&vcpu->arch.local_int.lock);
+ if (vcpu->arch.local_int.action_bits & ACTION_STORE_ON_STOP) {
+ vcpu->arch.local_int.action_bits &= ~ACTION_STORE_ON_STOP;
+ rc = __kvm_s390_vcpu_store_status(vcpu,
+ KVM_S390_STORE_STATUS_NOADDR);
+ if (rc >= 0)
+ rc = -ENOTSUPP;
+ }
+
+ if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP) {
+ vcpu->arch.local_int.action_bits &= ~ACTION_STOP_ON_STOP;
+ VCPU_EVENT(vcpu, 3, "%s", "cpu stopped");
+ rc = -ENOTSUPP;
+ } else
+ rc = 0;
+ spin_unlock_bh(&vcpu->arch.local_int.lock);
+ return rc;
+}
+
+static int handle_validity(struct kvm_vcpu *vcpu)
+{
+ int viwhy = vcpu->arch.sie_block->ipb >> 16;
+ vcpu->stat.exit_validity++;
+ if (viwhy == 0x37) {
+ fault_in_pages_writeable((char __user *)
+ vcpu->kvm->arch.guest_origin +
+ vcpu->arch.sie_block->prefix,
+ PAGE_SIZE);
+ return 0;
+ }
+ VCPU_EVENT(vcpu, 2, "unhandled validity intercept code %d",
+ viwhy);
+ return -ENOTSUPP;
+}
+
+static int handle_instruction(struct kvm_vcpu *vcpu)
+{
+ intercept_handler_t handler;
+
+ vcpu->stat.exit_instruction++;
+ handler = instruction_handlers[vcpu->arch.sie_block->ipa >> 8];
+ if (handler)
+ return handler(vcpu);
+ return -ENOTSUPP;
+}
+
+static int handle_prog(struct kvm_vcpu *vcpu)
+{
+ vcpu->stat.exit_program_interruption++;
+ return kvm_s390_inject_program_int(vcpu, vcpu->arch.sie_block->iprcc);
+}
+
+static int handle_instruction_and_prog(struct kvm_vcpu *vcpu)
+{
+ int rc, rc2;
+
+ vcpu->stat.exit_instr_and_program++;
+ rc = handle_instruction(vcpu);
+ rc2 = handle_prog(vcpu);
+
+ if (rc == -ENOTSUPP)
+ vcpu->arch.sie_block->icptcode = 0x04;
+ if (rc)
+ return rc;
+ return rc2;
+}
+
+static const intercept_handler_t intercept_funcs[0x48 >> 2] = {
+ [0x00 >> 2] = handle_noop,
+ [0x04 >> 2] = handle_instruction,
+ [0x08 >> 2] = handle_prog,
+ [0x0C >> 2] = handle_instruction_and_prog,
+ [0x10 >> 2] = handle_noop,
+ [0x14 >> 2] = handle_noop,
+ [0x1C >> 2] = kvm_s390_handle_wait,
+ [0x20 >> 2] = handle_validity,
+ [0x28 >> 2] = handle_stop,
+};
+
+int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
+{
+ intercept_handler_t func;
+ u8 code = vcpu->arch.sie_block->icptcode;
+
+ if (code & 3 || code > 0x48)
+ return -ENOTSUPP;
+ func = intercept_funcs[code >> 2];
+ if (func)
+ return func(vcpu);
+ return -ENOTSUPP;
+}
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
new file mode 100644
index 000000000000..fcd1ed8015c1
--- /dev/null
+++ b/arch/s390/kvm/interrupt.c
@@ -0,0 +1,592 @@
+/*
+ * interrupt.c - handling kvm guest interrupts
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * Author(s): Carsten Otte <cotte@de.ibm.com>
+ */
+
+#include <asm/lowcore.h>
+#include <asm/uaccess.h>
+#include <linux/kvm_host.h>
+#include "kvm-s390.h"
+#include "gaccess.h"
+
+static int psw_extint_disabled(struct kvm_vcpu *vcpu)
+{
+ return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT);
+}
+
+static int psw_interrupts_disabled(struct kvm_vcpu *vcpu)
+{
+ if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PER) ||
+ (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_IO) ||
+ (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT))
+ return 0;
+ return 1;
+}
+
+static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
+ struct interrupt_info *inti)
+{
+ switch (inti->type) {
+ case KVM_S390_INT_EMERGENCY:
+ if (psw_extint_disabled(vcpu))
+ return 0;
+ if (vcpu->arch.sie_block->gcr[0] & 0x4000ul)
+ return 1;
+ return 0;
+ case KVM_S390_INT_SERVICE:
+ if (psw_extint_disabled(vcpu))
+ return 0;
+ if (vcpu->arch.sie_block->gcr[0] & 0x200ul)
+ return 1;
+ return 0;
+ case KVM_S390_INT_VIRTIO:
+ if (psw_extint_disabled(vcpu))
+ return 0;
+ if (vcpu->arch.sie_block->gcr[0] & 0x200ul)
+ return 1;
+ return 0;
+ case KVM_S390_PROGRAM_INT:
+ case KVM_S390_SIGP_STOP:
+ case KVM_S390_SIGP_SET_PREFIX:
+ case KVM_S390_RESTART:
+ return 1;
+ default:
+ BUG();
+ }
+ return 0;
+}
+
+static void __set_cpu_idle(struct kvm_vcpu *vcpu)
+{
+ BUG_ON(vcpu->vcpu_id > KVM_MAX_VCPUS - 1);
+ atomic_set_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
+ set_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask);
+}
+
+static void __unset_cpu_idle(struct kvm_vcpu *vcpu)
+{
+ BUG_ON(vcpu->vcpu_id > KVM_MAX_VCPUS - 1);
+ atomic_clear_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
+ clear_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask);
+}
+
+static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
+{
+ atomic_clear_mask(CPUSTAT_ECALL_PEND |
+ CPUSTAT_IO_INT | CPUSTAT_EXT_INT | CPUSTAT_STOP_INT,
+ &vcpu->arch.sie_block->cpuflags);
+ vcpu->arch.sie_block->lctl = 0x0000;
+}
+
+static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag)
+{
+ atomic_set_mask(flag, &vcpu->arch.sie_block->cpuflags);
+}
+
+static void __set_intercept_indicator(struct kvm_vcpu *vcpu,
+ struct interrupt_info *inti)
+{
+ switch (inti->type) {
+ case KVM_S390_INT_EMERGENCY:
+ case KVM_S390_INT_SERVICE:
+ case KVM_S390_INT_VIRTIO:
+ if (psw_extint_disabled(vcpu))
+ __set_cpuflag(vcpu, CPUSTAT_EXT_INT);
+ else
+ vcpu->arch.sie_block->lctl |= LCTL_CR0;
+ break;
+ case KVM_S390_SIGP_STOP:
+ __set_cpuflag(vcpu, CPUSTAT_STOP_INT);
+ break;
+ default:
+ BUG();
+ }
+}
+
+static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
+ struct interrupt_info *inti)
+{
+ const unsigned short table[] = { 2, 4, 4, 6 };
+ int rc, exception = 0;
+
+ switch (inti->type) {
+ case KVM_S390_INT_EMERGENCY:
+ VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp emerg");
+ vcpu->stat.deliver_emergency_signal++;
+ rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1201);
+ if (rc == -EFAULT)
+ exception = 1;
+
+ rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ if (rc == -EFAULT)
+ exception = 1;
+
+ rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+ __LC_EXT_NEW_PSW, sizeof(psw_t));
+ if (rc == -EFAULT)
+ exception = 1;
+ break;
+
+ case KVM_S390_INT_SERVICE:
+ VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x",
+ inti->ext.ext_params);
+ vcpu->stat.deliver_service_signal++;
+ rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2401);
+ if (rc == -EFAULT)
+ exception = 1;
+
+ rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ if (rc == -EFAULT)
+ exception = 1;
+
+ rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+ __LC_EXT_NEW_PSW, sizeof(psw_t));
+ if (rc == -EFAULT)
+ exception = 1;
+
+ rc = put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params);
+ if (rc == -EFAULT)
+ exception = 1;
+ break;
+
+ case KVM_S390_INT_VIRTIO:
+ VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%lx",
+ inti->ext.ext_params, inti->ext.ext_params2);
+ vcpu->stat.deliver_virtio_interrupt++;
+ rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2603);
+ if (rc == -EFAULT)
+ exception = 1;
+
+ rc = put_guest_u16(vcpu, __LC_CPU_ADDRESS, 0x0d00);
+ if (rc == -EFAULT)
+ exception = 1;
+
+ rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ if (rc == -EFAULT)
+ exception = 1;
+
+ rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+ __LC_EXT_NEW_PSW, sizeof(psw_t));
+ if (rc == -EFAULT)
+ exception = 1;
+
+ rc = put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params);
+ if (rc == -EFAULT)
+ exception = 1;
+
+ rc = put_guest_u64(vcpu, __LC_PFAULT_INTPARM,
+ inti->ext.ext_params2);
+ if (rc == -EFAULT)
+ exception = 1;
+ break;
+
+ case KVM_S390_SIGP_STOP:
+ VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop");
+ vcpu->stat.deliver_stop_signal++;
+ __set_intercept_indicator(vcpu, inti);
+ break;
+
+ case KVM_S390_SIGP_SET_PREFIX:
+ VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x",
+ inti->prefix.address);
+ vcpu->stat.deliver_prefix_signal++;
+ vcpu->arch.sie_block->prefix = inti->prefix.address;
+ vcpu->arch.sie_block->ihcpu = 0xffff;
+ break;
+
+ case KVM_S390_RESTART:
+ VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu restart");
+ vcpu->stat.deliver_restart_signal++;
+ rc = copy_to_guest(vcpu, offsetof(struct _lowcore,
+ restart_old_psw), &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ if (rc == -EFAULT)
+ exception = 1;
+
+ rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+ offsetof(struct _lowcore, restart_psw), sizeof(psw_t));
+ if (rc == -EFAULT)
+ exception = 1;
+ break;
+
+ case KVM_S390_PROGRAM_INT:
+ VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x",
+ inti->pgm.code,
+ table[vcpu->arch.sie_block->ipa >> 14]);
+ vcpu->stat.deliver_program_int++;
+ rc = put_guest_u16(vcpu, __LC_PGM_INT_CODE, inti->pgm.code);
+ if (rc == -EFAULT)
+ exception = 1;
+
+ rc = put_guest_u16(vcpu, __LC_PGM_ILC,
+ table[vcpu->arch.sie_block->ipa >> 14]);
+ if (rc == -EFAULT)
+ exception = 1;
+
+ rc = copy_to_guest(vcpu, __LC_PGM_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ if (rc == -EFAULT)
+ exception = 1;
+
+ rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+ __LC_PGM_NEW_PSW, sizeof(psw_t));
+ if (rc == -EFAULT)
+ exception = 1;
+ break;
+
+ default:
+ BUG();
+ }
+
+ if (exception) {
+ VCPU_EVENT(vcpu, 1, "%s", "program exception while delivering"
+ " interrupt");
+ kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ if (inti->type == KVM_S390_PROGRAM_INT) {
+ printk(KERN_WARNING "kvm: recursive program check\n");
+ BUG();
+ }
+ }
+}
+
+static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
+{
+ int rc, exception = 0;
+
+ if (psw_extint_disabled(vcpu))
+ return 0;
+ if (!(vcpu->arch.sie_block->gcr[0] & 0x800ul))
+ return 0;
+ rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1004);
+ if (rc == -EFAULT)
+ exception = 1;
+ rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ if (rc == -EFAULT)
+ exception = 1;
+ rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+ __LC_EXT_NEW_PSW, sizeof(psw_t));
+ if (rc == -EFAULT)
+ exception = 1;
+
+ if (exception) {
+ VCPU_EVENT(vcpu, 1, "%s", "program exception while delivering" \
+ " ckc interrupt");
+ kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ return 0;
+ }
+
+ return 1;
+}
+
+int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
+{
+ struct local_interrupt *li = &vcpu->arch.local_int;
+ struct float_interrupt *fi = vcpu->arch.local_int.float_int;
+ struct interrupt_info *inti;
+ int rc = 0;
+
+ if (atomic_read(&li->active)) {
+ spin_lock_bh(&li->lock);
+ list_for_each_entry(inti, &li->list, list)
+ if (__interrupt_is_deliverable(vcpu, inti)) {
+ rc = 1;
+ break;
+ }
+ spin_unlock_bh(&li->lock);
+ }
+
+ if ((!rc) && atomic_read(&fi->active)) {
+ spin_lock_bh(&fi->lock);
+ list_for_each_entry(inti, &fi->list, list)
+ if (__interrupt_is_deliverable(vcpu, inti)) {
+ rc = 1;
+ break;
+ }
+ spin_unlock_bh(&fi->lock);
+ }
+
+ if ((!rc) && (vcpu->arch.sie_block->ckc <
+ get_clock() + vcpu->arch.sie_block->epoch)) {
+ if ((!psw_extint_disabled(vcpu)) &&
+ (vcpu->arch.sie_block->gcr[0] & 0x800ul))
+ rc = 1;
+ }
+
+ return rc;
+}
+
+int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
+{
+ return 0;
+}
+
+int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
+{
+ u64 now, sltime;
+ DECLARE_WAITQUEUE(wait, current);
+
+ vcpu->stat.exit_wait_state++;
+ if (kvm_cpu_has_interrupt(vcpu))
+ return 0;
+
+ if (psw_interrupts_disabled(vcpu)) {
+ VCPU_EVENT(vcpu, 3, "%s", "disabled wait");
+ __unset_cpu_idle(vcpu);
+ return -ENOTSUPP; /* disabled wait */
+ }
+
+ if (psw_extint_disabled(vcpu) ||
+ (!(vcpu->arch.sie_block->gcr[0] & 0x800ul))) {
+ VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer");
+ goto no_timer;
+ }
+
+ now = get_clock() + vcpu->arch.sie_block->epoch;
+ if (vcpu->arch.sie_block->ckc < now) {
+ __unset_cpu_idle(vcpu);
+ return 0;
+ }
+
+ sltime = (vcpu->arch.sie_block->ckc - now) / (0xf4240000ul / HZ) + 1;
+
+ vcpu->arch.ckc_timer.expires = jiffies + sltime;
+
+ add_timer(&vcpu->arch.ckc_timer);
+ VCPU_EVENT(vcpu, 5, "enabled wait timer:%lx jiffies", sltime);
+no_timer:
+ spin_lock_bh(&vcpu->arch.local_int.float_int->lock);
+ spin_lock_bh(&vcpu->arch.local_int.lock);
+ __set_cpu_idle(vcpu);
+ vcpu->arch.local_int.timer_due = 0;
+ add_wait_queue(&vcpu->arch.local_int.wq, &wait);
+ while (list_empty(&vcpu->arch.local_int.list) &&
+ list_empty(&vcpu->arch.local_int.float_int->list) &&
+ (!vcpu->arch.local_int.timer_due) &&
+ !signal_pending(current)) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ spin_unlock_bh(&vcpu->arch.local_int.lock);
+ spin_unlock_bh(&vcpu->arch.local_int.float_int->lock);
+ vcpu_put(vcpu);
+ schedule();
+ vcpu_load(vcpu);
+ spin_lock_bh(&vcpu->arch.local_int.float_int->lock);
+ spin_lock_bh(&vcpu->arch.local_int.lock);
+ }
+ __unset_cpu_idle(vcpu);
+ __set_current_state(TASK_RUNNING);
+ remove_wait_queue(&vcpu->wq, &wait);
+ spin_unlock_bh(&vcpu->arch.local_int.lock);
+ spin_unlock_bh(&vcpu->arch.local_int.float_int->lock);
+ del_timer(&vcpu->arch.ckc_timer);
+ return 0;
+}
+
+void kvm_s390_idle_wakeup(unsigned long data)
+{
+ struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
+
+ spin_lock_bh(&vcpu->arch.local_int.lock);
+ vcpu->arch.local_int.timer_due = 1;
+ if (waitqueue_active(&vcpu->arch.local_int.wq))
+ wake_up_interruptible(&vcpu->arch.local_int.wq);
+ spin_unlock_bh(&vcpu->arch.local_int.lock);
+}
+
+
+void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
+{
+ struct local_interrupt *li = &vcpu->arch.local_int;
+ struct float_interrupt *fi = vcpu->arch.local_int.float_int;
+ struct interrupt_info *n, *inti = NULL;
+ int deliver;
+
+ __reset_intercept_indicators(vcpu);
+ if (atomic_read(&li->active)) {
+ do {
+ deliver = 0;
+ spin_lock_bh(&li->lock);
+ list_for_each_entry_safe(inti, n, &li->list, list) {
+ if (__interrupt_is_deliverable(vcpu, inti)) {
+ list_del(&inti->list);
+ deliver = 1;
+ break;
+ }
+ __set_intercept_indicator(vcpu, inti);
+ }
+ if (list_empty(&li->list))
+ atomic_set(&li->active, 0);
+ spin_unlock_bh(&li->lock);
+ if (deliver) {
+ __do_deliver_interrupt(vcpu, inti);
+ kfree(inti);
+ }
+ } while (deliver);
+ }
+
+ if ((vcpu->arch.sie_block->ckc <
+ get_clock() + vcpu->arch.sie_block->epoch))
+ __try_deliver_ckc_interrupt(vcpu);
+
+ if (atomic_read(&fi->active)) {
+ do {
+ deliver = 0;
+ spin_lock_bh(&fi->lock);
+ list_for_each_entry_safe(inti, n, &fi->list, list) {
+ if (__interrupt_is_deliverable(vcpu, inti)) {
+ list_del(&inti->list);
+ deliver = 1;
+ break;
+ }
+ __set_intercept_indicator(vcpu, inti);
+ }
+ if (list_empty(&fi->list))
+ atomic_set(&fi->active, 0);
+ spin_unlock_bh(&fi->lock);
+ if (deliver) {
+ __do_deliver_interrupt(vcpu, inti);
+ kfree(inti);
+ }
+ } while (deliver);
+ }
+}
+
+int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
+{
+ struct local_interrupt *li = &vcpu->arch.local_int;
+ struct interrupt_info *inti;
+
+ inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+ if (!inti)
+ return -ENOMEM;
+
+ inti->type = KVM_S390_PROGRAM_INT;;
+ inti->pgm.code = code;
+
+ VCPU_EVENT(vcpu, 3, "inject: program check %d (from kernel)", code);
+ spin_lock_bh(&li->lock);
+ list_add(&inti->list, &li->list);
+ atomic_set(&li->active, 1);
+ BUG_ON(waitqueue_active(&li->wq));
+ spin_unlock_bh(&li->lock);
+ return 0;
+}
+
+int kvm_s390_inject_vm(struct kvm *kvm,
+ struct kvm_s390_interrupt *s390int)
+{
+ struct local_interrupt *li;
+ struct float_interrupt *fi;
+ struct interrupt_info *inti;
+ int sigcpu;
+
+ inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+ if (!inti)
+ return -ENOMEM;
+
+ switch (s390int->type) {
+ case KVM_S390_INT_VIRTIO:
+ VM_EVENT(kvm, 5, "inject: virtio parm:%x,parm64:%lx",
+ s390int->parm, s390int->parm64);
+ inti->type = s390int->type;
+ inti->ext.ext_params = s390int->parm;
+ inti->ext.ext_params2 = s390int->parm64;
+ break;
+ case KVM_S390_INT_SERVICE:
+ VM_EVENT(kvm, 5, "inject: sclp parm:%x", s390int->parm);
+ inti->type = s390int->type;
+ inti->ext.ext_params = s390int->parm;
+ break;
+ case KVM_S390_PROGRAM_INT:
+ case KVM_S390_SIGP_STOP:
+ case KVM_S390_INT_EMERGENCY:
+ default:
+ kfree(inti);
+ return -EINVAL;
+ }
+
+ mutex_lock(&kvm->lock);
+ fi = &kvm->arch.float_int;
+ spin_lock_bh(&fi->lock);
+ list_add_tail(&inti->list, &fi->list);
+ atomic_set(&fi->active, 1);
+ sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS);
+ if (sigcpu == KVM_MAX_VCPUS) {
+ do {
+ sigcpu = fi->next_rr_cpu++;
+ if (sigcpu == KVM_MAX_VCPUS)
+ sigcpu = fi->next_rr_cpu = 0;
+ } while (fi->local_int[sigcpu] == NULL);
+ }
+ li = fi->local_int[sigcpu];
+ spin_lock_bh(&li->lock);
+ atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+ if (waitqueue_active(&li->wq))
+ wake_up_interruptible(&li->wq);
+ spin_unlock_bh(&li->lock);
+ spin_unlock_bh(&fi->lock);
+ mutex_unlock(&kvm->lock);
+ return 0;
+}
+
+int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
+ struct kvm_s390_interrupt *s390int)
+{
+ struct local_interrupt *li;
+ struct interrupt_info *inti;
+
+ inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+ if (!inti)
+ return -ENOMEM;
+
+ switch (s390int->type) {
+ case KVM_S390_PROGRAM_INT:
+ if (s390int->parm & 0xffff0000) {
+ kfree(inti);
+ return -EINVAL;
+ }
+ inti->type = s390int->type;
+ inti->pgm.code = s390int->parm;
+ VCPU_EVENT(vcpu, 3, "inject: program check %d (from user)",
+ s390int->parm);
+ break;
+ case KVM_S390_SIGP_STOP:
+ case KVM_S390_RESTART:
+ case KVM_S390_SIGP_SET_PREFIX:
+ case KVM_S390_INT_EMERGENCY:
+ VCPU_EVENT(vcpu, 3, "inject: type %x", s390int->type);
+ inti->type = s390int->type;
+ break;
+ case KVM_S390_INT_VIRTIO:
+ case KVM_S390_INT_SERVICE:
+ default:
+ kfree(inti);
+ return -EINVAL;
+ }
+
+ mutex_lock(&vcpu->kvm->lock);
+ li = &vcpu->arch.local_int;
+ spin_lock_bh(&li->lock);
+ if (inti->type == KVM_S390_PROGRAM_INT)
+ list_add(&inti->list, &li->list);
+ else
+ list_add_tail(&inti->list, &li->list);
+ atomic_set(&li->active, 1);
+ if (inti->type == KVM_S390_SIGP_STOP)
+ li->action_bits |= ACTION_STOP_ON_STOP;
+ atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+ if (waitqueue_active(&li->wq))
+ wake_up_interruptible(&vcpu->arch.local_int.wq);
+ spin_unlock_bh(&li->lock);
+ mutex_unlock(&vcpu->kvm->lock);
+ return 0;
+}
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
new file mode 100644
index 000000000000..98d1e73e01f1
--- /dev/null
+++ b/arch/s390/kvm/kvm-s390.c
@@ -0,0 +1,685 @@
+/*
+ * s390host.c -- hosting zSeries kernel virtual machines
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * Author(s): Carsten Otte <cotte@de.ibm.com>
+ * Christian Borntraeger <borntraeger@de.ibm.com>
+ * Heiko Carstens <heiko.carstens@de.ibm.com>
+ */
+
+#include <linux/compiler.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/timer.h>
+#include <asm/lowcore.h>
+#include <asm/pgtable.h>
+
+#include "kvm-s390.h"
+#include "gaccess.h"
+
+#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
+
+struct kvm_stats_debugfs_item debugfs_entries[] = {
+ { "userspace_handled", VCPU_STAT(exit_userspace) },
+ { "exit_validity", VCPU_STAT(exit_validity) },
+ { "exit_stop_request", VCPU_STAT(exit_stop_request) },
+ { "exit_external_request", VCPU_STAT(exit_external_request) },
+ { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
+ { "exit_instruction", VCPU_STAT(exit_instruction) },
+ { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
+ { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
+ { "instruction_lctg", VCPU_STAT(instruction_lctg) },
+ { "instruction_lctl", VCPU_STAT(instruction_lctl) },
+ { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
+ { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
+ { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
+ { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
+ { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
+ { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
+ { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
+ { "exit_wait_state", VCPU_STAT(exit_wait_state) },
+ { "instruction_stidp", VCPU_STAT(instruction_stidp) },
+ { "instruction_spx", VCPU_STAT(instruction_spx) },
+ { "instruction_stpx", VCPU_STAT(instruction_stpx) },
+ { "instruction_stap", VCPU_STAT(instruction_stap) },
+ { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
+ { "instruction_stsch", VCPU_STAT(instruction_stsch) },
+ { "instruction_chsc", VCPU_STAT(instruction_chsc) },
+ { "instruction_stsi", VCPU_STAT(instruction_stsi) },
+ { "instruction_stfl", VCPU_STAT(instruction_stfl) },
+ { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
+ { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
+ { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
+ { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
+ { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
+ { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
+ { "diagnose_44", VCPU_STAT(diagnose_44) },
+ { NULL }
+};
+
+
+/* Section: not file related */
+void kvm_arch_hardware_enable(void *garbage)
+{
+ /* every s390 is virtualization enabled ;-) */
+}
+
+void kvm_arch_hardware_disable(void *garbage)
+{
+}
+
+void decache_vcpus_on_cpu(int cpu)
+{
+}
+
+int kvm_arch_hardware_setup(void)
+{
+ return 0;
+}
+
+void kvm_arch_hardware_unsetup(void)
+{
+}
+
+void kvm_arch_check_processor_compat(void *rtn)
+{
+}
+
+int kvm_arch_init(void *opaque)
+{
+ return 0;
+}
+
+void kvm_arch_exit(void)
+{
+}
+
+/* Section: device related */
+long kvm_arch_dev_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ if (ioctl == KVM_S390_ENABLE_SIE)
+ return s390_enable_sie();
+ return -EINVAL;
+}
+
+int kvm_dev_ioctl_check_extension(long ext)
+{
+ return 0;
+}
+
+/* Section: vm related */
+/*
+ * Get (and clear) the dirty memory log for a memory slot.
+ */
+int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
+ struct kvm_dirty_log *log)
+{
+ return 0;
+}
+
+long kvm_arch_vm_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ struct kvm *kvm = filp->private_data;
+ void __user *argp = (void __user *)arg;
+ int r;
+
+ switch (ioctl) {
+ case KVM_S390_INTERRUPT: {
+ struct kvm_s390_interrupt s390int;
+
+ r = -EFAULT;
+ if (copy_from_user(&s390int, argp, sizeof(s390int)))
+ break;
+ r = kvm_s390_inject_vm(kvm, &s390int);
+ break;
+ }
+ default:
+ r = -EINVAL;
+ }
+
+ return r;
+}
+
+struct kvm *kvm_arch_create_vm(void)
+{
+ struct kvm *kvm;
+ int rc;
+ char debug_name[16];
+
+ rc = s390_enable_sie();
+ if (rc)
+ goto out_nokvm;
+
+ rc = -ENOMEM;
+ kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
+ if (!kvm)
+ goto out_nokvm;
+
+ kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
+ if (!kvm->arch.sca)
+ goto out_nosca;
+
+ sprintf(debug_name, "kvm-%u", current->pid);
+
+ kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
+ if (!kvm->arch.dbf)
+ goto out_nodbf;
+
+ spin_lock_init(&kvm->arch.float_int.lock);
+ INIT_LIST_HEAD(&kvm->arch.float_int.list);
+
+ debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
+ VM_EVENT(kvm, 3, "%s", "vm created");
+
+ try_module_get(THIS_MODULE);
+
+ return kvm;
+out_nodbf:
+ free_page((unsigned long)(kvm->arch.sca));
+out_nosca:
+ kfree(kvm);
+out_nokvm:
+ return ERR_PTR(rc);
+}
+
+void kvm_arch_destroy_vm(struct kvm *kvm)
+{
+ debug_unregister(kvm->arch.dbf);
+ free_page((unsigned long)(kvm->arch.sca));
+ kfree(kvm);
+ module_put(THIS_MODULE);
+}
+
+/* Section: vcpu related */
+int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
+{
+ return 0;
+}
+
+void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
+{
+ /* kvm common code refers to this, but does'nt call it */
+ BUG();
+}
+
+void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+ save_fp_regs(&vcpu->arch.host_fpregs);
+ save_access_regs(vcpu->arch.host_acrs);
+ vcpu->arch.guest_fpregs.fpc &= FPC_VALID_MASK;
+ restore_fp_regs(&vcpu->arch.guest_fpregs);
+ restore_access_regs(vcpu->arch.guest_acrs);
+
+ if (signal_pending(current))
+ atomic_set_mask(CPUSTAT_STOP_INT,
+ &vcpu->arch.sie_block->cpuflags);
+}
+
+void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
+{
+ save_fp_regs(&vcpu->arch.guest_fpregs);
+ save_access_regs(vcpu->arch.guest_acrs);
+ restore_fp_regs(&vcpu->arch.host_fpregs);
+ restore_access_regs(vcpu->arch.host_acrs);
+}
+
+static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
+{
+ /* this equals initial cpu reset in pop, but we don't switch to ESA */
+ vcpu->arch.sie_block->gpsw.mask = 0UL;
+ vcpu->arch.sie_block->gpsw.addr = 0UL;
+ vcpu->arch.sie_block->prefix = 0UL;
+ vcpu->arch.sie_block->ihcpu = 0xffff;
+ vcpu->arch.sie_block->cputm = 0UL;
+ vcpu->arch.sie_block->ckc = 0UL;
+ vcpu->arch.sie_block->todpr = 0;
+ memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
+ vcpu->arch.sie_block->gcr[0] = 0xE0UL;
+ vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
+ vcpu->arch.guest_fpregs.fpc = 0;
+ asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
+ vcpu->arch.sie_block->gbea = 1;
+}
+
+int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+ atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH);
+ vcpu->arch.sie_block->gmslm = 0xffffffffffUL;
+ vcpu->arch.sie_block->gmsor = 0x000000000000;
+ vcpu->arch.sie_block->ecb = 2;
+ vcpu->arch.sie_block->eca = 0xC1002001U;
+ setup_timer(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup,
+ (unsigned long) vcpu);
+ get_cpu_id(&vcpu->arch.cpu_id);
+ vcpu->arch.cpu_id.version = 0xfe;
+ return 0;
+}
+
+struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
+ unsigned int id)
+{
+ struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
+ int rc = -ENOMEM;
+
+ if (!vcpu)
+ goto out_nomem;
+
+ vcpu->arch.sie_block = (struct sie_block *) get_zeroed_page(GFP_KERNEL);
+
+ if (!vcpu->arch.sie_block)
+ goto out_free_cpu;
+
+ vcpu->arch.sie_block->icpua = id;
+ BUG_ON(!kvm->arch.sca);
+ BUG_ON(kvm->arch.sca->cpu[id].sda);
+ kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
+ vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32);
+ vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
+
+ spin_lock_init(&vcpu->arch.local_int.lock);
+ INIT_LIST_HEAD(&vcpu->arch.local_int.list);
+ vcpu->arch.local_int.float_int = &kvm->arch.float_int;
+ spin_lock_bh(&kvm->arch.float_int.lock);
+ kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
+ init_waitqueue_head(&vcpu->arch.local_int.wq);
+ vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
+ spin_unlock_bh(&kvm->arch.float_int.lock);
+
+ rc = kvm_vcpu_init(vcpu, kvm, id);
+ if (rc)
+ goto out_free_cpu;
+ VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
+ vcpu->arch.sie_block);
+
+ try_module_get(THIS_MODULE);
+
+ return vcpu;
+out_free_cpu:
+ kfree(vcpu);
+out_nomem:
+ return ERR_PTR(rc);
+}
+
+void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
+{
+ VCPU_EVENT(vcpu, 3, "%s", "destroy cpu");
+ free_page((unsigned long)(vcpu->arch.sie_block));
+ kfree(vcpu);
+ module_put(THIS_MODULE);
+}
+
+int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
+{
+ /* kvm common code refers to this, but never calls it */
+ BUG();
+ return 0;
+}
+
+static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
+{
+ vcpu_load(vcpu);
+ kvm_s390_vcpu_initial_reset(vcpu);
+ vcpu_put(vcpu);
+ return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+ vcpu_load(vcpu);
+ memcpy(&vcpu->arch.guest_gprs, &regs->gprs, sizeof(regs->gprs));
+ vcpu_put(vcpu);
+ return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+ vcpu_load(vcpu);
+ memcpy(&regs->gprs, &vcpu->arch.guest_gprs, sizeof(regs->gprs));
+ vcpu_put(vcpu);
+ return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ vcpu_load(vcpu);
+ memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs));
+ memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
+ vcpu_put(vcpu);
+ return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ vcpu_load(vcpu);
+ memcpy(&sregs->acrs, &vcpu->arch.guest_acrs, sizeof(sregs->acrs));
+ memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
+ vcpu_put(vcpu);
+ return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+ vcpu_load(vcpu);
+ memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
+ vcpu->arch.guest_fpregs.fpc = fpu->fpc;
+ vcpu_put(vcpu);
+ return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+ vcpu_load(vcpu);
+ memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
+ fpu->fpc = vcpu->arch.guest_fpregs.fpc;
+ vcpu_put(vcpu);
+ return 0;
+}
+
+static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
+{
+ int rc = 0;
+
+ vcpu_load(vcpu);
+ if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING)
+ rc = -EBUSY;
+ else
+ vcpu->arch.sie_block->gpsw = psw;
+ vcpu_put(vcpu);
+ return rc;
+}
+
+int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
+ struct kvm_translation *tr)
+{
+ return -EINVAL; /* not implemented yet */
+}
+
+int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
+ struct kvm_debug_guest *dbg)
+{
+ return -EINVAL; /* not implemented yet */
+}
+
+int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
+ struct kvm_mp_state *mp_state)
+{
+ return -EINVAL; /* not implemented yet */
+}
+
+int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
+ struct kvm_mp_state *mp_state)
+{
+ return -EINVAL; /* not implemented yet */
+}
+
+static void __vcpu_run(struct kvm_vcpu *vcpu)
+{
+ memcpy(&vcpu->arch.sie_block->gg14, &vcpu->arch.guest_gprs[14], 16);
+
+ if (need_resched())
+ schedule();
+
+ vcpu->arch.sie_block->icptcode = 0;
+ local_irq_disable();
+ kvm_guest_enter();
+ local_irq_enable();
+ VCPU_EVENT(vcpu, 6, "entering sie flags %x",
+ atomic_read(&vcpu->arch.sie_block->cpuflags));
+ sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs);
+ VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
+ vcpu->arch.sie_block->icptcode);
+ local_irq_disable();
+ kvm_guest_exit();
+ local_irq_enable();
+
+ memcpy(&vcpu->arch.guest_gprs[14], &vcpu->arch.sie_block->gg14, 16);
+}
+
+int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+ int rc;
+ sigset_t sigsaved;
+
+ vcpu_load(vcpu);
+
+ if (vcpu->sigset_active)
+ sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
+
+ atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
+
+ BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL);
+
+ switch (kvm_run->exit_reason) {
+ case KVM_EXIT_S390_SIEIC:
+ vcpu->arch.sie_block->gpsw.mask = kvm_run->s390_sieic.mask;
+ vcpu->arch.sie_block->gpsw.addr = kvm_run->s390_sieic.addr;
+ break;
+ case KVM_EXIT_UNKNOWN:
+ case KVM_EXIT_S390_RESET:
+ break;
+ default:
+ BUG();
+ }
+
+ might_sleep();
+
+ do {
+ kvm_s390_deliver_pending_interrupts(vcpu);
+ __vcpu_run(vcpu);
+ rc = kvm_handle_sie_intercept(vcpu);
+ } while (!signal_pending(current) && !rc);
+
+ if (signal_pending(current) && !rc)
+ rc = -EINTR;
+
+ if (rc == -ENOTSUPP) {
+ /* intercept cannot be handled in-kernel, prepare kvm-run */
+ kvm_run->exit_reason = KVM_EXIT_S390_SIEIC;
+ kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
+ kvm_run->s390_sieic.mask = vcpu->arch.sie_block->gpsw.mask;
+ kvm_run->s390_sieic.addr = vcpu->arch.sie_block->gpsw.addr;
+ kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
+ kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
+ rc = 0;
+ }
+
+ if (rc == -EREMOTE) {
+ /* intercept was handled, but userspace support is needed
+ * kvm_run has been prepared by the handler */
+ rc = 0;
+ }
+
+ if (vcpu->sigset_active)
+ sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+
+ vcpu_put(vcpu);
+
+ vcpu->stat.exit_userspace++;
+ return rc;
+}
+
+static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, const void *from,
+ unsigned long n, int prefix)
+{
+ if (prefix)
+ return copy_to_guest(vcpu, guestdest, from, n);
+ else
+ return copy_to_guest_absolute(vcpu, guestdest, from, n);
+}
+
+/*
+ * store status at address
+ * we use have two special cases:
+ * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
+ * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
+ */
+int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
+{
+ const unsigned char archmode = 1;
+ int prefix;
+
+ if (addr == KVM_S390_STORE_STATUS_NOADDR) {
+ if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1))
+ return -EFAULT;
+ addr = SAVE_AREA_BASE;
+ prefix = 0;
+ } else if (addr == KVM_S390_STORE_STATUS_PREFIXED) {
+ if (copy_to_guest(vcpu, 163ul, &archmode, 1))
+ return -EFAULT;
+ addr = SAVE_AREA_BASE;
+ prefix = 1;
+ } else
+ prefix = 0;
+
+ if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, fp_regs),
+ vcpu->arch.guest_fpregs.fprs, 128, prefix))
+ return -EFAULT;
+
+ if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, gp_regs),
+ vcpu->arch.guest_gprs, 128, prefix))
+ return -EFAULT;
+
+ if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, psw),
+ &vcpu->arch.sie_block->gpsw, 16, prefix))
+ return -EFAULT;
+
+ if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, pref_reg),
+ &vcpu->arch.sie_block->prefix, 4, prefix))
+ return -EFAULT;
+
+ if (__guestcopy(vcpu,
+ addr + offsetof(struct save_area_s390x, fp_ctrl_reg),
+ &vcpu->arch.guest_fpregs.fpc, 4, prefix))
+ return -EFAULT;
+
+ if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, tod_reg),
+ &vcpu->arch.sie_block->todpr, 4, prefix))
+ return -EFAULT;
+
+ if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, timer),
+ &vcpu->arch.sie_block->cputm, 8, prefix))
+ return -EFAULT;
+
+ if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, clk_cmp),
+ &vcpu->arch.sie_block->ckc, 8, prefix))
+ return -EFAULT;
+
+ if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, acc_regs),
+ &vcpu->arch.guest_acrs, 64, prefix))
+ return -EFAULT;
+
+ if (__guestcopy(vcpu,
+ addr + offsetof(struct save_area_s390x, ctrl_regs),
+ &vcpu->arch.sie_block->gcr, 128, prefix))
+ return -EFAULT;
+ return 0;
+}
+
+static int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
+{
+ int rc;
+
+ vcpu_load(vcpu);
+ rc = __kvm_s390_vcpu_store_status(vcpu, addr);
+ vcpu_put(vcpu);
+ return rc;
+}
+
+long kvm_arch_vcpu_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ struct kvm_vcpu *vcpu = filp->private_data;
+ void __user *argp = (void __user *)arg;
+
+ switch (ioctl) {
+ case KVM_S390_INTERRUPT: {
+ struct kvm_s390_interrupt s390int;
+
+ if (copy_from_user(&s390int, argp, sizeof(s390int)))
+ return -EFAULT;
+ return kvm_s390_inject_vcpu(vcpu, &s390int);
+ }
+ case KVM_S390_STORE_STATUS:
+ return kvm_s390_vcpu_store_status(vcpu, arg);
+ case KVM_S390_SET_INITIAL_PSW: {
+ psw_t psw;
+
+ if (copy_from_user(&psw, argp, sizeof(psw)))
+ return -EFAULT;
+ return kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
+ }
+ case KVM_S390_INITIAL_RESET:
+ return kvm_arch_vcpu_ioctl_initial_reset(vcpu);
+ default:
+ ;
+ }
+ return -EINVAL;
+}
+
+/* Section: memory related */
+int kvm_arch_set_memory_region(struct kvm *kvm,
+ struct kvm_userspace_memory_region *mem,
+ struct kvm_memory_slot old,
+ int user_alloc)
+{
+ /* A few sanity checks. We can have exactly one memory slot which has
+ to start at guest virtual zero and which has to be located at a
+ page boundary in userland and which has to end at a page boundary.
+ The memory in userland is ok to be fragmented into various different
+ vmas. It is okay to mmap() and munmap() stuff in this slot after
+ doing this call at any time */
+
+ if (mem->slot)
+ return -EINVAL;
+
+ if (mem->guest_phys_addr)
+ return -EINVAL;
+
+ if (mem->userspace_addr & (PAGE_SIZE - 1))
+ return -EINVAL;
+
+ if (mem->memory_size & (PAGE_SIZE - 1))
+ return -EINVAL;
+
+ kvm->arch.guest_origin = mem->userspace_addr;
+ kvm->arch.guest_memsize = mem->memory_size;
+
+ /* FIXME: we do want to interrupt running CPUs and update their memory
+ configuration now to avoid race conditions. But hey, changing the
+ memory layout while virtual CPUs are running is usually bad
+ programming practice. */
+
+ return 0;
+}
+
+gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
+{
+ return gfn;
+}
+
+static int __init kvm_s390_init(void)
+{
+ return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE);
+}
+
+static void __exit kvm_s390_exit(void)
+{
+ kvm_exit();
+}
+
+module_init(kvm_s390_init);
+module_exit(kvm_s390_exit);
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
new file mode 100644
index 000000000000..3893cf12eacf
--- /dev/null
+++ b/arch/s390/kvm/kvm-s390.h
@@ -0,0 +1,64 @@
+/*
+ * kvm_s390.h - definition for kvm on s390
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * Author(s): Carsten Otte <cotte@de.ibm.com>
+ * Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+
+#ifndef ARCH_S390_KVM_S390_H
+#define ARCH_S390_KVM_S390_H
+
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+
+typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
+
+int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
+
+#define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\
+do { \
+ debug_sprintf_event(d_kvm->arch.dbf, d_loglevel, d_string "\n", \
+ d_args); \
+} while (0)
+
+#define VCPU_EVENT(d_vcpu, d_loglevel, d_string, d_args...)\
+do { \
+ debug_sprintf_event(d_vcpu->kvm->arch.dbf, d_loglevel, \
+ "%02d[%016lx-%016lx]: " d_string "\n", d_vcpu->vcpu_id, \
+ d_vcpu->arch.sie_block->gpsw.mask, d_vcpu->arch.sie_block->gpsw.addr,\
+ d_args); \
+} while (0)
+
+static inline int __cpu_is_stopped(struct kvm_vcpu *vcpu)
+{
+ return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOP_INT;
+}
+
+int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
+void kvm_s390_idle_wakeup(unsigned long data);
+void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu);
+int kvm_s390_inject_vm(struct kvm *kvm,
+ struct kvm_s390_interrupt *s390int);
+int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
+ struct kvm_s390_interrupt *s390int);
+int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
+
+/* implemented in priv.c */
+int kvm_s390_handle_priv(struct kvm_vcpu *vcpu);
+
+/* implemented in sigp.c */
+int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
+
+/* implemented in kvm-s390.c */
+int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu,
+ unsigned long addr);
+/* implemented in diag.c */
+int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
+
+#endif
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
new file mode 100644
index 000000000000..1465946325c5
--- /dev/null
+++ b/arch/s390/kvm/priv.c
@@ -0,0 +1,323 @@
+/*
+ * priv.c - handling privileged instructions
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * Author(s): Carsten Otte <cotte@de.ibm.com>
+ * Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+
+#include <linux/kvm.h>
+#include <linux/errno.h>
+#include <asm/current.h>
+#include <asm/debug.h>
+#include <asm/ebcdic.h>
+#include <asm/sysinfo.h>
+#include "gaccess.h"
+#include "kvm-s390.h"
+
+static int handle_set_prefix(struct kvm_vcpu *vcpu)
+{
+ int base2 = vcpu->arch.sie_block->ipb >> 28;
+ int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+ u64 operand2;
+ u32 address = 0;
+ u8 tmp;
+
+ vcpu->stat.instruction_spx++;
+
+ operand2 = disp2;
+ if (base2)
+ operand2 += vcpu->arch.guest_gprs[base2];
+
+ /* must be word boundary */
+ if (operand2 & 3) {
+ kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+ goto out;
+ }
+
+ /* get the value */
+ if (get_guest_u32(vcpu, operand2, &address)) {
+ kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ goto out;
+ }
+
+ address = address & 0x7fffe000u;
+
+ /* make sure that the new value is valid memory */
+ if (copy_from_guest_absolute(vcpu, &tmp, address, 1) ||
+ (copy_from_guest_absolute(vcpu, &tmp, address + PAGE_SIZE, 1))) {
+ kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ goto out;
+ }
+
+ vcpu->arch.sie_block->prefix = address;
+ vcpu->arch.sie_block->ihcpu = 0xffff;
+
+ VCPU_EVENT(vcpu, 5, "setting prefix to %x", address);
+out:
+ return 0;
+}
+
+static int handle_store_prefix(struct kvm_vcpu *vcpu)
+{
+ int base2 = vcpu->arch.sie_block->ipb >> 28;
+ int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+ u64 operand2;
+ u32 address;
+
+ vcpu->stat.instruction_stpx++;
+ operand2 = disp2;
+ if (base2)
+ operand2 += vcpu->arch.guest_gprs[base2];
+
+ /* must be word boundary */
+ if (operand2 & 3) {
+ kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+ goto out;
+ }
+
+ address = vcpu->arch.sie_block->prefix;
+ address = address & 0x7fffe000u;
+
+ /* get the value */
+ if (put_guest_u32(vcpu, operand2, address)) {
+ kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ goto out;
+ }
+
+ VCPU_EVENT(vcpu, 5, "storing prefix to %x", address);
+out:
+ return 0;
+}
+
+static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
+{
+ int base2 = vcpu->arch.sie_block->ipb >> 28;
+ int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+ u64 useraddr;
+ int rc;
+
+ vcpu->stat.instruction_stap++;
+ useraddr = disp2;
+ if (base2)
+ useraddr += vcpu->arch.guest_gprs[base2];
+
+ if (useraddr & 1) {
+ kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+ goto out;
+ }
+
+ rc = put_guest_u16(vcpu, useraddr, vcpu->vcpu_id);
+ if (rc == -EFAULT) {
+ kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ goto out;
+ }
+
+ VCPU_EVENT(vcpu, 5, "storing cpu address to %lx", useraddr);
+out:
+ return 0;
+}
+
+static int handle_skey(struct kvm_vcpu *vcpu)
+{
+ vcpu->stat.instruction_storage_key++;
+ vcpu->arch.sie_block->gpsw.addr -= 4;
+ VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation");
+ return 0;
+}
+
+static int handle_stsch(struct kvm_vcpu *vcpu)
+{
+ vcpu->stat.instruction_stsch++;
+ VCPU_EVENT(vcpu, 4, "%s", "store subchannel - CC3");
+ /* condition code 3 */
+ vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
+ vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44;
+ return 0;
+}
+
+static int handle_chsc(struct kvm_vcpu *vcpu)
+{
+ vcpu->stat.instruction_chsc++;
+ VCPU_EVENT(vcpu, 4, "%s", "channel subsystem call - CC3");
+ /* condition code 3 */
+ vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
+ vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44;
+ return 0;
+}
+
+static unsigned int kvm_stfl(void)
+{
+ asm volatile(
+ " .insn s,0xb2b10000,0(0)\n" /* stfl */
+ "0:\n"
+ EX_TABLE(0b, 0b));
+ return S390_lowcore.stfl_fac_list;
+}
+
+static int handle_stfl(struct kvm_vcpu *vcpu)
+{
+ unsigned int facility_list = kvm_stfl();
+ int rc;
+
+ vcpu->stat.instruction_stfl++;
+ facility_list &= ~(1UL<<24); /* no stfle */
+
+ rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list),
+ &facility_list, sizeof(facility_list));
+ if (rc == -EFAULT)
+ kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ else
+ VCPU_EVENT(vcpu, 5, "store facility list value %x",
+ facility_list);
+ return 0;
+}
+
+static int handle_stidp(struct kvm_vcpu *vcpu)
+{
+ int base2 = vcpu->arch.sie_block->ipb >> 28;
+ int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+ u64 operand2;
+ int rc;
+
+ vcpu->stat.instruction_stidp++;
+ operand2 = disp2;
+ if (base2)
+ operand2 += vcpu->arch.guest_gprs[base2];
+
+ if (operand2 & 7) {
+ kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+ goto out;
+ }
+
+ rc = put_guest_u64(vcpu, operand2, vcpu->arch.stidp_data);
+ if (rc == -EFAULT) {
+ kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ goto out;
+ }
+
+ VCPU_EVENT(vcpu, 5, "%s", "store cpu id");
+out:
+ return 0;
+}
+
+static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem)
+{
+ struct float_interrupt *fi = &vcpu->kvm->arch.float_int;
+ int cpus = 0;
+ int n;
+
+ spin_lock_bh(&fi->lock);
+ for (n = 0; n < KVM_MAX_VCPUS; n++)
+ if (fi->local_int[n])
+ cpus++;
+ spin_unlock_bh(&fi->lock);
+
+ /* deal with other level 3 hypervisors */
+ if (stsi(mem, 3, 2, 2) == -ENOSYS)
+ mem->count = 0;
+ if (mem->count < 8)
+ mem->count++;
+ for (n = mem->count - 1; n > 0 ; n--)
+ memcpy(&mem->vm[n], &mem->vm[n - 1], sizeof(mem->vm[0]));
+
+ mem->vm[0].cpus_total = cpus;
+ mem->vm[0].cpus_configured = cpus;
+ mem->vm[0].cpus_standby = 0;
+ mem->vm[0].cpus_reserved = 0;
+ mem->vm[0].caf = 1000;
+ memcpy(mem->vm[0].name, "KVMguest", 8);
+ ASCEBC(mem->vm[0].name, 8);
+ memcpy(mem->vm[0].cpi, "KVM/Linux ", 16);
+ ASCEBC(mem->vm[0].cpi, 16);
+}
+
+static int handle_stsi(struct kvm_vcpu *vcpu)
+{
+ int fc = (vcpu->arch.guest_gprs[0] & 0xf0000000) >> 28;
+ int sel1 = vcpu->arch.guest_gprs[0] & 0xff;
+ int sel2 = vcpu->arch.guest_gprs[1] & 0xffff;
+ int base2 = vcpu->arch.sie_block->ipb >> 28;
+ int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+ u64 operand2;
+ unsigned long mem;
+
+ vcpu->stat.instruction_stsi++;
+ VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2);
+
+ operand2 = disp2;
+ if (base2)
+ operand2 += vcpu->arch.guest_gprs[base2];
+
+ if (operand2 & 0xfff && fc > 0)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ switch (fc) {
+ case 0:
+ vcpu->arch.guest_gprs[0] = 3 << 28;
+ vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
+ return 0;
+ case 1: /* same handling for 1 and 2 */
+ case 2:
+ mem = get_zeroed_page(GFP_KERNEL);
+ if (!mem)
+ goto out_fail;
+ if (stsi((void *) mem, fc, sel1, sel2) == -ENOSYS)
+ goto out_mem;
+ break;
+ case 3:
+ if (sel1 != 2 || sel2 != 2)
+ goto out_fail;
+ mem = get_zeroed_page(GFP_KERNEL);
+ if (!mem)
+ goto out_fail;
+ handle_stsi_3_2_2(vcpu, (void *) mem);
+ break;
+ default:
+ goto out_fail;
+ }
+
+ if (copy_to_guest_absolute(vcpu, operand2, (void *) mem, PAGE_SIZE)) {
+ kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ goto out_mem;
+ }
+ free_page(mem);
+ vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
+ vcpu->arch.guest_gprs[0] = 0;
+ return 0;
+out_mem:
+ free_page(mem);
+out_fail:
+ /* condition code 3 */
+ vcpu->arch.sie_block->gpsw.mask |= 3ul << 44;
+ return 0;
+}
+
+static intercept_handler_t priv_handlers[256] = {
+ [0x02] = handle_stidp,
+ [0x10] = handle_set_prefix,
+ [0x11] = handle_store_prefix,
+ [0x12] = handle_store_cpu_address,
+ [0x29] = handle_skey,
+ [0x2a] = handle_skey,
+ [0x2b] = handle_skey,
+ [0x34] = handle_stsch,
+ [0x5f] = handle_chsc,
+ [0x7d] = handle_stsi,
+ [0xb1] = handle_stfl,
+};
+
+int kvm_s390_handle_priv(struct kvm_vcpu *vcpu)
+{
+ intercept_handler_t handler;
+
+ handler = priv_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
+ if (handler)
+ return handler(vcpu);
+ return -ENOTSUPP;
+}
diff --git a/arch/s390/kvm/sie64a.S b/arch/s390/kvm/sie64a.S
new file mode 100644
index 000000000000..934fd6a885f6
--- /dev/null
+++ b/arch/s390/kvm/sie64a.S
@@ -0,0 +1,47 @@
+/*
+ * sie64a.S - low level sie call
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
+ */
+
+#include <linux/errno.h>
+#include <asm/asm-offsets.h>
+
+SP_R5 = 5 * 8 # offset into stackframe
+SP_R6 = 6 * 8
+
+/*
+ * sie64a calling convention:
+ * %r2 pointer to sie control block
+ * %r3 guest register save area
+ */
+ .globl sie64a
+sie64a:
+ lgr %r5,%r3
+ stmg %r5,%r14,SP_R5(%r15) # save register on entry
+ lgr %r14,%r2 # pointer to sie control block
+ lmg %r0,%r13,0(%r3) # load guest gprs 0-13
+sie_inst:
+ sie 0(%r14)
+ lg %r14,SP_R5(%r15)
+ stmg %r0,%r13,0(%r14) # save guest gprs 0-13
+ lghi %r2,0
+ lmg %r6,%r14,SP_R6(%r15)
+ br %r14
+
+sie_err:
+ lg %r14,SP_R5(%r15)
+ stmg %r0,%r13,0(%r14) # save guest gprs 0-13
+ lghi %r2,-EFAULT
+ lmg %r6,%r14,SP_R6(%r15)
+ br %r14
+
+ .section __ex_table,"a"
+ .quad sie_inst,sie_err
+ .previous
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
new file mode 100644
index 000000000000..0a236acfb5f6
--- /dev/null
+++ b/arch/s390/kvm/sigp.c
@@ -0,0 +1,288 @@
+/*
+ * sigp.c - handlinge interprocessor communication
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * Author(s): Carsten Otte <cotte@de.ibm.com>
+ * Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include "gaccess.h"
+#include "kvm-s390.h"
+
+/* sigp order codes */
+#define SIGP_SENSE 0x01
+#define SIGP_EXTERNAL_CALL 0x02
+#define SIGP_EMERGENCY 0x03
+#define SIGP_START 0x04
+#define SIGP_STOP 0x05
+#define SIGP_RESTART 0x06
+#define SIGP_STOP_STORE_STATUS 0x09
+#define SIGP_INITIAL_CPU_RESET 0x0b
+#define SIGP_CPU_RESET 0x0c
+#define SIGP_SET_PREFIX 0x0d
+#define SIGP_STORE_STATUS_ADDR 0x0e
+#define SIGP_SET_ARCH 0x12
+
+/* cpu status bits */
+#define SIGP_STAT_EQUIPMENT_CHECK 0x80000000UL
+#define SIGP_STAT_INCORRECT_STATE 0x00000200UL
+#define SIGP_STAT_INVALID_PARAMETER 0x00000100UL
+#define SIGP_STAT_EXT_CALL_PENDING 0x00000080UL
+#define SIGP_STAT_STOPPED 0x00000040UL
+#define SIGP_STAT_OPERATOR_INTERV 0x00000020UL
+#define SIGP_STAT_CHECK_STOP 0x00000010UL
+#define SIGP_STAT_INOPERATIVE 0x00000004UL
+#define SIGP_STAT_INVALID_ORDER 0x00000002UL
+#define SIGP_STAT_RECEIVER_CHECK 0x00000001UL
+
+
+static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, u64 *reg)
+{
+ struct float_interrupt *fi = &vcpu->kvm->arch.float_int;
+ int rc;
+
+ if (cpu_addr >= KVM_MAX_VCPUS)
+ return 3; /* not operational */
+
+ spin_lock_bh(&fi->lock);
+ if (fi->local_int[cpu_addr] == NULL)
+ rc = 3; /* not operational */
+ else if (atomic_read(fi->local_int[cpu_addr]->cpuflags)
+ & CPUSTAT_RUNNING) {
+ *reg &= 0xffffffff00000000UL;
+ rc = 1; /* status stored */
+ } else {
+ *reg &= 0xffffffff00000000UL;
+ *reg |= SIGP_STAT_STOPPED;
+ rc = 1; /* status stored */
+ }
+ spin_unlock_bh(&fi->lock);
+
+ VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", cpu_addr, rc);
+ return rc;
+}
+
+static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr)
+{
+ struct float_interrupt *fi = &vcpu->kvm->arch.float_int;
+ struct local_interrupt *li;
+ struct interrupt_info *inti;
+ int rc;
+
+ if (cpu_addr >= KVM_MAX_VCPUS)
+ return 3; /* not operational */
+
+ inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+ if (!inti)
+ return -ENOMEM;
+
+ inti->type = KVM_S390_INT_EMERGENCY;
+
+ spin_lock_bh(&fi->lock);
+ li = fi->local_int[cpu_addr];
+ if (li == NULL) {
+ rc = 3; /* not operational */
+ kfree(inti);
+ goto unlock;
+ }
+ spin_lock_bh(&li->lock);
+ list_add_tail(&inti->list, &li->list);
+ atomic_set(&li->active, 1);
+ atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+ if (waitqueue_active(&li->wq))
+ wake_up_interruptible(&li->wq);
+ spin_unlock_bh(&li->lock);
+ rc = 0; /* order accepted */
+unlock:
+ spin_unlock_bh(&fi->lock);
+ VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr);
+ return rc;
+}
+
+static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int store)
+{
+ struct float_interrupt *fi = &vcpu->kvm->arch.float_int;
+ struct local_interrupt *li;
+ struct interrupt_info *inti;
+ int rc;
+
+ if (cpu_addr >= KVM_MAX_VCPUS)
+ return 3; /* not operational */
+
+ inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+ if (!inti)
+ return -ENOMEM;
+
+ inti->type = KVM_S390_SIGP_STOP;
+
+ spin_lock_bh(&fi->lock);
+ li = fi->local_int[cpu_addr];
+ if (li == NULL) {
+ rc = 3; /* not operational */
+ kfree(inti);
+ goto unlock;
+ }
+ spin_lock_bh(&li->lock);
+ list_add_tail(&inti->list, &li->list);
+ atomic_set(&li->active, 1);
+ atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
+ if (store)
+ li->action_bits |= ACTION_STORE_ON_STOP;
+ li->action_bits |= ACTION_STOP_ON_STOP;
+ if (waitqueue_active(&li->wq))
+ wake_up_interruptible(&li->wq);
+ spin_unlock_bh(&li->lock);
+ rc = 0; /* order accepted */
+unlock:
+ spin_unlock_bh(&fi->lock);
+ VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr);
+ return rc;
+}
+
+static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
+{
+ int rc;
+
+ switch (parameter & 0xff) {
+ case 0:
+ printk(KERN_WARNING "kvm: request to switch to ESA/390 mode"
+ " not supported");
+ rc = 3; /* not operational */
+ break;
+ case 1:
+ case 2:
+ rc = 0; /* order accepted */
+ break;
+ default:
+ rc = -ENOTSUPP;
+ }
+ return rc;
+}
+
+static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
+ u64 *reg)
+{
+ struct float_interrupt *fi = &vcpu->kvm->arch.float_int;
+ struct local_interrupt *li;
+ struct interrupt_info *inti;
+ int rc;
+ u8 tmp;
+
+ /* make sure that the new value is valid memory */
+ address = address & 0x7fffe000u;
+ if ((copy_from_guest(vcpu, &tmp,
+ (u64) (address + vcpu->kvm->arch.guest_origin) , 1)) ||
+ (copy_from_guest(vcpu, &tmp, (u64) (address +
+ vcpu->kvm->arch.guest_origin + PAGE_SIZE), 1))) {
+ *reg |= SIGP_STAT_INVALID_PARAMETER;
+ return 1; /* invalid parameter */
+ }
+
+ inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+ if (!inti)
+ return 2; /* busy */
+
+ spin_lock_bh(&fi->lock);
+ li = fi->local_int[cpu_addr];
+
+ if ((cpu_addr >= KVM_MAX_VCPUS) || (li == NULL)) {
+ rc = 1; /* incorrect state */
+ *reg &= SIGP_STAT_INCORRECT_STATE;
+ kfree(inti);
+ goto out_fi;
+ }
+
+ spin_lock_bh(&li->lock);
+ /* cpu must be in stopped state */
+ if (atomic_read(li->cpuflags) & CPUSTAT_RUNNING) {
+ rc = 1; /* incorrect state */
+ *reg &= SIGP_STAT_INCORRECT_STATE;
+ kfree(inti);
+ goto out_li;
+ }
+
+ inti->type = KVM_S390_SIGP_SET_PREFIX;
+ inti->prefix.address = address;
+
+ list_add_tail(&inti->list, &li->list);
+ atomic_set(&li->active, 1);
+ if (waitqueue_active(&li->wq))
+ wake_up_interruptible(&li->wq);
+ rc = 0; /* order accepted */
+
+ VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", cpu_addr, address);
+out_li:
+ spin_unlock_bh(&li->lock);
+out_fi:
+ spin_unlock_bh(&fi->lock);
+ return rc;
+}
+
+int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
+{
+ int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+ int r3 = vcpu->arch.sie_block->ipa & 0x000f;
+ int base2 = vcpu->arch.sie_block->ipb >> 28;
+ int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+ u32 parameter;
+ u16 cpu_addr = vcpu->arch.guest_gprs[r3];
+ u8 order_code;
+ int rc;
+
+ order_code = disp2;
+ if (base2)
+ order_code += vcpu->arch.guest_gprs[base2];
+
+ if (r1 % 2)
+ parameter = vcpu->arch.guest_gprs[r1];
+ else
+ parameter = vcpu->arch.guest_gprs[r1 + 1];
+
+ switch (order_code) {
+ case SIGP_SENSE:
+ vcpu->stat.instruction_sigp_sense++;
+ rc = __sigp_sense(vcpu, cpu_addr,
+ &vcpu->arch.guest_gprs[r1]);
+ break;
+ case SIGP_EMERGENCY:
+ vcpu->stat.instruction_sigp_emergency++;
+ rc = __sigp_emergency(vcpu, cpu_addr);
+ break;
+ case SIGP_STOP:
+ vcpu->stat.instruction_sigp_stop++;
+ rc = __sigp_stop(vcpu, cpu_addr, 0);
+ break;
+ case SIGP_STOP_STORE_STATUS:
+ vcpu->stat.instruction_sigp_stop++;
+ rc = __sigp_stop(vcpu, cpu_addr, 1);
+ break;
+ case SIGP_SET_ARCH:
+ vcpu->stat.instruction_sigp_arch++;
+ rc = __sigp_set_arch(vcpu, parameter);
+ break;
+ case SIGP_SET_PREFIX:
+ vcpu->stat.instruction_sigp_prefix++;
+ rc = __sigp_set_prefix(vcpu, cpu_addr, parameter,
+ &vcpu->arch.guest_gprs[r1]);
+ break;
+ case SIGP_RESTART:
+ vcpu->stat.instruction_sigp_restart++;
+ /* user space must know about restart */
+ default:
+ return -ENOTSUPP;
+ }
+
+ if (rc < 0)
+ return rc;
+
+ vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
+ vcpu->arch.sie_block->gpsw.mask |= (rc & 3ul) << 44;
+ return 0;
+}
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index fd072013f88c..5c1aea97cd12 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -30,11 +30,27 @@
#define TABLES_PER_PAGE 4
#define FRAG_MASK 15UL
#define SECOND_HALVES 10UL
+
+void clear_table_pgstes(unsigned long *table)
+{
+ clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
+ memset(table + 256, 0, PAGE_SIZE/4);
+ clear_table(table + 512, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
+ memset(table + 768, 0, PAGE_SIZE/4);
+}
+
#else
#define ALLOC_ORDER 2
#define TABLES_PER_PAGE 2
#define FRAG_MASK 3UL
#define SECOND_HALVES 2UL
+
+void clear_table_pgstes(unsigned long *table)
+{
+ clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
+ memset(table + 256, 0, PAGE_SIZE/2);
+}
+
#endif
unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec)
@@ -153,7 +169,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
unsigned long *table;
unsigned long bits;
- bits = mm->context.noexec ? 3UL : 1UL;
+ bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL;
spin_lock(&mm->page_table_lock);
page = NULL;
if (!list_empty(&mm->context.pgtable_list)) {
@@ -170,7 +186,10 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
pgtable_page_ctor(page);
page->flags &= ~FRAG_MASK;
table = (unsigned long *) page_to_phys(page);
- clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
+ if (mm->context.pgstes)
+ clear_table_pgstes(table);
+ else
+ clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
spin_lock(&mm->page_table_lock);
list_add(&page->lru, &mm->context.pgtable_list);
}
@@ -191,7 +210,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
struct page *page;
unsigned long bits;
- bits = mm->context.noexec ? 3UL : 1UL;
+ bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL;
bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
spin_lock(&mm->page_table_lock);
@@ -228,3 +247,43 @@ void disable_noexec(struct mm_struct *mm, struct task_struct *tsk)
mm->context.noexec = 0;
update_mm(mm, tsk);
}
+
+/*
+ * switch on pgstes for its userspace process (for kvm)
+ */
+int s390_enable_sie(void)
+{
+ struct task_struct *tsk = current;
+ struct mm_struct *mm;
+ int rc;
+
+ task_lock(tsk);
+
+ rc = 0;
+ if (tsk->mm->context.pgstes)
+ goto unlock;
+
+ rc = -EINVAL;
+ if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
+ tsk->mm != tsk->active_mm || tsk->mm->ioctx_list)
+ goto unlock;
+
+ tsk->mm->context.pgstes = 1; /* dirty little tricks .. */
+ mm = dup_mm(tsk);
+ tsk->mm->context.pgstes = 0;
+
+ rc = -ENOMEM;
+ if (!mm)
+ goto unlock;
+ mmput(tsk->mm);
+ tsk->mm = tsk->active_mm = mm;
+ preempt_disable();
+ update_mm(mm, tsk);
+ cpu_set(smp_processor_id(), mm->cpu_vm_mask);
+ preempt_enable();
+ rc = 0;
+unlock:
+ task_unlock(tsk);
+ return rc;
+}
+EXPORT_SYMBOL_GPL(s390_enable_sie);
diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c
index 49b435c3a57a..08d2e7325252 100644
--- a/arch/sh/drivers/pci/pci.c
+++ b/arch/sh/drivers/pci/pci.c
@@ -191,8 +191,8 @@ void __init pcibios_update_irq(struct pci_dev *dev, int irq)
void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
{
- unsigned long start = pci_resource_start(dev, bar);
- unsigned long len = pci_resource_len(dev, bar);
+ resource_size_t start = pci_resource_start(dev, bar);
+ resource_size_t len = pci_resource_len(dev, bar);
unsigned long flags = pci_resource_flags(dev, bar);
if (unlikely(!len || !start))
diff --git a/arch/sh/kernel/asm-offsets.c b/arch/sh/kernel/asm-offsets.c
index dc6725c51a89..57cf0e0680f3 100644
--- a/arch/sh/kernel/asm-offsets.c
+++ b/arch/sh/kernel/asm-offsets.c
@@ -11,12 +11,9 @@
#include <linux/stddef.h>
#include <linux/types.h>
#include <linux/mm.h>
-#include <asm/thread_info.h>
-
-#define DEFINE(sym, val) \
- asm volatile("\n->" #sym " %0 " #val : : "i" (val))
+#include <linux/kbuild.h>
-#define BLANK() asm volatile("\n->" : : )
+#include <asm/thread_info.h>
int main(void)
{
diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c
index 9bf19b00696a..a2a99e487e33 100644
--- a/arch/sh/kernel/irq.c
+++ b/arch/sh/kernel/irq.c
@@ -200,8 +200,6 @@ void irq_ctx_exit(int cpu)
hardirq_ctx[cpu] = NULL;
}
-extern asmlinkage void __do_softirq(void);
-
asmlinkage void do_softirq(void)
{
unsigned long flags;
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index 53dde0607362..d7df26bd1e54 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -307,15 +307,6 @@ void free_initrd_mem(unsigned long start, unsigned long end)
#endif
#ifdef CONFIG_MEMORY_HOTPLUG
-void online_page(struct page *page)
-{
- ClearPageReserved(page);
- init_page_count(page);
- __free_page(page);
- totalram_pages++;
- num_physpages++;
-}
-
int arch_add_memory(int nid, u64 start, u64 size)
{
pg_data_t *pgdat;
diff --git a/arch/sparc/kernel/asm-offsets.c b/arch/sparc/kernel/asm-offsets.c
index 6773ed76e414..cd3f7694e9b9 100644
--- a/arch/sparc/kernel/asm-offsets.c
+++ b/arch/sparc/kernel/asm-offsets.c
@@ -12,11 +12,7 @@
#include <linux/sched.h>
// #include <linux/mm.h>
-
-#define DEFINE(sym, val) \
- asm volatile("\n->" #sym " %0 " #val : : "i" (val))
-
-#define BLANK() asm volatile("\n->" : : )
+#include <linux/kbuild.h>
int foo(void)
{
diff --git a/arch/sparc/kernel/process.c b/arch/sparc/kernel/process.c
index 70c0dd22491d..e7f35198ae34 100644
--- a/arch/sparc/kernel/process.c
+++ b/arch/sparc/kernel/process.c
@@ -357,8 +357,6 @@ void flush_thread(void)
{
current_thread_info()->w_saved = 0;
- /* No new signal delivery by default */
- current->thread.new_signal = 0;
#ifndef CONFIG_SMP
if(last_task_used_math == current) {
#else
diff --git a/arch/sparc/kernel/signal.c b/arch/sparc/kernel/signal.c
index 3e849e8e3480..3c312290c3c2 100644
--- a/arch/sparc/kernel/signal.c
+++ b/arch/sparc/kernel/signal.c
@@ -1,5 +1,4 @@
-/* $Id: signal.c,v 1.110 2002/02/08 03:57:14 davem Exp $
- * linux/arch/sparc/kernel/signal.c
+/* linux/arch/sparc/kernel/signal.c
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
@@ -32,37 +31,7 @@ extern void fpsave(unsigned long *fpregs, unsigned long *fsr,
void *fpqueue, unsigned long *fpqdepth);
extern void fpload(unsigned long *fpregs, unsigned long *fsr);
-/* Signal frames: the original one (compatible with SunOS):
- *
- * Set up a signal frame... Make the stack look the way SunOS
- * expects it to look which is basically:
- *
- * ---------------------------------- <-- %sp at signal time
- * Struct sigcontext
- * Signal address
- * Ptr to sigcontext area above
- * Signal code
- * The signal number itself
- * One register window
- * ---------------------------------- <-- New %sp
- */
-struct signal_sframe {
- struct reg_window sig_window;
- int sig_num;
- int sig_code;
- struct sigcontext __user *sig_scptr;
- int sig_address;
- struct sigcontext sig_context;
- unsigned int extramask[_NSIG_WORDS - 1];
-};
-
-/*
- * And the new one, intended to be used for Linux applications only
- * (we have enough in there to work with clone).
- * All the interesting bits are in the info field.
- */
-
-struct new_signal_frame {
+struct signal_frame {
struct sparc_stackf ss;
__siginfo_t info;
__siginfo_fpu_t __user *fpu_save;
@@ -85,8 +54,7 @@ struct rt_signal_frame {
};
/* Align macros */
-#define SF_ALIGNEDSZ (((sizeof(struct signal_sframe) + 7) & (~7)))
-#define NF_ALIGNEDSZ (((sizeof(struct new_signal_frame) + 7) & (~7)))
+#define SF_ALIGNEDSZ (((sizeof(struct signal_frame) + 7) & (~7)))
#define RT_ALIGNEDSZ (((sizeof(struct rt_signal_frame) + 7) & (~7)))
static int _sigpause_common(old_sigset_t set)
@@ -141,15 +109,20 @@ restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
return err;
}
-static inline void do_new_sigreturn (struct pt_regs *regs)
+asmlinkage void do_sigreturn(struct pt_regs *regs)
{
- struct new_signal_frame __user *sf;
+ struct signal_frame __user *sf;
unsigned long up_psr, pc, npc;
sigset_t set;
__siginfo_fpu_t __user *fpu_save;
int err;
- sf = (struct new_signal_frame __user *) regs->u_regs[UREG_FP];
+ /* Always make any pending restarted system calls return -EINTR */
+ current_thread_info()->restart_block.fn = do_no_restart_syscall;
+
+ synchronize_user_stack();
+
+ sf = (struct signal_frame __user *) regs->u_regs[UREG_FP];
/* 1. Make sure we are not getting garbage from the user */
if (!access_ok(VERIFY_READ, sf, sizeof(*sf)))
@@ -198,73 +171,6 @@ segv_and_exit:
force_sig(SIGSEGV, current);
}
-asmlinkage void do_sigreturn(struct pt_regs *regs)
-{
- struct sigcontext __user *scptr;
- unsigned long pc, npc, psr;
- sigset_t set;
- int err;
-
- /* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
-
- synchronize_user_stack();
-
- if (current->thread.new_signal) {
- do_new_sigreturn(regs);
- return;
- }
-
- scptr = (struct sigcontext __user *) regs->u_regs[UREG_I0];
-
- /* Check sanity of the user arg. */
- if (!access_ok(VERIFY_READ, scptr, sizeof(struct sigcontext)) ||
- (((unsigned long) scptr) & 3))
- goto segv_and_exit;
-
- err = __get_user(pc, &scptr->sigc_pc);
- err |= __get_user(npc, &scptr->sigc_npc);
-
- if ((pc | npc) & 3)
- goto segv_and_exit;
-
- /* This is pretty much atomic, no amount locking would prevent
- * the races which exist anyways.
- */
- err |= __get_user(set.sig[0], &scptr->sigc_mask);
- /* Note that scptr + 1 points to extramask */
- err |= __copy_from_user(&set.sig[1], scptr + 1,
- (_NSIG_WORDS - 1) * sizeof(unsigned int));
-
- if (err)
- goto segv_and_exit;
-
- sigdelsetmask(&set, ~_BLOCKABLE);
- spin_lock_irq(&current->sighand->siglock);
- current->blocked = set;
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
-
- regs->pc = pc;
- regs->npc = npc;
-
- err = __get_user(regs->u_regs[UREG_FP], &scptr->sigc_sp);
- err |= __get_user(regs->u_regs[UREG_I0], &scptr->sigc_o0);
- err |= __get_user(regs->u_regs[UREG_G1], &scptr->sigc_g1);
-
- /* User can only change condition codes in %psr. */
- err |= __get_user(psr, &scptr->sigc_psr);
- if (err)
- goto segv_and_exit;
-
- regs->psr &= ~(PSR_ICC);
- regs->psr |= (psr & PSR_ICC);
- return;
-
-segv_and_exit:
- force_sig(SIGSEGV, current);
-}
-
asmlinkage void do_rt_sigreturn(struct pt_regs *regs)
{
struct rt_signal_frame __user *sf;
@@ -351,128 +257,6 @@ static inline void __user *get_sigframe(struct sigaction *sa, struct pt_regs *re
return (void __user *)(sp - framesize);
}
-static inline void
-setup_frame(struct sigaction *sa, struct pt_regs *regs, int signr, sigset_t *oldset, siginfo_t *info)
-{
- struct signal_sframe __user *sframep;
- struct sigcontext __user *sc;
- int window = 0, err;
- unsigned long pc = regs->pc;
- unsigned long npc = regs->npc;
- struct thread_info *tp = current_thread_info();
- void __user *sig_address;
- int sig_code;
-
- synchronize_user_stack();
- sframep = (struct signal_sframe __user *)
- get_sigframe(sa, regs, SF_ALIGNEDSZ);
- if (invalid_frame_pointer(sframep, sizeof(*sframep))){
- /* Don't change signal code and address, so that
- * post mortem debuggers can have a look.
- */
- goto sigill_and_return;
- }
-
- sc = &sframep->sig_context;
-
- /* We've already made sure frame pointer isn't in kernel space... */
- err = __put_user((sas_ss_flags(regs->u_regs[UREG_FP]) == SS_ONSTACK),
- &sc->sigc_onstack);
- err |= __put_user(oldset->sig[0], &sc->sigc_mask);
- err |= __copy_to_user(sframep->extramask, &oldset->sig[1],
- (_NSIG_WORDS - 1) * sizeof(unsigned int));
- err |= __put_user(regs->u_regs[UREG_FP], &sc->sigc_sp);
- err |= __put_user(pc, &sc->sigc_pc);
- err |= __put_user(npc, &sc->sigc_npc);
- err |= __put_user(regs->psr, &sc->sigc_psr);
- err |= __put_user(regs->u_regs[UREG_G1], &sc->sigc_g1);
- err |= __put_user(regs->u_regs[UREG_I0], &sc->sigc_o0);
- err |= __put_user(tp->w_saved, &sc->sigc_oswins);
- if (tp->w_saved)
- for (window = 0; window < tp->w_saved; window++) {
- put_user((char *)tp->rwbuf_stkptrs[window],
- &sc->sigc_spbuf[window]);
- err |= __copy_to_user(&sc->sigc_wbuf[window],
- &tp->reg_window[window],
- sizeof(struct reg_window));
- }
- else
- err |= __copy_to_user(sframep, (char *) regs->u_regs[UREG_FP],
- sizeof(struct reg_window));
-
- tp->w_saved = 0; /* So process is allowed to execute. */
-
- err |= __put_user(signr, &sframep->sig_num);
- sig_address = NULL;
- sig_code = 0;
- if (SI_FROMKERNEL (info) && (info->si_code & __SI_MASK) == __SI_FAULT) {
- sig_address = info->si_addr;
- switch (signr) {
- case SIGSEGV:
- switch (info->si_code) {
- case SEGV_MAPERR: sig_code = SUBSIG_NOMAPPING; break;
- default: sig_code = SUBSIG_PROTECTION; break;
- }
- break;
- case SIGILL:
- switch (info->si_code) {
- case ILL_ILLOPC: sig_code = SUBSIG_ILLINST; break;
- case ILL_PRVOPC: sig_code = SUBSIG_PRIVINST; break;
- case ILL_ILLTRP: sig_code = SUBSIG_BADTRAP(info->si_trapno); break;
- default: sig_code = SUBSIG_STACK; break;
- }
- break;
- case SIGFPE:
- switch (info->si_code) {
- case FPE_INTDIV: sig_code = SUBSIG_IDIVZERO; break;
- case FPE_INTOVF: sig_code = SUBSIG_FPINTOVFL; break;
- case FPE_FLTDIV: sig_code = SUBSIG_FPDIVZERO; break;
- case FPE_FLTOVF: sig_code = SUBSIG_FPOVFLOW; break;
- case FPE_FLTUND: sig_code = SUBSIG_FPUNFLOW; break;
- case FPE_FLTRES: sig_code = SUBSIG_FPINEXACT; break;
- case FPE_FLTINV: sig_code = SUBSIG_FPOPERROR; break;
- default: sig_code = SUBSIG_FPERROR; break;
- }
- break;
- case SIGBUS:
- switch (info->si_code) {
- case BUS_ADRALN: sig_code = SUBSIG_ALIGNMENT; break;
- case BUS_ADRERR: sig_code = SUBSIG_MISCERROR; break;
- default: sig_code = SUBSIG_BUSTIMEOUT; break;
- }
- break;
- case SIGEMT:
- switch (info->si_code) {
- case EMT_TAGOVF: sig_code = SUBSIG_TAG; break;
- }
- break;
- case SIGSYS:
- if (info->si_code == (__SI_FAULT|0x100)) {
- sig_code = info->si_trapno;
- break;
- }
- default:
- sig_address = NULL;
- }
- }
- err |= __put_user((unsigned long)sig_address, &sframep->sig_address);
- err |= __put_user(sig_code, &sframep->sig_code);
- err |= __put_user(sc, &sframep->sig_scptr);
- if (err)
- goto sigsegv;
-
- regs->u_regs[UREG_FP] = (unsigned long) sframep;
- regs->pc = (unsigned long) sa->sa_handler;
- regs->npc = (regs->pc + 4);
- return;
-
-sigill_and_return:
- do_exit(SIGILL);
-sigsegv:
- force_sigsegv(signr, current);
-}
-
-
static inline int
save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
{
@@ -508,21 +292,20 @@ save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
return err;
}
-static inline void
-new_setup_frame(struct k_sigaction *ka, struct pt_regs *regs,
- int signo, sigset_t *oldset)
+static void setup_frame(struct k_sigaction *ka, struct pt_regs *regs,
+ int signo, sigset_t *oldset)
{
- struct new_signal_frame __user *sf;
+ struct signal_frame __user *sf;
int sigframe_size, err;
/* 1. Make sure everything is clean */
synchronize_user_stack();
- sigframe_size = NF_ALIGNEDSZ;
+ sigframe_size = SF_ALIGNEDSZ;
if (!used_math())
sigframe_size -= sizeof(__siginfo_fpu_t);
- sf = (struct new_signal_frame __user *)
+ sf = (struct signal_frame __user *)
get_sigframe(&ka->sa, regs, sigframe_size);
if (invalid_frame_pointer(sf, sigframe_size))
@@ -586,9 +369,8 @@ sigsegv:
force_sigsegv(signo, current);
}
-static inline void
-new_setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs,
- int signo, sigset_t *oldset, siginfo_t *info)
+static void setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs,
+ int signo, sigset_t *oldset, siginfo_t *info)
{
struct rt_signal_frame __user *sf;
int sigframe_size;
@@ -674,11 +456,9 @@ handle_signal(unsigned long signr, struct k_sigaction *ka,
siginfo_t *info, sigset_t *oldset, struct pt_regs *regs)
{
if (ka->sa.sa_flags & SA_SIGINFO)
- new_setup_rt_frame(ka, regs, signr, oldset, info);
- else if (current->thread.new_signal)
- new_setup_frame(ka, regs, signr, oldset);
+ setup_rt_frame(ka, regs, signr, oldset, info);
else
- setup_frame(&ka->sa, regs, signr, oldset, info);
+ setup_frame(ka, regs, signr, oldset);
spin_lock_irq(&current->sighand->siglock);
sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
diff --git a/arch/sparc/kernel/sys_sparc.c b/arch/sparc/kernel/sys_sparc.c
index 42bf09db9a81..f188b5dc9fd0 100644
--- a/arch/sparc/kernel/sys_sparc.c
+++ b/arch/sparc/kernel/sys_sparc.c
@@ -1,5 +1,4 @@
-/* $Id: sys_sparc.c,v 1.70 2001/04/14 01:12:02 davem Exp $
- * linux/arch/sparc/kernel/sys_sparc.c
+/* linux/arch/sparc/kernel/sys_sparc.c
*
* This file contains various random system calls that
* have a non-standard calling sequence on the Linux/sparc
@@ -395,10 +394,8 @@ sparc_sigaction (int sig, const struct old_sigaction __user *act,
struct k_sigaction new_ka, old_ka;
int ret;
- if (sig < 0) {
- current->thread.new_signal = 1;
- sig = -sig;
- }
+ WARN_ON_ONCE(sig >= 0);
+ sig = -sig;
if (act) {
unsigned long mask;
@@ -446,11 +443,6 @@ sys_rt_sigaction(int sig,
if (sigsetsize != sizeof(sigset_t))
return -EINVAL;
- /* All tasks which use RT signals (effectively) use
- * new style signals.
- */
- current->thread.new_signal = 1;
-
if (act) {
new_ka.ka_restorer = restorer;
if (copy_from_user(&new_ka.sa, act, sizeof(*act)))
diff --git a/arch/sparc/lib/iomap.c b/arch/sparc/lib/iomap.c
index 54501c1ca785..9ef37e13a920 100644
--- a/arch/sparc/lib/iomap.c
+++ b/arch/sparc/lib/iomap.c
@@ -21,8 +21,8 @@ EXPORT_SYMBOL(ioport_unmap);
/* Create a virtual mapping cookie for a PCI BAR (memory or IO) */
void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
{
- unsigned long start = pci_resource_start(dev, bar);
- unsigned long len = pci_resource_len(dev, bar);
+ resource_size_t start = pci_resource_start(dev, bar);
+ resource_size_t len = pci_resource_len(dev, bar);
unsigned long flags = pci_resource_flags(dev, bar);
if (!len || !start)
diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
index 8acc5cc38621..edbe71e3fab9 100644
--- a/arch/sparc64/Kconfig
+++ b/arch/sparc64/Kconfig
@@ -1,9 +1,5 @@
-# $Id: config.in,v 1.158 2002/01/24 22:14:44 davem Exp $
-# For a description of the syntax of this configuration file,
-# see the Configure script.
-#
-
-mainmenu "Linux/UltraSPARC Kernel Configuration"
+# sparc64 configuration
+mainmenu "Linux Kernel Configuration for 64-bit SPARC"
config SPARC
bool
@@ -17,12 +13,6 @@ config SPARC64
default y
select HAVE_IDE
select HAVE_LMB
- help
- SPARC is a family of RISC microprocessors designed and marketed by
- Sun Microsystems, incorporated. This port covers the newer 64-bit
- UltraSPARC. The UltraLinux project maintains both the SPARC32 and
- SPARC64 ports; its web page is available at
- <http://www.ultralinux.org/>.
config GENERIC_TIME
bool
@@ -97,7 +87,7 @@ config SPARC64_PAGE_SIZE_8KB
help
This lets you select the page size of the kernel.
- 8KB and 64KB work quite well, since Sparc ELF sections
+ 8KB and 64KB work quite well, since SPARC ELF sections
provide for up to 64KB alignment.
Therefore, 512KB and 4MB are for expert hackers only.
@@ -138,7 +128,7 @@ config HOTPLUG_CPU
bool "Support for hot-pluggable CPUs"
depends on SMP
select HOTPLUG
- ---help---
+ help
Say Y here to experiment with turning CPUs off and on. CPUs
can be controlled through /sys/devices/system/cpu/cpu#.
Say N if you want to disable CPU hotplug.
@@ -155,23 +145,16 @@ source "kernel/time/Kconfig"
config SMP
bool "Symmetric multi-processing support"
- ---help---
+ help
This enables support for systems with more than one CPU. If you have
a system with only one CPU, say N. If you have a system with more than
one CPU, say Y.
If you say N here, the kernel will run on single and multiprocessor
machines, but will use only one CPU of a multiprocessor machine. If
- you say Y here, the kernel will run on many, but not all,
- singleprocessor machines. On a singleprocessor machine, the kernel
- will run faster if you say N here.
-
- People using multiprocessor machines who say Y here should also say
- Y to "Enhanced Real Time Clock Support", below. The "Advanced Power
- Management" code will be disabled if you say Y here.
-
- See also <file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO
- available at <http://www.tldp.org/docs.html#howto>.
+ you say Y here, the kernel will run on single-processor machines.
+ On a single-processor machine, the kernel will run faster if you say
+ N here.
If you don't know what to do here, say N.
@@ -284,50 +267,19 @@ source "mm/Kconfig"
config ISA
bool
- help
- Find out whether you have ISA slots on your motherboard. ISA is the
- name of a bus system, i.e. the way the CPU talks to the other stuff
- inside your box. Other bus systems are PCI, EISA, MicroChannel
- (MCA) or VESA. ISA is an older system, now being displaced by PCI;
- newer boards don't support it. If you have ISA, say Y, otherwise N.
config ISAPNP
bool
- help
- Say Y here if you would like support for ISA Plug and Play devices.
- Some information is in <file:Documentation/isapnp.txt>.
-
- To compile this driver as a module, choose M here: the
- module will be called isapnp.
-
- If unsure, say Y.
config EISA
bool
- ---help---
- The Extended Industry Standard Architecture (EISA) bus was
- developed as an open alternative to the IBM MicroChannel bus.
-
- The EISA bus provided some of the features of the IBM MicroChannel
- bus while maintaining backward compatibility with cards made for
- the older ISA bus. The EISA bus saw limited use between 1988 and
- 1995 when it was made obsolete by the PCI bus.
-
- Say Y here if you are building a kernel for an EISA-based machine.
-
- Otherwise, say N.
config MCA
bool
- help
- MicroChannel Architecture is found in some IBM PS/2 machines and
- laptops. It is a bus system similar to PCI or ISA. See
- <file:Documentation/mca.txt> (and especially the web page given
- there) before attempting to build an MCA bus kernel.
config PCMCIA
tristate
- ---help---
+ help
Say Y here if you want to attach PCMCIA- or PC-cards to your Linux
computer. These are credit-card size devices such as network cards,
modems or hard drives often used with laptops computers. There are
@@ -369,10 +321,10 @@ config PCI
bool "PCI support"
select ARCH_SUPPORTS_MSI
help
- Find out whether you have a PCI motherboard. PCI is the name of a
- bus system, i.e. the way the CPU talks to the other stuff inside
- your box. Other bus systems are ISA, EISA, MicroChannel (MCA) or
- VESA. If you have PCI, say Y, otherwise N.
+ Find out whether your system includes a PCI bus. PCI is the name of
+ a bus system, i.e. the way the CPU talks to the other stuff inside
+ your box. If you say Y here, the kernel will include drivers and
+ infrastructure code to support PCI bus devices.
config PCI_DOMAINS
def_bool PCI
@@ -396,15 +348,8 @@ menu "Executable file formats"
source "fs/Kconfig.binfmt"
-config SPARC32_COMPAT
- bool "Kernel support for Linux/Sparc 32bit binary compatibility"
- help
- This allows you to run 32-bit binaries on your Ultra.
- Everybody wants this; say Y.
-
config COMPAT
bool
- depends on SPARC32_COMPAT
default y
select COMPAT_BINFMT_ELF
@@ -421,8 +366,8 @@ config SCHED_SMT
default y
help
SMT scheduler support improves the CPU scheduler's decision making
- when dealing with UltraSPARC cpus at a cost of slightly increased
- overhead in some places. If unsure say N here.
+ when dealing with SPARC cpus at a cost of slightly increased overhead
+ in some places. If unsure say N here.
config SCHED_MC
bool "Multi-core scheduler support"
diff --git a/arch/sparc64/defconfig b/arch/sparc64/defconfig
index 92f79680f70d..aff93c9d13f4 100644
--- a/arch/sparc64/defconfig
+++ b/arch/sparc64/defconfig
@@ -1,7 +1,7 @@
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.25-numa
-# Wed Apr 23 04:49:08 2008
+# Linux kernel version: 2.6.25
+# Sat Apr 26 03:11:06 2008
#
CONFIG_SPARC=y
CONFIG_SPARC64=y
@@ -152,7 +152,9 @@ CONFIG_GENERIC_CALIBRATE_DELAY=y
CONFIG_HUGETLB_PAGE_SIZE_4MB=y
# CONFIG_HUGETLB_PAGE_SIZE_512K is not set
# CONFIG_HUGETLB_PAGE_SIZE_64K is not set
-# CONFIG_NUMA is not set
+CONFIG_NUMA=y
+CONFIG_NODES_SHIFT=4
+CONFIG_NODES_SPAN_OTHER_NODES=y
CONFIG_ARCH_POPULATES_NODE_MAP=y
CONFIG_ARCH_SELECT_MEMORY_MODEL=y
CONFIG_ARCH_SPARSEMEM_ENABLE=y
@@ -162,12 +164,14 @@ CONFIG_SELECT_MEMORY_MODEL=y
# CONFIG_DISCONTIGMEM_MANUAL is not set
CONFIG_SPARSEMEM_MANUAL=y
CONFIG_SPARSEMEM=y
+CONFIG_NEED_MULTIPLE_NODES=y
CONFIG_HAVE_MEMORY_PRESENT=y
# CONFIG_SPARSEMEM_STATIC is not set
CONFIG_SPARSEMEM_EXTREME=y
CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y
CONFIG_SPARSEMEM_VMEMMAP=y
CONFIG_SPLIT_PTLOCK_CPUS=4
+CONFIG_MIGRATION=y
CONFIG_RESOURCES_64BIT=y
CONFIG_ZONE_DMA_FLAG=0
CONFIG_NR_QUICK=1
@@ -191,7 +195,6 @@ CONFIG_SUN_OPENPROMFS=m
CONFIG_BINFMT_ELF=y
CONFIG_COMPAT_BINFMT_ELF=y
CONFIG_BINFMT_MISC=m
-CONFIG_SPARC32_COMPAT=y
CONFIG_COMPAT=y
CONFIG_SYSVIPC_COMPAT=y
CONFIG_SCHED_SMT=y
@@ -746,13 +749,7 @@ CONFIG_DEVPORT=y
CONFIG_I2C=y
CONFIG_I2C_BOARDINFO=y
# CONFIG_I2C_CHARDEV is not set
-
-#
-# I2C Algorithms
-#
CONFIG_I2C_ALGOBIT=y
-# CONFIG_I2C_ALGOPCF is not set
-# CONFIG_I2C_ALGOPCA is not set
#
# I2C Hardware Bus support
@@ -780,6 +777,7 @@ CONFIG_I2C_ALGOBIT=y
# CONFIG_I2C_VIA is not set
# CONFIG_I2C_VIAPRO is not set
# CONFIG_I2C_VOODOO3 is not set
+# CONFIG_I2C_PCA_PLATFORM is not set
#
# Miscellaneous I2C Chip support
@@ -1026,6 +1024,7 @@ CONFIG_SND_ALI5451=m
# CONFIG_SND_AU8810 is not set
# CONFIG_SND_AU8820 is not set
# CONFIG_SND_AU8830 is not set
+# CONFIG_SND_AW2 is not set
# CONFIG_SND_AZT3328 is not set
# CONFIG_SND_BT87X is not set
# CONFIG_SND_CA0106 is not set
@@ -1097,10 +1096,6 @@ CONFIG_SND_SUN_CS4231=m
# CONFIG_SND_SOC is not set
#
-# SoC Audio support for SuperH
-#
-
-#
# ALSA SoC audio for Freescale SOCs
#
diff --git a/arch/sparc64/kernel/Makefile b/arch/sparc64/kernel/Makefile
index 63c6ae0dd273..2bd0340b743d 100644
--- a/arch/sparc64/kernel/Makefile
+++ b/arch/sparc64/kernel/Makefile
@@ -15,17 +15,17 @@ obj-y := process.o setup.o cpu.o idprom.o \
visemul.o prom.o of_device.o hvapi.o sstate.o mdesc.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
-obj-$(CONFIG_PCI) += ebus.o isa.o pci_common.o \
+obj-$(CONFIG_PCI) += ebus.o pci_common.o \
pci_psycho.o pci_sabre.o pci_schizo.o \
pci_sun4v.o pci_sun4v_asm.o pci_fire.o
obj-$(CONFIG_PCI_MSI) += pci_msi.o
obj-$(CONFIG_SMP) += smp.o trampoline.o hvtramp.o
-obj-$(CONFIG_SPARC32_COMPAT) += sys32.o sys_sparc32.o signal32.o
+obj-$(CONFIG_COMPAT) += sys32.o sys_sparc32.o signal32.o
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_US3_FREQ) += us3_cpufreq.o
obj-$(CONFIG_US2E_FREQ) += us2e_cpufreq.o
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_SUN_LDOMS) += ldc.o vio.o viohs.o ds.o
obj-$(CONFIG_AUDIT) += audit.o
-obj-$(CONFIG_AUDIT)$(CONFIG_SPARC32_COMPAT) += compat_audit.o
+obj-$(CONFIG_AUDIT)$(CONFIG_COMPAT) += compat_audit.o
obj-y += $(obj-yy)
diff --git a/arch/sparc64/kernel/audit.c b/arch/sparc64/kernel/audit.c
index 24d7f4b4178a..8fff0ac63d56 100644
--- a/arch/sparc64/kernel/audit.c
+++ b/arch/sparc64/kernel/audit.c
@@ -30,7 +30,7 @@ static unsigned signal_class[] = {
int audit_classify_arch(int arch)
{
-#ifdef CONFIG_SPARC32_COMPAT
+#ifdef CONFIG_COMPAT
if (arch == AUDIT_ARCH_SPARC)
return 1;
#endif
@@ -39,7 +39,7 @@ int audit_classify_arch(int arch)
int audit_classify_syscall(int abi, unsigned syscall)
{
-#ifdef CONFIG_SPARC32_COMPAT
+#ifdef CONFIG_COMPAT
extern int sparc32_classify_syscall(unsigned);
if (abi == AUDIT_ARCH_SPARC)
return sparc32_classify_syscall(syscall);
@@ -60,7 +60,7 @@ int audit_classify_syscall(int abi, unsigned syscall)
static int __init audit_classes_init(void)
{
-#ifdef CONFIG_SPARC32_COMPAT
+#ifdef CONFIG_COMPAT
extern __u32 sparc32_dir_class[];
extern __u32 sparc32_write_class[];
extern __u32 sparc32_read_class[];
diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c
index eb88bd6e674e..b441a26b73b0 100644
--- a/arch/sparc64/kernel/irq.c
+++ b/arch/sparc64/kernel/irq.c
@@ -1,6 +1,6 @@
/* irq.c: UltraSparc IRQ handling/init/registry.
*
- * Copyright (C) 1997, 2007 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1997, 2007, 2008 David S. Miller (davem@davemloft.net)
* Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be)
* Copyright (C) 1998 Jakub Jelinek (jj@ultra.linux.cz)
*/
@@ -308,6 +308,7 @@ static void sun4u_irq_enable(unsigned int virt_irq)
IMAP_AID_SAFARI | IMAP_NID_SAFARI);
val |= tid | IMAP_VALID;
upa_writeq(val, imap);
+ upa_writeq(ICLR_IDLE, data->iclr);
}
}
diff --git a/arch/sparc64/kernel/isa.c b/arch/sparc64/kernel/isa.c
deleted file mode 100644
index a2af5ed784c9..000000000000
--- a/arch/sparc64/kernel/isa.c
+++ /dev/null
@@ -1,191 +0,0 @@
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/pci.h>
-#include <linux/slab.h>
-#include <asm/oplib.h>
-#include <asm/prom.h>
-#include <asm/of_device.h>
-#include <asm/isa.h>
-
-struct sparc_isa_bridge *isa_chain;
-
-static void __init fatal_err(const char *reason)
-{
- prom_printf("ISA: fatal error, %s.\n", reason);
-}
-
-static void __init report_dev(struct sparc_isa_device *isa_dev, int child)
-{
- if (child)
- printk(" (%s)", isa_dev->prom_node->name);
- else
- printk(" [%s", isa_dev->prom_node->name);
-}
-
-static void __init isa_dev_get_resource(struct sparc_isa_device *isa_dev)
-{
- struct of_device *op = of_find_device_by_node(isa_dev->prom_node);
-
- memcpy(&isa_dev->resource, &op->resource[0], sizeof(struct resource));
-}
-
-static void __init isa_dev_get_irq(struct sparc_isa_device *isa_dev)
-{
- struct of_device *op = of_find_device_by_node(isa_dev->prom_node);
-
- if (!op || !op->num_irqs) {
- isa_dev->irq = PCI_IRQ_NONE;
- } else {
- isa_dev->irq = op->irqs[0];
- }
-}
-
-static void __init isa_fill_children(struct sparc_isa_device *parent_isa_dev)
-{
- struct device_node *dp = parent_isa_dev->prom_node->child;
-
- if (!dp)
- return;
-
- printk(" ->");
- while (dp) {
- struct sparc_isa_device *isa_dev;
-
- isa_dev = kzalloc(sizeof(*isa_dev), GFP_KERNEL);
- if (!isa_dev) {
- fatal_err("cannot allocate child isa_dev");
- prom_halt();
- }
-
- /* Link it in to parent. */
- isa_dev->next = parent_isa_dev->child;
- parent_isa_dev->child = isa_dev;
-
- isa_dev->bus = parent_isa_dev->bus;
- isa_dev->prom_node = dp;
-
- isa_dev_get_resource(isa_dev);
- isa_dev_get_irq(isa_dev);
-
- report_dev(isa_dev, 1);
-
- dp = dp->sibling;
- }
-}
-
-static void __init isa_fill_devices(struct sparc_isa_bridge *isa_br)
-{
- struct device_node *dp = isa_br->prom_node->child;
-
- while (dp) {
- struct sparc_isa_device *isa_dev;
- struct dev_archdata *sd;
-
- isa_dev = kzalloc(sizeof(*isa_dev), GFP_KERNEL);
- if (!isa_dev) {
- printk(KERN_DEBUG "ISA: cannot allocate isa_dev");
- return;
- }
-
- sd = &isa_dev->ofdev.dev.archdata;
- sd->prom_node = dp;
- sd->op = &isa_dev->ofdev;
- sd->iommu = isa_br->ofdev.dev.parent->archdata.iommu;
- sd->stc = isa_br->ofdev.dev.parent->archdata.stc;
- sd->numa_node = isa_br->ofdev.dev.parent->archdata.numa_node;
-
- isa_dev->ofdev.node = dp;
- isa_dev->ofdev.dev.parent = &isa_br->ofdev.dev;
- isa_dev->ofdev.dev.bus = &isa_bus_type;
- sprintf(isa_dev->ofdev.dev.bus_id, "isa[%08x]", dp->node);
-
- /* Register with core */
- if (of_device_register(&isa_dev->ofdev) != 0) {
- printk(KERN_DEBUG "isa: device registration error for %s!\n",
- dp->path_component_name);
- kfree(isa_dev);
- goto next_sibling;
- }
-
- /* Link it in. */
- isa_dev->next = NULL;
- if (isa_br->devices == NULL) {
- isa_br->devices = isa_dev;
- } else {
- struct sparc_isa_device *tmp = isa_br->devices;
-
- while (tmp->next)
- tmp = tmp->next;
-
- tmp->next = isa_dev;
- }
-
- isa_dev->bus = isa_br;
- isa_dev->prom_node = dp;
-
- isa_dev_get_resource(isa_dev);
- isa_dev_get_irq(isa_dev);
-
- report_dev(isa_dev, 0);
-
- isa_fill_children(isa_dev);
-
- printk("]");
-
- next_sibling:
- dp = dp->sibling;
- }
-}
-
-void __init isa_init(void)
-{
- struct pci_dev *pdev;
- unsigned short vendor, device;
- int index = 0;
-
- vendor = PCI_VENDOR_ID_AL;
- device = PCI_DEVICE_ID_AL_M1533;
-
- pdev = NULL;
- while ((pdev = pci_get_device(vendor, device, pdev)) != NULL) {
- struct sparc_isa_bridge *isa_br;
- struct device_node *dp;
-
- dp = pci_device_to_OF_node(pdev);
-
- isa_br = kzalloc(sizeof(*isa_br), GFP_KERNEL);
- if (!isa_br) {
- printk(KERN_DEBUG "isa: cannot allocate sparc_isa_bridge");
- pci_dev_put(pdev);
- return;
- }
-
- isa_br->ofdev.node = dp;
- isa_br->ofdev.dev.parent = &pdev->dev;
- isa_br->ofdev.dev.bus = &isa_bus_type;
- sprintf(isa_br->ofdev.dev.bus_id, "isa%d", index);
-
- /* Register with core */
- if (of_device_register(&isa_br->ofdev) != 0) {
- printk(KERN_DEBUG "isa: device registration error for %s!\n",
- dp->path_component_name);
- kfree(isa_br);
- pci_dev_put(pdev);
- return;
- }
-
- /* Link it in. */
- isa_br->next = isa_chain;
- isa_chain = isa_br;
-
- isa_br->self = pdev;
- isa_br->index = index++;
- isa_br->prom_node = dp;
-
- printk("isa%d:", isa_br->index);
-
- isa_fill_devices(isa_br);
-
- printk("\n");
- }
-}
diff --git a/arch/sparc64/kernel/of_device.c b/arch/sparc64/kernel/of_device.c
index 9e58e8cba1c3..d569f60c24b8 100644
--- a/arch/sparc64/kernel/of_device.c
+++ b/arch/sparc64/kernel/of_device.c
@@ -412,12 +412,6 @@ static int __init build_one_resource(struct device_node *parent,
static int __init use_1to1_mapping(struct device_node *pp)
{
- /* If this is on the PMU bus, don't try to translate it even
- * if a ranges property exists.
- */
- if (!strcmp(pp->name, "pmu"))
- return 1;
-
/* If we have a ranges property in the parent, use it. */
if (of_find_property(pp, "ranges", NULL) != NULL)
return 0;
diff --git a/arch/sparc64/kernel/pci.c b/arch/sparc64/kernel/pci.c
index 49f912766519..dbf2fc2f4d87 100644
--- a/arch/sparc64/kernel/pci.c
+++ b/arch/sparc64/kernel/pci.c
@@ -23,7 +23,6 @@
#include <asm/pgtable.h>
#include <asm/irq.h>
#include <asm/ebus.h>
-#include <asm/isa.h>
#include <asm/prom.h>
#include <asm/apb.h>
@@ -885,7 +884,6 @@ static int __init pcibios_init(void)
pci_scan_each_controller_bus();
- isa_init();
ebus_init();
power_init();
diff --git a/arch/sparc64/kernel/process.c b/arch/sparc64/kernel/process.c
index acf8c5250aa9..056013749157 100644
--- a/arch/sparc64/kernel/process.c
+++ b/arch/sparc64/kernel/process.c
@@ -1,5 +1,4 @@
-/* $Id: process.c,v 1.131 2002/02/09 19:49:30 davem Exp $
- * arch/sparc64/kernel/process.c
+/* arch/sparc64/kernel/process.c
*
* Copyright (C) 1995, 1996 David S. Miller (davem@caip.rutgers.edu)
* Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be)
@@ -368,9 +367,6 @@ void flush_thread(void)
if (get_thread_current_ds() != ASI_AIUS)
set_fs(USER_DS);
-
- /* Init new signal delivery disposition. */
- clear_thread_flag(TIF_NEWSIGNALS);
}
/* It's a bit more tricky when 64-bit tasks are involved... */
@@ -595,6 +591,12 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
if (clone_flags & CLONE_SETTLS)
t->kregs->u_regs[UREG_G7] = regs->u_regs[UREG_I3];
+ /* We do not want to accidently trigger system call restart
+ * handling in the new thread. Therefore, clear out the trap
+ * type, which will make pt_regs_regs_is_syscall() return false.
+ */
+ pt_regs_clear_trap_type(t->kregs);
+
return 0;
}
diff --git a/arch/sparc64/kernel/signal.c b/arch/sparc64/kernel/signal.c
index 77a3e8592cbc..f2d88d8f7a42 100644
--- a/arch/sparc64/kernel/signal.c
+++ b/arch/sparc64/kernel/signal.c
@@ -8,7 +8,7 @@
* Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
*/
-#ifdef CONFIG_SPARC32_COMPAT
+#ifdef CONFIG_COMPAT
#include <linux/compat.h> /* for compat_old_sigset_t */
#endif
#include <linux/sched.h>
@@ -236,9 +236,6 @@ struct rt_signal_frame {
__siginfo_fpu_t fpu_state;
};
-/* Align macros */
-#define RT_ALIGNEDSZ (((sizeof(struct rt_signal_frame) + 7) & (~7)))
-
static long _sigpause_common(old_sigset_t set)
{
set &= _BLOCKABLE;
@@ -400,7 +397,7 @@ setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs,
synchronize_user_stack();
save_and_clear_fpu();
- sigframe_size = RT_ALIGNEDSZ;
+ sigframe_size = sizeof(struct rt_signal_frame);
if (!(current_thread_info()->fpsaved[0] & FPRS_FEF))
sigframe_size -= sizeof(__siginfo_fpu_t);
@@ -516,11 +513,10 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0)
struct k_sigaction ka;
sigset_t *oldset;
siginfo_t info;
- int signr, tt;
+ int signr;
- tt = regs->magic & 0x1ff;
- if (tt == 0x110 || tt == 0x111 || tt == 0x16d) {
- regs->magic &= ~0x1ff;
+ if (pt_regs_is_syscall(regs)) {
+ pt_regs_clear_trap_type(regs);
cookie.restart_syscall = 1;
} else
cookie.restart_syscall = 0;
@@ -531,7 +527,7 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0)
else
oldset = &current->blocked;
-#ifdef CONFIG_SPARC32_COMPAT
+#ifdef CONFIG_COMPAT
if (test_thread_flag(TIF_32BIT)) {
extern void do_signal32(sigset_t *, struct pt_regs *,
struct signal_deliver_cookie *);
diff --git a/arch/sparc64/kernel/signal32.c b/arch/sparc64/kernel/signal32.c
index 43cdec64d9c9..91f8d0826db1 100644
--- a/arch/sparc64/kernel/signal32.c
+++ b/arch/sparc64/kernel/signal32.c
@@ -1,5 +1,4 @@
-/* $Id: signal32.c,v 1.74 2002/02/09 19:49:30 davem Exp $
- * arch/sparc64/kernel/signal32.c
+/* arch/sparc64/kernel/signal32.c
*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
@@ -31,30 +30,6 @@
#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-/* Signal frames: the original one (compatible with SunOS):
- *
- * Set up a signal frame... Make the stack look the way SunOS
- * expects it to look which is basically:
- *
- * ---------------------------------- <-- %sp at signal time
- * Struct sigcontext
- * Signal address
- * Ptr to sigcontext area above
- * Signal code
- * The signal number itself
- * One register window
- * ---------------------------------- <-- New %sp
- */
-struct signal_sframe32 {
- struct reg_window32 sig_window;
- int sig_num;
- int sig_code;
- /* struct sigcontext32 * */ u32 sig_scptr;
- int sig_address;
- struct sigcontext32 sig_context;
- unsigned int extramask[_COMPAT_NSIG_WORDS - 1];
-};
-
/* This magic should be in g_upper[0] for all upper parts
* to be valid.
*/
@@ -65,12 +40,7 @@ typedef struct {
unsigned int asi;
} siginfo_extra_v8plus_t;
-/*
- * And the new one, intended to be used for Linux applications only
- * (we have enough in there to work with clone).
- * All the interesting bits are in the info field.
- */
-struct new_signal_frame32 {
+struct signal_frame32 {
struct sparc_stackf32 ss;
__siginfo32_t info;
/* __siginfo_fpu32_t * */ u32 fpu_save;
@@ -149,8 +119,7 @@ struct rt_signal_frame32 {
};
/* Align macros */
-#define SF_ALIGNEDSZ (((sizeof(struct signal_sframe32) + 7) & (~7)))
-#define NF_ALIGNEDSZ (((sizeof(struct new_signal_frame32) + 7) & (~7)))
+#define SF_ALIGNEDSZ (((sizeof(struct signal_frame32) + 7) & (~7)))
#define RT_ALIGNEDSZ (((sizeof(struct rt_signal_frame32) + 7) & (~7)))
int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
@@ -241,17 +210,22 @@ static int restore_fpu_state32(struct pt_regs *regs, __siginfo_fpu_t __user *fpu
return err;
}
-void do_new_sigreturn32(struct pt_regs *regs)
+void do_sigreturn32(struct pt_regs *regs)
{
- struct new_signal_frame32 __user *sf;
+ struct signal_frame32 __user *sf;
unsigned int psr;
unsigned pc, npc, fpu_save;
sigset_t set;
unsigned seta[_COMPAT_NSIG_WORDS];
int err, i;
+ /* Always make any pending restarted system calls return -EINTR */
+ current_thread_info()->restart_block.fn = do_no_restart_syscall;
+
+ synchronize_user_stack();
+
regs->u_regs[UREG_FP] &= 0x00000000ffffffffUL;
- sf = (struct new_signal_frame32 __user *) regs->u_regs[UREG_FP];
+ sf = (struct signal_frame32 __user *) regs->u_regs[UREG_FP];
/* 1. Make sure we are not getting garbage from the user */
if (!access_ok(VERIFY_READ, sf, sizeof(*sf)) ||
@@ -319,76 +293,6 @@ segv:
force_sig(SIGSEGV, current);
}
-asmlinkage void do_sigreturn32(struct pt_regs *regs)
-{
- struct sigcontext32 __user *scptr;
- unsigned int pc, npc, psr;
- sigset_t set;
- unsigned int seta[_COMPAT_NSIG_WORDS];
- int err;
-
- /* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
-
- synchronize_user_stack();
- if (test_thread_flag(TIF_NEWSIGNALS)) {
- do_new_sigreturn32(regs);
- return;
- }
-
- scptr = (struct sigcontext32 __user *)
- (regs->u_regs[UREG_I0] & 0x00000000ffffffffUL);
- /* Check sanity of the user arg. */
- if (!access_ok(VERIFY_READ, scptr, sizeof(struct sigcontext32)) ||
- (((unsigned long) scptr) & 3))
- goto segv;
-
- err = __get_user(pc, &scptr->sigc_pc);
- err |= __get_user(npc, &scptr->sigc_npc);
-
- if ((pc | npc) & 3)
- goto segv; /* Nice try. */
-
- err |= __get_user(seta[0], &scptr->sigc_mask);
- /* Note that scptr + 1 points to extramask */
- err |= copy_from_user(seta+1, scptr + 1,
- (_COMPAT_NSIG_WORDS - 1) * sizeof(unsigned int));
- if (err)
- goto segv;
- switch (_NSIG_WORDS) {
- case 4: set.sig[3] = seta[6] + (((long)seta[7]) << 32);
- case 3: set.sig[2] = seta[4] + (((long)seta[5]) << 32);
- case 2: set.sig[1] = seta[2] + (((long)seta[3]) << 32);
- case 1: set.sig[0] = seta[0] + (((long)seta[1]) << 32);
- }
- sigdelsetmask(&set, ~_BLOCKABLE);
- spin_lock_irq(&current->sighand->siglock);
- current->blocked = set;
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
-
- if (test_thread_flag(TIF_32BIT)) {
- pc &= 0xffffffff;
- npc &= 0xffffffff;
- }
- regs->tpc = pc;
- regs->tnpc = npc;
- err = __get_user(regs->u_regs[UREG_FP], &scptr->sigc_sp);
- err |= __get_user(regs->u_regs[UREG_I0], &scptr->sigc_o0);
- err |= __get_user(regs->u_regs[UREG_G1], &scptr->sigc_g1);
-
- /* User can only change condition codes in %tstate. */
- err |= __get_user(psr, &scptr->sigc_psr);
- if (err)
- goto segv;
- regs->tstate &= ~(TSTATE_ICC|TSTATE_XCC);
- regs->tstate |= psr_to_tstate_icc(psr);
- return;
-
-segv:
- force_sig(SIGSEGV, current);
-}
-
asmlinkage void do_rt_sigreturn32(struct pt_regs *regs)
{
struct rt_signal_frame32 __user *sf;
@@ -504,145 +408,6 @@ static void __user *get_sigframe(struct sigaction *sa, struct pt_regs *regs, uns
return (void __user *)(sp - framesize);
}
-static void
-setup_frame32(struct sigaction *sa, struct pt_regs *regs, int signr, sigset_t *oldset, siginfo_t *info)
-{
- struct signal_sframe32 __user *sframep;
- struct sigcontext32 __user *sc;
- unsigned int seta[_COMPAT_NSIG_WORDS];
- int err = 0;
- void __user *sig_address;
- int sig_code;
- unsigned long pc = regs->tpc;
- unsigned long npc = regs->tnpc;
- unsigned int psr;
-
- if (test_thread_flag(TIF_32BIT)) {
- pc &= 0xffffffff;
- npc &= 0xffffffff;
- }
-
- synchronize_user_stack();
- save_and_clear_fpu();
-
- sframep = (struct signal_sframe32 __user *)
- get_sigframe(sa, regs, SF_ALIGNEDSZ);
- if (invalid_frame_pointer(sframep, sizeof(*sframep))){
- /* Don't change signal code and address, so that
- * post mortem debuggers can have a look.
- */
- do_exit(SIGILL);
- }
-
- sc = &sframep->sig_context;
-
- /* We've already made sure frame pointer isn't in kernel space... */
- err = __put_user((sas_ss_flags(regs->u_regs[UREG_FP]) == SS_ONSTACK),
- &sc->sigc_onstack);
-
- switch (_NSIG_WORDS) {
- case 4: seta[7] = (oldset->sig[3] >> 32);
- seta[6] = oldset->sig[3];
- case 3: seta[5] = (oldset->sig[2] >> 32);
- seta[4] = oldset->sig[2];
- case 2: seta[3] = (oldset->sig[1] >> 32);
- seta[2] = oldset->sig[1];
- case 1: seta[1] = (oldset->sig[0] >> 32);
- seta[0] = oldset->sig[0];
- }
- err |= __put_user(seta[0], &sc->sigc_mask);
- err |= __copy_to_user(sframep->extramask, seta + 1,
- (_COMPAT_NSIG_WORDS - 1) * sizeof(unsigned int));
- err |= __put_user(regs->u_regs[UREG_FP], &sc->sigc_sp);
- err |= __put_user(pc, &sc->sigc_pc);
- err |= __put_user(npc, &sc->sigc_npc);
- psr = tstate_to_psr(regs->tstate);
- if (current_thread_info()->fpsaved[0] & FPRS_FEF)
- psr |= PSR_EF;
- err |= __put_user(psr, &sc->sigc_psr);
- err |= __put_user(regs->u_regs[UREG_G1], &sc->sigc_g1);
- err |= __put_user(regs->u_regs[UREG_I0], &sc->sigc_o0);
- err |= __put_user(get_thread_wsaved(), &sc->sigc_oswins);
-
- err |= copy_in_user((u32 __user *)sframep,
- (u32 __user *)(regs->u_regs[UREG_FP]),
- sizeof(struct reg_window32));
-
- set_thread_wsaved(0); /* So process is allowed to execute. */
- err |= __put_user(signr, &sframep->sig_num);
- sig_address = NULL;
- sig_code = 0;
- if (SI_FROMKERNEL (info) && (info->si_code & __SI_MASK) == __SI_FAULT) {
- sig_address = info->si_addr;
- switch (signr) {
- case SIGSEGV:
- switch (info->si_code) {
- case SEGV_MAPERR: sig_code = SUBSIG_NOMAPPING; break;
- default: sig_code = SUBSIG_PROTECTION; break;
- }
- break;
- case SIGILL:
- switch (info->si_code) {
- case ILL_ILLOPC: sig_code = SUBSIG_ILLINST; break;
- case ILL_PRVOPC: sig_code = SUBSIG_PRIVINST; break;
- case ILL_ILLTRP: sig_code = SUBSIG_BADTRAP(info->si_trapno); break;
- default: sig_code = SUBSIG_STACK; break;
- }
- break;
- case SIGFPE:
- switch (info->si_code) {
- case FPE_INTDIV: sig_code = SUBSIG_IDIVZERO; break;
- case FPE_INTOVF: sig_code = SUBSIG_FPINTOVFL; break;
- case FPE_FLTDIV: sig_code = SUBSIG_FPDIVZERO; break;
- case FPE_FLTOVF: sig_code = SUBSIG_FPOVFLOW; break;
- case FPE_FLTUND: sig_code = SUBSIG_FPUNFLOW; break;
- case FPE_FLTRES: sig_code = SUBSIG_FPINEXACT; break;
- case FPE_FLTINV: sig_code = SUBSIG_FPOPERROR; break;
- default: sig_code = SUBSIG_FPERROR; break;
- }
- break;
- case SIGBUS:
- switch (info->si_code) {
- case BUS_ADRALN: sig_code = SUBSIG_ALIGNMENT; break;
- case BUS_ADRERR: sig_code = SUBSIG_MISCERROR; break;
- default: sig_code = SUBSIG_BUSTIMEOUT; break;
- }
- break;
- case SIGEMT:
- switch (info->si_code) {
- case EMT_TAGOVF: sig_code = SUBSIG_TAG; break;
- }
- break;
- case SIGSYS:
- if (info->si_code == (__SI_FAULT|0x100)) {
- /* See sys_sunos32.c */
- sig_code = info->si_trapno;
- break;
- }
- default:
- sig_address = NULL;
- }
- }
- err |= __put_user(ptr_to_compat(sig_address), &sframep->sig_address);
- err |= __put_user(sig_code, &sframep->sig_code);
- err |= __put_user(ptr_to_compat(sc), &sframep->sig_scptr);
- if (err)
- goto sigsegv;
-
- regs->u_regs[UREG_FP] = (unsigned long) sframep;
- regs->tpc = (unsigned long) sa->sa_handler;
- regs->tnpc = (regs->tpc + 4);
- if (test_thread_flag(TIF_32BIT)) {
- regs->tpc &= 0xffffffff;
- regs->tnpc &= 0xffffffff;
- }
- return;
-
-sigsegv:
- force_sigsegv(signr, current);
-}
-
-
static int save_fpu_state32(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
{
unsigned long *fpregs = current_thread_info()->fpregs;
@@ -663,10 +428,10 @@ static int save_fpu_state32(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
return err;
}
-static void new_setup_frame32(struct k_sigaction *ka, struct pt_regs *regs,
- int signo, sigset_t *oldset)
+static void setup_frame32(struct k_sigaction *ka, struct pt_regs *regs,
+ int signo, sigset_t *oldset)
{
- struct new_signal_frame32 __user *sf;
+ struct signal_frame32 __user *sf;
int sigframe_size;
u32 psr;
int i, err;
@@ -676,11 +441,11 @@ static void new_setup_frame32(struct k_sigaction *ka, struct pt_regs *regs,
synchronize_user_stack();
save_and_clear_fpu();
- sigframe_size = NF_ALIGNEDSZ;
+ sigframe_size = SF_ALIGNEDSZ;
if (!(current_thread_info()->fpsaved[0] & FPRS_FEF))
sigframe_size -= sizeof(__siginfo_fpu_t);
- sf = (struct new_signal_frame32 __user *)
+ sf = (struct signal_frame32 __user *)
get_sigframe(&ka->sa, regs, sigframe_size);
if (invalid_frame_pointer(sf, sigframe_size))
@@ -944,10 +709,9 @@ static inline void handle_signal32(unsigned long signr, struct k_sigaction *ka,
{
if (ka->sa.sa_flags & SA_SIGINFO)
setup_rt_frame32(ka, regs, signr, oldset, info);
- else if (test_thread_flag(TIF_NEWSIGNALS))
- new_setup_frame32(ka, regs, signr, oldset);
else
- setup_frame32(&ka->sa, regs, signr, oldset, info);
+ setup_frame32(ka, regs, signr, oldset);
+
spin_lock_irq(&current->sighand->siglock);
sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
if (!(ka->sa.sa_flags & SA_NOMASK))
diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c
index 66336590e830..8ac0b99f2c55 100644
--- a/arch/sparc64/kernel/sparc64_ksyms.c
+++ b/arch/sparc64/kernel/sparc64_ksyms.c
@@ -49,7 +49,6 @@
#endif
#ifdef CONFIG_PCI
#include <asm/ebus.h>
-#include <asm/isa.h>
#endif
#include <asm/ns87303.h>
#include <asm/timer.h>
@@ -187,7 +186,6 @@ EXPORT_SYMBOL(insw);
EXPORT_SYMBOL(insl);
#ifdef CONFIG_PCI
EXPORT_SYMBOL(ebus_chain);
-EXPORT_SYMBOL(isa_chain);
EXPORT_SYMBOL(pci_alloc_consistent);
EXPORT_SYMBOL(pci_free_consistent);
EXPORT_SYMBOL(pci_map_single);
diff --git a/arch/sparc64/kernel/sys_sparc32.c b/arch/sparc64/kernel/sys_sparc32.c
index c1a61e98899a..161ce4710fe7 100644
--- a/arch/sparc64/kernel/sys_sparc32.c
+++ b/arch/sparc64/kernel/sys_sparc32.c
@@ -554,10 +554,8 @@ asmlinkage long compat_sys_sigaction(int sig, struct old_sigaction32 __user *act
struct k_sigaction new_ka, old_ka;
int ret;
- if (sig < 0) {
- set_thread_flag(TIF_NEWSIGNALS);
- sig = -sig;
- }
+ WARN_ON_ONCE(sig >= 0);
+ sig = -sig;
if (act) {
compat_old_sigset_t mask;
@@ -601,11 +599,6 @@ asmlinkage long compat_sys_rt_sigaction(int sig,
if (sigsetsize != sizeof(compat_sigset_t))
return -EINVAL;
- /* All tasks which use RT signals (effectively) use
- * new style signals.
- */
- set_thread_flag(TIF_NEWSIGNALS);
-
if (act) {
u32 u_handler, u_restorer;
diff --git a/arch/sparc64/lib/iomap.c b/arch/sparc64/lib/iomap.c
index ac556db06973..7120ebbd4d03 100644
--- a/arch/sparc64/lib/iomap.c
+++ b/arch/sparc64/lib/iomap.c
@@ -21,8 +21,8 @@ EXPORT_SYMBOL(ioport_unmap);
/* Create a virtual mapping cookie for a PCI BAR (memory or IO) */
void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
{
- unsigned long start = pci_resource_start(dev, bar);
- unsigned long len = pci_resource_len(dev, bar);
+ resource_size_t start = pci_resource_start(dev, bar);
+ resource_size_t len = pci_resource_len(dev, bar);
unsigned long flags = pci_resource_flags(dev, bar);
if (!len || !start)
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index 177d8aaeec42..8c2b50e8abc6 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -1699,9 +1699,21 @@ void __init paging_init(void)
* functions like clear_dcache_dirty_cpu use the cpu mask
* in 13-bit signed-immediate instruction fields.
*/
- BUILD_BUG_ON(FLAGS_RESERVED != 32);
+
+ /*
+ * Page flags must not reach into upper 32 bits that are used
+ * for the cpu number
+ */
+ BUILD_BUG_ON(NR_PAGEFLAGS > 32);
+
+ /*
+ * The bit fields placed in the high range must not reach below
+ * the 32 bit boundary. Otherwise we cannot place the cpu field
+ * at the 32 bit boundary.
+ */
BUILD_BUG_ON(SECTIONS_WIDTH + NODES_WIDTH + ZONES_WIDTH +
- ilog2(roundup_pow_of_two(NR_CPUS)) > FLAGS_RESERVED);
+ ilog2(roundup_pow_of_two(NR_CPUS)) > 32);
+
BUILD_BUG_ON(NR_CPUS > 4096);
kern_base = (prom_boot_mapping_phys_low >> 22UL) << 22UL;
diff --git a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c
index db3082b4da46..6e51424745ab 100644
--- a/arch/um/drivers/chan_kern.c
+++ b/arch/um/drivers/chan_kern.c
@@ -125,7 +125,7 @@ static int open_one_chan(struct chan *chan)
return 0;
}
-int open_chan(struct list_head *chans)
+static int open_chan(struct list_head *chans)
{
struct list_head *ele;
struct chan *chan;
@@ -583,19 +583,6 @@ int parse_chan_pair(char *str, struct line *line, int device,
return 0;
}
-int chan_out_fd(struct list_head *chans)
-{
- struct list_head *ele;
- struct chan *chan;
-
- list_for_each(ele, chans) {
- chan = list_entry(ele, struct chan, list);
- if (chan->primary && chan->output)
- return chan->fd;
- }
- return -1;
-}
-
void chan_interrupt(struct list_head *chans, struct delayed_work *task,
struct tty_struct *tty, int irq)
{
diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c
index 2c898c4d6b6a..10b86e1cc659 100644
--- a/arch/um/drivers/line.c
+++ b/arch/um/drivers/line.c
@@ -304,7 +304,7 @@ int line_ioctl(struct tty_struct *tty, struct file * file,
break;
if (i == ARRAY_SIZE(tty_ioctls)) {
printk(KERN_ERR "%s: %s: unknown ioctl: 0x%x\n",
- __FUNCTION__, tty->name, cmd);
+ __func__, tty->name, cmd);
}
ret = -ENOIOCTLCMD;
break;
diff --git a/arch/um/drivers/mcast_kern.c b/arch/um/drivers/mcast_kern.c
index 822092f149be..8c4378a76d63 100644
--- a/arch/um/drivers/mcast_kern.c
+++ b/arch/um/drivers/mcast_kern.c
@@ -58,7 +58,7 @@ static const struct net_kern_info mcast_kern_info = {
.write = mcast_write,
};
-int mcast_setup(char *str, char **mac_out, void *data)
+static int mcast_setup(char *str, char **mac_out, void *data)
{
struct mcast_init *init = data;
char *port_str = NULL, *ttl_str = NULL, *remain;
diff --git a/arch/um/drivers/mconsole_user.c b/arch/um/drivers/mconsole_user.c
index 13af2f03ed84..f8cf4c8bedef 100644
--- a/arch/um/drivers/mconsole_user.c
+++ b/arch/um/drivers/mconsole_user.c
@@ -39,7 +39,7 @@ static struct mconsole_command commands[] = {
/* Initialized in mconsole_init, which is an initcall */
char mconsole_socket_name[256];
-int mconsole_reply_v0(struct mc_request *req, char *reply)
+static int mconsole_reply_v0(struct mc_request *req, char *reply)
{
struct iovec iov;
struct msghdr msg;
diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c
index 1d43bdfc20c4..5b4ca8d93682 100644
--- a/arch/um/drivers/net_kern.c
+++ b/arch/um/drivers/net_kern.c
@@ -116,7 +116,7 @@ static void uml_dev_close(struct work_struct *work)
dev_close(lp->dev);
}
-irqreturn_t uml_net_interrupt(int irq, void *dev_id)
+static irqreturn_t uml_net_interrupt(int irq, void *dev_id)
{
struct net_device *dev = dev_id;
struct uml_net_private *lp = dev->priv;
@@ -296,7 +296,7 @@ static struct ethtool_ops uml_net_ethtool_ops = {
.get_link = ethtool_op_get_link,
};
-void uml_net_user_timer_expire(unsigned long _conn)
+static void uml_net_user_timer_expire(unsigned long _conn)
{
#ifdef undef
struct connection *conn = (struct connection *)_conn;
@@ -786,7 +786,7 @@ static int uml_inetaddr_event(struct notifier_block *this, unsigned long event,
}
/* uml_net_init shouldn't be called twice on two CPUs at the same time */
-struct notifier_block uml_inetaddr_notifier = {
+static struct notifier_block uml_inetaddr_notifier = {
.notifier_call = uml_inetaddr_event,
};
diff --git a/arch/um/drivers/port_user.c b/arch/um/drivers/port_user.c
index addd75902656..d269ca387f10 100644
--- a/arch/um/drivers/port_user.c
+++ b/arch/um/drivers/port_user.c
@@ -153,7 +153,7 @@ struct port_pre_exec_data {
int pipe_fd;
};
-void port_pre_exec(void *arg)
+static void port_pre_exec(void *arg)
{
struct port_pre_exec_data *data = arg;
diff --git a/arch/um/drivers/slip_kern.c b/arch/um/drivers/slip_kern.c
index 6b4a0f9e38de..d19faec7046e 100644
--- a/arch/um/drivers/slip_kern.c
+++ b/arch/um/drivers/slip_kern.c
@@ -13,7 +13,7 @@ struct slip_init {
char *gate_addr;
};
-void slip_init(struct net_device *dev, void *data)
+static void slip_init(struct net_device *dev, void *data)
{
struct uml_net_private *private;
struct slip_data *spri;
@@ -57,7 +57,7 @@ static int slip_write(int fd, struct sk_buff *skb, struct uml_net_private *lp)
(struct slip_data *) &lp->user);
}
-const struct net_kern_info slip_kern_info = {
+static const struct net_kern_info slip_kern_info = {
.init = slip_init,
.protocol = slip_protocol,
.read = slip_read,
diff --git a/arch/um/drivers/stdio_console.c b/arch/um/drivers/stdio_console.c
index cec0c33cdd39..49266f6108c4 100644
--- a/arch/um/drivers/stdio_console.c
+++ b/arch/um/drivers/stdio_console.c
@@ -34,7 +34,7 @@
static struct tty_driver *console_driver;
-void stdio_announce(char *dev_name, int dev)
+static void stdio_announce(char *dev_name, int dev)
{
printk(KERN_INFO "Virtual console %d assigned device '%s'\n", dev,
dev_name);
@@ -158,7 +158,7 @@ static struct console stdiocons = {
.index = -1,
};
-int stdio_init(void)
+static int stdio_init(void)
{
char *new_title;
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index be3a2797dac4..5e45e39a8a8d 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -72,18 +72,6 @@ struct io_thread_req {
int error;
};
-extern int open_ubd_file(char *file, struct openflags *openflags, int shared,
- char **backing_file_out, int *bitmap_offset_out,
- unsigned long *bitmap_len_out, int *data_offset_out,
- int *create_cow_out);
-extern int create_cow_file(char *cow_file, char *backing_file,
- struct openflags flags, int sectorsize,
- int alignment, int *bitmap_offset_out,
- unsigned long *bitmap_len_out,
- int *data_offset_out);
-extern int read_cow_bitmap(int fd, void *buf, int offset, int len);
-extern void do_io(struct io_thread_req *req);
-
static inline int ubd_test_bit(__u64 bit, unsigned char *data)
{
__u64 n;
@@ -200,7 +188,7 @@ struct ubd {
}
/* Protected by ubd_lock */
-struct ubd ubd_devs[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = DEFAULT_UBD };
+static struct ubd ubd_devs[MAX_DEV] = { [0 ... MAX_DEV - 1] = DEFAULT_UBD };
/* Only changed by fake_ide_setup which is a setup */
static int fake_ide = 0;
@@ -463,7 +451,7 @@ __uml_help(udb_setup,
static void do_ubd_request(struct request_queue * q);
/* Only changed by ubd_init, which is an initcall. */
-int thread_fd = -1;
+static int thread_fd = -1;
static void ubd_end_request(struct request *req, int bytes, int error)
{
@@ -531,7 +519,7 @@ static irqreturn_t ubd_intr(int irq, void *dev)
/* Only changed by ubd_init, which is an initcall. */
static int io_pid = -1;
-void kill_io_thread(void)
+static void kill_io_thread(void)
{
if(io_pid != -1)
os_kill_process(io_pid, 1);
@@ -547,6 +535,192 @@ static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
return os_file_size(file, size_out);
}
+static int read_cow_bitmap(int fd, void *buf, int offset, int len)
+{
+ int err;
+
+ err = os_seek_file(fd, offset);
+ if (err < 0)
+ return err;
+
+ err = os_read_file(fd, buf, len);
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+
+static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
+{
+ unsigned long modtime;
+ unsigned long long actual;
+ int err;
+
+ err = os_file_modtime(file, &modtime);
+ if (err < 0) {
+ printk(KERN_ERR "Failed to get modification time of backing "
+ "file \"%s\", err = %d\n", file, -err);
+ return err;
+ }
+
+ err = os_file_size(file, &actual);
+ if (err < 0) {
+ printk(KERN_ERR "Failed to get size of backing file \"%s\", "
+ "err = %d\n", file, -err);
+ return err;
+ }
+
+ if (actual != size) {
+ /*__u64 can be a long on AMD64 and with %lu GCC complains; so
+ * the typecast.*/
+ printk(KERN_ERR "Size mismatch (%llu vs %llu) of COW header "
+ "vs backing file\n", (unsigned long long) size, actual);
+ return -EINVAL;
+ }
+ if (modtime != mtime) {
+ printk(KERN_ERR "mtime mismatch (%ld vs %ld) of COW header vs "
+ "backing file\n", mtime, modtime);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
+{
+ struct uml_stat buf1, buf2;
+ int err;
+
+ if (from_cmdline == NULL)
+ return 0;
+ if (!strcmp(from_cmdline, from_cow))
+ return 0;
+
+ err = os_stat_file(from_cmdline, &buf1);
+ if (err < 0) {
+ printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cmdline,
+ -err);
+ return 0;
+ }
+ err = os_stat_file(from_cow, &buf2);
+ if (err < 0) {
+ printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cow,
+ -err);
+ return 1;
+ }
+ if ((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
+ return 0;
+
+ printk(KERN_ERR "Backing file mismatch - \"%s\" requested, "
+ "\"%s\" specified in COW header of \"%s\"\n",
+ from_cmdline, from_cow, cow);
+ return 1;
+}
+
+static int open_ubd_file(char *file, struct openflags *openflags, int shared,
+ char **backing_file_out, int *bitmap_offset_out,
+ unsigned long *bitmap_len_out, int *data_offset_out,
+ int *create_cow_out)
+{
+ time_t mtime;
+ unsigned long long size;
+ __u32 version, align;
+ char *backing_file;
+ int fd, err, sectorsize, asked_switch, mode = 0644;
+
+ fd = os_open_file(file, *openflags, mode);
+ if (fd < 0) {
+ if ((fd == -ENOENT) && (create_cow_out != NULL))
+ *create_cow_out = 1;
+ if (!openflags->w ||
+ ((fd != -EROFS) && (fd != -EACCES)))
+ return fd;
+ openflags->w = 0;
+ fd = os_open_file(file, *openflags, mode);
+ if (fd < 0)
+ return fd;
+ }
+
+ if (shared)
+ printk(KERN_INFO "Not locking \"%s\" on the host\n", file);
+ else {
+ err = os_lock_file(fd, openflags->w);
+ if (err < 0) {
+ printk(KERN_ERR "Failed to lock '%s', err = %d\n",
+ file, -err);
+ goto out_close;
+ }
+ }
+
+ /* Successful return case! */
+ if (backing_file_out == NULL)
+ return fd;
+
+ err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
+ &size, &sectorsize, &align, bitmap_offset_out);
+ if (err && (*backing_file_out != NULL)) {
+ printk(KERN_ERR "Failed to read COW header from COW file "
+ "\"%s\", errno = %d\n", file, -err);
+ goto out_close;
+ }
+ if (err)
+ return fd;
+
+ asked_switch = path_requires_switch(*backing_file_out, backing_file,
+ file);
+
+ /* Allow switching only if no mismatch. */
+ if (asked_switch && !backing_file_mismatch(*backing_file_out, size,
+ mtime)) {
+ printk(KERN_ERR "Switching backing file to '%s'\n",
+ *backing_file_out);
+ err = write_cow_header(file, fd, *backing_file_out,
+ sectorsize, align, &size);
+ if (err) {
+ printk(KERN_ERR "Switch failed, errno = %d\n", -err);
+ goto out_close;
+ }
+ } else {
+ *backing_file_out = backing_file;
+ err = backing_file_mismatch(*backing_file_out, size, mtime);
+ if (err)
+ goto out_close;
+ }
+
+ cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
+ bitmap_len_out, data_offset_out);
+
+ return fd;
+ out_close:
+ os_close_file(fd);
+ return err;
+}
+
+static int create_cow_file(char *cow_file, char *backing_file,
+ struct openflags flags,
+ int sectorsize, int alignment, int *bitmap_offset_out,
+ unsigned long *bitmap_len_out, int *data_offset_out)
+{
+ int err, fd;
+
+ flags.c = 1;
+ fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
+ if (fd < 0) {
+ err = fd;
+ printk(KERN_ERR "Open of COW file '%s' failed, errno = %d\n",
+ cow_file, -err);
+ goto out;
+ }
+
+ err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
+ bitmap_offset_out, bitmap_len_out,
+ data_offset_out);
+ if (!err)
+ return fd;
+ os_close_file(fd);
+ out:
+ return err;
+}
+
static void ubd_close_dev(struct ubd *ubd_dev)
{
os_close_file(ubd_dev->fd);
@@ -1166,185 +1340,6 @@ static int ubd_ioctl(struct inode * inode, struct file * file,
return -EINVAL;
}
-static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
-{
- struct uml_stat buf1, buf2;
- int err;
-
- if(from_cmdline == NULL)
- return 0;
- if(!strcmp(from_cmdline, from_cow))
- return 0;
-
- err = os_stat_file(from_cmdline, &buf1);
- if(err < 0){
- printk("Couldn't stat '%s', err = %d\n", from_cmdline, -err);
- return 0;
- }
- err = os_stat_file(from_cow, &buf2);
- if(err < 0){
- printk("Couldn't stat '%s', err = %d\n", from_cow, -err);
- return 1;
- }
- if((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
- return 0;
-
- printk("Backing file mismatch - \"%s\" requested,\n"
- "\"%s\" specified in COW header of \"%s\"\n",
- from_cmdline, from_cow, cow);
- return 1;
-}
-
-static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
-{
- unsigned long modtime;
- unsigned long long actual;
- int err;
-
- err = os_file_modtime(file, &modtime);
- if(err < 0){
- printk("Failed to get modification time of backing file "
- "\"%s\", err = %d\n", file, -err);
- return err;
- }
-
- err = os_file_size(file, &actual);
- if(err < 0){
- printk("Failed to get size of backing file \"%s\", "
- "err = %d\n", file, -err);
- return err;
- }
-
- if(actual != size){
- /*__u64 can be a long on AMD64 and with %lu GCC complains; so
- * the typecast.*/
- printk("Size mismatch (%llu vs %llu) of COW header vs backing "
- "file\n", (unsigned long long) size, actual);
- return -EINVAL;
- }
- if(modtime != mtime){
- printk("mtime mismatch (%ld vs %ld) of COW header vs backing "
- "file\n", mtime, modtime);
- return -EINVAL;
- }
- return 0;
-}
-
-int read_cow_bitmap(int fd, void *buf, int offset, int len)
-{
- int err;
-
- err = os_seek_file(fd, offset);
- if(err < 0)
- return err;
-
- err = os_read_file(fd, buf, len);
- if(err < 0)
- return err;
-
- return 0;
-}
-
-int open_ubd_file(char *file, struct openflags *openflags, int shared,
- char **backing_file_out, int *bitmap_offset_out,
- unsigned long *bitmap_len_out, int *data_offset_out,
- int *create_cow_out)
-{
- time_t mtime;
- unsigned long long size;
- __u32 version, align;
- char *backing_file;
- int fd, err, sectorsize, asked_switch, mode = 0644;
-
- fd = os_open_file(file, *openflags, mode);
- if (fd < 0) {
- if ((fd == -ENOENT) && (create_cow_out != NULL))
- *create_cow_out = 1;
- if (!openflags->w ||
- ((fd != -EROFS) && (fd != -EACCES)))
- return fd;
- openflags->w = 0;
- fd = os_open_file(file, *openflags, mode);
- if (fd < 0)
- return fd;
- }
-
- if(shared)
- printk("Not locking \"%s\" on the host\n", file);
- else {
- err = os_lock_file(fd, openflags->w);
- if(err < 0){
- printk("Failed to lock '%s', err = %d\n", file, -err);
- goto out_close;
- }
- }
-
- /* Successful return case! */
- if(backing_file_out == NULL)
- return fd;
-
- err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
- &size, &sectorsize, &align, bitmap_offset_out);
- if(err && (*backing_file_out != NULL)){
- printk("Failed to read COW header from COW file \"%s\", "
- "errno = %d\n", file, -err);
- goto out_close;
- }
- if(err)
- return fd;
-
- asked_switch = path_requires_switch(*backing_file_out, backing_file, file);
-
- /* Allow switching only if no mismatch. */
- if (asked_switch && !backing_file_mismatch(*backing_file_out, size, mtime)) {
- printk("Switching backing file to '%s'\n", *backing_file_out);
- err = write_cow_header(file, fd, *backing_file_out,
- sectorsize, align, &size);
- if (err) {
- printk("Switch failed, errno = %d\n", -err);
- goto out_close;
- }
- } else {
- *backing_file_out = backing_file;
- err = backing_file_mismatch(*backing_file_out, size, mtime);
- if (err)
- goto out_close;
- }
-
- cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
- bitmap_len_out, data_offset_out);
-
- return fd;
- out_close:
- os_close_file(fd);
- return err;
-}
-
-int create_cow_file(char *cow_file, char *backing_file, struct openflags flags,
- int sectorsize, int alignment, int *bitmap_offset_out,
- unsigned long *bitmap_len_out, int *data_offset_out)
-{
- int err, fd;
-
- flags.c = 1;
- fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
- if(fd < 0){
- err = fd;
- printk("Open of COW file '%s' failed, errno = %d\n", cow_file,
- -err);
- goto out;
- }
-
- err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
- bitmap_offset_out, bitmap_len_out,
- data_offset_out);
- if(!err)
- return fd;
- os_close_file(fd);
- out:
- return err;
-}
-
static int update_bitmap(struct io_thread_req *req)
{
int n;
@@ -1369,7 +1364,7 @@ static int update_bitmap(struct io_thread_req *req)
return 0;
}
-void do_io(struct io_thread_req *req)
+static void do_io(struct io_thread_req *req)
{
char *buf;
unsigned long len;
diff --git a/arch/um/include/chan_kern.h b/arch/um/include/chan_kern.h
index 624b5100a3cd..1e651457e049 100644
--- a/arch/um/include/chan_kern.h
+++ b/arch/um/include/chan_kern.h
@@ -31,7 +31,6 @@ extern void chan_interrupt(struct list_head *chans, struct delayed_work *task,
struct tty_struct *tty, int irq);
extern int parse_chan_pair(char *str, struct line *line, int device,
const struct chan_opts *opts, char **error_out);
-extern int open_chan(struct list_head *chans);
extern int write_chan(struct list_head *chans, const char *buf, int len,
int write_irq);
extern int console_write_chan(struct list_head *chans, const char *buf,
@@ -45,7 +44,6 @@ extern void close_chan(struct list_head *chans, int delay_free_irq);
extern int chan_window_size(struct list_head *chans,
unsigned short *rows_out,
unsigned short *cols_out);
-extern int chan_out_fd(struct list_head *chans);
extern int chan_config_string(struct list_head *chans, char *str, int size,
char **error_out);
diff --git a/arch/um/kernel/exitcode.c b/arch/um/kernel/exitcode.c
index 984f80e668ca..6540d2c9fbb7 100644
--- a/arch/um/kernel/exitcode.c
+++ b/arch/um/kernel/exitcode.c
@@ -59,7 +59,7 @@ static int make_proc_exitcode(void)
{
struct proc_dir_entry *ent;
- ent = create_proc_entry("exitcode", 0600, &proc_root);
+ ent = create_proc_entry("exitcode", 0600, NULL);
if (ent == NULL) {
printk(KERN_WARNING "make_proc_exitcode : Failed to register "
"/proc/exitcode\n");
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index e8cb9ff183e9..83603cfbde81 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -364,7 +364,7 @@ int __init make_proc_sysemu(void)
if (!sysemu_supported)
return 0;
- ent = create_proc_entry("sysemu", 0600, &proc_root);
+ ent = create_proc_entry("sysemu", 0600, NULL);
if (ent == NULL)
{
diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c
index e066e84493b1..0d0cea2ac98d 100644
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -4,6 +4,7 @@
*/
#include <linux/clockchips.h>
+#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/jiffies.h>
#include <linux/threads.h>
@@ -109,8 +110,6 @@ static void __init setup_itimer(void)
clockevents_register_device(&itimer_clockevent);
}
-extern void (*late_time_init)(void);
-
void __init time_init(void)
{
long long nsecs;
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index a6c1dd1cf5a1..56deed623446 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -115,7 +115,7 @@ static int have_root __initdata = 0;
/* Set in uml_mem_setup and modified in linux_main */
long long physmem_size = 32 * 1024 * 1024;
-static char *usage_string =
+static const char *usage_string =
"User Mode Linux v%s\n"
" available at http://user-mode-linux.sourceforge.net/\n\n";
@@ -202,7 +202,7 @@ static void __init uml_checksetup(char *line, int *add)
p = &__uml_setup_start;
while (p < &__uml_setup_end) {
- int n;
+ size_t n;
n = strlen(p->str);
if (!strncmp(line, p->str, n) && p->setup_func(line + n, add))
@@ -258,7 +258,8 @@ int __init linux_main(int argc, char **argv)
{
unsigned long avail, diff;
unsigned long virtmem_size, max_physmem;
- unsigned int i, add;
+ unsigned int i;
+ int add;
char * mode;
for (i = 1; i < argc; i++) {
diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c
index b616e15638fb..997d01944f91 100644
--- a/arch/um/os-Linux/start_up.c
+++ b/arch/um/os-Linux/start_up.c
@@ -25,15 +25,15 @@
#include "registers.h"
#include "skas_ptrace.h"
-static int ptrace_child(void)
+static void ptrace_child(void)
{
int ret;
/* Calling os_getpid because some libcs cached getpid incorrectly */
int pid = os_getpid(), ppid = getppid();
int sc_result;
- change_sig(SIGWINCH, 0);
- if (ptrace(PTRACE_TRACEME, 0, 0, 0) < 0) {
+ if (change_sig(SIGWINCH, 0) < 0 ||
+ ptrace(PTRACE_TRACEME, 0, 0, 0) < 0) {
perror("ptrace");
kill(pid, SIGKILL);
}
@@ -75,9 +75,8 @@ static void fatal(char *fmt, ...)
va_list list;
va_start(list, fmt);
- vprintf(fmt, list);
+ vfprintf(stderr, fmt, list);
va_end(list);
- fflush(stdout);
exit(1);
}
@@ -87,9 +86,8 @@ static void non_fatal(char *fmt, ...)
va_list list;
va_start(list, fmt);
- vprintf(fmt, list);
+ vfprintf(stderr, fmt, list);
va_end(list);
- fflush(stdout);
}
static int start_ptraced_child(void)
@@ -495,7 +493,7 @@ int __init parse_iomem(char *str, int *add)
driver = str;
file = strchr(str,',');
if (file == NULL) {
- printf("parse_iomem : failed to parse iomem\n");
+ fprintf(stderr, "parse_iomem : failed to parse iomem\n");
goto out;
}
*file = '\0';
diff --git a/arch/um/os-Linux/sys-i386/task_size.c b/arch/um/os-Linux/sys-i386/task_size.c
index 48d211b3d9a1..ccb49b0aff59 100644
--- a/arch/um/os-Linux/sys-i386/task_size.c
+++ b/arch/um/os-Linux/sys-i386/task_size.c
@@ -88,7 +88,10 @@ unsigned long os_get_task_size(void)
sa.sa_handler = segfault;
sigemptyset(&sa.sa_mask);
sa.sa_flags = SA_NODEFER;
- sigaction(SIGSEGV, &sa, &old);
+ if (sigaction(SIGSEGV, &sa, &old)) {
+ perror("os_get_task_size");
+ exit(1);
+ }
if (!page_ok(bottom)) {
fprintf(stderr, "Address 0x%x no good?\n",
@@ -110,11 +113,12 @@ unsigned long os_get_task_size(void)
out:
/* Restore the old SIGSEGV handling */
- sigaction(SIGSEGV, &old, NULL);
-
+ if (sigaction(SIGSEGV, &old, NULL)) {
+ perror("os_get_task_size");
+ exit(1);
+ }
top <<= UM_KERN_PAGE_SHIFT;
printf("0x%x\n", top);
- fflush(stdout);
return top;
}
diff --git a/arch/v850/kernel/asm-offsets.c b/arch/v850/kernel/asm-offsets.c
index cee5c3142d41..581e6986a776 100644
--- a/arch/v850/kernel/asm-offsets.c
+++ b/arch/v850/kernel/asm-offsets.c
@@ -13,14 +13,11 @@
#include <linux/kernel_stat.h>
#include <linux/ptrace.h>
#include <linux/hardirq.h>
+#include <linux/kbuild.h>
+
#include <asm/irq.h>
#include <asm/errno.h>
-#define DEFINE(sym, val) \
- asm volatile("\n->" #sym " %0 " #val : : "i" (val))
-
-#define BLANK() asm volatile("\n->" : : )
-
int main (void)
{
/* offsets into the task struct */
diff --git a/arch/v850/kernel/rte_mb_a_pci.c b/arch/v850/kernel/rte_mb_a_pci.c
index 7165478824e7..687e367d8b64 100644
--- a/arch/v850/kernel/rte_mb_a_pci.c
+++ b/arch/v850/kernel/rte_mb_a_pci.c
@@ -790,8 +790,8 @@ pci_free_consistent (struct pci_dev *pdev, size_t size, void *cpu_addr,
void __iomem *pci_iomap (struct pci_dev *dev, int bar, unsigned long max)
{
- unsigned long start = pci_resource_start (dev, bar);
- unsigned long len = pci_resource_len (dev, bar);
+ resource_size_t start = pci_resource_start (dev, bar);
+ resource_size_t len = pci_resource_len (dev, bar);
if (!start || len == 0)
return 0;
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2fadf794483d..f70e3e3a9fa7 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -86,9 +86,6 @@ config GENERIC_GPIO
config ARCH_MAY_HAVE_PC_FDC
def_bool y
-config DMI
- def_bool y
-
config RWSEM_GENERIC_SPINLOCK
def_bool !X86_XADD
@@ -114,6 +111,9 @@ config GENERIC_TIME_VSYSCALL
config ARCH_HAS_CPU_RELAX
def_bool y
+config ARCH_HAS_CACHE_LINE_SIZE
+ def_bool y
+
config HAVE_SETUP_PER_CPU_AREA
def_bool X86_64 || (X86_SMP && !X86_VOYAGER)
@@ -373,6 +373,25 @@ config VMI
at the moment), by linking the kernel to a GPL-ed ROM module
provided by the hypervisor.
+config KVM_CLOCK
+ bool "KVM paravirtualized clock"
+ select PARAVIRT
+ depends on !(X86_VISWS || X86_VOYAGER)
+ help
+ Turning on this option will allow you to run a paravirtualized clock
+ when running over the KVM hypervisor. Instead of relying on a PIT
+ (or probably other) emulation by the underlying device model, the host
+ provides the guest with timing infrastructure such as time of day, and
+ system time
+
+config KVM_GUEST
+ bool "KVM Guest support"
+ select PARAVIRT
+ depends on !(X86_VISWS || X86_VOYAGER)
+ help
+ This option enables various optimizations for running under the KVM
+ hypervisor.
+
source "arch/x86/lguest/Kconfig"
config PARAVIRT
@@ -463,6 +482,15 @@ config HPET_EMULATE_RTC
# Mark as embedded because too many people got it wrong.
# The code disables itself when not needed.
+config DMI
+ default y
+ bool "Enable DMI scanning" if EMBEDDED
+ help
+ Enabled scanning of DMI to identify machine quirks. Say Y
+ here unless you have verified that your setup is not
+ affected by entries in the DMI blacklist. Required by PNP
+ BIOS code.
+
config GART_IOMMU
bool "GART IOMMU support" if EMBEDDED
default y
@@ -509,9 +537,6 @@ config CALGARY_IOMMU_ENABLED_BY_DEFAULT
Calgary anyway, pass 'iommu=calgary' on the kernel command line.
If unsure, say Y.
-config IOMMU_HELPER
- def_bool (CALGARY_IOMMU || GART_IOMMU)
-
# need this always selected by IOMMU for the VIA workaround
config SWIOTLB
bool
@@ -522,6 +547,8 @@ config SWIOTLB
access 32-bits of memory can be used on systems with more than
3 GB of memory. If unsure, say Y.
+config IOMMU_HELPER
+ def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB)
config NR_CPUS
int "Maximum number of CPUs (2-255)"
@@ -1477,6 +1504,10 @@ config PCI_GODIRECT
config PCI_GOANY
bool "Any"
+config PCI_GOOLPC
+ bool "OLPC"
+ depends on OLPC
+
endchoice
config PCI_BIOS
@@ -1486,12 +1517,17 @@ config PCI_BIOS
# x86-64 doesn't support PCI BIOS access from long mode so always go direct.
config PCI_DIRECT
def_bool y
- depends on PCI && (X86_64 || (PCI_GODIRECT || PCI_GOANY) || X86_VISWS)
+ depends on PCI && (X86_64 || (PCI_GODIRECT || PCI_GOANY || PCI_GOOLPC) || X86_VISWS)
config PCI_MMCONFIG
def_bool y
depends on X86_32 && PCI && ACPI && (PCI_GOMMCONFIG || PCI_GOANY)
+config PCI_OLPC
+ bool
+ depends on PCI && PCI_GOOLPC
+ default y
+
config PCI_DOMAINS
def_bool y
depends on PCI
@@ -1611,6 +1647,13 @@ config GEODE_MFGPT_TIMER
MFGPTs have a better resolution and max interval than the
generic PIT, and are suitable for use as high-res timers.
+config OLPC
+ bool "One Laptop Per Child support"
+ default n
+ help
+ Add support for detecting the unique features of the OLPC
+ XO hardware.
+
endif # X86_32
config K8_NB
diff --git a/arch/x86/boot/edd.c b/arch/x86/boot/edd.c
index d84a48ece785..03399d64013b 100644
--- a/arch/x86/boot/edd.c
+++ b/arch/x86/boot/edd.c
@@ -126,17 +126,25 @@ void query_edd(void)
{
char eddarg[8];
int do_mbr = 1;
+#ifdef CONFIG_EDD_OFF
+ int do_edd = 0;
+#else
int do_edd = 1;
+#endif
int be_quiet;
int devno;
struct edd_info ei, *edp;
u32 *mbrptr;
if (cmdline_find_option("edd", eddarg, sizeof eddarg) > 0) {
- if (!strcmp(eddarg, "skipmbr") || !strcmp(eddarg, "skip"))
+ if (!strcmp(eddarg, "skipmbr") || !strcmp(eddarg, "skip")) {
+ do_edd = 1;
do_mbr = 0;
+ }
else if (!strcmp(eddarg, "off"))
do_edd = 0;
+ else if (!strcmp(eddarg, "on"))
+ do_edd = 1;
}
be_quiet = cmdline_find_option_bool("quiet");
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 815b650977b4..30d54ed27e55 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -80,6 +80,8 @@ obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o
obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o
obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o
+obj-$(CONFIG_KVM_GUEST) += kvm.o
+obj-$(CONFIG_KVM_CLOCK) += kvmclock.o
obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
ifdef CONFIG_INPUT_PCSPKR
@@ -89,6 +91,8 @@ endif
obj-$(CONFIG_SCx200) += scx200.o
scx200-y += scx200_32.o
+obj-$(CONFIG_OLPC) += olpc.o
+
###
# 64 bit specific files
ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index f0030a0999c7..e4ea362e8480 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -904,6 +904,7 @@ recalc:
original_pm_idle();
else
default_idle();
+ local_irq_disable();
jiffies_since_last_check = jiffies - last_jiffies;
if (jiffies_since_last_check > idle_period)
goto recalc;
@@ -911,6 +912,8 @@ recalc:
if (apm_idle_done)
apm_do_busy();
+
+ local_irq_enable();
}
/**
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 670c3c311289..92588083950f 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -9,6 +9,7 @@
#include <linux/signal.h>
#include <linux/personality.h>
#include <linux/suspend.h>
+#include <linux/kbuild.h>
#include <asm/ucontext.h>
#include "sigframe.h"
#include <asm/pgtable.h>
@@ -23,14 +24,6 @@
#include <linux/lguest.h>
#include "../../../drivers/lguest/lg.h"
-#define DEFINE(sym, val) \
- asm volatile("\n->" #sym " %0 " #val : : "i" (val))
-
-#define BLANK() asm volatile("\n->" : : )
-
-#define OFFSET(sym, str, mem) \
- DEFINE(sym, offsetof(struct str, mem));
-
/* workaround for a warning with -Wmissing-prototypes */
void foo(void);
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 494e1e096ee6..f126c05d6170 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -10,6 +10,7 @@
#include <linux/errno.h>
#include <linux/hardirq.h>
#include <linux/suspend.h>
+#include <linux/kbuild.h>
#include <asm/pda.h>
#include <asm/processor.h>
#include <asm/segment.h>
@@ -17,14 +18,6 @@
#include <asm/ia32.h>
#include <asm/bootparam.h>
-#define DEFINE(sym, val) \
- asm volatile("\n->" #sym " %0 " #val : : "i" (val))
-
-#define BLANK() asm volatile("\n->" : : )
-
-#define OFFSET(sym, str, mem) \
- DEFINE(sym, offsetof(struct str, mem))
-
#define __NO_STUBS 1
#undef __SYSCALL
#undef _ASM_X86_64_UNISTD_H_
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index e2d870de837c..b0c8208df9fa 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -339,6 +339,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
{
struct acpi_cpufreq_data *data = per_cpu(drv_data, cpu);
unsigned int freq;
+ unsigned int cached_freq;
dprintk("get_cur_freq_on_cpu (%d)\n", cpu);
@@ -347,7 +348,16 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
return 0;
}
+ cached_freq = data->freq_table[data->acpi_data->state].frequency;
freq = extract_freq(get_cur_val(&cpumask_of_cpu(cpu)), data);
+ if (freq != cached_freq) {
+ /*
+ * The dreaded BIOS frequency change behind our back.
+ * Force set the frequency on next target call.
+ */
+ data->resume = 1;
+ }
+
dprintk("cur freq = %u\n", freq);
return freq;
@@ -591,6 +601,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
policy->cpus = perf->shared_cpu_map;
}
+ policy->related_cpus = perf->shared_cpu_map;
#ifdef CONFIG_SMP
dmi_check_system(sw_any_bug_dmi_table);
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index 1960f1985e5e..84c480bb3715 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -424,7 +424,7 @@ static int __init mtrr_if_init(void)
return -ENODEV;
proc_root_mtrr =
- proc_create("mtrr", S_IWUSR | S_IRUGO, &proc_root, &mtrr_fops);
+ proc_create("mtrr", S_IWUSR | S_IRUGO, NULL, &mtrr_fops);
if (proc_root_mtrr)
proc_root_mtrr->owner = THIS_MODULE;
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 2251d0ae9570..268553817909 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -25,6 +25,7 @@
#include <asm/hpet.h>
#include <linux/kdebug.h>
#include <asm/smp.h>
+#include <asm/reboot.h>
#include <mach_ipi.h>
@@ -117,7 +118,7 @@ static void nmi_shootdown_cpus(void)
}
#endif
-void machine_crash_shutdown(struct pt_regs *regs)
+void native_machine_crash_shutdown(struct pt_regs *regs)
{
/* This function is only called after the system
* has panicked or is otherwise in a critical state.
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 696b8e4e66bb..a40d54fc1fdd 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -2444,6 +2444,7 @@ void destroy_irq(unsigned int irq)
dynamic_irq_cleanup(irq);
spin_lock_irqsave(&vector_lock, flags);
+ clear_bit(irq_vector[irq], used_vectors);
irq_vector[irq] = 0;
spin_unlock_irqrestore(&vector_lock, flags);
}
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 00bda7bcda63..147352df28b9 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -190,8 +190,6 @@ void irq_ctx_exit(int cpu)
hardirq_ctx[cpu] = NULL;
}
-extern asmlinkage void __do_softirq(void);
-
asmlinkage void do_softirq(void)
{
unsigned long flags;
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
new file mode 100644
index 000000000000..8b7a3cf37d2b
--- /dev/null
+++ b/arch/x86/kernel/kvm.c
@@ -0,0 +1,248 @@
+/*
+ * KVM paravirt_ops implementation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright (C) 2007, Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ * Copyright IBM Corporation, 2007
+ * Authors: Anthony Liguori <aliguori@us.ibm.com>
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/kvm_para.h>
+#include <linux/cpu.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/hardirq.h>
+
+#define MMU_QUEUE_SIZE 1024
+
+struct kvm_para_state {
+ u8 mmu_queue[MMU_QUEUE_SIZE];
+ int mmu_queue_len;
+ enum paravirt_lazy_mode mode;
+};
+
+static DEFINE_PER_CPU(struct kvm_para_state, para_state);
+
+static struct kvm_para_state *kvm_para_state(void)
+{
+ return &per_cpu(para_state, raw_smp_processor_id());
+}
+
+/*
+ * No need for any "IO delay" on KVM
+ */
+static void kvm_io_delay(void)
+{
+}
+
+static void kvm_mmu_op(void *buffer, unsigned len)
+{
+ int r;
+ unsigned long a1, a2;
+
+ do {
+ a1 = __pa(buffer);
+ a2 = 0; /* on i386 __pa() always returns <4G */
+ r = kvm_hypercall3(KVM_HC_MMU_OP, len, a1, a2);
+ buffer += r;
+ len -= r;
+ } while (len);
+}
+
+static void mmu_queue_flush(struct kvm_para_state *state)
+{
+ if (state->mmu_queue_len) {
+ kvm_mmu_op(state->mmu_queue, state->mmu_queue_len);
+ state->mmu_queue_len = 0;
+ }
+}
+
+static void kvm_deferred_mmu_op(void *buffer, int len)
+{
+ struct kvm_para_state *state = kvm_para_state();
+
+ if (state->mode != PARAVIRT_LAZY_MMU) {
+ kvm_mmu_op(buffer, len);
+ return;
+ }
+ if (state->mmu_queue_len + len > sizeof state->mmu_queue)
+ mmu_queue_flush(state);
+ memcpy(state->mmu_queue + state->mmu_queue_len, buffer, len);
+ state->mmu_queue_len += len;
+}
+
+static void kvm_mmu_write(void *dest, u64 val)
+{
+ __u64 pte_phys;
+ struct kvm_mmu_op_write_pte wpte;
+
+#ifdef CONFIG_HIGHPTE
+ struct page *page;
+ unsigned long dst = (unsigned long) dest;
+
+ page = kmap_atomic_to_page(dest);
+ pte_phys = page_to_pfn(page);
+ pte_phys <<= PAGE_SHIFT;
+ pte_phys += (dst & ~(PAGE_MASK));
+#else
+ pte_phys = (unsigned long)__pa(dest);
+#endif
+ wpte.header.op = KVM_MMU_OP_WRITE_PTE;
+ wpte.pte_val = val;
+ wpte.pte_phys = pte_phys;
+
+ kvm_deferred_mmu_op(&wpte, sizeof wpte);
+}
+
+/*
+ * We only need to hook operations that are MMU writes. We hook these so that
+ * we can use lazy MMU mode to batch these operations. We could probably
+ * improve the performance of the host code if we used some of the information
+ * here to simplify processing of batched writes.
+ */
+static void kvm_set_pte(pte_t *ptep, pte_t pte)
+{
+ kvm_mmu_write(ptep, pte_val(pte));
+}
+
+static void kvm_set_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte)
+{
+ kvm_mmu_write(ptep, pte_val(pte));
+}
+
+static void kvm_set_pmd(pmd_t *pmdp, pmd_t pmd)
+{
+ kvm_mmu_write(pmdp, pmd_val(pmd));
+}
+
+#if PAGETABLE_LEVELS >= 3
+#ifdef CONFIG_X86_PAE
+static void kvm_set_pte_atomic(pte_t *ptep, pte_t pte)
+{
+ kvm_mmu_write(ptep, pte_val(pte));
+}
+
+static void kvm_set_pte_present(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte)
+{
+ kvm_mmu_write(ptep, pte_val(pte));
+}
+
+static void kvm_pte_clear(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ kvm_mmu_write(ptep, 0);
+}
+
+static void kvm_pmd_clear(pmd_t *pmdp)
+{
+ kvm_mmu_write(pmdp, 0);
+}
+#endif
+
+static void kvm_set_pud(pud_t *pudp, pud_t pud)
+{
+ kvm_mmu_write(pudp, pud_val(pud));
+}
+
+#if PAGETABLE_LEVELS == 4
+static void kvm_set_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+ kvm_mmu_write(pgdp, pgd_val(pgd));
+}
+#endif
+#endif /* PAGETABLE_LEVELS >= 3 */
+
+static void kvm_flush_tlb(void)
+{
+ struct kvm_mmu_op_flush_tlb ftlb = {
+ .header.op = KVM_MMU_OP_FLUSH_TLB,
+ };
+
+ kvm_deferred_mmu_op(&ftlb, sizeof ftlb);
+}
+
+static void kvm_release_pt(u32 pfn)
+{
+ struct kvm_mmu_op_release_pt rpt = {
+ .header.op = KVM_MMU_OP_RELEASE_PT,
+ .pt_phys = (u64)pfn << PAGE_SHIFT,
+ };
+
+ kvm_mmu_op(&rpt, sizeof rpt);
+}
+
+static void kvm_enter_lazy_mmu(void)
+{
+ struct kvm_para_state *state = kvm_para_state();
+
+ paravirt_enter_lazy_mmu();
+ state->mode = paravirt_get_lazy_mode();
+}
+
+static void kvm_leave_lazy_mmu(void)
+{
+ struct kvm_para_state *state = kvm_para_state();
+
+ mmu_queue_flush(state);
+ paravirt_leave_lazy(paravirt_get_lazy_mode());
+ state->mode = paravirt_get_lazy_mode();
+}
+
+static void paravirt_ops_setup(void)
+{
+ pv_info.name = "KVM";
+ pv_info.paravirt_enabled = 1;
+
+ if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY))
+ pv_cpu_ops.io_delay = kvm_io_delay;
+
+ if (kvm_para_has_feature(KVM_FEATURE_MMU_OP)) {
+ pv_mmu_ops.set_pte = kvm_set_pte;
+ pv_mmu_ops.set_pte_at = kvm_set_pte_at;
+ pv_mmu_ops.set_pmd = kvm_set_pmd;
+#if PAGETABLE_LEVELS >= 3
+#ifdef CONFIG_X86_PAE
+ pv_mmu_ops.set_pte_atomic = kvm_set_pte_atomic;
+ pv_mmu_ops.set_pte_present = kvm_set_pte_present;
+ pv_mmu_ops.pte_clear = kvm_pte_clear;
+ pv_mmu_ops.pmd_clear = kvm_pmd_clear;
+#endif
+ pv_mmu_ops.set_pud = kvm_set_pud;
+#if PAGETABLE_LEVELS == 4
+ pv_mmu_ops.set_pgd = kvm_set_pgd;
+#endif
+#endif
+ pv_mmu_ops.flush_tlb_user = kvm_flush_tlb;
+ pv_mmu_ops.release_pte = kvm_release_pt;
+ pv_mmu_ops.release_pmd = kvm_release_pt;
+ pv_mmu_ops.release_pud = kvm_release_pt;
+
+ pv_mmu_ops.lazy_mode.enter = kvm_enter_lazy_mmu;
+ pv_mmu_ops.lazy_mode.leave = kvm_leave_lazy_mmu;
+ }
+}
+
+void __init kvm_guest_init(void)
+{
+ if (!kvm_para_available())
+ return;
+
+ paravirt_ops_setup();
+}
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
new file mode 100644
index 000000000000..ddee04043aeb
--- /dev/null
+++ b/arch/x86/kernel/kvmclock.c
@@ -0,0 +1,187 @@
+/* KVM paravirtual clock driver. A clocksource implementation
+ Copyright (C) 2008 Glauber de Oliveira Costa, Red Hat Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#include <linux/clocksource.h>
+#include <linux/kvm_para.h>
+#include <asm/arch_hooks.h>
+#include <asm/msr.h>
+#include <asm/apic.h>
+#include <linux/percpu.h>
+#include <asm/reboot.h>
+
+#define KVM_SCALE 22
+
+static int kvmclock = 1;
+
+static int parse_no_kvmclock(char *arg)
+{
+ kvmclock = 0;
+ return 0;
+}
+early_param("no-kvmclock", parse_no_kvmclock);
+
+/* The hypervisor will put information about time periodically here */
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct kvm_vcpu_time_info, hv_clock);
+#define get_clock(cpu, field) per_cpu(hv_clock, cpu).field
+
+static inline u64 kvm_get_delta(u64 last_tsc)
+{
+ int cpu = smp_processor_id();
+ u64 delta = native_read_tsc() - last_tsc;
+ return (delta * get_clock(cpu, tsc_to_system_mul)) >> KVM_SCALE;
+}
+
+static struct kvm_wall_clock wall_clock;
+static cycle_t kvm_clock_read(void);
+/*
+ * The wallclock is the time of day when we booted. Since then, some time may
+ * have elapsed since the hypervisor wrote the data. So we try to account for
+ * that with system time
+ */
+unsigned long kvm_get_wallclock(void)
+{
+ u32 wc_sec, wc_nsec;
+ u64 delta;
+ struct timespec ts;
+ int version, nsec;
+ int low, high;
+
+ low = (int)__pa(&wall_clock);
+ high = ((u64)__pa(&wall_clock) >> 32);
+
+ delta = kvm_clock_read();
+
+ native_write_msr(MSR_KVM_WALL_CLOCK, low, high);
+ do {
+ version = wall_clock.wc_version;
+ rmb();
+ wc_sec = wall_clock.wc_sec;
+ wc_nsec = wall_clock.wc_nsec;
+ rmb();
+ } while ((wall_clock.wc_version != version) || (version & 1));
+
+ delta = kvm_clock_read() - delta;
+ delta += wc_nsec;
+ nsec = do_div(delta, NSEC_PER_SEC);
+ set_normalized_timespec(&ts, wc_sec + delta, nsec);
+ /*
+ * Of all mechanisms of time adjustment I've tested, this one
+ * was the champion!
+ */
+ return ts.tv_sec + 1;
+}
+
+int kvm_set_wallclock(unsigned long now)
+{
+ return 0;
+}
+
+/*
+ * This is our read_clock function. The host puts an tsc timestamp each time
+ * it updates a new time. Without the tsc adjustment, we can have a situation
+ * in which a vcpu starts to run earlier (smaller system_time), but probes
+ * time later (compared to another vcpu), leading to backwards time
+ */
+static cycle_t kvm_clock_read(void)
+{
+ u64 last_tsc, now;
+ int cpu;
+
+ preempt_disable();
+ cpu = smp_processor_id();
+
+ last_tsc = get_clock(cpu, tsc_timestamp);
+ now = get_clock(cpu, system_time);
+
+ now += kvm_get_delta(last_tsc);
+ preempt_enable();
+
+ return now;
+}
+static struct clocksource kvm_clock = {
+ .name = "kvm-clock",
+ .read = kvm_clock_read,
+ .rating = 400,
+ .mask = CLOCKSOURCE_MASK(64),
+ .mult = 1 << KVM_SCALE,
+ .shift = KVM_SCALE,
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+static int kvm_register_clock(void)
+{
+ int cpu = smp_processor_id();
+ int low, high;
+ low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1;
+ high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32);
+
+ return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high);
+}
+
+static void kvm_setup_secondary_clock(void)
+{
+ /*
+ * Now that the first cpu already had this clocksource initialized,
+ * we shouldn't fail.
+ */
+ WARN_ON(kvm_register_clock());
+ /* ok, done with our trickery, call native */
+ setup_secondary_APIC_clock();
+}
+
+/*
+ * After the clock is registered, the host will keep writing to the
+ * registered memory location. If the guest happens to shutdown, this memory
+ * won't be valid. In cases like kexec, in which you install a new kernel, this
+ * means a random memory location will be kept being written. So before any
+ * kind of shutdown from our side, we unregister the clock by writting anything
+ * that does not have the 'enable' bit set in the msr
+ */
+#ifdef CONFIG_KEXEC
+static void kvm_crash_shutdown(struct pt_regs *regs)
+{
+ native_write_msr_safe(MSR_KVM_SYSTEM_TIME, 0, 0);
+ native_machine_crash_shutdown(regs);
+}
+#endif
+
+static void kvm_shutdown(void)
+{
+ native_write_msr_safe(MSR_KVM_SYSTEM_TIME, 0, 0);
+ native_machine_shutdown();
+}
+
+void __init kvmclock_init(void)
+{
+ if (!kvm_para_available())
+ return;
+
+ if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) {
+ if (kvm_register_clock())
+ return;
+ pv_time_ops.get_wallclock = kvm_get_wallclock;
+ pv_time_ops.set_wallclock = kvm_set_wallclock;
+ pv_time_ops.sched_clock = kvm_clock_read;
+ pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock;
+ machine_ops.shutdown = kvm_shutdown;
+#ifdef CONFIG_KEXEC
+ machine_ops.crash_shutdown = kvm_crash_shutdown;
+#endif
+ clocksource_register(&kvm_clock);
+ }
+}
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c
index cfc2648d25ff..3cad17fe026b 100644
--- a/arch/x86/kernel/mfgpt_32.c
+++ b/arch/x86/kernel/mfgpt_32.c
@@ -63,7 +63,7 @@ static int __init mfgpt_fix(char *s)
/* The following udocumented bit resets the MFGPT timers */
val = 0xFF; dummy = 0;
- wrmsr(0x5140002B, val, dummy);
+ wrmsr(MSR_MFGPT_SETUP, val, dummy);
return 1;
}
__setup("mfgptfix", mfgpt_fix);
@@ -127,17 +127,17 @@ int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable)
* 6; that is, resets for 7 and 8 will be ignored. Is this
* a problem? -dilinger
*/
- msr = MFGPT_NR_MSR;
+ msr = MSR_MFGPT_NR;
mask = 1 << (timer + 24);
break;
case MFGPT_EVENT_NMI:
- msr = MFGPT_NR_MSR;
+ msr = MSR_MFGPT_NR;
mask = 1 << (timer + shift);
break;
case MFGPT_EVENT_IRQ:
- msr = MFGPT_IRQ_MSR;
+ msr = MSR_MFGPT_IRQ;
mask = 1 << (timer + shift);
break;
diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c
new file mode 100644
index 000000000000..3e6672274807
--- /dev/null
+++ b/arch/x86/kernel/olpc.c
@@ -0,0 +1,260 @@
+/*
+ * Support for the OLPC DCON and OLPC EC access
+ *
+ * Copyright © 2006 Advanced Micro Devices, Inc.
+ * Copyright © 2007-2008 Andres Salomon <dilinger@debian.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/spinlock.h>
+#include <linux/io.h>
+#include <linux/string.h>
+#include <asm/geode.h>
+#include <asm/olpc.h>
+
+#ifdef CONFIG_OPEN_FIRMWARE
+#include <asm/ofw.h>
+#endif
+
+struct olpc_platform_t olpc_platform_info;
+EXPORT_SYMBOL_GPL(olpc_platform_info);
+
+static DEFINE_SPINLOCK(ec_lock);
+
+/* what the timeout *should* be (in ms) */
+#define EC_BASE_TIMEOUT 20
+
+/* the timeout that bugs in the EC might force us to actually use */
+static int ec_timeout = EC_BASE_TIMEOUT;
+
+static int __init olpc_ec_timeout_set(char *str)
+{
+ if (get_option(&str, &ec_timeout) != 1) {
+ ec_timeout = EC_BASE_TIMEOUT;
+ printk(KERN_ERR "olpc-ec: invalid argument to "
+ "'olpc_ec_timeout=', ignoring!\n");
+ }
+ printk(KERN_DEBUG "olpc-ec: using %d ms delay for EC commands.\n",
+ ec_timeout);
+ return 1;
+}
+__setup("olpc_ec_timeout=", olpc_ec_timeout_set);
+
+/*
+ * These {i,o}bf_status functions return whether the buffers are full or not.
+ */
+
+static inline unsigned int ibf_status(unsigned int port)
+{
+ return !!(inb(port) & 0x02);
+}
+
+static inline unsigned int obf_status(unsigned int port)
+{
+ return inb(port) & 0x01;
+}
+
+#define wait_on_ibf(p, d) __wait_on_ibf(__LINE__, (p), (d))
+static int __wait_on_ibf(unsigned int line, unsigned int port, int desired)
+{
+ unsigned int timeo;
+ int state = ibf_status(port);
+
+ for (timeo = ec_timeout; state != desired && timeo; timeo--) {
+ mdelay(1);
+ state = ibf_status(port);
+ }
+
+ if ((state == desired) && (ec_timeout > EC_BASE_TIMEOUT) &&
+ timeo < (ec_timeout - EC_BASE_TIMEOUT)) {
+ printk(KERN_WARNING "olpc-ec: %d: waited %u ms for IBF!\n",
+ line, ec_timeout - timeo);
+ }
+
+ return !(state == desired);
+}
+
+#define wait_on_obf(p, d) __wait_on_obf(__LINE__, (p), (d))
+static int __wait_on_obf(unsigned int line, unsigned int port, int desired)
+{
+ unsigned int timeo;
+ int state = obf_status(port);
+
+ for (timeo = ec_timeout; state != desired && timeo; timeo--) {
+ mdelay(1);
+ state = obf_status(port);
+ }
+
+ if ((state == desired) && (ec_timeout > EC_BASE_TIMEOUT) &&
+ timeo < (ec_timeout - EC_BASE_TIMEOUT)) {
+ printk(KERN_WARNING "olpc-ec: %d: waited %u ms for OBF!\n",
+ line, ec_timeout - timeo);
+ }
+
+ return !(state == desired);
+}
+
+/*
+ * This allows the kernel to run Embedded Controller commands. The EC is
+ * documented at <http://wiki.laptop.org/go/Embedded_controller>, and the
+ * available EC commands are here:
+ * <http://wiki.laptop.org/go/Ec_specification>. Unfortunately, while
+ * OpenFirmware's source is available, the EC's is not.
+ */
+int olpc_ec_cmd(unsigned char cmd, unsigned char *inbuf, size_t inlen,
+ unsigned char *outbuf, size_t outlen)
+{
+ unsigned long flags;
+ int ret = -EIO;
+ int i;
+
+ spin_lock_irqsave(&ec_lock, flags);
+
+ /* Clear OBF */
+ for (i = 0; i < 10 && (obf_status(0x6c) == 1); i++)
+ inb(0x68);
+ if (i == 10) {
+ printk(KERN_ERR "olpc-ec: timeout while attempting to "
+ "clear OBF flag!\n");
+ goto err;
+ }
+
+ if (wait_on_ibf(0x6c, 0)) {
+ printk(KERN_ERR "olpc-ec: timeout waiting for EC to "
+ "quiesce!\n");
+ goto err;
+ }
+
+restart:
+ /*
+ * Note that if we time out during any IBF checks, that's a failure;
+ * we have to return. There's no way for the kernel to clear that.
+ *
+ * If we time out during an OBF check, we can restart the command;
+ * reissuing it will clear the OBF flag, and we should be alright.
+ * The OBF flag will sometimes misbehave due to what we believe
+ * is a hardware quirk..
+ */
+ printk(KERN_DEBUG "olpc-ec: running cmd 0x%x\n", cmd);
+ outb(cmd, 0x6c);
+
+ if (wait_on_ibf(0x6c, 0)) {
+ printk(KERN_ERR "olpc-ec: timeout waiting for EC to read "
+ "command!\n");
+ goto err;
+ }
+
+ if (inbuf && inlen) {
+ /* write data to EC */
+ for (i = 0; i < inlen; i++) {
+ if (wait_on_ibf(0x6c, 0)) {
+ printk(KERN_ERR "olpc-ec: timeout waiting for"
+ " EC accept data!\n");
+ goto err;
+ }
+ printk(KERN_DEBUG "olpc-ec: sending cmd arg 0x%x\n",
+ inbuf[i]);
+ outb(inbuf[i], 0x68);
+ }
+ }
+ if (outbuf && outlen) {
+ /* read data from EC */
+ for (i = 0; i < outlen; i++) {
+ if (wait_on_obf(0x6c, 1)) {
+ printk(KERN_ERR "olpc-ec: timeout waiting for"
+ " EC to provide data!\n");
+ goto restart;
+ }
+ outbuf[i] = inb(0x68);
+ printk(KERN_DEBUG "olpc-ec: received 0x%x\n",
+ outbuf[i]);
+ }
+ }
+
+ ret = 0;
+err:
+ spin_unlock_irqrestore(&ec_lock, flags);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(olpc_ec_cmd);
+
+#ifdef CONFIG_OPEN_FIRMWARE
+static void __init platform_detect(void)
+{
+ size_t propsize;
+ u32 rev;
+
+ if (ofw("getprop", 4, 1, NULL, "board-revision-int", &rev, 4,
+ &propsize) || propsize != 4) {
+ printk(KERN_ERR "ofw: getprop call failed!\n");
+ rev = 0;
+ }
+ olpc_platform_info.boardrev = be32_to_cpu(rev);
+}
+#else
+static void __init platform_detect(void)
+{
+ /* stopgap until OFW support is added to the kernel */
+ olpc_platform_info.boardrev = be32_to_cpu(0xc2);
+}
+#endif
+
+static int __init olpc_init(void)
+{
+ unsigned char *romsig;
+
+ /* The ioremap check is dangerous; limit what we run it on */
+ if (!is_geode() || geode_has_vsa2())
+ return 0;
+
+ spin_lock_init(&ec_lock);
+
+ romsig = ioremap(0xffffffc0, 16);
+ if (!romsig)
+ return 0;
+
+ if (strncmp(romsig, "CL1 Q", 7))
+ goto unmap;
+ if (strncmp(romsig+6, romsig+13, 3)) {
+ printk(KERN_INFO "OLPC BIOS signature looks invalid. "
+ "Assuming not OLPC\n");
+ goto unmap;
+ }
+
+ printk(KERN_INFO "OLPC board with OpenFirmware %.16s\n", romsig);
+ olpc_platform_info.flags |= OLPC_F_PRESENT;
+
+ /* get the platform revision */
+ platform_detect();
+
+ /* assume B1 and above models always have a DCON */
+ if (olpc_board_at_least(olpc_board(0xb1)))
+ olpc_platform_info.flags |= OLPC_F_DCON;
+
+ /* get the EC revision */
+ olpc_ec_cmd(EC_FIRMWARE_REV, NULL, 0,
+ (unsigned char *) &olpc_platform_info.ecver, 1);
+
+ /* check to see if the VSA exists */
+ if (geode_has_vsa2())
+ olpc_platform_info.flags |= OLPC_F_VSA;
+
+ printk(KERN_INFO "OLPC board revision %s%X (EC=%x)\n",
+ ((olpc_platform_info.boardrev & 0xf) < 8) ? "pre" : "",
+ olpc_platform_info.boardrev >> 4,
+ olpc_platform_info.ecver);
+
+unmap:
+ iounmap(romsig);
+ return 0;
+}
+
+postcore_initcall(olpc_init);
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 3004d716539d..67e9b4a1e89d 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -4,6 +4,8 @@
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/pm.h>
struct kmem_cache *task_xstate_cachep;
@@ -42,3 +44,118 @@ void arch_task_cache_init(void)
__alignof__(union thread_xstate),
SLAB_PANIC, NULL);
}
+
+static void do_nothing(void *unused)
+{
+}
+
+/*
+ * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
+ * pm_idle and update to new pm_idle value. Required while changing pm_idle
+ * handler on SMP systems.
+ *
+ * Caller must have changed pm_idle to the new value before the call. Old
+ * pm_idle value will not be used by any CPU after the return of this function.
+ */
+void cpu_idle_wait(void)
+{
+ smp_mb();
+ /* kick all the CPUs so that they exit out of pm_idle */
+ smp_call_function(do_nothing, NULL, 0, 1);
+}
+EXPORT_SYMBOL_GPL(cpu_idle_wait);
+
+/*
+ * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
+ * which can obviate IPI to trigger checking of need_resched.
+ * We execute MONITOR against need_resched and enter optimized wait state
+ * through MWAIT. Whenever someone changes need_resched, we would be woken
+ * up from MWAIT (without an IPI).
+ *
+ * New with Core Duo processors, MWAIT can take some hints based on CPU
+ * capability.
+ */
+void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
+{
+ if (!need_resched()) {
+ __monitor((void *)&current_thread_info()->flags, 0, 0);
+ smp_mb();
+ if (!need_resched())
+ __mwait(ax, cx);
+ }
+}
+
+/* Default MONITOR/MWAIT with no hints, used for default C1 state */
+static void mwait_idle(void)
+{
+ if (!need_resched()) {
+ __monitor((void *)&current_thread_info()->flags, 0, 0);
+ smp_mb();
+ if (!need_resched())
+ __sti_mwait(0, 0);
+ else
+ local_irq_enable();
+ } else
+ local_irq_enable();
+}
+
+
+static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
+{
+ if (force_mwait)
+ return 1;
+ /* Any C1 states supported? */
+ return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0;
+}
+
+/*
+ * On SMP it's slightly faster (but much more power-consuming!)
+ * to poll the ->work.need_resched flag instead of waiting for the
+ * cross-CPU IPI to arrive. Use this option with caution.
+ */
+static void poll_idle(void)
+{
+ local_irq_enable();
+ cpu_relax();
+}
+
+void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
+{
+ static int selected;
+
+ if (selected)
+ return;
+#ifdef CONFIG_X86_SMP
+ if (pm_idle == poll_idle && smp_num_siblings > 1) {
+ printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
+ " performance may degrade.\n");
+ }
+#endif
+ if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
+ /*
+ * Skip, if setup has overridden idle.
+ * One CPU supports mwait => All CPUs supports mwait
+ */
+ if (!pm_idle) {
+ printk(KERN_INFO "using mwait in idle threads.\n");
+ pm_idle = mwait_idle;
+ }
+ }
+ selected = 1;
+}
+
+static int __init idle_setup(char *str)
+{
+ if (!strcmp(str, "poll")) {
+ printk("using polling idle threads.\n");
+ pm_idle = poll_idle;
+ } else if (!strcmp(str, "mwait"))
+ force_mwait = 1;
+ else
+ return -1;
+
+ boot_option_idle_override = 1;
+ return 0;
+}
+early_param("idle", idle_setup);
+
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 77de848bd1fb..f8476dfbb60d 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -111,12 +111,10 @@ void default_idle(void)
*/
smp_mb();
- local_irq_disable();
- if (!need_resched()) {
+ if (!need_resched())
safe_halt(); /* enables interrupts racelessly */
- local_irq_disable();
- }
- local_irq_enable();
+ else
+ local_irq_enable();
current_thread_info()->status |= TS_POLLING;
} else {
local_irq_enable();
@@ -128,17 +126,6 @@ void default_idle(void)
EXPORT_SYMBOL(default_idle);
#endif
-/*
- * On SMP it's slightly faster (but much more power-consuming!)
- * to poll the ->work.need_resched flag instead of waiting for the
- * cross-CPU IPI to arrive. Use this option with caution.
- */
-static void poll_idle(void)
-{
- local_irq_enable();
- cpu_relax();
-}
-
#ifdef CONFIG_HOTPLUG_CPU
#include <asm/nmi.h>
/* We don't actually take CPU down, just spin without interrupts. */
@@ -196,6 +183,7 @@ void cpu_idle(void)
if (cpu_is_offline(cpu))
play_dead();
+ local_irq_disable();
__get_cpu_var(irq_stat).idle_timestamp = jiffies;
idle();
}
@@ -206,104 +194,6 @@ void cpu_idle(void)
}
}
-static void do_nothing(void *unused)
-{
-}
-
-/*
- * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
- * pm_idle and update to new pm_idle value. Required while changing pm_idle
- * handler on SMP systems.
- *
- * Caller must have changed pm_idle to the new value before the call. Old
- * pm_idle value will not be used by any CPU after the return of this function.
- */
-void cpu_idle_wait(void)
-{
- smp_mb();
- /* kick all the CPUs so that they exit out of pm_idle */
- smp_call_function(do_nothing, NULL, 0, 1);
-}
-EXPORT_SYMBOL_GPL(cpu_idle_wait);
-
-/*
- * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
- * which can obviate IPI to trigger checking of need_resched.
- * We execute MONITOR against need_resched and enter optimized wait state
- * through MWAIT. Whenever someone changes need_resched, we would be woken
- * up from MWAIT (without an IPI).
- *
- * New with Core Duo processors, MWAIT can take some hints based on CPU
- * capability.
- */
-void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
-{
- if (!need_resched()) {
- __monitor((void *)&current_thread_info()->flags, 0, 0);
- smp_mb();
- if (!need_resched())
- __sti_mwait(ax, cx);
- else
- local_irq_enable();
- } else
- local_irq_enable();
-}
-
-/* Default MONITOR/MWAIT with no hints, used for default C1 state */
-static void mwait_idle(void)
-{
- local_irq_enable();
- mwait_idle_with_hints(0, 0);
-}
-
-static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
-{
- if (force_mwait)
- return 1;
- /* Any C1 states supported? */
- return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0;
-}
-
-void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
-{
- static int selected;
-
- if (selected)
- return;
-#ifdef CONFIG_X86_SMP
- if (pm_idle == poll_idle && smp_num_siblings > 1) {
- printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
- " performance may degrade.\n");
- }
-#endif
- if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
- /*
- * Skip, if setup has overridden idle.
- * One CPU supports mwait => All CPUs supports mwait
- */
- if (!pm_idle) {
- printk(KERN_INFO "using mwait in idle threads.\n");
- pm_idle = mwait_idle;
- }
- }
- selected = 1;
-}
-
-static int __init idle_setup(char *str)
-{
- if (!strcmp(str, "poll")) {
- printk("using polling idle threads.\n");
- pm_idle = poll_idle;
- } else if (!strcmp(str, "mwait"))
- force_mwait = 1;
- else
- return -1;
-
- boot_option_idle_override = 1;
- return 0;
-}
-early_param("idle", idle_setup);
-
void __show_registers(struct pt_regs *regs, int all)
{
unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 131c2ee7ac56..e2319f39988b 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -106,26 +106,13 @@ void default_idle(void)
* test NEED_RESCHED:
*/
smp_mb();
- local_irq_disable();
- if (!need_resched()) {
+ if (!need_resched())
safe_halt(); /* enables interrupts racelessly */
- local_irq_disable();
- }
- local_irq_enable();
+ else
+ local_irq_enable();
current_thread_info()->status |= TS_POLLING;
}
-/*
- * On SMP it's slightly faster (but much more power-consuming!)
- * to poll the ->need_resched flag instead of waiting for the
- * cross-CPU IPI to arrive. Use this option with caution.
- */
-static void poll_idle(void)
-{
- local_irq_enable();
- cpu_relax();
-}
-
#ifdef CONFIG_HOTPLUG_CPU
DECLARE_PER_CPU(int, cpu_state);
@@ -192,110 +179,6 @@ void cpu_idle(void)
}
}
-static void do_nothing(void *unused)
-{
-}
-
-/*
- * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
- * pm_idle and update to new pm_idle value. Required while changing pm_idle
- * handler on SMP systems.
- *
- * Caller must have changed pm_idle to the new value before the call. Old
- * pm_idle value will not be used by any CPU after the return of this function.
- */
-void cpu_idle_wait(void)
-{
- smp_mb();
- /* kick all the CPUs so that they exit out of pm_idle */
- smp_call_function(do_nothing, NULL, 0, 1);
-}
-EXPORT_SYMBOL_GPL(cpu_idle_wait);
-
-/*
- * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
- * which can obviate IPI to trigger checking of need_resched.
- * We execute MONITOR against need_resched and enter optimized wait state
- * through MWAIT. Whenever someone changes need_resched, we would be woken
- * up from MWAIT (without an IPI).
- *
- * New with Core Duo processors, MWAIT can take some hints based on CPU
- * capability.
- */
-void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
-{
- if (!need_resched()) {
- __monitor((void *)&current_thread_info()->flags, 0, 0);
- smp_mb();
- if (!need_resched())
- __mwait(ax, cx);
- }
-}
-
-/* Default MONITOR/MWAIT with no hints, used for default C1 state */
-static void mwait_idle(void)
-{
- if (!need_resched()) {
- __monitor((void *)&current_thread_info()->flags, 0, 0);
- smp_mb();
- if (!need_resched())
- __sti_mwait(0, 0);
- else
- local_irq_enable();
- } else {
- local_irq_enable();
- }
-}
-
-
-static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
-{
- if (force_mwait)
- return 1;
- /* Any C1 states supported? */
- return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0;
-}
-
-void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
-{
- static int selected;
-
- if (selected)
- return;
-#ifdef CONFIG_X86_SMP
- if (pm_idle == poll_idle && smp_num_siblings > 1) {
- printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
- " performance may degrade.\n");
- }
-#endif
- if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
- /*
- * Skip, if setup has overridden idle.
- * One CPU supports mwait => All CPUs supports mwait
- */
- if (!pm_idle) {
- printk(KERN_INFO "using mwait in idle threads.\n");
- pm_idle = mwait_idle;
- }
- }
- selected = 1;
-}
-
-static int __init idle_setup(char *str)
-{
- if (!strcmp(str, "poll")) {
- printk("using polling idle threads.\n");
- pm_idle = poll_idle;
- } else if (!strcmp(str, "mwait"))
- force_mwait = 1;
- else
- return -1;
-
- boot_option_idle_override = 1;
- return 0;
-}
-early_param("idle", idle_setup);
-
/* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs * regs)
{
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 1791a751a772..a4a838306b2c 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -399,7 +399,7 @@ static void native_machine_emergency_restart(void)
}
}
-static void native_machine_shutdown(void)
+void native_machine_shutdown(void)
{
/* Stop the cpus and apics */
#ifdef CONFIG_SMP
@@ -470,7 +470,10 @@ struct machine_ops machine_ops = {
.shutdown = native_machine_shutdown,
.emergency_restart = native_machine_emergency_restart,
.restart = native_machine_restart,
- .halt = native_machine_halt
+ .halt = native_machine_halt,
+#ifdef CONFIG_KEXEC
+ .crash_shutdown = native_machine_crash_shutdown,
+#endif
};
void machine_power_off(void)
@@ -498,3 +501,9 @@ void machine_halt(void)
machine_ops.halt();
}
+#ifdef CONFIG_KEXEC
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+ machine_ops.crash_shutdown(regs);
+}
+#endif
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 44cc9b933932..2283422af794 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -47,6 +47,7 @@
#include <linux/pfn.h>
#include <linux/pci.h>
#include <linux/init_ohci1394_dma.h>
+#include <linux/kvm_para.h>
#include <video/edid.h>
@@ -820,6 +821,10 @@ void __init setup_arch(char **cmdline_p)
max_low_pfn = setup_memory();
+#ifdef CONFIG_KVM_CLOCK
+ kvmclock_init();
+#endif
+
#ifdef CONFIG_VMI
/*
* Must be after max_low_pfn is determined, and before kernel
@@ -827,6 +832,7 @@ void __init setup_arch(char **cmdline_p)
*/
vmi_init();
#endif
+ kvm_guest_init();
/*
* NOTE: before this point _nobody_ is allowed to allocate
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 2f5c488aad0b..22c14e21c97c 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -44,6 +44,7 @@
#include <linux/sort.h>
#include <linux/uaccess.h>
#include <linux/init_ohci1394_dma.h>
+#include <linux/kvm_para.h>
#include <asm/mtrr.h>
#include <asm/uaccess.h>
@@ -398,6 +399,10 @@ void __init setup_arch(char **cmdline_p)
io_delay_init();
+#ifdef CONFIG_KVM_CLOCK
+ kvmclock_init();
+#endif
+
#ifdef CONFIG_SMP
/* setup to use the early static init tables during kernel startup */
x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init;
@@ -502,6 +507,8 @@ void __init setup_arch(char **cmdline_p)
init_apic_mappings();
ioapic_init_mappings();
+ kvm_guest_init();
+
/*
* We trust e820 completely. No explicit ROM probing in memory.
*/
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c
index 1a89e93f3f1c..2ff21f398934 100644
--- a/arch/x86/kernel/time_32.c
+++ b/arch/x86/kernel/time_32.c
@@ -115,7 +115,6 @@ irqreturn_t timer_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
}
-extern void (*late_time_init)(void);
/* Duplicate of time_init() below, with hpet_enable part added */
void __init hpet_time_init(void)
{
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index b7ab3c335fae..fad3674b06a5 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -209,12 +209,6 @@ SECTIONS
EXIT_DATA
}
-/* vdso blob that is mapped into user space */
- vdso_start = . ;
- .vdso : AT(ADDR(.vdso) - LOAD_OFFSET) { *(.vdso) }
- . = ALIGN(PAGE_SIZE);
- vdso_end = .;
-
#ifdef CONFIG_BLK_DEV_INITRD
. = ALIGN(PAGE_SIZE);
__initramfs_start = .;
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 41962e793c0f..8d45fabc5f3b 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -19,7 +19,7 @@ if VIRTUALIZATION
config KVM
tristate "Kernel-based Virtual Machine (KVM) support"
- depends on HAVE_KVM && EXPERIMENTAL
+ depends on HAVE_KVM
select PREEMPT_NOTIFIERS
select ANON_INODES
---help---
@@ -50,6 +50,17 @@ config KVM_AMD
Provides support for KVM on AMD processors equipped with the AMD-V
(SVM) extensions.
+config KVM_TRACE
+ bool "KVM trace support"
+ depends on KVM && MARKERS && SYSFS
+ select RELAY
+ select DEBUG_FS
+ default n
+ ---help---
+ This option allows reading a trace of kvm-related events through
+ relayfs. Note the ABI is not considered stable and will be
+ modified in future updates.
+
# OK, it's a little counter-intuitive to do this, but it puts it neatly under
# the virtualization menu.
source drivers/lguest/Kconfig
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index ffdd0b310784..c97d35c218db 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -3,10 +3,14 @@
#
common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o)
+ifeq ($(CONFIG_KVM_TRACE),y)
+common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o)
+endif
EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
-kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o
+kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
+ i8254.o
obj-$(CONFIG_KVM) += kvm.o
kvm-intel-objs = vmx.o
obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
new file mode 100644
index 000000000000..361e31611276
--- /dev/null
+++ b/arch/x86/kvm/i8254.c
@@ -0,0 +1,611 @@
+/*
+ * 8253/8254 interval timer emulation
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ * Copyright (c) 2006 Intel Corporation
+ * Copyright (c) 2007 Keir Fraser, XenSource Inc
+ * Copyright (c) 2008 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ * Authors:
+ * Sheng Yang <sheng.yang@intel.com>
+ * Based on QEMU and Xen.
+ */
+
+#include <linux/kvm_host.h>
+
+#include "irq.h"
+#include "i8254.h"
+
+#ifndef CONFIG_X86_64
+#define mod_64(x, y) ((x) - (y) * div64_64(x, y))
+#else
+#define mod_64(x, y) ((x) % (y))
+#endif
+
+#define RW_STATE_LSB 1
+#define RW_STATE_MSB 2
+#define RW_STATE_WORD0 3
+#define RW_STATE_WORD1 4
+
+/* Compute with 96 bit intermediate result: (a*b)/c */
+static u64 muldiv64(u64 a, u32 b, u32 c)
+{
+ union {
+ u64 ll;
+ struct {
+ u32 low, high;
+ } l;
+ } u, res;
+ u64 rl, rh;
+
+ u.ll = a;
+ rl = (u64)u.l.low * (u64)b;
+ rh = (u64)u.l.high * (u64)b;
+ rh += (rl >> 32);
+ res.l.high = div64_64(rh, c);
+ res.l.low = div64_64(((mod_64(rh, c) << 32) + (rl & 0xffffffff)), c);
+ return res.ll;
+}
+
+static void pit_set_gate(struct kvm *kvm, int channel, u32 val)
+{
+ struct kvm_kpit_channel_state *c =
+ &kvm->arch.vpit->pit_state.channels[channel];
+
+ WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ switch (c->mode) {
+ default:
+ case 0:
+ case 4:
+ /* XXX: just disable/enable counting */
+ break;
+ case 1:
+ case 2:
+ case 3:
+ case 5:
+ /* Restart counting on rising edge. */
+ if (c->gate < val)
+ c->count_load_time = ktime_get();
+ break;
+ }
+
+ c->gate = val;
+}
+
+int pit_get_gate(struct kvm *kvm, int channel)
+{
+ WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ return kvm->arch.vpit->pit_state.channels[channel].gate;
+}
+
+static int pit_get_count(struct kvm *kvm, int channel)
+{
+ struct kvm_kpit_channel_state *c =
+ &kvm->arch.vpit->pit_state.channels[channel];
+ s64 d, t;
+ int counter;
+
+ WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time));
+ d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC);
+
+ switch (c->mode) {
+ case 0:
+ case 1:
+ case 4:
+ case 5:
+ counter = (c->count - d) & 0xffff;
+ break;
+ case 3:
+ /* XXX: may be incorrect for odd counts */
+ counter = c->count - (mod_64((2 * d), c->count));
+ break;
+ default:
+ counter = c->count - mod_64(d, c->count);
+ break;
+ }
+ return counter;
+}
+
+static int pit_get_out(struct kvm *kvm, int channel)
+{
+ struct kvm_kpit_channel_state *c =
+ &kvm->arch.vpit->pit_state.channels[channel];
+ s64 d, t;
+ int out;
+
+ WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time));
+ d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC);
+
+ switch (c->mode) {
+ default:
+ case 0:
+ out = (d >= c->count);
+ break;
+ case 1:
+ out = (d < c->count);
+ break;
+ case 2:
+ out = ((mod_64(d, c->count) == 0) && (d != 0));
+ break;
+ case 3:
+ out = (mod_64(d, c->count) < ((c->count + 1) >> 1));
+ break;
+ case 4:
+ case 5:
+ out = (d == c->count);
+ break;
+ }
+
+ return out;
+}
+
+static void pit_latch_count(struct kvm *kvm, int channel)
+{
+ struct kvm_kpit_channel_state *c =
+ &kvm->arch.vpit->pit_state.channels[channel];
+
+ WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ if (!c->count_latched) {
+ c->latched_count = pit_get_count(kvm, channel);
+ c->count_latched = c->rw_mode;
+ }
+}
+
+static void pit_latch_status(struct kvm *kvm, int channel)
+{
+ struct kvm_kpit_channel_state *c =
+ &kvm->arch.vpit->pit_state.channels[channel];
+
+ WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+ if (!c->status_latched) {
+ /* TODO: Return NULL COUNT (bit 6). */
+ c->status = ((pit_get_out(kvm, channel) << 7) |
+ (c->rw_mode << 4) |
+ (c->mode << 1) |
+ c->bcd);
+ c->status_latched = 1;
+ }
+}
+
+int __pit_timer_fn(struct kvm_kpit_state *ps)
+{
+ struct kvm_vcpu *vcpu0 = ps->pit->kvm->vcpus[0];
+ struct kvm_kpit_timer *pt = &ps->pit_timer;
+
+ atomic_inc(&pt->pending);
+ smp_mb__after_atomic_inc();
+ /* FIXME: handle case where the guest is in guest mode */
+ if (vcpu0 && waitqueue_active(&vcpu0->wq)) {
+ vcpu0->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+ wake_up_interruptible(&vcpu0->wq);
+ }
+
+ pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period);
+ pt->scheduled = ktime_to_ns(pt->timer.expires);
+
+ return (pt->period == 0 ? 0 : 1);
+}
+
+int pit_has_pending_timer(struct kvm_vcpu *vcpu)
+{
+ struct kvm_pit *pit = vcpu->kvm->arch.vpit;
+
+ if (pit && vcpu->vcpu_id == 0)
+ return atomic_read(&pit->pit_state.pit_timer.pending);
+
+ return 0;
+}
+
+static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
+{
+ struct kvm_kpit_state *ps;
+ int restart_timer = 0;
+
+ ps = container_of(data, struct kvm_kpit_state, pit_timer.timer);
+
+ restart_timer = __pit_timer_fn(ps);
+
+ if (restart_timer)
+ return HRTIMER_RESTART;
+ else
+ return HRTIMER_NORESTART;
+}
+
+static void destroy_pit_timer(struct kvm_kpit_timer *pt)
+{
+ pr_debug("pit: execute del timer!\n");
+ hrtimer_cancel(&pt->timer);
+}
+
+static void create_pit_timer(struct kvm_kpit_timer *pt, u32 val, int is_period)
+{
+ s64 interval;
+
+ interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ);
+
+ pr_debug("pit: create pit timer, interval is %llu nsec\n", interval);
+
+ /* TODO The new value only affected after the retriggered */
+ hrtimer_cancel(&pt->timer);
+ pt->period = (is_period == 0) ? 0 : interval;
+ pt->timer.function = pit_timer_fn;
+ atomic_set(&pt->pending, 0);
+
+ hrtimer_start(&pt->timer, ktime_add_ns(ktime_get(), interval),
+ HRTIMER_MODE_ABS);
+}
+
+static void pit_load_count(struct kvm *kvm, int channel, u32 val)
+{
+ struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
+
+ WARN_ON(!mutex_is_locked(&ps->lock));
+
+ pr_debug("pit: load_count val is %d, channel is %d\n", val, channel);
+
+ /*
+ * Though spec said the state of 8254 is undefined after power-up,
+ * seems some tricky OS like Windows XP depends on IRQ0 interrupt
+ * when booting up.
+ * So here setting initialize rate for it, and not a specific number
+ */
+ if (val == 0)
+ val = 0x10000;
+
+ ps->channels[channel].count_load_time = ktime_get();
+ ps->channels[channel].count = val;
+
+ if (channel != 0)
+ return;
+
+ /* Two types of timer
+ * mode 1 is one shot, mode 2 is period, otherwise del timer */
+ switch (ps->channels[0].mode) {
+ case 1:
+ create_pit_timer(&ps->pit_timer, val, 0);
+ break;
+ case 2:
+ create_pit_timer(&ps->pit_timer, val, 1);
+ break;
+ default:
+ destroy_pit_timer(&ps->pit_timer);
+ }
+}
+
+void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val)
+{
+ mutex_lock(&kvm->arch.vpit->pit_state.lock);
+ pit_load_count(kvm, channel, val);
+ mutex_unlock(&kvm->arch.vpit->pit_state.lock);
+}
+
+static void pit_ioport_write(struct kvm_io_device *this,
+ gpa_t addr, int len, const void *data)
+{
+ struct kvm_pit *pit = (struct kvm_pit *)this->private;
+ struct kvm_kpit_state *pit_state = &pit->pit_state;
+ struct kvm *kvm = pit->kvm;
+ int channel, access;
+ struct kvm_kpit_channel_state *s;
+ u32 val = *(u32 *) data;
+
+ val &= 0xff;
+ addr &= KVM_PIT_CHANNEL_MASK;
+
+ mutex_lock(&pit_state->lock);
+
+ if (val != 0)
+ pr_debug("pit: write addr is 0x%x, len is %d, val is 0x%x\n",
+ (unsigned int)addr, len, val);
+
+ if (addr == 3) {
+ channel = val >> 6;
+ if (channel == 3) {
+ /* Read-Back Command. */
+ for (channel = 0; channel < 3; channel++) {
+ s = &pit_state->channels[channel];
+ if (val & (2 << channel)) {
+ if (!(val & 0x20))
+ pit_latch_count(kvm, channel);
+ if (!(val & 0x10))
+ pit_latch_status(kvm, channel);
+ }
+ }
+ } else {
+ /* Select Counter <channel>. */
+ s = &pit_state->channels[channel];
+ access = (val >> 4) & KVM_PIT_CHANNEL_MASK;
+ if (access == 0) {
+ pit_latch_count(kvm, channel);
+ } else {
+ s->rw_mode = access;
+ s->read_state = access;
+ s->write_state = access;
+ s->mode = (val >> 1) & 7;
+ if (s->mode > 5)
+ s->mode -= 4;
+ s->bcd = val & 1;
+ }
+ }
+ } else {
+ /* Write Count. */
+ s = &pit_state->channels[addr];
+ switch (s->write_state) {
+ default:
+ case RW_STATE_LSB:
+ pit_load_count(kvm, addr, val);
+ break;
+ case RW_STATE_MSB:
+ pit_load_count(kvm, addr, val << 8);
+ break;
+ case RW_STATE_WORD0:
+ s->write_latch = val;
+ s->write_state = RW_STATE_WORD1;
+ break;
+ case RW_STATE_WORD1:
+ pit_load_count(kvm, addr, s->write_latch | (val << 8));
+ s->write_state = RW_STATE_WORD0;
+ break;
+ }
+ }
+
+ mutex_unlock(&pit_state->lock);
+}
+
+static void pit_ioport_read(struct kvm_io_device *this,
+ gpa_t addr, int len, void *data)
+{
+ struct kvm_pit *pit = (struct kvm_pit *)this->private;
+ struct kvm_kpit_state *pit_state = &pit->pit_state;
+ struct kvm *kvm = pit->kvm;
+ int ret, count;
+ struct kvm_kpit_channel_state *s;
+
+ addr &= KVM_PIT_CHANNEL_MASK;
+ s = &pit_state->channels[addr];
+
+ mutex_lock(&pit_state->lock);
+
+ if (s->status_latched) {
+ s->status_latched = 0;
+ ret = s->status;
+ } else if (s->count_latched) {
+ switch (s->count_latched) {
+ default:
+ case RW_STATE_LSB:
+ ret = s->latched_count & 0xff;
+ s->count_latched = 0;
+ break;
+ case RW_STATE_MSB:
+ ret = s->latched_count >> 8;
+ s->count_latched = 0;
+ break;
+ case RW_STATE_WORD0:
+ ret = s->latched_count & 0xff;
+ s->count_latched = RW_STATE_MSB;
+ break;
+ }
+ } else {
+ switch (s->read_state) {
+ default:
+ case RW_STATE_LSB:
+ count = pit_get_count(kvm, addr);
+ ret = count & 0xff;
+ break;
+ case RW_STATE_MSB:
+ count = pit_get_count(kvm, addr);
+ ret = (count >> 8) & 0xff;
+ break;
+ case RW_STATE_WORD0:
+ count = pit_get_count(kvm, addr);
+ ret = count & 0xff;
+ s->read_state = RW_STATE_WORD1;
+ break;
+ case RW_STATE_WORD1:
+ count = pit_get_count(kvm, addr);
+ ret = (count >> 8) & 0xff;
+ s->read_state = RW_STATE_WORD0;
+ break;
+ }
+ }
+
+ if (len > sizeof(ret))
+ len = sizeof(ret);
+ memcpy(data, (char *)&ret, len);
+
+ mutex_unlock(&pit_state->lock);
+}
+
+static int pit_in_range(struct kvm_io_device *this, gpa_t addr)
+{
+ return ((addr >= KVM_PIT_BASE_ADDRESS) &&
+ (addr < KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH));
+}
+
+static void speaker_ioport_write(struct kvm_io_device *this,
+ gpa_t addr, int len, const void *data)
+{
+ struct kvm_pit *pit = (struct kvm_pit *)this->private;
+ struct kvm_kpit_state *pit_state = &pit->pit_state;
+ struct kvm *kvm = pit->kvm;
+ u32 val = *(u32 *) data;
+
+ mutex_lock(&pit_state->lock);
+ pit_state->speaker_data_on = (val >> 1) & 1;
+ pit_set_gate(kvm, 2, val & 1);
+ mutex_unlock(&pit_state->lock);
+}
+
+static void speaker_ioport_read(struct kvm_io_device *this,
+ gpa_t addr, int len, void *data)
+{
+ struct kvm_pit *pit = (struct kvm_pit *)this->private;
+ struct kvm_kpit_state *pit_state = &pit->pit_state;
+ struct kvm *kvm = pit->kvm;
+ unsigned int refresh_clock;
+ int ret;
+
+ /* Refresh clock toggles at about 15us. We approximate as 2^14ns. */
+ refresh_clock = ((unsigned int)ktime_to_ns(ktime_get()) >> 14) & 1;
+
+ mutex_lock(&pit_state->lock);
+ ret = ((pit_state->speaker_data_on << 1) | pit_get_gate(kvm, 2) |
+ (pit_get_out(kvm, 2) << 5) | (refresh_clock << 4));
+ if (len > sizeof(ret))
+ len = sizeof(ret);
+ memcpy(data, (char *)&ret, len);
+ mutex_unlock(&pit_state->lock);
+}
+
+static int speaker_in_range(struct kvm_io_device *this, gpa_t addr)
+{
+ return (addr == KVM_SPEAKER_BASE_ADDRESS);
+}
+
+void kvm_pit_reset(struct kvm_pit *pit)
+{
+ int i;
+ struct kvm_kpit_channel_state *c;
+
+ mutex_lock(&pit->pit_state.lock);
+ for (i = 0; i < 3; i++) {
+ c = &pit->pit_state.channels[i];
+ c->mode = 0xff;
+ c->gate = (i != 2);
+ pit_load_count(pit->kvm, i, 0);
+ }
+ mutex_unlock(&pit->pit_state.lock);
+
+ atomic_set(&pit->pit_state.pit_timer.pending, 0);
+ pit->pit_state.inject_pending = 1;
+}
+
+struct kvm_pit *kvm_create_pit(struct kvm *kvm)
+{
+ struct kvm_pit *pit;
+ struct kvm_kpit_state *pit_state;
+
+ pit = kzalloc(sizeof(struct kvm_pit), GFP_KERNEL);
+ if (!pit)
+ return NULL;
+
+ mutex_init(&pit->pit_state.lock);
+ mutex_lock(&pit->pit_state.lock);
+
+ /* Initialize PIO device */
+ pit->dev.read = pit_ioport_read;
+ pit->dev.write = pit_ioport_write;
+ pit->dev.in_range = pit_in_range;
+ pit->dev.private = pit;
+ kvm_io_bus_register_dev(&kvm->pio_bus, &pit->dev);
+
+ pit->speaker_dev.read = speaker_ioport_read;
+ pit->speaker_dev.write = speaker_ioport_write;
+ pit->speaker_dev.in_range = speaker_in_range;
+ pit->speaker_dev.private = pit;
+ kvm_io_bus_register_dev(&kvm->pio_bus, &pit->speaker_dev);
+
+ kvm->arch.vpit = pit;
+ pit->kvm = kvm;
+
+ pit_state = &pit->pit_state;
+ pit_state->pit = pit;
+ hrtimer_init(&pit_state->pit_timer.timer,
+ CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ mutex_unlock(&pit->pit_state.lock);
+
+ kvm_pit_reset(pit);
+
+ return pit;
+}
+
+void kvm_free_pit(struct kvm *kvm)
+{
+ struct hrtimer *timer;
+
+ if (kvm->arch.vpit) {
+ mutex_lock(&kvm->arch.vpit->pit_state.lock);
+ timer = &kvm->arch.vpit->pit_state.pit_timer.timer;
+ hrtimer_cancel(timer);
+ mutex_unlock(&kvm->arch.vpit->pit_state.lock);
+ kfree(kvm->arch.vpit);
+ }
+}
+
+void __inject_pit_timer_intr(struct kvm *kvm)
+{
+ mutex_lock(&kvm->lock);
+ kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 1);
+ kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 0);
+ kvm_pic_set_irq(pic_irqchip(kvm), 0, 1);
+ kvm_pic_set_irq(pic_irqchip(kvm), 0, 0);
+ mutex_unlock(&kvm->lock);
+}
+
+void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
+{
+ struct kvm_pit *pit = vcpu->kvm->arch.vpit;
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_kpit_state *ps;
+
+ if (vcpu && pit) {
+ ps = &pit->pit_state;
+
+ /* Try to inject pending interrupts when:
+ * 1. Pending exists
+ * 2. Last interrupt was accepted or waited for too long time*/
+ if (atomic_read(&ps->pit_timer.pending) &&
+ (ps->inject_pending ||
+ (jiffies - ps->last_injected_time
+ >= KVM_MAX_PIT_INTR_INTERVAL))) {
+ ps->inject_pending = 0;
+ __inject_pit_timer_intr(kvm);
+ ps->last_injected_time = jiffies;
+ }
+ }
+}
+
+void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
+{
+ struct kvm_arch *arch = &vcpu->kvm->arch;
+ struct kvm_kpit_state *ps;
+
+ if (vcpu && arch->vpit) {
+ ps = &arch->vpit->pit_state;
+ if (atomic_read(&ps->pit_timer.pending) &&
+ (((arch->vpic->pics[0].imr & 1) == 0 &&
+ arch->vpic->pics[0].irq_base == vec) ||
+ (arch->vioapic->redirtbl[0].fields.vector == vec &&
+ arch->vioapic->redirtbl[0].fields.mask != 1))) {
+ ps->inject_pending = 1;
+ atomic_dec(&ps->pit_timer.pending);
+ ps->channels[0].count_load_time = ktime_get();
+ }
+ }
+}
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
new file mode 100644
index 000000000000..db25c2a6c8c4
--- /dev/null
+++ b/arch/x86/kvm/i8254.h
@@ -0,0 +1,63 @@
+#ifndef __I8254_H
+#define __I8254_H
+
+#include "iodev.h"
+
+struct kvm_kpit_timer {
+ struct hrtimer timer;
+ int irq;
+ s64 period; /* unit: ns */
+ s64 scheduled;
+ ktime_t last_update;
+ atomic_t pending;
+};
+
+struct kvm_kpit_channel_state {
+ u32 count; /* can be 65536 */
+ u16 latched_count;
+ u8 count_latched;
+ u8 status_latched;
+ u8 status;
+ u8 read_state;
+ u8 write_state;
+ u8 write_latch;
+ u8 rw_mode;
+ u8 mode;
+ u8 bcd; /* not supported */
+ u8 gate; /* timer start */
+ ktime_t count_load_time;
+};
+
+struct kvm_kpit_state {
+ struct kvm_kpit_channel_state channels[3];
+ struct kvm_kpit_timer pit_timer;
+ u32 speaker_data_on;
+ struct mutex lock;
+ struct kvm_pit *pit;
+ bool inject_pending; /* if inject pending interrupts */
+ unsigned long last_injected_time;
+};
+
+struct kvm_pit {
+ unsigned long base_addresss;
+ struct kvm_io_device dev;
+ struct kvm_io_device speaker_dev;
+ struct kvm *kvm;
+ struct kvm_kpit_state pit_state;
+};
+
+#define KVM_PIT_BASE_ADDRESS 0x40
+#define KVM_SPEAKER_BASE_ADDRESS 0x61
+#define KVM_PIT_MEM_LENGTH 4
+#define KVM_PIT_FREQ 1193181
+#define KVM_MAX_PIT_INTR_INTERVAL HZ / 100
+#define KVM_PIT_CHANNEL_MASK 0x3
+
+void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu);
+void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
+void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val);
+struct kvm_pit *kvm_create_pit(struct kvm *kvm);
+void kvm_free_pit(struct kvm *kvm);
+void kvm_pit_reset(struct kvm_pit *pit);
+
+#endif
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index e5714759e97f..ce1f583459b1 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -23,6 +23,22 @@
#include <linux/kvm_host.h>
#include "irq.h"
+#include "i8254.h"
+
+/*
+ * check if there are pending timer events
+ * to be processed.
+ */
+int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
+{
+ int ret;
+
+ ret = pit_has_pending_timer(vcpu);
+ ret |= apic_has_pending_timer(vcpu);
+
+ return ret;
+}
+EXPORT_SYMBOL(kvm_cpu_has_pending_timer);
/*
* check if there is pending interrupt without
@@ -66,6 +82,7 @@ EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt);
void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
{
kvm_inject_apic_timer_irqs(vcpu);
+ kvm_inject_pit_timer_irqs(vcpu);
/* TODO: PIT, RTC etc. */
}
EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
@@ -73,6 +90,7 @@ EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
{
kvm_apic_timer_intr_post(vcpu, vec);
+ kvm_pit_timer_intr_post(vcpu, vec);
/* TODO: PIT, RTC etc. */
}
EXPORT_SYMBOL_GPL(kvm_timer_intr_post);
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index fa5ed5d59b5d..1802134b836f 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -85,4 +85,7 @@ void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu);
+int pit_has_pending_timer(struct kvm_vcpu *vcpu);
+int apic_has_pending_timer(struct kvm_vcpu *vcpu);
+
#endif
diff --git a/arch/x86/kvm/kvm_svm.h b/arch/x86/kvm/kvm_svm.h
index ecdfe97e4635..65ef0fc2c036 100644
--- a/arch/x86/kvm/kvm_svm.h
+++ b/arch/x86/kvm/kvm_svm.h
@@ -39,6 +39,8 @@ struct vcpu_svm {
unsigned long host_db_regs[NUM_DB_REGS];
unsigned long host_dr6;
unsigned long host_dr7;
+
+ u32 *msrpm;
};
#endif
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 68a6b1511934..57ac4e4c556a 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -338,10 +338,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
} else
apic_clear_vector(vector, apic->regs + APIC_TMR);
- if (vcpu->arch.mp_state == VCPU_MP_STATE_RUNNABLE)
+ if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
kvm_vcpu_kick(vcpu);
- else if (vcpu->arch.mp_state == VCPU_MP_STATE_HALTED) {
- vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE;
+ else if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) {
+ vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
if (waitqueue_active(&vcpu->wq))
wake_up_interruptible(&vcpu->wq);
}
@@ -362,11 +362,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
case APIC_DM_INIT:
if (level) {
- if (vcpu->arch.mp_state == VCPU_MP_STATE_RUNNABLE)
+ if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
printk(KERN_DEBUG
"INIT on a runnable vcpu %d\n",
vcpu->vcpu_id);
- vcpu->arch.mp_state = VCPU_MP_STATE_INIT_RECEIVED;
+ vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
kvm_vcpu_kick(vcpu);
} else {
printk(KERN_DEBUG
@@ -379,9 +379,9 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
case APIC_DM_STARTUP:
printk(KERN_DEBUG "SIPI to vcpu %d vector 0x%02x\n",
vcpu->vcpu_id, vector);
- if (vcpu->arch.mp_state == VCPU_MP_STATE_INIT_RECEIVED) {
+ if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
vcpu->arch.sipi_vector = vector;
- vcpu->arch.mp_state = VCPU_MP_STATE_SIPI_RECEIVED;
+ vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED;
if (waitqueue_active(&vcpu->wq))
wake_up_interruptible(&vcpu->wq);
}
@@ -658,7 +658,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
PRIx64 ", "
"timer initial count 0x%x, period %lldns, "
- "expire @ 0x%016" PRIx64 ".\n", __FUNCTION__,
+ "expire @ 0x%016" PRIx64 ".\n", __func__,
APIC_BUS_CYCLE_NS, ktime_to_ns(now),
apic_get_reg(apic, APIC_TMICT),
apic->timer.period,
@@ -691,7 +691,7 @@ static void apic_mmio_write(struct kvm_io_device *this,
/* too common printing */
if (offset != APIC_EOI)
apic_debug("%s: offset 0x%x with length 0x%x, and value is "
- "0x%x\n", __FUNCTION__, offset, len, val);
+ "0x%x\n", __func__, offset, len, val);
offset &= 0xff0;
@@ -822,6 +822,7 @@ void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
apic_set_tpr(apic, ((cr8 & 0x0f) << 4)
| (apic_get_reg(apic, APIC_TASKPRI) & 4));
}
+EXPORT_SYMBOL_GPL(kvm_lapic_set_tpr);
u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
{
@@ -869,7 +870,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
struct kvm_lapic *apic;
int i;
- apic_debug("%s\n", __FUNCTION__);
+ apic_debug("%s\n", __func__);
ASSERT(vcpu);
apic = vcpu->arch.apic;
@@ -907,7 +908,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
apic_update_ppr(apic);
apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr="
- "0x%016" PRIx64 ", base_address=0x%0lx.\n", __FUNCTION__,
+ "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__,
vcpu, kvm_apic_id(apic),
vcpu->arch.apic_base, apic->base_address);
}
@@ -940,7 +941,7 @@ static int __apic_timer_fn(struct kvm_lapic *apic)
atomic_inc(&apic->timer.pending);
if (waitqueue_active(q)) {
- apic->vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE;
+ apic->vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
wake_up_interruptible(q);
}
if (apic_lvtt_period(apic)) {
@@ -952,6 +953,16 @@ static int __apic_timer_fn(struct kvm_lapic *apic)
return result;
}
+int apic_has_pending_timer(struct kvm_vcpu *vcpu)
+{
+ struct kvm_lapic *lapic = vcpu->arch.apic;
+
+ if (lapic)
+ return atomic_read(&lapic->timer.pending);
+
+ return 0;
+}
+
static int __inject_apic_timer_irq(struct kvm_lapic *apic)
{
int vector;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index e55af12e11b7..2ad6f5481671 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -27,11 +27,22 @@
#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/swap.h>
+#include <linux/hugetlb.h>
+#include <linux/compiler.h>
#include <asm/page.h>
#include <asm/cmpxchg.h>
#include <asm/io.h>
+/*
+ * When setting this variable to true it enables Two-Dimensional-Paging
+ * where the hardware walks 2 page tables:
+ * 1. the guest-virtual to guest-physical
+ * 2. while doing 1. it walks guest-physical to host-physical
+ * If the hardware supports that we don't need to do shadow paging.
+ */
+bool tdp_enabled = false;
+
#undef MMU_DEBUG
#undef AUDIT
@@ -101,8 +112,6 @@ static int dbg = 1;
#define PT_FIRST_AVAIL_BITS_SHIFT 9
#define PT64_SECOND_AVAIL_BITS_SHIFT 52
-#define PT_SHADOW_IO_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
-
#define VALID_PAGE(x) ((x) != INVALID_PAGE)
#define PT64_LEVEL_BITS 9
@@ -159,6 +168,13 @@ static int dbg = 1;
#define ACC_USER_MASK PT_USER_MASK
#define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)
+struct kvm_pv_mmu_op_buffer {
+ void *ptr;
+ unsigned len;
+ unsigned processed;
+ char buf[512] __aligned(sizeof(long));
+};
+
struct kvm_rmap_desc {
u64 *shadow_ptes[RMAP_EXT];
struct kvm_rmap_desc *more;
@@ -200,11 +216,15 @@ static int is_present_pte(unsigned long pte)
static int is_shadow_present_pte(u64 pte)
{
- pte &= ~PT_SHADOW_IO_MARK;
return pte != shadow_trap_nonpresent_pte
&& pte != shadow_notrap_nonpresent_pte;
}
+static int is_large_pte(u64 pte)
+{
+ return pte & PT_PAGE_SIZE_MASK;
+}
+
static int is_writeble_pte(unsigned long pte)
{
return pte & PT_WRITABLE_MASK;
@@ -215,14 +235,14 @@ static int is_dirty_pte(unsigned long pte)
return pte & PT_DIRTY_MASK;
}
-static int is_io_pte(unsigned long pte)
+static int is_rmap_pte(u64 pte)
{
- return pte & PT_SHADOW_IO_MARK;
+ return is_shadow_present_pte(pte);
}
-static int is_rmap_pte(u64 pte)
+static pfn_t spte_to_pfn(u64 pte)
{
- return is_shadow_present_pte(pte);
+ return (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
}
static gfn_t pse36_gfn_delta(u32 gpte)
@@ -349,16 +369,100 @@ static void mmu_free_rmap_desc(struct kvm_rmap_desc *rd)
}
/*
+ * Return the pointer to the largepage write count for a given
+ * gfn, handling slots that are not large page aligned.
+ */
+static int *slot_largepage_idx(gfn_t gfn, struct kvm_memory_slot *slot)
+{
+ unsigned long idx;
+
+ idx = (gfn / KVM_PAGES_PER_HPAGE) -
+ (slot->base_gfn / KVM_PAGES_PER_HPAGE);
+ return &slot->lpage_info[idx].write_count;
+}
+
+static void account_shadowed(struct kvm *kvm, gfn_t gfn)
+{
+ int *write_count;
+
+ write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn));
+ *write_count += 1;
+ WARN_ON(*write_count > KVM_PAGES_PER_HPAGE);
+}
+
+static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn)
+{
+ int *write_count;
+
+ write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn));
+ *write_count -= 1;
+ WARN_ON(*write_count < 0);
+}
+
+static int has_wrprotected_page(struct kvm *kvm, gfn_t gfn)
+{
+ struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
+ int *largepage_idx;
+
+ if (slot) {
+ largepage_idx = slot_largepage_idx(gfn, slot);
+ return *largepage_idx;
+ }
+
+ return 1;
+}
+
+static int host_largepage_backed(struct kvm *kvm, gfn_t gfn)
+{
+ struct vm_area_struct *vma;
+ unsigned long addr;
+
+ addr = gfn_to_hva(kvm, gfn);
+ if (kvm_is_error_hva(addr))
+ return 0;
+
+ vma = find_vma(current->mm, addr);
+ if (vma && is_vm_hugetlb_page(vma))
+ return 1;
+
+ return 0;
+}
+
+static int is_largepage_backed(struct kvm_vcpu *vcpu, gfn_t large_gfn)
+{
+ struct kvm_memory_slot *slot;
+
+ if (has_wrprotected_page(vcpu->kvm, large_gfn))
+ return 0;
+
+ if (!host_largepage_backed(vcpu->kvm, large_gfn))
+ return 0;
+
+ slot = gfn_to_memslot(vcpu->kvm, large_gfn);
+ if (slot && slot->dirty_bitmap)
+ return 0;
+
+ return 1;
+}
+
+/*
* Take gfn and return the reverse mapping to it.
* Note: gfn must be unaliased before this function get called
*/
-static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn)
+static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int lpage)
{
struct kvm_memory_slot *slot;
+ unsigned long idx;
slot = gfn_to_memslot(kvm, gfn);
- return &slot->rmap[gfn - slot->base_gfn];
+ if (!lpage)
+ return &slot->rmap[gfn - slot->base_gfn];
+
+ idx = (gfn / KVM_PAGES_PER_HPAGE) -
+ (slot->base_gfn / KVM_PAGES_PER_HPAGE);
+
+ return &slot->lpage_info[idx].rmap_pde;
}
/*
@@ -370,7 +474,7 @@ static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn)
* If rmapp bit zero is one, (then rmap & ~1) points to a struct kvm_rmap_desc
* containing more mappings.
*/
-static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
+static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage)
{
struct kvm_mmu_page *sp;
struct kvm_rmap_desc *desc;
@@ -382,7 +486,7 @@ static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
gfn = unalias_gfn(vcpu->kvm, gfn);
sp = page_header(__pa(spte));
sp->gfns[spte - sp->spt] = gfn;
- rmapp = gfn_to_rmap(vcpu->kvm, gfn);
+ rmapp = gfn_to_rmap(vcpu->kvm, gfn, lpage);
if (!*rmapp) {
rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte);
*rmapp = (unsigned long)spte;
@@ -435,20 +539,21 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
struct kvm_rmap_desc *desc;
struct kvm_rmap_desc *prev_desc;
struct kvm_mmu_page *sp;
- struct page *page;
+ pfn_t pfn;
unsigned long *rmapp;
int i;
if (!is_rmap_pte(*spte))
return;
sp = page_header(__pa(spte));
- page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
- mark_page_accessed(page);
+ pfn = spte_to_pfn(*spte);
+ if (*spte & PT_ACCESSED_MASK)
+ kvm_set_pfn_accessed(pfn);
if (is_writeble_pte(*spte))
- kvm_release_page_dirty(page);
+ kvm_release_pfn_dirty(pfn);
else
- kvm_release_page_clean(page);
- rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt]);
+ kvm_release_pfn_clean(pfn);
+ rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], is_large_pte(*spte));
if (!*rmapp) {
printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
BUG();
@@ -514,7 +619,7 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
int write_protected = 0;
gfn = unalias_gfn(kvm, gfn);
- rmapp = gfn_to_rmap(kvm, gfn);
+ rmapp = gfn_to_rmap(kvm, gfn, 0);
spte = rmap_next(kvm, rmapp, NULL);
while (spte) {
@@ -527,8 +632,35 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
}
spte = rmap_next(kvm, rmapp, spte);
}
+ if (write_protected) {
+ pfn_t pfn;
+
+ spte = rmap_next(kvm, rmapp, NULL);
+ pfn = spte_to_pfn(*spte);
+ kvm_set_pfn_dirty(pfn);
+ }
+
+ /* check for huge page mappings */
+ rmapp = gfn_to_rmap(kvm, gfn, 1);
+ spte = rmap_next(kvm, rmapp, NULL);
+ while (spte) {
+ BUG_ON(!spte);
+ BUG_ON(!(*spte & PT_PRESENT_MASK));
+ BUG_ON((*spte & (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)) != (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK));
+ pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn);
+ if (is_writeble_pte(*spte)) {
+ rmap_remove(kvm, spte);
+ --kvm->stat.lpages;
+ set_shadow_pte(spte, shadow_trap_nonpresent_pte);
+ write_protected = 1;
+ }
+ spte = rmap_next(kvm, rmapp, spte);
+ }
+
if (write_protected)
kvm_flush_remote_tlbs(kvm);
+
+ account_shadowed(kvm, gfn);
}
#ifdef MMU_DEBUG
@@ -538,8 +670,8 @@ static int is_empty_shadow_page(u64 *spt)
u64 *end;
for (pos = spt, end = pos + PAGE_SIZE / sizeof(u64); pos != end; pos++)
- if ((*pos & ~PT_SHADOW_IO_MARK) != shadow_trap_nonpresent_pte) {
- printk(KERN_ERR "%s: %p %llx\n", __FUNCTION__,
+ if (*pos != shadow_trap_nonpresent_pte) {
+ printk(KERN_ERR "%s: %p %llx\n", __func__,
pos, *pos);
return 0;
}
@@ -559,7 +691,7 @@ static void kvm_mmu_free_page(struct kvm *kvm, struct kvm_mmu_page *sp)
static unsigned kvm_page_table_hashfn(gfn_t gfn)
{
- return gfn;
+ return gfn & ((1 << KVM_MMU_HASH_SHIFT) - 1);
}
static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
@@ -662,13 +794,14 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
struct kvm_mmu_page *sp;
struct hlist_node *node;
- pgprintk("%s: looking for gfn %lx\n", __FUNCTION__, gfn);
- index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
+ pgprintk("%s: looking for gfn %lx\n", __func__, gfn);
+ index = kvm_page_table_hashfn(gfn);
bucket = &kvm->arch.mmu_page_hash[index];
hlist_for_each_entry(sp, node, bucket, hash_link)
- if (sp->gfn == gfn && !sp->role.metaphysical) {
+ if (sp->gfn == gfn && !sp->role.metaphysical
+ && !sp->role.invalid) {
pgprintk("%s: found role %x\n",
- __FUNCTION__, sp->role.word);
+ __func__, sp->role.word);
return sp;
}
return NULL;
@@ -699,27 +832,27 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
role.quadrant = quadrant;
}
- pgprintk("%s: looking gfn %lx role %x\n", __FUNCTION__,
+ pgprintk("%s: looking gfn %lx role %x\n", __func__,
gfn, role.word);
- index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
+ index = kvm_page_table_hashfn(gfn);
bucket = &vcpu->kvm->arch.mmu_page_hash[index];
hlist_for_each_entry(sp, node, bucket, hash_link)
if (sp->gfn == gfn && sp->role.word == role.word) {
mmu_page_add_parent_pte(vcpu, sp, parent_pte);
- pgprintk("%s: found\n", __FUNCTION__);
+ pgprintk("%s: found\n", __func__);
return sp;
}
++vcpu->kvm->stat.mmu_cache_miss;
sp = kvm_mmu_alloc_page(vcpu, parent_pte);
if (!sp)
return sp;
- pgprintk("%s: adding gfn %lx role %x\n", __FUNCTION__, gfn, role.word);
+ pgprintk("%s: adding gfn %lx role %x\n", __func__, gfn, role.word);
sp->gfn = gfn;
sp->role = role;
hlist_add_head(&sp->hash_link, bucket);
- vcpu->arch.mmu.prefetch_page(vcpu, sp);
if (!metaphysical)
rmap_write_protect(vcpu->kvm, gfn);
+ vcpu->arch.mmu.prefetch_page(vcpu, sp);
return sp;
}
@@ -745,11 +878,17 @@ static void kvm_mmu_page_unlink_children(struct kvm *kvm,
for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
ent = pt[i];
+ if (is_shadow_present_pte(ent)) {
+ if (!is_large_pte(ent)) {
+ ent &= PT64_BASE_ADDR_MASK;
+ mmu_page_remove_parent_pte(page_header(ent),
+ &pt[i]);
+ } else {
+ --kvm->stat.lpages;
+ rmap_remove(kvm, &pt[i]);
+ }
+ }
pt[i] = shadow_trap_nonpresent_pte;
- if (!is_shadow_present_pte(ent))
- continue;
- ent &= PT64_BASE_ADDR_MASK;
- mmu_page_remove_parent_pte(page_header(ent), &pt[i]);
}
kvm_flush_remote_tlbs(kvm);
}
@@ -789,10 +928,15 @@ static void kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
}
kvm_mmu_page_unlink_children(kvm, sp);
if (!sp->root_count) {
+ if (!sp->role.metaphysical)
+ unaccount_shadowed(kvm, sp->gfn);
hlist_del(&sp->hash_link);
kvm_mmu_free_page(kvm, sp);
- } else
+ } else {
list_move(&sp->link, &kvm->arch.active_mmu_pages);
+ sp->role.invalid = 1;
+ kvm_reload_remote_mmus(kvm);
+ }
kvm_mmu_reset_last_pte_updated(kvm);
}
@@ -838,13 +982,13 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
struct hlist_node *node, *n;
int r;
- pgprintk("%s: looking for gfn %lx\n", __FUNCTION__, gfn);
+ pgprintk("%s: looking for gfn %lx\n", __func__, gfn);
r = 0;
- index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
+ index = kvm_page_table_hashfn(gfn);
bucket = &kvm->arch.mmu_page_hash[index];
hlist_for_each_entry_safe(sp, node, n, bucket, hash_link)
if (sp->gfn == gfn && !sp->role.metaphysical) {
- pgprintk("%s: gfn %lx role %x\n", __FUNCTION__, gfn,
+ pgprintk("%s: gfn %lx role %x\n", __func__, gfn,
sp->role.word);
kvm_mmu_zap_page(kvm, sp);
r = 1;
@@ -857,7 +1001,7 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
struct kvm_mmu_page *sp;
while ((sp = kvm_mmu_lookup_page(kvm, gfn)) != NULL) {
- pgprintk("%s: zap %lx %x\n", __FUNCTION__, gfn, sp->role.word);
+ pgprintk("%s: zap %lx %x\n", __func__, gfn, sp->role.word);
kvm_mmu_zap_page(kvm, sp);
}
}
@@ -889,26 +1033,39 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
unsigned pt_access, unsigned pte_access,
int user_fault, int write_fault, int dirty,
- int *ptwrite, gfn_t gfn, struct page *page)
+ int *ptwrite, int largepage, gfn_t gfn,
+ pfn_t pfn, bool speculative)
{
u64 spte;
int was_rmapped = 0;
int was_writeble = is_writeble_pte(*shadow_pte);
- hfn_t host_pfn = (*shadow_pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
pgprintk("%s: spte %llx access %x write_fault %d"
" user_fault %d gfn %lx\n",
- __FUNCTION__, *shadow_pte, pt_access,
+ __func__, *shadow_pte, pt_access,
write_fault, user_fault, gfn);
if (is_rmap_pte(*shadow_pte)) {
- if (host_pfn != page_to_pfn(page)) {
+ /*
+ * If we overwrite a PTE page pointer with a 2MB PMD, unlink
+ * the parent of the now unreachable PTE.
+ */
+ if (largepage && !is_large_pte(*shadow_pte)) {
+ struct kvm_mmu_page *child;
+ u64 pte = *shadow_pte;
+
+ child = page_header(pte & PT64_BASE_ADDR_MASK);
+ mmu_page_remove_parent_pte(child, shadow_pte);
+ } else if (pfn != spte_to_pfn(*shadow_pte)) {
pgprintk("hfn old %lx new %lx\n",
- host_pfn, page_to_pfn(page));
+ spte_to_pfn(*shadow_pte), pfn);
rmap_remove(vcpu->kvm, shadow_pte);
+ } else {
+ if (largepage)
+ was_rmapped = is_large_pte(*shadow_pte);
+ else
+ was_rmapped = 1;
}
- else
- was_rmapped = 1;
}
/*
@@ -917,6 +1074,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
* demand paging).
*/
spte = PT_PRESENT_MASK | PT_DIRTY_MASK;
+ if (!speculative)
+ pte_access |= PT_ACCESSED_MASK;
if (!dirty)
pte_access &= ~ACC_WRITE_MASK;
if (!(pte_access & ACC_EXEC_MASK))
@@ -925,15 +1084,10 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
spte |= PT_PRESENT_MASK;
if (pte_access & ACC_USER_MASK)
spte |= PT_USER_MASK;
+ if (largepage)
+ spte |= PT_PAGE_SIZE_MASK;
- if (is_error_page(page)) {
- set_shadow_pte(shadow_pte,
- shadow_trap_nonpresent_pte | PT_SHADOW_IO_MARK);
- kvm_release_page_clean(page);
- return;
- }
-
- spte |= page_to_phys(page);
+ spte |= (u64)pfn << PAGE_SHIFT;
if ((pte_access & ACC_WRITE_MASK)
|| (write_fault && !is_write_protection(vcpu) && !user_fault)) {
@@ -946,9 +1100,10 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
}
shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn);
- if (shadow) {
+ if (shadow ||
+ (largepage && has_wrprotected_page(vcpu->kvm, gfn))) {
pgprintk("%s: found shadow page for %lx, marking ro\n",
- __FUNCTION__, gfn);
+ __func__, gfn);
pte_access &= ~ACC_WRITE_MASK;
if (is_writeble_pte(spte)) {
spte &= ~PT_WRITABLE_MASK;
@@ -964,18 +1119,25 @@ unshadowed:
if (pte_access & ACC_WRITE_MASK)
mark_page_dirty(vcpu->kvm, gfn);
- pgprintk("%s: setting spte %llx\n", __FUNCTION__, spte);
+ pgprintk("%s: setting spte %llx\n", __func__, spte);
+ pgprintk("instantiating %s PTE (%s) at %d (%llx) addr %llx\n",
+ (spte&PT_PAGE_SIZE_MASK)? "2MB" : "4kB",
+ (spte&PT_WRITABLE_MASK)?"RW":"R", gfn, spte, shadow_pte);
set_shadow_pte(shadow_pte, spte);
+ if (!was_rmapped && (spte & PT_PAGE_SIZE_MASK)
+ && (spte & PT_PRESENT_MASK))
+ ++vcpu->kvm->stat.lpages;
+
page_header_update_slot(vcpu->kvm, shadow_pte, gfn);
if (!was_rmapped) {
- rmap_add(vcpu, shadow_pte, gfn);
+ rmap_add(vcpu, shadow_pte, gfn, largepage);
if (!is_rmap_pte(*shadow_pte))
- kvm_release_page_clean(page);
+ kvm_release_pfn_clean(pfn);
} else {
if (was_writeble)
- kvm_release_page_dirty(page);
+ kvm_release_pfn_dirty(pfn);
else
- kvm_release_page_clean(page);
+ kvm_release_pfn_clean(pfn);
}
if (!ptwrite || !*ptwrite)
vcpu->arch.last_pte_updated = shadow_pte;
@@ -985,10 +1147,10 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
{
}
-static int __nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write,
- gfn_t gfn, struct page *page)
+static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
+ int largepage, gfn_t gfn, pfn_t pfn,
+ int level)
{
- int level = PT32E_ROOT_LEVEL;
hpa_t table_addr = vcpu->arch.mmu.root_hpa;
int pt_write = 0;
@@ -1001,8 +1163,14 @@ static int __nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write,
if (level == 1) {
mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL,
- 0, write, 1, &pt_write, gfn, page);
- return pt_write || is_io_pte(table[index]);
+ 0, write, 1, &pt_write, 0, gfn, pfn, false);
+ return pt_write;
+ }
+
+ if (largepage && level == 2) {
+ mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL,
+ 0, write, 1, &pt_write, 1, gfn, pfn, false);
+ return pt_write;
}
if (table[index] == shadow_trap_nonpresent_pte) {
@@ -1016,7 +1184,7 @@ static int __nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write,
1, ACC_ALL, &table[index]);
if (!new_table) {
pgprintk("nonpaging_map: ENOMEM\n");
- kvm_release_page_clean(page);
+ kvm_release_pfn_clean(pfn);
return -ENOMEM;
}
@@ -1030,21 +1198,30 @@ static int __nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write,
static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
{
int r;
-
- struct page *page;
-
- down_read(&vcpu->kvm->slots_lock);
+ int largepage = 0;
+ pfn_t pfn;
down_read(&current->mm->mmap_sem);
- page = gfn_to_page(vcpu->kvm, gfn);
+ if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) {
+ gfn &= ~(KVM_PAGES_PER_HPAGE-1);
+ largepage = 1;
+ }
+
+ pfn = gfn_to_pfn(vcpu->kvm, gfn);
up_read(&current->mm->mmap_sem);
+ /* mmio */
+ if (is_error_pfn(pfn)) {
+ kvm_release_pfn_clean(pfn);
+ return 1;
+ }
+
spin_lock(&vcpu->kvm->mmu_lock);
kvm_mmu_free_some_pages(vcpu);
- r = __nonpaging_map(vcpu, v, write, gfn, page);
+ r = __direct_map(vcpu, v, write, largepage, gfn, pfn,
+ PT32E_ROOT_LEVEL);
spin_unlock(&vcpu->kvm->mmu_lock);
- up_read(&vcpu->kvm->slots_lock);
return r;
}
@@ -1073,6 +1250,8 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
sp = page_header(root);
--sp->root_count;
+ if (!sp->root_count && sp->role.invalid)
+ kvm_mmu_zap_page(vcpu->kvm, sp);
vcpu->arch.mmu.root_hpa = INVALID_PAGE;
spin_unlock(&vcpu->kvm->mmu_lock);
return;
@@ -1085,6 +1264,8 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
root &= PT64_BASE_ADDR_MASK;
sp = page_header(root);
--sp->root_count;
+ if (!sp->root_count && sp->role.invalid)
+ kvm_mmu_zap_page(vcpu->kvm, sp);
}
vcpu->arch.mmu.pae_root[i] = INVALID_PAGE;
}
@@ -1097,6 +1278,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
int i;
gfn_t root_gfn;
struct kvm_mmu_page *sp;
+ int metaphysical = 0;
root_gfn = vcpu->arch.cr3 >> PAGE_SHIFT;
@@ -1105,14 +1287,20 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
hpa_t root = vcpu->arch.mmu.root_hpa;
ASSERT(!VALID_PAGE(root));
+ if (tdp_enabled)
+ metaphysical = 1;
sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
- PT64_ROOT_LEVEL, 0, ACC_ALL, NULL);
+ PT64_ROOT_LEVEL, metaphysical,
+ ACC_ALL, NULL);
root = __pa(sp->spt);
++sp->root_count;
vcpu->arch.mmu.root_hpa = root;
return;
}
#endif
+ metaphysical = !is_paging(vcpu);
+ if (tdp_enabled)
+ metaphysical = 1;
for (i = 0; i < 4; ++i) {
hpa_t root = vcpu->arch.mmu.pae_root[i];
@@ -1126,7 +1314,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
} else if (vcpu->arch.mmu.root_level == 0)
root_gfn = 0;
sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
- PT32_ROOT_LEVEL, !is_paging(vcpu),
+ PT32_ROOT_LEVEL, metaphysical,
ACC_ALL, NULL);
root = __pa(sp->spt);
++sp->root_count;
@@ -1146,7 +1334,7 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
gfn_t gfn;
int r;
- pgprintk("%s: gva %lx error %x\n", __FUNCTION__, gva, error_code);
+ pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code);
r = mmu_topup_memory_caches(vcpu);
if (r)
return r;
@@ -1160,6 +1348,41 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
error_code & PFERR_WRITE_MASK, gfn);
}
+static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
+ u32 error_code)
+{
+ pfn_t pfn;
+ int r;
+ int largepage = 0;
+ gfn_t gfn = gpa >> PAGE_SHIFT;
+
+ ASSERT(vcpu);
+ ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));
+
+ r = mmu_topup_memory_caches(vcpu);
+ if (r)
+ return r;
+
+ down_read(&current->mm->mmap_sem);
+ if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) {
+ gfn &= ~(KVM_PAGES_PER_HPAGE-1);
+ largepage = 1;
+ }
+ pfn = gfn_to_pfn(vcpu->kvm, gfn);
+ up_read(&current->mm->mmap_sem);
+ if (is_error_pfn(pfn)) {
+ kvm_release_pfn_clean(pfn);
+ return 1;
+ }
+ spin_lock(&vcpu->kvm->mmu_lock);
+ kvm_mmu_free_some_pages(vcpu);
+ r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK,
+ largepage, gfn, pfn, TDP_ROOT_LEVEL);
+ spin_unlock(&vcpu->kvm->mmu_lock);
+
+ return r;
+}
+
static void nonpaging_free(struct kvm_vcpu *vcpu)
{
mmu_free_roots(vcpu);
@@ -1188,7 +1411,7 @@ void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
static void paging_new_cr3(struct kvm_vcpu *vcpu)
{
- pgprintk("%s: cr3 %lx\n", __FUNCTION__, vcpu->arch.cr3);
+ pgprintk("%s: cr3 %lx\n", __func__, vcpu->arch.cr3);
mmu_free_roots(vcpu);
}
@@ -1253,7 +1476,35 @@ static int paging32E_init_context(struct kvm_vcpu *vcpu)
return paging64_init_context_common(vcpu, PT32E_ROOT_LEVEL);
}
-static int init_kvm_mmu(struct kvm_vcpu *vcpu)
+static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
+{
+ struct kvm_mmu *context = &vcpu->arch.mmu;
+
+ context->new_cr3 = nonpaging_new_cr3;
+ context->page_fault = tdp_page_fault;
+ context->free = nonpaging_free;
+ context->prefetch_page = nonpaging_prefetch_page;
+ context->shadow_root_level = TDP_ROOT_LEVEL;
+ context->root_hpa = INVALID_PAGE;
+
+ if (!is_paging(vcpu)) {
+ context->gva_to_gpa = nonpaging_gva_to_gpa;
+ context->root_level = 0;
+ } else if (is_long_mode(vcpu)) {
+ context->gva_to_gpa = paging64_gva_to_gpa;
+ context->root_level = PT64_ROOT_LEVEL;
+ } else if (is_pae(vcpu)) {
+ context->gva_to_gpa = paging64_gva_to_gpa;
+ context->root_level = PT32E_ROOT_LEVEL;
+ } else {
+ context->gva_to_gpa = paging32_gva_to_gpa;
+ context->root_level = PT32_ROOT_LEVEL;
+ }
+
+ return 0;
+}
+
+static int init_kvm_softmmu(struct kvm_vcpu *vcpu)
{
ASSERT(vcpu);
ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
@@ -1268,6 +1519,16 @@ static int init_kvm_mmu(struct kvm_vcpu *vcpu)
return paging32_init_context(vcpu);
}
+static int init_kvm_mmu(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.update_pte.pfn = bad_pfn;
+
+ if (tdp_enabled)
+ return init_kvm_tdp_mmu(vcpu);
+ else
+ return init_kvm_softmmu(vcpu);
+}
+
static void destroy_kvm_mmu(struct kvm_vcpu *vcpu)
{
ASSERT(vcpu);
@@ -1316,7 +1577,8 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu,
pte = *spte;
if (is_shadow_present_pte(pte)) {
- if (sp->role.level == PT_PAGE_TABLE_LEVEL)
+ if (sp->role.level == PT_PAGE_TABLE_LEVEL ||
+ is_large_pte(pte))
rmap_remove(vcpu->kvm, spte);
else {
child = page_header(pte & PT64_BASE_ADDR_MASK);
@@ -1324,24 +1586,26 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu,
}
}
set_shadow_pte(spte, shadow_trap_nonpresent_pte);
+ if (is_large_pte(pte))
+ --vcpu->kvm->stat.lpages;
}
static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp,
u64 *spte,
- const void *new, int bytes,
- int offset_in_pte)
+ const void *new)
{
- if (sp->role.level != PT_PAGE_TABLE_LEVEL) {
+ if ((sp->role.level != PT_PAGE_TABLE_LEVEL)
+ && !vcpu->arch.update_pte.largepage) {
++vcpu->kvm->stat.mmu_pde_zapped;
return;
}
++vcpu->kvm->stat.mmu_pte_updated;
if (sp->role.glevels == PT32_ROOT_LEVEL)
- paging32_update_pte(vcpu, sp, spte, new, bytes, offset_in_pte);
+ paging32_update_pte(vcpu, sp, spte, new);
else
- paging64_update_pte(vcpu, sp, spte, new, bytes, offset_in_pte);
+ paging64_update_pte(vcpu, sp, spte, new);
}
static bool need_remote_flush(u64 old, u64 new)
@@ -1378,7 +1642,9 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
gfn_t gfn;
int r;
u64 gpte = 0;
- struct page *page;
+ pfn_t pfn;
+
+ vcpu->arch.update_pte.largepage = 0;
if (bytes != 4 && bytes != 8)
return;
@@ -1408,11 +1674,19 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
down_read(&current->mm->mmap_sem);
- page = gfn_to_page(vcpu->kvm, gfn);
+ if (is_large_pte(gpte) && is_largepage_backed(vcpu, gfn)) {
+ gfn &= ~(KVM_PAGES_PER_HPAGE-1);
+ vcpu->arch.update_pte.largepage = 1;
+ }
+ pfn = gfn_to_pfn(vcpu->kvm, gfn);
up_read(&current->mm->mmap_sem);
+ if (is_error_pfn(pfn)) {
+ kvm_release_pfn_clean(pfn);
+ return;
+ }
vcpu->arch.update_pte.gfn = gfn;
- vcpu->arch.update_pte.page = page;
+ vcpu->arch.update_pte.pfn = pfn;
}
void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
@@ -1423,7 +1697,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
struct hlist_node *node, *n;
struct hlist_head *bucket;
unsigned index;
- u64 entry;
+ u64 entry, gentry;
u64 *spte;
unsigned offset = offset_in_page(gpa);
unsigned pte_size;
@@ -1433,8 +1707,9 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
int level;
int flooded = 0;
int npte;
+ int r;
- pgprintk("%s: gpa %llx bytes %d\n", __FUNCTION__, gpa, bytes);
+ pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
mmu_guess_page_from_pte_write(vcpu, gpa, new, bytes);
spin_lock(&vcpu->kvm->mmu_lock);
kvm_mmu_free_some_pages(vcpu);
@@ -1450,7 +1725,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
vcpu->arch.last_pt_write_count = 1;
vcpu->arch.last_pte_updated = NULL;
}
- index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
+ index = kvm_page_table_hashfn(gfn);
bucket = &vcpu->kvm->arch.mmu_page_hash[index];
hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) {
if (sp->gfn != gfn || sp->role.metaphysical)
@@ -1496,20 +1771,29 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
continue;
}
spte = &sp->spt[page_offset / sizeof(*spte)];
+ if ((gpa & (pte_size - 1)) || (bytes < pte_size)) {
+ gentry = 0;
+ r = kvm_read_guest_atomic(vcpu->kvm,
+ gpa & ~(u64)(pte_size - 1),
+ &gentry, pte_size);
+ new = (const void *)&gentry;
+ if (r < 0)
+ new = NULL;
+ }
while (npte--) {
entry = *spte;
mmu_pte_write_zap_pte(vcpu, sp, spte);
- mmu_pte_write_new_pte(vcpu, sp, spte, new, bytes,
- page_offset & (pte_size - 1));
+ if (new)
+ mmu_pte_write_new_pte(vcpu, sp, spte, new);
mmu_pte_write_flush_tlb(vcpu, entry, *spte);
++spte;
}
}
kvm_mmu_audit(vcpu, "post pte write");
spin_unlock(&vcpu->kvm->mmu_lock);
- if (vcpu->arch.update_pte.page) {
- kvm_release_page_clean(vcpu->arch.update_pte.page);
- vcpu->arch.update_pte.page = NULL;
+ if (!is_error_pfn(vcpu->arch.update_pte.pfn)) {
+ kvm_release_pfn_clean(vcpu->arch.update_pte.pfn);
+ vcpu->arch.update_pte.pfn = bad_pfn;
}
}
@@ -1518,9 +1802,7 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
gpa_t gpa;
int r;
- down_read(&vcpu->kvm->slots_lock);
gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva);
- up_read(&vcpu->kvm->slots_lock);
spin_lock(&vcpu->kvm->mmu_lock);
r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
@@ -1577,6 +1859,12 @@ out:
}
EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
+void kvm_enable_tdp(void)
+{
+ tdp_enabled = true;
+}
+EXPORT_SYMBOL_GPL(kvm_enable_tdp);
+
static void free_mmu_pages(struct kvm_vcpu *vcpu)
{
struct kvm_mmu_page *sp;
@@ -1677,7 +1965,53 @@ void kvm_mmu_zap_all(struct kvm *kvm)
kvm_flush_remote_tlbs(kvm);
}
-void kvm_mmu_module_exit(void)
+void kvm_mmu_remove_one_alloc_mmu_page(struct kvm *kvm)
+{
+ struct kvm_mmu_page *page;
+
+ page = container_of(kvm->arch.active_mmu_pages.prev,
+ struct kvm_mmu_page, link);
+ kvm_mmu_zap_page(kvm, page);
+}
+
+static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask)
+{
+ struct kvm *kvm;
+ struct kvm *kvm_freed = NULL;
+ int cache_count = 0;
+
+ spin_lock(&kvm_lock);
+
+ list_for_each_entry(kvm, &vm_list, vm_list) {
+ int npages;
+
+ spin_lock(&kvm->mmu_lock);
+ npages = kvm->arch.n_alloc_mmu_pages -
+ kvm->arch.n_free_mmu_pages;
+ cache_count += npages;
+ if (!kvm_freed && nr_to_scan > 0 && npages > 0) {
+ kvm_mmu_remove_one_alloc_mmu_page(kvm);
+ cache_count--;
+ kvm_freed = kvm;
+ }
+ nr_to_scan--;
+
+ spin_unlock(&kvm->mmu_lock);
+ }
+ if (kvm_freed)
+ list_move_tail(&kvm_freed->vm_list, &vm_list);
+
+ spin_unlock(&kvm_lock);
+
+ return cache_count;
+}
+
+static struct shrinker mmu_shrinker = {
+ .shrink = mmu_shrink,
+ .seeks = DEFAULT_SEEKS * 10,
+};
+
+void mmu_destroy_caches(void)
{
if (pte_chain_cache)
kmem_cache_destroy(pte_chain_cache);
@@ -1687,6 +2021,12 @@ void kvm_mmu_module_exit(void)
kmem_cache_destroy(mmu_page_header_cache);
}
+void kvm_mmu_module_exit(void)
+{
+ mmu_destroy_caches();
+ unregister_shrinker(&mmu_shrinker);
+}
+
int kvm_mmu_module_init(void)
{
pte_chain_cache = kmem_cache_create("kvm_pte_chain",
@@ -1706,10 +2046,12 @@ int kvm_mmu_module_init(void)
if (!mmu_page_header_cache)
goto nomem;
+ register_shrinker(&mmu_shrinker);
+
return 0;
nomem:
- kvm_mmu_module_exit();
+ mmu_destroy_caches();
return -ENOMEM;
}
@@ -1732,6 +2074,127 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
return nr_mmu_pages;
}
+static void *pv_mmu_peek_buffer(struct kvm_pv_mmu_op_buffer *buffer,
+ unsigned len)
+{
+ if (len > buffer->len)
+ return NULL;
+ return buffer->ptr;
+}
+
+static void *pv_mmu_read_buffer(struct kvm_pv_mmu_op_buffer *buffer,
+ unsigned len)
+{
+ void *ret;
+
+ ret = pv_mmu_peek_buffer(buffer, len);
+ if (!ret)
+ return ret;
+ buffer->ptr += len;
+ buffer->len -= len;
+ buffer->processed += len;
+ return ret;
+}
+
+static int kvm_pv_mmu_write(struct kvm_vcpu *vcpu,
+ gpa_t addr, gpa_t value)
+{
+ int bytes = 8;
+ int r;
+
+ if (!is_long_mode(vcpu) && !is_pae(vcpu))
+ bytes = 4;
+
+ r = mmu_topup_memory_caches(vcpu);
+ if (r)
+ return r;
+
+ if (!emulator_write_phys(vcpu, addr, &value, bytes))
+ return -EFAULT;
+
+ return 1;
+}
+
+static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu)
+{
+ kvm_x86_ops->tlb_flush(vcpu);
+ return 1;
+}
+
+static int kvm_pv_mmu_release_pt(struct kvm_vcpu *vcpu, gpa_t addr)
+{
+ spin_lock(&vcpu->kvm->mmu_lock);
+ mmu_unshadow(vcpu->kvm, addr >> PAGE_SHIFT);
+ spin_unlock(&vcpu->kvm->mmu_lock);
+ return 1;
+}
+
+static int kvm_pv_mmu_op_one(struct kvm_vcpu *vcpu,
+ struct kvm_pv_mmu_op_buffer *buffer)
+{
+ struct kvm_mmu_op_header *header;
+
+ header = pv_mmu_peek_buffer(buffer, sizeof *header);
+ if (!header)
+ return 0;
+ switch (header->op) {
+ case KVM_MMU_OP_WRITE_PTE: {
+ struct kvm_mmu_op_write_pte *wpte;
+
+ wpte = pv_mmu_read_buffer(buffer, sizeof *wpte);
+ if (!wpte)
+ return 0;
+ return kvm_pv_mmu_write(vcpu, wpte->pte_phys,
+ wpte->pte_val);
+ }
+ case KVM_MMU_OP_FLUSH_TLB: {
+ struct kvm_mmu_op_flush_tlb *ftlb;
+
+ ftlb = pv_mmu_read_buffer(buffer, sizeof *ftlb);
+ if (!ftlb)
+ return 0;
+ return kvm_pv_mmu_flush_tlb(vcpu);
+ }
+ case KVM_MMU_OP_RELEASE_PT: {
+ struct kvm_mmu_op_release_pt *rpt;
+
+ rpt = pv_mmu_read_buffer(buffer, sizeof *rpt);
+ if (!rpt)
+ return 0;
+ return kvm_pv_mmu_release_pt(vcpu, rpt->pt_phys);
+ }
+ default: return 0;
+ }
+}
+
+int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
+ gpa_t addr, unsigned long *ret)
+{
+ int r;
+ struct kvm_pv_mmu_op_buffer buffer;
+
+ buffer.ptr = buffer.buf;
+ buffer.len = min_t(unsigned long, bytes, sizeof buffer.buf);
+ buffer.processed = 0;
+
+ r = kvm_read_guest(vcpu->kvm, addr, buffer.buf, buffer.len);
+ if (r)
+ goto out;
+
+ while (buffer.len) {
+ r = kvm_pv_mmu_op_one(vcpu, &buffer);
+ if (r < 0)
+ goto out;
+ if (r == 0)
+ break;
+ }
+
+ r = 1;
+out:
+ *ret = buffer.processed;
+ return r;
+}
+
#ifdef AUDIT
static const char *audit_msg;
@@ -1768,8 +2231,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
audit_mappings_page(vcpu, ent, va, level - 1);
} else {
gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, va);
- struct page *page = gpa_to_page(vcpu, gpa);
- hpa_t hpa = page_to_phys(page);
+ hpa_t hpa = (hpa_t)gpa_to_pfn(vcpu, gpa) << PAGE_SHIFT;
if (is_shadow_present_pte(ent)
&& (ent & PT64_BASE_ADDR_MASK) != hpa)
@@ -1782,7 +2244,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
&& !is_error_hpa(hpa))
printk(KERN_ERR "audit: (%s) notrap shadow,"
" valid guest gva %lx\n", audit_msg, va);
- kvm_release_page_clean(page);
+ kvm_release_pfn_clean(pfn);
}
}
@@ -1867,7 +2329,7 @@ static void audit_rmap(struct kvm_vcpu *vcpu)
if (n_rmap != n_actual)
printk(KERN_ERR "%s: (%s) rmap %d actual %d\n",
- __FUNCTION__, audit_msg, n_rmap, n_actual);
+ __func__, audit_msg, n_rmap, n_actual);
}
static void audit_write_protection(struct kvm_vcpu *vcpu)
@@ -1887,7 +2349,7 @@ static void audit_write_protection(struct kvm_vcpu *vcpu)
if (*rmapp)
printk(KERN_ERR "%s: (%s) shadow page has writable"
" mappings: gfn %lx role %x\n",
- __FUNCTION__, audit_msg, sp->gfn,
+ __func__, audit_msg, sp->gfn,
sp->role.word);
}
}
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 1fce19ec7a23..e64e9f56a65e 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -3,6 +3,12 @@
#include <linux/kvm_host.h>
+#ifdef CONFIG_X86_64
+#define TDP_ROOT_LEVEL PT64_ROOT_LEVEL
+#else
+#define TDP_ROOT_LEVEL PT32E_ROOT_LEVEL
+#endif
+
static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
{
if (unlikely(vcpu->kvm->arch.n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES))
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index ecc0856268c4..156fe10288ae 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -130,7 +130,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
unsigned index, pt_access, pte_access;
gpa_t pte_gpa;
- pgprintk("%s: addr %lx\n", __FUNCTION__, addr);
+ pgprintk("%s: addr %lx\n", __func__, addr);
walk:
walker->level = vcpu->arch.mmu.root_level;
pte = vcpu->arch.cr3;
@@ -155,7 +155,7 @@ walk:
pte_gpa += index * sizeof(pt_element_t);
walker->table_gfn[walker->level - 1] = table_gfn;
walker->pte_gpa[walker->level - 1] = pte_gpa;
- pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
+ pgprintk("%s: table_gfn[%d] %lx\n", __func__,
walker->level - 1, table_gfn);
kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte));
@@ -222,7 +222,7 @@ walk:
walker->pt_access = pt_access;
walker->pte_access = pte_access;
pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
- __FUNCTION__, (u64)pte, pt_access, pte_access);
+ __func__, (u64)pte, pt_access, pte_access);
return 1;
not_present:
@@ -243,31 +243,30 @@ err:
}
static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
- u64 *spte, const void *pte, int bytes,
- int offset_in_pte)
+ u64 *spte, const void *pte)
{
pt_element_t gpte;
unsigned pte_access;
- struct page *npage;
+ pfn_t pfn;
+ int largepage = vcpu->arch.update_pte.largepage;
gpte = *(const pt_element_t *)pte;
if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) {
- if (!offset_in_pte && !is_present_pte(gpte))
+ if (!is_present_pte(gpte))
set_shadow_pte(spte, shadow_notrap_nonpresent_pte);
return;
}
- if (bytes < sizeof(pt_element_t))
- return;
- pgprintk("%s: gpte %llx spte %p\n", __FUNCTION__, (u64)gpte, spte);
+ pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
pte_access = page->role.access & FNAME(gpte_access)(vcpu, gpte);
if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn)
return;
- npage = vcpu->arch.update_pte.page;
- if (!npage)
+ pfn = vcpu->arch.update_pte.pfn;
+ if (is_error_pfn(pfn))
return;
- get_page(npage);
+ kvm_get_pfn(pfn);
mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
- gpte & PT_DIRTY_MASK, NULL, gpte_to_gfn(gpte), npage);
+ gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte),
+ pfn, true);
}
/*
@@ -275,8 +274,8 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
*/
static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
struct guest_walker *walker,
- int user_fault, int write_fault, int *ptwrite,
- struct page *page)
+ int user_fault, int write_fault, int largepage,
+ int *ptwrite, pfn_t pfn)
{
hpa_t shadow_addr;
int level;
@@ -304,11 +303,19 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
shadow_ent = ((u64 *)__va(shadow_addr)) + index;
if (level == PT_PAGE_TABLE_LEVEL)
break;
- if (is_shadow_present_pte(*shadow_ent)) {
+
+ if (largepage && level == PT_DIRECTORY_LEVEL)
+ break;
+
+ if (is_shadow_present_pte(*shadow_ent)
+ && !is_large_pte(*shadow_ent)) {
shadow_addr = *shadow_ent & PT64_BASE_ADDR_MASK;
continue;
}
+ if (is_large_pte(*shadow_ent))
+ rmap_remove(vcpu->kvm, shadow_ent);
+
if (level - 1 == PT_PAGE_TABLE_LEVEL
&& walker->level == PT_DIRECTORY_LEVEL) {
metaphysical = 1;
@@ -329,7 +336,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
walker->pte_gpa[level - 2],
&curr_pte, sizeof(curr_pte));
if (r || curr_pte != walker->ptes[level - 2]) {
- kvm_release_page_clean(page);
+ kvm_release_pfn_clean(pfn);
return NULL;
}
}
@@ -342,7 +349,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
mmu_set_spte(vcpu, shadow_ent, access, walker->pte_access & access,
user_fault, write_fault,
walker->ptes[walker->level-1] & PT_DIRTY_MASK,
- ptwrite, walker->gfn, page);
+ ptwrite, largepage, walker->gfn, pfn, false);
return shadow_ent;
}
@@ -371,16 +378,16 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
u64 *shadow_pte;
int write_pt = 0;
int r;
- struct page *page;
+ pfn_t pfn;
+ int largepage = 0;
- pgprintk("%s: addr %lx err %x\n", __FUNCTION__, addr, error_code);
+ pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
kvm_mmu_audit(vcpu, "pre page fault");
r = mmu_topup_memory_caches(vcpu);
if (r)
return r;
- down_read(&vcpu->kvm->slots_lock);
/*
* Look up the shadow pte for the faulting address.
*/
@@ -391,40 +398,45 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
* The page is not mapped by the guest. Let the guest handle it.
*/
if (!r) {
- pgprintk("%s: guest page fault\n", __FUNCTION__);
+ pgprintk("%s: guest page fault\n", __func__);
inject_page_fault(vcpu, addr, walker.error_code);
vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
- up_read(&vcpu->kvm->slots_lock);
return 0;
}
down_read(&current->mm->mmap_sem);
- page = gfn_to_page(vcpu->kvm, walker.gfn);
+ if (walker.level == PT_DIRECTORY_LEVEL) {
+ gfn_t large_gfn;
+ large_gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE-1);
+ if (is_largepage_backed(vcpu, large_gfn)) {
+ walker.gfn = large_gfn;
+ largepage = 1;
+ }
+ }
+ pfn = gfn_to_pfn(vcpu->kvm, walker.gfn);
up_read(&current->mm->mmap_sem);
+ /* mmio */
+ if (is_error_pfn(pfn)) {
+ pgprintk("gfn %x is mmio\n", walker.gfn);
+ kvm_release_pfn_clean(pfn);
+ return 1;
+ }
+
spin_lock(&vcpu->kvm->mmu_lock);
kvm_mmu_free_some_pages(vcpu);
shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
- &write_pt, page);
- pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __FUNCTION__,
+ largepage, &write_pt, pfn);
+
+ pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__,
shadow_pte, *shadow_pte, write_pt);
if (!write_pt)
vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
- /*
- * mmio: emulate if accessible, otherwise its a guest fault.
- */
- if (shadow_pte && is_io_pte(*shadow_pte)) {
- spin_unlock(&vcpu->kvm->mmu_lock);
- up_read(&vcpu->kvm->slots_lock);
- return 1;
- }
-
++vcpu->stat.pf_fixed;
kvm_mmu_audit(vcpu, "post page fault (fixed)");
spin_unlock(&vcpu->kvm->mmu_lock);
- up_read(&vcpu->kvm->slots_lock);
return write_pt;
}
diff --git a/arch/x86/kvm/segment_descriptor.h b/arch/x86/kvm/segment_descriptor.h
deleted file mode 100644
index 56fc4c873389..000000000000
--- a/arch/x86/kvm/segment_descriptor.h
+++ /dev/null
@@ -1,29 +0,0 @@
-#ifndef __SEGMENT_DESCRIPTOR_H
-#define __SEGMENT_DESCRIPTOR_H
-
-struct segment_descriptor {
- u16 limit_low;
- u16 base_low;
- u8 base_mid;
- u8 type : 4;
- u8 system : 1;
- u8 dpl : 2;
- u8 present : 1;
- u8 limit_high : 4;
- u8 avl : 1;
- u8 long_mode : 1;
- u8 default_op : 1;
- u8 granularity : 1;
- u8 base_high;
-} __attribute__((packed));
-
-#ifdef CONFIG_X86_64
-/* LDT or TSS descriptor in the GDT. 16 bytes. */
-struct segment_descriptor_64 {
- struct segment_descriptor s;
- u32 base_higher;
- u32 pad_zero;
-};
-
-#endif
-#endif
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1a582f1090e8..89e0be2c10d0 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -47,6 +47,18 @@ MODULE_LICENSE("GPL");
#define SVM_FEATURE_LBRV (1 << 1)
#define SVM_DEATURE_SVML (1 << 2)
+#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
+
+/* enable NPT for AMD64 and X86 with PAE */
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
+static bool npt_enabled = true;
+#else
+static bool npt_enabled = false;
+#endif
+static int npt = 1;
+
+module_param(npt, int, S_IRUGO);
+
static void kvm_reput_irq(struct vcpu_svm *svm);
static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
@@ -54,8 +66,7 @@ static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
return container_of(vcpu, struct vcpu_svm, vcpu);
}
-unsigned long iopm_base;
-unsigned long msrpm_base;
+static unsigned long iopm_base;
struct kvm_ldttss_desc {
u16 limit0;
@@ -182,7 +193,7 @@ static inline void flush_guest_tlb(struct kvm_vcpu *vcpu)
static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
{
- if (!(efer & EFER_LMA))
+ if (!npt_enabled && !(efer & EFER_LMA))
efer &= ~EFER_LME;
to_svm(vcpu)->vmcb->save.efer = efer | MSR_EFER_SVME_MASK;
@@ -219,12 +230,12 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm = to_svm(vcpu);
if (!svm->next_rip) {
- printk(KERN_DEBUG "%s: NOP\n", __FUNCTION__);
+ printk(KERN_DEBUG "%s: NOP\n", __func__);
return;
}
if (svm->next_rip - svm->vmcb->save.rip > MAX_INST_SIZE)
printk(KERN_ERR "%s: ip 0x%llx next 0x%llx\n",
- __FUNCTION__,
+ __func__,
svm->vmcb->save.rip,
svm->next_rip);
@@ -279,11 +290,7 @@ static void svm_hardware_enable(void *garbage)
struct svm_cpu_data *svm_data;
uint64_t efer;
-#ifdef CONFIG_X86_64
- struct desc_ptr gdt_descr;
-#else
struct desc_ptr gdt_descr;
-#endif
struct desc_struct *gdt;
int me = raw_smp_processor_id();
@@ -302,7 +309,6 @@ static void svm_hardware_enable(void *garbage)
svm_data->asid_generation = 1;
svm_data->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
svm_data->next_asid = svm_data->max_asid + 1;
- svm_features = cpuid_edx(SVM_CPUID_FUNC);
asm volatile ("sgdt %0" : "=m"(gdt_descr));
gdt = (struct desc_struct *)gdt_descr.address;
@@ -361,12 +367,51 @@ static void set_msr_interception(u32 *msrpm, unsigned msr,
BUG();
}
+static void svm_vcpu_init_msrpm(u32 *msrpm)
+{
+ memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER));
+
+#ifdef CONFIG_X86_64
+ set_msr_interception(msrpm, MSR_GS_BASE, 1, 1);
+ set_msr_interception(msrpm, MSR_FS_BASE, 1, 1);
+ set_msr_interception(msrpm, MSR_KERNEL_GS_BASE, 1, 1);
+ set_msr_interception(msrpm, MSR_LSTAR, 1, 1);
+ set_msr_interception(msrpm, MSR_CSTAR, 1, 1);
+ set_msr_interception(msrpm, MSR_SYSCALL_MASK, 1, 1);
+#endif
+ set_msr_interception(msrpm, MSR_K6_STAR, 1, 1);
+ set_msr_interception(msrpm, MSR_IA32_SYSENTER_CS, 1, 1);
+ set_msr_interception(msrpm, MSR_IA32_SYSENTER_ESP, 1, 1);
+ set_msr_interception(msrpm, MSR_IA32_SYSENTER_EIP, 1, 1);
+}
+
+static void svm_enable_lbrv(struct vcpu_svm *svm)
+{
+ u32 *msrpm = svm->msrpm;
+
+ svm->vmcb->control.lbr_ctl = 1;
+ set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
+ set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
+ set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
+ set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
+}
+
+static void svm_disable_lbrv(struct vcpu_svm *svm)
+{
+ u32 *msrpm = svm->msrpm;
+
+ svm->vmcb->control.lbr_ctl = 0;
+ set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
+ set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
+ set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 0, 0);
+ set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
+}
+
static __init int svm_hardware_setup(void)
{
int cpu;
struct page *iopm_pages;
- struct page *msrpm_pages;
- void *iopm_va, *msrpm_va;
+ void *iopm_va;
int r;
iopm_pages = alloc_pages(GFP_KERNEL, IOPM_ALLOC_ORDER);
@@ -379,41 +424,33 @@ static __init int svm_hardware_setup(void)
clear_bit(0x80, iopm_va); /* allow direct access to PC debug port */
iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;
+ if (boot_cpu_has(X86_FEATURE_NX))
+ kvm_enable_efer_bits(EFER_NX);
- msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
+ for_each_online_cpu(cpu) {
+ r = svm_cpu_init(cpu);
+ if (r)
+ goto err;
+ }
- r = -ENOMEM;
- if (!msrpm_pages)
- goto err_1;
+ svm_features = cpuid_edx(SVM_CPUID_FUNC);
- msrpm_va = page_address(msrpm_pages);
- memset(msrpm_va, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER));
- msrpm_base = page_to_pfn(msrpm_pages) << PAGE_SHIFT;
+ if (!svm_has(SVM_FEATURE_NPT))
+ npt_enabled = false;
-#ifdef CONFIG_X86_64
- set_msr_interception(msrpm_va, MSR_GS_BASE, 1, 1);
- set_msr_interception(msrpm_va, MSR_FS_BASE, 1, 1);
- set_msr_interception(msrpm_va, MSR_KERNEL_GS_BASE, 1, 1);
- set_msr_interception(msrpm_va, MSR_LSTAR, 1, 1);
- set_msr_interception(msrpm_va, MSR_CSTAR, 1, 1);
- set_msr_interception(msrpm_va, MSR_SYSCALL_MASK, 1, 1);
-#endif
- set_msr_interception(msrpm_va, MSR_K6_STAR, 1, 1);
- set_msr_interception(msrpm_va, MSR_IA32_SYSENTER_CS, 1, 1);
- set_msr_interception(msrpm_va, MSR_IA32_SYSENTER_ESP, 1, 1);
- set_msr_interception(msrpm_va, MSR_IA32_SYSENTER_EIP, 1, 1);
+ if (npt_enabled && !npt) {
+ printk(KERN_INFO "kvm: Nested Paging disabled\n");
+ npt_enabled = false;
+ }
- for_each_online_cpu(cpu) {
- r = svm_cpu_init(cpu);
- if (r)
- goto err_2;
+ if (npt_enabled) {
+ printk(KERN_INFO "kvm: Nested Paging enabled\n");
+ kvm_enable_tdp();
}
+
return 0;
-err_2:
- __free_pages(msrpm_pages, MSRPM_ALLOC_ORDER);
- msrpm_base = 0;
-err_1:
+err:
__free_pages(iopm_pages, IOPM_ALLOC_ORDER);
iopm_base = 0;
return r;
@@ -421,9 +458,8 @@ err_1:
static __exit void svm_hardware_unsetup(void)
{
- __free_pages(pfn_to_page(msrpm_base >> PAGE_SHIFT), MSRPM_ALLOC_ORDER);
__free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER);
- iopm_base = msrpm_base = 0;
+ iopm_base = 0;
}
static void init_seg(struct vmcb_seg *seg)
@@ -443,15 +479,14 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type)
seg->base = 0;
}
-static void init_vmcb(struct vmcb *vmcb)
+static void init_vmcb(struct vcpu_svm *svm)
{
- struct vmcb_control_area *control = &vmcb->control;
- struct vmcb_save_area *save = &vmcb->save;
+ struct vmcb_control_area *control = &svm->vmcb->control;
+ struct vmcb_save_area *save = &svm->vmcb->save;
control->intercept_cr_read = INTERCEPT_CR0_MASK |
INTERCEPT_CR3_MASK |
- INTERCEPT_CR4_MASK |
- INTERCEPT_CR8_MASK;
+ INTERCEPT_CR4_MASK;
control->intercept_cr_write = INTERCEPT_CR0_MASK |
INTERCEPT_CR3_MASK |
@@ -471,23 +506,13 @@ static void init_vmcb(struct vmcb *vmcb)
INTERCEPT_DR7_MASK;
control->intercept_exceptions = (1 << PF_VECTOR) |
- (1 << UD_VECTOR);
+ (1 << UD_VECTOR) |
+ (1 << MC_VECTOR);
control->intercept = (1ULL << INTERCEPT_INTR) |
(1ULL << INTERCEPT_NMI) |
(1ULL << INTERCEPT_SMI) |
- /*
- * selective cr0 intercept bug?
- * 0: 0f 22 d8 mov %eax,%cr3
- * 3: 0f 20 c0 mov %cr0,%eax
- * 6: 0d 00 00 00 80 or $0x80000000,%eax
- * b: 0f 22 c0 mov %eax,%cr0
- * set cr3 ->interception
- * get cr0 ->interception
- * set cr0 -> no interception
- */
- /* (1ULL << INTERCEPT_SELECTIVE_CR0) | */
(1ULL << INTERCEPT_CPUID) |
(1ULL << INTERCEPT_INVD) |
(1ULL << INTERCEPT_HLT) |
@@ -508,7 +533,7 @@ static void init_vmcb(struct vmcb *vmcb)
(1ULL << INTERCEPT_MWAIT);
control->iopm_base_pa = iopm_base;
- control->msrpm_base_pa = msrpm_base;
+ control->msrpm_base_pa = __pa(svm->msrpm);
control->tsc_offset = 0;
control->int_ctl = V_INTR_MASKING_MASK;
@@ -550,13 +575,30 @@ static void init_vmcb(struct vmcb *vmcb)
save->cr0 = 0x00000010 | X86_CR0_PG | X86_CR0_WP;
save->cr4 = X86_CR4_PAE;
/* rdx = ?? */
+
+ if (npt_enabled) {
+ /* Setup VMCB for Nested Paging */
+ control->nested_ctl = 1;
+ control->intercept &= ~(1ULL << INTERCEPT_TASK_SWITCH);
+ control->intercept_exceptions &= ~(1 << PF_VECTOR);
+ control->intercept_cr_read &= ~(INTERCEPT_CR0_MASK|
+ INTERCEPT_CR3_MASK);
+ control->intercept_cr_write &= ~(INTERCEPT_CR0_MASK|
+ INTERCEPT_CR3_MASK);
+ save->g_pat = 0x0007040600070406ULL;
+ /* enable caching because the QEMU Bios doesn't enable it */
+ save->cr0 = X86_CR0_ET;
+ save->cr3 = 0;
+ save->cr4 = 0;
+ }
+ force_new_asid(&svm->vcpu);
}
static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
- init_vmcb(svm->vmcb);
+ init_vmcb(svm);
if (vcpu->vcpu_id != 0) {
svm->vmcb->save.rip = 0;
@@ -571,6 +613,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
{
struct vcpu_svm *svm;
struct page *page;
+ struct page *msrpm_pages;
int err;
svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
@@ -589,12 +632,19 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
goto uninit;
}
+ err = -ENOMEM;
+ msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
+ if (!msrpm_pages)
+ goto uninit;
+ svm->msrpm = page_address(msrpm_pages);
+ svm_vcpu_init_msrpm(svm->msrpm);
+
svm->vmcb = page_address(page);
clear_page(svm->vmcb);
svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
svm->asid_generation = 0;
memset(svm->db_regs, 0, sizeof(svm->db_regs));
- init_vmcb(svm->vmcb);
+ init_vmcb(svm);
fx_init(&svm->vcpu);
svm->vcpu.fpu_active = 1;
@@ -617,6 +667,7 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm = to_svm(vcpu);
__free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT));
+ __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
kvm_vcpu_uninit(vcpu);
kmem_cache_free(kvm_vcpu_cache, svm);
}
@@ -731,6 +782,13 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
var->unusable = !var->present;
}
+static int svm_get_cpl(struct kvm_vcpu *vcpu)
+{
+ struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
+
+ return save->cpl;
+}
+
static void svm_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -784,6 +842,9 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
}
}
#endif
+ if (npt_enabled)
+ goto set;
+
if ((vcpu->arch.cr0 & X86_CR0_TS) && !(cr0 & X86_CR0_TS)) {
svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
vcpu->fpu_active = 1;
@@ -791,18 +852,29 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
vcpu->arch.cr0 = cr0;
cr0 |= X86_CR0_PG | X86_CR0_WP;
- cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
if (!vcpu->fpu_active) {
svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR);
cr0 |= X86_CR0_TS;
}
+set:
+ /*
+ * re-enable caching here because the QEMU bios
+ * does not do it - this results in some delay at
+ * reboot
+ */
+ cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
svm->vmcb->save.cr0 = cr0;
}
static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
- vcpu->arch.cr4 = cr4;
- to_svm(vcpu)->vmcb->save.cr4 = cr4 | X86_CR4_PAE;
+ unsigned long host_cr4_mce = read_cr4() & X86_CR4_MCE;
+
+ vcpu->arch.cr4 = cr4;
+ if (!npt_enabled)
+ cr4 |= X86_CR4_PAE;
+ cr4 |= host_cr4_mce;
+ to_svm(vcpu)->vmcb->save.cr4 = cr4;
}
static void svm_set_segment(struct kvm_vcpu *vcpu,
@@ -833,13 +905,6 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
}
-/* FIXME:
-
- svm(vcpu)->vmcb->control.int_ctl &= ~V_TPR_MASK;
- svm(vcpu)->vmcb->control.int_ctl |= (sregs->cr8 & V_TPR_MASK);
-
-*/
-
static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg)
{
return -EOPNOTSUPP;
@@ -920,7 +985,7 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
}
default:
printk(KERN_DEBUG "%s: unexpected dr %u\n",
- __FUNCTION__, dr);
+ __func__, dr);
*exception = UD_VECTOR;
return;
}
@@ -962,6 +1027,19 @@ static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
return 1;
}
+static int mc_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+ /*
+ * On an #MC intercept the MCE handler is not called automatically in
+ * the host. So do it by hand here.
+ */
+ asm volatile (
+ "int $0x12\n");
+ /* not sure if we ever come back to this point */
+
+ return 1;
+}
+
static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
{
/*
@@ -969,7 +1047,7 @@ static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
* so reinitialize it.
*/
clear_page(svm->vmcb);
- init_vmcb(svm->vmcb);
+ init_vmcb(svm);
kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
return 0;
@@ -1033,9 +1111,18 @@ static int invalid_op_interception(struct vcpu_svm *svm,
static int task_switch_interception(struct vcpu_svm *svm,
struct kvm_run *kvm_run)
{
- pr_unimpl(&svm->vcpu, "%s: task switch is unsupported\n", __FUNCTION__);
- kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
- return 0;
+ u16 tss_selector;
+
+ tss_selector = (u16)svm->vmcb->control.exit_info_1;
+ if (svm->vmcb->control.exit_info_2 &
+ (1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET))
+ return kvm_task_switch(&svm->vcpu, tss_selector,
+ TASK_SWITCH_IRET);
+ if (svm->vmcb->control.exit_info_2 &
+ (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP))
+ return kvm_task_switch(&svm->vcpu, tss_selector,
+ TASK_SWITCH_JMP);
+ return kvm_task_switch(&svm->vcpu, tss_selector, TASK_SWITCH_CALL);
}
static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
@@ -1049,7 +1136,7 @@ static int emulate_on_interception(struct vcpu_svm *svm,
struct kvm_run *kvm_run)
{
if (emulate_instruction(&svm->vcpu, NULL, 0, 0, 0) != EMULATE_DONE)
- pr_unimpl(&svm->vcpu, "%s: failed\n", __FUNCTION__);
+ pr_unimpl(&svm->vcpu, "%s: failed\n", __func__);
return 1;
}
@@ -1179,8 +1266,19 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
svm->vmcb->save.sysenter_esp = data;
break;
case MSR_IA32_DEBUGCTLMSR:
- pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
- __FUNCTION__, data);
+ if (!svm_has(SVM_FEATURE_LBRV)) {
+ pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n",
+ __func__, data);
+ break;
+ }
+ if (data & DEBUGCTL_RESERVED_BITS)
+ return 1;
+
+ svm->vmcb->save.dbgctl = data;
+ if (data & (1ULL<<0))
+ svm_enable_lbrv(svm);
+ else
+ svm_disable_lbrv(svm);
break;
case MSR_K7_EVNTSEL0:
case MSR_K7_EVNTSEL1:
@@ -1265,6 +1363,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
[SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception,
[SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception,
[SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception,
+ [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception,
[SVM_EXIT_INTR] = nop_on_interception,
[SVM_EXIT_NMI] = nop_on_interception,
[SVM_EXIT_SMI] = nop_on_interception,
@@ -1290,14 +1389,34 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
[SVM_EXIT_WBINVD] = emulate_on_interception,
[SVM_EXIT_MONITOR] = invalid_op_interception,
[SVM_EXIT_MWAIT] = invalid_op_interception,
+ [SVM_EXIT_NPF] = pf_interception,
};
-
static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
u32 exit_code = svm->vmcb->control.exit_code;
+ if (npt_enabled) {
+ int mmu_reload = 0;
+ if ((vcpu->arch.cr0 ^ svm->vmcb->save.cr0) & X86_CR0_PG) {
+ svm_set_cr0(vcpu, svm->vmcb->save.cr0);
+ mmu_reload = 1;
+ }
+ vcpu->arch.cr0 = svm->vmcb->save.cr0;
+ vcpu->arch.cr3 = svm->vmcb->save.cr3;
+ if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
+ if (!load_pdptrs(vcpu, vcpu->arch.cr3)) {
+ kvm_inject_gp(vcpu, 0);
+ return 1;
+ }
+ }
+ if (mmu_reload) {
+ kvm_mmu_reset_context(vcpu);
+ kvm_mmu_load(vcpu);
+ }
+ }
+
kvm_reput_irq(svm);
if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
@@ -1308,10 +1427,11 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
}
if (is_external_interrupt(svm->vmcb->control.exit_int_info) &&
- exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR)
+ exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR &&
+ exit_code != SVM_EXIT_NPF)
printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x "
"exit_code 0x%x\n",
- __FUNCTION__, svm->vmcb->control.exit_int_info,
+ __func__, svm->vmcb->control.exit_int_info,
exit_code);
if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
@@ -1364,6 +1484,27 @@ static void svm_set_irq(struct kvm_vcpu *vcpu, int irq)
svm_inject_irq(svm, irq);
}
+static void update_cr8_intercept(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+ struct vmcb *vmcb = svm->vmcb;
+ int max_irr, tpr;
+
+ if (!irqchip_in_kernel(vcpu->kvm) || vcpu->arch.apic->vapic_addr)
+ return;
+
+ vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK;
+
+ max_irr = kvm_lapic_find_highest_irr(vcpu);
+ if (max_irr == -1)
+ return;
+
+ tpr = kvm_lapic_get_cr8(vcpu) << 4;
+
+ if (tpr >= (max_irr & 0xf0))
+ vmcb->control.intercept_cr_write |= INTERCEPT_CR8_MASK;
+}
+
static void svm_intr_assist(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -1376,14 +1517,14 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu)
SVM_EVTINJ_VEC_MASK;
vmcb->control.exit_int_info = 0;
svm_inject_irq(svm, intr_vector);
- return;
+ goto out;
}
if (vmcb->control.int_ctl & V_IRQ_MASK)
- return;
+ goto out;
if (!kvm_cpu_has_interrupt(vcpu))
- return;
+ goto out;
if (!(vmcb->save.rflags & X86_EFLAGS_IF) ||
(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) ||
@@ -1391,12 +1532,14 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu)
/* unable to deliver irq, set pending irq */
vmcb->control.intercept |= (1ULL << INTERCEPT_VINTR);
svm_inject_irq(svm, 0x0);
- return;
+ goto out;
}
/* Okay, we can deliver the interrupt: grab it and update PIC state. */
intr_vector = kvm_cpu_get_interrupt(vcpu);
svm_inject_irq(svm, intr_vector);
kvm_timer_intr_post(vcpu, intr_vector);
+out:
+ update_cr8_intercept(vcpu);
}
static void kvm_reput_irq(struct vcpu_svm *svm)
@@ -1482,6 +1625,29 @@ static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
{
}
+static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) {
+ int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK;
+ kvm_lapic_set_tpr(vcpu, cr8);
+ }
+}
+
+static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+ u64 cr8;
+
+ if (!irqchip_in_kernel(vcpu->kvm))
+ return;
+
+ cr8 = kvm_get_cr8(vcpu);
+ svm->vmcb->control.int_ctl &= ~V_TPR_MASK;
+ svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK;
+}
+
static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -1491,6 +1657,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
pre_svm_run(svm);
+ sync_lapic_to_cr8(vcpu);
+
save_host_msrs(vcpu);
fs_selector = read_fs();
gs_selector = read_gs();
@@ -1499,6 +1667,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
svm->host_dr6 = read_dr6();
svm->host_dr7 = read_dr7();
svm->vmcb->save.cr2 = vcpu->arch.cr2;
+ /* required for live migration with NPT */
+ if (npt_enabled)
+ svm->vmcb->save.cr3 = vcpu->arch.cr3;
if (svm->vmcb->save.dr7 & 0xff) {
write_dr7(0);
@@ -1635,6 +1806,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
stgi();
+ sync_cr8_to_lapic(vcpu);
+
svm->next_rip = 0;
}
@@ -1642,6 +1815,12 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ if (npt_enabled) {
+ svm->vmcb->control.nested_cr3 = root;
+ force_new_asid(vcpu);
+ return;
+ }
+
svm->vmcb->save.cr3 = root;
force_new_asid(vcpu);
@@ -1709,6 +1888,7 @@ static struct kvm_x86_ops svm_x86_ops = {
.get_segment_base = svm_get_segment_base,
.get_segment = svm_get_segment,
.set_segment = svm_set_segment,
+ .get_cpl = svm_get_cpl,
.get_cs_db_l_bits = kvm_get_cs_db_l_bits,
.decache_cr4_guest_bits = svm_decache_cr4_guest_bits,
.set_cr0 = svm_set_cr0,
diff --git a/arch/x86/kvm/svm.h b/arch/x86/kvm/svm.h
index 5fd50491b555..1b8afa78e869 100644
--- a/arch/x86/kvm/svm.h
+++ b/arch/x86/kvm/svm.h
@@ -238,6 +238,9 @@ struct __attribute__ ((__packed__)) vmcb {
#define SVM_EXITINTINFO_VALID SVM_EVTINJ_VALID
#define SVM_EXITINTINFO_VALID_ERR SVM_EVTINJ_VALID_ERR
+#define SVM_EXITINFOSHIFT_TS_REASON_IRET 36
+#define SVM_EXITINFOSHIFT_TS_REASON_JMP 38
+
#define SVM_EXIT_READ_CR0 0x000
#define SVM_EXIT_READ_CR3 0x003
#define SVM_EXIT_READ_CR4 0x004
diff --git a/arch/x86/kvm/tss.h b/arch/x86/kvm/tss.h
new file mode 100644
index 000000000000..622aa10f692f
--- /dev/null
+++ b/arch/x86/kvm/tss.h
@@ -0,0 +1,59 @@
+#ifndef __TSS_SEGMENT_H
+#define __TSS_SEGMENT_H
+
+struct tss_segment_32 {
+ u32 prev_task_link;
+ u32 esp0;
+ u32 ss0;
+ u32 esp1;
+ u32 ss1;
+ u32 esp2;
+ u32 ss2;
+ u32 cr3;
+ u32 eip;
+ u32 eflags;
+ u32 eax;
+ u32 ecx;
+ u32 edx;
+ u32 ebx;
+ u32 esp;
+ u32 ebp;
+ u32 esi;
+ u32 edi;
+ u32 es;
+ u32 cs;
+ u32 ss;
+ u32 ds;
+ u32 fs;
+ u32 gs;
+ u32 ldt_selector;
+ u16 t;
+ u16 io_map;
+};
+
+struct tss_segment_16 {
+ u16 prev_task_link;
+ u16 sp0;
+ u16 ss0;
+ u16 sp1;
+ u16 ss1;
+ u16 sp2;
+ u16 ss2;
+ u16 ip;
+ u16 flag;
+ u16 ax;
+ u16 cx;
+ u16 dx;
+ u16 bx;
+ u16 sp;
+ u16 bp;
+ u16 si;
+ u16 di;
+ u16 es;
+ u16 cs;
+ u16 ss;
+ u16 ds;
+ u16 ldt;
+};
+
+#endif
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 8e1462880d1f..8e5d6645b90d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -17,7 +17,6 @@
#include "irq.h"
#include "vmx.h"
-#include "segment_descriptor.h"
#include "mmu.h"
#include <linux/kvm_host.h>
@@ -37,6 +36,12 @@ MODULE_LICENSE("GPL");
static int bypass_guest_pf = 1;
module_param(bypass_guest_pf, bool, 0);
+static int enable_vpid = 1;
+module_param(enable_vpid, bool, 0);
+
+static int flexpriority_enabled = 1;
+module_param(flexpriority_enabled, bool, 0);
+
struct vmcs {
u32 revision_id;
u32 abort;
@@ -71,6 +76,7 @@ struct vcpu_vmx {
unsigned rip;
} irq;
} rmode;
+ int vpid;
};
static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
@@ -85,6 +91,10 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
static struct page *vmx_io_bitmap_a;
static struct page *vmx_io_bitmap_b;
+static struct page *vmx_msr_bitmap;
+
+static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
+static DEFINE_SPINLOCK(vmx_vpid_lock);
static struct vmcs_config {
int size;
@@ -176,6 +186,11 @@ static inline int is_external_interrupt(u32 intr_info)
== (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK);
}
+static inline int cpu_has_vmx_msr_bitmap(void)
+{
+ return (vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS);
+}
+
static inline int cpu_has_vmx_tpr_shadow(void)
{
return (vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW);
@@ -194,8 +209,9 @@ static inline int cpu_has_secondary_exec_ctrls(void)
static inline bool cpu_has_vmx_virtualize_apic_accesses(void)
{
- return (vmcs_config.cpu_based_2nd_exec_ctrl &
- SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
+ return flexpriority_enabled
+ && (vmcs_config.cpu_based_2nd_exec_ctrl &
+ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
}
static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm)
@@ -204,6 +220,12 @@ static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm)
(irqchip_in_kernel(kvm)));
}
+static inline int cpu_has_vmx_vpid(void)
+{
+ return (vmcs_config.cpu_based_2nd_exec_ctrl &
+ SECONDARY_EXEC_ENABLE_VPID);
+}
+
static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
{
int i;
@@ -214,6 +236,20 @@ static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
return -1;
}
+static inline void __invvpid(int ext, u16 vpid, gva_t gva)
+{
+ struct {
+ u64 vpid : 16;
+ u64 rsvd : 48;
+ u64 gva;
+ } operand = { vpid, 0, gva };
+
+ asm volatile (ASM_VMX_INVVPID
+ /* CF==1 or ZF==1 --> rc = -1 */
+ "; ja 1f ; ud2 ; 1:"
+ : : "a"(&operand), "c"(ext) : "cc", "memory");
+}
+
static struct kvm_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr)
{
int i;
@@ -257,6 +293,14 @@ static void vcpu_clear(struct vcpu_vmx *vmx)
vmx->launched = 0;
}
+static inline void vpid_sync_vcpu_all(struct vcpu_vmx *vmx)
+{
+ if (vmx->vpid == 0)
+ return;
+
+ __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0);
+}
+
static unsigned long vmcs_readl(unsigned long field)
{
unsigned long value;
@@ -353,7 +397,7 @@ static void reload_tss(void)
* VT restores TR but not its size. Useless.
*/
struct descriptor_table gdt;
- struct segment_descriptor *descs;
+ struct desc_struct *descs;
get_gdt(&gdt);
descs = (void *)gdt.base;
@@ -485,11 +529,12 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
u64 phys_addr = __pa(vmx->vmcs);
- u64 tsc_this, delta;
+ u64 tsc_this, delta, new_offset;
if (vcpu->cpu != cpu) {
vcpu_clear(vmx);
kvm_migrate_apic_timer(vcpu);
+ vpid_sync_vcpu_all(vmx);
}
if (per_cpu(current_vmcs, cpu) != vmx->vmcs) {
@@ -524,8 +569,11 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
* Make sure the time stamp counter is monotonous.
*/
rdtscll(tsc_this);
- delta = vcpu->arch.host_tsc - tsc_this;
- vmcs_write64(TSC_OFFSET, vmcs_read64(TSC_OFFSET) + delta);
+ if (tsc_this < vcpu->arch.host_tsc) {
+ delta = vcpu->arch.host_tsc - tsc_this;
+ new_offset = vmcs_read64(TSC_OFFSET) + delta;
+ vmcs_write64(TSC_OFFSET, new_offset);
+ }
}
}
@@ -596,7 +644,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
{
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
nr | INTR_TYPE_EXCEPTION
- | (has_error_code ? INTR_INFO_DELIEVER_CODE_MASK : 0)
+ | (has_error_code ? INTR_INFO_DELIVER_CODE_MASK : 0)
| INTR_INFO_VALID_MASK);
if (has_error_code)
vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
@@ -959,6 +1007,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
CPU_BASED_MOV_DR_EXITING |
CPU_BASED_USE_TSC_OFFSETING;
opt = CPU_BASED_TPR_SHADOW |
+ CPU_BASED_USE_MSR_BITMAPS |
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS,
&_cpu_based_exec_control) < 0)
@@ -971,7 +1020,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) {
min = 0;
opt = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
- SECONDARY_EXEC_WBINVD_EXITING;
+ SECONDARY_EXEC_WBINVD_EXITING |
+ SECONDARY_EXEC_ENABLE_VPID;
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS2,
&_cpu_based_2nd_exec_control) < 0)
return -EIO;
@@ -1080,6 +1130,10 @@ static __init int hardware_setup(void)
{
if (setup_vmcs_config(&vmcs_config) < 0)
return -EIO;
+
+ if (boot_cpu_has(X86_FEATURE_NX))
+ kvm_enable_efer_bits(EFER_NX);
+
return alloc_kvm_area();
}
@@ -1214,7 +1268,7 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES);
if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) {
printk(KERN_DEBUG "%s: tss fixup for long mode. \n",
- __FUNCTION__);
+ __func__);
vmcs_write32(GUEST_TR_AR_BYTES,
(guest_tr_ar & ~AR_TYPE_MASK)
| AR_TYPE_BUSY_64_TSS);
@@ -1239,6 +1293,11 @@ static void exit_lmode(struct kvm_vcpu *vcpu)
#endif
+static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
+{
+ vpid_sync_vcpu_all(to_vmx(vcpu));
+}
+
static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
{
vcpu->arch.cr4 &= KVM_GUEST_CR4_MASK;
@@ -1275,6 +1334,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
{
+ vmx_flush_tlb(vcpu);
vmcs_writel(GUEST_CR3, cr3);
if (vcpu->arch.cr0 & X86_CR0_PE)
vmx_fpu_deactivate(vcpu);
@@ -1288,14 +1348,14 @@ static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
vcpu->arch.cr4 = cr4;
}
-#ifdef CONFIG_X86_64
-
static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct kvm_msr_entry *msr = find_msr_entry(vmx, MSR_EFER);
vcpu->arch.shadow_efer = efer;
+ if (!msr)
+ return;
if (efer & EFER_LMA) {
vmcs_write32(VM_ENTRY_CONTROLS,
vmcs_read32(VM_ENTRY_CONTROLS) |
@@ -1312,8 +1372,6 @@ static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
setup_msrs(vmx);
}
-#endif
-
static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
{
struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
@@ -1344,6 +1402,20 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
var->unusable = (ar >> 16) & 1;
}
+static int vmx_get_cpl(struct kvm_vcpu *vcpu)
+{
+ struct kvm_segment kvm_seg;
+
+ if (!(vcpu->arch.cr0 & X86_CR0_PE)) /* if real mode */
+ return 0;
+
+ if (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */
+ return 3;
+
+ vmx_get_segment(vcpu, &kvm_seg, VCPU_SREG_CS);
+ return kvm_seg.selector & 3;
+}
+
static u32 vmx_segment_access_rights(struct kvm_segment *var)
{
u32 ar;
@@ -1433,7 +1505,6 @@ static int init_rmode_tss(struct kvm *kvm)
int ret = 0;
int r;
- down_read(&kvm->slots_lock);
r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
if (r < 0)
goto out;
@@ -1456,7 +1527,6 @@ static int init_rmode_tss(struct kvm *kvm)
ret = 1;
out:
- up_read(&kvm->slots_lock);
return ret;
}
@@ -1494,6 +1564,46 @@ out:
return r;
}
+static void allocate_vpid(struct vcpu_vmx *vmx)
+{
+ int vpid;
+
+ vmx->vpid = 0;
+ if (!enable_vpid || !cpu_has_vmx_vpid())
+ return;
+ spin_lock(&vmx_vpid_lock);
+ vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS);
+ if (vpid < VMX_NR_VPIDS) {
+ vmx->vpid = vpid;
+ __set_bit(vpid, vmx_vpid_bitmap);
+ }
+ spin_unlock(&vmx_vpid_lock);
+}
+
+void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr)
+{
+ void *va;
+
+ if (!cpu_has_vmx_msr_bitmap())
+ return;
+
+ /*
+ * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
+ * have the write-low and read-high bitmap offsets the wrong way round.
+ * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
+ */
+ va = kmap(msr_bitmap);
+ if (msr <= 0x1fff) {
+ __clear_bit(msr, va + 0x000); /* read-low */
+ __clear_bit(msr, va + 0x800); /* write-low */
+ } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
+ msr &= 0x1fff;
+ __clear_bit(msr, va + 0x400); /* read-high */
+ __clear_bit(msr, va + 0xc00); /* write-high */
+ }
+ kunmap(msr_bitmap);
+}
+
/*
* Sets up the vmcs for emulated real mode.
*/
@@ -1511,6 +1621,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
vmcs_write64(IO_BITMAP_A, page_to_phys(vmx_io_bitmap_a));
vmcs_write64(IO_BITMAP_B, page_to_phys(vmx_io_bitmap_b));
+ if (cpu_has_vmx_msr_bitmap())
+ vmcs_write64(MSR_BITMAP, page_to_phys(vmx_msr_bitmap));
+
vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
/* Control */
@@ -1532,6 +1645,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
if (!vm_need_virtualize_apic_accesses(vmx->vcpu.kvm))
exec_control &=
~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+ if (vmx->vpid == 0)
+ exec_control &= ~SECONDARY_EXEC_ENABLE_VPID;
vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
}
@@ -1613,6 +1728,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
u64 msr;
int ret;
+ down_read(&vcpu->kvm->slots_lock);
if (!init_rmode_tss(vmx->vcpu.kvm)) {
ret = -ENOMEM;
goto out;
@@ -1621,7 +1737,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
vmx->vcpu.arch.rmode.active = 0;
vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
- set_cr8(&vmx->vcpu, 0);
+ kvm_set_cr8(&vmx->vcpu, 0);
msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
if (vmx->vcpu.vcpu_id == 0)
msr |= MSR_IA32_APICBASE_BSP;
@@ -1704,18 +1820,22 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
vmcs_write64(APIC_ACCESS_ADDR,
page_to_phys(vmx->vcpu.kvm->arch.apic_access_page));
+ if (vmx->vpid != 0)
+ vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
+
vmx->vcpu.arch.cr0 = 0x60000010;
vmx_set_cr0(&vmx->vcpu, vmx->vcpu.arch.cr0); /* enter rmode */
vmx_set_cr4(&vmx->vcpu, 0);
-#ifdef CONFIG_X86_64
vmx_set_efer(&vmx->vcpu, 0);
-#endif
vmx_fpu_activate(&vmx->vcpu);
update_exception_bitmap(&vmx->vcpu);
- return 0;
+ vpid_sync_vcpu_all(vmx);
+
+ ret = 0;
out:
+ up_read(&vcpu->kvm->slots_lock);
return ret;
}
@@ -1723,6 +1843,8 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ KVMTRACE_1D(INJ_VIRQ, vcpu, (u32)irq, handler);
+
if (vcpu->arch.rmode.active) {
vmx->rmode.irq.pending = true;
vmx->rmode.irq.vector = irq;
@@ -1844,7 +1966,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
if ((vect_info & VECTORING_INFO_VALID_MASK) &&
!is_page_fault(intr_info))
printk(KERN_ERR "%s: unexpected, vectoring info 0x%x "
- "intr info 0x%x\n", __FUNCTION__, vect_info, intr_info);
+ "intr info 0x%x\n", __func__, vect_info, intr_info);
if (!irqchip_in_kernel(vcpu->kvm) && is_external_interrupt(vect_info)) {
int irq = vect_info & VECTORING_INFO_VECTOR_MASK;
@@ -1869,10 +1991,12 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
error_code = 0;
rip = vmcs_readl(GUEST_RIP);
- if (intr_info & INTR_INFO_DELIEVER_CODE_MASK)
+ if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
if (is_page_fault(intr_info)) {
cr2 = vmcs_readl(EXIT_QUALIFICATION);
+ KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2,
+ (u32)((u64)cr2 >> 32), handler);
return kvm_mmu_page_fault(vcpu, cr2, error_code);
}
@@ -1901,6 +2025,7 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu,
struct kvm_run *kvm_run)
{
++vcpu->stat.irq_exits;
+ KVMTRACE_1D(INTR, vcpu, vmcs_read32(VM_EXIT_INTR_INFO), handler);
return 1;
}
@@ -1958,25 +2083,27 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
reg = (exit_qualification >> 8) & 15;
switch ((exit_qualification >> 4) & 3) {
case 0: /* mov to cr */
+ KVMTRACE_3D(CR_WRITE, vcpu, (u32)cr, (u32)vcpu->arch.regs[reg],
+ (u32)((u64)vcpu->arch.regs[reg] >> 32), handler);
switch (cr) {
case 0:
vcpu_load_rsp_rip(vcpu);
- set_cr0(vcpu, vcpu->arch.regs[reg]);
+ kvm_set_cr0(vcpu, vcpu->arch.regs[reg]);
skip_emulated_instruction(vcpu);
return 1;
case 3:
vcpu_load_rsp_rip(vcpu);
- set_cr3(vcpu, vcpu->arch.regs[reg]);
+ kvm_set_cr3(vcpu, vcpu->arch.regs[reg]);
skip_emulated_instruction(vcpu);
return 1;
case 4:
vcpu_load_rsp_rip(vcpu);
- set_cr4(vcpu, vcpu->arch.regs[reg]);
+ kvm_set_cr4(vcpu, vcpu->arch.regs[reg]);
skip_emulated_instruction(vcpu);
return 1;
case 8:
vcpu_load_rsp_rip(vcpu);
- set_cr8(vcpu, vcpu->arch.regs[reg]);
+ kvm_set_cr8(vcpu, vcpu->arch.regs[reg]);
skip_emulated_instruction(vcpu);
if (irqchip_in_kernel(vcpu->kvm))
return 1;
@@ -1990,6 +2117,7 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
vcpu->arch.cr0 &= ~X86_CR0_TS;
vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0);
vmx_fpu_activate(vcpu);
+ KVMTRACE_0D(CLTS, vcpu, handler);
skip_emulated_instruction(vcpu);
return 1;
case 1: /*mov from cr*/
@@ -1998,18 +2126,24 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
vcpu_load_rsp_rip(vcpu);
vcpu->arch.regs[reg] = vcpu->arch.cr3;
vcpu_put_rsp_rip(vcpu);
+ KVMTRACE_3D(CR_READ, vcpu, (u32)cr,
+ (u32)vcpu->arch.regs[reg],
+ (u32)((u64)vcpu->arch.regs[reg] >> 32),
+ handler);
skip_emulated_instruction(vcpu);
return 1;
case 8:
vcpu_load_rsp_rip(vcpu);
- vcpu->arch.regs[reg] = get_cr8(vcpu);
+ vcpu->arch.regs[reg] = kvm_get_cr8(vcpu);
vcpu_put_rsp_rip(vcpu);
+ KVMTRACE_2D(CR_READ, vcpu, (u32)cr,
+ (u32)vcpu->arch.regs[reg], handler);
skip_emulated_instruction(vcpu);
return 1;
}
break;
case 3: /* lmsw */
- lmsw(vcpu, (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f);
+ kvm_lmsw(vcpu, (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f);
skip_emulated_instruction(vcpu);
return 1;
@@ -2049,6 +2183,7 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
val = 0;
}
vcpu->arch.regs[reg] = val;
+ KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler);
} else {
/* mov to dr */
}
@@ -2073,6 +2208,9 @@ static int handle_rdmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
return 1;
}
+ KVMTRACE_3D(MSR_READ, vcpu, ecx, (u32)data, (u32)(data >> 32),
+ handler);
+
/* FIXME: handling of bits 32:63 of rax, rdx */
vcpu->arch.regs[VCPU_REGS_RAX] = data & -1u;
vcpu->arch.regs[VCPU_REGS_RDX] = (data >> 32) & -1u;
@@ -2086,6 +2224,9 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u)
| ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32);
+ KVMTRACE_3D(MSR_WRITE, vcpu, ecx, (u32)data, (u32)(data >> 32),
+ handler);
+
if (vmx_set_msr(vcpu, ecx, data) != 0) {
kvm_inject_gp(vcpu, 0);
return 1;
@@ -2110,6 +2251,9 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+
+ KVMTRACE_0D(PEND_INTR, vcpu, handler);
+
/*
* If the user space waits to inject interrupts, exit as soon as
* possible
@@ -2152,6 +2296,8 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
offset = exit_qualification & 0xffful;
+ KVMTRACE_1D(APIC_ACCESS, vcpu, (u32)offset, handler);
+
er = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
if (er != EMULATE_DONE) {
@@ -2163,6 +2309,20 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
return 1;
}
+static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+ unsigned long exit_qualification;
+ u16 tss_selector;
+ int reason;
+
+ exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+
+ reason = (u32)exit_qualification >> 30;
+ tss_selector = exit_qualification;
+
+ return kvm_task_switch(vcpu, tss_selector, reason);
+}
+
/*
* The exit handlers return 1 if the exit was handled fully and guest execution
* may resume. Otherwise they set the kvm_run parameter to indicate what needs
@@ -2185,6 +2345,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
[EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold,
[EXIT_REASON_APIC_ACCESS] = handle_apic_access,
[EXIT_REASON_WBINVD] = handle_wbinvd,
+ [EXIT_REASON_TASK_SWITCH] = handle_task_switch,
};
static const int kvm_vmx_max_exit_handlers =
@@ -2200,6 +2361,9 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 vectoring_info = vmx->idt_vectoring_info;
+ KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)vmcs_readl(GUEST_RIP),
+ (u32)((u64)vmcs_readl(GUEST_RIP) >> 32), entryexit);
+
if (unlikely(vmx->fail)) {
kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
kvm_run->fail_entry.hardware_entry_failure_reason
@@ -2210,7 +2374,7 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
exit_reason != EXIT_REASON_EXCEPTION_NMI)
printk(KERN_WARNING "%s: unexpected, valid vectoring info and "
- "exit reason is 0x%x\n", __FUNCTION__, exit_reason);
+ "exit reason is 0x%x\n", __func__, exit_reason);
if (exit_reason < kvm_vmx_max_exit_handlers
&& kvm_vmx_exit_handlers[exit_reason])
return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run);
@@ -2221,10 +2385,6 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
return 0;
}
-static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
-{
-}
-
static void update_tpr_threshold(struct kvm_vcpu *vcpu)
{
int max_irr, tpr;
@@ -2285,11 +2445,13 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
return;
}
+ KVMTRACE_1D(REDELIVER_EVT, vcpu, idtv_info_field, handler);
+
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field);
vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
vmcs_read32(VM_EXIT_INSTRUCTION_LEN));
- if (unlikely(idtv_info_field & INTR_INFO_DELIEVER_CODE_MASK))
+ if (unlikely(idtv_info_field & INTR_INFO_DELIVER_CODE_MASK))
vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
vmcs_read32(IDT_VECTORING_ERROR_CODE));
if (unlikely(has_ext_irq))
@@ -2470,8 +2632,10 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
/* We need to handle NMIs before interrupts are enabled */
- if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) /* nmi */
+ if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) { /* nmi */
+ KVMTRACE_0D(NMI, vcpu, handler);
asm("int $2");
+ }
}
static void vmx_free_vmcs(struct kvm_vcpu *vcpu)
@@ -2489,6 +2653,10 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ spin_lock(&vmx_vpid_lock);
+ if (vmx->vpid != 0)
+ __clear_bit(vmx->vpid, vmx_vpid_bitmap);
+ spin_unlock(&vmx_vpid_lock);
vmx_free_vmcs(vcpu);
kfree(vmx->host_msrs);
kfree(vmx->guest_msrs);
@@ -2505,6 +2673,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
if (!vmx)
return ERR_PTR(-ENOMEM);
+ allocate_vpid(vmx);
+
err = kvm_vcpu_init(&vmx->vcpu, kvm, id);
if (err)
goto free_vcpu;
@@ -2591,14 +2761,13 @@ static struct kvm_x86_ops vmx_x86_ops = {
.get_segment_base = vmx_get_segment_base,
.get_segment = vmx_get_segment,
.set_segment = vmx_set_segment,
+ .get_cpl = vmx_get_cpl,
.get_cs_db_l_bits = vmx_get_cs_db_l_bits,
.decache_cr4_guest_bits = vmx_decache_cr4_guest_bits,
.set_cr0 = vmx_set_cr0,
.set_cr3 = vmx_set_cr3,
.set_cr4 = vmx_set_cr4,
-#ifdef CONFIG_X86_64
.set_efer = vmx_set_efer,
-#endif
.get_idt = vmx_get_idt,
.set_idt = vmx_set_idt,
.get_gdt = vmx_get_gdt,
@@ -2626,7 +2795,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
static int __init vmx_init(void)
{
- void *iova;
+ void *va;
int r;
vmx_io_bitmap_a = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
@@ -2639,28 +2808,48 @@ static int __init vmx_init(void)
goto out;
}
+ vmx_msr_bitmap = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
+ if (!vmx_msr_bitmap) {
+ r = -ENOMEM;
+ goto out1;
+ }
+
/*
* Allow direct access to the PC debug port (it is often used for I/O
* delays, but the vmexits simply slow things down).
*/
- iova = kmap(vmx_io_bitmap_a);
- memset(iova, 0xff, PAGE_SIZE);
- clear_bit(0x80, iova);
+ va = kmap(vmx_io_bitmap_a);
+ memset(va, 0xff, PAGE_SIZE);
+ clear_bit(0x80, va);
kunmap(vmx_io_bitmap_a);
- iova = kmap(vmx_io_bitmap_b);
- memset(iova, 0xff, PAGE_SIZE);
+ va = kmap(vmx_io_bitmap_b);
+ memset(va, 0xff, PAGE_SIZE);
kunmap(vmx_io_bitmap_b);
+ va = kmap(vmx_msr_bitmap);
+ memset(va, 0xff, PAGE_SIZE);
+ kunmap(vmx_msr_bitmap);
+
+ set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
+
r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), THIS_MODULE);
if (r)
- goto out1;
+ goto out2;
+
+ vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_FS_BASE);
+ vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_GS_BASE);
+ vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_CS);
+ vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_ESP);
+ vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_EIP);
if (bypass_guest_pf)
kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull);
return 0;
+out2:
+ __free_page(vmx_msr_bitmap);
out1:
__free_page(vmx_io_bitmap_b);
out:
@@ -2670,6 +2859,7 @@ out:
static void __exit vmx_exit(void)
{
+ __free_page(vmx_msr_bitmap);
__free_page(vmx_io_bitmap_b);
__free_page(vmx_io_bitmap_a);
diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h
index d52ae8d7303d..5dff4606b988 100644
--- a/arch/x86/kvm/vmx.h
+++ b/arch/x86/kvm/vmx.h
@@ -49,6 +49,7 @@
* Definitions of Secondary Processor-Based VM-Execution Controls.
*/
#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
+#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
@@ -65,6 +66,7 @@
/* VMCS Encodings */
enum vmcs_field {
+ VIRTUAL_PROCESSOR_ID = 0x00000000,
GUEST_ES_SELECTOR = 0x00000800,
GUEST_CS_SELECTOR = 0x00000802,
GUEST_SS_SELECTOR = 0x00000804,
@@ -231,12 +233,12 @@ enum vmcs_field {
*/
#define INTR_INFO_VECTOR_MASK 0xff /* 7:0 */
#define INTR_INFO_INTR_TYPE_MASK 0x700 /* 10:8 */
-#define INTR_INFO_DELIEVER_CODE_MASK 0x800 /* 11 */
+#define INTR_INFO_DELIVER_CODE_MASK 0x800 /* 11 */
#define INTR_INFO_VALID_MASK 0x80000000 /* 31 */
#define VECTORING_INFO_VECTOR_MASK INTR_INFO_VECTOR_MASK
#define VECTORING_INFO_TYPE_MASK INTR_INFO_INTR_TYPE_MASK
-#define VECTORING_INFO_DELIEVER_CODE_MASK INTR_INFO_DELIEVER_CODE_MASK
+#define VECTORING_INFO_DELIVER_CODE_MASK INTR_INFO_DELIVER_CODE_MASK
#define VECTORING_INFO_VALID_MASK INTR_INFO_VALID_MASK
#define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */
@@ -321,4 +323,8 @@ enum vmcs_field {
#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT 9
+#define VMX_NR_VPIDS (1 << 16)
+#define VMX_VPID_EXTENT_SINGLE_CONTEXT 1
+#define VMX_VPID_EXTENT_ALL_CONTEXT 2
+
#endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6b01552bd1f1..0ce556372a4d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -15,10 +15,12 @@
*/
#include <linux/kvm_host.h>
-#include "segment_descriptor.h"
#include "irq.h"
#include "mmu.h"
+#include "i8254.h"
+#include "tss.h"
+#include <linux/clocksource.h>
#include <linux/kvm.h>
#include <linux/fs.h>
#include <linux/vmalloc.h>
@@ -28,6 +30,7 @@
#include <asm/uaccess.h>
#include <asm/msr.h>
+#include <asm/desc.h>
#define MAX_IO_MSRS 256
#define CR0_RESERVED_BITS \
@@ -41,7 +44,15 @@
| X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
-#define EFER_RESERVED_BITS 0xfffffffffffff2fe
+/* EFER defaults:
+ * - enable syscall per default because its emulated by KVM
+ * - enable LME and LMA per default on 64 bit KVM
+ */
+#ifdef CONFIG_X86_64
+static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffafeULL;
+#else
+static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffffeULL;
+#endif
#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
@@ -63,6 +74,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "irq_window", VCPU_STAT(irq_window_exits) },
{ "halt_exits", VCPU_STAT(halt_exits) },
{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
+ { "hypercalls", VCPU_STAT(hypercalls) },
{ "request_irq", VCPU_STAT(request_irq_exits) },
{ "irq_exits", VCPU_STAT(irq_exits) },
{ "host_state_reload", VCPU_STAT(host_state_reload) },
@@ -78,6 +90,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "mmu_recycled", VM_STAT(mmu_recycled) },
{ "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
{ "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
+ { "largepages", VM_STAT(lpages) },
{ NULL }
};
@@ -85,7 +98,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
unsigned long segment_base(u16 selector)
{
struct descriptor_table gdt;
- struct segment_descriptor *d;
+ struct desc_struct *d;
unsigned long table_base;
unsigned long v;
@@ -101,13 +114,12 @@ unsigned long segment_base(u16 selector)
asm("sldt %0" : "=g"(ldt_selector));
table_base = segment_base(ldt_selector);
}
- d = (struct segment_descriptor *)(table_base + (selector & ~7));
- v = d->base_low | ((unsigned long)d->base_mid << 16) |
- ((unsigned long)d->base_high << 24);
+ d = (struct desc_struct *)(table_base + (selector & ~7));
+ v = d->base0 | ((unsigned long)d->base1 << 16) |
+ ((unsigned long)d->base2 << 24);
#ifdef CONFIG_X86_64
- if (d->system == 0 && (d->type == 2 || d->type == 9 || d->type == 11))
- v |= ((unsigned long) \
- ((struct segment_descriptor_64 *)d)->base_higher) << 32;
+ if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11))
+ v |= ((unsigned long)((struct ldttss_desc64 *)d)->base3) << 32;
#endif
return v;
}
@@ -145,11 +157,16 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr,
u32 error_code)
{
++vcpu->stat.pf_guest;
- if (vcpu->arch.exception.pending && vcpu->arch.exception.nr == PF_VECTOR) {
- printk(KERN_DEBUG "kvm: inject_page_fault:"
- " double fault 0x%lx\n", addr);
- vcpu->arch.exception.nr = DF_VECTOR;
- vcpu->arch.exception.error_code = 0;
+ if (vcpu->arch.exception.pending) {
+ if (vcpu->arch.exception.nr == PF_VECTOR) {
+ printk(KERN_DEBUG "kvm: inject_page_fault:"
+ " double fault 0x%lx\n", addr);
+ vcpu->arch.exception.nr = DF_VECTOR;
+ vcpu->arch.exception.error_code = 0;
+ } else if (vcpu->arch.exception.nr == DF_VECTOR) {
+ /* triple fault -> shutdown */
+ set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
+ }
return;
}
vcpu->arch.cr2 = addr;
@@ -184,7 +201,6 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
int ret;
u64 pdpte[ARRAY_SIZE(vcpu->arch.pdptrs)];
- down_read(&vcpu->kvm->slots_lock);
ret = kvm_read_guest_page(vcpu->kvm, pdpt_gfn, pdpte,
offset * sizeof(u64), sizeof(pdpte));
if (ret < 0) {
@@ -201,10 +217,10 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
memcpy(vcpu->arch.pdptrs, pdpte, sizeof(vcpu->arch.pdptrs));
out:
- up_read(&vcpu->kvm->slots_lock);
return ret;
}
+EXPORT_SYMBOL_GPL(load_pdptrs);
static bool pdptrs_changed(struct kvm_vcpu *vcpu)
{
@@ -215,18 +231,16 @@ static bool pdptrs_changed(struct kvm_vcpu *vcpu)
if (is_long_mode(vcpu) || !is_pae(vcpu))
return false;
- down_read(&vcpu->kvm->slots_lock);
r = kvm_read_guest(vcpu->kvm, vcpu->arch.cr3 & ~31u, pdpte, sizeof(pdpte));
if (r < 0)
goto out;
changed = memcmp(pdpte, vcpu->arch.pdptrs, sizeof(pdpte)) != 0;
out:
- up_read(&vcpu->kvm->slots_lock);
return changed;
}
-void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
+void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
if (cr0 & CR0_RESERVED_BITS) {
printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n",
@@ -284,15 +298,18 @@ void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
kvm_mmu_reset_context(vcpu);
return;
}
-EXPORT_SYMBOL_GPL(set_cr0);
+EXPORT_SYMBOL_GPL(kvm_set_cr0);
-void lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
+void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
{
- set_cr0(vcpu, (vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f));
+ kvm_set_cr0(vcpu, (vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f));
+ KVMTRACE_1D(LMSW, vcpu,
+ (u32)((vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f)),
+ handler);
}
-EXPORT_SYMBOL_GPL(lmsw);
+EXPORT_SYMBOL_GPL(kvm_lmsw);
-void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
if (cr4 & CR4_RESERVED_BITS) {
printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n");
@@ -323,9 +340,9 @@ void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
vcpu->arch.cr4 = cr4;
kvm_mmu_reset_context(vcpu);
}
-EXPORT_SYMBOL_GPL(set_cr4);
+EXPORT_SYMBOL_GPL(kvm_set_cr4);
-void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
+void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
{
if (cr3 == vcpu->arch.cr3 && !pdptrs_changed(vcpu)) {
kvm_mmu_flush_tlb(vcpu);
@@ -359,7 +376,6 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
*/
}
- down_read(&vcpu->kvm->slots_lock);
/*
* Does the new cr3 value map to physical memory? (Note, we
* catch an invalid cr3 even in real-mode, because it would
@@ -375,11 +391,10 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
vcpu->arch.cr3 = cr3;
vcpu->arch.mmu.new_cr3(vcpu);
}
- up_read(&vcpu->kvm->slots_lock);
}
-EXPORT_SYMBOL_GPL(set_cr3);
+EXPORT_SYMBOL_GPL(kvm_set_cr3);
-void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
+void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
{
if (cr8 & CR8_RESERVED_BITS) {
printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8);
@@ -391,16 +406,16 @@ void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
else
vcpu->arch.cr8 = cr8;
}
-EXPORT_SYMBOL_GPL(set_cr8);
+EXPORT_SYMBOL_GPL(kvm_set_cr8);
-unsigned long get_cr8(struct kvm_vcpu *vcpu)
+unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
{
if (irqchip_in_kernel(vcpu->kvm))
return kvm_lapic_get_cr8(vcpu);
else
return vcpu->arch.cr8;
}
-EXPORT_SYMBOL_GPL(get_cr8);
+EXPORT_SYMBOL_GPL(kvm_get_cr8);
/*
* List of msr numbers which we expose to userspace through KVM_GET_MSRS
@@ -415,7 +430,8 @@ static u32 msrs_to_save[] = {
#ifdef CONFIG_X86_64
MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
#endif
- MSR_IA32_TIME_STAMP_COUNTER,
+ MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
+ MSR_IA32_PERF_STATUS,
};
static unsigned num_msrs_to_save;
@@ -424,11 +440,9 @@ static u32 emulated_msrs[] = {
MSR_IA32_MISC_ENABLE,
};
-#ifdef CONFIG_X86_64
-
static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
{
- if (efer & EFER_RESERVED_BITS) {
+ if (efer & efer_reserved_bits) {
printk(KERN_DEBUG "set_efer: 0x%llx #GP, reserved bits\n",
efer);
kvm_inject_gp(vcpu, 0);
@@ -450,7 +464,12 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
vcpu->arch.shadow_efer = efer;
}
-#endif
+void kvm_enable_efer_bits(u64 mask)
+{
+ efer_reserved_bits &= ~mask;
+}
+EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
+
/*
* Writes msr value into into the appropriate "register".
@@ -470,26 +489,86 @@ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
return kvm_set_msr(vcpu, index, *data);
}
+static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
+{
+ static int version;
+ struct kvm_wall_clock wc;
+ struct timespec wc_ts;
+
+ if (!wall_clock)
+ return;
+
+ version++;
+
+ kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
+
+ wc_ts = current_kernel_time();
+ wc.wc_sec = wc_ts.tv_sec;
+ wc.wc_nsec = wc_ts.tv_nsec;
+ wc.wc_version = version;
+
+ kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));
+
+ version++;
+ kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
+}
+
+static void kvm_write_guest_time(struct kvm_vcpu *v)
+{
+ struct timespec ts;
+ unsigned long flags;
+ struct kvm_vcpu_arch *vcpu = &v->arch;
+ void *shared_kaddr;
+
+ if ((!vcpu->time_page))
+ return;
+
+ /* Keep irq disabled to prevent changes to the clock */
+ local_irq_save(flags);
+ kvm_get_msr(v, MSR_IA32_TIME_STAMP_COUNTER,
+ &vcpu->hv_clock.tsc_timestamp);
+ ktime_get_ts(&ts);
+ local_irq_restore(flags);
+
+ /* With all the info we got, fill in the values */
+
+ vcpu->hv_clock.system_time = ts.tv_nsec +
+ (NSEC_PER_SEC * (u64)ts.tv_sec);
+ /*
+ * The interface expects us to write an even number signaling that the
+ * update is finished. Since the guest won't see the intermediate
+ * state, we just write "2" at the end
+ */
+ vcpu->hv_clock.version = 2;
+
+ shared_kaddr = kmap_atomic(vcpu->time_page, KM_USER0);
+
+ memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock,
+ sizeof(vcpu->hv_clock));
+
+ kunmap_atomic(shared_kaddr, KM_USER0);
+
+ mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
+}
+
int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
switch (msr) {
-#ifdef CONFIG_X86_64
case MSR_EFER:
set_efer(vcpu, data);
break;
-#endif
case MSR_IA32_MC0_STATUS:
pr_unimpl(vcpu, "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n",
- __FUNCTION__, data);
+ __func__, data);
break;
case MSR_IA32_MCG_STATUS:
pr_unimpl(vcpu, "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n",
- __FUNCTION__, data);
+ __func__, data);
break;
case MSR_IA32_MCG_CTL:
pr_unimpl(vcpu, "%s: MSR_IA32_MCG_CTL 0x%llx, nop\n",
- __FUNCTION__, data);
+ __func__, data);
break;
case MSR_IA32_UCODE_REV:
case MSR_IA32_UCODE_WRITE:
@@ -501,6 +580,42 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
case MSR_IA32_MISC_ENABLE:
vcpu->arch.ia32_misc_enable_msr = data;
break;
+ case MSR_KVM_WALL_CLOCK:
+ vcpu->kvm->arch.wall_clock = data;
+ kvm_write_wall_clock(vcpu->kvm, data);
+ break;
+ case MSR_KVM_SYSTEM_TIME: {
+ if (vcpu->arch.time_page) {
+ kvm_release_page_dirty(vcpu->arch.time_page);
+ vcpu->arch.time_page = NULL;
+ }
+
+ vcpu->arch.time = data;
+
+ /* we verify if the enable bit is set... */
+ if (!(data & 1))
+ break;
+
+ /* ...but clean it before doing the actual write */
+ vcpu->arch.time_offset = data & ~(PAGE_MASK | 1);
+
+ vcpu->arch.hv_clock.tsc_to_system_mul =
+ clocksource_khz2mult(tsc_khz, 22);
+ vcpu->arch.hv_clock.tsc_shift = 22;
+
+ down_read(&current->mm->mmap_sem);
+ vcpu->arch.time_page =
+ gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT);
+ up_read(&current->mm->mmap_sem);
+
+ if (is_error_page(vcpu->arch.time_page)) {
+ kvm_release_page_clean(vcpu->arch.time_page);
+ vcpu->arch.time_page = NULL;
+ }
+
+ kvm_write_guest_time(vcpu);
+ break;
+ }
default:
pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", msr, data);
return 1;
@@ -540,7 +655,6 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
case MSR_IA32_MC0_MISC+12:
case MSR_IA32_MC0_MISC+16:
case MSR_IA32_UCODE_REV:
- case MSR_IA32_PERF_STATUS:
case MSR_IA32_EBL_CR_POWERON:
/* MTRR registers */
case 0xfe:
@@ -556,11 +670,21 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
case MSR_IA32_MISC_ENABLE:
data = vcpu->arch.ia32_misc_enable_msr;
break;
-#ifdef CONFIG_X86_64
+ case MSR_IA32_PERF_STATUS:
+ /* TSC increment by tick */
+ data = 1000ULL;
+ /* CPU multiplier */
+ data |= (((uint64_t)4ULL) << 40);
+ break;
case MSR_EFER:
data = vcpu->arch.shadow_efer;
break;
-#endif
+ case MSR_KVM_WALL_CLOCK:
+ data = vcpu->kvm->arch.wall_clock;
+ break;
+ case MSR_KVM_SYSTEM_TIME:
+ data = vcpu->arch.time;
+ break;
default:
pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
return 1;
@@ -584,9 +708,11 @@ static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
vcpu_load(vcpu);
+ down_read(&vcpu->kvm->slots_lock);
for (i = 0; i < msrs->nmsrs; ++i)
if (do_msr(vcpu, entries[i].index, &entries[i].data))
break;
+ up_read(&vcpu->kvm->slots_lock);
vcpu_put(vcpu);
@@ -688,11 +814,24 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_USER_MEMORY:
case KVM_CAP_SET_TSS_ADDR:
case KVM_CAP_EXT_CPUID:
+ case KVM_CAP_CLOCKSOURCE:
+ case KVM_CAP_PIT:
+ case KVM_CAP_NOP_IO_DELAY:
+ case KVM_CAP_MP_STATE:
r = 1;
break;
case KVM_CAP_VAPIC:
r = !kvm_x86_ops->cpu_has_accelerated_tpr();
break;
+ case KVM_CAP_NR_VCPUS:
+ r = KVM_MAX_VCPUS;
+ break;
+ case KVM_CAP_NR_MEMSLOTS:
+ r = KVM_MEMORY_SLOTS;
+ break;
+ case KVM_CAP_PV_MMU:
+ r = !tdp_enabled;
+ break;
default:
r = 0;
break;
@@ -763,6 +902,7 @@ out:
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
kvm_x86_ops->vcpu_load(vcpu, cpu);
+ kvm_write_guest_time(vcpu);
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -958,32 +1098,32 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
}
/* function 4 and 0xb have additional index. */
case 4: {
- int index, cache_type;
+ int i, cache_type;
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
/* read more entries until cache_type is zero */
- for (index = 1; *nent < maxnent; ++index) {
- cache_type = entry[index - 1].eax & 0x1f;
+ for (i = 1; *nent < maxnent; ++i) {
+ cache_type = entry[i - 1].eax & 0x1f;
if (!cache_type)
break;
- do_cpuid_1_ent(&entry[index], function, index);
- entry[index].flags |=
+ do_cpuid_1_ent(&entry[i], function, i);
+ entry[i].flags |=
KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
++*nent;
}
break;
}
case 0xb: {
- int index, level_type;
+ int i, level_type;
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
/* read more entries until level_type is zero */
- for (index = 1; *nent < maxnent; ++index) {
- level_type = entry[index - 1].ecx & 0xff;
+ for (i = 1; *nent < maxnent; ++i) {
+ level_type = entry[i - 1].ecx & 0xff;
if (!level_type)
break;
- do_cpuid_1_ent(&entry[index], function, index);
- entry[index].flags |=
+ do_cpuid_1_ent(&entry[i], function, i);
+ entry[i].flags |=
KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
++*nent;
}
@@ -1365,6 +1505,23 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
return r;
}
+static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
+{
+ int r = 0;
+
+ memcpy(ps, &kvm->arch.vpit->pit_state, sizeof(struct kvm_pit_state));
+ return r;
+}
+
+static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
+{
+ int r = 0;
+
+ memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state));
+ kvm_pit_load_count(kvm, 0, ps->channels[0].count);
+ return r;
+}
+
/*
* Get (and clear) the dirty memory log for a memory slot.
*/
@@ -1457,6 +1614,12 @@ long kvm_arch_vm_ioctl(struct file *filp,
} else
goto out;
break;
+ case KVM_CREATE_PIT:
+ r = -ENOMEM;
+ kvm->arch.vpit = kvm_create_pit(kvm);
+ if (kvm->arch.vpit)
+ r = 0;
+ break;
case KVM_IRQ_LINE: {
struct kvm_irq_level irq_event;
@@ -1512,6 +1675,37 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = 0;
break;
}
+ case KVM_GET_PIT: {
+ struct kvm_pit_state ps;
+ r = -EFAULT;
+ if (copy_from_user(&ps, argp, sizeof ps))
+ goto out;
+ r = -ENXIO;
+ if (!kvm->arch.vpit)
+ goto out;
+ r = kvm_vm_ioctl_get_pit(kvm, &ps);
+ if (r)
+ goto out;
+ r = -EFAULT;
+ if (copy_to_user(argp, &ps, sizeof ps))
+ goto out;
+ r = 0;
+ break;
+ }
+ case KVM_SET_PIT: {
+ struct kvm_pit_state ps;
+ r = -EFAULT;
+ if (copy_from_user(&ps, argp, sizeof ps))
+ goto out;
+ r = -ENXIO;
+ if (!kvm->arch.vpit)
+ goto out;
+ r = kvm_vm_ioctl_set_pit(kvm, &ps);
+ if (r)
+ goto out;
+ r = 0;
+ break;
+ }
default:
;
}
@@ -1570,7 +1764,6 @@ int emulator_read_std(unsigned long addr,
void *data = val;
int r = X86EMUL_CONTINUE;
- down_read(&vcpu->kvm->slots_lock);
while (bytes) {
gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
unsigned offset = addr & (PAGE_SIZE-1);
@@ -1592,7 +1785,6 @@ int emulator_read_std(unsigned long addr,
addr += tocopy;
}
out:
- up_read(&vcpu->kvm->slots_lock);
return r;
}
EXPORT_SYMBOL_GPL(emulator_read_std);
@@ -1611,9 +1803,7 @@ static int emulator_read_emulated(unsigned long addr,
return X86EMUL_CONTINUE;
}
- down_read(&vcpu->kvm->slots_lock);
gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
- up_read(&vcpu->kvm->slots_lock);
/* For APIC access vmexit */
if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
@@ -1646,19 +1836,15 @@ mmio:
return X86EMUL_UNHANDLEABLE;
}
-static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
- const void *val, int bytes)
+int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
+ const void *val, int bytes)
{
int ret;
- down_read(&vcpu->kvm->slots_lock);
ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
- if (ret < 0) {
- up_read(&vcpu->kvm->slots_lock);
+ if (ret < 0)
return 0;
- }
kvm_mmu_pte_write(vcpu, gpa, val, bytes);
- up_read(&vcpu->kvm->slots_lock);
return 1;
}
@@ -1670,9 +1856,7 @@ static int emulator_write_emulated_onepage(unsigned long addr,
struct kvm_io_device *mmio_dev;
gpa_t gpa;
- down_read(&vcpu->kvm->slots_lock);
gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
- up_read(&vcpu->kvm->slots_lock);
if (gpa == UNMAPPED_GVA) {
kvm_inject_page_fault(vcpu, addr, 2);
@@ -1749,7 +1933,6 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
char *kaddr;
u64 val;
- down_read(&vcpu->kvm->slots_lock);
gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
if (gpa == UNMAPPED_GVA ||
@@ -1769,9 +1952,8 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val);
kunmap_atomic(kaddr, KM_USER0);
kvm_release_page_dirty(page);
- emul_write:
- up_read(&vcpu->kvm->slots_lock);
}
+emul_write:
#endif
return emulator_write_emulated(addr, new, bytes, vcpu);
@@ -1802,7 +1984,7 @@ int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
*dest = kvm_x86_ops->get_dr(vcpu, dr);
return X86EMUL_CONTINUE;
default:
- pr_unimpl(vcpu, "%s: unexpected dr %u\n", __FUNCTION__, dr);
+ pr_unimpl(vcpu, "%s: unexpected dr %u\n", __func__, dr);
return X86EMUL_UNHANDLEABLE;
}
}
@@ -1840,7 +2022,7 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
}
EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);
-struct x86_emulate_ops emulate_ops = {
+static struct x86_emulate_ops emulate_ops = {
.read_std = emulator_read_std,
.read_emulated = emulator_read_emulated,
.write_emulated = emulator_write_emulated,
@@ -2091,6 +2273,13 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
vcpu->arch.pio.guest_page_offset = 0;
vcpu->arch.pio.rep = 0;
+ if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
+ KVMTRACE_2D(IO_READ, vcpu, vcpu->run->io.port, (u32)size,
+ handler);
+ else
+ KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size,
+ handler);
+
kvm_x86_ops->cache_regs(vcpu);
memcpy(vcpu->arch.pio_data, &vcpu->arch.regs[VCPU_REGS_RAX], 4);
kvm_x86_ops->decache_regs(vcpu);
@@ -2129,6 +2318,13 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
vcpu->arch.pio.guest_page_offset = offset_in_page(address);
vcpu->arch.pio.rep = rep;
+ if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
+ KVMTRACE_2D(IO_READ, vcpu, vcpu->run->io.port, (u32)size,
+ handler);
+ else
+ KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size,
+ handler);
+
if (!count) {
kvm_x86_ops->skip_emulated_instruction(vcpu);
return 1;
@@ -2163,10 +2359,8 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
kvm_x86_ops->skip_emulated_instruction(vcpu);
for (i = 0; i < nr_pages; ++i) {
- down_read(&vcpu->kvm->slots_lock);
page = gva_to_page(vcpu, address + i * PAGE_SIZE);
vcpu->arch.pio.guest_pages[i] = page;
- up_read(&vcpu->kvm->slots_lock);
if (!page) {
kvm_inject_gp(vcpu, 0);
free_pio_guest_pages(vcpu);
@@ -2238,10 +2432,13 @@ void kvm_arch_exit(void)
int kvm_emulate_halt(struct kvm_vcpu *vcpu)
{
++vcpu->stat.halt_exits;
+ KVMTRACE_0D(HLT, vcpu, handler);
if (irqchip_in_kernel(vcpu->kvm)) {
- vcpu->arch.mp_state = VCPU_MP_STATE_HALTED;
+ vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
+ up_read(&vcpu->kvm->slots_lock);
kvm_vcpu_block(vcpu);
- if (vcpu->arch.mp_state != VCPU_MP_STATE_RUNNABLE)
+ down_read(&vcpu->kvm->slots_lock);
+ if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
return -EINTR;
return 1;
} else {
@@ -2251,9 +2448,19 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_emulate_halt);
+static inline gpa_t hc_gpa(struct kvm_vcpu *vcpu, unsigned long a0,
+ unsigned long a1)
+{
+ if (is_long_mode(vcpu))
+ return a0;
+ else
+ return a0 | ((gpa_t)a1 << 32);
+}
+
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
{
unsigned long nr, a0, a1, a2, a3, ret;
+ int r = 1;
kvm_x86_ops->cache_regs(vcpu);
@@ -2263,6 +2470,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
a2 = vcpu->arch.regs[VCPU_REGS_RDX];
a3 = vcpu->arch.regs[VCPU_REGS_RSI];
+ KVMTRACE_1D(VMMCALL, vcpu, (u32)nr, handler);
+
if (!is_long_mode(vcpu)) {
nr &= 0xFFFFFFFF;
a0 &= 0xFFFFFFFF;
@@ -2275,13 +2484,17 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
case KVM_HC_VAPIC_POLL_IRQ:
ret = 0;
break;
+ case KVM_HC_MMU_OP:
+ r = kvm_pv_mmu_op(vcpu, a0, hc_gpa(vcpu, a1, a2), &ret);
+ break;
default:
ret = -KVM_ENOSYS;
break;
}
vcpu->arch.regs[VCPU_REGS_RAX] = ret;
kvm_x86_ops->decache_regs(vcpu);
- return 0;
+ ++vcpu->stat.hypercalls;
+ return r;
}
EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
@@ -2329,7 +2542,7 @@ void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
unsigned long *rflags)
{
- lmsw(vcpu, msw);
+ kvm_lmsw(vcpu, msw);
*rflags = kvm_x86_ops->get_rflags(vcpu);
}
@@ -2346,9 +2559,9 @@ unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
case 4:
return vcpu->arch.cr4;
case 8:
- return get_cr8(vcpu);
+ return kvm_get_cr8(vcpu);
default:
- vcpu_printf(vcpu, "%s: unexpected cr %u\n", __FUNCTION__, cr);
+ vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
return 0;
}
}
@@ -2358,23 +2571,23 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
{
switch (cr) {
case 0:
- set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val));
+ kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val));
*rflags = kvm_x86_ops->get_rflags(vcpu);
break;
case 2:
vcpu->arch.cr2 = val;
break;
case 3:
- set_cr3(vcpu, val);
+ kvm_set_cr3(vcpu, val);
break;
case 4:
- set_cr4(vcpu, mk_cr_64(vcpu->arch.cr4, val));
+ kvm_set_cr4(vcpu, mk_cr_64(vcpu->arch.cr4, val));
break;
case 8:
- set_cr8(vcpu, val & 0xfUL);
+ kvm_set_cr8(vcpu, val & 0xfUL);
break;
default:
- vcpu_printf(vcpu, "%s: unexpected cr %u\n", __FUNCTION__, cr);
+ vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
}
}
@@ -2447,6 +2660,11 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
}
kvm_x86_ops->decache_regs(vcpu);
kvm_x86_ops->skip_emulated_instruction(vcpu);
+ KVMTRACE_5D(CPUID, vcpu, function,
+ (u32)vcpu->arch.regs[VCPU_REGS_RAX],
+ (u32)vcpu->arch.regs[VCPU_REGS_RBX],
+ (u32)vcpu->arch.regs[VCPU_REGS_RCX],
+ (u32)vcpu->arch.regs[VCPU_REGS_RDX], handler);
}
EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
@@ -2469,7 +2687,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu,
struct kvm_run *kvm_run)
{
kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
- kvm_run->cr8 = get_cr8(vcpu);
+ kvm_run->cr8 = kvm_get_cr8(vcpu);
kvm_run->apic_base = kvm_get_apic_base(vcpu);
if (irqchip_in_kernel(vcpu->kvm))
kvm_run->ready_for_interrupt_injection = 1;
@@ -2509,16 +2727,17 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
int r;
- if (unlikely(vcpu->arch.mp_state == VCPU_MP_STATE_SIPI_RECEIVED)) {
+ if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) {
pr_debug("vcpu %d received sipi with vector # %x\n",
vcpu->vcpu_id, vcpu->arch.sipi_vector);
kvm_lapic_reset(vcpu);
r = kvm_x86_ops->vcpu_reset(vcpu);
if (r)
return r;
- vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE;
+ vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
}
+ down_read(&vcpu->kvm->slots_lock);
vapic_enter(vcpu);
preempted:
@@ -2526,6 +2745,10 @@ preempted:
kvm_x86_ops->guest_debug_pre(vcpu);
again:
+ if (vcpu->requests)
+ if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
+ kvm_mmu_unload(vcpu);
+
r = kvm_mmu_reload(vcpu);
if (unlikely(r))
goto out;
@@ -2539,6 +2762,11 @@ again:
r = 0;
goto out;
}
+ if (test_and_clear_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests)) {
+ kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
+ r = 0;
+ goto out;
+ }
}
kvm_inject_pending_timer_irqs(vcpu);
@@ -2557,6 +2785,14 @@ again:
goto out;
}
+ if (vcpu->requests)
+ if (test_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) {
+ local_irq_enable();
+ preempt_enable();
+ r = 1;
+ goto out;
+ }
+
if (signal_pending(current)) {
local_irq_enable();
preempt_enable();
@@ -2566,6 +2802,13 @@ again:
goto out;
}
+ vcpu->guest_mode = 1;
+ /*
+ * Make sure that guest_mode assignment won't happen after
+ * testing the pending IRQ vector bitmap.
+ */
+ smp_wmb();
+
if (vcpu->arch.exception.pending)
__queue_exception(vcpu);
else if (irqchip_in_kernel(vcpu->kvm))
@@ -2575,13 +2818,15 @@ again:
kvm_lapic_sync_to_vapic(vcpu);
- vcpu->guest_mode = 1;
+ up_read(&vcpu->kvm->slots_lock);
+
kvm_guest_enter();
if (vcpu->requests)
if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
kvm_x86_ops->tlb_flush(vcpu);
+ KVMTRACE_0D(VMENTRY, vcpu, entryexit);
kvm_x86_ops->run(vcpu, kvm_run);
vcpu->guest_mode = 0;
@@ -2601,6 +2846,8 @@ again:
preempt_enable();
+ down_read(&vcpu->kvm->slots_lock);
+
/*
* Profile KVM exit RIPs:
*/
@@ -2628,14 +2875,18 @@ again:
}
out:
+ up_read(&vcpu->kvm->slots_lock);
if (r > 0) {
kvm_resched(vcpu);
+ down_read(&vcpu->kvm->slots_lock);
goto preempted;
}
post_kvm_run_save(vcpu, kvm_run);
+ down_read(&vcpu->kvm->slots_lock);
vapic_exit(vcpu);
+ up_read(&vcpu->kvm->slots_lock);
return r;
}
@@ -2647,7 +2898,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
vcpu_load(vcpu);
- if (unlikely(vcpu->arch.mp_state == VCPU_MP_STATE_UNINITIALIZED)) {
+ if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
kvm_vcpu_block(vcpu);
vcpu_put(vcpu);
return -EAGAIN;
@@ -2658,7 +2909,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
/* re-sync apic's tpr */
if (!irqchip_in_kernel(vcpu->kvm))
- set_cr8(vcpu, kvm_run->cr8);
+ kvm_set_cr8(vcpu, kvm_run->cr8);
if (vcpu->arch.pio.cur_count) {
r = complete_pio(vcpu);
@@ -2670,9 +2921,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
vcpu->mmio_read_completed = 1;
vcpu->mmio_needed = 0;
+
+ down_read(&vcpu->kvm->slots_lock);
r = emulate_instruction(vcpu, kvm_run,
vcpu->arch.mmio_fault_cr2, 0,
EMULTYPE_NO_DECODE);
+ up_read(&vcpu->kvm->slots_lock);
if (r == EMULATE_DO_MMIO) {
/*
* Read-modify-write. Back to userspace.
@@ -2773,7 +3027,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
static void get_segment(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg)
{
- return kvm_x86_ops->get_segment(vcpu, var, seg);
+ kvm_x86_ops->get_segment(vcpu, var, seg);
}
void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
@@ -2816,7 +3070,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
sregs->cr2 = vcpu->arch.cr2;
sregs->cr3 = vcpu->arch.cr3;
sregs->cr4 = vcpu->arch.cr4;
- sregs->cr8 = get_cr8(vcpu);
+ sregs->cr8 = kvm_get_cr8(vcpu);
sregs->efer = vcpu->arch.shadow_efer;
sregs->apic_base = kvm_get_apic_base(vcpu);
@@ -2836,12 +3090,438 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
return 0;
}
+int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
+ struct kvm_mp_state *mp_state)
+{
+ vcpu_load(vcpu);
+ mp_state->mp_state = vcpu->arch.mp_state;
+ vcpu_put(vcpu);
+ return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
+ struct kvm_mp_state *mp_state)
+{
+ vcpu_load(vcpu);
+ vcpu->arch.mp_state = mp_state->mp_state;
+ vcpu_put(vcpu);
+ return 0;
+}
+
static void set_segment(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg)
{
- return kvm_x86_ops->set_segment(vcpu, var, seg);
+ kvm_x86_ops->set_segment(vcpu, var, seg);
+}
+
+static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector,
+ struct kvm_segment *kvm_desct)
+{
+ kvm_desct->base = seg_desc->base0;
+ kvm_desct->base |= seg_desc->base1 << 16;
+ kvm_desct->base |= seg_desc->base2 << 24;
+ kvm_desct->limit = seg_desc->limit0;
+ kvm_desct->limit |= seg_desc->limit << 16;
+ kvm_desct->selector = selector;
+ kvm_desct->type = seg_desc->type;
+ kvm_desct->present = seg_desc->p;
+ kvm_desct->dpl = seg_desc->dpl;
+ kvm_desct->db = seg_desc->d;
+ kvm_desct->s = seg_desc->s;
+ kvm_desct->l = seg_desc->l;
+ kvm_desct->g = seg_desc->g;
+ kvm_desct->avl = seg_desc->avl;
+ if (!selector)
+ kvm_desct->unusable = 1;
+ else
+ kvm_desct->unusable = 0;
+ kvm_desct->padding = 0;
+}
+
+static void get_segment_descritptor_dtable(struct kvm_vcpu *vcpu,
+ u16 selector,
+ struct descriptor_table *dtable)
+{
+ if (selector & 1 << 2) {
+ struct kvm_segment kvm_seg;
+
+ get_segment(vcpu, &kvm_seg, VCPU_SREG_LDTR);
+
+ if (kvm_seg.unusable)
+ dtable->limit = 0;
+ else
+ dtable->limit = kvm_seg.limit;
+ dtable->base = kvm_seg.base;
+ }
+ else
+ kvm_x86_ops->get_gdt(vcpu, dtable);
+}
+
+/* allowed just for 8 bytes segments */
+static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
+ struct desc_struct *seg_desc)
+{
+ struct descriptor_table dtable;
+ u16 index = selector >> 3;
+
+ get_segment_descritptor_dtable(vcpu, selector, &dtable);
+
+ if (dtable.limit < index * 8 + 7) {
+ kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc);
+ return 1;
+ }
+ return kvm_read_guest(vcpu->kvm, dtable.base + index * 8, seg_desc, 8);
+}
+
+/* allowed just for 8 bytes segments */
+static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
+ struct desc_struct *seg_desc)
+{
+ struct descriptor_table dtable;
+ u16 index = selector >> 3;
+
+ get_segment_descritptor_dtable(vcpu, selector, &dtable);
+
+ if (dtable.limit < index * 8 + 7)
+ return 1;
+ return kvm_write_guest(vcpu->kvm, dtable.base + index * 8, seg_desc, 8);
+}
+
+static u32 get_tss_base_addr(struct kvm_vcpu *vcpu,
+ struct desc_struct *seg_desc)
+{
+ u32 base_addr;
+
+ base_addr = seg_desc->base0;
+ base_addr |= (seg_desc->base1 << 16);
+ base_addr |= (seg_desc->base2 << 24);
+
+ return base_addr;
+}
+
+static int load_tss_segment32(struct kvm_vcpu *vcpu,
+ struct desc_struct *seg_desc,
+ struct tss_segment_32 *tss)
+{
+ u32 base_addr;
+
+ base_addr = get_tss_base_addr(vcpu, seg_desc);
+
+ return kvm_read_guest(vcpu->kvm, base_addr, tss,
+ sizeof(struct tss_segment_32));
+}
+
+static int save_tss_segment32(struct kvm_vcpu *vcpu,
+ struct desc_struct *seg_desc,
+ struct tss_segment_32 *tss)
+{
+ u32 base_addr;
+
+ base_addr = get_tss_base_addr(vcpu, seg_desc);
+
+ return kvm_write_guest(vcpu->kvm, base_addr, tss,
+ sizeof(struct tss_segment_32));
+}
+
+static int load_tss_segment16(struct kvm_vcpu *vcpu,
+ struct desc_struct *seg_desc,
+ struct tss_segment_16 *tss)
+{
+ u32 base_addr;
+
+ base_addr = get_tss_base_addr(vcpu, seg_desc);
+
+ return kvm_read_guest(vcpu->kvm, base_addr, tss,
+ sizeof(struct tss_segment_16));
+}
+
+static int save_tss_segment16(struct kvm_vcpu *vcpu,
+ struct desc_struct *seg_desc,
+ struct tss_segment_16 *tss)
+{
+ u32 base_addr;
+
+ base_addr = get_tss_base_addr(vcpu, seg_desc);
+
+ return kvm_write_guest(vcpu->kvm, base_addr, tss,
+ sizeof(struct tss_segment_16));
+}
+
+static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg)
+{
+ struct kvm_segment kvm_seg;
+
+ get_segment(vcpu, &kvm_seg, seg);
+ return kvm_seg.selector;
+}
+
+static int load_segment_descriptor_to_kvm_desct(struct kvm_vcpu *vcpu,
+ u16 selector,
+ struct kvm_segment *kvm_seg)
+{
+ struct desc_struct seg_desc;
+
+ if (load_guest_segment_descriptor(vcpu, selector, &seg_desc))
+ return 1;
+ seg_desct_to_kvm_desct(&seg_desc, selector, kvm_seg);
+ return 0;
+}
+
+static int load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
+ int type_bits, int seg)
+{
+ struct kvm_segment kvm_seg;
+
+ if (load_segment_descriptor_to_kvm_desct(vcpu, selector, &kvm_seg))
+ return 1;
+ kvm_seg.type |= type_bits;
+
+ if (seg != VCPU_SREG_SS && seg != VCPU_SREG_CS &&
+ seg != VCPU_SREG_LDTR)
+ if (!kvm_seg.s)
+ kvm_seg.unusable = 1;
+
+ set_segment(vcpu, &kvm_seg, seg);
+ return 0;
+}
+
+static void save_state_to_tss32(struct kvm_vcpu *vcpu,
+ struct tss_segment_32 *tss)
+{
+ tss->cr3 = vcpu->arch.cr3;
+ tss->eip = vcpu->arch.rip;
+ tss->eflags = kvm_x86_ops->get_rflags(vcpu);
+ tss->eax = vcpu->arch.regs[VCPU_REGS_RAX];
+ tss->ecx = vcpu->arch.regs[VCPU_REGS_RCX];
+ tss->edx = vcpu->arch.regs[VCPU_REGS_RDX];
+ tss->ebx = vcpu->arch.regs[VCPU_REGS_RBX];
+ tss->esp = vcpu->arch.regs[VCPU_REGS_RSP];
+ tss->ebp = vcpu->arch.regs[VCPU_REGS_RBP];
+ tss->esi = vcpu->arch.regs[VCPU_REGS_RSI];
+ tss->edi = vcpu->arch.regs[VCPU_REGS_RDI];
+
+ tss->es = get_segment_selector(vcpu, VCPU_SREG_ES);
+ tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS);
+ tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS);
+ tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS);
+ tss->fs = get_segment_selector(vcpu, VCPU_SREG_FS);
+ tss->gs = get_segment_selector(vcpu, VCPU_SREG_GS);
+ tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR);
+ tss->prev_task_link = get_segment_selector(vcpu, VCPU_SREG_TR);
+}
+
+static int load_state_from_tss32(struct kvm_vcpu *vcpu,
+ struct tss_segment_32 *tss)
+{
+ kvm_set_cr3(vcpu, tss->cr3);
+
+ vcpu->arch.rip = tss->eip;
+ kvm_x86_ops->set_rflags(vcpu, tss->eflags | 2);
+
+ vcpu->arch.regs[VCPU_REGS_RAX] = tss->eax;
+ vcpu->arch.regs[VCPU_REGS_RCX] = tss->ecx;
+ vcpu->arch.regs[VCPU_REGS_RDX] = tss->edx;
+ vcpu->arch.regs[VCPU_REGS_RBX] = tss->ebx;
+ vcpu->arch.regs[VCPU_REGS_RSP] = tss->esp;
+ vcpu->arch.regs[VCPU_REGS_RBP] = tss->ebp;
+ vcpu->arch.regs[VCPU_REGS_RSI] = tss->esi;
+ vcpu->arch.regs[VCPU_REGS_RDI] = tss->edi;
+
+ if (load_segment_descriptor(vcpu, tss->ldt_selector, 0, VCPU_SREG_LDTR))
+ return 1;
+
+ if (load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES))
+ return 1;
+
+ if (load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS))
+ return 1;
+
+ if (load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS))
+ return 1;
+
+ if (load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS))
+ return 1;
+
+ if (load_segment_descriptor(vcpu, tss->fs, 1, VCPU_SREG_FS))
+ return 1;
+
+ if (load_segment_descriptor(vcpu, tss->gs, 1, VCPU_SREG_GS))
+ return 1;
+ return 0;
+}
+
+static void save_state_to_tss16(struct kvm_vcpu *vcpu,
+ struct tss_segment_16 *tss)
+{
+ tss->ip = vcpu->arch.rip;
+ tss->flag = kvm_x86_ops->get_rflags(vcpu);
+ tss->ax = vcpu->arch.regs[VCPU_REGS_RAX];
+ tss->cx = vcpu->arch.regs[VCPU_REGS_RCX];
+ tss->dx = vcpu->arch.regs[VCPU_REGS_RDX];
+ tss->bx = vcpu->arch.regs[VCPU_REGS_RBX];
+ tss->sp = vcpu->arch.regs[VCPU_REGS_RSP];
+ tss->bp = vcpu->arch.regs[VCPU_REGS_RBP];
+ tss->si = vcpu->arch.regs[VCPU_REGS_RSI];
+ tss->di = vcpu->arch.regs[VCPU_REGS_RDI];
+
+ tss->es = get_segment_selector(vcpu, VCPU_SREG_ES);
+ tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS);
+ tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS);
+ tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS);
+ tss->ldt = get_segment_selector(vcpu, VCPU_SREG_LDTR);
+ tss->prev_task_link = get_segment_selector(vcpu, VCPU_SREG_TR);
+}
+
+static int load_state_from_tss16(struct kvm_vcpu *vcpu,
+ struct tss_segment_16 *tss)
+{
+ vcpu->arch.rip = tss->ip;
+ kvm_x86_ops->set_rflags(vcpu, tss->flag | 2);
+ vcpu->arch.regs[VCPU_REGS_RAX] = tss->ax;
+ vcpu->arch.regs[VCPU_REGS_RCX] = tss->cx;
+ vcpu->arch.regs[VCPU_REGS_RDX] = tss->dx;
+ vcpu->arch.regs[VCPU_REGS_RBX] = tss->bx;
+ vcpu->arch.regs[VCPU_REGS_RSP] = tss->sp;
+ vcpu->arch.regs[VCPU_REGS_RBP] = tss->bp;
+ vcpu->arch.regs[VCPU_REGS_RSI] = tss->si;
+ vcpu->arch.regs[VCPU_REGS_RDI] = tss->di;
+
+ if (load_segment_descriptor(vcpu, tss->ldt, 0, VCPU_SREG_LDTR))
+ return 1;
+
+ if (load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES))
+ return 1;
+
+ if (load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS))
+ return 1;
+
+ if (load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS))
+ return 1;
+
+ if (load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS))
+ return 1;
+ return 0;
+}
+
+int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector,
+ struct desc_struct *cseg_desc,
+ struct desc_struct *nseg_desc)
+{
+ struct tss_segment_16 tss_segment_16;
+ int ret = 0;
+
+ if (load_tss_segment16(vcpu, cseg_desc, &tss_segment_16))
+ goto out;
+
+ save_state_to_tss16(vcpu, &tss_segment_16);
+ save_tss_segment16(vcpu, cseg_desc, &tss_segment_16);
+
+ if (load_tss_segment16(vcpu, nseg_desc, &tss_segment_16))
+ goto out;
+ if (load_state_from_tss16(vcpu, &tss_segment_16))
+ goto out;
+
+ ret = 1;
+out:
+ return ret;
+}
+
+int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector,
+ struct desc_struct *cseg_desc,
+ struct desc_struct *nseg_desc)
+{
+ struct tss_segment_32 tss_segment_32;
+ int ret = 0;
+
+ if (load_tss_segment32(vcpu, cseg_desc, &tss_segment_32))
+ goto out;
+
+ save_state_to_tss32(vcpu, &tss_segment_32);
+ save_tss_segment32(vcpu, cseg_desc, &tss_segment_32);
+
+ if (load_tss_segment32(vcpu, nseg_desc, &tss_segment_32))
+ goto out;
+ if (load_state_from_tss32(vcpu, &tss_segment_32))
+ goto out;
+
+ ret = 1;
+out:
+ return ret;
}
+int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
+{
+ struct kvm_segment tr_seg;
+ struct desc_struct cseg_desc;
+ struct desc_struct nseg_desc;
+ int ret = 0;
+
+ get_segment(vcpu, &tr_seg, VCPU_SREG_TR);
+
+ if (load_guest_segment_descriptor(vcpu, tss_selector, &nseg_desc))
+ goto out;
+
+ if (load_guest_segment_descriptor(vcpu, tr_seg.selector, &cseg_desc))
+ goto out;
+
+
+ if (reason != TASK_SWITCH_IRET) {
+ int cpl;
+
+ cpl = kvm_x86_ops->get_cpl(vcpu);
+ if ((tss_selector & 3) > nseg_desc.dpl || cpl > nseg_desc.dpl) {
+ kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
+ return 1;
+ }
+ }
+
+ if (!nseg_desc.p || (nseg_desc.limit0 | nseg_desc.limit << 16) < 0x67) {
+ kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc);
+ return 1;
+ }
+
+ if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
+ cseg_desc.type &= ~(1 << 8); //clear the B flag
+ save_guest_segment_descriptor(vcpu, tr_seg.selector,
+ &cseg_desc);
+ }
+
+ if (reason == TASK_SWITCH_IRET) {
+ u32 eflags = kvm_x86_ops->get_rflags(vcpu);
+ kvm_x86_ops->set_rflags(vcpu, eflags & ~X86_EFLAGS_NT);
+ }
+
+ kvm_x86_ops->skip_emulated_instruction(vcpu);
+ kvm_x86_ops->cache_regs(vcpu);
+
+ if (nseg_desc.type & 8)
+ ret = kvm_task_switch_32(vcpu, tss_selector, &cseg_desc,
+ &nseg_desc);
+ else
+ ret = kvm_task_switch_16(vcpu, tss_selector, &cseg_desc,
+ &nseg_desc);
+
+ if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) {
+ u32 eflags = kvm_x86_ops->get_rflags(vcpu);
+ kvm_x86_ops->set_rflags(vcpu, eflags | X86_EFLAGS_NT);
+ }
+
+ if (reason != TASK_SWITCH_IRET) {
+ nseg_desc.type |= (1 << 8);
+ save_guest_segment_descriptor(vcpu, tss_selector,
+ &nseg_desc);
+ }
+
+ kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 | X86_CR0_TS);
+ seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg);
+ tr_seg.type = 11;
+ set_segment(vcpu, &tr_seg, VCPU_SREG_TR);
+out:
+ kvm_x86_ops->decache_regs(vcpu);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(kvm_task_switch);
+
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
@@ -2862,12 +3542,10 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3;
vcpu->arch.cr3 = sregs->cr3;
- set_cr8(vcpu, sregs->cr8);
+ kvm_set_cr8(vcpu, sregs->cr8);
mmu_reset_needed |= vcpu->arch.shadow_efer != sregs->efer;
-#ifdef CONFIG_X86_64
kvm_x86_ops->set_efer(vcpu, sregs->efer);
-#endif
kvm_set_apic_base(vcpu, sregs->apic_base);
kvm_x86_ops->decache_cr4_guest_bits(vcpu);
@@ -3141,9 +3819,9 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
vcpu->arch.mmu.root_hpa = INVALID_PAGE;
if (!irqchip_in_kernel(kvm) || vcpu->vcpu_id == 0)
- vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE;
+ vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
else
- vcpu->arch.mp_state = VCPU_MP_STATE_UNINITIALIZED;
+ vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
page = alloc_page(GFP_KERNEL | __GFP_ZERO);
if (!page) {
@@ -3175,7 +3853,9 @@ fail:
void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
kvm_free_lapic(vcpu);
+ down_read(&vcpu->kvm->slots_lock);
kvm_mmu_destroy(vcpu);
+ up_read(&vcpu->kvm->slots_lock);
free_page((unsigned long)vcpu->arch.pio_data);
}
@@ -3219,10 +3899,13 @@ static void kvm_free_vcpus(struct kvm *kvm)
void kvm_arch_destroy_vm(struct kvm *kvm)
{
+ kvm_free_pit(kvm);
kfree(kvm->arch.vpic);
kfree(kvm->arch.vioapic);
kvm_free_vcpus(kvm);
kvm_free_physmem(kvm);
+ if (kvm->arch.apic_access_page)
+ put_page(kvm->arch.apic_access_page);
kfree(kvm);
}
@@ -3278,8 +3961,8 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
- return vcpu->arch.mp_state == VCPU_MP_STATE_RUNNABLE
- || vcpu->arch.mp_state == VCPU_MP_STATE_SIPI_RECEIVED;
+ return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE
+ || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED;
}
static void vcpu_kick_intr(void *info)
@@ -3293,11 +3976,17 @@ static void vcpu_kick_intr(void *info)
void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
{
int ipi_pcpu = vcpu->cpu;
+ int cpu = get_cpu();
if (waitqueue_active(&vcpu->wq)) {
wake_up_interruptible(&vcpu->wq);
++vcpu->stat.halt_wakeup;
}
- if (vcpu->guest_mode)
+ /*
+ * We may be called synchronously with irqs disabled in guest mode,
+ * So need not to call smp_call_function_single() in that case.
+ */
+ if (vcpu->guest_mode && vcpu->cpu != cpu)
smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0, 0);
+ put_cpu();
}
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 79586003397a..2ca08386f993 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -65,6 +65,14 @@
#define MemAbs (1<<9) /* Memory operand is absolute displacement */
#define String (1<<10) /* String instruction (rep capable) */
#define Stack (1<<11) /* Stack instruction (push/pop) */
+#define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */
+#define GroupDual (1<<15) /* Alternate decoding of mod == 3 */
+#define GroupMask 0xff /* Group number stored in bits 0:7 */
+
+enum {
+ Group1_80, Group1_81, Group1_82, Group1_83,
+ Group1A, Group3_Byte, Group3, Group4, Group5, Group7,
+};
static u16 opcode_table[256] = {
/* 0x00 - 0x07 */
@@ -123,14 +131,14 @@ static u16 opcode_table[256] = {
ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
/* 0x80 - 0x87 */
- ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM,
- ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM,
+ Group | Group1_80, Group | Group1_81,
+ Group | Group1_82, Group | Group1_83,
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
/* 0x88 - 0x8F */
ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov,
ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
- 0, ModRM | DstReg, 0, DstMem | SrcNone | ModRM | Mov | Stack,
+ 0, ModRM | DstReg, 0, Group | Group1A,
/* 0x90 - 0x9F */
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, ImplicitOps | Stack, ImplicitOps | Stack, 0, 0,
@@ -164,16 +172,15 @@ static u16 opcode_table[256] = {
0, 0, 0, 0,
/* 0xF0 - 0xF7 */
0, 0, 0, 0,
- ImplicitOps, ImplicitOps,
- ByteOp | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM,
+ ImplicitOps, ImplicitOps, Group | Group3_Byte, Group | Group3,
/* 0xF8 - 0xFF */
ImplicitOps, 0, ImplicitOps, ImplicitOps,
- 0, 0, ByteOp | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM
+ 0, 0, Group | Group4, Group | Group5,
};
static u16 twobyte_table[256] = {
/* 0x00 - 0x0F */
- 0, SrcMem | ModRM | DstReg, 0, 0, 0, 0, ImplicitOps, 0,
+ 0, Group | GroupDual | Group7, 0, 0, 0, 0, ImplicitOps, 0,
ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0,
/* 0x10 - 0x1F */
0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0,
@@ -229,6 +236,56 @@ static u16 twobyte_table[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
+static u16 group_table[] = {
+ [Group1_80*8] =
+ ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
+ ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
+ ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
+ ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
+ [Group1_81*8] =
+ DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM,
+ DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM,
+ DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM,
+ DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM,
+ [Group1_82*8] =
+ ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
+ ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
+ ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
+ ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
+ [Group1_83*8] =
+ DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM,
+ DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM,
+ DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM,
+ DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM,
+ [Group1A*8] =
+ DstMem | SrcNone | ModRM | Mov | Stack, 0, 0, 0, 0, 0, 0, 0,
+ [Group3_Byte*8] =
+ ByteOp | SrcImm | DstMem | ModRM, 0,
+ ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM,
+ 0, 0, 0, 0,
+ [Group3*8] =
+ DstMem | SrcImm | ModRM | SrcImm, 0,
+ DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM,
+ 0, 0, 0, 0,
+ [Group4*8] =
+ ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM,
+ 0, 0, 0, 0, 0, 0,
+ [Group5*8] =
+ DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, 0, 0,
+ SrcMem | ModRM, 0, SrcMem | ModRM | Stack, 0,
+ [Group7*8] =
+ 0, 0, ModRM | SrcMem, ModRM | SrcMem,
+ SrcNone | ModRM | DstMem | Mov, 0,
+ SrcMem16 | ModRM | Mov, SrcMem | ModRM | ByteOp,
+};
+
+static u16 group2_table[] = {
+ [Group7*8] =
+ SrcNone | ModRM, 0, 0, 0,
+ SrcNone | ModRM | DstMem | Mov, 0,
+ SrcMem16 | ModRM | Mov, 0,
+};
+
/* EFLAGS bit definitions. */
#define EFLG_OF (1<<11)
#define EFLG_DF (1<<10)
@@ -317,7 +374,7 @@ static u16 twobyte_table[256] = {
#define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \
do { \
- unsigned long _tmp; \
+ unsigned long __tmp; \
switch ((_dst).bytes) { \
case 1: \
__asm__ __volatile__ ( \
@@ -325,7 +382,7 @@ static u16 twobyte_table[256] = {
_op"b %"_bx"3,%1; " \
_POST_EFLAGS("0", "4", "2") \
: "=m" (_eflags), "=m" ((_dst).val), \
- "=&r" (_tmp) \
+ "=&r" (__tmp) \
: _by ((_src).val), "i" (EFLAGS_MASK)); \
break; \
default: \
@@ -426,29 +483,40 @@ static u16 twobyte_table[256] = {
(_type)_x; \
})
+static inline unsigned long ad_mask(struct decode_cache *c)
+{
+ return (1UL << (c->ad_bytes << 3)) - 1;
+}
+
/* Access/update address held in a register, based on addressing mode. */
-#define address_mask(reg) \
- ((c->ad_bytes == sizeof(unsigned long)) ? \
- (reg) : ((reg) & ((1UL << (c->ad_bytes << 3)) - 1)))
-#define register_address(base, reg) \
- ((base) + address_mask(reg))
-#define register_address_increment(reg, inc) \
- do { \
- /* signed type ensures sign extension to long */ \
- int _inc = (inc); \
- if (c->ad_bytes == sizeof(unsigned long)) \
- (reg) += _inc; \
- else \
- (reg) = ((reg) & \
- ~((1UL << (c->ad_bytes << 3)) - 1)) | \
- (((reg) + _inc) & \
- ((1UL << (c->ad_bytes << 3)) - 1)); \
- } while (0)
+static inline unsigned long
+address_mask(struct decode_cache *c, unsigned long reg)
+{
+ if (c->ad_bytes == sizeof(unsigned long))
+ return reg;
+ else
+ return reg & ad_mask(c);
+}
-#define JMP_REL(rel) \
- do { \
- register_address_increment(c->eip, rel); \
- } while (0)
+static inline unsigned long
+register_address(struct decode_cache *c, unsigned long base, unsigned long reg)
+{
+ return base + address_mask(c, reg);
+}
+
+static inline void
+register_address_increment(struct decode_cache *c, unsigned long *reg, int inc)
+{
+ if (c->ad_bytes == sizeof(unsigned long))
+ *reg += inc;
+ else
+ *reg = (*reg & ~ad_mask(c)) | ((*reg + inc) & ad_mask(c));
+}
+
+static inline void jmp_rel(struct decode_cache *c, int rel)
+{
+ register_address_increment(c, &c->eip, rel);
+}
static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
struct x86_emulate_ops *ops,
@@ -763,7 +831,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
struct decode_cache *c = &ctxt->decode;
int rc = 0;
int mode = ctxt->mode;
- int def_op_bytes, def_ad_bytes;
+ int def_op_bytes, def_ad_bytes, group;
/* Shadow copy of register state. Committed on successful emulation. */
@@ -864,12 +932,24 @@ done_prefixes:
c->b = insn_fetch(u8, 1, c->eip);
c->d = twobyte_table[c->b];
}
+ }
- /* Unrecognised? */
- if (c->d == 0) {
- DPRINTF("Cannot emulate %02x\n", c->b);
- return -1;
- }
+ if (c->d & Group) {
+ group = c->d & GroupMask;
+ c->modrm = insn_fetch(u8, 1, c->eip);
+ --c->eip;
+
+ group = (group << 3) + ((c->modrm >> 3) & 7);
+ if ((c->d & GroupDual) && (c->modrm >> 6) == 3)
+ c->d = group2_table[group];
+ else
+ c->d = group_table[group];
+ }
+
+ /* Unrecognised? */
+ if (c->d == 0) {
+ DPRINTF("Cannot emulate %02x\n", c->b);
+ return -1;
}
if (mode == X86EMUL_MODE_PROT64 && (c->d & Stack))
@@ -924,6 +1004,7 @@ done_prefixes:
*/
if ((c->d & ModRM) && c->modrm_mod == 3) {
c->src.type = OP_REG;
+ c->src.val = c->modrm_val;
break;
}
c->src.type = OP_MEM;
@@ -967,6 +1048,7 @@ done_prefixes:
case DstMem:
if ((c->d & ModRM) && c->modrm_mod == 3) {
c->dst.type = OP_REG;
+ c->dst.val = c->dst.orig_val = c->modrm_val;
break;
}
c->dst.type = OP_MEM;
@@ -984,8 +1066,8 @@ static inline void emulate_push(struct x86_emulate_ctxt *ctxt)
c->dst.type = OP_MEM;
c->dst.bytes = c->op_bytes;
c->dst.val = c->src.val;
- register_address_increment(c->regs[VCPU_REGS_RSP], -c->op_bytes);
- c->dst.ptr = (void *) register_address(ctxt->ss_base,
+ register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes);
+ c->dst.ptr = (void *) register_address(c, ctxt->ss_base,
c->regs[VCPU_REGS_RSP]);
}
@@ -995,13 +1077,13 @@ static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
struct decode_cache *c = &ctxt->decode;
int rc;
- rc = ops->read_std(register_address(ctxt->ss_base,
+ rc = ops->read_std(register_address(c, ctxt->ss_base,
c->regs[VCPU_REGS_RSP]),
&c->dst.val, c->dst.bytes, ctxt->vcpu);
if (rc != 0)
return rc;
- register_address_increment(c->regs[VCPU_REGS_RSP], c->dst.bytes);
+ register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->dst.bytes);
return 0;
}
@@ -1043,26 +1125,6 @@ static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt,
switch (c->modrm_reg) {
case 0 ... 1: /* test */
- /*
- * Special case in Grp3: test has an immediate
- * source operand.
- */
- c->src.type = OP_IMM;
- c->src.ptr = (unsigned long *)c->eip;
- c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
- if (c->src.bytes == 8)
- c->src.bytes = 4;
- switch (c->src.bytes) {
- case 1:
- c->src.val = insn_fetch(s8, 1, c->eip);
- break;
- case 2:
- c->src.val = insn_fetch(s16, 2, c->eip);
- break;
- case 4:
- c->src.val = insn_fetch(s32, 4, c->eip);
- break;
- }
emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags);
break;
case 2: /* not */
@@ -1076,7 +1138,6 @@ static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt,
rc = X86EMUL_UNHANDLEABLE;
break;
}
-done:
return rc;
}
@@ -1084,7 +1145,6 @@ static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt,
struct x86_emulate_ops *ops)
{
struct decode_cache *c = &ctxt->decode;
- int rc;
switch (c->modrm_reg) {
case 0: /* inc */
@@ -1094,36 +1154,11 @@ static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt,
emulate_1op("dec", c->dst, ctxt->eflags);
break;
case 4: /* jmp abs */
- if (c->b == 0xff)
- c->eip = c->dst.val;
- else {
- DPRINTF("Cannot emulate %02x\n", c->b);
- return X86EMUL_UNHANDLEABLE;
- }
+ c->eip = c->src.val;
break;
case 6: /* push */
-
- /* 64-bit mode: PUSH always pushes a 64-bit operand. */
-
- if (ctxt->mode == X86EMUL_MODE_PROT64) {
- c->dst.bytes = 8;
- rc = ops->read_std((unsigned long)c->dst.ptr,
- &c->dst.val, 8, ctxt->vcpu);
- if (rc != 0)
- return rc;
- }
- register_address_increment(c->regs[VCPU_REGS_RSP],
- -c->dst.bytes);
- rc = ops->write_emulated(register_address(ctxt->ss_base,
- c->regs[VCPU_REGS_RSP]), &c->dst.val,
- c->dst.bytes, ctxt->vcpu);
- if (rc != 0)
- return rc;
- c->dst.type = OP_NONE;
+ emulate_push(ctxt);
break;
- default:
- DPRINTF("Cannot emulate %02x\n", c->b);
- return X86EMUL_UNHANDLEABLE;
}
return 0;
}
@@ -1361,19 +1396,19 @@ special_insn:
c->dst.type = OP_MEM;
c->dst.bytes = c->op_bytes;
c->dst.val = c->src.val;
- register_address_increment(c->regs[VCPU_REGS_RSP],
+ register_address_increment(c, &c->regs[VCPU_REGS_RSP],
-c->op_bytes);
c->dst.ptr = (void *) register_address(
- ctxt->ss_base, c->regs[VCPU_REGS_RSP]);
+ c, ctxt->ss_base, c->regs[VCPU_REGS_RSP]);
break;
case 0x58 ... 0x5f: /* pop reg */
pop_instruction:
- if ((rc = ops->read_std(register_address(ctxt->ss_base,
+ if ((rc = ops->read_std(register_address(c, ctxt->ss_base,
c->regs[VCPU_REGS_RSP]), c->dst.ptr,
c->op_bytes, ctxt->vcpu)) != 0)
goto done;
- register_address_increment(c->regs[VCPU_REGS_RSP],
+ register_address_increment(c, &c->regs[VCPU_REGS_RSP],
c->op_bytes);
c->dst.type = OP_NONE; /* Disable writeback. */
break;
@@ -1393,9 +1428,9 @@ special_insn:
1,
(c->d & ByteOp) ? 1 : c->op_bytes,
c->rep_prefix ?
- address_mask(c->regs[VCPU_REGS_RCX]) : 1,
+ address_mask(c, c->regs[VCPU_REGS_RCX]) : 1,
(ctxt->eflags & EFLG_DF),
- register_address(ctxt->es_base,
+ register_address(c, ctxt->es_base,
c->regs[VCPU_REGS_RDI]),
c->rep_prefix,
c->regs[VCPU_REGS_RDX]) == 0) {
@@ -1409,9 +1444,9 @@ special_insn:
0,
(c->d & ByteOp) ? 1 : c->op_bytes,
c->rep_prefix ?
- address_mask(c->regs[VCPU_REGS_RCX]) : 1,
+ address_mask(c, c->regs[VCPU_REGS_RCX]) : 1,
(ctxt->eflags & EFLG_DF),
- register_address(c->override_base ?
+ register_address(c, c->override_base ?
*c->override_base :
ctxt->ds_base,
c->regs[VCPU_REGS_RSI]),
@@ -1425,7 +1460,7 @@ special_insn:
int rel = insn_fetch(s8, 1, c->eip);
if (test_cc(c->b, ctxt->eflags))
- JMP_REL(rel);
+ jmp_rel(c, rel);
break;
}
case 0x80 ... 0x83: /* Grp1 */
@@ -1477,7 +1512,7 @@ special_insn:
case 0x88 ... 0x8b: /* mov */
goto mov;
case 0x8d: /* lea r16/r32, m */
- c->dst.val = c->modrm_val;
+ c->dst.val = c->modrm_ea;
break;
case 0x8f: /* pop (sole member of Grp1a) */
rc = emulate_grp1a(ctxt, ops);
@@ -1501,27 +1536,27 @@ special_insn:
case 0xa4 ... 0xa5: /* movs */
c->dst.type = OP_MEM;
c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
- c->dst.ptr = (unsigned long *)register_address(
+ c->dst.ptr = (unsigned long *)register_address(c,
ctxt->es_base,
c->regs[VCPU_REGS_RDI]);
- if ((rc = ops->read_emulated(register_address(
+ if ((rc = ops->read_emulated(register_address(c,
c->override_base ? *c->override_base :
ctxt->ds_base,
c->regs[VCPU_REGS_RSI]),
&c->dst.val,
c->dst.bytes, ctxt->vcpu)) != 0)
goto done;
- register_address_increment(c->regs[VCPU_REGS_RSI],
+ register_address_increment(c, &c->regs[VCPU_REGS_RSI],
(ctxt->eflags & EFLG_DF) ? -c->dst.bytes
: c->dst.bytes);
- register_address_increment(c->regs[VCPU_REGS_RDI],
+ register_address_increment(c, &c->regs[VCPU_REGS_RDI],
(ctxt->eflags & EFLG_DF) ? -c->dst.bytes
: c->dst.bytes);
break;
case 0xa6 ... 0xa7: /* cmps */
c->src.type = OP_NONE; /* Disable writeback. */
c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
- c->src.ptr = (unsigned long *)register_address(
+ c->src.ptr = (unsigned long *)register_address(c,
c->override_base ? *c->override_base :
ctxt->ds_base,
c->regs[VCPU_REGS_RSI]);
@@ -1533,7 +1568,7 @@ special_insn:
c->dst.type = OP_NONE; /* Disable writeback. */
c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
- c->dst.ptr = (unsigned long *)register_address(
+ c->dst.ptr = (unsigned long *)register_address(c,
ctxt->es_base,
c->regs[VCPU_REGS_RDI]);
if ((rc = ops->read_emulated((unsigned long)c->dst.ptr,
@@ -1546,10 +1581,10 @@ special_insn:
emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);
- register_address_increment(c->regs[VCPU_REGS_RSI],
+ register_address_increment(c, &c->regs[VCPU_REGS_RSI],
(ctxt->eflags & EFLG_DF) ? -c->src.bytes
: c->src.bytes);
- register_address_increment(c->regs[VCPU_REGS_RDI],
+ register_address_increment(c, &c->regs[VCPU_REGS_RDI],
(ctxt->eflags & EFLG_DF) ? -c->dst.bytes
: c->dst.bytes);
@@ -1557,11 +1592,11 @@ special_insn:
case 0xaa ... 0xab: /* stos */
c->dst.type = OP_MEM;
c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
- c->dst.ptr = (unsigned long *)register_address(
+ c->dst.ptr = (unsigned long *)register_address(c,
ctxt->es_base,
c->regs[VCPU_REGS_RDI]);
c->dst.val = c->regs[VCPU_REGS_RAX];
- register_address_increment(c->regs[VCPU_REGS_RDI],
+ register_address_increment(c, &c->regs[VCPU_REGS_RDI],
(ctxt->eflags & EFLG_DF) ? -c->dst.bytes
: c->dst.bytes);
break;
@@ -1569,7 +1604,7 @@ special_insn:
c->dst.type = OP_REG;
c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
- if ((rc = ops->read_emulated(register_address(
+ if ((rc = ops->read_emulated(register_address(c,
c->override_base ? *c->override_base :
ctxt->ds_base,
c->regs[VCPU_REGS_RSI]),
@@ -1577,7 +1612,7 @@ special_insn:
c->dst.bytes,
ctxt->vcpu)) != 0)
goto done;
- register_address_increment(c->regs[VCPU_REGS_RSI],
+ register_address_increment(c, &c->regs[VCPU_REGS_RSI],
(ctxt->eflags & EFLG_DF) ? -c->dst.bytes
: c->dst.bytes);
break;
@@ -1616,14 +1651,14 @@ special_insn:
goto cannot_emulate;
}
c->src.val = (unsigned long) c->eip;
- JMP_REL(rel);
+ jmp_rel(c, rel);
c->op_bytes = c->ad_bytes;
emulate_push(ctxt);
break;
}
case 0xe9: /* jmp rel */
case 0xeb: /* jmp rel short */
- JMP_REL(c->src.val);
+ jmp_rel(c, c->src.val);
c->dst.type = OP_NONE; /* Disable writeback. */
break;
case 0xf4: /* hlt */
@@ -1690,6 +1725,8 @@ twobyte_insn:
goto done;
kvm_emulate_hypercall(ctxt->vcpu);
+ /* Disable writeback. */
+ c->dst.type = OP_NONE;
break;
case 2: /* lgdt */
rc = read_descriptor(ctxt, ops, c->src.ptr,
@@ -1697,6 +1734,8 @@ twobyte_insn:
if (rc)
goto done;
realmode_lgdt(ctxt->vcpu, size, address);
+ /* Disable writeback. */
+ c->dst.type = OP_NONE;
break;
case 3: /* lidt/vmmcall */
if (c->modrm_mod == 3 && c->modrm_rm == 1) {
@@ -1712,27 +1751,25 @@ twobyte_insn:
goto done;
realmode_lidt(ctxt->vcpu, size, address);
}
+ /* Disable writeback. */
+ c->dst.type = OP_NONE;
break;
case 4: /* smsw */
- if (c->modrm_mod != 3)
- goto cannot_emulate;
- *(u16 *)&c->regs[c->modrm_rm]
- = realmode_get_cr(ctxt->vcpu, 0);
+ c->dst.bytes = 2;
+ c->dst.val = realmode_get_cr(ctxt->vcpu, 0);
break;
case 6: /* lmsw */
- if (c->modrm_mod != 3)
- goto cannot_emulate;
- realmode_lmsw(ctxt->vcpu, (u16)c->modrm_val,
- &ctxt->eflags);
+ realmode_lmsw(ctxt->vcpu, (u16)c->src.val,
+ &ctxt->eflags);
break;
case 7: /* invlpg*/
emulate_invlpg(ctxt->vcpu, memop);
+ /* Disable writeback. */
+ c->dst.type = OP_NONE;
break;
default:
goto cannot_emulate;
}
- /* Disable writeback. */
- c->dst.type = OP_NONE;
break;
case 0x06:
emulate_clts(ctxt->vcpu);
@@ -1823,7 +1860,7 @@ twobyte_insn:
goto cannot_emulate;
}
if (test_cc(c->b, ctxt->eflags))
- JMP_REL(rel);
+ jmp_rel(c, rel);
c->dst.type = OP_NONE;
break;
}
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 4a4761892951..de236e419cb5 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -287,47 +287,17 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base)
pkmap_page_table = pte;
}
-static void __meminit free_new_highpage(struct page *page)
-{
- init_page_count(page);
- __free_page(page);
- totalhigh_pages++;
-}
-
void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro)
{
if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) {
ClearPageReserved(page);
- free_new_highpage(page);
+ init_page_count(page);
+ __free_page(page);
+ totalhigh_pages++;
} else
SetPageReserved(page);
}
-static int __meminit
-add_one_highpage_hotplug(struct page *page, unsigned long pfn)
-{
- free_new_highpage(page);
- totalram_pages++;
-#ifdef CONFIG_FLATMEM
- max_mapnr = max(pfn, max_mapnr);
-#endif
- num_physpages++;
-
- return 0;
-}
-
-/*
- * Not currently handling the NUMA case.
- * Assuming single node and all memory that
- * has been added dynamically that would be
- * onlined here is in HIGHMEM.
- */
-void __meminit online_page(struct page *page)
-{
- ClearPageReserved(page);
- add_one_highpage_hotplug(page, page_to_pfn(page));
-}
-
#ifndef CONFIG_NUMA
static void __init set_highmem_pages_init(int bad_ppro)
{
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 5fbb8652cf59..32ba13b0f818 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -620,15 +620,6 @@ void __init paging_init(void)
/*
* Memory hotplug specific functions
*/
-void online_page(struct page *page)
-{
- ClearPageReserved(page);
- init_page_count(page);
- __free_page(page);
- totalram_pages++;
- num_physpages++;
-}
-
#ifdef CONFIG_MEMORY_HOTPLUG
/*
* Memory is added always to NORMAL zone. This means you will never get
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index d176b23110cc..804de18abcc2 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -117,8 +117,8 @@ int ioremap_change_attr(unsigned long vaddr, unsigned long size,
* have to convert them into an offset in a page-aligned mapping, but the
* caller shouldn't need to know that small detail.
*/
-static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
- unsigned long prot_val)
+static void __iomem *__ioremap_caller(resource_size_t phys_addr,
+ unsigned long size, unsigned long prot_val, void *caller)
{
unsigned long pfn, offset, vaddr;
resource_size_t last_addr;
@@ -212,7 +212,7 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
/*
* Ok, go for it..
*/
- area = get_vm_area(size, VM_IOREMAP);
+ area = get_vm_area_caller(size, VM_IOREMAP, caller);
if (!area)
return NULL;
area->phys_addr = phys_addr;
@@ -255,7 +255,8 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
*/
void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size)
{
- return __ioremap(phys_addr, size, _PAGE_CACHE_UC);
+ return __ioremap_caller(phys_addr, size, _PAGE_CACHE_UC,
+ __builtin_return_address(0));
}
EXPORT_SYMBOL(ioremap_nocache);
@@ -272,7 +273,8 @@ EXPORT_SYMBOL(ioremap_nocache);
void __iomem *ioremap_wc(unsigned long phys_addr, unsigned long size)
{
if (pat_wc_enabled)
- return __ioremap(phys_addr, size, _PAGE_CACHE_WC);
+ return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WC,
+ __builtin_return_address(0));
else
return ioremap_nocache(phys_addr, size);
}
@@ -280,7 +282,8 @@ EXPORT_SYMBOL(ioremap_wc);
void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size)
{
- return __ioremap(phys_addr, size, _PAGE_CACHE_WB);
+ return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WB,
+ __builtin_return_address(0));
}
EXPORT_SYMBOL(ioremap_cache);
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index e7ca7fc48d12..277446cd30b6 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -387,8 +387,8 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
break;
}
- printk(KERN_INFO "Overlap at 0x%Lx-0x%Lx\n",
- saved_ptr->start, saved_ptr->end);
+ pr_debug(KERN_INFO "Overlap at 0x%Lx-0x%Lx\n",
+ saved_ptr->start, saved_ptr->end);
/* No conflict. Go ahead and add this new entry */
list_add(&new_entry->nd, &saved_ptr->nd);
new_entry = NULL;
@@ -510,7 +510,6 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
{
u64 offset = ((u64) pfn) << PAGE_SHIFT;
unsigned long flags = _PAGE_CACHE_UC_MINUS;
- unsigned long ret_flags;
int retval;
if (!range_is_allowed(pfn, size))
@@ -549,14 +548,12 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
if (flags != _PAGE_CACHE_UC_MINUS) {
retval = reserve_memtype(offset, offset + size, flags, NULL);
} else {
- retval = reserve_memtype(offset, offset + size, -1, &ret_flags);
+ retval = reserve_memtype(offset, offset + size, -1, &flags);
}
if (retval < 0)
return 0;
- flags = ret_flags;
-
if (pfn <= max_pfn_mapped &&
ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) {
free_memtype(offset, offset + size);
diff --git a/arch/x86/pci/Makefile_32 b/arch/x86/pci/Makefile_32
index e9c5caf54e59..2a1516efb542 100644
--- a/arch/x86/pci/Makefile_32
+++ b/arch/x86/pci/Makefile_32
@@ -3,6 +3,7 @@ obj-y := i386.o init.o
obj-$(CONFIG_PCI_BIOS) += pcbios.o
obj-$(CONFIG_PCI_MMCONFIG) += mmconfig_32.o direct.o mmconfig-shared.o
obj-$(CONFIG_PCI_DIRECT) += direct.o
+obj-$(CONFIG_PCI_OLPC) += olpc.o
pci-y := fixup.o
pci-$(CONFIG_ACPI) += acpi.o
diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c
index 343c36337e69..dd30c6076b5d 100644
--- a/arch/x86/pci/init.c
+++ b/arch/x86/pci/init.c
@@ -11,8 +11,12 @@ static __init int pci_access_init(void)
type = pci_direct_probe();
#endif
+
pci_mmcfg_early_init();
+#ifdef CONFIG_PCI_OLPC
+ pci_olpc_init();
+#endif
#ifdef CONFIG_PCI_BIOS
pci_pcbios_init();
#endif
diff --git a/arch/x86/pci/olpc.c b/arch/x86/pci/olpc.c
new file mode 100644
index 000000000000..5e7636558c02
--- /dev/null
+++ b/arch/x86/pci/olpc.c
@@ -0,0 +1,313 @@
+/*
+ * Low-level PCI config space access for OLPC systems who lack the VSA
+ * PCI virtualization software.
+ *
+ * Copyright © 2006 Advanced Micro Devices, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * The AMD Geode chipset (ie: GX2 processor, cs5536 I/O companion device)
+ * has some I/O functions (display, southbridge, sound, USB HCIs, etc)
+ * that more or less behave like PCI devices, but the hardware doesn't
+ * directly implement the PCI configuration space headers. AMD provides
+ * "VSA" (Virtual System Architecture) software that emulates PCI config
+ * space for these devices, by trapping I/O accesses to PCI config register
+ * (CF8/CFC) and running some code in System Management Mode interrupt state.
+ * On the OLPC platform, we don't want to use that VSA code because
+ * (a) it slows down suspend/resume, and (b) recompiling it requires special
+ * compilers that are hard to get. So instead of letting the complex VSA
+ * code simulate the PCI config registers for the on-chip devices, we
+ * just simulate them the easy way, by inserting the code into the
+ * pci_write_config and pci_read_config path. Most of the config registers
+ * are read-only anyway, so the bulk of the simulation is just table lookup.
+ */
+
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <asm/olpc.h>
+#include <asm/geode.h>
+#include "pci.h"
+
+/*
+ * In the tables below, the first two line (8 longwords) are the
+ * size masks that are used when the higher level PCI code determines
+ * the size of the region by writing ~0 to a base address register
+ * and reading back the result.
+ *
+ * The following lines are the values that are read during normal
+ * PCI config access cycles, i.e. not after just having written
+ * ~0 to a base address register.
+ */
+
+static const uint32_t lxnb_hdr[] = { /* dev 1 function 0 - devfn = 8 */
+ 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0,
+
+ 0x281022, 0x2200005, 0x6000021, 0x80f808, /* AMD Vendor ID */
+ 0x0, 0x0, 0x0, 0x0, /* No virtual registers, hence no BAR */
+ 0x0, 0x0, 0x0, 0x28100b,
+ 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0,
+};
+
+static const uint32_t gxnb_hdr[] = { /* dev 1 function 0 - devfn = 8 */
+ 0xfffffffd, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0,
+
+ 0x28100b, 0x2200005, 0x6000021, 0x80f808, /* NSC Vendor ID */
+ 0xac1d, 0x0, 0x0, 0x0, /* I/O BAR - base of virtual registers */
+ 0x0, 0x0, 0x0, 0x28100b,
+ 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0,
+};
+
+static const uint32_t lxfb_hdr[] = { /* dev 1 function 1 - devfn = 9 */
+ 0xff000008, 0xffffc000, 0xffffc000, 0xffffc000,
+ 0xffffc000, 0x0, 0x0, 0x0,
+
+ 0x20811022, 0x2200003, 0x3000000, 0x0, /* AMD Vendor ID */
+ 0xfd000000, 0xfe000000, 0xfe004000, 0xfe008000, /* FB, GP, VG, DF */
+ 0xfe00c000, 0x0, 0x0, 0x30100b, /* VIP */
+ 0x0, 0x0, 0x0, 0x10e, /* INTA, IRQ14 for graphics accel */
+ 0x0, 0x0, 0x0, 0x0,
+ 0x3d0, 0x3c0, 0xa0000, 0x0, /* VG IO, VG IO, EGA FB, MONO FB */
+ 0x0, 0x0, 0x0, 0x0,
+};
+
+static const uint32_t gxfb_hdr[] = { /* dev 1 function 1 - devfn = 9 */
+ 0xff800008, 0xffffc000, 0xffffc000, 0xffffc000,
+ 0x0, 0x0, 0x0, 0x0,
+
+ 0x30100b, 0x2200003, 0x3000000, 0x0, /* NSC Vendor ID */
+ 0xfd000000, 0xfe000000, 0xfe004000, 0xfe008000, /* FB, GP, VG, DF */
+ 0x0, 0x0, 0x0, 0x30100b,
+ 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0,
+ 0x3d0, 0x3c0, 0xa0000, 0x0, /* VG IO, VG IO, EGA FB, MONO FB */
+ 0x0, 0x0, 0x0, 0x0,
+};
+
+static const uint32_t aes_hdr[] = { /* dev 1 function 2 - devfn = 0xa */
+ 0xffffc000, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0,
+
+ 0x20821022, 0x2a00006, 0x10100000, 0x8, /* NSC Vendor ID */
+ 0xfe010000, 0x0, 0x0, 0x0, /* AES registers */
+ 0x0, 0x0, 0x0, 0x20821022,
+ 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0,
+};
+
+
+static const uint32_t isa_hdr[] = { /* dev f function 0 - devfn = 78 */
+ 0xfffffff9, 0xffffff01, 0xffffffc1, 0xffffffe1,
+ 0xffffff81, 0xffffffc1, 0x0, 0x0,
+
+ 0x20901022, 0x2a00049, 0x6010003, 0x802000,
+ 0x18b1, 0x1001, 0x1801, 0x1881, /* SMB-8 GPIO-256 MFGPT-64 IRQ-32 */
+ 0x1401, 0x1841, 0x0, 0x20901022, /* PMS-128 ACPI-64 */
+ 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0xaa5b, /* IRQ steering */
+ 0x0, 0x0, 0x0, 0x0,
+};
+
+static const uint32_t ac97_hdr[] = { /* dev f function 3 - devfn = 7b */
+ 0xffffff81, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0,
+
+ 0x20931022, 0x2a00041, 0x4010001, 0x0,
+ 0x1481, 0x0, 0x0, 0x0, /* I/O BAR-128 */
+ 0x0, 0x0, 0x0, 0x20931022,
+ 0x0, 0x0, 0x0, 0x205, /* IntB, IRQ5 */
+ 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0,
+};
+
+static const uint32_t ohci_hdr[] = { /* dev f function 4 - devfn = 7c */
+ 0xfffff000, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0,
+
+ 0x20941022, 0x2300006, 0xc031002, 0x0,
+ 0xfe01a000, 0x0, 0x0, 0x0, /* MEMBAR-1000 */
+ 0x0, 0x0, 0x0, 0x20941022,
+ 0x0, 0x40, 0x0, 0x40a, /* CapPtr INT-D, IRQA */
+ 0xc8020001, 0x0, 0x0, 0x0, /* Capabilities - 40 is R/O,
+ 44 is mask 8103 (power control) */
+ 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0,
+};
+
+static const uint32_t ehci_hdr[] = { /* dev f function 4 - devfn = 7d */
+ 0xfffff000, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0,
+
+ 0x20951022, 0x2300006, 0xc032002, 0x0,
+ 0xfe01b000, 0x0, 0x0, 0x0, /* MEMBAR-1000 */
+ 0x0, 0x0, 0x0, 0x20951022,
+ 0x0, 0x40, 0x0, 0x40a, /* CapPtr INT-D, IRQA */
+ 0xc8020001, 0x0, 0x0, 0x0, /* Capabilities - 40 is R/O, 44 is
+ mask 8103 (power control) */
+#if 0
+ 0x1, 0x40080000, 0x0, 0x0, /* EECP - see EHCI spec section 2.1.7 */
+#endif
+ 0x01000001, 0x0, 0x0, 0x0, /* EECP - see EHCI spec section 2.1.7 */
+ 0x2020, 0x0, 0x0, 0x0, /* (EHCI page 8) 60 SBRN (R/O),
+ 61 FLADJ (R/W), PORTWAKECAP */
+};
+
+static uint32_t ff_loc = ~0;
+static uint32_t zero_loc;
+static int bar_probing; /* Set after a write of ~0 to a BAR */
+static int is_lx;
+
+#define NB_SLOT 0x1 /* Northbridge - GX chip - Device 1 */
+#define SB_SLOT 0xf /* Southbridge - CS5536 chip - Device F */
+
+static int is_simulated(unsigned int bus, unsigned int devfn)
+{
+ return (!bus && ((PCI_SLOT(devfn) == NB_SLOT) ||
+ (PCI_SLOT(devfn) == SB_SLOT)));
+}
+
+static uint32_t *hdr_addr(const uint32_t *hdr, int reg)
+{
+ uint32_t addr;
+
+ /*
+ * This is a little bit tricky. The header maps consist of
+ * 0x20 bytes of size masks, followed by 0x70 bytes of header data.
+ * In the normal case, when not probing a BAR's size, we want
+ * to access the header data, so we add 0x20 to the reg offset,
+ * thus skipping the size mask area.
+ * In the BAR probing case, we want to access the size mask for
+ * the BAR, so we subtract 0x10 (the config header offset for
+ * BAR0), and don't skip the size mask area.
+ */
+
+ addr = (uint32_t)hdr + reg + (bar_probing ? -0x10 : 0x20);
+
+ bar_probing = 0;
+ return (uint32_t *)addr;
+}
+
+static int pci_olpc_read(unsigned int seg, unsigned int bus,
+ unsigned int devfn, int reg, int len, uint32_t *value)
+{
+ uint32_t *addr;
+
+ /* Use the hardware mechanism for non-simulated devices */
+ if (!is_simulated(bus, devfn))
+ return pci_direct_conf1.read(seg, bus, devfn, reg, len, value);
+
+ /*
+ * No device has config registers past 0x70, so we save table space
+ * by not storing entries for the nonexistent registers
+ */
+ if (reg >= 0x70)
+ addr = &zero_loc;
+ else {
+ switch (devfn) {
+ case 0x8:
+ addr = hdr_addr(is_lx ? lxnb_hdr : gxnb_hdr, reg);
+ break;
+ case 0x9:
+ addr = hdr_addr(is_lx ? lxfb_hdr : gxfb_hdr, reg);
+ break;
+ case 0xa:
+ addr = is_lx ? hdr_addr(aes_hdr, reg) : &ff_loc;
+ break;
+ case 0x78:
+ addr = hdr_addr(isa_hdr, reg);
+ break;
+ case 0x7b:
+ addr = hdr_addr(ac97_hdr, reg);
+ break;
+ case 0x7c:
+ addr = hdr_addr(ohci_hdr, reg);
+ break;
+ case 0x7d:
+ addr = hdr_addr(ehci_hdr, reg);
+ break;
+ default:
+ addr = &ff_loc;
+ break;
+ }
+ }
+ switch (len) {
+ case 1:
+ *value = *(uint8_t *)addr;
+ break;
+ case 2:
+ *value = *(uint16_t *)addr;
+ break;
+ case 4:
+ *value = *addr;
+ break;
+ default:
+ BUG();
+ }
+
+ return 0;
+}
+
+static int pci_olpc_write(unsigned int seg, unsigned int bus,
+ unsigned int devfn, int reg, int len, uint32_t value)
+{
+ /* Use the hardware mechanism for non-simulated devices */
+ if (!is_simulated(bus, devfn))
+ return pci_direct_conf1.write(seg, bus, devfn, reg, len, value);
+
+ /* XXX we may want to extend this to simulate EHCI power management */
+
+ /*
+ * Mostly we just discard writes, but if the write is a size probe
+ * (i.e. writing ~0 to a BAR), we remember it and arrange to return
+ * the appropriate size mask on the next read. This is cheating
+ * to some extent, because it depends on the fact that the next
+ * access after such a write will always be a read to the same BAR.
+ */
+
+ if ((reg >= 0x10) && (reg < 0x2c)) {
+ /* write is to a BAR */
+ if (value == ~0)
+ bar_probing = 1;
+ } else {
+ /*
+ * No warning on writes to ROM BAR, CMD, LATENCY_TIMER,
+ * CACHE_LINE_SIZE, or PM registers.
+ */
+ if ((reg != PCI_ROM_ADDRESS) && (reg != PCI_COMMAND_MASTER) &&
+ (reg != PCI_LATENCY_TIMER) &&
+ (reg != PCI_CACHE_LINE_SIZE) && (reg != 0x44))
+ printk(KERN_WARNING "OLPC PCI: Config write to devfn"
+ " %x reg %x value %x\n", devfn, reg, value);
+ }
+
+ return 0;
+}
+
+static struct pci_raw_ops pci_olpc_conf = {
+ .read = pci_olpc_read,
+ .write = pci_olpc_write,
+};
+
+void __init pci_olpc_init(void)
+{
+ if (!machine_is_olpc() || olpc_has_vsa())
+ return;
+
+ printk(KERN_INFO "PCI: Using configuration type OLPC\n");
+ raw_pci_ops = &pci_olpc_conf;
+ is_lx = is_geode_lx();
+}
diff --git a/arch/x86/pci/pci.h b/arch/x86/pci/pci.h
index 8ef86b5c37c8..c58805a92db5 100644
--- a/arch/x86/pci/pci.h
+++ b/arch/x86/pci/pci.h
@@ -98,6 +98,7 @@ extern struct pci_raw_ops pci_direct_conf1;
extern int pci_direct_probe(void);
extern void pci_direct_init(int type);
extern void pci_pcbios_init(void);
+extern void pci_olpc_init(void);
/* pci-mmconfig.c */
diff --git a/arch/x86/vdso/vdso.S b/arch/x86/vdso/vdso.S
index 4b1620a1529e..1d3aa6b87181 100644
--- a/arch/x86/vdso/vdso.S
+++ b/arch/x86/vdso/vdso.S
@@ -1,2 +1,10 @@
- .section ".vdso","a"
+#include <linux/init.h>
+
+__INITDATA
+
+ .globl vdso_start, vdso_end
+vdso_start:
.incbin "arch/x86/vdso/vdso.so"
+vdso_end:
+
+__FINIT
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 6cbcf65609ad..126766d43aea 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -387,7 +387,7 @@ static void xen_do_pin(unsigned level, unsigned long pfn)
static int pin_page(struct page *page, enum pt_level level)
{
- unsigned pgfl = test_and_set_bit(PG_pinned, &page->flags);
+ unsigned pgfl = TestSetPagePinned(page);
int flush;
if (pgfl)
@@ -468,7 +468,7 @@ void __init xen_mark_init_mm_pinned(void)
static int unpin_page(struct page *page, enum pt_level level)
{
- unsigned pgfl = test_and_clear_bit(PG_pinned, &page->flags);
+ unsigned pgfl = TestClearPagePinned(page);
if (pgfl && !PageHighMem(page)) {
void *pt = lowmem_page_address(page);
diff --git a/arch/xtensa/kernel/asm-offsets.c b/arch/xtensa/kernel/asm-offsets.c
index ef63adadf7f4..070ff8af3a21 100644
--- a/arch/xtensa/kernel/asm-offsets.c
+++ b/arch/xtensa/kernel/asm-offsets.c
@@ -19,12 +19,11 @@
#include <linux/thread_info.h>
#include <linux/ptrace.h>
#include <linux/mm.h>
+#include <linux/kbuild.h>
#include <asm/ptrace.h>
#include <asm/uaccess.h>
-#define DEFINE(sym, val) asm volatile("\n->" #sym " %0 " #val : : "i" (val))
-
int main(void)
{
/* struct pt_regs */