diff options
Diffstat (limited to 'arch/powerpc')
163 files changed, 9141 insertions, 1315 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index fae42da7468d..2cdc35ce8045 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -37,6 +37,10 @@ config RWSEM_XCHGADD_ALGORITHM bool default y +config GENERIC_HWEIGHT + bool + default y + config GENERIC_CALIBRATE_DELAY bool default y @@ -487,7 +491,7 @@ config PPC601_SYNC_FIX If in doubt, say Y here. config TAU - bool "Thermal Management Support" + bool "On-chip CPU temperature sensor support" depends on 6xx help G3 and G4 processors have an on-chip temperature sensor called the @@ -496,7 +500,7 @@ config TAU on-die temperature in /proc/cpuinfo if the cpu supports it. Unfortunately, on some chip revisions, this sensor is very inaccurate - and in some cases, does not work at all, so don't assume the cpu + and in many cases, does not work at all, so don't assume the cpu temp is actually what /proc/cpuinfo says it is. config TAU_INT diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 9254806f7032..8d48e9e7162a 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -110,11 +110,6 @@ config SERIAL_TEXT_DEBUG depends on 4xx || LOPEC || MV64X60 || PPLUS || PRPMC800 || \ PPC_GEN550 || PPC_MPC52xx -config PPC_OCP - bool - depends on IBM_OCP || XILINX_OCP - default y - choice prompt "Early debugging (dangerous)" bool diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index a3fc7a23158f..6ec84d37a337 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -129,13 +129,8 @@ core-y += arch/powerpc/kernel/ \ arch/powerpc/lib/ \ arch/powerpc/sysdev/ \ arch/powerpc/platforms/ -core-$(CONFIG_PPC32) += arch/ppc/kernel/ -core-$(CONFIG_MATH_EMULATION) += arch/ppc/math-emu/ +core-$(CONFIG_MATH_EMULATION) += arch/powerpc/math-emu/ core-$(CONFIG_XMON) += arch/powerpc/xmon/ -core-$(CONFIG_APUS) += arch/ppc/amiga/ -drivers-$(CONFIG_8xx) += arch/ppc/8xx_io/ -drivers-$(CONFIG_4xx) += arch/ppc/4xx_io/ -drivers-$(CONFIG_CPM2) += arch/ppc/8260_io/ drivers-$(CONFIG_OPROFILE) += arch/powerpc/oprofile/ @@ -150,7 +145,7 @@ CPPFLAGS_vmlinux.lds := -Upowerpc BOOT_TARGETS = zImage zImage.initrd znetboot znetboot.initrd vmlinux.sm uImage vmlinux.bin -.PHONY: $(BOOT_TARGETS) +PHONY += $(BOOT_TARGETS) boot := arch/$(ARCH)/boot diff --git a/arch/powerpc/configs/cell_defconfig b/arch/powerpc/configs/cell_defconfig index 3c2acab63736..fe22e54ab2b0 100644 --- a/arch/powerpc/configs/cell_defconfig +++ b/arch/powerpc/configs/cell_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.16-rc6 -# Wed Mar 15 16:19:48 2006 +# Linux kernel version: 2.6.16 +# Thu Mar 23 20:48:09 2006 # CONFIG_PPC64=y CONFIG_64BIT=y @@ -30,6 +30,7 @@ CONFIG_POWER4=y CONFIG_PPC_FPU=y CONFIG_ALTIVEC=y CONFIG_PPC_STD_MMU=y +CONFIG_VIRT_CPU_ACCOUNTING=y CONFIG_SMP=y CONFIG_NR_CPUS=4 @@ -51,7 +52,8 @@ CONFIG_SYSVIPC=y # CONFIG_BSD_PROCESS_ACCT is not set CONFIG_SYSCTL=y # CONFIG_AUDIT is not set -# CONFIG_IKCONFIG is not set +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y # CONFIG_CPUSETS is not set CONFIG_INITRAMFS_SOURCE="" CONFIG_CC_OPTIMIZE_FOR_SIZE=y @@ -85,7 +87,7 @@ CONFIG_MODULE_UNLOAD=y CONFIG_OBSOLETE_MODPARM=y # CONFIG_MODVERSIONS is not set # CONFIG_MODULE_SRCVERSION_ALL is not set -# CONFIG_KMOD is not set +CONFIG_KMOD=y CONFIG_STOP_MACHINE=y # @@ -130,7 +132,8 @@ CONFIG_CELL_IIC=y # # Cell Broadband Engine options # -CONFIG_SPU_FS=y +CONFIG_SPU_FS=m +CONFIG_SPUFS_MMAP=y # # Kernel options @@ -144,7 +147,7 @@ CONFIG_PREEMPT_NONE=y # CONFIG_PREEMPT is not set CONFIG_PREEMPT_BKL=y CONFIG_BINFMT_ELF=y -# CONFIG_BINFMT_MISC is not set +CONFIG_BINFMT_MISC=m CONFIG_FORCE_MAX_ZONEORDER=13 # CONFIG_IOMMU_VMERGE is not set CONFIG_KEXEC=y @@ -155,13 +158,16 @@ CONFIG_ARCH_SELECT_MEMORY_MODEL=y CONFIG_ARCH_FLATMEM_ENABLE=y CONFIG_ARCH_SPARSEMEM_ENABLE=y CONFIG_SELECT_MEMORY_MODEL=y -CONFIG_FLATMEM_MANUAL=y +# CONFIG_FLATMEM_MANUAL is not set # CONFIG_DISCONTIGMEM_MANUAL is not set -# CONFIG_SPARSEMEM_MANUAL is not set -CONFIG_FLATMEM=y -CONFIG_FLAT_NODE_MEM_MAP=y +CONFIG_SPARSEMEM_MANUAL=y +CONFIG_SPARSEMEM=y +CONFIG_HAVE_MEMORY_PRESENT=y # CONFIG_SPARSEMEM_STATIC is not set +CONFIG_SPARSEMEM_EXTREME=y +# CONFIG_MEMORY_HOTPLUG is not set CONFIG_SPLIT_PTLOCK_CPUS=4 +CONFIG_MIGRATION=y # CONFIG_PPC_64K_PAGES is not set CONFIG_SCHED_SMT=y CONFIG_PROC_DEVICETREE=y @@ -232,6 +238,7 @@ CONFIG_TCP_CONG_BIC=y # CONFIG_IP_VS is not set CONFIG_IPV6=y # CONFIG_IPV6_PRIVACY is not set +# CONFIG_IPV6_ROUTER_PREF is not set CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m @@ -244,25 +251,7 @@ CONFIG_NETFILTER=y # Core Netfilter Configuration # # CONFIG_NETFILTER_NETLINK is not set -CONFIG_NETFILTER_XTABLES=m -CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m -CONFIG_NETFILTER_XT_TARGET_MARK=m -CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_NOTRACK=m -CONFIG_NETFILTER_XT_MATCH_COMMENT=m -CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m -# CONFIG_NETFILTER_XT_MATCH_DCCP is not set -CONFIG_NETFILTER_XT_MATCH_HELPER=m -CONFIG_NETFILTER_XT_MATCH_LENGTH=m -CONFIG_NETFILTER_XT_MATCH_LIMIT=m -CONFIG_NETFILTER_XT_MATCH_MAC=m -CONFIG_NETFILTER_XT_MATCH_MARK=m -CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m -CONFIG_NETFILTER_XT_MATCH_REALM=m -CONFIG_NETFILTER_XT_MATCH_SCTP=m -CONFIG_NETFILTER_XT_MATCH_STATE=m -CONFIG_NETFILTER_XT_MATCH_STRING=m -CONFIG_NETFILTER_XT_MATCH_TCPMSS=m +# CONFIG_NETFILTER_XTABLES is not set # # IP: Netfilter Configuration @@ -278,51 +267,13 @@ CONFIG_IP_NF_IRC=m CONFIG_IP_NF_TFTP=m CONFIG_IP_NF_AMANDA=m # CONFIG_IP_NF_PPTP is not set +# CONFIG_IP_NF_H323 is not set CONFIG_IP_NF_QUEUE=m -CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_MATCH_IPRANGE=m -CONFIG_IP_NF_MATCH_MULTIPORT=m -CONFIG_IP_NF_MATCH_TOS=m -CONFIG_IP_NF_MATCH_RECENT=m -CONFIG_IP_NF_MATCH_ECN=m -CONFIG_IP_NF_MATCH_DSCP=m -CONFIG_IP_NF_MATCH_AH_ESP=m -CONFIG_IP_NF_MATCH_TTL=m -CONFIG_IP_NF_MATCH_OWNER=m -CONFIG_IP_NF_MATCH_ADDRTYPE=m -CONFIG_IP_NF_MATCH_HASHLIMIT=m -CONFIG_IP_NF_MATCH_POLICY=m -CONFIG_IP_NF_FILTER=m -CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_TARGET_LOG=m -CONFIG_IP_NF_TARGET_ULOG=m -CONFIG_IP_NF_TARGET_TCPMSS=m -CONFIG_IP_NF_NAT=m -CONFIG_IP_NF_NAT_NEEDED=y -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_TARGET_REDIRECT=m -CONFIG_IP_NF_TARGET_NETMAP=m -CONFIG_IP_NF_TARGET_SAME=m -CONFIG_IP_NF_NAT_SNMP_BASIC=m -CONFIG_IP_NF_NAT_IRC=m -CONFIG_IP_NF_NAT_FTP=m -CONFIG_IP_NF_NAT_TFTP=m -CONFIG_IP_NF_NAT_AMANDA=m -CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_TARGET_TOS=m -CONFIG_IP_NF_TARGET_ECN=m -CONFIG_IP_NF_TARGET_DSCP=m -CONFIG_IP_NF_TARGET_TTL=m -CONFIG_IP_NF_RAW=m -CONFIG_IP_NF_ARPTABLES=m -CONFIG_IP_NF_ARPFILTER=m -CONFIG_IP_NF_ARP_MANGLE=m # # IPv6: Netfilter Configuration (EXPERIMENTAL) # # CONFIG_IP6_NF_QUEUE is not set -# CONFIG_IP6_NF_IPTABLES is not set # # DCCP Configuration (EXPERIMENTAL) @@ -355,7 +306,6 @@ CONFIG_IP_NF_ARP_MANGLE=m # QoS and/or fair queueing # # CONFIG_NET_SCHED is not set -CONFIG_NET_CLS_ROUTE=y # # Network testing @@ -408,7 +358,7 @@ CONFIG_FW_LOADER=y # CONFIG_BLK_DEV_COW_COMMON is not set CONFIG_BLK_DEV_LOOP=y # CONFIG_BLK_DEV_CRYPTOLOOP is not set -CONFIG_BLK_DEV_NBD=y +# CONFIG_BLK_DEV_NBD is not set # CONFIG_BLK_DEV_SX8 is not set CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=16 @@ -484,7 +434,23 @@ CONFIG_IDEDMA_AUTO=y # # Multi-device support (RAID and LVM) # -# CONFIG_MD is not set +CONFIG_MD=y +CONFIG_BLK_DEV_MD=m +CONFIG_MD_LINEAR=m +CONFIG_MD_RAID0=m +CONFIG_MD_RAID1=m +# CONFIG_MD_RAID10 is not set +# CONFIG_MD_RAID5 is not set +# CONFIG_MD_RAID6 is not set +# CONFIG_MD_MULTIPATH is not set +# CONFIG_MD_FAULTY is not set +CONFIG_BLK_DEV_DM=m +CONFIG_DM_CRYPT=m +CONFIG_DM_SNAPSHOT=m +CONFIG_DM_MIRROR=m +CONFIG_DM_ZERO=m +CONFIG_DM_MULTIPATH=m +# CONFIG_DM_MULTIPATH_EMC is not set # # Fusion MPT device support @@ -548,7 +514,7 @@ CONFIG_MII=y # CONFIG_ACENIC is not set # CONFIG_DL2K is not set CONFIG_E1000=m -# CONFIG_E1000_NAPI is not set +CONFIG_E1000_NAPI=y # CONFIG_E1000_DISABLE_PACKET_SPLIT is not set # CONFIG_NS83820 is not set # CONFIG_HAMACHI is not set @@ -560,7 +526,7 @@ CONFIG_SKGE=m # CONFIG_SK98LIN is not set # CONFIG_TIGON3 is not set # CONFIG_BNX2 is not set -CONFIG_SPIDER_NET=y +CONFIG_SPIDER_NET=m # CONFIG_MV643XX_ETH is not set # @@ -678,6 +644,8 @@ CONFIG_SERIAL_CORE_CONSOLE=y # CONFIG_SERIAL_JSM is not set CONFIG_UNIX98_PTYS=y # CONFIG_LEGACY_PTYS is not set +CONFIG_HVC_DRIVER=y +CONFIG_HVC_RTAS=y # # IPMI @@ -694,14 +662,13 @@ CONFIG_WATCHDOG=y # Watchdog Device Drivers # # CONFIG_SOFT_WATCHDOG is not set -# CONFIG_WATCHDOG_RTAS is not set +CONFIG_WATCHDOG_RTAS=y # # PCI-based Watchdog Cards # # CONFIG_PCIPCWATCHDOG is not set # CONFIG_WDTPCI is not set -# CONFIG_RTC is not set CONFIG_GEN_RTC=y # CONFIG_GEN_RTC_X is not set # CONFIG_DTLK is not set @@ -833,6 +800,7 @@ CONFIG_DUMMY_CONSOLE=y # CONFIG_USB_ARCH_HAS_HCD=y CONFIG_USB_ARCH_HAS_OHCI=y +CONFIG_USB_ARCH_HAS_EHCI=y # CONFIG_USB is not set # @@ -852,7 +820,14 @@ CONFIG_USB_ARCH_HAS_OHCI=y # # InfiniBand support # -# CONFIG_INFINIBAND is not set +CONFIG_INFINIBAND=y +CONFIG_INFINIBAND_USER_MAD=m +CONFIG_INFINIBAND_USER_ACCESS=m +CONFIG_INFINIBAND_MTHCA=m +CONFIG_INFINIBAND_MTHCA_DEBUG=y +CONFIG_INFINIBAND_IPOIB=m +CONFIG_INFINIBAND_IPOIB_DEBUG=y +CONFIG_INFINIBAND_IPOIB_DEBUG_DATA=y # # EDAC - error detection and reporting (RAS) (EXPERIMENTAL) @@ -1037,10 +1012,6 @@ CONFIG_CRC32=y # CONFIG_LIBCRC32C is not set CONFIG_ZLIB_INFLATE=m CONFIG_ZLIB_DEFLATE=m -CONFIG_TEXTSEARCH=y -CONFIG_TEXTSEARCH_KMP=m -CONFIG_TEXTSEARCH_BM=m -CONFIG_TEXTSEARCH_FSM=m # # Instrumentation Support @@ -1058,7 +1029,7 @@ CONFIG_LOG_BUF_SHIFT=15 CONFIG_DETECT_SOFTLOCKUP=y # CONFIG_SCHEDSTATS is not set # CONFIG_DEBUG_SLAB is not set -# CONFIG_DEBUG_MUTEXES is not set +CONFIG_DEBUG_MUTEXES=y # CONFIG_DEBUG_SPINLOCK is not set CONFIG_DEBUG_SPINLOCK_SLEEP=y # CONFIG_DEBUG_KOBJECT is not set diff --git a/arch/powerpc/configs/mpc8540_ads_defconfig b/arch/powerpc/configs/mpc8540_ads_defconfig index 2a8290ee15c6..7f0780f1aa39 100644 --- a/arch/powerpc/configs/mpc8540_ads_defconfig +++ b/arch/powerpc/configs/mpc8540_ads_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: -# Sat Jan 14 15:57:54 2006 +# Linux kernel version: 2.6.16 +# Mon Mar 27 23:37:36 2006 # # CONFIG_PPC64 is not set CONFIG_PPC32=y @@ -9,6 +9,7 @@ CONFIG_PPC_MERGE=y CONFIG_MMU=y CONFIG_GENERIC_HARDIRQS=y CONFIG_RWSEM_XCHGADD_ALGORITHM=y +CONFIG_GENERIC_HWEIGHT=y CONFIG_GENERIC_CALIBRATE_DELAY=y CONFIG_PPC=y CONFIG_EARLY_PRINTK=y @@ -18,6 +19,7 @@ CONFIG_ARCH_MAY_HAVE_PC_FDC=y CONFIG_PPC_OF=y CONFIG_PPC_UDBG_16550=y # CONFIG_GENERIC_TBSYNC is not set +CONFIG_DEFAULT_UIMAGE=y # # Processor support @@ -42,7 +44,6 @@ CONFIG_SPE=y # Code maturity level options # CONFIG_EXPERIMENTAL=y -CONFIG_CLEAN_COMPILE=y CONFIG_BROKEN_ON_SMP=y CONFIG_INIT_ENV_ARG_LIMIT=32 @@ -58,6 +59,7 @@ CONFIG_SYSVIPC=y CONFIG_SYSCTL=y # CONFIG_AUDIT is not set # CONFIG_IKCONFIG is not set +# CONFIG_RELAY is not set CONFIG_INITRAMFS_SOURCE="" # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EMBEDDED=y @@ -72,10 +74,6 @@ CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_EPOLL=y CONFIG_SHMEM=y -CONFIG_CC_ALIGN_FUNCTIONS=0 -CONFIG_CC_ALIGN_LABELS=0 -CONFIG_CC_ALIGN_LOOPS=0 -CONFIG_CC_ALIGN_JUMPS=0 CONFIG_SLAB=y # CONFIG_TINY_SHMEM is not set CONFIG_BASE_SMALL=0 @@ -90,6 +88,8 @@ CONFIG_BASE_SMALL=0 # Block layer # # CONFIG_LBD is not set +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_LSF is not set # # IO Schedulers @@ -183,6 +183,7 @@ CONFIG_NET=y # # Networking options # +# CONFIG_NETDEBUG is not set CONFIG_PACKET=y # CONFIG_PACKET_MMAP is not set CONFIG_UNIX=y @@ -220,6 +221,11 @@ CONFIG_TCP_CONG_BIC=y # SCTP Configuration (EXPERIMENTAL) # # CONFIG_IP_SCTP is not set + +# +# TIPC Configuration (EXPERIMENTAL) +# +# CONFIG_TIPC is not set # CONFIG_ATM is not set # CONFIG_BRIDGE is not set # CONFIG_VLAN_8021Q is not set @@ -229,11 +235,6 @@ CONFIG_TCP_CONG_BIC=y # CONFIG_ATALK is not set # CONFIG_X25 is not set # CONFIG_LAPB is not set - -# -# TIPC Configuration (EXPERIMENTAL) -# -# CONFIG_TIPC is not set # CONFIG_NET_DIVERT is not set # CONFIG_ECONET is not set # CONFIG_WAN_ROUTER is not set @@ -487,6 +488,12 @@ CONFIG_GEN_RTC=y # CONFIG_I2C is not set # +# SPI support +# +# CONFIG_SPI is not set +# CONFIG_SPI_MASTER is not set + +# # Dallas's 1-wire bus # # CONFIG_W1 is not set @@ -496,6 +503,7 @@ CONFIG_GEN_RTC=y # CONFIG_HWMON=y # CONFIG_HWMON_VID is not set +# CONFIG_SENSORS_F71805F is not set # CONFIG_HWMON_DEBUG_CHIP is not set # @@ -503,10 +511,6 @@ CONFIG_HWMON=y # # -# Multimedia Capabilities Port drivers -# - -# # Multimedia devices # # CONFIG_VIDEO_DEV is not set @@ -531,6 +535,7 @@ CONFIG_HWMON=y # # CONFIG_USB_ARCH_HAS_HCD is not set # CONFIG_USB_ARCH_HAS_OHCI is not set +# CONFIG_USB_ARCH_HAS_EHCI is not set # # NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' @@ -551,7 +556,7 @@ CONFIG_HWMON=y # # -# SN Devices +# EDAC - error detection and reporting (RAS) (EXPERIMENTAL) # # @@ -603,7 +608,6 @@ CONFIG_SYSFS=y CONFIG_TMPFS=y # CONFIG_HUGETLB_PAGE is not set CONFIG_RAMFS=y -# CONFIG_RELAYFS_FS is not set # CONFIG_CONFIGFS_FS is not set # @@ -658,6 +662,7 @@ CONFIG_PARTITION_ADVANCED=y # CONFIG_SGI_PARTITION is not set # CONFIG_ULTRIX_PARTITION is not set # CONFIG_SUN_PARTITION is not set +# CONFIG_KARMA_PARTITION is not set # CONFIG_EFI_PARTITION is not set # @@ -695,6 +700,8 @@ CONFIG_DEBUG_MUTEXES=y # CONFIG_DEBUG_INFO is not set # CONFIG_DEBUG_FS is not set # CONFIG_DEBUG_VM is not set +# CONFIG_UNWIND_INFO is not set +CONFIG_FORCED_INLINING=y # CONFIG_RCU_TORTURE_TEST is not set # CONFIG_DEBUGGER is not set # CONFIG_BDI_SWITCH is not set diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 80e9fe2632b8..0cc0995b81b0 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -12,12 +12,12 @@ endif obj-y := semaphore.o cputable.o ptrace.o syscalls.o \ irq.o align.o signal_32.o pmc.o vdso.o \ - init_task.o process.o systbl.o + init_task.o process.o systbl.o idle.o obj-y += vdso32/ obj-$(CONFIG_PPC64) += setup_64.o binfmt_elf32.o sys_ppc32.o \ signal_64.o ptrace32.o \ paca.o cpu_setup_power4.o \ - firmware.o sysfs.o idle_64.o + firmware.o sysfs.o obj-$(CONFIG_PPC64) += vdso64/ obj-$(CONFIG_ALTIVEC) += vecemu.o vector.o obj-$(CONFIG_POWER4) += idle_power4.o @@ -34,6 +34,11 @@ obj-$(CONFIG_IBMEBUS) += ibmebus.o obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o obj64-$(CONFIG_PPC_MULTIPLATFORM) += nvram_64.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o +obj-$(CONFIG_6xx) += idle_6xx.o l2cr_6xx.o cpu_setup_6xx.o +obj-$(CONFIG_TAU) += tau_6xx.o +obj32-$(CONFIG_SOFTWARE_SUSPEND) += swsusp_32.o +obj32-$(CONFIG_MODULES) += module_32.o +obj-$(CONFIG_E500) += perfmon_fsl_booke.o ifeq ($(CONFIG_PPC_MERGE),y) @@ -51,7 +56,6 @@ obj-$(CONFIG_PPC64) += misc_64.o dma_64.o iommu.o obj-$(CONFIG_PPC_MULTIPLATFORM) += prom_init.o obj-$(CONFIG_MODULES) += ppc_ksyms.o obj-$(CONFIG_BOOTX_TEXT) += btext.o -obj-$(CONFIG_6xx) += idle_6xx.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_PPC_UDBG_16550) += legacy_serial.o udbg_16550.o @@ -77,6 +81,7 @@ smpobj-$(CONFIG_SMP) += smp.o endif +obj-$(CONFIG_PPC32) += $(obj32-y) obj-$(CONFIG_PPC64) += $(obj64-y) extra-$(CONFIG_PPC_FPU) += fpu.o diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 882889b15926..54b48f330051 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -105,8 +105,6 @@ int main(void) DEFINE(ICACHEL1LINESIZE, offsetof(struct ppc64_caches, iline_size)); DEFINE(ICACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_iline_size)); DEFINE(ICACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, ilines_per_page)); - DEFINE(PLATFORM_LPAR, PLATFORM_LPAR); - /* paca */ DEFINE(PACA_SIZE, sizeof(struct paca_struct)); DEFINE(PACAPACAINDEX, offsetof(struct paca_struct, paca_index)); diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S new file mode 100644 index 000000000000..55ed7716636f --- /dev/null +++ b/arch/powerpc/kernel/cpu_setup_6xx.S @@ -0,0 +1,474 @@ +/* + * This file contains low level CPU setup functions. + * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/config.h> +#include <asm/processor.h> +#include <asm/page.h> +#include <asm/cputable.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/cache.h> + +_GLOBAL(__setup_cpu_603) + b setup_common_caches +_GLOBAL(__setup_cpu_604) + mflr r4 + bl setup_common_caches + bl setup_604_hid0 + mtlr r4 + blr +_GLOBAL(__setup_cpu_750) + mflr r4 + bl __init_fpu_registers + bl setup_common_caches + bl setup_750_7400_hid0 + mtlr r4 + blr +_GLOBAL(__setup_cpu_750cx) + mflr r4 + bl __init_fpu_registers + bl setup_common_caches + bl setup_750_7400_hid0 + bl setup_750cx + mtlr r4 + blr +_GLOBAL(__setup_cpu_750fx) + mflr r4 + bl __init_fpu_registers + bl setup_common_caches + bl setup_750_7400_hid0 + bl setup_750fx + mtlr r4 + blr +_GLOBAL(__setup_cpu_7400) + mflr r4 + bl __init_fpu_registers + bl setup_7400_workarounds + bl setup_common_caches + bl setup_750_7400_hid0 + mtlr r4 + blr +_GLOBAL(__setup_cpu_7410) + mflr r4 + bl __init_fpu_registers + bl setup_7410_workarounds + bl setup_common_caches + bl setup_750_7400_hid0 + li r3,0 + mtspr SPRN_L2CR2,r3 + mtlr r4 + blr +_GLOBAL(__setup_cpu_745x) + mflr r4 + bl setup_common_caches + bl setup_745x_specifics + mtlr r4 + blr + +/* Enable caches for 603's, 604, 750 & 7400 */ +setup_common_caches: + mfspr r11,SPRN_HID0 + andi. r0,r11,HID0_DCE + ori r11,r11,HID0_ICE|HID0_DCE + ori r8,r11,HID0_ICFI + bne 1f /* don't invalidate the D-cache */ + ori r8,r8,HID0_DCI /* unless it wasn't enabled */ +1: sync + mtspr SPRN_HID0,r8 /* enable and invalidate caches */ + sync + mtspr SPRN_HID0,r11 /* enable caches */ + sync + isync + blr + +/* 604, 604e, 604ev, ... + * Enable superscalar execution & branch history table + */ +setup_604_hid0: + mfspr r11,SPRN_HID0 + ori r11,r11,HID0_SIED|HID0_BHTE + ori r8,r11,HID0_BTCD + sync + mtspr SPRN_HID0,r8 /* flush branch target address cache */ + sync /* on 604e/604r */ + mtspr SPRN_HID0,r11 + sync + isync + blr + +/* 7400 <= rev 2.7 and 7410 rev = 1.0 suffer from some + * erratas we work around here. + * Moto MPC710CE.pdf describes them, those are errata + * #3, #4 and #5 + * Note that we assume the firmware didn't choose to + * apply other workarounds (there are other ones documented + * in the .pdf). It appear that Apple firmware only works + * around #3 and with the same fix we use. We may want to + * check if the CPU is using 60x bus mode in which case + * the workaround for errata #4 is useless. Also, we may + * want to explicitely clear HID0_NOPDST as this is not + * needed once we have applied workaround #5 (though it's + * not set by Apple's firmware at least). + */ +setup_7400_workarounds: + mfpvr r3 + rlwinm r3,r3,0,20,31 + cmpwi 0,r3,0x0207 + ble 1f + blr +setup_7410_workarounds: + mfpvr r3 + rlwinm r3,r3,0,20,31 + cmpwi 0,r3,0x0100 + bnelr +1: + mfspr r11,SPRN_MSSSR0 + /* Errata #3: Set L1OPQ_SIZE to 0x10 */ + rlwinm r11,r11,0,9,6 + oris r11,r11,0x0100 + /* Errata #4: Set L2MQ_SIZE to 1 (check for MPX mode first ?) */ + oris r11,r11,0x0002 + /* Errata #5: Set DRLT_SIZE to 0x01 */ + rlwinm r11,r11,0,5,2 + oris r11,r11,0x0800 + sync + mtspr SPRN_MSSSR0,r11 + sync + isync + blr + +/* 740/750/7400/7410 + * Enable Store Gathering (SGE), Address Brodcast (ABE), + * Branch History Table (BHTE), Branch Target ICache (BTIC) + * Dynamic Power Management (DPM), Speculative (SPD) + * Clear Instruction cache throttling (ICTC) + */ +setup_750_7400_hid0: + mfspr r11,SPRN_HID0 + ori r11,r11,HID0_SGE | HID0_ABE | HID0_BHTE | HID0_BTIC + oris r11,r11,HID0_DPM@h +BEGIN_FTR_SECTION + xori r11,r11,HID0_BTIC +END_FTR_SECTION_IFSET(CPU_FTR_NO_BTIC) +BEGIN_FTR_SECTION + xoris r11,r11,HID0_DPM@h /* disable dynamic power mgmt */ +END_FTR_SECTION_IFSET(CPU_FTR_NO_DPM) + li r3,HID0_SPD + andc r11,r11,r3 /* clear SPD: enable speculative */ + li r3,0 + mtspr SPRN_ICTC,r3 /* Instruction Cache Throttling off */ + isync + mtspr SPRN_HID0,r11 + sync + isync + blr + +/* 750cx specific + * Looks like we have to disable NAP feature for some PLL settings... + * (waiting for confirmation) + */ +setup_750cx: + mfspr r10, SPRN_HID1 + rlwinm r10,r10,4,28,31 + cmpwi cr0,r10,7 + cmpwi cr1,r10,9 + cmpwi cr2,r10,11 + cror 4*cr0+eq,4*cr0+eq,4*cr1+eq + cror 4*cr0+eq,4*cr0+eq,4*cr2+eq + bnelr + lwz r6,CPU_SPEC_FEATURES(r5) + li r7,CPU_FTR_CAN_NAP + andc r6,r6,r7 + stw r6,CPU_SPEC_FEATURES(r5) + blr + +/* 750fx specific + */ +setup_750fx: + blr + +/* MPC 745x + * Enable Store Gathering (SGE), Branch Folding (FOLD) + * Branch History Table (BHTE), Branch Target ICache (BTIC) + * Dynamic Power Management (DPM), Speculative (SPD) + * Ensure our data cache instructions really operate. + * Timebase has to be running or we wouldn't have made it here, + * just ensure we don't disable it. + * Clear Instruction cache throttling (ICTC) + * Enable L2 HW prefetch + */ +setup_745x_specifics: + /* We check for the presence of an L3 cache setup by + * the firmware. If any, we disable NAP capability as + * it's known to be bogus on rev 2.1 and earlier + */ + mfspr r11,SPRN_L3CR + andis. r11,r11,L3CR_L3E@h + beq 1f + lwz r6,CPU_SPEC_FEATURES(r5) + andi. r0,r6,CPU_FTR_L3_DISABLE_NAP + beq 1f + li r7,CPU_FTR_CAN_NAP + andc r6,r6,r7 + stw r6,CPU_SPEC_FEATURES(r5) +1: + mfspr r11,SPRN_HID0 + + /* All of the bits we have to set..... + */ + ori r11,r11,HID0_SGE | HID0_FOLD | HID0_BHTE + ori r11,r11,HID0_LRSTK | HID0_BTIC + oris r11,r11,HID0_DPM@h +BEGIN_FTR_SECTION + xori r11,r11,HID0_BTIC +END_FTR_SECTION_IFSET(CPU_FTR_NO_BTIC) +BEGIN_FTR_SECTION + xoris r11,r11,HID0_DPM@h /* disable dynamic power mgmt */ +END_FTR_SECTION_IFSET(CPU_FTR_NO_DPM) + + /* All of the bits we have to clear.... + */ + li r3,HID0_SPD | HID0_NOPDST | HID0_NOPTI + andc r11,r11,r3 /* clear SPD: enable speculative */ + li r3,0 + + mtspr SPRN_ICTC,r3 /* Instruction Cache Throttling off */ + isync + mtspr SPRN_HID0,r11 + sync + isync + + /* Enable L2 HW prefetch, if L2 is enabled + */ + mfspr r3,SPRN_L2CR + andis. r3,r3,L2CR_L2E@h + beqlr + mfspr r3,SPRN_MSSCR0 + ori r3,r3,3 + sync + mtspr SPRN_MSSCR0,r3 + sync + isync + blr + +/* + * Initialize the FPU registers. This is needed to work around an errata + * in some 750 cpus where using a not yet initialized FPU register after + * power on reset may hang the CPU + */ +_GLOBAL(__init_fpu_registers) + mfmsr r10 + ori r11,r10,MSR_FP + mtmsr r11 + isync + addis r9,r3,empty_zero_page@ha + addi r9,r9,empty_zero_page@l + REST_32FPRS(0,r9) + sync + mtmsr r10 + isync + blr + + +/* Definitions for the table use to save CPU states */ +#define CS_HID0 0 +#define CS_HID1 4 +#define CS_HID2 8 +#define CS_MSSCR0 12 +#define CS_MSSSR0 16 +#define CS_ICTRL 20 +#define CS_LDSTCR 24 +#define CS_LDSTDB 28 +#define CS_SIZE 32 + + .data + .balign L1_CACHE_BYTES +cpu_state_storage: + .space CS_SIZE + .balign L1_CACHE_BYTES,0 + .text + +/* Called in normal context to backup CPU 0 state. This + * does not include cache settings. This function is also + * called for machine sleep. This does not include the MMU + * setup, BATs, etc... but rather the "special" registers + * like HID0, HID1, MSSCR0, etc... + */ +_GLOBAL(__save_cpu_setup) + /* Some CR fields are volatile, we back it up all */ + mfcr r7 + + /* Get storage ptr */ + lis r5,cpu_state_storage@h + ori r5,r5,cpu_state_storage@l + + /* Save HID0 (common to all CONFIG_6xx cpus) */ + mfspr r3,SPRN_HID0 + stw r3,CS_HID0(r5) + + /* Now deal with CPU type dependent registers */ + mfspr r3,SPRN_PVR + srwi r3,r3,16 + cmplwi cr0,r3,0x8000 /* 7450 */ + cmplwi cr1,r3,0x000c /* 7400 */ + cmplwi cr2,r3,0x800c /* 7410 */ + cmplwi cr3,r3,0x8001 /* 7455 */ + cmplwi cr4,r3,0x8002 /* 7457 */ + cmplwi cr5,r3,0x8003 /* 7447A */ + cmplwi cr6,r3,0x7000 /* 750FX */ + cmplwi cr7,r3,0x8004 /* 7448 */ + /* cr1 is 7400 || 7410 */ + cror 4*cr1+eq,4*cr1+eq,4*cr2+eq + /* cr0 is 74xx */ + cror 4*cr0+eq,4*cr0+eq,4*cr3+eq + cror 4*cr0+eq,4*cr0+eq,4*cr4+eq + cror 4*cr0+eq,4*cr0+eq,4*cr1+eq + cror 4*cr0+eq,4*cr0+eq,4*cr5+eq + cror 4*cr0+eq,4*cr0+eq,4*cr7+eq + bne 1f + /* Backup 74xx specific regs */ + mfspr r4,SPRN_MSSCR0 + stw r4,CS_MSSCR0(r5) + mfspr r4,SPRN_MSSSR0 + stw r4,CS_MSSSR0(r5) + beq cr1,1f + /* Backup 745x specific registers */ + mfspr r4,SPRN_HID1 + stw r4,CS_HID1(r5) + mfspr r4,SPRN_ICTRL + stw r4,CS_ICTRL(r5) + mfspr r4,SPRN_LDSTCR + stw r4,CS_LDSTCR(r5) + mfspr r4,SPRN_LDSTDB + stw r4,CS_LDSTDB(r5) +1: + bne cr6,1f + /* Backup 750FX specific registers */ + mfspr r4,SPRN_HID1 + stw r4,CS_HID1(r5) + /* If rev 2.x, backup HID2 */ + mfspr r3,SPRN_PVR + andi. r3,r3,0xff00 + cmpwi cr0,r3,0x0200 + bne 1f + mfspr r4,SPRN_HID2 + stw r4,CS_HID2(r5) +1: + mtcr r7 + blr + +/* Called with no MMU context (typically MSR:IR/DR off) to + * restore CPU state as backed up by the previous + * function. This does not include cache setting + */ +_GLOBAL(__restore_cpu_setup) + /* Some CR fields are volatile, we back it up all */ + mfcr r7 + + /* Get storage ptr */ + lis r5,(cpu_state_storage-KERNELBASE)@h + ori r5,r5,cpu_state_storage@l + + /* Restore HID0 */ + lwz r3,CS_HID0(r5) + sync + isync + mtspr SPRN_HID0,r3 + sync + isync + + /* Now deal with CPU type dependent registers */ + mfspr r3,SPRN_PVR + srwi r3,r3,16 + cmplwi cr0,r3,0x8000 /* 7450 */ + cmplwi cr1,r3,0x000c /* 7400 */ + cmplwi cr2,r3,0x800c /* 7410 */ + cmplwi cr3,r3,0x8001 /* 7455 */ + cmplwi cr4,r3,0x8002 /* 7457 */ + cmplwi cr5,r3,0x8003 /* 7447A */ + cmplwi cr6,r3,0x7000 /* 750FX */ + cmplwi cr7,r3,0x8004 /* 7448 */ + /* cr1 is 7400 || 7410 */ + cror 4*cr1+eq,4*cr1+eq,4*cr2+eq + /* cr0 is 74xx */ + cror 4*cr0+eq,4*cr0+eq,4*cr3+eq + cror 4*cr0+eq,4*cr0+eq,4*cr4+eq + cror 4*cr0+eq,4*cr0+eq,4*cr1+eq + cror 4*cr0+eq,4*cr0+eq,4*cr5+eq + cror 4*cr0+eq,4*cr0+eq,4*cr7+eq + bne 2f + /* Restore 74xx specific regs */ + lwz r4,CS_MSSCR0(r5) + sync + mtspr SPRN_MSSCR0,r4 + sync + isync + lwz r4,CS_MSSSR0(r5) + sync + mtspr SPRN_MSSSR0,r4 + sync + isync + bne cr2,1f + /* Clear 7410 L2CR2 */ + li r4,0 + mtspr SPRN_L2CR2,r4 +1: beq cr1,2f + /* Restore 745x specific registers */ + lwz r4,CS_HID1(r5) + sync + mtspr SPRN_HID1,r4 + isync + sync + lwz r4,CS_ICTRL(r5) + sync + mtspr SPRN_ICTRL,r4 + isync + sync + lwz r4,CS_LDSTCR(r5) + sync + mtspr SPRN_LDSTCR,r4 + isync + sync + lwz r4,CS_LDSTDB(r5) + sync + mtspr SPRN_LDSTDB,r4 + isync + sync +2: bne cr6,1f + /* Restore 750FX specific registers + * that is restore HID2 on rev 2.x and PLL config & switch + * to PLL 0 on all + */ + /* If rev 2.x, restore HID2 with low voltage bit cleared */ + mfspr r3,SPRN_PVR + andi. r3,r3,0xff00 + cmpwi cr0,r3,0x0200 + bne 4f + lwz r4,CS_HID2(r5) + rlwinm r4,r4,0,19,17 + mtspr SPRN_HID2,r4 + sync +4: + lwz r4,CS_HID1(r5) + rlwinm r5,r4,0,16,14 + mtspr SPRN_HID1,r5 + /* Wait for PLL to stabilize */ + mftbl r5 +3: mftbl r6 + sub r6,r6,r5 + cmplwi cr0,r6,10000 + ble 3b + /* Setup final PLL */ + mtspr SPRN_HID1,r4 +1: + mtcr r7 + blr + diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 4827ca1ec89b..b3a979467225 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -135,10 +135,10 @@ transfer_to_handler: mfspr r11,SPRN_HID0 mtcr r11 BEGIN_FTR_SECTION - bt- 8,power_save_6xx_restore /* Check DOZE */ + bt- 8,4f /* Check DOZE */ END_FTR_SECTION_IFSET(CPU_FTR_CAN_DOZE) BEGIN_FTR_SECTION - bt- 9,power_save_6xx_restore /* Check NAP */ + bt- 9,4f /* Check NAP */ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) #endif /* CONFIG_6xx */ .globl transfer_to_handler_cont @@ -157,6 +157,10 @@ transfer_to_handler_cont: SYNC RFI /* jump to handler, enable MMU */ +#ifdef CONFIG_6xx +4: b power_save_6xx_restore +#endif + /* * On kernel stack overflow, load up an initial stack pointer * and call StackOverflow(regs), which should not return. diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 1060155d84c3..19ad5c6b1818 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -617,6 +617,12 @@ _GLOBAL(enter_rtas) mfsrr1 r10 std r10,_SRR1(r1) + /* Temporary workaround to clear CR until RTAS can be modified to + * ignore all bits. + */ + li r0,0 + mtcr r0 + /* There is no way it is acceptable to get here with interrupts enabled, * check it with the asm equivalent of WARN_ON */ diff --git a/arch/powerpc/kernel/firmware.c b/arch/powerpc/kernel/firmware.c index 4d37a3cb80f6..0bfe9061720a 100644 --- a/arch/powerpc/kernel/firmware.c +++ b/arch/powerpc/kernel/firmware.c @@ -14,7 +14,9 @@ */ #include <linux/config.h> +#include <linux/module.h> #include <asm/firmware.h> -unsigned long ppc64_firmware_features; +unsigned long powerpc_firmware_features; +EXPORT_SYMBOL_GPL(powerpc_firmware_features); diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 35084f3a841b..a5ae04a57c78 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -1544,7 +1544,11 @@ _STATIC(__boot_from_prom) mr r28,r6 mr r27,r7 - /* Align the stack to 16-byte boundary for broken yaboot */ + /* + * Align the stack to 16-byte boundary + * Depending on the size and layout of the ELF sections in the initial + * boot binary, the stack pointer will be unalignet on PowerMac + */ rldicr r1,r1,0,59 /* Make sure we are running in 64 bits mode */ @@ -1847,21 +1851,6 @@ _STATIC(start_here_multiplatform) bl .__save_cpu_setup sync - /* Setup a valid physical PACA pointer in SPRG3 for early_setup - * note that boot_cpuid can always be 0 nowadays since there is - * nowhere it can be initialized differently before we reach this - * code - */ - LOAD_REG_IMMEDIATE(r27, boot_cpuid) - add r27,r27,r26 - lwz r27,0(r27) - - LOAD_REG_IMMEDIATE(r24, paca) /* Get base vaddr of paca array */ - mulli r13,r27,PACA_SIZE /* Calculate vaddr of right paca */ - add r13,r13,r24 /* for this processor. */ - add r13,r13,r26 /* convert to physical addr */ - mtspr SPRN_SPRG3,r13 - /* Do very early kernel initializations, including initial hash table, * stab and slb setup before we turn on relocation. */ @@ -1930,6 +1919,17 @@ _STATIC(start_here_common) /* Not reached */ BUG_OPCODE +/* Put the paca pointer into r13 and SPRG3 */ +_GLOBAL(setup_boot_paca) + LOAD_REG_IMMEDIATE(r3, boot_cpuid) + lwz r3,0(r3) + LOAD_REG_IMMEDIATE(r4, paca) /* Get base vaddr of paca array */ + mulli r3,r3,PACA_SIZE /* Calculate vaddr of right paca */ + add r13,r3,r4 /* for this processor. */ + mtspr SPRN_SPRG3,r13 + + blr + /* * We put a few things here that have to be page-aligned. * This stuff goes at the beginning of the bss, which is page-aligned. diff --git a/arch/powerpc/kernel/idle_64.c b/arch/powerpc/kernel/idle.c index b879d3057ef8..e9f321d74d85 100644 --- a/arch/powerpc/kernel/idle_64.c +++ b/arch/powerpc/kernel/idle.c @@ -2,13 +2,17 @@ * Idle daemon for PowerPC. Idle daemon will handle any action * that needs to be taken when the system becomes idle. * - * Originally Written by Cort Dougan (cort@cs.nmt.edu) + * Originally written by Cort Dougan (cort@cs.nmt.edu). + * Subsequent 32-bit hacking by Tom Rini, Armin Kuster, + * Paul Mackerras and others. * * iSeries supported added by Mike Corrigan <mikejc@us.ibm.com> * * Additional shared processor, SMT, and firmware support * Copyright (c) 2003 Dave Engebretsen <engebret@us.ibm.com> * + * 32-bit and 64-bit versions merged by Paul Mackerras <paulus@samba.org> + * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -29,18 +33,43 @@ #include <asm/machdep.h> #include <asm/smp.h> -extern void power4_idle(void); +#ifdef CONFIG_HOTPLUG_CPU +#define cpu_should_die() (cpu_is_offline(smp_processor_id()) && \ + system_state == SYSTEM_RUNNING) +#else +#define cpu_should_die() 0 +#endif -void default_idle(void) +/* + * The body of the idle task. + */ +void cpu_idle(void) { - unsigned int cpu = smp_processor_id(); - set_thread_flag(TIF_POLLING_NRFLAG); + if (ppc_md.idle_loop) + ppc_md.idle_loop(); /* doesn't return */ + set_thread_flag(TIF_POLLING_NRFLAG); while (1) { - if (!need_resched()) { - while (!need_resched() && !cpu_is_offline(cpu)) { - ppc64_runlatch_off(); + ppc64_runlatch_off(); + while (!need_resched() && !cpu_should_die()) { + if (ppc_md.power_save) { + clear_thread_flag(TIF_POLLING_NRFLAG); + /* + * smp_mb is so clearing of TIF_POLLING_NRFLAG + * is ordered w.r.t. need_resched() test. + */ + smp_mb(); + local_irq_disable(); + + /* check again after disabling irqs */ + if (!need_resched() && !cpu_should_die()) + ppc_md.power_save(); + + local_irq_enable(); + set_thread_flag(TIF_POLLING_NRFLAG); + + } else { /* * Go into low thread priority and possibly * low power mode. @@ -48,46 +77,18 @@ void default_idle(void) HMT_low(); HMT_very_low(); } - - HMT_medium(); } + HMT_medium(); ppc64_runlatch_on(); + if (cpu_should_die()) + cpu_die(); preempt_enable_no_resched(); schedule(); preempt_disable(); - if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) - cpu_die(); } } -void native_idle(void) -{ - while (1) { - ppc64_runlatch_off(); - - if (!need_resched()) - power4_idle(); - - if (need_resched()) { - ppc64_runlatch_on(); - preempt_enable_no_resched(); - schedule(); - preempt_disable(); - } - - if (cpu_is_offline(smp_processor_id()) && - system_state == SYSTEM_RUNNING) - cpu_die(); - } -} - -void cpu_idle(void) -{ - BUG_ON(NULL == ppc_md.idle_loop); - ppc_md.idle_loop(); -} - int powersave_nap; #ifdef CONFIG_SYSCTL diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S index 444fdcc769f1..12a4efbaa08f 100644 --- a/arch/powerpc/kernel/idle_6xx.S +++ b/arch/powerpc/kernel/idle_6xx.S @@ -87,19 +87,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) cmpwi 0,r3,0 beqlr - /* Clear MSR:EE */ - mfmsr r7 - rlwinm r0,r7,0,17,15 - mtmsr r0 - - /* Check current_thread_info()->flags */ - rlwinm r4,r1,0,0,18 - lwz r4,TI_FLAGS(r4) - andi. r0,r4,_TIF_NEED_RESCHED - beq 1f - mtmsr r7 /* out of line this ? */ - blr -1: /* Some pre-nap cleanups needed on some CPUs */ andis. r0,r3,HID0_NAP@h beq 2f @@ -157,7 +144,8 @@ BEGIN_FTR_SECTION DSSALL sync END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) - ori r7,r7,MSR_EE /* Could be ommited (already set) */ + mfmsr r7 + ori r7,r7,MSR_EE oris r7,r7,MSR_POW@h sync isync @@ -220,8 +208,6 @@ _GLOBAL(nap_save_msscr0) _GLOBAL(nap_save_hid1) .space 4*NR_CPUS -_GLOBAL(powersave_nap) - .long 0 _GLOBAL(powersave_lowspeed) .long 0 diff --git a/arch/powerpc/kernel/idle_power4.S b/arch/powerpc/kernel/idle_power4.S index c16b4afab582..6dad1c02496e 100644 --- a/arch/powerpc/kernel/idle_power4.S +++ b/arch/powerpc/kernel/idle_power4.S @@ -1,11 +1,5 @@ /* - * This file contains the power_save function for 6xx & 7xxx CPUs - * rewritten in assembler - * - * Warning ! This code assumes that if your machine has a 750fx - * it will have PLL 1 set to low speed mode (used during NAP/DOZE). - * if this is not the case some additional changes will have to - * be done to check a runtime var (a bit like powersave-nap) + * This file contains the power_save function for 970-family CPUs. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -26,49 +20,23 @@ .text -/* - * Here is the power_save_6xx function. This could eventually be - * split into several functions & changing the function pointer - * depending on the various features. - */ _GLOBAL(power4_idle) BEGIN_FTR_SECTION blr END_FTR_SECTION_IFCLR(CPU_FTR_CAN_NAP) - /* We must dynamically check for the NAP feature as it - * can be cleared by CPU init after the fixups are done - */ - LOAD_REG_ADDRBASE(r3,cur_cpu_spec) - ld r4,ADDROFF(cur_cpu_spec)(r3) - ld r4,CPU_SPEC_FEATURES(r4) - andi. r0,r4,CPU_FTR_CAN_NAP - beqlr /* Now check if user or arch enabled NAP mode */ LOAD_REG_ADDRBASE(r3,powersave_nap) lwz r4,ADDROFF(powersave_nap)(r3) cmpwi 0,r4,0 beqlr - /* Clear MSR:EE */ - mfmsr r7 - li r4,0 - ori r4,r4,MSR_EE - andc r0,r7,r4 - mtmsrd r0 - - /* Check current_thread_info()->flags */ - clrrdi r4,r1,THREAD_SHIFT - ld r4,TI_FLAGS(r4) - andi. r0,r4,_TIF_NEED_RESCHED - beq 1f - mtmsrd r7 /* out of line this ? */ - blr -1: /* Go to NAP now */ BEGIN_FTR_SECTION DSSALL sync END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) + mfmsr r7 + ori r7,r7,MSR_EE oris r7,r7,MSR_POW@h sync isync diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 771a59cbd213..bb5c9501234c 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -379,7 +379,7 @@ void irq_ctx_init(void) struct thread_info *tp; int i; - for_each_cpu(i) { + for_each_possible_cpu(i) { memset((void *)softirq_ctx[i], 0, THREAD_SIZE); tp = softirq_ctx[i]; tp->cpu = i; diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index cb1fe5878e8b..ad7a90212204 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -30,9 +30,11 @@ #include <linux/kprobes.h> #include <linux/ptrace.h> #include <linux/preempt.h> +#include <linux/module.h> #include <asm/cacheflush.h> #include <asm/kdebug.h> #include <asm/sstep.h> +#include <asm/uaccess.h> DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); @@ -372,17 +374,62 @@ static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr) { struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - - if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) - return 1; - - if (kcb->kprobe_status & KPROBE_HIT_SS) { - resume_execution(cur, regs); + const struct exception_table_entry *entry; + + switch(kcb->kprobe_status) { + case KPROBE_HIT_SS: + case KPROBE_REENTER: + /* + * We are here because the instruction being single + * stepped caused a page fault. We reset the current + * kprobe and the nip points back to the probe address + * and allow the page fault handler to continue as a + * normal page fault. + */ + regs->nip = (unsigned long)cur->addr; regs->msr &= ~MSR_SE; regs->msr |= kcb->kprobe_saved_msr; - - reset_current_kprobe(); + if (kcb->kprobe_status == KPROBE_REENTER) + restore_previous_kprobe(kcb); + else + reset_current_kprobe(); preempt_enable_no_resched(); + break; + case KPROBE_HIT_ACTIVE: + case KPROBE_HIT_SSDONE: + /* + * We increment the nmissed count for accounting, + * we can also use npre/npostfault count for accouting + * these specific fault cases. + */ + kprobes_inc_nmissed_count(cur); + + /* + * We come here because instructions in the pre/post + * handler caused the page_fault, this could happen + * if handler tries to access user space by + * copy_from_user(), get_user() etc. Let the + * user-specified handler try to fix it first. + */ + if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) + return 1; + + /* + * In case the user-specified fault handler returned + * zero, try to fix up. + */ + if ((entry = search_exception_tables(regs->nip)) != NULL) { + regs->nip = entry->fixup; + return 1; + } + + /* + * fixup_exception() could not handle it, + * Let do_page_fault() fix it. + */ + break; + default: + break; } return 0; } @@ -396,6 +443,9 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, struct die_args *args = (struct die_args *)data; int ret = NOTIFY_DONE; + if (args->regs && user_mode(args->regs)) + return ret; + switch (val) { case DIE_BPT: if (kprobe_handler(args->regs)) diff --git a/arch/powerpc/kernel/l2cr_6xx.S b/arch/powerpc/kernel/l2cr_6xx.S new file mode 100644 index 000000000000..d7f4e982b539 --- /dev/null +++ b/arch/powerpc/kernel/l2cr_6xx.S @@ -0,0 +1,471 @@ +/* + L2CR functions + Copyright © 1997-1998 by PowerLogix R & D, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ +/* + Thur, Dec. 12, 1998. + - First public release, contributed by PowerLogix. + *********** + Sat, Aug. 7, 1999. + - Terry: Made sure code disabled interrupts before running. (Previously + it was assumed interrupts were already disabled). + - Terry: Updated for tentative G4 support. 4MB of memory is now flushed + instead of 2MB. (Prob. only 3 is necessary). + - Terry: Updated for workaround to HID0[DPM] processor bug + during global invalidates. + *********** + Thu, July 13, 2000. + - Terry: Added isync to correct for an errata. + + 22 August 2001. + - DanM: Finally added the 7450 patch I've had for the past + several months. The L2CR is similar, but I'm going + to assume the user of this functions knows what they + are doing. + + Author: Terry Greeniaus (tgree@phys.ualberta.ca) + Please e-mail updates to this file to me, thanks! +*/ +#include <linux/config.h> +#include <asm/processor.h> +#include <asm/cputable.h> +#include <asm/ppc_asm.h> +#include <asm/cache.h> +#include <asm/page.h> + +/* Usage: + + When setting the L2CR register, you must do a few special + things. If you are enabling the cache, you must perform a + global invalidate. If you are disabling the cache, you must + flush the cache contents first. This routine takes care of + doing these things. When first enabling the cache, make sure + you pass in the L2CR you want, as well as passing in the + global invalidate bit set. A global invalidate will only be + performed if the L2I bit is set in applyThis. When enabling + the cache, you should also set the L2E bit in applyThis. If + you want to modify the L2CR contents after the cache has been + enabled, the recommended procedure is to first call + __setL2CR(0) to disable the cache and then call it again with + the new values for L2CR. Examples: + + _setL2CR(0) - disables the cache + _setL2CR(0xB3A04000) - enables my G3 upgrade card: + - L2E set to turn on the cache + - L2SIZ set to 1MB + - L2CLK set to 1:1 + - L2RAM set to pipelined synchronous late-write + - L2I set to perform a global invalidation + - L2OH set to 0.5 nS + - L2DF set because this upgrade card + requires it + + A similar call should work for your card. You need to know + the correct setting for your card and then place them in the + fields I have outlined above. Other fields support optional + features, such as L2DO which caches only data, or L2TS which + causes cache pushes from the L1 cache to go to the L2 cache + instead of to main memory. + +IMPORTANT: + Starting with the 7450, the bits in this register have moved + or behave differently. The Enable, Parity Enable, Size, + and L2 Invalidate are the only bits that have not moved. + The size is read-only for these processors with internal L2 + cache, and the invalidate is a control as well as status. + -- Dan + +*/ +/* + * Summary: this procedure ignores the L2I bit in the value passed in, + * flushes the cache if it was already enabled, always invalidates the + * cache, then enables the cache if the L2E bit is set in the value + * passed in. + * -- paulus. + */ +_GLOBAL(_set_L2CR) + /* Make sure this is a 750 or 7400 chip */ +BEGIN_FTR_SECTION + li r3,-1 + blr +END_FTR_SECTION_IFCLR(CPU_FTR_L2CR) + + mflr r9 + + /* Stop DST streams */ +BEGIN_FTR_SECTION + DSSALL + sync +END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) + + /* Turn off interrupts and data relocation. */ + mfmsr r7 /* Save MSR in r7 */ + rlwinm r4,r7,0,17,15 + rlwinm r4,r4,0,28,26 /* Turn off DR bit */ + sync + mtmsr r4 + isync + + /* Before we perform the global invalidation, we must disable dynamic + * power management via HID0[DPM] to work around a processor bug where + * DPM can possibly interfere with the state machine in the processor + * that invalidates the L2 cache tags. + */ + mfspr r8,SPRN_HID0 /* Save HID0 in r8 */ + rlwinm r4,r8,0,12,10 /* Turn off HID0[DPM] */ + sync + mtspr SPRN_HID0,r4 /* Disable DPM */ + sync + + /* Get the current enable bit of the L2CR into r4 */ + mfspr r4,SPRN_L2CR + + /* Tweak some bits */ + rlwinm r5,r3,0,0,0 /* r5 contains the new enable bit */ + rlwinm r3,r3,0,11,9 /* Turn off the invalidate bit */ + rlwinm r3,r3,0,1,31 /* Turn off the enable bit */ + + /* Check to see if we need to flush */ + rlwinm. r4,r4,0,0,0 + beq 2f + + /* Flush the cache. First, read the first 4MB of memory (physical) to + * put new data in the cache. (Actually we only need + * the size of the L2 cache plus the size of the L1 cache, but 4MB will + * cover everything just to be safe). + */ + + /**** Might be a good idea to set L2DO here - to prevent instructions + from getting into the cache. But since we invalidate + the next time we enable the cache it doesn't really matter. + Don't do this unless you accomodate all processor variations. + The bit moved on the 7450..... + ****/ + +BEGIN_FTR_SECTION + /* Disable L2 prefetch on some 745x and try to ensure + * L2 prefetch engines are idle. As explained by errata + * text, we can't be sure they are, we just hope very hard + * that well be enough (sic !). At least I noticed Apple + * doesn't even bother doing the dcbf's here... + */ + mfspr r4,SPRN_MSSCR0 + rlwinm r4,r4,0,0,29 + sync + mtspr SPRN_MSSCR0,r4 + sync + isync + lis r4,KERNELBASE@h + dcbf 0,r4 + dcbf 0,r4 + dcbf 0,r4 + dcbf 0,r4 +END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450) + + /* TODO: use HW flush assist when available */ + + lis r4,0x0002 + mtctr r4 + li r4,0 +1: + lwzx r0,r0,r4 + addi r4,r4,32 /* Go to start of next cache line */ + bdnz 1b + isync + + /* Now, flush the first 4MB of memory */ + lis r4,0x0002 + mtctr r4 + li r4,0 + sync +1: + dcbf 0,r4 + addi r4,r4,32 /* Go to start of next cache line */ + bdnz 1b + +2: + /* Set up the L2CR configuration bits (and switch L2 off) */ + /* CPU errata: Make sure the mtspr below is already in the + * L1 icache + */ + b 20f + .balign L1_CACHE_BYTES +22: + sync + mtspr SPRN_L2CR,r3 + sync + b 23f +20: + b 21f +21: sync + isync + b 22b + +23: + /* Perform a global invalidation */ + oris r3,r3,0x0020 + sync + mtspr SPRN_L2CR,r3 + sync + isync /* For errata */ + +BEGIN_FTR_SECTION + /* On the 7450, we wait for the L2I bit to clear...... + */ +10: mfspr r3,SPRN_L2CR + andis. r4,r3,0x0020 + bne 10b + b 11f +END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450) + + /* Wait for the invalidation to complete */ +3: mfspr r3,SPRN_L2CR + rlwinm. r4,r3,0,31,31 + bne 3b + +11: rlwinm r3,r3,0,11,9 /* Turn off the L2I bit */ + sync + mtspr SPRN_L2CR,r3 + sync + + /* See if we need to enable the cache */ + cmplwi r5,0 + beq 4f + + /* Enable the cache */ + oris r3,r3,0x8000 + mtspr SPRN_L2CR,r3 + sync + + /* Enable L2 HW prefetch on 744x/745x */ +BEGIN_FTR_SECTION + mfspr r3,SPRN_MSSCR0 + ori r3,r3,3 + sync + mtspr SPRN_MSSCR0,r3 + sync + isync +END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450) +4: + + /* Restore HID0[DPM] to whatever it was before */ + sync + mtspr 1008,r8 + sync + + /* Restore MSR (restores EE and DR bits to original state) */ + SYNC + mtmsr r7 + isync + + mtlr r9 + blr + +_GLOBAL(_get_L2CR) + /* Return the L2CR contents */ + li r3,0 +BEGIN_FTR_SECTION + mfspr r3,SPRN_L2CR +END_FTR_SECTION_IFSET(CPU_FTR_L2CR) + blr + + +/* + * Here is a similar routine for dealing with the L3 cache + * on the 745x family of chips + */ + +_GLOBAL(_set_L3CR) + /* Make sure this is a 745x chip */ +BEGIN_FTR_SECTION + li r3,-1 + blr +END_FTR_SECTION_IFCLR(CPU_FTR_L3CR) + + /* Turn off interrupts and data relocation. */ + mfmsr r7 /* Save MSR in r7 */ + rlwinm r4,r7,0,17,15 + rlwinm r4,r4,0,28,26 /* Turn off DR bit */ + sync + mtmsr r4 + isync + + /* Stop DST streams */ + DSSALL + sync + + /* Get the current enable bit of the L3CR into r4 */ + mfspr r4,SPRN_L3CR + + /* Tweak some bits */ + rlwinm r5,r3,0,0,0 /* r5 contains the new enable bit */ + rlwinm r3,r3,0,22,20 /* Turn off the invalidate bit */ + rlwinm r3,r3,0,2,31 /* Turn off the enable & PE bits */ + rlwinm r3,r3,0,5,3 /* Turn off the clken bit */ + /* Check to see if we need to flush */ + rlwinm. r4,r4,0,0,0 + beq 2f + + /* Flush the cache. + */ + + /* TODO: use HW flush assist */ + + lis r4,0x0008 + mtctr r4 + li r4,0 +1: + lwzx r0,r0,r4 + dcbf 0,r4 + addi r4,r4,32 /* Go to start of next cache line */ + bdnz 1b + +2: + /* Set up the L3CR configuration bits (and switch L3 off) */ + sync + mtspr SPRN_L3CR,r3 + sync + + oris r3,r3,L3CR_L3RES@h /* Set reserved bit 5 */ + mtspr SPRN_L3CR,r3 + sync + oris r3,r3,L3CR_L3CLKEN@h /* Set clken */ + mtspr SPRN_L3CR,r3 + sync + + /* Wait for stabilize */ + li r0,256 + mtctr r0 +1: bdnz 1b + + /* Perform a global invalidation */ + ori r3,r3,0x0400 + sync + mtspr SPRN_L3CR,r3 + sync + isync + + /* We wait for the L3I bit to clear...... */ +10: mfspr r3,SPRN_L3CR + andi. r4,r3,0x0400 + bne 10b + + /* Clear CLKEN */ + rlwinm r3,r3,0,5,3 /* Turn off the clken bit */ + mtspr SPRN_L3CR,r3 + sync + + /* Wait for stabilize */ + li r0,256 + mtctr r0 +1: bdnz 1b + + /* See if we need to enable the cache */ + cmplwi r5,0 + beq 4f + + /* Enable the cache */ + oris r3,r3,(L3CR_L3E | L3CR_L3CLKEN)@h + mtspr SPRN_L3CR,r3 + sync + + /* Wait for stabilize */ + li r0,256 + mtctr r0 +1: bdnz 1b + + /* Restore MSR (restores EE and DR bits to original state) */ +4: SYNC + mtmsr r7 + isync + blr + +_GLOBAL(_get_L3CR) + /* Return the L3CR contents */ + li r3,0 +BEGIN_FTR_SECTION + mfspr r3,SPRN_L3CR +END_FTR_SECTION_IFSET(CPU_FTR_L3CR) + blr + +/* --- End of PowerLogix code --- + */ + + +/* flush_disable_L1() - Flush and disable L1 cache + * + * clobbers r0, r3, ctr, cr0 + * Must be called with interrupts disabled and MMU enabled. + */ +_GLOBAL(__flush_disable_L1) + /* Stop pending alitvec streams and memory accesses */ +BEGIN_FTR_SECTION + DSSALL +END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) + sync + + /* Load counter to 0x4000 cache lines (512k) and + * load cache with datas + */ + li r3,0x4000 /* 512kB / 32B */ + mtctr r3 + lis r3,KERNELBASE@h +1: + lwz r0,0(r3) + addi r3,r3,0x0020 /* Go to start of next cache line */ + bdnz 1b + isync + sync + + /* Now flush those cache lines */ + li r3,0x4000 /* 512kB / 32B */ + mtctr r3 + lis r3,KERNELBASE@h +1: + dcbf 0,r3 + addi r3,r3,0x0020 /* Go to start of next cache line */ + bdnz 1b + sync + + /* We can now disable the L1 cache (HID0:DCE, HID0:ICE) */ + mfspr r3,SPRN_HID0 + rlwinm r3,r3,0,18,15 + mtspr SPRN_HID0,r3 + sync + isync + blr + +/* inval_enable_L1 - Invalidate and enable L1 cache + * + * Assumes L1 is already disabled and MSR:EE is off + * + * clobbers r3 + */ +_GLOBAL(__inval_enable_L1) + /* Enable and then Flash inval the instruction & data cache */ + mfspr r3,SPRN_HID0 + ori r3,r3, HID0_ICE|HID0_ICFI|HID0_DCE|HID0_DCI + sync + isync + mtspr SPRN_HID0,r3 + xori r3,r3, HID0_ICFI|HID0_DCI + mtspr SPRN_HID0,r3 + sync + + blr + + diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index c7a799a09516..6e67b5b49ba1 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -37,7 +37,7 @@ static int legacy_serial_console = -1; static int __init add_legacy_port(struct device_node *np, int want_index, int iotype, phys_addr_t base, phys_addr_t taddr, unsigned long irq, - unsigned int flags) + upf_t flags) { u32 *clk, *spd, clock = BASE_BAUD * 16; int index; @@ -113,7 +113,7 @@ static int __init add_legacy_soc_port(struct device_node *np, { phys_addr_t addr; u32 *addrp; - unsigned int flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_SHARE_IRQ; + upf_t flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_SHARE_IRQ; /* We only support ports that have a clock frequency properly * encoded in the device-tree. @@ -236,6 +236,23 @@ static int __init add_legacy_pci_port(struct device_node *np, } #endif +static void __init setup_legacy_serial_console(int console) +{ + struct legacy_serial_info *info = + &legacy_serial_infos[console]; + void __iomem *addr; + + if (info->taddr == 0) + return; + addr = ioremap(info->taddr, 0x1000); + if (addr == NULL) + return; + if (info->speed == 0) + info->speed = udbg_probe_uart_speed(addr, info->clock); + DBG("default console speed = %d\n", info->speed); + udbg_init_uart(addr, info->speed, info->clock); +} + /* * This is called very early, as part of setup_system() or eventually * setup_arch(), basically before anything else in this file. This function @@ -318,25 +335,8 @@ void __init find_legacy_serial_ports(void) #endif DBG("legacy_serial_console = %d\n", legacy_serial_console); - - /* udbg is 64 bits only for now, that will change soon though ... */ - while (legacy_serial_console >= 0) { - struct legacy_serial_info *info = - &legacy_serial_infos[legacy_serial_console]; - void __iomem *addr; - - if (info->taddr == 0) - break; - addr = ioremap(info->taddr, 0x1000); - if (addr == NULL) - break; - if (info->speed == 0) - info->speed = udbg_probe_uart_speed(addr, info->clock); - DBG("default console speed = %d\n", info->speed); - udbg_init_uart(addr, info->speed, info->clock); - break; - } - + if (legacy_serial_console >= 0) + setup_legacy_serial_console(legacy_serial_console); DBG(" <- find_legacy_serial_port()\n"); } diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c index e789fef4eb8a..1b73508ecb2b 100644 --- a/arch/powerpc/kernel/lparcfg.c +++ b/arch/powerpc/kernel/lparcfg.c @@ -56,7 +56,7 @@ static unsigned long get_purr(void) unsigned long sum_purr = 0; int cpu; - for_each_cpu(cpu) { + for_each_possible_cpu(cpu) { sum_purr += lppaca[cpu].emulated_time_base; #ifdef PURR_DEBUG @@ -222,7 +222,7 @@ static unsigned long get_purr(void) int cpu; struct cpu_usage *cu; - for_each_cpu(cpu) { + for_each_possible_cpu(cpu) { cu = &per_cpu(cpu_usage_array, cpu); sum_purr += cu->current_tb; } diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c new file mode 100644 index 000000000000..92f4e5f64f02 --- /dev/null +++ b/arch/powerpc/kernel/module_32.c @@ -0,0 +1,320 @@ +/* Kernel module help for PPC. + Copyright (C) 2001 Rusty Russell. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +#include <linux/module.h> +#include <linux/moduleloader.h> +#include <linux/elf.h> +#include <linux/vmalloc.h> +#include <linux/fs.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/cache.h> + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(fmt , ...) +#endif + +LIST_HEAD(module_bug_list); + +void *module_alloc(unsigned long size) +{ + if (size == 0) + return NULL; + return vmalloc(size); +} + +/* Free memory returned from module_alloc */ +void module_free(struct module *mod, void *module_region) +{ + vfree(module_region); + /* FIXME: If module_region == mod->init_region, trim exception + table entries. */ +} + +/* Count how many different relocations (different symbol, different + addend) */ +static unsigned int count_relocs(const Elf32_Rela *rela, unsigned int num) +{ + unsigned int i, j, ret = 0; + + /* Sure, this is order(n^2), but it's usually short, and not + time critical */ + for (i = 0; i < num; i++) { + for (j = 0; j < i; j++) { + /* If this addend appeared before, it's + already been counted */ + if (ELF32_R_SYM(rela[i].r_info) + == ELF32_R_SYM(rela[j].r_info) + && rela[i].r_addend == rela[j].r_addend) + break; + } + if (j == i) ret++; + } + return ret; +} + +/* Get the potential trampolines size required of the init and + non-init sections */ +static unsigned long get_plt_size(const Elf32_Ehdr *hdr, + const Elf32_Shdr *sechdrs, + const char *secstrings, + int is_init) +{ + unsigned long ret = 0; + unsigned i; + + /* Everything marked ALLOC (this includes the exported + symbols) */ + for (i = 1; i < hdr->e_shnum; i++) { + /* If it's called *.init*, and we're not init, we're + not interested */ + if ((strstr(secstrings + sechdrs[i].sh_name, ".init") != 0) + != is_init) + continue; + + /* We don't want to look at debug sections. */ + if (strstr(secstrings + sechdrs[i].sh_name, ".debug") != 0) + continue; + + if (sechdrs[i].sh_type == SHT_RELA) { + DEBUGP("Found relocations in section %u\n", i); + DEBUGP("Ptr: %p. Number: %u\n", + (void *)hdr + sechdrs[i].sh_offset, + sechdrs[i].sh_size / sizeof(Elf32_Rela)); + ret += count_relocs((void *)hdr + + sechdrs[i].sh_offset, + sechdrs[i].sh_size + / sizeof(Elf32_Rela)) + * sizeof(struct ppc_plt_entry); + } + } + + return ret; +} + +int module_frob_arch_sections(Elf32_Ehdr *hdr, + Elf32_Shdr *sechdrs, + char *secstrings, + struct module *me) +{ + unsigned int i; + + /* Find .plt and .init.plt sections */ + for (i = 0; i < hdr->e_shnum; i++) { + if (strcmp(secstrings + sechdrs[i].sh_name, ".init.plt") == 0) + me->arch.init_plt_section = i; + else if (strcmp(secstrings + sechdrs[i].sh_name, ".plt") == 0) + me->arch.core_plt_section = i; + } + if (!me->arch.core_plt_section || !me->arch.init_plt_section) { + printk("Module doesn't contain .plt or .init.plt sections.\n"); + return -ENOEXEC; + } + + /* Override their sizes */ + sechdrs[me->arch.core_plt_section].sh_size + = get_plt_size(hdr, sechdrs, secstrings, 0); + sechdrs[me->arch.init_plt_section].sh_size + = get_plt_size(hdr, sechdrs, secstrings, 1); + return 0; +} + +int apply_relocate(Elf32_Shdr *sechdrs, + const char *strtab, + unsigned int symindex, + unsigned int relsec, + struct module *module) +{ + printk(KERN_ERR "%s: Non-ADD RELOCATION unsupported\n", + module->name); + return -ENOEXEC; +} + +static inline int entry_matches(struct ppc_plt_entry *entry, Elf32_Addr val) +{ + if (entry->jump[0] == 0x3d600000 + ((val + 0x8000) >> 16) + && entry->jump[1] == 0x396b0000 + (val & 0xffff)) + return 1; + return 0; +} + +/* Set up a trampoline in the PLT to bounce us to the distant function */ +static uint32_t do_plt_call(void *location, + Elf32_Addr val, + Elf32_Shdr *sechdrs, + struct module *mod) +{ + struct ppc_plt_entry *entry; + + DEBUGP("Doing plt for call to 0x%x at 0x%x\n", val, (unsigned int)location); + /* Init, or core PLT? */ + if (location >= mod->module_core + && location < mod->module_core + mod->core_size) + entry = (void *)sechdrs[mod->arch.core_plt_section].sh_addr; + else + entry = (void *)sechdrs[mod->arch.init_plt_section].sh_addr; + + /* Find this entry, or if that fails, the next avail. entry */ + while (entry->jump[0]) { + if (entry_matches(entry, val)) return (uint32_t)entry; + entry++; + } + + /* Stolen from Paul Mackerras as well... */ + entry->jump[0] = 0x3d600000+((val+0x8000)>>16); /* lis r11,sym@ha */ + entry->jump[1] = 0x396b0000 + (val&0xffff); /* addi r11,r11,sym@l*/ + entry->jump[2] = 0x7d6903a6; /* mtctr r11 */ + entry->jump[3] = 0x4e800420; /* bctr */ + + DEBUGP("Initialized plt for 0x%x at %p\n", val, entry); + return (uint32_t)entry; +} + +int apply_relocate_add(Elf32_Shdr *sechdrs, + const char *strtab, + unsigned int symindex, + unsigned int relsec, + struct module *module) +{ + unsigned int i; + Elf32_Rela *rela = (void *)sechdrs[relsec].sh_addr; + Elf32_Sym *sym; + uint32_t *location; + uint32_t value; + + DEBUGP("Applying ADD relocate section %u to %u\n", relsec, + sechdrs[relsec].sh_info); + for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rela); i++) { + /* This is where to make the change */ + location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr + + rela[i].r_offset; + /* This is the symbol it is referring to. Note that all + undefined symbols have been resolved. */ + sym = (Elf32_Sym *)sechdrs[symindex].sh_addr + + ELF32_R_SYM(rela[i].r_info); + /* `Everything is relative'. */ + value = sym->st_value + rela[i].r_addend; + + switch (ELF32_R_TYPE(rela[i].r_info)) { + case R_PPC_ADDR32: + /* Simply set it */ + *(uint32_t *)location = value; + break; + + case R_PPC_ADDR16_LO: + /* Low half of the symbol */ + *(uint16_t *)location = value; + break; + + case R_PPC_ADDR16_HA: + /* Sign-adjusted lower 16 bits: PPC ELF ABI says: + (((x >> 16) + ((x & 0x8000) ? 1 : 0))) & 0xFFFF. + This is the same, only sane. + */ + *(uint16_t *)location = (value + 0x8000) >> 16; + break; + + case R_PPC_REL24: + if ((int)(value - (uint32_t)location) < -0x02000000 + || (int)(value - (uint32_t)location) >= 0x02000000) + value = do_plt_call(location, value, + sechdrs, module); + + /* Only replace bits 2 through 26 */ + DEBUGP("REL24 value = %08X. location = %08X\n", + value, (uint32_t)location); + DEBUGP("Location before: %08X.\n", + *(uint32_t *)location); + *(uint32_t *)location + = (*(uint32_t *)location & ~0x03fffffc) + | ((value - (uint32_t)location) + & 0x03fffffc); + DEBUGP("Location after: %08X.\n", + *(uint32_t *)location); + DEBUGP("ie. jump to %08X+%08X = %08X\n", + *(uint32_t *)location & 0x03fffffc, + (uint32_t)location, + (*(uint32_t *)location & 0x03fffffc) + + (uint32_t)location); + break; + + case R_PPC_REL32: + /* 32-bit relative jump. */ + *(uint32_t *)location = value - (uint32_t)location; + break; + + default: + printk("%s: unknown ADD relocation: %u\n", + module->name, + ELF32_R_TYPE(rela[i].r_info)); + return -ENOEXEC; + } + } + return 0; +} + +int module_finalize(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + struct module *me) +{ + char *secstrings; + unsigned int i; + + me->arch.bug_table = NULL; + me->arch.num_bugs = 0; + + /* Find the __bug_table section, if present */ + secstrings = (char *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + for (i = 1; i < hdr->e_shnum; i++) { + if (strcmp(secstrings+sechdrs[i].sh_name, "__bug_table")) + continue; + me->arch.bug_table = (void *) sechdrs[i].sh_addr; + me->arch.num_bugs = sechdrs[i].sh_size / sizeof(struct bug_entry); + break; + } + + /* + * Strictly speaking this should have a spinlock to protect against + * traversals, but since we only traverse on BUG()s, a spinlock + * could potentially lead to deadlock and thus be counter-productive. + */ + list_add(&me->arch.bug_list, &module_bug_list); + + return 0; +} + +void module_arch_cleanup(struct module *mod) +{ + list_del(&mod->arch.bug_list); +} + +struct bug_entry *module_find_bug(unsigned long bugaddr) +{ + struct mod_arch_specific *mod; + unsigned int i; + struct bug_entry *bug; + + list_for_each_entry(mod, &module_bug_list, bug_list) { + bug = mod->bug_table; + for (i = 0; i < mod->num_bugs; ++i, ++bug) + if (bugaddr == bug->bug_addr) + return bug; + } + return NULL; +} diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index fd7db8d542db..ada50aa5b600 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -160,7 +160,7 @@ static int dev_nvram_ioctl(struct inode *inode, struct file *file, case IOC_NVRAM_GET_OFFSET: { int part, offset; - if (_machine != PLATFORM_POWERMAC) + if (!machine_is(powermac)) return -EINVAL; if (copy_from_user(&part, (void __user*)arg, sizeof(part)) != 0) return -EFAULT; @@ -174,8 +174,9 @@ static int dev_nvram_ioctl(struct inode *inode, struct file *file, return 0; } #endif /* CONFIG_PPC_PMAC */ + default: + return -EINVAL; } - return -EINVAL; } struct file_operations nvram_fops = { @@ -443,7 +444,7 @@ static int nvram_setup_partition(void) * in our nvram, as Apple defined partitions use pretty much * all of the space */ - if (_machine == PLATFORM_POWERMAC) + if (machine_is(powermac)) return -ENOSPC; /* see if we have an OS partition that meets our needs. diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 5d1b708086bd..f505a8827e3e 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -56,14 +56,11 @@ struct lppaca lppaca[] = { * processors. The processor VPD array needs one entry per physical * processor (not thread). */ -#define PACA_INIT_COMMON(number, start, asrr, asrv) \ +#define PACA_INIT_COMMON(number) \ .lppaca_ptr = &lppaca[number], \ .lock_token = 0x8000, \ .paca_index = (number), /* Paca Index */ \ .kernel_toc = (unsigned long)(&__toc_start) + 0x8000UL, \ - .stab_real = (asrr), /* Real pointer to segment table */ \ - .stab_addr = (asrv), /* Virt pointer to segment table */ \ - .cpu_start = (start), /* Processor start */ \ .hw_cpu_id = 0xffff, #ifdef CONFIG_PPC_ISERIES @@ -72,30 +69,20 @@ struct lppaca lppaca[] = { #define PACA_INIT(number) \ { \ - PACA_INIT_COMMON(number, 0, 0, 0) \ - PACA_INIT_ISERIES(number) \ -} - -#define BOOTCPU_PACA_INIT(number) \ -{ \ - PACA_INIT_COMMON(number, 1, 0, (u64)&initial_stab) \ + PACA_INIT_COMMON(number) \ PACA_INIT_ISERIES(number) \ } #else #define PACA_INIT(number) \ { \ - PACA_INIT_COMMON(number, 0, 0, 0) \ + PACA_INIT_COMMON(number) \ } -#define BOOTCPU_PACA_INIT(number) \ -{ \ - PACA_INIT_COMMON(number, 1, STAB0_PHYS_ADDR, (u64)&initial_stab) \ -} #endif struct paca_struct paca[] = { - BOOTCPU_PACA_INIT(0), + PACA_INIT(0), #if NR_CPUS > 1 PACA_INIT( 1), PACA_INIT( 2), PACA_INIT( 3), #if NR_CPUS > 4 diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index 704c846b2b0f..b129d2e4b759 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -787,7 +787,7 @@ pci_busdev_to_OF_node(struct pci_bus *bus, int devfn) * fix has to be done by making the remapping per-host and always * filling the pci_to_OF map. --BenH */ - if (_machine == _MACH_Pmac && busnr >= 0xf0) + if (machine_is(powermac) && busnr >= 0xf0) busnr -= 0xf0; else #endif @@ -1728,7 +1728,7 @@ long sys_pciconfig_iobase(long which, unsigned long bus, unsigned long devfn) * (bus 0 is HT root), we return the AGP one instead. */ #ifdef CONFIG_PPC_PMAC - if (_machine == _MACH_Pmac && machine_is_compatible("MacRISC4")) + if (machine_is(powermac) && machine_is_compatible("MacRISC4")) if (bus == 0) bus = 0xf0; #endif /* CONFIG_PPC_PMAC */ diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index ba92bab7cc2c..4c4449be81ce 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -78,6 +78,7 @@ int global_phb_number; /* Global phb counter */ /* Cached ISA bridge dev. */ struct pci_dev *ppc64_isabridge_dev = NULL; +EXPORT_SYMBOL_GPL(ppc64_isabridge_dev); static void fixup_broken_pcnet32(struct pci_dev* dev) { diff --git a/arch/powerpc/kernel/perfmon_fsl_booke.c b/arch/powerpc/kernel/perfmon_fsl_booke.c new file mode 100644 index 000000000000..32455dfcc36b --- /dev/null +++ b/arch/powerpc/kernel/perfmon_fsl_booke.c @@ -0,0 +1,222 @@ +/* kernel/perfmon_fsl_booke.c + * Freescale Book-E Performance Monitor code + * + * Author: Andy Fleming + * Copyright (c) 2004 Freescale Semiconductor, Inc + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/ptrace.h> +#include <linux/slab.h> +#include <linux/user.h> +#include <linux/a.out.h> +#include <linux/interrupt.h> +#include <linux/config.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/prctl.h> + +#include <asm/pgtable.h> +#include <asm/uaccess.h> +#include <asm/system.h> +#include <asm/io.h> +#include <asm/reg.h> +#include <asm/xmon.h> +#include <asm/pmc.h> + +static inline u32 get_pmlca(int ctr); +static inline void set_pmlca(int ctr, u32 pmlca); + +static inline u32 get_pmlca(int ctr) +{ + u32 pmlca; + + switch (ctr) { + case 0: + pmlca = mfpmr(PMRN_PMLCA0); + break; + case 1: + pmlca = mfpmr(PMRN_PMLCA1); + break; + case 2: + pmlca = mfpmr(PMRN_PMLCA2); + break; + case 3: + pmlca = mfpmr(PMRN_PMLCA3); + break; + default: + panic("Bad ctr number\n"); + } + + return pmlca; +} + +static inline void set_pmlca(int ctr, u32 pmlca) +{ + switch (ctr) { + case 0: + mtpmr(PMRN_PMLCA0, pmlca); + break; + case 1: + mtpmr(PMRN_PMLCA1, pmlca); + break; + case 2: + mtpmr(PMRN_PMLCA2, pmlca); + break; + case 3: + mtpmr(PMRN_PMLCA3, pmlca); + break; + default: + panic("Bad ctr number\n"); + } +} + +void init_pmc_stop(int ctr) +{ + u32 pmlca = (PMLCA_FC | PMLCA_FCS | PMLCA_FCU | + PMLCA_FCM1 | PMLCA_FCM0); + u32 pmlcb = 0; + + switch (ctr) { + case 0: + mtpmr(PMRN_PMLCA0, pmlca); + mtpmr(PMRN_PMLCB0, pmlcb); + break; + case 1: + mtpmr(PMRN_PMLCA1, pmlca); + mtpmr(PMRN_PMLCB1, pmlcb); + break; + case 2: + mtpmr(PMRN_PMLCA2, pmlca); + mtpmr(PMRN_PMLCB2, pmlcb); + break; + case 3: + mtpmr(PMRN_PMLCA3, pmlca); + mtpmr(PMRN_PMLCB3, pmlcb); + break; + default: + panic("Bad ctr number!\n"); + } +} + +void set_pmc_event(int ctr, int event) +{ + u32 pmlca; + + pmlca = get_pmlca(ctr); + + pmlca = (pmlca & ~PMLCA_EVENT_MASK) | + ((event << PMLCA_EVENT_SHIFT) & + PMLCA_EVENT_MASK); + + set_pmlca(ctr, pmlca); +} + +void set_pmc_user_kernel(int ctr, int user, int kernel) +{ + u32 pmlca; + + pmlca = get_pmlca(ctr); + + if(user) + pmlca &= ~PMLCA_FCU; + else + pmlca |= PMLCA_FCU; + + if(kernel) + pmlca &= ~PMLCA_FCS; + else + pmlca |= PMLCA_FCS; + + set_pmlca(ctr, pmlca); +} + +void set_pmc_marked(int ctr, int mark0, int mark1) +{ + u32 pmlca = get_pmlca(ctr); + + if(mark0) + pmlca &= ~PMLCA_FCM0; + else + pmlca |= PMLCA_FCM0; + + if(mark1) + pmlca &= ~PMLCA_FCM1; + else + pmlca |= PMLCA_FCM1; + + set_pmlca(ctr, pmlca); +} + +void pmc_start_ctr(int ctr, int enable) +{ + u32 pmlca = get_pmlca(ctr); + + pmlca &= ~PMLCA_FC; + + if (enable) + pmlca |= PMLCA_CE; + else + pmlca &= ~PMLCA_CE; + + set_pmlca(ctr, pmlca); +} + +void pmc_start_ctrs(int enable) +{ + u32 pmgc0 = mfpmr(PMRN_PMGC0); + + pmgc0 &= ~PMGC0_FAC; + pmgc0 |= PMGC0_FCECE; + + if (enable) + pmgc0 |= PMGC0_PMIE; + else + pmgc0 &= ~PMGC0_PMIE; + + mtpmr(PMRN_PMGC0, pmgc0); +} + +void pmc_stop_ctrs(void) +{ + u32 pmgc0 = mfpmr(PMRN_PMGC0); + + pmgc0 |= PMGC0_FAC; + + pmgc0 &= ~(PMGC0_PMIE | PMGC0_FCECE); + + mtpmr(PMRN_PMGC0, pmgc0); +} + +void dump_pmcs(void) +{ + printk("pmgc0: %x\n", mfpmr(PMRN_PMGC0)); + printk("pmc\t\tpmlca\t\tpmlcb\n"); + printk("%8x\t%8x\t%8x\n", mfpmr(PMRN_PMC0), + mfpmr(PMRN_PMLCA0), mfpmr(PMRN_PMLCB0)); + printk("%8x\t%8x\t%8x\n", mfpmr(PMRN_PMC1), + mfpmr(PMRN_PMLCA1), mfpmr(PMRN_PMLCB1)); + printk("%8x\t%8x\t%8x\n", mfpmr(PMRN_PMC2), + mfpmr(PMRN_PMLCA2), mfpmr(PMRN_PMLCB2)); + printk("%8x\t%8x\t%8x\n", mfpmr(PMRN_PMC3), + mfpmr(PMRN_PMLCA3), mfpmr(PMRN_PMLCB3)); +} + +EXPORT_SYMBOL(init_pmc_stop); +EXPORT_SYMBOL(set_pmc_event); +EXPORT_SYMBOL(set_pmc_user_kernel); +EXPORT_SYMBOL(set_pmc_marked); +EXPORT_SYMBOL(pmc_start_ctr); +EXPORT_SYMBOL(pmc_start_ctrs); +EXPORT_SYMBOL(pmc_stop_ctrs); +EXPORT_SYMBOL(dump_pmcs); diff --git a/arch/powerpc/kernel/proc_ppc64.c b/arch/powerpc/kernel/proc_ppc64.c index 7ba42a405f41..3c2cf661f6d9 100644 --- a/arch/powerpc/kernel/proc_ppc64.c +++ b/arch/powerpc/kernel/proc_ppc64.c @@ -23,6 +23,7 @@ #include <linux/slab.h> #include <linux/kernel.h> +#include <asm/machdep.h> #include <asm/vdso_datapage.h> #include <asm/rtas.h> #include <asm/uaccess.h> @@ -51,7 +52,7 @@ static int __init proc_ppc64_create(void) if (!root) return 1; - if (!(platform_is_pseries() || _machine == PLATFORM_CELL)) + if (!machine_is(pseries) && !machine_is(cell)) return 0; if (!proc_mkdir("rtas", root)) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 1770a066c217..706090c99f47 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -35,7 +35,6 @@ #include <linux/mqueue.h> #include <linux/hardirq.h> #include <linux/utsname.h> -#include <linux/kprobes.h> #include <asm/pgtable.h> #include <asm/uaccess.h> @@ -46,6 +45,7 @@ #include <asm/prom.h> #include <asm/machdep.h> #include <asm/time.h> +#include <asm/syscalls.h> #ifdef CONFIG_PPC64 #include <asm/firmware.h> #endif @@ -363,7 +363,11 @@ static void show_instructions(struct pt_regs *regs) if (!(i % 8)) printk("\n"); - if (BAD_PC(pc) || __get_user(instr, (unsigned int *)pc)) { + /* We use __get_user here *only* to avoid an OOPS on a + * bad address because the pc *should* only be a + * kernel address. + */ + if (BAD_PC(pc) || __get_user(instr, (unsigned int __user *)pc)) { printk("XXXXXXXX "); } else { if (regs->nip == pc) @@ -460,7 +464,6 @@ void show_regs(struct pt_regs * regs) void exit_thread(void) { - kprobe_flush_task(current); discard_lazy_cpu_state(); } @@ -767,7 +770,7 @@ out: return error; } -static int validate_sp(unsigned long sp, struct task_struct *p, +int validate_sp(unsigned long sp, struct task_struct *p, unsigned long nbytes) { unsigned long stack_page = (unsigned long)task_stack_page(p); @@ -805,6 +808,8 @@ static int validate_sp(unsigned long sp, struct task_struct *p, #define FRAME_MARKER 2 #endif +EXPORT_SYMBOL(validate_sp); + unsigned long get_wchan(struct task_struct *p) { unsigned long ip, sp; diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index d63cd562d9d5..4336390bcf34 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -383,14 +383,14 @@ static int __devinit finish_node_interrupts(struct device_node *np, /* Apple uses bits in there in a different way, let's * only keep the real sense bit on macs */ - if (_machine == PLATFORM_POWERMAC) + if (machine_is(powermac)) sense &= 0x1; np->intrs[intrcount].sense = map_mpic_senses[sense]; } #ifdef CONFIG_PPC64 /* We offset irq numbers for the u3 MPIC by 128 in PowerMac */ - if (_machine == PLATFORM_POWERMAC && ic && ic->parent) { + if (machine_is(powermac) && ic && ic->parent) { char *name = get_property(ic->parent, "name", NULL); if (name && !strcmp(name, "u3")) np->intrs[intrcount].line += 128; @@ -570,6 +570,18 @@ int __init of_scan_flat_dt(int (*it)(unsigned long node, return rc; } +unsigned long __init of_get_flat_dt_root(void) +{ + unsigned long p = ((unsigned long)initial_boot_params) + + initial_boot_params->off_dt_struct; + + while(*((u32 *)p) == OF_DT_NOP) + p += 4; + BUG_ON (*((u32 *)p) != OF_DT_BEGIN_NODE); + p += 4; + return _ALIGN(p + strlen((char *)p) + 1, 4); +} + /** * This function can be used within scan_flattened_dt callback to get * access to properties @@ -612,6 +624,25 @@ void* __init of_get_flat_dt_prop(unsigned long node, const char *name, } while(1); } +int __init of_flat_dt_is_compatible(unsigned long node, const char *compat) +{ + const char* cp; + unsigned long cplen, l; + + cp = of_get_flat_dt_prop(node, "compatible", &cplen); + if (cp == NULL) + return 0; + while (cplen > 0) { + if (strncasecmp(cp, compat, strlen(compat)) == 0) + return 1; + l = strlen(cp) + 1; + cp += l; + cplen -= l; + } + + return 0; +} + static void *__init unflatten_dt_alloc(unsigned long *mem, unsigned long size, unsigned long align) { @@ -686,7 +717,7 @@ static unsigned long __init unflatten_dt_node(unsigned long mem, #ifdef DEBUG if ((strlen(p) + l + 1) != allocl) { DBG("%s: p: %d, l: %d, a: %d\n", - pathp, strlen(p), l, allocl); + pathp, (int)strlen(p), l, allocl); } #endif p += strlen(p); @@ -854,35 +885,73 @@ void __init unflatten_device_tree(void) DBG(" <- unflatten_device_tree()\n"); } - static int __init early_init_dt_scan_cpus(unsigned long node, - const char *uname, int depth, void *data) + const char *uname, int depth, + void *data) { + static int logical_cpuid = 0; + char *type = of_get_flat_dt_prop(node, "device_type", NULL); +#ifdef CONFIG_ALTIVEC u32 *prop; - unsigned long size; - char *type = of_get_flat_dt_prop(node, "device_type", &size); +#endif + u32 *intserv; + int i, nthreads; + unsigned long len; + int found = 0; /* We are scanning "cpu" nodes only */ if (type == NULL || strcmp(type, "cpu") != 0) return 0; - boot_cpuid = 0; - boot_cpuid_phys = 0; - if (initial_boot_params && initial_boot_params->version >= 2) { - /* version 2 of the kexec param format adds the phys cpuid - * of booted proc. - */ - boot_cpuid_phys = initial_boot_params->boot_cpuid_phys; + /* Get physical cpuid */ + intserv = of_get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s", &len); + if (intserv) { + nthreads = len / sizeof(int); } else { - /* Check if it's the boot-cpu, set it's hw index now */ - if (of_get_flat_dt_prop(node, + intserv = of_get_flat_dt_prop(node, "reg", NULL); + nthreads = 1; + } + + /* + * Now see if any of these threads match our boot cpu. + * NOTE: This must match the parsing done in smp_setup_cpu_maps. + */ + for (i = 0; i < nthreads; i++) { + /* + * version 2 of the kexec param format adds the phys cpuid of + * booted proc. + */ + if (initial_boot_params && initial_boot_params->version >= 2) { + if (intserv[i] == + initial_boot_params->boot_cpuid_phys) { + found = 1; + break; + } + } else { + /* + * Check if it's the boot-cpu, set it's hw index now, + * unfortunately this format did not support booting + * off secondary threads. + */ + if (of_get_flat_dt_prop(node, "linux,boot-cpu", NULL) != NULL) { - prop = of_get_flat_dt_prop(node, "reg", NULL); - if (prop != NULL) - boot_cpuid_phys = *prop; + found = 1; + break; + } } + +#ifdef CONFIG_SMP + /* logical cpu id is always 0 on UP kernels */ + logical_cpuid++; +#endif + } + + if (found) { + DBG("boot cpu: logical %d physical %d\n", logical_cpuid, + intserv[i]); + boot_cpuid = logical_cpuid; + set_hard_smp_processor_id(boot_cpuid, intserv[i]); } - set_hard_smp_processor_id(0, boot_cpuid_phys); #ifdef CONFIG_ALTIVEC /* Check if we have a VMX and eventually update CPU features */ @@ -901,16 +970,10 @@ static int __init early_init_dt_scan_cpus(unsigned long node, #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_PPC_PSERIES - /* - * Check for an SMT capable CPU and set the CPU feature. We do - * this by looking at the size of the ibm,ppc-interrupt-server#s - * property - */ - prop = (u32 *)of_get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s", - &size); - cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT; - if (prop && ((size / sizeof(u32)) > 1)) + if (nthreads > 1) cur_cpu_spec->cpu_features |= CPU_FTR_SMT; + else + cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT; #endif return 0; @@ -919,7 +982,6 @@ static int __init early_init_dt_scan_cpus(unsigned long node, static int __init early_init_dt_scan_chosen(unsigned long node, const char *uname, int depth, void *data) { - u32 *prop; unsigned long *lprop; unsigned long l; char *p; @@ -930,14 +992,6 @@ static int __init early_init_dt_scan_chosen(unsigned long node, (strcmp(uname, "chosen") != 0 && strcmp(uname, "chosen@0") != 0)) return 0; - /* get platform type */ - prop = (u32 *)of_get_flat_dt_prop(node, "linux,platform", NULL); - if (prop == NULL) - return 0; -#ifdef CONFIG_PPC_MULTIPLATFORM - _machine = *prop; -#endif - #ifdef CONFIG_PPC64 /* check if iommu is forced on or off */ if (of_get_flat_dt_prop(node, "linux,iommu-off", NULL) != NULL) @@ -964,15 +1018,15 @@ static int __init early_init_dt_scan_chosen(unsigned long node, * set of RTAS infos now if available */ { - u64 *basep, *entryp; + u64 *basep, *entryp, *sizep; basep = of_get_flat_dt_prop(node, "linux,rtas-base", NULL); entryp = of_get_flat_dt_prop(node, "linux,rtas-entry", NULL); - prop = of_get_flat_dt_prop(node, "linux,rtas-size", NULL); - if (basep && entryp && prop) { + sizep = of_get_flat_dt_prop(node, "linux,rtas-size", NULL); + if (basep && entryp && sizep) { rtas.base = *basep; rtas.entry = *entryp; - rtas.size = *prop; + rtas.size = *sizep; } } #endif /* CONFIG_PPC_RTAS */ @@ -1001,25 +1055,13 @@ static int __init early_init_dt_scan_chosen(unsigned long node, if (strstr(cmd_line, "mem=")) { char *p, *q; - unsigned long maxmem = 0; for (q = cmd_line; (p = strstr(q, "mem=")) != 0; ) { q = p + 4; if (p > cmd_line && p[-1] != ' ') continue; - maxmem = simple_strtoul(q, &q, 0); - if (*q == 'k' || *q == 'K') { - maxmem <<= 10; - ++q; - } else if (*q == 'm' || *q == 'M') { - maxmem <<= 20; - ++q; - } else if (*q == 'g' || *q == 'G') { - maxmem <<= 30; - ++q; - } + memory_limit = memparse(q, &q); } - memory_limit = maxmem; } /* break now */ @@ -1755,7 +1797,7 @@ static int of_finish_dynamic_node(struct device_node *node) /* We don't support that function on PowerMac, at least * not yet */ - if (_machine == PLATFORM_POWERMAC) + if (machine_is(powermac)) return -ENODEV; /* fix up new node's linux_phandle field */ diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 813c2cd194c2..d66c5e77fcff 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -180,6 +180,16 @@ static unsigned long __initdata prom_tce_alloc_start; static unsigned long __initdata prom_tce_alloc_end; #endif +/* Platforms codes are now obsolete in the kernel. Now only used within this + * file and ultimately gone too. Feel free to change them if you need, they + * are not shared with anything outside of this file anymore + */ +#define PLATFORM_PSERIES 0x0100 +#define PLATFORM_PSERIES_LPAR 0x0101 +#define PLATFORM_LPAR 0x0001 +#define PLATFORM_POWERMAC 0x0400 +#define PLATFORM_GENERIC 0x0500 + static int __initdata of_platform; static char __initdata prom_cmd_line[COMMAND_LINE_SIZE]; @@ -397,6 +407,11 @@ static void __init __attribute__((noreturn)) prom_panic(const char *reason) reason = PTRRELOC(reason); #endif prom_print(reason); + /* Do not call exit because it clears the screen on pmac + * it also causes some sort of double-fault on early pmacs */ + if (RELOC(of_platform) == PLATFORM_POWERMAC) + asm("trap\n"); + /* ToDo: should put up an SRC here on p/iSeries */ call_prom("exit", 0, 0); @@ -1487,7 +1502,10 @@ static int __init prom_find_machine_type(void) int len, i = 0; #ifdef CONFIG_PPC64 phandle rtas; + int x; #endif + + /* Look for a PowerMac */ len = prom_getprop(_prom->root, "compatible", compat, sizeof(compat)-1); if (len > 0) { @@ -1500,28 +1518,36 @@ static int __init prom_find_machine_type(void) if (strstr(p, RELOC("Power Macintosh")) || strstr(p, RELOC("MacRISC"))) return PLATFORM_POWERMAC; -#ifdef CONFIG_PPC64 - if (strstr(p, RELOC("Momentum,Maple"))) - return PLATFORM_MAPLE; - if (strstr(p, RELOC("IBM,CPB"))) - return PLATFORM_CELL; -#endif i += sl + 1; } } #ifdef CONFIG_PPC64 + /* If not a mac, try to figure out if it's an IBM pSeries or any other + * PAPR compliant platform. We assume it is if : + * - /device_type is "chrp" (please, do NOT use that for future + * non-IBM designs ! + * - it has /rtas + */ + len = prom_getprop(_prom->root, "model", + compat, sizeof(compat)-1); + if (len <= 0) + return PLATFORM_GENERIC; + compat[len] = 0; + if (strcmp(compat, "chrp")) + return PLATFORM_GENERIC; + /* Default to pSeries. We need to know if we are running LPAR */ rtas = call_prom("finddevice", 1, 1, ADDR("/rtas")); - if (PHANDLE_VALID(rtas)) { - int x = prom_getproplen(rtas, "ibm,hypertas-functions"); - if (x != PROM_ERROR) { - prom_printf("Hypertas detected, assuming LPAR !\n"); - return PLATFORM_PSERIES_LPAR; - } + if (!PHANDLE_VALID(rtas)) + return PLATFORM_GENERIC; + x = prom_getproplen(rtas, "ibm,hypertas-functions"); + if (x != PROM_ERROR) { + prom_printf("Hypertas detected, assuming LPAR !\n"); + return PLATFORM_PSERIES_LPAR; } return PLATFORM_PSERIES; #else - return PLATFORM_CHRP; + return PLATFORM_GENERIC; #endif } @@ -2029,7 +2055,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, { struct prom_t *_prom; unsigned long hdr; - u32 getprop_rval; unsigned long offset = reloc_offset(); #ifdef CONFIG_PPC32 @@ -2060,6 +2085,12 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, */ prom_init_stdout(); + /* + * Get default machine type. At this point, we do not differentiate + * between pSeries SMP and pSeries LPAR + */ + RELOC(of_platform) = prom_find_machine_type(); + /* Bail if this is a kdump kernel. */ if (PHYSICAL_START > 0) prom_panic("Error: You can't boot a kdump kernel from OF!\n"); @@ -2069,15 +2100,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, */ prom_check_initrd(r3, r4); - /* - * Get default machine type. At this point, we do not differentiate - * between pSeries SMP and pSeries LPAR - */ - RELOC(of_platform) = prom_find_machine_type(); - getprop_rval = RELOC(of_platform); - prom_setprop(_prom->chosen, "/chosen", "linux,platform", - &getprop_rval, sizeof(getprop_rval)); - #ifdef CONFIG_PPC_PSERIES /* * On pSeries, inform the firmware about our capabilities diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c index 1f03fb28cc0a..456286cf1d14 100644 --- a/arch/powerpc/kernel/rtas-proc.c +++ b/arch/powerpc/kernel/rtas-proc.c @@ -257,7 +257,7 @@ static int __init proc_rtas_init(void) { struct proc_dir_entry *entry; - if (_machine != PLATFORM_PSERIES && _machine != PLATFORM_PSERIES_LPAR) + if (!machine_is(pseries)) return 1; rtas_node = of_find_node_by_name(NULL, "rtas"); diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index b5b2add7ad1e..06636c927a7e 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -25,6 +25,7 @@ #include <asm/hvcall.h> #include <asm/semaphore.h> #include <asm/machdep.h> +#include <asm/firmware.h> #include <asm/page.h> #include <asm/param.h> #include <asm/system.h> @@ -32,6 +33,7 @@ #include <asm/uaccess.h> #include <asm/lmb.h> #include <asm/udbg.h> +#include <asm/syscalls.h> struct rtas_t rtas = { .lock = SPIN_LOCK_UNLOCKED @@ -591,7 +593,7 @@ static void rtas_percpu_suspend_me(void *info) data->waiting = 0; data->args->args[data->args->nargs] = rtas_call(ibm_suspend_me_token, 0, 1, NULL); - for_each_cpu(i) + for_each_possible_cpu(i) plpar_hcall_norets(H_PROD,i); } else { data->waiting = -EBUSY; @@ -624,7 +626,7 @@ static int rtas_ibm_suspend_me(struct rtas_args *args) /* Prod each CPU. This won't hurt, and will wake * anyone we successfully put to sleep with H_Join */ - for_each_cpu(i) + for_each_possible_cpu(i) plpar_hcall_norets(H_PROD, i); return data.waiting; @@ -767,7 +769,7 @@ void __init rtas_initialize(void) * the stop-self token if any */ #ifdef CONFIG_PPC64 - if (_machine == PLATFORM_PSERIES_LPAR) { + if (machine_is(pseries) && firmware_has_feature(FW_FEATURE_LPAR)) { rtas_region = min(lmb.rmo_size, RTAS_INSTANTIATE_MAX); ibm_suspend_me_token = rtas_token("ibm,suspend-me"); } diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index c1d62bf11f29..c607f3b9ca17 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -9,6 +9,9 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ + +#undef DEBUG + #include <linux/config.h> #include <linux/module.h> #include <linux/string.h> @@ -41,6 +44,7 @@ #include <asm/time.h> #include <asm/cputable.h> #include <asm/sections.h> +#include <asm/firmware.h> #include <asm/btext.h> #include <asm/nvram.h> #include <asm/setup.h> @@ -56,8 +60,6 @@ #include "setup.h" -#undef DEBUG - #ifdef DEBUG #include <asm/udbg.h> #define DBG(fmt...) udbg_printf(fmt) @@ -65,10 +67,12 @@ #define DBG(fmt...) #endif -#ifdef CONFIG_PPC_MULTIPLATFORM -int _machine = 0; -EXPORT_SYMBOL(_machine); -#endif +/* The main machine-dep calls structure + */ +struct machdep_calls ppc_md; +EXPORT_SYMBOL(ppc_md); +struct machdep_calls *machine_id; +EXPORT_SYMBOL(machine_id); unsigned long klimit = (unsigned long) _end; @@ -168,7 +172,8 @@ static int show_cpuinfo(struct seq_file *m, void *v) bogosum/(500000/HZ), bogosum/(5000/HZ) % 100); #endif /* CONFIG_SMP && CONFIG_PPC32 */ seq_printf(m, "timebase\t: %lu\n", ppc_tb_freq); - + if (ppc_md.name) + seq_printf(m, "platform\t: %s\n", ppc_md.name); if (ppc_md.show_cpuinfo != NULL) ppc_md.show_cpuinfo(m); @@ -352,12 +357,13 @@ void __init check_for_initrd(void) * must be called before using this. * * While we're here, we may as well set the "physical" cpu ids in the paca. + * + * NOTE: This must match the parsing done in early_init_dt_scan_cpus. */ void __init smp_setup_cpu_maps(void) { struct device_node *dn = NULL; int cpu = 0; - int swap_cpuid = 0; while ((dn = of_find_node_by_type(dn, "cpu")) && cpu < NR_CPUS) { int *intserv; @@ -376,30 +382,17 @@ void __init smp_setup_cpu_maps(void) for (j = 0; j < nthreads && cpu < NR_CPUS; j++) { cpu_set(cpu, cpu_present_map); set_hard_smp_processor_id(cpu, intserv[j]); - - if (intserv[j] == boot_cpuid_phys) - swap_cpuid = cpu; cpu_set(cpu, cpu_possible_map); cpu++; } } - /* Swap CPU id 0 with boot_cpuid_phys, so we can always assume that - * boot cpu is logical 0. - */ - if (boot_cpuid_phys != get_hard_smp_processor_id(0)) { - u32 tmp; - tmp = get_hard_smp_processor_id(0); - set_hard_smp_processor_id(0, boot_cpuid_phys); - set_hard_smp_processor_id(swap_cpuid, tmp); - } - #ifdef CONFIG_PPC64 /* * On pSeries LPAR, we need to know how many cpus * could possibly be added to this partition. */ - if (_machine == PLATFORM_PSERIES_LPAR && + if (machine_is(pseries) && firmware_has_feature(FW_FEATURE_LPAR) && (dn = of_find_node_by_path("/rtas"))) { int num_addr_cell, num_size_cell, maxcpus; unsigned int *ireg; @@ -438,7 +431,7 @@ void __init smp_setup_cpu_maps(void) /* * Do the sibling map; assume only two threads per processor. */ - for_each_cpu(cpu) { + for_each_possible_cpu(cpu) { cpu_set(cpu, cpu_sibling_map[cpu]); if (cpu_has_feature(CPU_FTR_SMT)) cpu_set(cpu ^ 0x1, cpu_sibling_map[cpu]); @@ -468,3 +461,34 @@ static int __init early_xmon(char *p) } early_param("xmon", early_xmon); #endif + +void probe_machine(void) +{ + extern struct machdep_calls __machine_desc_start; + extern struct machdep_calls __machine_desc_end; + + /* + * Iterate all ppc_md structures until we find the proper + * one for the current machine type + */ + DBG("Probing machine type ...\n"); + + for (machine_id = &__machine_desc_start; + machine_id < &__machine_desc_end; + machine_id++) { + DBG(" %s ...", machine_id->name); + memcpy(&ppc_md, machine_id, sizeof(struct machdep_calls)); + if (ppc_md.probe()) { + DBG(" match !\n"); + break; + } + DBG("\n"); + } + /* What can we do if we didn't find ? */ + if (machine_id >= &__machine_desc_end) { + DBG("No suitable machine found !\n"); + for (;;); + } + + printk(KERN_INFO "Using %s machine description\n", ppc_md.name); +} diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index dc2770df25b3..a72bf5dceeee 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -53,9 +53,6 @@ extern void platform_init(void); extern void bootx_init(unsigned long r4, unsigned long phys); -extern void ppc6xx_idle(void); -extern void power4_idle(void); - boot_infos_t *boot_infos; struct ide_machdep_calls ppc_ide_md; @@ -70,10 +67,6 @@ unsigned int DMA_MODE_WRITE; int have_of = 1; #ifdef CONFIG_PPC_MULTIPLATFORM -extern void prep_init(void); -extern void pmac_init(void); -extern void chrp_init(void); - dev_t boot_dev; #endif /* CONFIG_PPC_MULTIPLATFORM */ @@ -85,9 +78,6 @@ unsigned long SYSRQ_KEY = 0x54; unsigned long vgacon_remap_base; #endif -struct machdep_calls ppc_md; -EXPORT_SYMBOL(ppc_md); - /* * These are used in binfmt_elf.c to put aux entries on the stack * for each elf executable being started. @@ -111,7 +101,7 @@ unsigned long __init early_init(unsigned long dt_ptr) /* First zero the BSS -- use memset_io, some platforms don't have * caches on yet */ - memset_io(PTRRELOC(&__bss_start), 0, _end - __bss_start); + memset_io((void __iomem *)PTRRELOC(&__bss_start), 0, _end - __bss_start); /* * Identify the CPU type and fix up code sections @@ -123,48 +113,6 @@ unsigned long __init early_init(unsigned long dt_ptr) return KERNELBASE + offset; } -#ifdef CONFIG_PPC_MULTIPLATFORM -/* - * The PPC_MULTIPLATFORM version of platform_init... - */ -void __init platform_init(void) -{ - /* if we didn't get any bootinfo telling us what we are... */ - if (_machine == 0) { - /* prep boot loader tells us if we're prep or not */ - if ( *(unsigned long *)(KERNELBASE) == (0xdeadc0de) ) - _machine = _MACH_prep; - } - -#ifdef CONFIG_PPC_PREP - /* not much more to do here, if prep */ - if (_machine == _MACH_prep) { - prep_init(); - return; - } -#endif - -#ifdef CONFIG_ADB - if (strstr(cmd_line, "adb_sync")) { - extern int __adb_probe_sync; - __adb_probe_sync = 1; - } -#endif /* CONFIG_ADB */ - - switch (_machine) { -#ifdef CONFIG_PPC_PMAC - case _MACH_Pmac: - pmac_init(); - break; -#endif -#ifdef CONFIG_PPC_CHRP - case _MACH_chrp: - chrp_init(); - break; -#endif - } -} -#endif /* * Find out what kind of machine we're on and save any data we need @@ -190,11 +138,17 @@ void __init machine_init(unsigned long dt_ptr, unsigned long phys) strlcpy(cmd_line, CONFIG_CMDLINE, sizeof(cmd_line)); #endif /* CONFIG_CMDLINE */ - /* Base init based on machine type */ +#ifdef CONFIG_PPC_MULTIPLATFORM + probe_machine(); +#else + /* Base init based on machine type. Obsoloete, please kill ! */ platform_init(); +#endif #ifdef CONFIG_6xx - ppc_md.power_save = ppc6xx_idle; + if (cpu_has_feature(CPU_FTR_CAN_DOZE) || + cpu_has_feature(CPU_FTR_CAN_NAP)) + ppc_md.power_save = ppc6xx_idle; #endif if (ppc_md.progress) @@ -272,7 +226,7 @@ int __init ppc_init(void) if ( ppc_md.progress ) ppc_md.progress(" ", 0xffff); /* register CPU devices */ - for_each_cpu(i) + for_each_possible_cpu(i) register_cpu(&cpu_devices[i], i, NULL); /* call platform init */ @@ -352,12 +306,6 @@ void __init setup_arch(char **cmdline_p) do_init_bootmem(); if ( ppc_md.progress ) ppc_md.progress("setup_arch: bootmem", 0x3eab); -#ifdef CONFIG_PPC_OCP - /* Initialize OCP device list */ - ocp_early_init(); - if ( ppc_md.progress ) ppc_md.progress("ocp: exit", 0x3eab); -#endif - #ifdef CONFIG_DUMMY_CONSOLE conswitchp = &dummy_con; #endif @@ -366,7 +314,4 @@ void __init setup_arch(char **cmdline_p) if ( ppc_md.progress ) ppc_md.progress("arch: exit", 0x3eab); paging_init(); - - /* this is for modules since _machine can be a define -- Cort */ - ppc_md.ppc_machine = _machine; } diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 2f3fdad35594..59aa92cd6fa4 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -73,7 +73,6 @@ int have_of = 1; int boot_cpuid = 0; -int boot_cpuid_phys = 0; dev_t boot_dev; u64 ppc64_pft_size; @@ -96,11 +95,6 @@ int dcache_bsize; int icache_bsize; int ucache_bsize; -/* The main machine-dep calls structure - */ -struct machdep_calls ppc_md; -EXPORT_SYMBOL(ppc_md); - #ifdef CONFIG_MAGIC_SYSRQ unsigned long SYSRQ_KEY; #endif /* CONFIG_MAGIC_SYSRQ */ @@ -161,32 +155,6 @@ early_param("smt-enabled", early_smt_enabled); #define check_smt_enabled() #endif /* CONFIG_SMP */ -extern struct machdep_calls pSeries_md; -extern struct machdep_calls pmac_md; -extern struct machdep_calls maple_md; -extern struct machdep_calls cell_md; -extern struct machdep_calls iseries_md; - -/* Ultimately, stuff them in an elf section like initcalls... */ -static struct machdep_calls __initdata *machines[] = { -#ifdef CONFIG_PPC_PSERIES - &pSeries_md, -#endif /* CONFIG_PPC_PSERIES */ -#ifdef CONFIG_PPC_PMAC - &pmac_md, -#endif /* CONFIG_PPC_PMAC */ -#ifdef CONFIG_PPC_MAPLE - &maple_md, -#endif /* CONFIG_PPC_MAPLE */ -#ifdef CONFIG_PPC_CELL - &cell_md, -#endif -#ifdef CONFIG_PPC_ISERIES - &iseries_md, -#endif - NULL -}; - /* * Early initialization entry point. This is called by head.S * with MMU translation disabled. We rely on the "feature" of @@ -208,13 +176,10 @@ static struct machdep_calls __initdata *machines[] = { void __init early_setup(unsigned long dt_ptr) { - struct paca_struct *lpaca = get_paca(); - static struct machdep_calls **mach; - /* Enable early debugging if any specified (see udbg.h) */ udbg_early_init(); - DBG(" -> early_setup()\n"); + DBG(" -> early_setup(), dt_ptr: 0x%lx\n", dt_ptr); /* * Do early initializations using the flattened device @@ -223,22 +188,16 @@ void __init early_setup(unsigned long dt_ptr) */ early_init_devtree(__va(dt_ptr)); - /* - * Iterate all ppc_md structures until we find the proper - * one for the current machine type - */ - DBG("Probing machine type for platform %x...\n", _machine); + /* Now we know the logical id of our boot cpu, setup the paca. */ + setup_boot_paca(); - for (mach = machines; *mach; mach++) { - if ((*mach)->probe(_machine)) - break; - } - /* What can we do if we didn't find ? */ - if (*mach == NULL) { - DBG("No suitable machine found !\n"); - for (;;); - } - ppc_md = **mach; + /* Fix up paca fields required for the boot cpu */ + get_paca()->cpu_start = 1; + get_paca()->stab_real = __pa((u64)&initial_stab); + get_paca()->stab_addr = (u64)&initial_stab; + + /* Probe the machine type */ + probe_machine(); #ifdef CONFIG_CRASH_DUMP kdump_setup(); @@ -260,7 +219,7 @@ void __init early_setup(unsigned long dt_ptr) if (cpu_has_feature(CPU_FTR_SLB)) slb_initialize(); else - stab_initialize(lpaca->stab_real); + stab_initialize(get_paca()->stab_real); } DBG(" <- early_setup()\n"); @@ -340,7 +299,7 @@ static void __init initialize_cache_info(void) const char *dc, *ic; /* Then read cache informations */ - if (_machine == PLATFORM_POWERMAC) { + if (machine_is(powermac)) { dc = "d-cache-block-size"; ic = "i-cache-block-size"; } else { @@ -484,7 +443,6 @@ void __init setup_system(void) printk("ppc64_pft_size = 0x%lx\n", ppc64_pft_size); printk("ppc64_interrupt_controller = 0x%ld\n", ppc64_interrupt_controller); - printk("platform = 0x%x\n", _machine); printk("physicalMemorySize = 0x%lx\n", lmb_phys_mem_size()); printk("ppc64_caches.dcache_line_size = 0x%x\n", ppc64_caches.dline_size); @@ -516,7 +474,7 @@ static void __init irqstack_early_init(void) * interrupt stacks must be under 256MB, we cannot afford to take * SLB misses on them. */ - for_each_cpu(i) { + for_each_possible_cpu(i) { softirq_ctx[i] = (struct thread_info *) __va(lmb_alloc_base(THREAD_SIZE, THREAD_SIZE, 0x10000000)); @@ -549,7 +507,7 @@ static void __init emergency_stack_init(void) */ limit = min(0x10000000UL, lmb.rmo_size); - for_each_cpu(i) + for_each_possible_cpu(i) paca[i].emergency_sp = __va(lmb_alloc_base(HW_PAGE_SIZE, 128, limit)) + HW_PAGE_SIZE; } @@ -579,7 +537,8 @@ void __init setup_arch(char **cmdline_p) panic_timeout = 180; if (ppc_md.panic) - notifier_chain_register(&panic_notifier_list, &ppc64_panic_block); + atomic_notifier_chain_register(&panic_notifier_list, + &ppc64_panic_block); init_mm.start_code = PAGE_OFFSET; init_mm.end_code = (unsigned long) _etext; @@ -601,12 +560,6 @@ void __init setup_arch(char **cmdline_p) ppc_md.setup_arch(); - /* Use the default idle loop if the platform hasn't provided one. */ - if (NULL == ppc_md.idle_loop) { - ppc_md.idle_loop = default_idle; - printk(KERN_INFO "Using default idle loop\n"); - } - paging_init(); ppc64_boot_msg(0x15, "Setup Done"); } @@ -671,7 +624,7 @@ void __init setup_per_cpu_areas(void) size = PERCPU_ENOUGH_ROOM; #endif - for_each_cpu(i) { + for_each_possible_cpu(i) { ptr = alloc_bootmem_node(NODE_DATA(cpu_to_node(i)), size); if (!ptr) panic("Cannot allocate cpu data for CPU %d\n", i); diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index d7a4e814974d..01e3c08cb550 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -42,6 +42,7 @@ #include <asm/uaccess.h> #include <asm/cacheflush.h> +#include <asm/syscalls.h> #include <asm/sigcontext.h> #include <asm/vdso.h> #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 47f910380a6a..27f65b95184d 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -33,6 +33,7 @@ #include <asm/pgtable.h> #include <asm/unistd.h> #include <asm/cacheflush.h> +#include <asm/syscalls.h> #include <asm/vdso.h> #define DEBUG_SIG 0 @@ -211,7 +212,7 @@ static inline void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs /* Default to using normal stack */ newsp = regs->gpr[1]; - if (ka->sa.sa_flags & SA_ONSTACK) { + if ((ka->sa.sa_flags & SA_ONSTACK) && current->sas_ss_size) { if (! on_sig_stack(regs->gpr[1])) newsp = (current->sas_ss_sp + current->sas_ss_size); } diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 805eaedbc308..530f7dba0bd2 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -362,7 +362,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) smp_space_timers(max_cpus); - for_each_cpu(cpu) + for_each_possible_cpu(cpu) if (cpu != boot_cpuid) smp_create_idle(cpu); } diff --git a/arch/powerpc/kernel/swsusp_32.S b/arch/powerpc/kernel/swsusp_32.S new file mode 100644 index 000000000000..69773cc1a85f --- /dev/null +++ b/arch/powerpc/kernel/swsusp_32.S @@ -0,0 +1,349 @@ +#include <linux/config.h> +#include <linux/threads.h> +#include <asm/processor.h> +#include <asm/page.h> +#include <asm/cputable.h> +#include <asm/thread_info.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> + + +/* + * Structure for storing CPU registers on the save area. + */ +#define SL_SP 0 +#define SL_PC 4 +#define SL_MSR 8 +#define SL_SDR1 0xc +#define SL_SPRG0 0x10 /* 4 sprg's */ +#define SL_DBAT0 0x20 +#define SL_IBAT0 0x28 +#define SL_DBAT1 0x30 +#define SL_IBAT1 0x38 +#define SL_DBAT2 0x40 +#define SL_IBAT2 0x48 +#define SL_DBAT3 0x50 +#define SL_IBAT3 0x58 +#define SL_TB 0x60 +#define SL_R2 0x68 +#define SL_CR 0x6c +#define SL_LR 0x70 +#define SL_R12 0x74 /* r12 to r31 */ +#define SL_SIZE (SL_R12 + 80) + + .section .data + .align 5 + +_GLOBAL(swsusp_save_area) + .space SL_SIZE + + + .section .text + .align 5 + +_GLOBAL(swsusp_arch_suspend) + + lis r11,swsusp_save_area@h + ori r11,r11,swsusp_save_area@l + + mflr r0 + stw r0,SL_LR(r11) + mfcr r0 + stw r0,SL_CR(r11) + stw r1,SL_SP(r11) + stw r2,SL_R2(r11) + stmw r12,SL_R12(r11) + + /* Save MSR & SDR1 */ + mfmsr r4 + stw r4,SL_MSR(r11) + mfsdr1 r4 + stw r4,SL_SDR1(r11) + + /* Get a stable timebase and save it */ +1: mftbu r4 + stw r4,SL_TB(r11) + mftb r5 + stw r5,SL_TB+4(r11) + mftbu r3 + cmpw r3,r4 + bne 1b + + /* Save SPRGs */ + mfsprg r4,0 + stw r4,SL_SPRG0(r11) + mfsprg r4,1 + stw r4,SL_SPRG0+4(r11) + mfsprg r4,2 + stw r4,SL_SPRG0+8(r11) + mfsprg r4,3 + stw r4,SL_SPRG0+12(r11) + + /* Save BATs */ + mfdbatu r4,0 + stw r4,SL_DBAT0(r11) + mfdbatl r4,0 + stw r4,SL_DBAT0+4(r11) + mfdbatu r4,1 + stw r4,SL_DBAT1(r11) + mfdbatl r4,1 + stw r4,SL_DBAT1+4(r11) + mfdbatu r4,2 + stw r4,SL_DBAT2(r11) + mfdbatl r4,2 + stw r4,SL_DBAT2+4(r11) + mfdbatu r4,3 + stw r4,SL_DBAT3(r11) + mfdbatl r4,3 + stw r4,SL_DBAT3+4(r11) + mfibatu r4,0 + stw r4,SL_IBAT0(r11) + mfibatl r4,0 + stw r4,SL_IBAT0+4(r11) + mfibatu r4,1 + stw r4,SL_IBAT1(r11) + mfibatl r4,1 + stw r4,SL_IBAT1+4(r11) + mfibatu r4,2 + stw r4,SL_IBAT2(r11) + mfibatl r4,2 + stw r4,SL_IBAT2+4(r11) + mfibatu r4,3 + stw r4,SL_IBAT3(r11) + mfibatl r4,3 + stw r4,SL_IBAT3+4(r11) + +#if 0 + /* Backup various CPU config stuffs */ + bl __save_cpu_setup +#endif + /* Call the low level suspend stuff (we should probably have made + * a stackframe... + */ + bl swsusp_save + + /* Restore LR from the save area */ + lis r11,swsusp_save_area@h + ori r11,r11,swsusp_save_area@l + lwz r0,SL_LR(r11) + mtlr r0 + + blr + + +/* Resume code */ +_GLOBAL(swsusp_arch_resume) + + /* Stop pending alitvec streams and memory accesses */ +BEGIN_FTR_SECTION + DSSALL +END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) + sync + + /* Disable MSR:DR to make sure we don't take a TLB or + * hash miss during the copy, as our hash table will + * for a while be unuseable. For .text, we assume we are + * covered by a BAT. This works only for non-G5 at this + * point. G5 will need a better approach, possibly using + * a small temporary hash table filled with large mappings, + * disabling the MMU completely isn't a good option for + * performance reasons. + * (Note that 750's may have the same performance issue as + * the G5 in this case, we should investigate using moving + * BATs for these CPUs) + */ + mfmsr r0 + sync + rlwinm r0,r0,0,28,26 /* clear MSR_DR */ + mtmsr r0 + sync + isync + + /* Load ptr the list of pages to copy in r3 */ + lis r11,(pagedir_nosave - KERNELBASE)@h + ori r11,r11,pagedir_nosave@l + lwz r10,0(r11) + + /* Copy the pages. This is a very basic implementation, to + * be replaced by something more cache efficient */ +1: + tophys(r3,r10) + li r0,256 + mtctr r0 + lwz r11,pbe_address(r3) /* source */ + tophys(r5,r11) + lwz r10,pbe_orig_address(r3) /* destination */ + tophys(r6,r10) +2: + lwz r8,0(r5) + lwz r9,4(r5) + lwz r10,8(r5) + lwz r11,12(r5) + addi r5,r5,16 + stw r8,0(r6) + stw r9,4(r6) + stw r10,8(r6) + stw r11,12(r6) + addi r6,r6,16 + bdnz 2b + lwz r10,pbe_next(r3) + cmpwi 0,r10,0 + bne 1b + + /* Do a very simple cache flush/inval of the L1 to ensure + * coherency of the icache + */ + lis r3,0x0002 + mtctr r3 + li r3, 0 +1: + lwz r0,0(r3) + addi r3,r3,0x0020 + bdnz 1b + isync + sync + + /* Now flush those cache lines */ + lis r3,0x0002 + mtctr r3 + li r3, 0 +1: + dcbf 0,r3 + addi r3,r3,0x0020 + bdnz 1b + sync + + /* Ok, we are now running with the kernel data of the old + * kernel fully restored. We can get to the save area + * easily now. As for the rest of the code, it assumes the + * loader kernel and the booted one are exactly identical + */ + lis r11,swsusp_save_area@h + ori r11,r11,swsusp_save_area@l + tophys(r11,r11) + +#if 0 + /* Restore various CPU config stuffs */ + bl __restore_cpu_setup +#endif + /* Restore the BATs, and SDR1. Then we can turn on the MMU. + * This is a bit hairy as we are running out of those BATs, + * but first, our code is probably in the icache, and we are + * writing the same value to the BAT, so that should be fine, + * though a better solution will have to be found long-term + */ + lwz r4,SL_SDR1(r11) + mtsdr1 r4 + lwz r4,SL_SPRG0(r11) + mtsprg 0,r4 + lwz r4,SL_SPRG0+4(r11) + mtsprg 1,r4 + lwz r4,SL_SPRG0+8(r11) + mtsprg 2,r4 + lwz r4,SL_SPRG0+12(r11) + mtsprg 3,r4 + +#if 0 + lwz r4,SL_DBAT0(r11) + mtdbatu 0,r4 + lwz r4,SL_DBAT0+4(r11) + mtdbatl 0,r4 + lwz r4,SL_DBAT1(r11) + mtdbatu 1,r4 + lwz r4,SL_DBAT1+4(r11) + mtdbatl 1,r4 + lwz r4,SL_DBAT2(r11) + mtdbatu 2,r4 + lwz r4,SL_DBAT2+4(r11) + mtdbatl 2,r4 + lwz r4,SL_DBAT3(r11) + mtdbatu 3,r4 + lwz r4,SL_DBAT3+4(r11) + mtdbatl 3,r4 + lwz r4,SL_IBAT0(r11) + mtibatu 0,r4 + lwz r4,SL_IBAT0+4(r11) + mtibatl 0,r4 + lwz r4,SL_IBAT1(r11) + mtibatu 1,r4 + lwz r4,SL_IBAT1+4(r11) + mtibatl 1,r4 + lwz r4,SL_IBAT2(r11) + mtibatu 2,r4 + lwz r4,SL_IBAT2+4(r11) + mtibatl 2,r4 + lwz r4,SL_IBAT3(r11) + mtibatu 3,r4 + lwz r4,SL_IBAT3+4(r11) + mtibatl 3,r4 +#endif + +BEGIN_FTR_SECTION + li r4,0 + mtspr SPRN_DBAT4U,r4 + mtspr SPRN_DBAT4L,r4 + mtspr SPRN_DBAT5U,r4 + mtspr SPRN_DBAT5L,r4 + mtspr SPRN_DBAT6U,r4 + mtspr SPRN_DBAT6L,r4 + mtspr SPRN_DBAT7U,r4 + mtspr SPRN_DBAT7L,r4 + mtspr SPRN_IBAT4U,r4 + mtspr SPRN_IBAT4L,r4 + mtspr SPRN_IBAT5U,r4 + mtspr SPRN_IBAT5L,r4 + mtspr SPRN_IBAT6U,r4 + mtspr SPRN_IBAT6L,r4 + mtspr SPRN_IBAT7U,r4 + mtspr SPRN_IBAT7L,r4 +END_FTR_SECTION_IFSET(CPU_FTR_HAS_HIGH_BATS) + + /* Flush all TLBs */ + lis r4,0x1000 +1: addic. r4,r4,-0x1000 + tlbie r4 + blt 1b + sync + + /* restore the MSR and turn on the MMU */ + lwz r3,SL_MSR(r11) + bl turn_on_mmu + tovirt(r11,r11) + + /* Restore TB */ + li r3,0 + mttbl r3 + lwz r3,SL_TB(r11) + lwz r4,SL_TB+4(r11) + mttbu r3 + mttbl r4 + + /* Kick decrementer */ + li r0,1 + mtdec r0 + + /* Restore the callee-saved registers and return */ + lwz r0,SL_CR(r11) + mtcr r0 + lwz r2,SL_R2(r11) + lmw r12,SL_R12(r11) + lwz r1,SL_SP(r11) + lwz r0,SL_LR(r11) + mtlr r0 + + // XXX Note: we don't really need to call swsusp_resume + + li r3,0 + blr + +/* FIXME:This construct is actually not useful since we don't shut + * down the instruction MMU, we could just flip back MSR-DR on. + */ +turn_on_mmu: + mflr r4 + mtsrr0 r4 + mtsrr1 r3 + sync + isync + rfi + diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index cd75ab2908fa..ec274e688816 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -24,7 +24,6 @@ #include <linux/resource.h> #include <linux/times.h> #include <linux/utsname.h> -#include <linux/timex.h> #include <linux/smp.h> #include <linux/smp_lock.h> #include <linux/sem.h> @@ -161,78 +160,6 @@ asmlinkage long compat_sys_sysfs(u32 option, u32 arg1, u32 arg2) return sys_sysfs((int)option, arg1, arg2); } -/* Handle adjtimex compatibility. */ -struct timex32 { - u32 modes; - s32 offset, freq, maxerror, esterror; - s32 status, constant, precision, tolerance; - struct compat_timeval time; - s32 tick; - s32 ppsfreq, jitter, shift, stabil; - s32 jitcnt, calcnt, errcnt, stbcnt; - s32 :32; s32 :32; s32 :32; s32 :32; - s32 :32; s32 :32; s32 :32; s32 :32; - s32 :32; s32 :32; s32 :32; s32 :32; -}; - -extern int do_adjtimex(struct timex *); - -asmlinkage long compat_sys_adjtimex(struct timex32 __user *utp) -{ - struct timex txc; - int ret; - - memset(&txc, 0, sizeof(struct timex)); - - if(get_user(txc.modes, &utp->modes) || - __get_user(txc.offset, &utp->offset) || - __get_user(txc.freq, &utp->freq) || - __get_user(txc.maxerror, &utp->maxerror) || - __get_user(txc.esterror, &utp->esterror) || - __get_user(txc.status, &utp->status) || - __get_user(txc.constant, &utp->constant) || - __get_user(txc.precision, &utp->precision) || - __get_user(txc.tolerance, &utp->tolerance) || - __get_user(txc.time.tv_sec, &utp->time.tv_sec) || - __get_user(txc.time.tv_usec, &utp->time.tv_usec) || - __get_user(txc.tick, &utp->tick) || - __get_user(txc.ppsfreq, &utp->ppsfreq) || - __get_user(txc.jitter, &utp->jitter) || - __get_user(txc.shift, &utp->shift) || - __get_user(txc.stabil, &utp->stabil) || - __get_user(txc.jitcnt, &utp->jitcnt) || - __get_user(txc.calcnt, &utp->calcnt) || - __get_user(txc.errcnt, &utp->errcnt) || - __get_user(txc.stbcnt, &utp->stbcnt)) - return -EFAULT; - - ret = do_adjtimex(&txc); - - if(put_user(txc.modes, &utp->modes) || - __put_user(txc.offset, &utp->offset) || - __put_user(txc.freq, &utp->freq) || - __put_user(txc.maxerror, &utp->maxerror) || - __put_user(txc.esterror, &utp->esterror) || - __put_user(txc.status, &utp->status) || - __put_user(txc.constant, &utp->constant) || - __put_user(txc.precision, &utp->precision) || - __put_user(txc.tolerance, &utp->tolerance) || - __put_user(txc.time.tv_sec, &utp->time.tv_sec) || - __put_user(txc.time.tv_usec, &utp->time.tv_usec) || - __put_user(txc.tick, &utp->tick) || - __put_user(txc.ppsfreq, &utp->ppsfreq) || - __put_user(txc.jitter, &utp->jitter) || - __put_user(txc.shift, &utp->shift) || - __put_user(txc.stabil, &utp->stabil) || - __put_user(txc.jitcnt, &utp->jitcnt) || - __put_user(txc.calcnt, &utp->calcnt) || - __put_user(txc.errcnt, &utp->errcnt) || - __put_user(txc.stbcnt, &utp->stbcnt)) - ret = -EFAULT; - - return ret; -} - asmlinkage long compat_sys_pause(void) { current->state = TASK_INTERRUPTIBLE; diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index ad895c99813b..9b69d99a9103 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -40,6 +40,7 @@ #include <asm/uaccess.h> #include <asm/ipc.h> #include <asm/semaphore.h> +#include <asm/syscalls.h> #include <asm/time.h> #include <asm/unistd.h> diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 0f0c3a9ae2e5..73560ef6f802 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -65,20 +65,20 @@ static int __init smt_setup(void) unsigned int cpu; if (!cpu_has_feature(CPU_FTR_SMT)) - return 1; + return -ENODEV; options = find_path_device("/options"); if (!options) - return 1; + return -ENODEV; val = (unsigned int *)get_property(options, "ibm,smt-snooze-delay", NULL); if (!smt_snooze_cmdline && val) { - for_each_cpu(cpu) + for_each_possible_cpu(cpu) per_cpu(smt_snooze_delay, cpu) = *val; } - return 1; + return 0; } __initcall(smt_setup); @@ -93,7 +93,7 @@ static int __init setup_smt_snooze_delay(char *str) smt_snooze_cmdline = 1; if (get_option(&str, &snooze)) { - for_each_cpu(cpu) + for_each_possible_cpu(cpu) per_cpu(smt_snooze_delay, cpu) = snooze; } @@ -347,7 +347,7 @@ static int __init topology_init(void) register_cpu_notifier(&sysfs_cpu_nb); - for_each_cpu(cpu) { + for_each_possible_cpu(cpu) { struct cpu *c = &per_cpu(cpu_devices, cpu); #ifdef CONFIG_NUMA diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c new file mode 100644 index 000000000000..26bd8ea35a4e --- /dev/null +++ b/arch/powerpc/kernel/tau_6xx.c @@ -0,0 +1,271 @@ +/* + * temp.c Thermal management for cpu's with Thermal Assist Units + * + * Written by Troy Benjegerdes <hozer@drgw.net> + * + * TODO: + * dynamic power management to limit peak CPU temp (using ICTC) + * calibration??? + * + * Silly, crazy ideas: use cpu load (from scheduler) and ICTC to extend battery + * life in portables, and add a 'performance/watt' metric somewhere in /proc + */ + +#include <linux/config.h> +#include <linux/errno.h> +#include <linux/jiffies.h> +#include <linux/kernel.h> +#include <linux/param.h> +#include <linux/string.h> +#include <linux/mm.h> +#include <linux/interrupt.h> +#include <linux/init.h> + +#include <asm/io.h> +#include <asm/reg.h> +#include <asm/nvram.h> +#include <asm/cache.h> +#include <asm/8xx_immap.h> +#include <asm/machdep.h> + +static struct tau_temp +{ + int interrupts; + unsigned char low; + unsigned char high; + unsigned char grew; +} tau[NR_CPUS]; + +struct timer_list tau_timer; + +#undef DEBUG + +/* TODO: put these in a /proc interface, with some sanity checks, and maybe + * dynamic adjustment to minimize # of interrupts */ +/* configurable values for step size and how much to expand the window when + * we get an interrupt. These are based on the limit that was out of range */ +#define step_size 2 /* step size when temp goes out of range */ +#define window_expand 1 /* expand the window by this much */ +/* configurable values for shrinking the window */ +#define shrink_timer 2*HZ /* period between shrinking the window */ +#define min_window 2 /* minimum window size, degrees C */ + +void set_thresholds(unsigned long cpu) +{ +#ifdef CONFIG_TAU_INT + /* + * setup THRM1, + * threshold, valid bit, enable interrupts, interrupt when below threshold + */ + mtspr(SPRN_THRM1, THRM1_THRES(tau[cpu].low) | THRM1_V | THRM1_TIE | THRM1_TID); + + /* setup THRM2, + * threshold, valid bit, enable interrupts, interrupt when above threshhold + */ + mtspr (SPRN_THRM2, THRM1_THRES(tau[cpu].high) | THRM1_V | THRM1_TIE); +#else + /* same thing but don't enable interrupts */ + mtspr(SPRN_THRM1, THRM1_THRES(tau[cpu].low) | THRM1_V | THRM1_TID); + mtspr(SPRN_THRM2, THRM1_THRES(tau[cpu].high) | THRM1_V); +#endif +} + +void TAUupdate(int cpu) +{ + unsigned thrm; + +#ifdef DEBUG + printk("TAUupdate "); +#endif + + /* if both thresholds are crossed, the step_sizes cancel out + * and the window winds up getting expanded twice. */ + if((thrm = mfspr(SPRN_THRM1)) & THRM1_TIV){ /* is valid? */ + if(thrm & THRM1_TIN){ /* crossed low threshold */ + if (tau[cpu].low >= step_size){ + tau[cpu].low -= step_size; + tau[cpu].high -= (step_size - window_expand); + } + tau[cpu].grew = 1; +#ifdef DEBUG + printk("low threshold crossed "); +#endif + } + } + if((thrm = mfspr(SPRN_THRM2)) & THRM1_TIV){ /* is valid? */ + if(thrm & THRM1_TIN){ /* crossed high threshold */ + if (tau[cpu].high <= 127-step_size){ + tau[cpu].low += (step_size - window_expand); + tau[cpu].high += step_size; + } + tau[cpu].grew = 1; +#ifdef DEBUG + printk("high threshold crossed "); +#endif + } + } + +#ifdef DEBUG + printk("grew = %d\n", tau[cpu].grew); +#endif + +#ifndef CONFIG_TAU_INT /* tau_timeout will do this if not using interrupts */ + set_thresholds(cpu); +#endif + +} + +#ifdef CONFIG_TAU_INT +/* + * TAU interrupts - called when we have a thermal assist unit interrupt + * with interrupts disabled + */ + +void TAUException(struct pt_regs * regs) +{ + int cpu = smp_processor_id(); + + irq_enter(); + tau[cpu].interrupts++; + + TAUupdate(cpu); + + irq_exit(); +} +#endif /* CONFIG_TAU_INT */ + +static void tau_timeout(void * info) +{ + int cpu; + unsigned long flags; + int size; + int shrink; + + /* disabling interrupts *should* be okay */ + local_irq_save(flags); + cpu = smp_processor_id(); + +#ifndef CONFIG_TAU_INT + TAUupdate(cpu); +#endif + + size = tau[cpu].high - tau[cpu].low; + if (size > min_window && ! tau[cpu].grew) { + /* do an exponential shrink of half the amount currently over size */ + shrink = (2 + size - min_window) / 4; + if (shrink) { + tau[cpu].low += shrink; + tau[cpu].high -= shrink; + } else { /* size must have been min_window + 1 */ + tau[cpu].low += 1; +#if 1 /* debug */ + if ((tau[cpu].high - tau[cpu].low) != min_window){ + printk(KERN_ERR "temp.c: line %d, logic error\n", __LINE__); + } +#endif + } + } + + tau[cpu].grew = 0; + + set_thresholds(cpu); + + /* + * Do the enable every time, since otherwise a bunch of (relatively) + * complex sleep code needs to be added. One mtspr every time + * tau_timeout is called is probably not a big deal. + * + * Enable thermal sensor and set up sample interval timer + * need 20 us to do the compare.. until a nice 'cpu_speed' function + * call is implemented, just assume a 500 mhz clock. It doesn't really + * matter if we take too long for a compare since it's all interrupt + * driven anyway. + * + * use a extra long time.. (60 us @ 500 mhz) + */ + mtspr(SPRN_THRM3, THRM3_SITV(500*60) | THRM3_E); + + local_irq_restore(flags); +} + +static void tau_timeout_smp(unsigned long unused) +{ + + /* schedule ourselves to be run again */ + mod_timer(&tau_timer, jiffies + shrink_timer) ; + on_each_cpu(tau_timeout, NULL, 1, 0); +} + +/* + * setup the TAU + * + * Set things up to use THRM1 as a temperature lower bound, and THRM2 as an upper bound. + * Start off at zero + */ + +int tau_initialized = 0; + +void __init TAU_init_smp(void * info) +{ + unsigned long cpu = smp_processor_id(); + + /* set these to a reasonable value and let the timer shrink the + * window */ + tau[cpu].low = 5; + tau[cpu].high = 120; + + set_thresholds(cpu); +} + +int __init TAU_init(void) +{ + /* We assume in SMP that if one CPU has TAU support, they + * all have it --BenH + */ + if (!cpu_has_feature(CPU_FTR_TAU)) { + printk("Thermal assist unit not available\n"); + tau_initialized = 0; + return 1; + } + + + /* first, set up the window shrinking timer */ + init_timer(&tau_timer); + tau_timer.function = tau_timeout_smp; + tau_timer.expires = jiffies + shrink_timer; + add_timer(&tau_timer); + + on_each_cpu(TAU_init_smp, NULL, 1, 0); + + printk("Thermal assist unit "); +#ifdef CONFIG_TAU_INT + printk("using interrupts, "); +#else + printk("using timers, "); +#endif + printk("shrink_timer: %d jiffies\n", shrink_timer); + tau_initialized = 1; + + return 0; +} + +__initcall(TAU_init); + +/* + * return current temp + */ + +u32 cpu_temp_both(unsigned long cpu) +{ + return ((tau[cpu].high << 16) | tau[cpu].low); +} + +int cpu_temp(unsigned long cpu) +{ + return ((tau[cpu].high + tau[cpu].low) / 2); +} + +int tau_interrupts(unsigned long cpu) +{ + return (tau[cpu].interrupts); +} diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 4a27218a086c..24e3ad756de0 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -261,7 +261,7 @@ void snapshot_timebases(void) if (!cpu_has_feature(CPU_FTR_PURR)) return; - for_each_cpu(cpu) + for_each_possible_cpu(cpu) spin_lock_init(&per_cpu(cpu_purr_data, cpu).lock); on_each_cpu(snapshot_tb_and_purr, NULL, 0, 1); } @@ -751,7 +751,7 @@ void __init smp_space_timers(unsigned int max_cpus) * systems works better if the two threads' timebase interrupts * are staggered by half a jiffy with respect to each other. */ - for_each_cpu(i) { + for_each_possible_cpu(i) { if (i == boot_cpuid) continue; if (i == (boot_cpuid ^ 1)) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 98660aedeeb7..4cbde211eb69 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -74,19 +74,19 @@ EXPORT_SYMBOL(__debugger_dabr_match); EXPORT_SYMBOL(__debugger_fault_handler); #endif -struct notifier_block *powerpc_die_chain; -static DEFINE_SPINLOCK(die_notifier_lock); +ATOMIC_NOTIFIER_HEAD(powerpc_die_chain); int register_die_notifier(struct notifier_block *nb) { - int err = 0; - unsigned long flags; + return atomic_notifier_chain_register(&powerpc_die_chain, nb); +} +EXPORT_SYMBOL(register_die_notifier); - spin_lock_irqsave(&die_notifier_lock, flags); - err = notifier_chain_register(&powerpc_die_chain, nb); - spin_unlock_irqrestore(&die_notifier_lock, flags); - return err; +int unregister_die_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(&powerpc_die_chain, nb); } +EXPORT_SYMBOL(unregister_die_notifier); /* * Trap & Exception support @@ -97,7 +97,6 @@ static DEFINE_SPINLOCK(die_lock); int die(const char *str, struct pt_regs *regs, long err) { static int die_counter, crash_dump_start = 0; - int nl = 0; if (debugger(regs)) return 1; @@ -106,7 +105,7 @@ int die(const char *str, struct pt_regs *regs, long err) spin_lock_irq(&die_lock); bust_spinlocks(1); #ifdef CONFIG_PMAC_BACKLIGHT - if (_machine == _MACH_Pmac) { + if (machine_is(powermac)) { set_backlight_enable(1); set_backlight_level(BACKLIGHT_MAX); } @@ -114,46 +113,18 @@ int die(const char *str, struct pt_regs *regs, long err) printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter); #ifdef CONFIG_PREEMPT printk("PREEMPT "); - nl = 1; #endif #ifdef CONFIG_SMP printk("SMP NR_CPUS=%d ", NR_CPUS); - nl = 1; #endif #ifdef CONFIG_DEBUG_PAGEALLOC printk("DEBUG_PAGEALLOC "); - nl = 1; #endif #ifdef CONFIG_NUMA printk("NUMA "); - nl = 1; #endif -#ifdef CONFIG_PPC64 - switch (_machine) { - case PLATFORM_PSERIES: - printk("PSERIES "); - nl = 1; - break; - case PLATFORM_PSERIES_LPAR: - printk("PSERIES LPAR "); - nl = 1; - break; - case PLATFORM_ISERIES_LPAR: - printk("ISERIES LPAR "); - nl = 1; - break; - case PLATFORM_POWERMAC: - printk("POWERMAC "); - nl = 1; - break; - case PLATFORM_CELL: - printk("CELL "); - nl = 1; - break; - } -#endif - if (nl) - printk("\n"); + printk("%s\n", ppc_md.name ? "" : ppc_md.name); + print_modules(); show_regs(regs); bust_spinlocks(0); diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index ec8370368423..573afb68d69e 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -33,6 +33,7 @@ #include <asm/machdep.h> #include <asm/cputable.h> #include <asm/sections.h> +#include <asm/firmware.h> #include <asm/vdso.h> #include <asm/vdso_datapage.h> @@ -667,7 +668,13 @@ void __init vdso_init(void) vdso_data->version.major = SYSTEMCFG_MAJOR; vdso_data->version.minor = SYSTEMCFG_MINOR; vdso_data->processor = mfspr(SPRN_PVR); - vdso_data->platform = _machine; + /* + * Fake the old platform number for pSeries and iSeries and add + * in LPAR bit if necessary + */ + vdso_data->platform = machine_is(iseries) ? 0x200 : 0x100; + if (firmware_has_feature(FW_FEATURE_LPAR)) + vdso_data->platform |= 1; vdso_data->physicalMemorySize = lmb_phys_mem_size(); vdso_data->dcache_size = ppc64_caches.dsize; vdso_data->dcache_line_size = ppc64_caches.dline_size; diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 7fa7b15fd8e6..fe79c2584cb0 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -1,9 +1,11 @@ #include <linux/config.h> #ifdef CONFIG_PPC64 #include <asm/page.h> +#define PROVIDE32(x) PROVIDE(__unused__##x) #else #define PAGE_SIZE 4096 #define KERNELBASE CONFIG_KERNEL_START +#define PROVIDE32(x) PROVIDE(x) #endif #include <asm-generic/vmlinux.lds.h> @@ -18,43 +20,42 @@ jiffies = jiffies_64 + 4; #endif SECTIONS { - /* Sections to be discarded. */ - /DISCARD/ : { - *(.exitcall.exit) - *(.exit.data) - } - - . = KERNELBASE; - - /* Read-only sections, merged into text segment: */ - .text : { - *(.text .text.*) - SCHED_TEXT - LOCK_TEXT - KPROBES_TEXT - *(.fixup) -#ifdef CONFIG_PPC32 - *(.got1) - __got2_start = .; - *(.got2) - __got2_end = .; -#else - . = ALIGN(PAGE_SIZE); - _etext = .; -#endif - } -#ifdef CONFIG_PPC32 - _etext = .; - PROVIDE (etext = .); + /* Sections to be discarded. */ + /DISCARD/ : { + *(.exitcall.exit) + *(.exit.data) + } - RODATA - .fini : { *(.fini) } =0 - .ctors : { *(.ctors) } - .dtors : { *(.dtors) } + . = KERNELBASE; - .fixup : { *(.fixup) } -#endif +/* + * Text, read only data and other permanent read-only sections + */ + + /* Text and gots */ + .text : { + *(.text .text.*) + SCHED_TEXT + LOCK_TEXT + KPROBES_TEXT + *(.fixup) +#ifdef CONFIG_PPC32 + *(.got1) + __got2_start = .; + *(.got2) + __got2_end = .; +#endif /* CONFIG_PPC32 */ + + . = ALIGN(PAGE_SIZE); + _etext = .; + PROVIDE32 (etext = .); + } + + /* Read-only data */ + RODATA + + /* Exception & bug tables */ __ex_table : { __start___ex_table = .; *(__ex_table) @@ -67,192 +68,172 @@ SECTIONS __stop___bug_table = .; } -#ifdef CONFIG_PPC64 +/* + * Init sections discarded at runtime + */ + . = ALIGN(PAGE_SIZE); + __init_begin = .; + + .init.text : { + _sinittext = .; + *(.init.text) + _einittext = .; + } + + /* .exit.text is discarded at runtime, not link time, + * to deal with references from __bug_table + */ + .exit.text : { *(.exit.text) } + + .init.data : { + *(.init.data); + __vtop_table_begin = .; + *(.vtop_fixup); + __vtop_table_end = .; + __ptov_table_begin = .; + *(.ptov_fixup); + __ptov_table_end = .; + } + + . = ALIGN(16); + .init.setup : { + __setup_start = .; + *(.init.setup) + __setup_end = .; + } + + .initcall.init : { + __initcall_start = .; + *(.initcall1.init) + *(.initcall2.init) + *(.initcall3.init) + *(.initcall4.init) + *(.initcall5.init) + *(.initcall6.init) + *(.initcall7.init) + __initcall_end = .; + } + + .con_initcall.init : { + __con_initcall_start = .; + *(.con_initcall.init) + __con_initcall_end = .; + } + + SECURITY_INIT + + . = ALIGN(8); __ftr_fixup : { __start___ftr_fixup = .; *(__ftr_fixup) __stop___ftr_fixup = .; } - RODATA -#endif + . = ALIGN(PAGE_SIZE); + .init.ramfs : { + __initramfs_start = .; + *(.init.ramfs) + __initramfs_end = .; + } #ifdef CONFIG_PPC32 - /* Read-write section, merged into data segment: */ - . = ALIGN(PAGE_SIZE); - _sdata = .; - .data : - { - *(.data) - *(.data1) - *(.sdata) - *(.sdata2) - *(.got.plt) *(.got) - *(.dynamic) - CONSTRUCTORS - } - - . = ALIGN(PAGE_SIZE); - __nosave_begin = .; - .data_nosave : { *(.data.nosave) } - . = ALIGN(PAGE_SIZE); - __nosave_end = .; - - . = ALIGN(32); - .data.cacheline_aligned : { *(.data.cacheline_aligned) } - - _edata = .; - PROVIDE (edata = .); - - . = ALIGN(8192); - .data.init_task : { *(.data.init_task) } + . = ALIGN(32); +#else + . = ALIGN(128); #endif + .data.percpu : { + __per_cpu_start = .; + *(.data.percpu) + __per_cpu_end = .; + } - /* will be freed after init */ - . = ALIGN(PAGE_SIZE); - __init_begin = .; - .init.text : { - _sinittext = .; - *(.init.text) - _einittext = .; - } -#ifdef CONFIG_PPC32 - /* .exit.text is discarded at runtime, not link time, - to deal with references from __bug_table */ - .exit.text : { *(.exit.text) } -#endif - .init.data : { - *(.init.data); - __vtop_table_begin = .; - *(.vtop_fixup); - __vtop_table_end = .; - __ptov_table_begin = .; - *(.ptov_fixup); - __ptov_table_end = .; - } - - . = ALIGN(16); - .init.setup : { - __setup_start = .; - *(.init.setup) - __setup_end = .; - } - - .initcall.init : { - __initcall_start = .; - *(.initcall1.init) - *(.initcall2.init) - *(.initcall3.init) - *(.initcall4.init) - *(.initcall5.init) - *(.initcall6.init) - *(.initcall7.init) - __initcall_end = .; - } - - .con_initcall.init : { - __con_initcall_start = .; - *(.con_initcall.init) - __con_initcall_end = .; - } - - SECURITY_INIT + . = ALIGN(8); + .machine.desc : { + __machine_desc_start = . ; + *(.machine.desc) + __machine_desc_end = . ; + } + + /* freed after init ends here */ + . = ALIGN(PAGE_SIZE); + __init_end = .; + +/* + * And now the various read/write data + */ + + . = ALIGN(PAGE_SIZE); + _sdata = .; #ifdef CONFIG_PPC32 - __start___ftr_fixup = .; - __ftr_fixup : { *(__ftr_fixup) } - __stop___ftr_fixup = .; + .data : + { + *(.data) + *(.sdata) + *(.got.plt) *(.got) + } #else - . = ALIGN(PAGE_SIZE); - .init.ramfs : { - __initramfs_start = .; - *(.init.ramfs) - __initramfs_end = .; - } -#endif + .data : { + *(.data .data.rel* .toc1) + *(.branch_lt) + } -#ifdef CONFIG_PPC32 - . = ALIGN(32); + .opd : { + *(.opd) + } + + .got : { + __toc_start = .; + *(.got) + *(.toc) + } #endif - .data.percpu : { - __per_cpu_start = .; - *(.data.percpu) - __per_cpu_end = .; - } - . = ALIGN(PAGE_SIZE); -#ifdef CONFIG_PPC64 - . = ALIGN(16384); - __init_end = .; - /* freed after init ends here */ - - /* Read/write sections */ - . = ALIGN(PAGE_SIZE); - . = ALIGN(16384); - _sdata = .; - /* The initial task and kernel stack */ - .data.init_task : { - *(.data.init_task) - } - - . = ALIGN(PAGE_SIZE); - .data.page_aligned : { - *(.data.page_aligned) - } - - .data.cacheline_aligned : { - *(.data.cacheline_aligned) - } - - .data : { - *(.data .data.rel* .toc1) - *(.branch_lt) - } - - .opd : { - *(.opd) - } - - .got : { - __toc_start = .; - *(.got) - *(.toc) - . = ALIGN(PAGE_SIZE); - _edata = .; - } - - . = ALIGN(PAGE_SIZE); + . = ALIGN(PAGE_SIZE); + _edata = .; + PROVIDE32 (edata = .); + + /* The initial task and kernel stack */ +#ifdef CONFIG_PPC32 + . = ALIGN(8192); #else - __initramfs_start = .; - .init.ramfs : { - *(.init.ramfs) - } - __initramfs_end = .; + . = ALIGN(16384); +#endif + .data.init_task : { + *(.data.init_task) + } - . = ALIGN(4096); - __init_end = .; + . = ALIGN(PAGE_SIZE); + .data.page_aligned : { + *(.data.page_aligned) + } - . = ALIGN(4096); - _sextratext = .; - _eextratext = .; + .data.cacheline_aligned : { + *(.data.cacheline_aligned) + } - __bss_start = .; -#endif + . = ALIGN(PAGE_SIZE); + __data_nosave : { + __nosave_begin = .; + *(.data.nosave) + . = ALIGN(PAGE_SIZE); + __nosave_end = .; + } - .bss : { - __bss_start = .; - *(.sbss) *(.scommon) - *(.dynbss) - *(.bss) - *(COMMON) - __bss_stop = .; - } +/* + * And finally the bss + */ + + .bss : { + __bss_start = .; + *(.sbss) *(.scommon) + *(.dynbss) + *(.bss) + *(COMMON) + __bss_stop = .; + } -#ifdef CONFIG_PPC64 - . = ALIGN(PAGE_SIZE); -#endif - _end = . ; -#ifdef CONFIG_PPC32 - PROVIDE (end = .); -#endif + . = ALIGN(PAGE_SIZE); + _end = . ; + PROVIDE32 (end = .); } diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 666c2aa55016..c251d9936612 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -18,7 +18,7 @@ extern char system_call_common[]; #ifdef CONFIG_PPC64 /* Bits in SRR1 that are copied from MSR */ -#define MSR_MASK 0xffffffff87c0ffff +#define MSR_MASK 0xffffffff87c0ffffUL #else #define MSR_MASK 0x87c0ffff #endif diff --git a/arch/powerpc/lib/strcase.c b/arch/powerpc/lib/strcase.c index 36b521091bbc..f8ec1eba3fdd 100644 --- a/arch/powerpc/lib/strcase.c +++ b/arch/powerpc/lib/strcase.c @@ -1,4 +1,6 @@ +#include <linux/types.h> #include <linux/ctype.h> +#include <linux/string.h> int strcasecmp(const char *s1, const char *s2) { @@ -11,7 +13,7 @@ int strcasecmp(const char *s1, const char *s2) return c1 - c2; } -int strncasecmp(const char *s1, const char *s2, int n) +int strncasecmp(const char *s1, const char *s2, size_t n) { int c1, c2; diff --git a/arch/powerpc/math-emu/Makefile b/arch/powerpc/math-emu/Makefile new file mode 100644 index 000000000000..754143e8936b --- /dev/null +++ b/arch/powerpc/math-emu/Makefile @@ -0,0 +1,13 @@ + +obj-y := math.o fmr.o lfd.o stfd.o + +obj-$(CONFIG_MATH_EMULATION) += fabs.o fadd.o fadds.o fcmpo.o fcmpu.o \ + fctiw.o fctiwz.o fdiv.o fdivs.o \ + fmadd.o fmadds.o fmsub.o fmsubs.o \ + fmul.o fmuls.o fnabs.o fneg.o types.o \ + fnmadd.o fnmadds.o fnmsub.o fnmsubs.o \ + fres.o frsp.o frsqrte.o fsel.o lfs.o \ + fsqrt.o fsqrts.o fsub.o fsubs.o \ + mcrfs.o mffs.o mtfsb0.o mtfsb1.o \ + mtfsf.o mtfsfi.o stfiwx.o stfs.o \ + udivmodti4.o diff --git a/arch/powerpc/math-emu/double.h b/arch/powerpc/math-emu/double.h new file mode 100644 index 000000000000..ffba8b67f059 --- /dev/null +++ b/arch/powerpc/math-emu/double.h @@ -0,0 +1,129 @@ +/* + * Definitions for IEEE Double Precision + */ + +#if _FP_W_TYPE_SIZE < 32 +#error "Here's a nickel kid. Go buy yourself a real computer." +#endif + +#if _FP_W_TYPE_SIZE < 64 +#define _FP_FRACTBITS_D (2 * _FP_W_TYPE_SIZE) +#else +#define _FP_FRACTBITS_D _FP_W_TYPE_SIZE +#endif + +#define _FP_FRACBITS_D 53 +#define _FP_FRACXBITS_D (_FP_FRACTBITS_D - _FP_FRACBITS_D) +#define _FP_WFRACBITS_D (_FP_WORKBITS + _FP_FRACBITS_D) +#define _FP_WFRACXBITS_D (_FP_FRACTBITS_D - _FP_WFRACBITS_D) +#define _FP_EXPBITS_D 11 +#define _FP_EXPBIAS_D 1023 +#define _FP_EXPMAX_D 2047 + +#define _FP_QNANBIT_D \ + ((_FP_W_TYPE)1 << ((_FP_FRACBITS_D-2) % _FP_W_TYPE_SIZE)) +#define _FP_IMPLBIT_D \ + ((_FP_W_TYPE)1 << ((_FP_FRACBITS_D-1) % _FP_W_TYPE_SIZE)) +#define _FP_OVERFLOW_D \ + ((_FP_W_TYPE)1 << (_FP_WFRACBITS_D % _FP_W_TYPE_SIZE)) + +#if _FP_W_TYPE_SIZE < 64 + +union _FP_UNION_D +{ + double flt; + struct { +#if __BYTE_ORDER == __BIG_ENDIAN + unsigned sign : 1; + unsigned exp : _FP_EXPBITS_D; + unsigned frac1 : _FP_FRACBITS_D - (_FP_IMPLBIT_D != 0) - _FP_W_TYPE_SIZE; + unsigned frac0 : _FP_W_TYPE_SIZE; +#else + unsigned frac0 : _FP_W_TYPE_SIZE; + unsigned frac1 : _FP_FRACBITS_D - (_FP_IMPLBIT_D != 0) - _FP_W_TYPE_SIZE; + unsigned exp : _FP_EXPBITS_D; + unsigned sign : 1; +#endif + } bits __attribute__((packed)); +}; + +#define FP_DECL_D(X) _FP_DECL(2,X) +#define FP_UNPACK_RAW_D(X,val) _FP_UNPACK_RAW_2(D,X,val) +#define FP_PACK_RAW_D(val,X) _FP_PACK_RAW_2(D,val,X) + +#define FP_UNPACK_D(X,val) \ + do { \ + _FP_UNPACK_RAW_2(D,X,val); \ + _FP_UNPACK_CANONICAL(D,2,X); \ + } while (0) + +#define FP_PACK_D(val,X) \ + do { \ + _FP_PACK_CANONICAL(D,2,X); \ + _FP_PACK_RAW_2(D,val,X); \ + } while (0) + +#define FP_NEG_D(R,X) _FP_NEG(D,2,R,X) +#define FP_ADD_D(R,X,Y) _FP_ADD(D,2,R,X,Y) +#define FP_SUB_D(R,X,Y) _FP_SUB(D,2,R,X,Y) +#define FP_MUL_D(R,X,Y) _FP_MUL(D,2,R,X,Y) +#define FP_DIV_D(R,X,Y) _FP_DIV(D,2,R,X,Y) +#define FP_SQRT_D(R,X) _FP_SQRT(D,2,R,X) + +#define FP_CMP_D(r,X,Y,un) _FP_CMP(D,2,r,X,Y,un) +#define FP_CMP_EQ_D(r,X,Y) _FP_CMP_EQ(D,2,r,X,Y) + +#define FP_TO_INT_D(r,X,rsz,rsg) _FP_TO_INT(D,2,r,X,rsz,rsg) +#define FP_FROM_INT_D(X,r,rs,rt) _FP_FROM_INT(D,2,X,r,rs,rt) + +#else + +union _FP_UNION_D +{ + double flt; + struct { +#if __BYTE_ORDER == __BIG_ENDIAN + unsigned sign : 1; + unsigned exp : _FP_EXPBITS_D; + unsigned long frac : _FP_FRACBITS_D - (_FP_IMPLBIT_D != 0); +#else + unsigned long frac : _FP_FRACBITS_D - (_FP_IMPLBIT_D != 0); + unsigned exp : _FP_EXPBITS_D; + unsigned sign : 1; +#endif + } bits __attribute__((packed)); +}; + +#define FP_DECL_D(X) _FP_DECL(1,X) +#define FP_UNPACK_RAW_D(X,val) _FP_UNPACK_RAW_1(D,X,val) +#define FP_PACK_RAW_D(val,X) _FP_PACK_RAW_1(D,val,X) + +#define FP_UNPACK_D(X,val) \ + do { \ + _FP_UNPACK_RAW_1(D,X,val); \ + _FP_UNPACK_CANONICAL(D,1,X); \ + } while (0) + +#define FP_PACK_D(val,X) \ + do { \ + _FP_PACK_CANONICAL(D,1,X); \ + _FP_PACK_RAW_1(D,val,X); \ + } while (0) + +#define FP_NEG_D(R,X) _FP_NEG(D,1,R,X) +#define FP_ADD_D(R,X,Y) _FP_ADD(D,1,R,X,Y) +#define FP_SUB_D(R,X,Y) _FP_SUB(D,1,R,X,Y) +#define FP_MUL_D(R,X,Y) _FP_MUL(D,1,R,X,Y) +#define FP_DIV_D(R,X,Y) _FP_DIV(D,1,R,X,Y) +#define FP_SQRT_D(R,X) _FP_SQRT(D,1,R,X) + +/* The implementation of _FP_MUL_D and _FP_DIV_D should be chosen by + the target machine. */ + +#define FP_CMP_D(r,X,Y,un) _FP_CMP(D,1,r,X,Y,un) +#define FP_CMP_EQ_D(r,X,Y) _FP_CMP_EQ(D,1,r,X,Y) + +#define FP_TO_INT_D(r,X,rsz,rsg) _FP_TO_INT(D,1,r,X,rsz,rsg) +#define FP_FROM_INT_D(X,r,rs,rt) _FP_FROM_INT(D,1,X,r,rs,rt) + +#endif /* W_TYPE_SIZE < 64 */ diff --git a/arch/powerpc/math-emu/fabs.c b/arch/powerpc/math-emu/fabs.c new file mode 100644 index 000000000000..41f0617f3d3a --- /dev/null +++ b/arch/powerpc/math-emu/fabs.c @@ -0,0 +1,18 @@ +#include <linux/types.h> +#include <linux/errno.h> +#include <asm/uaccess.h> + +int +fabs(u32 *frD, u32 *frB) +{ + frD[0] = frB[0] & 0x7fffffff; + frD[1] = frB[1]; + +#ifdef DEBUG + printk("%s: D %p, B %p: ", __FUNCTION__, frD, frB); + dump_double(frD); + printk("\n"); +#endif + + return 0; +} diff --git a/arch/powerpc/math-emu/fadd.c b/arch/powerpc/math-emu/fadd.c new file mode 100644 index 000000000000..fc8836488b64 --- /dev/null +++ b/arch/powerpc/math-emu/fadd.c @@ -0,0 +1,38 @@ +#include <linux/types.h> +#include <linux/errno.h> +#include <asm/uaccess.h> + +#include "soft-fp.h" +#include "double.h" + +int +fadd(void *frD, void *frA, void *frB) +{ + FP_DECL_D(A); + FP_DECL_D(B); + FP_DECL_D(R); + int ret = 0; + +#ifdef DEBUG + printk("%s: %p %p %p\n", __FUNCTION__, frD, frA, frB); +#endif + + __FP_UNPACK_D(A, frA); + __FP_UNPACK_D(B, frB); + +#ifdef DEBUG + printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c); + printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c); +#endif + + if (A_s != B_s && A_c == FP_CLS_INF && B_c == FP_CLS_INF) + ret |= EFLAG_VXISI; + + FP_ADD_D(R, A, B); + +#ifdef DEBUG + printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c); +#endif + + return (ret | __FP_PACK_D(frD, R)); +} diff --git a/arch/powerpc/math-emu/fadds.c b/arch/powerpc/math-emu/fadds.c new file mode 100644 index 000000000000..93025b6c8f3c --- /dev/null +++ b/arch/powerpc/math-emu/fadds.c @@ -0,0 +1,39 @@ +#include <linux/types.h> +#include <linux/errno.h> +#include <asm/uaccess.h> + +#include "soft-fp.h" +#include "double.h" +#include "single.h" + +int +fadds(void *frD, void *frA, void *frB) +{ + FP_DECL_D(A); + FP_DECL_D(B); + FP_DECL_D(R); + int ret = 0; + +#ifdef DEBUG + printk("%s: %p %p %p\n", __FUNCTION__, frD, frA, frB); +#endif + + __FP_UNPACK_D(A, frA); + __FP_UNPACK_D(B, frB); + +#ifdef DEBUG + printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c); + printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c); +#endif + + if (A_s != B_s && A_c == FP_CLS_INF && B_c == FP_CLS_INF) + ret |= EFLAG_VXISI; + + FP_ADD_D(R, A, B); + +#ifdef DEBUG + printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c); +#endif + + return (ret | __FP_PACK_DS(frD, R)); +} diff --git a/arch/powerpc/math-emu/fcmpo.c b/arch/powerpc/math-emu/fcmpo.c new file mode 100644 index 000000000000..4efac394b4cb --- /dev/null +++ b/arch/powerpc/math-emu/fcmpo.c @@ -0,0 +1,46 @@ +#include <linux/types.h> +#include <linux/errno.h> +#include <asm/uaccess.h> + +#include "soft-fp.h" +#include "double.h" + +int +fcmpo(u32 *ccr, int crfD, void *frA, void *frB) +{ + FP_DECL_D(A); + FP_DECL_D(B); + int code[4] = { (1 << 3), (1 << 1), (1 << 2), (1 << 0) }; + long cmp; + int ret = 0; + +#ifdef DEBUG + printk("%s: %p (%08x) %d %p %p\n", __FUNCTION__, ccr, *ccr, crfD, frA, frB); +#endif + + __FP_UNPACK_D(A, frA); + __FP_UNPACK_D(B, frB); + +#ifdef DEBUG + printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c); + printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c); +#endif + + if (A_c == FP_CLS_NAN || B_c == FP_CLS_NAN) + ret |= EFLAG_VXVC; + + FP_CMP_D(cmp, A, B, 2); + cmp = code[(cmp + 1) & 3]; + + __FPU_FPSCR &= ~(0x1f000); + __FPU_FPSCR |= (cmp << 12); + + *ccr &= ~(15 << ((7 - crfD) << 2)); + *ccr |= (cmp << ((7 - crfD) << 2)); + +#ifdef DEBUG + printk("CR: %08x\n", *ccr); +#endif + + return ret; +} diff --git a/arch/powerpc/math-emu/fcmpu.c b/arch/powerpc/math-emu/fcmpu.c new file mode 100644 index 000000000000..b7e33176e618 --- /dev/null +++ b/arch/powerpc/math-emu/fcmpu.c @@ -0,0 +1,42 @@ +#include <linux/types.h> +#include <linux/errno.h> +#include <asm/uaccess.h> + +#include "soft-fp.h" +#include "double.h" + +int +fcmpu(u32 *ccr, int crfD, void *frA, void *frB) +{ + FP_DECL_D(A); + FP_DECL_D(B); + int code[4] = { (1 << 3), (1 << 1), (1 << 2), (1 << 0) }; + long cmp; + +#ifdef DEBUG + printk("%s: %p (%08x) %d %p %p\n", __FUNCTION__, ccr, *ccr, crfD, frA, frB); +#endif + + __FP_UNPACK_D(A, frA); + __FP_UNPACK_D(B, frB); + +#ifdef DEBUG + printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c); + printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c); +#endif + + FP_CMP_D(cmp, A, B, 2); + cmp = code[(cmp + 1) & 3]; + + __FPU_FPSCR &= ~(0x1f000); + __FPU_FPSCR |= (cmp << 12); + + *ccr &= ~(15 << ((7 - crfD) << 2)); + *ccr |= (cmp << ((7 - crfD) << 2)); + +#ifdef DEBUG + printk("CR: %08x\n", *ccr); +#endif + + return 0; +} diff --git a/arch/powerpc/math-emu/fctiw.c b/arch/powerpc/math-emu/fctiw.c new file mode 100644 index 000000000000..3b3c98b840cf --- /dev/null +++ b/arch/powerpc/math-emu/fctiw.c @@ -0,0 +1,25 @@ +#include <linux/types.h> +#include <linux/errno.h> +#include <asm/uaccess.h> + +#include "soft-fp.h" +#include "double.h" + +int +fctiw(u32 *frD, void *frB) +{ + FP_DECL_D(B); + unsigned int r; + + __FP_UNPACK_D(B, frB); + FP_TO_INT_D(r, B, 32, 1); + frD[1] = r; + +#ifdef DEBUG + printk("%s: D %p, B %p: ", __FUNCTION__, frD, frB); + dump_double(frD); + printk("\n"); +#endif + + return 0; +} diff --git a/arch/powerpc/math-emu/fctiwz.c b/arch/powerpc/math-emu/fctiwz.c new file mode 100644 index 000000000000..7717eb6fcfb6 --- /dev/null +++ b/arch/powerpc/math-emu/fctiwz.c @@ -0,0 +1,32 @@ +#include <linux/types.h> +#include <linux/errno.h> +#include <asm/uaccess.h> + +#include "soft-fp.h" +#include "double.h" + +int +fctiwz(u32 *frD, void *frB) +{ + FP_DECL_D(B); + u32 fpscr; + unsigned int r; + + fpscr = __FPU_FPSCR; + __FPU_FPSCR &= ~(3); + __FPU_FPSCR |= FP_RND_ZERO; + + __FP_UNPACK_D(B, frB); + FP_TO_INT_D(r, B, 32, 1); + frD[1] = r; + + __FPU_FPSCR = fpscr; + +#ifdef DEBUG + printk("%s: D %p, B %p: ", __FUNCTION__, frD, frB); + dump_double(frD); + printk("\n"); +#endif + + return 0; +} diff --git a/arch/powerpc/math-emu/fdiv.c b/arch/powerpc/math-emu/fdiv.c new file mode 100644 index 000000000000..f2fba825b2d0 --- /dev/null +++ b/arch/powerpc/math-emu/fdiv.c @@ -0,0 +1,53 @@ +#include <linux/types.h> +#include <linux/errno.h> +#include <asm/uaccess.h> + +#include "soft-fp.h" +#include "double.h" + +int +fdiv(void *frD, void *frA, void *frB) +{ + FP_DECL_D(A); + FP_DECL_D(B); + FP_DECL_D(R); + int ret = 0; + +#ifdef DEBUG + printk("%s: %p %p %p\n", __FUNCTION__, frD, frA, frB); +#endif + + __FP_UNPACK_D(A, frA); + __FP_UNPACK_D(B, frB); + +#ifdef DEBUG + printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c); + printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c); +#endif + + if (A_c == FP_CLS_ZERO && B_c == FP_CLS_ZERO) { + ret |= EFLAG_VXZDZ; +#ifdef DEBUG + printk("%s: FPSCR_VXZDZ raised\n", __FUNCTION__); +#endif + } + if (A_c == FP_CLS_INF && B_c == FP_CLS_INF) { + ret |= EFLAG_VXIDI; +#ifdef DEBUG + printk("%s: FPSCR_VXIDI raised\n", __FUNCTION__); +#endif + } + + if (B_c == FP_CLS_ZERO && A_c != FP_CLS_ZERO) { + ret |= EFLAG_DIVZERO; + if (__FPU_TRAP_P(EFLAG_DIVZERO)) + return ret; + } + FP_DIV_D(R, A, B); + +#ifdef DEBUG + printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c); +#endif + + return (ret | __FP_PACK_D(frD, R)); +} diff --git a/arch/powerpc/math-emu/fdivs.c b/arch/powerpc/math-emu/fdivs.c new file mode 100644 index 000000000000..b971196e3175 --- /dev/null +++ b/arch/powerpc/math-emu/fdivs.c @@ -0,0 +1,55 @@ +#include <linux/types.h> +#include <linux/errno.h> +#include <asm/uaccess.h> + +#include "soft-fp.h" +#include "double.h" +#include "single.h" + +int +fdivs(void *frD, void *frA, void *frB) +{ + FP_DECL_D(A); + FP_DECL_D(B); + FP_DECL_D(R); + int ret = 0; + +#ifdef DEBUG + printk("%s: %p %p %p\n", __FUNCTION__, frD, frA, frB); +#endif + + __FP_UNPACK_D(A, frA); + __FP_UNPACK_D(B, frB); + +#ifdef DEBUG + printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c); + printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c); +#endif + + if (A_c == FP_CLS_ZERO && B_c == FP_CLS_ZERO) { + ret |= EFLAG_VXZDZ; +#ifdef DEBUG + printk("%s: FPSCR_VXZDZ raised\n", __FUNCTION__); +#endif + } + if (A_c == FP_CLS_INF && B_c == FP_CLS_INF) { + ret |= EFLAG_VXIDI; +#ifdef DEBUG + printk("%s: FPSCR_VXIDI raised\n", __FUNCTION__); +#endif + } + + if (B_c == FP_CLS_ZERO && A_c != FP_CLS_ZERO) { + ret |= EFLAG_DIVZERO; + if (__FPU_TRAP_P(EFLAG_DIVZERO)) + return ret; + } + + FP_DIV_D(R, A, B); + +#ifdef DEBUG + printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c); +#endif + + return (ret | __FP_PACK_DS(frD, R)); +} diff --git a/arch/powerpc/math-emu/fmadd.c b/arch/powerpc/math-emu/fmadd.c new file mode 100644 index 000000000000..0a1dbce793e9 --- /dev/null +++ b/arch/powerpc/math-emu/fmadd.c @@ -0,0 +1,48 @@ +#include <linux/types.h> +#include <linux/errno.h> +#include <asm/uaccess.h> + +#include "soft-fp.h" +#include "double.h" + +int +fmadd(void *frD, void *frA, void *frB, void *frC) +{ + FP_DECL_D(R); + FP_DECL_D(A); + FP_DECL_D(B); + FP_DECL_D(C); + FP_DECL_D(T); + int ret = 0; + +#ifdef DEBUG + printk("%s: %p %p %p %p\n", __FUNCTION__, frD, frA, frB, frC); +#endif + + __FP_UNPACK_D(A, frA); + __FP_UNPACK_D(B, frB); + __FP_UNPACK_D(C, frC); + +#ifdef DEBUG + printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c); + printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c); + printk("C: %ld %lu %lu %ld (%ld)\n", C_s, C_f1, C_f0, C_e, C_c); +#endif + + if ((A_c == FP_CLS_INF && C_c == FP_CLS_ZERO) || + (A_c == FP_CLS_ZERO && C_c == FP_CLS_INF)) + ret |= EFLAG_VXIMZ; + + FP_MUL_D(T, A, C); + + if (T_s != B_s && T_c == FP_CLS_INF && B_c == FP_CLS_INF) + ret |= EFLAG_VXISI; + + FP_ADD_D(R, T, B); + +#ifdef DEBUG + printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c); +#endif + + return (ret | __FP_PACK_D(frD, R)); +} diff --git a/arch/powerpc/math-emu/fmadds.c b/arch/powerpc/math-emu/fmadds.c new file mode 100644 index 000000000000..0f70bba9445e --- /dev/null +++ b/arch/powerpc/math-emu/fmadds.c @@ -0,0 +1,49 @@ +#include <linux/types.h> +#include <linux/errno.h> +#include <asm/uaccess.h> + +#include "soft-fp.h" +#include "double.h" +#include "single.h" + +int +fmadds(void *frD, void *frA, void *frB, void *frC) +{ + FP_DECL_D(R); + FP_DECL_D(A); + FP_DECL_D(B); + FP_DECL_D(C); + FP_DECL_D(T); + int ret = 0; + +#ifdef DEBUG + printk("%s: %p %p %p %p\n", __FUNCTION__, frD, frA, frB, frC); +#endif + + __FP_UNPACK_D(A, frA); + __FP_UNPACK_D(B, frB); + __FP_UNPACK_D(C, frC); + +#ifdef DEBUG + printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c); + printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c); + printk("C: %ld %lu %lu %ld (%ld)\n", C_s, C_f1, C_f0, C_e, C_c); +#endif + + if ((A_c == FP_CLS_INF && C_c == FP_CLS_ZERO) || + (A_c == FP_CLS_ZERO && C_c == FP_CLS_INF)) + ret |= EFLAG_VXIMZ; + + FP_MUL_D(T, A, C); + + if (T_s != B_s && T_c == FP_CLS_INF && B_c == FP_CLS_INF) + ret |= EFLAG_VXISI; + + FP_ADD_D(R, T, B); + +#ifdef DEBUG + printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c); +#endif + + return (ret | __FP_PACK_DS(frD, R)); +} diff --git a/arch/powerpc/math-emu/fmr.c b/arch/powerpc/math-emu/fmr.c new file mode 100644 index 000000000000..28df700c0c7e --- /dev/null +++ b/arch/powerpc/math-emu/fmr.c @@ -0,0 +1,18 @@ +#include <linux/types.h> +#include <linux/errno.h> +#include <asm/uaccess.h> + +int +fmr(u32 *frD, u32 *frB) +{ + frD[0] = frB[0]; + frD[1] = frB[1]; + +#ifdef DEBUG + printk("%s: D %p, B %p: ", __FUNCTION__, frD, frB); + dump_double(frD); + printk("\n"); +#endif + + return 0; +} diff --git a/arch/powerpc/math-emu/fmsub.c b/arch/powerpc/math-emu/fmsub.c new file mode 100644 index 000000000000..203fd48a6fec --- /dev/null +++ b/arch/powerpc/math-emu/fmsub.c @@ -0,0 +1,51 @@ +#include <linux/types.h> +#include <linux/errno.h> +#include <asm/uaccess.h> + +#include "soft-fp.h" +#include "double.h" + +int +fmsub(void *frD, void *frA, void *frB, void *frC) +{ + FP_DECL_D(R); + FP_DECL_D(A); + FP_DECL_D(B); + FP_DECL_D(C); + FP_DECL_D(T); + int ret = 0; + +#ifdef DEBUG + printk("%s: %p %p %p %p\n", __FUNCTION__, frD, frA, frB, frC); +#endif + + __FP_UNPACK_D(A, frA); + __FP_UNPACK_D(B, frB); + __FP_UNPACK_D(C, frC); + +#ifdef DEBUG + printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c); + printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c); + printk("C: %ld %lu %lu %ld (%ld)\n", C_s, C_f1, C_f0, C_e, C_c); +#endif + + if ((A_c == FP_CLS_INF && C_c == FP_CLS_ZERO) || + (A_c == FP_CLS_ZERO && C_c == FP_CLS_INF)) + ret |= EFLAG_VXIMZ; + + FP_MUL_D(T, A, C); + + if (B_c != FP_CLS_NAN) + B_s ^= 1; + + if (T_s != B_s && T_c == FP_CLS_INF && B_c == FP_CLS_INF) + ret |= EFLAG_VXISI; + + FP_ADD_D(R, T, B); + +#ifdef DEBUG + printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c); +#endif + + return (ret | __FP_PACK_D(frD, R)); +} diff --git a/arch/powerpc/math-emu/fmsubs.c b/arch/powerpc/math-emu/fmsubs.c new file mode 100644 index 000000000000..8ce68624c189 --- /dev/null +++ b/arch/powerpc/math-emu/fmsubs.c @@ -0,0 +1,52 @@ +#include <linux/types.h> +#include <linux/errno.h> +#include <asm/uaccess.h> + +#include "soft-fp.h" +#include "double.h" +#include "single.h" + +int +fmsubs(void *frD, void *frA, void *frB, void *frC) +{ + FP_DECL_D(R); + FP_DECL_D(A); + FP_DECL_D(B); + FP_DECL_D(C); + FP_DECL_D(T); + int ret = 0; + +#ifdef DEBUG + printk("%s: %p %p %p %p\n", __FUNCTION__, frD, frA, frB, frC); +#endif + + __FP_UNPACK_D(A, frA); + __FP_UNPACK_D(B, frB); + __FP_UNPACK_D(C, frC); + +#ifdef DEBUG + printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c); + printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c); + printk("C: %ld %lu %lu %ld (%ld)\n", C_s, C_f1, C_f0, C_e, C_c); +#endif + + if ((A_c == FP_CLS_INF && C_c == FP_CLS_ZERO) || + (A_c == FP_CLS_ZERO && C_c == FP_CLS_INF)) + ret |= EFLAG_VXIMZ; + + FP_MUL_D(T, A, C); + + if (B_c != FP_CLS_NAN) + B_s ^= 1; + + if (T_s != B_s && T_c == FP_CLS_INF && B_c == FP_CLS_INF) + ret |= EFLAG_VXISI; + + FP_ADD_D(R, T, B); + +#ifdef DEBUG + printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c); +#endif + + return (ret | __FP_PACK_DS(frD, R)); +} diff --git a/arch/powerpc/math-emu/fmul.c b/arch/powerpc/math-emu/fmul.c new file mode 100644 index 000000000000..66c7e79aae2e --- /dev/null +++ b/arch/powerpc/math-emu/fmul.c @@ -0,0 +1,42 @@ +#include <linux/types.h> +#include <linux/errno.h> +#include <asm/uaccess.h> + +#include "soft-fp.h" +#include "double.h" + +int +fmul(void *frD, void *frA, void *frB) +{ + FP_DECL_D(A); + FP_DECL_D(B); + FP_DECL_D(R); + int ret = 0; + +#ifdef DEBUG + printk("%s: %p %p %p\n", __FUNCTION__, frD, frA, frB); +#endif + + __FP_UNPACK_D(A, frA); + __FP_UNPACK_D(B, frB); + +#ifdef DEBUG + printk("A: %ld %lu %lu %ld (%ld) [%08lx.%08lx %lx]\n", + A_s, A_f1, A_f0, A_e, A_c, A_f1, A_f0, A_e + 1023); + printk("B: %ld %lu %lu %ld (%ld) [%08lx.%08lx %lx]\n", + B_s, B_f1, B_f0, B_e, B_c, B_f1, B_f0, B_e + 1023); +#endif + + if ((A_c == FP_CLS_INF && B_c == FP_CLS_ZERO) || + (A_c == FP_CLS_ZERO && B_c == FP_CLS_INF)) + ret |= EFLAG_VXIMZ; + + FP_MUL_D(R, A, B); + +#ifdef DEBUG + printk("D: %ld %lu %lu %ld (%ld) [%08lx.%08lx %lx]\n", + R_s, R_f1, R_f0, R_e, R_c, R_f1, R_f0, R_e + 1023); +#endif + + return (ret | __FP_PACK_D(frD, R)); +} diff --git a/arch/powerpc/math-emu/fmuls.c b/arch/powerpc/math-emu/fmuls.c new file mode 100644 index 000000000000..26bc4278271c --- /dev/null +++ b/arch/powerpc/math-emu/fmuls.c @@ -0,0 +1,43 @@ +#include <linux/types.h> +#include <linux/errno.h> +#include <asm/uaccess.h> + +#include "soft-fp.h" +#include "double.h" +#include "single.h" + +int +fmuls(void *frD, void *frA, void *frB) +{ + FP_DECL_D(A); + FP_DECL_D(B); + FP_DECL_D(R); + int ret = 0; + +#ifdef DEBUG + printk("%s: %p %p %p\n", __FUNCTION__, frD, frA, frB); +#endif + + __FP_UNPACK_D(A, frA); + __FP_UNPACK_D(B, frB); + +#ifdef DEBUG + printk("A: %ld %lu %lu %ld (%ld) [%08lx.%08lx %lx]\n", + A_s, A_f1, A_f0, A_e, A_c, A_f1, A_f0, A_e + 1023); + printk("B: %ld %lu %lu %ld (%ld) [%08lx.%08lx %lx]\n", + B_s, B_f1, B_f0, B_e, B_c, B_f1, B_f0, B_e + 1023); +#endif + + if ((A_c == FP_CLS_INF && B_c == FP_CLS_ZERO) || + (A_c == FP_CLS_ZERO && B_c == FP_CLS_INF)) + ret |= EFLAG_VXIMZ; + + FP_MUL_D(R, A, B); + +#ifdef DEBUG + printk("D: %ld %lu %lu %ld (%ld) [%08lx.%08lx %lx]\n", + R_s, R_f1, R_f0, R_e, R_c, R_f1, R_f0, R_e + 1023); +#endif + + return (ret | __FP_PACK_DS(frD, R)); +} diff --git a/arch/powerpc/math-emu/fnabs.c b/arch/powerpc/math-emu/fnabs.c new file mode 100644 index 000000000000..c6b913d179e0 --- /dev/null +++ b/arch/powerpc/math-emu/fnabs.c @@ -0,0 +1,18 @@ +#include <linux/types.h> +#include <linux/errno.h> +#include <asm/uaccess.h> + +int +fnabs(u32 *frD, u32 *frB) +{ + frD[0] = frB[0] | 0x80000000; + frD[1] = frB[1]; + +#ifdef DEBUG + printk("%s: D %p, B %p: ", __FUNCTION__, frD, frB); + dump_double(frD); + printk("\n"); +#endif + + return 0; +} diff --git a/arch/powerpc/math-emu/fneg.c b/arch/powerpc/math-emu/fneg.c new file mode 100644 index 000000000000..fe9a98deff69 --- /dev/null +++ b/arch/powerpc/math-emu/fneg.c @@ -0,0 +1,18 @@ +#include <linux/types.h> +#include <linux/errno.h> +#include <asm/uaccess.h> + +int +fneg(u32 *frD, u32 *frB) +{ + frD[0] = frB[0] ^ 0x80000000; + frD[1] = frB[1]; + +#ifdef DEBUG + printk("%s: D %p, B %p: ", __FUNCTION__, frD, frB); + dump_double(frD); + printk("\n"); +#endif + + return 0; +} diff --git a/arch/powerpc/math-emu/fnmadd.c b/arch/powerpc/math-emu/fnmadd.c new file mode 100644 index 000000000000..7f312276d920 --- /dev/null +++ b/arch/powerpc/math-emu/fnmadd.c @@ -0,0 +1,51 @@ +#include <linux/types.h> +#include <linux/errno.h> +#include <asm/uaccess.h> + +#include "soft-fp.h" +#include "double.h" + +int +fnmadd(void *frD, void *frA, void *frB, void *frC) +{ + FP_DECL_D(R); + FP_DECL_D(A); + FP_DECL_D(B); + FP_DECL_D(C); + FP_DECL_D(T); + int ret = 0; + +#ifdef DEBUG + printk("%s: %p %p %p %p\n", __FUNCTION__, frD, frA, frB, frC); +#endif + + __FP_UNPACK_D(A, frA); + __FP_UNPACK_D(B, frB); + __FP_UNPACK_D(C, frC); + +#ifdef DEBUG + printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c); + printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c); + printk("C: %ld %lu %lu %ld (%ld)\n", C_s, C_f1, C_f0, C_e, C_c); +#endif + + if ((A_c == FP_CLS_INF && C_c == FP_CLS_ZERO) || + (A_c == FP_CLS_ZERO && C_c == FP_CLS_INF)) + ret |= EFLAG_VXIMZ; + + FP_MUL_D(T, A, C); + + if (T_s != B_s && T_c == FP_CLS_INF && B_c == FP_CLS_INF) + ret |= EFLAG_VXISI; + + FP_ADD_D(R, T, B); + + if (R_c != FP_CLS_NAN) + R_s ^= 1; + +#ifdef DEBUG + printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c); +#endif + + return (ret | __FP_PACK_D(frD, R)); +} diff --git a/arch/powerpc/math-emu/fnmadds.c b/arch/powerpc/math-emu/fnmadds.c new file mode 100644 index 000000000000..65454c9c70bc --- /dev/null +++ b/arch/powerpc/math-emu/fnmadds.c @@ -0,0 +1,52 @@ +#include <linux/types.h> +#include <linux/errno.h> +#include <asm/uaccess.h> + +#include "soft-fp.h" +#include "double.h" +#include "single.h" + +int +fnmadds(void *frD, void *frA, void *frB, void *frC) +{ + FP_DECL_D(R); + FP_DECL_D(A); + FP_DECL_D(B); + FP_DECL_D(C); + FP_DECL_D(T); + int ret = 0; + +#ifdef DEBUG + printk("%s: %p %p %p %p\n", __FUNCTION__, frD, frA, frB, frC); +#endif + + __FP_UNPACK_D(A, frA); + __FP_UNPACK_D(B, frB); + __FP_UNPACK_D(C, frC); + +#ifdef DEBUG + printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c); + printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c); + printk("C: %ld %lu %lu %ld (%ld)\n", C_s, C_f1, C_f0, C_e, C_c); +#endif + + if ((A_c == FP_CLS_INF && C_c == FP_CLS_ZERO) || + (A_c == FP_CLS_ZERO && C_c == FP_CLS_INF)) + ret |= EFLAG_VXIMZ; + + FP_MUL_D(T, A, C); + + if (T_s != B_s && T_c == FP_CLS_INF && B_c == FP_CLS_INF) + ret |= EFLAG_VXISI; + + FP_ADD_D(R, T, B); + + if (R_c != FP_CLS_NAN) + R_s ^= 1; + +#ifdef DEBUG + printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c); +#endif + + return (ret | __FP_PACK_DS(frD, R)); +} diff --git a/arch/powerpc/math-emu/fnmsub.c b/arch/powerpc/math-emu/fnmsub.c new file mode 100644 index 000000000000..f1ca7482b5f0 --- /dev/null +++ b/arch/powerpc/math-emu/fnmsub.c @@ -0,0 +1,54 @@ +#include <linux/types.h> +#include <linux/errno.h> +#include <asm/uaccess.h> + +#include "soft-fp.h" +#include "double.h" + +int +fnmsub(void *frD, void *frA, void *frB, void *frC) +{ + FP_DECL_D(R); + FP_DECL_D(A); + FP_DECL_D(B); + FP_DECL_D(C); + FP_DECL_D(T); + int ret = 0; + +#ifdef DEBUG + printk("%s: %p %p %p %p\n", __FUNCTION__, frD, frA, frB, frC); +#endif + + __FP_UNPACK_D(A, frA); + __FP_UNPACK_D(B, frB); + __FP_UNPACK_D(C, frC); + +#ifdef DEBUG + printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c); + printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c); + printk("C: %ld %lu %lu %ld (%ld)\n", C_s, C_f1, C_f0, C_e, C_c); +#endif + + if ((A_c == FP_CLS_INF && C_c == FP_CLS_ZERO) || + (A_c == FP_CLS_ZERO && C_c == FP_CLS_INF)) + ret |= EFLAG_VXIMZ; + + FP_MUL_D(T, A, C); + + if (B_c != FP_CLS_NAN) + B_s ^= 1; + + if (T_s != B_s && T_c == FP_CLS_INF && B_c == FP_CLS_INF) + ret |= EFLAG_VXISI; + + FP_ADD_D(R, T, B); + + if (R_c != FP_CLS_NAN) + R_s ^= 1; + +#ifdef DEBUG + printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c); +#endif + + return (ret | __FP_PACK_D(frD, R)); +} diff --git a/arch/powerpc/math-emu/fnmsubs.c b/arch/powerpc/math-emu/fnmsubs.c new file mode 100644 index 000000000000..5c9a09a87dc7 --- /dev/null +++ b/arch/powerpc/math-emu/fnmsubs.c @@ -0,0 +1,55 @@ +#include <linux/types.h> +#include <linux/errno.h> +#include <asm/uaccess.h> + +#include "soft-fp.h" +#include "double.h" +#include "single.h" + +int +fnmsubs(void *frD, void *frA, void *frB, void *frC) +{ + FP_DECL_D(R); + FP_DECL_D(A); + FP_DECL_D(B); + FP_DECL_D(C); + FP_DECL_D(T); + int ret = 0; + +#ifdef DEBUG + printk("%s: %p %p %p %p\n", __FUNCTION__, frD, frA, frB, frC); +#endif + + __FP_UNPACK_D(A, frA); + __FP_UNPACK_D(B, frB); + __FP_UNPACK_D(C, frC); + +#ifdef DEBUG + printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c); + printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c); + printk("C: %ld %lu %lu %ld (%ld)\n", C_s, C_f1, C_f0, C_e, C_c); +#endif + + if ((A_c == FP_CLS_INF && C_c == FP_CLS_ZERO) || + (A_c == FP_CLS_ZERO && C_c == FP_CLS_INF)) + ret |= EFLAG_VXIMZ; + + FP_MUL_D(T, A, C); + + if (B_c != FP_CLS_NAN) + B_s ^= 1; + + if (T_s != B_s && T_c == FP_CLS_INF && B_c == FP_CLS_INF) + ret |= EFLAG_VXISI; + + FP_ADD_D(R, T, B); + + if (R_c != FP_CLS_NAN) + R_s ^= 1; + +#ifdef DEBUG + printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c); +#endif + + return (ret | __FP_PACK_DS(frD, R)); +} diff --git a/arch/powerpc/math-emu/fres.c b/arch/powerpc/math-emu/fres.c new file mode 100644 index 000000000000..ec11e46d20af --- /dev/null +++ b/arch/powerpc/math-emu/fres.c @@ -0,0 +1,12 @@ +#include <linux/types.h> +#include <linux/errno.h> +#include <asm/uaccess.h> + +int +fres(void *frD, void *frB) +{ +#ifdef DEBUG + printk("%s: %p %p\n", __FUNCTION__, frD, frB); +#endif + return -ENOSYS; +} diff --git a/arch/powerpc/math-emu/frsp.c b/arch/powerpc/math-emu/frsp.c new file mode 100644 index 000000000000..d879b2a3d0c9 --- /dev/null +++ b/arch/powerpc/math-emu/frsp.c @@ -0,0 +1,25 @@ +#include <linux/types.h> +#include <linux/errno.h> +#include <asm/uaccess.h> + +#include "soft-fp.h" +#include "double.h" +#include "single.h" + +int +frsp(void *frD, void *frB) +{ + FP_DECL_D(B); + +#ifdef DEBUG + printk("%s: D %p, B %p\n", __FUNCTION__, frD, frB); +#endif + + __FP_UNPACK_D(B, frB); + +#ifdef DEBUG + printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c); +#endif + + return __FP_PACK_DS(frD, B); +} diff --git a/arch/powerpc/math-emu/frsqrte.c b/arch/powerpc/math-emu/frsqrte.c new file mode 100644 index 000000000000..a11ae1829850 --- /dev/null +++ b/arch/powerpc/math-emu/frsqrte.c @@ -0,0 +1,12 @@ +#include <linux/types.h> +#include <linux/errno.h> +#include <asm/uaccess.h> + +int +frsqrte(void *frD, void *frB) +{ +#ifdef DEBUG + printk("%s: %p %p\n", __FUNCTION__, frD, frB); +#endif + return 0; +} diff --git a/arch/powerpc/math-emu/fsel.c b/arch/powerpc/math-emu/fsel.c new file mode 100644 index 000000000000..e36e6e72819a --- /dev/null +++ b/arch/powerpc/math-emu/fsel.c @@ -0,0 +1,38 @@ +#include <linux/types.h> +#include <linux/errno.h> +#include <asm/uaccess.h> + +#include "soft-fp.h" +#include "double.h" + +int +fsel(u32 *frD, void *frA, u32 *frB, u32 *frC) +{ + FP_DECL_D(A); + +#ifdef DEBUG + printk("%s: %p %p %p %p\n", __FUNCTION__, frD, frA, frB, frC); +#endif + + __FP_UNPACK_D(A, frA); + +#ifdef DEBUG + printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c); + printk("B: %08x %08x\n", frB[0], frB[1]); + printk("C: %08x %08x\n", frC[0], frC[1]); +#endif + + if (A_c == FP_CLS_NAN || (A_c != FP_CLS_ZERO && A_s)) { + frD[0] = frB[0]; + frD[1] = frB[1]; + } else { + frD[0] = frC[0]; + frD[1] = frC[1]; + } + +#ifdef DEBUG + printk("D: %08x.%08x\n", frD[0], frD[1]); +#endif + + return 0; +} diff --git a/arch/powerpc/math-emu/fsqrt.c b/arch/powerpc/math-emu/fsqrt.c new file mode 100644 index 000000000000..6f8319f64a8a --- /dev/null +++ b/arch/powerpc/math-emu/fsqrt.c @@ -0,0 +1,37 @@ +#include <linux/types.h> +#include <linux/errno.h> +#include <asm/uaccess.h> + +#include "soft-fp.h" +#include "double.h" + +int +fsqrt(void *frD, void *frB) +{ + FP_DECL_D(B); + FP_DECL_D(R); + int ret = 0; + +#ifdef DEBUG + printk("%s: %p %p %p %p\n", __FUNCTION__, frD, frB); +#endif + + __FP_UNPACK_D(B, frB); + +#ifdef DEBUG + printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c); +#endif + + if (B_s && B_c != FP_CLS_ZERO) + ret |= EFLAG_VXSQRT; + if (B_c == FP_CLS_NAN) + ret |= EFLAG_VXSNAN; + + FP_SQRT_D(R, B); + +#ifdef DEBUG + printk("R: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c); +#endif + + return (ret | __FP_PACK_D(frD, R)); +} diff --git a/arch/powerpc/math-emu/fsqrts.c b/arch/powerpc/math-emu/fsqrts.c new file mode 100644 index 000000000000..3b2b1cf55c12 --- /dev/null +++ b/arch/powerpc/math-emu/fsqrts.c @@ -0,0 +1,38 @@ +#include <linux/types.h> +#include <linux/errno.h> +#include <asm/uaccess.h> + +#include "soft-fp.h" +#include "double.h" +#include "single.h" + +int +fsqrts(void *frD, void *frB) +{ + FP_DECL_D(B); + FP_DECL_D(R); + int ret = 0; + +#ifdef DEBUG + printk("%s: %p %p %p %p\n", __FUNCTION__, frD, frB); +#endif + + __FP_UNPACK_D(B, frB); + +#ifdef DEBUG + printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c); +#endif + + if (B_s && B_c != FP_CLS_ZERO) + ret |= EFLAG_VXSQRT; + if (B_c == FP_CLS_NAN) + ret |= EFLAG_VXSNAN; + + FP_SQRT_D(R, B); + +#ifdef DEBUG + printk("R: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c); +#endif + + return (ret | __FP_PACK_DS(frD, R)); +} diff --git a/arch/powerpc/math-emu/fsub.c b/arch/powerpc/math-emu/fsub.c new file mode 100644 index 000000000000..956679042bb2 --- /dev/null +++ b/arch/powerpc/math-emu/fsub.c @@ -0,0 +1,41 @@ +#include <linux/types.h> +#include <linux/errno.h> +#include <asm/uaccess.h> + +#include "soft-fp.h" +#include "double.h" + +int +fsub(void *frD, void *frA, void *frB) +{ + FP_DECL_D(A); + FP_DECL_D(B); + FP_DECL_D(R); + int ret = 0; + +#ifdef DEBUG + printk("%s: %p %p %p\n", __FUNCTION__, frD, frA, frB); +#endif + + __FP_UNPACK_D(A, frA); + __FP_UNPACK_D(B, frB); + +#ifdef DEBUG + printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c); + printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c); +#endif + + if (B_c != FP_CLS_NAN) + B_s ^= 1; + + if (A_s != B_s && A_c == FP_CLS_INF && B_c == FP_CLS_INF) + ret |= EFLAG_VXISI; + + FP_ADD_D(R, A, B); + +#ifdef DEBUG + printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c); +#endif + + return (ret | __FP_PACK_D(frD, R)); +} diff --git a/arch/powerpc/math-emu/fsubs.c b/arch/powerpc/math-emu/fsubs.c new file mode 100644 index 000000000000..3428117dfe8c --- /dev/null +++ b/arch/powerpc/math-emu/fsubs.c @@ -0,0 +1,42 @@ +#include <linux/types.h> +#include <linux/errno.h> +#include <asm/uaccess.h> + +#include "soft-fp.h" +#include "double.h" +#include "single.h" + +int +fsubs(void *frD, void *frA, void *frB) +{ + FP_DECL_D(A); + FP_DECL_D(B); + FP_DECL_D(R); + int ret = 0; + +#ifdef DEBUG + printk("%s: %p %p %p\n", __FUNCTION__, frD, frA, frB); +#endif + + __FP_UNPACK_D(A, frA); + __FP_UNPACK_D(B, frB); + +#ifdef DEBUG + printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c); + printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c); +#endif + + if (B_c != FP_CLS_NAN) + B_s ^= 1; + + if (A_s != B_s && A_c == FP_CLS_INF && B_c == FP_CLS_INF) + ret |= EFLAG_VXISI; + + FP_ADD_D(R, A, B); + +#ifdef DEBUG + printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c); +#endif + + return (ret | __FP_PACK_DS(frD, R)); +} diff --git a/arch/powerpc/math-emu/lfd.c b/arch/powerpc/math-emu/lfd.c new file mode 100644 index 000000000000..7d38101c329b --- /dev/null +++ b/arch/powerpc/math-emu/lfd.c @@ -0,0 +1,19 @@ +#include <linux/types.h> +#include <linux/errno.h> +#include <asm/uaccess.h> + +#include "sfp-machine.h" +#include "double.h" + +int +lfd(void *frD, void *ea) +{ + if (copy_from_user(frD, ea, sizeof(double))) + return -EFAULT; +#ifdef DEBUG + printk("%s: D %p, ea %p: ", __FUNCTION__, frD, ea); + dump_double(frD); + printk("\n"); +#endif + return 0; +} diff --git a/arch/powerpc/math-emu/lfs.c b/arch/powerpc/math-emu/lfs.c new file mode 100644 index 000000000000..c86dee3d7655 --- /dev/null +++ b/arch/powerpc/math-emu/lfs.c @@ -0,0 +1,37 @@ +#include <linux/types.h> +#include <linux/errno.h> +#include <asm/uaccess.h> + +#include "soft-fp.h" +#include "double.h" +#include "single.h" + +int +lfs(void *frD, void *ea) +{ + FP_DECL_D(R); + FP_DECL_S(A); + float f; + +#ifdef DEBUG + printk("%s: D %p, ea %p\n", __FUNCTION__, frD, ea); +#endif + + if (copy_from_user(&f, ea, sizeof(float))) + return -EFAULT; + + __FP_UNPACK_S(A, &f); + +#ifdef DEBUG + printk("A: %ld %lu %ld (%ld) [%08lx]\n", A_s, A_f, A_e, A_c, + *(unsigned long *)&f); +#endif + + FP_CONV(D, S, 2, 1, R, A); + +#ifdef DEBUG + printk("R: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c); +#endif + + return __FP_PACK_D(frD, R); +} diff --git a/arch/powerpc/math-emu/math.c b/arch/powerpc/math-emu/math.c new file mode 100644 index 000000000000..589153472761 --- /dev/null +++ b/arch/powerpc/math-emu/math.c @@ -0,0 +1,483 @@ +/* + * Copyright (C) 1999 Eddie C. Dost (ecd@atecom.com) + */ + +#include <linux/config.h> +#include <linux/types.h> +#include <linux/sched.h> + +#include <asm/uaccess.h> +#include <asm/reg.h> + +#include "sfp-machine.h" +#include "double.h" + +#define FLOATFUNC(x) extern int x(void *, void *, void *, void *) + +FLOATFUNC(fadd); +FLOATFUNC(fadds); +FLOATFUNC(fdiv); +FLOATFUNC(fdivs); +FLOATFUNC(fmul); +FLOATFUNC(fmuls); +FLOATFUNC(fsub); +FLOATFUNC(fsubs); + +FLOATFUNC(fmadd); +FLOATFUNC(fmadds); +FLOATFUNC(fmsub); +FLOATFUNC(fmsubs); +FLOATFUNC(fnmadd); +FLOATFUNC(fnmadds); +FLOATFUNC(fnmsub); +FLOATFUNC(fnmsubs); + +FLOATFUNC(fctiw); +FLOATFUNC(fctiwz); +FLOATFUNC(frsp); + +FLOATFUNC(fcmpo); +FLOATFUNC(fcmpu); + +FLOATFUNC(mcrfs); +FLOATFUNC(mffs); +FLOATFUNC(mtfsb0); +FLOATFUNC(mtfsb1); +FLOATFUNC(mtfsf); +FLOATFUNC(mtfsfi); + +FLOATFUNC(lfd); +FLOATFUNC(lfs); + +FLOATFUNC(stfd); +FLOATFUNC(stfs); +FLOATFUNC(stfiwx); + +FLOATFUNC(fabs); +FLOATFUNC(fmr); +FLOATFUNC(fnabs); +FLOATFUNC(fneg); + +/* Optional */ +FLOATFUNC(fres); +FLOATFUNC(frsqrte); +FLOATFUNC(fsel); +FLOATFUNC(fsqrt); +FLOATFUNC(fsqrts); + + +#define OP31 0x1f /* 31 */ +#define LFS 0x30 /* 48 */ +#define LFSU 0x31 /* 49 */ +#define LFD 0x32 /* 50 */ +#define LFDU 0x33 /* 51 */ +#define STFS 0x34 /* 52 */ +#define STFSU 0x35 /* 53 */ +#define STFD 0x36 /* 54 */ +#define STFDU 0x37 /* 55 */ +#define OP59 0x3b /* 59 */ +#define OP63 0x3f /* 63 */ + +/* Opcode 31: */ +/* X-Form: */ +#define LFSX 0x217 /* 535 */ +#define LFSUX 0x237 /* 567 */ +#define LFDX 0x257 /* 599 */ +#define LFDUX 0x277 /* 631 */ +#define STFSX 0x297 /* 663 */ +#define STFSUX 0x2b7 /* 695 */ +#define STFDX 0x2d7 /* 727 */ +#define STFDUX 0x2f7 /* 759 */ +#define STFIWX 0x3d7 /* 983 */ + +/* Opcode 59: */ +/* A-Form: */ +#define FDIVS 0x012 /* 18 */ +#define FSUBS 0x014 /* 20 */ +#define FADDS 0x015 /* 21 */ +#define FSQRTS 0x016 /* 22 */ +#define FRES 0x018 /* 24 */ +#define FMULS 0x019 /* 25 */ +#define FMSUBS 0x01c /* 28 */ +#define FMADDS 0x01d /* 29 */ +#define FNMSUBS 0x01e /* 30 */ +#define FNMADDS 0x01f /* 31 */ + +/* Opcode 63: */ +/* A-Form: */ +#define FDIV 0x012 /* 18 */ +#define FSUB 0x014 /* 20 */ +#define FADD 0x015 /* 21 */ +#define FSQRT 0x016 /* 22 */ +#define FSEL 0x017 /* 23 */ +#define FMUL 0x019 /* 25 */ +#define FRSQRTE 0x01a /* 26 */ +#define FMSUB 0x01c /* 28 */ +#define FMADD 0x01d /* 29 */ +#define FNMSUB 0x01e /* 30 */ +#define FNMADD 0x01f /* 31 */ + +/* X-Form: */ +#define FCMPU 0x000 /* 0 */ +#define FRSP 0x00c /* 12 */ +#define FCTIW 0x00e /* 14 */ +#define FCTIWZ 0x00f /* 15 */ +#define FCMPO 0x020 /* 32 */ +#define MTFSB1 0x026 /* 38 */ +#define FNEG 0x028 /* 40 */ +#define MCRFS 0x040 /* 64 */ +#define MTFSB0 0x046 /* 70 */ +#define FMR 0x048 /* 72 */ +#define MTFSFI 0x086 /* 134 */ +#define FNABS 0x088 /* 136 */ +#define FABS 0x108 /* 264 */ +#define MFFS 0x247 /* 583 */ +#define MTFSF 0x2c7 /* 711 */ + + +#define AB 2 +#define AC 3 +#define ABC 4 +#define D 5 +#define DU 6 +#define X 7 +#define XA 8 +#define XB 9 +#define XCR 11 +#define XCRB 12 +#define XCRI 13 +#define XCRL 16 +#define XE 14 +#define XEU 15 +#define XFLB 10 + +#ifdef CONFIG_MATH_EMULATION +static int +record_exception(struct pt_regs *regs, int eflag) +{ + u32 fpscr; + + fpscr = __FPU_FPSCR; + + if (eflag) { + fpscr |= FPSCR_FX; + if (eflag & EFLAG_OVERFLOW) + fpscr |= FPSCR_OX; + if (eflag & EFLAG_UNDERFLOW) + fpscr |= FPSCR_UX; + if (eflag & EFLAG_DIVZERO) + fpscr |= FPSCR_ZX; + if (eflag & EFLAG_INEXACT) + fpscr |= FPSCR_XX; + if (eflag & EFLAG_VXSNAN) + fpscr |= FPSCR_VXSNAN; + if (eflag & EFLAG_VXISI) + fpscr |= FPSCR_VXISI; + if (eflag & EFLAG_VXIDI) + fpscr |= FPSCR_VXIDI; + if (eflag & EFLAG_VXZDZ) + fpscr |= FPSCR_VXZDZ; + if (eflag & EFLAG_VXIMZ) + fpscr |= FPSCR_VXIMZ; + if (eflag & EFLAG_VXVC) + fpscr |= FPSCR_VXVC; + if (eflag & EFLAG_VXSOFT) + fpscr |= FPSCR_VXSOFT; + if (eflag & EFLAG_VXSQRT) + fpscr |= FPSCR_VXSQRT; + if (eflag & EFLAG_VXCVI) + fpscr |= FPSCR_VXCVI; + } + + fpscr &= ~(FPSCR_VX); + if (fpscr & (FPSCR_VXSNAN | FPSCR_VXISI | FPSCR_VXIDI | + FPSCR_VXZDZ | FPSCR_VXIMZ | FPSCR_VXVC | + FPSCR_VXSOFT | FPSCR_VXSQRT | FPSCR_VXCVI)) + fpscr |= FPSCR_VX; + + fpscr &= ~(FPSCR_FEX); + if (((fpscr & FPSCR_VX) && (fpscr & FPSCR_VE)) || + ((fpscr & FPSCR_OX) && (fpscr & FPSCR_OE)) || + ((fpscr & FPSCR_UX) && (fpscr & FPSCR_UE)) || + ((fpscr & FPSCR_ZX) && (fpscr & FPSCR_ZE)) || + ((fpscr & FPSCR_XX) && (fpscr & FPSCR_XE))) + fpscr |= FPSCR_FEX; + + __FPU_FPSCR = fpscr; + + return (fpscr & FPSCR_FEX) ? 1 : 0; +} +#endif /* CONFIG_MATH_EMULATION */ + +int +do_mathemu(struct pt_regs *regs) +{ + void *op0 = 0, *op1 = 0, *op2 = 0, *op3 = 0; + unsigned long pc = regs->nip; + signed short sdisp; + u32 insn = 0; + int idx = 0; +#ifdef CONFIG_MATH_EMULATION + int (*func)(void *, void *, void *, void *); + int type = 0; + int eflag, trap; +#endif + + if (get_user(insn, (u32 *)pc)) + return -EFAULT; + +#ifndef CONFIG_MATH_EMULATION + switch (insn >> 26) { + case LFD: + idx = (insn >> 16) & 0x1f; + sdisp = (insn & 0xffff); + op0 = (void *)¤t->thread.fpr[(insn >> 21) & 0x1f]; + op1 = (void *)((idx ? regs->gpr[idx] : 0) + sdisp); + lfd(op0, op1, op2, op3); + break; + case LFDU: + idx = (insn >> 16) & 0x1f; + sdisp = (insn & 0xffff); + op0 = (void *)¤t->thread.fpr[(insn >> 21) & 0x1f]; + op1 = (void *)((idx ? regs->gpr[idx] : 0) + sdisp); + lfd(op0, op1, op2, op3); + regs->gpr[idx] = (unsigned long)op1; + break; + case STFD: + idx = (insn >> 16) & 0x1f; + sdisp = (insn & 0xffff); + op0 = (void *)¤t->thread.fpr[(insn >> 21) & 0x1f]; + op1 = (void *)((idx ? regs->gpr[idx] : 0) + sdisp); + stfd(op0, op1, op2, op3); + break; + case STFDU: + idx = (insn >> 16) & 0x1f; + sdisp = (insn & 0xffff); + op0 = (void *)¤t->thread.fpr[(insn >> 21) & 0x1f]; + op1 = (void *)((idx ? regs->gpr[idx] : 0) + sdisp); + stfd(op0, op1, op2, op3); + regs->gpr[idx] = (unsigned long)op1; + break; + case OP63: + op0 = (void *)¤t->thread.fpr[(insn >> 21) & 0x1f]; + op1 = (void *)¤t->thread.fpr[(insn >> 11) & 0x1f]; + fmr(op0, op1, op2, op3); + break; + default: + goto illegal; + } +#else /* CONFIG_MATH_EMULATION */ + switch (insn >> 26) { + case LFS: func = lfs; type = D; break; + case LFSU: func = lfs; type = DU; break; + case LFD: func = lfd; type = D; break; + case LFDU: func = lfd; type = DU; break; + case STFS: func = stfs; type = D; break; + case STFSU: func = stfs; type = DU; break; + case STFD: func = stfd; type = D; break; + case STFDU: func = stfd; type = DU; break; + + case OP31: + switch ((insn >> 1) & 0x3ff) { + case LFSX: func = lfs; type = XE; break; + case LFSUX: func = lfs; type = XEU; break; + case LFDX: func = lfd; type = XE; break; + case LFDUX: func = lfd; type = XEU; break; + case STFSX: func = stfs; type = XE; break; + case STFSUX: func = stfs; type = XEU; break; + case STFDX: func = stfd; type = XE; break; + case STFDUX: func = stfd; type = XEU; break; + case STFIWX: func = stfiwx; type = XE; break; + default: + goto illegal; + } + break; + + case OP59: + switch ((insn >> 1) & 0x1f) { + case FDIVS: func = fdivs; type = AB; break; + case FSUBS: func = fsubs; type = AB; break; + case FADDS: func = fadds; type = AB; break; + case FSQRTS: func = fsqrts; type = AB; break; + case FRES: func = fres; type = AB; break; + case FMULS: func = fmuls; type = AC; break; + case FMSUBS: func = fmsubs; type = ABC; break; + case FMADDS: func = fmadds; type = ABC; break; + case FNMSUBS: func = fnmsubs; type = ABC; break; + case FNMADDS: func = fnmadds; type = ABC; break; + default: + goto illegal; + } + break; + + case OP63: + if (insn & 0x20) { + switch ((insn >> 1) & 0x1f) { + case FDIV: func = fdiv; type = AB; break; + case FSUB: func = fsub; type = AB; break; + case FADD: func = fadd; type = AB; break; + case FSQRT: func = fsqrt; type = AB; break; + case FSEL: func = fsel; type = ABC; break; + case FMUL: func = fmul; type = AC; break; + case FRSQRTE: func = frsqrte; type = AB; break; + case FMSUB: func = fmsub; type = ABC; break; + case FMADD: func = fmadd; type = ABC; break; + case FNMSUB: func = fnmsub; type = ABC; break; + case FNMADD: func = fnmadd; type = ABC; break; + default: + goto illegal; + } + break; + } + + switch ((insn >> 1) & 0x3ff) { + case FCMPU: func = fcmpu; type = XCR; break; + case FRSP: func = frsp; type = XB; break; + case FCTIW: func = fctiw; type = XB; break; + case FCTIWZ: func = fctiwz; type = XB; break; + case FCMPO: func = fcmpo; type = XCR; break; + case MTFSB1: func = mtfsb1; type = XCRB; break; + case FNEG: func = fneg; type = XB; break; + case MCRFS: func = mcrfs; type = XCRL; break; + case MTFSB0: func = mtfsb0; type = XCRB; break; + case FMR: func = fmr; type = XB; break; + case MTFSFI: func = mtfsfi; type = XCRI; break; + case FNABS: func = fnabs; type = XB; break; + case FABS: func = fabs; type = XB; break; + case MFFS: func = mffs; type = X; break; + case MTFSF: func = mtfsf; type = XFLB; break; + default: + goto illegal; + } + break; + + default: + goto illegal; + } + + switch (type) { + case AB: + op0 = (void *)¤t->thread.fpr[(insn >> 21) & 0x1f]; + op1 = (void *)¤t->thread.fpr[(insn >> 16) & 0x1f]; + op2 = (void *)¤t->thread.fpr[(insn >> 11) & 0x1f]; + break; + + case AC: + op0 = (void *)¤t->thread.fpr[(insn >> 21) & 0x1f]; + op1 = (void *)¤t->thread.fpr[(insn >> 16) & 0x1f]; + op2 = (void *)¤t->thread.fpr[(insn >> 6) & 0x1f]; + break; + + case ABC: + op0 = (void *)¤t->thread.fpr[(insn >> 21) & 0x1f]; + op1 = (void *)¤t->thread.fpr[(insn >> 16) & 0x1f]; + op2 = (void *)¤t->thread.fpr[(insn >> 11) & 0x1f]; + op3 = (void *)¤t->thread.fpr[(insn >> 6) & 0x1f]; + break; + + case D: + idx = (insn >> 16) & 0x1f; + sdisp = (insn & 0xffff); + op0 = (void *)¤t->thread.fpr[(insn >> 21) & 0x1f]; + op1 = (void *)((idx ? regs->gpr[idx] : 0) + sdisp); + break; + + case DU: + idx = (insn >> 16) & 0x1f; + if (!idx) + goto illegal; + + sdisp = (insn & 0xffff); + op0 = (void *)¤t->thread.fpr[(insn >> 21) & 0x1f]; + op1 = (void *)(regs->gpr[idx] + sdisp); + break; + + case X: + op0 = (void *)¤t->thread.fpr[(insn >> 21) & 0x1f]; + break; + + case XA: + op0 = (void *)¤t->thread.fpr[(insn >> 21) & 0x1f]; + op1 = (void *)¤t->thread.fpr[(insn >> 16) & 0x1f]; + break; + + case XB: + op0 = (void *)¤t->thread.fpr[(insn >> 21) & 0x1f]; + op1 = (void *)¤t->thread.fpr[(insn >> 11) & 0x1f]; + break; + + case XE: + idx = (insn >> 16) & 0x1f; + if (!idx) + goto illegal; + + op0 = (void *)¤t->thread.fpr[(insn >> 21) & 0x1f]; + op1 = (void *)(regs->gpr[idx] + regs->gpr[(insn >> 11) & 0x1f]); + break; + + case XEU: + idx = (insn >> 16) & 0x1f; + op0 = (void *)¤t->thread.fpr[(insn >> 21) & 0x1f]; + op1 = (void *)((idx ? regs->gpr[idx] : 0) + + regs->gpr[(insn >> 11) & 0x1f]); + break; + + case XCR: + op0 = (void *)®s->ccr; + op1 = (void *)((insn >> 23) & 0x7); + op2 = (void *)¤t->thread.fpr[(insn >> 16) & 0x1f]; + op3 = (void *)¤t->thread.fpr[(insn >> 11) & 0x1f]; + break; + + case XCRL: + op0 = (void *)®s->ccr; + op1 = (void *)((insn >> 23) & 0x7); + op2 = (void *)((insn >> 18) & 0x7); + break; + + case XCRB: + op0 = (void *)((insn >> 21) & 0x1f); + break; + + case XCRI: + op0 = (void *)((insn >> 23) & 0x7); + op1 = (void *)((insn >> 12) & 0xf); + break; + + case XFLB: + op0 = (void *)((insn >> 17) & 0xff); + op1 = (void *)¤t->thread.fpr[(insn >> 11) & 0x1f]; + break; + + default: + goto illegal; + } + + eflag = func(op0, op1, op2, op3); + + if (insn & 1) { + regs->ccr &= ~(0x0f000000); + regs->ccr |= (__FPU_FPSCR >> 4) & 0x0f000000; + } + + trap = record_exception(regs, eflag); + if (trap) + return 1; + + switch (type) { + case DU: + case XEU: + regs->gpr[idx] = (unsigned long)op1; + break; + + default: + break; + } +#endif /* CONFIG_MATH_EMULATION */ + + regs->nip += 4; + return 0; + +illegal: + return -ENOSYS; +} diff --git a/arch/powerpc/math-emu/mcrfs.c b/arch/powerpc/math-emu/mcrfs.c new file mode 100644 index 000000000000..106dd912914b --- /dev/null +++ b/arch/powerpc/math-emu/mcrfs.c @@ -0,0 +1,31 @@ +#include <linux/types.h> +#include <linux/errno.h> +#include <asm/uaccess.h> + +#include "soft-fp.h" + +int +mcrfs(u32 *ccr, u32 crfD, u32 crfS) +{ + u32 value, clear; + +#ifdef DEBUG + printk("%s: %p (%08x) %d %d\n", __FUNCTION__, ccr, *ccr, crfD, crfS); +#endif + + clear = 15 << ((7 - crfS) << 2); + if (!crfS) + clear = 0x90000000; + + value = (__FPU_FPSCR >> ((7 - crfS) << 2)) & 15; + __FPU_FPSCR &= ~(clear); + + *ccr &= ~(15 << ((7 - crfD) << 2)); + *ccr |= (value << ((7 - crfD) << 2)); + +#ifdef DEBUG + printk("CR: %08x\n", __FUNCTION__, *ccr); +#endif + + return 0; +} diff --git a/arch/powerpc/math-emu/mffs.c b/arch/powerpc/math-emu/mffs.c new file mode 100644 index 000000000000..f477c9170e75 --- /dev/null +++ b/arch/powerpc/math-emu/mffs.c @@ -0,0 +1,17 @@ +#include <linux/types.h> +#include <linux/errno.h> +#include <asm/uaccess.h> + +#include "soft-fp.h" + +int +mffs(u32 *frD) +{ + frD[1] = __FPU_FPSCR; + +#ifdef DEBUG + printk("%s: frD %p: %08x.%08x\n", __FUNCTION__, frD, frD[0], frD[1]); +#endif + + return 0; +} diff --git a/arch/powerpc/math-emu/mtfsb0.c b/arch/powerpc/math-emu/mtfsb0.c new file mode 100644 index 000000000000..99bfd80f4af3 --- /dev/null +++ b/arch/powerpc/math-emu/mtfsb0.c @@ -0,0 +1,18 @@ +#include <linux/types.h> +#include <linux/errno.h> +#include <asm/uaccess.h> + +#include "soft-fp.h" + +int +mtfsb0(int crbD) +{ + if ((crbD != 1) && (crbD != 2)) + __FPU_FPSCR &= ~(1 << (31 - crbD)); + +#ifdef DEBUG + printk("%s: %d %08lx\n", __FUNCTION__, crbD, __FPU_FPSCR); +#endif + + return 0; +} diff --git a/arch/powerpc/math-emu/mtfsb1.c b/arch/powerpc/math-emu/mtfsb1.c new file mode 100644 index 000000000000..3d9e7ed92d2b --- /dev/null +++ b/arch/powerpc/math-emu/mtfsb1.c @@ -0,0 +1,18 @@ +#include <linux/types.h> +#include <linux/errno.h> +#include <asm/uaccess.h> + +#include "soft-fp.h" + +int +mtfsb1(int crbD) +{ + if ((crbD != 1) && (crbD != 2)) + __FPU_FPSCR |= (1 << (31 - crbD)); + +#ifdef DEBUG + printk("%s: %d %08lx\n", __FUNCTION__, crbD, __FPU_FPSCR); +#endif + + return 0; +} diff --git a/arch/powerpc/math-emu/mtfsf.c b/arch/powerpc/math-emu/mtfsf.c new file mode 100644 index 000000000000..d70cf714994c --- /dev/null +++ b/arch/powerpc/math-emu/mtfsf.c @@ -0,0 +1,45 @@ +#include <linux/types.h> +#include <linux/errno.h> +#include <asm/uaccess.h> + +#include "soft-fp.h" + +int +mtfsf(unsigned int FM, u32 *frB) +{ + u32 mask; + + if (FM == 0) + return 0; + + if (FM == 0xff) + mask = 0x9fffffff; + else { + mask = 0; + if (FM & (1 << 0)) + mask |= 0x90000000; + if (FM & (1 << 1)) + mask |= 0x0f000000; + if (FM & (1 << 2)) + mask |= 0x00f00000; + if (FM & (1 << 3)) + mask |= 0x000f0000; + if (FM & (1 << 4)) + mask |= 0x0000f000; + if (FM & (1 << 5)) + mask |= 0x00000f00; + if (FM & (1 << 6)) + mask |= 0x000000f0; + if (FM & (1 << 7)) + mask |= 0x0000000f; + } + + __FPU_FPSCR &= ~(mask); + __FPU_FPSCR |= (frB[1] & mask); + +#ifdef DEBUG + printk("%s: %02x %p: %08lx\n", __FUNCTION__, FM, frB, __FPU_FPSCR); +#endif + + return 0; +} diff --git a/arch/powerpc/math-emu/mtfsfi.c b/arch/powerpc/math-emu/mtfsfi.c new file mode 100644 index 000000000000..71df854baa7e --- /dev/null +++ b/arch/powerpc/math-emu/mtfsfi.c @@ -0,0 +1,23 @@ +#include <linux/types.h> +#include <linux/errno.h> +#include <asm/uaccess.h> + +#include "soft-fp.h" + +int +mtfsfi(unsigned int crfD, unsigned int IMM) +{ + u32 mask = 0xf; + + if (!crfD) + mask = 9; + + __FPU_FPSCR &= ~(mask << ((7 - crfD) << 2)); + __FPU_FPSCR |= (IMM & 0xf) << ((7 - crfD) << 2); + +#ifdef DEBUG + printk("%s: %d %x: %08lx\n", __FUNCTION__, crfD, IMM, __FPU_FPSCR); +#endif + + return 0; +} diff --git a/arch/powerpc/math-emu/op-1.h b/arch/powerpc/math-emu/op-1.h new file mode 100644 index 000000000000..c92fa95f562e --- /dev/null +++ b/arch/powerpc/math-emu/op-1.h @@ -0,0 +1,245 @@ +/* + * Basic one-word fraction declaration and manipulation. + */ + +#define _FP_FRAC_DECL_1(X) _FP_W_TYPE X##_f +#define _FP_FRAC_COPY_1(D,S) (D##_f = S##_f) +#define _FP_FRAC_SET_1(X,I) (X##_f = I) +#define _FP_FRAC_HIGH_1(X) (X##_f) +#define _FP_FRAC_LOW_1(X) (X##_f) +#define _FP_FRAC_WORD_1(X,w) (X##_f) + +#define _FP_FRAC_ADDI_1(X,I) (X##_f += I) +#define _FP_FRAC_SLL_1(X,N) \ + do { \ + if (__builtin_constant_p(N) && (N) == 1) \ + X##_f += X##_f; \ + else \ + X##_f <<= (N); \ + } while (0) +#define _FP_FRAC_SRL_1(X,N) (X##_f >>= N) + +/* Right shift with sticky-lsb. */ +#define _FP_FRAC_SRS_1(X,N,sz) __FP_FRAC_SRS_1(X##_f, N, sz) + +#define __FP_FRAC_SRS_1(X,N,sz) \ + (X = (X >> (N) | (__builtin_constant_p(N) && (N) == 1 \ + ? X & 1 : (X << (_FP_W_TYPE_SIZE - (N))) != 0))) + +#define _FP_FRAC_ADD_1(R,X,Y) (R##_f = X##_f + Y##_f) +#define _FP_FRAC_SUB_1(R,X,Y) (R##_f = X##_f - Y##_f) +#define _FP_FRAC_CLZ_1(z, X) __FP_CLZ(z, X##_f) + +/* Predicates */ +#define _FP_FRAC_NEGP_1(X) ((_FP_WS_TYPE)X##_f < 0) +#define _FP_FRAC_ZEROP_1(X) (X##_f == 0) +#define _FP_FRAC_OVERP_1(fs,X) (X##_f & _FP_OVERFLOW_##fs) +#define _FP_FRAC_EQ_1(X, Y) (X##_f == Y##_f) +#define _FP_FRAC_GE_1(X, Y) (X##_f >= Y##_f) +#define _FP_FRAC_GT_1(X, Y) (X##_f > Y##_f) + +#define _FP_ZEROFRAC_1 0 +#define _FP_MINFRAC_1 1 + +/* + * Unpack the raw bits of a native fp value. Do not classify or + * normalize the data. + */ + +#define _FP_UNPACK_RAW_1(fs, X, val) \ + do { \ + union _FP_UNION_##fs _flo; _flo.flt = (val); \ + \ + X##_f = _flo.bits.frac; \ + X##_e = _flo.bits.exp; \ + X##_s = _flo.bits.sign; \ + } while (0) + + +/* + * Repack the raw bits of a native fp value. + */ + +#define _FP_PACK_RAW_1(fs, val, X) \ + do { \ + union _FP_UNION_##fs _flo; \ + \ + _flo.bits.frac = X##_f; \ + _flo.bits.exp = X##_e; \ + _flo.bits.sign = X##_s; \ + \ + (val) = _flo.flt; \ + } while (0) + + +/* + * Multiplication algorithms: + */ + +/* Basic. Assuming the host word size is >= 2*FRACBITS, we can do the + multiplication immediately. */ + +#define _FP_MUL_MEAT_1_imm(fs, R, X, Y) \ + do { \ + R##_f = X##_f * Y##_f; \ + /* Normalize since we know where the msb of the multiplicands \ + were (bit B), we know that the msb of the of the product is \ + at either 2B or 2B-1. */ \ + _FP_FRAC_SRS_1(R, _FP_WFRACBITS_##fs-1, 2*_FP_WFRACBITS_##fs); \ + } while (0) + +/* Given a 1W * 1W => 2W primitive, do the extended multiplication. */ + +#define _FP_MUL_MEAT_1_wide(fs, R, X, Y, doit) \ + do { \ + _FP_W_TYPE _Z_f0, _Z_f1; \ + doit(_Z_f1, _Z_f0, X##_f, Y##_f); \ + /* Normalize since we know where the msb of the multiplicands \ + were (bit B), we know that the msb of the of the product is \ + at either 2B or 2B-1. */ \ + _FP_FRAC_SRS_2(_Z, _FP_WFRACBITS_##fs-1, 2*_FP_WFRACBITS_##fs); \ + R##_f = _Z_f0; \ + } while (0) + +/* Finally, a simple widening multiply algorithm. What fun! */ + +#define _FP_MUL_MEAT_1_hard(fs, R, X, Y) \ + do { \ + _FP_W_TYPE _xh, _xl, _yh, _yl, _z_f0, _z_f1, _a_f0, _a_f1; \ + \ + /* split the words in half */ \ + _xh = X##_f >> (_FP_W_TYPE_SIZE/2); \ + _xl = X##_f & (((_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE/2)) - 1); \ + _yh = Y##_f >> (_FP_W_TYPE_SIZE/2); \ + _yl = Y##_f & (((_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE/2)) - 1); \ + \ + /* multiply the pieces */ \ + _z_f0 = _xl * _yl; \ + _a_f0 = _xh * _yl; \ + _a_f1 = _xl * _yh; \ + _z_f1 = _xh * _yh; \ + \ + /* reassemble into two full words */ \ + if ((_a_f0 += _a_f1) < _a_f1) \ + _z_f1 += (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE/2); \ + _a_f1 = _a_f0 >> (_FP_W_TYPE_SIZE/2); \ + _a_f0 = _a_f0 << (_FP_W_TYPE_SIZE/2); \ + _FP_FRAC_ADD_2(_z, _z, _a); \ + \ + /* normalize */ \ + _FP_FRAC_SRS_2(_z, _FP_WFRACBITS_##fs - 1, 2*_FP_WFRACBITS_##fs); \ + R##_f = _z_f0; \ + } while (0) + + +/* + * Division algorithms: + */ + +/* Basic. Assuming the host word size is >= 2*FRACBITS, we can do the + division immediately. Give this macro either _FP_DIV_HELP_imm for + C primitives or _FP_DIV_HELP_ldiv for the ISO function. Which you + choose will depend on what the compiler does with divrem4. */ + +#define _FP_DIV_MEAT_1_imm(fs, R, X, Y, doit) \ + do { \ + _FP_W_TYPE _q, _r; \ + X##_f <<= (X##_f < Y##_f \ + ? R##_e--, _FP_WFRACBITS_##fs \ + : _FP_WFRACBITS_##fs - 1); \ + doit(_q, _r, X##_f, Y##_f); \ + R##_f = _q | (_r != 0); \ + } while (0) + +/* GCC's longlong.h defines a 2W / 1W => (1W,1W) primitive udiv_qrnnd + that may be useful in this situation. This first is for a primitive + that requires normalization, the second for one that does not. Look + for UDIV_NEEDS_NORMALIZATION to tell which your machine needs. */ + +#define _FP_DIV_MEAT_1_udiv_norm(fs, R, X, Y) \ + do { \ + _FP_W_TYPE _nh, _nl, _q, _r; \ + \ + /* Normalize Y -- i.e. make the most significant bit set. */ \ + Y##_f <<= _FP_WFRACXBITS_##fs - 1; \ + \ + /* Shift X op correspondingly high, that is, up one full word. */ \ + if (X##_f <= Y##_f) \ + { \ + _nl = 0; \ + _nh = X##_f; \ + } \ + else \ + { \ + R##_e++; \ + _nl = X##_f << (_FP_W_TYPE_SIZE-1); \ + _nh = X##_f >> 1; \ + } \ + \ + udiv_qrnnd(_q, _r, _nh, _nl, Y##_f); \ + R##_f = _q | (_r != 0); \ + } while (0) + +#define _FP_DIV_MEAT_1_udiv(fs, R, X, Y) \ + do { \ + _FP_W_TYPE _nh, _nl, _q, _r; \ + if (X##_f < Y##_f) \ + { \ + R##_e--; \ + _nl = X##_f << _FP_WFRACBITS_##fs; \ + _nh = X##_f >> _FP_WFRACXBITS_##fs; \ + } \ + else \ + { \ + _nl = X##_f << (_FP_WFRACBITS_##fs - 1); \ + _nh = X##_f >> (_FP_WFRACXBITS_##fs + 1); \ + } \ + udiv_qrnnd(_q, _r, _nh, _nl, Y##_f); \ + R##_f = _q | (_r != 0); \ + } while (0) + + +/* + * Square root algorithms: + * We have just one right now, maybe Newton approximation + * should be added for those machines where division is fast. + */ + +#define _FP_SQRT_MEAT_1(R, S, T, X, q) \ + do { \ + while (q) \ + { \ + T##_f = S##_f + q; \ + if (T##_f <= X##_f) \ + { \ + S##_f = T##_f + q; \ + X##_f -= T##_f; \ + R##_f += q; \ + } \ + _FP_FRAC_SLL_1(X, 1); \ + q >>= 1; \ + } \ + } while (0) + +/* + * Assembly/disassembly for converting to/from integral types. + * No shifting or overflow handled here. + */ + +#define _FP_FRAC_ASSEMBLE_1(r, X, rsize) (r = X##_f) +#define _FP_FRAC_DISASSEMBLE_1(X, r, rsize) (X##_f = r) + + +/* + * Convert FP values between word sizes + */ + +#define _FP_FRAC_CONV_1_1(dfs, sfs, D, S) \ + do { \ + D##_f = S##_f; \ + if (_FP_WFRACBITS_##sfs > _FP_WFRACBITS_##dfs) \ + _FP_FRAC_SRS_1(D, (_FP_WFRACBITS_##sfs-_FP_WFRACBITS_##dfs), \ + _FP_WFRACBITS_##sfs); \ + else \ + D##_f <<= _FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs; \ + } while (0) diff --git a/arch/powerpc/math-emu/op-2.h b/arch/powerpc/math-emu/op-2.h new file mode 100644 index 000000000000..b9b06b4c6ea1 --- /dev/null +++ b/arch/powerpc/math-emu/op-2.h @@ -0,0 +1,433 @@ +/* + * Basic two-word fraction declaration and manipulation. + */ + +#define _FP_FRAC_DECL_2(X) _FP_W_TYPE X##_f0, X##_f1 +#define _FP_FRAC_COPY_2(D,S) (D##_f0 = S##_f0, D##_f1 = S##_f1) +#define _FP_FRAC_SET_2(X,I) __FP_FRAC_SET_2(X, I) +#define _FP_FRAC_HIGH_2(X) (X##_f1) +#define _FP_FRAC_LOW_2(X) (X##_f0) +#define _FP_FRAC_WORD_2(X,w) (X##_f##w) + +#define _FP_FRAC_SLL_2(X,N) \ + do { \ + if ((N) < _FP_W_TYPE_SIZE) \ + { \ + if (__builtin_constant_p(N) && (N) == 1) \ + { \ + X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE)(X##_f0)) < 0); \ + X##_f0 += X##_f0; \ + } \ + else \ + { \ + X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \ + X##_f0 <<= (N); \ + } \ + } \ + else \ + { \ + X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE); \ + X##_f0 = 0; \ + } \ + } while (0) + +#define _FP_FRAC_SRL_2(X,N) \ + do { \ + if ((N) < _FP_W_TYPE_SIZE) \ + { \ + X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N)); \ + X##_f1 >>= (N); \ + } \ + else \ + { \ + X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE); \ + X##_f1 = 0; \ + } \ + } while (0) + +/* Right shift with sticky-lsb. */ +#define _FP_FRAC_SRS_2(X,N,sz) \ + do { \ + if ((N) < _FP_W_TYPE_SIZE) \ + { \ + X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) | \ + (__builtin_constant_p(N) && (N) == 1 \ + ? X##_f0 & 1 \ + : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0)); \ + X##_f1 >>= (N); \ + } \ + else \ + { \ + X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE) | \ + (((X##_f1 << (sz - (N))) | X##_f0) != 0)); \ + X##_f1 = 0; \ + } \ + } while (0) + +#define _FP_FRAC_ADDI_2(X,I) \ + __FP_FRAC_ADDI_2(X##_f1, X##_f0, I) + +#define _FP_FRAC_ADD_2(R,X,Y) \ + __FP_FRAC_ADD_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0) + +#define _FP_FRAC_SUB_2(R,X,Y) \ + __FP_FRAC_SUB_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0) + +#define _FP_FRAC_CLZ_2(R,X) \ + do { \ + if (X##_f1) \ + __FP_CLZ(R,X##_f1); \ + else \ + { \ + __FP_CLZ(R,X##_f0); \ + R += _FP_W_TYPE_SIZE; \ + } \ + } while(0) + +/* Predicates */ +#define _FP_FRAC_NEGP_2(X) ((_FP_WS_TYPE)X##_f1 < 0) +#define _FP_FRAC_ZEROP_2(X) ((X##_f1 | X##_f0) == 0) +#define _FP_FRAC_OVERP_2(fs,X) (X##_f1 & _FP_OVERFLOW_##fs) +#define _FP_FRAC_EQ_2(X, Y) (X##_f1 == Y##_f1 && X##_f0 == Y##_f0) +#define _FP_FRAC_GT_2(X, Y) \ + ((X##_f1 > Y##_f1) || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0)) +#define _FP_FRAC_GE_2(X, Y) \ + ((X##_f1 > Y##_f1) || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0)) + +#define _FP_ZEROFRAC_2 0, 0 +#define _FP_MINFRAC_2 0, 1 + +/* + * Internals + */ + +#define __FP_FRAC_SET_2(X,I1,I0) (X##_f0 = I0, X##_f1 = I1) + +#define __FP_CLZ_2(R, xh, xl) \ + do { \ + if (xh) \ + __FP_CLZ(R,xl); \ + else \ + { \ + __FP_CLZ(R,xl); \ + R += _FP_W_TYPE_SIZE; \ + } \ + } while(0) + +#if 0 + +#ifndef __FP_FRAC_ADDI_2 +#define __FP_FRAC_ADDI_2(xh, xl, i) \ + (xh += ((xl += i) < i)) +#endif +#ifndef __FP_FRAC_ADD_2 +#define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl) \ + (rh = xh + yh + ((rl = xl + yl) < xl)) +#endif +#ifndef __FP_FRAC_SUB_2 +#define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl) \ + (rh = xh - yh - ((rl = xl - yl) > xl)) +#endif + +#else + +#undef __FP_FRAC_ADDI_2 +#define __FP_FRAC_ADDI_2(xh, xl, i) add_ssaaaa(xh, xl, xh, xl, 0, i) +#undef __FP_FRAC_ADD_2 +#define __FP_FRAC_ADD_2 add_ssaaaa +#undef __FP_FRAC_SUB_2 +#define __FP_FRAC_SUB_2 sub_ddmmss + +#endif + +/* + * Unpack the raw bits of a native fp value. Do not classify or + * normalize the data. + */ + +#define _FP_UNPACK_RAW_2(fs, X, val) \ + do { \ + union _FP_UNION_##fs _flo; _flo.flt = (val); \ + \ + X##_f0 = _flo.bits.frac0; \ + X##_f1 = _flo.bits.frac1; \ + X##_e = _flo.bits.exp; \ + X##_s = _flo.bits.sign; \ + } while (0) + + +/* + * Repack the raw bits of a native fp value. + */ + +#define _FP_PACK_RAW_2(fs, val, X) \ + do { \ + union _FP_UNION_##fs _flo; \ + \ + _flo.bits.frac0 = X##_f0; \ + _flo.bits.frac1 = X##_f1; \ + _flo.bits.exp = X##_e; \ + _flo.bits.sign = X##_s; \ + \ + (val) = _flo.flt; \ + } while (0) + + +/* + * Multiplication algorithms: + */ + +/* Given a 1W * 1W => 2W primitive, do the extended multiplication. */ + +#define _FP_MUL_MEAT_2_wide(fs, R, X, Y, doit) \ + do { \ + _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c); \ + \ + doit(_FP_FRAC_WORD_4(_z,1), _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0); \ + doit(_b_f1, _b_f0, X##_f0, Y##_f1); \ + doit(_c_f1, _c_f0, X##_f1, Y##_f0); \ + doit(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), X##_f1, Y##_f1); \ + \ + __FP_FRAC_ADD_4(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ + _FP_FRAC_WORD_4(_z,1),_FP_FRAC_WORD_4(_z,0), \ + 0, _b_f1, _b_f0, 0, \ + _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ + _FP_FRAC_WORD_4(_z,1),_FP_FRAC_WORD_4(_z,0)); \ + __FP_FRAC_ADD_4(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ + _FP_FRAC_WORD_4(_z,1),_FP_FRAC_WORD_4(_z,0), \ + 0, _c_f1, _c_f0, 0, \ + _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \ + _FP_FRAC_WORD_4(_z,1),_FP_FRAC_WORD_4(_z,0)); \ + \ + /* Normalize since we know where the msb of the multiplicands \ + were (bit B), we know that the msb of the of the product is \ + at either 2B or 2B-1. */ \ + _FP_FRAC_SRS_4(_z, _FP_WFRACBITS_##fs-1, 2*_FP_WFRACBITS_##fs); \ + R##_f0 = _FP_FRAC_WORD_4(_z,0); \ + R##_f1 = _FP_FRAC_WORD_4(_z,1); \ + } while (0) + +/* This next macro appears to be totally broken. Fortunately nowhere + * seems to use it :-> The problem is that we define _z[4] but + * then use it in _FP_FRAC_SRS_4, which will attempt to access + * _z_f[n] which will cause an error. The fix probably involves + * declaring it with _FP_FRAC_DECL_4, see previous macro. -- PMM 02/1998 + */ +#define _FP_MUL_MEAT_2_gmp(fs, R, X, Y) \ + do { \ + _FP_W_TYPE _x[2], _y[2], _z[4]; \ + _x[0] = X##_f0; _x[1] = X##_f1; \ + _y[0] = Y##_f0; _y[1] = Y##_f1; \ + \ + mpn_mul_n(_z, _x, _y, 2); \ + \ + /* Normalize since we know where the msb of the multiplicands \ + were (bit B), we know that the msb of the of the product is \ + at either 2B or 2B-1. */ \ + _FP_FRAC_SRS_4(_z, _FP_WFRACBITS##_fs-1, 2*_FP_WFRACBITS_##fs); \ + R##_f0 = _z[0]; \ + R##_f1 = _z[1]; \ + } while (0) + + +/* + * Division algorithms: + * This seems to be giving me difficulties -- PMM + * Look, NetBSD seems to be able to comment algorithms. Can't you? + * I've thrown printks at the problem. + * This now appears to work, but I still don't really know why. + * Also, I don't think the result is properly normalised... + */ + +#define _FP_DIV_MEAT_2_udiv_64(fs, R, X, Y) \ + do { \ + extern void _fp_udivmodti4(_FP_W_TYPE q[2], _FP_W_TYPE r[2], \ + _FP_W_TYPE n1, _FP_W_TYPE n0, \ + _FP_W_TYPE d1, _FP_W_TYPE d0); \ + _FP_W_TYPE _n_f3, _n_f2, _n_f1, _n_f0, _r_f1, _r_f0; \ + _FP_W_TYPE _q_f1, _q_f0, _m_f1, _m_f0; \ + _FP_W_TYPE _rmem[2], _qmem[2]; \ + /* I think this check is to ensure that the result is normalised. \ + * Assuming X,Y normalised (ie in [1.0,2.0)) X/Y will be in \ + * [0.5,2.0). Furthermore, it will be less than 1.0 iff X < Y. \ + * In this case we tweak things. (this is based on comments in \ + * the NetBSD FPU emulation code. ) \ + * We know X,Y are normalised because we ensure this as part of \ + * the unpacking process. -- PMM \ + */ \ + if (_FP_FRAC_GT_2(X, Y)) \ + { \ +/* R##_e++; */ \ + _n_f3 = X##_f1 >> 1; \ + _n_f2 = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1; \ + _n_f1 = X##_f0 << (_FP_W_TYPE_SIZE - 1); \ + _n_f0 = 0; \ + } \ + else \ + { \ + R##_e--; \ + _n_f3 = X##_f1; \ + _n_f2 = X##_f0; \ + _n_f1 = _n_f0 = 0; \ + } \ + \ + /* Normalize, i.e. make the most significant bit of the \ + denominator set. CHANGED: - 1 to nothing -- PMM */ \ + _FP_FRAC_SLL_2(Y, _FP_WFRACXBITS_##fs /* -1 */); \ + \ + /* Do the 256/128 bit division given the 128-bit _fp_udivmodtf4 \ + primitive snagged from libgcc2.c. */ \ + \ + _fp_udivmodti4(_qmem, _rmem, _n_f3, _n_f2, 0, Y##_f1); \ + _q_f1 = _qmem[0]; \ + umul_ppmm(_m_f1, _m_f0, _q_f1, Y##_f0); \ + _r_f1 = _rmem[0]; \ + _r_f0 = _n_f1; \ + if (_FP_FRAC_GT_2(_m, _r)) \ + { \ + _q_f1--; \ + _FP_FRAC_ADD_2(_r, _r, Y); \ + if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r)) \ + { \ + _q_f1--; \ + _FP_FRAC_ADD_2(_r, _r, Y); \ + } \ + } \ + _FP_FRAC_SUB_2(_r, _r, _m); \ + \ + _fp_udivmodti4(_qmem, _rmem, _r_f1, _r_f0, 0, Y##_f1); \ + _q_f0 = _qmem[0]; \ + umul_ppmm(_m_f1, _m_f0, _q_f0, Y##_f0); \ + _r_f1 = _rmem[0]; \ + _r_f0 = _n_f0; \ + if (_FP_FRAC_GT_2(_m, _r)) \ + { \ + _q_f0--; \ + _FP_FRAC_ADD_2(_r, _r, Y); \ + if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r)) \ + { \ + _q_f0--; \ + _FP_FRAC_ADD_2(_r, _r, Y); \ + } \ + } \ + _FP_FRAC_SUB_2(_r, _r, _m); \ + \ + R##_f1 = _q_f1; \ + R##_f0 = _q_f0 | ((_r_f1 | _r_f0) != 0); \ + /* adjust so answer is normalized again. I'm not sure what the \ + * final sz param should be. In practice it's never used since \ + * N is 1 which is always going to be < _FP_W_TYPE_SIZE... \ + */ \ + /* _FP_FRAC_SRS_2(R,1,_FP_WFRACBITS_##fs); */ \ + } while (0) + + +#define _FP_DIV_MEAT_2_gmp(fs, R, X, Y) \ + do { \ + _FP_W_TYPE _x[4], _y[2], _z[4]; \ + _y[0] = Y##_f0; _y[1] = Y##_f1; \ + _x[0] = _x[3] = 0; \ + if (_FP_FRAC_GT_2(X, Y)) \ + { \ + R##_e++; \ + _x[1] = (X##_f0 << (_FP_WFRACBITS-1 - _FP_W_TYPE_SIZE) | \ + X##_f1 >> (_FP_W_TYPE_SIZE - \ + (_FP_WFRACBITS-1 - _FP_W_TYPE_SIZE))); \ + _x[2] = X##_f1 << (_FP_WFRACBITS-1 - _FP_W_TYPE_SIZE); \ + } \ + else \ + { \ + _x[1] = (X##_f0 << (_FP_WFRACBITS - _FP_W_TYPE_SIZE) | \ + X##_f1 >> (_FP_W_TYPE_SIZE - \ + (_FP_WFRACBITS - _FP_W_TYPE_SIZE))); \ + _x[2] = X##_f1 << (_FP_WFRACBITS - _FP_W_TYPE_SIZE); \ + } \ + \ + (void) mpn_divrem (_z, 0, _x, 4, _y, 2); \ + R##_f1 = _z[1]; \ + R##_f0 = _z[0] | ((_x[0] | _x[1]) != 0); \ + } while (0) + + +/* + * Square root algorithms: + * We have just one right now, maybe Newton approximation + * should be added for those machines where division is fast. + */ + +#define _FP_SQRT_MEAT_2(R, S, T, X, q) \ + do { \ + while (q) \ + { \ + T##_f1 = S##_f1 + q; \ + if (T##_f1 <= X##_f1) \ + { \ + S##_f1 = T##_f1 + q; \ + X##_f1 -= T##_f1; \ + R##_f1 += q; \ + } \ + _FP_FRAC_SLL_2(X, 1); \ + q >>= 1; \ + } \ + q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \ + while (q) \ + { \ + T##_f0 = S##_f0 + q; \ + T##_f1 = S##_f1; \ + if (T##_f1 < X##_f1 || \ + (T##_f1 == X##_f1 && T##_f0 < X##_f0)) \ + { \ + S##_f0 = T##_f0 + q; \ + if (((_FP_WS_TYPE)T##_f0) < 0 && \ + ((_FP_WS_TYPE)S##_f0) >= 0) \ + S##_f1++; \ + _FP_FRAC_SUB_2(X, X, T); \ + R##_f0 += q; \ + } \ + _FP_FRAC_SLL_2(X, 1); \ + q >>= 1; \ + } \ + } while (0) + + +/* + * Assembly/disassembly for converting to/from integral types. + * No shifting or overflow handled here. + */ + +#define _FP_FRAC_ASSEMBLE_2(r, X, rsize) \ + do { \ + if (rsize <= _FP_W_TYPE_SIZE) \ + r = X##_f0; \ + else \ + { \ + r = X##_f1; \ + r <<= _FP_W_TYPE_SIZE; \ + r += X##_f0; \ + } \ + } while (0) + +#define _FP_FRAC_DISASSEMBLE_2(X, r, rsize) \ + do { \ + X##_f0 = r; \ + X##_f1 = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE); \ + } while (0) + +/* + * Convert FP values between word sizes + */ + +#define _FP_FRAC_CONV_1_2(dfs, sfs, D, S) \ + do { \ + _FP_FRAC_SRS_2(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs), \ + _FP_WFRACBITS_##sfs); \ + D##_f = S##_f0; \ + } while (0) + +#define _FP_FRAC_CONV_2_1(dfs, sfs, D, S) \ + do { \ + D##_f0 = S##_f; \ + D##_f1 = 0; \ + _FP_FRAC_SLL_2(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs)); \ + } while (0) + diff --git a/arch/powerpc/math-emu/op-4.h b/arch/powerpc/math-emu/op-4.h new file mode 100644 index 000000000000..fcdd6d064c54 --- /dev/null +++ b/arch/powerpc/math-emu/op-4.h @@ -0,0 +1,297 @@ +/* + * Basic four-word fraction declaration and manipulation. + * + * When adding quadword support for 32 bit machines, we need + * to be a little careful as double multiply uses some of these + * macros: (in op-2.h) + * _FP_MUL_MEAT_2_wide() uses _FP_FRAC_DECL_4, _FP_FRAC_WORD_4, + * _FP_FRAC_ADD_4, _FP_FRAC_SRS_4 + * _FP_MUL_MEAT_2_gmp() uses _FP_FRAC_SRS_4 (and should use + * _FP_FRAC_DECL_4: it appears to be broken and is not used + * anywhere anyway. ) + * + * I've now fixed all the macros that were here from the sparc64 code. + * [*none* of the shift macros were correct!] -- PMM 02/1998 + * + * The only quadword stuff that remains to be coded is: + * 1) the conversion to/from ints, which requires + * that we check (in op-common.h) that the following do the right thing + * for quadwords: _FP_TO_INT(Q,4,r,X,rsz,rsg), _FP_FROM_INT(Q,4,X,r,rs,rt) + * 2) multiply, divide and sqrt, which require: + * _FP_MUL_MEAT_4_*(R,X,Y), _FP_DIV_MEAT_4_*(R,X,Y), _FP_SQRT_MEAT_4(R,S,T,X,q), + * This also needs _FP_MUL_MEAT_Q and _FP_DIV_MEAT_Q to be defined to + * some suitable _FP_MUL_MEAT_4_* macros in sfp-machine.h. + * [we're free to choose whatever FP_MUL_MEAT_4_* macros we need for + * these; they are used nowhere else. ] + */ + +#define _FP_FRAC_DECL_4(X) _FP_W_TYPE X##_f[4] +#define _FP_FRAC_COPY_4(D,S) \ + (D##_f[0] = S##_f[0], D##_f[1] = S##_f[1], \ + D##_f[2] = S##_f[2], D##_f[3] = S##_f[3]) +/* The _FP_FRAC_SET_n(X,I) macro is intended for use with another + * macro such as _FP_ZEROFRAC_n which returns n comma separated values. + * The result is that we get an expansion of __FP_FRAC_SET_n(X,I0,I1,I2,I3) + * which just assigns the In values to the array X##_f[]. + * This is why the number of parameters doesn't appear to match + * at first glance... -- PMM + */ +#define _FP_FRAC_SET_4(X,I) __FP_FRAC_SET_4(X, I) +#define _FP_FRAC_HIGH_4(X) (X##_f[3]) +#define _FP_FRAC_LOW_4(X) (X##_f[0]) +#define _FP_FRAC_WORD_4(X,w) (X##_f[w]) + +#define _FP_FRAC_SLL_4(X,N) \ + do { \ + _FP_I_TYPE _up, _down, _skip, _i; \ + _skip = (N) / _FP_W_TYPE_SIZE; \ + _up = (N) % _FP_W_TYPE_SIZE; \ + _down = _FP_W_TYPE_SIZE - _up; \ + for (_i = 3; _i > _skip; --_i) \ + X##_f[_i] = X##_f[_i-_skip] << _up | X##_f[_i-_skip-1] >> _down; \ +/* bugfixed: was X##_f[_i] <<= _up; -- PMM 02/1998 */ \ + X##_f[_i] = X##_f[0] << _up; \ + for (--_i; _i >= 0; --_i) \ + X##_f[_i] = 0; \ + } while (0) + +/* This one was broken too */ +#define _FP_FRAC_SRL_4(X,N) \ + do { \ + _FP_I_TYPE _up, _down, _skip, _i; \ + _skip = (N) / _FP_W_TYPE_SIZE; \ + _down = (N) % _FP_W_TYPE_SIZE; \ + _up = _FP_W_TYPE_SIZE - _down; \ + for (_i = 0; _i < 3-_skip; ++_i) \ + X##_f[_i] = X##_f[_i+_skip] >> _down | X##_f[_i+_skip+1] << _up; \ + X##_f[_i] = X##_f[3] >> _down; \ + for (++_i; _i < 4; ++_i) \ + X##_f[_i] = 0; \ + } while (0) + + +/* Right shift with sticky-lsb. + * What this actually means is that we do a standard right-shift, + * but that if any of the bits that fall off the right hand side + * were one then we always set the LSbit. + */ +#define _FP_FRAC_SRS_4(X,N,size) \ + do { \ + _FP_I_TYPE _up, _down, _skip, _i; \ + _FP_W_TYPE _s; \ + _skip = (N) / _FP_W_TYPE_SIZE; \ + _down = (N) % _FP_W_TYPE_SIZE; \ + _up = _FP_W_TYPE_SIZE - _down; \ + for (_s = _i = 0; _i < _skip; ++_i) \ + _s |= X##_f[_i]; \ + _s |= X##_f[_i] << _up; \ +/* s is now != 0 if we want to set the LSbit */ \ + for (_i = 0; _i < 3-_skip; ++_i) \ + X##_f[_i] = X##_f[_i+_skip] >> _down | X##_f[_i+_skip+1] << _up; \ + X##_f[_i] = X##_f[3] >> _down; \ + for (++_i; _i < 4; ++_i) \ + X##_f[_i] = 0; \ + /* don't fix the LSB until the very end when we're sure f[0] is stable */ \ + X##_f[0] |= (_s != 0); \ + } while (0) + +#define _FP_FRAC_ADD_4(R,X,Y) \ + __FP_FRAC_ADD_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0], \ + X##_f[3], X##_f[2], X##_f[1], X##_f[0], \ + Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0]) + +#define _FP_FRAC_SUB_4(R,X,Y) \ + __FP_FRAC_SUB_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0], \ + X##_f[3], X##_f[2], X##_f[1], X##_f[0], \ + Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0]) + +#define _FP_FRAC_ADDI_4(X,I) \ + __FP_FRAC_ADDI_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0], I) + +#define _FP_ZEROFRAC_4 0,0,0,0 +#define _FP_MINFRAC_4 0,0,0,1 + +#define _FP_FRAC_ZEROP_4(X) ((X##_f[0] | X##_f[1] | X##_f[2] | X##_f[3]) == 0) +#define _FP_FRAC_NEGP_4(X) ((_FP_WS_TYPE)X##_f[3] < 0) +#define _FP_FRAC_OVERP_4(fs,X) (X##_f[0] & _FP_OVERFLOW_##fs) + +#define _FP_FRAC_EQ_4(X,Y) \ + (X##_f[0] == Y##_f[0] && X##_f[1] == Y##_f[1] \ + && X##_f[2] == Y##_f[2] && X##_f[3] == Y##_f[3]) + +#define _FP_FRAC_GT_4(X,Y) \ + (X##_f[3] > Y##_f[3] || \ + (X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] || \ + (X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] || \ + (X##_f[1] == Y##_f[1] && X##_f[0] > Y##_f[0]) \ + )) \ + )) \ + ) + +#define _FP_FRAC_GE_4(X,Y) \ + (X##_f[3] > Y##_f[3] || \ + (X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] || \ + (X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] || \ + (X##_f[1] == Y##_f[1] && X##_f[0] >= Y##_f[0]) \ + )) \ + )) \ + ) + + +#define _FP_FRAC_CLZ_4(R,X) \ + do { \ + if (X##_f[3]) \ + { \ + __FP_CLZ(R,X##_f[3]); \ + } \ + else if (X##_f[2]) \ + { \ + __FP_CLZ(R,X##_f[2]); \ + R += _FP_W_TYPE_SIZE; \ + } \ + else if (X##_f[1]) \ + { \ + __FP_CLZ(R,X##_f[2]); \ + R += _FP_W_TYPE_SIZE*2; \ + } \ + else \ + { \ + __FP_CLZ(R,X##_f[0]); \ + R += _FP_W_TYPE_SIZE*3; \ + } \ + } while(0) + + +#define _FP_UNPACK_RAW_4(fs, X, val) \ + do { \ + union _FP_UNION_##fs _flo; _flo.flt = (val); \ + X##_f[0] = _flo.bits.frac0; \ + X##_f[1] = _flo.bits.frac1; \ + X##_f[2] = _flo.bits.frac2; \ + X##_f[3] = _flo.bits.frac3; \ + X##_e = _flo.bits.exp; \ + X##_s = _flo.bits.sign; \ + } while (0) + +#define _FP_PACK_RAW_4(fs, val, X) \ + do { \ + union _FP_UNION_##fs _flo; \ + _flo.bits.frac0 = X##_f[0]; \ + _flo.bits.frac1 = X##_f[1]; \ + _flo.bits.frac2 = X##_f[2]; \ + _flo.bits.frac3 = X##_f[3]; \ + _flo.bits.exp = X##_e; \ + _flo.bits.sign = X##_s; \ + (val) = _flo.flt; \ + } while (0) + + +/* + * Internals + */ + +#define __FP_FRAC_SET_4(X,I3,I2,I1,I0) \ + (X##_f[3] = I3, X##_f[2] = I2, X##_f[1] = I1, X##_f[0] = I0) + +#ifndef __FP_FRAC_ADD_4 +#define __FP_FRAC_ADD_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0) \ + (r0 = x0 + y0, \ + r1 = x1 + y1 + (r0 < x0), \ + r2 = x2 + y2 + (r1 < x1), \ + r3 = x3 + y3 + (r2 < x2)) +#endif + +#ifndef __FP_FRAC_SUB_4 +#define __FP_FRAC_SUB_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0) \ + (r0 = x0 - y0, \ + r1 = x1 - y1 - (r0 > x0), \ + r2 = x2 - y2 - (r1 > x1), \ + r3 = x3 - y3 - (r2 > x2)) +#endif + +#ifndef __FP_FRAC_ADDI_4 +/* I always wanted to be a lisp programmer :-> */ +#define __FP_FRAC_ADDI_4(x3,x2,x1,x0,i) \ + (x3 += ((x2 += ((x1 += ((x0 += i) < x0)) < x1) < x2))) +#endif + +/* Convert FP values between word sizes. This appears to be more + * complicated than I'd have expected it to be, so these might be + * wrong... These macros are in any case somewhat bogus because they + * use information about what various FRAC_n variables look like + * internally [eg, that 2 word vars are X_f0 and x_f1]. But so do + * the ones in op-2.h and op-1.h. + */ +#define _FP_FRAC_CONV_1_4(dfs, sfs, D, S) \ + do { \ + _FP_FRAC_SRS_4(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs), \ + _FP_WFRACBITS_##sfs); \ + D##_f = S##_f[0]; \ + } while (0) + +#define _FP_FRAC_CONV_2_4(dfs, sfs, D, S) \ + do { \ + _FP_FRAC_SRS_4(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs), \ + _FP_WFRACBITS_##sfs); \ + D##_f0 = S##_f[0]; \ + D##_f1 = S##_f[1]; \ + } while (0) + +/* Assembly/disassembly for converting to/from integral types. + * No shifting or overflow handled here. + */ +/* Put the FP value X into r, which is an integer of size rsize. */ +#define _FP_FRAC_ASSEMBLE_4(r, X, rsize) \ + do { \ + if (rsize <= _FP_W_TYPE_SIZE) \ + r = X##_f[0]; \ + else if (rsize <= 2*_FP_W_TYPE_SIZE) \ + { \ + r = X##_f[1]; \ + r <<= _FP_W_TYPE_SIZE; \ + r += X##_f[0]; \ + } \ + else \ + { \ + /* I'm feeling lazy so we deal with int == 3words (implausible)*/ \ + /* and int == 4words as a single case. */ \ + r = X##_f[3]; \ + r <<= _FP_W_TYPE_SIZE; \ + r += X##_f[2]; \ + r <<= _FP_W_TYPE_SIZE; \ + r += X##_f[1]; \ + r <<= _FP_W_TYPE_SIZE; \ + r += X##_f[0]; \ + } \ + } while (0) + +/* "No disassemble Number Five!" */ +/* move an integer of size rsize into X's fractional part. We rely on + * the _f[] array consisting of words of size _FP_W_TYPE_SIZE to avoid + * having to mask the values we store into it. + */ +#define _FP_FRAC_DISASSEMBLE_4(X, r, rsize) \ + do { \ + X##_f[0] = r; \ + X##_f[1] = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE); \ + X##_f[2] = (rsize <= 2*_FP_W_TYPE_SIZE ? 0 : r >> 2*_FP_W_TYPE_SIZE); \ + X##_f[3] = (rsize <= 3*_FP_W_TYPE_SIZE ? 0 : r >> 3*_FP_W_TYPE_SIZE); \ + } while (0) + +#define _FP_FRAC_CONV_4_1(dfs, sfs, D, S) \ + do { \ + D##_f[0] = S##_f; \ + D##_f[1] = D##_f[2] = D##_f[3] = 0; \ + _FP_FRAC_SLL_4(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs)); \ + } while (0) + +#define _FP_FRAC_CONV_4_2(dfs, sfs, D, S) \ + do { \ + D##_f[0] = S##_f0; \ + D##_f[1] = S##_f1; \ + D##_f[2] = D##_f[3] = 0; \ + _FP_FRAC_SLL_4(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs)); \ + } while (0) + +/* FIXME! This has to be written */ +#define _FP_SQRT_MEAT_4(R, S, T, X, q) diff --git a/arch/powerpc/math-emu/op-common.h b/arch/powerpc/math-emu/op-common.h new file mode 100644 index 000000000000..afb82b6498ce --- /dev/null +++ b/arch/powerpc/math-emu/op-common.h @@ -0,0 +1,688 @@ +#define _FP_DECL(wc, X) \ + _FP_I_TYPE X##_c, X##_s, X##_e; \ + _FP_FRAC_DECL_##wc(X) + +/* + * Finish truely unpacking a native fp value by classifying the kind + * of fp value and normalizing both the exponent and the fraction. + */ + +#define _FP_UNPACK_CANONICAL(fs, wc, X) \ +do { \ + switch (X##_e) \ + { \ + default: \ + _FP_FRAC_HIGH_##wc(X) |= _FP_IMPLBIT_##fs; \ + _FP_FRAC_SLL_##wc(X, _FP_WORKBITS); \ + X##_e -= _FP_EXPBIAS_##fs; \ + X##_c = FP_CLS_NORMAL; \ + break; \ + \ + case 0: \ + if (_FP_FRAC_ZEROP_##wc(X)) \ + X##_c = FP_CLS_ZERO; \ + else \ + { \ + /* a denormalized number */ \ + _FP_I_TYPE _shift; \ + _FP_FRAC_CLZ_##wc(_shift, X); \ + _shift -= _FP_FRACXBITS_##fs; \ + _FP_FRAC_SLL_##wc(X, (_shift+_FP_WORKBITS)); \ + X##_e -= _FP_EXPBIAS_##fs - 1 + _shift; \ + X##_c = FP_CLS_NORMAL; \ + } \ + break; \ + \ + case _FP_EXPMAX_##fs: \ + if (_FP_FRAC_ZEROP_##wc(X)) \ + X##_c = FP_CLS_INF; \ + else \ + /* we don't differentiate between signaling and quiet nans */ \ + X##_c = FP_CLS_NAN; \ + break; \ + } \ +} while (0) + + +/* + * Before packing the bits back into the native fp result, take care + * of such mundane things as rounding and overflow. Also, for some + * kinds of fp values, the original parts may not have been fully + * extracted -- but that is ok, we can regenerate them now. + */ + +#define _FP_PACK_CANONICAL(fs, wc, X) \ +({int __ret = 0; \ + switch (X##_c) \ + { \ + case FP_CLS_NORMAL: \ + X##_e += _FP_EXPBIAS_##fs; \ + if (X##_e > 0) \ + { \ + __ret |= _FP_ROUND(wc, X); \ + if (_FP_FRAC_OVERP_##wc(fs, X)) \ + { \ + _FP_FRAC_SRL_##wc(X, (_FP_WORKBITS+1)); \ + X##_e++; \ + } \ + else \ + _FP_FRAC_SRL_##wc(X, _FP_WORKBITS); \ + if (X##_e >= _FP_EXPMAX_##fs) \ + { \ + /* overflow to infinity */ \ + X##_e = _FP_EXPMAX_##fs; \ + _FP_FRAC_SET_##wc(X, _FP_ZEROFRAC_##wc); \ + __ret |= EFLAG_OVERFLOW; \ + } \ + } \ + else \ + { \ + /* we've got a denormalized number */ \ + X##_e = -X##_e + 1; \ + if (X##_e <= _FP_WFRACBITS_##fs) \ + { \ + _FP_FRAC_SRS_##wc(X, X##_e, _FP_WFRACBITS_##fs); \ + _FP_FRAC_SLL_##wc(X, 1); \ + if (_FP_FRAC_OVERP_##wc(fs, X)) \ + { \ + X##_e = 1; \ + _FP_FRAC_SET_##wc(X, _FP_ZEROFRAC_##wc); \ + } \ + else \ + { \ + X##_e = 0; \ + _FP_FRAC_SRL_##wc(X, _FP_WORKBITS+1); \ + __ret |= EFLAG_UNDERFLOW; \ + } \ + } \ + else \ + { \ + /* underflow to zero */ \ + X##_e = 0; \ + _FP_FRAC_SET_##wc(X, _FP_ZEROFRAC_##wc); \ + __ret |= EFLAG_UNDERFLOW; \ + } \ + } \ + break; \ + \ + case FP_CLS_ZERO: \ + X##_e = 0; \ + _FP_FRAC_SET_##wc(X, _FP_ZEROFRAC_##wc); \ + break; \ + \ + case FP_CLS_INF: \ + X##_e = _FP_EXPMAX_##fs; \ + _FP_FRAC_SET_##wc(X, _FP_ZEROFRAC_##wc); \ + break; \ + \ + case FP_CLS_NAN: \ + X##_e = _FP_EXPMAX_##fs; \ + if (!_FP_KEEPNANFRACP) \ + { \ + _FP_FRAC_SET_##wc(X, _FP_NANFRAC_##fs); \ + X##_s = 0; \ + } \ + else \ + _FP_FRAC_HIGH_##wc(X) |= _FP_QNANBIT_##fs; \ + break; \ + } \ + __ret; \ +}) + + +/* + * Main addition routine. The input values should be cooked. + */ + +#define _FP_ADD(fs, wc, R, X, Y) \ +do { \ + switch (_FP_CLS_COMBINE(X##_c, Y##_c)) \ + { \ + case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_NORMAL): \ + { \ + /* shift the smaller number so that its exponent matches the larger */ \ + _FP_I_TYPE diff = X##_e - Y##_e; \ + \ + if (diff < 0) \ + { \ + diff = -diff; \ + if (diff <= _FP_WFRACBITS_##fs) \ + _FP_FRAC_SRS_##wc(X, diff, _FP_WFRACBITS_##fs); \ + else if (!_FP_FRAC_ZEROP_##wc(X)) \ + _FP_FRAC_SET_##wc(X, _FP_MINFRAC_##wc); \ + else \ + _FP_FRAC_SET_##wc(X, _FP_ZEROFRAC_##wc); \ + R##_e = Y##_e; \ + } \ + else \ + { \ + if (diff > 0) \ + { \ + if (diff <= _FP_WFRACBITS_##fs) \ + _FP_FRAC_SRS_##wc(Y, diff, _FP_WFRACBITS_##fs); \ + else if (!_FP_FRAC_ZEROP_##wc(Y)) \ + _FP_FRAC_SET_##wc(Y, _FP_MINFRAC_##wc); \ + else \ + _FP_FRAC_SET_##wc(Y, _FP_ZEROFRAC_##wc); \ + } \ + R##_e = X##_e; \ + } \ + \ + R##_c = FP_CLS_NORMAL; \ + \ + if (X##_s == Y##_s) \ + { \ + R##_s = X##_s; \ + _FP_FRAC_ADD_##wc(R, X, Y); \ + if (_FP_FRAC_OVERP_##wc(fs, R)) \ + { \ + _FP_FRAC_SRS_##wc(R, 1, _FP_WFRACBITS_##fs); \ + R##_e++; \ + } \ + } \ + else \ + { \ + R##_s = X##_s; \ + _FP_FRAC_SUB_##wc(R, X, Y); \ + if (_FP_FRAC_ZEROP_##wc(R)) \ + { \ + /* return an exact zero */ \ + if (FP_ROUNDMODE == FP_RND_MINF) \ + R##_s |= Y##_s; \ + else \ + R##_s &= Y##_s; \ + R##_c = FP_CLS_ZERO; \ + } \ + else \ + { \ + if (_FP_FRAC_NEGP_##wc(R)) \ + { \ + _FP_FRAC_SUB_##wc(R, Y, X); \ + R##_s = Y##_s; \ + } \ + \ + /* renormalize after subtraction */ \ + _FP_FRAC_CLZ_##wc(diff, R); \ + diff -= _FP_WFRACXBITS_##fs; \ + if (diff) \ + { \ + R##_e -= diff; \ + _FP_FRAC_SLL_##wc(R, diff); \ + } \ + } \ + } \ + break; \ + } \ + \ + case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_NAN): \ + _FP_CHOOSENAN(fs, wc, R, X, Y); \ + break; \ + \ + case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_ZERO): \ + R##_e = X##_e; \ + case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_NORMAL): \ + case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_INF): \ + case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_ZERO): \ + _FP_FRAC_COPY_##wc(R, X); \ + R##_s = X##_s; \ + R##_c = X##_c; \ + break; \ + \ + case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_NORMAL): \ + R##_e = Y##_e; \ + case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_NAN): \ + case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_NAN): \ + case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_NAN): \ + _FP_FRAC_COPY_##wc(R, Y); \ + R##_s = Y##_s; \ + R##_c = Y##_c; \ + break; \ + \ + case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_INF): \ + if (X##_s != Y##_s) \ + { \ + /* +INF + -INF => NAN */ \ + _FP_FRAC_SET_##wc(R, _FP_NANFRAC_##fs); \ + R##_s = X##_s ^ Y##_s; \ + R##_c = FP_CLS_NAN; \ + break; \ + } \ + /* FALLTHRU */ \ + \ + case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_NORMAL): \ + case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_ZERO): \ + R##_s = X##_s; \ + R##_c = FP_CLS_INF; \ + break; \ + \ + case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_INF): \ + case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_INF): \ + R##_s = Y##_s; \ + R##_c = FP_CLS_INF; \ + break; \ + \ + case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_ZERO): \ + /* make sure the sign is correct */ \ + if (FP_ROUNDMODE == FP_RND_MINF) \ + R##_s = X##_s | Y##_s; \ + else \ + R##_s = X##_s & Y##_s; \ + R##_c = FP_CLS_ZERO; \ + break; \ + \ + default: \ + abort(); \ + } \ +} while (0) + + +/* + * Main negation routine. FIXME -- when we care about setting exception + * bits reliably, this will not do. We should examine all of the fp classes. + */ + +#define _FP_NEG(fs, wc, R, X) \ + do { \ + _FP_FRAC_COPY_##wc(R, X); \ + R##_c = X##_c; \ + R##_e = X##_e; \ + R##_s = 1 ^ X##_s; \ + } while (0) + + +/* + * Main multiplication routine. The input values should be cooked. + */ + +#define _FP_MUL(fs, wc, R, X, Y) \ +do { \ + R##_s = X##_s ^ Y##_s; \ + switch (_FP_CLS_COMBINE(X##_c, Y##_c)) \ + { \ + case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_NORMAL): \ + R##_c = FP_CLS_NORMAL; \ + R##_e = X##_e + Y##_e + 1; \ + \ + _FP_MUL_MEAT_##fs(R,X,Y); \ + \ + if (_FP_FRAC_OVERP_##wc(fs, R)) \ + _FP_FRAC_SRS_##wc(R, 1, _FP_WFRACBITS_##fs); \ + else \ + R##_e--; \ + break; \ + \ + case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_NAN): \ + _FP_CHOOSENAN(fs, wc, R, X, Y); \ + break; \ + \ + case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_NORMAL): \ + case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_INF): \ + case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_ZERO): \ + R##_s = X##_s; \ + \ + case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_INF): \ + case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_NORMAL): \ + case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_NORMAL): \ + case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_ZERO): \ + _FP_FRAC_COPY_##wc(R, X); \ + R##_c = X##_c; \ + break; \ + \ + case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_NAN): \ + case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_NAN): \ + case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_NAN): \ + R##_s = Y##_s; \ + \ + case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_INF): \ + case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_ZERO): \ + _FP_FRAC_COPY_##wc(R, Y); \ + R##_c = Y##_c; \ + break; \ + \ + case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_ZERO): \ + case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_INF): \ + R##_c = FP_CLS_NAN; \ + _FP_FRAC_SET_##wc(R, _FP_NANFRAC_##fs); \ + break; \ + \ + default: \ + abort(); \ + } \ +} while (0) + + +/* + * Main division routine. The input values should be cooked. + */ + +#define _FP_DIV(fs, wc, R, X, Y) \ +do { \ + R##_s = X##_s ^ Y##_s; \ + switch (_FP_CLS_COMBINE(X##_c, Y##_c)) \ + { \ + case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_NORMAL): \ + R##_c = FP_CLS_NORMAL; \ + R##_e = X##_e - Y##_e; \ + \ + _FP_DIV_MEAT_##fs(R,X,Y); \ + break; \ + \ + case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_NAN): \ + _FP_CHOOSENAN(fs, wc, R, X, Y); \ + break; \ + \ + case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_NORMAL): \ + case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_INF): \ + case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_ZERO): \ + R##_s = X##_s; \ + _FP_FRAC_COPY_##wc(R, X); \ + R##_c = X##_c; \ + break; \ + \ + case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_NAN): \ + case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_NAN): \ + case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_NAN): \ + R##_s = Y##_s; \ + _FP_FRAC_COPY_##wc(R, Y); \ + R##_c = Y##_c; \ + break; \ + \ + case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_INF): \ + case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_INF): \ + case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_NORMAL): \ + R##_c = FP_CLS_ZERO; \ + break; \ + \ + case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_ZERO): \ + case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_ZERO): \ + case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_NORMAL): \ + R##_c = FP_CLS_INF; \ + break; \ + \ + case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_INF): \ + case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_ZERO): \ + R##_c = FP_CLS_NAN; \ + _FP_FRAC_SET_##wc(R, _FP_NANFRAC_##fs); \ + break; \ + \ + default: \ + abort(); \ + } \ +} while (0) + + +/* + * Main differential comparison routine. The inputs should be raw not + * cooked. The return is -1,0,1 for normal values, 2 otherwise. + */ + +#define _FP_CMP(fs, wc, ret, X, Y, un) \ + do { \ + /* NANs are unordered */ \ + if ((X##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc(X)) \ + || (Y##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc(Y))) \ + { \ + ret = un; \ + } \ + else \ + { \ + int __x_zero = (!X##_e && _FP_FRAC_ZEROP_##wc(X)) ? 1 : 0; \ + int __y_zero = (!Y##_e && _FP_FRAC_ZEROP_##wc(Y)) ? 1 : 0; \ + \ + if (__x_zero && __y_zero) \ + ret = 0; \ + else if (__x_zero) \ + ret = Y##_s ? 1 : -1; \ + else if (__y_zero) \ + ret = X##_s ? -1 : 1; \ + else if (X##_s != Y##_s) \ + ret = X##_s ? -1 : 1; \ + else if (X##_e > Y##_e) \ + ret = X##_s ? -1 : 1; \ + else if (X##_e < Y##_e) \ + ret = X##_s ? 1 : -1; \ + else if (_FP_FRAC_GT_##wc(X, Y)) \ + ret = X##_s ? -1 : 1; \ + else if (_FP_FRAC_GT_##wc(Y, X)) \ + ret = X##_s ? 1 : -1; \ + else \ + ret = 0; \ + } \ + } while (0) + + +/* Simplification for strict equality. */ + +#define _FP_CMP_EQ(fs, wc, ret, X, Y) \ + do { \ + /* NANs are unordered */ \ + if ((X##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc(X)) \ + || (Y##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc(Y))) \ + { \ + ret = 1; \ + } \ + else \ + { \ + ret = !(X##_e == Y##_e \ + && _FP_FRAC_EQ_##wc(X, Y) \ + && (X##_s == Y##_s || !X##_e && _FP_FRAC_ZEROP_##wc(X))); \ + } \ + } while (0) + +/* + * Main square root routine. The input value should be cooked. + */ + +#define _FP_SQRT(fs, wc, R, X) \ +do { \ + _FP_FRAC_DECL_##wc(T); _FP_FRAC_DECL_##wc(S); \ + _FP_W_TYPE q; \ + switch (X##_c) \ + { \ + case FP_CLS_NAN: \ + R##_s = 0; \ + R##_c = FP_CLS_NAN; \ + _FP_FRAC_SET_##wc(X, _FP_ZEROFRAC_##wc); \ + break; \ + case FP_CLS_INF: \ + if (X##_s) \ + { \ + R##_s = 0; \ + R##_c = FP_CLS_NAN; /* sNAN */ \ + } \ + else \ + { \ + R##_s = 0; \ + R##_c = FP_CLS_INF; /* sqrt(+inf) = +inf */ \ + } \ + break; \ + case FP_CLS_ZERO: \ + R##_s = X##_s; \ + R##_c = FP_CLS_ZERO; /* sqrt(+-0) = +-0 */ \ + break; \ + case FP_CLS_NORMAL: \ + R##_s = 0; \ + if (X##_s) \ + { \ + R##_c = FP_CLS_NAN; /* sNAN */ \ + break; \ + } \ + R##_c = FP_CLS_NORMAL; \ + if (X##_e & 1) \ + _FP_FRAC_SLL_##wc(X, 1); \ + R##_e = X##_e >> 1; \ + _FP_FRAC_SET_##wc(S, _FP_ZEROFRAC_##wc); \ + _FP_FRAC_SET_##wc(R, _FP_ZEROFRAC_##wc); \ + q = _FP_OVERFLOW_##fs; \ + _FP_FRAC_SLL_##wc(X, 1); \ + _FP_SQRT_MEAT_##wc(R, S, T, X, q); \ + _FP_FRAC_SRL_##wc(R, 1); \ + } \ + } while (0) + +/* + * Convert from FP to integer + */ + +/* "When a NaN, infinity, large positive argument >= 2147483648.0, or + * large negative argument <= -2147483649.0 is converted to an integer, + * the invalid_current bit...should be set and fp_exception_IEEE_754 should + * be raised. If the floating point invalid trap is disabled, no trap occurs + * and a numerical result is generated: if the sign bit of the operand + * is 0, the result is 2147483647; if the sign bit of the operand is 1, + * the result is -2147483648." + * Similarly for conversion to extended ints, except that the boundaries + * are >= 2^63, <= -(2^63 + 1), and the results are 2^63 + 1 for s=0 and + * -2^63 for s=1. + * -- SPARC Architecture Manual V9, Appendix B, which specifies how + * SPARCs resolve implementation dependencies in the IEEE-754 spec. + * I don't believe that the code below follows this. I'm not even sure + * it's right! + * It doesn't cope with needing to convert to an n bit integer when there + * is no n bit integer type. Fortunately gcc provides long long so this + * isn't a problem for sparc32. + * I have, however, fixed its NaN handling to conform as above. + * -- PMM 02/1998 + * NB: rsigned is not 'is r declared signed?' but 'should the value stored + * in r be signed or unsigned?'. r is always(?) declared unsigned. + * Comments below are mine, BTW -- PMM + */ +#define _FP_TO_INT(fs, wc, r, X, rsize, rsigned) \ + do { \ + switch (X##_c) \ + { \ + case FP_CLS_NORMAL: \ + if (X##_e < 0) \ + { \ + /* case FP_CLS_NAN: see above! */ \ + case FP_CLS_ZERO: \ + r = 0; \ + } \ + else if (X##_e >= rsize - (rsigned != 0)) \ + { /* overflow */ \ + case FP_CLS_NAN: \ + case FP_CLS_INF: \ + if (rsigned) \ + { \ + r = 1; \ + r <<= rsize - 1; \ + r -= 1 - X##_s; \ + } \ + else \ + { \ + r = 0; \ + if (!X##_s) \ + r = ~r; \ + } \ + } \ + else \ + { \ + if (_FP_W_TYPE_SIZE*wc < rsize) \ + { \ + _FP_FRAC_ASSEMBLE_##wc(r, X, rsize); \ + r <<= X##_e - _FP_WFRACBITS_##fs; \ + } \ + else \ + { \ + if (X##_e >= _FP_WFRACBITS_##fs) \ + _FP_FRAC_SLL_##wc(X, (X##_e - _FP_WFRACBITS_##fs + 1));\ + else \ + _FP_FRAC_SRL_##wc(X, (_FP_WFRACBITS_##fs - X##_e - 1));\ + _FP_FRAC_ASSEMBLE_##wc(r, X, rsize); \ + } \ + if (rsigned && X##_s) \ + r = -r; \ + } \ + break; \ + } \ + } while (0) + +#define _FP_FROM_INT(fs, wc, X, r, rsize, rtype) \ + do { \ + if (r) \ + { \ + X##_c = FP_CLS_NORMAL; \ + \ + if ((X##_s = (r < 0))) \ + r = -r; \ + /* Note that `r' is now considered unsigned, so we don't have \ + to worry about the single signed overflow case. */ \ + \ + if (rsize <= _FP_W_TYPE_SIZE) \ + __FP_CLZ(X##_e, r); \ + else \ + __FP_CLZ_2(X##_e, (_FP_W_TYPE)(r >> _FP_W_TYPE_SIZE), \ + (_FP_W_TYPE)r); \ + if (rsize < _FP_W_TYPE_SIZE) \ + X##_e -= (_FP_W_TYPE_SIZE - rsize); \ + X##_e = rsize - X##_e - 1; \ + \ + if (_FP_FRACBITS_##fs < rsize && _FP_WFRACBITS_##fs < X##_e) \ + __FP_FRAC_SRS_1(r, (X##_e - _FP_WFRACBITS_##fs), rsize); \ + r &= ~((_FP_W_TYPE)1 << X##_e); \ + _FP_FRAC_DISASSEMBLE_##wc(X, ((unsigned rtype)r), rsize); \ + _FP_FRAC_SLL_##wc(X, (_FP_WFRACBITS_##fs - X##_e - 1)); \ + } \ + else \ + { \ + X##_c = FP_CLS_ZERO, X##_s = 0; \ + } \ + } while (0) + + +#define FP_CONV(dfs,sfs,dwc,swc,D,S) \ + do { \ + _FP_FRAC_CONV_##dwc##_##swc(dfs, sfs, D, S); \ + D##_e = S##_e; \ + D##_c = S##_c; \ + D##_s = S##_s; \ + } while (0) + +/* + * Helper primitives. + */ + +/* Count leading zeros in a word. */ + +#ifndef __FP_CLZ +#if _FP_W_TYPE_SIZE < 64 +/* this is just to shut the compiler up about shifts > word length -- PMM 02/1998 */ +#define __FP_CLZ(r, x) \ + do { \ + _FP_W_TYPE _t = (x); \ + r = _FP_W_TYPE_SIZE - 1; \ + if (_t > 0xffff) r -= 16; \ + if (_t > 0xffff) _t >>= 16; \ + if (_t > 0xff) r -= 8; \ + if (_t > 0xff) _t >>= 8; \ + if (_t & 0xf0) r -= 4; \ + if (_t & 0xf0) _t >>= 4; \ + if (_t & 0xc) r -= 2; \ + if (_t & 0xc) _t >>= 2; \ + if (_t & 0x2) r -= 1; \ + } while (0) +#else /* not _FP_W_TYPE_SIZE < 64 */ +#define __FP_CLZ(r, x) \ + do { \ + _FP_W_TYPE _t = (x); \ + r = _FP_W_TYPE_SIZE - 1; \ + if (_t > 0xffffffff) r -= 32; \ + if (_t > 0xffffffff) _t >>= 32; \ + if (_t > 0xffff) r -= 16; \ + if (_t > 0xffff) _t >>= 16; \ + if (_t > 0xff) r -= 8; \ + if (_t > 0xff) _t >>= 8; \ + if (_t & 0xf0) r -= 4; \ + if (_t & 0xf0) _t >>= 4; \ + if (_t & 0xc) r -= 2; \ + if (_t & 0xc) _t >>= 2; \ + if (_t & 0x2) r -= 1; \ + } while (0) +#endif /* not _FP_W_TYPE_SIZE < 64 */ +#endif /* ndef __FP_CLZ */ + +#define _FP_DIV_HELP_imm(q, r, n, d) \ + do { \ + q = n / d, r = n % d; \ + } while (0) + diff --git a/arch/powerpc/math-emu/sfp-machine.h b/arch/powerpc/math-emu/sfp-machine.h new file mode 100644 index 000000000000..4b17d83cfcdd --- /dev/null +++ b/arch/powerpc/math-emu/sfp-machine.h @@ -0,0 +1,377 @@ +/* Machine-dependent software floating-point definitions. PPC version. + Copyright (C) 1997 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If + not, write to the Free Software Foundation, Inc., + 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + Actually, this is a PPC (32bit) version, written based on the + i386, sparc, and sparc64 versions, by me, + Peter Maydell (pmaydell@chiark.greenend.org.uk). + Comments are by and large also mine, although they may be inaccurate. + + In picking out asm fragments I've gone with the lowest common + denominator, which also happens to be the hardware I have :-> + That is, a SPARC without hardware multiply and divide. + */ + +/* basic word size definitions */ +#define _FP_W_TYPE_SIZE 32 +#define _FP_W_TYPE unsigned long +#define _FP_WS_TYPE signed long +#define _FP_I_TYPE long + +#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2)) +#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1)) +#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2)) + +/* You can optionally code some things like addition in asm. For + * example, i386 defines __FP_FRAC_ADD_2 as asm. If you don't + * then you get a fragment of C code [if you change an #ifdef 0 + * in op-2.h] or a call to add_ssaaaa (see below). + * Good places to look for asm fragments to use are gcc and glibc. + * gcc's longlong.h is useful. + */ + +/* We need to know how to multiply and divide. If the host word size + * is >= 2*fracbits you can use FP_MUL_MEAT_n_imm(t,R,X,Y) which + * codes the multiply with whatever gcc does to 'a * b'. + * _FP_MUL_MEAT_n_wide(t,R,X,Y,f) is used when you have an asm + * function that can multiply two 1W values and get a 2W result. + * Otherwise you're stuck with _FP_MUL_MEAT_n_hard(t,R,X,Y) which + * does bitshifting to avoid overflow. + * For division there is FP_DIV_MEAT_n_imm(t,R,X,Y,f) for word size + * >= 2*fracbits, where f is either _FP_DIV_HELP_imm or + * _FP_DIV_HELP_ldiv (see op-1.h). + * _FP_DIV_MEAT_udiv() is if you have asm to do 2W/1W => (1W, 1W). + * [GCC and glibc have longlong.h which has the asm macro udiv_qrnnd + * to do this.] + * In general, 'n' is the number of words required to hold the type, + * and 't' is either S, D or Q for single/double/quad. + * -- PMM + */ +/* Example: SPARC64: + * #define _FP_MUL_MEAT_S(R,X,Y) _FP_MUL_MEAT_1_imm(S,R,X,Y) + * #define _FP_MUL_MEAT_D(R,X,Y) _FP_MUL_MEAT_1_wide(D,R,X,Y,umul_ppmm) + * #define _FP_MUL_MEAT_Q(R,X,Y) _FP_MUL_MEAT_2_wide(Q,R,X,Y,umul_ppmm) + * + * #define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_imm(S,R,X,Y,_FP_DIV_HELP_imm) + * #define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_1_udiv(D,R,X,Y) + * #define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_2_udiv_64(Q,R,X,Y) + * + * Example: i386: + * #define _FP_MUL_MEAT_S(R,X,Y) _FP_MUL_MEAT_1_wide(S,R,X,Y,_i386_mul_32_64) + * #define _FP_MUL_MEAT_D(R,X,Y) _FP_MUL_MEAT_2_wide(D,R,X,Y,_i386_mul_32_64) + * + * #define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_udiv(S,R,X,Y,_i386_div_64_32) + * #define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_2_udiv_64(D,R,X,Y) + */ + +#define _FP_MUL_MEAT_S(R,X,Y) _FP_MUL_MEAT_1_wide(S,R,X,Y,umul_ppmm) +#define _FP_MUL_MEAT_D(R,X,Y) _FP_MUL_MEAT_2_wide(D,R,X,Y,umul_ppmm) + +#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_udiv(S,R,X,Y) +#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_2_udiv_64(D,R,X,Y) + +/* These macros define what NaN looks like. They're supposed to expand to + * a comma-separated set of 32bit unsigned ints that encode NaN. + */ +#define _FP_NANFRAC_S _FP_QNANBIT_S +#define _FP_NANFRAC_D _FP_QNANBIT_D, 0 +#define _FP_NANFRAC_Q _FP_QNANBIT_Q, 0, 0, 0 + +#define _FP_KEEPNANFRACP 1 + +/* This macro appears to be called when both X and Y are NaNs, and + * has to choose one and copy it to R. i386 goes for the larger of the + * two, sparc64 just picks Y. I don't understand this at all so I'll + * go with sparc64 because it's shorter :-> -- PMM + */ +#define _FP_CHOOSENAN(fs, wc, R, X, Y) \ + do { \ + R##_s = Y##_s; \ + _FP_FRAC_COPY_##wc(R,Y); \ + R##_c = FP_CLS_NAN; \ + } while (0) + + +extern void fp_unpack_d(long *, unsigned long *, unsigned long *, + long *, long *, void *); +extern int fp_pack_d(void *, long, unsigned long, unsigned long, long, long); +extern int fp_pack_ds(void *, long, unsigned long, unsigned long, long, long); + +#define __FP_UNPACK_RAW_1(fs, X, val) \ + do { \ + union _FP_UNION_##fs *_flo = \ + (union _FP_UNION_##fs *)val; \ + \ + X##_f = _flo->bits.frac; \ + X##_e = _flo->bits.exp; \ + X##_s = _flo->bits.sign; \ + } while (0) + +#define __FP_UNPACK_RAW_2(fs, X, val) \ + do { \ + union _FP_UNION_##fs *_flo = \ + (union _FP_UNION_##fs *)val; \ + \ + X##_f0 = _flo->bits.frac0; \ + X##_f1 = _flo->bits.frac1; \ + X##_e = _flo->bits.exp; \ + X##_s = _flo->bits.sign; \ + } while (0) + +#define __FP_UNPACK_S(X,val) \ + do { \ + __FP_UNPACK_RAW_1(S,X,val); \ + _FP_UNPACK_CANONICAL(S,1,X); \ + } while (0) + +#define __FP_UNPACK_D(X,val) \ + fp_unpack_d(&X##_s, &X##_f1, &X##_f0, &X##_e, &X##_c, val) + +#define __FP_PACK_RAW_1(fs, val, X) \ + do { \ + union _FP_UNION_##fs *_flo = \ + (union _FP_UNION_##fs *)val; \ + \ + _flo->bits.frac = X##_f; \ + _flo->bits.exp = X##_e; \ + _flo->bits.sign = X##_s; \ + } while (0) + +#define __FP_PACK_RAW_2(fs, val, X) \ + do { \ + union _FP_UNION_##fs *_flo = \ + (union _FP_UNION_##fs *)val; \ + \ + _flo->bits.frac0 = X##_f0; \ + _flo->bits.frac1 = X##_f1; \ + _flo->bits.exp = X##_e; \ + _flo->bits.sign = X##_s; \ + } while (0) + +#include <linux/kernel.h> +#include <linux/sched.h> + +#define __FPU_FPSCR (current->thread.fpscr.val) + +/* We only actually write to the destination register + * if exceptions signalled (if any) will not trap. + */ +#define __FPU_ENABLED_EXC \ +({ \ + (__FPU_FPSCR >> 3) & 0x1f; \ +}) + +#define __FPU_TRAP_P(bits) \ + ((__FPU_ENABLED_EXC & (bits)) != 0) + +#define __FP_PACK_S(val,X) \ +({ int __exc = _FP_PACK_CANONICAL(S,1,X); \ + if(!__exc || !__FPU_TRAP_P(__exc)) \ + __FP_PACK_RAW_1(S,val,X); \ + __exc; \ +}) + +#define __FP_PACK_D(val,X) \ + fp_pack_d(val, X##_s, X##_f1, X##_f0, X##_e, X##_c) + +#define __FP_PACK_DS(val,X) \ + fp_pack_ds(val, X##_s, X##_f1, X##_f0, X##_e, X##_c) + +/* Obtain the current rounding mode. */ +#define FP_ROUNDMODE \ +({ \ + __FPU_FPSCR & 0x3; \ +}) + +/* the asm fragments go here: all these are taken from glibc-2.0.5's + * stdlib/longlong.h + */ + +#include <linux/types.h> +#include <asm/byteorder.h> + +/* add_ssaaaa is used in op-2.h and should be equivalent to + * #define add_ssaaaa(sh,sl,ah,al,bh,bl) (sh = ah+bh+ (( sl = al+bl) < al)) + * add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1, + * high_addend_2, low_addend_2) adds two UWtype integers, composed by + * HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2 + * respectively. The result is placed in HIGH_SUM and LOW_SUM. Overflow + * (i.e. carry out) is not stored anywhere, and is lost. + */ +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + do { \ + if (__builtin_constant_p (bh) && (bh) == 0) \ + __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%r" ((USItype)(ah)), \ + "%r" ((USItype)(al)), \ + "rI" ((USItype)(bl))); \ + else if (__builtin_constant_p (bh) && (bh) ==~(USItype) 0) \ + __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%r" ((USItype)(ah)), \ + "%r" ((USItype)(al)), \ + "rI" ((USItype)(bl))); \ + else \ + __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%r" ((USItype)(ah)), \ + "r" ((USItype)(bh)), \ + "%r" ((USItype)(al)), \ + "rI" ((USItype)(bl))); \ + } while (0) + +/* sub_ddmmss is used in op-2.h and udivmodti4.c and should be equivalent to + * #define sub_ddmmss(sh, sl, ah, al, bh, bl) (sh = ah-bh - ((sl = al-bl) > al)) + * sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend, + * high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers, + * composed by HIGH_MINUEND_1 and LOW_MINUEND_1, and HIGH_SUBTRAHEND_2 and + * LOW_SUBTRAHEND_2 respectively. The result is placed in HIGH_DIFFERENCE + * and LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere, + * and is lost. + */ +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + do { \ + if (__builtin_constant_p (ah) && (ah) == 0) \ + __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "r" ((USItype)(bh)), \ + "rI" ((USItype)(al)), \ + "r" ((USItype)(bl))); \ + else if (__builtin_constant_p (ah) && (ah) ==~(USItype) 0) \ + __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "r" ((USItype)(bh)), \ + "rI" ((USItype)(al)), \ + "r" ((USItype)(bl))); \ + else if (__builtin_constant_p (bh) && (bh) == 0) \ + __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "r" ((USItype)(ah)), \ + "rI" ((USItype)(al)), \ + "r" ((USItype)(bl))); \ + else if (__builtin_constant_p (bh) && (bh) ==~(USItype) 0) \ + __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "r" ((USItype)(ah)), \ + "rI" ((USItype)(al)), \ + "r" ((USItype)(bl))); \ + else \ + __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "r" ((USItype)(ah)), \ + "r" ((USItype)(bh)), \ + "rI" ((USItype)(al)), \ + "r" ((USItype)(bl))); \ + } while (0) + +/* asm fragments for mul and div */ + +/* umul_ppmm(high_prod, low_prod, multipler, multiplicand) multiplies two + * UWtype integers MULTIPLER and MULTIPLICAND, and generates a two UWtype + * word product in HIGH_PROD and LOW_PROD. + */ +#define umul_ppmm(ph, pl, m0, m1) \ + do { \ + USItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("mulhwu %0,%1,%2" \ + : "=r" ((USItype)(ph)) \ + : "%r" (__m0), \ + "r" (__m1)); \ + (pl) = __m0 * __m1; \ + } while (0) + +/* udiv_qrnnd(quotient, remainder, high_numerator, low_numerator, + * denominator) divides a UDWtype, composed by the UWtype integers + * HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient + * in QUOTIENT and the remainder in REMAINDER. HIGH_NUMERATOR must be less + * than DENOMINATOR for correct operation. If, in addition, the most + * significant bit of DENOMINATOR must be 1, then the pre-processor symbol + * UDIV_NEEDS_NORMALIZATION is defined to 1. + */ +#define udiv_qrnnd(q, r, n1, n0, d) \ + do { \ + UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; \ + __d1 = __ll_highpart (d); \ + __d0 = __ll_lowpart (d); \ + \ + __r1 = (n1) % __d1; \ + __q1 = (n1) / __d1; \ + __m = (UWtype) __q1 * __d0; \ + __r1 = __r1 * __ll_B | __ll_highpart (n0); \ + if (__r1 < __m) \ + { \ + __q1--, __r1 += (d); \ + if (__r1 >= (d)) /* we didn't get carry when adding to __r1 */ \ + if (__r1 < __m) \ + __q1--, __r1 += (d); \ + } \ + __r1 -= __m; \ + \ + __r0 = __r1 % __d1; \ + __q0 = __r1 / __d1; \ + __m = (UWtype) __q0 * __d0; \ + __r0 = __r0 * __ll_B | __ll_lowpart (n0); \ + if (__r0 < __m) \ + { \ + __q0--, __r0 += (d); \ + if (__r0 >= (d)) \ + if (__r0 < __m) \ + __q0--, __r0 += (d); \ + } \ + __r0 -= __m; \ + \ + (q) = (UWtype) __q1 * __ll_B | __q0; \ + (r) = __r0; \ + } while (0) + +#define UDIV_NEEDS_NORMALIZATION 1 + +#define abort() \ + return 0 + +#ifdef __BIG_ENDIAN +#define __BYTE_ORDER __BIG_ENDIAN +#else +#define __BYTE_ORDER __LITTLE_ENDIAN +#endif + +/* Exception flags. */ +#define EFLAG_INVALID (1 << (31 - 2)) +#define EFLAG_OVERFLOW (1 << (31 - 3)) +#define EFLAG_UNDERFLOW (1 << (31 - 4)) +#define EFLAG_DIVZERO (1 << (31 - 5)) +#define EFLAG_INEXACT (1 << (31 - 6)) + +#define EFLAG_VXSNAN (1 << (31 - 7)) +#define EFLAG_VXISI (1 << (31 - 8)) +#define EFLAG_VXIDI (1 << (31 - 9)) +#define EFLAG_VXZDZ (1 << (31 - 10)) +#define EFLAG_VXIMZ (1 << (31 - 11)) +#define EFLAG_VXVC (1 << (31 - 12)) +#define EFLAG_VXSOFT (1 << (31 - 21)) +#define EFLAG_VXSQRT (1 << (31 - 22)) +#define EFLAG_VXCVI (1 << (31 - 23)) diff --git a/arch/powerpc/math-emu/single.h b/arch/powerpc/math-emu/single.h new file mode 100644 index 000000000000..f19d99451815 --- /dev/null +++ b/arch/powerpc/math-emu/single.h @@ -0,0 +1,66 @@ +/* + * Definitions for IEEE Single Precision + */ + +#if _FP_W_TYPE_SIZE < 32 +#error "Here's a nickel kid. Go buy yourself a real computer." +#endif + +#define _FP_FRACBITS_S 24 +#define _FP_FRACXBITS_S (_FP_W_TYPE_SIZE - _FP_FRACBITS_S) +#define _FP_WFRACBITS_S (_FP_WORKBITS + _FP_FRACBITS_S) +#define _FP_WFRACXBITS_S (_FP_W_TYPE_SIZE - _FP_WFRACBITS_S) +#define _FP_EXPBITS_S 8 +#define _FP_EXPBIAS_S 127 +#define _FP_EXPMAX_S 255 +#define _FP_QNANBIT_S ((_FP_W_TYPE)1 << (_FP_FRACBITS_S-2)) +#define _FP_IMPLBIT_S ((_FP_W_TYPE)1 << (_FP_FRACBITS_S-1)) +#define _FP_OVERFLOW_S ((_FP_W_TYPE)1 << (_FP_WFRACBITS_S)) + +/* The implementation of _FP_MUL_MEAT_S and _FP_DIV_MEAT_S should be + chosen by the target machine. */ + +union _FP_UNION_S +{ + float flt; + struct { +#if __BYTE_ORDER == __BIG_ENDIAN + unsigned sign : 1; + unsigned exp : _FP_EXPBITS_S; + unsigned frac : _FP_FRACBITS_S - (_FP_IMPLBIT_S != 0); +#else + unsigned frac : _FP_FRACBITS_S - (_FP_IMPLBIT_S != 0); + unsigned exp : _FP_EXPBITS_S; + unsigned sign : 1; +#endif + } bits __attribute__((packed)); +}; + +#define FP_DECL_S(X) _FP_DECL(1,X) +#define FP_UNPACK_RAW_S(X,val) _FP_UNPACK_RAW_1(S,X,val) +#define FP_PACK_RAW_S(val,X) _FP_PACK_RAW_1(S,val,X) + +#define FP_UNPACK_S(X,val) \ + do { \ + _FP_UNPACK_RAW_1(S,X,val); \ + _FP_UNPACK_CANONICAL(S,1,X); \ + } while (0) + +#define FP_PACK_S(val,X) \ + do { \ + _FP_PACK_CANONICAL(S,1,X); \ + _FP_PACK_RAW_1(S,val,X); \ + } while (0) + +#define FP_NEG_S(R,X) _FP_NEG(S,1,R,X) +#define FP_ADD_S(R,X,Y) _FP_ADD(S,1,R,X,Y) +#define FP_SUB_S(R,X,Y) _FP_SUB(S,1,R,X,Y) +#define FP_MUL_S(R,X,Y) _FP_MUL(S,1,R,X,Y) +#define FP_DIV_S(R,X,Y) _FP_DIV(S,1,R,X,Y) +#define FP_SQRT_S(R,X) _FP_SQRT(S,1,R,X) + +#define FP_CMP_S(r,X,Y,un) _FP_CMP(S,1,r,X,Y,un) +#define FP_CMP_EQ_S(r,X,Y) _FP_CMP_EQ(S,1,r,X,Y) + +#define FP_TO_INT_S(r,X,rsz,rsg) _FP_TO_INT(S,1,r,X,rsz,rsg) +#define FP_FROM_INT_S(X,r,rs,rt) _FP_FROM_INT(S,1,X,r,rs,rt) diff --git a/arch/powerpc/math-emu/soft-fp.h b/arch/powerpc/math-emu/soft-fp.h new file mode 100644 index 000000000000..cca39598f873 --- /dev/null +++ b/arch/powerpc/math-emu/soft-fp.h @@ -0,0 +1,104 @@ +#ifndef SOFT_FP_H +#define SOFT_FP_H + +#include "sfp-machine.h" + +#define _FP_WORKBITS 3 +#define _FP_WORK_LSB ((_FP_W_TYPE)1 << 3) +#define _FP_WORK_ROUND ((_FP_W_TYPE)1 << 2) +#define _FP_WORK_GUARD ((_FP_W_TYPE)1 << 1) +#define _FP_WORK_STICKY ((_FP_W_TYPE)1 << 0) + +#ifndef FP_RND_NEAREST +# define FP_RND_NEAREST 0 +# define FP_RND_ZERO 1 +# define FP_RND_PINF 2 +# define FP_RND_MINF 3 +#ifndef FP_ROUNDMODE +# define FP_ROUNDMODE FP_RND_NEAREST +#endif +#endif + +#define _FP_ROUND_NEAREST(wc, X) \ +({ int __ret = 0; \ + int __frac = _FP_FRAC_LOW_##wc(X) & 15; \ + if (__frac & 7) { \ + __ret = EFLAG_INEXACT; \ + if ((__frac & 7) != _FP_WORK_ROUND) \ + _FP_FRAC_ADDI_##wc(X, _FP_WORK_ROUND); \ + else if (__frac & _FP_WORK_LSB) \ + _FP_FRAC_ADDI_##wc(X, _FP_WORK_ROUND); \ + } \ + __ret; \ +}) + +#define _FP_ROUND_ZERO(wc, X) \ +({ int __ret = 0; \ + if (_FP_FRAC_LOW_##wc(X) & 7) \ + __ret = EFLAG_INEXACT; \ + __ret; \ +}) + +#define _FP_ROUND_PINF(wc, X) \ +({ int __ret = EFLAG_INEXACT; \ + if (!X##_s && (_FP_FRAC_LOW_##wc(X) & 7)) \ + _FP_FRAC_ADDI_##wc(X, _FP_WORK_LSB); \ + else __ret = 0; \ + __ret; \ +}) + +#define _FP_ROUND_MINF(wc, X) \ +({ int __ret = EFLAG_INEXACT; \ + if (X##_s && (_FP_FRAC_LOW_##wc(X) & 7)) \ + _FP_FRAC_ADDI_##wc(X, _FP_WORK_LSB); \ + else __ret = 0; \ + __ret; \ +}) + +#define _FP_ROUND(wc, X) \ +({ int __ret = 0; \ + switch (FP_ROUNDMODE) \ + { \ + case FP_RND_NEAREST: \ + __ret |= _FP_ROUND_NEAREST(wc,X); \ + break; \ + case FP_RND_ZERO: \ + __ret |= _FP_ROUND_ZERO(wc,X); \ + break; \ + case FP_RND_PINF: \ + __ret |= _FP_ROUND_PINF(wc,X); \ + break; \ + case FP_RND_MINF: \ + __ret |= _FP_ROUND_MINF(wc,X); \ + break; \ + }; \ + __ret; \ +}) + +#define FP_CLS_NORMAL 0 +#define FP_CLS_ZERO 1 +#define FP_CLS_INF 2 +#define FP_CLS_NAN 3 + +#define _FP_CLS_COMBINE(x,y) (((x) << 2) | (y)) + +#include "op-1.h" +#include "op-2.h" +#include "op-4.h" +#include "op-common.h" + +/* Sigh. Silly things longlong.h needs. */ +#define UWtype _FP_W_TYPE +#define W_TYPE_SIZE _FP_W_TYPE_SIZE + +typedef int SItype __attribute__((mode(SI))); +typedef int DItype __attribute__((mode(DI))); +typedef unsigned int USItype __attribute__((mode(SI))); +typedef unsigned int UDItype __attribute__((mode(DI))); +#if _FP_W_TYPE_SIZE == 32 +typedef unsigned int UHWtype __attribute__((mode(HI))); +#elif _FP_W_TYPE_SIZE == 64 +typedef USItype UHWtype; +#endif + +#endif diff --git a/arch/powerpc/math-emu/stfd.c b/arch/powerpc/math-emu/stfd.c new file mode 100644 index 000000000000..3f8c2558a9e8 --- /dev/null +++ b/arch/powerpc/math-emu/stfd.c @@ -0,0 +1,20 @@ +#include <linux/types.h> +#include <linux/errno.h> +#include <asm/uaccess.h> + +int +stfd(void *frS, void *ea) +{ +#if 0 +#ifdef DEBUG + printk("%s: S %p, ea %p: ", __FUNCTION__, frS, ea); + dump_double(frS); + printk("\n"); +#endif +#endif + + if (copy_to_user(ea, frS, sizeof(double))) + return -EFAULT; + + return 0; +} diff --git a/arch/powerpc/math-emu/stfiwx.c b/arch/powerpc/math-emu/stfiwx.c new file mode 100644 index 000000000000..95caaeec6a08 --- /dev/null +++ b/arch/powerpc/math-emu/stfiwx.c @@ -0,0 +1,16 @@ +#include <linux/types.h> +#include <linux/errno.h> +#include <asm/uaccess.h> + +int +stfiwx(u32 *frS, void *ea) +{ +#ifdef DEBUG + printk("%s: %p %p\n", __FUNCTION__, frS, ea); +#endif + + if (copy_to_user(ea, &frS[1], sizeof(frS[1]))) + return -EFAULT; + + return 0; +} diff --git a/arch/powerpc/math-emu/stfs.c b/arch/powerpc/math-emu/stfs.c new file mode 100644 index 000000000000..e87ca23c6dc3 --- /dev/null +++ b/arch/powerpc/math-emu/stfs.c @@ -0,0 +1,41 @@ +#include <linux/types.h> +#include <linux/errno.h> +#include <asm/uaccess.h> + +#include "soft-fp.h" +#include "double.h" +#include "single.h" + +int +stfs(void *frS, void *ea) +{ + FP_DECL_D(A); + FP_DECL_S(R); + float f; + int err; + +#ifdef DEBUG + printk("%s: S %p, ea %p\n", __FUNCTION__, frS, ea); +#endif + + __FP_UNPACK_D(A, frS); + +#ifdef DEBUG + printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c); +#endif + + FP_CONV(S, D, 1, 2, R, A); + +#ifdef DEBUG + printk("R: %ld %lu %ld (%ld)\n", R_s, R_f, R_e, R_c); +#endif + + err = _FP_PACK_CANONICAL(S, 1, R); + if (!err || !__FPU_TRAP_P(err)) { + __FP_PACK_RAW_1(S, &f, R); + if (copy_to_user(ea, &f, sizeof(float))) + return -EFAULT; + } + + return err; +} diff --git a/arch/powerpc/math-emu/types.c b/arch/powerpc/math-emu/types.c new file mode 100644 index 000000000000..e1ed15d829db --- /dev/null +++ b/arch/powerpc/math-emu/types.c @@ -0,0 +1,51 @@ +#include "soft-fp.h" +#include "double.h" +#include "single.h" + +void +fp_unpack_d(long *_s, unsigned long *_f1, unsigned long *_f0, + long *_e, long *_c, void *val) +{ + FP_DECL_D(X); + + __FP_UNPACK_RAW_2(D, X, val); + + _FP_UNPACK_CANONICAL(D, 2, X); + + *_s = X_s; + *_f1 = X_f1; + *_f0 = X_f0; + *_e = X_e; + *_c = X_c; +} + +int +fp_pack_d(void *val, long X_s, unsigned long X_f1, + unsigned long X_f0, long X_e, long X_c) +{ + int exc; + + exc = _FP_PACK_CANONICAL(D, 2, X); + if (!exc || !__FPU_TRAP_P(exc)) + __FP_PACK_RAW_2(D, val, X); + return exc; +} + +int +fp_pack_ds(void *val, long X_s, unsigned long X_f1, + unsigned long X_f0, long X_e, long X_c) +{ + FP_DECL_S(__X); + int exc; + + FP_CONV(S, D, 1, 2, __X, X); + exc = _FP_PACK_CANONICAL(S, 1, __X); + if (!exc || !__FPU_TRAP_P(exc)) { + _FP_UNPACK_CANONICAL(S, 1, __X); + FP_CONV(D, S, 2, 1, X, __X); + exc |= _FP_PACK_CANONICAL(D, 2, X); + if (!exc || !__FPU_TRAP_P(exc)) + __FP_PACK_RAW_2(D, val, X); + } + return exc; +} diff --git a/arch/powerpc/math-emu/udivmodti4.c b/arch/powerpc/math-emu/udivmodti4.c new file mode 100644 index 000000000000..7e112dc1e2f2 --- /dev/null +++ b/arch/powerpc/math-emu/udivmodti4.c @@ -0,0 +1,191 @@ +/* This has so very few changes over libgcc2's __udivmoddi4 it isn't funny. */ + +#include "soft-fp.h" + +#undef count_leading_zeros +#define count_leading_zeros __FP_CLZ + +void +_fp_udivmodti4(_FP_W_TYPE q[2], _FP_W_TYPE r[2], + _FP_W_TYPE n1, _FP_W_TYPE n0, + _FP_W_TYPE d1, _FP_W_TYPE d0) +{ + _FP_W_TYPE q0, q1, r0, r1; + _FP_I_TYPE b, bm; + + if (d1 == 0) + { +#if !UDIV_NEEDS_NORMALIZATION + if (d0 > n1) + { + /* 0q = nn / 0D */ + + udiv_qrnnd (q0, n0, n1, n0, d0); + q1 = 0; + + /* Remainder in n0. */ + } + else + { + /* qq = NN / 0d */ + + if (d0 == 0) + d0 = 1 / d0; /* Divide intentionally by zero. */ + + udiv_qrnnd (q1, n1, 0, n1, d0); + udiv_qrnnd (q0, n0, n1, n0, d0); + + /* Remainder in n0. */ + } + + r0 = n0; + r1 = 0; + +#else /* UDIV_NEEDS_NORMALIZATION */ + + if (d0 > n1) + { + /* 0q = nn / 0D */ + + count_leading_zeros (bm, d0); + + if (bm != 0) + { + /* Normalize, i.e. make the most significant bit of the + denominator set. */ + + d0 = d0 << bm; + n1 = (n1 << bm) | (n0 >> (_FP_W_TYPE_SIZE - bm)); + n0 = n0 << bm; + } + + udiv_qrnnd (q0, n0, n1, n0, d0); + q1 = 0; + + /* Remainder in n0 >> bm. */ + } + else + { + /* qq = NN / 0d */ + + if (d0 == 0) + d0 = 1 / d0; /* Divide intentionally by zero. */ + + count_leading_zeros (bm, d0); + + if (bm == 0) + { + /* From (n1 >= d0) /\ (the most significant bit of d0 is set), + conclude (the most significant bit of n1 is set) /\ (the + leading quotient digit q1 = 1). + + This special case is necessary, not an optimization. + (Shifts counts of SI_TYPE_SIZE are undefined.) */ + + n1 -= d0; + q1 = 1; + } + else + { + _FP_W_TYPE n2; + + /* Normalize. */ + + b = _FP_W_TYPE_SIZE - bm; + + d0 = d0 << bm; + n2 = n1 >> b; + n1 = (n1 << bm) | (n0 >> b); + n0 = n0 << bm; + + udiv_qrnnd (q1, n1, n2, n1, d0); + } + + /* n1 != d0... */ + + udiv_qrnnd (q0, n0, n1, n0, d0); + + /* Remainder in n0 >> bm. */ + } + + r0 = n0 >> bm; + r1 = 0; +#endif /* UDIV_NEEDS_NORMALIZATION */ + } + else + { + if (d1 > n1) + { + /* 00 = nn / DD */ + + q0 = 0; + q1 = 0; + + /* Remainder in n1n0. */ + r0 = n0; + r1 = n1; + } + else + { + /* 0q = NN / dd */ + + count_leading_zeros (bm, d1); + if (bm == 0) + { + /* From (n1 >= d1) /\ (the most significant bit of d1 is set), + conclude (the most significant bit of n1 is set) /\ (the + quotient digit q0 = 0 or 1). + + This special case is necessary, not an optimization. */ + + /* The condition on the next line takes advantage of that + n1 >= d1 (true due to program flow). */ + if (n1 > d1 || n0 >= d0) + { + q0 = 1; + sub_ddmmss (n1, n0, n1, n0, d1, d0); + } + else + q0 = 0; + + q1 = 0; + + r0 = n0; + r1 = n1; + } + else + { + _FP_W_TYPE m1, m0, n2; + + /* Normalize. */ + + b = _FP_W_TYPE_SIZE - bm; + + d1 = (d1 << bm) | (d0 >> b); + d0 = d0 << bm; + n2 = n1 >> b; + n1 = (n1 << bm) | (n0 >> b); + n0 = n0 << bm; + + udiv_qrnnd (q0, n1, n2, n1, d1); + umul_ppmm (m1, m0, q0, d0); + + if (m1 > n1 || (m1 == n1 && m0 > n0)) + { + q0--; + sub_ddmmss (m1, m0, m1, m0, d1, d0); + } + + q1 = 0; + + /* Remainder in (n1n0 - m1m0) >> bm. */ + sub_ddmmss (n1, n0, n1, n0, m1, m0); + r0 = (n1 << b) | (n0 >> bm); + r1 = n1 >> bm; + } + } + } + + q[0] = q0; q[1] = q1; + r[0] = r0, r[1] = r1; +} diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index ec4adcb4bc28..5aea0909a5ec 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -267,25 +267,29 @@ good_area: #endif #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) pte_t *ptep; + pmd_t *pmdp; /* Since 4xx/Book-E supports per-page execute permission, * we lazily flush dcache to icache. */ ptep = NULL; - if (get_pteptr(mm, address, &ptep) && pte_present(*ptep)) { - struct page *page = pte_page(*ptep); - - if (! test_bit(PG_arch_1, &page->flags)) { - flush_dcache_icache_page(page); - set_bit(PG_arch_1, &page->flags); + if (get_pteptr(mm, address, &ptep, &pmdp)) { + spinlock_t *ptl = pte_lockptr(mm, pmdp); + spin_lock(ptl); + if (pte_present(*ptep)) { + struct page *page = pte_page(*ptep); + + if (!test_bit(PG_arch_1, &page->flags)) { + flush_dcache_icache_page(page); + set_bit(PG_arch_1, &page->flags); + } + pte_update(ptep, 0, _PAGE_HWEXEC); + _tlbie(address); + pte_unmap_unlock(ptep, ptl); + up_read(&mm->mmap_sem); + return 0; } - pte_update(ptep, 0, _PAGE_HWEXEC); - _tlbie(address); - pte_unmap(ptep); - up_read(&mm->mmap_sem); - return 0; + pte_unmap_unlock(ptep, ptl); } - if (ptep != NULL) - pte_unmap(ptep); #endif /* a write */ } else if (is_write) { diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 89b35c181314..c006d9039633 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -167,7 +167,7 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend, * normal insert callback here. */ #ifdef CONFIG_PPC_ISERIES - if (_machine == PLATFORM_ISERIES_LPAR) + if (machine_is(iseries)) ret = iSeries_hpte_insert(hpteg, va, paddr, tmp_mode, @@ -176,7 +176,7 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend, else #endif #ifdef CONFIG_PPC_PSERIES - if (_machine & PLATFORM_LPAR) + if (machine_is(pseries) && firmware_has_feature(FW_FEATURE_LPAR)) ret = pSeries_lpar_hpte_insert(hpteg, va, paddr, tmp_mode, @@ -295,8 +295,7 @@ static void __init htab_init_page_sizes(void) * Not in the device-tree, let's fallback on known size * list for 16M capable GP & GR */ - if ((_machine != PLATFORM_ISERIES_LPAR) && - cpu_has_feature(CPU_FTR_16M_PAGE)) + if (cpu_has_feature(CPU_FTR_16M_PAGE) && !machine_is(iseries)) memcpy(mmu_psize_defs, mmu_psize_defaults_gp, sizeof(mmu_psize_defaults_gp)); found: diff --git a/arch/powerpc/mm/imalloc.c b/arch/powerpc/mm/imalloc.c index 8b0c132bc163..add8c1a9af68 100644 --- a/arch/powerpc/mm/imalloc.c +++ b/arch/powerpc/mm/imalloc.c @@ -13,12 +13,12 @@ #include <asm/uaccess.h> #include <asm/pgalloc.h> #include <asm/pgtable.h> -#include <asm/semaphore.h> +#include <linux/mutex.h> #include <asm/cacheflush.h> #include "mmu_decl.h" -static DECLARE_MUTEX(imlist_sem); +static DEFINE_MUTEX(imlist_mutex); struct vm_struct * imlist = NULL; static int get_free_im_addr(unsigned long size, unsigned long *im_addr) @@ -257,7 +257,7 @@ struct vm_struct * im_get_free_area(unsigned long size) struct vm_struct *area; unsigned long addr; - down(&imlist_sem); + mutex_lock(&imlist_mutex); if (get_free_im_addr(size, &addr)) { printk(KERN_ERR "%s() cannot obtain addr for size 0x%lx\n", __FUNCTION__, size); @@ -272,7 +272,7 @@ struct vm_struct * im_get_free_area(unsigned long size) __FUNCTION__, addr, size); } next_im_done: - up(&imlist_sem); + mutex_unlock(&imlist_mutex); return area; } @@ -281,9 +281,9 @@ struct vm_struct * im_get_area(unsigned long v_addr, unsigned long size, { struct vm_struct *area; - down(&imlist_sem); + mutex_lock(&imlist_mutex); area = __im_get_area(v_addr, size, criteria); - up(&imlist_sem); + mutex_unlock(&imlist_mutex); return area; } @@ -297,17 +297,17 @@ void im_free(void * addr) printk(KERN_ERR "Trying to %s bad address (%p)\n", __FUNCTION__, addr); return; } - down(&imlist_sem); + mutex_lock(&imlist_mutex); for (p = &imlist ; (tmp = *p) ; p = &tmp->next) { if (tmp->addr == addr) { *p = tmp->next; unmap_vm_area(tmp); kfree(tmp); - up(&imlist_sem); + mutex_unlock(&imlist_mutex); return; } } - up(&imlist_sem); + mutex_unlock(&imlist_mutex); printk(KERN_ERR "Trying to %s nonexistent area (%p)\n", __FUNCTION__, addr); } diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index badac10d700c..741dd8802d49 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -195,7 +195,7 @@ void show_mem(void) printk("Mem-info:\n"); show_free_areas(); printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); - for_each_pgdat(pgdat) { + for_each_online_pgdat(pgdat) { unsigned long flags; pgdat_resize_lock(pgdat, &flags); for (i = 0; i < pgdat->node_spanned_pages; i++) { @@ -342,7 +342,7 @@ void __init mem_init(void) #ifdef CONFIG_NEED_MULTIPLE_NODES for_each_online_node(nid) { if (NODE_DATA(nid)->node_spanned_pages != 0) { - printk("freeing bootmem node %x\n", nid); + printk("freeing bootmem node %d\n", nid); totalram_pages += free_all_bootmem_node(NODE_DATA(nid)); } @@ -351,7 +351,7 @@ void __init mem_init(void) max_mapnr = max_pfn; totalram_pages += free_all_bootmem(); #endif - for_each_pgdat(pgdat) { + for_each_online_pgdat(pgdat) { for (i = 0; i < pgdat->node_spanned_pages; i++) { if (!pfn_valid(pgdat->node_start_pfn + i)) continue; diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index e89b22aa539e..0a335f34974c 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -756,6 +756,7 @@ int hot_add_scn_to_nid(unsigned long scn_addr) struct device_node *memory = NULL; nodemask_t nodes; int default_nid = any_online_node(NODE_MASK_ALL); + int nid; if (!numa_enabled || (min_common_depth < 0)) return default_nid; @@ -790,6 +791,7 @@ ha_new_range: goto ha_new_range; } BUG(); /* section address should be found above */ + return 0; /* Temporary code to ensure that returned node is not empty */ got_nid: diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index d296eb6b4545..90628601fac7 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -372,7 +372,7 @@ void __init io_block_mapping(unsigned long virt, phys_addr_t phys, * the PTE pointer is unmodified if PTE is not found. */ int -get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep) +get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep, pmd_t **pmdp) { pgd_t *pgd; pmd_t *pmd; @@ -387,6 +387,8 @@ get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep) if (pte) { retval = 1; *ptep = pte; + if (pmdp) + *pmdp = pmd; /* XXX caller needs to do pte_unmap, yuck */ } } @@ -424,7 +426,7 @@ unsigned long iopa(unsigned long addr) mm = &init_mm; pa = 0; - if (get_pteptr(mm, addr, &pte)) { + if (get_pteptr(mm, addr, &pte, NULL)) { pa = (pte_val(*pte) & PAGE_MASK) | (addr & ~PAGE_MASK); pte_unmap(pte); } diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c index 91d25fb27f89..4a9291d9fef8 100644 --- a/arch/powerpc/mm/stab.c +++ b/arch/powerpc/mm/stab.c @@ -239,7 +239,7 @@ void stabs_alloc(void) if (cpu_has_feature(CPU_FTR_SLB)) return; - for_each_cpu(cpu) { + for_each_possible_cpu(cpu) { unsigned long newstab; if (cpu == 0) diff --git a/arch/powerpc/oprofile/Makefile b/arch/powerpc/oprofile/Makefile index 554cd7c75321..f5f9859a8338 100644 --- a/arch/powerpc/oprofile/Makefile +++ b/arch/powerpc/oprofile/Makefile @@ -6,7 +6,7 @@ DRIVER_OBJS := $(addprefix ../../../drivers/oprofile/, \ oprofilefs.o oprofile_stats.o \ timer_int.o ) -oprofile-y := $(DRIVER_OBJS) common.o +oprofile-y := $(DRIVER_OBJS) common.o backtrace.o oprofile-$(CONFIG_PPC64) += op_model_rs64.o op_model_power4.o oprofile-$(CONFIG_FSL_BOOKE) += op_model_fsl_booke.o oprofile-$(CONFIG_PPC32) += op_model_7450.o diff --git a/arch/powerpc/oprofile/backtrace.c b/arch/powerpc/oprofile/backtrace.c new file mode 100644 index 000000000000..75f57bc96b40 --- /dev/null +++ b/arch/powerpc/oprofile/backtrace.c @@ -0,0 +1,126 @@ +/** + * Copyright (C) 2005 Brian Rogan <bcr6@cornell.edu>, IBM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. +**/ + +#include <linux/oprofile.h> +#include <linux/sched.h> +#include <asm/processor.h> +#include <asm/uaccess.h> + +#define STACK_SP(STACK) *(STACK) + +#define STACK_LR64(STACK) *((unsigned long *)(STACK) + 2) +#define STACK_LR32(STACK) *((unsigned int *)(STACK) + 1) + +#ifdef CONFIG_PPC64 +#define STACK_LR(STACK) STACK_LR64(STACK) +#else +#define STACK_LR(STACK) STACK_LR32(STACK) +#endif + +static unsigned int user_getsp32(unsigned int sp, int is_first) +{ + unsigned int stack_frame[2]; + + if (!access_ok(VERIFY_READ, sp, sizeof(stack_frame))) + return 0; + + /* + * The most likely reason for this is that we returned -EFAULT, + * which means that we've done all that we can do from + * interrupt context. + */ + if (__copy_from_user_inatomic(stack_frame, (void *)(long)sp, + sizeof(stack_frame))) + return 0; + + if (!is_first) + oprofile_add_trace(STACK_LR32(stack_frame)); + + /* + * We do not enforce increasing stack addresses here because + * we may transition to a different stack, eg a signal handler. + */ + return STACK_SP(stack_frame); +} + +#ifdef CONFIG_PPC64 +static unsigned long user_getsp64(unsigned long sp, int is_first) +{ + unsigned long stack_frame[3]; + + if (!access_ok(VERIFY_READ, sp, sizeof(stack_frame))) + return 0; + + if (__copy_from_user_inatomic(stack_frame, (void *)sp, + sizeof(stack_frame))) + return 0; + + if (!is_first) + oprofile_add_trace(STACK_LR64(stack_frame)); + + return STACK_SP(stack_frame); +} +#endif + +static unsigned long kernel_getsp(unsigned long sp, int is_first) +{ + unsigned long *stack_frame = (unsigned long *)sp; + + if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD)) + return 0; + + if (!is_first) + oprofile_add_trace(STACK_LR(stack_frame)); + + /* + * We do not enforce increasing stack addresses here because + * we might be transitioning from an interrupt stack to a kernel + * stack. validate_sp() is designed to understand this, so just + * use it. + */ + return STACK_SP(stack_frame); +} + +void op_powerpc_backtrace(struct pt_regs * const regs, unsigned int depth) +{ + unsigned long sp = regs->gpr[1]; + int first_frame = 1; + + /* We ditch the top stackframe so need to loop through an extra time */ + depth += 1; + + if (!user_mode(regs)) { + while (depth--) { + sp = kernel_getsp(sp, first_frame); + if (!sp) + break; + first_frame = 0; + } + } else { +#ifdef CONFIG_PPC64 + if (!test_thread_flag(TIF_32BIT)) { + while (depth--) { + sp = user_getsp64(sp, first_frame); + if (!sp) + break; + first_frame = 0; + } + + return; + } +#endif + + while (depth--) { + sp = user_getsp32(sp, first_frame); + if (!sp) + break; + first_frame = 0; + } + } +} diff --git a/arch/powerpc/oprofile/common.c b/arch/powerpc/oprofile/common.c index cc2535be3a73..5b1de7e8041e 100644 --- a/arch/powerpc/oprofile/common.c +++ b/arch/powerpc/oprofile/common.c @@ -117,18 +117,10 @@ static int op_powerpc_create_files(struct super_block *sb, struct dentry *root) oprofilefs_create_ulong(sb, root, "enable_kernel", &sys.enable_kernel); oprofilefs_create_ulong(sb, root, "enable_user", &sys.enable_user); -#ifdef CONFIG_PPC64 - oprofilefs_create_ulong(sb, root, "backtrace_spinlocks", - &sys.backtrace_spinlocks); -#endif /* Default to tracing both kernel and user */ sys.enable_kernel = 1; sys.enable_user = 1; -#ifdef CONFIG_PPC64 - /* Turn on backtracing through spinlocks by default */ - sys.backtrace_spinlocks = 1; -#endif return 0; } @@ -168,6 +160,7 @@ int __init oprofile_arch_init(struct oprofile_operations *ops) ops->shutdown = op_powerpc_shutdown; ops->start = op_powerpc_start; ops->stop = op_powerpc_stop; + ops->backtrace = op_powerpc_backtrace; printk(KERN_INFO "oprofile: using %s performance monitoring.\n", ops->cpu_type); diff --git a/arch/powerpc/oprofile/op_model_7450.c b/arch/powerpc/oprofile/op_model_7450.c index 32abfdbb0eb1..e0491c3c71f1 100644 --- a/arch/powerpc/oprofile/op_model_7450.c +++ b/arch/powerpc/oprofile/op_model_7450.c @@ -176,13 +176,13 @@ static void fsl7450_handle_interrupt(struct pt_regs *regs, mtmsr(mfmsr() | MSR_PMM); pc = mfspr(SPRN_SIAR); - is_kernel = (pc >= KERNELBASE); + is_kernel = is_kernel_addr(pc); for (i = 0; i < NUM_CTRS; ++i) { val = ctr_read(i); if (val < 0) { if (oprofile_running && ctr[i].enabled) { - oprofile_add_pc(pc, is_kernel, i); + oprofile_add_ext_sample(pc, regs, i, is_kernel); ctr_write(i, reset_value[i]); } else { ctr_write(i, 0); diff --git a/arch/powerpc/oprofile/op_model_fsl_booke.c b/arch/powerpc/oprofile/op_model_fsl_booke.c index 26539cda6023..93d63e62662f 100644 --- a/arch/powerpc/oprofile/op_model_fsl_booke.c +++ b/arch/powerpc/oprofile/op_model_fsl_booke.c @@ -154,13 +154,13 @@ static void fsl_booke_handle_interrupt(struct pt_regs *regs, mtmsr(mfmsr() | MSR_PMM); pc = regs->nip; - is_kernel = (pc >= KERNELBASE); + is_kernel = is_kernel_addr(pc); for (i = 0; i < num_counters; ++i) { val = ctr_read(i); if (val < 0) { if (oprofile_running && ctr[i].enabled) { - oprofile_add_pc(pc, is_kernel, i); + oprofile_add_ext_sample(pc, regs, i, is_kernel); ctr_write(i, reset_value[i]); } else { ctr_write(i, 0); diff --git a/arch/powerpc/oprofile/op_model_power4.c b/arch/powerpc/oprofile/op_model_power4.c index 4b06e53eb9b4..4c2beab1fdc1 100644 --- a/arch/powerpc/oprofile/op_model_power4.c +++ b/arch/powerpc/oprofile/op_model_power4.c @@ -25,18 +25,14 @@ static unsigned long reset_value[OP_MAX_COUNTER]; static int oprofile_running; static int mmcra_has_sihv; +/* Unfortunately these bits vary between CPUs */ +static unsigned long mmcra_sihv = MMCRA_SIHV; +static unsigned long mmcra_sipr = MMCRA_SIPR; /* mmcr values are set in power4_reg_setup, used in power4_cpu_setup */ static u32 mmcr0_val; static u64 mmcr1_val; -static u32 mmcra_val; - -/* - * Since we do not have an NMI, backtracing through spinlocks is - * only a best guess. In light of this, allow it to be disabled at - * runtime. - */ -static int backtrace_spinlocks; +static u64 mmcra_val; static void power4_reg_setup(struct op_counter_config *ctr, struct op_system_config *sys, @@ -63,8 +59,6 @@ static void power4_reg_setup(struct op_counter_config *ctr, mmcr1_val = sys->mmcr1; mmcra_val = sys->mmcra; - backtrace_spinlocks = sys->backtrace_spinlocks; - for (i = 0; i < cur_cpu_spec->num_pmcs; ++i) reset_value[i] = 0x80000000UL - ctr[i].count; @@ -197,25 +191,6 @@ static void __attribute_used__ kernel_unknown_bucket(void) { } -static unsigned long check_spinlock_pc(struct pt_regs *regs, - unsigned long profile_pc) -{ - unsigned long pc = instruction_pointer(regs); - - /* - * If both the SIAR (sampled instruction) and the perfmon exception - * occurred in a spinlock region then we account the sample to the - * calling function. This isnt 100% correct, we really need soft - * IRQ disable so we always get the perfmon exception at the - * point at which the SIAR is set. - */ - if (backtrace_spinlocks && in_lock_functions(pc) && - in_lock_functions(profile_pc)) - return regs->link; - else - return profile_pc; -} - /* * On GQ and newer the MMCRA stores the HV and PR bits at the time * the SIAR was sampled. We use that to work out if the SIAR was sampled in @@ -228,17 +203,17 @@ static unsigned long get_pc(struct pt_regs *regs) /* Cant do much about it */ if (!mmcra_has_sihv) - return check_spinlock_pc(regs, pc); + return pc; mmcra = mfspr(SPRN_MMCRA); /* Were we in the hypervisor? */ - if (firmware_has_feature(FW_FEATURE_LPAR) && (mmcra & MMCRA_SIHV)) + if (firmware_has_feature(FW_FEATURE_LPAR) && (mmcra & mmcra_sihv)) /* function descriptor madness */ return *((unsigned long *)hypervisor_bucket); /* We were in userspace, nothing to do */ - if (mmcra & MMCRA_SIPR) + if (mmcra & mmcra_sipr) return pc; #ifdef CONFIG_PPC_RTAS @@ -257,7 +232,7 @@ static unsigned long get_pc(struct pt_regs *regs) /* function descriptor madness */ return *((unsigned long *)kernel_unknown_bucket); - return check_spinlock_pc(regs, pc); + return pc; } static int get_kernel(unsigned long pc) @@ -268,7 +243,7 @@ static int get_kernel(unsigned long pc) is_kernel = is_kernel_addr(pc); } else { unsigned long mmcra = mfspr(SPRN_MMCRA); - is_kernel = ((mmcra & MMCRA_SIPR) == 0); + is_kernel = ((mmcra & mmcra_sipr) == 0); } return is_kernel; @@ -293,7 +268,7 @@ static void power4_handle_interrupt(struct pt_regs *regs, val = ctr_read(i); if (val < 0) { if (oprofile_running && ctr[i].enabled) { - oprofile_add_pc(pc, is_kernel, i); + oprofile_add_ext_sample(pc, regs, i, is_kernel); ctr_write(i, reset_value[i]); } else { ctr_write(i, 0); diff --git a/arch/powerpc/oprofile/op_model_rs64.c b/arch/powerpc/oprofile/op_model_rs64.c index 5c909ee609fe..042f8f4867ad 100644 --- a/arch/powerpc/oprofile/op_model_rs64.c +++ b/arch/powerpc/oprofile/op_model_rs64.c @@ -175,10 +175,13 @@ static void rs64_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr) { unsigned int mmcr0; + int is_kernel; int val; int i; unsigned long pc = mfspr(SPRN_SIAR); + is_kernel = is_kernel_addr(pc); + /* set the PMM bit (see comment below) */ mtmsrd(mfmsr() | MSR_PMM); @@ -186,7 +189,7 @@ static void rs64_handle_interrupt(struct pt_regs *regs, val = ctr_read(i); if (val < 0) { if (ctr[i].enabled) { - oprofile_add_pc(pc, is_kernel_addr(pc), i); + oprofile_add_ext_sample(pc, regs, i, is_kernel); ctr_write(i, reset_value[i]); } else { ctr_write(i, 0); diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig index d3d0ff745e84..06e371282f57 100644 --- a/arch/powerpc/platforms/85xx/Kconfig +++ b/arch/powerpc/platforms/85xx/Kconfig @@ -7,6 +7,7 @@ choice config MPC8540_ADS bool "Freescale MPC8540 ADS" + select DEFAULT_UIMAGE help This option enables support for the MPC 8540 ADS board diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig index 3157071e241c..c2a3db8edb0c 100644 --- a/arch/powerpc/platforms/cell/Kconfig +++ b/arch/powerpc/platforms/cell/Kconfig @@ -10,4 +10,9 @@ config SPU_FS Units on machines implementing the Broadband Processor Architecture. +config SPUFS_MMAP + bool + depends on SPU_FS && SPARSEMEM && !PPC_64K_PAGES + default y + endmenu diff --git a/arch/powerpc/platforms/cell/Makefile b/arch/powerpc/platforms/cell/Makefile index 3b998a393e3f..e570bad06394 100644 --- a/arch/powerpc/platforms/cell/Makefile +++ b/arch/powerpc/platforms/cell/Makefile @@ -6,5 +6,11 @@ obj-$(CONFIG_SPU_FS) += spu-base.o spufs/ spu-base-y += spu_base.o spu_priv1.o -builtin-spufs-$(CONFIG_SPU_FS) += spu_syscalls.o -obj-y += $(builtin-spufs-m) +# needed only when building loadable spufs.ko +spufs-modular-$(CONFIG_SPU_FS) += spu_syscalls.o +obj-y += $(spufs-modular-m) + +# always needed in kernel +spufs-builtin-$(CONFIG_SPU_FS) += spu_callbacks.o +obj-y += $(spufs-builtin-y) $(spufs-builtin-m) + diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c index 63aa52acf441..978be1c30c1b 100644 --- a/arch/powerpc/platforms/cell/interrupt.c +++ b/arch/powerpc/platforms/cell/interrupt.c @@ -63,7 +63,24 @@ static DEFINE_PER_CPU(struct iic, iic); void iic_local_enable(void) { - out_be64(&__get_cpu_var(iic).regs->prio, 0xff); + struct iic *iic = &__get_cpu_var(iic); + u64 tmp; + + /* + * There seems to be a bug that is present in DD2.x CPUs + * and still only partially fixed in DD3.1. + * This bug causes a value written to the priority register + * not to make it there, resulting in a system hang unless we + * write it again. + * Masking with 0xf0 is done because the Cell BE does not + * implement the lower four bits of the interrupt priority, + * they always read back as zeroes, although future CPUs + * might implement different bits. + */ + do { + out_be64(&iic->regs->prio, 0xff); + tmp = in_be64(&iic->regs->prio); + } while ((tmp & 0xf0) != 0xf0); } void iic_local_disable(void) @@ -123,7 +140,7 @@ static int iic_external_get_irq(struct iic_pending_bits pending) pending.class != 2) break; irq = IIC_EXT_OFFSET - + spider_get_irq(pending.prio + node * IIC_NODE_STRIDE) + + spider_get_irq(node) + node * IIC_NODE_STRIDE; break; case 0x01 ... 0x04: @@ -174,38 +191,98 @@ int iic_get_irq(struct pt_regs *regs) return irq; } -static int setup_iic(int cpu, struct iic *iic) +/* hardcoded part to be compatible with older firmware */ + +static int setup_iic_hardcoded(void) { struct device_node *np; - int nodeid = cpu / 2; + int nodeid, cpu; unsigned long regs; + struct iic *iic; - for (np = of_find_node_by_type(NULL, "cpu"); - np; - np = of_find_node_by_type(np, "cpu")) { - if (nodeid == *(int *)get_property(np, "node-id", NULL)) - break; + for_each_cpu(cpu) { + iic = &per_cpu(iic, cpu); + nodeid = cpu/2; + + for (np = of_find_node_by_type(NULL, "cpu"); + np; + np = of_find_node_by_type(np, "cpu")) { + if (nodeid == *(int *)get_property(np, "node-id", NULL)) + break; + } + + if (!np) { + printk(KERN_WARNING "IIC: CPU %d not found\n", cpu); + iic->regs = NULL; + iic->target_id = 0xff; + return -ENODEV; + } + + regs = *(long *)get_property(np, "iic", NULL); + + /* hack until we have decided on the devtree info */ + regs += 0x400; + if (cpu & 1) + regs += 0x20; + + printk(KERN_INFO "IIC for CPU %d at %lx\n", cpu, regs); + iic->regs = ioremap(regs, sizeof(struct iic_regs)); + iic->target_id = (nodeid << 4) + ((cpu & 1) ? 0xf : 0xe); } - if (!np) { - printk(KERN_WARNING "IIC: CPU %d not found\n", cpu); - iic->regs = NULL; - iic->target_id = 0xff; - return -ENODEV; - } + return 0; +} - regs = *(long *)get_property(np, "iic", NULL); +static int setup_iic(void) +{ + struct device_node *dn; + unsigned long *regs; + char *compatible; + unsigned *np, found = 0; + struct iic *iic = NULL; + + for (dn = NULL; (dn = of_find_node_by_name(dn, "interrupt-controller"));) { + compatible = (char *)get_property(dn, "compatible", NULL); + + if (!compatible) { + printk(KERN_WARNING "no compatible property found !\n"); + continue; + } - /* hack until we have decided on the devtree info */ - regs += 0x400; - if (cpu & 1) - regs += 0x20; + if (strstr(compatible, "IBM,CBEA-Internal-Interrupt-Controller")) + regs = (unsigned long *)get_property(dn,"reg", NULL); + else + continue; - printk(KERN_DEBUG "IIC for CPU %d at %lx\n", cpu, regs); - iic->regs = __ioremap(regs, sizeof(struct iic_regs), - _PAGE_NO_CACHE); - iic->target_id = (nodeid << 4) + ((cpu & 1) ? 0xf : 0xe); - return 0; + if (!regs) + printk(KERN_WARNING "IIC: no reg property\n"); + + np = (unsigned int *)get_property(dn, "ibm,interrupt-server-ranges", NULL); + + if (!np) { + printk(KERN_WARNING "IIC: CPU association not found\n"); + iic->regs = NULL; + iic->target_id = 0xff; + return -ENODEV; + } + + iic = &per_cpu(iic, np[0]); + iic->regs = ioremap(regs[0], sizeof(struct iic_regs)); + iic->target_id = ((np[0] & 2) << 3) + ((np[0] & 1) ? 0xf : 0xe); + printk("IIC for CPU %d at %lx mapped to %p\n", np[0], regs[0], iic->regs); + + iic = &per_cpu(iic, np[1]); + iic->regs = ioremap(regs[2], sizeof(struct iic_regs)); + iic->target_id = ((np[1] & 2) << 3) + ((np[1] & 1) ? 0xf : 0xe); + printk("IIC for CPU %d at %lx mapped to %p\n", np[1], regs[2], iic->regs); + + found++; + } + + if (found) + return 0; + else + return -ENODEV; } #ifdef CONFIG_SMP @@ -283,10 +360,12 @@ void iic_init_IRQ(void) int cpu, irq_offset; struct iic *iic; + if (setup_iic() < 0) + setup_iic_hardcoded(); + irq_offset = 0; - for_each_cpu(cpu) { + for_each_possible_cpu(cpu) { iic = &per_cpu(iic, cpu); - setup_iic(cpu, iic); if (iic->regs) out_be64(&iic->regs->prio, 0xff); } diff --git a/arch/powerpc/platforms/cell/interrupt.h b/arch/powerpc/platforms/cell/interrupt.h index a14bd38791c0..799f77d98f96 100644 --- a/arch/powerpc/platforms/cell/interrupt.h +++ b/arch/powerpc/platforms/cell/interrupt.h @@ -57,7 +57,7 @@ extern void iic_local_disable(void); extern u8 iic_get_target_id(int cpu); extern void spider_init_IRQ(void); -extern int spider_get_irq(unsigned long int_pending); +extern int spider_get_irq(int node); #endif #endif /* ASM_CELL_PIC_H */ diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index 46e7cb9c3e64..a49ceb799a8e 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -289,7 +289,7 @@ static void cell_do_map_iommu(struct cell_iommu *iommu, ioc_base = iommu->mapped_base; ioc_mmio_base = iommu->mapped_mmio_base; - for (real_address = 0, io_address = 0; + for (real_address = 0, io_address = map_start; io_address <= map_start + map_size; real_address += io_page_size, io_address += io_page_size) { ioste = get_iost_entry(fake_iopt, io_address, io_page_size); @@ -302,7 +302,7 @@ static void cell_do_map_iommu(struct cell_iommu *iommu, set_iopt_cache(ioc_mmio_base, get_ioc_hash_1way(ioste, io_address), get_ioc_tag(ioste, io_address), - get_iopt_entry(real_address-map_start, ioid, IOPT_PROT_RW)); + get_iopt_entry(real_address, ioid, IOPT_PROT_RW)); } } @@ -344,8 +344,8 @@ static int cell_map_iommu_hardcoded(int num_nodes) /* node 0 */ iommu = &cell_iommus[0]; - iommu->mapped_base = __ioremap(0x20000511000, 0x1000, _PAGE_NO_CACHE); - iommu->mapped_mmio_base = __ioremap(0x20000510000, 0x1000, _PAGE_NO_CACHE); + iommu->mapped_base = ioremap(0x20000511000, 0x1000); + iommu->mapped_mmio_base = ioremap(0x20000510000, 0x1000); enable_mapping(iommu->mapped_base, iommu->mapped_mmio_base); @@ -357,8 +357,8 @@ static int cell_map_iommu_hardcoded(int num_nodes) /* node 1 */ iommu = &cell_iommus[1]; - iommu->mapped_base = __ioremap(0x30000511000, 0x1000, _PAGE_NO_CACHE); - iommu->mapped_mmio_base = __ioremap(0x30000510000, 0x1000, _PAGE_NO_CACHE); + iommu->mapped_base = ioremap(0x30000511000, 0x1000); + iommu->mapped_mmio_base = ioremap(0x30000510000, 0x1000); enable_mapping(iommu->mapped_base, iommu->mapped_mmio_base); @@ -407,8 +407,8 @@ static int cell_map_iommu(void) iommu->base = *base; iommu->mmio_base = *mmio_base; - iommu->mapped_base = __ioremap(*base, 0x1000, _PAGE_NO_CACHE); - iommu->mapped_mmio_base = __ioremap(*mmio_base, 0x1000, _PAGE_NO_CACHE); + iommu->mapped_base = ioremap(*base, 0x1000); + iommu->mapped_mmio_base = ioremap(*mmio_base, 0x1000); enable_mapping(iommu->mapped_base, iommu->mapped_mmio_base); diff --git a/arch/powerpc/platforms/cell/pervasive.c b/arch/powerpc/platforms/cell/pervasive.c index e0e051c675dd..7eed8c624517 100644 --- a/arch/powerpc/platforms/cell/pervasive.c +++ b/arch/powerpc/platforms/cell/pervasive.c @@ -203,7 +203,7 @@ found: pr_debug("pervasive area for CPU %d at %lx, size %x\n", cpu, real_address, size); - p->regs = __ioremap(real_address, size, _PAGE_NO_CACHE); + p->regs = ioremap(real_address, size); p->thread = thread; return 0; } @@ -217,7 +217,7 @@ void __init cell_pervasive_init(void) if (!cpu_has_feature(CPU_FTR_PAUSE_ZERO)) return; - for_each_cpu(cpu) { + for_each_possible_cpu(cpu) { p = &cbe_pervasive[cpu]; ret = cbe_find_pmd_mmio(cpu, p); if (ret) diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c index fec8e65b36ea..dac5d0365fde 100644 --- a/arch/powerpc/platforms/cell/setup.c +++ b/arch/powerpc/platforms/cell/setup.c @@ -195,9 +195,13 @@ static void __init cell_init_early(void) } -static int __init cell_probe(int platform) +static int __init cell_probe(void) { - if (platform != PLATFORM_CELL) + /* XXX This is temporary, the Cell maintainer will come up with + * more appropriate detection logic + */ + unsigned long root = of_get_flat_dt_root(); + if (!of_flat_dt_is_compatible(root, "IBM,CPBW-1.0")) return 0; return 1; @@ -212,7 +216,8 @@ static int cell_check_legacy_ioport(unsigned int baseport) return -ENODEV; } -struct machdep_calls __initdata cell_md = { +define_machine(cell) { + .name = "Cell", .probe = cell_probe, .setup_arch = cell_setup_arch, .init_early = cell_init_early, diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c index e74132188bdf..55cbdd77a62d 100644 --- a/arch/powerpc/platforms/cell/spider-pic.c +++ b/arch/powerpc/platforms/cell/spider-pic.c @@ -84,10 +84,11 @@ static void __iomem *spider_get_irq_config(int irq) static void spider_enable_irq(unsigned int irq) { + int nodeid = (irq / IIC_NODE_STRIDE) * 0x10; void __iomem *cfg = spider_get_irq_config(irq); irq = spider_get_nr(irq); - out_be32(cfg, in_be32(cfg) | 0x3107000eu); + out_be32(cfg, (in_be32(cfg) & ~0xf0)| 0x3107000eu | nodeid); out_be32(cfg + 4, in_be32(cfg + 4) | 0x00020000u | irq); } @@ -131,61 +132,108 @@ static struct hw_interrupt_type spider_pic = { .end = spider_end_irq, }; - -int spider_get_irq(unsigned long int_pending) +int spider_get_irq(int node) { - void __iomem *regs = spider_get_pic(int_pending); unsigned long cs; - int irq; - - cs = in_be32(regs + TIR_CS); + void __iomem *regs = spider_pics[node]; - irq = cs >> 24; - if (irq != 63) - return irq; + cs = in_be32(regs + TIR_CS) >> 24; - return -1; + if (cs == 63) + return -1; + else + return cs; } - -void spider_init_IRQ(void) + +/* hardcoded part to be compatible with older firmware */ + +void spider_init_IRQ_hardcoded(void) { int node; - struct device_node *dn; - unsigned int *property; long spiderpic; + long pics[] = { 0x24000008000, 0x34000008000 }; int n; -/* FIXME: detect multiple PICs as soon as the device tree has them */ - for (node = 0; node < 1; node++) { - dn = of_find_node_by_path("/"); - n = prom_n_addr_cells(dn); - property = (unsigned int *) get_property(dn, - "platform-spider-pic", NULL); + pr_debug("%s(%d): Using hardcoded defaults\n", __FUNCTION__, __LINE__); - if (!property) - continue; - for (spiderpic = 0; n > 0; --n) - spiderpic = (spiderpic << 32) + *property++; + for (node = 0; node < num_present_cpus()/2; node++) { + spiderpic = pics[node]; printk(KERN_DEBUG "SPIDER addr: %lx\n", spiderpic); - spider_pics[node] = __ioremap(spiderpic, 0x800, _PAGE_NO_CACHE); + spider_pics[node] = ioremap(spiderpic, 0x800); for (n = 0; n < IIC_NUM_EXT; n++) { int irq = n + IIC_EXT_OFFSET + node * IIC_NODE_STRIDE; get_irq_desc(irq)->handler = &spider_pic; + } /* do not mask any interrupts because of level */ out_be32(spider_pics[node] + TIR_MSK, 0x0); - + /* disable edge detection clear */ /* out_be32(spider_pics[node] + TIR_EDC, 0x0); */ - + /* enable interrupt packets to be output */ out_be32(spider_pics[node] + TIR_PIEN, in_be32(spider_pics[node] + TIR_PIEN) | 0x1); - + /* Enable the interrupt detection enable bit. Do this last! */ out_be32(spider_pics[node] + TIR_DEN, - in_be32(spider_pics[node] +TIR_DEN) | 0x1); + in_be32(spider_pics[node] + TIR_DEN) | 0x1); + } +} + +void spider_init_IRQ(void) +{ + long spider_reg; + struct device_node *dn; + char *compatible; + int n, node = 0; + + for (dn = NULL; (dn = of_find_node_by_name(dn, "interrupt-controller"));) { + compatible = (char *)get_property(dn, "compatible", NULL); + if (!compatible) + continue; + + if (strstr(compatible, "CBEA,platform-spider-pic")) + spider_reg = *(long *)get_property(dn,"reg", NULL); + else if (strstr(compatible, "sti,platform-spider-pic")) { + spider_init_IRQ_hardcoded(); + return; + } else + continue; + + if (!spider_reg) + printk("interrupt controller does not have reg property !\n"); + + n = prom_n_addr_cells(dn); + + if ( n != 2) + printk("reg property with invalid number of elements \n"); + + spider_pics[node] = ioremap(spider_reg, 0x800); + + printk("SPIDER addr: %lx with %i addr_cells mapped to %p\n", + spider_reg, n, spider_pics[node]); + + for (n = 0; n < IIC_NUM_EXT; n++) { + int irq = n + IIC_EXT_OFFSET + node * IIC_NODE_STRIDE; + get_irq_desc(irq)->handler = &spider_pic; } + + /* do not mask any interrupts because of level */ + out_be32(spider_pics[node] + TIR_MSK, 0x0); + + /* disable edge detection clear */ + /* out_be32(spider_pics[node] + TIR_EDC, 0x0); */ + + /* enable interrupt packets to be output */ + out_be32(spider_pics[node] + TIR_PIEN, + in_be32(spider_pics[node] + TIR_PIEN) | 0x1); + + /* Enable the interrupt detection enable bit. Do this last! */ + out_be32(spider_pics[node] + TIR_DEN, + in_be32(spider_pics[node] + TIR_DEN) | 0x1); + + node++; } } diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index d75ae03df686..269dda4fd0b4 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -32,7 +32,7 @@ #include <asm/io.h> #include <asm/prom.h> -#include <asm/semaphore.h> +#include <linux/mutex.h> #include <asm/spu.h> #include <asm/mmu_context.h> @@ -111,7 +111,7 @@ static int __spu_trap_data_seg(struct spu *spu, unsigned long ea) extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap); //XXX static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr) { - pr_debug("%s\n", __FUNCTION__); + pr_debug("%s, %lx, %lx\n", __FUNCTION__, dsisr, ea); /* Handle kernel space hash faults immediately. User hash faults need to be deferred to process context. */ @@ -168,7 +168,7 @@ static int __spu_trap_halt(struct spu *spu) static int __spu_trap_tag_group(struct spu *spu) { pr_debug("%s\n", __FUNCTION__); - /* wake_up(&spu->dma_wq); */ + spu->mfc_callback(spu); return 0; } @@ -242,6 +242,8 @@ spu_irq_class_1(int irq, void *data, struct pt_regs *regs) spu_mfc_dsisr_set(spu, 0ul); spu_int_stat_clear(spu, 1, stat); spin_unlock(&spu->register_lock); + pr_debug("%s: %lx %lx %lx %lx\n", __FUNCTION__, mask, stat, + dar, dsisr); if (stat & 1) /* segment fault */ __spu_trap_data_seg(spu, dar); @@ -342,7 +344,7 @@ spu_free_irqs(struct spu *spu) } static LIST_HEAD(spu_list); -static DECLARE_MUTEX(spu_mutex); +static DEFINE_MUTEX(spu_mutex); static void spu_init_channels(struct spu *spu) { @@ -382,7 +384,7 @@ struct spu *spu_alloc(void) { struct spu *spu; - down(&spu_mutex); + mutex_lock(&spu_mutex); if (!list_empty(&spu_list)) { spu = list_entry(spu_list.next, struct spu, list); list_del_init(&spu->list); @@ -391,7 +393,7 @@ struct spu *spu_alloc(void) pr_debug("No SPU left\n"); spu = NULL; } - up(&spu_mutex); + mutex_unlock(&spu_mutex); if (spu) spu_init_channels(spu); @@ -402,9 +404,9 @@ EXPORT_SYMBOL_GPL(spu_alloc); void spu_free(struct spu *spu) { - down(&spu_mutex); + mutex_lock(&spu_mutex); list_add_tail(&spu->list, &spu_list); - up(&spu_mutex); + mutex_unlock(&spu_mutex); } EXPORT_SYMBOL_GPL(spu_free); @@ -484,14 +486,13 @@ int spu_irq_class_1_bottom(struct spu *spu) ea = spu->dar; dsisr = spu->dsisr; - if (dsisr & MFC_DSISR_PTE_NOT_FOUND) { + if (dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED)) { access = (_PAGE_PRESENT | _PAGE_USER); access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_RW : 0UL; if (hash_page(ea, access, 0x300) != 0) error |= CLASS1_ENABLE_STORAGE_FAULT_INTR; } - if ((error & CLASS1_ENABLE_STORAGE_FAULT_INTR) || - (dsisr & MFC_DSISR_ACCESS_DENIED)) { + if (error & CLASS1_ENABLE_STORAGE_FAULT_INTR) { if ((ret = spu_handle_mm_fault(spu)) != 0) error |= CLASS1_ENABLE_STORAGE_FAULT_INTR; else @@ -568,6 +569,11 @@ static int __init spu_map_device(struct spu *spu, struct device_node *spe) if (!spu->local_store) goto out; + prop = get_property(spe, "problem", NULL); + if (!prop) + goto out_unmap; + spu->problem_phys = *(unsigned long *)prop; + spu->problem= map_spe_prop(spe, "problem"); if (!spu->problem) goto out_unmap; @@ -632,15 +638,16 @@ static int __init create_spu(struct device_node *spe) spu->ibox_callback = NULL; spu->wbox_callback = NULL; spu->stop_callback = NULL; + spu->mfc_callback = NULL; - down(&spu_mutex); + mutex_lock(&spu_mutex); spu->number = number++; ret = spu_request_irqs(spu); if (ret) goto out_unmap; list_add(&spu->list, &spu_list); - up(&spu_mutex); + mutex_unlock(&spu_mutex); pr_debug(KERN_DEBUG "Using SPE %s %02x %p %p %p %p %d\n", spu->name, spu->isrc, spu->local_store, @@ -648,7 +655,7 @@ static int __init create_spu(struct device_node *spe) goto out; out_unmap: - up(&spu_mutex); + mutex_unlock(&spu_mutex); spu_unmap(spu); out_free: kfree(spu); @@ -668,10 +675,10 @@ static void destroy_spu(struct spu *spu) static void cleanup_spu_base(void) { struct spu *spu, *tmp; - down(&spu_mutex); + mutex_lock(&spu_mutex); list_for_each_entry_safe(spu, tmp, &spu_list, list) destroy_spu(spu); - up(&spu_mutex); + mutex_unlock(&spu_mutex); } module_exit(cleanup_spu_base); diff --git a/arch/powerpc/platforms/cell/spu_callbacks.c b/arch/powerpc/platforms/cell/spu_callbacks.c new file mode 100644 index 000000000000..3a4245c926ad --- /dev/null +++ b/arch/powerpc/platforms/cell/spu_callbacks.c @@ -0,0 +1,345 @@ +/* + * System call callback functions for SPUs + */ + +#define DEBUG + +#include <linux/kallsyms.h> +#include <linux/module.h> +#include <linux/syscalls.h> + +#include <asm/spu.h> +#include <asm/syscalls.h> +#include <asm/unistd.h> + +/* + * This table defines the system calls that an SPU can call. + * It is currently a subset of the 64 bit powerpc system calls, + * with the exact semantics. + * + * The reasons for disabling some of the system calls are: + * 1. They interact with the way SPU syscalls are handled + * and we can't let them execute ever: + * restart_syscall, exit, for, execve, ptrace, ... + * 2. They are deprecated and replaced by other means: + * uselib, pciconfig_*, sysfs, ... + * 3. They are somewhat interacting with the system in a way + * we don't want an SPU to: + * reboot, init_module, mount, kexec_load + * 4. They are optional and we can't rely on them being + * linked into the kernel. Unfortunately, the cond_syscall + * helper does not work here as it does not add the necessary + * opd symbols: + * mbind, mq_open, ipc, ... + */ + +void *spu_syscall_table[] = { + [__NR_restart_syscall] sys_ni_syscall, /* sys_restart_syscall */ + [__NR_exit] sys_ni_syscall, /* sys_exit */ + [__NR_fork] sys_ni_syscall, /* ppc_fork */ + [__NR_read] sys_read, + [__NR_write] sys_write, + [__NR_open] sys_open, + [__NR_close] sys_close, + [__NR_waitpid] sys_waitpid, + [__NR_creat] sys_creat, + [__NR_link] sys_link, + [__NR_unlink] sys_unlink, + [__NR_execve] sys_ni_syscall, /* sys_execve */ + [__NR_chdir] sys_chdir, + [__NR_time] sys_time, + [__NR_mknod] sys_mknod, + [__NR_chmod] sys_chmod, + [__NR_lchown] sys_lchown, + [__NR_break] sys_ni_syscall, + [__NR_oldstat] sys_ni_syscall, + [__NR_lseek] sys_lseek, + [__NR_getpid] sys_getpid, + [__NR_mount] sys_ni_syscall, /* sys_mount */ + [__NR_umount] sys_ni_syscall, + [__NR_setuid] sys_setuid, + [__NR_getuid] sys_getuid, + [__NR_stime] sys_stime, + [__NR_ptrace] sys_ni_syscall, /* sys_ptrace */ + [__NR_alarm] sys_alarm, + [__NR_oldfstat] sys_ni_syscall, + [__NR_pause] sys_ni_syscall, /* sys_pause */ + [__NR_utime] sys_ni_syscall, /* sys_utime */ + [__NR_stty] sys_ni_syscall, + [__NR_gtty] sys_ni_syscall, + [__NR_access] sys_access, + [__NR_nice] sys_nice, + [__NR_ftime] sys_ni_syscall, + [__NR_sync] sys_sync, + [__NR_kill] sys_kill, + [__NR_rename] sys_rename, + [__NR_mkdir] sys_mkdir, + [__NR_rmdir] sys_rmdir, + [__NR_dup] sys_dup, + [__NR_pipe] sys_pipe, + [__NR_times] sys_times, + [__NR_prof] sys_ni_syscall, + [__NR_brk] sys_brk, + [__NR_setgid] sys_setgid, + [__NR_getgid] sys_getgid, + [__NR_signal] sys_ni_syscall, /* sys_signal */ + [__NR_geteuid] sys_geteuid, + [__NR_getegid] sys_getegid, + [__NR_acct] sys_ni_syscall, /* sys_acct */ + [__NR_umount2] sys_ni_syscall, /* sys_umount */ + [__NR_lock] sys_ni_syscall, + [__NR_ioctl] sys_ioctl, + [__NR_fcntl] sys_fcntl, + [__NR_mpx] sys_ni_syscall, + [__NR_setpgid] sys_setpgid, + [__NR_ulimit] sys_ni_syscall, + [__NR_oldolduname] sys_ni_syscall, + [__NR_umask] sys_umask, + [__NR_chroot] sys_chroot, + [__NR_ustat] sys_ni_syscall, /* sys_ustat */ + [__NR_dup2] sys_dup2, + [__NR_getppid] sys_getppid, + [__NR_getpgrp] sys_getpgrp, + [__NR_setsid] sys_setsid, + [__NR_sigaction] sys_ni_syscall, + [__NR_sgetmask] sys_sgetmask, + [__NR_ssetmask] sys_ssetmask, + [__NR_setreuid] sys_setreuid, + [__NR_setregid] sys_setregid, + [__NR_sigsuspend] sys_ni_syscall, + [__NR_sigpending] sys_ni_syscall, + [__NR_sethostname] sys_sethostname, + [__NR_setrlimit] sys_setrlimit, + [__NR_getrlimit] sys_ni_syscall, + [__NR_getrusage] sys_getrusage, + [__NR_gettimeofday] sys_gettimeofday, + [__NR_settimeofday] sys_settimeofday, + [__NR_getgroups] sys_getgroups, + [__NR_setgroups] sys_setgroups, + [__NR_select] sys_ni_syscall, + [__NR_symlink] sys_symlink, + [__NR_oldlstat] sys_ni_syscall, + [__NR_readlink] sys_readlink, + [__NR_uselib] sys_ni_syscall, /* sys_uselib */ + [__NR_swapon] sys_ni_syscall, /* sys_swapon */ + [__NR_reboot] sys_ni_syscall, /* sys_reboot */ + [__NR_readdir] sys_ni_syscall, + [__NR_mmap] sys_mmap, + [__NR_munmap] sys_munmap, + [__NR_truncate] sys_truncate, + [__NR_ftruncate] sys_ftruncate, + [__NR_fchmod] sys_fchmod, + [__NR_fchown] sys_fchown, + [__NR_getpriority] sys_getpriority, + [__NR_setpriority] sys_setpriority, + [__NR_profil] sys_ni_syscall, + [__NR_statfs] sys_ni_syscall, /* sys_statfs */ + [__NR_fstatfs] sys_ni_syscall, /* sys_fstatfs */ + [__NR_ioperm] sys_ni_syscall, + [__NR_socketcall] sys_socketcall, + [__NR_syslog] sys_syslog, + [__NR_setitimer] sys_setitimer, + [__NR_getitimer] sys_getitimer, + [__NR_stat] sys_newstat, + [__NR_lstat] sys_newlstat, + [__NR_fstat] sys_newfstat, + [__NR_olduname] sys_ni_syscall, + [__NR_iopl] sys_ni_syscall, + [__NR_vhangup] sys_vhangup, + [__NR_idle] sys_ni_syscall, + [__NR_vm86] sys_ni_syscall, + [__NR_wait4] sys_wait4, + [__NR_swapoff] sys_ni_syscall, /* sys_swapoff */ + [__NR_sysinfo] sys_sysinfo, + [__NR_ipc] sys_ni_syscall, /* sys_ipc */ + [__NR_fsync] sys_fsync, + [__NR_sigreturn] sys_ni_syscall, + [__NR_clone] sys_ni_syscall, /* ppc_clone */ + [__NR_setdomainname] sys_setdomainname, + [__NR_uname] ppc_newuname, + [__NR_modify_ldt] sys_ni_syscall, + [__NR_adjtimex] sys_adjtimex, + [__NR_mprotect] sys_mprotect, + [__NR_sigprocmask] sys_ni_syscall, + [__NR_create_module] sys_ni_syscall, + [__NR_init_module] sys_ni_syscall, /* sys_init_module */ + [__NR_delete_module] sys_ni_syscall, /* sys_delete_module */ + [__NR_get_kernel_syms] sys_ni_syscall, + [__NR_quotactl] sys_ni_syscall, /* sys_quotactl */ + [__NR_getpgid] sys_getpgid, + [__NR_fchdir] sys_fchdir, + [__NR_bdflush] sys_bdflush, + [__NR_sysfs] sys_ni_syscall, /* sys_sysfs */ + [__NR_personality] ppc64_personality, + [__NR_afs_syscall] sys_ni_syscall, + [__NR_setfsuid] sys_setfsuid, + [__NR_setfsgid] sys_setfsgid, + [__NR__llseek] sys_llseek, + [__NR_getdents] sys_getdents, + [__NR__newselect] sys_select, + [__NR_flock] sys_flock, + [__NR_msync] sys_msync, + [__NR_readv] sys_readv, + [__NR_writev] sys_writev, + [__NR_getsid] sys_getsid, + [__NR_fdatasync] sys_fdatasync, + [__NR__sysctl] sys_ni_syscall, /* sys_sysctl */ + [__NR_mlock] sys_mlock, + [__NR_munlock] sys_munlock, + [__NR_mlockall] sys_mlockall, + [__NR_munlockall] sys_munlockall, + [__NR_sched_setparam] sys_sched_setparam, + [__NR_sched_getparam] sys_sched_getparam, + [__NR_sched_setscheduler] sys_sched_setscheduler, + [__NR_sched_getscheduler] sys_sched_getscheduler, + [__NR_sched_yield] sys_sched_yield, + [__NR_sched_get_priority_max] sys_sched_get_priority_max, + [__NR_sched_get_priority_min] sys_sched_get_priority_min, + [__NR_sched_rr_get_interval] sys_sched_rr_get_interval, + [__NR_nanosleep] sys_nanosleep, + [__NR_mremap] sys_mremap, + [__NR_setresuid] sys_setresuid, + [__NR_getresuid] sys_getresuid, + [__NR_query_module] sys_ni_syscall, + [__NR_poll] sys_poll, + [__NR_nfsservctl] sys_ni_syscall, /* sys_nfsservctl */ + [__NR_setresgid] sys_setresgid, + [__NR_getresgid] sys_getresgid, + [__NR_prctl] sys_prctl, + [__NR_rt_sigreturn] sys_ni_syscall, /* ppc64_rt_sigreturn */ + [__NR_rt_sigaction] sys_ni_syscall, /* sys_rt_sigaction */ + [__NR_rt_sigprocmask] sys_ni_syscall, /* sys_rt_sigprocmask */ + [__NR_rt_sigpending] sys_ni_syscall, /* sys_rt_sigpending */ + [__NR_rt_sigtimedwait] sys_ni_syscall, /* sys_rt_sigtimedwait */ + [__NR_rt_sigqueueinfo] sys_ni_syscall, /* sys_rt_sigqueueinfo */ + [__NR_rt_sigsuspend] sys_ni_syscall, /* sys_rt_sigsuspend */ + [__NR_pread64] sys_pread64, + [__NR_pwrite64] sys_pwrite64, + [__NR_chown] sys_chown, + [__NR_getcwd] sys_getcwd, + [__NR_capget] sys_capget, + [__NR_capset] sys_capset, + [__NR_sigaltstack] sys_ni_syscall, /* sys_sigaltstack */ + [__NR_sendfile] sys_sendfile64, + [__NR_getpmsg] sys_ni_syscall, + [__NR_putpmsg] sys_ni_syscall, + [__NR_vfork] sys_ni_syscall, /* ppc_vfork */ + [__NR_ugetrlimit] sys_getrlimit, + [__NR_readahead] sys_readahead, + [192] sys_ni_syscall, + [193] sys_ni_syscall, + [194] sys_ni_syscall, + [195] sys_ni_syscall, + [196] sys_ni_syscall, + [197] sys_ni_syscall, + [__NR_pciconfig_read] sys_ni_syscall, /* sys_pciconfig_read */ + [__NR_pciconfig_write] sys_ni_syscall, /* sys_pciconfig_write */ + [__NR_pciconfig_iobase] sys_ni_syscall, /* sys_pciconfig_iobase */ + [__NR_multiplexer] sys_ni_syscall, + [__NR_getdents64] sys_getdents64, + [__NR_pivot_root] sys_pivot_root, + [204] sys_ni_syscall, + [__NR_madvise] sys_madvise, + [__NR_mincore] sys_mincore, + [__NR_gettid] sys_gettid, + [__NR_tkill] sys_tkill, + [__NR_setxattr] sys_setxattr, + [__NR_lsetxattr] sys_lsetxattr, + [__NR_fsetxattr] sys_fsetxattr, + [__NR_getxattr] sys_getxattr, + [__NR_lgetxattr] sys_lgetxattr, + [__NR_fgetxattr] sys_fgetxattr, + [__NR_listxattr] sys_listxattr, + [__NR_llistxattr] sys_llistxattr, + [__NR_flistxattr] sys_flistxattr, + [__NR_removexattr] sys_removexattr, + [__NR_lremovexattr] sys_lremovexattr, + [__NR_fremovexattr] sys_fremovexattr, + [__NR_futex] sys_futex, + [__NR_sched_setaffinity] sys_sched_setaffinity, + [__NR_sched_getaffinity] sys_sched_getaffinity, + [__NR_tuxcall] sys_ni_syscall, + [226] sys_ni_syscall, + [__NR_io_setup] sys_io_setup, + [__NR_io_destroy] sys_io_destroy, + [__NR_io_getevents] sys_io_getevents, + [__NR_io_submit] sys_io_submit, + [__NR_io_cancel] sys_io_cancel, + [__NR_set_tid_address] sys_ni_syscall, /* sys_set_tid_address */ + [__NR_fadvise64] sys_fadvise64, + [__NR_exit_group] sys_ni_syscall, /* sys_exit_group */ + [__NR_lookup_dcookie] sys_ni_syscall, /* sys_lookup_dcookie */ + [__NR_epoll_create] sys_epoll_create, + [__NR_epoll_ctl] sys_epoll_ctl, + [__NR_epoll_wait] sys_epoll_wait, + [__NR_remap_file_pages] sys_remap_file_pages, + [__NR_timer_create] sys_timer_create, + [__NR_timer_settime] sys_timer_settime, + [__NR_timer_gettime] sys_timer_gettime, + [__NR_timer_getoverrun] sys_timer_getoverrun, + [__NR_timer_delete] sys_timer_delete, + [__NR_clock_settime] sys_clock_settime, + [__NR_clock_gettime] sys_clock_gettime, + [__NR_clock_getres] sys_clock_getres, + [__NR_clock_nanosleep] sys_clock_nanosleep, + [__NR_swapcontext] sys_ni_syscall, /* ppc64_swapcontext */ + [__NR_tgkill] sys_tgkill, + [__NR_utimes] sys_utimes, + [__NR_statfs64] sys_statfs64, + [__NR_fstatfs64] sys_fstatfs64, + [254] sys_ni_syscall, + [__NR_rtas] ppc_rtas, + [256] sys_ni_syscall, + [257] sys_ni_syscall, + [258] sys_ni_syscall, + [__NR_mbind] sys_ni_syscall, /* sys_mbind */ + [__NR_get_mempolicy] sys_ni_syscall, /* sys_get_mempolicy */ + [__NR_set_mempolicy] sys_ni_syscall, /* sys_set_mempolicy */ + [__NR_mq_open] sys_ni_syscall, /* sys_mq_open */ + [__NR_mq_unlink] sys_ni_syscall, /* sys_mq_unlink */ + [__NR_mq_timedsend] sys_ni_syscall, /* sys_mq_timedsend */ + [__NR_mq_timedreceive] sys_ni_syscall, /* sys_mq_timedreceive */ + [__NR_mq_notify] sys_ni_syscall, /* sys_mq_notify */ + [__NR_mq_getsetattr] sys_ni_syscall, /* sys_mq_getsetattr */ + [__NR_kexec_load] sys_ni_syscall, /* sys_kexec_load */ + [__NR_add_key] sys_ni_syscall, /* sys_add_key */ + [__NR_request_key] sys_ni_syscall, /* sys_request_key */ + [__NR_keyctl] sys_ni_syscall, /* sys_keyctl */ + [__NR_waitid] sys_ni_syscall, /* sys_waitid */ + [__NR_ioprio_set] sys_ni_syscall, /* sys_ioprio_set */ + [__NR_ioprio_get] sys_ni_syscall, /* sys_ioprio_get */ + [__NR_inotify_init] sys_ni_syscall, /* sys_inotify_init */ + [__NR_inotify_add_watch] sys_ni_syscall, /* sys_inotify_add_watch */ + [__NR_inotify_rm_watch] sys_ni_syscall, /* sys_inotify_rm_watch */ + [__NR_spu_run] sys_ni_syscall, /* sys_spu_run */ + [__NR_spu_create] sys_ni_syscall, /* sys_spu_create */ + [__NR_pselect6] sys_ni_syscall, /* sys_pselect */ + [__NR_ppoll] sys_ni_syscall, /* sys_ppoll */ + [__NR_unshare] sys_unshare, +}; + +long spu_sys_callback(struct spu_syscall_block *s) +{ + long (*syscall)(u64 a1, u64 a2, u64 a3, u64 a4, u64 a5, u64 a6); + + BUILD_BUG_ON(ARRAY_SIZE(spu_syscall_table) != __NR_syscalls); + + syscall = spu_syscall_table[s->nr_ret]; + + if (s->nr_ret >= __NR_syscalls) { + pr_debug("%s: invalid syscall #%ld", __FUNCTION__, s->nr_ret); + return -ENOSYS; + } + +#ifdef DEBUG + print_symbol(KERN_DEBUG "SPU-syscall %s:", (unsigned long)syscall); + printk("syscall%ld(%lx, %lx, %lx, %lx, %lx, %lx)\n", + s->nr_ret, + s->parm[0], s->parm[1], s->parm[2], + s->parm[3], s->parm[4], s->parm[5]); +#endif + + return syscall(s->parm[0], s->parm[1], s->parm[2], + s->parm[3], s->parm[4], s->parm[5]); +} +EXPORT_SYMBOL_GPL(spu_sys_callback); diff --git a/arch/powerpc/platforms/cell/spufs/backing_ops.c b/arch/powerpc/platforms/cell/spufs/backing_ops.c index a5c489a53c61..f1d35ddc9df3 100644 --- a/arch/powerpc/platforms/cell/spufs/backing_ops.c +++ b/arch/powerpc/platforms/cell/spufs/backing_ops.c @@ -285,6 +285,49 @@ static void spu_backing_runcntl_stop(struct spu_context *ctx) spu_backing_runcntl_write(ctx, SPU_RUNCNTL_STOP); } +static int spu_backing_set_mfc_query(struct spu_context * ctx, u32 mask, + u32 mode) +{ + struct spu_problem_collapsed *prob = &ctx->csa.prob; + int ret; + + spin_lock(&ctx->csa.register_lock); + ret = -EAGAIN; + if (prob->dma_querytype_RW) + goto out; + ret = 0; + /* FIXME: what are the side-effects of this? */ + prob->dma_querymask_RW = mask; + prob->dma_querytype_RW = mode; +out: + spin_unlock(&ctx->csa.register_lock); + + return ret; +} + +static u32 spu_backing_read_mfc_tagstatus(struct spu_context * ctx) +{ + return ctx->csa.prob.dma_tagstatus_R; +} + +static u32 spu_backing_get_mfc_free_elements(struct spu_context *ctx) +{ + return ctx->csa.prob.dma_qstatus_R; +} + +static int spu_backing_send_mfc_command(struct spu_context *ctx, + struct mfc_dma_command *cmd) +{ + int ret; + + spin_lock(&ctx->csa.register_lock); + ret = -EAGAIN; + /* FIXME: set up priv2->puq */ + spin_unlock(&ctx->csa.register_lock); + + return ret; +} + struct spu_context_ops spu_backing_ops = { .mbox_read = spu_backing_mbox_read, .mbox_stat_read = spu_backing_mbox_stat_read, @@ -305,4 +348,8 @@ struct spu_context_ops spu_backing_ops = { .get_ls = spu_backing_get_ls, .runcntl_write = spu_backing_runcntl_write, .runcntl_stop = spu_backing_runcntl_stop, + .set_mfc_query = spu_backing_set_mfc_query, + .read_mfc_tagstatus = spu_backing_read_mfc_tagstatus, + .get_mfc_free_elements = spu_backing_get_mfc_free_elements, + .send_mfc_command = spu_backing_send_mfc_command, }; diff --git a/arch/powerpc/platforms/cell/spufs/context.c b/arch/powerpc/platforms/cell/spufs/context.c index 336f238102fd..8bb33abfad17 100644 --- a/arch/powerpc/platforms/cell/spufs/context.c +++ b/arch/powerpc/platforms/cell/spufs/context.c @@ -27,7 +27,7 @@ #include <asm/spu_csa.h> #include "spufs.h" -struct spu_context *alloc_spu_context(struct address_space *local_store) +struct spu_context *alloc_spu_context(void) { struct spu_context *ctx; ctx = kmalloc(sizeof *ctx, GFP_KERNEL); @@ -47,10 +47,17 @@ struct spu_context *alloc_spu_context(struct address_space *local_store) init_waitqueue_head(&ctx->ibox_wq); init_waitqueue_head(&ctx->wbox_wq); init_waitqueue_head(&ctx->stop_wq); + init_waitqueue_head(&ctx->mfc_wq); ctx->ibox_fasync = NULL; ctx->wbox_fasync = NULL; + ctx->mfc_fasync = NULL; + ctx->mfc = NULL; + ctx->tagwait = 0; ctx->state = SPU_STATE_SAVED; - ctx->local_store = local_store; + ctx->local_store = NULL; + ctx->cntl = NULL; + ctx->signal1 = NULL; + ctx->signal2 = NULL; ctx->spu = NULL; ctx->ops = &spu_backing_ops; ctx->owner = get_task_mm(current); @@ -68,8 +75,6 @@ void destroy_spu_context(struct kref *kref) ctx = container_of(kref, struct spu_context, kref); down_write(&ctx->state_sema); spu_deactivate(ctx); - ctx->ibox_fasync = NULL; - ctx->wbox_fasync = NULL; up_write(&ctx->state_sema); spu_fini_csa(&ctx->csa); kfree(ctx); @@ -109,7 +114,16 @@ void spu_release(struct spu_context *ctx) void spu_unmap_mappings(struct spu_context *ctx) { - unmap_mapping_range(ctx->local_store, 0, LS_SIZE, 1); + if (ctx->local_store) + unmap_mapping_range(ctx->local_store, 0, LS_SIZE, 1); + if (ctx->mfc) + unmap_mapping_range(ctx->mfc, 0, 0x4000, 1); + if (ctx->cntl) + unmap_mapping_range(ctx->cntl, 0, 0x4000, 1); + if (ctx->signal1) + unmap_mapping_range(ctx->signal1, 0, 0x4000, 1); + if (ctx->signal2) + unmap_mapping_range(ctx->signal2, 0, 0x4000, 1); } int spu_acquire_runnable(struct spu_context *ctx) diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index dfa649c9b956..366185e92667 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -20,6 +20,8 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#undef DEBUG + #include <linux/fs.h> #include <linux/ioctl.h> #include <linux/module.h> @@ -39,8 +41,10 @@ static int spufs_mem_open(struct inode *inode, struct file *file) { struct spufs_inode_info *i = SPUFS_I(inode); - file->private_data = i->i_ctx; - file->f_mapping = i->i_ctx->local_store; + struct spu_context *ctx = i->i_ctx; + file->private_data = ctx; + file->f_mapping = inode->i_mapping; + ctx->local_store = inode->i_mapping; return 0; } @@ -84,7 +88,7 @@ spufs_mem_write(struct file *file, const char __user *buffer, return ret; } -#ifdef CONFIG_SPARSEMEM +#ifdef CONFIG_SPUFS_MMAP static struct page * spufs_mem_mmap_nopage(struct vm_area_struct *vma, unsigned long address, int *type) @@ -136,11 +140,113 @@ static struct file_operations spufs_mem_fops = { .read = spufs_mem_read, .write = spufs_mem_write, .llseek = generic_file_llseek, -#ifdef CONFIG_SPARSEMEM +#ifdef CONFIG_SPUFS_MMAP .mmap = spufs_mem_mmap, #endif }; +#ifdef CONFIG_SPUFS_MMAP +static struct page *spufs_ps_nopage(struct vm_area_struct *vma, + unsigned long address, + int *type, unsigned long ps_offs) +{ + struct page *page = NOPAGE_SIGBUS; + int fault_type = VM_FAULT_SIGBUS; + struct spu_context *ctx = vma->vm_file->private_data; + unsigned long offset = address - vma->vm_start; + unsigned long area; + int ret; + + offset += vma->vm_pgoff << PAGE_SHIFT; + if (offset >= 0x4000) + goto out; + + ret = spu_acquire_runnable(ctx); + if (ret) + goto out; + + area = ctx->spu->problem_phys + ps_offs; + page = pfn_to_page((area + offset) >> PAGE_SHIFT); + fault_type = VM_FAULT_MINOR; + page_cache_get(page); + + spu_release(ctx); + + out: + if (type) + *type = fault_type; + + return page; +} + +static struct page *spufs_cntl_mmap_nopage(struct vm_area_struct *vma, + unsigned long address, int *type) +{ + return spufs_ps_nopage(vma, address, type, 0x4000); +} + +static struct vm_operations_struct spufs_cntl_mmap_vmops = { + .nopage = spufs_cntl_mmap_nopage, +}; + +/* + * mmap support for problem state control area [0x4000 - 0x4fff]. + * Mapping this area requires that the application have CAP_SYS_RAWIO, + * as these registers require special care when read/writing. + */ +static int spufs_cntl_mmap(struct file *file, struct vm_area_struct *vma) +{ + if (!(vma->vm_flags & VM_SHARED)) + return -EINVAL; + + if (!capable(CAP_SYS_RAWIO)) + return -EPERM; + + vma->vm_flags |= VM_RESERVED; + vma->vm_page_prot = __pgprot(pgprot_val(vma->vm_page_prot) + | _PAGE_NO_CACHE); + + vma->vm_ops = &spufs_cntl_mmap_vmops; + return 0; +} +#endif + +static int spufs_cntl_open(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + file->private_data = ctx; + file->f_mapping = inode->i_mapping; + ctx->cntl = inode->i_mapping; + return 0; +} + +static ssize_t +spufs_cntl_read(struct file *file, char __user *buffer, + size_t size, loff_t *pos) +{ + /* FIXME: read from spu status */ + return -EINVAL; +} + +static ssize_t +spufs_cntl_write(struct file *file, const char __user *buffer, + size_t size, loff_t *pos) +{ + /* FIXME: write to runctl bit */ + return -EINVAL; +} + +static struct file_operations spufs_cntl_fops = { + .open = spufs_cntl_open, + .read = spufs_cntl_read, + .write = spufs_cntl_write, +#ifdef CONFIG_SPUFS_MMAP + .mmap = spufs_cntl_mmap, +#endif +}; + static int spufs_regs_open(struct inode *inode, struct file *file) { @@ -501,6 +607,16 @@ static struct file_operations spufs_wbox_stat_fops = { .read = spufs_wbox_stat_read, }; +static int spufs_signal1_open(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + file->private_data = ctx; + file->f_mapping = inode->i_mapping; + ctx->signal1 = inode->i_mapping; + return nonseekable_open(inode, file); +} + static ssize_t spufs_signal1_read(struct file *file, char __user *buf, size_t len, loff_t *pos) { @@ -541,12 +657,50 @@ static ssize_t spufs_signal1_write(struct file *file, const char __user *buf, return 4; } +#ifdef CONFIG_SPUFS_MMAP +static struct page *spufs_signal1_mmap_nopage(struct vm_area_struct *vma, + unsigned long address, int *type) +{ + return spufs_ps_nopage(vma, address, type, 0x14000); +} + +static struct vm_operations_struct spufs_signal1_mmap_vmops = { + .nopage = spufs_signal1_mmap_nopage, +}; + +static int spufs_signal1_mmap(struct file *file, struct vm_area_struct *vma) +{ + if (!(vma->vm_flags & VM_SHARED)) + return -EINVAL; + + vma->vm_flags |= VM_RESERVED; + vma->vm_page_prot = __pgprot(pgprot_val(vma->vm_page_prot) + | _PAGE_NO_CACHE); + + vma->vm_ops = &spufs_signal1_mmap_vmops; + return 0; +} +#endif + static struct file_operations spufs_signal1_fops = { - .open = spufs_pipe_open, + .open = spufs_signal1_open, .read = spufs_signal1_read, .write = spufs_signal1_write, +#ifdef CONFIG_SPUFS_MMAP + .mmap = spufs_signal1_mmap, +#endif }; +static int spufs_signal2_open(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + file->private_data = ctx; + file->f_mapping = inode->i_mapping; + ctx->signal2 = inode->i_mapping; + return nonseekable_open(inode, file); +} + static ssize_t spufs_signal2_read(struct file *file, char __user *buf, size_t len, loff_t *pos) { @@ -589,10 +743,39 @@ static ssize_t spufs_signal2_write(struct file *file, const char __user *buf, return 4; } +#ifdef CONFIG_SPUFS_MMAP +static struct page *spufs_signal2_mmap_nopage(struct vm_area_struct *vma, + unsigned long address, int *type) +{ + return spufs_ps_nopage(vma, address, type, 0x1c000); +} + +static struct vm_operations_struct spufs_signal2_mmap_vmops = { + .nopage = spufs_signal2_mmap_nopage, +}; + +static int spufs_signal2_mmap(struct file *file, struct vm_area_struct *vma) +{ + if (!(vma->vm_flags & VM_SHARED)) + return -EINVAL; + + /* FIXME: */ + vma->vm_flags |= VM_RESERVED; + vma->vm_page_prot = __pgprot(pgprot_val(vma->vm_page_prot) + | _PAGE_NO_CACHE); + + vma->vm_ops = &spufs_signal2_mmap_vmops; + return 0; +} +#endif + static struct file_operations spufs_signal2_fops = { - .open = spufs_pipe_open, + .open = spufs_signal2_open, .read = spufs_signal2_read, .write = spufs_signal2_write, +#ifdef CONFIG_SPUFS_MMAP + .mmap = spufs_signal2_mmap, +#endif }; static void spufs_signal1_type_set(void *data, u64 val) @@ -641,6 +824,332 @@ static u64 spufs_signal2_type_get(void *data) DEFINE_SIMPLE_ATTRIBUTE(spufs_signal2_type, spufs_signal2_type_get, spufs_signal2_type_set, "%llu"); +#ifdef CONFIG_SPUFS_MMAP +static struct page *spufs_mfc_mmap_nopage(struct vm_area_struct *vma, + unsigned long address, int *type) +{ + return spufs_ps_nopage(vma, address, type, 0x3000); +} + +static struct vm_operations_struct spufs_mfc_mmap_vmops = { + .nopage = spufs_mfc_mmap_nopage, +}; + +/* + * mmap support for problem state MFC DMA area [0x0000 - 0x0fff]. + * Mapping this area requires that the application have CAP_SYS_RAWIO, + * as these registers require special care when read/writing. + */ +static int spufs_mfc_mmap(struct file *file, struct vm_area_struct *vma) +{ + if (!(vma->vm_flags & VM_SHARED)) + return -EINVAL; + + if (!capable(CAP_SYS_RAWIO)) + return -EPERM; + + vma->vm_flags |= VM_RESERVED; + vma->vm_page_prot = __pgprot(pgprot_val(vma->vm_page_prot) + | _PAGE_NO_CACHE); + + vma->vm_ops = &spufs_mfc_mmap_vmops; + return 0; +} +#endif + +static int spufs_mfc_open(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + + /* we don't want to deal with DMA into other processes */ + if (ctx->owner != current->mm) + return -EINVAL; + + if (atomic_read(&inode->i_count) != 1) + return -EBUSY; + + file->private_data = ctx; + return nonseekable_open(inode, file); +} + +/* interrupt-level mfc callback function. */ +void spufs_mfc_callback(struct spu *spu) +{ + struct spu_context *ctx = spu->ctx; + + wake_up_all(&ctx->mfc_wq); + + pr_debug("%s %s\n", __FUNCTION__, spu->name); + if (ctx->mfc_fasync) { + u32 free_elements, tagstatus; + unsigned int mask; + + /* no need for spu_acquire in interrupt context */ + free_elements = ctx->ops->get_mfc_free_elements(ctx); + tagstatus = ctx->ops->read_mfc_tagstatus(ctx); + + mask = 0; + if (free_elements & 0xffff) + mask |= POLLOUT; + if (tagstatus & ctx->tagwait) + mask |= POLLIN; + + kill_fasync(&ctx->mfc_fasync, SIGIO, mask); + } +} + +static int spufs_read_mfc_tagstatus(struct spu_context *ctx, u32 *status) +{ + /* See if there is one tag group is complete */ + /* FIXME we need locking around tagwait */ + *status = ctx->ops->read_mfc_tagstatus(ctx) & ctx->tagwait; + ctx->tagwait &= ~*status; + if (*status) + return 1; + + /* enable interrupt waiting for any tag group, + may silently fail if interrupts are already enabled */ + ctx->ops->set_mfc_query(ctx, ctx->tagwait, 1); + return 0; +} + +static ssize_t spufs_mfc_read(struct file *file, char __user *buffer, + size_t size, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + int ret = -EINVAL; + u32 status; + + if (size != 4) + goto out; + + spu_acquire(ctx); + if (file->f_flags & O_NONBLOCK) { + status = ctx->ops->read_mfc_tagstatus(ctx); + if (!(status & ctx->tagwait)) + ret = -EAGAIN; + else + ctx->tagwait &= ~status; + } else { + ret = spufs_wait(ctx->mfc_wq, + spufs_read_mfc_tagstatus(ctx, &status)); + } + spu_release(ctx); + + if (ret) + goto out; + + ret = 4; + if (copy_to_user(buffer, &status, 4)) + ret = -EFAULT; + +out: + return ret; +} + +static int spufs_check_valid_dma(struct mfc_dma_command *cmd) +{ + pr_debug("queueing DMA %x %lx %x %x %x\n", cmd->lsa, + cmd->ea, cmd->size, cmd->tag, cmd->cmd); + + switch (cmd->cmd) { + case MFC_PUT_CMD: + case MFC_PUTF_CMD: + case MFC_PUTB_CMD: + case MFC_GET_CMD: + case MFC_GETF_CMD: + case MFC_GETB_CMD: + break; + default: + pr_debug("invalid DMA opcode %x\n", cmd->cmd); + return -EIO; + } + + if ((cmd->lsa & 0xf) != (cmd->ea &0xf)) { + pr_debug("invalid DMA alignment, ea %lx lsa %x\n", + cmd->ea, cmd->lsa); + return -EIO; + } + + switch (cmd->size & 0xf) { + case 1: + break; + case 2: + if (cmd->lsa & 1) + goto error; + break; + case 4: + if (cmd->lsa & 3) + goto error; + break; + case 8: + if (cmd->lsa & 7) + goto error; + break; + case 0: + if (cmd->lsa & 15) + goto error; + break; + error: + default: + pr_debug("invalid DMA alignment %x for size %x\n", + cmd->lsa & 0xf, cmd->size); + return -EIO; + } + + if (cmd->size > 16 * 1024) { + pr_debug("invalid DMA size %x\n", cmd->size); + return -EIO; + } + + if (cmd->tag & 0xfff0) { + /* we reserve the higher tag numbers for kernel use */ + pr_debug("invalid DMA tag\n"); + return -EIO; + } + + if (cmd->class) { + /* not supported in this version */ + pr_debug("invalid DMA class\n"); + return -EIO; + } + + return 0; +} + +static int spu_send_mfc_command(struct spu_context *ctx, + struct mfc_dma_command cmd, + int *error) +{ + *error = ctx->ops->send_mfc_command(ctx, &cmd); + if (*error == -EAGAIN) { + /* wait for any tag group to complete + so we have space for the new command */ + ctx->ops->set_mfc_query(ctx, ctx->tagwait, 1); + /* try again, because the queue might be + empty again */ + *error = ctx->ops->send_mfc_command(ctx, &cmd); + if (*error == -EAGAIN) + return 0; + } + return 1; +} + +static ssize_t spufs_mfc_write(struct file *file, const char __user *buffer, + size_t size, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + struct mfc_dma_command cmd; + int ret = -EINVAL; + + if (size != sizeof cmd) + goto out; + + ret = -EFAULT; + if (copy_from_user(&cmd, buffer, sizeof cmd)) + goto out; + + ret = spufs_check_valid_dma(&cmd); + if (ret) + goto out; + + spu_acquire_runnable(ctx); + if (file->f_flags & O_NONBLOCK) { + ret = ctx->ops->send_mfc_command(ctx, &cmd); + } else { + int status; + ret = spufs_wait(ctx->mfc_wq, + spu_send_mfc_command(ctx, cmd, &status)); + if (status) + ret = status; + } + spu_release(ctx); + + if (ret) + goto out; + + ctx->tagwait |= 1 << cmd.tag; + +out: + return ret; +} + +static unsigned int spufs_mfc_poll(struct file *file,poll_table *wait) +{ + struct spu_context *ctx = file->private_data; + u32 free_elements, tagstatus; + unsigned int mask; + + spu_acquire(ctx); + ctx->ops->set_mfc_query(ctx, ctx->tagwait, 2); + free_elements = ctx->ops->get_mfc_free_elements(ctx); + tagstatus = ctx->ops->read_mfc_tagstatus(ctx); + spu_release(ctx); + + poll_wait(file, &ctx->mfc_wq, wait); + + mask = 0; + if (free_elements & 0xffff) + mask |= POLLOUT | POLLWRNORM; + if (tagstatus & ctx->tagwait) + mask |= POLLIN | POLLRDNORM; + + pr_debug("%s: free %d tagstatus %d tagwait %d\n", __FUNCTION__, + free_elements, tagstatus, ctx->tagwait); + + return mask; +} + +static int spufs_mfc_flush(struct file *file) +{ + struct spu_context *ctx = file->private_data; + int ret; + + spu_acquire(ctx); +#if 0 +/* this currently hangs */ + ret = spufs_wait(ctx->mfc_wq, + ctx->ops->set_mfc_query(ctx, ctx->tagwait, 2)); + if (ret) + goto out; + ret = spufs_wait(ctx->mfc_wq, + ctx->ops->read_mfc_tagstatus(ctx) == ctx->tagwait); +out: +#else + ret = 0; +#endif + spu_release(ctx); + + return ret; +} + +static int spufs_mfc_fsync(struct file *file, struct dentry *dentry, + int datasync) +{ + return spufs_mfc_flush(file); +} + +static int spufs_mfc_fasync(int fd, struct file *file, int on) +{ + struct spu_context *ctx = file->private_data; + + return fasync_helper(fd, file, on, &ctx->mfc_fasync); +} + +static struct file_operations spufs_mfc_fops = { + .open = spufs_mfc_open, + .read = spufs_mfc_read, + .write = spufs_mfc_write, + .poll = spufs_mfc_poll, + .flush = spufs_mfc_flush, + .fsync = spufs_mfc_fsync, + .fasync = spufs_mfc_fasync, +#ifdef CONFIG_SPUFS_MMAP + .mmap = spufs_mfc_mmap, +#endif +}; + static void spufs_npc_set(void *data, u64 val) { struct spu_context *ctx = data; @@ -783,6 +1292,8 @@ struct tree_descr spufs_dir_contents[] = { { "signal2", &spufs_signal2_fops, 0666, }, { "signal1_type", &spufs_signal1_type, 0666, }, { "signal2_type", &spufs_signal2_type, 0666, }, + { "mfc", &spufs_mfc_fops, 0666, }, + { "cntl", &spufs_cntl_fops, 0666, }, { "npc", &spufs_npc_ops, 0666, }, { "fpcr", &spufs_fpcr_fops, 0666, }, { "decr", &spufs_decr_ops, 0666, }, diff --git a/arch/powerpc/platforms/cell/spufs/hw_ops.c b/arch/powerpc/platforms/cell/spufs/hw_ops.c index 5445719bff79..a13a8b5a014d 100644 --- a/arch/powerpc/platforms/cell/spufs/hw_ops.c +++ b/arch/powerpc/platforms/cell/spufs/hw_ops.c @@ -232,6 +232,59 @@ static void spu_hw_runcntl_stop(struct spu_context *ctx) spin_unlock_irq(&ctx->spu->register_lock); } +static int spu_hw_set_mfc_query(struct spu_context * ctx, u32 mask, u32 mode) +{ + struct spu_problem *prob = ctx->spu->problem; + int ret; + + spin_lock_irq(&ctx->spu->register_lock); + ret = -EAGAIN; + if (in_be32(&prob->dma_querytype_RW)) + goto out; + ret = 0; + out_be32(&prob->dma_querymask_RW, mask); + out_be32(&prob->dma_querytype_RW, mode); +out: + spin_unlock_irq(&ctx->spu->register_lock); + return ret; +} + +static u32 spu_hw_read_mfc_tagstatus(struct spu_context * ctx) +{ + return in_be32(&ctx->spu->problem->dma_tagstatus_R); +} + +static u32 spu_hw_get_mfc_free_elements(struct spu_context *ctx) +{ + return in_be32(&ctx->spu->problem->dma_qstatus_R); +} + +static int spu_hw_send_mfc_command(struct spu_context *ctx, + struct mfc_dma_command *cmd) +{ + u32 status; + struct spu_problem *prob = ctx->spu->problem; + + spin_lock_irq(&ctx->spu->register_lock); + out_be32(&prob->mfc_lsa_W, cmd->lsa); + out_be64(&prob->mfc_ea_W, cmd->ea); + out_be32(&prob->mfc_union_W.by32.mfc_size_tag32, + cmd->size << 16 | cmd->tag); + out_be32(&prob->mfc_union_W.by32.mfc_class_cmd32, + cmd->class << 16 | cmd->cmd); + status = in_be32(&prob->mfc_union_W.by32.mfc_class_cmd32); + spin_unlock_irq(&ctx->spu->register_lock); + + switch (status & 0xffff) { + case 0: + return 0; + case 2: + return -EAGAIN; + default: + return -EINVAL; + } +} + struct spu_context_ops spu_hw_ops = { .mbox_read = spu_hw_mbox_read, .mbox_stat_read = spu_hw_mbox_stat_read, @@ -252,4 +305,8 @@ struct spu_context_ops spu_hw_ops = { .get_ls = spu_hw_get_ls, .runcntl_write = spu_hw_runcntl_write, .runcntl_stop = spu_hw_runcntl_stop, + .set_mfc_query = spu_hw_set_mfc_query, + .read_mfc_tagstatus = spu_hw_read_mfc_tagstatus, + .get_mfc_free_elements = spu_hw_get_mfc_free_elements, + .send_mfc_command = spu_hw_send_mfc_command, }; diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index b3962c3a0348..d9554199afa7 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -103,7 +103,7 @@ spufs_setattr(struct dentry *dentry, struct iattr *attr) static int spufs_new_file(struct super_block *sb, struct dentry *dentry, - struct file_operations *fops, int mode, + const struct file_operations *fops, int mode, struct spu_context *ctx) { static struct inode_operations spufs_file_iops = { @@ -241,7 +241,7 @@ spufs_mkdir(struct inode *dir, struct dentry *dentry, int mode) inode->i_gid = dir->i_gid; inode->i_mode &= S_ISGID; } - ctx = alloc_spu_context(inode->i_mapping); + ctx = alloc_spu_context(); SPUFS_I(inode)->i_ctx = ctx; if (!ctx) goto out_iput; @@ -442,7 +442,7 @@ static struct file_system_type spufs_type = { .kill_sb = kill_litter_super, }; -static int spufs_init(void) +static int __init spufs_init(void) { int ret; ret = -ENOMEM; @@ -472,7 +472,7 @@ out: } module_init(spufs_init); -static void spufs_exit(void) +static void __exit spufs_exit(void) { spu_sched_exit(); unregister_spu_syscalls(&spufs_calls); diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c index 18ea8866c61a..c04e078c0fe5 100644 --- a/arch/powerpc/platforms/cell/spufs/run.c +++ b/arch/powerpc/platforms/cell/spufs/run.c @@ -76,6 +76,90 @@ static inline int spu_reacquire_runnable(struct spu_context *ctx, u32 *npc, return 0; } +/* + * SPU syscall restarting is tricky because we violate the basic + * assumption that the signal handler is running on the interrupted + * thread. Here instead, the handler runs on PowerPC user space code, + * while the syscall was called from the SPU. + * This means we can only do a very rough approximation of POSIX + * signal semantics. + */ +int spu_handle_restartsys(struct spu_context *ctx, long *spu_ret, + unsigned int *npc) +{ + int ret; + + switch (*spu_ret) { + case -ERESTARTSYS: + case -ERESTARTNOINTR: + /* + * Enter the regular syscall restarting for + * sys_spu_run, then restart the SPU syscall + * callback. + */ + *npc -= 8; + ret = -ERESTARTSYS; + break; + case -ERESTARTNOHAND: + case -ERESTART_RESTARTBLOCK: + /* + * Restart block is too hard for now, just return -EINTR + * to the SPU. + * ERESTARTNOHAND comes from sys_pause, we also return + * -EINTR from there. + * Assume that we need to be restarted ourselves though. + */ + *spu_ret = -EINTR; + ret = -ERESTARTSYS; + break; + default: + printk(KERN_WARNING "%s: unexpected return code %ld\n", + __FUNCTION__, *spu_ret); + ret = 0; + } + return ret; +} + +int spu_process_callback(struct spu_context *ctx) +{ + struct spu_syscall_block s; + u32 ls_pointer, npc; + char *ls; + long spu_ret; + int ret; + + /* get syscall block from local store */ + npc = ctx->ops->npc_read(ctx); + ls = ctx->ops->get_ls(ctx); + ls_pointer = *(u32*)(ls + npc); + if (ls_pointer > (LS_SIZE - sizeof(s))) + return -EFAULT; + memcpy(&s, ls + ls_pointer, sizeof (s)); + + /* do actual syscall without pinning the spu */ + ret = 0; + spu_ret = -ENOSYS; + npc += 4; + + if (s.nr_ret < __NR_syscalls) { + spu_release(ctx); + /* do actual system call from here */ + spu_ret = spu_sys_callback(&s); + if (spu_ret <= -ERESTARTSYS) { + ret = spu_handle_restartsys(ctx, &spu_ret, &npc); + } + spu_acquire(ctx); + if (ret == -ERESTARTSYS) + return ret; + } + + /* write result, jump over indirect pointer */ + memcpy(ls + ls_pointer, &spu_ret, sizeof (spu_ret)); + ctx->ops->npc_write(ctx, npc); + ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE); + return ret; +} + static inline int spu_process_events(struct spu_context *ctx) { struct spu *spu = ctx->spu; @@ -107,6 +191,13 @@ long spufs_run_spu(struct file *file, struct spu_context *ctx, ret = spufs_wait(ctx->stop_wq, spu_stopped(ctx, status)); if (unlikely(ret)) break; + if ((*status & SPU_STATUS_STOPPED_BY_STOP) && + (*status >> SPU_STOP_STATUS_SHIFT == 0x2104)) { + ret = spu_process_callback(ctx); + if (ret) + break; + *status &= ~SPU_STATUS_STOPPED_BY_STOP; + } if (unlikely(ctx->state != SPU_STATE_RUNNABLE)) { ret = spu_reacquire_runnable(ctx, npc, status); if (ret) diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 963182fbd1aa..bf652cd77000 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -180,6 +180,7 @@ static inline void bind_context(struct spu *spu, struct spu_context *ctx) spu->ibox_callback = spufs_ibox_callback; spu->wbox_callback = spufs_wbox_callback; spu->stop_callback = spufs_stop_callback; + spu->mfc_callback = spufs_mfc_callback; mb(); spu_unmap_mappings(ctx); spu_restore(&ctx->csa, spu); @@ -197,6 +198,7 @@ static inline void unbind_context(struct spu *spu, struct spu_context *ctx) spu->ibox_callback = NULL; spu->wbox_callback = NULL; spu->stop_callback = NULL; + spu->mfc_callback = NULL; spu->mm = NULL; spu->pid = 0; spu->prio = MAX_PRIO; diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h index db2601f0abd5..4485738e2102 100644 --- a/arch/powerpc/platforms/cell/spufs/spufs.h +++ b/arch/powerpc/platforms/cell/spufs/spufs.h @@ -43,7 +43,11 @@ struct spu_context { struct spu *spu; /* pointer to a physical SPU */ struct spu_state csa; /* SPU context save area. */ spinlock_t mmio_lock; /* protects mmio access */ - struct address_space *local_store;/* local store backing store */ + struct address_space *local_store; /* local store mapping. */ + struct address_space *mfc; /* 'mfc' area mappings. */ + struct address_space *cntl; /* 'control' area mappings. */ + struct address_space *signal1; /* 'signal1' area mappings. */ + struct address_space *signal2; /* 'signal2' area mappings. */ enum { SPU_STATE_RUNNABLE, SPU_STATE_SAVED } state; struct rw_semaphore state_sema; @@ -55,13 +59,27 @@ struct spu_context { wait_queue_head_t ibox_wq; wait_queue_head_t wbox_wq; wait_queue_head_t stop_wq; + wait_queue_head_t mfc_wq; struct fasync_struct *ibox_fasync; struct fasync_struct *wbox_fasync; + struct fasync_struct *mfc_fasync; + u32 tagwait; struct spu_context_ops *ops; struct work_struct reap_work; u64 flags; }; +struct mfc_dma_command { + int32_t pad; /* reserved */ + uint32_t lsa; /* local storage address */ + uint64_t ea; /* effective address */ + uint16_t size; /* transfer size */ + uint16_t tag; /* command tag */ + uint16_t class; /* class ID */ + uint16_t cmd; /* command opcode */ +}; + + /* SPU context query/set operations. */ struct spu_context_ops { int (*mbox_read) (struct spu_context * ctx, u32 * data); @@ -84,6 +102,11 @@ struct spu_context_ops { char*(*get_ls) (struct spu_context * ctx); void (*runcntl_write) (struct spu_context * ctx, u32 data); void (*runcntl_stop) (struct spu_context * ctx); + int (*set_mfc_query)(struct spu_context * ctx, u32 mask, u32 mode); + u32 (*read_mfc_tagstatus)(struct spu_context * ctx); + u32 (*get_mfc_free_elements)(struct spu_context *ctx); + int (*send_mfc_command)(struct spu_context *ctx, + struct mfc_dma_command *cmd); }; extern struct spu_context_ops spu_hw_ops; @@ -106,7 +129,7 @@ long spufs_create_thread(struct nameidata *nd, extern struct file_operations spufs_context_fops; /* context management */ -struct spu_context * alloc_spu_context(struct address_space *local_store); +struct spu_context * alloc_spu_context(void); void destroy_spu_context(struct kref *kref); struct spu_context * get_spu_context(struct spu_context *ctx); int put_spu_context(struct spu_context *ctx); @@ -159,5 +182,6 @@ size_t spu_ibox_read(struct spu_context *ctx, u32 *data); void spufs_ibox_callback(struct spu *spu); void spufs_wbox_callback(struct spu *spu); void spufs_stop_callback(struct spu *spu); +void spufs_mfc_callback(struct spu *spu); #endif diff --git a/arch/powerpc/platforms/cell/spufs/switch.c b/arch/powerpc/platforms/cell/spufs/switch.c index 212db28531fa..97898d5d34e5 100644 --- a/arch/powerpc/platforms/cell/spufs/switch.c +++ b/arch/powerpc/platforms/cell/spufs/switch.c @@ -2145,7 +2145,8 @@ static void init_priv1(struct spu_state *csa) csa->priv1.int_mask_class1_RW = CLASS1_ENABLE_SEGMENT_FAULT_INTR | CLASS1_ENABLE_STORAGE_FAULT_INTR; csa->priv1.int_mask_class2_RW = CLASS2_ENABLE_SPU_STOP_INTR | - CLASS2_ENABLE_SPU_HALT_INTR; + CLASS2_ENABLE_SPU_HALT_INTR | + CLASS2_ENABLE_SPU_DMA_TAG_GROUP_COMPLETE_INTR; } static void init_priv2(struct spu_state *csa) diff --git a/arch/powerpc/platforms/chrp/chrp.h b/arch/powerpc/platforms/chrp/chrp.h index 814f54742e0f..63f0aee4c158 100644 --- a/arch/powerpc/platforms/chrp/chrp.h +++ b/arch/powerpc/platforms/chrp/chrp.h @@ -8,4 +8,4 @@ extern int chrp_set_rtc_time(struct rtc_time *); extern long chrp_time_init(void); extern void chrp_find_bridges(void); -extern void chrp_event_scan(void); +extern void chrp_event_scan(unsigned long); diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c index 8bf4307e323d..23a201718704 100644 --- a/arch/powerpc/platforms/chrp/setup.c +++ b/arch/powerpc/platforms/chrp/setup.c @@ -35,6 +35,7 @@ #include <linux/root_dev.h> #include <linux/initrd.h> #include <linux/module.h> +#include <linux/timer.h> #include <asm/io.h> #include <asm/pgtable.h> @@ -61,6 +62,10 @@ EXPORT_SYMBOL(_chrp_type); struct mpic *chrp_mpic; +/* Used for doing CHRP event-scans */ +DEFINE_PER_CPU(struct timer_list, heartbeat_timer); +unsigned long event_scan_interval; + /* * XXX this should be in xmon.h, but putting it there means xmon.h * has to include <linux/interrupt.h> (to get irqreturn_t), which @@ -229,8 +234,6 @@ void __init chrp_setup_arch(void) { struct device_node *root = find_path_device ("/"); char *machine = NULL; - struct device_node *device; - unsigned int *p = NULL; /* init to some ~sane value until calibrate_delay() runs */ loops_per_jiffy = 50000000/HZ; @@ -287,23 +290,12 @@ void __init chrp_setup_arch(void) */ sio_init(); - /* Get the event scan rate for the rtas so we know how - * often it expects a heartbeat. -- Cort - */ - device = find_devices("rtas"); - if (device) - p = (unsigned int *) get_property - (device, "rtas-event-scan-rate", NULL); - if (p && *p) { - ppc_md.heartbeat = chrp_event_scan; - ppc_md.heartbeat_reset = HZ / (*p * 30) - 1; - ppc_md.heartbeat_count = 1; - printk("RTAS Event Scan Rate: %u (%lu jiffies)\n", - *p, ppc_md.heartbeat_reset); - } - pci_create_OF_bus_map(); +#ifdef CONFIG_SMP + smp_ops = &chrp_smp_ops; +#endif /* CONFIG_SMP */ + /* * Print the banner, then scroll down so boot progress * can be printed. -- Cort @@ -312,7 +304,7 @@ void __init chrp_setup_arch(void) } void -chrp_event_scan(void) +chrp_event_scan(unsigned long unused) { unsigned char log[1024]; int ret = 0; @@ -320,7 +312,8 @@ chrp_event_scan(void) /* XXX: we should loop until the hardware says no more error logs -- Cort */ rtas_call(rtas_token("event-scan"), 4, 1, &ret, 0xffffffff, 0, __pa(log), 1024); - ppc_md.heartbeat_count = ppc_md.heartbeat_reset; + mod_timer(&__get_cpu_var(heartbeat_timer), + jiffies + event_scan_interval); } /* @@ -465,6 +458,9 @@ void __init chrp_init_IRQ(void) void __init chrp_init2(void) { + struct device_node *device; + unsigned int *p = NULL; + #ifdef CONFIG_NVRAM chrp_nvram_init(); #endif @@ -476,12 +472,53 @@ chrp_init2(void) request_region(0x80,0x10,"dma page reg"); request_region(0xc0,0x20,"dma2"); + /* Get the event scan rate for the rtas so we know how + * often it expects a heartbeat. -- Cort + */ + device = find_devices("rtas"); + if (device) + p = (unsigned int *) get_property + (device, "rtas-event-scan-rate", NULL); + if (p && *p) { + /* + * Arrange to call chrp_event_scan at least *p times + * per minute. We use 59 rather than 60 here so that + * the rate will be slightly higher than the minimum. + * This all assumes we don't do hotplug CPU on any + * machine that needs the event scans done. + */ + unsigned long interval, offset; + int cpu, ncpus; + struct timer_list *timer; + + interval = HZ * 59 / *p; + offset = HZ; + ncpus = num_online_cpus(); + event_scan_interval = ncpus * interval; + for (cpu = 0; cpu < ncpus; ++cpu) { + timer = &per_cpu(heartbeat_timer, cpu); + setup_timer(timer, chrp_event_scan, 0); + timer->expires = jiffies + offset; + add_timer_on(timer, cpu); + offset += interval; + } + printk("RTAS Event Scan Rate: %u (%lu jiffies)\n", + *p, interval); + } + if (ppc_md.progress) ppc_md.progress(" Have fun! ", 0x7777); } -void __init chrp_init(void) +static int __init chrp_probe(void) { + char *dtype = of_get_flat_dt_prop(of_get_flat_dt_root(), + "device_type", NULL); + if (dtype == NULL) + return 0; + if (strcmp(dtype, "chrp")) + return 0; + ISA_DMA_THRESHOLD = ~0L; DMA_MODE_READ = 0x44; DMA_MODE_WRITE = 0x48; diff --git a/arch/powerpc/platforms/chrp/time.c b/arch/powerpc/platforms/chrp/time.c index 12c6f689b1aa..7d7889026936 100644 --- a/arch/powerpc/platforms/chrp/time.c +++ b/arch/powerpc/platforms/chrp/time.c @@ -120,33 +120,15 @@ int chrp_set_rtc_time(struct rtc_time *tmarg) void chrp_get_rtc_time(struct rtc_time *tm) { unsigned int year, mon, day, hour, min, sec; - int uip, i; - /* The Linux interpretation of the CMOS clock register contents: - * When the Update-In-Progress (UIP) flag goes from 1 to 0, the - * RTC registers show the second which has precisely just started. - * Let's hope other operating systems interpret the RTC the same way. - */ - - /* Since the UIP flag is set for about 2.2 ms and the clock - * is typically written with a precision of 1 jiffy, trying - * to obtain a precision better than a few milliseconds is - * an illusion. Only consistency is interesting, this also - * allows to use the routine for /dev/rtc without a potential - * 1 second kernel busy loop triggered by any reader of /dev/rtc. - */ - - for ( i = 0; i<1000000; i++) { - uip = chrp_cmos_clock_read(RTC_FREQ_SELECT); + do { sec = chrp_cmos_clock_read(RTC_SECONDS); min = chrp_cmos_clock_read(RTC_MINUTES); hour = chrp_cmos_clock_read(RTC_HOURS); day = chrp_cmos_clock_read(RTC_DAY_OF_MONTH); mon = chrp_cmos_clock_read(RTC_MONTH); year = chrp_cmos_clock_read(RTC_YEAR); - uip |= chrp_cmos_clock_read(RTC_FREQ_SELECT); - if ((uip & RTC_UIP)==0) break; - } + } while (sec != chrp_cmos_clock_read(RTC_SECONDS)); if (!(chrp_cmos_clock_read(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { BCD_TO_BIN(sec); diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c index fa4550611c11..6ce8a404ba6b 100644 --- a/arch/powerpc/platforms/iseries/setup.c +++ b/arch/powerpc/platforms/iseries/setup.c @@ -675,18 +675,20 @@ static void iseries_dedicated_idle(void) void __init iSeries_init_IRQ(void) { } #endif -static int __init iseries_probe(int platform) +static int __init iseries_probe(void) { - if (PLATFORM_ISERIES_LPAR != platform) + unsigned long root = of_get_flat_dt_root(); + if (!of_flat_dt_is_compatible(root, "IBM,iSeries")) return 0; - ppc64_firmware_features |= FW_FEATURE_ISERIES; - ppc64_firmware_features |= FW_FEATURE_LPAR; + powerpc_firmware_features |= FW_FEATURE_ISERIES; + powerpc_firmware_features |= FW_FEATURE_LPAR; return 1; } -struct machdep_calls __initdata iseries_md = { +define_machine(iseries) { + .name = "iSeries", .setup_arch = iSeries_setup_arch, .show_cpuinfo = iSeries_show_cpuinfo, .init_IRQ = iSeries_init_IRQ, @@ -930,7 +932,6 @@ void build_flat_dt(struct iseries_flat_dt *dt, unsigned long phys_mem_size) /* /chosen */ dt_start_node(dt, "chosen"); - dt_prop_u32(dt, "linux,platform", PLATFORM_ISERIES_LPAR); dt_prop_str(dt, "bootargs", cmd_line); if (cmd_mem_limit) dt_prop_u64(dt, "linux,memory-limit", cmd_mem_limit); diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c index ec5c1e10c407..24c0aef4ea39 100644 --- a/arch/powerpc/platforms/maple/setup.c +++ b/arch/powerpc/platforms/maple/setup.c @@ -259,9 +259,10 @@ static void __init maple_progress(char *s, unsigned short hex) /* * Called very early, MMU is off, device-tree isn't unflattened */ -static int __init maple_probe(int platform) +static int __init maple_probe(void) { - if (platform != PLATFORM_MAPLE) + unsigned long root = of_get_flat_dt_root(); + if (!of_flat_dt_is_compatible(root, "Momentum,Maple")) return 0; /* * On U3, the DART (iommu) must be allocated now since it @@ -274,7 +275,8 @@ static int __init maple_probe(int platform) return 1; } -struct machdep_calls __initdata maple_md = { +define_machine(maple_md) { + .name = "Maple", .probe = maple_probe, .setup_arch = maple_setup_arch, .init_early = maple_init_early, @@ -290,7 +292,7 @@ struct machdep_calls __initdata maple_md = { .get_rtc_time = maple_get_rtc_time, .calibrate_decr = generic_calibrate_decr, .progress = maple_progress, - .idle_loop = native_idle, + .power_save = power4_idle, #ifdef CONFIG_KEXEC .machine_kexec = default_machine_kexec, .machine_kexec_prepare = default_machine_kexec_prepare, diff --git a/arch/powerpc/platforms/maple/time.c b/arch/powerpc/platforms/maple/time.c index 5e6981d17379..b9a2b3d4bf33 100644 --- a/arch/powerpc/platforms/maple/time.c +++ b/arch/powerpc/platforms/maple/time.c @@ -60,34 +60,14 @@ static void maple_clock_write(unsigned long val, int addr) void maple_get_rtc_time(struct rtc_time *tm) { - int uip, i; - - /* The Linux interpretation of the CMOS clock register contents: - * When the Update-In-Progress (UIP) flag goes from 1 to 0, the - * RTC registers show the second which has precisely just started. - * Let's hope other operating systems interpret the RTC the same way. - */ - - /* Since the UIP flag is set for about 2.2 ms and the clock - * is typically written with a precision of 1 jiffy, trying - * to obtain a precision better than a few milliseconds is - * an illusion. Only consistency is interesting, this also - * allows to use the routine for /dev/rtc without a potential - * 1 second kernel busy loop triggered by any reader of /dev/rtc. - */ - - for (i = 0; i<1000000; i++) { - uip = maple_clock_read(RTC_FREQ_SELECT); + do { tm->tm_sec = maple_clock_read(RTC_SECONDS); tm->tm_min = maple_clock_read(RTC_MINUTES); tm->tm_hour = maple_clock_read(RTC_HOURS); tm->tm_mday = maple_clock_read(RTC_DAY_OF_MONTH); tm->tm_mon = maple_clock_read(RTC_MONTH); tm->tm_year = maple_clock_read(RTC_YEAR); - uip |= maple_clock_read(RTC_FREQ_SELECT); - if ((uip & RTC_UIP)==0) - break; - } + } while (tm->tm_sec != maple_clock_read(RTC_SECONDS)); if (!(maple_clock_read(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { diff --git a/arch/powerpc/platforms/powermac/bootx_init.c b/arch/powerpc/platforms/powermac/bootx_init.c index fa8b4d7b5ded..eacbfd9beabc 100644 --- a/arch/powerpc/platforms/powermac/bootx_init.c +++ b/arch/powerpc/platforms/powermac/bootx_init.c @@ -161,9 +161,7 @@ static void __init bootx_dt_add_prop(char *name, void *data, int size, static void __init bootx_add_chosen_props(unsigned long base, unsigned long *mem_end) { - u32 val = _MACH_Pmac; - - bootx_dt_add_prop("linux,platform", &val, 4, mem_end); + u32 val; if (bootx_info->kernelParamsOffset) { char *args = (char *)((unsigned long)bootx_info) + @@ -493,7 +491,7 @@ void __init bootx_init(unsigned long r3, unsigned long r4) && (strcmp(model, "iMac,1") == 0 || strcmp(model, "PowerMac1,1") == 0)) { bootx_printf("iMac,1 detected, shutting down USB \n"); - out_le32((unsigned *)0x80880008, 1); /* XXX */ + out_le32((unsigned __iomem *)0x80880008, 1); /* XXX */ } } diff --git a/arch/powerpc/platforms/powermac/cpufreq_64.c b/arch/powerpc/platforms/powermac/cpufreq_64.c index a415e8d2f7af..b57e465a1b71 100644 --- a/arch/powerpc/platforms/powermac/cpufreq_64.c +++ b/arch/powerpc/platforms/powermac/cpufreq_64.c @@ -21,6 +21,7 @@ #include <linux/cpufreq.h> #include <linux/init.h> #include <linux/completion.h> +#include <linux/mutex.h> #include <asm/prom.h> #include <asm/machdep.h> #include <asm/irq.h> @@ -90,7 +91,7 @@ static void (*g5_switch_volt)(int speed_mode); static int (*g5_switch_freq)(int speed_mode); static int (*g5_query_freq)(void); -static DECLARE_MUTEX(g5_switch_mutex); +static DEFINE_MUTEX(g5_switch_mutex); static struct smu_sdbp_fvt *g5_fvt_table; /* table of op. points */ @@ -327,7 +328,7 @@ static int g5_cpufreq_target(struct cpufreq_policy *policy, if (g5_pmode_cur == newstate) return 0; - down(&g5_switch_mutex); + mutex_lock(&g5_switch_mutex); freqs.old = g5_cpu_freqs[g5_pmode_cur].frequency; freqs.new = g5_cpu_freqs[newstate].frequency; @@ -337,7 +338,7 @@ static int g5_cpufreq_target(struct cpufreq_policy *policy, rc = g5_switch_freq(newstate); cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - up(&g5_switch_mutex); + mutex_unlock(&g5_switch_mutex); return rc; } diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c index e49eddd5042d..a5063cd675c5 100644 --- a/arch/powerpc/platforms/powermac/feature.c +++ b/arch/powerpc/platforms/powermac/feature.c @@ -2951,7 +2951,7 @@ static void *pmac_early_vresume_data; void pmac_set_early_video_resume(void (*proc)(void *data), void *data) { - if (_machine != _MACH_Pmac) + if (!machine_is(powermac)) return; preempt_disable(); pmac_early_vresume_proc = proc; diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c index 87eb6bb7f0e7..e14f9ac55cf4 100644 --- a/arch/powerpc/platforms/powermac/low_i2c.c +++ b/arch/powerpc/platforms/powermac/low_i2c.c @@ -1457,6 +1457,9 @@ int __init pmac_i2c_init(void) return 0; i2c_inited = 1; + if (!machine_is(powermac)) + return 0; + /* Probe keywest-i2c busses */ kw_i2c_probe(); diff --git a/arch/powerpc/platforms/powermac/nvram.c b/arch/powerpc/platforms/powermac/nvram.c index 5fd28995c74c..262f967b880a 100644 --- a/arch/powerpc/platforms/powermac/nvram.c +++ b/arch/powerpc/platforms/powermac/nvram.c @@ -74,7 +74,7 @@ struct core99_header { * Read and write the non-volatile RAM on PowerMacs and CHRP machines. */ static int nvram_naddrs; -static volatile unsigned char *nvram_data; +static volatile unsigned char __iomem *nvram_data; static int is_core_99; static int core99_bank = 0; static int nvram_partitions[3]; @@ -148,7 +148,7 @@ static ssize_t core99_nvram_size(void) } #ifdef CONFIG_PPC32 -static volatile unsigned char *nvram_addr; +static volatile unsigned char __iomem *nvram_addr; static int nvram_mult; static unsigned char direct_nvram_read_byte(int addr) @@ -285,7 +285,7 @@ static int sm_erase_bank(int bank) int stat, i; unsigned long timeout; - u8* base = (u8 *)nvram_data + core99_bank*NVRAM_SIZE; + u8 __iomem *base = (u8 __iomem *)nvram_data + core99_bank*NVRAM_SIZE; DBG("nvram: Sharp/Micron Erasing bank %d...\n", bank); @@ -317,7 +317,7 @@ static int sm_write_bank(int bank, u8* datas) int i, stat = 0; unsigned long timeout; - u8* base = (u8 *)nvram_data + core99_bank*NVRAM_SIZE; + u8 __iomem *base = (u8 __iomem *)nvram_data + core99_bank*NVRAM_SIZE; DBG("nvram: Sharp/Micron Writing bank %d...\n", bank); @@ -352,7 +352,7 @@ static int amd_erase_bank(int bank) int i, stat = 0; unsigned long timeout; - u8* base = (u8 *)nvram_data + core99_bank*NVRAM_SIZE; + u8 __iomem *base = (u8 __iomem *)nvram_data + core99_bank*NVRAM_SIZE; DBG("nvram: AMD Erasing bank %d...\n", bank); @@ -399,7 +399,7 @@ static int amd_write_bank(int bank, u8* datas) int i, stat = 0; unsigned long timeout; - u8* base = (u8 *)nvram_data + core99_bank*NVRAM_SIZE; + u8 __iomem *base = (u8 __iomem *)nvram_data + core99_bank*NVRAM_SIZE; DBG("nvram: AMD Writing bank %d...\n", bank); @@ -597,7 +597,7 @@ int __init pmac_nvram_init(void) } #ifdef CONFIG_PPC32 - if (_machine == _MACH_chrp && nvram_naddrs == 1) { + if (machine_is(chrp) && nvram_naddrs == 1) { nvram_data = ioremap(r1.start, s1); nvram_mult = 1; ppc_md.nvram_read_val = direct_nvram_read_byte; diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c index de3f30e6b333..f5d8d15d74fa 100644 --- a/arch/powerpc/platforms/powermac/pci.c +++ b/arch/powerpc/platforms/powermac/pci.c @@ -1201,7 +1201,7 @@ void __init pmac_pcibios_after_init(void) #ifdef CONFIG_PPC32 void pmac_pci_fixup_cardbus(struct pci_dev* dev) { - if (_machine != _MACH_Pmac) + if (!machine_is(powermac)) return; /* * Fix the interrupt routing on the various cardbus bridges @@ -1244,8 +1244,9 @@ void pmac_pci_fixup_pciata(struct pci_dev* dev) * On PowerMacs, we try to switch any PCI ATA controller to * fully native mode */ - if (_machine != _MACH_Pmac) + if (!machine_is(powermac)) return; + /* Some controllers don't have the class IDE */ if (dev->vendor == PCI_VENDOR_ID_PROMISE) switch(dev->device) { diff --git a/arch/powerpc/platforms/powermac/pfunc_base.c b/arch/powerpc/platforms/powermac/pfunc_base.c index 9b7150f10414..a3bd3e728fa3 100644 --- a/arch/powerpc/platforms/powermac/pfunc_base.c +++ b/arch/powerpc/platforms/powermac/pfunc_base.c @@ -336,6 +336,8 @@ int __init pmac_pfunc_base_install(void) return 0; pfbase_inited = 1; + if (!machine_is(powermac)) + return 0; DBG("Installing base platform functions...\n"); diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index 385aab90c4d2..4d15e396655c 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -350,6 +350,13 @@ static void __init pmac_setup_arch(void) smp_ops = &psurge_smp_ops; #endif #endif /* CONFIG_SMP */ + +#ifdef CONFIG_ADB + if (strstr(cmd_line, "adb_sync")) { + extern int __adb_probe_sync; + __adb_probe_sync = 1; + } +#endif /* CONFIG_ADB */ } char *bootpath; @@ -576,30 +583,6 @@ pmac_halt(void) pmac_power_off(); } -#ifdef CONFIG_PPC32 -void __init pmac_init(void) -{ - /* isa_io_base gets set in pmac_pci_init */ - isa_mem_base = PMAC_ISA_MEM_BASE; - pci_dram_offset = PMAC_PCI_DRAM_OFFSET; - ISA_DMA_THRESHOLD = ~0L; - DMA_MODE_READ = 1; - DMA_MODE_WRITE = 2; - - ppc_md = pmac_md; - -#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) -#ifdef CONFIG_BLK_DEV_IDE_PMAC - ppc_ide_md.ide_init_hwif = pmac_ide_init_hwif_ports; - ppc_ide_md.default_io_base = pmac_ide_get_base; -#endif /* CONFIG_BLK_DEV_IDE_PMAC */ -#endif /* defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) */ - - if (ppc_md.progress) ppc_md.progress("pmac_init(): exit", 0); - -} -#endif - /* * Early initialization. */ @@ -646,6 +629,12 @@ static int __init pmac_declare_of_platform_devices(void) { struct device_node *np; + if (machine_is(chrp)) + return -1; + + if (!machine_is(powermac)) + return 0; + np = of_find_node_by_name(NULL, "valkyrie"); if (np) of_platform_device_create(np, "valkyrie", NULL); @@ -666,12 +655,15 @@ device_initcall(pmac_declare_of_platform_devices); /* * Called very early, MMU is off, device-tree isn't unflattened */ -static int __init pmac_probe(int platform) +static int __init pmac_probe(void) { -#ifdef CONFIG_PPC64 - if (platform != PLATFORM_POWERMAC) + unsigned long root = of_get_flat_dt_root(); + + if (!of_flat_dt_is_compatible(root, "Power Macintosh") && + !of_flat_dt_is_compatible(root, "MacRISC")) return 0; +#ifdef CONFIG_PPC64 /* * On U3, the DART (iommu) must be allocated now since it * has an impact on htab_initialize (due to the large page it @@ -681,6 +673,23 @@ static int __init pmac_probe(int platform) alloc_dart_table(); #endif +#ifdef CONFIG_PPC32 + /* isa_io_base gets set in pmac_pci_init */ + isa_mem_base = PMAC_ISA_MEM_BASE; + pci_dram_offset = PMAC_PCI_DRAM_OFFSET; + ISA_DMA_THRESHOLD = ~0L; + DMA_MODE_READ = 1; + DMA_MODE_WRITE = 2; + +#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) +#ifdef CONFIG_BLK_DEV_IDE_PMAC + ppc_ide_md.ide_init_hwif = pmac_ide_init_hwif_ports; + ppc_ide_md.default_io_base = pmac_ide_get_base; +#endif /* CONFIG_BLK_DEV_IDE_PMAC */ +#endif /* defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) */ + +#endif /* CONFIG_PPC32 */ + #ifdef CONFIG_PMAC_SMU /* * SMU based G5s need some memory below 2Gb, at least the current @@ -709,10 +718,8 @@ static int pmac_pci_probe_mode(struct pci_bus *bus) } #endif -struct machdep_calls __initdata pmac_md = { -#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PPC64) - .cpu_die = generic_mach_cpu_die, -#endif +define_machine(powermac) { + .name = "PowerMac", .probe = pmac_probe, .setup_arch = pmac_setup_arch, .init_early = pmac_init_early, @@ -733,7 +740,7 @@ struct machdep_calls __initdata pmac_md = { .progress = udbg_progress, #ifdef CONFIG_PPC64 .pci_probe_mode = pmac_pci_probe_mode, - .idle_loop = native_idle, + .power_save = power4_idle, .enable_pmcs = power4_enable_pmcs, #ifdef CONFIG_KEXEC .machine_kexec = default_machine_kexec, @@ -746,4 +753,7 @@ struct machdep_calls __initdata pmac_md = { .pcibios_after_init = pmac_pcibios_after_init, .phys_mem_access_prot = pci_phys_mem_access_prot, #endif +#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PPC64) + .cpu_die = generic_mach_cpu_die, +#endif }; diff --git a/arch/powerpc/platforms/powermac/time.c b/arch/powerpc/platforms/powermac/time.c index 5d9afa1fa02d..890758aa9667 100644 --- a/arch/powerpc/platforms/powermac/time.c +++ b/arch/powerpc/platforms/powermac/time.c @@ -336,10 +336,10 @@ static struct pmu_sleep_notifier time_sleep_notifier = { */ void __init pmac_calibrate_decr(void) { -#ifdef CONFIG_PM +#if defined(CONFIG_PM) && defined(CONFIG_ADB_PMU) /* XXX why here? */ pmu_register_sleep_notifier(&time_sleep_notifier); -#endif /* CONFIG_PM */ +#endif generic_calibrate_decr(); diff --git a/arch/powerpc/platforms/powermac/udbg_scc.c b/arch/powerpc/platforms/powermac/udbg_scc.c index c4352a8db644..b4fa9f03b461 100644 --- a/arch/powerpc/platforms/powermac/udbg_scc.c +++ b/arch/powerpc/platforms/powermac/udbg_scc.c @@ -116,7 +116,7 @@ void udbg_scc_init(int force_scc) /* Setup for 57600 8N1 */ if (ch == ch_a) addr += 0x20; - sccc = (volatile u8 * __iomem) ioremap(addr & PAGE_MASK, PAGE_SIZE) ; + sccc = ioremap(addr & PAGE_MASK, PAGE_SIZE) ; sccc += addr & ~PAGE_MASK; sccd = sccc + 0x10; diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index 2ab9dcdfb415..9b2b1cb117b3 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -1018,7 +1018,7 @@ static int __init eeh_init_proc(void) { struct proc_dir_entry *e; - if (platform_is_pseries()) { + if (machine_is(pseries)) { e = create_proc_entry("ppc64/eeh", 0, NULL); if (e) e->proc_fops = &proc_eeh_operations; diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c index b811d5ff92fe..cc2495a0cdd5 100644 --- a/arch/powerpc/platforms/pseries/eeh_driver.c +++ b/arch/powerpc/platforms/pseries/eeh_driver.c @@ -257,6 +257,7 @@ void handle_eeh_events (struct eeh_event *event) struct pci_bus *frozen_bus; int rc = 0; enum pci_ers_result result = PCI_ERS_RESULT_NONE; + const char *pci_str, *drv_str; frozen_dn = find_device_pe(event->dn); frozen_bus = pcibios_find_pci_bus(frozen_dn); @@ -291,6 +292,13 @@ void handle_eeh_events (struct eeh_event *event) frozen_pdn = PCI_DN(frozen_dn); frozen_pdn->eeh_freeze_count++; + + pci_str = pci_name (frozen_pdn->pcidev); + drv_str = pcid_name (frozen_pdn->pcidev); + if (!pci_str) { + pci_str = pci_name (event->dev); + drv_str = pcid_name (event->dev); + } if (frozen_pdn->eeh_freeze_count > EEH_MAX_ALLOWED_FREEZES) goto hard_fail; @@ -306,9 +314,7 @@ void handle_eeh_events (struct eeh_event *event) eeh_slot_error_detail(frozen_pdn, 1 /* Temporary Error */); printk(KERN_WARNING "EEH: This PCI device has failed %d times since last reboot: %s - %s\n", - frozen_pdn->eeh_freeze_count, - pci_name (frozen_pdn->pcidev), - pcid_name(frozen_pdn->pcidev)); + frozen_pdn->eeh_freeze_count, drv_str, pci_str); /* Walk the various device drivers attached to this slot through * a reset sequence, giving each an opportunity to do what it needs @@ -360,9 +366,7 @@ hard_fail: "EEH: PCI device %s - %s has failed %d times \n" "and has been permanently disabled. Please try reseating\n" "this device or replacing it.\n", - pci_name (frozen_pdn->pcidev), - pcid_name(frozen_pdn->pcidev), - frozen_pdn->eeh_freeze_count); + drv_str, pci_str, frozen_pdn->eeh_freeze_count); eeh_slot_error_detail(frozen_pdn, 2 /* Permanent Error */); diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c index 989f4bc136cb..c01d8f0cbe6d 100644 --- a/arch/powerpc/platforms/pseries/firmware.c +++ b/arch/powerpc/platforms/pseries/firmware.c @@ -91,7 +91,7 @@ void __init fw_feature_init(void) continue; /* we have a match */ - ppc64_firmware_features |= + powerpc_firmware_features |= firmware_features_table[i].val; break; } diff --git a/arch/powerpc/platforms/pseries/hvconsole.c b/arch/powerpc/platforms/pseries/hvconsole.c index 138e128a3886..ba6befd96636 100644 --- a/arch/powerpc/platforms/pseries/hvconsole.c +++ b/arch/powerpc/platforms/pseries/hvconsole.c @@ -62,6 +62,11 @@ int hvc_put_chars(uint32_t vtermno, const char *buf, int count) unsigned long *lbuf = (unsigned long *) buf; long ret; + + /* hcall will ret H_PARAMETER if 'count' exceeds firmware max.*/ + if (count > MAX_VIO_PUT_CHARS) + count = MAX_VIO_PUT_CHARS; + ret = plpar_hcall_norets(H_PUT_TERM_CHAR, vtermno, count, lbuf[0], lbuf[1]); if (ret == H_Success) diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c index 946ad59e3352..e97e67f5e079 100644 --- a/arch/powerpc/platforms/pseries/pci.c +++ b/arch/powerpc/platforms/pseries/pci.c @@ -120,7 +120,7 @@ static void fixup_winbond_82c105(struct pci_dev* dev) int i; unsigned int reg; - if (!platform_is_pseries()) + if (!machine_is(pseries)) return; printk("Using INTC for W82c105 IDE controller.\n"); diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index 44abdeb9ca03..6bfacc217085 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -28,6 +28,7 @@ #include <linux/pci.h> #include <asm/pci-bridge.h> #include <asm/ppc-pci.h> +#include <asm/firmware.h> static struct pci_bus * find_bus_among_children(struct pci_bus *bus, @@ -152,20 +153,24 @@ pcibios_pci_config_bridge(struct pci_dev *dev) void pcibios_add_pci_devices(struct pci_bus * bus) { - int slotno, num; + int slotno, num, mode; struct pci_dev *dev; struct device_node *dn = pci_bus_to_OF_node(bus); eeh_add_device_tree_early(dn); - if (_machine == PLATFORM_PSERIES_LPAR) { + mode = PCI_PROBE_NORMAL; + if (ppc_md.pci_probe_mode) + mode = ppc_md.pci_probe_mode(bus); + + if (mode == PCI_PROBE_DEVTREE) { /* use ofdt-based probe */ of_scan_bus(dn, bus); if (!list_empty(&bus->devices)) { pcibios_fixup_new_pci_devices(bus, 0); pci_bus_add_devices(bus); } - } else { + } else if (mode == PCI_PROBE_NORMAL) { /* use legacy probe */ slotno = PCI_SLOT(PCI_DN(dn->child)->devfn); num = pci_scan_slot(bus, PCI_DEVFN(slotno, 0)); diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index b046bcf7443d..9639c66b453d 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -132,7 +132,7 @@ static int __init init_ras_IRQ(void) of_node_put(np); } - return 1; + return 0; } __initcall(init_ras_IRQ); diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c index 86cfa6ecdcf3..1773103354be 100644 --- a/arch/powerpc/platforms/pseries/reconfig.c +++ b/arch/powerpc/platforms/pseries/reconfig.c @@ -17,8 +17,9 @@ #include <linux/proc_fs.h> #include <asm/prom.h> -#include <asm/pSeries_reconfig.h> +#include <asm/machdep.h> #include <asm/uaccess.h> +#include <asm/pSeries_reconfig.h> @@ -94,16 +95,16 @@ static struct device_node *derive_parent(const char *path) return parent; } -static struct notifier_block *pSeries_reconfig_chain; +static BLOCKING_NOTIFIER_HEAD(pSeries_reconfig_chain); int pSeries_reconfig_notifier_register(struct notifier_block *nb) { - return notifier_chain_register(&pSeries_reconfig_chain, nb); + return blocking_notifier_chain_register(&pSeries_reconfig_chain, nb); } void pSeries_reconfig_notifier_unregister(struct notifier_block *nb) { - notifier_chain_unregister(&pSeries_reconfig_chain, nb); + blocking_notifier_chain_unregister(&pSeries_reconfig_chain, nb); } static int pSeries_reconfig_add_node(const char *path, struct property *proplist) @@ -131,7 +132,7 @@ static int pSeries_reconfig_add_node(const char *path, struct property *proplist goto out_err; } - err = notifier_call_chain(&pSeries_reconfig_chain, + err = blocking_notifier_call_chain(&pSeries_reconfig_chain, PSERIES_RECONFIG_ADD, np); if (err == NOTIFY_BAD) { printk(KERN_ERR "Failed to add device node %s\n", path); @@ -171,7 +172,7 @@ static int pSeries_reconfig_remove_node(struct device_node *np) remove_node_proc_entries(np); - notifier_call_chain(&pSeries_reconfig_chain, + blocking_notifier_call_chain(&pSeries_reconfig_chain, PSERIES_RECONFIG_REMOVE, np); of_detach_node(np); @@ -508,7 +509,7 @@ static int proc_ppc64_create_ofdt(void) { struct proc_dir_entry *ent; - if (!platform_is_pseries()) + if (!machine_is(pseries)) return 0; ent = create_proc_entry("ppc64/ofdt", S_IWUSR, NULL); diff --git a/arch/powerpc/platforms/pseries/rtasd.c b/arch/powerpc/platforms/pseries/rtasd.c index a6f628d4c9dc..fcc4d561a236 100644 --- a/arch/powerpc/platforms/pseries/rtasd.c +++ b/arch/powerpc/platforms/pseries/rtasd.c @@ -27,6 +27,7 @@ #include <asm/prom.h> #include <asm/nvram.h> #include <asm/atomic.h> +#include <asm/machdep.h> #if 0 #define DEBUG(A...) printk(KERN_ERR A) @@ -481,7 +482,7 @@ static int __init rtas_init(void) { struct proc_dir_entry *entry; - if (!platform_is_pseries()) + if (!machine_is(pseries)) return 0; /* No RTAS */ diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 44d5c7fdcd97..b2fbf8ba8fbb 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -81,8 +81,8 @@ extern void find_udbg_vterm(void); int fwnmi_active; /* TRUE if an FWNMI handler is present */ -static void pseries_shared_idle(void); -static void pseries_dedicated_idle(void); +static void pseries_shared_idle_sleep(void); +static void pseries_dedicated_idle_sleep(void); struct mpic *pSeries_mpic; @@ -236,14 +236,13 @@ static void __init pSeries_setup_arch(void) vpa_init(boot_cpuid); if (get_lppaca()->shared_proc) { printk(KERN_INFO "Using shared processor idle loop\n"); - ppc_md.idle_loop = pseries_shared_idle; + ppc_md.power_save = pseries_shared_idle_sleep; } else { printk(KERN_INFO "Using dedicated idle loop\n"); - ppc_md.idle_loop = pseries_dedicated_idle; + ppc_md.power_save = pseries_dedicated_idle_sleep; } } else { printk(KERN_INFO "Using default idle loop\n"); - ppc_md.idle_loop = default_idle; } if (firmware_has_feature(FW_FEATURE_LPAR)) @@ -373,156 +372,123 @@ static int pSeries_check_legacy_ioport(unsigned int baseport) /* * Called very early, MMU is off, device-tree isn't unflattened */ -extern struct machdep_calls pSeries_md; -static int __init pSeries_probe(int platform) +static int __init pSeries_probe_hypertas(unsigned long node, + const char *uname, int depth, + void *data) { - if (platform != PLATFORM_PSERIES && - platform != PLATFORM_PSERIES_LPAR) - return 0; - - /* if we have some ppc_md fixups for LPAR to do, do - * it here ... - */ + if (depth != 1 || + (strcmp(uname, "rtas") != 0 && strcmp(uname, "rtas@0") != 0)) + return 0; - if (platform == PLATFORM_PSERIES_LPAR) - ppc64_firmware_features |= FW_FEATURE_LPAR; + if (of_get_flat_dt_prop(node, "ibm,hypertas-functions", NULL) != NULL) + powerpc_firmware_features |= FW_FEATURE_LPAR; - return 1; + return 1; } -DECLARE_PER_CPU(unsigned long, smt_snooze_delay); - -static inline void dedicated_idle_sleep(unsigned int cpu) +static int __init pSeries_probe(void) { - struct lppaca *plppaca = &lppaca[cpu ^ 1]; + char *dtype = of_get_flat_dt_prop(of_get_flat_dt_root(), + "device_type", NULL); + if (dtype == NULL) + return 0; + if (strcmp(dtype, "chrp")) + return 0; - /* Only sleep if the other thread is not idle */ - if (!(plppaca->idle)) { - local_irq_disable(); + DBG("pSeries detected, looking for LPAR capability...\n"); - /* - * We are about to sleep the thread and so wont be polling any - * more. - */ - clear_thread_flag(TIF_POLLING_NRFLAG); - smp_mb__after_clear_bit(); - - /* - * SMT dynamic mode. Cede will result in this thread going - * dormant, if the partner thread is still doing work. Thread - * wakes up if partner goes idle, an interrupt is presented, or - * a prod occurs. Returning from the cede enables external - * interrupts. - */ - if (!need_resched()) - cede_processor(); - else - local_irq_enable(); - set_thread_flag(TIF_POLLING_NRFLAG); - } else { - /* - * Give the HV an opportunity at the processor, since we are - * not doing any work. - */ - poll_pending(); - } + /* Now try to figure out if we are running on LPAR */ + of_scan_flat_dt(pSeries_probe_hypertas, NULL); + + DBG("Machine is%s LPAR !\n", + (powerpc_firmware_features & FW_FEATURE_LPAR) ? "" : " not"); + + return 1; } -static void pseries_dedicated_idle(void) + +DECLARE_PER_CPU(unsigned long, smt_snooze_delay); + +static void pseries_dedicated_idle_sleep(void) { unsigned int cpu = smp_processor_id(); unsigned long start_snooze; unsigned long *smt_snooze_delay = &__get_cpu_var(smt_snooze_delay); - set_thread_flag(TIF_POLLING_NRFLAG); - - while (1) { - /* - * Indicate to the HV that we are idle. Now would be - * a good time to find other work to dispatch. - */ - get_lppaca()->idle = 1; - - if (!need_resched()) { - start_snooze = get_tb() + - *smt_snooze_delay * tb_ticks_per_usec; - - while (!need_resched() && !cpu_is_offline(cpu)) { - ppc64_runlatch_off(); - - /* - * Go into low thread priority and possibly - * low power mode. - */ - HMT_low(); - HMT_very_low(); - - if (*smt_snooze_delay != 0 && - get_tb() > start_snooze) { - HMT_medium(); - dedicated_idle_sleep(cpu); - } - - } - - HMT_medium(); - } - - get_lppaca()->idle = 0; - ppc64_runlatch_on(); - preempt_enable_no_resched(); - schedule(); - preempt_disable(); + /* + * Indicate to the HV that we are idle. Now would be + * a good time to find other work to dispatch. + */ + get_lppaca()->idle = 1; - if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) - cpu_die(); - } -} + /* + * We come in with interrupts disabled, and need_resched() + * has been checked recently. If we should poll for a little + * while, do so. + */ + if (*smt_snooze_delay) { + start_snooze = get_tb() + + *smt_snooze_delay * tb_ticks_per_usec; + local_irq_enable(); + set_thread_flag(TIF_POLLING_NRFLAG); -static void pseries_shared_idle(void) -{ - unsigned int cpu = smp_processor_id(); + while (get_tb() < start_snooze) { + if (need_resched() || cpu_is_offline(cpu)) + goto out; + ppc64_runlatch_off(); + HMT_low(); + HMT_very_low(); + } - while (1) { - /* - * Indicate to the HV that we are idle. Now would be - * a good time to find other work to dispatch. - */ - get_lppaca()->idle = 1; + HMT_medium(); + clear_thread_flag(TIF_POLLING_NRFLAG); + smp_mb(); + local_irq_disable(); + if (need_resched() || cpu_is_offline(cpu)) + goto out; + } - while (!need_resched() && !cpu_is_offline(cpu)) { - local_irq_disable(); - ppc64_runlatch_off(); + /* + * Cede if the other thread is not idle, so that it can + * go single-threaded. If the other thread is idle, + * we ask the hypervisor if it has pending work it + * wants to do and cede if it does. Otherwise we keep + * polling in order to reduce interrupt latency. + * + * Doing the cede when the other thread is active will + * result in this thread going dormant, meaning the other + * thread gets to run in single-threaded (ST) mode, which + * is slightly faster than SMT mode with this thread at + * very low priority. The cede enables interrupts, which + * doesn't matter here. + */ + if (!lppaca[cpu ^ 1].idle || poll_pending() == H_Pending) + cede_processor(); - /* - * Yield the processor to the hypervisor. We return if - * an external interrupt occurs (which are driven prior - * to returning here) or if a prod occurs from another - * processor. When returning here, external interrupts - * are enabled. - * - * Check need_resched() again with interrupts disabled - * to avoid a race. - */ - if (!need_resched()) - cede_processor(); - else - local_irq_enable(); - - HMT_medium(); - } +out: + HMT_medium(); + get_lppaca()->idle = 0; +} - get_lppaca()->idle = 0; - ppc64_runlatch_on(); +static void pseries_shared_idle_sleep(void) +{ + /* + * Indicate to the HV that we are idle. Now would be + * a good time to find other work to dispatch. + */ + get_lppaca()->idle = 1; - preempt_enable_no_resched(); - schedule(); - preempt_disable(); + /* + * Yield the processor to the hypervisor. We return if + * an external interrupt occurs (which are driven prior + * to returning here) or if a prod occurs from another + * processor. When returning here, external interrupts + * are enabled. + */ + cede_processor(); - if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING) - cpu_die(); - } + get_lppaca()->idle = 0; } static int pSeries_pci_probe_mode(struct pci_bus *bus) @@ -553,7 +519,8 @@ static void pseries_kexec_cpu_down(int crash_shutdown, int secondary) } #endif -struct machdep_calls __initdata pSeries_md = { +define_machine(pseries) { + .name = "pSeries", .probe = pSeries_probe, .setup_arch = pSeries_setup_arch, .init_early = pSeries_init_early, diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c index eb86cdb9b802..4864cb32be25 100644 --- a/arch/powerpc/platforms/pseries/xics.c +++ b/arch/powerpc/platforms/pseries/xics.c @@ -500,7 +500,7 @@ nextnode: np; np = of_find_node_by_type(np, "cpu")) { ireg = (uint *)get_property(np, "reg", &ilen); - if (ireg && ireg[0] == boot_cpuid_phys) { + if (ireg && ireg[0] == get_hard_smp_processor_id(boot_cpuid)) { ireg = (uint *)get_property(np, "ibm,ppc-interrupt-gserver#s", &ilen); i = ilen / sizeof(int); @@ -541,7 +541,7 @@ nextnode: ops = &pSeriesLP_ops; else { #ifdef CONFIG_SMP - for_each_cpu(i) { + for_each_possible_cpu(i) { int hard_id; /* FIXME: Do this dynamically! --RR */ |