summaryrefslogtreecommitdiff
path: root/arch/arm64
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64')
-rw-r--r--arch/arm64/Kconfig35
-rw-r--r--arch/arm64/Makefile2
-rw-r--r--arch/arm64/boot/dts/apm/apm-mustang.dts4
-rw-r--r--arch/arm64/boot/dts/apm/apm-storm.dtsi32
-rw-r--r--arch/arm64/configs/defconfig14
-rw-r--r--arch/arm64/crypto/aes-ce-ccm-core.S12
-rw-r--r--arch/arm64/crypto/aes-ce.S10
-rw-r--r--arch/arm64/crypto/aes-glue.c12
-rw-r--r--arch/arm64/crypto/sha1-ce-core.S33
-rw-r--r--arch/arm64/crypto/sha1-ce-glue.c151
-rw-r--r--arch/arm64/crypto/sha2-ce-core.S29
-rw-r--r--arch/arm64/crypto/sha2-ce-glue.c227
-rw-r--r--arch/arm64/include/asm/assembler.h48
-rw-r--r--arch/arm64/include/asm/cpufeature.h18
-rw-r--r--arch/arm64/include/asm/cputable.h30
-rw-r--r--arch/arm64/include/asm/dma-mapping.h2
-rw-r--r--arch/arm64/include/asm/fixmap.h2
-rw-r--r--arch/arm64/include/asm/insn.h1
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h33
-rw-r--r--arch/arm64/include/asm/mmu_context.h43
-rw-r--r--arch/arm64/include/asm/page.h6
-rw-r--r--arch/arm64/include/asm/pgtable-hwdef.h7
-rw-r--r--arch/arm64/include/asm/pmu.h1
-rw-r--r--arch/arm64/include/asm/proc-fns.h9
-rw-r--r--arch/arm64/include/asm/processor.h6
-rw-r--r--arch/arm64/include/asm/smp_plat.h2
-rw-r--r--arch/arm64/include/asm/thread_info.h3
-rw-r--r--arch/arm64/include/asm/unistd32.h2
-rw-r--r--arch/arm64/kernel/Makefile4
-rw-r--r--arch/arm64/kernel/alternative.c55
-rw-r--r--arch/arm64/kernel/asm-offsets.c5
-rw-r--r--arch/arm64/kernel/cpu_errata.c47
-rw-r--r--arch/arm64/kernel/cpufeature.c47
-rw-r--r--arch/arm64/kernel/cpuinfo.c1
-rw-r--r--arch/arm64/kernel/cputable.c33
-rw-r--r--arch/arm64/kernel/entry.S20
-rw-r--r--arch/arm64/kernel/entry32.S18
-rw-r--r--arch/arm64/kernel/head.S261
-rw-r--r--arch/arm64/kernel/insn.c81
-rw-r--r--arch/arm64/kernel/perf_event.c78
-rw-r--r--arch/arm64/kernel/setup.c55
-rw-r--r--arch/arm64/kernel/signal.c7
-rw-r--r--arch/arm64/kernel/smp.c3
-rw-r--r--arch/arm64/kernel/sys32.c1
-rw-r--r--arch/arm64/kernel/vmlinux.lds.S17
-rw-r--r--arch/arm64/kvm/hyp-init.S25
-rw-r--r--arch/arm64/mm/mmu.c12
-rw-r--r--arch/arm64/mm/pageattr.c2
-rw-r--r--arch/arm64/mm/proc-macros.S10
-rw-r--r--arch/arm64/mm/proc.S3
50 files changed, 858 insertions, 701 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 34f487d5d84e..b8d96f1554af 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -368,6 +368,27 @@ config ARM64_ERRATUM_832075
If unsure, say Y.
+config ARM64_ERRATUM_845719
+ bool "Cortex-A53: 845719: a load might read incorrect data"
+ depends on COMPAT
+ default y
+ help
+ This option adds an alternative code sequence to work around ARM
+ erratum 845719 on Cortex-A53 parts up to r0p4.
+
+ When running a compat (AArch32) userspace on an affected Cortex-A53
+ part, a load at EL0 from a virtual address that matches the bottom 32
+ bits of the virtual address used by a recent load at (AArch64) EL1
+ might return incorrect data.
+
+ The workaround is to write the contextidr_el1 register on exception
+ return to a 32-bit task.
+ Please note that this does not necessarily enable the workaround,
+ as it depends on the alternative framework, which will only patch
+ the kernel if an affected CPU is detected.
+
+ If unsure, say Y.
+
endmenu
@@ -455,8 +476,8 @@ config SCHED_SMT
places. If unsure say N here.
config NR_CPUS
- int "Maximum number of CPUs (2-64)"
- range 2 64
+ int "Maximum number of CPUs (2-4096)"
+ range 2 4096
depends on SMP
# These have to remain sorted largest to smallest
default "64"
@@ -470,6 +491,10 @@ config HOTPLUG_CPU
source kernel/Kconfig.preempt
+config UP_LATE_INIT
+ def_bool y
+ depends on !SMP
+
config HZ
int
default 100
@@ -670,7 +695,7 @@ source "fs/Kconfig.binfmt"
config COMPAT
bool "Kernel support for 32-bit EL0"
- depends on !ARM64_64K_PAGES
+ depends on !ARM64_64K_PAGES || EXPERT
select COMPAT_BINFMT_ELF
select HAVE_UID16
select OLD_SIGSUSPEND3
@@ -681,6 +706,10 @@ config COMPAT
the user helper functions, VFP support and the ptrace interface are
handled appropriately by the kernel.
+ If you also enabled CONFIG_ARM64_64K_PAGES, please be aware that you
+ will only be able to execute AArch32 binaries that were compiled with
+ 64k aligned segments.
+
If you want to execute 32-bit userspace applications, say Y.
config SYSVIPC_COMPAT
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 69ceedc982a5..4d2a925998f9 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -48,7 +48,7 @@ core-$(CONFIG_KVM) += arch/arm64/kvm/
core-$(CONFIG_XEN) += arch/arm64/xen/
core-$(CONFIG_CRYPTO) += arch/arm64/crypto/
libs-y := arch/arm64/lib/ $(libs-y)
-libs-$(CONFIG_EFI_STUB) += drivers/firmware/efi/libstub/
+core-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a
# Default target when executing plain make
KBUILD_IMAGE := Image.gz
diff --git a/arch/arm64/boot/dts/apm/apm-mustang.dts b/arch/arm64/boot/dts/apm/apm-mustang.dts
index 2e25de0800b9..83578e766b94 100644
--- a/arch/arm64/boot/dts/apm/apm-mustang.dts
+++ b/arch/arm64/boot/dts/apm/apm-mustang.dts
@@ -45,6 +45,10 @@
status = "ok";
};
+&sgenet1 {
+ status = "ok";
+};
+
&xgenet {
status = "ok";
};
diff --git a/arch/arm64/boot/dts/apm/apm-storm.dtsi b/arch/arm64/boot/dts/apm/apm-storm.dtsi
index a857794432d6..e74f6e0a208c 100644
--- a/arch/arm64/boot/dts/apm/apm-storm.dtsi
+++ b/arch/arm64/boot/dts/apm/apm-storm.dtsi
@@ -186,6 +186,16 @@
clock-output-names = "sge0clk";
};
+ sge1clk: sge1clk@1f21c000 {
+ compatible = "apm,xgene-device-clock";
+ #clock-cells = <1>;
+ clocks = <&socplldiv2 0>;
+ reg = <0x0 0x1f21c000 0x0 0x1000>;
+ reg-names = "csr-reg";
+ csr-mask = <0xc>;
+ clock-output-names = "sge1clk";
+ };
+
xge0clk: xge0clk@1f61c000 {
compatible = "apm,xgene-device-clock";
#clock-cells = <1>;
@@ -628,13 +638,30 @@
<0x0 0x1f200000 0x0 0Xc300>,
<0x0 0x1B000000 0x0 0X200>;
reg-names = "enet_csr", "ring_csr", "ring_cmd";
- interrupts = <0x0 0xA0 0x4>;
+ interrupts = <0x0 0xA0 0x4>,
+ <0x0 0xA1 0x4>;
dma-coherent;
clocks = <&sge0clk 0>;
local-mac-address = [00 00 00 00 00 00];
phy-connection-type = "sgmii";
};
+ sgenet1: ethernet@1f210030 {
+ compatible = "apm,xgene1-sgenet";
+ status = "disabled";
+ reg = <0x0 0x1f210030 0x0 0xd100>,
+ <0x0 0x1f200000 0x0 0Xc300>,
+ <0x0 0x1B000000 0x0 0X8000>;
+ reg-names = "enet_csr", "ring_csr", "ring_cmd";
+ interrupts = <0x0 0xAC 0x4>,
+ <0x0 0xAD 0x4>;
+ port-id = <1>;
+ dma-coherent;
+ clocks = <&sge1clk 0>;
+ local-mac-address = [00 00 00 00 00 00];
+ phy-connection-type = "sgmii";
+ };
+
xgenet: ethernet@1f610000 {
compatible = "apm,xgene1-xgenet";
status = "disabled";
@@ -642,7 +669,8 @@
<0x0 0x1f600000 0x0 0Xc300>,
<0x0 0x18000000 0x0 0X200>;
reg-names = "enet_csr", "ring_csr", "ring_cmd";
- interrupts = <0x0 0x60 0x4>;
+ interrupts = <0x0 0x60 0x4>,
+ <0x0 0x61 0x4>;
dma-coherent;
clocks = <&xge0clk 0>;
/* mac address will be overwritten by the bootloader */
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index af6a452b1aac..4e03d8dd23f6 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -31,8 +31,12 @@ CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_DEADLINE is not set
+CONFIG_ARCH_EXYNOS7=y
CONFIG_ARCH_FSL_LS2085A=y
CONFIG_ARCH_MEDIATEK=y
+CONFIG_ARCH_SEATTLE=y
+CONFIG_ARCH_TEGRA=y
+CONFIG_ARCH_TEGRA_132_SOC=y
CONFIG_ARCH_THUNDER=y
CONFIG_ARCH_VEXPRESS=y
CONFIG_ARCH_XGENE=y
@@ -62,6 +66,7 @@ CONFIG_BPF_JIT=y
# CONFIG_WIRELESS is not set
CONFIG_NET_9P=y
CONFIG_NET_9P_VIRTIO=y
+# CONFIG_TEGRA_AHB is not set
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
@@ -81,6 +86,7 @@ CONFIG_NETDEVICES=y
CONFIG_TUN=y
CONFIG_VIRTIO_NET=y
CONFIG_NET_XGENE=y
+CONFIG_SKY2=y
CONFIG_SMC91X=y
CONFIG_SMSC911X=y
# CONFIG_WLAN is not set
@@ -100,6 +106,8 @@ CONFIG_SPI=y
CONFIG_SPI_PL022=y
CONFIG_GPIO_PL061=y
CONFIG_GPIO_XGENE=y
+CONFIG_POWER_RESET_XGENE=y
+CONFIG_POWER_RESET_SYSCON=y
# CONFIG_HWMON is not set
CONFIG_REGULATOR=y
CONFIG_REGULATOR_FIXED_VOLTAGE=y
@@ -112,10 +120,10 @@ CONFIG_LOGO=y
CONFIG_USB=y
CONFIG_USB_EHCI_HCD=y
CONFIG_USB_EHCI_HCD_PLATFORM=y
-CONFIG_USB_ISP1760_HCD=y
CONFIG_USB_OHCI_HCD=y
CONFIG_USB_OHCI_HCD_PLATFORM=y
CONFIG_USB_STORAGE=y
+CONFIG_USB_ISP1760=y
CONFIG_USB_ULPI=y
CONFIG_MMC=y
CONFIG_MMC_ARMMMCI=y
@@ -125,6 +133,7 @@ CONFIG_MMC_SPI=y
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_EFI=y
CONFIG_RTC_DRV_XGENE=y
+CONFIG_VIRTIO_PCI=y
CONFIG_VIRTIO_BALLOON=y
CONFIG_VIRTIO_MMIO=y
# CONFIG_IOMMU_SUPPORT is not set
@@ -143,8 +152,10 @@ CONFIG_CUSE=y
CONFIG_VFAT_FS=y
CONFIG_TMPFS=y
CONFIG_HUGETLBFS=y
+CONFIG_EFIVAR_FS=y
# CONFIG_MISC_FILESYSTEMS is not set
CONFIG_NFS_FS=y
+CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
CONFIG_9P_FS=y
CONFIG_NLS_CODEPAGE_437=y
@@ -159,7 +170,6 @@ CONFIG_LOCKUP_DETECTOR=y
# CONFIG_SCHED_DEBUG is not set
# CONFIG_DEBUG_PREEMPT is not set
# CONFIG_FTRACE is not set
-CONFIG_KEYS=y
CONFIG_SECURITY=y
CONFIG_CRYPTO_ANSI_CPRNG=y
CONFIG_ARM64_CRYPTO=y
diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S
index 432e4841cd81..a2a7fbcacc14 100644
--- a/arch/arm64/crypto/aes-ce-ccm-core.S
+++ b/arch/arm64/crypto/aes-ce-ccm-core.S
@@ -101,19 +101,19 @@ ENTRY(ce_aes_ccm_final)
0: mov v4.16b, v3.16b
1: ld1 {v5.2d}, [x2], #16 /* load next round key */
aese v0.16b, v4.16b
- aese v1.16b, v4.16b
aesmc v0.16b, v0.16b
+ aese v1.16b, v4.16b
aesmc v1.16b, v1.16b
2: ld1 {v3.2d}, [x2], #16 /* load next round key */
aese v0.16b, v5.16b
- aese v1.16b, v5.16b
aesmc v0.16b, v0.16b
+ aese v1.16b, v5.16b
aesmc v1.16b, v1.16b
3: ld1 {v4.2d}, [x2], #16 /* load next round key */
subs w3, w3, #3
aese v0.16b, v3.16b
- aese v1.16b, v3.16b
aesmc v0.16b, v0.16b
+ aese v1.16b, v3.16b
aesmc v1.16b, v1.16b
bpl 1b
aese v0.16b, v4.16b
@@ -146,19 +146,19 @@ ENDPROC(ce_aes_ccm_final)
ld1 {v5.2d}, [x10], #16 /* load 2nd round key */
2: /* inner loop: 3 rounds, 2x interleaved */
aese v0.16b, v4.16b
- aese v1.16b, v4.16b
aesmc v0.16b, v0.16b
+ aese v1.16b, v4.16b
aesmc v1.16b, v1.16b
3: ld1 {v3.2d}, [x10], #16 /* load next round key */
aese v0.16b, v5.16b
- aese v1.16b, v5.16b
aesmc v0.16b, v0.16b
+ aese v1.16b, v5.16b
aesmc v1.16b, v1.16b
4: ld1 {v4.2d}, [x10], #16 /* load next round key */
subs w7, w7, #3
aese v0.16b, v3.16b
- aese v1.16b, v3.16b
aesmc v0.16b, v0.16b
+ aese v1.16b, v3.16b
aesmc v1.16b, v1.16b
ld1 {v5.2d}, [x10], #16 /* load next round key */
bpl 2b
diff --git a/arch/arm64/crypto/aes-ce.S b/arch/arm64/crypto/aes-ce.S
index 685a18f731eb..78f3cfe92c08 100644
--- a/arch/arm64/crypto/aes-ce.S
+++ b/arch/arm64/crypto/aes-ce.S
@@ -45,18 +45,14 @@
.macro do_enc_Nx, de, mc, k, i0, i1, i2, i3
aes\de \i0\().16b, \k\().16b
- .ifnb \i1
- aes\de \i1\().16b, \k\().16b
- .ifnb \i3
- aes\de \i2\().16b, \k\().16b
- aes\de \i3\().16b, \k\().16b
- .endif
- .endif
aes\mc \i0\().16b, \i0\().16b
.ifnb \i1
+ aes\de \i1\().16b, \k\().16b
aes\mc \i1\().16b, \i1\().16b
.ifnb \i3
+ aes\de \i2\().16b, \k\().16b
aes\mc \i2\().16b, \i2\().16b
+ aes\de \i3\().16b, \k\().16b
aes\mc \i3\().16b, \i3\().16b
.endif
.endif
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index b1b5b893eb20..05d9e16c0dfd 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -284,7 +284,8 @@ static struct crypto_alg aes_algs[] = { {
.cra_name = "__ecb-aes-" MODE,
.cra_driver_name = "__driver-ecb-aes-" MODE,
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.cra_alignmask = 7,
@@ -302,7 +303,8 @@ static struct crypto_alg aes_algs[] = { {
.cra_name = "__cbc-aes-" MODE,
.cra_driver_name = "__driver-cbc-aes-" MODE,
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.cra_alignmask = 7,
@@ -320,7 +322,8 @@ static struct crypto_alg aes_algs[] = { {
.cra_name = "__ctr-aes-" MODE,
.cra_driver_name = "__driver-ctr-aes-" MODE,
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.cra_alignmask = 7,
@@ -338,7 +341,8 @@ static struct crypto_alg aes_algs[] = { {
.cra_name = "__xts-aes-" MODE,
.cra_driver_name = "__driver-xts-aes-" MODE,
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_xts_ctx),
.cra_alignmask = 7,
diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S
index 09d57d98609c..033aae6d732a 100644
--- a/arch/arm64/crypto/sha1-ce-core.S
+++ b/arch/arm64/crypto/sha1-ce-core.S
@@ -66,8 +66,8 @@
.word 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6
/*
- * void sha1_ce_transform(int blocks, u8 const *src, u32 *state,
- * u8 *head, long bytes)
+ * void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
+ * int blocks)
*/
ENTRY(sha1_ce_transform)
/* load round constants */
@@ -78,25 +78,22 @@ ENTRY(sha1_ce_transform)
ld1r {k3.4s}, [x6]
/* load state */
- ldr dga, [x2]
- ldr dgb, [x2, #16]
+ ldr dga, [x0]
+ ldr dgb, [x0, #16]
- /* load partial state (if supplied) */
- cbz x3, 0f
- ld1 {v8.4s-v11.4s}, [x3]
- b 1f
+ /* load sha1_ce_state::finalize */
+ ldr w4, [x0, #:lo12:sha1_ce_offsetof_finalize]
/* load input */
0: ld1 {v8.4s-v11.4s}, [x1], #64
- sub w0, w0, #1
+ sub w2, w2, #1
-1:
CPU_LE( rev32 v8.16b, v8.16b )
CPU_LE( rev32 v9.16b, v9.16b )
CPU_LE( rev32 v10.16b, v10.16b )
CPU_LE( rev32 v11.16b, v11.16b )
-2: add t0.4s, v8.4s, k0.4s
+1: add t0.4s, v8.4s, k0.4s
mov dg0v.16b, dgav.16b
add_update c, ev, k0, 8, 9, 10, 11, dgb
@@ -127,15 +124,15 @@ CPU_LE( rev32 v11.16b, v11.16b )
add dgbv.2s, dgbv.2s, dg1v.2s
add dgav.4s, dgav.4s, dg0v.4s
- cbnz w0, 0b
+ cbnz w2, 0b
/*
* Final block: add padding and total bit count.
- * Skip if we have no total byte count in x4. In that case, the input
- * size was not a round multiple of the block size, and the padding is
- * handled by the C code.
+ * Skip if the input size was not a round multiple of the block size,
+ * the padding is handled by the C code in that case.
*/
cbz x4, 3f
+ ldr x4, [x0, #:lo12:sha1_ce_offsetof_count]
movi v9.2d, #0
mov x8, #0x80000000
movi v10.2d, #0
@@ -144,10 +141,10 @@ CPU_LE( rev32 v11.16b, v11.16b )
mov x4, #0
mov v11.d[0], xzr
mov v11.d[1], x7
- b 2b
+ b 1b
/* store new state */
-3: str dga, [x2]
- str dgb, [x2, #16]
+3: str dga, [x0]
+ str dgb, [x0, #16]
ret
ENDPROC(sha1_ce_transform)
diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c
index 6fe83f37a750..114e7cc5de8c 100644
--- a/arch/arm64/crypto/sha1-ce-glue.c
+++ b/arch/arm64/crypto/sha1-ce-glue.c
@@ -12,144 +12,81 @@
#include <asm/unaligned.h>
#include <crypto/internal/hash.h>
#include <crypto/sha.h>
+#include <crypto/sha1_base.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/module.h>
+#define ASM_EXPORT(sym, val) \
+ asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val));
+
MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
-asmlinkage void sha1_ce_transform(int blocks, u8 const *src, u32 *state,
- u8 *head, long bytes);
+struct sha1_ce_state {
+ struct sha1_state sst;
+ u32 finalize;
+};
-static int sha1_init(struct shash_desc *desc)
-{
- struct sha1_state *sctx = shash_desc_ctx(desc);
+asmlinkage void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
+ int blocks);
- *sctx = (struct sha1_state){
- .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
- };
- return 0;
-}
-
-static int sha1_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
+static int sha1_ce_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
{
- struct sha1_state *sctx = shash_desc_ctx(desc);
- unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
-
- sctx->count += len;
-
- if ((partial + len) >= SHA1_BLOCK_SIZE) {
- int blocks;
-
- if (partial) {
- int p = SHA1_BLOCK_SIZE - partial;
+ struct sha1_ce_state *sctx = shash_desc_ctx(desc);
- memcpy(sctx->buffer + partial, data, p);
- data += p;
- len -= p;
- }
-
- blocks = len / SHA1_BLOCK_SIZE;
- len %= SHA1_BLOCK_SIZE;
-
- kernel_neon_begin_partial(16);
- sha1_ce_transform(blocks, data, sctx->state,
- partial ? sctx->buffer : NULL, 0);
- kernel_neon_end();
+ sctx->finalize = 0;
+ kernel_neon_begin_partial(16);
+ sha1_base_do_update(desc, data, len,
+ (sha1_block_fn *)sha1_ce_transform);
+ kernel_neon_end();
- data += blocks * SHA1_BLOCK_SIZE;
- partial = 0;
- }
- if (len)
- memcpy(sctx->buffer + partial, data, len);
return 0;
}
-static int sha1_final(struct shash_desc *desc, u8 *out)
+static int sha1_ce_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
{
- static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
+ struct sha1_ce_state *sctx = shash_desc_ctx(desc);
+ bool finalize = !sctx->sst.count && !(len % SHA1_BLOCK_SIZE);
- struct sha1_state *sctx = shash_desc_ctx(desc);
- __be64 bits = cpu_to_be64(sctx->count << 3);
- __be32 *dst = (__be32 *)out;
- int i;
-
- u32 padlen = SHA1_BLOCK_SIZE
- - ((sctx->count + sizeof(bits)) % SHA1_BLOCK_SIZE);
-
- sha1_update(desc, padding, padlen);
- sha1_update(desc, (const u8 *)&bits, sizeof(bits));
-
- for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++)
- put_unaligned_be32(sctx->state[i], dst++);
-
- *sctx = (struct sha1_state){};
- return 0;
-}
-
-static int sha1_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
-{
- struct sha1_state *sctx = shash_desc_ctx(desc);
- __be32 *dst = (__be32 *)out;
- int blocks;
- int i;
-
- if (sctx->count || !len || (len % SHA1_BLOCK_SIZE)) {
- sha1_update(desc, data, len);
- return sha1_final(desc, out);
- }
+ ASM_EXPORT(sha1_ce_offsetof_count,
+ offsetof(struct sha1_ce_state, sst.count));
+ ASM_EXPORT(sha1_ce_offsetof_finalize,
+ offsetof(struct sha1_ce_state, finalize));
/*
- * Use a fast path if the input is a multiple of 64 bytes. In
- * this case, there is no need to copy data around, and we can
- * perform the entire digest calculation in a single invocation
- * of sha1_ce_transform()
+ * Allow the asm code to perform the finalization if there is no
+ * partial data and the input is a round multiple of the block size.
*/
- blocks = len / SHA1_BLOCK_SIZE;
+ sctx->finalize = finalize;
kernel_neon_begin_partial(16);
- sha1_ce_transform(blocks, data, sctx->state, NULL, len);
+ sha1_base_do_update(desc, data, len,
+ (sha1_block_fn *)sha1_ce_transform);
+ if (!finalize)
+ sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform);
kernel_neon_end();
-
- for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++)
- put_unaligned_be32(sctx->state[i], dst++);
-
- *sctx = (struct sha1_state){};
- return 0;
+ return sha1_base_finish(desc, out);
}
-static int sha1_export(struct shash_desc *desc, void *out)
+static int sha1_ce_final(struct shash_desc *desc, u8 *out)
{
- struct sha1_state *sctx = shash_desc_ctx(desc);
- struct sha1_state *dst = out;
-
- *dst = *sctx;
- return 0;
-}
-
-static int sha1_import(struct shash_desc *desc, const void *in)
-{
- struct sha1_state *sctx = shash_desc_ctx(desc);
- struct sha1_state const *src = in;
-
- *sctx = *src;
- return 0;
+ kernel_neon_begin_partial(16);
+ sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform);
+ kernel_neon_end();
+ return sha1_base_finish(desc, out);
}
static struct shash_alg alg = {
- .init = sha1_init,
- .update = sha1_update,
- .final = sha1_final,
- .finup = sha1_finup,
- .export = sha1_export,
- .import = sha1_import,
- .descsize = sizeof(struct sha1_state),
+ .init = sha1_base_init,
+ .update = sha1_ce_update,
+ .final = sha1_ce_final,
+ .finup = sha1_ce_finup,
+ .descsize = sizeof(struct sha1_ce_state),
.digestsize = SHA1_DIGEST_SIZE,
- .statesize = sizeof(struct sha1_state),
.base = {
.cra_name = "sha1",
.cra_driver_name = "sha1-ce",
diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S
index 7f29fc031ea8..5df9d9d470ad 100644
--- a/arch/arm64/crypto/sha2-ce-core.S
+++ b/arch/arm64/crypto/sha2-ce-core.S
@@ -73,8 +73,8 @@
.word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
/*
- * void sha2_ce_transform(int blocks, u8 const *src, u32 *state,
- * u8 *head, long bytes)
+ * void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src,
+ * int blocks)
*/
ENTRY(sha2_ce_transform)
/* load round constants */
@@ -85,24 +85,21 @@ ENTRY(sha2_ce_transform)
ld1 {v12.4s-v15.4s}, [x8]
/* load state */
- ldp dga, dgb, [x2]
+ ldp dga, dgb, [x0]
- /* load partial input (if supplied) */
- cbz x3, 0f
- ld1 {v16.4s-v19.4s}, [x3]
- b 1f
+ /* load sha256_ce_state::finalize */
+ ldr w4, [x0, #:lo12:sha256_ce_offsetof_finalize]
/* load input */
0: ld1 {v16.4s-v19.4s}, [x1], #64
- sub w0, w0, #1
+ sub w2, w2, #1
-1:
CPU_LE( rev32 v16.16b, v16.16b )
CPU_LE( rev32 v17.16b, v17.16b )
CPU_LE( rev32 v18.16b, v18.16b )
CPU_LE( rev32 v19.16b, v19.16b )
-2: add t0.4s, v16.4s, v0.4s
+1: add t0.4s, v16.4s, v0.4s
mov dg0v.16b, dgav.16b
mov dg1v.16b, dgbv.16b
@@ -131,15 +128,15 @@ CPU_LE( rev32 v19.16b, v19.16b )
add dgbv.4s, dgbv.4s, dg1v.4s
/* handled all input blocks? */
- cbnz w0, 0b
+ cbnz w2, 0b
/*
* Final block: add padding and total bit count.
- * Skip if we have no total byte count in x4. In that case, the input
- * size was not a round multiple of the block size, and the padding is
- * handled by the C code.
+ * Skip if the input size was not a round multiple of the block size,
+ * the padding is handled by the C code in that case.
*/
cbz x4, 3f
+ ldr x4, [x0, #:lo12:sha256_ce_offsetof_count]
movi v17.2d, #0
mov x8, #0x80000000
movi v18.2d, #0
@@ -148,9 +145,9 @@ CPU_LE( rev32 v19.16b, v19.16b )
mov x4, #0
mov v19.d[0], xzr
mov v19.d[1], x7
- b 2b
+ b 1b
/* store new state */
-3: stp dga, dgb, [x2]
+3: stp dga, dgb, [x0]
ret
ENDPROC(sha2_ce_transform)
diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c
index ae67e88c28b9..1340e44c048b 100644
--- a/arch/arm64/crypto/sha2-ce-glue.c
+++ b/arch/arm64/crypto/sha2-ce-glue.c
@@ -12,206 +12,82 @@
#include <asm/unaligned.h>
#include <crypto/internal/hash.h>
#include <crypto/sha.h>
+#include <crypto/sha256_base.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/module.h>
+#define ASM_EXPORT(sym, val) \
+ asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val));
+
MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
-asmlinkage int sha2_ce_transform(int blocks, u8 const *src, u32 *state,
- u8 *head, long bytes);
-
-static int sha224_init(struct shash_desc *desc)
-{
- struct sha256_state *sctx = shash_desc_ctx(desc);
-
- *sctx = (struct sha256_state){
- .state = {
- SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3,
- SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7,
- }
- };
- return 0;
-}
-
-static int sha256_init(struct shash_desc *desc)
-{
- struct sha256_state *sctx = shash_desc_ctx(desc);
-
- *sctx = (struct sha256_state){
- .state = {
- SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3,
- SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7,
- }
- };
- return 0;
-}
-
-static int sha2_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
-{
- struct sha256_state *sctx = shash_desc_ctx(desc);
- unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
-
- sctx->count += len;
-
- if ((partial + len) >= SHA256_BLOCK_SIZE) {
- int blocks;
-
- if (partial) {
- int p = SHA256_BLOCK_SIZE - partial;
-
- memcpy(sctx->buf + partial, data, p);
- data += p;
- len -= p;
- }
+struct sha256_ce_state {
+ struct sha256_state sst;
+ u32 finalize;
+};
- blocks = len / SHA256_BLOCK_SIZE;
- len %= SHA256_BLOCK_SIZE;
+asmlinkage void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src,
+ int blocks);
- kernel_neon_begin_partial(28);
- sha2_ce_transform(blocks, data, sctx->state,
- partial ? sctx->buf : NULL, 0);
- kernel_neon_end();
-
- data += blocks * SHA256_BLOCK_SIZE;
- partial = 0;
- }
- if (len)
- memcpy(sctx->buf + partial, data, len);
- return 0;
-}
-
-static void sha2_final(struct shash_desc *desc)
+static int sha256_ce_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
{
- static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };
-
- struct sha256_state *sctx = shash_desc_ctx(desc);
- __be64 bits = cpu_to_be64(sctx->count << 3);
- u32 padlen = SHA256_BLOCK_SIZE
- - ((sctx->count + sizeof(bits)) % SHA256_BLOCK_SIZE);
-
- sha2_update(desc, padding, padlen);
- sha2_update(desc, (const u8 *)&bits, sizeof(bits));
-}
-
-static int sha224_final(struct shash_desc *desc, u8 *out)
-{
- struct sha256_state *sctx = shash_desc_ctx(desc);
- __be32 *dst = (__be32 *)out;
- int i;
-
- sha2_final(desc);
-
- for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++)
- put_unaligned_be32(sctx->state[i], dst++);
-
- *sctx = (struct sha256_state){};
- return 0;
-}
+ struct sha256_ce_state *sctx = shash_desc_ctx(desc);
-static int sha256_final(struct shash_desc *desc, u8 *out)
-{
- struct sha256_state *sctx = shash_desc_ctx(desc);
- __be32 *dst = (__be32 *)out;
- int i;
-
- sha2_final(desc);
-
- for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++)
- put_unaligned_be32(sctx->state[i], dst++);
+ sctx->finalize = 0;
+ kernel_neon_begin_partial(28);
+ sha256_base_do_update(desc, data, len,
+ (sha256_block_fn *)sha2_ce_transform);
+ kernel_neon_end();
- *sctx = (struct sha256_state){};
return 0;
}
-static void sha2_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len)
+static int sha256_ce_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
{
- struct sha256_state *sctx = shash_desc_ctx(desc);
- int blocks;
+ struct sha256_ce_state *sctx = shash_desc_ctx(desc);
+ bool finalize = !sctx->sst.count && !(len % SHA256_BLOCK_SIZE);
- if (sctx->count || !len || (len % SHA256_BLOCK_SIZE)) {
- sha2_update(desc, data, len);
- sha2_final(desc);
- return;
- }
+ ASM_EXPORT(sha256_ce_offsetof_count,
+ offsetof(struct sha256_ce_state, sst.count));
+ ASM_EXPORT(sha256_ce_offsetof_finalize,
+ offsetof(struct sha256_ce_state, finalize));
/*
- * Use a fast path if the input is a multiple of 64 bytes. In
- * this case, there is no need to copy data around, and we can
- * perform the entire digest calculation in a single invocation
- * of sha2_ce_transform()
+ * Allow the asm code to perform the finalization if there is no
+ * partial data and the input is a round multiple of the block size.
*/
- blocks = len / SHA256_BLOCK_SIZE;
+ sctx->finalize = finalize;
kernel_neon_begin_partial(28);
- sha2_ce_transform(blocks, data, sctx->state, NULL, len);
+ sha256_base_do_update(desc, data, len,
+ (sha256_block_fn *)sha2_ce_transform);
+ if (!finalize)
+ sha256_base_do_finalize(desc,
+ (sha256_block_fn *)sha2_ce_transform);
kernel_neon_end();
+ return sha256_base_finish(desc, out);
}
-static int sha224_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
+static int sha256_ce_final(struct shash_desc *desc, u8 *out)
{
- struct sha256_state *sctx = shash_desc_ctx(desc);
- __be32 *dst = (__be32 *)out;
- int i;
-
- sha2_finup(desc, data, len);
-
- for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++)
- put_unaligned_be32(sctx->state[i], dst++);
-
- *sctx = (struct sha256_state){};
- return 0;
-}
-
-static int sha256_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
-{
- struct sha256_state *sctx = shash_desc_ctx(desc);
- __be32 *dst = (__be32 *)out;
- int i;
-
- sha2_finup(desc, data, len);
-
- for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++)
- put_unaligned_be32(sctx->state[i], dst++);
-
- *sctx = (struct sha256_state){};
- return 0;
-}
-
-static int sha2_export(struct shash_desc *desc, void *out)
-{
- struct sha256_state *sctx = shash_desc_ctx(desc);
- struct sha256_state *dst = out;
-
- *dst = *sctx;
- return 0;
-}
-
-static int sha2_import(struct shash_desc *desc, const void *in)
-{
- struct sha256_state *sctx = shash_desc_ctx(desc);
- struct sha256_state const *src = in;
-
- *sctx = *src;
- return 0;
+ kernel_neon_begin_partial(28);
+ sha256_base_do_finalize(desc, (sha256_block_fn *)sha2_ce_transform);
+ kernel_neon_end();
+ return sha256_base_finish(desc, out);
}
static struct shash_alg algs[] = { {
- .init = sha224_init,
- .update = sha2_update,
- .final = sha224_final,
- .finup = sha224_finup,
- .export = sha2_export,
- .import = sha2_import,
- .descsize = sizeof(struct sha256_state),
+ .init = sha224_base_init,
+ .update = sha256_ce_update,
+ .final = sha256_ce_final,
+ .finup = sha256_ce_finup,
+ .descsize = sizeof(struct sha256_ce_state),
.digestsize = SHA224_DIGEST_SIZE,
- .statesize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha224",
.cra_driver_name = "sha224-ce",
@@ -221,15 +97,12 @@ static struct shash_alg algs[] = { {
.cra_module = THIS_MODULE,
}
}, {
- .init = sha256_init,
- .update = sha2_update,
- .final = sha256_final,
- .finup = sha256_finup,
- .export = sha2_export,
- .import = sha2_import,
- .descsize = sizeof(struct sha256_state),
+ .init = sha256_base_init,
+ .update = sha256_ce_update,
+ .final = sha256_ce_final,
+ .finup = sha256_ce_finup,
+ .descsize = sizeof(struct sha256_ce_state),
.digestsize = SHA256_DIGEST_SIZE,
- .statesize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha256",
.cra_driver_name = "sha256-ce",
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 750bac4e637e..144b64ad96c3 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -159,4 +159,52 @@ lr .req x30 // link register
orr \rd, \lbits, \hbits, lsl #32
.endm
+/*
+ * Pseudo-ops for PC-relative adr/ldr/str <reg>, <symbol> where
+ * <symbol> is within the range +/- 4 GB of the PC.
+ */
+ /*
+ * @dst: destination register (64 bit wide)
+ * @sym: name of the symbol
+ * @tmp: optional scratch register to be used if <dst> == sp, which
+ * is not allowed in an adrp instruction
+ */
+ .macro adr_l, dst, sym, tmp=
+ .ifb \tmp
+ adrp \dst, \sym
+ add \dst, \dst, :lo12:\sym
+ .else
+ adrp \tmp, \sym
+ add \dst, \tmp, :lo12:\sym
+ .endif
+ .endm
+
+ /*
+ * @dst: destination register (32 or 64 bit wide)
+ * @sym: name of the symbol
+ * @tmp: optional 64-bit scratch register to be used if <dst> is a
+ * 32-bit wide register, in which case it cannot be used to hold
+ * the address
+ */
+ .macro ldr_l, dst, sym, tmp=
+ .ifb \tmp
+ adrp \dst, \sym
+ ldr \dst, [\dst, :lo12:\sym]
+ .else
+ adrp \tmp, \sym
+ ldr \dst, [\tmp, :lo12:\sym]
+ .endif
+ .endm
+
+ /*
+ * @src: source register (32 or 64 bit wide)
+ * @sym: name of the symbol
+ * @tmp: mandatory 64-bit scratch register to calculate the address
+ * while <src> needs to be preserved.
+ */
+ .macro str_l, src, sym, tmp
+ adrp \tmp, \sym
+ str \src, [\tmp, :lo12:\sym]
+ .endm
+
#endif /* __ASM_ASSEMBLER_H */
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index b6c16d5f622f..82cb9f98ba1a 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -23,11 +23,24 @@
#define ARM64_WORKAROUND_CLEAN_CACHE 0
#define ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE 1
+#define ARM64_WORKAROUND_845719 2
-#define ARM64_NCAPS 2
+#define ARM64_NCAPS 3
#ifndef __ASSEMBLY__
+struct arm64_cpu_capabilities {
+ const char *desc;
+ u16 capability;
+ bool (*matches)(const struct arm64_cpu_capabilities *);
+ union {
+ struct { /* To be used for erratum handling only */
+ u32 midr_model;
+ u32 midr_range_min, midr_range_max;
+ };
+ };
+};
+
extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
static inline bool cpu_have_feature(unsigned int num)
@@ -51,7 +64,10 @@ static inline void cpus_set_cap(unsigned int num)
__set_bit(num, cpu_hwcaps);
}
+void check_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
+ const char *info);
void check_local_cpu_errata(void);
+void check_local_cpu_features(void);
bool cpu_supports_mixed_endian_el0(void);
bool system_supports_mixed_endian_el0(void);
diff --git a/arch/arm64/include/asm/cputable.h b/arch/arm64/include/asm/cputable.h
deleted file mode 100644
index e3bd983d3661..000000000000
--- a/arch/arm64/include/asm/cputable.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * arch/arm64/include/asm/cputable.h
- *
- * Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-#ifndef __ASM_CPUTABLE_H
-#define __ASM_CPUTABLE_H
-
-struct cpu_info {
- unsigned int cpu_id_val;
- unsigned int cpu_id_mask;
- const char *cpu_name;
- unsigned long (*cpu_setup)(void);
-};
-
-extern struct cpu_info *lookup_processor_type(unsigned int);
-
-#endif
diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h
index 6932bb57dba0..9437e3dc5833 100644
--- a/arch/arm64/include/asm/dma-mapping.h
+++ b/arch/arm64/include/asm/dma-mapping.h
@@ -97,7 +97,7 @@ static inline int dma_set_mask(struct device *dev, u64 mask)
static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
{
if (!dev->dma_mask)
- return 0;
+ return false;
return addr + size - 1 <= *dev->dma_mask;
}
diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h
index defa0ff98250..926495686554 100644
--- a/arch/arm64/include/asm/fixmap.h
+++ b/arch/arm64/include/asm/fixmap.h
@@ -33,6 +33,7 @@
enum fixed_addresses {
FIX_HOLE,
FIX_EARLYCON_MEM_BASE,
+ FIX_TEXT_POKE0,
__end_of_permanent_fixed_addresses,
/*
@@ -49,7 +50,6 @@ enum fixed_addresses {
FIX_BTMAP_END = __end_of_permanent_fixed_addresses,
FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1,
- FIX_TEXT_POKE0,
__end_of_fixed_addresses
};
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index d2f49423c5dc..f81b328d9cf4 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -285,6 +285,7 @@ bool aarch64_insn_is_nop(u32 insn);
int aarch64_insn_read(void *addr, u32 *insnp);
int aarch64_insn_write(void *addr, u32 insn);
enum aarch64_insn_encoding_class aarch64_get_insn_class(u32 insn);
+u64 aarch64_insn_decode_immediate(enum aarch64_insn_imm_type type, u32 insn);
u32 aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type,
u32 insn, u64 imm);
u32 aarch64_insn_gen_branch_imm(unsigned long pc, unsigned long addr,
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 36250705dc4c..61505676d085 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -68,6 +68,8 @@
#include <asm/pgalloc.h>
#include <asm/cachetype.h>
#include <asm/cacheflush.h>
+#include <asm/mmu_context.h>
+#include <asm/pgtable.h>
#define KERN_TO_HYP(kva) ((unsigned long)kva - PAGE_OFFSET + HYP_PAGE_OFFSET)
@@ -269,5 +271,36 @@ static inline void __kvm_flush_dcache_pud(pud_t pud)
void kvm_set_way_flush(struct kvm_vcpu *vcpu);
void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled);
+static inline bool __kvm_cpu_uses_extended_idmap(void)
+{
+ return __cpu_uses_extended_idmap();
+}
+
+static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd,
+ pgd_t *hyp_pgd,
+ pgd_t *merged_hyp_pgd,
+ unsigned long hyp_idmap_start)
+{
+ int idmap_idx;
+
+ /*
+ * Use the first entry to access the HYP mappings. It is
+ * guaranteed to be free, otherwise we wouldn't use an
+ * extended idmap.
+ */
+ VM_BUG_ON(pgd_val(merged_hyp_pgd[0]));
+ merged_hyp_pgd[0] = __pgd(__pa(hyp_pgd) | PMD_TYPE_TABLE);
+
+ /*
+ * Create another extended level entry that points to the boot HYP map,
+ * which contains an ID mapping of the HYP init code. We essentially
+ * merge the boot and runtime HYP maps by doing so, but they don't
+ * overlap anyway, so this is fine.
+ */
+ idmap_idx = hyp_idmap_start >> VA_BITS;
+ VM_BUG_ON(pgd_val(merged_hyp_pgd[idmap_idx]));
+ merged_hyp_pgd[idmap_idx] = __pgd(__pa(boot_hyp_pgd) | PMD_TYPE_TABLE);
+}
+
#endif /* __ASSEMBLY__ */
#endif /* __ARM64_KVM_MMU_H__ */
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 101a42bde728..8ec41e5f56f0 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -64,6 +64,49 @@ static inline void cpu_set_reserved_ttbr0(void)
: "r" (ttbr));
}
+/*
+ * TCR.T0SZ value to use when the ID map is active. Usually equals
+ * TCR_T0SZ(VA_BITS), unless system RAM is positioned very high in
+ * physical memory, in which case it will be smaller.
+ */
+extern u64 idmap_t0sz;
+
+static inline bool __cpu_uses_extended_idmap(void)
+{
+ return (!IS_ENABLED(CONFIG_ARM64_VA_BITS_48) &&
+ unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS)));
+}
+
+static inline void __cpu_set_tcr_t0sz(u64 t0sz)
+{
+ unsigned long tcr;
+
+ if (__cpu_uses_extended_idmap())
+ asm volatile (
+ " mrs %0, tcr_el1 ;"
+ " bfi %0, %1, %2, %3 ;"
+ " msr tcr_el1, %0 ;"
+ " isb"
+ : "=&r" (tcr)
+ : "r"(t0sz), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH));
+}
+
+/*
+ * Set TCR.T0SZ to the value appropriate for activating the identity map.
+ */
+static inline void cpu_set_idmap_tcr_t0sz(void)
+{
+ __cpu_set_tcr_t0sz(idmap_t0sz);
+}
+
+/*
+ * Set TCR.T0SZ to its default value (based on VA_BITS)
+ */
+static inline void cpu_set_default_tcr_t0sz(void)
+{
+ __cpu_set_tcr_t0sz(TCR_T0SZ(VA_BITS));
+}
+
static inline void switch_new_context(struct mm_struct *mm)
{
unsigned long flags;
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index 8fc8fa280e92..7d9c7e4a424b 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -33,7 +33,9 @@
* image. Both require pgd, pud (4 levels only) and pmd tables to (section)
* map the kernel. With the 64K page configuration, swapper and idmap need to
* map to pte level. The swapper also maps the FDT (see __create_page_tables
- * for more information).
+ * for more information). Note that the number of ID map translation levels
+ * could be increased on the fly if system RAM is out of reach for the default
+ * VA range, so 3 pages are reserved in all cases.
*/
#ifdef CONFIG_ARM64_64K_PAGES
#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS)
@@ -42,7 +44,7 @@
#endif
#define SWAPPER_DIR_SIZE (SWAPPER_PGTABLE_LEVELS * PAGE_SIZE)
-#define IDMAP_DIR_SIZE (SWAPPER_DIR_SIZE)
+#define IDMAP_DIR_SIZE (3 * PAGE_SIZE)
#ifndef __ASSEMBLY__
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 80f3d241cff8..59bfae75dc98 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -143,7 +143,12 @@
/*
* TCR flags.
*/
-#define TCR_TxSZ(x) (((UL(64) - (x)) << 16) | ((UL(64) - (x)) << 0))
+#define TCR_T0SZ_OFFSET 0
+#define TCR_T1SZ_OFFSET 16
+#define TCR_T0SZ(x) ((UL(64) - (x)) << TCR_T0SZ_OFFSET)
+#define TCR_T1SZ(x) ((UL(64) - (x)) << TCR_T1SZ_OFFSET)
+#define TCR_TxSZ(x) (TCR_T0SZ(x) | TCR_T1SZ(x))
+#define TCR_TxSZ_WIDTH 6
#define TCR_IRGN_NC ((UL(0) << 8) | (UL(0) << 24))
#define TCR_IRGN_WBWA ((UL(1) << 8) | (UL(1) << 24))
#define TCR_IRGN_WT ((UL(2) << 8) | (UL(2) << 24))
diff --git a/arch/arm64/include/asm/pmu.h b/arch/arm64/include/asm/pmu.h
index e6f087806aaf..b7710a59672c 100644
--- a/arch/arm64/include/asm/pmu.h
+++ b/arch/arm64/include/asm/pmu.h
@@ -44,6 +44,7 @@ struct pmu_hw_events {
struct arm_pmu {
struct pmu pmu;
cpumask_t active_irqs;
+ int *irq_affinity;
const char *name;
irqreturn_t (*handle_irq)(int irq_num, void *dev);
void (*enable)(struct hw_perf_event *evt, int idx);
diff --git a/arch/arm64/include/asm/proc-fns.h b/arch/arm64/include/asm/proc-fns.h
index 941c375616e2..220633b791b8 100644
--- a/arch/arm64/include/asm/proc-fns.h
+++ b/arch/arm64/include/asm/proc-fns.h
@@ -45,15 +45,6 @@ do { \
cpu_do_switch_mm(virt_to_phys(pgd),mm); \
} while (0)
-#define cpu_get_pgd() \
-({ \
- unsigned long pg; \
- asm("mrs %0, ttbr0_el1\n" \
- : "=r" (pg)); \
- pg &= ~0xffff000000003ffful; \
- (pgd_t *)phys_to_virt(pg); \
-})
-
#endif /* __ASSEMBLY__ */
#endif /* __KERNEL__ */
#endif /* __ASM_PROCFNS_H */
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 20e9591a60cf..d2c37a1df0eb 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -127,7 +127,11 @@ extern void release_thread(struct task_struct *);
unsigned long get_wchan(struct task_struct *p);
-#define cpu_relax() barrier()
+static inline void cpu_relax(void)
+{
+ asm volatile("yield" ::: "memory");
+}
+
#define cpu_relax_lowlatency() cpu_relax()
/* Thread switching */
diff --git a/arch/arm64/include/asm/smp_plat.h b/arch/arm64/include/asm/smp_plat.h
index 59e282311b58..8dcd61e32176 100644
--- a/arch/arm64/include/asm/smp_plat.h
+++ b/arch/arm64/include/asm/smp_plat.h
@@ -40,4 +40,6 @@ static inline u32 mpidr_hash_size(void)
extern u64 __cpu_logical_map[NR_CPUS];
#define cpu_logical_map(cpu) __cpu_logical_map[cpu]
+void __init do_post_cpus_up_work(void);
+
#endif /* __ASM_SMP_PLAT_H */
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 702e1e6a0d80..dcd06d18a42a 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -33,7 +33,6 @@
#ifndef __ASSEMBLY__
struct task_struct;
-struct exec_domain;
#include <asm/types.h>
@@ -47,7 +46,6 @@ struct thread_info {
unsigned long flags; /* low level flags */
mm_segment_t addr_limit; /* address limit */
struct task_struct *task; /* main task structure */
- struct exec_domain *exec_domain; /* execution domain */
int preempt_count; /* 0 => preemptable, <0 => bug */
int cpu; /* cpu */
};
@@ -55,7 +53,6 @@ struct thread_info {
#define INIT_THREAD_INFO(tsk) \
{ \
.task = &tsk, \
- .exec_domain = &default_exec_domain, \
.flags = 0, \
.preempt_count = INIT_PREEMPT_COUNT, \
.addr_limit = KERNEL_DS, \
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 27224426e0bf..cef934a90f17 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -406,7 +406,7 @@ __SYSCALL(__NR_vfork, sys_vfork)
#define __NR_ugetrlimit 191 /* SuS compliant getrlimit */
__SYSCALL(__NR_ugetrlimit, compat_sys_getrlimit) /* SuS compliant getrlimit */
#define __NR_mmap2 192
-__SYSCALL(__NR_mmap2, sys_mmap_pgoff)
+__SYSCALL(__NR_mmap2, compat_sys_mmap2_wrapper)
#define __NR_truncate64 193
__SYSCALL(__NR_truncate64, compat_sys_truncate64_wrapper)
#define __NR_ftruncate64 194
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 5ee07eee80c2..b12e15b80516 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -12,12 +12,12 @@ CFLAGS_REMOVE_insn.o = -pg
CFLAGS_REMOVE_return_address.o = -pg
# Object file lists.
-arm64-obj-y := cputable.o debug-monitors.o entry.o irq.o fpsimd.o \
+arm64-obj-y := debug-monitors.o entry.o irq.o fpsimd.o \
entry-fpsimd.o process.o ptrace.o setup.o signal.o \
sys.o stacktrace.o time.o traps.o io.o vdso.o \
hyp-stub.o psci.o psci-call.o cpu_ops.o insn.o \
return_address.o cpuinfo.o cpu_errata.o \
- alternative.o cacheinfo.o
+ cpufeature.o alternative.o cacheinfo.o
arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \
sys_compat.o entry32.o \
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index ad7821d64a1d..21033bba9390 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -24,6 +24,7 @@
#include <asm/cacheflush.h>
#include <asm/alternative.h>
#include <asm/cpufeature.h>
+#include <asm/insn.h>
#include <linux/stop_machine.h>
extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
@@ -33,6 +34,48 @@ struct alt_region {
struct alt_instr *end;
};
+/*
+ * Decode the imm field of a b/bl instruction, and return the byte
+ * offset as a signed value (so it can be used when computing a new
+ * branch target).
+ */
+static s32 get_branch_offset(u32 insn)
+{
+ s32 imm = aarch64_insn_decode_immediate(AARCH64_INSN_IMM_26, insn);
+
+ /* sign-extend the immediate before turning it into a byte offset */
+ return (imm << 6) >> 4;
+}
+
+static u32 get_alt_insn(u8 *insnptr, u8 *altinsnptr)
+{
+ u32 insn;
+
+ aarch64_insn_read(altinsnptr, &insn);
+
+ /* Stop the world on instructions we don't support... */
+ BUG_ON(aarch64_insn_is_cbz(insn));
+ BUG_ON(aarch64_insn_is_cbnz(insn));
+ BUG_ON(aarch64_insn_is_bcond(insn));
+ /* ... and there is probably more. */
+
+ if (aarch64_insn_is_b(insn) || aarch64_insn_is_bl(insn)) {
+ enum aarch64_insn_branch_type type;
+ unsigned long target;
+
+ if (aarch64_insn_is_b(insn))
+ type = AARCH64_INSN_BRANCH_NOLINK;
+ else
+ type = AARCH64_INSN_BRANCH_LINK;
+
+ target = (unsigned long)altinsnptr + get_branch_offset(insn);
+ insn = aarch64_insn_gen_branch_imm((unsigned long)insnptr,
+ target, type);
+ }
+
+ return insn;
+}
+
static int __apply_alternatives(void *alt_region)
{
struct alt_instr *alt;
@@ -40,16 +83,24 @@ static int __apply_alternatives(void *alt_region)
u8 *origptr, *replptr;
for (alt = region->begin; alt < region->end; alt++) {
+ u32 insn;
+ int i;
+
if (!cpus_have_cap(alt->cpufeature))
continue;
- BUG_ON(alt->alt_len > alt->orig_len);
+ BUG_ON(alt->alt_len != alt->orig_len);
pr_info_once("patching kernel code\n");
origptr = (u8 *)&alt->orig_offset + alt->orig_offset;
replptr = (u8 *)&alt->alt_offset + alt->alt_offset;
- memcpy(origptr, replptr, alt->alt_len);
+
+ for (i = 0; i < alt->alt_len; i += sizeof(insn)) {
+ insn = get_alt_insn(origptr + i, replptr + i);
+ aarch64_insn_write(origptr + i, insn);
+ }
+
flush_icache_range((uintptr_t)origptr,
(uintptr_t)(origptr + alt->alt_len));
}
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index f7fa65d4c352..da675cc5dfae 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -24,7 +24,6 @@
#include <linux/kvm_host.h>
#include <asm/thread_info.h>
#include <asm/memory.h>
-#include <asm/cputable.h>
#include <asm/smp_plat.h>
#include <asm/suspend.h>
#include <asm/vdso_datapage.h>
@@ -38,7 +37,6 @@ int main(void)
DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit));
DEFINE(TI_TASK, offsetof(struct thread_info, task));
- DEFINE(TI_EXEC_DOMAIN, offsetof(struct thread_info, exec_domain));
DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
BLANK();
DEFINE(THREAD_CPU_CONTEXT, offsetof(struct task_struct, thread.cpu_context));
@@ -71,9 +69,6 @@ int main(void)
BLANK();
DEFINE(PAGE_SZ, PAGE_SIZE);
BLANK();
- DEFINE(CPU_INFO_SZ, sizeof(struct cpu_info));
- DEFINE(CPU_INFO_SETUP, offsetof(struct cpu_info, cpu_setup));
- BLANK();
DEFINE(DMA_BIDIRECTIONAL, DMA_BIDIRECTIONAL);
DEFINE(DMA_TO_DEVICE, DMA_TO_DEVICE);
DEFINE(DMA_FROM_DEVICE, DMA_FROM_DEVICE);
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index fa62637e63a8..6ffd91438560 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -16,8 +16,6 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#define pr_fmt(fmt) "alternatives: " fmt
-
#include <linux/types.h>
#include <asm/cpu.h>
#include <asm/cputype.h>
@@ -26,27 +24,11 @@
#define MIDR_CORTEX_A53 MIDR_CPU_PART(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
#define MIDR_CORTEX_A57 MIDR_CPU_PART(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
-/*
- * Add a struct or another datatype to the union below if you need
- * different means to detect an affected CPU.
- */
-struct arm64_cpu_capabilities {
- const char *desc;
- u16 capability;
- bool (*is_affected)(struct arm64_cpu_capabilities *);
- union {
- struct {
- u32 midr_model;
- u32 midr_range_min, midr_range_max;
- };
- };
-};
-
#define CPU_MODEL_MASK (MIDR_IMPLEMENTOR_MASK | MIDR_PARTNUM_MASK | \
MIDR_ARCHITECTURE_MASK)
static bool __maybe_unused
-is_affected_midr_range(struct arm64_cpu_capabilities *entry)
+is_affected_midr_range(const struct arm64_cpu_capabilities *entry)
{
u32 midr = read_cpuid_id();
@@ -59,12 +41,12 @@ is_affected_midr_range(struct arm64_cpu_capabilities *entry)
}
#define MIDR_RANGE(model, min, max) \
- .is_affected = is_affected_midr_range, \
+ .matches = is_affected_midr_range, \
.midr_model = model, \
.midr_range_min = min, \
.midr_range_max = max
-struct arm64_cpu_capabilities arm64_errata[] = {
+const struct arm64_cpu_capabilities arm64_errata[] = {
#if defined(CONFIG_ARM64_ERRATUM_826319) || \
defined(CONFIG_ARM64_ERRATUM_827319) || \
defined(CONFIG_ARM64_ERRATUM_824069)
@@ -88,7 +70,16 @@ struct arm64_cpu_capabilities arm64_errata[] = {
/* Cortex-A57 r0p0 - r1p2 */
.desc = "ARM erratum 832075",
.capability = ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE,
- MIDR_RANGE(MIDR_CORTEX_A57, 0x00, 0x12),
+ MIDR_RANGE(MIDR_CORTEX_A57, 0x00,
+ (1 << MIDR_VARIANT_SHIFT) | 2),
+ },
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_845719
+ {
+ /* Cortex-A53 r0p[01234] */
+ .desc = "ARM erratum 845719",
+ .capability = ARM64_WORKAROUND_845719,
+ MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x04),
},
#endif
{
@@ -97,15 +88,5 @@ struct arm64_cpu_capabilities arm64_errata[] = {
void check_local_cpu_errata(void)
{
- struct arm64_cpu_capabilities *cpus = arm64_errata;
- int i;
-
- for (i = 0; cpus[i].desc; i++) {
- if (!cpus[i].is_affected(&cpus[i]))
- continue;
-
- if (!cpus_have_cap(cpus[i].capability))
- pr_info("enabling workaround for %s\n", cpus[i].desc);
- cpus_set_cap(cpus[i].capability);
- }
+ check_cpu_capabilities(arm64_errata, "enabling workaround for");
}
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
new file mode 100644
index 000000000000..3d9967e43d89
--- /dev/null
+++ b/arch/arm64/kernel/cpufeature.c
@@ -0,0 +1,47 @@
+/*
+ * Contains CPU feature definitions
+ *
+ * Copyright (C) 2015 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define pr_fmt(fmt) "alternatives: " fmt
+
+#include <linux/types.h>
+#include <asm/cpu.h>
+#include <asm/cpufeature.h>
+
+static const struct arm64_cpu_capabilities arm64_features[] = {
+ {},
+};
+
+void check_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
+ const char *info)
+{
+ int i;
+
+ for (i = 0; caps[i].desc; i++) {
+ if (!caps[i].matches(&caps[i]))
+ continue;
+
+ if (!cpus_have_cap(caps[i].capability))
+ pr_info("%s %s\n", info, caps[i].desc);
+ cpus_set_cap(caps[i].capability);
+ }
+}
+
+void check_local_cpu_features(void)
+{
+ check_cpu_capabilities(arm64_features, "detected feature");
+}
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 929855691dae..75d5a867e7fb 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -236,6 +236,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
cpuinfo_detect_icache_policy(info);
check_local_cpu_errata();
+ check_local_cpu_features();
update_cpu_features(info);
}
diff --git a/arch/arm64/kernel/cputable.c b/arch/arm64/kernel/cputable.c
deleted file mode 100644
index fd3993cb060f..000000000000
--- a/arch/arm64/kernel/cputable.c
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * arch/arm64/kernel/cputable.c
- *
- * Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/init.h>
-
-#include <asm/cputable.h>
-
-extern unsigned long __cpu_setup(void);
-
-struct cpu_info cpu_table[] = {
- {
- .cpu_id_val = 0x000f0000,
- .cpu_id_mask = 0x000f0000,
- .cpu_name = "AArch64 Processor",
- .cpu_setup = __cpu_setup,
- },
- { /* Empty */ },
-};
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index cf21bb3bf752..959fe8733560 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -21,8 +21,10 @@
#include <linux/init.h>
#include <linux/linkage.h>
+#include <asm/alternative-asm.h>
#include <asm/assembler.h>
#include <asm/asm-offsets.h>
+#include <asm/cpufeature.h>
#include <asm/errno.h>
#include <asm/esr.h>
#include <asm/thread_info.h>
@@ -120,6 +122,24 @@
ct_user_enter
ldr x23, [sp, #S_SP] // load return stack pointer
msr sp_el0, x23
+
+#ifdef CONFIG_ARM64_ERRATUM_845719
+ alternative_insn \
+ "nop", \
+ "tbz x22, #4, 1f", \
+ ARM64_WORKAROUND_845719
+#ifdef CONFIG_PID_IN_CONTEXTIDR
+ alternative_insn \
+ "nop; nop", \
+ "mrs x29, contextidr_el1; msr contextidr_el1, x29; 1:", \
+ ARM64_WORKAROUND_845719
+#else
+ alternative_insn \
+ "nop", \
+ "msr contextidr_el1, xzr; 1:", \
+ ARM64_WORKAROUND_845719
+#endif
+#endif
.endif
msr elr_el1, x21 // set up the return data
msr spsr_el1, x22
diff --git a/arch/arm64/kernel/entry32.S b/arch/arm64/kernel/entry32.S
index 9a8f6ae2530e..bd9bfaa9269b 100644
--- a/arch/arm64/kernel/entry32.S
+++ b/arch/arm64/kernel/entry32.S
@@ -19,9 +19,12 @@
*/
#include <linux/linkage.h>
+#include <linux/const.h>
#include <asm/assembler.h>
#include <asm/asm-offsets.h>
+#include <asm/errno.h>
+#include <asm/page.h>
/*
* System call wrappers for the AArch32 compatibility layer.
@@ -54,6 +57,21 @@ ENTRY(compat_sys_fstatfs64_wrapper)
ENDPROC(compat_sys_fstatfs64_wrapper)
/*
+ * Note: off_4k (w5) is always in units of 4K. If we can't do the
+ * requested offset because it is not page-aligned, we return -EINVAL.
+ */
+ENTRY(compat_sys_mmap2_wrapper)
+#if PAGE_SHIFT > 12
+ tst w5, #~PAGE_MASK >> 12
+ b.ne 1f
+ lsr w5, w5, #PAGE_SHIFT - 12
+#endif
+ b sys_mmap_pgoff
+1: mov x0, #-EINVAL
+ ret
+ENDPROC(compat_sys_mmap2_wrapper)
+
+/*
* Wrappers for AArch32 syscalls that either take 64-bit parameters
* in registers or that take 32-bit parameters which require sign
* extension.
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 07f930540f4a..19f915e8f6e0 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -36,7 +36,7 @@
#include <asm/page.h>
#include <asm/virt.h>
-#define KERNEL_RAM_VADDR (PAGE_OFFSET + TEXT_OFFSET)
+#define __PHYS_OFFSET (KERNEL_START - TEXT_OFFSET)
#if (TEXT_OFFSET & 0xfff) != 0
#error TEXT_OFFSET must be at least 4KB aligned
@@ -46,13 +46,6 @@
#error TEXT_OFFSET must be less than 2MB
#endif
- .macro pgtbl, ttb0, ttb1, virt_to_phys
- ldr \ttb1, =swapper_pg_dir
- ldr \ttb0, =idmap_pg_dir
- add \ttb1, \ttb1, \virt_to_phys
- add \ttb0, \ttb0, \virt_to_phys
- .endm
-
#ifdef CONFIG_ARM64_64K_PAGES
#define BLOCK_SHIFT PAGE_SHIFT
#define BLOCK_SIZE PAGE_SIZE
@@ -63,7 +56,7 @@
#define TABLE_SHIFT PUD_SHIFT
#endif
-#define KERNEL_START KERNEL_RAM_VADDR
+#define KERNEL_START _text
#define KERNEL_END _end
/*
@@ -240,40 +233,43 @@ section_table:
#endif
ENTRY(stext)
- mov x21, x0 // x21=FDT
+ bl preserve_boot_args
bl el2_setup // Drop to EL1, w20=cpu_boot_mode
- bl __calc_phys_offset // x24=PHYS_OFFSET, x28=PHYS_OFFSET-PAGE_OFFSET
+ adrp x24, __PHYS_OFFSET
bl set_cpu_boot_mode_flag
- mrs x22, midr_el1 // x22=cpuid
- mov x0, x22
- bl lookup_processor_type
- mov x23, x0 // x23=current cpu_table
- /*
- * __error_p may end up out of range for cbz if text areas are
- * aligned up to section sizes.
- */
- cbnz x23, 1f // invalid processor (x23=0)?
- b __error_p
-1:
+
bl __vet_fdt
bl __create_page_tables // x25=TTBR0, x26=TTBR1
/*
- * The following calls CPU specific code in a position independent
- * manner. See arch/arm64/mm/proc.S for details. x23 = base of
- * cpu_info structure selected by lookup_processor_type above.
+ * The following calls CPU setup code, see arch/arm64/mm/proc.S for
+ * details.
* On return, the CPU will be ready for the MMU to be turned on and
* the TCR will have been set.
*/
- ldr x27, __switch_data // address to jump to after
+ ldr x27, =__mmap_switched // address to jump to after
// MMU has been enabled
- adrp lr, __enable_mmu // return (PIC) address
- add lr, lr, #:lo12:__enable_mmu
- ldr x12, [x23, #CPU_INFO_SETUP]
- add x12, x12, x28 // __virt_to_phys
- br x12 // initialise processor
+ adr_l lr, __enable_mmu // return (PIC) address
+ b __cpu_setup // initialise processor
ENDPROC(stext)
/*
+ * Preserve the arguments passed by the bootloader in x0 .. x3
+ */
+preserve_boot_args:
+ mov x21, x0 // x21=FDT
+
+ adr_l x0, boot_args // record the contents of
+ stp x21, x1, [x0] // x0 .. x3 at kernel entry
+ stp x2, x3, [x0, #16]
+
+ dmb sy // needed before dc ivac with
+ // MMU off
+
+ add x1, x0, #0x20 // 4 x 8 bytes
+ b __inval_cache_range // tail call
+ENDPROC(preserve_boot_args)
+
+/*
* Determine validity of the x21 FDT pointer.
* The dtb must be 8-byte aligned and live in the first 512M of memory.
*/
@@ -356,7 +352,8 @@ ENDPROC(__vet_fdt)
* - pgd entry for fixed mappings (TTBR1)
*/
__create_page_tables:
- pgtbl x25, x26, x28 // idmap_pg_dir and swapper_pg_dir addresses
+ adrp x25, idmap_pg_dir
+ adrp x26, swapper_pg_dir
mov x27, lr
/*
@@ -385,12 +382,50 @@ __create_page_tables:
* Create the identity mapping.
*/
mov x0, x25 // idmap_pg_dir
- ldr x3, =KERNEL_START
- add x3, x3, x28 // __pa(KERNEL_START)
+ adrp x3, KERNEL_START // __pa(KERNEL_START)
+
+#ifndef CONFIG_ARM64_VA_BITS_48
+#define EXTRA_SHIFT (PGDIR_SHIFT + PAGE_SHIFT - 3)
+#define EXTRA_PTRS (1 << (48 - EXTRA_SHIFT))
+
+ /*
+ * If VA_BITS < 48, it may be too small to allow for an ID mapping to be
+ * created that covers system RAM if that is located sufficiently high
+ * in the physical address space. So for the ID map, use an extended
+ * virtual range in that case, by configuring an additional translation
+ * level.
+ * First, we have to verify our assumption that the current value of
+ * VA_BITS was chosen such that all translation levels are fully
+ * utilised, and that lowering T0SZ will always result in an additional
+ * translation level to be configured.
+ */
+#if VA_BITS != EXTRA_SHIFT
+#error "Mismatch between VA_BITS and page size/number of translation levels"
+#endif
+
+ /*
+ * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the
+ * entire kernel image can be ID mapped. As T0SZ == (64 - #bits used),
+ * this number conveniently equals the number of leading zeroes in
+ * the physical address of KERNEL_END.
+ */
+ adrp x5, KERNEL_END
+ clz x5, x5
+ cmp x5, TCR_T0SZ(VA_BITS) // default T0SZ small enough?
+ b.ge 1f // .. then skip additional level
+
+ adr_l x6, idmap_t0sz
+ str x5, [x6]
+ dmb sy
+ dc ivac, x6 // Invalidate potentially stale cache line
+
+ create_table_entry x0, x3, EXTRA_SHIFT, EXTRA_PTRS, x5, x6
+1:
+#endif
+
create_pgd_entry x0, x3, x5, x6
- ldr x6, =KERNEL_END
mov x5, x3 // __pa(KERNEL_START)
- add x6, x6, x28 // __pa(KERNEL_END)
+ adr_l x6, KERNEL_END // __pa(KERNEL_END)
create_block_map x0, x7, x3, x5, x6
/*
@@ -399,7 +434,7 @@ __create_page_tables:
mov x0, x26 // swapper_pg_dir
mov x5, #PAGE_OFFSET
create_pgd_entry x0, x5, x3, x6
- ldr x6, =KERNEL_END
+ ldr x6, =KERNEL_END // __va(KERNEL_END)
mov x3, x24 // phys offset
create_block_map x0, x7, x3, x5, x6
@@ -426,6 +461,7 @@ __create_page_tables:
*/
mov x0, x25
add x1, x26, #SWAPPER_DIR_SIZE
+ dmb sy
bl __inval_cache_range
mov lr, x27
@@ -433,37 +469,22 @@ __create_page_tables:
ENDPROC(__create_page_tables)
.ltorg
- .align 3
- .type __switch_data, %object
-__switch_data:
- .quad __mmap_switched
- .quad __bss_start // x6
- .quad __bss_stop // x7
- .quad processor_id // x4
- .quad __fdt_pointer // x5
- .quad memstart_addr // x6
- .quad init_thread_union + THREAD_START_SP // sp
-
/*
- * The following fragment of code is executed with the MMU on in MMU mode, and
- * uses absolute addresses; this is not position independent.
+ * The following fragment of code is executed with the MMU enabled.
*/
+ .set initial_sp, init_thread_union + THREAD_START_SP
__mmap_switched:
- adr x3, __switch_data + 8
+ adr_l x6, __bss_start
+ adr_l x7, __bss_stop
- ldp x6, x7, [x3], #16
1: cmp x6, x7
b.hs 2f
str xzr, [x6], #8 // Clear BSS
b 1b
2:
- ldp x4, x5, [x3], #16
- ldr x6, [x3], #8
- ldr x16, [x3]
- mov sp, x16
- str x22, [x4] // Save processor ID
- str x21, [x5] // Save FDT pointer
- str x24, [x6] // Save PHYS_OFFSET
+ adr_l sp, initial_sp, x4
+ str_l x21, __fdt_pointer, x5 // Save FDT pointer
+ str_l x24, memstart_addr, x6 // Save PHYS_OFFSET
mov x29, #0
b start_kernel
ENDPROC(__mmap_switched)
@@ -566,8 +587,7 @@ ENDPROC(el2_setup)
* in x20. See arch/arm64/include/asm/virt.h for more info.
*/
ENTRY(set_cpu_boot_mode_flag)
- ldr x1, =__boot_cpu_mode // Compute __boot_cpu_mode
- add x1, x1, x28
+ adr_l x1, __boot_cpu_mode
cmp w20, #BOOT_CPU_MODE_EL2
b.ne 1f
add x1, x1, #4
@@ -588,29 +608,21 @@ ENDPROC(set_cpu_boot_mode_flag)
.align L1_CACHE_SHIFT
ENTRY(__boot_cpu_mode)
.long BOOT_CPU_MODE_EL2
- .long 0
+ .long BOOT_CPU_MODE_EL1
.popsection
#ifdef CONFIG_SMP
- .align 3
-1: .quad .
- .quad secondary_holding_pen_release
-
/*
* This provides a "holding pen" for platforms to hold all secondary
* cores are held until we're ready for them to initialise.
*/
ENTRY(secondary_holding_pen)
bl el2_setup // Drop to EL1, w20=cpu_boot_mode
- bl __calc_phys_offset // x24=PHYS_OFFSET, x28=PHYS_OFFSET-PAGE_OFFSET
bl set_cpu_boot_mode_flag
mrs x0, mpidr_el1
ldr x1, =MPIDR_HWID_BITMASK
and x0, x0, x1
- adr x1, 1b
- ldp x2, x3, [x1]
- sub x1, x1, x2
- add x3, x3, x1
+ adr_l x3, secondary_holding_pen_release
pen: ldr x4, [x3]
cmp x4, x0
b.eq secondary_startup
@@ -624,7 +636,6 @@ ENDPROC(secondary_holding_pen)
*/
ENTRY(secondary_entry)
bl el2_setup // Drop to EL1
- bl __calc_phys_offset // x24=PHYS_OFFSET, x28=PHYS_OFFSET-PAGE_OFFSET
bl set_cpu_boot_mode_flag
b secondary_startup
ENDPROC(secondary_entry)
@@ -633,16 +644,9 @@ ENTRY(secondary_startup)
/*
* Common entry point for secondary CPUs.
*/
- mrs x22, midr_el1 // x22=cpuid
- mov x0, x22
- bl lookup_processor_type
- mov x23, x0 // x23=current cpu_table
- cbz x23, __error_p // invalid processor (x23=0)?
-
- pgtbl x25, x26, x28 // x25=TTBR0, x26=TTBR1
- ldr x12, [x23, #CPU_INFO_SETUP]
- add x12, x12, x28 // __virt_to_phys
- blr x12 // initialise processor
+ adrp x25, idmap_pg_dir
+ adrp x26, swapper_pg_dir
+ bl __cpu_setup // initialise processor
ldr x21, =secondary_data
ldr x27, =__secondary_switched // address to jump to after enabling the MMU
@@ -658,11 +662,12 @@ ENDPROC(__secondary_switched)
#endif /* CONFIG_SMP */
/*
- * Setup common bits before finally enabling the MMU. Essentially this is just
- * loading the page table pointer and vector base registers.
+ * Enable the MMU.
*
- * On entry to this code, x0 must contain the SCTLR_EL1 value for turning on
- * the MMU.
+ * x0 = SCTLR_EL1 value for turning on the MMU.
+ * x27 = *virtual* address to jump to upon completion
+ *
+ * other registers depend on the function called upon completion
*/
__enable_mmu:
ldr x5, =vectors
@@ -670,89 +675,7 @@ __enable_mmu:
msr ttbr0_el1, x25 // load TTBR0
msr ttbr1_el1, x26 // load TTBR1
isb
- b __turn_mmu_on
-ENDPROC(__enable_mmu)
-
-/*
- * Enable the MMU. This completely changes the structure of the visible memory
- * space. You will not be able to trace execution through this.
- *
- * x0 = system control register
- * x27 = *virtual* address to jump to upon completion
- *
- * other registers depend on the function called upon completion
- *
- * We align the entire function to the smallest power of two larger than it to
- * ensure it fits within a single block map entry. Otherwise were PHYS_OFFSET
- * close to the end of a 512MB or 1GB block we might require an additional
- * table to map the entire function.
- */
- .align 4
-__turn_mmu_on:
msr sctlr_el1, x0
isb
br x27
-ENDPROC(__turn_mmu_on)
-
-/*
- * Calculate the start of physical memory.
- */
-__calc_phys_offset:
- adr x0, 1f
- ldp x1, x2, [x0]
- sub x28, x0, x1 // x28 = PHYS_OFFSET - PAGE_OFFSET
- add x24, x2, x28 // x24 = PHYS_OFFSET
- ret
-ENDPROC(__calc_phys_offset)
-
- .align 3
-1: .quad .
- .quad PAGE_OFFSET
-
-/*
- * Exception handling. Something went wrong and we can't proceed. We ought to
- * tell the user, but since we don't have any guarantee that we're even
- * running on the right architecture, we do virtually nothing.
- */
-__error_p:
-ENDPROC(__error_p)
-
-__error:
-1: nop
- b 1b
-ENDPROC(__error)
-
-/*
- * This function gets the processor ID in w0 and searches the cpu_table[] for
- * a match. It returns a pointer to the struct cpu_info it found. The
- * cpu_table[] must end with an empty (all zeros) structure.
- *
- * This routine can be called via C code and it needs to work with the MMU
- * both disabled and enabled (the offset is calculated automatically).
- */
-ENTRY(lookup_processor_type)
- adr x1, __lookup_processor_type_data
- ldp x2, x3, [x1]
- sub x1, x1, x2 // get offset between VA and PA
- add x3, x3, x1 // convert VA to PA
-1:
- ldp w5, w6, [x3] // load cpu_id_val and cpu_id_mask
- cbz w5, 2f // end of list?
- and w6, w6, w0
- cmp w5, w6
- b.eq 3f
- add x3, x3, #CPU_INFO_SZ
- b 1b
-2:
- mov x3, #0 // unknown processor
-3:
- mov x0, x3
- ret
-ENDPROC(lookup_processor_type)
-
- .align 3
- .type __lookup_processor_type_data, %object
-__lookup_processor_type_data:
- .quad .
- .quad cpu_table
- .size __lookup_processor_type_data, . - __lookup_processor_type_data
+ENDPROC(__enable_mmu)
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index c8eca88f12e6..924902083e47 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -265,23 +265,13 @@ int __kprobes aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt)
return aarch64_insn_patch_text_sync(addrs, insns, cnt);
}
-u32 __kprobes aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type,
- u32 insn, u64 imm)
+static int __kprobes aarch64_get_imm_shift_mask(enum aarch64_insn_imm_type type,
+ u32 *maskp, int *shiftp)
{
- u32 immlo, immhi, lomask, himask, mask;
+ u32 mask;
int shift;
switch (type) {
- case AARCH64_INSN_IMM_ADR:
- lomask = 0x3;
- himask = 0x7ffff;
- immlo = imm & lomask;
- imm >>= 2;
- immhi = imm & himask;
- imm = (immlo << 24) | (immhi);
- mask = (lomask << 24) | (himask);
- shift = 5;
- break;
case AARCH64_INSN_IMM_26:
mask = BIT(26) - 1;
shift = 0;
@@ -320,9 +310,68 @@ u32 __kprobes aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type,
shift = 16;
break;
default:
- pr_err("aarch64_insn_encode_immediate: unknown immediate encoding %d\n",
- type);
- return 0;
+ return -EINVAL;
+ }
+
+ *maskp = mask;
+ *shiftp = shift;
+
+ return 0;
+}
+
+#define ADR_IMM_HILOSPLIT 2
+#define ADR_IMM_SIZE SZ_2M
+#define ADR_IMM_LOMASK ((1 << ADR_IMM_HILOSPLIT) - 1)
+#define ADR_IMM_HIMASK ((ADR_IMM_SIZE >> ADR_IMM_HILOSPLIT) - 1)
+#define ADR_IMM_LOSHIFT 29
+#define ADR_IMM_HISHIFT 5
+
+u64 aarch64_insn_decode_immediate(enum aarch64_insn_imm_type type, u32 insn)
+{
+ u32 immlo, immhi, mask;
+ int shift;
+
+ switch (type) {
+ case AARCH64_INSN_IMM_ADR:
+ shift = 0;
+ immlo = (insn >> ADR_IMM_LOSHIFT) & ADR_IMM_LOMASK;
+ immhi = (insn >> ADR_IMM_HISHIFT) & ADR_IMM_HIMASK;
+ insn = (immhi << ADR_IMM_HILOSPLIT) | immlo;
+ mask = ADR_IMM_SIZE - 1;
+ break;
+ default:
+ if (aarch64_get_imm_shift_mask(type, &mask, &shift) < 0) {
+ pr_err("aarch64_insn_decode_immediate: unknown immediate encoding %d\n",
+ type);
+ return 0;
+ }
+ }
+
+ return (insn >> shift) & mask;
+}
+
+u32 __kprobes aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type,
+ u32 insn, u64 imm)
+{
+ u32 immlo, immhi, mask;
+ int shift;
+
+ switch (type) {
+ case AARCH64_INSN_IMM_ADR:
+ shift = 0;
+ immlo = (imm & ADR_IMM_LOMASK) << ADR_IMM_LOSHIFT;
+ imm >>= ADR_IMM_HILOSPLIT;
+ immhi = (imm & ADR_IMM_HIMASK) << ADR_IMM_HISHIFT;
+ imm = immlo | immhi;
+ mask = ((ADR_IMM_LOMASK << ADR_IMM_LOSHIFT) |
+ (ADR_IMM_HIMASK << ADR_IMM_HISHIFT));
+ break;
+ default:
+ if (aarch64_get_imm_shift_mask(type, &mask, &shift) < 0) {
+ pr_err("aarch64_insn_encode_immediate: unknown immediate encoding %d\n",
+ type);
+ return 0;
+ }
}
/* Update the immediate field. */
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 25a5308744b1..195991dadc37 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -25,8 +25,10 @@
#include <linux/irq.h>
#include <linux/kernel.h>
#include <linux/export.h>
+#include <linux/of.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>
+#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/uaccess.h>
@@ -322,22 +324,31 @@ out:
}
static int
-validate_event(struct pmu_hw_events *hw_events,
- struct perf_event *event)
+validate_event(struct pmu *pmu, struct pmu_hw_events *hw_events,
+ struct perf_event *event)
{
- struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+ struct arm_pmu *armpmu;
struct hw_perf_event fake_event = event->hw;
struct pmu *leader_pmu = event->group_leader->pmu;
if (is_software_event(event))
return 1;
+ /*
+ * Reject groups spanning multiple HW PMUs (e.g. CPU + CCI). The
+ * core perf code won't check that the pmu->ctx == leader->ctx
+ * until after pmu->event_init(event).
+ */
+ if (event->pmu != pmu)
+ return 0;
+
if (event->pmu != leader_pmu || event->state < PERF_EVENT_STATE_OFF)
return 1;
if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec)
return 1;
+ armpmu = to_arm_pmu(event->pmu);
return armpmu->get_event_idx(hw_events, &fake_event) >= 0;
}
@@ -355,15 +366,15 @@ validate_group(struct perf_event *event)
memset(fake_used_mask, 0, sizeof(fake_used_mask));
fake_pmu.used_mask = fake_used_mask;
- if (!validate_event(&fake_pmu, leader))
+ if (!validate_event(event->pmu, &fake_pmu, leader))
return -EINVAL;
list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
- if (!validate_event(&fake_pmu, sibling))
+ if (!validate_event(event->pmu, &fake_pmu, sibling))
return -EINVAL;
}
- if (!validate_event(&fake_pmu, event))
+ if (!validate_event(event->pmu, &fake_pmu, event))
return -EINVAL;
return 0;
@@ -396,7 +407,12 @@ armpmu_release_hardware(struct arm_pmu *armpmu)
free_percpu_irq(irq, &cpu_hw_events);
} else {
for (i = 0; i < irqs; ++i) {
- if (!cpumask_test_and_clear_cpu(i, &armpmu->active_irqs))
+ int cpu = i;
+
+ if (armpmu->irq_affinity)
+ cpu = armpmu->irq_affinity[i];
+
+ if (!cpumask_test_and_clear_cpu(cpu, &armpmu->active_irqs))
continue;
irq = platform_get_irq(pmu_device, i);
if (irq > 0)
@@ -450,19 +466,24 @@ armpmu_reserve_hardware(struct arm_pmu *armpmu)
on_each_cpu(armpmu_enable_percpu_irq, &irq, 1);
} else {
for (i = 0; i < irqs; ++i) {
+ int cpu = i;
+
err = 0;
irq = platform_get_irq(pmu_device, i);
if (irq <= 0)
continue;
+ if (armpmu->irq_affinity)
+ cpu = armpmu->irq_affinity[i];
+
/*
* If we have a single PMU interrupt that we can't shift,
* assume that we're running on a uniprocessor machine and
* continue. Otherwise, continue without this interrupt.
*/
- if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) {
+ if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) {
pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n",
- irq, i);
+ irq, cpu);
continue;
}
@@ -476,7 +497,7 @@ armpmu_reserve_hardware(struct arm_pmu *armpmu)
return err;
}
- cpumask_set_cpu(i, &armpmu->active_irqs);
+ cpumask_set_cpu(cpu, &armpmu->active_irqs);
}
}
@@ -1289,9 +1310,46 @@ static const struct of_device_id armpmu_of_device_ids[] = {
static int armpmu_device_probe(struct platform_device *pdev)
{
+ int i, *irqs;
+
if (!cpu_pmu)
return -ENODEV;
+ irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL);
+ if (!irqs)
+ return -ENOMEM;
+
+ for (i = 0; i < pdev->num_resources; ++i) {
+ struct device_node *dn;
+ int cpu;
+
+ dn = of_parse_phandle(pdev->dev.of_node, "interrupt-affinity",
+ i);
+ if (!dn) {
+ pr_warn("Failed to parse %s/interrupt-affinity[%d]\n",
+ of_node_full_name(dn), i);
+ break;
+ }
+
+ for_each_possible_cpu(cpu)
+ if (arch_find_n_match_cpu_physical_id(dn, cpu, NULL))
+ break;
+
+ of_node_put(dn);
+ if (cpu >= nr_cpu_ids) {
+ pr_warn("Failed to find logical CPU for %s\n",
+ dn->name);
+ break;
+ }
+
+ irqs[i] = cpu;
+ }
+
+ if (i == pdev->num_resources)
+ cpu_pmu->irq_affinity = irqs;
+ else
+ kfree(irqs);
+
cpu_pmu->plat_device = pdev;
return 0;
}
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index e8420f635bd4..51ef97274b52 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -50,7 +50,6 @@
#include <asm/cpu.h>
#include <asm/cputype.h>
#include <asm/elf.h>
-#include <asm/cputable.h>
#include <asm/cpufeature.h>
#include <asm/cpu_ops.h>
#include <asm/sections.h>
@@ -62,9 +61,7 @@
#include <asm/memblock.h>
#include <asm/psci.h>
#include <asm/efi.h>
-
-unsigned int processor_id;
-EXPORT_SYMBOL(processor_id);
+#include <asm/virt.h>
unsigned long elf_hwcap __read_mostly;
EXPORT_SYMBOL_GPL(elf_hwcap);
@@ -83,7 +80,6 @@ unsigned int compat_elf_hwcap2 __read_mostly;
DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
-static const char *cpu_name;
phys_addr_t __fdt_pointer __initdata;
/*
@@ -119,6 +115,11 @@ void __init early_print(const char *str, ...)
printk("%s", buf);
}
+/*
+ * The recorded values of x0 .. x3 upon kernel entry.
+ */
+u64 __cacheline_aligned boot_args[4];
+
void __init smp_setup_processor_id(void)
{
u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK;
@@ -207,24 +208,38 @@ static void __init smp_build_mpidr_hash(void)
}
#endif
+static void __init hyp_mode_check(void)
+{
+ if (is_hyp_mode_available())
+ pr_info("CPU: All CPU(s) started at EL2\n");
+ else if (is_hyp_mode_mismatched())
+ WARN_TAINT(1, TAINT_CPU_OUT_OF_SPEC,
+ "CPU: CPUs started in inconsistent modes");
+ else
+ pr_info("CPU: All CPU(s) started at EL1\n");
+}
+
+void __init do_post_cpus_up_work(void)
+{
+ hyp_mode_check();
+ apply_alternatives_all();
+}
+
+#ifdef CONFIG_UP_LATE_INIT
+void __init up_late_init(void)
+{
+ do_post_cpus_up_work();
+}
+#endif /* CONFIG_UP_LATE_INIT */
+
static void __init setup_processor(void)
{
- struct cpu_info *cpu_info;
u64 features, block;
u32 cwg;
int cls;
- cpu_info = lookup_processor_type(read_cpuid_id());
- if (!cpu_info) {
- printk("CPU configuration botched (ID %08x), unable to continue.\n",
- read_cpuid_id());
- while (1);
- }
-
- cpu_name = cpu_info->cpu_name;
-
- printk("CPU: %s [%08x] revision %d\n",
- cpu_name, read_cpuid_id(), read_cpuid_id() & 15);
+ printk("CPU: AArch64 Processor [%08x] revision %d\n",
+ read_cpuid_id(), read_cpuid_id() & 15);
sprintf(init_utsname()->machine, ELF_PLATFORM);
elf_hwcap = 0;
@@ -402,6 +417,12 @@ void __init setup_arch(char **cmdline_p)
conswitchp = &dummy_con;
#endif
#endif
+ if (boot_args[1] || boot_args[2] || boot_args[3]) {
+ pr_err("WARNING: x1-x3 nonzero in violation of boot protocol:\n"
+ "\tx1: %016llx\n\tx2: %016llx\n\tx3: %016llx\n"
+ "This indicates a broken bootloader or old kernel\n",
+ boot_args[1], boot_args[2], boot_args[3]);
+ }
}
static int __init arm64_device_init(void)
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 660ccf9f7524..e18c48cb6db1 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -287,19 +287,12 @@ static void setup_restart_syscall(struct pt_regs *regs)
*/
static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
{
- struct thread_info *thread = current_thread_info();
struct task_struct *tsk = current;
sigset_t *oldset = sigmask_to_save();
int usig = ksig->sig;
int ret;
/*
- * translate the signal
- */
- if (usig < 32 && thread->exec_domain && thread->exec_domain->signal_invmap)
- usig = thread->exec_domain->signal_invmap[usig];
-
- /*
* Set up the stack frame
*/
if (is_compat_task()) {
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 328b8ce4b007..ffe8e1b814e0 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -151,6 +151,7 @@ asmlinkage void secondary_start_kernel(void)
*/
cpu_set_reserved_ttbr0();
flush_tlb_all();
+ cpu_set_default_tcr_t0sz();
preempt_disable();
trace_hardirqs_off();
@@ -309,7 +310,7 @@ void cpu_die(void)
void __init smp_cpus_done(unsigned int max_cpus)
{
pr_info("SMP: Total of %d processors activated.\n", num_online_cpus());
- apply_alternatives_all();
+ do_post_cpus_up_work();
}
void __init smp_prepare_boot_cpu(void)
diff --git a/arch/arm64/kernel/sys32.c b/arch/arm64/kernel/sys32.c
index 2d5ab3c90b82..a40b1343b819 100644
--- a/arch/arm64/kernel/sys32.c
+++ b/arch/arm64/kernel/sys32.c
@@ -37,6 +37,7 @@ asmlinkage long compat_sys_readahead_wrapper(void);
asmlinkage long compat_sys_fadvise64_64_wrapper(void);
asmlinkage long compat_sys_sync_file_range2_wrapper(void);
asmlinkage long compat_sys_fallocate_wrapper(void);
+asmlinkage long compat_sys_mmap2_wrapper(void);
#undef __SYSCALL
#define __SYSCALL(nr, sym) [nr] = sym,
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 5d9d2dca530d..a2c29865c3fe 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -23,10 +23,14 @@ jiffies = jiffies_64;
#define HYPERVISOR_TEXT \
/* \
- * Force the alignment to be compatible with \
- * the vectors requirements \
+ * Align to 4 KB so that \
+ * a) the HYP vector table is at its minimum \
+ * alignment of 2048 bytes \
+ * b) the HYP init code will not cross a page \
+ * boundary if its size does not exceed \
+ * 4 KB (see related ASSERT() below) \
*/ \
- . = ALIGN(2048); \
+ . = ALIGN(SZ_4K); \
VMLINUX_SYMBOL(__hyp_idmap_text_start) = .; \
*(.hyp.idmap.text) \
VMLINUX_SYMBOL(__hyp_idmap_text_end) = .; \
@@ -163,10 +167,11 @@ SECTIONS
}
/*
- * The HYP init code can't be more than a page long.
+ * The HYP init code can't be more than a page long,
+ * and should not cross a page boundary.
*/
-ASSERT(((__hyp_idmap_text_start + PAGE_SIZE) > __hyp_idmap_text_end),
- "HYP init code too big")
+ASSERT(__hyp_idmap_text_end - (__hyp_idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K,
+ "HYP init code too big or misaligned")
/*
* If padding is applied before .head.text, virt<->phys conversions will fail.
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index c3191168a994..178ba2248a98 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -20,6 +20,7 @@
#include <asm/assembler.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_mmu.h>
+#include <asm/pgtable-hwdef.h>
.text
.pushsection .hyp.idmap.text, "ax"
@@ -65,6 +66,25 @@ __do_hyp_init:
and x4, x4, x5
ldr x5, =TCR_EL2_FLAGS
orr x4, x4, x5
+
+#ifndef CONFIG_ARM64_VA_BITS_48
+ /*
+ * If we are running with VA_BITS < 48, we may be running with an extra
+ * level of translation in the ID map. This is only the case if system
+ * RAM is out of range for the currently configured page size and number
+ * of translation levels, in which case we will also need the extra
+ * level for the HYP ID map, or we won't be able to enable the EL2 MMU.
+ *
+ * However, at EL2, there is only one TTBR register, and we can't switch
+ * between translation tables *and* update TCR_EL2.T0SZ at the same
+ * time. Bottom line: we need the extra level in *both* our translation
+ * tables.
+ *
+ * So use the same T0SZ value we use for the ID map.
+ */
+ ldr_l x5, idmap_t0sz
+ bfi x4, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH
+#endif
msr tcr_el2, x4
ldr x4, =VTCR_EL2_FLAGS
@@ -91,6 +111,10 @@ __do_hyp_init:
msr sctlr_el2, x4
isb
+ /* Skip the trampoline dance if we merged the boot and runtime PGDs */
+ cmp x0, x1
+ b.eq merged
+
/* MMU is now enabled. Get ready for the trampoline dance */
ldr x4, =TRAMPOLINE_VA
adr x5, target
@@ -105,6 +129,7 @@ target: /* We're now in the trampoline code, switch page tables */
tlbi alle2
dsb sy
+merged:
/* Set the stack and new vectors */
kern_hyp_va x2
mov sp, x2
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 79e01163a981..5b8b664422d3 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -40,6 +40,8 @@
#include "mm.h"
+u64 idmap_t0sz = TCR_T0SZ(VA_BITS);
+
/*
* Empty_zero_page is a special page that is used for zero-initialized data
* and COW.
@@ -454,6 +456,7 @@ void __init paging_init(void)
*/
cpu_set_reserved_ttbr0();
flush_tlb_all();
+ cpu_set_default_tcr_t0sz();
}
/*
@@ -461,8 +464,10 @@ void __init paging_init(void)
*/
void setup_mm_for_reboot(void)
{
- cpu_switch_mm(idmap_pg_dir, &init_mm);
+ cpu_set_reserved_ttbr0();
flush_tlb_all();
+ cpu_set_idmap_tcr_t0sz();
+ cpu_switch_mm(idmap_pg_dir, &init_mm);
}
/*
@@ -627,10 +632,7 @@ void __set_fixmap(enum fixed_addresses idx,
unsigned long addr = __fix_to_virt(idx);
pte_t *pte;
- if (idx >= __end_of_fixed_addresses) {
- BUG();
- return;
- }
+ BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);
pte = fixmap_pte(addr);
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index 1d3ec3ddd84b..e47ed1c5dce1 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -73,7 +73,6 @@ int set_memory_ro(unsigned long addr, int numpages)
__pgprot(PTE_RDONLY),
__pgprot(PTE_WRITE));
}
-EXPORT_SYMBOL_GPL(set_memory_ro);
int set_memory_rw(unsigned long addr, int numpages)
{
@@ -81,7 +80,6 @@ int set_memory_rw(unsigned long addr, int numpages)
__pgprot(PTE_WRITE),
__pgprot(PTE_RDONLY));
}
-EXPORT_SYMBOL_GPL(set_memory_rw);
int set_memory_nx(unsigned long addr, int numpages)
{
diff --git a/arch/arm64/mm/proc-macros.S b/arch/arm64/mm/proc-macros.S
index 005d29e2977d..4c4d93c4bf65 100644
--- a/arch/arm64/mm/proc-macros.S
+++ b/arch/arm64/mm/proc-macros.S
@@ -52,3 +52,13 @@
mov \reg, #4 // bytes per word
lsl \reg, \reg, \tmp // actual cache line size
.endm
+
+/*
+ * tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map
+ */
+ .macro tcr_set_idmap_t0sz, valreg, tmpreg
+#ifndef CONFIG_ARM64_VA_BITS_48
+ ldr_l \tmpreg, idmap_t0sz
+ bfi \valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH
+#endif
+ .endm
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 28eebfb6af76..cdd754e19b9b 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -156,6 +156,7 @@ ENTRY(cpu_do_resume)
msr cpacr_el1, x6
msr ttbr0_el1, x1
msr ttbr1_el1, x7
+ tcr_set_idmap_t0sz x8, x7
msr tcr_el1, x8
msr vbar_el1, x9
msr mdscr_el1, x10
@@ -233,6 +234,8 @@ ENTRY(__cpu_setup)
*/
ldr x10, =TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \
TCR_TG_FLAGS | TCR_ASID16 | TCR_TBI0
+ tcr_set_idmap_t0sz x10, x9
+
/*
* Read the PARange bits from ID_AA64MMFR0_EL1 and set the IPS bits in
* TCR_EL1.