summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/crypto/blowfish-x86_64-asm_64.S48
-rw-r--r--arch/x86/crypto/camellia-x86_64-asm_64.S26
-rw-r--r--arch/x86/crypto/cast5-avx-x86_64-asm_64.S47
-rw-r--r--arch/x86/crypto/cast6-avx-x86_64-asm_64.S50
-rw-r--r--arch/x86/crypto/des3_ede-asm_64.S15
-rw-r--r--arch/x86/crypto/sha1_avx2_x86_64_asm.S4
-rw-r--r--arch/x86/crypto/sha1_ssse3_asm.S11
-rw-r--r--arch/x86/crypto/sha256-avx-asm.S15
-rw-r--r--arch/x86/crypto/sha256-avx2-asm.S22
-rw-r--r--arch/x86/crypto/sha256-ssse3-asm.S15
-rw-r--r--arch/x86/crypto/sha512-avx2-asm.S75
-rw-r--r--arch/x86/crypto/twofish-avx-x86_64-asm_64.S12
-rw-r--r--arch/x86/entry/entry_32.S4
-rw-r--r--arch/x86/events/intel/core.c11
-rw-r--r--arch/x86/events/intel/cstate.c4
-rw-r--r--arch/x86/events/intel/rapl.c3
-rw-r--r--arch/x86/events/intel/uncore.c12
-rw-r--r--arch/x86/events/intel/uncore_snbep.c4
-rw-r--r--arch/x86/events/msr.c8
-rw-r--r--arch/x86/hyperv/hv_init.c5
-rw-r--r--arch/x86/hyperv/mmu.c57
-rw-r--r--arch/x86/ia32/ia32_signal.c2
-rw-r--r--arch/x86/include/asm/alternative-asm.h4
-rw-r--r--arch/x86/include/asm/alternative.h9
-rw-r--r--arch/x86/include/asm/asm.h15
-rw-r--r--arch/x86/include/asm/fpu/internal.h90
-rw-r--r--arch/x86/include/asm/fpu/types.h32
-rw-r--r--arch/x86/include/asm/fpu/xstate.h12
-rw-r--r--arch/x86/include/asm/kvm_para.h4
-rw-r--r--arch/x86/include/asm/mce.h1
-rw-r--r--arch/x86/include/asm/mmu_context.h40
-rw-r--r--arch/x86/include/asm/mshyperv.h11
-rw-r--r--arch/x86/include/asm/paravirt_types.h14
-rw-r--r--arch/x86/include/asm/preempt.h15
-rw-r--r--arch/x86/include/asm/processor.h6
-rw-r--r--arch/x86/include/asm/rwsem.h4
-rw-r--r--arch/x86/include/asm/thread_info.h11
-rw-r--r--arch/x86/include/asm/tlbflush.h24
-rw-r--r--arch/x86/include/asm/trace/fpu.h11
-rw-r--r--arch/x86/include/asm/uaccess.h6
-rw-r--r--arch/x86/include/asm/xen/hypercall.h9
-rw-r--r--arch/x86/kernel/apic/apic.c15
-rw-r--r--arch/x86/kernel/cpu/amd.c11
-rw-r--r--arch/x86/kernel/cpu/bugs.c8
-rw-r--r--arch/x86/kernel/cpu/common.c8
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-internal.h7
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c2
-rw-r--r--arch/x86/kernel/cpu/microcode/core.c27
-rw-r--r--arch/x86/kernel/fpu/core.c155
-rw-r--r--arch/x86/kernel/fpu/init.c2
-rw-r--r--arch/x86/kernel/fpu/regset.c48
-rw-r--r--arch/x86/kernel/fpu/signal.c37
-rw-r--r--arch/x86/kernel/fpu/xstate.c264
-rw-r--r--arch/x86/kernel/irq_32.c6
-rw-r--r--arch/x86/kernel/kprobes/common.h13
-rw-r--r--arch/x86/kernel/kprobes/core.c2
-rw-r--r--arch/x86/kernel/ksysfs.c2
-rw-r--r--arch/x86/kernel/kvm.c13
-rw-r--r--arch/x86/kernel/reboot.c4
-rw-r--r--arch/x86/kernel/signal.c6
-rw-r--r--arch/x86/kernel/smpboot.c13
-rw-r--r--arch/x86/kernel/traps.c2
-rw-r--r--arch/x86/kernel/unwind_frame.c38
-rw-r--r--arch/x86/kvm/Kconfig1
-rw-r--r--arch/x86/kvm/emulate.c17
-rw-r--r--arch/x86/kvm/mmu.c17
-rw-r--r--arch/x86/kvm/paging_tmpl.h3
-rw-r--r--arch/x86/kvm/vmx.c250
-rw-r--r--arch/x86/kvm/x86.c2
-rw-r--r--arch/x86/math-emu/fpu_entry.c2
-rw-r--r--arch/x86/mm/Makefile11
-rw-r--r--arch/x86/mm/extable.c24
-rw-r--r--arch/x86/mm/fault.c50
-rw-r--r--arch/x86/mm/mem_encrypt.c2
-rw-r--r--arch/x86/mm/pkeys.c3
-rw-r--r--arch/x86/mm/tlb.c164
-rw-r--r--arch/x86/net/bpf_jit_comp.c4
-rw-r--r--arch/x86/xen/enlighten.c4
-rw-r--r--arch/x86/xen/mmu_pv.c15
79 files changed, 1184 insertions, 836 deletions
diff --git a/arch/x86/crypto/blowfish-x86_64-asm_64.S b/arch/x86/crypto/blowfish-x86_64-asm_64.S
index 246c67006ed0..8c1fcb6bad21 100644
--- a/arch/x86/crypto/blowfish-x86_64-asm_64.S
+++ b/arch/x86/crypto/blowfish-x86_64-asm_64.S
@@ -33,7 +33,7 @@
#define s3 ((16 + 2 + (3 * 256)) * 4)
/* register macros */
-#define CTX %rdi
+#define CTX %r12
#define RIO %rsi
#define RX0 %rax
@@ -56,12 +56,12 @@
#define RX2bh %ch
#define RX3bh %dh
-#define RT0 %rbp
+#define RT0 %rdi
#define RT1 %rsi
#define RT2 %r8
#define RT3 %r9
-#define RT0d %ebp
+#define RT0d %edi
#define RT1d %esi
#define RT2d %r8d
#define RT3d %r9d
@@ -120,13 +120,14 @@
ENTRY(__blowfish_enc_blk)
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* %rsi: dst
* %rdx: src
* %rcx: bool, if true: xor output
*/
- movq %rbp, %r11;
+ movq %r12, %r11;
+ movq %rdi, CTX;
movq %rsi, %r10;
movq %rdx, RIO;
@@ -142,7 +143,7 @@ ENTRY(__blowfish_enc_blk)
round_enc(14);
add_roundkey_enc(16);
- movq %r11, %rbp;
+ movq %r11, %r12;
movq %r10, RIO;
test %cl, %cl;
@@ -157,12 +158,13 @@ ENDPROC(__blowfish_enc_blk)
ENTRY(blowfish_dec_blk)
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* %rsi: dst
* %rdx: src
*/
- movq %rbp, %r11;
+ movq %r12, %r11;
+ movq %rdi, CTX;
movq %rsi, %r10;
movq %rdx, RIO;
@@ -181,7 +183,7 @@ ENTRY(blowfish_dec_blk)
movq %r10, RIO;
write_block();
- movq %r11, %rbp;
+ movq %r11, %r12;
ret;
ENDPROC(blowfish_dec_blk)
@@ -298,20 +300,21 @@ ENDPROC(blowfish_dec_blk)
ENTRY(__blowfish_enc_blk_4way)
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* %rsi: dst
* %rdx: src
* %rcx: bool, if true: xor output
*/
- pushq %rbp;
+ pushq %r12;
pushq %rbx;
pushq %rcx;
- preload_roundkey_enc(0);
-
+ movq %rdi, CTX
movq %rsi, %r11;
movq %rdx, RIO;
+ preload_roundkey_enc(0);
+
read_block4();
round_enc4(0);
@@ -324,39 +327,40 @@ ENTRY(__blowfish_enc_blk_4way)
round_enc4(14);
add_preloaded_roundkey4();
- popq %rbp;
+ popq %r12;
movq %r11, RIO;
- test %bpl, %bpl;
+ test %r12b, %r12b;
jnz .L__enc_xor4;
write_block4();
popq %rbx;
- popq %rbp;
+ popq %r12;
ret;
.L__enc_xor4:
xor_block4();
popq %rbx;
- popq %rbp;
+ popq %r12;
ret;
ENDPROC(__blowfish_enc_blk_4way)
ENTRY(blowfish_dec_blk_4way)
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* %rsi: dst
* %rdx: src
*/
- pushq %rbp;
+ pushq %r12;
pushq %rbx;
- preload_roundkey_dec(17);
- movq %rsi, %r11;
+ movq %rdi, CTX;
+ movq %rsi, %r11
movq %rdx, RIO;
+ preload_roundkey_dec(17);
read_block4();
round_dec4(17);
@@ -373,7 +377,7 @@ ENTRY(blowfish_dec_blk_4way)
write_block4();
popq %rbx;
- popq %rbp;
+ popq %r12;
ret;
ENDPROC(blowfish_dec_blk_4way)
diff --git a/arch/x86/crypto/camellia-x86_64-asm_64.S b/arch/x86/crypto/camellia-x86_64-asm_64.S
index 310319c601ed..95ba6956a7f6 100644
--- a/arch/x86/crypto/camellia-x86_64-asm_64.S
+++ b/arch/x86/crypto/camellia-x86_64-asm_64.S
@@ -75,17 +75,17 @@
#define RCD1bh %dh
#define RT0 %rsi
-#define RT1 %rbp
+#define RT1 %r12
#define RT2 %r8
#define RT0d %esi
-#define RT1d %ebp
+#define RT1d %r12d
#define RT2d %r8d
#define RT2bl %r8b
#define RXOR %r9
-#define RRBP %r10
+#define RR12 %r10
#define RDST %r11
#define RXORd %r9d
@@ -197,7 +197,7 @@ ENTRY(__camellia_enc_blk)
* %rdx: src
* %rcx: bool xor
*/
- movq %rbp, RRBP;
+ movq %r12, RR12;
movq %rcx, RXOR;
movq %rsi, RDST;
@@ -227,13 +227,13 @@ ENTRY(__camellia_enc_blk)
enc_outunpack(mov, RT1);
- movq RRBP, %rbp;
+ movq RR12, %r12;
ret;
.L__enc_xor:
enc_outunpack(xor, RT1);
- movq RRBP, %rbp;
+ movq RR12, %r12;
ret;
ENDPROC(__camellia_enc_blk)
@@ -248,7 +248,7 @@ ENTRY(camellia_dec_blk)
movl $24, RXORd;
cmovel RXORd, RT2d; /* max */
- movq %rbp, RRBP;
+ movq %r12, RR12;
movq %rsi, RDST;
movq %rdx, RIO;
@@ -271,7 +271,7 @@ ENTRY(camellia_dec_blk)
dec_outunpack();
- movq RRBP, %rbp;
+ movq RR12, %r12;
ret;
ENDPROC(camellia_dec_blk)
@@ -433,7 +433,7 @@ ENTRY(__camellia_enc_blk_2way)
*/
pushq %rbx;
- movq %rbp, RRBP;
+ movq %r12, RR12;
movq %rcx, RXOR;
movq %rsi, RDST;
movq %rdx, RIO;
@@ -461,14 +461,14 @@ ENTRY(__camellia_enc_blk_2way)
enc_outunpack2(mov, RT2);
- movq RRBP, %rbp;
+ movq RR12, %r12;
popq %rbx;
ret;
.L__enc2_xor:
enc_outunpack2(xor, RT2);
- movq RRBP, %rbp;
+ movq RR12, %r12;
popq %rbx;
ret;
ENDPROC(__camellia_enc_blk_2way)
@@ -485,7 +485,7 @@ ENTRY(camellia_dec_blk_2way)
cmovel RXORd, RT2d; /* max */
movq %rbx, RXOR;
- movq %rbp, RRBP;
+ movq %r12, RR12;
movq %rsi, RDST;
movq %rdx, RIO;
@@ -508,7 +508,7 @@ ENTRY(camellia_dec_blk_2way)
dec_outunpack2();
- movq RRBP, %rbp;
+ movq RR12, %r12;
movq RXOR, %rbx;
ret;
ENDPROC(camellia_dec_blk_2way)
diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
index b4a8806234ea..86107c961bb4 100644
--- a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
@@ -47,7 +47,7 @@
/**********************************************************************
16-way AVX cast5
**********************************************************************/
-#define CTX %rdi
+#define CTX %r15
#define RL1 %xmm0
#define RR1 %xmm1
@@ -70,8 +70,8 @@
#define RTMP %xmm15
-#define RID1 %rbp
-#define RID1d %ebp
+#define RID1 %rdi
+#define RID1d %edi
#define RID2 %rsi
#define RID2d %esi
@@ -226,7 +226,7 @@
.align 16
__cast5_enc_blk16:
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* RL1: blocks 1 and 2
* RR1: blocks 3 and 4
* RL2: blocks 5 and 6
@@ -246,9 +246,11 @@ __cast5_enc_blk16:
* RR4: encrypted blocks 15 and 16
*/
- pushq %rbp;
+ pushq %r15;
pushq %rbx;
+ movq %rdi, CTX;
+
vmovdqa .Lbswap_mask, RKM;
vmovd .Lfirst_mask, R1ST;
vmovd .L32_mask, R32;
@@ -283,7 +285,7 @@ __cast5_enc_blk16:
.L__skip_enc:
popq %rbx;
- popq %rbp;
+ popq %r15;
vmovdqa .Lbswap_mask, RKM;
@@ -298,7 +300,7 @@ ENDPROC(__cast5_enc_blk16)
.align 16
__cast5_dec_blk16:
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* RL1: encrypted blocks 1 and 2
* RR1: encrypted blocks 3 and 4
* RL2: encrypted blocks 5 and 6
@@ -318,9 +320,11 @@ __cast5_dec_blk16:
* RR4: decrypted blocks 15 and 16
*/
- pushq %rbp;
+ pushq %r15;
pushq %rbx;
+ movq %rdi, CTX;
+
vmovdqa .Lbswap_mask, RKM;
vmovd .Lfirst_mask, R1ST;
vmovd .L32_mask, R32;
@@ -356,7 +360,7 @@ __cast5_dec_blk16:
vmovdqa .Lbswap_mask, RKM;
popq %rbx;
- popq %rbp;
+ popq %r15;
outunpack_blocks(RR1, RL1, RTMP, RX, RKM);
outunpack_blocks(RR2, RL2, RTMP, RX, RKM);
@@ -372,12 +376,14 @@ ENDPROC(__cast5_dec_blk16)
ENTRY(cast5_ecb_enc_16way)
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* %rsi: dst
* %rdx: src
*/
FRAME_BEGIN
+ pushq %r15;
+ movq %rdi, CTX;
movq %rsi, %r11;
vmovdqu (0*4*4)(%rdx), RL1;
@@ -400,18 +406,22 @@ ENTRY(cast5_ecb_enc_16way)
vmovdqu RR4, (6*4*4)(%r11);
vmovdqu RL4, (7*4*4)(%r11);
+ popq %r15;
FRAME_END
ret;
ENDPROC(cast5_ecb_enc_16way)
ENTRY(cast5_ecb_dec_16way)
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* %rsi: dst
* %rdx: src
*/
FRAME_BEGIN
+ pushq %r15;
+
+ movq %rdi, CTX;
movq %rsi, %r11;
vmovdqu (0*4*4)(%rdx), RL1;
@@ -434,20 +444,22 @@ ENTRY(cast5_ecb_dec_16way)
vmovdqu RR4, (6*4*4)(%r11);
vmovdqu RL4, (7*4*4)(%r11);
+ popq %r15;
FRAME_END
ret;
ENDPROC(cast5_ecb_dec_16way)
ENTRY(cast5_cbc_dec_16way)
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* %rsi: dst
* %rdx: src
*/
FRAME_BEGIN
-
pushq %r12;
+ pushq %r15;
+ movq %rdi, CTX;
movq %rsi, %r11;
movq %rdx, %r12;
@@ -483,23 +495,24 @@ ENTRY(cast5_cbc_dec_16way)
vmovdqu RR4, (6*16)(%r11);
vmovdqu RL4, (7*16)(%r11);
+ popq %r15;
popq %r12;
-
FRAME_END
ret;
ENDPROC(cast5_cbc_dec_16way)
ENTRY(cast5_ctr_16way)
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* %rsi: dst
* %rdx: src
* %rcx: iv (big endian, 64bit)
*/
FRAME_BEGIN
-
pushq %r12;
+ pushq %r15;
+ movq %rdi, CTX;
movq %rsi, %r11;
movq %rdx, %r12;
@@ -558,8 +571,8 @@ ENTRY(cast5_ctr_16way)
vmovdqu RR4, (6*16)(%r11);
vmovdqu RL4, (7*16)(%r11);
+ popq %r15;
popq %r12;
-
FRAME_END
ret;
ENDPROC(cast5_ctr_16way)
diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
index 952d3156a933..7f30b6f0d72c 100644
--- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
@@ -47,7 +47,7 @@
/**********************************************************************
8-way AVX cast6
**********************************************************************/
-#define CTX %rdi
+#define CTX %r15
#define RA1 %xmm0
#define RB1 %xmm1
@@ -70,8 +70,8 @@
#define RTMP %xmm15
-#define RID1 %rbp
-#define RID1d %ebp
+#define RID1 %rdi
+#define RID1d %edi
#define RID2 %rsi
#define RID2d %esi
@@ -264,15 +264,17 @@
.align 8
__cast6_enc_blk8:
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: blocks
* output:
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks
*/
- pushq %rbp;
+ pushq %r15;
pushq %rbx;
+ movq %rdi, CTX;
+
vmovdqa .Lbswap_mask, RKM;
vmovd .Lfirst_mask, R1ST;
vmovd .L32_mask, R32;
@@ -297,7 +299,7 @@ __cast6_enc_blk8:
QBAR(11);
popq %rbx;
- popq %rbp;
+ popq %r15;
vmovdqa .Lbswap_mask, RKM;
@@ -310,15 +312,17 @@ ENDPROC(__cast6_enc_blk8)
.align 8
__cast6_dec_blk8:
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks
* output:
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: decrypted blocks
*/
- pushq %rbp;
+ pushq %r15;
pushq %rbx;
+ movq %rdi, CTX;
+
vmovdqa .Lbswap_mask, RKM;
vmovd .Lfirst_mask, R1ST;
vmovd .L32_mask, R32;
@@ -343,7 +347,7 @@ __cast6_dec_blk8:
QBAR(0);
popq %rbx;
- popq %rbp;
+ popq %r15;
vmovdqa .Lbswap_mask, RKM;
outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
@@ -354,12 +358,14 @@ ENDPROC(__cast6_dec_blk8)
ENTRY(cast6_ecb_enc_8way)
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* %rsi: dst
* %rdx: src
*/
FRAME_BEGIN
+ pushq %r15;
+ movq %rdi, CTX;
movq %rsi, %r11;
load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
@@ -368,18 +374,21 @@ ENTRY(cast6_ecb_enc_8way)
store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+ popq %r15;
FRAME_END
ret;
ENDPROC(cast6_ecb_enc_8way)
ENTRY(cast6_ecb_dec_8way)
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* %rsi: dst
* %rdx: src
*/
FRAME_BEGIN
+ pushq %r15;
+ movq %rdi, CTX;
movq %rsi, %r11;
load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
@@ -388,20 +397,22 @@ ENTRY(cast6_ecb_dec_8way)
store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+ popq %r15;
FRAME_END
ret;
ENDPROC(cast6_ecb_dec_8way)
ENTRY(cast6_cbc_dec_8way)
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* %rsi: dst
* %rdx: src
*/
FRAME_BEGIN
-
pushq %r12;
+ pushq %r15;
+ movq %rdi, CTX;
movq %rsi, %r11;
movq %rdx, %r12;
@@ -411,8 +422,8 @@ ENTRY(cast6_cbc_dec_8way)
store_cbc_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+ popq %r15;
popq %r12;
-
FRAME_END
ret;
ENDPROC(cast6_cbc_dec_8way)
@@ -425,9 +436,10 @@ ENTRY(cast6_ctr_8way)
* %rcx: iv (little endian, 128bit)
*/
FRAME_BEGIN
-
pushq %r12;
+ pushq %r15
+ movq %rdi, CTX;
movq %rsi, %r11;
movq %rdx, %r12;
@@ -438,8 +450,8 @@ ENTRY(cast6_ctr_8way)
store_ctr_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+ popq %r15;
popq %r12;
-
FRAME_END
ret;
ENDPROC(cast6_ctr_8way)
@@ -452,7 +464,9 @@ ENTRY(cast6_xts_enc_8way)
* %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
*/
FRAME_BEGIN
+ pushq %r15;
+ movq %rdi, CTX
movq %rsi, %r11;
/* regs <= src, dst <= IVs, regs <= regs xor IVs */
@@ -464,6 +478,7 @@ ENTRY(cast6_xts_enc_8way)
/* dst <= regs xor IVs(in dst) */
store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+ popq %r15;
FRAME_END
ret;
ENDPROC(cast6_xts_enc_8way)
@@ -476,7 +491,9 @@ ENTRY(cast6_xts_dec_8way)
* %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
*/
FRAME_BEGIN
+ pushq %r15;
+ movq %rdi, CTX
movq %rsi, %r11;
/* regs <= src, dst <= IVs, regs <= regs xor IVs */
@@ -488,6 +505,7 @@ ENTRY(cast6_xts_dec_8way)
/* dst <= regs xor IVs(in dst) */
store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+ popq %r15;
FRAME_END
ret;
ENDPROC(cast6_xts_dec_8way)
diff --git a/arch/x86/crypto/des3_ede-asm_64.S b/arch/x86/crypto/des3_ede-asm_64.S
index f3e91647ca27..8e49ce117494 100644
--- a/arch/x86/crypto/des3_ede-asm_64.S
+++ b/arch/x86/crypto/des3_ede-asm_64.S
@@ -64,12 +64,12 @@
#define RW2bh %ch
#define RT0 %r15
-#define RT1 %rbp
+#define RT1 %rsi
#define RT2 %r14
#define RT3 %rdx
#define RT0d %r15d
-#define RT1d %ebp
+#define RT1d %esi
#define RT2d %r14d
#define RT3d %edx
@@ -177,13 +177,14 @@ ENTRY(des3_ede_x86_64_crypt_blk)
* %rsi: dst
* %rdx: src
*/
- pushq %rbp;
pushq %rbx;
pushq %r12;
pushq %r13;
pushq %r14;
pushq %r15;
+ pushq %rsi; /* dst */
+
read_block(%rdx, RL0, RR0);
initial_permutation(RL0, RR0);
@@ -241,6 +242,8 @@ ENTRY(des3_ede_x86_64_crypt_blk)
round1(32+15, RL0, RR0, dummy2);
final_permutation(RR0, RL0);
+
+ popq %rsi /* dst */
write_block(%rsi, RR0, RL0);
popq %r15;
@@ -248,7 +251,6 @@ ENTRY(des3_ede_x86_64_crypt_blk)
popq %r13;
popq %r12;
popq %rbx;
- popq %rbp;
ret;
ENDPROC(des3_ede_x86_64_crypt_blk)
@@ -432,13 +434,14 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way)
* %rdx: src (3 blocks)
*/
- pushq %rbp;
pushq %rbx;
pushq %r12;
pushq %r13;
pushq %r14;
pushq %r15;
+ pushq %rsi /* dst */
+
/* load input */
movl 0 * 4(%rdx), RL0d;
movl 1 * 4(%rdx), RR0d;
@@ -520,6 +523,7 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way)
bswapl RR2d;
bswapl RL2d;
+ popq %rsi /* dst */
movl RR0d, 0 * 4(%rsi);
movl RL0d, 1 * 4(%rsi);
movl RR1d, 2 * 4(%rsi);
@@ -532,7 +536,6 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way)
popq %r13;
popq %r12;
popq %rbx;
- popq %rbp;
ret;
ENDPROC(des3_ede_x86_64_crypt_blk_3way)
diff --git a/arch/x86/crypto/sha1_avx2_x86_64_asm.S b/arch/x86/crypto/sha1_avx2_x86_64_asm.S
index 1eab79c9ac48..9f712a7dfd79 100644
--- a/arch/x86/crypto/sha1_avx2_x86_64_asm.S
+++ b/arch/x86/crypto/sha1_avx2_x86_64_asm.S
@@ -89,7 +89,7 @@
#define REG_RE %rdx
#define REG_RTA %r12
#define REG_RTB %rbx
-#define REG_T1 %ebp
+#define REG_T1 %r11d
#define xmm_mov vmovups
#define avx2_zeroupper vzeroupper
#define RND_F1 1
@@ -637,7 +637,6 @@ _loop3:
ENTRY(\name)
push %rbx
- push %rbp
push %r12
push %r13
push %r14
@@ -673,7 +672,6 @@ _loop3:
pop %r14
pop %r13
pop %r12
- pop %rbp
pop %rbx
ret
diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S
index a4109506a5e8..6204bd53528c 100644
--- a/arch/x86/crypto/sha1_ssse3_asm.S
+++ b/arch/x86/crypto/sha1_ssse3_asm.S
@@ -37,7 +37,7 @@
#define REG_A %ecx
#define REG_B %esi
#define REG_C %edi
-#define REG_D %ebp
+#define REG_D %r12d
#define REG_E %edx
#define REG_T1 %eax
@@ -74,10 +74,10 @@
ENTRY(\name)
push %rbx
- push %rbp
push %r12
+ push %rbp
+ mov %rsp, %rbp
- mov %rsp, %r12
sub $64, %rsp # allocate workspace
and $~15, %rsp # align stack
@@ -99,10 +99,9 @@
xor %rax, %rax
rep stosq
- mov %r12, %rsp # deallocate workspace
-
- pop %r12
+ mov %rbp, %rsp # deallocate workspace
pop %rbp
+ pop %r12
pop %rbx
ret
diff --git a/arch/x86/crypto/sha256-avx-asm.S b/arch/x86/crypto/sha256-avx-asm.S
index e08888a1a5f2..001bbcf93c79 100644
--- a/arch/x86/crypto/sha256-avx-asm.S
+++ b/arch/x86/crypto/sha256-avx-asm.S
@@ -103,7 +103,7 @@ SRND = %rsi # clobbers INP
c = %ecx
d = %r8d
e = %edx
-TBL = %rbp
+TBL = %r12
a = %eax
b = %ebx
@@ -350,13 +350,13 @@ a = TMP_
ENTRY(sha256_transform_avx)
.align 32
pushq %rbx
- pushq %rbp
+ pushq %r12
pushq %r13
pushq %r14
pushq %r15
- pushq %r12
+ pushq %rbp
+ movq %rsp, %rbp
- mov %rsp, %r12
subq $STACK_SIZE, %rsp # allocate stack space
and $~15, %rsp # align stack pointer
@@ -452,13 +452,12 @@ loop2:
done_hash:
- mov %r12, %rsp
-
- popq %r12
+ mov %rbp, %rsp
+ popq %rbp
popq %r15
popq %r14
popq %r13
- popq %rbp
+ popq %r12
popq %rbx
ret
ENDPROC(sha256_transform_avx)
diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S
index 89c8f09787d2..1420db15dcdd 100644
--- a/arch/x86/crypto/sha256-avx2-asm.S
+++ b/arch/x86/crypto/sha256-avx2-asm.S
@@ -98,8 +98,6 @@ d = %r8d
e = %edx # clobbers NUM_BLKS
y3 = %esi # clobbers INP
-
-TBL = %rbp
SRND = CTX # SRND is same register as CTX
a = %eax
@@ -531,7 +529,6 @@ STACK_SIZE = _RSP + _RSP_SIZE
ENTRY(sha256_transform_rorx)
.align 32
pushq %rbx
- pushq %rbp
pushq %r12
pushq %r13
pushq %r14
@@ -568,8 +565,6 @@ ENTRY(sha256_transform_rorx)
mov CTX, _CTX(%rsp)
loop0:
- lea K256(%rip), TBL
-
## Load first 16 dwords from two blocks
VMOVDQ 0*32(INP),XTMP0
VMOVDQ 1*32(INP),XTMP1
@@ -597,19 +592,19 @@ last_block_enter:
.align 16
loop1:
- vpaddd 0*32(TBL, SRND), X0, XFER
+ vpaddd K256+0*32(SRND), X0, XFER
vmovdqa XFER, 0*32+_XFER(%rsp, SRND)
FOUR_ROUNDS_AND_SCHED _XFER + 0*32
- vpaddd 1*32(TBL, SRND), X0, XFER
+ vpaddd K256+1*32(SRND), X0, XFER
vmovdqa XFER, 1*32+_XFER(%rsp, SRND)
FOUR_ROUNDS_AND_SCHED _XFER + 1*32
- vpaddd 2*32(TBL, SRND), X0, XFER
+ vpaddd K256+2*32(SRND), X0, XFER
vmovdqa XFER, 2*32+_XFER(%rsp, SRND)
FOUR_ROUNDS_AND_SCHED _XFER + 2*32
- vpaddd 3*32(TBL, SRND), X0, XFER
+ vpaddd K256+3*32(SRND), X0, XFER
vmovdqa XFER, 3*32+_XFER(%rsp, SRND)
FOUR_ROUNDS_AND_SCHED _XFER + 3*32
@@ -619,10 +614,11 @@ loop1:
loop2:
## Do last 16 rounds with no scheduling
- vpaddd 0*32(TBL, SRND), X0, XFER
+ vpaddd K256+0*32(SRND), X0, XFER
vmovdqa XFER, 0*32+_XFER(%rsp, SRND)
DO_4ROUNDS _XFER + 0*32
- vpaddd 1*32(TBL, SRND), X1, XFER
+
+ vpaddd K256+1*32(SRND), X1, XFER
vmovdqa XFER, 1*32+_XFER(%rsp, SRND)
DO_4ROUNDS _XFER + 1*32
add $2*32, SRND
@@ -676,9 +672,6 @@ loop3:
ja done_hash
do_last_block:
- #### do last block
- lea K256(%rip), TBL
-
VMOVDQ 0*16(INP),XWORD0
VMOVDQ 1*16(INP),XWORD1
VMOVDQ 2*16(INP),XWORD2
@@ -718,7 +711,6 @@ done_hash:
popq %r14
popq %r13
popq %r12
- popq %rbp
popq %rbx
ret
ENDPROC(sha256_transform_rorx)
diff --git a/arch/x86/crypto/sha256-ssse3-asm.S b/arch/x86/crypto/sha256-ssse3-asm.S
index 39b83c93e7fd..c6c05ed2c16a 100644
--- a/arch/x86/crypto/sha256-ssse3-asm.S
+++ b/arch/x86/crypto/sha256-ssse3-asm.S
@@ -95,7 +95,7 @@ SRND = %rsi # clobbers INP
c = %ecx
d = %r8d
e = %edx
-TBL = %rbp
+TBL = %r12
a = %eax
b = %ebx
@@ -356,13 +356,13 @@ a = TMP_
ENTRY(sha256_transform_ssse3)
.align 32
pushq %rbx
- pushq %rbp
+ pushq %r12
pushq %r13
pushq %r14
pushq %r15
- pushq %r12
+ pushq %rbp
+ mov %rsp, %rbp
- mov %rsp, %r12
subq $STACK_SIZE, %rsp
and $~15, %rsp
@@ -462,13 +462,12 @@ loop2:
done_hash:
- mov %r12, %rsp
-
- popq %r12
+ mov %rbp, %rsp
+ popq %rbp
popq %r15
popq %r14
popq %r13
- popq %rbp
+ popq %r12
popq %rbx
ret
diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S
index 7f5f6c6ec72e..b16d56005162 100644
--- a/arch/x86/crypto/sha512-avx2-asm.S
+++ b/arch/x86/crypto/sha512-avx2-asm.S
@@ -69,8 +69,9 @@ XFER = YTMP0
BYTE_FLIP_MASK = %ymm9
-# 1st arg
-CTX = %rdi
+# 1st arg is %rdi, which is saved to the stack and accessed later via %r12
+CTX1 = %rdi
+CTX2 = %r12
# 2nd arg
INP = %rsi
# 3rd arg
@@ -81,7 +82,7 @@ d = %r8
e = %rdx
y3 = %rsi
-TBL = %rbp
+TBL = %rdi # clobbers CTX1
a = %rax
b = %rbx
@@ -91,26 +92,26 @@ g = %r10
h = %r11
old_h = %r11
-T1 = %r12
+T1 = %r12 # clobbers CTX2
y0 = %r13
y1 = %r14
y2 = %r15
-y4 = %r12
-
# Local variables (stack frame)
XFER_SIZE = 4*8
SRND_SIZE = 1*8
INP_SIZE = 1*8
INPEND_SIZE = 1*8
+CTX_SIZE = 1*8
RSPSAVE_SIZE = 1*8
-GPRSAVE_SIZE = 6*8
+GPRSAVE_SIZE = 5*8
frame_XFER = 0
frame_SRND = frame_XFER + XFER_SIZE
frame_INP = frame_SRND + SRND_SIZE
frame_INPEND = frame_INP + INP_SIZE
-frame_RSPSAVE = frame_INPEND + INPEND_SIZE
+frame_CTX = frame_INPEND + INPEND_SIZE
+frame_RSPSAVE = frame_CTX + CTX_SIZE
frame_GPRSAVE = frame_RSPSAVE + RSPSAVE_SIZE
frame_size = frame_GPRSAVE + GPRSAVE_SIZE
@@ -576,12 +577,11 @@ ENTRY(sha512_transform_rorx)
mov %rax, frame_RSPSAVE(%rsp)
# Save GPRs
- mov %rbp, frame_GPRSAVE(%rsp)
- mov %rbx, 8*1+frame_GPRSAVE(%rsp)
- mov %r12, 8*2+frame_GPRSAVE(%rsp)
- mov %r13, 8*3+frame_GPRSAVE(%rsp)
- mov %r14, 8*4+frame_GPRSAVE(%rsp)
- mov %r15, 8*5+frame_GPRSAVE(%rsp)
+ mov %rbx, 8*0+frame_GPRSAVE(%rsp)
+ mov %r12, 8*1+frame_GPRSAVE(%rsp)
+ mov %r13, 8*2+frame_GPRSAVE(%rsp)
+ mov %r14, 8*3+frame_GPRSAVE(%rsp)
+ mov %r15, 8*4+frame_GPRSAVE(%rsp)
shl $7, NUM_BLKS # convert to bytes
jz done_hash
@@ -589,14 +589,17 @@ ENTRY(sha512_transform_rorx)
mov NUM_BLKS, frame_INPEND(%rsp)
## load initial digest
- mov 8*0(CTX),a
- mov 8*1(CTX),b
- mov 8*2(CTX),c
- mov 8*3(CTX),d
- mov 8*4(CTX),e
- mov 8*5(CTX),f
- mov 8*6(CTX),g
- mov 8*7(CTX),h
+ mov 8*0(CTX1), a
+ mov 8*1(CTX1), b
+ mov 8*2(CTX1), c
+ mov 8*3(CTX1), d
+ mov 8*4(CTX1), e
+ mov 8*5(CTX1), f
+ mov 8*6(CTX1), g
+ mov 8*7(CTX1), h
+
+ # save %rdi (CTX) before it gets clobbered
+ mov %rdi, frame_CTX(%rsp)
vmovdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), BYTE_FLIP_MASK
@@ -652,14 +655,15 @@ loop2:
subq $1, frame_SRND(%rsp)
jne loop2
- addm 8*0(CTX),a
- addm 8*1(CTX),b
- addm 8*2(CTX),c
- addm 8*3(CTX),d
- addm 8*4(CTX),e
- addm 8*5(CTX),f
- addm 8*6(CTX),g
- addm 8*7(CTX),h
+ mov frame_CTX(%rsp), CTX2
+ addm 8*0(CTX2), a
+ addm 8*1(CTX2), b
+ addm 8*2(CTX2), c
+ addm 8*3(CTX2), d
+ addm 8*4(CTX2), e
+ addm 8*5(CTX2), f
+ addm 8*6(CTX2), g
+ addm 8*7(CTX2), h
mov frame_INP(%rsp), INP
add $128, INP
@@ -669,12 +673,11 @@ loop2:
done_hash:
# Restore GPRs
- mov frame_GPRSAVE(%rsp) ,%rbp
- mov 8*1+frame_GPRSAVE(%rsp) ,%rbx
- mov 8*2+frame_GPRSAVE(%rsp) ,%r12
- mov 8*3+frame_GPRSAVE(%rsp) ,%r13
- mov 8*4+frame_GPRSAVE(%rsp) ,%r14
- mov 8*5+frame_GPRSAVE(%rsp) ,%r15
+ mov 8*0+frame_GPRSAVE(%rsp), %rbx
+ mov 8*1+frame_GPRSAVE(%rsp), %r12
+ mov 8*2+frame_GPRSAVE(%rsp), %r13
+ mov 8*3+frame_GPRSAVE(%rsp), %r14
+ mov 8*4+frame_GPRSAVE(%rsp), %r15
# Restore Stack Pointer
mov frame_RSPSAVE(%rsp), %rsp
diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
index b3f49d286348..73b471da3622 100644
--- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
@@ -76,8 +76,8 @@
#define RT %xmm14
#define RR %xmm15
-#define RID1 %rbp
-#define RID1d %ebp
+#define RID1 %r13
+#define RID1d %r13d
#define RID2 %rsi
#define RID2d %esi
@@ -259,7 +259,7 @@ __twofish_enc_blk8:
vmovdqu w(CTX), RK1;
- pushq %rbp;
+ pushq %r13;
pushq %rbx;
pushq %rcx;
@@ -282,7 +282,7 @@ __twofish_enc_blk8:
popq %rcx;
popq %rbx;
- popq %rbp;
+ popq %r13;
outunpack_blocks(RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2);
outunpack_blocks(RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2);
@@ -301,7 +301,7 @@ __twofish_dec_blk8:
vmovdqu (w+4*4)(CTX), RK1;
- pushq %rbp;
+ pushq %r13;
pushq %rbx;
inpack_blocks(RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2);
@@ -322,7 +322,7 @@ __twofish_dec_blk8:
vmovdqu (w)(CTX), RK1;
popq %rbx;
- popq %rbp;
+ popq %r13;
outunpack_blocks(RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2);
outunpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2);
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 8a13d468635a..50e0d2bc4528 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -176,7 +176,7 @@
/*
* This is a sneaky trick to help the unwinder find pt_regs on the stack. The
* frame pointer is replaced with an encoded pointer to pt_regs. The encoding
- * is just setting the LSB, which makes it an invalid stack address and is also
+ * is just clearing the MSB, which makes it an invalid stack address and is also
* a signal to the unwinder that it's a pt_regs pointer in disguise.
*
* NOTE: This macro must be used *after* SAVE_ALL because it corrupts the
@@ -185,7 +185,7 @@
.macro ENCODE_FRAME_POINTER
#ifdef CONFIG_FRAME_POINTER
mov %esp, %ebp
- orl $0x1, %ebp
+ andl $0x7fffffff, %ebp
#endif
.endm
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 829e89cfcee2..9fb9a1f1e47b 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -4409,10 +4409,9 @@ static __init int fixup_ht_bug(void)
return 0;
}
- if (lockup_detector_suspend() != 0) {
- pr_debug("failed to disable PMU erratum BJ122, BV98, HSD29 workaround\n");
- return 0;
- }
+ cpus_read_lock();
+
+ hardlockup_detector_perf_stop();
x86_pmu.flags &= ~(PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED);
@@ -4420,9 +4419,7 @@ static __init int fixup_ht_bug(void)
x86_pmu.commit_scheduling = NULL;
x86_pmu.stop_scheduling = NULL;
- lockup_detector_resume();
-
- cpus_read_lock();
+ hardlockup_detector_perf_restart();
for_each_online_cpu(c)
free_excl_cntrs(c);
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 4cf100ff2a37..72db0664a53d 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -552,6 +552,7 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_MOBILE, snb_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_X, snb_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_MOBILE, snb_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_DESKTOP, snb_cstates),
@@ -560,6 +561,9 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNM, knl_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT, glm_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_ATOM_DENVERTON, glm_cstates),
+
+ X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GEMINI_LAKE, glm_cstates),
{ },
};
MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index 8e2457cb6b4a..005908ee9333 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -775,6 +775,9 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, skl_rapl_init),
X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, hsw_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_DENVERTON, hsw_rapl_init),
+
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GEMINI_LAKE, hsw_rapl_init),
{},
};
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 1c5390f1cf09..d45e06346f14 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -822,7 +822,7 @@ static int __init uncore_type_init(struct intel_uncore_type *type, bool setid)
pmus[i].type = type;
pmus[i].boxes = kzalloc(size, GFP_KERNEL);
if (!pmus[i].boxes)
- return -ENOMEM;
+ goto err;
}
type->pmus = pmus;
@@ -836,7 +836,7 @@ static int __init uncore_type_init(struct intel_uncore_type *type, bool setid)
attr_group = kzalloc(sizeof(struct attribute *) * (i + 1) +
sizeof(*attr_group), GFP_KERNEL);
if (!attr_group)
- return -ENOMEM;
+ goto err;
attrs = (struct attribute **)(attr_group + 1);
attr_group->name = "events";
@@ -849,7 +849,15 @@ static int __init uncore_type_init(struct intel_uncore_type *type, bool setid)
}
type->pmu_group = &uncore_pmu_attr_group;
+
return 0;
+
+err:
+ for (i = 0; i < type->num_boxes; i++)
+ kfree(pmus[i].boxes);
+ kfree(pmus);
+
+ return -ENOMEM;
}
static int __init
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index db1fe377e6dd..a7196818416a 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -3462,7 +3462,7 @@ static struct intel_uncore_ops skx_uncore_iio_ops = {
static struct intel_uncore_type skx_uncore_iio = {
.name = "iio",
.num_counters = 4,
- .num_boxes = 5,
+ .num_boxes = 6,
.perf_ctr_bits = 48,
.event_ctl = SKX_IIO0_MSR_PMON_CTL0,
.perf_ctr = SKX_IIO0_MSR_PMON_CTR0,
@@ -3492,7 +3492,7 @@ static const struct attribute_group skx_uncore_format_group = {
static struct intel_uncore_type skx_uncore_irp = {
.name = "irp",
.num_counters = 2,
- .num_boxes = 5,
+ .num_boxes = 6,
.perf_ctr_bits = 48,
.event_ctl = SKX_IRP0_MSR_PMON_CTL0,
.perf_ctr = SKX_IRP0_MSR_PMON_CTR0,
diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
index 4bb3ec69e8ea..06723671ae4e 100644
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -63,6 +63,14 @@ static bool test_intel(int idx)
case INTEL_FAM6_ATOM_SILVERMONT1:
case INTEL_FAM6_ATOM_SILVERMONT2:
case INTEL_FAM6_ATOM_AIRMONT:
+
+ case INTEL_FAM6_ATOM_GOLDMONT:
+ case INTEL_FAM6_ATOM_DENVERTON:
+
+ case INTEL_FAM6_ATOM_GEMINI_LAKE:
+
+ case INTEL_FAM6_XEON_PHI_KNL:
+ case INTEL_FAM6_XEON_PHI_KNM:
if (idx == PERF_MSR_SMI)
return true;
break;
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 1a8eb550c40f..a5db63f728a2 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -85,6 +85,8 @@ EXPORT_SYMBOL_GPL(hyperv_cs);
u32 *hv_vp_index;
EXPORT_SYMBOL_GPL(hv_vp_index);
+u32 hv_max_vp_index;
+
static int hv_cpu_init(unsigned int cpu)
{
u64 msr_vp_index;
@@ -93,6 +95,9 @@ static int hv_cpu_init(unsigned int cpu)
hv_vp_index[smp_processor_id()] = msr_vp_index;
+ if (msr_vp_index > hv_max_vp_index)
+ hv_max_vp_index = msr_vp_index;
+
return 0;
}
diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c
index 39e7f6e50919..9cc9e1c1e2db 100644
--- a/arch/x86/hyperv/mmu.c
+++ b/arch/x86/hyperv/mmu.c
@@ -36,9 +36,9 @@ struct hv_flush_pcpu_ex {
/* Each gva in gva_list encodes up to 4096 pages to flush */
#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)
-static struct hv_flush_pcpu __percpu *pcpu_flush;
+static struct hv_flush_pcpu __percpu **pcpu_flush;
-static struct hv_flush_pcpu_ex __percpu *pcpu_flush_ex;
+static struct hv_flush_pcpu_ex __percpu **pcpu_flush_ex;
/*
* Fills in gva_list starting from offset. Returns the number of items added.
@@ -76,6 +76,18 @@ static inline int cpumask_to_vp_set(struct hv_flush_pcpu_ex *flush,
{
int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1;
+ /* valid_bank_mask can represent up to 64 banks */
+ if (hv_max_vp_index / 64 >= 64)
+ return 0;
+
+ /*
+ * Clear all banks up to the maximum possible bank as hv_flush_pcpu_ex
+ * structs are not cleared between calls, we risk flushing unneeded
+ * vCPUs otherwise.
+ */
+ for (vcpu_bank = 0; vcpu_bank <= hv_max_vp_index / 64; vcpu_bank++)
+ flush->hv_vp_set.bank_contents[vcpu_bank] = 0;
+
/*
* Some banks may end up being empty but this is acceptable.
*/
@@ -83,11 +95,6 @@ static inline int cpumask_to_vp_set(struct hv_flush_pcpu_ex *flush,
vcpu = hv_cpu_number_to_vp_number(cpu);
vcpu_bank = vcpu / 64;
vcpu_offset = vcpu % 64;
-
- /* valid_bank_mask can represent up to 64 banks */
- if (vcpu_bank >= 64)
- return 0;
-
__set_bit(vcpu_offset, (unsigned long *)
&flush->hv_vp_set.bank_contents[vcpu_bank]);
if (vcpu_bank >= nr_bank)
@@ -102,6 +109,7 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus,
const struct flush_tlb_info *info)
{
int cpu, vcpu, gva_n, max_gvas;
+ struct hv_flush_pcpu **flush_pcpu;
struct hv_flush_pcpu *flush;
u64 status = U64_MAX;
unsigned long flags;
@@ -116,7 +124,17 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus,
local_irq_save(flags);
- flush = this_cpu_ptr(pcpu_flush);
+ flush_pcpu = this_cpu_ptr(pcpu_flush);
+
+ if (unlikely(!*flush_pcpu))
+ *flush_pcpu = page_address(alloc_page(GFP_ATOMIC));
+
+ flush = *flush_pcpu;
+
+ if (unlikely(!flush)) {
+ local_irq_restore(flags);
+ goto do_native;
+ }
if (info->mm) {
flush->address_space = virt_to_phys(info->mm->pgd);
@@ -173,6 +191,7 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
const struct flush_tlb_info *info)
{
int nr_bank = 0, max_gvas, gva_n;
+ struct hv_flush_pcpu_ex **flush_pcpu;
struct hv_flush_pcpu_ex *flush;
u64 status = U64_MAX;
unsigned long flags;
@@ -187,7 +206,17 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
local_irq_save(flags);
- flush = this_cpu_ptr(pcpu_flush_ex);
+ flush_pcpu = this_cpu_ptr(pcpu_flush_ex);
+
+ if (unlikely(!*flush_pcpu))
+ *flush_pcpu = page_address(alloc_page(GFP_ATOMIC));
+
+ flush = *flush_pcpu;
+
+ if (unlikely(!flush)) {
+ local_irq_restore(flags);
+ goto do_native;
+ }
if (info->mm) {
flush->address_space = virt_to_phys(info->mm->pgd);
@@ -222,18 +251,18 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
status = hv_do_rep_hypercall(
HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
- 0, nr_bank + 2, flush, NULL);
+ 0, nr_bank, flush, NULL);
} else if (info->end &&
((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
status = hv_do_rep_hypercall(
HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
- 0, nr_bank + 2, flush, NULL);
+ 0, nr_bank, flush, NULL);
} else {
gva_n = fill_gva_list(flush->gva_list, nr_bank,
info->start, info->end);
status = hv_do_rep_hypercall(
HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX,
- gva_n, nr_bank + 2, flush, NULL);
+ gva_n, nr_bank, flush, NULL);
}
local_irq_restore(flags);
@@ -266,7 +295,7 @@ void hyper_alloc_mmu(void)
return;
if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
- pcpu_flush = __alloc_percpu(PAGE_SIZE, PAGE_SIZE);
+ pcpu_flush = alloc_percpu(struct hv_flush_pcpu *);
else
- pcpu_flush_ex = __alloc_percpu(PAGE_SIZE, PAGE_SIZE);
+ pcpu_flush_ex = alloc_percpu(struct hv_flush_pcpu_ex *);
}
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index e0bb46c02857..0e2a5edbce00 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -231,7 +231,7 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs,
ksig->ka.sa.sa_restorer)
sp = (unsigned long) ksig->ka.sa.sa_restorer;
- if (fpu->fpstate_active) {
+ if (fpu->initialized) {
unsigned long fx_aligned, math_size;
sp = fpu__alloc_mathframe(sp, 1, &fx_aligned, &math_size);
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index e7636bac7372..6c98821fef5e 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -62,8 +62,10 @@
#define new_len2 145f-144f
/*
- * max without conditionals. Idea adapted from:
+ * gas compatible max based on the idea from:
* http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
+ *
+ * The additional "-" is needed because gas uses a "true" value of -1.
*/
#define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 1b020381ab38..ccbe24e697c4 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -103,12 +103,12 @@ static inline int alternatives_text_reserved(void *start, void *end)
alt_end_marker ":\n"
/*
- * max without conditionals. Idea adapted from:
+ * gas compatible max based on the idea from:
* http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
*
- * The additional "-" is needed because gas works with s32s.
+ * The additional "-" is needed because gas uses a "true" value of -1.
*/
-#define alt_max_short(a, b) "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") - (" b ")))))"
+#define alt_max_short(a, b) "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") < (" b ")))))"
/*
* Pad the second replacement alternative with additional NOPs if it is
@@ -218,10 +218,9 @@ static inline int alternatives_text_reserved(void *start, void *end)
#define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, feature2, \
output, input...) \
{ \
- register void *__sp asm(_ASM_SP); \
asm volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\
"call %P[new2]", feature2) \
- : output, "+r" (__sp) \
+ : output, ASM_CALL_CONSTRAINT \
: [old] "i" (oldfunc), [new1] "i" (newfunc1), \
[new2] "i" (newfunc2), ## input); \
}
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 676ee5807d86..b0dc91f4bedc 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -11,10 +11,12 @@
# define __ASM_FORM_COMMA(x) " " #x ","
#endif
-#ifdef CONFIG_X86_32
+#ifndef __x86_64__
+/* 32 bit */
# define __ASM_SEL(a,b) __ASM_FORM(a)
# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a)
#else
+/* 64 bit */
# define __ASM_SEL(a,b) __ASM_FORM(b)
# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(b)
#endif
@@ -132,4 +134,15 @@
/* For C file, we already have NOKPROBE_SYMBOL macro */
#endif
+#ifndef __ASSEMBLY__
+/*
+ * This output constraint should be used for any inline asm which has a "call"
+ * instruction. Otherwise the asm may be inserted before the frame pointer
+ * gets set up by the containing function. If you forget to do this, objtool
+ * may print a "call without frame pointer save/setup" warning.
+ */
+register unsigned long current_stack_pointer asm(_ASM_SP);
+#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)
+#endif
+
#endif /* _ASM_X86_ASM_H */
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 554cdb205d17..e3221ffa304e 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -23,11 +23,9 @@
/*
* High level FPU state handling functions:
*/
-extern void fpu__activate_curr(struct fpu *fpu);
-extern void fpu__activate_fpstate_read(struct fpu *fpu);
-extern void fpu__activate_fpstate_write(struct fpu *fpu);
-extern void fpu__current_fpstate_write_begin(void);
-extern void fpu__current_fpstate_write_end(void);
+extern void fpu__initialize(struct fpu *fpu);
+extern void fpu__prepare_read(struct fpu *fpu);
+extern void fpu__prepare_write(struct fpu *fpu);
extern void fpu__save(struct fpu *fpu);
extern void fpu__restore(struct fpu *fpu);
extern int fpu__restore_sig(void __user *buf, int ia32_frame);
@@ -120,20 +118,11 @@ extern void fpstate_sanitize_xstate(struct fpu *fpu);
err; \
})
-#define check_insn(insn, output, input...) \
-({ \
- int err; \
+#define kernel_insn(insn, output, input...) \
asm volatile("1:" #insn "\n\t" \
"2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3: movl $-1,%[err]\n" \
- " jmp 2b\n" \
- ".previous\n" \
- _ASM_EXTABLE(1b, 3b) \
- : [err] "=r" (err), output \
- : "0"(0), input); \
- err; \
-})
+ _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_fprestore) \
+ : output : input)
static inline int copy_fregs_to_user(struct fregs_state __user *fx)
{
@@ -153,20 +142,16 @@ static inline int copy_fxregs_to_user(struct fxregs_state __user *fx)
static inline void copy_kernel_to_fxregs(struct fxregs_state *fx)
{
- int err;
-
if (IS_ENABLED(CONFIG_X86_32)) {
- err = check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
+ kernel_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
} else {
if (IS_ENABLED(CONFIG_AS_FXSAVEQ)) {
- err = check_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
+ kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
} else {
/* See comment in copy_fxregs_to_kernel() below. */
- err = check_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), "m" (*fx));
+ kernel_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), "m" (*fx));
}
}
- /* Copying from a kernel buffer to FPU registers should never fail: */
- WARN_ON_FPU(err);
}
static inline int copy_user_to_fxregs(struct fxregs_state __user *fx)
@@ -183,9 +168,7 @@ static inline int copy_user_to_fxregs(struct fxregs_state __user *fx)
static inline void copy_kernel_to_fregs(struct fregs_state *fx)
{
- int err = check_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
-
- WARN_ON_FPU(err);
+ kernel_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
}
static inline int copy_user_to_fregs(struct fregs_state __user *fx)
@@ -281,18 +264,13 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu)
* Use XRSTORS to restore context if it is enabled. XRSTORS supports compact
* XSAVE area format.
*/
-#define XSTATE_XRESTORE(st, lmask, hmask, err) \
+#define XSTATE_XRESTORE(st, lmask, hmask) \
asm volatile(ALTERNATIVE(XRSTOR, \
XRSTORS, X86_FEATURE_XSAVES) \
"\n" \
- "xor %[err], %[err]\n" \
"3:\n" \
- ".pushsection .fixup,\"ax\"\n" \
- "4: movl $-2, %[err]\n" \
- "jmp 3b\n" \
- ".popsection\n" \
- _ASM_EXTABLE(661b, 4b) \
- : [err] "=r" (err) \
+ _ASM_EXTABLE_HANDLE(661b, 3b, ex_handler_fprestore)\
+ : \
: "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
: "memory")
@@ -336,7 +314,10 @@ static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate)
else
XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
- /* We should never fault when copying from a kernel buffer: */
+ /*
+ * We should never fault when copying from a kernel buffer, and the FPU
+ * state we set at boot time should be valid.
+ */
WARN_ON_FPU(err);
}
@@ -350,7 +331,7 @@ static inline void copy_xregs_to_kernel(struct xregs_state *xstate)
u32 hmask = mask >> 32;
int err;
- WARN_ON(!alternatives_patched);
+ WARN_ON_FPU(!alternatives_patched);
XSTATE_XSAVE(xstate, lmask, hmask, err);
@@ -365,12 +346,8 @@ static inline void copy_kernel_to_xregs(struct xregs_state *xstate, u64 mask)
{
u32 lmask = mask;
u32 hmask = mask >> 32;
- int err;
-
- XSTATE_XRESTORE(xstate, lmask, hmask, err);
- /* We should never fault when copying from a kernel buffer: */
- WARN_ON_FPU(err);
+ XSTATE_XRESTORE(xstate, lmask, hmask);
}
/*
@@ -526,38 +503,17 @@ static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu)
*/
static inline void fpregs_deactivate(struct fpu *fpu)
{
- WARN_ON_FPU(!fpu->fpregs_active);
-
- fpu->fpregs_active = 0;
this_cpu_write(fpu_fpregs_owner_ctx, NULL);
trace_x86_fpu_regs_deactivated(fpu);
}
static inline void fpregs_activate(struct fpu *fpu)
{
- WARN_ON_FPU(fpu->fpregs_active);
-
- fpu->fpregs_active = 1;
this_cpu_write(fpu_fpregs_owner_ctx, fpu);
trace_x86_fpu_regs_activated(fpu);
}
/*
- * The question "does this thread have fpu access?"
- * is slightly racy, since preemption could come in
- * and revoke it immediately after the test.
- *
- * However, even in that very unlikely scenario,
- * we can just assume we have FPU access - typically
- * to save the FP state - we'll just take a #NM
- * fault and get the FPU access back.
- */
-static inline int fpregs_active(void)
-{
- return current->thread.fpu.fpregs_active;
-}
-
-/*
* FPU state switching for scheduling.
*
* This is a two-stage process:
@@ -571,14 +527,13 @@ static inline int fpregs_active(void)
static inline void
switch_fpu_prepare(struct fpu *old_fpu, int cpu)
{
- if (old_fpu->fpregs_active) {
+ if (old_fpu->initialized) {
if (!copy_fpregs_to_fpstate(old_fpu))
old_fpu->last_cpu = -1;
else
old_fpu->last_cpu = cpu;
/* But leave fpu_fpregs_owner_ctx! */
- old_fpu->fpregs_active = 0;
trace_x86_fpu_regs_deactivated(old_fpu);
} else
old_fpu->last_cpu = -1;
@@ -595,7 +550,7 @@ switch_fpu_prepare(struct fpu *old_fpu, int cpu)
static inline void switch_fpu_finish(struct fpu *new_fpu, int cpu)
{
bool preload = static_cpu_has(X86_FEATURE_FPU) &&
- new_fpu->fpstate_active;
+ new_fpu->initialized;
if (preload) {
if (!fpregs_state_valid(new_fpu, cpu))
@@ -617,8 +572,7 @@ static inline void user_fpu_begin(void)
struct fpu *fpu = &current->thread.fpu;
preempt_disable();
- if (!fpregs_active())
- fpregs_activate(fpu);
+ fpregs_activate(fpu);
preempt_enable();
}
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 3c80f5b9c09d..a1520575d86b 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -68,6 +68,9 @@ struct fxregs_state {
/* Default value for fxregs_state.mxcsr: */
#define MXCSR_DEFAULT 0x1f80
+/* Copy both mxcsr & mxcsr_flags with a single u64 memcpy: */
+#define MXCSR_AND_FLAGS_SIZE sizeof(u64)
+
/*
* Software based FPU emulation state. This is arbitrary really,
* it matches the x87 format to make it easier to understand:
@@ -290,36 +293,13 @@ struct fpu {
unsigned int last_cpu;
/*
- * @fpstate_active:
+ * @initialized:
*
- * This flag indicates whether this context is active: if the task
+ * This flag indicates whether this context is initialized: if the task
* is not running then we can restore from this context, if the task
* is running then we should save into this context.
*/
- unsigned char fpstate_active;
-
- /*
- * @fpregs_active:
- *
- * This flag determines whether a given context is actively
- * loaded into the FPU's registers and that those registers
- * represent the task's current FPU state.
- *
- * Note the interaction with fpstate_active:
- *
- * # task does not use the FPU:
- * fpstate_active == 0
- *
- * # task uses the FPU and regs are active:
- * fpstate_active == 1 && fpregs_active == 1
- *
- * # the regs are inactive but still match fpstate:
- * fpstate_active == 1 && fpregs_active == 0 && fpregs_owner == fpu
- *
- * The third state is what we use for the lazy restore optimization
- * on lazy-switching CPUs.
- */
- unsigned char fpregs_active;
+ unsigned char initialized;
/*
* @state:
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 1b2799e0699a..83fee2469eb7 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -48,8 +48,12 @@ void fpu__xstate_clear_all_cpu_caps(void);
void *get_xsave_addr(struct xregs_state *xsave, int xstate);
const void *get_xsave_field_ptr(int xstate_field);
int using_compacted_format(void);
-int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf,
- void __user *ubuf, struct xregs_state *xsave);
-int copyin_to_xsaves(const void *kbuf, const void __user *ubuf,
- struct xregs_state *xsave);
+int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
+int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
+int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf);
+int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf);
+
+/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
+extern int validate_xstate_header(const struct xstate_header *hdr);
+
#endif
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index bc62e7cbf1b1..59ad3d132353 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -88,7 +88,7 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
bool kvm_para_available(void);
unsigned int kvm_arch_para_features(void);
void __init kvm_guest_init(void);
-void kvm_async_pf_task_wait(u32 token);
+void kvm_async_pf_task_wait(u32 token, int interrupt_kernel);
void kvm_async_pf_task_wake(u32 token);
u32 kvm_read_and_reset_pf_reason(void);
extern void kvm_disable_steal_time(void);
@@ -103,7 +103,7 @@ static inline void kvm_spinlock_init(void)
#else /* CONFIG_KVM_GUEST */
#define kvm_guest_init() do {} while (0)
-#define kvm_async_pf_task_wait(T) do {} while(0)
+#define kvm_async_pf_task_wait(T, I) do {} while(0)
#define kvm_async_pf_task_wake(T) do {} while(0)
static inline bool kvm_para_available(void)
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 181264989db5..8edac1de2e35 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -187,7 +187,6 @@ struct mca_msr_regs {
extern struct mce_vendor_flags mce_flags;
-extern struct mca_config mca_cfg;
extern struct mca_msr_regs msr_ops;
enum mce_notifier_prios {
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 7ae318c340d9..3c856a15b98e 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -126,13 +126,7 @@ static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
DEBUG_LOCKS_WARN_ON(preemptible());
}
-static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
-{
- int cpu = smp_processor_id();
-
- if (cpumask_test_cpu(cpu, mm_cpumask(mm)))
- cpumask_clear_cpu(cpu, mm_cpumask(mm));
-}
+void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
static inline int init_new_context(struct task_struct *tsk,
struct mm_struct *mm)
@@ -286,6 +280,32 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
return __pkru_allows_pkey(vma_pkey(vma), write);
}
+/*
+ * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID
+ * bits. This serves two purposes. It prevents a nasty situation in
+ * which PCID-unaware code saves CR3, loads some other value (with PCID
+ * == 0), and then restores CR3, thus corrupting the TLB for ASID 0 if
+ * the saved ASID was nonzero. It also means that any bugs involving
+ * loading a PCID-enabled CR3 with CR4.PCIDE off will trigger
+ * deterministically.
+ */
+
+static inline unsigned long build_cr3(struct mm_struct *mm, u16 asid)
+{
+ if (static_cpu_has(X86_FEATURE_PCID)) {
+ VM_WARN_ON_ONCE(asid > 4094);
+ return __sme_pa(mm->pgd) | (asid + 1);
+ } else {
+ VM_WARN_ON_ONCE(asid != 0);
+ return __sme_pa(mm->pgd);
+ }
+}
+
+static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
+{
+ VM_WARN_ON_ONCE(asid > 4094);
+ return __sme_pa(mm->pgd) | (asid + 1) | CR3_NOFLUSH;
+}
/*
* This can be used from process context to figure out what the value of
@@ -296,10 +316,8 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
*/
static inline unsigned long __get_current_cr3_fast(void)
{
- unsigned long cr3 = __pa(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd);
-
- if (static_cpu_has(X86_FEATURE_PCID))
- cr3 |= this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+ unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm),
+ this_cpu_read(cpu_tlbstate.loaded_mm_asid));
/* For now, be very restrictive about when this can be called. */
VM_WARN_ON(in_nmi() || preemptible());
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 63cc96f064dc..530f448fddaf 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -179,7 +179,6 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
u64 input_address = input ? virt_to_phys(input) : 0;
u64 output_address = output ? virt_to_phys(output) : 0;
u64 hv_status;
- register void *__sp asm(_ASM_SP);
#ifdef CONFIG_X86_64
if (!hv_hypercall_pg)
@@ -187,7 +186,7 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
__asm__ __volatile__("mov %4, %%r8\n"
"call *%5"
- : "=a" (hv_status), "+r" (__sp),
+ : "=a" (hv_status), ASM_CALL_CONSTRAINT,
"+c" (control), "+d" (input_address)
: "r" (output_address), "m" (hv_hypercall_pg)
: "cc", "memory", "r8", "r9", "r10", "r11");
@@ -202,7 +201,7 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
__asm__ __volatile__("call *%7"
: "=A" (hv_status),
- "+c" (input_address_lo), "+r" (__sp)
+ "+c" (input_address_lo), ASM_CALL_CONSTRAINT
: "A" (control),
"b" (input_address_hi),
"D"(output_address_hi), "S"(output_address_lo),
@@ -224,12 +223,11 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
{
u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
- register void *__sp asm(_ASM_SP);
#ifdef CONFIG_X86_64
{
__asm__ __volatile__("call *%4"
- : "=a" (hv_status), "+r" (__sp),
+ : "=a" (hv_status), ASM_CALL_CONSTRAINT,
"+c" (control), "+d" (input1)
: "m" (hv_hypercall_pg)
: "cc", "r8", "r9", "r10", "r11");
@@ -242,7 +240,7 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
__asm__ __volatile__ ("call *%5"
: "=A"(hv_status),
"+c"(input1_lo),
- "+r"(__sp)
+ ASM_CALL_CONSTRAINT
: "A" (control),
"b" (input1_hi),
"m" (hv_hypercall_pg)
@@ -291,6 +289,7 @@ static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size,
* to this information.
*/
extern u32 *hv_vp_index;
+extern u32 hv_max_vp_index;
/**
* hv_cpu_number_to_vp_number() - Map CPU to VP.
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 42873edd9f9d..280d94c36dad 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -459,8 +459,8 @@ int paravirt_disable_iospace(void);
*/
#ifdef CONFIG_X86_32
#define PVOP_VCALL_ARGS \
- unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx; \
- register void *__sp asm("esp")
+ unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx;
+
#define PVOP_CALL_ARGS PVOP_VCALL_ARGS
#define PVOP_CALL_ARG1(x) "a" ((unsigned long)(x))
@@ -480,8 +480,8 @@ int paravirt_disable_iospace(void);
/* [re]ax isn't an arg, but the return val */
#define PVOP_VCALL_ARGS \
unsigned long __edi = __edi, __esi = __esi, \
- __edx = __edx, __ecx = __ecx, __eax = __eax; \
- register void *__sp asm("rsp")
+ __edx = __edx, __ecx = __ecx, __eax = __eax;
+
#define PVOP_CALL_ARGS PVOP_VCALL_ARGS
#define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x))
@@ -532,7 +532,7 @@ int paravirt_disable_iospace(void);
asm volatile(pre \
paravirt_alt(PARAVIRT_CALL) \
post \
- : call_clbr, "+r" (__sp) \
+ : call_clbr, ASM_CALL_CONSTRAINT \
: paravirt_type(op), \
paravirt_clobber(clbr), \
##__VA_ARGS__ \
@@ -542,7 +542,7 @@ int paravirt_disable_iospace(void);
asm volatile(pre \
paravirt_alt(PARAVIRT_CALL) \
post \
- : call_clbr, "+r" (__sp) \
+ : call_clbr, ASM_CALL_CONSTRAINT \
: paravirt_type(op), \
paravirt_clobber(clbr), \
##__VA_ARGS__ \
@@ -569,7 +569,7 @@ int paravirt_disable_iospace(void);
asm volatile(pre \
paravirt_alt(PARAVIRT_CALL) \
post \
- : call_clbr, "+r" (__sp) \
+ : call_clbr, ASM_CALL_CONSTRAINT \
: paravirt_type(op), \
paravirt_clobber(clbr), \
##__VA_ARGS__ \
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index ec1f3c651150..4f44505dbf87 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -100,19 +100,14 @@ static __always_inline bool should_resched(int preempt_offset)
#ifdef CONFIG_PREEMPT
extern asmlinkage void ___preempt_schedule(void);
-# define __preempt_schedule() \
-({ \
- register void *__sp asm(_ASM_SP); \
- asm volatile ("call ___preempt_schedule" : "+r"(__sp)); \
-})
+# define __preempt_schedule() \
+ asm volatile ("call ___preempt_schedule" : ASM_CALL_CONSTRAINT)
extern asmlinkage void preempt_schedule(void);
extern asmlinkage void ___preempt_schedule_notrace(void);
-# define __preempt_schedule_notrace() \
-({ \
- register void *__sp asm(_ASM_SP); \
- asm volatile ("call ___preempt_schedule_notrace" : "+r"(__sp)); \
-})
+# define __preempt_schedule_notrace() \
+ asm volatile ("call ___preempt_schedule_notrace" : ASM_CALL_CONSTRAINT)
+
extern asmlinkage void preempt_schedule_notrace(void);
#endif
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 3fa26a61eabc..b390ff76e58f 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -677,8 +677,6 @@ static inline void sync_core(void)
* Like all of Linux's memory ordering operations, this is a
* compiler barrier as well.
*/
- register void *__sp asm(_ASM_SP);
-
#ifdef CONFIG_X86_32
asm volatile (
"pushfl\n\t"
@@ -686,7 +684,7 @@ static inline void sync_core(void)
"pushl $1f\n\t"
"iret\n\t"
"1:"
- : "+r" (__sp) : : "memory");
+ : ASM_CALL_CONSTRAINT : : "memory");
#else
unsigned int tmp;
@@ -703,7 +701,7 @@ static inline void sync_core(void)
"iretq\n\t"
UNWIND_HINT_RESTORE
"1:"
- : "=&r" (tmp), "+r" (__sp) : : "cc", "memory");
+ : "=&r" (tmp), ASM_CALL_CONSTRAINT : : "cc", "memory");
#endif
}
diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h
index a34e0d4b957d..7116b7931c7b 100644
--- a/arch/x86/include/asm/rwsem.h
+++ b/arch/x86/include/asm/rwsem.h
@@ -103,7 +103,6 @@ static inline bool __down_read_trylock(struct rw_semaphore *sem)
({ \
long tmp; \
struct rw_semaphore* ret; \
- register void *__sp asm(_ASM_SP); \
\
asm volatile("# beginning down_write\n\t" \
LOCK_PREFIX " xadd %1,(%4)\n\t" \
@@ -114,7 +113,8 @@ static inline bool __down_read_trylock(struct rw_semaphore *sem)
" call " slow_path "\n" \
"1:\n" \
"# ending down_write" \
- : "+m" (sem->count), "=d" (tmp), "=a" (ret), "+r" (__sp) \
+ : "+m" (sem->count), "=d" (tmp), \
+ "=a" (ret), ASM_CALL_CONSTRAINT \
: "a" (sem), "1" (RWSEM_ACTIVE_WRITE_BIAS) \
: "memory", "cc"); \
ret; \
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 5161da1a0fa0..89e7eeb5cec1 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -158,17 +158,6 @@ struct thread_info {
*/
#ifndef __ASSEMBLY__
-static inline unsigned long current_stack_pointer(void)
-{
- unsigned long sp;
-#ifdef CONFIG_X86_64
- asm("mov %%rsp,%0" : "=g" (sp));
-#else
- asm("mov %%esp,%0" : "=g" (sp));
-#endif
- return sp;
-}
-
/*
* Walks up the stack frames to make sure that the specified object is
* entirely contained by a single stack frame.
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 4893abf7f74f..d362161d3291 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -83,6 +83,13 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
#endif
/*
+ * If tlb_use_lazy_mode is true, then we try to avoid switching CR3 to point
+ * to init_mm when we switch to a kernel thread (e.g. the idle thread). If
+ * it's false, then we immediately switch CR3 when entering a kernel thread.
+ */
+DECLARE_STATIC_KEY_TRUE(tlb_use_lazy_mode);
+
+/*
* 6 because 6 should be plenty and struct tlb_state will fit in
* two cache lines.
*/
@@ -105,6 +112,23 @@ struct tlb_state {
u16 next_asid;
/*
+ * We can be in one of several states:
+ *
+ * - Actively using an mm. Our CPU's bit will be set in
+ * mm_cpumask(loaded_mm) and is_lazy == false;
+ *
+ * - Not using a real mm. loaded_mm == &init_mm. Our CPU's bit
+ * will not be set in mm_cpumask(&init_mm) and is_lazy == false.
+ *
+ * - Lazily using a real mm. loaded_mm != &init_mm, our bit
+ * is set in mm_cpumask(loaded_mm), but is_lazy == true.
+ * We're heuristically guessing that the CR3 load we
+ * skipped more than makes up for the overhead added by
+ * lazy mode.
+ */
+ bool is_lazy;
+
+ /*
* Access to this CR4 shadow and to H/W CR4 is protected by
* disabling interrupts when modifying either one.
*/
diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h
index 342e59789fcd..39f7a27bef13 100644
--- a/arch/x86/include/asm/trace/fpu.h
+++ b/arch/x86/include/asm/trace/fpu.h
@@ -12,25 +12,22 @@ DECLARE_EVENT_CLASS(x86_fpu,
TP_STRUCT__entry(
__field(struct fpu *, fpu)
- __field(bool, fpregs_active)
- __field(bool, fpstate_active)
+ __field(bool, initialized)
__field(u64, xfeatures)
__field(u64, xcomp_bv)
),
TP_fast_assign(
__entry->fpu = fpu;
- __entry->fpregs_active = fpu->fpregs_active;
- __entry->fpstate_active = fpu->fpstate_active;
+ __entry->initialized = fpu->initialized;
if (boot_cpu_has(X86_FEATURE_OSXSAVE)) {
__entry->xfeatures = fpu->state.xsave.header.xfeatures;
__entry->xcomp_bv = fpu->state.xsave.header.xcomp_bv;
}
),
- TP_printk("x86/fpu: %p fpregs_active: %d fpstate_active: %d xfeatures: %llx xcomp_bv: %llx",
+ TP_printk("x86/fpu: %p initialized: %d xfeatures: %llx xcomp_bv: %llx",
__entry->fpu,
- __entry->fpregs_active,
- __entry->fpstate_active,
+ __entry->initialized,
__entry->xfeatures,
__entry->xcomp_bv
)
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 184eb9894dae..4b892917edeb 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -166,11 +166,11 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
({ \
int __ret_gu; \
register __inttype(*(ptr)) __val_gu asm("%"_ASM_DX); \
- register void *__sp asm(_ASM_SP); \
__chk_user_ptr(ptr); \
might_fault(); \
asm volatile("call __get_user_%P4" \
- : "=a" (__ret_gu), "=r" (__val_gu), "+r" (__sp) \
+ : "=a" (__ret_gu), "=r" (__val_gu), \
+ ASM_CALL_CONSTRAINT \
: "0" (ptr), "i" (sizeof(*(ptr)))); \
(x) = (__force __typeof__(*(ptr))) __val_gu; \
__builtin_expect(__ret_gu, 0); \
@@ -337,7 +337,7 @@ do { \
_ASM_EXTABLE(1b, 4b) \
_ASM_EXTABLE(2b, 4b) \
: "=r" (retval), "=&A"(x) \
- : "m" (__m(__ptr)), "m" __m(((u32 *)(__ptr)) + 1), \
+ : "m" (__m(__ptr)), "m" __m(((u32 __user *)(__ptr)) + 1), \
"i" (errret), "0" (retval)); \
})
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 9606688caa4b..7cb282e9e587 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -113,10 +113,9 @@ extern struct { char _entry[32]; } hypercall_page[];
register unsigned long __arg2 asm(__HYPERCALL_ARG2REG) = __arg2; \
register unsigned long __arg3 asm(__HYPERCALL_ARG3REG) = __arg3; \
register unsigned long __arg4 asm(__HYPERCALL_ARG4REG) = __arg4; \
- register unsigned long __arg5 asm(__HYPERCALL_ARG5REG) = __arg5; \
- register void *__sp asm(_ASM_SP);
+ register unsigned long __arg5 asm(__HYPERCALL_ARG5REG) = __arg5;
-#define __HYPERCALL_0PARAM "=r" (__res), "+r" (__sp)
+#define __HYPERCALL_0PARAM "=r" (__res), ASM_CALL_CONSTRAINT
#define __HYPERCALL_1PARAM __HYPERCALL_0PARAM, "+r" (__arg1)
#define __HYPERCALL_2PARAM __HYPERCALL_1PARAM, "+r" (__arg2)
#define __HYPERCALL_3PARAM __HYPERCALL_2PARAM, "+r" (__arg3)
@@ -552,13 +551,13 @@ static inline void
MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr,
struct desc_struct desc)
{
- u32 *p = (u32 *) &desc;
-
mcl->op = __HYPERVISOR_update_descriptor;
if (sizeof(maddr) == sizeof(long)) {
mcl->args[0] = maddr;
mcl->args[1] = *(unsigned long *)&desc;
} else {
+ u32 *p = (u32 *)&desc;
+
mcl->args[0] = maddr;
mcl->args[1] = maddr >> 32;
mcl->args[2] = *p++;
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index d705c769f77d..ff891772c9f8 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -573,11 +573,21 @@ static u32 bdx_deadline_rev(void)
return ~0U;
}
+static u32 skx_deadline_rev(void)
+{
+ switch (boot_cpu_data.x86_mask) {
+ case 0x03: return 0x01000136;
+ case 0x04: return 0x02000014;
+ }
+
+ return ~0U;
+}
+
static const struct x86_cpu_id deadline_match[] = {
DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_HASWELL_X, hsx_deadline_rev),
DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_X, 0x0b000020),
DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_BROADWELL_XEON_D, bdx_deadline_rev),
- DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE_X, 0x02000014),
+ DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_SKYLAKE_X, skx_deadline_rev),
DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_CORE, 0x22),
DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_ULT, 0x20),
@@ -600,7 +610,8 @@ static void apic_check_deadline_errata(void)
const struct x86_cpu_id *m;
u32 rev;
- if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
+ if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER) ||
+ boot_cpu_has(X86_FEATURE_HYPERVISOR))
return;
m = x86_match_cpu(deadline_match);
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 9862e2cd6d93..d58184b7cd44 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -763,6 +763,16 @@ static void init_amd_bd(struct cpuinfo_x86 *c)
}
}
+static void init_amd_zn(struct cpuinfo_x86 *c)
+{
+ /*
+ * Fix erratum 1076: CPB feature bit not being set in CPUID. It affects
+ * all up to and including B1.
+ */
+ if (c->x86_model <= 1 && c->x86_mask <= 1)
+ set_cpu_cap(c, X86_FEATURE_CPB);
+}
+
static void init_amd(struct cpuinfo_x86 *c)
{
early_init_amd(c);
@@ -791,6 +801,7 @@ static void init_amd(struct cpuinfo_x86 *c)
case 0x10: init_amd_gh(c); break;
case 0x12: init_amd_ln(c); break;
case 0x15: init_amd_bd(c); break;
+ case 0x17: init_amd_zn(c); break;
}
/* Enable workaround for FXSAVE leak */
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index db684880d74a..0af86d9242da 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -21,14 +21,6 @@
void __init check_bugs(void)
{
-#ifdef CONFIG_X86_32
- /*
- * Regardless of whether PCID is enumerated, the SDM says
- * that it can't be enabled in 32-bit mode.
- */
- setup_clear_cpu_cap(X86_FEATURE_PCID);
-#endif
-
identify_boot_cpu();
if (!IS_ENABLED(CONFIG_SMP)) {
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 775f10100d7f..c9176bae7fd8 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -904,6 +904,14 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
setup_force_cpu_cap(X86_FEATURE_ALWAYS);
fpu__init_system(c);
+
+#ifdef CONFIG_X86_32
+ /*
+ * Regardless of whether PCID is enumerated, the SDM says
+ * that it can't be enabled in 32-bit mode.
+ */
+ setup_clear_cpu_cap(X86_FEATURE_PCID);
+#endif
}
void __init early_cpu_init(void)
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index 098530a93bb7..debb974fd17d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -1,3 +1,6 @@
+#ifndef __X86_MCE_INTERNAL_H__
+#define __X86_MCE_INTERNAL_H__
+
#include <linux/device.h>
#include <asm/mce.h>
@@ -108,3 +111,7 @@ static inline void mce_work_trigger(void) { }
static inline void mce_register_injector_chain(struct notifier_block *nb) { }
static inline void mce_unregister_injector_chain(struct notifier_block *nb) { }
#endif
+
+extern struct mca_config mca_cfg;
+
+#endif /* __X86_MCE_INTERNAL_H__ */
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 40e28ed77fbf..486f640b02ef 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -28,6 +28,8 @@
#include <asm/msr.h>
#include <asm/trace/irq_vectors.h>
+#include "mce-internal.h"
+
#define NR_BLOCKS 5
#define THRESHOLD_MAX 0xFFF
#define INT_TYPE_APIC 0x00020000
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 86e8f0b2537b..c4fa4a85d4cb 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -122,9 +122,6 @@ static bool __init check_loader_disabled_bsp(void)
bool *res = &dis_ucode_ldr;
#endif
- if (!have_cpuid_p())
- return *res;
-
/*
* CPUID(1).ECX[31]: reserved for hypervisor use. This is still not
* completely accurate as xen pv guests don't see that CPUID bit set but
@@ -166,24 +163,36 @@ bool get_builtin_firmware(struct cpio_data *cd, const char *name)
void __init load_ucode_bsp(void)
{
unsigned int cpuid_1_eax;
+ bool intel = true;
- if (check_loader_disabled_bsp())
+ if (!have_cpuid_p())
return;
cpuid_1_eax = native_cpuid_eax(1);
switch (x86_cpuid_vendor()) {
case X86_VENDOR_INTEL:
- if (x86_family(cpuid_1_eax) >= 6)
- load_ucode_intel_bsp();
+ if (x86_family(cpuid_1_eax) < 6)
+ return;
break;
+
case X86_VENDOR_AMD:
- if (x86_family(cpuid_1_eax) >= 0x10)
- load_ucode_amd_bsp(cpuid_1_eax);
+ if (x86_family(cpuid_1_eax) < 0x10)
+ return;
+ intel = false;
break;
+
default:
- break;
+ return;
}
+
+ if (check_loader_disabled_bsp())
+ return;
+
+ if (intel)
+ load_ucode_intel_bsp();
+ else
+ load_ucode_amd_bsp(cpuid_1_eax);
}
static bool check_loader_disabled_ap(void)
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index e1114f070c2d..f92a6593de1e 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -100,7 +100,7 @@ void __kernel_fpu_begin(void)
kernel_fpu_disable();
- if (fpu->fpregs_active) {
+ if (fpu->initialized) {
/*
* Ignore return value -- we don't care if reg state
* is clobbered.
@@ -116,7 +116,7 @@ void __kernel_fpu_end(void)
{
struct fpu *fpu = &current->thread.fpu;
- if (fpu->fpregs_active)
+ if (fpu->initialized)
copy_kernel_to_fpregs(&fpu->state);
kernel_fpu_enable();
@@ -148,7 +148,7 @@ void fpu__save(struct fpu *fpu)
preempt_disable();
trace_x86_fpu_before_save(fpu);
- if (fpu->fpregs_active) {
+ if (fpu->initialized) {
if (!copy_fpregs_to_fpstate(fpu)) {
copy_kernel_to_fpregs(&fpu->state);
}
@@ -189,10 +189,9 @@ EXPORT_SYMBOL_GPL(fpstate_init);
int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
{
- dst_fpu->fpregs_active = 0;
dst_fpu->last_cpu = -1;
- if (!src_fpu->fpstate_active || !static_cpu_has(X86_FEATURE_FPU))
+ if (!src_fpu->initialized || !static_cpu_has(X86_FEATURE_FPU))
return 0;
WARN_ON_FPU(src_fpu != &current->thread.fpu);
@@ -206,26 +205,14 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
/*
* Save current FPU registers directly into the child
* FPU context, without any memory-to-memory copying.
- * In lazy mode, if the FPU context isn't loaded into
- * fpregs, CR0.TS will be set and do_device_not_available
- * will load the FPU context.
*
- * We have to do all this with preemption disabled,
- * mostly because of the FNSAVE case, because in that
- * case we must not allow preemption in the window
- * between the FNSAVE and us marking the context lazy.
- *
- * It shouldn't be an issue as even FNSAVE is plenty
- * fast in terms of critical section length.
+ * ( The function 'fails' in the FNSAVE case, which destroys
+ * register contents so we have to copy them back. )
*/
- preempt_disable();
if (!copy_fpregs_to_fpstate(dst_fpu)) {
- memcpy(&src_fpu->state, &dst_fpu->state,
- fpu_kernel_xstate_size);
-
+ memcpy(&src_fpu->state, &dst_fpu->state, fpu_kernel_xstate_size);
copy_kernel_to_fpregs(&src_fpu->state);
}
- preempt_enable();
trace_x86_fpu_copy_src(src_fpu);
trace_x86_fpu_copy_dst(dst_fpu);
@@ -237,45 +224,48 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
* Activate the current task's in-memory FPU context,
* if it has not been used before:
*/
-void fpu__activate_curr(struct fpu *fpu)
+void fpu__initialize(struct fpu *fpu)
{
WARN_ON_FPU(fpu != &current->thread.fpu);
- if (!fpu->fpstate_active) {
+ if (!fpu->initialized) {
fpstate_init(&fpu->state);
trace_x86_fpu_init_state(fpu);
trace_x86_fpu_activate_state(fpu);
/* Safe to do for the current task: */
- fpu->fpstate_active = 1;
+ fpu->initialized = 1;
}
}
-EXPORT_SYMBOL_GPL(fpu__activate_curr);
+EXPORT_SYMBOL_GPL(fpu__initialize);
/*
* This function must be called before we read a task's fpstate.
*
- * If the task has not used the FPU before then initialize its
- * fpstate.
+ * There's two cases where this gets called:
+ *
+ * - for the current task (when coredumping), in which case we have
+ * to save the latest FPU registers into the fpstate,
+ *
+ * - or it's called for stopped tasks (ptrace), in which case the
+ * registers were already saved by the context-switch code when
+ * the task scheduled out - we only have to initialize the registers
+ * if they've never been initialized.
*
* If the task has used the FPU before then save it.
*/
-void fpu__activate_fpstate_read(struct fpu *fpu)
+void fpu__prepare_read(struct fpu *fpu)
{
- /*
- * If fpregs are active (in the current CPU), then
- * copy them to the fpstate:
- */
- if (fpu->fpregs_active) {
+ if (fpu == &current->thread.fpu) {
fpu__save(fpu);
} else {
- if (!fpu->fpstate_active) {
+ if (!fpu->initialized) {
fpstate_init(&fpu->state);
trace_x86_fpu_init_state(fpu);
trace_x86_fpu_activate_state(fpu);
/* Safe to do for current and for stopped child tasks: */
- fpu->fpstate_active = 1;
+ fpu->initialized = 1;
}
}
}
@@ -283,17 +273,17 @@ void fpu__activate_fpstate_read(struct fpu *fpu)
/*
* This function must be called before we write a task's fpstate.
*
- * If the task has used the FPU before then unlazy it.
+ * If the task has used the FPU before then invalidate any cached FPU registers.
* If the task has not used the FPU before then initialize its fpstate.
*
* After this function call, after registers in the fpstate are
* modified and the child task has woken up, the child task will
* restore the modified FPU state from the modified context. If we
- * didn't clear its lazy status here then the lazy in-registers
+ * didn't clear its cached status here then the cached in-registers
* state pending on its former CPU could be restored, corrupting
* the modifications.
*/
-void fpu__activate_fpstate_write(struct fpu *fpu)
+void fpu__prepare_write(struct fpu *fpu)
{
/*
* Only stopped child tasks can be used to modify the FPU
@@ -301,8 +291,8 @@ void fpu__activate_fpstate_write(struct fpu *fpu)
*/
WARN_ON_FPU(fpu == &current->thread.fpu);
- if (fpu->fpstate_active) {
- /* Invalidate any lazy state: */
+ if (fpu->initialized) {
+ /* Invalidate any cached state: */
__fpu_invalidate_fpregs_state(fpu);
} else {
fpstate_init(&fpu->state);
@@ -310,74 +300,11 @@ void fpu__activate_fpstate_write(struct fpu *fpu)
trace_x86_fpu_activate_state(fpu);
/* Safe to do for stopped child tasks: */
- fpu->fpstate_active = 1;
+ fpu->initialized = 1;
}
}
/*
- * This function must be called before we write the current
- * task's fpstate.
- *
- * This call gets the current FPU register state and moves
- * it in to the 'fpstate'. Preemption is disabled so that
- * no writes to the 'fpstate' can occur from context
- * swiches.
- *
- * Must be followed by a fpu__current_fpstate_write_end().
- */
-void fpu__current_fpstate_write_begin(void)
-{
- struct fpu *fpu = &current->thread.fpu;
-
- /*
- * Ensure that the context-switching code does not write
- * over the fpstate while we are doing our update.
- */
- preempt_disable();
-
- /*
- * Move the fpregs in to the fpu's 'fpstate'.
- */
- fpu__activate_fpstate_read(fpu);
-
- /*
- * The caller is about to write to 'fpu'. Ensure that no
- * CPU thinks that its fpregs match the fpstate. This
- * ensures we will not be lazy and skip a XRSTOR in the
- * future.
- */
- __fpu_invalidate_fpregs_state(fpu);
-}
-
-/*
- * This function must be paired with fpu__current_fpstate_write_begin()
- *
- * This will ensure that the modified fpstate gets placed back in
- * the fpregs if necessary.
- *
- * Note: This function may be called whether or not an _actual_
- * write to the fpstate occurred.
- */
-void fpu__current_fpstate_write_end(void)
-{
- struct fpu *fpu = &current->thread.fpu;
-
- /*
- * 'fpu' now has an updated copy of the state, but the
- * registers may still be out of date. Update them with
- * an XRSTOR if they are active.
- */
- if (fpregs_active())
- copy_kernel_to_fpregs(&fpu->state);
-
- /*
- * Our update is done and the fpregs/fpstate are in sync
- * if necessary. Context switches can happen again.
- */
- preempt_enable();
-}
-
-/*
* 'fpu__restore()' is called to copy FPU registers from
* the FPU fpstate to the live hw registers and to activate
* access to the hardware registers, so that FPU instructions
@@ -389,7 +316,7 @@ void fpu__current_fpstate_write_end(void)
*/
void fpu__restore(struct fpu *fpu)
{
- fpu__activate_curr(fpu);
+ fpu__initialize(fpu);
/* Avoid __kernel_fpu_begin() right after fpregs_activate() */
kernel_fpu_disable();
@@ -414,15 +341,17 @@ void fpu__drop(struct fpu *fpu)
{
preempt_disable();
- if (fpu->fpregs_active) {
- /* Ignore delayed exceptions from user space */
- asm volatile("1: fwait\n"
- "2:\n"
- _ASM_EXTABLE(1b, 2b));
- fpregs_deactivate(fpu);
+ if (fpu == &current->thread.fpu) {
+ if (fpu->initialized) {
+ /* Ignore delayed exceptions from user space */
+ asm volatile("1: fwait\n"
+ "2:\n"
+ _ASM_EXTABLE(1b, 2b));
+ fpregs_deactivate(fpu);
+ }
}
- fpu->fpstate_active = 0;
+ fpu->initialized = 0;
trace_x86_fpu_dropped(fpu);
@@ -462,9 +391,11 @@ void fpu__clear(struct fpu *fpu)
* Make sure fpstate is cleared and initialized.
*/
if (static_cpu_has(X86_FEATURE_FPU)) {
- fpu__activate_curr(fpu);
+ preempt_disable();
+ fpu__initialize(fpu);
user_fpu_begin();
copy_init_fpstate_to_fpregs();
+ preempt_enable();
}
}
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index d5d44c452624..7affb7e3d9a5 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -240,7 +240,7 @@ static void __init fpu__init_system_ctx_switch(void)
WARN_ON_FPU(!on_boot_cpu);
on_boot_cpu = 0;
- WARN_ON_FPU(current->thread.fpu.fpstate_active);
+ WARN_ON_FPU(current->thread.fpu.initialized);
}
/*
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index b188b16841e3..3ea151372389 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -16,14 +16,14 @@ int regset_fpregs_active(struct task_struct *target, const struct user_regset *r
{
struct fpu *target_fpu = &target->thread.fpu;
- return target_fpu->fpstate_active ? regset->n : 0;
+ return target_fpu->initialized ? regset->n : 0;
}
int regset_xregset_fpregs_active(struct task_struct *target, const struct user_regset *regset)
{
struct fpu *target_fpu = &target->thread.fpu;
- if (boot_cpu_has(X86_FEATURE_FXSR) && target_fpu->fpstate_active)
+ if (boot_cpu_has(X86_FEATURE_FXSR) && target_fpu->initialized)
return regset->n;
else
return 0;
@@ -38,7 +38,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
if (!boot_cpu_has(X86_FEATURE_FXSR))
return -ENODEV;
- fpu__activate_fpstate_read(fpu);
+ fpu__prepare_read(fpu);
fpstate_sanitize_xstate(fpu);
return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
@@ -55,7 +55,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
if (!boot_cpu_has(X86_FEATURE_FXSR))
return -ENODEV;
- fpu__activate_fpstate_write(fpu);
+ fpu__prepare_write(fpu);
fpstate_sanitize_xstate(fpu);
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
@@ -89,10 +89,13 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
xsave = &fpu->state.xsave;
- fpu__activate_fpstate_read(fpu);
+ fpu__prepare_read(fpu);
if (using_compacted_format()) {
- ret = copyout_from_xsaves(pos, count, kbuf, ubuf, xsave);
+ if (kbuf)
+ ret = copy_xstate_to_kernel(kbuf, xsave, pos, count);
+ else
+ ret = copy_xstate_to_user(ubuf, xsave, pos, count);
} else {
fpstate_sanitize_xstate(fpu);
/*
@@ -129,28 +132,29 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
xsave = &fpu->state.xsave;
- fpu__activate_fpstate_write(fpu);
+ fpu__prepare_write(fpu);
- if (boot_cpu_has(X86_FEATURE_XSAVES))
- ret = copyin_to_xsaves(kbuf, ubuf, xsave);
- else
+ if (using_compacted_format()) {
+ if (kbuf)
+ ret = copy_kernel_to_xstate(xsave, kbuf);
+ else
+ ret = copy_user_to_xstate(xsave, ubuf);
+ } else {
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
-
- /*
- * In case of failure, mark all states as init:
- */
- if (ret)
- fpstate_init(&fpu->state);
+ if (!ret)
+ ret = validate_xstate_header(&xsave->header);
+ }
/*
* mxcsr reserved bits must be masked to zero for security reasons.
*/
xsave->i387.mxcsr &= mxcsr_feature_mask;
- xsave->header.xfeatures &= xfeatures_mask;
+
/*
- * These bits must be zero.
+ * In case of failure, mark all states as init:
*/
- memset(&xsave->header.reserved, 0, 48);
+ if (ret)
+ fpstate_init(&fpu->state);
return ret;
}
@@ -299,7 +303,7 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset,
struct fpu *fpu = &target->thread.fpu;
struct user_i387_ia32_struct env;
- fpu__activate_fpstate_read(fpu);
+ fpu__prepare_read(fpu);
if (!boot_cpu_has(X86_FEATURE_FPU))
return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);
@@ -329,7 +333,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
struct user_i387_ia32_struct env;
int ret;
- fpu__activate_fpstate_write(fpu);
+ fpu__prepare_write(fpu);
fpstate_sanitize_xstate(fpu);
if (!boot_cpu_has(X86_FEATURE_FPU))
@@ -369,7 +373,7 @@ int dump_fpu(struct pt_regs *regs, struct user_i387_struct *ufpu)
struct fpu *fpu = &tsk->thread.fpu;
int fpvalid;
- fpvalid = fpu->fpstate_active;
+ fpvalid = fpu->initialized;
if (fpvalid)
fpvalid = !fpregs_get(tsk, NULL,
0, sizeof(struct user_i387_ia32_struct),
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 83c23c230b4c..fb639e70048f 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -155,7 +155,8 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf)
*/
int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
{
- struct xregs_state *xsave = &current->thread.fpu.state.xsave;
+ struct fpu *fpu = &current->thread.fpu;
+ struct xregs_state *xsave = &fpu->state.xsave;
struct task_struct *tsk = current;
int ia32_fxstate = (buf != buf_fx);
@@ -170,13 +171,13 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
sizeof(struct user_i387_ia32_struct), NULL,
(struct _fpstate_32 __user *) buf) ? -1 : 1;
- if (fpregs_active() || using_compacted_format()) {
+ if (fpu->initialized || using_compacted_format()) {
/* Save the live register state to the user directly. */
if (copy_fpregs_to_sigframe(buf_fx))
return -1;
/* Update the thread's fxstate to save the fsave header. */
if (ia32_fxstate)
- copy_fxregs_to_kernel(&tsk->thread.fpu);
+ copy_fxregs_to_kernel(fpu);
} else {
/*
* It is a *bug* if kernel uses compacted-format for xsave
@@ -189,7 +190,7 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
return -1;
}
- fpstate_sanitize_xstate(&tsk->thread.fpu);
+ fpstate_sanitize_xstate(fpu);
if (__copy_to_user(buf_fx, xsave, fpu_user_xstate_size))
return -1;
}
@@ -213,8 +214,11 @@ sanitize_restored_xstate(struct task_struct *tsk,
struct xstate_header *header = &xsave->header;
if (use_xsave()) {
- /* These bits must be zero. */
- memset(header->reserved, 0, 48);
+ /*
+ * Note: we don't need to zero the reserved bits in the
+ * xstate_header here because we either didn't copy them at all,
+ * or we checked earlier that they aren't set.
+ */
/*
* Init the state that is not present in the memory
@@ -223,7 +227,7 @@ sanitize_restored_xstate(struct task_struct *tsk,
if (fx_only)
header->xfeatures = XFEATURE_MASK_FPSSE;
else
- header->xfeatures &= (xfeatures_mask & xfeatures);
+ header->xfeatures &= xfeatures;
}
if (use_fxsr()) {
@@ -279,7 +283,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
if (!access_ok(VERIFY_READ, buf, size))
return -EACCES;
- fpu__activate_curr(fpu);
+ fpu__initialize(fpu);
if (!static_cpu_has(X86_FEATURE_FPU))
return fpregs_soft_set(current, NULL,
@@ -307,28 +311,29 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
/*
* For 32-bit frames with fxstate, copy the user state to the
* thread's fpu state, reconstruct fxstate from the fsave
- * header. Sanitize the copied state etc.
+ * header. Validate and sanitize the copied state.
*/
struct fpu *fpu = &tsk->thread.fpu;
struct user_i387_ia32_struct env;
int err = 0;
/*
- * Drop the current fpu which clears fpu->fpstate_active. This ensures
+ * Drop the current fpu which clears fpu->initialized. This ensures
* that any context-switch during the copy of the new state,
* avoids the intermediate state from getting restored/saved.
* Thus avoiding the new restored state from getting corrupted.
* We will be ready to restore/save the state only after
- * fpu->fpstate_active is again set.
+ * fpu->initialized is again set.
*/
fpu__drop(fpu);
if (using_compacted_format()) {
- err = copyin_to_xsaves(NULL, buf_fx,
- &fpu->state.xsave);
+ err = copy_user_to_xstate(&fpu->state.xsave, buf_fx);
} else {
- err = __copy_from_user(&fpu->state.xsave,
- buf_fx, state_size);
+ err = __copy_from_user(&fpu->state.xsave, buf_fx, state_size);
+
+ if (!err && state_size > offsetof(struct xregs_state, header))
+ err = validate_xstate_header(&fpu->state.xsave.header);
}
if (err || __copy_from_user(&env, buf, sizeof(env))) {
@@ -339,7 +344,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
sanitize_restored_xstate(tsk, &env, xfeatures, fx_only);
}
- fpu->fpstate_active = 1;
+ fpu->initialized = 1;
preempt_disable();
fpu__restore(fpu);
preempt_enable();
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index c24ac1efb12d..f1d5476c9022 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -483,6 +483,30 @@ int using_compacted_format(void)
return boot_cpu_has(X86_FEATURE_XSAVES);
}
+/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
+int validate_xstate_header(const struct xstate_header *hdr)
+{
+ /* No unknown or supervisor features may be set */
+ if (hdr->xfeatures & (~xfeatures_mask | XFEATURE_MASK_SUPERVISOR))
+ return -EINVAL;
+
+ /* Userspace must use the uncompacted format */
+ if (hdr->xcomp_bv)
+ return -EINVAL;
+
+ /*
+ * If 'reserved' is shrunken to add a new field, make sure to validate
+ * that new field here!
+ */
+ BUILD_BUG_ON(sizeof(hdr->reserved) != 48);
+
+ /* No reserved bits may be set */
+ if (memchr_inv(hdr->reserved, 0, sizeof(hdr->reserved)))
+ return -EINVAL;
+
+ return 0;
+}
+
static void __xstate_dump_leaves(void)
{
int i;
@@ -867,7 +891,7 @@ const void *get_xsave_field_ptr(int xsave_state)
{
struct fpu *fpu = &current->thread.fpu;
- if (!fpu->fpstate_active)
+ if (!fpu->initialized)
return NULL;
/*
* fpu__save() takes the CPU's xstate registers
@@ -921,38 +945,129 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
#endif /* ! CONFIG_ARCH_HAS_PKEYS */
/*
+ * Weird legacy quirk: SSE and YMM states store information in the
+ * MXCSR and MXCSR_FLAGS fields of the FP area. That means if the FP
+ * area is marked as unused in the xfeatures header, we need to copy
+ * MXCSR and MXCSR_FLAGS if either SSE or YMM are in use.
+ */
+static inline bool xfeatures_mxcsr_quirk(u64 xfeatures)
+{
+ if (!(xfeatures & (XFEATURE_MASK_SSE|XFEATURE_MASK_YMM)))
+ return false;
+
+ if (xfeatures & XFEATURE_MASK_FP)
+ return false;
+
+ return true;
+}
+
+/*
* This is similar to user_regset_copyout(), but will not add offset to
* the source data pointer or increment pos, count, kbuf, and ubuf.
*/
-static inline int xstate_copyout(unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf,
- const void *data, const int start_pos,
- const int end_pos)
+static inline void
+__copy_xstate_to_kernel(void *kbuf, const void *data,
+ unsigned int offset, unsigned int size, unsigned int size_total)
{
- if ((count == 0) || (pos < start_pos))
- return 0;
+ if (offset < size_total) {
+ unsigned int copy = min(size, size_total - offset);
- if (end_pos < 0 || pos < end_pos) {
- unsigned int copy = (end_pos < 0 ? count : min(count, end_pos - pos));
+ memcpy(kbuf + offset, data, copy);
+ }
+}
- if (kbuf) {
- memcpy(kbuf + pos, data, copy);
- } else {
- if (__copy_to_user(ubuf + pos, data, copy))
- return -EFAULT;
+/*
+ * Convert from kernel XSAVES compacted format to standard format and copy
+ * to a kernel-space ptrace buffer.
+ *
+ * It supports partial copy but pos always starts from zero. This is called
+ * from xstateregs_get() and there we check the CPU has XSAVES.
+ */
+int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset_start, unsigned int size_total)
+{
+ unsigned int offset, size;
+ struct xstate_header header;
+ int i;
+
+ /*
+ * Currently copy_regset_to_user() starts from pos 0:
+ */
+ if (unlikely(offset_start != 0))
+ return -EFAULT;
+
+ /*
+ * The destination is a ptrace buffer; we put in only user xstates:
+ */
+ memset(&header, 0, sizeof(header));
+ header.xfeatures = xsave->header.xfeatures;
+ header.xfeatures &= ~XFEATURE_MASK_SUPERVISOR;
+
+ /*
+ * Copy xregs_state->header:
+ */
+ offset = offsetof(struct xregs_state, header);
+ size = sizeof(header);
+
+ __copy_xstate_to_kernel(kbuf, &header, offset, size, size_total);
+
+ for (i = 0; i < XFEATURE_MAX; i++) {
+ /*
+ * Copy only in-use xstates:
+ */
+ if ((header.xfeatures >> i) & 1) {
+ void *src = __raw_xsave_addr(xsave, 1 << i);
+
+ offset = xstate_offsets[i];
+ size = xstate_sizes[i];
+
+ /* The next component has to fit fully into the output buffer: */
+ if (offset + size > size_total)
+ break;
+
+ __copy_xstate_to_kernel(kbuf, src, offset, size, size_total);
}
+
+ }
+
+ if (xfeatures_mxcsr_quirk(header.xfeatures)) {
+ offset = offsetof(struct fxregs_state, mxcsr);
+ size = MXCSR_AND_FLAGS_SIZE;
+ __copy_xstate_to_kernel(kbuf, &xsave->i387.mxcsr, offset, size, size_total);
+ }
+
+ /*
+ * Fill xsave->i387.sw_reserved value for ptrace frame:
+ */
+ offset = offsetof(struct fxregs_state, sw_reserved);
+ size = sizeof(xstate_fx_sw_bytes);
+
+ __copy_xstate_to_kernel(kbuf, xstate_fx_sw_bytes, offset, size, size_total);
+
+ return 0;
+}
+
+static inline int
+__copy_xstate_to_user(void __user *ubuf, const void *data, unsigned int offset, unsigned int size, unsigned int size_total)
+{
+ if (!size)
+ return 0;
+
+ if (offset < size_total) {
+ unsigned int copy = min(size, size_total - offset);
+
+ if (__copy_to_user(ubuf + offset, data, copy))
+ return -EFAULT;
}
return 0;
}
/*
* Convert from kernel XSAVES compacted format to standard format and copy
- * to a ptrace buffer. It supports partial copy but pos always starts from
+ * to a user-space buffer. It supports partial copy but pos always starts from
* zero. This is called from xstateregs_get() and there we check the CPU
* has XSAVES.
*/
-int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf,
- void __user *ubuf, struct xregs_state *xsave)
+int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset_start, unsigned int size_total)
{
unsigned int offset, size;
int ret, i;
@@ -961,7 +1076,7 @@ int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf,
/*
* Currently copy_regset_to_user() starts from pos 0:
*/
- if (unlikely(pos != 0))
+ if (unlikely(offset_start != 0))
return -EFAULT;
/*
@@ -977,8 +1092,7 @@ int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf,
offset = offsetof(struct xregs_state, header);
size = sizeof(header);
- ret = xstate_copyout(offset, size, kbuf, ubuf, &header, 0, count);
-
+ ret = __copy_xstate_to_user(ubuf, &header, offset, size, size_total);
if (ret)
return ret;
@@ -992,25 +1106,30 @@ int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf,
offset = xstate_offsets[i];
size = xstate_sizes[i];
- ret = xstate_copyout(offset, size, kbuf, ubuf, src, 0, count);
+ /* The next component has to fit fully into the output buffer: */
+ if (offset + size > size_total)
+ break;
+ ret = __copy_xstate_to_user(ubuf, src, offset, size, size_total);
if (ret)
return ret;
-
- if (offset + size >= count)
- break;
}
}
+ if (xfeatures_mxcsr_quirk(header.xfeatures)) {
+ offset = offsetof(struct fxregs_state, mxcsr);
+ size = MXCSR_AND_FLAGS_SIZE;
+ __copy_xstate_to_user(ubuf, &xsave->i387.mxcsr, offset, size, size_total);
+ }
+
/*
* Fill xsave->i387.sw_reserved value for ptrace frame:
*/
offset = offsetof(struct fxregs_state, sw_reserved);
size = sizeof(xstate_fx_sw_bytes);
- ret = xstate_copyout(offset, size, kbuf, ubuf, xstate_fx_sw_bytes, 0, count);
-
+ ret = __copy_xstate_to_user(ubuf, xstate_fx_sw_bytes, offset, size, size_total);
if (ret)
return ret;
@@ -1018,55 +1137,98 @@ int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf,
}
/*
- * Convert from a ptrace standard-format buffer to kernel XSAVES format
- * and copy to the target thread. This is called from xstateregs_set() and
- * there we check the CPU has XSAVES and a whole standard-sized buffer
- * exists.
+ * Convert from a ptrace standard-format kernel buffer to kernel XSAVES format
+ * and copy to the target thread. This is called from xstateregs_set().
*/
-int copyin_to_xsaves(const void *kbuf, const void __user *ubuf,
- struct xregs_state *xsave)
+int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf)
{
unsigned int offset, size;
int i;
- u64 xfeatures;
- u64 allowed_features;
+ struct xstate_header hdr;
offset = offsetof(struct xregs_state, header);
- size = sizeof(xfeatures);
+ size = sizeof(hdr);
- if (kbuf) {
- memcpy(&xfeatures, kbuf + offset, size);
- } else {
- if (__copy_from_user(&xfeatures, ubuf + offset, size))
- return -EFAULT;
+ memcpy(&hdr, kbuf + offset, size);
+
+ if (validate_xstate_header(&hdr))
+ return -EINVAL;
+
+ for (i = 0; i < XFEATURE_MAX; i++) {
+ u64 mask = ((u64)1 << i);
+
+ if (hdr.xfeatures & mask) {
+ void *dst = __raw_xsave_addr(xsave, 1 << i);
+
+ offset = xstate_offsets[i];
+ size = xstate_sizes[i];
+
+ memcpy(dst, kbuf + offset, size);
+ }
+ }
+
+ if (xfeatures_mxcsr_quirk(hdr.xfeatures)) {
+ offset = offsetof(struct fxregs_state, mxcsr);
+ size = MXCSR_AND_FLAGS_SIZE;
+ memcpy(&xsave->i387.mxcsr, kbuf + offset, size);
}
/*
- * Reject if the user sets any disabled or supervisor features:
+ * The state that came in from userspace was user-state only.
+ * Mask all the user states out of 'xfeatures':
+ */
+ xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR;
+
+ /*
+ * Add back in the features that came in from userspace:
*/
- allowed_features = xfeatures_mask & ~XFEATURE_MASK_SUPERVISOR;
+ xsave->header.xfeatures |= hdr.xfeatures;
- if (xfeatures & ~allowed_features)
+ return 0;
+}
+
+/*
+ * Convert from a ptrace or sigreturn standard-format user-space buffer to
+ * kernel XSAVES format and copy to the target thread. This is called from
+ * xstateregs_set(), as well as potentially from the sigreturn() and
+ * rt_sigreturn() system calls.
+ */
+int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
+{
+ unsigned int offset, size;
+ int i;
+ struct xstate_header hdr;
+
+ offset = offsetof(struct xregs_state, header);
+ size = sizeof(hdr);
+
+ if (__copy_from_user(&hdr, ubuf + offset, size))
+ return -EFAULT;
+
+ if (validate_xstate_header(&hdr))
return -EINVAL;
for (i = 0; i < XFEATURE_MAX; i++) {
u64 mask = ((u64)1 << i);
- if (xfeatures & mask) {
+ if (hdr.xfeatures & mask) {
void *dst = __raw_xsave_addr(xsave, 1 << i);
offset = xstate_offsets[i];
size = xstate_sizes[i];
- if (kbuf) {
- memcpy(dst, kbuf + offset, size);
- } else {
- if (__copy_from_user(dst, ubuf + offset, size))
- return -EFAULT;
- }
+ if (__copy_from_user(dst, ubuf + offset, size))
+ return -EFAULT;
}
}
+ if (xfeatures_mxcsr_quirk(hdr.xfeatures)) {
+ offset = offsetof(struct fxregs_state, mxcsr);
+ size = MXCSR_AND_FLAGS_SIZE;
+ if (__copy_from_user(&xsave->i387.mxcsr, ubuf + offset, size))
+ return -EFAULT;
+ }
+
/*
* The state that came in from userspace was user-state only.
* Mask all the user states out of 'xfeatures':
@@ -1076,7 +1238,7 @@ int copyin_to_xsaves(const void *kbuf, const void __user *ubuf,
/*
* Add back in the features that came in from userspace:
*/
- xsave->header.xfeatures |= xfeatures;
+ xsave->header.xfeatures |= hdr.xfeatures;
return 0;
}
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 1f38d9a4d9de..d4eb450144fd 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -64,7 +64,7 @@ static void call_on_stack(void *func, void *stack)
static inline void *current_stack(void)
{
- return (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1));
+ return (void *)(current_stack_pointer & ~(THREAD_SIZE - 1));
}
static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
@@ -88,7 +88,7 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
/* Save the next esp at the bottom of the stack */
prev_esp = (u32 *)irqstk;
- *prev_esp = current_stack_pointer();
+ *prev_esp = current_stack_pointer;
if (unlikely(overflow))
call_on_stack(print_stack_overflow, isp);
@@ -139,7 +139,7 @@ void do_softirq_own_stack(void)
/* Push the previous esp onto the stack */
prev_esp = (u32 *)irqstk;
- *prev_esp = current_stack_pointer();
+ *prev_esp = current_stack_pointer;
call_on_stack(__do_softirq, isp);
}
diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h
index db2182d63ed0..3fc0f9a794cb 100644
--- a/arch/x86/kernel/kprobes/common.h
+++ b/arch/x86/kernel/kprobes/common.h
@@ -3,6 +3,15 @@
/* Kprobes and Optprobes common header */
+#include <asm/asm.h>
+
+#ifdef CONFIG_FRAME_POINTER
+# define SAVE_RBP_STRING " push %" _ASM_BP "\n" \
+ " mov %" _ASM_SP ", %" _ASM_BP "\n"
+#else
+# define SAVE_RBP_STRING " push %" _ASM_BP "\n"
+#endif
+
#ifdef CONFIG_X86_64
#define SAVE_REGS_STRING \
/* Skip cs, ip, orig_ax. */ \
@@ -17,7 +26,7 @@
" pushq %r10\n" \
" pushq %r11\n" \
" pushq %rbx\n" \
- " pushq %rbp\n" \
+ SAVE_RBP_STRING \
" pushq %r12\n" \
" pushq %r13\n" \
" pushq %r14\n" \
@@ -48,7 +57,7 @@
" pushl %es\n" \
" pushl %ds\n" \
" pushl %eax\n" \
- " pushl %ebp\n" \
+ SAVE_RBP_STRING \
" pushl %edi\n" \
" pushl %esi\n" \
" pushl %edx\n" \
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index f0153714ddac..0742491cbb73 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -1080,8 +1080,6 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
* raw stack chunk with redzones:
*/
__memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr, MIN_STACK_SIZE(addr));
- regs->flags &= ~X86_EFLAGS_IF;
- trace_hardirqs_off();
regs->ip = (unsigned long)(jp->entry);
/*
diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c
index 4b0592ca9e47..8c1cc08f514f 100644
--- a/arch/x86/kernel/ksysfs.c
+++ b/arch/x86/kernel/ksysfs.c
@@ -299,7 +299,7 @@ static int __init create_setup_data_nodes(struct kobject *parent)
return 0;
out_clean_nodes:
- for (j = i - 1; j > 0; j--)
+ for (j = i - 1; j >= 0; j--)
cleanup_setup_data_node(*(kobjp + j));
kfree(kobjp);
out_setup_data_kobj:
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index aa60a08b65b1..8bb9594d0761 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -117,7 +117,11 @@ static struct kvm_task_sleep_node *_find_apf_task(struct kvm_task_sleep_head *b,
return NULL;
}
-void kvm_async_pf_task_wait(u32 token)
+/*
+ * @interrupt_kernel: Is this called from a routine which interrupts the kernel
+ * (other than user space)?
+ */
+void kvm_async_pf_task_wait(u32 token, int interrupt_kernel)
{
u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
@@ -140,7 +144,10 @@ void kvm_async_pf_task_wait(u32 token)
n.token = token;
n.cpu = smp_processor_id();
- n.halted = is_idle_task(current) || preempt_count() > 1;
+ n.halted = is_idle_task(current) ||
+ (IS_ENABLED(CONFIG_PREEMPT_COUNT)
+ ? preempt_count() > 1 || rcu_preempt_depth()
+ : interrupt_kernel);
init_swait_queue_head(&n.wq);
hlist_add_head(&n.link, &b->list);
raw_spin_unlock(&b->lock);
@@ -268,7 +275,7 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
case KVM_PV_REASON_PAGE_NOT_PRESENT:
/* page is swapped out by the host. */
prev_state = exception_enter();
- kvm_async_pf_task_wait((u32)read_cr2());
+ kvm_async_pf_task_wait((u32)read_cr2(), !user_mode(regs));
exception_exit(prev_state);
break;
case KVM_PV_REASON_PAGE_READY:
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 54180fa6f66f..add33f600531 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -105,6 +105,10 @@ void __noreturn machine_real_restart(unsigned int type)
load_cr3(initial_page_table);
#else
write_cr3(real_mode_header->trampoline_pgd);
+
+ /* Exiting long mode will fail if CR4.PCIDE is set. */
+ if (static_cpu_has(X86_FEATURE_PCID))
+ cr4_clear_bits(X86_CR4_PCIDE);
#endif
/* Jump to the identity-mapped low memory code */
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index e04442345fc0..4e188fda5961 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -263,7 +263,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
sp = (unsigned long) ka->sa.sa_restorer;
}
- if (fpu->fpstate_active) {
+ if (fpu->initialized) {
sp = fpu__alloc_mathframe(sp, IS_ENABLED(CONFIG_X86_32),
&buf_fx, &math_size);
*fpstate = (void __user *)sp;
@@ -279,7 +279,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
return (void __user *)-1L;
/* save i387 and extended state */
- if (fpu->fpstate_active &&
+ if (fpu->initialized &&
copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx, math_size) < 0)
return (void __user *)-1L;
@@ -755,7 +755,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
/*
* Ensure the signal handler starts with the new fpu state.
*/
- if (fpu->fpstate_active)
+ if (fpu->initialized)
fpu__clear(fpu);
}
signal_setup_done(failed, ksig, stepping);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 0854ff169274..ad59edd84de7 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -232,12 +232,6 @@ static void notrace start_secondary(void *unused)
*/
if (boot_cpu_has(X86_FEATURE_PCID))
__write_cr4(__read_cr4() | X86_CR4_PCIDE);
- cpu_init();
- x86_cpuinit.early_percpu_clock_init();
- preempt_disable();
- smp_callin();
-
- enable_start_cpu0 = 0;
#ifdef CONFIG_X86_32
/* switch away from the initial page table */
@@ -245,6 +239,13 @@ static void notrace start_secondary(void *unused)
__flush_tlb_all();
#endif
+ cpu_init();
+ x86_cpuinit.early_percpu_clock_init();
+ preempt_disable();
+ smp_callin();
+
+ enable_start_cpu0 = 0;
+
/* otherwise gcc will move up smp_processor_id before the cpu_init */
barrier();
/*
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 34ea3651362e..67db4f43309e 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -142,7 +142,7 @@ void ist_begin_non_atomic(struct pt_regs *regs)
* from double_fault.
*/
BUG_ON((unsigned long)(current_top_of_stack() -
- current_stack_pointer()) >= THREAD_SIZE);
+ current_stack_pointer) >= THREAD_SIZE);
preempt_enable_no_resched();
}
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
index d145a0b1f529..3dc26f95d46e 100644
--- a/arch/x86/kernel/unwind_frame.c
+++ b/arch/x86/kernel/unwind_frame.c
@@ -44,7 +44,8 @@ static void unwind_dump(struct unwind_state *state)
state->stack_info.type, state->stack_info.next_sp,
state->stack_mask, state->graph_idx);
- for (sp = state->orig_sp; sp; sp = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
+ for (sp = PTR_ALIGN(state->orig_sp, sizeof(long)); sp;
+ sp = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
if (get_stack_info(sp, state->task, &stack_info, &visit_mask))
break;
@@ -174,6 +175,7 @@ static bool is_last_task_frame(struct unwind_state *state)
* This determines if the frame pointer actually contains an encoded pointer to
* pt_regs on the stack. See ENCODE_FRAME_POINTER.
*/
+#ifdef CONFIG_X86_64
static struct pt_regs *decode_frame_pointer(unsigned long *bp)
{
unsigned long regs = (unsigned long)bp;
@@ -183,6 +185,23 @@ static struct pt_regs *decode_frame_pointer(unsigned long *bp)
return (struct pt_regs *)(regs & ~0x1);
}
+#else
+static struct pt_regs *decode_frame_pointer(unsigned long *bp)
+{
+ unsigned long regs = (unsigned long)bp;
+
+ if (regs & 0x80000000)
+ return NULL;
+
+ return (struct pt_regs *)(regs | 0x80000000);
+}
+#endif
+
+#ifdef CONFIG_X86_32
+#define KERNEL_REGS_SIZE (sizeof(struct pt_regs) - 2*sizeof(long))
+#else
+#define KERNEL_REGS_SIZE (sizeof(struct pt_regs))
+#endif
static bool update_stack_state(struct unwind_state *state,
unsigned long *next_bp)
@@ -202,7 +221,7 @@ static bool update_stack_state(struct unwind_state *state,
regs = decode_frame_pointer(next_bp);
if (regs) {
frame = (unsigned long *)regs;
- len = regs_size(regs);
+ len = KERNEL_REGS_SIZE;
state->got_irq = true;
} else {
frame = next_bp;
@@ -226,6 +245,14 @@ static bool update_stack_state(struct unwind_state *state,
frame < prev_frame_end)
return false;
+ /*
+ * On 32-bit with user mode regs, make sure the last two regs are safe
+ * to access:
+ */
+ if (IS_ENABLED(CONFIG_X86_32) && regs && user_mode(regs) &&
+ !on_stack(info, frame, len + 2*sizeof(long)))
+ return false;
+
/* Move state to the next frame: */
if (regs) {
state->regs = regs;
@@ -328,6 +355,13 @@ bad_address:
state->regs->sp < (unsigned long)task_pt_regs(state->task))
goto the_end;
+ /*
+ * There are some known frame pointer issues on 32-bit. Disable
+ * unwinder warnings on 32-bit until it gets objtool support.
+ */
+ if (IS_ENABLED(CONFIG_X86_32))
+ goto the_end;
+
if (state->regs) {
printk_deferred_once(KERN_WARNING
"WARNING: kernel stack regs at %p in %s:%d has bad 'bp' value %p\n",
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 3ea624452f93..3c48bc8bf08c 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -23,6 +23,7 @@ config KVM
depends on HIGH_RES_TIMERS
# for TASKSTATS/TASK_DELAY_ACCT:
depends on NET && MULTIUSER
+ depends on X86_LOCAL_APIC
select PREEMPT_NOTIFIERS
select MMU_NOTIFIER
select ANON_INODES
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 16bf6655aa85..d90cdc77e077 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -425,8 +425,10 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));
#op " %al \n\t" \
FOP_RET
-asm(".global kvm_fastop_exception \n"
- "kvm_fastop_exception: xor %esi, %esi; ret");
+asm(".pushsection .fixup, \"ax\"\n"
+ ".global kvm_fastop_exception \n"
+ "kvm_fastop_exception: xor %esi, %esi; ret\n"
+ ".popsection");
FOP_START(setcc)
FOP_SETCC(seto)
@@ -4102,10 +4104,12 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt)
ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
if (efer & EFER_LMA) {
u64 maxphyaddr;
- u32 eax = 0x80000008;
+ u32 eax, ebx, ecx, edx;
- if (ctxt->ops->get_cpuid(ctxt, &eax, NULL, NULL,
- NULL, false))
+ eax = 0x80000008;
+ ecx = 0;
+ if (ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx,
+ &edx, false))
maxphyaddr = eax & 0xff;
else
maxphyaddr = 36;
@@ -5296,7 +5300,6 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt,
static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
{
- register void *__sp asm(_ASM_SP);
ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF;
if (!(ctxt->d & ByteOp))
@@ -5304,7 +5307,7 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n"
: "+a"(ctxt->dst.val), "+d"(ctxt->src.val), [flags]"+D"(flags),
- [fastop]"+S"(fop), "+r"(__sp)
+ [fastop]"+S"(fop), ASM_CALL_CONSTRAINT
: "c"(ctxt->src2.val));
ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index eca30c1eb1d9..7a69cf053711 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3837,7 +3837,7 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
case KVM_PV_REASON_PAGE_NOT_PRESENT:
vcpu->arch.apf.host_apf_reason = 0;
local_irq_disable();
- kvm_async_pf_task_wait(fault_address);
+ kvm_async_pf_task_wait(fault_address, 0);
local_irq_enable();
break;
case KVM_PV_REASON_PAGE_READY:
@@ -3974,19 +3974,19 @@ static inline bool is_last_gpte(struct kvm_mmu *mmu,
unsigned level, unsigned gpte)
{
/*
- * PT_PAGE_TABLE_LEVEL always terminates. The RHS has bit 7 set
- * iff level <= PT_PAGE_TABLE_LEVEL, which for our purpose means
- * level == PT_PAGE_TABLE_LEVEL; set PT_PAGE_SIZE_MASK in gpte then.
- */
- gpte |= level - PT_PAGE_TABLE_LEVEL - 1;
-
- /*
* The RHS has bit 7 set iff level < mmu->last_nonleaf_level.
* If it is clear, there are no large pages at this level, so clear
* PT_PAGE_SIZE_MASK in gpte if that is the case.
*/
gpte &= level - mmu->last_nonleaf_level;
+ /*
+ * PT_PAGE_TABLE_LEVEL always terminates. The RHS has bit 7 set
+ * iff level <= PT_PAGE_TABLE_LEVEL, which for our purpose means
+ * level == PT_PAGE_TABLE_LEVEL; set PT_PAGE_SIZE_MASK in gpte then.
+ */
+ gpte |= level - PT_PAGE_TABLE_LEVEL - 1;
+
return gpte & PT_PAGE_SIZE_MASK;
}
@@ -4555,6 +4555,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
update_permission_bitmask(vcpu, context, true);
update_pkru_bitmask(vcpu, context, true);
+ update_last_nonleaf_level(vcpu, context);
reset_rsvds_bits_mask_ept(vcpu, context, execonly);
reset_ept_shadow_zero_bits_mask(vcpu, context, execonly);
}
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 86b68dc5a649..f18d1f8d332b 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -334,10 +334,11 @@ retry_walk:
--walker->level;
index = PT_INDEX(addr, walker->level);
-
table_gfn = gpte_to_gfn(pte);
offset = index * sizeof(pt_element_t);
pte_gpa = gfn_to_gpa(table_gfn) + offset;
+
+ BUG_ON(walker->level < 1);
walker->table_gfn[walker->level - 1] = table_gfn;
walker->pte_gpa[walker->level - 1] = pte_gpa;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 06c0c6d0541e..95a01609d7ee 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -200,6 +200,8 @@ struct loaded_vmcs {
int cpu;
bool launched;
bool nmi_known_unmasked;
+ unsigned long vmcs_host_cr3; /* May not match real cr3 */
+ unsigned long vmcs_host_cr4; /* May not match real cr4 */
struct list_head loaded_vmcss_on_cpu_link;
};
@@ -600,8 +602,6 @@ struct vcpu_vmx {
int gs_ldt_reload_needed;
int fs_reload_needed;
u64 msr_host_bndcfgs;
- unsigned long vmcs_host_cr3; /* May not match real cr3 */
- unsigned long vmcs_host_cr4; /* May not match real cr4 */
} host_state;
struct {
int vm86_active;
@@ -2202,46 +2202,44 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
struct pi_desc old, new;
unsigned int dest;
- if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
- !irq_remapping_cap(IRQ_POSTING_CAP) ||
- !kvm_vcpu_apicv_active(vcpu))
+ /*
+ * In case of hot-plug or hot-unplug, we may have to undo
+ * vmx_vcpu_pi_put even if there is no assigned device. And we
+ * always keep PI.NDST up to date for simplicity: it makes the
+ * code easier, and CPU migration is not a fast path.
+ */
+ if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu)
+ return;
+
+ /*
+ * First handle the simple case where no cmpxchg is necessary; just
+ * allow posting non-urgent interrupts.
+ *
+ * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change
+ * PI.NDST: pi_post_block will do it for us and the wakeup_handler
+ * expects the VCPU to be on the blocked_vcpu_list that matches
+ * PI.NDST.
+ */
+ if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR ||
+ vcpu->cpu == cpu) {
+ pi_clear_sn(pi_desc);
return;
+ }
+ /* The full case. */
do {
old.control = new.control = pi_desc->control;
- /*
- * If 'nv' field is POSTED_INTR_WAKEUP_VECTOR, there
- * are two possible cases:
- * 1. After running 'pre_block', context switch
- * happened. For this case, 'sn' was set in
- * vmx_vcpu_put(), so we need to clear it here.
- * 2. After running 'pre_block', we were blocked,
- * and woken up by some other guy. For this case,
- * we don't need to do anything, 'pi_post_block'
- * will do everything for us. However, we cannot
- * check whether it is case #1 or case #2 here
- * (maybe, not needed), so we also clear sn here,
- * I think it is not a big deal.
- */
- if (pi_desc->nv != POSTED_INTR_WAKEUP_VECTOR) {
- if (vcpu->cpu != cpu) {
- dest = cpu_physical_id(cpu);
-
- if (x2apic_enabled())
- new.ndst = dest;
- else
- new.ndst = (dest << 8) & 0xFF00;
- }
+ dest = cpu_physical_id(cpu);
- /* set 'NV' to 'notification vector' */
- new.nv = POSTED_INTR_VECTOR;
- }
+ if (x2apic_enabled())
+ new.ndst = dest;
+ else
+ new.ndst = (dest << 8) & 0xFF00;
- /* Allow posting non-urgent interrupts */
new.sn = 0;
- } while (cmpxchg(&pi_desc->control, old.control,
- new.control) != old.control);
+ } while (cmpxchg64(&pi_desc->control, old.control,
+ new.control) != old.control);
}
static void decache_tsc_multiplier(struct vcpu_vmx *vmx)
@@ -5077,21 +5075,30 @@ static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu,
int pi_vec = nested ? POSTED_INTR_NESTED_VECTOR : POSTED_INTR_VECTOR;
if (vcpu->mode == IN_GUEST_MODE) {
- struct vcpu_vmx *vmx = to_vmx(vcpu);
-
/*
- * Currently, we don't support urgent interrupt,
- * all interrupts are recognized as non-urgent
- * interrupt, so we cannot post interrupts when
- * 'SN' is set.
+ * The vector of interrupt to be delivered to vcpu had
+ * been set in PIR before this function.
+ *
+ * Following cases will be reached in this block, and
+ * we always send a notification event in all cases as
+ * explained below.
+ *
+ * Case 1: vcpu keeps in non-root mode. Sending a
+ * notification event posts the interrupt to vcpu.
*
- * If the vcpu is in guest mode, it means it is
- * running instead of being scheduled out and
- * waiting in the run queue, and that's the only
- * case when 'SN' is set currently, warning if
- * 'SN' is set.
+ * Case 2: vcpu exits to root mode and is still
+ * runnable. PIR will be synced to vIRR before the
+ * next vcpu entry. Sending a notification event in
+ * this case has no effect, as vcpu is not in root
+ * mode.
+ *
+ * Case 3: vcpu exits to root mode and is blocked.
+ * vcpu_block() has already synced PIR to vIRR and
+ * never blocks vcpu if vIRR is not cleared. Therefore,
+ * a blocked vcpu here does not wait for any requested
+ * interrupts in PIR, and sending a notification event
+ * which has no effect is safe here.
*/
- WARN_ON_ONCE(pi_test_sn(&vmx->pi_desc));
apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), pi_vec);
return true;
@@ -5169,12 +5176,12 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
*/
cr3 = __read_cr3();
vmcs_writel(HOST_CR3, cr3); /* 22.2.3 FIXME: shadow tables */
- vmx->host_state.vmcs_host_cr3 = cr3;
+ vmx->loaded_vmcs->vmcs_host_cr3 = cr3;
/* Save the most likely value for this task's CR4 in the VMCS. */
cr4 = cr4_read_shadow();
vmcs_writel(HOST_CR4, cr4); /* 22.2.3, 22.2.5 */
- vmx->host_state.vmcs_host_cr4 = cr4;
+ vmx->loaded_vmcs->vmcs_host_cr4 = cr4;
vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */
#ifdef CONFIG_X86_64
@@ -9036,7 +9043,6 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
{
u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
- register void *__sp asm(_ASM_SP);
if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK))
== (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) {
@@ -9065,7 +9071,7 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
#ifdef CONFIG_X86_64
[sp]"=&r"(tmp),
#endif
- "+r"(__sp)
+ ASM_CALL_CONSTRAINT
:
[entry]"r"(entry),
[ss]"i"(__KERNEL_DS),
@@ -9265,15 +9271,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
cr3 = __get_current_cr3_fast();
- if (unlikely(cr3 != vmx->host_state.vmcs_host_cr3)) {
+ if (unlikely(cr3 != vmx->loaded_vmcs->vmcs_host_cr3)) {
vmcs_writel(HOST_CR3, cr3);
- vmx->host_state.vmcs_host_cr3 = cr3;
+ vmx->loaded_vmcs->vmcs_host_cr3 = cr3;
}
cr4 = cr4_read_shadow();
- if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) {
+ if (unlikely(cr4 != vmx->loaded_vmcs->vmcs_host_cr4)) {
vmcs_writel(HOST_CR4, cr4);
- vmx->host_state.vmcs_host_cr4 = cr4;
+ vmx->loaded_vmcs->vmcs_host_cr4 = cr4;
}
/* When single-stepping over STI and MOV SS, we must clear the
@@ -9583,6 +9589,13 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED;
+ /*
+ * Enforce invariant: pi_desc.nv is always either POSTED_INTR_VECTOR
+ * or POSTED_INTR_WAKEUP_VECTOR.
+ */
+ vmx->pi_desc.nv = POSTED_INTR_VECTOR;
+ vmx->pi_desc.sn = 1;
+
return &vmx->vcpu;
free_vmcs:
@@ -9831,7 +9844,8 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
WARN_ON(!is_guest_mode(vcpu));
- if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code)) {
+ if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code) &&
+ !to_vmx(vcpu)->nested.nested_run_pending) {
vmcs12->vm_exit_intr_error_code = fault->error_code;
nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
PF_VECTOR | INTR_TYPE_HARD_EXCEPTION |
@@ -11283,7 +11297,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
/* Same as above - no reason to call set_cr4_guest_host_mask(). */
vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
- kvm_set_cr4(vcpu, vmcs12->host_cr4);
+ vmx_set_cr4(vcpu, vmcs12->host_cr4);
nested_ept_uninit_mmu_context(vcpu);
@@ -11696,6 +11710,37 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
}
+static void __pi_post_block(struct kvm_vcpu *vcpu)
+{
+ struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+ struct pi_desc old, new;
+ unsigned int dest;
+
+ do {
+ old.control = new.control = pi_desc->control;
+ WARN(old.nv != POSTED_INTR_WAKEUP_VECTOR,
+ "Wakeup handler not enabled while the VCPU is blocked\n");
+
+ dest = cpu_physical_id(vcpu->cpu);
+
+ if (x2apic_enabled())
+ new.ndst = dest;
+ else
+ new.ndst = (dest << 8) & 0xFF00;
+
+ /* set 'NV' to 'notification vector' */
+ new.nv = POSTED_INTR_VECTOR;
+ } while (cmpxchg64(&pi_desc->control, old.control,
+ new.control) != old.control);
+
+ if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) {
+ spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+ list_del(&vcpu->blocked_vcpu_list);
+ spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+ vcpu->pre_pcpu = -1;
+ }
+}
+
/*
* This routine does the following things for vCPU which is going
* to be blocked if VT-d PI is enabled.
@@ -11711,7 +11756,6 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
*/
static int pi_pre_block(struct kvm_vcpu *vcpu)
{
- unsigned long flags;
unsigned int dest;
struct pi_desc old, new;
struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
@@ -11721,34 +11765,20 @@ static int pi_pre_block(struct kvm_vcpu *vcpu)
!kvm_vcpu_apicv_active(vcpu))
return 0;
- vcpu->pre_pcpu = vcpu->cpu;
- spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
- vcpu->pre_pcpu), flags);
- list_add_tail(&vcpu->blocked_vcpu_list,
- &per_cpu(blocked_vcpu_on_cpu,
- vcpu->pre_pcpu));
- spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock,
- vcpu->pre_pcpu), flags);
+ WARN_ON(irqs_disabled());
+ local_irq_disable();
+ if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) {
+ vcpu->pre_pcpu = vcpu->cpu;
+ spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+ list_add_tail(&vcpu->blocked_vcpu_list,
+ &per_cpu(blocked_vcpu_on_cpu,
+ vcpu->pre_pcpu));
+ spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+ }
do {
old.control = new.control = pi_desc->control;
- /*
- * We should not block the vCPU if
- * an interrupt is posted for it.
- */
- if (pi_test_on(pi_desc) == 1) {
- spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
- vcpu->pre_pcpu), flags);
- list_del(&vcpu->blocked_vcpu_list);
- spin_unlock_irqrestore(
- &per_cpu(blocked_vcpu_on_cpu_lock,
- vcpu->pre_pcpu), flags);
- vcpu->pre_pcpu = -1;
-
- return 1;
- }
-
WARN((pi_desc->sn == 1),
"Warning: SN field of posted-interrupts "
"is set before blocking\n");
@@ -11770,10 +11800,15 @@ static int pi_pre_block(struct kvm_vcpu *vcpu)
/* set 'NV' to 'wakeup vector' */
new.nv = POSTED_INTR_WAKEUP_VECTOR;
- } while (cmpxchg(&pi_desc->control, old.control,
- new.control) != old.control);
+ } while (cmpxchg64(&pi_desc->control, old.control,
+ new.control) != old.control);
- return 0;
+ /* We should not block the vCPU if an interrupt is posted for it. */
+ if (pi_test_on(pi_desc) == 1)
+ __pi_post_block(vcpu);
+
+ local_irq_enable();
+ return (vcpu->pre_pcpu == -1);
}
static int vmx_pre_block(struct kvm_vcpu *vcpu)
@@ -11789,44 +11824,13 @@ static int vmx_pre_block(struct kvm_vcpu *vcpu)
static void pi_post_block(struct kvm_vcpu *vcpu)
{
- struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
- struct pi_desc old, new;
- unsigned int dest;
- unsigned long flags;
-
- if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
- !irq_remapping_cap(IRQ_POSTING_CAP) ||
- !kvm_vcpu_apicv_active(vcpu))
+ if (vcpu->pre_pcpu == -1)
return;
- do {
- old.control = new.control = pi_desc->control;
-
- dest = cpu_physical_id(vcpu->cpu);
-
- if (x2apic_enabled())
- new.ndst = dest;
- else
- new.ndst = (dest << 8) & 0xFF00;
-
- /* Allow posting non-urgent interrupts */
- new.sn = 0;
-
- /* set 'NV' to 'notification vector' */
- new.nv = POSTED_INTR_VECTOR;
- } while (cmpxchg(&pi_desc->control, old.control,
- new.control) != old.control);
-
- if(vcpu->pre_pcpu != -1) {
- spin_lock_irqsave(
- &per_cpu(blocked_vcpu_on_cpu_lock,
- vcpu->pre_pcpu), flags);
- list_del(&vcpu->blocked_vcpu_list);
- spin_unlock_irqrestore(
- &per_cpu(blocked_vcpu_on_cpu_lock,
- vcpu->pre_pcpu), flags);
- vcpu->pre_pcpu = -1;
- }
+ WARN_ON(irqs_disabled());
+ local_irq_disable();
+ __pi_post_block(vcpu);
+ local_irq_enable();
}
static void vmx_post_block(struct kvm_vcpu *vcpu)
@@ -11911,12 +11915,8 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
if (set)
ret = irq_set_vcpu_affinity(host_irq, &vcpu_info);
- else {
- /* suppress notification event before unposting */
- pi_set_sn(vcpu_to_pi_desc(vcpu));
+ else
ret = irq_set_vcpu_affinity(host_irq, NULL);
- pi_clear_sn(vcpu_to_pi_desc(vcpu));
- }
if (ret < 0) {
printk(KERN_INFO "%s: failed to update PI IRTE\n",
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cd17b7d9a107..03869eb7fcd6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7225,7 +7225,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
int r;
sigset_t sigsaved;
- fpu__activate_curr(fpu);
+ fpu__initialize(fpu);
if (vcpu->sigset_active)
sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c
index d4a7df2205b8..220638a4cb94 100644
--- a/arch/x86/math-emu/fpu_entry.c
+++ b/arch/x86/math-emu/fpu_entry.c
@@ -114,7 +114,7 @@ void math_emulate(struct math_emu_info *info)
struct desc_struct code_descriptor;
struct fpu *fpu = &current->thread.fpu;
- fpu__activate_curr(fpu);
+ fpu__initialize(fpu);
#ifdef RE_ENTRANT_CHECKING
if (emulating) {
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 72bf8c01c6e3..e1f095884386 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -1,5 +1,12 @@
-# Kernel does not boot with instrumentation of tlb.c.
-KCOV_INSTRUMENT_tlb.o := n
+# Kernel does not boot with instrumentation of tlb.c and mem_encrypt.c
+KCOV_INSTRUMENT_tlb.o := n
+KCOV_INSTRUMENT_mem_encrypt.o := n
+
+KASAN_SANITIZE_mem_encrypt.o := n
+
+ifdef CONFIG_FUNCTION_TRACER
+CFLAGS_REMOVE_mem_encrypt.o = -pg
+endif
obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
pat.o pgtable.o physaddr.o setup_nx.o tlb.o
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index c076f710de4c..c3521e2be396 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -2,6 +2,7 @@
#include <linux/uaccess.h>
#include <linux/sched/debug.h>
+#include <asm/fpu/internal.h>
#include <asm/traps.h>
#include <asm/kdebug.h>
@@ -78,6 +79,29 @@ bool ex_handler_refcount(const struct exception_table_entry *fixup,
}
EXPORT_SYMBOL_GPL(ex_handler_refcount);
+/*
+ * Handler for when we fail to restore a task's FPU state. We should never get
+ * here because the FPU state of a task using the FPU (task->thread.fpu.state)
+ * should always be valid. However, past bugs have allowed userspace to set
+ * reserved bits in the XSAVE area using PTRACE_SETREGSET or sys_rt_sigreturn().
+ * These caused XRSTOR to fail when switching to the task, leaking the FPU
+ * registers of the task previously executing on the CPU. Mitigate this class
+ * of vulnerability by restoring from the initial state (essentially, zeroing
+ * out all the FPU registers) if we can't restore from the task's FPU state.
+ */
+bool ex_handler_fprestore(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr)
+{
+ regs->ip = ex_fixup_addr(fixup);
+
+ WARN_ONCE(1, "Bad FPU state detected at %pB, reinitializing FPU registers.",
+ (void *)instruction_pointer(regs));
+
+ __copy_kernel_to_fpregs(&init_fpstate, -1);
+ return true;
+}
+EXPORT_SYMBOL_GPL(ex_handler_fprestore);
+
bool ex_handler_ext(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr)
{
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index b836a7274e12..e2baeaa053a5 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -192,8 +192,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
* 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really
* faulted on a pte with its pkey=4.
*/
-static void fill_sig_info_pkey(int si_code, siginfo_t *info,
- struct vm_area_struct *vma)
+static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey)
{
/* This is effectively an #ifdef */
if (!boot_cpu_has(X86_FEATURE_OSPKE))
@@ -209,7 +208,7 @@ static void fill_sig_info_pkey(int si_code, siginfo_t *info,
* valid VMA, so we should never reach this without a
* valid VMA.
*/
- if (!vma) {
+ if (!pkey) {
WARN_ONCE(1, "PKU fault with no VMA passed in");
info->si_pkey = 0;
return;
@@ -219,13 +218,12 @@ static void fill_sig_info_pkey(int si_code, siginfo_t *info,
* absolutely guranteed to be 100% accurate because of
* the race explained above.
*/
- info->si_pkey = vma_pkey(vma);
+ info->si_pkey = *pkey;
}
static void
force_sig_info_fault(int si_signo, int si_code, unsigned long address,
- struct task_struct *tsk, struct vm_area_struct *vma,
- int fault)
+ struct task_struct *tsk, u32 *pkey, int fault)
{
unsigned lsb = 0;
siginfo_t info;
@@ -240,7 +238,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address,
lsb = PAGE_SHIFT;
info.si_addr_lsb = lsb;
- fill_sig_info_pkey(si_code, &info, vma);
+ fill_sig_info_pkey(si_code, &info, pkey);
force_sig_info(si_signo, &info, tsk);
}
@@ -762,8 +760,6 @@ no_context(struct pt_regs *regs, unsigned long error_code,
struct task_struct *tsk = current;
unsigned long flags;
int sig;
- /* No context means no VMA to pass down */
- struct vm_area_struct *vma = NULL;
/* Are we prepared to handle this kernel fault? */
if (fixup_exception(regs, X86_TRAP_PF)) {
@@ -788,7 +784,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
/* XXX: hwpoison faults will set the wrong code. */
force_sig_info_fault(signal, si_code, address,
- tsk, vma, 0);
+ tsk, NULL, 0);
}
/*
@@ -806,7 +802,6 @@ no_context(struct pt_regs *regs, unsigned long error_code,
if (is_vmalloc_addr((void *)address) &&
(((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) ||
address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) {
- register void *__sp asm("rsp");
unsigned long stack = this_cpu_read(orig_ist.ist[DOUBLEFAULT_STACK]) - sizeof(void *);
/*
* We're likely to be running with very little stack space
@@ -821,7 +816,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
asm volatile ("movq %[stack], %%rsp\n\t"
"call handle_stack_overflow\n\t"
"1: jmp 1b"
- : "+r" (__sp)
+ : ASM_CALL_CONSTRAINT
: "D" ("kernel stack overflow (page fault)"),
"S" (regs), "d" (address),
[stack] "rm" (stack));
@@ -897,8 +892,7 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
static void
__bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
- unsigned long address, struct vm_area_struct *vma,
- int si_code)
+ unsigned long address, u32 *pkey, int si_code)
{
struct task_struct *tsk = current;
@@ -946,7 +940,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
tsk->thread.error_code = error_code;
tsk->thread.trap_nr = X86_TRAP_PF;
- force_sig_info_fault(SIGSEGV, si_code, address, tsk, vma, 0);
+ force_sig_info_fault(SIGSEGV, si_code, address, tsk, pkey, 0);
return;
}
@@ -959,9 +953,9 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
static noinline void
bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
- unsigned long address, struct vm_area_struct *vma)
+ unsigned long address, u32 *pkey)
{
- __bad_area_nosemaphore(regs, error_code, address, vma, SEGV_MAPERR);
+ __bad_area_nosemaphore(regs, error_code, address, pkey, SEGV_MAPERR);
}
static void
@@ -969,6 +963,10 @@ __bad_area(struct pt_regs *regs, unsigned long error_code,
unsigned long address, struct vm_area_struct *vma, int si_code)
{
struct mm_struct *mm = current->mm;
+ u32 pkey;
+
+ if (vma)
+ pkey = vma_pkey(vma);
/*
* Something tried to access memory that isn't in our memory map..
@@ -976,7 +974,8 @@ __bad_area(struct pt_regs *regs, unsigned long error_code,
*/
up_read(&mm->mmap_sem);
- __bad_area_nosemaphore(regs, error_code, address, vma, si_code);
+ __bad_area_nosemaphore(regs, error_code, address,
+ (vma) ? &pkey : NULL, si_code);
}
static noinline void
@@ -1019,7 +1018,7 @@ bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
static void
do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
- struct vm_area_struct *vma, unsigned int fault)
+ u32 *pkey, unsigned int fault)
{
struct task_struct *tsk = current;
int code = BUS_ADRERR;
@@ -1046,13 +1045,12 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
code = BUS_MCEERR_AR;
}
#endif
- force_sig_info_fault(SIGBUS, code, address, tsk, vma, fault);
+ force_sig_info_fault(SIGBUS, code, address, tsk, pkey, fault);
}
static noinline void
mm_fault_error(struct pt_regs *regs, unsigned long error_code,
- unsigned long address, struct vm_area_struct *vma,
- unsigned int fault)
+ unsigned long address, u32 *pkey, unsigned int fault)
{
if (fatal_signal_pending(current) && !(error_code & PF_USER)) {
no_context(regs, error_code, address, 0, 0);
@@ -1076,9 +1074,9 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
} else {
if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
VM_FAULT_HWPOISON_LARGE))
- do_sigbus(regs, error_code, address, vma, fault);
+ do_sigbus(regs, error_code, address, pkey, fault);
else if (fault & VM_FAULT_SIGSEGV)
- bad_area_nosemaphore(regs, error_code, address, vma);
+ bad_area_nosemaphore(regs, error_code, address, pkey);
else
BUG();
}
@@ -1268,6 +1266,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
struct mm_struct *mm;
int fault, major = 0;
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+ u32 pkey;
tsk = current;
mm = tsk->mm;
@@ -1468,9 +1467,10 @@ good_area:
return;
}
+ pkey = vma_pkey(vma);
up_read(&mm->mmap_sem);
if (unlikely(fault & VM_FAULT_ERROR)) {
- mm_fault_error(regs, error_code, address, vma, fault);
+ mm_fault_error(regs, error_code, address, &pkey, fault);
return;
}
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 3fcc8e01683b..16c5f37933a2 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -10,6 +10,8 @@
* published by the Free Software Foundation.
*/
+#define DISABLE_BRANCH_PROFILING
+
#include <linux/linkage.h>
#include <linux/init.h>
#include <linux/mm.h>
diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c
index 2dab69a706ec..d7bc0eea20a5 100644
--- a/arch/x86/mm/pkeys.c
+++ b/arch/x86/mm/pkeys.c
@@ -18,7 +18,6 @@
#include <asm/cpufeature.h> /* boot_cpu_has, ... */
#include <asm/mmu_context.h> /* vma_pkey() */
-#include <asm/fpu/internal.h> /* fpregs_active() */
int __execute_only_pkey(struct mm_struct *mm)
{
@@ -45,7 +44,7 @@ int __execute_only_pkey(struct mm_struct *mm)
*/
preempt_disable();
if (!need_to_set_mm_pkey &&
- fpregs_active() &&
+ current->thread.fpu.initialized &&
!__pkru_allows_read(read_pkru(), execute_only_pkey)) {
preempt_enable();
return execute_only_pkey;
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 1ab3821f9e26..658bf0090565 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -30,6 +30,8 @@
atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);
+DEFINE_STATIC_KEY_TRUE(tlb_use_lazy_mode);
+
static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
u16 *new_asid, bool *need_flush)
{
@@ -80,7 +82,7 @@ void leave_mm(int cpu)
return;
/* Warn if we're not lazy. */
- WARN_ON(cpumask_test_cpu(smp_processor_id(), mm_cpumask(loaded_mm)));
+ WARN_ON(!this_cpu_read(cpu_tlbstate.is_lazy));
switch_mm(NULL, &init_mm, NULL);
}
@@ -126,8 +128,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
* isn't free.
*/
#ifdef CONFIG_DEBUG_VM
- if (WARN_ON_ONCE(__read_cr3() !=
- (__sme_pa(real_prev->pgd) | prev_asid))) {
+ if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev, prev_asid))) {
/*
* If we were to BUG here, we'd be very likely to kill
* the system so hard that we don't see the call trace.
@@ -143,45 +144,24 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
__flush_tlb_all();
}
#endif
+ this_cpu_write(cpu_tlbstate.is_lazy, false);
if (real_prev == next) {
VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
next->context.ctx_id);
- if (cpumask_test_cpu(cpu, mm_cpumask(next))) {
- /*
- * There's nothing to do: we weren't lazy, and we
- * aren't changing our mm. We don't need to flush
- * anything, nor do we need to update CR3, CR4, or
- * LDTR.
- */
- return;
- }
-
- /* Resume remote flushes and then read tlb_gen. */
- cpumask_set_cpu(cpu, mm_cpumask(next));
- next_tlb_gen = atomic64_read(&next->context.tlb_gen);
-
- if (this_cpu_read(cpu_tlbstate.ctxs[prev_asid].tlb_gen) <
- next_tlb_gen) {
- /*
- * Ideally, we'd have a flush_tlb() variant that
- * takes the known CR3 value as input. This would
- * be faster on Xen PV and on hypothetical CPUs
- * on which INVPCID is fast.
- */
- this_cpu_write(cpu_tlbstate.ctxs[prev_asid].tlb_gen,
- next_tlb_gen);
- write_cr3(__sme_pa(next->pgd) | prev_asid);
- trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH,
- TLB_FLUSH_ALL);
- }
-
/*
- * We just exited lazy mode, which means that CR4 and/or LDTR
- * may be stale. (Changes to the required CR4 and LDTR states
- * are not reflected in tlb_gen.)
+ * We don't currently support having a real mm loaded without
+ * our cpu set in mm_cpumask(). We have all the bookkeeping
+ * in place to figure out whether we would need to flush
+ * if our cpu were cleared in mm_cpumask(), but we don't
+ * currently use it.
*/
+ if (WARN_ON_ONCE(real_prev != &init_mm &&
+ !cpumask_test_cpu(cpu, mm_cpumask(next))))
+ cpumask_set_cpu(cpu, mm_cpumask(next));
+
+ return;
} else {
u16 new_asid;
bool need_flush;
@@ -192,7 +172,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
* mapped in the new pgd, we'll double-fault. Forcibly
* map it.
*/
- unsigned int index = pgd_index(current_stack_pointer());
+ unsigned int index = pgd_index(current_stack_pointer);
pgd_t *pgd = next->pgd + index;
if (unlikely(pgd_none(*pgd)))
@@ -200,10 +180,9 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
}
/* Stop remote flushes for the previous mm */
- if (cpumask_test_cpu(cpu, mm_cpumask(real_prev)))
- cpumask_clear_cpu(cpu, mm_cpumask(real_prev));
-
- VM_WARN_ON_ONCE(cpumask_test_cpu(cpu, mm_cpumask(next)));
+ VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(real_prev)) &&
+ real_prev != &init_mm);
+ cpumask_clear_cpu(cpu, mm_cpumask(real_prev));
/*
* Start remote flushes and then read tlb_gen.
@@ -216,12 +195,12 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
if (need_flush) {
this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
- write_cr3(__sme_pa(next->pgd) | new_asid);
+ write_cr3(build_cr3(next, new_asid));
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH,
TLB_FLUSH_ALL);
} else {
/* The new ASID is already up to date. */
- write_cr3(__sme_pa(next->pgd) | new_asid | CR3_NOFLUSH);
+ write_cr3(build_cr3_noflush(next, new_asid));
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
}
@@ -234,6 +213,37 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
}
/*
+ * enter_lazy_tlb() is a hint from the scheduler that we are entering a
+ * kernel thread or other context without an mm. Acceptable implementations
+ * include doing nothing whatsoever, switching to init_mm, or various clever
+ * lazy tricks to try to minimize TLB flushes.
+ *
+ * The scheduler reserves the right to call enter_lazy_tlb() several times
+ * in a row. It will notify us that we're going back to a real mm by
+ * calling switch_mm_irqs_off().
+ */
+void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
+{
+ if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm)
+ return;
+
+ if (static_branch_unlikely(&tlb_use_lazy_mode)) {
+ /*
+ * There's a significant optimization that may be possible
+ * here. We have accurate enough TLB flush tracking that we
+ * don't need to maintain coherence of TLB per se when we're
+ * lazy. We do, however, need to maintain coherence of
+ * paging-structure caches. We could, in principle, leave our
+ * old mm loaded and only switch to init_mm when
+ * tlb_remove_page() happens.
+ */
+ this_cpu_write(cpu_tlbstate.is_lazy, true);
+ } else {
+ switch_mm(NULL, &init_mm, NULL);
+ }
+}
+
+/*
* Call this when reinitializing a CPU. It fixes the following potential
* problems:
*
@@ -265,7 +275,7 @@ void initialize_tlbstate_and_flush(void)
!(cr4_read_shadow() & X86_CR4_PCIDE));
/* Force ASID 0 and force a TLB flush. */
- write_cr3(cr3 & ~CR3_PCID_MASK);
+ write_cr3(build_cr3(mm, 0));
/* Reinitialize tlbstate. */
this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
@@ -304,16 +314,20 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
/* This code cannot presently handle being reentered. */
VM_WARN_ON(!irqs_disabled());
+ if (unlikely(loaded_mm == &init_mm))
+ return;
+
VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].ctx_id) !=
loaded_mm->context.ctx_id);
- if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(loaded_mm))) {
+ if (this_cpu_read(cpu_tlbstate.is_lazy)) {
/*
- * We're in lazy mode -- don't flush. We can get here on
- * remote flushes due to races and on local flushes if a
- * kernel thread coincidentally flushes the mm it's lazily
- * still using.
+ * We're in lazy mode. We need to at least flush our
+ * paging-structure cache to avoid speculatively reading
+ * garbage into our TLB. Since switching to init_mm is barely
+ * slower than a minimal flush, just switch to init_mm.
*/
+ switch_mm_irqs_off(NULL, &init_mm, NULL);
return;
}
@@ -612,3 +626,57 @@ static int __init create_tlb_single_page_flush_ceiling(void)
return 0;
}
late_initcall(create_tlb_single_page_flush_ceiling);
+
+static ssize_t tlblazy_read_file(struct file *file, char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ char buf[2];
+
+ buf[0] = static_branch_likely(&tlb_use_lazy_mode) ? '1' : '0';
+ buf[1] = '\n';
+
+ return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
+}
+
+static ssize_t tlblazy_write_file(struct file *file,
+ const char __user *user_buf, size_t count, loff_t *ppos)
+{
+ bool val;
+
+ if (kstrtobool_from_user(user_buf, count, &val))
+ return -EINVAL;
+
+ if (val)
+ static_branch_enable(&tlb_use_lazy_mode);
+ else
+ static_branch_disable(&tlb_use_lazy_mode);
+
+ return count;
+}
+
+static const struct file_operations fops_tlblazy = {
+ .read = tlblazy_read_file,
+ .write = tlblazy_write_file,
+ .llseek = default_llseek,
+};
+
+static int __init init_tlb_use_lazy_mode(void)
+{
+ if (boot_cpu_has(X86_FEATURE_PCID)) {
+ /*
+ * Heuristic: with PCID on, switching to and from
+ * init_mm is reasonably fast, but remote flush IPIs
+ * as expensive as ever, so turn off lazy TLB mode.
+ *
+ * We can't do this in setup_pcid() because static keys
+ * haven't been initialized yet, and it would blow up
+ * badly.
+ */
+ static_branch_disable(&tlb_use_lazy_mode);
+ }
+
+ debugfs_create_file("tlb_use_lazy_mode", S_IRUSR | S_IWUSR,
+ arch_debugfs_dir, NULL, &fops_tlblazy);
+ return 0;
+}
+late_initcall(init_tlb_use_lazy_mode);
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 8c9573660d51..0554e8aef4d5 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -284,9 +284,9 @@ static void emit_bpf_tail_call(u8 **pprog)
/* if (index >= array->map.max_entries)
* goto out;
*/
- EMIT4(0x48, 0x8B, 0x46, /* mov rax, qword ptr [rsi + 16] */
+ EMIT2(0x89, 0xD2); /* mov edx, edx */
+ EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */
offsetof(struct bpf_array, map.max_entries));
- EMIT3(0x48, 0x39, 0xD0); /* cmp rax, rdx */
#define OFFSET1 43 /* number of bytes to jump */
EMIT2(X86_JBE, OFFSET1); /* jbe out */
label1 = cnt;
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 0e7ef69e8531..d669e9d89001 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -93,11 +93,11 @@ int xen_cpuhp_setup(int (*cpu_up_prepare_cb)(unsigned int),
int rc;
rc = cpuhp_setup_state_nocalls(CPUHP_XEN_PREPARE,
- "x86/xen/hvm_guest:prepare",
+ "x86/xen/guest:prepare",
cpu_up_prepare_cb, cpu_dead_cb);
if (rc >= 0) {
rc = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
- "x86/xen/hvm_guest:online",
+ "x86/xen/guest:online",
xen_cpu_up_online, NULL);
if (rc < 0)
cpuhp_remove_state_nocalls(CPUHP_XEN_PREPARE);
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 509f560bd0c6..71495f1a86d7 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1238,21 +1238,16 @@ static void __init xen_pagetable_cleanhighmap(void)
* from _brk_limit way up to the max_pfn_mapped (which is the end of
* the ramdisk). We continue on, erasing PMD entries that point to page
* tables - do note that they are accessible at this stage via __va.
- * For good measure we also round up to the PMD - which means that if
+ * As Xen is aligning the memory end to a 4MB boundary, for good
+ * measure we also round up to PMD_SIZE * 2 - which means that if
* anybody is using __ka address to the initial boot-stack - and try
* to use it - they are going to crash. The xen_start_info has been
* taken care of already in xen_setup_kernel_pagetable. */
addr = xen_start_info->pt_base;
- size = roundup(xen_start_info->nr_pt_frames * PAGE_SIZE, PMD_SIZE);
+ size = xen_start_info->nr_pt_frames * PAGE_SIZE;
- xen_cleanhighmap(addr, addr + size);
+ xen_cleanhighmap(addr, roundup(addr + size, PMD_SIZE * 2));
xen_start_info->pt_base = (unsigned long)__va(__pa(xen_start_info->pt_base));
-#ifdef DEBUG
- /* This is superfluous and is not necessary, but you know what
- * lets do it. The MODULES_VADDR -> MODULES_END should be clear of
- * anything at this stage. */
- xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1);
-#endif
}
#endif
@@ -2220,7 +2215,7 @@ static void __init xen_write_cr3_init(unsigned long cr3)
* not the first page table in the page table pool.
* Iterate through the initial page tables to find the real page table base.
*/
-static phys_addr_t xen_find_pt_base(pmd_t *pmd)
+static phys_addr_t __init xen_find_pt_base(pmd_t *pmd)
{
phys_addr_t pt_base, paddr;
unsigned pmdidx;