diff options
Diffstat (limited to 'arch/x86/include')
60 files changed, 663 insertions, 299 deletions
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 65064d9f7fa6..8eb74cf386db 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -14,6 +14,7 @@ #include <asm/mmu.h> #include <asm/mpspec.h> #include <asm/x86_init.h> +#include <asm/cpufeature.h> #ifdef CONFIG_ACPI_APEI # include <asm/pgtable_types.h> @@ -63,6 +64,13 @@ extern int (*acpi_suspend_lowlevel)(void); /* Physical address to resume after wakeup */ unsigned long acpi_get_wakeup_address(void); +static inline bool acpi_skip_set_wakeup_address(void) +{ + return cpu_feature_enabled(X86_FEATURE_XENPV); +} + +#define acpi_skip_set_wakeup_address acpi_skip_set_wakeup_address + /* * Check if the CPU can handle C2 and deeper */ diff --git a/arch/x86/include/asm/agp.h b/arch/x86/include/asm/agp.h index cd7b14322035..c8c111d8fbd7 100644 --- a/arch/x86/include/asm/agp.h +++ b/arch/x86/include/asm/agp.h @@ -23,10 +23,4 @@ */ #define flush_agp_cache() wbinvd() -/* GATT allocation. Returns/accepts GATT kernel virtual address. */ -#define alloc_gatt_pages(order) \ - ((char *)__get_free_pages(GFP_KERNEL, (order))) -#define free_gatt_pages(table, order) \ - free_pages((unsigned long)(table), (order)) - #endif /* _ASM_X86_AGP_H */ diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 7659217f4d49..e2975a32d443 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -6,8 +6,10 @@ #include <linux/stringify.h> #include <asm/asm.h> -#define ALTINSTR_FLAG_INV (1 << 15) -#define ALT_NOT(feat) ((feat) | ALTINSTR_FLAG_INV) +#define ALT_FLAGS_SHIFT 16 + +#define ALT_FLAG_NOT BIT(0) +#define ALT_NOT(feature) ((ALT_FLAG_NOT << ALT_FLAGS_SHIFT) | (feature)) #ifndef __ASSEMBLY__ @@ -59,10 +61,27 @@ ".long 999b - .\n\t" \ ".popsection\n\t" +/* + * The patching flags are part of the upper bits of the @ft_flags parameter when + * specifying them. The split is currently like this: + * + * [31... flags ...16][15... CPUID feature bit ...0] + * + * but since this is all hidden in the macros argument being split, those fields can be + * extended in the future to fit in a u64 or however the need arises. + */ struct alt_instr { s32 instr_offset; /* original instruction */ s32 repl_offset; /* offset to replacement instruction */ - u16 cpuid; /* cpuid bit set for replacement */ + + union { + struct { + u32 cpuid: 16; /* CPUID bit set for replacement */ + u32 flags: 16; /* patching control flags */ + }; + u32 ft_flags; + }; + u8 instrlen; /* length of original instruction */ u8 replacementlen; /* length of new instruction */ } __packed; @@ -182,10 +201,10 @@ static inline int alternatives_text_reserved(void *start, void *end) " - (" alt_slen ")), 0x90\n" \ alt_end_marker ":\n" -#define ALTINSTR_ENTRY(feature, num) \ +#define ALTINSTR_ENTRY(ft_flags, num) \ " .long 661b - .\n" /* label */ \ " .long " b_replacement(num)"f - .\n" /* new instruction */ \ - " .word " __stringify(feature) "\n" /* feature bit */ \ + " .4byte " __stringify(ft_flags) "\n" /* feature + flags */ \ " .byte " alt_total_slen "\n" /* source len */ \ " .byte " alt_rlen(num) "\n" /* replacement len */ @@ -194,20 +213,20 @@ static inline int alternatives_text_reserved(void *start, void *end) b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n" /* alternative assembly primitive: */ -#define ALTERNATIVE(oldinstr, newinstr, feature) \ +#define ALTERNATIVE(oldinstr, newinstr, ft_flags) \ OLDINSTR(oldinstr, 1) \ ".pushsection .altinstructions,\"a\"\n" \ - ALTINSTR_ENTRY(feature, 1) \ + ALTINSTR_ENTRY(ft_flags, 1) \ ".popsection\n" \ ".pushsection .altinstr_replacement, \"ax\"\n" \ ALTINSTR_REPLACEMENT(newinstr, 1) \ ".popsection\n" -#define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\ +#define ALTERNATIVE_2(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2) \ OLDINSTR_2(oldinstr, 1, 2) \ ".pushsection .altinstructions,\"a\"\n" \ - ALTINSTR_ENTRY(feature1, 1) \ - ALTINSTR_ENTRY(feature2, 2) \ + ALTINSTR_ENTRY(ft_flags1, 1) \ + ALTINSTR_ENTRY(ft_flags2, 2) \ ".popsection\n" \ ".pushsection .altinstr_replacement, \"ax\"\n" \ ALTINSTR_REPLACEMENT(newinstr1, 1) \ @@ -215,21 +234,22 @@ static inline int alternatives_text_reserved(void *start, void *end) ".popsection\n" /* If @feature is set, patch in @newinstr_yes, otherwise @newinstr_no. */ -#define ALTERNATIVE_TERNARY(oldinstr, feature, newinstr_yes, newinstr_no) \ +#define ALTERNATIVE_TERNARY(oldinstr, ft_flags, newinstr_yes, newinstr_no) \ ALTERNATIVE_2(oldinstr, newinstr_no, X86_FEATURE_ALWAYS, \ - newinstr_yes, feature) - -#define ALTERNATIVE_3(oldinsn, newinsn1, feat1, newinsn2, feat2, newinsn3, feat3) \ - OLDINSTR_3(oldinsn, 1, 2, 3) \ - ".pushsection .altinstructions,\"a\"\n" \ - ALTINSTR_ENTRY(feat1, 1) \ - ALTINSTR_ENTRY(feat2, 2) \ - ALTINSTR_ENTRY(feat3, 3) \ - ".popsection\n" \ - ".pushsection .altinstr_replacement, \"ax\"\n" \ - ALTINSTR_REPLACEMENT(newinsn1, 1) \ - ALTINSTR_REPLACEMENT(newinsn2, 2) \ - ALTINSTR_REPLACEMENT(newinsn3, 3) \ + newinstr_yes, ft_flags) + +#define ALTERNATIVE_3(oldinsn, newinsn1, ft_flags1, newinsn2, ft_flags2, \ + newinsn3, ft_flags3) \ + OLDINSTR_3(oldinsn, 1, 2, 3) \ + ".pushsection .altinstructions,\"a\"\n" \ + ALTINSTR_ENTRY(ft_flags1, 1) \ + ALTINSTR_ENTRY(ft_flags2, 2) \ + ALTINSTR_ENTRY(ft_flags3, 3) \ + ".popsection\n" \ + ".pushsection .altinstr_replacement, \"ax\"\n" \ + ALTINSTR_REPLACEMENT(newinsn1, 1) \ + ALTINSTR_REPLACEMENT(newinsn2, 2) \ + ALTINSTR_REPLACEMENT(newinsn3, 3) \ ".popsection\n" /* @@ -244,14 +264,14 @@ static inline int alternatives_text_reserved(void *start, void *end) * For non barrier like inlines please define new variants * without volatile and memory clobber. */ -#define alternative(oldinstr, newinstr, feature) \ - asm_inline volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory") +#define alternative(oldinstr, newinstr, ft_flags) \ + asm_inline volatile (ALTERNATIVE(oldinstr, newinstr, ft_flags) : : : "memory") -#define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \ - asm_inline volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory") +#define alternative_2(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2) \ + asm_inline volatile(ALTERNATIVE_2(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2) ::: "memory") -#define alternative_ternary(oldinstr, feature, newinstr_yes, newinstr_no) \ - asm_inline volatile(ALTERNATIVE_TERNARY(oldinstr, feature, newinstr_yes, newinstr_no) ::: "memory") +#define alternative_ternary(oldinstr, ft_flags, newinstr_yes, newinstr_no) \ + asm_inline volatile(ALTERNATIVE_TERNARY(oldinstr, ft_flags, newinstr_yes, newinstr_no) ::: "memory") /* * Alternative inline assembly with input. @@ -261,8 +281,8 @@ static inline int alternatives_text_reserved(void *start, void *end) * Argument numbers start with 1. * Leaving an unused argument 0 to keep API compatibility. */ -#define alternative_input(oldinstr, newinstr, feature, input...) \ - asm_inline volatile (ALTERNATIVE(oldinstr, newinstr, feature) \ +#define alternative_input(oldinstr, newinstr, ft_flags, input...) \ + asm_inline volatile (ALTERNATIVE(oldinstr, newinstr, ft_flags) \ : : "i" (0), ## input) /* @@ -273,20 +293,20 @@ static inline int alternatives_text_reserved(void *start, void *end) * Otherwise, if CPU has feature1, newinstr1 is used. * Otherwise, oldinstr is used. */ -#define alternative_input_2(oldinstr, newinstr1, feature1, newinstr2, \ - feature2, input...) \ - asm_inline volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, \ - newinstr2, feature2) \ +#define alternative_input_2(oldinstr, newinstr1, ft_flags1, newinstr2, \ + ft_flags2, input...) \ + asm_inline volatile(ALTERNATIVE_2(oldinstr, newinstr1, ft_flags1, \ + newinstr2, ft_flags2) \ : : "i" (0), ## input) /* Like alternative_input, but with a single output argument */ -#define alternative_io(oldinstr, newinstr, feature, output, input...) \ - asm_inline volatile (ALTERNATIVE(oldinstr, newinstr, feature) \ +#define alternative_io(oldinstr, newinstr, ft_flags, output, input...) \ + asm_inline volatile (ALTERNATIVE(oldinstr, newinstr, ft_flags) \ : output : "i" (0), ## input) /* Like alternative_io, but for replacing a direct call with another one. */ -#define alternative_call(oldfunc, newfunc, feature, output, input...) \ - asm_inline volatile (ALTERNATIVE("call %P[old]", "call %P[new]", feature) \ +#define alternative_call(oldfunc, newfunc, ft_flags, output, input...) \ + asm_inline volatile (ALTERNATIVE("call %P[old]", "call %P[new]", ft_flags) \ : output : [old] "i" (oldfunc), [new] "i" (newfunc), ## input) /* @@ -295,10 +315,10 @@ static inline int alternatives_text_reserved(void *start, void *end) * Otherwise, if CPU has feature1, function1 is used. * Otherwise, old function is used. */ -#define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, feature2, \ +#define alternative_call_2(oldfunc, newfunc1, ft_flags1, newfunc2, ft_flags2, \ output, input...) \ - asm_inline volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\ - "call %P[new2]", feature2) \ + asm_inline volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", ft_flags1,\ + "call %P[new2]", ft_flags2) \ : output, ASM_CALL_CONSTRAINT \ : [old] "i" (oldfunc), [new1] "i" (newfunc1), \ [new2] "i" (newfunc2), ## input) @@ -347,10 +367,10 @@ static inline int alternatives_text_reserved(void *start, void *end) * enough information for the alternatives patching code to patch an * instruction. See apply_alternatives(). */ -.macro altinstruction_entry orig alt feature orig_len alt_len +.macro altinstr_entry orig alt ft_flags orig_len alt_len .long \orig - . .long \alt - . - .word \feature + .4byte \ft_flags .byte \orig_len .byte \alt_len .endm @@ -361,7 +381,7 @@ static inline int alternatives_text_reserved(void *start, void *end) * @newinstr. ".skip" directive takes care of proper instruction padding * in case @newinstr is longer than @oldinstr. */ -.macro ALTERNATIVE oldinstr, newinstr, feature +.macro ALTERNATIVE oldinstr, newinstr, ft_flags 140: \oldinstr 141: @@ -369,7 +389,7 @@ static inline int alternatives_text_reserved(void *start, void *end) 142: .pushsection .altinstructions,"a" - altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f + altinstr_entry 140b,143f,\ft_flags,142b-140b,144f-143f .popsection .pushsection .altinstr_replacement,"ax" @@ -399,7 +419,7 @@ static inline int alternatives_text_reserved(void *start, void *end) * has @feature1, it replaces @oldinstr with @newinstr1. If CPU has * @feature2, it replaces @oldinstr with @feature2. */ -.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2 +.macro ALTERNATIVE_2 oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2 140: \oldinstr 141: @@ -408,8 +428,8 @@ static inline int alternatives_text_reserved(void *start, void *end) 142: .pushsection .altinstructions,"a" - altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f - altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f + altinstr_entry 140b,143f,\ft_flags1,142b-140b,144f-143f + altinstr_entry 140b,144f,\ft_flags2,142b-140b,145f-144f .popsection .pushsection .altinstr_replacement,"ax" @@ -421,7 +441,7 @@ static inline int alternatives_text_reserved(void *start, void *end) .popsection .endm -.macro ALTERNATIVE_3 oldinstr, newinstr1, feature1, newinstr2, feature2, newinstr3, feature3 +.macro ALTERNATIVE_3 oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2, newinstr3, ft_flags3 140: \oldinstr 141: @@ -430,9 +450,9 @@ static inline int alternatives_text_reserved(void *start, void *end) 142: .pushsection .altinstructions,"a" - altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f - altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f - altinstruction_entry 140b,145f,\feature3,142b-140b,146f-145f + altinstr_entry 140b,143f,\ft_flags1,142b-140b,144f-143f + altinstr_entry 140b,144f,\ft_flags2,142b-140b,145f-144f + altinstr_entry 140b,145f,\ft_flags3,142b-140b,146f-145f .popsection .pushsection .altinstr_replacement,"ax" @@ -447,9 +467,9 @@ static inline int alternatives_text_reserved(void *start, void *end) .endm /* If @feature is set, patch in @newinstr_yes, otherwise @newinstr_no. */ -#define ALTERNATIVE_TERNARY(oldinstr, feature, newinstr_yes, newinstr_no) \ +#define ALTERNATIVE_TERNARY(oldinstr, ft_flags, newinstr_yes, newinstr_no) \ ALTERNATIVE_2 oldinstr, newinstr_no, X86_FEATURE_ALWAYS, \ - newinstr_yes, feature + newinstr_yes, ft_flags #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h index 8f80de627c60..b1a98fa38828 100644 --- a/arch/x86/include/asm/asm-prototypes.h +++ b/arch/x86/include/asm/asm-prototypes.h @@ -12,6 +12,7 @@ #include <asm/special_insns.h> #include <asm/preempt.h> #include <asm/asm.h> +#include <asm/gsseg.h> #ifndef CONFIG_X86_CMPXCHG64 extern void cmpxchg8b_emu(void); diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h index 5efd01b548d1..808b4eece251 100644 --- a/arch/x86/include/asm/atomic64_32.h +++ b/arch/x86/include/asm/atomic64_32.h @@ -71,7 +71,7 @@ ATOMIC64_DECL(add_unless); * the old value. */ -static inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n) +static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n) { return arch_cmpxchg64(&v->counter, o, n); } @@ -85,7 +85,7 @@ static inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n) * Atomically xchgs the value of @v to @n and returns * the old value. */ -static inline s64 arch_atomic64_xchg(atomic64_t *v, s64 n) +static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 n) { s64 o; unsigned high = (unsigned)(n >> 32); @@ -104,7 +104,7 @@ static inline s64 arch_atomic64_xchg(atomic64_t *v, s64 n) * * Atomically sets the value of @v to @n. */ -static inline void arch_atomic64_set(atomic64_t *v, s64 i) +static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i) { unsigned high = (unsigned)(i >> 32); unsigned low = (unsigned)i; @@ -119,7 +119,7 @@ static inline void arch_atomic64_set(atomic64_t *v, s64 i) * * Atomically reads the value of @v and returns it. */ -static inline s64 arch_atomic64_read(const atomic64_t *v) +static __always_inline s64 arch_atomic64_read(const atomic64_t *v) { s64 r; alternative_atomic64(read, "=&A" (r), "c" (v) : "memory"); @@ -133,7 +133,7 @@ static inline s64 arch_atomic64_read(const atomic64_t *v) * * Atomically adds @i to @v and returns @i + *@v */ -static inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v) +static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v) { alternative_atomic64(add_return, ASM_OUTPUT2("+A" (i), "+c" (v)), @@ -145,7 +145,7 @@ static inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v) /* * Other variants with different arithmetic operators: */ -static inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v) +static __always_inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v) { alternative_atomic64(sub_return, ASM_OUTPUT2("+A" (i), "+c" (v)), @@ -154,7 +154,7 @@ static inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v) } #define arch_atomic64_sub_return arch_atomic64_sub_return -static inline s64 arch_atomic64_inc_return(atomic64_t *v) +static __always_inline s64 arch_atomic64_inc_return(atomic64_t *v) { s64 a; alternative_atomic64(inc_return, "=&A" (a), @@ -163,7 +163,7 @@ static inline s64 arch_atomic64_inc_return(atomic64_t *v) } #define arch_atomic64_inc_return arch_atomic64_inc_return -static inline s64 arch_atomic64_dec_return(atomic64_t *v) +static __always_inline s64 arch_atomic64_dec_return(atomic64_t *v) { s64 a; alternative_atomic64(dec_return, "=&A" (a), @@ -179,7 +179,7 @@ static inline s64 arch_atomic64_dec_return(atomic64_t *v) * * Atomically adds @i to @v. */ -static inline s64 arch_atomic64_add(s64 i, atomic64_t *v) +static __always_inline s64 arch_atomic64_add(s64 i, atomic64_t *v) { __alternative_atomic64(add, add_return, ASM_OUTPUT2("+A" (i), "+c" (v)), @@ -194,7 +194,7 @@ static inline s64 arch_atomic64_add(s64 i, atomic64_t *v) * * Atomically subtracts @i from @v. */ -static inline s64 arch_atomic64_sub(s64 i, atomic64_t *v) +static __always_inline s64 arch_atomic64_sub(s64 i, atomic64_t *v) { __alternative_atomic64(sub, sub_return, ASM_OUTPUT2("+A" (i), "+c" (v)), @@ -208,7 +208,7 @@ static inline s64 arch_atomic64_sub(s64 i, atomic64_t *v) * * Atomically increments @v by 1. */ -static inline void arch_atomic64_inc(atomic64_t *v) +static __always_inline void arch_atomic64_inc(atomic64_t *v) { __alternative_atomic64(inc, inc_return, /* no output */, "S" (v) : "memory", "eax", "ecx", "edx"); @@ -221,7 +221,7 @@ static inline void arch_atomic64_inc(atomic64_t *v) * * Atomically decrements @v by 1. */ -static inline void arch_atomic64_dec(atomic64_t *v) +static __always_inline void arch_atomic64_dec(atomic64_t *v) { __alternative_atomic64(dec, dec_return, /* no output */, "S" (v) : "memory", "eax", "ecx", "edx"); @@ -237,7 +237,7 @@ static inline void arch_atomic64_dec(atomic64_t *v) * Atomically adds @a to @v, so long as it was not @u. * Returns non-zero if the add was done, zero otherwise. */ -static inline int arch_atomic64_add_unless(atomic64_t *v, s64 a, s64 u) +static __always_inline int arch_atomic64_add_unless(atomic64_t *v, s64 a, s64 u) { unsigned low = (unsigned)u; unsigned high = (unsigned)(u >> 32); @@ -248,7 +248,7 @@ static inline int arch_atomic64_add_unless(atomic64_t *v, s64 a, s64 u) } #define arch_atomic64_add_unless arch_atomic64_add_unless -static inline int arch_atomic64_inc_not_zero(atomic64_t *v) +static __always_inline int arch_atomic64_inc_not_zero(atomic64_t *v) { int r; alternative_atomic64(inc_not_zero, "=&a" (r), @@ -257,7 +257,7 @@ static inline int arch_atomic64_inc_not_zero(atomic64_t *v) } #define arch_atomic64_inc_not_zero arch_atomic64_inc_not_zero -static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v) +static __always_inline s64 arch_atomic64_dec_if_positive(atomic64_t *v) { s64 r; alternative_atomic64(dec_if_positive, "=&A" (r), @@ -269,7 +269,7 @@ static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v) #undef alternative_atomic64 #undef __alternative_atomic64 -static inline void arch_atomic64_and(s64 i, atomic64_t *v) +static __always_inline void arch_atomic64_and(s64 i, atomic64_t *v) { s64 old, c = 0; @@ -277,7 +277,7 @@ static inline void arch_atomic64_and(s64 i, atomic64_t *v) c = old; } -static inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v) +static __always_inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v) { s64 old, c = 0; @@ -288,7 +288,7 @@ static inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v) } #define arch_atomic64_fetch_and arch_atomic64_fetch_and -static inline void arch_atomic64_or(s64 i, atomic64_t *v) +static __always_inline void arch_atomic64_or(s64 i, atomic64_t *v) { s64 old, c = 0; @@ -296,7 +296,7 @@ static inline void arch_atomic64_or(s64 i, atomic64_t *v) c = old; } -static inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v) +static __always_inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v) { s64 old, c = 0; @@ -307,7 +307,7 @@ static inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v) } #define arch_atomic64_fetch_or arch_atomic64_fetch_or -static inline void arch_atomic64_xor(s64 i, atomic64_t *v) +static __always_inline void arch_atomic64_xor(s64 i, atomic64_t *v) { s64 old, c = 0; @@ -315,7 +315,7 @@ static inline void arch_atomic64_xor(s64 i, atomic64_t *v) c = old; } -static inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v) +static __always_inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v) { s64 old, c = 0; @@ -326,7 +326,7 @@ static inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v) } #define arch_atomic64_fetch_xor arch_atomic64_fetch_xor -static inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v) +static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v) { s64 old, c = 0; diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index 7886d0578fc9..c496595bf601 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -17,7 +17,7 @@ * Atomically reads the value of @v. * Doesn't imply a read memory barrier. */ -static inline s64 arch_atomic64_read(const atomic64_t *v) +static __always_inline s64 arch_atomic64_read(const atomic64_t *v) { return __READ_ONCE((v)->counter); } @@ -29,7 +29,7 @@ static inline s64 arch_atomic64_read(const atomic64_t *v) * * Atomically sets the value of @v to @i. */ -static inline void arch_atomic64_set(atomic64_t *v, s64 i) +static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i) { __WRITE_ONCE(v->counter, i); } @@ -55,7 +55,7 @@ static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v) * * Atomically subtracts @i from @v. */ -static inline void arch_atomic64_sub(s64 i, atomic64_t *v) +static __always_inline void arch_atomic64_sub(s64 i, atomic64_t *v) { asm volatile(LOCK_PREFIX "subq %1,%0" : "=m" (v->counter) @@ -71,7 +71,7 @@ static inline void arch_atomic64_sub(s64 i, atomic64_t *v) * true if the result is zero, or false for all * other cases. */ -static inline bool arch_atomic64_sub_and_test(s64 i, atomic64_t *v) +static __always_inline bool arch_atomic64_sub_and_test(s64 i, atomic64_t *v) { return GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, e, "er", i); } @@ -113,7 +113,7 @@ static __always_inline void arch_atomic64_dec(atomic64_t *v) * returns true if the result is 0, or false for all other * cases. */ -static inline bool arch_atomic64_dec_and_test(atomic64_t *v) +static __always_inline bool arch_atomic64_dec_and_test(atomic64_t *v) { return GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, e); } @@ -127,7 +127,7 @@ static inline bool arch_atomic64_dec_and_test(atomic64_t *v) * and returns true if the result is zero, or false for all * other cases. */ -static inline bool arch_atomic64_inc_and_test(atomic64_t *v) +static __always_inline bool arch_atomic64_inc_and_test(atomic64_t *v) { return GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, e); } @@ -142,7 +142,7 @@ static inline bool arch_atomic64_inc_and_test(atomic64_t *v) * if the result is negative, or false when * result is greater than or equal to zero. */ -static inline bool arch_atomic64_add_negative(s64 i, atomic64_t *v) +static __always_inline bool arch_atomic64_add_negative(s64 i, atomic64_t *v) { return GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, s, "er", i); } @@ -161,25 +161,25 @@ static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v) } #define arch_atomic64_add_return arch_atomic64_add_return -static inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v) +static __always_inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v) { return arch_atomic64_add_return(-i, v); } #define arch_atomic64_sub_return arch_atomic64_sub_return -static inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v) +static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v) { return xadd(&v->counter, i); } #define arch_atomic64_fetch_add arch_atomic64_fetch_add -static inline s64 arch_atomic64_fetch_sub(s64 i, atomic64_t *v) +static __always_inline s64 arch_atomic64_fetch_sub(s64 i, atomic64_t *v) { return xadd(&v->counter, -i); } #define arch_atomic64_fetch_sub arch_atomic64_fetch_sub -static inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) +static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) { return arch_cmpxchg(&v->counter, old, new); } @@ -191,13 +191,13 @@ static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s } #define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg -static inline s64 arch_atomic64_xchg(atomic64_t *v, s64 new) +static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 new) { return arch_xchg(&v->counter, new); } #define arch_atomic64_xchg arch_atomic64_xchg -static inline void arch_atomic64_and(s64 i, atomic64_t *v) +static __always_inline void arch_atomic64_and(s64 i, atomic64_t *v) { asm volatile(LOCK_PREFIX "andq %1,%0" : "+m" (v->counter) @@ -205,7 +205,7 @@ static inline void arch_atomic64_and(s64 i, atomic64_t *v) : "memory"); } -static inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v) +static __always_inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v) { s64 val = arch_atomic64_read(v); @@ -215,7 +215,7 @@ static inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v) } #define arch_atomic64_fetch_and arch_atomic64_fetch_and -static inline void arch_atomic64_or(s64 i, atomic64_t *v) +static __always_inline void arch_atomic64_or(s64 i, atomic64_t *v) { asm volatile(LOCK_PREFIX "orq %1,%0" : "+m" (v->counter) @@ -223,7 +223,7 @@ static inline void arch_atomic64_or(s64 i, atomic64_t *v) : "memory"); } -static inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v) +static __always_inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v) { s64 val = arch_atomic64_read(v); @@ -233,7 +233,7 @@ static inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v) } #define arch_atomic64_fetch_or arch_atomic64_fetch_or -static inline void arch_atomic64_xor(s64 i, atomic64_t *v) +static __always_inline void arch_atomic64_xor(s64 i, atomic64_t *v) { asm volatile(LOCK_PREFIX "xorq %1,%0" : "+m" (v->counter) @@ -241,7 +241,7 @@ static inline void arch_atomic64_xor(s64 i, atomic64_t *v) : "memory"); } -static inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v) +static __always_inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v) { s64 val = arch_atomic64_read(v); diff --git a/arch/x86/include/asm/checksum_64.h b/arch/x86/include/asm/checksum_64.h index 407beebadaf4..4d4a47a3a8ab 100644 --- a/arch/x86/include/asm/checksum_64.h +++ b/arch/x86/include/asm/checksum_64.h @@ -9,7 +9,6 @@ */ #include <linux/compiler.h> -#include <linux/uaccess.h> #include <asm/byteorder.h> /** diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 1a85e1fb0922..ce0c8f7d3218 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -32,6 +32,7 @@ enum cpuid_leafs CPUID_8000_0007_EBX, CPUID_7_EDX, CPUID_8000_001F_EAX, + CPUID_8000_0021_EAX, }; #define X86_CAP_FMT_NUM "%d:%d" @@ -94,8 +95,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 19, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 20, feature_bit) || \ REQUIRED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 20)) + BUILD_BUG_ON_ZERO(NCAPINTS != 21)) #define DISABLED_MASK_BIT_SET(feature_bit) \ ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \ @@ -118,8 +120,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 19, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 20, feature_bit) || \ DISABLED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 20)) + BUILD_BUG_ON_ZERO(NCAPINTS != 21)) #define cpu_has(c, bit) \ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 61012476d66e..73c9672c123b 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -13,7 +13,7 @@ /* * Defines x86 CPU feature bits */ -#define NCAPINTS 20 /* N 32-bit words worth of info */ +#define NCAPINTS 21 /* N 32-bit words worth of info */ #define NBUGINTS 1 /* N 32-bit bug flags */ /* @@ -97,7 +97,7 @@ #define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */ #define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */ #define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* AMD Last Branch Record Extension Version 2 */ -#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */ +/* FREE, was #define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) "" LFENCE synchronizes RDTSC */ #define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ #define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ #define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */ @@ -307,11 +307,18 @@ #define X86_FEATURE_SGX_EDECCSSA (11*32+18) /* "" SGX EDECCSSA user leaf function */ #define X86_FEATURE_CALL_DEPTH (11*32+19) /* "" Call depth tracking for RSB stuffing */ #define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */ +#define X86_FEATURE_SMBA (11*32+21) /* "" Slow Memory Bandwidth Allocation */ +#define X86_FEATURE_BMEC (11*32+22) /* "" Bandwidth Monitoring Event Configuration */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ #define X86_FEATURE_CMPCCXADD (12*32+ 7) /* "" CMPccXADD instructions */ +#define X86_FEATURE_ARCH_PERFMON_EXT (12*32+ 8) /* "" Intel Architectural PerfMon Extension */ +#define X86_FEATURE_FZRM (12*32+10) /* "" Fast zero-length REP MOVSB */ +#define X86_FEATURE_FSRS (12*32+11) /* "" Fast short REP STOSB */ +#define X86_FEATURE_FSRC (12*32+12) /* "" Fast short REP {CMPSB,SCASB} */ +#define X86_FEATURE_LKGS (12*32+18) /* "" Load "kernel" (userspace) GS */ #define X86_FEATURE_AMX_FP16 (12*32+21) /* "" AMX fp16 Support */ #define X86_FEATURE_AVX_IFMA (12*32+23) /* "" Support for VPMADD52[H,L]UQ */ @@ -426,6 +433,13 @@ #define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* "" Virtual TSC_AUX */ #define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */ +/* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */ +#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* "" No Nested Data Breakpoints */ +#define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* "" LFENCE always serializing / synchronizes RDTSC */ +#define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* "" Null Selector Clears Base */ +#define X86_FEATURE_AUTOIBRS (20*32+ 8) /* "" Automatic IBRS */ +#define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* "" SMM_CTL MSR is not present */ + /* * BUG word(s) */ @@ -466,5 +480,6 @@ #define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* CPU is too old and its MMIO Stale Data status is unknown */ #define X86_BUG_RETBLEED X86_BUG(27) /* CPU is affected by RETBleed */ #define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */ +#define X86_BUG_SMT_RSB X86_BUG(29) /* CPU is vulnerable to Cross-Thread Return Address Predictions */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index b049d950612f..66eb5e1ac4fb 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -39,7 +39,20 @@ static __always_inline unsigned long native_get_debugreg(int regno) asm("mov %%db6, %0" :"=r" (val)); break; case 7: - asm("mov %%db7, %0" :"=r" (val)); + /* + * Apply __FORCE_ORDER to DR7 reads to forbid re-ordering them + * with other code. + * + * This is needed because a DR7 access can cause a #VC exception + * when running under SEV-ES. Taking a #VC exception is not a + * safe thing to do just anywhere in the entry code and + * re-ordering might place the access into an unsafe location. + * + * This happened in the NMI handler, where the DR7 read was + * re-ordered to happen before the call to sev_es_ist_enter(), + * causing stack recursion. + */ + asm volatile("mov %%db7, %0" : "=r" (val) : __FORCE_ORDER); break; default: BUG(); @@ -66,7 +79,16 @@ static __always_inline void native_set_debugreg(int regno, unsigned long value) asm("mov %0, %%db6" ::"r" (value)); break; case 7: - asm("mov %0, %%db7" ::"r" (value)); + /* + * Apply __FORCE_ORDER to DR7 writes to forbid re-ordering them + * with other code. + * + * While is didn't happen with a DR7 write (see the DR7 read + * comment above which explains where it happened), add the + * __FORCE_ORDER here too to avoid similar problems in the + * future. + */ + asm volatile("mov %0, %%db7" ::"r" (value), __FORCE_ORDER); break; default: BUG(); @@ -126,9 +148,14 @@ static __always_inline void local_db_restore(unsigned long dr7) } #ifdef CONFIG_CPU_SUP_AMD -extern void set_dr_addr_mask(unsigned long mask, int dr); +extern void amd_set_dr_addr_mask(unsigned long mask, unsigned int dr); +extern unsigned long amd_get_dr_addr_mask(unsigned int dr); #else -static inline void set_dr_addr_mask(unsigned long mask, int dr) { } +static inline void amd_set_dr_addr_mask(unsigned long mask, unsigned int dr) { } +static inline unsigned long amd_get_dr_addr_mask(unsigned int dr) +{ + return 0; +} #endif #endif /* _ASM_X86_DEBUGREG_H */ diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index c44b56f7ffba..5dfa4fb76f4b 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -124,6 +124,7 @@ #define DISABLED_MASK17 0 #define DISABLED_MASK18 0 #define DISABLED_MASK19 0 -#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 20) +#define DISABLED_MASK20 0 +#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21) #endif /* _ASM_X86_DISABLED_FEATURES_H */ diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 1c66708e3062..d1dac96ee30b 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -4,7 +4,7 @@ extern const struct dma_map_ops *dma_ops; -static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) +static inline const struct dma_map_ops *get_arch_dma_ops(void) { return dma_ops; } diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index a63154e049d7..419280d263d2 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -106,6 +106,8 @@ static inline void efi_fpu_end(void) extern asmlinkage u64 __efi_call(void *fp, ...); +extern bool efi_disable_ibt_for_runtime; + #define efi_call(...) ({ \ __efi_nargs_check(efi_call, 7, __VA_ARGS__); \ __efi_call(__VA_ARGS__); \ @@ -121,7 +123,7 @@ extern asmlinkage u64 __efi_call(void *fp, ...); #undef arch_efi_call_virt #define arch_efi_call_virt(p, f, args...) ({ \ - u64 ret, ibt = ibt_save(); \ + u64 ret, ibt = ibt_save(efi_disable_ibt_for_runtime); \ ret = efi_call((void *)p->f, args); \ ibt_restore(ibt); \ ret; \ @@ -335,6 +337,16 @@ static inline u32 efi64_convert_status(efi_status_t status) #define __efi64_argmap_open_volume(prot, file) \ ((prot), efi64_zero_upper(file)) +/* Memory Attribute Protocol */ +#define __efi64_argmap_get_memory_attributes(protocol, phys, size, flags) \ + ((protocol), __efi64_split(phys), __efi64_split(size), (flags)) + +#define __efi64_argmap_set_memory_attributes(protocol, phys, size, flags) \ + ((protocol), __efi64_split(phys), __efi64_split(size), __efi64_split(flags)) + +#define __efi64_argmap_clear_memory_attributes(protocol, phys, size, flags) \ + ((protocol), __efi64_split(phys), __efi64_split(size), __efi64_split(flags)) + /* * The macros below handle the plumbing for the argument mapping. To add a * mapping for a specific EFI method, simply define a macro diff --git a/arch/x86/include/asm/fpu/sched.h b/arch/x86/include/asm/fpu/sched.h index b2486b2cbc6e..c2d6cd78ed0c 100644 --- a/arch/x86/include/asm/fpu/sched.h +++ b/arch/x86/include/asm/fpu/sched.h @@ -39,7 +39,7 @@ extern void fpu_flush_thread(void); static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) { if (cpu_feature_enabled(X86_FEATURE_FPU) && - !(current->flags & PF_KTHREAD)) { + !(current->flags & (PF_KTHREAD | PF_IO_WORKER))) { save_fpregs_to_fpstate(old_fpu); /* * The save operation preserved register state, so the diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index eb7cd1139d97..7f6d858ff47a 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -321,7 +321,7 @@ struct xstate_header { struct xregs_state { struct fxregs_state i387; struct xstate_header header; - u8 extended_state_area[0]; + u8 extended_state_area[]; } __attribute__ ((packed, aligned (64))); /* diff --git a/arch/x86/include/asm/fpu/xcr.h b/arch/x86/include/asm/fpu/xcr.h index 9656a5bc6fea..9a710c060445 100644 --- a/arch/x86/include/asm/fpu/xcr.h +++ b/arch/x86/include/asm/fpu/xcr.h @@ -5,7 +5,7 @@ #define XCR_XFEATURE_ENABLED_MASK 0x00000000 #define XCR_XFEATURE_IN_USE_MASK 0x00000001 -static inline u64 xgetbv(u32 index) +static __always_inline u64 xgetbv(u32 index) { u32 eax, edx; @@ -27,7 +27,7 @@ static inline void xsetbv(u32 index, u64 value) * * Callers should check X86_FEATURE_XGETBV1. */ -static inline u64 xfeatures_in_use(void) +static __always_inline u64 xfeatures_in_use(void) { return xgetbv(XCR_XFEATURE_IN_USE_MASK); } diff --git a/arch/x86/include/asm/gsseg.h b/arch/x86/include/asm/gsseg.h new file mode 100644 index 000000000000..ab6a595cea70 --- /dev/null +++ b/arch/x86/include/asm/gsseg.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _ASM_X86_GSSEG_H +#define _ASM_X86_GSSEG_H + +#include <linux/types.h> + +#include <asm/asm.h> +#include <asm/cpufeature.h> +#include <asm/alternative.h> +#include <asm/processor.h> +#include <asm/nops.h> + +#ifdef CONFIG_X86_64 + +extern asmlinkage void asm_load_gs_index(u16 selector); + +/* Replace with "lkgs %di" once binutils support LKGS instruction */ +#define LKGS_DI _ASM_BYTES(0xf2,0x0f,0x00,0xf7) + +static inline void native_lkgs(unsigned int selector) +{ + u16 sel = selector; + asm_inline volatile("1: " LKGS_DI + _ASM_EXTABLE_TYPE_REG(1b, 1b, EX_TYPE_ZERO_REG, %k[sel]) + : [sel] "+D" (sel)); +} + +static inline void native_load_gs_index(unsigned int selector) +{ + if (cpu_feature_enabled(X86_FEATURE_LKGS)) { + native_lkgs(selector); + } else { + unsigned long flags; + + local_irq_save(flags); + asm_load_gs_index(selector); + local_irq_restore(flags); + } +} + +#endif /* CONFIG_X86_64 */ + +static inline void __init lkgs_init(void) +{ +#ifdef CONFIG_PARAVIRT_XXL +#ifdef CONFIG_X86_64 + if (cpu_feature_enabled(X86_FEATURE_LKGS)) + pv_ops.cpu.load_gs_index = native_lkgs; +#endif +#endif +} + +#ifndef CONFIG_PARAVIRT_XXL + +static inline void load_gs_index(unsigned int selector) +{ +#ifdef CONFIG_X86_64 + native_load_gs_index(selector); +#else + loadsegment(gs, selector); +#endif +} + +#endif /* CONFIG_PARAVIRT_XXL */ + +#endif /* _ASM_X86_GSSEG_H */ diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 08e822bd7aa6..0b73a809e9e1 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -116,6 +116,9 @@ /* Recommend using the newer ExProcessorMasks interface */ #define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED BIT(11) +/* Indicates that the hypervisor is nested within a Hyper-V partition. */ +#define HV_X64_HYPERV_NESTED BIT(12) + /* Recommend using enlightened VMCS */ #define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED BIT(14) @@ -225,6 +228,17 @@ enum hv_isolation_type { #define HV_REGISTER_SINT15 0x4000009F /* + * Define synthetic interrupt controller model specific registers for + * nested hypervisor. + */ +#define HV_REGISTER_NESTED_SCONTROL 0x40001080 +#define HV_REGISTER_NESTED_SVERSION 0x40001081 +#define HV_REGISTER_NESTED_SIEFP 0x40001082 +#define HV_REGISTER_NESTED_SIMP 0x40001083 +#define HV_REGISTER_NESTED_EOM 0x40001084 +#define HV_REGISTER_NESTED_SINT0 0x40001090 + +/* * Synthetic Timer MSRs. Four timers per vcpu. */ #define HV_REGISTER_STIMER0_CONFIG 0x400000B0 @@ -255,6 +269,9 @@ enum hv_isolation_type { /* TSC invariant control */ #define HV_X64_MSR_TSC_INVARIANT_CONTROL 0x40000118 +/* HV_X64_MSR_TSC_INVARIANT_CONTROL bits */ +#define HV_EXPOSE_INVARIANT_TSC BIT_ULL(0) + /* Register name aliases for temporary compatibility */ #define HV_X64_MSR_STIMER0_COUNT HV_REGISTER_STIMER0_COUNT #define HV_X64_MSR_STIMER0_CONFIG HV_REGISTER_STIMER0_CONFIG @@ -368,7 +385,8 @@ struct hv_nested_enlightenments_control { __u32 reserved:31; } features; struct { - __u32 reserved; + __u32 inter_partition_comm:1; + __u32 reserved:31; } hypercallControls; } __packed; diff --git a/arch/x86/include/asm/ibt.h b/arch/x86/include/asm/ibt.h index 9b08082a5d9f..baae6b4fea23 100644 --- a/arch/x86/include/asm/ibt.h +++ b/arch/x86/include/asm/ibt.h @@ -74,7 +74,7 @@ static inline bool is_endbr(u32 val) return val == gen_endbr(); } -extern __noendbr u64 ibt_save(void); +extern __noendbr u64 ibt_save(bool disable); extern __noendbr void ibt_restore(u64 save); #else /* __ASSEMBLY__ */ @@ -100,7 +100,7 @@ extern __noendbr void ibt_restore(u64 save); static inline bool is_endbr(u32 val) { return false; } -static inline u64 ibt_save(void) { return 0; } +static inline u64 ibt_save(bool disable) { return 0; } static inline void ibt_restore(u64 save) { } #else /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 72184b0b2219..b241af4ce9b4 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -582,18 +582,14 @@ DECLARE_IDTENTRY_RAW(X86_TRAP_MC, xenpv_exc_machine_check); /* NMI */ -#if defined(CONFIG_X86_64) && IS_ENABLED(CONFIG_KVM_INTEL) +#if IS_ENABLED(CONFIG_KVM_INTEL) /* - * Special NOIST entry point for VMX which invokes this on the kernel - * stack. asm_exc_nmi() requires an IST to work correctly vs. the NMI - * 'executing' marker. - * - * On 32bit this just uses the regular NMI entry point because 32-bit does - * not have ISTs. + * Special entry point for VMX which invokes this on the kernel stack, even for + * 64-bit, i.e. without using an IST. asm_exc_nmi() requires an IST to work + * correctly vs. the NMI 'executing' marker. Used for 32-bit kernels as well + * to avoid more ifdeffery. */ -DECLARE_IDTENTRY(X86_TRAP_NMI, exc_nmi_noist); -#else -#define asm_exc_nmi_noist asm_exc_nmi +DECLARE_IDTENTRY(X86_TRAP_NMI, exc_nmi_kvm_vmx); #endif DECLARE_IDTENTRY_NMI(X86_TRAP_NMI, exc_nmi); diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 347707d459c6..cbaf174d8efd 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -123,6 +123,8 @@ #define INTEL_FAM6_METEORLAKE 0xAC #define INTEL_FAM6_METEORLAKE_L 0xAA +#define INTEL_FAM6_LUNARLAKE_M 0xBD + /* "Small Core" Processors (Atom/E-Core) */ #define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */ diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 7793e52d6237..8c5ae649d2df 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -8,9 +8,6 @@ #include <asm/nospec-branch.h> -/* Provide __cpuidle; we can't safely include <linux/cpu.h> */ -#define __cpuidle __section(".cpuidle.text") - /* * Interrupt control: */ @@ -45,13 +42,13 @@ static __always_inline void native_irq_enable(void) asm volatile("sti": : :"memory"); } -static inline __cpuidle void native_safe_halt(void) +static __always_inline void native_safe_halt(void) { mds_idle_clear_cpu_buffers(); asm volatile("sti; hlt": : :"memory"); } -static inline __cpuidle void native_halt(void) +static __always_inline void native_halt(void) { mds_idle_clear_cpu_buffers(); asm volatile("hlt": : :"memory"); @@ -84,7 +81,7 @@ static __always_inline void arch_local_irq_enable(void) * Used in the idle loop; sti takes one instruction cycle * to complete: */ -static inline __cpuidle void arch_safe_halt(void) +static __always_inline void arch_safe_halt(void) { native_safe_halt(); } @@ -93,7 +90,7 @@ static inline __cpuidle void arch_safe_halt(void) * Used when interrupts are already enabled or to * shutdown the processor: */ -static inline __cpuidle void halt(void) +static __always_inline void halt(void) { native_halt(); } diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index abccd51dcfca..8dc345cc6318 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -14,6 +14,7 @@ BUILD_BUG_ON(1) * to make a definition optional, but in this case the default will * be __static_call_return0. */ +KVM_X86_OP(check_processor_compatibility) KVM_X86_OP(hardware_enable) KVM_X86_OP(hardware_disable) KVM_X86_OP(hardware_unsetup) @@ -76,7 +77,6 @@ KVM_X86_OP(set_nmi_mask) KVM_X86_OP(enable_nmi_window) KVM_X86_OP(enable_irq_window) KVM_X86_OP_OPTIONAL(update_cr8_intercept) -KVM_X86_OP(check_apicv_inhibit_reasons) KVM_X86_OP(refresh_apicv_exec_ctrl) KVM_X86_OP_OPTIONAL(hwapic_irr_update) KVM_X86_OP_OPTIONAL(hwapic_isr_update) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 6aaae18f1854..808c292ad3f4 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -134,8 +134,6 @@ #define INVALID_PAGE (~(hpa_t)0) #define VALID_PAGE(x) ((x) != INVALID_PAGE) -#define INVALID_GPA (~(gpa_t)0) - /* KVM Hugepage definitions for x86 */ #define KVM_MAX_HUGEPAGE_LEVEL PG_LEVEL_1G #define KVM_NR_PAGE_SIZES (KVM_MAX_HUGEPAGE_LEVEL - PG_LEVEL_4K + 1) @@ -514,6 +512,7 @@ struct kvm_pmc { #define MSR_ARCH_PERFMON_PERFCTR_MAX (MSR_ARCH_PERFMON_PERFCTR0 + KVM_INTEL_PMC_MAX_GENERIC - 1) #define MSR_ARCH_PERFMON_EVENTSEL_MAX (MSR_ARCH_PERFMON_EVENTSEL0 + KVM_INTEL_PMC_MAX_GENERIC - 1) #define KVM_PMC_MAX_FIXED 3 +#define MSR_ARCH_PERFMON_FIXED_CTR_MAX (MSR_ARCH_PERFMON_FIXED_CTR0 + KVM_PMC_MAX_FIXED - 1) #define KVM_AMD_PMC_MAX_GENERIC 6 struct kvm_pmu { unsigned nr_arch_gp_counters; @@ -678,6 +677,11 @@ struct kvm_vcpu_hv { } nested; }; +struct kvm_hypervisor_cpuid { + u32 base; + u32 limit; +}; + /* Xen HVM per vcpu emulation context */ struct kvm_vcpu_xen { u64 hypercall_rip; @@ -698,6 +702,7 @@ struct kvm_vcpu_xen { struct hrtimer timer; int poll_evtchn; struct timer_list poll_timer; + struct kvm_hypervisor_cpuid cpuid; }; struct kvm_queued_exception { @@ -826,7 +831,7 @@ struct kvm_vcpu_arch { int cpuid_nent; struct kvm_cpuid_entry2 *cpuid_entries; - u32 kvm_cpuid_base; + struct kvm_hypervisor_cpuid kvm_cpuid; u64 reserved_gpa_bits; int maxphyaddr; @@ -1022,19 +1027,30 @@ struct kvm_arch_memory_slot { }; /* - * We use as the mode the number of bits allocated in the LDR for the - * logical processor ID. It happens that these are all powers of two. - * This makes it is very easy to detect cases where the APICs are - * configured for multiple modes; in that case, we cannot use the map and - * hence cannot use kvm_irq_delivery_to_apic_fast either. + * Track the mode of the optimized logical map, as the rules for decoding the + * destination vary per mode. Enabling the optimized logical map requires all + * software-enabled local APIs to be in the same mode, each addressable APIC to + * be mapped to only one MDA, and each MDA to map to at most one APIC. */ -#define KVM_APIC_MODE_XAPIC_CLUSTER 4 -#define KVM_APIC_MODE_XAPIC_FLAT 8 -#define KVM_APIC_MODE_X2APIC 16 +enum kvm_apic_logical_mode { + /* All local APICs are software disabled. */ + KVM_APIC_MODE_SW_DISABLED, + /* All software enabled local APICs in xAPIC cluster addressing mode. */ + KVM_APIC_MODE_XAPIC_CLUSTER, + /* All software enabled local APICs in xAPIC flat addressing mode. */ + KVM_APIC_MODE_XAPIC_FLAT, + /* All software enabled local APICs in x2APIC mode. */ + KVM_APIC_MODE_X2APIC, + /* + * Optimized map disabled, e.g. not all local APICs in the same logical + * mode, same logical ID assigned to multiple APICs, etc. + */ + KVM_APIC_MODE_MAP_DISABLED, +}; struct kvm_apic_map { struct rcu_head rcu; - u8 mode; + enum kvm_apic_logical_mode logical_mode; u32 max_apic_id; union { struct kvm_lapic *xapic_flat_map[8]; @@ -1088,6 +1104,7 @@ struct kvm_hv { u64 hv_reenlightenment_control; u64 hv_tsc_emulation_control; u64 hv_tsc_emulation_status; + u64 hv_invtsc_control; /* How many vCPUs have VP index != vCPU index */ atomic_t num_mismatched_vp_indexes; @@ -1133,6 +1150,18 @@ struct kvm_x86_msr_filter { struct msr_bitmap_range ranges[16]; }; +struct kvm_x86_pmu_event_filter { + __u32 action; + __u32 nevents; + __u32 fixed_counter_bitmap; + __u32 flags; + __u32 nr_includes; + __u32 nr_excludes; + __u64 *includes; + __u64 *excludes; + __u64 events[]; +}; + enum kvm_apicv_inhibit { /********************************************************************/ @@ -1164,6 +1193,12 @@ enum kvm_apicv_inhibit { APICV_INHIBIT_REASON_BLOCKIRQ, /* + * APICv is disabled because not all vCPUs have a 1:1 mapping between + * APIC ID and vCPU, _and_ KVM is not applying its x2APIC hotplug hack. + */ + APICV_INHIBIT_REASON_PHYSICAL_ID_ALIASED, + + /* * For simplicity, the APIC acceleration is inhibited * first time either APIC ID or APIC base are changed by the guest * from their reset values. @@ -1201,6 +1236,12 @@ enum kvm_apicv_inhibit { * AVIC is disabled because SEV doesn't support it. */ APICV_INHIBIT_REASON_SEV, + + /* + * AVIC is disabled because not all vCPUs with a valid LDR have a 1:1 + * mapping between logical ID and vCPU. + */ + APICV_INHIBIT_REASON_LOGICAL_ID_ALIASED, }; struct kvm_arch { @@ -1249,10 +1290,11 @@ struct kvm_arch { struct kvm_apic_map __rcu *apic_map; atomic_t apic_map_dirty; - /* Protects apic_access_memslot_enabled and apicv_inhibit_reasons */ - struct rw_semaphore apicv_update_lock; - bool apic_access_memslot_enabled; + bool apic_access_memslot_inhibited; + + /* Protects apicv_inhibit_reasons */ + struct rw_semaphore apicv_update_lock; unsigned long apicv_inhibit_reasons; gpa_t wall_clock; @@ -1302,7 +1344,6 @@ struct kvm_arch { u32 bsp_vcpu_id; u64 disabled_quirks; - int cpu_dirty_logging_count; enum kvm_irqchip_mode irqchip_mode; u8 nr_reserved_ioapic_pins; @@ -1338,25 +1379,16 @@ struct kvm_arch { /* Guest can access the SGX PROVISIONKEY. */ bool sgx_provisioning_allowed; - struct kvm_pmu_event_filter __rcu *pmu_event_filter; + struct kvm_x86_pmu_event_filter __rcu *pmu_event_filter; struct task_struct *nx_huge_page_recovery_thread; #ifdef CONFIG_X86_64 - /* - * Whether the TDP MMU is enabled for this VM. This contains a - * snapshot of the TDP MMU module parameter from when the VM was - * created and remains unchanged for the life of the VM. If this is - * true, TDP MMU handler functions will run for various MMU - * operations. - */ - bool tdp_mmu_enabled; - /* The number of TDP MMU pages across all roots. */ atomic64_t tdp_mmu_pages; /* - * List of kvm_mmu_page structs being used as roots. - * All kvm_mmu_page structs in the list should have + * List of struct kvm_mmu_pages being used as roots. + * All struct kvm_mmu_pages in the list should have * tdp_mmu_page set. * * For reads, this list is protected by: @@ -1520,6 +1552,8 @@ static inline u16 kvm_lapic_irq_dest_mode(bool dest_mode_logical) struct kvm_x86_ops { const char *name; + int (*check_processor_compatibility)(void); + int (*hardware_enable)(void); void (*hardware_disable)(void); void (*hardware_unsetup)(void); @@ -1608,6 +1642,8 @@ struct kvm_x86_ops { void (*enable_irq_window)(struct kvm_vcpu *vcpu); void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); bool (*check_apicv_inhibit_reasons)(enum kvm_apicv_inhibit reason); + const unsigned long required_apicv_inhibits; + bool allow_apicv_in_x2apic_without_x2apic_virtualization; void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu); void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); void (*hwapic_isr_update)(int isr); @@ -1731,9 +1767,6 @@ struct kvm_x86_nested_ops { }; struct kvm_x86_init_ops { - int (*cpu_has_kvm_support)(void); - int (*disabled_by_bios)(void); - int (*check_processor_compatibility)(void); int (*hardware_setup)(void); unsigned int (*handle_intel_pt_intr)(void); @@ -1760,6 +1793,9 @@ extern struct kvm_x86_ops kvm_x86_ops; #define KVM_X86_OP_OPTIONAL_RET0 KVM_X86_OP #include <asm/kvm-x86-ops.h> +int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops); +void kvm_x86_vendor_exit(void); + #define __KVM_HAVE_ARCH_VM_ALLOC static inline struct kvm *kvm_arch_alloc_vm(void) { @@ -1982,7 +2018,7 @@ gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, bool kvm_apicv_activated(struct kvm *kvm); bool kvm_vcpu_apicv_activated(struct kvm_vcpu *vcpu); -void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu); +void __kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu); void __kvm_set_or_clear_apicv_inhibit(struct kvm *kvm, enum kvm_apicv_inhibit reason, bool set); void kvm_set_or_clear_apicv_inhibit(struct kvm *kvm, @@ -2054,14 +2090,11 @@ enum { TASK_SWITCH_GATE = 3, }; -#define HF_GIF_MASK (1 << 0) -#define HF_NMI_MASK (1 << 3) -#define HF_IRET_MASK (1 << 4) -#define HF_GUEST_MASK (1 << 5) /* VCPU is in guest-mode */ +#define HF_GUEST_MASK (1 << 0) /* VCPU is in guest-mode */ #ifdef CONFIG_KVM_SMM -#define HF_SMM_MASK (1 << 6) -#define HF_SMM_INSIDE_NMI_MASK (1 << 7) +#define HF_SMM_MASK (1 << 1) +#define HF_SMM_INSIDE_NMI_MASK (1 << 2) # define __KVM_VCPU_MULTIPLE_ADDRESS_SPACE # define KVM_ADDRESS_SPACE_NUM 2 diff --git a/arch/x86/include/asm/kvmclock.h b/arch/x86/include/asm/kvmclock.h index 6c5765192102..511b35069187 100644 --- a/arch/x86/include/asm/kvmclock.h +++ b/arch/x86/include/asm/kvmclock.h @@ -8,7 +8,7 @@ extern struct clocksource kvm_clock; DECLARE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu); -static inline struct pvclock_vcpu_time_info *this_cpu_pvti(void) +static __always_inline struct pvclock_vcpu_time_info *this_cpu_pvti(void) { return &this_cpu_read(hv_clock_per_cpu)->pvti; } diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 6e986088817d..9646ed6e8c0b 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -88,6 +88,9 @@ #define MCI_MISC_ADDR_MEM 3 /* memory address */ #define MCI_MISC_ADDR_GENERIC 7 /* generic */ +/* MCi_ADDR register defines */ +#define MCI_ADDR_PHYSADDR GENMASK_ULL(boot_cpu_data.x86_phys_bits - 1, 0) + /* CTL2 register defines */ #define MCI_CTL2_CMCI_EN BIT_ULL(30) #define MCI_CTL2_CMCI_THRESHOLD_MASK 0x7fffULL diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index d5a58bde091c..320566a0443d 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -125,13 +125,13 @@ static inline unsigned int x86_cpuid_family(void) #ifdef CONFIG_MICROCODE extern void __init load_ucode_bsp(void); extern void load_ucode_ap(void); -void reload_early_microcode(void); +void reload_early_microcode(unsigned int cpu); extern bool initrd_gone; void microcode_bsp_resume(void); #else static inline void __init load_ucode_bsp(void) { } static inline void load_ucode_ap(void) { } -static inline void reload_early_microcode(void) { } +static inline void reload_early_microcode(unsigned int cpu) { } static inline void microcode_bsp_resume(void) { } #endif diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h index ac31f9140d07..e6662adf3af4 100644 --- a/arch/x86/include/asm/microcode_amd.h +++ b/arch/x86/include/asm/microcode_amd.h @@ -47,12 +47,12 @@ struct microcode_amd { extern void __init load_ucode_amd_bsp(unsigned int family); extern void load_ucode_amd_ap(unsigned int family); extern int __init save_microcode_in_initrd_amd(unsigned int family); -void reload_ucode_amd(void); +void reload_ucode_amd(unsigned int cpu); #else static inline void __init load_ucode_amd_bsp(unsigned int family) {} static inline void load_ucode_amd_ap(unsigned int family) {} static inline int __init save_microcode_in_initrd_amd(unsigned int family) { return -EINVAL; } -static inline void reload_ucode_amd(void) {} +static inline void reload_ucode_amd(unsigned int cpu) {} #endif #endif /* _ASM_X86_MICROCODE_AMD_H */ diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index b8d40ddeab00..e01aa74a6de7 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -12,6 +12,7 @@ #include <asm/tlbflush.h> #include <asm/paravirt.h> #include <asm/debugreg.h> +#include <asm/gsseg.h> extern atomic64_t last_mm_ctx_id; diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 6d502f3efb0f..4c4c0ec3b62e 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -72,10 +72,16 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output) return hv_status; } +/* Hypercall to the L0 hypervisor */ +static inline u64 hv_do_nested_hypercall(u64 control, void *input, void *output) +{ + return hv_do_hypercall(control | HV_HYPERCALL_NESTED, input, output); +} + /* Fast hypercall with 8 bytes of input and no output */ -static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1) +static inline u64 _hv_do_fast_hypercall8(u64 control, u64 input1) { - u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT; + u64 hv_status; #ifdef CONFIG_X86_64 { @@ -103,10 +109,24 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1) return hv_status; } +static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1) +{ + u64 control = (u64)code | HV_HYPERCALL_FAST_BIT; + + return _hv_do_fast_hypercall8(control, input1); +} + +static inline u64 hv_do_fast_nested_hypercall8(u16 code, u64 input1) +{ + u64 control = (u64)code | HV_HYPERCALL_FAST_BIT | HV_HYPERCALL_NESTED; + + return _hv_do_fast_hypercall8(control, input1); +} + /* Fast hypercall with 16 bytes of input */ -static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2) +static inline u64 _hv_do_fast_hypercall16(u64 control, u64 input1, u64 input2) { - u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT; + u64 hv_status; #ifdef CONFIG_X86_64 { @@ -137,6 +157,20 @@ static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2) return hv_status; } +static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2) +{ + u64 control = (u64)code | HV_HYPERCALL_FAST_BIT; + + return _hv_do_fast_hypercall16(control, input1, input2); +} + +static inline u64 hv_do_fast_nested_hypercall16(u16 code, u64 input1, u64 input2) +{ + u64 control = (u64)code | HV_HYPERCALL_FAST_BIT | HV_HYPERCALL_NESTED; + + return _hv_do_fast_hypercall16(control, input1, input2); +} + extern struct hv_vp_assist_page **hv_vp_assist_page; static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu) @@ -190,36 +224,20 @@ extern bool hv_isolation_type_snp(void); static inline bool hv_is_synic_reg(unsigned int reg) { - if ((reg >= HV_REGISTER_SCONTROL) && - (reg <= HV_REGISTER_SINT15)) - return true; - return false; + return (reg >= HV_REGISTER_SCONTROL) && + (reg <= HV_REGISTER_SINT15); } -static inline u64 hv_get_register(unsigned int reg) +static inline bool hv_is_sint_reg(unsigned int reg) { - u64 value; - - if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) - hv_ghcb_msr_read(reg, &value); - else - rdmsrl(reg, value); - return value; + return (reg >= HV_REGISTER_SINT0) && + (reg <= HV_REGISTER_SINT15); } -static inline void hv_set_register(unsigned int reg, u64 value) -{ - if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) { - hv_ghcb_msr_write(reg, value); - - /* Write proxy bit via wrmsl instruction */ - if (reg >= HV_REGISTER_SINT0 && - reg <= HV_REGISTER_SINT15) - wrmsrl(reg, value | 1 << 20); - } else { - wrmsrl(reg, value); - } -} +u64 hv_get_register(unsigned int reg); +void hv_set_register(unsigned int reg, u64 value); +u64 hv_get_non_nested_register(unsigned int reg); +void hv_set_non_nested_register(unsigned int reg, u64 value); #else /* CONFIG_HYPERV */ static inline void hyperv_init(void) {} @@ -239,6 +257,8 @@ static inline int hyperv_flush_guest_mapping_range(u64 as, } static inline void hv_set_register(unsigned int reg, u64 value) { } static inline u64 hv_get_register(unsigned int reg) { return 0; } +static inline void hv_set_non_nested_register(unsigned int reg, u64 value) { } +static inline u64 hv_get_non_nested_register(unsigned int reg) { return 0; } static inline int hv_set_mem_host_visibility(unsigned long addr, int numpages, bool visible) { diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 37ff47552bcb..ad35355ee43e 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -25,6 +25,7 @@ #define _EFER_SVME 12 /* Enable virtualization */ #define _EFER_LMSLE 13 /* Long Mode Segment Limit Enable */ #define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */ +#define _EFER_AUTOIBRS 21 /* Enable Automatic IBRS */ #define EFER_SCE (1<<_EFER_SCE) #define EFER_LME (1<<_EFER_LME) @@ -33,6 +34,7 @@ #define EFER_SVME (1<<_EFER_SVME) #define EFER_LMSLE (1<<_EFER_LMSLE) #define EFER_FFXSR (1<<_EFER_FFXSR) +#define EFER_AUTOIBRS (1<<_EFER_AUTOIBRS) /* Intel MSRs. Some also available on other CPUs */ @@ -49,6 +51,10 @@ #define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */ #define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT) +/* A mask for bits which the kernel toggles when controlling mitigations */ +#define SPEC_CTRL_MITIGATIONS_MASK (SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD \ + | SPEC_CTRL_RRSBA_DIS_S) + #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ #define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ @@ -189,6 +195,9 @@ #define MSR_TURBO_RATIO_LIMIT1 0x000001ae #define MSR_TURBO_RATIO_LIMIT2 0x000001af +#define MSR_SNOOP_RSP_0 0x00001328 +#define MSR_SNOOP_RSP_1 0x00001329 + #define MSR_LBR_SELECT 0x000001c8 #define MSR_LBR_TOS 0x000001c9 @@ -566,6 +575,26 @@ #define MSR_AMD64_SEV_ES_ENABLED BIT_ULL(MSR_AMD64_SEV_ES_ENABLED_BIT) #define MSR_AMD64_SEV_SNP_ENABLED BIT_ULL(MSR_AMD64_SEV_SNP_ENABLED_BIT) +/* SNP feature bits enabled by the hypervisor */ +#define MSR_AMD64_SNP_VTOM BIT_ULL(3) +#define MSR_AMD64_SNP_REFLECT_VC BIT_ULL(4) +#define MSR_AMD64_SNP_RESTRICTED_INJ BIT_ULL(5) +#define MSR_AMD64_SNP_ALT_INJ BIT_ULL(6) +#define MSR_AMD64_SNP_DEBUG_SWAP BIT_ULL(7) +#define MSR_AMD64_SNP_PREVENT_HOST_IBS BIT_ULL(8) +#define MSR_AMD64_SNP_BTB_ISOLATION BIT_ULL(9) +#define MSR_AMD64_SNP_VMPL_SSS BIT_ULL(10) +#define MSR_AMD64_SNP_SECURE_TSC BIT_ULL(11) +#define MSR_AMD64_SNP_VMGEXIT_PARAM BIT_ULL(12) +#define MSR_AMD64_SNP_IBS_VIRT BIT_ULL(14) +#define MSR_AMD64_SNP_VMSA_REG_PROTECTION BIT_ULL(16) +#define MSR_AMD64_SNP_SMT_PROTECTION BIT_ULL(17) + +/* SNP feature bits reserved for future use. */ +#define MSR_AMD64_SNP_RESERVED_BIT13 BIT_ULL(13) +#define MSR_AMD64_SNP_RESERVED_BIT15 BIT_ULL(15) +#define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, 18) + #define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f /* AMD Collaborative Processor Performance Control MSRs */ @@ -1061,6 +1090,8 @@ /* - AMD: */ #define MSR_IA32_MBA_BW_BASE 0xc0000200 +#define MSR_IA32_SMBA_BW_BASE 0xc0000280 +#define MSR_IA32_EVT_CFG_BASE 0xc0000400 /* MSR_IA32_VMX_MISC bits */ #define MSR_IA32_VMX_MISC_INTEL_PT (1ULL << 14) diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index 3a8fdf881313..778df05f8539 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -26,7 +26,7 @@ #define TPAUSE_C01_STATE 1 #define TPAUSE_C02_STATE 0 -static inline void __monitor(const void *eax, unsigned long ecx, +static __always_inline void __monitor(const void *eax, unsigned long ecx, unsigned long edx) { /* "monitor %eax, %ecx, %edx;" */ @@ -34,7 +34,7 @@ static inline void __monitor(const void *eax, unsigned long ecx, :: "a" (eax), "c" (ecx), "d"(edx)); } -static inline void __monitorx(const void *eax, unsigned long ecx, +static __always_inline void __monitorx(const void *eax, unsigned long ecx, unsigned long edx) { /* "monitorx %eax, %ecx, %edx;" */ @@ -42,7 +42,7 @@ static inline void __monitorx(const void *eax, unsigned long ecx, :: "a" (eax), "c" (ecx), "d"(edx)); } -static inline void __mwait(unsigned long eax, unsigned long ecx) +static __always_inline void __mwait(unsigned long eax, unsigned long ecx) { mds_idle_clear_cpu_buffers(); @@ -77,8 +77,8 @@ static inline void __mwait(unsigned long eax, unsigned long ecx) * EAX (logical) address to monitor * ECX #GP if not zero */ -static inline void __mwaitx(unsigned long eax, unsigned long ebx, - unsigned long ecx) +static __always_inline void __mwaitx(unsigned long eax, unsigned long ebx, + unsigned long ecx) { /* No MDS buffer clear as this is AMD/HYGON only */ @@ -87,7 +87,7 @@ static inline void __mwaitx(unsigned long eax, unsigned long ebx, :: "a" (eax), "b" (ebx), "c" (ecx)); } -static inline void __sti_mwait(unsigned long eax, unsigned long ecx) +static __always_inline void __sti_mwait(unsigned long eax, unsigned long ecx) { mds_idle_clear_cpu_buffers(); /* "mwait %eax, %ecx;" */ @@ -105,7 +105,7 @@ static inline void __sti_mwait(unsigned long eax, unsigned long ecx) * New with Core Duo processors, MWAIT can take some hints based on CPU * capability. */ -static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) +static __always_inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) { if (static_cpu_has_bug(X86_BUG_MONITOR) || !current_set_polling_and_test()) { if (static_cpu_has_bug(X86_BUG_CLFLUSH_MONITOR)) { diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 771b0a2b7a34..e04313e89f4f 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -564,7 +564,7 @@ static __always_inline void mds_user_clear_cpu_buffers(void) * * Clear CPU buffers if the corresponding static key is enabled */ -static inline void mds_idle_clear_cpu_buffers(void) +static __always_inline void mds_idle_clear_cpu_buffers(void) { if (static_branch_likely(&mds_idle_clear)) mds_clear_cpu_buffers(); diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index 9cc82f305f4b..d18e5c332cb9 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h @@ -34,9 +34,8 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr, copy_page(to, from); } -#define alloc_zeroed_user_highpage_movable(vma, vaddr) \ - alloc_page_vma(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, vma, vaddr) -#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE +#define vma_alloc_zeroed_movable_folio(vma, vaddr) \ + vma_alloc_folio(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, 0, vma, vaddr, false) #ifndef __pa #define __pa(x) __phys_addr((unsigned long)(x)) diff --git a/arch/x86/include/asm/page_32.h b/arch/x86/include/asm/page_32.h index df42f8aa99e4..580d71aca65a 100644 --- a/arch/x86/include/asm/page_32.h +++ b/arch/x86/include/asm/page_32.h @@ -15,10 +15,6 @@ extern unsigned long __phys_addr(unsigned long); #define __phys_addr_symbol(x) __phys_addr(x) #define __phys_reloc_hide(x) RELOC_HIDE((x), 0) -#ifdef CONFIG_FLATMEM -#define pfn_valid(pfn) ((pfn) < max_mapnr) -#endif /* CONFIG_FLATMEM */ - #include <linux/string.h> static inline void clear_page(void *page) diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h index 198e03e59ca1..cc6b8e087192 100644 --- a/arch/x86/include/asm/page_64.h +++ b/arch/x86/include/asm/page_64.h @@ -39,10 +39,6 @@ extern unsigned long __phys_addr_symbol(unsigned long); #define __phys_reloc_hide(x) (x) -#ifdef CONFIG_FLATMEM -#define pfn_valid(pfn) ((pfn) < max_pfn) -#endif - void clear_page_orig(void *page); void clear_page_rep(void *page); void clear_page_erms(void *page); diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 73e9522db7c1..cf40e813b3d7 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -26,7 +26,7 @@ DECLARE_STATIC_CALL(pv_sched_clock, dummy_sched_clock); void paravirt_set_sched_clock(u64 (*func)(void)); -static inline u64 paravirt_sched_clock(void) +static __always_inline u64 paravirt_sched_clock(void) { return static_call(pv_sched_clock)(); } @@ -168,7 +168,7 @@ static inline void __write_cr4(unsigned long x) PVOP_VCALL1(cpu.write_cr4, x); } -static inline void arch_safe_halt(void) +static __always_inline void arch_safe_halt(void) { PVOP_VCALL0(irq.safe_halt); } @@ -178,7 +178,9 @@ static inline void halt(void) PVOP_VCALL0(irq.halt); } -static inline void wbinvd(void) +extern noinstr void pv_native_wbinvd(void); + +static __always_inline void wbinvd(void) { PVOP_ALT_VCALL0(cpu.wbinvd, "wbinvd", ALT_NOT(X86_FEATURE_XENPV)); } diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 5d0f6891ae61..8fc15ed5e60b 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -160,6 +160,14 @@ union cpuid10_edx { }; /* + * Intel "Architectural Performance Monitoring extension" CPUID + * detection/enumeration details: + */ +#define ARCH_PERFMON_EXT_LEAF 0x00000023 +#define ARCH_PERFMON_NUM_COUNTER_LEAF_BIT 0x1 +#define ARCH_PERFMON_NUM_COUNTER_LEAF 0x1 + +/* * Intel Architectural LBR CPUID detection/enumeration details: */ union cpuid28_eax { @@ -578,7 +586,7 @@ extern void perf_amd_brs_lopwr_cb(bool lopwr_in); DECLARE_STATIC_CALL(perf_lopwr_cb, perf_amd_brs_lopwr_cb); -static inline void perf_lopwr_cb(bool lopwr_in) +static __always_inline void perf_lopwr_cb(bool lopwr_in) { static_call_mod(perf_lopwr_cb)(lopwr_in); } diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h index 60d0f9015317..e9482a11ac52 100644 --- a/arch/x86/include/asm/pgtable-2level.h +++ b/arch/x86/include/asm/pgtable-2level.h @@ -80,21 +80,37 @@ static inline unsigned long pte_bitop(unsigned long value, unsigned int rightshi return ((value >> rightshift) & mask) << leftshift; } -/* Encode and de-code a swap entry */ +/* + * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that + * are !pte_none() && !pte_present(). + * + * Format of swap PTEs: + * + * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * <----------------- offset ------------------> 0 E <- type --> 0 + * + * E is the exclusive marker that is not stored in swap entries. + */ #define SWP_TYPE_BITS 5 +#define _SWP_TYPE_MASK ((1U << SWP_TYPE_BITS) - 1) +#define _SWP_TYPE_SHIFT (_PAGE_BIT_PRESENT + 1) #define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1) -#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS) +#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > 5) -#define __swp_type(x) (((x).val >> (_PAGE_BIT_PRESENT + 1)) \ - & ((1U << SWP_TYPE_BITS) - 1)) +#define __swp_type(x) (((x).val >> _SWP_TYPE_SHIFT) \ + & _SWP_TYPE_MASK) #define __swp_offset(x) ((x).val >> SWP_OFFSET_SHIFT) #define __swp_entry(type, offset) ((swp_entry_t) { \ - ((type) << (_PAGE_BIT_PRESENT + 1)) \ + (((type) & _SWP_TYPE_MASK) << _SWP_TYPE_SHIFT) \ | ((offset) << SWP_OFFSET_SHIFT) }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low }) #define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val }) +/* We borrow bit 7 to store the exclusive marker in swap PTEs. */ +#define _PAGE_SWP_EXCLUSIVE _PAGE_PSE + /* No inverted PFNs on 2 level page tables */ static inline u64 protnone_mask(u64 val) diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index 967b135fa2c0..9e7c0b719c3c 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -145,8 +145,24 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, } #endif -/* Encode and de-code a swap entry */ +/* + * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that + * are !pte_none() && !pte_present(). + * + * Format of swap PTEs: + * + * 6 6 6 6 5 5 5 5 5 5 5 5 5 5 4 4 4 4 4 4 4 4 4 4 3 3 3 3 3 3 3 3 + * 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 + * < type -> <---------------------- offset ---------------------- + * + * 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * --------------------------------------------> 0 E 0 0 0 0 0 0 0 + * + * E is the exclusive marker that is not stored in swap entries. + */ #define SWP_TYPE_BITS 5 +#define _SWP_TYPE_MASK ((1U << SWP_TYPE_BITS) - 1) #define SWP_OFFSET_FIRST_BIT (_PAGE_BIT_PROTNONE + 1) @@ -154,9 +170,10 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, #define SWP_OFFSET_SHIFT (SWP_OFFSET_FIRST_BIT + SWP_TYPE_BITS) #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS) -#define __swp_type(x) (((x).val) & ((1UL << SWP_TYPE_BITS) - 1)) +#define __swp_type(x) (((x).val) & _SWP_TYPE_MASK) #define __swp_offset(x) ((x).val >> SWP_TYPE_BITS) -#define __swp_entry(type, offset) ((swp_entry_t){(type) | (offset) << SWP_TYPE_BITS}) +#define __swp_entry(type, offset) ((swp_entry_t){((type) & _SWP_TYPE_MASK) \ + | (offset) << SWP_TYPE_BITS}) /* * Normally, __swp_entry() converts from arch-independent swp_entry_t to @@ -184,6 +201,9 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, #define __pte_to_swp_entry(pte) (__swp_entry(__pteval_swp_type(pte), \ __pteval_swp_offset(pte))) +/* We borrow bit 7 to store the exclusive marker in swap PTEs. */ +#define _PAGE_SWP_EXCLUSIVE _PAGE_PSE + #include <asm/pgtable-invert.h> #endif /* _ASM_X86_PGTABLE_3LEVEL_H */ diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 0564edd24ffb..7425f32e5293 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -289,6 +289,11 @@ static inline pte_t pte_clear_flags(pte_t pte, pteval_t clear) return native_make_pte(v & ~clear); } +static inline pte_t pte_wrprotect(pte_t pte) +{ + return pte_clear_flags(pte, _PAGE_RW); +} + #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP static inline int pte_uffd_wp(pte_t pte) { @@ -313,7 +318,7 @@ static inline int pte_uffd_wp(pte_t pte) static inline pte_t pte_mkuffd_wp(pte_t pte) { - return pte_set_flags(pte, _PAGE_UFFD_WP); + return pte_wrprotect(pte_set_flags(pte, _PAGE_UFFD_WP)); } static inline pte_t pte_clear_uffd_wp(pte_t pte) @@ -332,11 +337,6 @@ static inline pte_t pte_mkold(pte_t pte) return pte_clear_flags(pte, _PAGE_ACCESSED); } -static inline pte_t pte_wrprotect(pte_t pte) -{ - return pte_clear_flags(pte, _PAGE_RW); -} - static inline pte_t pte_mkexec(pte_t pte) { return pte_clear_flags(pte, _PAGE_NX); @@ -401,6 +401,11 @@ static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear) return native_make_pmd(v & ~clear); } +static inline pmd_t pmd_wrprotect(pmd_t pmd) +{ + return pmd_clear_flags(pmd, _PAGE_RW); +} + #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP static inline int pmd_uffd_wp(pmd_t pmd) { @@ -409,7 +414,7 @@ static inline int pmd_uffd_wp(pmd_t pmd) static inline pmd_t pmd_mkuffd_wp(pmd_t pmd) { - return pmd_set_flags(pmd, _PAGE_UFFD_WP); + return pmd_wrprotect(pmd_set_flags(pmd, _PAGE_UFFD_WP)); } static inline pmd_t pmd_clear_uffd_wp(pmd_t pmd) @@ -428,11 +433,6 @@ static inline pmd_t pmd_mkclean(pmd_t pmd) return pmd_clear_flags(pmd, _PAGE_DIRTY); } -static inline pmd_t pmd_wrprotect(pmd_t pmd) -{ - return pmd_clear_flags(pmd, _PAGE_RW); -} - static inline pmd_t pmd_mkdirty(pmd_t pmd) { return pmd_set_flags(pmd, _PAGE_DIRTY | _PAGE_SOFT_DIRTY); @@ -1299,8 +1299,6 @@ static inline void update_mmu_cache_pud(struct vm_area_struct *vma, unsigned long addr, pud_t *pud) { } -#ifdef _PAGE_SWP_EXCLUSIVE -#define __HAVE_ARCH_PTE_SWP_EXCLUSIVE static inline pte_t pte_swp_mkexclusive(pte_t pte) { return pte_set_flags(pte, _PAGE_SWP_EXCLUSIVE); @@ -1315,7 +1313,6 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) { return pte_clear_flags(pte, _PAGE_SWP_EXCLUSIVE); } -#endif /* _PAGE_SWP_EXCLUSIVE */ #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY static inline pte_t pte_swp_mksoft_dirty(pte_t pte) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 4e35c66edeb7..8d73004e4cac 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -542,7 +542,6 @@ enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT, IDLE_POLL}; extern void enable_sep_cpu(void); -extern int sysenter_setup(void); /* Defined in head.S */ @@ -697,7 +696,8 @@ bool xen_set_default_idle(void); #endif void __noreturn stop_this_cpu(void *dummy); -void microcode_check(void); +void microcode_check(struct cpuinfo_x86 *prev_info); +void store_cpu_caps(struct cpuinfo_x86 *info); enum l1tf_mitigations { L1TF_MITIGATION_OFF, diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index 19b695ff2c68..0c92db84469d 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -7,6 +7,7 @@ /* some helper functions for xen and kvm pv clock sources */ u64 pvclock_clocksource_read(struct pvclock_vcpu_time_info *src); +u64 pvclock_clocksource_read_nowd(struct pvclock_vcpu_time_info *src); u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src); void pvclock_set_flags(u8 flags); unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src); @@ -39,7 +40,7 @@ bool pvclock_read_retry(const struct pvclock_vcpu_time_info *src, * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, * yielding a 64-bit result. */ -static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift) +static __always_inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift) { u64 product; #ifdef __i386__ diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h index 04c17be9b5fd..bc5b4d788c08 100644 --- a/arch/x86/include/asm/reboot.h +++ b/arch/x86/include/asm/reboot.h @@ -25,6 +25,8 @@ void __noreturn machine_real_restart(unsigned int type); #define MRR_BIOS 0 #define MRR_APM 1 +void cpu_emergency_disable_virtualization(void); + typedef void (*nmi_shootdown_cb)(int, struct pt_regs*); void nmi_panic_self_stop(struct pt_regs *regs); void nmi_shootdown_cpus(nmi_shootdown_cb callback); diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h index aff774775c67..7ba1726b71c7 100644 --- a/arch/x86/include/asm/required-features.h +++ b/arch/x86/include/asm/required-features.h @@ -98,6 +98,7 @@ #define REQUIRED_MASK17 0 #define REQUIRED_MASK18 0 #define REQUIRED_MASK19 0 -#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 20) +#define REQUIRED_MASK20 0 +#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21) #endif /* _ASM_X86_REQUIRED_FEATURES_H */ diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index c390a672d560..794f69625780 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -96,7 +96,7 @@ * * 26 - ESPFIX small SS * 27 - per-cpu [ offset to per-cpu data area ] - * 28 - unused + * 28 - VDSO getcpu * 29 - unused * 30 - unused * 31 - TSS for double fault handler @@ -119,6 +119,7 @@ #define GDT_ENTRY_ESPFIX_SS 26 #define GDT_ENTRY_PERCPU 27 +#define GDT_ENTRY_CPUNODE 28 #define GDT_ENTRY_DOUBLEFAULT_TSS 31 @@ -159,6 +160,8 @@ # define __KERNEL_PERCPU 0 #endif +#define __CPUNODE_SEG (GDT_ENTRY_CPUNODE*8 + 3) + #else /* 64-bit: */ #include <asm/cache.h> @@ -226,8 +229,6 @@ #define GDT_ENTRY_TLS_ENTRIES 3 #define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES* 8) -#ifdef CONFIG_X86_64 - /* Bit size and mask of CPU number stored in the per CPU data (and TSC_AUX) */ #define VDSO_CPUNODE_BITS 12 #define VDSO_CPUNODE_MASK 0xfff @@ -265,7 +266,6 @@ static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node) } #endif /* !__ASSEMBLY__ */ -#endif /* CONFIG_X86_64 */ #ifdef __KERNEL__ diff --git a/arch/x86/include/asm/shared/io.h b/arch/x86/include/asm/shared/io.h index c0ef921c0586..8009d781c2f9 100644 --- a/arch/x86/include/asm/shared/io.h +++ b/arch/x86/include/asm/shared/io.h @@ -5,13 +5,13 @@ #include <linux/types.h> #define BUILDIO(bwl, bw, type) \ -static inline void __out##bwl(type value, u16 port) \ +static __always_inline void __out##bwl(type value, u16 port) \ { \ asm volatile("out" #bwl " %" #bw "0, %w1" \ : : "a"(value), "Nd"(port)); \ } \ \ -static inline type __in##bwl(u16 port) \ +static __always_inline type __in##bwl(u16 port) \ { \ type value; \ asm volatile("in" #bwl " %w1, %" #bw "0" \ diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h index e53f26228fbb..4a03993e0785 100644 --- a/arch/x86/include/asm/shared/tdx.h +++ b/arch/x86/include/asm/shared/tdx.h @@ -8,7 +8,6 @@ #define TDX_HYPERCALL_STANDARD 0 #define TDX_HCALL_HAS_OUTPUT BIT(0) -#define TDX_HCALL_ISSUE_STI BIT(1) #define TDX_CPUID_LEAF_ID 0x21 #define TDX_IDENT "IntelTDX " @@ -22,12 +21,18 @@ * This is a software only structure and not part of the TDX module/VMM ABI. */ struct tdx_hypercall_args { + u64 r8; + u64 r9; u64 r10; u64 r11; u64 r12; u64 r13; u64 r14; u64 r15; + u64 rdi; + u64 rsi; + u64 rbx; + u64 rdx; }; /* Used to request services from the VMM */ diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 35f709f619fb..de48d1389936 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -115,22 +115,11 @@ static inline void wrpkru(u32 pkru) } #endif -static inline void native_wbinvd(void) +static __always_inline void native_wbinvd(void) { asm volatile("wbinvd": : :"memory"); } -extern asmlinkage void asm_load_gs_index(unsigned int selector); - -static inline void native_load_gs_index(unsigned int selector) -{ - unsigned long flags; - - local_irq_save(flags); - asm_load_gs_index(selector); - local_irq_restore(flags); -} - static inline unsigned long __read_cr4(void) { return native_read_cr4(); @@ -179,24 +168,14 @@ static inline void __write_cr4(unsigned long x) native_write_cr4(x); } -static inline void wbinvd(void) +static __always_inline void wbinvd(void) { native_wbinvd(); } - -static inline void load_gs_index(unsigned int selector) -{ -#ifdef CONFIG_X86_64 - native_load_gs_index(selector); -#else - loadsegment(gs, selector); -#endif -} - #endif /* CONFIG_PARAVIRT_XXL */ -static inline void clflush(volatile void *__p) +static __always_inline void clflush(volatile void *__p) { asm volatile("clflush %0" : "+m" (*(volatile char __force *)__p)); } @@ -295,7 +274,7 @@ static inline int enqcmds(void __iomem *dst, const void *src) return 0; } -static inline void tile_release(void) +static __always_inline void tile_release(void) { /* * Instruction opcode for TILERELEASE; supported in binutils diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index f4b87f08f5c5..29832c338cdc 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -184,6 +184,37 @@ void int3_emulate_ret(struct pt_regs *regs) unsigned long ip = int3_emulate_pop(regs); int3_emulate_jmp(regs, ip); } + +static __always_inline +void int3_emulate_jcc(struct pt_regs *regs, u8 cc, unsigned long ip, unsigned long disp) +{ + static const unsigned long jcc_mask[6] = { + [0] = X86_EFLAGS_OF, + [1] = X86_EFLAGS_CF, + [2] = X86_EFLAGS_ZF, + [3] = X86_EFLAGS_CF | X86_EFLAGS_ZF, + [4] = X86_EFLAGS_SF, + [5] = X86_EFLAGS_PF, + }; + + bool invert = cc & 1; + bool match; + + if (cc < 0xc) { + match = regs->flags & jcc_mask[cc >> 1]; + } else { + match = ((regs->flags & X86_EFLAGS_SF) >> X86_EFLAGS_SF_BIT) ^ + ((regs->flags & X86_EFLAGS_OF) >> X86_EFLAGS_OF_BIT); + if (cc >= 0xe) + match = match || (regs->flags & X86_EFLAGS_ZF); + } + + if ((match && !invert) || (!match && invert)) + ip += disp; + + int3_emulate_jmp(regs, ip); +} + #endif /* !CONFIG_UML_X86 */ #endif /* _ASM_X86_TEXT_PATCHING_H */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index f0cb881c1d69..f1cccba52eb9 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -163,7 +163,12 @@ struct thread_info { * GOOD_FRAME if within a frame * BAD_STACK if placed across a frame boundary (or outside stack) * NOT_STACK unable to determine (no frame pointers, etc) + * + * This function reads pointers from the stack and dereferences them. The + * pointers may not have their KMSAN shadow set up properly, which may result + * in false positive reports. Disable instrumentation to avoid those. */ +__no_kmsan_checks static inline int arch_within_stack_frames(const void * const stack, const void * const stackend, const void *obj, unsigned long len) diff --git a/arch/x86/include/asm/time.h b/arch/x86/include/asm/time.h index 8ac563abb567..a53961c64a56 100644 --- a/arch/x86/include/asm/time.h +++ b/arch/x86/include/asm/time.h @@ -8,6 +8,7 @@ extern void hpet_time_init(void); extern void time_init(void); extern bool pit_timer_init(void); +extern bool tsc_clocksource_watchdog_disabled(void); extern struct clock_event_device *global_clock_event; diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h index 2963a2f5dbc4..d7f6592b74a9 100644 --- a/arch/x86/include/asm/vdso.h +++ b/arch/x86/include/asm/vdso.h @@ -45,7 +45,7 @@ extern const struct vdso_image vdso_image_x32; extern const struct vdso_image vdso_image_32; #endif -extern void __init init_vdso_image(const struct vdso_image *image); +extern int __init init_vdso_image(const struct vdso_image *image); extern int map_vdso_once(const struct vdso_image *image, unsigned long addr); diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h index 1936f21ed8cd..4cf6794f9d68 100644 --- a/arch/x86/include/asm/vdso/gettimeofday.h +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -318,6 +318,8 @@ u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult) } #define vdso_calc_delta vdso_calc_delta +int __vdso_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts); + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_VDSO_GETTIMEOFDAY_H */ diff --git a/arch/x86/include/asm/vdso/processor.h b/arch/x86/include/asm/vdso/processor.h index 57b1a7034c64..2cbce97d29ea 100644 --- a/arch/x86/include/asm/vdso/processor.h +++ b/arch/x86/include/asm/vdso/processor.h @@ -18,6 +18,10 @@ static __always_inline void cpu_relax(void) rep_nop(); } +struct getcpu_cache; + +notrace long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused); + #endif /* __ASSEMBLY__ */ #endif /* __ASM_VDSO_PROCESSOR_H */ diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h index 8757078d4442..3b12e6b99412 100644 --- a/arch/x86/include/asm/virtext.h +++ b/arch/x86/include/asm/virtext.h @@ -126,7 +126,21 @@ static inline void cpu_svm_disable(void) wrmsrl(MSR_VM_HSAVE_PA, 0); rdmsrl(MSR_EFER, efer); - wrmsrl(MSR_EFER, efer & ~EFER_SVME); + if (efer & EFER_SVME) { + /* + * Force GIF=1 prior to disabling SVM to ensure INIT and NMI + * aren't blocked, e.g. if a fatal error occurred between CLGI + * and STGI. Note, STGI may #UD if SVM is disabled from NMI + * context between reading EFER and executing STGI. In that + * case, GIF must already be set, otherwise the NMI would have + * been blocked, so just eat the fault. + */ + asm_volatile_goto("1: stgi\n\t" + _ASM_EXTABLE(1b, %l[fault]) + ::: "memory" : fault); +fault: + wrmsrl(MSR_EFER, efer & ~EFER_SVME); + } } /** Makes sure SVM is disabled, if it is supported on the CPU diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index e5e0fe10c692..a2dd24947eb8 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -382,7 +382,7 @@ MULTI_stack_switch(struct multicall_entry *mcl, } #endif -static inline int +static __always_inline int HYPERVISOR_sched_op(int cmd, void *arg) { return _hypercall2(int, sched_op, cmd, arg); diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index 16f548a661cf..5fc35f889cd1 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h @@ -38,9 +38,11 @@ extern struct start_info *xen_start_info; #include <asm/processor.h> +#define XEN_SIGNATURE "XenVMMXenVMM" + static inline uint32_t xen_cpuid_base(void) { - return hypervisor_cpuid_base("XenVMMXenVMM", 2); + return hypervisor_cpuid_base(XEN_SIGNATURE, 2); } struct pci_dev; diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index e48deab8901d..7f467fe05d42 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -9,6 +9,7 @@ #include <linux/types.h> #include <linux/ioctl.h> +#include <linux/stddef.h> #define KVM_PIO_PAGE_OFFSET 1 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2 @@ -507,8 +508,8 @@ struct kvm_nested_state { * KVM_{GET,PUT}_NESTED_STATE ioctl values. */ union { - struct kvm_vmx_nested_state_data vmx[0]; - struct kvm_svm_nested_state_data svm[0]; + __DECLARE_FLEX_ARRAY(struct kvm_vmx_nested_state_data, vmx); + __DECLARE_FLEX_ARRAY(struct kvm_svm_nested_state_data, svm); } data; }; @@ -525,6 +526,35 @@ struct kvm_pmu_event_filter { #define KVM_PMU_EVENT_ALLOW 0 #define KVM_PMU_EVENT_DENY 1 +#define KVM_PMU_EVENT_FLAG_MASKED_EVENTS BIT(0) +#define KVM_PMU_EVENT_FLAGS_VALID_MASK (KVM_PMU_EVENT_FLAG_MASKED_EVENTS) + +/* + * Masked event layout. + * Bits Description + * ---- ----------- + * 7:0 event select (low bits) + * 15:8 umask match + * 31:16 unused + * 35:32 event select (high bits) + * 36:54 unused + * 55 exclude bit + * 63:56 umask mask + */ + +#define KVM_PMU_ENCODE_MASKED_ENTRY(event_select, mask, match, exclude) \ + (((event_select) & 0xFFULL) | (((event_select) & 0XF00ULL) << 24) | \ + (((mask) & 0xFFULL) << 56) | \ + (((match) & 0xFFULL) << 8) | \ + ((__u64)(!!(exclude)) << 55)) + +#define KVM_PMU_MASKED_ENTRY_EVENT_SELECT \ + (GENMASK_ULL(7, 0) | GENMASK_ULL(35, 32)) +#define KVM_PMU_MASKED_ENTRY_UMASK_MASK (GENMASK_ULL(63, 56)) +#define KVM_PMU_MASKED_ENTRY_UMASK_MATCH (GENMASK_ULL(15, 8)) +#define KVM_PMU_MASKED_ENTRY_EXCLUDE (BIT_ULL(55)) +#define KVM_PMU_MASKED_ENTRY_UMASK_MASK_SHIFT (56) + /* for KVM_{GET,SET,HAS}_DEVICE_ATTR */ #define KVM_VCPU_TSC_CTRL 0 /* control group for the timestamp counter (TSC) */ #define KVM_VCPU_TSC_OFFSET 0 /* attribute for the TSC offset */ diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h index f69c168391aa..80e1df482337 100644 --- a/arch/x86/include/uapi/asm/svm.h +++ b/arch/x86/include/uapi/asm/svm.h @@ -116,6 +116,12 @@ #define SVM_VMGEXIT_AP_CREATE 1 #define SVM_VMGEXIT_AP_DESTROY 2 #define SVM_VMGEXIT_HV_FEATURES 0x8000fffd +#define SVM_VMGEXIT_TERM_REQUEST 0x8000fffe +#define SVM_VMGEXIT_TERM_REASON(reason_set, reason_code) \ + /* SW_EXITINFO1[3:0] */ \ + (((((u64)reason_set) & 0xf)) | \ + /* SW_EXITINFO1[11:4] */ \ + ((((u64)reason_code) & 0xff) << 4)) #define SVM_VMGEXIT_UNSUPPORTED_EVENT 0x8000ffff /* Exit code reserved for hypervisor/software use */ |