diff options
author | akpm <akpm@linux-foundation.org> | 2022-06-27 10:31:34 -0700 |
---|---|---|
committer | akpm <akpm@linux-foundation.org> | 2022-06-27 10:31:34 -0700 |
commit | 46a3b1125308f8f90a065eeecfafd2a96b01a36c (patch) | |
tree | 6e080118fbad5aa217d160cedf02f10108ce3bf2 /tools | |
parent | 6edda04ccc7cfb281d139e352dbd5dd933bd2751 (diff) | |
parent | 03c765b0e3b4cb5063276b086c76f7a612856a9a (diff) |
Merge branch 'master' into mm-stable
Diffstat (limited to 'tools')
55 files changed, 1108 insertions, 504 deletions
diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h index e09d6908a21d..8aa0d276a636 100644 --- a/tools/arch/arm64/include/asm/cputype.h +++ b/tools/arch/arm64/include/asm/cputype.h @@ -36,7 +36,7 @@ #define MIDR_VARIANT(midr) \ (((midr) & MIDR_VARIANT_MASK) >> MIDR_VARIANT_SHIFT) #define MIDR_IMPLEMENTOR_SHIFT 24 -#define MIDR_IMPLEMENTOR_MASK (0xff << MIDR_IMPLEMENTOR_SHIFT) +#define MIDR_IMPLEMENTOR_MASK (0xffU << MIDR_IMPLEMENTOR_SHIFT) #define MIDR_IMPLEMENTOR(midr) \ (((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT) @@ -118,6 +118,10 @@ #define APPLE_CPU_PART_M1_ICESTORM 0x022 #define APPLE_CPU_PART_M1_FIRESTORM 0x023 +#define APPLE_CPU_PART_M1_ICESTORM_PRO 0x024 +#define APPLE_CPU_PART_M1_FIRESTORM_PRO 0x025 +#define APPLE_CPU_PART_M1_ICESTORM_MAX 0x028 +#define APPLE_CPU_PART_M1_FIRESTORM_MAX 0x029 #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) @@ -164,6 +168,10 @@ #define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110) #define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM) #define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM) +#define MIDR_APPLE_M1_ICESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_PRO) +#define MIDR_APPLE_M1_FIRESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_PRO) +#define MIDR_APPLE_M1_ICESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_MAX) +#define MIDR_APPLE_M1_FIRESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_MAX) /* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */ #define MIDR_FUJITSU_ERRATUM_010001 MIDR_FUJITSU_A64FX @@ -172,7 +180,7 @@ #ifndef __ASSEMBLY__ -#include "sysreg.h" +#include <asm/sysreg.h> #define read_cpuid(reg) read_sysreg_s(SYS_ ## reg) diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 73e643ae94b6..03acc823838a 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -201,7 +201,7 @@ #define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ -/* FREE! ( 7*32+10) */ +#define X86_FEATURE_XCOMPACTED ( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */ #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ #define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */ @@ -211,7 +211,7 @@ #define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */ #define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ -/* FREE! ( 7*32+20) */ +#define X86_FEATURE_PERFMON_V2 ( 7*32+20) /* AMD Performance Monitoring Version 2 */ #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ #define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */ @@ -238,6 +238,7 @@ #define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */ #define X86_FEATURE_PVUNLOCK ( 8*32+20) /* "" PV unlock function */ #define X86_FEATURE_VCPUPREEMPT ( 8*32+21) /* "" PV vcpu_is_preempted function */ +#define X86_FEATURE_TDX_GUEST ( 8*32+22) /* Intel Trust Domain Extensions Guest */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */ #define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ @@ -315,6 +316,7 @@ #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ #define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */ #define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */ +#define X86_FEATURE_BRS (13*32+31) /* Branch Sampling available */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ @@ -405,6 +407,7 @@ #define X86_FEATURE_SEV (19*32+ 1) /* AMD Secure Encrypted Virtualization */ #define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* "" VM Page Flush MSR is supported */ #define X86_FEATURE_SEV_ES (19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */ +#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* "" Virtual TSC_AUX */ #define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */ /* @@ -443,5 +446,6 @@ #define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */ #define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ #define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */ +#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h index 1ae0fab7d902..36369e76cc63 100644 --- a/tools/arch/x86/include/asm/disabled-features.h +++ b/tools/arch/x86/include/asm/disabled-features.h @@ -62,6 +62,12 @@ # define DISABLE_SGX (1 << (X86_FEATURE_SGX & 31)) #endif +#ifdef CONFIG_INTEL_TDX_GUEST +# define DISABLE_TDX_GUEST 0 +#else +# define DISABLE_TDX_GUEST (1 << (X86_FEATURE_TDX_GUEST & 31)) +#endif + /* * Make sure to add features to the correct mask */ @@ -73,7 +79,7 @@ #define DISABLED_MASK5 0 #define DISABLED_MASK6 0 #define DISABLED_MASK7 (DISABLE_PTI) -#define DISABLED_MASK8 0 +#define DISABLED_MASK8 (DISABLE_TDX_GUEST) #define DISABLED_MASK9 (DISABLE_SGX) #define DISABLED_MASK10 0 #define DISABLED_MASK11 0 diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 403e83b4adc8..d27e0581b777 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -116,6 +116,30 @@ * Not susceptible to * TSX Async Abort (TAA) vulnerabilities. */ +#define ARCH_CAP_SBDR_SSDP_NO BIT(13) /* + * Not susceptible to SBDR and SSDP + * variants of Processor MMIO stale data + * vulnerabilities. + */ +#define ARCH_CAP_FBSDP_NO BIT(14) /* + * Not susceptible to FBSDP variant of + * Processor MMIO stale data + * vulnerabilities. + */ +#define ARCH_CAP_PSDP_NO BIT(15) /* + * Not susceptible to PSDP variant of + * Processor MMIO stale data + * vulnerabilities. + */ +#define ARCH_CAP_FB_CLEAR BIT(17) /* + * VERW clears CPU fill buffer + * even on MDS_NO CPUs. + */ +#define ARCH_CAP_FB_CLEAR_CTRL BIT(18) /* + * MSR_IA32_MCU_OPT_CTRL[FB_CLEAR_DIS] + * bit available to control VERW + * behavior. + */ #define MSR_IA32_FLUSH_CMD 0x0000010b #define L1D_FLUSH BIT(0) /* @@ -133,6 +157,7 @@ #define MSR_IA32_MCU_OPT_CTRL 0x00000123 #define RNGDS_MITG_DIS BIT(0) /* SRBDS support */ #define RTM_ALLOW BIT(1) /* TSX development mode */ +#define FB_CLEAR_DIS BIT(3) /* CPU Fill buffer clear disable */ #define MSR_IA32_SYSENTER_CS 0x00000174 #define MSR_IA32_SYSENTER_ESP 0x00000175 diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index bf6e96011dfe..21614807a2cb 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -428,11 +428,12 @@ struct kvm_sync_regs { struct kvm_vcpu_events events; }; -#define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0) -#define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1) -#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2) -#define KVM_X86_QUIRK_OUT_7E_INC_RIP (1 << 3) -#define KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT (1 << 4) +#define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0) +#define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1) +#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2) +#define KVM_X86_QUIRK_OUT_7E_INC_RIP (1 << 3) +#define KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT (1 << 4) +#define KVM_X86_QUIRK_FIX_HYPERCALL_INSN (1 << 5) #define KVM_STATE_NESTED_FORMAT_VMX 0 #define KVM_STATE_NESTED_FORMAT_SVM 1 diff --git a/tools/arch/x86/include/uapi/asm/svm.h b/tools/arch/x86/include/uapi/asm/svm.h index efa969325ede..f69c168391aa 100644 --- a/tools/arch/x86/include/uapi/asm/svm.h +++ b/tools/arch/x86/include/uapi/asm/svm.h @@ -108,6 +108,14 @@ #define SVM_VMGEXIT_AP_JUMP_TABLE 0x80000005 #define SVM_VMGEXIT_SET_AP_JUMP_TABLE 0 #define SVM_VMGEXIT_GET_AP_JUMP_TABLE 1 +#define SVM_VMGEXIT_PSC 0x80000010 +#define SVM_VMGEXIT_GUEST_REQUEST 0x80000011 +#define SVM_VMGEXIT_EXT_GUEST_REQUEST 0x80000012 +#define SVM_VMGEXIT_AP_CREATION 0x80000013 +#define SVM_VMGEXIT_AP_CREATE_ON_INIT 0 +#define SVM_VMGEXIT_AP_CREATE 1 +#define SVM_VMGEXIT_AP_DESTROY 2 +#define SVM_VMGEXIT_HV_FEATURES 0x8000fffd #define SVM_VMGEXIT_UNSUPPORTED_EVENT 0x8000ffff /* Exit code reserved for hypervisor/software use */ @@ -218,6 +226,11 @@ { SVM_VMGEXIT_NMI_COMPLETE, "vmgexit_nmi_complete" }, \ { SVM_VMGEXIT_AP_HLT_LOOP, "vmgexit_ap_hlt_loop" }, \ { SVM_VMGEXIT_AP_JUMP_TABLE, "vmgexit_ap_jump_table" }, \ + { SVM_VMGEXIT_PSC, "vmgexit_page_state_change" }, \ + { SVM_VMGEXIT_GUEST_REQUEST, "vmgexit_guest_request" }, \ + { SVM_VMGEXIT_EXT_GUEST_REQUEST, "vmgexit_ext_guest_request" }, \ + { SVM_VMGEXIT_AP_CREATION, "vmgexit_ap_creation" }, \ + { SVM_VMGEXIT_HV_FEATURES, "vmgexit_hypervisor_feature" }, \ { SVM_EXIT_ERR, "invalid_guest_state" } diff --git a/tools/include/linux/objtool.h b/tools/include/linux/objtool.h index 6491fa8fba6d..15b940ec1eac 100644 --- a/tools/include/linux/objtool.h +++ b/tools/include/linux/objtool.h @@ -143,6 +143,12 @@ struct unwind_hint { .popsection .endm +.macro STACK_FRAME_NON_STANDARD_FP func:req +#ifdef CONFIG_FRAME_POINTER + STACK_FRAME_NON_STANDARD \func +#endif +.endm + .macro ANNOTATE_NOENDBR .Lhere_\@: .pushsection .discard.noendbr diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index 05c3642aaece..a2def7b27009 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -154,25 +154,77 @@ enum i915_mocs_table_index { I915_MOCS_CACHED, }; -/* +/** + * enum drm_i915_gem_engine_class - uapi engine type enumeration + * * Different engines serve different roles, and there may be more than one - * engine serving each role. enum drm_i915_gem_engine_class provides a - * classification of the role of the engine, which may be used when requesting - * operations to be performed on a certain subset of engines, or for providing - * information about that group. + * engine serving each role. This enum provides a classification of the role + * of the engine, which may be used when requesting operations to be performed + * on a certain subset of engines, or for providing information about that + * group. */ enum drm_i915_gem_engine_class { + /** + * @I915_ENGINE_CLASS_RENDER: + * + * Render engines support instructions used for 3D, Compute (GPGPU), + * and programmable media workloads. These instructions fetch data and + * dispatch individual work items to threads that operate in parallel. + * The threads run small programs (called "kernels" or "shaders") on + * the GPU's execution units (EUs). + */ I915_ENGINE_CLASS_RENDER = 0, + + /** + * @I915_ENGINE_CLASS_COPY: + * + * Copy engines (also referred to as "blitters") support instructions + * that move blocks of data from one location in memory to another, + * or that fill a specified location of memory with fixed data. + * Copy engines can perform pre-defined logical or bitwise operations + * on the source, destination, or pattern data. + */ I915_ENGINE_CLASS_COPY = 1, + + /** + * @I915_ENGINE_CLASS_VIDEO: + * + * Video engines (also referred to as "bit stream decode" (BSD) or + * "vdbox") support instructions that perform fixed-function media + * decode and encode. + */ I915_ENGINE_CLASS_VIDEO = 2, + + /** + * @I915_ENGINE_CLASS_VIDEO_ENHANCE: + * + * Video enhancement engines (also referred to as "vebox") support + * instructions related to image enhancement. + */ I915_ENGINE_CLASS_VIDEO_ENHANCE = 3, - /* should be kept compact */ + /** + * @I915_ENGINE_CLASS_COMPUTE: + * + * Compute engines support a subset of the instructions available + * on render engines: compute engines support Compute (GPGPU) and + * programmable media workloads, but do not support the 3D pipeline. + */ + I915_ENGINE_CLASS_COMPUTE = 4, + + /* Values in this enum should be kept compact. */ + /** + * @I915_ENGINE_CLASS_INVALID: + * + * Placeholder value to represent an invalid engine class assignment. + */ I915_ENGINE_CLASS_INVALID = -1 }; -/* +/** + * struct i915_engine_class_instance - Engine class/instance identifier + * * There may be more than one engine fulfilling any role within the system. * Each engine of a class is given a unique instance number and therefore * any engine can be specified by its class:instance tuplet. APIs that allow @@ -180,10 +232,21 @@ enum drm_i915_gem_engine_class { * for this identification. */ struct i915_engine_class_instance { - __u16 engine_class; /* see enum drm_i915_gem_engine_class */ - __u16 engine_instance; + /** + * @engine_class: + * + * Engine class from enum drm_i915_gem_engine_class + */ + __u16 engine_class; #define I915_ENGINE_CLASS_INVALID_NONE -1 #define I915_ENGINE_CLASS_INVALID_VIRTUAL -2 + + /** + * @engine_instance: + * + * Engine instance. + */ + __u16 engine_instance; }; /** @@ -2657,24 +2720,65 @@ enum drm_i915_perf_record_type { DRM_I915_PERF_RECORD_MAX /* non-ABI */ }; -/* +/** + * struct drm_i915_perf_oa_config + * * Structure to upload perf dynamic configuration into the kernel. */ struct drm_i915_perf_oa_config { - /** String formatted like "%08x-%04x-%04x-%04x-%012x" */ + /** + * @uuid: + * + * String formatted like "%\08x-%\04x-%\04x-%\04x-%\012x" + */ char uuid[36]; + /** + * @n_mux_regs: + * + * Number of mux regs in &mux_regs_ptr. + */ __u32 n_mux_regs; + + /** + * @n_boolean_regs: + * + * Number of boolean regs in &boolean_regs_ptr. + */ __u32 n_boolean_regs; + + /** + * @n_flex_regs: + * + * Number of flex regs in &flex_regs_ptr. + */ __u32 n_flex_regs; - /* - * These fields are pointers to tuples of u32 values (register address, - * value). For example the expected length of the buffer pointed by - * mux_regs_ptr is (2 * sizeof(u32) * n_mux_regs). + /** + * @mux_regs_ptr: + * + * Pointer to tuples of u32 values (register address, value) for mux + * registers. Expected length of buffer is (2 * sizeof(u32) * + * &n_mux_regs). */ __u64 mux_regs_ptr; + + /** + * @boolean_regs_ptr: + * + * Pointer to tuples of u32 values (register address, value) for mux + * registers. Expected length of buffer is (2 * sizeof(u32) * + * &n_boolean_regs). + */ __u64 boolean_regs_ptr; + + /** + * @flex_regs_ptr: + * + * Pointer to tuples of u32 values (register address, value) for mux + * registers. Expected length of buffer is (2 * sizeof(u32) * + * &n_flex_regs). + */ __u64 flex_regs_ptr; }; @@ -2685,12 +2789,24 @@ struct drm_i915_perf_oa_config { * @data_ptr is also depends on the specific @query_id. */ struct drm_i915_query_item { - /** @query_id: The id for this query */ + /** + * @query_id: + * + * The id for this query. Currently accepted query IDs are: + * - %DRM_I915_QUERY_TOPOLOGY_INFO (see struct drm_i915_query_topology_info) + * - %DRM_I915_QUERY_ENGINE_INFO (see struct drm_i915_engine_info) + * - %DRM_I915_QUERY_PERF_CONFIG (see struct drm_i915_query_perf_config) + * - %DRM_I915_QUERY_MEMORY_REGIONS (see struct drm_i915_query_memory_regions) + * - %DRM_I915_QUERY_HWCONFIG_BLOB (see `GuC HWCONFIG blob uAPI`) + * - %DRM_I915_QUERY_GEOMETRY_SUBSLICES (see struct drm_i915_query_topology_info) + */ __u64 query_id; -#define DRM_I915_QUERY_TOPOLOGY_INFO 1 -#define DRM_I915_QUERY_ENGINE_INFO 2 -#define DRM_I915_QUERY_PERF_CONFIG 3 -#define DRM_I915_QUERY_MEMORY_REGIONS 4 +#define DRM_I915_QUERY_TOPOLOGY_INFO 1 +#define DRM_I915_QUERY_ENGINE_INFO 2 +#define DRM_I915_QUERY_PERF_CONFIG 3 +#define DRM_I915_QUERY_MEMORY_REGIONS 4 +#define DRM_I915_QUERY_HWCONFIG_BLOB 5 +#define DRM_I915_QUERY_GEOMETRY_SUBSLICES 6 /* Must be kept compact -- no holes and well documented */ /** @@ -2706,14 +2822,17 @@ struct drm_i915_query_item { /** * @flags: * - * When query_id == DRM_I915_QUERY_TOPOLOGY_INFO, must be 0. + * When &query_id == %DRM_I915_QUERY_TOPOLOGY_INFO, must be 0. * - * When query_id == DRM_I915_QUERY_PERF_CONFIG, must be one of the + * When &query_id == %DRM_I915_QUERY_PERF_CONFIG, must be one of the * following: * - * - DRM_I915_QUERY_PERF_CONFIG_LIST - * - DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID - * - DRM_I915_QUERY_PERF_CONFIG_FOR_UUID + * - %DRM_I915_QUERY_PERF_CONFIG_LIST + * - %DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID + * - %DRM_I915_QUERY_PERF_CONFIG_FOR_UUID + * + * When &query_id == %DRM_I915_QUERY_GEOMETRY_SUBSLICES must contain + * a struct i915_engine_class_instance that references a render engine. */ __u32 flags; #define DRM_I915_QUERY_PERF_CONFIG_LIST 1 @@ -2771,66 +2890,112 @@ struct drm_i915_query { __u64 items_ptr; }; -/* - * Data written by the kernel with query DRM_I915_QUERY_TOPOLOGY_INFO : - * - * data: contains the 3 pieces of information : - * - * - the slice mask with one bit per slice telling whether a slice is - * available. The availability of slice X can be queried with the following - * formula : - * - * (data[X / 8] >> (X % 8)) & 1 - * - * - the subslice mask for each slice with one bit per subslice telling - * whether a subslice is available. Gen12 has dual-subslices, which are - * similar to two gen11 subslices. For gen12, this array represents dual- - * subslices. The availability of subslice Y in slice X can be queried - * with the following formula : - * - * (data[subslice_offset + - * X * subslice_stride + - * Y / 8] >> (Y % 8)) & 1 - * - * - the EU mask for each subslice in each slice with one bit per EU telling - * whether an EU is available. The availability of EU Z in subslice Y in - * slice X can be queried with the following formula : +/** + * struct drm_i915_query_topology_info * - * (data[eu_offset + - * (X * max_subslices + Y) * eu_stride + - * Z / 8] >> (Z % 8)) & 1 + * Describes slice/subslice/EU information queried by + * %DRM_I915_QUERY_TOPOLOGY_INFO */ struct drm_i915_query_topology_info { - /* + /** + * @flags: + * * Unused for now. Must be cleared to zero. */ __u16 flags; + /** + * @max_slices: + * + * The number of bits used to express the slice mask. + */ __u16 max_slices; + + /** + * @max_subslices: + * + * The number of bits used to express the subslice mask. + */ __u16 max_subslices; + + /** + * @max_eus_per_subslice: + * + * The number of bits in the EU mask that correspond to a single + * subslice's EUs. + */ __u16 max_eus_per_subslice; - /* + /** + * @subslice_offset: + * * Offset in data[] at which the subslice masks are stored. */ __u16 subslice_offset; - /* + /** + * @subslice_stride: + * * Stride at which each of the subslice masks for each slice are * stored. */ __u16 subslice_stride; - /* + /** + * @eu_offset: + * * Offset in data[] at which the EU masks are stored. */ __u16 eu_offset; - /* + /** + * @eu_stride: + * * Stride at which each of the EU masks for each subslice are stored. */ __u16 eu_stride; + /** + * @data: + * + * Contains 3 pieces of information : + * + * - The slice mask with one bit per slice telling whether a slice is + * available. The availability of slice X can be queried with the + * following formula : + * + * .. code:: c + * + * (data[X / 8] >> (X % 8)) & 1 + * + * Starting with Xe_HP platforms, Intel hardware no longer has + * traditional slices so i915 will always report a single slice + * (hardcoded slicemask = 0x1) which contains all of the platform's + * subslices. I.e., the mask here does not reflect any of the newer + * hardware concepts such as "gslices" or "cslices" since userspace + * is capable of inferring those from the subslice mask. + * + * - The subslice mask for each slice with one bit per subslice telling + * whether a subslice is available. Starting with Gen12 we use the + * term "subslice" to refer to what the hardware documentation + * describes as a "dual-subslices." The availability of subslice Y + * in slice X can be queried with the following formula : + * + * .. code:: c + * + * (data[subslice_offset + X * subslice_stride + Y / 8] >> (Y % 8)) & 1 + * + * - The EU mask for each subslice in each slice, with one bit per EU + * telling whether an EU is available. The availability of EU Z in + * subslice Y in slice X can be queried with the following formula : + * + * .. code:: c + * + * (data[eu_offset + + * (X * max_subslices + Y) * eu_stride + + * Z / 8 + * ] >> (Z % 8)) & 1 + */ __u8 data[]; }; @@ -2951,52 +3116,68 @@ struct drm_i915_query_engine_info { struct drm_i915_engine_info engines[]; }; -/* - * Data written by the kernel with query DRM_I915_QUERY_PERF_CONFIG. +/** + * struct drm_i915_query_perf_config + * + * Data written by the kernel with query %DRM_I915_QUERY_PERF_CONFIG and + * %DRM_I915_QUERY_GEOMETRY_SUBSLICES. */ struct drm_i915_query_perf_config { union { - /* - * When query_item.flags == DRM_I915_QUERY_PERF_CONFIG_LIST, i915 sets - * this fields to the number of configurations available. + /** + * @n_configs: + * + * When &drm_i915_query_item.flags == + * %DRM_I915_QUERY_PERF_CONFIG_LIST, i915 sets this fields to + * the number of configurations available. */ __u64 n_configs; - /* - * When query_id == DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID, - * i915 will use the value in this field as configuration - * identifier to decide what data to write into config_ptr. + /** + * @config: + * + * When &drm_i915_query_item.flags == + * %DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID, i915 will use the + * value in this field as configuration identifier to decide + * what data to write into config_ptr. */ __u64 config; - /* - * When query_id == DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID, - * i915 will use the value in this field as configuration - * identifier to decide what data to write into config_ptr. + /** + * @uuid: + * + * When &drm_i915_query_item.flags == + * %DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID, i915 will use the + * value in this field as configuration identifier to decide + * what data to write into config_ptr. * * String formatted like "%08x-%04x-%04x-%04x-%012x" */ char uuid[36]; }; - /* + /** + * @flags: + * * Unused for now. Must be cleared to zero. */ __u32 flags; - /* - * When query_item.flags == DRM_I915_QUERY_PERF_CONFIG_LIST, i915 will - * write an array of __u64 of configuration identifiers. + /** + * @data: * - * When query_item.flags == DRM_I915_QUERY_PERF_CONFIG_DATA, i915 will - * write a struct drm_i915_perf_oa_config. If the following fields of - * drm_i915_perf_oa_config are set not set to 0, i915 will write into - * the associated pointers the values of submitted when the + * When &drm_i915_query_item.flags == %DRM_I915_QUERY_PERF_CONFIG_LIST, + * i915 will write an array of __u64 of configuration identifiers. + * + * When &drm_i915_query_item.flags == %DRM_I915_QUERY_PERF_CONFIG_DATA, + * i915 will write a struct drm_i915_perf_oa_config. If the following + * fields of struct drm_i915_perf_oa_config are not set to 0, i915 will + * write into the associated pointers the values of submitted when the * configuration was created : * - * - n_mux_regs - * - n_boolean_regs - * - n_flex_regs + * - &drm_i915_perf_oa_config.n_mux_regs + * - &drm_i915_perf_oa_config.n_boolean_regs + * - &drm_i915_perf_oa_config.n_flex_regs */ __u8 data[]; }; @@ -3135,6 +3316,16 @@ struct drm_i915_query_memory_regions { }; /** + * DOC: GuC HWCONFIG blob uAPI + * + * The GuC produces a blob with information about the current device. + * i915 reads this blob from GuC and makes it available via this uAPI. + * + * The format and meaning of the blob content are documented in the + * Programmer's Reference Manual. + */ + +/** * struct drm_i915_gem_create_ext - Existing gem_create behaviour, with added * extension support using struct i915_user_extension. * diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index e998764f0262..a5e06dcbba13 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -272,6 +272,15 @@ struct prctl_mm_map { # define PR_SCHED_CORE_SCOPE_THREAD_GROUP 1 # define PR_SCHED_CORE_SCOPE_PROCESS_GROUP 2 +/* arm64 Scalable Matrix Extension controls */ +/* Flag values must be in sync with SVE versions */ +#define PR_SME_SET_VL 63 /* set task vector length */ +# define PR_SME_SET_VL_ONEXEC (1 << 18) /* defer effect until exec */ +#define PR_SME_GET_VL 64 /* get task vector length */ +/* Bits common to PR_SME_SET_VL and PR_SME_GET_VL */ +# define PR_SME_VL_LEN_MASK 0xffff +# define PR_SME_VL_INHERIT (1 << 17) /* inherit across exec */ + #define PR_SET_VMA 0x53564d41 # define PR_SET_VMA_ANON_NAME 0 diff --git a/tools/include/uapi/linux/vhost.h b/tools/include/uapi/linux/vhost.h index 5d99e7c242a2..cab645d4a645 100644 --- a/tools/include/uapi/linux/vhost.h +++ b/tools/include/uapi/linux/vhost.h @@ -89,11 +89,6 @@ /* Set or get vhost backend capability */ -/* Use message type V2 */ -#define VHOST_BACKEND_F_IOTLB_MSG_V2 0x1 -/* IOTLB can accept batching hints */ -#define VHOST_BACKEND_F_IOTLB_BATCH 0x2 - #define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64) #define VHOST_GET_BACKEND_FEATURES _IOR(VHOST_VIRTIO, 0x26, __u64) @@ -150,11 +145,30 @@ /* Get the valid iova range */ #define VHOST_VDPA_GET_IOVA_RANGE _IOR(VHOST_VIRTIO, 0x78, \ struct vhost_vdpa_iova_range) - /* Get the config size */ #define VHOST_VDPA_GET_CONFIG_SIZE _IOR(VHOST_VIRTIO, 0x79, __u32) /* Get the count of all virtqueues */ #define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32) +/* Get the number of virtqueue groups. */ +#define VHOST_VDPA_GET_GROUP_NUM _IOR(VHOST_VIRTIO, 0x81, __u32) + +/* Get the number of address spaces. */ +#define VHOST_VDPA_GET_AS_NUM _IOR(VHOST_VIRTIO, 0x7A, unsigned int) + +/* Get the group for a virtqueue: read index, write group in num, + * The virtqueue index is stored in the index field of + * vhost_vring_state. The group for this specific virtqueue is + * returned via num field of vhost_vring_state. + */ +#define VHOST_VDPA_GET_VRING_GROUP _IOWR(VHOST_VIRTIO, 0x7B, \ + struct vhost_vring_state) +/* Set the ASID for a virtqueue group. The group index is stored in + * the index field of vhost_vring_state, the ASID associated with this + * group is stored at num field of vhost_vring_state. + */ +#define VHOST_VDPA_SET_GROUP_ASID _IOW(VHOST_VIRTIO, 0x7C, \ + struct vhost_vring_state) + #endif diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index 5a5bd74f55bd..9c366b3a676d 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -1646,7 +1646,8 @@ Press any other key to refresh statistics immediately. .format(values)) if len(pids) > 1: sys.exit('Error: Multiple processes found (pids: {}). Use "-p"' - ' to specify the desired pid'.format(" ".join(pids))) + ' to specify the desired pid' + .format(" ".join(map(str, pids)))) namespace.pid = pids[0] argparser = argparse.ArgumentParser(description=description_text, diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c index c1d58673f6ef..952f3520d5c2 100644 --- a/tools/lib/perf/evsel.c +++ b/tools/lib/perf/evsel.c @@ -149,23 +149,30 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, int fd, group_fd, *evsel_fd; evsel_fd = FD(evsel, idx, thread); - if (evsel_fd == NULL) - return -EINVAL; + if (evsel_fd == NULL) { + err = -EINVAL; + goto out; + } err = get_group_fd(evsel, idx, thread, &group_fd); if (err < 0) - return err; + goto out; fd = sys_perf_event_open(&evsel->attr, threads->map[thread].pid, cpu, group_fd, 0); - if (fd < 0) - return -errno; + if (fd < 0) { + err = -errno; + goto out; + } *evsel_fd = fd; } } +out: + if (err) + perf_evsel__close(evsel); return err; } diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index a75bf11585b5..54d4e508a092 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -891,7 +891,9 @@ static int copy_kcore_dir(struct perf_inject *inject) if (ret < 0) return ret; pr_debug("%s\n", cmd); - return system(cmd); + ret = system(cmd); + free(cmd); + return ret; } static int output_fd(struct perf_inject *inject) @@ -916,7 +918,7 @@ static int __cmd_inject(struct perf_inject *inject) inject->tool.tracing_data = perf_event__repipe_tracing_data; } - output_data_offset = session->header.data_offset; + output_data_offset = perf_session__data_offset(session->evlist); if (inject->build_id_all) { inject->tool.mmap = perf_event__repipe_buildid_mmap; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 4ce87a8eb7d7..d2ecd4d29624 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -2586,6 +2586,8 @@ int cmd_stat(int argc, const char **argv) if (evlist__initialize_ctlfd(evsel_list, stat_config.ctl_fd, stat_config.ctl_fd_ack)) goto out; + /* Enable ignoring missing threads when -p option is defined. */ + evlist__first(evsel_list)->ignore_missing_thread = target.pid; status = 0; for (run_idx = 0; forever || run_idx < stat_config.run_count; run_idx++) { if (stat_config.run_count != 1 && verbose > 0) diff --git a/tools/perf/tests/bp_account.c b/tools/perf/tests/bp_account.c index d1ebb5561e5b..6f921db33cf9 100644 --- a/tools/perf/tests/bp_account.c +++ b/tools/perf/tests/bp_account.c @@ -151,11 +151,21 @@ static int detect_ioctl(void) static int detect_share(int wp_cnt, int bp_cnt) { struct perf_event_attr attr; - int i, fd[wp_cnt + bp_cnt], ret; + int i, *fd = NULL, ret = -1; + + if (wp_cnt + bp_cnt == 0) + return 0; + + fd = malloc(sizeof(int) * (wp_cnt + bp_cnt)); + if (!fd) + return -1; for (i = 0; i < wp_cnt; i++) { fd[i] = wp_event((void *)&the_var, &attr); - TEST_ASSERT_VAL("failed to create wp\n", fd[i] != -1); + if (fd[i] == -1) { + pr_err("failed to create wp\n"); + goto out; + } } for (; i < (bp_cnt + wp_cnt); i++) { @@ -166,9 +176,11 @@ static int detect_share(int wp_cnt, int bp_cnt) ret = i != (bp_cnt + wp_cnt); +out: while (i--) close(fd[i]); + free(fd); return ret; } diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c index d54c5371c6a6..5c0032fe93ae 100644 --- a/tools/perf/tests/expr.c +++ b/tools/perf/tests/expr.c @@ -97,6 +97,8 @@ static int test__expr(struct test_suite *t __maybe_unused, int subtest __maybe_u ret |= test(ctx, "2.2 > 2.2", 0); ret |= test(ctx, "2.2 < 1.1", 0); ret |= test(ctx, "1.1 > 2.2", 0); + ret |= test(ctx, "1.1e10 < 1.1e100", 1); + ret |= test(ctx, "1.1e2 > 1.1e-2", 1); if (ret) { expr__ctx_free(ctx); diff --git a/tools/perf/tests/shell/lib/perf_csv_output_lint.py b/tools/perf/tests/shell/lib/perf_csv_output_lint.py deleted file mode 100644 index 714f283cfb1b..000000000000 --- a/tools/perf/tests/shell/lib/perf_csv_output_lint.py +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/python -# SPDX-License-Identifier: GPL-2.0 - -import argparse -import sys - -# Basic sanity check of perf CSV output as specified in the man page. -# Currently just checks the number of fields per line in output. - -ap = argparse.ArgumentParser() -ap.add_argument('--no-args', action='store_true') -ap.add_argument('--interval', action='store_true') -ap.add_argument('--system-wide-no-aggr', action='store_true') -ap.add_argument('--system-wide', action='store_true') -ap.add_argument('--event', action='store_true') -ap.add_argument('--per-core', action='store_true') -ap.add_argument('--per-thread', action='store_true') -ap.add_argument('--per-die', action='store_true') -ap.add_argument('--per-node', action='store_true') -ap.add_argument('--per-socket', action='store_true') -ap.add_argument('--separator', default=',', nargs='?') -args = ap.parse_args() - -Lines = sys.stdin.readlines() - -def check_csv_output(exp): - for line in Lines: - if 'failed' not in line: - count = line.count(args.separator) - if count != exp: - sys.stdout.write(''.join(Lines)) - raise RuntimeError(f'wrong number of fields. expected {exp} in {line}') - -try: - if args.no_args or args.system_wide or args.event: - expected_items = 6 - elif args.interval or args.per_thread or args.system_wide_no_aggr: - expected_items = 7 - elif args.per_core or args.per_socket or args.per_node or args.per_die: - expected_items = 8 - else: - ap.print_help() - raise RuntimeError('No checking option specified') - check_csv_output(expected_items) - -except: - sys.stdout.write('Test failed for input: ' + ''.join(Lines)) - raise diff --git a/tools/perf/tests/shell/stat+csv_output.sh b/tools/perf/tests/shell/stat+csv_output.sh index 983220ef3cb4..38c26f3ef4c1 100755 --- a/tools/perf/tests/shell/stat+csv_output.sh +++ b/tools/perf/tests/shell/stat+csv_output.sh @@ -6,20 +6,41 @@ set -e -pythonchecker=$(dirname $0)/lib/perf_csv_output_lint.py -if [ "x$PYTHON" == "x" ] -then - if which python3 > /dev/null - then - PYTHON=python3 - elif which python > /dev/null - then - PYTHON=python - else - echo Skipping test, python not detected please set environment variable PYTHON. - exit 2 - fi -fi +function commachecker() +{ + local -i cnt=0 exp=0 + + case "$1" + in "--no-args") exp=6 + ;; "--system-wide") exp=6 + ;; "--event") exp=6 + ;; "--interval") exp=7 + ;; "--per-thread") exp=7 + ;; "--system-wide-no-aggr") exp=7 + [ $(uname -m) = "s390x" ] && exp=6 + ;; "--per-core") exp=8 + ;; "--per-socket") exp=8 + ;; "--per-node") exp=8 + ;; "--per-die") exp=8 + esac + + while read line + do + # Check for lines beginning with Failed + x=${line:0:6} + [ "$x" = "Failed" ] && continue + + # Count the number of commas + x=$(echo $line | tr -d -c ',') + cnt="${#x}" + # echo $line $cnt + [ "$cnt" -ne "$exp" ] && { + echo "wrong number of fields. expected $exp in $line" 1>&2 + exit 1; + } + done + return 0 +} # Return true if perf_event_paranoid is > $1 and not running as root. function ParanoidAndNotRoot() @@ -30,7 +51,7 @@ function ParanoidAndNotRoot() check_no_args() { echo -n "Checking CSV output: no args " - perf stat -x, true 2>&1 | $PYTHON $pythonchecker --no-args + perf stat -x, true 2>&1 | commachecker --no-args echo "[Success]" } @@ -42,7 +63,7 @@ check_system_wide() echo "[Skip] paranoid and not root" return fi - perf stat -x, -a true 2>&1 | $PYTHON $pythonchecker --system-wide + perf stat -x, -a true 2>&1 | commachecker --system-wide echo "[Success]" } @@ -55,14 +76,14 @@ check_system_wide_no_aggr() return fi echo -n "Checking CSV output: system wide no aggregation " - perf stat -x, -A -a --no-merge true 2>&1 | $PYTHON $pythonchecker --system-wide-no-aggr + perf stat -x, -A -a --no-merge true 2>&1 | commachecker --system-wide-no-aggr echo "[Success]" } check_interval() { echo -n "Checking CSV output: interval " - perf stat -x, -I 1000 true 2>&1 | $PYTHON $pythonchecker --interval + perf stat -x, -I 1000 true 2>&1 | commachecker --interval echo "[Success]" } @@ -70,7 +91,7 @@ check_interval() check_event() { echo -n "Checking CSV output: event " - perf stat -x, -e cpu-clock true 2>&1 | $PYTHON $pythonchecker --event + perf stat -x, -e cpu-clock true 2>&1 | commachecker --event echo "[Success]" } @@ -82,7 +103,7 @@ check_per_core() echo "[Skip] paranoid and not root" return fi - perf stat -x, --per-core -a true 2>&1 | $PYTHON $pythonchecker --per-core + perf stat -x, --per-core -a true 2>&1 | commachecker --per-core echo "[Success]" } @@ -94,7 +115,7 @@ check_per_thread() echo "[Skip] paranoid and not root" return fi - perf stat -x, --per-thread -a true 2>&1 | $PYTHON $pythonchecker --per-thread + perf stat -x, --per-thread -a true 2>&1 | commachecker --per-thread echo "[Success]" } @@ -106,7 +127,7 @@ check_per_die() echo "[Skip] paranoid and not root" return fi - perf stat -x, --per-die -a true 2>&1 | $PYTHON $pythonchecker --per-die + perf stat -x, --per-die -a true 2>&1 | commachecker --per-die echo "[Success]" } @@ -118,7 +139,7 @@ check_per_node() echo "[Skip] paranoid and not root" return fi - perf stat -x, --per-node -a true 2>&1 | $PYTHON $pythonchecker --per-node + perf stat -x, --per-node -a true 2>&1 | commachecker --per-node echo "[Success]" } @@ -130,7 +151,7 @@ check_per_socket() echo "[Skip] paranoid and not root" return fi - perf stat -x, --per-socket -a true 2>&1 | $PYTHON $pythonchecker --per-socket + perf stat -x, --per-socket -a true 2>&1 | commachecker --per-socket echo "[Success]" } diff --git a/tools/perf/tests/shell/test_arm_callgraph_fp.sh b/tools/perf/tests/shell/test_arm_callgraph_fp.sh index 6ffbb27afaba..ec108d45d3c6 100755 --- a/tools/perf/tests/shell/test_arm_callgraph_fp.sh +++ b/tools/perf/tests/shell/test_arm_callgraph_fp.sh @@ -43,7 +43,7 @@ CFLAGS="-g -O0 -fno-inline -fno-omit-frame-pointer" cc $CFLAGS $TEST_PROGRAM_SOURCE -o $TEST_PROGRAM || exit 1 # Add a 1 second delay to skip samples that are not in the leaf() function -perf record -o $PERF_DATA --call-graph fp -e cycles//u -D 1000 -- $TEST_PROGRAM 2> /dev/null & +perf record -o $PERF_DATA --call-graph fp -e cycles//u -D 1000 --user-callchains -- $TEST_PROGRAM 2> /dev/null & PID=$! echo " + Recording (PID=$PID)..." diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index d23a9e322ff5..0b4f61b6cc6b 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -115,7 +115,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) * physical_package_id will be set to -1. Hence skip this * test if physical_package_id returns -1 for cpu from perf_cpu_map. */ - if (strncmp(session->header.env.arch, "powerpc", 7)) { + if (!strncmp(session->header.env.arch, "ppc64le", 7)) { if (cpu__get_socket_id(perf_cpu_map__cpu(map, 0)) == -1) return TEST_SKIP; } diff --git a/tools/perf/trace/beauty/arch_errno_names.sh b/tools/perf/trace/beauty/arch_errno_names.sh index 2c5f72fa8108..37c53bac5f56 100755 --- a/tools/perf/trace/beauty/arch_errno_names.sh +++ b/tools/perf/trace/beauty/arch_errno_names.sh @@ -33,23 +33,13 @@ create_errno_lookup_func() local arch=$(arch_string "$1") local nr name - cat <<EoFuncBegin -static const char *errno_to_name__$arch(int err) -{ - switch (err) { -EoFuncBegin + printf "static const char *errno_to_name__%s(int err)\n{\n\tswitch (err) {\n" $arch while read name nr; do printf '\tcase %d: return "%s";\n' $nr $name done - cat <<EoFuncEnd - default: - return "(unknown)"; - } -} - -EoFuncEnd + printf '\tdefault: return "(unknown)";\n\t}\n}\n' } process_arch() diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h index 6f85f5d957ef..17311ad9f9af 100644 --- a/tools/perf/trace/beauty/include/linux/socket.h +++ b/tools/perf/trace/beauty/include/linux/socket.h @@ -50,6 +50,9 @@ struct linger { struct msghdr { void *msg_name; /* ptr to socket address structure */ int msg_namelen; /* size of socket address structure */ + + int msg_inq; /* output, data left in socket */ + struct iov_iter msg_iter; /* data */ /* @@ -62,8 +65,9 @@ struct msghdr { void __user *msg_control_user; }; bool msg_control_is_user : 1; - __kernel_size_t msg_controllen; /* ancillary data buffer length */ + bool msg_get_inq : 1;/* return INQ after receive */ unsigned int msg_flags; /* flags on received message */ + __kernel_size_t msg_controllen; /* ancillary data buffer length */ struct kiocb *msg_iocb; /* ptr to iocb for async requests */ }; @@ -434,6 +438,7 @@ extern struct file *do_accept(struct file *file, unsigned file_flags, extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen, int flags); extern int __sys_socket(int family, int type, int protocol); +extern struct file *__sys_socket_file(int family, int type, int protocol); extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen); extern int __sys_connect_file(struct file *file, struct sockaddr_storage *addr, int addrlen, int file_flags); diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index 1a80151baed9..d040406f3314 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -387,26 +387,16 @@ static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq, return arm_spe_deliver_synth_event(spe, speq, event, &sample); } -#define SPE_MEM_TYPE (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS | \ - ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS | \ - ARM_SPE_REMOTE_ACCESS) - -static bool arm_spe__is_memory_event(enum arm_spe_sample_type type) -{ - if (type & SPE_MEM_TYPE) - return true; - - return false; -} - static u64 arm_spe__synth_data_source(const struct arm_spe_record *record) { union perf_mem_data_src data_src = { 0 }; if (record->op == ARM_SPE_LD) data_src.mem_op = PERF_MEM_OP_LOAD; - else + else if (record->op == ARM_SPE_ST) data_src.mem_op = PERF_MEM_OP_STORE; + else + return 0; if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) { data_src.mem_lvl = PERF_MEM_LVL_L3; @@ -510,7 +500,11 @@ static int arm_spe_sample(struct arm_spe_queue *speq) return err; } - if (spe->sample_memory && arm_spe__is_memory_event(record->type)) { + /* + * When data_src is zero it means the record is not a memory operation, + * skip to synthesize memory sample for this case. + */ + if (spe->sample_memory && data_src) { err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src); if (err) return err; diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 82f3d46bea70..328668f38c69 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -872,6 +872,30 @@ out_free: return err; } +static int filename__read_build_id_ns(const char *filename, + struct build_id *bid, + struct nsinfo *nsi) +{ + struct nscookie nsc; + int ret; + + nsinfo__mountns_enter(nsi, &nsc); + ret = filename__read_build_id(filename, bid); + nsinfo__mountns_exit(&nsc); + + return ret; +} + +static bool dso__build_id_mismatch(struct dso *dso, const char *name) +{ + struct build_id bid; + + if (filename__read_build_id_ns(name, &bid, dso->nsinfo) < 0) + return false; + + return !dso__build_id_equal(dso, &bid); +} + static int dso__cache_build_id(struct dso *dso, struct machine *machine, void *priv __maybe_unused) { @@ -886,6 +910,10 @@ static int dso__cache_build_id(struct dso *dso, struct machine *machine, is_kallsyms = true; name = machine->mmap_name; } + + if (!is_kallsyms && dso__build_id_mismatch(dso, name)) + return 0; + return build_id_cache__add_b(&dso->bid, name, dso->nsinfo, is_kallsyms, is_vdso); } diff --git a/tools/perf/util/expr.l b/tools/perf/util/expr.l index 0a13eb20c814..4dc8edbfd9ce 100644 --- a/tools/perf/util/expr.l +++ b/tools/perf/util/expr.l @@ -91,7 +91,7 @@ static int literal(yyscan_t scanner) } %} -number ([0-9]+\.?[0-9]*|[0-9]*\.?[0-9]+) +number ([0-9]+\.?[0-9]*|[0-9]*\.?[0-9]+)(e-?[0-9]+)? sch [-,=] spec \\{sch} diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 53332da100e8..6ad629db63b7 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -3686,6 +3686,20 @@ int perf_session__write_header(struct perf_session *session, return perf_session__do_write_header(session, evlist, fd, at_exit, NULL); } +size_t perf_session__data_offset(const struct evlist *evlist) +{ + struct evsel *evsel; + size_t data_offset; + + data_offset = sizeof(struct perf_file_header); + evlist__for_each_entry(evlist, evsel) { + data_offset += evsel->core.ids * sizeof(u64); + } + data_offset += evlist->core.nr_entries * sizeof(struct perf_file_attr); + + return data_offset; +} + int perf_session__inject_header(struct perf_session *session, struct evlist *evlist, int fd, diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 08563c1f1bff..56916dabce7b 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -136,6 +136,8 @@ int perf_session__inject_header(struct perf_session *session, int fd, struct feat_copier *fc); +size_t perf_session__data_offset(const struct evlist *evlist); + void perf_header__set_feat(struct perf_header *header, int feat); void perf_header__clear_feat(struct perf_header *header, int feat); bool perf_header__has_feat(const struct perf_header *header, int feat); diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index ee8fcfa115e5..8f7baeabc5cf 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -1372,6 +1372,7 @@ static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu, *out_evlist = NULL; if (!metric_no_merge || hashmap__size(ids->ids) == 0) { + bool added_event = false; int i; /* * We may fail to share events between metrics because a tool @@ -1393,8 +1394,16 @@ static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu, if (!tmp) return -ENOMEM; ids__insert(ids->ids, tmp); + added_event = true; } } + if (!added_event && hashmap__size(ids->ids) == 0) { + char *tmp = strdup("duration_time"); + + if (!tmp) + return -ENOMEM; + ids__insert(ids->ids, tmp); + } } ret = metricgroup__build_event_string(&events, ids, modifier, has_constraint); diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c index 37622699c91a..6e5b8cce47bf 100644 --- a/tools/perf/util/unwind-libunwind-local.c +++ b/tools/perf/util/unwind-libunwind-local.c @@ -174,7 +174,7 @@ static int elf_section_address_and_offset(int fd, const char *name, u64 *address Elf *elf; GElf_Ehdr ehdr; GElf_Shdr shdr; - int ret; + int ret = -1; elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); if (elf == NULL) diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c index 83ef55e3caa4..2974b44f80fa 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c @@ -121,24 +121,24 @@ static void kprobe_multi_link_api_subtest(void) }) GET_ADDR("bpf_fentry_test1", addrs[0]); - GET_ADDR("bpf_fentry_test2", addrs[1]); - GET_ADDR("bpf_fentry_test3", addrs[2]); - GET_ADDR("bpf_fentry_test4", addrs[3]); - GET_ADDR("bpf_fentry_test5", addrs[4]); - GET_ADDR("bpf_fentry_test6", addrs[5]); - GET_ADDR("bpf_fentry_test7", addrs[6]); + GET_ADDR("bpf_fentry_test3", addrs[1]); + GET_ADDR("bpf_fentry_test4", addrs[2]); + GET_ADDR("bpf_fentry_test5", addrs[3]); + GET_ADDR("bpf_fentry_test6", addrs[4]); + GET_ADDR("bpf_fentry_test7", addrs[5]); + GET_ADDR("bpf_fentry_test2", addrs[6]); GET_ADDR("bpf_fentry_test8", addrs[7]); #undef GET_ADDR - cookies[0] = 1; - cookies[1] = 2; - cookies[2] = 3; - cookies[3] = 4; - cookies[4] = 5; - cookies[5] = 6; - cookies[6] = 7; - cookies[7] = 8; + cookies[0] = 1; /* bpf_fentry_test1 */ + cookies[1] = 2; /* bpf_fentry_test3 */ + cookies[2] = 3; /* bpf_fentry_test4 */ + cookies[3] = 4; /* bpf_fentry_test5 */ + cookies[4] = 5; /* bpf_fentry_test6 */ + cookies[5] = 6; /* bpf_fentry_test7 */ + cookies[6] = 7; /* bpf_fentry_test2 */ + cookies[7] = 8; /* bpf_fentry_test8 */ opts.kprobe_multi.addrs = (const unsigned long *) &addrs; opts.kprobe_multi.cnt = ARRAY_SIZE(addrs); @@ -149,14 +149,14 @@ static void kprobe_multi_link_api_subtest(void) if (!ASSERT_GE(link1_fd, 0, "link1_fd")) goto cleanup; - cookies[0] = 8; - cookies[1] = 7; - cookies[2] = 6; - cookies[3] = 5; - cookies[4] = 4; - cookies[5] = 3; - cookies[6] = 2; - cookies[7] = 1; + cookies[0] = 8; /* bpf_fentry_test1 */ + cookies[1] = 7; /* bpf_fentry_test3 */ + cookies[2] = 6; /* bpf_fentry_test4 */ + cookies[3] = 5; /* bpf_fentry_test5 */ + cookies[4] = 4; /* bpf_fentry_test6 */ + cookies[5] = 3; /* bpf_fentry_test7 */ + cookies[6] = 2; /* bpf_fentry_test2 */ + cookies[7] = 1; /* bpf_fentry_test8 */ opts.kprobe_multi.flags = BPF_F_KPROBE_MULTI_RETURN; prog_fd = bpf_program__fd(skel->progs.test_kretprobe); @@ -181,12 +181,12 @@ static void kprobe_multi_attach_api_subtest(void) struct kprobe_multi *skel = NULL; const char *syms[8] = { "bpf_fentry_test1", - "bpf_fentry_test2", "bpf_fentry_test3", "bpf_fentry_test4", "bpf_fentry_test5", "bpf_fentry_test6", "bpf_fentry_test7", + "bpf_fentry_test2", "bpf_fentry_test8", }; __u64 cookies[8]; @@ -198,14 +198,14 @@ static void kprobe_multi_attach_api_subtest(void) skel->bss->pid = getpid(); skel->bss->test_cookie = true; - cookies[0] = 1; - cookies[1] = 2; - cookies[2] = 3; - cookies[3] = 4; - cookies[4] = 5; - cookies[5] = 6; - cookies[6] = 7; - cookies[7] = 8; + cookies[0] = 1; /* bpf_fentry_test1 */ + cookies[1] = 2; /* bpf_fentry_test3 */ + cookies[2] = 3; /* bpf_fentry_test4 */ + cookies[3] = 4; /* bpf_fentry_test5 */ + cookies[4] = 5; /* bpf_fentry_test6 */ + cookies[5] = 6; /* bpf_fentry_test7 */ + cookies[6] = 7; /* bpf_fentry_test2 */ + cookies[7] = 8; /* bpf_fentry_test8 */ opts.syms = syms; opts.cnt = ARRAY_SIZE(syms); @@ -216,14 +216,14 @@ static void kprobe_multi_attach_api_subtest(void) if (!ASSERT_OK_PTR(link1, "bpf_program__attach_kprobe_multi_opts")) goto cleanup; - cookies[0] = 8; - cookies[1] = 7; - cookies[2] = 6; - cookies[3] = 5; - cookies[4] = 4; - cookies[5] = 3; - cookies[6] = 2; - cookies[7] = 1; + cookies[0] = 8; /* bpf_fentry_test1 */ + cookies[1] = 7; /* bpf_fentry_test3 */ + cookies[2] = 6; /* bpf_fentry_test4 */ + cookies[3] = 5; /* bpf_fentry_test5 */ + cookies[4] = 4; /* bpf_fentry_test6 */ + cookies[5] = 3; /* bpf_fentry_test7 */ + cookies[6] = 2; /* bpf_fentry_test2 */ + cookies[7] = 1; /* bpf_fentry_test8 */ opts.retprobe = true; diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c index 586dc52d6fb9..5b93d5d0bd93 100644 --- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c @@ -364,6 +364,9 @@ static int get_syms(char ***symsp, size_t *cntp) continue; if (!strncmp(name, "rcu_", 4)) continue; + if (!strncmp(name, "__ftrace_invalid_address__", + sizeof("__ftrace_invalid_address__") - 1)) + continue; err = hashmap__add(map, name, NULL); if (err) { free(name); diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c index c4da87ec3ba4..19c70880cfb3 100644 --- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c +++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c @@ -831,6 +831,59 @@ out: bpf_object__close(obj); } +#include "tailcall_bpf2bpf6.skel.h" + +/* Tail call counting works even when there is data on stack which is + * not aligned to 8 bytes. + */ +static void test_tailcall_bpf2bpf_6(void) +{ + struct tailcall_bpf2bpf6 *obj; + int err, map_fd, prog_fd, main_fd, data_fd, i, val; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + + obj = tailcall_bpf2bpf6__open_and_load(); + if (!ASSERT_OK_PTR(obj, "open and load")) + return; + + main_fd = bpf_program__fd(obj->progs.entry); + if (!ASSERT_GE(main_fd, 0, "entry prog fd")) + goto out; + + map_fd = bpf_map__fd(obj->maps.jmp_table); + if (!ASSERT_GE(map_fd, 0, "jmp_table map fd")) + goto out; + + prog_fd = bpf_program__fd(obj->progs.classifier_0); + if (!ASSERT_GE(prog_fd, 0, "classifier_0 prog fd")) + goto out; + + i = 0; + err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY); + if (!ASSERT_OK(err, "jmp_table map update")) + goto out; + + err = bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "entry prog test run"); + ASSERT_EQ(topts.retval, 0, "tailcall retval"); + + data_fd = bpf_map__fd(obj->maps.bss); + if (!ASSERT_GE(map_fd, 0, "bss map fd")) + goto out; + + i = 0; + err = bpf_map_lookup_elem(data_fd, &i, &val); + ASSERT_OK(err, "bss map lookup"); + ASSERT_EQ(val, 1, "done flag is set"); + +out: + tailcall_bpf2bpf6__destroy(obj); +} + void test_tailcalls(void) { if (test__start_subtest("tailcall_1")) @@ -855,4 +908,6 @@ void test_tailcalls(void) test_tailcall_bpf2bpf_4(false); if (test__start_subtest("tailcall_bpf2bpf_5")) test_tailcall_bpf2bpf_4(true); + if (test__start_subtest("tailcall_bpf2bpf_6")) + test_tailcall_bpf2bpf_6(); } diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi.c b/tools/testing/selftests/bpf/progs/kprobe_multi.c index 93510f4f0f3a..08f95a8155d1 100644 --- a/tools/testing/selftests/bpf/progs/kprobe_multi.c +++ b/tools/testing/selftests/bpf/progs/kprobe_multi.c @@ -54,21 +54,21 @@ static void kprobe_multi_check(void *ctx, bool is_return) if (is_return) { SET(kretprobe_test1_result, &bpf_fentry_test1, 8); - SET(kretprobe_test2_result, &bpf_fentry_test2, 7); - SET(kretprobe_test3_result, &bpf_fentry_test3, 6); - SET(kretprobe_test4_result, &bpf_fentry_test4, 5); - SET(kretprobe_test5_result, &bpf_fentry_test5, 4); - SET(kretprobe_test6_result, &bpf_fentry_test6, 3); - SET(kretprobe_test7_result, &bpf_fentry_test7, 2); + SET(kretprobe_test2_result, &bpf_fentry_test2, 2); + SET(kretprobe_test3_result, &bpf_fentry_test3, 7); + SET(kretprobe_test4_result, &bpf_fentry_test4, 6); + SET(kretprobe_test5_result, &bpf_fentry_test5, 5); + SET(kretprobe_test6_result, &bpf_fentry_test6, 4); + SET(kretprobe_test7_result, &bpf_fentry_test7, 3); SET(kretprobe_test8_result, &bpf_fentry_test8, 1); } else { SET(kprobe_test1_result, &bpf_fentry_test1, 1); - SET(kprobe_test2_result, &bpf_fentry_test2, 2); - SET(kprobe_test3_result, &bpf_fentry_test3, 3); - SET(kprobe_test4_result, &bpf_fentry_test4, 4); - SET(kprobe_test5_result, &bpf_fentry_test5, 5); - SET(kprobe_test6_result, &bpf_fentry_test6, 6); - SET(kprobe_test7_result, &bpf_fentry_test7, 7); + SET(kprobe_test2_result, &bpf_fentry_test2, 7); + SET(kprobe_test3_result, &bpf_fentry_test3, 2); + SET(kprobe_test4_result, &bpf_fentry_test4, 3); + SET(kprobe_test5_result, &bpf_fentry_test5, 4); + SET(kprobe_test6_result, &bpf_fentry_test6, 5); + SET(kprobe_test7_result, &bpf_fentry_test7, 6); SET(kprobe_test8_result, &bpf_fentry_test8, 8); } diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf6.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf6.c new file mode 100644 index 000000000000..41ce83da78e8 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf6.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +#define __unused __attribute__((unused)) + +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 1); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} jmp_table SEC(".maps"); + +int done = 0; + +SEC("tc") +int classifier_0(struct __sk_buff *skb __unused) +{ + done = 1; + return 0; +} + +static __noinline +int subprog_tail(struct __sk_buff *skb) +{ + /* Don't propagate the constant to the caller */ + volatile int ret = 1; + + bpf_tail_call_static(skb, &jmp_table, 0); + return ret; +} + +SEC("tc") +int entry(struct __sk_buff *skb) +{ + /* Have data on stack which size is not a multiple of 8 */ + volatile char arr[1] = {}; + + return subprog_tail(skb); +} + +char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/dma/Makefile b/tools/testing/selftests/dma/Makefile index aa8e8b5b3864..cd8c5ece1cba 100644 --- a/tools/testing/selftests/dma/Makefile +++ b/tools/testing/selftests/dma/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += -I../../../../usr/include/ +CFLAGS += -I../../../../include/ TEST_GEN_PROGS := dma_map_benchmark diff --git a/tools/testing/selftests/dma/dma_map_benchmark.c b/tools/testing/selftests/dma/dma_map_benchmark.c index c3b3c09e995e..5c997f17fcbd 100644 --- a/tools/testing/selftests/dma/dma_map_benchmark.c +++ b/tools/testing/selftests/dma/dma_map_benchmark.c @@ -10,8 +10,8 @@ #include <unistd.h> #include <sys/ioctl.h> #include <sys/mman.h> -#include <linux/map_benchmark.h> #include <linux/types.h> +#include <linux/map_benchmark.h> #define NSEC_PER_MSEC 1000000L diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 81470a99ed1c..22423c871ed6 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -37,11 +37,38 @@ ifeq ($(ARCH),riscv) UNAME_M := riscv endif -LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/rbtree.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c lib/perf_test_util.c -LIBKVM_x86_64 = lib/x86_64/apic.c lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S -LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c lib/aarch64/handlers.S lib/aarch64/spinlock.c lib/aarch64/gic.c lib/aarch64/gic_v3.c lib/aarch64/vgic.c -LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c -LIBKVM_riscv = lib/riscv/processor.c lib/riscv/ucall.c +LIBKVM += lib/assert.c +LIBKVM += lib/elf.c +LIBKVM += lib/guest_modes.c +LIBKVM += lib/io.c +LIBKVM += lib/kvm_util.c +LIBKVM += lib/perf_test_util.c +LIBKVM += lib/rbtree.c +LIBKVM += lib/sparsebit.c +LIBKVM += lib/test_util.c + +LIBKVM_x86_64 += lib/x86_64/apic.c +LIBKVM_x86_64 += lib/x86_64/handlers.S +LIBKVM_x86_64 += lib/x86_64/perf_test_util.c +LIBKVM_x86_64 += lib/x86_64/processor.c +LIBKVM_x86_64 += lib/x86_64/svm.c +LIBKVM_x86_64 += lib/x86_64/ucall.c +LIBKVM_x86_64 += lib/x86_64/vmx.c + +LIBKVM_aarch64 += lib/aarch64/gic.c +LIBKVM_aarch64 += lib/aarch64/gic_v3.c +LIBKVM_aarch64 += lib/aarch64/handlers.S +LIBKVM_aarch64 += lib/aarch64/processor.c +LIBKVM_aarch64 += lib/aarch64/spinlock.c +LIBKVM_aarch64 += lib/aarch64/ucall.c +LIBKVM_aarch64 += lib/aarch64/vgic.c + +LIBKVM_s390x += lib/s390x/diag318_test_handler.c +LIBKVM_s390x += lib/s390x/processor.c +LIBKVM_s390x += lib/s390x/ucall.c + +LIBKVM_riscv += lib/riscv/processor.c +LIBKVM_riscv += lib/riscv/ucall.c TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test @@ -173,12 +200,13 @@ LDFLAGS += -pthread $(no-pie-option) $(pgste-option) # $(TEST_GEN_PROGS) starts with $(OUTPUT)/ include ../lib.mk -STATIC_LIBS := $(OUTPUT)/libkvm.a LIBKVM_C := $(filter %.c,$(LIBKVM)) LIBKVM_S := $(filter %.S,$(LIBKVM)) LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C)) LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S)) -EXTRA_CLEAN += $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(STATIC_LIBS) cscope.* +LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) + +EXTRA_CLEAN += $(LIBKVM_OBJS) cscope.* x := $(shell mkdir -p $(sort $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ)))) $(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c @@ -187,13 +215,8 @@ $(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c $(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ -LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) -$(OUTPUT)/libkvm.a: $(LIBKVM_OBJS) - $(AR) crs $@ $^ - x := $(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS)))) -all: $(STATIC_LIBS) -$(TEST_GEN_PROGS): $(STATIC_LIBS) +$(TEST_GEN_PROGS): $(LIBKVM_OBJS) cscope: include_paths = $(LINUX_TOOL_INCLUDE) $(LINUX_HDR_PATH) include lib .. cscope: diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index 7b47ae4f952e..d60a34cdfaee 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -336,8 +336,8 @@ static void run_test(enum vm_guest_mode mode, void *arg) static void help(char *name) { puts(""); - printf("usage: %s [-h] [-i iterations] [-p offset] [-g]" - "[-m mode] [-b vcpu bytes] [-v vcpus] [-o] [-s mem type]" + printf("usage: %s [-h] [-i iterations] [-p offset] [-g] " + "[-m mode] [-n] [-b vcpu bytes] [-v vcpus] [-o] [-s mem type]" "[-x memslots]\n", name); puts(""); printf(" -i: specify iteration counts (default: %"PRIu64")\n", @@ -351,6 +351,7 @@ static void help(char *name) printf(" -p: specify guest physical test memory offset\n" " Warning: a low offset can conflict with the loaded test code.\n"); guest_modes_help(); + printf(" -n: Run the vCPUs in nested mode (L2)\n"); printf(" -b: specify the size of the memory region which should be\n" " dirtied by each vCPU. e.g. 10M or 3G.\n" " (default: 1G)\n"); @@ -387,7 +388,7 @@ int main(int argc, char *argv[]) guest_modes_append_default(); - while ((opt = getopt(argc, argv, "ghi:p:m:b:f:v:os:x:")) != -1) { + while ((opt = getopt(argc, argv, "ghi:p:m:nb:f:v:os:x:")) != -1) { switch (opt) { case 'g': dirty_log_manual_caps = 0; @@ -401,6 +402,9 @@ int main(int argc, char *argv[]) case 'm': guest_modes_cmdline(optarg); break; + case 'n': + perf_test_args.nested = true; + break; case 'b': guest_percpu_mem_size = parse_size(optarg); break; diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h index a86f953d8d36..d822cb670f1c 100644 --- a/tools/testing/selftests/kvm/include/perf_test_util.h +++ b/tools/testing/selftests/kvm/include/perf_test_util.h @@ -30,10 +30,15 @@ struct perf_test_vcpu_args { struct perf_test_args { struct kvm_vm *vm; + /* The starting address and size of the guest test region. */ uint64_t gpa; + uint64_t size; uint64_t guest_page_size; int wr_fract; + /* Run vCPUs in L2 instead of L1, if the architecture supports it. */ + bool nested; + struct perf_test_vcpu_args vcpu_args[KVM_MAX_VCPUS]; }; @@ -49,5 +54,9 @@ void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract); void perf_test_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct perf_test_vcpu_args *)); void perf_test_join_vcpu_threads(int vcpus); +void perf_test_guest_code(uint32_t vcpu_id); + +uint64_t perf_test_nested_pages(int nr_vcpus); +void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus); #endif /* SELFTEST_KVM_PERF_TEST_UTIL_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index d0d51adec76e..6ce185449259 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -482,13 +482,23 @@ void vcpu_set_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid); struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid); void vm_xsave_req_perm(int bit); -enum x86_page_size { - X86_PAGE_SIZE_4K = 0, - X86_PAGE_SIZE_2M, - X86_PAGE_SIZE_1G, +enum pg_level { + PG_LEVEL_NONE, + PG_LEVEL_4K, + PG_LEVEL_2M, + PG_LEVEL_1G, + PG_LEVEL_512G, + PG_LEVEL_NUM }; -void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, - enum x86_page_size page_size); + +#define PG_LEVEL_SHIFT(_level) ((_level - 1) * 9 + 12) +#define PG_LEVEL_SIZE(_level) (1ull << PG_LEVEL_SHIFT(_level)) + +#define PG_SIZE_4K PG_LEVEL_SIZE(PG_LEVEL_4K) +#define PG_SIZE_2M PG_LEVEL_SIZE(PG_LEVEL_2M) +#define PG_SIZE_1G PG_LEVEL_SIZE(PG_LEVEL_1G) + +void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level); /* * Basic CPU control in CR0 @@ -505,9 +515,6 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, #define X86_CR0_CD (1UL<<30) /* Cache Disable */ #define X86_CR0_PG (1UL<<31) /* Paging */ -/* VMX_EPT_VPID_CAP bits */ -#define VMX_EPT_VPID_CAP_AD_BITS (1ULL << 21) - #define XSTATE_XTILE_CFG_BIT 17 #define XSTATE_XTILE_DATA_BIT 18 diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h index 583ceb0d1457..cc3604f8f1d3 100644 --- a/tools/testing/selftests/kvm/include/x86_64/vmx.h +++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h @@ -96,6 +96,9 @@ #define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f #define VMX_MISC_SAVE_EFER_LMA 0x00000020 +#define VMX_EPT_VPID_CAP_1G_PAGES 0x00020000 +#define VMX_EPT_VPID_CAP_AD_BITS 0x00200000 + #define EXIT_REASON_FAILED_VMENTRY 0x80000000 #define EXIT_REASON_EXCEPTION_NMI 0 #define EXIT_REASON_EXTERNAL_INTERRUPT 1 @@ -606,6 +609,7 @@ bool load_vmcs(struct vmx_pages *vmx); bool nested_vmx_supported(void); void nested_vmx_check_supported(void); +bool ept_1g_pages_supported(void); void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, uint64_t nested_paddr, uint64_t paddr); @@ -613,6 +617,8 @@ void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm, uint64_t nested_paddr, uint64_t paddr, uint64_t size); void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm, uint32_t memslot); +void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm, + uint64_t addr, uint64_t size); void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, uint32_t eptp_memslot); void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm); diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c index e0b0164e9af8..be1d9728c4ce 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c +++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c @@ -73,20 +73,19 @@ void ucall_uninit(struct kvm_vm *vm) void ucall(uint64_t cmd, int nargs, ...) { - struct ucall uc = { - .cmd = cmd, - }; + struct ucall uc = {}; va_list va; int i; + WRITE_ONCE(uc.cmd, cmd); nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS; va_start(va, nargs); for (i = 0; i < nargs; ++i) - uc.args[i] = va_arg(va, uint64_t); + WRITE_ONCE(uc.args[i], va_arg(va, uint64_t)); va_end(va); - *ucall_exit_mmio_addr = (vm_vaddr_t)&uc; + WRITE_ONCE(*ucall_exit_mmio_addr, (vm_vaddr_t)&uc); } uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c index 722df3a28791..f989ff91f022 100644 --- a/tools/testing/selftests/kvm/lib/perf_test_util.c +++ b/tools/testing/selftests/kvm/lib/perf_test_util.c @@ -40,7 +40,7 @@ static bool all_vcpu_threads_running; * Continuously write to the first 8 bytes of each page in the * specified region. */ -static void guest_code(uint32_t vcpu_id) +void perf_test_guest_code(uint32_t vcpu_id) { struct perf_test_args *pta = &perf_test_args; struct perf_test_vcpu_args *vcpu_args = &pta->vcpu_args[vcpu_id]; @@ -108,8 +108,9 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus, { struct perf_test_args *pta = &perf_test_args; struct kvm_vm *vm; - uint64_t guest_num_pages; + uint64_t guest_num_pages, slot0_pages = DEFAULT_GUEST_PHY_PAGES; uint64_t backing_src_pagesz = get_backing_src_pagesz(backing_src); + uint64_t region_end_gfn; int i; pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); @@ -135,33 +136,53 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus, slots); /* + * If using nested, allocate extra pages for the nested page tables and + * in-memory data structures. + */ + if (pta->nested) + slot0_pages += perf_test_nested_pages(vcpus); + + /* * Pass guest_num_pages to populate the page tables for test memory. * The memory is also added to memslot 0, but that's a benign side * effect as KVM allows aliasing HVAs in meslots. */ - vm = vm_create_with_vcpus(mode, vcpus, DEFAULT_GUEST_PHY_PAGES, - guest_num_pages, 0, guest_code, NULL); + vm = vm_create_with_vcpus(mode, vcpus, slot0_pages, guest_num_pages, 0, + perf_test_guest_code, NULL); pta->vm = vm; + /* Put the test region at the top guest physical memory. */ + region_end_gfn = vm_get_max_gfn(vm) + 1; + +#ifdef __x86_64__ + /* + * When running vCPUs in L2, restrict the test region to 48 bits to + * avoid needing 5-level page tables to identity map L2. + */ + if (pta->nested) + region_end_gfn = min(region_end_gfn, (1UL << 48) / pta->guest_page_size); +#endif /* * If there should be more memory in the guest test region than there * can be pages in the guest, it will definitely cause problems. */ - TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm), + TEST_ASSERT(guest_num_pages < region_end_gfn, "Requested more guest memory than address space allows.\n" " guest pages: %" PRIx64 " max gfn: %" PRIx64 " vcpus: %d wss: %" PRIx64 "]\n", - guest_num_pages, vm_get_max_gfn(vm), vcpus, + guest_num_pages, region_end_gfn - 1, vcpus, vcpu_memory_bytes); - pta->gpa = (vm_get_max_gfn(vm) - guest_num_pages) * pta->guest_page_size; + pta->gpa = (region_end_gfn - guest_num_pages) * pta->guest_page_size; pta->gpa = align_down(pta->gpa, backing_src_pagesz); #ifdef __s390x__ /* Align to 1M (segment size) */ pta->gpa = align_down(pta->gpa, 1 << 20); #endif - pr_info("guest physical test memory offset: 0x%lx\n", pta->gpa); + pta->size = guest_num_pages * pta->guest_page_size; + pr_info("guest physical test memory: [0x%lx, 0x%lx)\n", + pta->gpa, pta->gpa + pta->size); /* Add extra memory slots for testing */ for (i = 0; i < slots; i++) { @@ -178,6 +199,11 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus, perf_test_setup_vcpus(vm, vcpus, vcpu_memory_bytes, partition_vcpu_memory_access); + if (pta->nested) { + pr_info("Configuring vCPUs to run in L2 (nested).\n"); + perf_test_setup_nested(vm, vcpus); + } + ucall_init(vm, NULL); /* Export the shared variables to the guest. */ @@ -198,6 +224,17 @@ void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract) sync_global_to_guest(vm, perf_test_args); } +uint64_t __weak perf_test_nested_pages(int nr_vcpus) +{ + return 0; +} + +void __weak perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus) +{ + pr_info("%s() not support on this architecture, skipping.\n", __func__); + exit(KSFT_SKIP); +} + static void *vcpu_thread_main(void *data) { struct vcpu_thread *vcpu = data; diff --git a/tools/testing/selftests/kvm/lib/x86_64/perf_test_util.c b/tools/testing/selftests/kvm/lib/x86_64/perf_test_util.c new file mode 100644 index 000000000000..e258524435a0 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/x86_64/perf_test_util.c @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * x86_64-specific extensions to perf_test_util.c. + * + * Copyright (C) 2022, Google, Inc. + */ +#include <stdio.h> +#include <stdlib.h> +#include <linux/bitmap.h> +#include <linux/bitops.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "perf_test_util.h" +#include "../kvm_util_internal.h" +#include "processor.h" +#include "vmx.h" + +void perf_test_l2_guest_code(uint64_t vcpu_id) +{ + perf_test_guest_code(vcpu_id); + vmcall(); +} + +extern char perf_test_l2_guest_entry[]; +__asm__( +"perf_test_l2_guest_entry:" +" mov (%rsp), %rdi;" +" call perf_test_l2_guest_code;" +" ud2;" +); + +static void perf_test_l1_guest_code(struct vmx_pages *vmx, uint64_t vcpu_id) +{ +#define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + unsigned long *rsp; + + GUEST_ASSERT(vmx->vmcs_gpa); + GUEST_ASSERT(prepare_for_vmx_operation(vmx)); + GUEST_ASSERT(load_vmcs(vmx)); + GUEST_ASSERT(ept_1g_pages_supported()); + + rsp = &l2_guest_stack[L2_GUEST_STACK_SIZE - 1]; + *rsp = vcpu_id; + prepare_vmcs(vmx, perf_test_l2_guest_entry, rsp); + + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + GUEST_DONE(); +} + +uint64_t perf_test_nested_pages(int nr_vcpus) +{ + /* + * 513 page tables is enough to identity-map 256 TiB of L2 with 1G + * pages and 4-level paging, plus a few pages per-vCPU for data + * structures such as the VMCS. + */ + return 513 + 10 * nr_vcpus; +} + +void perf_test_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm) +{ + uint64_t start, end; + + prepare_eptp(vmx, vm, 0); + + /* + * Identity map the first 4G and the test region with 1G pages so that + * KVM can shadow the EPT12 with the maximum huge page size supported + * by the backing source. + */ + nested_identity_map_1g(vmx, vm, 0, 0x100000000ULL); + + start = align_down(perf_test_args.gpa, PG_SIZE_1G); + end = align_up(perf_test_args.gpa + perf_test_args.size, PG_SIZE_1G); + nested_identity_map_1g(vmx, vm, start, end - start); +} + +void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus) +{ + struct vmx_pages *vmx, *vmx0 = NULL; + struct kvm_regs regs; + vm_vaddr_t vmx_gva; + int vcpu_id; + + nested_vmx_check_supported(); + + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { + vmx = vcpu_alloc_vmx(vm, &vmx_gva); + + if (vcpu_id == 0) { + perf_test_setup_ept(vmx, vm); + vmx0 = vmx; + } else { + /* Share the same EPT table across all vCPUs. */ + vmx->eptp = vmx0->eptp; + vmx->eptp_hva = vmx0->eptp_hva; + vmx->eptp_gpa = vmx0->eptp_gpa; + } + + /* + * Override the vCPU to run perf_test_l1_guest_code() which will + * bounce it into L2 before calling perf_test_guest_code(). + */ + vcpu_regs_get(vm, vcpu_id, ®s); + regs.rip = (unsigned long) perf_test_l1_guest_code; + vcpu_regs_set(vm, vcpu_id, ®s); + vcpu_args_set(vm, vcpu_id, 2, vmx_gva, vcpu_id); + } +} diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 33ea5e9955d9..ead7011ee8f6 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -158,7 +158,7 @@ static void *virt_get_pte(struct kvm_vm *vm, uint64_t pt_pfn, uint64_t vaddr, int level) { uint64_t *page_table = addr_gpa2hva(vm, pt_pfn << vm->page_shift); - int index = vaddr >> (vm->page_shift + level * 9) & 0x1ffu; + int index = (vaddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu; return &page_table[index]; } @@ -167,14 +167,14 @@ static uint64_t *virt_create_upper_pte(struct kvm_vm *vm, uint64_t pt_pfn, uint64_t vaddr, uint64_t paddr, - int level, - enum x86_page_size page_size) + int current_level, + int target_level) { - uint64_t *pte = virt_get_pte(vm, pt_pfn, vaddr, level); + uint64_t *pte = virt_get_pte(vm, pt_pfn, vaddr, current_level); if (!(*pte & PTE_PRESENT_MASK)) { *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK; - if (level == page_size) + if (current_level == target_level) *pte |= PTE_LARGE_MASK | (paddr & PHYSICAL_PAGE_MASK); else *pte |= vm_alloc_page_table(vm) & PHYSICAL_PAGE_MASK; @@ -184,20 +184,19 @@ static uint64_t *virt_create_upper_pte(struct kvm_vm *vm, * a hugepage at this level, and that there isn't a hugepage at * this level. */ - TEST_ASSERT(level != page_size, + TEST_ASSERT(current_level != target_level, "Cannot create hugepage at level: %u, vaddr: 0x%lx\n", - page_size, vaddr); + current_level, vaddr); TEST_ASSERT(!(*pte & PTE_LARGE_MASK), "Cannot create page table at level: %u, vaddr: 0x%lx\n", - level, vaddr); + current_level, vaddr); } return pte; } -void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, - enum x86_page_size page_size) +void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level) { - const uint64_t pg_size = 1ull << ((page_size * 9) + 12); + const uint64_t pg_size = PG_LEVEL_SIZE(level); uint64_t *pml4e, *pdpe, *pde; uint64_t *pte; @@ -222,20 +221,20 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, * early if a hugepage was created. */ pml4e = virt_create_upper_pte(vm, vm->pgd >> vm->page_shift, - vaddr, paddr, 3, page_size); + vaddr, paddr, PG_LEVEL_512G, level); if (*pml4e & PTE_LARGE_MASK) return; - pdpe = virt_create_upper_pte(vm, PTE_GET_PFN(*pml4e), vaddr, paddr, 2, page_size); + pdpe = virt_create_upper_pte(vm, PTE_GET_PFN(*pml4e), vaddr, paddr, PG_LEVEL_1G, level); if (*pdpe & PTE_LARGE_MASK) return; - pde = virt_create_upper_pte(vm, PTE_GET_PFN(*pdpe), vaddr, paddr, 1, page_size); + pde = virt_create_upper_pte(vm, PTE_GET_PFN(*pdpe), vaddr, paddr, PG_LEVEL_2M, level); if (*pde & PTE_LARGE_MASK) return; /* Fill in page table entry. */ - pte = virt_get_pte(vm, PTE_GET_PFN(*pde), vaddr, 0); + pte = virt_get_pte(vm, PTE_GET_PFN(*pde), vaddr, PG_LEVEL_4K); TEST_ASSERT(!(*pte & PTE_PRESENT_MASK), "PTE already present for 4k page at vaddr: 0x%lx\n", vaddr); *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK); @@ -243,7 +242,7 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) { - __virt_pg_map(vm, vaddr, paddr, X86_PAGE_SIZE_4K); + __virt_pg_map(vm, vaddr, paddr, PG_LEVEL_4K); } static uint64_t *_vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid, diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c index d089d8b850b5..b77a01d0a271 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c @@ -198,6 +198,16 @@ bool load_vmcs(struct vmx_pages *vmx) return true; } +static bool ept_vpid_cap_supported(uint64_t mask) +{ + return rdmsr(MSR_IA32_VMX_EPT_VPID_CAP) & mask; +} + +bool ept_1g_pages_supported(void) +{ + return ept_vpid_cap_supported(VMX_EPT_VPID_CAP_1G_PAGES); +} + /* * Initialize the control fields to the most basic settings possible. */ @@ -215,7 +225,7 @@ static inline void init_vmcs_control_fields(struct vmx_pages *vmx) struct eptPageTablePointer eptp = { .memory_type = VMX_BASIC_MEM_TYPE_WB, .page_walk_length = 3, /* + 1 */ - .ad_enabled = !!(rdmsr(MSR_IA32_VMX_EPT_VPID_CAP) & VMX_EPT_VPID_CAP_AD_BITS), + .ad_enabled = ept_vpid_cap_supported(VMX_EPT_VPID_CAP_AD_BITS), .address = vmx->eptp_gpa >> PAGE_SHIFT_4K, }; @@ -392,80 +402,93 @@ void nested_vmx_check_supported(void) } } -void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, - uint64_t nested_paddr, uint64_t paddr) +static void nested_create_pte(struct kvm_vm *vm, + struct eptPageTableEntry *pte, + uint64_t nested_paddr, + uint64_t paddr, + int current_level, + int target_level) +{ + if (!pte->readable) { + pte->writable = true; + pte->readable = true; + pte->executable = true; + pte->page_size = (current_level == target_level); + if (pte->page_size) + pte->address = paddr >> vm->page_shift; + else + pte->address = vm_alloc_page_table(vm) >> vm->page_shift; + } else { + /* + * Entry already present. Assert that the caller doesn't want + * a hugepage at this level, and that there isn't a hugepage at + * this level. + */ + TEST_ASSERT(current_level != target_level, + "Cannot create hugepage at level: %u, nested_paddr: 0x%lx\n", + current_level, nested_paddr); + TEST_ASSERT(!pte->page_size, + "Cannot create page table at level: %u, nested_paddr: 0x%lx\n", + current_level, nested_paddr); + } +} + + +void __nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, + uint64_t nested_paddr, uint64_t paddr, int target_level) { - uint16_t index[4]; - struct eptPageTableEntry *pml4e; + const uint64_t page_size = PG_LEVEL_SIZE(target_level); + struct eptPageTableEntry *pt = vmx->eptp_hva, *pte; + uint16_t index; TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " "unknown or unsupported guest mode, mode: 0x%x", vm->mode); - TEST_ASSERT((nested_paddr % vm->page_size) == 0, + TEST_ASSERT((nested_paddr >> 48) == 0, + "Nested physical address 0x%lx requires 5-level paging", + nested_paddr); + TEST_ASSERT((nested_paddr % page_size) == 0, "Nested physical address not on page boundary,\n" - " nested_paddr: 0x%lx vm->page_size: 0x%x", - nested_paddr, vm->page_size); + " nested_paddr: 0x%lx page_size: 0x%lx", + nested_paddr, page_size); TEST_ASSERT((nested_paddr >> vm->page_shift) <= vm->max_gfn, "Physical address beyond beyond maximum supported,\n" " nested_paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", paddr, vm->max_gfn, vm->page_size); - TEST_ASSERT((paddr % vm->page_size) == 0, + TEST_ASSERT((paddr % page_size) == 0, "Physical address not on page boundary,\n" - " paddr: 0x%lx vm->page_size: 0x%x", - paddr, vm->page_size); + " paddr: 0x%lx page_size: 0x%lx", + paddr, page_size); TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn, "Physical address beyond beyond maximum supported,\n" " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", paddr, vm->max_gfn, vm->page_size); - index[0] = (nested_paddr >> 12) & 0x1ffu; - index[1] = (nested_paddr >> 21) & 0x1ffu; - index[2] = (nested_paddr >> 30) & 0x1ffu; - index[3] = (nested_paddr >> 39) & 0x1ffu; - - /* Allocate page directory pointer table if not present. */ - pml4e = vmx->eptp_hva; - if (!pml4e[index[3]].readable) { - pml4e[index[3]].address = vm_alloc_page_table(vm) >> vm->page_shift; - pml4e[index[3]].writable = true; - pml4e[index[3]].readable = true; - pml4e[index[3]].executable = true; - } + for (int level = PG_LEVEL_512G; level >= PG_LEVEL_4K; level--) { + index = (nested_paddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu; + pte = &pt[index]; - /* Allocate page directory table if not present. */ - struct eptPageTableEntry *pdpe; - pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size); - if (!pdpe[index[2]].readable) { - pdpe[index[2]].address = vm_alloc_page_table(vm) >> vm->page_shift; - pdpe[index[2]].writable = true; - pdpe[index[2]].readable = true; - pdpe[index[2]].executable = true; - } + nested_create_pte(vm, pte, nested_paddr, paddr, level, target_level); - /* Allocate page table if not present. */ - struct eptPageTableEntry *pde; - pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size); - if (!pde[index[1]].readable) { - pde[index[1]].address = vm_alloc_page_table(vm) >> vm->page_shift; - pde[index[1]].writable = true; - pde[index[1]].readable = true; - pde[index[1]].executable = true; - } + if (pte->page_size) + break; - /* Fill in page table entry. */ - struct eptPageTableEntry *pte; - pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size); - pte[index[0]].address = paddr >> vm->page_shift; - pte[index[0]].writable = true; - pte[index[0]].readable = true; - pte[index[0]].executable = true; + pt = addr_gpa2hva(vm, pte->address * vm->page_size); + } /* * For now mark these as accessed and dirty because the only * testcase we have needs that. Can be reconsidered later. */ - pte[index[0]].accessed = true; - pte[index[0]].dirty = true; + pte->accessed = true; + pte->dirty = true; + +} + +void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, + uint64_t nested_paddr, uint64_t paddr) +{ + __nested_pg_map(vmx, vm, nested_paddr, paddr, PG_LEVEL_4K); } /* @@ -476,7 +499,7 @@ void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, * nested_paddr - Nested guest physical address to map * paddr - VM Physical Address * size - The size of the range to map - * eptp_memslot - Memory region slot for new virtual translation tables + * level - The level at which to map the range * * Output Args: None * @@ -485,22 +508,29 @@ void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, * Within the VM given by vm, creates a nested guest translation for the * page range starting at nested_paddr to the page range starting at paddr. */ -void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm, - uint64_t nested_paddr, uint64_t paddr, uint64_t size) +void __nested_map(struct vmx_pages *vmx, struct kvm_vm *vm, + uint64_t nested_paddr, uint64_t paddr, uint64_t size, + int level) { - size_t page_size = vm->page_size; + size_t page_size = PG_LEVEL_SIZE(level); size_t npages = size / page_size; TEST_ASSERT(nested_paddr + size > nested_paddr, "Vaddr overflow"); TEST_ASSERT(paddr + size > paddr, "Paddr overflow"); while (npages--) { - nested_pg_map(vmx, vm, nested_paddr, paddr); + __nested_pg_map(vmx, vm, nested_paddr, paddr, level); nested_paddr += page_size; paddr += page_size; } } +void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm, + uint64_t nested_paddr, uint64_t paddr, uint64_t size) +{ + __nested_map(vmx, vm, nested_paddr, paddr, size, PG_LEVEL_4K); +} + /* Prepare an identity extended page table that maps all the * physical pages in VM. */ @@ -525,6 +555,13 @@ void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm, } } +/* Identity map a region with 1GiB Pages. */ +void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm, + uint64_t addr, uint64_t size) +{ + __nested_map(vmx, vm, addr, addr, size, PG_LEVEL_1G); +} + void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, uint32_t eptp_memslot) { diff --git a/tools/testing/selftests/kvm/max_guest_memory_test.c b/tools/testing/selftests/kvm/max_guest_memory_test.c index 3875c4b23a04..15f046e19cb2 100644 --- a/tools/testing/selftests/kvm/max_guest_memory_test.c +++ b/tools/testing/selftests/kvm/max_guest_memory_test.c @@ -244,7 +244,7 @@ int main(int argc, char *argv[]) #ifdef __x86_64__ /* Identity map memory in the guest using 1gb pages. */ for (i = 0; i < slot_size; i += size_1gb) - __virt_pg_map(vm, gpa + i, gpa + i, X86_PAGE_SIZE_1G); + __virt_pg_map(vm, gpa + i, gpa + i, PG_LEVEL_1G); #else for (i = 0; i < slot_size; i += vm_get_page_size(vm)) virt_pg_map(vm, gpa + i, gpa + i); diff --git a/tools/testing/selftests/kvm/x86_64/mmu_role_test.c b/tools/testing/selftests/kvm/x86_64/mmu_role_test.c index da2325fcad87..bdecd532f935 100644 --- a/tools/testing/selftests/kvm/x86_64/mmu_role_test.c +++ b/tools/testing/selftests/kvm/x86_64/mmu_role_test.c @@ -35,7 +35,7 @@ static void mmu_role_test(u32 *cpuid_reg, u32 evil_cpuid_val) run = vcpu_state(vm, VCPU_ID); /* Map 1gb page without a backing memlot. */ - __virt_pg_map(vm, MMIO_GPA, MMIO_GPA, X86_PAGE_SIZE_1G); + __virt_pg_map(vm, MMIO_GPA, MMIO_GPA, PG_LEVEL_1G); r = _vcpu_run(vm, VCPU_ID); diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 2a2d240cdc1b..1a5cc3cd97ec 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -7,10 +7,31 @@ else ifneq ($(filter -%,$(LLVM)),) LLVM_SUFFIX := $(LLVM) endif -CC := $(LLVM_PREFIX)clang$(LLVM_SUFFIX) +CLANG_TARGET_FLAGS_arm := arm-linux-gnueabi +CLANG_TARGET_FLAGS_arm64 := aarch64-linux-gnu +CLANG_TARGET_FLAGS_hexagon := hexagon-linux-musl +CLANG_TARGET_FLAGS_m68k := m68k-linux-gnu +CLANG_TARGET_FLAGS_mips := mipsel-linux-gnu +CLANG_TARGET_FLAGS_powerpc := powerpc64le-linux-gnu +CLANG_TARGET_FLAGS_riscv := riscv64-linux-gnu +CLANG_TARGET_FLAGS_s390 := s390x-linux-gnu +CLANG_TARGET_FLAGS_x86 := x86_64-linux-gnu +CLANG_TARGET_FLAGS := $(CLANG_TARGET_FLAGS_$(ARCH)) + +ifeq ($(CROSS_COMPILE),) +ifeq ($(CLANG_TARGET_FLAGS),) +$(error Specify CROSS_COMPILE or add '--target=' option to lib.mk +else +CLANG_FLAGS += --target=$(CLANG_TARGET_FLAGS) +endif # CLANG_TARGET_FLAGS +else +CLANG_FLAGS += --target=$(notdir $(CROSS_COMPILE:%-=%)) +endif # CROSS_COMPILE + +CC := $(LLVM_PREFIX)clang$(LLVM_SUFFIX) $(CLANG_FLAGS) -fintegrated-as else CC := $(CROSS_COMPILE)gcc -endif +endif # LLVM ifeq (0,$(MAKELEVEL)) ifeq ($(OUTPUT),) diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index b984f8c8d523..a29f79618934 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -37,4 +37,3 @@ gro ioam6_parser toeplitz cmsg_sender -bind_bhash_test diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 464df13831f2..7ea54af55490 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -59,7 +59,6 @@ TEST_GEN_FILES += toeplitz TEST_GEN_FILES += cmsg_sender TEST_GEN_FILES += stress_reuseport_listen TEST_PROGS += test_vxlan_vnifiltering.sh -TEST_GEN_FILES += bind_bhash_test TEST_FILES := settings @@ -70,5 +69,4 @@ include bpf/Makefile $(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma $(OUTPUT)/tcp_mmap: LDLIBS += -lpthread -$(OUTPUT)/bind_bhash_test: LDLIBS += -lpthread $(OUTPUT)/tcp_inq: LDLIBS += -lpthread diff --git a/tools/testing/selftests/net/bind_bhash_test.c b/tools/testing/selftests/net/bind_bhash_test.c deleted file mode 100644 index 252e73754e76..000000000000 --- a/tools/testing/selftests/net/bind_bhash_test.c +++ /dev/null @@ -1,119 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * This times how long it takes to bind to a port when the port already - * has multiple sockets in its bhash table. - * - * In the setup(), we populate the port's bhash table with - * MAX_THREADS * MAX_CONNECTIONS number of entries. - */ - -#include <unistd.h> -#include <stdio.h> -#include <netdb.h> -#include <pthread.h> - -#define MAX_THREADS 600 -#define MAX_CONNECTIONS 40 - -static const char *bind_addr = "::1"; -static const char *port; - -static int fd_array[MAX_THREADS][MAX_CONNECTIONS]; - -static int bind_socket(int opt, const char *addr) -{ - struct addrinfo *res, hint = {}; - int sock_fd, reuse = 1, err; - - sock_fd = socket(AF_INET6, SOCK_STREAM, 0); - if (sock_fd < 0) { - perror("socket fd err"); - return -1; - } - - hint.ai_family = AF_INET6; - hint.ai_socktype = SOCK_STREAM; - - err = getaddrinfo(addr, port, &hint, &res); - if (err) { - perror("getaddrinfo failed"); - return -1; - } - - if (opt) { - err = setsockopt(sock_fd, SOL_SOCKET, opt, &reuse, sizeof(reuse)); - if (err) { - perror("setsockopt failed"); - return -1; - } - } - - err = bind(sock_fd, res->ai_addr, res->ai_addrlen); - if (err) { - perror("failed to bind to port"); - return -1; - } - - return sock_fd; -} - -static void *setup(void *arg) -{ - int sock_fd, i; - int *array = (int *)arg; - - for (i = 0; i < MAX_CONNECTIONS; i++) { - sock_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, bind_addr); - if (sock_fd < 0) - return NULL; - array[i] = sock_fd; - } - - return NULL; -} - -int main(int argc, const char *argv[]) -{ - int listener_fd, sock_fd, i, j; - pthread_t tid[MAX_THREADS]; - clock_t begin, end; - - if (argc != 2) { - printf("Usage: listener <port>\n"); - return -1; - } - - port = argv[1]; - - listener_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, bind_addr); - if (listen(listener_fd, 100) < 0) { - perror("listen failed"); - return -1; - } - - /* Set up threads to populate the bhash table entry for the port */ - for (i = 0; i < MAX_THREADS; i++) - pthread_create(&tid[i], NULL, setup, fd_array[i]); - - for (i = 0; i < MAX_THREADS; i++) - pthread_join(tid[i], NULL); - - begin = clock(); - - /* Bind to the same port on a different address */ - sock_fd = bind_socket(0, "2001:0db8:0:f101::1"); - - end = clock(); - - printf("time spent = %f\n", (double)(end - begin) / CLOCKS_PER_SEC); - - /* clean up */ - close(sock_fd); - close(listener_fd); - for (i = 0; i < MAX_THREADS; i++) { - for (j = 0; i < MAX_THREADS; i++) - close(fd_array[i][j]); - } - - return 0; -} diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 54701c8b0cd7..03b586760164 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -70,6 +70,10 @@ NSB_LO_IP6=2001:db8:2::2 NL_IP=172.17.1.1 NL_IP6=2001:db8:4::1 +# multicast and broadcast addresses +MCAST_IP=224.0.0.1 +BCAST_IP=255.255.255.255 + MD5_PW=abc123 MD5_WRONG_PW=abc1234 @@ -308,6 +312,9 @@ addr2str() 127.0.0.1) echo "loopback";; ::1) echo "IPv6 loopback";; + ${BCAST_IP}) echo "broadcast";; + ${MCAST_IP}) echo "multicast";; + ${NSA_IP}) echo "ns-A IP";; ${NSA_IP6}) echo "ns-A IPv6";; ${NSA_LO_IP}) echo "ns-A loopback IP";; @@ -1793,12 +1800,33 @@ ipv4_addr_bind_novrf() done # - # raw socket with nonlocal bind + # tests for nonlocal bind # a=${NL_IP} log_start - run_cmd nettest -s -R -P icmp -f -l ${a} -I ${NSA_DEV} -b - log_test_addr ${a} $? 0 "Raw socket bind to nonlocal address after device bind" + run_cmd nettest -s -R -f -l ${a} -b + log_test_addr ${a} $? 0 "Raw socket bind to nonlocal address" + + log_start + run_cmd nettest -s -f -l ${a} -b + log_test_addr ${a} $? 0 "TCP socket bind to nonlocal address" + + log_start + run_cmd nettest -s -D -P icmp -f -l ${a} -b + log_test_addr ${a} $? 0 "ICMP socket bind to nonlocal address" + + # + # check that ICMP sockets cannot bind to broadcast and multicast addresses + # + a=${BCAST_IP} + log_start + run_cmd nettest -s -D -P icmp -l ${a} -b + log_test_addr ${a} $? 1 "ICMP socket bind to broadcast address" + + a=${MCAST_IP} + log_start + run_cmd nettest -s -D -P icmp -l ${a} -b + log_test_addr ${a} $? 1 "ICMP socket bind to multicast address" # # tcp sockets @@ -1850,13 +1878,34 @@ ipv4_addr_bind_vrf() log_test_addr ${a} $? 1 "Raw socket bind to out of scope address after VRF bind" # - # raw socket with nonlocal bind + # tests for nonlocal bind # a=${NL_IP} log_start - run_cmd nettest -s -R -P icmp -f -l ${a} -I ${VRF} -b + run_cmd nettest -s -R -f -l ${a} -I ${VRF} -b log_test_addr ${a} $? 0 "Raw socket bind to nonlocal address after VRF bind" + log_start + run_cmd nettest -s -f -l ${a} -I ${VRF} -b + log_test_addr ${a} $? 0 "TCP socket bind to nonlocal address after VRF bind" + + log_start + run_cmd nettest -s -D -P icmp -f -l ${a} -I ${VRF} -b + log_test_addr ${a} $? 0 "ICMP socket bind to nonlocal address after VRF bind" + + # + # check that ICMP sockets cannot bind to broadcast and multicast addresses + # + a=${BCAST_IP} + log_start + run_cmd nettest -s -D -P icmp -l ${a} -I ${VRF} -b + log_test_addr ${a} $? 1 "ICMP socket bind to broadcast address after VRF bind" + + a=${MCAST_IP} + log_start + run_cmd nettest -s -D -P icmp -l ${a} -I ${VRF} -b + log_test_addr ${a} $? 1 "ICMP socket bind to multicast address after VRF bind" + # # tcp sockets # @@ -1889,10 +1938,12 @@ ipv4_addr_bind() log_subsection "No VRF" setup + set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null ipv4_addr_bind_novrf log_subsection "With VRF" setup "yes" + set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null ipv4_addr_bind_vrf } diff --git a/tools/testing/selftests/netfilter/nft_concat_range.sh b/tools/testing/selftests/netfilter/nft_concat_range.sh index b35010cc7f6a..a6991877e50c 100755 --- a/tools/testing/selftests/netfilter/nft_concat_range.sh +++ b/tools/testing/selftests/netfilter/nft_concat_range.sh @@ -31,7 +31,7 @@ BUGS="flush_remove_add reload" # List of possible paths to pktgen script from kernel tree for performance tests PKTGEN_SCRIPT_PATHS=" - ../../../samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh + ../../../../samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh pktgen/pktgen_bench_xmit_mode_netif_receive.sh" # Definition of set types: diff --git a/tools/testing/selftests/vm/ksm_tests.c b/tools/testing/selftests/vm/ksm_tests.c index 2fcf24312da8..f5e4e0bbd081 100644 --- a/tools/testing/selftests/vm/ksm_tests.c +++ b/tools/testing/selftests/vm/ksm_tests.c @@ -54,6 +54,7 @@ static int ksm_write_sysfs(const char *file_path, unsigned long val) } if (fprintf(f, "%lu", val) < 0) { perror("fprintf"); + fclose(f); return 1; } fclose(f); @@ -72,6 +73,7 @@ static int ksm_read_sysfs(const char *file_path, unsigned long *val) } if (fscanf(f, "%lu", val) != 1) { perror("fscanf"); + fclose(f); return 1; } fclose(f); |