From ed938a4bfc58cd86eddcb4b1c9d582a435ffcf7b Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 4 Mar 2021 17:44:07 +0000 Subject: arm64: defconfig: Use DEBUG_INFO_REDUCED We've had DEBUG_INFO enabled for arm64 defconfigs since the initial commit. This is probably not that frequently used but substantially inflates the size of the build tree and amount of I/O needed during the build. This was causing issues with storage usage in some automated CI environments which don't expect defconfigs to be quite this big, and generally increases the resource consumption for both them and people doing local builds. The main use case for the debug info is decoding things with scripts/faddr2line but that doesn't need the full DEBUG_INFO, DEBUG_INFO_REDUCED is enough for it, so enable that by default. Without this patch my build tree is 6.8G, with it the size drops to 2G (smaller than the 6.4G for allmodconfig!). Suggested-by: Catalin Marinas Reported-by: Guillaume Tucker Signed-off-by: Mark Brown Acked-by: Kevin Hilman Acked-by: Will Deacon Link: https://lore.kernel.org/r/20210304174407.17537-1-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/configs/defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index d612f633b771..8793a9cb9d4b 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -1156,6 +1156,7 @@ CONFIG_CRYPTO_DEV_HISI_TRNG=m CONFIG_CMA_SIZE_MBYTES=32 CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_REDUCED=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y CONFIG_DEBUG_KERNEL=y -- cgit v1.2.3 From 98c5ec77c7c5e19e2112825525db5cede8d3f939 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Thu, 25 Feb 2021 05:56:59 -0800 Subject: ARM64: enable GENERIC_FIND_FIRST_BIT ARM64 doesn't implement find_first_{zero}_bit in arch code and doesn't enable it in a config. It leads to using find_next_bit() which is less efficient: 0000000000000000 : 0: aa0003e4 mov x4, x0 4: aa0103e0 mov x0, x1 8: b4000181 cbz x1, 38 c: f9400083 ldr x3, [x4] 10: d2800802 mov x2, #0x40 // #64 14: 91002084 add x4, x4, #0x8 18: b40000c3 cbz x3, 30 1c: 14000008 b 3c 20: f8408483 ldr x3, [x4], #8 24: 91010045 add x5, x2, #0x40 28: b50000c3 cbnz x3, 40 2c: aa0503e2 mov x2, x5 30: eb02001f cmp x0, x2 34: 54ffff68 b.hi 20 // b.pmore 38: d65f03c0 ret 3c: d2800002 mov x2, #0x0 // #0 40: dac00063 rbit x3, x3 44: dac01063 clz x3, x3 48: 8b020062 add x2, x3, x2 4c: eb02001f cmp x0, x2 50: 9a829000 csel x0, x0, x2, ls // ls = plast 54: d65f03c0 ret ... 
0000000000000118 <_find_next_bit.constprop.1>: 118: eb02007f cmp x3, x2 11c: 540002e2 b.cs 178 <_find_next_bit.constprop.1+0x60> // b.hs, b.nlast 120: d346fc66 lsr x6, x3, #6 124: f8667805 ldr x5, [x0, x6, lsl #3] 128: b4000061 cbz x1, 134 <_find_next_bit.constprop.1+0x1c> 12c: f8667826 ldr x6, [x1, x6, lsl #3] 130: 8a0600a5 and x5, x5, x6 134: ca0400a6 eor x6, x5, x4 138: 92800005 mov x5, #0xffffffffffffffff // #-1 13c: 9ac320a5 lsl x5, x5, x3 140: 927ae463 and x3, x3, #0xffffffffffffffc0 144: ea0600a5 ands x5, x5, x6 148: 54000120 b.eq 16c <_find_next_bit.constprop.1+0x54> // b.none 14c: 1400000e b 184 <_find_next_bit.constprop.1+0x6c> 150: d346fc66 lsr x6, x3, #6 154: f8667805 ldr x5, [x0, x6, lsl #3] 158: b4000061 cbz x1, 164 <_find_next_bit.constprop.1+0x4c> 15c: f8667826 ldr x6, [x1, x6, lsl #3] 160: 8a0600a5 and x5, x5, x6 164: eb05009f cmp x4, x5 168: 540000c1 b.ne 180 <_find_next_bit.constprop.1+0x68> // b.any 16c: 91010063 add x3, x3, #0x40 170: eb03005f cmp x2, x3 174: 54fffee8 b.hi 150 <_find_next_bit.constprop.1+0x38> // b.pmore 178: aa0203e0 mov x0, x2 17c: d65f03c0 ret 180: ca050085 eor x5, x4, x5 184: dac000a5 rbit x5, x5 188: dac010a5 clz x5, x5 18c: 8b0300a3 add x3, x5, x3 190: eb03005f cmp x2, x3 194: 9a839042 csel x2, x2, x3, ls // ls = plast 198: aa0203e0 mov x0, x2 19c: d65f03c0 ret ... 0000000000000238 : 238: a9bf7bfd stp x29, x30, [sp, #-16]! 23c: aa0203e3 mov x3, x2 240: d2800004 mov x4, #0x0 // #0 244: aa0103e2 mov x2, x1 248: 910003fd mov x29, sp 24c: d2800001 mov x1, #0x0 // #0 250: 97ffffb2 bl 118 <_find_next_bit.constprop.1> 254: a8c17bfd ldp x29, x30, [sp], #16 258: d65f03c0 ret Enabling find_{first,next}_bit() would also benefit for_each_{set,clear}_bit(). On A-53 find_first_bit() is almost twice faster than find_next_bit(), according to lib/find_bit_benchmark (thanks to Alexey for testing): GENERIC_FIND_FIRST_BIT=n: [7126084.948181] find_first_bit: 47389224 ns, 16357 iterations [7126085.032315] find_first_bit: 19048193 ns, 655 iterations GENERIC_FIND_FIRST_BIT=y: [ 84.158068] find_first_bit: 27193319 ns, 16406 iterations [ 84.233005] find_first_bit: 11082437 ns, 656 iterations GENERIC_FIND_FIRST_BIT=n bloats the kernel despite that it disables generation of find_{first,next}_bit(): yury:linux$ scripts/bloat-o-meter vmlinux vmlinux.ffb add/remove: 4/1 grow/shrink: 19/251 up/down: 564/-1692 (-1128) ... Overall, GENERIC_FIND_FIRST_BIT=n is harmful both in terms of performance and code size, and it's better to have GENERIC_FIND_FIRST_BIT enabled. Tested-by: Alexey Klimov Signed-off-by: Yury Norov Acked-by: Will Deacon Link: https://lore.kernel.org/r/20210225135700.1381396-2-yury.norov@gmail.com Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 5656e7aacd69..55d1c22f107c 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -108,6 +108,7 @@ config ARM64 select GENERIC_CPU_AUTOPROBE select GENERIC_CPU_VULNERABILITIES select GENERIC_EARLY_IOREMAP + select GENERIC_FIND_FIRST_BIT select GENERIC_IDLE_POLL_SETUP select GENERIC_IRQ_IPI select GENERIC_IRQ_MULTI_HANDLER -- cgit v1.2.3 From c1fd78a77704318de34ba4b780cd99a5ef96d162 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 10 Mar 2021 11:49:40 +0100 Subject: arm64: mm: add missing P4D definitions and use them consistently Even though level 0, 1 and 2 descriptors share the same attribute encodings, let's be a bit more consistent about using the right one at the right level. 
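As a concrete illustration of that shared encoding (values taken from the pgtable-hwdef.h hunks below): a table descriptor has bits [1:0] set to 0b11 at every level, so the level 0, 1 and 2 "table" macros all expand to the same bit pattern, and mixing them up is harmless at runtime, merely misleading to read.

------------
/* Identical encodings; only the carrying type differs per level: */
#define P4D_TYPE_TABLE		(_AT(p4dval_t, 3) << 0)	/* level 0 */
#define PUD_TYPE_TABLE		(_AT(pudval_t, 3) << 0)	/* level 1 */
#define PMD_TYPE_TABLE		(_AT(pmdval_t, 3) << 0)	/* level 2 */
------------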
So add new macros for level 0/P4D definitions, and clean up some inconsistencies involving these macros. Acked-by: Mark Rutland Reviewed-by: Anshuman Khandual Signed-off-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210310104942.174584-2-ardb@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgalloc.h | 4 ++-- arch/arm64/include/asm/pgtable-hwdef.h | 9 +++++++++ arch/arm64/mm/kasan_init.c | 4 ++-- arch/arm64/mm/mmu.c | 6 +++--- 4 files changed, 16 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index 3c6a7f5988b1..27cc643d0509 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -27,7 +27,7 @@ static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot) static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp) { - __pud_populate(pudp, __pa(pmdp), PMD_TYPE_TABLE); + __pud_populate(pudp, __pa(pmdp), PUD_TYPE_TABLE); } #else static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot) @@ -45,7 +45,7 @@ static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot) static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4dp, pud_t *pudp) { - __p4d_populate(p4dp, __pa(pudp), PUD_TYPE_TABLE); + __p4d_populate(p4dp, __pa(pudp), P4D_TYPE_TABLE); } #else static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot) diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 42442a0ae2ab..e64e77a345b2 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -94,6 +94,15 @@ /* * Hardware page table definitions. * + * Level 0 descriptor (P4D). + */ +#define P4D_TYPE_TABLE (_AT(p4dval_t, 3) << 0) +#define P4D_TABLE_BIT (_AT(p4dval_t, 1) << 1) +#define P4D_TYPE_MASK (_AT(p4dval_t, 3) << 0) +#define P4D_TYPE_SECT (_AT(p4dval_t, 1) << 0) +#define P4D_SECT_RDONLY (_AT(p4dval_t, 1) << 7) /* AP[2] */ + +/* * Level 1 descriptor (PUD). */ #define PUD_TYPE_TABLE (_AT(pudval_t, 3) << 0) diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index d8e66c78440e..9fe40cbbd8c0 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -79,7 +79,7 @@ static pmd_t *__init kasan_pmd_offset(pud_t *pudp, unsigned long addr, int node, phys_addr_t pmd_phys = early ? __pa_symbol(kasan_early_shadow_pmd) : kasan_alloc_zeroed_page(node); - __pud_populate(pudp, pmd_phys, PMD_TYPE_TABLE); + __pud_populate(pudp, pmd_phys, PUD_TYPE_TABLE); } return early ? pmd_offset_kimg(pudp, addr) : pmd_offset(pudp, addr); @@ -92,7 +92,7 @@ static pud_t *__init kasan_pud_offset(p4d_t *p4dp, unsigned long addr, int node, phys_addr_t pud_phys = early ? __pa_symbol(kasan_early_shadow_pud) : kasan_alloc_zeroed_page(node); - __p4d_populate(p4dp, pud_phys, PMD_TYPE_TABLE); + __p4d_populate(p4dp, pud_phys, P4D_TYPE_TABLE); } return early ? 
pud_offset_kimg(p4dp, addr) : pud_offset(p4dp, addr); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 7484ea4f6ba0..82da0cd78326 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -309,7 +309,7 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end, phys_addr_t pud_phys; BUG_ON(!pgtable_alloc); pud_phys = pgtable_alloc(PUD_SHIFT); - __p4d_populate(p4dp, pud_phys, PUD_TYPE_TABLE); + __p4d_populate(p4dp, pud_phys, P4D_TYPE_TABLE); p4d = READ_ONCE(*p4dp); } BUG_ON(p4d_bad(p4d)); @@ -1210,11 +1210,11 @@ void __init early_fixmap_init(void) pudp = pud_offset_kimg(p4dp, addr); } else { if (p4d_none(p4d)) - __p4d_populate(p4dp, __pa_symbol(bm_pud), PUD_TYPE_TABLE); + __p4d_populate(p4dp, __pa_symbol(bm_pud), P4D_TYPE_TABLE); pudp = fixmap_pud(addr); } if (pud_none(READ_ONCE(*pudp))) - __pud_populate(pudp, __pa_symbol(bm_pmd), PMD_TYPE_TABLE); + __pud_populate(pudp, __pa_symbol(bm_pmd), PUD_TYPE_TABLE); pmdp = fixmap_pmd(addr); __pmd_populate(pmdp, __pa_symbol(bm_pte), PMD_TYPE_TABLE); -- cgit v1.2.3 From 87143f404f338dbf0b51374ea48ab05e598af564 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 10 Mar 2021 11:49:41 +0100 Subject: arm64: mm: use XN table mapping attributes for the linear region The way the arm64 kernel virtual address space is constructed guarantees that swapper PGD entries are never shared between the linear region on the one hand, and the vmalloc region on the other, which is where all kernel text, module text and BPF text mappings reside. This means that mappings in the linear region (which never require executable permissions) never share any table entries at any level with mappings that do require executable permissions, and so we can set the table-level PXN attributes for all table entries that are created while setting up mappings in the linear region. Since swapper's PGD level page table is mapped r/o itself, this adds another layer of robustness to the way the kernel manages its own page tables. While at it, set the UXN attribute as well for all kernel mappings created at boot. Acked-by: Mark Rutland Signed-off-by: Ard Biesheuvel Reviewed-by: Anshuman Khandual Link: https://lore.kernel.org/r/20210310104942.174584-3-ardb@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable-hwdef.h | 6 ++++++ arch/arm64/mm/mmu.c | 37 +++++++++++++++++++++++++++------- 2 files changed, 36 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index e64e77a345b2..b82575a33f8b 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -101,6 +101,8 @@ #define P4D_TYPE_MASK (_AT(p4dval_t, 3) << 0) #define P4D_TYPE_SECT (_AT(p4dval_t, 1) << 0) #define P4D_SECT_RDONLY (_AT(p4dval_t, 1) << 7) /* AP[2] */ +#define P4D_TABLE_PXN (_AT(p4dval_t, 1) << 59) +#define P4D_TABLE_UXN (_AT(p4dval_t, 1) << 60) /* * Level 1 descriptor (PUD). @@ -110,6 +112,8 @@ #define PUD_TYPE_MASK (_AT(pudval_t, 3) << 0) #define PUD_TYPE_SECT (_AT(pudval_t, 1) << 0) #define PUD_SECT_RDONLY (_AT(pudval_t, 1) << 7) /* AP[2] */ +#define PUD_TABLE_PXN (_AT(pudval_t, 1) << 59) +#define PUD_TABLE_UXN (_AT(pudval_t, 1) << 60) /* * Level 2 descriptor (PMD). 
@@ -131,6 +135,8 @@ #define PMD_SECT_CONT (_AT(pmdval_t, 1) << 52) #define PMD_SECT_PXN (_AT(pmdval_t, 1) << 53) #define PMD_SECT_UXN (_AT(pmdval_t, 1) << 54) +#define PMD_TABLE_PXN (_AT(pmdval_t, 1) << 59) +#define PMD_TABLE_UXN (_AT(pmdval_t, 1) << 60) /* * AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* registers). diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 82da0cd78326..fac957ff5187 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -39,6 +39,7 @@ #define NO_BLOCK_MAPPINGS BIT(0) #define NO_CONT_MAPPINGS BIT(1) +#define NO_EXEC_MAPPINGS BIT(2) /* assumes FEAT_HPDS is not used */ u64 idmap_t0sz = TCR_T0SZ(VA_BITS_MIN); u64 idmap_ptrs_per_pgd = PTRS_PER_PGD; @@ -185,10 +186,14 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, BUG_ON(pmd_sect(pmd)); if (pmd_none(pmd)) { + pmdval_t pmdval = PMD_TYPE_TABLE | PMD_TABLE_UXN; phys_addr_t pte_phys; + + if (flags & NO_EXEC_MAPPINGS) + pmdval |= PMD_TABLE_PXN; BUG_ON(!pgtable_alloc); pte_phys = pgtable_alloc(PAGE_SHIFT); - __pmd_populate(pmdp, pte_phys, PMD_TYPE_TABLE); + __pmd_populate(pmdp, pte_phys, pmdval); pmd = READ_ONCE(*pmdp); } BUG_ON(pmd_bad(pmd)); @@ -259,10 +264,14 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, */ BUG_ON(pud_sect(pud)); if (pud_none(pud)) { + pudval_t pudval = PUD_TYPE_TABLE | PUD_TABLE_UXN; phys_addr_t pmd_phys; + + if (flags & NO_EXEC_MAPPINGS) + pudval |= PUD_TABLE_PXN; BUG_ON(!pgtable_alloc); pmd_phys = pgtable_alloc(PMD_SHIFT); - __pud_populate(pudp, pmd_phys, PUD_TYPE_TABLE); + __pud_populate(pudp, pmd_phys, pudval); pud = READ_ONCE(*pudp); } BUG_ON(pud_bad(pud)); @@ -306,10 +315,14 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end, p4d_t p4d = READ_ONCE(*p4dp); if (p4d_none(p4d)) { + p4dval_t p4dval = P4D_TYPE_TABLE | P4D_TABLE_UXN; phys_addr_t pud_phys; + + if (flags & NO_EXEC_MAPPINGS) + p4dval |= P4D_TABLE_PXN; BUG_ON(!pgtable_alloc); pud_phys = pgtable_alloc(PUD_SHIFT); - __p4d_populate(p4dp, pud_phys, P4D_TYPE_TABLE); + __p4d_populate(p4dp, pud_phys, p4dval); p4d = READ_ONCE(*p4dp); } BUG_ON(p4d_bad(p4d)); @@ -486,14 +499,24 @@ early_param("crashkernel", enable_crash_mem_map); static void __init map_mem(pgd_t *pgdp) { + static const u64 direct_map_end = _PAGE_END(VA_BITS_MIN); phys_addr_t kernel_start = __pa_symbol(_stext); phys_addr_t kernel_end = __pa_symbol(__init_begin); phys_addr_t start, end; - int flags = 0; + int flags = NO_EXEC_MAPPINGS; u64 i; + /* + * Setting hierarchical PXNTable attributes on table entries covering + * the linear region is only possible if it is guaranteed that no table + * entries at any level are being shared between the linear region and + * the vmalloc region. Check whether this is true for the PGD level, in + * which case it is guaranteed to be true for all other levels as well. 
+ */ + BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end)); + if (rodata_full || crash_mem_map || debug_pagealloc_enabled()) - flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; + flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; /* * Take care not to create a writable alias for the @@ -1463,7 +1486,7 @@ struct range arch_get_mappable_range(void) int arch_add_memory(int nid, u64 start, u64 size, struct mhp_params *params) { - int ret, flags = 0; + int ret, flags = NO_EXEC_MAPPINGS; VM_BUG_ON(!mhp_range_allowed(start, size, true)); @@ -1473,7 +1496,7 @@ int arch_add_memory(int nid, u64 start, u64 size, */ if (rodata_full || debug_pagealloc_enabled() || IS_ENABLED(CONFIG_KFENCE)) - flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; + flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; __create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start), size, params->pgprot, __pgd_pgtable_alloc, -- cgit v1.2.3 From 59511cfd08f32d0e9d363ffa0fdaaa75afdc52b1 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 10 Mar 2021 11:49:42 +0100 Subject: arm64: mm: use XN table mapping attributes for user/kernel mappings As the kernel and user space page tables are strictly mutually exclusive when it comes to executable permissions, we can set the UXN table attribute on all table entries that are created while creating kernel mappings in the swapper page tables, and the PXN table attribute on all table entries that are created while creating user space mappings in user space page tables. While at it, get rid of a redundant comment. Reviewed-by: Anshuman Khandual Signed-off-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210310104942.174584-4-ardb@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgalloc.h | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index 27cc643d0509..31fbab3d6f99 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -27,7 +27,10 @@ static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot) static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp) { - __pud_populate(pudp, __pa(pmdp), PUD_TYPE_TABLE); + pudval_t pudval = PUD_TYPE_TABLE; + + pudval |= (mm == &init_mm) ? PUD_TABLE_UXN : PUD_TABLE_PXN; + __pud_populate(pudp, __pa(pmdp), pudval); } #else static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot) @@ -45,7 +48,10 @@ static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot) static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4dp, pud_t *pudp) { - __p4d_populate(p4dp, __pa(pudp), P4D_TYPE_TABLE); + p4dval_t p4dval = P4D_TYPE_TABLE; + + p4dval |= (mm == &init_mm) ? 
P4D_TABLE_UXN : P4D_TABLE_PXN; + __p4d_populate(p4dp, __pa(pudp), p4dval); } #else static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot) @@ -70,16 +76,15 @@ static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t ptep, static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep) { - /* - * The pmd must be loaded with the physical address of the PTE table - */ - __pmd_populate(pmdp, __pa(ptep), PMD_TYPE_TABLE); + VM_BUG_ON(mm != &init_mm); + __pmd_populate(pmdp, __pa(ptep), PMD_TYPE_TABLE | PMD_TABLE_UXN); } static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, pgtable_t ptep) { - __pmd_populate(pmdp, page_to_phys(ptep), PMD_TYPE_TABLE); + VM_BUG_ON(mm == &init_mm); + __pmd_populate(pmdp, page_to_phys(ptep), PMD_TYPE_TABLE | PMD_TABLE_PXN); } #define pmd_pgtable(pmd) pmd_page(pmd) -- cgit v1.2.3 From 4a423645bc2690376a7a94b4bb7b2f74bc6206ff Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 19 Mar 2021 16:53:24 +0000 Subject: kselftest/arm64: mte: Fix compilation with native compiler The mte selftest Makefile contains a check for GCC, to add the memtag -march flag to the compiler options. This check fails if the compiler is not explicitly specified, so reverts to the standard "cc", in which case --version doesn't mention the "gcc" string we match against: $ cc --version | head -n 1 cc (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0 This will not add the -march switch to the command line, so compilation fails: mte_helper.S: Assembler messages: mte_helper.S:25: Error: selected processor does not support `irg x0,x0,xzr' mte_helper.S:38: Error: selected processor does not support `gmi x1,x0,xzr' ... Actually clang accepts the same -march option as well, so we can just drop this check and add this unconditionally to the command line, to avoid any future issues with this check altogether (gcc actually prints basename(argv[0]) when called with --version). Signed-off-by: Andre Przywara Reviewed-by: Nick Desaulniers Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20210319165334.29213-2-andre.przywara@arm.com Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/mte/Makefile | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/testing/selftests/arm64/mte/Makefile b/tools/testing/selftests/arm64/mte/Makefile index 0b3af552632a..df15d44aeb8d 100644 --- a/tools/testing/selftests/arm64/mte/Makefile +++ b/tools/testing/selftests/arm64/mte/Makefile @@ -6,9 +6,7 @@ SRCS := $(filter-out mte_common_util.c,$(wildcard *.c)) PROGS := $(patsubst %.c,%,$(SRCS)) #Add mte compiler option -ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep gcc),) CFLAGS += -march=armv8.5-a+memtag -endif #check if the compiler works well mte_cc_support := $(shell if ($(CC) $(CFLAGS) -E -x c /dev/null -o /dev/null 2>&1) then echo "1"; fi) -- cgit v1.2.3 From e5decefd884da1c2c6ea18fca17b80b189afcc43 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 19 Mar 2021 16:53:25 +0000 Subject: kselftest/arm64: mte: Fix pthread linking The GCC manual suggests to use -pthread, when linking with the PThread library, also to add this switch to both the compilation and linking stages. 
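In Makefile terms that is the pattern the hunk below adopts:

------------
CFLAGS  += -pthread	# compile stage
LDFLAGS += -pthread	# link stage
------------

With -lpthread buried in CFLAGS instead, make's built-in link rule ($(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $^ ...) places the library before the object files on the command line, where it satisfies no references.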
Do as the manual says, to fix compilation with Ubuntu's 20.04 toolchain, which was getting -lpthread too early on the command line: ------------ /usr/bin/ld: /tmp/cc5zbo2A.o: in function `execute_test': tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c:86: undefined reference to `pthread_create' /usr/bin/ld: tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c:90: undefined reference to `pthread_join' ------------ Signed-off-by: Andre Przywara Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20210319165334.29213-3-andre.przywara@arm.com Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/mte/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/arm64/mte/Makefile b/tools/testing/selftests/arm64/mte/Makefile index df15d44aeb8d..90aadd86fa0d 100644 --- a/tools/testing/selftests/arm64/mte/Makefile +++ b/tools/testing/selftests/arm64/mte/Makefile @@ -1,7 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 # Copyright (C) 2020 ARM Limited -CFLAGS += -std=gnu99 -I. -lpthread +CFLAGS += -std=gnu99 -I. -pthread +LDFLAGS += -pthread SRCS := $(filter-out mte_common_util.c,$(wildcard *.c)) PROGS := $(patsubst %.c,%,$(SRCS)) -- cgit v1.2.3 From 31c88729a7ad2b9871e6a73e491ec7223880d633 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 19 Mar 2021 16:53:26 +0000 Subject: kselftest/arm64: mte: ksm_options: Fix fscanf warning Out of the box Ubuntu's 20.04 compiler warns about missing return value checks for fscanf() calls. Make GCC happy by checking whether we actually parsed one integer. Signed-off-by: Andre Przywara Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20210319165334.29213-4-andre.przywara@arm.com Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/mte/check_ksm_options.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/arm64/mte/check_ksm_options.c b/tools/testing/selftests/arm64/mte/check_ksm_options.c index 3b23c4d61d38..88c74bc46d4f 100644 --- a/tools/testing/selftests/arm64/mte/check_ksm_options.c +++ b/tools/testing/selftests/arm64/mte/check_ksm_options.c @@ -33,7 +33,10 @@ static unsigned long read_sysfs(char *str) ksft_print_msg("ERR: missing %s\n", str); return 0; } - fscanf(f, "%lu", &val); + if (fscanf(f, "%lu", &val) != 1) { + ksft_print_msg("ERR: parsing %s\n", str); + val = 0; + } fclose(f); return val; } -- cgit v1.2.3 From 46cb11b17c7a917e0eb5c7aa87a7bc6cda455a5e Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 19 Mar 2021 16:53:27 +0000 Subject: kselftest/arm64: mte: user_mem: Fix write() warning Out of the box Ubuntu's 20.04 compiler warns about missing return value checks for write() (sys)calls. Make GCC happy by checking whether we actually managed to write "val". 
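For background: glibc only activates the warn_unused_result attribute on write() when _FORTIFY_SOURCE is in effect, and Ubuntu's gcc enables that by default when optimizing, which is why the warning shows up there out of the box. A minimal sketch of the warning and the shape of the fix (illustrative code, not the actual test source):

------------
#include <unistd.h>

int demo(int fd, unsigned long val)
{
	write(fd, &val, sizeof(val));	/* -Wunused-result fires here under _FORTIFY_SOURCE */

	if (write(fd, &val, sizeof(val)) != sizeof(val))
		return -1;		/* short or failed write */
	return 0;
}
------------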
Signed-off-by: Andre Przywara Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20210319165334.29213-5-andre.przywara@arm.com Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/mte/check_user_mem.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c index 4bfa80f2a8c3..1de7a0abd0ae 100644 --- a/tools/testing/selftests/arm64/mte/check_user_mem.c +++ b/tools/testing/selftests/arm64/mte/check_user_mem.c @@ -33,7 +33,8 @@ static int check_usermem_access_fault(int mem_type, int mode, int mapping) if (fd == -1) return KSFT_FAIL; for (i = 0; i < len; i++) - write(fd, &val, sizeof(val)); + if (write(fd, &val, sizeof(val)) != sizeof(val)) + return KSFT_FAIL; lseek(fd, 0, 0); ptr = mte_allocate_memory(len, mem_type, mapping, true); if (check_allocated_memory(ptr, len, mem_type, true) != KSFT_PASS) { -- cgit v1.2.3 From d302a702530b4025fbb14f20e637badce28bc741 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 19 Mar 2021 16:53:28 +0000 Subject: kselftest/arm64: mte: common: Fix write() warnings Out of the box Ubuntu's 20.04 compiler warns about missing return value checks for write() (sys)calls. Make GCC happy by checking whether we actually managed to write out our buffer. Signed-off-by: Andre Przywara Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20210319165334.29213-6-andre.przywara@arm.com Signed-off-by: Catalin Marinas --- .../testing/selftests/arm64/mte/mte_common_util.c | 23 +++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.c b/tools/testing/selftests/arm64/mte/mte_common_util.c index 39f8908988ea..4e887dad762d 100644 --- a/tools/testing/selftests/arm64/mte/mte_common_util.c +++ b/tools/testing/selftests/arm64/mte/mte_common_util.c @@ -181,10 +181,17 @@ void *mte_allocate_file_memory(size_t size, int mem_type, int mapping, bool tags } /* Initialize the file for mappable size */ lseek(fd, 0, SEEK_SET); - for (index = INIT_BUFFER_SIZE; index < size; index += INIT_BUFFER_SIZE) - write(fd, buffer, INIT_BUFFER_SIZE); + for (index = INIT_BUFFER_SIZE; index < size; index += INIT_BUFFER_SIZE) { + if (write(fd, buffer, INIT_BUFFER_SIZE) != INIT_BUFFER_SIZE) { + perror("initialising buffer"); + return NULL; + } + } index -= INIT_BUFFER_SIZE; - write(fd, buffer, size - index); + if (write(fd, buffer, size - index) != size - index) { + perror("initialising buffer"); + return NULL; + } return __mte_allocate_memory_range(size, mem_type, mapping, 0, 0, tags, fd); } @@ -202,9 +209,15 @@ void *mte_allocate_file_memory_tag_range(size_t size, int mem_type, int mapping, /* Initialize the file for mappable size */ lseek(fd, 0, SEEK_SET); for (index = INIT_BUFFER_SIZE; index < map_size; index += INIT_BUFFER_SIZE) - write(fd, buffer, INIT_BUFFER_SIZE); + if (write(fd, buffer, INIT_BUFFER_SIZE) != INIT_BUFFER_SIZE) { + perror("initialising buffer"); + return NULL; + } index -= INIT_BUFFER_SIZE; - write(fd, buffer, map_size - index); + if (write(fd, buffer, map_size - index) != map_size - index) { + perror("initialising buffer"); + return NULL; + } return __mte_allocate_memory_range(size, mem_type, mapping, range_before, range_after, true, fd); } -- cgit v1.2.3 From 592432862cc4019075a7196d9961562c49507d6f Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 19 Mar 2021 16:53:29 +0000 Subject: kselftest/arm64: mte: Fix MTE feature detection To check whether the CPU and kernel 
support the MTE features we want to test, we use an (emulated) CPU ID register read. However we only check against a very particular feature version (0b0010), even though the ARM ARM promises ID register features to be backwards compatible. While this could be fixed by using ">=" instead of "==", we should actually use the explicit HWCAP2_MTE hardware capability, exposed by the kernel via the ELF auxiliary vectors. That moves this responsibility to the kernel, and fixes running the tests on machines with FEAT_MTE3 capability. Signed-off-by: Andre Przywara Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20210319165334.29213-7-andre.przywara@arm.com Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/mte/mte_common_util.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.c b/tools/testing/selftests/arm64/mte/mte_common_util.c index 4e887dad762d..aa8a8a6b8b6d 100644 --- a/tools/testing/selftests/arm64/mte/mte_common_util.c +++ b/tools/testing/selftests/arm64/mte/mte_common_util.c @@ -291,22 +291,13 @@ int mte_switch_mode(int mte_option, unsigned long incl_mask) return 0; } -#define ID_AA64PFR1_MTE_SHIFT 8 -#define ID_AA64PFR1_MTE 2 - int mte_default_setup(void) { - unsigned long hwcaps = getauxval(AT_HWCAP); + unsigned long hwcaps2 = getauxval(AT_HWCAP2); unsigned long en = 0; int ret; - if (!(hwcaps & HWCAP_CPUID)) { - ksft_print_msg("FAIL: CPUID registers unavailable\n"); - return KSFT_FAIL; - } - /* Read ID_AA64PFR1_EL1 register */ - asm volatile("mrs %0, id_aa64pfr1_el1" : "=r"(hwcaps) : : "memory"); - if (((hwcaps >> ID_AA64PFR1_MTE_SHIFT) & MT_TAG_MASK) != ID_AA64PFR1_MTE) { + if (!(hwcaps2 & HWCAP2_MTE)) { ksft_print_msg("FAIL: MTE features unavailable\n"); return KSFT_SKIP; } -- cgit v1.2.3 From 5238c2cd5a2e0dc846d1b900553211e832952b8d Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 19 Mar 2021 16:53:30 +0000 Subject: kselftest/arm64: mte: Use cross-compiler if specified At the moment we either need to provide CC explicitly, or use a native machine to get the ARM64 MTE selftest compiled. It seems useful to use the same (cross-)compiler as we use for the kernel, so copy the recipe we use in the pauth selftest. Signed-off-by: Andre Przywara Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20210319165334.29213-8-andre.przywara@arm.com Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/mte/Makefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/testing/selftests/arm64/mte/Makefile b/tools/testing/selftests/arm64/mte/Makefile index 90aadd86fa0d..fa282e5f2069 100644 --- a/tools/testing/selftests/arm64/mte/Makefile +++ b/tools/testing/selftests/arm64/mte/Makefile @@ -1,6 +1,11 @@ # SPDX-License-Identifier: GPL-2.0 # Copyright (C) 2020 ARM Limited +# preserve CC value from top level Makefile +ifeq ($(CC),cc) +CC := $(CROSS_COMPILE)gcc +endif + CFLAGS += -std=gnu99 -I. -pthread LDFLAGS += -pthread SRCS := $(filter-out mte_common_util.c,$(wildcard *.c)) -- cgit v1.2.3 From 8bbb58a3c6b9f12b2217b8ae08e70a6d2f556f73 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 19 Mar 2021 16:53:31 +0000 Subject: kselftest/arm64: mte: Output warning about failing compiler At the moment we check the compiler's ability to compile MTE enabled code, but guard all the Makefile rules by it. As a consequence a broken or not capable compiler just doesn't do anything, and make happily returns without any error message, but with no programs created. 
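The silence comes from the compiler probe gating every rule, with no else branch - roughly:

------------
# probe already present in the Makefile (see earlier in this series):
mte_cc_support := $(shell if ($(CC) $(CFLAGS) -E -x c /dev/null -o /dev/null 2>&1) then echo "1"; fi)
ifeq ($(mte_cc_support),1)
# ... every build rule lives in here ...
endif	# no else branch: an incapable compiler leaves make with nothing to do
------------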
Since the MTE feature is only supported by recent aarch64 compilers (not all stable distro compilers support it), having an explicit message seems like a good idea. To not break building multiple targets, we let make proceed without errors. Signed-off-by: Andre Przywara Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20210319165334.29213-9-andre.przywara@arm.com Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/mte/Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/arm64/mte/Makefile b/tools/testing/selftests/arm64/mte/Makefile index fa282e5f2069..e0d43cea3cd1 100644 --- a/tools/testing/selftests/arm64/mte/Makefile +++ b/tools/testing/selftests/arm64/mte/Makefile @@ -23,6 +23,9 @@ TEST_GEN_PROGS := $(PROGS) # Get Kernel headers installed and use them. KSFT_KHDR_INSTALL := 1 +else + $(warning compiler "$(CC)" does not support the ARMv8.5 MTE extension.) + $(warning test program "mte" will not be created.) endif # Include KSFT lib.mk. -- cgit v1.2.3 From 9466ecac84a462fc91d192f0f9be0afd8e4f19f9 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 19 Mar 2021 16:53:32 +0000 Subject: kselftest/arm64: mte: Makefile: Fix clang compilation When clang finds a header file on the command line, it wants to precompile that, which would end up in a separate output file. Specifying -o on that same command line collides with that effort, so the compiler complains: clang: error: cannot specify -o when generating multiple output files Since we are not really after a precompiled header, just drop the header file from the command line, by removing it from the list of source files in the Makefile. Signed-off-by: Andre Przywara Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20210319165334.29213-10-andre.przywara@arm.com Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/mte/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/arm64/mte/Makefile b/tools/testing/selftests/arm64/mte/Makefile index e0d43cea3cd1..409e3e53d00a 100644 --- a/tools/testing/selftests/arm64/mte/Makefile +++ b/tools/testing/selftests/arm64/mte/Makefile @@ -32,5 +32,5 @@ endif include ../../lib.mk ifeq ($(mte_cc_support),1) -$(TEST_GEN_PROGS): mte_common_util.c mte_common_util.h mte_helper.S +$(TEST_GEN_PROGS): mte_common_util.c mte_helper.S endif -- cgit v1.2.3 From b4e1fa2290691fda4392ac479115ee3b04a7534c Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 19 Mar 2021 16:53:33 +0000 Subject: kselftest/arm64: mte: Fix clang warning if (!prctl(...) == 0) is not only cumbersome to read, it also upsets clang and triggers a warning: ------------ mte_common_util.c:287:6: warning: logical not is only applied to the left hand side of this comparison [-Wlogical-not-parentheses] .... Fix that by just comparing against "not 0" instead. 
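The warning is about precedence rather than behaviour: '!' binds tighter than '==', so the old condition happened to compute the intended result anyway. A standalone demonstration (f() is a made-up stand-in for a failing prctl()):

------------
#include <stdio.h>

static int f(void) { return -1; }

int main(void)
{
	/* parses as (!f()) == 0, i.e. true exactly when f() != 0 */
	printf("%d\n", !f() == 0);	/* prints 1, with a clang warning */
	printf("%d\n", f() != 0);	/* prints 1, no warning */
	return 0;
}
------------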
Signed-off-by: Andre Przywara Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20210319165334.29213-11-andre.przywara@arm.com Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/mte/mte_common_util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.c b/tools/testing/selftests/arm64/mte/mte_common_util.c index aa8a8a6b8b6d..040abdca079d 100644 --- a/tools/testing/selftests/arm64/mte/mte_common_util.c +++ b/tools/testing/selftests/arm64/mte/mte_common_util.c @@ -284,7 +284,7 @@ int mte_switch_mode(int mte_option, unsigned long incl_mask) en |= (incl_mask << PR_MTE_TAG_SHIFT); /* Enable address tagging ABI, mte error reporting mode and tag inclusion mask. */ - if (!prctl(PR_SET_TAGGED_ADDR_CTRL, en, 0, 0, 0) == 0) { + if (prctl(PR_SET_TAGGED_ADDR_CTRL, en, 0, 0, 0) != 0) { ksft_print_msg("FAIL:prctl PR_SET_TAGGED_ADDR_CTRL for mte mode\n"); return -EINVAL; } -- cgit v1.2.3 From 75347add03e0fa60ecf2f79e41ec2152b8504593 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 19 Mar 2021 16:53:34 +0000 Subject: kselftest/arm64: mte: Report filename on failing temp file creation The MTE selftests create temporary files in /dev/shm, for later mmap-ing them. When there is no tmpfs mounted on /dev/shm, or /dev/shm does not exist in the first place (on minimal filesystems), the error message is not giving good hints: # FAIL: Unable to open temporary file # FAIL: memory allocation not ok 17 Check initial tags with private mapping, ... Add a perror() call, that gives both the filename and the actual error reason, so that users get a chance of correcting that. Signed-off-by: Andre Przywara Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20210319165334.29213-12-andre.przywara@arm.com Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/mte/mte_common_util.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.c b/tools/testing/selftests/arm64/mte/mte_common_util.c index 040abdca079d..f50ac31920d1 100644 --- a/tools/testing/selftests/arm64/mte/mte_common_util.c +++ b/tools/testing/selftests/arm64/mte/mte_common_util.c @@ -337,6 +337,7 @@ int create_temp_file(void) /* Create a file in the tmpfs filesystem */ fd = mkstemp(&filename[0]); if (fd == -1) { + perror(filename); ksft_print_msg("FAIL: Unable to open temporary file\n"); return 0; } -- cgit v1.2.3 From 314bcbf09f147cfb069bc22207215b6b0b7da510 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 9 Mar 2021 19:37:31 +0000 Subject: kselftest: arm64: Add BTI tests Add some tests that verify that BTI functions correctly for static binaries built with and without BTI support, verifying that SIGILL is generated when expected and is not generated in other situations. Since BTI support is still being rolled out in distributions these tests are built entirely free standing, no libc support is used at all so none of the standard helper functions for kselftest can be used and we open code everything. This also means we aren't testing the kernel support for the dynamic linker, though the test program can be readily adapted for that once it becomes something that we can reliably build and run. These tests were originally written by Dave Martin, I've adapted them for kselftest, mainly around the build system and the output format. 
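As a reading aid for the test matrix: each stub below is called through three trampolines, and in the BTI-enabled binary a SIGILL is expected wherever the landing pad does not accept that branch type. Summarised from the do_test() calls in test.c (1 = SIGILL expected); the per-row notes are editorial glosses of the architectural rules:

------------
                  br x0   br x16   blr
nohint/bti none     1       1       1
bti c               1       0       0	(BLR and BR via x16/x17 land)
bti j               0       0       1	(jumps land, calls trap)
bti jc              0       0       0
paciasp             1       0       0	(acts as an implicit BTI C)
------------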
Signed-off-by: Mark Brown Cc: Dave Martin Link: https://lore.kernel.org/r/20210309193731.57247-1-broonie@kernel.org Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/Makefile | 2 +- tools/testing/selftests/arm64/bti/.gitignore | 2 + tools/testing/selftests/arm64/bti/Makefile | 61 ++++++ tools/testing/selftests/arm64/bti/assembler.h | 80 ++++++++ tools/testing/selftests/arm64/bti/btitest.h | 23 +++ tools/testing/selftests/arm64/bti/compiler.h | 21 ++ tools/testing/selftests/arm64/bti/gen/.gitignore | 2 + tools/testing/selftests/arm64/bti/signal.c | 37 ++++ tools/testing/selftests/arm64/bti/signal.h | 21 ++ tools/testing/selftests/arm64/bti/start.S | 14 ++ tools/testing/selftests/arm64/bti/syscall.S | 23 +++ tools/testing/selftests/arm64/bti/system.c | 22 +++ tools/testing/selftests/arm64/bti/system.h | 28 +++ tools/testing/selftests/arm64/bti/test.c | 234 +++++++++++++++++++++++ tools/testing/selftests/arm64/bti/teststubs.S | 39 ++++ tools/testing/selftests/arm64/bti/trampoline.S | 29 +++ 16 files changed, 637 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/arm64/bti/.gitignore create mode 100644 tools/testing/selftests/arm64/bti/Makefile create mode 100644 tools/testing/selftests/arm64/bti/assembler.h create mode 100644 tools/testing/selftests/arm64/bti/btitest.h create mode 100644 tools/testing/selftests/arm64/bti/compiler.h create mode 100644 tools/testing/selftests/arm64/bti/gen/.gitignore create mode 100644 tools/testing/selftests/arm64/bti/signal.c create mode 100644 tools/testing/selftests/arm64/bti/signal.h create mode 100644 tools/testing/selftests/arm64/bti/start.S create mode 100644 tools/testing/selftests/arm64/bti/syscall.S create mode 100644 tools/testing/selftests/arm64/bti/system.c create mode 100644 tools/testing/selftests/arm64/bti/system.h create mode 100644 tools/testing/selftests/arm64/bti/test.c create mode 100644 tools/testing/selftests/arm64/bti/teststubs.S create mode 100644 tools/testing/selftests/arm64/bti/trampoline.S diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile index 2c9d012797a7..ced910fb4019 100644 --- a/tools/testing/selftests/arm64/Makefile +++ b/tools/testing/selftests/arm64/Makefile @@ -4,7 +4,7 @@ ARCH ?= $(shell uname -m 2>/dev/null || echo not) ifneq (,$(filter $(ARCH),aarch64 arm64)) -ARM64_SUBTARGETS ?= tags signal pauth fp mte +ARM64_SUBTARGETS ?= tags signal pauth fp mte bti else ARM64_SUBTARGETS := endif diff --git a/tools/testing/selftests/arm64/bti/.gitignore b/tools/testing/selftests/arm64/bti/.gitignore new file mode 100644 index 000000000000..73869fabada4 --- /dev/null +++ b/tools/testing/selftests/arm64/bti/.gitignore @@ -0,0 +1,2 @@ +btitest +nobtitest diff --git a/tools/testing/selftests/arm64/bti/Makefile b/tools/testing/selftests/arm64/bti/Makefile new file mode 100644 index 000000000000..73e013c082a6 --- /dev/null +++ b/tools/testing/selftests/arm64/bti/Makefile @@ -0,0 +1,61 @@ +# SPDX-License-Identifier: GPL-2.0 + +TEST_GEN_PROGS := btitest nobtitest + +PROGS := $(patsubst %,gen/%,$(TEST_GEN_PROGS)) + +# These tests are built as freestanding binaries since otherwise BTI +# support in ld.so is required which is not currently widespread; when +# it is available it will still be useful to test this separately as the +# cases for statically linked and dynamically linked binaries are +# slightly different. 
+ +CFLAGS_NOBTI = -DBTI=0 +CFLAGS_BTI = -mbranch-protection=standard -DBTI=1 + +CFLAGS_COMMON = -ffreestanding -Wall -Wextra $(CFLAGS) + +BTI_CC_COMMAND = $(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -c -o $@ $< +NOBTI_CC_COMMAND = $(CC) $(CFLAGS_NOBTI) $(CFLAGS_COMMON) -c -o $@ $< + +%-bti.o: %.c + $(BTI_CC_COMMAND) + +%-bti.o: %.S + $(BTI_CC_COMMAND) + +%-nobti.o: %.c + $(NOBTI_CC_COMMAND) + +%-nobti.o: %.S + $(NOBTI_CC_COMMAND) + +BTI_OBJS = \ + test-bti.o \ + signal-bti.o \ + start-bti.o \ + syscall-bti.o \ + system-bti.o \ + teststubs-bti.o \ + trampoline-bti.o +gen/btitest: $(BTI_OBJS) + $(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -nostdlib -o $@ $^ + +NOBTI_OBJS = \ + test-nobti.o \ + signal-nobti.o \ + start-nobti.o \ + syscall-nobti.o \ + system-nobti.o \ + teststubs-nobti.o \ + trampoline-nobti.o +gen/nobtitest: $(NOBTI_OBJS) + $(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -nostdlib -o $@ $^ + +# Including KSFT lib.mk here will also mangle the TEST_GEN_PROGS list +# to account for any OUTPUT target-dirs optionally provided by +# the toplevel makefile +include ../../lib.mk + +$(TEST_GEN_PROGS): $(PROGS) + cp $(PROGS) $(OUTPUT)/ diff --git a/tools/testing/selftests/arm64/bti/assembler.h b/tools/testing/selftests/arm64/bti/assembler.h new file mode 100644 index 000000000000..04e7b72880ef --- /dev/null +++ b/tools/testing/selftests/arm64/bti/assembler.h @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 Arm Limited + * Original author: Dave Martin + */ + +#ifndef ASSEMBLER_H +#define ASSEMBLER_H + +#define NT_GNU_PROPERTY_TYPE_0 5 +#define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000 + +/* Bits for GNU_PROPERTY_AARCH64_FEATURE_1_BTI */ +#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1U << 0) +#define GNU_PROPERTY_AARCH64_FEATURE_1_PAC (1U << 1) + + +.macro startfn name:req + .globl \name +\name: + .macro endfn + .size \name, . - \name + .type \name, @function + .purgem endfn + .endm +.endm + +.macro emit_aarch64_feature_1_and + .pushsection .note.gnu.property, "a" + .align 3 + .long 2f - 1f + .long 6f - 3f + .long NT_GNU_PROPERTY_TYPE_0 +1: .string "GNU" +2: + .align 3 +3: .long GNU_PROPERTY_AARCH64_FEATURE_1_AND + .long 5f - 4f +4: +#if BTI + .long GNU_PROPERTY_AARCH64_FEATURE_1_PAC | \ + GNU_PROPERTY_AARCH64_FEATURE_1_BTI +#else + .long 0 +#endif +5: + .align 3 +6: + .popsection +.endm + +.macro paciasp + hint 0x19 +.endm + +.macro autiasp + hint 0x1d +.endm + +.macro __bti_ + hint 0x20 +.endm + +.macro __bti_c + hint 0x22 +.endm + +.macro __bti_j + hint 0x24 +.endm + +.macro __bti_jc + hint 0x26 +.endm + +.macro bti what= + __bti_\what +.endm + +#endif /* ! 
ASSEMBLER_H */ diff --git a/tools/testing/selftests/arm64/bti/btitest.h b/tools/testing/selftests/arm64/bti/btitest.h new file mode 100644 index 000000000000..2aff9b10336e --- /dev/null +++ b/tools/testing/selftests/arm64/bti/btitest.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 Arm Limited + * Original author: Dave Martin + */ + +#ifndef BTITEST_H +#define BTITEST_H + +/* Trampolines for calling the test stubs: */ +void call_using_br_x0(void (*)(void)); +void call_using_br_x16(void (*)(void)); +void call_using_blr(void (*)(void)); + +/* Test stubs: */ +void nohint_func(void); +void bti_none_func(void); +void bti_c_func(void); +void bti_j_func(void); +void bti_jc_func(void); +void paciasp_func(void); + +#endif /* !BTITEST_H */ diff --git a/tools/testing/selftests/arm64/bti/compiler.h b/tools/testing/selftests/arm64/bti/compiler.h new file mode 100644 index 000000000000..ebb6204f447a --- /dev/null +++ b/tools/testing/selftests/arm64/bti/compiler.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 Arm Limited + * Original author: Dave Martin + */ + +#ifndef COMPILER_H +#define COMPILER_H + +#define __always_unused __attribute__((__unused__)) +#define __noreturn __attribute__((__noreturn__)) +#define __unreachable() __builtin_unreachable() + +/* curse(e) has value e, but the compiler cannot assume so */ +#define curse(e) ({ \ + __typeof__(e) __curse_e = (e); \ + asm ("" : "+r" (__curse_e)); \ + __curse_e; \ +}) + +#endif /* ! COMPILER_H */ diff --git a/tools/testing/selftests/arm64/bti/gen/.gitignore b/tools/testing/selftests/arm64/bti/gen/.gitignore new file mode 100644 index 000000000000..73869fabada4 --- /dev/null +++ b/tools/testing/selftests/arm64/bti/gen/.gitignore @@ -0,0 +1,2 @@ +btitest +nobtitest diff --git a/tools/testing/selftests/arm64/bti/signal.c b/tools/testing/selftests/arm64/bti/signal.c new file mode 100644 index 000000000000..f3fd29b91141 --- /dev/null +++ b/tools/testing/selftests/arm64/bti/signal.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 Arm Limited + * Original author: Dave Martin + */ + +#include "system.h" +#include "signal.h" + +int sigemptyset(sigset_t *s) +{ + unsigned int i; + + for (i = 0; i < _NSIG_WORDS; ++i) + s->sig[i] = 0; + + return 0; +} + +int sigaddset(sigset_t *s, int n) +{ + if (n < 1 || n > _NSIG) + return -EINVAL; + + s->sig[(n - 1) / _NSIG_BPW] |= 1UL << (n - 1) % _NSIG_BPW; + return 0; +} + +int sigaction(int n, struct sigaction *sa, const struct sigaction *old) +{ + return syscall(__NR_rt_sigaction, n, sa, old, sizeof(sa->sa_mask)); +} + +int sigprocmask(int how, const sigset_t *mask, sigset_t *old) +{ + return syscall(__NR_rt_sigprocmask, how, mask, old, sizeof(*mask)); +} diff --git a/tools/testing/selftests/arm64/bti/signal.h b/tools/testing/selftests/arm64/bti/signal.h new file mode 100644 index 000000000000..103457dc880e --- /dev/null +++ b/tools/testing/selftests/arm64/bti/signal.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 Arm Limited + * Original author: Dave Martin + */ + +#ifndef SIGNAL_H +#define SIGNAL_H + +#include + +#include "system.h" + +typedef __sighandler_t sighandler_t; + +int sigemptyset(sigset_t *s); +int sigaddset(sigset_t *s, int n); +int sigaction(int n, struct sigaction *sa, const struct sigaction *old); +int sigprocmask(int how, const sigset_t *mask, sigset_t *old); + +#endif /* ! 
SIGNAL_H */ diff --git a/tools/testing/selftests/arm64/bti/start.S b/tools/testing/selftests/arm64/bti/start.S new file mode 100644 index 000000000000..831f952e0572 --- /dev/null +++ b/tools/testing/selftests/arm64/bti/start.S @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 Arm Limited + * Original author: Dave Martin + */ + +#include "assembler.h" + +startfn _start + mov x0, sp + b start +endfn + +emit_aarch64_feature_1_and diff --git a/tools/testing/selftests/arm64/bti/syscall.S b/tools/testing/selftests/arm64/bti/syscall.S new file mode 100644 index 000000000000..8dde8b6f3db1 --- /dev/null +++ b/tools/testing/selftests/arm64/bti/syscall.S @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 Arm Limited + * Original author: Dave Martin + */ + +#include "assembler.h" + +startfn syscall + bti c + mov w8, w0 + mov x0, x1 + mov x1, x2 + mov x2, x3 + mov x3, x4 + mov x4, x5 + mov x5, x6 + mov x6, x7 + svc #0 + ret +endfn + +emit_aarch64_feature_1_and diff --git a/tools/testing/selftests/arm64/bti/system.c b/tools/testing/selftests/arm64/bti/system.c new file mode 100644 index 000000000000..6385d8d4973b --- /dev/null +++ b/tools/testing/selftests/arm64/bti/system.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 Arm Limited + * Original author: Dave Martin + */ + +#include "system.h" + +#include + +#include "compiler.h" + +void __noreturn exit(int n) +{ + syscall(__NR_exit, n); + __unreachable(); +} + +ssize_t write(int fd, const void *buf, size_t size) +{ + return syscall(__NR_write, fd, buf, size); +} diff --git a/tools/testing/selftests/arm64/bti/system.h b/tools/testing/selftests/arm64/bti/system.h new file mode 100644 index 000000000000..aca118589705 --- /dev/null +++ b/tools/testing/selftests/arm64/bti/system.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 Arm Limited + * Original author: Dave Martin + */ + +#ifndef SYSTEM_H +#define SYSTEM_H + +#include +#include + +typedef __kernel_size_t size_t; +typedef __kernel_ssize_t ssize_t; + +#include +#include +#include +#include + +#include "compiler.h" + +long syscall(int nr, ...); + +void __noreturn exit(int n); +ssize_t write(int fd, const void *buf, size_t size); + +#endif /* ! 
SYSTEM_H */ diff --git a/tools/testing/selftests/arm64/bti/test.c b/tools/testing/selftests/arm64/bti/test.c new file mode 100644 index 000000000000..656b04976ccc --- /dev/null +++ b/tools/testing/selftests/arm64/bti/test.c @@ -0,0 +1,234 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019,2021 Arm Limited + * Original author: Dave Martin + */ + +#include "system.h" + +#include +#include +#include +#include +#include + +typedef struct ucontext ucontext_t; + +#include "btitest.h" +#include "compiler.h" +#include "signal.h" + +#define EXPECTED_TESTS 18 + +static volatile unsigned int test_num = 1; +static unsigned int test_passed; +static unsigned int test_failed; +static unsigned int test_skipped; + +static void fdputs(int fd, const char *str) +{ + size_t len = 0; + const char *p = str; + + while (*p++) + ++len; + + write(fd, str, len); +} + +static void putstr(const char *str) +{ + fdputs(1, str); +} + +static void putnum(unsigned int num) +{ + char c; + + if (num / 10) + putnum(num / 10); + + c = '0' + (num % 10); + write(1, &c, 1); +} + +#define puttestname(test_name, trampoline_name) do { \ + putstr(test_name); \ + putstr("/"); \ + putstr(trampoline_name); \ +} while (0) + +void print_summary(void) +{ + putstr("# Totals: pass:"); + putnum(test_passed); + putstr(" fail:"); + putnum(test_failed); + putstr(" xfail:0 xpass:0 skip:"); + putnum(test_skipped); + putstr(" error:0\n"); +} + +static const char *volatile current_test_name; +static const char *volatile current_trampoline_name; +static volatile int sigill_expected, sigill_received; + +static void handler(int n, siginfo_t *si __always_unused, + void *uc_ __always_unused) +{ + ucontext_t *uc = uc_; + + putstr("# \t[SIGILL in "); + puttestname(current_test_name, current_trampoline_name); + putstr(", BTYPE="); + write(1, &"00011011"[((uc->uc_mcontext.pstate & PSR_BTYPE_MASK) + >> PSR_BTYPE_SHIFT) * 2], 2); + if (!sigill_expected) { + putstr("]\n"); + putstr("not ok "); + putnum(test_num); + putstr(" "); + puttestname(current_test_name, current_trampoline_name); + putstr("(unexpected SIGILL)\n"); + print_summary(); + exit(128 + n); + } + + putstr(" (expected)]\n"); + sigill_received = 1; + /* zap BTYPE so that resuming the faulting code will work */ + uc->uc_mcontext.pstate &= ~PSR_BTYPE_MASK; +} + +static int skip_all; + +static void __do_test(void (*trampoline)(void (*)(void)), + void (*fn)(void), + const char *trampoline_name, + const char *name, + int expect_sigill) +{ + if (skip_all) { + test_skipped++; + putstr("ok "); + putnum(test_num); + putstr(" "); + puttestname(name, trampoline_name); + putstr(" # SKIP\n"); + + return; + } + + /* Branch Target exceptions should only happen in BTI binaries: */ + if (!BTI) + expect_sigill = 0; + + sigill_expected = expect_sigill; + sigill_received = 0; + current_test_name = name; + current_trampoline_name = trampoline_name; + + trampoline(fn); + + if (expect_sigill && !sigill_received) { + putstr("not ok "); + test_failed++; + } else { + putstr("ok "); + test_passed++; + } + putnum(test_num++); + putstr(" "); + puttestname(name, trampoline_name); + putstr("\n"); +} + +#define do_test(expect_sigill_br_x0, \ + expect_sigill_br_x16, \ + expect_sigill_blr, \ + name) \ +do { \ + __do_test(call_using_br_x0, name, "call_using_br_x0", #name, \ + expect_sigill_br_x0); \ + __do_test(call_using_br_x16, name, "call_using_br_x16", #name, \ + expect_sigill_br_x16); \ + __do_test(call_using_blr, name, "call_using_blr", #name, \ + expect_sigill_blr); \ +} while (0) + +void start(int *argcp) +{ + 
struct sigaction sa; + void *const *p; + const struct auxv_entry { + unsigned long type; + unsigned long val; + } *auxv; + unsigned long hwcap = 0, hwcap2 = 0; + + putstr("TAP version 13\n"); + putstr("1.."); + putnum(EXPECTED_TESTS); + putstr("\n"); + + /* Gross hack for finding AT_HWCAP2 from the initial process stack: */ + p = (void *const *)argcp + 1 + *argcp + 1; /* start of environment */ + /* step over environment */ + while (*p++) + ; + for (auxv = (const struct auxv_entry *)p; auxv->type != AT_NULL; ++auxv) { + switch (auxv->type) { + case AT_HWCAP: + hwcap = auxv->val; + break; + case AT_HWCAP2: + hwcap2 = auxv->val; + break; + default: + break; + } + } + + if (hwcap & HWCAP_PACA) + putstr("# HWCAP_PACA present\n"); + else + putstr("# HWCAP_PACA not present\n"); + + if (hwcap2 & HWCAP2_BTI) { + putstr("# HWCAP2_BTI present\n"); + if (!(hwcap & HWCAP_PACA)) + putstr("# Bad hardware? Expect problems.\n"); + } else { + putstr("# HWCAP2_BTI not present\n"); + skip_all = 1; + } + + putstr("# Test binary"); + if (!BTI) + putstr(" not"); + putstr(" built for BTI\n"); + + sa.sa_handler = (sighandler_t)(void *)handler; + sa.sa_flags = SA_SIGINFO; + sigemptyset(&sa.sa_mask); + sigaction(SIGILL, &sa, NULL); + sigaddset(&sa.sa_mask, SIGILL); + sigprocmask(SIG_UNBLOCK, &sa.sa_mask, NULL); + + do_test(1, 1, 1, nohint_func); + do_test(1, 1, 1, bti_none_func); + do_test(1, 0, 0, bti_c_func); + do_test(0, 0, 1, bti_j_func); + do_test(0, 0, 0, bti_jc_func); + do_test(1, 0, 0, paciasp_func); + + print_summary(); + + if (test_num - 1 != EXPECTED_TESTS) + putstr("# WARNING - EXPECTED TEST COUNT WRONG\n"); + + if (test_failed) + exit(1); + else + exit(0); +} diff --git a/tools/testing/selftests/arm64/bti/teststubs.S b/tools/testing/selftests/arm64/bti/teststubs.S new file mode 100644 index 000000000000..b62c8c35f67e --- /dev/null +++ b/tools/testing/selftests/arm64/bti/teststubs.S @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 Arm Limited + * Original author: Dave Martin + */ + +#include "assembler.h" + +startfn bti_none_func + bti + ret +endfn + +startfn bti_c_func + bti c + ret +endfn + +startfn bti_j_func + bti j + ret +endfn + +startfn bti_jc_func + bti jc + ret +endfn + +startfn paciasp_func + paciasp + autiasp + ret +endfn + +startfn nohint_func + ret +endfn + +emit_aarch64_feature_1_and diff --git a/tools/testing/selftests/arm64/bti/trampoline.S b/tools/testing/selftests/arm64/bti/trampoline.S new file mode 100644 index 000000000000..09beb3f361f1 --- /dev/null +++ b/tools/testing/selftests/arm64/bti/trampoline.S @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 Arm Limited + * Original author: Dave Martin + */ + +#include "assembler.h" + +startfn call_using_br_x0 + bti c + br x0 +endfn + +startfn call_using_br_x16 + bti c + mov x16, x0 + br x16 +endfn + +startfn call_using_blr + paciasp + stp x29, x30, [sp, #-16]! + blr x0 + ldp x29, x30, [sp], #16 + autiasp + ret +endfn + +emit_aarch64_feature_1_and -- cgit v1.2.3 From 7cd6ca1d7902260b54528054d729f2a3b27e5a00 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 18 Mar 2021 17:07:34 +0000 Subject: arm64: vdso: Use GFP_KERNEL for allocating compat vdso and signal pages There's no need to allocate the compat vDSO and signal pages using GFP_ATOMIC allocations, so use GFP_KERNEL instead. 
Signed-off-by: Will Deacon Reviewed-by: Vincenzo Frascino Link: https://lore.kernel.org/r/20210318170738.7756-2-will@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/kernel/vdso.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index cee5d04ea9ad..2d057a4dc787 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -299,7 +299,7 @@ static int aarch32_alloc_kuser_vdso_page(void) if (!IS_ENABLED(CONFIG_KUSER_HELPERS)) return 0; - vdso_page = get_zeroed_page(GFP_ATOMIC); + vdso_page = get_zeroed_page(GFP_KERNEL); if (!vdso_page) return -ENOMEM; @@ -316,7 +316,7 @@ static int aarch32_alloc_sigpage(void) int sigret_sz = __aarch32_sigret_code_end - __aarch32_sigret_code_start; unsigned long sigpage; - sigpage = get_zeroed_page(GFP_ATOMIC); + sigpage = get_zeroed_page(GFP_KERNEL); if (!sigpage) return -ENOMEM; -- cgit v1.2.3 From e9be47eab1cdaf0a2a3c0af96a6a4be1cf9a95c1 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 18 Mar 2021 17:07:35 +0000 Subject: arm64: vdso: Remove redundant calls to flush_dcache_page() flush_dcache_page() ensures that the 'PG_dcache_clean' flag for its 'page' argument is clear so that cache maintenance will be performed if the page is mapped into userspace with execute permissions. Newly allocated pages have this flag clear, so there is no need to call flush_dcache_page() for the compat vdso or signal pages. Remove the redundant calls. Signed-off-by: Will Deacon Reviewed-by: Vincenzo Frascino Link: https://lore.kernel.org/r/20210318170738.7756-3-will@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/kernel/vdso.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 2d057a4dc787..421411981dc3 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -306,7 +306,6 @@ static int aarch32_alloc_kuser_vdso_page(void) memcpy((void *)(vdso_page + 0x1000 - kuser_sz), __kuser_helper_start, kuser_sz); aarch32_vectors_page = virt_to_page(vdso_page); - flush_dcache_page(aarch32_vectors_page); return 0; } @@ -322,7 +321,6 @@ static int aarch32_alloc_sigpage(void) int sigret_sz = __aarch32_sigret_code_end - __aarch32_sigret_code_start; unsigned long sigpage; memcpy((void *)sigpage, __aarch32_sigret_code_start, sigret_sz); aarch32_sig_page = virt_to_page(sigpage); - flush_dcache_page(aarch32_sig_page); return 0; } -- cgit v1.2.3 From 7adbf10e29c2323f5eb6d6bdd13050c70900b993 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 18 Mar 2021 17:07:36 +0000 Subject: arm64: compat: Allow signal page to be remapped For compatibility with 32-bit Arm, allow the compat signal page to be remapped via mremap(). 
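The hook being added below is pure bookkeeping: if userspace moves the [sigpage] mapping, the kernel must update its per-mm record of where the compat signal return code lives, otherwise signal delivery would aim at the stale address. From userspace, the newly permitted operation looks like this (illustrative sketch; something like a checkpoint/restore tool is the sort of caller that wants it):

------------
#define _GNU_SOURCE
#include <sys/mman.h>

/* Relocate a special mapping such as [sigpage] to a chosen address. */
void *move_page(void *old_addr, void *new_addr, size_t len)
{
	return mremap(old_addr, len, len,
		      MREMAP_MAYMOVE | MREMAP_FIXED, new_addr);
}
------------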
Signed-off-by: Will Deacon Reviewed-by: Vincenzo Frascino Link: https://lore.kernel.org/r/20210318170738.7756-4-will@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/kernel/vdso.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 421411981dc3..16bf0b46fb70 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -271,6 +271,14 @@ enum aarch32_map { static struct page *aarch32_vectors_page __ro_after_init; static struct page *aarch32_sig_page __ro_after_init; +static int aarch32_sigpage_mremap(const struct vm_special_mapping *sm, + struct vm_area_struct *new_vma) +{ + current->mm->context.sigpage = (void *)new_vma->vm_start; + + return 0; +} + static struct vm_special_mapping aarch32_vdso_maps[] = { [AA32_MAP_VECTORS] = { .name = "[vectors]", /* ABI */ @@ -279,6 +287,7 @@ static struct vm_special_mapping aarch32_vdso_maps[] = { [AA32_MAP_SIGPAGE] = { .name = "[sigpage]", /* ABI */ .pages = &aarch32_sig_page, + .mremap = aarch32_sigpage_mremap, }, [AA32_MAP_VVAR] = { .name = "[vvar]", -- cgit v1.2.3 From 77ec462536a13d4b428a1eead725c4818a49f0b1 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 18 Mar 2021 17:07:37 +0000 Subject: arm64: vdso: Avoid ISB after reading from cntvct_el0 We can avoid the expensive ISB instruction after reading the counter in the vDSO gettime functions by creating a fake address hazard against a dummy stack read, just like we do inside the kernel. Signed-off-by: Will Deacon Reviewed-by: Vincenzo Frascino Link: https://lore.kernel.org/r/20210318170738.7756-5-will@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/arch_timer.h | 21 --------------------- arch/arm64/include/asm/barrier.h | 19 +++++++++++++++++++ arch/arm64/include/asm/vdso/gettimeofday.h | 6 +----- 3 files changed, 20 insertions(+), 26 deletions(-) diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index 9f0ec21d6327..88d20f04c64a 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -165,25 +165,6 @@ static inline void arch_timer_set_cntkctl(u32 cntkctl) isb(); } -/* - * Ensure that reads of the counter are treated the same as memory reads - * for the purposes of ordering by subsequent memory barriers. - * - * This insanity brought to you by speculative system register reads, - * out-of-order memory accesses, sequence locks and Thomas Gleixner. - * - * http://lists.infradead.org/pipermail/linux-arm-kernel/2019-February/631195.html - */ -#define arch_counter_enforce_ordering(val) do { \ - u64 tmp, _val = (val); \ - \ - asm volatile( \ - " eor %0, %1, %1\n" \ - " add %0, sp, %0\n" \ - " ldr xzr, [%0]" \ - : "=r" (tmp) : "r" (_val)); \ -} while (0) - static __always_inline u64 __arch_counter_get_cntpct_stable(void) { u64 cnt; @@ -224,8 +205,6 @@ static __always_inline u64 __arch_counter_get_cntvct(void) return cnt; } -#undef arch_counter_enforce_ordering - static inline int arch_timer_arch_init(void) { return 0; diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index c3009b0e5239..37d891af8ea5 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -70,6 +70,25 @@ static inline unsigned long array_index_mask_nospec(unsigned long idx, return mask; } +/* + * Ensure that reads of the counter are treated the same as memory reads + * for the purposes of ordering by subsequent memory barriers. 
+ * + * This insanity brought to you by speculative system register reads, + * out-of-order memory accesses, sequence locks and Thomas Gleixner. + * + * http://lists.infradead.org/pipermail/linux-arm-kernel/2019-February/631195.html + */ +#define arch_counter_enforce_ordering(val) do { \ + u64 tmp, _val = (val); \ + \ + asm volatile( \ + " eor %0, %1, %1\n" \ + " add %0, sp, %0\n" \ + " ldr xzr, [%0]" \ + : "=r" (tmp) : "r" (_val)); \ +} while (0) + #define __smp_mb() dmb(ish) #define __smp_rmb() dmb(ishld) #define __smp_wmb() dmb(ishst) diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h index 631ab1281633..4b4c0dac0e14 100644 --- a/arch/arm64/include/asm/vdso/gettimeofday.h +++ b/arch/arm64/include/asm/vdso/gettimeofday.h @@ -83,11 +83,7 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode, */ isb(); asm volatile("mrs %0, cntvct_el0" : "=r" (res) :: "memory"); - /* - * This isb() is required to prevent that the seq lock is - * speculated.# - */ - isb(); + arch_counter_enforce_ordering(res); return res; } -- cgit v1.2.3 From 6e554abd07002405fd9175284a10729e2f54be43 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 18 Mar 2021 17:07:38 +0000 Subject: arm64: compat: Poison the compat sigpage Commit 9c698bff66ab ("ARM: ensure the signal page contains defined contents") poisoned the unused portions of the signal page for 32-bit Arm. Implement the same poisoning for the compat signal page on arm64 rather than using __GFP_ZERO. Signed-off-by: Will Deacon Reviewed-by: Vincenzo Frascino Link: https://lore.kernel.org/r/20210318170738.7756-6-will@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/kernel/vdso.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 16bf0b46fb70..159b72a646ab 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -318,17 +318,20 @@ static int aarch32_alloc_kuser_vdso_page(void) return 0; } +#define COMPAT_SIGPAGE_POISON_WORD 0xe7fddef1 static int aarch32_alloc_sigpage(void) { extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[]; int sigret_sz = __aarch32_sigret_code_end - __aarch32_sigret_code_start; - unsigned long sigpage; + __le32 poison = cpu_to_le32(COMPAT_SIGPAGE_POISON_WORD); + void *sigpage; - sigpage = get_zeroed_page(GFP_KERNEL); + sigpage = (void *)__get_free_page(GFP_KERNEL); if (!sigpage) return -ENOMEM; - memcpy((void *)sigpage, __aarch32_sigret_code_start, sigret_sz); + memset32(sigpage, (__force u32)poison, PAGE_SIZE / sizeof(poison)); + memcpy(sigpage, __aarch32_sigret_code_start, sigret_sz); aarch32_sig_page = virt_to_page(sigpage); return 0; } -- cgit v1.2.3 From b0b8b689d78c9666a991e1cc87c3dad4c261007f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 15 Mar 2021 11:56:24 +0000 Subject: genirq: Allow architectures to override set_handle_irq() fallback Some architectures want to provide the generic set_handle_irq() API, but for structural reasons need to provide their own implementation. For example, arm64 needs to do this to provide uniform set_handle_irq() and set_handle_fiq() registration functions. Make this possible by allowing architectures to provide their own implementation of set_handle_irq when CONFIG_GENERIC_IRQ_MULTI_HANDLER is not selected. 
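The override mechanism relies on the common "#define foo foo" idiom, which
lets the generic header test whether an architecture already claimed the
name; a stand-alone illustration (hypothetical names, ordinary user-space C):

#include <stdio.h>

/* "Architecture" header: provides a real implementation... */
static void arch_install_handler(void) { puts("arch implementation"); }
#define arch_install_handler arch_install_handler	/* ...and advertises it */

/* "Generic" header: only installs the fallback if nothing else
 * has claimed the name. */
#ifndef arch_install_handler
#define arch_install_handler() puts("generic fallback")
#endif

int main(void)
{
	arch_install_handler();	/* prints "arch implementation" */
	return 0;
}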
Signed-off-by: Marc Zyngier [Mark: expand commit message] Signed-off-by: Mark Rutland Tested-by: Hector Martin Cc: James Morse Cc: Thomas Gleixner Cc: Will Deacon Link: https://lore.kernel.org/r/20210315115629.57191-2-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- include/linux/irq.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/irq.h b/include/linux/irq.h index 2efde6a79b7e..9890180b84fd 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -1258,11 +1258,13 @@ int __init set_handle_irq(void (*handle_irq)(struct pt_regs *)); */ extern void (*handle_arch_irq)(struct pt_regs *) __ro_after_init; #else +#ifndef set_handle_irq #define set_handle_irq(handle_irq) \ do { \ (void)handle_irq; \ WARN_ON(1); \ } while (0) #endif +#endif #endif /* _LINUX_IRQ_H */ -- cgit v1.2.3 From 338a743640e98d26baf4a1450473ddcfe0a0c025 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 15 Mar 2021 11:56:25 +0000 Subject: arm64: don't use GENERIC_IRQ_MULTI_HANDLER In subsequent patches we want to allow irqchip drivers to register as FIQ handlers, with a set_handle_fiq() function. To keep the IRQ/FIQ paths similar, we want arm64 to provide both set_handle_irq() and set_handle_fiq(), rather than using GENERIC_IRQ_MULTI_HANDLER for the former. This patch adds an arm64-specific implementation of set_handle_irq(). There should be no functional change as a result of this patch. Signed-off-by: Marc Zyngier [Mark: use a single handler pointer] Signed-off-by: Mark Rutland Tested-by: Hector Martin Cc: James Morse Cc: Thomas Gleixner Cc: Will Deacon Link: https://lore.kernel.org/r/20210315115629.57191-3-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 1 - arch/arm64/include/asm/irq.h | 3 +++ arch/arm64/kernel/irq.c | 11 +++++++++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 5656e7aacd69..e7d2405be71f 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -110,7 +110,6 @@ config ARM64 select GENERIC_EARLY_IOREMAP select GENERIC_IDLE_POLL_SETUP select GENERIC_IRQ_IPI - select GENERIC_IRQ_MULTI_HANDLER select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW select GENERIC_IRQ_SHOW_LEVEL diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h index b2b0c6405eb0..8391c6f6f746 100644 --- a/arch/arm64/include/asm/irq.h +++ b/arch/arm64/include/asm/irq.h @@ -8,6 +8,9 @@ struct pt_regs; +int set_handle_irq(void (*handle_irq)(struct pt_regs *)); +#define set_handle_irq set_handle_irq + static inline int nr_legacy_irqs(void) { return 0; diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index dfb1feab867d..ad63bd50fa7b 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -71,6 +71,17 @@ static void init_irq_stacks(void) } #endif +void (*handle_arch_irq)(struct pt_regs *) __ro_after_init; + +int __init set_handle_irq(void (*handle_irq)(struct pt_regs *)) +{ + if (handle_arch_irq) + return -EBUSY; + + handle_arch_irq = handle_irq; + return 0; +} + void __init init_IRQ(void) { init_irq_stacks(); -- cgit v1.2.3 From 8ff443cebffab22544b77a1f9fb3651a49bde300 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 15 Mar 2021 11:56:26 +0000 Subject: arm64: irq: rework root IRQ handler registration If we accidentally unmask IRQs before we've registered a root IRQ handler, handle_arch_irq will be NULL, and the IRQ exception handler will branch to a bogus address. 
To make this easier to debug, this patch initialises handle_arch_irq to a
default handler which will panic(), making such problems easier to identify
early during bringup.

When we add support for FIQ handlers, we can follow the same approach.

When we add support for a root FIQ handler, it's possible to have a root IRQ
handler without a root FIQ handler, and in theory the inverse is also
possible. To permit this, and to keep the IRQ/FIQ registration logic similar,
this patch removes the panic in the absence of a root IRQ controller.
Instead, set_handle_irq() logs when a handler is registered, which is
sufficient for debug purposes.

Signed-off-by: Mark Rutland
Tested-by: Hector Martin
Cc: James Morse
Cc: Marc Zyngier
Cc: Thomas Gleixner
Cc: Will Deacon
Acked-by: Will Deacon
Link: https://lore.kernel.org/r/20210315115629.57191-4-mark.rutland@arm.com
Signed-off-by: Catalin Marinas
---
 arch/arm64/kernel/irq.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index ad63bd50fa7b..2fe0b535de30 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -71,14 +71,20 @@ static void init_irq_stacks(void)
 }
 #endif

-void (*handle_arch_irq)(struct pt_regs *) __ro_after_init;
+static void default_handle_irq(struct pt_regs *regs)
+{
+	panic("IRQ taken without a root IRQ handler\n");
+}
+
+void (*handle_arch_irq)(struct pt_regs *) __ro_after_init = default_handle_irq;

 int __init set_handle_irq(void (*handle_irq)(struct pt_regs *))
 {
-	if (handle_arch_irq)
+	if (handle_arch_irq != default_handle_irq)
 		return -EBUSY;

 	handle_arch_irq = handle_irq;
+	pr_info("Root IRQ handler: %ps\n", handle_irq);
 	return 0;
 }

@@ -87,8 +93,6 @@ void __init init_IRQ(void)
 	init_irq_stacks();
 	init_irq_scs();
 	irqchip_init();
-	if (!handle_arch_irq)
-		panic("No interrupt controller found.");

 	if (system_uses_irq_prio_masking()) {
 		/*
--
cgit v1.2.3


From 9eb563cdabe1d583c262042d5d44cc256f644543 Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Mon, 15 Mar 2021 11:56:27 +0000
Subject: arm64: entry: factor irq triage logic into macros

In subsequent patches we'll allow an FIQ handler to be registered, and FIQ
exceptions will need to be triaged very similarly to IRQ exceptions. So that
we can reuse the existing logic, this patch factors the IRQ triage logic out
into macros that can be reused for FIQ.

The macros are named to follow the elX_foo_handler scheme used by the C
exception handlers. For consistency with other top-level exception handlers,
the kernel_entry/kernel_exit logic is not moved into the macros. As FIQ will
use a different C handler, this handler name is provided as an argument to
the macros.

There should be no functional change as a result of this patch.

Signed-off-by: Marc Zyngier
[Mark: rework macros, commit message, rebase before DAIF rework]
Signed-off-by: Mark Rutland
Tested-by: Hector Martin
Cc: James Morse
Cc: Thomas Gleixner
Cc: Will Deacon
Acked-by: Will Deacon
Link: https://lore.kernel.org/r/20210315115629.57191-5-mark.rutland@arm.com
Signed-off-by: Catalin Marinas
---
 arch/arm64/kernel/entry.S | 80 +++++++++++++++++++++++++----------------------
 1 file changed, 43 insertions(+), 37 deletions(-)

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index a31a0a713c85..e235b0e4e468 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -491,8 +491,8 @@ tsk	.req	x28		// current thread_info

 /*
  * Interrupt handling.
*/ - .macro irq_handler - ldr_l x1, handle_arch_irq + .macro irq_handler, handler:req + ldr_l x1, \handler mov x0, sp irq_stack_entry blr x1 @@ -531,6 +531,45 @@ alternative_endif #endif .endm + .macro el1_interrupt_handler, handler:req + gic_prio_irq_setup pmr=x20, tmp=x1 + enable_da_f + + mov x0, sp + bl enter_el1_irq_or_nmi + + irq_handler \handler + +#ifdef CONFIG_PREEMPTION + ldr x24, [tsk, #TSK_TI_PREEMPT] // get preempt count +alternative_if ARM64_HAS_IRQ_PRIO_MASKING + /* + * DA_F were cleared at start of handling. If anything is set in DAIF, + * we come back from an NMI, so skip preemption + */ + mrs x0, daif + orr x24, x24, x0 +alternative_else_nop_endif + cbnz x24, 1f // preempt count != 0 || NMI return path + bl arm64_preempt_schedule_irq // irq en/disable is done inside +1: +#endif + + mov x0, sp + bl exit_el1_irq_or_nmi + .endm + + .macro el0_interrupt_handler, handler:req + gic_prio_irq_setup pmr=x20, tmp=x0 + user_exit_irqoff + enable_da_f + + tbz x22, #55, 1f + bl do_el0_irq_bp_hardening +1: + irq_handler \handler + .endm + .text /* @@ -660,32 +699,7 @@ SYM_CODE_END(el1_sync) .align 6 SYM_CODE_START_LOCAL_NOALIGN(el1_irq) kernel_entry 1 - gic_prio_irq_setup pmr=x20, tmp=x1 - enable_da_f - - mov x0, sp - bl enter_el1_irq_or_nmi - - irq_handler - -#ifdef CONFIG_PREEMPTION - ldr x24, [tsk, #TSK_TI_PREEMPT] // get preempt count -alternative_if ARM64_HAS_IRQ_PRIO_MASKING - /* - * DA_F were cleared at start of handling. If anything is set in DAIF, - * we come back from an NMI, so skip preemption - */ - mrs x0, daif - orr x24, x24, x0 -alternative_else_nop_endif - cbnz x24, 1f // preempt count != 0 || NMI return path - bl arm64_preempt_schedule_irq // irq en/disable is done inside -1: -#endif - - mov x0, sp - bl exit_el1_irq_or_nmi - + el1_interrupt_handler handle_arch_irq kernel_exit 1 SYM_CODE_END(el1_irq) @@ -725,15 +739,7 @@ SYM_CODE_END(el0_error_compat) SYM_CODE_START_LOCAL_NOALIGN(el0_irq) kernel_entry 0 el0_irq_naked: - gic_prio_irq_setup pmr=x20, tmp=x0 - user_exit_irqoff - enable_da_f - - tbz x22, #55, 1f - bl do_el0_irq_bp_hardening -1: - irq_handler - + el0_interrupt_handler handle_arch_irq b ret_to_user SYM_CODE_END(el0_irq) -- cgit v1.2.3 From f0098155d337cab638cf18e37a3e9257d653d481 Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Mon, 15 Mar 2021 11:56:28 +0000 Subject: arm64: Always keep DAIF.[IF] in sync Apple SoCs (A11 and newer) have some interrupt sources hardwired to the FIQ line. We implement support for this by simply treating IRQs and FIQs the same way in the interrupt vectors. To support these systems, the FIQ mask bit needs to be kept in sync with the IRQ mask bit, so both kinds of exceptions are masked together. No other platforms should be delivering FIQ exceptions right now, and we already unmask FIQ in normal process context, so this should not have an effect on other systems - if spurious FIQs were arriving, they would already panic the kernel. 
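For reference, a sketch of the PSTATE.DAIF mask bits involved (values as in
the arm64 uapi ptrace.h); after this change, "interrupts masked" always means
the I and F bits are set together:

/* D, A, I, F exception mask bits in SPSR/DAIF */
#define PSR_F_BIT	0x00000040	/* FIQ mask */
#define PSR_I_BIT	0x00000080	/* IRQ mask */
#define PSR_A_BIT	0x00000100	/* SError mask */
#define PSR_D_BIT	0x00000200	/* Debug mask */

/* Illustrative invariant: both interrupt classes masked, or neither. */
static inline int daif_if_in_sync(unsigned long daif)
{
	unsigned long iflags = daif & (PSR_I_BIT | PSR_F_BIT);

	return iflags == 0 || iflags == (PSR_I_BIT | PSR_F_BIT);
}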
Signed-off-by: Hector Martin Signed-off-by: Mark Rutland Tested-by: Hector Martin Cc: James Morse Cc: Marc Zyngier Cc: Thomas Gleixner Cc: Will Deacon Acked-by: Will Deacon Link: https://lore.kernel.org/r/20210315115629.57191-6-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/arch_gicv3.h | 2 +- arch/arm64/include/asm/assembler.h | 8 ++++---- arch/arm64/include/asm/daifflags.h | 10 +++++----- arch/arm64/include/asm/irqflags.h | 16 +++++++--------- arch/arm64/kernel/entry.S | 12 +++++++----- arch/arm64/kernel/process.c | 2 +- arch/arm64/kernel/smp.c | 1 + 7 files changed, 26 insertions(+), 25 deletions(-) diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index 880b9054d75c..934b9be582d2 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -173,7 +173,7 @@ static inline void gic_pmr_mask_irqs(void) static inline void gic_arch_enable_irqs(void) { - asm volatile ("msr daifclr, #2" : : : "memory"); + asm volatile ("msr daifclr, #3" : : : "memory"); } #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index ca31594d3d6c..b76a71e84b7c 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -40,9 +40,9 @@ msr daif, \flags .endm - /* IRQ is the lowest priority flag, unconditionally unmask the rest. */ - .macro enable_da_f - msr daifclr, #(8 | 4 | 1) + /* IRQ/FIQ are the lowest priority flags, unconditionally unmask the rest. */ + .macro enable_da + msr daifclr, #(8 | 4) .endm /* @@ -50,7 +50,7 @@ */ .macro save_and_disable_irq, flags mrs \flags, daif - msr daifset, #2 + msr daifset, #3 .endm .macro restore_irq, flags diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h index 1c26d7baa67f..5eb7af9c4557 100644 --- a/arch/arm64/include/asm/daifflags.h +++ b/arch/arm64/include/asm/daifflags.h @@ -13,8 +13,8 @@ #include #define DAIF_PROCCTX 0 -#define DAIF_PROCCTX_NOIRQ PSR_I_BIT -#define DAIF_ERRCTX (PSR_I_BIT | PSR_A_BIT) +#define DAIF_PROCCTX_NOIRQ (PSR_I_BIT | PSR_F_BIT) +#define DAIF_ERRCTX (PSR_A_BIT | PSR_I_BIT | PSR_F_BIT) #define DAIF_MASK (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT) @@ -47,7 +47,7 @@ static inline unsigned long local_daif_save_flags(void) if (system_uses_irq_prio_masking()) { /* If IRQs are masked with PMR, reflect it in the flags */ if (read_sysreg_s(SYS_ICC_PMR_EL1) != GIC_PRIO_IRQON) - flags |= PSR_I_BIT; + flags |= PSR_I_BIT | PSR_F_BIT; } return flags; @@ -69,7 +69,7 @@ static inline void local_daif_restore(unsigned long flags) bool irq_disabled = flags & PSR_I_BIT; WARN_ON(system_has_prio_mask_debugging() && - !(read_sysreg(daif) & PSR_I_BIT)); + (read_sysreg(daif) & (PSR_I_BIT | PSR_F_BIT)) != (PSR_I_BIT | PSR_F_BIT)); if (!irq_disabled) { trace_hardirqs_on(); @@ -86,7 +86,7 @@ static inline void local_daif_restore(unsigned long flags) * If interrupts are disabled but we can take * asynchronous errors, we can take NMIs */ - flags &= ~PSR_I_BIT; + flags &= ~(PSR_I_BIT | PSR_F_BIT); pmr = GIC_PRIO_IRQOFF; } else { pmr = GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET; diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h index ff328e5bbb75..b57b9b1e4344 100644 --- a/arch/arm64/include/asm/irqflags.h +++ b/arch/arm64/include/asm/irqflags.h @@ -12,15 +12,13 @@ /* * Aarch64 has flags for masking: Debug, Asynchronous (serror), Interrupts and - * FIQ exceptions, in the 'daif' register. 
We mask and unmask them in 'dai' + * FIQ exceptions, in the 'daif' register. We mask and unmask them in 'daif' * order: * Masking debug exceptions causes all other exceptions to be masked too/ - * Masking SError masks irq, but not debug exceptions. Masking irqs has no - * side effects for other flags. Keeping to this order makes it easier for - * entry.S to know which exceptions should be unmasked. - * - * FIQ is never expected, but we mask it when we disable debug exceptions, and - * unmask it at all other times. + * Masking SError masks IRQ/FIQ, but not debug exceptions. IRQ and FIQ are + * always masked and unmasked together, and have no side effects for other + * flags. Keeping to this order makes it easier for entry.S to know which + * exceptions should be unmasked. */ /* @@ -35,7 +33,7 @@ static inline void arch_local_irq_enable(void) } asm volatile(ALTERNATIVE( - "msr daifclr, #2 // arch_local_irq_enable", + "msr daifclr, #3 // arch_local_irq_enable", __msr_s(SYS_ICC_PMR_EL1, "%0"), ARM64_HAS_IRQ_PRIO_MASKING) : @@ -54,7 +52,7 @@ static inline void arch_local_irq_disable(void) } asm volatile(ALTERNATIVE( - "msr daifset, #2 // arch_local_irq_disable", + "msr daifset, #3 // arch_local_irq_disable", __msr_s(SYS_ICC_PMR_EL1, "%0"), ARM64_HAS_IRQ_PRIO_MASKING) : diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index e235b0e4e468..ce8d4dc416fb 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -533,7 +533,7 @@ alternative_endif .macro el1_interrupt_handler, handler:req gic_prio_irq_setup pmr=x20, tmp=x1 - enable_da_f + enable_da mov x0, sp bl enter_el1_irq_or_nmi @@ -544,8 +544,10 @@ alternative_endif ldr x24, [tsk, #TSK_TI_PREEMPT] // get preempt count alternative_if ARM64_HAS_IRQ_PRIO_MASKING /* - * DA_F were cleared at start of handling. If anything is set in DAIF, - * we come back from an NMI, so skip preemption + * DA were cleared at start of handling, and IF are cleared by + * the GIC irqchip driver using gic_arch_enable_irqs() for + * normal IRQs. 
If anything is set, it means we come back from + * an NMI instead of a normal IRQ, so skip preemption */ mrs x0, daif orr x24, x24, x0 @@ -562,7 +564,7 @@ alternative_else_nop_endif .macro el0_interrupt_handler, handler:req gic_prio_irq_setup pmr=x20, tmp=x0 user_exit_irqoff - enable_da_f + enable_da tbz x22, #55, 1f bl do_el0_irq_bp_hardening @@ -763,7 +765,7 @@ el0_error_naked: mov x0, sp mov x1, x25 bl do_serror - enable_da_f + enable_da b ret_to_user SYM_CODE_END(el0_error) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 325c83b1a24d..a29028d3d46e 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -84,7 +84,7 @@ static void noinstr __cpu_do_idle_irqprio(void) unsigned long daif_bits; daif_bits = read_sysreg(daif); - write_sysreg(daif_bits | PSR_I_BIT, daif); + write_sysreg(daif_bits | PSR_I_BIT | PSR_F_BIT, daif); /* * Unmask PMR before going idle to make sure interrupts can diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 357590beaabb..dcd7041b2b07 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -188,6 +188,7 @@ static void init_gic_priority_masking(void) cpuflags = read_sysreg(daif); WARN_ON(!(cpuflags & PSR_I_BIT)); + WARN_ON(!(cpuflags & PSR_F_BIT)); gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); } -- cgit v1.2.3 From 3889ba70102ed8c609e42c1d3563c8c041ce0511 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 15 Mar 2021 11:56:29 +0000 Subject: arm64: irq: allow FIQs to be handled On contemporary platforms we don't use FIQ, and treat any stray FIQ as a fatal event. However, some platforms have an interrupt controller wired to FIQ, and need to handle FIQ as part of regular operation. So that we can support both cases dynamically, this patch updates the FIQ exception handling code to operate the same way as the IRQ handling code, with its own handle_arch_fiq handler. Where a root FIQ handler is not registered, an unexpected FIQ exception will trigger the default FIQ handler, which will panic() as today. Where a root FIQ handler is registered, handling of the FIQ is deferred to that handler. As el0_fiq_invalid_compat is supplanted by el0_fiq, the former is removed. For !CONFIG_COMPAT builds we never expect to take an exception from AArch32 EL0, so we keep the common el0_fiq_invalid handler. 
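An irqchip driver on such a platform would then register both root handlers
at probe time; a hedged sketch (the driver and handler names here are
hypothetical, only set_handle_irq()/set_handle_fiq() come from this series):

#include <linux/irq.h>
#include <asm/irq.h>

static void example_handle_irq(struct pt_regs *regs)
{
	/* read the controller's event register and dispatch ... */
}

static void example_handle_fiq(struct pt_regs *regs)
{
	/* sources hardwired to the FIQ line are dispatched here ... */
}

static int __init example_irqchip_init(void)
{
	int ret = set_handle_irq(example_handle_irq);

	if (ret)
		return ret;	/* -EBUSY: a root handler already exists */

	return set_handle_fiq(example_handle_fiq);
}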
Signed-off-by: Mark Rutland Tested-by: Hector Martin Cc: James Morse Cc: Marc Zyngier Cc: Thomas Gleixner Cc: Will Deacon Acked-by: Will Deacon Link: https://lore.kernel.org/r/20210315115629.57191-7-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/irq.h | 1 + arch/arm64/kernel/entry.S | 30 +++++++++++++++++++++--------- arch/arm64/kernel/irq.c | 16 ++++++++++++++++ 3 files changed, 38 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h index 8391c6f6f746..fac08e18bcd5 100644 --- a/arch/arm64/include/asm/irq.h +++ b/arch/arm64/include/asm/irq.h @@ -10,6 +10,7 @@ struct pt_regs; int set_handle_irq(void (*handle_irq)(struct pt_regs *)); #define set_handle_irq set_handle_irq +int set_handle_fiq(void (*handle_fiq)(struct pt_regs *)); static inline int nr_legacy_irqs(void) { diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index ce8d4dc416fb..a86f50de2c7b 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -588,18 +588,18 @@ SYM_CODE_START(vectors) kernel_ventry 1, sync // Synchronous EL1h kernel_ventry 1, irq // IRQ EL1h - kernel_ventry 1, fiq_invalid // FIQ EL1h + kernel_ventry 1, fiq // FIQ EL1h kernel_ventry 1, error // Error EL1h kernel_ventry 0, sync // Synchronous 64-bit EL0 kernel_ventry 0, irq // IRQ 64-bit EL0 - kernel_ventry 0, fiq_invalid // FIQ 64-bit EL0 + kernel_ventry 0, fiq // FIQ 64-bit EL0 kernel_ventry 0, error // Error 64-bit EL0 #ifdef CONFIG_COMPAT kernel_ventry 0, sync_compat, 32 // Synchronous 32-bit EL0 kernel_ventry 0, irq_compat, 32 // IRQ 32-bit EL0 - kernel_ventry 0, fiq_invalid_compat, 32 // FIQ 32-bit EL0 + kernel_ventry 0, fiq_compat, 32 // FIQ 32-bit EL0 kernel_ventry 0, error_compat, 32 // Error 32-bit EL0 #else kernel_ventry 0, sync_invalid, 32 // Synchronous 32-bit EL0 @@ -665,12 +665,6 @@ SYM_CODE_START_LOCAL(el0_error_invalid) inv_entry 0, BAD_ERROR SYM_CODE_END(el0_error_invalid) -#ifdef CONFIG_COMPAT -SYM_CODE_START_LOCAL(el0_fiq_invalid_compat) - inv_entry 0, BAD_FIQ, 32 -SYM_CODE_END(el0_fiq_invalid_compat) -#endif - SYM_CODE_START_LOCAL(el1_sync_invalid) inv_entry 1, BAD_SYNC SYM_CODE_END(el1_sync_invalid) @@ -705,6 +699,12 @@ SYM_CODE_START_LOCAL_NOALIGN(el1_irq) kernel_exit 1 SYM_CODE_END(el1_irq) +SYM_CODE_START_LOCAL_NOALIGN(el1_fiq) + kernel_entry 1 + el1_interrupt_handler handle_arch_fiq + kernel_exit 1 +SYM_CODE_END(el1_fiq) + /* * EL0 mode handlers. 
*/ @@ -731,6 +731,11 @@ SYM_CODE_START_LOCAL_NOALIGN(el0_irq_compat) b el0_irq_naked SYM_CODE_END(el0_irq_compat) +SYM_CODE_START_LOCAL_NOALIGN(el0_fiq_compat) + kernel_entry 0, 32 + b el0_fiq_naked +SYM_CODE_END(el0_fiq_compat) + SYM_CODE_START_LOCAL_NOALIGN(el0_error_compat) kernel_entry 0, 32 b el0_error_naked @@ -745,6 +750,13 @@ el0_irq_naked: b ret_to_user SYM_CODE_END(el0_irq) +SYM_CODE_START_LOCAL_NOALIGN(el0_fiq) + kernel_entry 0 +el0_fiq_naked: + el0_interrupt_handler handle_arch_fiq + b ret_to_user +SYM_CODE_END(el0_fiq) + SYM_CODE_START_LOCAL(el1_error) kernel_entry 1 mrs x1, esr_el1 diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index 2fe0b535de30..bda49430c9ea 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -76,7 +76,13 @@ static void default_handle_irq(struct pt_regs *regs) panic("IRQ taken without a root IRQ handler\n"); } +static void default_handle_fiq(struct pt_regs *regs) +{ + panic("FIQ taken without a root FIQ handler\n"); +} + void (*handle_arch_irq)(struct pt_regs *) __ro_after_init = default_handle_irq; +void (*handle_arch_fiq)(struct pt_regs *) __ro_after_init = default_handle_fiq; int __init set_handle_irq(void (*handle_irq)(struct pt_regs *)) { @@ -88,6 +94,16 @@ int __init set_handle_irq(void (*handle_irq)(struct pt_regs *)) return 0; } +int __init set_handle_fiq(void (*handle_fiq)(struct pt_regs *)) +{ + if (handle_arch_fiq != default_handle_fiq) + return -EBUSY; + + handle_arch_fiq = handle_fiq; + pr_info("Root FIQ handler: %ps\n", handle_fiq); + return 0; +} + void __init init_IRQ(void) { init_irq_stacks(); -- cgit v1.2.3 From 700a9cf0527ca2d7d3e4980fef2deb4883432ab6 Mon Sep 17 00:00:00 2001 From: Zihao Tang Date: Fri, 19 Mar 2021 18:04:31 +0800 Subject: drivers/perf: convert sysfs snprintf family to sysfs_emit Fix the following coccicheck warning: ./drivers/perf/hisilicon/hisi_uncore_pmu.c:128:8-16: WARNING: use scnprintf or sprintf. ./drivers/perf/fsl_imx8_ddr_perf.c:173:8-16: WARNING: use scnprintf or sprintf. ./drivers/perf/arm_spe_pmu.c:129:8-16: WARNING: use scnprintf or sprintf. ./drivers/perf/arm_smmu_pmu.c:563:8-16: WARNING: use scnprintf or sprintf. ./drivers/perf/arm_dsu_pmu.c:149:8-16: WARNING: use scnprintf or sprintf. ./drivers/perf/arm_dsu_pmu.c:139:8-16: WARNING: use scnprintf or sprintf. ./drivers/perf/arm-cmn.c:563:8-16: WARNING: use scnprintf or sprintf. ./drivers/perf/arm-cmn.c:351:8-16: WARNING: use scnprintf or sprintf. ./drivers/perf/arm-ccn.c:224:8-16: WARNING: use scnprintf or sprintf. ./drivers/perf/arm-cci.c:708:8-16: WARNING: use scnprintf or sprintf. ./drivers/perf/arm-cci.c:699:8-16: WARNING: use scnprintf or sprintf. ./drivers/perf/arm-cci.c:528:8-16: WARNING: use scnprintf or sprintf. ./drivers/perf/arm-cci.c:309:8-16: WARNING: use scnprintf or sprintf. 
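The shape of each conversion is the same; a minimal sketch with a made-up
attribute (foo_value and foo_show() are hypothetical):

#include <linux/device.h>
#include <linux/sysfs.h>

static int foo_value;

static ssize_t foo_show(struct device *dev, struct device_attribute *attr,
			char *buf)
{
	/* Before: return snprintf(buf, PAGE_SIZE, "%d\n", foo_value);
	 * sysfs_emit() knows sysfs buffers are exactly one page and
	 * checks that 'buf' is page-aligned, so the bound is implicit. */
	return sysfs_emit(buf, "%d\n", foo_value);
}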
Signed-off-by: Zihao Tang Signed-off-by: Qi Liu Link: https://lore.kernel.org/r/1616148273-16374-2-git-send-email-liuqi115@huawei.com Signed-off-by: Will Deacon --- drivers/perf/arm-cci.c | 12 ++++++------ drivers/perf/arm-ccn.c | 4 ++-- drivers/perf/arm-cmn.c | 22 +++++++++++----------- drivers/perf/arm_dsu_pmu.c | 5 ++--- drivers/perf/arm_smmuv3_pmu.c | 2 +- drivers/perf/arm_spe_pmu.c | 3 +-- drivers/perf/fsl_imx8_ddr_perf.c | 3 +-- drivers/perf/hisilicon/hisi_uncore_pmu.c | 2 +- 8 files changed, 25 insertions(+), 28 deletions(-) diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c index f81e2ec90005..666d8a9b557f 100644 --- a/drivers/perf/arm-cci.c +++ b/drivers/perf/arm-cci.c @@ -306,7 +306,7 @@ static ssize_t cci400_pmu_cycle_event_show(struct device *dev, { struct dev_ext_attribute *eattr = container_of(attr, struct dev_ext_attribute, attr); - return snprintf(buf, PAGE_SIZE, "config=0x%lx\n", (unsigned long)eattr->var); + return sysfs_emit(buf, "config=0x%lx\n", (unsigned long)eattr->var); } static int cci400_get_event_idx(struct cci_pmu *cci_pmu, @@ -525,8 +525,8 @@ static ssize_t cci5xx_pmu_global_event_show(struct device *dev, struct dev_ext_attribute *eattr = container_of(attr, struct dev_ext_attribute, attr); /* Global events have single fixed source code */ - return snprintf(buf, PAGE_SIZE, "event=0x%lx,source=0x%x\n", - (unsigned long)eattr->var, CCI5xx_PORT_GLOBAL); + return sysfs_emit(buf, "event=0x%lx,source=0x%x\n", + (unsigned long)eattr->var, CCI5xx_PORT_GLOBAL); } /* @@ -696,7 +696,7 @@ static ssize_t cci_pmu_format_show(struct device *dev, { struct dev_ext_attribute *eattr = container_of(attr, struct dev_ext_attribute, attr); - return snprintf(buf, PAGE_SIZE, "%s\n", (char *)eattr->var); + return sysfs_emit(buf, "%s\n", (char *)eattr->var); } static ssize_t cci_pmu_event_show(struct device *dev, @@ -705,8 +705,8 @@ static ssize_t cci_pmu_event_show(struct device *dev, struct dev_ext_attribute *eattr = container_of(attr, struct dev_ext_attribute, attr); /* source parameter is mandatory for normal PMU events */ - return snprintf(buf, PAGE_SIZE, "source=?,event=0x%lx\n", - (unsigned long)eattr->var); + return sysfs_emit(buf, "source=?,event=0x%lx\n", + (unsigned long)eattr->var); } static int pmu_is_valid_counter(struct cci_pmu *cci_pmu, int idx) diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c index a0a71c1df042..3a2ddc0cc6c3 100644 --- a/drivers/perf/arm-ccn.c +++ b/drivers/perf/arm-ccn.c @@ -221,7 +221,7 @@ static ssize_t arm_ccn_pmu_format_show(struct device *dev, struct dev_ext_attribute *ea = container_of(attr, struct dev_ext_attribute, attr); - return snprintf(buf, PAGE_SIZE, "%s\n", (char *)ea->var); + return sysfs_emit(buf, "%s\n", (char *)ea->var); } #define CCN_FORMAT_ATTR(_name, _config) \ @@ -476,7 +476,7 @@ static ssize_t arm_ccn_pmu_cmp_mask_show(struct device *dev, struct arm_ccn *ccn = pmu_to_arm_ccn(dev_get_drvdata(dev)); u64 *mask = arm_ccn_pmu_get_cmp_mask(ccn, attr->attr.name); - return mask ? snprintf(buf, PAGE_SIZE, "0x%016llx\n", *mask) : -EINVAL; + return mask ? 
sysfs_emit(buf, "0x%016llx\n", *mask) : -EINVAL; } static ssize_t arm_ccn_pmu_cmp_mask_store(struct device *dev, diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index 1328159fe564..56a5c355701d 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -348,19 +348,19 @@ static ssize_t arm_cmn_event_show(struct device *dev, eattr = container_of(attr, typeof(*eattr), attr); if (eattr->type == CMN_TYPE_DTC) - return snprintf(buf, PAGE_SIZE, "type=0x%x\n", eattr->type); + return sysfs_emit(buf, "type=0x%x\n", eattr->type); if (eattr->type == CMN_TYPE_WP) - return snprintf(buf, PAGE_SIZE, - "type=0x%x,eventid=0x%x,wp_dev_sel=?,wp_chn_sel=?,wp_grp=?,wp_val=?,wp_mask=?\n", - eattr->type, eattr->eventid); + return sysfs_emit(buf, + "type=0x%x,eventid=0x%x,wp_dev_sel=?,wp_chn_sel=?,wp_grp=?,wp_val=?,wp_mask=?\n", + eattr->type, eattr->eventid); if (arm_cmn_is_occup_event(eattr->type, eattr->eventid)) - return snprintf(buf, PAGE_SIZE, "type=0x%x,eventid=0x%x,occupid=0x%x\n", - eattr->type, eattr->eventid, eattr->occupid); + return sysfs_emit(buf, "type=0x%x,eventid=0x%x,occupid=0x%x\n", + eattr->type, eattr->eventid, eattr->occupid); - return snprintf(buf, PAGE_SIZE, "type=0x%x,eventid=0x%x\n", - eattr->type, eattr->eventid); + return sysfs_emit(buf, "type=0x%x,eventid=0x%x\n", eattr->type, + eattr->eventid); } static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj, @@ -560,12 +560,12 @@ static ssize_t arm_cmn_format_show(struct device *dev, int lo = __ffs(fmt->field), hi = __fls(fmt->field); if (lo == hi) - return snprintf(buf, PAGE_SIZE, "config:%d\n", lo); + return sysfs_emit(buf, "config:%d\n", lo); if (!fmt->config) - return snprintf(buf, PAGE_SIZE, "config:%d-%d\n", lo, hi); + return sysfs_emit(buf, "config:%d-%d\n", lo, hi); - return snprintf(buf, PAGE_SIZE, "config%d:%d-%d\n", fmt->config, lo, hi); + return sysfs_emit(buf, "config%d:%d-%d\n", fmt->config, lo, hi); } #define _CMN_FORMAT_ATTR(_name, _cfg, _fld) \ diff --git a/drivers/perf/arm_dsu_pmu.c b/drivers/perf/arm_dsu_pmu.c index 0459a3403469..196faea074d0 100644 --- a/drivers/perf/arm_dsu_pmu.c +++ b/drivers/perf/arm_dsu_pmu.c @@ -136,8 +136,7 @@ static ssize_t dsu_pmu_sysfs_event_show(struct device *dev, { struct dev_ext_attribute *eattr = container_of(attr, struct dev_ext_attribute, attr); - return snprintf(buf, PAGE_SIZE, "event=0x%lx\n", - (unsigned long)eattr->var); + return sysfs_emit(buf, "event=0x%lx\n", (unsigned long)eattr->var); } static ssize_t dsu_pmu_sysfs_format_show(struct device *dev, @@ -146,7 +145,7 @@ static ssize_t dsu_pmu_sysfs_format_show(struct device *dev, { struct dev_ext_attribute *eattr = container_of(attr, struct dev_ext_attribute, attr); - return snprintf(buf, PAGE_SIZE, "%s\n", (char *)eattr->var); + return sysfs_emit(buf, "%s\n", (char *)eattr->var); } static ssize_t dsu_pmu_cpumask_show(struct device *dev, diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c index 8ff7a67f691c..fa9dfbcbd683 100644 --- a/drivers/perf/arm_smmuv3_pmu.c +++ b/drivers/perf/arm_smmuv3_pmu.c @@ -560,7 +560,7 @@ static ssize_t smmu_pmu_identifier_attr_show(struct device *dev, { struct smmu_pmu *smmu_pmu = to_smmu_pmu(dev_get_drvdata(dev)); - return snprintf(page, PAGE_SIZE, "0x%08x\n", smmu_pmu->iidr); + return sysfs_emit(page, "0x%08x\n", smmu_pmu->iidr); } static umode_t smmu_pmu_identifier_attr_visible(struct kobject *kobj, diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c index d3929ccebfd2..8a1e86ab2d8e 100644 --- a/drivers/perf/arm_spe_pmu.c +++ 
b/drivers/perf/arm_spe_pmu.c
@@ -126,8 +126,7 @@ static ssize_t arm_spe_pmu_cap_show(struct device *dev,
 		container_of(attr, struct dev_ext_attribute, attr);
 	int cap = (long)ea->var;

-	return snprintf(buf, PAGE_SIZE, "%u\n",
-		arm_spe_pmu_cap_get(spe_pmu, cap));
+	return sysfs_emit(buf, "%u\n", arm_spe_pmu_cap_get(spe_pmu, cap));
 }

 #define SPE_EXT_ATTR_ENTRY(_name, _func, _var)				\
diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c
index be1f26b62ddb..c126fd8703ee 100644
--- a/drivers/perf/fsl_imx8_ddr_perf.c
+++ b/drivers/perf/fsl_imx8_ddr_perf.c
@@ -170,8 +170,7 @@ static ssize_t ddr_perf_filter_cap_show(struct device *dev,
 		container_of(attr, struct dev_ext_attribute, attr);
 	int cap = (long)ea->var;

-	return snprintf(buf, PAGE_SIZE, "%u\n",
-			ddr_perf_filter_cap_get(pmu, cap));
+	return sysfs_emit(buf, "%u\n", ddr_perf_filter_cap_get(pmu, cap));
 }

 #define PERF_EXT_ATTR_ENTRY(_name, _func, _var)	\
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c
index 9dbdc3fc3bb4..64ccf5ed00e3 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c
@@ -125,7 +125,7 @@ ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev,
 {
 	struct hisi_pmu *hisi_pmu = to_hisi_pmu(dev_get_drvdata(dev));

-	return snprintf(page, PAGE_SIZE, "0x%08x\n", hisi_pmu->identifier);
+	return sysfs_emit(page, "0x%08x\n", hisi_pmu->identifier);
 }
 EXPORT_SYMBOL_GPL(hisi_uncore_pmu_identifier_attr_show);

--
cgit v1.2.3


From 9ec9f9cf8660d549c77e719d2ce11647a03063b9 Mon Sep 17 00:00:00 2001
From: Qi Liu
Date: Fri, 19 Mar 2021 18:04:32 +0800
Subject: drivers/perf: convert sysfs scnprintf family to sysfs_emit_at() and sysfs_emit()

Use the generic sysfs_emit_at() and sysfs_emit() functions in place of
scnprintf().

Signed-off-by: Qi Liu
Link: https://lore.kernel.org/r/1616148273-16374-3-git-send-email-liuqi115@huawei.com
Signed-off-by: Will Deacon
---
 drivers/perf/arm-ccn.c | 27 +++++++++++----------------
 1 file changed, 11 insertions(+), 16 deletions(-)

diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c
index 3a2ddc0cc6c3..96d47cb302dd 100644
--- a/drivers/perf/arm-ccn.c
+++ b/drivers/perf/arm-ccn.c
@@ -326,43 +326,38 @@ static ssize_t arm_ccn_pmu_event_show(struct device *dev,
 	struct arm_ccn *ccn = pmu_to_arm_ccn(dev_get_drvdata(dev));
 	struct arm_ccn_pmu_event *event = container_of(attr,
 			struct arm_ccn_pmu_event, attr);
-	ssize_t res;
+	int res;

-	res = scnprintf(buf, PAGE_SIZE, "type=0x%x", event->type);
+	res = sysfs_emit(buf, "type=0x%x", event->type);
 	if (event->event)
-		res += scnprintf(buf + res, PAGE_SIZE - res, ",event=0x%x",
-				event->event);
+		res += sysfs_emit_at(buf, res, ",event=0x%x", event->event);
 	if (event->def)
-		res += scnprintf(buf + res, PAGE_SIZE - res, ",%s",
-				event->def);
+		res += sysfs_emit_at(buf, res, ",%s", event->def);
 	if (event->mask)
-		res += scnprintf(buf + res, PAGE_SIZE - res, ",mask=0x%x",
-				event->mask);
+		res += sysfs_emit_at(buf, res, ",mask=0x%x", event->mask);

 	/* Arguments required by an event */
 	switch (event->type) {
 	case CCN_TYPE_CYCLES:
 		break;
 	case CCN_TYPE_XP:
-		res += scnprintf(buf + res, PAGE_SIZE - res,
-				",xp=?,vc=?");
+		res += sysfs_emit_at(buf, res, ",xp=?,vc=?");
 		if (event->event == CCN_EVENT_WATCHPOINT)
-			res += scnprintf(buf + res, PAGE_SIZE - res,
+			res += sysfs_emit_at(buf, res,
 				",port=?,dir=?,cmp_l=?,cmp_h=?,mask=?");
 		else
-			res += scnprintf(buf + res, PAGE_SIZE - res,
-					",bus=?");
+			res += sysfs_emit_at(buf, res, ",bus=?");
 		break;
 	case
CCN_TYPE_MN:
-		res += scnprintf(buf + res, PAGE_SIZE - res, ",node=%d", ccn->mn_id);
+		res += sysfs_emit_at(buf, res, ",node=%d", ccn->mn_id);
 		break;
 	default:
-		res += scnprintf(buf + res, PAGE_SIZE - res, ",node=?");
+		res += sysfs_emit_at(buf, res, ",node=?");
 		break;
 	}

-	res += scnprintf(buf + res, PAGE_SIZE - res, "\n");
+	res += sysfs_emit_at(buf, res, "\n");

 	return res;
 }
--
cgit v1.2.3


From fb62d67586afc046f3783d819985c737f6b0b666 Mon Sep 17 00:00:00 2001
From: Qi Liu
Date: Fri, 19 Mar 2021 18:04:33 +0800
Subject: drivers/perf: convert sysfs sprintf family to sysfs_emit

sprintf() is not aware of the PAGE_SIZE limit of the temporary buffer used
for sysfs content, so it is possible to overrun the buffer. Use the
sysfs_emit() function to ensure that no overrun occurs.

Signed-off-by: Qi Liu
Link: https://lore.kernel.org/r/1616148273-16374-4-git-send-email-liuqi115@huawei.com
Signed-off-by: Will Deacon
---
 drivers/perf/arm_dmc620_pmu.c            | 2 +-
 drivers/perf/arm_smmuv3_pmu.c            | 2 +-
 drivers/perf/fsl_imx8_ddr_perf.c         | 4 ++--
 drivers/perf/hisilicon/hisi_uncore_pmu.c | 6 +++---
 drivers/perf/qcom_l2_pmu.c               | 2 +-
 drivers/perf/qcom_l3_pmu.c               | 4 ++--
 drivers/perf/thunderx2_pmu.c             | 4 ++--
 drivers/perf/xgene_pmu.c                 | 4 ++--
 8 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c
index f2a85500258d..b6c2511d59af 100644
--- a/drivers/perf/arm_dmc620_pmu.c
+++ b/drivers/perf/arm_dmc620_pmu.c
@@ -113,7 +113,7 @@ dmc620_pmu_event_show(struct device *dev,

 	eattr = container_of(attr, typeof(*eattr), attr);

-	return sprintf(page, "event=0x%x,clkdiv2=0x%x\n", eattr->eventid, eattr->clkdiv2);
+	return sysfs_emit(page, "event=0x%x,clkdiv2=0x%x\n", eattr->eventid, eattr->clkdiv2);
 }

 #define DMC620_PMU_EVENT_ATTR(_name, _eventid, _clkdiv2)		\
diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c
index fa9dfbcbd683..45a399f20afa 100644
--- a/drivers/perf/arm_smmuv3_pmu.c
+++ b/drivers/perf/arm_smmuv3_pmu.c
@@ -506,7 +506,7 @@ static ssize_t smmu_pmu_event_show(struct device *dev,

 	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);

-	return sprintf(page, "event=0x%02llx\n", pmu_attr->id);
+	return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id);
 }

 #define SMMU_EVENT_ATTR(name, config)			\
diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c
index c126fd8703ee..2bbb93188064 100644
--- a/drivers/perf/fsl_imx8_ddr_perf.c
+++ b/drivers/perf/fsl_imx8_ddr_perf.c
@@ -110,7 +110,7 @@ static ssize_t ddr_perf_identifier_show(struct device *dev,
 {
 	struct ddr_pmu *pmu = dev_get_drvdata(dev);

-	return sprintf(page, "%s\n", pmu->devtype_data->identifier);
+	return sysfs_emit(page, "%s\n", pmu->devtype_data->identifier);
 }

 static umode_t ddr_perf_identifier_attr_visible(struct kobject *kobj,
@@ -219,7 +219,7 @@ ddr_pmu_event_show(struct device *dev, struct device_attribute *attr,
 	struct perf_pmu_events_attr *pmu_attr;

 	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
-	return sprintf(page, "event=0x%02llx\n", pmu_attr->id);
+	return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id);
 }

 #define IMX8_DDR_PMU_EVENT_ATTR(_name, _id)		\
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c
index 64ccf5ed00e3..5e2b5e12777f 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c
@@ -33,7 +33,7 @@ ssize_t hisi_format_sysfs_show(struct device *dev,

 	eattr = container_of(attr,
- return sprintf(buf, "%s\n", (char *)eattr->var); + return sysfs_emit(buf, "%s\n", (char *)eattr->var); } EXPORT_SYMBOL_GPL(hisi_format_sysfs_show); @@ -47,7 +47,7 @@ ssize_t hisi_event_sysfs_show(struct device *dev, eattr = container_of(attr, struct dev_ext_attribute, attr); - return sprintf(page, "config=0x%lx\n", (unsigned long)eattr->var); + return sysfs_emit(page, "config=0x%lx\n", (unsigned long)eattr->var); } EXPORT_SYMBOL_GPL(hisi_event_sysfs_show); @@ -59,7 +59,7 @@ ssize_t hisi_cpumask_sysfs_show(struct device *dev, { struct hisi_pmu *hisi_pmu = to_hisi_pmu(dev_get_drvdata(dev)); - return sprintf(buf, "%d\n", hisi_pmu->on_cpu); + return sysfs_emit(buf, "%d\n", hisi_pmu->on_cpu); } EXPORT_SYMBOL_GPL(hisi_cpumask_sysfs_show); diff --git a/drivers/perf/qcom_l2_pmu.c b/drivers/perf/qcom_l2_pmu.c index 8883af955a2a..fc54a80f9c5c 100644 --- a/drivers/perf/qcom_l2_pmu.c +++ b/drivers/perf/qcom_l2_pmu.c @@ -676,7 +676,7 @@ static ssize_t l2cache_pmu_event_show(struct device *dev, struct perf_pmu_events_attr *pmu_attr; pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); - return sprintf(page, "event=0x%02llx\n", pmu_attr->id); + return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id); } #define L2CACHE_EVENT_ATTR(_name, _id) \ diff --git a/drivers/perf/qcom_l3_pmu.c b/drivers/perf/qcom_l3_pmu.c index fb34b87b9471..bba078077c93 100644 --- a/drivers/perf/qcom_l3_pmu.c +++ b/drivers/perf/qcom_l3_pmu.c @@ -615,7 +615,7 @@ static ssize_t l3cache_pmu_format_show(struct device *dev, struct dev_ext_attribute *eattr; eattr = container_of(attr, struct dev_ext_attribute, attr); - return sprintf(buf, "%s\n", (char *) eattr->var); + return sysfs_emit(buf, "%s\n", (char *) eattr->var); } #define L3CACHE_PMU_FORMAT_ATTR(_name, _config) \ @@ -643,7 +643,7 @@ static ssize_t l3cache_pmu_event_show(struct device *dev, struct perf_pmu_events_attr *pmu_attr; pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); - return sprintf(page, "event=0x%02llx\n", pmu_attr->id); + return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id); } #define L3CACHE_EVENT_ATTR(_name, _id) \ diff --git a/drivers/perf/thunderx2_pmu.c b/drivers/perf/thunderx2_pmu.c index e116815fa809..06a6d569b0b5 100644 --- a/drivers/perf/thunderx2_pmu.c +++ b/drivers/perf/thunderx2_pmu.c @@ -128,7 +128,7 @@ __tx2_pmu_##_var##_show(struct device *dev, \ char *page) \ { \ BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ - return sprintf(page, _format "\n"); \ + return sysfs_emit(page, _format "\n"); \ } \ \ static struct device_attribute format_attr_##_var = \ @@ -176,7 +176,7 @@ static ssize_t tx2_pmu_event_show(struct device *dev, struct dev_ext_attribute *eattr; eattr = container_of(attr, struct dev_ext_attribute, attr); - return sprintf(buf, "event=0x%lx\n", (unsigned long) eattr->var); + return sysfs_emit(buf, "event=0x%lx\n", (unsigned long) eattr->var); } #define TX2_EVENT_ATTR(name, config) \ diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c index 44faa51ba799..ffe3bdeec845 100644 --- a/drivers/perf/xgene_pmu.c +++ b/drivers/perf/xgene_pmu.c @@ -170,7 +170,7 @@ static ssize_t xgene_pmu_format_show(struct device *dev, struct dev_ext_attribute *eattr; eattr = container_of(attr, struct dev_ext_attribute, attr); - return sprintf(buf, "%s\n", (char *) eattr->var); + return sysfs_emit(buf, "%s\n", (char *) eattr->var); } #define XGENE_PMU_FORMAT_ATTR(_name, _config) \ @@ -281,7 +281,7 @@ static ssize_t xgene_pmu_event_show(struct device *dev, struct dev_ext_attribute *eattr; eattr = container_of(attr, 
struct dev_ext_attribute, attr); - return sprintf(buf, "config=0x%lx\n", (unsigned long) eattr->var); + return sysfs_emit(buf, "config=0x%lx\n", (unsigned long) eattr->var); } #define XGENE_PMU_EVENT_ATTR(_name, _config) \ -- cgit v1.2.3 From 9eef29d8c31bdb8d6254b99e3dc741c75832fd6b Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 23 Mar 2021 18:12:01 +0000 Subject: arm64: entry: remove test_irqs_unmasked macro We haven't needed the test_irqs_unmasked macro since commit: 105fc3352077bba5 ("arm64: entry: move el1 irq/nmi logic to C") ... and as we convert more of the entry logic to C it is decreasingly likely we'll need it in future, so let's remove the unused macro. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Cc: James Morse Cc: Marc Zyngier Cc: Will Deacon Acked-by: Marc Zyngier Acked-by: Will Deacon Link: https://lore.kernel.org/r/20210323181201.18889-1-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/entry.S | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index a31a0a713c85..a82af88e8599 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -499,20 +499,6 @@ tsk .req x28 // current thread_info irq_stack_exit .endm -#ifdef CONFIG_ARM64_PSEUDO_NMI - /* - * Set res to 0 if irqs were unmasked in interrupted context. - * Otherwise set res to non-0 value. - */ - .macro test_irqs_unmasked res:req, pmr:req -alternative_if ARM64_HAS_IRQ_PRIO_MASKING - sub \res, \pmr, #GIC_PRIO_IRQON -alternative_else - mov \res, xzr -alternative_endif - .endm -#endif - .macro gic_prio_kentry_setup, tmp:req #ifdef CONFIG_ARM64_PSEUDO_NMI alternative_if ARM64_HAS_IRQ_PRIO_MASKING -- cgit v1.2.3 From 174744136dcb6d441efdaaffd214f91a352bf6c7 Mon Sep 17 00:00:00 2001 From: Qi Liu Date: Mon, 8 Feb 2021 21:04:58 +0800 Subject: drivers/perf: Simplify the SMMUv3 PMU event attributes For each PMU event, there is a SMMU_EVENT_ATTR(xx, XX) and &smmu_event_attr_xx.attr.attr. Let's redefine the SMMU_EVENT_ATTR to simplify the smmu_pmu_events. 
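The new macro below works because a C99 compound literal at file scope has
static storage duration, so its address is a valid static initializer; a
stand-alone illustration with simplified, hypothetical types:

#include <stdio.h>

struct event_attr {
	const char *name;
	unsigned long id;
};

#define EVENT_ATTR(_name, _config)					\
	(&((struct event_attr){ .name = #_name, .id = (_config) }))

static struct event_attr *events[] = {
	EVENT_ATTR(cycles, 0),
	EVENT_ATTR(tlb_miss, 2),
	NULL,
};

int main(void)
{
	for (int i = 0; events[i]; i++)
		printf("%s = %lu\n", events[i]->name, events[i]->id);
	return 0;
}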
Signed-off-by: Qi Liu Link: https://lore.kernel.org/r/1612789498-12957-1-git-send-email-liuqi115@huawei.com Signed-off-by: Will Deacon --- drivers/perf/arm_smmuv3_pmu.c | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c index 45a399f20afa..ff6fab4bae30 100644 --- a/drivers/perf/arm_smmuv3_pmu.c +++ b/drivers/perf/arm_smmuv3_pmu.c @@ -509,27 +509,21 @@ static ssize_t smmu_pmu_event_show(struct device *dev, return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id); } -#define SMMU_EVENT_ATTR(name, config) \ - PMU_EVENT_ATTR(name, smmu_event_attr_##name, \ - config, smmu_pmu_event_show) -SMMU_EVENT_ATTR(cycles, 0); -SMMU_EVENT_ATTR(transaction, 1); -SMMU_EVENT_ATTR(tlb_miss, 2); -SMMU_EVENT_ATTR(config_cache_miss, 3); -SMMU_EVENT_ATTR(trans_table_walk_access, 4); -SMMU_EVENT_ATTR(config_struct_access, 5); -SMMU_EVENT_ATTR(pcie_ats_trans_rq, 6); -SMMU_EVENT_ATTR(pcie_ats_trans_passed, 7); +#define SMMU_EVENT_ATTR(name, config) \ + (&((struct perf_pmu_events_attr) { \ + .attr = __ATTR(name, 0444, smmu_pmu_event_show, NULL), \ + .id = config, \ + }).attr.attr) static struct attribute *smmu_pmu_events[] = { - &smmu_event_attr_cycles.attr.attr, - &smmu_event_attr_transaction.attr.attr, - &smmu_event_attr_tlb_miss.attr.attr, - &smmu_event_attr_config_cache_miss.attr.attr, - &smmu_event_attr_trans_table_walk_access.attr.attr, - &smmu_event_attr_config_struct_access.attr.attr, - &smmu_event_attr_pcie_ats_trans_rq.attr.attr, - &smmu_event_attr_pcie_ats_trans_passed.attr.attr, + SMMU_EVENT_ATTR(cycles, 0), + SMMU_EVENT_ATTR(transaction, 1), + SMMU_EVENT_ATTR(tlb_miss, 2), + SMMU_EVENT_ATTR(config_cache_miss, 3), + SMMU_EVENT_ATTR(trans_table_walk_access, 4), + SMMU_EVENT_ATTR(config_struct_access, 5), + SMMU_EVENT_ATTR(pcie_ats_trans_rq, 6), + SMMU_EVENT_ATTR(pcie_ats_trans_passed, 7), NULL }; -- cgit v1.2.3 From 4e4cb8ca48bd68c00df67c10ff867016abb7391f Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Mon, 8 Mar 2021 14:50:29 +0800 Subject: drivers/perf: hisi: Remove unnecessary check of counter index The sanity check for counter index has been done in the function hisi_uncore_pmu_get_event_idx, so remove the redundant interface hisi_uncore_pmu_counter_valid() and sanity check. 
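The check is redundant because the only producer of 'idx' already bounds it;
a sketch of the allocation path that establishes the invariant (simplified
from hisi_uncore_pmu_get_event_idx(), not the verbatim code):

#include <linux/bitops.h>

static int get_event_idx(unsigned long *used_mask, int num_counters)
{
	int idx = find_first_zero_bit(used_mask, num_counters);

	if (idx == num_counters)
		return -EAGAIN;			/* all counters busy */

	set_bit(idx, used_mask);
	return idx;				/* always in [0, num_counters) */
}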
Cc: Mark Rutland Cc: Will Deacon Cc: John Garry Cc: Jonathan Cameron Co-developed-by: Qi Liu Signed-off-by: Qi Liu Signed-off-by: Shaokun Zhang Link: https://lore.kernel.org/r/1615186237-22263-2-git-send-email-zhangshaokun@hisilicon.com Signed-off-by: Will Deacon --- drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 20 +++----------------- drivers/perf/hisilicon/hisi_uncore_hha_pmu.c | 18 ++---------------- drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 18 ++---------------- drivers/perf/hisilicon/hisi_uncore_pmu.c | 11 ----------- drivers/perf/hisilicon/hisi_uncore_pmu.h | 1 - 5 files changed, 7 insertions(+), 61 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index ac1a8c120a00..38344b49d7af 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -65,29 +65,15 @@ static u32 hisi_ddrc_pmu_get_counter_offset(int cntr_idx) static u64 hisi_ddrc_pmu_read_counter(struct hisi_pmu *ddrc_pmu, struct hw_perf_event *hwc) { - /* Use event code as counter index */ - u32 idx = GET_DDRC_EVENTID(hwc); - - if (!hisi_uncore_pmu_counter_valid(ddrc_pmu, idx)) { - dev_err(ddrc_pmu->dev, "Unsupported event index:%d!\n", idx); - return 0; - } - - return readl(ddrc_pmu->base + hisi_ddrc_pmu_get_counter_offset(idx)); + return readl(ddrc_pmu->base + + hisi_ddrc_pmu_get_counter_offset(hwc->idx)); } static void hisi_ddrc_pmu_write_counter(struct hisi_pmu *ddrc_pmu, struct hw_perf_event *hwc, u64 val) { - u32 idx = GET_DDRC_EVENTID(hwc); - - if (!hisi_uncore_pmu_counter_valid(ddrc_pmu, idx)) { - dev_err(ddrc_pmu->dev, "Unsupported event index:%d!\n", idx); - return; - } - writel((u32)val, - ddrc_pmu->base + hisi_ddrc_pmu_get_counter_offset(idx)); + ddrc_pmu->base + hisi_ddrc_pmu_get_counter_offset(hwc->idx)); } /* diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c index 3402f1a395a8..a4b8c7daefa6 100644 --- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c @@ -51,29 +51,15 @@ static u32 hisi_hha_pmu_get_counter_offset(int cntr_idx) static u64 hisi_hha_pmu_read_counter(struct hisi_pmu *hha_pmu, struct hw_perf_event *hwc) { - u32 idx = hwc->idx; - - if (!hisi_uncore_pmu_counter_valid(hha_pmu, idx)) { - dev_err(hha_pmu->dev, "Unsupported event index:%d!\n", idx); - return 0; - } - /* Read 64 bits and like L3C, top 16 bits are RAZ */ - return readq(hha_pmu->base + hisi_hha_pmu_get_counter_offset(idx)); + return readq(hha_pmu->base + hisi_hha_pmu_get_counter_offset(hwc->idx)); } static void hisi_hha_pmu_write_counter(struct hisi_pmu *hha_pmu, struct hw_perf_event *hwc, u64 val) { - u32 idx = hwc->idx; - - if (!hisi_uncore_pmu_counter_valid(hha_pmu, idx)) { - dev_err(hha_pmu->dev, "Unsupported event index:%d!\n", idx); - return; - } - /* Write 64 bits and like L3C, top 16 bits are WI */ - writeq(val, hha_pmu->base + hisi_hha_pmu_get_counter_offset(idx)); + writeq(val, hha_pmu->base + hisi_hha_pmu_get_counter_offset(hwc->idx)); } static void hisi_hha_pmu_write_evtype(struct hisi_pmu *hha_pmu, int idx, diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 7d792435c2aa..f73be6d76abc 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -50,29 +50,15 @@ static u32 hisi_l3c_pmu_get_counter_offset(int cntr_idx) static u64 hisi_l3c_pmu_read_counter(struct hisi_pmu *l3c_pmu, struct 
hw_perf_event *hwc)
 {
-	u32 idx = hwc->idx;
-
-	if (!hisi_uncore_pmu_counter_valid(l3c_pmu, idx)) {
-		dev_err(l3c_pmu->dev, "Unsupported event index:%d!\n", idx);
-		return 0;
-	}
-
 	/* Read 64-bits and the upper 16 bits are RAZ */
-	return readq(l3c_pmu->base + hisi_l3c_pmu_get_counter_offset(idx));
+	return readq(l3c_pmu->base + hisi_l3c_pmu_get_counter_offset(hwc->idx));
 }

 static void hisi_l3c_pmu_write_counter(struct hisi_pmu *l3c_pmu,
 				       struct hw_perf_event *hwc, u64 val)
 {
-	u32 idx = hwc->idx;
-
-	if (!hisi_uncore_pmu_counter_valid(l3c_pmu, idx)) {
-		dev_err(l3c_pmu->dev, "Unsupported event index:%d!\n", idx);
-		return;
-	}
-
 	/* Write 64-bits and the upper 16 bits are WI */
-	writeq(val, l3c_pmu->base + hisi_l3c_pmu_get_counter_offset(idx));
+	writeq(val, l3c_pmu->base + hisi_l3c_pmu_get_counter_offset(hwc->idx));
 }

 static void hisi_l3c_pmu_write_evtype(struct hisi_pmu *l3c_pmu, int idx,
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c
index 5e2b5e12777f..44b7553a3e42 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c
@@ -96,12 +96,6 @@ static bool hisi_validate_event_group(struct perf_event *event)
 	return counters <= hisi_pmu->num_counters;
 }

-int hisi_uncore_pmu_counter_valid(struct hisi_pmu *hisi_pmu, int idx)
-{
-	return idx >= 0 && idx < hisi_pmu->num_counters;
-}
-EXPORT_SYMBOL_GPL(hisi_uncore_pmu_counter_valid);
-
 int hisi_uncore_pmu_get_event_idx(struct perf_event *event)
 {
 	struct hisi_pmu *hisi_pmu = to_hisi_pmu(event->pmu);
@@ -131,11 +125,6 @@ EXPORT_SYMBOL_GPL(hisi_uncore_pmu_identifier_attr_show);

 static void hisi_uncore_pmu_clear_event_idx(struct hisi_pmu *hisi_pmu, int idx)
 {
-	if (!hisi_uncore_pmu_counter_valid(hisi_pmu, idx)) {
-		dev_err(hisi_pmu->dev, "Unsupported event index:%d!\n", idx);
-		return;
-	}
-
 	clear_bit(idx, hisi_pmu->pmu_events.used_mask);
 }

diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h
index 25b7cbe1f818..6a7f5491ef7d 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.h
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h
@@ -78,7 +78,6 @@ struct hisi_pmu {
 	u32 identifier;
 };

-int hisi_uncore_pmu_counter_valid(struct hisi_pmu *hisi_pmu, int idx);
 int hisi_uncore_pmu_get_event_idx(struct perf_event *event);
 void hisi_uncore_pmu_read(struct perf_event *event);
 int hisi_uncore_pmu_add(struct perf_event *event, int flags);
--
cgit v1.2.3


From baff06c315a146a6943b4fcabb4fe4fa36167413 Mon Sep 17 00:00:00 2001
From: Shaokun Zhang
Date: Mon, 8 Mar 2021 14:50:30 +0800
Subject: drivers/perf: hisi: Refactor code for more uncore PMUs

In the HiSilicon uncore PMU drivers, the interrupt handling and interrupt
registration functions are very similar across the different PMU modules.
Let's refactor them into a common framework. Two new callbacks are added for
the HW accessors:

* hisi_uncore_ops::get_int_status returns a bitmap of events which have
  overflowed and raised an interrupt

* hisi_uncore_ops::clear_int_status clears the overflow status for a
  specific event

These callback functions are used by a common IRQ handler,
hisi_uncore_pmu_isr(). One more function, hisi_uncore_pmu_init_irq(), is
added to replace each PMU's IRQ initialization interface and simplify the
code.
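Putting the two callbacks together, the common handler ends up with roughly
this shape (a sketch following the description above, not the verbatim
kernel code):

static irqreturn_t hisi_uncore_pmu_isr(int irq, void *data)
{
	struct hisi_pmu *hisi_pmu = data;
	unsigned long overflown;
	int idx;

	overflown = hisi_pmu->ops->get_int_status(hisi_pmu);
	if (!overflown)
		return IRQ_NONE;

	/* Find each overflowed counter, ack it and fold in the count. */
	for_each_set_bit(idx, &overflown, hisi_pmu->num_counters) {
		struct perf_event *event = hisi_pmu->pmu_events.hw_events[idx];

		hisi_pmu->ops->clear_int_status(hisi_pmu, idx);
		if (!event)
			continue;

		hisi_uncore_pmu_event_update(event);
		hisi_uncore_pmu_set_event_period(event);
	}

	return IRQ_HANDLED;
}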
Cc: Mark Rutland Cc: Will Deacon Cc: John Garry Cc: Jonathan Cameron Reviewed-by: John Garry Co-developed-by: Qi Liu Signed-off-by: Qi Liu Signed-off-by: Shaokun Zhang Link: https://lore.kernel.org/r/1615186237-22263-3-git-send-email-zhangshaokun@hisilicon.com Signed-off-by: Will Deacon --- drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 59 ++++----------------------- drivers/perf/hisilicon/hisi_uncore_hha_pmu.c | 59 ++++----------------------- drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 59 ++++----------------------- drivers/perf/hisilicon/hisi_uncore_pmu.c | 54 ++++++++++++++++++++++++ drivers/perf/hisilicon/hisi_uncore_pmu.h | 6 ++- 5 files changed, 80 insertions(+), 157 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index 38344b49d7af..7f7827cd54d7 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include "hisi_uncore_pmu.h" @@ -165,60 +164,14 @@ static void hisi_ddrc_pmu_disable_counter_int(struct hisi_pmu *ddrc_pmu, writel(val, ddrc_pmu->base + DDRC_INT_MASK); } -static irqreturn_t hisi_ddrc_pmu_isr(int irq, void *dev_id) +static u32 hisi_ddrc_pmu_get_int_status(struct hisi_pmu *ddrc_pmu) { - struct hisi_pmu *ddrc_pmu = dev_id; - struct perf_event *event; - unsigned long overflown; - int idx; - - /* Read the DDRC_INT_STATUS register */ - overflown = readl(ddrc_pmu->base + DDRC_INT_STATUS); - if (!overflown) - return IRQ_NONE; - - /* - * Find the counter index which overflowed if the bit was set - * and handle it - */ - for_each_set_bit(idx, &overflown, DDRC_NR_COUNTERS) { - /* Write 1 to clear the IRQ status flag */ - writel((1 << idx), ddrc_pmu->base + DDRC_INT_CLEAR); - - /* Get the corresponding event struct */ - event = ddrc_pmu->pmu_events.hw_events[idx]; - if (!event) - continue; - - hisi_uncore_pmu_event_update(event); - hisi_uncore_pmu_set_event_period(event); - } - - return IRQ_HANDLED; + return readl(ddrc_pmu->base + DDRC_INT_STATUS); } -static int hisi_ddrc_pmu_init_irq(struct hisi_pmu *ddrc_pmu, - struct platform_device *pdev) +static void hisi_ddrc_pmu_clear_int_status(struct hisi_pmu *ddrc_pmu, int idx) { - int irq, ret; - - /* Read and init IRQ */ - irq = platform_get_irq(pdev, 0); - if (irq < 0) - return irq; - - ret = devm_request_irq(&pdev->dev, irq, hisi_ddrc_pmu_isr, - IRQF_NOBALANCING | IRQF_NO_THREAD, - dev_name(&pdev->dev), ddrc_pmu); - if (ret < 0) { - dev_err(&pdev->dev, - "Fail to request IRQ:%d ret:%d\n", irq, ret); - return ret; - } - - ddrc_pmu->irq = irq; - - return 0; + writel(1 << idx, ddrc_pmu->base + DDRC_INT_CLEAR); } static const struct acpi_device_id hisi_ddrc_pmu_acpi_match[] = { @@ -328,6 +281,8 @@ static const struct hisi_uncore_ops hisi_uncore_ddrc_ops = { .disable_counter_int = hisi_ddrc_pmu_disable_counter_int, .write_counter = hisi_ddrc_pmu_write_counter, .read_counter = hisi_ddrc_pmu_read_counter, + .get_int_status = hisi_ddrc_pmu_get_int_status, + .clear_int_status = hisi_ddrc_pmu_clear_int_status, }; static int hisi_ddrc_pmu_dev_probe(struct platform_device *pdev, @@ -339,7 +294,7 @@ static int hisi_ddrc_pmu_dev_probe(struct platform_device *pdev, if (ret) return ret; - ret = hisi_ddrc_pmu_init_irq(ddrc_pmu, pdev); + ret = hisi_uncore_pmu_init_irq(ddrc_pmu, pdev); if (ret) return ret; diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c index a4b8c7daefa6..667eebddcc82 100644 --- 
a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include "hisi_uncore_pmu.h" @@ -155,60 +154,14 @@ static void hisi_hha_pmu_disable_counter_int(struct hisi_pmu *hha_pmu, writel(val, hha_pmu->base + HHA_INT_MASK); } -static irqreturn_t hisi_hha_pmu_isr(int irq, void *dev_id) +static u32 hisi_hha_pmu_get_int_status(struct hisi_pmu *hha_pmu) { - struct hisi_pmu *hha_pmu = dev_id; - struct perf_event *event; - unsigned long overflown; - int idx; - - /* Read HHA_INT_STATUS register */ - overflown = readl(hha_pmu->base + HHA_INT_STATUS); - if (!overflown) - return IRQ_NONE; - - /* - * Find the counter index which overflowed if the bit was set - * and handle it - */ - for_each_set_bit(idx, &overflown, HHA_NR_COUNTERS) { - /* Write 1 to clear the IRQ status flag */ - writel((1 << idx), hha_pmu->base + HHA_INT_CLEAR); - - /* Get the corresponding event struct */ - event = hha_pmu->pmu_events.hw_events[idx]; - if (!event) - continue; - - hisi_uncore_pmu_event_update(event); - hisi_uncore_pmu_set_event_period(event); - } - - return IRQ_HANDLED; + return readl(hha_pmu->base + HHA_INT_STATUS); } -static int hisi_hha_pmu_init_irq(struct hisi_pmu *hha_pmu, - struct platform_device *pdev) +static void hisi_hha_pmu_clear_int_status(struct hisi_pmu *hha_pmu, int idx) { - int irq, ret; - - /* Read and init IRQ */ - irq = platform_get_irq(pdev, 0); - if (irq < 0) - return irq; - - ret = devm_request_irq(&pdev->dev, irq, hisi_hha_pmu_isr, - IRQF_NOBALANCING | IRQF_NO_THREAD, - dev_name(&pdev->dev), hha_pmu); - if (ret < 0) { - dev_err(&pdev->dev, - "Fail to request IRQ:%d ret:%d\n", irq, ret); - return ret; - } - - hha_pmu->irq = irq; - - return 0; + writel(1 << idx, hha_pmu->base + HHA_INT_CLEAR); } static const struct acpi_device_id hisi_hha_pmu_acpi_match[] = { @@ -340,6 +293,8 @@ static const struct hisi_uncore_ops hisi_uncore_hha_ops = { .disable_counter_int = hisi_hha_pmu_disable_counter_int, .write_counter = hisi_hha_pmu_write_counter, .read_counter = hisi_hha_pmu_read_counter, + .get_int_status = hisi_hha_pmu_get_int_status, + .clear_int_status = hisi_hha_pmu_clear_int_status, }; static int hisi_hha_pmu_dev_probe(struct platform_device *pdev, @@ -351,7 +306,7 @@ static int hisi_hha_pmu_dev_probe(struct platform_device *pdev, if (ret) return ret; - ret = hisi_hha_pmu_init_irq(hha_pmu, pdev); + ret = hisi_uncore_pmu_init_irq(hha_pmu, pdev); if (ret) return ret; diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index f73be6d76abc..831622e0c445 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include "hisi_uncore_pmu.h" @@ -154,60 +153,14 @@ static void hisi_l3c_pmu_disable_counter_int(struct hisi_pmu *l3c_pmu, writel(val, l3c_pmu->base + L3C_INT_MASK); } -static irqreturn_t hisi_l3c_pmu_isr(int irq, void *dev_id) +static u32 hisi_l3c_pmu_get_int_status(struct hisi_pmu *l3c_pmu) { - struct hisi_pmu *l3c_pmu = dev_id; - struct perf_event *event; - unsigned long overflown; - int idx; - - /* Read L3C_INT_STATUS register */ - overflown = readl(l3c_pmu->base + L3C_INT_STATUS); - if (!overflown) - return IRQ_NONE; - - /* - * Find the counter index which overflowed if the bit was set - * and handle it. 
- */ - for_each_set_bit(idx, &overflown, L3C_NR_COUNTERS) { - /* Write 1 to clear the IRQ status flag */ - writel((1 << idx), l3c_pmu->base + L3C_INT_CLEAR); - - /* Get the corresponding event struct */ - event = l3c_pmu->pmu_events.hw_events[idx]; - if (!event) - continue; - - hisi_uncore_pmu_event_update(event); - hisi_uncore_pmu_set_event_period(event); - } - - return IRQ_HANDLED; + return readl(l3c_pmu->base + L3C_INT_STATUS); } -static int hisi_l3c_pmu_init_irq(struct hisi_pmu *l3c_pmu, - struct platform_device *pdev) +static void hisi_l3c_pmu_clear_int_status(struct hisi_pmu *l3c_pmu, int idx) { - int irq, ret; - - /* Read and init IRQ */ - irq = platform_get_irq(pdev, 0); - if (irq < 0) - return irq; - - ret = devm_request_irq(&pdev->dev, irq, hisi_l3c_pmu_isr, - IRQF_NOBALANCING | IRQF_NO_THREAD, - dev_name(&pdev->dev), l3c_pmu); - if (ret < 0) { - dev_err(&pdev->dev, - "Fail to request IRQ:%d ret:%d\n", irq, ret); - return ret; - } - - l3c_pmu->irq = irq; - - return 0; + writel(1 << idx, l3c_pmu->base + L3C_INT_CLEAR); } static const struct acpi_device_id hisi_l3c_pmu_acpi_match[] = { @@ -330,6 +283,8 @@ static const struct hisi_uncore_ops hisi_uncore_l3c_ops = { .disable_counter_int = hisi_l3c_pmu_disable_counter_int, .write_counter = hisi_l3c_pmu_write_counter, .read_counter = hisi_l3c_pmu_read_counter, + .get_int_status = hisi_l3c_pmu_get_int_status, + .clear_int_status = hisi_l3c_pmu_clear_int_status, }; static int hisi_l3c_pmu_dev_probe(struct platform_device *pdev, @@ -341,7 +296,7 @@ static int hisi_l3c_pmu_dev_probe(struct platform_device *pdev, if (ret) return ret; - ret = hisi_l3c_pmu_init_irq(l3c_pmu, pdev); + ret = hisi_uncore_pmu_init_irq(l3c_pmu, pdev); if (ret) return ret; diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index 44b7553a3e42..c9d8e2ec499a 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -128,6 +128,60 @@ static void hisi_uncore_pmu_clear_event_idx(struct hisi_pmu *hisi_pmu, int idx) clear_bit(idx, hisi_pmu->pmu_events.used_mask); } +static irqreturn_t hisi_uncore_pmu_isr(int irq, void *data) +{ + struct hisi_pmu *hisi_pmu = data; + struct perf_event *event; + unsigned long overflown; + int idx; + + overflown = hisi_pmu->ops->get_int_status(hisi_pmu); + if (!overflown) + return IRQ_NONE; + + /* + * Find the counter index which overflowed if the bit was set + * and handle it. 
+ */ + for_each_set_bit(idx, &overflown, hisi_pmu->num_counters) { + /* Write 1 to clear the IRQ status flag */ + hisi_pmu->ops->clear_int_status(hisi_pmu, idx); + /* Get the corresponding event struct */ + event = hisi_pmu->pmu_events.hw_events[idx]; + if (!event) + continue; + + hisi_uncore_pmu_event_update(event); + hisi_uncore_pmu_set_event_period(event); + } + + return IRQ_HANDLED; +} + +int hisi_uncore_pmu_init_irq(struct hisi_pmu *hisi_pmu, + struct platform_device *pdev) +{ + int irq, ret; + + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; + + ret = devm_request_irq(&pdev->dev, irq, hisi_uncore_pmu_isr, + IRQF_NOBALANCING | IRQF_NO_THREAD, + dev_name(&pdev->dev), hisi_pmu); + if (ret < 0) { + dev_err(&pdev->dev, + "Fail to request IRQ: %d ret: %d.\n", irq, ret); + return ret; + } + + hisi_pmu->irq = irq; + + return 0; +} +EXPORT_SYMBOL_GPL(hisi_uncore_pmu_init_irq); + int hisi_uncore_pmu_event_init(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h index 6a7f5491ef7d..933020c99e3e 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.h +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #undef pr_fmt @@ -47,6 +48,8 @@ struct hisi_uncore_ops { void (*disable_counter_int)(struct hisi_pmu *, struct hw_perf_event *); void (*start_counters)(struct hisi_pmu *); void (*stop_counters)(struct hisi_pmu *); + u32 (*get_int_status)(struct hisi_pmu *hisi_pmu); + void (*clear_int_status)(struct hisi_pmu *hisi_pmu, int idx); }; struct hisi_pmu_hwevents { @@ -101,6 +104,7 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node); ssize_t hisi_uncore_pmu_identifier_attr_show(struct device *dev, struct device_attribute *attr, char *page); - +int hisi_uncore_pmu_init_irq(struct hisi_pmu *hisi_pmu, + struct platform_device *pdev); #endif /* __HISI_UNCORE_PMU_H__ */ -- cgit v1.2.3 From 3da582df575c3b2910e09e0445c27c3ebc8096e5 Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Mon, 8 Mar 2021 14:50:31 +0800 Subject: drivers/perf: hisi: Add PMU version for uncore PMU drivers For HiSilicon uncore PMUs, more versions are supported, and some variables are given a version suffix to distinguish the versions, in preparation for the new drivers. Cc: Mark Rutland Cc: Will Deacon Cc: John Garry Cc: Jonathan Cameron Acked-by: Mark Rutland Reviewed-by: John Garry Co-developed-by: Qi Liu Signed-off-by: Qi Liu Signed-off-by: Shaokun Zhang Link: https://lore.kernel.org/r/1615186237-22263-4-git-send-email-zhangshaokun@hisilicon.com Signed-off-by: Will Deacon --- drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 96 ++++++++++++++------------- drivers/perf/hisilicon/hisi_uncore_hha_pmu.c | 27 ++++---- drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 23 ++++--- 3 files changed, 75 insertions(+), 71 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index 7f7827cd54d7..38a0622372c5 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -36,7 +36,8 @@ /* DDRC has 8-counters */ #define DDRC_NR_COUNTERS 0x8 -#define DDRC_PERF_CTRL_EN 0x2 +#define DDRC_V1_PERF_CTRL_EN 0x2 +#define DDRC_V1_NR_EVENTS 0x7 /* * For DDRC PMU, there are eight-events and every event has been mapped * to fixed-purpose counters which register offset is not consistent. * Therefore there is no write event type and we assume that event * code (0 to 7) is equal to counter index in PMU driver. */ static const u32 ddrc_reg_off[] = { /* * Select the counter register offset using the counter index. 
- * In DDRC there are no programmable counter, the count - * is readed form the statistics counter register itself. + * In PMU v1, there are no programmable counter, the count + * is read form the statistics counter register itself. */ -static u32 hisi_ddrc_pmu_get_counter_offset(int cntr_idx) +static u32 hisi_ddrc_pmu_v1_get_counter_offset(int cntr_idx) { return ddrc_reg_off[cntr_idx]; } -static u64 hisi_ddrc_pmu_read_counter(struct hisi_pmu *ddrc_pmu, +static u64 hisi_ddrc_pmu_v1_read_counter(struct hisi_pmu *ddrc_pmu, struct hw_perf_event *hwc) { return readl(ddrc_pmu->base + - hisi_ddrc_pmu_get_counter_offset(hwc->idx)); + hisi_ddrc_pmu_v1_get_counter_offset(hwc->idx)); } -static void hisi_ddrc_pmu_write_counter(struct hisi_pmu *ddrc_pmu, +static void hisi_ddrc_pmu_v1_write_counter(struct hisi_pmu *ddrc_pmu, struct hw_perf_event *hwc, u64 val) { writel((u32)val, - ddrc_pmu->base + hisi_ddrc_pmu_get_counter_offset(hwc->idx)); + ddrc_pmu->base + hisi_ddrc_pmu_v1_get_counter_offset(hwc->idx)); } /* @@ -84,28 +85,28 @@ static void hisi_ddrc_pmu_write_evtype(struct hisi_pmu *hha_pmu, int idx, { } -static void hisi_ddrc_pmu_start_counters(struct hisi_pmu *ddrc_pmu) +static void hisi_ddrc_pmu_v1_start_counters(struct hisi_pmu *ddrc_pmu) { u32 val; /* Set perf_enable in DDRC_PERF_CTRL to start event counting */ val = readl(ddrc_pmu->base + DDRC_PERF_CTRL); - val |= DDRC_PERF_CTRL_EN; + val |= DDRC_V1_PERF_CTRL_EN; writel(val, ddrc_pmu->base + DDRC_PERF_CTRL); } -static void hisi_ddrc_pmu_stop_counters(struct hisi_pmu *ddrc_pmu) +static void hisi_ddrc_pmu_v1_stop_counters(struct hisi_pmu *ddrc_pmu) { u32 val; /* Clear perf_enable in DDRC_PERF_CTRL to stop event counting */ val = readl(ddrc_pmu->base + DDRC_PERF_CTRL); - val &= ~DDRC_PERF_CTRL_EN; + val &= ~DDRC_V1_PERF_CTRL_EN; writel(val, ddrc_pmu->base + DDRC_PERF_CTRL); } -static void hisi_ddrc_pmu_enable_counter(struct hisi_pmu *ddrc_pmu, - struct hw_perf_event *hwc) +static void hisi_ddrc_pmu_v1_enable_counter(struct hisi_pmu *ddrc_pmu, + struct hw_perf_event *hwc) { u32 val; @@ -115,8 +116,8 @@ static void hisi_ddrc_pmu_enable_counter(struct hisi_pmu *ddrc_pmu, writel(val, ddrc_pmu->base + DDRC_EVENT_CTRL); } -static void hisi_ddrc_pmu_disable_counter(struct hisi_pmu *ddrc_pmu, - struct hw_perf_event *hwc) +static void hisi_ddrc_pmu_v1_disable_counter(struct hisi_pmu *ddrc_pmu, + struct hw_perf_event *hwc) { u32 val; @@ -126,7 +127,7 @@ static void hisi_ddrc_pmu_disable_counter(struct hisi_pmu *ddrc_pmu, writel(val, ddrc_pmu->base + DDRC_EVENT_CTRL); } -static int hisi_ddrc_pmu_get_event_idx(struct perf_event *event) +static int hisi_ddrc_pmu_v1_get_event_idx(struct perf_event *event) { struct hisi_pmu *ddrc_pmu = to_hisi_pmu(event->pmu); unsigned long *used_mask = ddrc_pmu->pmu_events.used_mask; @@ -142,8 +143,8 @@ static int hisi_ddrc_pmu_get_event_idx(struct perf_event *event) return idx; } -static void hisi_ddrc_pmu_enable_counter_int(struct hisi_pmu *ddrc_pmu, - struct hw_perf_event *hwc) +static void hisi_ddrc_pmu_v1_enable_counter_int(struct hisi_pmu *ddrc_pmu, + struct hw_perf_event *hwc) { u32 val; @@ -153,8 +154,8 @@ static void hisi_ddrc_pmu_enable_counter_int(struct hisi_pmu *ddrc_pmu, writel(val, ddrc_pmu->base + DDRC_INT_MASK); } -static void hisi_ddrc_pmu_disable_counter_int(struct hisi_pmu *ddrc_pmu, - struct hw_perf_event *hwc) +static void hisi_ddrc_pmu_v1_disable_counter_int(struct hisi_pmu *ddrc_pmu, + struct hw_perf_event *hwc) { u32 val; @@ -164,12 +165,13 @@ static void hisi_ddrc_pmu_disable_counter_int(struct 
hisi_pmu *ddrc_pmu, writel(val, ddrc_pmu->base + DDRC_INT_MASK); } -static u32 hisi_ddrc_pmu_get_int_status(struct hisi_pmu *ddrc_pmu) +static u32 hisi_ddrc_pmu_v1_get_int_status(struct hisi_pmu *ddrc_pmu) { return readl(ddrc_pmu->base + DDRC_INT_STATUS); } -static void hisi_ddrc_pmu_clear_int_status(struct hisi_pmu *ddrc_pmu, int idx) +static void hisi_ddrc_pmu_v1_clear_int_status(struct hisi_pmu *ddrc_pmu, + int idx) { writel(1 << idx, ddrc_pmu->base + DDRC_INT_CLEAR); } @@ -212,17 +214,17 @@ static int hisi_ddrc_pmu_init_data(struct platform_device *pdev, return 0; } -static struct attribute *hisi_ddrc_pmu_format_attr[] = { +static struct attribute *hisi_ddrc_pmu_v1_format_attr[] = { HISI_PMU_FORMAT_ATTR(event, "config:0-4"), NULL, }; -static const struct attribute_group hisi_ddrc_pmu_format_group = { +static const struct attribute_group hisi_ddrc_pmu_v1_format_group = { .name = "format", - .attrs = hisi_ddrc_pmu_format_attr, + .attrs = hisi_ddrc_pmu_v1_format_attr, }; -static struct attribute *hisi_ddrc_pmu_events_attr[] = { +static struct attribute *hisi_ddrc_pmu_v1_events_attr[] = { HISI_PMU_EVENT_ATTR(flux_wr, 0x00), HISI_PMU_EVENT_ATTR(flux_rd, 0x01), HISI_PMU_EVENT_ATTR(flux_wcmd, 0x02), @@ -234,9 +236,9 @@ static struct attribute *hisi_ddrc_pmu_events_attr[] = { NULL, }; -static const struct attribute_group hisi_ddrc_pmu_events_group = { +static const struct attribute_group hisi_ddrc_pmu_v1_events_group = { .name = "events", - .attrs = hisi_ddrc_pmu_events_attr, + .attrs = hisi_ddrc_pmu_v1_events_attr, }; static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); @@ -262,27 +264,27 @@ static const struct attribute_group hisi_ddrc_pmu_identifier_group = { .attrs = hisi_ddrc_pmu_identifier_attrs, }; -static const struct attribute_group *hisi_ddrc_pmu_attr_groups[] = { - &hisi_ddrc_pmu_format_group, - &hisi_ddrc_pmu_events_group, +static const struct attribute_group *hisi_ddrc_pmu_v1_attr_groups[] = { + &hisi_ddrc_pmu_v1_format_group, + &hisi_ddrc_pmu_v1_events_group, &hisi_ddrc_pmu_cpumask_attr_group, &hisi_ddrc_pmu_identifier_group, NULL, }; -static const struct hisi_uncore_ops hisi_uncore_ddrc_ops = { +static const struct hisi_uncore_ops hisi_uncore_ddrc_v1_ops = { .write_evtype = hisi_ddrc_pmu_write_evtype, - .get_event_idx = hisi_ddrc_pmu_get_event_idx, - .start_counters = hisi_ddrc_pmu_start_counters, - .stop_counters = hisi_ddrc_pmu_stop_counters, - .enable_counter = hisi_ddrc_pmu_enable_counter, - .disable_counter = hisi_ddrc_pmu_disable_counter, - .enable_counter_int = hisi_ddrc_pmu_enable_counter_int, - .disable_counter_int = hisi_ddrc_pmu_disable_counter_int, - .write_counter = hisi_ddrc_pmu_write_counter, - .read_counter = hisi_ddrc_pmu_read_counter, - .get_int_status = hisi_ddrc_pmu_get_int_status, - .clear_int_status = hisi_ddrc_pmu_clear_int_status, + .get_event_idx = hisi_ddrc_pmu_v1_get_event_idx, + .start_counters = hisi_ddrc_pmu_v1_start_counters, + .stop_counters = hisi_ddrc_pmu_v1_stop_counters, + .enable_counter = hisi_ddrc_pmu_v1_enable_counter, + .disable_counter = hisi_ddrc_pmu_v1_disable_counter, + .enable_counter_int = hisi_ddrc_pmu_v1_enable_counter_int, + .disable_counter_int = hisi_ddrc_pmu_v1_disable_counter_int, + .write_counter = hisi_ddrc_pmu_v1_write_counter, + .read_counter = hisi_ddrc_pmu_v1_read_counter, + .get_int_status = hisi_ddrc_pmu_v1_get_int_status, + .clear_int_status = hisi_ddrc_pmu_v1_clear_int_status, }; static int hisi_ddrc_pmu_dev_probe(struct platform_device *pdev, @@ -300,10 +302,10 @@ static int 
hisi_ddrc_pmu_dev_probe(struct platform_device *pdev, ddrc_pmu->num_counters = DDRC_NR_COUNTERS; ddrc_pmu->counter_bits = 32; - ddrc_pmu->ops = &hisi_uncore_ddrc_ops; + ddrc_pmu->ops = &hisi_uncore_ddrc_v1_ops; ddrc_pmu->dev = &pdev->dev; ddrc_pmu->on_cpu = -1; - ddrc_pmu->check_event = 7; + ddrc_pmu->check_event = DDRC_V1_NR_EVENTS; return 0; } @@ -345,7 +347,7 @@ static int hisi_ddrc_pmu_probe(struct platform_device *pdev) .start = hisi_uncore_pmu_start, .stop = hisi_uncore_pmu_stop, .read = hisi_uncore_pmu_read, - .attr_groups = hisi_ddrc_pmu_attr_groups, + .attr_groups = hisi_ddrc_pmu_v1_attr_groups, .capabilities = PERF_PMU_CAP_NO_EXCLUDE, }; diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c index 667eebddcc82..f779d70cf0bc 100644 --- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c @@ -33,10 +33,11 @@ #define HHA_CNT0_LOWER 0x1F00 /* HHA has 16-counters */ -#define HHA_NR_COUNTERS 0x10 +#define HHA_V1_NR_COUNTERS 0x10 #define HHA_PERF_CTRL_EN 0x1 #define HHA_EVTYPE_NONE 0xff +#define HHA_V1_NR_EVENT 0x65 /* * Select the counter register offset using the counter index @@ -206,17 +207,17 @@ static int hisi_hha_pmu_init_data(struct platform_device *pdev, return 0; } -static struct attribute *hisi_hha_pmu_format_attr[] = { +static struct attribute *hisi_hha_pmu_v1_format_attr[] = { HISI_PMU_FORMAT_ATTR(event, "config:0-7"), NULL, }; -static const struct attribute_group hisi_hha_pmu_format_group = { +static const struct attribute_group hisi_hha_pmu_v1_format_group = { .name = "format", - .attrs = hisi_hha_pmu_format_attr, + .attrs = hisi_hha_pmu_v1_format_attr, }; -static struct attribute *hisi_hha_pmu_events_attr[] = { +static struct attribute *hisi_hha_pmu_v1_events_attr[] = { HISI_PMU_EVENT_ATTR(rx_ops_num, 0x00), HISI_PMU_EVENT_ATTR(rx_outer, 0x01), HISI_PMU_EVENT_ATTR(rx_sccl, 0x02), @@ -246,9 +247,9 @@ static struct attribute *hisi_hha_pmu_events_attr[] = { NULL, }; -static const struct attribute_group hisi_hha_pmu_events_group = { +static const struct attribute_group hisi_hha_pmu_v1_events_group = { .name = "events", - .attrs = hisi_hha_pmu_events_attr, + .attrs = hisi_hha_pmu_v1_events_attr, }; static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); @@ -274,9 +275,9 @@ static const struct attribute_group hisi_hha_pmu_identifier_group = { .attrs = hisi_hha_pmu_identifier_attrs, }; -static const struct attribute_group *hisi_hha_pmu_attr_groups[] = { - &hisi_hha_pmu_format_group, - &hisi_hha_pmu_events_group, +static const struct attribute_group *hisi_hha_pmu_v1_attr_groups[] = { + &hisi_hha_pmu_v1_format_group, + &hisi_hha_pmu_v1_events_group, &hisi_hha_pmu_cpumask_attr_group, &hisi_hha_pmu_identifier_group, NULL, @@ -310,12 +311,12 @@ static int hisi_hha_pmu_dev_probe(struct platform_device *pdev, if (ret) return ret; - hha_pmu->num_counters = HHA_NR_COUNTERS; + hha_pmu->num_counters = HHA_V1_NR_COUNTERS; hha_pmu->counter_bits = 48; hha_pmu->ops = &hisi_uncore_hha_ops; hha_pmu->dev = &pdev->dev; hha_pmu->on_cpu = -1; - hha_pmu->check_event = 0x65; + hha_pmu->check_event = HHA_V1_NR_EVENT; return 0; } @@ -357,7 +358,7 @@ static int hisi_hha_pmu_probe(struct platform_device *pdev) .start = hisi_uncore_pmu_start, .stop = hisi_uncore_pmu_stop, .read = hisi_uncore_pmu_read, - .attr_groups = hisi_hha_pmu_attr_groups, + .attr_groups = hisi_hha_pmu_v1_attr_groups, .capabilities = PERF_PMU_CAP_NO_EXCLUDE, }; diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c 
b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 831622e0c445..2b26386480dd 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -37,6 +37,7 @@ #define L3C_PERF_CTRL_EN 0x10000 #define L3C_EVTYPE_NONE 0xff +#define L3C_V1_NR_EVENTS 0x59 /* * Select the counter register offset using the counter index @@ -209,17 +210,17 @@ static int hisi_l3c_pmu_init_data(struct platform_device *pdev, return 0; } -static struct attribute *hisi_l3c_pmu_format_attr[] = { +static struct attribute *hisi_l3c_pmu_v1_format_attr[] = { HISI_PMU_FORMAT_ATTR(event, "config:0-7"), NULL, }; -static const struct attribute_group hisi_l3c_pmu_format_group = { +static const struct attribute_group hisi_l3c_pmu_v1_format_group = { .name = "format", - .attrs = hisi_l3c_pmu_format_attr, + .attrs = hisi_l3c_pmu_v1_format_attr, }; -static struct attribute *hisi_l3c_pmu_events_attr[] = { +static struct attribute *hisi_l3c_pmu_v1_events_attr[] = { HISI_PMU_EVENT_ATTR(rd_cpipe, 0x00), HISI_PMU_EVENT_ATTR(wr_cpipe, 0x01), HISI_PMU_EVENT_ATTR(rd_hit_cpipe, 0x02), @@ -236,9 +237,9 @@ static struct attribute *hisi_l3c_pmu_events_attr[] = { NULL, }; -static const struct attribute_group hisi_l3c_pmu_events_group = { +static const struct attribute_group hisi_l3c_pmu_v1_events_group = { .name = "events", - .attrs = hisi_l3c_pmu_events_attr, + .attrs = hisi_l3c_pmu_v1_events_attr, }; static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); @@ -264,9 +265,9 @@ static const struct attribute_group hisi_l3c_pmu_identifier_group = { .attrs = hisi_l3c_pmu_identifier_attrs, }; -static const struct attribute_group *hisi_l3c_pmu_attr_groups[] = { - &hisi_l3c_pmu_format_group, - &hisi_l3c_pmu_events_group, +static const struct attribute_group *hisi_l3c_pmu_v1_attr_groups[] = { + &hisi_l3c_pmu_v1_format_group, + &hisi_l3c_pmu_v1_events_group, &hisi_l3c_pmu_cpumask_attr_group, &hisi_l3c_pmu_identifier_group, NULL, @@ -305,7 +306,7 @@ static int hisi_l3c_pmu_dev_probe(struct platform_device *pdev, l3c_pmu->ops = &hisi_uncore_l3c_ops; l3c_pmu->dev = &pdev->dev; l3c_pmu->on_cpu = -1; - l3c_pmu->check_event = 0x59; + l3c_pmu->check_event = L3C_V1_NR_EVENTS; return 0; } @@ -347,7 +348,7 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev) .start = hisi_uncore_pmu_start, .stop = hisi_uncore_pmu_stop, .read = hisi_uncore_pmu_read, - .attr_groups = hisi_l3c_pmu_attr_groups, + .attr_groups = hisi_l3c_pmu_v1_attr_groups, .capabilities = PERF_PMU_CAP_NO_EXCLUDE, }; -- cgit v1.2.3 From 486a7f46b966a825484808d4edf53bbe02698fb3 Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Mon, 8 Mar 2021 14:50:32 +0800 Subject: drivers/perf: hisi: Add new functions for L3C PMU On HiSilicon Hip09 platform, some new functions are enhanced on L3C PMU: * tt_req: it is the abbreviation of tracetag request and allows user to count only read/write/atomic operations. tt_req is 3-bit and details are listed in the hisi-pmu document. $# perf stat -a -e hisi_sccl3_l3c0/config=0x02,tt_req=0x4/ sleep 5 * tt_core: it is the abbreviation of tracetag core and allows user to filter by core/thread within the cluster, it is a 8-bit bitmap that each bit represents the corresponding core/thread in this L3C. $# perf stat -a -e hisi_sccl3_l3c0/config=0x02,tt_core=0xf/ sleep 5 * datasrc_cfg: it is the abbreviation of data source configuration and allows user to check where the data comes from, such as: from local DDR, cross-die DDR or cross-socket DDR. Its is 5-bit and represents different data source in the SoC. 
$# perf stat -a -e hisi_sccl3_l3c0/dat_access,datasrc_cfg=0xe/ sleep 5 * datasrc_skt: it is the abbreviation of data source from another socket and is used in the multi-chips, if user wants to check the cross-socket datat source, it shall be added in perf command. Only one bit is used to control this. $# perf stat -a -e hisi_sccl3_l3c0/dat_access,datasrc_cfg=0x10,datasrc_skt=1/ sleep 5 Cc: Mark Rutland Cc: Will Deacon Cc: John Garry Cc: Jonathan Cameron Reviewed-by: John Garry Co-developed-by: Qi Liu Signed-off-by: Qi Liu Signed-off-by: Shaokun Zhang Link: https://lore.kernel.org/r/1615186237-22263-5-git-send-email-zhangshaokun@hisilicon.com Signed-off-by: Will Deacon --- drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 259 +++++++++++++++++++++++++-- drivers/perf/hisilicon/hisi_uncore_pmu.c | 8 +- drivers/perf/hisilicon/hisi_uncore_pmu.h | 11 ++ 3 files changed, 258 insertions(+), 20 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 2b26386480dd..bf9f7772cac9 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -23,12 +23,17 @@ #define L3C_INT_MASK 0x0800 #define L3C_INT_STATUS 0x0808 #define L3C_INT_CLEAR 0x080c +#define L3C_CORE_CTRL 0x1b04 +#define L3C_TRACETAG_CTRL 0x1b20 +#define L3C_DATSRC_TYPE 0x1b48 +#define L3C_DATSRC_CTRL 0x1bf0 #define L3C_EVENT_CTRL 0x1c00 #define L3C_VERSION 0x1cf0 #define L3C_EVENT_TYPE0 0x1d00 /* - * Each counter is 48-bits and [48:63] are reserved - * which are Read-As-Zero and Writes-Ignored. + * If the HW version only supports a 48-bit counter, then + * bits [63:48] are reserved, which are Read-As-Zero and + * Writes-Ignored. */ #define L3C_CNTR0_LOWER 0x1e00 @@ -36,8 +41,186 @@ #define L3C_NR_COUNTERS 0x8 #define L3C_PERF_CTRL_EN 0x10000 +#define L3C_TRACETAG_EN BIT(31) +#define L3C_TRACETAG_REQ_SHIFT 7 +#define L3C_TRACETAG_MARK_EN BIT(0) +#define L3C_TRACETAG_REQ_EN (L3C_TRACETAG_MARK_EN | BIT(2)) +#define L3C_TRACETAG_CORE_EN (L3C_TRACETAG_MARK_EN | BIT(3)) +#define L3C_CORE_EN BIT(20) +#define L3C_COER_NONE 0x0 +#define L3C_DATSRC_MASK 0xFF +#define L3C_DATSRC_SKT_EN BIT(23) +#define L3C_DATSRC_NONE 0x0 #define L3C_EVTYPE_NONE 0xff #define L3C_V1_NR_EVENTS 0x59 +#define L3C_V2_NR_EVENTS 0xFF + +HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_core, config1, 7, 0); +HISI_PMU_EVENT_ATTR_EXTRACTOR(tt_req, config1, 10, 8); +HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_cfg, config1, 15, 11); +HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_skt, config1, 16, 16); + +static void hisi_l3c_pmu_config_req_tracetag(struct perf_event *event) +{ + struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu); + u32 tt_req = hisi_get_tt_req(event); + + if (tt_req) { + u32 val; + + /* Set request-type for tracetag */ + val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL); + val |= tt_req << L3C_TRACETAG_REQ_SHIFT; + val |= L3C_TRACETAG_REQ_EN; + writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL); + + /* Enable request-tracetag statistics */ + val = readl(l3c_pmu->base + L3C_PERF_CTRL); + val |= L3C_TRACETAG_EN; + writel(val, l3c_pmu->base + L3C_PERF_CTRL); + } +} + +static void hisi_l3c_pmu_clear_req_tracetag(struct perf_event *event) +{ + struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu); + u32 tt_req = hisi_get_tt_req(event); + + if (tt_req) { + u32 val; + + /* Clear request-type */ + val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL); + val &= ~(tt_req << L3C_TRACETAG_REQ_SHIFT); + val &= ~L3C_TRACETAG_REQ_EN; + writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL); + + /* Disable 
request-tracetag statistics */ + val = readl(l3c_pmu->base + L3C_PERF_CTRL); + val &= ~L3C_TRACETAG_EN; + writel(val, l3c_pmu->base + L3C_PERF_CTRL); + } +} + +static void hisi_l3c_pmu_write_ds(struct perf_event *event, u32 ds_cfg) +{ + struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + u32 reg, reg_idx, shift, val; + int idx = hwc->idx; + + /* + * Select the appropriate datasource register(L3C_DATSRC_TYPE0/1). + * There are 2 datasource ctrl register for the 8 hardware counters. + * Datasrc is 8-bits and for the former 4 hardware counters, + * L3C_DATSRC_TYPE0 is chosen. For the latter 4 hardware counters, + * L3C_DATSRC_TYPE1 is chosen. + */ + reg = L3C_DATSRC_TYPE + (idx / 4) * 4; + reg_idx = idx % 4; + shift = 8 * reg_idx; + + val = readl(l3c_pmu->base + reg); + val &= ~(L3C_DATSRC_MASK << shift); + val |= ds_cfg << shift; + writel(val, l3c_pmu->base + reg); +} + +static void hisi_l3c_pmu_config_ds(struct perf_event *event) +{ + struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu); + u32 ds_cfg = hisi_get_datasrc_cfg(event); + u32 ds_skt = hisi_get_datasrc_skt(event); + + if (ds_cfg) + hisi_l3c_pmu_write_ds(event, ds_cfg); + + if (ds_skt) { + u32 val; + + val = readl(l3c_pmu->base + L3C_DATSRC_CTRL); + val |= L3C_DATSRC_SKT_EN; + writel(val, l3c_pmu->base + L3C_DATSRC_CTRL); + } +} + +static void hisi_l3c_pmu_clear_ds(struct perf_event *event) +{ + struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu); + u32 ds_cfg = hisi_get_datasrc_cfg(event); + u32 ds_skt = hisi_get_datasrc_skt(event); + + if (ds_cfg) + hisi_l3c_pmu_write_ds(event, L3C_DATSRC_NONE); + + if (ds_skt) { + u32 val; + + val = readl(l3c_pmu->base + L3C_DATSRC_CTRL); + val &= ~L3C_DATSRC_SKT_EN; + writel(val, l3c_pmu->base + L3C_DATSRC_CTRL); + } +} + +static void hisi_l3c_pmu_config_core_tracetag(struct perf_event *event) +{ + struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu); + u32 core = hisi_get_tt_core(event); + + if (core) { + u32 val; + + /* Config and enable core information */ + writel(core, l3c_pmu->base + L3C_CORE_CTRL); + val = readl(l3c_pmu->base + L3C_PERF_CTRL); + val |= L3C_CORE_EN; + writel(val, l3c_pmu->base + L3C_PERF_CTRL); + + /* Enable core-tracetag statistics */ + val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL); + val |= L3C_TRACETAG_CORE_EN; + writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL); + } +} + +static void hisi_l3c_pmu_clear_core_tracetag(struct perf_event *event) +{ + struct hisi_pmu *l3c_pmu = to_hisi_pmu(event->pmu); + u32 core = hisi_get_tt_core(event); + + if (core) { + u32 val; + + /* Clear core information */ + writel(L3C_COER_NONE, l3c_pmu->base + L3C_CORE_CTRL); + val = readl(l3c_pmu->base + L3C_PERF_CTRL); + val &= ~L3C_CORE_EN; + writel(val, l3c_pmu->base + L3C_PERF_CTRL); + + /* Disable core-tracetag statistics */ + val = readl(l3c_pmu->base + L3C_TRACETAG_CTRL); + val &= ~L3C_TRACETAG_CORE_EN; + writel(val, l3c_pmu->base + L3C_TRACETAG_CTRL); + } +} + +static void hisi_l3c_pmu_enable_filter(struct perf_event *event) +{ + if (event->attr.config1 != 0x0) { + hisi_l3c_pmu_config_req_tracetag(event); + hisi_l3c_pmu_config_core_tracetag(event); + hisi_l3c_pmu_config_ds(event); + } +} + +static void hisi_l3c_pmu_disable_filter(struct perf_event *event) +{ + if (event->attr.config1 != 0x0) { + hisi_l3c_pmu_clear_ds(event); + hisi_l3c_pmu_clear_core_tracetag(event); + hisi_l3c_pmu_clear_req_tracetag(event); + } +} /* * Select the counter register offset using the counter index @@ -50,14 +233,12 @@ static u32 hisi_l3c_pmu_get_counter_offset(int 
cntr_idx) static u64 hisi_l3c_pmu_read_counter(struct hisi_pmu *l3c_pmu, struct hw_perf_event *hwc) { - /* Read 64-bits and the upper 16 bits are RAZ */ return readq(l3c_pmu->base + hisi_l3c_pmu_get_counter_offset(hwc->idx)); } static void hisi_l3c_pmu_write_counter(struct hisi_pmu *l3c_pmu, struct hw_perf_event *hwc, u64 val) { - /* Write 64-bits and the upper 16 bits are WI */ writeq(val, l3c_pmu->base + hisi_l3c_pmu_get_counter_offset(hwc->idx)); } @@ -166,23 +347,14 @@ static void hisi_l3c_pmu_clear_int_status(struct hisi_pmu *l3c_pmu, int idx) static const struct acpi_device_id hisi_l3c_pmu_acpi_match[] = { { "HISI0213", }, - {}, + { "HISI0214", }, + {} }; MODULE_DEVICE_TABLE(acpi, hisi_l3c_pmu_acpi_match); static int hisi_l3c_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *l3c_pmu) { - unsigned long long id; - acpi_status status; - - status = acpi_evaluate_integer(ACPI_HANDLE(&pdev->dev), - "_UID", NULL, &id); - if (ACPI_FAILURE(status)) - return -EINVAL; - - l3c_pmu->index_id = id; - /* * Use the SCCL_ID and CCL_ID to identify the L3C PMU, while * SCCL_ID is in MPIDR[aff2] and CCL_ID is in MPIDR[aff1]. @@ -220,6 +392,20 @@ static const struct attribute_group hisi_l3c_pmu_v1_format_group = { .attrs = hisi_l3c_pmu_v1_format_attr, }; +static struct attribute *hisi_l3c_pmu_v2_format_attr[] = { + HISI_PMU_FORMAT_ATTR(event, "config:0-7"), + HISI_PMU_FORMAT_ATTR(tt_core, "config1:0-7"), + HISI_PMU_FORMAT_ATTR(tt_req, "config1:8-10"), + HISI_PMU_FORMAT_ATTR(datasrc_cfg, "config1:11-15"), + HISI_PMU_FORMAT_ATTR(datasrc_skt, "config1:16"), + NULL +}; + +static const struct attribute_group hisi_l3c_pmu_v2_format_group = { + .name = "format", + .attrs = hisi_l3c_pmu_v2_format_attr, +}; + static struct attribute *hisi_l3c_pmu_v1_events_attr[] = { HISI_PMU_EVENT_ATTR(rd_cpipe, 0x00), HISI_PMU_EVENT_ATTR(wr_cpipe, 0x01), @@ -242,6 +428,19 @@ static const struct attribute_group hisi_l3c_pmu_v1_events_group = { .attrs = hisi_l3c_pmu_v1_events_attr, }; +static struct attribute *hisi_l3c_pmu_v2_events_attr[] = { + HISI_PMU_EVENT_ATTR(l3c_hit, 0x48), + HISI_PMU_EVENT_ATTR(cycles, 0x7f), + HISI_PMU_EVENT_ATTR(l3c_ref, 0xb8), + HISI_PMU_EVENT_ATTR(dat_access, 0xb9), + NULL +}; + +static const struct attribute_group hisi_l3c_pmu_v2_events_group = { + .name = "events", + .attrs = hisi_l3c_pmu_v2_events_attr, +}; + static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); static struct attribute *hisi_l3c_pmu_cpumask_attrs[] = { @@ -273,6 +472,14 @@ static const struct attribute_group *hisi_l3c_pmu_v1_attr_groups[] = { NULL, }; +static const struct attribute_group *hisi_l3c_pmu_v2_attr_groups[] = { + &hisi_l3c_pmu_v2_format_group, + &hisi_l3c_pmu_v2_events_group, + &hisi_l3c_pmu_cpumask_attr_group, + &hisi_l3c_pmu_identifier_group, + NULL +}; + static const struct hisi_uncore_ops hisi_uncore_l3c_ops = { .write_evtype = hisi_l3c_pmu_write_evtype, .get_event_idx = hisi_uncore_pmu_get_event_idx, @@ -286,6 +493,8 @@ static const struct hisi_uncore_ops hisi_uncore_l3c_ops = { .read_counter = hisi_l3c_pmu_read_counter, .get_int_status = hisi_l3c_pmu_get_int_status, .clear_int_status = hisi_l3c_pmu_clear_int_status, + .enable_filter = hisi_l3c_pmu_enable_filter, + .disable_filter = hisi_l3c_pmu_disable_filter, }; static int hisi_l3c_pmu_dev_probe(struct platform_device *pdev, @@ -301,12 +510,20 @@ static int hisi_l3c_pmu_dev_probe(struct platform_device *pdev, if (ret) return ret; + if (l3c_pmu->identifier >= HISI_PMU_V2) { + l3c_pmu->counter_bits = 64; + l3c_pmu->check_event = 
L3C_V2_NR_EVENTS; + l3c_pmu->pmu_events.attr_groups = hisi_l3c_pmu_v2_attr_groups; + } else { + l3c_pmu->counter_bits = 48; + l3c_pmu->check_event = L3C_V1_NR_EVENTS; + l3c_pmu->pmu_events.attr_groups = hisi_l3c_pmu_v1_attr_groups; + } + l3c_pmu->num_counters = L3C_NR_COUNTERS; - l3c_pmu->counter_bits = 48; l3c_pmu->ops = &hisi_uncore_l3c_ops; l3c_pmu->dev = &pdev->dev; l3c_pmu->on_cpu = -1; - l3c_pmu->check_event = L3C_V1_NR_EVENTS; return 0; } @@ -334,8 +551,12 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev) return ret; } + /* + * CCL_ID is used to identify the L3C in the same SCCL which was + * used _UID by mistake. + */ name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_l3c%u", - l3c_pmu->sccl_id, l3c_pmu->index_id); + l3c_pmu->sccl_id, l3c_pmu->ccl_id); l3c_pmu->pmu = (struct pmu) { .name = name, .module = THIS_MODULE, @@ -348,7 +569,7 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev) .start = hisi_uncore_pmu_start, .stop = hisi_uncore_pmu_stop, .read = hisi_uncore_pmu_read, - .attr_groups = hisi_l3c_pmu_v1_attr_groups, + .attr_groups = l3c_pmu->pmu_events.attr_groups, .capabilities = PERF_PMU_CAP_NO_EXCLUDE, }; diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index c9d8e2ec499a..13c68b5e39c4 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -21,7 +21,7 @@ #include "hisi_uncore_pmu.h" #define HISI_GET_EVENTID(ev) (ev->hw.config_base & 0xff) -#define HISI_MAX_PERIOD(nr) (BIT_ULL(nr) - 1) +#define HISI_MAX_PERIOD(nr) (GENMASK_ULL((nr) - 1, 0)) /* * PMU format attributes @@ -245,6 +245,9 @@ static void hisi_uncore_pmu_enable_event(struct perf_event *event) hisi_pmu->ops->write_evtype(hisi_pmu, hwc->idx, HISI_GET_EVENTID(event)); + if (hisi_pmu->ops->enable_filter) + hisi_pmu->ops->enable_filter(event); + hisi_pmu->ops->enable_counter_int(hisi_pmu, hwc); hisi_pmu->ops->enable_counter(hisi_pmu, hwc); } @@ -259,6 +262,9 @@ static void hisi_uncore_pmu_disable_event(struct perf_event *event) hisi_pmu->ops->disable_counter(hisi_pmu, hwc); hisi_pmu->ops->disable_counter_int(hisi_pmu, hwc); + + if (hisi_pmu->ops->disable_filter) + hisi_pmu->ops->disable_filter(event); } void hisi_uncore_pmu_set_event_period(struct perf_event *event) diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h index 933020c99e3e..1591dbc6a119 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.h +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h @@ -11,6 +11,7 @@ #ifndef __HISI_UNCORE_PMU_H__ #define __HISI_UNCORE_PMU_H__ +#include #include #include #include @@ -22,6 +23,7 @@ #undef pr_fmt #define pr_fmt(fmt) "hisi_pmu: " fmt +#define HISI_PMU_V2 0x30 #define HISI_MAX_COUNTERS 0x10 #define to_hisi_pmu(p) (container_of(p, struct hisi_pmu, pmu)) @@ -35,6 +37,12 @@ #define HISI_PMU_EVENT_ATTR(_name, _config) \ HISI_PMU_ATTR(_name, hisi_event_sysfs_show, (unsigned long)_config) +#define HISI_PMU_EVENT_ATTR_EXTRACTOR(name, config, hi, lo) \ + static inline u32 hisi_get_##name(struct perf_event *event) \ + { \ + return FIELD_GET(GENMASK_ULL(hi, lo), event->attr.config); \ + } + struct hisi_pmu; struct hisi_uncore_ops { @@ -50,11 +58,14 @@ struct hisi_uncore_ops { void (*stop_counters)(struct hisi_pmu *); u32 (*get_int_status)(struct hisi_pmu *hisi_pmu); void (*clear_int_status)(struct hisi_pmu *hisi_pmu, int idx); + void (*enable_filter)(struct perf_event *event); + void (*disable_filter)(struct perf_event *event); }; struct hisi_pmu_hwevents { struct 
perf_event *hw_events[HISI_MAX_COUNTERS]; DECLARE_BITMAP(used_mask, HISI_MAX_COUNTERS); + const struct attribute_group **attr_groups; }; /* Generic pmu struct for different pmu types */ -- cgit v1.2.3 From 932f6a99f9b0c6b7039a5e2ce961009a8dc8c07c Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Mon, 8 Mar 2021 14:50:33 +0800 Subject: drivers/perf: hisi: Add new functions for HHA PMU On HiSilicon Hip09 platform, some new functions are also supported on HHA PMU. * tracetag_en: it is the abbreviation of tracetag enable and allows user to count events according to tt_req or tt_core set in L3C PMU. * datasrc_skt: it is the abbreviation of data source from another socket and it is used in the multi-chips. It's the same as L3C PMU. * srcid_cmd & srcid_msk: pair of the fields are used to filter statistics that come from the specific CCL/ICL by the configuration. These are the abbreviation of source ID command and mask. The source ID is 11-bit and detailed descriptions are documented in Documentation/admin-guide/perf/hisi-pmu.rst. Cc: Mark Rutland Cc: Will Deacon Cc: John Garry Cc: Jonathan Cameron Reviewed-by: John Garry Co-developed-by: Qi Liu Signed-off-by: Qi Liu Signed-off-by: Shaokun Zhang Link: https://lore.kernel.org/r/1615186237-22263-6-git-send-email-zhangshaokun@hisilicon.com Signed-off-by: Will Deacon --- drivers/perf/hisilicon/hisi_uncore_hha_pmu.c | 203 +++++++++++++++++++++++++-- 1 file changed, 188 insertions(+), 15 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c index f779d70cf0bc..0316fabe32f1 100644 --- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c @@ -25,19 +25,136 @@ #define HHA_VERSION 0x1cf0 #define HHA_PERF_CTRL 0x1E00 #define HHA_EVENT_CTRL 0x1E04 +#define HHA_SRCID_CTRL 0x1E08 +#define HHA_DATSRC_CTRL 0x1BF0 #define HHA_EVENT_TYPE0 0x1E80 /* - * Each counter is 48-bits and [48:63] are reserved - * which are Read-As-Zero and Writes-Ignored. + * If the HW version only supports a 48-bit counter, then + * bits [63:48] are reserved, which are Read-As-Zero and + * Writes-Ignored. 
*/ #define HHA_CNT0_LOWER 0x1F00 -/* HHA has 16-counters */ +/* HHA PMU v1 has 16 counters and v2 only has 8 counters */ #define HHA_V1_NR_COUNTERS 0x10 +#define HHA_V2_NR_COUNTERS 0x8 #define HHA_PERF_CTRL_EN 0x1 +#define HHA_TRACETAG_EN BIT(31) +#define HHA_SRCID_EN BIT(2) +#define HHA_SRCID_CMD_SHIFT 6 +#define HHA_SRCID_MSK_SHIFT 20 +#define HHA_SRCID_CMD GENMASK(16, 6) +#define HHA_SRCID_MSK GENMASK(30, 20) +#define HHA_DATSRC_SKT_EN BIT(23) #define HHA_EVTYPE_NONE 0xff #define HHA_V1_NR_EVENT 0x65 +#define HHA_V2_NR_EVENT 0xCE + +HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_cmd, config1, 10, 0); +HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_msk, config1, 21, 11); +HISI_PMU_EVENT_ATTR_EXTRACTOR(tracetag_en, config1, 22, 22); +HISI_PMU_EVENT_ATTR_EXTRACTOR(datasrc_skt, config1, 23, 23); + +static void hisi_hha_pmu_enable_tracetag(struct perf_event *event) +{ + struct hisi_pmu *hha_pmu = to_hisi_pmu(event->pmu); + u32 tt_en = hisi_get_tracetag_en(event); + + if (tt_en) { + u32 val; + + val = readl(hha_pmu->base + HHA_SRCID_CTRL); + val |= HHA_TRACETAG_EN; + writel(val, hha_pmu->base + HHA_SRCID_CTRL); + } +} + +static void hisi_hha_pmu_clear_tracetag(struct perf_event *event) +{ + struct hisi_pmu *hha_pmu = to_hisi_pmu(event->pmu); + u32 val; + + val = readl(hha_pmu->base + HHA_SRCID_CTRL); + val &= ~HHA_TRACETAG_EN; + writel(val, hha_pmu->base + HHA_SRCID_CTRL); +} + +static void hisi_hha_pmu_config_ds(struct perf_event *event) +{ + struct hisi_pmu *hha_pmu = to_hisi_pmu(event->pmu); + u32 ds_skt = hisi_get_datasrc_skt(event); + + if (ds_skt) { + u32 val; + + val = readl(hha_pmu->base + HHA_DATSRC_CTRL); + val |= HHA_DATSRC_SKT_EN; + writel(ds_skt, hha_pmu->base + HHA_DATSRC_CTRL); + } +} + +static void hisi_hha_pmu_clear_ds(struct perf_event *event) +{ + struct hisi_pmu *hha_pmu = to_hisi_pmu(event->pmu); + u32 ds_skt = hisi_get_datasrc_skt(event); + + if (ds_skt) { + u32 val; + + val = readl(hha_pmu->base + HHA_DATSRC_CTRL); + val &= ~HHA_DATSRC_SKT_EN; + writel(ds_skt, hha_pmu->base + HHA_DATSRC_CTRL); + } +} + +static void hisi_hha_pmu_config_srcid(struct perf_event *event) +{ + struct hisi_pmu *hha_pmu = to_hisi_pmu(event->pmu); + u32 cmd = hisi_get_srcid_cmd(event); + + if (cmd) { + u32 val, msk; + + msk = hisi_get_srcid_msk(event); + val = readl(hha_pmu->base + HHA_SRCID_CTRL); + val |= HHA_SRCID_EN | (cmd << HHA_SRCID_CMD_SHIFT) | + (msk << HHA_SRCID_MSK_SHIFT); + writel(val, hha_pmu->base + HHA_SRCID_CTRL); + } +} + +static void hisi_hha_pmu_disable_srcid(struct perf_event *event) +{ + struct hisi_pmu *hha_pmu = to_hisi_pmu(event->pmu); + u32 cmd = hisi_get_srcid_cmd(event); + + if (cmd) { + u32 val; + + val = readl(hha_pmu->base + HHA_SRCID_CTRL); + val &= ~(HHA_SRCID_EN | HHA_SRCID_MSK | HHA_SRCID_CMD); + writel(val, hha_pmu->base + HHA_SRCID_CTRL); + } +} + +static void hisi_hha_pmu_enable_filter(struct perf_event *event) +{ + if (event->attr.config1 != 0x0) { + hisi_hha_pmu_enable_tracetag(event); + hisi_hha_pmu_config_ds(event); + hisi_hha_pmu_config_srcid(event); + } +} + +static void hisi_hha_pmu_disable_filter(struct perf_event *event) +{ + if (event->attr.config1 != 0x0) { + hisi_hha_pmu_disable_srcid(event); + hisi_hha_pmu_clear_ds(event); + hisi_hha_pmu_clear_tracetag(event); + } +} /* * Select the counter register offset using the counter index @@ -167,7 +284,8 @@ static void hisi_hha_pmu_clear_int_status(struct hisi_pmu *hha_pmu, int idx) static const struct acpi_device_id hisi_hha_pmu_acpi_match[] = { { "HISI0243", }, - {}, + { "HISI0244", }, + {} }; MODULE_DEVICE_TABLE(acpi, 
hisi_hha_pmu_acpi_match); @@ -177,13 +295,6 @@ static int hisi_hha_pmu_init_data(struct platform_device *pdev, unsigned long long id; acpi_status status; - status = acpi_evaluate_integer(ACPI_HANDLE(&pdev->dev), - "_UID", NULL, &id); - if (ACPI_FAILURE(status)) - return -EINVAL; - - hha_pmu->index_id = id; - /* * Use SCCL_ID and UID to identify the HHA PMU, while * SCCL_ID is in MPIDR[aff2]. @@ -193,6 +304,22 @@ static int hisi_hha_pmu_init_data(struct platform_device *pdev, dev_err(&pdev->dev, "Can not read hha sccl-id!\n"); return -EINVAL; } + + /* + * Early versions of BIOS support _UID by mistake, so we support + * both "hisilicon, idx-id" as preference, if available. + */ + if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id", + &hha_pmu->index_id)) { + status = acpi_evaluate_integer(ACPI_HANDLE(&pdev->dev), + "_UID", NULL, &id); + if (ACPI_FAILURE(status)) { + dev_err(&pdev->dev, "Cannot read idx-id!\n"); + return -EINVAL; + } + + hha_pmu->index_id = id; + } /* HHA PMUs only share the same SCCL */ hha_pmu->ccl_id = -1; @@ -217,6 +344,20 @@ static const struct attribute_group hisi_hha_pmu_v1_format_group = { .attrs = hisi_hha_pmu_v1_format_attr, }; +static struct attribute *hisi_hha_pmu_v2_format_attr[] = { + HISI_PMU_FORMAT_ATTR(event, "config:0-7"), + HISI_PMU_FORMAT_ATTR(srcid_cmd, "config1:0-10"), + HISI_PMU_FORMAT_ATTR(srcid_msk, "config1:11-21"), + HISI_PMU_FORMAT_ATTR(tracetag_en, "config1:22"), + HISI_PMU_FORMAT_ATTR(datasrc_skt, "config1:23"), + NULL +}; + +static const struct attribute_group hisi_hha_pmu_v2_format_group = { + .name = "format", + .attrs = hisi_hha_pmu_v2_format_attr, +}; + static struct attribute *hisi_hha_pmu_v1_events_attr[] = { HISI_PMU_EVENT_ATTR(rx_ops_num, 0x00), HISI_PMU_EVENT_ATTR(rx_outer, 0x01), @@ -252,6 +393,20 @@ static const struct attribute_group hisi_hha_pmu_v1_events_group = { .attrs = hisi_hha_pmu_v1_events_attr, }; +static struct attribute *hisi_hha_pmu_v2_events_attr[] = { + HISI_PMU_EVENT_ATTR(rx_ops_num, 0x00), + HISI_PMU_EVENT_ATTR(rx_outer, 0x01), + HISI_PMU_EVENT_ATTR(rx_sccl, 0x02), + HISI_PMU_EVENT_ATTR(hha_retry, 0x2e), + HISI_PMU_EVENT_ATTR(cycles, 0x55), + NULL +}; + +static const struct attribute_group hisi_hha_pmu_v2_events_group = { + .name = "events", + .attrs = hisi_hha_pmu_v2_events_attr, +}; + static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); static struct attribute *hisi_hha_pmu_cpumask_attrs[] = { @@ -283,6 +438,14 @@ static const struct attribute_group *hisi_hha_pmu_v1_attr_groups[] = { NULL, }; +static const struct attribute_group *hisi_hha_pmu_v2_attr_groups[] = { + &hisi_hha_pmu_v2_format_group, + &hisi_hha_pmu_v2_events_group, + &hisi_hha_pmu_cpumask_attr_group, + &hisi_hha_pmu_identifier_group, + NULL +}; + static const struct hisi_uncore_ops hisi_uncore_hha_ops = { .write_evtype = hisi_hha_pmu_write_evtype, .get_event_idx = hisi_uncore_pmu_get_event_idx, @@ -296,6 +459,8 @@ static const struct hisi_uncore_ops hisi_uncore_hha_ops = { .read_counter = hisi_hha_pmu_read_counter, .get_int_status = hisi_hha_pmu_get_int_status, .clear_int_status = hisi_hha_pmu_clear_int_status, + .enable_filter = hisi_hha_pmu_enable_filter, + .disable_filter = hisi_hha_pmu_disable_filter, }; static int hisi_hha_pmu_dev_probe(struct platform_device *pdev, @@ -311,12 +476,20 @@ static int hisi_hha_pmu_dev_probe(struct platform_device *pdev, if (ret) return ret; - hha_pmu->num_counters = HHA_V1_NR_COUNTERS; - hha_pmu->counter_bits = 48; + if (hha_pmu->identifier >= HISI_PMU_V2) { + hha_pmu->counter_bits = 64; + 
hha_pmu->check_event = HHA_V2_NR_EVENT; + hha_pmu->pmu_events.attr_groups = hisi_hha_pmu_v2_attr_groups; + hha_pmu->num_counters = HHA_V2_NR_COUNTERS; + } else { + hha_pmu->counter_bits = 48; + hha_pmu->check_event = HHA_V1_NR_EVENT; + hha_pmu->pmu_events.attr_groups = hisi_hha_pmu_v1_attr_groups; + hha_pmu->num_counters = HHA_V1_NR_COUNTERS; + } hha_pmu->ops = &hisi_uncore_hha_ops; hha_pmu->dev = &pdev->dev; hha_pmu->on_cpu = -1; - hha_pmu->check_event = HHA_V1_NR_EVENT; return 0; } @@ -358,7 +531,7 @@ static int hisi_hha_pmu_probe(struct platform_device *pdev) .start = hisi_uncore_pmu_start, .stop = hisi_uncore_pmu_stop, .read = hisi_uncore_pmu_read, - .attr_groups = hisi_hha_pmu_v1_attr_groups, + .attr_groups = hha_pmu->pmu_events.attr_groups, .capabilities = PERF_PMU_CAP_NO_EXCLUDE, }; -- cgit v1.2.3 From cce03e702c9f26a43b16c51bf03029911feab692 Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Mon, 8 Mar 2021 14:50:34 +0800 Subject: drivers/perf: hisi: Update DDRC PMU for programmable counter The DDRC PMU's events are useful for performance profiling, but the events are limited and the counters are fixed. On the HiSilicon Hip09 platform, the PMU counters are programmable and more events are supported. Let's add the DDRC PMU v2 driver. Bandwidth events are exposed directly in the driver and some more events will be listed in a JSON file later. Cc: Mark Rutland Cc: Will Deacon Cc: John Garry Cc: Jonathan Cameron Reviewed-by: John Garry Co-developed-by: Qi Liu Signed-off-by: Qi Liu Signed-off-by: Shaokun Zhang Link: https://lore.kernel.org/r/1615186237-22263-7-git-send-email-zhangshaokun@hisilicon.com Signed-off-by: Will Deacon --- drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 207 ++++++++++++++++++++++++-- drivers/perf/hisilicon/hisi_uncore_pmu.h | 2 + 2 files changed, 196 insertions(+), 13 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index 38a0622372c5..7c8a4bc21db4 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -18,7 +18,7 @@ #include "hisi_uncore_pmu.h" -/* DDRC register definition */ +/* DDRC register definition in v1 */ #define DDRC_PERF_CTRL 0x010 #define DDRC_FLUX_WR 0x380 #define DDRC_FLUX_RD 0x384 @@ -34,13 +34,24 @@ #define DDRC_INT_CLEAR 0x6d0 #define DDRC_VERSION 0x710 +/* DDRC register definition in v2 */ +#define DDRC_V2_INT_MASK 0x528 +#define DDRC_V2_INT_STATUS 0x52c +#define DDRC_V2_INT_CLEAR 0x530 +#define DDRC_V2_EVENT_CNT 0xe00 +#define DDRC_V2_EVENT_CTRL 0xe70 +#define DDRC_V2_EVENT_TYPE 0xe74 +#define DDRC_V2_PERF_CTRL 0xeA0 + /* DDRC has 8-counters */ #define DDRC_NR_COUNTERS 0x8 #define DDRC_V1_PERF_CTRL_EN 0x2 +#define DDRC_V2_PERF_CTRL_EN 0x1 #define DDRC_V1_NR_EVENTS 0x7 +#define DDRC_V2_NR_EVENTS 0x90 /* - * For DDRC PMU, there are eight-events and every event has been mapped + * For PMU v1, there are eight-events and every event has been mapped * to fixed-purpose counters which register offset is not consistent. * Therefore there is no write event type and we assume that event * code (0 to 7) is equal to counter index in PMU driver. 
@@ -62,6 +73,11 @@ static u32 hisi_ddrc_pmu_v1_get_counter_offset(int cntr_idx) return ddrc_reg_off[cntr_idx]; } +static u32 hisi_ddrc_pmu_v2_get_counter_offset(int cntr_idx) +{ + return DDRC_V2_EVENT_CNT + cntr_idx * 8; +} + static u64 hisi_ddrc_pmu_v1_read_counter(struct hisi_pmu *ddrc_pmu, struct hw_perf_event *hwc) { @@ -76,13 +92,34 @@ static void hisi_ddrc_pmu_v1_write_counter(struct hisi_pmu *ddrc_pmu, ddrc_pmu->base + hisi_ddrc_pmu_v1_get_counter_offset(hwc->idx)); } +static u64 hisi_ddrc_pmu_v2_read_counter(struct hisi_pmu *ddrc_pmu, + struct hw_perf_event *hwc) +{ + return readq(ddrc_pmu->base + + hisi_ddrc_pmu_v2_get_counter_offset(hwc->idx)); +} + +static void hisi_ddrc_pmu_v2_write_counter(struct hisi_pmu *ddrc_pmu, + struct hw_perf_event *hwc, u64 val) +{ + writeq(val, + ddrc_pmu->base + hisi_ddrc_pmu_v2_get_counter_offset(hwc->idx)); +} + /* - * For DDRC PMU, event has been mapped to fixed-purpose counter by hardware, - * so there is no need to write event type. + * For DDRC PMU v1, event has been mapped to fixed-purpose counter by hardware, + * so there is no need to write event type, while it is programmable counter in + * PMU v2. */ static void hisi_ddrc_pmu_write_evtype(struct hisi_pmu *hha_pmu, int idx, u32 type) { + u32 offset; + + if (hha_pmu->identifier >= HISI_PMU_V2) { + offset = DDRC_V2_EVENT_TYPE + 4 * idx; + writel(type, hha_pmu->base + offset); + } } static void hisi_ddrc_pmu_v1_start_counters(struct hisi_pmu *ddrc_pmu) @@ -143,6 +180,49 @@ static int hisi_ddrc_pmu_v1_get_event_idx(struct perf_event *event) return idx; } +static int hisi_ddrc_pmu_v2_get_event_idx(struct perf_event *event) +{ + return hisi_uncore_pmu_get_event_idx(event); +} + +static void hisi_ddrc_pmu_v2_start_counters(struct hisi_pmu *ddrc_pmu) +{ + u32 val; + + val = readl(ddrc_pmu->base + DDRC_V2_PERF_CTRL); + val |= DDRC_V2_PERF_CTRL_EN; + writel(val, ddrc_pmu->base + DDRC_V2_PERF_CTRL); +} + +static void hisi_ddrc_pmu_v2_stop_counters(struct hisi_pmu *ddrc_pmu) +{ + u32 val; + + val = readl(ddrc_pmu->base + DDRC_V2_PERF_CTRL); + val &= ~DDRC_V2_PERF_CTRL_EN; + writel(val, ddrc_pmu->base + DDRC_V2_PERF_CTRL); +} + +static void hisi_ddrc_pmu_v2_enable_counter(struct hisi_pmu *ddrc_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + val = readl(ddrc_pmu->base + DDRC_V2_EVENT_CTRL); + val |= 1 << hwc->idx; + writel(val, ddrc_pmu->base + DDRC_V2_EVENT_CTRL); +} + +static void hisi_ddrc_pmu_v2_disable_counter(struct hisi_pmu *ddrc_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + val = readl(ddrc_pmu->base + DDRC_V2_EVENT_CTRL); + val &= ~(1 << hwc->idx); + writel(val, ddrc_pmu->base + DDRC_V2_EVENT_CTRL); +} + static void hisi_ddrc_pmu_v1_enable_counter_int(struct hisi_pmu *ddrc_pmu, struct hw_perf_event *hwc) { @@ -150,7 +230,7 @@ static void hisi_ddrc_pmu_v1_enable_counter_int(struct hisi_pmu *ddrc_pmu, /* Write 0 to enable interrupt */ val = readl(ddrc_pmu->base + DDRC_INT_MASK); - val &= ~(1 << GET_DDRC_EVENTID(hwc)); + val &= ~(1 << hwc->idx); writel(val, ddrc_pmu->base + DDRC_INT_MASK); } @@ -161,10 +241,30 @@ static void hisi_ddrc_pmu_v1_disable_counter_int(struct hisi_pmu *ddrc_pmu, /* Write 1 to mask interrupt */ val = readl(ddrc_pmu->base + DDRC_INT_MASK); - val |= (1 << GET_DDRC_EVENTID(hwc)); + val |= 1 << hwc->idx; writel(val, ddrc_pmu->base + DDRC_INT_MASK); } +static void hisi_ddrc_pmu_v2_enable_counter_int(struct hisi_pmu *ddrc_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + val = readl(ddrc_pmu->base + DDRC_V2_INT_MASK); + val &= ~(1 << hwc->idx); + writel(val, 
ddrc_pmu->base + DDRC_V2_INT_MASK); +} + +static void hisi_ddrc_pmu_v2_disable_counter_int(struct hisi_pmu *ddrc_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + val = readl(ddrc_pmu->base + DDRC_V2_INT_MASK); + val |= 1 << hwc->idx; + writel(val, ddrc_pmu->base + DDRC_V2_INT_MASK); +} + static u32 hisi_ddrc_pmu_v1_get_int_status(struct hisi_pmu *ddrc_pmu) { return readl(ddrc_pmu->base + DDRC_INT_STATUS); @@ -176,9 +276,21 @@ static void hisi_ddrc_pmu_v1_clear_int_status(struct hisi_pmu *ddrc_pmu, writel(1 << idx, ddrc_pmu->base + DDRC_INT_CLEAR); } +static u32 hisi_ddrc_pmu_v2_get_int_status(struct hisi_pmu *ddrc_pmu) +{ + return readl(ddrc_pmu->base + DDRC_V2_INT_STATUS); +} + +static void hisi_ddrc_pmu_v2_clear_int_status(struct hisi_pmu *ddrc_pmu, + int idx) +{ + writel(1 << idx, ddrc_pmu->base + DDRC_V2_INT_CLEAR); +} + static const struct acpi_device_id hisi_ddrc_pmu_acpi_match[] = { { "HISI0233", }, - {}, + { "HISI0234", }, + {} }; MODULE_DEVICE_TABLE(acpi, hisi_ddrc_pmu_acpi_match); @@ -210,6 +322,13 @@ static int hisi_ddrc_pmu_init_data(struct platform_device *pdev, } ddrc_pmu->identifier = readl(ddrc_pmu->base + DDRC_VERSION); + if (ddrc_pmu->identifier >= HISI_PMU_V2) { + if (device_property_read_u32(&pdev->dev, "hisilicon,sub-id", + &ddrc_pmu->sub_id)) { + dev_err(&pdev->dev, "Can not read sub-id!\n"); + return -EINVAL; + } + } return 0; } @@ -224,6 +343,16 @@ static const struct attribute_group hisi_ddrc_pmu_v1_format_group = { .attrs = hisi_ddrc_pmu_v1_format_attr, }; +static struct attribute *hisi_ddrc_pmu_v2_format_attr[] = { + HISI_PMU_FORMAT_ATTR(event, "config:0-7"), + NULL +}; + +static const struct attribute_group hisi_ddrc_pmu_v2_format_group = { + .name = "format", + .attrs = hisi_ddrc_pmu_v2_format_attr, +}; + static struct attribute *hisi_ddrc_pmu_v1_events_attr[] = { HISI_PMU_EVENT_ATTR(flux_wr, 0x00), HISI_PMU_EVENT_ATTR(flux_rd, 0x01), @@ -241,6 +370,18 @@ static const struct attribute_group hisi_ddrc_pmu_v1_events_group = { .attrs = hisi_ddrc_pmu_v1_events_attr, }; +static struct attribute *hisi_ddrc_pmu_v2_events_attr[] = { + HISI_PMU_EVENT_ATTR(cycles, 0x00), + HISI_PMU_EVENT_ATTR(flux_wr, 0x83), + HISI_PMU_EVENT_ATTR(flux_rd, 0x84), + NULL +}; + +static const struct attribute_group hisi_ddrc_pmu_v2_events_group = { + .name = "events", + .attrs = hisi_ddrc_pmu_v2_events_attr, +}; + static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); static struct attribute *hisi_ddrc_pmu_cpumask_attrs[] = { @@ -272,6 +413,14 @@ static const struct attribute_group *hisi_ddrc_pmu_v1_attr_groups[] = { NULL, }; +static const struct attribute_group *hisi_ddrc_pmu_v2_attr_groups[] = { + &hisi_ddrc_pmu_v2_format_group, + &hisi_ddrc_pmu_v2_events_group, + &hisi_ddrc_pmu_cpumask_attr_group, + &hisi_ddrc_pmu_identifier_group, + NULL +}; + static const struct hisi_uncore_ops hisi_uncore_ddrc_v1_ops = { .write_evtype = hisi_ddrc_pmu_write_evtype, .get_event_idx = hisi_ddrc_pmu_v1_get_event_idx, @@ -287,6 +436,21 @@ static const struct hisi_uncore_ops hisi_uncore_ddrc_v1_ops = { .clear_int_status = hisi_ddrc_pmu_v1_clear_int_status, }; +static const struct hisi_uncore_ops hisi_uncore_ddrc_v2_ops = { + .write_evtype = hisi_ddrc_pmu_write_evtype, + .get_event_idx = hisi_ddrc_pmu_v2_get_event_idx, + .start_counters = hisi_ddrc_pmu_v2_start_counters, + .stop_counters = hisi_ddrc_pmu_v2_stop_counters, + .enable_counter = hisi_ddrc_pmu_v2_enable_counter, + .disable_counter = hisi_ddrc_pmu_v2_disable_counter, + .enable_counter_int = hisi_ddrc_pmu_v2_enable_counter_int, + 
.disable_counter_int	= hisi_ddrc_pmu_v2_disable_counter_int,
+	.write_counter		= hisi_ddrc_pmu_v2_write_counter,
+	.read_counter		= hisi_ddrc_pmu_v2_read_counter,
+	.get_int_status		= hisi_ddrc_pmu_v2_get_int_status,
+	.clear_int_status	= hisi_ddrc_pmu_v2_clear_int_status,
+};
+
 static int hisi_ddrc_pmu_dev_probe(struct platform_device *pdev,
 				   struct hisi_pmu *ddrc_pmu)
 {
@@ -300,12 +464,21 @@ static int hisi_ddrc_pmu_dev_probe(struct platform_device *pdev,
 	if (ret)
 		return ret;
 
+	if (ddrc_pmu->identifier >= HISI_PMU_V2) {
+		ddrc_pmu->counter_bits = 48;
+		ddrc_pmu->check_event = DDRC_V2_NR_EVENTS;
+		ddrc_pmu->pmu_events.attr_groups = hisi_ddrc_pmu_v2_attr_groups;
+		ddrc_pmu->ops = &hisi_uncore_ddrc_v2_ops;
+	} else {
+		ddrc_pmu->counter_bits = 32;
+		ddrc_pmu->check_event = DDRC_V1_NR_EVENTS;
+		ddrc_pmu->pmu_events.attr_groups = hisi_ddrc_pmu_v1_attr_groups;
+		ddrc_pmu->ops = &hisi_uncore_ddrc_v1_ops;
+	}
+
 	ddrc_pmu->num_counters = DDRC_NR_COUNTERS;
-	ddrc_pmu->counter_bits = 32;
-	ddrc_pmu->ops = &hisi_uncore_ddrc_v1_ops;
 	ddrc_pmu->dev = &pdev->dev;
 	ddrc_pmu->on_cpu = -1;
-	ddrc_pmu->check_event = DDRC_V1_NR_EVENTS;
 
 	return 0;
 }
@@ -333,8 +506,16 @@ static int hisi_ddrc_pmu_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_ddrc%u",
-			      ddrc_pmu->sccl_id, ddrc_pmu->index_id);
+	if (ddrc_pmu->identifier >= HISI_PMU_V2)
+		name = devm_kasprintf(&pdev->dev, GFP_KERNEL,
+				      "hisi_sccl%u_ddrc%u_%u",
+				      ddrc_pmu->sccl_id, ddrc_pmu->index_id,
+				      ddrc_pmu->sub_id);
+	else
+		name = devm_kasprintf(&pdev->dev, GFP_KERNEL,
+				      "hisi_sccl%u_ddrc%u", ddrc_pmu->sccl_id,
+				      ddrc_pmu->index_id);
+
 	ddrc_pmu->pmu = (struct pmu) {
 		.name		= name,
 		.module		= THIS_MODULE,
@@ -347,7 +528,7 @@ static int hisi_ddrc_pmu_probe(struct platform_device *pdev)
 		.start		= hisi_uncore_pmu_start,
 		.stop		= hisi_uncore_pmu_stop,
 		.read		= hisi_uncore_pmu_read,
-		.attr_groups	= hisi_ddrc_pmu_v1_attr_groups,
+		.attr_groups	= ddrc_pmu->pmu_events.attr_groups,
 		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE,
 	};
 
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h
index 1591dbc6a119..ea9d89bbc1ea 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.h
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h
@@ -85,6 +85,8 @@ struct hisi_pmu {
 	void __iomem *base;
 	/* the ID of the PMU modules */
 	u32 index_id;
+	/* For DDRC PMU v2: each DDRC has more than one DMC */
+	u32 sub_id;
 	int num_counters;
 	int counter_bits;
 	/* check event code range */
-- 
cgit v1.2.3

From 3bf30882c3c7b6e376d9d6d04082c9aa2d2ac30a Mon Sep 17 00:00:00 2001
From: Shaokun Zhang
Date: Mon, 8 Mar 2021 14:50:35 +0800
Subject: drivers/perf: hisi: Add support for HiSilicon SLLC PMU driver

HiSilicon's Hip09 comprises multiple dies that can be connected by the
SLLC module (Skyros Link Layer Controller). The SLLC has separate PMU
registers which the driver can program freely, and an interrupt is
supported to handle counter overflow. Let's support its driver under
the framework of the HiSilicon uncore PMU driver.

SLLC PMU supports the following filter functions:

* tracetag_en: allows the user to count data according to tt_req or
tt_core set in the L3C PMU.

* srcid_cmd & srcid_msk: allows the user to filter statistics that come
from a specific CCL/ICL by configuring the source ID.

* tgtid_hi & tgtid_lo: also supports event statistics for operations
that go to a specific CCL/ICL, by configuring a target ID or a target
ID range. It's the same as the source ID, with 11-bit width in the SoC.
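As an illustration only (the helper below is not part of the driver; the
bit positions are those of the format attributes added by this patch),
user space could pack these filter fields into perf_event_attr.config1
along these lines:

  #include <linux/types.h>

  /* Sketch: pack the SLLC filter fields into perf_event_attr.config1. */
  static __u64 sllc_pack_config1(__u64 tgtid_min, __u64 tgtid_max,
                                 __u64 srcid_cmd, __u64 srcid_msk,
                                 __u64 tracetag_en)
  {
          return (tgtid_min & 0x7ff) |          /* config1[10:0]  */
                 ((tgtid_max & 0x7ff) << 11) |  /* config1[21:11] */
                 ((srcid_cmd & 0x7ff) << 22) |  /* config1[32:22] */
                 ((srcid_msk & 0x7ff) << 33) |  /* config1[43:33] */
                 ((tracetag_en & 0x1) << 44);   /* config1[44]    */
  }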
More introduction is added in documentation: Documentation/admin-guide/perf/hisi-pmu.rst Cc: Mark Rutland Cc: Will Deacon Cc: John Garry Cc: Jonathan Cameron Reviewed-by: John Garry Co-developed-by: Qi Liu Signed-off-by: Qi Liu Signed-off-by: Shaokun Zhang Link: https://lore.kernel.org/r/1615186237-22263-8-git-send-email-zhangshaokun@hisilicon.com Signed-off-by: Will Deacon --- drivers/perf/hisilicon/Makefile | 2 +- drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c | 530 ++++++++++++++++++++++++++ include/linux/cpuhotplug.h | 1 + 3 files changed, 532 insertions(+), 1 deletion(-) create mode 100644 drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c diff --git a/drivers/perf/hisilicon/Makefile b/drivers/perf/hisilicon/Makefile index e8377061845f..6600a9d45dd8 100644 --- a/drivers/perf/hisilicon/Makefile +++ b/drivers/perf/hisilicon/Makefile @@ -1,3 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o hisi_uncore_l3c_pmu.o \ - hisi_uncore_hha_pmu.o hisi_uncore_ddrc_pmu.o + hisi_uncore_hha_pmu.o hisi_uncore_ddrc_pmu.o hisi_uncore_sllc_pmu.o diff --git a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c new file mode 100644 index 000000000000..46be312fa126 --- /dev/null +++ b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c @@ -0,0 +1,530 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * HiSilicon SLLC uncore Hardware event counters support + * + * Copyright (C) 2020 Hisilicon Limited + * Author: Shaokun Zhang + * + * This code is based on the uncore PMUs like arm-cci and arm-ccn. + */ +#include +#include +#include +#include +#include +#include + +#include "hisi_uncore_pmu.h" + +/* SLLC register definition */ +#define SLLC_INT_MASK 0x0814 +#define SLLC_INT_STATUS 0x0818 +#define SLLC_INT_CLEAR 0x081c +#define SLLC_PERF_CTRL 0x1c00 +#define SLLC_SRCID_CTRL 0x1c04 +#define SLLC_TGTID_CTRL 0x1c08 +#define SLLC_EVENT_CTRL 0x1c14 +#define SLLC_EVENT_TYPE0 0x1c18 +#define SLLC_VERSION 0x1cf0 +#define SLLC_EVENT_CNT0_L 0x1d00 + +#define SLLC_EVTYPE_MASK 0xff +#define SLLC_PERF_CTRL_EN BIT(0) +#define SLLC_FILT_EN BIT(1) +#define SLLC_TRACETAG_EN BIT(2) +#define SLLC_SRCID_EN BIT(4) +#define SLLC_SRCID_NONE 0x0 +#define SLLC_TGTID_EN BIT(5) +#define SLLC_TGTID_NONE 0x0 +#define SLLC_TGTID_MIN_SHIFT 1 +#define SLLC_TGTID_MAX_SHIFT 12 +#define SLLC_SRCID_CMD_SHIFT 1 +#define SLLC_SRCID_MSK_SHIFT 12 +#define SLLC_NR_EVENTS 0x80 + +HISI_PMU_EVENT_ATTR_EXTRACTOR(tgtid_min, config1, 10, 0); +HISI_PMU_EVENT_ATTR_EXTRACTOR(tgtid_max, config1, 21, 11); +HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_cmd, config1, 32, 22); +HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_msk, config1, 43, 33); +HISI_PMU_EVENT_ATTR_EXTRACTOR(tracetag_en, config1, 44, 44); + +static bool tgtid_is_valid(u32 max, u32 min) +{ + return max > 0 && max >= min; +} + +static void hisi_sllc_pmu_enable_tracetag(struct perf_event *event) +{ + struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu); + u32 tt_en = hisi_get_tracetag_en(event); + + if (tt_en) { + u32 val; + + val = readl(sllc_pmu->base + SLLC_PERF_CTRL); + val |= SLLC_TRACETAG_EN | SLLC_FILT_EN; + writel(val, sllc_pmu->base + SLLC_PERF_CTRL); + } +} + +static void hisi_sllc_pmu_disable_tracetag(struct perf_event *event) +{ + struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu); + u32 tt_en = hisi_get_tracetag_en(event); + + if (tt_en) { + u32 val; + + val = readl(sllc_pmu->base + SLLC_PERF_CTRL); + val &= ~(SLLC_TRACETAG_EN | SLLC_FILT_EN); + writel(val, sllc_pmu->base + SLLC_PERF_CTRL); + } +} + +static void 
hisi_sllc_pmu_config_tgtid(struct perf_event *event) +{ + struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu); + u32 min = hisi_get_tgtid_min(event); + u32 max = hisi_get_tgtid_max(event); + + if (tgtid_is_valid(max, min)) { + u32 val = (max << SLLC_TGTID_MAX_SHIFT) | (min << SLLC_TGTID_MIN_SHIFT); + + writel(val, sllc_pmu->base + SLLC_TGTID_CTRL); + /* Enable the tgtid */ + val = readl(sllc_pmu->base + SLLC_PERF_CTRL); + val |= SLLC_TGTID_EN | SLLC_FILT_EN; + writel(val, sllc_pmu->base + SLLC_PERF_CTRL); + } +} + +static void hisi_sllc_pmu_clear_tgtid(struct perf_event *event) +{ + struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu); + u32 min = hisi_get_tgtid_min(event); + u32 max = hisi_get_tgtid_max(event); + + if (tgtid_is_valid(max, min)) { + u32 val; + + writel(SLLC_TGTID_NONE, sllc_pmu->base + SLLC_TGTID_CTRL); + /* Disable the tgtid */ + val = readl(sllc_pmu->base + SLLC_PERF_CTRL); + val &= ~(SLLC_TGTID_EN | SLLC_FILT_EN); + writel(val, sllc_pmu->base + SLLC_PERF_CTRL); + } +} + +static void hisi_sllc_pmu_config_srcid(struct perf_event *event) +{ + struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu); + u32 cmd = hisi_get_srcid_cmd(event); + + if (cmd) { + u32 val, msk; + + msk = hisi_get_srcid_msk(event); + val = (cmd << SLLC_SRCID_CMD_SHIFT) | (msk << SLLC_SRCID_MSK_SHIFT); + writel(val, sllc_pmu->base + SLLC_SRCID_CTRL); + /* Enable the srcid */ + val = readl(sllc_pmu->base + SLLC_PERF_CTRL); + val |= SLLC_SRCID_EN | SLLC_FILT_EN; + writel(val, sllc_pmu->base + SLLC_PERF_CTRL); + } +} + +static void hisi_sllc_pmu_clear_srcid(struct perf_event *event) +{ + struct hisi_pmu *sllc_pmu = to_hisi_pmu(event->pmu); + u32 cmd = hisi_get_srcid_cmd(event); + + if (cmd) { + u32 val; + + writel(SLLC_SRCID_NONE, sllc_pmu->base + SLLC_SRCID_CTRL); + /* Disable the srcid */ + val = readl(sllc_pmu->base + SLLC_PERF_CTRL); + val &= ~(SLLC_SRCID_EN | SLLC_FILT_EN); + writel(val, sllc_pmu->base + SLLC_PERF_CTRL); + } +} + +static void hisi_sllc_pmu_enable_filter(struct perf_event *event) +{ + if (event->attr.config1 != 0x0) { + hisi_sllc_pmu_enable_tracetag(event); + hisi_sllc_pmu_config_srcid(event); + hisi_sllc_pmu_config_tgtid(event); + } +} + +static void hisi_sllc_pmu_clear_filter(struct perf_event *event) +{ + if (event->attr.config1 != 0x0) { + hisi_sllc_pmu_disable_tracetag(event); + hisi_sllc_pmu_clear_srcid(event); + hisi_sllc_pmu_clear_tgtid(event); + } +} + +static u32 hisi_sllc_pmu_get_counter_offset(int idx) +{ + return (SLLC_EVENT_CNT0_L + idx * 8); +} + +static u64 hisi_sllc_pmu_read_counter(struct hisi_pmu *sllc_pmu, + struct hw_perf_event *hwc) +{ + return readq(sllc_pmu->base + + hisi_sllc_pmu_get_counter_offset(hwc->idx)); +} + +static void hisi_sllc_pmu_write_counter(struct hisi_pmu *sllc_pmu, + struct hw_perf_event *hwc, u64 val) +{ + writeq(val, sllc_pmu->base + + hisi_sllc_pmu_get_counter_offset(hwc->idx)); +} + +static void hisi_sllc_pmu_write_evtype(struct hisi_pmu *sllc_pmu, int idx, + u32 type) +{ + u32 reg, reg_idx, shift, val; + + /* + * Select the appropriate event select register(SLLC_EVENT_TYPE0/1). + * There are 2 event select registers for the 8 hardware counters. + * Event code is 8-bits and for the former 4 hardware counters, + * SLLC_EVENT_TYPE0 is chosen. For the latter 4 hardware counters, + * SLLC_EVENT_TYPE1 is chosen. 
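+	 * For example, idx 6 uses SLLC_EVENT_TYPE1 and its event code
+	 * occupies bits [23:16] of that register (6 % 4 == 2, so the
+	 * shift is 16).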
+ */ + reg = SLLC_EVENT_TYPE0 + (idx / 4) * 4; + reg_idx = idx % 4; + shift = 8 * reg_idx; + + /* Write event code to SLLC_EVENT_TYPEx Register */ + val = readl(sllc_pmu->base + reg); + val &= ~(SLLC_EVTYPE_MASK << shift); + val |= (type << shift); + writel(val, sllc_pmu->base + reg); +} + +static void hisi_sllc_pmu_start_counters(struct hisi_pmu *sllc_pmu) +{ + u32 val; + + val = readl(sllc_pmu->base + SLLC_PERF_CTRL); + val |= SLLC_PERF_CTRL_EN; + writel(val, sllc_pmu->base + SLLC_PERF_CTRL); +} + +static void hisi_sllc_pmu_stop_counters(struct hisi_pmu *sllc_pmu) +{ + u32 val; + + val = readl(sllc_pmu->base + SLLC_PERF_CTRL); + val &= ~(SLLC_PERF_CTRL_EN); + writel(val, sllc_pmu->base + SLLC_PERF_CTRL); +} + +static void hisi_sllc_pmu_enable_counter(struct hisi_pmu *sllc_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + val = readl(sllc_pmu->base + SLLC_EVENT_CTRL); + val |= 1 << hwc->idx; + writel(val, sllc_pmu->base + SLLC_EVENT_CTRL); +} + +static void hisi_sllc_pmu_disable_counter(struct hisi_pmu *sllc_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + val = readl(sllc_pmu->base + SLLC_EVENT_CTRL); + val &= ~(1 << hwc->idx); + writel(val, sllc_pmu->base + SLLC_EVENT_CTRL); +} + +static void hisi_sllc_pmu_enable_counter_int(struct hisi_pmu *sllc_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + val = readl(sllc_pmu->base + SLLC_INT_MASK); + /* Write 0 to enable interrupt */ + val &= ~(1 << hwc->idx); + writel(val, sllc_pmu->base + SLLC_INT_MASK); +} + +static void hisi_sllc_pmu_disable_counter_int(struct hisi_pmu *sllc_pmu, + struct hw_perf_event *hwc) +{ + u32 val; + + val = readl(sllc_pmu->base + SLLC_INT_MASK); + /* Write 1 to mask interrupt */ + val |= 1 << hwc->idx; + writel(val, sllc_pmu->base + SLLC_INT_MASK); +} + +static u32 hisi_sllc_pmu_get_int_status(struct hisi_pmu *sllc_pmu) +{ + return readl(sllc_pmu->base + SLLC_INT_STATUS); +} + +static void hisi_sllc_pmu_clear_int_status(struct hisi_pmu *sllc_pmu, int idx) +{ + writel(1 << idx, sllc_pmu->base + SLLC_INT_CLEAR); +} + +static const struct acpi_device_id hisi_sllc_pmu_acpi_match[] = { + { "HISI0263", }, + {} +}; +MODULE_DEVICE_TABLE(acpi, hisi_sllc_pmu_acpi_match); + +static int hisi_sllc_pmu_init_data(struct platform_device *pdev, + struct hisi_pmu *sllc_pmu) +{ + /* + * Use the SCCL_ID and the index ID to identify the SLLC PMU, + * while SCCL_ID is from MPIDR_EL1 by CPU. 
+ */ + if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", + &sllc_pmu->sccl_id)) { + dev_err(&pdev->dev, "Cannot read sccl-id!\n"); + return -EINVAL; + } + + if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id", + &sllc_pmu->index_id)) { + dev_err(&pdev->dev, "Cannot read idx-id!\n"); + return -EINVAL; + } + + /* SLLC PMUs only share the same SCCL */ + sllc_pmu->ccl_id = -1; + + sllc_pmu->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(sllc_pmu->base)) { + dev_err(&pdev->dev, "ioremap failed for sllc_pmu resource.\n"); + return PTR_ERR(sllc_pmu->base); + } + + sllc_pmu->identifier = readl(sllc_pmu->base + SLLC_VERSION); + + return 0; +} + +static struct attribute *hisi_sllc_pmu_v2_format_attr[] = { + HISI_PMU_FORMAT_ATTR(event, "config:0-7"), + HISI_PMU_FORMAT_ATTR(tgtid_min, "config1:0-10"), + HISI_PMU_FORMAT_ATTR(tgtid_max, "config1:11-21"), + HISI_PMU_FORMAT_ATTR(srcid_cmd, "config1:22-32"), + HISI_PMU_FORMAT_ATTR(srcid_msk, "config1:33-43"), + HISI_PMU_FORMAT_ATTR(tracetag_en, "config1:44"), + NULL +}; + +static const struct attribute_group hisi_sllc_pmu_v2_format_group = { + .name = "format", + .attrs = hisi_sllc_pmu_v2_format_attr, +}; + +static struct attribute *hisi_sllc_pmu_v2_events_attr[] = { + HISI_PMU_EVENT_ATTR(rx_req, 0x30), + HISI_PMU_EVENT_ATTR(rx_data, 0x31), + HISI_PMU_EVENT_ATTR(tx_req, 0x34), + HISI_PMU_EVENT_ATTR(tx_data, 0x35), + HISI_PMU_EVENT_ATTR(cycles, 0x09), + NULL +}; + +static const struct attribute_group hisi_sllc_pmu_v2_events_group = { + .name = "events", + .attrs = hisi_sllc_pmu_v2_events_attr, +}; + +static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); + +static struct attribute *hisi_sllc_pmu_cpumask_attrs[] = { + &dev_attr_cpumask.attr, + NULL +}; + +static const struct attribute_group hisi_sllc_pmu_cpumask_attr_group = { + .attrs = hisi_sllc_pmu_cpumask_attrs, +}; + +static struct device_attribute hisi_sllc_pmu_identifier_attr = + __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); + +static struct attribute *hisi_sllc_pmu_identifier_attrs[] = { + &hisi_sllc_pmu_identifier_attr.attr, + NULL +}; + +static struct attribute_group hisi_sllc_pmu_identifier_group = { + .attrs = hisi_sllc_pmu_identifier_attrs, +}; + +static const struct attribute_group *hisi_sllc_pmu_v2_attr_groups[] = { + &hisi_sllc_pmu_v2_format_group, + &hisi_sllc_pmu_v2_events_group, + &hisi_sllc_pmu_cpumask_attr_group, + &hisi_sllc_pmu_identifier_group, + NULL +}; + +static const struct hisi_uncore_ops hisi_uncore_sllc_ops = { + .write_evtype = hisi_sllc_pmu_write_evtype, + .get_event_idx = hisi_uncore_pmu_get_event_idx, + .start_counters = hisi_sllc_pmu_start_counters, + .stop_counters = hisi_sllc_pmu_stop_counters, + .enable_counter = hisi_sllc_pmu_enable_counter, + .disable_counter = hisi_sllc_pmu_disable_counter, + .enable_counter_int = hisi_sllc_pmu_enable_counter_int, + .disable_counter_int = hisi_sllc_pmu_disable_counter_int, + .write_counter = hisi_sllc_pmu_write_counter, + .read_counter = hisi_sllc_pmu_read_counter, + .get_int_status = hisi_sllc_pmu_get_int_status, + .clear_int_status = hisi_sllc_pmu_clear_int_status, + .enable_filter = hisi_sllc_pmu_enable_filter, + .disable_filter = hisi_sllc_pmu_clear_filter, +}; + +static int hisi_sllc_pmu_dev_probe(struct platform_device *pdev, + struct hisi_pmu *sllc_pmu) +{ + int ret; + + ret = hisi_sllc_pmu_init_data(pdev, sllc_pmu); + if (ret) + return ret; + + ret = hisi_uncore_pmu_init_irq(sllc_pmu, pdev); + if (ret) + return ret; + + sllc_pmu->pmu_events.attr_groups = 
hisi_sllc_pmu_v2_attr_groups; + sllc_pmu->ops = &hisi_uncore_sllc_ops; + sllc_pmu->check_event = SLLC_NR_EVENTS; + sllc_pmu->counter_bits = 64; + sllc_pmu->num_counters = 8; + sllc_pmu->dev = &pdev->dev; + sllc_pmu->on_cpu = -1; + + return 0; +} + +static int hisi_sllc_pmu_probe(struct platform_device *pdev) +{ + struct hisi_pmu *sllc_pmu; + char *name; + int ret; + + sllc_pmu = devm_kzalloc(&pdev->dev, sizeof(*sllc_pmu), GFP_KERNEL); + if (!sllc_pmu) + return -ENOMEM; + + ret = hisi_sllc_pmu_dev_probe(pdev, sllc_pmu); + if (ret) + return ret; + + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_sllc%u", + sllc_pmu->sccl_id, sllc_pmu->index_id); + if (!name) + return -ENOMEM; + + ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE, + &sllc_pmu->node); + if (ret) { + dev_err(&pdev->dev, "Error %d registering hotplug\n", ret); + return ret; + } + + sllc_pmu->pmu = (struct pmu) { + .module = THIS_MODULE, + .task_ctx_nr = perf_invalid_context, + .event_init = hisi_uncore_pmu_event_init, + .pmu_enable = hisi_uncore_pmu_enable, + .pmu_disable = hisi_uncore_pmu_disable, + .add = hisi_uncore_pmu_add, + .del = hisi_uncore_pmu_del, + .start = hisi_uncore_pmu_start, + .stop = hisi_uncore_pmu_stop, + .read = hisi_uncore_pmu_read, + .attr_groups = sllc_pmu->pmu_events.attr_groups, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + }; + + ret = perf_pmu_register(&sllc_pmu->pmu, name, -1); + if (ret) { + dev_err(sllc_pmu->dev, "PMU register failed, ret = %d\n", ret); + cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE, + &sllc_pmu->node); + irq_set_affinity_hint(sllc_pmu->irq, NULL); + return ret; + } + + platform_set_drvdata(pdev, sllc_pmu); + + return ret; +} + +static int hisi_sllc_pmu_remove(struct platform_device *pdev) +{ + struct hisi_pmu *sllc_pmu = platform_get_drvdata(pdev); + + perf_pmu_unregister(&sllc_pmu->pmu); + cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE, + &sllc_pmu->node); + irq_set_affinity_hint(sllc_pmu->irq, NULL); + + return 0; +} + +static struct platform_driver hisi_sllc_pmu_driver = { + .driver = { + .name = "hisi_sllc_pmu", + .acpi_match_table = hisi_sllc_pmu_acpi_match, + .suppress_bind_attrs = true, + }, + .probe = hisi_sllc_pmu_probe, + .remove = hisi_sllc_pmu_remove, +}; + +static int __init hisi_sllc_pmu_module_init(void) +{ + int ret; + + ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE, + "AP_PERF_ARM_HISI_SLLC_ONLINE", + hisi_uncore_pmu_online_cpu, + hisi_uncore_pmu_offline_cpu); + if (ret) { + pr_err("SLLC PMU: cpuhp state setup failed, ret = %d\n", ret); + return ret; + } + + ret = platform_driver_register(&hisi_sllc_pmu_driver); + if (ret) + cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE); + + return ret; +} +module_init(hisi_sllc_pmu_module_init); + +static void __exit hisi_sllc_pmu_module_exit(void) +{ + platform_driver_unregister(&hisi_sllc_pmu_driver); + cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE); +} +module_exit(hisi_sllc_pmu_module_exit); + +MODULE_DESCRIPTION("HiSilicon SLLC uncore PMU driver"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Shaokun Zhang "); +MODULE_AUTHOR("Qi Liu "); diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index f14adb882338..5f5ce676532f 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -175,6 +175,7 @@ enum cpuhp_state { CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE, CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE, CPUHP_AP_PERF_ARM_HISI_L3_ONLINE, + CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE, 
 	CPUHP_AP_PERF_ARM_L2X0_ONLINE,
 	CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
 	CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE,
-- 
cgit v1.2.3

From a0ab25cd82eeb68bfa19a4d93a097521af5011b8 Mon Sep 17 00:00:00 2001
From: Shaokun Zhang
Date: Mon, 8 Mar 2021 14:50:36 +0800
Subject: drivers/perf: hisi: Add support for HiSilicon PA PMU driver

On the HiSilicon Hip09 platform, there is a PA (Protocol Adapter) module
on each chip's SICL (Super I/O Cluster) which incorporates three Hydra
interfaces and facilitates cache coherency between the dies on the chip.
The PA uncore PMU model is the same as that of the other Hip09 PMU
modules, and many PMU events are supported. Let's support the PMU driver
using the HiSilicon uncore PMU framework.

PA PMU supports the following filter functions:

* tracetag_en: allows the user to count events according to tt_req or
tt_core set in the L3C PMU. It's the same as for the other PMUs.

* srcid_cmd & srcid_msk: allows the user to filter statistics that come
from a specific CCL/ICL by configuring the source ID.

* tgtid_cmd & tgtid_msk: provides a similar function to srcid_cmd &
srcid_msk. Both are used to check where the data comes from or goes to.

Cc: Mark Rutland
Cc: Will Deacon
Cc: John Garry
Cc: Jonathan Cameron
Reviewed-by: John Garry
Co-developed-by: Qi Liu
Signed-off-by: Qi Liu
Signed-off-by: Shaokun Zhang
Link: https://lore.kernel.org/r/1615186237-22263-9-git-send-email-zhangshaokun@hisilicon.com
Signed-off-by: Will Deacon
---
 drivers/perf/hisilicon/Makefile             |   3 +-
 drivers/perf/hisilicon/hisi_uncore_pa_pmu.c | 500 ++++++++++++++++++++++++++++
 include/linux/cpuhotplug.h                  |   1 +
 3 files changed, 503 insertions(+), 1 deletion(-)
 create mode 100644 drivers/perf/hisilicon/hisi_uncore_pa_pmu.c

diff --git a/drivers/perf/hisilicon/Makefile b/drivers/perf/hisilicon/Makefile
index 6600a9d45dd8..7643c9f93e36 100644
--- a/drivers/perf/hisilicon/Makefile
+++ b/drivers/perf/hisilicon/Makefile
@@ -1,3 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o hisi_uncore_l3c_pmu.o \
-			  hisi_uncore_hha_pmu.o hisi_uncore_ddrc_pmu.o hisi_uncore_sllc_pmu.o
+			  hisi_uncore_hha_pmu.o hisi_uncore_ddrc_pmu.o hisi_uncore_sllc_pmu.o \
+			  hisi_uncore_pa_pmu.o

diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
new file mode 100644
index 000000000000..14f23eb31248
--- /dev/null
+++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
@@ -0,0 +1,500 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * HiSilicon PA uncore Hardware event counters support
+ *
+ * Copyright (C) 2020 HiSilicon Limited
+ * Author: Shaokun Zhang
+ *
+ * This code is based on the uncore PMUs like arm-cci and arm-ccn.
+ */ +#include +#include +#include +#include +#include +#include + +#include "hisi_uncore_pmu.h" + +/* PA register definition */ +#define PA_PERF_CTRL 0x1c00 +#define PA_EVENT_CTRL 0x1c04 +#define PA_TT_CTRL 0x1c08 +#define PA_TGTID_CTRL 0x1c14 +#define PA_SRCID_CTRL 0x1c18 +#define PA_INT_MASK 0x1c70 +#define PA_INT_STATUS 0x1c78 +#define PA_INT_CLEAR 0x1c7c +#define PA_EVENT_TYPE0 0x1c80 +#define PA_PMU_VERSION 0x1cf0 +#define PA_EVENT_CNT0_L 0x1f00 + +#define PA_EVTYPE_MASK 0xff +#define PA_NR_COUNTERS 0x8 +#define PA_PERF_CTRL_EN BIT(0) +#define PA_TRACETAG_EN BIT(4) +#define PA_TGTID_EN BIT(11) +#define PA_SRCID_EN BIT(11) +#define PA_TGTID_NONE 0 +#define PA_SRCID_NONE 0 +#define PA_TGTID_MSK_SHIFT 12 +#define PA_SRCID_MSK_SHIFT 12 + +HISI_PMU_EVENT_ATTR_EXTRACTOR(tgtid_cmd, config1, 10, 0); +HISI_PMU_EVENT_ATTR_EXTRACTOR(tgtid_msk, config1, 21, 11); +HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_cmd, config1, 32, 22); +HISI_PMU_EVENT_ATTR_EXTRACTOR(srcid_msk, config1, 43, 33); +HISI_PMU_EVENT_ATTR_EXTRACTOR(tracetag_en, config1, 44, 44); + +static void hisi_pa_pmu_enable_tracetag(struct perf_event *event) +{ + struct hisi_pmu *pa_pmu = to_hisi_pmu(event->pmu); + u32 tt_en = hisi_get_tracetag_en(event); + + if (tt_en) { + u32 val; + + val = readl(pa_pmu->base + PA_TT_CTRL); + val |= PA_TRACETAG_EN; + writel(val, pa_pmu->base + PA_TT_CTRL); + } +} + +static void hisi_pa_pmu_clear_tracetag(struct perf_event *event) +{ + struct hisi_pmu *pa_pmu = to_hisi_pmu(event->pmu); + u32 tt_en = hisi_get_tracetag_en(event); + + if (tt_en) { + u32 val; + + val = readl(pa_pmu->base + PA_TT_CTRL); + val &= ~PA_TRACETAG_EN; + writel(val, pa_pmu->base + PA_TT_CTRL); + } +} + +static void hisi_pa_pmu_config_tgtid(struct perf_event *event) +{ + struct hisi_pmu *pa_pmu = to_hisi_pmu(event->pmu); + u32 cmd = hisi_get_tgtid_cmd(event); + + if (cmd) { + u32 msk = hisi_get_tgtid_msk(event); + u32 val = cmd | PA_TGTID_EN | (msk << PA_TGTID_MSK_SHIFT); + + writel(val, pa_pmu->base + PA_TGTID_CTRL); + } +} + +static void hisi_pa_pmu_clear_tgtid(struct perf_event *event) +{ + struct hisi_pmu *pa_pmu = to_hisi_pmu(event->pmu); + u32 cmd = hisi_get_tgtid_cmd(event); + + if (cmd) + writel(PA_TGTID_NONE, pa_pmu->base + PA_TGTID_CTRL); +} + +static void hisi_pa_pmu_config_srcid(struct perf_event *event) +{ + struct hisi_pmu *pa_pmu = to_hisi_pmu(event->pmu); + u32 cmd = hisi_get_srcid_cmd(event); + + if (cmd) { + u32 msk = hisi_get_srcid_msk(event); + u32 val = cmd | PA_SRCID_EN | (msk << PA_SRCID_MSK_SHIFT); + + writel(val, pa_pmu->base + PA_SRCID_CTRL); + } +} + +static void hisi_pa_pmu_clear_srcid(struct perf_event *event) +{ + struct hisi_pmu *pa_pmu = to_hisi_pmu(event->pmu); + u32 cmd = hisi_get_srcid_cmd(event); + + if (cmd) + writel(PA_SRCID_NONE, pa_pmu->base + PA_SRCID_CTRL); +} + +static void hisi_pa_pmu_enable_filter(struct perf_event *event) +{ + if (event->attr.config1 != 0x0) { + hisi_pa_pmu_enable_tracetag(event); + hisi_pa_pmu_config_srcid(event); + hisi_pa_pmu_config_tgtid(event); + } +} + +static void hisi_pa_pmu_disable_filter(struct perf_event *event) +{ + if (event->attr.config1 != 0x0) { + hisi_pa_pmu_clear_tgtid(event); + hisi_pa_pmu_clear_srcid(event); + hisi_pa_pmu_clear_tracetag(event); + } +} + +static u32 hisi_pa_pmu_get_counter_offset(int idx) +{ + return (PA_EVENT_CNT0_L + idx * 8); +} + +static u64 hisi_pa_pmu_read_counter(struct hisi_pmu *pa_pmu, + struct hw_perf_event *hwc) +{ + return readq(pa_pmu->base + hisi_pa_pmu_get_counter_offset(hwc->idx)); +} + +static void 
hisi_pa_pmu_write_counter(struct hisi_pmu *pa_pmu,
+				      struct hw_perf_event *hwc, u64 val)
+{
+	writeq(val, pa_pmu->base + hisi_pa_pmu_get_counter_offset(hwc->idx));
+}
+
+static void hisi_pa_pmu_write_evtype(struct hisi_pmu *pa_pmu, int idx,
+				     u32 type)
+{
+	u32 reg, reg_idx, shift, val;
+
+	/*
+	 * Select the appropriate event select register (PA_EVENT_TYPE0/1).
+	 * There are 2 event select registers for the 8 hardware counters.
+	 * Event code is 8-bits and for the former 4 hardware counters,
+	 * PA_EVENT_TYPE0 is chosen. For the latter 4 hardware counters,
+	 * PA_EVENT_TYPE1 is chosen.
+	 */
+	reg = PA_EVENT_TYPE0 + (idx / 4) * 4;
+	reg_idx = idx % 4;
+	shift = 8 * reg_idx;
+
+	/* Write event code to PA_EVENT_TYPEx Register */
+	val = readl(pa_pmu->base + reg);
+	val &= ~(PA_EVTYPE_MASK << shift);
+	val |= (type << shift);
+	writel(val, pa_pmu->base + reg);
+}
+
+static void hisi_pa_pmu_start_counters(struct hisi_pmu *pa_pmu)
+{
+	u32 val;
+
+	val = readl(pa_pmu->base + PA_PERF_CTRL);
+	val |= PA_PERF_CTRL_EN;
+	writel(val, pa_pmu->base + PA_PERF_CTRL);
+}
+
+static void hisi_pa_pmu_stop_counters(struct hisi_pmu *pa_pmu)
+{
+	u32 val;
+
+	val = readl(pa_pmu->base + PA_PERF_CTRL);
+	val &= ~(PA_PERF_CTRL_EN);
+	writel(val, pa_pmu->base + PA_PERF_CTRL);
+}
+
+static void hisi_pa_pmu_enable_counter(struct hisi_pmu *pa_pmu,
+				       struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	/* Enable counter index in PA_EVENT_CTRL register */
+	val = readl(pa_pmu->base + PA_EVENT_CTRL);
+	val |= 1 << hwc->idx;
+	writel(val, pa_pmu->base + PA_EVENT_CTRL);
+}
+
+static void hisi_pa_pmu_disable_counter(struct hisi_pmu *pa_pmu,
+					struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	/* Clear counter index in PA_EVENT_CTRL register */
+	val = readl(pa_pmu->base + PA_EVENT_CTRL);
+	val &= ~(1 << hwc->idx);
+	writel(val, pa_pmu->base + PA_EVENT_CTRL);
+}
+
+static void hisi_pa_pmu_enable_counter_int(struct hisi_pmu *pa_pmu,
+					   struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	/* Write 0 to enable interrupt */
+	val = readl(pa_pmu->base + PA_INT_MASK);
+	val &= ~(1 << hwc->idx);
+	writel(val, pa_pmu->base + PA_INT_MASK);
+}
+
+static void hisi_pa_pmu_disable_counter_int(struct hisi_pmu *pa_pmu,
+					    struct hw_perf_event *hwc)
+{
+	u32 val;
+
+	/* Write 1 to mask interrupt */
+	val = readl(pa_pmu->base + PA_INT_MASK);
+	val |= 1 << hwc->idx;
+	writel(val, pa_pmu->base + PA_INT_MASK);
+}
+
+static u32 hisi_pa_pmu_get_int_status(struct hisi_pmu *pa_pmu)
+{
+	return readl(pa_pmu->base + PA_INT_STATUS);
+}
+
+static void hisi_pa_pmu_clear_int_status(struct hisi_pmu *pa_pmu, int idx)
+{
+	writel(1 << idx, pa_pmu->base + PA_INT_CLEAR);
+}
+
+static const struct acpi_device_id hisi_pa_pmu_acpi_match[] = {
+	{ "HISI0273", },
+	{}
+};
+MODULE_DEVICE_TABLE(acpi, hisi_pa_pmu_acpi_match);
+
+static int hisi_pa_pmu_init_data(struct platform_device *pdev,
+				 struct hisi_pmu *pa_pmu)
+{
+	/*
+	 * Use the SCCL_ID and the index ID to identify the PA PMU,
+	 * where SCCL_ID is that of the nearest SCCL to this SICL, and a
+	 * CPU core is chosen from that SCCL to manage this PMU.
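+	 * (The PA resides on an I/O die, which has no local CPU cores of
+	 * its own, hence a core from the nearest CPU die is used.)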
+ */ + if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id", + &pa_pmu->sccl_id)) { + dev_err(&pdev->dev, "Cannot read sccl-id!\n"); + return -EINVAL; + } + + if (device_property_read_u32(&pdev->dev, "hisilicon,idx-id", + &pa_pmu->index_id)) { + dev_err(&pdev->dev, "Cannot read idx-id!\n"); + return -EINVAL; + } + + pa_pmu->ccl_id = -1; + + pa_pmu->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(pa_pmu->base)) { + dev_err(&pdev->dev, "ioremap failed for pa_pmu resource.\n"); + return PTR_ERR(pa_pmu->base); + } + + pa_pmu->identifier = readl(pa_pmu->base + PA_PMU_VERSION); + + return 0; +} + +static struct attribute *hisi_pa_pmu_v2_format_attr[] = { + HISI_PMU_FORMAT_ATTR(event, "config:0-7"), + HISI_PMU_FORMAT_ATTR(tgtid_cmd, "config1:0-10"), + HISI_PMU_FORMAT_ATTR(tgtid_msk, "config1:11-21"), + HISI_PMU_FORMAT_ATTR(srcid_cmd, "config1:22-32"), + HISI_PMU_FORMAT_ATTR(srcid_msk, "config1:33-43"), + HISI_PMU_FORMAT_ATTR(tracetag_en, "config1:44"), + NULL, +}; + +static const struct attribute_group hisi_pa_pmu_v2_format_group = { + .name = "format", + .attrs = hisi_pa_pmu_v2_format_attr, +}; + +static struct attribute *hisi_pa_pmu_v2_events_attr[] = { + HISI_PMU_EVENT_ATTR(rx_req, 0x40), + HISI_PMU_EVENT_ATTR(tx_req, 0x5c), + HISI_PMU_EVENT_ATTR(cycle, 0x78), + NULL +}; + +static const struct attribute_group hisi_pa_pmu_v2_events_group = { + .name = "events", + .attrs = hisi_pa_pmu_v2_events_attr, +}; + +static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL); + +static struct attribute *hisi_pa_pmu_cpumask_attrs[] = { + &dev_attr_cpumask.attr, + NULL +}; + +static const struct attribute_group hisi_pa_pmu_cpumask_attr_group = { + .attrs = hisi_pa_pmu_cpumask_attrs, +}; + +static struct device_attribute hisi_pa_pmu_identifier_attr = + __ATTR(identifier, 0444, hisi_uncore_pmu_identifier_attr_show, NULL); + +static struct attribute *hisi_pa_pmu_identifier_attrs[] = { + &hisi_pa_pmu_identifier_attr.attr, + NULL +}; + +static struct attribute_group hisi_pa_pmu_identifier_group = { + .attrs = hisi_pa_pmu_identifier_attrs, +}; + +static const struct attribute_group *hisi_pa_pmu_v2_attr_groups[] = { + &hisi_pa_pmu_v2_format_group, + &hisi_pa_pmu_v2_events_group, + &hisi_pa_pmu_cpumask_attr_group, + &hisi_pa_pmu_identifier_group, + NULL +}; + +static const struct hisi_uncore_ops hisi_uncore_pa_ops = { + .write_evtype = hisi_pa_pmu_write_evtype, + .get_event_idx = hisi_uncore_pmu_get_event_idx, + .start_counters = hisi_pa_pmu_start_counters, + .stop_counters = hisi_pa_pmu_stop_counters, + .enable_counter = hisi_pa_pmu_enable_counter, + .disable_counter = hisi_pa_pmu_disable_counter, + .enable_counter_int = hisi_pa_pmu_enable_counter_int, + .disable_counter_int = hisi_pa_pmu_disable_counter_int, + .write_counter = hisi_pa_pmu_write_counter, + .read_counter = hisi_pa_pmu_read_counter, + .get_int_status = hisi_pa_pmu_get_int_status, + .clear_int_status = hisi_pa_pmu_clear_int_status, + .enable_filter = hisi_pa_pmu_enable_filter, + .disable_filter = hisi_pa_pmu_disable_filter, +}; + +static int hisi_pa_pmu_dev_probe(struct platform_device *pdev, + struct hisi_pmu *pa_pmu) +{ + int ret; + + ret = hisi_pa_pmu_init_data(pdev, pa_pmu); + if (ret) + return ret; + + ret = hisi_uncore_pmu_init_irq(pa_pmu, pdev); + if (ret) + return ret; + + pa_pmu->pmu_events.attr_groups = hisi_pa_pmu_v2_attr_groups; + pa_pmu->num_counters = PA_NR_COUNTERS; + pa_pmu->ops = &hisi_uncore_pa_ops; + pa_pmu->check_event = 0xB0; + pa_pmu->counter_bits = 64; + pa_pmu->dev = &pdev->dev; + pa_pmu->on_cpu = -1; 
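+	/* The associated CPU (on_cpu) is chosen in the cpuhp online callback */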
+ + return 0; +} + +static int hisi_pa_pmu_probe(struct platform_device *pdev) +{ + struct hisi_pmu *pa_pmu; + char *name; + int ret; + + pa_pmu = devm_kzalloc(&pdev->dev, sizeof(*pa_pmu), GFP_KERNEL); + if (!pa_pmu) + return -ENOMEM; + + ret = hisi_pa_pmu_dev_probe(pdev, pa_pmu); + if (ret) + return ret; + /* + * PA is attached in SICL and the CPU core is chosen to manage this + * PMU which is the nearest SCCL, while its SCCL_ID is greater than + * one with the SICL_ID. + */ + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sicl%u_pa%u", + pa_pmu->sccl_id - 1, pa_pmu->index_id); + if (!name) + return -ENOMEM; + + ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HISI_PA_ONLINE, + &pa_pmu->node); + if (ret) { + dev_err(&pdev->dev, "Error %d registering hotplug\n", ret); + return ret; + } + + pa_pmu->pmu = (struct pmu) { + .module = THIS_MODULE, + .task_ctx_nr = perf_invalid_context, + .event_init = hisi_uncore_pmu_event_init, + .pmu_enable = hisi_uncore_pmu_enable, + .pmu_disable = hisi_uncore_pmu_disable, + .add = hisi_uncore_pmu_add, + .del = hisi_uncore_pmu_del, + .start = hisi_uncore_pmu_start, + .stop = hisi_uncore_pmu_stop, + .read = hisi_uncore_pmu_read, + .attr_groups = pa_pmu->pmu_events.attr_groups, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + }; + + ret = perf_pmu_register(&pa_pmu->pmu, name, -1); + if (ret) { + dev_err(pa_pmu->dev, "PMU register failed, ret = %d\n", ret); + cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_PA_ONLINE, + &pa_pmu->node); + irq_set_affinity_hint(pa_pmu->irq, NULL); + return ret; + } + + platform_set_drvdata(pdev, pa_pmu); + return ret; +} + +static int hisi_pa_pmu_remove(struct platform_device *pdev) +{ + struct hisi_pmu *pa_pmu = platform_get_drvdata(pdev); + + perf_pmu_unregister(&pa_pmu->pmu); + cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_PA_ONLINE, + &pa_pmu->node); + irq_set_affinity_hint(pa_pmu->irq, NULL); + + return 0; +} + +static struct platform_driver hisi_pa_pmu_driver = { + .driver = { + .name = "hisi_pa_pmu", + .acpi_match_table = hisi_pa_pmu_acpi_match, + .suppress_bind_attrs = true, + }, + .probe = hisi_pa_pmu_probe, + .remove = hisi_pa_pmu_remove, +}; + +static int __init hisi_pa_pmu_module_init(void) +{ + int ret; + + ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_HISI_PA_ONLINE, + "AP_PERF_ARM_HISI_PA_ONLINE", + hisi_uncore_pmu_online_cpu, + hisi_uncore_pmu_offline_cpu); + if (ret) { + pr_err("PA PMU: cpuhp state setup failed, ret = %d\n", ret); + return ret; + } + + ret = platform_driver_register(&hisi_pa_pmu_driver); + if (ret) + cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_PA_ONLINE); + + return ret; +} +module_init(hisi_pa_pmu_module_init); + +static void __exit hisi_pa_pmu_module_exit(void) +{ + platform_driver_unregister(&hisi_pa_pmu_driver); + cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_PA_ONLINE); +} +module_exit(hisi_pa_pmu_module_exit); + +MODULE_DESCRIPTION("HiSilicon Protocol Adapter uncore PMU driver"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Shaokun Zhang "); +MODULE_AUTHOR("Qi Liu "); diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 5f5ce676532f..8ef744007e38 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -175,6 +175,7 @@ enum cpuhp_state { CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE, CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE, CPUHP_AP_PERF_ARM_HISI_L3_ONLINE, + CPUHP_AP_PERF_ARM_HISI_PA_ONLINE, CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE, CPUHP_AP_PERF_ARM_L2X0_ONLINE, CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE, -- cgit v1.2.3 From 
9b86b1b41e0f48b5b25918e07aeceb00e13d1ce2 Mon Sep 17 00:00:00 2001
From: Shaokun Zhang
Date: Mon, 8 Mar 2021 14:50:37 +0800
Subject: docs: perf: Add new description on HiSilicon uncore PMU v2

Some new functions are added to the HiSilicon uncore PMUs. Document them
to provide guidance on how to use them.

Cc: Mark Rutland
Cc: Will Deacon
Cc: John Garry
Cc: Jonathan Cameron
Reviewed-by: John Garry
Co-developed-by: Qi Liu
Signed-off-by: Qi Liu
Signed-off-by: Shaokun Zhang
Link: https://lore.kernel.org/r/1615186237-22263-10-git-send-email-zhangshaokun@hisilicon.com
Signed-off-by: Will Deacon
---
 Documentation/admin-guide/perf/hisi-pmu.rst | 49 +++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/Documentation/admin-guide/perf/hisi-pmu.rst b/Documentation/admin-guide/perf/hisi-pmu.rst
index 404a5c3d9d00..3b3120e2dd9e 100644
--- a/Documentation/admin-guide/perf/hisi-pmu.rst
+++ b/Documentation/admin-guide/perf/hisi-pmu.rst
@@ -53,6 +53,55 @@ Example usage of perf::
   $# perf stat -a -e hisi_sccl3_l3c0/rd_hit_cpipe/ sleep 5
   $# perf stat -a -e hisi_sccl3_l3c0/config=0x02/ sleep 5
 
+For HiSilicon uncore PMU v2, whose identifier is 0x30, the topology is the
+same as in PMU v1, but some new functions are added to the hardware.
+
+(a) L3C PMU supports filtering by core/thread within the cluster, which can
+be specified as a bitmap.
+ $# perf stat -a -e hisi_sccl3_l3c0/config=0x02,tt_core=0x3/ sleep 5
+This will only count the operations from core/thread 0 and 1 in this cluster.
+
+(b) Tracetag allows the user to choose to count only read, write or atomic
+operations via the tt_req parameter in perf. The default value counts all
+operations. tt_req is 3 bits: 3'b100 represents read operations, 3'b101
+represents write operations, 3'b110 represents atomic store operations and
+3'b111 represents atomic non-store operations; other values are reserved.
+ $# perf stat -a -e hisi_sccl3_l3c0/config=0x02,tt_req=0x4/ sleep 5
+This will only count the read operations in this cluster.
+
+(c) Datasrc allows the user to check where the data comes from. It is 5 bits.
+Some important codes are as follows:
+5'b00001: comes from L3C in this die;
+5'b01000: comes from L3C in the cross-die;
+5'b01001: comes from L3C which is in another socket;
+5'b01110: comes from the local DDR;
+5'b01111: comes from the cross-die DDR;
+5'b10000: comes from cross-socket DDR;
+etc. It is mainly helpful for checking how near the data source is to the
+CPU cores. If datasrc_cfg is used on a multi-chip system, datasrc_skt shall
+also be configured in the perf command.
+ $# perf stat -a -e hisi_sccl3_l3c0/config=0xb9,datasrc_cfg=0xE/,
+ hisi_sccl3_l3c0/config=0xb9,datasrc_cfg=0xF/ sleep 5
+
+(d) Some HiSilicon SoCs encapsulate multiple CPU and IO dies. Each CPU die
+contains several Compute Clusters (CCLs). The I/O dies are called Super I/O
+clusters (SICLs) containing multiple I/O clusters (ICLs). Each CCL/ICL in
+the SoC has a unique ID. Each ID is 11 bits, comprising a 6-bit SCCL-ID and
+a 5-bit CCL/ICL-ID. For an I/O die, the ICL-ID is one of:
+5'b00000: I/O_MGMT_ICL;
+5'b00001: Network_ICL;
+5'b00011: HAC_ICL;
+5'b10000: PCIe_ICL;
+
+Users can configure IDs to count data coming from a specific CCL/ICL by
+setting srcid_cmd & srcid_msk, and data destined for a specific CCL/ICL by
+setting tgtid_cmd & tgtid_msk. A set bit in srcid_msk/tgtid_msk means the
+PMU will not check the bit when matching against the srcid_cmd/tgtid_cmd.
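+
+For example, to count only events whose source CCL/ICL ID is 0x2, checking
+every ID bit (the ID value here is purely illustrative):
+ $# perf stat -a -e hisi_sccl3_l3c0/config=0x02,srcid_cmd=0x2,srcid_msk=0x0/ sleep 5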
+
+If all of these options are disabled, the PMU counts with its default
+settings, which do not distinguish any filter condition or ID information,
+and the total counter values are returned in the PMU counters.
+
 The current driver does not support sampling. So "perf record" is
 unsupported. Also attach to a task is unsupported as the events are all
 uncore.
-- 
cgit v1.2.3

From 4f30ba1cce36d413c46097c1f3f96891c662a6d6 Mon Sep 17 00:00:00 2001
From: Linus Walleij
Date: Thu, 25 Mar 2021 15:13:04 +0100
Subject: arm64: barrier: Remove spec_bar() macro

The spec_bar() macro was introduced in commit bd4fb6d270bc ("arm64: Add
support for SB barrier and patch in over DSB; ISB sequences") as a way
for C code to insert a speculation barrier, and was then used in one
single place: set_fs().

Later, commit 3d2403fd10a1 ("arm64: uaccess: remove set_fs()") deleted
set_fs() altogether and, as noted in that commit, the regular sb()
assembly macro is used on the new path. Delete the remnant.

Cc: Mark Rutland
Signed-off-by: Linus Walleij
Acked-by: Mark Rutland
Link: https://lore.kernel.org/r/20210325141304.1607595-1-linus.walleij@linaro.org
Signed-off-by: Catalin Marinas
---
 arch/arm64/include/asm/barrier.h | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index c3009b0e5239..bab29932d21b 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -25,10 +25,6 @@
 #define psb_csync()	asm volatile("hint #17" : : : "memory")
 #define csdb()		asm volatile("hint #20" : : : "memory")
 
-#define spec_bar()	asm volatile(ALTERNATIVE("dsb nsh\nisb\n",	\
-						 SB_BARRIER_INSN"nop\n",	\
-						 ARM64_HAS_SB))
-
 #ifdef CONFIG_ARM64_PSEUDO_NMI
 #define pmr_sync()						\
 	do {							\
-- 
cgit v1.2.3

From 18107f8a2df6bf1c6cac8d0713f757f866d5af51 Mon Sep 17 00:00:00 2001
From: Vladimir Murzin
Date: Fri, 12 Mar 2021 17:38:10 +0000
Subject: arm64: Support execute-only permissions with Enhanced PAN

Enhanced Privileged Access Never (EPAN) allows Privileged Access Never
to be used with Execute-only mappings. Absence of such support was a
reason for 24cecc377463 ("arm64: Revert support for execute-only user
mappings"). Thus it can now be revisited and re-enabled.

Cc: Kees Cook
Signed-off-by: Vladimir Murzin
Acked-by: Will Deacon
Link: https://lore.kernel.org/r/20210312173811.58284-2-vladimir.murzin@arm.com
Signed-off-by: Catalin Marinas
---
 arch/arm64/Kconfig                    | 17 +++++++++++++++++
 arch/arm64/include/asm/cpucaps.h      |  3 ++-
 arch/arm64/include/asm/pgtable-prot.h |  5 +++--
 arch/arm64/include/asm/pgtable.h      | 31 +++++++++++++++++++++--------
 arch/arm64/include/asm/sysreg.h       |  3 ++-
 arch/arm64/kernel/cpufeature.c        | 12 ++++++++++++
 arch/arm64/mm/fault.c                 | 18 +++++++++++++++++-
 mm/mmap.c                             |  6 ++++++
 8 files changed, 82 insertions(+), 13 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 5656e7aacd69..c4d7bafcea87 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1058,6 +1058,9 @@ config SYS_SUPPORTS_HUGETLBFS
 config ARCH_HAS_CACHE_LINE_SIZE
 	def_bool y
 
+config ARCH_HAS_FILTER_PGPROT
+	def_bool y
+
 config ARCH_ENABLE_SPLIT_PMD_PTLOCK
 	def_bool y if PGTABLE_LEVELS > 2
 
@@ -1681,6 +1684,20 @@ config ARM64_MTE
 
 endmenu
 
+menu "ARMv8.7 architectural features"
+
+config ARM64_EPAN
+	bool "Enable support for Enhanced Privileged Access Never (EPAN)"
+	default y
+	depends on ARM64_PAN
+	help
+	 Enhanced Privileged Access Never (EPAN) allows Privileged
+	 Access Never to be used with Execute-only mappings.
+ + The feature is detected at runtime, and will remain disabled + if the cpu does not implement the feature. +endmenu + config ARM64_SVE bool "ARM Scalable Vector Extension support" default y diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index b77d997b173b..9e3ec4dd56d8 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -66,7 +66,8 @@ #define ARM64_WORKAROUND_1508412 58 #define ARM64_HAS_LDAPR 59 #define ARM64_KVM_PROTECTED_MODE 60 +#define ARM64_HAS_EPAN 61 -#define ARM64_NCAPS 61 +#define ARM64_NCAPS 62 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 9a65fb528110..fab2f573f7a4 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -87,12 +87,13 @@ extern bool arm64_use_ng_mappings; #define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_WRITE) #define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) #define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN) +#define PAGE_EXECONLY __pgprot(_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN) #define __P000 PAGE_NONE #define __P001 PAGE_READONLY #define __P010 PAGE_READONLY #define __P011 PAGE_READONLY -#define __P100 PAGE_READONLY_EXEC +#define __P100 PAGE_EXECONLY #define __P101 PAGE_READONLY_EXEC #define __P110 PAGE_READONLY_EXEC #define __P111 PAGE_READONLY_EXEC @@ -101,7 +102,7 @@ extern bool arm64_use_ng_mappings; #define __S001 PAGE_READONLY #define __S010 PAGE_SHARED #define __S011 PAGE_SHARED -#define __S100 PAGE_READONLY_EXEC +#define __S100 PAGE_EXECONLY #define __S101 PAGE_READONLY_EXEC #define __S110 PAGE_SHARED_EXEC #define __S111 PAGE_SHARED_EXEC diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 47027796c2f9..0b10204e72fc 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -113,11 +113,12 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte)) #define pte_valid(pte) (!!(pte_val(pte) & PTE_VALID)) +/* + * Execute-only user mappings do not have the PTE_USER bit set. All valid + * kernel mappings have the PTE_UXN bit set. + */ #define pte_valid_not_user(pte) \ - ((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID) -#define pte_valid_user(pte) \ - ((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) - + ((pte_val(pte) & (PTE_VALID | PTE_USER | PTE_UXN)) == (PTE_VALID | PTE_UXN)) /* * Could the pte be present in the TLB? We must check mm_tlb_flush_pending * so that we don't erroneously return false for pages that have been @@ -130,12 +131,14 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; (mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid(pte)) /* - * p??_access_permitted() is true for valid user mappings (subject to the - * write permission check). PROT_NONE mappings do not have the PTE_VALID bit - * set. + * p??_access_permitted() is true for valid user mappings (PTE_USER + * bit set, subject to the write permission check). For execute-only + * mappings, like PROT_EXEC with EPAN (both PTE_USER and PTE_UXN bits + * not set) must return false. PROT_NONE mappings do not have the + * PTE_VALID bit set. 
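+ * For example, a PAGE_EXECONLY pte has PTE_VALID set but neither PTE_USER
+ * nor PTE_UXN set, so pte_access_permitted() correctly returns false for
+ * an execute-only mapping.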
*/ #define pte_access_permitted(pte, write) \ - (pte_valid_user(pte) && (!(write) || pte_write(pte))) + (((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) && (!(write) || pte_write(pte))) #define pmd_access_permitted(pmd, write) \ (pte_access_permitted(pmd_pte(pmd), (write))) #define pud_access_permitted(pud, write) \ @@ -995,6 +998,18 @@ static inline bool arch_wants_old_prefaulted_pte(void) } #define arch_wants_old_prefaulted_pte arch_wants_old_prefaulted_pte +static inline pgprot_t arch_filter_pgprot(pgprot_t prot) +{ + if (cpus_have_const_cap(ARM64_HAS_EPAN)) + return prot; + + if (pgprot_val(prot) != pgprot_val(PAGE_EXECONLY)) + return prot; + + return PAGE_READONLY_EXEC; +} + + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_PGTABLE_H */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index d4a5fca984c3..94291c7c6443 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -597,6 +597,7 @@ (SCTLR_EL2_RES1 | ENDIAN_SET_EL2) /* SCTLR_EL1 specific flags. */ +#define SCTLR_EL1_EPAN (BIT(57)) #define SCTLR_EL1_ATA0 (BIT(42)) #define SCTLR_EL1_TCF0_SHIFT 38 @@ -637,7 +638,7 @@ SCTLR_EL1_SED | SCTLR_ELx_I | SCTLR_EL1_DZE | SCTLR_EL1_UCT | \ SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN | SCTLR_ELx_ITFSB | \ SCTLR_ELx_ATA | SCTLR_EL1_ATA0 | ENDIAN_SET_EL1 | SCTLR_EL1_UCI | \ - SCTLR_EL1_RES1) + SCTLR_EL1_EPAN | SCTLR_EL1_RES1) /* MAIR_ELx memory attributes (used by Linux) */ #define MAIR_ATTR_DEVICE_nGnRnE UL(0x00) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 066030717a4c..2ab04967dca7 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1821,6 +1821,18 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .cpu_enable = cpu_enable_pan, }, #endif /* CONFIG_ARM64_PAN */ +#ifdef CONFIG_ARM64_EPAN + { + .desc = "Enhanced Privileged Access Never", + .capability = ARM64_HAS_EPAN, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64MMFR1_EL1, + .field_pos = ID_AA64MMFR1_PAN_SHIFT, + .sign = FTR_UNSIGNED, + .min_field_value = 3, + }, +#endif /* CONFIG_ARM64_EPAN */ #ifdef CONFIG_ARM64_LSE_ATOMICS { .desc = "LSE atomic instructions", diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index f37d4e3830b7..871c82ab0a30 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -527,7 +527,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned int esr, const struct fault_info *inf; struct mm_struct *mm = current->mm; vm_fault_t fault; - unsigned long vm_flags = VM_ACCESS_FLAGS; + unsigned long vm_flags; unsigned int mm_flags = FAULT_FLAG_DEFAULT; unsigned long addr = untagged_addr(far); @@ -544,12 +544,28 @@ static int __kprobes do_page_fault(unsigned long far, unsigned int esr, if (user_mode(regs)) mm_flags |= FAULT_FLAG_USER; + /* + * vm_flags tells us what bits we must have in vma->vm_flags + * for the fault to be benign, __do_page_fault() would check + * vma->vm_flags & vm_flags and returns an error if the + * intersection is empty + */ if (is_el0_instruction_abort(esr)) { + /* It was exec fault */ vm_flags = VM_EXEC; mm_flags |= FAULT_FLAG_INSTRUCTION; } else if (is_write_abort(esr)) { + /* It was write fault */ vm_flags = VM_WRITE; mm_flags |= FAULT_FLAG_WRITE; + } else { + /* It was read fault */ + vm_flags = VM_READ; + /* Write implies read */ + vm_flags |= VM_WRITE; + /* If EPAN is absent then exec implies read */ + if (!cpus_have_const_cap(ARM64_HAS_EPAN)) + vm_flags 
|= VM_EXEC;
 	}
 
 	if (is_ttbr0_addr(addr) && is_el1_permission_fault(addr, esr, regs)) {
diff --git a/mm/mmap.c b/mm/mmap.c
index 3f287599a7a3..1d96a21acb2f 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -93,6 +93,12 @@ static void unmap_region(struct mm_struct *mm,
 * MAP_PRIVATE	r: (no) no	r: (yes) yes	r: (no) yes	r: (no) yes
 *		w: (no) no	w: (no) no	w: (copy) copy	w: (no) no
 *		x: (no) no	x: (no) yes	x: (no) yes	x: (yes) yes
+ *
+ * On arm64, PROT_EXEC has the following behaviour for both MAP_SHARED and
+ * MAP_PRIVATE (with Enhanced PAN supported):
+ *								r: (no) no
+ *								w: (no) no
+ *								x: (yes) yes
 */
 pgprot_t protection_map[16] __ro_after_init = {
 	__P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
-- 
cgit v1.2.3

From b07f3499661c61f03478c99ff3fcb2381ddb9e38 Mon Sep 17 00:00:00 2001
From: Mark Brown
Date: Fri, 19 Mar 2021 17:40:22 +0000
Subject: arm64: stacktrace: Move start_backtrace() out of the header

Currently start_backtrace() is a static inline function in the header.
Since it really shouldn't be performance critical enough to need
inlining, move it into a C file; this will save anyone else scratching
their head about why it is defined in the header. As far as I can see,
it's only there because it was factored out of the various callers.

Signed-off-by: Mark Brown
Acked-by: Mark Rutland
Link: https://lore.kernel.org/r/20210319174022.33051-1-broonie@kernel.org
Signed-off-by: Catalin Marinas
---
 arch/arm64/include/asm/stacktrace.h | 24 ++----------------------
 arch/arm64/kernel/stacktrace.c      | 24 ++++++++++++++++++++++++
 2 files changed, 26 insertions(+), 22 deletions(-)

diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index eb29b1fe8255..4b33ca620679 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -148,27 +148,7 @@ static inline bool on_accessible_stack(const struct task_struct *tsk,
 	return false;
 }
 
-static inline void start_backtrace(struct stackframe *frame,
-				   unsigned long fp, unsigned long pc)
-{
-	frame->fp = fp;
-	frame->pc = pc;
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	frame->graph = 0;
-#endif
-
-	/*
-	 * Prime the first unwind.
-	 *
-	 * In unwind_frame() we'll check that the FP points to a valid stack,
-	 * which can't be STACK_TYPE_UNKNOWN, and the first unwind will be
-	 * treated as a transition to whichever stack that happens to be. The
-	 * prev_fp value won't be used, but we set it to 0 such that it is
-	 * definitely not an accessible stack address.
-	 */
-	bitmap_zero(frame->stacks_done, __NR_STACK_TYPES);
-	frame->prev_fp = 0;
-	frame->prev_type = STACK_TYPE_UNKNOWN;
-}
+void start_backtrace(struct stackframe *frame, unsigned long fp,
+		     unsigned long pc);
 
 #endif	/* __ASM_STACKTRACE_H */
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index ad20981dfda4..2a1a7a281d34 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -32,6 +32,30 @@
  *	add	sp, sp, #0x10
  */
 
+
+void start_backtrace(struct stackframe *frame, unsigned long fp,
+		     unsigned long pc)
+{
+	frame->fp = fp;
+	frame->pc = pc;
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	frame->graph = 0;
+#endif
+
+	/*
+	 * Prime the first unwind.
+	 *
+	 * In unwind_frame() we'll check that the FP points to a valid stack,
+	 * which can't be STACK_TYPE_UNKNOWN, and the first unwind will be
+	 * treated as a transition to whichever stack that happens to be. The
+	 * prev_fp value won't be used, but we set it to 0 such that it is
+	 * definitely not an accessible stack address.
+ */ + bitmap_zero(frame->stacks_done, __NR_STACK_TYPES); + frame->prev_fp = 0; + frame->prev_type = STACK_TYPE_UNKNOWN; +} + /* * Unwind from one frame record (A) to the next frame record (B). * -- cgit v1.2.3 From 776e49af6000ef95301d0d3f834543bda43cb7fb Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 26 Mar 2021 18:01:36 +0000 Subject: arm64: setup: name `mair` register In __cpu_setup we conditionally manipulate the MAIR_EL1 value in x5 before later reusing x5 as a scratch register for unrelated temporary variables. To make this a bit clearer, let's move the MAIR_EL1 value into a named register `mair`. To simplify the register allocation, this is placed in the highest available caller-saved scratch register, x17. As it is no longer clobbered by other usage, we can write the value to MAIR_EL1 at the end of the function as we do for TCR_EL1 rather than part-way through feature discovery. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Cc: Marc Zyngier Cc: Will Deacon Link: https://lore.kernel.org/r/20210326180137.43119-2-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/mm/proc.S | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index c967bfd30d2b..f560b6fde34c 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -421,7 +421,8 @@ SYM_FUNC_START(__cpu_setup) /* * Memory region attributes */ - mov_q x5, MAIR_EL1_SET + mair .req x17 + mov_q mair, MAIR_EL1_SET #ifdef CONFIG_ARM64_MTE mte_tcr .req x20 mov mte_tcr, #0 @@ -438,7 +439,7 @@ SYM_FUNC_START(__cpu_setup) /* Normal Tagged memory type at the corresponding MAIR index */ mov x10, #MAIR_ATTR_NORMAL_TAGGED - bfi x5, x10, #(8 * MT_NORMAL_TAGGED), #8 + bfi mair, x10, #(8 * MT_NORMAL_TAGGED), #8 /* initialize GCR_EL1: all non-zero tags excluded by default */ mov x10, #(SYS_GCR_EL1_RRND | SYS_GCR_EL1_EXCL_MASK) @@ -452,7 +453,6 @@ SYM_FUNC_START(__cpu_setup) mov_q mte_tcr, TCR_KASAN_HW_FLAGS 1: #endif - msr mair_el1, x5 /* * Set/prepare TCR and TTBR. TCR_EL1.T1SZ gets further * adjusted if the kernel is compiled with 52bit VA support. @@ -492,10 +492,13 @@ SYM_FUNC_START(__cpu_setup) orr x10, x10, #TCR_HA // hardware Access flag update 1: #endif /* CONFIG_ARM64_HW_AFDBM */ + msr mair_el1, mair msr tcr_el1, x10 /* * Prepare SCTLR */ mov_q x0, INIT_SCTLR_EL1_MMU_ON ret // return to head.S + + .unreq mair SYM_FUNC_END(__cpu_setup) -- cgit v1.2.3 From 5cd6fa6de5e903a9500d858fffbc72c574d4513b Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 26 Mar 2021 18:01:37 +0000 Subject: arm64: setup: name `tcr` register In __cpu_setup we conditionally manipulate the TCR_EL1 value in x10 after previously using x10 as a scratch register for unrelated temporary variables. To make this a bit clearer, let's move the TCR_EL1 value into a named register `tcr`. To simplify the register allocation, this is placed in the highest available caller-saved scratch register, x16. Following the example of `mair`, we initialise the register with the default value prior to any feature discovery, and write it to TCR_EL1 after all feature discovery is complete, which allows us to simplify the feature discovery code. The existing `mte_tcr` register is no longer needed, and is replaced by the use of x10 as a temporary, matching the rest of the MTE feature discovery assembly in __cpu_setup. As x20 is no longer used, the function is now AAPCS compliant, as we've generally aimed for in our assembly functions.
There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Cc: Marc Zyngier Cc: Will Deacon Link: https://lore.kernel.org/r/20210326180137.43119-3-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/mm/proc.S | 39 ++++++++++++++++----------------------- 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index f560b6fde34c..0a48191534ff 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -419,15 +419,17 @@ SYM_FUNC_START(__cpu_setup) reset_amuserenr_el0 x1 // Disable AMU access from EL0 /* - * Memory region attributes + * Default values for VMSA control registers. These will be adjusted + * below depending on detected CPU features. */ mair .req x17 + tcr .req x16 mov_q mair, MAIR_EL1_SET -#ifdef CONFIG_ARM64_MTE - mte_tcr .req x20 - - mov mte_tcr, #0 + mov_q tcr, TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \ + TCR_TG_FLAGS | TCR_KASLR_FLAGS | TCR_ASID16 | \ + TCR_TBI0 | TCR_A1 | TCR_KASAN_SW_FLAGS +#ifdef CONFIG_ARM64_MTE /* * Update MAIR_EL1, GCR_EL1 and TFSR*_EL1 if MTE is supported * (ID_AA64PFR1_EL1[11:8] > 1). @@ -450,36 +452,26 @@ SYM_FUNC_START(__cpu_setup) msr_s SYS_TFSRE0_EL1, xzr /* set the TCR_EL1 bits */ - mov_q mte_tcr, TCR_KASAN_HW_FLAGS + mov_q x10, TCR_KASAN_HW_FLAGS + orr tcr, tcr, x10 1: #endif - /* - * Set/prepare TCR and TTBR. TCR_EL1.T1SZ gets further - * adjusted if the kernel is compiled with 52bit VA support. - */ - mov_q x10, TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \ - TCR_TG_FLAGS | TCR_KASLR_FLAGS | TCR_ASID16 | \ - TCR_TBI0 | TCR_A1 | TCR_KASAN_SW_FLAGS -#ifdef CONFIG_ARM64_MTE - orr x10, x10, mte_tcr - .unreq mte_tcr -#endif - tcr_clear_errata_bits x10, x9, x5 + tcr_clear_errata_bits tcr, x9, x5 #ifdef CONFIG_ARM64_VA_BITS_52 ldr_l x9, vabits_actual sub x9, xzr, x9 add x9, x9, #64 - tcr_set_t1sz x10, x9 + tcr_set_t1sz tcr, x9 #else ldr_l x9, idmap_t0sz #endif - tcr_set_t0sz x10, x9 + tcr_set_t0sz tcr, x9 /* * Set the IPS bits in TCR_EL1. */ - tcr_compute_pa_size x10, #TCR_IPS_SHIFT, x5, x6 + tcr_compute_pa_size tcr, #TCR_IPS_SHIFT, x5, x6 #ifdef CONFIG_ARM64_HW_AFDBM /* * Enable hardware update of the Access Flags bit. @@ -489,11 +481,11 @@ SYM_FUNC_START(__cpu_setup) mrs x9, ID_AA64MMFR1_EL1 and x9, x9, #0xf cbz x9, 1f - orr x10, x10, #TCR_HA // hardware Access flag update + orr tcr, tcr, #TCR_HA // hardware Access flag update 1: #endif /* CONFIG_ARM64_HW_AFDBM */ msr mair_el1, mair - msr tcr_el1, x10 + msr tcr_el1, tcr /* * Prepare SCTLR */ @@ -501,4 +493,5 @@ SYM_FUNC_START(__cpu_setup) ret // return to head.S .unreq mair + .unreq tcr SYM_FUNC_END(__cpu_setup) -- cgit v1.2.3 From a52ef778ff28d0782172e4e0b99aeda7bd97dd21 Mon Sep 17 00:00:00 2001 From: Chen Lifu Date: Mon, 29 Mar 2021 11:43:43 +0800 Subject: arm64: smp: Add missing prototypes for some smp.c functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit eb631bb5bf5b ("arm64: Support arch_irq_work_raise() via self IPIs") a new function "arch_irq_work_raise" was added without a prototype. In commit d914d4d49745 ("arm64: Implement panic_smp_self_stop()") a new function "panic_smp_self_stop" was added without a prototype. We get the following warnings on W=1: arch/arm64/kernel/smp.c:842:6: warning: no previous prototype for ‘arch_irq_work_raise’ [-Wmissing-prototypes] arch/arm64/kernel/smp.c:862:6: warning: no previous prototype for ‘panic_smp_self_stop’ [-Wmissing-prototypes] Fix the warnings by: 1.
Adding the prototype for 'arch_irq_work_raise' in irq_work.h 2. Adding the prototype for 'panic_smp_self_stop' in smp.h Signed-off-by: Chen Lifu Link: https://lore.kernel.org/r/20210329034343.183974-1-chenlifu@huawei.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/irq_work.h | 2 ++ arch/arm64/include/asm/smp.h | 1 + 2 files changed, 3 insertions(+) diff --git a/arch/arm64/include/asm/irq_work.h b/arch/arm64/include/asm/irq_work.h index a1020285ea75..81bbfa3a035b 100644 --- a/arch/arm64/include/asm/irq_work.h +++ b/arch/arm64/include/asm/irq_work.h @@ -2,6 +2,8 @@ #ifndef __ASM_IRQ_WORK_H #define __ASM_IRQ_WORK_H +extern void arch_irq_work_raise(void); + static inline bool arch_irq_work_has_interrupt(void) { return true; diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index bcb01ca15325..0e357757c0cc 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -145,6 +145,7 @@ bool cpus_are_stuck_in_kernel(void); extern void crash_smp_send_stop(void); extern bool smp_crash_stop_failed(void); +extern void panic_smp_self_stop(void); #endif /* ifndef __ASSEMBLY__ */ -- cgit v1.2.3 From 9a0732efa77418fc85b1bdc5ddee619e62f59545 Mon Sep 17 00:00:00 2001 From: Lecopzer Chen Date: Wed, 24 Mar 2021 12:05:18 +0800 Subject: arm64: kasan: don't populate vmalloc area for CONFIG_KASAN_VMALLOC Linux has supported KASAN for the vmalloc area since commit 3c5c3cfb9ef4da9 ("kasan: support backing vmalloc space with real shadow memory"). As is already done for MODULES_VADDR, simply avoid early population of the shadow for the region between VMALLOC_START and VMALLOC_END. Before: MODULE_VADDR: no mapping, no zero shadow at init VMALLOC_VADDR: backed with zero shadow at init After: MODULE_VADDR: no mapping, no zero shadow at init VMALLOC_VADDR: no mapping, no zero shadow at init Thus the mapping will get allocated on demand by the core function of KASAN_VMALLOC. ----------- vmalloc_shadow_start | | | | | | <= non-mapping | | | | |-----------| |///////////|<- kimage shadow with page table mapping. |-----------| | | | | <= non-mapping | | ------------- vmalloc_shadow_end |00000000000| |00000000000| <= Zero shadow |00000000000| ------------- KASAN_SHADOW_END Signed-off-by: Lecopzer Chen Acked-by: Andrey Konovalov Tested-by: Andrey Konovalov Tested-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210324040522.15548-2-lecopzer.chen@mediatek.com [catalin.marinas@arm.com: add a build check on VMALLOC_START != MODULES_END] Signed-off-by: Catalin Marinas --- arch/arm64/mm/kasan_init.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index d8e66c78440e..7598b0a96b64 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -214,6 +214,7 @@ static void __init kasan_init_shadow(void) { u64 kimg_shadow_start, kimg_shadow_end; u64 mod_shadow_start, mod_shadow_end; + u64 vmalloc_shadow_end; phys_addr_t pa_start, pa_end; u64 i; @@ -223,6 +224,8 @@ static void __init kasan_init_shadow(void) mod_shadow_start = (u64)kasan_mem_to_shadow((void *)MODULES_VADDR); mod_shadow_end = (u64)kasan_mem_to_shadow((void *)MODULES_END); + vmalloc_shadow_end = (u64)kasan_mem_to_shadow((void *)VMALLOC_END); + /* * We are going to perform proper setup of shadow memory. * At first we should unmap early shadow (clear_pgds() call below).
@@ -241,12 +244,18 @@ static void __init kasan_init_shadow(void) kasan_populate_early_shadow(kasan_mem_to_shadow((void *)PAGE_END), (void *)mod_shadow_start); - kasan_populate_early_shadow((void *)kimg_shadow_end, - (void *)KASAN_SHADOW_END); - if (kimg_shadow_start > mod_shadow_end) - kasan_populate_early_shadow((void *)mod_shadow_end, - (void *)kimg_shadow_start); + if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { + BUILD_BUG_ON(VMALLOC_START != MODULES_END); + kasan_populate_early_shadow((void *)vmalloc_shadow_end, + (void *)KASAN_SHADOW_END); + } else { + kasan_populate_early_shadow((void *)kimg_shadow_end, + (void *)KASAN_SHADOW_END); + if (kimg_shadow_start > mod_shadow_end) + kasan_populate_early_shadow((void *)mod_shadow_end, + (void *)kimg_shadow_start); + } for_each_mem_range(i, &pa_start, &pa_end) { void *start = (void *)__phys_to_virt(pa_start); -- cgit v1.2.3 From 7d7b88ff5f8fd79a72baacc521407a3101f0ffae Mon Sep 17 00:00:00 2001 From: Lecopzer Chen Date: Wed, 24 Mar 2021 12:05:19 +0800 Subject: arm64: kasan: abstract _text and _end to KERNEL_START/END arm64 provides defined macros for KERNEL_START and KERNEL_END, so replace _text and _end with those abstractions instead. Signed-off-by: Lecopzer Chen Acked-by: Andrey Konovalov Tested-by: Andrey Konovalov Tested-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210324040522.15548-3-lecopzer.chen@mediatek.com Signed-off-by: Catalin Marinas --- arch/arm64/mm/kasan_init.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index 7598b0a96b64..1cc2cde94e94 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -218,8 +218,8 @@ static void __init kasan_init_shadow(void) phys_addr_t pa_start, pa_end; u64 i; - kimg_shadow_start = (u64)kasan_mem_to_shadow(_text) & PAGE_MASK; - kimg_shadow_end = PAGE_ALIGN((u64)kasan_mem_to_shadow(_end)); + kimg_shadow_start = (u64)kasan_mem_to_shadow(KERNEL_START) & PAGE_MASK; + kimg_shadow_end = PAGE_ALIGN((u64)kasan_mem_to_shadow(KERNEL_END)); mod_shadow_start = (u64)kasan_mem_to_shadow((void *)MODULES_VADDR); mod_shadow_end = (u64)kasan_mem_to_shadow((void *)MODULES_END); @@ -240,7 +240,7 @@ static void __init kasan_init_shadow(void) clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); kasan_map_populate(kimg_shadow_start, kimg_shadow_end, - early_pfn_to_nid(virt_to_pfn(lm_alias(_text)))); + early_pfn_to_nid(virt_to_pfn(lm_alias(KERNEL_START)))); kasan_populate_early_shadow(kasan_mem_to_shadow((void *)PAGE_END), (void *)mod_shadow_start); -- cgit v1.2.3 From 71b613fc0c69080262f4ebe810c3d909d7ff8132 Mon Sep 17 00:00:00 2001 From: Lecopzer Chen Date: Wed, 24 Mar 2021 12:05:20 +0800 Subject: arm64: Kconfig: support CONFIG_KASAN_VMALLOC Now that the vmalloc area is no longer populated at kasan_init(), its shadow memory can be backed on demand, so make KASAN_VMALLOC selectable.
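To make the on-demand backing concrete: generic KASAN translates addresses to shadow through a fixed linear formula, one shadow byte per eight bytes of memory, so the shadow for a freshly allocated vmalloc range can be computed and mapped lazily. A minimal sketch of that mapping (the scale shift matches generic KASAN; the offset constant here is illustrative, not arm64's actual value):

#include <stdint.h>

/* One shadow byte describes 8 bytes of memory under generic KASAN. */
#define EXAMPLE_SHADOW_SCALE_SHIFT	3
/* Illustrative only; the kernel derives the real offset from the VA layout. */
#define EXAMPLE_SHADOW_OFFSET		0xdfff800000000000UL

/* Same shape as the kernel's kasan_mem_to_shadow(). */
static inline uint64_t example_mem_to_shadow(uint64_t addr)
{
	return (addr >> EXAMPLE_SHADOW_SCALE_SHIFT) + EXAMPLE_SHADOW_OFFSET;
}

With such a mapping, kasan_populate_early_shadow() only has to cover the ranges whose shadow must already exist at boot; everything else can be faulted in by the KASAN_VMALLOC core on demand.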
Signed-off-by: Lecopzer Chen Acked-by: Andrey Konovalov Tested-by: Andrey Konovalov Tested-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210324040522.15548-4-lecopzer.chen@mediatek.com Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 5656e7aacd69..3e54fa938234 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -138,6 +138,7 @@ config ARM64 select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_JUMP_LABEL_RELATIVE select HAVE_ARCH_KASAN if !(ARM64_16K_PAGES && ARM64_VA_BITS_48) + select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN select HAVE_ARCH_KASAN_SW_TAGS if HAVE_ARCH_KASAN select HAVE_ARCH_KASAN_HW_TAGS if (HAVE_ARCH_KASAN && ARM64_MTE) select HAVE_ARCH_KFENCE -- cgit v1.2.3 From 31d02e7ab00873befd2cfb6e44581490d947c38b Mon Sep 17 00:00:00 2001 From: Lecopzer Chen Date: Wed, 24 Mar 2021 12:05:21 +0800 Subject: arm64: kaslr: support randomized module area with KASAN_VMALLOC After KASAN_VMALLOC works in arm64, we can randomize module region into vmalloc area now. Test: VMALLOC area ffffffc010000000 fffffffdf0000000 before the patch: module_alloc_base/end ffffffc008b80000 ffffffc010000000 after the patch: module_alloc_base/end ffffffdcf4bed000 ffffffc010000000 And the function that insmod some modules is fine. Suggested-by: Ard Biesheuvel Signed-off-by: Lecopzer Chen Link: https://lore.kernel.org/r/20210324040522.15548-5-lecopzer.chen@mediatek.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/kaslr.c | 18 ++++++++++-------- arch/arm64/kernel/module.c | 16 +++++++++------- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index 27f8939deb1b..341342b207f6 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -128,15 +128,17 @@ u64 __init kaslr_early_init(void) /* use the top 16 bits to randomize the linear region */ memstart_offset_seed = seed >> 48; - if (IS_ENABLED(CONFIG_KASAN_GENERIC) || - IS_ENABLED(CONFIG_KASAN_SW_TAGS)) + if (!IS_ENABLED(CONFIG_KASAN_VMALLOC) && + (IS_ENABLED(CONFIG_KASAN_GENERIC) || + IS_ENABLED(CONFIG_KASAN_SW_TAGS))) /* - * KASAN does not expect the module region to intersect the - * vmalloc region, since shadow memory is allocated for each - * module at load time, whereas the vmalloc region is shadowed - * by KASAN zero pages. So keep modules out of the vmalloc - * region if KASAN is enabled, and put the kernel well within - * 4 GB of the module region. + * KASAN without KASAN_VMALLOC does not expect the module region + * to intersect the vmalloc region, since shadow memory is + * allocated for each module at load time, whereas the vmalloc + * region is shadowed by KASAN zero pages. So keep modules + * out of the vmalloc region if KASAN is enabled without + * KASAN_VMALLOC, and put the kernel well within 4 GB of the + * module region. 
*/ return offset % SZ_2G; diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index fe21e0f06492..b5ec010c481f 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -40,14 +40,16 @@ void *module_alloc(unsigned long size) NUMA_NO_NODE, __builtin_return_address(0)); if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && - !IS_ENABLED(CONFIG_KASAN_GENERIC) && - !IS_ENABLED(CONFIG_KASAN_SW_TAGS)) + (IS_ENABLED(CONFIG_KASAN_VMALLOC) || + (!IS_ENABLED(CONFIG_KASAN_GENERIC) && + !IS_ENABLED(CONFIG_KASAN_SW_TAGS)))) /* - * KASAN can only deal with module allocations being served - * from the reserved module region, since the remainder of - * the vmalloc region is already backed by zero shadow pages, - * and punching holes into it is non-trivial. Since the module - * region is not randomized when KASAN is enabled, it is even + * KASAN without KASAN_VMALLOC can only deal with module + * allocations being served from the reserved module region, + * since the remainder of the vmalloc region is already + * backed by zero shadow pages, and punching holes into it + * is non-trivial. Since the module region is not randomized + * when KASAN is enabled without KASAN_VMALLOC, it is even * less likely that the module region gets exhausted, so we * can simply omit this fallback in that case. */ -- cgit v1.2.3 From acc3042d62cb92c3776767f09f665511c903ef2d Mon Sep 17 00:00:00 2001 From: Lecopzer Chen Date: Wed, 24 Mar 2021 12:05:22 +0800 Subject: arm64: Kconfig: select KASAN_VMALLOC if KASAN_GENERIC is enabled Before this patch, someone who wanted to use VMAP_STACK with KASAN_GENERIC enabled had to explicitly select KASAN_VMALLOC. From Will's suggestion [1]: > I would _really_ like to move to VMAP stack unconditionally, and > that would effectively force KASAN_VMALLOC to be set if KASAN is in use Because VMAP_STACK now depends on either HW_TAGS or KASAN_VMALLOC if KASAN is enabled, in order to make VMAP_STACK selected unconditionally, we bind KASAN_GENERIC and KASAN_VMALLOC together. Note that SW_TAGS supports neither VMAP_STACK nor KASAN_VMALLOC now, so this is the first step to make VMAP_STACK selected unconditionally. Binding KASAN_GENERIC and KASAN_VMALLOC together is expected to cost more memory at runtime; the alternative is to use SW_TAGS KASAN instead.
[1]: https://lore.kernel.org/lkml/20210204150100.GE20815@willie-the-truck/ Suggested-by: Will Deacon Signed-off-by: Lecopzer Chen Link: https://lore.kernel.org/r/20210324040522.15548-6-lecopzer.chen@mediatek.com Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 3e54fa938234..07762359d741 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -195,6 +195,7 @@ config ARM64 select IOMMU_DMA if IOMMU_SUPPORT select IRQ_DOMAIN select IRQ_FORCED_THREADING + select KASAN_VMALLOC if KASAN_GENERIC select MODULES_USE_ELF_RELA select NEED_DMA_MAP_STATE select NEED_SG_DMA_LENGTH -- cgit v1.2.3 From 68f638a432dff4ed3e053cde2bd23ed36b4c057c Mon Sep 17 00:00:00 2001 From: He Ying Date: Tue, 30 Mar 2021 04:58:17 -0400 Subject: docs: arm64: Fix a grammar error depending -> depending on Reported-by: Hulk Robot Signed-off-by: He Ying Link: https://lore.kernel.org/r/20210330085817.86185-1-heying24@huawei.com Signed-off-by: Catalin Marinas --- Documentation/arm64/tagged-address-abi.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/arm64/tagged-address-abi.rst b/Documentation/arm64/tagged-address-abi.rst index 4a9d9c794ee5..cbc4d4500241 100644 --- a/Documentation/arm64/tagged-address-abi.rst +++ b/Documentation/arm64/tagged-address-abi.rst @@ -40,7 +40,7 @@ space obtained in one of the following ways: during creation and with the same restrictions as for ``mmap()`` above (e.g. data, bss, stack). -The AArch64 Tagged Address ABI has two stages of relaxation depending +The AArch64 Tagged Address ABI has two stages of relaxation depending on how the user addresses are used by the kernel: 1. User addresses not accessed by the kernel but used for address space -- cgit v1.2.3 From b88f5e9792cc320a511697dcba8890d032ee3ed3 Mon Sep 17 00:00:00 2001 From: Qi Liu Date: Mon, 29 Mar 2021 20:32:01 +0800 Subject: docs: perf: Address some html build warnings Fix following html build warnings: Documentation/admin-guide/perf/hisi-pmu.rst:61: WARNING: Unexpected indentation. Documentation/admin-guide/perf/hisi-pmu.rst:62: WARNING: Block quote ends without a blank line; unexpected unindent. Documentation/admin-guide/perf/hisi-pmu.rst:69: WARNING: Unexpected indentation. Documentation/admin-guide/perf/hisi-pmu.rst:70: WARNING: Block quote ends without a blank line; unexpected unindent. Documentation/admin-guide/perf/hisi-pmu.rst:83: WARNING: Unexpected indentation. Fixes: 9b86b1b41e0f ("docs: perf: Add new description on HiSilicon uncore PMU v2") Reported-by: Stephen Rothwell Signed-off-by: Qi Liu Link: https://lore.kernel.org/r/1617021121-31450-1-git-send-email-liuqi115@huawei.com Signed-off-by: Will Deacon --- Documentation/admin-guide/perf/hisi-pmu.rst | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/Documentation/admin-guide/perf/hisi-pmu.rst b/Documentation/admin-guide/perf/hisi-pmu.rst index 3b3120e2dd9e..546979360513 100644 --- a/Documentation/admin-guide/perf/hisi-pmu.rst +++ b/Documentation/admin-guide/perf/hisi-pmu.rst @@ -57,16 +57,20 @@ For HiSilicon uncore PMU v2 whose identifier is 0x30, the topology is the same as PMU v1, but some new functions are added to the hardware. (a) L3C PMU supports filtering by core/thread within the cluster which can be -specified as a bitmap. +specified as a bitmap:: + $# perf stat -a -e hisi_sccl3_l3c0/config=0x02,tt_core=0x3/ sleep 5 + This will only count the operations from core/thread 0 and 1 in this cluster. 
(b) Tracetag allows the user to choose to count only read, write or atomic operations via the tt_req parameter in perf. The default value counts all operations. tt_req is 3bits, 3'b100 represents read operations, 3'b101 represents write operations, 3'b110 represents atomic store operations and -3'b111 represents atomic non-store operations, other values are reserved. +3'b111 represents atomic non-store operations, other values are reserved:: + $# perf stat -a -e hisi_sccl3_l3c0/config=0x02,tt_req=0x4/ sleep 5 + This will only count the read operations in this cluster. (c) Datasrc allows the user to check where the data comes from. It is 5 bits. @@ -79,7 +83,8 @@ Some important codes are as follows: 5'b10000: comes from cross-socket DDR; etc, it is mainly helpful to find that the data source is nearest from the CPU cores. If datasrc_cfg is used in the multi-chips, the datasrc_skt shall be -configured in perf command. +configured in perf command:: + $# perf stat -a -e hisi_sccl3_l3c0/config=0xb9,datasrc_cfg=0xE/, hisi_sccl3_l3c0/config=0xb9,datasrc_cfg=0xF/ sleep 5 -- cgit v1.2.3 From 11fa1dc8020a2a9e0c59998920092d4df3fb7308 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 26 Mar 2021 16:02:40 +0000 Subject: perf/arm_pmu_platform: Use dev_err_probe() for IRQ errors By virtue of using platform_irq_get_optional() under the covers, platform_irq_count() needs the target interrupt controller to be available and may return -EPROBE_DEFER if it isn't. Let's use dev_err_probe() to avoid a spurious error log (and help debug any deferral issues) in that case. Reported-by: Paul Menzel Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/073d5e0d3ed1f040592cb47ca6fe3759f40cc7d1.1616774562.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm_pmu_platform.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/perf/arm_pmu_platform.c b/drivers/perf/arm_pmu_platform.c index 933bd8410fc2..bb6ae955083a 100644 --- a/drivers/perf/arm_pmu_platform.c +++ b/drivers/perf/arm_pmu_platform.c @@ -6,6 +6,7 @@ * Copyright (C) 2010 ARM Ltd., Will Deacon */ #define pr_fmt(fmt) "hw perfevents: " fmt +#define dev_fmt pr_fmt #include #include @@ -100,10 +101,8 @@ static int pmu_parse_irqs(struct arm_pmu *pmu) struct pmu_hw_events __percpu *hw_events = pmu->hw_events; num_irqs = platform_irq_count(pdev); - if (num_irqs < 0) { - pr_err("unable to count PMU IRQs\n"); - return num_irqs; - } + if (num_irqs < 0) + return dev_err_probe(&pdev->dev, num_irqs, "unable to count PMU IRQs\n"); /* * In this case we have no idea which CPUs are covered by the PMU. -- cgit v1.2.3 From e338cb6bef254821a8c095018fd27254d74bfd6a Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 26 Mar 2021 16:02:41 +0000 Subject: perf/arm_pmu_platform: Fix error handling If we're aborting after failing to register the PMU device, we probably don't want to leak the IRQs that we've claimed.
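The one-line fix restores the usual probe error idiom: resources are released in reverse order of acquisition through cascading goto labels. A self-contained sketch of the pattern (all names here are hypothetical stand-ins for the driver's helpers):

#include <stdio.h>

/* Hypothetical acquire/release pairs standing in for the driver's own. */
static int example_alloc(void) { return 0; }
static void example_free(void) { }
static int example_request_irqs(void) { return 0; }
static void example_free_irqs(void) { }
static int example_register(void) { return -1; /* simulate failure */ }

static int example_probe(void)
{
	int ret;

	ret = example_alloc();
	if (ret)
		return ret;

	ret = example_request_irqs();
	if (ret)
		goto out_free;

	ret = example_register();
	if (ret)
		goto out_free_irqs;	/* jumping to out_free here would leak the IRQs */

	return 0;

out_free_irqs:
	example_free_irqs();
out_free:
	example_free();
	return ret;
}

int main(void)
{
	printf("probe returned %d\n", example_probe());
	return 0;
}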
Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/53031a607fc8412a60024bfb3bb8cd7141f998f5.1616774562.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm_pmu_platform.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/perf/arm_pmu_platform.c b/drivers/perf/arm_pmu_platform.c index bb6ae955083a..ef9676418c9f 100644 --- a/drivers/perf/arm_pmu_platform.c +++ b/drivers/perf/arm_pmu_platform.c @@ -235,7 +235,7 @@ int arm_pmu_device_probe(struct platform_device *pdev, ret = armpmu_register(pmu); if (ret) - goto out_free; + goto out_free_irqs; return 0; -- cgit v1.2.3 From e20ac6c54a93335b56a6d057aa5da27183f573fa Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 26 Mar 2021 16:02:42 +0000 Subject: perf/arm_pmu_platform: Clean up with dev_printk Nearly all of the messages we can log from the platform device code relate to the specific PMU device and the properties we're parsing from its DT node. In some cases we use %pOF to point at where something was wrong, but even that is inconsistent. Let's convert these logs to the appropriate dev_printk variants, so that every issue specific to the device and/or its DT description is clearly and instantly attributable, particularly if there is more than one PMU node present in the DT. The local refactoring in a couple of functions invites some extra cleanup in the process - the init_fn matching can be streamlined, and the PMU registration failure message moved to the appropriate place and log level. CC: Tian Tao Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/10a4aacdf071d0c03d061c408a5899e5b32cc0a6.1616774562.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm_pmu_platform.c | 47 +++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 25 deletions(-) diff --git a/drivers/perf/arm_pmu_platform.c b/drivers/perf/arm_pmu_platform.c index ef9676418c9f..513de1f54e2d 100644 --- a/drivers/perf/arm_pmu_platform.c +++ b/drivers/perf/arm_pmu_platform.c @@ -63,7 +63,7 @@ static bool pmu_has_irq_affinity(struct device_node *node) return !!of_find_property(node, "interrupt-affinity", NULL); } -static int pmu_parse_irq_affinity(struct device_node *node, int i) +static int pmu_parse_irq_affinity(struct device *dev, int i) { struct device_node *dn; int cpu; @@ -73,19 +73,18 @@ static int pmu_parse_irq_affinity(struct device_node *node, int i) * affinity matches our logical CPU order, as we used to assume. * This is fragile, so we'll warn in pmu_parse_irqs(). 
*/ - if (!pmu_has_irq_affinity(node)) + if (!pmu_has_irq_affinity(dev->of_node)) return i; - dn = of_parse_phandle(node, "interrupt-affinity", i); + dn = of_parse_phandle(dev->of_node, "interrupt-affinity", i); if (!dn) { - pr_warn("failed to parse interrupt-affinity[%d] for %pOFn\n", - i, node); + dev_warn(dev, "failed to parse interrupt-affinity[%d]\n", i); return -EINVAL; } cpu = of_cpu_node_to_id(dn); if (cpu < 0) { - pr_warn("failed to find logical CPU for %pOFn\n", dn); + dev_warn(dev, "failed to find logical CPU for %pOFn\n", dn); cpu = nr_cpu_ids; } @@ -99,17 +98,18 @@ static int pmu_parse_irqs(struct arm_pmu *pmu) int i = 0, num_irqs; struct platform_device *pdev = pmu->plat_device; struct pmu_hw_events __percpu *hw_events = pmu->hw_events; + struct device *dev = &pdev->dev; num_irqs = platform_irq_count(pdev); if (num_irqs < 0) - return dev_err_probe(&pdev->dev, num_irqs, "unable to count PMU IRQs\n"); + return dev_err_probe(dev, num_irqs, "unable to count PMU IRQs\n"); /* * In this case we have no idea which CPUs are covered by the PMU. * To match our prior behaviour, we assume all CPUs in this case. */ if (num_irqs == 0) { - pr_warn("no irqs for PMU, sampling events not supported\n"); + dev_warn(dev, "no irqs for PMU, sampling events not supported\n"); pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; cpumask_setall(&pmu->supported_cpus); return 0; @@ -121,10 +121,8 @@ static int pmu_parse_irqs(struct arm_pmu *pmu) return pmu_parse_percpu_irq(pmu, irq); } - if (nr_cpu_ids != 1 && !pmu_has_irq_affinity(pdev->dev.of_node)) { - pr_warn("no interrupt-affinity property for %pOF, guessing.\n", - pdev->dev.of_node); - } + if (nr_cpu_ids != 1 && !pmu_has_irq_affinity(dev->of_node)) + dev_warn(dev, "no interrupt-affinity property, guessing.\n"); for (i = 0; i < num_irqs; i++) { int cpu, irq; @@ -134,18 +132,18 @@ static int pmu_parse_irqs(struct arm_pmu *pmu) continue; if (irq_is_percpu_devid(irq)) { - pr_warn("multiple PPIs or mismatched SPI/PPI detected\n"); + dev_warn(dev, "multiple PPIs or mismatched SPI/PPI detected\n"); return -EINVAL; } - cpu = pmu_parse_irq_affinity(pdev->dev.of_node, i); + cpu = pmu_parse_irq_affinity(dev, i); if (cpu < 0) return cpu; if (cpu >= nr_cpu_ids) continue; if (per_cpu(hw_events->irq, cpu)) { - pr_warn("multiple PMU IRQs for the same CPU detected\n"); + dev_warn(dev, "multiple PMU IRQs for the same CPU detected\n"); return -EINVAL; } @@ -190,9 +188,8 @@ int arm_pmu_device_probe(struct platform_device *pdev, const struct of_device_id *of_table, const struct pmu_probe_info *probe_table) { - const struct of_device_id *of_id; armpmu_init_fn init_fn; - struct device_node *node = pdev->dev.of_node; + struct device *dev = &pdev->dev; struct arm_pmu *pmu; int ret = -ENODEV; @@ -206,15 +203,14 @@ int arm_pmu_device_probe(struct platform_device *pdev, if (ret) goto out_free; - if (node && (of_id = of_match_node(of_table, pdev->dev.of_node))) { - init_fn = of_id->data; - - pmu->secure_access = of_property_read_bool(pdev->dev.of_node, + init_fn = of_device_get_match_data(dev); + if (init_fn) { + pmu->secure_access = of_property_read_bool(dev->of_node, "secure-reg-access"); /* arm64 systems boot only as non-secure */ if (IS_ENABLED(CONFIG_ARM64) && pmu->secure_access) { - pr_warn("ignoring \"secure-reg-access\" property for arm64\n"); + dev_warn(dev, "ignoring \"secure-reg-access\" property for arm64\n"); pmu->secure_access = false; } @@ -225,7 +221,7 @@ int arm_pmu_device_probe(struct platform_device *pdev, } if (ret) { - pr_info("%pOF: failed to probe PMU!\n", 
node); + dev_err(dev, "failed to probe PMU!\n"); goto out_free; } @@ -234,15 +230,16 @@ int arm_pmu_device_probe(struct platform_device *pdev, goto out_free_irqs; ret = armpmu_register(pmu); - if (ret) + if (ret) { + dev_err(dev, "failed to register PMU devices!\n"); goto out_free_irqs; + } return 0; out_free_irqs: armpmu_free_irqs(pmu); out_free: - pr_info("%pOF: failed to register PMU devices!\n", node); armpmu_free(pmu); return ret; } -- cgit v1.2.3 From 2c2e21e78a945b174629944281997bbcb839e6bb Mon Sep 17 00:00:00 2001 From: Qi Liu Date: Thu, 1 Apr 2021 19:16:41 +0800 Subject: arm64: perf: Remove redundant initialization in perf_event.c The initialization of value in function armv8pmu_read_hw_counter() and armv8pmu_read_counter() seem redundant, as they are soon updated. So, We can remove them. Signed-off-by: Qi Liu Link: https://lore.kernel.org/r/1617275801-1980-1-git-send-email-liuqi115@huawei.com Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 4658fcf88c2b..f594957e29bd 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -470,9 +470,8 @@ static inline u64 armv8pmu_read_evcntr(int idx) static inline u64 armv8pmu_read_hw_counter(struct perf_event *event) { int idx = event->hw.idx; - u64 val = 0; + u64 val = armv8pmu_read_evcntr(idx); - val = armv8pmu_read_evcntr(idx); if (armv8pmu_event_is_chained(event)) val = (val << 32) | armv8pmu_read_evcntr(idx - 1); return val; @@ -520,7 +519,7 @@ static u64 armv8pmu_read_counter(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; - u64 value = 0; + u64 value; if (idx == ARMV8_IDX_CYCLE_COUNTER) value = read_sysreg(pmccntr_el0); -- cgit v1.2.3 From cccb78ce89c45a4414db712be4986edfb92434bd Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 12 Mar 2021 19:03:13 +0000 Subject: arm64/sve: Rework SVE access trap to convert state in registers When we enable SVE usage in userspace after taking a SVE access trap we need to ensure that the portions of the register state that are not shared with the FPSIMD registers are zeroed. Currently we do this by forcing the FPSIMD registers to be saved to the task struct and converting them there. This is wasteful in the common case where the task state is loaded into the registers and we will immediately return to userspace since we can initialise the SVE state directly in registers instead of accessing multiple copies of the register state in memory. Instead in that common case do the conversion in the registers and update the task metadata so that we can return to userspace without spilling the register state to memory unless there is some other reason to do so. 
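For reference on the conversion in the diff below: SVE vector lengths (VL) are measured in bytes and are multiples of a 128-bit quadword (VQ), and the ZCR_ELx.LEN field encodes VQ - 1, which is why sve_set_vq() is passed sve_vq_from_vl(...) - 1. A small sketch of the arithmetic (local names; the kernel's own helper lives in <asm/fpsimd.h>):

/* VL is in bytes; VQ counts 128-bit (16-byte) quadwords. */
#define EXAMPLE_SVE_VQ_BYTES	16

static inline unsigned int example_sve_vq_from_vl(unsigned int vl)
{
	return vl / EXAMPLE_SVE_VQ_BYTES;	/* e.g. VL = 256 bytes -> VQ = 16 */
}

/* ZCR_ELx.LEN holds VQ - 1, hence the vq_minus_1 parameter of sve_set_vq(). */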
Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20210312190313.24598-1-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/fpsimd.h | 1 + arch/arm64/kernel/entry-fpsimd.S | 5 +++++ arch/arm64/kernel/fpsimd.c | 26 +++++++++++++++++--------- 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index bec5f14b622a..ebb263b2d3b1 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -73,6 +73,7 @@ extern void sve_flush_live(void); extern void sve_load_from_fpsimd_state(struct user_fpsimd_state const *state, unsigned long vq_minus_1); extern unsigned int sve_get_vl(void); +extern void sve_set_vq(unsigned long vq_minus_1); struct arm64_cpu_capabilities; extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused); diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index 2ca395c25448..3ecec60d3295 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -48,6 +48,11 @@ SYM_FUNC_START(sve_get_vl) ret SYM_FUNC_END(sve_get_vl) +SYM_FUNC_START(sve_set_vq) + sve_load_vq x0, x1, x2 + ret +SYM_FUNC_END(sve_set_vq) + /* * Load SVE state from FPSIMD state. * diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 062b21f30f94..0f58e45bd3d1 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -926,9 +926,8 @@ void fpsimd_release_task(struct task_struct *dead_task) * Trapped SVE access * * Storage is allocated for the full SVE state, the current FPSIMD - * register contents are migrated across, and TIF_SVE is set so that - * the SVE access trap will be disabled the next time this task - * reaches ret_to_user. + * register contents are migrated across, and the access trap is + * disabled. * * TIF_SVE should be clear on entry: otherwise, fpsimd_restore_current_state() * would have disabled the SVE access trap for userspace during @@ -946,15 +945,24 @@ void do_sve_acc(unsigned int esr, struct pt_regs *regs) get_cpu_fpsimd_context(); - fpsimd_save(); - - /* Force ret_to_user to reload the registers: */ - fpsimd_flush_task_state(current); - - fpsimd_to_sve(current); if (test_and_set_thread_flag(TIF_SVE)) WARN_ON(1); /* SVE access shouldn't have trapped */ + /* + * Convert the FPSIMD state to SVE, zeroing all the state that + * is not shared with FPSIMD. If (as is likely) the current + * state is live in the registers then do this there and + * update our metadata for the current task including + * disabling the trap, otherwise update our in-memory copy. + */ + if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { + sve_set_vq(sve_vq_from_vl(current->thread.sve_vl) - 1); + sve_flush_live(); + fpsimd_bind_task_to_cpu(); + } else { + fpsimd_to_sve(current); + } + put_cpu_fpsimd_context(); } -- cgit v1.2.3 From a7dcf58ae5d2f7c6f1bbe13290897f539f9cd75b Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Tue, 30 Mar 2021 13:54:49 +0800 Subject: arm64: Add __init section marker to some functions They are not needed after booting, so mark them as __init to move them to the .init section. 
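As background, __init places a function in .init.text, which the kernel frees once boot completes; referencing such a function from non-init code is a section mismatch bug. A minimal sketch:

#include <linux/init.h>
#include <linux/printk.h>

/* Lives in .init.text; the memory is reclaimed after boot. */
static int __init example_boot_setup(void)
{
	pr_info("one-time boot setup\n");
	return 0;
}
early_initcall(example_boot_setup);

Callers must themselves only run at boot time, which is exactly the property being asserted for the ptdump and vdso setup paths below.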
Signed-off-by: Jisheng Zhang Reviewed-by: Steven Price Link: https://lore.kernel.org/r/20210330135449.4dcffd7f@xhacker.debian Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/ptdump.h | 2 +- arch/arm64/kernel/vdso.c | 4 ++-- arch/arm64/mm/ptdump.c | 4 ++-- arch/arm64/mm/ptdump_debugfs.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h index 38187f74e089..b1dd7ecff7ef 100644 --- a/arch/arm64/include/asm/ptdump.h +++ b/arch/arm64/include/asm/ptdump.h @@ -23,7 +23,7 @@ struct ptdump_info { void ptdump_walk(struct seq_file *s, struct ptdump_info *info); #ifdef CONFIG_PTDUMP_DEBUGFS -void ptdump_debugfs_register(struct ptdump_info *info, const char *name); +void __init ptdump_debugfs_register(struct ptdump_info *info, const char *name); #else static inline void ptdump_debugfs_register(struct ptdump_info *info, const char *name) { } diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index cee5d04ea9ad..d1fa288518a7 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -86,7 +86,7 @@ static int vdso_mremap(const struct vm_special_mapping *sm, return 0; } -static int __vdso_init(enum vdso_abi abi) +static int __init __vdso_init(enum vdso_abi abi) { int i; struct page **vdso_pagelist; @@ -326,7 +326,7 @@ static int aarch32_alloc_sigpage(void) return 0; } -static int __aarch32_alloc_vdso_pages(void) +static int __init __aarch32_alloc_vdso_pages(void) { if (!IS_ENABLED(CONFIG_COMPAT_VDSO)) diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c index 0e050d76b83a..a50e92ea1878 100644 --- a/arch/arm64/mm/ptdump.c +++ b/arch/arm64/mm/ptdump.c @@ -337,7 +337,7 @@ void ptdump_walk(struct seq_file *s, struct ptdump_info *info) ptdump_walk_pgd(&st.ptdump, info->mm, NULL); } -static void ptdump_initialize(void) +static void __init ptdump_initialize(void) { unsigned i, j; @@ -381,7 +381,7 @@ void ptdump_check_wx(void) pr_info("Checked W+X mappings: passed, no W+X pages found\n"); } -static int ptdump_init(void) +static int __init ptdump_init(void) { address_markers[PAGE_END_NR].start_address = PAGE_END; #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) diff --git a/arch/arm64/mm/ptdump_debugfs.c b/arch/arm64/mm/ptdump_debugfs.c index d29d722ec3ec..68bf1a125502 100644 --- a/arch/arm64/mm/ptdump_debugfs.c +++ b/arch/arm64/mm/ptdump_debugfs.c @@ -16,7 +16,7 @@ static int ptdump_show(struct seq_file *m, void *v) } DEFINE_SHOW_ATTRIBUTE(ptdump); -void ptdump_debugfs_register(struct ptdump_info *info, const char *name) +void __init ptdump_debugfs_register(struct ptdump_info *info, const char *name) { debugfs_create_file(name, 0400, NULL, info, &ptdump_fops); } -- cgit v1.2.3 From df652a16a65735c228c88e5fb75a938614f1368b Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Wed, 7 Apr 2021 14:38:17 +0100 Subject: arm64: mte: Remove unused mte_assign_mem_tag_range() mte_assign_mem_tag_range() was added in commit 85f49cae4dfc ("arm64: mte: add in-kernel MTE helpers") in 5.11 but moved out of mte.S by commit 2cb34276427a ("arm64: kasan: simplify and inline MTE functions") in 5.12 and renamed to mte_set_mem_tag_range(). 2cb34276427a did not delete the old function prototypes in mte.h. Remove the unused prototype from mte.h. 
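The header being trimmed uses the common kernel stub pattern, in which the !CONFIG case provides an empty static inline so that callers need no #ifdef guards; the mte.h diff below removes one such real-declaration/stub pair whose implementation no longer exists. The pattern, sketched generically:

#include <linux/types.h>

#ifdef CONFIG_EXAMPLE_FEATURE
void example_hook(void *addr, size_t size);	/* real implementation elsewhere */
#else
static inline void example_hook(void *addr, size_t size)
{
	/* compiled out: callers build unchanged without the feature */
}
#endif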
Cc: Will Deacon Reported-by: Derrick McKee Signed-off-by: Vincenzo Frascino Link: https://lore.kernel.org/r/20210407133817.23053-1-vincenzo.frascino@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/mte.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h index 9b557a457f24..387279540139 100644 --- a/arch/arm64/include/asm/mte.h +++ b/arch/arm64/include/asm/mte.h @@ -47,8 +47,6 @@ long get_mte_ctrl(struct task_struct *task); int mte_ptrace_copy_tags(struct task_struct *child, long request, unsigned long addr, unsigned long data); -void mte_assign_mem_tag_range(void *addr, size_t size); - #else /* CONFIG_ARM64_MTE */ /* unused if !CONFIG_ARM64_MTE, silence the compiler */ @@ -84,10 +82,6 @@ static inline int mte_ptrace_copy_tags(struct task_struct *child, return -EIO; } -static inline void mte_assign_mem_tag_range(void *addr, size_t size) -{ -} - #endif /* CONFIG_ARM64_MTE */ #endif /* __ASSEMBLY__ */ -- cgit v1.2.3 From 3e237387bb76cbbd254e82fb1e996e2f3af9e6a7 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 1 Apr 2021 19:09:39 +0100 Subject: arm64: Document requirements for fine grained traps at boot The arm64 FEAT_FGT extension introduces a set of traps to EL2 for accesses to small sets of registers and instructions from EL1 and EL0, access to which is controlled by EL3. Require access to it so that it is available to us in future and so that we can ensure these traps are disabled during boot. Signed-off-by: Mark Brown Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20210401180942.35815-2-broonie@kernel.org Signed-off-by: Catalin Marinas --- Documentation/arm64/booting.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/arm64/booting.rst b/Documentation/arm64/booting.rst index 7552dbc1cc54..92ec0bea1af5 100644 --- a/Documentation/arm64/booting.rst +++ b/Documentation/arm64/booting.rst @@ -270,6 +270,12 @@ Before jumping into the kernel, the following conditions must be met: having 0b1 set for the corresponding bit for each of the auxiliary counters present. + For CPUs with the Fine Grained Traps (FEAT_FGT) extension present: + + - If EL3 is present and the kernel is entered at EL2: + + - SCR_EL3.FGTEn (bit 27) must be initialised to 0b1. + The requirements described above for CPU mode, caches, MMUs, architected timers, coherency and system registers apply to all CPUs. All CPUs must enter the kernel in the same exception level. -- cgit v1.2.3 From 31c00d2aeaa2da89361f5b64a64ca831433be5fc Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 1 Apr 2021 19:09:40 +0100 Subject: arm64: Disable fine grained traps on boot The arm64 FEAT_FGT extension introduces a set of traps to EL2 for accesses to small sets of registers and instructions from EL1 and EL0. Currently Linux makes no use of this feature, ensure that it is not active at boot by disabling the traps during EL2 setup. 
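The __init_el2_fgt macro added below is the assembly form of an ID-register field probe. In C, the equivalent check would look roughly like the following sketch, built from the kernel's cpufeature helpers rather than taken from this patch:

#include <asm/cpufeature.h>
#include <asm/sysreg.h>

/* Sketch: a non-zero ID_AA64MMFR0_EL1.FGT field means FEAT_FGT is implemented. */
static bool example_cpu_has_fgt(void)
{
	u64 mmfr0 = read_sysreg_s(SYS_ID_AA64MMFR0_EL1);

	return cpuid_feature_extract_unsigned_field(mmfr0,
						    ID_AA64MMFR0_FGT_SHIFT) > 0;
}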
Signed-off-by: Mark Brown Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20210401180942.35815-3-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/el2_setup.h | 21 +++++++++++++++++++++ arch/arm64/include/asm/sysreg.h | 6 ++++++ 2 files changed, 27 insertions(+) diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index d77d358f9395..b3f2d3bb0938 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -131,6 +131,26 @@ .Lskip_sve_\@: .endm +/* Disable any fine grained traps */ +.macro __init_el2_fgt + mrs x1, id_aa64mmfr0_el1 + ubfx x1, x1, #ID_AA64MMFR0_FGT_SHIFT, #4 + cbz x1, .Lskip_fgt_\@ + + msr_s SYS_HDFGRTR_EL2, xzr + msr_s SYS_HDFGWTR_EL2, xzr + msr_s SYS_HFGRTR_EL2, xzr + msr_s SYS_HFGWTR_EL2, xzr + msr_s SYS_HFGITR_EL2, xzr + + mrs x1, id_aa64pfr0_el1 // AMU traps UNDEF without AMU + ubfx x1, x1, #ID_AA64PFR0_AMU_SHIFT, #4 + cbz x1, .Lskip_fgt_\@ + + msr_s SYS_HAFGRTR_EL2, xzr +.Lskip_fgt_\@: +.endm + .macro __init_el2_nvhe_prepare_eret mov x0, #INIT_PSTATE_EL1 msr spsr_el2, x0 @@ -155,6 +175,7 @@ __init_el2_nvhe_idregs __init_el2_nvhe_cptr __init_el2_nvhe_sve + __init_el2_fgt __init_el2_nvhe_prepare_eret .endm diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index d4a5fca984c3..b35468927363 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -475,9 +475,15 @@ #define SYS_PMCCFILTR_EL0 sys_reg(3, 3, 14, 15, 7) #define SYS_SCTLR_EL2 sys_reg(3, 4, 1, 0, 0) +#define SYS_HFGRTR_EL2 sys_reg(3, 4, 1, 1, 4) +#define SYS_HFGWTR_EL2 sys_reg(3, 4, 1, 1, 5) +#define SYS_HFGITR_EL2 sys_reg(3, 4, 1, 1, 6) #define SYS_ZCR_EL2 sys_reg(3, 4, 1, 2, 0) #define SYS_TRFCR_EL2 sys_reg(3, 4, 1, 2, 1) #define SYS_DACR32_EL2 sys_reg(3, 4, 3, 0, 0) +#define SYS_HDFGRTR_EL2 sys_reg(3, 4, 3, 1, 4) +#define SYS_HDFGWTR_EL2 sys_reg(3, 4, 3, 1, 5) +#define SYS_HAFGRTR_EL2 sys_reg(3, 4, 3, 1, 6) #define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0) #define SYS_ELR_EL2 sys_reg(3, 4, 4, 0, 1) #define SYS_IFSR32_EL2 sys_reg(3, 4, 5, 0, 1) -- cgit v1.2.3 From 230800cd315cd5e2093e603cf7ee150b7591ce1a Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 1 Apr 2021 19:09:41 +0100 Subject: arm64: Require that system registers at all visible ELs be initialized Currently we require that software at a higher exception level initialise all registers at the exception level the kernel will be entered prior to starting the kernel in order to ensure that there is nothing uninitialised which could result in an UNKNOWN state while running the kernel. The expectation is that the software running at the highest exception levels will be tightly coupled to the system and can ensure that all available features are appropriately initialised and that the kernel can initialise anything else. There is a gap here in the case where new registers are added to lower exception levels that require initialisation but the kernel does not yet understand them. Extend the requirement to also include exception levels below the one where the kernel is entered to cover this. 
Suggested-by: Catalin Marinas Signed-off-by: Mark Brown Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20210401180942.35815-4-broonie@kernel.org Signed-off-by: Catalin Marinas --- Documentation/arm64/booting.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Documentation/arm64/booting.rst b/Documentation/arm64/booting.rst index 92ec0bea1af5..4fcc00add117 100644 --- a/Documentation/arm64/booting.rst +++ b/Documentation/arm64/booting.rst @@ -202,9 +202,10 @@ Before jumping into the kernel, the following conditions must be met: - System registers - All writable architected system registers at the exception level where - the kernel image will be entered must be initialised by software at a - higher exception level to prevent execution in an UNKNOWN state. + All writable architected system registers at or below the exception + level where the kernel image will be entered must be initialised by + software at a higher exception level to prevent execution in an UNKNOWN + state. - SCR_EL3.FIQ must have the same value across all CPUs the kernel is executing on. -- cgit v1.2.3 From cac642c12a805ae7565a263b59fb94ad19e81952 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 8 Apr 2021 14:10:08 +0100 Subject: arm64: cpufeature: Allow early filtering of feature override Some CPUs are broken enough that some overrides need to be rejected at the earliest opportunity. In some cases, that's right at cpu feature override time. Provide the necessary infrastructure to filter out overrides, and to report such filtered out overrides to the core cpufeature code. Acked-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210408131010.1109027-2-maz@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cpufeature.h | 17 +++++++++++++++++ arch/arm64/kernel/cpufeature.c | 6 ++++++ arch/arm64/kernel/idreg-override.c | 13 +++++++++++++ 3 files changed, 36 insertions(+) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 61177bac49fa..338840c00e8e 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -63,6 +63,23 @@ struct arm64_ftr_bits { s64 safe_val; /* safe value for FTR_EXACT features */ }; +/* + * Describe the early feature override to the core override code: + * + * @val Values that are to be merged into the final + * sanitised value of the register. Only the bitfields + * set to 1 in @mask are valid + * @mask Mask of the features that are overridden by @val + * + * A @mask field set to full-1 indicates that the corresponding field + * in @val is a valid override. + * + * A @mask field set to full-0 with the corresponding @val field set + * to full-0 denotes that this field has no override + * + * A @mask field set to full-0 with the corresponding @val field set + * to full-1 denotes that this field has an invalid override.
+ */ struct arm64_ftr_override { u64 val; u64 mask; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 066030717a4c..6de15deaa912 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -809,6 +809,12 @@ static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new) reg->name, ftrp->shift + ftrp->width - 1, ftrp->shift, str, tmp); + } else if ((ftr_mask & reg->override->val) == ftr_mask) { + reg->override->val &= ~ftr_mask; + pr_warn("%s[%d:%d]: impossible override, ignored\n", + reg->name, + ftrp->shift + ftrp->width - 1, + ftrp->shift); } val = arm64_ftr_set_value(ftrp, val, ftr_new); diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c index 83f1c4b92095..be92fcd319a1 100644 --- a/arch/arm64/kernel/idreg-override.c +++ b/arch/arm64/kernel/idreg-override.c @@ -25,6 +25,7 @@ struct ftr_set_desc { struct { char name[FTR_DESC_FIELD_LEN]; u8 shift; + bool (*filter)(u64 val); } fields[]; }; @@ -124,6 +125,18 @@ static void __init match_options(const char *cmdline) if (find_field(cmdline, regs[i], f, &v)) continue; + /* + * If an override gets filtered out, advertise + * it by setting the value to 0xf, but + * clearing the mask... Yes, this is fragile. + */ + if (regs[i]->fields[f].filter && + !regs[i]->fields[f].filter(v)) { + regs[i]->override->val |= mask; + regs[i]->override->mask &= ~mask; + continue; + } + regs[i]->override->val &= ~mask; regs[i]->override->val |= (v << shift) & mask; regs[i]->override->mask |= mask; -- cgit v1.2.3 From 31a32b49b80f79cbb84a9c948c5609c6fc044443 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 8 Apr 2021 14:10:09 +0100 Subject: arm64: Cope with CPUs stuck in VHE mode It seems that the CPUs part of the SoC known as Apple M1 have the terrible habit of being stuck with HCR_EL2.E2H==1, in violation of the architecture. Try and work around this deplorable state of affairs by detecting the stuck bit early and short-circuit the nVHE dance. Additional filtering code ensures that attempts at switching to nVHE from the command-line are also ignored. It is still unknown whether there are many more such nuggets to be found... Reported-by: Hector Martin Acked-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210408131010.1109027-3-maz@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/kernel/head.S | 39 +++++++++++++++++++++++++++++++++++--- arch/arm64/kernel/hyp-stub.S | 8 ++++---- arch/arm64/kernel/idreg-override.c | 13 ++++++++++++- 3 files changed, 52 insertions(+), 8 deletions(-) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 840bda1869e9..96873dfa67fd 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -477,14 +477,13 @@ EXPORT_SYMBOL(kimage_vaddr) * booted in EL1 or EL2 respectively. */ SYM_FUNC_START(init_kernel_el) - mov_q x0, INIT_SCTLR_EL1_MMU_OFF - msr sctlr_el1, x0 - mrs x0, CurrentEL cmp x0, #CurrentEL_EL2 b.eq init_el2 SYM_INNER_LABEL(init_el1, SYM_L_LOCAL) + mov_q x0, INIT_SCTLR_EL1_MMU_OFF + msr sctlr_el1, x0 isb mov_q x0, INIT_PSTATE_EL1 msr spsr_el1, x0 @@ -504,9 +503,43 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL) msr vbar_el2, x0 isb + /* + * Fruity CPUs seem to have HCR_EL2.E2H set to RES1, + * making it impossible to start in nVHE mode. Is that + * compliant with the architecture? Absolutely not! 
+ */ + mrs x0, hcr_el2 + and x0, x0, #HCR_E2H + cbz x0, 1f + + /* Switching to VHE requires a sane SCTLR_EL1 as a start */ + mov_q x0, INIT_SCTLR_EL1_MMU_OFF + msr_s SYS_SCTLR_EL12, x0 + + /* + * Force an eret into a helper "function", and let it return + * to our original caller... This makes sure that we have + * initialised the basic PSTATE state. + */ + mov x0, #INIT_PSTATE_EL2 + msr spsr_el1, x0 + adr x0, __cpu_stick_to_vhe + msr elr_el1, x0 + eret + +1: + mov_q x0, INIT_SCTLR_EL1_MMU_OFF + msr sctlr_el1, x0 + msr elr_el2, lr mov w0, #BOOT_CPU_MODE_EL2 eret + +__cpu_stick_to_vhe: + mov x0, #HVC_VHE_RESTART + hvc #0 + mov x0, #BOOT_CPU_MODE_EL2 + ret SYM_FUNC_END(init_kernel_el) /* diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index 5eccbd62fec8..9c9b4d8fc395 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -27,12 +27,12 @@ SYM_CODE_START(__hyp_stub_vectors) ventry el2_fiq_invalid // FIQ EL2t ventry el2_error_invalid // Error EL2t - ventry el2_sync_invalid // Synchronous EL2h + ventry elx_sync // Synchronous EL2h ventry el2_irq_invalid // IRQ EL2h ventry el2_fiq_invalid // FIQ EL2h ventry el2_error_invalid // Error EL2h - ventry el1_sync // Synchronous 64-bit EL1 + ventry elx_sync // Synchronous 64-bit EL1 ventry el1_irq_invalid // IRQ 64-bit EL1 ventry el1_fiq_invalid // FIQ 64-bit EL1 ventry el1_error_invalid // Error 64-bit EL1 @@ -45,7 +45,7 @@ SYM_CODE_END(__hyp_stub_vectors) .align 11 -SYM_CODE_START_LOCAL(el1_sync) +SYM_CODE_START_LOCAL(elx_sync) cmp x0, #HVC_SET_VECTORS b.ne 1f msr vbar_el2, x1 @@ -71,7 +71,7 @@ SYM_CODE_START_LOCAL(el1_sync) 9: mov x0, xzr eret -SYM_CODE_END(el1_sync) +SYM_CODE_END(elx_sync) // nVHE? No way! Give me the real thing! SYM_CODE_START_LOCAL(mutate_to_vhe) diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c index be92fcd319a1..e628c8ce1ffe 100644 --- a/arch/arm64/kernel/idreg-override.c +++ b/arch/arm64/kernel/idreg-override.c @@ -29,11 +29,22 @@ struct ftr_set_desc { } fields[]; }; +static bool __init mmfr1_vh_filter(u64 val) +{ + /* + * If we ever reach this point while running VHE, we're + * guaranteed to be on one of these funky, VHE-stuck CPUs. If + * the user was trying to force nVHE on us, proceed with + * attitude adjustment. + */ + return !(is_kernel_in_hyp_mode() && val == 0); +} + static const struct ftr_set_desc mmfr1 __initconst = { .name = "id_aa64mmfr1", .override = &id_aa64mmfr1_override, .fields = { - { "vh", ID_AA64MMFR1_VHE_SHIFT }, + { "vh", ID_AA64MMFR1_VHE_SHIFT, mmfr1_vh_filter }, {} }, }; -- cgit v1.2.3 From 2d726d0db6ac479d91bf74490455badd34af6b1d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 8 Apr 2021 14:10:10 +0100 Subject: arm64: Get rid of CONFIG_ARM64_VHE CONFIG_ARM64_VHE was introduced with ARMv8.1 (some 7 years ago), and has been enabled by default for almost all that time. Given that newer systems that are VHE capable are finally becoming available, and that some systems are even incapable of not running VHE, drop the configuration altogether. Anyone willing to stick to non-VHE on VHE hardware for obscure reasons should use the 'kvm-arm.mode=nvhe' command-line option. 
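For reference, both the mmfr1_vh_filter() above and the runs_at_el2() capability match in the cpufeature diff below hinge on is_kernel_in_hyp_mode(), which is essentially the following check from <asm/virt.h> (VHE means the kernel itself runs at EL2):

static inline bool is_kernel_in_hyp_mode(void)
{
	return read_sysreg(CurrentEL) == CurrentEL_EL2;
}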
Suggested-by: Will Deacon Signed-off-by: Marc Zyngier Acked-by: Will Deacon Link: https://lore.kernel.org/r/20210408131010.1109027-4-maz@kernel.org Signed-off-by: Catalin Marinas --- Documentation/admin-guide/kernel-parameters.txt | 3 +-- arch/arm64/Kconfig | 20 -------------------- arch/arm64/kernel/cpufeature.c | 4 ---- arch/arm64/kernel/hyp-stub.S | 2 -- 4 files changed, 1 insertion(+), 28 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 04545725f187..18f8bb3024f4 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2279,8 +2279,7 @@ state is kept private from the host. Not valid if the kernel is running in EL2. - Defaults to VHE/nVHE based on hardware support and - the value of CONFIG_ARM64_VHE. + Defaults to VHE/nVHE based on hardware support. kvm-arm.vgic_v3_group0_trap= [KVM,ARM] Trap guest accesses to GICv3 group-0 diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 5656e7aacd69..e9fbb0b45793 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1416,19 +1416,6 @@ config ARM64_USE_LSE_ATOMICS built with binutils >= 2.25 in order for the new instructions to be used. -config ARM64_VHE - bool "Enable support for Virtualization Host Extensions (VHE)" - default y - help - Virtualization Host Extensions (VHE) allow the kernel to run - directly at EL2 (instead of EL1) on processors that support - it. This leads to better performance for KVM, as they reduce - the cost of the world switch. - - Selecting this option allows the VHE feature to be detected - at runtime, and does not affect processors that do not - implement this feature. - endmenu menu "ARMv8.2 architectural features" @@ -1684,7 +1671,6 @@ endmenu config ARM64_SVE bool "ARM Scalable Vector Extension support" default y - depends on !KVM || ARM64_VHE help The Scalable Vector Extension (SVE) is an extension to the AArch64 execution state which complements and extends the SIMD functionality @@ -1713,12 +1699,6 @@ config ARM64_SVE booting the kernel. If unsure and you are not observing these symptoms, you should assume that it is safe to say Y. - CPUs that support SVE are architecturally required to support the - Virtualization Host Extensions (VHE), so the kernel makes no - provision for supporting SVE alongside KVM without VHE enabled. - Thus, you will need to enable CONFIG_ARM64_VHE if you want to support - KVM in the same kernel image. 
- config ARM64_MODULE_PLTS bool "Use PLTs to allow module memory to spill over into vmalloc area" depends on MODULES diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 6de15deaa912..fa6023ac3548 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1623,7 +1623,6 @@ int get_cpu_with_amu_feat(void) } #endif -#ifdef CONFIG_ARM64_VHE static bool runs_at_el2(const struct arm64_cpu_capabilities *entry, int __unused) { return is_kernel_in_hyp_mode(); @@ -1642,7 +1641,6 @@ static void cpu_copy_el2regs(const struct arm64_cpu_capabilities *__unused) if (!alternative_is_applied(ARM64_HAS_VIRT_HOST_EXTN)) write_sysreg(read_sysreg(tpidr_el1), tpidr_el2); } -#endif static void cpu_has_fwb(const struct arm64_cpu_capabilities *__unused) { @@ -1845,7 +1843,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, .matches = has_no_hw_prefetch, }, -#ifdef CONFIG_ARM64_VHE { .desc = "Virtualization Host Extensions", .capability = ARM64_HAS_VIRT_HOST_EXTN, @@ -1853,7 +1850,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = runs_at_el2, .cpu_enable = cpu_copy_el2regs, }, -#endif /* CONFIG_ARM64_VHE */ { .desc = "32-bit EL0 Support", .capability = ARM64_HAS_32BIT_EL0, diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index 9c9b4d8fc395..74ad3db061d1 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -224,7 +224,6 @@ SYM_FUNC_END(__hyp_reset_vectors) * Entry point to switch to VHE if deemed capable */ SYM_FUNC_START(switch_to_vhe) -#ifdef CONFIG_ARM64_VHE // Need to have booted at EL2 adr_l x1, __boot_cpu_mode ldr w0, [x1] @@ -240,6 +239,5 @@ SYM_FUNC_START(switch_to_vhe) mov x0, #HVC_VHE_RESTART hvc #0 1: -#endif ret SYM_FUNC_END(switch_to_vhe) -- cgit v1.2.3 From f3b7deef8dcaf84fd659108ae300626ea5420f87 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Mon, 15 Mar 2021 13:20:11 +0000 Subject: arm64: mte: Add asynchronous mode support MTE provides an asynchronous mode for detecting tag exceptions. In particular, instead of triggering a fault, the arm64 core updates a register that the kernel checks after the asynchronous tag check fault has occurred. Add support for MTE asynchronous mode. The exception handling mechanism will be added with a future patch. Note: KASAN HW activates async mode via the kasan.mode kernel parameter. The default mode is set to synchronous. The code that verifies the status of TFSR_EL1 will be added with a future patch.
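The distinction between the two modes comes down to the SCTLR_EL1.TCF field programmed in the patch below. As a rough sketch (the encodings follow the Armv8.5 MTE definition of TCF, bits [41:40]; the macro names here are illustrative, the kernel's real definitions live in asm/sysreg.h):

    /* SCTLR_EL1.TCF, bits [41:40] -- illustrative definitions only */
    #define TCF_SHIFT	40
    #define TCF_NONE	(0UL << TCF_SHIFT)	/* tag check faults ignored */
    #define TCF_SYNC	(1UL << TCF_SHIFT)	/* precise, synchronous fault */
    #define TCF_ASYNC	(2UL << TCF_SHIFT)	/* TFSR_EL1.TF1 set, checked later */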
Cc: Will Deacon Reviewed-by: Catalin Marinas Reviewed-by: Andrey Konovalov Acked-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Link: https://lore.kernel.org/r/20210315132019.33202-2-vincenzo.frascino@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/memory.h | 4 +++- arch/arm64/include/asm/mte-kasan.h | 9 +++++++-- arch/arm64/kernel/mte.c | 16 ++++++++++++++-- 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 0aabc3be9a75..8fbc8dab044f 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -243,7 +243,9 @@ static inline const void *__tag_set(const void *addr, u8 tag) } #ifdef CONFIG_KASAN_HW_TAGS -#define arch_enable_tagging() mte_enable_kernel() +#define arch_enable_tagging_sync() mte_enable_kernel_sync() +#define arch_enable_tagging_async() mte_enable_kernel_async() +#define arch_enable_tagging() arch_enable_tagging_sync() #define arch_set_tagging_report_once(state) mte_set_report_once(state) #define arch_init_tags(max_tag) mte_init_tags(max_tag) #define arch_get_random_tag() mte_get_random_tag() diff --git a/arch/arm64/include/asm/mte-kasan.h b/arch/arm64/include/asm/mte-kasan.h index 7ab500e2ad17..4acf8bf41cad 100644 --- a/arch/arm64/include/asm/mte-kasan.h +++ b/arch/arm64/include/asm/mte-kasan.h @@ -77,7 +77,8 @@ static inline void mte_set_mem_tag_range(void *addr, size_t size, u8 tag) } while (curr != end); } -void mte_enable_kernel(void); +void mte_enable_kernel_sync(void); +void mte_enable_kernel_async(void); void mte_init_tags(u64 max_tag); void mte_set_report_once(bool state); @@ -104,7 +105,11 @@ static inline void mte_set_mem_tag_range(void *addr, size_t size, u8 tag) { } -static inline void mte_enable_kernel(void) +static inline void mte_enable_kernel_sync(void) +{ +} + +static inline void mte_enable_kernel_async(void) { } diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index b3c70a612c7a..fa755cf94e01 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -107,11 +107,23 @@ void mte_init_tags(u64 max_tag) write_sysreg_s(SYS_GCR_EL1_RRND | gcr_kernel_excl, SYS_GCR_EL1); } -void mte_enable_kernel(void) +static inline void __mte_enable_kernel(const char *mode, unsigned long tcf) { /* Enable MTE Sync Mode for EL1. */ - sysreg_clear_set(sctlr_el1, SCTLR_ELx_TCF_MASK, SCTLR_ELx_TCF_SYNC); + sysreg_clear_set(sctlr_el1, SCTLR_ELx_TCF_MASK, tcf); isb(); + + pr_info_once("MTE: enabled in %s mode at EL1\n", mode); +} + +void mte_enable_kernel_sync(void) +{ + __mte_enable_kernel("synchronous", SCTLR_ELx_TCF_SYNC); +} + +void mte_enable_kernel_async(void) +{ + __mte_enable_kernel("asynchronous", SCTLR_ELx_TCF_ASYNC); } void mte_set_report_once(bool state) -- cgit v1.2.3 From 2603f8a78dfb1d54f62dbacc490ea44aa6d80e04 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Mon, 15 Mar 2021 13:20:12 +0000 Subject: kasan: Add KASAN mode kernel parameter Architectures supported by KASAN_HW_TAGS can provide a sync or async mode of execution. On MTE-enabled arm64 hw, for example, these correspond to the synchronous and asynchronous tagging modes of execution. In synchronous mode, an exception is triggered if a tag check fault occurs. In asynchronous mode, if a tag check fault occurs, the TFSR_EL1 register is updated asynchronously. The kernel checks the corresponding bits periodically. KASAN requires a specific kernel command line parameter to make use of these hw features.
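For instance, booting with asynchronous checking would look something like this (the kasan= and kasan.mode= syntax is the one documented below; the surrounding bootargs are made-up placeholders):

    console=ttyAMA0 root=/dev/vda1 kasan=on kasan.mode=async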
Add KASAN HW execution mode kernel command line parameter. Note: This patch adds the kasan.mode kernel parameter and the sync/async kernel command line options to enable the described features. [ Add a new var instead of exposing kasan_arg_mode to be consistent with flags for other command line arguments. ] Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Andrey Konovalov Reviewed-by: Andrey Konovalov Acked-by: Catalin Marinas Acked-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Signed-off-by: Andrey Konovalov Link: https://lore.kernel.org/r/20210315132019.33202-3-vincenzo.frascino@arm.com Signed-off-by: Catalin Marinas --- Documentation/dev-tools/kasan.rst | 9 ++++++ lib/test_kasan.c | 2 +- mm/kasan/hw_tags.c | 60 ++++++++++++++++++++++++++++++++++++--- mm/kasan/kasan.h | 18 ++++++++---- 4 files changed, 78 insertions(+), 11 deletions(-) diff --git a/Documentation/dev-tools/kasan.rst b/Documentation/dev-tools/kasan.rst index ddf4239a5890..6f6ab3ed7b79 100644 --- a/Documentation/dev-tools/kasan.rst +++ b/Documentation/dev-tools/kasan.rst @@ -161,6 +161,15 @@ particular KASAN features. - ``kasan=off`` or ``=on`` controls whether KASAN is enabled (default: ``on``). +- ``kasan.mode=sync`` or ``=async`` controls whether KASAN is configured in + synchronous or asynchronous mode of execution (default: ``sync``). + Synchronous mode: a bad access is detected immediately when a tag + check fault occurs. + Asynchronous mode: a bad access detection is delayed. When a tag check + fault occurs, the information is stored in hardware (in the TFSR_EL1 + register for arm64). The kernel periodically checks the hardware and + only reports tag faults during these checks. + - ``kasan.stacktrace=off`` or ``=on`` disables or enables alloc and free stack traces collection (default: ``on``). diff --git a/lib/test_kasan.c b/lib/test_kasan.c index e5647d147b35..479c31a5dc21 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -97,7 +97,7 @@ static void kasan_test_exit(struct kunit *test) READ_ONCE(fail_data.report_found)); \ if (IS_ENABLED(CONFIG_KASAN_HW_TAGS)) { \ if (READ_ONCE(fail_data.report_found)) \ - kasan_enable_tagging(); \ + kasan_enable_tagging_sync(); \ migrate_enable(); \ } \ } while (0) diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c index 2aad21fda156..1df4ce803861 100644 --- a/mm/kasan/hw_tags.c +++ b/mm/kasan/hw_tags.c @@ -25,6 +25,12 @@ enum kasan_arg { KASAN_ARG_ON, }; +enum kasan_arg_mode { + KASAN_ARG_MODE_DEFAULT, + KASAN_ARG_MODE_SYNC, + KASAN_ARG_MODE_ASYNC, +}; + enum kasan_arg_stacktrace { KASAN_ARG_STACKTRACE_DEFAULT, KASAN_ARG_STACKTRACE_OFF, @@ -38,6 +44,7 @@ enum kasan_arg_fault { }; static enum kasan_arg kasan_arg __ro_after_init; +static enum kasan_arg_mode kasan_arg_mode __ro_after_init; static enum kasan_arg_stacktrace kasan_arg_stacktrace __ro_after_init; static enum kasan_arg_fault kasan_arg_fault __ro_after_init; @@ -45,6 +52,10 @@ static enum kasan_arg_fault kasan_arg_fault __ro_after_init; DEFINE_STATIC_KEY_FALSE(kasan_flag_enabled); EXPORT_SYMBOL(kasan_flag_enabled); +/* Whether the asynchronous mode is enabled. */ +bool kasan_flag_async __ro_after_init; +EXPORT_SYMBOL_GPL(kasan_flag_async); + /* Whether to collect alloc/free stack traces. 
*/ DEFINE_STATIC_KEY_FALSE(kasan_flag_stacktrace); @@ -68,6 +79,23 @@ static int __init early_kasan_flag(char *arg) } early_param("kasan", early_kasan_flag); +/* kasan.mode=sync/async */ +static int __init early_kasan_mode(char *arg) +{ + if (!arg) + return -EINVAL; + + if (!strcmp(arg, "sync")) + kasan_arg_mode = KASAN_ARG_MODE_SYNC; + else if (!strcmp(arg, "async")) + kasan_arg_mode = KASAN_ARG_MODE_ASYNC; + else + return -EINVAL; + + return 0; +} +early_param("kasan.mode", early_kasan_mode); + /* kasan.stacktrace=off/on */ static int __init early_kasan_flag_stacktrace(char *arg) { @@ -115,7 +143,15 @@ void kasan_init_hw_tags_cpu(void) return; hw_init_tags(KASAN_TAG_MAX); - hw_enable_tagging(); + + /* + * Enable async mode only when explicitly requested through + * the command line. + */ + if (kasan_arg_mode == KASAN_ARG_MODE_ASYNC) + hw_enable_tagging_async(); + else + hw_enable_tagging_sync(); } /* kasan_init_hw_tags() is called once on boot CPU. */ @@ -132,6 +168,22 @@ void __init kasan_init_hw_tags(void) /* Enable KASAN. */ static_branch_enable(&kasan_flag_enabled); + switch (kasan_arg_mode) { + case KASAN_ARG_MODE_DEFAULT: + /* + * Default to sync mode. + * Do nothing, kasan_flag_async keeps its default value. + */ + break; + case KASAN_ARG_MODE_SYNC: + /* Do nothing, kasan_flag_async keeps its default value. */ + break; + case KASAN_ARG_MODE_ASYNC: + /* Async mode enabled. */ + kasan_flag_async = true; + break; + } + switch (kasan_arg_stacktrace) { case KASAN_ARG_STACKTRACE_DEFAULT: /* Default to enabling stack trace collection. */ @@ -194,10 +246,10 @@ void kasan_set_tagging_report_once(bool state) } EXPORT_SYMBOL_GPL(kasan_set_tagging_report_once); -void kasan_enable_tagging(void) +void kasan_enable_tagging_sync(void) { - hw_enable_tagging(); + hw_enable_tagging_sync(); } -EXPORT_SYMBOL_GPL(kasan_enable_tagging); +EXPORT_SYMBOL_GPL(kasan_enable_tagging_sync); #endif diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 8c55634d6edd..a3d0d165d97d 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -21,6 +21,7 @@ static inline bool kasan_stack_collection_enabled(void) #endif extern bool kasan_flag_panic __ro_after_init; +extern bool kasan_flag_async __ro_after_init; #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) #define KASAN_GRANULE_SIZE (1UL << KASAN_SHADOW_SCALE_SHIFT) @@ -275,8 +276,11 @@ static inline const void *arch_kasan_set_tag(const void *addr, u8 tag) #ifdef CONFIG_KASAN_HW_TAGS -#ifndef arch_enable_tagging -#define arch_enable_tagging() +#ifndef arch_enable_tagging_sync +#define arch_enable_tagging_sync() +#endif +#ifndef arch_enable_tagging_async +#define arch_enable_tagging_async() #endif #ifndef arch_init_tags #define arch_init_tags(max_tag) @@ -294,7 +298,8 @@ static inline const void *arch_kasan_set_tag(const void *addr, u8 tag) #define arch_set_mem_tag_range(addr, size, tag) ((void *)(addr)) #endif -#define hw_enable_tagging() arch_enable_tagging() +#define hw_enable_tagging_sync() arch_enable_tagging_sync() +#define hw_enable_tagging_async() arch_enable_tagging_async() #define hw_init_tags(max_tag) arch_init_tags(max_tag) #define hw_set_tagging_report_once(state) arch_set_tagging_report_once(state) #define hw_get_random_tag() arch_get_random_tag() @@ -303,7 +308,8 @@ static inline const void *arch_kasan_set_tag(const void *addr, u8 tag) #else /* CONFIG_KASAN_HW_TAGS */ -#define hw_enable_tagging() +#define hw_enable_tagging_sync() +#define hw_enable_tagging_async() #define hw_set_tagging_report_once(state) #endif /* CONFIG_KASAN_HW_TAGS 
*/ @@ -311,12 +317,12 @@ static inline const void *arch_kasan_set_tag(const void *addr, u8 tag) #if defined(CONFIG_KASAN_HW_TAGS) && IS_ENABLED(CONFIG_KASAN_KUNIT_TEST) void kasan_set_tagging_report_once(bool state); -void kasan_enable_tagging(void); +void kasan_enable_tagging_sync(void); #else /* CONFIG_KASAN_HW_TAGS || CONFIG_KASAN_KUNIT_TEST */ static inline void kasan_set_tagging_report_once(bool state) { } -static inline void kasan_enable_tagging(void) { } +static inline void kasan_enable_tagging_sync(void) { } #endif /* CONFIG_KASAN_HW_TAGS || CONFIG_KASAN_KUNIT_TEST */ -- cgit v1.2.3 From c137c6145b11dc513407bd2e4406cf0a7b8a217d Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Mon, 15 Mar 2021 13:20:13 +0000 Subject: arm64: mte: Drop arch_enable_tagging() arch_enable_tagging() was left in memory.h after the introduction of async mode to not break the bisectability of the KASAN KUnit tests. Remove the function now that KASAN has been fully converted. Cc: Will Deacon Acked-by: Catalin Marinas Acked-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Link: https://lore.kernel.org/r/20210315132019.33202-4-vincenzo.frascino@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/memory.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 8fbc8dab044f..8b0beaedbe1f 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -245,7 +245,6 @@ static inline const void *__tag_set(const void *addr, u8 tag) #ifdef CONFIG_KASAN_HW_TAGS #define arch_enable_tagging_sync() mte_enable_kernel_sync() #define arch_enable_tagging_async() mte_enable_kernel_async() -#define arch_enable_tagging() arch_enable_tagging_sync() #define arch_set_tagging_report_once(state) mte_set_report_once(state) #define arch_init_tags(max_tag) mte_init_tags(max_tag) #define arch_get_random_tag() mte_get_random_tag() -- cgit v1.2.3 From 8f7b5054755e48cc7b217a41e3f1891e01338d2f Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Mon, 15 Mar 2021 13:20:14 +0000 Subject: kasan: Add report for async mode KASAN provides an asynchronous mode of execution. Add reporting functionality for this mode.
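Judging from the pr_err() calls added below, an asynchronous report is expected to look roughly like this (stack trace elided; in async mode it points at the code that noticed the fault, not at the bad access itself):

    ==================================================================
    BUG: KASAN: invalid-access
    Asynchronous mode enabled: no access details available

    <dump_stack() output>
    ==================================================================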
Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Andrey Konovalov Reviewed-by: Andrey Konovalov Acked-by: Catalin Marinas Acked-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Signed-off-by: Andrey Konovalov Link: https://lore.kernel.org/r/20210315132019.33202-5-vincenzo.frascino@arm.com Signed-off-by: Catalin Marinas --- include/linux/kasan.h | 6 ++++++ mm/kasan/kasan.h | 16 ++++++++++++++++ mm/kasan/report.c | 17 ++++++++++++++++- 3 files changed, 38 insertions(+), 1 deletion(-) diff --git a/include/linux/kasan.h b/include/linux/kasan.h index b91732bd05d7..e7c20c79b342 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -376,6 +376,12 @@ static inline void *kasan_reset_tag(const void *addr) #endif /* CONFIG_KASAN_SW_TAGS || CONFIG_KASAN_HW_TAGS*/ +#ifdef CONFIG_KASAN_HW_TAGS + +void kasan_report_async(void); + +#endif /* CONFIG_KASAN_HW_TAGS */ + #ifdef CONFIG_KASAN_SW_TAGS void __init kasan_init_sw_tags(void); #else diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index a3d0d165d97d..02e9656b857f 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -7,17 +7,33 @@ #include #ifdef CONFIG_KASAN_HW_TAGS + #include + DECLARE_STATIC_KEY_FALSE(kasan_flag_stacktrace); +extern bool kasan_flag_async __ro_after_init; + static inline bool kasan_stack_collection_enabled(void) { return static_branch_unlikely(&kasan_flag_stacktrace); } + +static inline bool kasan_async_mode_enabled(void) +{ + return kasan_flag_async; +} #else + static inline bool kasan_stack_collection_enabled(void) { return true; } + +static inline bool kasan_async_mode_enabled(void) +{ + return false; +} + #endif extern bool kasan_flag_panic __ro_after_init; diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 87b271206163..8b0843a2cdd7 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -87,7 +87,8 @@ static void start_report(unsigned long *flags) static void end_report(unsigned long *flags, unsigned long addr) { - trace_error_report_end(ERROR_DETECTOR_KASAN, addr); + if (!kasan_async_mode_enabled()) + trace_error_report_end(ERROR_DETECTOR_KASAN, addr); pr_err("==================================================================\n"); add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); spin_unlock_irqrestore(&report_lock, *flags); @@ -360,6 +361,20 @@ void kasan_report_invalid_free(void *object, unsigned long ip) end_report(&flags, (unsigned long)object); } +#ifdef CONFIG_KASAN_HW_TAGS +void kasan_report_async(void) +{ + unsigned long flags; + + start_report(&flags); + pr_err("BUG: KASAN: invalid-access\n"); + pr_err("Asynchronous mode enabled: no access details available\n"); + pr_err("\n"); + dump_stack(); + end_report(&flags, 0); +} +#endif /* CONFIG_KASAN_HW_TAGS */ + static void __kasan_report(unsigned long addr, size_t size, bool is_write, unsigned long ip) { -- cgit v1.2.3 From e60beb95c08baf29416d0e06a9e1d4887faf5d1c Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Mon, 15 Mar 2021 13:20:15 +0000 Subject: arm64: mte: Enable TCO in functions that can read beyond buffer limits The load_unaligned_zeropad() and __get/put_kernel_nofault() functions can read past a buffer's limits into an MTE granule with a different tag. When MTE async mode is enabled and such a load crosses the boundary into a granule with a different tag, the PE sets the TFSR_EL1.TF1 bit as if an asynchronous tag fault had happened. Enable Tag Check Override (TCO) in these functions before the load and disable it afterwards to prevent this from happening.
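In pseudo-C, the pattern being introduced around each such access is (a sketch only; the real helpers added in the hunks below are __uaccess_enable_tco_async() and __uaccess_disable_tco_async(), which are no-ops unless async mode is in use):

    __uaccess_enable_tco_async();	/* PSTATE.TCO set: tag checks suppressed */
    ret = *(unsigned long *)addr;	/* may legitimately overrun into the next granule */
    __uaccess_disable_tco_async();	/* PSTATE.TCO cleared: tag checks back on */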
Note: The same condition can be hit in MTE sync mode, but there we deal with it through exception handling. In the current implementation the mte_async_mode flag is set only at boot time, but in future KASAN might acquire runtime features that change the mode dynamically, hence we disable it when sync mode is selected to stay future-proof. Cc: Will Deacon Reported-by: Branislav Rankov Tested-by: Branislav Rankov Reviewed-by: Catalin Marinas Acked-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Link: https://lore.kernel.org/r/20210315132019.33202-6-vincenzo.frascino@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/mte.h | 15 +++++++++++++++ arch/arm64/include/asm/uaccess.h | 22 ++++++++++++++++++++++ arch/arm64/include/asm/word-at-a-time.h | 4 ++++ arch/arm64/kernel/mte.c | 22 ++++++++++++++++++++++ 4 files changed, 63 insertions(+) diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h index 9b557a457f24..8603c6636a7d 100644 --- a/arch/arm64/include/asm/mte.h +++ b/arch/arm64/include/asm/mte.h @@ -90,5 +90,20 @@ static inline void mte_assign_mem_tag_range(void *addr, size_t size) #endif /* CONFIG_ARM64_MTE */ +#ifdef CONFIG_KASAN_HW_TAGS +/* Whether the MTE asynchronous mode is enabled. */ +DECLARE_STATIC_KEY_FALSE(mte_async_mode); + +static inline bool system_uses_mte_async_mode(void) +{ + return static_branch_unlikely(&mte_async_mode); +} +#else +static inline bool system_uses_mte_async_mode(void) +{ + return false; +} +#endif /* CONFIG_KASAN_HW_TAGS */ + #endif /* __ASSEMBLY__ */ #endif /* __ASM_MTE_H */ diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 0deb88467111..b5f08621fa29 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -188,6 +189,23 @@ static inline void __uaccess_enable_tco(void) ARM64_MTE, CONFIG_KASAN_HW_TAGS)); } +/* + * These functions disable tag checking only if in MTE async mode + * since the sync mode generates exceptions synchronously and the + * nofault or load_unaligned_zeropad can handle them.
+ */ +static inline void __uaccess_disable_tco_async(void) +{ + if (system_uses_mte_async_mode()) + __uaccess_disable_tco(); +} + +static inline void __uaccess_enable_tco_async(void) +{ + if (system_uses_mte_async_mode()) + __uaccess_enable_tco(); +} + static inline void uaccess_disable_privileged(void) { __uaccess_disable_tco(); @@ -307,8 +325,10 @@ do { \ do { \ int __gkn_err = 0; \ \ + __uaccess_enable_tco_async(); \ __raw_get_mem("ldr", *((type *)(dst)), \ (__force type *)(src), __gkn_err); \ + __uaccess_disable_tco_async(); \ if (unlikely(__gkn_err)) \ goto err_label; \ } while (0) @@ -380,8 +400,10 @@ do { \ do { \ int __pkn_err = 0; \ \ + __uaccess_enable_tco_async(); \ __raw_put_mem("str", *((type *)(src)), \ (__force type *)(dst), __pkn_err); \ + __uaccess_disable_tco_async(); \ if (unlikely(__pkn_err)) \ goto err_label; \ } while(0) diff --git a/arch/arm64/include/asm/word-at-a-time.h b/arch/arm64/include/asm/word-at-a-time.h index 3333950b5909..c62d9fa791aa 100644 --- a/arch/arm64/include/asm/word-at-a-time.h +++ b/arch/arm64/include/asm/word-at-a-time.h @@ -55,6 +55,8 @@ static inline unsigned long load_unaligned_zeropad(const void *addr) { unsigned long ret, offset; + __uaccess_enable_tco_async(); + /* Load word from unaligned pointer addr */ asm( "1: ldr %0, %3\n" @@ -76,6 +78,8 @@ static inline unsigned long load_unaligned_zeropad(const void *addr) : "=&r" (ret), "=&r" (offset) : "r" (addr), "Q" (*(unsigned long *)addr)); + __uaccess_disable_tco_async(); + return ret; } diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index fa755cf94e01..9362928ba0d5 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -26,6 +26,10 @@ u64 gcr_kernel_excl __ro_after_init; static bool report_fault_once = true; +/* Whether the MTE asynchronous mode is enabled. */ +DEFINE_STATIC_KEY_FALSE(mte_async_mode); +EXPORT_SYMBOL_GPL(mte_async_mode); + static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap) { pte_t old_pte = READ_ONCE(*ptep); @@ -118,12 +122,30 @@ static inline void __mte_enable_kernel(const char *mode, unsigned long tcf) void mte_enable_kernel_sync(void) { + /* + * Make sure we enter this function when no PE has set + * async mode previously. + */ + WARN_ONCE(system_uses_mte_async_mode(), + "MTE async mode enabled system wide!"); + __mte_enable_kernel("synchronous", SCTLR_ELx_TCF_SYNC); } void mte_enable_kernel_async(void) { __mte_enable_kernel("asynchronous", SCTLR_ELx_TCF_ASYNC); + + /* + * MTE async mode is set system wide by the first PE that + * executes this function. + * + * Note: If in future KASAN acquires a runtime switching + * mode in between sync and async, this strategy needs + * to be reviewed. + */ + if (!system_uses_mte_async_mode()) + static_branch_enable(&mte_async_mode); } void mte_set_report_once(bool state) -- cgit v1.2.3 From d8969752cc4e3294074ff0582de42d0e3c982eba Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Mon, 15 Mar 2021 13:20:16 +0000 Subject: arm64: mte: Conditionally compile mte_enable_kernel_*() mte_enable_kernel_*() are not needed if KASAN_HW is disabled. Add #ifdef guards around the functions to compile them conditionally.
Signed-off-by: Vincenzo Frascino Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20210315132019.33202-7-vincenzo.frascino@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/mte.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index 9362928ba0d5..50f0724c8d8f 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -26,9 +26,11 @@ u64 gcr_kernel_excl __ro_after_init; static bool report_fault_once = true; +#ifdef CONFIG_KASAN_HW_TAGS /* Whether the MTE asynchronous mode is enabled. */ DEFINE_STATIC_KEY_FALSE(mte_async_mode); EXPORT_SYMBOL_GPL(mte_async_mode); +#endif static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap) { @@ -120,6 +122,7 @@ static inline void __mte_enable_kernel(const char *mode, unsigned long tcf) pr_info_once("MTE: enabled in %s mode at EL1\n", mode); } +#ifdef CONFIG_KASAN_HW_TAGS void mte_enable_kernel_sync(void) { /* @@ -147,6 +150,7 @@ void mte_enable_kernel_async(void) if (!system_uses_mte_async_mode()) static_branch_enable(&mte_async_mode); } +#endif void mte_set_report_once(bool state) { -- cgit v1.2.3 From 65812c6921cc849d86811147038dd246fa0ea18c Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Mon, 15 Mar 2021 13:20:17 +0000 Subject: arm64: mte: Enable async tag check fault MTE provides a mode that asynchronously updates the TFSR_EL1 register when a tag check exception is detected. To take advantage of this mode the kernel has to verify the status of the register at: 1. Context switching 2. Return to user/EL0 (Not required in entry from EL0 since the kernel did not run) 3. Kernel entry from EL1 4. Kernel exit to EL1 If the register is non-zero, a trace is reported. Add the required features for EL1 detection and reporting. Note: The ITFSB bit is set in the SCTLR_EL1 register, hence it guarantees that the indirect writes to TFSR_EL1 are synchronized at exception entry to EL1. On the context switch path the synchronization is guaranteed by the dsb() in __switch_to(). The dsb(nsh) in mte_check_tfsr_exit() is provisional pending confirmation by the architects. Cc: Will Deacon Reviewed-by: Catalin Marinas Acked-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Link: https://lore.kernel.org/r/20210315132019.33202-8-vincenzo.frascino@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/mte.h | 29 +++++++++++++++++++++++++++++ arch/arm64/kernel/entry-common.c | 6 ++++++ arch/arm64/kernel/mte.c | 36 ++++++++++++++++++++++++++++++++++++ 3 files changed, 71 insertions(+) diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h index 8603c6636a7d..9a929620ca5d 100644 --- a/arch/arm64/include/asm/mte.h +++ b/arch/arm64/include/asm/mte.h @@ -98,11 +98,40 @@ static inline bool system_uses_mte_async_mode(void) { return static_branch_unlikely(&mte_async_mode); } + +void mte_check_tfsr_el1(void); + +static inline void mte_check_tfsr_entry(void) +{ + mte_check_tfsr_el1(); +} + +static inline void mte_check_tfsr_exit(void) +{ + /* + * The asynchronous faults are sync'ed automatically with + * TFSR_EL1 on kernel entry but for exit an explicit dsb() + * is required.
+ */ + dsb(nsh); + isb(); + + mte_check_tfsr_el1(); +} #else static inline bool system_uses_mte_async_mode(void) { return false; } +static inline void mte_check_tfsr_el1(void) +{ +} +static inline void mte_check_tfsr_entry(void) +{ +} +static inline void mte_check_tfsr_exit(void) +{ +} #endif /* CONFIG_KASAN_HW_TAGS */ #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 9d3588450473..a1ec351c36bd 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -37,6 +37,8 @@ static void noinstr enter_from_kernel_mode(struct pt_regs *regs) lockdep_hardirqs_off(CALLER_ADDR0); rcu_irq_enter_check_tick(); trace_hardirqs_off_finish(); + + mte_check_tfsr_entry(); } /* @@ -47,6 +49,8 @@ static void noinstr exit_to_kernel_mode(struct pt_regs *regs) { lockdep_assert_irqs_disabled(); + mte_check_tfsr_exit(); + if (interrupts_enabled(regs)) { if (regs->exit_rcu) { trace_hardirqs_on_prepare(); @@ -293,6 +297,8 @@ asmlinkage void noinstr enter_from_user_mode(void) asmlinkage void noinstr exit_to_user_mode(void) { + mte_check_tfsr_exit(); + trace_hardirqs_on_prepare(); lockdep_hardirqs_on_prepare(CALLER_ADDR0); user_enter_irqoff(); diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index 50f0724c8d8f..b6336fbe4c14 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -162,6 +162,29 @@ bool mte_report_once(void) return READ_ONCE(report_fault_once); } +#ifdef CONFIG_KASAN_HW_TAGS +void mte_check_tfsr_el1(void) +{ + u64 tfsr_el1; + + if (!system_supports_mte()) + return; + + tfsr_el1 = read_sysreg_s(SYS_TFSR_EL1); + + if (unlikely(tfsr_el1 & SYS_TFSR_EL1_TF1)) { + /* + * Note: isb() is not required after this direct write + * because there is no indirect read subsequent to it + * (per ARM DDI 0487F.c table D13-1). + */ + write_sysreg_s(0, SYS_TFSR_EL1); + + kasan_report_async(); + } +} +#endif + static void update_sctlr_el1_tcf0(u64 tcf0) { /* ISB required for the kernel uaccess routines */ @@ -227,6 +250,19 @@ void mte_thread_switch(struct task_struct *next) /* avoid expensive SCTLR_EL1 accesses if no change */ if (current->thread.sctlr_tcf0 != next->thread.sctlr_tcf0) update_sctlr_el1_tcf0(next->thread.sctlr_tcf0); + else + isb(); + + /* + * Check if an async tag exception occurred at EL1. + * + * Note: On the context switch path we rely on the dsb() present + * in __switch_to() to guarantee that the indirect writes to TFSR_EL1 + * are synchronized before this point. + * isb() above is required for the same reason. + * + */ + mte_check_tfsr_el1(); } void mte_suspend_exit(void) -- cgit v1.2.3 From eab0e6e17d876381b4d47996eef3b5fd46ea4011 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Mon, 15 Mar 2021 13:20:18 +0000 Subject: arm64: mte: Report async tag faults before suspend When MTE async mode is enabled, TFSR_EL1 contains the accumulated asynchronous tag check faults for EL1 and EL0. During the suspend/resume operations the firmware might perform operations that could change the state of the register, resulting in a spurious tag check fault report. Report asynchronous tag faults before suspend and clear the TFSR_EL1 register after resume to prevent this from happening.
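Schematically, the intended ordering around a suspend cycle is (a sketch only; mte_suspend_enter() in the hunks below is the real pre-suspend hook, while the post-resume clearing referred to above is assumed to happen on the resume path):

    mte_suspend_enter();		/* dsb+isb, then report any pending TFSR_EL1.TF1 */
    /* firmware suspend/resume -- TFSR_EL1 contents unreliable here */
    write_sysreg_s(0, SYS_TFSR_EL1);	/* discard whatever the firmware left behind */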
Cc: Will Deacon Cc: Lorenzo Pieralisi Reviewed-by: Catalin Marinas Reviewed-by: Lorenzo Pieralisi Acked-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Link: https://lore.kernel.org/r/20210315132019.33202-9-vincenzo.frascino@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/mte.h | 4 ++++ arch/arm64/kernel/mte.c | 16 ++++++++++++++ arch/arm64/kernel/suspend.c | 3 +++ 3 files changed, 23 insertions(+) diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h index 9a929620ca5d..a38abc15186c 100644 --- a/arch/arm64/include/asm/mte.h +++ b/arch/arm64/include/asm/mte.h @@ -41,6 +41,7 @@ void mte_sync_tags(pte_t *ptep, pte_t pte); void mte_copy_page_tags(void *kto, const void *kfrom); void flush_mte_state(void); void mte_thread_switch(struct task_struct *next); +void mte_suspend_enter(void); void mte_suspend_exit(void); long set_mte_ctrl(struct task_struct *task, unsigned long arg); long get_mte_ctrl(struct task_struct *task); @@ -66,6 +67,9 @@ static inline void flush_mte_state(void) static inline void mte_thread_switch(struct task_struct *next) { } +static inline void mte_suspend_enter(void) +{ +} static inline void mte_suspend_exit(void) { } diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index b6336fbe4c14..820bad94870e 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -265,6 +265,22 @@ void mte_thread_switch(struct task_struct *next) mte_check_tfsr_el1(); } +void mte_suspend_enter(void) +{ + if (!system_supports_mte()) + return; + + /* + * The barriers are required to guarantee that the indirect writes + * to TFSR_EL1 are synchronized before we report the state. + */ + dsb(nsh); + isb(); + + /* Report SYS_TFSR_EL1 before suspend entry */ + mte_check_tfsr_el1(); +} + void mte_suspend_exit(void) { if (!system_supports_mte()) diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index d7564891ffe1..6fdc8292b4f5 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -91,6 +91,9 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) unsigned long flags; struct sleep_stack_data state; + /* Report any MTE async fault before going to suspend */ + mte_suspend_enter(); + /* * From this point debug exceptions are disabled to prevent * updates to mdscr register (saved and restored along with -- cgit v1.2.3 From e80a76aa1a91018d919d2210366943f9bf17009e Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 15 Mar 2021 13:20:19 +0000 Subject: kasan, arm64: test support for HW_TAGS async mode This change adds KASAN-KUnit test support for the async HW_TAGS mode. In async mode, tag faults aren't generated synchronously when a bad access happens, but are instead explicitly checked for by the kernel. As each KASAN-KUnit test expects a fault to happen before the test is over, check for faults as part of the test handler.
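Condensed, the handler logic added to KUNIT_EXPECT_KASAN_FAIL below amounts to (a sketch):

    expression;				/* the access under test */
    if (kasan_async_mode_enabled())
    	kasan_force_async_fault();	/* drain TFSR_EL1 so a pending fault is reported now */
    /* then compare report_expected against report_found as before */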
Acked-by: Catalin Marinas Acked-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: Andrey Konovalov Signed-off-by: Vincenzo Frascino Link: https://lore.kernel.org/r/20210315132019.33202-10-vincenzo.frascino@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/memory.h | 1 + lib/test_kasan.c | 17 +++++++++++------ mm/kasan/hw_tags.c | 6 ++++++ mm/kasan/kasan.h | 6 ++++++ mm/kasan/report.c | 5 +++++ 5 files changed, 29 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 8b0beaedbe1f..b943879c1c24 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -246,6 +246,7 @@ static inline const void *__tag_set(const void *addr, u8 tag) #define arch_enable_tagging_sync() mte_enable_kernel_sync() #define arch_enable_tagging_async() mte_enable_kernel_async() #define arch_set_tagging_report_once(state) mte_set_report_once(state) +#define arch_force_async_tag_fault() mte_check_tfsr_exit() #define arch_init_tags(max_tag) mte_init_tags(max_tag) #define arch_get_random_tag() mte_get_random_tag() #define arch_get_mem_tag(addr) mte_get_mem_tag(addr) diff --git a/lib/test_kasan.c b/lib/test_kasan.c index 479c31a5dc21..785e724ce0d8 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -69,10 +69,10 @@ static void kasan_test_exit(struct kunit *test) * resource named "kasan_data". Do not use this name for KUnit resources * outside of KASAN tests. * - * For hardware tag-based KASAN, when a tag fault happens, tag checking is - * normally auto-disabled. When this happens, this test handler reenables - * tag checking. As tag checking can be only disabled or enabled per CPU, this - * handler disables migration (preemption). + * For hardware tag-based KASAN in sync mode, when a tag fault happens, tag + * checking is auto-disabled. When this happens, this test handler reenables + * tag checking. As tag checking can be only disabled or enabled per CPU, + * this handler disables migration (preemption). * * Since the compiler doesn't see that the expression can change the fail_data * fields, it can reorder or optimize away the accesses to those fields. @@ -80,7 +80,8 @@ static void kasan_test_exit(struct kunit *test) * expression to prevent that. 
*/ #define KUNIT_EXPECT_KASAN_FAIL(test, expression) do { \ - if (IS_ENABLED(CONFIG_KASAN_HW_TAGS)) \ + if (IS_ENABLED(CONFIG_KASAN_HW_TAGS) && \ + !kasan_async_mode_enabled()) \ migrate_disable(); \ WRITE_ONCE(fail_data.report_expected, true); \ WRITE_ONCE(fail_data.report_found, false); \ @@ -92,10 +93,14 @@ static void kasan_test_exit(struct kunit *test) barrier(); \ expression; \ barrier(); \ + if (kasan_async_mode_enabled()) \ + kasan_force_async_fault(); \ + barrier(); \ KUNIT_EXPECT_EQ(test, \ READ_ONCE(fail_data.report_expected), \ READ_ONCE(fail_data.report_found)); \ - if (IS_ENABLED(CONFIG_KASAN_HW_TAGS)) { \ + if (IS_ENABLED(CONFIG_KASAN_HW_TAGS) && \ + !kasan_async_mode_enabled()) { \ if (READ_ONCE(fail_data.report_found)) \ kasan_enable_tagging_sync(); \ migrate_enable(); \ diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c index 1df4ce803861..4004388b4e4b 100644 --- a/mm/kasan/hw_tags.c +++ b/mm/kasan/hw_tags.c @@ -252,4 +252,10 @@ void kasan_enable_tagging_sync(void) } EXPORT_SYMBOL_GPL(kasan_enable_tagging_sync); +void kasan_force_async_fault(void) +{ + hw_force_async_tag_fault(); +} +EXPORT_SYMBOL_GPL(kasan_force_async_fault); + #endif diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 02e9656b857f..2d29069ae977 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -304,6 +304,9 @@ static inline const void *arch_kasan_set_tag(const void *addr, u8 tag) #ifndef arch_set_tagging_report_once #define arch_set_tagging_report_once(state) #endif +#ifndef arch_force_async_tag_fault +#define arch_force_async_tag_fault() +#endif #ifndef arch_get_random_tag #define arch_get_random_tag() (0xFF) #endif @@ -318,6 +321,7 @@ static inline const void *arch_kasan_set_tag(const void *addr, u8 tag) #define hw_enable_tagging_async() arch_enable_tagging_async() #define hw_init_tags(max_tag) arch_init_tags(max_tag) #define hw_set_tagging_report_once(state) arch_set_tagging_report_once(state) +#define hw_force_async_tag_fault() arch_force_async_tag_fault() #define hw_get_random_tag() arch_get_random_tag() #define hw_get_mem_tag(addr) arch_get_mem_tag(addr) #define hw_set_mem_tag_range(addr, size, tag) arch_set_mem_tag_range((addr), (size), (tag)) @@ -334,11 +338,13 @@ static inline const void *arch_kasan_set_tag(const void *addr, u8 tag) void kasan_set_tagging_report_once(bool state); void kasan_enable_tagging_sync(void); +void kasan_force_async_fault(void); #else /* CONFIG_KASAN_HW_TAGS || CONFIG_KASAN_KUNIT_TEST */ static inline void kasan_set_tagging_report_once(bool state) { } static inline void kasan_enable_tagging_sync(void) { } +static inline void kasan_force_async_fault(void) { } #endif /* CONFIG_KASAN_HW_TAGS || CONFIG_KASAN_KUNIT_TEST */ diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 8b0843a2cdd7..14bd51ea2348 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -366,6 +366,11 @@ void kasan_report_async(void) { unsigned long flags; +#if IS_ENABLED(CONFIG_KUNIT) + if (current->kunit_test) + kasan_update_kunit_status(current->kunit_test); +#endif /* IS_ENABLED(CONFIG_KUNIT) */ + start_report(&flags); pr_err("BUG: KASAN: invalid-access\n"); pr_err("Asynchronous mode enabled: no access details available\n"); -- cgit v1.2.3 From 27248fe1abb2a0e6fe4c744c25700f557b04466c Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 2 Mar 2021 10:01:10 +0100 Subject: arm64: assembler: remove conditional NEON yield macros The users of the conditional NEON yield macros have all been switched to the simplified cond_yield macro, and so the NEON specific ones can be removed. 
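For reference, call sites now use the single cond_yield macro instead of the three-part if_will_cond_yield_neon/do_cond_yield_neon/endif_yield_neon sequence; schematically (the label and scratch register below are illustrative, not taken from any particular caller):

    	cond_yield	3f, x8		// branch to 3f if a reschedule is pending
    	// ... keep processing ...
    3:	// save partial state and return; the C glue yields and re-enters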
Signed-off-by: Ard Biesheuvel Acked-by: Will Deacon Link: https://lore.kernel.org/r/20210302090118.30666-2-ardb@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/assembler.h | 70 -------------------------------------- 1 file changed, 70 deletions(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index ca31594d3d6c..e0fc1d424f9b 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -692,76 +692,6 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU isb .endm -/* - * Check whether to yield to another runnable task from kernel mode NEON code - * (which runs with preemption disabled). - * - * if_will_cond_yield_neon - * // pre-yield patchup code - * do_cond_yield_neon - * // post-yield patchup code - * endif_yield_neon