From 471b5e42cc7d76678314542d0ce079e5f3cfb706 Mon Sep 17 00:00:00 2001
From: Chris Brandt <chris.brandt@renesas.com>
Date: Thu, 16 Feb 2017 18:53:29 +0100
Subject: ARM: 8659/1: l2c: allow CA9 optimizations to be disabled

If a PL310 is added to a system, but the sideband signals are not
connected, some Cortex A9 optimizations cannot be used. In particular,
enabling Full Line of Zeros in the CA9 without sidebands connected will
crash the system since the CA9 will expect the L2C to perform operations,
yet the L2C never gets the commands. Early BRESP also does not work
without sideband signals.

Signed-off-by: Chris Brandt <chris.brandt@renesas.com>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/mm/cache-l2x0.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 2290be390f87..808efbb89b88 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -57,6 +57,9 @@ static unsigned long sync_reg_offset = L2X0_CACHE_SYNC;
 
 struct l2x0_regs l2x0_saved_regs;
 
+static bool l2x0_bresp_disable;
+static bool l2x0_flz_disable;
+
 /*
  * Common code for all cache controllers.
  */
@@ -620,7 +623,7 @@ static void __init l2c310_enable(void __iomem *base, unsigned num_lock)
 	u32 aux = l2x0_saved_regs.aux_ctrl;
 
 	if (rev >= L310_CACHE_ID_RTL_R2P0) {
-		if (cortex_a9) {
+		if (cortex_a9 && !l2x0_bresp_disable) {
 			aux |= L310_AUX_CTRL_EARLY_BRESP;
 			pr_info("L2C-310 enabling early BRESP for Cortex-A9\n");
 		} else if (aux & L310_AUX_CTRL_EARLY_BRESP) {
@@ -629,7 +632,7 @@ static void __init l2c310_enable(void __iomem *base, unsigned num_lock)
 		}
 	}
 
-	if (cortex_a9) {
+	if (cortex_a9 && !l2x0_flz_disable) {
 		u32 aux_cur = readl_relaxed(base + L2X0_AUX_CTRL);
 		u32 acr = get_auxcr();
 
@@ -1200,6 +1203,12 @@ static void __init l2c310_of_parse(const struct device_node *np,
 		*aux_mask &= ~L2C_AUX_CTRL_PARITY_ENABLE;
 	}
 
+	if (of_property_read_bool(np, "arm,early-bresp-disable"))
+		l2x0_bresp_disable = true;
+
+	if (of_property_read_bool(np, "arm,full-line-zero-disable"))
+		l2x0_flz_disable = true;
+
 	prefetch = l2x0_saved_regs.prefetch_ctrl;
 
 	ret = of_property_read_u32(np, "arm,double-linefill", &val);
-- 
cgit v1.2.3


From a96bb197693eb9e7a7221867bd944ccd6b6e12e6 Mon Sep 17 00:00:00 2001
From: Chris Brandt <chris.brandt@renesas.com>
Date: Thu, 16 Feb 2017 18:54:39 +0100
Subject: ARM: 8660/1: shmobile: r7s72100: Enable L2 cache

Even though L2C is specified in the DT, you still need to add the aux
settings in the machine_desc.

Signed-off-by: Chris Brandt <chris.brandt@renesas.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/mach-shmobile/setup-r7s72100.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/mach-shmobile/setup-r7s72100.c b/arch/arm/mach-shmobile/setup-r7s72100.c
index d46639fc6849..319ca9508ec6 100644
--- a/arch/arm/mach-shmobile/setup-r7s72100.c
+++ b/arch/arm/mach-shmobile/setup-r7s72100.c
@@ -26,6 +26,8 @@ static const char *const r7s72100_boards_compat_dt[] __initconst = {
 };
 
 DT_MACHINE_START(R7S72100_DT, "Generic R7S72100 (Flattened Device Tree)")
+	.l2c_aux_val    = 0,
+	.l2c_aux_mask   = ~0,
 	.init_early	= shmobile_init_delay,
 	.init_late	= shmobile_init_late,
 	.dt_compat	= r7s72100_boards_compat_dt,
-- 
cgit v1.2.3


From f08578e6da96043ec07a695fb6f4cba27a9d22d7 Mon Sep 17 00:00:00 2001
From: Chris Brandt <chris.brandt@renesas.com>
Date: Thu, 16 Feb 2017 18:55:55 +0100
Subject: ARM: 8661/1: dts: r7s72100: add l2 cache

Note that early-bresp-disable and full-line-zero-disable are required
because the sideband signals between the CPU and L2C were not connected
in this SoC.

Signed-off-by: Chris Brandt <chris.brandt@renesas.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/boot/dts/r7s72100.dtsi | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/boot/dts/r7s72100.dtsi b/arch/arm/boot/dts/r7s72100.dtsi
index b8aa256bd515..1cf2bd038090 100644
--- a/arch/arm/boot/dts/r7s72100.dtsi
+++ b/arch/arm/boot/dts/r7s72100.dtsi
@@ -177,6 +177,7 @@
 			compatible = "arm,cortex-a9";
 			reg = <0>;
 			clock-frequency = <400000000>;
+			next-level-cache = <&L2>;
 		};
 	};
 
@@ -368,6 +369,16 @@
 			<0xe8202000 0x1000>;
 	};
 
+	L2: cache-controller@3ffff000 {
+		compatible = "arm,pl310-cache";
+		reg = <0x3ffff000 0x1000>;
+		interrupts = <GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>;
+		arm,early-bresp-disable;
+		arm,full-line-zero-disable;
+		cache-unified;
+		cache-level = <2>;
+	};
+
 	i2c0: i2c@fcfee000 {
 		#address-cells = <1>;
 		#size-cells = <0>;
-- 
cgit v1.2.3


From b7ede5a1f5905ac394cc8e61712a13e3c5cb7b8f Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Wed, 22 Feb 2017 19:40:12 +0100
Subject: ARM: 8662/1: module: split core and init PLT sections

Since commit 35fa91eed817 ("ARM: kernel: merge core and init PLTs"),
the ARM module PLT code allocates all PLT entries in a single core
section, since the overhead of having a separate init PLT section is
not justified by the small number of PLT entries usually required for
init code.

However, the core and init module regions are allocated independently,
and there is a corner case where the core region may be allocated from
the VMALLOC region if the dedicated module region is exhausted, but the
init region, being much smaller, can still be allocated from the module
region. This puts the PLT entries out of reach of the relocated branch
instructions, defeating the whole purpose of PLTs.

So split the core and init PLT regions, and name the latter ".init.plt"
so it gets allocated along with (and sufficiently close to) the .init
sections that it serves. Also, given that init PLT entries may need to
be emitted for branches that target the core module, modify the logic
that disregards defined symbols to only disregard symbols that are
defined in the same section.

Fixes: 35fa91eed817 ("ARM: kernel: merge core and init PLTs")
Cc: <stable@vger.kernel.org> # v4.9+
Reported-by: Angus Clark <angus@angusclark.org>
Tested-by: Angus Clark <angus@angusclark.org>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/include/asm/module.h |  9 ++++-
 arch/arm/kernel/module-plts.c | 87 +++++++++++++++++++++++++++++--------------
 arch/arm/kernel/module.lds    |  1 +
 3 files changed, 68 insertions(+), 29 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/include/asm/module.h b/arch/arm/include/asm/module.h
index 464748b9fd7d..ed2319663a1e 100644
--- a/arch/arm/include/asm/module.h
+++ b/arch/arm/include/asm/module.h
@@ -18,13 +18,18 @@ enum {
 };
 #endif
 
+struct mod_plt_sec {
+	struct elf32_shdr	*plt;
+	int			plt_count;
+};
+
 struct mod_arch_specific {
 #ifdef CONFIG_ARM_UNWIND
 	struct unwind_table *unwind[ARM_SEC_MAX];
 #endif
 #ifdef CONFIG_ARM_MODULE_PLTS
-	struct elf32_shdr   *plt;
-	int		    plt_count;
+	struct mod_plt_sec	core;
+	struct mod_plt_sec	init;
 #endif
 };
 
diff --git a/arch/arm/kernel/module-plts.c b/arch/arm/kernel/module-plts.c
index 3a5cba90c971..3d0c2e4dda1d 100644
--- a/arch/arm/kernel/module-plts.c
+++ b/arch/arm/kernel/module-plts.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2014-2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -31,9 +31,17 @@ struct plt_entries {
 	u32	lit[PLT_ENT_COUNT];
 };
 
+static bool in_init(const struct module *mod, unsigned long loc)
+{
+	return loc - (u32)mod->init_layout.base < mod->init_layout.size;
+}
+
 u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val)
 {
-	struct plt_entries *plt = (struct plt_entries *)mod->arch.plt->sh_addr;
+	struct mod_plt_sec *pltsec = !in_init(mod, loc) ? &mod->arch.core :
+							  &mod->arch.init;
+
+	struct plt_entries *plt = (struct plt_entries *)pltsec->plt->sh_addr;
 	int idx = 0;
 
 	/*
@@ -41,9 +49,9 @@ u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val)
 	 * relocations are sorted, this will be the last entry we allocated.
 	 * (if one exists).
 	 */
-	if (mod->arch.plt_count > 0) {
-		plt += (mod->arch.plt_count - 1) / PLT_ENT_COUNT;
-		idx = (mod->arch.plt_count - 1) % PLT_ENT_COUNT;
+	if (pltsec->plt_count > 0) {
+		plt += (pltsec->plt_count - 1) / PLT_ENT_COUNT;
+		idx = (pltsec->plt_count - 1) % PLT_ENT_COUNT;
 
 		if (plt->lit[idx] == val)
 			return (u32)&plt->ldr[idx];
@@ -53,8 +61,8 @@ u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val)
 			plt++;
 	}
 
-	mod->arch.plt_count++;
-	BUG_ON(mod->arch.plt_count * PLT_ENT_SIZE > mod->arch.plt->sh_size);
+	pltsec->plt_count++;
+	BUG_ON(pltsec->plt_count * PLT_ENT_SIZE > pltsec->plt->sh_size);
 
 	if (!idx)
 		/* Populate a new set of entries */
@@ -129,7 +137,7 @@ static bool duplicate_rel(Elf32_Addr base, const Elf32_Rel *rel, int num)
 
 /* Count how many PLT entries we may need */
 static unsigned int count_plts(const Elf32_Sym *syms, Elf32_Addr base,
-			       const Elf32_Rel *rel, int num)
+			       const Elf32_Rel *rel, int num, Elf32_Word dstidx)
 {
 	unsigned int ret = 0;
 	const Elf32_Sym *s;
@@ -144,13 +152,17 @@ static unsigned int count_plts(const Elf32_Sym *syms, Elf32_Addr base,
 		case R_ARM_THM_JUMP24:
 			/*
 			 * We only have to consider branch targets that resolve
-			 * to undefined symbols. This is not simply a heuristic,
-			 * it is a fundamental limitation, since the PLT itself
-			 * is part of the module, and needs to be within range
-			 * as well, so modules can never grow beyond that limit.
+			 * to symbols that are defined in a different section.
+			 * This is not simply a heuristic, it is a fundamental
+			 * limitation, since there is no guaranteed way to emit
+			 * PLT entries sufficiently close to the branch if the
+			 * section size exceeds the range of a branch
+			 * instruction. So ignore relocations against defined
+			 * symbols if they live in the same section as the
+			 * relocation target.
 			 */
 			s = syms + ELF32_R_SYM(rel[i].r_info);
-			if (s->st_shndx != SHN_UNDEF)
+			if (s->st_shndx == dstidx)
 				break;
 
 			/*
@@ -161,7 +173,12 @@ static unsigned int count_plts(const Elf32_Sym *syms, Elf32_Addr base,
 			 * So we need to support them, but there is no need to
 			 * take them into consideration when trying to optimize
 			 * this code. So let's only check for duplicates when
-			 * the addend is zero.
+			 * the addend is zero. (Note that calls into the core
+			 * module via init PLT entries could involve section
+			 * relative symbol references with non-zero addends, for
+			 * which we may end up emitting duplicates, but the init
+			 * PLT is released along with the rest of the .init
+			 * region as soon as module loading completes.)
 			 */
 			if (!is_zero_addend_relocation(base, rel + i) ||
 			    !duplicate_rel(base, rel, i))
@@ -174,7 +191,8 @@ static unsigned int count_plts(const Elf32_Sym *syms, Elf32_Addr base,
 int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 			      char *secstrings, struct module *mod)
 {
-	unsigned long plts = 0;
+	unsigned long core_plts = 0;
+	unsigned long init_plts = 0;
 	Elf32_Shdr *s, *sechdrs_end = sechdrs + ehdr->e_shnum;
 	Elf32_Sym *syms = NULL;
 
@@ -184,13 +202,15 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 	 */
 	for (s = sechdrs; s < sechdrs_end; ++s) {
 		if (strcmp(".plt", secstrings + s->sh_name) == 0)
-			mod->arch.plt = s;
+			mod->arch.core.plt = s;
+		else if (strcmp(".init.plt", secstrings + s->sh_name) == 0)
+			mod->arch.init.plt = s;
 		else if (s->sh_type == SHT_SYMTAB)
 			syms = (Elf32_Sym *)s->sh_addr;
 	}
 
-	if (!mod->arch.plt) {
-		pr_err("%s: module PLT section missing\n", mod->name);
+	if (!mod->arch.core.plt || !mod->arch.init.plt) {
+		pr_err("%s: module PLT section(s) missing\n", mod->name);
 		return -ENOEXEC;
 	}
 	if (!syms) {
@@ -213,16 +233,29 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 		/* sort by type and symbol index */
 		sort(rels, numrels, sizeof(Elf32_Rel), cmp_rel, NULL);
 
-		plts += count_plts(syms, dstsec->sh_addr, rels, numrels);
+		if (strncmp(secstrings + dstsec->sh_name, ".init", 5) != 0)
+			core_plts += count_plts(syms, dstsec->sh_addr, rels,
+						numrels, s->sh_info);
+		else
+			init_plts += count_plts(syms, dstsec->sh_addr, rels,
+						numrels, s->sh_info);
 	}
 
-	mod->arch.plt->sh_type = SHT_NOBITS;
-	mod->arch.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
-	mod->arch.plt->sh_addralign = L1_CACHE_BYTES;
-	mod->arch.plt->sh_size = round_up(plts * PLT_ENT_SIZE,
-					  sizeof(struct plt_entries));
-	mod->arch.plt_count = 0;
-
-	pr_debug("%s: plt=%x\n", __func__, mod->arch.plt->sh_size);
+	mod->arch.core.plt->sh_type = SHT_NOBITS;
+	mod->arch.core.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+	mod->arch.core.plt->sh_addralign = L1_CACHE_BYTES;
+	mod->arch.core.plt->sh_size = round_up(core_plts * PLT_ENT_SIZE,
+					       sizeof(struct plt_entries));
+	mod->arch.core.plt_count = 0;
+
+	mod->arch.init.plt->sh_type = SHT_NOBITS;
+	mod->arch.init.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+	mod->arch.init.plt->sh_addralign = L1_CACHE_BYTES;
+	mod->arch.init.plt->sh_size = round_up(init_plts * PLT_ENT_SIZE,
+					       sizeof(struct plt_entries));
+	mod->arch.init.plt_count = 0;
+
+	pr_debug("%s: plt=%x, init.plt=%x\n", __func__,
+		 mod->arch.core.plt->sh_size, mod->arch.init.plt->sh_size);
 	return 0;
 }
diff --git a/arch/arm/kernel/module.lds b/arch/arm/kernel/module.lds
index 05881e2b414c..eacb5c67f61e 100644
--- a/arch/arm/kernel/module.lds
+++ b/arch/arm/kernel/module.lds
@@ -1,3 +1,4 @@
 SECTIONS {
 	.plt : { BYTE(0) }
+	.init.plt : { BYTE(0) }
 }
-- 
cgit v1.2.3


From dd59f974bd5e04d000e38ceca1a19085d029794b Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Sat, 25 Mar 2017 00:51:32 +0100
Subject: ARM: 8666/1: mm: dump: Add domain to output

This adds the memory domain (on non-LPAE) to the PMD and PTE dumps. This
isn't in the regular PMD bits because I couldn't find a clean way to
fall back to retain some of the PMD bits when reporting PTE. So this is
special-cased currently.

New output example:

  ---[ Modules ]---
  0x7f000000-0x7f001000       4K KERNEL      ro x  SHD MEM/CACHED/WBWA
  0x7f001000-0x7f002000       4K KERNEL      ro NX SHD MEM/CACHED/WBWA
  0x7f002000-0x7f004000       8K KERNEL      RW NX SHD MEM/CACHED/WBWA
  ---[ Kernel Mapping ]---
  0x80000000-0x80100000       1M KERNEL      RW NX SHD
  0x80100000-0x80800000       7M KERNEL      ro x  SHD
  0x80800000-0x80b00000       3M KERNEL      ro NX SHD
  0x80b00000-0xa0000000     501M KERNEL      RW NX SHD
  ...
  ---[ Vectors ]---
  0xffff0000-0xffff1000       4K VECTORS USR ro x  SHD MEM/CACHED/WBWA
  0xffff1000-0xffff2000       4K VECTORS     ro x  SHD MEM/CACHED/WBWA

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/mm/dump.c | 54 ++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 44 insertions(+), 10 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mm/dump.c b/arch/arm/mm/dump.c
index 21192d6eda40..35ff45470dbf 100644
--- a/arch/arm/mm/dump.c
+++ b/arch/arm/mm/dump.c
@@ -17,6 +17,7 @@
 #include <linux/mm.h>
 #include <linux/seq_file.h>
 
+#include <asm/domain.h>
 #include <asm/fixmap.h>
 #include <asm/memory.h>
 #include <asm/pgtable.h>
@@ -43,6 +44,7 @@ struct pg_state {
 	unsigned long start_address;
 	unsigned level;
 	u64 current_prot;
+	const char *current_domain;
 };
 
 struct prot_bits {
@@ -216,7 +218,8 @@ static void dump_prot(struct pg_state *st, const struct prot_bits *bits, size_t
 	}
 }
 
-static void note_page(struct pg_state *st, unsigned long addr, unsigned level, u64 val)
+static void note_page(struct pg_state *st, unsigned long addr,
+		      unsigned int level, u64 val, const char *domain)
 {
 	static const char units[] = "KMGTPE";
 	u64 prot = val & pg_level[level].mask;
@@ -224,8 +227,10 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level, u
 	if (!st->level) {
 		st->level = level;
 		st->current_prot = prot;
+		st->current_domain = domain;
 		seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
 	} else if (prot != st->current_prot || level != st->level ||
+		   domain != st->current_domain ||
 		   addr >= st->marker[1].start_address) {
 		const char *unit = units;
 		unsigned long delta;
@@ -240,6 +245,8 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level, u
 				unit++;
 			}
 			seq_printf(st->seq, "%9lu%c", delta, *unit);
+			if (st->current_domain)
+				seq_printf(st->seq, " %s", st->current_domain);
 			if (pg_level[st->level].bits)
 				dump_prot(st, pg_level[st->level].bits, pg_level[st->level].num);
 			seq_printf(st->seq, "\n");
@@ -251,11 +258,13 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level, u
 		}
 		st->start_address = addr;
 		st->current_prot = prot;
+		st->current_domain = domain;
 		st->level = level;
 	}
 }
 
-static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
+static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start,
+		     const char *domain)
 {
 	pte_t *pte = pte_offset_kernel(pmd, 0);
 	unsigned long addr;
@@ -263,25 +272,50 @@ static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
 
 	for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
 		addr = start + i * PAGE_SIZE;
-		note_page(st, addr, 4, pte_val(*pte));
+		note_page(st, addr, 4, pte_val(*pte), domain);
 	}
 }
 
+static const char *get_domain_name(pmd_t *pmd)
+{
+#ifndef CONFIG_ARM_LPAE
+	switch (pmd_val(*pmd) & PMD_DOMAIN_MASK) {
+	case PMD_DOMAIN(DOMAIN_KERNEL):
+		return "KERNEL ";
+	case PMD_DOMAIN(DOMAIN_USER):
+		return "USER   ";
+	case PMD_DOMAIN(DOMAIN_IO):
+		return "IO     ";
+	case PMD_DOMAIN(DOMAIN_VECTORS):
+		return "VECTORS";
+	default:
+		return "unknown";
+	}
+#endif
+	return NULL;
+}
+
 static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start)
 {
 	pmd_t *pmd = pmd_offset(pud, 0);
 	unsigned long addr;
 	unsigned i;
+	const char *domain;
 
 	for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
 		addr = start + i * PMD_SIZE;
+		domain = get_domain_name(pmd);
 		if (pmd_none(*pmd) || pmd_large(*pmd) || !pmd_present(*pmd))
-			note_page(st, addr, 3, pmd_val(*pmd));
+			note_page(st, addr, 3, pmd_val(*pmd), domain);
 		else
-			walk_pte(st, pmd, addr);
+			walk_pte(st, pmd, addr, domain);
 
-		if (SECTION_SIZE < PMD_SIZE && pmd_large(pmd[1]))
-			note_page(st, addr + SECTION_SIZE, 3, pmd_val(pmd[1]));
+		if (SECTION_SIZE < PMD_SIZE && pmd_large(pmd[1])) {
+			addr += SECTION_SIZE;
+			pmd++;
+			domain = get_domain_name(pmd);
+			note_page(st, addr, 3, pmd_val(*pmd), domain);
+		}
 	}
 }
 
@@ -296,7 +330,7 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
 		if (!pud_none(*pud)) {
 			walk_pmd(st, pud, addr);
 		} else {
-			note_page(st, addr, 2, pud_val(*pud));
+			note_page(st, addr, 2, pud_val(*pud), NULL);
 		}
 	}
 }
@@ -317,11 +351,11 @@ static void walk_pgd(struct seq_file *m)
 		if (!pgd_none(*pgd)) {
 			walk_pud(&st, pgd, addr);
 		} else {
-			note_page(&st, addr, 1, pgd_val(*pgd));
+			note_page(&st, addr, 1, pgd_val(*pgd), NULL);
 		}
 	}
 
-	note_page(&st, 0, 0, 0);
+	note_page(&st, 0, 0, 0, NULL);
 }
 
 static int ptdump_show(struct seq_file *m, void *v)
-- 
cgit v1.2.3


From ea2d9a96b6abe57d72d39a7af24e0120f5df0a8c Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Sun, 19 Mar 2017 17:23:31 +0100
Subject: ARM: 8663/1: wire up HWCAP/HWCAP2 feature bits to the CPU modalias

Wire up the generic support for exposing CPU feature bits via the
modalias in /sys/device/system/cpu. This allows udev to automatically
load modules for things like crypto algorithms that are implemented
using optional instructions.

Since it is non-trivial to transparantly support both HWCAP and HWCAP2
capabilities in the cpu_feature() macro (which allows a module's hwcap
dependency and init routine to be declared using a single invocation of
module_cpu_feature_match()), support only HWCAP2 for now, which covers
the capabilities that are most likely to be useful in this manner.
Module dependencies on HWCAP will need to be declared explicitly via a
MODULE_DEVICE_TABLE(cpu, ...) declaration.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/Kconfig                  |  1 +
 arch/arm/include/asm/cpufeature.h | 38 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+)
 create mode 100644 arch/arm/include/asm/cpufeature.h

(limited to 'arch')

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 0d4e71b42c77..64b43a84a1eb 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -27,6 +27,7 @@ config ARM
 	select GENERIC_ALLOCATOR
 	select GENERIC_ATOMIC64 if (CPU_V7M || CPU_V6 || !CPU_32v6K || !AEABI)
 	select GENERIC_CLOCKEVENTS_BROADCAST if SMP
+	select GENERIC_CPU_AUTOPROBE
 	select GENERIC_EARLY_IOREMAP
 	select GENERIC_IDLE_POLL_SETUP
 	select GENERIC_IRQ_PROBE
diff --git a/arch/arm/include/asm/cpufeature.h b/arch/arm/include/asm/cpufeature.h
new file mode 100644
index 000000000000..6d425191d01d
--- /dev/null
+++ b/arch/arm/include/asm/cpufeature.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_CPUFEATURE_H
+#define __ASM_CPUFEATURE_H
+
+#include <linux/log2.h>
+#include <asm/hwcap.h>
+
+/*
+ * Due to the fact that ELF_HWCAP is a 32-bit type on ARM, and given the number
+ * of optional CPU features it defines, ARM's CPU hardware capability bits have
+ * been distributed over separate elf_hwcap and elf_hwcap2 variables, each of
+ * which covers a subset of the available CPU features.
+ *
+ * Currently, only a few of those are suitable for automatic module loading
+ * (which is the primary use case of this facility) and those happen to be all
+ * covered by HWCAP2. So let's only cover those via the cpu_feature()
+ * convenience macro for now (which is used by module_cpu_feature_match()).
+ * However, all capabilities are exposed via the modalias, and can be matched
+ * using an explicit MODULE_DEVICE_TABLE() that uses __hwcap_feature() directly.
+ */
+#define MAX_CPU_FEATURES	64
+#define __hwcap_feature(x)	ilog2(HWCAP_ ## x)
+#define __hwcap2_feature(x)	(32 + ilog2(HWCAP2_ ## x))
+#define cpu_feature(x)		__hwcap2_feature(x)
+
+static inline bool cpu_have_feature(unsigned int num)
+{
+	return num < 32 ? elf_hwcap & BIT(num) : elf_hwcap2 & BIT(num - 32);
+}
+
+#endif
-- 
cgit v1.2.3


From b089c31c519c3906c14801b6ec483e18a5152a50 Mon Sep 17 00:00:00 2001
From: Jon Medhurst <tixy@linaro.org>
Date: Mon, 10 Apr 2017 11:13:59 +0100
Subject: ARM: 8667/3: Fix memory attribute inconsistencies when using fixmap

To cope with the variety in ARM architectures and configurations, the
pagetable attributes for kernel memory are generated at runtime to match
the system the kernel finds itself on. This calculated value is stored
in pgprot_kernel.

However, when early fixmap support was added for ARM (commit
a5f4c561b3b1) the attributes used for mappings were hard coded because
pgprot_kernel is not set up early enough. Unfortunately, when fixmap is
used after early boot this means the memory being mapped can have
different attributes to existing mappings, potentially leading to
unpredictable behaviour. A specific problem also exists due to the hard
coded values not include the 'shareable' attribute which means on
systems where this matters (e.g. those with multiple CPU clusters) the
cache contents for a memory location can become inconsistent between
CPUs.

To resolve these issues we change fixmap to use the same memory
attributes (from pgprot_kernel) that the rest of the kernel uses. To
enable this we need to refactor the initialisation code so
build_mem_type_table() is called early enough. Note, that relies on early
param parsing for memory type overrides passed via the kernel command
line, so we need to make sure this call is still after
parse_early_params().

[ardb: keep early_fixmap_init() before param parsing, for earlycon]

Fixes: a5f4c561b3b1 ("ARM: 8415/1: early fixmap support for earlycon")
Cc: <stable@vger.kernel.org> # v4.3+
Tested-by: afzal mohammed <afzal.mohd.ma@gmail.com>
Signed-off-by: Jon Medhurst <tixy@linaro.org>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/include/asm/fixmap.h |  2 +-
 arch/arm/kernel/setup.c       |  4 ++--
 arch/arm/mm/mmu.c             | 16 +++++++++++++---
 3 files changed, 16 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/include/asm/fixmap.h b/arch/arm/include/asm/fixmap.h
index 5c17d2dec777..8f967d1373f6 100644
--- a/arch/arm/include/asm/fixmap.h
+++ b/arch/arm/include/asm/fixmap.h
@@ -41,7 +41,7 @@ static const enum fixed_addresses __end_of_fixed_addresses =
 
 #define FIXMAP_PAGE_COMMON	(L_PTE_YOUNG | L_PTE_PRESENT | L_PTE_XN | L_PTE_DIRTY)
 
-#define FIXMAP_PAGE_NORMAL	(FIXMAP_PAGE_COMMON | L_PTE_MT_WRITEBACK)
+#define FIXMAP_PAGE_NORMAL	(pgprot_kernel | L_PTE_XN)
 #define FIXMAP_PAGE_RO		(FIXMAP_PAGE_NORMAL | L_PTE_RDONLY)
 
 /* Used by set_fixmap_(io|nocache), both meant for mapping a device */
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index f4e54503afa9..32e1a9513dc7 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -80,7 +80,7 @@ __setup("fpe=", fpe_setup);
 
 extern void init_default_cache_policy(unsigned long);
 extern void paging_init(const struct machine_desc *desc);
-extern void early_paging_init(const struct machine_desc *);
+extern void early_mm_init(const struct machine_desc *);
 extern void adjust_lowmem_bounds(void);
 extern enum reboot_mode reboot_mode;
 extern void setup_dma_zone(const struct machine_desc *desc);
@@ -1088,7 +1088,7 @@ void __init setup_arch(char **cmdline_p)
 	parse_early_param();
 
 #ifdef CONFIG_MMU
-	early_paging_init(mdesc);
+	early_mm_init(mdesc);
 #endif
 	setup_dma_zone(mdesc);
 	xen_early_init();
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 4e016d7f37b3..347cca965783 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -414,6 +414,11 @@ void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot)
 		     FIXADDR_END);
 	BUG_ON(idx >= __end_of_fixed_addresses);
 
+	/* we only support device mappings until pgprot_kernel has been set */
+	if (WARN_ON(pgprot_val(prot) != pgprot_val(FIXMAP_PAGE_IO) &&
+		    pgprot_val(pgprot_kernel) == 0))
+		return;
+
 	if (pgprot_val(prot))
 		set_pte_at(NULL, vaddr, pte,
 			pfn_pte(phys >> PAGE_SHIFT, prot));
@@ -1492,7 +1497,7 @@ pgtables_remap lpae_pgtables_remap_asm;
  * early_paging_init() recreates boot time page table setup, allowing machines
  * to switch over to a high (>4G) address space on LPAE systems
  */
-void __init early_paging_init(const struct machine_desc *mdesc)
+static void __init early_paging_init(const struct machine_desc *mdesc)
 {
 	pgtables_remap *lpae_pgtables_remap;
 	unsigned long pa_pgd;
@@ -1560,7 +1565,7 @@ void __init early_paging_init(const struct machine_desc *mdesc)
 
 #else
 
-void __init early_paging_init(const struct machine_desc *mdesc)
+static void __init early_paging_init(const struct machine_desc *mdesc)
 {
 	long long offset;
 
@@ -1616,7 +1621,6 @@ void __init paging_init(const struct machine_desc *mdesc)
 {
 	void *zero_page;
 
-	build_mem_type_table();
 	prepare_page_table();
 	map_lowmem();
 	memblock_set_current_limit(arm_lowmem_limit);
@@ -1636,3 +1640,9 @@ void __init paging_init(const struct machine_desc *mdesc)
 	empty_zero_page = virt_to_page(zero_page);
 	__flush_dcache_page(NULL, empty_zero_page);
 }
+
+void __init early_mm_init(const struct machine_desc *mdesc)
+{
+	build_mem_type_table();
+	early_paging_init(mdesc);
+}
-- 
cgit v1.2.3


From 6f05d0761af612e04572ba4d65b4c0274a88444f Mon Sep 17 00:00:00 2001
From: Abel Vesa <abelvesa@linux.com>
Date: Mon, 3 Apr 2017 23:58:54 +0100
Subject: ARM: 8668/1: ftrace: Fix dynamic ftrace with DEBUG_RODATA and
 !FRAME_POINTER

The support for dynamic ftrace with CONFIG_DEBUG_RODATA involves
overriding the weak arch_ftrace_update_code() with a variant which makes
the kernel text writable around the patching.

This override was however added under the CONFIG_OLD_MCOUNT ifdef, and
CONFIG_OLD_MCOUNT is only enabled if frame pointers are enabled.

This leads to non-functional dynamic ftrace (ftrace triggers a
WARN_ON()) when CONFIG_DEBUG_RODATA is enabled and CONFIG_FRAME_POINTER
is not.

Move the override out of that ifdef and into the CONFIG_DYNAMIC_FTRACE
ifdef where it belongs.

Fixes: 80d6b0c2eed2a ("ARM: mm: allow text and rodata sections to be read-only")
Suggested-by: Nicolai Stange <nicstange@gmail.com>
Suggested-by: Rabin Vincent <rabin@rab.in>
Signed-off-by: Abel Vesa <abelvesa@gmail.com>
Acked-by: Rabin Vincent <rabin@rab.in>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/kernel/ftrace.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c
index 3f1759411d51..414e60ed0257 100644
--- a/arch/arm/kernel/ftrace.c
+++ b/arch/arm/kernel/ftrace.c
@@ -29,11 +29,6 @@
 #endif
 
 #ifdef CONFIG_DYNAMIC_FTRACE
-#ifdef CONFIG_OLD_MCOUNT
-#define OLD_MCOUNT_ADDR	((unsigned long) mcount)
-#define OLD_FTRACE_ADDR ((unsigned long) ftrace_caller_old)
-
-#define	OLD_NOP		0xe1a00000	/* mov r0, r0 */
 
 static int __ftrace_modify_code(void *data)
 {
@@ -51,6 +46,12 @@ void arch_ftrace_update_code(int command)
 	stop_machine(__ftrace_modify_code, &command, NULL);
 }
 
+#ifdef CONFIG_OLD_MCOUNT
+#define OLD_MCOUNT_ADDR	((unsigned long) mcount)
+#define OLD_FTRACE_ADDR ((unsigned long) ftrace_caller_old)
+
+#define	OLD_NOP		0xe1a00000	/* mov r0, r0 */
+
 static unsigned long ftrace_nop_replace(struct dyn_ftrace *rec)
 {
 	return rec->arch.old_mcount ? OLD_NOP : NOP;
-- 
cgit v1.2.3


From 6d80594936914e798b1b54b3bfe4bd68d8418966 Mon Sep 17 00:00:00 2001
From: Vladimir Murzin <vladimir.murzin@arm.com>
Date: Mon, 24 Apr 2017 10:40:48 +0100
Subject: ARM: 8670/1: V7M: Do not corrupt vector table around
 v7m_invalidate_l1 call

We save/restore registers around v7m_invalidate_l1 to address pointed
by r12, which is vector table, so the first eight entries are
overwritten with a garbage. We already have stack setup at that stage,
so use it to save/restore register.

Fixes: 6a8146f420be ("ARM: 8609/1: V7M: Add support for the Cortex-M7 processor")
Cc: <stable@vger.kernel.org>
Signed-off-by: Vladimir Murzin <vladimir.murzin@arm.com>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/mm/proc-v7m.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S
index 8dea61640cc1..50497778c2e5 100644
--- a/arch/arm/mm/proc-v7m.S
+++ b/arch/arm/mm/proc-v7m.S
@@ -147,10 +147,10 @@ __v7m_setup_cont:
 
 	@ Configure caches (if implemented)
 	teq     r8, #0
-	stmneia	r12, {r0-r6, lr}	@ v7m_invalidate_l1 touches r0-r6
+	stmneia	sp, {r0-r6, lr}		@ v7m_invalidate_l1 touches r0-r6
 	blne	v7m_invalidate_l1
 	teq     r8, #0			@ re-evalutae condition
-	ldmneia	r12, {r0-r6, lr}
+	ldmneia	sp, {r0-r6, lr}
 
 	@ Configure the System Control Register to ensure 8-byte stack alignment
 	@ Note the STKALIGN bit is either RW or RAO.
-- 
cgit v1.2.3


From b70cd406d7fe9976962d621d8c60d324eb47d284 Mon Sep 17 00:00:00 2001
From: Vladimir Murzin <vladimir.murzin@arm.com>
Date: Mon, 24 Apr 2017 10:41:53 +0100
Subject: ARM: 8671/1: V7M: Preserve registers across switch from Thread to
 Handler mode

According to ARMv7 ARM, when exception is taken content of r0-r3, r12
is unknown (see ExceptionTaken() pseudocode). Even though existent
implementations keep these register unchanged, preserve them to be in
line with architecture.

Reported-by: Dobromir Stefanov <dobromir.stefanov@arm.com>
Signed-off-by: Vladimir Murzin <vladimir.murzin@arm.com>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/mm/proc-v7m.S | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S
index 8dea61640cc1..11ae6b847ad0 100644
--- a/arch/arm/mm/proc-v7m.S
+++ b/arch/arm/mm/proc-v7m.S
@@ -135,9 +135,11 @@ __v7m_setup_cont:
 	dsb
 	mov	r6, lr			@ save LR
 	ldr	sp, =init_thread_union + THREAD_START_SP
+	stmia	sp, {r0-r3, r12}
 	cpsie	i
 	svc	#0
 1:	cpsid	i
+	ldmia	sp, {r0-r3, r12}
 	str	r5, [r12, #11 * 4]	@ restore the original SVC vector entry
 	mov	lr, r6			@ restore LR
 
-- 
cgit v1.2.3


From 11ce4b33aedc65198d7bc9669344ebca5ee36a41 Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@linaro.org>
Date: Tue, 25 Apr 2017 21:20:52 +0100
Subject: ARM: 8672/1: mm: remove tasklist locking from update_sections_early()

The below backtrace can be observed on -rt kernel with
CONFIG_DEBUG_MODULE_RONX (4.9 kernel CONFIG_DEBUG_RODATA) option enabled:

 BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:993
 in_atomic(): 1, irqs_disabled(): 128, pid: 14, name: migration/0
 1 lock held by migration/0/14:
  #0:  (tasklist_lock){+.+...}, at: [<c01183e8>] update_sections_early+0x24/0xdc
 irq event stamp: 38
 hardirqs last  enabled at (37): [<c08f6f7c>] _raw_spin_unlock_irq+0x24/0x68
 hardirqs last disabled at (38): [<c01fdfe8>] multi_cpu_stop+0xd8/0x138
 softirqs last  enabled at (0): [<c01303ec>] copy_process.part.5+0x238/0x1b64
 softirqs last disabled at (0): [<  (null)>]   (null)
 Preemption disabled at: [<c01fe244>] cpu_stopper_thread+0x80/0x10c
 CPU: 0 PID: 14 Comm: migration/0 Not tainted 4.9.21-rt16-02220-g49e319c #15
 Hardware name: Generic DRA74X (Flattened Device Tree)
 [<c0112014>] (unwind_backtrace) from [<c010d370>] (show_stack+0x10/0x14)
 [<c010d370>] (show_stack) from [<c049beb8>] (dump_stack+0xa8/0xd4)
 [<c049beb8>] (dump_stack) from [<c01631a0>] (___might_sleep+0x1bc/0x2ac)
 [<c01631a0>] (___might_sleep) from [<c08f7244>] (__rt_spin_lock+0x1c/0x30)
 [<c08f7244>] (__rt_spin_lock) from [<c08f77a4>] (rt_read_lock+0x54/0x68)
 [<c08f77a4>] (rt_read_lock) from [<c01183e8>] (update_sections_early+0x24/0xdc)
 [<c01183e8>] (update_sections_early) from [<c01184b0>] (__fix_kernmem_perms+0x10/0x1c)
 [<c01184b0>] (__fix_kernmem_perms) from [<c01fe010>] (multi_cpu_stop+0x100/0x138)
 [<c01fe010>] (multi_cpu_stop) from [<c01fe24c>] (cpu_stopper_thread+0x88/0x10c)
 [<c01fe24c>] (cpu_stopper_thread) from [<c015edc4>] (smpboot_thread_fn+0x174/0x31c)
 [<c015edc4>] (smpboot_thread_fn) from [<c015a988>] (kthread+0xf0/0x108)
 [<c015a988>] (kthread) from [<c0108818>] (ret_from_fork+0x14/0x3c)
 Freeing unused kernel memory: 1024K (c0d00000 - c0e00000)

The stop_machine() is called with cpus = NULL from fix_kernmem_perms() and
mark_rodata_ro() which means only one CPU will execute
update_sections_early() while all other CPUs will spin and wait. Hence,
it's safe to remove tasklist locking from update_sections_early(). As part
of this change also mark functions which are local to this module as
static.

Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Acked-by: Laura Abbott <labbott@redhat.com>
Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/mm/init.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 1d8558ff9827..ad80548325fe 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -709,34 +709,37 @@ void set_section_perms(struct section_perm *perms, int n, bool set,
 
 }
 
+/**
+ * update_sections_early intended to be called only through stop_machine
+ * framework and executed by only one CPU while all other CPUs will spin and
+ * wait, so no locking is required in this function.
+ */
 static void update_sections_early(struct section_perm perms[], int n)
 {
 	struct task_struct *t, *s;
 
-	read_lock(&tasklist_lock);
 	for_each_process(t) {
 		if (t->flags & PF_KTHREAD)
 			continue;
 		for_each_thread(t, s)
 			set_section_perms(perms, n, true, s->mm);
 	}
-	read_unlock(&tasklist_lock);
 	set_section_perms(perms, n, true, current->active_mm);
 	set_section_perms(perms, n, true, &init_mm);
 }
 
-int __fix_kernmem_perms(void *unused)
+static int __fix_kernmem_perms(void *unused)
 {
 	update_sections_early(nx_perms, ARRAY_SIZE(nx_perms));
 	return 0;
 }
 
-void fix_kernmem_perms(void)
+static void fix_kernmem_perms(void)
 {
 	stop_machine(__fix_kernmem_perms, NULL, NULL);
 }
 
-int __mark_rodata_ro(void *unused)
+static int __mark_rodata_ro(void *unused)
 {
 	update_sections_early(ro_perms, ARRAY_SIZE(ro_perms));
 	return 0;
-- 
cgit v1.2.3