summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/DocBook/kgdb.tmpl6
-rw-r--r--Documentation/x86/zero-page.txt2
-rw-r--r--arch/s390/include/asm/topology.h24
-rw-r--r--arch/s390/kernel/cache.c25
-rw-r--r--arch/s390/kernel/early.c12
-rw-r--r--arch/s390/kernel/setup.c1
-rw-r--r--arch/s390/kernel/smp.c54
-rw-r--r--arch/s390/kernel/topology.c134
-rw-r--r--arch/s390/kernel/vdso64/clock_gettime.S6
-rw-r--r--arch/s390/mm/mmap.c5
-rw-r--r--arch/x86/Kconfig16
-rw-r--r--arch/x86/Kconfig.debug13
-rw-r--r--arch/x86/boot/compressed/Makefile1
-rw-r--r--arch/x86/boot/compressed/aslr.c34
-rw-r--r--arch/x86/boot/compressed/efi_stub_64.S25
-rw-r--r--arch/x86/boot/compressed/efi_thunk_64.S196
-rw-r--r--arch/x86/boot/compressed/misc.c3
-rw-r--r--arch/x86/boot/compressed/misc.h6
-rw-r--r--arch/x86/include/asm/apic.h8
-rw-r--r--arch/x86/include/asm/imr.h60
-rw-r--r--arch/x86/include/asm/page_types.h2
-rw-r--r--arch/x86/include/asm/spinlock.h90
-rw-r--r--arch/x86/include/uapi/asm/bootparam.h1
-rw-r--r--arch/x86/kernel/acpi/boot.c5
-rw-r--r--arch/x86/kernel/cpu/microcode/intel.c5
-rw-r--r--arch/x86/kernel/cpu/microcode/intel_early.c6
-rw-r--r--arch/x86/kernel/irq.c3
-rw-r--r--arch/x86/kernel/kprobes/core.c2
-rw-r--r--arch/x86/kernel/kvm.c13
-rw-r--r--arch/x86/kernel/module.c10
-rw-r--r--arch/x86/kernel/setup.c22
-rw-r--r--arch/x86/kernel/uprobes.c153
-rw-r--r--arch/x86/mm/init.c28
-rw-r--r--arch/x86/mm/mmap.c6
-rw-r--r--arch/x86/platform/Makefile1
-rw-r--r--arch/x86/platform/efi/efi_stub_64.S161
-rw-r--r--arch/x86/platform/efi/efi_thunk_64.S121
-rw-r--r--arch/x86/platform/intel-quark/Makefile2
-rw-r--r--arch/x86/platform/intel-quark/imr.c661
-rw-r--r--arch/x86/platform/intel-quark/imr_selftest.c129
-rw-r--r--arch/x86/xen/spinlock.c13
-rw-r--r--drivers/char/ipmi/ipmi_devintf.c6
-rw-r--r--drivers/char/ipmi/ipmi_msghandler.c102
-rw-r--r--drivers/char/ipmi/ipmi_si_intf.c121
-rw-r--r--drivers/char/ipmi/ipmi_ssif.c6
-rw-r--r--drivers/firmware/efi/libstub/efi-stub-helper.c16
-rw-r--r--drivers/platform/x86/Kconfig25
-rw-r--r--fs/binfmt_elf.c5
-rw-r--r--include/linux/compiler.h6
-rw-r--r--include/linux/kdb.h8
-rw-r--r--include/linux/sched.h10
-rw-r--r--kernel/debug/debug_core.c19
-rw-r--r--kernel/debug/kdb/kdb_io.c46
-rw-r--r--kernel/debug/kdb/kdb_main.c16
-rw-r--r--kernel/debug/kdb/kdb_private.h4
-rw-r--r--kernel/locking/rtmutex.c3
-rw-r--r--kernel/printk/printk.c2
-rw-r--r--kernel/rcu/tree_plugin.h1
-rw-r--r--kernel/sched/auto_group.c6
-rw-r--r--kernel/sched/completion.c19
-rw-r--r--kernel/sched/core.c113
-rw-r--r--kernel/sched/deadline.c33
-rw-r--r--kernel/sched/sched.h76
-rw-r--r--kernel/time/ntp.c10
64 files changed, 1978 insertions, 740 deletions
diff --git a/Documentation/DocBook/kgdb.tmpl b/Documentation/DocBook/kgdb.tmpl
index 2428cc04dbc8..f3abca7ec53d 100644
--- a/Documentation/DocBook/kgdb.tmpl
+++ b/Documentation/DocBook/kgdb.tmpl
@@ -197,6 +197,7 @@
may be configured as a kernel built-in or a kernel loadable module.
You can only make use of <constant>kgdbwait</constant> and early
debugging if you build kgdboc into the kernel as a built-in.
+ </para>
<para>Optionally you can elect to activate kms (Kernel Mode
Setting) integration. When you use kms with kgdboc and you have a
video driver that has atomic mode setting hooks, it is possible to
@@ -206,7 +207,6 @@
crashes or doing analysis of memory with kdb while allowing the
full graphics console applications to run.
</para>
- </para>
<sect2 id="kgdbocArgs">
<title>kgdboc arguments</title>
<para>Usage: <constant>kgdboc=[kms][[,]kbd][[,]serial_device][,baud]</constant></para>
@@ -284,7 +284,6 @@
</listitem>
</orderedlist>
</para>
- </sect3>
<para>NOTE: Kgdboc does not support interrupting the target via the
gdb remote protocol. You must manually send a sysrq-g unless you
have a proxy that splits console output to a terminal program.
@@ -305,6 +304,7 @@
as well as on the initial connect, or to use a debugger proxy that
allows an unmodified gdb to do the debugging.
</para>
+ </sect3>
</sect2>
</sect1>
<sect1 id="kgdbwait">
@@ -350,12 +350,12 @@
</para>
</listitem>
</orderedlist>
+ </para>
<para>IMPORTANT NOTE: You cannot use kgdboc + kgdbcon on a tty that is an
active system console. An example of incorrect usage is <constant>console=ttyS0,115200 kgdboc=ttyS0 kgdbcon</constant>
</para>
<para>It is possible to use this option with kgdboc on a tty that is not a system console.
</para>
- </para>
</sect1>
<sect1 id="kgdbreboot">
<title>Run time parameter: kgdbreboot</title>
diff --git a/Documentation/x86/zero-page.txt b/Documentation/x86/zero-page.txt
index 199f453cb4de..82fbdbc1e0b0 100644
--- a/Documentation/x86/zero-page.txt
+++ b/Documentation/x86/zero-page.txt
@@ -3,7 +3,7 @@ protocol of kernel. These should be filled by bootloader or 16-bit
real-mode setup code of the kernel. References/settings to it mainly
are in:
- arch/x86/include/asm/bootparam.h
+ arch/x86/include/uapi/asm/bootparam.h
Offset Proto Name Meaning
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
index c4fbb9527c5c..b1453a2ae1ca 100644
--- a/arch/s390/include/asm/topology.h
+++ b/arch/s390/include/asm/topology.h
@@ -18,15 +18,15 @@ struct cpu_topology_s390 {
cpumask_t book_mask;
};
-extern struct cpu_topology_s390 cpu_topology[NR_CPUS];
+DECLARE_PER_CPU(struct cpu_topology_s390, cpu_topology);
-#define topology_physical_package_id(cpu) (cpu_topology[cpu].socket_id)
-#define topology_thread_id(cpu) (cpu_topology[cpu].thread_id)
-#define topology_thread_cpumask(cpu) (&cpu_topology[cpu].thread_mask)
-#define topology_core_id(cpu) (cpu_topology[cpu].core_id)
-#define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_mask)
-#define topology_book_id(cpu) (cpu_topology[cpu].book_id)
-#define topology_book_cpumask(cpu) (&cpu_topology[cpu].book_mask)
+#define topology_physical_package_id(cpu) (per_cpu(cpu_topology, cpu).socket_id)
+#define topology_thread_id(cpu) (per_cpu(cpu_topology, cpu).thread_id)
+#define topology_thread_cpumask(cpu) (&per_cpu(cpu_topology, cpu).thread_mask)
+#define topology_core_id(cpu) (per_cpu(cpu_topology, cpu).core_id)
+#define topology_core_cpumask(cpu) (&per_cpu(cpu_topology, cpu).core_mask)
+#define topology_book_id(cpu) (per_cpu(cpu_topology, cpu).book_id)
+#define topology_book_cpumask(cpu) (&per_cpu(cpu_topology, cpu).book_mask)
#define mc_capable() 1
@@ -51,14 +51,6 @@ static inline void topology_expect_change(void) { }
#define POLARIZATION_VM (2)
#define POLARIZATION_VH (3)
-#ifdef CONFIG_SCHED_BOOK
-void s390_init_cpu_topology(void);
-#else
-static inline void s390_init_cpu_topology(void)
-{
-};
-#endif
-
#include <asm-generic/topology.h>
#endif /* _ASM_S390_TOPOLOGY_H */
diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c
index 632fa06ea162..0969d113b3d6 100644
--- a/arch/s390/kernel/cache.c
+++ b/arch/s390/kernel/cache.c
@@ -91,12 +91,9 @@ static inline enum cache_type get_cache_type(struct cache_info *ci, int level)
{
if (level >= CACHE_MAX_LEVEL)
return CACHE_TYPE_NOCACHE;
-
ci += level;
-
if (ci->scope != CACHE_SCOPE_SHARED && ci->scope != CACHE_SCOPE_PRIVATE)
return CACHE_TYPE_NOCACHE;
-
return cache_type_map[ci->type];
}
@@ -111,23 +108,19 @@ static inline unsigned long ecag(int ai, int li, int ti)
}
static void ci_leaf_init(struct cacheinfo *this_leaf, int private,
- enum cache_type type, unsigned int level)
+ enum cache_type type, unsigned int level, int cpu)
{
int ti, num_sets;
- int cpu = smp_processor_id();
if (type == CACHE_TYPE_INST)
ti = CACHE_TI_INSTRUCTION;
else
ti = CACHE_TI_UNIFIED;
-
this_leaf->level = level + 1;
this_leaf->type = type;
this_leaf->coherency_line_size = ecag(EXTRACT_LINE_SIZE, level, ti);
- this_leaf->ways_of_associativity = ecag(EXTRACT_ASSOCIATIVITY,
- level, ti);
+ this_leaf->ways_of_associativity = ecag(EXTRACT_ASSOCIATIVITY, level, ti);
this_leaf->size = ecag(EXTRACT_SIZE, level, ti);
-
num_sets = this_leaf->size / this_leaf->coherency_line_size;
num_sets /= this_leaf->ways_of_associativity;
this_leaf->number_of_sets = num_sets;
@@ -145,7 +138,6 @@ int init_cache_level(unsigned int cpu)
if (!this_cpu_ci)
return -EINVAL;
-
ct.raw = ecag(EXTRACT_TOPOLOGY, 0, 0);
do {
ctype = get_cache_type(&ct.ci[0], level);
@@ -154,34 +146,31 @@ int init_cache_level(unsigned int cpu)
/* Separate instruction and data caches */
leaves += (ctype == CACHE_TYPE_SEPARATE) ? 2 : 1;
} while (++level < CACHE_MAX_LEVEL);
-
this_cpu_ci->num_levels = level;
this_cpu_ci->num_leaves = leaves;
-
return 0;
}
int populate_cache_leaves(unsigned int cpu)
{
+ struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+ struct cacheinfo *this_leaf = this_cpu_ci->info_list;
unsigned int level, idx, pvt;
union cache_topology ct;
enum cache_type ctype;
- struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
- struct cacheinfo *this_leaf = this_cpu_ci->info_list;
ct.raw = ecag(EXTRACT_TOPOLOGY, 0, 0);
for (idx = 0, level = 0; level < this_cpu_ci->num_levels &&
idx < this_cpu_ci->num_leaves; idx++, level++) {
if (!this_leaf)
return -EINVAL;
-
pvt = (ct.ci[level].scope == CACHE_SCOPE_PRIVATE) ? 1 : 0;
ctype = get_cache_type(&ct.ci[0], level);
if (ctype == CACHE_TYPE_SEPARATE) {
- ci_leaf_init(this_leaf++, pvt, CACHE_TYPE_DATA, level);
- ci_leaf_init(this_leaf++, pvt, CACHE_TYPE_INST, level);
+ ci_leaf_init(this_leaf++, pvt, CACHE_TYPE_DATA, level, cpu);
+ ci_leaf_init(this_leaf++, pvt, CACHE_TYPE_INST, level, cpu);
} else {
- ci_leaf_init(this_leaf++, pvt, ctype, level);
+ ci_leaf_init(this_leaf++, pvt, ctype, level, cpu);
}
}
return 0;
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 70a329450901..4427ab7ac23a 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -393,17 +393,19 @@ static __init void detect_machine_facilities(void)
S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_LC;
if (test_facility(129))
S390_lowcore.machine_flags |= MACHINE_FLAG_VX;
- if (test_facility(128))
- S390_lowcore.machine_flags |= MACHINE_FLAG_CAD;
#endif
}
-static int __init nocad_setup(char *str)
+static int __init cad_setup(char *str)
{
- S390_lowcore.machine_flags &= ~MACHINE_FLAG_CAD;
+ int val;
+
+ get_option(&str, &val);
+ if (val && test_facility(128))
+ S390_lowcore.machine_flags |= MACHINE_FLAG_CAD;
return 0;
}
-early_param("nocad", nocad_setup);
+early_param("cad", cad_setup);
static int __init cad_init(void)
{
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index bfac77ada4f2..a5ea8bc17cb3 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -909,7 +909,6 @@ void __init setup_arch(char **cmdline_p)
setup_lowcore();
smp_fill_possible_mask();
cpu_init();
- s390_init_cpu_topology();
/*
* Setup capabilities (ELF_HWCAP & ELF_PLATFORM).
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index a668993ff577..db8f1115a3bf 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -59,14 +59,13 @@ enum {
CPU_STATE_CONFIGURED,
};
+static DEFINE_PER_CPU(struct cpu *, cpu_device);
+
struct pcpu {
- struct cpu *cpu;
struct _lowcore *lowcore; /* lowcore page(s) for the cpu */
- unsigned long async_stack; /* async stack for the cpu */
- unsigned long panic_stack; /* panic stack for the cpu */
unsigned long ec_mask; /* bit mask for ec_xxx functions */
- int state; /* physical cpu state */
- int polarization; /* physical polarization */
+ signed char state; /* physical cpu state */
+ signed char polarization; /* physical polarization */
u16 address; /* physical cpu address */
};
@@ -173,25 +172,30 @@ static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit)
pcpu_sigp_retry(pcpu, order, 0);
}
+#define ASYNC_FRAME_OFFSET (ASYNC_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE)
+#define PANIC_FRAME_OFFSET (PAGE_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE)
+
static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
{
+ unsigned long async_stack, panic_stack;
struct _lowcore *lc;
if (pcpu != &pcpu_devices[0]) {
pcpu->lowcore = (struct _lowcore *)
__get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER);
- pcpu->async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER);
- pcpu->panic_stack = __get_free_page(GFP_KERNEL);
- if (!pcpu->lowcore || !pcpu->panic_stack || !pcpu->async_stack)
+ async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER);
+ panic_stack = __get_free_page(GFP_KERNEL);
+ if (!pcpu->lowcore || !panic_stack || !async_stack)
goto out;
+ } else {
+ async_stack = pcpu->lowcore->async_stack - ASYNC_FRAME_OFFSET;
+ panic_stack = pcpu->lowcore->panic_stack - PANIC_FRAME_OFFSET;
}
lc = pcpu->lowcore;
memcpy(lc, &S390_lowcore, 512);
memset((char *) lc + 512, 0, sizeof(*lc) - 512);
- lc->async_stack = pcpu->async_stack + ASYNC_SIZE
- - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
- lc->panic_stack = pcpu->panic_stack + PAGE_SIZE
- - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
+ lc->async_stack = async_stack + ASYNC_FRAME_OFFSET;
+ lc->panic_stack = panic_stack + PANIC_FRAME_OFFSET;
lc->cpu_nr = cpu;
lc->spinlock_lockval = arch_spin_lockval(cpu);
#ifndef CONFIG_64BIT
@@ -212,8 +216,8 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
return 0;
out:
if (pcpu != &pcpu_devices[0]) {
- free_page(pcpu->panic_stack);
- free_pages(pcpu->async_stack, ASYNC_ORDER);
+ free_page(panic_stack);
+ free_pages(async_stack, ASYNC_ORDER);
free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
}
return -ENOMEM;
@@ -235,11 +239,11 @@ static void pcpu_free_lowcore(struct pcpu *pcpu)
#else
vdso_free_per_cpu(pcpu->lowcore);
#endif
- if (pcpu != &pcpu_devices[0]) {
- free_page(pcpu->panic_stack);
- free_pages(pcpu->async_stack, ASYNC_ORDER);
- free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
- }
+ if (pcpu == &pcpu_devices[0])
+ return;
+ free_page(pcpu->lowcore->panic_stack-PANIC_FRAME_OFFSET);
+ free_pages(pcpu->lowcore->async_stack-ASYNC_FRAME_OFFSET, ASYNC_ORDER);
+ free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
}
#endif /* CONFIG_HOTPLUG_CPU */
@@ -366,7 +370,8 @@ void smp_call_online_cpu(void (*func)(void *), void *data)
void smp_call_ipl_cpu(void (*func)(void *), void *data)
{
pcpu_delegate(&pcpu_devices[0], func, data,
- pcpu_devices->panic_stack + PAGE_SIZE);
+ pcpu_devices->lowcore->panic_stack -
+ PANIC_FRAME_OFFSET + PAGE_SIZE);
}
int smp_find_processor_id(u16 address)
@@ -935,10 +940,6 @@ void __init smp_prepare_boot_cpu(void)
pcpu->state = CPU_STATE_CONFIGURED;
pcpu->address = stap();
pcpu->lowcore = (struct _lowcore *)(unsigned long) store_prefix();
- pcpu->async_stack = S390_lowcore.async_stack - ASYNC_SIZE
- + STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
- pcpu->panic_stack = S390_lowcore.panic_stack - PAGE_SIZE
- + STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
S390_lowcore.percpu_offset = __per_cpu_offset[0];
smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN);
set_cpu_present(0, true);
@@ -1078,8 +1079,7 @@ static int smp_cpu_notify(struct notifier_block *self, unsigned long action,
void *hcpu)
{
unsigned int cpu = (unsigned int)(long)hcpu;
- struct cpu *c = pcpu_devices[cpu].cpu;
- struct device *s = &c->dev;
+ struct device *s = &per_cpu(cpu_device, cpu)->dev;
int err = 0;
switch (action & ~CPU_TASKS_FROZEN) {
@@ -1102,7 +1102,7 @@ static int smp_add_present_cpu(int cpu)
c = kzalloc(sizeof(*c), GFP_KERNEL);
if (!c)
return -ENOMEM;
- pcpu_devices[cpu].cpu = c;
+ per_cpu(cpu_device, cpu) = c;
s = &c->dev;
c->hotpluggable = 1;
rc = register_cpu(c, cpu);
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 24ee33f1af24..14da43b801d9 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -7,14 +7,14 @@
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/workqueue.h>
-#include <linux/bootmem.h>
#include <linux/cpuset.h>
#include <linux/device.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/sched.h>
-#include <linux/init.h>
#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/smp.h>
#include <linux/mm.h>
@@ -42,8 +42,8 @@ static DEFINE_SPINLOCK(topology_lock);
static struct mask_info socket_info;
static struct mask_info book_info;
-struct cpu_topology_s390 cpu_topology[NR_CPUS];
-EXPORT_SYMBOL_GPL(cpu_topology);
+DEFINE_PER_CPU(struct cpu_topology_s390, cpu_topology);
+EXPORT_PER_CPU_SYMBOL_GPL(cpu_topology);
static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu)
{
@@ -90,15 +90,15 @@ static struct mask_info *add_cpus_to_mask(struct topology_core *tl_core,
if (lcpu < 0)
continue;
for (i = 0; i <= smp_cpu_mtid; i++) {
- cpu_topology[lcpu + i].book_id = book->id;
- cpu_topology[lcpu + i].core_id = rcore;
- cpu_topology[lcpu + i].thread_id = lcpu + i;
+ per_cpu(cpu_topology, lcpu + i).book_id = book->id;
+ per_cpu(cpu_topology, lcpu + i).core_id = rcore;
+ per_cpu(cpu_topology, lcpu + i).thread_id = lcpu + i;
cpumask_set_cpu(lcpu + i, &book->mask);
cpumask_set_cpu(lcpu + i, &socket->mask);
if (one_socket_per_cpu)
- cpu_topology[lcpu + i].socket_id = rcore;
+ per_cpu(cpu_topology, lcpu + i).socket_id = rcore;
else
- cpu_topology[lcpu + i].socket_id = socket->id;
+ per_cpu(cpu_topology, lcpu + i).socket_id = socket->id;
smp_cpu_set_polarization(lcpu + i, tl_core->pp);
}
if (one_socket_per_cpu)
@@ -249,14 +249,14 @@ static void update_cpu_masks(void)
spin_lock_irqsave(&topology_lock, flags);
for_each_possible_cpu(cpu) {
- cpu_topology[cpu].thread_mask = cpu_thread_map(cpu);
- cpu_topology[cpu].core_mask = cpu_group_map(&socket_info, cpu);
- cpu_topology[cpu].book_mask = cpu_group_map(&book_info, cpu);
+ per_cpu(cpu_topology, cpu).thread_mask = cpu_thread_map(cpu);
+ per_cpu(cpu_topology, cpu).core_mask = cpu_group_map(&socket_info, cpu);
+ per_cpu(cpu_topology, cpu).book_mask = cpu_group_map(&book_info, cpu);
if (!MACHINE_HAS_TOPOLOGY) {
- cpu_topology[cpu].thread_id = cpu;
- cpu_topology[cpu].core_id = cpu;
- cpu_topology[cpu].socket_id = cpu;
- cpu_topology[cpu].book_id = cpu;
+ per_cpu(cpu_topology, cpu).thread_id = cpu;
+ per_cpu(cpu_topology, cpu).core_id = cpu;
+ per_cpu(cpu_topology, cpu).socket_id = cpu;
+ per_cpu(cpu_topology, cpu).book_id = cpu;
}
}
spin_unlock_irqrestore(&topology_lock, flags);
@@ -334,50 +334,6 @@ void topology_expect_change(void)
set_topology_timer();
}
-static int __init early_parse_topology(char *p)
-{
- if (strncmp(p, "off", 3))
- return 0;
- topology_enabled = 0;
- return 0;
-}
-early_param("topology", early_parse_topology);
-
-static void __init alloc_masks(struct sysinfo_15_1_x *info,
- struct mask_info *mask, int offset)
-{
- int i, nr_masks;
-
- nr_masks = info->mag[TOPOLOGY_NR_MAG - offset];
- for (i = 0; i < info->mnest - offset; i++)
- nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i];
- nr_masks = max(nr_masks, 1);
- for (i = 0; i < nr_masks; i++) {
- mask->next = alloc_bootmem_align(
- roundup_pow_of_two(sizeof(struct mask_info)),
- roundup_pow_of_two(sizeof(struct mask_info)));
- mask = mask->next;
- }
-}
-
-void __init s390_init_cpu_topology(void)
-{
- struct sysinfo_15_1_x *info;
- int i;
-
- if (!MACHINE_HAS_TOPOLOGY)
- return;
- tl_info = alloc_bootmem_pages(PAGE_SIZE);
- info = tl_info;
- store_topology(info);
- pr_info("The CPU configuration topology of the machine is:");
- for (i = 0; i < TOPOLOGY_NR_MAG; i++)
- printk(KERN_CONT " %d", info->mag[i]);
- printk(KERN_CONT " / %d\n", info->mnest);
- alloc_masks(info, &socket_info, 1);
- alloc_masks(info, &book_info, 2);
-}
-
static int cpu_management;
static ssize_t dispatching_show(struct device *dev,
@@ -467,20 +423,29 @@ int topology_cpu_init(struct cpu *cpu)
const struct cpumask *cpu_thread_mask(int cpu)
{
- return &cpu_topology[cpu].thread_mask;
+ return &per_cpu(cpu_topology, cpu).thread_mask;
}
const struct cpumask *cpu_coregroup_mask(int cpu)
{
- return &cpu_topology[cpu].core_mask;
+ return &per_cpu(cpu_topology, cpu).core_mask;
}
static const struct cpumask *cpu_book_mask(int cpu)
{
- return &cpu_topology[cpu].book_mask;
+ return &per_cpu(cpu_topology, cpu).book_mask;
}
+static int __init early_parse_topology(char *p)
+{
+ if (strncmp(p, "off", 3))
+ return 0;
+ topology_enabled = 0;
+ return 0;
+}
+early_param("topology", early_parse_topology);
+
static struct sched_domain_topology_level s390_topology[] = {
{ cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
@@ -489,6 +454,42 @@ static struct sched_domain_topology_level s390_topology[] = {
{ NULL, },
};
+static void __init alloc_masks(struct sysinfo_15_1_x *info,
+ struct mask_info *mask, int offset)
+{
+ int i, nr_masks;
+
+ nr_masks = info->mag[TOPOLOGY_NR_MAG - offset];
+ for (i = 0; i < info->mnest - offset; i++)
+ nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i];
+ nr_masks = max(nr_masks, 1);
+ for (i = 0; i < nr_masks; i++) {
+ mask->next = kzalloc(sizeof(*mask->next), GFP_KERNEL);
+ mask = mask->next;
+ }
+}
+
+static int __init s390_topology_init(void)
+{
+ struct sysinfo_15_1_x *info;
+ int i;
+
+ if (!MACHINE_HAS_TOPOLOGY)
+ return 0;
+ tl_info = (struct sysinfo_15_1_x *)__get_free_page(GFP_KERNEL);
+ info = tl_info;
+ store_topology(info);
+ pr_info("The CPU configuration topology of the machine is:");
+ for (i = 0; i < TOPOLOGY_NR_MAG; i++)
+ printk(KERN_CONT " %d", info->mag[i]);
+ printk(KERN_CONT " / %d\n", info->mnest);
+ alloc_masks(info, &socket_info, 1);
+ alloc_masks(info, &book_info, 2);
+ set_sched_topology(s390_topology);
+ return 0;
+}
+early_initcall(s390_topology_init);
+
static int __init topology_init(void)
{
if (MACHINE_HAS_TOPOLOGY)
@@ -498,10 +499,3 @@ static int __init topology_init(void)
return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching);
}
device_initcall(topology_init);
-
-static int __init early_topology_init(void)
-{
- set_sched_topology(s390_topology);
- return 0;
-}
-early_initcall(early_topology_init);
diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S
index 7699e735ae28..61541fb93dc6 100644
--- a/arch/s390/kernel/vdso64/clock_gettime.S
+++ b/arch/s390/kernel/vdso64/clock_gettime.S
@@ -25,9 +25,7 @@ __kernel_clock_gettime:
je 4f
cghi %r2,__CLOCK_REALTIME
je 5f
- cghi %r2,__CLOCK_THREAD_CPUTIME_ID
- je 9f
- cghi %r2,-2 /* Per-thread CPUCLOCK with PID=0, VIRT=1 */
+ cghi %r2,-3 /* Per-thread CPUCLOCK with PID=0, VIRT=1 */
je 9f
cghi %r2,__CLOCK_MONOTONIC_COARSE
je 3f
@@ -106,7 +104,7 @@ __kernel_clock_gettime:
aghi %r15,16
br %r14
- /* CLOCK_THREAD_CPUTIME_ID for this thread */
+ /* CPUCLOCK_VIRT for this thread */
9: icm %r0,15,__VDSO_ECTG_OK(%r5)
jz 12f
ear %r2,%a4
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index d008f638b2cd..179a2c20b01f 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -183,7 +183,10 @@ unsigned long randomize_et_dyn(void)
{
unsigned long base;
- base = (STACK_TOP / 3 * 2) & (~mmap_align_mask << PAGE_SHIFT);
+ base = STACK_TOP / 3 * 2;
+ if (!is_32bit_task())
+ /* Align to 4GB */
+ base &= ~((1UL << 32) - 1);
return base + mmap_rnd();
}
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index eb1cf898ed3c..c2fb8a87dccb 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -488,6 +488,22 @@ config X86_INTEL_MID
Intel MID platforms are based on an Intel processor and chipset which
consume less power than most of the x86 derivatives.
+config X86_INTEL_QUARK
+ bool "Intel Quark platform support"
+ depends on X86_32
+ depends on X86_EXTENDED_PLATFORM
+ depends on X86_PLATFORM_DEVICES
+ depends on X86_TSC
+ depends on PCI
+ depends on PCI_GOANY
+ depends on X86_IO_APIC
+ select IOSF_MBI
+ select INTEL_IMR
+ ---help---
+ Select to include support for Quark X1000 SoC.
+ Say Y here if you have a Quark based system such as the Arduino
+ compatible Intel Galileo.
+
config X86_INTEL_LPSS
bool "Intel Low Power Subsystem Support"
depends on ACPI
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 61bd2ad94281..20028da8ae18 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -313,6 +313,19 @@ config DEBUG_NMI_SELFTEST
If unsure, say N.
+config DEBUG_IMR_SELFTEST
+ bool "Isolated Memory Region self test"
+ default n
+ depends on INTEL_IMR
+ ---help---
+ This option enables automated sanity testing of the IMR code.
+ Some simple tests are run to verify IMR bounds checking, alignment
+ and overlapping. This option is really only useful if you are
+ debugging an IMR memory map or are modifying the IMR code and want to
+ test your changes.
+
+ If unsure say N here.
+
config X86_DEBUG_STATIC_CPU_HAS
bool "Debug alternatives"
depends on DEBUG_KERNEL
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 843feb3eb20b..0a291cdfaf77 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -51,6 +51,7 @@ $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
vmlinux-objs-$(CONFIG_EFI_STUB) += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o \
$(objtree)/drivers/firmware/efi/libstub/lib.a
+vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o
$(obj)/vmlinux: $(vmlinux-objs-y) FORCE
$(call if_changed,ld)
diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c
index bb1376381985..7083c16cccba 100644
--- a/arch/x86/boot/compressed/aslr.c
+++ b/arch/x86/boot/compressed/aslr.c
@@ -14,6 +14,13 @@
static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;
+struct kaslr_setup_data {
+ __u64 next;
+ __u32 type;
+ __u32 len;
+ __u8 data[1];
+} kaslr_setup_data;
+
#define I8254_PORT_CONTROL 0x43
#define I8254_PORT_COUNTER0 0x40
#define I8254_CMD_READBACK 0xC0
@@ -295,7 +302,29 @@ static unsigned long find_random_addr(unsigned long minimum,
return slots_fetch_random();
}
-unsigned char *choose_kernel_location(unsigned char *input,
+static void add_kaslr_setup_data(struct boot_params *params, __u8 enabled)
+{
+ struct setup_data *data;
+
+ kaslr_setup_data.type = SETUP_KASLR;
+ kaslr_setup_data.len = 1;
+ kaslr_setup_data.next = 0;
+ kaslr_setup_data.data[0] = enabled;
+
+ data = (struct setup_data *)(unsigned long)params->hdr.setup_data;
+
+ while (data && data->next)
+ data = (struct setup_data *)(unsigned long)data->next;
+
+ if (data)
+ data->next = (unsigned long)&kaslr_setup_data;
+ else
+ params->hdr.setup_data = (unsigned long)&kaslr_setup_data;
+
+}
+
+unsigned char *choose_kernel_location(struct boot_params *params,
+ unsigned char *input,
unsigned long input_size,
unsigned char *output,
unsigned long output_size)
@@ -306,14 +335,17 @@ unsigned char *choose_kernel_location(unsigned char *input,
#ifdef CONFIG_HIBERNATION
if (!cmdline_find_option_bool("kaslr")) {
debug_putstr("KASLR disabled by default...\n");
+ add_kaslr_setup_data(params, 0);
goto out;
}
#else
if (cmdline_find_option_bool("nokaslr")) {
debug_putstr("KASLR disabled by cmdline...\n");
+ add_kaslr_setup_data(params, 0);
goto out;
}
#endif
+ add_kaslr_setup_data(params, 1);
/* Record the various known unsafe memory ranges. */
mem_avoid_init((unsigned long)input, input_size,
diff --git a/arch/x86/boot/compressed/efi_stub_64.S b/arch/x86/boot/compressed/efi_stub_64.S
index 7ff3632806b1..99494dff2113 100644
--- a/arch/x86/boot/compressed/efi_stub_64.S
+++ b/arch/x86/boot/compressed/efi_stub_64.S
@@ -3,28 +3,3 @@
#include <asm/processor-flags.h>
#include "../../platform/efi/efi_stub_64.S"
-
-#ifdef CONFIG_EFI_MIXED
- .code64
- .text
-ENTRY(efi64_thunk)
- push %rbp
- push %rbx
-
- subq $16, %rsp
- leaq efi_exit32(%rip), %rax
- movl %eax, 8(%rsp)
- leaq efi_gdt64(%rip), %rax
- movl %eax, 4(%rsp)
- movl %eax, 2(%rax) /* Fixup the gdt base address */
- leaq efi32_boot_gdt(%rip), %rax
- movl %eax, (%rsp)
-
- call __efi64_thunk
-
- addq $16, %rsp
- pop %rbx
- pop %rbp
- ret
-ENDPROC(efi64_thunk)
-#endif /* CONFIG_EFI_MIXED */
diff --git a/arch/x86/boot/compressed/efi_thunk_64.S b/arch/x86/boot/compressed/efi_thunk_64.S
new file mode 100644
index 000000000000..630384a4c14a
--- /dev/null
+++ b/arch/x86/boot/compressed/efi_thunk_64.S
@@ -0,0 +1,196 @@
+/*
+ * Copyright (C) 2014, 2015 Intel Corporation; author Matt Fleming
+ *
+ * Early support for invoking 32-bit EFI services from a 64-bit kernel.
+ *
+ * Because this thunking occurs before ExitBootServices() we have to
+ * restore the firmware's 32-bit GDT before we make EFI serivce calls,
+ * since the firmware's 32-bit IDT is still currently installed and it
+ * needs to be able to service interrupts.
+ *
+ * On the plus side, we don't have to worry about mangling 64-bit
+ * addresses into 32-bits because we're executing with an identify
+ * mapped pagetable and haven't transitioned to 64-bit virtual addresses
+ * yet.
+ */
+
+#include <linux/linkage.h>
+#include <asm/msr.h>
+#include <asm/page_types.h>
+#include <asm/processor-flags.h>
+#include <asm/segment.h>
+
+ .code64
+ .text
+ENTRY(efi64_thunk)
+ push %rbp
+ push %rbx
+
+ subq $8, %rsp
+ leaq efi_exit32(%rip), %rax
+ movl %eax, 4(%rsp)
+ leaq efi_gdt64(%rip), %rax
+ movl %eax, (%rsp)
+ movl %eax, 2(%rax) /* Fixup the gdt base address */
+
+ movl %ds, %eax
+ push %rax
+ movl %es, %eax
+ push %rax
+ movl %ss, %eax
+ push %rax
+
+ /*
+ * Convert x86-64 ABI params to i386 ABI
+ */
+ subq $32, %rsp
+ movl %esi, 0x0(%rsp)
+ movl %edx, 0x4(%rsp)
+ movl %ecx, 0x8(%rsp)
+ movq %r8, %rsi
+ movl %esi, 0xc(%rsp)
+ movq %r9, %rsi
+ movl %esi, 0x10(%rsp)
+
+ sgdt save_gdt(%rip)
+
+ leaq 1f(%rip), %rbx
+ movq %rbx, func_rt_ptr(%rip)
+
+ /*
+ * Switch to gdt with 32-bit segments. This is the firmware GDT
+ * that was installed when the kernel started executing. This
+ * pointer was saved at the EFI stub entry point in head_64.S.
+ */
+ leaq efi32_boot_gdt(%rip), %rax
+ lgdt (%rax)
+
+ pushq $__KERNEL_CS
+ leaq efi_enter32(%rip), %rax
+ pushq %rax
+ lretq
+
+1: addq $32, %rsp
+
+ lgdt save_gdt(%rip)
+
+ pop %rbx
+ movl %ebx, %ss
+ pop %rbx
+ movl %ebx, %es
+ pop %rbx
+ movl %ebx, %ds
+
+ /*
+ * Convert 32-bit status code into 64-bit.
+ */
+ test %rax, %rax
+ jz 1f
+ movl %eax, %ecx
+ andl $0x0fffffff, %ecx
+ andl $0xf0000000, %eax
+ shl $32, %rax
+ or %rcx, %rax
+1:
+ addq $8, %rsp
+ pop %rbx
+ pop %rbp
+ ret
+ENDPROC(efi64_thunk)
+
+ENTRY(efi_exit32)
+ movq func_rt_ptr(%rip), %rax
+ push %rax
+ mov %rdi, %rax
+ ret
+ENDPROC(efi_exit32)
+
+ .code32
+/*
+ * EFI service pointer must be in %edi.
+ *
+ * The stack should represent the 32-bit calling convention.
+ */
+ENTRY(efi_enter32)
+ movl $__KERNEL_DS, %eax
+ movl %eax, %ds
+ movl %eax, %es
+ movl %eax, %ss
+
+ /* Reload pgtables */
+ movl %cr3, %eax
+ movl %eax, %cr3
+
+ /* Disable paging */
+ movl %cr0, %eax
+ btrl $X86_CR0_PG_BIT, %eax
+ movl %eax, %cr0
+
+ /* Disable long mode via EFER */
+ movl $MSR_EFER, %ecx
+ rdmsr
+ btrl $_EFER_LME, %eax
+ wrmsr
+
+ call *%edi
+
+ /* We must preserve return value */
+ movl %eax, %edi
+
+ /*
+ * Some firmware will return with interrupts enabled. Be sure to
+ * disable them before we switch GDTs.
+ */
+ cli
+
+ movl 56(%esp), %eax
+ movl %eax, 2(%eax)
+ lgdtl (%eax)
+
+ movl %cr4, %eax
+ btsl $(X86_CR4_PAE_BIT), %eax
+ movl %eax, %cr4
+
+ movl %cr3, %eax
+ movl %eax, %cr3
+
+ movl $MSR_EFER, %ecx
+ rdmsr
+ btsl $_EFER_LME, %eax
+ wrmsr
+
+ xorl %eax, %eax
+ lldt %ax
+
+ movl 60(%esp), %eax
+ pushl $__KERNEL_CS
+ pushl %eax
+
+ /* Enable paging */
+ movl %cr0, %eax
+ btsl $X86_CR0_PG_BIT, %eax
+ movl %eax, %cr0
+ lret
+ENDPROC(efi_enter32)
+
+ .data
+ .balign 8
+ .global efi32_boot_gdt
+efi32_boot_gdt: .word 0
+ .quad 0
+
+save_gdt: .word 0
+ .quad 0
+func_rt_ptr: .quad 0
+
+ .global efi_gdt64
+efi_gdt64:
+ .word efi_gdt64_end - efi_gdt64
+ .long 0 /* Filled out by user */
+ .word 0
+ .quad 0x0000000000000000 /* NULL descriptor */
+ .quad 0x00af9a000000ffff /* __KERNEL_CS */
+ .quad 0x00cf92000000ffff /* __KERNEL_DS */
+ .quad 0x0080890000000000 /* TS descriptor */
+ .quad 0x0000000000000000 /* TS continued */
+efi_gdt64_end:
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index a950864a64da..5903089c818f 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -401,7 +401,8 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
* the entire decompressed kernel plus relocation table, or the
* entire decompressed kernel plus .bss and .brk sections.
*/
- output = choose_kernel_location(input_data, input_len, output,
+ output = choose_kernel_location(real_mode, input_data, input_len,
+ output,
output_len > run_size ? output_len
: run_size);
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 04477d68403f..ee3576b2666b 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -57,7 +57,8 @@ int cmdline_find_option_bool(const char *option);
#if CONFIG_RANDOMIZE_BASE
/* aslr.c */
-unsigned char *choose_kernel_location(unsigned char *input,
+unsigned char *choose_kernel_location(struct boot_params *params,
+ unsigned char *input,
unsigned long input_size,
unsigned char *output,
unsigned long output_size);
@@ -65,7 +66,8 @@ unsigned char *choose_kernel_location(unsigned char *input,
bool has_cpuflag(int flag);
#else
static inline
-unsigned char *choose_kernel_location(unsigned char *input,
+unsigned char *choose_kernel_location(struct boot_params *params,
+ unsigned char *input,
unsigned long input_size,
unsigned char *output,
unsigned long output_size)
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 92003f3c8a42..efc3b22d896e 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -213,7 +213,15 @@ void register_lapic_address(unsigned long address);
extern void setup_boot_APIC_clock(void);
extern void setup_secondary_APIC_clock(void);
extern int APIC_init_uniprocessor(void);
+
+#ifdef CONFIG_X86_64
+static inline int apic_force_enable(unsigned long addr)
+{
+ return -1;
+}
+#else
extern int apic_force_enable(unsigned long addr);
+#endif
extern int apic_bsp_setup(bool upmode);
extern void apic_ap_setup(void);
diff --git a/arch/x86/include/asm/imr.h b/arch/x86/include/asm/imr.h
new file mode 100644
index 000000000000..cd2ce4068441
--- /dev/null
+++ b/arch/x86/include/asm/imr.h
@@ -0,0 +1,60 @@
+/*
+ * imr.h: Isolated Memory Region API
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ * Copyright(c) 2015 Bryan O'Donoghue <pure.logic@nexus-software.ie>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+#ifndef _IMR_H
+#define _IMR_H
+
+#include <linux/types.h>
+
+/*
+ * IMR agent access mask bits
+ * See section 12.7.4.7 from quark-x1000-datasheet.pdf for register
+ * definitions.
+ */
+#define IMR_ESRAM_FLUSH BIT(31)
+#define IMR_CPU_SNOOP BIT(30) /* Applicable only to write */
+#define IMR_RMU BIT(29)
+#define IMR_VC1_SAI_ID3 BIT(15)
+#define IMR_VC1_SAI_ID2 BIT(14)
+#define IMR_VC1_SAI_ID1 BIT(13)
+#define IMR_VC1_SAI_ID0 BIT(12)
+#define IMR_VC0_SAI_ID3 BIT(11)
+#define IMR_VC0_SAI_ID2 BIT(10)
+#define IMR_VC0_SAI_ID1 BIT(9)
+#define IMR_VC0_SAI_ID0 BIT(8)
+#define IMR_CPU_0 BIT(1) /* SMM mode */
+#define IMR_CPU BIT(0) /* Non SMM mode */
+#define IMR_ACCESS_NONE 0
+
+/*
+ * Read/Write access-all bits here include some reserved bits
+ * These are the values firmware uses and are accepted by hardware.
+ * The kernel defines read/write access-all in the same way as firmware
+ * in order to have a consistent and crisp definition across firmware,
+ * bootloader and kernel.
+ */
+#define IMR_READ_ACCESS_ALL 0xBFFFFFFF
+#define IMR_WRITE_ACCESS_ALL 0xFFFFFFFF
+
+/* Number of IMRs provided by Quark X1000 SoC */
+#define QUARK_X1000_IMR_MAX 0x08
+#define QUARK_X1000_IMR_REGBASE 0x40
+
+/* IMR alignment bits - only bits 31:10 are checked for IMR validity */
+#define IMR_ALIGN 0x400
+#define IMR_MASK (IMR_ALIGN - 1)
+
+int imr_add_range(phys_addr_t base, size_t size,
+ unsigned int rmask, unsigned int wmask, bool lock);
+
+int imr_remove_range(phys_addr_t base, size_t size);
+
+#endif /* _IMR_H */
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index f97fbe3abb67..95e11f79f123 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -51,6 +51,8 @@ extern int devmem_is_allowed(unsigned long pagenr);
extern unsigned long max_low_pfn_mapped;
extern unsigned long max_pfn_mapped;
+extern bool kaslr_enabled;
+
static inline phys_addr_t get_max_mapped(void)
{
return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT;
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 7050d864f520..cf87de3fc390 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -46,7 +46,7 @@ static __always_inline bool static_key_false(struct static_key *key);
static inline void __ticket_enter_slowpath(arch_spinlock_t *lock)
{
- set_bit(0, (volatile unsigned long *)&lock->tickets.tail);
+ set_bit(0, (volatile unsigned long *)&lock->tickets.head);
}
#else /* !CONFIG_PARAVIRT_SPINLOCKS */
@@ -60,10 +60,30 @@ static inline void __ticket_unlock_kick(arch_spinlock_t *lock,
}
#endif /* CONFIG_PARAVIRT_SPINLOCKS */
+static inline int __tickets_equal(__ticket_t one, __ticket_t two)
+{
+ return !((one ^ two) & ~TICKET_SLOWPATH_FLAG);
+}
+
+static inline void __ticket_check_and_clear_slowpath(arch_spinlock_t *lock,
+ __ticket_t head)
+{
+ if (head & TICKET_SLOWPATH_FLAG) {
+ arch_spinlock_t old, new;
+
+ old.tickets.head = head;
+ new.tickets.head = head & ~TICKET_SLOWPATH_FLAG;
+ old.tickets.tail = new.tickets.head + TICKET_LOCK_INC;
+ new.tickets.tail = old.tickets.tail;
+
+ /* try to clear slowpath flag when there are no contenders */
+ cmpxchg(&lock->head_tail, old.head_tail, new.head_tail);
+ }
+}
static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
{
- return lock.tickets.head == lock.tickets.tail;
+ return __tickets_equal(lock.tickets.head, lock.tickets.tail);
}
/*
@@ -87,18 +107,21 @@ static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
if (likely(inc.head == inc.tail))
goto out;
- inc.tail &= ~TICKET_SLOWPATH_FLAG;
for (;;) {
unsigned count = SPIN_THRESHOLD;
do {
- if (READ_ONCE(lock->tickets.head) == inc.tail)
- goto out;
+ inc.head = READ_ONCE(lock->tickets.head);
+ if (__tickets_equal(inc.head, inc.tail))
+ goto clear_slowpath;
cpu_relax();
} while (--count);
__ticket_lock_spinning(lock, inc.tail);
}
-out: barrier(); /* make sure nothing creeps before the lock is taken */
+clear_slowpath:
+ __ticket_check_and_clear_slowpath(lock, inc.head);
+out:
+ barrier(); /* make sure nothing creeps before the lock is taken */
}
static __always_inline int arch_spin_trylock(arch_spinlock_t *lock)
@@ -106,56 +129,30 @@ static __always_inline int arch_spin_trylock(arch_spinlock_t *lock)
arch_spinlock_t old, new;
old.tickets = READ_ONCE(lock->tickets);
- if (old.tickets.head != (old.tickets.tail & ~TICKET_SLOWPATH_FLAG))
+ if (!__tickets_equal(old.tickets.head, old.tickets.tail))
return 0;
new.head_tail = old.head_tail + (TICKET_LOCK_INC << TICKET_SHIFT);
+ new.head_tail &= ~TICKET_SLOWPATH_FLAG;
/* cmpxchg is a full barrier, so nothing can move before it */
return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail;
}
-static inline void __ticket_unlock_slowpath(arch_spinlock_t *lock,
- arch_spinlock_t old)
-{
- arch_spinlock_t new;
-
- BUILD_BUG_ON(((__ticket_t)NR_CPUS) != NR_CPUS);
-
- /* Perform the unlock on the "before" copy */
- old.tickets.head += TICKET_LOCK_INC;
-
- /* Clear the slowpath flag */
- new.head_tail = old.head_tail & ~(TICKET_SLOWPATH_FLAG << TICKET_SHIFT);
-
- /*
- * If the lock is uncontended, clear the flag - use cmpxchg in
- * case it changes behind our back though.
- */
- if (new.tickets.head != new.tickets.tail ||
- cmpxchg(&lock->head_tail, old.head_tail,
- new.head_tail) != old.head_tail) {
- /*
- * Lock still has someone queued for it, so wake up an
- * appropriate waiter.
- */
- __ticket_unlock_kick(lock, old.tickets.head);
- }
-}
-
static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
{
if (TICKET_SLOWPATH_FLAG &&
- static_key_false(&paravirt_ticketlocks_enabled)) {
- arch_spinlock_t prev;
+ static_key_false(&paravirt_ticketlocks_enabled)) {
+ __ticket_t head;
- prev = *lock;
- add_smp(&lock->tickets.head, TICKET_LOCK_INC);
+ BUILD_BUG_ON(((__ticket_t)NR_CPUS) != NR_CPUS);
- /* add_smp() is a full mb() */
+ head = xadd(&lock->tickets.head, TICKET_LOCK_INC);
- if (unlikely(lock->tickets.tail & TICKET_SLOWPATH_FLAG))
- __ticket_unlock_slowpath(lock, prev);
+ if (unlikely(head & TICKET_SLOWPATH_FLAG)) {
+ head &= ~TICKET_SLOWPATH_FLAG;
+ __ticket_unlock_kick(lock, (head + TICKET_LOCK_INC));
+ }
} else
__add(&lock->tickets.head, TICKET_LOCK_INC, UNLOCK_LOCK_PREFIX);
}
@@ -164,14 +161,15 @@ static inline int arch_spin_is_locked(arch_spinlock_t *lock)
{
struct __raw_tickets tmp = READ_ONCE(lock->tickets);
- return tmp.tail != tmp.head;
+ return !__tickets_equal(tmp.tail, tmp.head);
}
static inline int arch_spin_is_contended(arch_spinlock_t *lock)
{
struct __raw_tickets tmp = READ_ONCE(lock->tickets);
- return (__ticket_t)(tmp.tail - tmp.head) > TICKET_LOCK_INC;
+ tmp.head &= ~TICKET_SLOWPATH_FLAG;
+ return (tmp.tail - tmp.head) > TICKET_LOCK_INC;
}
#define arch_spin_is_contended arch_spin_is_contended
@@ -191,8 +189,8 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
* We need to check "unlocked" in a loop, tmp.head == head
* can be false positive because of overflow.
*/
- if (tmp.head == (tmp.tail & ~TICKET_SLOWPATH_FLAG) ||
- tmp.head != head)
+ if (__tickets_equal(tmp.head, tmp.tail) ||
+ !__tickets_equal(tmp.head, head))
break;
cpu_relax();
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index 225b0988043a..44e6dd7e36a2 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -7,6 +7,7 @@
#define SETUP_DTB 2
#define SETUP_PCI 3
#define SETUP_EFI 4
+#define SETUP_KASLR 5
/* ram_size flags */
#define RAMDISK_IMAGE_START_MASK 0x07FF
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index ae97ed0873c6..3d525c6124f6 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -613,6 +613,11 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irqp)
{
int rc, irq, trigger, polarity;
+ if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) {
+ *irqp = gsi;
+ return 0;
+ }
+
rc = acpi_get_override_irq(gsi, &trigger, &polarity);
if (rc == 0) {
trigger = trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE;
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index c6826d1e8082..746e7fd08aad 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -196,6 +196,11 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
struct microcode_header_intel mc_header;
unsigned int mc_size;
+ if (leftover < sizeof(mc_header)) {
+ pr_err("error! Truncated header in microcode data file\n");
+ break;
+ }
+
if (get_ucode_data(&mc_header, ucode_ptr, sizeof(mc_header)))
break;
diff --git a/arch/x86/kernel/cpu/microcode/intel_early.c b/arch/x86/kernel/cpu/microcode/intel_early.c
index ec9df6f9cd47..420eb933189c 100644
--- a/arch/x86/kernel/cpu/microcode/intel_early.c
+++ b/arch/x86/kernel/cpu/microcode/intel_early.c
@@ -321,7 +321,11 @@ get_matching_model_microcode(int cpu, unsigned long start,
unsigned int mc_saved_count = mc_saved_data->mc_saved_count;
int i;
- while (leftover) {
+ while (leftover && mc_saved_count < ARRAY_SIZE(mc_saved_tmp)) {
+
+ if (leftover < sizeof(mc_header))
+ break;
+
mc_header = (struct microcode_header_intel *)ucode_ptr;
mc_size = get_totalsize(mc_header);
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 705ef8d48e2d..67b1cbe0093a 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -302,6 +302,9 @@ int check_irq_vectors_for_cpu_disable(void)
irq = __this_cpu_read(vector_irq[vector]);
if (irq >= 0) {
desc = irq_to_desc(irq);
+ if (!desc)
+ continue;
+
data = irq_desc_get_irq_data(desc);
cpumask_copy(&affinity_new, data->affinity);
cpu_clear(this_cpu, affinity_new);
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 98f654d466e5..6a1146ea4d4d 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -84,7 +84,7 @@ static volatile u32 twobyte_is_boostable[256 / 32] = {
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
/* ---------------------------------------------- */
W(0x00, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0) | /* 00 */
- W(0x10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 10 */
+ W(0x10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1) , /* 10 */
W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 20 */
W(0x30, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */
W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 94f643484300..e354cc6446ab 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -609,7 +609,7 @@ static inline void check_zero(void)
u8 ret;
u8 old;
- old = ACCESS_ONCE(zero_stats);
+ old = READ_ONCE(zero_stats);
if (unlikely(old)) {
ret = cmpxchg(&zero_stats, old, 0);
/* This ensures only one fellow resets the stat */
@@ -727,6 +727,7 @@ __visible void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
int cpu;
u64 start;
unsigned long flags;
+ __ticket_t head;
if (in_nmi())
return;
@@ -768,11 +769,15 @@ __visible void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
*/
__ticket_enter_slowpath(lock);
+ /* make sure enter_slowpath, which is atomic does not cross the read */
+ smp_mb__after_atomic();
+
/*
* check again make sure it didn't become free while
* we weren't looking.
*/
- if (ACCESS_ONCE(lock->tickets.head) == want) {
+ head = READ_ONCE(lock->tickets.head);
+ if (__tickets_equal(head, want)) {
add_stats(TAKEN_SLOW_PICKUP, 1);
goto out;
}
@@ -803,8 +808,8 @@ static void kvm_unlock_kick(struct arch_spinlock *lock, __ticket_t ticket)
add_stats(RELEASED_SLOW, 1);
for_each_cpu(cpu, &waiting_cpus) {
const struct kvm_lock_waiting *w = &per_cpu(klock_waiting, cpu);
- if (ACCESS_ONCE(w->lock) == lock &&
- ACCESS_ONCE(w->want) == ticket) {
+ if (READ_ONCE(w->lock) == lock &&
+ READ_ONCE(w->want) == ticket) {
add_stats(RELEASED_SLOW_KICKED, 1);
kvm_kick_cpu(cpu);
break;
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index d1ac80b72c72..9bbb9b35c144 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -47,21 +47,13 @@ do { \
#ifdef CONFIG_RANDOMIZE_BASE
static unsigned long module_load_offset;
-static int randomize_modules = 1;
/* Mutex protects the module_load_offset. */
static DEFINE_MUTEX(module_kaslr_mutex);
-static int __init parse_nokaslr(char *p)
-{
- randomize_modules = 0;
- return 0;
-}
-early_param("nokaslr", parse_nokaslr);
-
static unsigned long int get_module_load_offset(void)
{
- if (randomize_modules) {
+ if (kaslr_enabled) {
mutex_lock(&module_kaslr_mutex);
/*
* Calculate the module_load_offset the first time this
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 0a2421cca01f..98dc9317286e 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -122,6 +122,8 @@
unsigned long max_low_pfn_mapped;
unsigned long max_pfn_mapped;
+bool __read_mostly kaslr_enabled = false;
+
#ifdef CONFIG_DMI
RESERVE_BRK(dmi_alloc, 65536);
#endif
@@ -425,6 +427,11 @@ static void __init reserve_initrd(void)
}
#endif /* CONFIG_BLK_DEV_INITRD */
+static void __init parse_kaslr_setup(u64 pa_data, u32 data_len)
+{
+ kaslr_enabled = (bool)(pa_data + sizeof(struct setup_data));
+}
+
static void __init parse_setup_data(void)
{
struct setup_data *data;
@@ -450,6 +457,9 @@ static void __init parse_setup_data(void)
case SETUP_EFI:
parse_efi_setup(pa_data, data_len);
break;
+ case SETUP_KASLR:
+ parse_kaslr_setup(pa_data, data_len);
+ break;
default:
break;
}
@@ -832,10 +842,14 @@ static void __init trim_low_memory_range(void)
static int
dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p)
{
- pr_emerg("Kernel Offset: 0x%lx from 0x%lx "
- "(relocation range: 0x%lx-0x%lx)\n",
- (unsigned long)&_text - __START_KERNEL, __START_KERNEL,
- __START_KERNEL_map, MODULES_VADDR-1);
+ if (kaslr_enabled)
+ pr_emerg("Kernel Offset: 0x%lx from 0x%lx (relocation range: 0x%lx-0x%lx)\n",
+ (unsigned long)&_text - __START_KERNEL,
+ __START_KERNEL,
+ __START_KERNEL_map,
+ MODULES_VADDR-1);
+ else
+ pr_emerg("Kernel Offset: disabled\n");
return 0;
}
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 8b96a947021f..81f8adb0679e 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -66,27 +66,54 @@
* Good-instruction tables for 32-bit apps. This is non-const and volatile
* to keep gcc from statically optimizing it out, as variable_test_bit makes
* some versions of gcc to think only *(unsigned long*) is used.
+ *
+ * Opcodes we'll probably never support:
+ * 6c-6f - ins,outs. SEGVs if used in userspace
+ * e4-e7 - in,out imm. SEGVs if used in userspace
+ * ec-ef - in,out acc. SEGVs if used in userspace
+ * cc - int3. SIGTRAP if used in userspace
+ * ce - into. Not used in userspace - no kernel support to make it useful. SEGVs
+ * (why we support bound (62) then? it's similar, and similarly unused...)
+ * f1 - int1. SIGTRAP if used in userspace
+ * f4 - hlt. SEGVs if used in userspace
+ * fa - cli. SEGVs if used in userspace
+ * fb - sti. SEGVs if used in userspace
+ *
+ * Opcodes which need some work to be supported:
+ * 07,17,1f - pop es/ss/ds
+ * Normally not used in userspace, but would execute if used.
+ * Can cause GP or stack exception if tries to load wrong segment descriptor.
+ * We hesitate to run them under single step since kernel's handling
+ * of userspace single-stepping (TF flag) is fragile.
+ * We can easily refuse to support push es/cs/ss/ds (06/0e/16/1e)
+ * on the same grounds that they are never used.
+ * cd - int N.
+ * Used by userspace for "int 80" syscall entry. (Other "int N"
+ * cause GP -> SEGV since their IDT gates don't allow calls from CPL 3).
+ * Not supported since kernel's handling of userspace single-stepping
+ * (TF flag) is fragile.
+ * cf - iret. Normally not used in userspace. Doesn't SEGV unless arguments are bad
*/
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
static volatile u32 good_insns_32[256 / 32] = {
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
/* ---------------------------------------------- */
- W(0x00, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) | /* 00 */
+ W(0x00, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 00 */
W(0x10, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) , /* 10 */
- W(0x20, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1) | /* 20 */
- W(0x30, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1) , /* 30 */
+ W(0x20, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */
+ W(0x30, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 30 */
W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
- W(0x60, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */
+ W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */
W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */
W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */
W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */
- W(0xd0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
+ W(0xd0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */
- W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */
+ W(0xf0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */
/* ---------------------------------------------- */
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
};
@@ -94,27 +121,61 @@ static volatile u32 good_insns_32[256 / 32] = {
#define good_insns_32 NULL
#endif
-/* Good-instruction tables for 64-bit apps */
+/* Good-instruction tables for 64-bit apps.
+ *
+ * Genuinely invalid opcodes:
+ * 06,07 - formerly push/pop es
+ * 0e - formerly push cs
+ * 16,17 - formerly push/pop ss
+ * 1e,1f - formerly push/pop ds
+ * 27,2f,37,3f - formerly daa/das/aaa/aas
+ * 60,61 - formerly pusha/popa
+ * 62 - formerly bound. EVEX prefix for AVX512 (not yet supported)
+ * 82 - formerly redundant encoding of Group1
+ * 9a - formerly call seg:ofs
+ * ce - formerly into
+ * d4,d5 - formerly aam/aad
+ * d6 - formerly undocumented salc
+ * ea - formerly jmp seg:ofs
+ *
+ * Opcodes we'll probably never support:
+ * 6c-6f - ins,outs. SEGVs if used in userspace
+ * e4-e7 - in,out imm. SEGVs if used in userspace
+ * ec-ef - in,out acc. SEGVs if used in userspace
+ * cc - int3. SIGTRAP if used in userspace
+ * f1 - int1. SIGTRAP if used in userspace
+ * f4 - hlt. SEGVs if used in userspace
+ * fa - cli. SEGVs if used in userspace
+ * fb - sti. SEGVs if used in userspace
+ *
+ * Opcodes which need some work to be supported:
+ * cd - int N.
+ * Used by userspace for "int 80" syscall entry. (Other "int N"
+ * cause GP -> SEGV since their IDT gates don't allow calls from CPL 3).
+ * Not supported since kernel's handling of userspace single-stepping
+ * (TF flag) is fragile.
+ * cf - iret. Normally not used in userspace. Doesn't SEGV unless arguments are bad
+ */
#if defined(CONFIG_X86_64)
static volatile u32 good_insns_64[256 / 32] = {
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
/* ---------------------------------------------- */
- W(0x00, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 00 */
+ W(0x00, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* 00 */
W(0x10, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 10 */
- W(0x20, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 20 */
- W(0x30, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 30 */
- W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */
+ W(0x20, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) | /* 20 */
+ W(0x30, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) , /* 30 */
+ W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
- W(0x60, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */
+ W(0x60, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */
W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */
W(0x80, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
- W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
+ W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1) , /* 90 */
W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */
W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
- W(0xc0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */
+ W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */
W(0xd0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
- W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */
- W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */
+ W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0) | /* e0 */
+ W(0xf0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */
/* ---------------------------------------------- */
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
};
@@ -122,49 +183,55 @@ static volatile u32 good_insns_64[256 / 32] = {
#define good_insns_64 NULL
#endif
-/* Using this for both 64-bit and 32-bit apps */
+/* Using this for both 64-bit and 32-bit apps.
+ * Opcodes we don't support:
+ * 0f 00 - SLDT/STR/LLDT/LTR/VERR/VERW/-/- group. System insns
+ * 0f 01 - SGDT/SIDT/LGDT/LIDT/SMSW/-/LMSW/INVLPG group.
+ * Also encodes tons of other system insns if mod=11.
+ * Some are in fact non-system: xend, xtest, rdtscp, maybe more
+ * 0f 05 - syscall
+ * 0f 06 - clts (CPL0 insn)
+ * 0f 07 - sysret
+ * 0f 08 - invd (CPL0 insn)
+ * 0f 09 - wbinvd (CPL0 insn)
+ * 0f 0b - ud2
+ * 0f 30 - wrmsr (CPL0 insn) (then why rdmsr is allowed, it's also CPL0 insn?)
+ * 0f 34 - sysenter
+ * 0f 35 - sysexit
+ * 0f 37 - getsec
+ * 0f 78 - vmread (Intel VMX. CPL0 insn)
+ * 0f 79 - vmwrite (Intel VMX. CPL0 insn)
+ * Note: with prefixes, these two opcodes are
+ * extrq/insertq/AVX512 convert vector ops.
+ * 0f ae - group15: [f]xsave,[f]xrstor,[v]{ld,st}mxcsr,clflush[opt],
+ * {rd,wr}{fs,gs}base,{s,l,m}fence.
+ * Why? They are all user-executable.
+ */
static volatile u32 good_2byte_insns[256 / 32] = {
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
/* ---------------------------------------------- */
- W(0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1) | /* 00 */
- W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* 10 */
- W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */
- W(0x30, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */
+ W(0x00, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1) | /* 00 */
+ W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 10 */
+ W(0x20, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */
+ W(0x30, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1) , /* 30 */
W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 60 */
- W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */
+ W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1) , /* 70 */
W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
- W(0xa0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */
- W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
+ W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */
+ W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */
- W(0xd0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
+ W(0xd0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* e0 */
- W(0xf0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* f0 */
+ W(0xf0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) /* f0 */
/* ---------------------------------------------- */
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
};
#undef W
/*
- * opcodes we'll probably never support:
- *
- * 6c-6d, e4-e5, ec-ed - in
- * 6e-6f, e6-e7, ee-ef - out
- * cc, cd - int3, int
- * cf - iret
- * d6 - illegal instruction
- * f1 - int1/icebp
- * f4 - hlt
- * fa, fb - cli, sti
- * 0f - lar, lsl, syscall, clts, sysret, sysenter, sysexit, invd, wbinvd, ud2
- *
- * invalid opcodes in 64-bit mode:
- *
- * 06, 0e, 16, 1e, 27, 2f, 37, 3f, 60-62, 82, c4-c5, d4-d5
- * 63 - we support this opcode in x86_64 but not in i386.
- *
* opcodes we may need to refine support for:
*
* 0f - 2-byte instructions: For many of these instructions, the validity
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 553c094b9cd7..a110efca6d06 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -238,6 +238,31 @@ static void __init_refok adjust_range_page_size_mask(struct map_range *mr,
}
}
+static const char *page_size_string(struct map_range *mr)
+{
+ static const char str_1g[] = "1G";
+ static const char str_2m[] = "2M";
+ static const char str_4m[] = "4M";
+ static const char str_4k[] = "4k";
+
+ if (mr->page_size_mask & (1<<PG_LEVEL_1G))
+ return str_1g;
+ /*
+ * 32-bit without PAE has a 4M large page size.
+ * PG_LEVEL_2M is misnamed, but we can at least
+ * print out the right size in the string.
+ */
+ if (IS_ENABLED(CONFIG_X86_32) &&
+ !IS_ENABLED(CONFIG_X86_PAE) &&
+ mr->page_size_mask & (1<<PG_LEVEL_2M))
+ return str_4m;
+
+ if (mr->page_size_mask & (1<<PG_LEVEL_2M))
+ return str_2m;
+
+ return str_4k;
+}
+
static int __meminit split_mem_range(struct map_range *mr, int nr_range,
unsigned long start,
unsigned long end)
@@ -333,8 +358,7 @@ static int __meminit split_mem_range(struct map_range *mr, int nr_range,
for (i = 0; i < nr_range; i++)
printk(KERN_DEBUG " [mem %#010lx-%#010lx] page %s\n",
mr[i].start, mr[i].end - 1,
- (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":(
- (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k"));
+ page_size_string(&mr[i]));
return nr_range;
}
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 919b91205cd4..df4552bd239e 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -35,12 +35,12 @@ struct va_alignment __read_mostly va_align = {
.flags = -1,
};
-static unsigned int stack_maxrandom_size(void)
+static unsigned long stack_maxrandom_size(void)
{
- unsigned int max = 0;
+ unsigned long max = 0;
if ((current->flags & PF_RANDOMIZE) &&
!(current->personality & ADDR_NO_RANDOMIZE)) {
- max = ((-1U) & STACK_RND_MASK) << PAGE_SHIFT;
+ max = ((-1UL) & STACK_RND_MASK) << PAGE_SHIFT;
}
return max;
diff --git a/arch/x86/platform/Makefile b/arch/x86/platform/Makefile
index 85afde1fa3e5..a62e0be3a2f1 100644
--- a/arch/x86/platform/Makefile
+++ b/arch/x86/platform/Makefile
@@ -5,6 +5,7 @@ obj-y += geode/
obj-y += goldfish/
obj-y += iris/
obj-y += intel-mid/
+obj-y += intel-quark/
obj-y += olpc/
obj-y += scx200/
obj-y += sfi/
diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S
index 5fcda7272550..86d0f9e08dd9 100644
--- a/arch/x86/platform/efi/efi_stub_64.S
+++ b/arch/x86/platform/efi/efi_stub_64.S
@@ -91,167 +91,6 @@ ENTRY(efi_call)
ret
ENDPROC(efi_call)
-#ifdef CONFIG_EFI_MIXED
-
-/*
- * We run this function from the 1:1 mapping.
- *
- * This function must be invoked with a 1:1 mapped stack.
- */
-ENTRY(__efi64_thunk)
- movl %ds, %eax
- push %rax
- movl %es, %eax
- push %rax
- movl %ss, %eax
- push %rax
-
- subq $32, %rsp
- movl %esi, 0x0(%rsp)
- movl %edx, 0x4(%rsp)
- movl %ecx, 0x8(%rsp)
- movq %r8, %rsi
- movl %esi, 0xc(%rsp)
- movq %r9, %rsi
- movl %esi, 0x10(%rsp)
-
- sgdt save_gdt(%rip)
-
- leaq 1f(%rip), %rbx
- movq %rbx, func_rt_ptr(%rip)
-
- /* Switch to gdt with 32-bit segments */
- movl 64(%rsp), %eax
- lgdt (%rax)
-
- leaq efi_enter32(%rip), %rax
- pushq $__KERNEL_CS
- pushq %rax
- lretq
-
-1: addq $32, %rsp
-
- lgdt save_gdt(%rip)
-
- pop %rbx
- movl %ebx, %ss
- pop %rbx
- movl %ebx, %es
- pop %rbx
- movl %ebx, %ds
-
- /*
- * Convert 32-bit status code into 64-bit.
- */
- test %rax, %rax
- jz 1f
- movl %eax, %ecx
- andl $0x0fffffff, %ecx
- andl $0xf0000000, %eax
- shl $32, %rax
- or %rcx, %rax
-1:
- ret
-ENDPROC(__efi64_thunk)
-
-ENTRY(efi_exit32)
- movq func_rt_ptr(%rip), %rax
- push %rax
- mov %rdi, %rax
- ret
-ENDPROC(efi_exit32)
-
- .code32
-/*
- * EFI service pointer must be in %edi.
- *
- * The stack should represent the 32-bit calling convention.
- */
-ENTRY(efi_enter32)
- movl $__KERNEL_DS, %eax
- movl %eax, %ds
- movl %eax, %es
- movl %eax, %ss
-
- /* Reload pgtables */
- movl %cr3, %eax
- movl %eax, %cr3
-
- /* Disable paging */
- movl %cr0, %eax
- btrl $X86_CR0_PG_BIT, %eax
- movl %eax, %cr0
-
- /* Disable long mode via EFER */
- movl $MSR_EFER, %ecx
- rdmsr
- btrl $_EFER_LME, %eax
- wrmsr
-
- call *%edi
-
- /* We must preserve return value */
- movl %eax, %edi
-
- /*
- * Some firmware will return with interrupts enabled. Be sure to
- * disable them before we switch GDTs.
- */
- cli
-
- movl 68(%esp), %eax
- movl %eax, 2(%eax)
- lgdtl (%eax)
-
- movl %cr4, %eax
- btsl $(X86_CR4_PAE_BIT), %eax
- movl %eax, %cr4
-
- movl %cr3, %eax
- movl %eax, %cr3
-
- movl $MSR_EFER, %ecx
- rdmsr
- btsl $_EFER_LME, %eax
- wrmsr
-
- xorl %eax, %eax
- lldt %ax
-
- movl 72(%esp), %eax
- pushl $__KERNEL_CS
- pushl %eax
-
- /* Enable paging */
- movl %cr0, %eax
- btsl $X86_CR0_PG_BIT, %eax
- movl %eax, %cr0
- lret
-ENDPROC(efi_enter32)
-
- .data
- .balign 8
- .global efi32_boot_gdt
-efi32_boot_gdt: .word 0
- .quad 0
-
-save_gdt: .word 0
- .quad 0
-func_rt_ptr: .quad 0
-
- .global efi_gdt64
-efi_gdt64:
- .word efi_gdt64_end - efi_gdt64
- .long 0 /* Filled out by user */
- .word 0
- .quad 0x0000000000000000 /* NULL descriptor */
- .quad 0x00af9a000000ffff /* __KERNEL_CS */
- .quad 0x00cf92000000ffff /* __KERNEL_DS */
- .quad 0x0080890000000000 /* TS descriptor */
- .quad 0x0000000000000000 /* TS continued */
-efi_gdt64_end:
-#endif /* CONFIG_EFI_MIXED */
-
.data
ENTRY(efi_scratch)
.fill 3,8,0
diff --git a/arch/x86/platform/efi/efi_thunk_64.S b/arch/x86/platform/efi/efi_thunk_64.S
index 8806fa73e6e6..ff85d28c50f2 100644
--- a/arch/x86/platform/efi/efi_thunk_64.S
+++ b/arch/x86/platform/efi/efi_thunk_64.S
@@ -1,9 +1,26 @@
/*
* Copyright (C) 2014 Intel Corporation; author Matt Fleming
+ *
+ * Support for invoking 32-bit EFI runtime services from a 64-bit
+ * kernel.
+ *
+ * The below thunking functions are only used after ExitBootServices()
+ * has been called. This simplifies things considerably as compared with
+ * the early EFI thunking because we can leave all the kernel state
+ * intact (GDT, IDT, etc) and simply invoke the the 32-bit EFI runtime
+ * services from __KERNEL32_CS. This means we can continue to service
+ * interrupts across an EFI mixed mode call.
+ *
+ * We do however, need to handle the fact that we're running in a full
+ * 64-bit virtual address space. Things like the stack and instruction
+ * addresses need to be accessible by the 32-bit firmware, so we rely on
+ * using the identity mappings in the EFI page table to access the stack
+ * and kernel text (see efi_setup_page_tables()).
*/
#include <linux/linkage.h>
#include <asm/page_types.h>
+#include <asm/segment.h>
.text
.code64
@@ -33,14 +50,6 @@ ENTRY(efi64_thunk)
leaq efi_exit32(%rip), %rbx
subq %rax, %rbx
movl %ebx, 8(%rsp)
- leaq efi_gdt64(%rip), %rbx
- subq %rax, %rbx
- movl %ebx, 2(%ebx)
- movl %ebx, 4(%rsp)
- leaq efi_gdt32(%rip), %rbx
- subq %rax, %rbx
- movl %ebx, 2(%ebx)
- movl %ebx, (%rsp)
leaq __efi64_thunk(%rip), %rbx
subq %rax, %rbx
@@ -52,14 +61,92 @@ ENTRY(efi64_thunk)
retq
ENDPROC(efi64_thunk)
- .data
-efi_gdt32:
- .word efi_gdt32_end - efi_gdt32
- .long 0 /* Filled out above */
- .word 0
- .quad 0x0000000000000000 /* NULL descriptor */
- .quad 0x00cf9a000000ffff /* __KERNEL_CS */
- .quad 0x00cf93000000ffff /* __KERNEL_DS */
-efi_gdt32_end:
+/*
+ * We run this function from the 1:1 mapping.
+ *
+ * This function must be invoked with a 1:1 mapped stack.
+ */
+ENTRY(__efi64_thunk)
+ movl %ds, %eax
+ push %rax
+ movl %es, %eax
+ push %rax
+ movl %ss, %eax
+ push %rax
+
+ subq $32, %rsp
+ movl %esi, 0x0(%rsp)
+ movl %edx, 0x4(%rsp)
+ movl %ecx, 0x8(%rsp)
+ movq %r8, %rsi
+ movl %esi, 0xc(%rsp)
+ movq %r9, %rsi
+ movl %esi, 0x10(%rsp)
+
+ leaq 1f(%rip), %rbx
+ movq %rbx, func_rt_ptr(%rip)
+
+ /* Switch to 32-bit descriptor */
+ pushq $__KERNEL32_CS
+ leaq efi_enter32(%rip), %rax
+ pushq %rax
+ lretq
+
+1: addq $32, %rsp
+
+ pop %rbx
+ movl %ebx, %ss
+ pop %rbx
+ movl %ebx, %es
+ pop %rbx
+ movl %ebx, %ds
+ /*
+ * Convert 32-bit status code into 64-bit.
+ */
+ test %rax, %rax
+ jz 1f
+ movl %eax, %ecx
+ andl $0x0fffffff, %ecx
+ andl $0xf0000000, %eax
+ shl $32, %rax
+ or %rcx, %rax
+1:
+ ret
+ENDPROC(__efi64_thunk)
+
+ENTRY(efi_exit32)
+ movq func_rt_ptr(%rip), %rax
+ push %rax
+ mov %rdi, %rax
+ ret
+ENDPROC(efi_exit32)
+
+ .code32
+/*
+ * EFI service pointer must be in %edi.
+ *
+ * The stack should represent the 32-bit calling convention.
+ */
+ENTRY(efi_enter32)
+ movl $__KERNEL_DS, %eax
+ movl %eax, %ds
+ movl %eax, %es
+ movl %eax, %ss
+
+ call *%edi
+
+ /* We must preserve return value */
+ movl %eax, %edi
+
+ movl 72(%esp), %eax
+ pushl $__KERNEL_CS
+ pushl %eax
+
+ lret
+ENDPROC(efi_enter32)
+
+ .data
+ .balign 8
+func_rt_ptr: .quad 0
efi_saved_sp: .quad 0
diff --git a/arch/x86/platform/intel-quark/Makefile b/arch/x86/platform/intel-quark/Makefile
new file mode 100644
index 000000000000..9cc57ed36022
--- /dev/null
+++ b/arch/x86/platform/intel-quark/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_INTEL_IMR) += imr.o
+obj-$(CONFIG_DEBUG_IMR_SELFTEST) += imr_selftest.o
diff --git a/arch/x86/platform/intel-quark/imr.c b/arch/x86/platform/intel-quark/imr.c
new file mode 100644
index 000000000000..0ee619f9fcb7
--- /dev/null
+++ b/arch/x86/platform/intel-quark/imr.c
@@ -0,0 +1,661 @@
+/**
+ * imr.c
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ * Copyright(c) 2015 Bryan O'Donoghue <pure.logic@nexus-software.ie>
+ *
+ * IMR registers define an isolated region of memory that can
+ * be masked to prohibit certain system agents from accessing memory.
+ * When a device behind a masked port performs an access - snooped or
+ * not, an IMR may optionally prevent that transaction from changing
+ * the state of memory or from getting correct data in response to the
+ * operation.
+ *
+ * Write data will be dropped and reads will return 0xFFFFFFFF, the
+ * system will reset and system BIOS will print out an error message to
+ * inform the user that an IMR has been violated.
+ *
+ * This code is based on the Linux MTRR code and reference code from
+ * Intel's Quark BSP EFI, Linux and grub code.
+ *
+ * See quark-x1000-datasheet.pdf for register definitions.
+ * http://www.intel.com/content/dam/www/public/us/en/documents/datasheets/quark-x1000-datasheet.pdf
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <asm-generic/sections.h>
+#include <asm/cpu_device_id.h>
+#include <asm/imr.h>
+#include <asm/iosf_mbi.h>
+#include <linux/debugfs.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/types.h>
+
+struct imr_device {
+ struct dentry *file;
+ bool init;
+ struct mutex lock;
+ int max_imr;
+ int reg_base;
+};
+
+static struct imr_device imr_dev;
+
+/*
+ * IMR read/write mask control registers.
+ * See quark-x1000-datasheet.pdf sections 12.7.4.5 and 12.7.4.6 for
+ * bit definitions.
+ *
+ * addr_hi
+ * 31 Lock bit
+ * 30:24 Reserved
+ * 23:2 1 KiB aligned lo address
+ * 1:0 Reserved
+ *
+ * addr_hi
+ * 31:24 Reserved
+ * 23:2 1 KiB aligned hi address
+ * 1:0 Reserved
+ */
+#define IMR_LOCK BIT(31)
+
+struct imr_regs {
+ u32 addr_lo;
+ u32 addr_hi;
+ u32 rmask;
+ u32 wmask;
+};
+
+#define IMR_NUM_REGS (sizeof(struct imr_regs)/sizeof(u32))
+#define IMR_SHIFT 8
+#define imr_to_phys(x) ((x) << IMR_SHIFT)
+#define phys_to_imr(x) ((x) >> IMR_SHIFT)
+
+/**
+ * imr_is_enabled - true if an IMR is enabled false otherwise.
+ *
+ * Determines if an IMR is enabled based on address range and read/write
+ * mask. An IMR set with an address range set to zero and a read/write
+ * access mask set to all is considered to be disabled. An IMR in any
+ * other state - for example set to zero but without read/write access
+ * all is considered to be enabled. This definition of disabled is how
+ * firmware switches off an IMR and is maintained in kernel for
+ * consistency.
+ *
+ * @imr: pointer to IMR descriptor.
+ * @return: true if IMR enabled false if disabled.
+ */
+static inline int imr_is_enabled(struct imr_regs *imr)
+{
+ return !(imr->rmask == IMR_READ_ACCESS_ALL &&
+ imr->wmask == IMR_WRITE_ACCESS_ALL &&
+ imr_to_phys(imr->addr_lo) == 0 &&
+ imr_to_phys(imr->addr_hi) == 0);
+}
+
+/**
+ * imr_read - read an IMR at a given index.
+ *
+ * Requires caller to hold imr mutex.
+ *
+ * @idev: pointer to imr_device structure.
+ * @imr_id: IMR entry to read.
+ * @imr: IMR structure representing address and access masks.
+ * @return: 0 on success or error code passed from mbi_iosf on failure.
+ */
+static int imr_read(struct imr_device *idev, u32 imr_id, struct imr_regs *imr)
+{
+ u32 reg = imr_id * IMR_NUM_REGS + idev->reg_base;
+ int ret;
+
+ ret = iosf_mbi_read(QRK_MBI_UNIT_MM, QRK_MBI_MM_READ,
+ reg++, &imr->addr_lo);
+ if (ret)
+ return ret;
+
+ ret = iosf_mbi_read(QRK_MBI_UNIT_MM, QRK_MBI_MM_READ,
+ reg++, &imr->addr_hi);
+ if (ret)
+ return ret;
+
+ ret = iosf_mbi_read(QRK_MBI_UNIT_MM, QRK_MBI_MM_READ,
+ reg++, &imr->rmask);
+ if (ret)
+ return ret;
+
+ return iosf_mbi_read(QRK_MBI_UNIT_MM, QRK_MBI_MM_READ,
+ reg++, &imr->wmask);
+}
+
+/**
+ * imr_write - write an IMR at a given index.
+ *
+ * Requires caller to hold imr mutex.
+ * Note lock bits need to be written independently of address bits.
+ *
+ * @idev: pointer to imr_device structure.
+ * @imr_id: IMR entry to write.
+ * @imr: IMR structure representing address and access masks.
+ * @lock: indicates if the IMR lock bit should be applied.
+ * @return: 0 on success or error code passed from mbi_iosf on failure.
+ */
+static int imr_write(struct imr_device *idev, u32 imr_id,
+ struct imr_regs *imr, bool lock)
+{
+ unsigned long flags;
+ u32 reg = imr_id * IMR_NUM_REGS + idev->reg_base;
+ int ret;
+
+ local_irq_save(flags);
+
+ ret = iosf_mbi_write(QRK_MBI_UNIT_MM, QRK_MBI_MM_WRITE, reg++,
+ imr->addr_lo);
+ if (ret)
+ goto failed;
+
+ ret = iosf_mbi_write(QRK_MBI_UNIT_MM, QRK_MBI_MM_WRITE,
+ reg++, imr->addr_hi);
+ if (ret)
+ goto failed;
+
+ ret = iosf_mbi_write(QRK_MBI_UNIT_MM, QRK_MBI_MM_WRITE,
+ reg++, imr->rmask);
+ if (ret)
+ goto failed;
+
+ ret = iosf_mbi_write(QRK_MBI_UNIT_MM, QRK_MBI_MM_WRITE,
+ reg++, imr->wmask);
+ if (ret)
+ goto failed;
+
+ /* Lock bit must be set separately to addr_lo address bits. */
+ if (lock) {
+ imr->addr_lo |= IMR_LOCK;
+ ret = iosf_mbi_write(QRK_MBI_UNIT_MM, QRK_MBI_MM_WRITE,
+ reg - IMR_NUM_REGS, imr->addr_lo);
+ if (ret)
+ goto failed;
+ }
+
+ local_irq_restore(flags);
+ return 0;
+failed:
+ /*
+ * If writing to the IOSF failed then we're in an unknown state,
+ * likely a very bad state. An IMR in an invalid state will almost
+ * certainly lead to a memory access violation.
+ */
+ local_irq_restore(flags);
+ WARN(ret, "IOSF-MBI write fail range 0x%08x-0x%08x unreliable\n",
+ imr_to_phys(imr->addr_lo), imr_to_phys(imr->addr_hi) + IMR_MASK);
+
+ return ret;
+}
+
+/**
+ * imr_dbgfs_state_show - print state of IMR registers.
+ *
+ * @s: pointer to seq_file for output.
+ * @unused: unused parameter.
+ * @return: 0 on success or error code passed from mbi_iosf on failure.
+ */
+static int imr_dbgfs_state_show(struct seq_file *s, void *unused)
+{
+ phys_addr_t base;
+ phys_addr_t end;
+ int i;
+ struct imr_device *idev = s->private;
+ struct imr_regs imr;
+ size_t size;
+ int ret = -ENODEV;
+
+ mutex_lock(&idev->lock);
+
+ for (i = 0; i < idev->max_imr; i++) {
+
+ ret = imr_read(idev, i, &imr);
+ if (ret)
+ break;
+
+ /*
+ * Remember to add IMR_ALIGN bytes to size to indicate the
+ * inherent IMR_ALIGN size bytes contained in the masked away
+ * lower ten bits.
+ */
+ if (imr_is_enabled(&imr)) {
+ base = imr_to_phys(imr.addr_lo);
+ end = imr_to_phys(imr.addr_hi) + IMR_MASK;
+ } else {
+ base = 0;
+ end = 0;
+ }
+ size = end - base;
+ seq_printf(s, "imr%02i: base=%pa, end=%pa, size=0x%08zx "
+ "rmask=0x%08x, wmask=0x%08x, %s, %s\n", i,
+ &base, &end, size, imr.rmask, imr.wmask,
+ imr_is_enabled(&imr) ? "enabled " : "disabled",
+ imr.addr_lo & IMR_LOCK ? "locked" : "unlocked");
+ }
+
+ mutex_unlock(&idev->lock);
+ return ret;
+}
+
+/**
+ * imr_state_open - debugfs open callback.
+ *
+ * @inode: pointer to struct inode.
+ * @file: pointer to struct file.
+ * @return: result of single open.
+ */
+static int imr_state_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, imr_dbgfs_state_show, inode->i_private);
+}
+
+static const struct file_operations imr_state_ops = {
+ .open = imr_state_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+/**
+ * imr_debugfs_register - register debugfs hooks.
+ *
+ * @idev: pointer to imr_device structure.
+ * @return: 0 on success - errno on failure.
+ */
+static int imr_debugfs_register(struct imr_device *idev)
+{
+ idev->file = debugfs_create_file("imr_state", S_IFREG | S_IRUGO, NULL,
+ idev, &imr_state_ops);
+ return PTR_ERR_OR_ZERO(idev->file);
+}
+
+/**
+ * imr_debugfs_unregister - unregister debugfs hooks.
+ *
+ * @idev: pointer to imr_device structure.
+ * @return:
+ */
+static void imr_debugfs_unregister(struct imr_device *idev)
+{
+ debugfs_remove(idev->file);
+}
+
+/**
+ * imr_check_params - check passed address range IMR alignment and non-zero size
+ *
+ * @base: base address of intended IMR.
+ * @size: size of intended IMR.
+ * @return: zero on valid range -EINVAL on unaligned base/size.
+ */
+static int imr_check_params(phys_addr_t base, size_t size)
+{
+ if ((base & IMR_MASK) || (size & IMR_MASK)) {
+ pr_err("base %pa size 0x%08zx must align to 1KiB\n",
+ &base, size);
+ return -EINVAL;
+ }
+ if (size == 0)
+ return -EINVAL;
+
+ return 0;
+}
+
+/**
+ * imr_raw_size - account for the IMR_ALIGN bytes that addr_hi appends.
+ *
+ * IMR addr_hi has a built in offset of plus IMR_ALIGN (0x400) bytes from the
+ * value in the register. We need to subtract IMR_ALIGN bytes from input sizes
+ * as a result.
+ *
+ * @size: input size bytes.
+ * @return: reduced size.
+ */
+static inline size_t imr_raw_size(size_t size)
+{
+ return size - IMR_ALIGN;
+}
+
+/**
+ * imr_address_overlap - detects an address overlap.
+ *
+ * @addr: address to check against an existing IMR.
+ * @imr: imr being checked.
+ * @return: true for overlap false for no overlap.
+ */
+static inline int imr_address_overlap(phys_addr_t addr, struct imr_regs *imr)
+{
+ return addr >= imr_to_phys(imr->addr_lo) && addr <= imr_to_phys(imr->addr_hi);
+}
+
+/**
+ * imr_add_range - add an Isolated Memory Region.
+ *
+ * @base: physical base address of region aligned to 1KiB.
+ * @size: physical size of region in bytes must be aligned to 1KiB.
+ * @read_mask: read access mask.
+ * @write_mask: write access mask.
+ * @lock: indicates whether or not to permanently lock this region.
+ * @return: zero on success or negative value indicating error.
+ */
+int imr_add_range(phys_addr_t base, size_t size,
+ unsigned int rmask, unsigned int wmask, bool lock)
+{
+ phys_addr_t end;
+ unsigned int i;
+ struct imr_device *idev = &imr_dev;
+ struct imr_regs imr;
+ size_t raw_size;
+ int reg;
+ int ret;
+
+ if (WARN_ONCE(idev->init == false, "driver not initialized"))
+ return -ENODEV;
+
+ ret = imr_check_params(base, size);
+ if (ret)
+ return ret;
+
+ /* Tweak the size value. */
+ raw_size = imr_raw_size(size);
+ end = base + raw_size;
+
+ /*
+ * Check for reserved IMR value common to firmware, kernel and grub
+ * indicating a disabled IMR.
+ */
+ imr.addr_lo = phys_to_imr(base);
+ imr.addr_hi = phys_to_imr(end);
+ imr.rmask = rmask;
+ imr.wmask = wmask;
+ if (!imr_is_enabled(&imr))
+ return -ENOTSUPP;
+
+ mutex_lock(&idev->lock);
+
+ /*
+ * Find a free IMR while checking for an existing overlapping range.
+ * Note there's no restriction in silicon to prevent IMR overlaps.
+ * For the sake of simplicity and ease in defining/debugging an IMR
+ * memory map we exclude IMR overlaps.
+ */
+ reg = -1;
+ for (i = 0; i < idev->max_imr; i++) {
+ ret = imr_read(idev, i, &imr);
+ if (ret)
+ goto failed;
+
+ /* Find overlap @ base or end of requested range. */
+ ret = -EINVAL;
+ if (imr_is_enabled(&imr)) {
+ if (imr_address_overlap(base, &imr))
+ goto failed;
+ if (imr_address_overlap(end, &imr))
+ goto failed;
+ } else {
+ reg = i;
+ }
+ }
+
+ /* Error out if we have no free IMR entries. */
+ if (reg == -1) {
+ ret = -ENOMEM;
+ goto failed;
+ }
+
+ pr_debug("add %d phys %pa-%pa size %zx mask 0x%08x wmask 0x%08x\n",
+ reg, &base, &end, raw_size, rmask, wmask);
+
+ /* Enable IMR at specified range and access mask. */
+ imr.addr_lo = phys_to_imr(base);
+ imr.addr_hi = phys_to_imr(end);
+ imr.rmask = rmask;
+ imr.wmask = wmask;
+
+ ret = imr_write(idev, reg, &imr, lock);
+ if (ret < 0) {
+ /*
+ * In the highly unlikely event iosf_mbi_write failed
+ * attempt to rollback the IMR setup skipping the trapping
+ * of further IOSF write failures.
+ */
+ imr.addr_lo = 0;
+ imr.addr_hi = 0;
+ imr.rmask = IMR_READ_ACCESS_ALL;
+ imr.wmask = IMR_WRITE_ACCESS_ALL;
+ imr_write(idev, reg, &imr, false);
+ }
+failed:
+ mutex_unlock(&idev->lock);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(imr_add_range);
+
+/**
+ * __imr_remove_range - delete an Isolated Memory Region.
+ *
+ * This function allows you to delete an IMR by its index specified by reg or
+ * by address range specified by base and size respectively. If you specify an
+ * index on its own the base and size parameters are ignored.
+ * imr_remove_range(0, base, size); delete IMR at index 0 base/size ignored.
+ * imr_remove_range(-1, base, size); delete IMR from base to base+size.
+ *
+ * @reg: imr index to remove.
+ * @base: physical base address of region aligned to 1 KiB.
+ * @size: physical size of region in bytes aligned to 1 KiB.
+ * @return: -EINVAL on invalid range or out or range id
+ * -ENODEV if reg is valid but no IMR exists or is locked
+ * 0 on success.
+ */
+static int __imr_remove_range(int reg, phys_addr_t base, size_t size)
+{
+ phys_addr_t end;
+ bool found = false;
+ unsigned int i;
+ struct imr_device *idev = &imr_dev;
+ struct imr_regs imr;
+ size_t raw_size;
+ int ret = 0;
+
+ if (WARN_ONCE(idev->init == false, "driver not initialized"))
+ return -ENODEV;
+
+ /*
+ * Validate address range if deleting by address, else we are
+ * deleting by index where base and size will be ignored.
+ */
+ if (reg == -1) {
+ ret = imr_check_params(base, size);
+ if (ret)
+ return ret;
+ }
+
+ /* Tweak the size value. */
+ raw_size = imr_raw_size(size);
+ end = base + raw_size;
+
+ mutex_lock(&idev->lock);
+
+ if (reg >= 0) {
+ /* If a specific IMR is given try to use it. */
+ ret = imr_read(idev, reg, &imr);
+ if (ret)
+ goto failed;
+
+ if (!imr_is_enabled(&imr) || imr.addr_lo & IMR_LOCK) {
+ ret = -ENODEV;
+ goto failed;
+ }
+ found = true;
+ } else {
+ /* Search for match based on address range. */
+ for (i = 0; i < idev->max_imr; i++) {
+ ret = imr_read(idev, i, &imr);
+ if (ret)
+ goto failed;
+
+ if (!imr_is_enabled(&imr) || imr.addr_lo & IMR_LOCK)
+ continue;
+
+ if ((imr_to_phys(imr.addr_lo) == base) &&
+ (imr_to_phys(imr.addr_hi) == end)) {
+ found = true;
+ reg = i;
+ break;
+ }
+ }
+ }
+
+ if (!found) {
+ ret = -ENODEV;
+ goto failed;
+ }
+
+ pr_debug("remove %d phys %pa-%pa size %zx\n", reg, &base, &end, raw_size);
+
+ /* Tear down the IMR. */
+ imr.addr_lo = 0;
+ imr.addr_hi = 0;
+ imr.rmask = IMR_READ_ACCESS_ALL;
+ imr.wmask = IMR_WRITE_ACCESS_ALL;
+
+ ret = imr_write(idev, reg, &imr, false);
+
+failed:
+ mutex_unlock(&idev->lock);
+ return ret;
+}
+
+/**
+ * imr_remove_range - delete an Isolated Memory Region by address
+ *
+ * This function allows you to delete an IMR by an address range specified
+ * by base and size respectively.
+ * imr_remove_range(base, size); delete IMR from base to base+size.
+ *
+ * @base: physical base address of region aligned to 1 KiB.
+ * @size: physical size of region in bytes aligned to 1 KiB.
+ * @return: -EINVAL on invalid range or out or range id
+ * -ENODEV if reg is valid but no IMR exists or is locked
+ * 0 on success.
+ */
+int imr_remove_range(phys_addr_t base, size_t size)
+{
+ return __imr_remove_range(-1, base, size);
+}
+EXPORT_SYMBOL_GPL(imr_remove_range);
+
+/**
+ * imr_clear - delete an Isolated Memory Region by index
+ *
+ * This function allows you to delete an IMR by an address range specified
+ * by the index of the IMR. Useful for initial sanitization of the IMR
+ * address map.
+ * imr_ge(base, size); delete IMR from base to base+size.
+ *
+ * @reg: imr index to remove.
+ * @return: -EINVAL on invalid range or out or range id
+ * -ENODEV if reg is valid but no IMR exists or is locked
+ * 0 on success.
+ */
+static inline int imr_clear(int reg)
+{
+ return __imr_remove_range(reg, 0, 0);
+}
+
+/**
+ * imr_fixup_memmap - Tear down IMRs used during bootup.
+ *
+ * BIOS and Grub both setup IMRs around compressed kernel, initrd memory
+ * that need to be removed before the kernel hands out one of the IMR
+ * encased addresses to a downstream DMA agent such as the SD or Ethernet.
+ * IMRs on Galileo are setup to immediately reset the system on violation.
+ * As a result if you're running a root filesystem from SD - you'll need
+ * the boot-time IMRs torn down or you'll find seemingly random resets when
+ * using your filesystem.
+ *
+ * @idev: pointer to imr_device structure.
+ * @return:
+ */
+static void __init imr_fixup_memmap(struct imr_device *idev)
+{
+ phys_addr_t base = virt_to_phys(&_text);
+ size_t size = virt_to_phys(&__end_rodata) - base;
+ int i;
+ int ret;
+
+ /* Tear down all existing unlocked IMRs. */
+ for (i = 0; i < idev->max_imr; i++)
+ imr_clear(i);
+
+ /*
+ * Setup a locked IMR around the physical extent of the kernel
+ * from the beginning of the .text secton to the end of the
+ * .rodata section as one physically contiguous block.
+ */
+ ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, true);
+ if (ret < 0) {
+ pr_err("unable to setup IMR for kernel: (%p - %p)\n",
+ &_text, &__end_rodata);
+ } else {
+ pr_info("protecting kernel .text - .rodata: %zu KiB (%p - %p)\n",
+ size / 1024, &_text, &__end_rodata);
+ }
+
+}
+
+static const struct x86_cpu_id imr_ids[] __initconst = {
+ { X86_VENDOR_INTEL, 5, 9 }, /* Intel Quark SoC X1000. */
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, imr_ids);
+
+/**
+ * imr_init - entry point for IMR driver.
+ *
+ * return: -ENODEV for no IMR support 0 if good to go.
+ */
+static int __init imr_init(void)
+{
+ struct imr_device *idev = &imr_dev;
+ int ret;
+
+ if (!x86_match_cpu(imr_ids) || !iosf_mbi_available())
+ return -ENODEV;
+
+ idev->max_imr = QUARK_X1000_IMR_MAX;
+ idev->reg_base = QUARK_X1000_IMR_REGBASE;
+ idev->init = true;
+
+ mutex_init(&idev->lock);
+ ret = imr_debugfs_register(idev);
+ if (ret != 0)
+ pr_warn("debugfs register failed!\n");
+ imr_fixup_memmap(idev);
+ return 0;
+}
+
+/**
+ * imr_exit - exit point for IMR code.
+ *
+ * Deregisters debugfs, leave IMR state as-is.
+ *
+ * return:
+ */
+static void __exit imr_exit(void)
+{
+ imr_debugfs_unregister(&imr_dev);
+}
+
+module_init(imr_init);
+module_exit(imr_exit);
+
+MODULE_AUTHOR("Bryan O'Donoghue <pure.logic@nexus-software.ie>");
+MODULE_DESCRIPTION("Intel Isolated Memory Region driver");
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/arch/x86/platform/intel-quark/imr_selftest.c b/arch/x86/platform/intel-quark/imr_selftest.c
new file mode 100644
index 000000000000..c9a0838890e2
--- /dev/null
+++ b/arch/x86/platform/intel-quark/imr_selftest.c
@@ -0,0 +1,129 @@
+/**
+ * imr_selftest.c
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ * Copyright(c) 2015 Bryan O'Donoghue <pure.logic@nexus-software.ie>
+ *
+ * IMR self test. The purpose of this module is to run a set of tests on the
+ * IMR API to validate it's sanity. We check for overlapping, reserved
+ * addresses and setup/teardown sanity.
+ *
+ */
+
+#include <asm-generic/sections.h>
+#include <asm/imr.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/types.h>
+
+#define SELFTEST KBUILD_MODNAME ": "
+/**
+ * imr_self_test_result - Print result string for self test.
+ *
+ * @res: result code - true if test passed false otherwise.
+ * @fmt: format string.
+ * ... variadic argument list.
+ */
+static void __init imr_self_test_result(int res, const char *fmt, ...)
+{
+ va_list vlist;
+
+ /* Print pass/fail. */
+ if (res)
+ pr_info(SELFTEST "pass ");
+ else
+ pr_info(SELFTEST "fail ");
+
+ /* Print variable string. */
+ va_start(vlist, fmt);
+ vprintk(fmt, vlist);
+ va_end(vlist);
+
+ /* Optional warning. */
+ WARN(res == 0, "test failed");
+}
+#undef SELFTEST
+
+/**
+ * imr_self_test
+ *
+ * Verify IMR self_test with some simple tests to verify overlap,
+ * zero sized allocations and 1 KiB sized areas.
+ *
+ */
+static void __init imr_self_test(void)
+{
+ phys_addr_t base = virt_to_phys(&_text);
+ size_t size = virt_to_phys(&__end_rodata) - base;
+ const char *fmt_over = "overlapped IMR @ (0x%08lx - 0x%08lx)\n";
+ int ret;
+
+ /* Test zero zero. */
+ ret = imr_add_range(0, 0, 0, 0, false);
+ imr_self_test_result(ret < 0, "zero sized IMR\n");
+
+ /* Test exact overlap. */
+ ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, false);
+ imr_self_test_result(ret < 0, fmt_over, __va(base), __va(base + size));
+
+ /* Test overlap with base inside of existing. */
+ base += size - IMR_ALIGN;
+ ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, false);
+ imr_self_test_result(ret < 0, fmt_over, __va(base), __va(base + size));
+
+ /* Test overlap with end inside of existing. */
+ base -= size + IMR_ALIGN * 2;
+ ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, false);
+ imr_self_test_result(ret < 0, fmt_over, __va(base), __va(base + size));
+
+ /* Test that a 1 KiB IMR @ zero with read/write all will bomb out. */
+ ret = imr_add_range(0, IMR_ALIGN, IMR_READ_ACCESS_ALL,
+ IMR_WRITE_ACCESS_ALL, false);
+ imr_self_test_result(ret < 0, "1KiB IMR @ 0x00000000 - access-all\n");
+
+ /* Test that a 1 KiB IMR @ zero with CPU only will work. */
+ ret = imr_add_range(0, IMR_ALIGN, IMR_CPU, IMR_CPU, false);
+ imr_self_test_result(ret >= 0, "1KiB IMR @ 0x00000000 - cpu-access\n");
+ if (ret >= 0) {
+ ret = imr_remove_range(0, IMR_ALIGN);
+ imr_self_test_result(ret == 0, "teardown - cpu-access\n");
+ }
+
+ /* Test 2 KiB works. */
+ size = IMR_ALIGN * 2;
+ ret = imr_add_range(0, size, IMR_READ_ACCESS_ALL,
+ IMR_WRITE_ACCESS_ALL, false);
+ imr_self_test_result(ret >= 0, "2KiB IMR @ 0x00000000\n");
+ if (ret >= 0) {
+ ret = imr_remove_range(0, size);
+ imr_self_test_result(ret == 0, "teardown 2KiB\n");
+ }
+}
+
+/**
+ * imr_self_test_init - entry point for IMR driver.
+ *
+ * return: -ENODEV for no IMR support 0 if good to go.
+ */
+static int __init imr_self_test_init(void)
+{
+ imr_self_test();
+ return 0;
+}
+
+/**
+ * imr_self_test_exit - exit point for IMR code.
+ *
+ * return:
+ */
+static void __exit imr_self_test_exit(void)
+{
+}
+
+module_init(imr_self_test_init);
+module_exit(imr_self_test_exit);
+
+MODULE_AUTHOR("Bryan O'Donoghue <pure.logic@nexus-software.ie>");
+MODULE_DESCRIPTION("Intel Isolated Memory Region self-test driver");
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 23b45eb9a89c..956374c1edbc 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -41,7 +41,7 @@ static u8 zero_stats;
static inline void check_zero(void)
{
u8 ret;
- u8 old = ACCESS_ONCE(zero_stats);
+ u8 old = READ_ONCE(zero_stats);
if (unlikely(old)) {
ret = cmpxchg(&zero_stats, old, 0);
/* This ensures only one fellow resets the stat */
@@ -112,6 +112,7 @@ __visible void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
struct xen_lock_waiting *w = this_cpu_ptr(&lock_waiting);
int cpu = smp_processor_id();
u64 start;
+ __ticket_t head;
unsigned long flags;
/* If kicker interrupts not initialized yet, just spin */
@@ -159,11 +160,15 @@ __visible void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
*/
__ticket_enter_slowpath(lock);
+ /* make sure enter_slowpath, which is atomic does not cross the read */
+ smp_mb__after_atomic();
+
/*
* check again make sure it didn't become free while
* we weren't looking
*/
- if (ACCESS_ONCE(lock->tickets.head) == want) {
+ head = READ_ONCE(lock->tickets.head);
+ if (__tickets_equal(head, want)) {
add_stats(TAKEN_SLOW_PICKUP, 1);
goto out;
}
@@ -204,8 +209,8 @@ static void xen_unlock_kick(struct arch_spinlock *lock, __ticket_t next)
const struct xen_lock_waiting *w = &per_cpu(lock_waiting, cpu);
/* Make sure we read lock before want */
- if (ACCESS_ONCE(w->lock) == lock &&
- ACCESS_ONCE(w->want) == next) {
+ if (READ_ONCE(w->lock) == lock &&
+ READ_ONCE(w->want) == next) {
add_stats(RELEASED_SLOW_KICKED, 1);
xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
break;
diff --git a/drivers/char/ipmi/ipmi_devintf.c b/drivers/char/ipmi/ipmi_devintf.c
index ec318bf434a6..1786574536b2 100644
--- a/drivers/char/ipmi/ipmi_devintf.c
+++ b/drivers/char/ipmi/ipmi_devintf.c
@@ -157,12 +157,16 @@ static int ipmi_release(struct inode *inode, struct file *file)
{
struct ipmi_file_private *priv = file->private_data;
int rv;
+ struct ipmi_recv_msg *msg, *next;
rv = ipmi_destroy_user(priv->user);
if (rv)
return rv;
- /* FIXME - free the messages in the list. */
+ list_for_each_entry_safe(msg, next, &priv->recv_msgs, link)
+ ipmi_free_recv_msg(msg);
+
+
kfree(priv);
return 0;
diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c
index 6b65fa4e0c55..9bb592872532 100644
--- a/drivers/char/ipmi/ipmi_msghandler.c
+++ b/drivers/char/ipmi/ipmi_msghandler.c
@@ -1483,14 +1483,10 @@ static inline void format_lan_msg(struct ipmi_smi_msg *smi_msg,
smi_msg->msgid = msgid;
}
-static void smi_send(ipmi_smi_t intf, struct ipmi_smi_handlers *handlers,
- struct ipmi_smi_msg *smi_msg, int priority)
+static struct ipmi_smi_msg *smi_add_send_msg(ipmi_smi_t intf,
+ struct ipmi_smi_msg *smi_msg,
+ int priority)
{
- int run_to_completion = intf->run_to_completion;
- unsigned long flags;
-
- if (!run_to_completion)
- spin_lock_irqsave(&intf->xmit_msgs_lock, flags);
if (intf->curr_msg) {
if (priority > 0)
list_add_tail(&smi_msg->link, &intf->hp_xmit_msgs);
@@ -1500,8 +1496,25 @@ static void smi_send(ipmi_smi_t intf, struct ipmi_smi_handlers *handlers,
} else {
intf->curr_msg = smi_msg;
}
- if (!run_to_completion)
+
+ return smi_msg;
+}
+
+
+static void smi_send(ipmi_smi_t intf, struct ipmi_smi_handlers *handlers,
+ struct ipmi_smi_msg *smi_msg, int priority)
+{
+ int run_to_completion = intf->run_to_completion;
+
+ if (run_to_completion) {
+ smi_msg = smi_add_send_msg(intf, smi_msg, priority);
+ } else {
+ unsigned long flags;
+
+ spin_lock_irqsave(&intf->xmit_msgs_lock, flags);
+ smi_msg = smi_add_send_msg(intf, smi_msg, priority);
spin_unlock_irqrestore(&intf->xmit_msgs_lock, flags);
+ }
if (smi_msg)
handlers->sender(intf->send_info, smi_msg);
@@ -1985,7 +1998,9 @@ static int smi_ipmb_proc_show(struct seq_file *m, void *v)
seq_printf(m, "%x", intf->channels[0].address);
for (i = 1; i < IPMI_MAX_CHANNELS; i++)
seq_printf(m, " %x", intf->channels[i].address);
- return seq_putc(m, '\n');
+ seq_putc(m, '\n');
+
+ return seq_has_overflowed(m);
}
static int smi_ipmb_proc_open(struct inode *inode, struct file *file)
@@ -2004,9 +2019,11 @@ static int smi_version_proc_show(struct seq_file *m, void *v)
{
ipmi_smi_t intf = m->private;
- return seq_printf(m, "%u.%u\n",
- ipmi_version_major(&intf->bmc->id),
- ipmi_version_minor(&intf->bmc->id));
+ seq_printf(m, "%u.%u\n",
+ ipmi_version_major(&intf->bmc->id),
+ ipmi_version_minor(&intf->bmc->id));
+
+ return seq_has_overflowed(m);
}
static int smi_version_proc_open(struct inode *inode, struct file *file)
@@ -2353,11 +2370,28 @@ static struct attribute *bmc_dev_attrs[] = {
&dev_attr_additional_device_support.attr,
&dev_attr_manufacturer_id.attr,
&dev_attr_product_id.attr,
+ &dev_attr_aux_firmware_revision.attr,
+ &dev_attr_guid.attr,
NULL
};
+static umode_t bmc_dev_attr_is_visible(struct kobject *kobj,
+ struct attribute *attr, int idx)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct bmc_device *bmc = to_bmc_device(dev);
+ umode_t mode = attr->mode;
+
+ if (attr == &dev_attr_aux_firmware_revision.attr)
+ return bmc->id.aux_firmware_revision_set ? mode : 0;
+ if (attr == &dev_attr_guid.attr)
+ return bmc->guid_set ? mode : 0;
+ return mode;
+}
+
static struct attribute_group bmc_dev_attr_group = {
.attrs = bmc_dev_attrs,
+ .is_visible = bmc_dev_attr_is_visible,
};
static const struct attribute_group *bmc_dev_attr_groups[] = {
@@ -2380,13 +2414,6 @@ cleanup_bmc_device(struct kref *ref)
{
struct bmc_device *bmc = container_of(ref, struct bmc_device, usecount);
- if (bmc->id.aux_firmware_revision_set)
- device_remove_file(&bmc->pdev.dev,
- &dev_attr_aux_firmware_revision);
- if (bmc->guid_set)
- device_remove_file(&bmc->pdev.dev,
- &dev_attr_guid);
-
platform_device_unregister(&bmc->pdev);
}
@@ -2407,33 +2434,6 @@ static void ipmi_bmc_unregister(ipmi_smi_t intf)
mutex_unlock(&ipmidriver_mutex);
}
-static int create_bmc_files(struct bmc_device *bmc)
-{
- int err;
-
- if (bmc->id.aux_firmware_revision_set) {
- err = device_create_file(&bmc->pdev.dev,
- &dev_attr_aux_firmware_revision);
- if (err)
- goto out;
- }
- if (bmc->guid_set) {
- err = device_create_file(&bmc->pdev.dev,
- &dev_attr_guid);
- if (err)
- goto out_aux_firm;
- }
-
- return 0;
-
-out_aux_firm:
- if (bmc->id.aux_firmware_revision_set)
- device_remove_file(&bmc->pdev.dev,
- &dev_attr_aux_firmware_revision);
-out:
- return err;
-}
-
static int ipmi_bmc_register(ipmi_smi_t intf, int ifnum)
{
int rv;
@@ -2522,15 +2522,6 @@ static int ipmi_bmc_register(ipmi_smi_t intf, int ifnum)
return rv;
}
- rv = create_bmc_files(bmc);
- if (rv) {
- mutex_lock(&ipmidriver_mutex);
- platform_device_unregister(&bmc->pdev);
- mutex_unlock(&ipmidriver_mutex);
-
- return rv;
- }
-
dev_info(intf->si_dev, "Found new BMC (man_id: 0x%6.6x, "
"prod_id: 0x%4.4x, dev_id: 0x%2.2x)\n",
bmc->id.manufacturer_id,
@@ -4212,7 +4203,6 @@ static void need_waiter(ipmi_smi_t intf)
static atomic_t smi_msg_inuse_count = ATOMIC_INIT(0);
static atomic_t recv_msg_inuse_count = ATOMIC_INIT(0);
-/* FIXME - convert these to slabs. */
static void free_smi_msg(struct ipmi_smi_msg *msg)
{
atomic_dec(&smi_msg_inuse_count);
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index 967b73aa4e66..f6646ed3047e 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -321,6 +321,18 @@ static int try_smi_init(struct smi_info *smi);
static void cleanup_one_si(struct smi_info *to_clean);
static void cleanup_ipmi_si(void);
+#ifdef DEBUG_TIMING
+void debug_timestamp(char *msg)
+{
+ struct timespec64 t;
+
+ getnstimeofday64(&t);
+ pr_debug("**%s: %lld.%9.9ld\n", msg, (long long) t.tv_sec, t.tv_nsec);
+}
+#else
+#define debug_timestamp(x)
+#endif
+
static ATOMIC_NOTIFIER_HEAD(xaction_notifier_list);
static int register_xaction_notifier(struct notifier_block *nb)
{
@@ -358,9 +370,6 @@ static void return_hosed_msg(struct smi_info *smi_info, int cCode)
static enum si_sm_result start_next_msg(struct smi_info *smi_info)
{
int rv;
-#ifdef DEBUG_TIMING
- struct timeval t;
-#endif
if (!smi_info->waiting_msg) {
smi_info->curr_msg = NULL;
@@ -370,10 +379,7 @@ static enum si_sm_result start_next_msg(struct smi_info *smi_info)
smi_info->curr_msg = smi_info->waiting_msg;
smi_info->waiting_msg = NULL;
-#ifdef DEBUG_TIMING
- do_gettimeofday(&t);
- printk(KERN_DEBUG "**Start2: %d.%9.9d\n", t.tv_sec, t.tv_usec);
-#endif
+ debug_timestamp("Start2");
err = atomic_notifier_call_chain(&xaction_notifier_list,
0, smi_info);
if (err & NOTIFY_STOP_MASK) {
@@ -582,12 +588,8 @@ static void check_bt_irq(struct smi_info *smi_info, bool irq_on)
static void handle_transaction_done(struct smi_info *smi_info)
{
struct ipmi_smi_msg *msg;
-#ifdef DEBUG_TIMING
- struct timeval t;
- do_gettimeofday(&t);
- printk(KERN_DEBUG "**Done: %d.%9.9d\n", t.tv_sec, t.tv_usec);
-#endif
+ debug_timestamp("Done");
switch (smi_info->si_state) {
case SI_NORMAL:
if (!smi_info->curr_msg)
@@ -929,24 +931,15 @@ static void sender(void *send_info,
struct smi_info *smi_info = send_info;
enum si_sm_result result;
unsigned long flags;
-#ifdef DEBUG_TIMING
- struct timeval t;
-#endif
-
- BUG_ON(smi_info->waiting_msg);
- smi_info->waiting_msg = msg;
-#ifdef DEBUG_TIMING
- do_gettimeofday(&t);
- printk("**Enqueue: %d.%9.9d\n", t.tv_sec, t.tv_usec);
-#endif
+ debug_timestamp("Enqueue");
if (smi_info->run_to_completion) {
/*
* If we are running to completion, start it and run
* transactions until everything is clear.
*/
- smi_info->curr_msg = smi_info->waiting_msg;
+ smi_info->curr_msg = msg;
smi_info->waiting_msg = NULL;
/*
@@ -964,6 +957,15 @@ static void sender(void *send_info,
}
spin_lock_irqsave(&smi_info->si_lock, flags);
+ /*
+ * The following two lines don't need to be under the lock for
+ * the lock's sake, but they do need SMP memory barriers to
+ * avoid getting things out of order. We are already claiming
+ * the lock, anyway, so just do it under the lock to avoid the
+ * ordering problem.
+ */
+ BUG_ON(smi_info->waiting_msg);
+ smi_info->waiting_msg = msg;
check_start_timer_thread(smi_info);
spin_unlock_irqrestore(&smi_info->si_lock, flags);
}
@@ -989,18 +991,18 @@ static void set_run_to_completion(void *send_info, bool i_run_to_completion)
* we are spinning in kipmid looking for something and not delaying
* between checks
*/
-static inline void ipmi_si_set_not_busy(struct timespec *ts)
+static inline void ipmi_si_set_not_busy(struct timespec64 *ts)
{
ts->tv_nsec = -1;
}
-static inline int ipmi_si_is_busy(struct timespec *ts)
+static inline int ipmi_si_is_busy(struct timespec64 *ts)
{
return ts->tv_nsec != -1;
}
static inline int ipmi_thread_busy_wait(enum si_sm_result smi_result,
const struct smi_info *smi_info,
- struct timespec *busy_until)
+ struct timespec64 *busy_until)
{
unsigned int max_busy_us = 0;
@@ -1009,12 +1011,13 @@ static inline int ipmi_thread_busy_wait(enum si_sm_result smi_result,
if (max_busy_us == 0 || smi_result != SI_SM_CALL_WITH_DELAY)
ipmi_si_set_not_busy(busy_until);
else if (!ipmi_si_is_busy(busy_until)) {
- getnstimeofday(busy_until);
- timespec_add_ns(busy_until, max_busy_us*NSEC_PER_USEC);
+ getnstimeofday64(busy_until);
+ timespec64_add_ns(busy_until, max_busy_us*NSEC_PER_USEC);
} else {
- struct timespec now;
- getnstimeofday(&now);
- if (unlikely(timespec_compare(&now, busy_until) > 0)) {
+ struct timespec64 now;
+
+ getnstimeofday64(&now);
+ if (unlikely(timespec64_compare(&now, busy_until) > 0)) {
ipmi_si_set_not_busy(busy_until);
return 0;
}
@@ -1037,7 +1040,7 @@ static int ipmi_thread(void *data)
struct smi_info *smi_info = data;
unsigned long flags;
enum si_sm_result smi_result;
- struct timespec busy_until;
+ struct timespec64 busy_until;
ipmi_si_set_not_busy(&busy_until);
set_user_nice(current, MAX_NICE);
@@ -1128,15 +1131,10 @@ static void smi_timeout(unsigned long data)
unsigned long jiffies_now;
long time_diff;
long timeout;
-#ifdef DEBUG_TIMING
- struct timeval t;
-#endif
spin_lock_irqsave(&(smi_info->si_lock), flags);
-#ifdef DEBUG_TIMING
- do_gettimeofday(&t);
- printk(KERN_DEBUG "**Timer: %d.%9.9d\n", t.tv_sec, t.tv_usec);
-#endif
+ debug_timestamp("Timer");
+
jiffies_now = jiffies;
time_diff = (((long)jiffies_now - (long)smi_info->last_timeout_jiffies)
* SI_USEC_PER_JIFFY);
@@ -1173,18 +1171,13 @@ static irqreturn_t si_irq_handler(int irq, void *data)
{
struct smi_info *smi_info = data;
unsigned long flags;
-#ifdef DEBUG_TIMING
- struct timeval t;
-#endif
spin_lock_irqsave(&(smi_info->si_lock), flags);
smi_inc_stat(smi_info, interrupts);
-#ifdef DEBUG_TIMING
- do_gettimeofday(&t);
- printk(KERN_DEBUG "**Interrupt: %d.%9.9d\n", t.tv_sec, t.tv_usec);
-#endif
+ debug_timestamp("Interrupt");
+
smi_event_handler(smi_info, 0);
spin_unlock_irqrestore(&(smi_info->si_lock), flags);
return IRQ_HANDLED;
@@ -2038,18 +2031,13 @@ static u32 ipmi_acpi_gpe(acpi_handle gpe_device,
{
struct smi_info *smi_info = context;
unsigned long flags;
-#ifdef DEBUG_TIMING
- struct timeval t;
-#endif
spin_lock_irqsave(&(smi_info->si_lock), flags);
smi_inc_stat(smi_info, interrupts);
-#ifdef DEBUG_TIMING
- do_gettimeofday(&t);
- printk("**ACPI_GPE: %d.%9.9d\n", t.tv_sec, t.tv_usec);
-#endif
+ debug_timestamp("ACPI_GPE");
+
smi_event_handler(smi_info, 0);
spin_unlock_irqrestore(&(smi_info->si_lock), flags);
@@ -2071,7 +2059,6 @@ static int acpi_gpe_irq_setup(struct smi_info *info)
if (!info->irq)
return 0;
- /* FIXME - is level triggered right? */
status = acpi_install_gpe_handler(NULL,
info->irq,
ACPI_GPE_LEVEL_TRIGGERED,
@@ -2998,7 +2985,9 @@ static int smi_type_proc_show(struct seq_file *m, void *v)
{
struct smi_info *smi = m->private;
- return seq_printf(m, "%s\n", si_to_str[smi->si_type]);
+ seq_printf(m, "%s\n", si_to_str[smi->si_type]);
+
+ return seq_has_overflowed(m);
}
static int smi_type_proc_open(struct inode *inode, struct file *file)
@@ -3060,16 +3049,18 @@ static int smi_params_proc_show(struct seq_file *m, void *v)
{
struct smi_info *smi = m->private;
- return seq_printf(m,
- "%s,%s,0x%lx,rsp=%d,rsi=%d,rsh=%d,irq=%d,ipmb=%d\n",
- si_to_str[smi->si_type],
- addr_space_to_str[smi->io.addr_type],
- smi->io.addr_data,
- smi->io.regspacing,
- smi->io.regsize,
- smi->io.regshift,
- smi->irq,
- smi->slave_addr);
+ seq_printf(m,
+ "%s,%s,0x%lx,rsp=%d,rsi=%d,rsh=%d,irq=%d,ipmb=%d\n",
+ si_to_str[smi->si_type],
+ addr_space_to_str[smi->io.addr_type],
+ smi->io.addr_data,
+ smi->io.regspacing,
+ smi->io.regsize,
+ smi->io.regshift,
+ smi->irq,
+ smi->slave_addr);
+
+ return seq_has_overflowed(m);
}
static int smi_params_proc_open(struct inode *inode, struct file *file)
diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c
index 982b96323f82..f6e378dac5f5 100644
--- a/drivers/char/ipmi/ipmi_ssif.c
+++ b/drivers/char/ipmi/ipmi_ssif.c
@@ -1097,8 +1097,6 @@ static int ssif_remove(struct i2c_client *client)
if (!ssif_info)
return 0;
- i2c_set_clientdata(client, NULL);
-
/*
* After this point, we won't deliver anything asychronously
* to the message handler. We can unregister ourself.
@@ -1198,7 +1196,9 @@ static int ssif_detect(struct i2c_client *client, struct i2c_board_info *info)
static int smi_type_proc_show(struct seq_file *m, void *v)
{
- return seq_puts(m, "ssif\n");
+ seq_puts(m, "ssif\n");
+
+ return seq_has_overflowed(m);
}
static int smi_type_proc_open(struct inode *inode, struct file *file)
diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c
index af5d63c7cc53..2fe195002021 100644
--- a/drivers/firmware/efi/libstub/efi-stub-helper.c
+++ b/drivers/firmware/efi/libstub/efi-stub-helper.c
@@ -75,29 +75,25 @@ efi_status_t efi_get_memory_map(efi_system_table_t *sys_table_arg,
unsigned long key;
u32 desc_version;
- *map_size = 0;
- *desc_size = 0;
- key = 0;
- status = efi_call_early(get_memory_map, map_size, NULL,
- &key, desc_size, &desc_version);
- if (status != EFI_BUFFER_TOO_SMALL)
- return EFI_LOAD_ERROR;
-
+ *map_size = sizeof(*m) * 32;
+again:
/*
* Add an additional efi_memory_desc_t because we're doing an
* allocation which may be in a new descriptor region.
*/
- *map_size += *desc_size;
+ *map_size += sizeof(*m);
status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
*map_size, (void **)&m);
if (status != EFI_SUCCESS)
goto fail;
+ *desc_size = 0;
+ key = 0;
status = efi_call_early(get_memory_map, map_size, m,
&key, desc_size, &desc_version);
if (status == EFI_BUFFER_TOO_SMALL) {
efi_call_early(free_pool, m);
- return EFI_LOAD_ERROR;
+ goto again;
}
if (status != EFI_SUCCESS)
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index 638e797037da..97527614141b 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -735,6 +735,31 @@ config INTEL_IPS
functionality. If in doubt, say Y here; it will only load on
supported platforms.
+config INTEL_IMR
+ bool "Intel Isolated Memory Region support"
+ default n
+ depends on X86_INTEL_QUARK && IOSF_MBI
+ ---help---
+ This option provides a means to manipulate Isolated Memory Regions.
+ IMRs are a set of registers that define read and write access masks
+ to prohibit certain system agents from accessing memory with 1 KiB
+ granularity.
+
+ IMRs make it possible to control read/write access to an address
+ by hardware agents inside the SoC. Read and write masks can be
+ defined for:
+ - eSRAM flush
+ - Dirty CPU snoop (write only)
+ - RMU access
+ - PCI Virtual Channel 0/Virtual Channel 1
+ - SMM mode
+ - Non SMM mode
+
+ Quark contains a set of eight IMR registers and makes use of those
+ registers during its bootup process.
+
+ If you are running on a Galileo/Quark say Y here.
+
config IBM_RTL
tristate "Device driver to enable PRTL support"
depends on X86 && PCI
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 02b16910f4c9..995986b8e36b 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -645,11 +645,12 @@ out:
static unsigned long randomize_stack_top(unsigned long stack_top)
{
- unsigned int random_variable = 0;
+ unsigned long random_variable = 0;
if ((current->flags & PF_RANDOMIZE) &&
!(current->personality & ADDR_NO_RANDOMIZE)) {
- random_variable = get_random_int() & STACK_RND_MASK;
+ random_variable = (unsigned long) get_random_int();
+ random_variable &= STACK_RND_MASK;
random_variable <<= PAGE_SHIFT;
}
#ifdef CONFIG_STACK_GROWSUP
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index d1ec10a940ff..1b45e4a0519b 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -202,7 +202,7 @@ static __always_inline void data_access_exceeds_word_size(void)
{
}
-static __always_inline void __read_once_size(volatile void *p, void *res, int size)
+static __always_inline void __read_once_size(const volatile void *p, void *res, int size)
{
switch (size) {
case 1: *(__u8 *)res = *(volatile __u8 *)p; break;
@@ -259,10 +259,10 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
*/
#define READ_ONCE(x) \
- ({ typeof(x) __val; __read_once_size(&x, &__val, sizeof(__val)); __val; })
+ ({ union { typeof(x) __val; char __c[1]; } __u; __read_once_size(&(x), __u.__c, sizeof(x)); __u.__val; })
#define WRITE_ONCE(x, val) \
- ({ typeof(x) __val; __val = val; __write_once_size(&x, &__val, sizeof(__val)); __val; })
+ ({ typeof(x) __val = (val); __write_once_size(&(x), &__val, sizeof(__val)); __val; })
#endif /* __KERNEL__ */
diff --git a/include/linux/kdb.h b/include/linux/kdb.h
index 75ae2e2631fc..a19bcf9e762e 100644
--- a/include/linux/kdb.h
+++ b/include/linux/kdb.h
@@ -156,8 +156,14 @@ typedef enum {
KDB_REASON_SYSTEM_NMI, /* In NMI due to SYSTEM cmd; regs valid */
} kdb_reason_t;
+enum kdb_msgsrc {
+ KDB_MSGSRC_INTERNAL, /* direct call to kdb_printf() */
+ KDB_MSGSRC_PRINTK, /* trapped from printk() */
+};
+
extern int kdb_trap_printk;
-extern __printf(1, 0) int vkdb_printf(const char *fmt, va_list args);
+extern __printf(2, 0) int vkdb_printf(enum kdb_msgsrc src, const char *fmt,
+ va_list args);
extern __printf(1, 2) int kdb_printf(const char *, ...);
typedef __printf(1, 2) int (*kdb_printf_t)(const char *, ...);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 41c60e5302d7..6d77432e14ff 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -363,9 +363,6 @@ extern void show_regs(struct pt_regs *);
*/
extern void show_stack(struct task_struct *task, unsigned long *sp);
-void io_schedule(void);
-long io_schedule_timeout(long timeout);
-
extern void cpu_init (void);
extern void trap_init(void);
extern void update_process_times(int user);
@@ -422,6 +419,13 @@ extern signed long schedule_timeout_uninterruptible(signed long timeout);
asmlinkage void schedule(void);
extern void schedule_preempt_disabled(void);
+extern long io_schedule_timeout(long timeout);
+
+static inline void io_schedule(void)
+{
+ io_schedule_timeout(MAX_SCHEDULE_TIMEOUT);
+}
+
struct nsproxy;
struct user_namespace;
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 07ce18ca71e0..0874e2edd275 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -604,7 +604,7 @@ return_normal:
online_cpus)
cpu_relax();
if (!time_left)
- pr_crit("KGDB: Timed out waiting for secondary CPUs.\n");
+ pr_crit("Timed out waiting for secondary CPUs.\n");
/*
* At this point the primary processor is completely
@@ -696,6 +696,14 @@ kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs)
if (arch_kgdb_ops.enable_nmi)
arch_kgdb_ops.enable_nmi(0);
+ /*
+ * Avoid entering the debugger if we were triggered due to an oops
+ * but panic_timeout indicates the system should automatically
+ * reboot on panic. We don't want to get stuck waiting for input
+ * on such systems, especially if its "just" an oops.
+ */
+ if (signo != SIGTRAP && panic_timeout)
+ return 1;
memset(ks, 0, sizeof(struct kgdb_state));
ks->cpu = raw_smp_processor_id();
@@ -828,6 +836,15 @@ static int kgdb_panic_event(struct notifier_block *self,
unsigned long val,
void *data)
{
+ /*
+ * Avoid entering the debugger if we were triggered due to a panic
+ * We don't want to get stuck waiting for input from user in such case.
+ * panic_timeout indicates the system should automatically
+ * reboot on panic.
+ */
+ if (panic_timeout)
+ return NOTIFY_DONE;
+
if (dbg_kdb_mode)
kdb_printf("PANIC: %s\n", (char *)data);
kgdb_breakpoint();
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index 7c70812caea5..fc1ef736253c 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -439,7 +439,7 @@ poll_again:
* substituted for %d, %x or %o in the prompt.
*/
-char *kdb_getstr(char *buffer, size_t bufsize, char *prompt)
+char *kdb_getstr(char *buffer, size_t bufsize, const char *prompt)
{
if (prompt && kdb_prompt_str != prompt)
strncpy(kdb_prompt_str, prompt, CMD_BUFLEN);
@@ -548,7 +548,7 @@ static int kdb_search_string(char *searched, char *searchfor)
return 0;
}
-int vkdb_printf(const char *fmt, va_list ap)
+int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap)
{
int diag;
int linecount;
@@ -680,6 +680,12 @@ int vkdb_printf(const char *fmt, va_list ap)
size_avail = sizeof(kdb_buffer) - len;
goto kdb_print_out;
}
+ if (kdb_grepping_flag >= KDB_GREPPING_FLAG_SEARCH)
+ /*
+ * This was a interactive search (using '/' at more
+ * prompt) and it has completed. Clear the flag.
+ */
+ kdb_grepping_flag = 0;
/*
* at this point the string is a full line and
* should be printed, up to the null.
@@ -691,19 +697,20 @@ kdb_printit:
* Write to all consoles.
*/
retlen = strlen(kdb_buffer);
+ cp = (char *) printk_skip_level(kdb_buffer);
if (!dbg_kdb_mode && kgdb_connected) {
- gdbstub_msg_write(kdb_buffer, retlen);
+ gdbstub_msg_write(cp, retlen - (cp - kdb_buffer));
} else {
if (dbg_io_ops && !dbg_io_ops->is_console) {
- len = retlen;
- cp = kdb_buffer;
+ len = retlen - (cp - kdb_buffer);
+ cp2 = cp;
while (len--) {
- dbg_io_ops->write_char(*cp);
- cp++;
+ dbg_io_ops->write_char(*cp2);
+ cp2++;
}
}
while (c) {
- c->write(c, kdb_buffer, retlen);
+ c->write(c, cp, retlen - (cp - kdb_buffer));
touch_nmi_watchdog();
c = c->next;
}
@@ -711,7 +718,10 @@ kdb_printit:
if (logging) {
saved_loglevel = console_loglevel;
console_loglevel = CONSOLE_LOGLEVEL_SILENT;
- printk(KERN_INFO "%s", kdb_buffer);
+ if (printk_get_level(kdb_buffer) || src == KDB_MSGSRC_PRINTK)
+ printk("%s", kdb_buffer);
+ else
+ pr_info("%s", kdb_buffer);
}
if (KDB_STATE(PAGER)) {
@@ -794,11 +804,23 @@ kdb_printit:
kdb_nextline = linecount - 1;
kdb_printf("\r");
suspend_grep = 1; /* for this recursion */
+ } else if (buf1[0] == '/' && !kdb_grepping_flag) {
+ kdb_printf("\r");
+ kdb_getstr(kdb_grep_string, KDB_GREP_STRLEN,
+ kdbgetenv("SEARCHPROMPT") ?: "search> ");
+ *strchrnul(kdb_grep_string, '\n') = '\0';
+ kdb_grepping_flag += KDB_GREPPING_FLAG_SEARCH;
+ suspend_grep = 1; /* for this recursion */
} else if (buf1[0] && buf1[0] != '\n') {
/* user hit something other than enter */
suspend_grep = 1; /* for this recursion */
- kdb_printf("\nOnly 'q' or 'Q' are processed at more "
- "prompt, input ignored\n");
+ if (buf1[0] != '/')
+ kdb_printf(
+ "\nOnly 'q', 'Q' or '/' are processed at "
+ "more prompt, input ignored\n");
+ else
+ kdb_printf("\n'/' cannot be used during | "
+ "grep filtering, input ignored\n");
} else if (kdb_grepping_flag) {
/* user hit enter */
suspend_grep = 1; /* for this recursion */
@@ -844,7 +866,7 @@ int kdb_printf(const char *fmt, ...)
int r;
va_start(ap, fmt);
- r = vkdb_printf(fmt, ap);
+ r = vkdb_printf(KDB_MSGSRC_INTERNAL, fmt, ap);
va_end(ap);
return r;
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 7b40c5f07dce..4121345498e0 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -50,8 +50,7 @@
static int kdb_cmd_enabled = CONFIG_KDB_DEFAULT_ENABLE;
module_param_named(cmd_enable, kdb_cmd_enabled, int, 0600);
-#define GREP_LEN 256
-char kdb_grep_string[GREP_LEN];
+char kdb_grep_string[KDB_GREP_STRLEN];
int kdb_grepping_flag;
EXPORT_SYMBOL(kdb_grepping_flag);
int kdb_grep_leading;
@@ -870,7 +869,7 @@ static void parse_grep(const char *str)
len = strlen(cp);
if (!len)
return;
- if (len >= GREP_LEN) {
+ if (len >= KDB_GREP_STRLEN) {
kdb_printf("search string too long\n");
return;
}
@@ -915,13 +914,12 @@ int kdb_parse(const char *cmdstr)
char *cp;
char *cpp, quoted;
kdbtab_t *tp;
- int i, escaped, ignore_errors = 0, check_grep;
+ int i, escaped, ignore_errors = 0, check_grep = 0;
/*
* First tokenize the command string.
*/
cp = (char *)cmdstr;
- kdb_grepping_flag = check_grep = 0;
if (KDB_FLAG(CMD_INTERRUPT)) {
/* Previous command was interrupted, newline must not
@@ -1247,7 +1245,6 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs,
kdb_printf("due to NonMaskable Interrupt @ "
kdb_machreg_fmt "\n",
instruction_pointer(regs));
- kdb_dumpregs(regs);
break;
case KDB_REASON_SSTEP:
case KDB_REASON_BREAK:
@@ -1281,6 +1278,9 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs,
*/
kdb_nextline = 1;
KDB_STATE_CLEAR(SUPPRESS);
+ kdb_grepping_flag = 0;
+ /* ensure the old search does not leak into '/' commands */
+ kdb_grep_string[0] = '\0';
cmdbuf = cmd_cur;
*cmdbuf = '\0';
@@ -2256,7 +2256,7 @@ static int kdb_cpu(int argc, const char **argv)
/*
* Validate cpunum
*/
- if ((cpunum > NR_CPUS) || !kgdb_info[cpunum].enter_kgdb)
+ if ((cpunum >= CONFIG_NR_CPUS) || !kgdb_info[cpunum].enter_kgdb)
return KDB_BADCPUNUM;
dbg_switch_cpu = cpunum;
@@ -2583,7 +2583,7 @@ static int kdb_summary(int argc, const char **argv)
#define K(x) ((x) << (PAGE_SHIFT - 10))
kdb_printf("\nMemTotal: %8lu kB\nMemFree: %8lu kB\n"
"Buffers: %8lu kB\n",
- val.totalram, val.freeram, val.bufferram);
+ K(val.totalram), K(val.freeram), K(val.bufferram));
return 0;
}
diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h
index eaacd1693954..75014d7f4568 100644
--- a/kernel/debug/kdb/kdb_private.h
+++ b/kernel/debug/kdb/kdb_private.h
@@ -196,7 +196,9 @@ extern int kdb_main_loop(kdb_reason_t, kdb_reason_t,
/* Miscellaneous functions and data areas */
extern int kdb_grepping_flag;
+#define KDB_GREPPING_FLAG_SEARCH 0x8000
extern char kdb_grep_string[];
+#define KDB_GREP_STRLEN 256
extern int kdb_grep_leading;
extern int kdb_grep_trailing;
extern char *kdb_cmds[];
@@ -209,7 +211,7 @@ extern void kdb_ps1(const struct task_struct *p);
extern void kdb_print_nameval(const char *name, unsigned long val);
extern void kdb_send_sig_info(struct task_struct *p, struct siginfo *info);
extern void kdb_meminfo_proc_show(void);
-extern char *kdb_getstr(char *, size_t, char *);
+extern char *kdb_getstr(char *, size_t, const char *);
extern void kdb_gdb_state_pass(char *buf);
/* Defines for kdb_symbol_print */
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 3059bc2f022d..e16e5542bf13 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1193,7 +1193,8 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);
if (unlikely(ret)) {
- remove_waiter(lock, &waiter);
+ if (rt_mutex_has_waiters(lock))
+ remove_waiter(lock, &waiter);
rt_mutex_handle_deadlock(ret, chwalk, &waiter);
}
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index c06df7de0963..01cfd69c54c6 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1811,7 +1811,7 @@ int vprintk_default(const char *fmt, va_list args)
#ifdef CONFIG_KGDB_KDB
if (unlikely(kdb_trap_printk)) {
- r = vkdb_printf(fmt, args);
+ r = vkdb_printf(KDB_MSGSRC_PRINTK, fmt, args);
return r;
}
#endif
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 0d7bbe3095ad..0a571e9a0f1d 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -326,6 +326,7 @@ void rcu_read_unlock_special(struct task_struct *t)
special = t->rcu_read_unlock_special;
if (special.b.need_qs) {
rcu_preempt_qs();
+ t->rcu_read_unlock_special.b.need_qs = false;
if (!t->rcu_read_unlock_special.s) {
local_irq_restore(flags);
return;
diff --git a/kernel/sched/auto_group.c b/kernel/sched/auto_group.c
index 8a2e230fb86a..eae160dd669d 100644
--- a/kernel/sched/auto_group.c
+++ b/kernel/sched/auto_group.c
@@ -87,8 +87,7 @@ static inline struct autogroup *autogroup_create(void)
* so we don't have to move tasks around upon policy change,
* or flail around trying to allocate bandwidth on the fly.
* A bandwidth exception in __sched_setscheduler() allows
- * the policy change to proceed. Thereafter, task_group()
- * returns &root_task_group, so zero bandwidth is required.
+ * the policy change to proceed.
*/
free_rt_sched_group(tg);
tg->rt_se = root_task_group.rt_se;
@@ -115,9 +114,6 @@ bool task_wants_autogroup(struct task_struct *p, struct task_group *tg)
if (tg != &root_task_group)
return false;
- if (p->sched_class != &fair_sched_class)
- return false;
-
/*
* We can only assume the task group can't go away on us if
* autogroup_move_group() can see us on ->thread_group list.
diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c
index 7052d3fd4e7b..8d0f35debf35 100644
--- a/kernel/sched/completion.c
+++ b/kernel/sched/completion.c
@@ -274,7 +274,7 @@ bool try_wait_for_completion(struct completion *x)
* first without taking the lock so we can
* return early in the blocking case.
*/
- if (!ACCESS_ONCE(x->done))
+ if (!READ_ONCE(x->done))
return 0;
spin_lock_irqsave(&x->wait.lock, flags);
@@ -297,6 +297,21 @@ EXPORT_SYMBOL(try_wait_for_completion);
*/
bool completion_done(struct completion *x)
{
- return !!ACCESS_ONCE(x->done);
+ if (!READ_ONCE(x->done))
+ return false;
+
+ /*
+ * If ->done, we need to wait for complete() to release ->wait.lock
+ * otherwise we can end up freeing the completion before complete()
+ * is done referencing it.
+ *
+ * The RMB pairs with complete()'s RELEASE of ->wait.lock and orders
+ * the loads of ->done and ->wait.lock such that we cannot observe
+ * the lock before complete() acquires it while observing the ->done
+ * after it's acquired the lock.
+ */
+ smp_rmb();
+ spin_unlock_wait(&x->wait.lock);
+ return true;
}
EXPORT_SYMBOL(completion_done);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 13049aac05a6..f0f831e8a345 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -307,66 +307,6 @@ __read_mostly int scheduler_running;
int sysctl_sched_rt_runtime = 950000;
/*
- * __task_rq_lock - lock the rq @p resides on.
- */
-static inline struct rq *__task_rq_lock(struct task_struct *p)
- __acquires(rq->lock)
-{
- struct rq *rq;
-
- lockdep_assert_held(&p->pi_lock);
-
- for (;;) {
- rq = task_rq(p);
- raw_spin_lock(&rq->lock);
- if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
- return rq;
- raw_spin_unlock(&rq->lock);
-
- while (unlikely(task_on_rq_migrating(p)))
- cpu_relax();
- }
-}
-
-/*
- * task_rq_lock - lock p->pi_lock and lock the rq @p resides on.
- */
-static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
- __acquires(p->pi_lock)
- __acquires(rq->lock)
-{
- struct rq *rq;
-
- for (;;) {
- raw_spin_lock_irqsave(&p->pi_lock, *flags);
- rq = task_rq(p);
- raw_spin_lock(&rq->lock);
- if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
- return rq;
- raw_spin_unlock(&rq->lock);
- raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
-
- while (unlikely(task_on_rq_migrating(p)))
- cpu_relax();
- }
-}
-
-static void __task_rq_unlock(struct rq *rq)
- __releases(rq->lock)
-{
- raw_spin_unlock(&rq->lock);
-}
-
-static inline void
-task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
- __releases(rq->lock)
- __releases(p->pi_lock)
-{
- raw_spin_unlock(&rq->lock);
- raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
-}
-
-/*
* this_rq_lock - lock this runqueue and disable interrupts.
*/
static struct rq *this_rq_lock(void)
@@ -2899,7 +2839,7 @@ void __sched schedule_preempt_disabled(void)
preempt_disable();
}
-static void preempt_schedule_common(void)
+static void __sched notrace preempt_schedule_common(void)
{
do {
__preempt_count_add(PREEMPT_ACTIVE);
@@ -4418,36 +4358,29 @@ EXPORT_SYMBOL_GPL(yield_to);
* This task is about to go to sleep on IO. Increment rq->nr_iowait so
* that process accounting knows that this is a task in IO wait state.
*/
-void __sched io_schedule(void)
-{
- struct rq *rq = raw_rq();
-
- delayacct_blkio_start();
- atomic_inc(&rq->nr_iowait);
- blk_flush_plug(current);
- current->in_iowait = 1;
- schedule();
- current->in_iowait = 0;
- atomic_dec(&rq->nr_iowait);
- delayacct_blkio_end();
-}
-EXPORT_SYMBOL(io_schedule);
-
long __sched io_schedule_timeout(long timeout)
{
- struct rq *rq = raw_rq();
+ int old_iowait = current->in_iowait;
+ struct rq *rq;
long ret;
+ current->in_iowait = 1;
+ if (old_iowait)
+ blk_schedule_flush_plug(current);
+ else
+ blk_flush_plug(current);
+
delayacct_blkio_start();
+ rq = raw_rq();
atomic_inc(&rq->nr_iowait);
- blk_flush_plug(current);
- current->in_iowait = 1;
ret = schedule_timeout(timeout);
- current->in_iowait = 0;
+ current->in_iowait = old_iowait;
atomic_dec(&rq->nr_iowait);
delayacct_blkio_end();
+
return ret;
}
+EXPORT_SYMBOL(io_schedule_timeout);
/**
* sys_sched_get_priority_max - return maximum RT priority.
@@ -7642,6 +7575,12 @@ static inline int tg_has_rt_tasks(struct task_group *tg)
{
struct task_struct *g, *p;
+ /*
+ * Autogroups do not have RT tasks; see autogroup_create().
+ */
+ if (task_group_is_autogroup(tg))
+ return 0;
+
for_each_process_thread(g, p) {
if (rt_task(p) && task_group(p) == tg)
return 1;
@@ -7734,6 +7673,17 @@ static int tg_set_rt_bandwidth(struct task_group *tg,
{
int i, err = 0;
+ /*
+ * Disallowing the root group RT runtime is BAD, it would disallow the
+ * kernel creating (and or operating) RT threads.
+ */
+ if (tg == &root_task_group && rt_runtime == 0)
+ return -EINVAL;
+
+ /* No period doesn't make any sense. */
+ if (rt_period == 0)
+ return -EINVAL;
+
mutex_lock(&rt_constraints_mutex);
read_lock(&tasklist_lock);
err = __rt_schedulable(tg, rt_period, rt_runtime);
@@ -7790,9 +7740,6 @@ static int sched_group_set_rt_period(struct task_group *tg, long rt_period_us)
rt_period = (u64)rt_period_us * NSEC_PER_USEC;
rt_runtime = tg->rt_bandwidth.rt_runtime;
- if (rt_period == 0)
- return -EINVAL;
-
return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
}
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index a027799ae130..3fa8fa6d9403 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -511,16 +511,10 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
struct sched_dl_entity,
dl_timer);
struct task_struct *p = dl_task_of(dl_se);
+ unsigned long flags;
struct rq *rq;
-again:
- rq = task_rq(p);
- raw_spin_lock(&rq->lock);
- if (rq != task_rq(p)) {
- /* Task was moved, retrying. */
- raw_spin_unlock(&rq->lock);
- goto again;
- }
+ rq = task_rq_lock(current, &flags);
/*
* We need to take care of several possible races here:
@@ -541,6 +535,26 @@ again:
sched_clock_tick();
update_rq_clock(rq);
+
+ /*
+ * If the throttle happened during sched-out; like:
+ *
+ * schedule()
+ * deactivate_task()
+ * dequeue_task_dl()
+ * update_curr_dl()
+ * start_dl_timer()
+ * __dequeue_task_dl()
+ * prev->on_rq = 0;
+ *
+ * We can be both throttled and !queued. Replenish the counter
+ * but do not enqueue -- wait for our wakeup to do that.
+ */
+ if (!task_on_rq_queued(p)) {
+ replenish_dl_entity(dl_se, dl_se);
+ goto unlock;
+ }
+
enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
if (dl_task(rq->curr))
check_preempt_curr_dl(rq, p, 0);
@@ -555,7 +569,7 @@ again:
push_dl_task(rq);
#endif
unlock:
- raw_spin_unlock(&rq->lock);
+ task_rq_unlock(rq, current, &flags);
return HRTIMER_NORESTART;
}
@@ -898,6 +912,7 @@ static void yield_task_dl(struct rq *rq)
rq->curr->dl.dl_yielded = 1;
p->dl.runtime = 0;
}
+ update_rq_clock(rq);
update_curr_dl(rq);
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 0870db23d79c..dc0f435a2779 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1380,6 +1380,82 @@ static inline void sched_avg_update(struct rq *rq) { }
extern void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period);
+/*
+ * __task_rq_lock - lock the rq @p resides on.
+ */
+static inline struct rq *__task_rq_lock(struct task_struct *p)
+ __acquires(rq->lock)
+{
+ struct rq *rq;
+
+ lockdep_assert_held(&p->pi_lock);
+
+ for (;;) {
+ rq = task_rq(p);
+ raw_spin_lock(&rq->lock);
+ if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
+ return rq;
+ raw_spin_unlock(&rq->lock);
+
+ while (unlikely(task_on_rq_migrating(p)))
+ cpu_relax();
+ }
+}
+
+/*
+ * task_rq_lock - lock p->pi_lock and lock the rq @p resides on.
+ */
+static inline struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
+ __acquires(p->pi_lock)
+ __acquires(rq->lock)
+{
+ struct rq *rq;
+
+ for (;;) {
+ raw_spin_lock_irqsave(&p->pi_lock, *flags);
+ rq = task_rq(p);
+ raw_spin_lock(&rq->lock);
+ /*
+ * move_queued_task() task_rq_lock()
+ *
+ * ACQUIRE (rq->lock)
+ * [S] ->on_rq = MIGRATING [L] rq = task_rq()
+ * WMB (__set_task_cpu()) ACQUIRE (rq->lock);
+ * [S] ->cpu = new_cpu [L] task_rq()
+ * [L] ->on_rq
+ * RELEASE (rq->lock)
+ *
+ * If we observe the old cpu in task_rq_lock, the acquire of
+ * the old rq->lock will fully serialize against the stores.
+ *
+ * If we observe the new cpu in task_rq_lock, the acquire will
+ * pair with the WMB to ensure we must then also see migrating.
+ */
+ if (likely(rq == task_rq(p) && !task_on_rq_migrating(p)))
+ return rq;
+ raw_spin_unlock(&rq->lock);
+ raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
+
+ while (unlikely(task_on_rq_migrating(p)))
+ cpu_relax();
+ }
+}
+
+static inline void __task_rq_unlock(struct rq *rq)
+ __releases(rq->lock)
+{
+ raw_spin_unlock(&rq->lock);
+}
+
+static inline void
+task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
+ __releases(rq->lock)
+ __releases(p->pi_lock)
+{
+ raw_spin_unlock(&rq->lock);
+ raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
+}
+
#ifdef CONFIG_SMP
#ifdef CONFIG_PREEMPT
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 4b585e0fdd22..0f60b08a4f07 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -633,10 +633,14 @@ int ntp_validate_timex(struct timex *txc)
if ((txc->modes & ADJ_SETOFFSET) && (!capable(CAP_SYS_TIME)))
return -EPERM;
- if (txc->modes & ADJ_FREQUENCY) {
- if (LONG_MIN / PPM_SCALE > txc->freq)
+ /*
+ * Check for potential multiplication overflows that can
+ * only happen on 64-bit systems:
+ */
+ if ((txc->modes & ADJ_FREQUENCY) && (BITS_PER_LONG == 64)) {
+ if (LLONG_MIN / PPM_SCALE > txc->freq)
return -EINVAL;
- if (LONG_MAX / PPM_SCALE < txc->freq)
+ if (LLONG_MAX / PPM_SCALE < txc->freq)
return -EINVAL;
}