summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--arch/x86/boot/Makefile23
-rw-r--r--arch/x86/boot/header.S29
-rw-r--r--arch/x86/boot/pm.c44
-rw-r--r--arch/x86/boot/tools/build.c9
-rw-r--r--arch/x86/include/asm/boot.h4
-rw-r--r--arch/x86/include/asm/highmem.h1
-rw-r--r--arch/x86/include/asm/kexec.h13
-rw-r--r--arch/x86/include/asm/linkage.h13
-rw-r--r--arch/x86/kernel/cpu/cpufreq/p4-clockmod.c1
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_64.c4
-rw-r--r--arch/x86/kernel/machine_kexec_32.c17
-rw-r--r--arch/x86/kernel/machine_kexec_64.c99
-rw-r--r--arch/x86/kernel/relocate_kernel_32.S24
-rw-r--r--arch/x86/kernel/relocate_kernel_64.S189
-rw-r--r--arch/x86/kernel/visws_quirks.c2
-rw-r--r--arch/x86/kernel/vmlinux_64.lds.S7
-rw-r--r--arch/x86/lguest/boot.c21
-rw-r--r--arch/x86/mm/highmem_32.c19
-rw-r--r--arch/x86/mm/iomap_32.c13
-rw-r--r--arch/x86/mm/kmmio.c2
-rw-r--r--arch/x86/mm/pageattr.c11
22 files changed, 351 insertions, 196 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 03f0a3aa43b1..7fcf85182681 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1437,7 +1437,7 @@ config CRASH_DUMP
config KEXEC_JUMP
bool "kexec jump (EXPERIMENTAL)"
depends on EXPERIMENTAL
- depends on KEXEC && HIBERNATION && X86_32
+ depends on KEXEC && HIBERNATION
---help---
Jump between original kernel and kexeced kernel and invoke
code in physical address mode via KEXEC
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index c70eff69a1fb..57a29fecf6bb 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -6,26 +6,23 @@
# for more details.
#
# Copyright (C) 1994 by Linus Torvalds
+# Changed by many, many contributors over the years.
#
# ROOT_DEV specifies the default root-device when making the image.
# This can be either FLOPPY, CURRENT, /dev/xxxx or empty, in which case
# the default of FLOPPY is used by 'build'.
-ROOT_DEV := CURRENT
+ROOT_DEV := CURRENT
# If you want to preset the SVGA mode, uncomment the next line and
# set SVGA_MODE to whatever number you want.
# Set it to -DSVGA_MODE=NORMAL_VGA if you just want the EGA/VGA mode.
# The number is the same as you would ordinarily press at bootup.
-SVGA_MODE := -DSVGA_MODE=NORMAL_VGA
+SVGA_MODE := -DSVGA_MODE=NORMAL_VGA
-# If you want the RAM disk device, define this to be the size in blocks.
-
-#RAMDISK := -DRAMDISK=512
-
-targets := vmlinux.bin setup.bin setup.elf zImage bzImage
+targets := vmlinux.bin setup.bin setup.elf bzImage
subdir- := compressed
setup-y += a20.o cmdline.o copy.o cpu.o cpucheck.o edd.o
@@ -71,17 +68,13 @@ KBUILD_CFLAGS := $(LINUXINCLUDE) -g -Os -D_SETUP -D__KERNEL__ \
KBUILD_CFLAGS += $(call cc-option,-m32)
KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
-$(obj)/zImage: asflags-y := $(SVGA_MODE) $(RAMDISK)
-$(obj)/bzImage: ccflags-y := -D__BIG_KERNEL__
-$(obj)/bzImage: asflags-y := $(SVGA_MODE) $(RAMDISK) -D__BIG_KERNEL__
-$(obj)/bzImage: BUILDFLAGS := -b
+$(obj)/bzImage: asflags-y := $(SVGA_MODE)
quiet_cmd_image = BUILD $@
-cmd_image = $(obj)/tools/build $(BUILDFLAGS) $(obj)/setup.bin \
- $(obj)/vmlinux.bin $(ROOT_DEV) > $@
+cmd_image = $(obj)/tools/build $(obj)/setup.bin $(obj)/vmlinux.bin \
+ $(ROOT_DEV) > $@
-$(obj)/zImage $(obj)/bzImage: $(obj)/setup.bin \
- $(obj)/vmlinux.bin $(obj)/tools/build FORCE
+$(obj)/bzImage: $(obj)/setup.bin $(obj)/vmlinux.bin $(obj)/tools/build FORCE
$(call if_changed,image)
@echo 'Kernel: $@ is ready' ' (#'`cat .version`')'
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 7ccff4884a23..5d84d1c74e4c 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -24,12 +24,8 @@
#include "boot.h"
#include "offsets.h"
-SETUPSECTS = 4 /* default nr of setup-sectors */
BOOTSEG = 0x07C0 /* original address of boot-sector */
-SYSSEG = DEF_SYSSEG /* system loaded at 0x10000 (65536) */
-SYSSIZE = DEF_SYSSIZE /* system size: # of 16-byte clicks */
- /* to be loaded */
-ROOT_DEV = 0 /* ROOT_DEV is now written by "build" */
+SYSSEG = 0x1000 /* historical load address >> 4 */
#ifndef SVGA_MODE
#define SVGA_MODE ASK_VGA
@@ -97,12 +93,12 @@ bugger_off_msg:
.section ".header", "a"
.globl hdr
hdr:
-setup_sects: .byte SETUPSECTS
+setup_sects: .byte 0 /* Filled in by build.c */
root_flags: .word ROOT_RDONLY
-syssize: .long SYSSIZE
-ram_size: .word RAMDISK
+syssize: .long 0 /* Filled in by build.c */
+ram_size: .word 0 /* Obsolete */
vid_mode: .word SVGA_MODE
-root_dev: .word ROOT_DEV
+root_dev: .word 0 /* Filled in by build.c */
boot_flag: .word 0xAA55
# offset 512, entry point
@@ -123,14 +119,15 @@ _start:
# or else old loadlin-1.5 will fail)
.globl realmode_swtch
realmode_swtch: .word 0, 0 # default_switch, SETUPSEG
-start_sys_seg: .word SYSSEG
+start_sys_seg: .word SYSSEG # obsolete and meaningless, but just
+ # in case something decided to "use" it
.word kernel_version-512 # pointing to kernel version string
# above section of header is compatible
# with loadlin-1.5 (header v1.5). Don't
# change it.
-type_of_loader: .byte 0 # = 0, old one (LILO, Loadlin,
- # Bootlin, SYSLX, bootsect...)
+type_of_loader: .byte 0 # 0 means ancient bootloader, newer
+ # bootloaders know to change this.
# See Documentation/i386/boot.txt for
# assigned ids
@@ -142,11 +139,7 @@ CAN_USE_HEAP = 0x80 # If set, the loader also has set
# space behind setup.S can be used for
# heap purposes.
# Only the loader knows what is free
-#ifndef __BIG_KERNEL__
- .byte 0
-#else
.byte LOADED_HIGH
-#endif
setup_move_size: .word 0x8000 # size to move, when setup is not
# loaded at 0x90000. We will move setup
@@ -157,11 +150,7 @@ setup_move_size: .word 0x8000 # size to move, when setup is not
code32_start: # here loaders can put a different
# start address for 32-bit code.
-#ifndef __BIG_KERNEL__
- .long 0x1000 # 0x1000 = default for zImage
-#else
.long 0x100000 # 0x100000 = default for big kernel
-#endif
ramdisk_image: .long 0 # address of loaded ramdisk image
# Here the loader puts the 32-bit
diff --git a/arch/x86/boot/pm.c b/arch/x86/boot/pm.c
index 85a1cd8a8ff8..8062f8915250 100644
--- a/arch/x86/boot/pm.c
+++ b/arch/x86/boot/pm.c
@@ -33,47 +33,6 @@ static void realmode_switch_hook(void)
}
/*
- * A zImage kernel is loaded at 0x10000 but wants to run at 0x1000.
- * A bzImage kernel is loaded and runs at 0x100000.
- */
-static void move_kernel_around(void)
-{
- /* Note: rely on the compile-time option here rather than
- the LOADED_HIGH flag. The Qemu kernel loader unconditionally
- sets the loadflags to zero. */
-#ifndef __BIG_KERNEL__
- u16 dst_seg, src_seg;
- u32 syssize;
-
- dst_seg = 0x1000 >> 4;
- src_seg = 0x10000 >> 4;
- syssize = boot_params.hdr.syssize; /* Size in 16-byte paragraphs */
-
- while (syssize) {
- int paras = (syssize >= 0x1000) ? 0x1000 : syssize;
- int dwords = paras << 2;
-
- asm volatile("pushw %%es ; "
- "pushw %%ds ; "
- "movw %1,%%es ; "
- "movw %2,%%ds ; "
- "xorw %%di,%%di ; "
- "xorw %%si,%%si ; "
- "rep;movsl ; "
- "popw %%ds ; "
- "popw %%es"
- : "+c" (dwords)
- : "r" (dst_seg), "r" (src_seg)
- : "esi", "edi");
-
- syssize -= paras;
- dst_seg += paras;
- src_seg += paras;
- }
-#endif
-}
-
-/*
* Disable all interrupts at the legacy PIC.
*/
static void mask_all_interrupts(void)
@@ -147,9 +106,6 @@ void go_to_protected_mode(void)
/* Hook before leaving real mode, also disables interrupts */
realmode_switch_hook();
- /* Move the kernel/setup to their final resting places */
- move_kernel_around();
-
/* Enable the A20 gate */
if (enable_a20()) {
puts("A20 gate not responding, unable to boot...\n");
diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c
index 44dc1923c0e3..ee3a4ea923ac 100644
--- a/arch/x86/boot/tools/build.c
+++ b/arch/x86/boot/tools/build.c
@@ -130,7 +130,7 @@ static void die(const char * str, ...)
static void usage(void)
{
- die("Usage: build [-b] setup system [rootdev] [> image]");
+ die("Usage: build setup system [rootdev] [> image]");
}
int main(int argc, char ** argv)
@@ -145,11 +145,6 @@ int main(int argc, char ** argv)
void *kernel;
u32 crc = 0xffffffffUL;
- if (argc > 2 && !strcmp(argv[1], "-b"))
- {
- is_big_kernel = 1;
- argc--, argv++;
- }
if ((argc < 3) || (argc > 4))
usage();
if (argc > 3) {
@@ -216,8 +211,6 @@ int main(int argc, char ** argv)
die("Unable to mmap '%s': %m", argv[2]);
/* Number of 16-byte paragraphs, including space for a 4-byte CRC */
sys_size = (sz + 15 + 4) / 16;
- if (!is_big_kernel && sys_size > DEF_SYSSIZE)
- die("System is too big. Try using bzImage or modules.");
/* Patch the setup code with the appropriate size parameters */
buf[0x1f1] = setup_sectors-1;
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h
index 6526cf08b0e4..6ba23dd9fc92 100644
--- a/arch/x86/include/asm/boot.h
+++ b/arch/x86/include/asm/boot.h
@@ -1,10 +1,6 @@
#ifndef _ASM_X86_BOOT_H
#define _ASM_X86_BOOT_H
-/* Don't touch these, unless you really know what you're doing. */
-#define DEF_SYSSEG 0x1000
-#define DEF_SYSSIZE 0x7F00
-
/* Internal svga startup constants */
#define NORMAL_VGA 0xffff /* 80x25 mode */
#define EXTENDED_VGA 0xfffe /* 80x50 mode */
diff --git a/arch/x86/include/asm/highmem.h b/arch/x86/include/asm/highmem.h
index bf9276bea660..014c2b85ae45 100644
--- a/arch/x86/include/asm/highmem.h
+++ b/arch/x86/include/asm/highmem.h
@@ -63,6 +63,7 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot);
void *kmap_atomic(struct page *page, enum km_type type);
void kunmap_atomic(void *kvaddr, enum km_type type);
void *kmap_atomic_pfn(unsigned long pfn, enum km_type type);
+void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot);
struct page *kmap_atomic_to_page(void *ptr);
#ifndef CONFIG_PARAVIRT
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 0ceb6d19ed30..317ff1703d0b 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -9,13 +9,13 @@
# define PAGES_NR 4
#else
# define PA_CONTROL_PAGE 0
-# define PA_TABLE_PAGE 1
-# define PAGES_NR 2
+# define VA_CONTROL_PAGE 1
+# define PA_TABLE_PAGE 2
+# define PA_SWAP_PAGE 3
+# define PAGES_NR 4
#endif
-#ifdef CONFIG_X86_32
# define KEXEC_CONTROL_CODE_MAX_SIZE 2048
-#endif
#ifndef __ASSEMBLY__
@@ -136,10 +136,11 @@ relocate_kernel(unsigned long indirection_page,
unsigned int has_pae,
unsigned int preserve_context);
#else
-NORET_TYPE void
+unsigned long
relocate_kernel(unsigned long indirection_page,
unsigned long page_list,
- unsigned long start_address) ATTRIB_NORET;
+ unsigned long start_address,
+ unsigned int preserve_context);
#endif
#define ARCH_HAS_KIMAGE_ARCH
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index a0d70b46c27c..12d55e773eb6 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -1,6 +1,8 @@
#ifndef _ASM_X86_LINKAGE_H
#define _ASM_X86_LINKAGE_H
+#include <linux/stringify.h>
+
#undef notrace
#define notrace __attribute__((no_instrument_function))
@@ -53,14 +55,9 @@
.globl name; \
name:
-#ifdef CONFIG_X86_64
-#define __ALIGN .p2align 4,,15
-#define __ALIGN_STR ".p2align 4,,15"
-#endif
-
-#ifdef CONFIG_X86_ALIGNMENT_16
-#define __ALIGN .align 16,0x90
-#define __ALIGN_STR ".align 16,0x90"
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_ALIGNMENT_16)
+#define __ALIGN .p2align 4, 0x90
+#define __ALIGN_STR __stringify(__ALIGN)
#endif
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index b585e04cbc9e..3178c3acd97e 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -277,7 +277,6 @@ static struct cpufreq_driver p4clockmod_driver = {
.name = "p4-clockmod",
.owner = THIS_MODULE,
.attr = p4clockmod_attr,
- .hide_interface = 1,
};
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index bfbd5323a635..ca14604611ec 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -639,7 +639,7 @@ static void mce_init_timer(void)
if (!next_interval)
return;
setup_timer(t, mcheck_timer, smp_processor_id());
- t->expires = round_jiffies_relative(jiffies + next_interval);
+ t->expires = round_jiffies(jiffies + next_interval);
add_timer(t);
}
@@ -1110,7 +1110,7 @@ static int __cpuinit mce_cpu_callback(struct notifier_block *nfb,
break;
case CPU_DOWN_FAILED:
case CPU_DOWN_FAILED_FROZEN:
- t->expires = round_jiffies_relative(jiffies + next_interval);
+ t->expires = round_jiffies(jiffies + next_interval);
add_timer_on(t, cpu);
smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
break;
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index f5fc8c781a62..e7368c1da01d 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -14,12 +14,12 @@
#include <linux/ftrace.h>
#include <linux/suspend.h>
#include <linux/gfp.h>
+#include <linux/io.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
-#include <asm/io.h>
#include <asm/apic.h>
#include <asm/cpufeature.h>
#include <asm/desc.h>
@@ -63,7 +63,7 @@ static void load_segments(void)
"\tmovl %%eax,%%fs\n"
"\tmovl %%eax,%%gs\n"
"\tmovl %%eax,%%ss\n"
- ::: "eax", "memory");
+ : : : "eax", "memory");
#undef STR
#undef __STR
}
@@ -205,7 +205,8 @@ void machine_kexec(struct kimage *image)
if (image->preserve_context) {
#ifdef CONFIG_X86_IO_APIC
- /* We need to put APICs in legacy mode so that we can
+ /*
+ * We need to put APICs in legacy mode so that we can
* get timer interrupts in second kernel. kexec/kdump
* paths already have calls to disable_IO_APIC() in
* one form or other. kexec jump path also need
@@ -227,7 +228,8 @@ void machine_kexec(struct kimage *image)
page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page)
<< PAGE_SHIFT);
- /* The segment registers are funny things, they have both a
+ /*
+ * The segment registers are funny things, they have both a
* visible and an invisible part. Whenever the visible part is
* set to a specific selector, the invisible part is loaded
* with from a table in memory. At no other time is the
@@ -237,11 +239,12 @@ void machine_kexec(struct kimage *image)
* segments, before I zap the gdt with an invalid value.
*/
load_segments();
- /* The gdt & idt are now invalid.
+ /*
+ * The gdt & idt are now invalid.
* If you want to load them you must set up your own idt & gdt.
*/
- set_gdt(phys_to_virt(0),0);
- set_idt(phys_to_virt(0),0);
+ set_gdt(phys_to_virt(0), 0);
+ set_idt(phys_to_virt(0), 0);
/* now call it */
image->start = relocate_kernel_ptr((unsigned long)image->head,
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 6993d51b7fd8..89cea4d44679 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -12,11 +12,47 @@
#include <linux/reboot.h>
#include <linux/numa.h>
#include <linux/ftrace.h>
+#include <linux/io.h>
+#include <linux/suspend.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
-#include <asm/io.h>
+
+static int init_one_level2_page(struct kimage *image, pgd_t *pgd,
+ unsigned long addr)
+{
+ pud_t *pud;
+ pmd_t *pmd;
+ struct page *page;
+ int result = -ENOMEM;
+
+ addr &= PMD_MASK;
+ pgd += pgd_index(addr);
+ if (!pgd_present(*pgd)) {
+ page = kimage_alloc_control_pages(image, 0);
+ if (!page)
+ goto out;
+ pud = (pud_t *)page_address(page);
+ memset(pud, 0, PAGE_SIZE);
+ set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
+ }
+ pud = pud_offset(pgd, addr);
+ if (!pud_present(*pud)) {
+ page = kimage_alloc_control_pages(image, 0);
+ if (!page)
+ goto out;
+ pmd = (pmd_t *)page_address(page);
+ memset(pmd, 0, PAGE_SIZE);
+ set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
+ }
+ pmd = pmd_offset(pud, addr);
+ if (!pmd_present(*pmd))
+ set_pmd(pmd, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
+ result = 0;
+out:
+ return result;
+}
static void init_level2_page(pmd_t *level2p, unsigned long addr)
{
@@ -83,9 +119,8 @@ static int init_level4_page(struct kimage *image, pgd_t *level4p,
}
level3p = (pud_t *)page_address(page);
result = init_level3_page(image, level3p, addr, last_addr);
- if (result) {
+ if (result)
goto out;
- }
set_pgd(level4p++, __pgd(__pa(level3p) | _KERNPG_TABLE));
addr += PGDIR_SIZE;
}
@@ -156,6 +191,13 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
result = init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT);
if (result)
return result;
+ /*
+ * image->start may be outside 0 ~ max_pfn, for example when
+ * jump back to original kernel from kexeced kernel
+ */
+ result = init_one_level2_page(image, level4p, image->start);
+ if (result)
+ return result;
return init_transition_pgtable(image, level4p);
}
@@ -229,20 +271,45 @@ void machine_kexec(struct kimage *image)
{
unsigned long page_list[PAGES_NR];
void *control_page;
+ int save_ftrace_enabled;
- tracer_disable();
+#ifdef CONFIG_KEXEC_JUMP
+ if (kexec_image->preserve_context)
+ save_processor_state();
+#endif
+
+ save_ftrace_enabled = __ftrace_enabled_save();
/* Interrupts aren't acceptable while we reboot */
local_irq_disable();
+ if (image->preserve_context) {
+#ifdef CONFIG_X86_IO_APIC
+ /*
+ * We need to put APICs in legacy mode so that we can
+ * get timer interrupts in second kernel. kexec/kdump
+ * paths already have calls to disable_IO_APIC() in
+ * one form or other. kexec jump path also need
+ * one.
+ */
+ disable_IO_APIC();
+#endif
+ }
+
control_page = page_address(image->control_code_page) + PAGE_SIZE;
- memcpy(control_page, relocate_kernel, PAGE_SIZE);
+ memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE);
page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page);
+ page_list[VA_CONTROL_PAGE] = (unsigned long)control_page;
page_list[PA_TABLE_PAGE] =
(unsigned long)__pa(page_address(image->control_code_page));
- /* The segment registers are funny things, they have both a
+ if (image->type == KEXEC_TYPE_DEFAULT)
+ page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page)
+ << PAGE_SHIFT);
+
+ /*
+ * The segment registers are funny things, they have both a
* visible and an invisible part. Whenever the visible part is
* set to a specific selector, the invisible part is loaded
* with from a table in memory. At no other time is the
@@ -252,15 +319,25 @@ void machine_kexec(struct kimage *image)
* segments, before I zap the gdt with an invalid value.
*/
load_segments();
- /* The gdt & idt are now invalid.
+ /*
+ * The gdt & idt are now invalid.
* If you want to load them you must set up your own idt & gdt.
*/
- set_gdt(phys_to_virt(0),0);
- set_idt(phys_to_virt(0),0);
+ set_gdt(phys_to_virt(0), 0);
+ set_idt(phys_to_virt(0), 0);
/* now call it */
- relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
- image->start);
+ image->start = relocate_kernel((unsigned long)image->head,
+ (unsigned long)page_list,
+ image->start,
+ image->preserve_context);
+
+#ifdef CONFIG_KEXEC_JUMP
+ if (kexec_image->preserve_context)
+ restore_processor_state();
+#endif
+
+ __ftrace_enabled_restore(save_ftrace_enabled);
}
void arch_crash_save_vmcoreinfo(void)
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S
index 2064d0aa8d28..41235531b11c 100644
--- a/arch/x86/kernel/relocate_kernel_32.S
+++ b/arch/x86/kernel/relocate_kernel_32.S
@@ -17,7 +17,8 @@
#define PTR(x) (x << 2)
-/* control_page + KEXEC_CONTROL_CODE_MAX_SIZE
+/*
+ * control_page + KEXEC_CONTROL_CODE_MAX_SIZE
* ~ control_page + PAGE_SIZE are used as data storage and stack for
* jumping back
*/
@@ -76,8 +77,10 @@ relocate_kernel:
movl %eax, CP_PA_SWAP_PAGE(%edi)
movl %ebx, CP_PA_BACKUP_PAGES_MAP(%edi)
- /* get physical address of control page now */
- /* this is impossible after page table switch */
+ /*
+ * get physical address of control page now
+ * this is impossible after page table switch
+ */
movl PTR(PA_CONTROL_PAGE)(%ebp), %edi
/* switch to new set of page tables */
@@ -97,7 +100,8 @@ identity_mapped:
/* store the start address on the stack */
pushl %edx
- /* Set cr0 to a known state:
+ /*
+ * Set cr0 to a known state:
* - Paging disabled
* - Alignment check disabled
* - Write protect disabled
@@ -113,7 +117,8 @@ identity_mapped:
/* clear cr4 if applicable */
testl %ecx, %ecx
jz 1f
- /* Set cr4 to a known state:
+ /*
+ * Set cr4 to a known state:
* Setting everything to zero seems safe.
*/
xorl %eax, %eax
@@ -132,15 +137,18 @@ identity_mapped:
call swap_pages
addl $8, %esp
- /* To be certain of avoiding problems with self-modifying code
+ /*
+ * To be certain of avoiding problems with self-modifying code
* I need to execute a serializing instruction here.
* So I flush the TLB, it's handy, and not processor dependent.
*/
xorl %eax, %eax
movl %eax, %cr3
- /* set all of the registers to known values */
- /* leave %esp alone */
+ /*
+ * set all of the registers to known values
+ * leave %esp alone
+ */
testl %esi, %esi
jnz 1f
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index d32cfb27a479..4de8f5b3d476 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -19,29 +19,77 @@
#define PTR(x) (x << 3)
#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
+/*
+ * control_page + KEXEC_CONTROL_CODE_MAX_SIZE
+ * ~ control_page + PAGE_SIZE are used as data storage and stack for
+ * jumping back
+ */
+#define DATA(offset) (KEXEC_CONTROL_CODE_MAX_SIZE+(offset))
+
+/* Minimal CPU state */
+#define RSP DATA(0x0)
+#define CR0 DATA(0x8)
+#define CR3 DATA(0x10)
+#define CR4 DATA(0x18)
+
+/* other data */
+#define CP_PA_TABLE_PAGE DATA(0x20)
+#define CP_PA_SWAP_PAGE DATA(0x28)
+#define CP_PA_BACKUP_PAGES_MAP DATA(0x30)
+
.text
.align PAGE_SIZE
.code64
.globl relocate_kernel
relocate_kernel:
- /* %rdi indirection_page
+ /*
+ * %rdi indirection_page
* %rsi page_list
* %rdx start address
+ * %rcx preserve_context
*/
+ /* Save the CPU context, used for jumping back */
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushf
+
+ movq PTR(VA_CONTROL_PAGE)(%rsi), %r11
+ movq %rsp, RSP(%r11)
+ movq %cr0, %rax
+ movq %rax, CR0(%r11)
+ movq %cr3, %rax
+ movq %rax, CR3(%r11)
+ movq %cr4, %rax
+ movq %rax, CR4(%r11)
+
/* zero out flags, and disable interrupts */
pushq $0
popfq
- /* get physical address of control page now */
- /* this is impossible after page table switch */
+ /*
+ * get physical address of control page now
+ * this is impossible after page table switch
+ */
movq PTR(PA_CONTROL_PAGE)(%rsi), %r8
/* get physical address of page table now too */
- movq PTR(PA_TABLE_PAGE)(%rsi), %rcx
+ movq PTR(PA_TABLE_PAGE)(%rsi), %r9
+
+ /* get physical address of swap page now */
+ movq PTR(PA_SWAP_PAGE)(%rsi), %r10
+
+ /* save some information for jumping back */
+ movq %r9, CP_PA_TABLE_PAGE(%r11)
+ movq %r10, CP_PA_SWAP_PAGE(%r11)
+ movq %rdi, CP_PA_BACKUP_PAGES_MAP(%r11)
/* Switch to the identity mapped page tables */
- movq %rcx, %cr3
+ movq %r9, %cr3
/* setup a new stack at the end of the physical control page */
lea PAGE_SIZE(%r8), %rsp
@@ -55,7 +103,8 @@ identity_mapped:
/* store the start address on the stack */
pushq %rdx
- /* Set cr0 to a known state:
+ /*
+ * Set cr0 to a known state:
* - Paging enabled
* - Alignment check disabled
* - Write protect disabled
@@ -68,7 +117,8 @@ identity_mapped:
orl $(X86_CR0_PG | X86_CR0_PE), %eax
movq %rax, %cr0
- /* Set cr4 to a known state:
+ /*
+ * Set cr4 to a known state:
* - physical address extension enabled
*/
movq $X86_CR4_PAE, %rax
@@ -78,9 +128,87 @@ identity_mapped:
1:
/* Flush the TLB (needed?) */
- movq %rcx, %cr3
+ movq %r9, %cr3
+
+ movq %rcx, %r11
+ call swap_pages
+
+ /*
+ * To be certain of avoiding problems with self-modifying code
+ * I need to execute a serializing instruction here.
+ * So I flush the TLB by reloading %cr3 here, it's handy,
+ * and not processor dependent.
+ */
+ movq %cr3, %rax
+ movq %rax, %cr3
+
+ /*
+ * set all of the registers to known values
+ * leave %rsp alone
+ */
+
+ testq %r11, %r11
+ jnz 1f
+ xorq %rax, %rax
+ xorq %rbx, %rbx
+ xorq %rcx, %rcx
+ xorq %rdx, %rdx
+ xorq %rsi, %rsi
+ xorq %rdi, %rdi
+ xorq %rbp, %rbp
+ xorq %r8, %r8
+ xorq %r9, %r9
+ xorq %r10, %r9
+ xorq %r11, %r11
+ xorq %r12, %r12
+ xorq %r13, %r13
+ xorq %r14, %r14
+ xorq %r15, %r15
+
+ ret
+
+1:
+ popq %rdx
+ leaq PAGE_SIZE(%r10), %rsp
+ call *%rdx
+
+ /* get the re-entry point of the peer system */
+ movq 0(%rsp), %rbp
+ call 1f
+1:
+ popq %r8
+ subq $(1b - relocate_kernel), %r8
+ movq CP_PA_SWAP_PAGE(%r8), %r10
+ movq CP_PA_BACKUP_PAGES_MAP(%r8), %rdi
+ movq CP_PA_TABLE_PAGE(%r8), %rax
+ movq %rax, %cr3
+ lea PAGE_SIZE(%r8), %rsp
+ call swap_pages
+ movq $virtual_mapped, %rax
+ pushq %rax
+ ret
+
+virtual_mapped:
+ movq RSP(%r8), %rsp
+ movq CR4(%r8), %rax
+ movq %rax, %cr4
+ movq CR3(%r8), %rax
+ movq CR0(%r8), %r8
+ movq %rax, %cr3
+ movq %r8, %cr0
+ movq %rbp, %rax
+
+ popf
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbp
+ popq %rbx
+ ret
/* Do the copies */
+swap_pages:
movq %rdi, %rcx /* Put the page_list in %rcx */
xorq %rdi, %rdi
xorq %rsi, %rsi
@@ -112,36 +240,27 @@ identity_mapped:
movq %rcx, %rsi /* For ever source page do a copy */
andq $0xfffffffffffff000, %rsi
+ movq %rdi, %rdx
+ movq %rsi, %rax
+
+ movq %r10, %rdi
movq $512, %rcx
rep ; movsq
- jmp 0b
-3:
-
- /* To be certain of avoiding problems with self-modifying code
- * I need to execute a serializing instruction here.
- * So I flush the TLB by reloading %cr3 here, it's handy,
- * and not processor dependent.
- */
- movq %cr3, %rax
- movq %rax, %cr3
- /* set all of the registers to known values */
- /* leave %rsp alone */
+ movq %rax, %rdi
+ movq %rdx, %rsi
+ movq $512, %rcx
+ rep ; movsq
- xorq %rax, %rax
- xorq %rbx, %rbx
- xorq %rcx, %rcx
- xorq %rdx, %rdx
- xorq %rsi, %rsi
- xorq %rdi, %rdi
- xorq %rbp, %rbp
- xorq %r8, %r8
- xorq %r9, %r9
- xorq %r10, %r9
- xorq %r11, %r11
- xorq %r12, %r12
- xorq %r13, %r13
- xorq %r14, %r14
- xorq %r15, %r15
+ movq %rdx, %rdi
+ movq %r10, %rsi
+ movq $512, %rcx
+ rep ; movsq
+ lea PAGE_SIZE(%rax), %rsi
+ jmp 0b
+3:
ret
+
+ .globl kexec_control_code_size
+.set kexec_control_code_size, . - relocate_kernel
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
index 191a876e9e87..31ffc24eec4d 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/kernel/visws_quirks.c
@@ -578,7 +578,7 @@ static struct irq_chip piix4_virtual_irq_type = {
static irqreturn_t piix4_master_intr(int irq, void *dev_id)
{
int realirq;
- irq_desc_t *desc;
+ struct irq_desc *desc;
unsigned long flags;
spin_lock_irqsave(&i8259A_lock, flags);
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index fbfced6f6800..5bf54e40c6ef 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -275,3 +275,10 @@ ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
ASSERT((per_cpu__irq_stack_union == 0),
"irq_stack_union is not at start of per-cpu area");
#endif
+
+#ifdef CONFIG_KEXEC
+#include <asm/kexec.h>
+
+ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
+ "kexec control code size is too big")
+#endif
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index f3a5305b8adf..9fe4ddaa8f6f 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -348,6 +348,11 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx,
* flush_tlb_user() for both user and kernel mappings unless
* the Page Global Enable (PGE) feature bit is set. */
*dx |= 0x00002000;
+ /* We also lie, and say we're family id 5. 6 or greater
+ * leads to a rdmsr in early_init_intel which we can't handle.
+ * Family ID is returned as bits 8-12 in ax. */
+ *ax &= 0xFFFFF0FF;
+ *ax |= 0x00000500;
break;
case 0x80000000:
/* Futureproof this a little: if they ask how much extended
@@ -594,19 +599,21 @@ static void __init lguest_init_IRQ(void)
/* Some systems map "vectors" to interrupts weirdly. Lguest has
* a straightforward 1 to 1 mapping, so force that here. */
__get_cpu_var(vector_irq)[vector] = i;
- if (vector != SYSCALL_VECTOR) {
- set_intr_gate(vector,
- interrupt[vector-FIRST_EXTERNAL_VECTOR]);
- set_irq_chip_and_handler_name(i, &lguest_irq_controller,
- handle_level_irq,
- "level");
- }
+ if (vector != SYSCALL_VECTOR)
+ set_intr_gate(vector, interrupt[i]);
}
/* This call is required to set up for 4k stacks, where we have
* separate stacks for hard and soft interrupts. */
irq_ctx_init(smp_processor_id());
}
+void lguest_setup_irq(unsigned int irq)
+{
+ irq_to_desc_alloc_cpu(irq, 0);
+ set_irq_chip_and_handler_name(irq, &lguest_irq_controller,
+ handle_level_irq, "level");
+}
+
/*
* Time.
*
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index d11745334a67..f256e73542d7 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -121,23 +121,30 @@ void kunmap_atomic(void *kvaddr, enum km_type type)
pagefault_enable();
}
-/* This is the same as kmap_atomic() but can map memory that doesn't
- * have a struct page associated with it.
- */
-void *kmap_atomic_pfn(unsigned long pfn, enum km_type type)
+void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
{
enum fixed_addresses idx;
unsigned long vaddr;
pagefault_disable();
- idx = type + KM_TYPE_NR*smp_processor_id();
+ debug_kmap_atomic_prot(type);
+
+ idx = type + KM_TYPE_NR * smp_processor_id();
vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
- set_pte(kmap_pte-idx, pfn_pte(pfn, kmap_prot));
+ set_pte(kmap_pte - idx, pfn_pte(pfn, prot));
arch_flush_lazy_mmu_mode();
return (void*) vaddr;
}
+
+/* This is the same as kmap_atomic() but can map memory that doesn't
+ * have a struct page associated with it.
+ */
+void *kmap_atomic_pfn(unsigned long pfn, enum km_type type)
+{
+ return kmap_atomic_prot_pfn(pfn, type, kmap_prot);
+}
EXPORT_SYMBOL_GPL(kmap_atomic_pfn); /* temporarily in use by i915 GEM until vmap */
struct page *kmap_atomic_to_page(void *ptr)
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index 04102d42ff42..592984e5496b 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -18,6 +18,7 @@
#include <asm/iomap.h>
#include <asm/pat.h>
+#include <asm/highmem.h>
#include <linux/module.h>
int is_io_mapping_possible(resource_size_t base, unsigned long size)
@@ -36,11 +37,6 @@ EXPORT_SYMBOL_GPL(is_io_mapping_possible);
void *
iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
{
- enum fixed_addresses idx;
- unsigned long vaddr;
-
- pagefault_disable();
-
/*
* For non-PAT systems, promote PAGE_KERNEL_WC to PAGE_KERNEL_UC_MINUS.
* PAGE_KERNEL_WC maps to PWT, which translates to uncached if the
@@ -50,12 +46,7 @@ iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
if (!pat_enabled && pgprot_val(prot) == pgprot_val(PAGE_KERNEL_WC))
prot = PAGE_KERNEL_UC_MINUS;
- idx = type + KM_TYPE_NR*smp_processor_id();
- vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
- set_pte(kmap_pte-idx, pfn_pte(pfn, prot));
- arch_flush_lazy_mmu_mode();
-
- return (void*) vaddr;
+ return kmap_atomic_prot_pfn(pfn, type, prot);
}
EXPORT_SYMBOL_GPL(iomap_atomic_prot_pfn);
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 6a518dd08a36..4f115e00486b 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -310,7 +310,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);
if (!ctx->active) {
- pr_warning("kmmio: spurious debug trap on CPU %d.\n",
+ pr_debug("kmmio: spurious debug trap on CPU %d.\n",
smp_processor_id());
goto out;
}
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 8253bc97587e..9c4294986af7 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -522,6 +522,17 @@ static int split_large_page(pte_t *kpte, unsigned long address)
* primary protection behavior:
*/
__set_pmd_pte(kpte, address, mk_pte(base, __pgprot(_KERNPG_TABLE)));
+
+ /*
+ * Intel Atom errata AAH41 workaround.
+ *
+ * The real fix should be in hw or in a microcode update, but
+ * we also probabilistically try to reduce the window of having
+ * a large TLB mixed with 4K TLBs while instruction fetches are
+ * going on.
+ */
+ __flush_tlb_all();
+
base = NULL;
out_unlock: