summaryrefslogtreecommitdiff
path: root/arch/x86/include/asm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/include/asm')
-rw-r--r--arch/x86/include/asm/amd_nb.h17
-rw-r--r--arch/x86/include/asm/bootparam_utils.h38
-rw-r--r--arch/x86/include/asm/cpufeature.h2
-rw-r--r--arch/x86/include/asm/ftrace.h1
-rw-r--r--arch/x86/include/asm/hpet.h5
-rw-r--r--arch/x86/include/asm/hw_irq.h13
-rw-r--r--arch/x86/include/asm/hypervisor.h13
-rw-r--r--arch/x86/include/asm/io_apic.h28
-rw-r--r--arch/x86/include/asm/irq_remapping.h40
-rw-r--r--arch/x86/include/asm/irq_vectors.h4
-rw-r--r--arch/x86/include/asm/kvm_para.h8
-rw-r--r--arch/x86/include/asm/linkage.h18
-rw-r--r--arch/x86/include/asm/mce.h84
-rw-r--r--arch/x86/include/asm/mshyperv.h4
-rw-r--r--arch/x86/include/asm/pci.h3
-rw-r--r--arch/x86/include/asm/perf_event.h13
-rw-r--r--arch/x86/include/asm/pgtable.h17
-rw-r--r--arch/x86/include/asm/pgtable_32.h7
-rw-r--r--arch/x86/include/asm/pgtable_64.h3
-rw-r--r--arch/x86/include/asm/processor.h2
-rw-r--r--arch/x86/include/asm/required-features.h8
-rw-r--r--arch/x86/include/asm/x86_init.h27
-rw-r--r--arch/x86/include/asm/xor.h491
-rw-r--r--arch/x86/include/asm/xor_32.h309
-rw-r--r--arch/x86/include/asm/xor_64.h305
25 files changed, 793 insertions, 667 deletions
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index b3341e9cd8fd..a54ee1d054d9 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -81,6 +81,23 @@ static inline struct amd_northbridge *node_to_amd_nb(int node)
return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL;
}
+static inline u16 amd_get_node_id(struct pci_dev *pdev)
+{
+ struct pci_dev *misc;
+ int i;
+
+ for (i = 0; i != amd_nb_num(); i++) {
+ misc = node_to_amd_nb(i)->misc;
+
+ if (pci_domain_nr(misc->bus) == pci_domain_nr(pdev->bus) &&
+ PCI_SLOT(misc->devfn) == PCI_SLOT(pdev->devfn))
+ return i;
+ }
+
+ WARN(1, "Unable to find AMD Northbridge id for %s\n", pci_name(pdev));
+ return 0;
+}
+
#else
#define amd_nb_num(x) 0
diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h
new file mode 100644
index 000000000000..5b5e9cb774b5
--- /dev/null
+++ b/arch/x86/include/asm/bootparam_utils.h
@@ -0,0 +1,38 @@
+#ifndef _ASM_X86_BOOTPARAM_UTILS_H
+#define _ASM_X86_BOOTPARAM_UTILS_H
+
+#include <asm/bootparam.h>
+
+/*
+ * This file is included from multiple environments. Do not
+ * add completing #includes to make it standalone.
+ */
+
+/*
+ * Deal with bootloaders which fail to initialize unknown fields in
+ * boot_params to zero. The list fields in this list are taken from
+ * analysis of kexec-tools; if other broken bootloaders initialize a
+ * different set of fields we will need to figure out how to disambiguate.
+ *
+ */
+static void sanitize_boot_params(struct boot_params *boot_params)
+{
+ if (boot_params->sentinel) {
+ /*fields in boot_params are not valid, clear them */
+ memset(&boot_params->olpc_ofw_header, 0,
+ (char *)&boot_params->alt_mem_k -
+ (char *)&boot_params->olpc_ofw_header);
+ memset(&boot_params->kbd_status, 0,
+ (char *)&boot_params->hdr -
+ (char *)&boot_params->kbd_status);
+ memset(&boot_params->_pad7[0], 0,
+ (char *)&boot_params->edd_mbr_sig_buffer[0] -
+ (char *)&boot_params->_pad7[0]);
+ memset(&boot_params->_pad8[0], 0,
+ (char *)&boot_params->eddbuf[0] -
+ (char *)&boot_params->_pad8[0]);
+ memset(&boot_params->_pad9[0], 0, sizeof(boot_params->_pad9));
+ }
+}
+
+#endif /* _ASM_X86_BOOTPARAM_UTILS_H */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 2d9075e863a0..93fe929d1cee 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -167,6 +167,7 @@
#define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */
#define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */
#define X86_FEATURE_PERFCTR_CORE (6*32+23) /* core performance counter extensions */
+#define X86_FEATURE_PERFCTR_NB (6*32+24) /* NB performance counter extensions */
/*
* Auxiliary flags: Linux defined - For features scattered in various
@@ -309,6 +310,7 @@ extern const char * const x86_power_flags[32];
#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR)
#define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ)
#define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE)
+#define cpu_has_perfctr_nb boot_cpu_has(X86_FEATURE_PERFCTR_NB)
#define cpu_has_cx8 boot_cpu_has(X86_FEATURE_CX8)
#define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16)
#define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU)
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 9a25b522d377..86cb51e1ca96 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -44,7 +44,6 @@
#ifdef CONFIG_DYNAMIC_FTRACE
#define ARCH_SUPPORTS_FTRACE_OPS 1
-#define ARCH_SUPPORTS_FTRACE_SAVE_REGS
#endif
#ifndef __ASSEMBLY__
diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h
index 434e2106cc87..b18df579c0e9 100644
--- a/arch/x86/include/asm/hpet.h
+++ b/arch/x86/include/asm/hpet.h
@@ -80,9 +80,9 @@ extern void hpet_msi_write(struct hpet_dev *hdev, struct msi_msg *msg);
extern void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg);
#ifdef CONFIG_PCI_MSI
-extern int arch_setup_hpet_msi(unsigned int irq, unsigned int id);
+extern int default_setup_hpet_msi(unsigned int irq, unsigned int id);
#else
-static inline int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
+static inline int default_setup_hpet_msi(unsigned int irq, unsigned int id)
{
return -EINVAL;
}
@@ -111,6 +111,7 @@ extern void hpet_unregister_irq_handler(rtc_irq_handler handler);
static inline int hpet_enable(void) { return 0; }
static inline int is_hpet_enabled(void) { return 0; }
#define hpet_readl(a) 0
+#define default_setup_hpet_msi NULL
#endif
#endif /* _ASM_X86_HPET_H */
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index eb92a6ed2be7..10a78c3d3d5a 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -101,6 +101,7 @@ static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr,
irq_attr->polarity = polarity;
}
+/* Intel specific interrupt remapping information */
struct irq_2_iommu {
struct intel_iommu *iommu;
u16 irte_index;
@@ -108,6 +109,12 @@ struct irq_2_iommu {
u8 irte_mask;
};
+/* AMD specific interrupt remapping information */
+struct irq_2_irte {
+ u16 devid; /* Device ID for IRTE table */
+ u16 index; /* Index into IRTE table*/
+};
+
/*
* This is performance-critical, we want to do it O(1)
*
@@ -120,7 +127,11 @@ struct irq_cfg {
u8 vector;
u8 move_in_progress : 1;
#ifdef CONFIG_IRQ_REMAP
- struct irq_2_iommu irq_2_iommu;
+ u8 remapped : 1;
+ union {
+ struct irq_2_iommu irq_2_iommu;
+ struct irq_2_irte irq_2_irte;
+ };
#endif
};
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
index b518c7509933..86095ed14135 100644
--- a/arch/x86/include/asm/hypervisor.h
+++ b/arch/x86/include/asm/hypervisor.h
@@ -25,6 +25,7 @@
extern void init_hypervisor(struct cpuinfo_x86 *c);
extern void init_hypervisor_platform(void);
+extern bool hypervisor_x2apic_available(void);
/*
* x86 hypervisor information
@@ -41,6 +42,9 @@ struct hypervisor_x86 {
/* Platform setup (run once per boot) */
void (*init_platform)(void);
+
+ /* X2APIC detection (run once per boot) */
+ bool (*x2apic_available)(void);
};
extern const struct hypervisor_x86 *x86_hyper;
@@ -51,13 +55,4 @@ extern const struct hypervisor_x86 x86_hyper_ms_hyperv;
extern const struct hypervisor_x86 x86_hyper_xen_hvm;
extern const struct hypervisor_x86 x86_hyper_kvm;
-static inline bool hypervisor_x2apic_available(void)
-{
- if (kvm_para_available())
- return true;
- if (xen_x2apic_para_available())
- return true;
- return false;
-}
-
#endif
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 73d8c5398ea9..459e50a424d1 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -144,11 +144,24 @@ extern int timer_through_8259;
(mp_irq_entries && !skip_ioapic_setup && io_apic_irqs)
struct io_apic_irq_attr;
+struct irq_cfg;
extern int io_apic_set_pci_routing(struct device *dev, int irq,
struct io_apic_irq_attr *irq_attr);
void setup_IO_APIC_irq_extra(u32 gsi);
extern void ioapic_insert_resources(void);
+extern int native_setup_ioapic_entry(int, struct IO_APIC_route_entry *,
+ unsigned int, int,
+ struct io_apic_irq_attr *);
+extern int native_setup_ioapic_entry(int, struct IO_APIC_route_entry *,
+ unsigned int, int,
+ struct io_apic_irq_attr *);
+extern void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg);
+
+extern void native_compose_msi_msg(struct pci_dev *pdev,
+ unsigned int irq, unsigned int dest,
+ struct msi_msg *msg, u8 hpet_id);
+extern void native_eoi_ioapic_pin(int apic, int pin, int vector);
int io_apic_setup_irq_pin_once(unsigned int irq, int node, struct io_apic_irq_attr *attr);
extern int save_ioapic_entries(void);
@@ -179,6 +192,12 @@ extern void __init native_io_apic_init_mappings(void);
extern unsigned int native_io_apic_read(unsigned int apic, unsigned int reg);
extern void native_io_apic_write(unsigned int apic, unsigned int reg, unsigned int val);
extern void native_io_apic_modify(unsigned int apic, unsigned int reg, unsigned int val);
+extern void native_disable_io_apic(void);
+extern void native_io_apic_print_entries(unsigned int apic, unsigned int nr_entries);
+extern void intel_ir_io_apic_print_entries(unsigned int apic, unsigned int nr_entries);
+extern int native_ioapic_set_affinity(struct irq_data *,
+ const struct cpumask *,
+ bool);
static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
{
@@ -193,6 +212,9 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned
{
x86_io_apic_ops.modify(apic, reg, value);
}
+
+extern void io_apic_eoi(unsigned int apic, unsigned int vector);
+
#else /* !CONFIG_X86_IO_APIC */
#define io_apic_assign_pci_irqs 0
@@ -223,6 +245,12 @@ static inline void disable_ioapic_support(void) { }
#define native_io_apic_read NULL
#define native_io_apic_write NULL
#define native_io_apic_modify NULL
+#define native_disable_io_apic NULL
+#define native_io_apic_print_entries NULL
+#define native_ioapic_set_affinity NULL
+#define native_setup_ioapic_entry NULL
+#define native_compose_msi_msg NULL
+#define native_eoi_ioapic_pin NULL
#endif
#endif /* _ASM_X86_IO_APIC_H */
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index 5fb9bbbd2f14..95fd3527f632 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -26,8 +26,6 @@
#ifdef CONFIG_IRQ_REMAP
-extern int irq_remapping_enabled;
-
extern void setup_irq_remapping_ops(void);
extern int irq_remapping_supported(void);
extern int irq_remapping_prepare(void);
@@ -40,21 +38,19 @@ extern int setup_ioapic_remapped_entry(int irq,
unsigned int destination,
int vector,
struct io_apic_irq_attr *attr);
-extern int set_remapped_irq_affinity(struct irq_data *data,
- const struct cpumask *mask,
- bool force);
extern void free_remapped_irq(int irq);
extern void compose_remapped_msi_msg(struct pci_dev *pdev,
unsigned int irq, unsigned int dest,
struct msi_msg *msg, u8 hpet_id);
-extern int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec);
-extern int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq,
- int index, int sub_handle);
extern int setup_hpet_msi_remapped(unsigned int irq, unsigned int id);
+extern void panic_if_irq_remap(const char *msg);
+extern bool setup_remapped_irq(int irq,
+ struct irq_cfg *cfg,
+ struct irq_chip *chip);
-#else /* CONFIG_IRQ_REMAP */
+void irq_remap_modify_chip_defaults(struct irq_chip *chip);
-#define irq_remapping_enabled 0
+#else /* CONFIG_IRQ_REMAP */
static inline void setup_irq_remapping_ops(void) { }
static inline int irq_remapping_supported(void) { return 0; }
@@ -71,30 +67,30 @@ static inline int setup_ioapic_remapped_entry(int irq,
{
return -ENODEV;
}
-static inline int set_remapped_irq_affinity(struct irq_data *data,
- const struct cpumask *mask,
- bool force)
-{
- return 0;
-}
static inline void free_remapped_irq(int irq) { }
static inline void compose_remapped_msi_msg(struct pci_dev *pdev,
unsigned int irq, unsigned int dest,
struct msi_msg *msg, u8 hpet_id)
{
}
-static inline int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec)
+static inline int setup_hpet_msi_remapped(unsigned int irq, unsigned int id)
{
return -ENODEV;
}
-static inline int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq,
- int index, int sub_handle)
+
+static inline void panic_if_irq_remap(const char *msg)
+{
+}
+
+static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip)
{
- return -ENODEV;
}
-static inline int setup_hpet_msi_remapped(unsigned int irq, unsigned int id)
+
+static inline bool setup_remapped_irq(int irq,
+ struct irq_cfg *cfg,
+ struct irq_chip *chip)
{
- return -ENODEV;
+ return false;
}
#endif /* CONFIG_IRQ_REMAP */
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 1508e518c7e3..aac5fa62a86c 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -109,8 +109,8 @@
#define UV_BAU_MESSAGE 0xf5
-/* Xen vector callback to receive events in a HVM domain */
-#define XEN_HVM_EVTCHN_CALLBACK 0xf3
+/* Vector on which hypervisor callbacks will be delivered */
+#define HYPERVISOR_CALLBACK_VECTOR 0xf3
/*
* Local APIC timer IRQ vector is on a different priority level,
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 5ed1f16187be..65231e173baf 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -85,13 +85,13 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
return ret;
}
-static inline int kvm_para_available(void)
+static inline bool kvm_para_available(void)
{
unsigned int eax, ebx, ecx, edx;
char signature[13];
if (boot_cpu_data.cpuid_level < 0)
- return 0; /* So we don't blow up on old processors */
+ return false; /* So we don't blow up on old processors */
if (cpu_has_hypervisor) {
cpuid(KVM_CPUID_SIGNATURE, &eax, &ebx, &ecx, &edx);
@@ -101,10 +101,10 @@ static inline int kvm_para_available(void)
signature[12] = 0;
if (strcmp(signature, "KVMKVMKVM") == 0)
- return 1;
+ return true;
}
- return 0;
+ return false;
}
static inline unsigned int kvm_arch_para_features(void)
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index 48142971b25d..79327e9483a3 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -27,20 +27,20 @@
#define __asmlinkage_protect0(ret) \
__asmlinkage_protect_n(ret)
#define __asmlinkage_protect1(ret, arg1) \
- __asmlinkage_protect_n(ret, "g" (arg1))
+ __asmlinkage_protect_n(ret, "m" (arg1))
#define __asmlinkage_protect2(ret, arg1, arg2) \
- __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2))
+ __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2))
#define __asmlinkage_protect3(ret, arg1, arg2, arg3) \
- __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3))
+ __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3))
#define __asmlinkage_protect4(ret, arg1, arg2, arg3, arg4) \
- __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3), \
- "g" (arg4))
+ __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \
+ "m" (arg4))
#define __asmlinkage_protect5(ret, arg1, arg2, arg3, arg4, arg5) \
- __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3), \
- "g" (arg4), "g" (arg5))
+ __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \
+ "m" (arg4), "m" (arg5))
#define __asmlinkage_protect6(ret, arg1, arg2, arg3, arg4, arg5, arg6) \
- __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3), \
- "g" (arg4), "g" (arg5), "g" (arg6))
+ __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \
+ "m" (arg4), "m" (arg5), "m" (arg6))
#endif /* CONFIG_X86_32 */
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index ecdfee60ee4a..f4076af1f4ed 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -3,6 +3,90 @@
#include <uapi/asm/mce.h>
+/*
+ * Machine Check support for x86
+ */
+
+/* MCG_CAP register defines */
+#define MCG_BANKCNT_MASK 0xff /* Number of Banks */
+#define MCG_CTL_P (1ULL<<8) /* MCG_CTL register available */
+#define MCG_EXT_P (1ULL<<9) /* Extended registers available */
+#define MCG_CMCI_P (1ULL<<10) /* CMCI supported */
+#define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */
+#define MCG_EXT_CNT_SHIFT 16
+#define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT)
+#define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */
+
+/* MCG_STATUS register defines */
+#define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */
+#define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */
+#define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */
+
+/* MCi_STATUS register defines */
+#define MCI_STATUS_VAL (1ULL<<63) /* valid error */
+#define MCI_STATUS_OVER (1ULL<<62) /* previous errors lost */
+#define MCI_STATUS_UC (1ULL<<61) /* uncorrected error */
+#define MCI_STATUS_EN (1ULL<<60) /* error enabled */
+#define MCI_STATUS_MISCV (1ULL<<59) /* misc error reg. valid */
+#define MCI_STATUS_ADDRV (1ULL<<58) /* addr reg. valid */
+#define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */
+#define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */
+#define MCI_STATUS_AR (1ULL<<55) /* Action required */
+#define MCACOD 0xffff /* MCA Error Code */
+
+/* Architecturally defined codes from SDM Vol. 3B Chapter 15 */
+#define MCACOD_SCRUB 0x00C0 /* 0xC0-0xCF Memory Scrubbing */
+#define MCACOD_SCRUBMSK 0xfff0
+#define MCACOD_L3WB 0x017A /* L3 Explicit Writeback */
+#define MCACOD_DATA 0x0134 /* Data Load */
+#define MCACOD_INSTR 0x0150 /* Instruction Fetch */
+
+/* MCi_MISC register defines */
+#define MCI_MISC_ADDR_LSB(m) ((m) & 0x3f)
+#define MCI_MISC_ADDR_MODE(m) (((m) >> 6) & 7)
+#define MCI_MISC_ADDR_SEGOFF 0 /* segment offset */
+#define MCI_MISC_ADDR_LINEAR 1 /* linear address */
+#define MCI_MISC_ADDR_PHYS 2 /* physical address */
+#define MCI_MISC_ADDR_MEM 3 /* memory address */
+#define MCI_MISC_ADDR_GENERIC 7 /* generic */
+
+/* CTL2 register defines */
+#define MCI_CTL2_CMCI_EN (1ULL << 30)
+#define MCI_CTL2_CMCI_THRESHOLD_MASK 0x7fffULL
+
+#define MCJ_CTX_MASK 3
+#define MCJ_CTX(flags) ((flags) & MCJ_CTX_MASK)
+#define MCJ_CTX_RANDOM 0 /* inject context: random */
+#define MCJ_CTX_PROCESS 0x1 /* inject context: process */
+#define MCJ_CTX_IRQ 0x2 /* inject context: IRQ */
+#define MCJ_NMI_BROADCAST 0x4 /* do NMI broadcasting */
+#define MCJ_EXCEPTION 0x8 /* raise as exception */
+#define MCJ_IRQ_BRAODCAST 0x10 /* do IRQ broadcasting */
+
+#define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */
+
+/* Software defined banks */
+#define MCE_EXTENDED_BANK 128
+#define MCE_THERMAL_BANK (MCE_EXTENDED_BANK + 0)
+#define K8_MCE_THRESHOLD_BASE (MCE_EXTENDED_BANK + 1)
+
+#define MCE_LOG_LEN 32
+#define MCE_LOG_SIGNATURE "MACHINECHECK"
+
+/*
+ * This structure contains all data related to the MCE log. Also
+ * carries a signature to make it easier to find from external
+ * debugging tools. Each entry is only valid when its finished flag
+ * is set.
+ */
+struct mce_log {
+ char signature[12]; /* "MACHINECHECK" */
+ unsigned len; /* = MCE_LOG_LEN */
+ unsigned next;
+ unsigned flags;
+ unsigned recordlen; /* length of struct mce */
+ struct mce entry[MCE_LOG_LEN];
+};
struct mca_config {
bool dont_log_ce;
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 79ce5685ab64..c2934be2446a 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -11,4 +11,8 @@ struct ms_hyperv_info {
extern struct ms_hyperv_info ms_hyperv;
+void hyperv_callback_vector(void);
+void hyperv_vector_handler(struct pt_regs *regs);
+void hv_register_vmbus_handler(int irq, irq_handler_t handler);
+
#endif
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index dba7805176bf..c28fd02f4bf7 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -121,9 +121,12 @@ static inline void x86_restore_msi_irqs(struct pci_dev *dev, int irq)
#define arch_teardown_msi_irq x86_teardown_msi_irq
#define arch_restore_msi_irqs x86_restore_msi_irqs
/* implemented in arch/x86/kernel/apic/io_apic. */
+struct msi_desc;
int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type);
void native_teardown_msi_irq(unsigned int irq);
void native_restore_msi_irqs(struct pci_dev *dev, int irq);
+int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
+ unsigned int irq_base, unsigned int irq_offset);
/* default to the implementation in drivers/lib/msi.c */
#define HAVE_DEFAULT_MSI_TEARDOWN_IRQS
#define HAVE_DEFAULT_MSI_RESTORE_IRQS
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 4fabcdf1cfa7..57cb63402213 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -29,8 +29,13 @@
#define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23)
#define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL
-#define AMD_PERFMON_EVENTSEL_GUESTONLY (1ULL << 40)
-#define AMD_PERFMON_EVENTSEL_HOSTONLY (1ULL << 41)
+#define AMD64_EVENTSEL_INT_CORE_ENABLE (1ULL << 36)
+#define AMD64_EVENTSEL_GUESTONLY (1ULL << 40)
+#define AMD64_EVENTSEL_HOSTONLY (1ULL << 41)
+
+#define AMD64_EVENTSEL_INT_CORE_SEL_SHIFT 37
+#define AMD64_EVENTSEL_INT_CORE_SEL_MASK \
+ (0xFULL << AMD64_EVENTSEL_INT_CORE_SEL_SHIFT)
#define AMD64_EVENTSEL_EVENT \
(ARCH_PERFMON_EVENTSEL_EVENT | (0x0FULL << 32))
@@ -46,8 +51,12 @@
#define AMD64_RAW_EVENT_MASK \
(X86_RAW_EVENT_MASK | \
AMD64_EVENTSEL_EVENT)
+#define AMD64_RAW_EVENT_MASK_NB \
+ (AMD64_EVENTSEL_EVENT | \
+ ARCH_PERFMON_EVENTSEL_UMASK)
#define AMD64_NUM_COUNTERS 4
#define AMD64_NUM_COUNTERS_CORE 6
+#define AMD64_NUM_COUNTERS_NB 4
#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c
#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8)
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 5199db2923d3..fc304279b559 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -142,6 +142,11 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT;
}
+static inline unsigned long pud_pfn(pud_t pud)
+{
+ return (pud_val(pud) & PTE_PFN_MASK) >> PAGE_SHIFT;
+}
+
#define pte_page(pte) pfn_to_page(pte_pfn(pte))
static inline int pmd_large(pmd_t pte)
@@ -781,6 +786,18 @@ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
memcpy(dst, src, count * sizeof(pgd_t));
}
+/*
+ * The x86 doesn't have any external MMU info: the kernel page
+ * tables contain all the necessary information.
+ */
+static inline void update_mmu_cache(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep)
+{
+}
+static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
+ unsigned long addr, pmd_t *pmd)
+{
+}
#include <asm-generic/pgtable.h>
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index 8faa215a503e..9ee322103c6d 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -66,13 +66,6 @@ do { \
__flush_tlb_one((vaddr)); \
} while (0)
-/*
- * The i386 doesn't have any external MMU info: the kernel page
- * tables contain all the necessary information.
- */
-#define update_mmu_cache(vma, address, ptep) do { } while (0)
-#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
-
#endif /* !__ASSEMBLY__ */
/*
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 47356f9df82e..615b0c78449f 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -142,9 +142,6 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
#define pte_offset_map(dir, address) pte_offset_kernel((dir), (address))
#define pte_unmap(pte) ((void)(pte))/* NOP */
-#define update_mmu_cache(vma, address, ptep) do { } while (0)
-#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
-
/* Encode and de-code a swap entry */
#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
#define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1)
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 888184b2fc85..cf500543f6ff 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -943,7 +943,7 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
extern int get_tsc_mode(unsigned long adr);
extern int set_tsc_mode(unsigned int val);
-extern int amd_get_nb_id(int cpu);
+extern u16 amd_get_nb_id(int cpu);
struct aperfmperf {
u64 aperf, mperf;
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
index 6c7fc25f2c34..5c6e4fb370f5 100644
--- a/arch/x86/include/asm/required-features.h
+++ b/arch/x86/include/asm/required-features.h
@@ -47,6 +47,12 @@
# define NEED_NOPL 0
#endif
+#ifdef CONFIG_MATOM
+# define NEED_MOVBE (1<<(X86_FEATURE_MOVBE & 31))
+#else
+# define NEED_MOVBE 0
+#endif
+
#ifdef CONFIG_X86_64
#ifdef CONFIG_PARAVIRT
/* Paravirtualized systems may not have PSE or PGE available */
@@ -80,7 +86,7 @@
#define REQUIRED_MASK2 0
#define REQUIRED_MASK3 (NEED_NOPL)
-#define REQUIRED_MASK4 0
+#define REQUIRED_MASK4 (NEED_MOVBE)
#define REQUIRED_MASK5 0
#define REQUIRED_MASK6 0
#define REQUIRED_MASK7 0
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 57693498519c..7669941cc9d2 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -181,19 +181,38 @@ struct x86_platform_ops {
};
struct pci_dev;
+struct msi_msg;
struct x86_msi_ops {
int (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type);
+ void (*compose_msi_msg)(struct pci_dev *dev, unsigned int irq,
+ unsigned int dest, struct msi_msg *msg,
+ u8 hpet_id);
void (*teardown_msi_irq)(unsigned int irq);
void (*teardown_msi_irqs)(struct pci_dev *dev);
void (*restore_msi_irqs)(struct pci_dev *dev, int irq);
+ int (*setup_hpet_msi)(unsigned int irq, unsigned int id);
};
+struct IO_APIC_route_entry;
+struct io_apic_irq_attr;
+struct irq_data;
+struct cpumask;
+
struct x86_io_apic_ops {
- void (*init) (void);
- unsigned int (*read) (unsigned int apic, unsigned int reg);
- void (*write) (unsigned int apic, unsigned int reg, unsigned int value);
- void (*modify)(unsigned int apic, unsigned int reg, unsigned int value);
+ void (*init) (void);
+ unsigned int (*read) (unsigned int apic, unsigned int reg);
+ void (*write) (unsigned int apic, unsigned int reg, unsigned int value);
+ void (*modify) (unsigned int apic, unsigned int reg, unsigned int value);
+ void (*disable)(void);
+ void (*print_entries)(unsigned int apic, unsigned int nr_entries);
+ int (*set_affinity)(struct irq_data *data,
+ const struct cpumask *mask,
+ bool force);
+ int (*setup_entry)(int irq, struct IO_APIC_route_entry *entry,
+ unsigned int destination, int vector,
+ struct io_apic_irq_attr *attr);
+ void (*eoi_ioapic_pin)(int apic, int pin, int vector);
};
extern struct x86_init_ops x86_init;
diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h
index f8fde90bc45e..d8829751b3f8 100644
--- a/arch/x86/include/asm/xor.h
+++ b/arch/x86/include/asm/xor.h
@@ -1,10 +1,499 @@
#ifdef CONFIG_KMEMCHECK
/* kmemcheck doesn't handle MMX/SSE/SSE2 instructions */
# include <asm-generic/xor.h>
+#elif !defined(_ASM_X86_XOR_H)
+#define _ASM_X86_XOR_H
+
+/*
+ * Optimized RAID-5 checksumming functions for SSE.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * You should have received a copy of the GNU General Public License
+ * (for example /usr/src/linux/COPYING); if not, write to the Free
+ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * Cache avoiding checksumming functions utilizing KNI instructions
+ * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
+ */
+
+/*
+ * Based on
+ * High-speed RAID5 checksumming functions utilizing SSE instructions.
+ * Copyright (C) 1998 Ingo Molnar.
+ */
+
+/*
+ * x86-64 changes / gcc fixes from Andi Kleen.
+ * Copyright 2002 Andi Kleen, SuSE Labs.
+ *
+ * This hasn't been optimized for the hammer yet, but there are likely
+ * no advantages to be gotten from x86-64 here anyways.
+ */
+
+#include <asm/i387.h>
+
+#ifdef CONFIG_X86_32
+/* reduce register pressure */
+# define XOR_CONSTANT_CONSTRAINT "i"
#else
+# define XOR_CONSTANT_CONSTRAINT "re"
+#endif
+
+#define OFFS(x) "16*("#x")"
+#define PF_OFFS(x) "256+16*("#x")"
+#define PF0(x) " prefetchnta "PF_OFFS(x)"(%[p1]) ;\n"
+#define LD(x, y) " movaps "OFFS(x)"(%[p1]), %%xmm"#y" ;\n"
+#define ST(x, y) " movaps %%xmm"#y", "OFFS(x)"(%[p1]) ;\n"
+#define PF1(x) " prefetchnta "PF_OFFS(x)"(%[p2]) ;\n"
+#define PF2(x) " prefetchnta "PF_OFFS(x)"(%[p3]) ;\n"
+#define PF3(x) " prefetchnta "PF_OFFS(x)"(%[p4]) ;\n"
+#define PF4(x) " prefetchnta "PF_OFFS(x)"(%[p5]) ;\n"
+#define XO1(x, y) " xorps "OFFS(x)"(%[p2]), %%xmm"#y" ;\n"
+#define XO2(x, y) " xorps "OFFS(x)"(%[p3]), %%xmm"#y" ;\n"
+#define XO3(x, y) " xorps "OFFS(x)"(%[p4]), %%xmm"#y" ;\n"
+#define XO4(x, y) " xorps "OFFS(x)"(%[p5]), %%xmm"#y" ;\n"
+#define NOP(x)
+
+#define BLK64(pf, op, i) \
+ pf(i) \
+ op(i, 0) \
+ op(i + 1, 1) \
+ op(i + 2, 2) \
+ op(i + 3, 3)
+
+static void
+xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+{
+ unsigned long lines = bytes >> 8;
+
+ kernel_fpu_begin();
+
+ asm volatile(
+#undef BLOCK
+#define BLOCK(i) \
+ LD(i, 0) \
+ LD(i + 1, 1) \
+ PF1(i) \
+ PF1(i + 2) \
+ LD(i + 2, 2) \
+ LD(i + 3, 3) \
+ PF0(i + 4) \
+ PF0(i + 6) \
+ XO1(i, 0) \
+ XO1(i + 1, 1) \
+ XO1(i + 2, 2) \
+ XO1(i + 3, 3) \
+ ST(i, 0) \
+ ST(i + 1, 1) \
+ ST(i + 2, 2) \
+ ST(i + 3, 3) \
+
+
+ PF0(0)
+ PF0(2)
+
+ " .align 32 ;\n"
+ " 1: ;\n"
+
+ BLOCK(0)
+ BLOCK(4)
+ BLOCK(8)
+ BLOCK(12)
+
+ " add %[inc], %[p1] ;\n"
+ " add %[inc], %[p2] ;\n"
+ " dec %[cnt] ;\n"
+ " jnz 1b ;\n"
+ : [cnt] "+r" (lines),
+ [p1] "+r" (p1), [p2] "+r" (p2)
+ : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
+ : "memory");
+
+ kernel_fpu_end();
+}
+
+static void
+xor_sse_2_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+{
+ unsigned long lines = bytes >> 8;
+
+ kernel_fpu_begin();
+
+ asm volatile(
+#undef BLOCK
+#define BLOCK(i) \
+ BLK64(PF0, LD, i) \
+ BLK64(PF1, XO1, i) \
+ BLK64(NOP, ST, i) \
+
+ " .align 32 ;\n"
+ " 1: ;\n"
+
+ BLOCK(0)
+ BLOCK(4)
+ BLOCK(8)
+ BLOCK(12)
+
+ " add %[inc], %[p1] ;\n"
+ " add %[inc], %[p2] ;\n"
+ " dec %[cnt] ;\n"
+ " jnz 1b ;\n"
+ : [cnt] "+r" (lines),
+ [p1] "+r" (p1), [p2] "+r" (p2)
+ : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
+ : "memory");
+
+ kernel_fpu_end();
+}
+
+static void
+xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3)
+{
+ unsigned long lines = bytes >> 8;
+
+ kernel_fpu_begin();
+
+ asm volatile(
+#undef BLOCK
+#define BLOCK(i) \
+ PF1(i) \
+ PF1(i + 2) \
+ LD(i, 0) \
+ LD(i + 1, 1) \
+ LD(i + 2, 2) \
+ LD(i + 3, 3) \
+ PF2(i) \
+ PF2(i + 2) \
+ PF0(i + 4) \
+ PF0(i + 6) \
+ XO1(i, 0) \
+ XO1(i + 1, 1) \
+ XO1(i + 2, 2) \
+ XO1(i + 3, 3) \
+ XO2(i, 0) \
+ XO2(i + 1, 1) \
+ XO2(i + 2, 2) \
+ XO2(i + 3, 3) \
+ ST(i, 0) \
+ ST(i + 1, 1) \
+ ST(i + 2, 2) \
+ ST(i + 3, 3) \
+
+
+ PF0(0)
+ PF0(2)
+
+ " .align 32 ;\n"
+ " 1: ;\n"
+
+ BLOCK(0)
+ BLOCK(4)
+ BLOCK(8)
+ BLOCK(12)
+
+ " add %[inc], %[p1] ;\n"
+ " add %[inc], %[p2] ;\n"
+ " add %[inc], %[p3] ;\n"
+ " dec %[cnt] ;\n"
+ " jnz 1b ;\n"
+ : [cnt] "+r" (lines),
+ [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3)
+ : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
+ : "memory");
+
+ kernel_fpu_end();
+}
+
+static void
+xor_sse_3_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3)
+{
+ unsigned long lines = bytes >> 8;
+
+ kernel_fpu_begin();
+
+ asm volatile(
+#undef BLOCK
+#define BLOCK(i) \
+ BLK64(PF0, LD, i) \
+ BLK64(PF1, XO1, i) \
+ BLK64(PF2, XO2, i) \
+ BLK64(NOP, ST, i) \
+
+ " .align 32 ;\n"
+ " 1: ;\n"
+
+ BLOCK(0)
+ BLOCK(4)
+ BLOCK(8)
+ BLOCK(12)
+
+ " add %[inc], %[p1] ;\n"
+ " add %[inc], %[p2] ;\n"
+ " add %[inc], %[p3] ;\n"
+ " dec %[cnt] ;\n"
+ " jnz 1b ;\n"
+ : [cnt] "+r" (lines),
+ [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3)
+ : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
+ : "memory");
+
+ kernel_fpu_end();
+}
+
+static void
+xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3, unsigned long *p4)
+{
+ unsigned long lines = bytes >> 8;
+
+ kernel_fpu_begin();
+
+ asm volatile(
+#undef BLOCK
+#define BLOCK(i) \
+ PF1(i) \
+ PF1(i + 2) \
+ LD(i, 0) \
+ LD(i + 1, 1) \
+ LD(i + 2, 2) \
+ LD(i + 3, 3) \
+ PF2(i) \
+ PF2(i + 2) \
+ XO1(i, 0) \
+ XO1(i + 1, 1) \
+ XO1(i + 2, 2) \
+ XO1(i + 3, 3) \
+ PF3(i) \
+ PF3(i + 2) \
+ PF0(i + 4) \
+ PF0(i + 6) \
+ XO2(i, 0) \
+ XO2(i + 1, 1) \
+ XO2(i + 2, 2) \
+ XO2(i + 3, 3) \
+ XO3(i, 0) \
+ XO3(i + 1, 1) \
+ XO3(i + 2, 2) \
+ XO3(i + 3, 3) \
+ ST(i, 0) \
+ ST(i + 1, 1) \
+ ST(i + 2, 2) \
+ ST(i + 3, 3) \
+
+
+ PF0(0)
+ PF0(2)
+
+ " .align 32 ;\n"
+ " 1: ;\n"
+
+ BLOCK(0)
+ BLOCK(4)
+ BLOCK(8)
+ BLOCK(12)
+
+ " add %[inc], %[p1] ;\n"
+ " add %[inc], %[p2] ;\n"
+ " add %[inc], %[p3] ;\n"
+ " add %[inc], %[p4] ;\n"
+ " dec %[cnt] ;\n"
+ " jnz 1b ;\n"
+ : [cnt] "+r" (lines), [p1] "+r" (p1),
+ [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4)
+ : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
+ : "memory");
+
+ kernel_fpu_end();
+}
+
+static void
+xor_sse_4_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3, unsigned long *p4)
+{
+ unsigned long lines = bytes >> 8;
+
+ kernel_fpu_begin();
+
+ asm volatile(
+#undef BLOCK
+#define BLOCK(i) \
+ BLK64(PF0, LD, i) \
+ BLK64(PF1, XO1, i) \
+ BLK64(PF2, XO2, i) \
+ BLK64(PF3, XO3, i) \
+ BLK64(NOP, ST, i) \
+
+ " .align 32 ;\n"
+ " 1: ;\n"
+
+ BLOCK(0)
+ BLOCK(4)
+ BLOCK(8)
+ BLOCK(12)
+
+ " add %[inc], %[p1] ;\n"
+ " add %[inc], %[p2] ;\n"
+ " add %[inc], %[p3] ;\n"
+ " add %[inc], %[p4] ;\n"
+ " dec %[cnt] ;\n"
+ " jnz 1b ;\n"
+ : [cnt] "+r" (lines), [p1] "+r" (p1),
+ [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4)
+ : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
+ : "memory");
+
+ kernel_fpu_end();
+}
+
+static void
+xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3, unsigned long *p4, unsigned long *p5)
+{
+ unsigned long lines = bytes >> 8;
+
+ kernel_fpu_begin();
+
+ asm volatile(
+#undef BLOCK
+#define BLOCK(i) \
+ PF1(i) \
+ PF1(i + 2) \
+ LD(i, 0) \
+ LD(i + 1, 1) \
+ LD(i + 2, 2) \
+ LD(i + 3, 3) \
+ PF2(i) \
+ PF2(i + 2) \
+ XO1(i, 0) \
+ XO1(i + 1, 1) \
+ XO1(i + 2, 2) \
+ XO1(i + 3, 3) \
+ PF3(i) \
+ PF3(i + 2) \
+ XO2(i, 0) \
+ XO2(i + 1, 1) \
+ XO2(i + 2, 2) \
+ XO2(i + 3, 3) \
+ PF4(i) \
+ PF4(i + 2) \
+ PF0(i + 4) \
+ PF0(i + 6) \
+ XO3(i, 0) \
+ XO3(i + 1, 1) \
+ XO3(i + 2, 2) \
+ XO3(i + 3, 3) \
+ XO4(i, 0) \
+ XO4(i + 1, 1) \
+ XO4(i + 2, 2) \
+ XO4(i + 3, 3) \
+ ST(i, 0) \
+ ST(i + 1, 1) \
+ ST(i + 2, 2) \
+ ST(i + 3, 3) \
+
+
+ PF0(0)
+ PF0(2)
+
+ " .align 32 ;\n"
+ " 1: ;\n"
+
+ BLOCK(0)
+ BLOCK(4)
+ BLOCK(8)
+ BLOCK(12)
+
+ " add %[inc], %[p1] ;\n"
+ " add %[inc], %[p2] ;\n"
+ " add %[inc], %[p3] ;\n"
+ " add %[inc], %[p4] ;\n"
+ " add %[inc], %[p5] ;\n"
+ " dec %[cnt] ;\n"
+ " jnz 1b ;\n"
+ : [cnt] "+r" (lines), [p1] "+r" (p1), [p2] "+r" (p2),
+ [p3] "+r" (p3), [p4] "+r" (p4), [p5] "+r" (p5)
+ : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
+ : "memory");
+
+ kernel_fpu_end();
+}
+
+static void
+xor_sse_5_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3, unsigned long *p4, unsigned long *p5)
+{
+ unsigned long lines = bytes >> 8;
+
+ kernel_fpu_begin();
+
+ asm volatile(
+#undef BLOCK
+#define BLOCK(i) \
+ BLK64(PF0, LD, i) \
+ BLK64(PF1, XO1, i) \
+ BLK64(PF2, XO2, i) \
+ BLK64(PF3, XO3, i) \
+ BLK64(PF4, XO4, i) \
+ BLK64(NOP, ST, i) \
+
+ " .align 32 ;\n"
+ " 1: ;\n"
+
+ BLOCK(0)
+ BLOCK(4)
+ BLOCK(8)
+ BLOCK(12)
+
+ " add %[inc], %[p1] ;\n"
+ " add %[inc], %[p2] ;\n"
+ " add %[inc], %[p3] ;\n"
+ " add %[inc], %[p4] ;\n"
+ " add %[inc], %[p5] ;\n"
+ " dec %[cnt] ;\n"
+ " jnz 1b ;\n"
+ : [cnt] "+r" (lines), [p1] "+r" (p1), [p2] "+r" (p2),
+ [p3] "+r" (p3), [p4] "+r" (p4), [p5] "+r" (p5)
+ : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
+ : "memory");
+
+ kernel_fpu_end();
+}
+
+static struct xor_block_template xor_block_sse_pf64 = {
+ .name = "prefetch64-sse",
+ .do_2 = xor_sse_2_pf64,
+ .do_3 = xor_sse_3_pf64,
+ .do_4 = xor_sse_4_pf64,
+ .do_5 = xor_sse_5_pf64,
+};
+
+#undef LD
+#undef XO1
+#undef XO2
+#undef XO3
+#undef XO4
+#undef ST
+#undef NOP
+#undef BLK64
+#undef BLOCK
+
+#undef XOR_CONSTANT_CONSTRAINT
+
#ifdef CONFIG_X86_32
# include <asm/xor_32.h>
#else
# include <asm/xor_64.h>
#endif
-#endif
+
+#define XOR_SELECT_TEMPLATE(FASTEST) \
+ AVX_SELECT(FASTEST)
+
+#endif /* _ASM_X86_XOR_H */
diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h
index f79cb7ec0e06..ce05722e3c68 100644
--- a/arch/x86/include/asm/xor_32.h
+++ b/arch/x86/include/asm/xor_32.h
@@ -2,7 +2,7 @@
#define _ASM_X86_XOR_32_H
/*
- * Optimized RAID-5 checksumming functions for MMX and SSE.
+ * Optimized RAID-5 checksumming functions for MMX.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -529,290 +529,6 @@ static struct xor_block_template xor_block_p5_mmx = {
.do_5 = xor_p5_mmx_5,
};
-/*
- * Cache avoiding checksumming functions utilizing KNI instructions
- * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
- */
-
-#define OFFS(x) "16*("#x")"
-#define PF_OFFS(x) "256+16*("#x")"
-#define PF0(x) " prefetchnta "PF_OFFS(x)"(%1) ;\n"
-#define LD(x, y) " movaps "OFFS(x)"(%1), %%xmm"#y" ;\n"
-#define ST(x, y) " movaps %%xmm"#y", "OFFS(x)"(%1) ;\n"
-#define PF1(x) " prefetchnta "PF_OFFS(x)"(%2) ;\n"
-#define PF2(x) " prefetchnta "PF_OFFS(x)"(%3) ;\n"
-#define PF3(x) " prefetchnta "PF_OFFS(x)"(%4) ;\n"
-#define PF4(x) " prefetchnta "PF_OFFS(x)"(%5) ;\n"
-#define PF5(x) " prefetchnta "PF_OFFS(x)"(%6) ;\n"
-#define XO1(x, y) " xorps "OFFS(x)"(%2), %%xmm"#y" ;\n"
-#define XO2(x, y) " xorps "OFFS(x)"(%3), %%xmm"#y" ;\n"
-#define XO3(x, y) " xorps "OFFS(x)"(%4), %%xmm"#y" ;\n"
-#define XO4(x, y) " xorps "OFFS(x)"(%5), %%xmm"#y" ;\n"
-#define XO5(x, y) " xorps "OFFS(x)"(%6), %%xmm"#y" ;\n"
-
-
-static void
-xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
-{
- unsigned long lines = bytes >> 8;
-
- kernel_fpu_begin();
-
- asm volatile(
-#undef BLOCK
-#define BLOCK(i) \
- LD(i, 0) \
- LD(i + 1, 1) \
- PF1(i) \
- PF1(i + 2) \
- LD(i + 2, 2) \
- LD(i + 3, 3) \
- PF0(i + 4) \
- PF0(i + 6) \
- XO1(i, 0) \
- XO1(i + 1, 1) \
- XO1(i + 2, 2) \
- XO1(i + 3, 3) \
- ST(i, 0) \
- ST(i + 1, 1) \
- ST(i + 2, 2) \
- ST(i + 3, 3) \
-
-
- PF0(0)
- PF0(2)
-
- " .align 32 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " addl $256, %1 ;\n"
- " addl $256, %2 ;\n"
- " decl %0 ;\n"
- " jnz 1b ;\n"
- : "+r" (lines),
- "+r" (p1), "+r" (p2)
- :
- : "memory");
-
- kernel_fpu_end();
-}
-
-static void
-xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3)
-{
- unsigned long lines = bytes >> 8;
-
- kernel_fpu_begin();
-
- asm volatile(
-#undef BLOCK
-#define BLOCK(i) \
- PF1(i) \
- PF1(i + 2) \
- LD(i,0) \
- LD(i + 1, 1) \
- LD(i + 2, 2) \
- LD(i + 3, 3) \
- PF2(i) \
- PF2(i + 2) \
- PF0(i + 4) \
- PF0(i + 6) \
- XO1(i,0) \
- XO1(i + 1, 1) \
- XO1(i + 2, 2) \
- XO1(i + 3, 3) \
- XO2(i,0) \
- XO2(i + 1, 1) \
- XO2(i + 2, 2) \
- XO2(i + 3, 3) \
- ST(i,0) \
- ST(i + 1, 1) \
- ST(i + 2, 2) \
- ST(i + 3, 3) \
-
-
- PF0(0)
- PF0(2)
-
- " .align 32 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " addl $256, %1 ;\n"
- " addl $256, %2 ;\n"
- " addl $256, %3 ;\n"
- " decl %0 ;\n"
- " jnz 1b ;\n"
- : "+r" (lines),
- "+r" (p1), "+r"(p2), "+r"(p3)
- :
- : "memory" );
-
- kernel_fpu_end();
-}
-
-static void
-xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4)
-{
- unsigned long lines = bytes >> 8;
-
- kernel_fpu_begin();
-
- asm volatile(
-#undef BLOCK
-#define BLOCK(i) \
- PF1(i) \
- PF1(i + 2) \
- LD(i,0) \
- LD(i + 1, 1) \
- LD(i + 2, 2) \
- LD(i + 3, 3) \
- PF2(i) \
- PF2(i + 2) \
- XO1(i,0) \
- XO1(i + 1, 1) \
- XO1(i + 2, 2) \
- XO1(i + 3, 3) \
- PF3(i) \
- PF3(i + 2) \
- PF0(i + 4) \
- PF0(i + 6) \
- XO2(i,0) \
- XO2(i + 1, 1) \
- XO2(i + 2, 2) \
- XO2(i + 3, 3) \
- XO3(i,0) \
- XO3(i + 1, 1) \
- XO3(i + 2, 2) \
- XO3(i + 3, 3) \
- ST(i,0) \
- ST(i + 1, 1) \
- ST(i + 2, 2) \
- ST(i + 3, 3) \
-
-
- PF0(0)
- PF0(2)
-
- " .align 32 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " addl $256, %1 ;\n"
- " addl $256, %2 ;\n"
- " addl $256, %3 ;\n"
- " addl $256, %4 ;\n"
- " decl %0 ;\n"
- " jnz 1b ;\n"
- : "+r" (lines),
- "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4)
- :
- : "memory" );
-
- kernel_fpu_end();
-}
-
-static void
-xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4, unsigned long *p5)
-{
- unsigned long lines = bytes >> 8;
-
- kernel_fpu_begin();
-
- /* Make sure GCC forgets anything it knows about p4 or p5,
- such that it won't pass to the asm volatile below a
- register that is shared with any other variable. That's
- because we modify p4 and p5 there, but we can't mark them
- as read/write, otherwise we'd overflow the 10-asm-operands
- limit of GCC < 3.1. */
- asm("" : "+r" (p4), "+r" (p5));
-
- asm volatile(
-#undef BLOCK
-#define BLOCK(i) \
- PF1(i) \
- PF1(i + 2) \
- LD(i,0) \
- LD(i + 1, 1) \
- LD(i + 2, 2) \
- LD(i + 3, 3) \
- PF2(i) \
- PF2(i + 2) \
- XO1(i,0) \
- XO1(i + 1, 1) \
- XO1(i + 2, 2) \
- XO1(i + 3, 3) \
- PF3(i) \
- PF3(i + 2) \
- XO2(i,0) \
- XO2(i + 1, 1) \
- XO2(i + 2, 2) \
- XO2(i + 3, 3) \
- PF4(i) \
- PF4(i + 2) \
- PF0(i + 4) \
- PF0(i + 6) \
- XO3(i,0) \
- XO3(i + 1, 1) \
- XO3(i + 2, 2) \
- XO3(i + 3, 3) \
- XO4(i,0) \
- XO4(i + 1, 1) \
- XO4(i + 2, 2) \
- XO4(i + 3, 3) \
- ST(i,0) \
- ST(i + 1, 1) \
- ST(i + 2, 2) \
- ST(i + 3, 3) \
-
-
- PF0(0)
- PF0(2)
-
- " .align 32 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " addl $256, %1 ;\n"
- " addl $256, %2 ;\n"
- " addl $256, %3 ;\n"
- " addl $256, %4 ;\n"
- " addl $256, %5 ;\n"
- " decl %0 ;\n"
- " jnz 1b ;\n"
- : "+r" (lines),
- "+r" (p1), "+r" (p2), "+r" (p3)
- : "r" (p4), "r" (p5)
- : "memory");
-
- /* p4 and p5 were modified, and now the variables are dead.
- Clobber them just to be sure nobody does something stupid
- like assuming they have some legal value. */
- asm("" : "=r" (p4), "=r" (p5));
-
- kernel_fpu_end();
-}
-
static struct xor_block_template xor_block_pIII_sse = {
.name = "pIII_sse",
.do_2 = xor_sse_2,
@@ -827,26 +543,25 @@ static struct xor_block_template xor_block_pIII_sse = {
/* Also try the generic routines. */
#include <asm-generic/xor.h>
+/* We force the use of the SSE xor block because it can write around L2.
+ We may also be able to load into the L1 only depending on how the cpu
+ deals with a load to a line that is being prefetched. */
#undef XOR_TRY_TEMPLATES
#define XOR_TRY_TEMPLATES \
do { \
- xor_speed(&xor_block_8regs); \
- xor_speed(&xor_block_8regs_p); \
- xor_speed(&xor_block_32regs); \
- xor_speed(&xor_block_32regs_p); \
AVX_XOR_SPEED; \
- if (cpu_has_xmm) \
+ if (cpu_has_xmm) { \
xor_speed(&xor_block_pIII_sse); \
- if (cpu_has_mmx) { \
+ xor_speed(&xor_block_sse_pf64); \
+ } else if (cpu_has_mmx) { \
xor_speed(&xor_block_pII_mmx); \
xor_speed(&xor_block_p5_mmx); \
+ } else { \
+ xor_speed(&xor_block_8regs); \
+ xor_speed(&xor_block_8regs_p); \
+ xor_speed(&xor_block_32regs); \
+ xor_speed(&xor_block_32regs_p); \
} \
} while (0)
-/* We force the use of the SSE xor block because it can write around L2.
- We may also be able to load into the L1 only depending on how the cpu
- deals with a load to a line that is being prefetched. */
-#define XOR_SELECT_TEMPLATE(FASTEST) \
- AVX_SELECT(cpu_has_xmm ? &xor_block_pIII_sse : FASTEST)
-
#endif /* _ASM_X86_XOR_32_H */
diff --git a/arch/x86/include/asm/xor_64.h b/arch/x86/include/asm/xor_64.h
index 87ac522c4af5..546f1e3b87cc 100644
--- a/arch/x86/include/asm/xor_64.h
+++ b/arch/x86/include/asm/xor_64.h
@@ -1,301 +1,6 @@
#ifndef _ASM_X86_XOR_64_H
#define _ASM_X86_XOR_64_H
-/*
- * Optimized RAID-5 checksumming functions for MMX and SSE.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * You should have received a copy of the GNU General Public License
- * (for example /usr/src/linux/COPYING); if not, write to the Free
- * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-
-/*
- * Cache avoiding checksumming functions utilizing KNI instructions
- * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
- */
-
-/*
- * Based on
- * High-speed RAID5 checksumming functions utilizing SSE instructions.
- * Copyright (C) 1998 Ingo Molnar.
- */
-
-/*
- * x86-64 changes / gcc fixes from Andi Kleen.
- * Copyright 2002 Andi Kleen, SuSE Labs.
- *
- * This hasn't been optimized for the hammer yet, but there are likely
- * no advantages to be gotten from x86-64 here anyways.
- */
-
-#include <asm/i387.h>
-
-#define OFFS(x) "16*("#x")"
-#define PF_OFFS(x) "256+16*("#x")"
-#define PF0(x) " prefetchnta "PF_OFFS(x)"(%[p1]) ;\n"
-#define LD(x, y) " movaps "OFFS(x)"(%[p1]), %%xmm"#y" ;\n"
-#define ST(x, y) " movaps %%xmm"#y", "OFFS(x)"(%[p1]) ;\n"
-#define PF1(x) " prefetchnta "PF_OFFS(x)"(%[p2]) ;\n"
-#define PF2(x) " prefetchnta "PF_OFFS(x)"(%[p3]) ;\n"
-#define PF3(x) " prefetchnta "PF_OFFS(x)"(%[p4]) ;\n"
-#define PF4(x) " prefetchnta "PF_OFFS(x)"(%[p5]) ;\n"
-#define PF5(x) " prefetchnta "PF_OFFS(x)"(%[p6]) ;\n"
-#define XO1(x, y) " xorps "OFFS(x)"(%[p2]), %%xmm"#y" ;\n"
-#define XO2(x, y) " xorps "OFFS(x)"(%[p3]), %%xmm"#y" ;\n"
-#define XO3(x, y) " xorps "OFFS(x)"(%[p4]), %%xmm"#y" ;\n"
-#define XO4(x, y) " xorps "OFFS(x)"(%[p5]), %%xmm"#y" ;\n"
-#define XO5(x, y) " xorps "OFFS(x)"(%[p6]), %%xmm"#y" ;\n"
-
-
-static void
-xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
-{
- unsigned int lines = bytes >> 8;
-
- kernel_fpu_begin();
-
- asm volatile(
-#undef BLOCK
-#define BLOCK(i) \
- LD(i, 0) \
- LD(i + 1, 1) \
- PF1(i) \
- PF1(i + 2) \
- LD(i + 2, 2) \
- LD(i + 3, 3) \
- PF0(i + 4) \
- PF0(i + 6) \
- XO1(i, 0) \
- XO1(i + 1, 1) \
- XO1(i + 2, 2) \
- XO1(i + 3, 3) \
- ST(i, 0) \
- ST(i + 1, 1) \
- ST(i + 2, 2) \
- ST(i + 3, 3) \
-
-
- PF0(0)
- PF0(2)
-
- " .align 32 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " addq %[inc], %[p1] ;\n"
- " addq %[inc], %[p2] ;\n"
- " decl %[cnt] ; jnz 1b"
- : [p1] "+r" (p1), [p2] "+r" (p2), [cnt] "+r" (lines)
- : [inc] "r" (256UL)
- : "memory");
-
- kernel_fpu_end();
-}
-
-static void
-xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3)
-{
- unsigned int lines = bytes >> 8;
-
- kernel_fpu_begin();
- asm volatile(
-#undef BLOCK
-#define BLOCK(i) \
- PF1(i) \
- PF1(i + 2) \
- LD(i, 0) \
- LD(i + 1, 1) \
- LD(i + 2, 2) \
- LD(i + 3, 3) \
- PF2(i) \
- PF2(i + 2) \
- PF0(i + 4) \
- PF0(i + 6) \
- XO1(i, 0) \
- XO1(i + 1, 1) \
- XO1(i + 2, 2) \
- XO1(i + 3, 3) \
- XO2(i, 0) \
- XO2(i + 1, 1) \
- XO2(i + 2, 2) \
- XO2(i + 3, 3) \
- ST(i, 0) \
- ST(i + 1, 1) \
- ST(i + 2, 2) \
- ST(i + 3, 3) \
-
-
- PF0(0)
- PF0(2)
-
- " .align 32 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " addq %[inc], %[p1] ;\n"
- " addq %[inc], %[p2] ;\n"
- " addq %[inc], %[p3] ;\n"
- " decl %[cnt] ; jnz 1b"
- : [cnt] "+r" (lines),
- [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3)
- : [inc] "r" (256UL)
- : "memory");
- kernel_fpu_end();
-}
-
-static void
-xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4)
-{
- unsigned int lines = bytes >> 8;
-
- kernel_fpu_begin();
-
- asm volatile(
-#undef BLOCK
-#define BLOCK(i) \
- PF1(i) \
- PF1(i + 2) \
- LD(i, 0) \
- LD(i + 1, 1) \
- LD(i + 2, 2) \
- LD(i + 3, 3) \
- PF2(i) \
- PF2(i + 2) \
- XO1(i, 0) \
- XO1(i + 1, 1) \
- XO1(i + 2, 2) \
- XO1(i + 3, 3) \
- PF3(i) \
- PF3(i + 2) \
- PF0(i + 4) \
- PF0(i + 6) \
- XO2(i, 0) \
- XO2(i + 1, 1) \
- XO2(i + 2, 2) \
- XO2(i + 3, 3) \
- XO3(i, 0) \
- XO3(i + 1, 1) \
- XO3(i + 2, 2) \
- XO3(i + 3, 3) \
- ST(i, 0) \
- ST(i + 1, 1) \
- ST(i + 2, 2) \
- ST(i + 3, 3) \
-
-
- PF0(0)
- PF0(2)
-
- " .align 32 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " addq %[inc], %[p1] ;\n"
- " addq %[inc], %[p2] ;\n"
- " addq %[inc], %[p3] ;\n"
- " addq %[inc], %[p4] ;\n"
- " decl %[cnt] ; jnz 1b"
- : [cnt] "+c" (lines),
- [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4)
- : [inc] "r" (256UL)
- : "memory" );
-
- kernel_fpu_end();
-}
-
-static void
-xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4, unsigned long *p5)
-{
- unsigned int lines = bytes >> 8;
-
- kernel_fpu_begin();
-
- asm volatile(
-#undef BLOCK
-#define BLOCK(i) \
- PF1(i) \
- PF1(i + 2) \
- LD(i, 0) \
- LD(i + 1, 1) \
- LD(i + 2, 2) \
- LD(i + 3, 3) \
- PF2(i) \
- PF2(i + 2) \
- XO1(i, 0) \
- XO1(i + 1, 1) \
- XO1(i + 2, 2) \
- XO1(i + 3, 3) \
- PF3(i) \
- PF3(i + 2) \
- XO2(i, 0) \
- XO2(i + 1, 1) \
- XO2(i + 2, 2) \
- XO2(i + 3, 3) \
- PF4(i) \
- PF4(i + 2) \
- PF0(i + 4) \
- PF0(i + 6) \
- XO3(i, 0) \
- XO3(i + 1, 1) \
- XO3(i + 2, 2) \
- XO3(i + 3, 3) \
- XO4(i, 0) \
- XO4(i + 1, 1) \
- XO4(i + 2, 2) \
- XO4(i + 3, 3) \
- ST(i, 0) \
- ST(i + 1, 1) \
- ST(i + 2, 2) \
- ST(i + 3, 3) \
-
-
- PF0(0)
- PF0(2)
-
- " .align 32 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " addq %[inc], %[p1] ;\n"
- " addq %[inc], %[p2] ;\n"
- " addq %[inc], %[p3] ;\n"
- " addq %[inc], %[p4] ;\n"
- " addq %[inc], %[p5] ;\n"
- " decl %[cnt] ; jnz 1b"
- : [cnt] "+c" (lines),
- [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4),
- [p5] "+r" (p5)
- : [inc] "r" (256UL)
- : "memory");
-
- kernel_fpu_end();
-}
-
static struct xor_block_template xor_block_sse = {
.name = "generic_sse",
.do_2 = xor_sse_2,
@@ -308,17 +13,15 @@ static struct xor_block_template xor_block_sse = {
/* Also try the AVX routines */
#include <asm/xor_avx.h>
+/* We force the use of the SSE xor block because it can write around L2.
+ We may also be able to load into the L1 only depending on how the cpu
+ deals with a load to a line that is being prefetched. */
#undef XOR_TRY_TEMPLATES
#define XOR_TRY_TEMPLATES \
do { \
AVX_XOR_SPEED; \
+ xor_speed(&xor_block_sse_pf64); \
xor_speed(&xor_block_sse); \
} while (0)
-/* We force the use of the SSE xor block because it can write around L2.
- We may also be able to load into the L1 only depending on how the cpu
- deals with a load to a line that is being prefetched. */
-#define XOR_SELECT_TEMPLATE(FASTEST) \
- AVX_SELECT(&xor_block_sse)
-
#endif /* _ASM_X86_XOR_64_H */