Merge branch 'x86-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'x86-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (246 commits) x86: traps.c replace #if CONFIG_X86_32 with #ifdef CONFIG_X86_32 x86: PAT: fix address types in track_pfn_vma_new() x86: prioritize the FPU traps for the error code x86: PAT: pfnmap documentation update changes x86: PAT: move track untrack pfnmap stubs to asm-generic x86: PAT: remove follow_pfnmap_pte in favor of follow_phys x86: PAT: modify follow_phys to return phys_addr prot and return value x86: PAT: clarify is_linear_pfn_mapping() interface x86: ia32_signal: remove unnecessary declaration x86: common.c boot_cpu_stack and boot_exception_stacks should be static x86: fix intel x86_64 llc_shared_map/cpu_llc_id anomolies x86: fix warning in arch/x86/kernel/microcode_amd.c x86: ia32.h: remove unused struct sigfram32 and rt_sigframe32 x86: asm-offset_64: use rt_sigframe_ia32 x86: sigframe.h: include headers for dependency x86: traps.c declare functions before they get used x86: PAT: update documentation to cover pgprot and remap_pfn related changes - v3 x86: PAT: add pgprot_writecombine() interface for drivers - v3 x86: PAT: change pgprot_noncached to uc_minus instead of strong uc - v3 x86: PAT: implement track/untrack of pfnmap regions for x86 - v3 ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2008-12-28 12:07:57 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> 2008-12-28 12:07:57 -0800
commit: be9c5ae4eeec2e85527e95647348b8ea4eb25128 (patch)
tree: 59383b15bc0891b8a44500a0ac172a8850f1068d /arch/x86/kernel/cpu
parent: bb26c6c29b7cc9f39e491b074b09f3c284738d36 (diff)
parent: 79a66b96c339626a3e4b226fefc0e45244cfe6ff (diff)
11 files changed, 389 insertions, 195 deletions
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 82ec6075c057..a5c04e88777e 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -4,6 +4,7 @@
 
 obj-y			:= intel_cacheinfo.o addon_cpuid_features.o
 obj-y			+= proc.o capflags.o powerflags.o common.o
+obj-y			+= vmware.o hypervisor.o
 
 obj-$(CONFIG_X86_32)	+= bugs.o cmpxchg.o
 obj-$(CONFIG_X86_64)	+= bugs_64.o
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c
index ef8f831af823..2cf23634b6d9 100644
--- a/arch/x86/kernel/cpu/addon_cpuid_features.c
+++ b/arch/x86/kernel/cpu/addon_cpuid_features.c
@@ -120,9 +120,17 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c)
 	c->cpu_core_id = phys_pkg_id(c->initial_apicid, ht_mask_width)
 						 & core_select_mask;
 	c->phys_proc_id = phys_pkg_id(c->initial_apicid, core_plus_mask_width);
+	/*
+	 * Reinit the apicid, now that we have extended initial_apicid.
+	 */
+	c->apicid = phys_pkg_id(c->initial_apicid, 0);
 #else
 	c->cpu_core_id = phys_pkg_id(ht_mask_width) & core_select_mask;
 	c->phys_proc_id = phys_pkg_id(core_plus_mask_width);
+	/*
+	 * Reinit the apicid, now that we have extended initial_apicid.
+	 */
+	c->apicid = phys_pkg_id(0);
 #endif
 	c->x86_max_cores = (core_level_siblings / smp_num_siblings);
 
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 8f1e31db2ad5..7c878f6aa919 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -283,9 +283,14 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
 {
 	early_init_amd_mc(c);
 
-	/* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */
-	if (c->x86_power & (1<<8))
+	/*
+	 * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate
+	 * with P/T states and does not stop in deep C-states
+	 */
+	if (c->x86_power & (1 << 8)) {
 		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+		set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
+	}
 
 #ifdef CONFIG_X86_64
 	set_cpu_cap(c, X86_FEATURE_SYSCALL32);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index b9c9ea0217a9..42e0853030cb 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -36,6 +36,7 @@
 #include <asm/proto.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
+#include <asm/hypervisor.h>
 
 #include "cpu.h"
 
@@ -703,6 +704,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 	detect_ht(c);
 #endif
 
+	init_hypervisor(c);
 	/*
 	 * On SMP, boot_cpu_data holds the common feature set between
 	 * all CPUs; so make sure that we indicate which features are
@@ -862,7 +864,7 @@ EXPORT_SYMBOL(_cpu_pda);
 
 struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
 
-char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss;
+static char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss;
 
 void __cpuinit pda_init(int cpu)
 {
@@ -903,8 +905,8 @@ void __cpuinit pda_init(int cpu)
 	}
 }
 
-char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
-			   DEBUG_STKSZ] __page_aligned_bss;
+static char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
+				  DEBUG_STKSZ] __page_aligned_bss;
 
 extern asmlinkage void ignore_sysret(void);
 
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
new file mode 100644
index 000000000000..fb5b86af0b01
--- /dev/null
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -0,0 +1,58 @@
+/*
+ * Common hypervisor code
+ *
+ * Copyright (C) 2008, VMware, Inc.
+ * Author : Alok N Kataria <akataria@vmware.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+
+#include <asm/processor.h>
+#include <asm/vmware.h>
+#include <asm/hypervisor.h>
+
+static inline void __cpuinit
+detect_hypervisor_vendor(struct cpuinfo_x86 *c)
+{
+	if (vmware_platform()) {
+		c->x86_hyper_vendor = X86_HYPER_VENDOR_VMWARE;
+	} else {
+		c->x86_hyper_vendor = X86_HYPER_VENDOR_NONE;
+	}
+}
+
+unsigned long get_hypervisor_tsc_freq(void)
+{
+	if (boot_cpu_data.x86_hyper_vendor == X86_HYPER_VENDOR_VMWARE)
+		return vmware_get_tsc_khz();
+	return 0;
+}
+
+static inline void __cpuinit
+hypervisor_set_feature_bits(struct cpuinfo_x86 *c)
+{
+	if (boot_cpu_data.x86_hyper_vendor == X86_HYPER_VENDOR_VMWARE) {
+		vmware_set_feature_bits(c);
+		return;
+	}
+}
+
+void __cpuinit init_hypervisor(struct cpuinfo_x86 *c)
+{
+	detect_hypervisor_vendor(c);
+	hypervisor_set_feature_bits(c);
+}
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index cce0b6118d55..ccfd2047630c 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -41,6 +41,16 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
 	if (c->x86 == 15 && c->x86_cache_alignment == 64)
 		c->x86_cache_alignment = 128;
 #endif
+
+	/*
+	 * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate
+	 * with P/T states and does not stop in deep C-states
+	 */
+	if (c->x86_power & (1 << 8)) {
+		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+		set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
+	}
+
 }
 
 #ifdef CONFIG_X86_32
@@ -242,6 +252,13 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 
 	intel_workarounds(c);
 
+	/*
+	 * Detect the extended topology information if available. This
+	 * will reinitialise the initial_apicid which will be used
+	 * in init_intel_cacheinfo()
+	 */
+	detect_extended_topology(c);
+
 	l2 = init_intel_cacheinfo(c);
 	if (c->cpuid_level > 9) {
 		unsigned eax = cpuid_eax(10);
@@ -307,13 +324,11 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 		set_cpu_cap(c, X86_FEATURE_P4);
 	if (c->x86 == 6)
 		set_cpu_cap(c, X86_FEATURE_P3);
+#endif
 
 	if (cpu_has_bts)
 		ptrace_bts_init_intel(c);
 
-#endif
-
-	detect_extended_topology(c);
 	if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) {
 		/*
 		 * let's use the legacy cpuid vector 0x1 and 0x4 for topology
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 3f46afbb1cf1..68b5d8681cbb 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -644,20 +644,17 @@ static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf)
 	return show_shared_cpu_map_func(leaf, 1, buf);
 }
 
-static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) {
-	switch(this_leaf->eax.split.type) {
-	    case CACHE_TYPE_DATA:
+static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf)
+{
+	switch (this_leaf->eax.split.type) {
+	case CACHE_TYPE_DATA:
 		return sprintf(buf, "Data\n");
-		break;
-	    case CACHE_TYPE_INST:
+	case CACHE_TYPE_INST:
 		return sprintf(buf, "Instruction\n");
-		break;
-	    case CACHE_TYPE_UNIFIED:
+	case CACHE_TYPE_UNIFIED:
 		return sprintf(buf, "Unified\n");
-		break;
-	    default:
+	default:
 		return sprintf(buf, "Unknown\n");
-		break;
 	}
 }
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index 5eb390a4b2e9..748c8f9e7a05 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -237,7 +237,7 @@ asmlinkage void mce_threshold_interrupt(void)
 		}
 	}
 out:
-	add_pda(irq_threshold_count, 1);
+	inc_irq_stat(irq_threshold_count);
 	irq_exit();
 }
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
index c17eaf5dd6dd..4b48f251fd39 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
@@ -26,7 +26,7 @@ asmlinkage void smp_thermal_interrupt(void)
 	if (therm_throt_process(msr_val & 1))
 		mce_log_therm_throt_event(smp_processor_id(), msr_val);
 
-	add_pda(irq_thermal_count, 1);
+	inc_irq_stat(irq_thermal_count);
 	irq_exit();
 }
 
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index c78c04821ea1..1159e269e596 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -803,6 +803,7 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
 }
 
 static struct res_range __initdata range[RANGE_NUM];
+static int __initdata nr_range;
 
 #ifdef CONFIG_MTRR_SANITIZER
 
@@ -1206,39 +1207,43 @@ struct mtrr_cleanup_result {
 #define PSHIFT		(PAGE_SHIFT - 10)
 
 static struct mtrr_cleanup_result __initdata result[NUM_RESULT];
-static struct res_range __initdata range_new[RANGE_NUM];
 static unsigned long __initdata min_loss_pfn[RANGE_NUM];
 
-static int __init mtrr_cleanup(unsigned address_bits)
+static void __init print_out_mtrr_range_state(void)
 {
-	unsigned long extra_remove_base, extra_remove_size;
-	unsigned long base, size, def, dummy;
-	mtrr_type type;
-	int nr_range, nr_range_new;
-	u64 chunk_size, gran_size;
-	unsigned long range_sums, range_sums_new;
-	int index_good;
-	int num_reg_good;
 	int i;
+	char start_factor = 'K', size_factor = 'K';
+	unsigned long start_base, size_base;
+	mtrr_type type;
 
-	/* extra one for all 0 */
-	int num[MTRR_NUM_TYPES + 1];
+	for (i = 0; i < num_var_ranges; i++) {
 
-	if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1)
-		return 0;
-	rdmsr(MTRRdefType_MSR, def, dummy);
-	def &= 0xff;
-	if (def != MTRR_TYPE_UNCACHABLE)
-		return 0;
+		size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10);
+		if (!size_base)
+			continue;
 
-	/* get it and store it aside */
-	memset(range_state, 0, sizeof(range_state));
-	for (i = 0; i < num_var_ranges; i++) {
-		mtrr_if->get(i, &base, &size, &type);
-		range_state[i].base_pfn = base;
-		range_state[i].size_pfn = size;
-		range_state[i].type = type;
+		size_base = to_size_factor(size_base, &size_factor),
+		start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10);
+		start_base = to_size_factor(start_base, &start_factor),
+		type = range_state[i].type;
+
+		printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
+			i, start_base, start_factor,
+			size_base, size_factor,
+			(type == MTRR_TYPE_UNCACHABLE) ? "UC" :
+			    ((type == MTRR_TYPE_WRPROT) ? "WP" :
+			     ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other"))
+			);
 	}
+}
+
+static int __init mtrr_need_cleanup(void)
+{
+	int i;
+	mtrr_type type;
+	unsigned long size;
+	/* extra one for all 0 */
+	int num[MTRR_NUM_TYPES + 1];
 
 	/* check entries number */
 	memset(num, 0, sizeof(num));
@@ -1263,29 +1268,133 @@ static int __init mtrr_cleanup(unsigned address_bits)
 		num_var_ranges - num[MTRR_NUM_TYPES])
 		return 0;
 
-	/* print original var MTRRs at first, for debugging: */
-	printk(KERN_DEBUG "original variable MTRRs\n");
-	for (i = 0; i < num_var_ranges; i++) {
-		char start_factor = 'K', size_factor = 'K';
-		unsigned long start_base, size_base;
+	return 1;
+}
 
-		size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10);
-		if (!size_base)
-			continue;
+static unsigned long __initdata range_sums;
+static void __init mtrr_calc_range_state(u64 chunk_size, u64 gran_size,
+					 unsigned long extra_remove_base,
+					 unsigned long extra_remove_size,
+					 int i)
+{
+	int num_reg;
+	static struct res_range range_new[RANGE_NUM];
+	static int nr_range_new;
+	unsigned long range_sums_new;
+
+	/* convert ranges to var ranges state */
+	num_reg = x86_setup_var_mtrrs(range, nr_range,
+						chunk_size, gran_size);
+
+	/* we got new setting in range_state, check it */
+	memset(range_new, 0, sizeof(range_new));
+	nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
+				extra_remove_base, extra_remove_size);
+	range_sums_new = sum_ranges(range_new, nr_range_new);
+
+	result[i].chunk_sizek = chunk_size >> 10;
+	result[i].gran_sizek = gran_size >> 10;
+	result[i].num_reg = num_reg;
+	if (range_sums < range_sums_new) {
+		result[i].lose_cover_sizek =
+			(range_sums_new - range_sums) << PSHIFT;
+		result[i].bad = 1;
+	} else
+		result[i].lose_cover_sizek =
+			(range_sums - range_sums_new) << PSHIFT;
 
-		size_base = to_size_factor(size_base, &size_factor),
-		start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10);
-		start_base = to_size_factor(start_base, &start_factor),
-		type = range_state[i].type;
+	/* double check it */
+	if (!result[i].bad && !result[i].lose_cover_sizek) {
+		if (nr_range_new != nr_range ||
+			memcmp(range, range_new, sizeof(range)))
+				result[i].bad = 1;
+	}
 
-		printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
-			i, start_base, start_factor,
-			size_base, size_factor,
-			(type == MTRR_TYPE_UNCACHABLE) ? "UC" :
-			    ((type == MTRR_TYPE_WRPROT) ? "WP" :
-			     ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other"))
-			);
+	if (!result[i].bad && (range_sums - range_sums_new <
+				min_loss_pfn[num_reg])) {
+		min_loss_pfn[num_reg] =
+			range_sums - range_sums_new;
 	}
+}
+
+static void __init mtrr_print_out_one_result(int i)
+{
+	char gran_factor, chunk_factor, lose_factor;
+	unsigned long gran_base, chunk_base, lose_base;
+
+	gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
+	chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
+	lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
+	printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t",
+			result[i].bad ? "*BAD*" : " ",
+			gran_base, gran_factor, chunk_base, chunk_factor);
+	printk(KERN_CONT "num_reg: %d  \tlose cover RAM: %s%ld%c\n",
+			result[i].num_reg, result[i].bad ? "-" : "",
+			lose_base, lose_factor);
+}
+
+static int __init mtrr_search_optimal_index(void)
+{
+	int i;
+	int num_reg_good;
+	int index_good;
+
+	if (nr_mtrr_spare_reg >= num_var_ranges)
+		nr_mtrr_spare_reg = num_var_ranges - 1;
+	num_reg_good = -1;
+	for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) {
+		if (!min_loss_pfn[i])
+			num_reg_good = i;
+	}
+
+	index_good = -1;
+	if (num_reg_good != -1) {
+		for (i = 0; i < NUM_RESULT; i++) {
+			if (!result[i].bad &&
+			    result[i].num_reg == num_reg_good &&
+			    !result[i].lose_cover_sizek) {
+				index_good = i;
+				break;
+			}
+		}
+	}
+
+	return index_good;
+}
+
+
+static int __init mtrr_cleanup(unsigned address_bits)
+{
+	unsigned long extra_remove_base, extra_remove_size;
+	unsigned long base, size, def, dummy;
+	mtrr_type type;
+	u64 chunk_size, gran_size;
+	int index_good;
+	int i;
+
+	if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1)
+		return 0;
+	rdmsr(MTRRdefType_MSR, def, dummy);
+	def &= 0xff;
+	if (def != MTRR_TYPE_UNCACHABLE)
+		return 0;
+
+	/* get it and store it aside */
+	memset(range_state, 0, sizeof(range_state));
+	for (i = 0; i < num_var_ranges; i++) {
+		mtrr_if->get(i, &base, &size, &type);
+		range_state[i].base_pfn = base;
+		range_state[i].size_pfn = size;
+		range_state[i].type = type;
+	}
+
+	/* check if we need handle it and can handle it */
+	if (!mtrr_need_cleanup())
+		return 0;
+
+	/* print original var MTRRs at first, for debugging: */
+	printk(KERN_DEBUG "original variable MTRRs\n");
+	print_out_mtrr_range_state();
 
 	memset(range, 0, sizeof(range));
 	extra_remove_size = 0;
@@ -1309,176 +1418,64 @@ static int __init mtrr_cleanup(unsigned address_bits)
 	       range_sums >> (20 - PAGE_SHIFT));
 
 	if (mtrr_chunk_size && mtrr_gran_size) {
-		int num_reg;
-		char gran_factor, chunk_factor, lose_factor;
-		unsigned long gran_base, chunk_base, lose_base;
-
-		debug_print++;
-		/* convert ranges to var ranges state */
-		num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size,
-					      mtrr_gran_size);
+		i = 0;
+		mtrr_calc_range_state(mtrr_chunk_size, mtrr_gran_size,
+				      extra_remove_base, extra_remove_size, i);
 
-		/* we got new setting in range_state, check it */
-		memset(range_new, 0, sizeof(range_new));
-		nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
-						      extra_remove_base,
-						      extra_remove_size);
-		range_sums_new = sum_ranges(range_new, nr_range_new);
+		mtrr_print_out_one_result(i);
 
-		i = 0;
-		result[i].chunk_sizek = mtrr_chunk_size >> 10;
-		result[i].gran_sizek = mtrr_gran_size >> 10;
-		result[i].num_reg = num_reg;
-		if (range_sums < range_sums_new) {
-			result[i].lose_cover_sizek =
-				(range_sums_new - range_sums) << PSHIFT;
-			result[i].bad = 1;
-		} else
-			result[i].lose_cover_sizek =
-				(range_sums - range_sums_new) << PSHIFT;
-
-		gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
-		chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
-		lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
-		printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t",
-			 result[i].bad?"*BAD*":" ",
-			 gran_base, gran_factor, chunk_base, chunk_factor);
-		printk(KERN_CONT "num_reg: %d  \tlose cover RAM: %s%ld%c\n",
-			 result[i].num_reg, result[i].bad?"-":"",
-			 lose_base, lose_factor);
 		if (!result[i].bad) {
 			set_var_mtrr_all(address_bits);
 			return 1;
 		}
 		printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, "
 		       "will find optimal one\n");
-		debug_print--;
-		memset(result, 0, sizeof(result[0]));
 	}
 
 	i = 0;
 	memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn));
 	memset(result, 0, sizeof(result));
 	for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) {
-		char gran_factor;
-		unsigned long gran_base;
-
-		if (debug_print)
-			gran_base = to_size_factor(gran_size >> 10, &gran_factor);
 
 		for (chunk_size = gran_size; chunk_size < (1ULL<<32);
 		     chunk_size <<= 1) {
-			int num_reg;
 
-			if (debug_print) {
-				char chunk_factor;
-				unsigned long chunk_base;
-
-				chunk_base = to_size_factor(chunk_size>>10, &chunk_factor),
-				printk(KERN_INFO "\n");
-				printk(KERN_INFO "gran_size: %ld%c   chunk_size: %ld%c \n",
-				       gran_base, gran_factor, chunk_base, chunk_factor);
-			}
 			if (i >= NUM_RESULT)
 				continue;
 
-			/* convert ranges to var ranges state */
-			num_reg = x86_setup_var_mtrrs(range, nr_range,
-							 chunk_size, gran_size);
-
-			/* we got new setting in range_state, check it */
-			memset(range_new, 0, sizeof(range_new));
-			nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
-					 extra_remove_base, extra_remove_size);
-			range_sums_new = sum_ranges(range_new, nr_range_new);
-
-			result[i].chunk_sizek = chunk_size >> 10;
-			result[i].gran_sizek = gran_size >> 10;
-			result[i].num_reg = num_reg;
-			if (range_sums < range_sums_new) {
-				result[i].lose_cover_sizek =
-					(range_sums_new - range_sums) << PSHIFT;
-				result[i].bad = 1;
-			} else
-				result[i].lose_cover_sizek =
-					(range_sums - range_sums_new) << PSHIFT;
-
-			/* double check it */
-			if (!result[i].bad && !result[i].lose_cover_sizek) {
-				if (nr_range_new != nr_range ||
-					memcmp(range, range_new, sizeof(range)))
-						result[i].bad = 1;
+			mtrr_calc_range_state(chunk_size, gran_size,
+				      extra_remove_base, extra_remove_size, i);
+			if (debug_print) {
+				mtrr_print_out_one_result(i);
+				printk(KERN_INFO "\n");
 			}
 
-			if (!result[i].bad && (range_sums - range_sums_new <
-					       min_loss_pfn[num_reg])) {
-				min_loss_pfn[num_reg] =
-					range_sums - range_sums_new;
-			}
 			i++;
 		}
 	}
 
-	/* print out all */
-	for (i = 0; i < NUM_RESULT; i++) {
-		char gran_factor, chunk_factor, lose_factor;
-		unsigned long gran_base, chunk_base, lose_base;
-
-		gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
-		chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
-		lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
-		printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t",
-			 result[i].bad?"*BAD*":" ",
-			 gran_base, gran_factor, chunk_base, chunk_factor);
-		printk(KERN_CONT "num_reg: %d  \tlose cover RAM: %s%ld%c\n",
-			 result[i].num_reg, result[i].bad?"-":"",
-			 lose_base, lose_factor);
-	}
-
 	/* try to find the optimal index */
-	if (nr_mtrr_spare_reg >= num_var_ranges)
-		nr_mtrr_spare_reg = num_var_ranges - 1;
-	num_reg_good = -1;
-	for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) {
-		if (!min_loss_pfn[i])
-			num_reg_good = i;
-	}
-
-	index_good = -1;
-	if (num_reg_good != -1) {
-		for (i = 0; i < NUM_RESULT; i++) {
-			if (!result[i].bad &&
-			    result[i].num_reg == num_reg_good &&
-			    !result[i].lose_cover_sizek) {
-				index_good = i;
-				break;
-			}
-		}
-	}
+	index_good = mtrr_search_optimal_index();
 
 	if (index_good != -1) {
-		char gran_factor, chunk_factor, lose_factor;
-		unsigned long gran_base, chunk_base, lose_base;
-
 		printk(KERN_INFO "Found optimal setting for mtrr clean up\n");
 		i = index_good;
-		gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
-		chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
-		lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
-		printk(KERN_INFO "gran_size: %ld%c \tchunk_size: %ld%c \t",
-			 gran_base, gran_factor, chunk_base, chunk_factor);
-		printk(KERN_CONT "num_reg: %d  \tlose RAM: %ld%c\n",
-			 result[i].num_reg, lose_base, lose_factor);
+		mtrr_print_out_one_result(i);
+
 		/* convert ranges to var ranges state */
 		chunk_size = result[i].chunk_sizek;
 		chunk_size <<= 10;
 		gran_size = result[i].gran_sizek;
 		gran_size <<= 10;
-		debug_print++;
 		x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
-		debug_print--;
 		set_var_mtrr_all(address_bits);
+		printk(KERN_DEBUG "New variable MTRRs\n");
+		print_out_mtrr_range_state();
 		return 1;
+	} else {
+		/* print out all */
+		for (i = 0; i < NUM_RESULT; i++)
+			mtrr_print_out_one_result(i);
 	}
 
 	printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n");
@@ -1562,7 +1559,6 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
 {
 	unsigned long i, base, size, highest_pfn = 0, def, dummy;
 	mtrr_type type;
-	int nr_range;
 	u64 total_trim_size;
 
 	/* extra one for all 0 */
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
new file mode 100644
index 000000000000..284c399e3234
--- /dev/null
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -0,0 +1,112 @@
+/*
+ * VMware Detection code.
+ *
+ * Copyright (C) 2008, VMware, Inc.
+ * Author : Alok N Kataria <akataria@vmware.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+
+#include <linux/dmi.h>
+#include <asm/div64.h>
+#include <asm/vmware.h>
+
+#define CPUID_VMWARE_INFO_LEAF	0x40000000
+#define VMWARE_HYPERVISOR_MAGIC	0x564D5868
+#define VMWARE_HYPERVISOR_PORT	0x5658
+
+#define VMWARE_PORT_CMD_GETVERSION	10
+#define VMWARE_PORT_CMD_GETHZ		45
+
+#define VMWARE_PORT(cmd, eax, ebx, ecx, edx)				\
+	__asm__("inl (%%dx)" :						\
+			"=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) :	\
+			"0"(VMWARE_HYPERVISOR_MAGIC),			\
+			"1"(VMWARE_PORT_CMD_##cmd),			\
+			"2"(VMWARE_HYPERVISOR_PORT), "3"(UINT_MAX) :	\
+			"memory");
+
+static inline int __vmware_platform(void)
+{
+	uint32_t eax, ebx, ecx, edx;
+	VMWARE_PORT(GETVERSION, eax, ebx, ecx, edx);
+	return eax != (uint32_t)-1 && ebx == VMWARE_HYPERVISOR_MAGIC;
+}
+
+static unsigned long __vmware_get_tsc_khz(void)
+{
+        uint64_t tsc_hz;
+        uint32_t eax, ebx, ecx, edx;
+
+        VMWARE_PORT(GETHZ, eax, ebx, ecx, edx);
+
+        if (ebx == UINT_MAX)
+                return 0;
+        tsc_hz = eax | (((uint64_t)ebx) << 32);
+        do_div(tsc_hz, 1000);
+        BUG_ON(tsc_hz >> 32);
+        return tsc_hz;
+}
+
+/*
+ * While checking the dmi string infomation, just checking the product
+ * serial key should be enough, as this will always have a VMware
+ * specific string when running under VMware hypervisor.
+ */
+int vmware_platform(void)
+{
+	if (cpu_has_hypervisor) {
+		unsigned int eax, ebx, ecx, edx;
+		char hyper_vendor_id[13];
+
+		cpuid(CPUID_VMWARE_INFO_LEAF, &eax, &ebx, &ecx, &edx);
+		memcpy(hyper_vendor_id + 0, &ebx, 4);
+		memcpy(hyper_vendor_id + 4, &ecx, 4);
+		memcpy(hyper_vendor_id + 8, &edx, 4);
+		hyper_vendor_id[12] = '\0';
+		if (!strcmp(hyper_vendor_id, "VMwareVMware"))
+			return 1;
+	} else if (dmi_available && dmi_name_in_serial("VMware") &&
+		   __vmware_platform())
+		return 1;
+
+	return 0;
+}
+
+unsigned long vmware_get_tsc_khz(void)
+{
+	BUG_ON(!vmware_platform());
+	return __vmware_get_tsc_khz();
+}
+
+/*
+ * VMware hypervisor takes care of exporting a reliable TSC to the guest.
+ * Still, due to timing difference when running on virtual cpus, the TSC can
+ * be marked as unstable in some cases. For example, the TSC sync check at
+ * bootup can fail due to a marginal offset between vcpus' TSCs (though the
+ * TSCs do not drift from each other).  Also, the ACPI PM timer clocksource
+ * is not suitable as a watchdog when running on a hypervisor because the
+ * kernel may miss a wrap of the counter if the vcpu is descheduled for a
+ * long time. To skip these checks at runtime we set these capability bits,
+ * so that the kernel could just trust the hypervisor with providing a
+ * reliable virtual TSC that is suitable for timekeeping.
+ */
+void __cpuinit vmware_set_feature_bits(struct cpuinfo_x86 *c)
+{
+	set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+	set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE);
+}
author	Linus Torvalds <torvalds@linux-foundation.org>	2008-12-28 12:07:57 -0800
committer	Linus Torvalds <torvalds@linux-foundation.org>	2008-12-28 12:07:57 -0800
commit	be9c5ae4eeec2e85527e95647348b8ea4eb25128 (patch)
tree	59383b15bc0891b8a44500a0ac172a8850f1068d /arch/x86/kernel/cpu
parent	bb26c6c29b7cc9f39e491b074b09f3c284738d36 (diff)
parent	79a66b96c339626a3e4b226fefc0e45244cfe6ff (diff)