/* * ORC - Library of Optimized Inner Loops * Copyright (c) 2003,2004,2010 David A. Schleef * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include #include #include #include #include #include "orcinternal.h" #include #include #include #include #include #include #include orc_uint32 orc_x86_vendor; int orc_x86_sse_flags; int orc_x86_mmx_flags; int orc_x86_microarchitecture; #if defined(_MSC_VER) static void get_cpuid (orc_uint32 op, orc_uint32 *a, orc_uint32 *b, orc_uint32 *c, orc_uint32 *d) { int tmp[4]; __cpuid(tmp, op); *a = tmp[0]; *b = tmp[1]; *c = tmp[2]; *d = tmp[3]; } static void get_cpuid_ecx (orc_uint32 op, orc_uint32 init_ecx, orc_uint32 *a, orc_uint32 *b, orc_uint32 *c, orc_uint32 *d) { #if _MSC_VER >= 1500 int tmp[4]; __cpuidex(tmp, op, init_ecx); *a = tmp[0]; *b = tmp[1]; *c = tmp[2]; *d = tmp[3]; #else *a = 0; *b = 0; *c = 0; *d = 0; #endif } #elif defined(__GNUC__) || defined (__SUNPRO_C) static void get_cpuid_ecx (orc_uint32 op, orc_uint32 init_ecx, orc_uint32 *a, orc_uint32 *b, orc_uint32 *c, orc_uint32 *d) { *a = op; *c = init_ecx; #ifdef __i386__ __asm__ ( " pushl %%ebx\n" " cpuid\n" " mov %%ebx, %%esi\n" " popl %%ebx\n" : "+a" (*a), "=S" (*b), "+c" (*c), "=d" (*d)); #elif defined(__amd64__) __asm__ ( " cpuid\n" : "+a" (*a), "=b" (*b), "+c" (*c), "=d" (*d)); #endif } static void get_cpuid (orc_uint32 op, orc_uint32 *a, orc_uint32 *b, orc_uint32 *c, orc_uint32 *d) { get_cpuid_ecx (op, 0, a, b, c, d); } #else /* FIXME generate a get_cpuid() function at runtime. */ #error Need get_cpuid() function. #endif struct desc_struct { int desc; int level; int size; }; struct desc_struct cache_descriptors[] = { { 0x0a, 1, 8*1024 }, { 0x0c, 1, 16*1024 }, { 0x0d, 1, 16*1024 }, { 0x0e, 1, 24*1024 }, { 0x21, 2, 256*1024 }, { 0x22, 3, 512*1024 }, { 0x23, 3, 1024*1024 }, { 0x25, 3, 2*1024*1024 }, { 0x29, 3, 4*1024*1024 }, { 0x2c, 1, 32*1024 }, { 0x41, 2, 128*1024 }, { 0x42, 2, 256*1024 }, { 0x43, 2, 512*1024 }, { 0x44, 2, 1*1024*1024 }, { 0x45, 2, 2*1024*1024 }, { 0x46, 3, 4*1024*1024 }, { 0x47, 3, 8*1024*1024 }, { 0x48, 2, 3*1024*1024 }, { 0x49, 2, 4*1024*1024 }, /* special case */ { 0x4a, 3, 6*1024*1024 }, { 0x4b, 3, 8*1024*1024 }, { 0x4c, 3, 12*1024*1024 }, { 0x4d, 3, 16*1024*1024 }, { 0x4e, 2, 6*1024*1024 }, { 0x60, 1, 16*1024 }, { 0x66, 1, 8*1024 }, { 0x67, 1, 16*1024 }, { 0x68, 1, 32*1024 }, { 0x78, 2, 1*1024*1024 }, { 0x79, 2, 128*1024 }, { 0x7a, 2, 256*1024 }, { 0x7b, 2, 512*1024 }, { 0x7c, 2, 1*1024*1024 }, { 0x7d, 2, 2*1024*1024 }, { 0x7f, 2, 512*1024 }, { 0x80, 2, 512*1024 }, { 0x82, 2, 256*1024 }, { 0x83, 2, 512*1024 }, { 0x84, 2, 1*1024*1024 }, { 0x85, 2, 2*1024*1024 }, { 0x86, 2, 512*1024 }, { 0x87, 2, 1*1024*1024 }, { 0xe4, 3, 8*1024*1024 } }; static void handle_cache_descriptor (unsigned int desc) { int i; if (desc == 0) return; /* special case */ if (desc == 0x49 && _orc_cpu_family == 0xf && _orc_cpu_model == 0x6) { ORC_DEBUG("level %d size %d", 3, 4*1024*1024); _orc_data_cache_size_level3 = 4*1024*1024; return; } for(i=0;i>8)&0xf; model_id = (eax>>4)&0xf; ext_family_id = (eax>>20)&0xff; ext_model_id = (eax>>16)&0xf; _orc_cpu_family = family_id + ext_family_id; _orc_cpu_model = (ext_model_id << 4) | model_id; _orc_cpu_stepping = eax&0xf; ORC_INFO ("family_id %d model_id %d stepping %d", _orc_cpu_family, _orc_cpu_model, _orc_cpu_stepping); } static void orc_sse_detect_cpuid_generic (orc_uint32 level) { if (level >= 1) { orc_x86_cpuid_handle_standard_flags (); orc_x86_cpuid_handle_family_model_stepping (); } } static void orc_sse_detect_cpuid_intel (orc_uint32 level) { orc_uint32 eax, ebx, ecx, edx; if (level >= 1) { orc_x86_cpuid_handle_standard_flags (); orc_x86_cpuid_handle_family_model_stepping (); orc_x86_microarchitecture = ORC_X86_UNKNOWN; if (_orc_cpu_family == 6) { switch (_orc_cpu_model) { case 6: /* Mendocino */ case 11: /* Tualatin-256 */ orc_x86_microarchitecture = ORC_X86_P6; break; case 15: case 22: orc_x86_microarchitecture = ORC_X86_CORE; break; case 23: case 29: orc_x86_microarchitecture = ORC_X86_PENRYN; break; case 26: orc_x86_microarchitecture = ORC_X86_NEHALEM; break; case 28: orc_x86_microarchitecture = ORC_X86_BONNELL; break; //orc_x86_microarchitecture = ORC_X86_WESTMERE; //orc_x86_microarchitecture = ORC_X86_SANDY_BRIDGE; } } else if (_orc_cpu_family == 15) { orc_x86_microarchitecture = ORC_X86_NETBURST; } } if (level >= 2) { get_cpuid (0x00000002, &eax, &ebx, &ecx, &edx); if ((eax&0x80000000) == 0) { handle_cache_descriptor ((eax>>8)&0xff); handle_cache_descriptor ((eax>>16)&0xff); handle_cache_descriptor ((eax>>24)&0xff); } if ((ebx&0x80000000) == 0) { handle_cache_descriptor (ebx&0xff); handle_cache_descriptor ((ebx>>8)&0xff); handle_cache_descriptor ((ebx>>16)&0xff); handle_cache_descriptor ((ebx>>24)&0xff); } if ((ecx&0x80000000) == 0) { handle_cache_descriptor (ecx&0xff); handle_cache_descriptor ((ecx>>8)&0xff); handle_cache_descriptor ((ecx>>16)&0xff); handle_cache_descriptor ((ecx>>24)&0xff); } if ((edx&0x80000000) == 0) { handle_cache_descriptor (edx&0xff); handle_cache_descriptor ((edx>>8)&0xff); handle_cache_descriptor ((edx>>16)&0xff); handle_cache_descriptor ((edx>>24)&0xff); } } if (level >= 4) { int i; for(i=0;i<10;i++){ int type; int level; int l; int p; int w; int s; get_cpuid_ecx (0x00000004, i, &eax, &ebx, &ecx, &edx); type = eax&0xf; if (type == 0) break; level = (eax>>5)&0x7; l = ((ebx>>0)&0xfff)+1; p = ((ebx>>12)&0x3ff)+1; w = ((ebx>>22)&0x3ff)+1; s = ecx + 1; ORC_INFO ("type %d level %d line size %d partitions %d ways %d sets %d", type, level, l, p, w, s); if (type == 1 || type == 3) { switch (level) { case 1: _orc_data_cache_size_level1 = l*p*w*s; break; case 2: _orc_data_cache_size_level2 = l*p*w*s; break; case 3: _orc_data_cache_size_level3 = l*p*w*s; break; } } } } get_cpuid (0x80000000, &level, &ebx, &ecx, &edx); if (level >= 4) { orc_x86_cpuid_get_branding_string (); } } static void orc_sse_detect_cpuid_amd (orc_uint32 level) { orc_uint32 eax, ebx, ecx, edx; if (level >= 1) { orc_x86_cpuid_handle_standard_flags (); orc_x86_cpuid_handle_family_model_stepping (); orc_x86_microarchitecture = ORC_X86_UNKNOWN; switch (_orc_cpu_family) { case 5: /* Don't know if 8 is correct */ if (_orc_cpu_model < 8) { orc_x86_microarchitecture = ORC_X86_K5; } else { orc_x86_microarchitecture = ORC_X86_K6; } break; case 6: orc_x86_microarchitecture = ORC_X86_K7; break; case 0xf: orc_x86_microarchitecture = ORC_X86_K8; break; case 0x10: orc_x86_microarchitecture = ORC_X86_K10; break; default: break; } } get_cpuid (0x80000000, &level, &ebx, &ecx, &edx); if (level >= 1) { get_cpuid (0x80000001, &eax, &ebx, &ecx, &edx); /* AMD flags */ if (ecx & (1<<6)) { orc_x86_sse_flags |= ORC_TARGET_SSE_SSE4A; } if (ecx & (1<<11)) { orc_x86_sse_flags |= ORC_TARGET_SSE_SSE5; } if (edx & (1<<22)) { orc_x86_mmx_flags |= ORC_TARGET_MMX_MMXEXT; } if (edx & (1<<31)) { orc_x86_mmx_flags |= ORC_TARGET_MMX_3DNOW; } if (edx & (1<<30)) { orc_x86_mmx_flags |= ORC_TARGET_MMX_3DNOWEXT; } } if (level >= 4) { orc_x86_cpuid_get_branding_string (); } if (level >= 6) { get_cpuid (0x80000005, &eax, &ebx, &ecx, &edx); _orc_data_cache_size_level1 = ((ecx>>24)&0xff) * 1024; ORC_INFO ("L1 D-cache: %d kbytes, %d-way, %d lines/tag, %d line size", (ecx>>24)&0xff, (ecx>>16)&0xff, (ecx>>8)&0xff, ecx&0xff); ORC_INFO ("L1 I-cache: %d kbytes, %d-way, %d lines/tag, %d line size", (edx>>24)&0xff, (edx>>16)&0xff, (edx>>8)&0xff, edx&0xff); get_cpuid (0x80000006, &eax, &ebx, &ecx, &edx); _orc_data_cache_size_level2 = ((ecx>>16)&0xffff) * 1024; ORC_INFO ("L2 cache: %d kbytes, %d assoc, %d lines/tag, %d line size", (ecx>>16)&0xffff, (ecx>>12)&0xf, (ecx>>8)&0xf, ecx&0xff); } } unsigned int orc_sse_get_cpu_flags(void) { orc_x86_detect_cpuid (); return orc_x86_sse_flags; } unsigned int orc_mmx_get_cpu_flags(void) { orc_x86_detect_cpuid (); return orc_x86_mmx_flags; }