diff options
Diffstat (limited to 'tools')
-rw-r--r-- | tools/power/x86/turbostat/turbostat.8 | 55 | ||||
-rw-r--r-- | tools/power/x86/turbostat/turbostat.c | 214 |
2 files changed, 233 insertions, 36 deletions
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 74e44507dfe9..e4d0690cccf9 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -4,15 +4,11 @@ turbostat \- Report processor frequency and idle statistics .SH SYNOPSIS .ft B .B turbostat -.RB [ "\-s" ] -.RB [ "\-v" ] -.RB [ "\-M MSR#" ] +.RB [ Options ] .RB command .br .B turbostat -.RB [ "\-s" ] -.RB [ "\-v" ] -.RB [ "\-M MSR#" ] +.RB [ Options ] .RB [ "\-i interval_sec" ] .SH DESCRIPTION \fBturbostat \fP reports processor topology, frequency @@ -27,16 +23,23 @@ supports an "invariant" TSC, plus the APERF and MPERF MSRs. on processors that additionally support C-state residency counters. .SS Options -The \fB-s\fP option limits output to a 1-line system summary for each interval. +The \fB-p\fP option limits output to the 1st thread in 1st core of each package. .PP -The \fB-c\fP option limits output to the 1st thread in each core. +The \fB-P\fP option limits output to the 1st thread in each Package. .PP -The \fB-p\fP option limits output to the 1st thread in each package. +The \fB-S\fP option limits output to a 1-line System Summary for each interval. .PP The \fB-v\fP option increases verbosity. .PP -The \fB-M MSR#\fP option dumps the specified MSR, -in addition to the usual frequency and idle statistics. +The \fB-s\fP option prints the SMI counter, equivalent to "-c 0x34" +.PP +The \fB-c MSR#\fP option includes the delta of the specified 32-bit MSR counter. +.PP +The \fB-C MSR#\fP option includes the delta of the specified 64-bit MSR counter. +.PP +The \fB-m MSR#\fP option includes the the specified 32-bit MSR value. +.PP +The \fB-M MSR#\fP option includes the the specified 64-bit MSR value. .PP The \fB-i interval_sec\fP option prints statistics every \fiinterval_sec\fP seconds. The default is 5 seconds. @@ -150,6 +153,29 @@ Note that turbostat reports average GHz of 3.63, while the arithmetic average of the GHz column above is lower. This is a weighted average, where the weight is %c0. ie. it is the total number of un-halted cycles elapsed per time divided by the number of CPUs. +.SH SMI COUNTING EXAMPLE +On Intel Nehalem and newer processors, MSR 0x34 is a System Management Mode Interrupt (SMI) counter. +Using the -m option, you can display how many SMIs have fired since reset, or if there +are SMIs during the measurement interval, you can display the delta using the -d option. +.nf +[root@x980 ~]# turbostat -m 0x34 +cor CPU %c0 GHz TSC MSR 0x034 %c1 %c3 %c6 %pc3 %pc6 + 1.41 1.82 3.38 0x00000000 8.92 37.82 51.85 17.37 0.55 + 0 0 3.73 2.03 3.38 0x00000055 1.72 48.25 46.31 17.38 0.55 + 0 6 0.14 1.63 3.38 0x00000056 5.30 + 1 2 2.51 1.80 3.38 0x00000056 15.65 29.33 52.52 + 1 8 0.10 1.65 3.38 0x00000056 18.05 + 2 4 1.16 1.68 3.38 0x00000056 5.87 24.47 68.50 + 2 10 0.10 1.63 3.38 0x00000056 6.93 + 8 1 3.84 1.91 3.38 0x00000056 1.36 50.65 44.16 + 8 7 0.08 1.64 3.38 0x00000056 5.12 + 9 3 1.82 1.73 3.38 0x00000056 7.59 24.21 66.38 + 9 9 0.09 1.68 3.38 0x00000056 9.32 + 10 5 1.66 1.65 3.38 0x00000056 15.10 50.00 33.23 + 10 11 1.72 1.65 3.38 0x00000056 15.05 +^C +[root@x980 ~]# +.fi .SH NOTES .B "turbostat " @@ -165,6 +191,13 @@ may work poorly on Linux-2.6.20 through 2.6.29, as \fBacpi-cpufreq \fPperiodically cleared the APERF and MPERF in those kernels. +If the TSC column does not make sense, then +the other numbers will also make no sense. +Turbostat is lightweight, and its data collection is not atomic. +These issues are usually caused by an extremely short measurement +interval (much less than 1 second), or system activity that prevents +turbostat from being able to run on all CPUS to quickly collect data. + The APERF, MPERF MSRs are defined to count non-halted cycles. Although it is not guaranteed by the architecture, turbostat assumes that they count at TSC rate, which is true on all processors tested to date. diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 861d77190206..2655ae9a3ad8 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -35,9 +35,9 @@ #include <ctype.h> #include <sched.h> -#define MSR_TSC 0x10 #define MSR_NEHALEM_PLATFORM_INFO 0xCE #define MSR_NEHALEM_TURBO_RATIO_LIMIT 0x1AD +#define MSR_IVT_TURBO_RATIO_LIMIT 0x1AE #define MSR_APERF 0xE8 #define MSR_MPERF 0xE7 #define MSR_PKG_C2_RESIDENCY 0x60D /* SNB only */ @@ -62,7 +62,11 @@ unsigned int genuine_intel; unsigned int has_invariant_tsc; unsigned int do_nehalem_platform_info; unsigned int do_nehalem_turbo_ratio_limit; -unsigned int extra_msr_offset; +unsigned int do_ivt_turbo_ratio_limit; +unsigned int extra_msr_offset32; +unsigned int extra_msr_offset64; +unsigned int extra_delta_offset32; +unsigned int extra_delta_offset64; double bclk; unsigned int show_pkg; unsigned int show_core; @@ -83,7 +87,10 @@ struct thread_data { unsigned long long aperf; unsigned long long mperf; unsigned long long c1; /* derived */ - unsigned long long extra_msr; + unsigned long long extra_msr64; + unsigned long long extra_delta64; + unsigned long long extra_msr32; + unsigned long long extra_delta32; unsigned int cpu_id; unsigned int flags; #define CPU_IS_FIRST_THREAD_IN_CORE 0x2 @@ -222,6 +229,14 @@ void print_header(void) if (has_aperf) outp += sprintf(outp, " GHz"); outp += sprintf(outp, " TSC"); + if (extra_delta_offset32) + outp += sprintf(outp, " count 0x%03X", extra_delta_offset32); + if (extra_delta_offset64) + outp += sprintf(outp, " COUNT 0x%03X", extra_delta_offset64); + if (extra_msr_offset32) + outp += sprintf(outp, " MSR 0x%03X", extra_msr_offset32); + if (extra_msr_offset64) + outp += sprintf(outp, " MSR 0x%03X", extra_msr_offset64); if (do_nhm_cstates) outp += sprintf(outp, " %%c1"); if (do_nhm_cstates) @@ -238,8 +253,6 @@ void print_header(void) outp += sprintf(outp, " %%pc6"); if (do_snb_cstates) outp += sprintf(outp, " %%pc7"); - if (extra_msr_offset) - outp += sprintf(outp, " MSR 0x%x ", extra_msr_offset); outp += sprintf(outp, "\n"); } @@ -255,8 +268,14 @@ int dump_counters(struct thread_data *t, struct core_data *c, fprintf(stderr, "aperf: %016llX\n", t->aperf); fprintf(stderr, "mperf: %016llX\n", t->mperf); fprintf(stderr, "c1: %016llX\n", t->c1); + fprintf(stderr, "msr0x%x: %08llX\n", + extra_delta_offset32, t->extra_delta32); fprintf(stderr, "msr0x%x: %016llX\n", - extra_msr_offset, t->extra_msr); + extra_delta_offset64, t->extra_delta64); + fprintf(stderr, "msr0x%x: %08llX\n", + extra_msr_offset32, t->extra_msr32); + fprintf(stderr, "msr0x%x: %016llX\n", + extra_msr_offset64, t->extra_msr64); } if (c) { @@ -360,6 +379,21 @@ int format_counters(struct thread_data *t, struct core_data *c, /* TSC */ outp += sprintf(outp, "%5.2f", 1.0 * t->tsc/units/interval_float); + /* delta */ + if (extra_delta_offset32) + outp += sprintf(outp, " %11llu", t->extra_delta32); + + /* DELTA */ + if (extra_delta_offset64) + outp += sprintf(outp, " %11llu", t->extra_delta64); + /* msr */ + if (extra_msr_offset32) + outp += sprintf(outp, " 0x%08llx", t->extra_msr32); + + /* MSR */ + if (extra_msr_offset64) + outp += sprintf(outp, " 0x%016llx", t->extra_msr64); + if (do_nhm_cstates) { if (!skip_c1) outp += sprintf(outp, " %6.2f", 100.0 * t->c1/t->tsc); @@ -391,8 +425,6 @@ int format_counters(struct thread_data *t, struct core_data *c, if (do_snb_cstates) outp += sprintf(outp, " %6.2f", 100.0 * p->pc7/t->tsc); done: - if (extra_msr_offset) - outp += sprintf(outp, " 0x%016llx", t->extra_msr); outp += sprintf(outp, "\n"); return 0; @@ -502,10 +534,16 @@ delta_thread(struct thread_data *new, struct thread_data *old, old->mperf = 1; /* divide by 0 protection */ } + old->extra_delta32 = new->extra_delta32 - old->extra_delta32; + old->extra_delta32 &= 0xFFFFFFFF; + + old->extra_delta64 = new->extra_delta64 - old->extra_delta64; + /* - * for "extra msr", just copy the latest w/o subtracting + * Extra MSR is just a snapshot, simply copy latest w/o subtracting */ - old->extra_msr = new->extra_msr; + old->extra_msr32 = new->extra_msr32; + old->extra_msr64 = new->extra_msr64; } int delta_cpu(struct thread_data *t, struct core_data *c, @@ -533,6 +571,9 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data t->mperf = 0; t->c1 = 0; + t->extra_delta32 = 0; + t->extra_delta64 = 0; + /* tells format_counters to dump all fields from this set */ t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE; @@ -553,6 +594,9 @@ int sum_counters(struct thread_data *t, struct core_data *c, average.threads.mperf += t->mperf; average.threads.c1 += t->c1; + average.threads.extra_delta32 += t->extra_delta32; + average.threads.extra_delta64 += t->extra_delta64; + /* sum per-core values only for 1st thread in core */ if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) return 0; @@ -588,6 +632,11 @@ void compute_average(struct thread_data *t, struct core_data *c, average.threads.mperf /= topo.num_cpus; average.threads.c1 /= topo.num_cpus; + average.threads.extra_delta32 /= topo.num_cpus; + average.threads.extra_delta32 &= 0xFFFFFFFF; + + average.threads.extra_delta64 /= topo.num_cpus; + average.cores.c3 /= topo.num_cores; average.cores.c6 /= topo.num_cores; average.cores.c7 /= topo.num_cores; @@ -629,8 +678,24 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) return -4; } - if (extra_msr_offset) - if (get_msr(cpu, extra_msr_offset, &t->extra_msr)) + if (extra_delta_offset32) { + if (get_msr(cpu, extra_delta_offset32, &t->extra_delta32)) + return -5; + t->extra_delta32 &= 0xFFFFFFFF; + } + + if (extra_delta_offset64) + if (get_msr(cpu, extra_delta_offset64, &t->extra_delta64)) + return -5; + + if (extra_msr_offset32) { + if (get_msr(cpu, extra_msr_offset32, &t->extra_msr32)) + return -5; + t->extra_msr32 &= 0xFFFFFFFF; + } + + if (extra_msr_offset64) + if (get_msr(cpu, extra_msr_offset64, &t->extra_msr64)) return -5; /* collect core counters only for 1st thread in core */ @@ -677,6 +742,9 @@ void print_verbose_header(void) get_msr(0, MSR_NEHALEM_PLATFORM_INFO, &msr); + if (verbose > 1) + fprintf(stderr, "MSR_NEHALEM_PLATFORM_INFO: 0x%llx\n", msr); + ratio = (msr >> 40) & 0xFF; fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n", ratio, bclk, ratio * bclk); @@ -685,14 +753,84 @@ void print_verbose_header(void) fprintf(stderr, "%d * %.0f = %.0f MHz TSC frequency\n", ratio, bclk, ratio * bclk); + if (!do_ivt_turbo_ratio_limit) + goto print_nhm_turbo_ratio_limits; + + get_msr(0, MSR_IVT_TURBO_RATIO_LIMIT, &msr); + if (verbose > 1) - fprintf(stderr, "MSR_NEHALEM_PLATFORM_INFO: 0x%llx\n", msr); + fprintf(stderr, "MSR_IVT_TURBO_RATIO_LIMIT: 0x%llx\n", msr); + + ratio = (msr >> 56) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 16 active cores\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 48) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 15 active cores\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 40) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 14 active cores\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 32) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 13 active cores\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 24) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 12 active cores\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 16) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 11 active cores\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 8) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 10 active cores\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 0) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 9 active cores\n", + ratio, bclk, ratio * bclk); + +print_nhm_turbo_ratio_limits: if (!do_nehalem_turbo_ratio_limit) return; get_msr(0, MSR_NEHALEM_TURBO_RATIO_LIMIT, &msr); + if (verbose > 1) + fprintf(stderr, "MSR_NEHALEM_TURBO_RATIO_LIMIT: 0x%llx\n", msr); + + ratio = (msr >> 56) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 8 active cores\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 48) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 7 active cores\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 40) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 6 active cores\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 32) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 5 active cores\n", + ratio, bclk, ratio * bclk); + ratio = (msr >> 24) & 0xFF; if (ratio) fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 4 active cores\n", @@ -712,7 +850,6 @@ void print_verbose_header(void) if (ratio) fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 1 active cores\n", ratio, bclk, ratio * bclk); - } void free_all_buffers(void) @@ -1038,7 +1175,7 @@ int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model) case 0x2A: /* SNB */ case 0x2D: /* SNB Xeon */ case 0x3A: /* IVB */ - case 0x3D: /* IVB Xeon */ + case 0x3E: /* IVB Xeon */ return 1; case 0x2E: /* Nehalem-EX Xeon - Beckton */ case 0x2F: /* Westmere-EX Xeon - Eagleton */ @@ -1046,6 +1183,22 @@ int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model) return 0; } } +int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + + if (family != 6) + return 0; + + switch (model) { + case 0x3E: /* IVB Xeon */ + return 1; + default: + return 0; + } +} + int is_snb(unsigned int family, unsigned int model) { @@ -1056,7 +1209,7 @@ int is_snb(unsigned int family, unsigned int model) case 0x2A: case 0x2D: case 0x3A: /* IVB */ - case 0x3D: /* IVB Xeon */ + case 0x3E: /* IVB Xeon */ return 1; } return 0; @@ -1145,12 +1298,13 @@ void check_cpuid() bclk = discover_bclk(family, model); do_nehalem_turbo_ratio_limit = has_nehalem_turbo_ratio_limit(family, model); + do_ivt_turbo_ratio_limit = has_ivt_turbo_ratio_limit(family, model); } void usage() { - fprintf(stderr, "%s: [-v] [-M MSR#] [-i interval_sec | command ...]\n", + fprintf(stderr, "%s: [-v][-p|-P|-S][-c MSR# | -s]][-C MSR#][-m MSR#][-M MSR#][-i interval_sec | command ...]\n", progname); exit(1); } @@ -1440,15 +1594,15 @@ void cmdline(int argc, char **argv) progname = argv[0]; - while ((opt = getopt(argc, argv, "+cpsvi:M:")) != -1) { + while ((opt = getopt(argc, argv, "+pPSvisc:sC:m:M:")) != -1) { switch (opt) { - case 'c': + case 'p': show_core_only++; break; - case 'p': + case 'P': show_pkg_only++; break; - case 's': + case 'S': summary_only++; break; case 'v': @@ -1457,10 +1611,20 @@ void cmdline(int argc, char **argv) case 'i': interval_sec = atoi(optarg); break; + case 'c': + sscanf(optarg, "%x", &extra_delta_offset32); + break; + case 's': + extra_delta_offset32 = 0x34; /* SMI counter */ + break; + case 'C': + sscanf(optarg, "%x", &extra_delta_offset64); + break; + case 'm': + sscanf(optarg, "%x", &extra_msr_offset32); + break; case 'M': - sscanf(optarg, "%x", &extra_msr_offset); - if (verbose > 1) - fprintf(stderr, "MSR 0x%X\n", extra_msr_offset); + sscanf(optarg, "%x", &extra_msr_offset64); break; default: usage(); @@ -1473,7 +1637,7 @@ int main(int argc, char **argv) cmdline(argc, argv); if (verbose > 1) - fprintf(stderr, "turbostat v2.0 May 16, 2012" + fprintf(stderr, "turbostat v2.1 October 6, 2012" " - Len Brown <lenb@kernel.org>\n"); turbostat_init(); |