diff options
Diffstat (limited to 'tools/power/cpupower/utils/idle_monitor')
9 files changed, 1736 insertions, 0 deletions
diff --git a/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c b/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c new file mode 100644 index 000000000000..202e555988be --- /dev/null +++ b/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c @@ -0,0 +1,338 @@ +/* + * (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc. + * + * Licensed under the terms of the GNU GPL License version 2. + * + * PCI initialization based on example code from: + * Andreas Herrmann <andreas.herrmann3@amd.com> + */ + +#if defined(__i386__) || defined(__x86_64__) + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <time.h> +#include <string.h> + +#include <pci/pci.h> + +#include "idle_monitor/cpupower-monitor.h" +#include "helpers/helpers.h" + +/******** PCI parts could go into own file and get shared ***************/ + +#define PCI_NON_PC0_OFFSET 0xb0 +#define PCI_PC1_OFFSET 0xb4 +#define PCI_PC6_OFFSET 0xb8 + +#define PCI_MONITOR_ENABLE_REG 0xe0 + +#define PCI_NON_PC0_ENABLE_BIT 0 +#define PCI_PC1_ENABLE_BIT 1 +#define PCI_PC6_ENABLE_BIT 2 + +#define PCI_NBP1_STAT_OFFSET 0x98 +#define PCI_NBP1_ACTIVE_BIT 2 +#define PCI_NBP1_ENTERED_BIT 1 + +#define PCI_NBP1_CAP_OFFSET 0x90 +#define PCI_NBP1_CAPABLE_BIT 31 + +#define OVERFLOW_MS 343597 /* 32 bit register filled at 12500 HZ + (1 tick per 80ns) */ + +enum amd_fam14h_states {NON_PC0 = 0, PC1, PC6, NBP1, + AMD_FAM14H_STATE_NUM}; + +static int fam14h_get_count_percent(unsigned int self_id, double *percent, + unsigned int cpu); +static int fam14h_nbp1_count(unsigned int id, unsigned long long *count, + unsigned int cpu); + +static cstate_t amd_fam14h_cstates[AMD_FAM14H_STATE_NUM] = { + { + .name = "!PC0", + .desc = N_("Package in sleep state (PC1 or deeper)"), + .id = NON_PC0, + .range = RANGE_PACKAGE, + .get_count_percent = fam14h_get_count_percent, + }, + { + .name = "PC1", + .desc = N_("Processor Package C1"), + .id = PC1, + .range = RANGE_PACKAGE, + .get_count_percent = fam14h_get_count_percent, + }, + { + .name = "PC6", + .desc = N_("Processor Package C6"), + .id = PC6, + .range = RANGE_PACKAGE, + .get_count_percent = fam14h_get_count_percent, + }, + { + .name = "NBP1", + .desc = N_("North Bridge P1 boolean counter (returns 0 or 1)"), + .id = NBP1, + .range = RANGE_PACKAGE, + .get_count = fam14h_nbp1_count, + }, +}; + +static struct pci_access *pci_acc; +static int pci_vendor_id = 0x1022; +static int pci_dev_ids[2] = {0x1716, 0}; +static struct pci_dev *amd_fam14h_pci_dev; + +static int nbp1_entered; + +struct timespec start_time; +static unsigned long long timediff; + +#ifdef DEBUG +struct timespec dbg_time; +long dbg_timediff; +#endif + +static unsigned long long *previous_count[AMD_FAM14H_STATE_NUM]; +static unsigned long long *current_count[AMD_FAM14H_STATE_NUM]; + +static int amd_fam14h_get_pci_info(struct cstate *state, + unsigned int *pci_offset, + unsigned int *enable_bit, + unsigned int cpu) +{ + switch (state->id) { + case NON_PC0: + *enable_bit = PCI_NON_PC0_ENABLE_BIT; + *pci_offset = PCI_NON_PC0_OFFSET; + break; + case PC1: + *enable_bit = PCI_PC1_ENABLE_BIT; + *pci_offset = PCI_PC1_OFFSET; + break; + case PC6: + *enable_bit = PCI_PC6_ENABLE_BIT; + *pci_offset = PCI_PC6_OFFSET; + break; + case NBP1: + *enable_bit = PCI_NBP1_ENTERED_BIT; + *pci_offset = PCI_NBP1_STAT_OFFSET; + break; + default: + return -1; + }; + return 0; +} + +static int amd_fam14h_init(cstate_t *state, unsigned int cpu) +{ + int enable_bit, pci_offset, ret; + uint32_t val; + + ret = amd_fam14h_get_pci_info(state, &pci_offset, &enable_bit, cpu); + if (ret) + return ret; + + /* NBP1 needs extra treating -> write 1 to D18F6x98 bit 1 for init */ + if (state->id == NBP1) { + val = pci_read_long(amd_fam14h_pci_dev, pci_offset); + val |= 1 << enable_bit; + val = pci_write_long(amd_fam14h_pci_dev, pci_offset, val); + return ret; + } + + /* Enable monitor */ + val = pci_read_long(amd_fam14h_pci_dev, PCI_MONITOR_ENABLE_REG); + dprint("Init %s: read at offset: 0x%x val: %u\n", state->name, + PCI_MONITOR_ENABLE_REG, (unsigned int) val); + val |= 1 << enable_bit; + pci_write_long(amd_fam14h_pci_dev, PCI_MONITOR_ENABLE_REG, val); + + dprint("Init %s: offset: 0x%x enable_bit: %d - val: %u (%u)\n", + state->name, PCI_MONITOR_ENABLE_REG, enable_bit, + (unsigned int) val, cpu); + + /* Set counter to zero */ + pci_write_long(amd_fam14h_pci_dev, pci_offset, 0); + previous_count[state->id][cpu] = 0; + + return 0; +} + +static int amd_fam14h_disable(cstate_t *state, unsigned int cpu) +{ + int enable_bit, pci_offset, ret; + uint32_t val; + + ret = amd_fam14h_get_pci_info(state, &pci_offset, &enable_bit, cpu); + if (ret) + return ret; + + val = pci_read_long(amd_fam14h_pci_dev, pci_offset); + dprint("%s: offset: 0x%x %u\n", state->name, pci_offset, val); + if (state->id == NBP1) { + /* was the bit whether NBP1 got entered set? */ + nbp1_entered = (val & (1 << PCI_NBP1_ACTIVE_BIT)) | + (val & (1 << PCI_NBP1_ENTERED_BIT)); + + dprint("NBP1 was %sentered - 0x%x - enable_bit: " + "%d - pci_offset: 0x%x\n", + nbp1_entered ? "" : "not ", + val, enable_bit, pci_offset); + return ret; + } + current_count[state->id][cpu] = val; + + dprint("%s: Current - %llu (%u)\n", state->name, + current_count[state->id][cpu], cpu); + dprint("%s: Previous - %llu (%u)\n", state->name, + previous_count[state->id][cpu], cpu); + + val = pci_read_long(amd_fam14h_pci_dev, PCI_MONITOR_ENABLE_REG); + val &= ~(1 << enable_bit); + pci_write_long(amd_fam14h_pci_dev, PCI_MONITOR_ENABLE_REG, val); + + return 0; +} + +static int fam14h_nbp1_count(unsigned int id, unsigned long long *count, + unsigned int cpu) +{ + if (id == NBP1) { + if (nbp1_entered) + *count = 1; + else + *count = 0; + return 0; + } + return -1; +} +static int fam14h_get_count_percent(unsigned int id, double *percent, + unsigned int cpu) +{ + unsigned long diff; + + if (id >= AMD_FAM14H_STATE_NUM) + return -1; + /* residency count in 80ns -> divide through 12.5 to get us residency */ + diff = current_count[id][cpu] - previous_count[id][cpu]; + + if (timediff == 0) + *percent = 0.0; + else + *percent = 100.0 * diff / timediff / 12.5; + + dprint("Timediff: %llu - res~: %lu us - percent: %.2f %%\n", + timediff, diff * 10 / 125, *percent); + + return 0; +} + +static int amd_fam14h_start(void) +{ + int num, cpu; + clock_gettime(CLOCK_REALTIME, &start_time); + for (num = 0; num < AMD_FAM14H_STATE_NUM; num++) { + for (cpu = 0; cpu < cpu_count; cpu++) + amd_fam14h_init(&amd_fam14h_cstates[num], cpu); + } +#ifdef DEBUG + clock_gettime(CLOCK_REALTIME, &dbg_time); + dbg_timediff = timespec_diff_us(start_time, dbg_time); + dprint("Enabling counters took: %lu us\n", + dbg_timediff); +#endif + return 0; +} + +static int amd_fam14h_stop(void) +{ + int num, cpu; + struct timespec end_time; + + clock_gettime(CLOCK_REALTIME, &end_time); + + for (num = 0; num < AMD_FAM14H_STATE_NUM; num++) { + for (cpu = 0; cpu < cpu_count; cpu++) + amd_fam14h_disable(&amd_fam14h_cstates[num], cpu); + } +#ifdef DEBUG + clock_gettime(CLOCK_REALTIME, &dbg_time); + dbg_timediff = timespec_diff_us(end_time, dbg_time); + dprint("Disabling counters took: %lu ns\n", dbg_timediff); +#endif + timediff = timespec_diff_us(start_time, end_time); + if (timediff / 1000 > OVERFLOW_MS) + print_overflow_err((unsigned int)timediff / 1000000, + OVERFLOW_MS / 1000); + + return 0; +} + +static int is_nbp1_capable(void) +{ + uint32_t val; + val = pci_read_long(amd_fam14h_pci_dev, PCI_NBP1_CAP_OFFSET); + return val & (1 << 31); +} + +struct cpuidle_monitor *amd_fam14h_register(void) +{ + int num; + + if (cpupower_cpu_info.vendor != X86_VENDOR_AMD) + return NULL; + + if (cpupower_cpu_info.family == 0x14) { + if (cpu_count <= 0 || cpu_count > 2) { + fprintf(stderr, "AMD fam14h: Invalid cpu count: %d\n", + cpu_count); + return NULL; + } + } else + return NULL; + + /* We do not alloc for nbp1 machine wide counter */ + for (num = 0; num < AMD_FAM14H_STATE_NUM - 1; num++) { + previous_count[num] = calloc(cpu_count, + sizeof(unsigned long long)); + current_count[num] = calloc(cpu_count, + sizeof(unsigned long long)); + } + + amd_fam14h_pci_dev = pci_acc_init(&pci_acc, pci_vendor_id, pci_dev_ids); + if (amd_fam14h_pci_dev == NULL || pci_acc == NULL) + return NULL; + + if (!is_nbp1_capable()) + amd_fam14h_monitor.hw_states_num = AMD_FAM14H_STATE_NUM - 1; + + amd_fam14h_monitor.name_len = strlen(amd_fam14h_monitor.name); + return &amd_fam14h_monitor; +} + +static void amd_fam14h_unregister(void) +{ + int num; + for (num = 0; num < AMD_FAM14H_STATE_NUM - 1; num++) { + free(previous_count[num]); + free(current_count[num]); + } + pci_cleanup(pci_acc); +} + +struct cpuidle_monitor amd_fam14h_monitor = { + .name = "Ontario", + .hw_states = amd_fam14h_cstates, + .hw_states_num = AMD_FAM14H_STATE_NUM, + .start = amd_fam14h_start, + .stop = amd_fam14h_stop, + .do_register = amd_fam14h_register, + .unregister = amd_fam14h_unregister, + .needs_root = 1, + .overflow_s = OVERFLOW_MS / 1000, +}; +#endif /* #if defined(__i386__) || defined(__x86_64__) */ diff --git a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c new file mode 100644 index 000000000000..d048b96a6155 --- /dev/null +++ b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c @@ -0,0 +1,196 @@ +/* + * (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc + * + * Licensed under the terms of the GNU GPL License version 2. + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <limits.h> + +#include "helpers/sysfs.h" +#include "helpers/helpers.h" +#include "idle_monitor/cpupower-monitor.h" + +#define CPUIDLE_STATES_MAX 10 +static cstate_t cpuidle_cstates[CPUIDLE_STATES_MAX]; +struct cpuidle_monitor cpuidle_sysfs_monitor; + +static unsigned long long **previous_count; +static unsigned long long **current_count; +struct timespec start_time; +static unsigned long long timediff; + +static int cpuidle_get_count_percent(unsigned int id, double *percent, + unsigned int cpu) +{ + unsigned long long statediff = current_count[cpu][id] + - previous_count[cpu][id]; + dprint("%s: - diff: %llu - percent: %f (%u)\n", + cpuidle_cstates[id].name, timediff, *percent, cpu); + + if (timediff == 0) + *percent = 0.0; + else + *percent = ((100.0 * statediff) / timediff); + + dprint("%s: - timediff: %llu - statediff: %llu - percent: %f (%u)\n", + cpuidle_cstates[id].name, timediff, statediff, *percent, cpu); + + return 0; +} + +static int cpuidle_start(void) +{ + int cpu, state; + clock_gettime(CLOCK_REALTIME, &start_time); + for (cpu = 0; cpu < cpu_count; cpu++) { + for (state = 0; state < cpuidle_sysfs_monitor.hw_states_num; + state++) { + previous_count[cpu][state] = + sysfs_get_idlestate_time(cpu, state); + dprint("CPU %d - State: %d - Val: %llu\n", + cpu, state, previous_count[cpu][state]); + } + }; + return 0; +} + +static int cpuidle_stop(void) +{ + int cpu, state; + struct timespec end_time; + clock_gettime(CLOCK_REALTIME, &end_time); + timediff = timespec_diff_us(start_time, end_time); + + for (cpu = 0; cpu < cpu_count; cpu++) { + for (state = 0; state < cpuidle_sysfs_monitor.hw_states_num; + state++) { + current_count[cpu][state] = + sysfs_get_idlestate_time(cpu, state); + dprint("CPU %d - State: %d - Val: %llu\n", + cpu, state, previous_count[cpu][state]); + } + }; + return 0; +} + +void fix_up_intel_idle_driver_name(char *tmp, int num) +{ + /* fix up cpuidle name for intel idle driver */ + if (!strncmp(tmp, "NHM-", 4)) { + switch (num) { + case 1: + strcpy(tmp, "C1"); + break; + case 2: + strcpy(tmp, "C3"); + break; + case 3: + strcpy(tmp, "C6"); + break; + } + } else if (!strncmp(tmp, "SNB-", 4)) { + switch (num) { + case 1: + strcpy(tmp, "C1"); + break; + case 2: + strcpy(tmp, "C3"); + break; + case 3: + strcpy(tmp, "C6"); + break; + case 4: + strcpy(tmp, "C7"); + break; + } + } else if (!strncmp(tmp, "ATM-", 4)) { + switch (num) { + case 1: + strcpy(tmp, "C1"); + break; + case 2: + strcpy(tmp, "C2"); + break; + case 3: + strcpy(tmp, "C4"); + break; + case 4: + strcpy(tmp, "C6"); + break; + } + } +} + +static struct cpuidle_monitor *cpuidle_register(void) +{ + int num; + char *tmp; + + /* Assume idle state count is the same for all CPUs */ + cpuidle_sysfs_monitor.hw_states_num = sysfs_get_idlestate_count(0); + + if (cpuidle_sysfs_monitor.hw_states_num == 0) + return NULL; + + for (num = 0; num < cpuidle_sysfs_monitor.hw_states_num; num++) { + tmp = sysfs_get_idlestate_name(0, num); + if (tmp == NULL) + continue; + + fix_up_intel_idle_driver_name(tmp, num); + strncpy(cpuidle_cstates[num].name, tmp, CSTATE_NAME_LEN - 1); + free(tmp); + + tmp = sysfs_get_idlestate_desc(0, num); + if (tmp == NULL) + continue; + strncpy(cpuidle_cstates[num].desc, tmp, CSTATE_DESC_LEN - 1); + free(tmp); + + cpuidle_cstates[num].range = RANGE_THREAD; + cpuidle_cstates[num].id = num; + cpuidle_cstates[num].get_count_percent = + cpuidle_get_count_percent; + }; + + /* Free this at program termination */ + previous_count = malloc(sizeof(long long *) * cpu_count); + current_count = malloc(sizeof(long long *) * cpu_count); + for (num = 0; num < cpu_count; num++) { + previous_count[num] = malloc(sizeof(long long) * + cpuidle_sysfs_monitor.hw_states_num); + current_count[num] = malloc(sizeof(long long) * + cpuidle_sysfs_monitor.hw_states_num); + } + + cpuidle_sysfs_monitor.name_len = strlen(cpuidle_sysfs_monitor.name); + return &cpuidle_sysfs_monitor; +} + +void cpuidle_unregister(void) +{ + int num; + + for (num = 0; num < cpu_count; num++) { + free(previous_count[num]); + free(current_count[num]); + } + free(previous_count); + free(current_count); +} + +struct cpuidle_monitor cpuidle_sysfs_monitor = { + .name = "Idle_Stats", + .hw_states = cpuidle_cstates, + .start = cpuidle_start, + .stop = cpuidle_stop, + .do_register = cpuidle_register, + .unregister = cpuidle_unregister, + .needs_root = 0, + .overflow_s = UINT_MAX, +}; diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c new file mode 100644 index 000000000000..ba4bf068380d --- /dev/null +++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c @@ -0,0 +1,448 @@ +/* + * (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc. + * + * Licensed under the terms of the GNU GPL License version 2. + * + * Output format inspired by Len Brown's <lenb@kernel.org> turbostat tool. + * + */ + + +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <signal.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <libgen.h> + +#include "idle_monitor/cpupower-monitor.h" +#include "idle_monitor/idle_monitors.h" +#include "helpers/helpers.h" + +/* Define pointers to all monitors. */ +#define DEF(x) & x ## _monitor , +struct cpuidle_monitor *all_monitors[] = { +#include "idle_monitors.def" +0 +}; + +static struct cpuidle_monitor *monitors[MONITORS_MAX]; +static unsigned int avail_monitors; + +static char *progname; + +enum operation_mode_e { list = 1, show, show_all }; +static int mode; +static int interval = 1; +static char *show_monitors_param; +static struct cpupower_topology cpu_top; + +/* ToDo: Document this in the manpage */ +static char range_abbr[RANGE_MAX] = { 'T', 'C', 'P', 'M', }; + +long long timespec_diff_us(struct timespec start, struct timespec end) +{ + struct timespec temp; + if ((end.tv_nsec - start.tv_nsec) < 0) { + temp.tv_sec = end.tv_sec - start.tv_sec - 1; + temp.tv_nsec = 1000000000 + end.tv_nsec - start.tv_nsec; + } else { + temp.tv_sec = end.tv_sec - start.tv_sec; + temp.tv_nsec = end.tv_nsec - start.tv_nsec; + } + return (temp.tv_sec * 1000000) + (temp.tv_nsec / 1000); +} + +void monitor_help(void) +{ + printf(_("cpupower monitor: [-m <mon1>,[<mon2>],.. ] command\n")); + printf(_("cpupower monitor: [-m <mon1>,[<mon2>],.. ] [ -i interval_sec ]\n")); + printf(_("cpupower monitor: -l\n")); + printf(_("\t command: pass an arbitrary command to measure specific workload\n")); + printf(_("\t -i: time intervall to measure for in seconds (default 1)\n")); + printf(_("\t -l: list available CPU sleep monitors (for use with -m)\n")); + printf(_("\t -m: show specific CPU sleep monitors only (in same order)\n")); + printf(_("\t -h: print this help\n")); + printf("\n"); + printf(_("only one of: -l, -m are allowed\nIf none of them is passed,")); + printf(_(" all supported monitors are shown\n")); +} + +void print_n_spaces(int n) +{ + int x; + for (x = 0; x < n; x++) + printf(" "); +} + +/* size of s must be at least n + 1 */ +int fill_string_with_spaces(char *s, int n) +{ + int len = strlen(s); + if (len > n) + return -1; + for (; len < n; len++) + s[len] = ' '; + s[len] = '\0'; + return 0; +} + +void print_header(int topology_depth) +{ + int unsigned mon; + int state, need_len, pr_mon_len; + cstate_t s; + char buf[128] = ""; + int percent_width = 4; + + fill_string_with_spaces(buf, topology_depth * 5 - 1); + printf("%s|", buf); + + for (mon = 0; mon < avail_monitors; mon++) { + pr_mon_len = 0; + need_len = monitors[mon]->hw_states_num * (percent_width + 3) + - 1; + if (mon != 0) { + printf("|| "); + need_len--; + } + sprintf(buf, "%s", monitors[mon]->name); + fill_string_with_spaces(buf, need_len); + printf("%s", buf); + } + printf("\n"); + + if (topology_depth > 2) + printf("PKG |"); + if (topology_depth > 1) + printf("CORE|"); + if (topology_depth > 0) + printf("CPU |"); + + for (mon = 0; mon < avail_monitors; mon++) { + if (mon != 0) + printf("|| "); + else + printf(" "); + for (state = 0; state < monitors[mon]->hw_states_num; state++) { + if (state != 0) + printf(" | "); + s = monitors[mon]->hw_states[state]; + sprintf(buf, "%s", s.name); + fill_string_with_spaces(buf, percent_width); + printf("%s", buf); + } + printf(" "); + } + printf("\n"); +} + + +void print_results(int topology_depth, int cpu) +{ + unsigned int mon; + int state, ret; + double percent; + unsigned long long result; + cstate_t s; + + if (topology_depth > 2) + printf("%4d|", cpu_top.core_info[cpu].pkg); + if (topology_depth > 1) + printf("%4d|", cpu_top.core_info[cpu].core); + if (topology_depth > 0) + printf("%4d|", cpu_top.core_info[cpu].cpu); + + for (mon = 0; mon < avail_monitors; mon++) { + if (mon != 0) + printf("||"); + + for (state = 0; state < monitors[mon]->hw_states_num; state++) { + if (state != 0) + printf("|"); + + s = monitors[mon]->hw_states[state]; + + if (s.get_count_percent) { + ret = s.get_count_percent(s.id, &percent, + cpu_top.core_info[cpu].cpu); + if (ret) + printf("******"); + else if (percent >= 100.0) + printf("%6.1f", percent); + else + printf("%6.2f", percent); + } else if (s.get_count) { + ret = s.get_count(s.id, &result, + cpu_top.core_info[cpu].cpu); + if (ret) + printf("******"); + else + printf("%6llu", result); + } else { + printf(_("Monitor %s, Counter %s has no count " + "function. Implementation error\n"), + monitors[mon]->name, s.name); + exit(EXIT_FAILURE); + } + } + } + /* cpu offline */ + if (cpu_top.core_info[cpu].pkg == -1 || + cpu_top.core_info[cpu].core == -1) { + printf(_(" *is offline\n")); + return; + } else + printf("\n"); +} + + +/* param: string passed by -m param (The list of monitors to show) + * + * Monitors must have been registered already, matching monitors + * are picked out and available monitors array is overridden + * with matching ones + * + * Monitors get sorted in the same order the user passes them +*/ + +static void parse_monitor_param(char *param) +{ + unsigned int num; + int mon, hits = 0; + char *tmp = param, *token; + struct cpuidle_monitor *tmp_mons[MONITORS_MAX]; + + + for (mon = 0; mon < MONITORS_MAX; mon++, tmp = NULL) { + token = strtok(tmp, ","); + if (token == NULL) + break; + if (strlen(token) >= MONITOR_NAME_LEN) { + printf(_("%s: max monitor name length" + " (%d) exceeded\n"), token, MONITOR_NAME_LEN); + continue; + } + + for (num = 0; num < avail_monitors; num++) { + if (!strcmp(monitors[num]->name, token)) { + dprint("Found requested monitor: %s\n", token); + tmp_mons[hits] = monitors[num]; + hits++; + } + } + } + if (hits == 0) { + printf(_("No matching monitor found in %s, " + "try -l option\n"), param); + monitor_help(); + exit(EXIT_FAILURE); + } + /* Override detected/registerd monitors array with requested one */ + memcpy(monitors, tmp_mons, + sizeof(struct cpuidle_monitor *) * MONITORS_MAX); + avail_monitors = hits; +} + +void list_monitors(void) +{ + unsigned int mon; + int state; + cstate_t s; + + for (mon = 0; mon < avail_monitors; mon++) { + printf(_("Monitor \"%s\" (%d states) - Might overflow after %u " + "s\n"), + monitors[mon]->name, monitors[mon]->hw_states_num, + monitors[mon]->overflow_s); + + for (state = 0; state < monitors[mon]->hw_states_num; state++) { + s = monitors[mon]->hw_states[state]; + /* + * ToDo show more state capabilities: + * percent, time (granlarity) + */ + printf("%s\t[%c] -> %s\n", s.name, range_abbr[s.range], + gettext(s.desc)); + } + } +} + +int fork_it(char **argv) +{ + int status; + unsigned int num; + unsigned long long timediff; + pid_t child_pid; + struct timespec start, end; + + child_pid = fork(); + clock_gettime(CLOCK_REALTIME, &start); + + for (num = 0; num < avail_monitors; num++) + monitors[num]->start(); + + if (!child_pid) { + /* child */ + execvp(argv[0], argv); + } else { + /* parent */ + if (child_pid == -1) { + perror("fork"); + exit(1); + } + + signal(SIGINT, SIG_IGN); + signal(SIGQUIT, SIG_IGN); + if (waitpid(child_pid, &status, 0) == -1) { + perror("wait"); + exit(1); + } + } + clock_gettime(CLOCK_REALTIME, &end); + for (num = 0; num < avail_monitors; num++) + monitors[num]->stop(); + + timediff = timespec_diff_us(start, end); + if (WIFEXITED(status)) + printf(_("%s took %.5f seconds and exited with status %d\n"), + argv[0], timediff / (1000.0 * 1000), + WEXITSTATUS(status)); + return 0; +} + +int do_interval_measure(int i) +{ + unsigned int num; + + for (num = 0; num < avail_monitors; num++) { + dprint("HW C-state residency monitor: %s - States: %d\n", + monitors[num]->name, monitors[num]->hw_states_num); + monitors[num]->start(); + } + sleep(i); + for (num = 0; num < avail_monitors; num++) + monitors[num]->stop(); + + return 0; +} + +static void cmdline(int argc, char *argv[]) +{ + int opt; + progname = basename(argv[0]); + + while ((opt = getopt(argc, argv, "+hli:m:")) != -1) { + switch (opt) { + case 'h': + monitor_help(); + exit(EXIT_SUCCESS); + case 'l': + if (mode) { + monitor_help(); + exit(EXIT_FAILURE); + } + mode = list; + break; + case 'i': + /* only allow -i with -m or no option */ + if (mode && mode != show) { + monitor_help(); + exit(EXIT_FAILURE); + } + interval = atoi(optarg); + break; + case 'm': + if (mode) { + monitor_help(); + exit(EXIT_FAILURE); + } + mode = show; + show_monitors_param = optarg; + break; + default: + monitor_help(); + exit(EXIT_FAILURE); + } + } + if (!mode) + mode = show_all; +} + +int cmd_monitor(int argc, char **argv) +{ + unsigned int num; + struct cpuidle_monitor *test_mon; + int cpu; + + cmdline(argc, argv); + cpu_count = get_cpu_topology(&cpu_top); + if (cpu_count < 0) { + printf(_("Cannot read number of available processors\n")); + return EXIT_FAILURE; + } + + dprint("System has up to %d CPU cores\n", cpu_count); + + for (num = 0; all_monitors[num]; num++) { + dprint("Try to register: %s\n", all_monitors[num]->name); + test_mon = all_monitors[num]->do_register(); + if (test_mon) { + if (test_mon->needs_root && !run_as_root) { + fprintf(stderr, _("Available monitor %s needs " + "root access\n"), test_mon->name); + continue; + } + monitors[avail_monitors] = test_mon; + dprint("%s registered\n", all_monitors[num]->name); + avail_monitors++; + } + } + + if (avail_monitors == 0) { + printf(_("No HW Cstate monitors found\n")); + return 1; + } + + if (mode == list) { + list_monitors(); + exit(EXIT_SUCCESS); + } + + if (mode == show) + parse_monitor_param(show_monitors_param); + + dprint("Packages: %d - Cores: %d - CPUs: %d\n", + cpu_top.pkgs, cpu_top.cores, cpu_count); + + /* + * if any params left, it must be a command to fork + */ + if (argc - optind) + fork_it(argv + optind); + else + do_interval_measure(interval); + + /* ToDo: Topology parsing needs fixing first to do + this more generically */ + if (cpu_top.pkgs > 1) + print_header(3); + else + print_header(1); + + for (cpu = 0; cpu < cpu_count; cpu++) { + if (cpu_top.pkgs > 1) + print_results(3, cpu); + else + print_results(1, cpu); + } + + for (num = 0; num < avail_monitors; num++) + monitors[num]->unregister(); + + cpu_topology_release(cpu_top); + return 0; +} diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h new file mode 100644 index 000000000000..9312ee1f2dbc --- /dev/null +++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h @@ -0,0 +1,68 @@ +/* + * (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc. + * + * Licensed under the terms of the GNU GPL License version 2. + * + */ + +#ifndef __CPUIDLE_INFO_HW__ +#define __CPUIDLE_INFO_HW__ + +#include <stdarg.h> +#include <time.h> + +#include "idle_monitor/idle_monitors.h" + +#define MONITORS_MAX 20 +#define MONITOR_NAME_LEN 20 +#define CSTATE_NAME_LEN 5 +#define CSTATE_DESC_LEN 60 + +int cpu_count; + +/* Hard to define the right names ...: */ +enum power_range_e { + RANGE_THREAD, /* Lowest in topology hierarcy, AMD: core, Intel: thread + kernel sysfs: cpu */ + RANGE_CORE, /* AMD: unit, Intel: core, kernel_sysfs: core_id */ + RANGE_PACKAGE, /* Package, processor socket */ + RANGE_MACHINE, /* Machine, platform wide */ + RANGE_MAX }; + +typedef struct cstate { + int id; + enum power_range_e range; + char name[CSTATE_NAME_LEN]; + char desc[CSTATE_DESC_LEN]; + + /* either provide a percentage or a general count */ + int (*get_count_percent)(unsigned int self_id, double *percent, + unsigned int cpu); + int (*get_count)(unsigned int self_id, unsigned long long *count, + unsigned int cpu); +} cstate_t; + +struct cpuidle_monitor { + /* Name must not contain whitespaces */ + char name[MONITOR_NAME_LEN]; + int name_len; + int hw_states_num; + cstate_t *hw_states; + int (*start) (void); + int (*stop) (void); + struct cpuidle_monitor* (*do_register) (void); + void (*unregister)(void); + unsigned int overflow_s; + int needs_root; +}; + +extern long long timespec_diff_us(struct timespec start, struct timespec end); + +#define print_overflow_err(mes, ov) \ +{ \ + fprintf(stderr, gettext("Measure took %u seconds, but registers could " \ + "overflow at %u seconds, results " \ + "could be inaccurate\n"), mes, ov); \ +} + +#endif /* __CPUIDLE_INFO_HW__ */ diff --git a/tools/power/cpupower/utils/idle_monitor/idle_monitors.def b/tools/power/cpupower/utils/idle_monitor/idle_monitors.def new file mode 100644 index 000000000000..e3f8d9b2b18f --- /dev/null +++ b/tools/power/cpupower/utils/idle_monitor/idle_monitors.def @@ -0,0 +1,7 @@ +#if defined(__i386__) || defined(__x86_64__) +DEF(amd_fam14h) +DEF(intel_nhm) +DEF(intel_snb) +DEF(mperf) +#endif +DEF(cpuidle_sysfs) diff --git a/tools/power/cpupower/utils/idle_monitor/idle_monitors.h b/tools/power/cpupower/utils/idle_monitor/idle_monitors.h new file mode 100644 index 000000000000..4fcdeb1e07e8 --- /dev/null +++ b/tools/power/cpupower/utils/idle_monitor/idle_monitors.h @@ -0,0 +1,18 @@ +/* + * (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc. + * + * Licensed under the terms of the GNU GPL License version 2. + * + * Based on the idea from Michael Matz <matz@suse.de> + * + */ + +#ifndef _CPUIDLE_IDLE_MONITORS_H_ +#define _CPUIDLE_IDLE_MONITORS_H_ + +#define DEF(x) extern struct cpuidle_monitor x ##_monitor; +#include "idle_monitors.def" +#undef DEF +extern struct cpuidle_monitor *all_monitors[]; + +#endif /* _CPUIDLE_IDLE_MONITORS_H_ */ diff --git a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c new file mode 100644 index 000000000000..63ca87a05e5f --- /dev/null +++ b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c @@ -0,0 +1,255 @@ +/* + * (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc. + * + * Licensed under the terms of the GNU GPL License version 2. + */ + +#if defined(__i386__) || defined(__x86_64__) + +#include <stdio.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <limits.h> + +#include <cpufreq.h> + +#include "helpers/helpers.h" +#include "idle_monitor/cpupower-monitor.h" + +#define MSR_APERF 0xE8 +#define MSR_MPERF 0xE7 + +#define MSR_TSC 0x10 + +enum mperf_id { C0 = 0, Cx, AVG_FREQ, MPERF_CSTATE_COUNT }; + +static int mperf_get_count_percent(unsigned int self_id, double *percent, + unsigned int cpu); +static int mperf_get_count_freq(unsigned int id, unsigned long long *count, + unsigned int cpu); + +static cstate_t mperf_cstates[MPERF_CSTATE_COUNT] = { + { + .name = "C0", + .desc = N_("Processor Core not idle"), + .id = C0, + .range = RANGE_THREAD, + .get_count_percent = mperf_get_count_percent, + }, + { + .name = "Cx", + .desc = N_("Processor Core in an idle state"), + .id = Cx, + .range = RANGE_THREAD, + .get_count_percent = mperf_get_count_percent, + }, + + { + .name = "Freq", + .desc = N_("Average Frequency (including boost) in MHz"), + .id = AVG_FREQ, + .range = RANGE_THREAD, + .get_count = mperf_get_count_freq, + }, +}; + +static unsigned long long tsc_at_measure_start; +static unsigned long long tsc_at_measure_end; +static unsigned long max_frequency; +static unsigned long long *mperf_previous_count; +static unsigned long long *aperf_previous_count; +static unsigned long long *mperf_current_count; +static unsigned long long *aperf_current_count; +/* valid flag for all CPUs. If a MSR read failed it will be zero */ +static int *is_valid; + +static int mperf_get_tsc(unsigned long long *tsc) +{ + return read_msr(0, MSR_TSC, tsc); +} + +static int mperf_init_stats(unsigned int cpu) +{ + unsigned long long val; + int ret; + + ret = read_msr(cpu, MSR_APERF, &val); + aperf_previous_count[cpu] = val; + ret |= read_msr(cpu, MSR_MPERF, &val); + mperf_previous_count[cpu] = val; + is_valid[cpu] = !ret; + + return 0; +} + +static int mperf_measure_stats(unsigned int cpu) +{ + unsigned long long val; + int ret; + + ret = read_msr(cpu, MSR_APERF, &val); + aperf_current_count[cpu] = val; + ret |= read_msr(cpu, MSR_MPERF, &val); + mperf_current_count[cpu] = val; + is_valid[cpu] = !ret; + + return 0; +} + +/* + * get_average_perf() + * + * Returns the average performance (also considers boosted frequencies) + * + * Input: + * aperf_diff: Difference of the aperf register over a time period + * mperf_diff: Difference of the mperf register over the same time period + * max_freq: Maximum frequency (P0) + * + * Returns: + * Average performance over the time period + */ +static unsigned long get_average_perf(unsigned long long aperf_diff, + unsigned long long mperf_diff) +{ + unsigned int perf_percent = 0; + if (((unsigned long)(-1) / 100) < aperf_diff) { + int shift_count = 7; + aperf_diff >>= shift_count; + mperf_diff >>= shift_count; + } + perf_percent = (aperf_diff * 100) / mperf_diff; + return (max_frequency * perf_percent) / 100; +} + +static int mperf_get_count_percent(unsigned int id, double *percent, + unsigned int cpu) +{ + unsigned long long aperf_diff, mperf_diff, tsc_diff; + + if (!is_valid[cpu]) + return -1; + + if (id != C0 && id != Cx) + return -1; + + mperf_diff = mperf_current_count[cpu] - mperf_previous_count[cpu]; + aperf_diff = aperf_current_count[cpu] - aperf_previous_count[cpu]; + tsc_diff = tsc_at_measure_end - tsc_at_measure_start; + + *percent = 100.0 * mperf_diff / tsc_diff; + dprint("%s: mperf_diff: %llu, tsc_diff: %llu\n", + mperf_cstates[id].name, mperf_diff, tsc_diff); + + if (id == Cx) + *percent = 100.0 - *percent; + + dprint("%s: previous: %llu - current: %llu - (%u)\n", + mperf_cstates[id].name, mperf_diff, aperf_diff, cpu); + dprint("%s: %f\n", mperf_cstates[id].name, *percent); + return 0; +} + +static int mperf_get_count_freq(unsigned int id, unsigned long long *count, + unsigned int cpu) +{ + unsigned long long aperf_diff, mperf_diff; + + if (id != AVG_FREQ) + return 1; + + if (!is_valid[cpu]) + return -1; + + mperf_diff = mperf_current_count[cpu] - mperf_previous_count[cpu]; + aperf_diff = aperf_current_count[cpu] - aperf_previous_count[cpu]; + + /* Return MHz for now, might want to return KHz if column width is more + generic */ + *count = get_average_perf(aperf_diff, mperf_diff) / 1000; + dprint("%s: %llu\n", mperf_cstates[id].name, *count); + + return 0; +} + +static int mperf_start(void) +{ + int cpu; + unsigned long long dbg; + + mperf_get_tsc(&tsc_at_measure_start); + + for (cpu = 0; cpu < cpu_count; cpu++) + mperf_init_stats(cpu); + + mperf_get_tsc(&dbg); + dprint("TSC diff: %llu\n", dbg - tsc_at_measure_start); + return 0; +} + +static int mperf_stop(void) +{ + unsigned long long dbg; + int cpu; + + mperf_get_tsc(&tsc_at_measure_end); + + for (cpu = 0; cpu < cpu_count; cpu++) + mperf_measure_stats(cpu); + + mperf_get_tsc(&dbg); + dprint("TSC diff: %llu\n", dbg - tsc_at_measure_end); + + return 0; +} + +struct cpuidle_monitor mperf_monitor; + +struct cpuidle_monitor *mperf_register(void) +{ + unsigned long min; + + if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_APERF)) + return NULL; + + /* Assume min/max all the same on all cores */ + if (cpufreq_get_hardware_limits(0, &min, &max_frequency)) { + dprint("Cannot retrieve max freq from cpufreq kernel " + "subsystem\n"); + return NULL; + } + + /* Free this at program termination */ + is_valid = calloc(cpu_count, sizeof(int)); + mperf_previous_count = calloc(cpu_count, sizeof(unsigned long long)); + aperf_previous_count = calloc(cpu_count, sizeof(unsigned long long)); + mperf_current_count = calloc(cpu_count, sizeof(unsigned long long)); + aperf_current_count = calloc(cpu_count, sizeof(unsigned long long)); + + mperf_monitor.name_len = strlen(mperf_monitor.name); + return &mperf_monitor; +} + +void mperf_unregister(void) +{ + free(mperf_previous_count); + free(aperf_previous_count); + free(mperf_current_count); + free(aperf_current_count); + free(is_valid); +} + +struct cpuidle_monitor mperf_monitor = { + .name = "Mperf", + .hw_states_num = MPERF_CSTATE_COUNT, + .hw_states = mperf_cstates, + .start = mperf_start, + .stop = mperf_stop, + .do_register = mperf_register, + .unregister = mperf_unregister, + .needs_root = 1, + .overflow_s = 922000000 /* 922337203 seconds TSC overflow + at 20GHz */ +}; +#endif /* #if defined(__i386__) || defined(__x86_64__) */ diff --git a/tools/power/cpupower/utils/idle_monitor/nhm_idle.c b/tools/power/cpupower/utils/idle_monitor/nhm_idle.c new file mode 100644 index 000000000000..d2a91dd0d563 --- /dev/null +++ b/tools/power/cpupower/utils/idle_monitor/nhm_idle.c @@ -0,0 +1,216 @@ +/* + * (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc. + * + * Licensed under the terms of the GNU GPL License version 2. + * + * Based on Len Brown's <lenb@kernel.org> turbostat tool. + */ + +#if defined(__i386__) || defined(__x86_64__) + +#include <stdio.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#include "helpers/helpers.h" +#include "idle_monitor/cpupower-monitor.h" + +#define MSR_PKG_C3_RESIDENCY 0x3F8 +#define MSR_PKG_C6_RESIDENCY 0x3F9 +#define MSR_CORE_C3_RESIDENCY 0x3FC +#define MSR_CORE_C6_RESIDENCY 0x3FD + +#define MSR_TSC 0x10 + +#define NHM_CSTATE_COUNT 4 + +enum intel_nhm_id { C3 = 0, C6, PC3, PC6, TSC = 0xFFFF }; + +static int nhm_get_count_percent(unsigned int self_id, double *percent, + unsigned int cpu); + +static cstate_t nhm_cstates[NHM_CSTATE_COUNT] = { + { + .name = "C3", + .desc = N_("Processor Core C3"), + .id = C3, + .range = RANGE_CORE, + .get_count_percent = nhm_get_count_percent, + }, + { + .name = "C6", + .desc = N_("Processor Core C6"), + .id = C6, + .range = RANGE_CORE, + .get_count_percent = nhm_get_count_percent, + }, + + { + .name = "PC3", + .desc = N_("Processor Package C3"), + .id = PC3, + .range = RANGE_PACKAGE, + .get_count_percent = nhm_get_count_percent, + }, + { + .name = "PC6", + .desc = N_("Processor Package C6"), + .id = PC6, + .range = RANGE_PACKAGE, + .get_count_percent = nhm_get_count_percent, + }, +}; + +static unsigned long long tsc_at_measure_start; +static unsigned long long tsc_at_measure_end; +static unsigned long long *previous_count[NHM_CSTATE_COUNT]; +static unsigned long long *current_count[NHM_CSTATE_COUNT]; +/* valid flag for all CPUs. If a MSR read failed it will be zero */ +static int *is_valid; + +static int nhm_get_count(enum intel_nhm_id id, unsigned long long *val, + unsigned int cpu) +{ + int msr; + + switch (id) { + case C3: + msr = MSR_CORE_C3_RESIDENCY; + break; + case C6: + msr = MSR_CORE_C6_RESIDENCY; + break; + case PC3: + msr = MSR_PKG_C3_RESIDENCY; + break; + case PC6: + msr = MSR_PKG_C6_RESIDENCY; + break; + case TSC: + msr = MSR_TSC; + break; + default: + return -1; + }; + if (read_msr(cpu, msr, val)) + return -1; + + return 0; +} + +static int nhm_get_count_percent(unsigned int id, double *percent, + unsigned int cpu) +{ + *percent = 0.0; + + if (!is_valid[cpu]) + return -1; + + *percent = (100.0 * + (current_count[id][cpu] - previous_count[id][cpu])) / + (tsc_at_measure_end - tsc_at_measure_start); + + dprint("%s: previous: %llu - current: %llu - (%u)\n", + nhm_cstates[id].name, previous_count[id][cpu], + current_count[id][cpu], cpu); + + dprint("%s: tsc_diff: %llu - count_diff: %llu - percent: %2.f (%u)\n", + nhm_cstates[id].name, + (unsigned long long) tsc_at_measure_end - tsc_at_measure_start, + current_count[id][cpu] - previous_count[id][cpu], + *percent, cpu); + + return 0; +} + +static int nhm_start(void) +{ + int num, cpu; + unsigned long long dbg, val; + + nhm_get_count(TSC, &tsc_at_measure_start, 0); + + for (num = 0; num < NHM_CSTATE_COUNT; num++) { + for (cpu = 0; cpu < cpu_count; cpu++) { + is_valid[cpu] = !nhm_get_count(num, &val, cpu); + previous_count[num][cpu] = val; + } + } + nhm_get_count(TSC, &dbg, 0); + dprint("TSC diff: %llu\n", dbg - tsc_at_measure_start); + return 0; +} + +static int nhm_stop(void) +{ + unsigned long long val; + unsigned long long dbg; + int num, cpu; + + nhm_get_count(TSC, &tsc_at_measure_end, 0); + + for (num = 0; num < NHM_CSTATE_COUNT; num++) { + for (cpu = 0; cpu < cpu_count; cpu++) { + is_valid[cpu] = !nhm_get_count(num, &val, cpu); + current_count[num][cpu] = val; + } + } + nhm_get_count(TSC, &dbg, 0); + dprint("TSC diff: %llu\n", dbg - tsc_at_measure_end); + + return 0; +} + +struct cpuidle_monitor intel_nhm_monitor; + +struct cpuidle_monitor *intel_nhm_register(void) +{ + int num; + + if (cpupower_cpu_info.vendor != X86_VENDOR_INTEL) + return NULL; + + if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_INV_TSC)) + return NULL; + + if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_APERF)) + return NULL; + + /* Free this at program termination */ + is_valid = calloc(cpu_count, sizeof(int)); + for (num = 0; num < NHM_CSTATE_COUNT; num++) { + previous_count[num] = calloc(cpu_count, + sizeof(unsigned long long)); + current_count[num] = calloc(cpu_count, + sizeof(unsigned long long)); + } + + intel_nhm_monitor.name_len = strlen(intel_nhm_monitor.name); + return &intel_nhm_monitor; +} + +void intel_nhm_unregister(void) +{ + int num; + + for (num = 0; num < NHM_CSTATE_COUNT; num++) { + free(previous_count[num]); + free(current_count[num]); + } + free(is_valid); +} + +struct cpuidle_monitor intel_nhm_monitor = { + .name = "Nehalem", + .hw_states_num = NHM_CSTATE_COUNT, + .hw_states = nhm_cstates, + .start = nhm_start, + .stop = nhm_stop, + .do_register = intel_nhm_register, + .unregister = intel_nhm_unregister, + .needs_root = 1, + .overflow_s = 922000000 /* 922337203 seconds TSC overflow + at 20GHz */ +}; +#endif diff --git a/tools/power/cpupower/utils/idle_monitor/snb_idle.c b/tools/power/cpupower/utils/idle_monitor/snb_idle.c new file mode 100644 index 000000000000..a1bc07cd53e1 --- /dev/null +++ b/tools/power/cpupower/utils/idle_monitor/snb_idle.c @@ -0,0 +1,190 @@ +/* + * (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc. + * + * Licensed under the terms of the GNU GPL License version 2. + * + * Based on Len Brown's <lenb@kernel.org> turbostat tool. + */ + +#if defined(__i386__) || defined(__x86_64__) + +#include <stdio.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#include "helpers/helpers.h" +#include "idle_monitor/cpupower-monitor.h" + +#define MSR_PKG_C2_RESIDENCY 0x60D +#define MSR_PKG_C7_RESIDENCY 0x3FA +#define MSR_CORE_C7_RESIDENCY 0x3FE + +#define MSR_TSC 0x10 + +enum intel_snb_id { C7 = 0, PC2, PC7, SNB_CSTATE_COUNT, TSC = 0xFFFF }; + +static int snb_get_count_percent(unsigned int self_id, double *percent, + unsigned int cpu); + +static cstate_t snb_cstates[SNB_CSTATE_COUNT] = { + { + .name = "C7", + .desc = N_("Processor Core C7"), + .id = C7, + .range = RANGE_CORE, + .get_count_percent = snb_get_count_percent, + }, + { + .name = "PC2", + .desc = N_("Processor Package C2"), + .id = PC2, + .range = RANGE_PACKAGE, + .get_count_percent = snb_get_count_percent, + }, + { + .name = "PC7", + .desc = N_("Processor Package C7"), + .id = PC7, + .range = RANGE_PACKAGE, + .get_count_percent = snb_get_count_percent, + }, +}; + +static unsigned long long tsc_at_measure_start; +static unsigned long long tsc_at_measure_end; +static unsigned long long *previous_count[SNB_CSTATE_COUNT]; +static unsigned long long *current_count[SNB_CSTATE_COUNT]; +/* valid flag for all CPUs. If a MSR read failed it will be zero */ +static int *is_valid; + +static int snb_get_count(enum intel_snb_id id, unsigned long long *val, + unsigned int cpu) +{ + int msr; + + switch (id) { + case C7: + msr = MSR_CORE_C7_RESIDENCY; + break; + case PC2: + msr = MSR_PKG_C2_RESIDENCY; + break; + case PC7: + msr = MSR_PKG_C7_RESIDENCY; + break; + case TSC: + msr = MSR_TSC; + break; + default: + return -1; + }; + if (read_msr(cpu, msr, val)) + return -1; + return 0; +} + +static int snb_get_count_percent(unsigned int id, double *percent, + unsigned int cpu) +{ + *percent = 0.0; + + if (!is_valid[cpu]) + return -1; + + *percent = (100.0 * + (current_count[id][cpu] - previous_count[id][cpu])) / + (tsc_at_measure_end - tsc_at_measure_start); + + dprint("%s: previous: %llu - current: %llu - (%u)\n", + snb_cstates[id].name, previous_count[id][cpu], + current_count[id][cpu], cpu); + + dprint("%s: tsc_diff: %llu - count_diff: %llu - percent: %2.f (%u)\n", + snb_cstates[id].name, + (unsigned long long) tsc_at_measure_end - tsc_at_measure_start, + current_count[id][cpu] - previous_count[id][cpu], + *percent, cpu); + + return 0; +} + +static int snb_start(void) +{ + int num, cpu; + unsigned long long val; + + for (num = 0; num < SNB_CSTATE_COUNT; num++) { + for (cpu = 0; cpu < cpu_count; cpu++) { + snb_get_count(num, &val, cpu); + previous_count[num][cpu] = val; + } + } + snb_get_count(TSC, &tsc_at_measure_start, 0); + return 0; +} + +static int snb_stop(void) +{ + unsigned long long val; + int num, cpu; + + snb_get_count(TSC, &tsc_at_measure_end, 0); + + for (num = 0; num < SNB_CSTATE_COUNT; num++) { + for (cpu = 0; cpu < cpu_count; cpu++) { + is_valid[cpu] = !snb_get_count(num, &val, cpu); + current_count[num][cpu] = val; + } + } + return 0; +} + +struct cpuidle_monitor intel_snb_monitor; + +static struct cpuidle_monitor *snb_register(void) +{ + int num; + + if (cpupower_cpu_info.vendor != X86_VENDOR_INTEL + || cpupower_cpu_info.family != 6) + return NULL; + + if (cpupower_cpu_info.model != 0x2A + && cpupower_cpu_info.model != 0x2D) + return NULL; + + is_valid = calloc(cpu_count, sizeof(int)); + for (num = 0; num < SNB_CSTATE_COUNT; num++) { + previous_count[num] = calloc(cpu_count, + sizeof(unsigned long long)); + current_count[num] = calloc(cpu_count, + sizeof(unsigned long long)); + } + intel_snb_monitor.name_len = strlen(intel_snb_monitor.name); + return &intel_snb_monitor; +} + +void snb_unregister(void) +{ + int num; + free(is_valid); + for (num = 0; num < SNB_CSTATE_COUNT; num++) { + free(previous_count[num]); + free(current_count[num]); + } +} + +struct cpuidle_monitor intel_snb_monitor = { + .name = "SandyBridge", + .hw_states = snb_cstates, + .hw_states_num = SNB_CSTATE_COUNT, + .start = snb_start, + .stop = snb_stop, + .do_register = snb_register, + .unregister = snb_unregister, + .needs_root = 1, + .overflow_s = 922000000 /* 922337203 seconds TSC overflow + at 20GHz */ +}; +#endif /* defined(__i386__) || defined(__x86_64__) */ |