From d7cddbb07b18cc45e57420005850ca0326b9e074 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Mon, 7 Apr 2014 15:16:57 +0200 Subject: [PATCH 01/11] ACPI / tools: Introduce ec_access.c - tool to access the EC This userspace tool accesses the EC through the ec_sys debug driver (through /sys/kernel/debug/ec/ec0/io). The EC command/data registers cannot be accessed directly, because they may be manipulated by the AML interpreter in parallel. The ec_sys driver synchronizes user space (debug) access with the AML interpreter. Signed-off-by: Thomas Renninger [rjw: Changelog] Signed-off-by: Rafael J. Wysocki --- tools/power/acpi/Makefile | 2 + tools/power/acpi/tools/ec/Makefile | 22 +++ tools/power/acpi/tools/ec/ec_access.c | 238 ++++++++++++++++++++++++++ 3 files changed, 262 insertions(+) create mode 100644 tools/power/acpi/tools/ec/Makefile create mode 100644 tools/power/acpi/tools/ec/ec_access.c diff --git a/tools/power/acpi/Makefile b/tools/power/acpi/Makefile index c2c0f20067a5..c225d6d9fb29 100644 --- a/tools/power/acpi/Makefile +++ b/tools/power/acpi/Makefile @@ -19,6 +19,8 @@ OUTDIR := $(shell cd $(OUTPUT) && /bin/pwd) $(if $(OUTDIR),, $(error output directory "$(OUTPUT)" does not exist)) endif +SUBDIRS = tools/ec + # --- CONFIGURATION BEGIN --- # Set the following to `true' to make a unstripped, unoptimized diff --git a/tools/power/acpi/tools/ec/Makefile b/tools/power/acpi/tools/ec/Makefile new file mode 100644 index 000000000000..b7b0b929bd32 --- /dev/null +++ b/tools/power/acpi/tools/ec/Makefile @@ -0,0 +1,22 @@ +ec_access: ec_access.o + $(ECHO) " LD " $@ + $(QUIET) $(LD) $(CFLAGS) $(LDFLAGS) $< -o $@ + $(QUIET) $(STRIPCMD) $@ + +%.o: %.c + $(ECHO) " CC " $@ + $(QUIET) $(CC) -c $(CFLAGS) -o $@ $< + +all: ec_access + +install: + $(INSTALL) -d $(DESTDIR)${sbindir} + $(INSTALL_PROGRAM) ec_access $(DESTDIR)${sbindir} + +uninstall: + - rm -f $(DESTDIR)${sbindir}/ec_access + +clean: + -rm -f $(OUTPUT)ec_access + +.PHONY: all install uninstall diff --git a/tools/power/acpi/tools/ec/ec_access.c b/tools/power/acpi/tools/ec/ec_access.c new file mode 100644 index 000000000000..6b8aaed44f2c --- /dev/null +++ b/tools/power/acpi/tools/ec/ec_access.c @@ -0,0 +1,238 @@ +/* + * ec_access.c + * + * Copyright (C) 2010 SUSE Linux Products GmbH + * Author: + * Thomas Renninger + * + * This work is licensed under the terms of the GNU GPL, version 2. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#define EC_SPACE_SIZE 256 +#define SYSFS_PATH "/sys/kernel/debug/ec/ec0/io" + +/* TBD/Enhancements: + - Provide param for accessing different ECs (not supported by kernel yet) +*/ + +static int read_mode = -1; +static int sleep_time; +static int write_byte_offset = -1; +static int read_byte_offset = -1; +static uint8_t write_value = -1; + +void usage(char progname[], int exit_status) +{ + printf("Usage:\n"); + printf("1) %s -r [-s sleep]\n", basename(progname)); + printf("2) %s -b byte_offset\n", basename(progname)); + printf("3) %s -w byte_offset -v value\n\n", basename(progname)); + + puts("\t-r [-s sleep] : Dump EC registers"); + puts("\t If sleep is given, sleep x seconds,"); + puts("\t re-read EC registers and show changes"); + puts("\t-b offset : Read value at byte_offset (in hex)"); + puts("\t-w offset -v value : Write value at byte_offset"); + puts("\t-h : Print this help\n\n"); + puts("Offsets and values are in hexadecimal number sytem."); + puts("The offset and value must be between 0 and 0xff."); + exit(exit_status); +} + +void parse_opts(int argc, char *argv[]) +{ + int c; + + while ((c = getopt(argc, argv, "rs:b:w:v:h")) != -1) { + + switch (c) { + case 'r': + if (read_mode != -1) + usage(argv[0], EXIT_FAILURE); + read_mode = 1; + break; + case 's': + if (read_mode != -1 && read_mode != 1) + usage(argv[0], EXIT_FAILURE); + + sleep_time = atoi(optarg); + if (sleep_time <= 0) { + sleep_time = 0; + usage(argv[0], EXIT_FAILURE); + printf("Bad sleep time: %s\n", optarg); + } + break; + case 'b': + if (read_mode != -1) + usage(argv[0], EXIT_FAILURE); + read_mode = 1; + read_byte_offset = strtoul(optarg, NULL, 16); + break; + case 'w': + if (read_mode != -1) + usage(argv[0], EXIT_FAILURE); + read_mode = 0; + write_byte_offset = strtoul(optarg, NULL, 16); + break; + case 'v': + write_value = strtoul(optarg, NULL, 16); + break; + case 'h': + usage(argv[0], EXIT_SUCCESS); + default: + fprintf(stderr, "Unknown option!\n"); + usage(argv[0], EXIT_FAILURE); + } + } + if (read_mode == 0) { + if (write_byte_offset < 0 || + write_byte_offset >= EC_SPACE_SIZE) { + fprintf(stderr, "Wrong byte offset 0x%.2x, valid: " + "[0-0x%.2x]\n", + write_byte_offset, EC_SPACE_SIZE - 1); + usage(argv[0], EXIT_FAILURE); + } + if (write_value < 0 || + write_value >= 255) { + fprintf(stderr, "Wrong byte offset 0x%.2x, valid:" + "[0-0xff]\n", write_byte_offset); + usage(argv[0], EXIT_FAILURE); + } + } + if (read_mode == 1 && read_byte_offset != -1) { + if (read_byte_offset < -1 || + read_byte_offset >= EC_SPACE_SIZE) { + fprintf(stderr, "Wrong byte offset 0x%.2x, valid: " + "[0-0x%.2x]\n", + read_byte_offset, EC_SPACE_SIZE - 1); + usage(argv[0], EXIT_FAILURE); + } + } + /* Add additional parameter checks here */ +} + +void dump_ec(int fd) +{ + char buf[EC_SPACE_SIZE]; + char buf2[EC_SPACE_SIZE]; + int byte_off, bytes_read; + + bytes_read = read(fd, buf, EC_SPACE_SIZE); + + if (bytes_read == -1) + err(EXIT_FAILURE, "Could not read from %s\n", SYSFS_PATH); + + if (bytes_read != EC_SPACE_SIZE) + fprintf(stderr, "Could only read %d bytes\n", bytes_read); + + printf(" 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F"); + for (byte_off = 0; byte_off < bytes_read; byte_off++) { + if ((byte_off % 16) == 0) + printf("\n%.2X: ", byte_off); + printf(" %.2x ", (uint8_t)buf[byte_off]); + } + printf("\n"); + + if (!sleep_time) + return; + + printf("\n"); + lseek(fd, 0, SEEK_SET); + sleep(sleep_time); + + bytes_read = read(fd, buf2, EC_SPACE_SIZE); + + if (bytes_read == -1) + err(EXIT_FAILURE, "Could not read from %s\n", SYSFS_PATH); + + if (bytes_read != EC_SPACE_SIZE) + fprintf(stderr, "Could only read %d bytes\n", bytes_read); + + printf(" 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F"); + for (byte_off = 0; byte_off < bytes_read; byte_off++) { + if ((byte_off % 16) == 0) + printf("\n%.2X: ", byte_off); + + if (buf[byte_off] == buf2[byte_off]) + printf(" %.2x ", (uint8_t)buf2[byte_off]); + else + printf("*%.2x ", (uint8_t)buf2[byte_off]); + } + printf("\n"); +} + +void read_ec_val(int fd, int byte_offset) +{ + uint8_t buf; + int error; + + error = lseek(fd, byte_offset, SEEK_SET); + if (error != byte_offset) + err(EXIT_FAILURE, "Cannot set offset to 0x%.2x", byte_offset); + + error = read(fd, &buf, 1); + if (error != 1) + err(EXIT_FAILURE, "Could not read byte 0x%.2x from %s\n", + byte_offset, SYSFS_PATH); + printf("0x%.2x\n", buf); + return; +} + +void write_ec_val(int fd, int byte_offset, uint8_t value) +{ + int error; + + error = lseek(fd, byte_offset, SEEK_SET); + if (error != byte_offset) + err(EXIT_FAILURE, "Cannot set offset to 0x%.2x", byte_offset); + + error = write(fd, &value, 1); + if (error != 1) + err(EXIT_FAILURE, "Cannot write value 0x%.2x to offset 0x%.2x", + value, byte_offset); +} + +int main(int argc, char *argv[]) +{ + int file_mode = O_RDONLY; + int fd; + + parse_opts(argc, argv); + + if (read_mode == 0) + file_mode = O_WRONLY; + else if (read_mode == 1) + file_mode = O_RDONLY; + else + usage(argv[0], EXIT_FAILURE); + + fd = open(SYSFS_PATH, file_mode); + if (fd == -1) + err(EXIT_FAILURE, "%s", SYSFS_PATH); + + if (read_mode) + if (read_byte_offset == -1) + dump_ec(fd); + else if (read_byte_offset < 0 || + read_byte_offset >= EC_SPACE_SIZE) + usage(argv[0], EXIT_FAILURE); + else + read_ec_val(fd, read_byte_offset); + else + write_ec_val(fd, write_byte_offset, write_value); + close(fd); + + exit(EXIT_SUCCESS); +} From 3482124a6a22c631df23958df497f000ba0e1667 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Thu, 1 May 2014 11:40:19 +0200 Subject: [PATCH 02/11] tools / power: turbostat: Drop temperature checks The Intel 64 and IA-32 Architectures Software Developer's Manual says that TjMax is stored in bits 23:16 of MSR_TEMPERATURE TARGET (0x1a2). That's 8 bits, not 7, so it must be masked with 0xFF rather than 0x7F. The manual has no mention of which values should be considered valid, which kind of implies that they all are. Arbitrarily discarding values outside a specific range is wrong. The upper range check had to be fixed recently (commit 144b44b1) and the lower range check is just as wrong. See bug #75071: https://bugzilla.kernel.org/show_bug.cgi?id=75071 There are many Xeon processor series with TjMax of 70, 71 or 80 degrees Celsius, way below the arbitrary 85 degrees Celsius limit. There may be other (past or future) models with even lower limits. So drop this arbitrary check. The only value that would be clearly invalid is 0. Everything else should be accepted. After these changes, turbostat is aligned with what the coretemp driver does. Signed-off-by: Jean Delvare Cc: Len Brown Acked-by: Guenter Roeck Reviewed-by: Josh Triplett Signed-off-by: Rafael J. Wysocki --- tools/power/x86/turbostat/turbostat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 7c9d8e71eb9e..d0396af99fa0 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1971,13 +1971,13 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk if (get_msr(0, MSR_IA32_TEMPERATURE_TARGET, &msr)) goto guess; - target_c_local = (msr >> 16) & 0x7F; + target_c_local = (msr >> 16) & 0xFF; if (verbose) fprintf(stderr, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", cpu, msr, target_c_local); - if (target_c_local < 85 || target_c_local > 127) + if (!target_c_local) goto guess; tcc_activation_temp = target_c_local; From e091abc7f92b45010992df1ceb5da023d8faf13b Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Thu, 24 Apr 2014 10:32:07 -0400 Subject: [PATCH 03/11] PM / tools: cpupower: add option to display values without round offs The command "cpupower frequency-info" can be used when using cpupower to monitor and test processor behaviour to determine if the processor is behaving as expected. This data can be compared to the output of /proc/cpuinfo or the output of /sys/devices/system/cpu/cpuX/cpufreq/scaling_available_frequencies to determine if the cpu is in an expected state. When doing this I noticed comparison test failures due to the way the data is displayed in cpupower. For example, [root@intel-s3e37-02 cpupower]# cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_available_frequencies 2262000 2261000 2128000 1995000 1862000 1729000 1596000 1463000 1330000 1197000 1064000 compared to [root@intel-s3e37-02 cpupower]# cpupower frequency-info analyzing CPU 0: driver: acpi-cpufreq CPUs which run at the same hardware frequency: 0 CPUs which need to have their frequency coordinated by software: 0 maximum transition latency: 10.0 us. hardware limits: 1.06 GHz - 2.26 GHz available frequency steps: 2.26 GHz, 2.26 GHz, 2.13 GHz, 2.00 GHz, 1.86 GHz, 1.73 GHz, 1.60 GHz, 1.46 GHz, 1.33 GHz, 1.20 GHz, 1.06 GHz available cpufreq governors: conservative, userspace, powersave, ondemand, performance current policy: frequency should be within 1.06 GHz and 2.26 GHz. The governor "performance" may decide which speed to use within this range. current CPU frequency is 2.26 GHz (asserted by call to hardware). boost state support: Supported: yes Active: yes shows very different values for the available frequency steps. The cpupower output rounds off values at 2 decimal points and this causes problems with test scripts. For example, with the data above, 1.064 is 1.06 1.197 is 1.20 1.596 is 1.60 1.995 is 2.00 2.128 is 2.13 and most confusingly, 2.261 is 2.26 2.262 is 2.26 Truncating these values serves no real purpose other than making the output pretty. Since the default has been to round off these values I am adding a -n/--no-rounding option to the cpupower utility that will display the data without rounding off the still significant digits. After patch, analyzing CPU 0: driver: acpi-cpufreq CPUs which run at the same hardware frequency: 0 CPUs which need to have their frequency coordinated by software: 0 maximum transition latency: 10.000 us. hardware limits: 1.064000 GHz - 2.262000 GHz available frequency steps: 2.262000 GHz, 2.261000 GHz, 2.128000 GHz, 1.995000 GHz, 1.862000 GHz, 1.729000 GHz, 1.596000 GHz, 1.463000 GHz, 1.330000 GHz, 1.197000 GHz, 1.064000 GHz available cpufreq governors: conservative, userspace, powersave, ondemand, performance current policy: frequency should be within 1.064000 GHz and 2.262000 GHz. The governor "performance" may decide which speed to use within this range. current CPU frequency is 2.262000 GHz (asserted by call to hardware). boost state support: Supported: yes Active: yes Acked-by: Thomas Renninger Signed-off-by: Prarit Bhargava [rjw: Subject] Signed-off-by: Rafael J. Wysocki --- .../cpupower/man/cpupower-frequency-info.1 | 3 + tools/power/cpupower/utils/cpufreq-info.c | 110 +++++++++++------- 2 files changed, 73 insertions(+), 40 deletions(-) diff --git a/tools/power/cpupower/man/cpupower-frequency-info.1 b/tools/power/cpupower/man/cpupower-frequency-info.1 index 4a1918ea8f9c..9c85a382e355 100644 --- a/tools/power/cpupower/man/cpupower-frequency-info.1 +++ b/tools/power/cpupower/man/cpupower-frequency-info.1 @@ -50,6 +50,9 @@ Prints out information like provided by the /proc/cpufreq interface in 2.4. and \fB\-m\fR \fB\-\-human\fR human\-readable output for the \-f, \-w, \-s and \-y parameters. .TP +\fB\-n\fR \fB\-\-no-rounding\fR +Output frequencies and latencies without rounding off values. +.TP .SH "REMARKS" .LP By default only values of core zero are displayed. How to display settings of diff --git a/tools/power/cpupower/utils/cpufreq-info.c b/tools/power/cpupower/utils/cpufreq-info.c index 28953c9a7bd5..b4b90a97662c 100644 --- a/tools/power/cpupower/utils/cpufreq-info.c +++ b/tools/power/cpupower/utils/cpufreq-info.c @@ -82,29 +82,42 @@ static void proc_cpufreq_output(void) } } +static int no_rounding; static void print_speed(unsigned long speed) { unsigned long tmp; - if (speed > 1000000) { - tmp = speed % 10000; - if (tmp >= 5000) - speed += 10000; - printf("%u.%02u GHz", ((unsigned int) speed/1000000), - ((unsigned int) (speed%1000000)/10000)); - } else if (speed > 100000) { - tmp = speed % 1000; - if (tmp >= 500) - speed += 1000; - printf("%u MHz", ((unsigned int) speed / 1000)); - } else if (speed > 1000) { - tmp = speed % 100; - if (tmp >= 50) - speed += 100; - printf("%u.%01u MHz", ((unsigned int) speed/1000), - ((unsigned int) (speed%1000)/100)); - } else - printf("%lu kHz", speed); + if (no_rounding) { + if (speed > 1000000) + printf("%u.%06u GHz", ((unsigned int) speed/1000000), + ((unsigned int) speed%1000000)); + else if (speed > 100000) + printf("%u MHz", (unsigned int) speed); + else if (speed > 1000) + printf("%u.%03u MHz", ((unsigned int) speed/1000), + (unsigned int) (speed%1000)); + else + printf("%lu kHz", speed); + } else { + if (speed > 1000000) { + tmp = speed%10000; + if (tmp >= 5000) + speed += 10000; + printf("%u.%02u GHz", ((unsigned int) speed/1000000), + ((unsigned int) (speed%1000000)/10000)); + } else if (speed > 100000) { + tmp = speed%1000; + if (tmp >= 500) + speed += 1000; + printf("%u MHz", ((unsigned int) speed/1000)); + } else if (speed > 1000) { + tmp = speed%100; + if (tmp >= 50) + speed += 100; + printf("%u.%01u MHz", ((unsigned int) speed/1000), + ((unsigned int) (speed%1000)/100)); + } + } return; } @@ -113,26 +126,38 @@ static void print_duration(unsigned long duration) { unsigned long tmp; - if (duration > 1000000) { - tmp = duration % 10000; - if (tmp >= 5000) - duration += 10000; - printf("%u.%02u ms", ((unsigned int) duration/1000000), - ((unsigned int) (duration%1000000)/10000)); - } else if (duration > 100000) { - tmp = duration % 1000; - if (tmp >= 500) - duration += 1000; - printf("%u us", ((unsigned int) duration / 1000)); - } else if (duration > 1000) { - tmp = duration % 100; - if (tmp >= 50) - duration += 100; - printf("%u.%01u us", ((unsigned int) duration/1000), - ((unsigned int) (duration%1000)/100)); - } else - printf("%lu ns", duration); - + if (no_rounding) { + if (duration > 1000000) + printf("%u.%06u ms", ((unsigned int) duration/1000000), + ((unsigned int) duration%1000000)); + else if (duration > 100000) + printf("%u us", ((unsigned int) duration/1000)); + else if (duration > 1000) + printf("%u.%03u us", ((unsigned int) duration/1000), + ((unsigned int) duration%1000)); + else + printf("%lu ns", duration); + } else { + if (duration > 1000000) { + tmp = duration%10000; + if (tmp >= 5000) + duration += 10000; + printf("%u.%02u ms", ((unsigned int) duration/1000000), + ((unsigned int) (duration%1000000)/10000)); + } else if (duration > 100000) { + tmp = duration%1000; + if (tmp >= 500) + duration += 1000; + printf("%u us", ((unsigned int) duration / 1000)); + } else if (duration > 1000) { + tmp = duration%100; + if (tmp >= 50) + duration += 100; + printf("%u.%01u us", ((unsigned int) duration/1000), + ((unsigned int) (duration%1000)/100)); + } else + printf("%lu ns", duration); + } return; } @@ -525,6 +550,7 @@ static struct option info_opts[] = { { .name = "latency", .has_arg = no_argument, .flag = NULL, .val = 'y'}, { .name = "proc", .has_arg = no_argument, .flag = NULL, .val = 'o'}, { .name = "human", .has_arg = no_argument, .flag = NULL, .val = 'm'}, + { .name = "no-rounding", .has_arg = no_argument, .flag = NULL, .val = 'n'}, { }, }; @@ -538,7 +564,8 @@ int cmd_freq_info(int argc, char **argv) int output_param = 0; do { - ret = getopt_long(argc, argv, "oefwldpgrasmyb", info_opts, NULL); + ret = getopt_long(argc, argv, "oefwldpgrasmybn", info_opts, + NULL); switch (ret) { case '?': output_param = '?'; @@ -575,6 +602,9 @@ int cmd_freq_info(int argc, char **argv) } human = 1; break; + case 'n': + no_rounding = 1; + break; default: fprintf(stderr, "invalid or unknown argument\n"); return EXIT_FAILURE; From a504c028c96b738d1579b0bfe73782f80d8696f6 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Tue, 13 May 2014 12:41:38 +0200 Subject: [PATCH 04/11] cpupower: Rename cpufrequtils -> cpupower, and libcpufreq -> libcpupower. Signed-off-by: Ramkumar Ramachandra Signed-off-by: Thomas Renninger Signed-off-by: Rafael J. Wysocki --- tools/power/cpupower/README | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/power/cpupower/README b/tools/power/cpupower/README index fd9d4c0d6688..96ff1ff9ae97 100644 --- a/tools/power/cpupower/README +++ b/tools/power/cpupower/README @@ -1,4 +1,4 @@ -The cpufrequtils package (homepage: +The cpupower package (homepage: http://www.kernel.org/pub/linux/utils/kernel/cpufreq/cpufrequtils.html ) consists of the following elements: @@ -11,10 +11,10 @@ providing cpuid.h is needed. For both it's not explicitly checked for (yet). -libcpufreq +libcpupower ---------- -"libcpufreq" is a library which offers a unified access method for userspace +"libcpupower" is a library which offers a unified access method for userspace tools and programs to the cpufreq core and drivers in the Linux kernel. This allows for code reduction in userspace tools, a clean implementation of the interaction to the cpufreq core, and support for both the sysfs and proc @@ -28,7 +28,7 @@ make su make install -should suffice on most systems. It builds default libcpufreq, +should suffice on most systems. It builds default libcpupower, cpufreq-set and cpufreq-info files and installs them in /usr/lib and /usr/bin, respectively. If you want to set up the paths differently and/or want to configure the package to your specific needs, you need to open @@ -39,11 +39,11 @@ CONFIGURATION. THANKS ------ Many thanks to Mattia Dongili who wrote the autotoolization and -libtoolization, the manpages and the italian language file for cpufrequtils; +libtoolization, the manpages and the italian language file for cpupower; to Dave Jones for his feedback and his dump_psb tool; to Bruno Ducrot for his powernow-k8-decode and intel_gsic tools as well as the french language file; and to various others commenting on the previous (pre-)releases of -cpufrequtils. +cpupower. Dominik Brodowski From db262ea4152a45bb35dd4e87e13bb234e0543a77 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Tue, 13 May 2014 12:41:39 +0200 Subject: [PATCH 05/11] cpupower: Remove dead link to homepage, and update the targets built. Signed-off-by: Ramkumar Ramachandra Signed-off-by: Thomas Renninger Signed-off-by: Rafael J. Wysocki --- tools/power/cpupower/README | 16 +++++++--------- tools/power/cpupower/ToDo | 1 - 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/tools/power/cpupower/README b/tools/power/cpupower/README index 96ff1ff9ae97..1c68f47663b2 100644 --- a/tools/power/cpupower/README +++ b/tools/power/cpupower/README @@ -1,6 +1,4 @@ -The cpupower package (homepage: -http://www.kernel.org/pub/linux/utils/kernel/cpufreq/cpufrequtils.html ) -consists of the following elements: +The cpupower package consists of the following elements: requirements ------------ @@ -28,12 +26,12 @@ make su make install -should suffice on most systems. It builds default libcpupower, -cpufreq-set and cpufreq-info files and installs them in /usr/lib and -/usr/bin, respectively. If you want to set up the paths differently and/or -want to configure the package to your specific needs, you need to open -"Makefile" with an editor of your choice and edit the block marked -CONFIGURATION. +should suffice on most systems. It builds libcpupower to put in +/usr/lib; cpupower, cpufreq-bench_plot.sh to put in /usr/bin; and +cpufreq-bench to put in /usr/sbin. If you want to set up the paths +differently and/or want to configure the package to your specific +needs, you need to open "Makefile" with an editor of your choice and +edit the block marked CONFIGURATION. THANKS diff --git a/tools/power/cpupower/ToDo b/tools/power/cpupower/ToDo index 874b78b586ee..6e8b89f282e6 100644 --- a/tools/power/cpupower/ToDo +++ b/tools/power/cpupower/ToDo @@ -3,7 +3,6 @@ ToDos sorted by priority: - Use bitmask functions to parse CPU topology more robust (current implementation has issues on AMD) - Try to read out boost states and frequencies on Intel -- Adjust README - Somewhere saw the ability to read power consumption of RAM from HW on Intel SandyBridge -> another monitor? - Add another c1e debug idle monitor From 706f4c1100e7131f2e8aa9552daba52fbf6b97cb Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Tue, 13 May 2014 12:41:40 +0200 Subject: [PATCH 06/11] cpupower: Remove all manpages on make uninstall Signed-off-by: Ramkumar Ramachandra Signed-off-by: Thomas Renninger Signed-off-by: Rafael J. Wysocki --- tools/power/cpupower/Makefile | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index cbfec92af327..dca6b4383d39 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -295,8 +295,12 @@ uninstall: - rm -f $(DESTDIR)${libdir}/libcpupower.* - rm -f $(DESTDIR)${includedir}/cpufreq.h - rm -f $(DESTDIR)${bindir}/utils/cpupower - - rm -f $(DESTDIR)${mandir}/man1/cpufreq-set.1 - - rm -f $(DESTDIR)${mandir}/man1/cpufreq-info.1 + - rm -f $(DESTDIR)${mandir}/man1/cpupower.1 + - rm -f $(DESTDIR)${mandir}/man1/cpupower-frequency-set.1 + - rm -f $(DESTDIR)${mandir}/man1/cpupower-frequency-info.1 + - rm -f $(DESTDIR)${mandir}/man1/cpupower-set.1 + - rm -f $(DESTDIR)${mandir}/man1/cpupower-info.1 + - rm -f $(DESTDIR)${mandir}/man1/cpupower-monitor.1 - for HLANG in $(LANGUAGES); do \ rm -f $(DESTDIR)${localedir}/$$HLANG/LC_MESSAGES/cpupower.mo; \ done; From 69cd502dd8432dcca24026efdd04192ec0e0c54e Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Tue, 13 May 2014 12:41:41 +0200 Subject: [PATCH 07/11] cpupower: Introduce idle state disable-by-latency and enable-all Signed-off-by: Thomas Renninger Signed-off-by: Rafael J. Wysocki --- tools/power/cpupower/man/cpupower-idle-set.1 | 10 ++- tools/power/cpupower/utils/cpuidle-set.c | 75 ++++++++++++++++++-- 2 files changed, 77 insertions(+), 8 deletions(-) diff --git a/tools/power/cpupower/man/cpupower-idle-set.1 b/tools/power/cpupower/man/cpupower-idle-set.1 index 6b1607272a5b..3e6799d7a79f 100644 --- a/tools/power/cpupower/man/cpupower-idle-set.1 +++ b/tools/power/cpupower/man/cpupower-idle-set.1 @@ -13,11 +13,17 @@ sleep states. This can be handy for power vs performance tuning. .SH "OPTIONS" .LP .TP -\fB\-d\fR \fB\-\-disable\fR +\fB\-d\fR \fB\-\-disable\fR Disable a specific processor sleep state. .TP -\fB\-e\fR \fB\-\-enable\fR +\fB\-e\fR \fB\-\-enable\fR Enable a specific processor sleep state. +.TP +\fB\-D\fR \fB\-\-disable-by-latency\fR +Disable all idle states with a equal or higher latency than +.TP +\fB\-E\fR \fB\-\-enable-all\fR +Enable all idle states if not enabled already. .SH "REMARKS" .LP diff --git a/tools/power/cpupower/utils/cpuidle-set.c b/tools/power/cpupower/utils/cpuidle-set.c index c78141c5dfac..d45d8d775c02 100644 --- a/tools/power/cpupower/utils/cpuidle-set.c +++ b/tools/power/cpupower/utils/cpuidle-set.c @@ -13,8 +13,14 @@ #include "helpers/sysfs.h" static struct option info_opts[] = { - { .name = "disable", .has_arg = required_argument, .flag = NULL, .val = 'd'}, - { .name = "enable", .has_arg = required_argument, .flag = NULL, .val = 'e'}, + { .name = "disable", + .has_arg = required_argument, .flag = NULL, .val = 'd'}, + { .name = "enable", + .has_arg = required_argument, .flag = NULL, .val = 'e'}, + { .name = "disable-by-latency", + .has_arg = required_argument, .flag = NULL, .val = 'D'}, + { .name = "enable-all", + .has_arg = no_argument, .flag = NULL, .val = 'E'}, { }, }; @@ -23,11 +29,13 @@ int cmd_idle_set(int argc, char **argv) { extern char *optarg; extern int optind, opterr, optopt; - int ret = 0, cont = 1, param = 0, idlestate = 0; - unsigned int cpu = 0; + int ret = 0, cont = 1, param = 0, disabled; + unsigned long long latency = 0, state_latency; + unsigned int cpu = 0, idlestate = 0, idlestates = 0; + char *endptr; do { - ret = getopt_long(argc, argv, "d:e:", info_opts, NULL); + ret = getopt_long(argc, argv, "d:e:ED:", info_opts, NULL); if (ret == -1) break; switch (ret) { @@ -53,6 +61,27 @@ int cmd_idle_set(int argc, char **argv) param = ret; idlestate = atoi(optarg); break; + case 'D': + if (param) { + param = -1; + cont = 0; + break; + } + param = ret; + latency = strtoull(optarg, &endptr, 10); + if (*endptr != '\0') { + printf(_("Bad latency value: %s\n"), optarg); + exit(EXIT_FAILURE); + } + break; + case 'E': + if (param) { + param = -1; + cont = 0; + break; + } + param = ret; + break; case -1: cont = 0; break; @@ -79,8 +108,14 @@ int cmd_idle_set(int argc, char **argv) if (!bitmask_isbitset(cpus_chosen, cpu)) continue; - switch (param) { + if (sysfs_is_cpu_online(cpu) != 1) + continue; + idlestates = sysfs_get_idlestate_count(cpu); + if (idlestates <= 0) + continue; + + switch (param) { case 'd': ret = sysfs_idlestate_disable(cpu, idlestate, 1); if (ret == 0) @@ -107,6 +142,34 @@ int cmd_idle_set(int argc, char **argv) printf(_("Idlestate %u not enabled on CPU %u\n"), idlestate, cpu); break; + case 'D': + for (idlestate = 0; idlestate < idlestates; idlestate++) { + disabled = sysfs_is_idlestate_disabled + (cpu, idlestate); + state_latency = sysfs_get_idlestate_latency + (cpu, idlestate); + printf("CPU: %u - idlestate %u - state_latency: %llu - latency: %llu\n", + cpu, idlestate, state_latency, latency); + if (disabled == 1 || latency > state_latency) + continue; + ret = sysfs_idlestate_disable + (cpu, idlestate, 1); + if (ret == 0) + printf(_("Idlestate %u disabled on CPU %u\n"), idlestate, cpu); + } + break; + case 'E': + for (idlestate = 0; idlestate < idlestates; idlestate++) { + disabled = sysfs_is_idlestate_disabled + (cpu, idlestate); + if (disabled == 1) { + ret = sysfs_idlestate_disable + (cpu, idlestate, 0); + if (ret == 0) + printf(_("Idlestate %u enabled on CPU %u\n"), idlestate, cpu); + } + } + break; default: /* Not reachable with proper args checking */ printf(_("Invalid or unknown argument\n")); From 84baab91772b5aeec9040cb33de6a46738b1aba8 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Tue, 13 May 2014 12:41:42 +0200 Subject: [PATCH 08/11] cpupower: Install recently added cpupower-idle-{set, info} manpages Signed-off-by: Thomas Renninger Signed-off-by: Rafael J. Wysocki --- tools/power/cpupower/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index dca6b4383d39..78ce109aa044 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -274,6 +274,8 @@ install-man: $(INSTALL_DATA) -D man/cpupower.1 $(DESTDIR)${mandir}/man1/cpupower.1 $(INSTALL_DATA) -D man/cpupower-frequency-set.1 $(DESTDIR)${mandir}/man1/cpupower-frequency-set.1 $(INSTALL_DATA) -D man/cpupower-frequency-info.1 $(DESTDIR)${mandir}/man1/cpupower-frequency-info.1 + $(INSTALL_DATA) -D man/cpupower-idle-set.1 $(DESTDIR)${mandir}/man1/cpupower-idle-set.1 + $(INSTALL_DATA) -D man/cpupower-idle-info.1 $(DESTDIR)${mandir}/man1/cpupower-idle-info.1 $(INSTALL_DATA) -D man/cpupower-set.1 $(DESTDIR)${mandir}/man1/cpupower-set.1 $(INSTALL_DATA) -D man/cpupower-info.1 $(DESTDIR)${mandir}/man1/cpupower-info.1 $(INSTALL_DATA) -D man/cpupower-monitor.1 $(DESTDIR)${mandir}/man1/cpupower-monitor.1 From 8a19cb586708361058e089b7c23b6f3eb33af6c6 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Tue, 13 May 2014 12:41:43 +0200 Subject: [PATCH 09/11] cpupower: If root, try to load msr driver on x86 if /dev/cpu/0/msr is not available Signed-off-by: Thomas Renninger Signed-off-by: Rafael J. Wysocki --- tools/power/cpupower/utils/cpupower.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tools/power/cpupower/utils/cpupower.c b/tools/power/cpupower/utils/cpupower.c index 7efc570ffbaa..7cdcf88659c7 100644 --- a/tools/power/cpupower/utils/cpupower.c +++ b/tools/power/cpupower/utils/cpupower.c @@ -12,6 +12,9 @@ #include #include #include +#include +#include +#include #include "builtin.h" #include "helpers/helpers.h" @@ -169,6 +172,8 @@ int main(int argc, const char *argv[]) { const char *cmd; unsigned int i, ret; + struct stat statbuf; + struct utsname uts; cpus_chosen = bitmask_alloc(sysconf(_SC_NPROCESSORS_CONF)); @@ -195,6 +200,15 @@ int main(int argc, const char *argv[]) get_cpu_info(0, &cpupower_cpu_info); run_as_root = !getuid(); + if (run_as_root) { + ret = uname(&uts); + if (!ret && !strcmp(uts.machine, "x86_64") && + stat("/dev/cpu/0/msr", &statbuf) != 0) { + if (system("modprobe msr") == -1) + fprintf(stderr, _("MSR access not available.\n")); + } + } + for (i = 0; i < ARRAY_SIZE(commands); i++) { struct cmd_struct *p = commands + i; From 3fc5a0e51aef4503b6a06ef35409370eed568684 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Tue, 13 May 2014 12:41:44 +0200 Subject: [PATCH 10/11] cpupower: cpupower info -b should return 0 on success, not the perf bias value Signed-off-by: Thomas Renninger Signed-off-by: Rafael J. Wysocki --- tools/power/cpupower/utils/cpupower-info.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tools/power/cpupower/utils/cpupower-info.c b/tools/power/cpupower/utils/cpupower-info.c index 3f68632c28c7..0ac25772bf63 100644 --- a/tools/power/cpupower/utils/cpupower-info.c +++ b/tools/power/cpupower/utils/cpupower-info.c @@ -125,11 +125,12 @@ int cmd_info(int argc, char **argv) if (params.perf_bias) { ret = msr_intel_get_perf_bias(cpu); if (ret < 0) { - printf(_("Could not read perf-bias value\n")); - break; + fprintf(stderr, + _("Could not read perf-bias value[%d]\n"), ret); + exit(EXIT_FAILURE); } else printf(_("perf-bias: %d\n"), ret); } } - return ret; + return 0; } From 7ea1bdb8e162ef7b90eef2450e9a2eaefeb58d61 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Tue, 13 May 2014 12:41:45 +0200 Subject: [PATCH 11/11] cpupower: Remove mc and smt power aware scheduler info/settings These kernel interfaces got removed by: commit 8e7fbcbc22c12414bcc9dfdd683637f58fb32759 Author: Peter Zijlstra Date: Mon Jan 9 11:28:35 2012 +0100 sched: Remove stale power aware scheduling remnants and dysfunctional knobs No need to further keep them as userspace configurations. Signed-off-by: Thomas Renninger Signed-off-by: Rafael J. Wysocki --- tools/power/cpupower/man/cpupower-info.1 | 2 +- tools/power/cpupower/man/cpupower-set.1 | 31 +--------------- tools/power/cpupower/utils/cpupower-info.c | 35 +----------------- tools/power/cpupower/utils/cpupower-set.c | 43 +--------------------- 4 files changed, 5 insertions(+), 106 deletions(-) diff --git a/tools/power/cpupower/man/cpupower-info.1 b/tools/power/cpupower/man/cpupower-info.1 index 58e21196f17f..340bcd0be7de 100644 --- a/tools/power/cpupower/man/cpupower-info.1 +++ b/tools/power/cpupower/man/cpupower-info.1 @@ -3,7 +3,7 @@ cpupower\-info \- Shows processor power related kernel or hardware configurations .SH SYNOPSIS .ft B -.B cpupower info [ \-b ] [ \-s ] [ \-m ] +.B cpupower info [ \-b ] .SH DESCRIPTION \fBcpupower info \fP shows kernel configurations or processor hardware diff --git a/tools/power/cpupower/man/cpupower-set.1 b/tools/power/cpupower/man/cpupower-set.1 index 9dbd536518ab..2bcc696f4496 100644 --- a/tools/power/cpupower/man/cpupower-set.1 +++ b/tools/power/cpupower/man/cpupower-set.1 @@ -3,7 +3,7 @@ cpupower\-set \- Set processor power related kernel or hardware configurations .SH SYNOPSIS .ft B -.B cpupower set [ \-b VAL ] [ \-s VAL ] [ \-m VAL ] +.B cpupower set [ \-b VAL ] .SH DESCRIPTION @@ -55,35 +55,6 @@ Use \fBcpupower -c all info -b\fP to verify. This options needs the msr kernel driver (CONFIG_X86_MSR) loaded. .RE -.PP -\-\-sched\-mc, \-m [ VAL ] -.RE -\-\-sched\-smt, \-s [ VAL ] -.RS 4 -\-\-sched\-mc utilizes cores in one processor package/socket first before -processes are scheduled to other processor packages/sockets. - -\-\-sched\-smt utilizes thread siblings of one processor core first before -processes are scheduled to other cores. - -The impact on power consumption and performance (positiv or negativ) heavily -depends on processor support for deep sleep states, frequency scaling and -frequency boost modes and their dependencies between other thread siblings -and processor cores. - -Taken over from kernel documentation: - -Adjust the kernel's multi-core scheduler support. - -Possible values are: -.RS 2 -0 - No power saving load balance (default value) - -1 - Fill one thread/core/package first for long running threads - -2 - Also bias task wakeups to semi-idle cpu package for power -savings -.RE .SH "SEE ALSO" cpupower-info(1), cpupower-monitor(1), powertop(1) diff --git a/tools/power/cpupower/utils/cpupower-info.c b/tools/power/cpupower/utils/cpupower-info.c index 0ac25772bf63..136d979e9586 100644 --- a/tools/power/cpupower/utils/cpupower-info.c +++ b/tools/power/cpupower/utils/cpupower-info.c @@ -18,8 +18,6 @@ static struct option set_opts[] = { { .name = "perf-bias", .has_arg = optional_argument, .flag = NULL, .val = 'b'}, - { .name = "sched-mc", .has_arg = optional_argument, .flag = NULL, .val = 'm'}, - { .name = "sched-smt", .has_arg = optional_argument, .flag = NULL, .val = 's'}, { }, }; @@ -37,8 +35,6 @@ int cmd_info(int argc, char **argv) union { struct { - int sched_mc:1; - int sched_smt:1; int perf_bias:1; }; int params; @@ -49,23 +45,13 @@ int cmd_info(int argc, char **argv) textdomain(PACKAGE); /* parameter parsing */ - while ((ret = getopt_long(argc, argv, "msb", set_opts, NULL)) != -1) { + while ((ret = getopt_long(argc, argv, "b", set_opts, NULL)) != -1) { switch (ret) { case 'b': if (params.perf_bias) print_wrong_arg_exit(); params.perf_bias = 1; break; - case 'm': - if (params.sched_mc) - print_wrong_arg_exit(); - params.sched_mc = 1; - break; - case 's': - if (params.sched_smt) - print_wrong_arg_exit(); - params.sched_smt = 1; - break; default: print_wrong_arg_exit(); } @@ -78,25 +64,6 @@ int cmd_info(int argc, char **argv) if (bitmask_isallclear(cpus_chosen)) bitmask_setbit(cpus_chosen, 0); - if (params.sched_mc) { - ret = sysfs_get_sched("mc"); - printf(_("System's multi core scheduler setting: ")); - if (ret < 0) - /* if sysfs file is missing it's: errno == ENOENT */ - printf(_("not supported\n")); - else - printf("%d\n", ret); - } - if (params.sched_smt) { - ret = sysfs_get_sched("smt"); - printf(_("System's thread sibling scheduler setting: ")); - if (ret < 0) - /* if sysfs file is missing it's: errno == ENOENT */ - printf(_("not supported\n")); - else - printf("%d\n", ret); - } - /* Add more per cpu options here */ if (!params.perf_bias) return ret; diff --git a/tools/power/cpupower/utils/cpupower-set.c b/tools/power/cpupower/utils/cpupower-set.c index bcf1d2f0b791..573c75f8e3f5 100644 --- a/tools/power/cpupower/utils/cpupower-set.c +++ b/tools/power/cpupower/utils/cpupower-set.c @@ -19,8 +19,6 @@ static struct option set_opts[] = { { .name = "perf-bias", .has_arg = required_argument, .flag = NULL, .val = 'b'}, - { .name = "sched-mc", .has_arg = required_argument, .flag = NULL, .val = 'm'}, - { .name = "sched-smt", .has_arg = required_argument, .flag = NULL, .val = 's'}, { }, }; @@ -38,13 +36,11 @@ int cmd_set(int argc, char **argv) union { struct { - int sched_mc:1; - int sched_smt:1; int perf_bias:1; }; int params; } params; - int sched_mc = 0, sched_smt = 0, perf_bias = 0; + int perf_bias = 0; int ret = 0; setlocale(LC_ALL, ""); @@ -52,7 +48,7 @@ int cmd_set(int argc, char **argv) params.params = 0; /* parameter parsing */ - while ((ret = getopt_long(argc, argv, "m:s:b:", + while ((ret = getopt_long(argc, argv, "b:", set_opts, NULL)) != -1) { switch (ret) { case 'b': @@ -66,28 +62,6 @@ int cmd_set(int argc, char **argv) } params.perf_bias = 1; break; - case 'm': - if (params.sched_mc) - print_wrong_arg_exit(); - sched_mc = atoi(optarg); - if (sched_mc < 0 || sched_mc > 2) { - printf(_("--sched-mc param out " - "of range [0-%d]\n"), 2); - print_wrong_arg_exit(); - } - params.sched_mc = 1; - break; - case 's': - if (params.sched_smt) - print_wrong_arg_exit(); - sched_smt = atoi(optarg); - if (sched_smt < 0 || sched_smt > 2) { - printf(_("--sched-smt param out " - "of range [0-%d]\n"), 2); - print_wrong_arg_exit(); - } - params.sched_smt = 1; - break; default: print_wrong_arg_exit(); } @@ -96,19 +70,6 @@ int cmd_set(int argc, char **argv) if (!params.params) print_wrong_arg_exit(); - if (params.sched_mc) { - ret = sysfs_set_sched("mc", sched_mc); - if (ret) - fprintf(stderr, _("Error setting sched-mc %s\n"), - (ret == -ENODEV) ? "not supported" : ""); - } - if (params.sched_smt) { - ret = sysfs_set_sched("smt", sched_smt); - if (ret) - fprintf(stderr, _("Error setting sched-smt %s\n"), - (ret == -ENODEV) ? "not supported" : ""); - } - /* Default is: set all CPUs */ if (bitmask_isallclear(cpus_chosen)) bitmask_setall(cpus_chosen);