diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-07-05 10:34:30 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-07-05 10:34:30 -0700 |
commit | 2784d74bcc811e9d743398da38552e6f9c73e96b (patch) | |
tree | 1586d80b623e98b66c33802f1a4d3b4add1ee760 /tools | |
parent | 2a95b03d4cf780611ac6903fddc79e6d9789966e (diff) | |
parent | 6127383217741615f3450b684ecbee1ff570ee98 (diff) |
Merge tag 'trace-tools-v6.5' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace
Pull tracing tooling updates from Steven Rostedt:
- Add cgroup support for rtla via the -C option
- Add --house-keeping option that tells rtla where to place the
housekeeping threads
- Have rtla/timerlat have its own tracing instance instead of using the
top level tracing instance that is the default for other tracing
users to use
- Add auto analysis to timerlat_hist
- Have rtla start the tracers after creating the instances
- Reduce rtla hwnoise down to 75% from 100% as it runs with preemption
disabled and can cause system instability at 100%
- Add support to run timerlat_top and timerlat_hist threads in
user-space instead of just using the kernel tasks
- Some minor clean ups and documentation changes
* tag 'trace-tools-v6.5' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace:
Documentation: Add tools/rtla timerlat -u option documentation
rtla/timerlat_hist: Add timerlat user-space support
rtla/timerlat_top: Add timerlat user-space support
rtla/hwnoise: Reduce runtime to 75%
rtla: Start the tracers after creating all instances
rtla/timerlat_hist: Add auto-analysis support
rtla/timerlat: Give timerlat auto analysis its own instance
rtla: Automatically move rtla to a house-keeping cpu
rtla: Change monitored_cpus from char * to cpu_set_t
rtla: Add --house-keeping option
rtla: Add -C cgroup support
Diffstat (limited to 'tools')
-rw-r--r-- | tools/tracing/rtla/src/osnoise.c | 65 | ||||
-rw-r--r-- | tools/tracing/rtla/src/osnoise.h | 5 | ||||
-rw-r--r-- | tools/tracing/rtla/src/osnoise_hist.c | 90 | ||||
-rw-r--r-- | tools/tracing/rtla/src/osnoise_top.c | 83 | ||||
-rw-r--r-- | tools/tracing/rtla/src/timerlat_aa.c | 35 | ||||
-rw-r--r-- | tools/tracing/rtla/src/timerlat_aa.h | 5 | ||||
-rw-r--r-- | tools/tracing/rtla/src/timerlat_hist.c | 262 | ||||
-rw-r--r-- | tools/tracing/rtla/src/timerlat_top.c | 229 | ||||
-rw-r--r-- | tools/tracing/rtla/src/timerlat_u.c | 224 | ||||
-rw-r--r-- | tools/tracing/rtla/src/timerlat_u.h | 18 | ||||
-rw-r--r-- | tools/tracing/rtla/src/utils.c | 324 | ||||
-rw-r--r-- | tools/tracing/rtla/src/utils.h | 7 |
12 files changed, 1250 insertions, 97 deletions
diff --git a/tools/tracing/rtla/src/osnoise.c b/tools/tracing/rtla/src/osnoise.c index 3ca7a3853943..245e9344932b 100644 --- a/tools/tracing/rtla/src/osnoise.c +++ b/tools/tracing/rtla/src/osnoise.c @@ -841,6 +841,67 @@ static void osnoise_put_irq_disable(struct osnoise_context *context) context->orig_opt_irq_disable = OSNOISE_OPTION_INIT_VAL; } +static int osnoise_get_workload(struct osnoise_context *context) +{ + if (context->opt_workload != OSNOISE_OPTION_INIT_VAL) + return context->opt_workload; + + if (context->orig_opt_workload != OSNOISE_OPTION_INIT_VAL) + return context->orig_opt_workload; + + context->orig_opt_workload = osnoise_options_get_option("OSNOISE_WORKLOAD"); + + return context->orig_opt_workload; +} + +int osnoise_set_workload(struct osnoise_context *context, bool onoff) +{ + int opt_workload = osnoise_get_workload(context); + int retval; + + if (opt_workload == OSNOISE_OPTION_INIT_VAL) + return -1; + + if (opt_workload == onoff) + return 0; + + retval = osnoise_options_set_option("OSNOISE_WORKLOAD", onoff); + if (retval < 0) + return -1; + + context->opt_workload = onoff; + + return 0; +} + +static void osnoise_restore_workload(struct osnoise_context *context) +{ + int retval; + + if (context->orig_opt_workload == OSNOISE_OPTION_INIT_VAL) + return; + + if (context->orig_opt_workload == context->opt_workload) + goto out_done; + + retval = osnoise_options_set_option("OSNOISE_WORKLOAD", context->orig_opt_workload); + if (retval < 0) + err_msg("Could not restore original OSNOISE_WORKLOAD option\n"); + +out_done: + context->orig_opt_workload = OSNOISE_OPTION_INIT_VAL; +} + +static void osnoise_put_workload(struct osnoise_context *context) +{ + osnoise_restore_workload(context); + + if (context->orig_opt_workload == OSNOISE_OPTION_INIT_VAL) + return; + + context->orig_opt_workload = OSNOISE_OPTION_INIT_VAL; +} + /* * enable_osnoise - enable osnoise tracer in the trace_instance */ @@ -908,6 +969,9 @@ struct osnoise_context *osnoise_context_alloc(void) context->orig_opt_irq_disable = OSNOISE_OPTION_INIT_VAL; context->opt_irq_disable = OSNOISE_OPTION_INIT_VAL; + context->orig_opt_workload = OSNOISE_OPTION_INIT_VAL; + context->opt_workload = OSNOISE_OPTION_INIT_VAL; + osnoise_get_context(context); return context; @@ -935,6 +999,7 @@ void osnoise_put_context(struct osnoise_context *context) osnoise_put_print_stack(context); osnoise_put_tracing_thresh(context); osnoise_put_irq_disable(context); + osnoise_put_workload(context); free(context); } diff --git a/tools/tracing/rtla/src/osnoise.h b/tools/tracing/rtla/src/osnoise.h index 4dcf22ccd704..555f4f4903cc 100644 --- a/tools/tracing/rtla/src/osnoise.h +++ b/tools/tracing/rtla/src/osnoise.h @@ -42,6 +42,10 @@ struct osnoise_context { /* -1 as init value because 0 is off */ int orig_opt_irq_disable; int opt_irq_disable; + + /* -1 as init value because 0 is off */ + int orig_opt_workload; + int opt_workload; }; /* @@ -84,6 +88,7 @@ int osnoise_set_print_stack(struct osnoise_context *context, long long print_stack); int osnoise_set_irq_disable(struct osnoise_context *context, bool onoff); +int osnoise_set_workload(struct osnoise_context *context, bool onoff); /* * osnoise_tool - osnoise based tool definition. diff --git a/tools/tracing/rtla/src/osnoise_hist.c b/tools/tracing/rtla/src/osnoise_hist.c index 13e1233690bb..8f81fa007364 100644 --- a/tools/tracing/rtla/src/osnoise_hist.c +++ b/tools/tracing/rtla/src/osnoise_hist.c @@ -3,6 +3,7 @@ * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> */ +#define _GNU_SOURCE #include <getopt.h> #include <stdlib.h> #include <string.h> @@ -11,14 +12,16 @@ #include <errno.h> #include <stdio.h> #include <time.h> +#include <sched.h> #include "utils.h" #include "osnoise.h" struct osnoise_hist_params { char *cpus; - char *monitored_cpus; + cpu_set_t monitored_cpus; char *trace_output; + char *cgroup_name; unsigned long long runtime; unsigned long long period; long long threshold; @@ -28,6 +31,9 @@ struct osnoise_hist_params { int duration; int set_sched; int output_divisor; + int cgroup; + int hk_cpus; + cpu_set_t hk_cpu_set; struct sched_attr sched_param; struct trace_events *events; @@ -268,7 +274,7 @@ static void osnoise_hist_header(struct osnoise_tool *tool) trace_seq_printf(s, "Index"); for (cpu = 0; cpu < data->nr_cpus; cpu++) { - if (params->cpus && !params->monitored_cpus[cpu]) + if (params->cpus && !CPU_ISSET(cpu, ¶ms->monitored_cpus)) continue; if (!data->hist[cpu].count) @@ -299,7 +305,7 @@ osnoise_print_summary(struct osnoise_hist_params *params, trace_seq_printf(trace->seq, "count:"); for (cpu = 0; cpu < data->nr_cpus; cpu++) { - if (params->cpus && !params->monitored_cpus[cpu]) + if (params->cpus && !CPU_ISSET(cpu, ¶ms->monitored_cpus)) continue; if (!data->hist[cpu].count) @@ -313,7 +319,7 @@ osnoise_print_summary(struct osnoise_hist_params *params, trace_seq_printf(trace->seq, "min: "); for (cpu = 0; cpu < data->nr_cpus; cpu++) { - if (params->cpus && !params->monitored_cpus[cpu]) + if (params->cpus && !CPU_ISSET(cpu, ¶ms->monitored_cpus)) continue; if (!data->hist[cpu].count) @@ -328,7 +334,7 @@ osnoise_print_summary(struct osnoise_hist_params *params, trace_seq_printf(trace->seq, "avg: "); for (cpu = 0; cpu < data->nr_cpus; cpu++) { - if (params->cpus && !params->monitored_cpus[cpu]) + if (params->cpus && !CPU_ISSET(cpu, ¶ms->monitored_cpus)) continue; if (!data->hist[cpu].count) @@ -346,7 +352,7 @@ osnoise_print_summary(struct osnoise_hist_params *params, trace_seq_printf(trace->seq, "max: "); for (cpu = 0; cpu < data->nr_cpus; cpu++) { - if (params->cpus && !params->monitored_cpus[cpu]) + if (params->cpus && !CPU_ISSET(cpu, ¶ms->monitored_cpus)) continue; if (!data->hist[cpu].count) @@ -381,7 +387,7 @@ osnoise_print_stats(struct osnoise_hist_params *params, struct osnoise_tool *too bucket * data->bucket_size); for (cpu = 0; cpu < data->nr_cpus; cpu++) { - if (params->cpus && !params->monitored_cpus[cpu]) + if (params->cpus && !CPU_ISSET(cpu, ¶ms->monitored_cpus)) continue; if (!data->hist[cpu].count) @@ -405,7 +411,7 @@ osnoise_print_stats(struct osnoise_hist_params *params, struct osnoise_tool *too trace_seq_printf(trace->seq, "over: "); for (cpu = 0; cpu < data->nr_cpus; cpu++) { - if (params->cpus && !params->monitored_cpus[cpu]) + if (params->cpus && !CPU_ISSET(cpu, ¶ms->monitored_cpus)) continue; if (!data->hist[cpu].count) @@ -432,8 +438,8 @@ static void osnoise_hist_usage(char *usage) "", " usage: rtla osnoise hist [-h] [-D] [-d s] [-a us] [-p us] [-r us] [-s us] [-S us] \\", " [-T us] [-t[=file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] \\", - " [-c cpu-list] [-P priority] [-b N] [-E N] [--no-header] [--no-summary] [--no-index] \\", - " [--with-zeros]", + " [-c cpu-list] [-H cpu-list] [-P priority] [-b N] [-E N] [--no-header] [--no-summary] \\", + " [--no-index] [--with-zeros] [-C[=cgroup_name]]", "", " -h/--help: print this menu", " -a/--auto: set automatic trace mode, stopping the session if argument in us sample is hit", @@ -443,6 +449,8 @@ static void osnoise_hist_usage(char *usage) " -S/--stop-total us: stop trace if the total sample is higher than the argument in us", " -T/--threshold us: the minimum delta to be considered a noise", " -c/--cpus cpu-list: list of cpus to run osnoise threads", + " -H/--house-keeping cpus: run rtla control threads only on the given cpus", + " -C/--cgroup[=cgroup_name]: set cgroup, if no cgroup_name is passed, the rtla's cgroup will be inherited", " -d/--duration time[s|m|h|d]: duration of the session", " -D/--debug: print debug info", " -t/--trace[=file]: save the stopped trace to [file|osnoise_trace.txt]", @@ -501,8 +509,10 @@ static struct osnoise_hist_params {"bucket-size", required_argument, 0, 'b'}, {"entries", required_argument, 0, 'E'}, {"cpus", required_argument, 0, 'c'}, + {"cgroup", optional_argument, 0, 'C'}, {"debug", no_argument, 0, 'D'}, {"duration", required_argument, 0, 'd'}, + {"house-keeping", required_argument, 0, 'H'}, {"help", no_argument, 0, 'h'}, {"period", required_argument, 0, 'p'}, {"priority", required_argument, 0, 'P'}, @@ -524,7 +534,7 @@ static struct osnoise_hist_params /* getopt_long stores the option index here. */ int option_index = 0; - c = getopt_long(argc, argv, "a:c:b:d:e:E:Dhp:P:r:s:S:t::T:01234:5:", + c = getopt_long(argc, argv, "a:c:C::b:d:e:E:DhH:p:P:r:s:S:t::T:01234:5:", long_options, &option_index); /* detect the end of the options. */ @@ -549,11 +559,21 @@ static struct osnoise_hist_params osnoise_hist_usage("Bucket size needs to be > 0 and <= 1000000\n"); break; case 'c': - retval = parse_cpu_list(optarg, ¶ms->monitored_cpus); + retval = parse_cpu_set(optarg, ¶ms->monitored_cpus); if (retval) osnoise_hist_usage("\nInvalid -c cpu list\n"); params->cpus = optarg; break; + case 'C': + params->cgroup = 1; + if (!optarg) { + /* will inherit this cgroup */ + params->cgroup_name = NULL; + } else if (*optarg == '=') { + /* skip the = */ + params->cgroup_name = ++optarg; + } + break; case 'D': config_debug = 1; break; @@ -583,6 +603,14 @@ static struct osnoise_hist_params case '?': osnoise_hist_usage(NULL); break; + case 'H': + params->hk_cpus = 1; + retval = parse_cpu_set(optarg, ¶ms->hk_cpu_set); + if (retval) { + err_msg("Error parsing house keeping CPUs\n"); + exit(EXIT_FAILURE); + } + break; case 'p': params->period = get_llong_from_str(optarg); if (params->period > 10000000) @@ -718,6 +746,24 @@ osnoise_hist_apply_config(struct osnoise_tool *tool, struct osnoise_hist_params } } + if (params->hk_cpus) { + retval = sched_setaffinity(getpid(), sizeof(params->hk_cpu_set), + ¶ms->hk_cpu_set); + if (retval == -1) { + err_msg("Failed to set rtla to the house keeping CPUs\n"); + goto out_err; + } + } else if (params->cpus) { + /* + * Even if the user do not set a house-keeping CPU, try to + * move rtla to a CPU set different to the one where the user + * set the workload to run. + * + * No need to check results as this is an automatic attempt. + */ + auto_house_keeping(¶ms->monitored_cpus); + } + return 0; out_err: @@ -816,7 +862,13 @@ int osnoise_hist_main(int argc, char *argv[]) } } - trace_instance_start(trace); + if (params->cgroup) { + retval = set_comm_cgroup("timerlat/", params->cgroup_name); + if (!retval) { + err_msg("Failed to move threads to cgroup\n"); + goto out_free; + } + } if (params->trace_output) { record = osnoise_init_trace_tool("osnoise"); @@ -831,9 +883,19 @@ int osnoise_hist_main(int argc, char *argv[]) goto out_hist; } - trace_instance_start(&record->trace); } + /* + * Start the tracer here, after having set all instances. + * + * Let the trace instance start first for the case of hitting a stop + * tracing while enabling other instances. The trace instance is the + * one with most valuable information. + */ + if (params->trace_output) + trace_instance_start(&record->trace); + trace_instance_start(trace); + tool->start_time = time(NULL); osnoise_hist_set_signals(params); diff --git a/tools/tracing/rtla/src/osnoise_top.c b/tools/tracing/rtla/src/osnoise_top.c index 562f2e4b18c5..f7c959be8677 100644 --- a/tools/tracing/rtla/src/osnoise_top.c +++ b/tools/tracing/rtla/src/osnoise_top.c @@ -3,6 +3,7 @@ * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> */ +#define _GNU_SOURCE #include <getopt.h> #include <stdlib.h> #include <string.h> @@ -10,6 +11,7 @@ #include <unistd.h> #include <stdio.h> #include <time.h> +#include <sched.h> #include "osnoise.h" #include "utils.h" @@ -24,8 +26,9 @@ enum osnoise_mode { */ struct osnoise_top_params { char *cpus; - char *monitored_cpus; + cpu_set_t monitored_cpus; char *trace_output; + char *cgroup_name; unsigned long long runtime; unsigned long long period; long long threshold; @@ -35,6 +38,9 @@ struct osnoise_top_params { int duration; int quiet; int set_sched; + int cgroup; + int hk_cpus; + cpu_set_t hk_cpu_set; struct sched_attr sched_param; struct trace_events *events; enum osnoise_mode mode; @@ -257,7 +263,7 @@ osnoise_print_stats(struct osnoise_top_params *params, struct osnoise_tool *top) osnoise_top_header(top); for (i = 0; i < nr_cpus; i++) { - if (params->cpus && !params->monitored_cpus[i]) + if (params->cpus && !CPU_ISSET(i, ¶ms->monitored_cpus)) continue; osnoise_top_print(top, i); } @@ -276,7 +282,7 @@ static void osnoise_top_usage(struct osnoise_top_params *params, char *usage) static const char * const msg[] = { " [-h] [-q] [-D] [-d s] [-a us] [-p us] [-r us] [-s us] [-S us] \\", " [-T us] [-t[=file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] \\", - " [-c cpu-list] [-P priority]", + " [-c cpu-list] [-H cpu-list] [-P priority] [-C[=cgroup_name]]", "", " -h/--help: print this menu", " -a/--auto: set automatic trace mode, stopping the session if argument in us sample is hit", @@ -286,6 +292,8 @@ static void osnoise_top_usage(struct osnoise_top_params *params, char *usage) " -S/--stop-total us: stop trace if the total sample is higher than the argument in us", " -T/--threshold us: the minimum delta to be considered a noise", " -c/--cpus cpu-list: list of cpus to run osnoise threads", + " -H/--house-keeping cpus: run rtla control threads only on the given cpus", + " -C/--cgroup[=cgroup_name]: set cgroup, if no cgroup_name is passed, the rtla's cgroup will be inherited", " -d/--duration time[s|m|h|d]: duration of the session", " -D/--debug: print debug info", " -t/--trace[=file]: save the stopped trace to [file|osnoise_trace.txt]", @@ -340,16 +348,24 @@ struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv) if (!params) exit(1); - if (strcmp(argv[0], "hwnoise") == 0) + if (strcmp(argv[0], "hwnoise") == 0) { params->mode = MODE_HWNOISE; + /* + * Reduce CPU usage for 75% to avoid killing the system. + */ + params->runtime = 750000; + params->period = 1000000; + } while (1) { static struct option long_options[] = { {"auto", required_argument, 0, 'a'}, {"cpus", required_argument, 0, 'c'}, + {"cgroup", optional_argument, 0, 'C'}, {"debug", no_argument, 0, 'D'}, {"duration", required_argument, 0, 'd'}, {"event", required_argument, 0, 'e'}, + {"house-keeping", required_argument, 0, 'H'}, {"help", no_argument, 0, 'h'}, {"period", required_argument, 0, 'p'}, {"priority", required_argument, 0, 'P'}, @@ -367,7 +383,7 @@ struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv) /* getopt_long stores the option index here. */ int option_index = 0; - c = getopt_long(argc, argv, "a:c:d:De:hp:P:qr:s:S:t::T:0:1:", + c = getopt_long(argc, argv, "a:c:C::d:De:hH:p:P:qr:s:S:t::T:0:1:", long_options, &option_index); /* Detect the end of the options. */ @@ -387,11 +403,21 @@ struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv) break; case 'c': - retval = parse_cpu_list(optarg, ¶ms->monitored_cpus); + retval = parse_cpu_set(optarg, ¶ms->monitored_cpus); if (retval) osnoise_top_usage(params, "\nInvalid -c cpu list\n"); params->cpus = optarg; break; + case 'C': + params->cgroup = 1; + if (!optarg) { + /* will inherit this cgroup */ + params->cgroup_name = NULL; + } else if (*optarg == '=') { + /* skip the = */ + params->cgroup_name = ++optarg; + } + break; case 'D': config_debug = 1; break; @@ -416,6 +442,14 @@ struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv) case '?': osnoise_top_usage(params, NULL); break; + case 'H': + params->hk_cpus = 1; + retval = parse_cpu_set(optarg, ¶ms->hk_cpu_set); + if (retval) { + err_msg("Error parsing house keeping CPUs\n"); + exit(EXIT_FAILURE); + } + break; case 'p': params->period = get_llong_from_str(optarg); if (params->period > 10000000) @@ -547,6 +581,24 @@ osnoise_top_apply_config(struct osnoise_tool *tool, struct osnoise_top_params *p } } + if (params->hk_cpus) { + retval = sched_setaffinity(getpid(), sizeof(params->hk_cpu_set), + ¶ms->hk_cpu_set); + if (retval == -1) { + err_msg("Failed to set rtla to the house keeping CPUs\n"); + goto out_err; + } + } else if (params->cpus) { + /* + * Even if the user do not set a house-keeping CPU, try to + * move rtla to a CPU set different to the one where the user + * set the workload to run. + * + * No need to check results as this is an automatic attempt. + */ + auto_house_keeping(¶ms->monitored_cpus); + } + return 0; out_err: @@ -643,7 +695,13 @@ int osnoise_top_main(int argc, char **argv) } } - trace_instance_start(trace); + if (params->cgroup) { + retval = set_comm_cgroup("osnoise/", params->cgroup_name); + if (!retval) { + err_msg("Failed to move threads to cgroup\n"); + goto out_free; + } + } if (params->trace_output) { record = osnoise_init_trace_tool("osnoise"); @@ -657,9 +715,18 @@ int osnoise_top_main(int argc, char **argv) if (retval) goto out_top; } + } + /* + * Start the tracer here, after having set all instances. + * + * Let the trace instance start first for the case of hitting a stop + * tracing while enabling other instances. The trace instance is the + * one with most valuable information. + */ + if (params->trace_output) trace_instance_start(&record->trace); - } + trace_instance_start(trace); tool->start_time = time(NULL); osnoise_top_set_signals(params); diff --git a/tools/tracing/rtla/src/timerlat_aa.c b/tools/tracing/rtla/src/timerlat_aa.c index 1843fff66da5..e0ffe69c271c 100644 --- a/tools/tracing/rtla/src/timerlat_aa.c +++ b/tools/tracing/rtla/src/timerlat_aa.c @@ -8,6 +8,7 @@ #include "utils.h" #include "osnoise.h" #include "timerlat.h" +#include <unistd.h> enum timelat_state { TIMERLAT_INIT = 0, @@ -233,7 +234,7 @@ static int timerlat_aa_thread_latency(struct timerlat_aa_data *taa_data, * * Returns 0 on success, -1 otherwise. */ -int timerlat_aa_handler(struct trace_seq *s, struct tep_record *record, +static int timerlat_aa_handler(struct trace_seq *s, struct tep_record *record, struct tep_event *event, void *context) { struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); @@ -665,6 +666,25 @@ print_total: ns_to_usf(total)); } +static int timerlat_auto_analysis_collect_trace(struct timerlat_aa_context *taa_ctx) +{ + struct trace_instance *trace = &taa_ctx->tool->trace; + int retval; + + retval = tracefs_iterate_raw_events(trace->tep, + trace->inst, + NULL, + 0, + collect_registered_events, + trace); + if (retval < 0) { + err_msg("Error iterating on events\n"); + return 0; + } + + return 1; +} + /** * timerlat_auto_analysis - Analyze the collected data */ @@ -677,6 +697,8 @@ void timerlat_auto_analysis(int irq_thresh, int thread_thresh) struct tep_handle *tep; int cpu; + timerlat_auto_analysis_collect_trace(taa_ctx); + /* bring stop tracing to the ns scale */ irq_thresh = irq_thresh * 1000; thread_thresh = thread_thresh * 1000; @@ -838,6 +860,10 @@ out_err: */ static void timerlat_aa_unregister_events(struct osnoise_tool *tool, int dump_tasks) { + + tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "timerlat", + timerlat_aa_handler, tool); + tracefs_event_disable(tool->trace.inst, "osnoise", NULL); tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise", @@ -875,6 +901,10 @@ static int timerlat_aa_register_events(struct osnoise_tool *tool, int dump_tasks { int retval; + tep_register_event_handler(tool->trace.tep, -1, "ftrace", "timerlat", + timerlat_aa_handler, tool); + + /* * register auto-analysis handlers. */ @@ -955,8 +985,9 @@ out_ctx: * * Returns 0 on success, -1 otherwise. */ -int timerlat_aa_init(struct osnoise_tool *tool, int nr_cpus, int dump_tasks) +int timerlat_aa_init(struct osnoise_tool *tool, int dump_tasks) { + int nr_cpus = sysconf(_SC_NPROCESSORS_CONF); struct timerlat_aa_context *taa_ctx; int retval; diff --git a/tools/tracing/rtla/src/timerlat_aa.h b/tools/tracing/rtla/src/timerlat_aa.h index d4f6ca7e342a..cea4bb1531a8 100644 --- a/tools/tracing/rtla/src/timerlat_aa.h +++ b/tools/tracing/rtla/src/timerlat_aa.h @@ -3,10 +3,7 @@ * Copyright (C) 2023 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> */ -int timerlat_aa_init(struct osnoise_tool *tool, int nr_cpus, int dump_task); +int timerlat_aa_init(struct osnoise_tool *tool, int dump_task); void timerlat_aa_destroy(void); -int timerlat_aa_handler(struct trace_seq *s, struct tep_record *record, - struct tep_event *event, void *context); - void timerlat_auto_analysis(int irq_thresh, int thread_thresh); diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c index 4b48af8a8309..47d3d8b53cb2 100644 --- a/tools/tracing/rtla/src/timerlat_hist.c +++ b/tools/tracing/rtla/src/timerlat_hist.c @@ -3,6 +3,7 @@ * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> */ +#define _GNU_SOURCE #include <getopt.h> #include <stdlib.h> #include <string.h> @@ -10,15 +11,20 @@ #include <unistd.h> #include <stdio.h> #include <time.h> +#include <sched.h> +#include <pthread.h> #include "utils.h" #include "osnoise.h" #include "timerlat.h" +#include "timerlat_aa.h" +#include "timerlat_u.h" struct timerlat_hist_params { char *cpus; - char *monitored_cpus; + cpu_set_t monitored_cpus; char *trace_output; + char *cgroup_name; unsigned long long runtime; long long stop_us; long long stop_total_us; @@ -29,9 +35,14 @@ struct timerlat_hist_params { int duration; int set_sched; int dma_latency; + int cgroup; + int hk_cpus; + int no_aa; + int dump_tasks; + int user_hist; + cpu_set_t hk_cpu_set; struct sched_attr sched_param; struct trace_events *events; - char no_irq; char no_thread; char no_header; @@ -45,9 +56,11 @@ struct timerlat_hist_params { struct timerlat_hist_cpu { int *irq; int *thread; + int *user; int irq_count; int thread_count; + int user_count; unsigned long long min_irq; unsigned long long sum_irq; @@ -56,6 +69,10 @@ struct timerlat_hist_cpu { unsigned long long min_thread; unsigned long long sum_thread; unsigned long long max_thread; + + unsigned long long min_user; + unsigned long long sum_user; + unsigned long long max_user; }; struct timerlat_hist_data { @@ -80,6 +97,10 @@ timerlat_free_histogram(struct timerlat_hist_data *data) if (data->hist[cpu].thread) free(data->hist[cpu].thread); + + if (data->hist[cpu].user) + free(data->hist[cpu].user); + } /* one set of histograms per CPU */ @@ -116,15 +137,21 @@ static struct timerlat_hist_data data->hist[cpu].irq = calloc(1, sizeof(*data->hist->irq) * (entries + 1)); if (!data->hist[cpu].irq) goto cleanup; + data->hist[cpu].thread = calloc(1, sizeof(*data->hist->thread) * (entries + 1)); if (!data->hist[cpu].thread) goto cleanup; + + data->hist[cpu].user = calloc(1, sizeof(*data->hist->user) * (entries + 1)); + if (!data->hist[cpu].user) + goto cleanup; } /* set the min to max */ for (cpu = 0; cpu < nr_cpus; cpu++) { data->hist[cpu].min_irq = ~0; data->hist[cpu].min_thread = ~0; + data->hist[cpu].min_user = ~0; } return data; @@ -139,7 +166,7 @@ cleanup: */ static void timerlat_hist_update(struct osnoise_tool *tool, int cpu, - unsigned long long thread, + unsigned long long context, unsigned long long latency) { struct timerlat_hist_params *params = tool->params; @@ -154,18 +181,24 @@ timerlat_hist_update(struct osnoise_tool *tool, int cpu, if (data->bucket_size) bucket = latency / data->bucket_size; - if (!thread) { + if (!context) { hist = data->hist[cpu].irq; data->hist[cpu].irq_count++; update_min(&data->hist[cpu].min_irq, &latency); update_sum(&data->hist[cpu].sum_irq, &latency); update_max(&data->hist[cpu].max_irq, &latency); - } else { + } else if (context == 1) { hist = data->hist[cpu].thread; data->hist[cpu].thread_count++; update_min(&data->hist[cpu].min_thread, &latency); update_sum(&data->hist[cpu].sum_thread, &latency); update_max(&data->hist[cpu].max_thread, &latency); + } else { /* user */ + hist = data->hist[cpu].user; + data->hist[cpu].user_count++; + update_min(&data->hist[cpu].min_user, &latency); + update_sum(&data->hist[cpu].sum_user, &latency); + update_max(&data->hist[cpu].max_user, &latency); } if (bucket < entries) @@ -182,16 +215,16 @@ timerlat_hist_handler(struct trace_seq *s, struct tep_record *record, struct tep_event *event, void *data) { struct trace_instance *trace = data; - unsigned long long thread, latency; + unsigned long long context, latency; struct osnoise_tool *tool; int cpu = record->cpu; tool = container_of(trace, struct osnoise_tool, trace); - tep_get_field_val(s, event, "context", record, &thread, 1); + tep_get_field_val(s, event, "context", record, &context, 1); tep_get_field_val(s, event, "timer_latency", record, &latency, 1); - timerlat_hist_update(tool, cpu, thread, latency); + timerlat_hist_update(tool, cpu, context, latency); return 0; } @@ -222,7 +255,7 @@ static void timerlat_hist_header(struct osnoise_tool *tool) trace_seq_printf(s, "Index"); for (cpu = 0; cpu < data->nr_cpus; cpu++) { - if (params->cpus && !params->monitored_cpus[cpu]) + if (params->cpus && !CPU_ISSET(cpu, ¶ms->monitored_cpus)) continue; if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) @@ -233,6 +266,9 @@ static void timerlat_hist_header(struct osnoise_tool *tool) if (!params->no_thread) trace_seq_printf(s, " Thr-%03d", cpu); + + if (params->user_hist) + trace_seq_printf(s, " Usr-%03d", cpu); } trace_seq_printf(s, "\n"); @@ -258,7 +294,7 @@ timerlat_print_summary(struct timerlat_hist_params *params, trace_seq_printf(trace->seq, "count:"); for (cpu = 0; cpu < data->nr_cpus; cpu++) { - if (params->cpus && !params->monitored_cpus[cpu]) + if (params->cpus && !CPU_ISSET(cpu, ¶ms->monitored_cpus)) continue; if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) @@ -271,6 +307,10 @@ timerlat_print_summary(struct timerlat_hist_params *params, if (!params->no_thread) trace_seq_printf(trace->seq, "%9d ", data->hist[cpu].thread_count); + + if (params->user_hist) + trace_seq_printf(trace->seq, "%9d ", + data->hist[cpu].user_count); } trace_seq_printf(trace->seq, "\n"); @@ -278,7 +318,7 @@ timerlat_print_summary(struct timerlat_hist_params *params, trace_seq_printf(trace->seq, "min: "); for (cpu = 0; cpu < data->nr_cpus; cpu++) { - if (params->cpus && !params->monitored_cpus[cpu]) + if (params->cpus && !CPU_ISSET(cpu, ¶ms->monitored_cpus)) continue; if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) @@ -291,6 +331,10 @@ timerlat_print_summary(struct timerlat_hist_params *params, if (!params->no_thread) trace_seq_printf(trace->seq, "%9llu ", data->hist[cpu].min_thread); + + if (params->user_hist) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].min_user); } trace_seq_printf(trace->seq, "\n"); @@ -298,7 +342,7 @@ timerlat_print_summary(struct timerlat_hist_params *params, trace_seq_printf(trace->seq, "avg: "); for (cpu = 0; cpu < data->nr_cpus; cpu++) { - if (params->cpus && !params->monitored_cpus[cpu]) + if (params->cpus && !CPU_ISSET(cpu, ¶ms->monitored_cpus)) continue; if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) @@ -315,7 +359,15 @@ timerlat_print_summary(struct timerlat_hist_params *params, if (!params->no_thread) { if (data->hist[cpu].thread_count) trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].sum_thread / data->hist[cpu].thread_count); + data->hist[cpu].sum_thread / data->hist[cpu].thread_count); + else + trace_seq_printf(trace->seq, " - "); + } + + if (params->user_hist) { + if (data->hist[cpu].user_count) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].sum_user / data->hist[cpu].user_count); else trace_seq_printf(trace->seq, " - "); } @@ -326,7 +378,7 @@ timerlat_print_summary(struct timerlat_hist_params *params, trace_seq_printf(trace->seq, "max: "); for (cpu = 0; cpu < data->nr_cpus; cpu++) { - if (params->cpus && !params->monitored_cpus[cpu]) + if (params->cpus && !CPU_ISSET(cpu, ¶ms->monitored_cpus)) continue; if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) @@ -339,6 +391,10 @@ timerlat_print_summary(struct timerlat_hist_params *params, if (!params->no_thread) trace_seq_printf(trace->seq, "%9llu ", data->hist[cpu].max_thread); + + if (params->user_hist) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].max_user); } trace_seq_printf(trace->seq, "\n"); trace_seq_do_printf(trace->seq); @@ -366,7 +422,7 @@ timerlat_print_stats(struct timerlat_hist_params *params, struct osnoise_tool *t bucket * data->bucket_size); for (cpu = 0; cpu < data->nr_cpus; cpu++) { - if (params->cpus && !params->monitored_cpus[cpu]) + if (params->cpus && !CPU_ISSET(cpu, ¶ms->monitored_cpus)) continue; if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) @@ -384,6 +440,12 @@ timerlat_print_stats(struct timerlat_hist_params *params, struct osnoise_tool *t data->hist[cpu].thread[bucket]); } + if (params->user_hist) { + total += data->hist[cpu].user[bucket]; + trace_seq_printf(trace->seq, "%9d ", + data->hist[cpu].user[bucket]); + } + } if (total == 0 && !params->with_zeros) { @@ -400,7 +462,7 @@ timerlat_print_stats(struct timerlat_hist_params *params, struct osnoise_tool *t trace_seq_printf(trace->seq, "over: "); for (cpu = 0; cpu < data->nr_cpus; cpu++) { - if (params->cpus && !params->monitored_cpus[cpu]) + if (params->cpus && !CPU_ISSET(cpu, ¶ms->monitored_cpus)) continue; if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) @@ -413,6 +475,10 @@ timerlat_print_stats(struct timerlat_hist_params *params, struct osnoise_tool *t if (!params->no_thread) trace_seq_printf(trace->seq, "%9d ", data->hist[cpu].thread[data->entries]); + + if (params->user_hist) + trace_seq_printf(trace->seq, "%9d ", + data->hist[cpu].user[data->entries]); } trace_seq_printf(trace->seq, "\n"); trace_seq_do_printf(trace->seq); @@ -431,9 +497,9 @@ static void timerlat_hist_usage(char *usage) char *msg[] = { "", " usage: [rtla] timerlat hist [-h] [-q] [-d s] [-D] [-n] [-a us] [-p us] [-i us] [-T us] [-s us] \\", - " [-t[=file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] [-c cpu-list] \\", + " [-t[=file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] [-c cpu-list] [-H cpu-list]\\", " [-P priority] [-E N] [-b N] [--no-irq] [--no-thread] [--no-header] [--no-summary] \\", - " [--no-index] [--with-zeros] [--dma-latency us]", + " [--no-index] [--with-zeros] [--dma-latency us] [-C[=cgroup_name]] [--no-aa] [--dump-task] [-u]", "", " -h/--help: print this menu", " -a/--auto: set automatic trace mode, stopping the session if argument in us latency is hit", @@ -442,13 +508,17 @@ static void timerlat_hist_usage(char *usage) " -T/--thread us: stop trace if the thread latency is higher than the argument in us", " -s/--stack us: save the stack trace at the IRQ if a thread latency is higher than the argument in us", " -c/--cpus cpus: run the tracer only on the given cpus", + " -H/--house-keeping cpus: run rtla control threads only on the given cpus", + " -C/--cgroup[=cgroup_name]: set cgroup, if no cgroup_name is passed, the rtla's cgroup will be inherited", " -d/--duration time[m|h|d]: duration of the session in seconds", + " --dump-tasks: prints the task running on all CPUs if stop conditions are met (depends on !--no-aa)", " -D/--debug: print debug info", " -t/--trace[=file]: save the stopped trace to [file|timerlat_trace.txt]", " -e/--event <sys:event>: enable the <sys:event> in the trace instance, multiple -e are allowed", " --filter <filter>: enable a trace event filter to the previous -e event", " --trigger <trigger>: enable a trace event trigger to the previous -e event", " -n/--nano: display data in nanoseconds", + " --no-aa: disable auto-analysis, reducing rtla timerlat cpu usage", " -b/--bucket-size N: set the histogram bucket size (default 1)", " -E/--entries N: set the number of entries of the histogram (default 256)", " --no-irq: ignore IRQ latencies", @@ -464,6 +534,7 @@ static void timerlat_hist_usage(char *usage) " f:prio - use SCHED_FIFO with prio", " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period", " in nanoseconds", + " -u/--user-threads: use rtla user-space threads instead of in-kernel timerlat threads", NULL, }; @@ -506,10 +577,12 @@ static struct timerlat_hist_params static struct option long_options[] = { {"auto", required_argument, 0, 'a'}, {"cpus", required_argument, 0, 'c'}, + {"cgroup", optional_argument, 0, 'C'}, {"bucket-size", required_argument, 0, 'b'}, {"debug", no_argument, 0, 'D'}, {"entries", required_argument, 0, 'E'}, {"duration", required_argument, 0, 'd'}, + {"house-keeping", required_argument, 0, 'H'}, {"help", no_argument, 0, 'h'}, {"irq", required_argument, 0, 'i'}, {"nano", no_argument, 0, 'n'}, @@ -518,6 +591,7 @@ static struct timerlat_hist_params {"stack", required_argument, 0, 's'}, {"thread", required_argument, 0, 'T'}, {"trace", optional_argument, 0, 't'}, + {"user-threads", no_argument, 0, 'u'}, {"event", required_argument, 0, 'e'}, {"no-irq", no_argument, 0, '0'}, {"no-thread", no_argument, 0, '1'}, @@ -528,13 +602,15 @@ static struct timerlat_hist_params {"trigger", required_argument, 0, '6'}, {"filter", required_argument, 0, '7'}, {"dma-latency", required_argument, 0, '8'}, + {"no-aa", no_argument, 0, '9'}, + {"dump-task", no_argument, 0, '\1'}, {0, 0, 0, 0} }; /* getopt_long stores the option index here. */ int option_index = 0; - c = getopt_long(argc, argv, "a:c:b:d:e:E:Dhi:np:P:s:t::T:0123456:7:8:", + c = getopt_long(argc, argv, "a:c:C::b:d:e:E:DhH:i:np:P:s:t::T:u0123456:7:8:9\1", long_options, &option_index); /* detect the end of the options. */ @@ -547,6 +623,7 @@ static struct timerlat_hist_params /* set thread stop to auto_thresh */ params->stop_total_us = auto_thresh; + params->stop_us = auto_thresh; /* get stack trace */ params->print_stack = auto_thresh; @@ -556,11 +633,21 @@ static struct timerlat_hist_params break; case 'c': - retval = parse_cpu_list(optarg, ¶ms->monitored_cpus); + retval = parse_cpu_set(optarg, ¶ms->monitored_cpus); if (retval) timerlat_hist_usage("\nInvalid -c cpu list\n"); params->cpus = optarg; break; + case 'C': + params->cgroup = 1; + if (!optarg) { + /* will inherit this cgroup */ + params->cgroup_name = NULL; + } else if (*optarg == '=') { + /* skip the = */ + params->cgroup_name = ++optarg; + } + break; case 'b': params->bucket_size = get_llong_from_str(optarg); if ((params->bucket_size == 0) || (params->bucket_size >= 1000000)) @@ -595,6 +682,14 @@ static struct timerlat_hist_params case '?': timerlat_hist_usage(NULL); break; + case 'H': + params->hk_cpus = 1; + retval = parse_cpu_set(optarg, ¶ms->hk_cpu_set); + if (retval) { + err_msg("Error parsing house keeping CPUs\n"); + exit(EXIT_FAILURE); + } + break; case 'i': params->stop_us = get_llong_from_str(optarg); break; @@ -625,6 +720,9 @@ static struct timerlat_hist_params else params->trace_output = "timerlat_trace.txt"; break; + case 'u': + params->user_hist = 1; + break; case '0': /* no irq */ params->no_irq = 1; break; @@ -672,6 +770,12 @@ static struct timerlat_hist_params exit(EXIT_FAILURE); } break; + case '9': + params->no_aa = 1; + break; + case '\1': + params->dump_tasks = 1; + break; default: timerlat_hist_usage("Invalid option"); } @@ -688,6 +792,12 @@ static struct timerlat_hist_params if (params->no_index && !params->with_zeros) timerlat_hist_usage("no-index set with with-zeros is not set - it does not make sense"); + /* + * Auto analysis only happens if stop tracing, thus: + */ + if (!params->stop_us && !params->stop_total_us) + params->no_aa = 1; + return params; } @@ -697,7 +807,7 @@ static struct timerlat_hist_params static int timerlat_hist_apply_config(struct osnoise_tool *tool, struct timerlat_hist_params *params) { - int retval; + int retval, i; if (!params->sleep_time) params->sleep_time = 1; @@ -708,6 +818,9 @@ timerlat_hist_apply_config(struct osnoise_tool *tool, struct timerlat_hist_param err_msg("Failed to apply CPUs config\n"); goto out_err; } + } else { + for (i = 0; i < sysconf(_SC_NPROCESSORS_CONF); i++) + CPU_SET(i, ¶ms->monitored_cpus); } if (params->stop_us) { @@ -742,6 +855,32 @@ timerlat_hist_apply_config(struct osnoise_tool *tool, struct timerlat_hist_param } } + if (params->hk_cpus) { + retval = sched_setaffinity(getpid(), sizeof(params->hk_cpu_set), + ¶ms->hk_cpu_set); + if (retval == -1) { + err_msg("Failed to set rtla to the house keeping CPUs\n"); + goto out_err; + } + } else if (params->cpus) { + /* + * Even if the user do not set a house-keeping CPU, try to + * move rtla to a CPU set different to the one where the user + * set the workload to run. + * + * No need to check results as this is an automatic attempt. + */ + auto_house_keeping(¶ms->monitored_cpus); + } + + if (params->user_hist) { + retval = osnoise_set_workload(tool->context, 0); + if (retval) { + err_msg("Failed to set OSNOISE_WORKLOAD option\n"); + goto out_err; + } + } + return 0; out_err: @@ -802,10 +941,13 @@ int timerlat_hist_main(int argc, char *argv[]) { struct timerlat_hist_params *params; struct osnoise_tool *record = NULL; + struct timerlat_u_params params_u; struct osnoise_tool *tool = NULL; + struct osnoise_tool *aa = NULL; struct trace_instance *trace; int dma_latency_fd = -1; int return_value = 1; + pthread_t timerlat_u; int retval; params = timerlat_hist_parse_args(argc, argv); @@ -840,6 +982,14 @@ int timerlat_hist_main(int argc, char *argv[]) } } + if (params->cgroup && !params->user_hist) { + retval = set_comm_cgroup("timerlat/", params->cgroup_name); + if (!retval) { + err_msg("Failed to move threads to cgroup\n"); + goto out_free; + } + } + if (params->dma_latency >= 0) { dma_latency_fd = set_cpu_dma_latency(params->dma_latency); if (dma_latency_fd < 0) { @@ -848,8 +998,6 @@ int timerlat_hist_main(int argc, char *argv[]) } } - trace_instance_start(trace); - if (params->trace_output) { record = osnoise_init_trace_tool("timerlat"); if (!record) { @@ -862,13 +1010,61 @@ int timerlat_hist_main(int argc, char *argv[]) if (retval) goto out_hist; } + } - trace_instance_start(&record->trace); + if (!params->no_aa) { + aa = osnoise_init_tool("timerlat_aa"); + if (!aa) + goto out_hist; + + retval = timerlat_aa_init(aa, params->dump_tasks); + if (retval) { + err_msg("Failed to enable the auto analysis instance\n"); + goto out_hist; + } + + retval = enable_timerlat(&aa->trace); + if (retval) { + err_msg("Failed to enable timerlat tracer\n"); + goto out_hist; + } } + /* + * Start the tracers here, after having set all instances. + * + * Let the trace instance start first for the case of hitting a stop + * tracing while enabling other instances. The trace instance is the + * one with most valuable information. + */ + if (params->trace_output) + trace_instance_start(&record->trace); + if (!params->no_aa) + trace_instance_start(&aa->trace); + trace_instance_start(trace); + tool->start_time = time(NULL); timerlat_hist_set_signals(params); + if (params->user_hist) { + /* rtla asked to stop */ + params_u.should_run = 1; + /* all threads left */ + params_u.stopped_running = 0; + + params_u.set = ¶ms->monitored_cpus; + if (params->set_sched) + params_u.sched_param = ¶ms->sched_param; + else + params_u.sched_param = NULL; + + params_u.cgroup_name = params->cgroup_name; + + retval = pthread_create(&timerlat_u, NULL, timerlat_u_dispatcher, ¶ms_u); + if (retval) + err_msg("Error creating timerlat user-space threads\n"); + } + while (!stop_tracing) { sleep(params->sleep_time); @@ -885,6 +1081,18 @@ int timerlat_hist_main(int argc, char *argv[]) if (trace_is_off(&tool->trace, &record->trace)) break; + + /* is there still any user-threads ? */ + if (params->user_hist) { + if (params_u.stopped_running) { + debug_msg("timerlat user-space threads stopped!\n"); + break; + } + } + } + if (params->user_hist && !params_u.stopped_running) { + params_u.should_run = 0; + sleep(1); } timerlat_print_stats(params, tool); @@ -893,6 +1101,10 @@ int timerlat_hist_main(int argc, char *argv[]) if (trace_is_off(&tool->trace, &record->trace)) { printf("rtla timerlat hit stop tracing\n"); + + if (!params->no_aa) + timerlat_auto_analysis(params->stop_us, params->stop_total_us); + if (params->trace_output) { printf(" Saving trace to %s\n", params->trace_output); save_trace_to_file(record->trace.inst, params->trace_output); @@ -900,12 +1112,14 @@ int timerlat_hist_main(int argc, char *argv[]) } out_hist: + timerlat_aa_destroy(); if (dma_latency_fd >= 0) close(dma_latency_fd); trace_events_destroy(&record->trace, params->events); params->events = NULL; out_free: timerlat_free_histogram(tool->data); + osnoise_destroy_tool(aa); osnoise_destroy_tool(record); osnoise_destroy_tool(tool); free(params); diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c index 92c658c64f28..1640f121baca 100644 --- a/tools/tracing/rtla/src/timerlat_top.c +++ b/tools/tracing/rtla/src/timerlat_top.c @@ -3,6 +3,7 @@ * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> */ +#define _GNU_SOURCE #include <getopt.h> #include <stdlib.h> #include <string.h> @@ -11,16 +12,20 @@ #include <stdio.h> #include <time.h> #include <errno.h> +#include <sched.h> +#include <pthread.h> #include "utils.h" #include "osnoise.h" #include "timerlat.h" #include "timerlat_aa.h" +#include "timerlat_u.h" struct timerlat_top_params { char *cpus; - char *monitored_cpus; + cpu_set_t monitored_cpus; char *trace_output; + char *cgroup_name; unsigned long long runtime; long long stop_us; long long stop_total_us; @@ -35,6 +40,10 @@ struct timerlat_top_params { int no_aa; int aa_only; int dump_tasks; + int cgroup; + int hk_cpus; + int user_top; + cpu_set_t hk_cpu_set; struct sched_attr sched_param; struct trace_events *events; }; @@ -42,6 +51,7 @@ struct timerlat_top_params { struct timerlat_top_cpu { int irq_count; int thread_count; + int user_count; unsigned long long cur_irq; unsigned long long min_irq; @@ -52,6 +62,11 @@ struct timerlat_top_cpu { unsigned long long min_thread; unsigned long long sum_thread; unsigned long long max_thread; + + unsigned long long cur_user; + unsigned long long min_user; + unsigned long long sum_user; + unsigned long long max_user; }; struct timerlat_top_data { @@ -92,6 +107,7 @@ static struct timerlat_top_data *timerlat_alloc_top(int nr_cpus) for (cpu = 0; cpu < nr_cpus; cpu++) { data->cpu_data[cpu].min_irq = ~0; data->cpu_data[cpu].min_thread = ~0; + data->cpu_data[cpu].min_user = ~0; } return data; @@ -118,12 +134,18 @@ timerlat_top_update(struct osnoise_tool *tool, int cpu, update_min(&cpu_data->min_irq, &latency); update_sum(&cpu_data->sum_irq, &latency); update_max(&cpu_data->max_irq, &latency); - } else { + } else if (thread == 1) { cpu_data->thread_count++; cpu_data->cur_thread = latency; update_min(&cpu_data->min_thread, &latency); update_sum(&cpu_data->sum_thread, &latency); update_max(&cpu_data->max_thread, &latency); + } else { + cpu_data->user_count++; + cpu_data->cur_user = latency; + update_min(&cpu_data->min_user, &latency); + update_sum(&cpu_data->sum_user, &latency); + update_max(&cpu_data->max_user, &latency); } } @@ -150,9 +172,6 @@ timerlat_top_handler(struct trace_seq *s, struct tep_record *record, timerlat_top_update(top, cpu, thread, latency); } - if (!params->no_aa) - timerlat_aa_handler(s, record, event, context); - return 0; } @@ -169,15 +188,25 @@ static void timerlat_top_header(struct osnoise_tool *top) trace_seq_printf(s, "\033[2;37;40m"); trace_seq_printf(s, " Timer Latency "); + if (params->user_top) + trace_seq_printf(s, " "); trace_seq_printf(s, "\033[0;0;0m"); trace_seq_printf(s, "\n"); - trace_seq_printf(s, "%-6s | IRQ Timer Latency (%s) | Thread Timer Latency (%s)\n", duration, + trace_seq_printf(s, "%-6s | IRQ Timer Latency (%s) | Thread Timer Latency (%s)", duration, params->output_divisor == 1 ? "ns" : "us", params->output_divisor == 1 ? "ns" : "us"); + if (params->user_top) { + trace_seq_printf(s, " | Ret user Timer Latency (%s)", + params->output_divisor == 1 ? "ns" : "us"); + } + + trace_seq_printf(s, "\n"); trace_seq_printf(s, "\033[2;30;47m"); trace_seq_printf(s, "CPU COUNT | cur min avg max | cur min avg max"); + if (params->user_top) + trace_seq_printf(s, " | cur min avg max"); trace_seq_printf(s, "\033[0;0;0m"); trace_seq_printf(s, "\n"); } @@ -230,7 +259,27 @@ static void timerlat_top_print(struct osnoise_tool *top, int cpu) trace_seq_printf(s, "%9llu ", cpu_data->min_thread / divisor); trace_seq_printf(s, "%9llu ", (cpu_data->sum_thread / cpu_data->thread_count) / divisor); - trace_seq_printf(s, "%9llu\n", cpu_data->max_thread / divisor); + trace_seq_printf(s, "%9llu", cpu_data->max_thread / divisor); + } + + if (!params->user_top) { + trace_seq_printf(s, "\n"); + return; + } + + trace_seq_printf(s, " |"); + + if (!cpu_data->user_count) { + trace_seq_printf(s, " - "); + trace_seq_printf(s, " - "); + trace_seq_printf(s, " - "); + trace_seq_printf(s, " -\n"); + } else { + trace_seq_printf(s, "%9llu ", cpu_data->cur_user / divisor); + trace_seq_printf(s, "%9llu ", cpu_data->min_user / divisor); + trace_seq_printf(s, "%9llu ", + (cpu_data->sum_user / cpu_data->user_count) / divisor); + trace_seq_printf(s, "%9llu\n", cpu_data->max_user / divisor); } } @@ -265,7 +314,7 @@ timerlat_print_stats(struct timerlat_top_params *params, struct osnoise_tool *to timerlat_top_header(top); for (i = 0; i < nr_cpus; i++) { - if (params->cpus && !params->monitored_cpus[i]) + if (params->cpus && !CPU_ISSET(i, ¶ms->monitored_cpus)) continue; timerlat_top_print(top, i); } @@ -284,8 +333,8 @@ static void timerlat_top_usage(char *usage) static const char *const msg[] = { "", " usage: rtla timerlat [top] [-h] [-q] [-a us] [-d s] [-D] [-n] [-p us] [-i us] [-T us] [-s us] \\", - " [[-t[=file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] [-c cpu-list] \\", - " [-P priority] [--dma-latency us] [--aa-only us]", + " [[-t[=file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] [-c cpu-list] [-H cpu-list]\\", + " [-P priority] [--dma-latency us] [--aa-only us] [-C[=cgroup_name]] [-u]", "", " -h/--help: print this menu", " -a/--auto: set automatic trace mode, stopping the session if argument in us latency is hit", @@ -295,6 +344,8 @@ static void timerlat_top_usage(char *usage) " -T/--thread us: stop trace if the thread latency is higher than the argument in us", " -s/--stack us: save the stack trace at the IRQ if a thread latency is higher than the argument in us", " -c/--cpus cpus: run the tracer only on the given cpus", + " -H/--house-keeping cpus: run rtla control threads only on the given cpus", + " -C/--cgroup[=cgroup_name]: set cgroup, if no cgroup_name is passed, the rtla's cgroup will be inherited", " -d/--duration time[m|h|d]: duration of the session in seconds", " -D/--debug: print debug info", " --dump-tasks: prints the task running on all CPUs if stop conditions are met (depends on !--no-aa)", @@ -312,6 +363,7 @@ static void timerlat_top_usage(char *usage) " f:prio - use SCHED_FIFO with prio", " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period", " in nanoseconds", + " -u/--user-threads: use rtla user-space threads instead of in-kernel timerlat threads", NULL, }; @@ -352,10 +404,12 @@ static struct timerlat_top_params static struct option long_options[] = { {"auto", required_argument, 0, 'a'}, {"cpus", required_argument, 0, 'c'}, + {"cgroup", optional_argument, 0, 'C'}, {"debug", no_argument, 0, 'D'}, {"duration", required_argument, 0, 'd'}, {"event", required_argument, 0, 'e'}, {"help", no_argument, 0, 'h'}, + {"house-keeping", required_argument, 0, 'H'}, {"irq", required_argument, 0, 'i'}, {"nano", no_argument, 0, 'n'}, {"period", required_argument, 0, 'p'}, @@ -364,6 +418,7 @@ static struct timerlat_top_params {"stack", required_argument, 0, 's'}, {"thread", required_argument, 0, 'T'}, {"trace", optional_argument, 0, 't'}, + {"user-threads", no_argument, 0, 'u'}, {"trigger", required_argument, 0, '0'}, {"filter", required_argument, 0, '1'}, {"dma-latency", required_argument, 0, '2'}, @@ -376,7 +431,7 @@ static struct timerlat_top_params /* getopt_long stores the option index here. */ int option_index = 0; - c = getopt_long(argc, argv, "a:c:d:De:hi:np:P:qs:t::T:0:1:2:345:", + c = getopt_long(argc, argv, "a:c:C::d:De:hH:i:np:P:qs:t::T:u0:1:2:345:", long_options, &option_index); /* detect the end of the options. */ @@ -412,11 +467,21 @@ static struct timerlat_top_params params->aa_only = 1; break; case 'c': - retval = parse_cpu_list(optarg, ¶ms->monitored_cpus); + retval = parse_cpu_set(optarg, ¶ms->monitored_cpus); if (retval) timerlat_top_usage("\nInvalid -c cpu list\n"); params->cpus = optarg; break; + case 'C': + params->cgroup = 1; + if (!optarg) { + /* will inherit this cgroup */ + params->cgroup_name = NULL; + } else if (*optarg == '=') { + /* skip the = */ + params->cgroup_name = ++optarg; + } + break; case 'D': config_debug = 1; break; @@ -440,6 +505,14 @@ static struct timerlat_top_params case '?': timerlat_top_usage(NULL); break; + case 'H': + params->hk_cpus = 1; + retval = parse_cpu_set(optarg, ¶ms->hk_cpu_set); + if (retval) { + err_msg("Error parsing house keeping CPUs\n"); + exit(EXIT_FAILURE); + } + break; case 'i': params->stop_us = get_llong_from_str(optarg); break; @@ -474,6 +547,9 @@ static struct timerlat_top_params params->trace_output = "timerlat_trace.txt"; break; + case 'u': + params->user_top = true; + break; case '0': /* trigger */ if (params->events) { retval = trace_event_add_trigger(params->events, optarg); @@ -538,6 +614,7 @@ static int timerlat_top_apply_config(struct osnoise_tool *top, struct timerlat_top_params *params) { int retval; + int i; if (!params->sleep_time) params->sleep_time = 1; @@ -548,6 +625,9 @@ timerlat_top_apply_config(struct osnoise_tool *top, struct timerlat_top_params * err_msg("Failed to apply CPUs config\n"); goto out_err; } + } else { + for (i = 0; i < sysconf(_SC_NPROCESSORS_CONF); i++) + CPU_SET(i, ¶ms->monitored_cpus); } if (params->stop_us) { @@ -584,6 +664,32 @@ timerlat_top_apply_config(struct osnoise_tool *top, struct timerlat_top_params * } } + if (params->hk_cpus) { + retval = sched_setaffinity(getpid(), sizeof(params->hk_cpu_set), + ¶ms->hk_cpu_set); + if (retval == -1) { + err_msg("Failed to set rtla to the house keeping CPUs\n"); + goto out_err; + } + } else if (params->cpus) { + /* + * Even if the user do not set a house-keeping CPU, try to + * move rtla to a CPU set different to the one where the user + * set the workload to run. + * + * No need to check results as this is an automatic attempt. + */ + auto_house_keeping(¶ms->monitored_cpus); + } + + if (params->user_top) { + retval = osnoise_set_workload(top->context, 0); + if (retval) { + err_msg("Failed to set OSNOISE_WORKLOAD option\n"); + goto out_err; + } + } + return 0; out_err: @@ -598,7 +704,6 @@ static struct osnoise_tool { struct osnoise_tool *top; int nr_cpus; - int retval; nr_cpus = sysconf(_SC_NPROCESSORS_CONF); @@ -615,16 +720,6 @@ static struct osnoise_tool tep_register_event_handler(top->trace.tep, -1, "ftrace", "timerlat", timerlat_top_handler, top); - /* - * If no auto analysis, we are ready. - */ - if (params->no_aa) - return top; - - retval = timerlat_aa_init(top, nr_cpus, params->dump_tasks); - if (retval) - goto out_err; - return top; out_err: @@ -655,9 +750,12 @@ int timerlat_top_main(int argc, char *argv[]) { struct timerlat_top_params *params; struct osnoise_tool *record = NULL; + struct timerlat_u_params params_u; struct osnoise_tool *top = NULL; + struct osnoise_tool *aa = NULL; struct trace_instance *trace; int dma_latency_fd = -1; + pthread_t timerlat_u; int return_value = 1; char *max_lat; int retval; @@ -694,6 +792,14 @@ int timerlat_top_main(int argc, char *argv[]) } } + if (params->cgroup && !params->user_top) { + retval = set_comm_cgroup("timerlat/", params->cgroup_name); + if (!retval) { + err_msg("Failed to move threads to cgroup\n"); + goto out_free; + } + } + if (params->dma_latency >= 0) { dma_latency_fd = set_cpu_dma_latency(params->dma_latency); if (dma_latency_fd < 0) { @@ -702,8 +808,6 @@ int timerlat_top_main(int argc, char *argv[]) } } - trace_instance_start(trace); - if (params->trace_output) { record = osnoise_init_trace_tool("timerlat"); if (!record) { @@ -716,13 +820,70 @@ int timerlat_top_main(int argc, char *argv[]) if (retval) goto out_top; } + } - trace_instance_start(&record->trace); + if (!params->no_aa) { + if (params->aa_only) { + /* as top is not used for display, use it for aa */ + aa = top; + } else { + /* otherwise, a new instance is needed */ + aa = osnoise_init_tool("timerlat_aa"); + if (!aa) + goto out_top; + } + + retval = timerlat_aa_init(aa, params->dump_tasks); + if (retval) { + err_msg("Failed to enable the auto analysis instance\n"); + goto out_top; + } + + /* if it is re-using the main instance, there is no need to start it */ + if (aa != top) { + retval = enable_timerlat(&aa->trace); + if (retval) { + err_msg("Failed to enable timerlat tracer\n"); + goto out_top; + } + } } + /* + * Start the tracers here, after having set all instances. + * + * Let the trace instance start first for the case of hitting a stop + * tracing while enabling other instances. The trace instance is the + * one with most valuable information. + */ + if (params->trace_output) + trace_instance_start(&record->trace); + if (!params->no_aa && aa != top) + trace_instance_start(&aa->trace); + trace_instance_start(trace); + top->start_time = time(NULL); timerlat_top_set_signals(params); + if (params->user_top) { + /* rtla asked to stop */ + params_u.should_run = 1; + /* all threads left */ + params_u.stopped_running = 0; + + params_u.set = ¶ms->monitored_cpus; + if (params->set_sched) + params_u.sched_param = ¶ms->sched_param; + else + params_u.sched_param = NULL; + + params_u.cgroup_name = params->cgroup_name; + + retval = pthread_create(&timerlat_u, NULL, timerlat_u_dispatcher, ¶ms_u); + if (retval) + err_msg("Error creating timerlat user-space threads\n"); + } + while (!stop_tracing) { sleep(params->sleep_time); @@ -746,6 +907,18 @@ int timerlat_top_main(int argc, char *argv[]) if (trace_is_off(&top->trace, &record->trace)) break; + /* is there still any user-threads ? */ + if (params->user_top) { + if (params_u.stopped_running) { + debug_msg("timerlat user space threads stopped!\n"); + break; + } + } + } + + if (params->user_top && !params_u.stopped_running) { + params_u.should_run = 0; + sleep(1); } timerlat_print_stats(params, top); @@ -775,13 +948,15 @@ int timerlat_top_main(int argc, char *argv[]) } out_top: + timerlat_aa_destroy(); if (dma_latency_fd >= 0) close(dma_latency_fd); trace_events_destroy(&record->trace, params->events); params->events = NULL; out_free: timerlat_free_top(top->data); - timerlat_aa_destroy(); + if (aa && aa != top) + osnoise_destroy_tool(aa); osnoise_destroy_tool(record); osnoise_destroy_tool(top); free(params); diff --git a/tools/tracing/rtla/src/timerlat_u.c b/tools/tracing/rtla/src/timerlat_u.c new file mode 100644 index 000000000000..05e310696dd5 --- /dev/null +++ b/tools/tracing/rtla/src/timerlat_u.c @@ -0,0 +1,224 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ + +#define _GNU_SOURCE +#include <sched.h> +#include <fcntl.h> +#include <stdlib.h> +#include <unistd.h> +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <tracefs.h> +#include <pthread.h> +#include <sys/wait.h> +#include <sys/prctl.h> + +#include "utils.h" +#include "timerlat_u.h" + +/* + * This is the user-space main for the tool timerlatu/ threads. + * + * It is as simple as this: + * - set affinity + * - set priority + * - open tracer fd + * - spin + * - close + */ +static int timerlat_u_main(int cpu, struct timerlat_u_params *params) +{ + struct sched_param sp = { .sched_priority = 95 }; + char buffer[1024]; + int timerlat_fd; + cpu_set_t set; + int retval; + + /* + * This all is only setting up the tool. + */ + CPU_ZERO(&set); + CPU_SET(cpu, &set); + + retval = sched_setaffinity(gettid(), sizeof(set), &set); + if (retval == -1) { + err_msg("Error setting user thread affinity\n"); + exit(1); + } + + if (!params->sched_param) { + retval = sched_setscheduler(0, SCHED_FIFO, &sp); + if (retval < 0) { + err_msg("Error setting timerlat u default priority: %s\n", strerror(errno)); + exit(1); + } + } else { + retval = __set_sched_attr(getpid(), params->sched_param); + if (retval) { + /* __set_sched_attr prints an error message, so */ + exit(0); + } + } + + if (params->cgroup_name) { + retval = set_pid_cgroup(gettid(), params->cgroup_name); + if (!retval) { + err_msg("Error setting timerlat u cgroup pid\n"); + pthread_exit(&retval); + } + } + + /* + * This is the tool's loop. If you want to use as base for your own tool... + * go ahead. + */ + snprintf(buffer, sizeof(buffer), "osnoise/per_cpu/cpu%d/timerlat_fd", cpu); + + timerlat_fd = tracefs_instance_file_open(NULL, buffer, O_RDONLY); + if (timerlat_fd < 0) { + err_msg("Error opening %s:%s\n", buffer, strerror(errno)); + exit(1); + } + + debug_msg("User-space timerlat pid %d on cpu %d\n", gettid(), cpu); + + /* add should continue with a signal handler */ + while (true) { + retval = read(timerlat_fd, buffer, 1024); + if (retval < 0) + break; + } + + close(timerlat_fd); + + debug_msg("Leaving timerlat pid %d on cpu %d\n", gettid(), cpu); + exit(0); +} + +/* + * timerlat_u_send_kill - send a kill signal for all processes + * + * Return the number of processes that received the kill. + */ +static int timerlat_u_send_kill(pid_t *procs, int nr_cpus) +{ + int killed = 0; + int i, retval; + + for (i = 0; i < nr_cpus; i++) { + if (!procs[i]) + continue; + retval = kill(procs[i], SIGKILL); + if (!retval) + killed++; + else + err_msg("Error killing child process %d\n", procs[i]); + } + + return killed; +} + +/** + * timerlat_u_dispatcher - dispatch one timerlatu/ process per monitored CPU + * + * This is a thread main that will fork one new process for each monitored + * CPU. It will wait for: + * + * - rtla to tell to kill the child processes + * - some child process to die, and the cleanup all the processes + * + * whichever comes first. + * + */ +void *timerlat_u_dispatcher(void *data) +{ + int nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + struct timerlat_u_params *params = data; + char proc_name[128]; + int procs_count = 0; + int retval = 1; + pid_t *procs; + int wstatus; + pid_t pid; + int i; + + debug_msg("Dispatching timerlat u procs\n"); + + procs = calloc(nr_cpus, sizeof(pid_t)); + if (!procs) + pthread_exit(&retval); + + for (i = 0; i < nr_cpus; i++) { + if (params->set && !CPU_ISSET(i, params->set)) + continue; + + pid = fork(); + + /* child */ + if (!pid) { + + /* + * rename the process + */ + snprintf(proc_name, sizeof(proc_name), "timerlatu/%d", i); + pthread_setname_np(pthread_self(), proc_name); + prctl(PR_SET_NAME, (unsigned long)proc_name, 0, 0, 0); + + timerlat_u_main(i, params); + /* timerlat_u_main should exit()! Anyways... */ + pthread_exit(&retval); + } + + /* parent */ + if (pid == -1) { + timerlat_u_send_kill(procs, nr_cpus); + debug_msg("Failed to create child processes"); + pthread_exit(&retval); + } + + procs_count++; + procs[i] = pid; + } + + while (params->should_run) { + /* check if processes died */ + pid = waitpid(-1, &wstatus, WNOHANG); + if (pid != 0) { + for (i = 0; i < nr_cpus; i++) { + if (procs[i] == pid) { + procs[i] = 0; + procs_count--; + } + } + break; + } + + sleep(1); + } + + timerlat_u_send_kill(procs, nr_cpus); + + while (procs_count) { + pid = waitpid(-1, &wstatus, 0); + if (pid == -1) { + err_msg("Failed to monitor child processes"); + pthread_exit(&retval); + } + for (i = 0; i < nr_cpus; i++) { + if (procs[i] == pid) { + procs[i] = 0; + procs_count--; + } + } + } + + params->stopped_running = 1; + + free(procs); + retval = 0; + pthread_exit(&retval); + +} diff --git a/tools/tracing/rtla/src/timerlat_u.h b/tools/tracing/rtla/src/timerlat_u.h new file mode 100644 index 000000000000..661511908957 --- /dev/null +++ b/tools/tracing/rtla/src/timerlat_u.h @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ + +struct timerlat_u_params { + /* timerlat -> timerlat_u: user-space threads can keep running */ + int should_run; + /* timerlat_u -> timerlat: all timerlat_u threads left, no reason to continue */ + int stopped_running; + + /* threads config */ + cpu_set_t *set; + char *cgroup_name; + struct sched_attr *sched_param; +}; + +void *timerlat_u_dispatcher(void *data); diff --git a/tools/tracing/rtla/src/utils.c b/tools/tracing/rtla/src/utils.c index 663a047f794d..623a38908ed5 100644 --- a/tools/tracing/rtla/src/utils.c +++ b/tools/tracing/rtla/src/utils.c @@ -3,6 +3,7 @@ * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> */ +#define _GNU_SOURCE #include <dirent.h> #include <stdarg.h> #include <stdlib.h> @@ -88,27 +89,24 @@ void get_duration(time_t start_time, char *output, int output_size) } /* - * parse_cpu_list - parse a cpu_list filling a char vector with cpus set + * parse_cpu_set - parse a cpu_list filling cpu_set_t argument * - * Receives a cpu list, like 1-3,5 (cpus 1, 2, 3, 5), and then set the char - * in the monitored_cpus. + * Receives a cpu list, like 1-3,5 (cpus 1, 2, 3, 5), and then set + * filling cpu_set_t argument. * - * XXX: convert to a bitmask. + * Returns 1 on success, 0 otherwise. */ -int parse_cpu_list(char *cpu_list, char **monitored_cpus) +int parse_cpu_set(char *cpu_list, cpu_set_t *set) { - char *mon_cpus; const char *p; int end_cpu; int nr_cpus; int cpu; int i; - nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + CPU_ZERO(set); - mon_cpus = calloc(nr_cpus, sizeof(char)); - if (!mon_cpus) - goto err; + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); for (p = cpu_list; *p; ) { cpu = atoi(p); @@ -128,12 +126,12 @@ int parse_cpu_list(char *cpu_list, char **monitored_cpus) end_cpu = cpu; if (cpu == end_cpu) { - debug_msg("cpu_list: adding cpu %d\n", cpu); - mon_cpus[cpu] = 1; + debug_msg("cpu_set: adding cpu %d\n", cpu); + CPU_SET(cpu, set); } else { for (i = cpu; i <= end_cpu; i++) { - debug_msg("cpu_list: adding cpu %d\n", i); - mon_cpus[i] = 1; + debug_msg("cpu_set: adding cpu %d\n", i); + CPU_SET(i, set); } } @@ -141,12 +139,9 @@ int parse_cpu_list(char *cpu_list, char **monitored_cpus) p++; } - *monitored_cpus = mon_cpus; - return 0; - err: - debug_msg("Error parsing the cpu list %s", cpu_list); + debug_msg("Error parsing the cpu set %s\n", cpu_list); return 1; } @@ -529,3 +524,296 @@ int set_cpu_dma_latency(int32_t latency) return fd; } + +#define _STR(x) #x +#define STR(x) _STR(x) + +/* + * find_mount - find a the mount point of a given fs + * + * Returns 0 if mount is not found, otherwise return 1 and fill mp + * with the mount point. + */ +static const int find_mount(const char *fs, char *mp, int sizeof_mp) +{ + char mount_point[MAX_PATH]; + char type[100]; + int found; + FILE *fp; + + fp = fopen("/proc/mounts", "r"); + if (!fp) + return 0; + + while (fscanf(fp, "%*s %" STR(MAX_PATH) "s %99s %*s %*d %*d\n", mount_point, type) == 2) { + if (strcmp(type, fs) == 0) { + found = 1; + break; + } + } + fclose(fp); + + if (!found) + return 0; + + memset(mp, 0, sizeof_mp); + strncpy(mp, mount_point, sizeof_mp - 1); + + debug_msg("Fs %s found at %s\n", fs, mp); + return 1; +} + +/* + * get_self_cgroup - get the current thread cgroup path + * + * Parse /proc/$$/cgroup file to get the thread's cgroup. As an example of line to parse: + * + * 0::/user.slice/user-0.slice/session-3.scope'\n' + * + * This function is interested in the content after the second : and before the '\n'. + * + * Returns 1 if a string was found, 0 otherwise. + */ +static int get_self_cgroup(char *self_cg, int sizeof_self_cg) +{ + char path[MAX_PATH], *start; + int fd, retval; + + snprintf(path, MAX_PATH, "/proc/%d/cgroup", getpid()); + + fd = open(path, O_RDONLY); + if (fd < 0) + return 0; + + retval = read(fd, path, MAX_PATH); + + close(fd); + + if (retval <= 0) + return 0; + + start = path; + + start = strstr(start, ":"); + if (!start) + return 0; + + /* skip ":" */ + start++; + + start = strstr(start, ":"); + if (!start) + return 0; + + /* skip ":" */ + start++; + + if (strlen(start) >= sizeof_self_cg) + return 0; + + snprintf(self_cg, sizeof_self_cg, "%s", start); + + /* Swap '\n' with '\0' */ + start = strstr(self_cg, "\n"); + + /* there must be '\n' */ + if (!start) + return 0; + + /* ok, it found a string after the second : and before the \n */ + *start = '\0'; + + return 1; +} + +/* + * set_comm_cgroup - Set cgroup to pid_t pid + * + * If cgroup argument is not NULL, the threads will move to the given cgroup. + * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used. + * + * Supports cgroup v2. + * + * Returns 1 on success, 0 otherwise. + */ +int set_pid_cgroup(pid_t pid, const char *cgroup) +{ + char cgroup_path[MAX_PATH - strlen("/cgroup.procs")]; + char cgroup_procs[MAX_PATH]; + char pid_str[24]; + int retval; + int cg_fd; + + retval = find_mount("cgroup2", cgroup_path, sizeof(cgroup_path)); + if (!retval) { + err_msg("Did not find cgroupv2 mount point\n"); + return 0; + } + + if (!cgroup) { + retval = get_self_cgroup(&cgroup_path[strlen(cgroup_path)], + sizeof(cgroup_path) - strlen(cgroup_path)); + if (!retval) { + err_msg("Did not find self cgroup\n"); + return 0; + } + } else { + snprintf(&cgroup_path[strlen(cgroup_path)], + sizeof(cgroup_path) - strlen(cgroup_path), "%s/", cgroup); + } + + snprintf(cgroup_procs, MAX_PATH, "%s/cgroup.procs", cgroup_path); + + debug_msg("Using cgroup path at: %s\n", cgroup_procs); + + cg_fd = open(cgroup_procs, O_RDWR); + if (cg_fd < 0) + return 0; + + snprintf(pid_str, sizeof(pid_str), "%d\n", pid); + + retval = write(cg_fd, pid_str, strlen(pid_str)); + if (retval < 0) + err_msg("Error setting cgroup attributes for pid:%s - %s\n", + pid_str, strerror(errno)); + else + debug_msg("Set cgroup attributes for pid:%s\n", pid_str); + + close(cg_fd); + + return (retval >= 0); +} + +/** + * set_comm_cgroup - Set cgroup to threads starting with char *comm_prefix + * + * If cgroup argument is not NULL, the threads will move to the given cgroup. + * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used. + * + * Supports cgroup v2. + * + * Returns 1 on success, 0 otherwise. + */ +int set_comm_cgroup(const char *comm_prefix, const char *cgroup) +{ + char cgroup_path[MAX_PATH - strlen("/cgroup.procs")]; + char cgroup_procs[MAX_PATH]; + struct dirent *proc_entry; + DIR *procfs; + int retval; + int cg_fd; + + if (strlen(comm_prefix) >= MAX_PATH) { + err_msg("Command prefix is too long: %d < strlen(%s)\n", + MAX_PATH, comm_prefix); + return 0; + } + + retval = find_mount("cgroup2", cgroup_path, sizeof(cgroup_path)); + if (!retval) { + err_msg("Did not find cgroupv2 mount point\n"); + return 0; + } + + if (!cgroup) { + retval = get_self_cgroup(&cgroup_path[strlen(cgroup_path)], + sizeof(cgroup_path) - strlen(cgroup_path)); + if (!retval) { + err_msg("Did not find self cgroup\n"); + return 0; + } + } else { + snprintf(&cgroup_path[strlen(cgroup_path)], + sizeof(cgroup_path) - strlen(cgroup_path), "%s/", cgroup); + } + + snprintf(cgroup_procs, MAX_PATH, "%s/cgroup.procs", cgroup_path); + + debug_msg("Using cgroup path at: %s\n", cgroup_procs); + + cg_fd = open(cgroup_procs, O_RDWR); + if (cg_fd < 0) + return 0; + + procfs = opendir("/proc"); + if (!procfs) { + err_msg("Could not open procfs\n"); + goto out_cg; + } + + while ((proc_entry = readdir(procfs))) { + + retval = procfs_is_workload_pid(comm_prefix, proc_entry); + if (!retval) + continue; + + retval = write(cg_fd, proc_entry->d_name, strlen(proc_entry->d_name)); + if (retval < 0) { + err_msg("Error setting cgroup attributes for pid:%s - %s\n", + proc_entry->d_name, strerror(errno)); + goto out_procfs; + } + + debug_msg("Set cgroup attributes for pid:%s\n", proc_entry->d_name); + } + + closedir(procfs); + close(cg_fd); + return 1; + +out_procfs: + closedir(procfs); +out_cg: + close(cg_fd); + return 0; +} + +/** + * auto_house_keeping - Automatically move rtla out of measurement threads + * + * Try to move rtla away from the tracer, if possible. + * + * Returns 1 on success, 0 otherwise. + */ +int auto_house_keeping(cpu_set_t *monitored_cpus) +{ + cpu_set_t rtla_cpus, house_keeping_cpus; + int retval; + + /* first get the CPUs in which rtla can actually run. */ + retval = sched_getaffinity(getpid(), sizeof(rtla_cpus), &rtla_cpus); + if (retval == -1) { + debug_msg("Could not get rtla affinity, rtla might run with the threads!\n"); + return 0; + } + + /* then check if the existing setup is already good. */ + CPU_AND(&house_keeping_cpus, &rtla_cpus, monitored_cpus); + if (!CPU_COUNT(&house_keeping_cpus)) { + debug_msg("rtla and the monitored CPUs do not share CPUs."); + debug_msg("Skipping auto house-keeping\n"); + return 1; + } + + /* remove the intersection */ + CPU_XOR(&house_keeping_cpus, &rtla_cpus, monitored_cpus); + + /* get only those that rtla can run */ + CPU_AND(&house_keeping_cpus, &house_keeping_cpus, &rtla_cpus); + + /* is there any cpu left? */ + if (!CPU_COUNT(&house_keeping_cpus)) { + debug_msg("Could not find any CPU for auto house-keeping\n"); + return 0; + } + + retval = sched_setaffinity(getpid(), sizeof(house_keeping_cpus), &house_keeping_cpus); + if (retval == -1) { + debug_msg("Could not set affinity for auto house-keeping\n"); + return 0; + } + + debug_msg("rtla automatically moved to an auto house-keeping cpu set\n"); + + return 1; +} diff --git a/tools/tracing/rtla/src/utils.h b/tools/tracing/rtla/src/utils.h index 90e4f52a030b..04ed1e650495 100644 --- a/tools/tracing/rtla/src/utils.h +++ b/tools/tracing/rtla/src/utils.h @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 + #include <stdint.h> #include <time.h> +#include <sched.h> /* * '18446744073709551615\0' @@ -54,8 +56,13 @@ struct sched_attr { }; int parse_prio(char *arg, struct sched_attr *sched_param); +int parse_cpu_set(char *cpu_list, cpu_set_t *set); +int __set_sched_attr(int pid, struct sched_attr *attr); int set_comm_sched_attr(const char *comm_prefix, struct sched_attr *attr); +int set_comm_cgroup(const char *comm_prefix, const char *cgroup); +int set_pid_cgroup(pid_t pid, const char *cgroup); int set_cpu_dma_latency(int32_t latency); +int auto_house_keeping(cpu_set_t *monitored_cpus); #define ns_to_usf(x) (((double)x/1000)) #define ns_to_per(total, part) ((part * 100) / (double)total) |