From a4b3ada83d06554d307dd54abdc62b2e5648264a Mon Sep 17 00:00:00 2001 From: Carl Henrik Lunde Date: Fri, 3 Apr 2009 14:27:15 +0200 Subject: blktrace: NUL-terminate user space messages Impact: fix corrupted blkparse output Make sure messages from user space are NUL-terminated strings, otherwise we could dump random memory to the block trace file. Additionally, I've limited the message to BLK_TN_MAX_MSG-1 characters, because the last character would be stripped by vscnprintf anyway. Signed-off-by: Carl Henrik Lunde Cc: Li Zefan Cc: Arnaldo Carvalho de Melo Cc: "Alan D. Brunelle" Cc: Steven Rostedt LKML-Reference: <20090403122714.GT5178@kernel.dk> Signed-off-by: Ingo Molnar --- kernel/trace/blktrace.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 947c5b3f90c4..a400b861fad3 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -327,10 +327,10 @@ static ssize_t blk_msg_write(struct file *filp, const char __user *buffer, char *msg; struct blk_trace *bt; - if (count > BLK_TN_MAX_MSG) + if (count > BLK_TN_MAX_MSG - 1) return -EINVAL; - msg = kmalloc(count, GFP_KERNEL); + msg = kmalloc(count + 1, GFP_KERNEL); if (msg == NULL) return -ENOMEM; @@ -339,6 +339,7 @@ static ssize_t blk_msg_write(struct file *filp, const char __user *buffer, return -EFAULT; } + msg[count] = '\0'; bt = filp->private_data; __trace_note_message(bt, "%s", msg); kfree(msg); -- cgit v1.2.3 From 7635b03adf3d7b84da7649b81efa91e6ebf11b85 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 3 Apr 2009 15:31:34 +0800 Subject: blktrace: small cleanup in blk_msg_write() Signed-off-by: Li Zefan Cc: Arnaldo Carvalho de Melo Cc: "Alan D. Brunelle" Cc: Jens Axboe LKML-Reference: <49D5BB56.7000807@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/trace/blktrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index a400b861fad3..73d7860b72e2 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -327,7 +327,7 @@ static ssize_t blk_msg_write(struct file *filp, const char __user *buffer, char *msg; struct blk_trace *bt; - if (count > BLK_TN_MAX_MSG - 1) + if (count >= BLK_TN_MAX_MSG) return -EINVAL; msg = kmalloc(count + 1, GFP_KERNEL); -- cgit v1.2.3 From e2494e1b42ebac402324105d57646489d19e2b01 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 2 Apr 2009 13:43:26 +0800 Subject: blktrace: fix pdu_len when tracing packet command requests Impact: output all of packet commands - not just the first 4 / 8 bytes Since commit d7e3c3249ef23b4617393c69fe464765b4ff1645 ("block: add large command support"), struct request->cmd has been changed from unsinged char cmd[BLK_MAX_CDB] to unsigned char *cmd. v1 -> v2: by: FUJITA Tomonori - make sure rq->cmd_len is always intialized, and then we can use rq->cmd_len instead of BLK_MAX_CDB. Signed-off-by: Li Zefan Acked-by: FUJITA Tomonori Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Jens Axboe LKML-Reference: <49D4507E.2060602@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- block/blk-core.c | 1 + kernel/trace/blktrace.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/block/blk-core.c b/block/blk-core.c index 29bcfac6c688..859879d0a0bf 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -132,6 +132,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq) INIT_HLIST_NODE(&rq->hash); RB_CLEAR_NODE(&rq->rb_node); rq->cmd = rq->__cmd; + rq->cmd_len = BLK_MAX_CDB; rq->tag = -1; rq->ref_count = 1; } diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 73d7860b72e2..b32ff446c3fb 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -643,7 +643,7 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq, if (blk_pc_request(rq)) { what |= BLK_TC_ACT(BLK_TC_PC); __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors, - sizeof(rq->cmd), rq->cmd); + rq->cmd_len, rq->cmd); } else { what |= BLK_TC_ACT(BLK_TC_FS); __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, -- cgit v1.2.3 From 301fd748e2c81e78e74edbc694a64caa7b95dda2 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 3 Apr 2009 11:12:23 -0400 Subject: tracing: remove CALLER_ADDR2 from wakeup tracer Maneesh Soni was getting a crash when running the wakeup tracer. We debugged it down to the recording of the function with the CALLER_ADDR2 macro. This is used to get the location of the caller to schedule. But the problem comes when schedule is called by assmebly. In the case that Maneesh had, retint_careful would call schedule. But retint_careful does not set up a proper frame pointer. CALLER_ADDR2 is defined as __builtin_return_address(2). This produces the following assembly in the wakeup tracer code. mov 0x0(%rbp),%rcx <--- get the frame pointer of the caller mov %r14d,%r8d mov 0xf2de8e(%rip),%rdi mov 0x8(%rcx),%rsi <-- this is __builtin_return_address(1) mov 0x28(%rdi,%rax,8),%rbx mov (%rcx),%rax <-- get the frame pointer of the caller's caller mov %r12,%rcx mov 0x8(%rax),%rdx <-- this is __builtin_return_address(2) At the reading of 0x8(%rax) Maneesh's machine would take a fault. The reason is that retint_careful did not set up the return address and the content of %rax here was zero. To verify this, I sent Maneesh a patch to create a frame pointer in retint_careful. He ran the test again but this time he would take the same type of fault from sysret_careful. The retint_careful was no longer an issue, but there are other callers that still have issues. Instead of adding frame pointers for all callers to schedule (in possibly all archs), it is much safer to simply not use CALLER_ADDR2. This loses out on knowing what called schedule, but the function tracer will help there if needed. Reported-by: Maneesh Soni Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace_sched_wakeup.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 3c5ad6b2ec84..5bc00e8f153e 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -154,7 +154,7 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev, if (unlikely(!tracer_enabled || next != wakeup_task)) goto out_unlock; - trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); + trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc); tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc); /* @@ -257,6 +257,12 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success) data = wakeup_trace->data[wakeup_cpu]; data->preempt_timestamp = ftrace_now(cpu); tracing_sched_wakeup_trace(wakeup_trace, p, current, flags, pc); + + /* + * We must be careful in using CALLER_ADDR2. But since wake_up + * is not called by an assembly function (where as schedule is) + * it should be safe to use it here. + */ trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); out_locked: -- cgit v1.2.3 From cf8e3474654f20433aab9aa35826d43b5f245008 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Mon, 30 Mar 2009 13:48:00 +0800 Subject: tracing: fix incorrect return type of ns2usecs() Impact: fix time output bug in 32bits system ns2usecs() returns 'long', it's incorrect. (In i386) ... -0 [000] 521.442100: _spin_lock <-tick_do_update_jiffies64 -0 [000] 521.442101: do_timer <-tick_do_update_jiffies64 -0 [000] 521.442102: update_wall_time <-do_timer -0 [000] 521.442102: update_xtime_cache <-update_wall_time .... (It always print the time less than 2200 seconds besides ...) Because 'long' is 32bits in i386. ( (1<<31) useconds is about 2200 seconds) ... -0 [001] 4154502640.134759: rcu_bh_qsctr_inc <-__do_softirq -0 [001] 4154502640.134760: _local_bh_enable <-__do_softirq -0 [001] 4154502640.134761: idle_cpu <-irq_exit ... (very large value) Because 'long' is a signed type and it is 32bits in i386. Changes in v2: return 'unsigned long long' instead of 'cycle_t' Signed-off-by: Lai Jiangshan LKML-Reference: <49D05D10.4030009@cn.fujitsu.com> Reported-by: Li Zefan Acked-by: Frederic Weisbecker Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 3 +-- kernel/trace/trace.h | 2 +- kernel/trace/trace_output.c | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a0174a40c563..457dd8c97e0d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -147,8 +147,7 @@ static int __init set_ftrace_dump_on_oops(char *str) } __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops); -long -ns2usecs(cycle_t nsec) +unsigned long long ns2usecs(cycle_t nsec) { nsec += 500; do_div(nsec, 1000); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index cb0ce3fc36d3..0d81a4a2a4a5 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -596,7 +596,7 @@ extern int trace_selftest_startup_branch(struct tracer *trace, #endif /* CONFIG_FTRACE_STARTUP_TEST */ extern void *head_page(struct trace_array_cpu *data); -extern long ns2usecs(cycle_t nsec); +extern unsigned long long ns2usecs(cycle_t nsec); extern int trace_vbprintk(unsigned long ip, const char *fmt, va_list args); extern int diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index d72b9a63b247..64b54a59c55b 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -423,7 +423,7 @@ int trace_print_lat_context(struct trace_iterator *iter) trace_find_cmdline(entry->pid, comm); - ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08lx]" + ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08llx]" " %ld.%03ldms (+%ld.%03ldms): ", comm, entry->pid, iter->cpu, entry->flags, entry->preempt_count, iter->idx, -- cgit v1.2.3 From 5f0c6c03c5fee91c02c696bc9bf4c0d41392abe7 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 27 Mar 2009 14:22:10 +0100 Subject: tracing/ftrace: fix missing include string.h Building a kernel with tracing can raise the following warning on tip/master: kernel/trace/trace.c:1249: error: implicit declaration of function 'vbin_printf' We are missing an include to string.h Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker LKML-Reference: <1238160130-7437-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 457dd8c97e0d..2230b46f9e1c 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From 8bcae09b93e7f96f700b6bb372c2b3f2b36636dc Mon Sep 17 00:00:00 2001 From: Zhaolei Date: Tue, 31 Mar 2009 15:24:51 +0800 Subject: ftrace: Add check of sched_stopped for probe_sched_wakeup The wakeup tracing in sched_switch does not stop when a user disables tracing. This is because the probe_sched_wakeup() is missing the check to prevent the wakeup from being traced. Signed-off-by: Zhao Lei LKML-Reference: <49D1C543.3010307@cn.fujitsu.com> Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace_sched_switch.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index de35f200abd3..9117cea6f1ae 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -62,6 +62,9 @@ probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee, int success) pc = preempt_count(); tracing_record_cmdline(current); + if (sched_stopped) + return; + local_irq_save(flags); cpu = raw_smp_processor_id(); data = ctx_trace->data[cpu]; -- cgit v1.2.3 From b0dfa978c7a1699fb3506fbfcba0b6a5c4bd17ae Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 1 Apr 2009 22:53:08 +0200 Subject: tracing/ftrace: alloc the started cpumask for the trace file Impact: fix a crash while cat trace file Currently we are using a cpumask to remind each cpu where a trace occured. It lets us notice the user that a cpu just had its first trace. But on latest -tip we have the following crash once we cat the trace file: IP: [] print_trace_fmt+0x45/0xe7 *pde = 00000000 Oops: 0000 [#1] PREEMPT SMP last sysfs file: /sys/class/net/eth0/carrier Pid: 3897, comm: cat Not tainted (2.6.29-tip-02825-g0f22972-dirty #81) EIP: 0060:[] EFLAGS: 00010297 CPU: 0 EIP is at print_trace_fmt+0x45/0xe7 EAX: 00000000 EBX: 00000000 ECX: c12d9e98 EDX: ccdb7010 ESI: d31f4000 EDI: 00322401 EBP: d31f3f10 ESP: d31f3efc DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 Process cat (pid: 3897, ti=d31f2000 task=d3b3cf20 task.ti=d31f2000) Stack: d31f4080 ccdb7010 d31f4000 d691fe70 ccdb7010 d31f3f24 c0270e5c d31f4000 d691fe70 d31f4000 d31f3f34 c02718e8 c12d9e98 d691fe70 d31f3f70 c02bfc33 00001000 09130000 d3b46e00 d691fe98 00000000 00000079 00000001 00000000 Call Trace: [] ? print_trace_line+0x170/0x17c [] ? s_show+0xa7/0xbd [] ? seq_read+0x24a/0x327 [] ? seq_read+0x0/0x327 [] ? vfs_read+0x86/0xe1 [] ? sys_read+0x40/0x65 [] ? sysenter_do_call+0x12/0x3c Code: 00 00 00 89 45 ec f7 c7 00 20 00 00 89 55 f0 74 4e f6 86 98 10 00 00 02 74 45 8b 86 8c 10 00 00 8b 9e a8 10 00 00 e8 52 f3 ff ff <0f> a3 03 19 c0 85 c0 75 2b 8b 86 8c 10 00 00 8b 9e a8 10 00 00 EIP: [] print_trace_fmt+0x45/0xe7 SS:ESP 0068:d31f3efc CR2: 0000000000000000 ---[ end trace aa9cf38e5ebed9dd ]--- This is because we alloc the iter->started cpumask on tracing_pipe_open but not on tracing_open. It hadn't been noticed until now because we need to have ring buffer overruns to activate the starting of cpu buffer detection. Also, we need a check to not print the messagge for the first trace on the file. Signed-off-by: Frederic Weisbecker LKML-Reference: <1238619188-6109-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2230b46f9e1c..fc8c7d66832b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1632,7 +1632,11 @@ static void test_cpu_buff_start(struct trace_iterator *iter) return; cpumask_set_cpu(iter->cpu, iter->started); - trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu); + + /* Don't print started cpu buffer for the first entry of the trace */ + if (iter->idx > 1) + trace_seq_printf(s, "##### CPU %u buffer started ####\n", + iter->cpu); } static enum print_line_t print_trace_fmt(struct trace_iterator *iter) @@ -1867,6 +1871,11 @@ __tracing_open(struct inode *inode, struct file *file) if (current_trace) *iter->trace = *current_trace; + if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) + goto fail; + + cpumask_clear(iter->started); + if (current_trace && current_trace->print_max) iter->tr = &max_tr; else @@ -1917,6 +1926,7 @@ __tracing_open(struct inode *inode, struct file *file) if (iter->buffer_iter[cpu]) ring_buffer_read_finish(iter->buffer_iter[cpu]); } + free_cpumask_var(iter->started); fail: mutex_unlock(&trace_types_lock); kfree(iter->trace); @@ -1960,6 +1970,7 @@ static int tracing_release(struct inode *inode, struct file *file) seq_release(inode, file); mutex_destroy(&iter->mutex); + free_cpumask_var(iter->started); kfree(iter->trace); kfree(iter); return 0; -- cgit v1.2.3 From bc2b6871c17b3aff79fb14e1a1c06c5f5a187f76 Mon Sep 17 00:00:00 2001 From: Nikanth Karthikesan Date: Mon, 23 Mar 2009 11:58:31 +0530 Subject: Update /debug/tracing/README Some of the tracers have been renamed, which was not updated in the in-kernel run-time README file. Update it. Signed-off-by: Nikanth Karthikesan LKML-Reference: <200903231158.32151.knikanth@suse.de> Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index fc8c7d66832b..9d28476a9851 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2369,9 +2369,9 @@ static const char readme_msg[] = "# mkdir /debug\n" "# mount -t debugfs nodev /debug\n\n" "# cat /debug/tracing/available_tracers\n" - "wakeup preemptirqsoff preemptoff irqsoff ftrace sched_switch none\n\n" + "wakeup preemptirqsoff preemptoff irqsoff function sched_switch nop\n\n" "# cat /debug/tracing/current_tracer\n" - "none\n" + "nop\n" "# echo sched_switch > /debug/tracing/current_tracer\n" "# cat /debug/tracing/current_tracer\n" "sched_switch\n" -- cgit v1.2.3 From 1bbe2a83ab68e5cf8c66c372c7cb3b51910c2cfe Mon Sep 17 00:00:00 2001 From: Zhaolei Date: Fri, 3 Apr 2009 18:24:46 +0800 Subject: ftrace: Correct a text align for event format output If we cat debugfs/tracing/events/ftrace/bprint/format, we'll see: name: bprint ID: 6 format: field:unsigned char common_type; offset:0; size:1; field:unsigned char common_flags; offset:1; size:1; field:unsigned char common_preempt_count; offset:2; size:1; field:int common_pid; offset:4; size:4; field:int common_tgid; offset:8; size:4; field:unsigned long ip; offset:12; size:4; field:char * fmt; offset:16; size:4; field: char buf; offset:20; size:0; print fmt: "%08lx (%d) fmt:%p %s" There is an inconsistent blank before char buf. Signed-off-by: Zhao Lei LKML-Reference: <49D5E3EE.70201@cn.fujitsu.com> Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace_export.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 4d9952d3df50..07a22c33ebf3 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -40,7 +40,7 @@ #undef TRACE_FIELD_ZERO_CHAR #define TRACE_FIELD_ZERO_CHAR(item) \ - ret = trace_seq_printf(s, "\tfield: char " #item ";\t" \ + ret = trace_seq_printf(s, "\tfield:char " #item ";\t" \ "offset:%u;\tsize:0;\n", \ (unsigned int)offsetof(typeof(field), item)); \ if (!ret) \ -- cgit v1.2.3