From e9dbfae53eeb9fc3d4bb7da3df87fa9875f5da02 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 5 Jul 2011 11:36:06 -0400 Subject: tracing: Fix bug when reading system filters on module removal The event system is freed when its nr_events is set to zero. This happens when a module created an event system and then later the module is removed. Modules may share systems, so the system is allocated when it is created and freed when the modules are unloaded and all the events under the system are removed (nr_events set to zero). The problem arises when a task opened the "filter" file for the system. If the module is unloaded and it removed the last event for that system, the system structure is freed. If the task that opened the filter file accesses the "filter" file after the system has been freed, the system will access an invalid pointer. By adding a ref_count, and using it to keep track of what is using the event system, we can free it after all users are finished with the event system. Cc: Reported-by: Johannes Berg Signed-off-by: Steven Rostedt --- kernel/trace/trace_events.c | 86 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 75 insertions(+), 11 deletions(-) (limited to 'kernel/trace/trace_events.c') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 686ec399f2a8..ffc5b2884af1 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -244,6 +244,35 @@ static void ftrace_clear_events(void) mutex_unlock(&event_mutex); } +static void __put_system(struct event_subsystem *system) +{ + struct event_filter *filter = system->filter; + + WARN_ON_ONCE(system->ref_count == 0); + if (--system->ref_count) + return; + + if (filter) { + kfree(filter->filter_string); + kfree(filter); + } + kfree(system->name); + kfree(system); +} + +static void __get_system(struct event_subsystem *system) +{ + WARN_ON_ONCE(system->ref_count == 0); + system->ref_count++; +} + +static void put_system(struct event_subsystem *system) +{ + mutex_lock(&event_mutex); + __put_system(system); + mutex_unlock(&event_mutex); +} + /* * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events. */ @@ -826,6 +855,47 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, return cnt; } +static LIST_HEAD(event_subsystems); + +static int subsystem_open(struct inode *inode, struct file *filp) +{ + struct event_subsystem *system = NULL; + int ret; + + /* Make sure the system still exists */ + mutex_lock(&event_mutex); + list_for_each_entry(system, &event_subsystems, list) { + if (system == inode->i_private) { + /* Don't open systems with no events */ + if (!system->nr_events) { + system = NULL; + break; + } + __get_system(system); + break; + } + } + mutex_unlock(&event_mutex); + + if (system != inode->i_private) + return -ENODEV; + + ret = tracing_open_generic(inode, filp); + if (ret < 0) + put_system(system); + + return ret; +} + +static int subsystem_release(struct inode *inode, struct file *file) +{ + struct event_subsystem *system = inode->i_private; + + put_system(system); + + return 0; +} + static ssize_t subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) @@ -963,10 +1033,11 @@ static const struct file_operations ftrace_event_filter_fops = { }; static const struct file_operations ftrace_subsystem_filter_fops = { - .open = tracing_open_generic, + .open = subsystem_open, .read = subsystem_filter_read, .write = subsystem_filter_write, .llseek = default_llseek, + .release = subsystem_release, }; static const struct file_operations ftrace_system_enable_fops = { @@ -1002,8 +1073,6 @@ static struct dentry *event_trace_events_dir(void) return d_events; } -static LIST_HEAD(event_subsystems); - static struct dentry * event_subsystem_dir(const char *name, struct dentry *d_events) { @@ -1013,6 +1082,7 @@ event_subsystem_dir(const char *name, struct dentry *d_events) /* First see if we did not already create this dir */ list_for_each_entry(system, &event_subsystems, list) { if (strcmp(system->name, name) == 0) { + __get_system(system); system->nr_events++; return system->entry; } @@ -1035,6 +1105,7 @@ event_subsystem_dir(const char *name, struct dentry *d_events) } system->nr_events = 1; + system->ref_count = 1; system->name = kstrdup(name, GFP_KERNEL); if (!system->name) { debugfs_remove(system->entry); @@ -1184,16 +1255,9 @@ static void remove_subsystem_dir(const char *name) list_for_each_entry(system, &event_subsystems, list) { if (strcmp(system->name, name) == 0) { if (!--system->nr_events) { - struct event_filter *filter = system->filter; - debugfs_remove_recursive(system->entry); list_del(&system->list); - if (filter) { - kfree(filter->filter_string); - kfree(filter); - } - kfree(system->name); - kfree(system); + __put_system(system); } break; } -- cgit v1.2.3 From 40ee4dffff061399eb9358e0c8fcfbaf8de4c8fe Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 5 Jul 2011 14:32:51 -0400 Subject: tracing: Have "enable" file use refcounts like the "filter" file The "enable" file for the event system can be removed when a module is unloaded and the event system only has events from that module. As the event system nr_events count goes to zero, it may be freed if its ref_count is also set to zero. Like the "filter" file, the "enable" file may be opened by a task and referenced later, after a module has been unloaded and the events for that event system have been removed. Although the "filter" file referenced the event system structure, the "enable" file only references a pointer to the event system name. Since the name is freed when the event system is removed, it is possible that an access to the "enable" file may reference a freed pointer. Update the "enable" file to use the subsystem_open() routine that the "filter" file uses, to keep a reference to the event system structure while the "enable" file is opened. Cc: Reported-by: Johannes Berg Signed-off-by: Steven Rostedt --- kernel/trace/trace_events.c | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) (limited to 'kernel/trace/trace_events.c') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index ffc5b2884af1..3e2a7c91c548 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -557,7 +557,7 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { const char set_to_char[4] = { '?', '0', '1', 'X' }; - const char *system = filp->private_data; + struct event_subsystem *system = filp->private_data; struct ftrace_event_call *call; char buf[2]; int set = 0; @@ -568,7 +568,7 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt, if (!call->name || !call->class || !call->class->reg) continue; - if (system && strcmp(call->class->system, system) != 0) + if (system && strcmp(call->class->system, system->name) != 0) continue; /* @@ -598,7 +598,8 @@ static ssize_t system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { - const char *system = filp->private_data; + struct event_subsystem *system = filp->private_data; + const char *name = NULL; unsigned long val; char buf[64]; ssize_t ret; @@ -622,7 +623,14 @@ system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, if (val != 0 && val != 1) return -EINVAL; - ret = __ftrace_set_clr_event(NULL, system, NULL, val); + /* + * Opening of "enable" adds a ref count to system, + * so the name is safe to use. + */ + if (system) + name = system->name; + + ret = __ftrace_set_clr_event(NULL, name, NULL, val); if (ret) goto out; @@ -862,6 +870,9 @@ static int subsystem_open(struct inode *inode, struct file *filp) struct event_subsystem *system = NULL; int ret; + if (!inode->i_private) + goto skip_search; + /* Make sure the system still exists */ mutex_lock(&event_mutex); list_for_each_entry(system, &event_subsystems, list) { @@ -880,8 +891,9 @@ static int subsystem_open(struct inode *inode, struct file *filp) if (system != inode->i_private) return -ENODEV; + skip_search: ret = tracing_open_generic(inode, filp); - if (ret < 0) + if (ret < 0 && system) put_system(system); return ret; @@ -891,7 +903,8 @@ static int subsystem_release(struct inode *inode, struct file *file) { struct event_subsystem *system = inode->i_private; - put_system(system); + if (system) + put_system(system); return 0; } @@ -1041,10 +1054,11 @@ static const struct file_operations ftrace_subsystem_filter_fops = { }; static const struct file_operations ftrace_system_enable_fops = { - .open = tracing_open_generic, + .open = subsystem_open, .read = system_enable_read, .write = system_enable_write, .llseek = default_llseek, + .release = subsystem_release, }; static const struct file_operations ftrace_show_header_fops = { @@ -1133,8 +1147,7 @@ event_subsystem_dir(const char *name, struct dentry *d_events) "'%s/filter' entry\n", name); } - trace_create_file("enable", 0644, system->entry, - (void *)system->name, + trace_create_file("enable", 0644, system->entry, system, &ftrace_system_enable_fops); return system->entry; -- cgit v1.2.3