Tracing fixes:

- Fix polling to block on watermark like the reads do, as user space
   applications get confused when the select says read is available, and then
   the read blocks.
 
 - Fix accounting of ring buffer dropped pages as it is what is used to
   determine if the buffer is empty or not.
 
 - Fix memory leak in tracing_read_pipe()
 
 - Fix struct trace_array warning about being declared in parameters
 
 - Fix accounting of ftrace pages used in output at start up.
 
 - Fix allocation of dyn_ftrace pages by subtracting one from order instead of
   diving it by 2
 
 - Static analyzer found a case were a pointer being used outside of a NULL
   check. (rb_head_page_deactivate())
 
 - Fix possible NULL pointer dereference if kstrdup() fails in ftrace_add_mod()
 
 - Fix memory leak in test_gen_synth_cmd() and test_empty_synth_event()
 
 - Fix bad pointer dereference in register_synth_event() on error path.
 
 - Remove unused __bad_type_size() method
 
 - Fix possible NULL pointer dereference of entry in list 'tr->err_log'
 
 - Fix NULL pointer deference race if eprobe is called before the event setup
 -----BEGIN PGP SIGNATURE-----
 
 iIoEABYIADIWIQRRSw7ePDh/lE+zeZMp5XQQmuv6qgUCY3qNNBQccm9zdGVkdEBn
 b29kbWlzLm9yZwAKCRAp5XQQmuv6qiVzAP9vdtLkseOueVqPJ/Wc6v3z0xlkxO4L
 Aj9jOac822SPOQEAvUJ1DM1bxm/D2BY5AQsfgSGjdaVYP+I3kvETNgWspQI=
 =3ta3
 -----END PGP SIGNATURE-----

Merge tag 'trace-v6.1-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace

Pull tracing fixes from Steven Rostedt:

 - Fix polling to block on watermark like the reads do, as user space
   applications get confused when the select says read is available, and
   then the read blocks

 - Fix accounting of ring buffer dropped pages as it is what is used to
   determine if the buffer is empty or not

 - Fix memory leak in tracing_read_pipe()

 - Fix struct trace_array warning about being declared in parameters

 - Fix accounting of ftrace pages used in output at start up.

 - Fix allocation of dyn_ftrace pages by subtracting one from order
   instead of diving it by 2

 - Static analyzer found a case were a pointer being used outside of a
   NULL check (rb_head_page_deactivate())

 - Fix possible NULL pointer dereference if kstrdup() fails in
   ftrace_add_mod()

 - Fix memory leak in test_gen_synth_cmd() and test_empty_synth_event()

 - Fix bad pointer dereference in register_synth_event() on error path

 - Remove unused __bad_type_size() method

 - Fix possible NULL pointer dereference of entry in list 'tr->err_log'

 - Fix NULL pointer deference race if eprobe is called before the event
   setup

* tag 'trace-v6.1-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace:
  tracing: Fix race where eprobes can be called before the event
  tracing: Fix potential null-pointer-access of entry in list 'tr->err_log'
  tracing: Remove unused __bad_type_size() method
  tracing: Fix wild-memory-access in register_synth_event()
  tracing: Fix memory leak in test_gen_synth_cmd() and test_empty_synth_event()
  ftrace: Fix null pointer dereference in ftrace_add_mod()
  ring_buffer: Do not deactivate non-existant pages
  ftrace: Optimize the allocation for mcount entries
  ftrace: Fix the possible incorrect kernel message
  tracing: Fix warning on variable 'struct trace_array'
  tracing: Fix memory leak in tracing_read_pipe()
  ring-buffer: Include dropped pages in counting dirty patches
  tracing/ring-buffer: Have polling block on watermark
This commit is contained in:
Linus Torvalds 2022-11-20 15:25:32 -08:00
commit 5239ddeb48
9 changed files with 74 additions and 46 deletions

View File

@ -100,7 +100,7 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k
int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full);
__poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu,
struct file *filp, poll_table *poll_table);
struct file *filp, poll_table *poll_table, int full);
void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu);
#define RING_BUFFER_ALL_CPUS -1

View File

@ -26,13 +26,13 @@ struct trace_export {
int flags;
};
struct trace_array;
#ifdef CONFIG_TRACING
int register_ftrace_export(struct trace_export *export);
int unregister_ftrace_export(struct trace_export *export);
struct trace_array;
void trace_printk_init_buffers(void);
__printf(3, 4)
int trace_array_printk(struct trace_array *tr, unsigned long ip,

View File

@ -1289,6 +1289,7 @@ static int ftrace_add_mod(struct trace_array *tr,
if (!ftrace_mod)
return -ENOMEM;
INIT_LIST_HEAD(&ftrace_mod->list);
ftrace_mod->func = kstrdup(func, GFP_KERNEL);
ftrace_mod->module = kstrdup(module, GFP_KERNEL);
ftrace_mod->enable = enable;
@ -3190,7 +3191,7 @@ static int ftrace_allocate_records(struct ftrace_page *pg, int count)
/* if we can't allocate this size, try something smaller */
if (!order)
return -ENOMEM;
order >>= 1;
order--;
goto again;
}
@ -7391,7 +7392,7 @@ void __init ftrace_init(void)
}
pr_info("ftrace: allocating %ld entries in %ld pages\n",
count, count / ENTRIES_PER_PAGE + 1);
count, DIV_ROUND_UP(count, ENTRIES_PER_PAGE));
ret = ftrace_process_locs(NULL,
__start_mcount_loc,

View File

@ -519,6 +519,7 @@ struct ring_buffer_per_cpu {
local_t committing;
local_t commits;
local_t pages_touched;
local_t pages_lost;
local_t pages_read;
long last_pages_touch;
size_t shortest_full;
@ -894,10 +895,18 @@ size_t ring_buffer_nr_pages(struct trace_buffer *buffer, int cpu)
size_t ring_buffer_nr_dirty_pages(struct trace_buffer *buffer, int cpu)
{
size_t read;
size_t lost;
size_t cnt;
read = local_read(&buffer->buffers[cpu]->pages_read);
lost = local_read(&buffer->buffers[cpu]->pages_lost);
cnt = local_read(&buffer->buffers[cpu]->pages_touched);
if (WARN_ON_ONCE(cnt < lost))
return 0;
cnt -= lost;
/* The reader can read an empty page, but not more than that */
if (cnt < read) {
WARN_ON_ONCE(read > cnt + 1);
@ -907,6 +916,21 @@ size_t ring_buffer_nr_dirty_pages(struct trace_buffer *buffer, int cpu)
return cnt - read;
}
static __always_inline bool full_hit(struct trace_buffer *buffer, int cpu, int full)
{
struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
size_t nr_pages;
size_t dirty;
nr_pages = cpu_buffer->nr_pages;
if (!nr_pages || !full)
return true;
dirty = ring_buffer_nr_dirty_pages(buffer, cpu);
return (dirty * 100) > (full * nr_pages);
}
/*
* rb_wake_up_waiters - wake up tasks waiting for ring buffer input
*
@ -1046,22 +1070,20 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
!ring_buffer_empty_cpu(buffer, cpu)) {
unsigned long flags;
bool pagebusy;
size_t nr_pages;
size_t dirty;
bool done;
if (!full)
break;
raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
nr_pages = cpu_buffer->nr_pages;
dirty = ring_buffer_nr_dirty_pages(buffer, cpu);
done = !pagebusy && full_hit(buffer, cpu, full);
if (!cpu_buffer->shortest_full ||
cpu_buffer->shortest_full > full)
cpu_buffer->shortest_full = full;
raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
if (!pagebusy &&
(!nr_pages || (dirty * 100) > full * nr_pages))
if (done)
break;
}
@ -1087,6 +1109,7 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
* @cpu: the cpu buffer to wait on
* @filp: the file descriptor
* @poll_table: The poll descriptor
* @full: wait until the percentage of pages are available, if @cpu != RING_BUFFER_ALL_CPUS
*
* If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
* as data is added to any of the @buffer's cpu buffers. Otherwise
@ -1096,14 +1119,15 @@ int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
* zero otherwise.
*/
__poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu,
struct file *filp, poll_table *poll_table)
struct file *filp, poll_table *poll_table, int full)
{
struct ring_buffer_per_cpu *cpu_buffer;
struct rb_irq_work *work;
if (cpu == RING_BUFFER_ALL_CPUS)
if (cpu == RING_BUFFER_ALL_CPUS) {
work = &buffer->irq_work;
else {
full = 0;
} else {
if (!cpumask_test_cpu(cpu, buffer->cpumask))
return -EINVAL;
@ -1111,8 +1135,14 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu,
work = &cpu_buffer->irq_work;
}
poll_wait(filp, &work->waiters, poll_table);
work->waiters_pending = true;
if (full) {
poll_wait(filp, &work->full_waiters, poll_table);
work->full_waiters_pending = true;
} else {
poll_wait(filp, &work->waiters, poll_table);
work->waiters_pending = true;
}
/*
* There's a tight race between setting the waiters_pending and
* checking if the ring buffer is empty. Once the waiters_pending bit
@ -1128,6 +1158,9 @@ __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu,
*/
smp_mb();
if (full)
return full_hit(buffer, cpu, full) ? EPOLLIN | EPOLLRDNORM : 0;
if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) ||
(cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu)))
return EPOLLIN | EPOLLRDNORM;
@ -1769,9 +1802,9 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
free_buffer_page(cpu_buffer->reader_page);
rb_head_page_deactivate(cpu_buffer);
if (head) {
rb_head_page_deactivate(cpu_buffer);
list_for_each_entry_safe(bpage, tmp, head, list) {
list_del_init(&bpage->list);
free_buffer_page(bpage);
@ -2007,6 +2040,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages)
*/
local_add(page_entries, &cpu_buffer->overrun);
local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
local_inc(&cpu_buffer->pages_lost);
}
/*
@ -2491,6 +2525,7 @@ rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer,
*/
local_add(entries, &cpu_buffer->overrun);
local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
local_inc(&cpu_buffer->pages_lost);
/*
* The entries will be zeroed out when we move the
@ -3155,10 +3190,6 @@ static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
static __always_inline void
rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
{
size_t nr_pages;
size_t dirty;
size_t full;
if (buffer->irq_work.waiters_pending) {
buffer->irq_work.waiters_pending = false;
/* irq_work_queue() supplies it's own memory barriers */
@ -3182,10 +3213,7 @@ rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
cpu_buffer->last_pages_touch = local_read(&cpu_buffer->pages_touched);
full = cpu_buffer->shortest_full;
nr_pages = cpu_buffer->nr_pages;
dirty = ring_buffer_nr_dirty_pages(buffer, cpu_buffer->cpu);
if (full && nr_pages && (dirty * 100) <= full * nr_pages)
if (!full_hit(buffer, cpu_buffer->cpu, cpu_buffer->shortest_full))
return;
cpu_buffer->irq_work.wakeup_full = true;
@ -5248,6 +5276,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
local_set(&cpu_buffer->committing, 0);
local_set(&cpu_buffer->commits, 0);
local_set(&cpu_buffer->pages_touched, 0);
local_set(&cpu_buffer->pages_lost, 0);
local_set(&cpu_buffer->pages_read, 0);
cpu_buffer->last_pages_touch = 0;
cpu_buffer->shortest_full = 0;

View File

@ -120,15 +120,13 @@ static int __init test_gen_synth_cmd(void)
/* Now generate a gen_synth_test event */
ret = synth_event_trace_array(gen_synth_test, vals, ARRAY_SIZE(vals));
out:
free:
kfree(buf);
return ret;
delete:
/* We got an error after creating the event, delete it */
synth_event_delete("gen_synth_test");
free:
kfree(buf);
goto out;
goto free;
}
/*
@ -227,15 +225,13 @@ static int __init test_empty_synth_event(void)
/* Now trace an empty_synth_test event */
ret = synth_event_trace_array(empty_synth_test, vals, ARRAY_SIZE(vals));
out:
free:
kfree(buf);
return ret;
delete:
/* We got an error after creating the event, delete it */
synth_event_delete("empty_synth_test");
free:
kfree(buf);
goto out;
goto free;
}
static struct synth_field_desc create_synth_test_fields[] = {

View File

@ -6657,6 +6657,7 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
mutex_unlock(&trace_types_lock);
free_cpumask_var(iter->started);
kfree(iter->fmt);
mutex_destroy(&iter->mutex);
kfree(iter);
@ -6681,7 +6682,7 @@ trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_tabl
return EPOLLIN | EPOLLRDNORM;
else
return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
filp, poll_table);
filp, poll_table, iter->tr->buffer_percent);
}
static __poll_t
@ -7802,6 +7803,7 @@ static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
int len)
{
struct tracing_log_err *err;
char *cmd;
if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
err = alloc_tracing_log_err(len);
@ -7810,12 +7812,12 @@ static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
return err;
}
cmd = kzalloc(len, GFP_KERNEL);
if (!cmd)
return ERR_PTR(-ENOMEM);
err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
kfree(err->cmd);
err->cmd = kzalloc(len, GFP_KERNEL);
if (!err->cmd)
return ERR_PTR(-ENOMEM);
err->cmd = cmd;
list_del(&err->list);
return err;

View File

@ -563,6 +563,9 @@ static void eprobe_trigger_func(struct event_trigger_data *data,
{
struct eprobe_data *edata = data->private_data;
if (unlikely(!rec))
return;
__eprobe_trace_func(edata, rec);
}

View File

@ -828,10 +828,9 @@ static int register_synth_event(struct synth_event *event)
}
ret = set_synth_event_print_fmt(call);
if (ret < 0) {
/* unregister_trace_event() will be called inside */
if (ret < 0)
trace_remove_event_call(call);
goto err;
}
out:
return ret;
err:

View File

@ -201,8 +201,6 @@ print_syscall_exit(struct trace_iterator *iter, int flags,
return trace_handle_return(s);
}
extern char *__bad_type_size(void);
#define SYSCALL_FIELD(_type, _name) { \
.type = #_type, .name = #_name, \
.size = sizeof(_type), .align = __alignof__(_type), \