perf tools: Provide mutex wrappers for pthreads rwlocks

Andi reported a performance drop in single threaded perf tools such as
'perf script' due to the growing number of locks being put in place to
allow for multithreaded tools, so wrap the POSIX threads rwlock routines
with the names used for such kinds of locks in the Linux kernel and then
allow for tools to ask for those locks to be used or not.

For example, a tool may have a multithreaded phase and then switch to
single threaded mode, as the upcoming patches will do in 'perf top':
the synthesizing of PERF_RECORD_{FORK,MMAP,etc} for pre-existing
processes is multithreaded, and then the tool switches to single
threaded mode.

The init routines are not conditional, so that starting as single
threaded and later moving to multithreaded mode should be possible.

Reported-by: Andi Kleen <ak@linux.intel.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/20170404161739.GH12903@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
Arnaldo Carvalho de Melo 2017-04-04 13:15:04 -03:00
parent 6ae8eefc6c
commit 0a7c74eae3
18 changed files with 130 additions and 58 deletions

View File

@ -34,7 +34,6 @@
#include <termios.h>
#include <semaphore.h>
#include <signal.h>
#include <pthread.h>
#include <math.h>
static const char *get_filename_for_perf_kvm(void)

View File

@ -2829,6 +2829,8 @@ int cmd_script(int argc, const char **argv)
NULL
};
perf_set_singlethreaded();
setup_scripting();
argc = parse_options_subcommand(argc, argv, options, script_subcommands, script_usage,

View File

@ -79,6 +79,7 @@ libperf-y += data.o
libperf-y += tsc.o
libperf-y += cloexec.o
libperf-y += call-path.o
libperf-y += rwsem.o
libperf-y += thread-stack.o
libperf-$(CONFIG_AUXTRACE) += auxtrace.o
libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/

View File

@ -1366,9 +1366,9 @@ void __dsos__add(struct dsos *dsos, struct dso *dso)
void dsos__add(struct dsos *dsos, struct dso *dso)
{
pthread_rwlock_wrlock(&dsos->lock);
down_write(&dsos->lock);
__dsos__add(dsos, dso);
pthread_rwlock_unlock(&dsos->lock);
up_write(&dsos->lock);
}
struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short)
@ -1387,9 +1387,9 @@ struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short)
struct dso *dsos__find(struct dsos *dsos, const char *name, bool cmp_short)
{
struct dso *dso;
pthread_rwlock_rdlock(&dsos->lock);
down_read(&dsos->lock);
dso = __dsos__find(dsos, name, cmp_short);
pthread_rwlock_unlock(&dsos->lock);
up_read(&dsos->lock);
return dso;
}
@ -1416,9 +1416,9 @@ struct dso *__dsos__findnew(struct dsos *dsos, const char *name)
struct dso *dsos__findnew(struct dsos *dsos, const char *name)
{
struct dso *dso;
pthread_rwlock_wrlock(&dsos->lock);
down_write(&dsos->lock);
dso = dso__get(__dsos__findnew(dsos, name));
pthread_rwlock_unlock(&dsos->lock);
up_write(&dsos->lock);
return dso;
}

View File

@ -6,7 +6,7 @@
#include <linux/rbtree.h>
#include <sys/types.h>
#include <stdbool.h>
#include <pthread.h>
#include "rwsem.h"
#include <linux/types.h>
#include <linux/bitops.h>
#include "map.h"
@ -129,7 +129,7 @@ struct dso_cache {
struct dsos {
struct list_head head;
struct rb_root root; /* rbtree root sorted by long name */
pthread_rwlock_t lock;
struct rw_semaphore lock;
};
struct auxtrace_cache;

View File

@ -30,7 +30,7 @@ static void dsos__init(struct dsos *dsos)
{
INIT_LIST_HEAD(&dsos->head);
dsos->root = RB_ROOT;
pthread_rwlock_init(&dsos->lock, NULL);
init_rwsem(&dsos->lock);
}
static void machine__threads_init(struct machine *machine)
@ -40,7 +40,7 @@ static void machine__threads_init(struct machine *machine)
for (i = 0; i < THREADS__TABLE_SIZE; i++) {
struct threads *threads = &machine->threads[i];
threads->entries = RB_ROOT;
pthread_rwlock_init(&threads->lock, NULL);
init_rwsem(&threads->lock);
threads->nr = 0;
INIT_LIST_HEAD(&threads->dead);
threads->last_match = NULL;
@ -130,7 +130,7 @@ static void dsos__purge(struct dsos *dsos)
{
struct dso *pos, *n;
pthread_rwlock_wrlock(&dsos->lock);
down_write(&dsos->lock);
list_for_each_entry_safe(pos, n, &dsos->head, node) {
RB_CLEAR_NODE(&pos->rb_node);
@ -139,13 +139,13 @@ static void dsos__purge(struct dsos *dsos)
dso__put(pos);
}
pthread_rwlock_unlock(&dsos->lock);
up_write(&dsos->lock);
}
static void dsos__exit(struct dsos *dsos)
{
dsos__purge(dsos);
pthread_rwlock_destroy(&dsos->lock);
exit_rwsem(&dsos->lock);
}
void machine__delete_threads(struct machine *machine)
@ -155,7 +155,7 @@ void machine__delete_threads(struct machine *machine)
for (i = 0; i < THREADS__TABLE_SIZE; i++) {
struct threads *threads = &machine->threads[i];
pthread_rwlock_wrlock(&threads->lock);
down_write(&threads->lock);
nd = rb_first(&threads->entries);
while (nd) {
struct thread *t = rb_entry(nd, struct thread, rb_node);
@ -163,7 +163,7 @@ void machine__delete_threads(struct machine *machine)
nd = rb_next(nd);
__machine__remove_thread(machine, t, false);
}
pthread_rwlock_unlock(&threads->lock);
up_write(&threads->lock);
}
}
@ -180,7 +180,7 @@ void machine__exit(struct machine *machine)
for (i = 0; i < THREADS__TABLE_SIZE; i++) {
struct threads *threads = &machine->threads[i];
pthread_rwlock_destroy(&threads->lock);
exit_rwsem(&threads->lock);
}
}
@ -482,9 +482,9 @@ struct thread *machine__findnew_thread(struct machine *machine, pid_t pid,
struct threads *threads = machine__threads(machine, tid);
struct thread *th;
pthread_rwlock_wrlock(&threads->lock);
down_write(&threads->lock);
th = __machine__findnew_thread(machine, pid, tid);
pthread_rwlock_unlock(&threads->lock);
up_write(&threads->lock);
return th;
}
@ -494,9 +494,9 @@ struct thread *machine__find_thread(struct machine *machine, pid_t pid,
struct threads *threads = machine__threads(machine, tid);
struct thread *th;
pthread_rwlock_rdlock(&threads->lock);
down_read(&threads->lock);
th = ____machine__findnew_thread(machine, threads, pid, tid, false);
pthread_rwlock_unlock(&threads->lock);
up_read(&threads->lock);
return th;
}
@ -588,7 +588,7 @@ static struct dso *machine__findnew_module_dso(struct machine *machine,
{
struct dso *dso;
pthread_rwlock_wrlock(&machine->dsos.lock);
down_write(&machine->dsos.lock);
dso = __dsos__find(&machine->dsos, m->name, true);
if (!dso) {
@ -602,7 +602,7 @@ static struct dso *machine__findnew_module_dso(struct machine *machine,
dso__get(dso);
out_unlock:
pthread_rwlock_unlock(&machine->dsos.lock);
up_write(&machine->dsos.lock);
return dso;
}
@ -749,7 +749,8 @@ size_t machine__fprintf(struct machine *machine, FILE *fp)
for (i = 0; i < THREADS__TABLE_SIZE; i++) {
struct threads *threads = &machine->threads[i];
pthread_rwlock_rdlock(&threads->lock);
down_read(&threads->lock);
ret = fprintf(fp, "Threads: %u\n", threads->nr);
@ -759,7 +760,7 @@ size_t machine__fprintf(struct machine *machine, FILE *fp)
ret += thread__fprintf(pos, fp);
}
pthread_rwlock_unlock(&threads->lock);
up_read(&threads->lock);
}
return ret;
}
@ -1319,7 +1320,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
struct dso *kernel = NULL;
struct dso *dso;
pthread_rwlock_rdlock(&machine->dsos.lock);
down_read(&machine->dsos.lock);
list_for_each_entry(dso, &machine->dsos.head, node) {
@ -1349,7 +1350,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
break;
}
pthread_rwlock_unlock(&machine->dsos.lock);
up_read(&machine->dsos.lock);
if (kernel == NULL)
kernel = machine__findnew_dso(machine, kmmap_prefix);
@ -1513,7 +1514,7 @@ static void __machine__remove_thread(struct machine *machine, struct thread *th,
BUG_ON(refcount_read(&th->refcnt) == 0);
if (lock)
pthread_rwlock_wrlock(&threads->lock);
down_write(&threads->lock);
rb_erase_init(&th->rb_node, &threads->entries);
RB_CLEAR_NODE(&th->rb_node);
--threads->nr;
@ -1524,7 +1525,7 @@ static void __machine__remove_thread(struct machine *machine, struct thread *th,
*/
list_add_tail(&th->node, &threads->dead);
if (lock)
pthread_rwlock_unlock(&threads->lock);
up_write(&threads->lock);
thread__put(th);
}

View File

@ -6,6 +6,7 @@
#include "map.h"
#include "dso.h"
#include "event.h"
#include "rwsem.h"
struct addr_location;
struct branch_stack;
@ -28,7 +29,7 @@ struct vdso_info;
struct threads {
struct rb_root entries;
pthread_rwlock_t lock;
struct rw_semaphore lock;
unsigned int nr;
struct list_head dead;
struct thread *last_match;

View File

@ -488,7 +488,7 @@ u64 map__objdump_2mem(struct map *map, u64 ip)
static void maps__init(struct maps *maps)
{
maps->entries = RB_ROOT;
pthread_rwlock_init(&maps->lock, NULL);
init_rwsem(&maps->lock);
}
void map_groups__init(struct map_groups *mg, struct machine *machine)
@ -517,9 +517,9 @@ static void __maps__purge(struct maps *maps)
static void maps__exit(struct maps *maps)
{
pthread_rwlock_wrlock(&maps->lock);
down_write(&maps->lock);
__maps__purge(maps);
pthread_rwlock_unlock(&maps->lock);
up_write(&maps->lock);
}
void map_groups__exit(struct map_groups *mg)
@ -586,7 +586,7 @@ struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name,
struct symbol *sym;
struct rb_node *nd;
pthread_rwlock_rdlock(&maps->lock);
down_read(&maps->lock);
for (nd = rb_first(&maps->entries); nd; nd = rb_next(nd)) {
struct map *pos = rb_entry(nd, struct map, rb_node);
@ -602,7 +602,7 @@ struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name,
sym = NULL;
out:
pthread_rwlock_unlock(&maps->lock);
up_read(&maps->lock);
return sym;
}
@ -638,7 +638,7 @@ static size_t maps__fprintf(struct maps *maps, FILE *fp)
size_t printed = 0;
struct rb_node *nd;
pthread_rwlock_rdlock(&maps->lock);
down_read(&maps->lock);
for (nd = rb_first(&maps->entries); nd; nd = rb_next(nd)) {
struct map *pos = rb_entry(nd, struct map, rb_node);
@ -650,7 +650,7 @@ static size_t maps__fprintf(struct maps *maps, FILE *fp)
}
}
pthread_rwlock_unlock(&maps->lock);
up_read(&maps->lock);
return printed;
}
@ -682,7 +682,7 @@ static int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp
struct rb_node *next;
int err = 0;
pthread_rwlock_wrlock(&maps->lock);
down_write(&maps->lock);
root = &maps->entries;
next = rb_first(root);
@ -750,7 +750,7 @@ put_map:
err = 0;
out:
pthread_rwlock_unlock(&maps->lock);
up_write(&maps->lock);
return err;
}
@ -771,7 +771,7 @@ int map_groups__clone(struct thread *thread,
struct map *map;
struct maps *maps = &parent->maps[type];
pthread_rwlock_rdlock(&maps->lock);
down_read(&maps->lock);
for (map = maps__first(maps); map; map = map__next(map)) {
struct map *new = map__clone(map);
@ -788,7 +788,7 @@ int map_groups__clone(struct thread *thread,
err = 0;
out_unlock:
pthread_rwlock_unlock(&maps->lock);
up_read(&maps->lock);
return err;
}
@ -815,9 +815,9 @@ static void __maps__insert(struct maps *maps, struct map *map)
void maps__insert(struct maps *maps, struct map *map)
{
pthread_rwlock_wrlock(&maps->lock);
down_write(&maps->lock);
__maps__insert(maps, map);
pthread_rwlock_unlock(&maps->lock);
up_write(&maps->lock);
}
static void __maps__remove(struct maps *maps, struct map *map)
@ -828,9 +828,9 @@ static void __maps__remove(struct maps *maps, struct map *map)
void maps__remove(struct maps *maps, struct map *map)
{
pthread_rwlock_wrlock(&maps->lock);
down_write(&maps->lock);
__maps__remove(maps, map);
pthread_rwlock_unlock(&maps->lock);
up_write(&maps->lock);
}
struct map *maps__find(struct maps *maps, u64 ip)
@ -838,7 +838,7 @@ struct map *maps__find(struct maps *maps, u64 ip)
struct rb_node **p, *parent = NULL;
struct map *m;
pthread_rwlock_rdlock(&maps->lock);
down_read(&maps->lock);
p = &maps->entries.rb_node;
while (*p != NULL) {
@ -854,7 +854,7 @@ struct map *maps__find(struct maps *maps, u64 ip)
m = NULL;
out:
pthread_rwlock_unlock(&maps->lock);
up_read(&maps->lock);
return m;
}

View File

@ -9,6 +9,7 @@
#include <stdio.h>
#include <stdbool.h>
#include <linux/types.h>
#include "rwsem.h"
enum map_type {
MAP__FUNCTION = 0,
@ -61,7 +62,7 @@ struct kmap {
struct maps {
struct rb_root entries;
pthread_rwlock_t lock;
struct rw_semaphore lock;
};
struct map_groups {

32
tools/perf/util/rwsem.c Normal file
View File

@ -0,0 +1,32 @@
#include "util.h"
#include "rwsem.h"
int init_rwsem(struct rw_semaphore *sem)
{
return pthread_rwlock_init(&sem->lock, NULL);
}
int exit_rwsem(struct rw_semaphore *sem)
{
return pthread_rwlock_destroy(&sem->lock);
}
/*
 * down_read - take @sem for reading.
 *
 * When perf_singlethreaded is set this does nothing and returns 0;
 * otherwise it acquires the read side of the underlying pthread rwlock
 * and returns the value returned by pthread_rwlock_rdlock().
 */
int down_read(struct rw_semaphore *sem)
{
	if (perf_singlethreaded)
		return 0;

	return pthread_rwlock_rdlock(&sem->lock);
}
/*
 * up_read - release a read hold on @sem.
 *
 * perf_singlethreaded is sampled here, independently of the matching
 * down_read(): when it is set nothing is unlocked and 0 is returned,
 * otherwise the value returned by pthread_rwlock_unlock() is returned.
 */
int up_read(struct rw_semaphore *sem)
{
	int ret;

	if (perf_singlethreaded)
		ret = 0;
	else
		ret = pthread_rwlock_unlock(&sem->lock);

	return ret;
}
/*
 * down_write - take @sem for writing.
 *
 * When perf_singlethreaded is set this does nothing and returns 0;
 * otherwise it acquires the write side of the underlying pthread rwlock
 * and returns the value returned by pthread_rwlock_wrlock().
 */
int down_write(struct rw_semaphore *sem)
{
	if (perf_singlethreaded)
		return 0;

	return pthread_rwlock_wrlock(&sem->lock);
}
/*
 * up_write - release a write hold on @sem.
 *
 * perf_singlethreaded is sampled here, independently of the matching
 * down_write(): when it is set nothing is unlocked and 0 is returned,
 * otherwise the value returned by pthread_rwlock_unlock() is returned.
 */
int up_write(struct rw_semaphore *sem)
{
	int ret;

	if (perf_singlethreaded)
		ret = 0;
	else
		ret = pthread_rwlock_unlock(&sem->lock);

	return ret;
}

19
tools/perf/util/rwsem.h Normal file
View File

@ -0,0 +1,19 @@
/*
 * Reader/writer lock wrappers around the POSIX threads rwlock routines,
 * using the names the Linux kernel uses for this kind of lock.
 */
#ifndef _PERF_RWSEM_H
#define _PERF_RWSEM_H
#include <pthread.h>
/* Thin wrapper so users name the lock type the way the kernel does. */
struct rw_semaphore {
pthread_rwlock_t lock; /* the underlying pthread reader/writer lock */
};
/* Initialise @sem; returns the pthread_rwlock_init() result. */
int init_rwsem(struct rw_semaphore *sem);
/* Destroy @sem; returns the pthread_rwlock_destroy() result. */
int exit_rwsem(struct rw_semaphore *sem);
/*
 * Lock/unlock helpers. Each returns 0 without touching the lock when
 * perf_singlethreaded is set, otherwise the result of the corresponding
 * pthread_rwlock_{rdlock,wrlock,unlock}() call.
 */
int down_read(struct rw_semaphore *sem);
int up_read(struct rw_semaphore *sem);
int down_write(struct rw_semaphore *sem);
int up_write(struct rw_semaphore *sem);
#endif /* _PERF_RWSEM_H */

View File

@ -226,7 +226,7 @@ void __map_groups__fixup_end(struct map_groups *mg, enum map_type type)
struct maps *maps = &mg->maps[type];
struct map *next, *curr;
pthread_rwlock_wrlock(&maps->lock);
down_write(&maps->lock);
curr = maps__first(maps);
if (curr == NULL)
@ -246,7 +246,7 @@ void __map_groups__fixup_end(struct map_groups *mg, enum map_type type)
curr->end = ~0ULL;
out_unlock:
pthread_rwlock_unlock(&maps->lock);
up_write(&maps->lock);
}
struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name)
@ -1671,7 +1671,7 @@ struct map *map_groups__find_by_name(struct map_groups *mg,
struct maps *maps = &mg->maps[type];
struct map *map;
pthread_rwlock_rdlock(&maps->lock);
down_read(&maps->lock);
for (map = maps__first(maps); map; map = map__next(map)) {
if (map->dso && strcmp(map->dso->short_name, name) == 0)
@ -1681,7 +1681,7 @@ struct map *map_groups__find_by_name(struct map_groups *mg,
map = NULL;
out_unlock:
pthread_rwlock_unlock(&maps->lock);
up_read(&maps->lock);
return map;
}

View File

@ -264,7 +264,7 @@ static int __thread__prepare_access(struct thread *thread)
struct maps *maps = &thread->mg->maps[i];
struct map *map;
pthread_rwlock_rdlock(&maps->lock);
down_read(&maps->lock);
for (map = maps__first(maps); map; map = map__next(map)) {
err = unwind__prepare_access(thread, map, &initialized);
@ -272,7 +272,7 @@ static int __thread__prepare_access(struct thread *thread)
break;
}
pthread_rwlock_unlock(&maps->lock);
up_read(&maps->lock);
}
return err;

View File

@ -28,7 +28,6 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <pthread.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>

View File

@ -27,7 +27,6 @@
#include <sys/stat.h>
#include <sys/wait.h>
#include <sys/mman.h>
#include <pthread.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>

View File

@ -23,6 +23,19 @@
/*
* XXX We need to find a better place for these things...
*/
/*
 * Global mode flag, changed via perf_set_singlethreaded() and
 * perf_set_multithreaded(). It starts out true.
 * NOTE(review): presumably tools that run multiple threads must call
 * perf_set_multithreaded() before spawning them - confirm against callers.
 */
bool perf_singlethreaded = true;
/* Switch to single threaded mode by setting perf_singlethreaded to true. */
void perf_set_singlethreaded(void)
{
perf_singlethreaded = true;
}
/* Switch to multithreaded mode by setting perf_singlethreaded to false. */
void perf_set_multithreaded(void)
{
perf_singlethreaded = false;
}
unsigned int page_size;
int cacheline_size;

View File

@ -62,4 +62,9 @@ int sched_getcpu(void);
int setns(int fd, int nstype);
#endif
/* Mode flag: true while the tool is in single threaded mode. */
extern bool perf_singlethreaded;
/* Setters for perf_singlethreaded (true and false respectively). */
void perf_set_singlethreaded(void);
void perf_set_multithreaded(void);
#endif /* GIT_COMPAT_UTIL_H */

View File

@ -319,7 +319,7 @@ struct dso *machine__findnew_vdso(struct machine *machine,
struct vdso_info *vdso_info;
struct dso *dso = NULL;
pthread_rwlock_wrlock(&machine->dsos.lock);
down_write(&machine->dsos.lock);
if (!machine->vdso_info)
machine->vdso_info = vdso_info__new();
@ -347,7 +347,7 @@ struct dso *machine__findnew_vdso(struct machine *machine,
out_unlock:
dso__get(dso);
pthread_rwlock_unlock(&machine->dsos.lock);
up_write(&machine->dsos.lock);
return dso;
}