forked from Minki/linux
Merge branch 'akpm' (patches from Andrew)
Merge misc fixes from Andrew Morton:
 "14 fixes and one selftest to verify the ipc fixes herein"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  mm: limit boost_watermark on small zones
  ubsan: disable UBSAN_ALIGNMENT under COMPILE_TEST
  mm/vmscan: remove unnecessary argument description of isolate_lru_pages()
  epoll: atomically remove wait entry on wake up
  kselftests: introduce new epoll60 testcase for catching lost wakeups
  percpu: make pcpu_alloc() aware of current gfp context
  mm/slub: fix incorrect interpretation of s->offset
  scripts/gdb: repair rb_first() and rb_last()
  eventpoll: fix missing wakeup for ovflist in ep_poll_callback
  arch/x86/kvm/svm/sev.c: change flag passed to GUP fast in sev_pin_memory()
  scripts/decodecode: fix trapping instruction formatting
  kernel/kcov.c: fix typos in kcov_remote_start documentation
  mm/page_alloc: fix watchdog soft lockups during set_zone_contiguous()
  mm, memcg: fix error return value of mem_cgroup_css_alloc()
  ipc/mqueue.c: change __do_notify() to bypass check_kill_permission()
This commit is contained in:
commit
af38553c66
@ -345,7 +345,7 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
|
||||
return NULL;
|
||||
|
||||
/* Pin the user virtual address. */
|
||||
npinned = get_user_pages_fast(uaddr, npages, FOLL_WRITE, pages);
|
||||
npinned = get_user_pages_fast(uaddr, npages, write ? FOLL_WRITE : 0, pages);
|
||||
if (npinned != npages) {
|
||||
pr_err("SEV: Failure locking %lu pages.\n", npages);
|
||||
goto err;
|
||||
|
@ -1171,6 +1171,10 @@ static inline bool chain_epi_lockless(struct epitem *epi)
|
||||
{
|
||||
struct eventpoll *ep = epi->ep;
|
||||
|
||||
/* Fast preliminary check */
|
||||
if (epi->next != EP_UNACTIVE_PTR)
|
||||
return false;
|
||||
|
||||
/* Check that the same epi has not been just chained from another CPU */
|
||||
if (cmpxchg(&epi->next, EP_UNACTIVE_PTR, NULL) != EP_UNACTIVE_PTR)
|
||||
return false;
|
||||
@ -1237,16 +1241,12 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
|
||||
* chained in ep->ovflist and requeued later on.
|
||||
*/
|
||||
if (READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR) {
|
||||
if (epi->next == EP_UNACTIVE_PTR &&
|
||||
chain_epi_lockless(epi))
|
||||
if (chain_epi_lockless(epi))
|
||||
ep_pm_stay_awake_rcu(epi);
|
||||
} else if (!ep_is_linked(epi)) {
|
||||
/* In the usual case, add event to ready list. */
|
||||
if (list_add_tail_lockless(&epi->rdllink, &ep->rdllist))
|
||||
ep_pm_stay_awake_rcu(epi);
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
/* If this file is already in the ready list we exit soon */
|
||||
if (!ep_is_linked(epi) &&
|
||||
list_add_tail_lockless(&epi->rdllink, &ep->rdllist)) {
|
||||
ep_pm_stay_awake_rcu(epi);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1822,7 +1822,6 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
|
||||
{
|
||||
int res = 0, eavail, timed_out = 0;
|
||||
u64 slack = 0;
|
||||
bool waiter = false;
|
||||
wait_queue_entry_t wait;
|
||||
ktime_t expires, *to = NULL;
|
||||
|
||||
@ -1867,21 +1866,23 @@ fetch_events:
|
||||
*/
|
||||
ep_reset_busy_poll_napi_id(ep);
|
||||
|
||||
/*
|
||||
* We don't have any available event to return to the caller. We need
|
||||
* to sleep here, and we will be woken by ep_poll_callback() when events
|
||||
* become available.
|
||||
*/
|
||||
if (!waiter) {
|
||||
waiter = true;
|
||||
init_waitqueue_entry(&wait, current);
|
||||
|
||||
do {
|
||||
/*
|
||||
* Internally init_wait() uses autoremove_wake_function(),
|
||||
* thus wait entry is removed from the wait queue on each
|
||||
* wakeup. Why it is important? In case of several waiters
|
||||
* each new wakeup will hit the next waiter, giving it the
|
||||
* chance to harvest new event. Otherwise wakeup can be
|
||||
* lost. This is also good performance-wise, because on
|
||||
* normal wakeup path no need to call __remove_wait_queue()
|
||||
* explicitly, thus ep->lock is not taken, which halts the
|
||||
* event delivery.
|
||||
*/
|
||||
init_wait(&wait);
|
||||
write_lock_irq(&ep->lock);
|
||||
__add_wait_queue_exclusive(&ep->wq, &wait);
|
||||
write_unlock_irq(&ep->lock);
|
||||
}
|
||||
|
||||
for (;;) {
|
||||
/*
|
||||
* We don't want to sleep if the ep_poll_callback() sends us
|
||||
* a wakeup in between. That's why we set the task state
|
||||
@ -1911,10 +1912,20 @@ fetch_events:
|
||||
timed_out = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* We were woken up, thus go and try to harvest some events */
|
||||
eavail = 1;
|
||||
|
||||
} while (0);
|
||||
|
||||
__set_current_state(TASK_RUNNING);
|
||||
|
||||
if (!list_empty_careful(&wait.entry)) {
|
||||
write_lock_irq(&ep->lock);
|
||||
__remove_wait_queue(&ep->wq, &wait);
|
||||
write_unlock_irq(&ep->lock);
|
||||
}
|
||||
|
||||
send_events:
|
||||
/*
|
||||
* Try to transfer events to user space. In case we get 0 events and
|
||||
@ -1925,12 +1936,6 @@ send_events:
|
||||
!(res = ep_send_events(ep, events, maxevents)) && !timed_out)
|
||||
goto fetch_events;
|
||||
|
||||
if (waiter) {
|
||||
write_lock_irq(&ep->lock);
|
||||
__remove_wait_queue(&ep->wq, &wait);
|
||||
write_unlock_irq(&ep->lock);
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
34
ipc/mqueue.c
34
ipc/mqueue.c
@ -142,6 +142,7 @@ struct mqueue_inode_info {
|
||||
|
||||
struct sigevent notify;
|
||||
struct pid *notify_owner;
|
||||
u32 notify_self_exec_id;
|
||||
struct user_namespace *notify_user_ns;
|
||||
struct user_struct *user; /* user who created, for accounting */
|
||||
struct sock *notify_sock;
|
||||
@ -773,28 +774,44 @@ static void __do_notify(struct mqueue_inode_info *info)
|
||||
* synchronously. */
|
||||
if (info->notify_owner &&
|
||||
info->attr.mq_curmsgs == 1) {
|
||||
struct kernel_siginfo sig_i;
|
||||
switch (info->notify.sigev_notify) {
|
||||
case SIGEV_NONE:
|
||||
break;
|
||||
case SIGEV_SIGNAL:
|
||||
/* sends signal */
|
||||
case SIGEV_SIGNAL: {
|
||||
struct kernel_siginfo sig_i;
|
||||
struct task_struct *task;
|
||||
|
||||
/* do_mq_notify() accepts sigev_signo == 0, why?? */
|
||||
if (!info->notify.sigev_signo)
|
||||
break;
|
||||
|
||||
clear_siginfo(&sig_i);
|
||||
sig_i.si_signo = info->notify.sigev_signo;
|
||||
sig_i.si_errno = 0;
|
||||
sig_i.si_code = SI_MESGQ;
|
||||
sig_i.si_value = info->notify.sigev_value;
|
||||
/* map current pid/uid into info->owner's namespaces */
|
||||
rcu_read_lock();
|
||||
/* map current pid/uid into info->owner's namespaces */
|
||||
sig_i.si_pid = task_tgid_nr_ns(current,
|
||||
ns_of_pid(info->notify_owner));
|
||||
sig_i.si_uid = from_kuid_munged(info->notify_user_ns, current_uid());
|
||||
sig_i.si_uid = from_kuid_munged(info->notify_user_ns,
|
||||
current_uid());
|
||||
/*
|
||||
* We can't use kill_pid_info(), this signal should
|
||||
* bypass check_kill_permission(). It is from kernel
|
||||
* but si_fromuser() can't know this.
|
||||
* We do check the self_exec_id, to avoid sending
|
||||
* signals to programs that don't expect them.
|
||||
*/
|
||||
task = pid_task(info->notify_owner, PIDTYPE_TGID);
|
||||
if (task && task->self_exec_id ==
|
||||
info->notify_self_exec_id) {
|
||||
do_send_sig_info(info->notify.sigev_signo,
|
||||
&sig_i, task, PIDTYPE_TGID);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
kill_pid_info(info->notify.sigev_signo,
|
||||
&sig_i, info->notify_owner);
|
||||
break;
|
||||
}
|
||||
case SIGEV_THREAD:
|
||||
set_cookie(info->notify_cookie, NOTIFY_WOKENUP);
|
||||
netlink_sendskb(info->notify_sock, info->notify_cookie);
|
||||
@ -1383,6 +1400,7 @@ retry:
|
||||
info->notify.sigev_signo = notification->sigev_signo;
|
||||
info->notify.sigev_value = notification->sigev_value;
|
||||
info->notify.sigev_notify = SIGEV_SIGNAL;
|
||||
info->notify_self_exec_id = current->self_exec_id;
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -740,8 +740,8 @@ static const struct file_operations kcov_fops = {
|
||||
* kcov_remote_handle() with KCOV_SUBSYSTEM_COMMON as the subsystem id and an
|
||||
* arbitrary 4-byte non-zero number as the instance id). This common handle
|
||||
* then gets saved into the task_struct of the process that issued the
|
||||
* KCOV_REMOTE_ENABLE ioctl. When this proccess issues system calls that spawn
|
||||
* kernel threads, the common handle must be retrived via kcov_common_handle()
|
||||
* KCOV_REMOTE_ENABLE ioctl. When this process issues system calls that spawn
|
||||
* kernel threads, the common handle must be retrieved via kcov_common_handle()
|
||||
* and passed to the spawned threads via custom annotations. Those kernel
|
||||
* threads must in turn be annotated with kcov_remote_start(common_handle) and
|
||||
* kcov_remote_stop(). All of the threads that are spawned by the same process
|
||||
|
@ -60,17 +60,14 @@ config UBSAN_SANITIZE_ALL
|
||||
Enabling this option will get kernel image size increased
|
||||
significantly.
|
||||
|
||||
config UBSAN_NO_ALIGNMENT
|
||||
bool "Disable checking of pointers alignment"
|
||||
default y if HAVE_EFFICIENT_UNALIGNED_ACCESS
|
||||
help
|
||||
This option disables the check of unaligned memory accesses.
|
||||
This option should be used when building allmodconfig.
|
||||
Disabling this option on architectures that support unaligned
|
||||
accesses may produce a lot of false positives.
|
||||
|
||||
config UBSAN_ALIGNMENT
|
||||
def_bool !UBSAN_NO_ALIGNMENT
|
||||
bool "Enable checks for pointers alignment"
|
||||
default !HAVE_EFFICIENT_UNALIGNED_ACCESS
|
||||
depends on !X86 || !COMPILE_TEST
|
||||
help
|
||||
This option enables the check of unaligned memory accesses.
|
||||
Enabling this option on architectures that support unaligned
|
||||
accesses may produce a lot of false positives.
|
||||
|
||||
config TEST_UBSAN
|
||||
tristate "Module for testing for undefined behavior detection"
|
||||
|
@ -4990,19 +4990,22 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
|
||||
unsigned int size;
|
||||
int node;
|
||||
int __maybe_unused i;
|
||||
long error = -ENOMEM;
|
||||
|
||||
size = sizeof(struct mem_cgroup);
|
||||
size += nr_node_ids * sizeof(struct mem_cgroup_per_node *);
|
||||
|
||||
memcg = kzalloc(size, GFP_KERNEL);
|
||||
if (!memcg)
|
||||
return NULL;
|
||||
return ERR_PTR(error);
|
||||
|
||||
memcg->id.id = idr_alloc(&mem_cgroup_idr, NULL,
|
||||
1, MEM_CGROUP_ID_MAX,
|
||||
GFP_KERNEL);
|
||||
if (memcg->id.id < 0)
|
||||
if (memcg->id.id < 0) {
|
||||
error = memcg->id.id;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
memcg->vmstats_local = alloc_percpu(struct memcg_vmstats_percpu);
|
||||
if (!memcg->vmstats_local)
|
||||
@ -5046,7 +5049,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
|
||||
fail:
|
||||
mem_cgroup_id_remove(memcg);
|
||||
__mem_cgroup_free(memcg);
|
||||
return NULL;
|
||||
return ERR_PTR(error);
|
||||
}
|
||||
|
||||
static struct cgroup_subsys_state * __ref
|
||||
@ -5057,8 +5060,8 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
|
||||
long error = -ENOMEM;
|
||||
|
||||
memcg = mem_cgroup_alloc();
|
||||
if (!memcg)
|
||||
return ERR_PTR(error);
|
||||
if (IS_ERR(memcg))
|
||||
return ERR_CAST(memcg);
|
||||
|
||||
WRITE_ONCE(memcg->high, PAGE_COUNTER_MAX);
|
||||
memcg->soft_limit = PAGE_COUNTER_MAX;
|
||||
@ -5108,7 +5111,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
|
||||
fail:
|
||||
mem_cgroup_id_remove(memcg);
|
||||
mem_cgroup_free(memcg);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
return ERR_PTR(error);
|
||||
}
|
||||
|
||||
static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
|
||||
|
@ -1607,6 +1607,7 @@ void set_zone_contiguous(struct zone *zone)
|
||||
if (!__pageblock_pfn_to_page(block_start_pfn,
|
||||
block_end_pfn, zone))
|
||||
return;
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
/* We confirm that there is no hole */
|
||||
@ -2400,6 +2401,14 @@ static inline void boost_watermark(struct zone *zone)
|
||||
|
||||
if (!watermark_boost_factor)
|
||||
return;
|
||||
/*
|
||||
* Don't bother in zones that are unlikely to produce results.
|
||||
* On small machines, including kdump capture kernels running
|
||||
* in a small area, boosting the watermark can cause an out of
|
||||
* memory situation immediately.
|
||||
*/
|
||||
if ((pageblock_nr_pages * 4) > zone_managed_pages(zone))
|
||||
return;
|
||||
|
||||
max_boost = mult_frac(zone->_watermark[WMARK_HIGH],
|
||||
watermark_boost_factor, 10000);
|
||||
|
14
mm/percpu.c
14
mm/percpu.c
@ -80,6 +80,7 @@
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/kmemleak.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/sched/mm.h>
|
||||
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/sections.h>
|
||||
@ -1557,10 +1558,9 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
|
||||
static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
|
||||
gfp_t gfp)
|
||||
{
|
||||
/* whitelisted flags that can be passed to the backing allocators */
|
||||
gfp_t pcpu_gfp = gfp & (GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
|
||||
bool is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL;
|
||||
bool do_warn = !(gfp & __GFP_NOWARN);
|
||||
gfp_t pcpu_gfp;
|
||||
bool is_atomic;
|
||||
bool do_warn;
|
||||
static int warn_limit = 10;
|
||||
struct pcpu_chunk *chunk, *next;
|
||||
const char *err;
|
||||
@ -1569,6 +1569,12 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
|
||||
void __percpu *ptr;
|
||||
size_t bits, bit_align;
|
||||
|
||||
gfp = current_gfp_context(gfp);
|
||||
/* whitelisted flags that can be passed to the backing allocators */
|
||||
pcpu_gfp = gfp & (GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
|
||||
is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL;
|
||||
do_warn = !(gfp & __GFP_NOWARN);
|
||||
|
||||
/*
|
||||
* There is now a minimum allocation size of PCPU_MIN_ALLOC_SIZE,
|
||||
* therefore alignment must be a minimum of that many bytes.
|
||||
|
45
mm/slub.c
45
mm/slub.c
@ -551,15 +551,32 @@ static void print_section(char *level, char *text, u8 *addr,
|
||||
metadata_access_disable();
|
||||
}
|
||||
|
||||
/*
|
||||
* See comment in calculate_sizes().
|
||||
*/
|
||||
static inline bool freeptr_outside_object(struct kmem_cache *s)
|
||||
{
|
||||
return s->offset >= s->inuse;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return offset of the end of info block which is inuse + free pointer if
|
||||
* not overlapping with object.
|
||||
*/
|
||||
static inline unsigned int get_info_end(struct kmem_cache *s)
|
||||
{
|
||||
if (freeptr_outside_object(s))
|
||||
return s->inuse + sizeof(void *);
|
||||
else
|
||||
return s->inuse;
|
||||
}
|
||||
|
||||
static struct track *get_track(struct kmem_cache *s, void *object,
|
||||
enum track_item alloc)
|
||||
{
|
||||
struct track *p;
|
||||
|
||||
if (s->offset)
|
||||
p = object + s->offset + sizeof(void *);
|
||||
else
|
||||
p = object + s->inuse;
|
||||
p = object + get_info_end(s);
|
||||
|
||||
return p + alloc;
|
||||
}
|
||||
@ -686,10 +703,7 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
|
||||
print_section(KERN_ERR, "Redzone ", p + s->object_size,
|
||||
s->inuse - s->object_size);
|
||||
|
||||
if (s->offset)
|
||||
off = s->offset + sizeof(void *);
|
||||
else
|
||||
off = s->inuse;
|
||||
off = get_info_end(s);
|
||||
|
||||
if (s->flags & SLAB_STORE_USER)
|
||||
off += 2 * sizeof(struct track);
|
||||
@ -782,7 +796,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
|
||||
* object address
|
||||
* Bytes of the object to be managed.
|
||||
* If the freepointer may overlay the object then the free
|
||||
* pointer is the first word of the object.
|
||||
* pointer is at the middle of the object.
|
||||
*
|
||||
* Poisoning uses 0x6b (POISON_FREE) and the last byte is
|
||||
* 0xa5 (POISON_END)
|
||||
@ -816,11 +830,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
|
||||
|
||||
static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p)
|
||||
{
|
||||
unsigned long off = s->inuse; /* The end of info */
|
||||
|
||||
if (s->offset)
|
||||
/* Freepointer is placed after the object. */
|
||||
off += sizeof(void *);
|
||||
unsigned long off = get_info_end(s); /* The end of info */
|
||||
|
||||
if (s->flags & SLAB_STORE_USER)
|
||||
/* We also have user information there */
|
||||
@ -907,7 +917,7 @@ static int check_object(struct kmem_cache *s, struct page *page,
|
||||
check_pad_bytes(s, page, p);
|
||||
}
|
||||
|
||||
if (!s->offset && val == SLUB_RED_ACTIVE)
|
||||
if (!freeptr_outside_object(s) && val == SLUB_RED_ACTIVE)
|
||||
/*
|
||||
* Object and freepointer overlap. Cannot check
|
||||
* freepointer while object is allocated.
|
||||
@ -3587,6 +3597,11 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
|
||||
*
|
||||
* This is the case if we do RCU, have a constructor or
|
||||
* destructor or are poisoning the objects.
|
||||
*
|
||||
* The assumption that s->offset >= s->inuse means free
|
||||
* pointer is outside of the object is used in the
|
||||
* freeptr_outside_object() function. If that is no
|
||||
* longer true, the function needs to be modified.
|
||||
*/
|
||||
s->offset = size;
|
||||
size += sizeof(void *);
|
||||
|
@ -1625,7 +1625,6 @@ static __always_inline void update_lru_sizes(struct lruvec *lruvec,
|
||||
* @dst: The temp list to put pages on to.
|
||||
* @nr_scanned: The number of pages that were scanned.
|
||||
* @sc: The scan_control struct for this reclaim session
|
||||
* @mode: One of the LRU isolation modes
|
||||
* @lru: LRU list id for isolating
|
||||
*
|
||||
* returns how many pages were moved onto *@dst.
|
||||
|
@ -126,7 +126,7 @@ faultlinenum=$(( $(wc -l $T.oo | cut -d" " -f1) - \
|
||||
faultline=`cat $T.dis | head -1 | cut -d":" -f2-`
|
||||
faultline=`echo "$faultline" | sed -e 's/\[/\\\[/g; s/\]/\\\]/g'`
|
||||
|
||||
cat $T.oo | sed -e "${faultlinenum}s/^\(.*:\)\(.*\)/\1\*\2\t\t<-- trapping instruction/"
|
||||
cat $T.oo | sed -e "${faultlinenum}s/^\([^:]*:\)\(.*\)/\1\*\2\t\t<-- trapping instruction/"
|
||||
echo
|
||||
cat $T.aa
|
||||
cleanup
|
||||
|
@ -12,7 +12,7 @@ rb_node_type = utils.CachedType("struct rb_node")
|
||||
|
||||
def rb_first(root):
|
||||
if root.type == rb_root_type.get_type():
|
||||
node = node.address.cast(rb_root_type.get_type().pointer())
|
||||
node = root.address.cast(rb_root_type.get_type().pointer())
|
||||
elif root.type != rb_root_type.get_type().pointer():
|
||||
raise gdb.GdbError("Must be struct rb_root not {}".format(root.type))
|
||||
|
||||
@ -28,7 +28,7 @@ def rb_first(root):
|
||||
|
||||
def rb_last(root):
|
||||
if root.type == rb_root_type.get_type():
|
||||
node = node.address.cast(rb_root_type.get_type().pointer())
|
||||
node = root.address.cast(rb_root_type.get_type().pointer())
|
||||
elif root.type != rb_root_type.get_type().pointer():
|
||||
raise gdb.GdbError("Must be struct rb_root not {}".format(root.type))
|
||||
|
||||
|
@ -3,6 +3,7 @@
|
||||
#define _GNU_SOURCE
|
||||
#include <poll.h>
|
||||
#include <unistd.h>
|
||||
#include <assert.h>
|
||||
#include <signal.h>
|
||||
#include <pthread.h>
|
||||
#include <sys/epoll.h>
|
||||
@ -3136,4 +3137,149 @@ TEST(epoll59)
|
||||
close(ctx.sfd[0]);
|
||||
}
|
||||
|
||||
/* Size of the eventfd array in struct epoll60_ctx. */
enum {
	EPOLL60_EVENTS_NR = 10
};
|
||||
|
||||
struct epoll60_ctx {
|
||||
volatile int stopped;
|
||||
int ready;
|
||||
int waiters;
|
||||
int epfd;
|
||||
int evfd[EPOLL60_EVENTS_NR];
|
||||
};
|
||||
|
||||
static void *epoll60_wait_thread(void *ctx_)
|
||||
{
|
||||
struct epoll60_ctx *ctx = ctx_;
|
||||
struct epoll_event e;
|
||||
sigset_t sigmask;
|
||||
uint64_t v;
|
||||
int ret;
|
||||
|
||||
/* Block SIGUSR1 */
|
||||
sigemptyset(&sigmask);
|
||||
sigaddset(&sigmask, SIGUSR1);
|
||||
sigprocmask(SIG_SETMASK, &sigmask, NULL);
|
||||
|
||||
/* Prepare empty mask for epoll_pwait() */
|
||||
sigemptyset(&sigmask);
|
||||
|
||||
while (!ctx->stopped) {
|
||||
/* Mark we are ready */
|
||||
__atomic_fetch_add(&ctx->ready, 1, __ATOMIC_ACQUIRE);
|
||||
|
||||
/* Start when all are ready */
|
||||
while (__atomic_load_n(&ctx->ready, __ATOMIC_ACQUIRE) &&
|
||||
!ctx->stopped);
|
||||
|
||||
/* Account this waiter */
|
||||
__atomic_fetch_add(&ctx->waiters, 1, __ATOMIC_ACQUIRE);
|
||||
|
||||
ret = epoll_pwait(ctx->epfd, &e, 1, 2000, &sigmask);
|
||||
if (ret != 1) {
|
||||
/* We expect only signal delivery on stop */
|
||||
assert(ret < 0 && errno == EINTR && "Lost wakeup!\n");
|
||||
assert(ctx->stopped);
|
||||
break;
|
||||
}
|
||||
|
||||
ret = read(e.data.fd, &v, sizeof(v));
|
||||
/* Since we are on ET mode, thus each thread gets its own fd. */
|
||||
assert(ret == sizeof(v));
|
||||
|
||||
__atomic_fetch_sub(&ctx->waiters, 1, __ATOMIC_RELEASE);
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
 * Return a millisecond timestamp for measuring elapsed time.
 *
 * The value is taken from CLOCK_MONOTONIC, so it is only meaningful as a
 * difference between two calls; it is not a wall-clock time. The monotonic
 * clock is used because a wall-clock adjustment during a timed busy-wait
 * would otherwise stretch or cut short the timeout.
 */
static inline unsigned long long msecs(void)
{
	struct timespec ts;
	unsigned long long ms;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	ms = ts.tv_sec * 1000ull;
	ms += ts.tv_nsec / 1000000ull;

	return ms;
}
|
||||
|
||||
static inline int count_waiters(struct epoll60_ctx *ctx)
|
||||
{
|
||||
return __atomic_load_n(&ctx->waiters, __ATOMIC_ACQUIRE);
|
||||
}
|
||||
|
||||
TEST(epoll60)
|
||||
{
|
||||
struct epoll60_ctx ctx = { 0 };
|
||||
pthread_t waiters[ARRAY_SIZE(ctx.evfd)];
|
||||
struct epoll_event e;
|
||||
int i, n, ret;
|
||||
|
||||
signal(SIGUSR1, signal_handler);
|
||||
|
||||
ctx.epfd = epoll_create1(0);
|
||||
ASSERT_GE(ctx.epfd, 0);
|
||||
|
||||
/* Create event fds */
|
||||
for (i = 0; i < ARRAY_SIZE(ctx.evfd); i++) {
|
||||
ctx.evfd[i] = eventfd(0, EFD_NONBLOCK);
|
||||
ASSERT_GE(ctx.evfd[i], 0);
|
||||
|
||||
e.events = EPOLLIN | EPOLLET;
|
||||
e.data.fd = ctx.evfd[i];
|
||||
ASSERT_EQ(epoll_ctl(ctx.epfd, EPOLL_CTL_ADD, ctx.evfd[i], &e), 0);
|
||||
}
|
||||
|
||||
/* Create waiter threads */
|
||||
for (i = 0; i < ARRAY_SIZE(waiters); i++)
|
||||
ASSERT_EQ(pthread_create(&waiters[i], NULL,
|
||||
epoll60_wait_thread, &ctx), 0);
|
||||
|
||||
for (i = 0; i < 300; i++) {
|
||||
uint64_t v = 1, ms;
|
||||
|
||||
/* Wait for all to be ready */
|
||||
while (__atomic_load_n(&ctx.ready, __ATOMIC_ACQUIRE) !=
|
||||
ARRAY_SIZE(ctx.evfd))
|
||||
;
|
||||
|
||||
/* Steady, go */
|
||||
__atomic_fetch_sub(&ctx.ready, ARRAY_SIZE(ctx.evfd),
|
||||
__ATOMIC_ACQUIRE);
|
||||
|
||||
/* Wait all have gone to kernel */
|
||||
while (count_waiters(&ctx) != ARRAY_SIZE(ctx.evfd))
|
||||
;
|
||||
|
||||
/* 1ms should be enough to schedule away */
|
||||
usleep(1000);
|
||||
|
||||
/* Quickly signal all handles at once */
|
||||
for (n = 0; n < ARRAY_SIZE(ctx.evfd); n++) {
|
||||
ret = write(ctx.evfd[n], &v, sizeof(v));
|
||||
ASSERT_EQ(ret, sizeof(v));
|
||||
}
|
||||
|
||||
/* Busy loop for 1s and wait for all waiters to wake up */
|
||||
ms = msecs();
|
||||
while (count_waiters(&ctx) && msecs() < ms + 1000)
|
||||
;
|
||||
|
||||
ASSERT_EQ(count_waiters(&ctx), 0);
|
||||
}
|
||||
ctx.stopped = 1;
|
||||
/* Stop waiters */
|
||||
for (i = 0; i < ARRAY_SIZE(waiters); i++)
|
||||
ret = pthread_kill(waiters[i], SIGUSR1);
|
||||
for (i = 0; i < ARRAY_SIZE(waiters); i++)
|
||||
pthread_join(waiters[i], NULL);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(waiters); i++)
|
||||
close(ctx.evfd[i]);
|
||||
close(ctx.epfd);
|
||||
}
|
||||
|
||||
TEST_HARNESS_MAIN
|
||||
|
@ -25,7 +25,6 @@ CONFIG_KASAN=y
|
||||
CONFIG_KASAN_INLINE=y
|
||||
CONFIG_UBSAN=y
|
||||
CONFIG_UBSAN_SANITIZE_ALL=y
|
||||
CONFIG_UBSAN_NO_ALIGNMENT=y
|
||||
CONFIG_UBSAN_NULL=y
|
||||
CONFIG_DEBUG_KMEMLEAK=y
|
||||
CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE=8192
|
||||
|
Loading…
Reference in New Issue
Block a user