signal: Print warning message when dropping signals

When the system has too many timers or too many aggregate
queued signals, the EAGAIN error is returned to application
from kernel, including timer_create() [POSIX.1b].

It means that the app exceeded the limit of pending signals,
but in general application writers do not expect this
outcome and the current silent failure can cause rare app
failures under very high load.

This patch adds a new message when we reach the limit
and if print_fatal_signals is enabled:

    task/1234: reached RLIMIT_SIGPENDING, dropping signal

If you see this message and your system behaved unexpectedly,
you can run following command to lift the limit:

   # ulimit -i unlimited

With help from Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>.

Signed-off-by: Naohiro Ooiwa <nooiwa@miraclelinux.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Cc: Roland McGrath <roland@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: oleg@redhat.com
LKML-Reference: <4AF6E7E2.9080406@miraclelinux.com>
[ Modified a few small details, gave surrounding code some love. ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
Naohiro Ooiwa 2009-11-09 00:46:42 +09:00 committed by Ingo Molnar
parent 2a855dd01b
commit f84d49b218
2 changed files with 42 additions and 15 deletions

View File

@ -2032,8 +2032,15 @@ and is between 256 and 4096 characters. It is defined in the file
print-fatal-signals=
[KNL] debug: print fatal signals
print-fatal-signals=1: print segfault info to
the kernel console.
If enabled, warn about various signal handling
related application anomalies: too many signals,
too many POSIX.1 timers, fatal signals causing a
coredump - etc.
If you hit the warning due to signal overflow,
you might want to try "ulimit -i unlimited".
default: off.
printk.time= Show timing data prefixed to each printk message line

View File

@ -22,6 +22,7 @@
#include <linux/ptrace.h>
#include <linux/signal.h>
#include <linux/signalfd.h>
#include <linux/ratelimit.h>
#include <linux/tracehook.h>
#include <linux/capability.h>
#include <linux/freezer.h>
@ -41,6 +42,8 @@
static struct kmem_cache *sigqueue_cachep;
int print_fatal_signals __read_mostly;
static void __user *sig_handler(struct task_struct *t, int sig)
{
return t->sighand->action[sig - 1].sa.sa_handler;
@ -159,7 +162,7 @@ int next_signal(struct sigpending *pending, sigset_t *mask)
{
unsigned long i, *s, *m, x;
int sig = 0;
s = pending->signal.sig;
m = mask->sig;
switch (_NSIG_WORDS) {
@ -184,17 +187,31 @@ int next_signal(struct sigpending *pending, sigset_t *mask)
sig = ffz(~x) + 1;
break;
}
return sig;
}
static inline void print_dropped_signal(int sig)
{
static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10);
if (!print_fatal_signals)
return;
if (!__ratelimit(&ratelimit_state))
return;
printk(KERN_INFO "%s/%d: reached RLIMIT_SIGPENDING, dropped signal %d\n",
current->comm, current->pid, sig);
}
/*
* allocate a new signal queue record
* - this may be called without locks if and only if t == current, otherwise an
* appopriate lock must be held to stop the target task from exiting
*/
static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags,
int override_rlimit)
static struct sigqueue *
__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimit)
{
struct sigqueue *q = NULL;
struct user_struct *user;
@ -207,10 +224,15 @@ static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags,
*/
user = get_uid(__task_cred(t)->user);
atomic_inc(&user->sigpending);
if (override_rlimit ||
atomic_read(&user->sigpending) <=
t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur)
t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur) {
q = kmem_cache_alloc(sigqueue_cachep, flags);
} else {
print_dropped_signal(sig);
}
if (unlikely(q == NULL)) {
atomic_dec(&user->sigpending);
free_uid(user);
@ -869,7 +891,7 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
else
override_rlimit = 0;
q = __sigqueue_alloc(t, GFP_ATOMIC | __GFP_NOTRACK_FALSE_POSITIVE,
q = __sigqueue_alloc(sig, t, GFP_ATOMIC | __GFP_NOTRACK_FALSE_POSITIVE,
override_rlimit);
if (q) {
list_add_tail(&q->list, &pending->list);
@ -925,8 +947,6 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
return __send_signal(sig, info, t, group, from_ancestor_ns);
}
int print_fatal_signals;
static void print_fatal_signal(struct pt_regs *regs, int signr)
{
printk("%s/%d: potentially unexpected fatal signal %d.\n",
@ -1293,19 +1313,19 @@ EXPORT_SYMBOL(kill_pid);
* These functions support sending signals using preallocated sigqueue
* structures. This is needed "because realtime applications cannot
* afford to lose notifications of asynchronous events, like timer
* expirations or I/O completions". In the case of Posix Timers
* expirations or I/O completions". In the case of Posix Timers
* we allocate the sigqueue structure from the timer_create. If this
* allocation fails we are able to report the failure to the application
* with an EAGAIN error.
*/
struct sigqueue *sigqueue_alloc(void)
{
struct sigqueue *q;
struct sigqueue *q = __sigqueue_alloc(-1, current, GFP_KERNEL, 0);
if ((q = __sigqueue_alloc(current, GFP_KERNEL, 0)))
if (q)
q->flags |= SIGQUEUE_PREALLOC;
return(q);
return q;
}
void sigqueue_free(struct sigqueue *q)