mirror of
https://github.com/torvalds/linux.git
synced 2024-12-27 05:11:48 +00:00
098284020c
Davi fixed a missing cast in the __put_user(), that was making timerfd return a single byte instead of the full value. Talking with Michael about the timerfd man page, we think it'd be better to use a u64 for the returned value, to align it with the eventfd implementation. This is an ABI change. The timerfd code is new in 2.6.22 and if we merge this into 2.6.23 then we should also merge it into 2.6.22.x. That will leave a few early 2.6.22 kernels out in the wild which might misbehave when a future timerfd-enabled glibc is run on them. mtk says: The difference would be that read() will only return 4 bytes, while the application will expect 8. If the application is checking the size of returned value, as it should, then it will be able to detect the problem (it could even be sophisticated enough to know that if this is a 4-byte return, then it is running on an old 2.6.22 kernel). If the application is not checking the return from read(), then its 8-byte buffer will not be filled -- the contents of the last 4 bytes will be undefined, so the u64 value as a whole will be junk. Signed-off-by: Davide Libenzi <davidel@xmailserver.org> Cc: Michael Kerrisk <mtk-manpages@gmx.net> Cc: Davi Arnaut <davi@haxent.com.br> Cc: <stable@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
226 lines
5.0 KiB
C
226 lines
5.0 KiB
C
/*
 *  fs/timerfd.c
 *
 *  Copyright (C) 2007  Davide Libenzi <davidel@xmailserver.org>
 *
 *
 *  Thanks to Thomas Gleixner for code reviews and useful comments.
 *
 */
|
|
|
|
#include <linux/file.h>
|
|
#include <linux/poll.h>
|
|
#include <linux/init.h>
|
|
#include <linux/fs.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/list.h>
|
|
#include <linux/spinlock.h>
|
|
#include <linux/time.h>
|
|
#include <linux/hrtimer.h>
|
|
#include <linux/anon_inodes.h>
|
|
#include <linux/timerfd.h>
|
|
|
|
struct timerfd_ctx {
|
|
struct hrtimer tmr;
|
|
ktime_t tintv;
|
|
wait_queue_head_t wqh;
|
|
int expired;
|
|
};
|
|
|
|
/*
|
|
* This gets called when the timer event triggers. We set the "expired"
|
|
* flag, but we do not re-arm the timer (in case it's necessary,
|
|
* tintv.tv64 != 0) until the timer is read.
|
|
*/
|
|
static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr)
|
|
{
|
|
struct timerfd_ctx *ctx = container_of(htmr, struct timerfd_ctx, tmr);
|
|
unsigned long flags;
|
|
|
|
spin_lock_irqsave(&ctx->wqh.lock, flags);
|
|
ctx->expired = 1;
|
|
wake_up_locked(&ctx->wqh);
|
|
spin_unlock_irqrestore(&ctx->wqh.lock, flags);
|
|
|
|
return HRTIMER_NORESTART;
|
|
}
|
|
|
|
static void timerfd_setup(struct timerfd_ctx *ctx, int clockid, int flags,
|
|
const struct itimerspec *ktmr)
|
|
{
|
|
enum hrtimer_mode htmode;
|
|
ktime_t texp;
|
|
|
|
htmode = (flags & TFD_TIMER_ABSTIME) ?
|
|
HRTIMER_MODE_ABS: HRTIMER_MODE_REL;
|
|
|
|
texp = timespec_to_ktime(ktmr->it_value);
|
|
ctx->expired = 0;
|
|
ctx->tintv = timespec_to_ktime(ktmr->it_interval);
|
|
hrtimer_init(&ctx->tmr, clockid, htmode);
|
|
ctx->tmr.expires = texp;
|
|
ctx->tmr.function = timerfd_tmrproc;
|
|
if (texp.tv64 != 0)
|
|
hrtimer_start(&ctx->tmr, texp, htmode);
|
|
}
|
|
|
|
static int timerfd_release(struct inode *inode, struct file *file)
|
|
{
|
|
struct timerfd_ctx *ctx = file->private_data;
|
|
|
|
hrtimer_cancel(&ctx->tmr);
|
|
kfree(ctx);
|
|
return 0;
|
|
}
|
|
|
|
/*
 * ->poll() handler of a timerfd file.
 *
 * Registers the caller on the context's wait queue and returns POLLIN when
 * the "expired" flag is set, 0 otherwise.
 */
static unsigned int timerfd_poll(struct file *file, poll_table *wait)
{
	struct timerfd_ctx *tfd = file->private_data;
	unsigned long irqflags;
	unsigned int mask;

	poll_wait(file, &tfd->wqh, wait);

	/* Sample the flag under the same lock the timer callback takes. */
	spin_lock_irqsave(&tfd->wqh.lock, irqflags);
	mask = tfd->expired ? POLLIN : 0;
	spin_unlock_irqrestore(&tfd->wqh.lock, irqflags);

	return mask;
}
|
|
|
|
static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
|
|
loff_t *ppos)
|
|
{
|
|
struct timerfd_ctx *ctx = file->private_data;
|
|
ssize_t res;
|
|
u64 ticks = 0;
|
|
DECLARE_WAITQUEUE(wait, current);
|
|
|
|
if (count < sizeof(ticks))
|
|
return -EINVAL;
|
|
spin_lock_irq(&ctx->wqh.lock);
|
|
res = -EAGAIN;
|
|
if (!ctx->expired && !(file->f_flags & O_NONBLOCK)) {
|
|
__add_wait_queue(&ctx->wqh, &wait);
|
|
for (res = 0;;) {
|
|
set_current_state(TASK_INTERRUPTIBLE);
|
|
if (ctx->expired) {
|
|
res = 0;
|
|
break;
|
|
}
|
|
if (signal_pending(current)) {
|
|
res = -ERESTARTSYS;
|
|
break;
|
|
}
|
|
spin_unlock_irq(&ctx->wqh.lock);
|
|
schedule();
|
|
spin_lock_irq(&ctx->wqh.lock);
|
|
}
|
|
__remove_wait_queue(&ctx->wqh, &wait);
|
|
__set_current_state(TASK_RUNNING);
|
|
}
|
|
if (ctx->expired) {
|
|
ctx->expired = 0;
|
|
if (ctx->tintv.tv64 != 0) {
|
|
/*
|
|
* If tintv.tv64 != 0, this is a periodic timer that
|
|
* needs to be re-armed. We avoid doing it in the timer
|
|
* callback to avoid DoS attacks specifying a very
|
|
* short timer period.
|
|
*/
|
|
ticks = (u64)
|
|
hrtimer_forward(&ctx->tmr,
|
|
hrtimer_cb_get_time(&ctx->tmr),
|
|
ctx->tintv);
|
|
hrtimer_restart(&ctx->tmr);
|
|
} else
|
|
ticks = 1;
|
|
}
|
|
spin_unlock_irq(&ctx->wqh.lock);
|
|
if (ticks)
|
|
res = put_user(ticks, (u64 __user *) buf) ? -EFAULT: sizeof(ticks);
|
|
return res;
|
|
}
|
|
|
|
static const struct file_operations timerfd_fops = {
|
|
.release = timerfd_release,
|
|
.poll = timerfd_poll,
|
|
.read = timerfd_read,
|
|
};
|
|
|
|
asmlinkage long sys_timerfd(int ufd, int clockid, int flags,
|
|
const struct itimerspec __user *utmr)
|
|
{
|
|
int error;
|
|
struct timerfd_ctx *ctx;
|
|
struct file *file;
|
|
struct inode *inode;
|
|
struct itimerspec ktmr;
|
|
|
|
if (copy_from_user(&ktmr, utmr, sizeof(ktmr)))
|
|
return -EFAULT;
|
|
|
|
if (clockid != CLOCK_MONOTONIC &&
|
|
clockid != CLOCK_REALTIME)
|
|
return -EINVAL;
|
|
if (!timespec_valid(&ktmr.it_value) ||
|
|
!timespec_valid(&ktmr.it_interval))
|
|
return -EINVAL;
|
|
|
|
if (ufd == -1) {
|
|
ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
|
|
if (!ctx)
|
|
return -ENOMEM;
|
|
|
|
init_waitqueue_head(&ctx->wqh);
|
|
|
|
timerfd_setup(ctx, clockid, flags, &ktmr);
|
|
|
|
/*
|
|
* When we call this, the initialization must be complete, since
|
|
* anon_inode_getfd() will install the fd.
|
|
*/
|
|
error = anon_inode_getfd(&ufd, &inode, &file, "[timerfd]",
|
|
&timerfd_fops, ctx);
|
|
if (error)
|
|
goto err_tmrcancel;
|
|
} else {
|
|
file = fget(ufd);
|
|
if (!file)
|
|
return -EBADF;
|
|
ctx = file->private_data;
|
|
if (file->f_op != &timerfd_fops) {
|
|
fput(file);
|
|
return -EINVAL;
|
|
}
|
|
/*
|
|
* We need to stop the existing timer before reprogramming
|
|
* it to the new values.
|
|
*/
|
|
for (;;) {
|
|
spin_lock_irq(&ctx->wqh.lock);
|
|
if (hrtimer_try_to_cancel(&ctx->tmr) >= 0)
|
|
break;
|
|
spin_unlock_irq(&ctx->wqh.lock);
|
|
cpu_relax();
|
|
}
|
|
/*
|
|
* Re-program the timer to the new value ...
|
|
*/
|
|
timerfd_setup(ctx, clockid, flags, &ktmr);
|
|
|
|
spin_unlock_irq(&ctx->wqh.lock);
|
|
fput(file);
|
|
}
|
|
|
|
return ufd;
|
|
|
|
err_tmrcancel:
|
|
hrtimer_cancel(&ctx->tmr);
|
|
kfree(ctx);
|
|
return error;
|
|
}
|
|
|