mirror of
https://github.com/torvalds/linux.git
synced 2024-11-26 22:21:42 +00:00
560e20e4bf
We currently have two interfaces that take a block_device and then find a mounted file system to flush or invalidate data on it. Both are a bit problematic because they only work for the "main" block device that is used as s_dev for the super_block, and because they don't call into the file system at all. Merge the two into a new bdev_mark_dead helper that does both the syncing and invalidation and which is properly documented. This is in preparation of merging the functionality into the ->mark_dead holder operation so that it will work on additional block devices used by a file system and give us a single entry point for invalidation of dead devices or media. Note that a single standalone fsync_bdev call for an obscure ioctl remains for now, but that one will also be dealt with in a bit. Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Josef Bacik <josef@toxicpanda.com> Message-Id: <20230811100828.1897174-14-hch@lst.de> Signed-off-by: Christian Brauner <brauner@kernel.org>
495 lines
13 KiB
C
495 lines
13 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Disk events - monitor disk events like media change and eject request.
|
|
*/
|
|
#include <linux/export.h>
|
|
#include <linux/moduleparam.h>
|
|
#include <linux/blkdev.h>
|
|
#include "blk.h"
|
|
|
|
struct disk_events {
|
|
struct list_head node; /* all disk_event's */
|
|
struct gendisk *disk; /* the associated disk */
|
|
spinlock_t lock;
|
|
|
|
struct mutex block_mutex; /* protects blocking */
|
|
int block; /* event blocking depth */
|
|
unsigned int pending; /* events already sent out */
|
|
unsigned int clearing; /* events being cleared */
|
|
|
|
long poll_msecs; /* interval, -1 for default */
|
|
struct delayed_work dwork;
|
|
};
|
|
|
|
static const char *disk_events_strs[] = {
|
|
[ilog2(DISK_EVENT_MEDIA_CHANGE)] = "media_change",
|
|
[ilog2(DISK_EVENT_EJECT_REQUEST)] = "eject_request",
|
|
};
|
|
|
|
static char *disk_uevents[] = {
|
|
[ilog2(DISK_EVENT_MEDIA_CHANGE)] = "DISK_MEDIA_CHANGE=1",
|
|
[ilog2(DISK_EVENT_EJECT_REQUEST)] = "DISK_EJECT_REQUEST=1",
|
|
};
|
|
|
|
/*
 * System-wide default polling interval in milliseconds.  Used by
 * disk_events_poll_jiffies() when a disk has no per-device interval and
 * DISK_EVENT_FLAG_POLL is set.  Zero-initialized, i.e. in-kernel polling is
 * disabled by default.
 */
static unsigned long disk_events_dfl_poll_msecs;

/* Every registered disk_events; the list is walked/modified under the mutex. */
static LIST_HEAD(disk_events);
static DEFINE_MUTEX(disk_events_mutex);
|
|
|
|
static unsigned long disk_events_poll_jiffies(struct gendisk *disk)
|
|
{
|
|
struct disk_events *ev = disk->ev;
|
|
long intv_msecs = 0;
|
|
|
|
/*
|
|
* If device-specific poll interval is set, always use it. If
|
|
* the default is being used, poll if the POLL flag is set.
|
|
*/
|
|
if (ev->poll_msecs >= 0)
|
|
intv_msecs = ev->poll_msecs;
|
|
else if (disk->event_flags & DISK_EVENT_FLAG_POLL)
|
|
intv_msecs = disk_events_dfl_poll_msecs;
|
|
|
|
return msecs_to_jiffies(intv_msecs);
|
|
}
|
|
|
|
/**
|
|
* disk_block_events - block and flush disk event checking
|
|
* @disk: disk to block events for
|
|
*
|
|
* On return from this function, it is guaranteed that event checking
|
|
* isn't in progress and won't happen until unblocked by
|
|
* disk_unblock_events(). Events blocking is counted and the actual
|
|
* unblocking happens after the matching number of unblocks are done.
|
|
*
|
|
* Note that this intentionally does not block event checking from
|
|
* disk_clear_events().
|
|
*
|
|
* CONTEXT:
|
|
* Might sleep.
|
|
*/
|
|
void disk_block_events(struct gendisk *disk)
|
|
{
|
|
struct disk_events *ev = disk->ev;
|
|
unsigned long flags;
|
|
bool cancel;
|
|
|
|
if (!ev)
|
|
return;
|
|
|
|
/*
|
|
* Outer mutex ensures that the first blocker completes canceling
|
|
* the event work before further blockers are allowed to finish.
|
|
*/
|
|
mutex_lock(&ev->block_mutex);
|
|
|
|
spin_lock_irqsave(&ev->lock, flags);
|
|
cancel = !ev->block++;
|
|
spin_unlock_irqrestore(&ev->lock, flags);
|
|
|
|
if (cancel)
|
|
cancel_delayed_work_sync(&disk->ev->dwork);
|
|
|
|
mutex_unlock(&ev->block_mutex);
|
|
}
|
|
|
|
/*
 * Drop one level of event blocking on @disk.  When the block count reaches
 * zero the event work is queued: immediately if @check_now is set, otherwise
 * after the polling interval (and not at all if polling is disabled).
 *
 * An unbalanced unblock (block count already <= 0) triggers a one-time
 * warning and changes nothing.  The caller must make sure disk->ev is valid.
 */
static void __disk_unblock_events(struct gendisk *disk, bool check_now)
{
	struct disk_events *ev = disk->ev;
	unsigned long irq_flags;
	unsigned long delay;

	spin_lock_irqsave(&ev->lock, irq_flags);

	if (!WARN_ON_ONCE(ev->block <= 0) && --ev->block == 0) {
		delay = disk_events_poll_jiffies(disk);
		if (check_now || delay)
			queue_delayed_work(system_freezable_power_efficient_wq,
					   &ev->dwork, check_now ? 0 : delay);
	}

	spin_unlock_irqrestore(&ev->lock, irq_flags);
}
|
|
|
|
/**
|
|
* disk_unblock_events - unblock disk event checking
|
|
* @disk: disk to unblock events for
|
|
*
|
|
* Undo disk_block_events(). When the block count reaches zero, it
|
|
* starts events polling if configured.
|
|
*
|
|
* CONTEXT:
|
|
* Don't care. Safe to call from irq context.
|
|
*/
|
|
void disk_unblock_events(struct gendisk *disk)
|
|
{
|
|
if (disk->ev)
|
|
__disk_unblock_events(disk, false);
|
|
}
|
|
|
|
/**
|
|
* disk_flush_events - schedule immediate event checking and flushing
|
|
* @disk: disk to check and flush events for
|
|
* @mask: events to flush
|
|
*
|
|
* Schedule immediate event checking on @disk if not blocked. Events in
|
|
* @mask are scheduled to be cleared from the driver. Note that this
|
|
* doesn't clear the events from @disk->ev.
|
|
*
|
|
* CONTEXT:
|
|
* If @mask is non-zero must be called with disk->open_mutex held.
|
|
*/
|
|
void disk_flush_events(struct gendisk *disk, unsigned int mask)
|
|
{
|
|
struct disk_events *ev = disk->ev;
|
|
|
|
if (!ev)
|
|
return;
|
|
|
|
spin_lock_irq(&ev->lock);
|
|
ev->clearing |= mask;
|
|
if (!ev->block)
|
|
mod_delayed_work(system_freezable_power_efficient_wq,
|
|
&ev->dwork, 0);
|
|
spin_unlock_irq(&ev->lock);
|
|
}
|
|
|
|
/*
|
|
* Tell userland about new events. Only the events listed in @disk->events are
|
|
* reported, and only if DISK_EVENT_FLAG_UEVENT is set. Otherwise, events are
|
|
* processed internally but never get reported to userland.
|
|
*/
|
|
static void disk_event_uevent(struct gendisk *disk, unsigned int events)
|
|
{
|
|
char *envp[ARRAY_SIZE(disk_uevents) + 1] = { };
|
|
int nr_events = 0, i;
|
|
|
|
for (i = 0; i < ARRAY_SIZE(disk_uevents); i++)
|
|
if (events & disk->events & (1 << i))
|
|
envp[nr_events++] = disk_uevents[i];
|
|
|
|
if (nr_events)
|
|
kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp);
|
|
}
|
|
|
|
static void disk_check_events(struct disk_events *ev,
|
|
unsigned int *clearing_ptr)
|
|
{
|
|
struct gendisk *disk = ev->disk;
|
|
unsigned int clearing = *clearing_ptr;
|
|
unsigned int events;
|
|
unsigned long intv;
|
|
|
|
/* check events */
|
|
events = disk->fops->check_events(disk, clearing);
|
|
|
|
/* accumulate pending events and schedule next poll if necessary */
|
|
spin_lock_irq(&ev->lock);
|
|
|
|
events &= ~ev->pending;
|
|
ev->pending |= events;
|
|
*clearing_ptr &= ~clearing;
|
|
|
|
intv = disk_events_poll_jiffies(disk);
|
|
if (!ev->block && intv)
|
|
queue_delayed_work(system_freezable_power_efficient_wq,
|
|
&ev->dwork, intv);
|
|
|
|
spin_unlock_irq(&ev->lock);
|
|
|
|
if (events & DISK_EVENT_MEDIA_CHANGE)
|
|
inc_diskseq(disk);
|
|
|
|
if (disk->event_flags & DISK_EVENT_FLAG_UEVENT)
|
|
disk_event_uevent(disk, events);
|
|
}
|
|
|
|
/**
|
|
* disk_clear_events - synchronously check, clear and return pending events
|
|
* @disk: disk to fetch and clear events from
|
|
* @mask: mask of events to be fetched and cleared
|
|
*
|
|
* Disk events are synchronously checked and pending events in @mask
|
|
* are cleared and returned. This ignores the block count.
|
|
*
|
|
* CONTEXT:
|
|
* Might sleep.
|
|
*/
|
|
static unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask)
|
|
{
|
|
struct disk_events *ev = disk->ev;
|
|
unsigned int pending;
|
|
unsigned int clearing = mask;
|
|
|
|
if (!ev)
|
|
return 0;
|
|
|
|
disk_block_events(disk);
|
|
|
|
/*
|
|
* store the union of mask and ev->clearing on the stack so that the
|
|
* race with disk_flush_events does not cause ambiguity (ev->clearing
|
|
* can still be modified even if events are blocked).
|
|
*/
|
|
spin_lock_irq(&ev->lock);
|
|
clearing |= ev->clearing;
|
|
ev->clearing = 0;
|
|
spin_unlock_irq(&ev->lock);
|
|
|
|
disk_check_events(ev, &clearing);
|
|
/*
|
|
* if ev->clearing is not 0, the disk_flush_events got called in the
|
|
* middle of this function, so we want to run the workfn without delay.
|
|
*/
|
|
__disk_unblock_events(disk, ev->clearing ? true : false);
|
|
|
|
/* then, fetch and clear pending events */
|
|
spin_lock_irq(&ev->lock);
|
|
pending = ev->pending & mask;
|
|
ev->pending &= ~mask;
|
|
spin_unlock_irq(&ev->lock);
|
|
WARN_ON_ONCE(clearing & mask);
|
|
|
|
return pending;
|
|
}
|
|
|
|
/**
|
|
* disk_check_media_change - check if a removable media has been changed
|
|
* @disk: gendisk to check
|
|
*
|
|
* Check whether a removable media has been changed, and attempt to free all
|
|
* dentries and inodes and invalidates all block device page cache entries in
|
|
* that case.
|
|
*
|
|
* Returns %true if the media has changed, or %false if not.
|
|
*/
|
|
bool disk_check_media_change(struct gendisk *disk)
|
|
{
|
|
unsigned int events;
|
|
|
|
events = disk_clear_events(disk, DISK_EVENT_MEDIA_CHANGE |
|
|
DISK_EVENT_EJECT_REQUEST);
|
|
if (!(events & DISK_EVENT_MEDIA_CHANGE))
|
|
return false;
|
|
|
|
bdev_mark_dead(disk->part0, true);
|
|
set_bit(GD_NEED_PART_SCAN, &disk->state);
|
|
return true;
|
|
}
|
|
EXPORT_SYMBOL(disk_check_media_change);
|
|
|
|
/**
|
|
* disk_force_media_change - force a media change event
|
|
* @disk: the disk which will raise the event
|
|
* @events: the events to raise
|
|
*
|
|
* Should be called when the media changes for @disk. Generates a uevent
|
|
* and attempts to free all dentries and inodes and invalidates all block
|
|
* device page cache entries in that case.
|
|
*/
|
|
void disk_force_media_change(struct gendisk *disk)
|
|
{
|
|
disk_event_uevent(disk, DISK_EVENT_MEDIA_CHANGE);
|
|
inc_diskseq(disk);
|
|
bdev_mark_dead(disk->part0, true);
|
|
set_bit(GD_NEED_PART_SCAN, &disk->state);
|
|
}
|
|
EXPORT_SYMBOL_GPL(disk_force_media_change);
|
|
|
|
/*
|
|
* Separate this part out so that a different pointer for clearing_ptr can be
|
|
* passed in for disk_clear_events.
|
|
*/
|
|
static void disk_events_workfn(struct work_struct *work)
|
|
{
|
|
struct delayed_work *dwork = to_delayed_work(work);
|
|
struct disk_events *ev = container_of(dwork, struct disk_events, dwork);
|
|
|
|
disk_check_events(ev, &ev->clearing);
|
|
}
|
|
|
|
/*
|
|
* A disk events enabled device has the following sysfs nodes under
|
|
* its /sys/block/X/ directory.
|
|
*
|
|
* events : list of all supported events
|
|
* events_async : list of events which can be detected w/o polling
|
|
* (always empty, only for backwards compatibility)
|
|
* events_poll_msecs : polling interval, 0: disable, -1: system default
|
|
*/
|
|
static ssize_t __disk_events_show(unsigned int events, char *buf)
|
|
{
|
|
const char *delim = "";
|
|
ssize_t pos = 0;
|
|
int i;
|
|
|
|
for (i = 0; i < ARRAY_SIZE(disk_events_strs); i++)
|
|
if (events & (1 << i)) {
|
|
pos += sprintf(buf + pos, "%s%s",
|
|
delim, disk_events_strs[i]);
|
|
delim = " ";
|
|
}
|
|
if (pos)
|
|
pos += sprintf(buf + pos, "\n");
|
|
return pos;
|
|
}
|
|
|
|
static ssize_t disk_events_show(struct device *dev,
|
|
struct device_attribute *attr, char *buf)
|
|
{
|
|
struct gendisk *disk = dev_to_disk(dev);
|
|
|
|
if (!(disk->event_flags & DISK_EVENT_FLAG_UEVENT))
|
|
return 0;
|
|
return __disk_events_show(disk->events, buf);
|
|
}
|
|
|
|
/*
 * sysfs "events_async" show method.  Always produces an empty file; none of
 * the arguments are looked at.
 */
static ssize_t disk_events_async_show(struct device *dev,
				      struct device_attribute *attr,
				      char *buf)
{
	return 0;
}
|
|
|
|
static ssize_t disk_events_poll_msecs_show(struct device *dev,
|
|
struct device_attribute *attr,
|
|
char *buf)
|
|
{
|
|
struct gendisk *disk = dev_to_disk(dev);
|
|
|
|
if (!disk->ev)
|
|
return sprintf(buf, "-1\n");
|
|
return sprintf(buf, "%ld\n", disk->ev->poll_msecs);
|
|
}
|
|
|
|
static ssize_t disk_events_poll_msecs_store(struct device *dev,
|
|
struct device_attribute *attr,
|
|
const char *buf, size_t count)
|
|
{
|
|
struct gendisk *disk = dev_to_disk(dev);
|
|
long intv;
|
|
|
|
if (!count || !sscanf(buf, "%ld", &intv))
|
|
return -EINVAL;
|
|
|
|
if (intv < 0 && intv != -1)
|
|
return -EINVAL;
|
|
|
|
if (!disk->ev)
|
|
return -ENODEV;
|
|
|
|
disk_block_events(disk);
|
|
disk->ev->poll_msecs = intv;
|
|
__disk_unblock_events(disk, true);
|
|
return count;
|
|
}
|
|
|
|
/*
 * Device attributes for disk events: "events" and "events_async" are
 * read-only (0444), "events_poll_msecs" additionally has a store method
 * (0644).
 */
DEVICE_ATTR(events, 0444, disk_events_show, NULL);
DEVICE_ATTR(events_async, 0444, disk_events_async_show, NULL);
DEVICE_ATTR(events_poll_msecs, 0644,
	    disk_events_poll_msecs_show, disk_events_poll_msecs_store);
|
|
|
|
/*
|
|
* The default polling interval can be specified by the kernel
|
|
* parameter block.events_dfl_poll_msecs which defaults to 0
|
|
* (disable). This can also be modified runtime by writing to
|
|
* /sys/module/block/parameters/events_dfl_poll_msecs.
|
|
*/
|
|
static int disk_events_set_dfl_poll_msecs(const char *val,
|
|
const struct kernel_param *kp)
|
|
{
|
|
struct disk_events *ev;
|
|
int ret;
|
|
|
|
ret = param_set_ulong(val, kp);
|
|
if (ret < 0)
|
|
return ret;
|
|
|
|
mutex_lock(&disk_events_mutex);
|
|
list_for_each_entry(ev, &disk_events, node)
|
|
disk_flush_events(ev->disk, 0);
|
|
mutex_unlock(&disk_events_mutex);
|
|
return 0;
|
|
}
|
|
|
|
static const struct kernel_param_ops disk_events_dfl_poll_msecs_param_ops = {
|
|
.set = disk_events_set_dfl_poll_msecs,
|
|
.get = param_get_ulong,
|
|
};
|
|
|
|
#undef MODULE_PARAM_PREFIX
|
|
#define MODULE_PARAM_PREFIX "block."
|
|
|
|
module_param_cb(events_dfl_poll_msecs, &disk_events_dfl_poll_msecs_param_ops,
|
|
&disk_events_dfl_poll_msecs, 0644);
|
|
|
|
/*
|
|
* disk_{alloc|add|del|release}_events - initialize and destroy disk_events.
|
|
*/
|
|
int disk_alloc_events(struct gendisk *disk)
|
|
{
|
|
struct disk_events *ev;
|
|
|
|
if (!disk->fops->check_events || !disk->events)
|
|
return 0;
|
|
|
|
ev = kzalloc(sizeof(*ev), GFP_KERNEL);
|
|
if (!ev) {
|
|
pr_warn("%s: failed to initialize events\n", disk->disk_name);
|
|
return -ENOMEM;
|
|
}
|
|
|
|
INIT_LIST_HEAD(&ev->node);
|
|
ev->disk = disk;
|
|
spin_lock_init(&ev->lock);
|
|
mutex_init(&ev->block_mutex);
|
|
ev->block = 1;
|
|
ev->poll_msecs = -1;
|
|
INIT_DELAYED_WORK(&ev->dwork, disk_events_workfn);
|
|
|
|
disk->ev = ev;
|
|
return 0;
|
|
}
|
|
|
|
void disk_add_events(struct gendisk *disk)
|
|
{
|
|
if (!disk->ev)
|
|
return;
|
|
|
|
mutex_lock(&disk_events_mutex);
|
|
list_add_tail(&disk->ev->node, &disk_events);
|
|
mutex_unlock(&disk_events_mutex);
|
|
|
|
/*
|
|
* Block count is initialized to 1 and the following initial
|
|
* unblock kicks it into action.
|
|
*/
|
|
__disk_unblock_events(disk, true);
|
|
}
|
|
|
|
void disk_del_events(struct gendisk *disk)
|
|
{
|
|
if (disk->ev) {
|
|
disk_block_events(disk);
|
|
|
|
mutex_lock(&disk_events_mutex);
|
|
list_del_init(&disk->ev->node);
|
|
mutex_unlock(&disk_events_mutex);
|
|
}
|
|
}
|
|
|
|
void disk_release_events(struct gendisk *disk)
|
|
{
|
|
/* the block count should be 1 from disk_del_events() */
|
|
WARN_ON_ONCE(disk->ev && disk->ev->block != 1);
|
|
kfree(disk->ev);
|
|
}
|