vfs-6.8.cachefiles

-----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQRAhzRXHqcMeLMyaSiRxhvAZXjcogUCZZU0egAKCRCRxhvAZXjc
 onAqAP9s2ohvjE4QE2ad7svXOzNWKesGcyDyoEBwBpt3Yq8hvAEA+J4xiaMBlRAg
 FmBobDwtcvOzxL1q+BbB3IsmmuFrRww=
 =ZS18
 -----END PGP SIGNATURE-----

Merge tag 'vfs-6.8.cachefiles' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull vfs cachefiles updates from Christian Brauner:
 "This contains improvements for on-demand cachefiles.

  If the daemon crashes and the on-demand cachefiles fd is unexpectedly
  closed in-flight requests and subsequent read operations associated
  with the fd will fail with EIO. This causes issues in various
  scenarios as this failure is currently unrecoverable.

  The work contained in this pull request introduces a failover mode and
  enables the daemon to recover in-flight requested-related objects. A
  restarted daemon will be able to process requests as usual.

  This requires that in-flight requests are stored during daemon crash
  or while the daemon is offline. In addition, a handle to
  /dev/cachefiles needs to be stored.

  This can be done by e.g., systemd's fdstore (cf. [1]) which enables
  the restarted daemon to recover state.

  Three new states are introduced in this patchset:

   (1) CLOSE
       Object is closed by the daemon.

   (2) OPEN
       Object is open and ready for processing. IOW, the open request
       has been handled successfully.

   (3) REOPENING
       Object has been previously closed and is now reopened due to a
       read request.

  A restarted daemon can recover the /dev/cachefiles fd from systemd's
  fdstore and writes "restore" to the device. This causes the object
  state to be reset from CLOSE to REOPENING and reinitializes the
  object.

  The daemon may now handle the open request. Any in-flight operations
  are restored and handled avoiding interruptions for users"

Link: https://systemd.io/FILE_DESCRIPTOR_STORE [1]

* tag 'vfs-6.8.cachefiles' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  cachefiles: add restore command to recover inflight ondemand read requests
  cachefiles: narrow the scope of triggering EPOLLIN events in ondemand mode
  cachefiles: resend an open request if the read request's object is closed
  cachefiles: extract ondemand info field from cachefiles_object
  cachefiles: introduce object ondemand state
This commit is contained in:
Linus Torvalds 2024-01-08 11:26:50 -08:00
commit 26458409a9
4 changed files with 201 additions and 46 deletions

View File

@ -77,6 +77,7 @@ static const struct cachefiles_daemon_cmd cachefiles_daemon_cmds[] = {
{ "tag", cachefiles_daemon_tag },
#ifdef CONFIG_CACHEFILES_ONDEMAND
{ "copen", cachefiles_ondemand_copen },
{ "restore", cachefiles_ondemand_restore },
#endif
{ "", NULL }
};
@ -355,14 +356,24 @@ static __poll_t cachefiles_daemon_poll(struct file *file,
struct poll_table_struct *poll)
{
struct cachefiles_cache *cache = file->private_data;
XA_STATE(xas, &cache->reqs, 0);
struct cachefiles_req *req;
__poll_t mask;
poll_wait(file, &cache->daemon_pollwq, poll);
mask = 0;
if (cachefiles_in_ondemand_mode(cache)) {
if (!xa_empty(&cache->reqs))
mask |= EPOLLIN;
if (!xa_empty(&cache->reqs)) {
rcu_read_lock();
xas_for_each_marked(&xas, req, ULONG_MAX, CACHEFILES_REQ_NEW) {
if (!cachefiles_ondemand_is_reopening_read(req)) {
mask |= EPOLLIN;
break;
}
}
rcu_read_unlock();
}
} else {
if (test_bit(CACHEFILES_STATE_CHANGED, &cache->flags))
mask |= EPOLLIN;

View File

@ -31,6 +31,11 @@ struct cachefiles_object *cachefiles_alloc_object(struct fscache_cookie *cookie)
if (!object)
return NULL;
if (cachefiles_ondemand_init_obj_info(object, volume)) {
kmem_cache_free(cachefiles_object_jar, object);
return NULL;
}
refcount_set(&object->ref, 1);
spin_lock_init(&object->lock);
@ -88,7 +93,7 @@ void cachefiles_put_object(struct cachefiles_object *object,
ASSERTCMP(object->file, ==, NULL);
kfree(object->d_name);
cachefiles_ondemand_deinit_obj_info(object);
cache = object->volume->cache->cache;
fscache_put_cookie(object->cookie, fscache_cookie_put_object);
object->cookie = NULL;

View File

@ -44,6 +44,19 @@ struct cachefiles_volume {
struct dentry *fanout[256]; /* Fanout subdirs */
};
enum cachefiles_object_state {
CACHEFILES_ONDEMAND_OBJSTATE_CLOSE, /* Anonymous fd closed by daemon or initial state */
CACHEFILES_ONDEMAND_OBJSTATE_OPEN, /* Anonymous fd associated with object is available */
CACHEFILES_ONDEMAND_OBJSTATE_REOPENING, /* Object that was closed and is being reopened. */
};
struct cachefiles_ondemand_info {
struct work_struct ondemand_work;
int ondemand_id;
enum cachefiles_object_state state;
struct cachefiles_object *object;
};
/*
* Backing file state.
*/
@ -61,7 +74,7 @@ struct cachefiles_object {
unsigned long flags;
#define CACHEFILES_OBJECT_USING_TMPFILE 0 /* Have an unlinked tmpfile */
#ifdef CONFIG_CACHEFILES_ONDEMAND
int ondemand_id;
struct cachefiles_ondemand_info *ondemand;
#endif
};
@ -290,12 +303,42 @@ extern ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache,
extern int cachefiles_ondemand_copen(struct cachefiles_cache *cache,
char *args);
extern int cachefiles_ondemand_restore(struct cachefiles_cache *cache,
char *args);
extern int cachefiles_ondemand_init_object(struct cachefiles_object *object);
extern void cachefiles_ondemand_clean_object(struct cachefiles_object *object);
extern int cachefiles_ondemand_read(struct cachefiles_object *object,
loff_t pos, size_t len);
extern int cachefiles_ondemand_init_obj_info(struct cachefiles_object *obj,
struct cachefiles_volume *volume);
extern void cachefiles_ondemand_deinit_obj_info(struct cachefiles_object *obj);
#define CACHEFILES_OBJECT_STATE_FUNCS(_state, _STATE) \
static inline bool \
cachefiles_ondemand_object_is_##_state(const struct cachefiles_object *object) \
{ \
return object->ondemand->state == CACHEFILES_ONDEMAND_OBJSTATE_##_STATE; \
} \
\
static inline void \
cachefiles_ondemand_set_object_##_state(struct cachefiles_object *object) \
{ \
object->ondemand->state = CACHEFILES_ONDEMAND_OBJSTATE_##_STATE; \
}
CACHEFILES_OBJECT_STATE_FUNCS(open, OPEN);
CACHEFILES_OBJECT_STATE_FUNCS(close, CLOSE);
CACHEFILES_OBJECT_STATE_FUNCS(reopening, REOPENING);
static inline bool cachefiles_ondemand_is_reopening_read(struct cachefiles_req *req)
{
return cachefiles_ondemand_object_is_reopening(req->object) &&
req->msg.opcode == CACHEFILES_OP_READ;
}
#else
static inline ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache,
char __user *_buffer, size_t buflen)
@ -317,6 +360,20 @@ static inline int cachefiles_ondemand_read(struct cachefiles_object *object,
{
return -EOPNOTSUPP;
}
static inline int cachefiles_ondemand_init_obj_info(struct cachefiles_object *obj,
struct cachefiles_volume *volume)
{
return 0;
}
static inline void cachefiles_ondemand_deinit_obj_info(struct cachefiles_object *obj)
{
}
static inline bool cachefiles_ondemand_is_reopening_read(struct cachefiles_req *req)
{
return false;
}
#endif
/*

View File

@ -9,21 +9,19 @@ static int cachefiles_ondemand_fd_release(struct inode *inode,
{
struct cachefiles_object *object = file->private_data;
struct cachefiles_cache *cache = object->volume->cache;
int object_id = object->ondemand_id;
struct cachefiles_ondemand_info *info = object->ondemand;
int object_id = info->ondemand_id;
struct cachefiles_req *req;
XA_STATE(xas, &cache->reqs, 0);
xa_lock(&cache->reqs);
object->ondemand_id = CACHEFILES_ONDEMAND_ID_CLOSED;
info->ondemand_id = CACHEFILES_ONDEMAND_ID_CLOSED;
cachefiles_ondemand_set_object_close(object);
/*
* Flush all pending READ requests since their completion depends on
* anon_fd.
*/
xas_for_each(&xas, req, ULONG_MAX) {
/* Only flush CACHEFILES_REQ_NEW marked req to avoid race with daemon_read */
xas_for_each_marked(&xas, req, ULONG_MAX, CACHEFILES_REQ_NEW) {
if (req->msg.object_id == object_id &&
req->msg.opcode == CACHEFILES_OP_READ) {
req->error = -EIO;
req->msg.opcode == CACHEFILES_OP_CLOSE) {
complete(&req->done);
xas_store(&xas, NULL);
}
@ -176,11 +174,37 @@ int cachefiles_ondemand_copen(struct cachefiles_cache *cache, char *args)
set_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags);
trace_cachefiles_ondemand_copen(req->object, id, size);
cachefiles_ondemand_set_object_open(req->object);
wake_up_all(&cache->daemon_pollwq);
out:
complete(&req->done);
return ret;
}
int cachefiles_ondemand_restore(struct cachefiles_cache *cache, char *args)
{
struct cachefiles_req *req;
XA_STATE(xas, &cache->reqs, 0);
if (!test_bit(CACHEFILES_ONDEMAND_MODE, &cache->flags))
return -EOPNOTSUPP;
/*
* Reset the requests to CACHEFILES_REQ_NEW state, so that the
* requests have been processed halfway before the crash of the
* user daemon could be reprocessed after the recovery.
*/
xas_lock(&xas);
xas_for_each(&xas, req, ULONG_MAX)
xas_set_mark(&xas, CACHEFILES_REQ_NEW);
xas_unlock(&xas);
wake_up_all(&cache->daemon_pollwq);
return 0;
}
static int cachefiles_ondemand_get_fd(struct cachefiles_req *req)
{
struct cachefiles_object *object;
@ -218,8 +242,7 @@ static int cachefiles_ondemand_get_fd(struct cachefiles_req *req)
load = (void *)req->msg.data;
load->fd = fd;
req->msg.object_id = object_id;
object->ondemand_id = object_id;
object->ondemand->ondemand_id = object_id;
cachefiles_get_unbind_pincount(cache);
trace_cachefiles_ondemand_open(object, &req->msg, load);
@ -234,6 +257,43 @@ err:
return ret;
}
static void ondemand_object_worker(struct work_struct *work)
{
struct cachefiles_ondemand_info *info =
container_of(work, struct cachefiles_ondemand_info, ondemand_work);
cachefiles_ondemand_init_object(info->object);
}
/*
* If there are any inflight or subsequent READ requests on the
* closed object, reopen it.
* Skip read requests whose related object is reopening.
*/
static struct cachefiles_req *cachefiles_ondemand_select_req(struct xa_state *xas,
unsigned long xa_max)
{
struct cachefiles_req *req;
struct cachefiles_object *object;
struct cachefiles_ondemand_info *info;
xas_for_each_marked(xas, req, xa_max, CACHEFILES_REQ_NEW) {
if (req->msg.opcode != CACHEFILES_OP_READ)
return req;
object = req->object;
info = object->ondemand;
if (cachefiles_ondemand_object_is_close(object)) {
cachefiles_ondemand_set_object_reopening(object);
queue_work(fscache_wq, &info->ondemand_work);
continue;
}
if (cachefiles_ondemand_object_is_reopening(object))
continue;
return req;
}
return NULL;
}
ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache,
char __user *_buffer, size_t buflen)
{
@ -244,16 +304,16 @@ ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache,
int ret = 0;
XA_STATE(xas, &cache->reqs, cache->req_id_next);
xa_lock(&cache->reqs);
/*
* Cyclically search for a request that has not ever been processed,
* to prevent requests from being processed repeatedly, and make
* request distribution fair.
*/
xa_lock(&cache->reqs);
req = xas_find_marked(&xas, UINT_MAX, CACHEFILES_REQ_NEW);
req = cachefiles_ondemand_select_req(&xas, ULONG_MAX);
if (!req && cache->req_id_next > 0) {
xas_set(&xas, 0);
req = xas_find_marked(&xas, cache->req_id_next - 1, CACHEFILES_REQ_NEW);
req = cachefiles_ondemand_select_req(&xas, cache->req_id_next - 1);
}
if (!req) {
xa_unlock(&cache->reqs);
@ -273,14 +333,18 @@ ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache,
xa_unlock(&cache->reqs);
id = xas.xa_index;
msg->msg_id = id;
if (msg->opcode == CACHEFILES_OP_OPEN) {
ret = cachefiles_ondemand_get_fd(req);
if (ret)
if (ret) {
cachefiles_ondemand_set_object_close(req->object);
goto error;
}
}
msg->msg_id = id;
msg->object_id = req->object->ondemand->ondemand_id;
if (copy_to_user(_buffer, msg, n) != 0) {
ret = -EFAULT;
goto err_put_fd;
@ -313,19 +377,23 @@ static int cachefiles_ondemand_send_req(struct cachefiles_object *object,
void *private)
{
struct cachefiles_cache *cache = object->volume->cache;
struct cachefiles_req *req;
struct cachefiles_req *req = NULL;
XA_STATE(xas, &cache->reqs, 0);
int ret;
if (!test_bit(CACHEFILES_ONDEMAND_MODE, &cache->flags))
return 0;
if (test_bit(CACHEFILES_DEAD, &cache->flags))
return -EIO;
if (test_bit(CACHEFILES_DEAD, &cache->flags)) {
ret = -EIO;
goto out;
}
req = kzalloc(sizeof(*req) + data_len, GFP_KERNEL);
if (!req)
return -ENOMEM;
if (!req) {
ret = -ENOMEM;
goto out;
}
req->object = object;
init_completion(&req->done);
@ -363,8 +431,9 @@ static int cachefiles_ondemand_send_req(struct cachefiles_object *object,
/* coupled with the barrier in cachefiles_flush_reqs() */
smp_mb();
if (opcode != CACHEFILES_OP_OPEN && object->ondemand_id <= 0) {
WARN_ON_ONCE(object->ondemand_id == 0);
if (opcode == CACHEFILES_OP_CLOSE &&
!cachefiles_ondemand_object_is_open(object)) {
WARN_ON_ONCE(object->ondemand->ondemand_id == 0);
xas_unlock(&xas);
ret = -EIO;
goto out;
@ -387,7 +456,15 @@ static int cachefiles_ondemand_send_req(struct cachefiles_object *object,
wake_up_all(&cache->daemon_pollwq);
wait_for_completion(&req->done);
ret = req->error;
kfree(req);
return ret;
out:
/* Reset the object to close state in error handling path.
* If error occurs after creating the anonymous fd,
* cachefiles_ondemand_fd_release() will set object to close.
*/
if (opcode == CACHEFILES_OP_OPEN)
cachefiles_ondemand_set_object_close(object);
kfree(req);
return ret;
}
@ -430,18 +507,10 @@ static int cachefiles_ondemand_init_close_req(struct cachefiles_req *req,
void *private)
{
struct cachefiles_object *object = req->object;
int object_id = object->ondemand_id;
/*
* It's possible that object id is still 0 if the cookie looking up
* phase failed before OPEN request has ever been sent. Also avoid
* sending CLOSE request for CACHEFILES_ONDEMAND_ID_CLOSED, which means
* anon_fd has already been closed.
*/
if (object_id <= 0)
if (!cachefiles_ondemand_object_is_open(object))
return -ENOENT;
req->msg.object_id = object_id;
trace_cachefiles_ondemand_close(object, &req->msg);
return 0;
}
@ -457,16 +526,7 @@ static int cachefiles_ondemand_init_read_req(struct cachefiles_req *req,
struct cachefiles_object *object = req->object;
struct cachefiles_read *load = (void *)req->msg.data;
struct cachefiles_read_ctx *read_ctx = private;
int object_id = object->ondemand_id;
/* Stop enqueuing requests when daemon has closed anon_fd. */
if (object_id <= 0) {
WARN_ON_ONCE(object_id == 0);
pr_info_once("READ: anonymous fd closed prematurely.\n");
return -EIO;
}
req->msg.object_id = object_id;
load->off = read_ctx->off;
load->len = read_ctx->len;
trace_cachefiles_ondemand_read(object, &req->msg, load);
@ -485,7 +545,7 @@ int cachefiles_ondemand_init_object(struct cachefiles_object *object)
* creating a new tmpfile as the cache file. Reuse the previously
* allocated object ID if any.
*/
if (object->ondemand_id > 0)
if (cachefiles_ondemand_object_is_open(object))
return 0;
volume_key_size = volume->key[0] + 1;
@ -503,6 +563,28 @@ void cachefiles_ondemand_clean_object(struct cachefiles_object *object)
cachefiles_ondemand_init_close_req, NULL);
}
int cachefiles_ondemand_init_obj_info(struct cachefiles_object *object,
struct cachefiles_volume *volume)
{
if (!cachefiles_in_ondemand_mode(volume->cache))
return 0;
object->ondemand = kzalloc(sizeof(struct cachefiles_ondemand_info),
GFP_KERNEL);
if (!object->ondemand)
return -ENOMEM;
object->ondemand->object = object;
INIT_WORK(&object->ondemand->ondemand_work, ondemand_object_worker);
return 0;
}
void cachefiles_ondemand_deinit_obj_info(struct cachefiles_object *object)
{
kfree(object->ondemand);
object->ondemand = NULL;
}
int cachefiles_ondemand_read(struct cachefiles_object *object,
loff_t pos, size_t len)
{