mirror of
https://github.com/torvalds/linux.git
synced 2024-11-23 04:31:50 +00:00
5b11888471
Until now, fscrypt has always used the filesystem block size as the granularity of file contents encryption. Two scenarios have come up where a sub-block granularity of contents encryption would be useful:

1. Inline crypto hardware that only supports a crypto data unit size that is less than the filesystem block size.

2. Support for direct I/O at a granularity less than the filesystem block size, for example at the block device's logical block size in order to match the traditional direct I/O alignment requirement.

(1) first came up with older eMMC inline crypto hardware that only supports a crypto data unit size of 512 bytes. That specific case ultimately went away because all systems with that hardware continued using out of tree code and never actually upgraded to the upstream inline crypto framework. But, now it's coming back in a new way: some current UFS controllers only support a data unit size of 4096 bytes, and there is a proposal to increase the filesystem block size to 16K.

(2) was discussed as a "nice to have" feature, though not essential, when support for direct I/O on encrypted files was being upstreamed.

Still, the fact that this feature has come up several times does suggest it would be wise to have available. Therefore, this patch implements it by using one of the reserved bytes in fscrypt_policy_v2 to allow users to select a sub-block data unit size. Supported data unit sizes are powers of 2 between 512 and the filesystem block size, inclusively. Support is implemented for both the FS-layer and inline crypto cases.

This patch focuses on the basic support for sub-block data units. Some things are out of scope for this patch but may be addressed later:

- Supporting sub-block data units in combination with FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64, in most cases. Unfortunately this combination usually causes data unit indices to exceed 32 bits, and thus fscrypt_supported_policy() correctly disallows it. The users who potentially need this combination are using f2fs. To support it, f2fs would need to provide an option to slightly reduce its max file size.

- Supporting sub-block data units in combination with FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32. This has the same problem described above, but also it will need special code to make DUN wraparound still happen on a FS block boundary.

- Supporting use case (2) mentioned above. The encrypted direct I/O code will need to stop requiring and assuming FS block alignment. This won't be hard, but it belongs in a separate patch.

- Supporting this feature on filesystems other than ext4 and f2fs. (Filesystems declare support for it via their fscrypt_operations.) On UBIFS, sub-block data units don't make sense because UBIFS encrypts variable-length blocks as a result of compression. CephFS could support it, but a bit more work would be needed to make the fscrypt_*_block_inplace functions play nicely with sub-block data units. I don't think there's a use case for this on CephFS anyway.

Link: https://lore.kernel.org/r/20230925055451.59499-6-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@google.com>
199 lines
5.9 KiB
C
199 lines
5.9 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Utility functions for file contents encryption/decryption on
|
|
* block device-based filesystems.
|
|
*
|
|
* Copyright (C) 2015, Google, Inc.
|
|
* Copyright (C) 2015, Motorola Mobility
|
|
*/
|
|
|
|
#include <linux/pagemap.h>
|
|
#include <linux/module.h>
|
|
#include <linux/bio.h>
|
|
#include <linux/namei.h>
|
|
#include "fscrypt_private.h"
|
|
|
|
/**
|
|
* fscrypt_decrypt_bio() - decrypt the contents of a bio
|
|
* @bio: the bio to decrypt
|
|
*
|
|
* Decrypt the contents of a "read" bio following successful completion of the
|
|
* underlying disk read. The bio must be reading a whole number of blocks of an
|
|
* encrypted file directly into the page cache. If the bio is reading the
|
|
* ciphertext into bounce pages instead of the page cache (for example, because
|
|
* the file is also compressed, so decompression is required after decryption),
|
|
* then this function isn't applicable. This function may sleep, so it must be
|
|
* called from a workqueue rather than from the bio's bi_end_io callback.
|
|
*
|
|
* Return: %true on success; %false on failure. On failure, bio->bi_status is
|
|
* also set to an error status.
|
|
*/
|
|
bool fscrypt_decrypt_bio(struct bio *bio)
|
|
{
|
|
struct folio_iter fi;
|
|
|
|
bio_for_each_folio_all(fi, bio) {
|
|
int err = fscrypt_decrypt_pagecache_blocks(fi.folio, fi.length,
|
|
fi.offset);
|
|
|
|
if (err) {
|
|
bio->bi_status = errno_to_blk_status(err);
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
EXPORT_SYMBOL(fscrypt_decrypt_bio);
|
|
|
|
static int fscrypt_zeroout_range_inline_crypt(const struct inode *inode,
|
|
pgoff_t lblk, sector_t pblk,
|
|
unsigned int len)
|
|
{
|
|
const unsigned int blockbits = inode->i_blkbits;
|
|
const unsigned int blocks_per_page = 1 << (PAGE_SHIFT - blockbits);
|
|
struct bio *bio;
|
|
int ret, err = 0;
|
|
int num_pages = 0;
|
|
|
|
/* This always succeeds since __GFP_DIRECT_RECLAIM is set. */
|
|
bio = bio_alloc(inode->i_sb->s_bdev, BIO_MAX_VECS, REQ_OP_WRITE,
|
|
GFP_NOFS);
|
|
|
|
while (len) {
|
|
unsigned int blocks_this_page = min(len, blocks_per_page);
|
|
unsigned int bytes_this_page = blocks_this_page << blockbits;
|
|
|
|
if (num_pages == 0) {
|
|
fscrypt_set_bio_crypt_ctx(bio, inode, lblk, GFP_NOFS);
|
|
bio->bi_iter.bi_sector =
|
|
pblk << (blockbits - SECTOR_SHIFT);
|
|
}
|
|
ret = bio_add_page(bio, ZERO_PAGE(0), bytes_this_page, 0);
|
|
if (WARN_ON_ONCE(ret != bytes_this_page)) {
|
|
err = -EIO;
|
|
goto out;
|
|
}
|
|
num_pages++;
|
|
len -= blocks_this_page;
|
|
lblk += blocks_this_page;
|
|
pblk += blocks_this_page;
|
|
if (num_pages == BIO_MAX_VECS || !len ||
|
|
!fscrypt_mergeable_bio(bio, inode, lblk)) {
|
|
err = submit_bio_wait(bio);
|
|
if (err)
|
|
goto out;
|
|
bio_reset(bio, inode->i_sb->s_bdev, REQ_OP_WRITE);
|
|
num_pages = 0;
|
|
}
|
|
}
|
|
out:
|
|
bio_put(bio);
|
|
return err;
|
|
}
|
|
|
|
/**
|
|
* fscrypt_zeroout_range() - zero out a range of blocks in an encrypted file
|
|
* @inode: the file's inode
|
|
* @lblk: the first file logical block to zero out
|
|
* @pblk: the first filesystem physical block to zero out
|
|
* @len: number of blocks to zero out
|
|
*
|
|
* Zero out filesystem blocks in an encrypted regular file on-disk, i.e. write
|
|
* ciphertext blocks which decrypt to the all-zeroes block. The blocks must be
|
|
* both logically and physically contiguous. It's also assumed that the
|
|
* filesystem only uses a single block device, ->s_bdev.
|
|
*
|
|
* Note that since each block uses a different IV, this involves writing a
|
|
* different ciphertext to each block; we can't simply reuse the same one.
|
|
*
|
|
* Return: 0 on success; -errno on failure.
|
|
*/
|
|
int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
|
|
sector_t pblk, unsigned int len)
|
|
{
|
|
const struct fscrypt_info *ci = inode->i_crypt_info;
|
|
const unsigned int du_bits = ci->ci_data_unit_bits;
|
|
const unsigned int du_size = 1U << du_bits;
|
|
const unsigned int du_per_page_bits = PAGE_SHIFT - du_bits;
|
|
const unsigned int du_per_page = 1U << du_per_page_bits;
|
|
u64 du_index = (u64)lblk << (inode->i_blkbits - du_bits);
|
|
u64 du_remaining = (u64)len << (inode->i_blkbits - du_bits);
|
|
sector_t sector = pblk << (inode->i_blkbits - SECTOR_SHIFT);
|
|
struct page *pages[16]; /* write up to 16 pages at a time */
|
|
unsigned int nr_pages;
|
|
unsigned int i;
|
|
unsigned int offset;
|
|
struct bio *bio;
|
|
int ret, err;
|
|
|
|
if (len == 0)
|
|
return 0;
|
|
|
|
if (fscrypt_inode_uses_inline_crypto(inode))
|
|
return fscrypt_zeroout_range_inline_crypt(inode, lblk, pblk,
|
|
len);
|
|
|
|
BUILD_BUG_ON(ARRAY_SIZE(pages) > BIO_MAX_VECS);
|
|
nr_pages = min_t(u64, ARRAY_SIZE(pages),
|
|
(du_remaining + du_per_page - 1) >> du_per_page_bits);
|
|
|
|
/*
|
|
* We need at least one page for ciphertext. Allocate the first one
|
|
* from a mempool, with __GFP_DIRECT_RECLAIM set so that it can't fail.
|
|
*
|
|
* Any additional page allocations are allowed to fail, as they only
|
|
* help performance, and waiting on the mempool for them could deadlock.
|
|
*/
|
|
for (i = 0; i < nr_pages; i++) {
|
|
pages[i] = fscrypt_alloc_bounce_page(i == 0 ? GFP_NOFS :
|
|
GFP_NOWAIT | __GFP_NOWARN);
|
|
if (!pages[i])
|
|
break;
|
|
}
|
|
nr_pages = i;
|
|
if (WARN_ON_ONCE(nr_pages <= 0))
|
|
return -EINVAL;
|
|
|
|
/* This always succeeds since __GFP_DIRECT_RECLAIM is set. */
|
|
bio = bio_alloc(inode->i_sb->s_bdev, nr_pages, REQ_OP_WRITE, GFP_NOFS);
|
|
|
|
do {
|
|
bio->bi_iter.bi_sector = sector;
|
|
|
|
i = 0;
|
|
offset = 0;
|
|
do {
|
|
err = fscrypt_crypt_data_unit(ci, FS_ENCRYPT, du_index,
|
|
ZERO_PAGE(0), pages[i],
|
|
du_size, offset,
|
|
GFP_NOFS);
|
|
if (err)
|
|
goto out;
|
|
du_index++;
|
|
sector += 1U << (du_bits - SECTOR_SHIFT);
|
|
du_remaining--;
|
|
offset += du_size;
|
|
if (offset == PAGE_SIZE || du_remaining == 0) {
|
|
ret = bio_add_page(bio, pages[i++], offset, 0);
|
|
if (WARN_ON_ONCE(ret != offset)) {
|
|
err = -EIO;
|
|
goto out;
|
|
}
|
|
offset = 0;
|
|
}
|
|
} while (i != nr_pages && du_remaining != 0);
|
|
|
|
err = submit_bio_wait(bio);
|
|
if (err)
|
|
goto out;
|
|
bio_reset(bio, inode->i_sb->s_bdev, REQ_OP_WRITE);
|
|
} while (du_remaining != 0);
|
|
err = 0;
|
|
out:
|
|
bio_put(bio);
|
|
for (i = 0; i < nr_pages; i++)
|
|
fscrypt_free_bounce_page(pages[i]);
|
|
return err;
|
|
}
|
|
EXPORT_SYMBOL(fscrypt_zeroout_range);
|