linux/fs/f2fs/inline.c
Chao Yu 675f10bde6 f2fs: fix to convert inline directory correctly
With below serials, we will lose parts of dirents:

1) mount f2fs with inline_dentry option
2) echo 1 > /sys/fs/f2fs/sdX/dir_level
3) mkdir dir
4) touch 180 files named [1-180] in dir
5) touch 181 in dir
6) echo 3 > /proc/sys/vm/drop_caches
7) ll dir

ls: cannot access 2: No such file or directory
ls: cannot access 4: No such file or directory
ls: cannot access 5: No such file or directory
ls: cannot access 6: No such file or directory
ls: cannot access 8: No such file or directory
ls: cannot access 9: No such file or directory
...
total 360
drwxr-xr-x 2 root root 4096 Feb 19 15:12 ./
drwxr-xr-x 3 root root 4096 Feb 19 15:11 ../
-rw-r--r-- 1 root root    0 Feb 19 15:12 1
-rw-r--r-- 1 root root    0 Feb 19 15:12 10
-rw-r--r-- 1 root root    0 Feb 19 15:12 100
-????????? ? ?    ?       ?            ? 101
-????????? ? ?    ?       ?            ? 102
-????????? ? ?    ?       ?            ? 103
...

The reason is: when doing the inline dir conversion, we didn't consider
that directory has hierarchical hash structure which can be configured
through sysfs interface 'dir_level'.

By default, dir_level of directory inode is 0, it means we have one bucket
in hash table located in first level, all dirents will be hashed in this
bucket, so it has no problem for us to do the duplication simply between
inline dentry page and converted normal dentry page.

However, if we configured dir_level with the value N (greater than 0), it
will expand the bucket number of first level hash table by 2^N - 1, it
hashs dirents into different buckets according their hash value, if we
still move all dirents to first bucket, it makes incorrent locating for
inline dirents, the result is, although we can iterate all dirents through
->readdir, we can't stat some of them in ->lookup which based on hash
table searching.

This patch fixes this issue by rehashing dirents into correct position
when converting inline directory.

Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2016-04-15 08:49:47 -07:00

689 lines
16 KiB
C

/*
* fs/f2fs/inline.c
* Copyright (c) 2013, Intel Corporation
* Authors: Huajun Li <huajun.li@intel.com>
* Haicheng Li <haicheng.li@intel.com>
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include "f2fs.h"
#include "node.h"
bool f2fs_may_inline_data(struct inode *inode)
{
if (f2fs_is_atomic_file(inode))
return false;
if (!S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode))
return false;
if (i_size_read(inode) > MAX_INLINE_DATA)
return false;
if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
return false;
return true;
}
bool f2fs_may_inline_dentry(struct inode *inode)
{
if (!test_opt(F2FS_I_SB(inode), INLINE_DENTRY))
return false;
if (!S_ISDIR(inode->i_mode))
return false;
return true;
}
void read_inline_data(struct page *page, struct page *ipage)
{
void *src_addr, *dst_addr;
if (PageUptodate(page))
return;
f2fs_bug_on(F2FS_P_SB(page), page->index);
zero_user_segment(page, MAX_INLINE_DATA, PAGE_SIZE);
/* Copy the whole inline data block */
src_addr = inline_data_addr(ipage);
dst_addr = kmap_atomic(page);
memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
flush_dcache_page(page);
kunmap_atomic(dst_addr);
SetPageUptodate(page);
}
bool truncate_inline_inode(struct page *ipage, u64 from)
{
void *addr;
if (from >= MAX_INLINE_DATA)
return false;
addr = inline_data_addr(ipage);
f2fs_wait_on_page_writeback(ipage, NODE, true);
memset(addr + from, 0, MAX_INLINE_DATA - from);
return true;
}
int f2fs_read_inline_data(struct inode *inode, struct page *page)
{
struct page *ipage;
ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino);
if (IS_ERR(ipage)) {
unlock_page(page);
return PTR_ERR(ipage);
}
if (!f2fs_has_inline_data(inode)) {
f2fs_put_page(ipage, 1);
return -EAGAIN;
}
if (page->index)
zero_user_segment(page, 0, PAGE_SIZE);
else
read_inline_data(page, ipage);
SetPageUptodate(page);
f2fs_put_page(ipage, 1);
unlock_page(page);
return 0;
}
int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
{
struct f2fs_io_info fio = {
.sbi = F2FS_I_SB(dn->inode),
.type = DATA,
.rw = WRITE_SYNC | REQ_PRIO,
.page = page,
.encrypted_page = NULL,
};
int dirty, err;
if (!f2fs_exist_data(dn->inode))
goto clear_out;
err = f2fs_reserve_block(dn, 0);
if (err)
return err;
f2fs_bug_on(F2FS_P_SB(page), PageWriteback(page));
read_inline_data(page, dn->inode_page);
set_page_dirty(page);
/* clear dirty state */
dirty = clear_page_dirty_for_io(page);
/* write data page to try to make data consistent */
set_page_writeback(page);
fio.old_blkaddr = dn->data_blkaddr;
write_data_page(dn, &fio);
f2fs_wait_on_page_writeback(page, DATA, true);
if (dirty)
inode_dec_dirty_pages(dn->inode);
/* this converted inline_data should be recovered. */
set_inode_flag(F2FS_I(dn->inode), FI_APPEND_WRITE);
/* clear inline data and flag after data writeback */
truncate_inline_inode(dn->inode_page, 0);
clear_inline_node(dn->inode_page);
clear_out:
stat_dec_inline_inode(dn->inode);
f2fs_clear_inline_inode(dn->inode);
sync_inode_page(dn);
f2fs_put_dnode(dn);
return 0;
}
int f2fs_convert_inline_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct dnode_of_data dn;
struct page *ipage, *page;
int err = 0;
if (!f2fs_has_inline_data(inode))
return 0;
page = grab_cache_page(inode->i_mapping, 0);
if (!page)
return -ENOMEM;
f2fs_lock_op(sbi);
ipage = get_node_page(sbi, inode->i_ino);
if (IS_ERR(ipage)) {
err = PTR_ERR(ipage);
goto out;
}
set_new_dnode(&dn, inode, ipage, ipage, 0);
if (f2fs_has_inline_data(inode))
err = f2fs_convert_inline_page(&dn, page);
f2fs_put_dnode(&dn);
out:
f2fs_unlock_op(sbi);
f2fs_put_page(page, 1);
f2fs_balance_fs(sbi, dn.node_changed);
return err;
}
int f2fs_write_inline_data(struct inode *inode, struct page *page)
{
void *src_addr, *dst_addr;
struct dnode_of_data dn;
int err;
set_new_dnode(&dn, inode, NULL, NULL, 0);
err = get_dnode_of_data(&dn, 0, LOOKUP_NODE);
if (err)
return err;
if (!f2fs_has_inline_data(inode)) {
f2fs_put_dnode(&dn);
return -EAGAIN;
}
f2fs_bug_on(F2FS_I_SB(inode), page->index);
f2fs_wait_on_page_writeback(dn.inode_page, NODE, true);
src_addr = kmap_atomic(page);
dst_addr = inline_data_addr(dn.inode_page);
memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
kunmap_atomic(src_addr);
set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
sync_inode_page(&dn);
clear_inline_node(dn.inode_page);
f2fs_put_dnode(&dn);
return 0;
}
bool recover_inline_data(struct inode *inode, struct page *npage)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_inode *ri = NULL;
void *src_addr, *dst_addr;
struct page *ipage;
/*
* The inline_data recovery policy is as follows.
* [prev.] [next] of inline_data flag
* o o -> recover inline_data
* o x -> remove inline_data, and then recover data blocks
* x o -> remove inline_data, and then recover inline_data
* x x -> recover data blocks
*/
if (IS_INODE(npage))
ri = F2FS_INODE(npage);
if (f2fs_has_inline_data(inode) &&
ri && (ri->i_inline & F2FS_INLINE_DATA)) {
process_inline:
ipage = get_node_page(sbi, inode->i_ino);
f2fs_bug_on(sbi, IS_ERR(ipage));
f2fs_wait_on_page_writeback(ipage, NODE, true);
src_addr = inline_data_addr(npage);
dst_addr = inline_data_addr(ipage);
memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
set_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
update_inode(inode, ipage);
f2fs_put_page(ipage, 1);
return true;
}
if (f2fs_has_inline_data(inode)) {
ipage = get_node_page(sbi, inode->i_ino);
f2fs_bug_on(sbi, IS_ERR(ipage));
if (!truncate_inline_inode(ipage, 0))
return false;
f2fs_clear_inline_inode(inode);
update_inode(inode, ipage);
f2fs_put_page(ipage, 1);
} else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) {
if (truncate_blocks(inode, 0, false))
return false;
goto process_inline;
}
return false;
}
struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir,
struct fscrypt_name *fname, struct page **res_page)
{
struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
struct f2fs_inline_dentry *inline_dentry;
struct qstr name = FSTR_TO_QSTR(&fname->disk_name);
struct f2fs_dir_entry *de;
struct f2fs_dentry_ptr d;
struct page *ipage;
f2fs_hash_t namehash;
ipage = get_node_page(sbi, dir->i_ino);
if (IS_ERR(ipage))
return NULL;
namehash = f2fs_dentry_hash(&name);
inline_dentry = inline_data_addr(ipage);
make_dentry_ptr(NULL, &d, (void *)inline_dentry, 2);
de = find_target_dentry(fname, namehash, NULL, &d);
unlock_page(ipage);
if (de)
*res_page = ipage;
else
f2fs_put_page(ipage, 0);
/*
* For the most part, it should be a bug when name_len is zero.
* We stop here for figuring out where the bugs has occurred.
*/
f2fs_bug_on(sbi, d.max < 0);
return de;
}
struct f2fs_dir_entry *f2fs_parent_inline_dir(struct inode *dir,
struct page **p)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
struct page *ipage;
struct f2fs_dir_entry *de;
struct f2fs_inline_dentry *dentry_blk;
ipage = get_node_page(sbi, dir->i_ino);
if (IS_ERR(ipage))
return NULL;
dentry_blk = inline_data_addr(ipage);
de = &dentry_blk->dentry[1];
*p = ipage;
unlock_page(ipage);
return de;
}
int make_empty_inline_dir(struct inode *inode, struct inode *parent,
struct page *ipage)
{
struct f2fs_inline_dentry *dentry_blk;
struct f2fs_dentry_ptr d;
dentry_blk = inline_data_addr(ipage);
make_dentry_ptr(NULL, &d, (void *)dentry_blk, 2);
do_make_empty_dir(inode, parent, &d);
set_page_dirty(ipage);
/* update i_size to MAX_INLINE_DATA */
if (i_size_read(inode) < MAX_INLINE_DATA) {
i_size_write(inode, MAX_INLINE_DATA);
set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR);
}
return 0;
}
/*
* NOTE: ipage is grabbed by caller, but if any error occurs, we should
* release ipage in this function.
*/
static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage,
struct f2fs_inline_dentry *inline_dentry)
{
struct page *page;
struct dnode_of_data dn;
struct f2fs_dentry_block *dentry_blk;
int err;
page = grab_cache_page(dir->i_mapping, 0);
if (!page) {
f2fs_put_page(ipage, 1);
return -ENOMEM;
}
set_new_dnode(&dn, dir, ipage, NULL, 0);
err = f2fs_reserve_block(&dn, 0);
if (err)
goto out;
f2fs_wait_on_page_writeback(page, DATA, true);
zero_user_segment(page, MAX_INLINE_DATA, PAGE_SIZE);
dentry_blk = kmap_atomic(page);
/* copy data from inline dentry block to new dentry block */
memcpy(dentry_blk->dentry_bitmap, inline_dentry->dentry_bitmap,
INLINE_DENTRY_BITMAP_SIZE);
memset(dentry_blk->dentry_bitmap + INLINE_DENTRY_BITMAP_SIZE, 0,
SIZE_OF_DENTRY_BITMAP - INLINE_DENTRY_BITMAP_SIZE);
/*
* we do not need to zero out remainder part of dentry and filename
* field, since we have used bitmap for marking the usage status of
* them, besides, we can also ignore copying/zeroing reserved space
* of dentry block, because them haven't been used so far.
*/
memcpy(dentry_blk->dentry, inline_dentry->dentry,
sizeof(struct f2fs_dir_entry) * NR_INLINE_DENTRY);
memcpy(dentry_blk->filename, inline_dentry->filename,
NR_INLINE_DENTRY * F2FS_SLOT_LEN);
kunmap_atomic(dentry_blk);
SetPageUptodate(page);
set_page_dirty(page);
/* clear inline dir and flag after data writeback */
truncate_inline_inode(ipage, 0);
stat_dec_inline_dir(dir);
clear_inode_flag(F2FS_I(dir), FI_INLINE_DENTRY);
if (i_size_read(dir) < PAGE_SIZE) {
i_size_write(dir, PAGE_SIZE);
set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
}
sync_inode_page(&dn);
out:
f2fs_put_page(page, 1);
return err;
}
static int f2fs_add_inline_entries(struct inode *dir,
struct f2fs_inline_dentry *inline_dentry)
{
struct f2fs_dentry_ptr d;
unsigned long bit_pos = 0;
int err = 0;
make_dentry_ptr(NULL, &d, (void *)inline_dentry, 2);
while (bit_pos < d.max) {
struct f2fs_dir_entry *de;
struct qstr new_name;
nid_t ino;
umode_t fake_mode;
if (!test_bit_le(bit_pos, d.bitmap)) {
bit_pos++;
continue;
}
de = &d.dentry[bit_pos];
new_name.name = d.filename[bit_pos];
new_name.len = de->name_len;
ino = le32_to_cpu(de->ino);
fake_mode = get_de_type(de) << S_SHIFT;
err = f2fs_add_regular_entry(dir, &new_name, NULL,
ino, fake_mode);
if (err)
goto punch_dentry_pages;
if (unlikely(!de->name_len))
d.max = -1;
bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
}
return 0;
punch_dentry_pages:
truncate_inode_pages(&dir->i_data, 0);
truncate_blocks(dir, 0, false);
remove_dirty_inode(dir);
return err;
}
static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage,
struct f2fs_inline_dentry *inline_dentry)
{
struct f2fs_inline_dentry *backup_dentry;
int err;
backup_dentry = kmalloc(sizeof(struct f2fs_inline_dentry),
GFP_F2FS_ZERO);
if (!backup_dentry)
return -ENOMEM;
memcpy(backup_dentry, inline_dentry, MAX_INLINE_DATA);
truncate_inline_inode(ipage, 0);
unlock_page(ipage);
err = f2fs_add_inline_entries(dir, backup_dentry);
if (err)
goto recover;
lock_page(ipage);
stat_dec_inline_dir(dir);
clear_inode_flag(F2FS_I(dir), FI_INLINE_DENTRY);
update_inode(dir, ipage);
kfree(backup_dentry);
return 0;
recover:
lock_page(ipage);
memcpy(inline_dentry, backup_dentry, MAX_INLINE_DATA);
i_size_write(dir, MAX_INLINE_DATA);
update_inode(dir, ipage);
f2fs_put_page(ipage, 1);
kfree(backup_dentry);
return err;
}
static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage,
struct f2fs_inline_dentry *inline_dentry)
{
if (!F2FS_I(dir)->i_dir_level)
return f2fs_move_inline_dirents(dir, ipage, inline_dentry);
else
return f2fs_move_rehashed_dirents(dir, ipage, inline_dentry);
}
int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name,
struct inode *inode, nid_t ino, umode_t mode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
struct page *ipage;
unsigned int bit_pos;
f2fs_hash_t name_hash;
size_t namelen = name->len;
struct f2fs_inline_dentry *dentry_blk = NULL;
struct f2fs_dentry_ptr d;
int slots = GET_DENTRY_SLOTS(namelen);
struct page *page = NULL;
int err = 0;
ipage = get_node_page(sbi, dir->i_ino);
if (IS_ERR(ipage))
return PTR_ERR(ipage);
dentry_blk = inline_data_addr(ipage);
bit_pos = room_for_filename(&dentry_blk->dentry_bitmap,
slots, NR_INLINE_DENTRY);
if (bit_pos >= NR_INLINE_DENTRY) {
err = f2fs_convert_inline_dir(dir, ipage, dentry_blk);
if (err)
return err;
err = -EAGAIN;
goto out;
}
if (inode) {
down_write(&F2FS_I(inode)->i_sem);
page = init_inode_metadata(inode, dir, name, ipage);
if (IS_ERR(page)) {
err = PTR_ERR(page);
goto fail;
}
}
f2fs_wait_on_page_writeback(ipage, NODE, true);
name_hash = f2fs_dentry_hash(name);
make_dentry_ptr(NULL, &d, (void *)dentry_blk, 2);
f2fs_update_dentry(ino, mode, &d, name, name_hash, bit_pos);
set_page_dirty(ipage);
/* we don't need to mark_inode_dirty now */
if (inode) {
F2FS_I(inode)->i_pino = dir->i_ino;
update_inode(inode, page);
f2fs_put_page(page, 1);
}
update_parent_metadata(dir, inode, 0);
fail:
if (inode)
up_write(&F2FS_I(inode)->i_sem);
if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) {
update_inode(dir, ipage);
clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
}
out:
f2fs_put_page(ipage, 1);
return err;
}
void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page,
struct inode *dir, struct inode *inode)
{
struct f2fs_inline_dentry *inline_dentry;
int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
unsigned int bit_pos;
int i;
lock_page(page);
f2fs_wait_on_page_writeback(page, NODE, true);
inline_dentry = inline_data_addr(page);
bit_pos = dentry - inline_dentry->dentry;
for (i = 0; i < slots; i++)
test_and_clear_bit_le(bit_pos + i,
&inline_dentry->dentry_bitmap);
set_page_dirty(page);
dir->i_ctime = dir->i_mtime = CURRENT_TIME;
if (inode)
f2fs_drop_nlink(dir, inode, page);
f2fs_put_page(page, 1);
}
bool f2fs_empty_inline_dir(struct inode *dir)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
struct page *ipage;
unsigned int bit_pos = 2;
struct f2fs_inline_dentry *dentry_blk;
ipage = get_node_page(sbi, dir->i_ino);
if (IS_ERR(ipage))
return false;
dentry_blk = inline_data_addr(ipage);
bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
NR_INLINE_DENTRY,
bit_pos);
f2fs_put_page(ipage, 1);
if (bit_pos < NR_INLINE_DENTRY)
return false;
return true;
}
int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx,
struct fscrypt_str *fstr)
{
struct inode *inode = file_inode(file);
struct f2fs_inline_dentry *inline_dentry = NULL;
struct page *ipage = NULL;
struct f2fs_dentry_ptr d;
if (ctx->pos == NR_INLINE_DENTRY)
return 0;
ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino);
if (IS_ERR(ipage))
return PTR_ERR(ipage);
inline_dentry = inline_data_addr(ipage);
make_dentry_ptr(inode, &d, (void *)inline_dentry, 2);
if (!f2fs_fill_dentries(ctx, &d, 0, fstr))
ctx->pos = NR_INLINE_DENTRY;
f2fs_put_page(ipage, 1);
return 0;
}
int f2fs_inline_data_fiemap(struct inode *inode,
struct fiemap_extent_info *fieinfo, __u64 start, __u64 len)
{
__u64 byteaddr, ilen;
__u32 flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED |
FIEMAP_EXTENT_LAST;
struct node_info ni;
struct page *ipage;
int err = 0;
ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino);
if (IS_ERR(ipage))
return PTR_ERR(ipage);
if (!f2fs_has_inline_data(inode)) {
err = -EAGAIN;
goto out;
}
ilen = min_t(size_t, MAX_INLINE_DATA, i_size_read(inode));
if (start >= ilen)
goto out;
if (start + len < ilen)
ilen = start + len;
ilen -= start;
get_node_info(F2FS_I_SB(inode), inode->i_ino, &ni);
byteaddr = (__u64)ni.blk_addr << inode->i_sb->s_blocksize_bits;
byteaddr += (char *)inline_data_addr(ipage) - (char *)F2FS_INODE(ipage);
err = fiemap_fill_next_extent(fieinfo, start, byteaddr, ilen, flags);
out:
f2fs_put_page(ipage, 1);
return err;
}