forked from Minki/linux
a4b6817625
Whenever we notice some sluggish issues on our machines, we are always curious about how well all types of I/O in the f2fs filesystem are handled. But, it's hard to get this kind of real data. First of all, we need to reproduce the issue while turning on the profiling tool like blktrace, but the issue doesn't happen again easily. Second, with the intervention of any tools, the overall timing of the issue will be slightly changed and it sometimes makes us hard to figure it out. So, I added the feature printing out IO latency statistics tracepoint events, which are minimal things to understand filesystem's I/O related behaviors, into F2FS_IOSTAT kernel config. With "iostat_enable" sysfs node on, we can get this statistics info in a periodic way and it would cause the least overhead. [samples] f2fs_ckpt-254:1-507 [003] .... 2842.439683: f2fs_iostat_latency: dev = (254,11), iotype [peak lat.(ms)/avg lat.(ms)/count], rd_data [136/1/801], rd_node [136/1/1704], rd_meta [4/2/4], wr_sync_data [164/16/3331], wr_sync_node [152/3/648], wr_sync_meta [160/2/4243], wr_async_data [24/13/15], wr_async_node [0/0/0], wr_async_meta [0/0/0] f2fs_ckpt-254:1-507 [002] .... 2845.450514: f2fs_iostat_latency: dev = (254,11), iotype [peak lat.(ms)/avg lat.(ms)/count], rd_data [60/3/456], rd_node [60/3/1258], rd_meta [0/0/1], wr_sync_data [120/12/2285], wr_sync_node [88/5/428], wr_sync_meta [52/6/2990], wr_async_data [4/1/3], wr_async_node [0/0/0], wr_async_meta [0/0/0] Signed-off-by: Daeho Jeong <daehojeong@google.com> Reviewed-by: Chao Yu <chao@kernel.org> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
288 lines
7.5 KiB
C
288 lines
7.5 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* f2fs iostat support
|
|
*
|
|
* Copyright 2021 Google LLC
|
|
* Author: Daeho Jeong <daehojeong@google.com>
|
|
*/
|
|
|
|
#include <linux/fs.h>
|
|
#include <linux/f2fs_fs.h>
|
|
#include <linux/seq_file.h>
|
|
|
|
#include "f2fs.h"
|
|
#include "iostat.h"
|
|
#include <trace/events/f2fs.h>
|
|
|
|
#define NUM_PREALLOC_IOSTAT_CTXS 128
|
|
static struct kmem_cache *bio_iostat_ctx_cache;
|
|
static mempool_t *bio_iostat_ctx_pool;
|
|
|
|
int __maybe_unused iostat_info_seq_show(struct seq_file *seq, void *offset)
|
|
{
|
|
struct super_block *sb = seq->private;
|
|
struct f2fs_sb_info *sbi = F2FS_SB(sb);
|
|
time64_t now = ktime_get_real_seconds();
|
|
|
|
if (!sbi->iostat_enable)
|
|
return 0;
|
|
|
|
seq_printf(seq, "time: %-16llu\n", now);
|
|
|
|
/* print app write IOs */
|
|
seq_puts(seq, "[WRITE]\n");
|
|
seq_printf(seq, "app buffered: %-16llu\n",
|
|
sbi->rw_iostat[APP_BUFFERED_IO]);
|
|
seq_printf(seq, "app direct: %-16llu\n",
|
|
sbi->rw_iostat[APP_DIRECT_IO]);
|
|
seq_printf(seq, "app mapped: %-16llu\n",
|
|
sbi->rw_iostat[APP_MAPPED_IO]);
|
|
|
|
/* print fs write IOs */
|
|
seq_printf(seq, "fs data: %-16llu\n",
|
|
sbi->rw_iostat[FS_DATA_IO]);
|
|
seq_printf(seq, "fs node: %-16llu\n",
|
|
sbi->rw_iostat[FS_NODE_IO]);
|
|
seq_printf(seq, "fs meta: %-16llu\n",
|
|
sbi->rw_iostat[FS_META_IO]);
|
|
seq_printf(seq, "fs gc data: %-16llu\n",
|
|
sbi->rw_iostat[FS_GC_DATA_IO]);
|
|
seq_printf(seq, "fs gc node: %-16llu\n",
|
|
sbi->rw_iostat[FS_GC_NODE_IO]);
|
|
seq_printf(seq, "fs cp data: %-16llu\n",
|
|
sbi->rw_iostat[FS_CP_DATA_IO]);
|
|
seq_printf(seq, "fs cp node: %-16llu\n",
|
|
sbi->rw_iostat[FS_CP_NODE_IO]);
|
|
seq_printf(seq, "fs cp meta: %-16llu\n",
|
|
sbi->rw_iostat[FS_CP_META_IO]);
|
|
|
|
/* print app read IOs */
|
|
seq_puts(seq, "[READ]\n");
|
|
seq_printf(seq, "app buffered: %-16llu\n",
|
|
sbi->rw_iostat[APP_BUFFERED_READ_IO]);
|
|
seq_printf(seq, "app direct: %-16llu\n",
|
|
sbi->rw_iostat[APP_DIRECT_READ_IO]);
|
|
seq_printf(seq, "app mapped: %-16llu\n",
|
|
sbi->rw_iostat[APP_MAPPED_READ_IO]);
|
|
|
|
/* print fs read IOs */
|
|
seq_printf(seq, "fs data: %-16llu\n",
|
|
sbi->rw_iostat[FS_DATA_READ_IO]);
|
|
seq_printf(seq, "fs gc data: %-16llu\n",
|
|
sbi->rw_iostat[FS_GDATA_READ_IO]);
|
|
seq_printf(seq, "fs compr_data: %-16llu\n",
|
|
sbi->rw_iostat[FS_CDATA_READ_IO]);
|
|
seq_printf(seq, "fs node: %-16llu\n",
|
|
sbi->rw_iostat[FS_NODE_READ_IO]);
|
|
seq_printf(seq, "fs meta: %-16llu\n",
|
|
sbi->rw_iostat[FS_META_READ_IO]);
|
|
|
|
/* print other IOs */
|
|
seq_puts(seq, "[OTHER]\n");
|
|
seq_printf(seq, "fs discard: %-16llu\n",
|
|
sbi->rw_iostat[FS_DISCARD]);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static inline void __record_iostat_latency(struct f2fs_sb_info *sbi)
|
|
{
|
|
int io, idx = 0;
|
|
unsigned int cnt;
|
|
struct f2fs_iostat_latency iostat_lat[MAX_IO_TYPE][NR_PAGE_TYPE];
|
|
struct iostat_lat_info *io_lat = sbi->iostat_io_lat;
|
|
|
|
spin_lock_irq(&sbi->iostat_lat_lock);
|
|
for (idx = 0; idx < MAX_IO_TYPE; idx++) {
|
|
for (io = 0; io < NR_PAGE_TYPE; io++) {
|
|
cnt = io_lat->bio_cnt[idx][io];
|
|
iostat_lat[idx][io].peak_lat =
|
|
jiffies_to_msecs(io_lat->peak_lat[idx][io]);
|
|
iostat_lat[idx][io].cnt = cnt;
|
|
iostat_lat[idx][io].avg_lat = cnt ?
|
|
jiffies_to_msecs(io_lat->sum_lat[idx][io]) / cnt : 0;
|
|
io_lat->sum_lat[idx][io] = 0;
|
|
io_lat->peak_lat[idx][io] = 0;
|
|
io_lat->bio_cnt[idx][io] = 0;
|
|
}
|
|
}
|
|
spin_unlock_irq(&sbi->iostat_lat_lock);
|
|
|
|
trace_f2fs_iostat_latency(sbi, iostat_lat);
|
|
}
|
|
|
|
static inline void f2fs_record_iostat(struct f2fs_sb_info *sbi)
|
|
{
|
|
unsigned long long iostat_diff[NR_IO_TYPE];
|
|
int i;
|
|
|
|
if (time_is_after_jiffies(sbi->iostat_next_period))
|
|
return;
|
|
|
|
/* Need double check under the lock */
|
|
spin_lock(&sbi->iostat_lock);
|
|
if (time_is_after_jiffies(sbi->iostat_next_period)) {
|
|
spin_unlock(&sbi->iostat_lock);
|
|
return;
|
|
}
|
|
sbi->iostat_next_period = jiffies +
|
|
msecs_to_jiffies(sbi->iostat_period_ms);
|
|
|
|
for (i = 0; i < NR_IO_TYPE; i++) {
|
|
iostat_diff[i] = sbi->rw_iostat[i] -
|
|
sbi->prev_rw_iostat[i];
|
|
sbi->prev_rw_iostat[i] = sbi->rw_iostat[i];
|
|
}
|
|
spin_unlock(&sbi->iostat_lock);
|
|
|
|
trace_f2fs_iostat(sbi, iostat_diff);
|
|
|
|
__record_iostat_latency(sbi);
|
|
}
|
|
|
|
void f2fs_reset_iostat(struct f2fs_sb_info *sbi)
|
|
{
|
|
struct iostat_lat_info *io_lat = sbi->iostat_io_lat;
|
|
int i;
|
|
|
|
spin_lock(&sbi->iostat_lock);
|
|
for (i = 0; i < NR_IO_TYPE; i++) {
|
|
sbi->rw_iostat[i] = 0;
|
|
sbi->prev_rw_iostat[i] = 0;
|
|
}
|
|
spin_unlock(&sbi->iostat_lock);
|
|
|
|
spin_lock_irq(&sbi->iostat_lat_lock);
|
|
memset(io_lat, 0, sizeof(struct iostat_lat_info));
|
|
spin_unlock_irq(&sbi->iostat_lat_lock);
|
|
}
|
|
|
|
void f2fs_update_iostat(struct f2fs_sb_info *sbi,
|
|
enum iostat_type type, unsigned long long io_bytes)
|
|
{
|
|
if (!sbi->iostat_enable)
|
|
return;
|
|
|
|
spin_lock(&sbi->iostat_lock);
|
|
sbi->rw_iostat[type] += io_bytes;
|
|
|
|
if (type == APP_WRITE_IO || type == APP_DIRECT_IO)
|
|
sbi->rw_iostat[APP_BUFFERED_IO] =
|
|
sbi->rw_iostat[APP_WRITE_IO] -
|
|
sbi->rw_iostat[APP_DIRECT_IO];
|
|
|
|
if (type == APP_READ_IO || type == APP_DIRECT_READ_IO)
|
|
sbi->rw_iostat[APP_BUFFERED_READ_IO] =
|
|
sbi->rw_iostat[APP_READ_IO] -
|
|
sbi->rw_iostat[APP_DIRECT_READ_IO];
|
|
spin_unlock(&sbi->iostat_lock);
|
|
|
|
f2fs_record_iostat(sbi);
|
|
}
|
|
|
|
static inline void __update_iostat_latency(struct bio_iostat_ctx *iostat_ctx,
|
|
int rw, bool is_sync)
|
|
{
|
|
unsigned long ts_diff;
|
|
unsigned int iotype = iostat_ctx->type;
|
|
unsigned long flags;
|
|
struct f2fs_sb_info *sbi = iostat_ctx->sbi;
|
|
struct iostat_lat_info *io_lat = sbi->iostat_io_lat;
|
|
int idx;
|
|
|
|
if (!sbi->iostat_enable)
|
|
return;
|
|
|
|
ts_diff = jiffies - iostat_ctx->submit_ts;
|
|
if (iotype >= META_FLUSH)
|
|
iotype = META;
|
|
|
|
if (rw == 0) {
|
|
idx = READ_IO;
|
|
} else {
|
|
if (is_sync)
|
|
idx = WRITE_SYNC_IO;
|
|
else
|
|
idx = WRITE_ASYNC_IO;
|
|
}
|
|
|
|
spin_lock_irqsave(&sbi->iostat_lat_lock, flags);
|
|
io_lat->sum_lat[idx][iotype] += ts_diff;
|
|
io_lat->bio_cnt[idx][iotype]++;
|
|
if (ts_diff > io_lat->peak_lat[idx][iotype])
|
|
io_lat->peak_lat[idx][iotype] = ts_diff;
|
|
spin_unlock_irqrestore(&sbi->iostat_lat_lock, flags);
|
|
}
|
|
|
|
void iostat_update_and_unbind_ctx(struct bio *bio, int rw)
|
|
{
|
|
struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
|
|
bool is_sync = bio->bi_opf & REQ_SYNC;
|
|
|
|
if (rw == 0)
|
|
bio->bi_private = iostat_ctx->post_read_ctx;
|
|
else
|
|
bio->bi_private = iostat_ctx->sbi;
|
|
__update_iostat_latency(iostat_ctx, rw, is_sync);
|
|
mempool_free(iostat_ctx, bio_iostat_ctx_pool);
|
|
}
|
|
|
|
void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
|
|
struct bio *bio, struct bio_post_read_ctx *ctx)
|
|
{
|
|
struct bio_iostat_ctx *iostat_ctx;
|
|
/* Due to the mempool, this never fails. */
|
|
iostat_ctx = mempool_alloc(bio_iostat_ctx_pool, GFP_NOFS);
|
|
iostat_ctx->sbi = sbi;
|
|
iostat_ctx->submit_ts = 0;
|
|
iostat_ctx->type = 0;
|
|
iostat_ctx->post_read_ctx = ctx;
|
|
bio->bi_private = iostat_ctx;
|
|
}
|
|
|
|
int __init f2fs_init_iostat_processing(void)
|
|
{
|
|
bio_iostat_ctx_cache =
|
|
kmem_cache_create("f2fs_bio_iostat_ctx",
|
|
sizeof(struct bio_iostat_ctx), 0, 0, NULL);
|
|
if (!bio_iostat_ctx_cache)
|
|
goto fail;
|
|
bio_iostat_ctx_pool =
|
|
mempool_create_slab_pool(NUM_PREALLOC_IOSTAT_CTXS,
|
|
bio_iostat_ctx_cache);
|
|
if (!bio_iostat_ctx_pool)
|
|
goto fail_free_cache;
|
|
return 0;
|
|
|
|
fail_free_cache:
|
|
kmem_cache_destroy(bio_iostat_ctx_cache);
|
|
fail:
|
|
return -ENOMEM;
|
|
}
|
|
|
|
void f2fs_destroy_iostat_processing(void)
|
|
{
|
|
mempool_destroy(bio_iostat_ctx_pool);
|
|
kmem_cache_destroy(bio_iostat_ctx_cache);
|
|
}
|
|
|
|
int f2fs_init_iostat(struct f2fs_sb_info *sbi)
|
|
{
|
|
/* init iostat info */
|
|
spin_lock_init(&sbi->iostat_lock);
|
|
spin_lock_init(&sbi->iostat_lat_lock);
|
|
sbi->iostat_enable = false;
|
|
sbi->iostat_period_ms = DEFAULT_IOSTAT_PERIOD_MS;
|
|
sbi->iostat_io_lat = f2fs_kzalloc(sbi, sizeof(struct iostat_lat_info),
|
|
GFP_KERNEL);
|
|
if (!sbi->iostat_io_lat)
|
|
return -ENOMEM;
|
|
|
|
return 0;
|
|
}
|
|
|
|
void f2fs_destroy_iostat(struct f2fs_sb_info *sbi)
|
|
{
|
|
kfree(sbi->iostat_io_lat);
|
|
}
|