writeback: support retrieving per group debug writeback stats of bdi

Add /sys/kernel/debug/bdi/xxx/wb_stats to show the per-group (per-wb)
writeback stats of a bdi.

The following domain hierarchy was tested:
                global domain (320G)
                /                 \
        cgroup domain1(10G)     cgroup domain2(10G)
                |                 |
bdi            wb1               wb2

/* per wb writeback info of bdi is collected */
cat wb_stats
WbCgIno:                    1
WbWriteback:                0 kB
WbReclaimable:              0 kB
WbDirtyThresh:              0 kB
WbDirtied:                  0 kB
WbWritten:                  0 kB
WbWriteBandwidth:      102400 kBps
b_dirty:                    0
b_io:                       0
b_more_io:                  0
b_dirty_time:               0
state:                      1

WbCgIno:                 4091
WbWriteback:             1792 kB
WbReclaimable:         820512 kB
WbDirtyThresh:        6004692 kB
WbDirtied:            1820448 kB
WbWritten:             999488 kB
WbWriteBandwidth:      169020 kBps
b_dirty:                    0
b_io:                       0
b_more_io:                  1
b_dirty_time:               0
state:                      5

WbCgIno:                 4131
WbWriteback:             1120 kB
WbReclaimable:         820064 kB
WbDirtyThresh:        6004728 kB
WbDirtied:            1822688 kB
WbWritten:            1002400 kB
WbWriteBandwidth:      153520 kBps
b_dirty:                    0
b_io:                       0
b_more_io:                  1
b_dirty_time:               0
state:                      5

[shikemeng@huaweicloud.com: fix build problems]
  Link: https://lkml.kernel.org/r/20240423034643.141219-4-shikemeng@huaweicloud.com
Link: https://lkml.kernel.org/r/20240423034643.141219-3-shikemeng@huaweicloud.com
Signed-off-by: Kemeng Shi <shikemeng@huaweicloud.com>
Cc: Brian Foster <bfoster@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Cc: David Sterba <dsterba@suse.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Mateusz Guzik <mjguzik@gmail.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: SeongJae Park <sj@kernel.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
Kemeng Shi 2024-04-23 11:46:40 +08:00 committed by Andrew Morton
parent e32e27009f
commit 4b5bbc39d7
3 changed files with 99 additions and 2 deletions

View File

@ -355,6 +355,7 @@ int dirtytime_interval_handler(struct ctl_table *table, int write,
void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty); void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty);
unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh); unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh);
/* Dirty threshold of @wb evaluated in its memory cgroup writeback domain. */
unsigned long cgwb_calc_thresh(struct bdi_writeback *wb);
void wb_update_bandwidth(struct bdi_writeback *wb); void wb_update_bandwidth(struct bdi_writeback *wb);

View File

@ -155,19 +155,96 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
} }
DEFINE_SHOW_ATTRIBUTE(bdi_debug_stats); DEFINE_SHOW_ATTRIBUTE(bdi_debug_stats);
/*
 * Print one writeback context's statistics as a "label: value" record,
 * followed by a blank line that separates it from the next record.
 *
 * @m:     seq_file that receives the text
 * @wb:    writeback context being reported; supplies the cgroup inode
 *         number (CONFIG_CGROUP_WRITEBACK only), avg_write_bandwidth and
 *         the state word
 * @stats: counters already gathered for @wb by the caller
 *
 * Every label is padded to 19 columns in front of a right-aligned
 * 10-character value field, so the values of all records line up in a
 * single column.  The state word is printed in hexadecimal.
 */
static void wb_stats_show(struct seq_file *m, struct bdi_writeback *wb,
			  struct wb_stats *stats)
{
	/*
	 * Without cgroup writeback there is no memcg css to look at; report
	 * the constant 1 as the cgroup inode number in that configuration.
	 */
#ifdef CONFIG_CGROUP_WRITEBACK
	unsigned long cg_ino = cgroup_ino(wb->memcg_css->cgroup);
#else
	unsigned long cg_ino = 1ul;
#endif

	/*
	 * K() is defined elsewhere in this file; it is applied to every value
	 * shown with a kB/kBps unit (presumably a pages-to-kB conversion --
	 * confirm against its definition).
	 */
	seq_printf(m,
		   "WbCgIno:           %10lu\n"
		   "WbWriteback:       %10lu kB\n"
		   "WbReclaimable:     %10lu kB\n"
		   "WbDirtyThresh:     %10lu kB\n"
		   "WbDirtied:         %10lu kB\n"
		   "WbWritten:         %10lu kB\n"
		   "WbWriteBandwidth:  %10lu kBps\n"
		   "b_dirty:           %10lu\n"
		   "b_io:              %10lu\n"
		   "b_more_io:         %10lu\n"
		   "b_dirty_time:      %10lu\n"
		   "state:             %10lx\n\n",
		   cg_ino,
		   K(stats->nr_writeback),
		   K(stats->nr_reclaimable),
		   K(stats->wb_thresh),
		   K(stats->nr_dirtied),
		   K(stats->nr_written),
		   K(wb->avg_write_bandwidth),
		   stats->nr_dirty,
		   stats->nr_io,
		   stats->nr_more_io,
		   stats->nr_dirty_time,
		   wb->state);
}
/*
 * seq_file show handler behind the per-bdi "wb_stats" debugfs file.
 *
 * Walks every bdi_writeback linked on the bdi's wb_list and prints one
 * statistics record per writeback context that could still be pinned.
 * Always returns 0.
 */
static int cgwb_debug_stats_show(struct seq_file *m, void *v)
{
	struct backing_dev_info *bdi = m->private;
	unsigned long bg_limit, dirty_limit;
	struct bdi_writeback *wb;

	/* Only the dirty limit is consumed below; bg_limit is a required out-arg. */
	global_dirty_limits(&bg_limit, &dirty_limit);

	rcu_read_lock();
	list_for_each_entry_rcu(wb, &bdi->wb_list, bdi_node) {
		struct wb_stats stats = { .dirty_thresh = dirty_limit };

		/* A wb whose reference can no longer be taken is skipped. */
		if (wb_tryget(wb)) {
			collect_wb_stats(&stats, wb);

			/*
			 * For a wb that belongs to a memcg writeback domain the
			 * reported threshold is the smaller of the global-domain
			 * value and the cgroup-domain value.
			 *
			 * cgwb_calc_thresh() may sleep in cgroup_rstat_flush(),
			 * so the RCU read lock is released around the call; the
			 * reference taken above keeps @wb alive meanwhile.
			 *
			 * NOTE(review): the list walk resumes from @wb after the
			 * lock is re-taken -- confirm that its successor on
			 * wb_list cannot be freed during the unlocked window.
			 */
			if (mem_cgroup_wb_domain(wb)) {
				unsigned long cg_thresh;

				rcu_read_unlock();
				cg_thresh = cgwb_calc_thresh(wb);
				stats.wb_thresh = min(stats.wb_thresh, cg_thresh);
				rcu_read_lock();
			}

			wb_stats_show(m, wb, &stats);
			wb_put(wb);
		}
	}
	rcu_read_unlock();

	return 0;
}
DEFINE_SHOW_ATTRIBUTE(cgwb_debug_stats);
static void bdi_debug_register(struct backing_dev_info *bdi, const char *name) static void bdi_debug_register(struct backing_dev_info *bdi, const char *name)
{ {
bdi->debug_dir = debugfs_create_dir(name, bdi_debug_root); bdi->debug_dir = debugfs_create_dir(name, bdi_debug_root);
debugfs_create_file("stats", 0444, bdi->debug_dir, bdi, debugfs_create_file("stats", 0444, bdi->debug_dir, bdi,
&bdi_debug_stats_fops); &bdi_debug_stats_fops);
debugfs_create_file("wb_stats", 0444, bdi->debug_dir, bdi,
&cgwb_debug_stats_fops);
} }
static void bdi_debug_unregister(struct backing_dev_info *bdi) static void bdi_debug_unregister(struct backing_dev_info *bdi)
{ {
debugfs_remove_recursive(bdi->debug_dir); debugfs_remove_recursive(bdi->debug_dir);
} }
#else #else /* CONFIG_DEBUG_FS */
static inline void bdi_debug_init(void) static inline void bdi_debug_init(void)
{ {
} }
@ -178,7 +255,7 @@ static inline void bdi_debug_register(struct backing_dev_info *bdi,
static inline void bdi_debug_unregister(struct backing_dev_info *bdi) static inline void bdi_debug_unregister(struct backing_dev_info *bdi)
{ {
} }
#endif #endif /* CONFIG_DEBUG_FS */
static ssize_t read_ahead_kb_store(struct device *dev, static ssize_t read_ahead_kb_store(struct device *dev,
struct device_attribute *attr, struct device_attribute *attr,

View File

@ -892,6 +892,25 @@ unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh)
return __wb_calc_thresh(&gdtc); return __wb_calc_thresh(&gdtc);
} }
/*
 * cgwb_calc_thresh - dirty threshold of @wb in its memcg writeback domain
 * @wb: writeback context to evaluate
 *
 * Fills a global-domain control block (available memory plus the sum of
 * the global NR_FILE_DIRTY and NR_WRITEBACK counters) and a memcg-domain
 * control block (from mem_cgroup_wb_stats()), derives the memcg domain's
 * dirty limits, and returns the threshold __wb_calc_thresh() computes
 * from the memcg-domain block.
 */
unsigned long cgwb_calc_thresh(struct bdi_writeback *wb)
{
	struct dirty_throttle_control gdtc = { GDTC_INIT_NO_WB };
	struct dirty_throttle_control mdtc = { MDTC_INIT(wb, &gdtc) };
	unsigned long cg_filepages = 0;
	unsigned long cg_headroom = 0;
	unsigned long cg_writeback = 0;
	unsigned long global_dirty;

	/* Global domain: dirtyable memory, then dirty + under-writeback pages. */
	gdtc.avail = global_dirtyable_memory();
	global_dirty = global_node_page_state(NR_FILE_DIRTY);
	global_dirty += global_node_page_state(NR_WRITEBACK);
	gdtc.dirty = global_dirty;

	/* Memcg domain: writeback pages are counted as dirty here as well. */
	mem_cgroup_wb_stats(wb, &cg_filepages, &cg_headroom,
			    &mdtc.dirty, &cg_writeback);
	mdtc.dirty += cg_writeback;
	mdtc_calc_avail(&mdtc, cg_filepages, cg_headroom);

	/* mdtc was initialised with a pointer to gdtc as its global counterpart. */
	domain_dirty_limits(&mdtc);

	return __wb_calc_thresh(&mdtc);
}
/* /*
* setpoint - dirty 3 * setpoint - dirty 3
* f(dirty) := 1.0 + (----------------) * f(dirty) := 1.0 + (----------------)