mm: vmstat: move slab statistics from zone to node counters

Patch series "mm: per-lruvec slab stats"

Josef is working on a new approach to balancing slab caches and the page
cache.  For this to work, he needs slab cache statistics on the lruvec
level.  These patches implement that by adding infrastructure that
allows updating and reading generic VM stat items per lruvec, then
switches some existing VM accounting sites, including the slab
accounting ones, to this new cgroup-aware API.

I'll follow up with more patches on this, because the memory controller
can be simplified substantially once we replace private memcg accounting
by making the existing VM accounting sites cgroup-aware.  But this is
enough for Josef to base his slab reclaim work on, so here goes.

This patch (of 5):

To re-implement slab cache vs. page cache balancing, we'll need the
slab counters at the lruvec level.  Ever since LRU reclaim was moved
from the zone to the node, the lruvec is the intersection of the node
(not the zone) and the memcg.

We could retain the per-zone counters for when the page allocator dumps
its memory information on failures, and have counters on both levels -
which on all but NUMA node 0 is usually redundant.  But let's keep it
simple for now and just move them.  If anybody complains we can restore
the per-zone counters.

[hannes@cmpxchg.org: fix oops]
  Link: http://lkml.kernel.org/r/20170605183511.GA8915@cmpxchg.org
Link: http://lkml.kernel.org/r/20170530181724.27197-3-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Josef Bacik <josef@toxicpanda.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Johannes Weiner 2017-07-06 15:40:43 -07:00 committed by Linus Torvalds
parent 2b2695f5fd
commit 385386cff4
7 changed files with 19 additions and 20 deletions

View File

@ -129,11 +129,11 @@ static ssize_t node_read_meminfo(struct device *dev,
nid, K(node_page_state(pgdat, NR_UNSTABLE_NFS)), nid, K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)), nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)),
nid, K(node_page_state(pgdat, NR_WRITEBACK_TEMP)), nid, K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
nid, K(sum_zone_node_page_state(nid, NR_SLAB_RECLAIMABLE) + nid, K(node_page_state(pgdat, NR_SLAB_RECLAIMABLE) +
sum_zone_node_page_state(nid, NR_SLAB_UNRECLAIMABLE)), node_page_state(pgdat, NR_SLAB_UNRECLAIMABLE)),
nid, K(sum_zone_node_page_state(nid, NR_SLAB_RECLAIMABLE)), nid, K(node_page_state(pgdat, NR_SLAB_RECLAIMABLE)),
#ifdef CONFIG_TRANSPARENT_HUGEPAGE #ifdef CONFIG_TRANSPARENT_HUGEPAGE
nid, K(sum_zone_node_page_state(nid, NR_SLAB_UNRECLAIMABLE)), nid, K(node_page_state(pgdat, NR_SLAB_UNRECLAIMABLE)),
nid, K(node_page_state(pgdat, NR_ANON_THPS) * nid, K(node_page_state(pgdat, NR_ANON_THPS) *
HPAGE_PMD_NR), HPAGE_PMD_NR),
nid, K(node_page_state(pgdat, NR_SHMEM_THPS) * nid, K(node_page_state(pgdat, NR_SHMEM_THPS) *
@ -141,7 +141,7 @@ static ssize_t node_read_meminfo(struct device *dev,
nid, K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED) * nid, K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED) *
HPAGE_PMD_NR)); HPAGE_PMD_NR));
#else #else
nid, K(sum_zone_node_page_state(nid, NR_SLAB_UNRECLAIMABLE))); nid, K(node_page_state(pgdat, NR_SLAB_UNRECLAIMABLE)));
#endif #endif
n += hugetlb_report_node_meminfo(nid, buf + n); n += hugetlb_report_node_meminfo(nid, buf + n);
return n; return n;

View File

@ -125,8 +125,6 @@ enum zone_stat_item {
NR_ZONE_UNEVICTABLE, NR_ZONE_UNEVICTABLE,
NR_ZONE_WRITE_PENDING, /* Count of dirty, writeback and unstable pages */ NR_ZONE_WRITE_PENDING, /* Count of dirty, writeback and unstable pages */
NR_MLOCK, /* mlock()ed pages found and moved off LRU */ NR_MLOCK, /* mlock()ed pages found and moved off LRU */
NR_SLAB_RECLAIMABLE,
NR_SLAB_UNRECLAIMABLE,
NR_PAGETABLE, /* used for pagetables */ NR_PAGETABLE, /* used for pagetables */
NR_KERNEL_STACK_KB, /* measured in KiB */ NR_KERNEL_STACK_KB, /* measured in KiB */
/* Second 128 byte cacheline */ /* Second 128 byte cacheline */
@ -152,6 +150,8 @@ enum node_stat_item {
NR_INACTIVE_FILE, /* " " " " " */ NR_INACTIVE_FILE, /* " " " " " */
NR_ACTIVE_FILE, /* " " " " " */ NR_ACTIVE_FILE, /* " " " " " */
NR_UNEVICTABLE, /* " " " " " */ NR_UNEVICTABLE, /* " " " " " */
NR_SLAB_RECLAIMABLE,
NR_SLAB_UNRECLAIMABLE,
NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */ NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */
NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */ NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */
WORKINGSET_REFAULT, WORKINGSET_REFAULT,

View File

@ -4643,8 +4643,6 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
" present:%lukB" " present:%lukB"
" managed:%lukB" " managed:%lukB"
" mlocked:%lukB" " mlocked:%lukB"
" slab_reclaimable:%lukB"
" slab_unreclaimable:%lukB"
" kernel_stack:%lukB" " kernel_stack:%lukB"
" pagetables:%lukB" " pagetables:%lukB"
" bounce:%lukB" " bounce:%lukB"
@ -4666,8 +4664,6 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
K(zone->present_pages), K(zone->present_pages),
K(zone->managed_pages), K(zone->managed_pages),
K(zone_page_state(zone, NR_MLOCK)), K(zone_page_state(zone, NR_MLOCK)),
K(zone_page_state(zone, NR_SLAB_RECLAIMABLE)),
K(zone_page_state(zone, NR_SLAB_UNRECLAIMABLE)),
zone_page_state(zone, NR_KERNEL_STACK_KB), zone_page_state(zone, NR_KERNEL_STACK_KB),
K(zone_page_state(zone, NR_PAGETABLE)), K(zone_page_state(zone, NR_PAGETABLE)),
K(zone_page_state(zone, NR_BOUNCE)), K(zone_page_state(zone, NR_BOUNCE)),
@ -5153,6 +5149,7 @@ static void build_zonelists(pg_data_t *pgdat)
*/ */
static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch); static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch);
static DEFINE_PER_CPU(struct per_cpu_pageset, boot_pageset); static DEFINE_PER_CPU(struct per_cpu_pageset, boot_pageset);
static DEFINE_PER_CPU(struct per_cpu_nodestat, boot_nodestats);
static void setup_zone_pageset(struct zone *zone); static void setup_zone_pageset(struct zone *zone);
/* /*
@ -6053,6 +6050,8 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
spin_lock_init(&pgdat->lru_lock); spin_lock_init(&pgdat->lru_lock);
lruvec_init(node_lruvec(pgdat)); lruvec_init(node_lruvec(pgdat));
pgdat->per_cpu_nodestats = &boot_nodestats;
for (j = 0; j < MAX_NR_ZONES; j++) { for (j = 0; j < MAX_NR_ZONES; j++) {
struct zone *zone = pgdat->node_zones + j; struct zone *zone = pgdat->node_zones + j;
unsigned long size, realsize, freesize, memmap_pages; unsigned long size, realsize, freesize, memmap_pages;

View File

@ -1425,10 +1425,10 @@ static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags,
nr_pages = (1 << cachep->gfporder); nr_pages = (1 << cachep->gfporder);
if (cachep->flags & SLAB_RECLAIM_ACCOUNT) if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
add_zone_page_state(page_zone(page), add_node_page_state(page_pgdat(page),
NR_SLAB_RECLAIMABLE, nr_pages); NR_SLAB_RECLAIMABLE, nr_pages);
else else
add_zone_page_state(page_zone(page), add_node_page_state(page_pgdat(page),
NR_SLAB_UNRECLAIMABLE, nr_pages); NR_SLAB_UNRECLAIMABLE, nr_pages);
__SetPageSlab(page); __SetPageSlab(page);
@ -1459,10 +1459,10 @@ static void kmem_freepages(struct kmem_cache *cachep, struct page *page)
kmemcheck_free_shadow(page, order); kmemcheck_free_shadow(page, order);
if (cachep->flags & SLAB_RECLAIM_ACCOUNT) if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
sub_zone_page_state(page_zone(page), sub_node_page_state(page_pgdat(page),
NR_SLAB_RECLAIMABLE, nr_freed); NR_SLAB_RECLAIMABLE, nr_freed);
else else
sub_zone_page_state(page_zone(page), sub_node_page_state(page_pgdat(page),
NR_SLAB_UNRECLAIMABLE, nr_freed); NR_SLAB_UNRECLAIMABLE, nr_freed);
BUG_ON(!PageSlab(page)); BUG_ON(!PageSlab(page));

View File

@ -1615,7 +1615,7 @@ out:
if (!page) if (!page)
return NULL; return NULL;
mod_zone_page_state(page_zone(page), mod_node_page_state(page_pgdat(page),
(s->flags & SLAB_RECLAIM_ACCOUNT) ? (s->flags & SLAB_RECLAIM_ACCOUNT) ?
NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1 << oo_order(oo)); 1 << oo_order(oo));
@ -1655,7 +1655,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
kmemcheck_free_shadow(page, compound_order(page)); kmemcheck_free_shadow(page, compound_order(page));
mod_zone_page_state(page_zone(page), mod_node_page_state(page_pgdat(page),
(s->flags & SLAB_RECLAIM_ACCOUNT) ? (s->flags & SLAB_RECLAIM_ACCOUNT) ?
NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
-pages); -pages);

View File

@ -3874,7 +3874,7 @@ int node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order)
* unmapped file backed pages. * unmapped file backed pages.
*/ */
if (node_pagecache_reclaimable(pgdat) <= pgdat->min_unmapped_pages && if (node_pagecache_reclaimable(pgdat) <= pgdat->min_unmapped_pages &&
sum_zone_node_page_state(pgdat->node_id, NR_SLAB_RECLAIMABLE) <= pgdat->min_slab_pages) node_page_state(pgdat, NR_SLAB_RECLAIMABLE) <= pgdat->min_slab_pages)
return NODE_RECLAIM_FULL; return NODE_RECLAIM_FULL;
/* /*

View File

@ -928,8 +928,6 @@ const char * const vmstat_text[] = {
"nr_zone_unevictable", "nr_zone_unevictable",
"nr_zone_write_pending", "nr_zone_write_pending",
"nr_mlock", "nr_mlock",
"nr_slab_reclaimable",
"nr_slab_unreclaimable",
"nr_page_table_pages", "nr_page_table_pages",
"nr_kernel_stack", "nr_kernel_stack",
"nr_bounce", "nr_bounce",
@ -952,6 +950,8 @@ const char * const vmstat_text[] = {
"nr_inactive_file", "nr_inactive_file",
"nr_active_file", "nr_active_file",
"nr_unevictable", "nr_unevictable",
"nr_slab_reclaimable",
"nr_slab_unreclaimable",
"nr_isolated_anon", "nr_isolated_anon",
"nr_isolated_file", "nr_isolated_file",
"workingset_refault", "workingset_refault",