diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index e946c96daa32..78ca34c935ab 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -371,6 +371,62 @@ enum page_memcg_data_flags { #define MEMCG_DATA_FLAGS_MASK (__NR_MEMCG_DATA_FLAGS - 1) +static inline bool PageMemcgKmem(struct page *page); + +/* + * After the initialization objcg->memcg is always pointing at + * a valid memcg, but can be atomically swapped to the parent memcg. + * + * The caller must ensure that the returned memcg won't be released: + * e.g. acquire the rcu_read_lock or css_set_lock. + */ +static inline struct mem_cgroup *obj_cgroup_memcg(struct obj_cgroup *objcg) +{ + return READ_ONCE(objcg->memcg); +} + +/* + * __page_memcg - get the memory cgroup associated with a non-kmem page + * @page: a pointer to the page struct + * + * Returns a pointer to the memory cgroup associated with the page, + * or NULL. This function assumes that the page is known to have a + * proper memory cgroup pointer. It's not safe to call this function + * against some type of pages, e.g. slab pages or ex-slab pages or + * kmem pages. + */ +static inline struct mem_cgroup *__page_memcg(struct page *page) +{ + unsigned long memcg_data = page->memcg_data; + + VM_BUG_ON_PAGE(PageSlab(page), page); + VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_OBJCGS, page); + VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page); + + return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); +} + +/* + * __page_objcg - get the object cgroup associated with a kmem page + * @page: a pointer to the page struct + * + * Returns a pointer to the object cgroup associated with the page, + * or NULL. This function assumes that the page is known to have a + * proper object cgroup pointer. It's not safe to call this function + * against some type of pages, e.g. slab pages or ex-slab pages or + * LRU pages. + */ +static inline struct obj_cgroup *__page_objcg(struct page *page) +{ + unsigned long memcg_data = page->memcg_data; + + VM_BUG_ON_PAGE(PageSlab(page), page); + VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_OBJCGS, page); + VM_BUG_ON_PAGE(!(memcg_data & MEMCG_DATA_KMEM), page); + + return (struct obj_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); +} + /* * page_memcg - get the memory cgroup associated with a page * @page: a pointer to the page struct @@ -380,20 +436,23 @@ enum page_memcg_data_flags { * proper memory cgroup pointer. It's not safe to call this function * against some type of pages, e.g. slab pages or ex-slab pages. * - * Any of the following ensures page and memcg binding stability: + * For a non-kmem page any of the following ensures page and memcg binding + * stability: + * * - the page lock * - LRU isolation * - lock_page_memcg() * - exclusive reference + * + * For a kmem page a caller should hold an rcu read lock to protect memcg + * associated with a kmem page from being released. */ static inline struct mem_cgroup *page_memcg(struct page *page) { - unsigned long memcg_data = page->memcg_data; - - VM_BUG_ON_PAGE(PageSlab(page), page); - VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_OBJCGS, page); - - return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); + if (PageMemcgKmem(page)) + return obj_cgroup_memcg(__page_objcg(page)); + else + return __page_memcg(page); } /* @@ -407,11 +466,19 @@ static inline struct mem_cgroup *page_memcg(struct page *page) */ static inline struct mem_cgroup *page_memcg_rcu(struct page *page) { + unsigned long memcg_data = READ_ONCE(page->memcg_data); + VM_BUG_ON_PAGE(PageSlab(page), page); WARN_ON_ONCE(!rcu_read_lock_held()); - return (struct mem_cgroup *)(READ_ONCE(page->memcg_data) & - ~MEMCG_DATA_FLAGS_MASK); + if (memcg_data & MEMCG_DATA_KMEM) { + struct obj_cgroup *objcg; + + objcg = (void *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); + return obj_cgroup_memcg(objcg); + } + + return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); } /* @@ -419,15 +486,21 @@ static inline struct mem_cgroup *page_memcg_rcu(struct page *page) * @page: a pointer to the page struct * * Returns a pointer to the memory cgroup associated with the page, - * or NULL. This function unlike page_memcg() can take any page + * or NULL. This function unlike page_memcg() can take any page * as an argument. It has to be used in cases when it's not known if a page - * has an associated memory cgroup pointer or an object cgroups vector. + * has an associated memory cgroup pointer or an object cgroups vector or + * an object cgroup. + * + * For a non-kmem page any of the following ensures page and memcg binding + * stability: * - * Any of the following ensures page and memcg binding stability: * - the page lock * - LRU isolation * - lock_page_memcg() * - exclusive reference + * + * For a kmem page a caller should hold an rcu read lock to protect memcg + * associated with a kmem page from being released. */ static inline struct mem_cgroup *page_memcg_check(struct page *page) { @@ -440,6 +513,13 @@ static inline struct mem_cgroup *page_memcg_check(struct page *page) if (memcg_data & MEMCG_DATA_OBJCGS) return NULL; + if (memcg_data & MEMCG_DATA_KMEM) { + struct obj_cgroup *objcg; + + objcg = (void *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); + return obj_cgroup_memcg(objcg); + } + return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); } @@ -718,23 +798,17 @@ static inline void obj_cgroup_get(struct obj_cgroup *objcg) percpu_ref_get(&objcg->refcnt); } +static inline void obj_cgroup_get_many(struct obj_cgroup *objcg, + unsigned long nr) +{ + percpu_ref_get_many(&objcg->refcnt, nr); +} + static inline void obj_cgroup_put(struct obj_cgroup *objcg) { percpu_ref_put(&objcg->refcnt); } -/* - * After the initialization objcg->memcg is always pointing at - * a valid memcg, but can be atomically swapped to the parent memcg. - * - * The caller must ensure that the returned memcg won't be released: - * e.g. acquire the rcu_read_lock or css_set_lock. - */ -static inline struct mem_cgroup *obj_cgroup_memcg(struct obj_cgroup *objcg) -{ - return READ_ONCE(objcg->memcg); -} - static inline void mem_cgroup_put(struct mem_cgroup *memcg) { if (memcg) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 85fd2d86d23f..f3ddf94c9485 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -865,18 +865,22 @@ void __mod_lruvec_page_state(struct page *page, enum node_stat_item idx, int val) { struct page *head = compound_head(page); /* rmap on tail pages */ - struct mem_cgroup *memcg = page_memcg(head); + struct mem_cgroup *memcg; pg_data_t *pgdat = page_pgdat(page); struct lruvec *lruvec; + rcu_read_lock(); + memcg = page_memcg(head); /* Untracked pages have no memcg, no lruvec. Update only the node */ if (!memcg) { + rcu_read_unlock(); __mod_node_page_state(pgdat, idx, val); return; } lruvec = mem_cgroup_lruvec(memcg, pgdat); __mod_lruvec_state(lruvec, idx, val); + rcu_read_unlock(); } EXPORT_SYMBOL(__mod_lruvec_page_state); @@ -1051,20 +1055,6 @@ static __always_inline struct mem_cgroup *active_memcg(void) return current->active_memcg; } -static __always_inline struct mem_cgroup *get_active_memcg(void) -{ - struct mem_cgroup *memcg; - - rcu_read_lock(); - memcg = active_memcg(); - /* remote memcg must hold a ref. */ - if (memcg && WARN_ON_ONCE(!css_tryget(&memcg->css))) - memcg = root_mem_cgroup; - rcu_read_unlock(); - - return memcg; -} - static __always_inline bool memcg_kmem_bypass(void) { /* Allow remote memcg charging from any context. */ @@ -1078,20 +1068,6 @@ static __always_inline bool memcg_kmem_bypass(void) return false; } -/** - * If active memcg is set, do not fallback to current->mm->memcg. - */ -static __always_inline struct mem_cgroup *get_mem_cgroup_from_current(void) -{ - if (memcg_kmem_bypass()) - return NULL; - - if (unlikely(active_memcg())) - return get_active_memcg(); - - return get_mem_cgroup_from_mm(current->mm); -} - /** * mem_cgroup_iter - iterate over memory cgroup hierarchy * @root: hierarchy root @@ -3121,18 +3097,18 @@ static void __memcg_kmem_uncharge(struct mem_cgroup *memcg, unsigned int nr_page */ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order) { - struct mem_cgroup *memcg; + struct obj_cgroup *objcg; int ret = 0; - memcg = get_mem_cgroup_from_current(); - if (memcg && !mem_cgroup_is_root(memcg)) { - ret = __memcg_kmem_charge(memcg, gfp, 1 << order); + objcg = get_obj_cgroup_from_current(); + if (objcg) { + ret = obj_cgroup_charge_pages(objcg, gfp, 1 << order); if (!ret) { - page->memcg_data = (unsigned long)memcg | + page->memcg_data = (unsigned long)objcg | MEMCG_DATA_KMEM; return 0; } - css_put(&memcg->css); + obj_cgroup_put(objcg); } return ret; } @@ -3144,16 +3120,16 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order) */ void __memcg_kmem_uncharge_page(struct page *page, int order) { - struct mem_cgroup *memcg = page_memcg(page); + struct obj_cgroup *objcg; unsigned int nr_pages = 1 << order; - if (!memcg) + if (!PageMemcgKmem(page)) return; - VM_BUG_ON_PAGE(mem_cgroup_is_root(memcg), page); - __memcg_kmem_uncharge(memcg, nr_pages); + objcg = __page_objcg(page); + obj_cgroup_uncharge_pages(objcg, nr_pages); page->memcg_data = 0; - css_put(&memcg->css); + obj_cgroup_put(objcg); } static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes) @@ -3294,7 +3270,11 @@ void split_page_memcg(struct page *head, unsigned int nr) for (i = 1; i < nr; i++) head[i].memcg_data = head->memcg_data; - css_get_many(&memcg->css, nr - 1); + + if (PageMemcgKmem(head)) + obj_cgroup_get_many(__page_objcg(head), nr - 1); + else + css_get_many(&memcg->css, nr - 1); } #ifdef CONFIG_MEMCG_SWAP @@ -6788,7 +6768,7 @@ void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry) struct uncharge_gather { struct mem_cgroup *memcg; - unsigned long nr_pages; + unsigned long nr_memory; unsigned long pgpgout; unsigned long nr_kmem; struct page *dummy_page; @@ -6803,10 +6783,10 @@ static void uncharge_batch(const struct uncharge_gather *ug) { unsigned long flags; - if (!mem_cgroup_is_root(ug->memcg)) { - page_counter_uncharge(&ug->memcg->memory, ug->nr_pages); + if (ug->nr_memory) { + page_counter_uncharge(&ug->memcg->memory, ug->nr_memory); if (do_memsw_account()) - page_counter_uncharge(&ug->memcg->memsw, ug->nr_pages); + page_counter_uncharge(&ug->memcg->memsw, ug->nr_memory); if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && ug->nr_kmem) page_counter_uncharge(&ug->memcg->kmem, ug->nr_kmem); memcg_oom_recover(ug->memcg); @@ -6814,7 +6794,7 @@ static void uncharge_batch(const struct uncharge_gather *ug) local_irq_save(flags); __count_memcg_events(ug->memcg, PGPGOUT, ug->pgpgout); - __this_cpu_add(ug->memcg->vmstats_percpu->nr_page_events, ug->nr_pages); + __this_cpu_add(ug->memcg->vmstats_percpu->nr_page_events, ug->nr_memory); memcg_check_events(ug->memcg, ug->dummy_page); local_irq_restore(flags); @@ -6825,40 +6805,60 @@ static void uncharge_batch(const struct uncharge_gather *ug) static void uncharge_page(struct page *page, struct uncharge_gather *ug) { unsigned long nr_pages; + struct mem_cgroup *memcg; + struct obj_cgroup *objcg; VM_BUG_ON_PAGE(PageLRU(page), page); - if (!page_memcg(page)) - return; - /* * Nobody should be changing or seriously looking at - * page_memcg(page) at this point, we have fully + * page memcg or objcg at this point, we have fully * exclusive access to the page. */ + if (PageMemcgKmem(page)) { + objcg = __page_objcg(page); + /* + * This get matches the put at the end of the function and + * kmem pages do not hold memcg references anymore. + */ + memcg = get_mem_cgroup_from_objcg(objcg); + } else { + memcg = __page_memcg(page); + } - if (ug->memcg != page_memcg(page)) { + if (!memcg) + return; + + if (ug->memcg != memcg) { if (ug->memcg) { uncharge_batch(ug); uncharge_gather_clear(ug); } - ug->memcg = page_memcg(page); + ug->memcg = memcg; ug->dummy_page = page; /* pairs with css_put in uncharge_batch */ - css_get(&ug->memcg->css); + css_get(&memcg->css); } nr_pages = compound_nr(page); - ug->nr_pages += nr_pages; - if (PageMemcgKmem(page)) + if (PageMemcgKmem(page)) { + ug->nr_memory += nr_pages; ug->nr_kmem += nr_pages; - else + + page->memcg_data = 0; + obj_cgroup_put(objcg); + } else { + /* LRU pages aren't accounted at the root level */ + if (!mem_cgroup_is_root(memcg)) + ug->nr_memory += nr_pages; ug->pgpgout++; - page->memcg_data = 0; - css_put(&ug->memcg->css); + page->memcg_data = 0; + } + + css_put(&memcg->css); } /**