diff --git a/mm/vmscan.c b/mm/vmscan.c
index 3584067800e1..d3488828331a 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2198,10 +2198,40 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
 }
 #endif
 
+/*
+ * pgdat_balanced is used when checking if a node is balanced for high-order
+ * allocations. Only zones that meet watermarks and are in a zone allowed
+ * by the caller's classzone_idx are added to balanced_pages. The total of
+ * balanced pages must be at least 25% of the zones allowed by classzone_idx
+ * for the node to be considered balanced. Forcing all zones to be balanced
+ * for high orders can cause excessive reclaim when there are imbalanced zones.
+ * The choice of 25% is due to
+ *   o a 16M DMA zone that is balanced will not balance a zone on any
+ *     reasonably sized machine
+ *   o On all other machines, the top zone must be at least a reasonable
+ *     percentage of the middle zones. For example, on 32-bit x86, highmem
+ *     would need to be at least 256M for it to balance a whole node.
+ *     Similarly, on x86-64 the Normal zone would need to be at least 1G
+ *     to balance a node on its own. These seemed like reasonable ratios.
+ */
+static bool pgdat_balanced(pg_data_t *pgdat, unsigned long balanced_pages,
+						int classzone_idx)
+{
+	unsigned long present_pages = 0;
+	int i;
+
+	for (i = 0; i <= classzone_idx; i++)
+		present_pages += pgdat->node_zones[i].present_pages;
+	/* >= so that zones with no present pages still count as balanced */
+	return balanced_pages >= (present_pages >> 2);
+}
+
 /* is kswapd sleeping prematurely? */
 static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
 {
 	int i;
+	unsigned long balanced = 0;
+	bool all_zones_ok = true;
 
 	/* If a direct reclaimer woke kswapd within HZ/10, it's premature */
 	if (remaining)
@@ -2219,10 +2249,20 @@ static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
 
 		if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone),
 								0, 0))
-			return 1;
+			all_zones_ok = false;
+		else
+			balanced += zone->present_pages;
 	}
 
-	return 0;
+	/*
+	 * For high-order requests, kswapd may sleep once the balanced zones
+	 * hold at least 25% of the node's pages; for order-0, all zones must
+	 * be balanced. A non-zero return means sleeping would be premature.
+	 */
+	if (order)
+		return !pgdat_balanced(pgdat, balanced, pgdat->nr_zones - 1);
+	else
+		return !all_zones_ok;
 }
 
 /*
@@ -2250,7 +2290,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
 							int classzone_idx)
 {
 	int all_zones_ok;
-	int any_zone_ok;
+	unsigned long balanced;
 	int priority;
 	int i;
 	int end_zone = 0;	/* Inclusive.  0 = ZONE_DMA */
@@ -2284,7 +2324,7 @@ loop_again:
 			disable_swap_token();
 
 		all_zones_ok = 1;
-		any_zone_ok = 0;
+		balanced = 0;
 
 		/*
 		 * Scan in the highmem->dma direction for the highest
@@ -2404,11 +2444,11 @@ loop_again:
 				 */
 				zone_clear_flag(zone, ZONE_CONGESTED);
 				if (i <= classzone_idx)
-					any_zone_ok = 1;
+					balanced += zone->present_pages;
 			}
 
 		}
-		if (all_zones_ok || (order && any_zone_ok))
+		if (all_zones_ok || (order && pgdat_balanced(pgdat, balanced, classzone_idx)))
 			break;		/* kswapd: all done */
 		/*
 		 * OK, kswapd is getting into trouble.  Take a nap, then take
@@ -2434,10 +2474,10 @@ out:
 
 	/*
 	 * order-0: All zones must meet high watermark for a balanced node
-	 * high-order: Any zone below pgdats classzone_idx must meet the high
-	 *             watermark for a balanced node
+	 * high-order: Balanced zones must make up at least 25% of the node
+	 *             for the node to be balanced
 	 */
-	if (!(all_zones_ok || (order && any_zone_ok))) {
+	if (!(all_zones_ok || (order && pgdat_balanced(pgdat, balanced, classzone_idx)))) {
 		cond_resched();
 
 		try_to_freeze();