2007-10-16 08:26:11 +00:00
|
|
|
/*
|
|
|
|
* linux/mm/page_isolation.c
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/mm.h>
|
|
|
|
#include <linux/page-isolation.h>
|
|
|
|
#include <linux/pageblock-flags.h>
|
2012-07-31 23:43:50 +00:00
|
|
|
#include <linux/memory.h>
|
2007-10-16 08:26:11 +00:00
|
|
|
#include "internal.h"
|
|
|
|
|
2012-07-31 23:43:50 +00:00
|
|
|
int set_migratetype_isolate(struct page *page)
|
|
|
|
{
|
|
|
|
struct zone *zone;
|
|
|
|
unsigned long flags, pfn;
|
|
|
|
struct memory_isolate_notify arg;
|
|
|
|
int notifier_ret;
|
|
|
|
int ret = -EBUSY;
|
|
|
|
|
|
|
|
zone = page_zone(page);
|
|
|
|
|
|
|
|
spin_lock_irqsave(&zone->lock, flags);
|
|
|
|
|
|
|
|
pfn = page_to_pfn(page);
|
|
|
|
arg.start_pfn = pfn;
|
|
|
|
arg.nr_pages = pageblock_nr_pages;
|
|
|
|
arg.pages_found = 0;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* It may be possible to isolate a pageblock even if the
|
|
|
|
* migratetype is not MIGRATE_MOVABLE. The memory isolation
|
|
|
|
* notifier chain is used by balloon drivers to return the
|
|
|
|
* number of pages in a range that are held by the balloon
|
|
|
|
* driver to shrink memory. If all the pages are accounted for
|
|
|
|
* by balloons, are free, or on the LRU, isolation can continue.
|
|
|
|
* Later, for example, when memory hotplug notifier runs, these
|
|
|
|
* pages reported as "can be isolated" should be isolated(freed)
|
|
|
|
* by the balloon driver through the memory notifier chain.
|
|
|
|
*/
|
|
|
|
notifier_ret = memory_isolate_notify(MEM_ISOLATE_COUNT, &arg);
|
|
|
|
notifier_ret = notifier_to_errno(notifier_ret);
|
|
|
|
if (notifier_ret)
|
|
|
|
goto out;
|
|
|
|
/*
|
|
|
|
* FIXME: Now, memory hotplug doesn't call shrink_slab() by itself.
|
|
|
|
* We just check MOVABLE pages.
|
|
|
|
*/
|
|
|
|
if (!has_unmovable_pages(zone, page, arg.pages_found))
|
|
|
|
ret = 0;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* immobile means "not-on-lru" paes. If immobile is larger than
|
|
|
|
* removable-by-driver pages reported by notifier, we'll fail.
|
|
|
|
*/
|
|
|
|
|
|
|
|
out:
|
|
|
|
if (!ret) {
|
|
|
|
set_pageblock_migratetype(page, MIGRATE_ISOLATE);
|
|
|
|
move_freepages_block(zone, page, MIGRATE_ISOLATE);
|
|
|
|
}
|
|
|
|
|
|
|
|
spin_unlock_irqrestore(&zone->lock, flags);
|
|
|
|
if (!ret)
|
|
|
|
drain_all_pages();
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
void unset_migratetype_isolate(struct page *page, unsigned migratetype)
|
|
|
|
{
|
|
|
|
struct zone *zone;
|
|
|
|
unsigned long flags;
|
|
|
|
zone = page_zone(page);
|
|
|
|
spin_lock_irqsave(&zone->lock, flags);
|
|
|
|
if (get_pageblock_migratetype(page) != MIGRATE_ISOLATE)
|
|
|
|
goto out;
|
|
|
|
set_pageblock_migratetype(page, migratetype);
|
|
|
|
move_freepages_block(zone, page, migratetype);
|
|
|
|
out:
|
|
|
|
spin_unlock_irqrestore(&zone->lock, flags);
|
|
|
|
}
|
|
|
|
|
2007-10-16 08:26:11 +00:00
|
|
|
static inline struct page *
|
|
|
|
__first_valid_page(unsigned long pfn, unsigned long nr_pages)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
for (i = 0; i < nr_pages; i++)
|
|
|
|
if (pfn_valid_within(pfn + i))
|
|
|
|
break;
|
|
|
|
if (unlikely(i == nr_pages))
|
|
|
|
return NULL;
|
|
|
|
return pfn_to_page(pfn + i);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* start_isolate_page_range() -- make page-allocation-type of range of pages
|
|
|
|
* to be MIGRATE_ISOLATE.
|
|
|
|
* @start_pfn: The lower PFN of the range to be isolated.
|
|
|
|
* @end_pfn: The upper PFN of the range to be isolated.
|
2012-04-03 13:06:15 +00:00
|
|
|
* @migratetype: migrate type to set in error recovery.
|
2007-10-16 08:26:11 +00:00
|
|
|
*
|
|
|
|
* Making page-allocation-type to be MIGRATE_ISOLATE means free pages in
|
|
|
|
* the range will never be allocated. Any free pages and pages freed in the
|
|
|
|
* future will not be allocated again.
|
|
|
|
*
|
|
|
|
* start_pfn/end_pfn must be aligned to pageblock_order.
|
|
|
|
* Returns 0 on success and -EBUSY if any part of range cannot be isolated.
|
|
|
|
*/
|
2012-04-03 13:06:15 +00:00
|
|
|
int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
|
|
|
|
unsigned migratetype)
|
2007-10-16 08:26:11 +00:00
|
|
|
{
|
|
|
|
unsigned long pfn;
|
|
|
|
unsigned long undo_pfn;
|
|
|
|
struct page *page;
|
|
|
|
|
|
|
|
BUG_ON((start_pfn) & (pageblock_nr_pages - 1));
|
|
|
|
BUG_ON((end_pfn) & (pageblock_nr_pages - 1));
|
|
|
|
|
|
|
|
for (pfn = start_pfn;
|
|
|
|
pfn < end_pfn;
|
|
|
|
pfn += pageblock_nr_pages) {
|
|
|
|
page = __first_valid_page(pfn, pageblock_nr_pages);
|
|
|
|
if (page && set_migratetype_isolate(page)) {
|
|
|
|
undo_pfn = pfn;
|
|
|
|
goto undo;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
undo:
|
|
|
|
for (pfn = start_pfn;
|
2007-11-15 00:59:12 +00:00
|
|
|
pfn < undo_pfn;
|
2007-10-16 08:26:11 +00:00
|
|
|
pfn += pageblock_nr_pages)
|
2012-04-03 13:06:15 +00:00
|
|
|
unset_migratetype_isolate(pfn_to_page(pfn), migratetype);
|
2007-10-16 08:26:11 +00:00
|
|
|
|
|
|
|
return -EBUSY;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Make isolated pages available again.
|
|
|
|
*/
|
2012-04-03 13:06:15 +00:00
|
|
|
int undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
|
|
|
|
unsigned migratetype)
|
2007-10-16 08:26:11 +00:00
|
|
|
{
|
|
|
|
unsigned long pfn;
|
|
|
|
struct page *page;
|
|
|
|
BUG_ON((start_pfn) & (pageblock_nr_pages - 1));
|
|
|
|
BUG_ON((end_pfn) & (pageblock_nr_pages - 1));
|
|
|
|
for (pfn = start_pfn;
|
|
|
|
pfn < end_pfn;
|
|
|
|
pfn += pageblock_nr_pages) {
|
|
|
|
page = __first_valid_page(pfn, pageblock_nr_pages);
|
2007-11-15 00:59:12 +00:00
|
|
|
if (!page || get_pageblock_migratetype(page) != MIGRATE_ISOLATE)
|
2007-10-16 08:26:11 +00:00
|
|
|
continue;
|
2012-04-03 13:06:15 +00:00
|
|
|
unset_migratetype_isolate(page, migratetype);
|
2007-10-16 08:26:11 +00:00
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* Test all pages in the range is free(means isolated) or not.
|
|
|
|
* all pages in [start_pfn...end_pfn) must be in the same zone.
|
|
|
|
* zone->lock must be held before call this.
|
|
|
|
*
|
2012-04-03 13:06:15 +00:00
|
|
|
* Returns 1 if all pages in the range are isolated.
|
2007-10-16 08:26:11 +00:00
|
|
|
*/
|
|
|
|
static int
|
|
|
|
__test_page_isolated_in_pageblock(unsigned long pfn, unsigned long end_pfn)
|
|
|
|
{
|
|
|
|
struct page *page;
|
|
|
|
|
|
|
|
while (pfn < end_pfn) {
|
|
|
|
if (!pfn_valid_within(pfn)) {
|
|
|
|
pfn++;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
page = pfn_to_page(pfn);
|
|
|
|
if (PageBuddy(page))
|
|
|
|
pfn += 1 << page_order(page);
|
|
|
|
else if (page_count(page) == 0 &&
|
|
|
|
page_private(page) == MIGRATE_ISOLATE)
|
|
|
|
pfn += 1;
|
|
|
|
else
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (pfn < end_pfn)
|
|
|
|
return 0;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn)
|
|
|
|
{
|
memory hotplug: missing zone->lock in test_pages_isolated()
__test_page_isolated_in_pageblock() in mm/page_isolation.c has a comment
saying that the caller must hold zone->lock. But the only caller of that
function, test_pages_isolated(), does not hold zone->lock and the lock is
also not acquired anywhere before. This patch adds the missing zone->lock
to test_pages_isolated().
We reproducibly run into BUG_ON(!PageBuddy(page)) in __offline_isolated_pages()
during memory hotplug stress test, see trace below. This patch fixes that
problem, it would be good if we could have it in 2.6.27.
kernel BUG at /home/autobuild/BUILD/linux-2.6.26-20080909/mm/page_alloc.c:4561!
illegal operation: 0001 [#1] PREEMPT SMP
Modules linked in: dm_multipath sunrpc bonding qeth_l3 dm_mod qeth ccwgroup vmur
CPU: 1 Not tainted 2.6.26-29.x.20080909-s390default #1
Process memory_loop_all (pid: 10025, task: 2f444028, ksp: 2b10dd28)
Krnl PSW : 040c0000 801727ea (__offline_isolated_pages+0x18e/0x1c4)
R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:0 CC:0 PM:0
Krnl GPRS: 00000000 7e27fc00 00000000 7e27fc00
00000000 00000400 00014000 7e27fc01
00606f00 7e27fc00 00013fe0 2b10dd28
00000005 80172662 801727b2 2b10dd28
Krnl Code: 801727de: 5810900c l %r1,12(%r9)
801727e2: a7f4ffb3 brc 15,80172748
801727e6: a7f40001 brc 15,801727e8
>801727ea: a7f4ffbc brc 15,80172762
801727ee: a7f40001 brc 15,801727f0
801727f2: a7f4ffaf brc 15,80172750
801727f6: 0707 bcr 0,%r7
801727f8: 0017 unknown
Call Trace:
([<0000000000172772>] __offline_isolated_pages+0x116/0x1c4)
[<00000000001953a2>] offline_isolated_pages_cb+0x22/0x34
[<000000000013164c>] walk_memory_resource+0xcc/0x11c
[<000000000019520e>] offline_pages+0x36a/0x498
[<00000000001004d6>] remove_memory+0x36/0x44
[<000000000028fb06>] memory_block_change_state+0x112/0x150
[<000000000028ffb8>] store_mem_state+0x90/0xe4
[<0000000000289c00>] sysdev_store+0x34/0x40
[<00000000001ee048>] sysfs_write_file+0xd0/0x178
[<000000000019b1a8>] vfs_write+0x74/0x118
[<000000000019b9ae>] sys_write+0x46/0x7c
[<000000000011160e>] sysc_do_restart+0x12/0x16
[<0000000077f3e8ca>] 0x77f3e8ca
Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-10-02 21:50:16 +00:00
|
|
|
unsigned long pfn, flags;
|
2007-10-16 08:26:11 +00:00
|
|
|
struct page *page;
|
memory hotplug: missing zone->lock in test_pages_isolated()
__test_page_isolated_in_pageblock() in mm/page_isolation.c has a comment
saying that the caller must hold zone->lock. But the only caller of that
function, test_pages_isolated(), does not hold zone->lock and the lock is
also not acquired anywhere before. This patch adds the missing zone->lock
to test_pages_isolated().
We reproducibly run into BUG_ON(!PageBuddy(page)) in __offline_isolated_pages()
during memory hotplug stress test, see trace below. This patch fixes that
problem, it would be good if we could have it in 2.6.27.
kernel BUG at /home/autobuild/BUILD/linux-2.6.26-20080909/mm/page_alloc.c:4561!
illegal operation: 0001 [#1] PREEMPT SMP
Modules linked in: dm_multipath sunrpc bonding qeth_l3 dm_mod qeth ccwgroup vmur
CPU: 1 Not tainted 2.6.26-29.x.20080909-s390default #1
Process memory_loop_all (pid: 10025, task: 2f444028, ksp: 2b10dd28)
Krnl PSW : 040c0000 801727ea (__offline_isolated_pages+0x18e/0x1c4)
R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:0 CC:0 PM:0
Krnl GPRS: 00000000 7e27fc00 00000000 7e27fc00
00000000 00000400 00014000 7e27fc01
00606f00 7e27fc00 00013fe0 2b10dd28
00000005 80172662 801727b2 2b10dd28
Krnl Code: 801727de: 5810900c l %r1,12(%r9)
801727e2: a7f4ffb3 brc 15,80172748
801727e6: a7f40001 brc 15,801727e8
>801727ea: a7f4ffbc brc 15,80172762
801727ee: a7f40001 brc 15,801727f0
801727f2: a7f4ffaf brc 15,80172750
801727f6: 0707 bcr 0,%r7
801727f8: 0017 unknown
Call Trace:
([<0000000000172772>] __offline_isolated_pages+0x116/0x1c4)
[<00000000001953a2>] offline_isolated_pages_cb+0x22/0x34
[<000000000013164c>] walk_memory_resource+0xcc/0x11c
[<000000000019520e>] offline_pages+0x36a/0x498
[<00000000001004d6>] remove_memory+0x36/0x44
[<000000000028fb06>] memory_block_change_state+0x112/0x150
[<000000000028ffb8>] store_mem_state+0x90/0xe4
[<0000000000289c00>] sysdev_store+0x34/0x40
[<00000000001ee048>] sysfs_write_file+0xd0/0x178
[<000000000019b1a8>] vfs_write+0x74/0x118
[<000000000019b9ae>] sys_write+0x46/0x7c
[<000000000011160e>] sysc_do_restart+0x12/0x16
[<0000000077f3e8ca>] 0x77f3e8ca
Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-10-02 21:50:16 +00:00
|
|
|
struct zone *zone;
|
|
|
|
int ret;
|
2007-10-16 08:26:11 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Note: pageblock_nr_page != MAX_ORDER. Then, chunks of free page
|
|
|
|
* is not aligned to pageblock_nr_pages.
|
|
|
|
* Then we just check pagetype fist.
|
|
|
|
*/
|
|
|
|
for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
|
|
|
|
page = __first_valid_page(pfn, pageblock_nr_pages);
|
2007-11-15 00:59:12 +00:00
|
|
|
if (page && get_pageblock_migratetype(page) != MIGRATE_ISOLATE)
|
2007-10-16 08:26:11 +00:00
|
|
|
break;
|
|
|
|
}
|
2008-11-06 20:53:36 +00:00
|
|
|
page = __first_valid_page(start_pfn, end_pfn - start_pfn);
|
|
|
|
if ((pfn < end_pfn) || !page)
|
2007-10-16 08:26:11 +00:00
|
|
|
return -EBUSY;
|
|
|
|
/* Check all pages are free or Marked as ISOLATED */
|
2008-11-06 20:53:36 +00:00
|
|
|
zone = page_zone(page);
|
memory hotplug: missing zone->lock in test_pages_isolated()
__test_page_isolated_in_pageblock() in mm/page_isolation.c has a comment
saying that the caller must hold zone->lock. But the only caller of that
function, test_pages_isolated(), does not hold zone->lock and the lock is
also not acquired anywhere before. This patch adds the missing zone->lock
to test_pages_isolated().
We reproducibly run into BUG_ON(!PageBuddy(page)) in __offline_isolated_pages()
during memory hotplug stress test, see trace below. This patch fixes that
problem, it would be good if we could have it in 2.6.27.
kernel BUG at /home/autobuild/BUILD/linux-2.6.26-20080909/mm/page_alloc.c:4561!
illegal operation: 0001 [#1] PREEMPT SMP
Modules linked in: dm_multipath sunrpc bonding qeth_l3 dm_mod qeth ccwgroup vmur
CPU: 1 Not tainted 2.6.26-29.x.20080909-s390default #1
Process memory_loop_all (pid: 10025, task: 2f444028, ksp: 2b10dd28)
Krnl PSW : 040c0000 801727ea (__offline_isolated_pages+0x18e/0x1c4)
R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:0 CC:0 PM:0
Krnl GPRS: 00000000 7e27fc00 00000000 7e27fc00
00000000 00000400 00014000 7e27fc01
00606f00 7e27fc00 00013fe0 2b10dd28
00000005 80172662 801727b2 2b10dd28
Krnl Code: 801727de: 5810900c l %r1,12(%r9)
801727e2: a7f4ffb3 brc 15,80172748
801727e6: a7f40001 brc 15,801727e8
>801727ea: a7f4ffbc brc 15,80172762
801727ee: a7f40001 brc 15,801727f0
801727f2: a7f4ffaf brc 15,80172750
801727f6: 0707 bcr 0,%r7
801727f8: 0017 unknown
Call Trace:
([<0000000000172772>] __offline_isolated_pages+0x116/0x1c4)
[<00000000001953a2>] offline_isolated_pages_cb+0x22/0x34
[<000000000013164c>] walk_memory_resource+0xcc/0x11c
[<000000000019520e>] offline_pages+0x36a/0x498
[<00000000001004d6>] remove_memory+0x36/0x44
[<000000000028fb06>] memory_block_change_state+0x112/0x150
[<000000000028ffb8>] store_mem_state+0x90/0xe4
[<0000000000289c00>] sysdev_store+0x34/0x40
[<00000000001ee048>] sysfs_write_file+0xd0/0x178
[<000000000019b1a8>] vfs_write+0x74/0x118
[<000000000019b9ae>] sys_write+0x46/0x7c
[<000000000011160e>] sysc_do_restart+0x12/0x16
[<0000000077f3e8ca>] 0x77f3e8ca
Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-10-02 21:50:16 +00:00
|
|
|
spin_lock_irqsave(&zone->lock, flags);
|
|
|
|
ret = __test_page_isolated_in_pageblock(start_pfn, end_pfn);
|
|
|
|
spin_unlock_irqrestore(&zone->lock, flags);
|
|
|
|
return ret ? 0 : -EBUSY;
|
2007-10-16 08:26:11 +00:00
|
|
|
}
|