staging: zsmalloc: remove x86 dependency

This patch replaces the page table assisted object mapping
method, which has x86 dependencies, with an arch-independent
method that does a simple copy into a temporary per-cpu
buffer.

While a copy seems like it would be worse than mapping the pages,
tests demonstrate the copying is always faster and, in the case of
running inside a KVM guest, roughly 4x faster.

Signed-off-by: Seth Jennings <sjenning@linux.vnet.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
This commit is contained in:
Seth Jennings 2012-07-02 16:15:49 -05:00 committed by Greg Kroah-Hartman
parent c666e636ac
commit 5f601902c6
3 changed files with 72 additions and 36 deletions

View File

@ -1,9 +1,5 @@
config ZSMALLOC config ZSMALLOC
tristate "Memory allocator for compressed pages" tristate "Memory allocator for compressed pages"
# X86 dependency is because of the use of __flush_tlb_one and set_pte
# in zsmalloc-main.c.
# TODO: convert these to portable functions
depends on X86
default n default n
help help
zsmalloc is a slab-based memory allocator designed to store zsmalloc is a slab-based memory allocator designed to store

View File

@ -470,6 +470,57 @@ static struct page *find_get_zspage(struct size_class *class)
return page; return page;
} }
/*
 * zs_copy_map_object() - gather an object that spans two pages into @buf.
 * @buf:       destination buffer (the caller passes its per-cpu copy buffer)
 * @firstpage: page holding the first part of the object
 * @off:       byte offset of the object within @firstpage
 * @size:      total object size in bytes
 *
 * The first PAGE_SIZE - @off bytes are read from @firstpage starting at
 * @off; the remaining bytes are read from the start of the page returned
 * by get_next_page(@firstpage).  A missing next page triggers BUG_ON().
 *
 * Page faults are disabled here and are still disabled on return;
 * zs_copy_unmap_object() is the function that re-enables them.
 */
static void zs_copy_map_object(char *buf, struct page *firstpage,
				int off, int size)
{
	struct page *second = get_next_page(firstpage);
	int head_len = PAGE_SIZE - off;		/* bytes living in @firstpage */
	int tail_len = size - head_len;		/* bytes living in the next page */
	void *kaddr;

	BUG_ON(!second);

	/* disable page faults to match kmap_atomic() return conditions */
	pagefault_disable();

	/* leading part of the object: tail end of the first page */
	kaddr = kmap_atomic(firstpage);
	memcpy(buf, kaddr + off, head_len);
	kunmap_atomic(kaddr);

	/* trailing part of the object: start of the second page */
	kaddr = kmap_atomic(second);
	memcpy(buf + head_len, kaddr, tail_len);
	kunmap_atomic(kaddr);
}
/*
 * zs_copy_unmap_object() - scatter @buf back into an object spanning two pages.
 * @buf:       source buffer (the caller passes its per-cpu copy buffer)
 * @firstpage: page holding the first part of the object
 * @off:       byte offset of the object within @firstpage
 * @size:      total object size in bytes
 *
 * The first PAGE_SIZE - @off bytes of @buf are written to @firstpage
 * starting at @off; the remaining bytes are written to the start of the
 * page returned by get_next_page(@firstpage).  A missing next page
 * triggers BUG_ON().
 *
 * Page faults are re-enabled as the last step, pairing with the
 * pagefault_disable() done in zs_copy_map_object().
 */
static void zs_copy_unmap_object(char *buf, struct page *firstpage,
				int off, int size)
{
	struct page *second = get_next_page(firstpage);
	int head_len = PAGE_SIZE - off;		/* bytes living in @firstpage */
	int tail_len = size - head_len;		/* bytes living in the next page */
	void *kaddr;

	BUG_ON(!second);

	/* leading part of the object: tail end of the first page */
	kaddr = kmap_atomic(firstpage);
	memcpy(kaddr + off, buf, head_len);
	kunmap_atomic(kaddr);

	/* trailing part of the object: start of the second page */
	kaddr = kmap_atomic(second);
	memcpy(kaddr, buf + head_len, tail_len);
	kunmap_atomic(kaddr);

	/* enable page faults to match kunmap_atomic() return conditions */
	pagefault_enable();
}
static int zs_cpu_notifier(struct notifier_block *nb, unsigned long action, static int zs_cpu_notifier(struct notifier_block *nb, unsigned long action,
void *pcpu) void *pcpu)
@ -480,18 +531,23 @@ static int zs_cpu_notifier(struct notifier_block *nb, unsigned long action,
switch (action) { switch (action) {
case CPU_UP_PREPARE: case CPU_UP_PREPARE:
area = &per_cpu(zs_map_area, cpu); area = &per_cpu(zs_map_area, cpu);
if (area->vm) /*
break; * Make sure we don't leak memory if a cpu UP notification
area->vm = alloc_vm_area(2 * PAGE_SIZE, area->vm_ptes); * and zs_init() race and both call zs_cpu_up() on the same cpu
if (!area->vm) */
return notifier_from_errno(-ENOMEM); if (area->vm_buf)
return 0;
area->vm_buf = (char *)__get_free_page(GFP_KERNEL);
if (!area->vm_buf)
return -ENOMEM;
return 0;
break; break;
case CPU_DEAD: case CPU_DEAD:
case CPU_UP_CANCELED: case CPU_UP_CANCELED:
area = &per_cpu(zs_map_area, cpu); area = &per_cpu(zs_map_area, cpu);
if (area->vm) if (area->vm_buf)
free_vm_area(area->vm); free_page((unsigned long)area->vm_buf);
area->vm = NULL; area->vm_buf = NULL;
break; break;
} }
@ -714,22 +770,11 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle)
if (off + class->size <= PAGE_SIZE) { if (off + class->size <= PAGE_SIZE) {
/* this object is contained entirely within a page */ /* this object is contained entirely within a page */
area->vm_addr = kmap_atomic(page); area->vm_addr = kmap_atomic(page);
} else { return area->vm_addr + off;
/* this object spans two pages */
struct page *nextp;
nextp = get_next_page(page);
BUG_ON(!nextp);
set_pte(area->vm_ptes[0], mk_pte(page, PAGE_KERNEL));
set_pte(area->vm_ptes[1], mk_pte(nextp, PAGE_KERNEL));
/* We pre-allocated VM area so mapping can never fail */
area->vm_addr = area->vm->addr;
} }
return area->vm_addr + off; zs_copy_map_object(area->vm_buf, page, off, class->size);
return area->vm_buf;
} }
EXPORT_SYMBOL_GPL(zs_map_object); EXPORT_SYMBOL_GPL(zs_map_object);
@ -751,14 +796,10 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
off = obj_idx_to_offset(page, obj_idx, class->size); off = obj_idx_to_offset(page, obj_idx, class->size);
area = &__get_cpu_var(zs_map_area); area = &__get_cpu_var(zs_map_area);
if (off + class->size <= PAGE_SIZE) { if (off + class->size <= PAGE_SIZE)
kunmap_atomic(area->vm_addr); kunmap_atomic(area->vm_addr);
} else { else
set_pte(area->vm_ptes[0], __pte(0)); zs_copy_unmap_object(area->vm_buf, page, off, class->size);
set_pte(area->vm_ptes[1], __pte(0));
__flush_tlb_one((unsigned long)area->vm_addr);
__flush_tlb_one((unsigned long)area->vm_addr + PAGE_SIZE);
}
put_cpu_var(zs_map_area); put_cpu_var(zs_map_area);
} }
EXPORT_SYMBOL_GPL(zs_unmap_object); EXPORT_SYMBOL_GPL(zs_unmap_object);

View File

@ -110,9 +110,8 @@ enum fullness_group {
static const int fullness_threshold_frac = 4; static const int fullness_threshold_frac = 4;
struct mapping_area { struct mapping_area {
struct vm_struct *vm; char *vm_buf; /* copy buffer for objects that span pages */
pte_t *vm_ptes[2]; char *vm_addr; /* address of kmap_atomic()'ed pages */
char *vm_addr;
}; };
struct size_class { struct size_class {