mirror of https://github.com/torvalds/linux.git (synced 2024-12-27 05:11:48 +00:00)
staging: ramster: move to new zcache2 codebase
[V2: rebased to apply to 20120905 staging-next, no other changes]

The original zcache in staging is a "demo" version, and this is a massive
rewrite.  This was intended to result in a merged zcache and ramster, but
that option has been blocked so, to continue forward progress on ramster and
future related projects, only ramster moves to the new codebase.  To
differentiate between the old demo zcache and the rewrite, we refer to the
latter as zcache2, config'd as CONFIG_ZCACHE2.  Zcache and zcache2 cannot be
built in the same kernel, so CONFIG_ZCACHE2 implies !CONFIG_ZCACHE.  This
developer still has hope that zcache and zcache2 will be merged into one
codebase.  Until then, zcache2 can be considered a one-node version of
ramster.

No history of changes was recorded during the zcache2 rewrite and recreating
a sane one would be a Sisyphean task but, since ramster is still in staging
and has been unchanged since it was merged, presumably this is acceptable.

This commit also provides the hooks in zcache2 for ramster, but all
ramster-specific code is provided in a separate commit.

Some of the highlights of this rewritten codebase for zcache2:

(Note: If you are not familiar with the tmem terminology, you can review it
here: http://lwn.net/Articles/454795/ )

1. Merge of "demo" zcache and the v1.1 version of zcache in ramster.  Zcache
   and ramster had a great deal of duplicate code, which is now merged.  In
   essence, zcache2 *is* ramster but with no remote machine available, but
   !CONFIG_RAMSTER will avoid compiling lots of ramster-specific code.

2. Allocator.  Previously, persistent pools used zsmalloc and ephemeral pools
   used zbud.  Now a completely rewritten zbud is used for both.  Notably,
   this zbud maintains all persistent (frontswap) and ephemeral (cleancache)
   pageframes in separate queues in LRU order.

3. Interaction with the page allocator.  Zbud does no page allocation or
   freeing; it is done entirely in zcache2, where it can be tracked more
   effectively.

4. Better pre-allocation.  Previously, on put, if a new pageframe could not
   be pre-allocated, the put would fail, even if the allocator had plenty of
   partial pages where the data could be stored; this is now fixed.

5. Ouroboros ("eating its own tail") allocation.  If no pageframe can be
   allocated AND no partial pages are available, the least-recently-used
   ephemeral pageframe is reclaimed immediately (including flushing tmem
   pointers to it) and re-used.  This ensures that most-recently-used
   cleancache pages are more likely to be retained than LRU pages, and also
   that, as in the core mm subsystem, anonymous pages have a higher priority
   than clean page cache pages.

6. Zcache and zbud now use debugfs instead of sysfs.  Ramster uses debugfs
   where possible and sysfs where necessary.  (Some ramster configuration is
   done from userspace, so some sysfs is necessary.)

7. Modularization.  As some have observed, the monolithic zcache-main.c code
   included zbud code, which has now been separated into its own code module.
   Much ramster-specific code in the old ramster zcache-main.c has also been
   moved into ramster.c so that it does not get compiled with !CONFIG_RAMSTER.

8. Rebased to 3.5.

This new codebase also provides hooks for several future new features:

A. WasActive patch; requires some mm/frontswap changes previously posted.
   A new version of this patch will be provided separately.
   See ifdef __PG_WAS_ACTIVE.

B. Exclusive gets.  It seems tmem _can_ support exclusive gets with a minor
   change to zcache2 and a small backwards-compatible change to frontswap.c.
   Explanation and frontswap patch will be provided separately.
   See ifdef FRONTSWAP_HAS_EXCLUSIVE_GETS.

C. Ouroboros writeback.  Since persistent (frontswap) pages may now also be
   reclaimed in LRU order, the foundation is in place to properly write these
   pages back into the swap cache and then the swap disk.  This is still
   under development and requires some other mm changes which are prototyped.
   See ifdef FRONTSWAP_HAS_UNUSE.

A new feature that desperately needs attention (if someone is looking for a
way to contribute) is kernel module support.  A preliminary version of a
patch was posted by Erlangen University and needs to be integrated and tested
for zcache2 and brought up to kernel standards.  If anybody is interested in
helping out with any of these, let me know!

Acked-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Dan Magenheimer <dan.magenheimer@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
This commit is contained in:
parent c857ce1659
commit faca2ef77a
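
For readers unfamiliar with the tmem terminology referenced in the commit
message, here is a minimal sketch (not part of the commit) of how a host such
as zcache2 can drive the tmem_put()/tmem_get() API added in tmem.h below.
The function name example_store_and_load() and the pool/pampd/buffer
arguments are hypothetical placeholders, not code from this patch.

#include "tmem.h"

/*
 * Hypothetical example only: store PAGE_SIZE worth of data under a
 * (pool, object id, index) handle, then read it back.  "pampd" is the
 * opaque page descriptor created by the PAM layer (zbud in zcache2).
 */
static int example_store_and_load(struct tmem_pool *pool, void *pampd,
				  char *buf, size_t *sizep)
{
	struct tmem_oid oid = { .oid = { 0x1234, 0, 0 } };	/* object id */
	uint32_t index = 7;					/* page index */
	int ret;

	/* associate the pampd with the three-level handle */
	ret = tmem_put(pool, &oid, index, false, pampd);
	if (ret < 0)
		return ret;

	/* later: reconstitute PAGE_SIZE bytes for the same handle */
	return tmem_get(pool, &oid, index, buf, sizep, false, 0);
}

In zcache2 the pampd is expected to be a reference produced by the rewritten
zbud allocator (highlight 2 above), and the pool's persistent/ephemeral
attribute decides whether a successful get also frees the page.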
drivers/staging/Kconfig
@@ -134,4 +134,6 @@ source "drivers/staging/csr/Kconfig"

source "drivers/staging/omap-thermal/Kconfig"

source "drivers/staging/ramster/Kconfig"

endif # STAGING
drivers/staging/Makefile
@@ -59,3 +59,4 @@ obj-$(CONFIG_USB_G_CCG) += ccg/
obj-$(CONFIG_WIMAX_GDM72XX) += gdm72xx/
obj-$(CONFIG_CSR_WIFI) += csr/
obj-$(CONFIG_OMAP_BANDGAP) += omap-thermal/
obj-$(CONFIG_ZCACHE2) += ramster/
drivers/staging/ramster/Kconfig (new file, 16 lines)
@@ -0,0 +1,16 @@
config ZCACHE2
	bool "Dynamic compression of swap pages and clean pagecache pages"
	depends on CRYPTO=y && SWAP=y && CLEANCACHE && FRONTSWAP && !ZCACHE
	select CRYPTO_LZO
	default n
	help
	  Zcache2 doubles RAM efficiency while providing a significant
	  performance boost on many workloads.  Zcache2 uses
	  compression and an in-kernel implementation of transcendent
	  memory to store clean page cache pages and swap in RAM,
	  providing a noticeable reduction in disk I/O.  Zcache2
	  is a complete rewrite of the older zcache; it was intended to
	  be a merge but that has been blocked due to political and
	  technical disagreements.  It is intended that they will merge
	  again in the future.  Until then, zcache2 is a single-node
	  version of ramster.
drivers/staging/ramster/Makefile (new file, 3 lines)
@@ -0,0 +1,3 @@
zcache-y := zcache-main.o tmem.o zbud.o

obj-$(CONFIG_ZCACHE2) += zcache.o
drivers/staging/ramster/ramster.h (new file, 59 lines)
@@ -0,0 +1,59 @@

/*
 * zcache/ramster.h
 *
 * Placeholder to resolve ramster references when !CONFIG_RAMSTER
 * Real ramster.h lives in ramster subdirectory.
 *
 * Copyright (c) 2009-2012, Dan Magenheimer, Oracle Corp.
 */

#ifndef _ZCACHE_RAMSTER_H_
#define _ZCACHE_RAMSTER_H_

#ifdef CONFIG_RAMSTER
#include "ramster/ramster.h"
#else
static inline void ramster_init(bool x, bool y, bool z)
{
}

static inline void ramster_register_pamops(struct tmem_pamops *p)
{
}

static inline int ramster_remotify_pageframe(bool b)
{
	return 0;
}

static inline void *ramster_pampd_free(void *v, struct tmem_pool *p,
					struct tmem_oid *o, uint32_t u, bool b)
{
	return NULL;
}

static inline int ramster_do_preload_flnode(struct tmem_pool *p)
{
	return -1;
}

static inline bool pampd_is_remote(void *v)
{
	return false;
}

static inline void ramster_count_foreign_pages(bool b, int i)
{
}

static inline void ramster_cpu_up(int cpu)
{
}

static inline void ramster_cpu_down(int cpu)
{
}
#endif

#endif /* _ZCACHE_RAMSTER_H */
drivers/staging/ramster/tmem.c (new file, 894 lines)
@@ -0,0 +1,894 @@
/*
 * In-kernel transcendent memory (generic implementation)
 *
 * Copyright (c) 2009-2012, Dan Magenheimer, Oracle Corp.
 *
 * The primary purpose of Transcendent Memory ("tmem") is to map object-oriented
 * "handles" (triples containing a pool id, an object id, and an index) to
 * pages in a page-accessible memory (PAM).  Tmem references the PAM pages via
 * an abstract "pampd" (PAM page-descriptor), which can be operated on by a
 * set of functions (pamops).  Each pampd contains some representation of
 * PAGE_SIZE bytes worth of data.  For those familiar with key-value stores,
 * the tmem handle is a three-level hierarchical key, and the value is always
 * reconstituted (but not necessarily stored) as PAGE_SIZE bytes and is
 * referenced in the datastore by the pampd.  The hierarchy is required
 * to ensure that certain invalidation functions can be performed efficiently
 * (i.e. flush all indexes associated with this object_id, or
 * flush all objects associated with this pool).
 *
 * Tmem must support potentially millions of pages and must be able to insert,
 * find, and delete these pages at a potential frequency of thousands per
 * second concurrently across many CPUs (and, if used with KVM, across many
 * vcpus across many guests).  Tmem is tracked with a hierarchy of data
 * structures, organized by the elements in the handle-tuple: pool_id,
 * object_id, and page index.  One or more "clients" (e.g. guests) each
 * provide one or more tmem_pools.  Each pool contains a hash table of
 * rb_trees of tmem_objs.  Each tmem_obj contains a radix-tree-like tree
 * of pointers, with intermediate nodes called tmem_objnodes.  Each leaf
 * pointer in this tree points to a pampd, which is accessible only through
 * a small set of callbacks registered by the PAM implementation (see
 * tmem_register_pamops).  Tmem only needs to do memory allocation for objs
 * and objnodes, and this is done via a set of callbacks that must be
 * registered by the tmem host implementation (e.g. see tmem_register_hostops).
 */
|
||||
|
||||
#include <linux/list.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/atomic.h>
|
||||
#ifdef CONFIG_RAMSTER
|
||||
#include <linux/delay.h>
|
||||
#endif
|
||||
|
||||
#include "tmem.h"
|
||||
|
||||
/* data structure sentinels used for debugging... see tmem.h */
|
||||
#define POOL_SENTINEL 0x87658765
|
||||
#define OBJ_SENTINEL 0x12345678
|
||||
#define OBJNODE_SENTINEL 0xfedcba09
|
||||
|
||||
/*
|
||||
* A tmem host implementation must use this function to register callbacks
|
||||
* for memory allocation.
|
||||
*/
|
||||
static struct tmem_hostops tmem_hostops;
|
||||
|
||||
static void tmem_objnode_tree_init(void);
|
||||
|
||||
void tmem_register_hostops(struct tmem_hostops *m)
|
||||
{
|
||||
tmem_objnode_tree_init();
|
||||
tmem_hostops = *m;
|
||||
}
|
||||
|
||||
/*
|
||||
* A tmem host implementation must use this function to register
|
||||
* callbacks for a page-accessible memory (PAM) implementation.
|
||||
*/
|
||||
static struct tmem_pamops tmem_pamops;
|
||||
|
||||
void tmem_register_pamops(struct tmem_pamops *m)
|
||||
{
|
||||
tmem_pamops = *m;
|
||||
}
|
||||
|
||||
/*
 * Oids are potentially very sparse and tmem_objs may have an indeterminately
 * short life, being added and deleted at a relatively high frequency.
 * So an rb_tree is an ideal data structure to manage tmem_objs.  But because
 * of the potentially huge number of tmem_objs, each pool manages a hashtable
 * of rb_trees to reduce search, insert, delete, and rebalancing time.
 * Each hashbucket also has a lock to manage concurrent access and no
 * searches, inserts, or deletions can be performed unless the lock is held.
 * As a result, care must be taken to ensure tmem routines are not called
 * recursively; the vast majority of the time, a recursive call may work
 * but a deadlock will occur a small fraction of the time due to the
 * hashbucket lock.
 *
 * The following routines manage tmem_objs.  In all of these routines,
 * the hashbucket lock is already held.
 */
|
||||
|
||||
/* Search for object==oid in pool, returns object if found. */
|
||||
static struct tmem_obj *__tmem_obj_find(struct tmem_hashbucket *hb,
|
||||
struct tmem_oid *oidp,
|
||||
struct rb_node **parent,
|
||||
struct rb_node ***link)
|
||||
{
|
||||
struct rb_node *_parent = NULL, **rbnode;
|
||||
struct tmem_obj *obj = NULL;
|
||||
|
||||
rbnode = &hb->obj_rb_root.rb_node;
|
||||
while (*rbnode) {
|
||||
BUG_ON(RB_EMPTY_NODE(*rbnode));
|
||||
_parent = *rbnode;
|
||||
obj = rb_entry(*rbnode, struct tmem_obj,
|
||||
rb_tree_node);
|
||||
switch (tmem_oid_compare(oidp, &obj->oid)) {
|
||||
case 0: /* equal */
|
||||
goto out;
|
||||
case -1:
|
||||
rbnode = &(*rbnode)->rb_left;
|
||||
break;
|
||||
case 1:
|
||||
rbnode = &(*rbnode)->rb_right;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (parent)
|
||||
*parent = _parent;
|
||||
if (link)
|
||||
*link = rbnode;
|
||||
obj = NULL;
|
||||
out:
|
||||
return obj;
|
||||
}
|
||||
|
||||
static struct tmem_obj *tmem_obj_find(struct tmem_hashbucket *hb,
|
||||
struct tmem_oid *oidp)
|
||||
{
|
||||
return __tmem_obj_find(hb, oidp, NULL, NULL);
|
||||
}
|
||||
|
||||
static void tmem_pampd_destroy_all_in_obj(struct tmem_obj *, bool);
|
||||
|
||||
/* Free an object that has no more pampds in it. */
|
||||
static void tmem_obj_free(struct tmem_obj *obj, struct tmem_hashbucket *hb)
|
||||
{
|
||||
struct tmem_pool *pool;
|
||||
|
||||
BUG_ON(obj == NULL);
|
||||
ASSERT_SENTINEL(obj, OBJ);
|
||||
BUG_ON(obj->pampd_count > 0);
|
||||
pool = obj->pool;
|
||||
BUG_ON(pool == NULL);
|
||||
if (obj->objnode_tree_root != NULL) /* may be "stump" with no leaves */
|
||||
tmem_pampd_destroy_all_in_obj(obj, false);
|
||||
BUG_ON(obj->objnode_tree_root != NULL);
|
||||
BUG_ON((long)obj->objnode_count != 0);
|
||||
atomic_dec(&pool->obj_count);
|
||||
BUG_ON(atomic_read(&pool->obj_count) < 0);
|
||||
INVERT_SENTINEL(obj, OBJ);
|
||||
obj->pool = NULL;
|
||||
tmem_oid_set_invalid(&obj->oid);
|
||||
rb_erase(&obj->rb_tree_node, &hb->obj_rb_root);
|
||||
}
|
||||
|
||||
/*
 * Initialize and insert a tmem_object_root (called only if find failed).
 */
|
||||
static void tmem_obj_init(struct tmem_obj *obj, struct tmem_hashbucket *hb,
|
||||
struct tmem_pool *pool,
|
||||
struct tmem_oid *oidp)
|
||||
{
|
||||
struct rb_root *root = &hb->obj_rb_root;
|
||||
struct rb_node **new = NULL, *parent = NULL;
|
||||
|
||||
BUG_ON(pool == NULL);
|
||||
atomic_inc(&pool->obj_count);
|
||||
obj->objnode_tree_height = 0;
|
||||
obj->objnode_tree_root = NULL;
|
||||
obj->pool = pool;
|
||||
obj->oid = *oidp;
|
||||
obj->objnode_count = 0;
|
||||
obj->pampd_count = 0;
|
||||
#ifdef CONFIG_RAMSTER
|
||||
if (tmem_pamops.new_obj != NULL)
|
||||
(*tmem_pamops.new_obj)(obj);
|
||||
#endif
|
||||
SET_SENTINEL(obj, OBJ);
|
||||
|
||||
if (__tmem_obj_find(hb, oidp, &parent, &new))
|
||||
BUG();
|
||||
|
||||
rb_link_node(&obj->rb_tree_node, parent, new);
|
||||
rb_insert_color(&obj->rb_tree_node, root);
|
||||
}
|
||||
|
||||
/*
|
||||
* Tmem is managed as a set of tmem_pools with certain attributes, such as
|
||||
* "ephemeral" vs "persistent". These attributes apply to all tmem_objs
|
||||
* and all pampds that belong to a tmem_pool. A tmem_pool is created
|
||||
* or deleted relatively rarely (for example, when a filesystem is
|
||||
* mounted or unmounted).
|
||||
*/
|
||||
|
||||
/* flush all data from a pool and, optionally, free it */
|
||||
static void tmem_pool_flush(struct tmem_pool *pool, bool destroy)
|
||||
{
|
||||
struct rb_node *rbnode;
|
||||
struct tmem_obj *obj;
|
||||
struct tmem_hashbucket *hb = &pool->hashbucket[0];
|
||||
int i;
|
||||
|
||||
BUG_ON(pool == NULL);
|
||||
for (i = 0; i < TMEM_HASH_BUCKETS; i++, hb++) {
|
||||
spin_lock(&hb->lock);
|
||||
rbnode = rb_first(&hb->obj_rb_root);
|
||||
while (rbnode != NULL) {
|
||||
obj = rb_entry(rbnode, struct tmem_obj, rb_tree_node);
|
||||
rbnode = rb_next(rbnode);
|
||||
tmem_pampd_destroy_all_in_obj(obj, true);
|
||||
tmem_obj_free(obj, hb);
|
||||
(*tmem_hostops.obj_free)(obj, pool);
|
||||
}
|
||||
spin_unlock(&hb->lock);
|
||||
}
|
||||
if (destroy)
|
||||
list_del(&pool->pool_list);
|
||||
}
|
||||
|
||||
/*
 * A tmem_obj contains a radix-tree-like tree in which the intermediate
 * nodes are called tmem_objnodes.  (The kernel lib/radix-tree.c implementation
 * is very specialized and tuned for specific uses and is not particularly
 * suited for use from this code, though some code from the core algorithms has
 * been reused, thus the copyright notices below).  Each tmem_objnode contains
 * a set of pointers which point to either a set of intermediate tmem_objnodes
 * or a set of pampds.
 *
 * Portions Copyright (C) 2001 Momchil Velikov
 * Portions Copyright (C) 2001 Christoph Hellwig
 * Portions Copyright (C) 2005 SGI, Christoph Lameter <clameter@sgi.com>
 */
|
||||
|
||||
struct tmem_objnode_tree_path {
|
||||
struct tmem_objnode *objnode;
|
||||
int offset;
|
||||
};
|
||||
|
||||
/* objnode height_to_maxindex translation */
|
||||
static unsigned long tmem_objnode_tree_h2max[OBJNODE_TREE_MAX_PATH + 1];
|
||||
|
||||
static void tmem_objnode_tree_init(void)
|
||||
{
|
||||
unsigned int ht, tmp;
|
||||
|
||||
for (ht = 0; ht < ARRAY_SIZE(tmem_objnode_tree_h2max); ht++) {
|
||||
tmp = ht * OBJNODE_TREE_MAP_SHIFT;
|
||||
if (tmp >= OBJNODE_TREE_INDEX_BITS)
|
||||
tmem_objnode_tree_h2max[ht] = ~0UL;
|
||||
else
|
||||
tmem_objnode_tree_h2max[ht] =
|
||||
(~0UL >> (OBJNODE_TREE_INDEX_BITS - tmp - 1)) >> 1;
|
||||
}
|
||||
}
|
||||
|
||||
static struct tmem_objnode *tmem_objnode_alloc(struct tmem_obj *obj)
|
||||
{
|
||||
struct tmem_objnode *objnode;
|
||||
|
||||
ASSERT_SENTINEL(obj, OBJ);
|
||||
BUG_ON(obj->pool == NULL);
|
||||
ASSERT_SENTINEL(obj->pool, POOL);
|
||||
objnode = (*tmem_hostops.objnode_alloc)(obj->pool);
|
||||
if (unlikely(objnode == NULL))
|
||||
goto out;
|
||||
objnode->obj = obj;
|
||||
SET_SENTINEL(objnode, OBJNODE);
|
||||
memset(&objnode->slots, 0, sizeof(objnode->slots));
|
||||
objnode->slots_in_use = 0;
|
||||
obj->objnode_count++;
|
||||
out:
|
||||
return objnode;
|
||||
}
|
||||
|
||||
static void tmem_objnode_free(struct tmem_objnode *objnode)
|
||||
{
|
||||
struct tmem_pool *pool;
|
||||
int i;
|
||||
|
||||
BUG_ON(objnode == NULL);
|
||||
for (i = 0; i < OBJNODE_TREE_MAP_SIZE; i++)
|
||||
BUG_ON(objnode->slots[i] != NULL);
|
||||
ASSERT_SENTINEL(objnode, OBJNODE);
|
||||
INVERT_SENTINEL(objnode, OBJNODE);
|
||||
BUG_ON(objnode->obj == NULL);
|
||||
ASSERT_SENTINEL(objnode->obj, OBJ);
|
||||
pool = objnode->obj->pool;
|
||||
BUG_ON(pool == NULL);
|
||||
ASSERT_SENTINEL(pool, POOL);
|
||||
objnode->obj->objnode_count--;
|
||||
objnode->obj = NULL;
|
||||
(*tmem_hostops.objnode_free)(objnode, pool);
|
||||
}
|
||||
|
||||
/*
|
||||
* Lookup index in object and return associated pampd (or NULL if not found).
|
||||
*/
|
||||
static void **__tmem_pampd_lookup_in_obj(struct tmem_obj *obj, uint32_t index)
|
||||
{
|
||||
unsigned int height, shift;
|
||||
struct tmem_objnode **slot = NULL;
|
||||
|
||||
BUG_ON(obj == NULL);
|
||||
ASSERT_SENTINEL(obj, OBJ);
|
||||
BUG_ON(obj->pool == NULL);
|
||||
ASSERT_SENTINEL(obj->pool, POOL);
|
||||
|
||||
height = obj->objnode_tree_height;
|
||||
if (index > tmem_objnode_tree_h2max[obj->objnode_tree_height])
|
||||
goto out;
|
||||
if (height == 0 && obj->objnode_tree_root) {
|
||||
slot = &obj->objnode_tree_root;
|
||||
goto out;
|
||||
}
|
||||
shift = (height-1) * OBJNODE_TREE_MAP_SHIFT;
|
||||
slot = &obj->objnode_tree_root;
|
||||
while (height > 0) {
|
||||
if (*slot == NULL)
|
||||
goto out;
|
||||
slot = (struct tmem_objnode **)
|
||||
((*slot)->slots +
|
||||
((index >> shift) & OBJNODE_TREE_MAP_MASK));
|
||||
shift -= OBJNODE_TREE_MAP_SHIFT;
|
||||
height--;
|
||||
}
|
||||
out:
|
||||
return slot != NULL ? (void **)slot : NULL;
|
||||
}
|
||||
|
||||
static void *tmem_pampd_lookup_in_obj(struct tmem_obj *obj, uint32_t index)
|
||||
{
|
||||
struct tmem_objnode **slot;
|
||||
|
||||
slot = (struct tmem_objnode **)__tmem_pampd_lookup_in_obj(obj, index);
|
||||
return slot != NULL ? *slot : NULL;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_RAMSTER
|
||||
static void *tmem_pampd_replace_in_obj(struct tmem_obj *obj, uint32_t index,
|
||||
void *new_pampd, bool no_free)
|
||||
{
|
||||
struct tmem_objnode **slot;
|
||||
void *ret = NULL;
|
||||
|
||||
slot = (struct tmem_objnode **)__tmem_pampd_lookup_in_obj(obj, index);
|
||||
if ((slot != NULL) && (*slot != NULL)) {
|
||||
void *old_pampd = *(void **)slot;
|
||||
*(void **)slot = new_pampd;
|
||||
if (!no_free)
|
||||
(*tmem_pamops.free)(old_pampd, obj->pool,
|
||||
NULL, 0, false);
|
||||
ret = new_pampd;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
static int tmem_pampd_add_to_obj(struct tmem_obj *obj, uint32_t index,
|
||||
void *pampd)
|
||||
{
|
||||
int ret = 0;
|
||||
struct tmem_objnode *objnode = NULL, *newnode, *slot;
|
||||
unsigned int height, shift;
|
||||
int offset = 0;
|
||||
|
||||
/* if necessary, extend the tree to be higher */
|
||||
if (index > tmem_objnode_tree_h2max[obj->objnode_tree_height]) {
|
||||
height = obj->objnode_tree_height + 1;
|
||||
if (index > tmem_objnode_tree_h2max[height])
|
||||
while (index > tmem_objnode_tree_h2max[height])
|
||||
height++;
|
||||
if (obj->objnode_tree_root == NULL) {
|
||||
obj->objnode_tree_height = height;
|
||||
goto insert;
|
||||
}
|
||||
do {
|
||||
newnode = tmem_objnode_alloc(obj);
|
||||
if (!newnode) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
newnode->slots[0] = obj->objnode_tree_root;
|
||||
newnode->slots_in_use = 1;
|
||||
obj->objnode_tree_root = newnode;
|
||||
obj->objnode_tree_height++;
|
||||
} while (height > obj->objnode_tree_height);
|
||||
}
|
||||
insert:
|
||||
slot = obj->objnode_tree_root;
|
||||
height = obj->objnode_tree_height;
|
||||
shift = (height-1) * OBJNODE_TREE_MAP_SHIFT;
|
||||
while (height > 0) {
|
||||
if (slot == NULL) {
|
||||
/* add a child objnode. */
|
||||
slot = tmem_objnode_alloc(obj);
|
||||
if (!slot) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
if (objnode) {
|
||||
|
||||
objnode->slots[offset] = slot;
|
||||
objnode->slots_in_use++;
|
||||
} else
|
||||
obj->objnode_tree_root = slot;
|
||||
}
|
||||
/* go down a level */
|
||||
offset = (index >> shift) & OBJNODE_TREE_MAP_MASK;
|
||||
objnode = slot;
|
||||
slot = objnode->slots[offset];
|
||||
shift -= OBJNODE_TREE_MAP_SHIFT;
|
||||
height--;
|
||||
}
|
||||
BUG_ON(slot != NULL);
|
||||
if (objnode) {
|
||||
objnode->slots_in_use++;
|
||||
objnode->slots[offset] = pampd;
|
||||
} else
|
||||
obj->objnode_tree_root = pampd;
|
||||
obj->pampd_count++;
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void *tmem_pampd_delete_from_obj(struct tmem_obj *obj, uint32_t index)
|
||||
{
|
||||
struct tmem_objnode_tree_path path[OBJNODE_TREE_MAX_PATH + 1];
|
||||
struct tmem_objnode_tree_path *pathp = path;
|
||||
struct tmem_objnode *slot = NULL;
|
||||
unsigned int height, shift;
|
||||
int offset;
|
||||
|
||||
BUG_ON(obj == NULL);
|
||||
ASSERT_SENTINEL(obj, OBJ);
|
||||
BUG_ON(obj->pool == NULL);
|
||||
ASSERT_SENTINEL(obj->pool, POOL);
|
||||
height = obj->objnode_tree_height;
|
||||
if (index > tmem_objnode_tree_h2max[height])
|
||||
goto out;
|
||||
slot = obj->objnode_tree_root;
|
||||
if (height == 0 && obj->objnode_tree_root) {
|
||||
obj->objnode_tree_root = NULL;
|
||||
goto out;
|
||||
}
|
||||
shift = (height - 1) * OBJNODE_TREE_MAP_SHIFT;
|
||||
pathp->objnode = NULL;
|
||||
do {
|
||||
if (slot == NULL)
|
||||
goto out;
|
||||
pathp++;
|
||||
offset = (index >> shift) & OBJNODE_TREE_MAP_MASK;
|
||||
pathp->offset = offset;
|
||||
pathp->objnode = slot;
|
||||
slot = slot->slots[offset];
|
||||
shift -= OBJNODE_TREE_MAP_SHIFT;
|
||||
height--;
|
||||
} while (height > 0);
|
||||
if (slot == NULL)
|
||||
goto out;
|
||||
while (pathp->objnode) {
|
||||
pathp->objnode->slots[pathp->offset] = NULL;
|
||||
pathp->objnode->slots_in_use--;
|
||||
if (pathp->objnode->slots_in_use) {
|
||||
if (pathp->objnode == obj->objnode_tree_root) {
|
||||
while (obj->objnode_tree_height > 0 &&
|
||||
obj->objnode_tree_root->slots_in_use == 1 &&
|
||||
obj->objnode_tree_root->slots[0]) {
|
||||
struct tmem_objnode *to_free =
|
||||
obj->objnode_tree_root;
|
||||
|
||||
obj->objnode_tree_root =
|
||||
to_free->slots[0];
|
||||
obj->objnode_tree_height--;
|
||||
to_free->slots[0] = NULL;
|
||||
to_free->slots_in_use = 0;
|
||||
tmem_objnode_free(to_free);
|
||||
}
|
||||
}
|
||||
goto out;
|
||||
}
|
||||
tmem_objnode_free(pathp->objnode); /* 0 slots used, free it */
|
||||
pathp--;
|
||||
}
|
||||
obj->objnode_tree_height = 0;
|
||||
obj->objnode_tree_root = NULL;
|
||||
|
||||
out:
|
||||
if (slot != NULL)
|
||||
obj->pampd_count--;
|
||||
BUG_ON(obj->pampd_count < 0);
|
||||
return slot;
|
||||
}
|
||||
|
||||
/* Recursively walk the objnode_tree destroying pampds and objnodes. */
|
||||
static void tmem_objnode_node_destroy(struct tmem_obj *obj,
|
||||
struct tmem_objnode *objnode,
|
||||
unsigned int ht)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (ht == 0)
|
||||
return;
|
||||
for (i = 0; i < OBJNODE_TREE_MAP_SIZE; i++) {
|
||||
if (objnode->slots[i]) {
|
||||
if (ht == 1) {
|
||||
obj->pampd_count--;
|
||||
(*tmem_pamops.free)(objnode->slots[i],
|
||||
obj->pool, NULL, 0, true);
|
||||
objnode->slots[i] = NULL;
|
||||
continue;
|
||||
}
|
||||
tmem_objnode_node_destroy(obj, objnode->slots[i], ht-1);
|
||||
tmem_objnode_free(objnode->slots[i]);
|
||||
objnode->slots[i] = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void tmem_pampd_destroy_all_in_obj(struct tmem_obj *obj,
|
||||
bool pool_destroy)
|
||||
{
|
||||
if (obj->objnode_tree_root == NULL)
|
||||
return;
|
||||
if (obj->objnode_tree_height == 0) {
|
||||
obj->pampd_count--;
|
||||
(*tmem_pamops.free)(obj->objnode_tree_root,
|
||||
obj->pool, NULL, 0, true);
|
||||
} else {
|
||||
tmem_objnode_node_destroy(obj, obj->objnode_tree_root,
|
||||
obj->objnode_tree_height);
|
||||
tmem_objnode_free(obj->objnode_tree_root);
|
||||
obj->objnode_tree_height = 0;
|
||||
}
|
||||
obj->objnode_tree_root = NULL;
|
||||
#ifdef CONFIG_RAMSTER
|
||||
if (tmem_pamops.free_obj != NULL)
|
||||
(*tmem_pamops.free_obj)(obj->pool, obj, pool_destroy);
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* Tmem is operated on by a set of well-defined actions:
|
||||
* "put", "get", "flush", "flush_object", "new pool" and "destroy pool".
|
||||
* (The tmem ABI allows for subpages and exchanges but these operations
|
||||
* are not included in this implementation.)
|
||||
*
|
||||
* These "tmem core" operations are implemented in the following functions.
|
||||
*/
|
||||
|
||||
/*
|
||||
* "Put" a page, e.g. associate the passed pampd with the passed handle.
|
||||
* Tmem_put is complicated by a corner case: What if a page with matching
|
||||
* handle already exists in tmem? To guarantee coherency, one of two
|
||||
* actions is necessary: Either the data for the page must be overwritten,
|
||||
* or the page must be "flushed" so that the data is not accessible to a
|
||||
* subsequent "get". Since these "duplicate puts" are relatively rare,
|
||||
* this implementation always flushes for simplicity.
|
||||
*/
|
||||
int tmem_put(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index,
|
||||
bool raw, void *pampd_to_use)
|
||||
{
|
||||
struct tmem_obj *obj = NULL, *objfound = NULL, *objnew = NULL;
|
||||
void *pampd = NULL, *pampd_del = NULL;
|
||||
int ret = -ENOMEM;
|
||||
struct tmem_hashbucket *hb;
|
||||
|
||||
hb = &pool->hashbucket[tmem_oid_hash(oidp)];
|
||||
spin_lock(&hb->lock);
|
||||
obj = objfound = tmem_obj_find(hb, oidp);
|
||||
if (obj != NULL) {
|
||||
pampd = tmem_pampd_lookup_in_obj(objfound, index);
|
||||
if (pampd != NULL) {
|
||||
/* if found, is a dup put, flush the old one */
|
||||
pampd_del = tmem_pampd_delete_from_obj(obj, index);
|
||||
BUG_ON(pampd_del != pampd);
|
||||
(*tmem_pamops.free)(pampd, pool, oidp, index, true);
|
||||
if (obj->pampd_count == 0) {
|
||||
objnew = obj;
|
||||
objfound = NULL;
|
||||
}
|
||||
pampd = NULL;
|
||||
}
|
||||
} else {
|
||||
obj = objnew = (*tmem_hostops.obj_alloc)(pool);
|
||||
if (unlikely(obj == NULL)) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
tmem_obj_init(obj, hb, pool, oidp);
|
||||
}
|
||||
BUG_ON(obj == NULL);
|
||||
BUG_ON(((objnew != obj) && (objfound != obj)) || (objnew == objfound));
|
||||
pampd = pampd_to_use;
|
||||
BUG_ON(pampd_to_use == NULL);
|
||||
ret = tmem_pampd_add_to_obj(obj, index, pampd);
|
||||
if (unlikely(ret == -ENOMEM))
|
||||
/* may have partially built objnode tree ("stump") */
|
||||
goto delete_and_free;
|
||||
(*tmem_pamops.create_finish)(pampd, is_ephemeral(pool));
|
||||
goto out;
|
||||
|
||||
delete_and_free:
|
||||
(void)tmem_pampd_delete_from_obj(obj, index);
|
||||
if (pampd)
|
||||
(*tmem_pamops.free)(pampd, pool, NULL, 0, true);
|
||||
if (objnew) {
|
||||
tmem_obj_free(objnew, hb);
|
||||
(*tmem_hostops.obj_free)(objnew, pool);
|
||||
}
|
||||
out:
|
||||
spin_unlock(&hb->lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_RAMSTER
|
||||
/*
 * For ramster only: The following routines provide a two-step sequence
 * to allow the caller to replace a pampd in the tmem data structures with
 * another pampd.  Here, we look up the passed handle and, if found, return the
 * associated pampd and object, leaving the hashbucket locked and returning
 * a reference to it.  The caller is expected to immediately call the
 * matching tmem_localify_finish routine, which handles the replacement
 * and unlocks the hashbucket.
 */
|
||||
void *tmem_localify_get_pampd(struct tmem_pool *pool, struct tmem_oid *oidp,
|
||||
uint32_t index, struct tmem_obj **ret_obj,
|
||||
void **saved_hb)
|
||||
{
|
||||
struct tmem_hashbucket *hb;
|
||||
struct tmem_obj *obj = NULL;
|
||||
void *pampd = NULL;
|
||||
|
||||
hb = &pool->hashbucket[tmem_oid_hash(oidp)];
|
||||
spin_lock(&hb->lock);
|
||||
obj = tmem_obj_find(hb, oidp);
|
||||
if (likely(obj != NULL))
|
||||
pampd = tmem_pampd_lookup_in_obj(obj, index);
|
||||
*ret_obj = obj;
|
||||
*saved_hb = (void *)hb;
|
||||
/* note, hashbucket remains locked */
|
||||
return pampd;
|
||||
}
|
||||
|
||||
void tmem_localify_finish(struct tmem_obj *obj, uint32_t index,
|
||||
void *pampd, void *saved_hb, bool delete)
|
||||
{
|
||||
struct tmem_hashbucket *hb = (struct tmem_hashbucket *)saved_hb;
|
||||
|
||||
BUG_ON(!spin_is_locked(&hb->lock));
|
||||
if (pampd != NULL) {
|
||||
BUG_ON(obj == NULL);
|
||||
(void)tmem_pampd_replace_in_obj(obj, index, pampd, 1);
|
||||
(*tmem_pamops.create_finish)(pampd, is_ephemeral(obj->pool));
|
||||
} else if (delete) {
|
||||
BUG_ON(obj == NULL);
|
||||
(void)tmem_pampd_delete_from_obj(obj, index);
|
||||
}
|
||||
spin_unlock(&hb->lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* For ramster only. Helper function to support asynchronous tmem_get.
|
||||
*/
|
||||
static int tmem_repatriate(void **ppampd, struct tmem_hashbucket *hb,
|
||||
struct tmem_pool *pool, struct tmem_oid *oidp,
|
||||
uint32_t index, bool free, char *data)
|
||||
{
|
||||
void *old_pampd = *ppampd, *new_pampd = NULL;
|
||||
bool intransit = false;
|
||||
int ret = 0;
|
||||
|
||||
if (!is_ephemeral(pool))
|
||||
new_pampd = (*tmem_pamops.repatriate_preload)(
|
||||
old_pampd, pool, oidp, index, &intransit);
|
||||
if (intransit)
|
||||
ret = -EAGAIN;
|
||||
else if (new_pampd != NULL)
|
||||
*ppampd = new_pampd;
|
||||
/* must release the hb->lock else repatriate can't sleep */
|
||||
spin_unlock(&hb->lock);
|
||||
if (!intransit)
|
||||
ret = (*tmem_pamops.repatriate)(old_pampd, new_pampd, pool,
|
||||
oidp, index, free, data);
|
||||
if (ret == -EAGAIN) {
|
||||
/* rare I think, but should cond_resched()??? */
|
||||
usleep_range(10, 1000);
|
||||
} else if (ret == -ENOTCONN || ret == -EHOSTDOWN) {
|
||||
ret = -1;
|
||||
} else if (ret != 0 && ret != -ENOENT) {
|
||||
ret = -1;
|
||||
}
|
||||
/* note hb->lock has now been unlocked */
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* For ramster only. If a page in tmem matches the handle, replace the
|
||||
* page so that any subsequent "get" gets the new page. Returns 0 if
|
||||
* there was a page to replace, else returns -1.
|
||||
*/
|
||||
int tmem_replace(struct tmem_pool *pool, struct tmem_oid *oidp,
|
||||
uint32_t index, void *new_pampd)
|
||||
{
|
||||
struct tmem_obj *obj;
|
||||
int ret = -1;
|
||||
struct tmem_hashbucket *hb;
|
||||
|
||||
hb = &pool->hashbucket[tmem_oid_hash(oidp)];
|
||||
spin_lock(&hb->lock);
|
||||
obj = tmem_obj_find(hb, oidp);
|
||||
if (obj == NULL)
|
||||
goto out;
|
||||
new_pampd = tmem_pampd_replace_in_obj(obj, index, new_pampd, 0);
|
||||
/* if we bug here, pamops wasn't properly set up for ramster */
|
||||
BUG_ON(tmem_pamops.replace_in_obj == NULL);
|
||||
ret = (*tmem_pamops.replace_in_obj)(new_pampd, obj);
|
||||
out:
|
||||
spin_unlock(&hb->lock);
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
 * "Get" a page, e.g. if a pampd can be found matching the passed handle,
 * use a pamops callback to recreate the page from the pampd with the
 * matching handle.  By tmem definition, when a "get" is successful on
 * an ephemeral page, the page is "flushed", and when a "get" is successful
 * on a persistent page, the page is retained in tmem.  Note that to preserve
 * coherency, "get" can never be skipped if tmem contains the data.
 * That is, if a get is done with a certain handle and fails, any
 * subsequent "get" must also fail (unless of course there is a
 * "put" done with the same handle).
 */
|
||||
int tmem_get(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index,
|
||||
char *data, size_t *sizep, bool raw, int get_and_free)
|
||||
{
|
||||
struct tmem_obj *obj;
|
||||
void *pampd = NULL;
|
||||
bool ephemeral = is_ephemeral(pool);
|
||||
int ret = -1;
|
||||
struct tmem_hashbucket *hb;
|
||||
bool free = (get_and_free == 1) || ((get_and_free == 0) && ephemeral);
|
||||
bool lock_held = false;
|
||||
void **ppampd;
|
||||
|
||||
do {
|
||||
hb = &pool->hashbucket[tmem_oid_hash(oidp)];
|
||||
spin_lock(&hb->lock);
|
||||
lock_held = true;
|
||||
obj = tmem_obj_find(hb, oidp);
|
||||
if (obj == NULL)
|
||||
goto out;
|
||||
ppampd = __tmem_pampd_lookup_in_obj(obj, index);
|
||||
if (ppampd == NULL)
|
||||
goto out;
|
||||
#ifdef CONFIG_RAMSTER
|
||||
if ((tmem_pamops.is_remote != NULL) &&
|
||||
tmem_pamops.is_remote(*ppampd)) {
|
||||
ret = tmem_repatriate(ppampd, hb, pool, oidp,
|
||||
index, free, data);
|
||||
/* tmem_repatriate releases hb->lock */
|
||||
lock_held = false;
|
||||
*sizep = PAGE_SIZE;
|
||||
if (ret != -EAGAIN)
|
||||
goto out;
|
||||
}
|
||||
#endif
|
||||
} while (ret == -EAGAIN);
|
||||
if (free)
|
||||
pampd = tmem_pampd_delete_from_obj(obj, index);
|
||||
else
|
||||
pampd = tmem_pampd_lookup_in_obj(obj, index);
|
||||
if (pampd == NULL)
|
||||
goto out;
|
||||
if (free) {
|
||||
if (obj->pampd_count == 0) {
|
||||
tmem_obj_free(obj, hb);
|
||||
(*tmem_hostops.obj_free)(obj, pool);
|
||||
obj = NULL;
|
||||
}
|
||||
}
|
||||
if (free)
|
||||
ret = (*tmem_pamops.get_data_and_free)(
|
||||
data, sizep, raw, pampd, pool, oidp, index);
|
||||
else
|
||||
ret = (*tmem_pamops.get_data)(
|
||||
data, sizep, raw, pampd, pool, oidp, index);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
ret = 0;
|
||||
out:
|
||||
if (lock_held)
|
||||
spin_unlock(&hb->lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* If a page in tmem matches the handle, "flush" this page from tmem such
|
||||
* that any subsequent "get" does not succeed (unless, of course, there
|
||||
* was another "put" with the same handle).
|
||||
*/
|
||||
int tmem_flush_page(struct tmem_pool *pool,
|
||||
struct tmem_oid *oidp, uint32_t index)
|
||||
{
|
||||
struct tmem_obj *obj;
|
||||
void *pampd;
|
||||
int ret = -1;
|
||||
struct tmem_hashbucket *hb;
|
||||
|
||||
hb = &pool->hashbucket[tmem_oid_hash(oidp)];
|
||||
spin_lock(&hb->lock);
|
||||
obj = tmem_obj_find(hb, oidp);
|
||||
if (obj == NULL)
|
||||
goto out;
|
||||
pampd = tmem_pampd_delete_from_obj(obj, index);
|
||||
if (pampd == NULL)
|
||||
goto out;
|
||||
(*tmem_pamops.free)(pampd, pool, oidp, index, true);
|
||||
if (obj->pampd_count == 0) {
|
||||
tmem_obj_free(obj, hb);
|
||||
(*tmem_hostops.obj_free)(obj, pool);
|
||||
}
|
||||
ret = 0;
|
||||
|
||||
out:
|
||||
spin_unlock(&hb->lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* "Flush" all pages in tmem matching this oid.
|
||||
*/
|
||||
int tmem_flush_object(struct tmem_pool *pool, struct tmem_oid *oidp)
|
||||
{
|
||||
struct tmem_obj *obj;
|
||||
struct tmem_hashbucket *hb;
|
||||
int ret = -1;
|
||||
|
||||
hb = &pool->hashbucket[tmem_oid_hash(oidp)];
|
||||
spin_lock(&hb->lock);
|
||||
obj = tmem_obj_find(hb, oidp);
|
||||
if (obj == NULL)
|
||||
goto out;
|
||||
tmem_pampd_destroy_all_in_obj(obj, false);
|
||||
tmem_obj_free(obj, hb);
|
||||
(*tmem_hostops.obj_free)(obj, pool);
|
||||
ret = 0;
|
||||
|
||||
out:
|
||||
spin_unlock(&hb->lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* "Flush" all pages (and tmem_objs) from this tmem_pool and disable
|
||||
* all subsequent access to this tmem_pool.
|
||||
*/
|
||||
int tmem_destroy_pool(struct tmem_pool *pool)
|
||||
{
|
||||
int ret = -1;
|
||||
|
||||
if (pool == NULL)
|
||||
goto out;
|
||||
tmem_pool_flush(pool, 1);
|
||||
ret = 0;
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static LIST_HEAD(tmem_global_pool_list);
|
||||
|
||||
/*
|
||||
* Create a new tmem_pool with the provided flag and return
|
||||
* a pool id provided by the tmem host implementation.
|
||||
*/
|
||||
void tmem_new_pool(struct tmem_pool *pool, uint32_t flags)
|
||||
{
|
||||
int persistent = flags & TMEM_POOL_PERSIST;
|
||||
int shared = flags & TMEM_POOL_SHARED;
|
||||
struct tmem_hashbucket *hb = &pool->hashbucket[0];
|
||||
int i;
|
||||
|
||||
for (i = 0; i < TMEM_HASH_BUCKETS; i++, hb++) {
|
||||
hb->obj_rb_root = RB_ROOT;
|
||||
spin_lock_init(&hb->lock);
|
||||
}
|
||||
INIT_LIST_HEAD(&pool->pool_list);
|
||||
atomic_set(&pool->obj_count, 0);
|
||||
SET_SENTINEL(pool, POOL);
|
||||
list_add_tail(&pool->pool_list, &tmem_global_pool_list);
|
||||
pool->persistent = persistent;
|
||||
pool->shared = shared;
|
||||
}
|
drivers/staging/ramster/tmem.h (new file, 259 lines)
@@ -0,0 +1,259 @@
|
||||
/*
|
||||
* tmem.h
|
||||
*
|
||||
* Transcendent memory
|
||||
*
|
||||
* Copyright (c) 2009-2012, Dan Magenheimer, Oracle Corp.
|
||||
*/
|
||||
|
||||
#ifndef _TMEM_H_
|
||||
#define _TMEM_H_
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/hash.h>
|
||||
#include <linux/atomic.h>
|
||||
|
||||
/*
|
||||
* These are defined by the Xen<->Linux ABI so should remain consistent
|
||||
*/
|
||||
#define TMEM_POOL_PERSIST 1
|
||||
#define TMEM_POOL_SHARED 2
|
||||
#define TMEM_POOL_PRECOMPRESSED 4
|
||||
#define TMEM_POOL_PAGESIZE_SHIFT 4
|
||||
#define TMEM_POOL_PAGESIZE_MASK 0xf
|
||||
#define TMEM_POOL_RESERVED_BITS 0x00ffff00
|
||||
|
||||
/*
|
||||
* sentinels have proven very useful for debugging but can be removed
|
||||
* or disabled before final merge.
|
||||
*/
|
||||
#undef SENTINELS
|
||||
#ifdef SENTINELS
|
||||
#define DECL_SENTINEL uint32_t sentinel;
|
||||
#define SET_SENTINEL(_x, _y) (_x->sentinel = _y##_SENTINEL)
|
||||
#define INVERT_SENTINEL(_x, _y) (_x->sentinel = ~_y##_SENTINEL)
|
||||
#define ASSERT_SENTINEL(_x, _y) WARN_ON(_x->sentinel != _y##_SENTINEL)
|
||||
#define ASSERT_INVERTED_SENTINEL(_x, _y) WARN_ON(_x->sentinel != ~_y##_SENTINEL)
|
||||
#else
|
||||
#define DECL_SENTINEL
|
||||
#define SET_SENTINEL(_x, _y) do { } while (0)
|
||||
#define INVERT_SENTINEL(_x, _y) do { } while (0)
|
||||
#define ASSERT_SENTINEL(_x, _y) do { } while (0)
|
||||
#define ASSERT_INVERTED_SENTINEL(_x, _y) do { } while (0)
|
||||
#endif
|
||||
|
||||
#define ASSERT_SPINLOCK(_l) lockdep_assert_held(_l)
|
||||
|
||||
/*
|
||||
* A pool is the highest-level data structure managed by tmem and
|
||||
* usually corresponds to a large independent set of pages such as
|
||||
* a filesystem. Each pool has an id, and certain attributes and counters.
|
||||
* It also contains a set of hash buckets, each of which contains an rbtree
|
||||
* of objects and a lock to manage concurrency within the pool.
|
||||
*/
|
||||
|
||||
#define TMEM_HASH_BUCKET_BITS 8
|
||||
#define TMEM_HASH_BUCKETS (1<<TMEM_HASH_BUCKET_BITS)
|
||||
|
||||
struct tmem_hashbucket {
|
||||
struct rb_root obj_rb_root;
|
||||
spinlock_t lock;
|
||||
};
|
||||
|
||||
struct tmem_pool {
|
||||
void *client; /* "up" for some clients, avoids table lookup */
|
||||
struct list_head pool_list;
|
||||
uint32_t pool_id;
|
||||
bool persistent;
|
||||
bool shared;
|
||||
atomic_t obj_count;
|
||||
atomic_t refcount;
|
||||
struct tmem_hashbucket hashbucket[TMEM_HASH_BUCKETS];
|
||||
DECL_SENTINEL
|
||||
};
|
||||
|
||||
#define is_persistent(_p) (_p->persistent)
|
||||
#define is_ephemeral(_p) (!(_p->persistent))
|
||||
|
||||
/*
|
||||
* An object id ("oid") is large: 192-bits (to ensure, for example, files
|
||||
* in a modern filesystem can be uniquely identified).
|
||||
*/
|
||||
|
||||
struct tmem_oid {
|
||||
uint64_t oid[3];
|
||||
};
|
||||
|
||||
static inline void tmem_oid_set_invalid(struct tmem_oid *oidp)
|
||||
{
|
||||
oidp->oid[0] = oidp->oid[1] = oidp->oid[2] = -1UL;
|
||||
}
|
||||
|
||||
static inline bool tmem_oid_valid(struct tmem_oid *oidp)
|
||||
{
|
||||
return oidp->oid[0] != -1UL || oidp->oid[1] != -1UL ||
|
||||
oidp->oid[2] != -1UL;
|
||||
}
|
||||
|
||||
static inline int tmem_oid_compare(struct tmem_oid *left,
|
||||
struct tmem_oid *right)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (left->oid[2] == right->oid[2]) {
|
||||
if (left->oid[1] == right->oid[1]) {
|
||||
if (left->oid[0] == right->oid[0])
|
||||
ret = 0;
|
||||
else if (left->oid[0] < right->oid[0])
|
||||
ret = -1;
|
||||
else
|
||||
return 1;
|
||||
} else if (left->oid[1] < right->oid[1])
|
||||
ret = -1;
|
||||
else
|
||||
ret = 1;
|
||||
} else if (left->oid[2] < right->oid[2])
|
||||
ret = -1;
|
||||
else
|
||||
ret = 1;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline unsigned tmem_oid_hash(struct tmem_oid *oidp)
|
||||
{
|
||||
return hash_long(oidp->oid[0] ^ oidp->oid[1] ^ oidp->oid[2],
|
||||
TMEM_HASH_BUCKET_BITS);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_RAMSTER
|
||||
struct tmem_xhandle {
|
||||
uint8_t client_id;
|
||||
uint8_t xh_data_cksum;
|
||||
uint16_t xh_data_size;
|
||||
uint16_t pool_id;
|
||||
struct tmem_oid oid;
|
||||
uint32_t index;
|
||||
void *extra;
|
||||
};
|
||||
|
||||
static inline struct tmem_xhandle tmem_xhandle_fill(uint16_t client_id,
|
||||
struct tmem_pool *pool,
|
||||
struct tmem_oid *oidp,
|
||||
uint32_t index)
|
||||
{
|
||||
struct tmem_xhandle xh;
|
||||
xh.client_id = client_id;
|
||||
xh.xh_data_cksum = (uint8_t)-1;
|
||||
xh.xh_data_size = (uint16_t)-1;
|
||||
xh.pool_id = pool->pool_id;
|
||||
xh.oid = *oidp;
|
||||
xh.index = index;
|
||||
return xh;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
* A tmem_obj contains an identifier (oid), pointers to the parent
|
||||
* pool and the rb_tree to which it belongs, counters, and an ordered
|
||||
* set of pampds, structured in a radix-tree-like tree. The intermediate
|
||||
* nodes of the tree are called tmem_objnodes.
|
||||
*/
|
||||
|
||||
struct tmem_objnode;
|
||||
|
||||
struct tmem_obj {
|
||||
struct tmem_oid oid;
|
||||
struct tmem_pool *pool;
|
||||
struct rb_node rb_tree_node;
|
||||
struct tmem_objnode *objnode_tree_root;
|
||||
unsigned int objnode_tree_height;
|
||||
unsigned long objnode_count;
|
||||
long pampd_count;
|
||||
#ifdef CONFIG_RAMSTER
|
||||
/*
|
||||
* for current design of ramster, all pages belonging to
|
||||
* an object reside on the same remotenode and extra is
|
||||
* used to record the number of the remotenode so a
|
||||
* flush-object operation can specify it
|
||||
*/
|
||||
void *extra; /* for private use by pampd implementation */
|
||||
#endif
|
||||
DECL_SENTINEL
|
||||
};
|
||||
|
||||
#define OBJNODE_TREE_MAP_SHIFT 6
|
||||
#define OBJNODE_TREE_MAP_SIZE (1UL << OBJNODE_TREE_MAP_SHIFT)
|
||||
#define OBJNODE_TREE_MAP_MASK (OBJNODE_TREE_MAP_SIZE-1)
|
||||
#define OBJNODE_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long))
|
||||
#define OBJNODE_TREE_MAX_PATH \
|
||||
(OBJNODE_TREE_INDEX_BITS/OBJNODE_TREE_MAP_SHIFT + 2)
|
||||
|
||||
struct tmem_objnode {
|
||||
struct tmem_obj *obj;
|
||||
DECL_SENTINEL
|
||||
void *slots[OBJNODE_TREE_MAP_SIZE];
|
||||
unsigned int slots_in_use;
|
||||
};
|
||||
|
||||
struct tmem_handle {
|
||||
struct tmem_oid oid; /* 24 bytes */
|
||||
uint32_t index;
|
||||
uint16_t pool_id;
|
||||
uint16_t client_id;
|
||||
};
|
||||
|
||||
|
||||
/* pampd abstract datatype methods provided by the PAM implementation */
|
||||
struct tmem_pamops {
|
||||
void (*create_finish)(void *, bool);
|
||||
int (*get_data)(char *, size_t *, bool, void *, struct tmem_pool *,
|
||||
struct tmem_oid *, uint32_t);
|
||||
int (*get_data_and_free)(char *, size_t *, bool, void *,
|
||||
struct tmem_pool *, struct tmem_oid *,
|
||||
uint32_t);
|
||||
void (*free)(void *, struct tmem_pool *,
|
||||
struct tmem_oid *, uint32_t, bool);
|
||||
#ifdef CONFIG_RAMSTER
|
||||
void (*new_obj)(struct tmem_obj *);
|
||||
void (*free_obj)(struct tmem_pool *, struct tmem_obj *, bool);
|
||||
void *(*repatriate_preload)(void *, struct tmem_pool *,
|
||||
struct tmem_oid *, uint32_t, bool *);
|
||||
int (*repatriate)(void *, void *, struct tmem_pool *,
|
||||
struct tmem_oid *, uint32_t, bool, void *);
|
||||
bool (*is_remote)(void *);
|
||||
int (*replace_in_obj)(void *, struct tmem_obj *);
|
||||
#endif
|
||||
};
|
||||
extern void tmem_register_pamops(struct tmem_pamops *m);
|
||||
|
||||
/* memory allocation methods provided by the host implementation */
|
||||
struct tmem_hostops {
|
||||
struct tmem_obj *(*obj_alloc)(struct tmem_pool *);
|
||||
void (*obj_free)(struct tmem_obj *, struct tmem_pool *);
|
||||
struct tmem_objnode *(*objnode_alloc)(struct tmem_pool *);
|
||||
void (*objnode_free)(struct tmem_objnode *, struct tmem_pool *);
|
||||
};
|
||||
extern void tmem_register_hostops(struct tmem_hostops *m);
|
||||
|
||||
/* core tmem accessor functions */
|
||||
extern int tmem_put(struct tmem_pool *, struct tmem_oid *, uint32_t index,
|
||||
bool, void *);
|
||||
extern int tmem_get(struct tmem_pool *, struct tmem_oid *, uint32_t index,
|
||||
char *, size_t *, bool, int);
|
||||
extern int tmem_flush_page(struct tmem_pool *, struct tmem_oid *,
|
||||
uint32_t index);
|
||||
extern int tmem_flush_object(struct tmem_pool *, struct tmem_oid *);
|
||||
extern int tmem_destroy_pool(struct tmem_pool *);
|
||||
extern void tmem_new_pool(struct tmem_pool *, uint32_t);
|
||||
#ifdef CONFIG_RAMSTER
|
||||
extern int tmem_replace(struct tmem_pool *, struct tmem_oid *, uint32_t index,
|
||||
void *);
|
||||
extern void *tmem_localify_get_pampd(struct tmem_pool *, struct tmem_oid *,
|
||||
uint32_t index, struct tmem_obj **,
|
||||
void **);
|
||||
extern void tmem_localify_finish(struct tmem_obj *, uint32_t index,
|
||||
void *, void *, bool);
|
||||
#endif
|
||||
#endif /* _TMEM_H */
|
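
The hostops and pamops registration hooks declared in tmem.h above are the
seam between generic tmem and its host.  Below is a minimal sketch (not the
actual zcache-main.c implementation) of what a host must supply to
tmem_register_hostops(); the my_* names and the kmem_cache-backed allocation
are placeholder assumptions.

#include <linux/slab.h>
#include "tmem.h"

/* hypothetical host-side caches for tmem metadata (placeholders) */
static struct kmem_cache *my_obj_cache;
static struct kmem_cache *my_objnode_cache;

static struct tmem_obj *my_obj_alloc(struct tmem_pool *pool)
{
	return kmem_cache_alloc(my_obj_cache, GFP_ATOMIC);
}

static void my_obj_free(struct tmem_obj *obj, struct tmem_pool *pool)
{
	kmem_cache_free(my_obj_cache, obj);
}

static struct tmem_objnode *my_objnode_alloc(struct tmem_pool *pool)
{
	return kmem_cache_alloc(my_objnode_cache, GFP_ATOMIC);
}

static void my_objnode_free(struct tmem_objnode *objnode,
			    struct tmem_pool *pool)
{
	kmem_cache_free(my_objnode_cache, objnode);
}

static struct tmem_hostops my_hostops = {
	.obj_alloc = my_obj_alloc,
	.obj_free = my_obj_free,
	.objnode_alloc = my_objnode_alloc,
	.objnode_free = my_objnode_free,
};

static void my_host_init(void)
{
	/* error handling omitted in this sketch */
	my_obj_cache = KMEM_CACHE(tmem_obj, 0);
	my_objnode_cache = KMEM_CACHE(tmem_objnode, 0);
	tmem_register_hostops(&my_hostops);
}

Registration of the pamops (tmem_register_pamops) follows the same pattern,
with the callbacks listed in struct tmem_pamops above.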
drivers/staging/ramster/zbud.c (new file, 1060 lines)
(file diff suppressed because it is too large)
drivers/staging/ramster/zbud.h (new file, 33 lines)
@@ -0,0 +1,33 @@
/*
 * zbud.h
 *
 * Copyright (c) 2010-2012, Dan Magenheimer, Oracle Corp.
 *
 */

#ifndef _ZBUD_H_
#define _ZBUD_H_

#include "tmem.h"

struct zbudref;

extern unsigned int zbud_max_buddy_size(void);
extern struct zbudref *zbud_match_prep(struct tmem_handle *th, bool eph,
					void *cdata, unsigned size);
extern struct zbudref *zbud_create_prep(struct tmem_handle *th, bool eph,
					void *cdata, unsigned size,
					struct page *newpage);
extern void zbud_create_finish(struct zbudref *, bool);
extern int zbud_decompress(struct page *, struct zbudref *, bool,
				void (*func)(char *, unsigned int, char *));
extern int zbud_copy_from_zbud(char *, struct zbudref *, size_t *, bool);
extern int zbud_copy_to_zbud(struct zbudref *, char *, bool);
extern struct page *zbud_free_and_delist(struct zbudref *, bool eph,
				unsigned int *, unsigned int *);
extern struct page *zbud_evict_pageframe_lru(unsigned int *, unsigned int *);
extern unsigned int zbud_make_zombie_lru(struct tmem_handle *, unsigned char **,
					unsigned int *, bool);
extern void zbud_init(void);

#endif /* _ZBUD_H_ */
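
zbud.c itself is suppressed above, so the following is only a plausible call
sequence inferred from the zbud.h declarations and from highlight 4 of the
commit message (try existing partial pages before allocating a new
pageframe).  example_zbud_store() and its arguments are placeholders, not
the actual zcache-main.c code.

#include <linux/gfp.h>
#include "zbud.h"

/* hypothetical: store already-compressed data (cdata, size) under handle th */
static struct zbudref *example_zbud_store(struct tmem_handle *th, bool eph,
					  void *cdata, unsigned size)
{
	struct zbudref *zref;
	struct page *newpage;

	/* first try to "buddy" the data into an existing partial page */
	zref = zbud_match_prep(th, eph, cdata, size);
	if (zref != NULL)
		goto finish;

	/* otherwise the host allocates the pageframe; zbud never does */
	newpage = alloc_page(GFP_ATOMIC);
	if (newpage == NULL)
		return NULL;
	zref = zbud_create_prep(th, eph, cdata, size, newpage);
	if (zref == NULL) {
		__free_page(newpage);
		return NULL;
	}
finish:
	/* finalize placement on the ephemeral or persistent LRU queue */
	zbud_create_finish(zref, eph);
	return zref;
}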
drivers/staging/ramster/zcache-main.c (new file, 1812 lines)
(file diff suppressed because it is too large)
drivers/staging/ramster/zcache.h (new file, 53 lines)
@@ -0,0 +1,53 @@

/*
 * zcache.h
 *
 * Copyright (c) 2012, Dan Magenheimer, Oracle Corp.
 */

#ifndef _ZCACHE_H_
#define _ZCACHE_H_

struct zcache_preload {
	struct tmem_obj *obj;
	struct tmem_objnode *objnodes[OBJNODE_TREE_MAX_PATH];
};

struct tmem_pool;

#define MAX_POOLS_PER_CLIENT 16

#define MAX_CLIENTS 16
#define LOCAL_CLIENT ((uint16_t)-1)

struct zcache_client {
	struct tmem_pool *tmem_pools[MAX_POOLS_PER_CLIENT];
	bool allocated;
	atomic_t refcount;
};

extern struct tmem_pool *zcache_get_pool_by_id(uint16_t cli_id,
					       uint16_t poolid);
extern void zcache_put_pool(struct tmem_pool *pool);

extern int zcache_put_page(int, int, struct tmem_oid *,
				uint32_t, void *,
				unsigned int, bool, int);
extern int zcache_get_page(int, int, struct tmem_oid *, uint32_t,
				void *, size_t *, bool, int);
extern int zcache_flush_page(int, int, struct tmem_oid *, uint32_t);
extern int zcache_flush_object(int, int, struct tmem_oid *);
extern void zcache_decompress_to_page(char *, unsigned int, struct page *);

#ifdef CONFIG_RAMSTER
extern void *zcache_pampd_create(char *, unsigned int, bool, int,
				struct tmem_handle *);
extern int zcache_autocreate_pool(int, int, bool);
#endif

#define MAX_POOLS_PER_CLIENT 16

#define MAX_CLIENTS 16
#define LOCAL_CLIENT ((uint16_t)-1)

#endif /* _ZCACHE_H_ */