RDMA/restrack: Prepare restrack_root to addition of extra fields per-type

As a preparation for extending rdma_restrack_root to provide software
IDs, which will be per-type too, we convert rdma_restrack_root from a
struct with arrays into an array of per-type structs.

Such a conversion allows us to drop the rwsem lock in favour of the
XArray's internal lock.

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
commit 7c77c6a9bf
parent 41eda65c61
Author:    Leon Romanovsky <leonro@mellanox.com>
Date:      2019-02-18 22:25:48 +02:00
Committer: Jason Gunthorpe <jgg@mellanox.com>

 3 files changed, 43 insertions(+), 78 deletions(-)
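Conceptually, the layout change is the following (a simplified sketch assembled from the restrack.h hunk below; kernel-doc comments elided):

/* Before: one shared root; erases serialized behind a single rwsem */
struct rdma_restrack_root {
	struct rw_semaphore rwsem;
	struct xarray xa[RDMA_RESTRACK_MAX];
	u32 next_id[RDMA_RESTRACK_MAX];
};	/* dev->res is a single kzalloc'ed instance */

/* After: one root per resource type; the XArray's own lock is enough */
struct rdma_restrack_root {
	struct xarray xa;
	u32 next_id;
};	/* dev->res is a kcalloc'ed array of RDMA_RESTRACK_MAX instances */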

diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -1018,6 +1018,7 @@ static int res_get_common_dumpit(struct sk_buff *skb,
 	const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
 	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
 	struct rdma_restrack_entry *res;
+	struct rdma_restrack_root *rt;
 	int err, ret = 0, idx = 0;
 	struct nlattr *table_attr;
 	struct nlattr *entry_attr;
@@ -1028,7 +1029,6 @@ static int res_get_common_dumpit(struct sk_buff *skb,
 	unsigned long id;
 	u32 index, port = 0;
 	bool filled = false;
-	struct xarray *xa;
 
 	err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
 			  nldev_policy, NULL);
@@ -1076,14 +1076,14 @@ static int res_get_common_dumpit(struct sk_buff *skb,
 
 	has_cap_net_admin = netlink_capable(cb->skb, CAP_NET_ADMIN);
 
-	xa = &device->res->xa[res_type];
-	down_read(&device->res->rwsem);
+	rt = &device->res[res_type];
+	xa_lock(&rt->xa);
 	/*
 	 * FIXME: if the skip ahead is something common this loop should
 	 * use xas_for_each & xas_pause to optimize, we can have a lot of
 	 * objects.
	 */
-	xa_for_each(xa, id, res) {
+	xa_for_each(&rt->xa, id, res) {
 		if (idx < start)
 			goto next;
 
@@ -1091,45 +1091,37 @@ static int res_get_common_dumpit(struct sk_buff *skb,
 			goto next;
 
 		if (!rdma_restrack_get(res))
-			/*
-			 * Resource is under release now, but we are not
-			 * relesing lock now, so it will be released in
-			 * our next pass, once we will get ->next pointer.
-			 */
 			goto next;
 
+		xa_unlock(&rt->xa);
+
 		filled = true;
 
 		entry_attr = nla_nest_start(skb, fe->entry);
 		if (!entry_attr) {
 			ret = -EMSGSIZE;
 			rdma_restrack_put(res);
-			up_read(&device->res->rwsem);
-			break;
+			goto msg_full;
 		}
-		up_read(&device->res->rwsem);
 		ret = fe->fill_res_func(skb, has_cap_net_admin, res, port);
-		down_read(&device->res->rwsem);
-		/*
-		 * Return resource back, but it won't be released till
-		 * the &device->res.rwsem will be released for write.
-		 */
 		rdma_restrack_put(res);
-		if (ret)
+		if (ret) {
 			nla_nest_cancel(skb, entry_attr);
-		if (ret == -EMSGSIZE)
-			break;
-		if (ret == -EAGAIN)
-			goto next;
-		if (ret)
-			goto res_err;
+			if (ret == -EMSGSIZE)
+				goto msg_full;
+			if (ret == -EAGAIN)
+				goto again;
+			goto res_err;
+		}
 
 		nla_nest_end(skb, entry_attr);
+again:		xa_lock(&rt->xa);
 next:		idx++;
 	}
-	up_read(&device->res->rwsem);
+	xa_unlock(&rt->xa);
 
+msg_full:
 	nla_nest_end(skb, table_attr);
 	nlmsg_end(skb, nlh);
 
 	cb->args[0] = idx;
@@ -1146,7 +1138,6 @@ next:		idx++;
 
 res_err:
 	nla_nest_cancel(skb, table_attr);
-	up_read(&device->res->rwsem);
 
 err:
 	nlmsg_cancel(skb, nlh);
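The locking pattern of the new dump loop is worth spelling out: the XArray spinlock is not held across fe->fill_res_func(), so the loop pins the entry with rdma_restrack_get(), drops xa_lock for the fill, and re-takes it at the new again: label before advancing. A reduced sketch of that shape follows; dump_one_type() and fill_one() are hypothetical stand-ins, and the msg_full/res_err error handling of the real function is elided:

/* Sketch only: mirrors the new res_get_common_dumpit() locking. */
static void dump_one_type(struct ib_device *device,
			  enum rdma_restrack_type res_type,
			  struct sk_buff *skb)
{
	struct rdma_restrack_root *rt = &device->res[res_type];
	struct rdma_restrack_entry *res;
	unsigned long id;

	xa_lock(&rt->xa);
	xa_for_each(&rt->xa, id, res) {
		if (!rdma_restrack_get(res))	/* entry is being torn down */
			continue;

		xa_unlock(&rt->xa);	/* don't hold the spinlock over the fill */
		fill_one(skb, res);	/* stands in for fe->fill_res_func() */
		rdma_restrack_put(res);
		xa_lock(&rt->xa);	/* xa_for_each() resumes safely from 'id' */
	}
	xa_unlock(&rt->xa);
}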

diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
--- a/drivers/infiniband/core/restrack.c
+++ b/drivers/infiniband/core/restrack.c
@@ -9,7 +9,6 @@
 #include <linux/mutex.h>
 #include <linux/sched/task.h>
 #include <linux/pid_namespace.h>
-#include <linux/rwsem.h>
 
 #include "cma_priv.h"
 #include "restrack.h"
@@ -47,15 +46,14 @@ int rdma_restrack_init(struct ib_device *dev)
 	struct rdma_restrack_root *rt;
 	int i;
 
-	dev->res = kzalloc(sizeof(*rt), GFP_KERNEL);
+	dev->res = kcalloc(RDMA_RESTRACK_MAX, sizeof(*rt), GFP_KERNEL);
 	if (!dev->res)
 		return -ENOMEM;
 
 	rt = dev->res;
 
-	for (i = 0 ; i < RDMA_RESTRACK_MAX; i++)
-		xa_init_flags(&rt->xa[i], XA_FLAGS_ALLOC);
-	init_rwsem(&rt->rwsem);
+	for (i = 0; i < RDMA_RESTRACK_MAX; i++)
+		xa_init_flags(&rt[i].xa, XA_FLAGS_ALLOC);
 
 	return 0;
 }
@@ -88,7 +86,7 @@ void rdma_restrack_clean(struct ib_device *dev)
 	int i;
 
 	for (i = 0 ; i < RDMA_RESTRACK_MAX; i++) {
-		struct xarray *xa = &dev->res->xa[i];
+		struct xarray *xa = &dev->res[i].xa;
 
 		if (!xa_empty(xa)) {
 			unsigned long index;
@@ -134,19 +132,19 @@ void rdma_restrack_clean(struct ib_device *dev)
 int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type,
 			struct pid_namespace *ns)
 {
-	struct xarray *xa = &dev->res->xa[type];
+	struct rdma_restrack_root *rt = &dev->res[type];
 	struct rdma_restrack_entry *e;
-	unsigned long index = 0;
+	XA_STATE(xas, &rt->xa, 0);
 	u32 cnt = 0;
 
-	down_read(&dev->res->rwsem);
-	xa_for_each(xa, index, e) {
+	xa_lock(&rt->xa);
+	xas_for_each(&xas, e, U32_MAX) {
 		if (ns == &init_pid_ns ||
 		    (!rdma_is_kernel_res(e) &&
 		     ns == task_active_pid_ns(e->task)))
 			cnt++;
 	}
-	up_read(&dev->res->rwsem);
+	xa_unlock(&rt->xa);
 	return cnt;
 }
 EXPORT_SYMBOL(rdma_restrack_count);
@@ -218,18 +216,16 @@ static void rdma_restrack_add(struct rdma_restrack_entry *res)
 {
 	struct ib_device *dev = res_to_dev(res);
 	struct rdma_restrack_root *rt;
-	struct xarray *xa;
 	int ret;
 
 	if (!dev)
 		return;
 
-	rt = dev->res;
-	xa = &dev->res->xa[res->type];
+	rt = &dev->res[res->type];
 
 	kref_init(&res->kref);
 	init_completion(&res->comp);
-	ret = rt_xa_alloc_cyclic(xa, &res->id, res, &rt->next_id[res->type]);
+	ret = rt_xa_alloc_cyclic(&rt->xa, &res->id, res, &rt->next_id);
 	if (!ret)
 		res->valid = true;
 }
@@ -283,14 +279,14 @@ struct rdma_restrack_entry *
 rdma_restrack_get_byid(struct ib_device *dev,
 		       enum rdma_restrack_type type, u32 id)
 {
-	struct xarray *xa = &dev->res->xa[type];
+	struct rdma_restrack_root *rt = &dev->res[type];
 	struct rdma_restrack_entry *res;
 
-	down_read(&dev->res->rwsem);
-	res = xa_load(xa, id);
+	xa_lock(&rt->xa);
+	res = xa_load(&rt->xa, id);
 	if (!res || !rdma_restrack_get(res))
 		res = ERR_PTR(-ENOENT);
-	up_read(&dev->res->rwsem);
+	xa_unlock(&rt->xa);
 
 	return res;
 }
@@ -312,33 +308,22 @@ EXPORT_SYMBOL(rdma_restrack_put);
 void rdma_restrack_del(struct rdma_restrack_entry *res)
 {
-	struct ib_device *dev = res_to_dev(res);
-	struct xarray *xa;
+	struct rdma_restrack_entry *old;
+	struct rdma_restrack_root *rt;
+	struct ib_device *dev;
 
 	if (!res->valid)
 		goto out;
 
-	/*
-	 * All objects except CM_ID set valid device immediately
-	 * after new object is created, it means that for not valid
-	 * objects will still have "dev".
-	 *
-	 * It is not the case for CM_ID, newly created object has
-	 * this field set to NULL and it is set in _cma_attach_to_dev()
-	 * only.
-	 *
-	 * Because we don't want to add any conditions on call
-	 * to rdma_restrack_del(), the check below protects from
-	 * NULL-dereference.
-	 */
-	if (!dev)
+	dev = res_to_dev(res);
+	if (WARN_ON(!dev))
 		return;
 
-	xa = &dev->res->xa[res->type];
-	down_write(&dev->res->rwsem);
-	xa_erase(xa, res->id);
+	rt = &dev->res[res->type];
+
+	old = xa_erase(&rt->xa, res->id);
+	WARN_ON(old != res);
 	res->valid = false;
-	up_write(&dev->res->rwsem);
 
 	rdma_restrack_put(res);
 	wait_for_completion(&res->comp);
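rdma_restrack_count() now keeps an explicit cursor (XA_STATE) and walks it with xas_for_each() under the per-type spinlock; U32_MAX bounds the walk, matching the u32 indices an XA_FLAGS_ALLOC XArray hands out. The same pattern in isolation, with made-up names (count_in_xarray() and match() are illustrative, not kernel API):

/* Illustration only: count entries that satisfy a predicate while holding
 * the XArray's internal lock, the way rdma_restrack_count() now does. */
static u32 count_in_xarray(struct xarray *xa, bool (*match)(void *entry))
{
	XA_STATE(xas, xa, 0);	/* cursor starting at index 0 */
	void *entry;
	u32 cnt = 0;

	xa_lock(xa);
	xas_for_each(&xas, entry, U32_MAX) {
		if (match(entry))
			cnt++;
	}
	xa_unlock(xa);

	return cnt;
}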

diff --git a/drivers/infiniband/core/restrack.h b/drivers/infiniband/core/restrack.h
--- a/drivers/infiniband/core/restrack.h
+++ b/drivers/infiniband/core/restrack.h
@@ -7,33 +7,22 @@
 #define _RDMA_CORE_RESTRACK_H_
 
 #include <linux/mutex.h>
-#include <linux/rwsem.h>
 
 /**
 * struct rdma_restrack_root - main resource tracking management
 * entity, per-device
 */
 struct rdma_restrack_root {
-	/*
-	 * @rwsem: Read/write lock to protect erase of entry.
-	 * Lists and insertions are protected by XArray internal lock.
-	 */
-	struct rw_semaphore rwsem;
-
 	/**
-	 * @xa: Array of XArray structures to hold restrack entries.
-	 * We want to use array of XArrays because insertion is type
-	 * dependent. For types with xisiting unique ID (like QPN),
-	 * we will insert to that unique index. For other types,
-	 * we insert based on pointers and auto-allocate unique index.
+	 * @xa: Array of XArray structure to hold restrack entries.
	 */
-	struct xarray xa[RDMA_RESTRACK_MAX];
+	struct xarray xa;
 	/**
	 * @next_id: Next ID to support cyclic allocation
	 */
-	u32 next_id[RDMA_RESTRACK_MAX];
+	u32 next_id;
 };
 
 int rdma_restrack_init(struct ib_device *dev);
 void rdma_restrack_clean(struct ib_device *dev);
 #endif /* _RDMA_CORE_RESTRACK_H_ */
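With the array-of-structs layout, every call site picks its root by indexing dev->res with the resource type, and each type carries its own next_id for the cyclic ID allocation done by the driver-local rt_xa_alloc_cyclic() helper seen in restrack.c. A trivial accessor makes the indexing convention explicit (hypothetical helper; the patch itself open-codes &dev->res[type] at each site):

static inline struct rdma_restrack_root *
restrack_root(struct ib_device *dev, enum rdma_restrack_type type)
{
	/* dev->res is kcalloc'ed with RDMA_RESTRACK_MAX per-type roots */
	return &dev->res[type];
}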