Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma updates from Jason Gunthorpe:
 "This has been a smaller cycle with many of the commits being smallish
  code fixes and improvements across the drivers.

   - Driver updates for bnxt_re, cxgb4, hfi1, hns, mlx5, nes, qedr, and
     rxe

   - Memory window support in hns

   - mlx5 user API 'flow mutate/steering' allows accessing the full
     packet mangling and matching machinery from user space

   - Support inter-working with verbs API calls in the 'devx' mlx5 user
     API, and provide options to use devx with less privilege

   - Modernize the use of sysfs and the device interface to use attribute
     groups and cdev properly for uverbs, and clean up some of the core
     code's device list management

   - More progress on net namespaces for RDMA devices

   - Consolidate driver BAR mmapping support into core code helpers and
     rework how RDMA holds pointers to mm_struct for get_user_pages
     cases

   - First pass to use 'dev_name' instead of ib_device->name

   - Device renaming for RDMA devices"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (242 commits)
  IB/mlx5: Add support for extended atomic operations
  RDMA/core: Fix comment for hw stats init for port == 0
  RDMA/core: Refactor ib_register_device() function
  RDMA/core: Fix unwinding flow in case of error to register device
  ib_srp: Remove WARN_ON in srp_terminate_io()
  IB/mlx5: Allow scatter to CQE without global signaled WRs
  IB/mlx5: Verify that driver supports user flags
  IB/mlx5: Support scatter to CQE for DC transport type
  RDMA/drivers: Use core provided API for registering device attributes
  RDMA/core: Allow existing drivers to set one sysfs group per device
  IB/rxe: Remove unnecessary enum values
  RDMA/umad: Use kernel API to allocate umad indexes
  RDMA/uverbs: Use kernel API to allocate uverbs indexes
  RDMA/core: Increase total number of RDMA ports across all devices
  IB/mlx4: Add port and TID to MAD debug print
  IB/mlx4: Enable debug print of SMPs
  RDMA/core: Rename ports_parent to ports_kobj
  RDMA/core: Do not expose unsupported counters
  IB/mlx4: Refer to the device kobject instead of ports_parent
  RDMA/nldev: Allow IB device rename through RDMA netlink
  ...
Committed by Linus Torvalds on 2018-10-26 07:38:19 -07:00
204 changed files with 7633 additions and 5205 deletions

--- a/Documentation/ABI/testing/sysfs-class-net
+++ b/Documentation/ABI/testing/sysfs-class-net

@@ -91,6 +91,24 @@ Description:
 		stacked (e.g: VLAN interfaces) but still have the same MAC
 		address as their parent device.
 
+What:		/sys/class/net/<iface>/dev_port
+Date:		February 2014
+KernelVersion:	3.15
+Contact:	netdev@vger.kernel.org
+Description:
+		Indicates the port number of this network device, formatted
+		as a decimal value. Some NICs have multiple independent ports
+		on the same PCI bus, device and function. This attribute allows
+		userspace to distinguish the respective interfaces.
+
+		Note: some device drivers started to use 'dev_id' for this
+		purpose since long before 3.15 and have not adopted the new
+		attribute ever since. To query the port number, some tools look
+		exclusively at 'dev_port', while others only consult 'dev_id'.
+		If a network device has multiple client adapter ports as
+		described in the previous paragraph and does not set this
+		attribute to its port number, it's a kernel bug.
+
 What:		/sys/class/net/<iface>/dormant
 Date:		March 2006
 KernelVersion:	2.6.17
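Note: the added ABI text above is effectively a how-to for userspace. A minimal reader illustrating the documented fallback, assuming an interface named "eth0" and relying on 'dev_id' being hex-formatted as the note says (hypothetical example, not part of this patch):

#include <stdio.h>

/* Read a small integer attribute; "%li" also accepts the 0x-prefixed
 * hex that 'dev_id' uses. Returns 0 on success. */
static int read_sysfs_long(const char *path, long *val)
{
	FILE *f = fopen(path, "r");
	int ok;

	if (!f)
		return -1;
	ok = (fscanf(f, "%li", val) == 1);
	fclose(f);
	return ok ? 0 : -1;
}

int main(void)
{
	long port;

	/* Prefer the decimal 'dev_port', fall back to the older 'dev_id' */
	if (read_sysfs_long("/sys/class/net/eth0/dev_port", &port) &&
	    read_sysfs_long("/sys/class/net/eth0/dev_id", &port))
		return 1;
	printf("port: %ld\n", port);
	return 0;
}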

--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig

@@ -26,6 +26,7 @@ config INFINIBAND_USER_MAD
 config INFINIBAND_USER_ACCESS
 	tristate "InfiniBand userspace access (verbs and CM)"
 	select ANON_INODES
+	depends on MMU
 	---help---
 	  Userspace InfiniBand access support.  This enables the
 	  kernel side of userspace verbs and the userspace

--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c

@@ -45,6 +45,7 @@
 #include <net/addrconf.h>
 #include <net/ip6_route.h>
 #include <rdma/ib_addr.h>
+#include <rdma/ib_sa.h>
 #include <rdma/ib.h>
 #include <rdma/rdma_netlink.h>
 #include <net/netlink.h>
@@ -61,6 +62,7 @@ struct addr_req {
 			 struct rdma_dev_addr *addr, void *context);
 	unsigned long timeout;
 	struct delayed_work work;
+	bool resolve_by_gid_attr;	/* Consider gid attr in resolve phase */
 	int status;
 	u32 seq;
 };
@@ -219,18 +221,54 @@ int rdma_addr_size_kss(struct __kernel_sockaddr_storage *addr)
 }
 EXPORT_SYMBOL(rdma_addr_size_kss);
 
-void rdma_copy_addr(struct rdma_dev_addr *dev_addr,
-		    const struct net_device *dev,
-		    const unsigned char *dst_dev_addr)
+/**
+ * rdma_copy_src_l2_addr - Copy netdevice source addresses
+ * @dev_addr:	Destination address pointer where to copy the addresses
+ * @dev:	Netdevice whose source addresses to copy
+ *
+ * rdma_copy_src_l2_addr() copies source addresses from the specified netdevice.
+ * This includes unicast address, broadcast address, device type and
+ * interface index.
+ */
+void rdma_copy_src_l2_addr(struct rdma_dev_addr *dev_addr,
+			   const struct net_device *dev)
 {
 	dev_addr->dev_type = dev->type;
 	memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
 	memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
-	if (dst_dev_addr)
-		memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
 	dev_addr->bound_dev_if = dev->ifindex;
 }
-EXPORT_SYMBOL(rdma_copy_addr);
+EXPORT_SYMBOL(rdma_copy_src_l2_addr);
+
+static struct net_device *
+rdma_find_ndev_for_src_ip_rcu(struct net *net, const struct sockaddr *src_in)
+{
+	struct net_device *dev = NULL;
+	int ret = -EADDRNOTAVAIL;
+
+	switch (src_in->sa_family) {
+	case AF_INET:
+		dev = __ip_dev_find(net,
+				    ((const struct sockaddr_in *)src_in)->sin_addr.s_addr,
+				    false);
+		if (dev)
+			ret = 0;
+		break;
+#if IS_ENABLED(CONFIG_IPV6)
+	case AF_INET6:
+		for_each_netdev_rcu(net, dev) {
+			if (ipv6_chk_addr(net,
+					  &((const struct sockaddr_in6 *)src_in)->sin6_addr,
+					  dev, 1)) {
+				ret = 0;
+				break;
+			}
+		}
+		break;
+#endif
+	}
+	return ret ? ERR_PTR(ret) : dev;
+}
 
 int rdma_translate_ip(const struct sockaddr *addr,
 		      struct rdma_dev_addr *dev_addr)
@@ -241,38 +279,17 @@ int rdma_translate_ip(const struct sockaddr *addr,
 		dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
 		if (!dev)
 			return -ENODEV;
-		rdma_copy_addr(dev_addr, dev, NULL);
+		rdma_copy_src_l2_addr(dev_addr, dev);
 		dev_put(dev);
 		return 0;
 	}
 
-	switch (addr->sa_family) {
-	case AF_INET:
-		dev = ip_dev_find(dev_addr->net,
-			((const struct sockaddr_in *)addr)->sin_addr.s_addr);
-
-		if (!dev)
-			return -EADDRNOTAVAIL;
-
-		rdma_copy_addr(dev_addr, dev, NULL);
-		dev_put(dev);
-		break;
-#if IS_ENABLED(CONFIG_IPV6)
-	case AF_INET6:
-		rcu_read_lock();
-		for_each_netdev_rcu(dev_addr->net, dev) {
-			if (ipv6_chk_addr(dev_addr->net,
-					  &((const struct sockaddr_in6 *)addr)->sin6_addr,
-					  dev, 1)) {
-				rdma_copy_addr(dev_addr, dev, NULL);
-				break;
-			}
-		}
-		rcu_read_unlock();
-		break;
-#endif
-	}
-	return 0;
+	rcu_read_lock();
+	dev = rdma_find_ndev_for_src_ip_rcu(dev_addr->net, addr);
+	if (!IS_ERR(dev))
+		rdma_copy_src_l2_addr(dev_addr, dev);
+	rcu_read_unlock();
+	return PTR_ERR_OR_ZERO(dev);
 }
 EXPORT_SYMBOL(rdma_translate_ip);
@@ -295,15 +312,12 @@ static void queue_req(struct addr_req *req)
 	spin_unlock_bh(&lock);
 }
 
-static int ib_nl_fetch_ha(const struct dst_entry *dst,
-			  struct rdma_dev_addr *dev_addr,
+static int ib_nl_fetch_ha(struct rdma_dev_addr *dev_addr,
 			  const void *daddr, u32 seq, u16 family)
 {
-	if (rdma_nl_chk_listeners(RDMA_NL_GROUP_LS))
+	if (!rdma_nl_chk_listeners(RDMA_NL_GROUP_LS))
 		return -EADDRNOTAVAIL;
 
-	/* We fill in what we can, the response will fill the rest */
-	rdma_copy_addr(dev_addr, dst->dev, NULL);
 	return ib_nl_ip_send_msg(dev_addr, daddr, seq, family);
 }
@@ -322,7 +336,7 @@ static int dst_fetch_ha(const struct dst_entry *dst,
 		neigh_event_send(n, NULL);
 		ret = -ENODATA;
 	} else {
-		rdma_copy_addr(dev_addr, dst->dev, n->ha);
+		memcpy(dev_addr->dst_dev_addr, n->ha, MAX_ADDR_LEN);
 	}
 
 	neigh_release(n);
@@ -356,18 +370,22 @@ static int fetch_ha(const struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
 				   (const void *)&dst_in6->sin6_addr;
 	sa_family_t family = dst_in->sa_family;
 
-	/* Gateway + ARPHRD_INFINIBAND -> IB router */
-	if (has_gateway(dst, family) && dst->dev->type == ARPHRD_INFINIBAND)
-		return ib_nl_fetch_ha(dst, dev_addr, daddr, seq, family);
+	/* If we have a gateway in IB mode then it must be an IB network */
+	if (has_gateway(dst, family) && dev_addr->network == RDMA_NETWORK_IB)
+		return ib_nl_fetch_ha(dev_addr, daddr, seq, family);
 	else
 		return dst_fetch_ha(dst, dev_addr, daddr);
 }
 
-static int addr4_resolve(struct sockaddr_in *src_in,
-			 const struct sockaddr_in *dst_in,
+static int addr4_resolve(struct sockaddr *src_sock,
+			 const struct sockaddr *dst_sock,
 			 struct rdma_dev_addr *addr,
 			 struct rtable **prt)
 {
+	struct sockaddr_in *src_in = (struct sockaddr_in *)src_sock;
+	const struct sockaddr_in *dst_in =
+			(const struct sockaddr_in *)dst_sock;
 	__be32 src_ip = src_in->sin_addr.s_addr;
 	__be32 dst_ip = dst_in->sin_addr.s_addr;
 	struct rtable *rt;
@@ -383,16 +401,8 @@ static int addr4_resolve(struct sockaddr_in *src_in,
 	if (ret)
 		return ret;
 
-	src_in->sin_family = AF_INET;
 	src_in->sin_addr.s_addr = fl4.saddr;
 
-	/* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're
-	 * definitely in RoCE v2 (as RoCE v1 isn't routable) set the network
-	 * type accordingly.
-	 */
-	if (rt->rt_uses_gateway && rt->dst.dev->type != ARPHRD_INFINIBAND)
-		addr->network = RDMA_NETWORK_IPV4;
-
 	addr->hoplimit = ip4_dst_hoplimit(&rt->dst);
 
 	*prt = rt;
@@ -400,14 +410,16 @@ static int addr4_resolve(struct sockaddr_in *src_in,
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
-static int addr6_resolve(struct sockaddr_in6 *src_in,
-			 const struct sockaddr_in6 *dst_in,
+static int addr6_resolve(struct sockaddr *src_sock,
+			 const struct sockaddr *dst_sock,
 			 struct rdma_dev_addr *addr,
 			 struct dst_entry **pdst)
 {
+	struct sockaddr_in6 *src_in = (struct sockaddr_in6 *)src_sock;
+	const struct sockaddr_in6 *dst_in =
+			(const struct sockaddr_in6 *)dst_sock;
 	struct flowi6 fl6;
 	struct dst_entry *dst;
-	struct rt6_info *rt;
 	int ret;
 
 	memset(&fl6, 0, sizeof fl6);
@@ -419,19 +431,8 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
 	if (ret < 0)
 		return ret;
 
-	rt = (struct rt6_info *)dst;
-	if (ipv6_addr_any(&src_in->sin6_addr)) {
-		src_in->sin6_family = AF_INET6;
+	if (ipv6_addr_any(&src_in->sin6_addr))
 		src_in->sin6_addr = fl6.saddr;
-	}
-
-	/* If there's a gateway and type of device not ARPHRD_INFINIBAND, we're
-	 * definitely in RoCE v2 (as RoCE v1 isn't routable) set the network
-	 * type accordingly.
-	 */
-	if (rt->rt6i_flags & RTF_GATEWAY &&
-	    ip6_dst_idev(dst)->dev->type != ARPHRD_INFINIBAND)
-		addr->network = RDMA_NETWORK_IPV6;
 
 	addr->hoplimit = ip6_dst_hoplimit(dst);
@@ -439,8 +440,8 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
 	return 0;
 }
 #else
-static int addr6_resolve(struct sockaddr_in6 *src_in,
-			 const struct sockaddr_in6 *dst_in,
+static int addr6_resolve(struct sockaddr *src_sock,
+			 const struct sockaddr *dst_sock,
 			 struct rdma_dev_addr *addr,
 			 struct dst_entry **pdst)
 {
@@ -451,36 +452,110 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
 static int addr_resolve_neigh(const struct dst_entry *dst,
 			      const struct sockaddr *dst_in,
 			      struct rdma_dev_addr *addr,
+			      unsigned int ndev_flags,
 			      u32 seq)
 {
-	if (dst->dev->flags & IFF_LOOPBACK) {
-		int ret;
-
-		ret = rdma_translate_ip(dst_in, addr);
-		if (!ret)
-			memcpy(addr->dst_dev_addr, addr->src_dev_addr,
-			       MAX_ADDR_LEN);
-
-		return ret;
+	int ret = 0;
+
+	if (ndev_flags & IFF_LOOPBACK) {
+		memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
+	} else {
+		if (!(ndev_flags & IFF_NOARP)) {
+			/* If the device doesn't do ARP internally */
+			ret = fetch_ha(dst, addr, dst_in, seq);
+		}
+	}
+	return ret;
+}
+
+static int copy_src_l2_addr(struct rdma_dev_addr *dev_addr,
+			    const struct sockaddr *dst_in,
+			    const struct dst_entry *dst,
+			    const struct net_device *ndev)
+{
+	int ret = 0;
+
+	if (dst->dev->flags & IFF_LOOPBACK)
+		ret = rdma_translate_ip(dst_in, dev_addr);
+	else
+		rdma_copy_src_l2_addr(dev_addr, dst->dev);
+
+	/*
+	 * If there's a gateway and type of device not ARPHRD_INFINIBAND,
+	 * we're definitely in RoCE v2 (as RoCE v1 isn't routable) set the
+	 * network type accordingly.
+	 */
+	if (has_gateway(dst, dst_in->sa_family) &&
+	    ndev->type != ARPHRD_INFINIBAND)
+		dev_addr->network = dst_in->sa_family == AF_INET ?
+				    RDMA_NETWORK_IPV4 :
+				    RDMA_NETWORK_IPV6;
+	else
+		dev_addr->network = RDMA_NETWORK_IB;
+
+	return ret;
+}
+
+static int rdma_set_src_addr_rcu(struct rdma_dev_addr *dev_addr,
+				 unsigned int *ndev_flags,
+				 const struct sockaddr *dst_in,
+				 const struct dst_entry *dst)
+{
+	struct net_device *ndev = READ_ONCE(dst->dev);
+
+	*ndev_flags = ndev->flags;
+	/* A physical device must be the RDMA device to use */
+	if (ndev->flags & IFF_LOOPBACK) {
+		/*
+		 * RDMA (IB/RoCE, iWarp) doesn't run on lo interface or
+		 * loopback IP address. So if route is resolved to loopback
+		 * interface, translate that to a real ndev based on non
+		 * loopback IP address.
+		 */
+		ndev = rdma_find_ndev_for_src_ip_rcu(dev_net(ndev), dst_in);
+		if (IS_ERR(ndev))
+			return -ENODEV;
 	}
 
-	/* If the device doesn't do ARP internally */
-	if (!(dst->dev->flags & IFF_NOARP))
-		return fetch_ha(dst, addr, dst_in, seq);
-
-	rdma_copy_addr(addr, dst->dev, NULL);
-
+	return copy_src_l2_addr(dev_addr, dst_in, dst, ndev);
+}
+
+static int set_addr_netns_by_gid_rcu(struct rdma_dev_addr *addr)
+{
+	struct net_device *ndev;
+
+	ndev = rdma_read_gid_attr_ndev_rcu(addr->sgid_attr);
+	if (IS_ERR(ndev))
+		return PTR_ERR(ndev);
+
+	/*
+	 * Since we are holding the rcu, reading net and ifindex
+	 * are safe without any additional reference; because
+	 * change_net_namespace() in net/core/dev.c does rcu sync
+	 * after it changes the state to IFF_DOWN and before
+	 * updating netdev fields {net, ifindex}.
+	 */
+	addr->net = dev_net(ndev);
+	addr->bound_dev_if = ndev->ifindex;
 	return 0;
 }
 
+static void rdma_addr_set_net_defaults(struct rdma_dev_addr *addr)
+{
+	addr->net = &init_net;
+	addr->bound_dev_if = 0;
+}
+
 static int addr_resolve(struct sockaddr *src_in,
 			const struct sockaddr *dst_in,
 			struct rdma_dev_addr *addr,
 			bool resolve_neigh,
+			bool resolve_by_gid_attr,
 			u32 seq)
 {
-	struct net_device *ndev;
-	struct dst_entry *dst;
+	struct dst_entry *dst = NULL;
+	unsigned int ndev_flags = 0;
+	struct rtable *rt = NULL;
 	int ret;
 
 	if (!addr->net) {
@@ -488,58 +563,55 @@ static int addr_resolve(struct sockaddr *src_in,
 		return -EINVAL;
 	}
 
+	rcu_read_lock();
+	if (resolve_by_gid_attr) {
+		if (!addr->sgid_attr) {
+			rcu_read_unlock();
+			pr_warn_ratelimited("%s: missing gid_attr\n", __func__);
+			return -EINVAL;
+		}
+		/*
+		 * If the request is for a specific gid attribute of the
+		 * rdma_dev_addr, derive net from the netdevice of the
+		 * GID attribute.
+		 */
+		ret = set_addr_netns_by_gid_rcu(addr);
+		if (ret) {
+			rcu_read_unlock();
+			return ret;
+		}
+	}
 	if (src_in->sa_family == AF_INET) {
-		struct rtable *rt = NULL;
-		const struct sockaddr_in *dst_in4 =
-			(const struct sockaddr_in *)dst_in;
-
-		ret = addr4_resolve((struct sockaddr_in *)src_in,
-				    dst_in4, addr, &rt);
-		if (ret)
-			return ret;
-
-		if (resolve_neigh)
-			ret = addr_resolve_neigh(&rt->dst, dst_in, addr, seq);
-
-		if (addr->bound_dev_if) {
-			ndev = dev_get_by_index(addr->net, addr->bound_dev_if);
-		} else {
-			ndev = rt->dst.dev;
-			dev_hold(ndev);
-		}
-
-		ip_rt_put(rt);
+		ret = addr4_resolve(src_in, dst_in, addr, &rt);
+		dst = &rt->dst;
 	} else {
-		const struct sockaddr_in6 *dst_in6 =
-			(const struct sockaddr_in6 *)dst_in;
+		ret = addr6_resolve(src_in, dst_in, addr, &dst);
+	}
+	if (ret) {
+		rcu_read_unlock();
+		goto done;
+	}
+	ret = rdma_set_src_addr_rcu(addr, &ndev_flags, dst_in, dst);
+	rcu_read_unlock();
 
-		ret = addr6_resolve((struct sockaddr_in6 *)src_in,
-				    dst_in6, addr,
-				    &dst);
-		if (ret)
-			return ret;
-
-		if (resolve_neigh)
-			ret = addr_resolve_neigh(dst, dst_in, addr, seq);
-
-		if (addr->bound_dev_if) {
-			ndev = dev_get_by_index(addr->net, addr->bound_dev_if);
-		} else {
-			ndev = dst->dev;
-			dev_hold(ndev);
-		}
+	/*
+	 * Resolve neighbor destination address if requested and
+	 * only if src addr translation didn't fail.
+	 */
+	if (!ret && resolve_neigh)
+		ret = addr_resolve_neigh(dst, dst_in, addr, ndev_flags, seq);
 
+	if (src_in->sa_family == AF_INET)
+		ip_rt_put(rt);
+	else
 		dst_release(dst);
-	}
-
-	if (ndev) {
-		if (ndev->flags & IFF_LOOPBACK)
-			ret = rdma_translate_ip(dst_in, addr);
-		else
-			addr->bound_dev_if = ndev->ifindex;
-		dev_put(ndev);
-	}
-
+done:
+	/*
+	 * Clear the addr net to go back to its original state, only if it was
+	 * derived from GID attribute in this context.
+	 */
+	if (resolve_by_gid_attr)
+		rdma_addr_set_net_defaults(addr);
 	return ret;
 }
@@ -554,7 +626,8 @@ static void process_one_req(struct work_struct *_work)
 	src_in = (struct sockaddr *)&req->src_addr;
 	dst_in = (struct sockaddr *)&req->dst_addr;
 	req->status = addr_resolve(src_in, dst_in, req->addr,
-				   true, req->seq);
+				   true, req->resolve_by_gid_attr,
+				   req->seq);
 	if (req->status && time_after_eq(jiffies, req->timeout)) {
 		req->status = -ETIMEDOUT;
 	} else if (req->status == -ENODATA) {
@@ -586,10 +659,10 @@ static void process_one_req(struct work_struct *_work)
 }
 
 int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr,
-		    struct rdma_dev_addr *addr, int timeout_ms,
+		    struct rdma_dev_addr *addr, unsigned long timeout_ms,
 		    void (*callback)(int status, struct sockaddr *src_addr,
 				     struct rdma_dev_addr *addr, void *context),
-		    void *context)
+		    bool resolve_by_gid_attr, void *context)
 {
 	struct sockaddr *src_in, *dst_in;
 	struct addr_req *req;
@@ -617,10 +690,12 @@ int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr,
 	req->addr = addr;
 	req->callback = callback;
 	req->context = context;
+	req->resolve_by_gid_attr = resolve_by_gid_attr;
 	INIT_DELAYED_WORK(&req->work, process_one_req);
 	req->seq = (u32)atomic_inc_return(&ib_nl_addr_request_seq);
 
-	req->status = addr_resolve(src_in, dst_in, addr, true, req->seq);
+	req->status = addr_resolve(src_in, dst_in, addr, true,
+				   req->resolve_by_gid_attr, req->seq);
 	switch (req->status) {
 	case 0:
 		req->timeout = jiffies;
@@ -641,25 +716,53 @@ err:
 }
 EXPORT_SYMBOL(rdma_resolve_ip);
 
-int rdma_resolve_ip_route(struct sockaddr *src_addr,
-			  const struct sockaddr *dst_addr,
-			  struct rdma_dev_addr *addr)
+int roce_resolve_route_from_path(struct sa_path_rec *rec,
+				 const struct ib_gid_attr *attr)
 {
-	struct sockaddr_storage ssrc_addr = {};
-	struct sockaddr *src_in = (struct sockaddr *)&ssrc_addr;
+	union {
+		struct sockaddr     _sockaddr;
+		struct sockaddr_in  _sockaddr_in;
+		struct sockaddr_in6 _sockaddr_in6;
+	} sgid, dgid;
+	struct rdma_dev_addr dev_addr = {};
+	int ret;
 
-	if (src_addr) {
-		if (src_addr->sa_family != dst_addr->sa_family)
-			return -EINVAL;
+	if (rec->roce.route_resolved)
+		return 0;
 
-		memcpy(src_in, src_addr, rdma_addr_size(src_addr));
-	} else {
-		src_in->sa_family = dst_addr->sa_family;
-	}
+	rdma_gid2ip(&sgid._sockaddr, &rec->sgid);
+	rdma_gid2ip(&dgid._sockaddr, &rec->dgid);
 
-	return addr_resolve(src_in, dst_addr, addr, false, 0);
+	if (sgid._sockaddr.sa_family != dgid._sockaddr.sa_family)
+		return -EINVAL;
+
+	if (!attr || !attr->ndev)
+		return -EINVAL;
+
+	dev_addr.net = &init_net;
+	dev_addr.sgid_attr = attr;
+
+	ret = addr_resolve(&sgid._sockaddr, &dgid._sockaddr,
+			   &dev_addr, false, true, 0);
+	if (ret)
+		return ret;
+
+	if ((dev_addr.network == RDMA_NETWORK_IPV4 ||
+	     dev_addr.network == RDMA_NETWORK_IPV6) &&
+	    rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2)
+		return -EINVAL;
+
+	rec->roce.route_resolved = true;
+	return 0;
 }
 
+/**
+ * rdma_addr_cancel - Cancel resolve ip request
+ * @addr:	Pointer to address structure given previously
+ *		during rdma_resolve_ip().
+ *
+ * rdma_addr_cancel() is synchronous function which cancels any pending
+ * request if there is any.
+ */
 void rdma_addr_cancel(struct rdma_dev_addr *addr)
 {
 	struct addr_req *req, *temp_req;
@@ -687,11 +790,6 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr)
 	 * guarentees no work is running and none will be started.
 	 */
 	cancel_delayed_work_sync(&found->work);
-
-	if (found->callback)
-		found->callback(-ECANCELED, (struct sockaddr *)&found->src_addr,
-			      found->addr, found->context);
-
 	kfree(found);
 }
 EXPORT_SYMBOL(rdma_addr_cancel);
@@ -710,7 +808,7 @@ static void resolve_cb(int status, struct sockaddr *src_addr,
 
 int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
 				 const union ib_gid *dgid,
-				 u8 *dmac, const struct net_device *ndev,
+				 u8 *dmac, const struct ib_gid_attr *sgid_attr,
 				 int *hoplimit)
 {
 	struct rdma_dev_addr dev_addr;
@@ -726,12 +824,12 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
 	rdma_gid2ip(&dgid_addr._sockaddr, dgid);
 
 	memset(&dev_addr, 0, sizeof(dev_addr));
-	dev_addr.bound_dev_if = ndev->ifindex;
 	dev_addr.net = &init_net;
+	dev_addr.sgid_attr = sgid_attr;
 
 	init_completion(&ctx.comp);
 	ret = rdma_resolve_ip(&sgid_addr._sockaddr, &dgid_addr._sockaddr,
-			      &dev_addr, 1000, resolve_cb, &ctx);
+			      &dev_addr, 1000, resolve_cb, true, &ctx);
 	if (ret)
 		return ret;
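Note on the hunks above: rdma_copy_addr() is gone; rdma_copy_src_l2_addr() copies only the source L2 fields, and the destination MAC is now filled in exclusively by neighbour resolution (dst_fetch_ha()). A hedged sketch of how an in-kernel caller converts; example_bind_src() is a hypothetical wrapper, not anything in this diff:

static void example_bind_src(struct rdma_dev_addr *dev_addr,
			     struct net_device *ndev)
{
	/* Old code: rdma_copy_addr(dev_addr, ndev, NULL); */
	rdma_copy_src_l2_addr(dev_addr, ndev);
}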

--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c

@@ -212,9 +212,8 @@ static void free_gid_entry_locked(struct ib_gid_table_entry *entry)
 	u8 port_num = entry->attr.port_num;
 	struct ib_gid_table *table = rdma_gid_table(device, port_num);
 
-	pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
-		 device->name, port_num, entry->attr.index,
-		 entry->attr.gid.raw);
+	dev_dbg(&device->dev, "%s port=%d index=%d gid %pI6\n", __func__,
+		port_num, entry->attr.index, entry->attr.gid.raw);
 
 	if (rdma_cap_roce_gid_table(device, port_num) &&
 	    entry->state != GID_TABLE_ENTRY_INVALID)
@@ -289,9 +288,9 @@ static void store_gid_entry(struct ib_gid_table *table,
 {
 	entry->state = GID_TABLE_ENTRY_VALID;
 
-	pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
-		 entry->attr.device->name, entry->attr.port_num,
-		 entry->attr.index, entry->attr.gid.raw);
+	dev_dbg(&entry->attr.device->dev, "%s port=%d index=%d gid %pI6\n",
+		__func__, entry->attr.port_num, entry->attr.index,
+		entry->attr.gid.raw);
 
 	lockdep_assert_held(&table->lock);
 	write_lock_irq(&table->rwlock);
@@ -320,17 +319,16 @@ static int add_roce_gid(struct ib_gid_table_entry *entry)
 	int ret;
 
 	if (!attr->ndev) {
-		pr_err("%s NULL netdev device=%s port=%d index=%d\n",
-		       __func__, attr->device->name, attr->port_num,
-		       attr->index);
+		dev_err(&attr->device->dev, "%s NULL netdev port=%d index=%d\n",
+			__func__, attr->port_num, attr->index);
 		return -EINVAL;
 	}
 	if (rdma_cap_roce_gid_table(attr->device, attr->port_num)) {
 		ret = attr->device->add_gid(attr, &entry->context);
 		if (ret) {
-			pr_err("%s GID add failed device=%s port=%d index=%d\n",
-			       __func__, attr->device->name, attr->port_num,
-			       attr->index);
+			dev_err(&attr->device->dev,
+				"%s GID add failed port=%d index=%d\n",
+				__func__, attr->port_num, attr->index);
 			return ret;
 		}
 	}
@@ -353,9 +351,8 @@ static void del_gid(struct ib_device *ib_dev, u8 port,
 
 	lockdep_assert_held(&table->lock);
 
-	pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
-		 ib_dev->name, port, ix,
-		 table->data_vec[ix]->attr.gid.raw);
+	dev_dbg(&ib_dev->dev, "%s port=%d index=%d gid %pI6\n", __func__, port,
+		ix, table->data_vec[ix]->attr.gid.raw);
 
 	write_lock_irq(&table->rwlock);
 	entry = table->data_vec[ix];
@@ -782,9 +779,9 @@ static void release_gid_table(struct ib_device *device, u8 port,
 		if (is_gid_entry_free(table->data_vec[i]))
 			continue;
 		if (kref_read(&table->data_vec[i]->kref) > 1) {
-			pr_err("GID entry ref leak for %s (index %d) ref=%d\n",
-			       device->name, i,
-			       kref_read(&table->data_vec[i]->kref));
+			dev_err(&device->dev,
+				"GID entry ref leak for index %d ref=%d\n", i,
+				kref_read(&table->data_vec[i]->kref));
 			leak = true;
 		}
 	}
@@ -1252,6 +1249,39 @@ void rdma_hold_gid_attr(const struct ib_gid_attr *attr)
 }
 EXPORT_SYMBOL(rdma_hold_gid_attr);
 
+/**
+ * rdma_read_gid_attr_ndev_rcu - Read GID attribute netdevice
+ * which must be in UP state.
+ *
+ * @attr:Pointer to the GID attribute
+ *
+ * Returns pointer to netdevice if the netdevice was attached to GID and
+ * netdevice is in UP state. Caller must hold RCU lock as this API
+ * reads the netdev flags which can change while netdevice migrates to
+ * different net namespace. Returns ERR_PTR with error code otherwise.
+ *
+ */
+struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr)
+{
+	struct ib_gid_table_entry *entry =
+			container_of(attr, struct ib_gid_table_entry, attr);
+	struct ib_device *device = entry->attr.device;
+	struct net_device *ndev = ERR_PTR(-ENODEV);
+	u8 port_num = entry->attr.port_num;
+	struct ib_gid_table *table;
+	unsigned long flags;
+	bool valid;
+
+	table = rdma_gid_table(device, port_num);
+
+	read_lock_irqsave(&table->rwlock, flags);
+	valid = is_gid_entry_valid(table->data_vec[attr->index]);
+	if (valid && attr->ndev && (READ_ONCE(attr->ndev->flags) & IFF_UP))
+		ndev = attr->ndev;
+	read_unlock_irqrestore(&table->rwlock, flags);
+
+	return ndev;
+}
+
 static int config_non_roce_gid_cache(struct ib_device *device,
 				     u8 port, int gid_tbl_len)
 {
@@ -1270,8 +1300,9 @@ static int config_non_roce_gid_cache(struct ib_device *device,
 			continue;
 		ret = device->query_gid(device, port, i, &gid_attr.gid);
 		if (ret) {
-			pr_warn("query_gid failed (%d) for %s (index %d)\n",
-				ret, device->name, i);
+			dev_warn(&device->dev,
+				 "query_gid failed (%d) for index %d\n", ret,
+				 i);
 			goto err;
 		}
 		gid_attr.index = i;
@@ -1300,8 +1331,7 @@ static void ib_cache_update(struct ib_device *device,
 
 	ret = ib_query_port(device, port, tprops);
 	if (ret) {
-		pr_warn("ib_query_port failed (%d) for %s\n",
-			ret, device->name);
+		dev_warn(&device->dev, "ib_query_port failed (%d)\n", ret);
 		goto err;
 	}
@@ -1323,8 +1353,9 @@ static void ib_cache_update(struct ib_device *device,
 	for (i = 0; i < pkey_cache->table_len; ++i) {
 		ret = ib_query_pkey(device, port, i, pkey_cache->table + i);
 		if (ret) {
-			pr_warn("ib_query_pkey failed (%d) for %s (index %d)\n",
-				ret, device->name, i);
+			dev_warn(&device->dev,
+				 "ib_query_pkey failed (%d) for index %d\n",
+				 ret, i);
 			goto err;
 		}
 	}
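The new rdma_read_gid_attr_ndev_rcu() helper is what set_addr_netns_by_gid_rcu() in the addr.c hunks above builds on. A hedged usage sketch of this core-internal API (example_netns_from_gid() is hypothetical): the call must sit under rcu_read_lock(), and the returned netdevice may only be dereferenced while the lock is held.

static int example_netns_from_gid(const struct ib_gid_attr *sgid_attr,
				  struct net **net, int *ifindex)
{
	struct net_device *ndev;

	rcu_read_lock();
	ndev = rdma_read_gid_attr_ndev_rcu(sgid_attr);
	if (IS_ERR(ndev)) {
		rcu_read_unlock();
		return PTR_ERR(ndev);
	}
	/* net and ifindex stay coherent under RCU, per the comment in addr.c */
	*net = dev_net(ndev);
	*ifindex = ndev->ifindex;
	rcu_read_unlock();
	return 0;
}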

--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c

@@ -3292,8 +3292,11 @@ static int cm_lap_handler(struct cm_work *work)
 	if (ret)
 		goto unlock;
 
-	cm_init_av_by_path(param->alternate_path, NULL, &cm_id_priv->alt_av,
-			   cm_id_priv);
+	ret = cm_init_av_by_path(param->alternate_path, NULL,
+				 &cm_id_priv->alt_av, cm_id_priv);
+	if (ret)
+		goto unlock;
+
 	cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
 	cm_id_priv->tid = lap_msg->hdr.tid;
 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
@@ -4367,7 +4370,7 @@ static void cm_add_one(struct ib_device *ib_device)
 	cm_dev->going_down = 0;
 	cm_dev->device = device_create(&cm_class, &ib_device->dev,
 				       MKDEV(0, 0), NULL,
-				       "%s", ib_device->name);
+				       "%s", dev_name(&ib_device->dev));
 	if (IS_ERR(cm_dev->device)) {
 		kfree(cm_dev);
 		return;
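The device_create() change above is one instance of the pull's "first pass to use dev_name() instead of ib_device->name". A hedged sketch of the general conversion pattern used across these files; example_check() is hypothetical, not from this diff:

static void example_check(struct ib_device *ibdev)
{
	/* Old style: pr_warn("%s: check failed\n", ibdev->name); */
	dev_warn(&ibdev->dev, "check failed\n");
}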

--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c

@@ -639,13 +639,21 @@ static void cma_bind_sgid_attr(struct rdma_id_private *id_priv,
 	id_priv->id.route.addr.dev_addr.sgid_attr = sgid_attr;
 }
 
-static int cma_acquire_dev(struct rdma_id_private *id_priv,
-			   const struct rdma_id_private *listen_id_priv)
+/**
+ * cma_acquire_dev_by_src_ip - Acquire cma device, port, gid attribute
+ * based on source ip address.
+ * @id_priv:	cm_id which should be bound to cma device
+ *
+ * cma_acquire_dev_by_src_ip() binds cm id to cma device, port and GID attribute
+ * based on source IP address. It returns 0 on success or error code otherwise.
+ * It is applicable to active and passive side cm_id.
+ */
+static int cma_acquire_dev_by_src_ip(struct rdma_id_private *id_priv)
 {
 	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
 	const struct ib_gid_attr *sgid_attr;
-	struct cma_device *cma_dev;
 	union ib_gid gid, iboe_gid, *gidp;
+	struct cma_device *cma_dev;
 	enum ib_gid_type gid_type;
 	int ret = -ENODEV;
 	u8 port;
@@ -654,41 +662,125 @@
 	    id_priv->id.ps == RDMA_PS_IPOIB)
 		return -EINVAL;
 
-	mutex_lock(&lock);
 	rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
 		    &iboe_gid);
-
 	memcpy(&gid, dev_addr->src_dev_addr +
-	       rdma_addr_gid_offset(dev_addr), sizeof gid);
+	       rdma_addr_gid_offset(dev_addr), sizeof(gid));
 
-	if (listen_id_priv) {
-		cma_dev = listen_id_priv->cma_dev;
-		port = listen_id_priv->id.port_num;
-		gidp = rdma_protocol_roce(cma_dev->device, port) ?
-		       &iboe_gid : &gid;
-		gid_type = listen_id_priv->gid_type;
-		sgid_attr = cma_validate_port(cma_dev->device, port,
-					      gid_type, gidp, id_priv);
-		if (!IS_ERR(sgid_attr)) {
-			id_priv->id.port_num = port;
-			cma_bind_sgid_attr(id_priv, sgid_attr);
-			ret = 0;
-			goto out;
-		}
-	}
-
+	mutex_lock(&lock);
 	list_for_each_entry(cma_dev, &dev_list, list) {
-		for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) {
-			if (listen_id_priv &&
-			    listen_id_priv->cma_dev == cma_dev &&
-			    listen_id_priv->id.port_num == port)
-				continue;
-
+		for (port = rdma_start_port(cma_dev->device);
+		     port <= rdma_end_port(cma_dev->device); port++) {
 			gidp = rdma_protocol_roce(cma_dev->device, port) ?
 			       &iboe_gid : &gid;
 			gid_type = cma_dev->default_gid_type[port - 1];
 			sgid_attr = cma_validate_port(cma_dev->device, port,
 						      gid_type, gidp, id_priv);
+			if (!IS_ERR(sgid_attr)) {
+				id_priv->id.port_num = port;
+				cma_bind_sgid_attr(id_priv, sgid_attr);
+				cma_attach_to_dev(id_priv, cma_dev);
+				ret = 0;
+				goto out;
+			}
+		}
+	}
+out:
+	mutex_unlock(&lock);
+	return ret;
+}
+
+/**
+ * cma_ib_acquire_dev - Acquire cma device, port and SGID attribute
+ * @id_priv:		cm id to bind to cma device
+ * @listen_id_priv:	listener cm id to match against
+ * @req:		Pointer to req structure containaining incoming
+ *			request information
+ * cma_ib_acquire_dev() acquires cma device, port and SGID attribute when
+ * rdma device matches for listen_id and incoming request. It also verifies
+ * that a GID table entry is present for the source address.
+ * Returns 0 on success, or returns error code otherwise.
+ */
+static int cma_ib_acquire_dev(struct rdma_id_private *id_priv,
+			      const struct rdma_id_private *listen_id_priv,
+			      struct cma_req_info *req)
+{
+	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+	const struct ib_gid_attr *sgid_attr;
+	enum ib_gid_type gid_type;
+	union ib_gid gid;
+
+	if (dev_addr->dev_type != ARPHRD_INFINIBAND &&
+	    id_priv->id.ps == RDMA_PS_IPOIB)
+		return -EINVAL;
+
+	if (rdma_protocol_roce(req->device, req->port))
+		rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
+			    &gid);
+	else
+		memcpy(&gid, dev_addr->src_dev_addr +
+		       rdma_addr_gid_offset(dev_addr), sizeof(gid));
+
+	gid_type = listen_id_priv->cma_dev->default_gid_type[req->port - 1];
+	sgid_attr = cma_validate_port(req->device, req->port,
+				      gid_type, &gid, id_priv);
+	if (IS_ERR(sgid_attr))
+		return PTR_ERR(sgid_attr);
+
+	id_priv->id.port_num = req->port;
+	cma_bind_sgid_attr(id_priv, sgid_attr);
+	/* Need to acquire lock to protect against reader
+	 * of cma_dev->id_list such as cma_netdev_callback() and
+	 * cma_process_remove().
+	 */
+	mutex_lock(&lock);
+	cma_attach_to_dev(id_priv, listen_id_priv->cma_dev);
+	mutex_unlock(&lock);
+	return 0;
+}
+
+static int cma_iw_acquire_dev(struct rdma_id_private *id_priv,
+			      const struct rdma_id_private *listen_id_priv)
+{
+	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+	const struct ib_gid_attr *sgid_attr;
+	struct cma_device *cma_dev;
+	enum ib_gid_type gid_type;
+	int ret = -ENODEV;
+	union ib_gid gid;
+	u8 port;
+
+	if (dev_addr->dev_type != ARPHRD_INFINIBAND &&
+	    id_priv->id.ps == RDMA_PS_IPOIB)
+		return -EINVAL;
+
+	memcpy(&gid, dev_addr->src_dev_addr +
+	       rdma_addr_gid_offset(dev_addr), sizeof(gid));
+
+	mutex_lock(&lock);
+
+	cma_dev = listen_id_priv->cma_dev;
+	port = listen_id_priv->id.port_num;
+	gid_type = listen_id_priv->gid_type;
+	sgid_attr = cma_validate_port(cma_dev->device, port,
+				      gid_type, &gid, id_priv);
+	if (!IS_ERR(sgid_attr)) {
+		id_priv->id.port_num = port;
+		cma_bind_sgid_attr(id_priv, sgid_attr);
+		ret = 0;
+		goto out;
+	}
+
+	list_for_each_entry(cma_dev, &dev_list, list) {
+		for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) {
+			if (listen_id_priv->cma_dev == cma_dev &&
+			    listen_id_priv->id.port_num == port)
+				continue;
+
+			gid_type = cma_dev->default_gid_type[port - 1];
+			sgid_attr = cma_validate_port(cma_dev->device, port,
+						      gid_type, &gid, id_priv);
 			if (!IS_ERR(sgid_attr)) {
 				id_priv->id.port_num = port;
 				cma_bind_sgid_attr(id_priv, sgid_attr);
@@ -785,10 +877,7 @@ struct rdma_cm_id *__rdma_create_id(struct net *net,
 	if (!id_priv)
 		return ERR_PTR(-ENOMEM);
 
-	if (caller)
-		id_priv->res.kern_name = caller;
-	else
-		rdma_restrack_set_task(&id_priv->res, current);
+	rdma_restrack_set_task(&id_priv->res, caller);
 	id_priv->res.type = RDMA_RESTRACK_CM_ID;
 	id_priv->state = RDMA_CM_IDLE;
 	id_priv->id.context = context;
@@ -1462,17 +1551,34 @@ static bool cma_protocol_roce(const struct rdma_cm_id *id)
 	return rdma_protocol_roce(device, port_num);
 }
 
+static bool cma_is_req_ipv6_ll(const struct cma_req_info *req)
+{
+	const struct sockaddr *daddr =
+			(const struct sockaddr *)&req->listen_addr_storage;
+	const struct sockaddr_in6 *daddr6 = (const struct sockaddr_in6 *)daddr;
+
+	/* Returns true if the req is for IPv6 link local */
+	return (daddr->sa_family == AF_INET6 &&
+		(ipv6_addr_type(&daddr6->sin6_addr) & IPV6_ADDR_LINKLOCAL));
+}
+
 static bool cma_match_net_dev(const struct rdma_cm_id *id,
 			      const struct net_device *net_dev,
-			      u8 port_num)
+			      const struct cma_req_info *req)
 {
 	const struct rdma_addr *addr = &id->route.addr;
 
 	if (!net_dev)
 		/* This request is an AF_IB request */
-		return (!id->port_num || id->port_num == port_num) &&
+		return (!id->port_num || id->port_num == req->port) &&
 		       (addr->src_addr.ss_family == AF_IB);
 
+	/*
+	 * If the request is not for IPv6 link local, allow matching
+	 * request to any netdevice of the one or multiport rdma device.
+	 */
+	if (!cma_is_req_ipv6_ll(req))
+		return true;
+
 	/*
 	 * Net namespaces must match, and if the listner is listening
 	 * on a specific netdevice than netdevice must match as well.
@@ -1500,13 +1606,14 @@ static struct rdma_id_private *cma_find_listener(
 	hlist_for_each_entry(id_priv, &bind_list->owners, node) {
 		if (cma_match_private_data(id_priv, ib_event->private_data)) {
 			if (id_priv->id.device == cm_id->device &&
-			    cma_match_net_dev(&id_priv->id, net_dev, req->port))
+			    cma_match_net_dev(&id_priv->id, net_dev, req))
 				return id_priv;
 			list_for_each_entry(id_priv_dev,
 					    &id_priv->listen_list,
 					    listen_list) {
 				if (id_priv_dev->id.device == cm_id->device &&
-				    cma_match_net_dev(&id_priv_dev->id, net_dev, req->port))
+				    cma_match_net_dev(&id_priv_dev->id,
+						      net_dev, req))
 					return id_priv_dev;
 			}
 		}
@@ -1518,18 +1625,18 @@ static struct rdma_id_private *cma_find_listener(
 static struct rdma_id_private *
 cma_ib_id_from_event(struct ib_cm_id *cm_id,
 		     const struct ib_cm_event *ib_event,
+		     struct cma_req_info *req,
 		     struct net_device **net_dev)
 {
-	struct cma_req_info req;
 	struct rdma_bind_list *bind_list;
 	struct rdma_id_private *id_priv;
 	int err;
 
-	err = cma_save_req_info(ib_event, &req);
+	err = cma_save_req_info(ib_event, req);
 	if (err)
 		return ERR_PTR(err);
 
-	*net_dev = cma_get_net_dev(ib_event, &req);
+	*net_dev = cma_get_net_dev(ib_event, req);
 	if (IS_ERR(*net_dev)) {
 		if (PTR_ERR(*net_dev) == -EAFNOSUPPORT) {
 			/* Assuming the protocol is AF_IB */
@@ -1567,17 +1674,17 @@ cma_ib_id_from_event(struct ib_cm_id *cm_id,
 		}
 
 		if (!validate_net_dev(*net_dev,
-				(struct sockaddr *)&req.listen_addr_storage,
-				(struct sockaddr *)&req.src_addr_storage)) {
+				(struct sockaddr *)&req->listen_addr_storage,
+				(struct sockaddr *)&req->src_addr_storage)) {
 			id_priv = ERR_PTR(-EHOSTUNREACH);
 			goto err;
 		}
 	}
 
 	bind_list = cma_ps_find(*net_dev ? dev_net(*net_dev) : &init_net,
-				rdma_ps_from_service_id(req.service_id),
-				cma_port_from_service_id(req.service_id));
-	id_priv = cma_find_listener(bind_list, cm_id, ib_event, &req, *net_dev);
+				rdma_ps_from_service_id(req->service_id),
+				cma_port_from_service_id(req->service_id));
+	id_priv = cma_find_listener(bind_list, cm_id, ib_event, req, *net_dev);
 err:
 	rcu_read_unlock();
 	if (IS_ERR(id_priv) && *net_dev) {
@@ -1710,8 +1817,8 @@ void rdma_destroy_id(struct rdma_cm_id *id)
 	mutex_lock(&id_priv->handler_mutex);
 	mutex_unlock(&id_priv->handler_mutex);
 
+	rdma_restrack_del(&id_priv->res);
 	if (id_priv->cma_dev) {
-		rdma_restrack_del(&id_priv->res);
 		if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
 			if (id_priv->cm_id.ib)
 				ib_destroy_cm_id(id_priv->cm_id.ib);
@@ -1902,7 +2009,7 @@ cma_ib_new_conn_id(const struct rdma_cm_id *listen_id,
 		rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
 
 	if (net_dev) {
-		rdma_copy_addr(&rt->addr.dev_addr, net_dev, NULL);
+		rdma_copy_src_l2_addr(&rt->addr.dev_addr, net_dev);
 	} else {
 		if (!cma_protocol_roce(listen_id) &&
 		    cma_any_addr(cma_src_addr(id_priv))) {
@@ -1952,7 +2059,7 @@ cma_ib_new_udp_id(const struct rdma_cm_id *listen_id,
 		goto err;
 
 	if (net_dev) {
-		rdma_copy_addr(&id->route.addr.dev_addr, net_dev, NULL);
+		rdma_copy_src_l2_addr(&id->route.addr.dev_addr, net_dev);
 	} else {
 		if (!cma_any_addr(cma_src_addr(id_priv))) {
 			ret = cma_translate_addr(cma_src_addr(id_priv),
@@ -1999,11 +2106,12 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id,
 {
 	struct rdma_id_private *listen_id, *conn_id = NULL;
 	struct rdma_cm_event event = {};
+	struct cma_req_info req = {};
 	struct net_device *net_dev;
 	u8 offset;
 	int ret;
 
-	listen_id = cma_ib_id_from_event(cm_id, ib_event, &net_dev);
+	listen_id = cma_ib_id_from_event(cm_id, ib_event, &req, &net_dev);
 	if (IS_ERR(listen_id))
 		return PTR_ERR(listen_id);
@@ -2036,7 +2144,7 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id,
 	}
 
 	mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
-	ret = cma_acquire_dev(conn_id, listen_id);
+	ret = cma_ib_acquire_dev(conn_id, listen_id, &req);
 	if (ret)
 		goto err2;
@@ -2232,7 +2340,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
 		goto out;
 	}
 
-	ret = cma_acquire_dev(conn_id, listen_id);
+	ret = cma_iw_acquire_dev(conn_id, listen_id);
 	if (ret) {
 		mutex_unlock(&conn_id->handler_mutex);
 		rdma_destroy_id(new_cm_id);
@@ -2354,8 +2462,8 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
 
 	ret = rdma_listen(id, id_priv->backlog);
 	if (ret)
-		pr_warn("RDMA CMA: cma_listen_on_dev, error %d, listening on device %s\n",
-			ret, cma_dev->device->name);
+		dev_warn(&cma_dev->device->dev,
+			 "RDMA CMA: cma_listen_on_dev, error %d\n", ret);
 }
 
 static void cma_listen_on_all(struct rdma_id_private *id_priv)
@@ -2402,8 +2510,8 @@ static void cma_query_handler(int status, struct sa_path_rec *path_rec,
 	queue_work(cma_wq, &work->work);
 }
 
-static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
-			      struct cma_work *work)
+static int cma_query_ib_route(struct rdma_id_private *id_priv,
+			      unsigned long timeout_ms, struct cma_work *work)
 {
 	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
 	struct sa_path_rec path_rec;
@@ -2521,7 +2629,8 @@ static void cma_init_resolve_addr_work(struct cma_work *work,
 	work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
 }
 
-static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
+static int cma_resolve_ib_route(struct rdma_id_private *id_priv,
+				unsigned long timeout_ms)
 {
 	struct rdma_route *route = &id_priv->id.route;
 	struct cma_work *work;
@@ -2643,7 +2752,7 @@ err:
 }
 EXPORT_SYMBOL(rdma_set_ib_path);
 
-static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
+static int cma_resolve_iw_route(struct rdma_id_private *id_priv)
 {
 	struct cma_work *work;
@@ -2744,7 +2853,7 @@ err1:
 	return ret;
 }
 
-int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
+int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms)
 {
 	struct rdma_id_private *id_priv;
 	int ret;
@@ -2759,7 +2868,7 @@ int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
 	else if (rdma_protocol_roce(id->device, id->port_num))
 		ret = cma_resolve_iboe_route(id_priv);
 	else if (rdma_protocol_iwarp(id->device, id->port_num))
-		ret = cma_resolve_iw_route(id_priv, timeout_ms);
+		ret = cma_resolve_iw_route(id_priv);
 	else
 		ret = -ENOSYS;
@@ -2862,7 +2971,7 @@ static void addr_handler(int status, struct sockaddr *src_addr,
 	memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr));
 
 	if (!status && !id_priv->cma_dev) {
-		status = cma_acquire_dev(id_priv, NULL);
+		status = cma_acquire_dev_by_src_ip(id_priv);
 		if (status)
 			pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to acquire device. status %d\n",
 					     status);
@@ -2882,13 +2991,11 @@ static void addr_handler(int status, struct sockaddr *src_addr,
 	if (id_priv->id.event_handler(&id_priv->id, &event)) {
 		cma_exch(id_priv, RDMA_CM_DESTROYING);
 		mutex_unlock(&id_priv->handler_mutex);
-		cma_deref_id(id_priv);
 		rdma_destroy_id(&id_priv->id);
 		return;
 	}
 out:
 	mutex_unlock(&id_priv->handler_mutex);
-	cma_deref_id(id_priv);
 }
 
 static int cma_resolve_loopback(struct rdma_id_private *id_priv)
@@ -2966,7 +3073,7 @@ static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
 }
 
 int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
-		      const struct sockaddr *dst_addr, int timeout_ms)
+		      const struct sockaddr *dst_addr, unsigned long timeout_ms)
 {
 	struct rdma_id_private *id_priv;
 	int ret;
@@ -2985,16 +3092,16 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
 		return -EINVAL;
 
 	memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
-	atomic_inc(&id_priv->refcount);
 	if (cma_any_addr(dst_addr)) {
 		ret = cma_resolve_loopback(id_priv);
 	} else {
 		if (dst_addr->sa_family == AF_IB) {
 			ret = cma_resolve_ib_addr(id_priv);
 		} else {
-			ret = rdma_resolve_ip(cma_src_addr(id_priv),
-					      dst_addr, &id->route.addr.dev_addr,
-					      timeout_ms, addr_handler, id_priv);
+			ret = rdma_resolve_ip(cma_src_addr(id_priv), dst_addr,
					      &id->route.addr.dev_addr,
+					      timeout_ms, addr_handler,
+					      false, id_priv);
		}
	}
	if (ret)
@@ -3003,7 +3110,6 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
 	return 0;
 err:
 	cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND);
-	cma_deref_id(id_priv);
 	return ret;
 }
 EXPORT_SYMBOL(rdma_resolve_addr);
@@ -3414,7 +3520,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
 		if (ret)
 			goto err1;
 
-		ret = cma_acquire_dev(id_priv, NULL);
+		ret = cma_acquire_dev_by_src_ip(id_priv);
 		if (ret)
 			goto err1;
 	}
@@ -3439,10 +3545,9 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
 
 	return 0;
 err2:
-	if (id_priv->cma_dev) {
-		rdma_restrack_del(&id_priv->res);
+	rdma_restrack_del(&id_priv->res);
+	if (id_priv->cma_dev)
 		cma_release_dev(id_priv);
-	}
 err1:
 	cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE);
 	return ret;
@@ -3839,10 +3944,7 @@ int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
 
 	id_priv = container_of(id, struct rdma_id_private, id);
 
-	if (caller)
-		id_priv->res.kern_name = caller;
-	else
-		rdma_restrack_set_task(&id_priv->res, current);
+	rdma_restrack_set_task(&id_priv->res, caller);
 
 	if (!cma_comp(id_priv, RDMA_CM_CONNECT))
 		return -EINVAL;
@@ -4087,9 +4189,10 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
 	    (!ib_sa_sendonly_fullmem_support(&sa_client,
 					     id_priv->id.device,
 					     id_priv->id.port_num))) {
-		pr_warn("RDMA CM: %s port %u Unable to multicast join\n"
-			"RDMA CM: SM doesn't support Send Only Full Member option\n",
-			id_priv->id.device->name, id_priv->id.port_num);
+		dev_warn(
+			&id_priv->id.device->dev,
+			"RDMA CM: port %u Unable to multicast join: SM doesn't support Send Only Full Member option\n",
+			id_priv->id.port_num);
 		return -EOPNOTSUPP;
 	}

--- a/drivers/infiniband/core/cma_configfs.c
+++ b/drivers/infiniband/core/cma_configfs.c

@@ -65,7 +65,7 @@ static struct cma_dev_port_group *to_dev_port_group(struct config_item *item)
 
 static bool filter_by_name(struct ib_device *ib_dev, void *cookie)
 {
-	return !strcmp(ib_dev->name, cookie);
+	return !strcmp(dev_name(&ib_dev->dev), cookie);
 }
 
 static int cma_configfs_params_get(struct config_item *item,

--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h

@@ -44,7 +44,7 @@
 #include "mad_priv.h"
 
 /* Total number of ports combined across all struct ib_devices's */
-#define RDMA_MAX_PORTS 1024
+#define RDMA_MAX_PORTS 8192
 
 struct pkey_index_qp_list {
 	struct list_head pkey_index_list;
@@ -87,6 +87,7 @@ int ib_device_register_sysfs(struct ib_device *device,
 			     int (*port_callback)(struct ib_device *,
 						  u8, struct kobject *));
 void ib_device_unregister_sysfs(struct ib_device *device);
+int ib_device_rename(struct ib_device *ibdev, const char *name);
 
 typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port,
 	      struct net_device *idev, void *cookie);
@@ -338,7 +339,14 @@ int rdma_resolve_ip_route(struct sockaddr *src_addr,
 int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
 				 const union ib_gid *dgid,
-				 u8 *dmac, const struct net_device *ndev,
+				 u8 *dmac, const struct ib_gid_attr *sgid_attr,
 				 int *hoplimit);
+void rdma_copy_src_l2_addr(struct rdma_dev_addr *dev_addr,
+			   const struct net_device *dev);
+
+struct sa_path_rec;
+int roce_resolve_route_from_path(struct sa_path_rec *rec,
+				 const struct ib_gid_attr *attr);
+
+struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr);
 
 #endif /* _CORE_PRIV_H */

--- a/drivers/infiniband/core/cq.c
+++ b/drivers/infiniband/core/cq.c

@@ -112,12 +112,12 @@ static void ib_cq_poll_work(struct work_struct *work)
 				    IB_POLL_BATCH);
 	if (completed >= IB_POLL_BUDGET_WORKQUEUE ||
 	    ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
-		queue_work(ib_comp_wq, &cq->work);
+		queue_work(cq->comp_wq, &cq->work);
 }
 
 static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
 {
-	queue_work(ib_comp_wq, &cq->work);
+	queue_work(cq->comp_wq, &cq->work);
 }
 
 /**
@@ -161,7 +161,7 @@ struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private,
 		goto out_destroy_cq;
 
 	cq->res.type = RDMA_RESTRACK_CQ;
-	cq->res.kern_name = caller;
+	rdma_restrack_set_task(&cq->res, caller);
 	rdma_restrack_add(&cq->res);
 
 	switch (cq->poll_ctx) {
@@ -175,9 +175,12 @@ struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private,
 		ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
 		break;
 	case IB_POLL_WORKQUEUE:
+	case IB_POLL_UNBOUND_WORKQUEUE:
 		cq->comp_handler = ib_cq_completion_workqueue;
 		INIT_WORK(&cq->work, ib_cq_poll_work);
 		ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
+		cq->comp_wq = (cq->poll_ctx == IB_POLL_WORKQUEUE) ?
+				ib_comp_wq : ib_comp_unbound_wq;
 		break;
 	default:
 		ret = -EINVAL;
@@ -213,6 +216,7 @@ void ib_free_cq(struct ib_cq *cq)
 		irq_poll_disable(&cq->iop);
 		break;
 	case IB_POLL_WORKQUEUE:
+	case IB_POLL_UNBOUND_WORKQUEUE:
 		cancel_work_sync(&cq->work);
 		break;
 	default:
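With IB_POLL_UNBOUND_WORKQUEUE, completions are processed on ib_comp_unbound_wq rather than the CPU-bound ib_comp_wq. A hedged ULP-side sketch of selecting the new polling context; example_alloc_cq(), the CQ size and the completion vector are placeholders, not taken from this diff:

struct ib_cq *example_alloc_cq(struct ib_device *dev)
{
	/* 128 CQEs, completion vector 0, polled from the unbound workqueue */
	return ib_alloc_cq(dev, NULL, 128, 0, IB_POLL_UNBOUND_WORKQUEUE);
}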

--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c

@@ -61,6 +61,7 @@ struct ib_client_data {
 };
 
 struct workqueue_struct *ib_comp_wq;
+struct workqueue_struct *ib_comp_unbound_wq;
 struct workqueue_struct *ib_wq;
 EXPORT_SYMBOL_GPL(ib_wq);
@@ -122,8 +123,9 @@ static int ib_device_check_mandatory(struct ib_device *device)
 
 	for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) {
 		if (!*(void **) ((void *) device + mandatory_table[i].offset)) {
-			pr_warn("Device %s is missing mandatory function %s\n",
-				device->name, mandatory_table[i].name);
+			dev_warn(&device->dev,
+				 "Device is missing mandatory function %s\n",
+				 mandatory_table[i].name);
 			return -EINVAL;
 		}
 	}
@@ -163,16 +165,40 @@ static struct ib_device *__ib_device_get_by_name(const char *name)
 	struct ib_device *device;
 
 	list_for_each_entry(device, &device_list, core_list)
-		if (!strncmp(name, device->name, IB_DEVICE_NAME_MAX))
+		if (!strcmp(name, dev_name(&device->dev)))
 			return device;
 
 	return NULL;
 }
 
-static int alloc_name(char *name)
+int ib_device_rename(struct ib_device *ibdev, const char *name)
+{
+	struct ib_device *device;
+	int ret = 0;
+
+	if (!strcmp(name, dev_name(&ibdev->dev)))
+		return ret;
+
+	mutex_lock(&device_mutex);
+	list_for_each_entry(device, &device_list, core_list) {
+		if (!strcmp(name, dev_name(&device->dev))) {
+			ret = -EEXIST;
+			goto out;
+		}
+	}
+
+	ret = device_rename(&ibdev->dev, name);
+	if (ret)
+		goto out;
+	strlcpy(ibdev->name, name, IB_DEVICE_NAME_MAX);
+out:
+	mutex_unlock(&device_mutex);
+	return ret;
+}
+
+static int alloc_name(struct ib_device *ibdev, const char *name)
 {
 	unsigned long *inuse;
-	char buf[IB_DEVICE_NAME_MAX];
 	struct ib_device *device;
 	int i;
@@ -181,24 +207,21 @@ static int alloc_name(char *name)
 		return -ENOMEM;
 
 	list_for_each_entry(device, &device_list, core_list) {
-		if (!sscanf(device->name, name, &i))
+		char buf[IB_DEVICE_NAME_MAX];
+
+		if (sscanf(dev_name(&device->dev), name, &i) != 1)
 			continue;
 		if (i < 0 || i >= PAGE_SIZE * 8)
 			continue;
 		snprintf(buf, sizeof buf, name, i);
-		if (!strncmp(buf, device->name, IB_DEVICE_NAME_MAX))
+		if (!strcmp(buf, dev_name(&device->dev)))
 			set_bit(i, inuse);
 	}
 
 	i = find_first_zero_bit(inuse, PAGE_SIZE * 8);
 	free_page((unsigned long) inuse);
-	snprintf(buf, sizeof buf, name, i);
 
-	if (__ib_device_get_by_name(buf))
-		return -ENFILE;
-
-	strlcpy(name, buf, IB_DEVICE_NAME_MAX);
-	return 0;
+	return dev_set_name(&ibdev->dev, name, i);
 }
 
 static void ib_device_release(struct device *device)
@@ -221,9 +244,7 @@ static void ib_device_release(struct device *device)
 static int ib_device_uevent(struct device *device,
 			    struct kobj_uevent_env *env)
 {
-	struct ib_device *dev = container_of(device, struct ib_device, dev);
-
-	if (add_uevent_var(env, "NAME=%s", dev->name))
+	if (add_uevent_var(env, "NAME=%s", dev_name(device)))
 		return -ENOMEM;
 
 	/*
@@ -269,7 +290,7 @@ struct ib_device *ib_alloc_device(size_t size)
 	INIT_LIST_HEAD(&device->event_handler_list);
 	spin_lock_init(&device->event_handler_lock);
-	spin_lock_init(&device->client_data_lock);
+	rwlock_init(&device->client_data_lock);
 	INIT_LIST_HEAD(&device->client_data_list);
 	INIT_LIST_HEAD(&device->port_list);
@@ -285,6 +306,7 @@ EXPORT_SYMBOL(ib_alloc_device);
  */
 void ib_dealloc_device(struct ib_device *device)
 {
+	WARN_ON(!list_empty(&device->client_data_list));
 	WARN_ON(device->reg_state != IB_DEV_UNREGISTERED &&
 		device->reg_state != IB_DEV_UNINITIALIZED);
rdma_restrack_clean(&device->res); rdma_restrack_clean(&device->res);
@@ -295,9 +317,8 @@ EXPORT_SYMBOL(ib_dealloc_device);
static int add_client_context(struct ib_device *device, struct ib_client *client) static int add_client_context(struct ib_device *device, struct ib_client *client)
{ {
struct ib_client_data *context; struct ib_client_data *context;
unsigned long flags;
context = kmalloc(sizeof *context, GFP_KERNEL); context = kmalloc(sizeof(*context), GFP_KERNEL);
if (!context) if (!context)
return -ENOMEM; return -ENOMEM;
@@ -306,9 +327,9 @@ static int add_client_context(struct ib_device *device, struct ib_client *client
context->going_down = false; context->going_down = false;
down_write(&lists_rwsem); down_write(&lists_rwsem);
spin_lock_irqsave(&device->client_data_lock, flags); write_lock_irq(&device->client_data_lock);
list_add(&context->list, &device->client_data_list); list_add(&context->list, &device->client_data_list);
spin_unlock_irqrestore(&device->client_data_lock, flags); write_unlock_irq(&device->client_data_lock);
up_write(&lists_rwsem); up_write(&lists_rwsem);
return 0; return 0;
@@ -444,22 +465,8 @@ static u32 __dev_new_index(void)
} }
} }
/** static void setup_dma_device(struct ib_device *device)
* ib_register_device - Register an IB device with IB core
* @device:Device to register
*
* Low-level drivers use ib_register_device() to register their
* devices with the IB core. All registered clients will receive a
* callback for each device that is added. @device must be allocated
* with ib_alloc_device().
*/
int ib_register_device(struct ib_device *device,
int (*port_callback)(struct ib_device *,
u8, struct kobject *))
{ {
int ret;
struct ib_client *client;
struct ib_udata uhw = {.outlen = 0, .inlen = 0};
struct device *parent = device->dev.parent; struct device *parent = device->dev.parent;
WARN_ON_ONCE(device->dma_device); WARN_ON_ONCE(device->dma_device);
@@ -491,56 +498,113 @@ int ib_register_device(struct ib_device *device,
WARN_ON_ONCE(!parent); WARN_ON_ONCE(!parent);
device->dma_device = parent; device->dma_device = parent;
} }
}
mutex_lock(&device_mutex); static void cleanup_device(struct ib_device *device)
{
ib_cache_cleanup_one(device);
ib_cache_release_one(device);
kfree(device->port_pkey_list);
kfree(device->port_immutable);
}
if (strchr(device->name, '%')) { static int setup_device(struct ib_device *device)
ret = alloc_name(device->name); {
if (ret) struct ib_udata uhw = {.outlen = 0, .inlen = 0};
goto out; int ret;
}
if (ib_device_check_mandatory(device)) { ret = ib_device_check_mandatory(device);
ret = -EINVAL; if (ret)
goto out; return ret;
}
ret = read_port_immutable(device); ret = read_port_immutable(device);
if (ret) { if (ret) {
pr_warn("Couldn't create per port immutable data %s\n", dev_warn(&device->dev,
device->name); "Couldn't create per port immutable data\n");
goto out; return ret;
}
ret = setup_port_pkey_list(device);
if (ret) {
pr_warn("Couldn't create per port_pkey_list\n");
goto out;
}
ret = ib_cache_setup_one(device);
if (ret) {
pr_warn("Couldn't set up InfiniBand P_Key/GID cache\n");
goto port_cleanup;
}
ret = ib_device_register_rdmacg(device);
if (ret) {
pr_warn("Couldn't register device with rdma cgroup\n");
goto cache_cleanup;
} }
memset(&device->attrs, 0, sizeof(device->attrs)); memset(&device->attrs, 0, sizeof(device->attrs));
ret = device->query_device(device, &device->attrs, &uhw); ret = device->query_device(device, &device->attrs, &uhw);
if (ret) { if (ret) {
pr_warn("Couldn't query the device attributes\n"); dev_warn(&device->dev,
goto cg_cleanup; "Couldn't query the device attributes\n");
goto port_cleanup;
}
ret = setup_port_pkey_list(device);
if (ret) {
dev_warn(&device->dev, "Couldn't create per port_pkey_list\n");
goto port_cleanup;
}
ret = ib_cache_setup_one(device);
if (ret) {
dev_warn(&device->dev,
"Couldn't set up InfiniBand P_Key/GID cache\n");
goto pkey_cleanup;
}
return 0;
pkey_cleanup:
kfree(device->port_pkey_list);
port_cleanup:
kfree(device->port_immutable);
return ret;
}
/**
* ib_register_device - Register an IB device with IB core
* @device:Device to register
*
* Low-level drivers use ib_register_device() to register their
* devices with the IB core. All registered clients will receive a
* callback for each device that is added. @device must be allocated
* with ib_alloc_device().
*/
int ib_register_device(struct ib_device *device, const char *name,
int (*port_callback)(struct ib_device *, u8,
struct kobject *))
{
int ret;
struct ib_client *client;
setup_dma_device(device);
mutex_lock(&device_mutex);
if (strchr(name, '%')) {
ret = alloc_name(device, name);
if (ret)
goto out;
} else {
ret = dev_set_name(&device->dev, name);
if (ret)
goto out;
}
if (__ib_device_get_by_name(dev_name(&device->dev))) {
ret = -ENFILE;
goto out;
}
strlcpy(device->name, dev_name(&device->dev), IB_DEVICE_NAME_MAX);
ret = setup_device(device);
if (ret)
goto out;
device->index = __dev_new_index();
ret = ib_device_register_rdmacg(device);
if (ret) {
dev_warn(&device->dev,
"Couldn't register device with rdma cgroup\n");
goto dev_cleanup;
} }
ret = ib_device_register_sysfs(device, port_callback); ret = ib_device_register_sysfs(device, port_callback);
if (ret) { if (ret) {
pr_warn("Couldn't register device %s with driver model\n", dev_warn(&device->dev,
device->name); "Couldn't register device with driver model\n");
goto cg_cleanup; goto cg_cleanup;
} }
@@ -550,7 +614,6 @@ int ib_register_device(struct ib_device *device,
if (!add_client_context(device, client) && client->add) if (!add_client_context(device, client) && client->add)
client->add(device); client->add(device);
device->index = __dev_new_index();
down_write(&lists_rwsem); down_write(&lists_rwsem);
list_add_tail(&device->core_list, &device_list); list_add_tail(&device->core_list, &device_list);
up_write(&lists_rwsem); up_write(&lists_rwsem);
@@ -559,11 +622,8 @@ int ib_register_device(struct ib_device *device,
cg_cleanup: cg_cleanup:
ib_device_unregister_rdmacg(device); ib_device_unregister_rdmacg(device);
cache_cleanup: dev_cleanup:
ib_cache_cleanup_one(device); cleanup_device(device);
ib_cache_release_one(device);
port_cleanup:
kfree(device->port_immutable);
out: out:
mutex_unlock(&device_mutex); mutex_unlock(&device_mutex);
return ret; return ret;
@@ -585,21 +645,20 @@ void ib_unregister_device(struct ib_device *device)
down_write(&lists_rwsem); down_write(&lists_rwsem);
list_del(&device->core_list); list_del(&device->core_list);
spin_lock_irqsave(&device->client_data_lock, flags); write_lock_irq(&device->client_data_lock);
list_for_each_entry_safe(context, tmp, &device->client_data_list, list) list_for_each_entry(context, &device->client_data_list, list)
context->going_down = true; context->going_down = true;
spin_unlock_irqrestore(&device->client_data_lock, flags); write_unlock_irq(&device->client_data_lock);
downgrade_write(&lists_rwsem); downgrade_write(&lists_rwsem);
list_for_each_entry_safe(context, tmp, &device->client_data_list, list_for_each_entry(context, &device->client_data_list, list) {
list) {
if (context->client->remove) if (context->client->remove)
context->client->remove(device, context->data); context->client->remove(device, context->data);
} }
up_read(&lists_rwsem); up_read(&lists_rwsem);
ib_device_unregister_rdmacg(device);
ib_device_unregister_sysfs(device); ib_device_unregister_sysfs(device);
ib_device_unregister_rdmacg(device);
mutex_unlock(&device_mutex); mutex_unlock(&device_mutex);
@@ -609,10 +668,13 @@ void ib_unregister_device(struct ib_device *device)
kfree(device->port_pkey_list); kfree(device->port_pkey_list);
down_write(&lists_rwsem); down_write(&lists_rwsem);
spin_lock_irqsave(&device->client_data_lock, flags); write_lock_irqsave(&device->client_data_lock, flags);
list_for_each_entry_safe(context, tmp, &device->client_data_list, list) list_for_each_entry_safe(context, tmp, &device->client_data_list,
list) {
list_del(&context->list);
kfree(context); kfree(context);
spin_unlock_irqrestore(&device->client_data_lock, flags); }
write_unlock_irqrestore(&device->client_data_lock, flags);
up_write(&lists_rwsem); up_write(&lists_rwsem);
device->reg_state = IB_DEV_UNREGISTERED; device->reg_state = IB_DEV_UNREGISTERED;
@@ -662,9 +724,8 @@ EXPORT_SYMBOL(ib_register_client);
*/ */
void ib_unregister_client(struct ib_client *client) void ib_unregister_client(struct ib_client *client)
{ {
struct ib_client_data *context, *tmp; struct ib_client_data *context;
struct ib_device *device; struct ib_device *device;
unsigned long flags;
mutex_lock(&device_mutex); mutex_lock(&device_mutex);
@@ -676,14 +737,14 @@ void ib_unregister_client(struct ib_client *client)
struct ib_client_data *found_context = NULL; struct ib_client_data *found_context = NULL;
down_write(&lists_rwsem); down_write(&lists_rwsem);
spin_lock_irqsave(&device->client_data_lock, flags); write_lock_irq(&device->client_data_lock);
list_for_each_entry_safe(context, tmp, &device->client_data_list, list) list_for_each_entry(context, &device->client_data_list, list)
if (context->client == client) { if (context->client == client) {
context->going_down = true; context->going_down = true;
found_context = context; found_context = context;
break; break;
} }
spin_unlock_irqrestore(&device->client_data_lock, flags); write_unlock_irq(&device->client_data_lock);
up_write(&lists_rwsem); up_write(&lists_rwsem);
if (client->remove) if (client->remove)
@@ -691,17 +752,18 @@ void ib_unregister_client(struct ib_client *client)
found_context->data : NULL); found_context->data : NULL);
if (!found_context) { if (!found_context) {
pr_warn("No client context found for %s/%s\n", dev_warn(&device->dev,
device->name, client->name); "No client context found for %s\n",
client->name);
continue; continue;
} }
down_write(&lists_rwsem); down_write(&lists_rwsem);
spin_lock_irqsave(&device->client_data_lock, flags); write_lock_irq(&device->client_data_lock);
list_del(&found_context->list); list_del(&found_context->list);
kfree(found_context); write_unlock_irq(&device->client_data_lock);
spin_unlock_irqrestore(&device->client_data_lock, flags);
up_write(&lists_rwsem); up_write(&lists_rwsem);
kfree(found_context);
} }
mutex_unlock(&device_mutex); mutex_unlock(&device_mutex);
@@ -722,13 +784,13 @@ void *ib_get_client_data(struct ib_device *device, struct ib_client *client)
void *ret = NULL; void *ret = NULL;
unsigned long flags; unsigned long flags;
spin_lock_irqsave(&device->client_data_lock, flags); read_lock_irqsave(&device->client_data_lock, flags);
list_for_each_entry(context, &device->client_data_list, list) list_for_each_entry(context, &device->client_data_list, list)
if (context->client == client) { if (context->client == client) {
ret = context->data; ret = context->data;
break; break;
} }
spin_unlock_irqrestore(&device->client_data_lock, flags); read_unlock_irqrestore(&device->client_data_lock, flags);
return ret; return ret;
} }
@@ -749,18 +811,18 @@ void ib_set_client_data(struct ib_device *device, struct ib_client *client,
struct ib_client_data *context; struct ib_client_data *context;
unsigned long flags; unsigned long flags;
spin_lock_irqsave(&device->client_data_lock, flags); write_lock_irqsave(&device->client_data_lock, flags);
list_for_each_entry(context, &device->client_data_list, list) list_for_each_entry(context, &device->client_data_list, list)
if (context->client == client) { if (context->client == client) {
context->data = data; context->data = data;
goto out; goto out;
} }
pr_warn("No client context found for %s/%s\n", dev_warn(&device->dev, "No client context found for %s\n",
device->name, client->name); client->name);
out: out:
spin_unlock_irqrestore(&device->client_data_lock, flags); write_unlock_irqrestore(&device->client_data_lock, flags);
} }
EXPORT_SYMBOL(ib_set_client_data); EXPORT_SYMBOL(ib_set_client_data);
@@ -1166,10 +1228,19 @@ static int __init ib_core_init(void)
goto err; goto err;
} }
ib_comp_unbound_wq =
alloc_workqueue("ib-comp-unb-wq",
WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM |
WQ_SYSFS, WQ_UNBOUND_MAX_ACTIVE);
if (!ib_comp_unbound_wq) {
ret = -ENOMEM;
goto err_comp;
}
ret = class_register(&ib_class); ret = class_register(&ib_class);
if (ret) { if (ret) {
pr_warn("Couldn't create InfiniBand device class\n"); pr_warn("Couldn't create InfiniBand device class\n");
goto err_comp; goto err_comp_unbound;
} }
ret = rdma_nl_init(); ret = rdma_nl_init();
@@ -1218,6 +1289,8 @@ err_ibnl:
rdma_nl_exit(); rdma_nl_exit();
err_sysfs: err_sysfs:
class_unregister(&ib_class); class_unregister(&ib_class);
err_comp_unbound:
destroy_workqueue(ib_comp_unbound_wq);
err_comp: err_comp:
destroy_workqueue(ib_comp_wq); destroy_workqueue(ib_comp_wq);
err: err:
@@ -1236,6 +1309,7 @@ static void __exit ib_core_cleanup(void)
addr_cleanup(); addr_cleanup();
rdma_nl_exit(); rdma_nl_exit();
class_unregister(&ib_class); class_unregister(&ib_class);
destroy_workqueue(ib_comp_unbound_wq);
destroy_workqueue(ib_comp_wq); destroy_workqueue(ib_comp_wq);
/* Make sure that any pending umem accounting work is done. */ /* Make sure that any pending umem accounting work is done. */
destroy_workqueue(ib_wq); destroy_workqueue(ib_wq);
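After this refactoring, ib_register_device() takes the device name as a parameter rather than reading a pre-filled ib_device->name; a printf-style "%" pattern is resolved through alloc_name(), plain names go through dev_set_name(), and a duplicate check returns -ENFILE. A hedged sketch of the resulting driver-side call (the provider name "foo" is invented for illustration; passing NULL skips the optional per-port sysfs callback):

        /* Hypothetical provider registration with the new signature:
         * the core expands "foo%d" to foo0, foo1, ... via alloc_name(). */
        ret = ib_register_device(ibdev, "foo%d", NULL);
        if (ret)
                goto err_dealloc;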


@@ -213,7 +213,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
device = pd->device; device = pd->device;
if (!device->alloc_fmr || !device->dealloc_fmr || if (!device->alloc_fmr || !device->dealloc_fmr ||
!device->map_phys_fmr || !device->unmap_fmr) { !device->map_phys_fmr || !device->unmap_fmr) {
pr_info(PFX "Device %s does not support FMRs\n", device->name); dev_info(&device->dev, "Device does not support FMRs\n");
return ERR_PTR(-ENOSYS); return ERR_PTR(-ENOSYS);
} }
@@ -257,7 +257,8 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
atomic_set(&pool->flush_ser, 0); atomic_set(&pool->flush_ser, 0);
init_waitqueue_head(&pool->force_wait); init_waitqueue_head(&pool->force_wait);
pool->worker = kthread_create_worker(0, "ib_fmr(%s)", device->name); pool->worker =
kthread_create_worker(0, "ib_fmr(%s)", dev_name(&device->dev));
if (IS_ERR(pool->worker)) { if (IS_ERR(pool->worker)) {
pr_warn(PFX "couldn't start cleanup kthread worker\n"); pr_warn(PFX "couldn't start cleanup kthread worker\n");
ret = PTR_ERR(pool->worker); ret = PTR_ERR(pool->worker);


@@ -509,7 +509,7 @@ static int iw_cm_map(struct iw_cm_id *cm_id, bool active)
cm_id->m_local_addr = cm_id->local_addr; cm_id->m_local_addr = cm_id->local_addr;
cm_id->m_remote_addr = cm_id->remote_addr; cm_id->m_remote_addr = cm_id->remote_addr;
memcpy(pm_reg_msg.dev_name, cm_id->device->name, memcpy(pm_reg_msg.dev_name, dev_name(&cm_id->device->dev),
sizeof(pm_reg_msg.dev_name)); sizeof(pm_reg_msg.dev_name));
memcpy(pm_reg_msg.if_name, cm_id->device->iwcm->ifname, memcpy(pm_reg_msg.if_name, cm_id->device->iwcm->ifname,
sizeof(pm_reg_msg.if_name)); sizeof(pm_reg_msg.if_name));


@@ -220,33 +220,37 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
int ret2, qpn; int ret2, qpn;
u8 mgmt_class, vclass; u8 mgmt_class, vclass;
if ((qp_type == IB_QPT_SMI && !rdma_cap_ib_smi(device, port_num)) ||
(qp_type == IB_QPT_GSI && !rdma_cap_ib_cm(device, port_num)))
return ERR_PTR(-EPROTONOSUPPORT);
/* Validate parameters */ /* Validate parameters */
qpn = get_spl_qp_index(qp_type); qpn = get_spl_qp_index(qp_type);
if (qpn == -1) { if (qpn == -1) {
dev_notice(&device->dev, dev_dbg_ratelimited(&device->dev, "%s: invalid QP Type %d\n",
"ib_register_mad_agent: invalid QP Type %d\n", __func__, qp_type);
qp_type);
goto error1; goto error1;
} }
if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION) { if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION) {
dev_notice(&device->dev, dev_dbg_ratelimited(&device->dev,
"ib_register_mad_agent: invalid RMPP Version %u\n", "%s: invalid RMPP Version %u\n",
rmpp_version); __func__, rmpp_version);
goto error1; goto error1;
} }
/* Validate MAD registration request if supplied */ /* Validate MAD registration request if supplied */
if (mad_reg_req) { if (mad_reg_req) {
if (mad_reg_req->mgmt_class_version >= MAX_MGMT_VERSION) { if (mad_reg_req->mgmt_class_version >= MAX_MGMT_VERSION) {
dev_notice(&device->dev, dev_dbg_ratelimited(&device->dev,
"ib_register_mad_agent: invalid Class Version %u\n", "%s: invalid Class Version %u\n",
mad_reg_req->mgmt_class_version); __func__,
mad_reg_req->mgmt_class_version);
goto error1; goto error1;
} }
if (!recv_handler) { if (!recv_handler) {
dev_notice(&device->dev, dev_dbg_ratelimited(&device->dev,
"ib_register_mad_agent: no recv_handler\n"); "%s: no recv_handler\n", __func__);
goto error1; goto error1;
} }
if (mad_reg_req->mgmt_class >= MAX_MGMT_CLASS) { if (mad_reg_req->mgmt_class >= MAX_MGMT_CLASS) {
@@ -256,9 +260,9 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
*/ */
if (mad_reg_req->mgmt_class != if (mad_reg_req->mgmt_class !=
IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
dev_notice(&device->dev, dev_dbg_ratelimited(&device->dev,
"ib_register_mad_agent: Invalid Mgmt Class 0x%x\n", "%s: Invalid Mgmt Class 0x%x\n",
mad_reg_req->mgmt_class); __func__, mad_reg_req->mgmt_class);
goto error1; goto error1;
} }
} else if (mad_reg_req->mgmt_class == 0) { } else if (mad_reg_req->mgmt_class == 0) {
@@ -266,8 +270,9 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
* Class 0 is reserved in IBA and is used for * Class 0 is reserved in IBA and is used for
* aliasing of IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE * aliasing of IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
*/ */
dev_notice(&device->dev, dev_dbg_ratelimited(&device->dev,
"ib_register_mad_agent: Invalid Mgmt Class 0\n"); "%s: Invalid Mgmt Class 0\n",
__func__);
goto error1; goto error1;
} else if (is_vendor_class(mad_reg_req->mgmt_class)) { } else if (is_vendor_class(mad_reg_req->mgmt_class)) {
/* /*
@@ -275,18 +280,19 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
* ensure supplied OUI is not zero * ensure supplied OUI is not zero
*/ */
if (!is_vendor_oui(mad_reg_req->oui)) { if (!is_vendor_oui(mad_reg_req->oui)) {
dev_notice(&device->dev, dev_dbg_ratelimited(&device->dev,
"ib_register_mad_agent: No OUI specified for class 0x%x\n", "%s: No OUI specified for class 0x%x\n",
mad_reg_req->mgmt_class); __func__,
mad_reg_req->mgmt_class);
goto error1; goto error1;
} }
} }
/* Make sure class supplied is consistent with RMPP */ /* Make sure class supplied is consistent with RMPP */
if (!ib_is_mad_class_rmpp(mad_reg_req->mgmt_class)) { if (!ib_is_mad_class_rmpp(mad_reg_req->mgmt_class)) {
if (rmpp_version) { if (rmpp_version) {
dev_notice(&device->dev, dev_dbg_ratelimited(&device->dev,
"ib_register_mad_agent: RMPP version for non-RMPP class 0x%x\n", "%s: RMPP version for non-RMPP class 0x%x\n",
mad_reg_req->mgmt_class); __func__, mad_reg_req->mgmt_class);
goto error1; goto error1;
} }
} }
@@ -297,9 +303,9 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
IB_MGMT_CLASS_SUBN_LID_ROUTED) && IB_MGMT_CLASS_SUBN_LID_ROUTED) &&
(mad_reg_req->mgmt_class != (mad_reg_req->mgmt_class !=
IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) { IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
dev_notice(&device->dev, dev_dbg_ratelimited(&device->dev,
"ib_register_mad_agent: Invalid SM QP type: class 0x%x\n", "%s: Invalid SM QP type: class 0x%x\n",
mad_reg_req->mgmt_class); __func__, mad_reg_req->mgmt_class);
goto error1; goto error1;
} }
} else { } else {
@@ -307,9 +313,9 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
IB_MGMT_CLASS_SUBN_LID_ROUTED) || IB_MGMT_CLASS_SUBN_LID_ROUTED) ||
(mad_reg_req->mgmt_class == (mad_reg_req->mgmt_class ==
IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) { IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
dev_notice(&device->dev, dev_dbg_ratelimited(&device->dev,
"ib_register_mad_agent: Invalid GS QP type: class 0x%x\n", "%s: Invalid GS QP type: class 0x%x\n",
mad_reg_req->mgmt_class); __func__, mad_reg_req->mgmt_class);
goto error1; goto error1;
} }
} }
@@ -324,18 +330,18 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
/* Validate device and port */ /* Validate device and port */
port_priv = ib_get_mad_port(device, port_num); port_priv = ib_get_mad_port(device, port_num);
if (!port_priv) { if (!port_priv) {
dev_notice(&device->dev, dev_dbg_ratelimited(&device->dev, "%s: Invalid port %d\n",
"ib_register_mad_agent: Invalid port %d\n", __func__, port_num);
port_num);
ret = ERR_PTR(-ENODEV); ret = ERR_PTR(-ENODEV);
goto error1; goto error1;
} }
/* Verify the QP requested is supported. For example, Ethernet devices /* Verify the QP requested is supported. For example, Ethernet devices
* will not have QP0 */ * will not have QP0.
*/
if (!port_priv->qp_info[qpn].qp) { if (!port_priv->qp_info[qpn].qp) {
dev_notice(&device->dev, dev_dbg_ratelimited(&device->dev, "%s: QP %d not supported\n",
"ib_register_mad_agent: QP %d not supported\n", qpn); __func__, qpn);
ret = ERR_PTR(-EPROTONOSUPPORT); ret = ERR_PTR(-EPROTONOSUPPORT);
goto error1; goto error1;
} }
@@ -2408,7 +2414,7 @@ static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr)
} }
void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr, void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr,
int timeout_ms) unsigned long timeout_ms)
{ {
mad_send_wr->timeout = msecs_to_jiffies(timeout_ms); mad_send_wr->timeout = msecs_to_jiffies(timeout_ms);
wait_for_response(mad_send_wr); wait_for_response(mad_send_wr);
@@ -3183,7 +3189,7 @@ static int ib_mad_port_open(struct ib_device *device,
cq_size *= 2; cq_size *= 2;
port_priv->cq = ib_alloc_cq(port_priv->device, port_priv, cq_size, 0, port_priv->cq = ib_alloc_cq(port_priv->device, port_priv, cq_size, 0,
IB_POLL_WORKQUEUE); IB_POLL_UNBOUND_WORKQUEUE);
if (IS_ERR(port_priv->cq)) { if (IS_ERR(port_priv->cq)) {
dev_err(&device->dev, "Couldn't create ib_mad CQ\n"); dev_err(&device->dev, "Couldn't create ib_mad CQ\n");
ret = PTR_ERR(port_priv->cq); ret = PTR_ERR(port_priv->cq);


@@ -221,6 +221,6 @@ void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr); void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr);
void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr, void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr,
int timeout_ms); unsigned long timeout_ms);
#endif /* __IB_MAD_PRIV_H__ */ #endif /* __IB_MAD_PRIV_H__ */


@@ -47,9 +47,9 @@ static struct {
const struct rdma_nl_cbs *cb_table; const struct rdma_nl_cbs *cb_table;
} rdma_nl_types[RDMA_NL_NUM_CLIENTS]; } rdma_nl_types[RDMA_NL_NUM_CLIENTS];
int rdma_nl_chk_listeners(unsigned int group) bool rdma_nl_chk_listeners(unsigned int group)
{ {
return (netlink_has_listeners(nls, group)) ? 0 : -1; return netlink_has_listeners(nls, group);
} }
EXPORT_SYMBOL(rdma_nl_chk_listeners); EXPORT_SYMBOL(rdma_nl_chk_listeners);


@@ -179,7 +179,8 @@ static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
{ {
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index)) if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index))
return -EMSGSIZE; return -EMSGSIZE;
if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME, device->name)) if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME,
dev_name(&device->dev)))
return -EMSGSIZE; return -EMSGSIZE;
return 0; return 0;
@@ -645,6 +646,36 @@ err:
return err; return err;
} }
static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
struct ib_device *device;
u32 index;
int err;
err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy,
extack);
if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
return -EINVAL;
index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
device = ib_device_get_by_index(index);
if (!device)
return -EINVAL;
if (tb[RDMA_NLDEV_ATTR_DEV_NAME]) {
char name[IB_DEVICE_NAME_MAX] = {};
nla_strlcpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
IB_DEVICE_NAME_MAX);
err = ib_device_rename(device, name);
}
put_device(&device->dev);
return err;
}
static int _nldev_get_dumpit(struct ib_device *device, static int _nldev_get_dumpit(struct ib_device *device,
struct sk_buff *skb, struct sk_buff *skb,
struct netlink_callback *cb, struct netlink_callback *cb,
@@ -1077,6 +1108,10 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
.doit = nldev_get_doit, .doit = nldev_get_doit,
.dump = nldev_get_dumpit, .dump = nldev_get_dumpit,
}, },
[RDMA_NLDEV_CMD_SET] = {
.doit = nldev_set_doit,
.flags = RDMA_NL_ADMIN_PERM,
},
[RDMA_NLDEV_CMD_PORT_GET] = { [RDMA_NLDEV_CMD_PORT_GET] = {
.doit = nldev_port_get_doit, .doit = nldev_port_get_doit,
.dump = nldev_port_get_dumpit, .dump = nldev_port_get_dumpit,
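The RDMA_NLDEV_CMD_SET handler added above is the kernel side of device renaming over RDMA netlink: an admin-only request (RDMA_NL_ADMIN_PERM) carrying RDMA_NLDEV_ATTR_DEV_INDEX and RDMA_NLDEV_ATTR_DEV_NAME is routed into ib_device_rename() from the device.c hunks earlier. Userspace is expected to drive this through the iproute2 "rdma" tool, something along the lines of "rdma dev set <old-name> name <new-name>"; the exact syntax depends on the tool version and is mentioned here only as an illustration, not as part of this diff.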


@@ -794,44 +794,6 @@ void uverbs_close_fd(struct file *f)
uverbs_uobject_put(uobj); uverbs_uobject_put(uobj);
} }
static void ufile_disassociate_ucontext(struct ib_ucontext *ibcontext)
{
struct ib_device *ib_dev = ibcontext->device;
struct task_struct *owning_process = NULL;
struct mm_struct *owning_mm = NULL;
owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
if (!owning_process)
return;
owning_mm = get_task_mm(owning_process);
if (!owning_mm) {
pr_info("no mm, disassociate ucontext is pending task termination\n");
while (1) {
put_task_struct(owning_process);
usleep_range(1000, 2000);
owning_process = get_pid_task(ibcontext->tgid,
PIDTYPE_PID);
if (!owning_process ||
owning_process->state == TASK_DEAD) {
pr_info("disassociate ucontext done, task was terminated\n");
/* in case task was dead need to release the
* task struct.
*/
if (owning_process)
put_task_struct(owning_process);
return;
}
}
}
down_write(&owning_mm->mmap_sem);
ib_dev->disassociate_ucontext(ibcontext);
up_write(&owning_mm->mmap_sem);
mmput(owning_mm);
put_task_struct(owning_process);
}
/* /*
* Drop the ucontext off the ufile and completely disconnect it from the * Drop the ucontext off the ufile and completely disconnect it from the
* ib_device * ib_device
@@ -840,20 +802,28 @@ static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile,
enum rdma_remove_reason reason) enum rdma_remove_reason reason)
{ {
struct ib_ucontext *ucontext = ufile->ucontext; struct ib_ucontext *ucontext = ufile->ucontext;
struct ib_device *ib_dev = ucontext->device;
int ret; int ret;
if (reason == RDMA_REMOVE_DRIVER_REMOVE) /*
ufile_disassociate_ucontext(ucontext); * If we are closing the FD then the user mmap VMAs must have
* already been destroyed as they hold on to the filep, otherwise
* they need to be zap'd.
*/
if (reason == RDMA_REMOVE_DRIVER_REMOVE) {
uverbs_user_mmap_disassociate(ufile);
if (ib_dev->disassociate_ucontext)
ib_dev->disassociate_ucontext(ucontext);
}
put_pid(ucontext->tgid); ib_rdmacg_uncharge(&ucontext->cg_obj, ib_dev,
ib_rdmacg_uncharge(&ucontext->cg_obj, ucontext->device,
RDMACG_RESOURCE_HCA_HANDLE); RDMACG_RESOURCE_HCA_HANDLE);
/* /*
* FIXME: Drivers are not permitted to fail dealloc_ucontext, remove * FIXME: Drivers are not permitted to fail dealloc_ucontext, remove
* the error return. * the error return.
*/ */
ret = ucontext->device->dealloc_ucontext(ucontext); ret = ib_dev->dealloc_ucontext(ucontext);
WARN_ON(ret); WARN_ON(ret);
ufile->ucontext = NULL; ufile->ucontext = NULL;


@@ -160,5 +160,6 @@ void uverbs_disassociate_api(struct uverbs_api *uapi);
void uverbs_destroy_api(struct uverbs_api *uapi); void uverbs_destroy_api(struct uverbs_api *uapi);
void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm, void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm,
unsigned int num_attrs); unsigned int num_attrs);
void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile);
#endif /* RDMA_CORE_H */ #endif /* RDMA_CORE_H */


@@ -50,8 +50,7 @@ void rdma_restrack_clean(struct rdma_restrack_root *res)
dev = container_of(res, struct ib_device, res); dev = container_of(res, struct ib_device, res);
pr_err("restrack: %s", CUT_HERE); pr_err("restrack: %s", CUT_HERE);
pr_err("restrack: BUG: RESTRACK detected leak of resources on %s\n", dev_err(&dev->dev, "BUG: RESTRACK detected leak of resources\n");
dev->name);
hash_for_each(res->hash, bkt, e, node) { hash_for_each(res->hash, bkt, e, node) {
if (rdma_is_kernel_res(e)) { if (rdma_is_kernel_res(e)) {
owner = e->kern_name; owner = e->kern_name;
@@ -156,6 +155,21 @@ static bool res_is_user(struct rdma_restrack_entry *res)
} }
} }
void rdma_restrack_set_task(struct rdma_restrack_entry *res,
const char *caller)
{
if (caller) {
res->kern_name = caller;
return;
}
if (res->task)
put_task_struct(res->task);
get_task_struct(current);
res->task = current;
}
EXPORT_SYMBOL(rdma_restrack_set_task);
void rdma_restrack_add(struct rdma_restrack_entry *res) void rdma_restrack_add(struct rdma_restrack_entry *res)
{ {
struct ib_device *dev = res_to_dev(res); struct ib_device *dev = res_to_dev(res);
@@ -168,7 +182,7 @@ void rdma_restrack_add(struct rdma_restrack_entry *res)
if (res_is_user(res)) { if (res_is_user(res)) {
if (!res->task) if (!res->task)
rdma_restrack_set_task(res, current); rdma_restrack_set_task(res, NULL);
res->kern_name = NULL; res->kern_name = NULL;
} else { } else {
set_kern_name(res); set_kern_name(res);
@@ -209,7 +223,7 @@ void rdma_restrack_del(struct rdma_restrack_entry *res)
struct ib_device *dev; struct ib_device *dev;
if (!res->valid) if (!res->valid)
return; goto out;
dev = res_to_dev(res); dev = res_to_dev(res);
if (!dev) if (!dev)
@@ -222,8 +236,12 @@ void rdma_restrack_del(struct rdma_restrack_entry *res)
down_write(&dev->res.rwsem); down_write(&dev->res.rwsem);
hash_del(&res->node); hash_del(&res->node);
res->valid = false; res->valid = false;
if (res->task)
put_task_struct(res->task);
up_write(&dev->res.rwsem); up_write(&dev->res.rwsem);
out:
if (res->task) {
put_task_struct(res->task);
res->task = NULL;
}
} }
EXPORT_SYMBOL(rdma_restrack_del); EXPORT_SYMBOL(rdma_restrack_del);
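rdma_restrack_set_task() now captures the two ownership models restrack supports: a kernel caller string, or a reference on the owning user task. A short sketch of the two call patterns this diff establishes (the enclosing objects are illustrative; cq.c above passes its caller argument, which the ib_alloc_cq() wrapper typically fills with KBUILD_MODNAME):

        /* Kernel-owned resource: record the caller name, no task reference. */
        rdma_restrack_set_task(&cq->res, KBUILD_MODNAME);

        /* User-owned resource: passing NULL takes a reference on current. */
        rdma_restrack_set_task(&qp->res, NULL);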


@@ -49,16 +49,14 @@ static inline void ib_sa_client_put(struct ib_sa_client *client)
} }
int ib_sa_mcmember_rec_query(struct ib_sa_client *client, int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
struct ib_device *device, u8 port_num, struct ib_device *device, u8 port_num, u8 method,
u8 method,
struct ib_sa_mcmember_rec *rec, struct ib_sa_mcmember_rec *rec,
ib_sa_comp_mask comp_mask, ib_sa_comp_mask comp_mask,
int timeout_ms, gfp_t gfp_mask, unsigned long timeout_ms, gfp_t gfp_mask,
void (*callback)(int status, void (*callback)(int status,
struct ib_sa_mcmember_rec *resp, struct ib_sa_mcmember_rec *resp,
void *context), void *context),
void *context, void *context, struct ib_sa_query **sa_query);
struct ib_sa_query **sa_query);
int mcast_init(void); int mcast_init(void);
void mcast_cleanup(void); void mcast_cleanup(void);


@@ -761,7 +761,7 @@ static void ib_nl_set_path_rec_attrs(struct sk_buff *skb,
/* Construct the family header first */ /* Construct the family header first */
header = skb_put(skb, NLMSG_ALIGN(sizeof(*header))); header = skb_put(skb, NLMSG_ALIGN(sizeof(*header)));
memcpy(header->device_name, query->port->agent->device->name, memcpy(header->device_name, dev_name(&query->port->agent->device->dev),
LS_DEVICE_NAME_MAX); LS_DEVICE_NAME_MAX);
header->port_num = query->port->port_num; header->port_num = query->port->port_num;
@@ -835,7 +835,6 @@ static int ib_nl_send_msg(struct ib_sa_query *query, gfp_t gfp_mask)
struct sk_buff *skb = NULL; struct sk_buff *skb = NULL;
struct nlmsghdr *nlh; struct nlmsghdr *nlh;
void *data; void *data;
int ret = 0;
struct ib_sa_mad *mad; struct ib_sa_mad *mad;
int len; int len;
@@ -862,13 +861,7 @@ static int ib_nl_send_msg(struct ib_sa_query *query, gfp_t gfp_mask)
/* Repair the nlmsg header length */ /* Repair the nlmsg header length */
nlmsg_end(skb, nlh); nlmsg_end(skb, nlh);
ret = rdma_nl_multicast(skb, RDMA_NL_GROUP_LS, gfp_mask); return rdma_nl_multicast(skb, RDMA_NL_GROUP_LS, gfp_mask);
if (!ret)
ret = len;
else
ret = 0;
return ret;
} }
static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask) static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask)
@@ -891,14 +884,12 @@ static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask)
spin_unlock_irqrestore(&ib_nl_request_lock, flags); spin_unlock_irqrestore(&ib_nl_request_lock, flags);
ret = ib_nl_send_msg(query, gfp_mask); ret = ib_nl_send_msg(query, gfp_mask);
if (ret <= 0) { if (ret) {
ret = -EIO; ret = -EIO;
/* Remove the request */ /* Remove the request */
spin_lock_irqsave(&ib_nl_request_lock, flags); spin_lock_irqsave(&ib_nl_request_lock, flags);
list_del(&query->list); list_del(&query->list);
spin_unlock_irqrestore(&ib_nl_request_lock, flags); spin_unlock_irqrestore(&ib_nl_request_lock, flags);
} else {
ret = 0;
} }
return ret; return ret;
@@ -1227,46 +1218,6 @@ static u8 get_src_path_mask(struct ib_device *device, u8 port_num)
return src_path_mask; return src_path_mask;
} }
static int roce_resolve_route_from_path(struct sa_path_rec *rec,
const struct ib_gid_attr *attr)
{
struct rdma_dev_addr dev_addr = {};
union {
struct sockaddr _sockaddr;
struct sockaddr_in _sockaddr_in;
struct sockaddr_in6 _sockaddr_in6;
} sgid_addr, dgid_addr;
int ret;
if (rec->roce.route_resolved)
return 0;
if (!attr || !attr->ndev)
return -EINVAL;
dev_addr.bound_dev_if = attr->ndev->ifindex;
/* TODO: Use net from the ib_gid_attr once it is added to it,
* until than, limit itself to init_net.
*/
dev_addr.net = &init_net;
rdma_gid2ip(&sgid_addr._sockaddr, &rec->sgid);
rdma_gid2ip(&dgid_addr._sockaddr, &rec->dgid);
/* validate the route */
ret = rdma_resolve_ip_route(&sgid_addr._sockaddr,
&dgid_addr._sockaddr, &dev_addr);
if (ret)
return ret;
if ((dev_addr.network == RDMA_NETWORK_IPV4 ||
dev_addr.network == RDMA_NETWORK_IPV6) &&
rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2)
return -EINVAL;
rec->roce.route_resolved = true;
return 0;
}
static int init_ah_attr_grh_fields(struct ib_device *device, u8 port_num, static int init_ah_attr_grh_fields(struct ib_device *device, u8 port_num,
struct sa_path_rec *rec, struct sa_path_rec *rec,
struct rdma_ah_attr *ah_attr, struct rdma_ah_attr *ah_attr,
@@ -1409,7 +1360,8 @@ static void init_mad(struct ib_sa_query *query, struct ib_mad_agent *agent)
spin_unlock_irqrestore(&tid_lock, flags); spin_unlock_irqrestore(&tid_lock, flags);
} }
static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask) static int send_mad(struct ib_sa_query *query, unsigned long timeout_ms,
gfp_t gfp_mask)
{ {
bool preload = gfpflags_allow_blocking(gfp_mask); bool preload = gfpflags_allow_blocking(gfp_mask);
unsigned long flags; unsigned long flags;
@@ -1433,7 +1385,7 @@ static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
if ((query->flags & IB_SA_ENABLE_LOCAL_SERVICE) && if ((query->flags & IB_SA_ENABLE_LOCAL_SERVICE) &&
(!(query->flags & IB_SA_QUERY_OPA))) { (!(query->flags & IB_SA_QUERY_OPA))) {
if (!rdma_nl_chk_listeners(RDMA_NL_GROUP_LS)) { if (rdma_nl_chk_listeners(RDMA_NL_GROUP_LS)) {
if (!ib_nl_make_request(query, gfp_mask)) if (!ib_nl_make_request(query, gfp_mask))
return id; return id;
} }
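Note why the condition flips in this hunk: the old rdma_nl_chk_listeners() (netlink.c hunk above) returned 0 when the group had listeners and -1 otherwise, so callers negated it; the new bool version returns true when RDMA_NL_GROUP_LS has listeners, and the negation is dropped to keep the same behaviour.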
@@ -1599,7 +1551,7 @@ int ib_sa_path_rec_get(struct ib_sa_client *client,
struct ib_device *device, u8 port_num, struct ib_device *device, u8 port_num,
struct sa_path_rec *rec, struct sa_path_rec *rec,
ib_sa_comp_mask comp_mask, ib_sa_comp_mask comp_mask,
int timeout_ms, gfp_t gfp_mask, unsigned long timeout_ms, gfp_t gfp_mask,
void (*callback)(int status, void (*callback)(int status,
struct sa_path_rec *resp, struct sa_path_rec *resp,
void *context), void *context),
@@ -1753,7 +1705,7 @@ int ib_sa_service_rec_query(struct ib_sa_client *client,
struct ib_device *device, u8 port_num, u8 method, struct ib_device *device, u8 port_num, u8 method,
struct ib_sa_service_rec *rec, struct ib_sa_service_rec *rec,
ib_sa_comp_mask comp_mask, ib_sa_comp_mask comp_mask,
int timeout_ms, gfp_t gfp_mask, unsigned long timeout_ms, gfp_t gfp_mask,
void (*callback)(int status, void (*callback)(int status,
struct ib_sa_service_rec *resp, struct ib_sa_service_rec *resp,
void *context), void *context),
@@ -1850,7 +1802,7 @@ int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
u8 method, u8 method,
struct ib_sa_mcmember_rec *rec, struct ib_sa_mcmember_rec *rec,
ib_sa_comp_mask comp_mask, ib_sa_comp_mask comp_mask,
int timeout_ms, gfp_t gfp_mask, unsigned long timeout_ms, gfp_t gfp_mask,
void (*callback)(int status, void (*callback)(int status,
struct ib_sa_mcmember_rec *resp, struct ib_sa_mcmember_rec *resp,
void *context), void *context),
@@ -1941,7 +1893,7 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
struct ib_device *device, u8 port_num, struct ib_device *device, u8 port_num,
struct ib_sa_guidinfo_rec *rec, struct ib_sa_guidinfo_rec *rec,
ib_sa_comp_mask comp_mask, u8 method, ib_sa_comp_mask comp_mask, u8 method,
int timeout_ms, gfp_t gfp_mask, unsigned long timeout_ms, gfp_t gfp_mask,
void (*callback)(int status, void (*callback)(int status,
struct ib_sa_guidinfo_rec *resp, struct ib_sa_guidinfo_rec *resp,
void *context), void *context),
@@ -2108,7 +2060,7 @@ static void ib_sa_classport_info_rec_release(struct ib_sa_query *sa_query)
} }
static int ib_sa_classport_info_rec_query(struct ib_sa_port *port, static int ib_sa_classport_info_rec_query(struct ib_sa_port *port,
int timeout_ms, unsigned long timeout_ms,
void (*callback)(void *context), void (*callback)(void *context),
void *context, void *context,
struct ib_sa_query **sa_query) struct ib_sa_query **sa_query)


@@ -685,9 +685,8 @@ static int ib_mad_agent_security_change(struct notifier_block *nb,
if (event != LSM_POLICY_CHANGE) if (event != LSM_POLICY_CHANGE)
return NOTIFY_DONE; return NOTIFY_DONE;
ag->smp_allowed = !security_ib_endport_manage_subnet(ag->security, ag->smp_allowed = !security_ib_endport_manage_subnet(
ag->device->name, ag->security, dev_name(&ag->device->dev), ag->port_num);
ag->port_num);
return NOTIFY_OK; return NOTIFY_OK;
} }
@@ -708,7 +707,7 @@ int ib_mad_agent_security_setup(struct ib_mad_agent *agent,
return 0; return 0;
ret = security_ib_endport_manage_subnet(agent->security, ret = security_ib_endport_manage_subnet(agent->security,
agent->device->name, dev_name(&agent->device->dev),
agent->port_num); agent->port_num);
if (ret) if (ret)
return ret; return ret;


@@ -512,7 +512,7 @@ static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
ret = get_perf_mad(p->ibdev, p->port_num, tab_attr->attr_id, &data, ret = get_perf_mad(p->ibdev, p->port_num, tab_attr->attr_id, &data,
40 + offset / 8, sizeof(data)); 40 + offset / 8, sizeof(data));
if (ret < 0) if (ret < 0)
return sprintf(buf, "N/A (no PMA)\n"); return ret;
switch (width) { switch (width) {
case 4: case 4:
@@ -1036,7 +1036,7 @@ static int add_port(struct ib_device *device, int port_num,
p->port_num = port_num; p->port_num = port_num;
ret = kobject_init_and_add(&p->kobj, &port_type, ret = kobject_init_and_add(&p->kobj, &port_type,
device->ports_parent, device->ports_kobj,
"%d", port_num); "%d", port_num);
if (ret) { if (ret) {
kfree(p); kfree(p);
@@ -1057,10 +1057,12 @@ static int add_port(struct ib_device *device, int port_num,
goto err_put; goto err_put;
} }
p->pma_table = get_counter_table(device, port_num); if (device->process_mad) {
ret = sysfs_create_group(&p->kobj, p->pma_table); p->pma_table = get_counter_table(device, port_num);
if (ret) ret = sysfs_create_group(&p->kobj, p->pma_table);
goto err_put_gid_attrs; if (ret)
goto err_put_gid_attrs;
}
p->gid_group.name = "gids"; p->gid_group.name = "gids";
p->gid_group.attrs = alloc_group_attrs(show_port_gid, attr.gid_tbl_len); p->gid_group.attrs = alloc_group_attrs(show_port_gid, attr.gid_tbl_len);
@@ -1118,9 +1120,9 @@ static int add_port(struct ib_device *device, int port_num,
} }
/* /*
* If port == 0, it means we have only one port and the parent * If port == 0, it means hw_counters are per device and not per
* device, not this port device, should be the holder of the * port, so holder should be device. Therefore skip per port counter
* hw_counters * initialization.
*/ */
if (device->alloc_hw_stats && port_num) if (device->alloc_hw_stats && port_num)
setup_hw_stats(device, p, port_num); setup_hw_stats(device, p, port_num);
@@ -1173,7 +1175,8 @@ err_free_gid:
p->gid_group.attrs = NULL; p->gid_group.attrs = NULL;
err_remove_pma: err_remove_pma:
sysfs_remove_group(&p->kobj, p->pma_table); if (p->pma_table)
sysfs_remove_group(&p->kobj, p->pma_table);
err_put_gid_attrs: err_put_gid_attrs:
kobject_put(&p->gid_attr_group->kobj); kobject_put(&p->gid_attr_group->kobj);
@@ -1183,7 +1186,7 @@ err_put:
return ret; return ret;
} }
static ssize_t show_node_type(struct device *device, static ssize_t node_type_show(struct device *device,
struct device_attribute *attr, char *buf) struct device_attribute *attr, char *buf)
{ {
struct ib_device *dev = container_of(device, struct ib_device, dev); struct ib_device *dev = container_of(device, struct ib_device, dev);
@@ -1198,8 +1201,9 @@ static ssize_t show_node_type(struct device *device,
default: return sprintf(buf, "%d: <unknown>\n", dev->node_type); default: return sprintf(buf, "%d: <unknown>\n", dev->node_type);
} }
} }
static DEVICE_ATTR_RO(node_type);
static ssize_t show_sys_image_guid(struct device *device, static ssize_t sys_image_guid_show(struct device *device,
struct device_attribute *dev_attr, char *buf) struct device_attribute *dev_attr, char *buf)
{ {
struct ib_device *dev = container_of(device, struct ib_device, dev); struct ib_device *dev = container_of(device, struct ib_device, dev);
@@ -1210,8 +1214,9 @@ static ssize_t show_sys_image_guid(struct device *device,
be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[2]), be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[2]),
be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[3])); be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[3]));
} }
static DEVICE_ATTR_RO(sys_image_guid);
static ssize_t show_node_guid(struct device *device, static ssize_t node_guid_show(struct device *device,
struct device_attribute *attr, char *buf) struct device_attribute *attr, char *buf)
{ {
struct ib_device *dev = container_of(device, struct ib_device, dev); struct ib_device *dev = container_of(device, struct ib_device, dev);
@@ -1222,8 +1227,9 @@ static ssize_t show_node_guid(struct device *device,
be16_to_cpu(((__be16 *) &dev->node_guid)[2]), be16_to_cpu(((__be16 *) &dev->node_guid)[2]),
be16_to_cpu(((__be16 *) &dev->node_guid)[3])); be16_to_cpu(((__be16 *) &dev->node_guid)[3]));
} }
static DEVICE_ATTR_RO(node_guid);
static ssize_t show_node_desc(struct device *device, static ssize_t node_desc_show(struct device *device,
struct device_attribute *attr, char *buf) struct device_attribute *attr, char *buf)
{ {
struct ib_device *dev = container_of(device, struct ib_device, dev); struct ib_device *dev = container_of(device, struct ib_device, dev);
@@ -1231,9 +1237,9 @@ static ssize_t show_node_desc(struct device *device,
return sprintf(buf, "%.64s\n", dev->node_desc); return sprintf(buf, "%.64s\n", dev->node_desc);
} }
static ssize_t set_node_desc(struct device *device, static ssize_t node_desc_store(struct device *device,
struct device_attribute *attr, struct device_attribute *attr,
const char *buf, size_t count) const char *buf, size_t count)
{ {
struct ib_device *dev = container_of(device, struct ib_device, dev); struct ib_device *dev = container_of(device, struct ib_device, dev);
struct ib_device_modify desc = {}; struct ib_device_modify desc = {};
@@ -1249,8 +1255,9 @@ static ssize_t set_node_desc(struct device *device,
return count; return count;
} }
static DEVICE_ATTR_RW(node_desc);
static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, static ssize_t fw_ver_show(struct device *device, struct device_attribute *attr,
char *buf) char *buf)
{ {
struct ib_device *dev = container_of(device, struct ib_device, dev); struct ib_device *dev = container_of(device, struct ib_device, dev);
@@ -1259,19 +1266,19 @@ static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
strlcat(buf, "\n", IB_FW_VERSION_NAME_MAX); strlcat(buf, "\n", IB_FW_VERSION_NAME_MAX);
return strlen(buf); return strlen(buf);
} }
static DEVICE_ATTR_RO(fw_ver);
static DEVICE_ATTR(node_type, S_IRUGO, show_node_type, NULL); static struct attribute *ib_dev_attrs[] = {
static DEVICE_ATTR(sys_image_guid, S_IRUGO, show_sys_image_guid, NULL); &dev_attr_node_type.attr,
static DEVICE_ATTR(node_guid, S_IRUGO, show_node_guid, NULL); &dev_attr_node_guid.attr,
static DEVICE_ATTR(node_desc, S_IRUGO | S_IWUSR, show_node_desc, set_node_desc); &dev_attr_sys_image_guid.attr,
static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); &dev_attr_fw_ver.attr,
&dev_attr_node_desc.attr,
NULL,
};
static struct device_attribute *ib_class_attributes[] = { static const struct attribute_group dev_attr_group = {
&dev_attr_node_type, .attrs = ib_dev_attrs,
&dev_attr_sys_image_guid,
&dev_attr_node_guid,
&dev_attr_node_desc,
&dev_attr_fw_ver,
}; };
static void free_port_list_attributes(struct ib_device *device) static void free_port_list_attributes(struct ib_device *device)
@@ -1285,7 +1292,9 @@ static void free_port_list_attributes(struct ib_device *device)
kfree(port->hw_stats); kfree(port->hw_stats);
free_hsag(&port->kobj, port->hw_stats_ag); free_hsag(&port->kobj, port->hw_stats_ag);
} }
sysfs_remove_group(p, port->pma_table);
if (port->pma_table)
sysfs_remove_group(p, port->pma_table);
sysfs_remove_group(p, &port->pkey_group); sysfs_remove_group(p, &port->pkey_group);
sysfs_remove_group(p, &port->gid_group); sysfs_remove_group(p, &port->gid_group);
sysfs_remove_group(&port->gid_attr_group->kobj, sysfs_remove_group(&port->gid_attr_group->kobj,
@@ -1296,7 +1305,7 @@ static void free_port_list_attributes(struct ib_device *device)
kobject_put(p); kobject_put(p);
} }
kobject_put(device->ports_parent); kobject_put(device->ports_kobj);
} }
int ib_device_register_sysfs(struct ib_device *device, int ib_device_register_sysfs(struct ib_device *device,
@@ -1307,23 +1316,15 @@ int ib_device_register_sysfs(struct ib_device *device,
int ret; int ret;
int i; int i;
ret = dev_set_name(class_dev, "%s", device->name); device->groups[0] = &dev_attr_group;
if (ret) class_dev->groups = device->groups;
return ret;
ret = device_add(class_dev); ret = device_add(class_dev);
if (ret) if (ret)
goto err; goto err;
for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i) { device->ports_kobj = kobject_create_and_add("ports", &class_dev->kobj);
ret = device_create_file(class_dev, ib_class_attributes[i]); if (!device->ports_kobj) {
if (ret)
goto err_unregister;
}
device->ports_parent = kobject_create_and_add("ports",
&class_dev->kobj);
if (!device->ports_parent) {
ret = -ENOMEM; ret = -ENOMEM;
goto err_put; goto err_put;
} }
@@ -1347,20 +1348,15 @@ int ib_device_register_sysfs(struct ib_device *device,
err_put: err_put:
free_port_list_attributes(device); free_port_list_attributes(device);
err_unregister:
device_del(class_dev); device_del(class_dev);
err: err:
return ret; return ret;
} }
void ib_device_unregister_sysfs(struct ib_device *device) void ib_device_unregister_sysfs(struct ib_device *device)
{ {
int i; /* Hold device until ib_dealloc_device() */
get_device(&device->dev);
/* Hold kobject until ib_dealloc_device() */
kobject_get(&device->dev.kobj);
free_port_list_attributes(device); free_port_list_attributes(device);
@@ -1369,8 +1365,5 @@ void ib_device_unregister_sysfs(struct ib_device *device)
free_hsag(&device->dev.kobj, device->hw_stats_ag); free_hsag(&device->dev.kobj, device->hw_stats_ag);
} }
for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i)
device_remove_file(&device->dev, ib_class_attributes[i]);
device_unregister(&device->dev); device_unregister(&device->dev);
} }


@@ -85,7 +85,9 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
struct page **page_list; struct page **page_list;
struct vm_area_struct **vma_list; struct vm_area_struct **vma_list;
unsigned long lock_limit; unsigned long lock_limit;
unsigned long new_pinned;
unsigned long cur_base; unsigned long cur_base;
struct mm_struct *mm;
unsigned long npages; unsigned long npages;
int ret; int ret;
int i; int i;
@@ -107,25 +109,32 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
if (!can_do_mlock()) if (!can_do_mlock())
return ERR_PTR(-EPERM); return ERR_PTR(-EPERM);
umem = kzalloc(sizeof *umem, GFP_KERNEL); if (access & IB_ACCESS_ON_DEMAND) {
if (!umem) umem = kzalloc(sizeof(struct ib_umem_odp), GFP_KERNEL);
return ERR_PTR(-ENOMEM); if (!umem)
return ERR_PTR(-ENOMEM);
umem->is_odp = 1;
} else {
umem = kzalloc(sizeof(*umem), GFP_KERNEL);
if (!umem)
return ERR_PTR(-ENOMEM);
}
umem->context = context; umem->context = context;
umem->length = size; umem->length = size;
umem->address = addr; umem->address = addr;
umem->page_shift = PAGE_SHIFT; umem->page_shift = PAGE_SHIFT;
umem->writable = ib_access_writable(access); umem->writable = ib_access_writable(access);
umem->owning_mm = mm = current->mm;
mmgrab(mm);
if (access & IB_ACCESS_ON_DEMAND) { if (access & IB_ACCESS_ON_DEMAND) {
ret = ib_umem_odp_get(context, umem, access); ret = ib_umem_odp_get(to_ib_umem_odp(umem), access);
if (ret) if (ret)
goto umem_kfree; goto umem_kfree;
return umem; return umem;
} }
umem->odp_data = NULL;
/* We assume the memory is from hugetlb until proved otherwise */ /* We assume the memory is from hugetlb until proved otherwise */
umem->hugetlb = 1; umem->hugetlb = 1;
@@ -144,25 +153,25 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
umem->hugetlb = 0; umem->hugetlb = 0;
npages = ib_umem_num_pages(umem); npages = ib_umem_num_pages(umem);
if (npages == 0 || npages > UINT_MAX) {
ret = -EINVAL;
goto out;
}
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
down_write(&current->mm->mmap_sem); down_write(&mm->mmap_sem);
current->mm->pinned_vm += npages; if (check_add_overflow(mm->pinned_vm, npages, &new_pinned) ||
if ((current->mm->pinned_vm > lock_limit) && !capable(CAP_IPC_LOCK)) { (new_pinned > lock_limit && !capable(CAP_IPC_LOCK))) {
up_write(&current->mm->mmap_sem); up_write(&mm->mmap_sem);
ret = -ENOMEM; ret = -ENOMEM;
goto vma; goto out;
} }
up_write(&current->mm->mmap_sem); mm->pinned_vm = new_pinned;
up_write(&mm->mmap_sem);
cur_base = addr & PAGE_MASK; cur_base = addr & PAGE_MASK;
if (npages == 0 || npages > UINT_MAX) {
ret = -EINVAL;
goto vma;
}
ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL); ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL);
if (ret) if (ret)
goto vma; goto vma;
@@ -172,14 +181,14 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
sg_list_start = umem->sg_head.sgl; sg_list_start = umem->sg_head.sgl;
down_read(&current->mm->mmap_sem);
while (npages) { while (npages) {
down_read(&mm->mmap_sem);
ret = get_user_pages_longterm(cur_base, ret = get_user_pages_longterm(cur_base,
min_t(unsigned long, npages, min_t(unsigned long, npages,
PAGE_SIZE / sizeof (struct page *)), PAGE_SIZE / sizeof (struct page *)),
gup_flags, page_list, vma_list); gup_flags, page_list, vma_list);
if (ret < 0) { if (ret < 0) {
up_read(&current->mm->mmap_sem); up_read(&mm->mmap_sem);
goto umem_release; goto umem_release;
} }
@@ -187,17 +196,20 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
cur_base += ret * PAGE_SIZE; cur_base += ret * PAGE_SIZE;
npages -= ret; npages -= ret;
/* Continue to hold the mmap_sem as vma_list access
* needs to be protected.
*/
for_each_sg(sg_list_start, sg, ret, i) { for_each_sg(sg_list_start, sg, ret, i) {
if (vma_list && !is_vm_hugetlb_page(vma_list[i])) if (vma_list && !is_vm_hugetlb_page(vma_list[i]))
umem->hugetlb = 0; umem->hugetlb = 0;
sg_set_page(sg, page_list[i], PAGE_SIZE, 0); sg_set_page(sg, page_list[i], PAGE_SIZE, 0);
} }
up_read(&mm->mmap_sem);
/* preparing for next loop */ /* preparing for next loop */
sg_list_start = sg; sg_list_start = sg;
} }
up_read(&current->mm->mmap_sem);
umem->nmap = ib_dma_map_sg_attrs(context->device, umem->nmap = ib_dma_map_sg_attrs(context->device,
umem->sg_head.sgl, umem->sg_head.sgl,
@@ -216,29 +228,40 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
umem_release: umem_release:
__ib_umem_release(context->device, umem, 0); __ib_umem_release(context->device, umem, 0);
vma: vma:
down_write(&current->mm->mmap_sem); down_write(&mm->mmap_sem);
current->mm->pinned_vm -= ib_umem_num_pages(umem); mm->pinned_vm -= ib_umem_num_pages(umem);
up_write(&current->mm->mmap_sem); up_write(&mm->mmap_sem);
out: out:
if (vma_list) if (vma_list)
free_page((unsigned long) vma_list); free_page((unsigned long) vma_list);
free_page((unsigned long) page_list); free_page((unsigned long) page_list);
umem_kfree: umem_kfree:
if (ret) if (ret) {
mmdrop(umem->owning_mm);
kfree(umem); kfree(umem);
}
return ret ? ERR_PTR(ret) : umem; return ret ? ERR_PTR(ret) : umem;
} }
EXPORT_SYMBOL(ib_umem_get); EXPORT_SYMBOL(ib_umem_get);
static void ib_umem_account(struct work_struct *work) static void __ib_umem_release_tail(struct ib_umem *umem)
{
mmdrop(umem->owning_mm);
if (umem->is_odp)
kfree(to_ib_umem_odp(umem));
else
kfree(umem);
}
static void ib_umem_release_defer(struct work_struct *work)
{ {
struct ib_umem *umem = container_of(work, struct ib_umem, work); struct ib_umem *umem = container_of(work, struct ib_umem, work);
down_write(&umem->mm->mmap_sem); down_write(&umem->owning_mm->mmap_sem);
umem->mm->pinned_vm -= umem->diff; umem->owning_mm->pinned_vm -= ib_umem_num_pages(umem);
up_write(&umem->mm->mmap_sem); up_write(&umem->owning_mm->mmap_sem);
mmput(umem->mm);
kfree(umem); __ib_umem_release_tail(umem);
} }
/** /**
@@ -248,52 +271,36 @@ static void ib_umem_account(struct work_struct *work)
void ib_umem_release(struct ib_umem *umem) void ib_umem_release(struct ib_umem *umem)
{ {
struct ib_ucontext *context = umem->context; struct ib_ucontext *context = umem->context;
struct mm_struct *mm;
struct task_struct *task;
unsigned long diff;
if (umem->odp_data) { if (umem->is_odp) {
ib_umem_odp_release(umem); ib_umem_odp_release(to_ib_umem_odp(umem));
__ib_umem_release_tail(umem);
return; return;
} }
__ib_umem_release(umem->context->device, umem, 1); __ib_umem_release(umem->context->device, umem, 1);
task = get_pid_task(umem->context->tgid, PIDTYPE_PID);
if (!task)
goto out;
mm = get_task_mm(task);
put_task_struct(task);
if (!mm)
goto out;
diff = ib_umem_num_pages(umem);
/* /*
* We may be called with the mm's mmap_sem already held. This * We may be called with the mm's mmap_sem already held. This
* can happen when a userspace munmap() is the call that drops * can happen when a userspace munmap() is the call that drops
* the last reference to our file and calls our release * the last reference to our file and calls our release
* method. If there are memory regions to destroy, we'll end * method. If there are memory regions to destroy, we'll end
* up here and not be able to take the mmap_sem. In that case * up here and not be able to take the mmap_sem. In that case
 * we defer the vm_locked accounting to the system workqueue. * we defer the vm_locked accounting to a workqueue.
*/ */
if (context->closing) { if (context->closing) {
if (!down_write_trylock(&mm->mmap_sem)) { if (!down_write_trylock(&umem->owning_mm->mmap_sem)) {
INIT_WORK(&umem->work, ib_umem_account); INIT_WORK(&umem->work, ib_umem_release_defer);
umem->mm = mm;
umem->diff = diff;
queue_work(ib_wq, &umem->work); queue_work(ib_wq, &umem->work);
return; return;
} }
} else } else {
down_write(&mm->mmap_sem); down_write(&umem->owning_mm->mmap_sem);
}
umem->owning_mm->pinned_vm -= ib_umem_num_pages(umem);
up_write(&umem->owning_mm->mmap_sem);
mm->pinned_vm -= diff; __ib_umem_release_tail(umem);
up_write(&mm->mmap_sem);
mmput(mm);
out:
kfree(umem);
} }
EXPORT_SYMBOL(ib_umem_release); EXPORT_SYMBOL(ib_umem_release);
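ib_umem_release() still has to cope with being invoked from munmap() with mmap_sem already held, so during ucontext teardown it only trylocks and defers the pinned-page accounting to the ib_wq workqueue when the trylock fails. A hedged pthread analogue of that trylock-or-defer shape (userspace only, build with -pthread, all names invented):

/* Sketch: the caller may already hold the lock, so the cleanup is handed
 * to a worker thread (standing in for ib_wq) when the trylock fails. */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t map_lock = PTHREAD_MUTEX_INITIALIZER; /* stands in for mmap_sem */
static long pinned_pages = 128;
static long pending_diff;

/* Worker context: here it is always safe to block on map_lock. */
static void *deferred_release(void *arg)
{
    (void)arg;
    pthread_mutex_lock(&map_lock);
    pinned_pages -= pending_diff;
    pthread_mutex_unlock(&map_lock);
    return NULL;
}

/* Returns 1 if the accounting was deferred to @worker, 0 if done inline. */
static int release_accounting(long diff, pthread_t *worker)
{
    if (pthread_mutex_trylock(&map_lock) == 0) {
        pinned_pages -= diff;
        pthread_mutex_unlock(&map_lock);
        return 0;
    }
    pending_diff = diff;
    pthread_create(worker, NULL, deferred_release, NULL);
    return 1;
}

int main(void)
{
    pthread_t worker;
    int deferred;

    pthread_mutex_lock(&map_lock);      /* simulate being called under the lock */
    deferred = release_accounting(32, &worker);
    pthread_mutex_unlock(&map_lock);

    if (deferred)
        pthread_join(worker, NULL);
    printf("deferred=%d pinned_pages=%ld\n", deferred, pinned_pages);
    return 0;
}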
@@ -303,7 +310,7 @@ int ib_umem_page_count(struct ib_umem *umem)
int n; int n;
struct scatterlist *sg; struct scatterlist *sg;
if (umem->odp_data) if (umem->is_odp)
return ib_umem_num_pages(umem); return ib_umem_num_pages(umem);
n = 0; n = 0;
@@ -58,7 +58,7 @@ static u64 node_start(struct umem_odp_node *n)
struct ib_umem_odp *umem_odp = struct ib_umem_odp *umem_odp =
container_of(n, struct ib_umem_odp, interval_tree); container_of(n, struct ib_umem_odp, interval_tree);
return ib_umem_start(umem_odp->umem); return ib_umem_start(&umem_odp->umem);
} }
/* Note that the representation of the intervals in the interval tree /* Note that the representation of the intervals in the interval tree
@@ -71,140 +71,86 @@ static u64 node_last(struct umem_odp_node *n)
struct ib_umem_odp *umem_odp = struct ib_umem_odp *umem_odp =
container_of(n, struct ib_umem_odp, interval_tree); container_of(n, struct ib_umem_odp, interval_tree);
return ib_umem_end(umem_odp->umem) - 1; return ib_umem_end(&umem_odp->umem) - 1;
} }
INTERVAL_TREE_DEFINE(struct umem_odp_node, rb, u64, __subtree_last, INTERVAL_TREE_DEFINE(struct umem_odp_node, rb, u64, __subtree_last,
node_start, node_last, static, rbt_ib_umem) node_start, node_last, static, rbt_ib_umem)
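node_start()/node_last() key each ODP umem by its page-aligned range, with the last address made inclusive (end - 1) because that is what the generic interval tree expects. A small standalone sketch of how those keys are derived from (address, length, page_shift), mirroring what ib_umem_start()/ib_umem_end() compute (helper names local to the example):

/* Standalone sketch of the [start, last] keys fed to the interval tree. */
#include <stdio.h>
#include <stdint.h>

struct range { uint64_t start, last; };

static struct range umem_tree_key(uint64_t addr, uint64_t length, unsigned page_shift)
{
    uint64_t page_size = 1ULL << page_shift;
    uint64_t start = addr & ~(page_size - 1);                          /* round down */
    uint64_t end = (addr + length + page_size - 1) & ~(page_size - 1); /* round up */

    /* The interval tree takes an inclusive last address, hence end - 1. */
    return (struct range){ .start = start, .last = end - 1 };
}

int main(void)
{
    struct range r = umem_tree_key(0x1234567, 0x3000, 12);
    printf("start=0x%llx last=0x%llx\n",
           (unsigned long long)r.start, (unsigned long long)r.last);
    return 0;
}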
static void ib_umem_notifier_start_account(struct ib_umem *item) static void ib_umem_notifier_start_account(struct ib_umem_odp *umem_odp)
{ {
mutex_lock(&item->odp_data->umem_mutex); mutex_lock(&umem_odp->umem_mutex);
if (umem_odp->notifiers_count++ == 0)
/* Only update private counters for this umem if it has them.
* Otherwise skip it. All page faults will be delayed for this umem. */
if (item->odp_data->mn_counters_active) {
int notifiers_count = item->odp_data->notifiers_count++;
if (notifiers_count == 0)
/* Initialize the completion object for waiting on
* notifiers. Since notifier_count is zero, no one
* should be waiting right now. */
reinit_completion(&item->odp_data->notifier_completion);
}
mutex_unlock(&item->odp_data->umem_mutex);
}
static void ib_umem_notifier_end_account(struct ib_umem *item)
{
mutex_lock(&item->odp_data->umem_mutex);
/* Only update private counters for this umem if it has them.
* Otherwise skip it. All page faults will be delayed for this umem. */
if (item->odp_data->mn_counters_active) {
/* /*
* This sequence increase will notify the QP page fault that * Initialize the completion object for waiting on
* the page that is going to be mapped in the spte could have * notifiers. Since notifier_count is zero, no one should be
* been freed. * waiting right now.
*/ */
++item->odp_data->notifiers_seq; reinit_completion(&umem_odp->notifier_completion);
if (--item->odp_data->notifiers_count == 0) mutex_unlock(&umem_odp->umem_mutex);
complete_all(&item->odp_data->notifier_completion);
}
mutex_unlock(&item->odp_data->umem_mutex);
} }
/* Account for a new mmu notifier in an ib_ucontext. */ static void ib_umem_notifier_end_account(struct ib_umem_odp *umem_odp)
static void ib_ucontext_notifier_start_account(struct ib_ucontext *context)
{ {
atomic_inc(&context->notifier_count); mutex_lock(&umem_odp->umem_mutex);
/*
* This sequence increase will notify the QP page fault that the page
* that is going to be mapped in the spte could have been freed.
*/
++umem_odp->notifiers_seq;
if (--umem_odp->notifiers_count == 0)
complete_all(&umem_odp->notifier_completion);
mutex_unlock(&umem_odp->umem_mutex);
} }
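The notifiers_count/notifiers_seq pair is a "busy count plus sequence" handshake: the ODP page-fault path samples the sequence, does its work, and retries when an invalidation is running or has bumped the sequence in the meantime, which is exactly what ib_umem_mmu_notifier_retry() (used further down) tests. A hedged standalone sketch of that retry check (structure and names invented):

/* Sketch of the retry test performed by the ODP page-fault path: a mapping
 * attempt is stale if an invalidation is in flight (count != 0) or has
 * completed since the sequence number was sampled (seq changed). */
#include <stdio.h>
#include <stdbool.h>

struct odp_sync {
    unsigned int notifiers_count; /* invalidations currently running */
    unsigned int notifiers_seq;   /* bumped by every completed invalidation */
};

static unsigned int read_seq(const struct odp_sync *s)
{
    /* In the kernel this read happens under umem_mutex before it is dropped. */
    return s->notifiers_seq;
}

static bool mmu_notifier_retry(const struct odp_sync *s, unsigned int sampled_seq)
{
    return s->notifiers_count != 0 || s->notifiers_seq != sampled_seq;
}

int main(void)
{
    struct odp_sync s = { 0, 7 };
    unsigned int seq = read_seq(&s);

    /* ... page-fault path pins and prepares the page here ... */

    s.notifiers_seq++;            /* an invalidation raced with us */
    printf("retry needed: %s\n", mmu_notifier_retry(&s, seq) ? "yes" : "no");
    return 0;
}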
/* Account for a terminating mmu notifier in an ib_ucontext. static int ib_umem_notifier_release_trampoline(struct ib_umem_odp *umem_odp,
* u64 start, u64 end, void *cookie)
* Must be called with the ib_ucontext->umem_rwsem semaphore unlocked, since
* the function takes the semaphore itself. */
static void ib_ucontext_notifier_end_account(struct ib_ucontext *context)
{ {
int zero_notifiers = atomic_dec_and_test(&context->notifier_count); struct ib_umem *umem = &umem_odp->umem;
if (zero_notifiers &&
!list_empty(&context->no_private_counters)) {
/* No currently running mmu notifiers. Now is the chance to
* add private accounting to all previously added umems. */
struct ib_umem_odp *odp_data, *next;
/* Prevent concurrent mmu notifiers from working on the
* no_private_counters list. */
down_write(&context->umem_rwsem);
/* Read the notifier_count again, with the umem_rwsem
* semaphore taken for write. */
if (!atomic_read(&context->notifier_count)) {
list_for_each_entry_safe(odp_data, next,
&context->no_private_counters,
no_private_counters) {
mutex_lock(&odp_data->umem_mutex);
odp_data->mn_counters_active = true;
list_del(&odp_data->no_private_counters);
complete_all(&odp_data->notifier_completion);
mutex_unlock(&odp_data->umem_mutex);
}
}
up_write(&context->umem_rwsem);
}
}
static int ib_umem_notifier_release_trampoline(struct ib_umem *item, u64 start,
u64 end, void *cookie) {
/* /*
* Increase the number of notifiers running, to * Increase the number of notifiers running, to
* prevent any further fault handling on this MR. * prevent any further fault handling on this MR.
*/ */
ib_umem_notifier_start_account(item); ib_umem_notifier_start_account(umem_odp);
item->odp_data->dying = 1; umem_odp->dying = 1;
/* Make sure that the fact the umem is dying is out before we release /* Make sure that the fact the umem is dying is out before we release
* all pending page faults. */ * all pending page faults. */
smp_wmb(); smp_wmb();
complete_all(&item->odp_data->notifier_completion); complete_all(&umem_odp->notifier_completion);
item->context->invalidate_range(item, ib_umem_start(item), umem->context->invalidate_range(umem_odp, ib_umem_start(umem),
ib_umem_end(item)); ib_umem_end(umem));
return 0; return 0;
} }
static void ib_umem_notifier_release(struct mmu_notifier *mn, static void ib_umem_notifier_release(struct mmu_notifier *mn,
struct mm_struct *mm) struct mm_struct *mm)
{ {
struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn); struct ib_ucontext_per_mm *per_mm =
container_of(mn, struct ib_ucontext_per_mm, mn);
if (!context->invalidate_range) down_read(&per_mm->umem_rwsem);
return; if (per_mm->active)
rbt_ib_umem_for_each_in_range(
ib_ucontext_notifier_start_account(context); &per_mm->umem_tree, 0, ULLONG_MAX,
down_read(&context->umem_rwsem); ib_umem_notifier_release_trampoline, true, NULL);
rbt_ib_umem_for_each_in_range(&context->umem_tree, 0, up_read(&per_mm->umem_rwsem);
ULLONG_MAX,
ib_umem_notifier_release_trampoline,
true,
NULL);
up_read(&context->umem_rwsem);
} }
static int invalidate_page_trampoline(struct ib_umem *item, u64 start, static int invalidate_page_trampoline(struct ib_umem_odp *item, u64 start,
u64 end, void *cookie) u64 end, void *cookie)
{ {
ib_umem_notifier_start_account(item); ib_umem_notifier_start_account(item);
item->context->invalidate_range(item, start, start + PAGE_SIZE); item->umem.context->invalidate_range(item, start, start + PAGE_SIZE);
ib_umem_notifier_end_account(item); ib_umem_notifier_end_account(item);
return 0; return 0;
} }
static int invalidate_range_start_trampoline(struct ib_umem *item, u64 start, static int invalidate_range_start_trampoline(struct ib_umem_odp *item,
u64 end, void *cookie) u64 start, u64 end, void *cookie)
{ {
ib_umem_notifier_start_account(item); ib_umem_notifier_start_account(item);
item->context->invalidate_range(item, start, end); item->umem.context->invalidate_range(item, start, end);
return 0; return 0;
} }
@@ -214,28 +160,30 @@ static int ib_umem_notifier_invalidate_range_start(struct mmu_notifier *mn,
unsigned long end, unsigned long end,
bool blockable) bool blockable)
{ {
struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn); struct ib_ucontext_per_mm *per_mm =
int ret; container_of(mn, struct ib_ucontext_per_mm, mn);
if (!context->invalidate_range)
return 0;
if (blockable) if (blockable)
down_read(&context->umem_rwsem); down_read(&per_mm->umem_rwsem);
else if (!down_read_trylock(&context->umem_rwsem)) else if (!down_read_trylock(&per_mm->umem_rwsem))
return -EAGAIN; return -EAGAIN;
ib_ucontext_notifier_start_account(context); if (!per_mm->active) {
ret = rbt_ib_umem_for_each_in_range(&context->umem_tree, start, up_read(&per_mm->umem_rwsem);
end, /*
invalidate_range_start_trampoline, * At this point active is permanently set and visible to this
blockable, NULL); * CPU without a lock, that fact is relied on to skip the unlock
up_read(&context->umem_rwsem); * in range_end.
*/
return 0;
}
return ret; return rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, start, end,
invalidate_range_start_trampoline,
blockable, NULL);
} }
static int invalidate_range_end_trampoline(struct ib_umem *item, u64 start, static int invalidate_range_end_trampoline(struct ib_umem_odp *item, u64 start,
u64 end, void *cookie) u64 end, void *cookie)
{ {
ib_umem_notifier_end_account(item); ib_umem_notifier_end_account(item);
@@ -247,22 +195,16 @@ static void ib_umem_notifier_invalidate_range_end(struct mmu_notifier *mn,
unsigned long start, unsigned long start,
unsigned long end) unsigned long end)
{ {
struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn); struct ib_ucontext_per_mm *per_mm =
container_of(mn, struct ib_ucontext_per_mm, mn);
if (!context->invalidate_range) if (unlikely(!per_mm->active))
return; return;
/* rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, start,
* TODO: we currently bail out if there is any sleepable work to be done
* in ib_umem_notifier_invalidate_range_start so we shouldn't really block
* here. But this is ugly and fragile.
*/
down_read(&context->umem_rwsem);
rbt_ib_umem_for_each_in_range(&context->umem_tree, start,
end, end,
invalidate_range_end_trampoline, true, NULL); invalidate_range_end_trampoline, true, NULL);
up_read(&context->umem_rwsem); up_read(&per_mm->umem_rwsem);
ib_ucontext_notifier_end_account(context);
} }
static const struct mmu_notifier_ops ib_umem_notifiers = { static const struct mmu_notifier_ops ib_umem_notifiers = {
@@ -271,31 +213,158 @@ static const struct mmu_notifier_ops ib_umem_notifiers = {
.invalidate_range_end = ib_umem_notifier_invalidate_range_end, .invalidate_range_end = ib_umem_notifier_invalidate_range_end,
}; };
struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context, static void add_umem_to_per_mm(struct ib_umem_odp *umem_odp)
unsigned long addr,
size_t size)
{ {
struct ib_umem *umem; struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm;
struct ib_umem *umem = &umem_odp->umem;
down_write(&per_mm->umem_rwsem);
if (likely(ib_umem_start(umem) != ib_umem_end(umem)))
rbt_ib_umem_insert(&umem_odp->interval_tree,
&per_mm->umem_tree);
up_write(&per_mm->umem_rwsem);
}
static void remove_umem_from_per_mm(struct ib_umem_odp *umem_odp)
{
struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm;
struct ib_umem *umem = &umem_odp->umem;
down_write(&per_mm->umem_rwsem);
if (likely(ib_umem_start(umem) != ib_umem_end(umem)))
rbt_ib_umem_remove(&umem_odp->interval_tree,
&per_mm->umem_tree);
complete_all(&umem_odp->notifier_completion);
up_write(&per_mm->umem_rwsem);
}
static struct ib_ucontext_per_mm *alloc_per_mm(struct ib_ucontext *ctx,
struct mm_struct *mm)
{
struct ib_ucontext_per_mm *per_mm;
int ret;
per_mm = kzalloc(sizeof(*per_mm), GFP_KERNEL);
if (!per_mm)
return ERR_PTR(-ENOMEM);
per_mm->context = ctx;
per_mm->mm = mm;
per_mm->umem_tree = RB_ROOT_CACHED;
init_rwsem(&per_mm->umem_rwsem);
per_mm->active = ctx->invalidate_range;
rcu_read_lock();
per_mm->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
rcu_read_unlock();
WARN_ON(mm != current->mm);
per_mm->mn.ops = &ib_umem_notifiers;
ret = mmu_notifier_register(&per_mm->mn, per_mm->mm);
if (ret) {
dev_err(&ctx->device->dev,
"Failed to register mmu_notifier %d\n", ret);
goto out_pid;
}
list_add(&per_mm->ucontext_list, &ctx->per_mm_list);
return per_mm;
out_pid:
put_pid(per_mm->tgid);
kfree(per_mm);
return ERR_PTR(ret);
}
static int get_per_mm(struct ib_umem_odp *umem_odp)
{
struct ib_ucontext *ctx = umem_odp->umem.context;
struct ib_ucontext_per_mm *per_mm;
/*
* Generally speaking we expect only one or two per_mm in this list,
* so no reason to optimize this search today.
*/
mutex_lock(&ctx->per_mm_list_lock);
list_for_each_entry(per_mm, &ctx->per_mm_list, ucontext_list) {
if (per_mm->mm == umem_odp->umem.owning_mm)
goto found;
}
per_mm = alloc_per_mm(ctx, umem_odp->umem.owning_mm);
if (IS_ERR(per_mm)) {
mutex_unlock(&ctx->per_mm_list_lock);
return PTR_ERR(per_mm);
}
found:
umem_odp->per_mm = per_mm;
per_mm->odp_mrs_count++;
mutex_unlock(&ctx->per_mm_list_lock);
return 0;
}
static void free_per_mm(struct rcu_head *rcu)
{
kfree(container_of(rcu, struct ib_ucontext_per_mm, rcu));
}
void put_per_mm(struct ib_umem_odp *umem_odp)
{
struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm;
struct ib_ucontext *ctx = umem_odp->umem.context;
bool need_free;
mutex_lock(&ctx->per_mm_list_lock);
umem_odp->per_mm = NULL;
per_mm->odp_mrs_count--;
need_free = per_mm->odp_mrs_count == 0;
if (need_free)
list_del(&per_mm->ucontext_list);
mutex_unlock(&ctx->per_mm_list_lock);
if (!need_free)
return;
/*
* NOTE! mmu_notifier_unregister() can happen between a start/end
* callback, resulting in an start/end, and thus an unbalanced
* lock. This doesn't really matter to us since we are about to kfree
* the memory that holds the lock, however LOCKDEP doesn't like this.
*/
down_write(&per_mm->umem_rwsem);
per_mm->active = false;
up_write(&per_mm->umem_rwsem);
WARN_ON(!RB_EMPTY_ROOT(&per_mm->umem_tree.rb_root));
mmu_notifier_unregister_no_release(&per_mm->mn, per_mm->mm);
put_pid(per_mm->tgid);
mmu_notifier_call_srcu(&per_mm->rcu, free_per_mm);
}
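get_per_mm()/put_per_mm() above implement a find-or-create, refcounted object shared by every ODP umem created against the same mm. A userspace analogue of that pattern is sketched below (plain list plus mutex, no mmu-notifier details, all names invented):

/* Standalone sketch of the per_mm find-or-create + refcount pattern. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct per_key {
    const void *key;          /* stands in for the owning mm_struct pointer */
    int refcount;             /* stands in for odp_mrs_count */
    struct per_key *next;
};

static struct per_key *per_key_list;
static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

/* Find the entry for @key, creating it on first use. */
static struct per_key *get_per_key(const void *key)
{
    struct per_key *p;

    pthread_mutex_lock(&list_lock);
    for (p = per_key_list; p; p = p->next)
        if (p->key == key)
            goto found;

    p = calloc(1, sizeof(*p));
    if (!p) {
        pthread_mutex_unlock(&list_lock);
        return NULL;
    }
    p->key = key;
    p->next = per_key_list;
    per_key_list = p;
found:
    p->refcount++;
    pthread_mutex_unlock(&list_lock);
    return p;
}

/* Drop one reference; unlink and free on the last put. */
static void put_per_key(struct per_key *p)
{
    struct per_key **pp;
    int free_it;

    pthread_mutex_lock(&list_lock);
    free_it = (--p->refcount == 0);
    if (free_it)
        for (pp = &per_key_list; *pp; pp = &(*pp)->next)
            if (*pp == p) {
                *pp = p->next;
                break;
            }
    pthread_mutex_unlock(&list_lock);
    if (free_it)
        free(p);
}

int main(void)
{
    int mm_a, mm_b;           /* two distinct "mm" identities */
    struct per_key *x = get_per_key(&mm_a);
    struct per_key *y = get_per_key(&mm_a);
    struct per_key *z = get_per_key(&mm_b);

    printf("same per-key object for one mm: %s\n", x == y ? "yes" : "no");
    put_per_key(x);
    put_per_key(y);
    put_per_key(z);
    return 0;
}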
struct ib_umem_odp *ib_alloc_odp_umem(struct ib_ucontext_per_mm *per_mm,
unsigned long addr, size_t size)
{
struct ib_ucontext *ctx = per_mm->context;
struct ib_umem_odp *odp_data; struct ib_umem_odp *odp_data;
struct ib_umem *umem;
int pages = size >> PAGE_SHIFT; int pages = size >> PAGE_SHIFT;
int ret; int ret;
umem = kzalloc(sizeof(*umem), GFP_KERNEL); odp_data = kzalloc(sizeof(*odp_data), GFP_KERNEL);
if (!umem) if (!odp_data)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
umem = &odp_data->umem;
umem->context = context; umem->context = ctx;
umem->length = size; umem->length = size;
umem->address = addr; umem->address = addr;
umem->page_shift = PAGE_SHIFT; umem->page_shift = PAGE_SHIFT;
umem->writable = 1; umem->writable = 1;
umem->is_odp = 1;
odp_data = kzalloc(sizeof(*odp_data), GFP_KERNEL); odp_data->per_mm = per_mm;
if (!odp_data) {
ret = -ENOMEM;
goto out_umem;
}
odp_data->umem = umem;
mutex_init(&odp_data->umem_mutex); mutex_init(&odp_data->umem_mutex);
init_completion(&odp_data->notifier_completion); init_completion(&odp_data->notifier_completion);
@@ -314,39 +383,34 @@ struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
goto out_page_list; goto out_page_list;
} }
down_write(&context->umem_rwsem); /*
context->odp_mrs_count++; * Caller must ensure that the umem_odp that the per_mm came from
rbt_ib_umem_insert(&odp_data->interval_tree, &context->umem_tree); * cannot be freed during the call to ib_alloc_odp_umem.
if (likely(!atomic_read(&context->notifier_count))) */
odp_data->mn_counters_active = true; mutex_lock(&ctx->per_mm_list_lock);
else per_mm->odp_mrs_count++;
list_add(&odp_data->no_private_counters, mutex_unlock(&ctx->per_mm_list_lock);
&context->no_private_counters); add_umem_to_per_mm(odp_data);
up_write(&context->umem_rwsem);
umem->odp_data = odp_data; return odp_data;
return umem;
out_page_list: out_page_list:
vfree(odp_data->page_list); vfree(odp_data->page_list);
out_odp_data: out_odp_data:
kfree(odp_data); kfree(odp_data);
out_umem:
kfree(umem);
return ERR_PTR(ret); return ERR_PTR(ret);
} }
EXPORT_SYMBOL(ib_alloc_odp_umem); EXPORT_SYMBOL(ib_alloc_odp_umem);
int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem, int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access)
int access)
{ {
struct ib_umem *umem = &umem_odp->umem;
/*
* NOTE: This must called in a process context where umem->owning_mm
* == current->mm
*/
struct mm_struct *mm = umem->owning_mm;
int ret_val; int ret_val;
struct pid *our_pid;
struct mm_struct *mm = get_task_mm(current);
if (!mm)
return -EINVAL;
if (access & IB_ACCESS_HUGETLB) { if (access & IB_ACCESS_HUGETLB) {
struct vm_area_struct *vma; struct vm_area_struct *vma;
@@ -366,111 +430,43 @@ int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem,
umem->hugetlb = 0; umem->hugetlb = 0;
} }
/* Prevent creating ODP MRs in child processes */ mutex_init(&umem_odp->umem_mutex);
rcu_read_lock();
our_pid = get_task_pid(current->group_leader, PIDTYPE_PID);
rcu_read_unlock();
put_pid(our_pid);
if (context->tgid != our_pid) {
ret_val = -EINVAL;
goto out_mm;
}
umem->odp_data = kzalloc(sizeof(*umem->odp_data), GFP_KERNEL); init_completion(&umem_odp->notifier_completion);
if (!umem->odp_data) {
ret_val = -ENOMEM;
goto out_mm;
}
umem->odp_data->umem = umem;
mutex_init(&umem->odp_data->umem_mutex);
init_completion(&umem->odp_data->notifier_completion);
if (ib_umem_num_pages(umem)) { if (ib_umem_num_pages(umem)) {
umem->odp_data->page_list = umem_odp->page_list =
vzalloc(array_size(sizeof(*umem->odp_data->page_list), vzalloc(array_size(sizeof(*umem_odp->page_list),
ib_umem_num_pages(umem))); ib_umem_num_pages(umem)));
if (!umem->odp_data->page_list) { if (!umem_odp->page_list)
ret_val = -ENOMEM; return -ENOMEM;
goto out_odp_data;
}
umem->odp_data->dma_list = umem_odp->dma_list =
vzalloc(array_size(sizeof(*umem->odp_data->dma_list), vzalloc(array_size(sizeof(*umem_odp->dma_list),
ib_umem_num_pages(umem))); ib_umem_num_pages(umem)));
if (!umem->odp_data->dma_list) { if (!umem_odp->dma_list) {
ret_val = -ENOMEM; ret_val = -ENOMEM;
goto out_page_list; goto out_page_list;
} }
} }
/* ret_val = get_per_mm(umem_odp);
* When using MMU notifiers, we will get a if (ret_val)
* notification before the "current" task (and MM) is goto out_dma_list;
* destroyed. We use the umem_rwsem semaphore to synchronize. add_umem_to_per_mm(umem_odp);
*/
down_write(&context->umem_rwsem);
context->odp_mrs_count++;
if (likely(ib_umem_start(umem) != ib_umem_end(umem)))
rbt_ib_umem_insert(&umem->odp_data->interval_tree,
&context->umem_tree);
if (likely(!atomic_read(&context->notifier_count)) ||
context->odp_mrs_count == 1)
umem->odp_data->mn_counters_active = true;
else
list_add(&umem->odp_data->no_private_counters,
&context->no_private_counters);
downgrade_write(&context->umem_rwsem);
if (context->odp_mrs_count == 1) {
/*
* Note that at this point, no MMU notifier is running
* for this context!
*/
atomic_set(&context->notifier_count, 0);
INIT_HLIST_NODE(&context->mn.hlist);
context->mn.ops = &ib_umem_notifiers;
/*
* Lock-dep detects a false positive for mmap_sem vs.
* umem_rwsem, due to not grasping downgrade_write correctly.
*/
lockdep_off();
ret_val = mmu_notifier_register(&context->mn, mm);
lockdep_on();
if (ret_val) {
pr_err("Failed to register mmu_notifier %d\n", ret_val);
ret_val = -EBUSY;
goto out_mutex;
}
}
up_read(&context->umem_rwsem);
/*
* Note that doing an mmput can cause a notifier for the relevant mm.
* If the notifier is called while we hold the umem_rwsem, this will
* cause a deadlock. Therefore, we release the reference only after we
* released the semaphore.
*/
mmput(mm);
return 0; return 0;
out_mutex: out_dma_list:
up_read(&context->umem_rwsem); vfree(umem_odp->dma_list);
vfree(umem->odp_data->dma_list);
out_page_list: out_page_list:
vfree(umem->odp_data->page_list); vfree(umem_odp->page_list);
out_odp_data:
kfree(umem->odp_data);
out_mm:
mmput(mm);
return ret_val; return ret_val;
} }
void ib_umem_odp_release(struct ib_umem *umem) void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
{ {
struct ib_ucontext *context = umem->context; struct ib_umem *umem = &umem_odp->umem;
/* /*
* Ensure that no more pages are mapped in the umem. * Ensure that no more pages are mapped in the umem.
@@ -478,61 +474,13 @@ void ib_umem_odp_release(struct ib_umem *umem)
* It is the driver's responsibility to ensure, before calling us, * It is the driver's responsibility to ensure, before calling us,
* that the hardware will not attempt to access the MR any more. * that the hardware will not attempt to access the MR any more.
*/ */
ib_umem_odp_unmap_dma_pages(umem, ib_umem_start(umem), ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem),
ib_umem_end(umem)); ib_umem_end(umem));
down_write(&context->umem_rwsem); remove_umem_from_per_mm(umem_odp);
if (likely(ib_umem_start(umem) != ib_umem_end(umem))) put_per_mm(umem_odp);
rbt_ib_umem_remove(&umem->odp_data->interval_tree, vfree(umem_odp->dma_list);
&context->umem_tree); vfree(umem_odp->page_list);
context->odp_mrs_count--;
if (!umem->odp_data->mn_counters_active) {
list_del(&umem->odp_data->no_private_counters);
complete_all(&umem->odp_data->notifier_completion);
}
/*
* Downgrade the lock to a read lock. This ensures that the notifiers
* (who lock the mutex for reading) will be able to finish, and we
* will be able to enventually obtain the mmu notifiers SRCU. Note
* that since we are doing it atomically, no other user could register
* and unregister while we do the check.
*/
downgrade_write(&context->umem_rwsem);
if (!context->odp_mrs_count) {
struct task_struct *owning_process = NULL;
struct mm_struct *owning_mm = NULL;
owning_process = get_pid_task(context->tgid,
PIDTYPE_PID);
if (owning_process == NULL)
/*
* The process is already dead, notifier were removed
* already.
*/
goto out;
owning_mm = get_task_mm(owning_process);
if (owning_mm == NULL)
/*
* The process' mm is already dead, notifier were
* removed already.
*/
goto out_put_task;
mmu_notifier_unregister(&context->mn, owning_mm);
mmput(owning_mm);
out_put_task:
put_task_struct(owning_process);
}
out:
up_read(&context->umem_rwsem);
vfree(umem->odp_data->dma_list);
vfree(umem->odp_data->page_list);
kfree(umem->odp_data);
kfree(umem);
} }
/* /*
@@ -544,7 +492,7 @@ out:
* @access_mask: access permissions needed for this page. * @access_mask: access permissions needed for this page.
* @current_seq: sequence number for synchronization with invalidations. * @current_seq: sequence number for synchronization with invalidations.
* the sequence number is taken from * the sequence number is taken from
* umem->odp_data->notifiers_seq. * umem_odp->notifiers_seq.
* *
* The function returns -EFAULT if the DMA mapping operation fails. It returns * The function returns -EFAULT if the DMA mapping operation fails. It returns
* -EAGAIN if a concurrent invalidation prevents us from updating the page. * -EAGAIN if a concurrent invalidation prevents us from updating the page.
@@ -554,12 +502,13 @@ out:
* umem. * umem.
*/ */
static int ib_umem_odp_map_dma_single_page( static int ib_umem_odp_map_dma_single_page(
struct ib_umem *umem, struct ib_umem_odp *umem_odp,
int page_index, int page_index,
struct page *page, struct page *page,
u64 access_mask, u64 access_mask,
unsigned long current_seq) unsigned long current_seq)
{ {
struct ib_umem *umem = &umem_odp->umem;
struct ib_device *dev = umem->context->device; struct ib_device *dev = umem->context->device;
dma_addr_t dma_addr; dma_addr_t dma_addr;
int stored_page = 0; int stored_page = 0;
@@ -571,11 +520,11 @@ static int ib_umem_odp_map_dma_single_page(
* handle case of a racing notifier. This check also allows us to bail * handle case of a racing notifier. This check also allows us to bail
* early if we have a notifier running in parallel with us. * early if we have a notifier running in parallel with us.
*/ */
if (ib_umem_mmu_notifier_retry(umem, current_seq)) { if (ib_umem_mmu_notifier_retry(umem_odp, current_seq)) {
ret = -EAGAIN; ret = -EAGAIN;
goto out; goto out;
} }
if (!(umem->odp_data->dma_list[page_index])) { if (!(umem_odp->dma_list[page_index])) {
dma_addr = ib_dma_map_page(dev, dma_addr = ib_dma_map_page(dev,
page, page,
0, BIT(umem->page_shift), 0, BIT(umem->page_shift),
@@ -584,15 +533,15 @@ static int ib_umem_odp_map_dma_single_page(
ret = -EFAULT; ret = -EFAULT;
goto out; goto out;
} }
umem->odp_data->dma_list[page_index] = dma_addr | access_mask; umem_odp->dma_list[page_index] = dma_addr | access_mask;
umem->odp_data->page_list[page_index] = page; umem_odp->page_list[page_index] = page;
umem->npages++; umem->npages++;
stored_page = 1; stored_page = 1;
} else if (umem->odp_data->page_list[page_index] == page) { } else if (umem_odp->page_list[page_index] == page) {
umem->odp_data->dma_list[page_index] |= access_mask; umem_odp->dma_list[page_index] |= access_mask;
} else { } else {
pr_err("error: got different pages in IB device and from get_user_pages. IB device page: %p, gup page: %p\n", pr_err("error: got different pages in IB device and from get_user_pages. IB device page: %p, gup page: %p\n",
umem->odp_data->page_list[page_index], page); umem_odp->page_list[page_index], page);
/* Better remove the mapping now, to prevent any further /* Better remove the mapping now, to prevent any further
* damage. */ * damage. */
remove_existing_mapping = 1; remove_existing_mapping = 1;
@@ -605,7 +554,7 @@ out:
if (remove_existing_mapping && umem->context->invalidate_range) { if (remove_existing_mapping && umem->context->invalidate_range) {
invalidate_page_trampoline( invalidate_page_trampoline(
umem, umem_odp,
ib_umem_start(umem) + (page_index >> umem->page_shift), ib_umem_start(umem) + (page_index >> umem->page_shift),
ib_umem_start(umem) + ((page_index + 1) >> ib_umem_start(umem) + ((page_index + 1) >>
umem->page_shift), umem->page_shift),
@@ -621,7 +570,7 @@ out:
* *
* Pins the range of pages passed in the argument, and maps them to * Pins the range of pages passed in the argument, and maps them to
* DMA addresses. The DMA addresses of the mapped pages is updated in * DMA addresses. The DMA addresses of the mapped pages is updated in
* umem->odp_data->dma_list. * umem_odp->dma_list.
* *
* Returns the number of pages mapped in success, negative error code * Returns the number of pages mapped in success, negative error code
* for failure. * for failure.
@@ -629,7 +578,7 @@ out:
* the function from completing its task. * the function from completing its task.
* An -ENOENT error code indicates that userspace process is being terminated * An -ENOENT error code indicates that userspace process is being terminated
* and mm was already destroyed. * and mm was already destroyed.
* @umem: the umem to map and pin * @umem_odp: the umem to map and pin
* @user_virt: the address from which we need to map. * @user_virt: the address from which we need to map.
* @bcnt: the minimal number of bytes to pin and map. The mapping might be * @bcnt: the minimal number of bytes to pin and map. The mapping might be
* bigger due to alignment, and may also be smaller in case of an error * bigger due to alignment, and may also be smaller in case of an error
@@ -639,13 +588,15 @@ out:
* range. * range.
* @current_seq: the MMU notifiers sequance value for synchronization with * @current_seq: the MMU notifiers sequance value for synchronization with
* invalidations. the sequance number is read from * invalidations. the sequance number is read from
* umem->odp_data->notifiers_seq before calling this function * umem_odp->notifiers_seq before calling this function
*/ */
int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt, int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
u64 access_mask, unsigned long current_seq) u64 bcnt, u64 access_mask,
unsigned long current_seq)
{ {
struct ib_umem *umem = &umem_odp->umem;
struct task_struct *owning_process = NULL; struct task_struct *owning_process = NULL;
struct mm_struct *owning_mm = NULL; struct mm_struct *owning_mm = umem_odp->umem.owning_mm;
struct page **local_page_list = NULL; struct page **local_page_list = NULL;
u64 page_mask, off; u64 page_mask, off;
int j, k, ret = 0, start_idx, npages = 0, page_shift; int j, k, ret = 0, start_idx, npages = 0, page_shift;
@@ -669,15 +620,14 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
user_virt = user_virt & page_mask; user_virt = user_virt & page_mask;
bcnt += off; /* Charge for the first page offset as well. */ bcnt += off; /* Charge for the first page offset as well. */
owning_process = get_pid_task(umem->context->tgid, PIDTYPE_PID); /*
if (owning_process == NULL) { * owning_process is allowed to be NULL, this means somehow the mm is
* existing beyond the lifetime of the originating process.. Presumably
* mmget_not_zero will fail in this case.
*/
owning_process = get_pid_task(umem_odp->per_mm->tgid, PIDTYPE_PID);
if (WARN_ON(!mmget_not_zero(umem_odp->umem.owning_mm))) {
ret = -EINVAL; ret = -EINVAL;
goto out_no_task;
}
owning_mm = get_task_mm(owning_process);
if (owning_mm == NULL) {
ret = -ENOENT;
goto out_put_task; goto out_put_task;
} }
@@ -709,7 +659,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
break; break;
bcnt -= min_t(size_t, npages << PAGE_SHIFT, bcnt); bcnt -= min_t(size_t, npages << PAGE_SHIFT, bcnt);
mutex_lock(&umem->odp_data->umem_mutex); mutex_lock(&umem_odp->umem_mutex);
for (j = 0; j < npages; j++, user_virt += PAGE_SIZE) { for (j = 0; j < npages; j++, user_virt += PAGE_SIZE) {
if (user_virt & ~page_mask) { if (user_virt & ~page_mask) {
p += PAGE_SIZE; p += PAGE_SIZE;
@@ -722,7 +672,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
} }
ret = ib_umem_odp_map_dma_single_page( ret = ib_umem_odp_map_dma_single_page(
umem, k, local_page_list[j], umem_odp, k, local_page_list[j],
access_mask, current_seq); access_mask, current_seq);
if (ret < 0) if (ret < 0)
break; break;
@@ -730,7 +680,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
p = page_to_phys(local_page_list[j]); p = page_to_phys(local_page_list[j]);
k++; k++;
} }
mutex_unlock(&umem->odp_data->umem_mutex); mutex_unlock(&umem_odp->umem_mutex);
if (ret < 0) { if (ret < 0) {
/* Release left over pages when handling errors. */ /* Release left over pages when handling errors. */
@@ -749,16 +699,17 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
mmput(owning_mm); mmput(owning_mm);
out_put_task: out_put_task:
put_task_struct(owning_process); if (owning_process)
out_no_task: put_task_struct(owning_process);
free_page((unsigned long)local_page_list); free_page((unsigned long)local_page_list);
return ret; return ret;
} }
EXPORT_SYMBOL(ib_umem_odp_map_dma_pages); EXPORT_SYMBOL(ib_umem_odp_map_dma_pages);
void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt, void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
u64 bound) u64 bound)
{ {
struct ib_umem *umem = &umem_odp->umem;
int idx; int idx;
u64 addr; u64 addr;
struct ib_device *dev = umem->context->device; struct ib_device *dev = umem->context->device;
@@ -770,12 +721,12 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
* faults from completion. We might be racing with other * faults from completion. We might be racing with other
* invalidations, so we must make sure we free each page only * invalidations, so we must make sure we free each page only
* once. */ * once. */
mutex_lock(&umem->odp_data->umem_mutex); mutex_lock(&umem_odp->umem_mutex);
for (addr = virt; addr < bound; addr += BIT(umem->page_shift)) { for (addr = virt; addr < bound; addr += BIT(umem->page_shift)) {
idx = (addr - ib_umem_start(umem)) >> umem->page_shift; idx = (addr - ib_umem_start(umem)) >> umem->page_shift;
if (umem->odp_data->page_list[idx]) { if (umem_odp->page_list[idx]) {
struct page *page = umem->odp_data->page_list[idx]; struct page *page = umem_odp->page_list[idx];
dma_addr_t dma = umem->odp_data->dma_list[idx]; dma_addr_t dma = umem_odp->dma_list[idx];
dma_addr_t dma_addr = dma & ODP_DMA_ADDR_MASK; dma_addr_t dma_addr = dma & ODP_DMA_ADDR_MASK;
WARN_ON(!dma_addr); WARN_ON(!dma_addr);
@@ -798,12 +749,12 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
/* on demand pinning support */ /* on demand pinning support */
if (!umem->context->invalidate_range) if (!umem->context->invalidate_range)
put_page(page); put_page(page);
umem->odp_data->page_list[idx] = NULL; umem_odp->page_list[idx] = NULL;
umem->odp_data->dma_list[idx] = 0; umem_odp->dma_list[idx] = 0;
umem->npages--; umem->npages--;
} }
} }
mutex_unlock(&umem->odp_data->umem_mutex); mutex_unlock(&umem_odp->umem_mutex);
} }
EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages); EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages);
@@ -830,7 +781,7 @@ int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root,
return -EAGAIN; return -EAGAIN;
next = rbt_ib_umem_iter_next(node, start, last - 1); next = rbt_ib_umem_iter_next(node, start, last - 1);
umem = container_of(node, struct ib_umem_odp, interval_tree); umem = container_of(node, struct ib_umem_odp, interval_tree);
ret_val = cb(umem->umem, start, last, cookie) || ret_val; ret_val = cb(umem, start, last, cookie) || ret_val;
} }
return ret_val; return ret_val;
@@ -138,7 +138,7 @@ static const dev_t base_issm_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE) +
static dev_t dynamic_umad_dev; static dev_t dynamic_umad_dev;
static dev_t dynamic_issm_dev; static dev_t dynamic_issm_dev;
static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS); static DEFINE_IDA(umad_ida);
static void ib_umad_add_one(struct ib_device *device); static void ib_umad_add_one(struct ib_device *device);
static void ib_umad_remove_one(struct ib_device *device, void *client_data); static void ib_umad_remove_one(struct ib_device *device, void *client_data);
@@ -1132,7 +1132,7 @@ static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
if (!port) if (!port)
return -ENODEV; return -ENODEV;
return sprintf(buf, "%s\n", port->ib_dev->name); return sprintf(buf, "%s\n", dev_name(&port->ib_dev->dev));
} }
static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
@@ -1159,11 +1159,10 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
dev_t base_umad; dev_t base_umad;
dev_t base_issm; dev_t base_issm;
devnum = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS); devnum = ida_alloc_max(&umad_ida, IB_UMAD_MAX_PORTS - 1, GFP_KERNEL);
if (devnum >= IB_UMAD_MAX_PORTS) if (devnum < 0)
return -1; return -1;
port->dev_num = devnum; port->dev_num = devnum;
set_bit(devnum, dev_map);
if (devnum >= IB_UMAD_NUM_FIXED_MINOR) { if (devnum >= IB_UMAD_NUM_FIXED_MINOR) {
base_umad = dynamic_umad_dev + devnum - IB_UMAD_NUM_FIXED_MINOR; base_umad = dynamic_umad_dev + devnum - IB_UMAD_NUM_FIXED_MINOR;
base_issm = dynamic_issm_dev + devnum - IB_UMAD_NUM_FIXED_MINOR; base_issm = dynamic_issm_dev + devnum - IB_UMAD_NUM_FIXED_MINOR;
@@ -1227,7 +1226,7 @@ err_dev:
err_cdev: err_cdev:
cdev_del(&port->cdev); cdev_del(&port->cdev);
clear_bit(devnum, dev_map); ida_free(&umad_ida, devnum);
return -1; return -1;
} }
@@ -1261,7 +1260,7 @@ static void ib_umad_kill_port(struct ib_umad_port *port)
} }
mutex_unlock(&port->file_mutex); mutex_unlock(&port->file_mutex);
clear_bit(port->dev_num, dev_map); ida_free(&umad_ida, port->dev_num);
} }
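The umad minor numbers now come from an IDA instead of a hand-rolled bitmap, removing the find_first_zero_bit()/set_bit() pair. A kernel-style sketch of the allocate/release pairing; ida_alloc_max(), ida_free() and DEFINE_IDA() are the real <linux/idr.h> API, while MY_MAX_PORTS and my_port are invented for the example:

/* Kernel-style sketch (not a buildable module on its own) of the IDA
 * pattern the umad code switches to. */
#include <linux/idr.h>

#define MY_MAX_PORTS 64

static DEFINE_IDA(my_port_ida);

struct my_port { int dev_num; };

static int my_port_init(struct my_port *port)
{
	int devnum = ida_alloc_max(&my_port_ida, MY_MAX_PORTS - 1, GFP_KERNEL);

	if (devnum < 0)
		return devnum;	/* -ENOMEM or -ENOSPC */
	port->dev_num = devnum;
	return 0;
}

static void my_port_fini(struct my_port *port)
{
	ida_free(&my_port_ida, port->dev_num);
}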
static void ib_umad_add_one(struct ib_device *device) static void ib_umad_add_one(struct ib_device *device)
@@ -100,13 +100,14 @@ struct ib_uverbs_device {
atomic_t refcount; atomic_t refcount;
int num_comp_vectors; int num_comp_vectors;
struct completion comp; struct completion comp;
struct device *dev; struct device dev;
/* First group for device attributes, NULL terminated array */
const struct attribute_group *groups[2];
struct ib_device __rcu *ib_dev; struct ib_device __rcu *ib_dev;
int devnum; int devnum;
struct cdev cdev; struct cdev cdev;
struct rb_root xrcd_tree; struct rb_root xrcd_tree;
struct mutex xrcd_tree_mutex; struct mutex xrcd_tree_mutex;
struct kobject kobj;
struct srcu_struct disassociate_srcu; struct srcu_struct disassociate_srcu;
struct mutex lists_mutex; /* protect lists */ struct mutex lists_mutex; /* protect lists */
struct list_head uverbs_file_list; struct list_head uverbs_file_list;
@@ -146,7 +147,6 @@ struct ib_uverbs_file {
struct ib_event_handler event_handler; struct ib_event_handler event_handler;
struct ib_uverbs_async_event_file *async_file; struct ib_uverbs_async_event_file *async_file;
struct list_head list; struct list_head list;
int is_closed;
/* /*
* To access the uobjects list hw_destroy_rwsem must be held for write * To access the uobjects list hw_destroy_rwsem must be held for write
@@ -158,6 +158,9 @@ struct ib_uverbs_file {
spinlock_t uobjects_lock; spinlock_t uobjects_lock;
struct list_head uobjects; struct list_head uobjects;
struct mutex umap_lock;
struct list_head umaps;
u64 uverbs_cmd_mask; u64 uverbs_cmd_mask;
u64 uverbs_ex_cmd_mask; u64 uverbs_ex_cmd_mask;
@@ -218,12 +221,6 @@ struct ib_ucq_object {
u32 async_events_reported; u32 async_events_reported;
}; };
struct ib_uflow_resources;
struct ib_uflow_object {
struct ib_uobject uobject;
struct ib_uflow_resources *resources;
};
extern const struct file_operations uverbs_event_fops; extern const struct file_operations uverbs_event_fops;
void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue); void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue);
struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file, struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file,
@@ -117,18 +117,12 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
/* ufile is required when some objects are released */ /* ufile is required when some objects are released */
ucontext->ufile = file; ucontext->ufile = file;
rcu_read_lock(); ucontext->closing = false;
ucontext->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
rcu_read_unlock();
ucontext->closing = 0;
ucontext->cleanup_retryable = false; ucontext->cleanup_retryable = false;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
ucontext->umem_tree = RB_ROOT_CACHED; mutex_init(&ucontext->per_mm_list_lock);
init_rwsem(&ucontext->umem_rwsem); INIT_LIST_HEAD(&ucontext->per_mm_list);
ucontext->odp_mrs_count = 0;
INIT_LIST_HEAD(&ucontext->no_private_counters);
if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING)) if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING))
ucontext->invalidate_range = NULL; ucontext->invalidate_range = NULL;
@@ -172,7 +166,6 @@ err_fd:
put_unused_fd(resp.async_fd); put_unused_fd(resp.async_fd);
err_free: err_free:
put_pid(ucontext->tgid);
ib_dev->dealloc_ucontext(ucontext); ib_dev->dealloc_ucontext(ucontext);
err_alloc: err_alloc:
@@ -2769,16 +2762,7 @@ out_put:
return ret ? ret : in_len; return ret ? ret : in_len;
} }
struct ib_uflow_resources { struct ib_uflow_resources *flow_resources_alloc(size_t num_specs)
size_t max;
size_t num;
size_t collection_num;
size_t counters_num;
struct ib_counters **counters;
struct ib_flow_action **collection;
};
static struct ib_uflow_resources *flow_resources_alloc(size_t num_specs)
{ {
struct ib_uflow_resources *resources; struct ib_uflow_resources *resources;
@@ -2808,6 +2792,7 @@ err:
return NULL; return NULL;
} }
EXPORT_SYMBOL(flow_resources_alloc);
void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res) void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res)
{ {
@@ -2826,10 +2811,11 @@ void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res)
kfree(uflow_res->counters); kfree(uflow_res->counters);
kfree(uflow_res); kfree(uflow_res);
} }
EXPORT_SYMBOL(ib_uverbs_flow_resources_free);
static void flow_resources_add(struct ib_uflow_resources *uflow_res, void flow_resources_add(struct ib_uflow_resources *uflow_res,
enum ib_flow_spec_type type, enum ib_flow_spec_type type,
void *ibobj) void *ibobj)
{ {
WARN_ON(uflow_res->num >= uflow_res->max); WARN_ON(uflow_res->num >= uflow_res->max);
@@ -2850,6 +2836,7 @@ static void flow_resources_add(struct ib_uflow_resources *uflow_res,
uflow_res->num++; uflow_res->num++;
} }
EXPORT_SYMBOL(flow_resources_add);
static int kern_spec_to_ib_spec_action(struct ib_uverbs_file *ufile, static int kern_spec_to_ib_spec_action(struct ib_uverbs_file *ufile,
struct ib_uverbs_flow_spec *kern_spec, struct ib_uverbs_flow_spec *kern_spec,
@@ -3484,7 +3471,6 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
struct ib_uverbs_create_flow cmd; struct ib_uverbs_create_flow cmd;
struct ib_uverbs_create_flow_resp resp; struct ib_uverbs_create_flow_resp resp;
struct ib_uobject *uobj; struct ib_uobject *uobj;
struct ib_uflow_object *uflow;
struct ib_flow *flow_id; struct ib_flow *flow_id;
struct ib_uverbs_flow_attr *kern_flow_attr; struct ib_uverbs_flow_attr *kern_flow_attr;
struct ib_flow_attr *flow_attr; struct ib_flow_attr *flow_attr;
@@ -3623,13 +3609,8 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
err = PTR_ERR(flow_id); err = PTR_ERR(flow_id);
goto err_free; goto err_free;
} }
atomic_inc(&qp->usecnt);
flow_id->qp = qp; ib_set_flow(uobj, flow_id, qp, qp->device, uflow_res);
flow_id->device = qp->device;
flow_id->uobject = uobj;
uobj->object = flow_id;
uflow = container_of(uobj, typeof(*uflow), uobject);
uflow->resources = uflow_res;
memset(&resp, 0, sizeof(resp)); memset(&resp, 0, sizeof(resp));
resp.flow_handle = uobj->id; resp.flow_handle = uobj->id;
@@ -57,6 +57,7 @@ struct bundle_priv {
struct ib_uverbs_attr *uattrs; struct ib_uverbs_attr *uattrs;
DECLARE_BITMAP(uobj_finalize, UVERBS_API_ATTR_BKEY_LEN); DECLARE_BITMAP(uobj_finalize, UVERBS_API_ATTR_BKEY_LEN);
DECLARE_BITMAP(spec_finalize, UVERBS_API_ATTR_BKEY_LEN);
/* /*
* Must be last. bundle ends in a flex array which overlaps * Must be last. bundle ends in a flex array which overlaps
@@ -143,6 +144,86 @@ static bool uverbs_is_attr_cleared(const struct ib_uverbs_attr *uattr,
0, uattr->len - len); 0, uattr->len - len);
} }
static int uverbs_process_idrs_array(struct bundle_priv *pbundle,
const struct uverbs_api_attr *attr_uapi,
struct uverbs_objs_arr_attr *attr,
struct ib_uverbs_attr *uattr,
u32 attr_bkey)
{
const struct uverbs_attr_spec *spec = &attr_uapi->spec;
size_t array_len;
u32 *idr_vals;
int ret = 0;
size_t i;
if (uattr->attr_data.reserved)
return -EINVAL;
if (uattr->len % sizeof(u32))
return -EINVAL;
array_len = uattr->len / sizeof(u32);
if (array_len < spec->u2.objs_arr.min_len ||
array_len > spec->u2.objs_arr.max_len)
return -EINVAL;
attr->uobjects =
uverbs_alloc(&pbundle->bundle,
array_size(array_len, sizeof(*attr->uobjects)));
if (IS_ERR(attr->uobjects))
return PTR_ERR(attr->uobjects);
/*
* Since idr is 4B and *uobjects is >= 4B, we can use attr->uobjects
* to store idrs array and avoid additional memory allocation. The
* idrs array is offset to the end of the uobjects array so we will be
* able to read idr and replace with a pointer.
*/
idr_vals = (u32 *)(attr->uobjects + array_len) - array_len;
if (uattr->len > sizeof(uattr->data)) {
ret = copy_from_user(idr_vals, u64_to_user_ptr(uattr->data),
uattr->len);
if (ret)
return -EFAULT;
} else {
memcpy(idr_vals, &uattr->data, uattr->len);
}
for (i = 0; i != array_len; i++) {
attr->uobjects[i] = uverbs_get_uobject_from_file(
spec->u2.objs_arr.obj_type, pbundle->bundle.ufile,
spec->u2.objs_arr.access, idr_vals[i]);
if (IS_ERR(attr->uobjects[i])) {
ret = PTR_ERR(attr->uobjects[i]);
break;
}
}
attr->len = i;
__set_bit(attr_bkey, pbundle->spec_finalize);
return ret;
}
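The in-place conversion above leans on a small layout trick: an idr value is 4 bytes and a uobject pointer is at least that, so the u32 array can sit in the tail of the pointer array and each id is read just before its slot is overwritten with the resolved pointer. A standalone demonstration of the layout (the "lookup" is a toy table and every name is invented):

/* Demo of packing a u32 id array into the tail of a pointer array and
 * resolving it in place. Safe because ids[i] is read before ptrs[i] is
 * written, and writes to ptrs[0..i-1] never reach the bytes holding ids[i]. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>

static const char *object_table[] = { "qp", "cq", "mr", "pd", "srq" };

static const char *lookup(uint32_t id)          /* stands in for the idr lookup */
{
    return id < 5 ? object_table[id] : NULL;
}

int main(void)
{
    uint32_t input_ids[] = { 3, 0, 4, 1 };      /* what user space handed in */
    size_t n = 4;

    const char **ptrs = malloc(n * sizeof(*ptrs));
    if (!ptrs)
        return 1;

    /* Place the u32 array so it ends exactly where the pointer array ends. */
    uint32_t *ids = (uint32_t *)(ptrs + n) - n;
    memcpy(ids, input_ids, n * sizeof(uint32_t));

    for (size_t i = 0; i < n; i++) {
        uint32_t id = ids[i];                   /* read the id first ... */
        ptrs[i] = lookup(id);                   /* ... then reuse the slot */
    }

    for (size_t i = 0; i < n; i++)
        printf("ptrs[%zu] = %s\n", i, ptrs[i]);

    free(ptrs);
    return 0;
}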
static int uverbs_free_idrs_array(const struct uverbs_api_attr *attr_uapi,
struct uverbs_objs_arr_attr *attr,
bool commit)
{
const struct uverbs_attr_spec *spec = &attr_uapi->spec;
int current_ret;
int ret = 0;
size_t i;
for (i = 0; i != attr->len; i++) {
current_ret = uverbs_finalize_object(
attr->uobjects[i], spec->u2.objs_arr.access, commit);
if (!ret)
ret = current_ret;
}
return ret;
}
static int uverbs_process_attr(struct bundle_priv *pbundle, static int uverbs_process_attr(struct bundle_priv *pbundle,
const struct uverbs_api_attr *attr_uapi, const struct uverbs_api_attr *attr_uapi,
struct ib_uverbs_attr *uattr, u32 attr_bkey) struct ib_uverbs_attr *uattr, u32 attr_bkey)
@@ -246,6 +327,11 @@ static int uverbs_process_attr(struct bundle_priv *pbundle,
} }
break; break;
case UVERBS_ATTR_TYPE_IDRS_ARRAY:
return uverbs_process_idrs_array(pbundle, attr_uapi,
&e->objs_arr_attr, uattr,
attr_bkey);
default: default:
return -EOPNOTSUPP; return -EOPNOTSUPP;
} }
@@ -300,8 +386,7 @@ static int uverbs_set_attr(struct bundle_priv *pbundle,
return -EPROTONOSUPPORT; return -EPROTONOSUPPORT;
return 0; return 0;
} }
attr = srcu_dereference( attr = rcu_dereference_protected(*slot, true);
*slot, &pbundle->bundle.ufile->device->disassociate_srcu);
/* Reject duplicate attributes from user-space */ /* Reject duplicate attributes from user-space */
if (test_bit(attr_bkey, pbundle->bundle.attr_present)) if (test_bit(attr_bkey, pbundle->bundle.attr_present))
@@ -384,6 +469,7 @@ static int bundle_destroy(struct bundle_priv *pbundle, bool commit)
unsigned int i; unsigned int i;
int ret = 0; int ret = 0;
/* fast path for simple uobjects */
i = -1; i = -1;
while ((i = find_next_bit(pbundle->uobj_finalize, key_bitmap_len, while ((i = find_next_bit(pbundle->uobj_finalize, key_bitmap_len,
i + 1)) < key_bitmap_len) { i + 1)) < key_bitmap_len) {
@@ -397,6 +483,30 @@ static int bundle_destroy(struct bundle_priv *pbundle, bool commit)
ret = current_ret; ret = current_ret;
} }
i = -1;
while ((i = find_next_bit(pbundle->spec_finalize, key_bitmap_len,
i + 1)) < key_bitmap_len) {
struct uverbs_attr *attr = &pbundle->bundle.attrs[i];
const struct uverbs_api_attr *attr_uapi;
void __rcu **slot;
int current_ret;
slot = uapi_get_attr_for_method(
pbundle,
pbundle->method_key | uapi_bkey_to_key_attr(i));
if (WARN_ON(!slot))
continue;
attr_uapi = rcu_dereference_protected(*slot, true);
if (attr_uapi->spec.type == UVERBS_ATTR_TYPE_IDRS_ARRAY) {
current_ret = uverbs_free_idrs_array(
attr_uapi, &attr->objs_arr_attr, commit);
if (!ret)
ret = current_ret;
}
}
for (memblock = pbundle->allocated_mem; memblock;) { for (memblock = pbundle->allocated_mem; memblock;) {
struct bundle_alloc_head *tmp = memblock; struct bundle_alloc_head *tmp = memblock;
@@ -429,7 +539,7 @@ static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile,
uapi_key_ioctl_method(hdr->method_id)); uapi_key_ioctl_method(hdr->method_id));
if (unlikely(!slot)) if (unlikely(!slot))
return -EPROTONOSUPPORT; return -EPROTONOSUPPORT;
method_elm = srcu_dereference(*slot, &ufile->device->disassociate_srcu); method_elm = rcu_dereference_protected(*slot, true);
if (!method_elm->use_stack) { if (!method_elm->use_stack) {
pbundle = kmalloc(method_elm->bundle_size, GFP_KERNEL); pbundle = kmalloc(method_elm->bundle_size, GFP_KERNEL);
@@ -461,6 +571,7 @@ static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile,
memset(pbundle->bundle.attr_present, 0, memset(pbundle->bundle.attr_present, 0,
sizeof(pbundle->bundle.attr_present)); sizeof(pbundle->bundle.attr_present));
memset(pbundle->uobj_finalize, 0, sizeof(pbundle->uobj_finalize)); memset(pbundle->uobj_finalize, 0, sizeof(pbundle->uobj_finalize));
memset(pbundle->spec_finalize, 0, sizeof(pbundle->spec_finalize));
ret = ib_uverbs_run_method(pbundle, hdr->num_attrs); ret = ib_uverbs_run_method(pbundle, hdr->num_attrs);
destroy_ret = bundle_destroy(pbundle, ret == 0); destroy_ret = bundle_destroy(pbundle, ret == 0);
@@ -611,3 +722,26 @@ int uverbs_copy_to(const struct uverbs_attr_bundle *bundle, size_t idx,
return 0; return 0;
} }
EXPORT_SYMBOL(uverbs_copy_to); EXPORT_SYMBOL(uverbs_copy_to);
int _uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle,
size_t idx, s64 lower_bound, u64 upper_bound,
s64 *def_val)
{
const struct uverbs_attr *attr;
attr = uverbs_attr_get(attrs_bundle, idx);
if (IS_ERR(attr)) {
if ((PTR_ERR(attr) != -ENOENT) || !def_val)
return PTR_ERR(attr);
*to = *def_val;
} else {
*to = attr->ptr_attr.data;
}
if (*to < lower_bound || (*to > 0 && (u64)*to > upper_bound))
return -EINVAL;
return 0;
}
EXPORT_SYMBOL(_uverbs_get_const);
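_uverbs_get_const() folds together three things: fetch an optional attribute, fall back to a default when it is absent, and bounds-check the result. A hedged standalone sketch of that control flow only (no uverbs machinery, names invented):

/* Sketch of the "optional constant with default + bounds" helper. */
#include <errno.h>
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

static int get_const(int64_t *to, bool present, int64_t value,
                     int64_t lower_bound, uint64_t upper_bound,
                     const int64_t *def_val)
{
    if (!present) {
        if (!def_val)
            return -ENOENT;       /* attribute missing and no default given */
        *to = *def_val;
    } else {
        *to = value;
    }

    /* Same shape as the kernel helper: negative lower bounds are allowed,
     * the upper bound only matters for positive values. */
    if (*to < lower_bound || (*to > 0 && (uint64_t)*to > upper_bound))
        return -EINVAL;
    return 0;
}

int main(void)
{
    int64_t out;
    const int64_t def = 8;

    printf("missing, default 8 -> %d (out=%lld)\n",
           get_const(&out, false, 0, 0, 16, &def), (long long)out);
    printf("present, 32 > max 16 -> %d\n",
           get_const(&out, true, 32, 0, 16, NULL));
    return 0;
}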
@@ -45,6 +45,7 @@
#include <linux/cdev.h> #include <linux/cdev.h>
#include <linux/anon_inodes.h> #include <linux/anon_inodes.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/sched/mm.h>
#include <linux/uaccess.h> #include <linux/uaccess.h>
@@ -72,7 +73,7 @@ enum {
static dev_t dynamic_uverbs_dev; static dev_t dynamic_uverbs_dev;
static struct class *uverbs_class; static struct class *uverbs_class;
static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); static DEFINE_IDA(uverbs_ida);
static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
const char __user *buf, int in_len, const char __user *buf, int in_len,
@@ -169,20 +170,16 @@ int uverbs_dealloc_mw(struct ib_mw *mw)
return ret; return ret;
} }
static void ib_uverbs_release_dev(struct kobject *kobj) static void ib_uverbs_release_dev(struct device *device)
{ {
struct ib_uverbs_device *dev = struct ib_uverbs_device *dev =
container_of(kobj, struct ib_uverbs_device, kobj); container_of(device, struct ib_uverbs_device, dev);
uverbs_destroy_api(dev->uapi); uverbs_destroy_api(dev->uapi);
cleanup_srcu_struct(&dev->disassociate_srcu); cleanup_srcu_struct(&dev->disassociate_srcu);
kfree(dev); kfree(dev);
} }
static struct kobj_type ib_uverbs_dev_ktype = {
.release = ib_uverbs_release_dev,
};
static void ib_uverbs_release_async_event_file(struct kref *ref) static void ib_uverbs_release_async_event_file(struct kref *ref)
{ {
struct ib_uverbs_async_event_file *file = struct ib_uverbs_async_event_file *file =
@@ -265,7 +262,7 @@ void ib_uverbs_release_file(struct kref *ref)
if (atomic_dec_and_test(&file->device->refcount)) if (atomic_dec_and_test(&file->device->refcount))
ib_uverbs_comp_dev(file->device); ib_uverbs_comp_dev(file->device);
kobject_put(&file->device->kobj); put_device(&file->device->dev);
kfree(file); kfree(file);
} }
@@ -816,6 +813,226 @@ out:
return ret; return ret;
} }
/*
* Each time we map IO memory into user space this keeps track of the mapping.
* When the device is hot-unplugged we 'zap' the mmaps in user space to point
* to the zero page and allow the hot unplug to proceed.
*
* This is necessary for cases like PCI physical hot unplug as the actual BAR
* memory may vanish after this and access to it from userspace could MCE.
*
* RDMA drivers supporting disassociation must have their user space designed
* to cope in some way with their IO pages going to the zero page.
*/
struct rdma_umap_priv {
struct vm_area_struct *vma;
struct list_head list;
};
static const struct vm_operations_struct rdma_umap_ops;
static void rdma_umap_priv_init(struct rdma_umap_priv *priv,
struct vm_area_struct *vma)
{
struct ib_uverbs_file *ufile = vma->vm_file->private_data;
priv->vma = vma;
vma->vm_private_data = priv;
vma->vm_ops = &rdma_umap_ops;
mutex_lock(&ufile->umap_lock);
list_add(&priv->list, &ufile->umaps);
mutex_unlock(&ufile->umap_lock);
}
/*
* The VMA has been dup'd, initialize the vm_private_data with a new tracking
* struct
*/
static void rdma_umap_open(struct vm_area_struct *vma)
{
struct ib_uverbs_file *ufile = vma->vm_file->private_data;
struct rdma_umap_priv *opriv = vma->vm_private_data;
struct rdma_umap_priv *priv;
if (!opriv)
return;
/* We are racing with disassociation */
if (!down_read_trylock(&ufile->hw_destroy_rwsem))
goto out_zap;
/*
* Disassociation already completed, the VMA should already be zapped.
*/
if (!ufile->ucontext)
goto out_unlock;
priv = kzalloc(sizeof(*priv), GFP_KERNEL);
if (!priv)
goto out_unlock;
rdma_umap_priv_init(priv, vma);
up_read(&ufile->hw_destroy_rwsem);
return;
out_unlock:
up_read(&ufile->hw_destroy_rwsem);
out_zap:
/*
* We can't allow the VMA to be created with the actual IO pages, that
* would break our API contract, and it can't be stopped at this
* point, so zap it.
*/
vma->vm_private_data = NULL;
zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
}
static void rdma_umap_close(struct vm_area_struct *vma)
{
struct ib_uverbs_file *ufile = vma->vm_file->private_data;
struct rdma_umap_priv *priv = vma->vm_private_data;
if (!priv)
return;
/*
* The vma holds a reference on the struct file that created it, which
* in turn means that the ib_uverbs_file is guaranteed to exist at
* this point.
*/
mutex_lock(&ufile->umap_lock);
list_del(&priv->list);
mutex_unlock(&ufile->umap_lock);
kfree(priv);
}
static const struct vm_operations_struct rdma_umap_ops = {
.open = rdma_umap_open,
.close = rdma_umap_close,
};
static struct rdma_umap_priv *rdma_user_mmap_pre(struct ib_ucontext *ucontext,
struct vm_area_struct *vma,
unsigned long size)
{
struct ib_uverbs_file *ufile = ucontext->ufile;
struct rdma_umap_priv *priv;
if (vma->vm_end - vma->vm_start != size)
return ERR_PTR(-EINVAL);
/* Driver is using this wrong, must be called by ib_uverbs_mmap */
if (WARN_ON(!vma->vm_file ||
vma->vm_file->private_data != ufile))
return ERR_PTR(-EINVAL);
lockdep_assert_held(&ufile->device->disassociate_srcu);
priv = kzalloc(sizeof(*priv), GFP_KERNEL);
if (!priv)
return ERR_PTR(-ENOMEM);
return priv;
}
/*
* Map IO memory into a process. This is to be called by drivers as part of
* their mmap() functions if they wish to send something like PCI-E BAR memory
* to userspace.
*/
int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
unsigned long pfn, unsigned long size, pgprot_t prot)
{
struct rdma_umap_priv *priv = rdma_user_mmap_pre(ucontext, vma, size);
if (IS_ERR(priv))
return PTR_ERR(priv);
vma->vm_page_prot = prot;
if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) {
kfree(priv);
return -EAGAIN;
}
rdma_umap_priv_init(priv, vma);
return 0;
}
EXPORT_SYMBOL(rdma_user_mmap_io);
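Drivers are expected to call rdma_user_mmap_io() from their verbs .mmap handler instead of open-coding io_remap_pfn_range(), so the core can track the VMA and zap it on disassociation. A kernel-style sketch of such a handler; rdma_user_mmap_io() is the helper added above, while my_dev, its doorbell field and the single-region offset check are invented:

/* Kernel-style sketch (not from any real driver): a verbs .mmap handler
 * that maps a doorbell page through the new core helper so the mapping is
 * tracked and zapped on hot-unplug. */
#include <rdma/ib_verbs.h>

struct my_dev {
	struct ib_device ibdev;
	phys_addr_t db_phys;		/* doorbell register page */
};

static int my_mmap(struct ib_ucontext *ucontext, struct vm_area_struct *vma)
{
	struct my_dev *dev = container_of(ucontext->device, struct my_dev, ibdev);

	if (vma->vm_pgoff != 0)		/* only one mappable region in this sketch */
		return -EINVAL;

	return rdma_user_mmap_io(ucontext, vma, dev->db_phys >> PAGE_SHIFT,
				 PAGE_SIZE,
				 pgprot_noncached(vma->vm_page_prot));
}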
/*
* The page case is here for a slightly different reason, the driver expects
* to be able to free the page it is sharing to user space when it destroys
* its ucontext, which means we need to zap the user space references.
*
* We could handle this differently by providing an API to allocate a shared
* page and then only freeing the shared page when the last ufile is
* destroyed.
*/
int rdma_user_mmap_page(struct ib_ucontext *ucontext,
struct vm_area_struct *vma, struct page *page,
unsigned long size)
{
struct rdma_umap_priv *priv = rdma_user_mmap_pre(ucontext, vma, size);
if (IS_ERR(priv))
return PTR_ERR(priv);
if (remap_pfn_range(vma, vma->vm_start, page_to_pfn(page), size,
vma->vm_page_prot)) {
kfree(priv);
return -EAGAIN;
}
rdma_umap_priv_init(priv, vma);
return 0;
}
EXPORT_SYMBOL(rdma_user_mmap_page);
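/*
 * Hedged usage sketch, not part of this patch: sharing a driver-owned
 * kernel page with user space, assuming the hypothetical example_ucontext
 * above also carries a 'shared_page' pointer to a page the driver
 * allocated with alloc_page() and frees when its ucontext is destroyed;
 * the zap performed at disassociation is what makes that free safe.
 */
static int example_mmap_shared_page(struct ib_ucontext *ucontext,
				    struct vm_area_struct *vma)
{
	struct example_ucontext *uctx =
		container_of(ucontext, struct example_ucontext, ibucontext);

	return rdma_user_mmap_page(ucontext, vma, uctx->shared_page,
				   PAGE_SIZE);
}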
void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
{
struct rdma_umap_priv *priv, *next_priv;
lockdep_assert_held(&ufile->hw_destroy_rwsem);
while (1) {
struct mm_struct *mm = NULL;
/* Get an arbitrary mm pointer that hasn't been cleaned yet */
mutex_lock(&ufile->umap_lock);
if (!list_empty(&ufile->umaps)) {
mm = list_first_entry(&ufile->umaps,
struct rdma_umap_priv, list)
->vma->vm_mm;
mmget(mm);
}
mutex_unlock(&ufile->umap_lock);
if (!mm)
return;
/*
* The umap_lock is nested under mmap_sem since it is used within
* the vma_ops callbacks, so we have to clean the list one mm
* at a time to get the lock ordering right. Typically there
* will only be one mm, so no big deal.
*/
down_write(&mm->mmap_sem);
mutex_lock(&ufile->umap_lock);
list_for_each_entry_safe (priv, next_priv, &ufile->umaps,
list) {
struct vm_area_struct *vma = priv->vma;
if (vma->vm_mm != mm)
continue;
list_del_init(&priv->list);
zap_vma_ptes(vma, vma->vm_start,
vma->vm_end - vma->vm_start);
vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
}
mutex_unlock(&ufile->umap_lock);
up_write(&mm->mmap_sem);
mmput(mm);
}
}
/* /*
* ib_uverbs_open() does not need the BKL: * ib_uverbs_open() does not need the BKL:
* *
@@ -839,6 +1056,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
if (!atomic_inc_not_zero(&dev->refcount)) if (!atomic_inc_not_zero(&dev->refcount))
return -ENXIO; return -ENXIO;
get_device(&dev->dev);
srcu_key = srcu_read_lock(&dev->disassociate_srcu); srcu_key = srcu_read_lock(&dev->disassociate_srcu);
mutex_lock(&dev->lists_mutex); mutex_lock(&dev->lists_mutex);
ib_dev = srcu_dereference(dev->ib_dev, ib_dev = srcu_dereference(dev->ib_dev,
@@ -876,9 +1094,10 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
spin_lock_init(&file->uobjects_lock); spin_lock_init(&file->uobjects_lock);
INIT_LIST_HEAD(&file->uobjects); INIT_LIST_HEAD(&file->uobjects);
init_rwsem(&file->hw_destroy_rwsem); init_rwsem(&file->hw_destroy_rwsem);
mutex_init(&file->umap_lock);
INIT_LIST_HEAD(&file->umaps);
filp->private_data = file; filp->private_data = file;
kobject_get(&dev->kobj);
list_add_tail(&file->list, &dev->uverbs_file_list); list_add_tail(&file->list, &dev->uverbs_file_list);
mutex_unlock(&dev->lists_mutex); mutex_unlock(&dev->lists_mutex);
srcu_read_unlock(&dev->disassociate_srcu, srcu_key); srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
@@ -899,6 +1118,7 @@ err:
if (atomic_dec_and_test(&dev->refcount)) if (atomic_dec_and_test(&dev->refcount))
ib_uverbs_comp_dev(dev); ib_uverbs_comp_dev(dev);
put_device(&dev->dev);
return ret; return ret;
} }
@@ -909,10 +1129,7 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp)
uverbs_destroy_ufile_hw(file, RDMA_REMOVE_CLOSE); uverbs_destroy_ufile_hw(file, RDMA_REMOVE_CLOSE);
mutex_lock(&file->device->lists_mutex); mutex_lock(&file->device->lists_mutex);
if (!file->is_closed) { list_del_init(&file->list);
list_del(&file->list);
file->is_closed = 1;
}
mutex_unlock(&file->device->lists_mutex); mutex_unlock(&file->device->lists_mutex);
if (file->async_file) if (file->async_file)
@@ -951,37 +1168,34 @@ static struct ib_client uverbs_client = {
.remove = ib_uverbs_remove_one .remove = ib_uverbs_remove_one
}; };
static ssize_t show_ibdev(struct device *device, struct device_attribute *attr, static ssize_t ibdev_show(struct device *device, struct device_attribute *attr,
char *buf) char *buf)
{ {
struct ib_uverbs_device *dev =
container_of(device, struct ib_uverbs_device, dev);
int ret = -ENODEV; int ret = -ENODEV;
int srcu_key; int srcu_key;
struct ib_uverbs_device *dev = dev_get_drvdata(device);
struct ib_device *ib_dev; struct ib_device *ib_dev;
if (!dev)
return -ENODEV;
srcu_key = srcu_read_lock(&dev->disassociate_srcu); srcu_key = srcu_read_lock(&dev->disassociate_srcu);
ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu); ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
if (ib_dev) if (ib_dev)
ret = sprintf(buf, "%s\n", ib_dev->name); ret = sprintf(buf, "%s\n", dev_name(&ib_dev->dev));
srcu_read_unlock(&dev->disassociate_srcu, srcu_key); srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
return ret; return ret;
} }
static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); static DEVICE_ATTR_RO(ibdev);
static ssize_t show_dev_abi_version(struct device *device, static ssize_t abi_version_show(struct device *device,
struct device_attribute *attr, char *buf) struct device_attribute *attr, char *buf)
{ {
struct ib_uverbs_device *dev = dev_get_drvdata(device); struct ib_uverbs_device *dev =
container_of(device, struct ib_uverbs_device, dev);
int ret = -ENODEV; int ret = -ENODEV;
int srcu_key; int srcu_key;
struct ib_device *ib_dev; struct ib_device *ib_dev;
if (!dev)
return -ENODEV;
srcu_key = srcu_read_lock(&dev->disassociate_srcu); srcu_key = srcu_read_lock(&dev->disassociate_srcu);
ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu); ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
if (ib_dev) if (ib_dev)
@@ -990,7 +1204,17 @@ static ssize_t show_dev_abi_version(struct device *device,
return ret; return ret;
} }
static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL); static DEVICE_ATTR_RO(abi_version);
static struct attribute *ib_dev_attrs[] = {
&dev_attr_abi_version.attr,
&dev_attr_ibdev.attr,
NULL,
};
static const struct attribute_group dev_attr_group = {
.attrs = ib_dev_attrs,
};
static CLASS_ATTR_STRING(abi_version, S_IRUGO, static CLASS_ATTR_STRING(abi_version, S_IRUGO,
__stringify(IB_USER_VERBS_ABI_VERSION)); __stringify(IB_USER_VERBS_ABI_VERSION));
@@ -1028,65 +1252,56 @@ static void ib_uverbs_add_one(struct ib_device *device)
return; return;
} }
device_initialize(&uverbs_dev->dev);
uverbs_dev->dev.class = uverbs_class;
uverbs_dev->dev.parent = device->dev.parent;
uverbs_dev->dev.release = ib_uverbs_release_dev;
uverbs_dev->groups[0] = &dev_attr_group;
uverbs_dev->dev.groups = uverbs_dev->groups;
atomic_set(&uverbs_dev->refcount, 1); atomic_set(&uverbs_dev->refcount, 1);
init_completion(&uverbs_dev->comp); init_completion(&uverbs_dev->comp);
uverbs_dev->xrcd_tree = RB_ROOT; uverbs_dev->xrcd_tree = RB_ROOT;
mutex_init(&uverbs_dev->xrcd_tree_mutex); mutex_init(&uverbs_dev->xrcd_tree_mutex);
kobject_init(&uverbs_dev->kobj, &ib_uverbs_dev_ktype);
mutex_init(&uverbs_dev->lists_mutex); mutex_init(&uverbs_dev->lists_mutex);
INIT_LIST_HEAD(&uverbs_dev->uverbs_file_list); INIT_LIST_HEAD(&uverbs_dev->uverbs_file_list);
INIT_LIST_HEAD(&uverbs_dev->uverbs_events_file_list); INIT_LIST_HEAD(&uverbs_dev->uverbs_events_file_list);
rcu_assign_pointer(uverbs_dev->ib_dev, device);
uverbs_dev->num_comp_vectors = device->num_comp_vectors;
devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES); devnum = ida_alloc_max(&uverbs_ida, IB_UVERBS_MAX_DEVICES - 1,
if (devnum >= IB_UVERBS_MAX_DEVICES) GFP_KERNEL);
if (devnum < 0)
goto err; goto err;
uverbs_dev->devnum = devnum; uverbs_dev->devnum = devnum;
set_bit(devnum, dev_map);
if (devnum >= IB_UVERBS_NUM_FIXED_MINOR) if (devnum >= IB_UVERBS_NUM_FIXED_MINOR)
base = dynamic_uverbs_dev + devnum - IB_UVERBS_NUM_FIXED_MINOR; base = dynamic_uverbs_dev + devnum - IB_UVERBS_NUM_FIXED_MINOR;
else else
base = IB_UVERBS_BASE_DEV + devnum; base = IB_UVERBS_BASE_DEV + devnum;
rcu_assign_pointer(uverbs_dev->ib_dev, device);
uverbs_dev->num_comp_vectors = device->num_comp_vectors;
if (ib_uverbs_create_uapi(device, uverbs_dev)) if (ib_uverbs_create_uapi(device, uverbs_dev))
goto err_uapi; goto err_uapi;
cdev_init(&uverbs_dev->cdev, NULL); uverbs_dev->dev.devt = base;
dev_set_name(&uverbs_dev->dev, "uverbs%d", uverbs_dev->devnum);
cdev_init(&uverbs_dev->cdev,
device->mmap ? &uverbs_mmap_fops : &uverbs_fops);
uverbs_dev->cdev.owner = THIS_MODULE; uverbs_dev->cdev.owner = THIS_MODULE;
uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops;
cdev_set_parent(&uverbs_dev->cdev, &uverbs_dev->kobj);
kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum);
if (cdev_add(&uverbs_dev->cdev, base, 1))
goto err_cdev;
uverbs_dev->dev = device_create(uverbs_class, device->dev.parent, ret = cdev_device_add(&uverbs_dev->cdev, &uverbs_dev->dev);
uverbs_dev->cdev.dev, uverbs_dev, if (ret)
"uverbs%d", uverbs_dev->devnum); goto err_uapi;
if (IS_ERR(uverbs_dev->dev))
goto err_cdev;
if (device_create_file(uverbs_dev->dev, &dev_attr_ibdev))
goto err_class;
if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version))
goto err_class;
ib_set_client_data(device, &uverbs_client, uverbs_dev); ib_set_client_data(device, &uverbs_client, uverbs_dev);
return; return;
err_class:
device_destroy(uverbs_class, uverbs_dev->cdev.dev);
err_cdev:
cdev_del(&uverbs_dev->cdev);
err_uapi: err_uapi:
clear_bit(devnum, dev_map); ida_free(&uverbs_ida, devnum);
err: err:
if (atomic_dec_and_test(&uverbs_dev->refcount)) if (atomic_dec_and_test(&uverbs_dev->refcount))
ib_uverbs_comp_dev(uverbs_dev); ib_uverbs_comp_dev(uverbs_dev);
wait_for_completion(&uverbs_dev->comp); wait_for_completion(&uverbs_dev->comp);
kobject_put(&uverbs_dev->kobj); put_device(&uverbs_dev->dev);
return; return;
} }
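/*
 * Hedged sketch, not part of this patch: the generic char-device lifecycle
 * that the hunk above converts ib_uverbs_add_one() to. device_initialize()
 * makes put_device() valid on every exit path, cdev_device_add() publishes
 * the cdev and the struct device together, and the release callback frees
 * the containing structure. All names below ('example_dev',
 * 'example_register', ...) are illustrative only.
 */
struct example_dev {
	struct device dev;
	struct cdev cdev;
};

static void example_release(struct device *d)
{
	kfree(container_of(d, struct example_dev, dev));
}

static int example_register(struct example_dev *ed, dev_t devt,
			    const struct file_operations *fops)
{
	int ret;

	device_initialize(&ed->dev);
	ed->dev.release = example_release;
	ed->dev.devt = devt;
	dev_set_name(&ed->dev, "example%d", MINOR(devt));

	cdev_init(&ed->cdev, fops);
	ed->cdev.owner = THIS_MODULE;

	ret = cdev_device_add(&ed->cdev, &ed->dev);
	if (ret)
		put_device(&ed->dev);	/* example_release() frees 'ed' */
	return ret;
}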
@@ -1107,8 +1322,7 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
while (!list_empty(&uverbs_dev->uverbs_file_list)) { while (!list_empty(&uverbs_dev->uverbs_file_list)) {
file = list_first_entry(&uverbs_dev->uverbs_file_list, file = list_first_entry(&uverbs_dev->uverbs_file_list,
struct ib_uverbs_file, list); struct ib_uverbs_file, list);
file->is_closed = 1; list_del_init(&file->list);
list_del(&file->list);
kref_get(&file->ref); kref_get(&file->ref);
/* We must release the mutex before going ahead and calling /* We must release the mutex before going ahead and calling
@@ -1156,10 +1370,8 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
if (!uverbs_dev) if (!uverbs_dev)
return; return;
dev_set_drvdata(uverbs_dev->dev, NULL); cdev_device_del(&uverbs_dev->cdev, &uverbs_dev->dev);
device_destroy(uverbs_class, uverbs_dev->cdev.dev); ida_free(&uverbs_ida, uverbs_dev->devnum);
cdev_del(&uverbs_dev->cdev);
clear_bit(uverbs_dev->devnum, dev_map);
if (device->disassociate_ucontext) { if (device->disassociate_ucontext) {
/* We disassociate HW resources and immediately return. /* We disassociate HW resources and immediately return.
@@ -1182,7 +1394,7 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
if (wait_clients) if (wait_clients)
wait_for_completion(&uverbs_dev->comp); wait_for_completion(&uverbs_dev->comp);
kobject_put(&uverbs_dev->kobj); put_device(&uverbs_dev->dev);
} }
static char *uverbs_devnode(struct device *dev, umode_t *mode) static char *uverbs_devnode(struct device *dev, umode_t *mode)


@@ -326,11 +326,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(
if (IS_ERR(action)) if (IS_ERR(action))
return PTR_ERR(action); return PTR_ERR(action);
atomic_set(&action->usecnt, 0); uverbs_flow_action_fill_action(action, uobj, ib_dev,
action->device = ib_dev; IB_FLOW_ACTION_ESP);
action->type = IB_FLOW_ACTION_ESP;
action->uobject = uobj;
uobj->object = action;
return 0; return 0;
} }


@@ -73,6 +73,18 @@ static int uapi_merge_method(struct uverbs_api *uapi,
if (attr->attr.type == UVERBS_ATTR_TYPE_ENUM_IN) if (attr->attr.type == UVERBS_ATTR_TYPE_ENUM_IN)
method_elm->driver_method |= is_driver; method_elm->driver_method |= is_driver;
/*
* Like other uobject based things we only support a single
* uobject being NEW'd or DESTROY'd
*/
if (attr->attr.type == UVERBS_ATTR_TYPE_IDRS_ARRAY) {
u8 access = attr->attr.u2.objs_arr.access;
if (WARN_ON(access == UVERBS_ACCESS_NEW ||
access == UVERBS_ACCESS_DESTROY))
return -EINVAL;
}
attr_slot = attr_slot =
uapi_add_elm(uapi, method_key | uapi_key_attr(attr->id), uapi_add_elm(uapi, method_key | uapi_key_attr(attr->id),
sizeof(*attr_slot)); sizeof(*attr_slot));


@@ -264,7 +264,7 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
} }
pd->res.type = RDMA_RESTRACK_PD; pd->res.type = RDMA_RESTRACK_PD;
pd->res.kern_name = caller; rdma_restrack_set_task(&pd->res, caller);
rdma_restrack_add(&pd->res); rdma_restrack_add(&pd->res);
if (mr_access_flags) { if (mr_access_flags) {
@@ -710,7 +710,7 @@ static int ib_resolve_unicast_gid_dmac(struct ib_device *device,
ret = rdma_addr_find_l2_eth_by_grh(&sgid_attr->gid, &grh->dgid, ret = rdma_addr_find_l2_eth_by_grh(&sgid_attr->gid, &grh->dgid,
ah_attr->roce.dmac, ah_attr->roce.dmac,
sgid_attr->ndev, &hop_limit); sgid_attr, &hop_limit);
grh->hop_limit = hop_limit; grh->hop_limit = hop_limit;
return ret; return ret;
@@ -1509,8 +1509,7 @@ static const struct {
}; };
bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
enum ib_qp_type type, enum ib_qp_attr_mask mask, enum ib_qp_type type, enum ib_qp_attr_mask mask)
enum rdma_link_layer ll)
{ {
enum ib_qp_attr_mask req_param, opt_param; enum ib_qp_attr_mask req_param, opt_param;
@@ -1629,14 +1628,16 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
if (rdma_ib_or_roce(qp->device, port)) { if (rdma_ib_or_roce(qp->device, port)) {
if (attr_mask & IB_QP_RQ_PSN && attr->rq_psn & ~0xffffff) { if (attr_mask & IB_QP_RQ_PSN && attr->rq_psn & ~0xffffff) {
pr_warn("%s: %s rq_psn overflow, masking to 24 bits\n", dev_warn(&qp->device->dev,
__func__, qp->device->name); "%s rq_psn overflow, masking to 24 bits\n",
__func__);
attr->rq_psn &= 0xffffff; attr->rq_psn &= 0xffffff;
} }
if (attr_mask & IB_QP_SQ_PSN && attr->sq_psn & ~0xffffff) { if (attr_mask & IB_QP_SQ_PSN && attr->sq_psn & ~0xffffff) {
pr_warn("%s: %s sq_psn overflow, masking to 24 bits\n", dev_warn(&qp->device->dev,
__func__, qp->device->name); "%s sq_psn overflow, masking to 24 bits\n",
__func__);
attr->sq_psn &= 0xffffff; attr->sq_psn &= 0xffffff;
} }
} }
@@ -1888,7 +1889,7 @@ struct ib_cq *__ib_create_cq(struct ib_device *device,
cq->cq_context = cq_context; cq->cq_context = cq_context;
atomic_set(&cq->usecnt, 0); atomic_set(&cq->usecnt, 0);
cq->res.type = RDMA_RESTRACK_CQ; cq->res.type = RDMA_RESTRACK_CQ;
cq->res.kern_name = caller; rdma_restrack_set_task(&cq->res, caller);
rdma_restrack_add(&cq->res); rdma_restrack_add(&cq->res);
} }


@@ -40,7 +40,6 @@
#ifndef __BNXT_RE_H__ #ifndef __BNXT_RE_H__
#define __BNXT_RE_H__ #define __BNXT_RE_H__
#define ROCE_DRV_MODULE_NAME "bnxt_re" #define ROCE_DRV_MODULE_NAME "bnxt_re"
#define ROCE_DRV_MODULE_VERSION "1.0.0"
#define BNXT_RE_DESC "Broadcom NetXtreme-C/E RoCE Driver" #define BNXT_RE_DESC "Broadcom NetXtreme-C/E RoCE Driver"
#define BNXT_RE_PAGE_SHIFT_4K (12) #define BNXT_RE_PAGE_SHIFT_4K (12)
@@ -120,6 +119,8 @@ struct bnxt_re_dev {
#define BNXT_RE_FLAG_HAVE_L2_REF 3 #define BNXT_RE_FLAG_HAVE_L2_REF 3
#define BNXT_RE_FLAG_RCFW_CHANNEL_EN 4 #define BNXT_RE_FLAG_RCFW_CHANNEL_EN 4
#define BNXT_RE_FLAG_QOS_WORK_REG 5 #define BNXT_RE_FLAG_QOS_WORK_REG 5
#define BNXT_RE_FLAG_RESOURCES_ALLOCATED 7
#define BNXT_RE_FLAG_RESOURCES_INITIALIZED 8
#define BNXT_RE_FLAG_ISSUE_ROCE_STATS 29 #define BNXT_RE_FLAG_ISSUE_ROCE_STATS 29
struct net_device *netdev; struct net_device *netdev;
unsigned int version, major, minor; unsigned int version, major, minor;


@@ -68,6 +68,8 @@ static const char * const bnxt_re_stat_name[] = {
[BNXT_RE_TX_PKTS] = "tx_pkts", [BNXT_RE_TX_PKTS] = "tx_pkts",
[BNXT_RE_TX_BYTES] = "tx_bytes", [BNXT_RE_TX_BYTES] = "tx_bytes",
[BNXT_RE_RECOVERABLE_ERRORS] = "recoverable_errors", [BNXT_RE_RECOVERABLE_ERRORS] = "recoverable_errors",
[BNXT_RE_RX_DROPS] = "rx_roce_drops",
[BNXT_RE_RX_DISCARDS] = "rx_roce_discards",
[BNXT_RE_TO_RETRANSMITS] = "to_retransmits", [BNXT_RE_TO_RETRANSMITS] = "to_retransmits",
[BNXT_RE_SEQ_ERR_NAKS_RCVD] = "seq_err_naks_rcvd", [BNXT_RE_SEQ_ERR_NAKS_RCVD] = "seq_err_naks_rcvd",
[BNXT_RE_MAX_RETRY_EXCEEDED] = "max_retry_exceeded", [BNXT_RE_MAX_RETRY_EXCEEDED] = "max_retry_exceeded",
@@ -106,7 +108,8 @@ static const char * const bnxt_re_stat_name[] = {
[BNXT_RE_RES_CQ_LOAD_ERR] = "res_cq_load_err", [BNXT_RE_RES_CQ_LOAD_ERR] = "res_cq_load_err",
[BNXT_RE_RES_SRQ_LOAD_ERR] = "res_srq_load_err", [BNXT_RE_RES_SRQ_LOAD_ERR] = "res_srq_load_err",
[BNXT_RE_RES_TX_PCI_ERR] = "res_tx_pci_err", [BNXT_RE_RES_TX_PCI_ERR] = "res_tx_pci_err",
[BNXT_RE_RES_RX_PCI_ERR] = "res_rx_pci_err" [BNXT_RE_RES_RX_PCI_ERR] = "res_rx_pci_err",
[BNXT_RE_OUT_OF_SEQ_ERR] = "oos_drop_count"
}; };
int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev, int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
@@ -128,6 +131,10 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
if (bnxt_re_stats) { if (bnxt_re_stats) {
stats->value[BNXT_RE_RECOVERABLE_ERRORS] = stats->value[BNXT_RE_RECOVERABLE_ERRORS] =
le64_to_cpu(bnxt_re_stats->tx_bcast_pkts); le64_to_cpu(bnxt_re_stats->tx_bcast_pkts);
stats->value[BNXT_RE_RX_DROPS] =
le64_to_cpu(bnxt_re_stats->rx_drop_pkts);
stats->value[BNXT_RE_RX_DISCARDS] =
le64_to_cpu(bnxt_re_stats->rx_discard_pkts);
stats->value[BNXT_RE_RX_PKTS] = stats->value[BNXT_RE_RX_PKTS] =
le64_to_cpu(bnxt_re_stats->rx_ucast_pkts); le64_to_cpu(bnxt_re_stats->rx_ucast_pkts);
stats->value[BNXT_RE_RX_BYTES] = stats->value[BNXT_RE_RX_BYTES] =
@@ -220,6 +227,8 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
rdev->stats.res_tx_pci_err; rdev->stats.res_tx_pci_err;
stats->value[BNXT_RE_RES_RX_PCI_ERR] = stats->value[BNXT_RE_RES_RX_PCI_ERR] =
rdev->stats.res_rx_pci_err; rdev->stats.res_rx_pci_err;
stats->value[BNXT_RE_OUT_OF_SEQ_ERR] =
rdev->stats.res_oos_drop_count;
} }
return ARRAY_SIZE(bnxt_re_stat_name); return ARRAY_SIZE(bnxt_re_stat_name);


@@ -51,6 +51,8 @@ enum bnxt_re_hw_stats {
BNXT_RE_TX_PKTS, BNXT_RE_TX_PKTS,
BNXT_RE_TX_BYTES, BNXT_RE_TX_BYTES,
BNXT_RE_RECOVERABLE_ERRORS, BNXT_RE_RECOVERABLE_ERRORS,
BNXT_RE_RX_DROPS,
BNXT_RE_RX_DISCARDS,
BNXT_RE_TO_RETRANSMITS, BNXT_RE_TO_RETRANSMITS,
BNXT_RE_SEQ_ERR_NAKS_RCVD, BNXT_RE_SEQ_ERR_NAKS_RCVD,
BNXT_RE_MAX_RETRY_EXCEEDED, BNXT_RE_MAX_RETRY_EXCEEDED,
@@ -90,6 +92,7 @@ enum bnxt_re_hw_stats {
BNXT_RE_RES_SRQ_LOAD_ERR, BNXT_RE_RES_SRQ_LOAD_ERR,
BNXT_RE_RES_TX_PCI_ERR, BNXT_RE_RES_TX_PCI_ERR,
BNXT_RE_RES_RX_PCI_ERR, BNXT_RE_RES_RX_PCI_ERR,
BNXT_RE_OUT_OF_SEQ_ERR,
BNXT_RE_NUM_COUNTERS BNXT_RE_NUM_COUNTERS
}; };


@@ -1598,8 +1598,7 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
curr_qp_state = __to_ib_qp_state(qp->qplib_qp.cur_qp_state); curr_qp_state = __to_ib_qp_state(qp->qplib_qp.cur_qp_state);
new_qp_state = qp_attr->qp_state; new_qp_state = qp_attr->qp_state;
if (!ib_modify_qp_is_ok(curr_qp_state, new_qp_state, if (!ib_modify_qp_is_ok(curr_qp_state, new_qp_state,
ib_qp->qp_type, qp_attr_mask, ib_qp->qp_type, qp_attr_mask)) {
IB_LINK_LAYER_ETHERNET)) {
dev_err(rdev_to_dev(rdev), dev_err(rdev_to_dev(rdev),
"Invalid attribute mask: %#x specified ", "Invalid attribute mask: %#x specified ",
qp_attr_mask); qp_attr_mask);
@@ -2664,6 +2663,7 @@ struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev,
nq->budget++; nq->budget++;
atomic_inc(&rdev->cq_count); atomic_inc(&rdev->cq_count);
spin_lock_init(&cq->cq_lock);
if (context) { if (context) {
struct bnxt_re_cq_resp resp; struct bnxt_re_cq_resp resp;


@@ -67,7 +67,7 @@
#include "hw_counters.h" #include "hw_counters.h"
static char version[] = static char version[] =
BNXT_RE_DESC " v" ROCE_DRV_MODULE_VERSION "\n"; BNXT_RE_DESC "\n";
MODULE_AUTHOR("Eddie Wai <eddie.wai@broadcom.com>"); MODULE_AUTHOR("Eddie Wai <eddie.wai@broadcom.com>");
MODULE_DESCRIPTION(BNXT_RE_DESC " Driver"); MODULE_DESCRIPTION(BNXT_RE_DESC " Driver");
@@ -535,6 +535,34 @@ static struct bnxt_en_dev *bnxt_re_dev_probe(struct net_device *netdev)
return en_dev; return en_dev;
} }
static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
char *buf)
{
struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
return scnprintf(buf, PAGE_SIZE, "0x%x\n", rdev->en_dev->pdev->vendor);
}
static DEVICE_ATTR_RO(hw_rev);
static ssize_t hca_type_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
return scnprintf(buf, PAGE_SIZE, "%s\n", rdev->ibdev.node_desc);
}
static DEVICE_ATTR_RO(hca_type);
static struct attribute *bnxt_re_attributes[] = {
&dev_attr_hw_rev.attr,
&dev_attr_hca_type.attr,
NULL
};
static const struct attribute_group bnxt_re_dev_attr_group = {
.attrs = bnxt_re_attributes,
};
static void bnxt_re_unregister_ib(struct bnxt_re_dev *rdev) static void bnxt_re_unregister_ib(struct bnxt_re_dev *rdev)
{ {
ib_unregister_device(&rdev->ibdev); ib_unregister_device(&rdev->ibdev);
@@ -547,7 +575,6 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
/* ib device init */ /* ib device init */
ibdev->owner = THIS_MODULE; ibdev->owner = THIS_MODULE;
ibdev->node_type = RDMA_NODE_IB_CA; ibdev->node_type = RDMA_NODE_IB_CA;
strlcpy(ibdev->name, "bnxt_re%d", IB_DEVICE_NAME_MAX);
strlcpy(ibdev->node_desc, BNXT_RE_DESC " HCA", strlcpy(ibdev->node_desc, BNXT_RE_DESC " HCA",
strlen(BNXT_RE_DESC) + 5); strlen(BNXT_RE_DESC) + 5);
ibdev->phys_port_cnt = 1; ibdev->phys_port_cnt = 1;
@@ -639,34 +666,11 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
ibdev->get_hw_stats = bnxt_re_ib_get_hw_stats; ibdev->get_hw_stats = bnxt_re_ib_get_hw_stats;
ibdev->alloc_hw_stats = bnxt_re_ib_alloc_hw_stats; ibdev->alloc_hw_stats = bnxt_re_ib_alloc_hw_stats;
rdma_set_device_sysfs_group(ibdev, &bnxt_re_dev_attr_group);
ibdev->driver_id = RDMA_DRIVER_BNXT_RE; ibdev->driver_id = RDMA_DRIVER_BNXT_RE;
return ib_register_device(ibdev, NULL); return ib_register_device(ibdev, "bnxt_re%d", NULL);
} }
static ssize_t show_rev(struct device *device, struct device_attribute *attr,
char *buf)
{
struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
return scnprintf(buf, PAGE_SIZE, "0x%x\n", rdev->en_dev->pdev->vendor);
}
static ssize_t show_hca(struct device *device, struct device_attribute *attr,
char *buf)
{
struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
return scnprintf(buf, PAGE_SIZE, "%s\n", rdev->ibdev.node_desc);
}
static DEVICE_ATTR(hw_rev, 0444, show_rev, NULL);
static DEVICE_ATTR(hca_type, 0444, show_hca, NULL);
static struct device_attribute *bnxt_re_attributes[] = {
&dev_attr_hw_rev,
&dev_attr_hca_type
};
static void bnxt_re_dev_remove(struct bnxt_re_dev *rdev) static void bnxt_re_dev_remove(struct bnxt_re_dev *rdev)
{ {
dev_put(rdev->netdev); dev_put(rdev->netdev);
@@ -864,10 +868,8 @@ static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev)
{ {
int i; int i;
if (rdev->nq[0].hwq.max_elements) { for (i = 1; i < rdev->num_msix; i++)
for (i = 1; i < rdev->num_msix; i++) bnxt_qplib_disable_nq(&rdev->nq[i - 1]);
bnxt_qplib_disable_nq(&rdev->nq[i - 1]);
}
if (rdev->qplib_res.rcfw) if (rdev->qplib_res.rcfw)
bnxt_qplib_cleanup_res(&rdev->qplib_res); bnxt_qplib_cleanup_res(&rdev->qplib_res);
@@ -876,6 +878,7 @@ static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev)
static int bnxt_re_init_res(struct bnxt_re_dev *rdev) static int bnxt_re_init_res(struct bnxt_re_dev *rdev)
{ {
int rc = 0, i; int rc = 0, i;
int num_vec_enabled = 0;
bnxt_qplib_init_res(&rdev->qplib_res); bnxt_qplib_init_res(&rdev->qplib_res);
@@ -891,9 +894,13 @@ static int bnxt_re_init_res(struct bnxt_re_dev *rdev)
"Failed to enable NQ with rc = 0x%x", rc); "Failed to enable NQ with rc = 0x%x", rc);
goto fail; goto fail;
} }
num_vec_enabled++;
} }
return 0; return 0;
fail: fail:
for (i = num_vec_enabled; i >= 0; i--)
bnxt_qplib_disable_nq(&rdev->nq[i]);
return rc; return rc;
} }
@@ -925,6 +932,7 @@ static void bnxt_re_free_res(struct bnxt_re_dev *rdev)
static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev) static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
{ {
int rc = 0, i; int rc = 0, i;
int num_vec_created = 0;
/* Configure and allocate resources for qplib */ /* Configure and allocate resources for qplib */
rdev->qplib_res.rcfw = &rdev->rcfw; rdev->qplib_res.rcfw = &rdev->rcfw;
@@ -951,7 +959,7 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
if (rc) { if (rc) {
dev_err(rdev_to_dev(rdev), "Alloc Failed NQ%d rc:%#x", dev_err(rdev_to_dev(rdev), "Alloc Failed NQ%d rc:%#x",
i, rc); i, rc);
goto dealloc_dpi; goto free_nq;
} }
rc = bnxt_re_net_ring_alloc rc = bnxt_re_net_ring_alloc
(rdev, rdev->nq[i].hwq.pbl[PBL_LVL_0].pg_map_arr, (rdev, rdev->nq[i].hwq.pbl[PBL_LVL_0].pg_map_arr,
@@ -964,14 +972,17 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
dev_err(rdev_to_dev(rdev), dev_err(rdev_to_dev(rdev),
"Failed to allocate NQ fw id with rc = 0x%x", "Failed to allocate NQ fw id with rc = 0x%x",
rc); rc);
bnxt_qplib_free_nq(&rdev->nq[i]);
goto free_nq; goto free_nq;
} }
num_vec_created++;
} }
return 0; return 0;
free_nq: free_nq:
for (i = 0; i < rdev->num_msix - 1; i++) for (i = num_vec_created; i >= 0; i--) {
bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id);
bnxt_qplib_free_nq(&rdev->nq[i]); bnxt_qplib_free_nq(&rdev->nq[i]);
dealloc_dpi: }
bnxt_qplib_dealloc_dpi(&rdev->qplib_res, bnxt_qplib_dealloc_dpi(&rdev->qplib_res,
&rdev->qplib_res.dpi_tbl, &rdev->qplib_res.dpi_tbl,
&rdev->dpi_privileged); &rdev->dpi_privileged);
@@ -989,12 +1000,17 @@ static void bnxt_re_dispatch_event(struct ib_device *ibdev, struct ib_qp *qp,
struct ib_event ib_event; struct ib_event ib_event;
ib_event.device = ibdev; ib_event.device = ibdev;
if (qp) if (qp) {
ib_event.element.qp = qp; ib_event.element.qp = qp;
else ib_event.event = event;
if (qp->event_handler)
qp->event_handler(&ib_event, qp->qp_context);
} else {
ib_event.element.port_num = port_num; ib_event.element.port_num = port_num;
ib_event.event = event; ib_event.event = event;
ib_dispatch_event(&ib_event); ib_dispatch_event(&ib_event);
}
} }
#define HWRM_QUEUE_PRI2COS_QCFG_INPUT_FLAGS_IVLAN 0x02 #define HWRM_QUEUE_PRI2COS_QCFG_INPUT_FLAGS_IVLAN 0x02
@@ -1189,20 +1205,20 @@ static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev)
static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev) static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev)
{ {
int i, rc; int rc;
if (test_and_clear_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags)) { if (test_and_clear_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags)) {
for (i = 0; i < ARRAY_SIZE(bnxt_re_attributes); i++)
device_remove_file(&rdev->ibdev.dev,
bnxt_re_attributes[i]);
/* Cleanup ib dev */ /* Cleanup ib dev */
bnxt_re_unregister_ib(rdev); bnxt_re_unregister_ib(rdev);
} }
if (test_and_clear_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags)) if (test_and_clear_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags))
cancel_delayed_work(&rdev->worker); cancel_delayed_work_sync(&rdev->worker);
bnxt_re_cleanup_res(rdev); if (test_and_clear_bit(BNXT_RE_FLAG_RESOURCES_INITIALIZED,
bnxt_re_free_res(rdev); &rdev->flags))
bnxt_re_cleanup_res(rdev);
if (test_and_clear_bit(BNXT_RE_FLAG_RESOURCES_ALLOCATED, &rdev->flags))
bnxt_re_free_res(rdev);
if (test_and_clear_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags)) { if (test_and_clear_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags)) {
rc = bnxt_qplib_deinit_rcfw(&rdev->rcfw); rc = bnxt_qplib_deinit_rcfw(&rdev->rcfw);
@@ -1241,7 +1257,7 @@ static void bnxt_re_worker(struct work_struct *work)
static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev) static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
{ {
int i, j, rc; int rc;
bool locked; bool locked;
@@ -1331,12 +1347,15 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
pr_err("Failed to allocate resources: %#x\n", rc); pr_err("Failed to allocate resources: %#x\n", rc);
goto fail; goto fail;
} }
set_bit(BNXT_RE_FLAG_RESOURCES_ALLOCATED, &rdev->flags);
rc = bnxt_re_init_res(rdev); rc = bnxt_re_init_res(rdev);
if (rc) { if (rc) {
pr_err("Failed to initialize resources: %#x\n", rc); pr_err("Failed to initialize resources: %#x\n", rc);
goto fail; goto fail;
} }
set_bit(BNXT_RE_FLAG_RESOURCES_INITIALIZED, &rdev->flags);
if (!rdev->is_virtfn) { if (!rdev->is_virtfn) {
rc = bnxt_re_setup_qos(rdev); rc = bnxt_re_setup_qos(rdev);
if (rc) if (rc)
@@ -1358,20 +1377,6 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
} }
set_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags); set_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags);
dev_info(rdev_to_dev(rdev), "Device registered successfully"); dev_info(rdev_to_dev(rdev), "Device registered successfully");
for (i = 0; i < ARRAY_SIZE(bnxt_re_attributes); i++) {
rc = device_create_file(&rdev->ibdev.dev,
bnxt_re_attributes[i]);
if (rc) {
dev_err(rdev_to_dev(rdev),
"Failed to create IB sysfs: %#x", rc);
/* Must clean up all created device files */
for (j = 0; j < i; j++)
device_remove_file(&rdev->ibdev.dev,
bnxt_re_attributes[j]);
bnxt_re_unregister_ib(rdev);
goto fail;
}
}
ib_get_eth_speed(&rdev->ibdev, 1, &rdev->active_speed, ib_get_eth_speed(&rdev->ibdev, 1, &rdev->active_speed,
&rdev->active_width); &rdev->active_width);
set_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags); set_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags);


@@ -36,6 +36,8 @@
* Description: Fast Path Operators * Description: Fast Path Operators
*/ */
#define dev_fmt(fmt) "QPLIB: " fmt
#include <linux/interrupt.h> #include <linux/interrupt.h>
#include <linux/spinlock.h> #include <linux/spinlock.h>
#include <linux/sched.h> #include <linux/sched.h>
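/*
 * Hedged illustration, not part of this patch, of why the literal
 * "QPLIB: " prefixes disappear from the format strings below: dev_fmt()
 * is expanded by the dev_err()/dev_warn()/dev_dbg() helpers, so defining
 * it before the includes prefixes every device log line from this file.
 * 'example_log' is a throwaway name used only for this sketch.
 */
static void example_log(struct device *dev)
{
	/* With dev_fmt() above, this logs "... QPLIB: CMDQ is full!" */
	dev_err(dev, "CMDQ is full!\n");
}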
@@ -71,8 +73,7 @@ static void __bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp)
if (!qp->sq.flushed) { if (!qp->sq.flushed) {
dev_dbg(&scq->hwq.pdev->dev, dev_dbg(&scq->hwq.pdev->dev,
"QPLIB: FP: Adding to SQ Flush list = %p", "FP: Adding to SQ Flush list = %p\n", qp);
qp);
bnxt_qplib_cancel_phantom_processing(qp); bnxt_qplib_cancel_phantom_processing(qp);
list_add_tail(&qp->sq_flush, &scq->sqf_head); list_add_tail(&qp->sq_flush, &scq->sqf_head);
qp->sq.flushed = true; qp->sq.flushed = true;
@@ -80,8 +81,7 @@ static void __bnxt_qplib_add_flush_qp(struct bnxt_qplib_qp *qp)
if (!qp->srq) { if (!qp->srq) {
if (!qp->rq.flushed) { if (!qp->rq.flushed) {
dev_dbg(&rcq->hwq.pdev->dev, dev_dbg(&rcq->hwq.pdev->dev,
"QPLIB: FP: Adding to RQ Flush list = %p", "FP: Adding to RQ Flush list = %p\n", qp);
qp);
list_add_tail(&qp->rq_flush, &rcq->rqf_head); list_add_tail(&qp->rq_flush, &rcq->rqf_head);
qp->rq.flushed = true; qp->rq.flushed = true;
} }
@@ -207,7 +207,7 @@ static int bnxt_qplib_alloc_qp_hdr_buf(struct bnxt_qplib_res *res,
if (!qp->sq_hdr_buf) { if (!qp->sq_hdr_buf) {
rc = -ENOMEM; rc = -ENOMEM;
dev_err(&res->pdev->dev, dev_err(&res->pdev->dev,
"QPLIB: Failed to create sq_hdr_buf"); "Failed to create sq_hdr_buf\n");
goto fail; goto fail;
} }
} }
@@ -221,7 +221,7 @@ static int bnxt_qplib_alloc_qp_hdr_buf(struct bnxt_qplib_res *res,
if (!qp->rq_hdr_buf) { if (!qp->rq_hdr_buf) {
rc = -ENOMEM; rc = -ENOMEM;
dev_err(&res->pdev->dev, dev_err(&res->pdev->dev,
"QPLIB: Failed to create rq_hdr_buf"); "Failed to create rq_hdr_buf\n");
goto fail; goto fail;
} }
} }
@@ -277,8 +277,7 @@ static void bnxt_qplib_service_nq(unsigned long data)
num_cqne_processed++; num_cqne_processed++;
else else
dev_warn(&nq->pdev->dev, dev_warn(&nq->pdev->dev,
"QPLIB: cqn - type 0x%x not handled", "cqn - type 0x%x not handled\n", type);
type);
spin_unlock_bh(&cq->compl_lock); spin_unlock_bh(&cq->compl_lock);
break; break;
} }
@@ -298,7 +297,7 @@ static void bnxt_qplib_service_nq(unsigned long data)
num_srqne_processed++; num_srqne_processed++;
else else
dev_warn(&nq->pdev->dev, dev_warn(&nq->pdev->dev,
"QPLIB: SRQ event 0x%x not handled", "SRQ event 0x%x not handled\n",
nqsrqe->event); nqsrqe->event);
break; break;
} }
@@ -306,8 +305,7 @@ static void bnxt_qplib_service_nq(unsigned long data)
break; break;
default: default:
dev_warn(&nq->pdev->dev, dev_warn(&nq->pdev->dev,
"QPLIB: nqe with type = 0x%x not handled", "nqe with type = 0x%x not handled\n", type);
type);
break; break;
} }
raw_cons++; raw_cons++;
@@ -360,7 +358,8 @@ void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq)
} }
/* Make sure the HW is stopped! */ /* Make sure the HW is stopped! */
bnxt_qplib_nq_stop_irq(nq, true); if (nq->requested)
bnxt_qplib_nq_stop_irq(nq, true);
if (nq->bar_reg_iomem) if (nq->bar_reg_iomem)
iounmap(nq->bar_reg_iomem); iounmap(nq->bar_reg_iomem);
@@ -396,7 +395,7 @@ int bnxt_qplib_nq_start_irq(struct bnxt_qplib_nq *nq, int nq_indx,
rc = irq_set_affinity_hint(nq->vector, &nq->mask); rc = irq_set_affinity_hint(nq->vector, &nq->mask);
if (rc) { if (rc) {
dev_warn(&nq->pdev->dev, dev_warn(&nq->pdev->dev,
"QPLIB: set affinity failed; vector: %d nq_idx: %d\n", "set affinity failed; vector: %d nq_idx: %d\n",
nq->vector, nq_indx); nq->vector, nq_indx);
} }
nq->requested = true; nq->requested = true;
@@ -443,7 +442,7 @@ int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq,
rc = bnxt_qplib_nq_start_irq(nq, nq_idx, msix_vector, true); rc = bnxt_qplib_nq_start_irq(nq, nq_idx, msix_vector, true);
if (rc) { if (rc) {
dev_err(&nq->pdev->dev, dev_err(&nq->pdev->dev,
"QPLIB: Failed to request irq for nq-idx %d", nq_idx); "Failed to request irq for nq-idx %d\n", nq_idx);
goto fail; goto fail;
} }
@@ -662,8 +661,8 @@ int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq,
spin_lock(&srq_hwq->lock); spin_lock(&srq_hwq->lock);
if (srq->start_idx == srq->last_idx) { if (srq->start_idx == srq->last_idx) {
dev_err(&srq_hwq->pdev->dev, "QPLIB: FP: SRQ (0x%x) is full!", dev_err(&srq_hwq->pdev->dev,
srq->id); "FP: SRQ (0x%x) is full!\n", srq->id);
rc = -EINVAL; rc = -EINVAL;
spin_unlock(&srq_hwq->lock); spin_unlock(&srq_hwq->lock);
goto done; goto done;
@@ -1324,7 +1323,7 @@ int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
} }
} }
if (i == res->sgid_tbl.max) if (i == res->sgid_tbl.max)
dev_warn(&res->pdev->dev, "QPLIB: SGID not found??"); dev_warn(&res->pdev->dev, "SGID not found??\n");
qp->ah.hop_limit = sb->hop_limit; qp->ah.hop_limit = sb->hop_limit;
qp->ah.traffic_class = sb->traffic_class; qp->ah.traffic_class = sb->traffic_class;
@@ -1536,7 +1535,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
if (bnxt_qplib_queue_full(sq)) { if (bnxt_qplib_queue_full(sq)) {
dev_err(&sq->hwq.pdev->dev, dev_err(&sq->hwq.pdev->dev,
"QPLIB: prod = %#x cons = %#x qdepth = %#x delta = %#x", "prod = %#x cons = %#x qdepth = %#x delta = %#x\n",
sq->hwq.prod, sq->hwq.cons, sq->hwq.max_elements, sq->hwq.prod, sq->hwq.cons, sq->hwq.max_elements,
sq->q_full_delta); sq->q_full_delta);
rc = -ENOMEM; rc = -ENOMEM;
@@ -1561,7 +1560,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
/* Copy the inline data */ /* Copy the inline data */
if (wqe->inline_len > BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH) { if (wqe->inline_len > BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH) {
dev_warn(&sq->hwq.pdev->dev, dev_warn(&sq->hwq.pdev->dev,
"QPLIB: Inline data length > 96 detected"); "Inline data length > 96 detected\n");
data_len = BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH; data_len = BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH;
} else { } else {
data_len = wqe->inline_len; data_len = wqe->inline_len;
@@ -1776,7 +1775,7 @@ done:
queue_work(qp->scq->nq->cqn_wq, &nq_work->work); queue_work(qp->scq->nq->cqn_wq, &nq_work->work);
} else { } else {
dev_err(&sq->hwq.pdev->dev, dev_err(&sq->hwq.pdev->dev,
"QPLIB: FP: Failed to allocate SQ nq_work!"); "FP: Failed to allocate SQ nq_work!\n");
rc = -ENOMEM; rc = -ENOMEM;
} }
} }
@@ -1815,13 +1814,12 @@ int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp,
if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) { if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) {
sch_handler = true; sch_handler = true;
dev_dbg(&rq->hwq.pdev->dev, dev_dbg(&rq->hwq.pdev->dev,
"%s Error QP. Scheduling for poll_cq\n", "%s: Error QP. Scheduling for poll_cq\n", __func__);
__func__);
goto queue_err; goto queue_err;
} }
if (bnxt_qplib_queue_full(rq)) { if (bnxt_qplib_queue_full(rq)) {
dev_err(&rq->hwq.pdev->dev, dev_err(&rq->hwq.pdev->dev,
"QPLIB: FP: QP (0x%x) RQ is full!", qp->id); "FP: QP (0x%x) RQ is full!\n", qp->id);
rc = -EINVAL; rc = -EINVAL;
goto done; goto done;
} }
@@ -1870,7 +1868,7 @@ queue_err:
queue_work(qp->rcq->nq->cqn_wq, &nq_work->work); queue_work(qp->rcq->nq->cqn_wq, &nq_work->work);
} else { } else {
dev_err(&rq->hwq.pdev->dev, dev_err(&rq->hwq.pdev->dev,
"QPLIB: FP: Failed to allocate RQ nq_work!"); "FP: Failed to allocate RQ nq_work!\n");
rc = -ENOMEM; rc = -ENOMEM;
} }
} }
@@ -1932,7 +1930,7 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
if (!cq->dpi) { if (!cq->dpi) {
dev_err(&rcfw->pdev->dev, dev_err(&rcfw->pdev->dev,
"QPLIB: FP: CREATE_CQ failed due to NULL DPI"); "FP: CREATE_CQ failed due to NULL DPI\n");
return -EINVAL; return -EINVAL;
} }
req.dpi = cpu_to_le32(cq->dpi->dpi); req.dpi = cpu_to_le32(cq->dpi->dpi);
@@ -1969,6 +1967,7 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
INIT_LIST_HEAD(&cq->sqf_head); INIT_LIST_HEAD(&cq->sqf_head);
INIT_LIST_HEAD(&cq->rqf_head); INIT_LIST_HEAD(&cq->rqf_head);
spin_lock_init(&cq->compl_lock); spin_lock_init(&cq->compl_lock);
spin_lock_init(&cq->flush_lock);
bnxt_qplib_arm_cq_enable(cq); bnxt_qplib_arm_cq_enable(cq);
return 0; return 0;
@@ -2172,7 +2171,7 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
* comes back * comes back
*/ */
dev_dbg(&cq->hwq.pdev->dev, dev_dbg(&cq->hwq.pdev->dev,
"FP:Got Phantom CQE"); "FP: Got Phantom CQE\n");
sq->condition = false; sq->condition = false;
sq->single = true; sq->single = true;
rc = 0; rc = 0;
@@ -2189,7 +2188,7 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
peek_raw_cq_cons++; peek_raw_cq_cons++;
} }
dev_err(&cq->hwq.pdev->dev, dev_err(&cq->hwq.pdev->dev,
"Should not have come here! cq_cons=0x%x qp=0x%x sq cons sw=0x%x hw=0x%x", "Should not have come here! cq_cons=0x%x qp=0x%x sq cons sw=0x%x hw=0x%x\n",
cq_cons, qp->id, sw_sq_cons, cqe_sq_cons); cq_cons, qp->id, sw_sq_cons, cqe_sq_cons);
rc = -EINVAL; rc = -EINVAL;
} }
@@ -2213,7 +2212,7 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
le64_to_cpu(hwcqe->qp_handle)); le64_to_cpu(hwcqe->qp_handle));
if (!qp) { if (!qp) {
dev_err(&cq->hwq.pdev->dev, dev_err(&cq->hwq.pdev->dev,
"QPLIB: FP: Process Req qp is NULL"); "FP: Process Req qp is NULL\n");
return -EINVAL; return -EINVAL;
} }
sq = &qp->sq; sq = &qp->sq;
@@ -2221,16 +2220,14 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
cqe_sq_cons = HWQ_CMP(le16_to_cpu(hwcqe->sq_cons_idx), &sq->hwq); cqe_sq_cons = HWQ_CMP(le16_to_cpu(hwcqe->sq_cons_idx), &sq->hwq);
if (cqe_sq_cons > sq->hwq.max_elements) { if (cqe_sq_cons > sq->hwq.max_elements) {
dev_err(&cq->hwq.pdev->dev, dev_err(&cq->hwq.pdev->dev,
"QPLIB: FP: CQ Process req reported "); "FP: CQ Process req reported sq_cons_idx 0x%x which exceeded max 0x%x\n",
dev_err(&cq->hwq.pdev->dev,
"QPLIB: sq_cons_idx 0x%x which exceeded max 0x%x",
cqe_sq_cons, sq->hwq.max_elements); cqe_sq_cons, sq->hwq.max_elements);
return -EINVAL; return -EINVAL;
} }
if (qp->sq.flushed) { if (qp->sq.flushed) {
dev_dbg(&cq->hwq.pdev->dev, dev_dbg(&cq->hwq.pdev->dev,
"%s: QPLIB: QP in Flush QP = %p\n", __func__, qp); "%s: QP in Flush QP = %p\n", __func__, qp);
goto done; goto done;
} }
/* Require to walk the sq's swq to fabricate CQEs for all previously /* Require to walk the sq's swq to fabricate CQEs for all previously
@@ -2262,9 +2259,7 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
hwcqe->status != CQ_REQ_STATUS_OK) { hwcqe->status != CQ_REQ_STATUS_OK) {
cqe->status = hwcqe->status; cqe->status = hwcqe->status;
dev_err(&cq->hwq.pdev->dev, dev_err(&cq->hwq.pdev->dev,
"QPLIB: FP: CQ Processed Req "); "FP: CQ Processed Req wr_id[%d] = 0x%llx with status 0x%x\n",
dev_err(&cq->hwq.pdev->dev,
"QPLIB: wr_id[%d] = 0x%llx with status 0x%x",
sw_sq_cons, cqe->wr_id, cqe->status); sw_sq_cons, cqe->wr_id, cqe->status);
cqe++; cqe++;
(*budget)--; (*budget)--;
@@ -2330,12 +2325,12 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
qp = (struct bnxt_qplib_qp *)((unsigned long) qp = (struct bnxt_qplib_qp *)((unsigned long)
le64_to_cpu(hwcqe->qp_handle)); le64_to_cpu(hwcqe->qp_handle));
if (!qp) { if (!qp) {
dev_err(&cq->hwq.pdev->dev, "QPLIB: process_cq RC qp is NULL"); dev_err(&cq->hwq.pdev->dev, "process_cq RC qp is NULL\n");
return -EINVAL; return -EINVAL;
} }
if (qp->rq.flushed) { if (qp->rq.flushed) {
dev_dbg(&cq->hwq.pdev->dev, dev_dbg(&cq->hwq.pdev->dev,
"%s: QPLIB: QP in Flush QP = %p\n", __func__, qp); "%s: QP in Flush QP = %p\n", __func__, qp);
goto done; goto done;
} }
@@ -2356,9 +2351,7 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
return -EINVAL; return -EINVAL;
if (wr_id_idx >= srq->hwq.max_elements) { if (wr_id_idx >= srq->hwq.max_elements) {
dev_err(&cq->hwq.pdev->dev, dev_err(&cq->hwq.pdev->dev,
"QPLIB: FP: CQ Process RC "); "FP: CQ Process RC wr_id idx 0x%x exceeded SRQ max 0x%x\n",
dev_err(&cq->hwq.pdev->dev,
"QPLIB: wr_id idx 0x%x exceeded SRQ max 0x%x",
wr_id_idx, srq->hwq.max_elements); wr_id_idx, srq->hwq.max_elements);
return -EINVAL; return -EINVAL;
} }
@@ -2371,9 +2364,7 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
rq = &qp->rq; rq = &qp->rq;
if (wr_id_idx >= rq->hwq.max_elements) { if (wr_id_idx >= rq->hwq.max_elements) {
dev_err(&cq->hwq.pdev->dev, dev_err(&cq->hwq.pdev->dev,
"QPLIB: FP: CQ Process RC "); "FP: CQ Process RC wr_id idx 0x%x exceeded RQ max 0x%x\n",
dev_err(&cq->hwq.pdev->dev,
"QPLIB: wr_id idx 0x%x exceeded RQ max 0x%x",
wr_id_idx, rq->hwq.max_elements); wr_id_idx, rq->hwq.max_elements);
return -EINVAL; return -EINVAL;
} }
@@ -2409,12 +2400,12 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
qp = (struct bnxt_qplib_qp *)((unsigned long) qp = (struct bnxt_qplib_qp *)((unsigned long)
le64_to_cpu(hwcqe->qp_handle)); le64_to_cpu(hwcqe->qp_handle));
if (!qp) { if (!qp) {
dev_err(&cq->hwq.pdev->dev, "QPLIB: process_cq UD qp is NULL"); dev_err(&cq->hwq.pdev->dev, "process_cq UD qp is NULL\n");
return -EINVAL; return -EINVAL;
} }
if (qp->rq.flushed) { if (qp->rq.flushed) {
dev_dbg(&cq->hwq.pdev->dev, dev_dbg(&cq->hwq.pdev->dev,
"%s: QPLIB: QP in Flush QP = %p\n", __func__, qp); "%s: QP in Flush QP = %p\n", __func__, qp);
goto done; goto done;
} }
cqe = *pcqe; cqe = *pcqe;
@@ -2439,9 +2430,7 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
if (wr_id_idx >= srq->hwq.max_elements) { if (wr_id_idx >= srq->hwq.max_elements) {
dev_err(&cq->hwq.pdev->dev, dev_err(&cq->hwq.pdev->dev,
"QPLIB: FP: CQ Process UD "); "FP: CQ Process UD wr_id idx 0x%x exceeded SRQ max 0x%x\n",
dev_err(&cq->hwq.pdev->dev,
"QPLIB: wr_id idx 0x%x exceeded SRQ max 0x%x",
wr_id_idx, srq->hwq.max_elements); wr_id_idx, srq->hwq.max_elements);
return -EINVAL; return -EINVAL;
} }
@@ -2454,9 +2443,7 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
rq = &qp->rq; rq = &qp->rq;
if (wr_id_idx >= rq->hwq.max_elements) { if (wr_id_idx >= rq->hwq.max_elements) {
dev_err(&cq->hwq.pdev->dev, dev_err(&cq->hwq.pdev->dev,
"QPLIB: FP: CQ Process UD "); "FP: CQ Process UD wr_id idx 0x%x exceeded RQ max 0x%x\n",
dev_err(&cq->hwq.pdev->dev,
"QPLIB: wr_id idx 0x%x exceeded RQ max 0x%x",
wr_id_idx, rq->hwq.max_elements); wr_id_idx, rq->hwq.max_elements);
return -EINVAL; return -EINVAL;
} }
@@ -2508,13 +2495,12 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
qp = (struct bnxt_qplib_qp *)((unsigned long) qp = (struct bnxt_qplib_qp *)((unsigned long)
le64_to_cpu(hwcqe->qp_handle)); le64_to_cpu(hwcqe->qp_handle));
if (!qp) { if (!qp) {
dev_err(&cq->hwq.pdev->dev, dev_err(&cq->hwq.pdev->dev, "process_cq Raw/QP1 qp is NULL\n");
"QPLIB: process_cq Raw/QP1 qp is NULL");
return -EINVAL; return -EINVAL;
} }
if (qp->rq.flushed) { if (qp->rq.flushed) {
dev_dbg(&cq->hwq.pdev->dev, dev_dbg(&cq->hwq.pdev->dev,
"%s: QPLIB: QP in Flush QP = %p\n", __func__, qp); "%s: QP in Flush QP = %p\n", __func__, qp);
goto done; goto done;
} }
cqe = *pcqe; cqe = *pcqe;
@@ -2543,14 +2529,12 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
srq = qp->srq; srq = qp->srq;
if (!srq) { if (!srq) {
dev_err(&cq->hwq.pdev->dev, dev_err(&cq->hwq.pdev->dev,
"QPLIB: FP: SRQ used but not defined??"); "FP: SRQ used but not defined??\n");
return -EINVAL; return -EINVAL;
} }
if (wr_id_idx >= srq->hwq.max_elements) { if (wr_id_idx >= srq->hwq.max_elements) {
dev_err(&cq->hwq.pdev->dev, dev_err(&cq->hwq.pdev->dev,
"QPLIB: FP: CQ Process Raw/QP1 "); "FP: CQ Process Raw/QP1 wr_id idx 0x%x exceeded SRQ max 0x%x\n",
dev_err(&cq->hwq.pdev->dev,
"QPLIB: wr_id idx 0x%x exceeded SRQ max 0x%x",
wr_id_idx, srq->hwq.max_elements); wr_id_idx, srq->hwq.max_elements);
return -EINVAL; return -EINVAL;
} }
@@ -2563,9 +2547,7 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
rq = &qp->rq; rq = &qp->rq;
if (wr_id_idx >= rq->hwq.max_elements) { if (wr_id_idx >= rq->hwq.max_elements) {
dev_err(&cq->hwq.pdev->dev, dev_err(&cq->hwq.pdev->dev,
"QPLIB: FP: CQ Process Raw/QP1 RQ wr_id "); "FP: CQ Process Raw/QP1 RQ wr_id idx 0x%x exceeded RQ max 0x%x\n",
dev_err(&cq->hwq.pdev->dev,
"QPLIB: ix 0x%x exceeded RQ max 0x%x",
wr_id_idx, rq->hwq.max_elements); wr_id_idx, rq->hwq.max_elements);
return -EINVAL; return -EINVAL;
} }
@@ -2600,14 +2582,14 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq,
/* Check the Status */ /* Check the Status */
if (hwcqe->status != CQ_TERMINAL_STATUS_OK) if (hwcqe->status != CQ_TERMINAL_STATUS_OK)
dev_warn(&cq->hwq.pdev->dev, dev_warn(&cq->hwq.pdev->dev,
"QPLIB: FP: CQ Process Terminal Error status = 0x%x", "FP: CQ Process Terminal Error status = 0x%x\n",
hwcqe->status); hwcqe->status);
qp = (struct bnxt_qplib_qp *)((unsigned long) qp = (struct bnxt_qplib_qp *)((unsigned long)
le64_to_cpu(hwcqe->qp_handle)); le64_to_cpu(hwcqe->qp_handle));
if (!qp) { if (!qp) {
dev_err(&cq->hwq.pdev->dev, dev_err(&cq->hwq.pdev->dev,
"QPLIB: FP: CQ Process terminal qp is NULL"); "FP: CQ Process terminal qp is NULL\n");
return -EINVAL; return -EINVAL;
} }
@@ -2623,16 +2605,14 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq,
if (cqe_cons > sq->hwq.max_elements) { if (cqe_cons > sq->hwq.max_elements) {
dev_err(&cq->hwq.pdev->dev, dev_err(&cq->hwq.pdev->dev,
"QPLIB: FP: CQ Process terminal reported "); "FP: CQ Process terminal reported sq_cons_idx 0x%x which exceeded max 0x%x\n",
dev_err(&cq->hwq.pdev->dev,
"QPLIB: sq_cons_idx 0x%x which exceeded max 0x%x",
cqe_cons, sq->hwq.max_elements); cqe_cons, sq->hwq.max_elements);
goto do_rq; goto do_rq;
} }
if (qp->sq.flushed) { if (qp->sq.flushed) {
dev_dbg(&cq->hwq.pdev->dev, dev_dbg(&cq->hwq.pdev->dev,
"%s: QPLIB: QP in Flush QP = %p\n", __func__, qp); "%s: QP in Flush QP = %p\n", __func__, qp);
goto sq_done; goto sq_done;
} }
@@ -2673,16 +2653,14 @@ do_rq:
goto done; goto done;
} else if (cqe_cons > rq->hwq.max_elements) { } else if (cqe_cons > rq->hwq.max_elements) {
dev_err(&cq->hwq.pdev->dev, dev_err(&cq->hwq.pdev->dev,
"QPLIB: FP: CQ Processed terminal "); "FP: CQ Processed terminal reported rq_cons_idx 0x%x exceeds max 0x%x\n",
dev_err(&cq->hwq.pdev->dev,
"QPLIB: reported rq_cons_idx 0x%x exceeds max 0x%x",
cqe_cons, rq->hwq.max_elements); cqe_cons, rq->hwq.max_elements);
goto done; goto done;
} }
if (qp->rq.flushed) { if (qp->rq.flushed) {
dev_dbg(&cq->hwq.pdev->dev, dev_dbg(&cq->hwq.pdev->dev,
"%s: QPLIB: QP in Flush QP = %p\n", __func__, qp); "%s: QP in Flush QP = %p\n", __func__, qp);
rc = 0; rc = 0;
goto done; goto done;
} }
@@ -2704,7 +2682,7 @@ static int bnxt_qplib_cq_process_cutoff(struct bnxt_qplib_cq *cq,
/* Check the Status */ /* Check the Status */
if (hwcqe->status != CQ_CUTOFF_STATUS_OK) { if (hwcqe->status != CQ_CUTOFF_STATUS_OK) {
dev_err(&cq->hwq.pdev->dev, dev_err(&cq->hwq.pdev->dev,
"QPLIB: FP: CQ Process Cutoff Error status = 0x%x", "FP: CQ Process Cutoff Error status = 0x%x\n",
hwcqe->status); hwcqe->status);
return -EINVAL; return -EINVAL;
} }
@@ -2724,16 +2702,12 @@ int bnxt_qplib_process_flush_list(struct bnxt_qplib_cq *cq,
spin_lock_irqsave(&cq->flush_lock, flags); spin_lock_irqsave(&cq->flush_lock, flags);
list_for_each_entry(qp, &cq->sqf_head, sq_flush) { list_for_each_entry(qp, &cq->sqf_head, sq_flush) {
dev_dbg(&cq->hwq.pdev->dev, dev_dbg(&cq->hwq.pdev->dev, "FP: Flushing SQ QP= %p\n", qp);
"QPLIB: FP: Flushing SQ QP= %p",
qp);
__flush_sq(&qp->sq, qp, &cqe, &budget); __flush_sq(&qp->sq, qp, &cqe, &budget);
} }
list_for_each_entry(qp, &cq->rqf_head, rq_flush) { list_for_each_entry(qp, &cq->rqf_head, rq_flush) {
dev_dbg(&cq->hwq.pdev->dev, dev_dbg(&cq->hwq.pdev->dev, "FP: Flushing RQ QP= %p\n", qp);
"QPLIB: FP: Flushing RQ QP= %p",
qp);
__flush_rq(&qp->rq, qp, &cqe, &budget); __flush_rq(&qp->rq, qp, &cqe, &budget);
} }
spin_unlock_irqrestore(&cq->flush_lock, flags); spin_unlock_irqrestore(&cq->flush_lock, flags);
@@ -2801,7 +2775,7 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
goto exit; goto exit;
default: default:
dev_err(&cq->hwq.pdev->dev, dev_err(&cq->hwq.pdev->dev,
"QPLIB: process_cq unknown type 0x%lx", "process_cq unknown type 0x%lx\n",
hw_cqe->cqe_type_toggle & hw_cqe->cqe_type_toggle &
CQ_BASE_CQE_TYPE_MASK); CQ_BASE_CQE_TYPE_MASK);
rc = -EINVAL; rc = -EINVAL;
@@ -2814,7 +2788,7 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
* next one * next one
*/ */
dev_err(&cq->hwq.pdev->dev, dev_err(&cq->hwq.pdev->dev,
"QPLIB: process_cqe error rc = 0x%x", rc); "process_cqe error rc = 0x%x\n", rc);
} }
raw_cons++; raw_cons++;
} }


@@ -35,6 +35,9 @@
* *
* Description: RDMA Controller HW interface * Description: RDMA Controller HW interface
*/ */
#define dev_fmt(fmt) "QPLIB: " fmt
#include <linux/interrupt.h> #include <linux/interrupt.h>
#include <linux/spinlock.h> #include <linux/spinlock.h>
#include <linux/pci.h> #include <linux/pci.h>
@@ -96,14 +99,13 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
opcode != CMDQ_BASE_OPCODE_INITIALIZE_FW && opcode != CMDQ_BASE_OPCODE_INITIALIZE_FW &&
opcode != CMDQ_BASE_OPCODE_QUERY_VERSION)) { opcode != CMDQ_BASE_OPCODE_QUERY_VERSION)) {
dev_err(&rcfw->pdev->dev, dev_err(&rcfw->pdev->dev,
"QPLIB: RCFW not initialized, reject opcode 0x%x", "RCFW not initialized, reject opcode 0x%x\n", opcode);
opcode);
return -EINVAL; return -EINVAL;
} }
if (test_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags) && if (test_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags) &&
opcode == CMDQ_BASE_OPCODE_INITIALIZE_FW) { opcode == CMDQ_BASE_OPCODE_INITIALIZE_FW) {
dev_err(&rcfw->pdev->dev, "QPLIB: RCFW already initialized!"); dev_err(&rcfw->pdev->dev, "RCFW already initialized!\n");
return -EINVAL; return -EINVAL;
} }
@@ -115,7 +117,7 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
*/ */
spin_lock_irqsave(&cmdq->lock, flags); spin_lock_irqsave(&cmdq->lock, flags);
if (req->cmd_size >= HWQ_FREE_SLOTS(cmdq)) { if (req->cmd_size >= HWQ_FREE_SLOTS(cmdq)) {
dev_err(&rcfw->pdev->dev, "QPLIB: RCFW: CMDQ is full!"); dev_err(&rcfw->pdev->dev, "RCFW: CMDQ is full!\n");
spin_unlock_irqrestore(&cmdq->lock, flags); spin_unlock_irqrestore(&cmdq->lock, flags);
return -EAGAIN; return -EAGAIN;
} }
@@ -154,7 +156,7 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
cmdqe = &cmdq_ptr[get_cmdq_pg(sw_prod)][get_cmdq_idx(sw_prod)]; cmdqe = &cmdq_ptr[get_cmdq_pg(sw_prod)][get_cmdq_idx(sw_prod)];
if (!cmdqe) { if (!cmdqe) {
dev_err(&rcfw->pdev->dev, dev_err(&rcfw->pdev->dev,
"QPLIB: RCFW request failed with no cmdqe!"); "RCFW request failed with no cmdqe!\n");
goto done; goto done;
} }
/* Copy a segment of the req cmd to the cmdq */ /* Copy a segment of the req cmd to the cmdq */
@@ -210,7 +212,7 @@ int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
if (!retry_cnt || (rc != -EAGAIN && rc != -EBUSY)) { if (!retry_cnt || (rc != -EAGAIN && rc != -EBUSY)) {
/* send failed */ /* send failed */
dev_err(&rcfw->pdev->dev, "QPLIB: cmdq[%#x]=%#x send failed", dev_err(&rcfw->pdev->dev, "cmdq[%#x]=%#x send failed\n",
cookie, opcode); cookie, opcode);
return rc; return rc;
} }
@@ -224,7 +226,7 @@ int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
rc = __wait_for_resp(rcfw, cookie); rc = __wait_for_resp(rcfw, cookie);
if (rc) { if (rc) {
/* timed out */ /* timed out */
dev_err(&rcfw->pdev->dev, "QPLIB: cmdq[%#x]=%#x timedout (%d)msec", dev_err(&rcfw->pdev->dev, "cmdq[%#x]=%#x timedout (%d)msec\n",
cookie, opcode, RCFW_CMD_WAIT_TIME_MS); cookie, opcode, RCFW_CMD_WAIT_TIME_MS);
set_bit(FIRMWARE_TIMED_OUT, &rcfw->flags); set_bit(FIRMWARE_TIMED_OUT, &rcfw->flags);
return rc; return rc;
@@ -232,7 +234,7 @@ int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
if (evnt->status) { if (evnt->status) {
/* failed with status */ /* failed with status */
dev_err(&rcfw->pdev->dev, "QPLIB: cmdq[%#x]=%#x status %#x", dev_err(&rcfw->pdev->dev, "cmdq[%#x]=%#x status %#x\n",
cookie, opcode, evnt->status); cookie, opcode, evnt->status);
rc = -EFAULT; rc = -EFAULT;
} }
@@ -298,9 +300,9 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
qp_id = le32_to_cpu(err_event->xid); qp_id = le32_to_cpu(err_event->xid);
qp = rcfw->qp_tbl[qp_id].qp_handle; qp = rcfw->qp_tbl[qp_id].qp_handle;
dev_dbg(&rcfw->pdev->dev, dev_dbg(&rcfw->pdev->dev,
"QPLIB: Received QP error notification"); "Received QP error notification\n");
dev_dbg(&rcfw->pdev->dev, dev_dbg(&rcfw->pdev->dev,
"QPLIB: qpid 0x%x, req_err=0x%x, resp_err=0x%x\n", "qpid 0x%x, req_err=0x%x, resp_err=0x%x\n",
qp_id, err_event->req_err_state_reason, qp_id, err_event->req_err_state_reason,
err_event->res_err_state_reason); err_event->res_err_state_reason);
if (!qp) if (!qp)
@@ -309,8 +311,17 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
rcfw->aeq_handler(rcfw, qp_event, qp); rcfw->aeq_handler(rcfw, qp_event, qp);
break; break;
default: default:
/* Command Response */ /*
spin_lock_irqsave(&cmdq->lock, flags); * Command Response
* cmdq->lock needs to be acquired to synchronize
* the command send and completion reaping. This function
* is always called with creq->lock held, so use
* the nested variant of spin_lock.
*
*/
spin_lock_irqsave_nested(&cmdq->lock, flags,
SINGLE_DEPTH_NESTING);
cookie = le16_to_cpu(qp_event->cookie); cookie = le16_to_cpu(qp_event->cookie);
mcookie = qp_event->cookie; mcookie = qp_event->cookie;
blocked = cookie & RCFW_CMD_IS_BLOCKING; blocked = cookie & RCFW_CMD_IS_BLOCKING;
@@ -322,14 +333,16 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
memcpy(crsqe->resp, qp_event, sizeof(*qp_event)); memcpy(crsqe->resp, qp_event, sizeof(*qp_event));
crsqe->resp = NULL; crsqe->resp = NULL;
} else { } else {
dev_err(&rcfw->pdev->dev, if (crsqe->resp && crsqe->resp->cookie)
"QPLIB: CMD %s resp->cookie = %#x, evnt->cookie = %#x", dev_err(&rcfw->pdev->dev,
crsqe->resp ? "mismatch" : "collision", "CMD %s cookie sent=%#x, recd=%#x\n",
crsqe->resp ? crsqe->resp->cookie : 0, mcookie); crsqe->resp ? "mismatch" : "collision",
crsqe->resp ? crsqe->resp->cookie : 0,
mcookie);
} }
if (!test_and_clear_bit(cbit, rcfw->cmdq_bitmap)) if (!test_and_clear_bit(cbit, rcfw->cmdq_bitmap))
dev_warn(&rcfw->pdev->dev, dev_warn(&rcfw->pdev->dev,
"QPLIB: CMD bit %d was not requested", cbit); "CMD bit %d was not requested\n", cbit);
cmdq->cons += crsqe->req_size; cmdq->cons += crsqe->req_size;
crsqe->req_size = 0; crsqe->req_size = 0;
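/*
 * Hedged sketch, not part of this patch: the lockdep annotation pattern
 * used in the hunk above. When a lock must be taken while another lock
 * that lockdep may treat as the same class is already held (here
 * cmdq->lock under creq->lock), the inner acquisition is marked with
 * SINGLE_DEPTH_NESTING so lockdep does not flag it as a recursive
 * deadlock. 'example_nested' and its parameters are illustrative only.
 */
static void example_nested(spinlock_t *outer, spinlock_t *inner)
{
	unsigned long oflags, iflags;

	spin_lock_irqsave(outer, oflags);
	spin_lock_irqsave_nested(inner, iflags, SINGLE_DEPTH_NESTING);
	/* ... work that touches both protected structures ... */
	spin_unlock_irqrestore(inner, iflags);
	spin_unlock_irqrestore(outer, oflags);
}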
@@ -376,14 +389,14 @@ static void bnxt_qplib_service_creq(unsigned long data)
(rcfw, (struct creq_func_event *)creqe)) (rcfw, (struct creq_func_event *)creqe))
rcfw->creq_func_event_processed++; rcfw->creq_func_event_processed++;
else else
dev_warn dev_warn(&rcfw->pdev->dev,
(&rcfw->pdev->dev, "QPLIB:aeqe:%#x Not handled", "aeqe:%#x Not handled\n", type);
type);
break; break;
default: default:
dev_warn(&rcfw->pdev->dev, "QPLIB: creqe with "); if (type != ASYNC_EVENT_CMPL_TYPE_HWRM_ASYNC_EVENT)
dev_warn(&rcfw->pdev->dev, dev_warn(&rcfw->pdev->dev,
"QPLIB: op_event = 0x%x not handled", type); "creqe with event 0x%x not handled\n",
type);
break; break;
} }
raw_cons++; raw_cons++;
@@ -551,7 +564,7 @@ int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev,
BNXT_QPLIB_CREQE_UNITS, 0, PAGE_SIZE, BNXT_QPLIB_CREQE_UNITS, 0, PAGE_SIZE,
HWQ_TYPE_L2_CMPL)) { HWQ_TYPE_L2_CMPL)) {
dev_err(&rcfw->pdev->dev, dev_err(&rcfw->pdev->dev,
"QPLIB: HW channel CREQ allocation failed"); "HW channel CREQ allocation failed\n");
goto fail; goto fail;
} }
rcfw->cmdq.max_elements = BNXT_QPLIB_CMDQE_MAX_CNT; rcfw->cmdq.max_elements = BNXT_QPLIB_CMDQE_MAX_CNT;
@@ -560,7 +573,7 @@ int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev,
BNXT_QPLIB_CMDQE_UNITS, 0, PAGE_SIZE, BNXT_QPLIB_CMDQE_UNITS, 0, PAGE_SIZE,
HWQ_TYPE_CTX)) { HWQ_TYPE_CTX)) {
dev_err(&rcfw->pdev->dev, dev_err(&rcfw->pdev->dev,
"QPLIB: HW channel CMDQ allocation failed"); "HW channel CMDQ allocation failed\n");
goto fail; goto fail;
} }
@@ -605,21 +618,18 @@ void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw)
bnxt_qplib_rcfw_stop_irq(rcfw, true); bnxt_qplib_rcfw_stop_irq(rcfw, true);
if (rcfw->cmdq_bar_reg_iomem) iounmap(rcfw->cmdq_bar_reg_iomem);
iounmap(rcfw->cmdq_bar_reg_iomem); iounmap(rcfw->creq_bar_reg_iomem);
rcfw->cmdq_bar_reg_iomem = NULL;
if (rcfw->creq_bar_reg_iomem)
iounmap(rcfw->creq_bar_reg_iomem);
rcfw->creq_bar_reg_iomem = NULL;
indx = find_first_bit(rcfw->cmdq_bitmap, rcfw->bmap_size); indx = find_first_bit(rcfw->cmdq_bitmap, rcfw->bmap_size);
if (indx != rcfw->bmap_size) if (indx != rcfw->bmap_size)
dev_err(&rcfw->pdev->dev, dev_err(&rcfw->pdev->dev,
"QPLIB: disabling RCFW with pending cmd-bit %lx", indx); "disabling RCFW with pending cmd-bit %lx\n", indx);
kfree(rcfw->cmdq_bitmap); kfree(rcfw->cmdq_bitmap);
rcfw->bmap_size = 0; rcfw->bmap_size = 0;
rcfw->cmdq_bar_reg_iomem = NULL;
rcfw->creq_bar_reg_iomem = NULL;
rcfw->aeq_handler = NULL; rcfw->aeq_handler = NULL;
rcfw->vector = 0; rcfw->vector = 0;
} }
@@ -681,8 +691,7 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
RCFW_COMM_BASE_OFFSET, RCFW_COMM_BASE_OFFSET,
RCFW_COMM_SIZE); RCFW_COMM_SIZE);
if (!rcfw->cmdq_bar_reg_iomem) { if (!rcfw->cmdq_bar_reg_iomem) {
dev_err(&rcfw->pdev->dev, dev_err(&rcfw->pdev->dev, "CMDQ BAR region %d mapping failed\n",
"QPLIB: CMDQ BAR region %d mapping failed",
rcfw->cmdq_bar_reg); rcfw->cmdq_bar_reg);
return -ENOMEM; return -ENOMEM;
} }
@@ -697,14 +706,15 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
res_base = pci_resource_start(pdev, rcfw->creq_bar_reg); res_base = pci_resource_start(pdev, rcfw->creq_bar_reg);
if (!res_base) if (!res_base)
dev_err(&rcfw->pdev->dev, dev_err(&rcfw->pdev->dev,
"QPLIB: CREQ BAR region %d resc start is 0!", "CREQ BAR region %d resc start is 0!\n",
rcfw->creq_bar_reg); rcfw->creq_bar_reg);
rcfw->creq_bar_reg_iomem = ioremap_nocache(res_base + cp_bar_reg_off, rcfw->creq_bar_reg_iomem = ioremap_nocache(res_base + cp_bar_reg_off,
4); 4);
if (!rcfw->creq_bar_reg_iomem) { if (!rcfw->creq_bar_reg_iomem) {
dev_err(&rcfw->pdev->dev, dev_err(&rcfw->pdev->dev, "CREQ BAR region %d mapping failed\n",
"QPLIB: CREQ BAR region %d mapping failed",
rcfw->creq_bar_reg); rcfw->creq_bar_reg);
iounmap(rcfw->cmdq_bar_reg_iomem);
rcfw->cmdq_bar_reg_iomem = NULL;
return -ENOMEM; return -ENOMEM;
} }
rcfw->creq_qp_event_processed = 0; rcfw->creq_qp_event_processed = 0;
@@ -717,7 +727,7 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
rc = bnxt_qplib_rcfw_start_irq(rcfw, msix_vector, true); rc = bnxt_qplib_rcfw_start_irq(rcfw, msix_vector, true);
if (rc) { if (rc) {
dev_err(&rcfw->pdev->dev, dev_err(&rcfw->pdev->dev,
"QPLIB: Failed to request IRQ for CREQ rc = 0x%x", rc); "Failed to request IRQ for CREQ rc = 0x%x\n", rc);
bnxt_qplib_disable_rcfw_channel(rcfw); bnxt_qplib_disable_rcfw_channel(rcfw);
return rc; return rc;
} }
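
A minimal, illustrative sketch of the lockdep annotation used above (not code from this commit; the lock names are made up): the _nested variant supplies a lockdep subclass so that taking a second spinlock while another lock is already held on the same path is tracked as a deliberate nesting level instead of being flagged as a false-positive deadlock, which is what the new comment in bnxt_qplib_process_qp_event() relies on.

	#include <linux/spinlock.h>

	static DEFINE_SPINLOCK(outer_lock);	/* e.g. the creq lock held by the caller */
	static DEFINE_SPINLOCK(inner_lock);	/* e.g. the cmdq lock taken here */

	static void reap_completions(void)
	{
		unsigned long flags;

		spin_lock(&outer_lock);
		/* SINGLE_DEPTH_NESTING (== 1) marks one deliberate extra level */
		spin_lock_irqsave_nested(&inner_lock, flags, SINGLE_DEPTH_NESTING);
		/* ... reap command completions under both locks ... */
		spin_unlock_irqrestore(&inner_lock, flags);
		spin_unlock(&outer_lock);
	}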

View File

@@ -154,6 +154,8 @@ struct bnxt_qplib_qp_node {
void *qp_handle; /* ptr to qplib_qp */ void *qp_handle; /* ptr to qplib_qp */
}; };
#define BNXT_QPLIB_OOS_COUNT_MASK 0xFFFFFFFF
/* RCFW Communication Channels */ /* RCFW Communication Channels */
struct bnxt_qplib_rcfw { struct bnxt_qplib_rcfw {
struct pci_dev *pdev; struct pci_dev *pdev;
@@ -190,6 +192,8 @@ struct bnxt_qplib_rcfw {
struct bnxt_qplib_crsq *crsqe_tbl; struct bnxt_qplib_crsq *crsqe_tbl;
int qp_tbl_size; int qp_tbl_size;
struct bnxt_qplib_qp_node *qp_tbl; struct bnxt_qplib_qp_node *qp_tbl;
u64 oos_prev;
u32 init_oos_stats;
}; };
void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw); void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw);

View File

@@ -36,6 +36,8 @@
* Description: QPLib resource manager * Description: QPLib resource manager
*/ */
#define dev_fmt(fmt) "QPLIB: " fmt
#include <linux/spinlock.h> #include <linux/spinlock.h>
#include <linux/pci.h> #include <linux/pci.h>
#include <linux/interrupt.h> #include <linux/interrupt.h>
@@ -68,8 +70,7 @@ static void __free_pbl(struct pci_dev *pdev, struct bnxt_qplib_pbl *pbl,
pbl->pg_map_arr[i]); pbl->pg_map_arr[i]);
else else
dev_warn(&pdev->dev, dev_warn(&pdev->dev,
"QPLIB: PBL free pg_arr[%d] empty?!", "PBL free pg_arr[%d] empty?!\n", i);
i);
pbl->pg_arr[i] = NULL; pbl->pg_arr[i] = NULL;
} }
} }
@@ -537,7 +538,7 @@ static void bnxt_qplib_free_pkey_tbl(struct bnxt_qplib_res *res,
struct bnxt_qplib_pkey_tbl *pkey_tbl) struct bnxt_qplib_pkey_tbl *pkey_tbl)
{ {
if (!pkey_tbl->tbl) if (!pkey_tbl->tbl)
dev_dbg(&res->pdev->dev, "QPLIB: PKEY tbl not present"); dev_dbg(&res->pdev->dev, "PKEY tbl not present\n");
else else
kfree(pkey_tbl->tbl); kfree(pkey_tbl->tbl);
@@ -578,7 +579,7 @@ int bnxt_qplib_dealloc_pd(struct bnxt_qplib_res *res,
struct bnxt_qplib_pd *pd) struct bnxt_qplib_pd *pd)
{ {
if (test_and_set_bit(pd->id, pdt->tbl)) { if (test_and_set_bit(pd->id, pdt->tbl)) {
dev_warn(&res->pdev->dev, "Freeing an unused PD? pdn = %d", dev_warn(&res->pdev->dev, "Freeing an unused PD? pdn = %d\n",
pd->id); pd->id);
return -EINVAL; return -EINVAL;
} }
@@ -639,11 +640,11 @@ int bnxt_qplib_dealloc_dpi(struct bnxt_qplib_res *res,
struct bnxt_qplib_dpi *dpi) struct bnxt_qplib_dpi *dpi)
{ {
if (dpi->dpi >= dpit->max) { if (dpi->dpi >= dpit->max) {
dev_warn(&res->pdev->dev, "Invalid DPI? dpi = %d", dpi->dpi); dev_warn(&res->pdev->dev, "Invalid DPI? dpi = %d\n", dpi->dpi);
return -EINVAL; return -EINVAL;
} }
if (test_and_set_bit(dpi->dpi, dpit->tbl)) { if (test_and_set_bit(dpi->dpi, dpit->tbl)) {
dev_warn(&res->pdev->dev, "Freeing an unused DPI? dpi = %d", dev_warn(&res->pdev->dev, "Freeing an unused DPI? dpi = %d\n",
dpi->dpi); dpi->dpi);
return -EINVAL; return -EINVAL;
} }
@@ -673,22 +674,21 @@ static int bnxt_qplib_alloc_dpi_tbl(struct bnxt_qplib_res *res,
u32 dbr_len, bytes; u32 dbr_len, bytes;
if (dpit->dbr_bar_reg_iomem) { if (dpit->dbr_bar_reg_iomem) {
dev_err(&res->pdev->dev, dev_err(&res->pdev->dev, "DBR BAR region %d already mapped\n",
"QPLIB: DBR BAR region %d already mapped", dbr_bar_reg); dbr_bar_reg);
return -EALREADY; return -EALREADY;
} }
bar_reg_base = pci_resource_start(res->pdev, dbr_bar_reg); bar_reg_base = pci_resource_start(res->pdev, dbr_bar_reg);
if (!bar_reg_base) { if (!bar_reg_base) {
dev_err(&res->pdev->dev, dev_err(&res->pdev->dev, "BAR region %d resc start failed\n",
"QPLIB: BAR region %d resc start failed", dbr_bar_reg); dbr_bar_reg);
return -ENOMEM; return -ENOMEM;
} }
dbr_len = pci_resource_len(res->pdev, dbr_bar_reg) - dbr_offset; dbr_len = pci_resource_len(res->pdev, dbr_bar_reg) - dbr_offset;
if (!dbr_len || ((dbr_len & (PAGE_SIZE - 1)) != 0)) { if (!dbr_len || ((dbr_len & (PAGE_SIZE - 1)) != 0)) {
dev_err(&res->pdev->dev, "QPLIB: Invalid DBR length %d", dev_err(&res->pdev->dev, "Invalid DBR length %d\n", dbr_len);
dbr_len);
return -ENOMEM; return -ENOMEM;
} }
@@ -696,8 +696,7 @@ static int bnxt_qplib_alloc_dpi_tbl(struct bnxt_qplib_res *res,
dbr_len); dbr_len);
if (!dpit->dbr_bar_reg_iomem) { if (!dpit->dbr_bar_reg_iomem) {
dev_err(&res->pdev->dev, dev_err(&res->pdev->dev,
"QPLIB: FP: DBR BAR region %d mapping failed", "FP: DBR BAR region %d mapping failed\n", dbr_bar_reg);
dbr_bar_reg);
return -ENOMEM; return -ENOMEM;
} }
@@ -767,7 +766,7 @@ static int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev,
stats->dma = dma_alloc_coherent(&pdev->dev, stats->size, stats->dma = dma_alloc_coherent(&pdev->dev, stats->size,
&stats->dma_map, GFP_KERNEL); &stats->dma_map, GFP_KERNEL);
if (!stats->dma) { if (!stats->dma) {
dev_err(&pdev->dev, "QPLIB: Stats DMA allocation failed"); dev_err(&pdev->dev, "Stats DMA allocation failed\n");
return -ENOMEM; return -ENOMEM;
} }
return 0; return 0;

View File

@@ -36,6 +36,8 @@
* Description: Slow Path Operators * Description: Slow Path Operators
*/ */
#define dev_fmt(fmt) "QPLIB: " fmt
#include <linux/interrupt.h> #include <linux/interrupt.h>
#include <linux/spinlock.h> #include <linux/spinlock.h>
#include <linux/sched.h> #include <linux/sched.h>
@@ -89,7 +91,7 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb)); sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb));
if (!sbuf) { if (!sbuf) {
dev_err(&rcfw->pdev->dev, dev_err(&rcfw->pdev->dev,
"QPLIB: SP: QUERY_FUNC alloc side buffer failed"); "SP: QUERY_FUNC alloc side buffer failed\n");
return -ENOMEM; return -ENOMEM;
} }
@@ -135,8 +137,16 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
attr->max_srq = le16_to_cpu(sb->max_srq); attr->max_srq = le16_to_cpu(sb->max_srq);
attr->max_srq_wqes = le32_to_cpu(sb->max_srq_wr) - 1; attr->max_srq_wqes = le32_to_cpu(sb->max_srq_wr) - 1;
attr->max_srq_sges = sb->max_srq_sge; attr->max_srq_sges = sb->max_srq_sge;
/* Bono only reports 1 PKEY for now, but it can support > 1 */
attr->max_pkey = le32_to_cpu(sb->max_pkeys); attr->max_pkey = le32_to_cpu(sb->max_pkeys);
/*
* Some versions of FW report more than 0xFFFF.
* Restrict it for now to 0xFFFF to avoid
* reporting a truncated value.
*/
if (attr->max_pkey > 0xFFFF) {
/* ib_port_attr::pkey_tbl_len is u16 */
attr->max_pkey = 0xFFFF;
}
attr->max_inline_data = le32_to_cpu(sb->max_inline_data); attr->max_inline_data = le32_to_cpu(sb->max_inline_data);
attr->l2_db_size = (sb->l2_db_space_size + 1) * attr->l2_db_size = (sb->l2_db_space_size + 1) *
@@ -186,8 +196,7 @@ int bnxt_qplib_set_func_resources(struct bnxt_qplib_res *res,
(void *)&resp, (void *)&resp,
NULL, 0); NULL, 0);
if (rc) { if (rc) {
dev_err(&res->pdev->dev, dev_err(&res->pdev->dev, "Failed to set function resources\n");
"QPLIB: Failed to set function resources");
} }
return rc; return rc;
} }
@@ -199,7 +208,7 @@ int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res,
{ {
if (index >= sgid_tbl->max) { if (index >= sgid_tbl->max) {
dev_err(&res->pdev->dev, dev_err(&res->pdev->dev,
"QPLIB: Index %d exceeded SGID table max (%d)", "Index %d exceeded SGID table max (%d)\n",
index, sgid_tbl->max); index, sgid_tbl->max);
return -EINVAL; return -EINVAL;
} }
@@ -217,13 +226,12 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
int index; int index;
if (!sgid_tbl) { if (!sgid_tbl) {
dev_err(&res->pdev->dev, "QPLIB: SGID table not allocated"); dev_err(&res->pdev->dev, "SGID table not allocated\n");
return -EINVAL; return -EINVAL;
} }
/* Do we need a sgid_lock here? */ /* Do we need a sgid_lock here? */
if (!sgid_tbl->active) { if (!sgid_tbl->active) {
dev_err(&res->pdev->dev, dev_err(&res->pdev->dev, "SGID table has no active entries\n");
"QPLIB: SGID table has no active entries");
return -ENOMEM; return -ENOMEM;
} }
for (index = 0; index < sgid_tbl->max; index++) { for (index = 0; index < sgid_tbl->max; index++) {
@@ -231,7 +239,7 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
break; break;
} }
if (index == sgid_tbl->max) { if (index == sgid_tbl->max) {
dev_warn(&res->pdev->dev, "GID not found in the SGID table"); dev_warn(&res->pdev->dev, "GID not found in the SGID table\n");
return 0; return 0;
} }
/* Remove GID from the SGID table */ /* Remove GID from the SGID table */
@@ -244,7 +252,7 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
RCFW_CMD_PREP(req, DELETE_GID, cmd_flags); RCFW_CMD_PREP(req, DELETE_GID, cmd_flags);
if (sgid_tbl->hw_id[index] == 0xFFFF) { if (sgid_tbl->hw_id[index] == 0xFFFF) {
dev_err(&res->pdev->dev, dev_err(&res->pdev->dev,
"QPLIB: GID entry contains an invalid HW id"); "GID entry contains an invalid HW id\n");
return -EINVAL; return -EINVAL;
} }
req.gid_index = cpu_to_le16(sgid_tbl->hw_id[index]); req.gid_index = cpu_to_le16(sgid_tbl->hw_id[index]);
@@ -258,7 +266,7 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
sgid_tbl->vlan[index] = 0; sgid_tbl->vlan[index] = 0;
sgid_tbl->active--; sgid_tbl->active--;
dev_dbg(&res->pdev->dev, dev_dbg(&res->pdev->dev,
"QPLIB: SGID deleted hw_id[0x%x] = 0x%x active = 0x%x", "SGID deleted hw_id[0x%x] = 0x%x active = 0x%x\n",
index, sgid_tbl->hw_id[index], sgid_tbl->active); index, sgid_tbl->hw_id[index], sgid_tbl->active);
sgid_tbl->hw_id[index] = (u16)-1; sgid_tbl->hw_id[index] = (u16)-1;
@@ -277,20 +285,19 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
int i, free_idx; int i, free_idx;
if (!sgid_tbl) { if (!sgid_tbl) {
dev_err(&res->pdev->dev, "QPLIB: SGID table not allocated"); dev_err(&res->pdev->dev, "SGID table not allocated\n");
return -EINVAL; return -EINVAL;
} }
/* Do we need a sgid_lock here? */ /* Do we need a sgid_lock here? */
if (sgid_tbl->active == sgid_tbl->max) { if (sgid_tbl->active == sgid_tbl->max) {
dev_err(&res->pdev->dev, "QPLIB: SGID table is full"); dev_err(&res->pdev->dev, "SGID table is full\n");
return -ENOMEM; return -ENOMEM;
} }
free_idx = sgid_tbl->max; free_idx = sgid_tbl->max;
for (i = 0; i < sgid_tbl->max; i++) { for (i = 0; i < sgid_tbl->max; i++) {
if (!memcmp(&sgid_tbl->tbl[i], gid, sizeof(*gid))) { if (!memcmp(&sgid_tbl->tbl[i], gid, sizeof(*gid))) {
dev_dbg(&res->pdev->dev, dev_dbg(&res->pdev->dev,
"QPLIB: SGID entry already exist in entry %d!", "SGID entry already exist in entry %d!\n", i);
i);
*index = i; *index = i;
return -EALREADY; return -EALREADY;
} else if (!memcmp(&sgid_tbl->tbl[i], &bnxt_qplib_gid_zero, } else if (!memcmp(&sgid_tbl->tbl[i], &bnxt_qplib_gid_zero,
@@ -301,7 +308,7 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
} }
if (free_idx == sgid_tbl->max) { if (free_idx == sgid_tbl->max) {
dev_err(&res->pdev->dev, dev_err(&res->pdev->dev,
"QPLIB: SGID table is FULL but count is not MAX??"); "SGID table is FULL but count is not MAX??\n");
return -ENOMEM; return -ENOMEM;
} }
if (update) { if (update) {
@@ -348,7 +355,7 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
sgid_tbl->vlan[free_idx] = 1; sgid_tbl->vlan[free_idx] = 1;
dev_dbg(&res->pdev->dev, dev_dbg(&res->pdev->dev,
"QPLIB: SGID added hw_id[0x%x] = 0x%x active = 0x%x", "SGID added hw_id[0x%x] = 0x%x active = 0x%x\n",
free_idx, sgid_tbl->hw_id[free_idx], sgid_tbl->active); free_idx, sgid_tbl->hw_id[free_idx], sgid_tbl->active);
*index = free_idx; *index = free_idx;
@@ -404,7 +411,7 @@ int bnxt_qplib_get_pkey(struct bnxt_qplib_res *res,
} }
if (index >= pkey_tbl->max) { if (index >= pkey_tbl->max) {
dev_err(&res->pdev->dev, dev_err(&res->pdev->dev,
"QPLIB: Index %d exceeded PKEY table max (%d)", "Index %d exceeded PKEY table max (%d)\n",
index, pkey_tbl->max); index, pkey_tbl->max);
return -EINVAL; return -EINVAL;
} }
@@ -419,14 +426,13 @@ int bnxt_qplib_del_pkey(struct bnxt_qplib_res *res,
int i, rc = 0; int i, rc = 0;
if (!pkey_tbl) { if (!pkey_tbl) {
dev_err(&res->pdev->dev, "QPLIB: PKEY table not allocated"); dev_err(&res->pdev->dev, "PKEY table not allocated\n");
return -EINVAL; return -EINVAL;
} }
/* Do we need a pkey_lock here? */ /* Do we need a pkey_lock here? */
if (!pkey_tbl->active) { if (!pkey_tbl->active) {
dev_err(&res->pdev->dev, dev_err(&res->pdev->dev, "PKEY table has no active entries\n");
"QPLIB: PKEY table has no active entries");
return -ENOMEM; return -ENOMEM;
} }
for (i = 0; i < pkey_tbl->max; i++) { for (i = 0; i < pkey_tbl->max; i++) {
@@ -435,8 +441,7 @@ int bnxt_qplib_del_pkey(struct bnxt_qplib_res *res,
} }
if (i == pkey_tbl->max) { if (i == pkey_tbl->max) {
dev_err(&res->pdev->dev, dev_err(&res->pdev->dev,
"QPLIB: PKEY 0x%04x not found in the pkey table", "PKEY 0x%04x not found in the pkey table\n", *pkey);
*pkey);
return -ENOMEM; return -ENOMEM;
} }
memset(&pkey_tbl->tbl[i], 0, sizeof(*pkey)); memset(&pkey_tbl->tbl[i], 0, sizeof(*pkey));
@@ -453,13 +458,13 @@ int bnxt_qplib_add_pkey(struct bnxt_qplib_res *res,
int i, free_idx, rc = 0; int i, free_idx, rc = 0;
if (!pkey_tbl) { if (!pkey_tbl) {
dev_err(&res->pdev->dev, "QPLIB: PKEY table not allocated"); dev_err(&res->pdev->dev, "PKEY table not allocated\n");
return -EINVAL; return -EINVAL;
} }
/* Do we need a pkey_lock here? */ /* Do we need a pkey_lock here? */
if (pkey_tbl->active == pkey_tbl->max) { if (pkey_tbl->active == pkey_tbl->max) {
dev_err(&res->pdev->dev, "QPLIB: PKEY table is full"); dev_err(&res->pdev->dev, "PKEY table is full\n");
return -ENOMEM; return -ENOMEM;
} }
free_idx = pkey_tbl->max; free_idx = pkey_tbl->max;
@@ -471,7 +476,7 @@ int bnxt_qplib_add_pkey(struct bnxt_qplib_res *res,
} }
if (free_idx == pkey_tbl->max) { if (free_idx == pkey_tbl->max) {
dev_err(&res->pdev->dev, dev_err(&res->pdev->dev,
"QPLIB: PKEY table is FULL but count is not MAX??"); "PKEY table is FULL but count is not MAX??\n");
return -ENOMEM; return -ENOMEM;
} }
/* Add PKEY to the pkey_tbl */ /* Add PKEY to the pkey_tbl */
@@ -555,8 +560,7 @@ int bnxt_qplib_free_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw)
int rc; int rc;
if (mrw->lkey == 0xFFFFFFFF) { if (mrw->lkey == 0xFFFFFFFF) {
dev_info(&res->pdev->dev, dev_info(&res->pdev->dev, "SP: Free a reserved lkey MRW\n");
"QPLIB: SP: Free a reserved lkey MRW");
return 0; return 0;
} }
@@ -666,9 +670,8 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
pages++; pages++;
if (pages > MAX_PBL_LVL_1_PGS) { if (pages > MAX_PBL_LVL_1_PGS) {
dev_err(&res->pdev->dev, "QPLIB: SP: Reg MR pages ");
dev_err(&res->pdev->dev, dev_err(&res->pdev->dev,
"requested (0x%x) exceeded max (0x%x)", "SP: Reg MR pages requested (0x%x) exceeded max (0x%x)\n",
pages, MAX_PBL_LVL_1_PGS); pages, MAX_PBL_LVL_1_PGS);
return -ENOMEM; return -ENOMEM;
} }
@@ -684,7 +687,7 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
HWQ_TYPE_CTX); HWQ_TYPE_CTX);
if (rc) { if (rc) {
dev_err(&res->pdev->dev, dev_err(&res->pdev->dev,
"SP: Reg MR memory allocation failed"); "SP: Reg MR memory allocation failed\n");
return -ENOMEM; return -ENOMEM;
} }
/* Write to the hwq */ /* Write to the hwq */
@@ -795,7 +798,7 @@ int bnxt_qplib_get_roce_stats(struct bnxt_qplib_rcfw *rcfw,
sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb)); sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb));
if (!sbuf) { if (!sbuf) {
dev_err(&rcfw->pdev->dev, dev_err(&rcfw->pdev->dev,
"QPLIB: SP: QUERY_ROCE_STATS alloc side buffer failed"); "SP: QUERY_ROCE_STATS alloc side buffer failed\n");
return -ENOMEM; return -ENOMEM;
} }
@@ -845,6 +848,16 @@ int bnxt_qplib_get_roce_stats(struct bnxt_qplib_rcfw *rcfw,
stats->res_srq_load_err = le64_to_cpu(sb->res_srq_load_err); stats->res_srq_load_err = le64_to_cpu(sb->res_srq_load_err);
stats->res_tx_pci_err = le64_to_cpu(sb->res_tx_pci_err); stats->res_tx_pci_err = le64_to_cpu(sb->res_tx_pci_err);
stats->res_rx_pci_err = le64_to_cpu(sb->res_rx_pci_err); stats->res_rx_pci_err = le64_to_cpu(sb->res_rx_pci_err);
if (!rcfw->init_oos_stats) {
rcfw->oos_prev = le64_to_cpu(sb->res_oos_drop_count);
rcfw->init_oos_stats = 1;
} else {
stats->res_oos_drop_count +=
(le64_to_cpu(sb->res_oos_drop_count) -
rcfw->oos_prev) & BNXT_QPLIB_OOS_COUNT_MASK;
rcfw->oos_prev = le64_to_cpu(sb->res_oos_drop_count);
}
bail: bail:
bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf); bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf);
return rc; return rc;
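
The res_oos_drop_count handling above follows a common pattern for accumulating a wrapping hardware counter: keep the previous raw reading and add only the masked delta to a monotonic software total. A minimal sketch under assumed names (not code from this commit):

	#include <linux/types.h>

	#define HW_COUNT_MASK 0xFFFFFFFFULL		/* counter wraps at 32 bits */

	struct hw_counter {
		u64 prev_raw;	/* last raw value read from hardware */
		u64 total;	/* monotonically accumulated software total */
		bool primed;	/* first read only records prev_raw */
	};

	static void hw_counter_accumulate(struct hw_counter *c, u64 raw)
	{
		if (!c->primed) {
			c->prev_raw = raw;
			c->primed = true;
			return;
		}
		/* masked subtraction stays correct across a single wrap */
		c->total += (raw - c->prev_raw) & HW_COUNT_MASK;
		c->prev_raw = raw;
	}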

View File

@@ -205,6 +205,16 @@ struct bnxt_qplib_roce_stats {
/* res_tx_pci_err is 64 b */ /* res_tx_pci_err is 64 b */
u64 res_rx_pci_err; u64 res_rx_pci_err;
/* res_rx_pci_err is 64 b */ /* res_rx_pci_err is 64 b */
u64 res_oos_drop_count;
/* res_oos_drop_count */
u64 active_qp_count_p0;
/* port 0 active qps */
u64 active_qp_count_p1;
/* port 1 active qps */
u64 active_qp_count_p2;
/* port 2 active qps */
u64 active_qp_count_p3;
/* port 3 active qps */
}; };
int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res, int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res,

View File

@@ -2929,6 +2929,11 @@ struct creq_query_roce_stats_resp_sb {
__le64 res_srq_load_err; __le64 res_srq_load_err;
__le64 res_tx_pci_err; __le64 res_tx_pci_err;
__le64 res_rx_pci_err; __le64 res_rx_pci_err;
__le64 res_oos_drop_count;
__le64 active_qp_count_p0;
__le64 active_qp_count_p1;
__le64 active_qp_count_p2;
__le64 active_qp_count_p3;
}; };
/* QP error notification event (16 bytes) */ /* QP error notification event (16 bytes) */

View File

@@ -1127,17 +1127,18 @@ static int iwch_query_port(struct ib_device *ibdev,
return 0; return 0;
} }
static ssize_t show_rev(struct device *dev, struct device_attribute *attr, static ssize_t hw_rev_show(struct device *dev,
char *buf) struct device_attribute *attr, char *buf)
{ {
struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev, struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
ibdev.dev); ibdev.dev);
pr_debug("%s dev 0x%p\n", __func__, dev); pr_debug("%s dev 0x%p\n", __func__, dev);
return sprintf(buf, "%d\n", iwch_dev->rdev.t3cdev_p->type); return sprintf(buf, "%d\n", iwch_dev->rdev.t3cdev_p->type);
} }
static DEVICE_ATTR_RO(hw_rev);
static ssize_t show_hca(struct device *dev, struct device_attribute *attr, static ssize_t hca_type_show(struct device *dev,
char *buf) struct device_attribute *attr, char *buf)
{ {
struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev, struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
ibdev.dev); ibdev.dev);
@@ -1148,9 +1149,10 @@ static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
lldev->ethtool_ops->get_drvinfo(lldev, &info); lldev->ethtool_ops->get_drvinfo(lldev, &info);
return sprintf(buf, "%s\n", info.driver); return sprintf(buf, "%s\n", info.driver);
} }
static DEVICE_ATTR_RO(hca_type);
static ssize_t show_board(struct device *dev, struct device_attribute *attr, static ssize_t board_id_show(struct device *dev,
char *buf) struct device_attribute *attr, char *buf)
{ {
struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev, struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
ibdev.dev); ibdev.dev);
@@ -1158,6 +1160,7 @@ static ssize_t show_board(struct device *dev, struct device_attribute *attr,
return sprintf(buf, "%x.%x\n", iwch_dev->rdev.rnic_info.pdev->vendor, return sprintf(buf, "%x.%x\n", iwch_dev->rdev.rnic_info.pdev->vendor,
iwch_dev->rdev.rnic_info.pdev->device); iwch_dev->rdev.rnic_info.pdev->device);
} }
static DEVICE_ATTR_RO(board_id);
enum counters { enum counters {
IPINRECEIVES, IPINRECEIVES,
@@ -1274,14 +1277,15 @@ static int iwch_get_mib(struct ib_device *ibdev, struct rdma_hw_stats *stats,
return stats->num_counters; return stats->num_counters;
} }
static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); static struct attribute *iwch_class_attributes[] = {
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); &dev_attr_hw_rev.attr,
static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); &dev_attr_hca_type.attr,
&dev_attr_board_id.attr,
NULL
};
static struct device_attribute *iwch_class_attributes[] = { static const struct attribute_group iwch_attr_group = {
&dev_attr_hw_rev, .attrs = iwch_class_attributes,
&dev_attr_hca_type,
&dev_attr_board_id,
}; };
static int iwch_port_immutable(struct ib_device *ibdev, u8 port_num, static int iwch_port_immutable(struct ib_device *ibdev, u8 port_num,
@@ -1316,10 +1320,8 @@ static void get_dev_fw_ver_str(struct ib_device *ibdev, char *str)
int iwch_register_device(struct iwch_dev *dev) int iwch_register_device(struct iwch_dev *dev)
{ {
int ret; int ret;
int i;
pr_debug("%s iwch_dev %p\n", __func__, dev); pr_debug("%s iwch_dev %p\n", __func__, dev);
strlcpy(dev->ibdev.name, "cxgb3_%d", IB_DEVICE_NAME_MAX);
memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid)); memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6); memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6);
dev->ibdev.owner = THIS_MODULE; dev->ibdev.owner = THIS_MODULE;
@@ -1402,33 +1404,16 @@ int iwch_register_device(struct iwch_dev *dev)
sizeof(dev->ibdev.iwcm->ifname)); sizeof(dev->ibdev.iwcm->ifname));
dev->ibdev.driver_id = RDMA_DRIVER_CXGB3; dev->ibdev.driver_id = RDMA_DRIVER_CXGB3;
ret = ib_register_device(&dev->ibdev, NULL); rdma_set_device_sysfs_group(&dev->ibdev, &iwch_attr_group);
ret = ib_register_device(&dev->ibdev, "cxgb3_%d", NULL);
if (ret) if (ret)
goto bail1; kfree(dev->ibdev.iwcm);
for (i = 0; i < ARRAY_SIZE(iwch_class_attributes); ++i) {
ret = device_create_file(&dev->ibdev.dev,
iwch_class_attributes[i]);
if (ret) {
goto bail2;
}
}
return 0;
bail2:
ib_unregister_device(&dev->ibdev);
bail1:
kfree(dev->ibdev.iwcm);
return ret; return ret;
} }
void iwch_unregister_device(struct iwch_dev *dev) void iwch_unregister_device(struct iwch_dev *dev)
{ {
int i;
pr_debug("%s iwch_dev %p\n", __func__, dev); pr_debug("%s iwch_dev %p\n", __func__, dev);
for (i = 0; i < ARRAY_SIZE(iwch_class_attributes); ++i)
device_remove_file(&dev->ibdev.dev,
iwch_class_attributes[i]);
ib_unregister_device(&dev->ibdev); ib_unregister_device(&dev->ibdev);
kfree(dev->ibdev.iwcm); kfree(dev->ibdev.iwcm);
return; return;

View File

@@ -403,8 +403,7 @@ void _c4iw_free_ep(struct kref *kref)
ep->com.local_addr.ss_family); ep->com.local_addr.ss_family);
dst_release(ep->dst); dst_release(ep->dst);
cxgb4_l2t_release(ep->l2t); cxgb4_l2t_release(ep->l2t);
if (ep->mpa_skb) kfree_skb(ep->mpa_skb);
kfree_skb(ep->mpa_skb);
} }
if (!skb_queue_empty(&ep->com.ep_skb_list)) if (!skb_queue_empty(&ep->com.ep_skb_list))
skb_queue_purge(&ep->com.ep_skb_list); skb_queue_purge(&ep->com.ep_skb_list);

View File

@@ -161,7 +161,7 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
cq->gts = rdev->lldi.gts_reg; cq->gts = rdev->lldi.gts_reg;
cq->rdev = rdev; cq->rdev = rdev;
cq->bar2_va = c4iw_bar2_addrs(rdev, cq->cqid, T4_BAR2_QTYPE_INGRESS, cq->bar2_va = c4iw_bar2_addrs(rdev, cq->cqid, CXGB4_BAR2_QTYPE_INGRESS,
&cq->bar2_qid, &cq->bar2_qid,
user ? &cq->bar2_pa : NULL); user ? &cq->bar2_pa : NULL);
if (user && !cq->bar2_pa) { if (user && !cq->bar2_pa) {

View File

@@ -373,8 +373,8 @@ static int c4iw_query_port(struct ib_device *ibdev, u8 port,
return 0; return 0;
} }
static ssize_t show_rev(struct device *dev, struct device_attribute *attr, static ssize_t hw_rev_show(struct device *dev,
char *buf) struct device_attribute *attr, char *buf)
{ {
struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev, struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
ibdev.dev); ibdev.dev);
@@ -382,9 +382,10 @@ static ssize_t show_rev(struct device *dev, struct device_attribute *attr,
return sprintf(buf, "%d\n", return sprintf(buf, "%d\n",
CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type)); CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type));
} }
static DEVICE_ATTR_RO(hw_rev);
static ssize_t show_hca(struct device *dev, struct device_attribute *attr, static ssize_t hca_type_show(struct device *dev,
char *buf) struct device_attribute *attr, char *buf)
{ {
struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev, struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
ibdev.dev); ibdev.dev);
@@ -395,9 +396,10 @@ static ssize_t show_hca(struct device *dev, struct device_attribute *attr,
lldev->ethtool_ops->get_drvinfo(lldev, &info); lldev->ethtool_ops->get_drvinfo(lldev, &info);
return sprintf(buf, "%s\n", info.driver); return sprintf(buf, "%s\n", info.driver);
} }
static DEVICE_ATTR_RO(hca_type);
static ssize_t show_board(struct device *dev, struct device_attribute *attr, static ssize_t board_id_show(struct device *dev, struct device_attribute *attr,
char *buf) char *buf)
{ {
struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev, struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
ibdev.dev); ibdev.dev);
@@ -405,6 +407,7 @@ static ssize_t show_board(struct device *dev, struct device_attribute *attr,
return sprintf(buf, "%x.%x\n", c4iw_dev->rdev.lldi.pdev->vendor, return sprintf(buf, "%x.%x\n", c4iw_dev->rdev.lldi.pdev->vendor,
c4iw_dev->rdev.lldi.pdev->device); c4iw_dev->rdev.lldi.pdev->device);
} }
static DEVICE_ATTR_RO(board_id);
enum counters { enum counters {
IP4INSEGS, IP4INSEGS,
@@ -461,14 +464,15 @@ static int c4iw_get_mib(struct ib_device *ibdev,
return stats->num_counters; return stats->num_counters;
} }
static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); static struct attribute *c4iw_class_attributes[] = {
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); &dev_attr_hw_rev.attr,
static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); &dev_attr_hca_type.attr,
&dev_attr_board_id.attr,
NULL
};
static struct device_attribute *c4iw_class_attributes[] = { static const struct attribute_group c4iw_attr_group = {
&dev_attr_hw_rev, .attrs = c4iw_class_attributes,
&dev_attr_hca_type,
&dev_attr_board_id,
}; };
static int c4iw_port_immutable(struct ib_device *ibdev, u8 port_num, static int c4iw_port_immutable(struct ib_device *ibdev, u8 port_num,
@@ -530,12 +534,10 @@ static int fill_res_entry(struct sk_buff *msg, struct rdma_restrack_entry *res)
void c4iw_register_device(struct work_struct *work) void c4iw_register_device(struct work_struct *work)
{ {
int ret; int ret;
int i;
struct uld_ctx *ctx = container_of(work, struct uld_ctx, reg_work); struct uld_ctx *ctx = container_of(work, struct uld_ctx, reg_work);
struct c4iw_dev *dev = ctx->dev; struct c4iw_dev *dev = ctx->dev;
pr_debug("c4iw_dev %p\n", dev); pr_debug("c4iw_dev %p\n", dev);
strlcpy(dev->ibdev.name, "cxgb4_%d", IB_DEVICE_NAME_MAX);
memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid)); memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
memcpy(&dev->ibdev.node_guid, dev->rdev.lldi.ports[0]->dev_addr, 6); memcpy(&dev->ibdev.node_guid, dev->rdev.lldi.ports[0]->dev_addr, 6);
dev->ibdev.owner = THIS_MODULE; dev->ibdev.owner = THIS_MODULE;
@@ -626,20 +628,13 @@ void c4iw_register_device(struct work_struct *work)
memcpy(dev->ibdev.iwcm->ifname, dev->rdev.lldi.ports[0]->name, memcpy(dev->ibdev.iwcm->ifname, dev->rdev.lldi.ports[0]->name,
sizeof(dev->ibdev.iwcm->ifname)); sizeof(dev->ibdev.iwcm->ifname));
rdma_set_device_sysfs_group(&dev->ibdev, &c4iw_attr_group);
dev->ibdev.driver_id = RDMA_DRIVER_CXGB4; dev->ibdev.driver_id = RDMA_DRIVER_CXGB4;
ret = ib_register_device(&dev->ibdev, NULL); ret = ib_register_device(&dev->ibdev, "cxgb4_%d", NULL);
if (ret) if (ret)
goto err_kfree_iwcm; goto err_kfree_iwcm;
for (i = 0; i < ARRAY_SIZE(c4iw_class_attributes); ++i) {
ret = device_create_file(&dev->ibdev.dev,
c4iw_class_attributes[i]);
if (ret)
goto err_unregister_device;
}
return; return;
err_unregister_device:
ib_unregister_device(&dev->ibdev);
err_kfree_iwcm: err_kfree_iwcm:
kfree(dev->ibdev.iwcm); kfree(dev->ibdev.iwcm);
err_dealloc_ctx: err_dealloc_ctx:
@@ -651,12 +646,7 @@ err_dealloc_ctx:
void c4iw_unregister_device(struct c4iw_dev *dev) void c4iw_unregister_device(struct c4iw_dev *dev)
{ {
int i;
pr_debug("c4iw_dev %p\n", dev); pr_debug("c4iw_dev %p\n", dev);
for (i = 0; i < ARRAY_SIZE(c4iw_class_attributes); ++i)
device_remove_file(&dev->ibdev.dev,
c4iw_class_attributes[i]);
ib_unregister_device(&dev->ibdev); ib_unregister_device(&dev->ibdev);
kfree(dev->ibdev.iwcm); kfree(dev->ibdev.iwcm);
return; return;
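
Both Chelsio providers above are converted the same way; a condensed, illustrative sketch of the new registration pattern (driver names here are placeholders, the calls mirror the ones visible in this commit): DEVICE_ATTR_RO() attributes are collected into one attribute_group, handed to the core via rdma_set_device_sysfs_group(), and ib_register_device() now takes the name template, replacing the old device_create_file() loops.

	#include <linux/device.h>
	#include <rdma/ib_verbs.h>

	struct demo_dev {
		struct ib_device ibdev;		/* placeholder driver structure */
	};

	static ssize_t hw_rev_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
	{
		return sprintf(buf, "%d\n", 0);	/* placeholder revision */
	}
	static DEVICE_ATTR_RO(hw_rev);

	static struct attribute *demo_class_attributes[] = {
		&dev_attr_hw_rev.attr,
		NULL
	};

	static const struct attribute_group demo_attr_group = {
		.attrs = demo_class_attributes,
	};

	static int demo_register_device(struct demo_dev *dev)
	{
		/* one sysfs group per device, created/removed by the RDMA core */
		rdma_set_device_sysfs_group(&dev->ibdev, &demo_attr_group);
		return ib_register_device(&dev->ibdev, "demo_%d", NULL);
	}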

View File

@@ -279,12 +279,13 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
wq->db = rdev->lldi.db_reg; wq->db = rdev->lldi.db_reg;
wq->sq.bar2_va = c4iw_bar2_addrs(rdev, wq->sq.qid, T4_BAR2_QTYPE_EGRESS, wq->sq.bar2_va = c4iw_bar2_addrs(rdev, wq->sq.qid,
CXGB4_BAR2_QTYPE_EGRESS,
&wq->sq.bar2_qid, &wq->sq.bar2_qid,
user ? &wq->sq.bar2_pa : NULL); user ? &wq->sq.bar2_pa : NULL);
if (need_rq) if (need_rq)
wq->rq.bar2_va = c4iw_bar2_addrs(rdev, wq->rq.qid, wq->rq.bar2_va = c4iw_bar2_addrs(rdev, wq->rq.qid,
T4_BAR2_QTYPE_EGRESS, CXGB4_BAR2_QTYPE_EGRESS,
&wq->rq.bar2_qid, &wq->rq.bar2_qid,
user ? &wq->rq.bar2_pa : NULL); user ? &wq->rq.bar2_pa : NULL);
@@ -2572,7 +2573,7 @@ static int alloc_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx,
memset(wq->queue, 0, wq->memsize); memset(wq->queue, 0, wq->memsize);
dma_unmap_addr_set(wq, mapping, wq->dma_addr); dma_unmap_addr_set(wq, mapping, wq->dma_addr);
wq->bar2_va = c4iw_bar2_addrs(rdev, wq->qid, T4_BAR2_QTYPE_EGRESS, wq->bar2_va = c4iw_bar2_addrs(rdev, wq->qid, CXGB4_BAR2_QTYPE_EGRESS,
&wq->bar2_qid, &wq->bar2_qid,
user ? &wq->bar2_pa : NULL); user ? &wq->bar2_pa : NULL);
@@ -2813,8 +2814,7 @@ err_free_queue:
free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx, free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
srq->wr_waitp); srq->wr_waitp);
err_free_skb: err_free_skb:
if (srq->destroy_skb) kfree_skb(srq->destroy_skb);
kfree_skb(srq->destroy_skb);
err_free_srq_idx: err_free_srq_idx:
c4iw_free_srq_idx(&rhp->rdev, srq->idx); c4iw_free_srq_idx(&rhp->rdev, srq->idx);
err_free_wr_wait: err_free_wr_wait:

View File

@@ -8,12 +8,42 @@
# #
obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o
hfi1-y := affinity.o chip.o device.o driver.o efivar.o \ hfi1-y := \
eprom.o exp_rcv.o file_ops.o firmware.o \ affinity.o \
init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \ chip.o \
qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o \ device.o \
uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \ driver.o \
verbs_txreq.o vnic_main.o vnic_sdma.o efivar.o \
eprom.o \
exp_rcv.o \
file_ops.o \
firmware.o \
init.o \
intr.o \
iowait.o \
mad.o \
mmu_rb.o \
msix.o \
pcie.o \
pio.o \
pio_copy.o \
platform.o \
qp.o \
qsfp.o \
rc.o \
ruc.o \
sdma.o \
sysfs.o \
trace.o \
uc.o \
ud.o \
user_exp_rcv.o \
user_pages.o \
user_sdma.o \
verbs.o \
verbs_txreq.o \
vnic_main.o \
vnic_sdma.o
ifdef CONFIG_DEBUG_FS ifdef CONFIG_DEBUG_FS
hfi1-y += debugfs.o hfi1-y += debugfs.o

View File

@@ -817,10 +817,10 @@ static void hfi1_update_sdma_affinity(struct hfi1_msix_entry *msix, int cpu)
set = &entry->def_intr; set = &entry->def_intr;
cpumask_set_cpu(cpu, &set->mask); cpumask_set_cpu(cpu, &set->mask);
cpumask_set_cpu(cpu, &set->used); cpumask_set_cpu(cpu, &set->used);
for (i = 0; i < dd->num_msix_entries; i++) { for (i = 0; i < dd->msix_info.max_requested; i++) {
struct hfi1_msix_entry *other_msix; struct hfi1_msix_entry *other_msix;
other_msix = &dd->msix_entries[i]; other_msix = &dd->msix_info.msix_entries[i];
if (other_msix->type != IRQ_SDMA || other_msix == msix) if (other_msix->type != IRQ_SDMA || other_msix == msix)
continue; continue;

View File

@@ -67,8 +67,6 @@
#include "debugfs.h" #include "debugfs.h"
#include "fault.h" #include "fault.h"
#define NUM_IB_PORTS 1
uint kdeth_qp; uint kdeth_qp;
module_param_named(kdeth_qp, kdeth_qp, uint, S_IRUGO); module_param_named(kdeth_qp, kdeth_qp, uint, S_IRUGO);
MODULE_PARM_DESC(kdeth_qp, "Set the KDETH queue pair prefix"); MODULE_PARM_DESC(kdeth_qp, "Set the KDETH queue pair prefix");
@@ -1100,9 +1098,9 @@ struct err_reg_info {
const char *desc; const char *desc;
}; };
#define NUM_MISC_ERRS (IS_GENERAL_ERR_END - IS_GENERAL_ERR_START) #define NUM_MISC_ERRS (IS_GENERAL_ERR_END + 1 - IS_GENERAL_ERR_START)
#define NUM_DC_ERRS (IS_DC_END - IS_DC_START) #define NUM_DC_ERRS (IS_DC_END + 1 - IS_DC_START)
#define NUM_VARIOUS (IS_VARIOUS_END - IS_VARIOUS_START) #define NUM_VARIOUS (IS_VARIOUS_END + 1 - IS_VARIOUS_START)
/* /*
* Helpers for building HFI and DC error interrupt table entries. Different * Helpers for building HFI and DC error interrupt table entries. Different
@@ -8181,7 +8179,7 @@ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source)
/** /**
* is_rcv_urgent_int() - User receive context urgent IRQ handler * is_rcv_urgent_int() - User receive context urgent IRQ handler
* @dd: valid dd * @dd: valid dd
* @source: logical IRQ source (ofse from IS_RCVURGENT_START) * @source: logical IRQ source (offset from IS_RCVURGENT_START)
* *
* RX block receive urgent interrupt. Source is < 160. * RX block receive urgent interrupt. Source is < 160.
* *
@@ -8231,7 +8229,7 @@ static const struct is_table is_table[] = {
is_sdma_eng_err_name, is_sdma_eng_err_int }, is_sdma_eng_err_name, is_sdma_eng_err_int },
{ IS_SENDCTXT_ERR_START, IS_SENDCTXT_ERR_END, { IS_SENDCTXT_ERR_START, IS_SENDCTXT_ERR_END,
is_sendctxt_err_name, is_sendctxt_err_int }, is_sendctxt_err_name, is_sendctxt_err_int },
{ IS_SDMA_START, IS_SDMA_END, { IS_SDMA_START, IS_SDMA_IDLE_END,
is_sdma_eng_name, is_sdma_eng_int }, is_sdma_eng_name, is_sdma_eng_int },
{ IS_VARIOUS_START, IS_VARIOUS_END, { IS_VARIOUS_START, IS_VARIOUS_END,
is_various_name, is_various_int }, is_various_name, is_various_int },
@@ -8257,7 +8255,7 @@ static void is_interrupt(struct hfi1_devdata *dd, unsigned int source)
/* avoids a double compare by walking the table in-order */ /* avoids a double compare by walking the table in-order */
for (entry = &is_table[0]; entry->is_name; entry++) { for (entry = &is_table[0]; entry->is_name; entry++) {
if (source < entry->end) { if (source <= entry->end) {
trace_hfi1_interrupt(dd, entry, source); trace_hfi1_interrupt(dd, entry, source);
entry->is_int(dd, source - entry->start); entry->is_int(dd, source - entry->start);
return; return;
@@ -8276,7 +8274,7 @@ static void is_interrupt(struct hfi1_devdata *dd, unsigned int source)
* context DATA IRQs are threaded and are not supported by this handler. * context DATA IRQs are threaded and are not supported by this handler.
* *
*/ */
static irqreturn_t general_interrupt(int irq, void *data) irqreturn_t general_interrupt(int irq, void *data)
{ {
struct hfi1_devdata *dd = data; struct hfi1_devdata *dd = data;
u64 regs[CCE_NUM_INT_CSRS]; u64 regs[CCE_NUM_INT_CSRS];
@@ -8309,7 +8307,7 @@ static irqreturn_t general_interrupt(int irq, void *data)
return handled; return handled;
} }
static irqreturn_t sdma_interrupt(int irq, void *data) irqreturn_t sdma_interrupt(int irq, void *data)
{ {
struct sdma_engine *sde = data; struct sdma_engine *sde = data;
struct hfi1_devdata *dd = sde->dd; struct hfi1_devdata *dd = sde->dd;
@@ -8401,7 +8399,7 @@ static inline int check_packet_present(struct hfi1_ctxtdata *rcd)
* invoked) is finished. The intent is to avoid extra interrupts while we * invoked) is finished. The intent is to avoid extra interrupts while we
* are processing packets anyway. * are processing packets anyway.
*/ */
static irqreturn_t receive_context_interrupt(int irq, void *data) irqreturn_t receive_context_interrupt(int irq, void *data)
{ {
struct hfi1_ctxtdata *rcd = data; struct hfi1_ctxtdata *rcd = data;
struct hfi1_devdata *dd = rcd->dd; struct hfi1_devdata *dd = rcd->dd;
@@ -8441,7 +8439,7 @@ static irqreturn_t receive_context_interrupt(int irq, void *data)
* Receive packet thread handler. This expects to be invoked with the * Receive packet thread handler. This expects to be invoked with the
* receive interrupt still blocked. * receive interrupt still blocked.
*/ */
static irqreturn_t receive_context_thread(int irq, void *data) irqreturn_t receive_context_thread(int irq, void *data)
{ {
struct hfi1_ctxtdata *rcd = data; struct hfi1_ctxtdata *rcd = data;
int present; int present;
@@ -9651,30 +9649,10 @@ void qsfp_event(struct work_struct *work)
} }
} }
static void init_qsfp_int(struct hfi1_devdata *dd) void init_qsfp_int(struct hfi1_devdata *dd)
{ {
struct hfi1_pportdata *ppd = dd->pport; struct hfi1_pportdata *ppd = dd->pport;
u64 qsfp_mask, cce_int_mask; u64 qsfp_mask;
const int qsfp1_int_smask = QSFP1_INT % 64;
const int qsfp2_int_smask = QSFP2_INT % 64;
/*
* disable QSFP1 interrupts for HFI1, QSFP2 interrupts for HFI0
* Qsfp1Int and Qsfp2Int are adjacent bits in the same CSR,
* therefore just one of QSFP1_INT/QSFP2_INT can be used to find
* the index of the appropriate CSR in the CCEIntMask CSR array
*/
cce_int_mask = read_csr(dd, CCE_INT_MASK +
(8 * (QSFP1_INT / 64)));
if (dd->hfi1_id) {
cce_int_mask &= ~((u64)1 << qsfp1_int_smask);
write_csr(dd, CCE_INT_MASK + (8 * (QSFP1_INT / 64)),
cce_int_mask);
} else {
cce_int_mask &= ~((u64)1 << qsfp2_int_smask);
write_csr(dd, CCE_INT_MASK + (8 * (QSFP2_INT / 64)),
cce_int_mask);
}
qsfp_mask = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N); qsfp_mask = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N);
/* Clear current status to avoid spurious interrupts */ /* Clear current status to avoid spurious interrupts */
@@ -9691,6 +9669,12 @@ static void init_qsfp_int(struct hfi1_devdata *dd)
write_csr(dd, write_csr(dd,
dd->hfi1_id ? ASIC_QSFP2_INVERT : ASIC_QSFP1_INVERT, dd->hfi1_id ? ASIC_QSFP2_INVERT : ASIC_QSFP1_INVERT,
qsfp_mask); qsfp_mask);
/* Enable the appropriate QSFP IRQ source */
if (!dd->hfi1_id)
set_intr_bits(dd, QSFP1_INT, QSFP1_INT, true);
else
set_intr_bits(dd, QSFP2_INT, QSFP2_INT, true);
} }
/* /*
@@ -10577,12 +10561,29 @@ void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason,
} }
} }
/* /**
* Verify if BCT for data VLs is non-zero. * data_vls_operational() - Verify if data VL BCT credits and MTU
* are both set.
* @ppd: pointer to hfi1_pportdata structure
*
* Return: true - Ok, false - otherwise.
*/ */
static inline bool data_vls_operational(struct hfi1_pportdata *ppd) static inline bool data_vls_operational(struct hfi1_pportdata *ppd)
{ {
return !!ppd->actual_vls_operational; int i;
u64 reg;
if (!ppd->actual_vls_operational)
return false;
for (i = 0; i < ppd->vls_supported; i++) {
reg = read_csr(ppd->dd, SEND_CM_CREDIT_VL + (8 * i));
if ((reg && !ppd->dd->vld[i].mtu) ||
(!reg && ppd->dd->vld[i].mtu))
return false;
}
return true;
} }
/* /*
@@ -10695,7 +10696,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
if (!data_vls_operational(ppd)) { if (!data_vls_operational(ppd)) {
dd_dev_err(dd, dd_dev_err(dd,
"%s: data VLs not operational\n", __func__); "%s: Invalid data VL credits or mtu\n",
__func__);
ret = -EINVAL; ret = -EINVAL;
break; break;
} }
@@ -11932,10 +11934,16 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op,
rcvctrl &= ~RCV_CTXT_CTRL_ENABLE_SMASK; rcvctrl &= ~RCV_CTXT_CTRL_ENABLE_SMASK;
} }
if (op & HFI1_RCVCTRL_INTRAVAIL_ENB) if (op & HFI1_RCVCTRL_INTRAVAIL_ENB) {
set_intr_bits(dd, IS_RCVAVAIL_START + rcd->ctxt,
IS_RCVAVAIL_START + rcd->ctxt, true);
rcvctrl |= RCV_CTXT_CTRL_INTR_AVAIL_SMASK; rcvctrl |= RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
if (op & HFI1_RCVCTRL_INTRAVAIL_DIS) }
if (op & HFI1_RCVCTRL_INTRAVAIL_DIS) {
set_intr_bits(dd, IS_RCVAVAIL_START + rcd->ctxt,
IS_RCVAVAIL_START + rcd->ctxt, false);
rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK; rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
}
if ((op & HFI1_RCVCTRL_TAILUPD_ENB) && rcd->rcvhdrtail_kvaddr) if ((op & HFI1_RCVCTRL_TAILUPD_ENB) && rcd->rcvhdrtail_kvaddr)
rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK; rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
if (op & HFI1_RCVCTRL_TAILUPD_DIS) { if (op & HFI1_RCVCTRL_TAILUPD_DIS) {
@@ -11965,6 +11973,13 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op,
rcvctrl |= RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK; rcvctrl |= RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK;
if (op & HFI1_RCVCTRL_NO_EGR_DROP_DIS) if (op & HFI1_RCVCTRL_NO_EGR_DROP_DIS)
rcvctrl &= ~RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK; rcvctrl &= ~RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK;
if (op & HFI1_RCVCTRL_URGENT_ENB)
set_intr_bits(dd, IS_RCVURGENT_START + rcd->ctxt,
IS_RCVURGENT_START + rcd->ctxt, true);
if (op & HFI1_RCVCTRL_URGENT_DIS)
set_intr_bits(dd, IS_RCVURGENT_START + rcd->ctxt,
IS_RCVURGENT_START + rcd->ctxt, false);
hfi1_cdbg(RCVCTRL, "ctxt %d rcvctrl 0x%llx\n", ctxt, rcvctrl); hfi1_cdbg(RCVCTRL, "ctxt %d rcvctrl 0x%llx\n", ctxt, rcvctrl);
write_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL, rcvctrl); write_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL, rcvctrl);
@@ -12963,63 +12978,71 @@ int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp)
return ret; return ret;
} }
/**
* get_int_mask - get 64 bit int mask
* @dd - the devdata
* @i - the csr (relative to CCE_INT_MASK)
*
* Returns the mask with the urgent interrupt mask
* bit clear for kernel receive contexts.
*/
static u64 get_int_mask(struct hfi1_devdata *dd, u32 i)
{
u64 mask = U64_MAX; /* default to no change */
if (i >= (IS_RCVURGENT_START / 64) && i < (IS_RCVURGENT_END / 64)) {
int j = (i - (IS_RCVURGENT_START / 64)) * 64;
int k = !j ? IS_RCVURGENT_START % 64 : 0;
if (j)
j -= IS_RCVURGENT_START % 64;
/* j = 0..dd->first_dyn_alloc_ctxt - 1,k = 0..63 */
for (; j < dd->first_dyn_alloc_ctxt && k < 64; j++, k++)
/* convert to bit in mask and clear */
mask &= ~BIT_ULL(k);
}
return mask;
}
/* ========================================================================= */ /* ========================================================================= */
/* /**
* Enable/disable chip from delivering interrupts. * read_mod_write() - Calculate the IRQ register index and set/clear the bits
* @dd: valid devdata
* @src: IRQ source to determine register index from
* @bits: the bits to set or clear
* @set: true == set the bits, false == clear the bits
*
*/ */
void set_intr_state(struct hfi1_devdata *dd, u32 enable) static void read_mod_write(struct hfi1_devdata *dd, u16 src, u64 bits,
bool set)
{ {
int i; u64 reg;
u16 idx = src / BITS_PER_REGISTER;
/* spin_lock(&dd->irq_src_lock);
* In HFI, the mask needs to be 1 to allow interrupts. reg = read_csr(dd, CCE_INT_MASK + (8 * idx));
*/ if (set)
if (enable) { reg |= bits;
/* enable all interrupts but urgent on kernel contexts */ else
for (i = 0; i < CCE_NUM_INT_CSRS; i++) { reg &= ~bits;
u64 mask = get_int_mask(dd, i); write_csr(dd, CCE_INT_MASK + (8 * idx), reg);
spin_unlock(&dd->irq_src_lock);
}
write_csr(dd, CCE_INT_MASK + (8 * i), mask); /**
* set_intr_bits() - Enable/disable a range (one or more) IRQ sources
* @dd: valid devdata
* @first: first IRQ source to set/clear
* @last: last IRQ source (inclusive) to set/clear
* @set: true == set the bits, false == clear the bits
*
* If first == last, set the exact source.
*/
int set_intr_bits(struct hfi1_devdata *dd, u16 first, u16 last, bool set)
{
u64 bits = 0;
u64 bit;
u16 src;
if (first > NUM_INTERRUPT_SOURCES || last > NUM_INTERRUPT_SOURCES)
return -EINVAL;
if (last < first)
return -ERANGE;
for (src = first; src <= last; src++) {
bit = src % BITS_PER_REGISTER;
/* wrapped to next register? */
if (!bit && bits) {
read_mod_write(dd, src - 1, bits, set);
bits = 0;
} }
bits |= BIT_ULL(bit);
init_qsfp_int(dd);
} else {
for (i = 0; i < CCE_NUM_INT_CSRS; i++)
write_csr(dd, CCE_INT_MASK + (8 * i), 0ull);
} }
read_mod_write(dd, last, bits, set);
return 0;
} }
/* /*
* Clear all interrupt sources on the chip. * Clear all interrupt sources on the chip.
*/ */
static void clear_all_interrupts(struct hfi1_devdata *dd) void clear_all_interrupts(struct hfi1_devdata *dd)
{ {
int i; int i;
@@ -13043,38 +13066,11 @@ static void clear_all_interrupts(struct hfi1_devdata *dd)
write_csr(dd, DC_DC8051_ERR_CLR, ~(u64)0); write_csr(dd, DC_DC8051_ERR_CLR, ~(u64)0);
} }
/**
* hfi1_clean_up_interrupts() - Free all IRQ resources
* @dd: valid device data data structure
*
* Free the MSIx and assoicated PCI resources, if they have been allocated.
*/
void hfi1_clean_up_interrupts(struct hfi1_devdata *dd)
{
int i;
struct hfi1_msix_entry *me = dd->msix_entries;
/* remove irqs - must happen before disabling/turning off */
for (i = 0; i < dd->num_msix_entries; i++, me++) {
if (!me->arg) /* => no irq, no affinity */
continue;
hfi1_put_irq_affinity(dd, me);
pci_free_irq(dd->pcidev, i, me->arg);
}
/* clean structures */
kfree(dd->msix_entries);
dd->msix_entries = NULL;
dd->num_msix_entries = 0;
pci_free_irq_vectors(dd->pcidev);
}
/* /*
* Remap the interrupt source from the general handler to the given MSI-X * Remap the interrupt source from the general handler to the given MSI-X
* interrupt. * interrupt.
*/ */
static void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr) void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr)
{ {
u64 reg; u64 reg;
int m, n; int m, n;
@@ -13098,8 +13094,7 @@ static void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr)
write_csr(dd, CCE_INT_MAP + (8 * m), reg); write_csr(dd, CCE_INT_MAP + (8 * m), reg);
} }
static void remap_sdma_interrupts(struct hfi1_devdata *dd, void remap_sdma_interrupts(struct hfi1_devdata *dd, int engine, int msix_intr)
int engine, int msix_intr)
{ {
/* /*
* SDMA engine interrupt sources grouped by type, rather than * SDMA engine interrupt sources grouped by type, rather than
@@ -13108,204 +13103,16 @@ static void remap_sdma_interrupts(struct hfi1_devdata *dd,
* SDMAProgress * SDMAProgress
* SDMAIdle * SDMAIdle
*/ */
remap_intr(dd, IS_SDMA_START + 0 * TXE_NUM_SDMA_ENGINES + engine, remap_intr(dd, IS_SDMA_START + engine, msix_intr);
msix_intr); remap_intr(dd, IS_SDMA_PROGRESS_START + engine, msix_intr);
remap_intr(dd, IS_SDMA_START + 1 * TXE_NUM_SDMA_ENGINES + engine, remap_intr(dd, IS_SDMA_IDLE_START + engine, msix_intr);
msix_intr);
remap_intr(dd, IS_SDMA_START + 2 * TXE_NUM_SDMA_ENGINES + engine,
msix_intr);
}
static int request_msix_irqs(struct hfi1_devdata *dd)
{
int first_general, last_general;
int first_sdma, last_sdma;
int first_rx, last_rx;
int i, ret = 0;
/* calculate the ranges we are going to use */
first_general = 0;
last_general = first_general + 1;
first_sdma = last_general;
last_sdma = first_sdma + dd->num_sdma;
first_rx = last_sdma;
last_rx = first_rx + dd->n_krcv_queues + dd->num_vnic_contexts;
/* VNIC MSIx interrupts get mapped when VNIC contexts are created */
dd->first_dyn_msix_idx = first_rx + dd->n_krcv_queues;
/*
* Sanity check - the code expects all SDMA chip source
* interrupts to be in the same CSR, starting at bit 0. Verify
* that this is true by checking the bit location of the start.
*/
BUILD_BUG_ON(IS_SDMA_START % 64);
for (i = 0; i < dd->num_msix_entries; i++) {
struct hfi1_msix_entry *me = &dd->msix_entries[i];
const char *err_info;
irq_handler_t handler;
irq_handler_t thread = NULL;
void *arg = NULL;
int idx;
struct hfi1_ctxtdata *rcd = NULL;
struct sdma_engine *sde = NULL;
char name[MAX_NAME_SIZE];
/* obtain the arguments to pci_request_irq */
if (first_general <= i && i < last_general) {
idx = i - first_general;
handler = general_interrupt;
arg = dd;
snprintf(name, sizeof(name),
DRIVER_NAME "_%d", dd->unit);
err_info = "general";
me->type = IRQ_GENERAL;
} else if (first_sdma <= i && i < last_sdma) {
idx = i - first_sdma;
sde = &dd->per_sdma[idx];
handler = sdma_interrupt;
arg = sde;
snprintf(name, sizeof(name),
DRIVER_NAME "_%d sdma%d", dd->unit, idx);
err_info = "sdma";
remap_sdma_interrupts(dd, idx, i);
me->type = IRQ_SDMA;
} else if (first_rx <= i && i < last_rx) {
idx = i - first_rx;
rcd = hfi1_rcd_get_by_index_safe(dd, idx);
if (rcd) {
/*
* Set the interrupt register and mask for this
* context's interrupt.
*/
rcd->ireg = (IS_RCVAVAIL_START + idx) / 64;
rcd->imask = ((u64)1) <<
((IS_RCVAVAIL_START + idx) % 64);
handler = receive_context_interrupt;
thread = receive_context_thread;
arg = rcd;
snprintf(name, sizeof(name),
DRIVER_NAME "_%d kctxt%d",
dd->unit, idx);
err_info = "receive context";
remap_intr(dd, IS_RCVAVAIL_START + idx, i);
me->type = IRQ_RCVCTXT;
rcd->msix_intr = i;
hfi1_rcd_put(rcd);
}
} else {
/* not in our expected range - complain, then
* ignore it
*/
dd_dev_err(dd,
"Unexpected extra MSI-X interrupt %d\n", i);
continue;
}
/* no argument, no interrupt */
if (!arg)
continue;
/* make sure the name is terminated */
name[sizeof(name) - 1] = 0;
me->irq = pci_irq_vector(dd->pcidev, i);
ret = pci_request_irq(dd->pcidev, i, handler, thread, arg,
name);
if (ret) {
dd_dev_err(dd,
"unable to allocate %s interrupt, irq %d, index %d, err %d\n",
err_info, me->irq, idx, ret);
return ret;
}
/*
* assign arg after pci_request_irq call, so it will be
* cleaned up
*/
me->arg = arg;
ret = hfi1_get_irq_affinity(dd, me);
if (ret)
dd_dev_err(dd, "unable to pin IRQ %d\n", ret);
}
return ret;
}
void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd)
{
int i;
for (i = 0; i < dd->vnic.num_ctxt; i++) {
struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i];
struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr];
synchronize_irq(me->irq);
}
}
void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd)
{
struct hfi1_devdata *dd = rcd->dd;
struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr];
if (!me->arg) /* => no irq, no affinity */
return;
hfi1_put_irq_affinity(dd, me);
pci_free_irq(dd->pcidev, rcd->msix_intr, me->arg);
me->arg = NULL;
}
void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd)
{
struct hfi1_devdata *dd = rcd->dd;
struct hfi1_msix_entry *me;
int idx = rcd->ctxt;
void *arg = rcd;
int ret;
rcd->msix_intr = dd->vnic.msix_idx++;
me = &dd->msix_entries[rcd->msix_intr];
/*
* Set the interrupt register and mask for this
* context's interrupt.
*/
rcd->ireg = (IS_RCVAVAIL_START + idx) / 64;
rcd->imask = ((u64)1) <<
((IS_RCVAVAIL_START + idx) % 64);
me->type = IRQ_RCVCTXT;
me->irq = pci_irq_vector(dd->pcidev, rcd->msix_intr);
remap_intr(dd, IS_RCVAVAIL_START + idx, rcd->msix_intr);
ret = pci_request_irq(dd->pcidev, rcd->msix_intr,
receive_context_interrupt,
receive_context_thread, arg,
DRIVER_NAME "_%d kctxt%d", dd->unit, idx);
if (ret) {
dd_dev_err(dd, "vnic irq request (irq %d, idx %d) fail %d\n",
me->irq, idx, ret);
return;
}
/*
* assign arg after pci_request_irq call, so it will be
* cleaned up
*/
me->arg = arg;
ret = hfi1_get_irq_affinity(dd, me);
if (ret) {
dd_dev_err(dd,
"unable to pin IRQ %d\n", ret);
pci_free_irq(dd->pcidev, rcd->msix_intr, me->arg);
}
} }
/* /*
* Set the general handler to accept all interrupts, remap all * Set the general handler to accept all interrupts, remap all
* chip interrupts back to MSI-X 0. * chip interrupts back to MSI-X 0.
*/ */
static void reset_interrupts(struct hfi1_devdata *dd) void reset_interrupts(struct hfi1_devdata *dd)
{ {
int i; int i;
@@ -13318,54 +13125,33 @@ static void reset_interrupts(struct hfi1_devdata *dd)
write_csr(dd, CCE_INT_MAP + (8 * i), 0); write_csr(dd, CCE_INT_MAP + (8 * i), 0);
} }
/**
* set_up_interrupts() - Initialize the IRQ resources and state
* @dd: valid devdata
*
*/
static int set_up_interrupts(struct hfi1_devdata *dd) static int set_up_interrupts(struct hfi1_devdata *dd)
{ {
u32 total; int ret;
int ret, request;
/*
* Interrupt count:
* 1 general, "slow path" interrupt (includes the SDMA engines
* slow source, SDMACleanupDone)
* N interrupts - one per used SDMA engine
* M interrupt - one per kernel receive context
* V interrupt - one for each VNIC context
*/
total = 1 + dd->num_sdma + dd->n_krcv_queues + dd->num_vnic_contexts;
/* ask for MSI-X interrupts */
request = request_msix(dd, total);
if (request < 0) {
ret = request;
goto fail;
} else {
dd->msix_entries = kcalloc(total, sizeof(*dd->msix_entries),
GFP_KERNEL);
if (!dd->msix_entries) {
ret = -ENOMEM;
goto fail;
}
/* using MSI-X */
dd->num_msix_entries = total;
dd_dev_info(dd, "%u MSI-X interrupts allocated\n", total);
}
/* mask all interrupts */ /* mask all interrupts */
set_intr_state(dd, 0); set_intr_bits(dd, IS_FIRST_SOURCE, IS_LAST_SOURCE, false);
/* clear all pending interrupts */ /* clear all pending interrupts */
clear_all_interrupts(dd); clear_all_interrupts(dd);
/* reset general handler mask, chip MSI-X mappings */ /* reset general handler mask, chip MSI-X mappings */
reset_interrupts(dd); reset_interrupts(dd);
ret = request_msix_irqs(dd); /* ask for MSI-X interrupts */
ret = msix_initialize(dd);
if (ret) if (ret)
goto fail; return ret;
return 0; ret = msix_request_irqs(dd);
if (ret)
msix_clean_up_interrupts(dd);
fail:
hfi1_clean_up_interrupts(dd);
return ret; return ret;
} }
@@ -14918,20 +14704,16 @@ err_exit:
} }
/** /**
* Allocate and initialize the device structure for the hfi. * hfi1_init_dd() - Initialize most of the dd structure.
* @dev: the pci_dev for hfi1_ib device * @dev: the pci_dev for hfi1_ib device
* @ent: pci_device_id struct for this dev * @ent: pci_device_id struct for this dev
* *
* Also allocates, initializes, and returns the devdata struct for this
* device instance
*
* This is global, and is called directly at init to set up the * This is global, and is called directly at init to set up the
* chip-specific function pointers for later use. * chip-specific function pointers for later use.
*/ */
struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, int hfi1_init_dd(struct hfi1_devdata *dd)
const struct pci_device_id *ent)
{ {
struct hfi1_devdata *dd; struct pci_dev *pdev = dd->pcidev;
struct hfi1_pportdata *ppd; struct hfi1_pportdata *ppd;
u64 reg; u64 reg;
int i, ret; int i, ret;
@@ -14942,13 +14724,8 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
"Functional simulator" "Functional simulator"
}; };
struct pci_dev *parent = pdev->bus->self; struct pci_dev *parent = pdev->bus->self;
u32 sdma_engines; u32 sdma_engines = chip_sdma_engines(dd);
dd = hfi1_alloc_devdata(pdev, NUM_IB_PORTS *
sizeof(struct hfi1_pportdata));
if (IS_ERR(dd))
goto bail;
sdma_engines = chip_sdma_engines(dd);
ppd = dd->pport; ppd = dd->pport;
for (i = 0; i < dd->num_pports; i++, ppd++) { for (i = 0; i < dd->num_pports; i++, ppd++) {
int vl; int vl;
@@ -15127,6 +14904,12 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
if (ret) if (ret)
goto bail_cleanup; goto bail_cleanup;
/*
* This should probably occur in hfi1_pcie_init(), but historically
* occurs after the do_pcie_gen3_transition() code.
*/
tune_pcie_caps(dd);
/* start setting dd values and adjusting CSRs */ /* start setting dd values and adjusting CSRs */
init_early_variables(dd); init_early_variables(dd);
@@ -15239,14 +15022,13 @@ bail_free_cntrs:
 	free_cntrs(dd);
 bail_clear_intr:
 	hfi1_comp_vectors_clean_up(dd);
-	hfi1_clean_up_interrupts(dd);
+	msix_clean_up_interrupts(dd);
 bail_cleanup:
 	hfi1_pcie_ddcleanup(dd);
 bail_free:
 	hfi1_free_devdata(dd);
-	dd = ERR_PTR(ret);
 bail:
-	return dd;
+	return ret;
 }
static u16 delay_cycles(struct hfi1_pportdata *ppd, u32 desired_egress_rate, static u16 delay_cycles(struct hfi1_pportdata *ppd, u32 desired_egress_rate,


@@ -52,9 +52,7 @@
  */
 /* sizes */
-#define CCE_NUM_MSIX_VECTORS 256
-#define CCE_NUM_INT_CSRS 12
-#define CCE_NUM_INT_MAP_CSRS 96
+#define BITS_PER_REGISTER (BITS_PER_BYTE * sizeof(u64))
 #define NUM_INTERRUPT_SOURCES 768
 #define RXE_NUM_CONTEXTS 160
 #define RXE_PER_CONTEXT_SIZE 0x1000 /* 4k */
@@ -161,34 +159,49 @@
 	(CR_CREDIT_RETURN_DUE_TO_FORCE_MASK << \
 	 CR_CREDIT_RETURN_DUE_TO_FORCE_SHIFT)
-/* interrupt source numbers */
-#define IS_GENERAL_ERR_START 0
-#define IS_SDMAENG_ERR_START 16
-#define IS_SENDCTXT_ERR_START 32
-#define IS_SDMA_START 192 /* includes SDmaProgress,SDmaIdle */
+/* Specific IRQ sources */
+#define CCE_ERR_INT 0
+#define RXE_ERR_INT 1
+#define MISC_ERR_INT 2
+#define PIO_ERR_INT 4
+#define SDMA_ERR_INT 5
+#define EGRESS_ERR_INT 6
+#define TXE_ERR_INT 7
+#define PBC_INT 240
+#define GPIO_ASSERT_INT 241
+#define QSFP1_INT 242
+#define QSFP2_INT 243
+#define TCRIT_INT 244
+/* interrupt source ranges */
+#define IS_FIRST_SOURCE CCE_ERR_INT
+#define IS_GENERAL_ERR_START 0
+#define IS_SDMAENG_ERR_START 16
+#define IS_SENDCTXT_ERR_START 32
+#define IS_SDMA_START 192
+#define IS_SDMA_PROGRESS_START 208
+#define IS_SDMA_IDLE_START 224
 #define IS_VARIOUS_START 240
 #define IS_DC_START 248
 #define IS_RCVAVAIL_START 256
 #define IS_RCVURGENT_START 416
 #define IS_SENDCREDIT_START 576
 #define IS_RESERVED_START 736
-#define IS_MAX_SOURCES 768
+#define IS_LAST_SOURCE 767
 /* derived interrupt source values */
-#define IS_GENERAL_ERR_END IS_SDMAENG_ERR_START
-#define IS_SDMAENG_ERR_END IS_SENDCTXT_ERR_START
-#define IS_SENDCTXT_ERR_END IS_SDMA_START
-#define IS_SDMA_END IS_VARIOUS_START
-#define IS_VARIOUS_END IS_DC_START
-#define IS_DC_END IS_RCVAVAIL_START
-#define IS_RCVAVAIL_END IS_RCVURGENT_START
-#define IS_RCVURGENT_END IS_SENDCREDIT_START
-#define IS_SENDCREDIT_END IS_RESERVED_START
-#define IS_RESERVED_END IS_MAX_SOURCES
-/* absolute interrupt numbers for QSFP1Int and QSFP2Int */
-#define QSFP1_INT 242
-#define QSFP2_INT 243
+#define IS_GENERAL_ERR_END 7
+#define IS_SDMAENG_ERR_END 31
+#define IS_SENDCTXT_ERR_END 191
+#define IS_SDMA_END 207
+#define IS_SDMA_PROGRESS_END 223
+#define IS_SDMA_IDLE_END 239
+#define IS_VARIOUS_END 244
+#define IS_DC_END 255
+#define IS_RCVAVAIL_END 415
+#define IS_RCVURGENT_END 575
+#define IS_SENDCREDIT_END 735
+#define IS_RESERVED_END IS_LAST_SOURCE
 /* DCC_CFG_PORT_CONFIG logical link states */
 #define LSTATE_DOWN 0x1
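For orientation, the helpers that consume these ranges (set_intr_bits(), remap_intr(), and the rcd->ireg/rcd->imask setup in msix.c) all reduce an absolute interrupt source number to a 64-bit CSR index plus a bit inside that register. Below is a minimal, standalone model of that mapping; BITS_PER_REG, the function name and the worked example are illustrative only, not the driver's code.

#include <stdint.h>
#include <stdio.h>

#define BITS_PER_REG 64			/* stands in for BITS_PER_REGISTER */
#define IS_RCVAVAIL_START 256		/* from the table above */

/* Reduce an absolute interrupt source to (register index, bit mask). */
static void source_to_reg_bit(unsigned int isrc, unsigned int *reg, uint64_t *mask)
{
	*reg = isrc / BITS_PER_REG;
	*mask = 1ull << (isrc % BITS_PER_REG);
}

int main(void)
{
	unsigned int reg;
	uint64_t mask;

	/* receive context 3: source 256 + 3 = 259 -> register 4, bit 3 */
	source_to_reg_bit(IS_RCVAVAIL_START + 3, &reg, &mask);
	printf("reg %u mask 0x%llx\n", reg, (unsigned long long)mask);
	return 0;
}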
@@ -1416,6 +1429,18 @@ void hfi1_read_link_quality(struct hfi1_devdata *dd, u8 *link_quality);
void hfi1_init_vnic_rsm(struct hfi1_devdata *dd); void hfi1_init_vnic_rsm(struct hfi1_devdata *dd);
void hfi1_deinit_vnic_rsm(struct hfi1_devdata *dd); void hfi1_deinit_vnic_rsm(struct hfi1_devdata *dd);
irqreturn_t general_interrupt(int irq, void *data);
irqreturn_t sdma_interrupt(int irq, void *data);
irqreturn_t receive_context_interrupt(int irq, void *data);
irqreturn_t receive_context_thread(int irq, void *data);
int set_intr_bits(struct hfi1_devdata *dd, u16 first, u16 last, bool set);
void init_qsfp_int(struct hfi1_devdata *dd);
void clear_all_interrupts(struct hfi1_devdata *dd);
void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr);
void remap_sdma_interrupts(struct hfi1_devdata *dd, int engine, int msix_intr);
void reset_interrupts(struct hfi1_devdata *dd);
/* /*
* Interrupt source table. * Interrupt source table.
* *


@@ -878,6 +878,10 @@
 #define SEND_CTRL (TXE + 0x000000000000)
 #define SEND_CTRL_CM_RESET_SMASK 0x4ull
 #define SEND_CTRL_SEND_ENABLE_SMASK 0x1ull
+#define SEND_CTRL_UNSUPPORTED_VL_SHIFT 3
+#define SEND_CTRL_UNSUPPORTED_VL_MASK 0xFFull
+#define SEND_CTRL_UNSUPPORTED_VL_SMASK (SEND_CTRL_UNSUPPORTED_VL_MASK \
+		<< SEND_CTRL_UNSUPPORTED_VL_SHIFT)
 #define SEND_CTRL_VL_ARBITER_ENABLE_SMASK 0x2ull
 #define SEND_CTXT_CHECK_ENABLE (TXE + 0x000000100080)
 #define SEND_CTXT_CHECK_ENABLE_CHECK_BYPASS_VL_MAPPING_SMASK 0x80ull


@@ -681,7 +681,8 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
 			     HFI1_RCVCTRL_TAILUPD_DIS |
 			     HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
 			     HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
-			     HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt);
+			     HFI1_RCVCTRL_NO_EGR_DROP_DIS |
+			     HFI1_RCVCTRL_URGENT_DIS, uctxt);
 	/* Clear the context's J_KEY */
 	hfi1_clear_ctxt_jkey(dd, uctxt);
 	/*
@@ -1096,6 +1097,7 @@ static void user_init(struct hfi1_ctxtdata *uctxt)
 	hfi1_set_ctxt_jkey(uctxt->dd, uctxt, uctxt->jkey);
 	rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB;
+	rcvctrl_ops |= HFI1_RCVCTRL_URGENT_ENB;
 	if (HFI1_CAP_UGET_MASK(uctxt->flags, HDRSUPP))
 		rcvctrl_ops |= HFI1_RCVCTRL_TIDFLOW_ENB;
 	/*


@@ -80,6 +80,7 @@
#include "qsfp.h" #include "qsfp.h"
#include "platform.h" #include "platform.h"
#include "affinity.h" #include "affinity.h"
#include "msix.h"
/* bumped 1 from s/w major version of TrueScale */ /* bumped 1 from s/w major version of TrueScale */
#define HFI1_CHIP_VERS_MAJ 3U #define HFI1_CHIP_VERS_MAJ 3U
@@ -620,6 +621,8 @@ struct rvt_sge_state;
#define HFI1_RCVCTRL_NO_RHQ_DROP_DIS 0x8000 #define HFI1_RCVCTRL_NO_RHQ_DROP_DIS 0x8000
#define HFI1_RCVCTRL_NO_EGR_DROP_ENB 0x10000 #define HFI1_RCVCTRL_NO_EGR_DROP_ENB 0x10000
#define HFI1_RCVCTRL_NO_EGR_DROP_DIS 0x20000 #define HFI1_RCVCTRL_NO_EGR_DROP_DIS 0x20000
#define HFI1_RCVCTRL_URGENT_ENB 0x40000
#define HFI1_RCVCTRL_URGENT_DIS 0x80000
/* partition enforcement flags */ /* partition enforcement flags */
#define HFI1_PART_ENFORCE_IN 0x1 #define HFI1_PART_ENFORCE_IN 0x1
@@ -667,6 +670,14 @@ struct hfi1_msix_entry {
struct irq_affinity_notify notify; struct irq_affinity_notify notify;
}; };
struct hfi1_msix_info {
/* lock to synchronize in_use_msix access */
spinlock_t msix_lock;
DECLARE_BITMAP(in_use_msix, CCE_NUM_MSIX_VECTORS);
struct hfi1_msix_entry *msix_entries;
u16 max_requested;
};
/* per-SL CCA information */ /* per-SL CCA information */
struct cca_timer { struct cca_timer {
struct hrtimer hrtimer; struct hrtimer hrtimer;
@@ -992,7 +1003,6 @@ struct hfi1_vnic_data {
struct idr vesw_idr; struct idr vesw_idr;
u8 rmt_start; u8 rmt_start;
u8 num_ctxt; u8 num_ctxt;
u32 msix_idx;
}; };
struct hfi1_vnic_vport_info; struct hfi1_vnic_vport_info;
@@ -1205,11 +1215,6 @@ struct hfi1_devdata {
struct diag_client *diag_client; struct diag_client *diag_client;
/* MSI-X information */
struct hfi1_msix_entry *msix_entries;
u32 num_msix_entries;
u32 first_dyn_msix_idx;
/* general interrupt: mask of handled interrupts */ /* general interrupt: mask of handled interrupts */
u64 gi_mask[CCE_NUM_INT_CSRS]; u64 gi_mask[CCE_NUM_INT_CSRS];
@@ -1223,6 +1228,9 @@ struct hfi1_devdata {
*/ */
struct timer_list synth_stats_timer; struct timer_list synth_stats_timer;
/* MSI-X information */
struct hfi1_msix_info msix_info;
/* /*
* device counters * device counters
*/ */
@@ -1349,6 +1357,8 @@ struct hfi1_devdata {
/* vnic data */ /* vnic data */
struct hfi1_vnic_data vnic; struct hfi1_vnic_data vnic;
/* Lock to protect IRQ SRC register access */
spinlock_t irq_src_lock;
}; };
static inline bool hfi1_vnic_is_rsm_full(struct hfi1_devdata *dd, int spare) static inline bool hfi1_vnic_is_rsm_full(struct hfi1_devdata *dd, int spare)
@@ -1431,9 +1441,6 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread);
int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread); int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread);
int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread); int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread);
void set_all_slowpath(struct hfi1_devdata *dd); void set_all_slowpath(struct hfi1_devdata *dd);
void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd);
void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd);
void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd);
extern const struct pci_device_id hfi1_pci_tbl[]; extern const struct pci_device_id hfi1_pci_tbl[];
void hfi1_make_ud_req_9B(struct rvt_qp *qp, void hfi1_make_ud_req_9B(struct rvt_qp *qp,
@@ -1887,10 +1894,8 @@ struct cc_state *get_cc_state_protected(struct hfi1_pportdata *ppd)
 #define HFI1_CTXT_WAITING_URG 4
 /* free up any allocated data at closes */
-struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
-				  const struct pci_device_id *ent);
+int hfi1_init_dd(struct hfi1_devdata *dd);
 void hfi1_free_devdata(struct hfi1_devdata *dd);
-struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra);
/* LED beaconing functions */ /* LED beaconing functions */
void hfi1_start_led_override(struct hfi1_pportdata *ppd, unsigned int timeon, void hfi1_start_led_override(struct hfi1_pportdata *ppd, unsigned int timeon,
@@ -1963,6 +1968,7 @@ static inline u32 get_rcvhdrtail(const struct hfi1_ctxtdata *rcd)
*/ */
extern const char ib_hfi1_version[]; extern const char ib_hfi1_version[];
extern const struct attribute_group ib_hfi1_attr_group;
int hfi1_device_create(struct hfi1_devdata *dd); int hfi1_device_create(struct hfi1_devdata *dd);
void hfi1_device_remove(struct hfi1_devdata *dd); void hfi1_device_remove(struct hfi1_devdata *dd);
@@ -1974,16 +1980,15 @@ void hfi1_verbs_unregister_sysfs(struct hfi1_devdata *dd);
 /* Hook for sysfs read of QSFP */
 int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len);
-int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent);
-void hfi1_clean_up_interrupts(struct hfi1_devdata *dd);
+int hfi1_pcie_init(struct hfi1_devdata *dd);
 void hfi1_pcie_cleanup(struct pci_dev *pdev);
 int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev);
 void hfi1_pcie_ddcleanup(struct hfi1_devdata *);
 int pcie_speeds(struct hfi1_devdata *dd);
-int request_msix(struct hfi1_devdata *dd, u32 msireq);
 int restore_pci_variables(struct hfi1_devdata *dd);
 int save_pci_variables(struct hfi1_devdata *dd);
 int do_pcie_gen3_transition(struct hfi1_devdata *dd);
+void tune_pcie_caps(struct hfi1_devdata *dd);
 int parse_platform_config(struct hfi1_devdata *dd);
 int get_platform_config_field(struct hfi1_devdata *dd,
 			      enum platform_config_table_type_encoding
@@ -2124,19 +2129,6 @@ static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd)
return base_sdma_integrity; return base_sdma_integrity;
} }
/*
* hfi1_early_err is used (only!) to print early errors before devdata is
* allocated, or when dd->pcidev may not be valid, and at the tail end of
* cleanup when devdata may have been freed, etc. hfi1_dev_porterr is
* the same as dd_dev_err, but is used when the message really needs
* the IB port# to be definitive as to what's happening..
*/
#define hfi1_early_err(dev, fmt, ...) \
dev_err(dev, fmt, ##__VA_ARGS__)
#define hfi1_early_info(dev, fmt, ...) \
dev_info(dev, fmt, ##__VA_ARGS__)
#define dd_dev_emerg(dd, fmt, ...) \ #define dd_dev_emerg(dd, fmt, ...) \
dev_emerg(&(dd)->pcidev->dev, "%s: " fmt, \ dev_emerg(&(dd)->pcidev->dev, "%s: " fmt, \
rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), ##__VA_ARGS__) rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), ##__VA_ARGS__)

View File

@@ -83,6 +83,8 @@
#define HFI1_MIN_EAGER_BUFFER_SIZE (4 * 1024) /* 4KB */ #define HFI1_MIN_EAGER_BUFFER_SIZE (4 * 1024) /* 4KB */
#define HFI1_MAX_EAGER_BUFFER_SIZE (256 * 1024) /* 256KB */ #define HFI1_MAX_EAGER_BUFFER_SIZE (256 * 1024) /* 256KB */
#define NUM_IB_PORTS 1
/* /*
* Number of user receive contexts we are configured to use (to allow for more * Number of user receive contexts we are configured to use (to allow for more
* pio buffers per ctxt, etc.) Zero means use one user context per CPU. * pio buffers per ctxt, etc.) Zero means use one user context per CPU.
@@ -654,9 +656,8 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
ppd->part_enforce |= HFI1_PART_ENFORCE_IN; ppd->part_enforce |= HFI1_PART_ENFORCE_IN;
if (loopback) { if (loopback) {
hfi1_early_err(&pdev->dev, dd_dev_err(dd, "Faking data partition 0x8001 in idx %u\n",
"Faking data partition 0x8001 in idx %u\n", !default_pkey_idx);
!default_pkey_idx);
ppd->pkeys[!default_pkey_idx] = 0x8001; ppd->pkeys[!default_pkey_idx] = 0x8001;
} }
@@ -702,9 +703,7 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
return; return;
bail: bail:
dd_dev_err(dd, "Congestion Control Agent disabled for port %d\n", port);
hfi1_early_err(&pdev->dev,
"Congestion Control Agent disabled for port %d\n", port);
} }
/* /*
@@ -832,6 +831,23 @@ wq_error:
return -ENOMEM; return -ENOMEM;
} }
/**
* enable_general_intr() - Enable the IRQs that will be handled by the
* general interrupt handler.
* @dd: valid devdata
*
*/
static void enable_general_intr(struct hfi1_devdata *dd)
{
set_intr_bits(dd, CCE_ERR_INT, MISC_ERR_INT, true);
set_intr_bits(dd, PIO_ERR_INT, TXE_ERR_INT, true);
set_intr_bits(dd, IS_SENDCTXT_ERR_START, IS_SENDCTXT_ERR_END, true);
set_intr_bits(dd, PBC_INT, GPIO_ASSERT_INT, true);
set_intr_bits(dd, TCRIT_INT, TCRIT_INT, true);
set_intr_bits(dd, IS_DC_START, IS_DC_END, true);
set_intr_bits(dd, IS_SENDCREDIT_START, IS_SENDCREDIT_END, true);
}
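enable_general_intr() relies on set_intr_bits(dd, first, last, set) taking an inclusive [first, last] source range, so a single source is enabled by passing the same value twice, as the TCRIT_INT line does. A rough userspace sketch of that contract, using a plain bitmap in place of the chip's interrupt-enable CSRs (all names here are illustrative):

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

#define NUM_SOURCES 768
static uint64_t intr_enable[NUM_SOURCES / 64];	/* stand-in for the enable CSRs */

/* Set or clear every source in the inclusive range [first, last]. */
static int set_bits_range(uint16_t first, uint16_t last, bool set)
{
	if (first > last || last >= NUM_SOURCES)
		return -1;
	for (uint16_t src = first; src <= last; src++) {
		if (set)
			intr_enable[src / 64] |= 1ull << (src % 64);
		else
			intr_enable[src / 64] &= ~(1ull << (src % 64));
	}
	return 0;
}

int main(void)
{
	memset(intr_enable, 0, sizeof(intr_enable));
	set_bits_range(0, 2, true);	/* a small range: CCE_ERR_INT..MISC_ERR_INT */
	set_bits_range(244, 244, true);	/* a single source, TCRIT_INT style */
	set_bits_range(0, 767, false);	/* mask everything, IS_FIRST..IS_LAST style */
	return 0;
}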
/** /**
* hfi1_init - do the actual initialization sequence on the chip * hfi1_init - do the actual initialization sequence on the chip
* @dd: the hfi1_ib device * @dd: the hfi1_ib device
@@ -916,6 +932,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
"failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n"); "failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n");
ret = lastfail; ret = lastfail;
} }
/* enable IRQ */
hfi1_rcd_put(rcd); hfi1_rcd_put(rcd);
} }
@@ -954,7 +971,8 @@ done:
HFI1_STATUS_INITTED; HFI1_STATUS_INITTED;
if (!ret) { if (!ret) {
/* enable all interrupts from the chip */ /* enable all interrupts from the chip */
set_intr_state(dd, 1); enable_general_intr(dd);
init_qsfp_int(dd);
/* chip is OK for user apps; mark it as initialized */ /* chip is OK for user apps; mark it as initialized */
for (pidx = 0; pidx < dd->num_pports; ++pidx) { for (pidx = 0; pidx < dd->num_pports; ++pidx) {
@@ -1051,9 +1069,9 @@ static void shutdown_device(struct hfi1_devdata *dd)
} }
dd->flags &= ~HFI1_INITTED; dd->flags &= ~HFI1_INITTED;
/* mask and clean up interrupts, but not errors */ /* mask and clean up interrupts */
set_intr_state(dd, 0); set_intr_bits(dd, IS_FIRST_SOURCE, IS_LAST_SOURCE, false);
hfi1_clean_up_interrupts(dd); msix_clean_up_interrupts(dd);
for (pidx = 0; pidx < dd->num_pports; ++pidx) { for (pidx = 0; pidx < dd->num_pports; ++pidx) {
ppd = dd->pport + pidx; ppd = dd->pport + pidx;
@@ -1246,15 +1264,19 @@ void hfi1_free_devdata(struct hfi1_devdata *dd)
kobject_put(&dd->kobj); kobject_put(&dd->kobj);
} }
/* /**
* Allocate our primary per-unit data structure. Must be done via verbs * hfi1_alloc_devdata - Allocate our primary per-unit data structure.
* allocator, because the verbs cleanup process both does cleanup and * @pdev: Valid PCI device
* free of the data structure. * @extra: How many bytes to alloc past the default
*
* Must be done via verbs allocator, because the verbs cleanup process
* both does cleanup and free of the data structure.
* "extra" is for chip-specific data. * "extra" is for chip-specific data.
* *
* Use the idr mechanism to get a unit number for this unit. * Use the idr mechanism to get a unit number for this unit.
*/ */
struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra) static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev,
size_t extra)
{ {
unsigned long flags; unsigned long flags;
struct hfi1_devdata *dd; struct hfi1_devdata *dd;
@@ -1287,8 +1309,8 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
idr_preload_end(); idr_preload_end();
if (ret < 0) { if (ret < 0) {
hfi1_early_err(&pdev->dev, dev_err(&pdev->dev,
"Could not allocate unit ID: error %d\n", -ret); "Could not allocate unit ID: error %d\n", -ret);
goto bail; goto bail;
} }
rvt_set_ibdev_name(&dd->verbs_dev.rdi, "%s_%d", class_name(), dd->unit); rvt_set_ibdev_name(&dd->verbs_dev.rdi, "%s_%d", class_name(), dd->unit);
@@ -1309,6 +1331,7 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
spin_lock_init(&dd->pio_map_lock); spin_lock_init(&dd->pio_map_lock);
mutex_init(&dd->dc8051_lock); mutex_init(&dd->dc8051_lock);
init_waitqueue_head(&dd->event_queue); init_waitqueue_head(&dd->event_queue);
spin_lock_init(&dd->irq_src_lock);
dd->int_counter = alloc_percpu(u64); dd->int_counter = alloc_percpu(u64);
if (!dd->int_counter) { if (!dd->int_counter) {
@@ -1481,9 +1504,6 @@ static int __init hfi1_mod_init(void)
idr_init(&hfi1_unit_table); idr_init(&hfi1_unit_table);
hfi1_dbg_init(); hfi1_dbg_init();
ret = hfi1_wss_init();
if (ret < 0)
goto bail_wss;
ret = pci_register_driver(&hfi1_pci_driver); ret = pci_register_driver(&hfi1_pci_driver);
if (ret < 0) { if (ret < 0) {
pr_err("Unable to register driver: error %d\n", -ret); pr_err("Unable to register driver: error %d\n", -ret);
@@ -1492,8 +1512,6 @@ static int __init hfi1_mod_init(void)
goto bail; /* all OK */ goto bail; /* all OK */
bail_dev: bail_dev:
hfi1_wss_exit();
bail_wss:
hfi1_dbg_exit(); hfi1_dbg_exit();
idr_destroy(&hfi1_unit_table); idr_destroy(&hfi1_unit_table);
dev_cleanup(); dev_cleanup();
@@ -1510,7 +1528,6 @@ static void __exit hfi1_mod_cleanup(void)
{ {
pci_unregister_driver(&hfi1_pci_driver); pci_unregister_driver(&hfi1_pci_driver);
node_affinity_destroy_all(); node_affinity_destroy_all();
hfi1_wss_exit();
hfi1_dbg_exit(); hfi1_dbg_exit();
idr_destroy(&hfi1_unit_table); idr_destroy(&hfi1_unit_table);
@@ -1604,23 +1621,23 @@ static void postinit_cleanup(struct hfi1_devdata *dd)
hfi1_free_devdata(dd); hfi1_free_devdata(dd);
} }
static int init_validate_rcvhdrcnt(struct device *dev, uint thecnt) static int init_validate_rcvhdrcnt(struct hfi1_devdata *dd, uint thecnt)
{ {
if (thecnt <= HFI1_MIN_HDRQ_EGRBUF_CNT) { if (thecnt <= HFI1_MIN_HDRQ_EGRBUF_CNT) {
hfi1_early_err(dev, "Receive header queue count too small\n"); dd_dev_err(dd, "Receive header queue count too small\n");
return -EINVAL; return -EINVAL;
} }
if (thecnt > HFI1_MAX_HDRQ_EGRBUF_CNT) { if (thecnt > HFI1_MAX_HDRQ_EGRBUF_CNT) {
hfi1_early_err(dev, dd_dev_err(dd,
"Receive header queue count cannot be greater than %u\n", "Receive header queue count cannot be greater than %u\n",
HFI1_MAX_HDRQ_EGRBUF_CNT); HFI1_MAX_HDRQ_EGRBUF_CNT);
return -EINVAL; return -EINVAL;
} }
if (thecnt % HDRQ_INCREMENT) { if (thecnt % HDRQ_INCREMENT) {
hfi1_early_err(dev, "Receive header queue count %d must be divisible by %lu\n", dd_dev_err(dd, "Receive header queue count %d must be divisible by %lu\n",
thecnt, HDRQ_INCREMENT); thecnt, HDRQ_INCREMENT);
return -EINVAL; return -EINVAL;
} }
@@ -1639,22 +1656,29 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
/* Validate dev ids */ /* Validate dev ids */
if (!(ent->device == PCI_DEVICE_ID_INTEL0 || if (!(ent->device == PCI_DEVICE_ID_INTEL0 ||
ent->device == PCI_DEVICE_ID_INTEL1)) { ent->device == PCI_DEVICE_ID_INTEL1)) {
hfi1_early_err(&pdev->dev, dev_err(&pdev->dev, "Failing on unknown Intel deviceid 0x%x\n",
"Failing on unknown Intel deviceid 0x%x\n", ent->device);
ent->device);
ret = -ENODEV; ret = -ENODEV;
goto bail; goto bail;
} }
/* Allocate the dd so we can get to work */
dd = hfi1_alloc_devdata(pdev, NUM_IB_PORTS *
sizeof(struct hfi1_pportdata));
if (IS_ERR(dd)) {
ret = PTR_ERR(dd);
goto bail;
}
/* Validate some global module parameters */ /* Validate some global module parameters */
ret = init_validate_rcvhdrcnt(&pdev->dev, rcvhdrcnt); ret = init_validate_rcvhdrcnt(dd, rcvhdrcnt);
if (ret) if (ret)
goto bail; goto bail;
/* use the encoding function as a sanitization check */ /* use the encoding function as a sanitization check */
if (!encode_rcv_header_entry_size(hfi1_hdrq_entsize)) { if (!encode_rcv_header_entry_size(hfi1_hdrq_entsize)) {
hfi1_early_err(&pdev->dev, "Invalid HdrQ Entry size %u\n", dd_dev_err(dd, "Invalid HdrQ Entry size %u\n",
hfi1_hdrq_entsize); hfi1_hdrq_entsize);
ret = -EINVAL; ret = -EINVAL;
goto bail; goto bail;
} }
@@ -1676,10 +1700,10 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
clamp_val(eager_buffer_size, clamp_val(eager_buffer_size,
MIN_EAGER_BUFFER * 8, MIN_EAGER_BUFFER * 8,
MAX_EAGER_BUFFER_TOTAL); MAX_EAGER_BUFFER_TOTAL);
hfi1_early_info(&pdev->dev, "Eager buffer size %u\n", dd_dev_info(dd, "Eager buffer size %u\n",
eager_buffer_size); eager_buffer_size);
} else { } else {
hfi1_early_err(&pdev->dev, "Invalid Eager buffer size of 0\n"); dd_dev_err(dd, "Invalid Eager buffer size of 0\n");
ret = -EINVAL; ret = -EINVAL;
goto bail; goto bail;
} }
@@ -1687,7 +1711,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
/* restrict value of hfi1_rcvarr_split */ /* restrict value of hfi1_rcvarr_split */
hfi1_rcvarr_split = clamp_val(hfi1_rcvarr_split, 0, 100); hfi1_rcvarr_split = clamp_val(hfi1_rcvarr_split, 0, 100);
ret = hfi1_pcie_init(pdev, ent); ret = hfi1_pcie_init(dd);
if (ret) if (ret)
goto bail; goto bail;
@@ -1695,12 +1719,9 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
* Do device-specific initialization, function table setup, dd * Do device-specific initialization, function table setup, dd
* allocation, etc. * allocation, etc.
*/ */
dd = hfi1_init_dd(pdev, ent); ret = hfi1_init_dd(dd);
if (ret)
if (IS_ERR(dd)) {
ret = PTR_ERR(dd);
goto clean_bail; /* error already printed */ goto clean_bail; /* error already printed */
}
ret = create_workqueues(dd); ret = create_workqueues(dd);
if (ret) if (ret)
@@ -1731,7 +1752,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
dd_dev_err(dd, "Failed to create /dev devices: %d\n", -j); dd_dev_err(dd, "Failed to create /dev devices: %d\n", -j);
if (initfail || ret) { if (initfail || ret) {
hfi1_clean_up_interrupts(dd); msix_clean_up_interrupts(dd);
stop_timers(dd); stop_timers(dd);
flush_workqueue(ib_wq); flush_workqueue(ib_wq);
for (pidx = 0; pidx < dd->num_pports; ++pidx) { for (pidx = 0; pidx < dd->num_pports; ++pidx) {


@@ -0,0 +1,94 @@
// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
/*
* Copyright(c) 2018 Intel Corporation.
*
*/
#include "iowait.h"
#include "trace_iowait.h"
void iowait_set_flag(struct iowait *wait, u32 flag)
{
trace_hfi1_iowait_set(wait, flag);
set_bit(flag, &wait->flags);
}
bool iowait_flag_set(struct iowait *wait, u32 flag)
{
return test_bit(flag, &wait->flags);
}
inline void iowait_clear_flag(struct iowait *wait, u32 flag)
{
trace_hfi1_iowait_clear(wait, flag);
clear_bit(flag, &wait->flags);
}
/**
* iowait_init() - initialize wait structure
* @wait: wait struct to initialize
* @tx_limit: limit for overflow queuing
* @func: restart function for workqueue
* @sleep: sleep function for no space
* @resume: wakeup function for no space
*
* This function initializes the iowait
* structure embedded in the QP or PQ.
*
*/
void iowait_init(struct iowait *wait, u32 tx_limit,
void (*func)(struct work_struct *work),
void (*tidfunc)(struct work_struct *work),
int (*sleep)(struct sdma_engine *sde,
struct iowait_work *wait,
struct sdma_txreq *tx,
uint seq,
bool pkts_sent),
void (*wakeup)(struct iowait *wait, int reason),
void (*sdma_drained)(struct iowait *wait))
{
int i;
wait->count = 0;
INIT_LIST_HEAD(&wait->list);
init_waitqueue_head(&wait->wait_dma);
init_waitqueue_head(&wait->wait_pio);
atomic_set(&wait->sdma_busy, 0);
atomic_set(&wait->pio_busy, 0);
wait->tx_limit = tx_limit;
wait->sleep = sleep;
wait->wakeup = wakeup;
wait->sdma_drained = sdma_drained;
wait->flags = 0;
for (i = 0; i < IOWAIT_SES; i++) {
wait->wait[i].iow = wait;
INIT_LIST_HEAD(&wait->wait[i].tx_head);
if (i == IOWAIT_IB_SE)
INIT_WORK(&wait->wait[i].iowork, func);
else
INIT_WORK(&wait->wait[i].iowork, tidfunc);
}
}
/**
* iowait_cancel_work - cancel all work in iowait
* @w: the iowait struct
*/
void iowait_cancel_work(struct iowait *w)
{
cancel_work_sync(&iowait_get_ib_work(w)->iowork);
cancel_work_sync(&iowait_get_tid_work(w)->iowork);
}
/**
* iowait_set_work_flag - set work flag based on leg
* @w - the iowait work struct
*/
int iowait_set_work_flag(struct iowait_work *w)
{
if (w == &w->iow->wait[IOWAIT_IB_SE]) {
iowait_set_flag(w->iow, IOWAIT_PENDING_IB);
return IOWAIT_IB_SE;
}
iowait_set_flag(w->iow, IOWAIT_PENDING_TID);
return IOWAIT_TID_SE;
}
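The effect of the wait[] array is that one waiter now owns two independently schedulable work items: one for the verbs (IB) send engine and one for the TID RDMA send engine, wired up by iowait_init() from func and tidfunc respectively. The following is a small userspace model of that shape only; it uses plain function pointers instead of workqueues and none of it is the driver's code:

#include <stdio.h>

#define SE_IB	0
#define SE_TID	1
#define NUM_SES	2

struct waiter;

struct waiter_work {
	void (*run)(struct waiter_work *w);	/* stands in for the work_struct */
	struct waiter *owner;			/* back-pointer, like iowait_work::iow */
};

struct waiter {
	struct waiter_work wait[NUM_SES];
};

static void ib_leg(struct waiter_work *w)  { printf("IB leg of %p\n", (void *)w->owner); }
static void tid_leg(struct waiter_work *w) { printf("TID leg of %p\n", (void *)w->owner); }

static void waiter_init(struct waiter *w,
			void (*ibfunc)(struct waiter_work *),
			void (*tidfunc)(struct waiter_work *))
{
	for (int i = 0; i < NUM_SES; i++) {
		w->wait[i].owner = w;
		w->wait[i].run = (i == SE_IB) ? ibfunc : tidfunc;
	}
}

int main(void)
{
	struct waiter w;

	waiter_init(&w, ib_leg, tid_leg);
	w.wait[SE_IB].run(&w.wait[SE_IB]);	/* "schedule" the verbs leg */
	w.wait[SE_TID].run(&w.wait[SE_TID]);	/* "schedule" the TID RDMA leg */
	return 0;
}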


@@ -1,7 +1,7 @@
 #ifndef _HFI1_IOWAIT_H
 #define _HFI1_IOWAIT_H
 /*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license. When using or
  * redistributing this file, you may do so under either license.
@@ -49,6 +49,7 @@
#include <linux/list.h> #include <linux/list.h>
#include <linux/workqueue.h> #include <linux/workqueue.h>
#include <linux/wait.h>
#include <linux/sched.h> #include <linux/sched.h>
#include "sdma_txreq.h" #include "sdma_txreq.h"
@@ -59,16 +60,47 @@
*/ */
typedef void (*restart_t)(struct work_struct *work); typedef void (*restart_t)(struct work_struct *work);
#define IOWAIT_PENDING_IB 0x0
#define IOWAIT_PENDING_TID 0x1
/*
* A QP can have multiple Send Engines (SEs).
*
* The current use case is for supporting a TID RDMA
* packet build/xmit mechanism independent from verbs.
*/
#define IOWAIT_SES 2
#define IOWAIT_IB_SE 0
#define IOWAIT_TID_SE 1
struct sdma_txreq; struct sdma_txreq;
struct sdma_engine; struct sdma_engine;
/** /**
* struct iowait - linkage for delayed progress/waiting * @iowork: the work struct
* @tx_head: list of prebuilt packets
* @iow: the parent iowait structure
*
* This structure is the work item (process) specific
* details associated with the each of the two SEs of the
* QP.
*
* The workstruct and the queued TXs are unique to each
* SE.
*/
struct iowait;
struct iowait_work {
struct work_struct iowork;
struct list_head tx_head;
struct iowait *iow;
};
/**
* @list: used to add/insert into QP/PQ wait lists * @list: used to add/insert into QP/PQ wait lists
* @lock: used to record the list head lock
* @tx_head: overflow list of sdma_txreq's * @tx_head: overflow list of sdma_txreq's
* @sleep: no space callback * @sleep: no space callback
* @wakeup: space callback wakeup * @wakeup: space callback wakeup
* @sdma_drained: sdma count drained * @sdma_drained: sdma count drained
* @lock: lock protected head of wait queue
* @iowork: workqueue overhead * @iowork: workqueue overhead
* @wait_dma: wait for sdma_busy == 0 * @wait_dma: wait for sdma_busy == 0
* @wait_pio: wait for pio_busy == 0 * @wait_pio: wait for pio_busy == 0
@@ -76,6 +108,8 @@ struct sdma_engine;
* @count: total number of descriptors in tx_head'ed list * @count: total number of descriptors in tx_head'ed list
* @tx_limit: limit for overflow queuing * @tx_limit: limit for overflow queuing
* @tx_count: number of tx entry's in tx_head'ed list * @tx_count: number of tx entry's in tx_head'ed list
* @flags: wait flags (one per QP)
* @wait: SE array
* *
* This is to be embedded in user's state structure * This is to be embedded in user's state structure
* (QP or PQ). * (QP or PQ).
@@ -98,13 +132,11 @@ struct sdma_engine;
* Waiters explicity know that, but the destroy * Waiters explicity know that, but the destroy
* code that unwaits QPs does not. * code that unwaits QPs does not.
*/ */
struct iowait { struct iowait {
struct list_head list; struct list_head list;
struct list_head tx_head;
int (*sleep)( int (*sleep)(
struct sdma_engine *sde, struct sdma_engine *sde,
struct iowait *wait, struct iowait_work *wait,
struct sdma_txreq *tx, struct sdma_txreq *tx,
uint seq, uint seq,
bool pkts_sent bool pkts_sent
@@ -112,7 +144,6 @@ struct iowait {
void (*wakeup)(struct iowait *wait, int reason); void (*wakeup)(struct iowait *wait, int reason);
void (*sdma_drained)(struct iowait *wait); void (*sdma_drained)(struct iowait *wait);
seqlock_t *lock; seqlock_t *lock;
struct work_struct iowork;
wait_queue_head_t wait_dma; wait_queue_head_t wait_dma;
wait_queue_head_t wait_pio; wait_queue_head_t wait_pio;
atomic_t sdma_busy; atomic_t sdma_busy;
@@ -121,63 +152,37 @@ struct iowait {
u32 tx_limit; u32 tx_limit;
u32 tx_count; u32 tx_count;
u8 starved_cnt; u8 starved_cnt;
unsigned long flags;
struct iowait_work wait[IOWAIT_SES];
}; };
#define SDMA_AVAIL_REASON 0 #define SDMA_AVAIL_REASON 0
/** void iowait_set_flag(struct iowait *wait, u32 flag);
* iowait_init() - initialize wait structure bool iowait_flag_set(struct iowait *wait, u32 flag);
* @wait: wait struct to initialize void iowait_clear_flag(struct iowait *wait, u32 flag);
* @tx_limit: limit for overflow queuing
* @func: restart function for workqueue
* @sleep: sleep function for no space
* @resume: wakeup function for no space
*
* This function initializes the iowait
* structure embedded in the QP or PQ.
*
*/
static inline void iowait_init( void iowait_init(struct iowait *wait, u32 tx_limit,
struct iowait *wait, void (*func)(struct work_struct *work),
u32 tx_limit, void (*tidfunc)(struct work_struct *work),
void (*func)(struct work_struct *work), int (*sleep)(struct sdma_engine *sde,
int (*sleep)( struct iowait_work *wait,
struct sdma_engine *sde, struct sdma_txreq *tx,
struct iowait *wait, uint seq,
struct sdma_txreq *tx, bool pkts_sent),
uint seq, void (*wakeup)(struct iowait *wait, int reason),
bool pkts_sent), void (*sdma_drained)(struct iowait *wait));
void (*wakeup)(struct iowait *wait, int reason),
void (*sdma_drained)(struct iowait *wait))
{
wait->count = 0;
wait->lock = NULL;
INIT_LIST_HEAD(&wait->list);
INIT_LIST_HEAD(&wait->tx_head);
INIT_WORK(&wait->iowork, func);
init_waitqueue_head(&wait->wait_dma);
init_waitqueue_head(&wait->wait_pio);
atomic_set(&wait->sdma_busy, 0);
atomic_set(&wait->pio_busy, 0);
wait->tx_limit = tx_limit;
wait->sleep = sleep;
wait->wakeup = wakeup;
wait->sdma_drained = sdma_drained;
}
/** /**
* iowait_schedule() - initialize wait structure * iowait_schedule() - schedule the default send engine work
* @wait: wait struct to schedule * @wait: wait struct to schedule
* @wq: workqueue for schedule * @wq: workqueue for schedule
* @cpu: cpu * @cpu: cpu
*/ */
static inline void iowait_schedule( static inline bool iowait_schedule(struct iowait *wait,
struct iowait *wait, struct workqueue_struct *wq, int cpu)
struct workqueue_struct *wq,
int cpu)
{ {
queue_work_on(cpu, wq, &wait->iowork); return !!queue_work_on(cpu, wq, &wait->wait[IOWAIT_IB_SE].iowork);
} }
/** /**
@@ -228,6 +233,8 @@ static inline void iowait_sdma_add(struct iowait *wait, int count)
*/ */
static inline int iowait_sdma_dec(struct iowait *wait) static inline int iowait_sdma_dec(struct iowait *wait)
{ {
if (!wait)
return 0;
return atomic_dec_and_test(&wait->sdma_busy); return atomic_dec_and_test(&wait->sdma_busy);
} }
@@ -267,11 +274,13 @@ static inline void iowait_pio_inc(struct iowait *wait)
} }
/** /**
* iowait_sdma_dec - note pio complete * iowait_pio_dec - note pio complete
* @wait: iowait structure * @wait: iowait structure
*/ */
static inline int iowait_pio_dec(struct iowait *wait) static inline int iowait_pio_dec(struct iowait *wait)
{ {
if (!wait)
return 0;
return atomic_dec_and_test(&wait->pio_busy); return atomic_dec_and_test(&wait->pio_busy);
} }
@@ -293,9 +302,9 @@ static inline void iowait_drain_wakeup(struct iowait *wait)
 /**
  * iowait_get_txhead() - get packet off of iowait list
  *
- * @wait wait struture
+ * @wait: iowait_work structure
  */
-static inline struct sdma_txreq *iowait_get_txhead(struct iowait *wait)
+static inline struct sdma_txreq *iowait_get_txhead(struct iowait_work *wait)
 {
 	struct sdma_txreq *tx = NULL;
@@ -309,6 +318,28 @@ static inline struct sdma_txreq *iowait_get_txhead(struct iowait *wait)
return tx; return tx;
} }
static inline u16 iowait_get_desc(struct iowait_work *w)
{
u16 num_desc = 0;
struct sdma_txreq *tx = NULL;
if (!list_empty(&w->tx_head)) {
tx = list_first_entry(&w->tx_head, struct sdma_txreq,
list);
num_desc = tx->num_desc;
}
return num_desc;
}
static inline u32 iowait_get_all_desc(struct iowait *w)
{
u32 num_desc = 0;
num_desc = iowait_get_desc(&w->wait[IOWAIT_IB_SE]);
num_desc += iowait_get_desc(&w->wait[IOWAIT_TID_SE]);
return num_desc;
}
/** /**
* iowait_queue - Put the iowait on a wait queue * iowait_queue - Put the iowait on a wait queue
* @pkts_sent: have some packets been sent before queuing? * @pkts_sent: have some packets been sent before queuing?
@@ -372,12 +403,57 @@ static inline void iowait_starve_find_max(struct iowait *w, u8 *max,
} }
/** /**
* iowait_packet_queued() - determine if a packet is already built * iowait_packet_queued() - determine if a packet is queued
* @wait: the wait structure * @wait: the iowait_work structure
*/ */
static inline bool iowait_packet_queued(struct iowait *wait) static inline bool iowait_packet_queued(struct iowait_work *wait)
{ {
return !list_empty(&wait->tx_head); return !list_empty(&wait->tx_head);
} }
/**
* iowait_inc_wait_count - increment wait counts
* @w: the iowait work struct
* @n: the count
*/
static inline void iowait_inc_wait_count(struct iowait_work *w, u16 n)
{
if (!w)
return;
w->iow->tx_count++;
w->iow->count += n;
}
/**
* iowait_get_tid_work - return iowait_work for tid SE
* @w: the iowait struct
*/
static inline struct iowait_work *iowait_get_tid_work(struct iowait *w)
{
return &w->wait[IOWAIT_TID_SE];
}
/**
* iowait_get_ib_work - return iowait_work for ib SE
* @w: the iowait struct
*/
static inline struct iowait_work *iowait_get_ib_work(struct iowait *w)
{
return &w->wait[IOWAIT_IB_SE];
}
/**
* iowait_ioww_to_iow - return iowait given iowait_work
* @w: the iowait_work struct
*/
static inline struct iowait *iowait_ioww_to_iow(struct iowait_work *w)
{
if (likely(w))
return w->iow;
return NULL;
}
void iowait_cancel_work(struct iowait *w);
int iowait_set_work_flag(struct iowait_work *w);
#endif #endif
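One subtlety: IOWAIT_PENDING_IB and IOWAIT_PENDING_TID are bit numbers handed to set_bit()/test_bit() on iowait::flags, so the value 0x0 means bit 0, not an empty mask. A simplified model of the resulting check-and-clear pattern, written with ordinary masks instead of kernel atomics and intended purely as illustration:

#include <stdio.h>

#define PENDING_IB_BIT	(1u << 0)	/* models bit IOWAIT_PENDING_IB */
#define PENDING_TID_BIT	(1u << 1)	/* models bit IOWAIT_PENDING_TID */

/* Run whichever legs flagged themselves as pending, clearing as we go. */
static void run_pending(unsigned int *flags)
{
	if (*flags & PENDING_IB_BIT) {
		*flags &= ~PENDING_IB_BIT;
		printf("run IB send engine\n");
	}
	if (*flags & PENDING_TID_BIT) {
		*flags &= ~PENDING_TID_BIT;
		printf("run TID send engine\n");
	}
}

int main(void)
{
	unsigned int flags = PENDING_IB_BIT | PENDING_TID_BIT;

	run_pending(&flags);
	return 0;
}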


@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2015-2017 Intel Corporation.
+ * Copyright(c) 2015-2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license. When using or
  * redistributing this file, you may do so under either license.
@@ -4836,7 +4836,7 @@ static int hfi1_process_opa_mad(struct ib_device *ibdev, int mad_flags,
 	int ret;
 	int pkey_idx;
 	int local_mad = 0;
-	u32 resp_len = 0;
+	u32 resp_len = in_wc->byte_len - sizeof(*in_grh);
 	struct hfi1_ibport *ibp = to_iport(ibdev, port);
 	pkey_idx = hfi1_lookup_pkey_idx(ibp, LIM_MGMT_P_KEY);

View File

@@ -0,0 +1,363 @@
// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
/*
* Copyright(c) 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* BSD LICENSE
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#include "hfi.h"
#include "affinity.h"
#include "sdma.h"
/**
* msix_initialize() - Calculate, request and configure MSIx IRQs
* @dd: valid hfi1 devdata
*
*/
int msix_initialize(struct hfi1_devdata *dd)
{
u32 total;
int ret;
struct hfi1_msix_entry *entries;
/*
* MSIx interrupt count:
* one for the general, "slow path" interrupt
* one per used SDMA engine
* one per kernel receive context
* one for each VNIC context
* ...any new IRQs should be added here.
*/
total = 1 + dd->num_sdma + dd->n_krcv_queues + dd->num_vnic_contexts;
if (total >= CCE_NUM_MSIX_VECTORS)
return -EINVAL;
ret = pci_alloc_irq_vectors(dd->pcidev, total, total, PCI_IRQ_MSIX);
if (ret < 0) {
dd_dev_err(dd, "pci_alloc_irq_vectors() failed: %d\n", ret);
return ret;
}
entries = kcalloc(total, sizeof(*dd->msix_info.msix_entries),
GFP_KERNEL);
if (!entries) {
pci_free_irq_vectors(dd->pcidev);
return -ENOMEM;
}
dd->msix_info.msix_entries = entries;
spin_lock_init(&dd->msix_info.msix_lock);
bitmap_zero(dd->msix_info.in_use_msix, total);
dd->msix_info.max_requested = total;
dd_dev_info(dd, "%u MSI-X interrupts allocated\n", total);
return 0;
}
/**
* msix_request_irq() - Allocate a free MSIx IRQ
* @dd: valid devdata
* @arg: context information for the IRQ
* @handler: IRQ handler
* @thread: IRQ thread handler (could be NULL)
* @idx: zero-based index when multiple IRQs of the same type are requested
* @type: affinity IRQ type
*
* Allocates an MSI-X vector if available, and then creates the appropriate
* meta data needed to keep track of the pci IRQ request.
*
* Return:
* < 0 Error
* >= 0 MSIx vector
*
*/
static int msix_request_irq(struct hfi1_devdata *dd, void *arg,
irq_handler_t handler, irq_handler_t thread,
u32 idx, enum irq_type type)
{
unsigned long nr;
int irq;
int ret;
const char *err_info;
char name[MAX_NAME_SIZE];
struct hfi1_msix_entry *me;
/* Allocate an MSIx vector */
spin_lock(&dd->msix_info.msix_lock);
nr = find_first_zero_bit(dd->msix_info.in_use_msix,
dd->msix_info.max_requested);
if (nr < dd->msix_info.max_requested)
__set_bit(nr, dd->msix_info.in_use_msix);
spin_unlock(&dd->msix_info.msix_lock);
if (nr == dd->msix_info.max_requested)
return -ENOSPC;
/* Specific verification and determine the name */
switch (type) {
case IRQ_GENERAL:
/* general interrupt must be MSIx vector 0 */
if (nr) {
spin_lock(&dd->msix_info.msix_lock);
__clear_bit(nr, dd->msix_info.in_use_msix);
spin_unlock(&dd->msix_info.msix_lock);
dd_dev_err(dd, "Invalid index %lu for GENERAL IRQ\n",
nr);
return -EINVAL;
}
snprintf(name, sizeof(name), DRIVER_NAME "_%d", dd->unit);
err_info = "general";
break;
case IRQ_SDMA:
snprintf(name, sizeof(name), DRIVER_NAME "_%d sdma%d",
dd->unit, idx);
err_info = "sdma";
break;
case IRQ_RCVCTXT:
snprintf(name, sizeof(name), DRIVER_NAME "_%d kctxt%d",
dd->unit, idx);
err_info = "receive context";
break;
case IRQ_OTHER:
default:
return -EINVAL;
}
name[sizeof(name) - 1] = 0;
irq = pci_irq_vector(dd->pcidev, nr);
ret = pci_request_irq(dd->pcidev, nr, handler, thread, arg, name);
if (ret) {
dd_dev_err(dd,
"%s: request for IRQ %d failed, MSIx %d, err %d\n",
err_info, irq, idx, ret);
spin_lock(&dd->msix_info.msix_lock);
__clear_bit(nr, dd->msix_info.in_use_msix);
spin_unlock(&dd->msix_info.msix_lock);
return ret;
}
/*
* assign arg after pci_request_irq call, so it will be
* cleaned up
*/
me = &dd->msix_info.msix_entries[nr];
me->irq = irq;
me->arg = arg;
me->type = type;
/* This is a request, so a failure is not fatal */
ret = hfi1_get_irq_affinity(dd, me);
if (ret)
dd_dev_err(dd, "unable to pin IRQ %d\n", ret);
return nr;
}
/**
* msix_request_rcd_irq() - Helper function for RCVAVAIL IRQs
* @rcd: valid rcd context
*
*/
int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd)
{
int nr;
nr = msix_request_irq(rcd->dd, rcd, receive_context_interrupt,
receive_context_thread, rcd->ctxt, IRQ_RCVCTXT);
if (nr < 0)
return nr;
/*
* Set the interrupt register and mask for this
* context's interrupt.
*/
rcd->ireg = (IS_RCVAVAIL_START + rcd->ctxt) / 64;
rcd->imask = ((u64)1) << ((IS_RCVAVAIL_START + rcd->ctxt) % 64);
rcd->msix_intr = nr;
remap_intr(rcd->dd, IS_RCVAVAIL_START + rcd->ctxt, nr);
return 0;
}
/**
* msix_request_sdma_irq() - Helper for getting SDMA IRQ resources
* @sde: valid sdma engine
*
*/
int msix_request_sdma_irq(struct sdma_engine *sde)
{
int nr;
nr = msix_request_irq(sde->dd, sde, sdma_interrupt, NULL,
sde->this_idx, IRQ_SDMA);
if (nr < 0)
return nr;
sde->msix_intr = nr;
remap_sdma_interrupts(sde->dd, sde->this_idx, nr);
return 0;
}
/**
* enable_sdma_srcs() - Helper to enable SDMA IRQ sources
* @dd: valid devdata structure
* @i: index of SDMA engine
*/
static void enable_sdma_srcs(struct hfi1_devdata *dd, int i)
{
set_intr_bits(dd, IS_SDMA_START + i, IS_SDMA_START + i, true);
set_intr_bits(dd, IS_SDMA_PROGRESS_START + i,
IS_SDMA_PROGRESS_START + i, true);
set_intr_bits(dd, IS_SDMA_IDLE_START + i, IS_SDMA_IDLE_START + i, true);
set_intr_bits(dd, IS_SDMAENG_ERR_START + i, IS_SDMAENG_ERR_START + i,
true);
}
/**
* msix_request_irqs() - Allocate all MSIx IRQs
* @dd: valid devdata structure
*
* Helper function to request the used MSIx IRQs.
*
*/
int msix_request_irqs(struct hfi1_devdata *dd)
{
int i;
int ret;
ret = msix_request_irq(dd, dd, general_interrupt, NULL, 0, IRQ_GENERAL);
if (ret < 0)
return ret;
for (i = 0; i < dd->num_sdma; i++) {
struct sdma_engine *sde = &dd->per_sdma[i];
ret = msix_request_sdma_irq(sde);
if (ret)
return ret;
enable_sdma_srcs(sde->dd, i);
}
for (i = 0; i < dd->n_krcv_queues; i++) {
struct hfi1_ctxtdata *rcd = hfi1_rcd_get_by_index_safe(dd, i);
if (rcd)
ret = msix_request_rcd_irq(rcd);
hfi1_rcd_put(rcd);
if (ret)
return ret;
}
return 0;
}
/**
* msix_free_irq() - Free the specified MSIx resources and IRQ
* @dd: valid devdata
* @msix_intr: MSIx vector to free.
*
*/
void msix_free_irq(struct hfi1_devdata *dd, u8 msix_intr)
{
struct hfi1_msix_entry *me;
if (msix_intr >= dd->msix_info.max_requested)
return;
me = &dd->msix_info.msix_entries[msix_intr];
if (!me->arg) /* => no irq, no affinity */
return;
hfi1_put_irq_affinity(dd, me);
pci_free_irq(dd->pcidev, msix_intr, me->arg);
me->arg = NULL;
spin_lock(&dd->msix_info.msix_lock);
__clear_bit(msix_intr, dd->msix_info.in_use_msix);
spin_unlock(&dd->msix_info.msix_lock);
}
/**
* msix_clean_up_interrupts() - Free all MSI-X IRQ resources
* @dd: valid devdata structure
*
* Free the MSIx and associated PCI resources, if they have been allocated.
*/
void msix_clean_up_interrupts(struct hfi1_devdata *dd)
{
int i;
struct hfi1_msix_entry *me = dd->msix_info.msix_entries;
/* remove irqs - must happen before disabling/turning off */
for (i = 0; i < dd->msix_info.max_requested; i++, me++)
msix_free_irq(dd, i);
/* clean structures */
kfree(dd->msix_info.msix_entries);
dd->msix_info.msix_entries = NULL;
dd->msix_info.max_requested = 0;
pci_free_irq_vectors(dd->pcidev);
}
/**
* msix_vnic_synchronize_irq() - VNIC IRQ synchronize
* @dd: valid devdata
*/
void msix_vnic_synchronize_irq(struct hfi1_devdata *dd)
{
int i;
for (i = 0; i < dd->vnic.num_ctxt; i++) {
struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i];
struct hfi1_msix_entry *me;
me = &dd->msix_info.msix_entries[rcd->msix_intr];
synchronize_irq(me->irq);
}
}
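The vector bookkeeping above is a spinlock-protected bitmap: msix_request_irq() claims the first clear bit as the vector number and clears it again on request failure or in msix_free_irq(). A stripped-down, single-threaded userspace model of that pattern (no PCI, no locking, no affinity; names are illustrative):

#include <stdio.h>

#define MAX_VECTORS 64
static unsigned long long in_use;	/* one bit per vector, 0 = free */

/* Claim the first free vector, or return -1 if all are taken. */
static int vector_request(void)
{
	for (int nr = 0; nr < MAX_VECTORS; nr++) {
		if (!(in_use & (1ull << nr))) {
			in_use |= 1ull << nr;
			return nr;
		}
	}
	return -1;
}

static void vector_free(int nr)
{
	if (nr >= 0 && nr < MAX_VECTORS)
		in_use &= ~(1ull << nr);
}

int main(void)
{
	int general = vector_request();	/* the driver insists this lands on 0 */
	int sdma0 = vector_request();

	printf("general=%d sdma0=%d\n", general, sdma0);
	vector_free(sdma0);
	vector_free(general);
	return 0;
}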


@@ -0,0 +1,64 @@
/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
/*
* Copyright(c) 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* BSD LICENSE
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifndef _HFI1_MSIX_H
#define _HFI1_MSIX_H
#include "hfi.h"
/* MSIx interface */
int msix_initialize(struct hfi1_devdata *dd);
int msix_request_irqs(struct hfi1_devdata *dd);
void msix_clean_up_interrupts(struct hfi1_devdata *dd);
int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd);
int msix_request_sdma_irq(struct sdma_engine *sde);
void msix_free_irq(struct hfi1_devdata *dd, u8 msix_intr);
/* VNIC interface */
void msix_vnic_synchronize_irq(struct hfi1_devdata *dd);
#endif
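A caller is expected to drive this interface in a fixed order, which is essentially what the reworked set_up_interrupts() in chip.c does: size and allocate the vector pool, request the individual IRQs, and tear everything down on failure or shutdown. A sketch of that order follows; the wrapper name is made up and error handling is reduced to the essentials:

/* Sketch only: mirrors the call order used by set_up_interrupts() in chip.c. */
static int example_msix_bringup(struct hfi1_devdata *dd)
{
	int ret;

	ret = msix_initialize(dd);		/* allocate vectors and tracking state */
	if (ret)
		return ret;

	ret = msix_request_irqs(dd);		/* general + per-SDMA + per-receive-context */
	if (ret)
		msix_clean_up_interrupts(dd);	/* also releases the PCI vectors */
	return ret;
}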


@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2015 - 2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license. When using or
  * redistributing this file, you may do so under either license.
@@ -60,20 +60,13 @@
* This file contains PCIe utility routines. * This file contains PCIe utility routines.
*/ */
/*
* Code to adjust PCIe capabilities.
*/
static void tune_pcie_caps(struct hfi1_devdata *);
/* /*
* Do all the common PCIe setup and initialization. * Do all the common PCIe setup and initialization.
* devdata is not yet allocated, and is not allocated until after this
* routine returns success. Therefore dd_dev_err() can't be used for error
* printing.
*/ */
int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent) int hfi1_pcie_init(struct hfi1_devdata *dd)
{ {
int ret; int ret;
struct pci_dev *pdev = dd->pcidev;
ret = pci_enable_device(pdev); ret = pci_enable_device(pdev);
if (ret) { if (ret) {
@@ -89,15 +82,13 @@ int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent)
* about that, it appears. If the original BAR was retained * about that, it appears. If the original BAR was retained
* in the kernel data structures, this may be OK. * in the kernel data structures, this may be OK.
*/ */
hfi1_early_err(&pdev->dev, "pci enable failed: error %d\n", dd_dev_err(dd, "pci enable failed: error %d\n", -ret);
-ret); return ret;
goto done;
} }
ret = pci_request_regions(pdev, DRIVER_NAME); ret = pci_request_regions(pdev, DRIVER_NAME);
if (ret) { if (ret) {
hfi1_early_err(&pdev->dev, dd_dev_err(dd, "pci_request_regions fails: err %d\n", -ret);
"pci_request_regions fails: err %d\n", -ret);
goto bail; goto bail;
} }
@@ -110,8 +101,7 @@ int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent)
*/ */
ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
if (ret) { if (ret) {
hfi1_early_err(&pdev->dev, dd_dev_err(dd, "Unable to set DMA mask: %d\n", ret);
"Unable to set DMA mask: %d\n", ret);
goto bail; goto bail;
} }
ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
@@ -119,18 +109,16 @@ int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent)
ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
} }
if (ret) { if (ret) {
hfi1_early_err(&pdev->dev, dd_dev_err(dd, "Unable to set DMA consistent mask: %d\n", ret);
"Unable to set DMA consistent mask: %d\n", ret);
goto bail; goto bail;
} }
pci_set_master(pdev); pci_set_master(pdev);
(void)pci_enable_pcie_error_reporting(pdev); (void)pci_enable_pcie_error_reporting(pdev);
goto done; return 0;
bail: bail:
hfi1_pcie_cleanup(pdev); hfi1_pcie_cleanup(pdev);
done:
return ret; return ret;
} }
@@ -206,7 +194,7 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev)
dd_dev_err(dd, "WC mapping of send buffers failed\n"); dd_dev_err(dd, "WC mapping of send buffers failed\n");
goto nomem; goto nomem;
} }
dd_dev_info(dd, "WC piobase: %p\n for %x", dd->piobase, TXE_PIO_SIZE); dd_dev_info(dd, "WC piobase: %p for %x\n", dd->piobase, TXE_PIO_SIZE);
dd->physaddr = addr; /* used for io_remap, etc. */ dd->physaddr = addr; /* used for io_remap, etc. */
@@ -344,26 +332,6 @@ int pcie_speeds(struct hfi1_devdata *dd)
return 0; return 0;
} }
/*
* Returns:
* - actual number of interrupts allocated or
* - error
*/
int request_msix(struct hfi1_devdata *dd, u32 msireq)
{
int nvec;
nvec = pci_alloc_irq_vectors(dd->pcidev, msireq, msireq, PCI_IRQ_MSIX);
if (nvec < 0) {
dd_dev_err(dd, "pci_alloc_irq_vectors() failed: %d\n", nvec);
return nvec;
}
tune_pcie_caps(dd);
return nvec;
}
/* restore command and BARs after a reset has wiped them out */ /* restore command and BARs after a reset has wiped them out */
int restore_pci_variables(struct hfi1_devdata *dd) int restore_pci_variables(struct hfi1_devdata *dd)
{ {
@@ -479,14 +447,19 @@ error:
* Check and optionally adjust them to maximize our throughput. * Check and optionally adjust them to maximize our throughput.
*/ */
static int hfi1_pcie_caps; static int hfi1_pcie_caps;
module_param_named(pcie_caps, hfi1_pcie_caps, int, S_IRUGO); module_param_named(pcie_caps, hfi1_pcie_caps, int, 0444);
MODULE_PARM_DESC(pcie_caps, "Max PCIe tuning: Payload (0..3), ReadReq (4..7)"); MODULE_PARM_DESC(pcie_caps, "Max PCIe tuning: Payload (0..3), ReadReq (4..7)");
uint aspm_mode = ASPM_MODE_DISABLED; uint aspm_mode = ASPM_MODE_DISABLED;
module_param_named(aspm, aspm_mode, uint, S_IRUGO); module_param_named(aspm, aspm_mode, uint, 0444);
MODULE_PARM_DESC(aspm, "PCIe ASPM: 0: disable, 1: enable, 2: dynamic"); MODULE_PARM_DESC(aspm, "PCIe ASPM: 0: disable, 1: enable, 2: dynamic");
static void tune_pcie_caps(struct hfi1_devdata *dd) /**
* tune_pcie_caps() - Code to adjust PCIe capabilities.
* @dd: Valid device data structure
*
*/
void tune_pcie_caps(struct hfi1_devdata *dd)
{ {
struct pci_dev *parent; struct pci_dev *parent;
u16 rc_mpss, rc_mps, ep_mpss, ep_mps; u16 rc_mpss, rc_mps, ep_mpss, ep_mps;
@@ -1028,6 +1001,7 @@ int do_pcie_gen3_transition(struct hfi1_devdata *dd)
const u8 (*ctle_tunings)[4]; const u8 (*ctle_tunings)[4];
uint static_ctle_mode; uint static_ctle_mode;
int return_error = 0; int return_error = 0;
u32 target_width;
/* PCIe Gen3 is for the ASIC only */ /* PCIe Gen3 is for the ASIC only */
if (dd->icode != ICODE_RTL_SILICON) if (dd->icode != ICODE_RTL_SILICON)
@@ -1067,6 +1041,9 @@ int do_pcie_gen3_transition(struct hfi1_devdata *dd)
return 0; return 0;
} }
/* Previous Gen1/Gen2 bus width */
target_width = dd->lbus_width;
/* /*
* Do the Gen3 transition. Steps are those of the PCIe Gen3 * Do the Gen3 transition. Steps are those of the PCIe Gen3
* recipe. * recipe.
@@ -1435,11 +1412,12 @@ retry:
dd_dev_info(dd, "%s: new speed and width: %s\n", __func__, dd_dev_info(dd, "%s: new speed and width: %s\n", __func__,
dd->lbus_info); dd->lbus_info);
if (dd->lbus_speed != target_speed) { /* not target */ if (dd->lbus_speed != target_speed ||
dd->lbus_width < target_width) { /* not target */
/* maybe retry */ /* maybe retry */
do_retry = retry_count < pcie_retry; do_retry = retry_count < pcie_retry;
dd_dev_err(dd, "PCIe link speed did not switch to Gen%d%s\n", dd_dev_err(dd, "PCIe link speed or width did not match target%s\n",
pcie_target, do_retry ? ", retrying" : ""); do_retry ? ", retrying" : "");
retry_count++; retry_count++;
if (do_retry) { if (do_retry) {
msleep(100); /* allow time to settle */ msleep(100); /* allow time to settle */


@@ -71,14 +71,6 @@ void __cm_reset(struct hfi1_devdata *dd, u64 sendctrl)
} }
} }
/* defined in header release 48 and higher */
#ifndef SEND_CTRL_UNSUPPORTED_VL_SHIFT
#define SEND_CTRL_UNSUPPORTED_VL_SHIFT 3
#define SEND_CTRL_UNSUPPORTED_VL_MASK 0xffull
#define SEND_CTRL_UNSUPPORTED_VL_SMASK (SEND_CTRL_UNSUPPORTED_VL_MASK \
<< SEND_CTRL_UNSUPPORTED_VL_SHIFT)
#endif
/* global control of PIO send */ /* global control of PIO send */
void pio_send_control(struct hfi1_devdata *dd, int op) void pio_send_control(struct hfi1_devdata *dd, int op)
{ {


@@ -66,7 +66,7 @@ MODULE_PARM_DESC(qp_table_size, "QP table size");
static void flush_tx_list(struct rvt_qp *qp); static void flush_tx_list(struct rvt_qp *qp);
static int iowait_sleep( static int iowait_sleep(
struct sdma_engine *sde, struct sdma_engine *sde,
struct iowait *wait, struct iowait_work *wait,
struct sdma_txreq *stx, struct sdma_txreq *stx,
unsigned int seq, unsigned int seq,
bool pkts_sent); bool pkts_sent);
@@ -134,15 +134,13 @@ const struct rvt_operation_params hfi1_post_parms[RVT_OPERATION_MAX] = {
}; };
static void flush_tx_list(struct rvt_qp *qp) static void flush_list_head(struct list_head *l)
{ {
struct hfi1_qp_priv *priv = qp->priv; while (!list_empty(l)) {
while (!list_empty(&priv->s_iowait.tx_head)) {
struct sdma_txreq *tx; struct sdma_txreq *tx;
tx = list_first_entry( tx = list_first_entry(
&priv->s_iowait.tx_head, l,
struct sdma_txreq, struct sdma_txreq,
list); list);
list_del_init(&tx->list); list_del_init(&tx->list);
@@ -151,6 +149,14 @@ static void flush_tx_list(struct rvt_qp *qp)
} }
} }
static void flush_tx_list(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
flush_list_head(&iowait_get_ib_work(&priv->s_iowait)->tx_head);
flush_list_head(&iowait_get_tid_work(&priv->s_iowait)->tx_head);
}
static void flush_iowait(struct rvt_qp *qp) static void flush_iowait(struct rvt_qp *qp)
{ {
struct hfi1_qp_priv *priv = qp->priv; struct hfi1_qp_priv *priv = qp->priv;
@@ -282,33 +288,46 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
} }
/** /**
* hfi1_check_send_wqe - validate wqe * hfi1_setup_wqe - set up the wqe
* @qp - The qp * @qp - The qp
* @wqe - The built wqe * @wqe - The built wqe
* @call_send - Determine if the send should be posted or scheduled.
* *
* validate wqe. This is called * Perform setup of the wqe. This is called
* prior to inserting the wqe into * prior to inserting the wqe into the ring but after
* the ring but after the wqe has been * the wqe has been setup by RDMAVT. This function
* setup. * allows the driver the opportunity to perform
* validation and additional setup of the wqe.
* *
* Returns 0 on success, -EINVAL on failure * Returns 0 on success, -EINVAL on failure
* *
*/ */
int hfi1_check_send_wqe(struct rvt_qp *qp, int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe, bool *call_send)
struct rvt_swqe *wqe)
{ {
struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
struct rvt_ah *ah; struct rvt_ah *ah;
struct hfi1_pportdata *ppd;
struct hfi1_devdata *dd;
switch (qp->ibqp.qp_type) { switch (qp->ibqp.qp_type) {
case IB_QPT_RC: case IB_QPT_RC:
case IB_QPT_UC: case IB_QPT_UC:
if (wqe->length > 0x80000000U) if (wqe->length > 0x80000000U)
return -EINVAL; return -EINVAL;
if (wqe->length > qp->pmtu)
*call_send = false;
break; break;
case IB_QPT_SMI: case IB_QPT_SMI:
ah = ibah_to_rvtah(wqe->ud_wr.ah); /*
if (wqe->length > (1 << ah->log_pmtu)) * SM packets should exclusively use VL15 and their SL is
* ignored (IBTA v1.3, Section 3.5.8.2). Therefore, when ah
* is created, SL is 0 in most cases and as a result some
* fields (vl and pmtu) in ah may not be set correctly,
* depending on the SL2SC and SC2VL tables at the time.
*/
ppd = ppd_from_ibp(ibp);
dd = dd_from_ppd(ppd);
if (wqe->length > dd->vld[15].mtu)
return -EINVAL; return -EINVAL;
break; break;
case IB_QPT_GSI: case IB_QPT_GSI:
@@ -321,7 +340,7 @@ int hfi1_check_send_wqe(struct rvt_qp *qp,
default: default:
break; break;
} }
return wqe->length <= piothreshold; return 0;
} }
/** /**
@@ -333,7 +352,7 @@ int hfi1_check_send_wqe(struct rvt_qp *qp,
* It is only used in the post send, which doesn't hold * It is only used in the post send, which doesn't hold
* the s_lock. * the s_lock.
*/ */
void _hfi1_schedule_send(struct rvt_qp *qp) bool _hfi1_schedule_send(struct rvt_qp *qp)
{ {
struct hfi1_qp_priv *priv = qp->priv; struct hfi1_qp_priv *priv = qp->priv;
struct hfi1_ibport *ibp = struct hfi1_ibport *ibp =
@@ -341,10 +360,10 @@ void _hfi1_schedule_send(struct rvt_qp *qp)
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
iowait_schedule(&priv->s_iowait, ppd->hfi1_wq, return iowait_schedule(&priv->s_iowait, ppd->hfi1_wq,
priv->s_sde ? priv->s_sde ?
priv->s_sde->cpu : priv->s_sde->cpu :
cpumask_first(cpumask_of_node(dd->node))); cpumask_first(cpumask_of_node(dd->node)));
} }
static void qp_pio_drain(struct rvt_qp *qp) static void qp_pio_drain(struct rvt_qp *qp)
@@ -372,12 +391,32 @@ static void qp_pio_drain(struct rvt_qp *qp)
* *
* This schedules qp progress and caller should hold * This schedules qp progress and caller should hold
* the s_lock. * the s_lock.
* @return true if the first leg is scheduled;
* false if the first leg is not scheduled.
*/ */
void hfi1_schedule_send(struct rvt_qp *qp) bool hfi1_schedule_send(struct rvt_qp *qp)
{ {
lockdep_assert_held(&qp->s_lock); lockdep_assert_held(&qp->s_lock);
if (hfi1_send_ok(qp)) if (hfi1_send_ok(qp)) {
_hfi1_schedule_send(qp); _hfi1_schedule_send(qp);
return true;
}
if (qp->s_flags & HFI1_S_ANY_WAIT_IO)
iowait_set_flag(&((struct hfi1_qp_priv *)qp->priv)->s_iowait,
IOWAIT_PENDING_IB);
return false;
}
static void hfi1_qp_schedule(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
bool ret;
if (iowait_flag_set(&priv->s_iowait, IOWAIT_PENDING_IB)) {
ret = hfi1_schedule_send(qp);
if (ret)
iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
}
} }
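The two helpers above introduce a pending-flag handshake: a sender that cannot make progress marks IOWAIT_PENDING_IB, and the wakeup path only reschedules (and clears the mark) for a leg that was actually marked. The fragment below merely restates that flow in one place as a hedged sketch; it assumes qp->s_lock is held, as in hfi1_qp_wakeup().

static void qp_wakeup_sketch(struct rvt_qp *qp, u32 flag)
{
	struct hfi1_qp_priv *priv = qp->priv;

	if (!(qp->s_flags & flag))
		return;
	qp->s_flags &= ~flag;
	if (iowait_flag_set(&priv->s_iowait, IOWAIT_PENDING_IB) &&
	    hfi1_schedule_send(qp))
		iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
}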
void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag) void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
@@ -388,16 +427,22 @@ void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
if (qp->s_flags & flag) { if (qp->s_flags & flag) {
qp->s_flags &= ~flag; qp->s_flags &= ~flag;
trace_hfi1_qpwakeup(qp, flag); trace_hfi1_qpwakeup(qp, flag);
hfi1_schedule_send(qp); hfi1_qp_schedule(qp);
} }
spin_unlock_irqrestore(&qp->s_lock, flags); spin_unlock_irqrestore(&qp->s_lock, flags);
/* Notify hfi1_destroy_qp() if it is waiting. */ /* Notify hfi1_destroy_qp() if it is waiting. */
rvt_put_qp(qp); rvt_put_qp(qp);
} }
void hfi1_qp_unbusy(struct rvt_qp *qp, struct iowait_work *wait)
{
if (iowait_set_work_flag(wait) == IOWAIT_IB_SE)
qp->s_flags &= ~RVT_S_BUSY;
}
static int iowait_sleep( static int iowait_sleep(
struct sdma_engine *sde, struct sdma_engine *sde,
struct iowait *wait, struct iowait_work *wait,
struct sdma_txreq *stx, struct sdma_txreq *stx,
uint seq, uint seq,
bool pkts_sent) bool pkts_sent)
@@ -438,7 +483,7 @@ static int iowait_sleep(
rvt_get_qp(qp); rvt_get_qp(qp);
} }
write_sequnlock(&dev->iowait_lock); write_sequnlock(&dev->iowait_lock);
qp->s_flags &= ~RVT_S_BUSY; hfi1_qp_unbusy(qp, wait);
spin_unlock_irqrestore(&qp->s_lock, flags); spin_unlock_irqrestore(&qp->s_lock, flags);
ret = -EBUSY; ret = -EBUSY;
} else { } else {
@@ -637,6 +682,7 @@ void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp)
&priv->s_iowait, &priv->s_iowait,
1, 1,
_hfi1_do_send, _hfi1_do_send,
NULL,
iowait_sleep, iowait_sleep,
iowait_wakeup, iowait_wakeup,
iowait_sdma_drained); iowait_sdma_drained);
@@ -686,7 +732,7 @@ void stop_send_queue(struct rvt_qp *qp)
{ {
struct hfi1_qp_priv *priv = qp->priv; struct hfi1_qp_priv *priv = qp->priv;
cancel_work_sync(&priv->s_iowait.iowork); iowait_cancel_work(&priv->s_iowait);
} }
void quiesce_qp(struct rvt_qp *qp) void quiesce_qp(struct rvt_qp *qp)


@@ -57,18 +57,6 @@ extern unsigned int hfi1_qp_table_size;
extern const struct rvt_operation_params hfi1_post_parms[]; extern const struct rvt_operation_params hfi1_post_parms[];
/*
* Send if not busy or waiting for I/O and either
* a RC response is pending or we can process send work requests.
*/
static inline int hfi1_send_ok(struct rvt_qp *qp)
{
return !(qp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT_IO)) &&
(verbs_txreq_queued(qp) ||
(qp->s_flags & RVT_S_RESP_PENDING) ||
!(qp->s_flags & RVT_S_ANY_WAIT_SEND));
}
/* /*
* Driver specific s_flags starting at bit 31 down to HFI1_S_MIN_BIT_MASK * Driver specific s_flags starting at bit 31 down to HFI1_S_MIN_BIT_MASK
* *
@@ -89,6 +77,20 @@ static inline int hfi1_send_ok(struct rvt_qp *qp)
#define HFI1_S_ANY_WAIT_IO (RVT_S_ANY_WAIT_IO | HFI1_S_WAIT_PIO_DRAIN) #define HFI1_S_ANY_WAIT_IO (RVT_S_ANY_WAIT_IO | HFI1_S_WAIT_PIO_DRAIN)
#define HFI1_S_ANY_WAIT (HFI1_S_ANY_WAIT_IO | RVT_S_ANY_WAIT_SEND) #define HFI1_S_ANY_WAIT (HFI1_S_ANY_WAIT_IO | RVT_S_ANY_WAIT_SEND)
/*
* Send if not busy or waiting for I/O and either
* a RC response is pending or we can process send work requests.
*/
static inline int hfi1_send_ok(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
return !(qp->s_flags & (RVT_S_BUSY | HFI1_S_ANY_WAIT_IO)) &&
(verbs_txreq_queued(iowait_get_ib_work(&priv->s_iowait)) ||
(qp->s_flags & RVT_S_RESP_PENDING) ||
!(qp->s_flags & RVT_S_ANY_WAIT_SEND));
}
/* /*
* free_ahg - clear ahg from QP * free_ahg - clear ahg from QP
*/ */
@@ -129,8 +131,8 @@ struct send_context *qp_to_send_context(struct rvt_qp *qp, u8 sc5);
void qp_iter_print(struct seq_file *s, struct rvt_qp_iter *iter); void qp_iter_print(struct seq_file *s, struct rvt_qp_iter *iter);
void _hfi1_schedule_send(struct rvt_qp *qp); bool _hfi1_schedule_send(struct rvt_qp *qp);
void hfi1_schedule_send(struct rvt_qp *qp); bool hfi1_schedule_send(struct rvt_qp *qp);
void hfi1_migrate_qp(struct rvt_qp *qp); void hfi1_migrate_qp(struct rvt_qp *qp);
@@ -150,4 +152,5 @@ void quiesce_qp(struct rvt_qp *qp);
u32 mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu); u32 mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu);
int mtu_to_path_mtu(u32 mtu); int mtu_to_path_mtu(u32 mtu);
void hfi1_error_port_qps(struct hfi1_ibport *ibp, u8 sl); void hfi1_error_port_qps(struct hfi1_ibport *ibp, u8 sl);
void hfi1_qp_unbusy(struct rvt_qp *qp, struct iowait_work *wait);
#endif /* _QP_H */ #endif /* _QP_H */

View File

@@ -309,7 +309,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
} }
clear_ahg(qp); clear_ahg(qp);
wqe = rvt_get_swqe_ptr(qp, qp->s_last); wqe = rvt_get_swqe_ptr(qp, qp->s_last);
hfi1_send_complete(qp, wqe, qp->s_last != qp->s_acked ? rvt_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR); IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR);
/* will get called again */ /* will get called again */
goto done_free_tx; goto done_free_tx;
@@ -378,9 +378,9 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
wqe->wr.ex.invalidate_rkey); wqe->wr.ex.invalidate_rkey);
local_ops = 1; local_ops = 1;
} }
hfi1_send_complete(qp, wqe, rvt_send_complete(qp, wqe,
err ? IB_WC_LOC_PROT_ERR err ? IB_WC_LOC_PROT_ERR
: IB_WC_SUCCESS); : IB_WC_SUCCESS);
if (local_ops) if (local_ops)
atomic_dec(&qp->local_ops_pending); atomic_dec(&qp->local_ops_pending);
goto done_free_tx; goto done_free_tx;
@@ -1043,7 +1043,7 @@ void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
hfi1_migrate_qp(qp); hfi1_migrate_qp(qp);
qp->s_retry = qp->s_retry_cnt; qp->s_retry = qp->s_retry_cnt;
} else if (qp->s_last == qp->s_acked) { } else if (qp->s_last == qp->s_acked) {
hfi1_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR); rvt_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR); rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
return; return;
} else { /* need to handle delayed completion */ } else { /* need to handle delayed completion */
@@ -1468,7 +1468,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
ibp->rvp.n_other_naks++; ibp->rvp.n_other_naks++;
class_b: class_b:
if (qp->s_last == qp->s_acked) { if (qp->s_last == qp->s_acked) {
hfi1_send_complete(qp, wqe, status); rvt_send_complete(qp, wqe, status);
rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR); rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
} }
break; break;
@@ -1644,7 +1644,8 @@ read_middle:
qp->s_rdma_read_len -= pmtu; qp->s_rdma_read_len -= pmtu;
update_last_psn(qp, psn); update_last_psn(qp, psn);
spin_unlock_irqrestore(&qp->s_lock, flags); spin_unlock_irqrestore(&qp->s_lock, flags);
hfi1_copy_sge(&qp->s_rdma_read_sge, data, pmtu, false, false); rvt_copy_sge(qp, &qp->s_rdma_read_sge,
data, pmtu, false, false);
goto bail; goto bail;
case OP(RDMA_READ_RESPONSE_ONLY): case OP(RDMA_READ_RESPONSE_ONLY):
@@ -1684,7 +1685,8 @@ read_last:
if (unlikely(tlen != qp->s_rdma_read_len)) if (unlikely(tlen != qp->s_rdma_read_len))
goto ack_len_err; goto ack_len_err;
aeth = be32_to_cpu(ohdr->u.aeth); aeth = be32_to_cpu(ohdr->u.aeth);
hfi1_copy_sge(&qp->s_rdma_read_sge, data, tlen, false, false); rvt_copy_sge(qp, &qp->s_rdma_read_sge,
data, tlen, false, false);
WARN_ON(qp->s_rdma_read_sge.num_sge); WARN_ON(qp->s_rdma_read_sge.num_sge);
(void)do_rc_ack(qp, aeth, psn, (void)do_rc_ack(qp, aeth, psn,
OP(RDMA_READ_RESPONSE_LAST), 0, rcd); OP(RDMA_READ_RESPONSE_LAST), 0, rcd);
@@ -1704,7 +1706,7 @@ ack_len_err:
status = IB_WC_LOC_LEN_ERR; status = IB_WC_LOC_LEN_ERR;
ack_err: ack_err:
if (qp->s_last == qp->s_acked) { if (qp->s_last == qp->s_acked) {
hfi1_send_complete(qp, wqe, status); rvt_send_complete(qp, wqe, status);
rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR); rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
} }
ack_done: ack_done:
@@ -2144,7 +2146,7 @@ send_middle:
qp->r_rcv_len += pmtu; qp->r_rcv_len += pmtu;
if (unlikely(qp->r_rcv_len > qp->r_len)) if (unlikely(qp->r_rcv_len > qp->r_len))
goto nack_inv; goto nack_inv;
hfi1_copy_sge(&qp->r_sge, data, pmtu, true, false); rvt_copy_sge(qp, &qp->r_sge, data, pmtu, true, false);
break; break;
case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE): case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
@@ -2200,7 +2202,7 @@ send_last:
wc.byte_len = tlen + qp->r_rcv_len; wc.byte_len = tlen + qp->r_rcv_len;
if (unlikely(wc.byte_len > qp->r_len)) if (unlikely(wc.byte_len > qp->r_len))
goto nack_inv; goto nack_inv;
hfi1_copy_sge(&qp->r_sge, data, tlen, true, copy_last); rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, copy_last);
rvt_put_ss(&qp->r_sge); rvt_put_ss(&qp->r_sge);
qp->r_msn++; qp->r_msn++;
if (!__test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) if (!__test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))


@@ -155,333 +155,6 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet)
return 0; return 0;
} }
/**
* ruc_loopback - handle UC and RC loopback requests
* @sqp: the sending QP
*
* This is called from hfi1_do_send() to
* forward a WQE addressed to the same HFI.
* Note that although we are single threaded due to the send engine, we still
* have to protect against post_send(). We don't have to worry about
* receive interrupts since this is a connected protocol and all packets
* will pass through here.
*/
static void ruc_loopback(struct rvt_qp *sqp)
{
struct hfi1_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
struct rvt_qp *qp;
struct rvt_swqe *wqe;
struct rvt_sge *sge;
unsigned long flags;
struct ib_wc wc;
u64 sdata;
atomic64_t *maddr;
enum ib_wc_status send_status;
bool release;
int ret;
bool copy_last = false;
int local_ops = 0;
rcu_read_lock();
/*
* Note that we check the responder QP state after
* checking the requester's state.
*/
qp = rvt_lookup_qpn(ib_to_rvt(sqp->ibqp.device), &ibp->rvp,
sqp->remote_qpn);
spin_lock_irqsave(&sqp->s_lock, flags);
/* Return if we are already busy processing a work request. */
if ((sqp->s_flags & (RVT_S_BUSY | HFI1_S_ANY_WAIT)) ||
!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND))
goto unlock;
sqp->s_flags |= RVT_S_BUSY;
again:
if (sqp->s_last == READ_ONCE(sqp->s_head))
goto clr_busy;
wqe = rvt_get_swqe_ptr(sqp, sqp->s_last);
/* Return if it is not OK to start a new work request. */
if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
if (!(ib_rvt_state_ops[sqp->state] & RVT_FLUSH_SEND))
goto clr_busy;
/* We are in the error state, flush the work request. */
send_status = IB_WC_WR_FLUSH_ERR;
goto flush_send;
}
/*
* We can rely on the entry not changing without the s_lock
* being held until we update s_last.
* We increment s_cur to indicate s_last is in progress.
*/
if (sqp->s_last == sqp->s_cur) {
if (++sqp->s_cur >= sqp->s_size)
sqp->s_cur = 0;
}
spin_unlock_irqrestore(&sqp->s_lock, flags);
if (!qp || !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) ||
qp->ibqp.qp_type != sqp->ibqp.qp_type) {
ibp->rvp.n_pkt_drops++;
/*
* For RC, the requester would timeout and retry so
* shortcut the timeouts and just signal too many retries.
*/
if (sqp->ibqp.qp_type == IB_QPT_RC)
send_status = IB_WC_RETRY_EXC_ERR;
else
send_status = IB_WC_SUCCESS;
goto serr;
}
memset(&wc, 0, sizeof(wc));
send_status = IB_WC_SUCCESS;
release = true;
sqp->s_sge.sge = wqe->sg_list[0];
sqp->s_sge.sg_list = wqe->sg_list + 1;
sqp->s_sge.num_sge = wqe->wr.num_sge;
sqp->s_len = wqe->length;
switch (wqe->wr.opcode) {
case IB_WR_REG_MR:
goto send_comp;
case IB_WR_LOCAL_INV:
if (!(wqe->wr.send_flags & RVT_SEND_COMPLETION_ONLY)) {
if (rvt_invalidate_rkey(sqp,
wqe->wr.ex.invalidate_rkey))
send_status = IB_WC_LOC_PROT_ERR;
local_ops = 1;
}
goto send_comp;
case IB_WR_SEND_WITH_INV:
if (!rvt_invalidate_rkey(qp, wqe->wr.ex.invalidate_rkey)) {
wc.wc_flags = IB_WC_WITH_INVALIDATE;
wc.ex.invalidate_rkey = wqe->wr.ex.invalidate_rkey;
}
goto send;
case IB_WR_SEND_WITH_IMM:
wc.wc_flags = IB_WC_WITH_IMM;
wc.ex.imm_data = wqe->wr.ex.imm_data;
/* FALLTHROUGH */
case IB_WR_SEND:
send:
ret = rvt_get_rwqe(qp, false);
if (ret < 0)
goto op_err;
if (!ret)
goto rnr_nak;
break;
case IB_WR_RDMA_WRITE_WITH_IMM:
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
goto inv_err;
wc.wc_flags = IB_WC_WITH_IMM;
wc.ex.imm_data = wqe->wr.ex.imm_data;
ret = rvt_get_rwqe(qp, true);
if (ret < 0)
goto op_err;
if (!ret)
goto rnr_nak;
/* skip copy_last set and qp_access_flags recheck */
goto do_write;
case IB_WR_RDMA_WRITE:
copy_last = rvt_is_user_qp(qp);
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
goto inv_err;
do_write:
if (wqe->length == 0)
break;
if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
wqe->rdma_wr.remote_addr,
wqe->rdma_wr.rkey,
IB_ACCESS_REMOTE_WRITE)))
goto acc_err;
qp->r_sge.sg_list = NULL;
qp->r_sge.num_sge = 1;
qp->r_sge.total_len = wqe->length;
break;
case IB_WR_RDMA_READ:
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
goto inv_err;
if (unlikely(!rvt_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
wqe->rdma_wr.remote_addr,
wqe->rdma_wr.rkey,
IB_ACCESS_REMOTE_READ)))
goto acc_err;
release = false;
sqp->s_sge.sg_list = NULL;
sqp->s_sge.num_sge = 1;
qp->r_sge.sge = wqe->sg_list[0];
qp->r_sge.sg_list = wqe->sg_list + 1;
qp->r_sge.num_sge = wqe->wr.num_sge;
qp->r_sge.total_len = wqe->length;
break;
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
goto inv_err;
if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
wqe->atomic_wr.remote_addr,
wqe->atomic_wr.rkey,
IB_ACCESS_REMOTE_ATOMIC)))
goto acc_err;
/* Perform atomic OP and save result. */
maddr = (atomic64_t *)qp->r_sge.sge.vaddr;
sdata = wqe->atomic_wr.compare_add;
*(u64 *)sqp->s_sge.sge.vaddr =
(wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
(u64)atomic64_add_return(sdata, maddr) - sdata :
(u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr,
sdata, wqe->atomic_wr.swap);
rvt_put_mr(qp->r_sge.sge.mr);
qp->r_sge.num_sge = 0;
goto send_comp;
default:
send_status = IB_WC_LOC_QP_OP_ERR;
goto serr;
}
sge = &sqp->s_sge.sge;
while (sqp->s_len) {
u32 len = sqp->s_len;
if (len > sge->length)
len = sge->length;
if (len > sge->sge_length)
len = sge->sge_length;
WARN_ON_ONCE(len == 0);
hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, release, copy_last);
sge->vaddr += len;
sge->length -= len;
sge->sge_length -= len;
if (sge->sge_length == 0) {
if (!release)
rvt_put_mr(sge->mr);
if (--sqp->s_sge.num_sge)
*sge = *sqp->s_sge.sg_list++;
} else if (sge->length == 0 && sge->mr->lkey) {
if (++sge->n >= RVT_SEGSZ) {
if (++sge->m >= sge->mr->mapsz)
break;
sge->n = 0;
}
sge->vaddr =
sge->mr->map[sge->m]->segs[sge->n].vaddr;
sge->length =
sge->mr->map[sge->m]->segs[sge->n].length;
}
sqp->s_len -= len;
}
if (release)
rvt_put_ss(&qp->r_sge);
if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
goto send_comp;
if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
else
wc.opcode = IB_WC_RECV;
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
wc.byte_len = wqe->length;
wc.qp = &qp->ibqp;
wc.src_qp = qp->remote_qpn;
wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr) & U16_MAX;
wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
wc.port_num = 1;
/* Signal completion event if the solicited bit is set. */
rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
wqe->wr.send_flags & IB_SEND_SOLICITED);
send_comp:
spin_lock_irqsave(&sqp->s_lock, flags);
ibp->rvp.n_loop_pkts++;
flush_send:
sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
hfi1_send_complete(sqp, wqe, send_status);
if (local_ops) {
atomic_dec(&sqp->local_ops_pending);
local_ops = 0;
}
goto again;
rnr_nak:
/* Handle RNR NAK */
if (qp->ibqp.qp_type == IB_QPT_UC)
goto send_comp;
ibp->rvp.n_rnr_naks++;
/*
* Note: we don't need the s_lock held since the BUSY flag
* makes this single threaded.
*/
if (sqp->s_rnr_retry == 0) {
send_status = IB_WC_RNR_RETRY_EXC_ERR;
goto serr;
}
if (sqp->s_rnr_retry_cnt < 7)
sqp->s_rnr_retry--;
spin_lock_irqsave(&sqp->s_lock, flags);
if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_RECV_OK))
goto clr_busy;
rvt_add_rnr_timer(sqp, qp->r_min_rnr_timer <<
IB_AETH_CREDIT_SHIFT);
goto clr_busy;
op_err:
send_status = IB_WC_REM_OP_ERR;
wc.status = IB_WC_LOC_QP_OP_ERR;
goto err;
inv_err:
send_status = IB_WC_REM_INV_REQ_ERR;
wc.status = IB_WC_LOC_QP_OP_ERR;
goto err;
acc_err:
send_status = IB_WC_REM_ACCESS_ERR;
wc.status = IB_WC_LOC_PROT_ERR;
err:
/* responder goes to error state */
rvt_rc_error(qp, wc.status);
serr:
spin_lock_irqsave(&sqp->s_lock, flags);
hfi1_send_complete(sqp, wqe, send_status);
if (sqp->ibqp.qp_type == IB_QPT_RC) {
int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
sqp->s_flags &= ~RVT_S_BUSY;
spin_unlock_irqrestore(&sqp->s_lock, flags);
if (lastwqe) {
struct ib_event ev;
ev.device = sqp->ibqp.device;
ev.element.qp = &sqp->ibqp;
ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
}
goto done;
}
clr_busy:
sqp->s_flags &= ~RVT_S_BUSY;
unlock:
spin_unlock_irqrestore(&sqp->s_lock, flags);
done:
rcu_read_unlock();
}
/** /**
* hfi1_make_grh - construct a GRH header * hfi1_make_grh - construct a GRH header
* @ibp: a pointer to the IB port * @ibp: a pointer to the IB port
@@ -825,8 +498,8 @@ void hfi1_do_send_from_rvt(struct rvt_qp *qp)
void _hfi1_do_send(struct work_struct *work) void _hfi1_do_send(struct work_struct *work)
{ {
struct iowait *wait = container_of(work, struct iowait, iowork); struct iowait_work *w = container_of(work, struct iowait_work, iowork);
struct rvt_qp *qp = iowait_to_qp(wait); struct rvt_qp *qp = iowait_to_qp(w->iow);
hfi1_do_send(qp, true); hfi1_do_send(qp, true);
} }
@@ -850,6 +523,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
ps.ibp = to_iport(qp->ibqp.device, qp->port_num); ps.ibp = to_iport(qp->ibqp.device, qp->port_num);
ps.ppd = ppd_from_ibp(ps.ibp); ps.ppd = ppd_from_ibp(ps.ibp);
ps.in_thread = in_thread; ps.in_thread = in_thread;
ps.wait = iowait_get_ib_work(&priv->s_iowait);
trace_hfi1_rc_do_send(qp, in_thread); trace_hfi1_rc_do_send(qp, in_thread);
@@ -858,7 +532,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
if (!loopback && ((rdma_ah_get_dlid(&qp->remote_ah_attr) & if (!loopback && ((rdma_ah_get_dlid(&qp->remote_ah_attr) &
~((1 << ps.ppd->lmc) - 1)) == ~((1 << ps.ppd->lmc) - 1)) ==
ps.ppd->lid)) { ps.ppd->lid)) {
ruc_loopback(qp); rvt_ruc_loopback(qp);
return; return;
} }
make_req = hfi1_make_rc_req; make_req = hfi1_make_rc_req;
@@ -868,7 +542,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
if (!loopback && ((rdma_ah_get_dlid(&qp->remote_ah_attr) & if (!loopback && ((rdma_ah_get_dlid(&qp->remote_ah_attr) &
~((1 << ps.ppd->lmc) - 1)) == ~((1 << ps.ppd->lmc) - 1)) ==
ps.ppd->lid)) { ps.ppd->lid)) {
ruc_loopback(qp); rvt_ruc_loopback(qp);
return; return;
} }
make_req = hfi1_make_uc_req; make_req = hfi1_make_uc_req;
@@ -883,6 +557,8 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
/* Return if we are already busy processing a work request. */ /* Return if we are already busy processing a work request. */
if (!hfi1_send_ok(qp)) { if (!hfi1_send_ok(qp)) {
if (qp->s_flags & HFI1_S_ANY_WAIT_IO)
iowait_set_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
spin_unlock_irqrestore(&qp->s_lock, ps.flags); spin_unlock_irqrestore(&qp->s_lock, ps.flags);
return; return;
} }
@@ -896,7 +572,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
ps.pkts_sent = false; ps.pkts_sent = false;
/* ensure a pre-built packet is handled */ /* ensure a pre-built packet is handled */
ps.s_txreq = get_waiting_verbs_txreq(qp); ps.s_txreq = get_waiting_verbs_txreq(ps.wait);
do { do {
/* Check for a constructed packet to be sent. */ /* Check for a constructed packet to be sent. */
if (ps.s_txreq) { if (ps.s_txreq) {
@@ -907,6 +583,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
*/ */
if (hfi1_verbs_send(qp, &ps)) if (hfi1_verbs_send(qp, &ps))
return; return;
/* allow other tasks to run */ /* allow other tasks to run */
if (schedule_send_yield(qp, &ps)) if (schedule_send_yield(qp, &ps))
return; return;
@@ -917,44 +594,3 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
iowait_starve_clear(ps.pkts_sent, &priv->s_iowait); iowait_starve_clear(ps.pkts_sent, &priv->s_iowait);
spin_unlock_irqrestore(&qp->s_lock, ps.flags); spin_unlock_irqrestore(&qp->s_lock, ps.flags);
} }
/*
* This should be called with s_lock held.
*/
void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
enum ib_wc_status status)
{
u32 old_last, last;
if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
return;
last = qp->s_last;
old_last = last;
trace_hfi1_qp_send_completion(qp, wqe, last);
if (++last >= qp->s_size)
last = 0;
trace_hfi1_qp_send_completion(qp, wqe, last);
qp->s_last = last;
/* See post_send() */
barrier();
rvt_put_swqe(wqe);
if (qp->ibqp.qp_type == IB_QPT_UD ||
qp->ibqp.qp_type == IB_QPT_SMI ||
qp->ibqp.qp_type == IB_QPT_GSI)
atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
rvt_qp_swqe_complete(qp,
wqe,
ib_hfi1_wc_opcode[wqe->wr.opcode],
status);
if (qp->s_acked == old_last)
qp->s_acked = last;
if (qp->s_cur == old_last)
qp->s_cur = last;
if (qp->s_tail == old_last)
qp->s_tail = last;
if (qp->state == IB_QPS_SQD && last == qp->s_cur)
qp->s_draining = 0;
}


@@ -378,7 +378,7 @@ static inline void complete_tx(struct sdma_engine *sde,
__sdma_txclean(sde->dd, tx); __sdma_txclean(sde->dd, tx);
if (complete) if (complete)
(*complete)(tx, res); (*complete)(tx, res);
if (wait && iowait_sdma_dec(wait)) if (iowait_sdma_dec(wait))
iowait_drain_wakeup(wait); iowait_drain_wakeup(wait);
} }
@@ -1758,7 +1758,6 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
struct iowait *wait, *nw; struct iowait *wait, *nw;
struct iowait *waits[SDMA_WAIT_BATCH_SIZE]; struct iowait *waits[SDMA_WAIT_BATCH_SIZE];
uint i, n = 0, seq, max_idx = 0; uint i, n = 0, seq, max_idx = 0;
struct sdma_txreq *stx;
struct hfi1_ibdev *dev = &sde->dd->verbs_dev; struct hfi1_ibdev *dev = &sde->dd->verbs_dev;
u8 max_starved_cnt = 0; u8 max_starved_cnt = 0;
@@ -1779,19 +1778,13 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
nw, nw,
&sde->dmawait, &sde->dmawait,
list) { list) {
u16 num_desc = 0; u32 num_desc;
if (!wait->wakeup) if (!wait->wakeup)
continue; continue;
if (n == ARRAY_SIZE(waits)) if (n == ARRAY_SIZE(waits))
break; break;
if (!list_empty(&wait->tx_head)) { num_desc = iowait_get_all_desc(wait);
stx = list_first_entry(
&wait->tx_head,
struct sdma_txreq,
list);
num_desc = stx->num_desc;
}
if (num_desc > avail) if (num_desc > avail)
break; break;
avail -= num_desc; avail -= num_desc;
@@ -2346,7 +2339,7 @@ static inline u16 submit_tx(struct sdma_engine *sde, struct sdma_txreq *tx)
*/ */
static int sdma_check_progress( static int sdma_check_progress(
struct sdma_engine *sde, struct sdma_engine *sde,
struct iowait *wait, struct iowait_work *wait,
struct sdma_txreq *tx, struct sdma_txreq *tx,
bool pkts_sent) bool pkts_sent)
{ {
@@ -2356,12 +2349,12 @@ static int sdma_check_progress(
if (tx->num_desc <= sde->desc_avail) if (tx->num_desc <= sde->desc_avail)
return -EAGAIN; return -EAGAIN;
/* pulse the head_lock */ /* pulse the head_lock */
if (wait && wait->sleep) { if (wait && iowait_ioww_to_iow(wait)->sleep) {
unsigned seq; unsigned seq;
seq = raw_seqcount_begin( seq = raw_seqcount_begin(
(const seqcount_t *)&sde->head_lock.seqcount); (const seqcount_t *)&sde->head_lock.seqcount);
ret = wait->sleep(sde, wait, tx, seq, pkts_sent); ret = wait->iow->sleep(sde, wait, tx, seq, pkts_sent);
if (ret == -EAGAIN) if (ret == -EAGAIN)
sde->desc_avail = sdma_descq_freecnt(sde); sde->desc_avail = sdma_descq_freecnt(sde);
} else { } else {
@@ -2373,7 +2366,7 @@ static int sdma_check_progress(
/** /**
* sdma_send_txreq() - submit a tx req to ring * sdma_send_txreq() - submit a tx req to ring
* @sde: sdma engine to use * @sde: sdma engine to use
* @wait: wait structure to use when full (may be NULL) * @wait: SE wait structure to use when full (may be NULL)
* @tx: sdma_txreq to submit * @tx: sdma_txreq to submit
* @pkts_sent: has any packet been sent yet? * @pkts_sent: has any packet been sent yet?
* *
@@ -2386,7 +2379,7 @@ static int sdma_check_progress(
* -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state * -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state
*/ */
int sdma_send_txreq(struct sdma_engine *sde, int sdma_send_txreq(struct sdma_engine *sde,
struct iowait *wait, struct iowait_work *wait,
struct sdma_txreq *tx, struct sdma_txreq *tx,
bool pkts_sent) bool pkts_sent)
{ {
@@ -2397,7 +2390,7 @@ int sdma_send_txreq(struct sdma_engine *sde,
/* user should have supplied entire packet */ /* user should have supplied entire packet */
if (unlikely(tx->tlen)) if (unlikely(tx->tlen))
return -EINVAL; return -EINVAL;
tx->wait = wait; tx->wait = iowait_ioww_to_iow(wait);
spin_lock_irqsave(&sde->tail_lock, flags); spin_lock_irqsave(&sde->tail_lock, flags);
retry: retry:
if (unlikely(!__sdma_running(sde))) if (unlikely(!__sdma_running(sde)))
@@ -2406,14 +2399,14 @@ retry:
goto nodesc; goto nodesc;
tail = submit_tx(sde, tx); tail = submit_tx(sde, tx);
if (wait) if (wait)
iowait_sdma_inc(wait); iowait_sdma_inc(iowait_ioww_to_iow(wait));
sdma_update_tail(sde, tail); sdma_update_tail(sde, tail);
unlock: unlock:
spin_unlock_irqrestore(&sde->tail_lock, flags); spin_unlock_irqrestore(&sde->tail_lock, flags);
return ret; return ret;
unlock_noconn: unlock_noconn:
if (wait) if (wait)
iowait_sdma_inc(wait); iowait_sdma_inc(iowait_ioww_to_iow(wait));
tx->next_descq_idx = 0; tx->next_descq_idx = 0;
#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER #ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
tx->sn = sde->tail_sn++; tx->sn = sde->tail_sn++;
@@ -2422,10 +2415,7 @@ unlock_noconn:
spin_lock(&sde->flushlist_lock); spin_lock(&sde->flushlist_lock);
list_add_tail(&tx->list, &sde->flushlist); list_add_tail(&tx->list, &sde->flushlist);
spin_unlock(&sde->flushlist_lock); spin_unlock(&sde->flushlist_lock);
if (wait) { iowait_inc_wait_count(wait, tx->num_desc);
wait->tx_count++;
wait->count += tx->num_desc;
}
schedule_work(&sde->flush_worker); schedule_work(&sde->flush_worker);
ret = -ECOMM; ret = -ECOMM;
goto unlock; goto unlock;
@@ -2442,9 +2432,9 @@ nodesc:
/** /**
* sdma_send_txlist() - submit a list of tx req to ring * sdma_send_txlist() - submit a list of tx req to ring
* @sde: sdma engine to use * @sde: sdma engine to use
* @wait: wait structure to use when full (may be NULL) * @wait: SE wait structure to use when full (may be NULL)
* @tx_list: list of sdma_txreqs to submit * @tx_list: list of sdma_txreqs to submit
* @count: pointer to a u32 which, after return will contain the total number of * @count: pointer to a u16 which, after return will contain the total number of
* sdma_txreqs removed from the tx_list. This will include sdma_txreqs * sdma_txreqs removed from the tx_list. This will include sdma_txreqs
* whose SDMA descriptors are submitted to the ring and the sdma_txreqs * whose SDMA descriptors are submitted to the ring and the sdma_txreqs
* which are added to SDMA engine flush list if the SDMA engine state is * which are added to SDMA engine flush list if the SDMA engine state is
@@ -2467,8 +2457,8 @@ nodesc:
* -EINVAL - sdma_txreq incomplete, -EBUSY - no space in ring (wait == NULL) * -EINVAL - sdma_txreq incomplete, -EBUSY - no space in ring (wait == NULL)
* -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state * -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state
*/ */
int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait, int sdma_send_txlist(struct sdma_engine *sde, struct iowait_work *wait,
struct list_head *tx_list, u32 *count_out) struct list_head *tx_list, u16 *count_out)
{ {
struct sdma_txreq *tx, *tx_next; struct sdma_txreq *tx, *tx_next;
int ret = 0; int ret = 0;
@@ -2479,7 +2469,7 @@ int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait,
spin_lock_irqsave(&sde->tail_lock, flags); spin_lock_irqsave(&sde->tail_lock, flags);
retry: retry:
list_for_each_entry_safe(tx, tx_next, tx_list, list) { list_for_each_entry_safe(tx, tx_next, tx_list, list) {
tx->wait = wait; tx->wait = iowait_ioww_to_iow(wait);
if (unlikely(!__sdma_running(sde))) if (unlikely(!__sdma_running(sde)))
goto unlock_noconn; goto unlock_noconn;
if (unlikely(tx->num_desc > sde->desc_avail)) if (unlikely(tx->num_desc > sde->desc_avail))
@@ -2500,8 +2490,9 @@ retry:
update_tail: update_tail:
total_count = submit_count + flush_count; total_count = submit_count + flush_count;
if (wait) { if (wait) {
iowait_sdma_add(wait, total_count); iowait_sdma_add(iowait_ioww_to_iow(wait), total_count);
iowait_starve_clear(submit_count > 0, wait); iowait_starve_clear(submit_count > 0,
iowait_ioww_to_iow(wait));
} }
if (tail != INVALID_TAIL) if (tail != INVALID_TAIL)
sdma_update_tail(sde, tail); sdma_update_tail(sde, tail);
@@ -2511,7 +2502,7 @@ update_tail:
unlock_noconn: unlock_noconn:
spin_lock(&sde->flushlist_lock); spin_lock(&sde->flushlist_lock);
list_for_each_entry_safe(tx, tx_next, tx_list, list) { list_for_each_entry_safe(tx, tx_next, tx_list, list) {
tx->wait = wait; tx->wait = iowait_ioww_to_iow(wait);
list_del_init(&tx->list); list_del_init(&tx->list);
tx->next_descq_idx = 0; tx->next_descq_idx = 0;
#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER #ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
@@ -2520,10 +2511,7 @@ unlock_noconn:
#endif #endif
list_add_tail(&tx->list, &sde->flushlist); list_add_tail(&tx->list, &sde->flushlist);
flush_count++; flush_count++;
if (wait) { iowait_inc_wait_count(wait, tx->num_desc);
wait->tx_count++;
wait->count += tx->num_desc;
}
} }
spin_unlock(&sde->flushlist_lock); spin_unlock(&sde->flushlist_lock);
schedule_work(&sde->flush_worker); schedule_work(&sde->flush_worker);
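A hedged usage sketch of the reworked sdma_send_txlist() API shown above: callers now hand in the per-leg iowait_work rather than the bare iowait, and the out-parameter is a u16. The wrapper and its names are illustrative, not driver code.

static int submit_txlist_sketch(struct sdma_engine *sde, struct iowait *busy,
				struct list_head *txps)
{
	u16 count = 0;
	int ret;

	ret = sdma_send_txlist(sde, iowait_get_ib_work(busy), txps, &count);
	/* count reports how many txreqs left the list: submitted or flushed */
	return ret;
}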


@@ -1,7 +1,7 @@
#ifndef _HFI1_SDMA_H #ifndef _HFI1_SDMA_H
#define _HFI1_SDMA_H #define _HFI1_SDMA_H
/* /*
* Copyright(c) 2015, 2016 Intel Corporation. * Copyright(c) 2015 - 2018 Intel Corporation.
* *
* This file is provided under a dual BSD/GPLv2 license. When using or * This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license. * redistributing this file, you may do so under either license.
@@ -62,16 +62,6 @@
/* Hardware limit for SDMA packet size */ /* Hardware limit for SDMA packet size */
#define MAX_SDMA_PKT_SIZE ((16 * 1024) - 1) #define MAX_SDMA_PKT_SIZE ((16 * 1024) - 1)
#define SDMA_TXREQ_S_OK 0
#define SDMA_TXREQ_S_SENDERROR 1
#define SDMA_TXREQ_S_ABORTED 2
#define SDMA_TXREQ_S_SHUTDOWN 3
/* flags bits */
#define SDMA_TXREQ_F_URGENT 0x0001
#define SDMA_TXREQ_F_AHG_COPY 0x0002
#define SDMA_TXREQ_F_USE_AHG 0x0004
#define SDMA_MAP_NONE 0 #define SDMA_MAP_NONE 0
#define SDMA_MAP_SINGLE 1 #define SDMA_MAP_SINGLE 1
#define SDMA_MAP_PAGE 2 #define SDMA_MAP_PAGE 2
@@ -415,6 +405,7 @@ struct sdma_engine {
struct list_head flushlist; struct list_head flushlist;
struct cpumask cpu_mask; struct cpumask cpu_mask;
struct kobject kobj; struct kobject kobj;
u32 msix_intr;
}; };
int sdma_init(struct hfi1_devdata *dd, u8 port); int sdma_init(struct hfi1_devdata *dd, u8 port);
@@ -849,16 +840,16 @@ static inline int sdma_txadd_kvaddr(
dd, SDMA_MAP_SINGLE, tx, addr, len); dd, SDMA_MAP_SINGLE, tx, addr, len);
} }
struct iowait; struct iowait_work;
int sdma_send_txreq(struct sdma_engine *sde, int sdma_send_txreq(struct sdma_engine *sde,
struct iowait *wait, struct iowait_work *wait,
struct sdma_txreq *tx, struct sdma_txreq *tx,
bool pkts_sent); bool pkts_sent);
int sdma_send_txlist(struct sdma_engine *sde, int sdma_send_txlist(struct sdma_engine *sde,
struct iowait *wait, struct iowait_work *wait,
struct list_head *tx_list, struct list_head *tx_list,
u32 *count); u16 *count_out);
int sdma_ahg_alloc(struct sdma_engine *sde); int sdma_ahg_alloc(struct sdma_engine *sde);
void sdma_ahg_free(struct sdma_engine *sde, int ahg_index); void sdma_ahg_free(struct sdma_engine *sde, int ahg_index);


@@ -494,17 +494,18 @@ static struct kobj_type hfi1_vl2mtu_ktype = {
* Start of per-unit (or driver, in some cases, but replicated * Start of per-unit (or driver, in some cases, but replicated
* per unit) functions (these get a device *) * per unit) functions (these get a device *)
*/ */
static ssize_t show_rev(struct device *device, struct device_attribute *attr, static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
char *buf) char *buf)
{ {
struct hfi1_ibdev *dev = struct hfi1_ibdev *dev =
container_of(device, struct hfi1_ibdev, rdi.ibdev.dev); container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev); return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev);
} }
static DEVICE_ATTR_RO(hw_rev);
static ssize_t show_hfi(struct device *device, struct device_attribute *attr, static ssize_t board_id_show(struct device *device,
char *buf) struct device_attribute *attr, char *buf)
{ {
struct hfi1_ibdev *dev = struct hfi1_ibdev *dev =
container_of(device, struct hfi1_ibdev, rdi.ibdev.dev); container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
@@ -517,8 +518,9 @@ static ssize_t show_hfi(struct device *device, struct device_attribute *attr,
ret = scnprintf(buf, PAGE_SIZE, "%s\n", dd->boardname); ret = scnprintf(buf, PAGE_SIZE, "%s\n", dd->boardname);
return ret; return ret;
} }
static DEVICE_ATTR_RO(board_id);
static ssize_t show_boardversion(struct device *device, static ssize_t boardversion_show(struct device *device,
struct device_attribute *attr, char *buf) struct device_attribute *attr, char *buf)
{ {
struct hfi1_ibdev *dev = struct hfi1_ibdev *dev =
@@ -528,8 +530,9 @@ static ssize_t show_boardversion(struct device *device,
/* The string printed here is already newline-terminated. */ /* The string printed here is already newline-terminated. */
return scnprintf(buf, PAGE_SIZE, "%s", dd->boardversion); return scnprintf(buf, PAGE_SIZE, "%s", dd->boardversion);
} }
static DEVICE_ATTR_RO(boardversion);
static ssize_t show_nctxts(struct device *device, static ssize_t nctxts_show(struct device *device,
struct device_attribute *attr, char *buf) struct device_attribute *attr, char *buf)
{ {
struct hfi1_ibdev *dev = struct hfi1_ibdev *dev =
@@ -546,8 +549,9 @@ static ssize_t show_nctxts(struct device *device,
min(dd->num_user_contexts, min(dd->num_user_contexts,
(u32)dd->sc_sizes[SC_USER].count)); (u32)dd->sc_sizes[SC_USER].count));
} }
static DEVICE_ATTR_RO(nctxts);
static ssize_t show_nfreectxts(struct device *device, static ssize_t nfreectxts_show(struct device *device,
struct device_attribute *attr, char *buf) struct device_attribute *attr, char *buf)
{ {
struct hfi1_ibdev *dev = struct hfi1_ibdev *dev =
@@ -557,8 +561,9 @@ static ssize_t show_nfreectxts(struct device *device,
/* Return the number of free user ports (contexts) available. */ /* Return the number of free user ports (contexts) available. */
return scnprintf(buf, PAGE_SIZE, "%u\n", dd->freectxts); return scnprintf(buf, PAGE_SIZE, "%u\n", dd->freectxts);
} }
static DEVICE_ATTR_RO(nfreectxts);
static ssize_t show_serial(struct device *device, static ssize_t serial_show(struct device *device,
struct device_attribute *attr, char *buf) struct device_attribute *attr, char *buf)
{ {
struct hfi1_ibdev *dev = struct hfi1_ibdev *dev =
@@ -567,8 +572,9 @@ static ssize_t show_serial(struct device *device,
return scnprintf(buf, PAGE_SIZE, "%s", dd->serial); return scnprintf(buf, PAGE_SIZE, "%s", dd->serial);
} }
static DEVICE_ATTR_RO(serial);
static ssize_t store_chip_reset(struct device *device, static ssize_t chip_reset_store(struct device *device,
struct device_attribute *attr, const char *buf, struct device_attribute *attr, const char *buf,
size_t count) size_t count)
{ {
@@ -586,6 +592,7 @@ static ssize_t store_chip_reset(struct device *device,
bail: bail:
return ret < 0 ? ret : count; return ret < 0 ? ret : count;
} }
static DEVICE_ATTR_WO(chip_reset);
/* /*
* Convert the reported temperature from an integer (reported in * Convert the reported temperature from an integer (reported in
@@ -598,7 +605,7 @@ bail:
/* /*
* Dump tempsense values, in decimal, to ease shell-scripts. * Dump tempsense values, in decimal, to ease shell-scripts.
*/ */
static ssize_t show_tempsense(struct device *device, static ssize_t tempsense_show(struct device *device,
struct device_attribute *attr, char *buf) struct device_attribute *attr, char *buf)
{ {
struct hfi1_ibdev *dev = struct hfi1_ibdev *dev =
@@ -622,6 +629,7 @@ static ssize_t show_tempsense(struct device *device,
} }
return ret; return ret;
} }
static DEVICE_ATTR_RO(tempsense);
/* /*
* end of per-unit (or driver, in some cases, but replicated * end of per-unit (or driver, in some cases, but replicated
@@ -629,24 +637,20 @@ static ssize_t show_tempsense(struct device *device,
*/ */
/* start of per-unit file structures and support code */ /* start of per-unit file structures and support code */
static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); static struct attribute *hfi1_attributes[] = {
static DEVICE_ATTR(board_id, S_IRUGO, show_hfi, NULL); &dev_attr_hw_rev.attr,
static DEVICE_ATTR(nctxts, S_IRUGO, show_nctxts, NULL); &dev_attr_board_id.attr,
static DEVICE_ATTR(nfreectxts, S_IRUGO, show_nfreectxts, NULL); &dev_attr_nctxts.attr,
static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL); &dev_attr_nfreectxts.attr,
static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL); &dev_attr_serial.attr,
static DEVICE_ATTR(tempsense, S_IRUGO, show_tempsense, NULL); &dev_attr_boardversion.attr,
static DEVICE_ATTR(chip_reset, S_IWUSR, NULL, store_chip_reset); &dev_attr_tempsense.attr,
&dev_attr_chip_reset.attr,
NULL,
};
static struct device_attribute *hfi1_attributes[] = { const struct attribute_group ib_hfi1_attr_group = {
&dev_attr_hw_rev, .attrs = hfi1_attributes,
&dev_attr_board_id,
&dev_attr_nctxts,
&dev_attr_nfreectxts,
&dev_attr_serial,
&dev_attr_boardversion,
&dev_attr_tempsense,
&dev_attr_chip_reset,
}; };
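For reference, the same attribute-group pattern in its smallest form, with placeholder names; the group is then handed to the core once (presumably through the device's sysfs group hook) instead of creating each file with device_create_file():

#include <linux/device.h>
#include <linux/kernel.h>

static ssize_t example_show(struct device *device,
			    struct device_attribute *attr, char *buf)
{
	return scnprintf(buf, PAGE_SIZE, "%d\n", 42);
}
static DEVICE_ATTR_RO(example);	/* expands to dev_attr_example, .show = example_show */

static struct attribute *example_attributes[] = {
	&dev_attr_example.attr,
	NULL,
};

static const struct attribute_group example_attr_group = {
	.attrs = example_attributes,
};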
int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num, int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num,
@@ -832,12 +836,6 @@ int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd)
struct device *class_dev = &dev->dev; struct device *class_dev = &dev->dev;
int i, j, ret; int i, j, ret;
for (i = 0; i < ARRAY_SIZE(hfi1_attributes); ++i) {
ret = device_create_file(&dev->dev, hfi1_attributes[i]);
if (ret)
goto bail;
}
for (i = 0; i < dd->num_sdma; i++) { for (i = 0; i < dd->num_sdma; i++) {
ret = kobject_init_and_add(&dd->per_sdma[i].kobj, ret = kobject_init_and_add(&dd->per_sdma[i].kobj,
&sde_ktype, &class_dev->kobj, &sde_ktype, &class_dev->kobj,
@@ -855,9 +853,6 @@ int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd)
return 0; return 0;
bail: bail:
for (i = 0; i < ARRAY_SIZE(hfi1_attributes); ++i)
device_remove_file(&dev->dev, hfi1_attributes[i]);
for (i = 0; i < dd->num_sdma; i++) for (i = 0; i < dd->num_sdma; i++)
kobject_del(&dd->per_sdma[i].kobj); kobject_del(&dd->per_sdma[i].kobj);


@@ -1,5 +1,5 @@
/* /*
* Copyright(c) 2015 - 2017 Intel Corporation. * Copyright(c) 2015 - 2018 Intel Corporation.
* *
* This file is provided under a dual BSD/GPLv2 license. When using or * This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license. * redistributing this file, you may do so under either license.
@@ -62,3 +62,4 @@ __print_symbolic(etype, \
#include "trace_rx.h" #include "trace_rx.h"
#include "trace_tx.h" #include "trace_tx.h"
#include "trace_mmu.h" #include "trace_mmu.h"
#include "trace_iowait.h"


@@ -0,0 +1,54 @@
/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
/*
* Copyright(c) 2018 Intel Corporation.
*
*/
#if !defined(__HFI1_TRACE_IOWAIT_H) || defined(TRACE_HEADER_MULTI_READ)
#define __HFI1_TRACE_IOWAIT_H
#include <linux/tracepoint.h>
#include "iowait.h"
#include "verbs.h"
#undef TRACE_SYSTEM
#define TRACE_SYSTEM hfi1_iowait
DECLARE_EVENT_CLASS(hfi1_iowait_template,
TP_PROTO(struct iowait *wait, u32 flag),
TP_ARGS(wait, flag),
TP_STRUCT__entry(/* entry */
__field(unsigned long, addr)
__field(unsigned long, flags)
__field(u32, flag)
__field(u32, qpn)
),
TP_fast_assign(/* assign */
__entry->addr = (unsigned long)wait;
__entry->flags = wait->flags;
__entry->flag = (1 << flag);
__entry->qpn = iowait_to_qp(wait)->ibqp.qp_num;
),
TP_printk(/* print */
"iowait 0x%lx qp %u flags 0x%lx flag 0x%x",
__entry->addr,
__entry->qpn,
__entry->flags,
__entry->flag
)
);
DEFINE_EVENT(hfi1_iowait_template, hfi1_iowait_set,
TP_PROTO(struct iowait *wait, u32 flag),
TP_ARGS(wait, flag));
DEFINE_EVENT(hfi1_iowait_template, hfi1_iowait_clear,
TP_PROTO(struct iowait *wait, u32 flag),
TP_ARGS(wait, flag));
#endif /* __HFI1_TRACE_IOWAIT_H */
#undef TRACE_INCLUDE_PATH
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_PATH .
#define TRACE_INCLUDE_FILE trace_iowait
#include <trace/define_trace.h>
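The header above only declares the hfi1_iowait_set/clear events; DEFINE_EVENT generates trace_hfi1_iowait_set() and trace_hfi1_iowait_clear() for callers. A hedged guess at how the flag helpers might pair the tracepoint with the bit operation (it assumes struct iowait carries an unsigned long flags field, as the tracepoint's fast-assign implies):

static inline void iowait_set_flag_sketch(struct iowait *wait, u32 flag)
{
	trace_hfi1_iowait_set(wait, flag);
	set_bit(flag, &wait->flags);
}

static inline void iowait_clear_flag_sketch(struct iowait *wait, u32 flag)
{
	trace_hfi1_iowait_clear(wait, flag);
	clear_bit(flag, &wait->flags);
}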


@@ -88,7 +88,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
} }
clear_ahg(qp); clear_ahg(qp);
wqe = rvt_get_swqe_ptr(qp, qp->s_last); wqe = rvt_get_swqe_ptr(qp, qp->s_last);
hfi1_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); rvt_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
goto done_free_tx; goto done_free_tx;
} }
@@ -140,7 +140,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
qp, wqe->wr.ex.invalidate_rkey); qp, wqe->wr.ex.invalidate_rkey);
local_ops = 1; local_ops = 1;
} }
hfi1_send_complete(qp, wqe, err ? IB_WC_LOC_PROT_ERR rvt_send_complete(qp, wqe, err ? IB_WC_LOC_PROT_ERR
: IB_WC_SUCCESS); : IB_WC_SUCCESS);
if (local_ops) if (local_ops)
atomic_dec(&qp->local_ops_pending); atomic_dec(&qp->local_ops_pending);
@@ -426,7 +426,7 @@ send_first:
qp->r_rcv_len += pmtu; qp->r_rcv_len += pmtu;
if (unlikely(qp->r_rcv_len > qp->r_len)) if (unlikely(qp->r_rcv_len > qp->r_len))
goto rewind; goto rewind;
hfi1_copy_sge(&qp->r_sge, data, pmtu, false, false); rvt_copy_sge(qp, &qp->r_sge, data, pmtu, false, false);
break; break;
case OP(SEND_LAST_WITH_IMMEDIATE): case OP(SEND_LAST_WITH_IMMEDIATE):
@@ -449,7 +449,7 @@ send_last:
if (unlikely(wc.byte_len > qp->r_len)) if (unlikely(wc.byte_len > qp->r_len))
goto rewind; goto rewind;
wc.opcode = IB_WC_RECV; wc.opcode = IB_WC_RECV;
hfi1_copy_sge(&qp->r_sge, data, tlen, false, false); rvt_copy_sge(qp, &qp->r_sge, data, tlen, false, false);
rvt_put_ss(&qp->s_rdma_read_sge); rvt_put_ss(&qp->s_rdma_read_sge);
last_imm: last_imm:
wc.wr_id = qp->r_wr_id; wc.wr_id = qp->r_wr_id;
@@ -523,7 +523,7 @@ rdma_first:
qp->r_rcv_len += pmtu; qp->r_rcv_len += pmtu;
if (unlikely(qp->r_rcv_len > qp->r_len)) if (unlikely(qp->r_rcv_len > qp->r_len))
goto drop; goto drop;
hfi1_copy_sge(&qp->r_sge, data, pmtu, true, false); rvt_copy_sge(qp, &qp->r_sge, data, pmtu, true, false);
break; break;
case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE): case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
@@ -550,7 +550,7 @@ rdma_last_imm:
} }
wc.byte_len = qp->r_len; wc.byte_len = qp->r_len;
wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
hfi1_copy_sge(&qp->r_sge, data, tlen, true, false); rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false);
rvt_put_ss(&qp->r_sge); rvt_put_ss(&qp->r_sge);
goto last_imm; goto last_imm;
@@ -564,7 +564,7 @@ rdma_last:
tlen -= (hdrsize + extra_bytes); tlen -= (hdrsize + extra_bytes);
if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
goto drop; goto drop;
hfi1_copy_sge(&qp->r_sge, data, tlen, true, false); rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false);
rvt_put_ss(&qp->r_sge); rvt_put_ss(&qp->r_sge);
break; break;


@@ -210,8 +210,8 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
} }
hfi1_make_grh(ibp, &grh, &grd, 0, 0); hfi1_make_grh(ibp, &grh, &grd, 0, 0);
hfi1_copy_sge(&qp->r_sge, &grh, rvt_copy_sge(qp, &qp->r_sge, &grh,
sizeof(grh), true, false); sizeof(grh), true, false);
wc.wc_flags |= IB_WC_GRH; wc.wc_flags |= IB_WC_GRH;
} else { } else {
rvt_skip_sge(&qp->r_sge, sizeof(struct ib_grh), true); rvt_skip_sge(&qp->r_sge, sizeof(struct ib_grh), true);
@@ -228,7 +228,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
if (len > sge->sge_length) if (len > sge->sge_length)
len = sge->sge_length; len = sge->sge_length;
WARN_ON_ONCE(len == 0); WARN_ON_ONCE(len == 0);
hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, true, false); rvt_copy_sge(qp, &qp->r_sge, sge->vaddr, len, true, false);
sge->vaddr += len; sge->vaddr += len;
sge->length -= len; sge->length -= len;
sge->sge_length -= len; sge->sge_length -= len;
@@ -518,7 +518,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
goto bail; goto bail;
} }
wqe = rvt_get_swqe_ptr(qp, qp->s_last); wqe = rvt_get_swqe_ptr(qp, qp->s_last);
hfi1_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); rvt_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
goto done_free_tx; goto done_free_tx;
} }
@@ -560,7 +560,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
ud_loopback(qp, wqe); ud_loopback(qp, wqe);
spin_lock_irqsave(&qp->s_lock, tflags); spin_lock_irqsave(&qp->s_lock, tflags);
ps->flags = tflags; ps->flags = tflags;
hfi1_send_complete(qp, wqe, IB_WC_SUCCESS); rvt_send_complete(qp, wqe, IB_WC_SUCCESS);
goto done_free_tx; goto done_free_tx;
} }
} }
@@ -1019,8 +1019,8 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
goto drop; goto drop;
} }
if (packet->grh) { if (packet->grh) {
hfi1_copy_sge(&qp->r_sge, packet->grh, rvt_copy_sge(qp, &qp->r_sge, packet->grh,
sizeof(struct ib_grh), true, false); sizeof(struct ib_grh), true, false);
wc.wc_flags |= IB_WC_GRH; wc.wc_flags |= IB_WC_GRH;
} else if (packet->etype == RHF_RCV_TYPE_BYPASS) { } else if (packet->etype == RHF_RCV_TYPE_BYPASS) {
struct ib_grh grh; struct ib_grh grh;
@@ -1030,14 +1030,14 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
* out when creating 16B, add back the GRH here. * out when creating 16B, add back the GRH here.
*/ */
hfi1_make_ext_grh(packet, &grh, slid, dlid); hfi1_make_ext_grh(packet, &grh, slid, dlid);
hfi1_copy_sge(&qp->r_sge, &grh, rvt_copy_sge(qp, &qp->r_sge, &grh,
sizeof(struct ib_grh), true, false); sizeof(struct ib_grh), true, false);
wc.wc_flags |= IB_WC_GRH; wc.wc_flags |= IB_WC_GRH;
} else { } else {
rvt_skip_sge(&qp->r_sge, sizeof(struct ib_grh), true); rvt_skip_sge(&qp->r_sge, sizeof(struct ib_grh), true);
} }
hfi1_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh), rvt_copy_sge(qp, &qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh),
true, false); true, false);
rvt_put_ss(&qp->r_sge); rvt_put_ss(&qp->r_sge);
if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
return; return;


@@ -1,5 +1,5 @@
/* /*
* Copyright(c) 2015 - 2017 Intel Corporation. * Copyright(c) 2015 - 2018 Intel Corporation.
* *
* This file is provided under a dual BSD/GPLv2 license. When using or * This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license. * redistributing this file, you may do so under either license.
@@ -76,8 +76,7 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12
static unsigned initial_pkt_count = 8; static unsigned initial_pkt_count = 8;
static int user_sdma_send_pkts(struct user_sdma_request *req, static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts);
unsigned maxpkts);
static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status); static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status);
static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq); static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq);
static void user_sdma_free_request(struct user_sdma_request *req, bool unpin); static void user_sdma_free_request(struct user_sdma_request *req, bool unpin);
@@ -101,7 +100,7 @@ static inline u32 get_lrh_len(struct hfi1_pkt_header, u32 len);
static int defer_packet_queue( static int defer_packet_queue(
struct sdma_engine *sde, struct sdma_engine *sde,
struct iowait *wait, struct iowait_work *wait,
struct sdma_txreq *txreq, struct sdma_txreq *txreq,
uint seq, uint seq,
bool pkts_sent); bool pkts_sent);
@@ -124,13 +123,13 @@ static struct mmu_rb_ops sdma_rb_ops = {
static int defer_packet_queue( static int defer_packet_queue(
struct sdma_engine *sde, struct sdma_engine *sde,
struct iowait *wait, struct iowait_work *wait,
struct sdma_txreq *txreq, struct sdma_txreq *txreq,
uint seq, uint seq,
bool pkts_sent) bool pkts_sent)
{ {
struct hfi1_user_sdma_pkt_q *pq = struct hfi1_user_sdma_pkt_q *pq =
container_of(wait, struct hfi1_user_sdma_pkt_q, busy); container_of(wait->iow, struct hfi1_user_sdma_pkt_q, busy);
struct hfi1_ibdev *dev = &pq->dd->verbs_dev; struct hfi1_ibdev *dev = &pq->dd->verbs_dev;
struct user_sdma_txreq *tx = struct user_sdma_txreq *tx =
container_of(txreq, struct user_sdma_txreq, txreq); container_of(txreq, struct user_sdma_txreq, txreq);
@@ -187,13 +186,12 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
pq->ctxt = uctxt->ctxt; pq->ctxt = uctxt->ctxt;
pq->subctxt = fd->subctxt; pq->subctxt = fd->subctxt;
pq->n_max_reqs = hfi1_sdma_comp_ring_size; pq->n_max_reqs = hfi1_sdma_comp_ring_size;
pq->state = SDMA_PKT_Q_INACTIVE;
atomic_set(&pq->n_reqs, 0); atomic_set(&pq->n_reqs, 0);
init_waitqueue_head(&pq->wait); init_waitqueue_head(&pq->wait);
atomic_set(&pq->n_locked, 0); atomic_set(&pq->n_locked, 0);
pq->mm = fd->mm; pq->mm = fd->mm;
iowait_init(&pq->busy, 0, NULL, defer_packet_queue, iowait_init(&pq->busy, 0, NULL, NULL, defer_packet_queue,
activate_packet_queue, NULL); activate_packet_queue, NULL);
pq->reqidx = 0; pq->reqidx = 0;
@@ -276,7 +274,7 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd,
/* Wait until all requests have been freed. */ /* Wait until all requests have been freed. */
wait_event_interruptible( wait_event_interruptible(
pq->wait, pq->wait,
(READ_ONCE(pq->state) == SDMA_PKT_Q_INACTIVE)); !atomic_read(&pq->n_reqs));
kfree(pq->reqs); kfree(pq->reqs);
kfree(pq->req_in_use); kfree(pq->req_in_use);
kmem_cache_destroy(pq->txreq_cache); kmem_cache_destroy(pq->txreq_cache);
@@ -312,6 +310,13 @@ static u8 dlid_to_selector(u16 dlid)
return mapping[hash]; return mapping[hash];
} }
/**
* hfi1_user_sdma_process_request() - Process and start a user sdma request
* @fd: valid file descriptor
* @iovec: array of io vectors to process
* @dim: overall iovec array size
* @count: number of io vector array entries processed
*/
int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
struct iovec *iovec, unsigned long dim, struct iovec *iovec, unsigned long dim,
unsigned long *count) unsigned long *count)
@@ -328,7 +333,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
u8 opcode, sc, vl; u8 opcode, sc, vl;
u16 pkey; u16 pkey;
u32 slid; u32 slid;
int req_queued = 0;
u16 dlid; u16 dlid;
u32 selector; u32 selector;
@@ -392,7 +396,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
req->data_len = 0; req->data_len = 0;
req->pq = pq; req->pq = pq;
req->cq = cq; req->cq = cq;
req->status = -1;
req->ahg_idx = -1; req->ahg_idx = -1;
req->iov_idx = 0; req->iov_idx = 0;
req->sent = 0; req->sent = 0;
@@ -400,12 +403,14 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
req->seqcomp = 0; req->seqcomp = 0;
req->seqsubmitted = 0; req->seqsubmitted = 0;
req->tids = NULL; req->tids = NULL;
req->done = 0;
req->has_error = 0; req->has_error = 0;
INIT_LIST_HEAD(&req->txps); INIT_LIST_HEAD(&req->txps);
memcpy(&req->info, &info, sizeof(info)); memcpy(&req->info, &info, sizeof(info));
/* The request is initialized, count it */
atomic_inc(&pq->n_reqs);
if (req_opcode(info.ctrl) == EXPECTED) { if (req_opcode(info.ctrl) == EXPECTED) {
/* expected must have a TID info and at least one data vector */ /* expected must have a TID info and at least one data vector */
if (req->data_iovs < 2) { if (req->data_iovs < 2) {
@@ -500,7 +505,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
ret = pin_vector_pages(req, &req->iovs[i]); ret = pin_vector_pages(req, &req->iovs[i]);
if (ret) { if (ret) {
req->data_iovs = i; req->data_iovs = i;
req->status = ret;
goto free_req; goto free_req;
} }
req->data_len += req->iovs[i].iov.iov_len; req->data_len += req->iovs[i].iov.iov_len;
@@ -561,23 +565,11 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
req->ahg_idx = sdma_ahg_alloc(req->sde); req->ahg_idx = sdma_ahg_alloc(req->sde);
set_comp_state(pq, cq, info.comp_idx, QUEUED, 0); set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
atomic_inc(&pq->n_reqs); pq->state = SDMA_PKT_Q_ACTIVE;
req_queued = 1;
/* Send the first N packets in the request to buy us some time */ /* Send the first N packets in the request to buy us some time */
ret = user_sdma_send_pkts(req, pcount); ret = user_sdma_send_pkts(req, pcount);
if (unlikely(ret < 0 && ret != -EBUSY)) { if (unlikely(ret < 0 && ret != -EBUSY))
req->status = ret;
goto free_req; goto free_req;
}
/*
* It is possible that the SDMA engine would have processed all the
* submitted packets by the time we get here. Therefore, only set
* packet queue state to ACTIVE if there are still uncompleted
* requests.
*/
if (atomic_read(&pq->n_reqs))
xchg(&pq->state, SDMA_PKT_Q_ACTIVE);
/* /*
* This is a somewhat blocking send implementation. * This is a somewhat blocking send implementation.
@@ -588,14 +580,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
while (req->seqsubmitted != req->info.npkts) { while (req->seqsubmitted != req->info.npkts) {
ret = user_sdma_send_pkts(req, pcount); ret = user_sdma_send_pkts(req, pcount);
if (ret < 0) { if (ret < 0) {
if (ret != -EBUSY) { if (ret != -EBUSY)
req->status = ret; goto free_req;
WRITE_ONCE(req->has_error, 1);
if (READ_ONCE(req->seqcomp) ==
req->seqsubmitted - 1)
goto free_req;
return ret;
}
wait_event_interruptible_timeout( wait_event_interruptible_timeout(
pq->busy.wait_dma, pq->busy.wait_dma,
(pq->state == SDMA_PKT_Q_ACTIVE), (pq->state == SDMA_PKT_Q_ACTIVE),
@@ -606,10 +592,19 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
*count += idx; *count += idx;
return 0; return 0;
free_req: free_req:
user_sdma_free_request(req, true); /*
if (req_queued) * If the submitted seqsubmitted == npkts, the completion routine
* controls the final state. If seqsubmitted < npkts, wait for any
* outstanding packets to finish before cleaning up.
*/
if (req->seqsubmitted < req->info.npkts) {
if (req->seqsubmitted)
wait_event(pq->busy.wait_dma,
(req->seqcomp == req->seqsubmitted - 1));
user_sdma_free_request(req, true);
pq_update(pq); pq_update(pq);
set_comp_state(pq, cq, info.comp_idx, ERROR, req->status); set_comp_state(pq, cq, info.comp_idx, ERROR, ret);
}
return ret; return ret;
} }
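The error path above changes who tears the request down: if every packet was handed to the engine (seqsubmitted == npkts) the interrupt-time completion callback owns the final free, otherwise the submitter waits for whatever it did queue to complete and frees the request itself. A minimal userspace analogue of that hand-off, using C11 atomics and a busy-wait in place of the driver's wait_event/iowait machinery (the struct and helper names below are illustrative, not hfi1's):

/* Illustrative analogue of the hfi1 error-path hand-off; not driver code. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

struct req {
	unsigned npkts;            /* packets the caller asked for     */
	unsigned seqsubmitted;     /* packets actually given to the HW */
	atomic_uint seqcomp;       /* count of completed packets       */
};

/* stand-in for the SDMA engine: completes whatever was submitted */
static void *engine(void *arg)
{
	struct req *r = arg;

	for (unsigned i = 0; i < r->seqsubmitted; i++) {
		usleep(1000);                      /* pretend DMA latency */
		atomic_store(&r->seqcomp, i + 1);
	}
	return NULL;
}

int main(void)
{
	struct req r = { .npkts = 8, .seqsubmitted = 5 };  /* error after 5 of 8 */
	pthread_t t;

	atomic_init(&r.seqcomp, 0);
	pthread_create(&t, NULL, engine, &r);

	if (r.seqsubmitted < r.npkts) {
		/* submitter still owns the request: wait for what it queued */
		while (atomic_load(&r.seqcomp) != r.seqsubmitted)
			;                          /* wait_event() in the driver */
		printf("submitter frees request after %u completions\n",
		       atomic_load(&r.seqcomp));
	} else {
		printf("completion callback owns the final free\n");
	}

	pthread_join(t, NULL);
	return 0;
}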
@@ -760,9 +755,10 @@ static int user_sdma_txadd(struct user_sdma_request *req,
return ret; return ret;
} }
static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts)
{ {
int ret = 0, count; int ret = 0;
u16 count;
unsigned npkts = 0; unsigned npkts = 0;
struct user_sdma_txreq *tx = NULL; struct user_sdma_txreq *tx = NULL;
struct hfi1_user_sdma_pkt_q *pq = NULL; struct hfi1_user_sdma_pkt_q *pq = NULL;
@@ -864,8 +860,10 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
changes = set_txreq_header_ahg(req, tx, changes = set_txreq_header_ahg(req, tx,
datalen); datalen);
if (changes < 0) if (changes < 0) {
ret = changes;
goto free_tx; goto free_tx;
}
} }
} else { } else {
ret = sdma_txinit(&tx->txreq, 0, sizeof(req->hdr) + ret = sdma_txinit(&tx->txreq, 0, sizeof(req->hdr) +
@@ -914,10 +912,11 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
npkts++; npkts++;
} }
dosend: dosend:
ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps, &count); ret = sdma_send_txlist(req->sde,
iowait_get_ib_work(&pq->busy),
&req->txps, &count);
req->seqsubmitted += count; req->seqsubmitted += count;
if (req->seqsubmitted == req->info.npkts) { if (req->seqsubmitted == req->info.npkts) {
WRITE_ONCE(req->done, 1);
/* /*
* The txreq has already been submitted to the HW queue * The txreq has already been submitted to the HW queue
* so we can free the AHG entry now. Corruption will not * so we can free the AHG entry now. Corruption will not
@@ -1365,11 +1364,15 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
return idx; return idx;
} }
/* /**
* SDMA tx request completion callback. Called when the SDMA progress * user_sdma_txreq_cb() - SDMA tx request completion callback.
* state machine gets notification that the SDMA descriptors for this * @txreq: valid sdma tx request
* tx request have been processed by the DMA engine. Called in * @status: success/failure of request
* interrupt context. *
* Called when the SDMA progress state machine gets notification that
* the SDMA descriptors for this tx request have been processed by the
* DMA engine. Called in interrupt context.
* Only do work on completed sequences.
*/ */
static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status) static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
{ {
@@ -1378,7 +1381,7 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
struct user_sdma_request *req; struct user_sdma_request *req;
struct hfi1_user_sdma_pkt_q *pq; struct hfi1_user_sdma_pkt_q *pq;
struct hfi1_user_sdma_comp_q *cq; struct hfi1_user_sdma_comp_q *cq;
u16 idx; enum hfi1_sdma_comp_state state = COMPLETE;
if (!tx->req) if (!tx->req)
return; return;
@@ -1391,39 +1394,25 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
SDMA_DBG(req, "SDMA completion with error %d", SDMA_DBG(req, "SDMA completion with error %d",
status); status);
WRITE_ONCE(req->has_error, 1); WRITE_ONCE(req->has_error, 1);
state = ERROR;
} }
req->seqcomp = tx->seqnum; req->seqcomp = tx->seqnum;
kmem_cache_free(pq->txreq_cache, tx); kmem_cache_free(pq->txreq_cache, tx);
tx = NULL;
idx = req->info.comp_idx; /* sequence isn't complete? We are done */
if (req->status == -1 && status == SDMA_TXREQ_S_OK) { if (req->seqcomp != req->info.npkts - 1)
if (req->seqcomp == req->info.npkts - 1) { return;
req->status = 0;
user_sdma_free_request(req, false); user_sdma_free_request(req, false);
pq_update(pq); set_comp_state(pq, cq, req->info.comp_idx, state, status);
set_comp_state(pq, cq, idx, COMPLETE, 0); pq_update(pq);
}
} else {
if (status != SDMA_TXREQ_S_OK)
req->status = status;
if (req->seqcomp == (READ_ONCE(req->seqsubmitted) - 1) &&
(READ_ONCE(req->done) ||
READ_ONCE(req->has_error))) {
user_sdma_free_request(req, false);
pq_update(pq);
set_comp_state(pq, cq, idx, ERROR, req->status);
}
}
} }
static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq) static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq)
{ {
if (atomic_dec_and_test(&pq->n_reqs)) { if (atomic_dec_and_test(&pq->n_reqs))
xchg(&pq->state, SDMA_PKT_Q_INACTIVE);
wake_up(&pq->wait); wake_up(&pq->wait);
}
} }
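With SDMA_PKT_Q_INACTIVE gone, pq->n_reqs is the only liveness signal hfi1_user_sdma_free_queues() watches: each accepted request bumps the counter, and pq_update() drops it and wakes the waiter once it reaches zero. A rough userspace sketch of that counting scheme, modelling the kernel's atomic counter and wait queue with a mutex and condition variable (names invented for the sketch):

/* Userspace sketch of the pq->n_reqs counting; not the hfi1 code. */
#include <pthread.h>
#include <stdio.h>

struct pkt_q {
	unsigned n_reqs;           /* in-flight requests, like pq->n_reqs */
	pthread_mutex_t lock;
	pthread_cond_t  idle;      /* signalled when n_reqs reaches zero  */
};

static void pq_get(struct pkt_q *pq)        /* request accepted */
{
	pthread_mutex_lock(&pq->lock);
	pq->n_reqs++;
	pthread_mutex_unlock(&pq->lock);
}

static void pq_put(struct pkt_q *pq)        /* analogue of pq_update() */
{
	pthread_mutex_lock(&pq->lock);
	if (--pq->n_reqs == 0)
		pthread_cond_broadcast(&pq->idle);
	pthread_mutex_unlock(&pq->lock);
}

static void pq_drain(struct pkt_q *pq)      /* analogue of free_queues() wait */
{
	pthread_mutex_lock(&pq->lock);
	while (pq->n_reqs)
		pthread_cond_wait(&pq->idle, &pq->lock);
	pthread_mutex_unlock(&pq->lock);
}

int main(void)
{
	struct pkt_q pq = { 0, PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER };

	pq_get(&pq);
	pq_put(&pq);    /* completion path */
	pq_drain(&pq);  /* returns immediately once the count is zero */
	printf("queue drained, safe to free\n");
	return 0;
}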
static void user_sdma_free_request(struct user_sdma_request *req, bool unpin) static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
@@ -1448,6 +1437,8 @@ static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
if (!node) if (!node)
continue; continue;
req->iovs[i].node = NULL;
if (unpin) if (unpin)
hfi1_mmu_rb_remove(req->pq->handler, hfi1_mmu_rb_remove(req->pq->handler,
&node->rb); &node->rb);
@@ -105,9 +105,10 @@ static inline int ahg_header_set(u32 *arr, int idx, size_t array_size,
#define TXREQ_FLAGS_REQ_ACK BIT(0) /* Set the ACK bit in the header */ #define TXREQ_FLAGS_REQ_ACK BIT(0) /* Set the ACK bit in the header */
#define TXREQ_FLAGS_REQ_DISABLE_SH BIT(1) /* Disable header suppression */ #define TXREQ_FLAGS_REQ_DISABLE_SH BIT(1) /* Disable header suppression */
#define SDMA_PKT_Q_INACTIVE BIT(0) enum pkt_q_sdma_state {
#define SDMA_PKT_Q_ACTIVE BIT(1) SDMA_PKT_Q_ACTIVE,
#define SDMA_PKT_Q_DEFERRED BIT(2) SDMA_PKT_Q_DEFERRED,
};
/* /*
* Maximum retry attempts to submit a TX request * Maximum retry attempts to submit a TX request
@@ -133,7 +134,7 @@ struct hfi1_user_sdma_pkt_q {
struct user_sdma_request *reqs; struct user_sdma_request *reqs;
unsigned long *req_in_use; unsigned long *req_in_use;
struct iowait busy; struct iowait busy;
unsigned state; enum pkt_q_sdma_state state;
wait_queue_head_t wait; wait_queue_head_t wait;
unsigned long unpinned; unsigned long unpinned;
struct mmu_rb_handler *handler; struct mmu_rb_handler *handler;
@@ -203,14 +204,12 @@ struct user_sdma_request {
s8 ahg_idx; s8 ahg_idx;
/* Writeable fields shared with interrupt */ /* Writeable fields shared with interrupt */
u64 seqcomp ____cacheline_aligned_in_smp; u16 seqcomp ____cacheline_aligned_in_smp;
u64 seqsubmitted; u16 seqsubmitted;
/* status of the last txreq completed */
int status;
/* Send side fields */ /* Send side fields */
struct list_head txps ____cacheline_aligned_in_smp; struct list_head txps ____cacheline_aligned_in_smp;
u64 seqnum; u16 seqnum;
/* /*
* KDETH.OFFSET (TID) field * KDETH.OFFSET (TID) field
* The offset can cover multiple packets, depending on the * The offset can cover multiple packets, depending on the
@@ -228,7 +227,6 @@ struct user_sdma_request {
u16 tididx; u16 tididx;
/* progress index moving along the iovs array */ /* progress index moving along the iovs array */
u8 iov_idx; u8 iov_idx;
u8 done;
u8 has_error; u8 has_error;
struct user_sdma_iovec iovs[MAX_VECTORS_PER_REQ]; struct user_sdma_iovec iovs[MAX_VECTORS_PER_REQ];
@@ -248,7 +246,7 @@ struct user_sdma_txreq {
struct user_sdma_request *req; struct user_sdma_request *req;
u16 flags; u16 flags;
unsigned int busycount; unsigned int busycount;
u64 seqnum; u16 seqnum;
}; };
int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
@@ -129,8 +129,6 @@ unsigned short piothreshold = 256;
module_param(piothreshold, ushort, S_IRUGO); module_param(piothreshold, ushort, S_IRUGO);
MODULE_PARM_DESC(piothreshold, "size used to determine sdma vs. pio"); MODULE_PARM_DESC(piothreshold, "size used to determine sdma vs. pio");
#define COPY_CACHELESS 1
#define COPY_ADAPTIVE 2
static unsigned int sge_copy_mode; static unsigned int sge_copy_mode;
module_param(sge_copy_mode, uint, S_IRUGO); module_param(sge_copy_mode, uint, S_IRUGO);
MODULE_PARM_DESC(sge_copy_mode, MODULE_PARM_DESC(sge_copy_mode,
@@ -151,159 +149,13 @@ static int pio_wait(struct rvt_qp *qp,
/* 16B trailing buffer */ /* 16B trailing buffer */
static const u8 trail_buf[MAX_16B_PADDING]; static const u8 trail_buf[MAX_16B_PADDING];
static uint wss_threshold; static uint wss_threshold = 80;
module_param(wss_threshold, uint, S_IRUGO); module_param(wss_threshold, uint, S_IRUGO);
MODULE_PARM_DESC(wss_threshold, "Percentage (1-100) of LLC to use as a threshold for a cacheless copy"); MODULE_PARM_DESC(wss_threshold, "Percentage (1-100) of LLC to use as a threshold for a cacheless copy");
static uint wss_clean_period = 256; static uint wss_clean_period = 256;
module_param(wss_clean_period, uint, S_IRUGO); module_param(wss_clean_period, uint, S_IRUGO);
MODULE_PARM_DESC(wss_clean_period, "Count of verbs copies before an entry in the page copy table is cleaned"); MODULE_PARM_DESC(wss_clean_period, "Count of verbs copies before an entry in the page copy table is cleaned");
/* memory working set size */
struct hfi1_wss {
unsigned long *entries;
atomic_t total_count;
atomic_t clean_counter;
atomic_t clean_entry;
int threshold;
int num_entries;
long pages_mask;
};
static struct hfi1_wss wss;
int hfi1_wss_init(void)
{
long llc_size;
long llc_bits;
long table_size;
long table_bits;
/* check for a valid percent range - default to 80 if none or invalid */
if (wss_threshold < 1 || wss_threshold > 100)
wss_threshold = 80;
/* reject a wildly large period */
if (wss_clean_period > 1000000)
wss_clean_period = 256;
/* reject a zero period */
if (wss_clean_period == 0)
wss_clean_period = 1;
/*
* Calculate the table size - the next power of 2 larger than the
* LLC size. LLC size is in KiB.
*/
llc_size = wss_llc_size() * 1024;
table_size = roundup_pow_of_two(llc_size);
/* one bit per page in rounded up table */
llc_bits = llc_size / PAGE_SIZE;
table_bits = table_size / PAGE_SIZE;
wss.pages_mask = table_bits - 1;
wss.num_entries = table_bits / BITS_PER_LONG;
wss.threshold = (llc_bits * wss_threshold) / 100;
if (wss.threshold == 0)
wss.threshold = 1;
atomic_set(&wss.clean_counter, wss_clean_period);
wss.entries = kcalloc(wss.num_entries, sizeof(*wss.entries),
GFP_KERNEL);
if (!wss.entries) {
hfi1_wss_exit();
return -ENOMEM;
}
return 0;
}
void hfi1_wss_exit(void)
{
/* coded to handle partially initialized and repeat callers */
kfree(wss.entries);
wss.entries = NULL;
}
/*
* Advance the clean counter. When the clean period has expired,
* clean an entry.
*
* This is implemented in atomics to avoid locking. Because multiple
* variables are involved, it can be racy which can lead to slightly
* inaccurate information. Since this is only a heuristic, this is
* OK. Any inaccuracies will clean themselves out as the counter
* advances. That said, it is unlikely the entry clean operation will
* race - the next possible racer will not start until the next clean
* period.
*
* The clean counter is implemented as a decrement to zero. When zero
* is reached an entry is cleaned.
*/
static void wss_advance_clean_counter(void)
{
int entry;
int weight;
unsigned long bits;
/* become the cleaner if we decrement the counter to zero */
if (atomic_dec_and_test(&wss.clean_counter)) {
/*
* Set, not add, the clean period. This avoids an issue
* where the counter could decrement below the clean period.
* Doing a set can result in lost decrements, slowing the
* clean advance. Since this is a heuristic, this possible
* slowdown is OK.
*
* An alternative is to loop, advancing the counter by a
* clean period until the result is > 0. However, this could
* lead to several threads keeping another in the clean loop.
* This could be mitigated by limiting the number of times
* we stay in the loop.
*/
atomic_set(&wss.clean_counter, wss_clean_period);
/*
* Uniquely grab the entry to clean and move to next.
* The current entry is always the lower bits of
* wss.clean_entry. The table size, wss.num_entries,
* is always a power-of-2.
*/
entry = (atomic_inc_return(&wss.clean_entry) - 1)
& (wss.num_entries - 1);
/* clear the entry and count the bits */
bits = xchg(&wss.entries[entry], 0);
weight = hweight64((u64)bits);
/* only adjust the contended total count if needed */
if (weight)
atomic_sub(weight, &wss.total_count);
}
}
/*
* Insert the given address into the working set array.
*/
static void wss_insert(void *address)
{
u32 page = ((unsigned long)address >> PAGE_SHIFT) & wss.pages_mask;
u32 entry = page / BITS_PER_LONG; /* assumes this ends up a shift */
u32 nr = page & (BITS_PER_LONG - 1);
if (!test_and_set_bit(nr, &wss.entries[entry]))
atomic_inc(&wss.total_count);
wss_advance_clean_counter();
}
/*
* Is the working set larger than the threshold?
*/
static inline bool wss_exceeds_threshold(void)
{
return atomic_read(&wss.total_count) >= wss.threshold;
}
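The block removed above (hfi1_wss_init() through wss_exceeds_threshold()) is the per-driver working-set estimator that this series hands to rdmavt through the new wss_threshold/wss_clean_period dparms. The idea is simple enough to restate standalone: one bit per page in a table sized to the LLC, a cleaner that zeroes one word every clean period, and the copy path goes cacheless once the population count crosses the threshold. The sketch below is a userspace approximation with fixed sizes, not the rdmavt implementation:

/* Standalone approximation of the working-set heuristic; sizes are made up. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT   12
#define TABLE_PAGES  1024                     /* pretend the LLC covers 1024 pages */
#define TABLE_WORDS  (TABLE_PAGES / 64)
#define CLEAN_PERIOD 256
#define THRESHOLD    (TABLE_PAGES * 80 / 100) /* 80% of the "LLC", as in hfi1 */

static uint64_t entries[TABLE_WORDS];
static int total_count;
static int clean_counter = CLEAN_PERIOD;
static unsigned clean_entry;

static void wss_insert(const void *addr)
{
	unsigned page = ((uintptr_t)addr >> PAGE_SHIFT) & (TABLE_PAGES - 1);
	unsigned word = page / 64, bit = page % 64;

	if (!(entries[word] & (1ULL << bit))) {
		entries[word] |= 1ULL << bit;
		total_count++;
	}

	/* advance the cleaner: every CLEAN_PERIOD insertions clear one word */
	if (--clean_counter == 0) {
		clean_counter = CLEAN_PERIOD;
		word = clean_entry++ & (TABLE_WORDS - 1);
		total_count -= __builtin_popcountll(entries[word]);
		entries[word] = 0;
	}
}

static bool wss_exceeds_threshold(void)
{
	return total_count >= THRESHOLD;
}

int main(void)
{
	static char buf[TABLE_PAGES << PAGE_SHIFT];

	for (size_t off = 0; off < sizeof(buf); off += 1 << PAGE_SHIFT)
		wss_insert(buf + off);

	printf("working set %d pages, cacheless copy %s\n", total_count,
	       wss_exceeds_threshold() ? "on" : "off");
	return 0;
}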
/* /*
* Translate ib_wr_opcode into ib_wc_opcode. * Translate ib_wr_opcode into ib_wc_opcode.
*/ */
@@ -438,79 +290,6 @@ static const u32 pio_opmask[BIT(3)] = {
*/ */
__be64 ib_hfi1_sys_image_guid; __be64 ib_hfi1_sys_image_guid;
/**
* hfi1_copy_sge - copy data to SGE memory
* @ss: the SGE state
* @data: the data to copy
* @length: the length of the data
* @release: boolean to release MR
* @copy_last: do a separate copy of the last 8 bytes
*/
void hfi1_copy_sge(
struct rvt_sge_state *ss,
void *data, u32 length,
bool release,
bool copy_last)
{
struct rvt_sge *sge = &ss->sge;
int i;
bool in_last = false;
bool cacheless_copy = false;
if (sge_copy_mode == COPY_CACHELESS) {
cacheless_copy = length >= PAGE_SIZE;
} else if (sge_copy_mode == COPY_ADAPTIVE) {
if (length >= PAGE_SIZE) {
/*
* NOTE: this *assumes*:
* o The first vaddr is the dest.
* o If multiple pages, then vaddr is sequential.
*/
wss_insert(sge->vaddr);
if (length >= (2 * PAGE_SIZE))
wss_insert(sge->vaddr + PAGE_SIZE);
cacheless_copy = wss_exceeds_threshold();
} else {
wss_advance_clean_counter();
}
}
if (copy_last) {
if (length > 8) {
length -= 8;
} else {
copy_last = false;
in_last = true;
}
}
again:
while (length) {
u32 len = rvt_get_sge_length(sge, length);
WARN_ON_ONCE(len == 0);
if (unlikely(in_last)) {
/* enforce byte transfer ordering */
for (i = 0; i < len; i++)
((u8 *)sge->vaddr)[i] = ((u8 *)data)[i];
} else if (cacheless_copy) {
cacheless_memcpy(sge->vaddr, data, len);
} else {
memcpy(sge->vaddr, data, len);
}
rvt_update_sge(ss, len, release);
data += len;
length -= len;
}
if (copy_last) {
copy_last = false;
in_last = true;
length = 8;
goto again;
}
}
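hfi1_copy_sge(), also absorbed into rdmavt here, has one subtle detail worth keeping in view: with copy_last set, everything except the final 8 bytes is copied first and the tail is then written strictly byte by byte, so a polling receiver cannot observe the tail before the body. A simplified standalone version of just that tail-ordering trick (single flat buffer, no SGE walking):

#include <stdio.h>
#include <string.h>

/*
 * Simplified sketch: copy "length" bytes, but hold back the last 8 and
 * write them one byte at a time after the body, mirroring the
 * copy_last/in_last handling in the removed hfi1_copy_sge().
 */
static void copy_with_ordered_tail(void *dst, const void *src, size_t length)
{
	size_t body = length > 8 ? length - 8 : 0;

	memcpy(dst, src, body);                    /* bulk of the payload */

	for (size_t i = body; i < length; i++)     /* enforce byte ordering */
		((unsigned char *)dst)[i] = ((const unsigned char *)src)[i];
}

int main(void)
{
	char src[32] = "payload with an ordered tail..";
	char dst[32] = { 0 };

	copy_with_ordered_tail(dst, src, sizeof(src));
	printf("%.32s\n", dst);
	return 0;
}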
/* /*
* Make sure the QP is ready and able to accept the given opcode. * Make sure the QP is ready and able to accept the given opcode.
*/ */
@@ -713,7 +492,7 @@ static void verbs_sdma_complete(
spin_lock(&qp->s_lock); spin_lock(&qp->s_lock);
if (tx->wqe) { if (tx->wqe) {
hfi1_send_complete(qp, tx->wqe, IB_WC_SUCCESS); rvt_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
} else if (qp->ibqp.qp_type == IB_QPT_RC) { } else if (qp->ibqp.qp_type == IB_QPT_RC) {
struct hfi1_opa_header *hdr; struct hfi1_opa_header *hdr;
@@ -737,7 +516,7 @@ static int wait_kmem(struct hfi1_ibdev *dev,
if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) { if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
write_seqlock(&dev->iowait_lock); write_seqlock(&dev->iowait_lock);
list_add_tail(&ps->s_txreq->txreq.list, list_add_tail(&ps->s_txreq->txreq.list,
&priv->s_iowait.tx_head); &ps->wait->tx_head);
if (list_empty(&priv->s_iowait.list)) { if (list_empty(&priv->s_iowait.list)) {
if (list_empty(&dev->memwait)) if (list_empty(&dev->memwait))
mod_timer(&dev->mem_timer, jiffies + 1); mod_timer(&dev->mem_timer, jiffies + 1);
@@ -748,7 +527,7 @@ static int wait_kmem(struct hfi1_ibdev *dev,
rvt_get_qp(qp); rvt_get_qp(qp);
} }
write_sequnlock(&dev->iowait_lock); write_sequnlock(&dev->iowait_lock);
qp->s_flags &= ~RVT_S_BUSY; hfi1_qp_unbusy(qp, ps->wait);
ret = -EBUSY; ret = -EBUSY;
} }
spin_unlock_irqrestore(&qp->s_lock, flags); spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -950,8 +729,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
if (unlikely(ret)) if (unlikely(ret))
goto bail_build; goto bail_build;
} }
ret = sdma_send_txreq(tx->sde, &priv->s_iowait, &tx->txreq, ret = sdma_send_txreq(tx->sde, ps->wait, &tx->txreq, ps->pkts_sent);
ps->pkts_sent);
if (unlikely(ret < 0)) { if (unlikely(ret < 0)) {
if (ret == -ECOMM) if (ret == -ECOMM)
goto bail_ecomm; goto bail_ecomm;
@@ -1001,7 +779,7 @@ static int pio_wait(struct rvt_qp *qp,
if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) { if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
write_seqlock(&dev->iowait_lock); write_seqlock(&dev->iowait_lock);
list_add_tail(&ps->s_txreq->txreq.list, list_add_tail(&ps->s_txreq->txreq.list,
&priv->s_iowait.tx_head); &ps->wait->tx_head);
if (list_empty(&priv->s_iowait.list)) { if (list_empty(&priv->s_iowait.list)) {
struct hfi1_ibdev *dev = &dd->verbs_dev; struct hfi1_ibdev *dev = &dd->verbs_dev;
int was_empty; int was_empty;
@@ -1020,7 +798,7 @@ static int pio_wait(struct rvt_qp *qp,
hfi1_sc_wantpiobuf_intr(sc, 1); hfi1_sc_wantpiobuf_intr(sc, 1);
} }
write_sequnlock(&dev->iowait_lock); write_sequnlock(&dev->iowait_lock);
qp->s_flags &= ~RVT_S_BUSY; hfi1_qp_unbusy(qp, ps->wait);
ret = -EBUSY; ret = -EBUSY;
} }
spin_unlock_irqrestore(&qp->s_lock, flags); spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -1160,7 +938,7 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
pio_bail: pio_bail:
if (qp->s_wqe) { if (qp->s_wqe) {
spin_lock_irqsave(&qp->s_lock, flags); spin_lock_irqsave(&qp->s_lock, flags);
hfi1_send_complete(qp, qp->s_wqe, wc_status); rvt_send_complete(qp, qp->s_wqe, wc_status);
spin_unlock_irqrestore(&qp->s_lock, flags); spin_unlock_irqrestore(&qp->s_lock, flags);
} else if (qp->ibqp.qp_type == IB_QPT_RC) { } else if (qp->ibqp.qp_type == IB_QPT_RC) {
spin_lock_irqsave(&qp->s_lock, flags); spin_lock_irqsave(&qp->s_lock, flags);
@@ -1367,7 +1145,7 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
hfi1_cdbg(PIO, "%s() Failed. Completing with err", hfi1_cdbg(PIO, "%s() Failed. Completing with err",
__func__); __func__);
spin_lock_irqsave(&qp->s_lock, flags); spin_lock_irqsave(&qp->s_lock, flags);
hfi1_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR); rvt_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR);
spin_unlock_irqrestore(&qp->s_lock, flags); spin_unlock_irqrestore(&qp->s_lock, flags);
} }
return -EINVAL; return -EINVAL;
@@ -1943,7 +1721,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
dd->verbs_dev.rdi.driver_f.check_modify_qp = hfi1_check_modify_qp; dd->verbs_dev.rdi.driver_f.check_modify_qp = hfi1_check_modify_qp;
dd->verbs_dev.rdi.driver_f.modify_qp = hfi1_modify_qp; dd->verbs_dev.rdi.driver_f.modify_qp = hfi1_modify_qp;
dd->verbs_dev.rdi.driver_f.notify_restart_rc = hfi1_restart_rc; dd->verbs_dev.rdi.driver_f.notify_restart_rc = hfi1_restart_rc;
dd->verbs_dev.rdi.driver_f.check_send_wqe = hfi1_check_send_wqe; dd->verbs_dev.rdi.driver_f.setup_wqe = hfi1_setup_wqe;
dd->verbs_dev.rdi.driver_f.comp_vect_cpu_lookup = dd->verbs_dev.rdi.driver_f.comp_vect_cpu_lookup =
hfi1_comp_vect_mappings_lookup; hfi1_comp_vect_mappings_lookup;
@@ -1956,10 +1734,16 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
dd->verbs_dev.rdi.dparms.lkey_table_size = hfi1_lkey_table_size; dd->verbs_dev.rdi.dparms.lkey_table_size = hfi1_lkey_table_size;
dd->verbs_dev.rdi.dparms.nports = dd->num_pports; dd->verbs_dev.rdi.dparms.nports = dd->num_pports;
dd->verbs_dev.rdi.dparms.npkeys = hfi1_get_npkeys(dd); dd->verbs_dev.rdi.dparms.npkeys = hfi1_get_npkeys(dd);
dd->verbs_dev.rdi.dparms.sge_copy_mode = sge_copy_mode;
dd->verbs_dev.rdi.dparms.wss_threshold = wss_threshold;
dd->verbs_dev.rdi.dparms.wss_clean_period = wss_clean_period;
/* post send table */ /* post send table */
dd->verbs_dev.rdi.post_parms = hfi1_post_parms; dd->verbs_dev.rdi.post_parms = hfi1_post_parms;
/* opcode translation table */
dd->verbs_dev.rdi.wc_opcode = ib_hfi1_wc_opcode;
ppd = dd->pport; ppd = dd->pport;
for (i = 0; i < dd->num_pports; i++, ppd++) for (i = 0; i < dd->num_pports; i++, ppd++)
rvt_init_port(&dd->verbs_dev.rdi, rvt_init_port(&dd->verbs_dev.rdi,
@@ -1967,6 +1751,9 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
i, i,
ppd->pkeys); ppd->pkeys);
rdma_set_device_sysfs_group(&dd->verbs_dev.rdi.ibdev,
&ib_hfi1_attr_group);
ret = rvt_register_device(&dd->verbs_dev.rdi, RDMA_DRIVER_HFI1); ret = rvt_register_device(&dd->verbs_dev.rdi, RDMA_DRIVER_HFI1);
if (ret) if (ret)
goto err_verbs_txreq; goto err_verbs_txreq;
@@ -166,11 +166,13 @@ struct hfi1_qp_priv {
* This structure is used to hold commonly looked up and computed values during * This structure is used to hold commonly looked up and computed values during
* the send engine progress. * the send engine progress.
*/ */
struct iowait_work;
struct hfi1_pkt_state { struct hfi1_pkt_state {
struct hfi1_ibdev *dev; struct hfi1_ibdev *dev;
struct hfi1_ibport *ibp; struct hfi1_ibport *ibp;
struct hfi1_pportdata *ppd; struct hfi1_pportdata *ppd;
struct verbs_txreq *s_txreq; struct verbs_txreq *s_txreq;
struct iowait_work *wait;
unsigned long flags; unsigned long flags;
unsigned long timeout; unsigned long timeout;
unsigned long timeout_int; unsigned long timeout_int;
@@ -247,7 +249,7 @@ static inline struct hfi1_ibdev *to_idev(struct ib_device *ibdev)
return container_of(rdi, struct hfi1_ibdev, rdi); return container_of(rdi, struct hfi1_ibdev, rdi);
} }
static inline struct rvt_qp *iowait_to_qp(struct iowait *s_iowait) static inline struct rvt_qp *iowait_to_qp(struct iowait *s_iowait)
{ {
struct hfi1_qp_priv *priv; struct hfi1_qp_priv *priv;
@@ -313,9 +315,6 @@ void hfi1_put_txreq(struct verbs_txreq *tx);
int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps); int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
void hfi1_copy_sge(struct rvt_sge_state *ss, void *data, u32 length,
bool release, bool copy_last);
void hfi1_cnp_rcv(struct hfi1_packet *packet); void hfi1_cnp_rcv(struct hfi1_packet *packet);
void hfi1_uc_rcv(struct hfi1_packet *packet); void hfi1_uc_rcv(struct hfi1_packet *packet);
@@ -343,7 +342,8 @@ int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata); int attr_mask, struct ib_udata *udata);
void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait); void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait);
int hfi1_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe); int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe,
bool *call_send);
extern const u32 rc_only_opcode; extern const u32 rc_only_opcode;
extern const u32 uc_only_opcode; extern const u32 uc_only_opcode;
@@ -363,9 +363,6 @@ void hfi1_do_send_from_rvt(struct rvt_qp *qp);
void hfi1_do_send(struct rvt_qp *qp, bool in_thread); void hfi1_do_send(struct rvt_qp *qp, bool in_thread);
void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
enum ib_wc_status status);
void hfi1_send_rc_ack(struct hfi1_packet *packet, bool is_fecn); void hfi1_send_rc_ack(struct hfi1_packet *packet, bool is_fecn);
int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps); int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
@@ -390,28 +387,6 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
u64 pbc); u64 pbc);
int hfi1_wss_init(void);
void hfi1_wss_exit(void);
/* platform specific: return the lowest level cache (llc) size, in KiB */
static inline int wss_llc_size(void)
{
/* assume that the boot CPU value is universal for all CPUs */
return boot_cpu_data.x86_cache_size;
}
/* platform specific: cacheless copy */
static inline void cacheless_memcpy(void *dst, void *src, size_t n)
{
/*
* Use the only available X64 cacheless copy. Add a __user cast
* to quiet sparse. The src argument is already in the kernel so
* there are no security issues. The extra fault recovery machinery
* is not invoked.
*/
__copy_user_nocache(dst, (void __user *)src, n, 0);
}
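The removed cacheless_memcpy() wraps __copy_user_nocache(), x86's non-temporal copy, which is what the adaptive SGE mode falls back to once the working set outgrows the LLC. The closest userspace analogue is a streaming-store copy; the sketch below uses SSE2 intrinsics and assumes x86-64 and a 16-byte-aligned destination, so it only illustrates the idea, not the kernel helper:

/* Userspace analogue of a cacheless copy using SSE2 streaming stores. */
#include <emmintrin.h>   /* _mm_loadu_si128, _mm_stream_si128, _mm_sfence */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* dst must be 16-byte aligned; full 16-byte chunks bypass the cache */
static void stream_copy(void *dst, const void *src, size_t n)
{
	char *d = dst;
	const char *s = src;

	for (; n >= 16; n -= 16, d += 16, s += 16)
		_mm_stream_si128((__m128i *)d,
				 _mm_loadu_si128((const __m128i *)s));
	memcpy(d, s, n);     /* small tail goes through the cache */
	_mm_sfence();        /* make the streaming stores globally visible */
}

int main(void)
{
	size_t len = 1 << 20;
	char *src = malloc(len), *dst = aligned_alloc(16, len);

	memset(src, 'x', len);
	stream_copy(dst, src, len);
	printf("copied %zu bytes, dst[0]=%c\n", len, dst[0]);
	free(src);
	free(dst);
	return 0;
}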
static inline bool opa_bth_is_migration(struct ib_other_headers *ohdr) static inline bool opa_bth_is_migration(struct ib_other_headers *ohdr)
{ {
return ohdr->bth[1] & cpu_to_be32(OPA_BTH_MIG_REQ); return ohdr->bth[1] & cpu_to_be32(OPA_BTH_MIG_REQ);
@@ -102,22 +102,19 @@ static inline struct sdma_txreq *get_sdma_txreq(struct verbs_txreq *tx)
return &tx->txreq; return &tx->txreq;
} }
static inline struct verbs_txreq *get_waiting_verbs_txreq(struct rvt_qp *qp) static inline struct verbs_txreq *get_waiting_verbs_txreq(struct iowait_work *w)
{ {
struct sdma_txreq *stx; struct sdma_txreq *stx;
struct hfi1_qp_priv *priv = qp->priv;
stx = iowait_get_txhead(&priv->s_iowait); stx = iowait_get_txhead(w);
if (stx) if (stx)
return container_of(stx, struct verbs_txreq, txreq); return container_of(stx, struct verbs_txreq, txreq);
return NULL; return NULL;
} }
static inline bool verbs_txreq_queued(struct rvt_qp *qp) static inline bool verbs_txreq_queued(struct iowait_work *w)
{ {
struct hfi1_qp_priv *priv = qp->priv; return iowait_packet_queued(w);
return iowait_packet_queued(&priv->s_iowait);
} }
void hfi1_put_txreq(struct verbs_txreq *tx); void hfi1_put_txreq(struct verbs_txreq *tx);
@@ -120,7 +120,7 @@ static int allocate_vnic_ctxt(struct hfi1_devdata *dd,
uctxt->seq_cnt = 1; uctxt->seq_cnt = 1;
uctxt->is_vnic = true; uctxt->is_vnic = true;
hfi1_set_vnic_msix_info(uctxt); msix_request_rcd_irq(uctxt);
hfi1_stats.sps_ctxts++; hfi1_stats.sps_ctxts++;
dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt); dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt);
@@ -135,8 +135,6 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt); dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt);
flush_wc(); flush_wc();
hfi1_reset_vnic_msix_info(uctxt);
/* /*
* Disable receive context and interrupt available, reset all * Disable receive context and interrupt available, reset all
* RcvCtxtCtrl bits to default values. * RcvCtxtCtrl bits to default values.
@@ -148,6 +146,10 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
HFI1_RCVCTRL_NO_RHQ_DROP_DIS | HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt); HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt);
/* msix_intr will always be > 0, only clean up if this is true */
if (uctxt->msix_intr)
msix_free_irq(dd, uctxt->msix_intr);
uctxt->event_flags = 0; uctxt->event_flags = 0;
hfi1_clear_tids(uctxt); hfi1_clear_tids(uctxt);
@@ -626,7 +628,7 @@ static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo)
idr_remove(&dd->vnic.vesw_idr, vinfo->vesw_id); idr_remove(&dd->vnic.vesw_idr, vinfo->vesw_id);
/* ensure irqs see the change */ /* ensure irqs see the change */
hfi1_vnic_synchronize_irq(dd); msix_vnic_synchronize_irq(dd);
/* remove unread skbs */ /* remove unread skbs */
for (i = 0; i < vinfo->num_rx_q; i++) { for (i = 0; i < vinfo->num_rx_q; i++) {
@@ -690,8 +692,6 @@ static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo)
rc = hfi1_vnic_txreq_init(dd); rc = hfi1_vnic_txreq_init(dd);
if (rc) if (rc)
goto txreq_fail; goto txreq_fail;
dd->vnic.msix_idx = dd->first_dyn_msix_idx;
} }
for (i = dd->vnic.num_ctxt; i < vinfo->num_rx_q; i++) { for (i = dd->vnic.num_ctxt; i < vinfo->num_rx_q; i++) {
@@ -1,5 +1,5 @@
/* /*
* Copyright(c) 2017 Intel Corporation. * Copyright(c) 2017 - 2018 Intel Corporation.
* *
* This file is provided under a dual BSD/GPLv2 license. When using or * This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license. * redistributing this file, you may do so under either license.
@@ -198,8 +198,8 @@ int hfi1_vnic_send_dma(struct hfi1_devdata *dd, u8 q_idx,
goto free_desc; goto free_desc;
tx->retry_count = 0; tx->retry_count = 0;
ret = sdma_send_txreq(sde, &vnic_sdma->wait, &tx->txreq, ret = sdma_send_txreq(sde, iowait_get_ib_work(&vnic_sdma->wait),
vnic_sdma->pkts_sent); &tx->txreq, vnic_sdma->pkts_sent);
/* When -ECOMM, sdma callback will be called with ABORT status */ /* When -ECOMM, sdma callback will be called with ABORT status */
if (unlikely(ret && unlikely(ret != -ECOMM))) if (unlikely(ret && unlikely(ret != -ECOMM)))
goto free_desc; goto free_desc;
@@ -230,13 +230,13 @@ tx_err:
* become available. * become available.
*/ */
static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde, static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde,
struct iowait *wait, struct iowait_work *wait,
struct sdma_txreq *txreq, struct sdma_txreq *txreq,
uint seq, uint seq,
bool pkts_sent) bool pkts_sent)
{ {
struct hfi1_vnic_sdma *vnic_sdma = struct hfi1_vnic_sdma *vnic_sdma =
container_of(wait, struct hfi1_vnic_sdma, wait); container_of(wait->iow, struct hfi1_vnic_sdma, wait);
struct hfi1_ibdev *dev = &vnic_sdma->dd->verbs_dev; struct hfi1_ibdev *dev = &vnic_sdma->dd->verbs_dev;
struct vnic_txreq *tx = container_of(txreq, struct vnic_txreq, txreq); struct vnic_txreq *tx = container_of(txreq, struct vnic_txreq, txreq);
@@ -247,7 +247,7 @@ static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde,
vnic_sdma->state = HFI1_VNIC_SDMA_Q_DEFERRED; vnic_sdma->state = HFI1_VNIC_SDMA_Q_DEFERRED;
write_seqlock(&dev->iowait_lock); write_seqlock(&dev->iowait_lock);
if (list_empty(&vnic_sdma->wait.list)) if (list_empty(&vnic_sdma->wait.list))
iowait_queue(pkts_sent, wait, &sde->dmawait); iowait_queue(pkts_sent, wait->iow, &sde->dmawait);
write_sequnlock(&dev->iowait_lock); write_sequnlock(&dev->iowait_lock);
return -EBUSY; return -EBUSY;
} }
@@ -285,7 +285,8 @@ void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo)
for (i = 0; i < vinfo->num_tx_q; i++) { for (i = 0; i < vinfo->num_tx_q; i++) {
struct hfi1_vnic_sdma *vnic_sdma = &vinfo->sdma[i]; struct hfi1_vnic_sdma *vnic_sdma = &vinfo->sdma[i];
iowait_init(&vnic_sdma->wait, 0, NULL, hfi1_vnic_sdma_sleep, iowait_init(&vnic_sdma->wait, 0, NULL, NULL,
hfi1_vnic_sdma_sleep,
hfi1_vnic_sdma_wakeup, NULL); hfi1_vnic_sdma_wakeup, NULL);
vnic_sdma->sde = &vinfo->dd->per_sdma[i]; vnic_sdma->sde = &vinfo->dd->per_sdma[i];
vnic_sdma->dd = vinfo->dd; vnic_sdma->dd = vinfo->dd;
@@ -295,10 +296,12 @@ void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo)
/* Add a free descriptor watermark for wakeups */ /* Add a free descriptor watermark for wakeups */
if (vnic_sdma->sde->descq_cnt > HFI1_VNIC_SDMA_DESC_WTRMRK) { if (vnic_sdma->sde->descq_cnt > HFI1_VNIC_SDMA_DESC_WTRMRK) {
struct iowait_work *work;
INIT_LIST_HEAD(&vnic_sdma->stx.list); INIT_LIST_HEAD(&vnic_sdma->stx.list);
vnic_sdma->stx.num_desc = HFI1_VNIC_SDMA_DESC_WTRMRK; vnic_sdma->stx.num_desc = HFI1_VNIC_SDMA_DESC_WTRMRK;
list_add_tail(&vnic_sdma->stx.list, work = iowait_get_ib_work(&vnic_sdma->wait);
&vnic_sdma->wait.tx_head); list_add_tail(&vnic_sdma->stx.list, &work->tx_head);
} }
} }
} }
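The vnic changes above mostly retarget the iowait calls at the new iowait_work container: when sdma_send_txreq() returns -EBUSY the sleep callback parks the queue as DEFERRED on the engine's dmawait list, and the wakeup callback kicks it again once descriptors free up. A loose userspace caricature of that defer/wakeup contract (a bounded ring and a deferred flag; none of the iowait or sdma types appear here):

/* Caricature of the defer-on-busy / resume-on-wakeup pattern; not driver code. */
#include <stdbool.h>
#include <stdio.h>

#define RING_SLOTS 4

static int ring_used;
static bool q_deferred;          /* like HFI1_VNIC_SDMA_Q_DEFERRED */
static int next_pkt, total_pkts = 10;

static bool ring_post(int pkt)
{
	if (ring_used == RING_SLOTS)
		return false;                  /* -EBUSY in the driver */
	ring_used++;
	printf("posted packet %d\n", pkt);
	return true;
}

static void send_some(void)                    /* the transmit path */
{
	while (next_pkt < total_pkts) {
		if (!ring_post(next_pkt)) {
			q_deferred = true;     /* sleep callback: park the queue */
			return;
		}
		next_pkt++;
	}
}

static void ring_complete(int n)               /* completion/interrupt side */
{
	ring_used -= n;
	if (q_deferred) {
		q_deferred = false;            /* wakeup callback: kick the queue */
		send_some();
	}
}

int main(void)
{
	send_some();                           /* fills the ring, then defers */
	while (next_pkt < total_pkts || ring_used)
		ring_complete(ring_used);      /* drain and re-kick until done */
	return 0;
}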
@@ -1,6 +1,7 @@
config INFINIBAND_HNS config INFINIBAND_HNS
tristate "HNS RoCE Driver" tristate "HNS RoCE Driver"
depends on NET_VENDOR_HISILICON depends on NET_VENDOR_HISILICON
depends on INFINIBAND_USER_ACCESS || !INFINIBAND_USER_ACCESS
depends on ARM64 || (COMPILE_TEST && 64BIT) depends on ARM64 || (COMPILE_TEST && 64BIT)
---help--- ---help---
This is a RoCE/RDMA driver for the Hisilicon RoCE engine. The engine This is a RoCE/RDMA driver for the Hisilicon RoCE engine. The engine
@@ -49,6 +49,7 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd,
struct hns_roce_ah *ah; struct hns_roce_ah *ah;
u16 vlan_tag = 0xffff; u16 vlan_tag = 0xffff;
const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
bool vlan_en = false;
ah = kzalloc(sizeof(*ah), GFP_ATOMIC); ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
if (!ah) if (!ah)
@@ -58,8 +59,10 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd,
memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN); memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN);
gid_attr = ah_attr->grh.sgid_attr; gid_attr = ah_attr->grh.sgid_attr;
if (is_vlan_dev(gid_attr->ndev)) if (is_vlan_dev(gid_attr->ndev)) {
vlan_tag = vlan_dev_vlan_id(gid_attr->ndev); vlan_tag = vlan_dev_vlan_id(gid_attr->ndev);
vlan_en = true;
}
if (vlan_tag < 0x1000) if (vlan_tag < 0x1000)
vlan_tag |= (rdma_ah_get_sl(ah_attr) & vlan_tag |= (rdma_ah_get_sl(ah_attr) &
@@ -71,6 +74,7 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd,
HNS_ROCE_PORT_NUM_SHIFT)); HNS_ROCE_PORT_NUM_SHIFT));
ah->av.gid_index = grh->sgid_index; ah->av.gid_index = grh->sgid_index;
ah->av.vlan = cpu_to_le16(vlan_tag); ah->av.vlan = cpu_to_le16(vlan_tag);
ah->av.vlan_en = vlan_en;
dev_dbg(dev, "gid_index = 0x%x,vlan = 0x%x\n", ah->av.gid_index, dev_dbg(dev, "gid_index = 0x%x,vlan = 0x%x\n", ah->av.gid_index,
ah->av.vlan); ah->av.vlan);
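For reference, the vlan handling above follows the usual 802.1Q layout: the low 12 bits of the tag carry the VLAN ID taken from the ndev, the service level is shifted into the 3-bit priority field, and vlan_en records whether a tag should be inserted at all. A tiny illustration of that packing (generic names, not the driver's macros):

#include <stdint.h>
#include <stdio.h>

/* 802.1Q TCI: | PCP (3 bits) | DEI (1 bit) | VLAN ID (12 bits) | */
static uint16_t make_tci(uint8_t prio, uint16_t vid)
{
	return (uint16_t)((prio & 0x7) << 13) | (vid & 0xfff);
}

int main(void)
{
	uint16_t tci = make_tci(5, 100);   /* e.g. SL 5, VLAN 100 */

	printf("tci = 0x%04x (prio %u, vid %u)\n",
	       tci, tci >> 13, tci & 0xfff);
	return 0;
}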
@@ -88,8 +88,11 @@
#define BITMAP_RR 1 #define BITMAP_RR 1
#define MR_TYPE_MR 0x00 #define MR_TYPE_MR 0x00
#define MR_TYPE_FRMR 0x01
#define MR_TYPE_DMA 0x03 #define MR_TYPE_DMA 0x03
#define HNS_ROCE_FRMR_MAX_PA 512
#define PKEY_ID 0xffff #define PKEY_ID 0xffff
#define GUID_LEN 8 #define GUID_LEN 8
#define NODE_DESC_SIZE 64 #define NODE_DESC_SIZE 64
@@ -193,6 +196,9 @@ enum {
HNS_ROCE_CAP_FLAG_RQ_INLINE = BIT(2), HNS_ROCE_CAP_FLAG_RQ_INLINE = BIT(2),
HNS_ROCE_CAP_FLAG_RECORD_DB = BIT(3), HNS_ROCE_CAP_FLAG_RECORD_DB = BIT(3),
HNS_ROCE_CAP_FLAG_SQ_RECORD_DB = BIT(4), HNS_ROCE_CAP_FLAG_SQ_RECORD_DB = BIT(4),
HNS_ROCE_CAP_FLAG_MW = BIT(7),
HNS_ROCE_CAP_FLAG_FRMR = BIT(8),
HNS_ROCE_CAP_FLAG_ATOMIC = BIT(10),
}; };
enum hns_roce_mtt_type { enum hns_roce_mtt_type {
@@ -219,19 +225,11 @@ struct hns_roce_uar {
unsigned long logic_idx; unsigned long logic_idx;
}; };
struct hns_roce_vma_data {
struct list_head list;
struct vm_area_struct *vma;
struct mutex *vma_list_mutex;
};
struct hns_roce_ucontext { struct hns_roce_ucontext {
struct ib_ucontext ibucontext; struct ib_ucontext ibucontext;
struct hns_roce_uar uar; struct hns_roce_uar uar;
struct list_head page_list; struct list_head page_list;
struct mutex page_mutex; struct mutex page_mutex;
struct list_head vma_list;
struct mutex vma_list_mutex;
}; };
struct hns_roce_pd { struct hns_roce_pd {
@@ -293,6 +291,16 @@ struct hns_roce_mtt {
enum hns_roce_mtt_type mtt_type; enum hns_roce_mtt_type mtt_type;
}; };
struct hns_roce_mw {
struct ib_mw ibmw;
u32 pdn;
u32 rkey;
int enabled; /* MW's active status */
u32 pbl_hop_num;
u32 pbl_ba_pg_sz;
u32 pbl_buf_pg_sz;
};
/* Only support 4K page size for mr register */ /* Only support 4K page size for mr register */
#define MR_SIZE_4K 0 #define MR_SIZE_4K 0
@@ -304,6 +312,7 @@ struct hns_roce_mr {
u32 key; /* Key of MR */ u32 key; /* Key of MR */
u32 pd; /* PD num of MR */ u32 pd; /* PD num of MR */
u32 access;/* Access permission of MR */ u32 access;/* Access permission of MR */
u32 npages;
int enabled; /* MR's active status */ int enabled; /* MR's active status */
int type; /* MR's register type */ int type; /* MR's register type */
u64 *pbl_buf;/* MR's PBL space */ u64 *pbl_buf;/* MR's PBL space */
@@ -457,6 +466,7 @@ struct hns_roce_av {
u8 dgid[HNS_ROCE_GID_SIZE]; u8 dgid[HNS_ROCE_GID_SIZE];
u8 mac[6]; u8 mac[6];
__le16 vlan; __le16 vlan;
bool vlan_en;
}; };
struct hns_roce_ah { struct hns_roce_ah {
@@ -656,6 +666,7 @@ struct hns_roce_eq_table {
}; };
struct hns_roce_caps { struct hns_roce_caps {
u64 fw_ver;
u8 num_ports; u8 num_ports;
int gid_table_len[HNS_ROCE_MAX_PORTS]; int gid_table_len[HNS_ROCE_MAX_PORTS];
int pkey_table_len[HNS_ROCE_MAX_PORTS]; int pkey_table_len[HNS_ROCE_MAX_PORTS];
@@ -665,7 +676,9 @@ struct hns_roce_caps {
u32 max_sq_sg; /* 2 */ u32 max_sq_sg; /* 2 */
u32 max_sq_inline; /* 32 */ u32 max_sq_inline; /* 32 */
u32 max_rq_sg; /* 2 */ u32 max_rq_sg; /* 2 */
u32 max_extend_sg;
int num_qps; /* 256k */ int num_qps; /* 256k */
int reserved_qps;
u32 max_wqes; /* 16k */ u32 max_wqes; /* 16k */
u32 max_sq_desc_sz; /* 64 */ u32 max_sq_desc_sz; /* 64 */
u32 max_rq_desc_sz; /* 64 */ u32 max_rq_desc_sz; /* 64 */
@@ -738,6 +751,7 @@ struct hns_roce_work {
struct hns_roce_dev *hr_dev; struct hns_roce_dev *hr_dev;
struct work_struct work; struct work_struct work;
u32 qpn; u32 qpn;
u32 cqn;
int event_type; int event_type;
int sub_type; int sub_type;
}; };
@@ -764,6 +778,8 @@ struct hns_roce_hw {
struct hns_roce_mr *mr, int flags, u32 pdn, struct hns_roce_mr *mr, int flags, u32 pdn,
int mr_access_flags, u64 iova, u64 size, int mr_access_flags, u64 iova, u64 size,
void *mb_buf); void *mb_buf);
int (*frmr_write_mtpt)(void *mb_buf, struct hns_roce_mr *mr);
int (*mw_write_mtpt)(void *mb_buf, struct hns_roce_mw *mw);
void (*write_cqc)(struct hns_roce_dev *hr_dev, void (*write_cqc)(struct hns_roce_dev *hr_dev,
struct hns_roce_cq *hr_cq, void *mb_buf, u64 *mtts, struct hns_roce_cq *hr_cq, void *mb_buf, u64 *mtts,
dma_addr_t dma_handle, int nent, u32 vector); dma_addr_t dma_handle, int nent, u32 vector);
@@ -863,6 +879,11 @@ static inline struct hns_roce_mr *to_hr_mr(struct ib_mr *ibmr)
return container_of(ibmr, struct hns_roce_mr, ibmr); return container_of(ibmr, struct hns_roce_mr, ibmr);
} }
static inline struct hns_roce_mw *to_hr_mw(struct ib_mw *ibmw)
{
return container_of(ibmw, struct hns_roce_mw, ibmw);
}
static inline struct hns_roce_qp *to_hr_qp(struct ib_qp *ibqp) static inline struct hns_roce_qp *to_hr_qp(struct ib_qp *ibqp)
{ {
return container_of(ibqp, struct hns_roce_qp, ibqp); return container_of(ibqp, struct hns_roce_qp, ibqp);
@@ -968,12 +989,20 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
int hns_roce_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, u64 length, int hns_roce_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, u64 length,
u64 virt_addr, int mr_access_flags, struct ib_pd *pd, u64 virt_addr, int mr_access_flags, struct ib_pd *pd,
struct ib_udata *udata); struct ib_udata *udata);
struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
u32 max_num_sg);
int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset);
int hns_roce_dereg_mr(struct ib_mr *ibmr); int hns_roce_dereg_mr(struct ib_mr *ibmr);
int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev, int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev,
struct hns_roce_cmd_mailbox *mailbox, struct hns_roce_cmd_mailbox *mailbox,
unsigned long mpt_index); unsigned long mpt_index);
unsigned long key_to_hw_index(u32 key); unsigned long key_to_hw_index(u32 key);
struct ib_mw *hns_roce_alloc_mw(struct ib_pd *pd, enum ib_mw_type,
struct ib_udata *udata);
int hns_roce_dealloc_mw(struct ib_mw *ibmw);
void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size, void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size,
struct hns_roce_buf *buf); struct hns_roce_buf *buf);
int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct,
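The new to_hr_mw() helper follows the same embedding pattern as the existing to_hr_mr()/to_hr_qp(): the driver-private struct wraps the core object and recovers itself with container_of(). A self-contained illustration of that pattern with stand-in types (nothing below is the RDMA core's):

#include <stddef.h>
#include <stdio.h>

/* stand-in for the core object handed around by common code */
struct ib_mw_like {
	unsigned int rkey;
};

/* stand-in for the driver-private wrapper, as hns_roce_mw wraps struct ib_mw */
struct drv_mw {
	struct ib_mw_like ibmw;
	unsigned int pdn;
	int enabled;
};

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

static struct drv_mw *to_drv_mw(struct ib_mw_like *ibmw)
{
	return container_of(ibmw, struct drv_mw, ibmw);
}

int main(void)
{
	struct drv_mw mw = { .ibmw = { .rkey = 0x1234 }, .pdn = 7, .enabled = 1 };
	struct ib_mw_like *core = &mw.ibmw;          /* what the core sees */

	printf("pdn=%u rkey=0x%x\n", to_drv_mw(core)->pdn, core->rkey);
	return 0;
}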
@@ -731,7 +731,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
cq_init_attr.comp_vector = 0; cq_init_attr.comp_vector = 0;
cq = hns_roce_ib_create_cq(&hr_dev->ib_dev, &cq_init_attr, NULL, NULL); cq = hns_roce_ib_create_cq(&hr_dev->ib_dev, &cq_init_attr, NULL, NULL);
if (IS_ERR(cq)) { if (IS_ERR(cq)) {
dev_err(dev, "Create cq for reseved loop qp failed!"); dev_err(dev, "Create cq for reserved loop qp failed!");
return -ENOMEM; return -ENOMEM;
} }
free_mr->mr_free_cq = to_hr_cq(cq); free_mr->mr_free_cq = to_hr_cq(cq);
@@ -744,7 +744,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
pd = hns_roce_alloc_pd(&hr_dev->ib_dev, NULL, NULL); pd = hns_roce_alloc_pd(&hr_dev->ib_dev, NULL, NULL);
if (IS_ERR(pd)) { if (IS_ERR(pd)) {
dev_err(dev, "Create pd for reseved loop qp failed!"); dev_err(dev, "Create pd for reserved loop qp failed!");
ret = -ENOMEM; ret = -ENOMEM;
goto alloc_pd_failed; goto alloc_pd_failed;
} }
@@ -54,6 +54,59 @@ static void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg,
dseg->len = cpu_to_le32(sg->length); dseg->len = cpu_to_le32(sg->length);
} }
static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
struct hns_roce_wqe_frmr_seg *fseg,
const struct ib_reg_wr *wr)
{
struct hns_roce_mr *mr = to_hr_mr(wr->mr);
/* use ib_access_flags */
roce_set_bit(rc_sq_wqe->byte_4,
V2_RC_FRMR_WQE_BYTE_4_BIND_EN_S,
wr->access & IB_ACCESS_MW_BIND ? 1 : 0);
roce_set_bit(rc_sq_wqe->byte_4,
V2_RC_FRMR_WQE_BYTE_4_ATOMIC_S,
wr->access & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0);
roce_set_bit(rc_sq_wqe->byte_4,
V2_RC_FRMR_WQE_BYTE_4_RR_S,
wr->access & IB_ACCESS_REMOTE_READ ? 1 : 0);
roce_set_bit(rc_sq_wqe->byte_4,
V2_RC_FRMR_WQE_BYTE_4_RW_S,
wr->access & IB_ACCESS_REMOTE_WRITE ? 1 : 0);
roce_set_bit(rc_sq_wqe->byte_4,
V2_RC_FRMR_WQE_BYTE_4_LW_S,
wr->access & IB_ACCESS_LOCAL_WRITE ? 1 : 0);
/* Data structure reuse may lead to confusion */
rc_sq_wqe->msg_len = cpu_to_le32(mr->pbl_ba & 0xffffffff);
rc_sq_wqe->inv_key = cpu_to_le32(mr->pbl_ba >> 32);
rc_sq_wqe->byte_16 = cpu_to_le32(wr->mr->length & 0xffffffff);
rc_sq_wqe->byte_20 = cpu_to_le32(wr->mr->length >> 32);
rc_sq_wqe->rkey = cpu_to_le32(wr->key);
rc_sq_wqe->va = cpu_to_le64(wr->mr->iova);
fseg->pbl_size = cpu_to_le32(mr->pbl_size);
roce_set_field(fseg->mode_buf_pg_sz,
V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_M,
V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_S,
mr->pbl_buf_pg_sz + PG_SHIFT_OFFSET);
roce_set_bit(fseg->mode_buf_pg_sz,
V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S, 0);
}
static void set_atomic_seg(struct hns_roce_wqe_atomic_seg *aseg,
const struct ib_atomic_wr *wr)
{
if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
aseg->fetchadd_swap_data = cpu_to_le64(wr->swap);
aseg->cmp_data = cpu_to_le64(wr->compare_add);
} else {
aseg->fetchadd_swap_data = cpu_to_le64(wr->compare_add);
aseg->cmp_data = 0;
}
}
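set_atomic_seg() packs the two operands the wire-level atomics need: for a compare-and-swap the swap value plus the compare value, for a fetch-and-add just the addend. The responder-side semantics these WQEs request are the familiar ones; a plain C11 restatement, independent of any RDMA API:

/* Plain C11 illustration of the two atomic operations; no RDMA involved. */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	_Atomic uint64_t remote = 100;          /* the memory the WQE targets */

	/* IB_WR_ATOMIC_FETCH_AND_ADD: old value returned, addend applied */
	uint64_t old = atomic_fetch_add(&remote, 5);
	printf("fetch_add: old=%llu now=%llu\n",
	       (unsigned long long)old, (unsigned long long)remote);

	/* IB_WR_ATOMIC_CMP_AND_SWP: swap in 42 only if the current value is 105 */
	uint64_t expected = 105;
	atomic_compare_exchange_strong(&remote, &expected, 42);
	printf("cmp_swp:   expected=%llu now=%llu\n",
	       (unsigned long long)expected, (unsigned long long)remote);
	return 0;
}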
static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr, static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
unsigned int *sge_ind) unsigned int *sge_ind)
{ {
@@ -121,6 +174,7 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
} }
if (wr->opcode == IB_WR_RDMA_READ) { if (wr->opcode == IB_WR_RDMA_READ) {
*bad_wr = wr;
dev_err(hr_dev->dev, "Not support inline data!\n"); dev_err(hr_dev->dev, "Not support inline data!\n");
return -EINVAL; return -EINVAL;
} }
@@ -179,6 +233,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
struct hns_roce_v2_ud_send_wqe *ud_sq_wqe; struct hns_roce_v2_ud_send_wqe *ud_sq_wqe;
struct hns_roce_v2_rc_send_wqe *rc_sq_wqe; struct hns_roce_v2_rc_send_wqe *rc_sq_wqe;
struct hns_roce_qp *qp = to_hr_qp(ibqp); struct hns_roce_qp *qp = to_hr_qp(ibqp);
struct hns_roce_wqe_frmr_seg *fseg;
struct device *dev = hr_dev->dev; struct device *dev = hr_dev->dev;
struct hns_roce_v2_db sq_db; struct hns_roce_v2_db sq_db;
struct ib_qp_attr attr; struct ib_qp_attr attr;
@@ -191,6 +246,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
int attr_mask; int attr_mask;
u32 tmp_len; u32 tmp_len;
int ret = 0; int ret = 0;
u32 hr_op;
u8 *smac; u8 *smac;
int nreq; int nreq;
int i; int i;
@@ -356,6 +412,9 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
V2_UD_SEND_WQE_BYTE_40_PORTN_S, V2_UD_SEND_WQE_BYTE_40_PORTN_S,
qp->port); qp->port);
roce_set_bit(ud_sq_wqe->byte_40,
V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S,
ah->av.vlan_en ? 1 : 0);
roce_set_field(ud_sq_wqe->byte_48, roce_set_field(ud_sq_wqe->byte_48,
V2_UD_SEND_WQE_BYTE_48_SGID_INDX_M, V2_UD_SEND_WQE_BYTE_48_SGID_INDX_M,
V2_UD_SEND_WQE_BYTE_48_SGID_INDX_S, V2_UD_SEND_WQE_BYTE_48_SGID_INDX_S,
@@ -406,99 +465,100 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
roce_set_bit(rc_sq_wqe->byte_4, roce_set_bit(rc_sq_wqe->byte_4,
V2_RC_SEND_WQE_BYTE_4_OWNER_S, owner_bit); V2_RC_SEND_WQE_BYTE_4_OWNER_S, owner_bit);
wqe += sizeof(struct hns_roce_v2_rc_send_wqe);
switch (wr->opcode) { switch (wr->opcode) {
case IB_WR_RDMA_READ: case IB_WR_RDMA_READ:
roce_set_field(rc_sq_wqe->byte_4, hr_op = HNS_ROCE_V2_WQE_OP_RDMA_READ;
V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
HNS_ROCE_V2_WQE_OP_RDMA_READ);
rc_sq_wqe->rkey = rc_sq_wqe->rkey =
cpu_to_le32(rdma_wr(wr)->rkey); cpu_to_le32(rdma_wr(wr)->rkey);
rc_sq_wqe->va = rc_sq_wqe->va =
cpu_to_le64(rdma_wr(wr)->remote_addr); cpu_to_le64(rdma_wr(wr)->remote_addr);
break; break;
case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE:
roce_set_field(rc_sq_wqe->byte_4, hr_op = HNS_ROCE_V2_WQE_OP_RDMA_WRITE;
V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
HNS_ROCE_V2_WQE_OP_RDMA_WRITE);
rc_sq_wqe->rkey = rc_sq_wqe->rkey =
cpu_to_le32(rdma_wr(wr)->rkey); cpu_to_le32(rdma_wr(wr)->rkey);
rc_sq_wqe->va = rc_sq_wqe->va =
cpu_to_le64(rdma_wr(wr)->remote_addr); cpu_to_le64(rdma_wr(wr)->remote_addr);
break; break;
case IB_WR_RDMA_WRITE_WITH_IMM: case IB_WR_RDMA_WRITE_WITH_IMM:
roce_set_field(rc_sq_wqe->byte_4, hr_op = HNS_ROCE_V2_WQE_OP_RDMA_WRITE_WITH_IMM;
V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
HNS_ROCE_V2_WQE_OP_RDMA_WRITE_WITH_IMM);
rc_sq_wqe->rkey = rc_sq_wqe->rkey =
cpu_to_le32(rdma_wr(wr)->rkey); cpu_to_le32(rdma_wr(wr)->rkey);
rc_sq_wqe->va = rc_sq_wqe->va =
cpu_to_le64(rdma_wr(wr)->remote_addr); cpu_to_le64(rdma_wr(wr)->remote_addr);
break; break;
case IB_WR_SEND: case IB_WR_SEND:
roce_set_field(rc_sq_wqe->byte_4, hr_op = HNS_ROCE_V2_WQE_OP_SEND;
V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
HNS_ROCE_V2_WQE_OP_SEND);
break; break;
case IB_WR_SEND_WITH_INV: case IB_WR_SEND_WITH_INV:
roce_set_field(rc_sq_wqe->byte_4, hr_op = HNS_ROCE_V2_WQE_OP_SEND_WITH_INV;
V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
HNS_ROCE_V2_WQE_OP_SEND_WITH_INV);
break; break;
case IB_WR_SEND_WITH_IMM: case IB_WR_SEND_WITH_IMM:
roce_set_field(rc_sq_wqe->byte_4, hr_op = HNS_ROCE_V2_WQE_OP_SEND_WITH_IMM;
V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
HNS_ROCE_V2_WQE_OP_SEND_WITH_IMM);
break; break;
case IB_WR_LOCAL_INV: case IB_WR_LOCAL_INV:
roce_set_field(rc_sq_wqe->byte_4, hr_op = HNS_ROCE_V2_WQE_OP_LOCAL_INV;
V2_RC_SEND_WQE_BYTE_4_OPCODE_M, roce_set_bit(rc_sq_wqe->byte_4,
V2_RC_SEND_WQE_BYTE_4_OPCODE_S, V2_RC_SEND_WQE_BYTE_4_SO_S, 1);
HNS_ROCE_V2_WQE_OP_LOCAL_INV); rc_sq_wqe->inv_key =
cpu_to_le32(wr->ex.invalidate_rkey);
break;
case IB_WR_REG_MR:
hr_op = HNS_ROCE_V2_WQE_OP_FAST_REG_PMR;
fseg = wqe;
set_frmr_seg(rc_sq_wqe, fseg, reg_wr(wr));
break; break;
case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_CMP_AND_SWP:
roce_set_field(rc_sq_wqe->byte_4, hr_op = HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP;
V2_RC_SEND_WQE_BYTE_4_OPCODE_M, rc_sq_wqe->rkey =
V2_RC_SEND_WQE_BYTE_4_OPCODE_S, cpu_to_le32(atomic_wr(wr)->rkey);
HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP); rc_sq_wqe->va =
cpu_to_le64(atomic_wr(wr)->remote_addr);
break; break;
case IB_WR_ATOMIC_FETCH_AND_ADD: case IB_WR_ATOMIC_FETCH_AND_ADD:
roce_set_field(rc_sq_wqe->byte_4, hr_op = HNS_ROCE_V2_WQE_OP_ATOM_FETCH_AND_ADD;
V2_RC_SEND_WQE_BYTE_4_OPCODE_M, rc_sq_wqe->rkey =
V2_RC_SEND_WQE_BYTE_4_OPCODE_S, cpu_to_le32(atomic_wr(wr)->rkey);
HNS_ROCE_V2_WQE_OP_ATOM_FETCH_AND_ADD); rc_sq_wqe->va =
cpu_to_le64(atomic_wr(wr)->remote_addr);
break; break;
case IB_WR_MASKED_ATOMIC_CMP_AND_SWP: case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
roce_set_field(rc_sq_wqe->byte_4, hr_op =
V2_RC_SEND_WQE_BYTE_4_OPCODE_M, HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP;
V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP);
break; break;
case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD: case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD:
roce_set_field(rc_sq_wqe->byte_4, hr_op =
V2_RC_SEND_WQE_BYTE_4_OPCODE_M, HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD;
V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD);
break; break;
default: default:
roce_set_field(rc_sq_wqe->byte_4, hr_op = HNS_ROCE_V2_WQE_OP_MASK;
V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
V2_RC_SEND_WQE_BYTE_4_OPCODE_S,
HNS_ROCE_V2_WQE_OP_MASK);
break; break;
} }
wqe += sizeof(struct hns_roce_v2_rc_send_wqe); roce_set_field(rc_sq_wqe->byte_4,
V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
V2_RC_SEND_WQE_BYTE_4_OPCODE_S, hr_op);
if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
struct hns_roce_v2_wqe_data_seg *dseg;
dseg = wqe;
set_data_seg_v2(dseg, wr->sg_list);
wqe += sizeof(struct hns_roce_v2_wqe_data_seg);
set_atomic_seg(wqe, atomic_wr(wr));
roce_set_field(rc_sq_wqe->byte_16,
V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S,
wr->num_sge);
} else if (wr->opcode != IB_WR_REG_MR) {
ret = set_rwqe_data_seg(ibqp, wr, rc_sq_wqe,
wqe, &sge_ind, bad_wr);
if (ret)
goto out;
}
ret = set_rwqe_data_seg(ibqp, wr, rc_sq_wqe, wqe,
&sge_ind, bad_wr);
if (ret)
goto out;
ind++; ind++;
} else { } else {
dev_err(dev, "Illegal qp_type(0x%x)\n", ibqp->qp_type); dev_err(dev, "Illegal qp_type(0x%x)\n", ibqp->qp_type);
@@ -935,7 +995,24 @@ static int hns_roce_cmq_query_hw_info(struct hns_roce_dev *hr_dev)
resp = (struct hns_roce_query_version *)desc.data; resp = (struct hns_roce_query_version *)desc.data;
hr_dev->hw_rev = le32_to_cpu(resp->rocee_hw_version); hr_dev->hw_rev = le32_to_cpu(resp->rocee_hw_version);
hr_dev->vendor_id = le32_to_cpu(resp->rocee_vendor_id); hr_dev->vendor_id = hr_dev->pci_dev->vendor;
return 0;
}
static int hns_roce_query_fw_ver(struct hns_roce_dev *hr_dev)
{
struct hns_roce_query_fw_info *resp;
struct hns_roce_cmq_desc desc;
int ret;
hns_roce_cmq_setup_basic_desc(&desc, HNS_QUERY_FW_VER, true);
ret = hns_roce_cmq_send(hr_dev, &desc, 1);
if (ret)
return ret;
resp = (struct hns_roce_query_fw_info *)desc.data;
hr_dev->caps.fw_ver = (u64)(le32_to_cpu(resp->fw_ver));
return 0; return 0;
} }
@@ -1157,6 +1234,13 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
int ret; int ret;
ret = hns_roce_cmq_query_hw_info(hr_dev); ret = hns_roce_cmq_query_hw_info(hr_dev);
if (ret) {
dev_err(hr_dev->dev, "Query hardware version fail, ret = %d.\n",
ret);
return ret;
}
ret = hns_roce_query_fw_ver(hr_dev);
if (ret) { if (ret) {
dev_err(hr_dev->dev, "Query firmware version fail, ret = %d.\n", dev_err(hr_dev->dev, "Query firmware version fail, ret = %d.\n",
ret); ret);
@@ -1185,14 +1269,16 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
return ret; return ret;
} }
hr_dev->vendor_part_id = 0;
hr_dev->sys_image_guid = 0; hr_dev->vendor_part_id = hr_dev->pci_dev->device;
hr_dev->sys_image_guid = be64_to_cpu(hr_dev->ib_dev.node_guid);
caps->num_qps = HNS_ROCE_V2_MAX_QP_NUM; caps->num_qps = HNS_ROCE_V2_MAX_QP_NUM;
caps->max_wqes = HNS_ROCE_V2_MAX_WQE_NUM; caps->max_wqes = HNS_ROCE_V2_MAX_WQE_NUM;
caps->num_cqs = HNS_ROCE_V2_MAX_CQ_NUM; caps->num_cqs = HNS_ROCE_V2_MAX_CQ_NUM;
caps->max_cqes = HNS_ROCE_V2_MAX_CQE_NUM; caps->max_cqes = HNS_ROCE_V2_MAX_CQE_NUM;
caps->max_sq_sg = HNS_ROCE_V2_MAX_SQ_SGE_NUM; caps->max_sq_sg = HNS_ROCE_V2_MAX_SQ_SGE_NUM;
caps->max_extend_sg = HNS_ROCE_V2_MAX_EXTEND_SGE_NUM;
caps->max_rq_sg = HNS_ROCE_V2_MAX_RQ_SGE_NUM; caps->max_rq_sg = HNS_ROCE_V2_MAX_RQ_SGE_NUM;
caps->max_sq_inline = HNS_ROCE_V2_MAX_SQ_INLINE; caps->max_sq_inline = HNS_ROCE_V2_MAX_SQ_INLINE;
caps->num_uars = HNS_ROCE_V2_UAR_NUM; caps->num_uars = HNS_ROCE_V2_UAR_NUM;
@@ -1222,6 +1308,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
caps->reserved_mrws = 1; caps->reserved_mrws = 1;
caps->reserved_uars = 0; caps->reserved_uars = 0;
caps->reserved_cqs = 0; caps->reserved_cqs = 0;
caps->reserved_qps = HNS_ROCE_V2_RSV_QPS;
caps->qpc_ba_pg_sz = 0; caps->qpc_ba_pg_sz = 0;
caps->qpc_buf_pg_sz = 0; caps->qpc_buf_pg_sz = 0;
@@ -1255,6 +1342,11 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
HNS_ROCE_CAP_FLAG_RQ_INLINE | HNS_ROCE_CAP_FLAG_RQ_INLINE |
HNS_ROCE_CAP_FLAG_RECORD_DB | HNS_ROCE_CAP_FLAG_RECORD_DB |
HNS_ROCE_CAP_FLAG_SQ_RECORD_DB; HNS_ROCE_CAP_FLAG_SQ_RECORD_DB;
if (hr_dev->pci_dev->revision == 0x21)
caps->flags |= HNS_ROCE_CAP_FLAG_MW |
HNS_ROCE_CAP_FLAG_FRMR;
caps->pkey_table_len[0] = 1; caps->pkey_table_len[0] = 1;
caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM; caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM;
caps->ceqe_depth = HNS_ROCE_V2_COMP_EQE_NUM; caps->ceqe_depth = HNS_ROCE_V2_COMP_EQE_NUM;
@@ -1262,6 +1354,9 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
caps->local_ca_ack_delay = 0; caps->local_ca_ack_delay = 0;
caps->max_mtu = IB_MTU_4096; caps->max_mtu = IB_MTU_4096;
if (hr_dev->pci_dev->revision == 0x21)
caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC;
ret = hns_roce_v2_set_bt(hr_dev); ret = hns_roce_v2_set_bt(hr_dev);
if (ret) if (ret)
dev_err(hr_dev->dev, "Configure bt attribute fail, ret = %d.\n", dev_err(hr_dev->dev, "Configure bt attribute fail, ret = %d.\n",
@@ -1690,10 +1785,11 @@ static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RA_EN_S, 0); roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RA_EN_S, 0);
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1); roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1);
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 0); roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1);
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_BIND_EN_S, roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_BIND_EN_S,
(mr->access & IB_ACCESS_MW_BIND ? 1 : 0)); (mr->access & IB_ACCESS_MW_BIND ? 1 : 0));
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_ATOMIC_EN_S, 0); roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_ATOMIC_EN_S,
mr->access & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0);
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RR_EN_S, roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RR_EN_S,
(mr->access & IB_ACCESS_REMOTE_READ ? 1 : 0)); (mr->access & IB_ACCESS_REMOTE_READ ? 1 : 0));
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RW_EN_S, roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RW_EN_S,
@@ -1817,6 +1913,88 @@ static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev,
return 0; return 0;
} }
static int hns_roce_v2_frmr_write_mtpt(void *mb_buf, struct hns_roce_mr *mr)
{
struct hns_roce_v2_mpt_entry *mpt_entry;
mpt_entry = mb_buf;
memset(mpt_entry, 0, sizeof(*mpt_entry));
roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M,
V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_FREE);
roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_HOP_NUM_M,
V2_MPT_BYTE_4_PBL_HOP_NUM_S, 1);
roce_set_field(mpt_entry->byte_4_pd_hop_st,
V2_MPT_BYTE_4_PBL_BA_PG_SZ_M,
V2_MPT_BYTE_4_PBL_BA_PG_SZ_S,
mr->pbl_ba_pg_sz + PG_SHIFT_OFFSET);
roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M,
V2_MPT_BYTE_4_PD_S, mr->pd);
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RA_EN_S, 1);
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1);
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1);
roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_FRE_S, 1);
roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S, 0);
roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_MR_MW_S, 0);
roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BPD_S, 1);
mpt_entry->pbl_size = cpu_to_le32(mr->pbl_size);
mpt_entry->pbl_ba_l = cpu_to_le32(lower_32_bits(mr->pbl_ba >> 3));
roce_set_field(mpt_entry->byte_48_mode_ba, V2_MPT_BYTE_48_PBL_BA_H_M,
V2_MPT_BYTE_48_PBL_BA_H_S,
upper_32_bits(mr->pbl_ba >> 3));
roce_set_field(mpt_entry->byte_64_buf_pa1,
V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M,
V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S,
mr->pbl_buf_pg_sz + PG_SHIFT_OFFSET);
return 0;
}
static int hns_roce_v2_mw_write_mtpt(void *mb_buf, struct hns_roce_mw *mw)
{
struct hns_roce_v2_mpt_entry *mpt_entry;
mpt_entry = mb_buf;
memset(mpt_entry, 0, sizeof(*mpt_entry));
roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M,
V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_FREE);
roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M,
V2_MPT_BYTE_4_PD_S, mw->pdn);
roce_set_field(mpt_entry->byte_4_pd_hop_st,
V2_MPT_BYTE_4_PBL_HOP_NUM_M,
V2_MPT_BYTE_4_PBL_HOP_NUM_S,
mw->pbl_hop_num == HNS_ROCE_HOP_NUM_0 ?
0 : mw->pbl_hop_num);
roce_set_field(mpt_entry->byte_4_pd_hop_st,
V2_MPT_BYTE_4_PBL_BA_PG_SZ_M,
V2_MPT_BYTE_4_PBL_BA_PG_SZ_S,
mw->pbl_ba_pg_sz + PG_SHIFT_OFFSET);
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1);
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1);
roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S, 0);
roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_MR_MW_S, 1);
roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BPD_S, 1);
roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BQP_S,
mw->ibmw.type == IB_MW_TYPE_1 ? 0 : 1);
roce_set_field(mpt_entry->byte_64_buf_pa1,
V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M,
V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S,
mw->pbl_buf_pg_sz + PG_SHIFT_OFFSET);
mpt_entry->lkey = cpu_to_le32(mw->rkey);
return 0;
}
static void *get_cqe_v2(struct hns_roce_cq *hr_cq, int n) static void *get_cqe_v2(struct hns_roce_cq *hr_cq, int n)
{ {
return hns_roce_buf_offset(&hr_cq->hr_buf.hr_buf, return hns_roce_buf_offset(&hr_cq->hr_buf.hr_buf,
@@ -2274,6 +2452,7 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
wc->src_qp = (u8)roce_get_field(cqe->byte_32, wc->src_qp = (u8)roce_get_field(cqe->byte_32,
V2_CQE_BYTE_32_RMT_QPN_M, V2_CQE_BYTE_32_RMT_QPN_M,
V2_CQE_BYTE_32_RMT_QPN_S); V2_CQE_BYTE_32_RMT_QPN_S);
wc->slid = 0;
wc->wc_flags |= (roce_get_bit(cqe->byte_32, wc->wc_flags |= (roce_get_bit(cqe->byte_32,
V2_CQE_BYTE_32_GRH_S) ? V2_CQE_BYTE_32_GRH_S) ?
IB_WC_GRH : 0); IB_WC_GRH : 0);
@@ -2287,7 +2466,14 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
wc->smac[5] = roce_get_field(cqe->byte_28, wc->smac[5] = roce_get_field(cqe->byte_28,
V2_CQE_BYTE_28_SMAC_5_M, V2_CQE_BYTE_28_SMAC_5_M,
V2_CQE_BYTE_28_SMAC_5_S); V2_CQE_BYTE_28_SMAC_5_S);
wc->vlan_id = 0xffff; if (roce_get_bit(cqe->byte_28, V2_CQE_BYTE_28_VID_VLD_S)) {
wc->vlan_id = (u16)roce_get_field(cqe->byte_28,
V2_CQE_BYTE_28_VID_M,
V2_CQE_BYTE_28_VID_S);
} else {
wc->vlan_id = 0xffff;
}
wc->wc_flags |= (IB_WC_WITH_VLAN | IB_WC_WITH_SMAC); wc->wc_flags |= (IB_WC_WITH_VLAN | IB_WC_WITH_SMAC);
wc->network_hdr_type = roce_get_field(cqe->byte_28, wc->network_hdr_type = roce_get_field(cqe->byte_28,
V2_CQE_BYTE_28_PORT_TYPE_M, V2_CQE_BYTE_28_PORT_TYPE_M,
@@ -2589,21 +2775,16 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
roce_set_bit(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_RQ_TX_ERR_S, 0); roce_set_bit(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_RQ_TX_ERR_S, 0);
roce_set_bit(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_RQ_RX_ERR_S, 0); roce_set_bit(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_RQ_RX_ERR_S, 0);
roce_set_field(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_MAPID_M, roce_set_field(qpc_mask->byte_60_qpst_tempid, V2_QPC_BYTE_60_TEMPID_M,
V2_QPC_BYTE_60_MAPID_S, 0); V2_QPC_BYTE_60_TEMPID_S, 0);
roce_set_bit(qpc_mask->byte_60_qpst_mapid, roce_set_field(qpc_mask->byte_60_qpst_tempid,
V2_QPC_BYTE_60_INNER_MAP_IND_S, 0); V2_QPC_BYTE_60_SCC_TOKEN_M, V2_QPC_BYTE_60_SCC_TOKEN_S,
roce_set_bit(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_SQ_MAP_IND_S, 0);
0); roce_set_bit(qpc_mask->byte_60_qpst_tempid,
roce_set_bit(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_RQ_MAP_IND_S, V2_QPC_BYTE_60_SQ_DB_DOING_S, 0);
0); roce_set_bit(qpc_mask->byte_60_qpst_tempid,
roce_set_bit(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_EXT_MAP_IND_S, V2_QPC_BYTE_60_RQ_DB_DOING_S, 0);
0);
roce_set_bit(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_SQ_RLS_IND_S,
0);
roce_set_bit(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_SQ_EXT_IND_S,
0);
roce_set_bit(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_CNP_TX_FLAG_S, 0); roce_set_bit(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_CNP_TX_FLAG_S, 0);
roce_set_bit(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_CE_FLAG_S, 0); roce_set_bit(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_CE_FLAG_S, 0);
@@ -2685,7 +2866,8 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
roce_set_field(qpc_mask->byte_132_trrl, V2_QPC_BYTE_132_TRRL_TAIL_MAX_M, roce_set_field(qpc_mask->byte_132_trrl, V2_QPC_BYTE_132_TRRL_TAIL_MAX_M,
V2_QPC_BYTE_132_TRRL_TAIL_MAX_S, 0); V2_QPC_BYTE_132_TRRL_TAIL_MAX_S, 0);
roce_set_bit(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RSVD_RAQ_MAP_S, 0); roce_set_bit(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RQ_RTY_WAIT_DO_S,
0);
roce_set_field(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RAQ_TRRL_HEAD_M, roce_set_field(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RAQ_TRRL_HEAD_M,
V2_QPC_BYTE_140_RAQ_TRRL_HEAD_S, 0); V2_QPC_BYTE_140_RAQ_TRRL_HEAD_S, 0);
roce_set_field(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RAQ_TRRL_TAIL_M, roce_set_field(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_RAQ_TRRL_TAIL_M,
@@ -2694,8 +2876,6 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
roce_set_field(qpc_mask->byte_144_raq, roce_set_field(qpc_mask->byte_144_raq,
V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_M, V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_M,
V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_S, 0); V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_S, 0);
roce_set_bit(qpc_mask->byte_144_raq, V2_QPC_BYTE_144_RAQ_RTY_INI_IND_S,
0);
roce_set_field(qpc_mask->byte_144_raq, V2_QPC_BYTE_144_RAQ_CREDIT_M, roce_set_field(qpc_mask->byte_144_raq, V2_QPC_BYTE_144_RAQ_CREDIT_M,
V2_QPC_BYTE_144_RAQ_CREDIT_S, 0); V2_QPC_BYTE_144_RAQ_CREDIT_S, 0);
roce_set_bit(qpc_mask->byte_144_raq, V2_QPC_BYTE_144_RESP_RTY_FLG_S, 0); roce_set_bit(qpc_mask->byte_144_raq, V2_QPC_BYTE_144_RESP_RTY_FLG_S, 0);
@@ -2721,14 +2901,12 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
V2_QPC_BYTE_160_SQ_CONSUMER_IDX_M, V2_QPC_BYTE_160_SQ_CONSUMER_IDX_M,
V2_QPC_BYTE_160_SQ_CONSUMER_IDX_S, 0); V2_QPC_BYTE_160_SQ_CONSUMER_IDX_S, 0);
roce_set_field(context->byte_168_irrl_idx, roce_set_bit(qpc_mask->byte_168_irrl_idx,
V2_QPC_BYTE_168_SQ_SHIFT_BAK_M, V2_QPC_BYTE_168_POLL_DB_WAIT_DO_S, 0);
V2_QPC_BYTE_168_SQ_SHIFT_BAK_S, roce_set_bit(qpc_mask->byte_168_irrl_idx,
ilog2((unsigned int)hr_qp->sq.wqe_cnt)); V2_QPC_BYTE_168_SCC_TOKEN_FORBID_SQ_DEQ_S, 0);
roce_set_field(qpc_mask->byte_168_irrl_idx, roce_set_bit(qpc_mask->byte_168_irrl_idx,
V2_QPC_BYTE_168_SQ_SHIFT_BAK_M, V2_QPC_BYTE_168_WAIT_ACK_TIMEOUT_S, 0);
V2_QPC_BYTE_168_SQ_SHIFT_BAK_S, 0);
roce_set_bit(qpc_mask->byte_168_irrl_idx, roce_set_bit(qpc_mask->byte_168_irrl_idx,
V2_QPC_BYTE_168_MSG_RTY_LP_FLG_S, 0); V2_QPC_BYTE_168_MSG_RTY_LP_FLG_S, 0);
roce_set_bit(qpc_mask->byte_168_irrl_idx, roce_set_bit(qpc_mask->byte_168_irrl_idx,
@@ -2746,6 +2924,9 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
roce_set_bit(qpc_mask->byte_172_sq_psn, V2_QPC_BYTE_172_MSG_RNR_FLG_S, roce_set_bit(qpc_mask->byte_172_sq_psn, V2_QPC_BYTE_172_MSG_RNR_FLG_S,
0); 0);
roce_set_bit(context->byte_172_sq_psn, V2_QPC_BYTE_172_FRE_S, 1);
roce_set_bit(qpc_mask->byte_172_sq_psn, V2_QPC_BYTE_172_FRE_S, 0);
roce_set_field(qpc_mask->byte_176_msg_pktn, roce_set_field(qpc_mask->byte_176_msg_pktn,
V2_QPC_BYTE_176_MSG_USE_PKTN_M, V2_QPC_BYTE_176_MSG_USE_PKTN_M,
V2_QPC_BYTE_176_MSG_USE_PKTN_S, 0); V2_QPC_BYTE_176_MSG_USE_PKTN_S, 0);
@@ -2790,6 +2971,13 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
V2_QPC_BYTE_232_IRRL_SGE_IDX_M, V2_QPC_BYTE_232_IRRL_SGE_IDX_M,
V2_QPC_BYTE_232_IRRL_SGE_IDX_S, 0); V2_QPC_BYTE_232_IRRL_SGE_IDX_S, 0);
roce_set_bit(qpc_mask->byte_232_irrl_sge, V2_QPC_BYTE_232_SO_LP_VLD_S,
0);
roce_set_bit(qpc_mask->byte_232_irrl_sge,
V2_QPC_BYTE_232_FENCE_LP_VLD_S, 0);
roce_set_bit(qpc_mask->byte_232_irrl_sge, V2_QPC_BYTE_232_IRRL_LP_VLD_S,
0);
qpc_mask->irrl_cur_sge_offset = 0; qpc_mask->irrl_cur_sge_offset = 0;
roce_set_field(qpc_mask->byte_240_irrl_tail, roce_set_field(qpc_mask->byte_240_irrl_tail,
@@ -2955,13 +3143,6 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
roce_set_field(qpc_mask->byte_56_dqpn_err, roce_set_field(qpc_mask->byte_56_dqpn_err,
V2_QPC_BYTE_56_DQPN_M, V2_QPC_BYTE_56_DQPN_S, 0); V2_QPC_BYTE_56_DQPN_M, V2_QPC_BYTE_56_DQPN_S, 0);
} }
roce_set_field(context->byte_168_irrl_idx,
V2_QPC_BYTE_168_SQ_SHIFT_BAK_M,
V2_QPC_BYTE_168_SQ_SHIFT_BAK_S,
ilog2((unsigned int)hr_qp->sq.wqe_cnt));
roce_set_field(qpc_mask->byte_168_irrl_idx,
V2_QPC_BYTE_168_SQ_SHIFT_BAK_M,
V2_QPC_BYTE_168_SQ_SHIFT_BAK_S, 0);
} }
static int modify_qp_init_to_rtr(struct ib_qp *ibqp, static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
@@ -3271,13 +3452,6 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp,
* we should set all bits of the relevant fields in context mask to * we should set all bits of the relevant fields in context mask to
* 0 at the same time, else set them to 0x1. * 0 at the same time, else set them to 0x1.
*/ */
roce_set_field(context->byte_60_qpst_mapid,
V2_QPC_BYTE_60_RTY_NUM_INI_BAK_M,
V2_QPC_BYTE_60_RTY_NUM_INI_BAK_S, attr->retry_cnt);
roce_set_field(qpc_mask->byte_60_qpst_mapid,
V2_QPC_BYTE_60_RTY_NUM_INI_BAK_M,
V2_QPC_BYTE_60_RTY_NUM_INI_BAK_S, 0);
context->sq_cur_blk_addr = (u32)(mtts[0] >> PAGE_ADDR_SHIFT); context->sq_cur_blk_addr = (u32)(mtts[0] >> PAGE_ADDR_SHIFT);
roce_set_field(context->byte_168_irrl_idx, roce_set_field(context->byte_168_irrl_idx,
V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_M, V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_M,
@@ -3538,6 +3712,17 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
memcpy(src_mac, gid_attr->ndev->dev_addr, ETH_ALEN); memcpy(src_mac, gid_attr->ndev->dev_addr, ETH_ALEN);
} }
if (is_vlan_dev(gid_attr->ndev)) {
roce_set_bit(context->byte_76_srqn_op_en,
V2_QPC_BYTE_76_RQ_VLAN_EN_S, 1);
roce_set_bit(qpc_mask->byte_76_srqn_op_en,
V2_QPC_BYTE_76_RQ_VLAN_EN_S, 0);
roce_set_bit(context->byte_168_irrl_idx,
V2_QPC_BYTE_168_SQ_VLAN_EN_S, 1);
roce_set_bit(qpc_mask->byte_168_irrl_idx,
V2_QPC_BYTE_168_SQ_VLAN_EN_S, 0);
}
roce_set_field(context->byte_24_mtu_tc, roce_set_field(context->byte_24_mtu_tc,
V2_QPC_BYTE_24_VLAN_ID_M, V2_QPC_BYTE_24_VLAN_ID_M,
V2_QPC_BYTE_24_VLAN_ID_S, vlan); V2_QPC_BYTE_24_VLAN_ID_S, vlan);
@@ -3584,8 +3769,15 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
V2_QPC_BYTE_24_HOP_LIMIT_M, V2_QPC_BYTE_24_HOP_LIMIT_M,
V2_QPC_BYTE_24_HOP_LIMIT_S, 0); V2_QPC_BYTE_24_HOP_LIMIT_S, 0);
roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, if (hr_dev->pci_dev->revision == 0x21 &&
V2_QPC_BYTE_24_TC_S, grh->traffic_class); gid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
roce_set_field(context->byte_24_mtu_tc,
V2_QPC_BYTE_24_TC_M, V2_QPC_BYTE_24_TC_S,
grh->traffic_class >> 2);
else
roce_set_field(context->byte_24_mtu_tc,
V2_QPC_BYTE_24_TC_M, V2_QPC_BYTE_24_TC_S,
grh->traffic_class);
roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M,
V2_QPC_BYTE_24_TC_S, 0); V2_QPC_BYTE_24_TC_S, 0);
roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_FL_M, roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_FL_M,
@@ -3606,9 +3798,9 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
set_access_flags(hr_qp, context, qpc_mask, attr, attr_mask); set_access_flags(hr_qp, context, qpc_mask, attr, attr_mask);
/* Every status migrate must change state */ /* Every status migrate must change state */
roce_set_field(context->byte_60_qpst_mapid, V2_QPC_BYTE_60_QP_ST_M, roce_set_field(context->byte_60_qpst_tempid, V2_QPC_BYTE_60_QP_ST_M,
V2_QPC_BYTE_60_QP_ST_S, new_state); V2_QPC_BYTE_60_QP_ST_S, new_state);
roce_set_field(qpc_mask->byte_60_qpst_mapid, V2_QPC_BYTE_60_QP_ST_M, roce_set_field(qpc_mask->byte_60_qpst_tempid, V2_QPC_BYTE_60_QP_ST_M,
V2_QPC_BYTE_60_QP_ST_S, 0); V2_QPC_BYTE_60_QP_ST_S, 0);
/* SW pass context to HW */ /* SW pass context to HW */
@@ -3728,7 +3920,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
goto out; goto out;
} }
state = roce_get_field(context->byte_60_qpst_mapid, state = roce_get_field(context->byte_60_qpst_tempid,
V2_QPC_BYTE_60_QP_ST_M, V2_QPC_BYTE_60_QP_ST_S); V2_QPC_BYTE_60_QP_ST_M, V2_QPC_BYTE_60_QP_ST_S);
tmp_qp_state = to_ib_qp_st((enum hns_roce_v2_qp_state)state); tmp_qp_state = to_ib_qp_st((enum hns_roce_v2_qp_state)state);
if (tmp_qp_state == -1) { if (tmp_qp_state == -1) {
@@ -3995,13 +4187,103 @@ static void hns_roce_irq_work_handle(struct work_struct *work)
{ {
struct hns_roce_work *irq_work = struct hns_roce_work *irq_work =
container_of(work, struct hns_roce_work, work); container_of(work, struct hns_roce_work, work);
struct device *dev = irq_work->hr_dev->dev;
u32 qpn = irq_work->qpn; u32 qpn = irq_work->qpn;
u32 cqn = irq_work->cqn;
switch (irq_work->event_type) { switch (irq_work->event_type) {
case HNS_ROCE_EVENT_TYPE_PATH_MIG:
dev_info(dev, "Path migrated succeeded.\n");
break;
case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
dev_warn(dev, "Path migration failed.\n");
break;
case HNS_ROCE_EVENT_TYPE_COMM_EST:
dev_info(dev, "Communication established.\n");
break;
case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
dev_warn(dev, "Send queue drained.\n");
break;
case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR: case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: dev_err(dev, "Local work queue catastrophic error.\n");
case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
hns_roce_set_qps_to_err(irq_work->hr_dev, qpn); hns_roce_set_qps_to_err(irq_work->hr_dev, qpn);
switch (irq_work->sub_type) {
case HNS_ROCE_LWQCE_QPC_ERROR:
dev_err(dev, "QP %d, QPC error.\n", qpn);
break;
case HNS_ROCE_LWQCE_MTU_ERROR:
dev_err(dev, "QP %d, MTU error.\n", qpn);
break;
case HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR:
dev_err(dev, "QP %d, WQE BA addr error.\n", qpn);
break;
case HNS_ROCE_LWQCE_WQE_ADDR_ERROR:
dev_err(dev, "QP %d, WQE addr error.\n", qpn);
break;
case HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR:
dev_err(dev, "QP %d, WQE shift error.\n", qpn);
break;
default:
dev_err(dev, "Unhandled sub_event type %d.\n",
irq_work->sub_type);
break;
}
break;
case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
dev_err(dev, "Invalid request local work queue error.\n");
hns_roce_set_qps_to_err(irq_work->hr_dev, qpn);
break;
case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
dev_err(dev, "Local access violation work queue error.\n");
hns_roce_set_qps_to_err(irq_work->hr_dev, qpn);
switch (irq_work->sub_type) {
case HNS_ROCE_LAVWQE_R_KEY_VIOLATION:
dev_err(dev, "QP %d, R_key violation.\n", qpn);
break;
case HNS_ROCE_LAVWQE_LENGTH_ERROR:
dev_err(dev, "QP %d, length error.\n", qpn);
break;
case HNS_ROCE_LAVWQE_VA_ERROR:
dev_err(dev, "QP %d, VA error.\n", qpn);
break;
case HNS_ROCE_LAVWQE_PD_ERROR:
dev_err(dev, "QP %d, PD error.\n", qpn);
break;
case HNS_ROCE_LAVWQE_RW_ACC_ERROR:
dev_err(dev, "QP %d, rw acc error.\n", qpn);
break;
case HNS_ROCE_LAVWQE_KEY_STATE_ERROR:
dev_err(dev, "QP %d, key state error.\n", qpn);
break;
case HNS_ROCE_LAVWQE_MR_OPERATION_ERROR:
dev_err(dev, "QP %d, MR operation error.\n", qpn);
break;
default:
dev_err(dev, "Unhandled sub_event type %d.\n",
irq_work->sub_type);
break;
}
break;
case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
dev_warn(dev, "SRQ limit reach.\n");
break;
case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
dev_warn(dev, "SRQ last wqe reach.\n");
break;
case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
dev_err(dev, "SRQ catas error.\n");
break;
case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
dev_err(dev, "CQ 0x%x access err.\n", cqn);
break;
case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
dev_warn(dev, "CQ 0x%x overflow\n", cqn);
break;
case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW:
dev_warn(dev, "DB overflow.\n");
break;
case HNS_ROCE_EVENT_TYPE_FLR:
dev_warn(dev, "Function level reset.\n");
break; break;
default: default:
break; break;
@@ -4011,7 +4293,8 @@ static void hns_roce_irq_work_handle(struct work_struct *work)
} }
static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev, static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev,
struct hns_roce_eq *eq, u32 qpn) struct hns_roce_eq *eq,
u32 qpn, u32 cqn)
{ {
struct hns_roce_work *irq_work; struct hns_roce_work *irq_work;
@@ -4022,6 +4305,7 @@ static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev,
INIT_WORK(&(irq_work->work), hns_roce_irq_work_handle); INIT_WORK(&(irq_work->work), hns_roce_irq_work_handle);
irq_work->hr_dev = hr_dev; irq_work->hr_dev = hr_dev;
irq_work->qpn = qpn; irq_work->qpn = qpn;
irq_work->cqn = cqn;
irq_work->event_type = eq->event_type; irq_work->event_type = eq->event_type;
irq_work->sub_type = eq->sub_type; irq_work->sub_type = eq->sub_type;
queue_work(hr_dev->irq_workq, &(irq_work->work)); queue_work(hr_dev->irq_workq, &(irq_work->work));
@@ -4058,124 +4342,6 @@ static void set_eq_cons_index_v2(struct hns_roce_eq *eq)
hns_roce_write64_k(doorbell, eq->doorbell); hns_roce_write64_k(doorbell, eq->doorbell);
} }
static void hns_roce_v2_wq_catas_err_handle(struct hns_roce_dev *hr_dev,
struct hns_roce_aeqe *aeqe,
u32 qpn)
{
struct device *dev = hr_dev->dev;
int sub_type;
dev_warn(dev, "Local work queue catastrophic error.\n");
sub_type = roce_get_field(aeqe->asyn, HNS_ROCE_V2_AEQE_SUB_TYPE_M,
HNS_ROCE_V2_AEQE_SUB_TYPE_S);
switch (sub_type) {
case HNS_ROCE_LWQCE_QPC_ERROR:
dev_warn(dev, "QP %d, QPC error.\n", qpn);
break;
case HNS_ROCE_LWQCE_MTU_ERROR:
dev_warn(dev, "QP %d, MTU error.\n", qpn);
break;
case HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR:
dev_warn(dev, "QP %d, WQE BA addr error.\n", qpn);
break;
case HNS_ROCE_LWQCE_WQE_ADDR_ERROR:
dev_warn(dev, "QP %d, WQE addr error.\n", qpn);
break;
case HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR:
dev_warn(dev, "QP %d, WQE shift error.\n", qpn);
break;
default:
dev_err(dev, "Unhandled sub_event type %d.\n", sub_type);
break;
}
}
static void hns_roce_v2_local_wq_access_err_handle(struct hns_roce_dev *hr_dev,
struct hns_roce_aeqe *aeqe, u32 qpn)
{
struct device *dev = hr_dev->dev;
int sub_type;
dev_warn(dev, "Local access violation work queue error.\n");
sub_type = roce_get_field(aeqe->asyn, HNS_ROCE_V2_AEQE_SUB_TYPE_M,
HNS_ROCE_V2_AEQE_SUB_TYPE_S);
switch (sub_type) {
case HNS_ROCE_LAVWQE_R_KEY_VIOLATION:
dev_warn(dev, "QP %d, R_key violation.\n", qpn);
break;
case HNS_ROCE_LAVWQE_LENGTH_ERROR:
dev_warn(dev, "QP %d, length error.\n", qpn);
break;
case HNS_ROCE_LAVWQE_VA_ERROR:
dev_warn(dev, "QP %d, VA error.\n", qpn);
break;
case HNS_ROCE_LAVWQE_PD_ERROR:
dev_err(dev, "QP %d, PD error.\n", qpn);
break;
case HNS_ROCE_LAVWQE_RW_ACC_ERROR:
dev_warn(dev, "QP %d, rw acc error.\n", qpn);
break;
case HNS_ROCE_LAVWQE_KEY_STATE_ERROR:
dev_warn(dev, "QP %d, key state error.\n", qpn);
break;
case HNS_ROCE_LAVWQE_MR_OPERATION_ERROR:
dev_warn(dev, "QP %d, MR operation error.\n", qpn);
break;
default:
dev_err(dev, "Unhandled sub_event type %d.\n", sub_type);
break;
}
}
static void hns_roce_v2_qp_err_handle(struct hns_roce_dev *hr_dev,
struct hns_roce_aeqe *aeqe,
int event_type, u32 qpn)
{
struct device *dev = hr_dev->dev;
switch (event_type) {
case HNS_ROCE_EVENT_TYPE_COMM_EST:
dev_warn(dev, "Communication established.\n");
break;
case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
dev_warn(dev, "Send queue drained.\n");
break;
case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
hns_roce_v2_wq_catas_err_handle(hr_dev, aeqe, qpn);
break;
case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
dev_warn(dev, "Invalid request local work queue error.\n");
break;
case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
hns_roce_v2_local_wq_access_err_handle(hr_dev, aeqe, qpn);
break;
default:
break;
}
hns_roce_qp_event(hr_dev, qpn, event_type);
}
static void hns_roce_v2_cq_err_handle(struct hns_roce_dev *hr_dev,
struct hns_roce_aeqe *aeqe,
int event_type, u32 cqn)
{
struct device *dev = hr_dev->dev;
switch (event_type) {
case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
dev_warn(dev, "CQ 0x%x access err.\n", cqn);
break;
case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
dev_warn(dev, "CQ 0x%x overflow\n", cqn);
break;
default:
break;
}
hns_roce_cq_event(hr_dev, cqn, event_type);
}
static struct hns_roce_aeqe *get_aeqe_v2(struct hns_roce_eq *eq, u32 entry) static struct hns_roce_aeqe *get_aeqe_v2(struct hns_roce_eq *eq, u32 entry)
{ {
u32 buf_chk_sz; u32 buf_chk_sz;
@@ -4251,31 +4417,23 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
switch (event_type) { switch (event_type) {
case HNS_ROCE_EVENT_TYPE_PATH_MIG: case HNS_ROCE_EVENT_TYPE_PATH_MIG:
dev_warn(dev, "Path migrated succeeded.\n");
break;
case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED: case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
dev_warn(dev, "Path migration failed.\n");
break;
case HNS_ROCE_EVENT_TYPE_COMM_EST: case HNS_ROCE_EVENT_TYPE_COMM_EST:
case HNS_ROCE_EVENT_TYPE_SQ_DRAINED: case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR: case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
hns_roce_v2_qp_err_handle(hr_dev, aeqe, event_type, hns_roce_qp_event(hr_dev, qpn, event_type);
qpn);
break; break;
case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH: case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH: case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR: case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
dev_warn(dev, "SRQ not support.\n");
break; break;
case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW: case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
hns_roce_v2_cq_err_handle(hr_dev, aeqe, event_type, hns_roce_cq_event(hr_dev, cqn, event_type);
cqn);
break; break;
case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW: case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW:
dev_warn(dev, "DB overflow.\n");
break; break;
case HNS_ROCE_EVENT_TYPE_MB: case HNS_ROCE_EVENT_TYPE_MB:
hns_roce_cmd_event(hr_dev, hns_roce_cmd_event(hr_dev,
@@ -4284,10 +4442,8 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
le64_to_cpu(aeqe->event.cmd.out_param)); le64_to_cpu(aeqe->event.cmd.out_param));
break; break;
case HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW: case HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW:
dev_warn(dev, "CEQ overflow.\n");
break; break;
case HNS_ROCE_EVENT_TYPE_FLR: case HNS_ROCE_EVENT_TYPE_FLR:
dev_warn(dev, "Function level reset.\n");
break; break;
default: default:
dev_err(dev, "Unhandled event %d on EQ %d at idx %u.\n", dev_err(dev, "Unhandled event %d on EQ %d at idx %u.\n",
@@ -4304,7 +4460,7 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
dev_warn(dev, "cons_index overflow, set back to 0.\n"); dev_warn(dev, "cons_index overflow, set back to 0.\n");
eq->cons_index = 0; eq->cons_index = 0;
} }
hns_roce_v2_init_irq_work(hr_dev, eq, qpn); hns_roce_v2_init_irq_work(hr_dev, eq, qpn, cqn);
} }
set_eq_cons_index_v2(eq); set_eq_cons_index_v2(eq);
@@ -5125,6 +5281,7 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
create_singlethread_workqueue("hns_roce_irq_workqueue"); create_singlethread_workqueue("hns_roce_irq_workqueue");
if (!hr_dev->irq_workq) { if (!hr_dev->irq_workq) {
dev_err(dev, "Create irq workqueue failed!\n"); dev_err(dev, "Create irq workqueue failed!\n");
ret = -ENOMEM;
goto err_request_irq_fail; goto err_request_irq_fail;
} }
@@ -5195,6 +5352,8 @@ static const struct hns_roce_hw hns_roce_hw_v2 = {
.set_mac = hns_roce_v2_set_mac, .set_mac = hns_roce_v2_set_mac,
.write_mtpt = hns_roce_v2_write_mtpt, .write_mtpt = hns_roce_v2_write_mtpt,
.rereg_write_mtpt = hns_roce_v2_rereg_write_mtpt, .rereg_write_mtpt = hns_roce_v2_rereg_write_mtpt,
.frmr_write_mtpt = hns_roce_v2_frmr_write_mtpt,
.mw_write_mtpt = hns_roce_v2_mw_write_mtpt,
.write_cqc = hns_roce_v2_write_cqc, .write_cqc = hns_roce_v2_write_cqc,
.set_hem = hns_roce_v2_set_hem, .set_hem = hns_roce_v2_set_hem,
.clear_hem = hns_roce_v2_clear_hem, .clear_hem = hns_roce_v2_clear_hem,


@@ -50,6 +50,7 @@
#define HNS_ROCE_V2_MAX_CQE_NUM 0x10000 #define HNS_ROCE_V2_MAX_CQE_NUM 0x10000
#define HNS_ROCE_V2_MAX_RQ_SGE_NUM 0x100 #define HNS_ROCE_V2_MAX_RQ_SGE_NUM 0x100
#define HNS_ROCE_V2_MAX_SQ_SGE_NUM 0xff #define HNS_ROCE_V2_MAX_SQ_SGE_NUM 0xff
#define HNS_ROCE_V2_MAX_EXTEND_SGE_NUM 0x200000
#define HNS_ROCE_V2_MAX_SQ_INLINE 0x20 #define HNS_ROCE_V2_MAX_SQ_INLINE 0x20
#define HNS_ROCE_V2_UAR_NUM 256 #define HNS_ROCE_V2_UAR_NUM 256
#define HNS_ROCE_V2_PHY_UAR_NUM 1 #define HNS_ROCE_V2_PHY_UAR_NUM 1
@@ -78,6 +79,7 @@
#define HNS_ROCE_INVALID_LKEY 0x100 #define HNS_ROCE_INVALID_LKEY 0x100
#define HNS_ROCE_CMQ_TX_TIMEOUT 30000 #define HNS_ROCE_CMQ_TX_TIMEOUT 30000
#define HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE 2 #define HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE 2
#define HNS_ROCE_V2_RSV_QPS 8
#define HNS_ROCE_CONTEXT_HOP_NUM 1 #define HNS_ROCE_CONTEXT_HOP_NUM 1
#define HNS_ROCE_MTT_HOP_NUM 1 #define HNS_ROCE_MTT_HOP_NUM 1
@@ -201,6 +203,7 @@ enum {
/* CMQ command */ /* CMQ command */
enum hns_roce_opcode_type { enum hns_roce_opcode_type {
HNS_QUERY_FW_VER = 0x0001,
HNS_ROCE_OPC_QUERY_HW_VER = 0x8000, HNS_ROCE_OPC_QUERY_HW_VER = 0x8000,
HNS_ROCE_OPC_CFG_GLOBAL_PARAM = 0x8001, HNS_ROCE_OPC_CFG_GLOBAL_PARAM = 0x8001,
HNS_ROCE_OPC_ALLOC_PF_RES = 0x8004, HNS_ROCE_OPC_ALLOC_PF_RES = 0x8004,
@@ -324,6 +327,7 @@ struct hns_roce_v2_cq_context {
enum{ enum{
V2_MPT_ST_VALID = 0x1, V2_MPT_ST_VALID = 0x1,
V2_MPT_ST_FREE = 0x2,
}; };
enum hns_roce_v2_qp_state { enum hns_roce_v2_qp_state {
@@ -350,7 +354,7 @@ struct hns_roce_v2_qp_context {
__le32 dmac; __le32 dmac;
__le32 byte_52_udpspn_dmac; __le32 byte_52_udpspn_dmac;
__le32 byte_56_dqpn_err; __le32 byte_56_dqpn_err;
__le32 byte_60_qpst_mapid; __le32 byte_60_qpst_tempid;
__le32 qkey_xrcd; __le32 qkey_xrcd;
__le32 byte_68_rq_db; __le32 byte_68_rq_db;
__le32 rq_db_record_addr; __le32 rq_db_record_addr;
@@ -492,26 +496,15 @@ struct hns_roce_v2_qp_context {
#define V2_QPC_BYTE_56_LP_PKTN_INI_S 28 #define V2_QPC_BYTE_56_LP_PKTN_INI_S 28
#define V2_QPC_BYTE_56_LP_PKTN_INI_M GENMASK(31, 28) #define V2_QPC_BYTE_56_LP_PKTN_INI_M GENMASK(31, 28)
#define V2_QPC_BYTE_60_MAPID_S 0 #define V2_QPC_BYTE_60_TEMPID_S 0
#define V2_QPC_BYTE_60_MAPID_M GENMASK(12, 0) #define V2_QPC_BYTE_60_TEMPID_M GENMASK(7, 0)
#define V2_QPC_BYTE_60_INNER_MAP_IND_S 13 #define V2_QPC_BYTE_60_SCC_TOKEN_S 8
#define V2_QPC_BYTE_60_SCC_TOKEN_M GENMASK(26, 8)
#define V2_QPC_BYTE_60_SQ_MAP_IND_S 14 #define V2_QPC_BYTE_60_SQ_DB_DOING_S 27
#define V2_QPC_BYTE_60_RQ_MAP_IND_S 15 #define V2_QPC_BYTE_60_RQ_DB_DOING_S 28
#define V2_QPC_BYTE_60_TEMPID_S 16
#define V2_QPC_BYTE_60_TEMPID_M GENMASK(22, 16)
#define V2_QPC_BYTE_60_EXT_MAP_IND_S 23
#define V2_QPC_BYTE_60_RTY_NUM_INI_BAK_S 24
#define V2_QPC_BYTE_60_RTY_NUM_INI_BAK_M GENMASK(26, 24)
#define V2_QPC_BYTE_60_SQ_RLS_IND_S 27
#define V2_QPC_BYTE_60_SQ_EXT_IND_S 28
#define V2_QPC_BYTE_60_QP_ST_S 29 #define V2_QPC_BYTE_60_QP_ST_S 29
#define V2_QPC_BYTE_60_QP_ST_M GENMASK(31, 29) #define V2_QPC_BYTE_60_QP_ST_M GENMASK(31, 29)
@@ -534,6 +527,7 @@ struct hns_roce_v2_qp_context {
#define V2_QPC_BYTE_76_RQIE_S 28 #define V2_QPC_BYTE_76_RQIE_S 28
#define V2_QPC_BYTE_76_RQ_VLAN_EN_S 30
#define V2_QPC_BYTE_80_RX_CQN_S 0 #define V2_QPC_BYTE_80_RX_CQN_S 0
#define V2_QPC_BYTE_80_RX_CQN_M GENMASK(23, 0) #define V2_QPC_BYTE_80_RX_CQN_M GENMASK(23, 0)
@@ -588,7 +582,7 @@ struct hns_roce_v2_qp_context {
#define V2_QPC_BYTE_140_RR_MAX_S 12 #define V2_QPC_BYTE_140_RR_MAX_S 12
#define V2_QPC_BYTE_140_RR_MAX_M GENMASK(14, 12) #define V2_QPC_BYTE_140_RR_MAX_M GENMASK(14, 12)
#define V2_QPC_BYTE_140_RSVD_RAQ_MAP_S 15 #define V2_QPC_BYTE_140_RQ_RTY_WAIT_DO_S 15
#define V2_QPC_BYTE_140_RAQ_TRRL_HEAD_S 16 #define V2_QPC_BYTE_140_RAQ_TRRL_HEAD_S 16
#define V2_QPC_BYTE_140_RAQ_TRRL_HEAD_M GENMASK(23, 16) #define V2_QPC_BYTE_140_RAQ_TRRL_HEAD_M GENMASK(23, 16)
@@ -599,8 +593,6 @@ struct hns_roce_v2_qp_context {
#define V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_S 0 #define V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_S 0
#define V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_M GENMASK(23, 0) #define V2_QPC_BYTE_144_RAQ_RTY_INI_PSN_M GENMASK(23, 0)
#define V2_QPC_BYTE_144_RAQ_RTY_INI_IND_S 24
#define V2_QPC_BYTE_144_RAQ_CREDIT_S 25 #define V2_QPC_BYTE_144_RAQ_CREDIT_S 25
#define V2_QPC_BYTE_144_RAQ_CREDIT_M GENMASK(29, 25) #define V2_QPC_BYTE_144_RAQ_CREDIT_M GENMASK(29, 25)
@@ -637,9 +629,10 @@ struct hns_roce_v2_qp_context {
#define V2_QPC_BYTE_168_LP_SGEN_INI_S 22 #define V2_QPC_BYTE_168_LP_SGEN_INI_S 22
#define V2_QPC_BYTE_168_LP_SGEN_INI_M GENMASK(23, 22) #define V2_QPC_BYTE_168_LP_SGEN_INI_M GENMASK(23, 22)
#define V2_QPC_BYTE_168_SQ_SHIFT_BAK_S 24 #define V2_QPC_BYTE_168_SQ_VLAN_EN_S 24
#define V2_QPC_BYTE_168_SQ_SHIFT_BAK_M GENMASK(27, 24) #define V2_QPC_BYTE_168_POLL_DB_WAIT_DO_S 25
#define V2_QPC_BYTE_168_SCC_TOKEN_FORBID_SQ_DEQ_S 26
#define V2_QPC_BYTE_168_WAIT_ACK_TIMEOUT_S 27
#define V2_QPC_BYTE_168_IRRL_IDX_LSB_S 28 #define V2_QPC_BYTE_168_IRRL_IDX_LSB_S 28
#define V2_QPC_BYTE_168_IRRL_IDX_LSB_M GENMASK(31, 28) #define V2_QPC_BYTE_168_IRRL_IDX_LSB_M GENMASK(31, 28)
@@ -725,6 +718,10 @@ struct hns_roce_v2_qp_context {
#define V2_QPC_BYTE_232_IRRL_SGE_IDX_S 20 #define V2_QPC_BYTE_232_IRRL_SGE_IDX_S 20
#define V2_QPC_BYTE_232_IRRL_SGE_IDX_M GENMASK(28, 20) #define V2_QPC_BYTE_232_IRRL_SGE_IDX_M GENMASK(28, 20)
#define V2_QPC_BYTE_232_SO_LP_VLD_S 29
#define V2_QPC_BYTE_232_FENCE_LP_VLD_S 30
#define V2_QPC_BYTE_232_IRRL_LP_VLD_S 31
#define V2_QPC_BYTE_240_IRRL_TAIL_REAL_S 0 #define V2_QPC_BYTE_240_IRRL_TAIL_REAL_S 0
#define V2_QPC_BYTE_240_IRRL_TAIL_REAL_M GENMASK(7, 0) #define V2_QPC_BYTE_240_IRRL_TAIL_REAL_M GENMASK(7, 0)
@@ -743,6 +740,9 @@ struct hns_roce_v2_qp_context {
#define V2_QPC_BYTE_244_RNR_CNT_S 27 #define V2_QPC_BYTE_244_RNR_CNT_S 27
#define V2_QPC_BYTE_244_RNR_CNT_M GENMASK(29, 27) #define V2_QPC_BYTE_244_RNR_CNT_M GENMASK(29, 27)
#define V2_QPC_BYTE_244_LCL_OP_FLG_S 30
#define V2_QPC_BYTE_244_IRRL_RD_FLG_S 31
#define V2_QPC_BYTE_248_IRRL_PSN_S 0 #define V2_QPC_BYTE_248_IRRL_PSN_S 0
#define V2_QPC_BYTE_248_IRRL_PSN_M GENMASK(23, 0) #define V2_QPC_BYTE_248_IRRL_PSN_M GENMASK(23, 0)
@@ -818,6 +818,11 @@ struct hns_roce_v2_cqe {
#define V2_CQE_BYTE_28_PORT_TYPE_S 16 #define V2_CQE_BYTE_28_PORT_TYPE_S 16
#define V2_CQE_BYTE_28_PORT_TYPE_M GENMASK(17, 16) #define V2_CQE_BYTE_28_PORT_TYPE_M GENMASK(17, 16)
#define V2_CQE_BYTE_28_VID_S 18
#define V2_CQE_BYTE_28_VID_M GENMASK(29, 18)
#define V2_CQE_BYTE_28_VID_VLD_S 30
#define V2_CQE_BYTE_32_RMT_QPN_S 0 #define V2_CQE_BYTE_32_RMT_QPN_S 0
#define V2_CQE_BYTE_32_RMT_QPN_M GENMASK(23, 0) #define V2_CQE_BYTE_32_RMT_QPN_M GENMASK(23, 0)
@@ -878,8 +883,19 @@ struct hns_roce_v2_mpt_entry {
#define V2_MPT_BYTE_8_LW_EN_S 7 #define V2_MPT_BYTE_8_LW_EN_S 7
#define V2_MPT_BYTE_8_MW_CNT_S 8
#define V2_MPT_BYTE_8_MW_CNT_M GENMASK(31, 8)
#define V2_MPT_BYTE_12_FRE_S 0
#define V2_MPT_BYTE_12_PA_S 1 #define V2_MPT_BYTE_12_PA_S 1
#define V2_MPT_BYTE_12_MR_MW_S 4
#define V2_MPT_BYTE_12_BPD_S 5
#define V2_MPT_BYTE_12_BQP_S 6
#define V2_MPT_BYTE_12_INNER_PA_VLD_S 7 #define V2_MPT_BYTE_12_INNER_PA_VLD_S 7
#define V2_MPT_BYTE_12_MW_BIND_QPN_S 8 #define V2_MPT_BYTE_12_MW_BIND_QPN_S 8
@@ -988,6 +1004,8 @@ struct hns_roce_v2_ud_send_wqe {
#define V2_UD_SEND_WQE_BYTE_40_PORTN_S 24 #define V2_UD_SEND_WQE_BYTE_40_PORTN_S 24
#define V2_UD_SEND_WQE_BYTE_40_PORTN_M GENMASK(26, 24) #define V2_UD_SEND_WQE_BYTE_40_PORTN_M GENMASK(26, 24)
#define V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S 30
#define V2_UD_SEND_WQE_BYTE_40_LBI_S 31 #define V2_UD_SEND_WQE_BYTE_40_LBI_S 31
#define V2_UD_SEND_WQE_DMAC_0_S 0 #define V2_UD_SEND_WQE_DMAC_0_S 0
@@ -1042,6 +1060,16 @@ struct hns_roce_v2_rc_send_wqe {
#define V2_RC_SEND_WQE_BYTE_4_INLINE_S 12 #define V2_RC_SEND_WQE_BYTE_4_INLINE_S 12
#define V2_RC_FRMR_WQE_BYTE_4_BIND_EN_S 19
#define V2_RC_FRMR_WQE_BYTE_4_ATOMIC_S 20
#define V2_RC_FRMR_WQE_BYTE_4_RR_S 21
#define V2_RC_FRMR_WQE_BYTE_4_RW_S 22
#define V2_RC_FRMR_WQE_BYTE_4_LW_S 23
#define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_S 0 #define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_S 0
#define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_M GENMASK(23, 0) #define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_M GENMASK(23, 0)
@@ -1051,6 +1079,16 @@ struct hns_roce_v2_rc_send_wqe {
#define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S 0 #define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S 0
#define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M GENMASK(23, 0) #define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M GENMASK(23, 0)
struct hns_roce_wqe_frmr_seg {
__le32 pbl_size;
__le32 mode_buf_pg_sz;
};
#define V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_S 4
#define V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_M GENMASK(7, 4)
#define V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S 8
struct hns_roce_v2_wqe_data_seg { struct hns_roce_v2_wqe_data_seg {
__le32 len; __le32 len;
__le32 lkey; __le32 lkey;
@@ -1068,6 +1106,11 @@ struct hns_roce_query_version {
__le32 rsv[5]; __le32 rsv[5];
}; };
struct hns_roce_query_fw_info {
__le32 fw_ver;
__le32 rsv[5];
};
struct hns_roce_cfg_llm_a { struct hns_roce_cfg_llm_a {
__le32 base_addr_l; __le32 base_addr_l;
__le32 base_addr_h; __le32 base_addr_h;
@@ -1564,4 +1607,9 @@ struct hns_roce_eq_context {
#define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S 0 #define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S 0
#define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M GENMASK(23, 0) #define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M GENMASK(23, 0)
struct hns_roce_wqe_atomic_seg {
__le64 fetchadd_swap_data;
__le64 cmp_data;
};
#endif #endif


@@ -196,6 +196,7 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
memset(props, 0, sizeof(*props)); memset(props, 0, sizeof(*props));
props->fw_ver = hr_dev->caps.fw_ver;
props->sys_image_guid = cpu_to_be64(hr_dev->sys_image_guid); props->sys_image_guid = cpu_to_be64(hr_dev->sys_image_guid);
props->max_mr_size = (u64)(~(0ULL)); props->max_mr_size = (u64)(~(0ULL));
props->page_size_cap = hr_dev->caps.page_size_cap; props->page_size_cap = hr_dev->caps.page_size_cap;
@@ -215,7 +216,8 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
props->max_pd = hr_dev->caps.num_pds; props->max_pd = hr_dev->caps.num_pds;
props->max_qp_rd_atom = hr_dev->caps.max_qp_dest_rdma; props->max_qp_rd_atom = hr_dev->caps.max_qp_dest_rdma;
props->max_qp_init_rd_atom = hr_dev->caps.max_qp_init_rdma; props->max_qp_init_rd_atom = hr_dev->caps.max_qp_init_rdma;
props->atomic_cap = IB_ATOMIC_NONE; props->atomic_cap = hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_ATOMIC ?
IB_ATOMIC_HCA : IB_ATOMIC_NONE;
props->max_pkeys = 1; props->max_pkeys = 1;
props->local_ca_ack_delay = hr_dev->caps.local_ca_ack_delay; props->local_ca_ack_delay = hr_dev->caps.local_ca_ack_delay;
@@ -344,8 +346,6 @@ static struct ib_ucontext *hns_roce_alloc_ucontext(struct ib_device *ib_dev,
if (ret) if (ret)
goto error_fail_uar_alloc; goto error_fail_uar_alloc;
INIT_LIST_HEAD(&context->vma_list);
mutex_init(&context->vma_list_mutex);
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) { if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) {
INIT_LIST_HEAD(&context->page_list); INIT_LIST_HEAD(&context->page_list);
mutex_init(&context->page_mutex); mutex_init(&context->page_mutex);
@@ -376,76 +376,34 @@ static int hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext)
return 0; return 0;
} }
static void hns_roce_vma_open(struct vm_area_struct *vma)
{
vma->vm_ops = NULL;
}
static void hns_roce_vma_close(struct vm_area_struct *vma)
{
struct hns_roce_vma_data *vma_data;
vma_data = (struct hns_roce_vma_data *)vma->vm_private_data;
vma_data->vma = NULL;
mutex_lock(vma_data->vma_list_mutex);
list_del(&vma_data->list);
mutex_unlock(vma_data->vma_list_mutex);
kfree(vma_data);
}
static const struct vm_operations_struct hns_roce_vm_ops = {
.open = hns_roce_vma_open,
.close = hns_roce_vma_close,
};
static int hns_roce_set_vma_data(struct vm_area_struct *vma,
struct hns_roce_ucontext *context)
{
struct list_head *vma_head = &context->vma_list;
struct hns_roce_vma_data *vma_data;
vma_data = kzalloc(sizeof(*vma_data), GFP_KERNEL);
if (!vma_data)
return -ENOMEM;
vma_data->vma = vma;
vma_data->vma_list_mutex = &context->vma_list_mutex;
vma->vm_private_data = vma_data;
vma->vm_ops = &hns_roce_vm_ops;
mutex_lock(&context->vma_list_mutex);
list_add(&vma_data->list, vma_head);
mutex_unlock(&context->vma_list_mutex);
return 0;
}
static int hns_roce_mmap(struct ib_ucontext *context, static int hns_roce_mmap(struct ib_ucontext *context,
struct vm_area_struct *vma) struct vm_area_struct *vma)
{ {
struct hns_roce_dev *hr_dev = to_hr_dev(context->device); struct hns_roce_dev *hr_dev = to_hr_dev(context->device);
if (((vma->vm_end - vma->vm_start) % PAGE_SIZE) != 0) switch (vma->vm_pgoff) {
return -EINVAL; case 0:
return rdma_user_mmap_io(context, vma,
to_hr_ucontext(context)->uar.pfn,
PAGE_SIZE,
pgprot_noncached(vma->vm_page_prot));
if (vma->vm_pgoff == 0) { /* vm_pgoff: 1 -- TPTR */
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); case 1:
if (io_remap_pfn_range(vma, vma->vm_start, if (!hr_dev->tptr_dma_addr || !hr_dev->tptr_size)
to_hr_ucontext(context)->uar.pfn, return -EINVAL;
PAGE_SIZE, vma->vm_page_prot)) /*
return -EAGAIN; * FIXME: using io_remap_pfn_range on the dma address returned
} else if (vma->vm_pgoff == 1 && hr_dev->tptr_dma_addr && * by dma_alloc_coherent is totally wrong.
hr_dev->tptr_size) { */
/* vm_pgoff: 1 -- TPTR */ return rdma_user_mmap_io(context, vma,
if (io_remap_pfn_range(vma, vma->vm_start, hr_dev->tptr_dma_addr >> PAGE_SHIFT,
hr_dev->tptr_dma_addr >> PAGE_SHIFT, hr_dev->tptr_size,
hr_dev->tptr_size, vma->vm_page_prot);
vma->vm_page_prot))
return -EAGAIN;
} else
return -EINVAL;
return hns_roce_set_vma_data(vma, to_hr_ucontext(context)); default:
return -EINVAL;
}
} }
static int hns_roce_port_immutable(struct ib_device *ib_dev, u8 port_num, static int hns_roce_port_immutable(struct ib_device *ib_dev, u8 port_num,
@@ -471,21 +429,6 @@ static int hns_roce_port_immutable(struct ib_device *ib_dev, u8 port_num,
static void hns_roce_disassociate_ucontext(struct ib_ucontext *ibcontext) static void hns_roce_disassociate_ucontext(struct ib_ucontext *ibcontext)
{ {
struct hns_roce_ucontext *context = to_hr_ucontext(ibcontext);
struct hns_roce_vma_data *vma_data, *n;
struct vm_area_struct *vma;
mutex_lock(&context->vma_list_mutex);
list_for_each_entry_safe(vma_data, n, &context->vma_list, list) {
vma = vma_data->vma;
zap_vma_ptes(vma, vma->vm_start, PAGE_SIZE);
vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
vma->vm_ops = NULL;
list_del(&vma_data->list);
kfree(vma_data);
}
mutex_unlock(&context->vma_list_mutex);
} }
static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev) static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev)
@@ -508,7 +451,6 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
spin_lock_init(&iboe->lock); spin_lock_init(&iboe->lock);
ib_dev = &hr_dev->ib_dev; ib_dev = &hr_dev->ib_dev;
strlcpy(ib_dev->name, "hns_%d", IB_DEVICE_NAME_MAX);
ib_dev->owner = THIS_MODULE; ib_dev->owner = THIS_MODULE;
ib_dev->node_type = RDMA_NODE_IB_CA; ib_dev->node_type = RDMA_NODE_IB_CA;
@@ -584,12 +526,27 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
ib_dev->uverbs_cmd_mask |= (1ULL << IB_USER_VERBS_CMD_REREG_MR); ib_dev->uverbs_cmd_mask |= (1ULL << IB_USER_VERBS_CMD_REREG_MR);
} }
/* MW */
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_MW) {
ib_dev->alloc_mw = hns_roce_alloc_mw;
ib_dev->dealloc_mw = hns_roce_dealloc_mw;
ib_dev->uverbs_cmd_mask |=
(1ULL << IB_USER_VERBS_CMD_ALLOC_MW) |
(1ULL << IB_USER_VERBS_CMD_DEALLOC_MW);
}
/* FRMR */
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR) {
ib_dev->alloc_mr = hns_roce_alloc_mr;
ib_dev->map_mr_sg = hns_roce_map_mr_sg;
}
/* OTHERS */ /* OTHERS */
ib_dev->get_port_immutable = hns_roce_port_immutable; ib_dev->get_port_immutable = hns_roce_port_immutable;
ib_dev->disassociate_ucontext = hns_roce_disassociate_ucontext; ib_dev->disassociate_ucontext = hns_roce_disassociate_ucontext;
ib_dev->driver_id = RDMA_DRIVER_HNS; ib_dev->driver_id = RDMA_DRIVER_HNS;
ret = ib_register_device(ib_dev, NULL); ret = ib_register_device(ib_dev, "hns_%d", NULL);
if (ret) { if (ret) {
dev_err(dev, "ib_register_device failed!\n"); dev_err(dev, "ib_register_device failed!\n");
return ret; return ret;
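
With IB_USER_VERBS_CMD_ALLOC_MW/DEALLOC_MW now exposed when HNS_ROCE_CAP_FLAG_MW is set, userspace can bind type-1 memory windows on this device. A hypothetical libibverbs sketch -- the PD, QP and an MR registered with IBV_ACCESS_MW_BIND are assumed to exist and are not shown in this series:

#include <infiniband/verbs.h>
#include <stdint.h>
#include <stddef.h>

/* Hedged example: allocate a type-1 window, bind it over buf, and return
 * the rkey that a remote peer would use. */
static int example_bind_mw(struct ibv_pd *pd, struct ibv_qp *qp,
                           struct ibv_mr *mr, void *buf, size_t len,
                           uint32_t *out_rkey)
{
        struct ibv_mw *mw = ibv_alloc_mw(pd, IBV_MW_TYPE_1);
        struct ibv_mw_bind bind = {
                .wr_id      = 1,
                .send_flags = IBV_SEND_SIGNALED,
                .bind_info  = {
                        .mr              = mr, /* must carry IBV_ACCESS_MW_BIND */
                        .addr            = (uint64_t)(uintptr_t)buf,
                        .length          = len,
                        .mw_access_flags = IBV_ACCESS_REMOTE_READ |
                                           IBV_ACCESS_REMOTE_WRITE,
                },
        };
        int ret;

        if (!mw)
                return -1;

        ret = ibv_bind_mw(qp, mw, &bind); /* type-1 binds go through the QP */
        if (ret) {
                ibv_dealloc_mw(mw);
                return ret;
        }

        *out_rkey = mw->rkey; /* updated by the bind; hand it to the peer */
        return 0;
}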


@@ -329,7 +329,7 @@ static int hns_roce_mhop_alloc(struct hns_roce_dev *hr_dev, int npages,
u64 bt_idx; u64 bt_idx;
u64 size; u64 size;
mhop_num = hr_dev->caps.pbl_hop_num; mhop_num = (mr->type == MR_TYPE_FRMR ? 1 : hr_dev->caps.pbl_hop_num);
pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT); pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
pbl_last_bt_num = (npages + pbl_bt_sz / 8 - 1) / (pbl_bt_sz / 8); pbl_last_bt_num = (npages + pbl_bt_sz / 8 - 1) / (pbl_bt_sz / 8);
@@ -351,7 +351,7 @@ static int hns_roce_mhop_alloc(struct hns_roce_dev *hr_dev, int npages,
mr->pbl_size = npages; mr->pbl_size = npages;
mr->pbl_ba = mr->pbl_dma_addr; mr->pbl_ba = mr->pbl_dma_addr;
mr->pbl_hop_num = hr_dev->caps.pbl_hop_num; mr->pbl_hop_num = mhop_num;
mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz; mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz; mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
return 0; return 0;
@@ -511,7 +511,6 @@ static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova,
mr->key = hw_index_to_key(index); /* MR key */ mr->key = hw_index_to_key(index); /* MR key */
if (size == ~0ull) { if (size == ~0ull) {
mr->type = MR_TYPE_DMA;
mr->pbl_buf = NULL; mr->pbl_buf = NULL;
mr->pbl_dma_addr = 0; mr->pbl_dma_addr = 0;
/* PBL multi-hop addressing parameters */ /* PBL multi-hop addressing parameters */
@@ -522,7 +521,6 @@ static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova,
mr->pbl_l1_dma_addr = NULL; mr->pbl_l1_dma_addr = NULL;
mr->pbl_l0_dma_addr = 0; mr->pbl_l0_dma_addr = 0;
} else { } else {
mr->type = MR_TYPE_MR;
if (!hr_dev->caps.pbl_hop_num) { if (!hr_dev->caps.pbl_hop_num) {
mr->pbl_buf = dma_alloc_coherent(dev, npages * 8, mr->pbl_buf = dma_alloc_coherent(dev, npages * 8,
&(mr->pbl_dma_addr), &(mr->pbl_dma_addr),
@@ -548,9 +546,9 @@ static void hns_roce_mhop_free(struct hns_roce_dev *hr_dev,
u32 mhop_num; u32 mhop_num;
u64 bt_idx; u64 bt_idx;
npages = ib_umem_page_count(mr->umem); npages = mr->pbl_size;
pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT); pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
mhop_num = hr_dev->caps.pbl_hop_num; mhop_num = (mr->type == MR_TYPE_FRMR) ? 1 : hr_dev->caps.pbl_hop_num;
if (mhop_num == HNS_ROCE_HOP_NUM_0) if (mhop_num == HNS_ROCE_HOP_NUM_0)
return; return;
@@ -636,7 +634,8 @@ static void hns_roce_mr_free(struct hns_roce_dev *hr_dev,
} }
if (mr->size != ~0ULL) { if (mr->size != ~0ULL) {
npages = ib_umem_page_count(mr->umem); if (mr->type == MR_TYPE_MR)
npages = ib_umem_page_count(mr->umem);
if (!hr_dev->caps.pbl_hop_num) if (!hr_dev->caps.pbl_hop_num)
dma_free_coherent(dev, (unsigned int)(npages * 8), dma_free_coherent(dev, (unsigned int)(npages * 8),
@@ -674,7 +673,10 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev,
goto err_table; goto err_table;
} }
ret = hr_dev->hw->write_mtpt(mailbox->buf, mr, mtpt_idx); if (mr->type != MR_TYPE_FRMR)
ret = hr_dev->hw->write_mtpt(mailbox->buf, mr, mtpt_idx);
else
ret = hr_dev->hw->frmr_write_mtpt(mailbox->buf, mr);
if (ret) { if (ret) {
dev_err(dev, "Write mtpt fail!\n"); dev_err(dev, "Write mtpt fail!\n");
goto err_page; goto err_page;
@@ -855,6 +857,8 @@ struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc)
if (mr == NULL) if (mr == NULL)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
mr->type = MR_TYPE_DMA;
/* Allocate memory region key */ /* Allocate memory region key */
ret = hns_roce_mr_alloc(to_hr_dev(pd->device), to_hr_pd(pd)->pdn, 0, ret = hns_roce_mr_alloc(to_hr_dev(pd->device), to_hr_pd(pd)->pdn, 0,
~0ULL, acc, 0, mr); ~0ULL, acc, 0, mr);
@@ -1031,6 +1035,8 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
} }
} }
mr->type = MR_TYPE_MR;
ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, virt_addr, length, ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, virt_addr, length,
access_flags, n, mr); access_flags, n, mr);
if (ret) if (ret)
@@ -1201,3 +1207,193 @@ int hns_roce_dereg_mr(struct ib_mr *ibmr)
return ret; return ret;
} }
struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
u32 max_num_sg)
{
struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
struct device *dev = hr_dev->dev;
struct hns_roce_mr *mr;
u64 length;
u32 page_size;
int ret;
page_size = 1 << (hr_dev->caps.pbl_buf_pg_sz + PAGE_SHIFT);
length = max_num_sg * page_size;
if (mr_type != IB_MR_TYPE_MEM_REG)
return ERR_PTR(-EINVAL);
if (max_num_sg > HNS_ROCE_FRMR_MAX_PA) {
dev_err(dev, "max_num_sg larger than %d\n",
HNS_ROCE_FRMR_MAX_PA);
return ERR_PTR(-EINVAL);
}
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
mr->type = MR_TYPE_FRMR;
/* Allocate memory region key */
ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, 0, length,
0, max_num_sg, mr);
if (ret)
goto err_free;
ret = hns_roce_mr_enable(hr_dev, mr);
if (ret)
goto err_mr;
mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
mr->umem = NULL;
return &mr->ibmr;
err_mr:
hns_roce_mr_free(to_hr_dev(pd->device), mr);
err_free:
kfree(mr);
return ERR_PTR(ret);
}
static int hns_roce_set_page(struct ib_mr *ibmr, u64 addr)
{
struct hns_roce_mr *mr = to_hr_mr(ibmr);
mr->pbl_buf[mr->npages++] = cpu_to_le64(addr);
return 0;
}
int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset)
{
struct hns_roce_mr *mr = to_hr_mr(ibmr);
mr->npages = 0;
return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, hns_roce_set_page);
}
static void hns_roce_mw_free(struct hns_roce_dev *hr_dev,
struct hns_roce_mw *mw)
{
struct device *dev = hr_dev->dev;
int ret;
if (mw->enabled) {
ret = hns_roce_hw2sw_mpt(hr_dev, NULL, key_to_hw_index(mw->rkey)
& (hr_dev->caps.num_mtpts - 1));
if (ret)
dev_warn(dev, "MW HW2SW_MPT failed (%d)\n", ret);
hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table,
key_to_hw_index(mw->rkey));
}
hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap,
key_to_hw_index(mw->rkey), BITMAP_NO_RR);
}
static int hns_roce_mw_enable(struct hns_roce_dev *hr_dev,
struct hns_roce_mw *mw)
{
struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
struct hns_roce_cmd_mailbox *mailbox;
struct device *dev = hr_dev->dev;
unsigned long mtpt_idx = key_to_hw_index(mw->rkey);
int ret;
/* prepare HEM entry memory */
ret = hns_roce_table_get(hr_dev, &mr_table->mtpt_table, mtpt_idx);
if (ret)
return ret;
mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
if (IS_ERR(mailbox)) {
ret = PTR_ERR(mailbox);
goto err_table;
}
ret = hr_dev->hw->mw_write_mtpt(mailbox->buf, mw);
if (ret) {
dev_err(dev, "MW write mtpt fail!\n");
goto err_page;
}
ret = hns_roce_sw2hw_mpt(hr_dev, mailbox,
mtpt_idx & (hr_dev->caps.num_mtpts - 1));
if (ret) {
dev_err(dev, "MW sw2hw_mpt failed (%d)\n", ret);
goto err_page;
}
mw->enabled = 1;
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
return 0;
err_page:
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
err_table:
hns_roce_table_put(hr_dev, &mr_table->mtpt_table, mtpt_idx);
return ret;
}
struct ib_mw *hns_roce_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type,
struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ib_pd->device);
struct hns_roce_mw *mw;
unsigned long index = 0;
int ret;
mw = kmalloc(sizeof(*mw), GFP_KERNEL);
if (!mw)
return ERR_PTR(-ENOMEM);
/* Allocate a key for mw from bitmap */
ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index);
if (ret)
goto err_bitmap;
mw->rkey = hw_index_to_key(index);
mw->ibmw.rkey = mw->rkey;
mw->ibmw.type = type;
mw->pdn = to_hr_pd(ib_pd)->pdn;
mw->pbl_hop_num = hr_dev->caps.pbl_hop_num;
mw->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
mw->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;
ret = hns_roce_mw_enable(hr_dev, mw);
if (ret)
goto err_mw;
return &mw->ibmw;
err_mw:
hns_roce_mw_free(hr_dev, mw);
err_bitmap:
kfree(mw);
return ERR_PTR(ret);
}
int hns_roce_dealloc_mw(struct ib_mw *ibmw)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibmw->device);
struct hns_roce_mw *mw = to_hr_mw(ibmw);
hns_roce_mw_free(hr_dev, mw);
kfree(mw);
return 0;
}
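
On the FRMR side, a kernel ULP reaches the new hns_roce_alloc_mr()/hns_roce_map_mr_sg() entry points through the generic fast-registration flow. A minimal, hypothetical sketch under the assumption that the caller already owns a PD, a connected QP and a mapped scatterlist (none of this is code from the series):

#include <linux/err.h>
#include <linux/scatterlist.h>
#include <rdma/ib_verbs.h>

/* Hedged example: allocate a fast-reg MR, map a scatterlist into it and
 * post the IB_WR_REG_MR work request that programs the HCA. */
static int example_fast_reg(struct ib_pd *pd, struct ib_qp *qp,
                            struct scatterlist *sgl, int nents)
{
        struct ib_reg_wr reg_wr = {};
        const struct ib_send_wr *bad_wr;
        struct ib_mr *mr;
        int n;

        mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, nents);
        if (IS_ERR(mr))
                return PTR_ERR(mr);

        n = ib_map_mr_sg(mr, sgl, nents, NULL, PAGE_SIZE);
        if (n < nents) {
                ib_dereg_mr(mr);
                return n < 0 ? n : -EINVAL;
        }

        reg_wr.wr.opcode = IB_WR_REG_MR;
        reg_wr.mr        = mr;
        reg_wr.key       = mr->rkey;
        reg_wr.access    = IB_ACCESS_LOCAL_WRITE |
                           IB_ACCESS_REMOTE_READ |
                           IB_ACCESS_REMOTE_WRITE;

        /* The caller keeps mr for a later local invalidate + ib_dereg_mr(). */
        return ib_post_send(qp, &reg_wr.wr, &bad_wr);
}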


@@ -31,6 +31,7 @@
* SOFTWARE. * SOFTWARE.
*/ */
#include <linux/pci.h>
#include <linux/platform_device.h> #include <linux/platform_device.h>
#include <rdma/ib_addr.h> #include <rdma/ib_addr.h>
#include <rdma/ib_umem.h> #include <rdma/ib_umem.h>
@@ -343,6 +344,7 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
{ {
u32 roundup_sq_stride = roundup_pow_of_two(hr_dev->caps.max_sq_desc_sz); u32 roundup_sq_stride = roundup_pow_of_two(hr_dev->caps.max_sq_desc_sz);
u8 max_sq_stride = ilog2(roundup_sq_stride); u8 max_sq_stride = ilog2(roundup_sq_stride);
u32 ex_sge_num;
u32 page_size; u32 page_size;
u32 max_cnt; u32 max_cnt;
@@ -372,7 +374,18 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
if (hr_qp->sq.max_gs > 2) if (hr_qp->sq.max_gs > 2)
hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt * hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt *
(hr_qp->sq.max_gs - 2)); (hr_qp->sq.max_gs - 2));
if ((hr_qp->sq.max_gs > 2) && (hr_dev->pci_dev->revision == 0x20)) {
if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) {
dev_err(hr_dev->dev,
"The extended sge cnt error! sge_cnt=%d\n",
hr_qp->sge.sge_cnt);
return -EINVAL;
}
}
hr_qp->sge.sge_shift = 4; hr_qp->sge.sge_shift = 4;
ex_sge_num = hr_qp->sge.sge_cnt;
/* Get buf size, SQ and RQ are aligned to page_szie */ /* Get buf size, SQ and RQ are aligned to page_szie */
if (hr_dev->caps.max_sq_sg <= 2) { if (hr_dev->caps.max_sq_sg <= 2) {
@@ -386,6 +399,8 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
hr_qp->sq.wqe_shift), PAGE_SIZE); hr_qp->sq.wqe_shift), PAGE_SIZE);
} else { } else {
page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT);
hr_qp->sge.sge_cnt =
max(page_size / (1 << hr_qp->sge.sge_shift), ex_sge_num);
hr_qp->buff_size = HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt << hr_qp->buff_size = HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt <<
hr_qp->rq.wqe_shift), page_size) + hr_qp->rq.wqe_shift), page_size) +
HNS_ROCE_ALOGN_UP((hr_qp->sge.sge_cnt << HNS_ROCE_ALOGN_UP((hr_qp->sge.sge_cnt <<
@@ -394,7 +409,7 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
hr_qp->sq.wqe_shift), page_size); hr_qp->sq.wqe_shift), page_size);
hr_qp->sq.offset = 0; hr_qp->sq.offset = 0;
if (hr_qp->sge.sge_cnt) { if (ex_sge_num) {
hr_qp->sge.offset = HNS_ROCE_ALOGN_UP( hr_qp->sge.offset = HNS_ROCE_ALOGN_UP(
(hr_qp->sq.wqe_cnt << (hr_qp->sq.wqe_cnt <<
hr_qp->sq.wqe_shift), hr_qp->sq.wqe_shift),
@@ -465,6 +480,14 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev,
hr_qp->sge.sge_shift = 4; hr_qp->sge.sge_shift = 4;
} }
if ((hr_qp->sq.max_gs > 2) && hr_dev->pci_dev->revision == 0x20) {
if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) {
dev_err(dev, "The extended sge cnt error! sge_cnt=%d\n",
hr_qp->sge.sge_cnt);
return -EINVAL;
}
}
/* Get buf size, SQ and RQ are aligned to PAGE_SIZE */ /* Get buf size, SQ and RQ are aligned to PAGE_SIZE */
page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT);
hr_qp->sq.offset = 0; hr_qp->sq.offset = 0;
@@ -472,6 +495,8 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev,
page_size); page_size);
if (hr_dev->caps.max_sq_sg > 2 && hr_qp->sge.sge_cnt) { if (hr_dev->caps.max_sq_sg > 2 && hr_qp->sge.sge_cnt) {
hr_qp->sge.sge_cnt = max(page_size/(1 << hr_qp->sge.sge_shift),
(u32)hr_qp->sge.sge_cnt);
hr_qp->sge.offset = size; hr_qp->sge.offset = size;
size += HNS_ROCE_ALOGN_UP(hr_qp->sge.sge_cnt << size += HNS_ROCE_ALOGN_UP(hr_qp->sge.sge_cnt <<
hr_qp->sge.sge_shift, page_size); hr_qp->sge.sge_shift, page_size);
@@ -952,8 +977,8 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
} }
} }
if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask, if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
IB_LINK_LAYER_ETHERNET)) { attr_mask)) {
dev_err(dev, "ib_modify_qp_is_ok failed\n"); dev_err(dev, "ib_modify_qp_is_ok failed\n");
goto out; goto out;
} }
@@ -1106,14 +1131,20 @@ int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev)
{ {
struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
int reserved_from_top = 0; int reserved_from_top = 0;
int reserved_from_bot;
int ret; int ret;
spin_lock_init(&qp_table->lock); spin_lock_init(&qp_table->lock);
INIT_RADIX_TREE(&hr_dev->qp_table_tree, GFP_ATOMIC); INIT_RADIX_TREE(&hr_dev->qp_table_tree, GFP_ATOMIC);
/* A port include two SQP, six port total 12 */ /* In hw v1, a port include two SQP, six ports total 12 */
if (hr_dev->caps.max_sq_sg <= 2)
reserved_from_bot = SQP_NUM;
else
reserved_from_bot = hr_dev->caps.reserved_qps;
ret = hns_roce_bitmap_init(&qp_table->bitmap, hr_dev->caps.num_qps, ret = hns_roce_bitmap_init(&qp_table->bitmap, hr_dev->caps.num_qps,
hr_dev->caps.num_qps - 1, SQP_NUM, hr_dev->caps.num_qps - 1, reserved_from_bot,
reserved_from_top); reserved_from_top);
if (ret) { if (ret) {
dev_err(hr_dev->dev, "qp bitmap init failed!error=%d\n", dev_err(hr_dev->dev, "qp bitmap init failed!error=%d\n",


@@ -1689,7 +1689,7 @@ static enum i40iw_status_code i40iw_add_mqh_6(struct i40iw_device *iwdev,
unsigned long flags; unsigned long flags;
rtnl_lock(); rtnl_lock();
for_each_netdev_rcu(&init_net, ip_dev) { for_each_netdev(&init_net, ip_dev) {
if ((((rdma_vlan_dev_vlan_id(ip_dev) < I40IW_NO_VLAN) && if ((((rdma_vlan_dev_vlan_id(ip_dev) < I40IW_NO_VLAN) &&
(rdma_vlan_dev_real_dev(ip_dev) == iwdev->netdev)) || (rdma_vlan_dev_real_dev(ip_dev) == iwdev->netdev)) ||
(ip_dev == iwdev->netdev)) && (ip_dev->flags & IFF_UP)) { (ip_dev == iwdev->netdev)) && (ip_dev->flags & IFF_UP)) {


@@ -2135,10 +2135,10 @@ static int i40iw_dereg_mr(struct ib_mr *ib_mr)
} }
/** /**
* i40iw_show_rev * hw_rev_show
*/ */
static ssize_t i40iw_show_rev(struct device *dev, static ssize_t hw_rev_show(struct device *dev,
struct device_attribute *attr, char *buf) struct device_attribute *attr, char *buf)
{ {
struct i40iw_ib_device *iwibdev = container_of(dev, struct i40iw_ib_device *iwibdev = container_of(dev,
struct i40iw_ib_device, struct i40iw_ib_device,
@@ -2147,34 +2147,37 @@ static ssize_t i40iw_show_rev(struct device *dev,
return sprintf(buf, "%x\n", hw_rev); return sprintf(buf, "%x\n", hw_rev);
} }
static DEVICE_ATTR_RO(hw_rev);
/** /**
* i40iw_show_hca * hca_type_show
*/ */
static ssize_t i40iw_show_hca(struct device *dev, static ssize_t hca_type_show(struct device *dev,
struct device_attribute *attr, char *buf) struct device_attribute *attr, char *buf)
{ {
return sprintf(buf, "I40IW\n"); return sprintf(buf, "I40IW\n");
} }
static DEVICE_ATTR_RO(hca_type);
/** /**
* i40iw_show_board * board_id_show
*/ */
static ssize_t i40iw_show_board(struct device *dev, static ssize_t board_id_show(struct device *dev,
struct device_attribute *attr, struct device_attribute *attr, char *buf)
char *buf)
{ {
return sprintf(buf, "%.*s\n", 32, "I40IW Board ID"); return sprintf(buf, "%.*s\n", 32, "I40IW Board ID");
} }
static DEVICE_ATTR_RO(board_id);
static DEVICE_ATTR(hw_rev, S_IRUGO, i40iw_show_rev, NULL); static struct attribute *i40iw_dev_attributes[] = {
static DEVICE_ATTR(hca_type, S_IRUGO, i40iw_show_hca, NULL); &dev_attr_hw_rev.attr,
static DEVICE_ATTR(board_id, S_IRUGO, i40iw_show_board, NULL); &dev_attr_hca_type.attr,
&dev_attr_board_id.attr,
NULL
};
static struct device_attribute *i40iw_dev_attributes[] = { static const struct attribute_group i40iw_attr_group = {
&dev_attr_hw_rev, .attrs = i40iw_dev_attributes,
&dev_attr_hca_type,
&dev_attr_board_id
}; };
/** /**
@@ -2752,7 +2755,6 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev
i40iw_pr_err("iwdev == NULL\n"); i40iw_pr_err("iwdev == NULL\n");
return NULL; return NULL;
} }
strlcpy(iwibdev->ibdev.name, "i40iw%d", IB_DEVICE_NAME_MAX);
iwibdev->ibdev.owner = THIS_MODULE; iwibdev->ibdev.owner = THIS_MODULE;
iwdev->iwibdev = iwibdev; iwdev->iwibdev = iwibdev;
iwibdev->iwdev = iwdev; iwibdev->iwdev = iwdev;
@@ -2850,20 +2852,6 @@ void i40iw_port_ibevent(struct i40iw_device *iwdev)
 	ib_dispatch_event(&event);
 }

-/**
- * i40iw_unregister_rdma_device - unregister of iwarp from IB
- * @iwibdev: rdma device ptr
- */
-static void i40iw_unregister_rdma_device(struct i40iw_ib_device *iwibdev)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(i40iw_dev_attributes); ++i)
-		device_remove_file(&iwibdev->ibdev.dev,
-				   i40iw_dev_attributes[i]);
-	ib_unregister_device(&iwibdev->ibdev);
-}
-
 /**
  * i40iw_destroy_rdma_device - destroy rdma device and free resources
  * @iwibdev: IB device ptr
@@ -2873,7 +2861,7 @@ void i40iw_destroy_rdma_device(struct i40iw_ib_device *iwibdev)
 	if (!iwibdev)
 		return;

-	i40iw_unregister_rdma_device(iwibdev);
+	ib_unregister_device(&iwibdev->ibdev);
 	kfree(iwibdev->ibdev.iwcm);
 	iwibdev->ibdev.iwcm = NULL;
 	wait_event_timeout(iwibdev->iwdev->close_wq,
@@ -2888,32 +2876,19 @@ void i40iw_destroy_rdma_device(struct i40iw_ib_device *iwibdev)
  */
 int i40iw_register_rdma_device(struct i40iw_device *iwdev)
 {
-	int i, ret;
+	int ret;
 	struct i40iw_ib_device *iwibdev;

 	iwdev->iwibdev = i40iw_init_rdma_device(iwdev);
 	if (!iwdev->iwibdev)
 		return -ENOMEM;
 	iwibdev = iwdev->iwibdev;
+	rdma_set_device_sysfs_group(&iwibdev->ibdev, &i40iw_attr_group);
 	iwibdev->ibdev.driver_id = RDMA_DRIVER_I40IW;
-	ret = ib_register_device(&iwibdev->ibdev, NULL);
+	ret = ib_register_device(&iwibdev->ibdev, "i40iw%d", NULL);
 	if (ret)
 		goto error;

-	for (i = 0; i < ARRAY_SIZE(i40iw_dev_attributes); ++i) {
-		ret =
-		    device_create_file(&iwibdev->ibdev.dev,
-				       i40iw_dev_attributes[i]);
-		if (ret) {
-			while (i > 0) {
-				i--;
-				device_remove_file(&iwibdev->ibdev.dev, i40iw_dev_attributes[i]);
-			}
-			ib_unregister_device(&iwibdev->ibdev);
-			goto error;
-		}
-	}
-
 	return 0;

 error:
 	kfree(iwdev->iwibdev->ibdev.iwcm);
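
For context, the sysfs conversion above (and the matching mlx4 change further down) follows one pattern introduced by this series: the driver declares a single attribute_group, hands it to the core via rdma_set_device_sysfs_group() before ib_register_device(), and the core creates and removes the files together with the device, so the driver-side create/remove loops and their error unwinding go away. A minimal sketch of that pattern is below; the "foo" driver names and the constant hw_rev value are illustrative only, not taken from this diff.

	#include <linux/device.h>
	#include <rdma/ib_verbs.h>

	static ssize_t hw_rev_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
	{
		return sprintf(buf, "%x\n", 0x1);	/* a real driver reads HW state here */
	}
	static DEVICE_ATTR_RO(hw_rev);

	static struct attribute *foo_dev_attributes[] = {
		&dev_attr_hw_rev.attr,
		NULL
	};

	static const struct attribute_group foo_attr_group = {
		.attrs = foo_dev_attributes,
	};

	static int foo_register(struct ib_device *ibdev)
	{
		/* Must run before ib_register_device(); the core then creates
		 * and removes the sysfs files along with the device.
		 */
		rdma_set_device_sysfs_group(ibdev, &foo_attr_group);
		return ib_register_device(ibdev, "foo%d", NULL);
	}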


@@ -1,6 +1,7 @@
 config MLX4_INFINIBAND
 	tristate "Mellanox ConnectX HCA support"
 	depends on NETDEVICES && ETHERNET && PCI && INET
+	depends on INFINIBAND_USER_ACCESS || !INFINIBAND_USER_ACCESS
 	depends on MAY_USE_DEVLINK
 	select NET_VENDOR_MELLANOX
 	select MLX4_CORE


@@ -807,15 +807,17 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
 	int err;
 	struct ib_port_attr pattr;

-	if (in_wc && in_wc->qp->qp_num) {
-		pr_debug("received MAD: slid:%d sqpn:%d "
-			"dlid_bits:%d dqpn:%d wc_flags:0x%x, cls %x, mtd %x, atr %x\n",
-			in_wc->slid, in_wc->src_qp,
-			in_wc->dlid_path_bits,
-			in_wc->qp->qp_num,
-			in_wc->wc_flags,
-			in_mad->mad_hdr.mgmt_class, in_mad->mad_hdr.method,
-			be16_to_cpu(in_mad->mad_hdr.attr_id));
+	if (in_wc && in_wc->qp) {
+		pr_debug("received MAD: port:%d slid:%d sqpn:%d "
+			 "dlid_bits:%d dqpn:%d wc_flags:0x%x tid:%016llx cls:%x mtd:%x atr:%x\n",
+			 port_num,
+			 in_wc->slid, in_wc->src_qp,
+			 in_wc->dlid_path_bits,
+			 in_wc->qp->qp_num,
+			 in_wc->wc_flags,
+			 be64_to_cpu(in_mad->mad_hdr.tid),
+			 in_mad->mad_hdr.mgmt_class, in_mad->mad_hdr.method,
+			 be16_to_cpu(in_mad->mad_hdr.attr_id));
 		if (in_wc->wc_flags & IB_WC_GRH) {
 			pr_debug("sgid_hi:0x%016llx sgid_lo:0x%016llx\n",
 				 be64_to_cpu(in_grh->sgid.global.subnet_prefix),


@@ -1140,144 +1140,50 @@ static int mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
 	return 0;
 }

-static void mlx4_ib_vma_open(struct vm_area_struct *area)
-{
-	/* vma_open is called when a new VMA is created on top of our VMA.
-	 * This is done through either mremap flow or split_vma (usually due
-	 * to mlock, madvise, munmap, etc.). We do not support a clone of the
-	 * vma, as this VMA is strongly hardware related. Therefore we set the
-	 * vm_ops of the newly created/cloned VMA to NULL, to prevent it from
-	 * calling us again and trying to do incorrect actions. We assume that
-	 * the original vma size is exactly a single page that there will be no
-	 * "splitting" operations on.
-	 */
-	area->vm_ops = NULL;
-}
-
-static void mlx4_ib_vma_close(struct vm_area_struct *area)
-{
-	struct mlx4_ib_vma_private_data *mlx4_ib_vma_priv_data;
-
-	/* It's guaranteed that all VMAs opened on a FD are closed before the
-	 * file itself is closed, therefore no sync is needed with the regular
-	 * closing flow. (e.g. mlx4_ib_dealloc_ucontext) However need a sync
-	 * with accessing the vma as part of mlx4_ib_disassociate_ucontext.
-	 * The close operation is usually called under mm->mmap_sem except when
-	 * process is exiting. The exiting case is handled explicitly as part
-	 * of mlx4_ib_disassociate_ucontext.
-	 */
-	mlx4_ib_vma_priv_data = (struct mlx4_ib_vma_private_data *)
-				area->vm_private_data;
-
-	/* set the vma context pointer to null in the mlx4_ib driver's private
-	 * data to protect against a race condition in mlx4_ib_dissassociate_ucontext().
-	 */
-	mlx4_ib_vma_priv_data->vma = NULL;
-}
-
-static const struct vm_operations_struct mlx4_ib_vm_ops = {
-	.open = mlx4_ib_vma_open,
-	.close = mlx4_ib_vma_close
-};
-
 static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
 {
-	int i;
-	struct vm_area_struct *vma;
-	struct mlx4_ib_ucontext *context = to_mucontext(ibcontext);
-
-	/* need to protect from a race on closing the vma as part of
-	 * mlx4_ib_vma_close().
-	 */
-	for (i = 0; i < HW_BAR_COUNT; i++) {
-		vma = context->hw_bar_info[i].vma;
-		if (!vma)
-			continue;
-
-		zap_vma_ptes(context->hw_bar_info[i].vma,
-			     context->hw_bar_info[i].vma->vm_start, PAGE_SIZE);
-
-		context->hw_bar_info[i].vma->vm_flags &=
-			~(VM_SHARED | VM_MAYSHARE);
-		/* context going to be destroyed, should not access ops any more */
-		context->hw_bar_info[i].vma->vm_ops = NULL;
-	}
-}
-
-static void mlx4_ib_set_vma_data(struct vm_area_struct *vma,
-				 struct mlx4_ib_vma_private_data *vma_private_data)
-{
-	vma_private_data->vma = vma;
-	vma->vm_private_data = vma_private_data;
-	vma->vm_ops = &mlx4_ib_vm_ops;
 }
 static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
 {
 	struct mlx4_ib_dev *dev = to_mdev(context->device);
-	struct mlx4_ib_ucontext *mucontext = to_mucontext(context);
-
-	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
-		return -EINVAL;
-
-	if (vma->vm_pgoff == 0) {
-		/* We prevent double mmaping on same context */
-		if (mucontext->hw_bar_info[HW_BAR_DB].vma)
-			return -EINVAL;
-
-		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-
-		if (io_remap_pfn_range(vma, vma->vm_start,
-				       to_mucontext(context)->uar.pfn,
-				       PAGE_SIZE, vma->vm_page_prot))
-			return -EAGAIN;
-
-		mlx4_ib_set_vma_data(vma, &mucontext->hw_bar_info[HW_BAR_DB]);
-	} else if (vma->vm_pgoff == 1 && dev->dev->caps.bf_reg_size != 0) {
-		/* We prevent double mmaping on same context */
-		if (mucontext->hw_bar_info[HW_BAR_BF].vma)
-			return -EINVAL;
-
-		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
-
-		if (io_remap_pfn_range(vma, vma->vm_start,
-				       to_mucontext(context)->uar.pfn +
-				       dev->dev->caps.num_uars,
-				       PAGE_SIZE, vma->vm_page_prot))
-			return -EAGAIN;
-
-		mlx4_ib_set_vma_data(vma, &mucontext->hw_bar_info[HW_BAR_BF]);
-	} else if (vma->vm_pgoff == 3) {
+	switch (vma->vm_pgoff) {
+	case 0:
+		return rdma_user_mmap_io(context, vma,
+					 to_mucontext(context)->uar.pfn,
+					 PAGE_SIZE,
+					 pgprot_noncached(vma->vm_page_prot));
+
+	case 1:
+		if (dev->dev->caps.bf_reg_size == 0)
+			return -EINVAL;
+		return rdma_user_mmap_io(
+			context, vma,
+			to_mucontext(context)->uar.pfn +
+				dev->dev->caps.num_uars,
+			PAGE_SIZE, pgprot_writecombine(vma->vm_page_prot));
+
+	case 3: {
 		struct mlx4_clock_params params;
 		int ret;

-		/* We prevent double mmaping on same context */
-		if (mucontext->hw_bar_info[HW_BAR_CLOCK].vma)
-			return -EINVAL;
-
 		ret = mlx4_get_internal_clock_params(dev->dev, &params);
 		if (ret)
 			return ret;

-		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-		if (io_remap_pfn_range(vma, vma->vm_start,
-				       (pci_resource_start(dev->dev->persist->pdev,
-							   params.bar) +
-					params.offset)
-				       >> PAGE_SHIFT,
-				       PAGE_SIZE, vma->vm_page_prot))
-			return -EAGAIN;
-
-		mlx4_ib_set_vma_data(vma,
-				     &mucontext->hw_bar_info[HW_BAR_CLOCK]);
-	} else {
-		return -EINVAL;
+		return rdma_user_mmap_io(
+			context, vma,
+			(pci_resource_start(dev->dev->persist->pdev,
+					    params.bar) +
+			 params.offset) >>
+				PAGE_SHIFT,
+			PAGE_SIZE, pgprot_noncached(vma->vm_page_prot));
 	}

-	return 0;
+	default:
+		return -EINVAL;
+	}
 }

 static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev,
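
The mmap rewrite above leans on the core helper rdma_user_mmap_io(), which (judging from the conversions in this diff) validates the VMA size, performs the io_remap_pfn_range() and tracks the mapping against the ucontext so the core can zap it on disassociate; that is why the driver-private vma bookkeeping could be deleted. A driver mmap hook then reduces to roughly the sketch below; the "foo" ucontext layout is hypothetical, only the helper call reflects the new API.

	#include <linux/mm.h>
	#include <rdma/ib_verbs.h>

	/* Hypothetical driver ucontext; only uar_pfn matters for the example. */
	struct foo_ucontext {
		struct ib_ucontext ibucontext;
		unsigned long uar_pfn;
	};

	static int foo_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
	{
		struct foo_ucontext *uctx =
			container_of(context, struct foo_ucontext, ibucontext);

		if (vma->vm_pgoff != 0)
			return -EINVAL;

		/* Core helper: checks the VMA covers exactly PAGE_SIZE, remaps
		 * the PFN and remembers the mapping for later zapping when the
		 * device is disassociated from user space.
		 */
		return rdma_user_mmap_io(context, vma, uctx->uar_pfn, PAGE_SIZE,
					 pgprot_noncached(vma->vm_page_prot));
	}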
@@ -2133,39 +2039,43 @@ out:
 	return err;
 }

-static ssize_t show_hca(struct device *device, struct device_attribute *attr,
-			char *buf)
+static ssize_t hca_type_show(struct device *device,
+			     struct device_attribute *attr, char *buf)
 {
 	struct mlx4_ib_dev *dev =
 		container_of(device, struct mlx4_ib_dev, ib_dev.dev);
 	return sprintf(buf, "MT%d\n", dev->dev->persist->pdev->device);
 }
+static DEVICE_ATTR_RO(hca_type);

-static ssize_t show_rev(struct device *device, struct device_attribute *attr,
-			char *buf)
+static ssize_t hw_rev_show(struct device *device,
+			   struct device_attribute *attr, char *buf)
 {
 	struct mlx4_ib_dev *dev =
 		container_of(device, struct mlx4_ib_dev, ib_dev.dev);
 	return sprintf(buf, "%x\n", dev->dev->rev_id);
 }
+static DEVICE_ATTR_RO(hw_rev);

-static ssize_t show_board(struct device *device, struct device_attribute *attr,
-			  char *buf)
+static ssize_t board_id_show(struct device *device,
+			     struct device_attribute *attr, char *buf)
 {
 	struct mlx4_ib_dev *dev =
 		container_of(device, struct mlx4_ib_dev, ib_dev.dev);
 	return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN,
 		       dev->dev->board_id);
 }
+static DEVICE_ATTR_RO(board_id);

-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
-
-static struct device_attribute *mlx4_class_attributes[] = {
-	&dev_attr_hw_rev,
-	&dev_attr_hca_type,
-	&dev_attr_board_id
+static struct attribute *mlx4_class_attributes[] = {
+	&dev_attr_hw_rev.attr,
+	&dev_attr_hca_type.attr,
+	&dev_attr_board_id.attr,
+	NULL
+};
+
+static const struct attribute_group mlx4_attr_group = {
+	.attrs = mlx4_class_attributes,
 };

 struct diag_counter {
@@ -2636,7 +2546,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 	ibdev->dev = dev;
 	ibdev->bond_next_port = 0;

-	strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX);
 	ibdev->ib_dev.owner = THIS_MODULE;
 	ibdev->ib_dev.node_type = RDMA_NODE_IB_CA;
 	ibdev->ib_dev.local_dma_lkey = dev->caps.reserved_lkey;
@@ -2898,8 +2807,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 	if (mlx4_ib_alloc_diag_counters(ibdev))
 		goto err_steer_free_bitmap;

+	rdma_set_device_sysfs_group(&ibdev->ib_dev, &mlx4_attr_group);
 	ibdev->ib_dev.driver_id = RDMA_DRIVER_MLX4;
-	if (ib_register_device(&ibdev->ib_dev, NULL))
+	if (ib_register_device(&ibdev->ib_dev, "mlx4_%d", NULL))
 		goto err_diag_counters;

 	if (mlx4_ib_mad_init(ibdev))
@@ -2922,12 +2832,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 			goto err_notif;
 	}

-	for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
-		if (device_create_file(&ibdev->ib_dev.dev,
-				       mlx4_class_attributes[j]))
-			goto err_notif;
-	}
-
 	ibdev->ib_active = true;
 	mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
 		devlink_port_type_ib_set(mlx4_get_devlink_port(dev, i),


@@ -673,7 +673,7 @@ static void mlx4_ib_mcg_work_handler(struct work_struct *work)
 		if (!list_empty(&group->pending_list))
 			req = list_first_entry(&group->pending_list,
 					       struct mcast_req, group_list);
-		if ((method == IB_MGMT_METHOD_GET_RESP)) {
+		if (method == IB_MGMT_METHOD_GET_RESP) {
 			if (req) {
 				send_reply_to_slave(req->func, group, &req->sa_mad, status);
 				--group->func[req->func].num_pend_reqs;


@@ -80,16 +80,11 @@ enum hw_bar_type {
 	HW_BAR_COUNT
 };

-struct mlx4_ib_vma_private_data {
-	struct vm_area_struct *vma;
-};
-
 struct mlx4_ib_ucontext {
 	struct ib_ucontext ibucontext;
 	struct mlx4_uar uar;
 	struct list_head db_page_list;
 	struct mutex db_page_mutex;
-	struct mlx4_ib_vma_private_data hw_bar_info[HW_BAR_COUNT];
 	struct list_head wqn_ranges_list;
 	struct mutex wqn_ranges_mutex; /* protect wqn_ranges_list */
 };


@@ -2629,7 +2629,6 @@ enum {
 static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 			      int attr_mask, struct ib_udata *udata)
 {
-	enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED;
 	struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
 	struct mlx4_ib_qp *qp = to_mqp(ibqp);
 	enum ib_qp_state cur_state, new_state;
@@ -2639,13 +2638,8 @@ static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
 	new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;

-	if (cur_state != new_state || cur_state != IB_QPS_RESET) {
-		int port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
-		ll = rdma_port_get_link_layer(&dev->ib_dev, port);
-	}
-
 	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
-				attr_mask, ll)) {
+				attr_mask)) {
 		pr_debug("qpn 0x%x: invalid attribute mask specified "
 			 "for transition %d to %d. qp_type %d,"
 			 " attr_mask 0x%x\n",
