From fdff704dc60418e9a1bac78ae09c857d05c65aa3 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Wed, 29 Apr 2020 17:10:37 +0200 Subject: [PATCH 01/13] net/smc: rework pnet table to support SMC-R failover The pnet table stored pnet ids in the smc device structures. When a device is going down its smc device structure is freed, and when the device is brought online again it no longer has a pnet id set. Rework the pnet table implementation to store the device name with their assigned pnet id and apply the pnet id to devices when they are registered. Signed-off-by: Karsten Graul Reviewed-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_ib.c | 5 +- net/smc/smc_ism.c | 3 +- net/smc/smc_pnet.c | 545 ++++++++++++++++++++++++++------------------- net/smc/smc_pnet.h | 2 + 4 files changed, 322 insertions(+), 233 deletions(-) diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 04b6fefb8bce..440f9e319a38 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -579,8 +579,9 @@ static void smc_ib_add_dev(struct ib_device *ibdev) i++) { set_bit(i, &smcibdev->port_event_mask); /* determine pnetids of the port */ - smc_pnetid_by_dev_port(ibdev->dev.parent, i, - smcibdev->pnetid[i]); + if (smc_pnetid_by_dev_port(ibdev->dev.parent, i, + smcibdev->pnetid[i])) + smc_pnetid_by_table_ib(smcibdev, i + 1); } schedule_work(&smcibdev->port_event_work); } diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c index 5c4727d5066e..32be2da2cb85 100644 --- a/net/smc/smc_ism.c +++ b/net/smc/smc_ism.c @@ -296,7 +296,8 @@ struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name, device_initialize(&smcd->dev); dev_set_name(&smcd->dev, name); smcd->ops = ops; - smc_pnetid_by_dev_port(parent, 0, smcd->pnetid); + if (smc_pnetid_by_dev_port(parent, 0, smcd->pnetid)) + smc_pnetid_by_table_smcd(smcd); spin_lock_init(&smcd->lock); spin_lock_init(&smcd->lgr_lock); diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 2a5ed47c3e08..bd01c71b827a 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -50,29 +50,26 @@ static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = { static struct genl_family smc_pnet_nl_family; -/** - * struct smc_user_pnetentry - pnet identifier name entry for/from user - * @list: List node. - * @pnet_name: Pnet identifier name - * @ndev: pointer to network device. - * @smcibdev: Pointer to IB device. - * @ib_port: Port of IB device. - * @smcd_dev: Pointer to smcd device. - */ -struct smc_user_pnetentry { - struct list_head list; - char pnet_name[SMC_MAX_PNETID_LEN + 1]; - struct net_device *ndev; - struct smc_ib_device *smcibdev; - u8 ib_port; - struct smcd_dev *smcd_dev; +enum smc_pnet_nametype { + SMC_PNET_ETH = 1, + SMC_PNET_IB = 2, }; /* pnet entry stored in pnet table */ struct smc_pnetentry { struct list_head list; char pnet_name[SMC_MAX_PNETID_LEN + 1]; - struct net_device *ndev; + enum smc_pnet_nametype type; + union { + struct { + char eth_name[IFNAMSIZ + 1]; + struct net_device *ndev; + }; + struct { + char ib_name[IB_DEVICE_NAME_MAX + 1]; + u8 ib_port; + }; + }; }; /* Check if two given pnetids match */ @@ -106,14 +103,15 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name) sn = net_generic(net, smc_net_id); pnettable = &sn->pnettable; - /* remove netdevices */ + /* remove table entry */ write_lock(&pnettable->lock); list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) { if (!pnet_name || smc_pnet_match(pnetelem->pnet_name, pnet_name)) { list_del(&pnetelem->list); - dev_put(pnetelem->ndev); + if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev) + dev_put(pnetelem->ndev); kfree(pnetelem); rc = 0; } @@ -155,7 +153,35 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name) return rc; } -/* Remove a pnet entry mentioning a given network device from the pnet table. +/* Add the reference to a given network device to the pnet table. + */ +static int smc_pnet_add_by_ndev(struct net_device *ndev) +{ + struct smc_pnetentry *pnetelem, *tmp_pe; + struct smc_pnettable *pnettable; + struct net *net = dev_net(ndev); + struct smc_net *sn; + int rc = -ENOENT; + + /* get pnettable for namespace */ + sn = net_generic(net, smc_net_id); + pnettable = &sn->pnettable; + + write_lock(&pnettable->lock); + list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) { + if (pnetelem->type == SMC_PNET_ETH && !pnetelem->ndev && + !strncmp(pnetelem->eth_name, ndev->name, IFNAMSIZ)) { + dev_hold(ndev); + pnetelem->ndev = ndev; + rc = 0; + break; + } + } + write_unlock(&pnettable->lock); + return rc; +} + +/* Remove the reference to a given network device from the pnet table. */ static int smc_pnet_remove_by_ndev(struct net_device *ndev) { @@ -171,10 +197,9 @@ static int smc_pnet_remove_by_ndev(struct net_device *ndev) write_lock(&pnettable->lock); list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) { - if (pnetelem->ndev == ndev) { - list_del(&pnetelem->list); + if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev == ndev) { dev_put(pnetelem->ndev); - kfree(pnetelem); + pnetelem->ndev = NULL; rc = 0; break; } @@ -183,80 +208,40 @@ static int smc_pnet_remove_by_ndev(struct net_device *ndev) return rc; } -/* Append a pnetid to the end of the pnet table if not already on this list. +/* Apply pnetid to ib device when no pnetid is set. */ -static int smc_pnet_enter(struct smc_pnettable *pnettable, - struct smc_user_pnetentry *new_pnetelem) +static bool smc_pnet_apply_ib(struct smc_ib_device *ib_dev, u8 ib_port, + char *pnet_name) { u8 pnet_null[SMC_MAX_PNETID_LEN] = {0}; - u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; - struct smc_pnetentry *tmp_pnetelem; - struct smc_pnetentry *pnetelem; - bool new_smcddev = false; - struct net_device *ndev; - bool new_netdev = true; - bool new_ibdev = false; + bool applied = false; - if (new_pnetelem->smcibdev) { - struct smc_ib_device *ib_dev = new_pnetelem->smcibdev; - int ib_port = new_pnetelem->ib_port; - - spin_lock(&smc_ib_devices.lock); - if (smc_pnet_match(ib_dev->pnetid[ib_port - 1], pnet_null)) { - memcpy(ib_dev->pnetid[ib_port - 1], - new_pnetelem->pnet_name, SMC_MAX_PNETID_LEN); - ib_dev->pnetid_by_user[ib_port - 1] = true; - new_ibdev = true; - } - spin_unlock(&smc_ib_devices.lock); + spin_lock(&smc_ib_devices.lock); + if (smc_pnet_match(ib_dev->pnetid[ib_port - 1], pnet_null)) { + memcpy(ib_dev->pnetid[ib_port - 1], pnet_name, + SMC_MAX_PNETID_LEN); + ib_dev->pnetid_by_user[ib_port - 1] = true; + applied = true; } - if (new_pnetelem->smcd_dev) { - struct smcd_dev *smcd_dev = new_pnetelem->smcd_dev; + spin_unlock(&smc_ib_devices.lock); + return applied; +} - spin_lock(&smcd_dev_list.lock); - if (smc_pnet_match(smcd_dev->pnetid, pnet_null)) { - memcpy(smcd_dev->pnetid, new_pnetelem->pnet_name, - SMC_MAX_PNETID_LEN); - smcd_dev->pnetid_by_user = true; - new_smcddev = true; - } - spin_unlock(&smcd_dev_list.lock); +/* Apply pnetid to smcd device when no pnetid is set. + */ +static bool smc_pnet_apply_smcd(struct smcd_dev *smcd_dev, char *pnet_name) +{ + u8 pnet_null[SMC_MAX_PNETID_LEN] = {0}; + bool applied = false; + + spin_lock(&smcd_dev_list.lock); + if (smc_pnet_match(smcd_dev->pnetid, pnet_null)) { + memcpy(smcd_dev->pnetid, pnet_name, SMC_MAX_PNETID_LEN); + smcd_dev->pnetid_by_user = true; + applied = true; } - - if (!new_pnetelem->ndev) - return (new_ibdev || new_smcddev) ? 0 : -EEXIST; - - /* check if (base) netdev already has a pnetid. If there is one, we do - * not want to add a pnet table entry - */ - ndev = pnet_find_base_ndev(new_pnetelem->ndev); - if (!smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port, - ndev_pnetid)) - return (new_ibdev || new_smcddev) ? 0 : -EEXIST; - - /* add a new netdev entry to the pnet table if there isn't one */ - tmp_pnetelem = kzalloc(sizeof(*pnetelem), GFP_KERNEL); - if (!tmp_pnetelem) - return -ENOMEM; - memcpy(tmp_pnetelem->pnet_name, new_pnetelem->pnet_name, - SMC_MAX_PNETID_LEN); - tmp_pnetelem->ndev = new_pnetelem->ndev; - - write_lock(&pnettable->lock); - list_for_each_entry(pnetelem, &pnettable->pnetlist, list) { - if (pnetelem->ndev == new_pnetelem->ndev) - new_netdev = false; - } - if (new_netdev) { - dev_hold(tmp_pnetelem->ndev); - list_add_tail(&tmp_pnetelem->list, &pnettable->pnetlist); - write_unlock(&pnettable->lock); - } else { - write_unlock(&pnettable->lock); - kfree(tmp_pnetelem); - } - - return (new_netdev || new_ibdev || new_smcddev) ? 0 : -EEXIST; + spin_unlock(&smcd_dev_list.lock); + return applied; } /* The limit for pnetid is 16 characters. @@ -323,57 +308,167 @@ out: return smcd_dev; } -/* Parse the supplied netlink attributes and fill a pnetentry structure. - * For ethernet and infiniband device names verify that the devices exist. - */ -static int smc_pnet_fill_entry(struct net *net, - struct smc_user_pnetentry *pnetelem, - struct nlattr *tb[]) +static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net, + char *eth_name, char *pnet_name) { - char *string, *ibname; + struct smc_pnetentry *tmp_pe, *new_pe; + struct net_device *ndev, *base_ndev; + u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; + bool new_netdev; int rc; - memset(pnetelem, 0, sizeof(*pnetelem)); - INIT_LIST_HEAD(&pnetelem->list); + /* check if (base) netdev already has a pnetid. If there is one, we do + * not want to add a pnet table entry + */ + rc = -EEXIST; + ndev = dev_get_by_name(net, eth_name); /* dev_hold() */ + if (ndev) { + base_ndev = pnet_find_base_ndev(ndev); + if (!smc_pnetid_by_dev_port(base_ndev->dev.parent, + base_ndev->dev_port, ndev_pnetid)) + goto out_put; + } + + /* add a new netdev entry to the pnet table if there isn't one */ + rc = -ENOMEM; + new_pe = kzalloc(sizeof(*new_pe), GFP_KERNEL); + if (!new_pe) + goto out_put; + new_pe->type = SMC_PNET_ETH; + memcpy(new_pe->pnet_name, pnet_name, SMC_MAX_PNETID_LEN); + strncpy(new_pe->eth_name, eth_name, IFNAMSIZ); + new_pe->ndev = ndev; + + rc = -EEXIST; + new_netdev = true; + write_lock(&pnettable->lock); + list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) { + if (tmp_pe->type == SMC_PNET_ETH && + !strncmp(tmp_pe->eth_name, eth_name, IFNAMSIZ)) { + new_netdev = false; + break; + } + } + if (new_netdev) { + list_add_tail(&new_pe->list, &pnettable->pnetlist); + write_unlock(&pnettable->lock); + } else { + write_unlock(&pnettable->lock); + kfree(new_pe); + goto out_put; + } + return 0; + +out_put: + if (ndev) + dev_put(ndev); + return rc; +} + +static int smc_pnet_add_ib(struct smc_pnettable *pnettable, char *ib_name, + u8 ib_port, char *pnet_name) +{ + struct smc_pnetentry *tmp_pe, *new_pe; + struct smc_ib_device *ib_dev; + bool smcddev_applied = true; + bool ibdev_applied = true; + struct smcd_dev *smcd_dev; + bool new_ibdev; + + /* try to apply the pnetid to active devices */ + ib_dev = smc_pnet_find_ib(ib_name); + if (ib_dev) + ibdev_applied = smc_pnet_apply_ib(ib_dev, ib_port, pnet_name); + smcd_dev = smc_pnet_find_smcd(ib_name); + if (smcd_dev) + smcddev_applied = smc_pnet_apply_smcd(smcd_dev, pnet_name); + /* Apply fails when a device has a hardware-defined pnetid set, do not + * add a pnet table entry in that case. + */ + if (!ibdev_applied || !smcddev_applied) + return -EEXIST; + + /* add a new ib entry to the pnet table if there isn't one */ + new_pe = kzalloc(sizeof(*new_pe), GFP_KERNEL); + if (!new_pe) + return -ENOMEM; + new_pe->type = SMC_PNET_IB; + memcpy(new_pe->pnet_name, pnet_name, SMC_MAX_PNETID_LEN); + strncpy(new_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX); + new_pe->ib_port = ib_port; + + new_ibdev = true; + write_lock(&pnettable->lock); + list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) { + if (tmp_pe->type == SMC_PNET_IB && + !strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX)) { + new_ibdev = false; + break; + } + } + if (new_ibdev) { + list_add_tail(&new_pe->list, &pnettable->pnetlist); + write_unlock(&pnettable->lock); + } else { + write_unlock(&pnettable->lock); + kfree(new_pe); + } + return (new_ibdev) ? 0 : -EEXIST; +} + +/* Append a pnetid to the end of the pnet table if not already on this list. + */ +static int smc_pnet_enter(struct net *net, struct nlattr *tb[]) +{ + char pnet_name[SMC_MAX_PNETID_LEN + 1]; + struct smc_pnettable *pnettable; + bool new_netdev = false; + bool new_ibdev = false; + struct smc_net *sn; + u8 ibport = 1; + char *string; + int rc; + + /* get pnettable for namespace */ + sn = net_generic(net, smc_net_id); + pnettable = &sn->pnettable; rc = -EINVAL; if (!tb[SMC_PNETID_NAME]) goto error; string = (char *)nla_data(tb[SMC_PNETID_NAME]); - if (!smc_pnetid_valid(string, pnetelem->pnet_name)) + if (!smc_pnetid_valid(string, pnet_name)) goto error; - rc = -EINVAL; if (tb[SMC_PNETID_ETHNAME]) { string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]); - pnetelem->ndev = dev_get_by_name(net, string); - if (!pnetelem->ndev) + rc = smc_pnet_add_eth(pnettable, net, string, pnet_name); + if (!rc) + new_netdev = true; + else if (rc != -EEXIST) goto error; } /* if this is not the initial namespace, stop here */ if (net != &init_net) - return 0; + return new_netdev ? 0 : -EEXIST; rc = -EINVAL; if (tb[SMC_PNETID_IBNAME]) { - ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]); - ibname = strim(ibname); - pnetelem->smcibdev = smc_pnet_find_ib(ibname); - pnetelem->smcd_dev = smc_pnet_find_smcd(ibname); - if (!pnetelem->smcibdev && !pnetelem->smcd_dev) - goto error; - if (pnetelem->smcibdev) { - if (!tb[SMC_PNETID_IBPORT]) - goto error; - pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]); - if (pnetelem->ib_port < 1 || - pnetelem->ib_port > SMC_MAX_PORTS) + string = (char *)nla_data(tb[SMC_PNETID_IBNAME]); + string = strim(string); + if (tb[SMC_PNETID_IBPORT]) { + ibport = nla_get_u8(tb[SMC_PNETID_IBPORT]); + if (ibport < 1 || ibport > SMC_MAX_PORTS) goto error; } + rc = smc_pnet_add_ib(pnettable, string, ibport, pnet_name); + if (!rc) + new_ibdev = true; + else if (rc != -EEXIST) + goto error; } - - return 0; + return (new_netdev || new_ibdev) ? 0 : -EEXIST; error: return rc; @@ -381,28 +476,22 @@ error: /* Convert an smc_pnetentry to a netlink attribute sequence */ static int smc_pnet_set_nla(struct sk_buff *msg, - struct smc_user_pnetentry *pnetelem) + struct smc_pnetentry *pnetelem) { if (nla_put_string(msg, SMC_PNETID_NAME, pnetelem->pnet_name)) return -1; - if (pnetelem->ndev) { + if (pnetelem->type == SMC_PNET_ETH) { if (nla_put_string(msg, SMC_PNETID_ETHNAME, - pnetelem->ndev->name)) + pnetelem->eth_name)) return -1; } else { if (nla_put_string(msg, SMC_PNETID_ETHNAME, "n/a")) return -1; } - if (pnetelem->smcibdev) { - if (nla_put_string(msg, SMC_PNETID_IBNAME, - dev_name(pnetelem->smcibdev->ibdev->dev.parent)) || + if (pnetelem->type == SMC_PNET_IB) { + if (nla_put_string(msg, SMC_PNETID_IBNAME, pnetelem->ib_name) || nla_put_u8(msg, SMC_PNETID_IBPORT, pnetelem->ib_port)) return -1; - } else if (pnetelem->smcd_dev) { - if (nla_put_string(msg, SMC_PNETID_IBNAME, - dev_name(&pnetelem->smcd_dev->dev)) || - nla_put_u8(msg, SMC_PNETID_IBPORT, 1)) - return -1; } else { if (nla_put_string(msg, SMC_PNETID_IBNAME, "n/a") || nla_put_u8(msg, SMC_PNETID_IBPORT, 0xff)) @@ -415,21 +504,8 @@ static int smc_pnet_set_nla(struct sk_buff *msg, static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info) { struct net *net = genl_info_net(info); - struct smc_user_pnetentry pnetelem; - struct smc_pnettable *pnettable; - struct smc_net *sn; - int rc; - /* get pnettable for namespace */ - sn = net_generic(net, smc_net_id); - pnettable = &sn->pnettable; - - rc = smc_pnet_fill_entry(net, &pnetelem, info->attrs); - if (!rc) - rc = smc_pnet_enter(pnettable, &pnetelem); - if (pnetelem.ndev) - dev_put(pnetelem.ndev); - return rc; + return smc_pnet_enter(net, info->attrs); } static int smc_pnet_del(struct sk_buff *skb, struct genl_info *info) @@ -450,7 +526,7 @@ static int smc_pnet_dump_start(struct netlink_callback *cb) static int smc_pnet_dumpinfo(struct sk_buff *skb, u32 portid, u32 seq, u32 flags, - struct smc_user_pnetentry *pnetelem) + struct smc_pnetentry *pnetelem) { void *hdr; @@ -469,91 +545,32 @@ static int smc_pnet_dumpinfo(struct sk_buff *skb, static int _smc_pnet_dump(struct net *net, struct sk_buff *skb, u32 portid, u32 seq, u8 *pnetid, int start_idx) { - struct smc_user_pnetentry tmp_entry; struct smc_pnettable *pnettable; struct smc_pnetentry *pnetelem; - struct smc_ib_device *ibdev; - struct smcd_dev *smcd_dev; struct smc_net *sn; int idx = 0; - int ibport; /* get pnettable for namespace */ sn = net_generic(net, smc_net_id); pnettable = &sn->pnettable; - /* dump netdevices */ + /* dump pnettable entries */ read_lock(&pnettable->lock); list_for_each_entry(pnetelem, &pnettable->pnetlist, list) { if (pnetid && !smc_pnet_match(pnetelem->pnet_name, pnetid)) continue; if (idx++ < start_idx) continue; - memset(&tmp_entry, 0, sizeof(tmp_entry)); - memcpy(&tmp_entry.pnet_name, pnetelem->pnet_name, - SMC_MAX_PNETID_LEN); - tmp_entry.ndev = pnetelem->ndev; + /* if this is not the initial namespace, dump only netdev */ + if (net != &init_net && pnetelem->type != SMC_PNET_ETH) + continue; if (smc_pnet_dumpinfo(skb, portid, seq, NLM_F_MULTI, - &tmp_entry)) { + pnetelem)) { --idx; break; } } read_unlock(&pnettable->lock); - - /* if this is not the initial namespace, stop here */ - if (net != &init_net) - return idx; - - /* dump ib devices */ - spin_lock(&smc_ib_devices.lock); - list_for_each_entry(ibdev, &smc_ib_devices.list, list) { - for (ibport = 0; ibport < SMC_MAX_PORTS; ibport++) { - if (ibdev->pnetid_by_user[ibport]) { - if (pnetid && - !smc_pnet_match(ibdev->pnetid[ibport], - pnetid)) - continue; - if (idx++ < start_idx) - continue; - memset(&tmp_entry, 0, sizeof(tmp_entry)); - memcpy(&tmp_entry.pnet_name, - ibdev->pnetid[ibport], - SMC_MAX_PNETID_LEN); - tmp_entry.smcibdev = ibdev; - tmp_entry.ib_port = ibport + 1; - if (smc_pnet_dumpinfo(skb, portid, seq, - NLM_F_MULTI, - &tmp_entry)) { - --idx; - break; - } - } - } - } - spin_unlock(&smc_ib_devices.lock); - - /* dump smcd devices */ - spin_lock(&smcd_dev_list.lock); - list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) { - if (smcd_dev->pnetid_by_user) { - if (pnetid && !smc_pnet_match(smcd_dev->pnetid, pnetid)) - continue; - if (idx++ < start_idx) - continue; - memset(&tmp_entry, 0, sizeof(tmp_entry)); - memcpy(&tmp_entry.pnet_name, smcd_dev->pnetid, - SMC_MAX_PNETID_LEN); - tmp_entry.smcd_dev = smcd_dev; - if (smc_pnet_dumpinfo(skb, portid, seq, NLM_F_MULTI, - &tmp_entry)) { - --idx; - break; - } - } - } - spin_unlock(&smcd_dev_list.lock); - return idx; } @@ -659,6 +676,9 @@ static int smc_pnet_netdev_event(struct notifier_block *this, case NETDEV_UNREGISTER: smc_pnet_remove_by_ndev(event_dev); return NOTIFY_OK; + case NETDEV_REGISTER: + smc_pnet_add_by_ndev(event_dev); + return NOTIFY_OK; default: return NOTIFY_DONE; } @@ -744,7 +764,7 @@ static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev, read_lock(&pnettable->lock); list_for_each_entry(pnetelem, &pnettable->pnetlist, list) { - if (ndev == pnetelem->ndev) { + if (pnetelem->type == SMC_PNET_ETH && ndev == pnetelem->ndev) { /* get pnetid of netdev device */ memcpy(pnetid, pnetelem->pnet_name, SMC_MAX_PNETID_LEN); rc = 0; @@ -755,6 +775,34 @@ static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev, return rc; } +/* find a roce device for the given pnetid */ +static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id, + struct smc_init_info *ini) +{ + struct smc_ib_device *ibdev; + int i; + + ini->ib_dev = NULL; + spin_lock(&smc_ib_devices.lock); + list_for_each_entry(ibdev, &smc_ib_devices.list, list) { + for (i = 1; i <= SMC_MAX_PORTS; i++) { + if (!rdma_is_port_valid(ibdev->ibdev, i)) + continue; + if (smc_pnet_match(ibdev->pnetid[i - 1], pnet_id) && + smc_ib_port_active(ibdev, i) && + !test_bit(i - 1, ibdev->ports_going_away) && + !smc_ib_determine_gid(ibdev, i, ini->vlan_id, + ini->ib_gid, NULL)) { + ini->ib_dev = ibdev; + ini->ib_port = i; + goto out; + } + } + } +out: + spin_unlock(&smc_ib_devices.lock); +} + /* if handshake network device belongs to a roce device, return its * IB device and port */ @@ -801,8 +849,6 @@ static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev, struct smc_init_info *ini) { u8 ndev_pnetid[SMC_MAX_PNETID_LEN]; - struct smc_ib_device *ibdev; - int i; ndev = pnet_find_base_ndev(ndev); if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port, @@ -811,25 +857,7 @@ static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev, smc_pnet_find_rdma_dev(ndev, ini); return; /* pnetid could not be determined */ } - - spin_lock(&smc_ib_devices.lock); - list_for_each_entry(ibdev, &smc_ib_devices.list, list) { - for (i = 1; i <= SMC_MAX_PORTS; i++) { - if (!rdma_is_port_valid(ibdev->ibdev, i)) - continue; - if (smc_pnet_match(ibdev->pnetid[i - 1], ndev_pnetid) && - smc_ib_port_active(ibdev, i) && - !test_bit(i - 1, ibdev->ports_going_away) && - !smc_ib_determine_gid(ibdev, i, ini->vlan_id, - ini->ib_gid, NULL)) { - ini->ib_dev = ibdev; - ini->ib_port = i; - goto out; - } - } - } -out: - spin_unlock(&smc_ib_devices.lock); + _smc_pnet_find_roce_by_pnetid(ndev_pnetid, ini); } static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev, @@ -895,3 +923,60 @@ out_rel: out: return; } + +/* Lookup and apply a pnet table entry to the given ib device. + */ +int smc_pnetid_by_table_ib(struct smc_ib_device *smcibdev, u8 ib_port) +{ + char *ib_name = smcibdev->ibdev->name; + struct smc_pnettable *pnettable; + struct smc_pnetentry *tmp_pe; + struct smc_net *sn; + int rc = -ENOENT; + + /* get pnettable for init namespace */ + sn = net_generic(&init_net, smc_net_id); + pnettable = &sn->pnettable; + + read_lock(&pnettable->lock); + list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) { + if (tmp_pe->type == SMC_PNET_IB && + !strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX) && + tmp_pe->ib_port == ib_port) { + smc_pnet_apply_ib(smcibdev, ib_port, tmp_pe->pnet_name); + rc = 0; + break; + } + } + read_unlock(&pnettable->lock); + + return rc; +} + +/* Lookup and apply a pnet table entry to the given smcd device. + */ +int smc_pnetid_by_table_smcd(struct smcd_dev *smcddev) +{ + const char *ib_name = dev_name(&smcddev->dev); + struct smc_pnettable *pnettable; + struct smc_pnetentry *tmp_pe; + struct smc_net *sn; + int rc = -ENOENT; + + /* get pnettable for init namespace */ + sn = net_generic(&init_net, smc_net_id); + pnettable = &sn->pnettable; + + read_lock(&pnettable->lock); + list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) { + if (tmp_pe->type == SMC_PNET_IB && + !strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX)) { + smc_pnet_apply_smcd(smcddev, tmp_pe->pnet_name); + rc = 0; + break; + } + } + read_unlock(&pnettable->lock); + + return rc; +} diff --git a/net/smc/smc_pnet.h b/net/smc/smc_pnet.h index 4564e4d69c2e..ea207f8fc6f7 100644 --- a/net/smc/smc_pnet.h +++ b/net/smc/smc_pnet.h @@ -46,5 +46,7 @@ void smc_pnet_exit(void); void smc_pnet_net_exit(struct net *net); void smc_pnet_find_roce_resource(struct sock *sk, struct smc_init_info *ini); void smc_pnet_find_ism_resource(struct sock *sk, struct smc_init_info *ini); +int smc_pnetid_by_table_ib(struct smc_ib_device *smcibdev, u8 ib_port); +int smc_pnetid_by_table_smcd(struct smcd_dev *smcd); #endif From f3c1deddb21c19fb0eec3c61e80567ef4a79b58b Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Wed, 29 Apr 2020 17:10:38 +0200 Subject: [PATCH 02/13] net/smc: separate function for link initialization Move the initialization of a new link into its own function, separate from smc_lgr_create, to allow more than one link per link group. Do an extra check if the IB device initialization was successful, and reset the link state if any error occurs during smcr_link_init(). And rename two existing functions to use the prefix smcr_ to indicate that they belong to the SMC-R code path. Signed-off-by: Karsten Graul Reviewed-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_core.c | 114 ++++++++++++++++++++++++++------------------- 1 file changed, 66 insertions(+), 48 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 824c5211b027..3bb45c33db22 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -179,7 +179,7 @@ void smc_lgr_cleanup_early(struct smc_connection *conn) * of the DELETE LINK sequence from server; or as server to * initiate the delete processing. See smc_llc_rx_delete_link(). */ -static int smc_link_send_delete(struct smc_link *lnk, bool orderly) +static int smcr_link_send_delete(struct smc_link *lnk, bool orderly) { if (lnk->state == SMC_LNK_ACTIVE && !smc_llc_send_delete_link(lnk, SMC_LLC_REQ, orderly)) { @@ -219,7 +219,7 @@ static void smc_lgr_free_work(struct work_struct *work) if (!lgr->is_smcd && !lgr->terminating) { /* try to send del link msg, on error free lgr immediately */ if (lnk->state == SMC_LNK_ACTIVE && - !smc_link_send_delete(lnk, true)) { + !smcr_link_send_delete(lnk, true)) { /* reschedule in case we never receive a response */ smc_lgr_schedule_free_work(lgr); spin_unlock_bh(lgr_lock); @@ -245,6 +245,64 @@ static void smc_lgr_terminate_work(struct work_struct *work) __smc_lgr_terminate(lgr, true); } +static int smcr_link_init(struct smc_link *lnk, u8 link_id, + struct smc_init_info *ini) +{ + u8 rndvec[3]; + int rc; + + get_device(&ini->ib_dev->ibdev->dev); + atomic_inc(&ini->ib_dev->lnk_cnt); + lnk->state = SMC_LNK_ACTIVATING; + lnk->link_id = link_id; + lnk->smcibdev = ini->ib_dev; + lnk->ibport = ini->ib_port; + lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu; + if (!ini->ib_dev->initialized) { + rc = (int)smc_ib_setup_per_ibdev(ini->ib_dev); + if (rc) + goto out; + } + get_random_bytes(rndvec, sizeof(rndvec)); + lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) + + (rndvec[2] << 16); + rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport, + ini->vlan_id, lnk->gid, &lnk->sgid_index); + if (rc) + goto out; + rc = smc_llc_link_init(lnk); + if (rc) + goto out; + rc = smc_wr_alloc_link_mem(lnk); + if (rc) + goto clear_llc_lnk; + rc = smc_ib_create_protection_domain(lnk); + if (rc) + goto free_link_mem; + rc = smc_ib_create_queue_pair(lnk); + if (rc) + goto dealloc_pd; + rc = smc_wr_create_link(lnk); + if (rc) + goto destroy_qp; + return 0; + +destroy_qp: + smc_ib_destroy_queue_pair(lnk); +dealloc_pd: + smc_ib_dealloc_protection_domain(lnk); +free_link_mem: + smc_wr_free_link_mem(lnk); +clear_llc_lnk: + smc_llc_link_clear(lnk); +out: + put_device(&ini->ib_dev->ibdev->dev); + memset(lnk, 0, sizeof(struct smc_link)); + if (!atomic_dec_return(&ini->ib_dev->lnk_cnt)) + wake_up(&ini->ib_dev->lnks_deleted); + return rc; +} + /* create a new SMC link group */ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) { @@ -252,7 +310,6 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) struct list_head *lgr_list; struct smc_link *lnk; spinlock_t *lgr_lock; - u8 rndvec[3]; int rc = 0; int i; @@ -297,48 +354,17 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) atomic_inc(&ini->ism_dev->lgr_cnt); } else { /* SMC-R specific settings */ - get_device(&ini->ib_dev->ibdev->dev); lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT; memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer, SMC_SYSTEMID_LEN); lnk = &lgr->lnk[SMC_SINGLE_LINK]; - /* initialize link */ - lnk->state = SMC_LNK_ACTIVATING; - lnk->link_id = SMC_SINGLE_LINK; - lnk->smcibdev = ini->ib_dev; - lnk->ibport = ini->ib_port; + rc = smcr_link_init(lnk, SMC_SINGLE_LINK, ini); + if (rc) + goto free_lgr; lgr_list = &smc_lgr_list.list; lgr_lock = &smc_lgr_list.lock; - lnk->path_mtu = - ini->ib_dev->pattr[ini->ib_port - 1].active_mtu; - if (!ini->ib_dev->initialized) - smc_ib_setup_per_ibdev(ini->ib_dev); - get_random_bytes(rndvec, sizeof(rndvec)); - lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) + - (rndvec[2] << 16); - rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport, - ini->vlan_id, lnk->gid, - &lnk->sgid_index); - if (rc) - goto free_lgr; - rc = smc_llc_link_init(lnk); - if (rc) - goto free_lgr; - rc = smc_wr_alloc_link_mem(lnk); - if (rc) - goto clear_llc_lnk; - rc = smc_ib_create_protection_domain(lnk); - if (rc) - goto free_link_mem; - rc = smc_ib_create_queue_pair(lnk); - if (rc) - goto dealloc_pd; - rc = smc_wr_create_link(lnk); - if (rc) - goto destroy_qp; atomic_inc(&lgr_cnt); - atomic_inc(&ini->ib_dev->lnk_cnt); } smc->conn.lgr = lgr; spin_lock_bh(lgr_lock); @@ -346,14 +372,6 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) spin_unlock_bh(lgr_lock); return 0; -destroy_qp: - smc_ib_destroy_queue_pair(lnk); -dealloc_pd: - smc_ib_dealloc_protection_domain(lnk); -free_link_mem: - smc_wr_free_link_mem(lnk); -clear_llc_lnk: - smc_llc_link_clear(lnk); free_lgr: kfree(lgr); ism_put_vlan: @@ -417,7 +435,7 @@ void smc_conn_free(struct smc_connection *conn) smc_lgr_schedule_free_work(lgr); } -static void smc_link_clear(struct smc_link *lnk) +static void smcr_link_clear(struct smc_link *lnk) { lnk->peer_qpn = 0; smc_llc_link_clear(lnk); @@ -426,6 +444,7 @@ static void smc_link_clear(struct smc_link *lnk) smc_ib_destroy_queue_pair(lnk); smc_ib_dealloc_protection_domain(lnk); smc_wr_free_link_mem(lnk); + put_device(&lnk->smcibdev->ibdev->dev); if (!atomic_dec_return(&lnk->smcibdev->lnk_cnt)) wake_up(&lnk->smcibdev->lnks_deleted); } @@ -512,8 +531,7 @@ static void smc_lgr_free(struct smc_link_group *lgr) if (!atomic_dec_return(&lgr->smcd->lgr_cnt)) wake_up(&lgr->smcd->lgrs_deleted); } else { - smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]); - put_device(&lgr->lnk[SMC_SINGLE_LINK].smcibdev->ibdev->dev); + smcr_link_clear(&lgr->lnk[SMC_SINGLE_LINK]); if (!atomic_dec_return(&lgr_cnt)) wake_up(&lgrs_deleted); } From 026c381fb4778d0d44af57b7ff674f31f04af221 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Wed, 29 Apr 2020 17:10:39 +0200 Subject: [PATCH 03/13] net/smc: introduce link_idx for link group array The link_id is the index of the link in the array of the link group. When a link in the array is reused for a new link, a different unique link_id should be used, otherwise the index in the array could collide with the previous link at this array position. Use a new variable link_idx as array index, and make link_id an increasing unique id value. Signed-off-by: Karsten Graul Reviewed-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_core.c | 34 +++++++++++++++++++++++++++++----- net/smc/smc_core.h | 2 ++ 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 3bb45c33db22..d233112ced2a 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -245,8 +245,28 @@ static void smc_lgr_terminate_work(struct work_struct *work) __smc_lgr_terminate(lgr, true); } -static int smcr_link_init(struct smc_link *lnk, u8 link_id, - struct smc_init_info *ini) +/* return next unique link id for the lgr */ +static u8 smcr_next_link_id(struct smc_link_group *lgr) +{ + u8 link_id; + int i; + + while (1) { + link_id = ++lgr->next_link_id; + if (!link_id) /* skip zero as link_id */ + link_id = ++lgr->next_link_id; + for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { + if (lgr->lnk[i].state != SMC_LNK_INACTIVE && + lgr->lnk[i].link_id == link_id) + continue; + } + break; + } + return link_id; +} + +static int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, + u8 link_idx, struct smc_init_info *ini) { u8 rndvec[3]; int rc; @@ -254,7 +274,8 @@ static int smcr_link_init(struct smc_link *lnk, u8 link_id, get_device(&ini->ib_dev->ibdev->dev); atomic_inc(&ini->ib_dev->lnk_cnt); lnk->state = SMC_LNK_ACTIVATING; - lnk->link_id = link_id; + lnk->link_id = smcr_next_link_id(lgr); + lnk->link_idx = link_idx; lnk->smcibdev = ini->ib_dev; lnk->ibport = ini->ib_port; lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu; @@ -310,6 +331,7 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) struct list_head *lgr_list; struct smc_link *lnk; spinlock_t *lgr_lock; + u8 link_idx; int rc = 0; int i; @@ -338,6 +360,7 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) INIT_LIST_HEAD(&lgr->sndbufs[i]); INIT_LIST_HEAD(&lgr->rmbs[i]); } + lgr->next_link_id = 0; smc_lgr_list.num += SMC_LGR_NUM_INCR; memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE); INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work); @@ -358,8 +381,9 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer, SMC_SYSTEMID_LEN); - lnk = &lgr->lnk[SMC_SINGLE_LINK]; - rc = smcr_link_init(lnk, SMC_SINGLE_LINK, ini); + link_idx = SMC_SINGLE_LINK; + lnk = &lgr->lnk[link_idx]; + rc = smcr_link_init(lgr, lnk, link_idx, ini); if (rc) goto free_lgr; lgr_list = &smc_lgr_list.list; diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 8041db20c753..c459b0639bf3 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -115,6 +115,7 @@ struct smc_link { u8 peer_mac[ETH_ALEN]; /* = gid[8:10||13:15] */ u8 peer_gid[SMC_GID_SIZE]; /* gid of peer*/ u8 link_id; /* unique # within link group */ + u8 link_idx; /* index in lgr link array */ enum smc_link_state state; /* state of link */ struct workqueue_struct *llc_wq; /* single thread work queue */ @@ -222,6 +223,7 @@ struct smc_link_group { /* remote addr/key pairs */ DECLARE_BITMAP(rtokens_used_mask, SMC_RMBS_PER_LGR_MAX); /* used rtoken elements */ + u8 next_link_id; }; struct { /* SMC-D */ u64 peer_gid; From 387707fdf48697c667dd5e9715ac4feb41602d15 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Wed, 29 Apr 2020 17:10:40 +0200 Subject: [PATCH 04/13] net/smc: convert static link ID to dynamic references As a preparation for the support of multiple links remove the usage of a static link id (SMC_SINGLE_LINK) and allow dynamic link ids. Signed-off-by: Karsten Graul Reviewed-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 17 +++++------- net/smc/smc.h | 1 + net/smc/smc_cdc.c | 8 +++--- net/smc/smc_clc.c | 12 ++++----- net/smc/smc_core.c | 66 +++++++++++++++++++++++----------------------- net/smc/smc_core.h | 7 ++--- net/smc/smc_ib.c | 58 ++++++++++++++++++++-------------------- net/smc/smc_ib.h | 10 +++---- net/smc/smc_llc.c | 10 +++---- net/smc/smc_tx.c | 13 ++++----- 10 files changed, 99 insertions(+), 103 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 6fd44bdb0fc3..6e4bad8c64a8 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -343,7 +343,7 @@ static int smc_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc, { if (!rmb_desc->wr_reg) { /* register memory region for new rmb */ - if (smc_wr_reg_send(link, rmb_desc->mr_rx[SMC_SINGLE_LINK])) { + if (smc_wr_reg_send(link, rmb_desc->mr_rx[link->link_idx])) { rmb_desc->regerr = 1; return -EFAULT; } @@ -362,12 +362,10 @@ static int smc_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc, static int smc_clnt_conf_first_link(struct smc_sock *smc) { struct net *net = sock_net(smc->clcsock->sk); - struct smc_link_group *lgr = smc->conn.lgr; - struct smc_link *link; + struct smc_link *link = smc->conn.lnk; int rest; int rc; - link = &lgr->lnk[SMC_SINGLE_LINK]; /* receive CONFIRM LINK request from server over RoCE fabric */ rest = wait_for_completion_interruptible_timeout( &link->llc_confirm, @@ -610,7 +608,7 @@ static int smc_connect_rdma(struct smc_sock *smc, mutex_unlock(&smc_client_lgr_pending); return reason_code; } - link = &smc->conn.lgr->lnk[SMC_SINGLE_LINK]; + link = smc->conn.lnk; smc_conn_save_peer_info(smc, aclc); @@ -1002,13 +1000,10 @@ void smc_close_non_accepted(struct sock *sk) static int smc_serv_conf_first_link(struct smc_sock *smc) { struct net *net = sock_net(smc->clcsock->sk); - struct smc_link_group *lgr = smc->conn.lgr; - struct smc_link *link; + struct smc_link *link = smc->conn.lnk; int rest; int rc; - link = &lgr->lnk[SMC_SINGLE_LINK]; - if (smc_reg_rmb(link, smc->conn.rmb_desc, false)) return SMC_CLC_DECL_ERR_REGRMB; @@ -1194,7 +1189,7 @@ static int smc_listen_ism_init(struct smc_sock *new_smc, /* listen worker: register buffers */ static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact) { - struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK]; + struct smc_link *link = new_smc->conn.lnk; if (local_contact != SMC_FIRST_CONTACT) { if (smc_reg_rmb(link, new_smc->conn.rmb_desc, true)) @@ -1210,7 +1205,7 @@ static int smc_listen_rdma_finish(struct smc_sock *new_smc, struct smc_clc_msg_accept_confirm *cclc, int local_contact) { - struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK]; + struct smc_link *link = new_smc->conn.lnk; int reason_code = 0; if (local_contact == SMC_FIRST_CONTACT) diff --git a/net/smc/smc.h b/net/smc/smc.h index be11ba41190f..1a084afa7372 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -121,6 +121,7 @@ enum smc_urg_state { struct smc_connection { struct rb_node alert_node; struct smc_link_group *lgr; /* link group of connection */ + struct smc_link *lnk; /* assigned SMC-R link */ u32 alert_token_local; /* unique conn. id */ u8 peer_rmbe_idx; /* from tcp handshake */ int peer_rmbe_size; /* size of peer rx buffer */ diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index 164f1584861b..f64589d823aa 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -57,7 +57,7 @@ int smc_cdc_get_free_slot(struct smc_connection *conn, struct smc_rdma_wr **wr_rdma_buf, struct smc_cdc_tx_pend **pend) { - struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK]; + struct smc_link *link = conn->lnk; int rc; rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf, @@ -91,12 +91,10 @@ int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf, struct smc_cdc_tx_pend *pend) { + struct smc_link *link = conn->lnk; union smc_host_cursor cfed; - struct smc_link *link; int rc; - link = &conn->lgr->lnk[SMC_SINGLE_LINK]; - smc_cdc_add_pending_send(conn, pend); conn->tx_cdc_seq++; @@ -165,7 +163,7 @@ static void smc_cdc_tx_dismisser(struct smc_wr_tx_pend_priv *tx_pend) void smc_cdc_tx_dismiss_slots(struct smc_connection *conn) { - struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK]; + struct smc_link *link = conn->lnk; smc_wr_tx_dismiss_slots(link, SMC_CDC_MSG_TYPE, smc_cdc_tx_filter, smc_cdc_tx_dismisser, diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index ea0068f0173c..d5627df24215 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -496,7 +496,7 @@ int smc_clc_send_confirm(struct smc_sock *smc) sizeof(SMCD_EYECATCHER)); } else { /* SMC-R specific settings */ - link = &conn->lgr->lnk[SMC_SINGLE_LINK]; + link = conn->lnk; memcpy(cclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); cclc.hdr.path = SMC_TYPE_R; @@ -508,13 +508,13 @@ int smc_clc_send_confirm(struct smc_sock *smc) ETH_ALEN); hton24(cclc.qpn, link->roce_qp->qp_num); cclc.rmb_rkey = - htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey); + htonl(conn->rmb_desc->mr_rx[link->link_idx]->rkey); cclc.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */ cclc.rmbe_alert_token = htonl(conn->alert_token_local); cclc.qp_mtu = min(link->path_mtu, link->peer_mtu); cclc.rmbe_size = conn->rmbe_size_short; cclc.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address - (conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl)); + (conn->rmb_desc->sgt[link->link_idx].sgl)); hton24(cclc.psn, link->psn_initial); memcpy(cclc.smcr_trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); @@ -572,7 +572,7 @@ int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact) memcpy(aclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); aclc.hdr.path = SMC_TYPE_R; - link = &conn->lgr->lnk[SMC_SINGLE_LINK]; + link = conn->lnk; memcpy(aclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid)); memcpy(&aclc.lcl.gid, link->gid, SMC_GID_SIZE); @@ -580,13 +580,13 @@ int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact) ETH_ALEN); hton24(aclc.qpn, link->roce_qp->qp_num); aclc.rmb_rkey = - htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey); + htonl(conn->rmb_desc->mr_rx[link->link_idx]->rkey); aclc.rmbe_idx = 1; /* as long as 1 RMB = 1 RMBE */ aclc.rmbe_alert_token = htonl(conn->alert_token_local); aclc.qp_mtu = link->path_mtu; aclc.rmbe_size = conn->rmbe_size_short, aclc.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address - (conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl)); + (conn->rmb_desc->sgt[link->link_idx].sgl)); hton24(aclc.psn, link->psn_initial); memcpy(aclc.smcr_trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index d233112ced2a..1d695093f205 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -131,6 +131,11 @@ static void smc_lgr_register_conn(struct smc_connection *conn) conn->alert_token_local = 0; } smc_lgr_add_alert_token(conn); + + /* assign the new connection to a link */ + if (!conn->lgr->is_smcd) + conn->lnk = &conn->lgr->lnk[SMC_SINGLE_LINK]; + conn->lgr->conns_num++; } @@ -275,6 +280,7 @@ static int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, atomic_inc(&ini->ib_dev->lnk_cnt); lnk->state = SMC_LNK_ACTIVATING; lnk->link_id = smcr_next_link_id(lgr); + lnk->lgr = lgr; lnk->link_idx = link_idx; lnk->smcibdev = ini->ib_dev; lnk->ibport = ini->ib_port; @@ -421,7 +427,7 @@ static void smc_buf_unuse(struct smc_connection *conn, if (!lgr->is_smcd && !list_empty(&lgr->list)) { /* unregister rmb with peer */ smc_llc_do_delete_rkey( - &lgr->lnk[SMC_SINGLE_LINK], + conn->lnk, conn->rmb_desc); } conn->rmb_desc->used = 0; @@ -479,16 +485,15 @@ static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb, struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK]; if (is_rmb) { - if (buf_desc->mr_rx[SMC_SINGLE_LINK]) + if (buf_desc->mr_rx[lnk->link_idx]) smc_ib_put_memory_region( - buf_desc->mr_rx[SMC_SINGLE_LINK]); - smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc, - DMA_FROM_DEVICE); + buf_desc->mr_rx[lnk->link_idx]); + smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE); } else { - smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc, - DMA_TO_DEVICE); + smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE); } - sg_free_table(&buf_desc->sgt[SMC_SINGLE_LINK]); + sg_free_table(&buf_desc->sgt[lnk->link_idx]); + if (buf_desc->pages) __free_pages(buf_desc->pages, buf_desc->order); kfree(buf_desc); @@ -1026,17 +1031,16 @@ static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr, /* build the sg table from the pages */ lnk = &lgr->lnk[SMC_SINGLE_LINK]; - rc = sg_alloc_table(&buf_desc->sgt[SMC_SINGLE_LINK], 1, - GFP_KERNEL); + rc = sg_alloc_table(&buf_desc->sgt[lnk->link_idx], 1, GFP_KERNEL); if (rc) { smc_buf_free(lgr, is_rmb, buf_desc); return ERR_PTR(rc); } - sg_set_buf(buf_desc->sgt[SMC_SINGLE_LINK].sgl, + sg_set_buf(buf_desc->sgt[lnk->link_idx].sgl, buf_desc->cpu_addr, bufsize); /* map sg table to DMA address */ - rc = smc_ib_buf_map_sg(lnk->smcibdev, buf_desc, + rc = smc_ib_buf_map_sg(lnk, buf_desc, is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE); /* SMC protocol depends on mapping to one DMA address only */ if (rc != 1) { @@ -1049,7 +1053,7 @@ static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr, rc = smc_ib_get_memory_region(lnk->roce_pd, IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE, - buf_desc); + buf_desc, lnk->link_idx); if (rc) { smc_buf_free(lgr, is_rmb, buf_desc); return ERR_PTR(rc); @@ -1174,22 +1178,16 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn) { - struct smc_link_group *lgr = conn->lgr; - if (!conn->lgr || conn->lgr->is_smcd) return; - smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev, - conn->sndbuf_desc, DMA_TO_DEVICE); + smc_ib_sync_sg_for_cpu(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE); } void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn) { - struct smc_link_group *lgr = conn->lgr; - if (!conn->lgr || conn->lgr->is_smcd) return; - smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev, - conn->sndbuf_desc, DMA_TO_DEVICE); + smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE); } void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn) @@ -1198,7 +1196,7 @@ void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn) if (!conn->lgr || conn->lgr->is_smcd) return; - smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev, + smc_ib_sync_sg_for_cpu(&lgr->lnk[SMC_SINGLE_LINK], conn->rmb_desc, DMA_FROM_DEVICE); } @@ -1208,7 +1206,7 @@ void smc_rmb_sync_sg_for_device(struct smc_connection *conn) if (!conn->lgr || conn->lgr->is_smcd) return; - smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev, + smc_ib_sync_sg_for_device(&lgr->lnk[SMC_SINGLE_LINK], conn->rmb_desc, DMA_FROM_DEVICE); } @@ -1245,15 +1243,16 @@ static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr) } /* add a new rtoken from peer */ -int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey) +int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey) { + struct smc_link_group *lgr = smc_get_lgr(lnk); u64 dma_addr = be64_to_cpu(nw_vaddr); u32 rkey = ntohl(nw_rkey); int i; for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) { - if ((lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey) && - (lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr == dma_addr) && + if (lgr->rtokens[i][lnk->link_idx].rkey == rkey && + lgr->rtokens[i][lnk->link_idx].dma_addr == dma_addr && test_bit(i, lgr->rtokens_used_mask)) { /* already in list */ return i; @@ -1262,22 +1261,23 @@ int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey) i = smc_rmb_reserve_rtoken_idx(lgr); if (i < 0) return i; - lgr->rtokens[i][SMC_SINGLE_LINK].rkey = rkey; - lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = dma_addr; + lgr->rtokens[i][lnk->link_idx].rkey = rkey; + lgr->rtokens[i][lnk->link_idx].dma_addr = dma_addr; return i; } /* delete an rtoken */ -int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey) +int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey) { + struct smc_link_group *lgr = smc_get_lgr(lnk); u32 rkey = ntohl(nw_rkey); int i; for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) { - if (lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey && + if (lgr->rtokens[i][lnk->link_idx].rkey == rkey && test_bit(i, lgr->rtokens_used_mask)) { - lgr->rtokens[i][SMC_SINGLE_LINK].rkey = 0; - lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = 0; + lgr->rtokens[i][lnk->link_idx].rkey = 0; + lgr->rtokens[i][lnk->link_idx].dma_addr = 0; clear_bit(i, lgr->rtokens_used_mask); return 0; @@ -1290,7 +1290,7 @@ int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey) int smc_rmb_rtoken_handling(struct smc_connection *conn, struct smc_clc_msg_accept_confirm *clc) { - conn->rtoken_idx = smc_rtoken_add(conn->lgr, clc->rmb_dma_addr, + conn->rtoken_idx = smc_rtoken_add(conn->lnk, clc->rmb_dma_addr, clc->rmb_rkey); if (conn->rtoken_idx < 0) return conn->rtoken_idx; diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index c459b0639bf3..c71c35a3596c 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -116,6 +116,7 @@ struct smc_link { u8 peer_gid[SMC_GID_SIZE]; /* gid of peer*/ u8 link_id; /* unique # within link group */ u8 link_idx; /* index in lgr link array */ + struct smc_link_group *lgr; /* parent link group */ enum smc_link_state state; /* state of link */ struct workqueue_struct *llc_wq; /* single thread work queue */ @@ -303,8 +304,8 @@ int smc_buf_create(struct smc_sock *smc, bool is_smcd); int smc_uncompress_bufsize(u8 compressed); int smc_rmb_rtoken_handling(struct smc_connection *conn, struct smc_clc_msg_accept_confirm *clc); -int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey); -int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey); +int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey); +int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey); void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn); void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn); void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn); @@ -319,6 +320,6 @@ void smc_core_exit(void); static inline struct smc_link_group *smc_get_lgr(struct smc_link *link) { - return container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]); + return link->lgr; } #endif diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 440f9e319a38..c090678a3e5a 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -389,15 +389,15 @@ void smc_ib_put_memory_region(struct ib_mr *mr) ib_dereg_mr(mr); } -static int smc_ib_map_mr_sg(struct smc_buf_desc *buf_slot) +static int smc_ib_map_mr_sg(struct smc_buf_desc *buf_slot, u8 link_idx) { unsigned int offset = 0; int sg_num; /* map the largest prefix of a dma mapped SG list */ - sg_num = ib_map_mr_sg(buf_slot->mr_rx[SMC_SINGLE_LINK], - buf_slot->sgt[SMC_SINGLE_LINK].sgl, - buf_slot->sgt[SMC_SINGLE_LINK].orig_nents, + sg_num = ib_map_mr_sg(buf_slot->mr_rx[link_idx], + buf_slot->sgt[link_idx].sgl, + buf_slot->sgt[link_idx].orig_nents, &offset, PAGE_SIZE); return sg_num; @@ -405,29 +405,29 @@ static int smc_ib_map_mr_sg(struct smc_buf_desc *buf_slot) /* Allocate a memory region and map the dma mapped SG list of buf_slot */ int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags, - struct smc_buf_desc *buf_slot) + struct smc_buf_desc *buf_slot, u8 link_idx) { - if (buf_slot->mr_rx[SMC_SINGLE_LINK]) + if (buf_slot->mr_rx[link_idx]) return 0; /* already done */ - buf_slot->mr_rx[SMC_SINGLE_LINK] = + buf_slot->mr_rx[link_idx] = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, 1 << buf_slot->order); - if (IS_ERR(buf_slot->mr_rx[SMC_SINGLE_LINK])) { + if (IS_ERR(buf_slot->mr_rx[link_idx])) { int rc; - rc = PTR_ERR(buf_slot->mr_rx[SMC_SINGLE_LINK]); - buf_slot->mr_rx[SMC_SINGLE_LINK] = NULL; + rc = PTR_ERR(buf_slot->mr_rx[link_idx]); + buf_slot->mr_rx[link_idx] = NULL; return rc; } - if (smc_ib_map_mr_sg(buf_slot) != 1) + if (smc_ib_map_mr_sg(buf_slot, link_idx) != 1) return -EINVAL; return 0; } /* synchronize buffer usage for cpu access */ -void smc_ib_sync_sg_for_cpu(struct smc_ib_device *smcibdev, +void smc_ib_sync_sg_for_cpu(struct smc_link *lnk, struct smc_buf_desc *buf_slot, enum dma_data_direction data_direction) { @@ -435,11 +435,11 @@ void smc_ib_sync_sg_for_cpu(struct smc_ib_device *smcibdev, unsigned int i; /* for now there is just one DMA address */ - for_each_sg(buf_slot->sgt[SMC_SINGLE_LINK].sgl, sg, - buf_slot->sgt[SMC_SINGLE_LINK].nents, i) { + for_each_sg(buf_slot->sgt[lnk->link_idx].sgl, sg, + buf_slot->sgt[lnk->link_idx].nents, i) { if (!sg_dma_len(sg)) break; - ib_dma_sync_single_for_cpu(smcibdev->ibdev, + ib_dma_sync_single_for_cpu(lnk->smcibdev->ibdev, sg_dma_address(sg), sg_dma_len(sg), data_direction); @@ -447,7 +447,7 @@ void smc_ib_sync_sg_for_cpu(struct smc_ib_device *smcibdev, } /* synchronize buffer usage for device access */ -void smc_ib_sync_sg_for_device(struct smc_ib_device *smcibdev, +void smc_ib_sync_sg_for_device(struct smc_link *lnk, struct smc_buf_desc *buf_slot, enum dma_data_direction data_direction) { @@ -455,11 +455,11 @@ void smc_ib_sync_sg_for_device(struct smc_ib_device *smcibdev, unsigned int i; /* for now there is just one DMA address */ - for_each_sg(buf_slot->sgt[SMC_SINGLE_LINK].sgl, sg, - buf_slot->sgt[SMC_SINGLE_LINK].nents, i) { + for_each_sg(buf_slot->sgt[lnk->link_idx].sgl, sg, + buf_slot->sgt[lnk->link_idx].nents, i) { if (!sg_dma_len(sg)) break; - ib_dma_sync_single_for_device(smcibdev->ibdev, + ib_dma_sync_single_for_device(lnk->smcibdev->ibdev, sg_dma_address(sg), sg_dma_len(sg), data_direction); @@ -467,15 +467,15 @@ void smc_ib_sync_sg_for_device(struct smc_ib_device *smcibdev, } /* Map a new TX or RX buffer SG-table to DMA */ -int smc_ib_buf_map_sg(struct smc_ib_device *smcibdev, +int smc_ib_buf_map_sg(struct smc_link *lnk, struct smc_buf_desc *buf_slot, enum dma_data_direction data_direction) { int mapped_nents; - mapped_nents = ib_dma_map_sg(smcibdev->ibdev, - buf_slot->sgt[SMC_SINGLE_LINK].sgl, - buf_slot->sgt[SMC_SINGLE_LINK].orig_nents, + mapped_nents = ib_dma_map_sg(lnk->smcibdev->ibdev, + buf_slot->sgt[lnk->link_idx].sgl, + buf_slot->sgt[lnk->link_idx].orig_nents, data_direction); if (!mapped_nents) return -ENOMEM; @@ -483,18 +483,18 @@ int smc_ib_buf_map_sg(struct smc_ib_device *smcibdev, return mapped_nents; } -void smc_ib_buf_unmap_sg(struct smc_ib_device *smcibdev, +void smc_ib_buf_unmap_sg(struct smc_link *lnk, struct smc_buf_desc *buf_slot, enum dma_data_direction data_direction) { - if (!buf_slot->sgt[SMC_SINGLE_LINK].sgl->dma_address) + if (!buf_slot->sgt[lnk->link_idx].sgl->dma_address) return; /* already unmapped */ - ib_dma_unmap_sg(smcibdev->ibdev, - buf_slot->sgt[SMC_SINGLE_LINK].sgl, - buf_slot->sgt[SMC_SINGLE_LINK].orig_nents, + ib_dma_unmap_sg(lnk->smcibdev->ibdev, + buf_slot->sgt[lnk->link_idx].sgl, + buf_slot->sgt[lnk->link_idx].orig_nents, data_direction); - buf_slot->sgt[SMC_SINGLE_LINK].sgl->dma_address = 0; + buf_slot->sgt[lnk->link_idx].sgl->dma_address = 0; } long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev) diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h index 5c2b115d36da..e6a696ae15f3 100644 --- a/net/smc/smc_ib.h +++ b/net/smc/smc_ib.h @@ -59,10 +59,10 @@ struct smc_link; int smc_ib_register_client(void) __init; void smc_ib_unregister_client(void); bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport); -int smc_ib_buf_map_sg(struct smc_ib_device *smcibdev, +int smc_ib_buf_map_sg(struct smc_link *lnk, struct smc_buf_desc *buf_slot, enum dma_data_direction data_direction); -void smc_ib_buf_unmap_sg(struct smc_ib_device *smcibdev, +void smc_ib_buf_unmap_sg(struct smc_link *lnk, struct smc_buf_desc *buf_slot, enum dma_data_direction data_direction); void smc_ib_dealloc_protection_domain(struct smc_link *lnk); @@ -74,12 +74,12 @@ int smc_ib_modify_qp_rts(struct smc_link *lnk); int smc_ib_modify_qp_reset(struct smc_link *lnk); long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev); int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags, - struct smc_buf_desc *buf_slot); + struct smc_buf_desc *buf_slot, u8 link_idx); void smc_ib_put_memory_region(struct ib_mr *mr); -void smc_ib_sync_sg_for_cpu(struct smc_ib_device *smcibdev, +void smc_ib_sync_sg_for_cpu(struct smc_link *lnk, struct smc_buf_desc *buf_slot, enum dma_data_direction data_direction); -void smc_ib_sync_sg_for_device(struct smc_ib_device *smcibdev, +void smc_ib_sync_sg_for_device(struct smc_link *lnk, struct smc_buf_desc *buf_slot, enum dma_data_direction data_direction); int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport, diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 0e52aab53d97..34d0752ba6af 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -231,9 +231,9 @@ static int smc_llc_send_confirm_rkey(struct smc_link *link, rkeyllc->hd.common.type = SMC_LLC_CONFIRM_RKEY; rkeyllc->hd.length = sizeof(struct smc_llc_msg_confirm_rkey); rkeyllc->rtoken[0].rmb_key = - htonl(rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey); + htonl(rmb_desc->mr_rx[link->link_idx]->rkey); rkeyllc->rtoken[0].rmb_vaddr = cpu_to_be64( - (u64)sg_dma_address(rmb_desc->sgt[SMC_SINGLE_LINK].sgl)); + (u64)sg_dma_address(rmb_desc->sgt[link->link_idx].sgl)); /* send llc message */ rc = smc_wr_tx_send(link, pend); return rc; @@ -256,7 +256,7 @@ static int smc_llc_send_delete_rkey(struct smc_link *link, rkeyllc->hd.common.type = SMC_LLC_DELETE_RKEY; rkeyllc->hd.length = sizeof(struct smc_llc_msg_delete_rkey); rkeyllc->num_rkeys = 1; - rkeyllc->rkey[0] = htonl(rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey); + rkeyllc->rkey[0] = htonl(rmb_desc->mr_rx[link->link_idx]->rkey); /* send llc message */ rc = smc_wr_tx_send(link, pend); return rc; @@ -501,7 +501,7 @@ static void smc_llc_rx_confirm_rkey(struct smc_link *link, SMC_LLC_FLAG_RKEY_NEG; complete(&link->llc_confirm_rkey); } else { - rc = smc_rtoken_add(smc_get_lgr(link), + rc = smc_rtoken_add(link, llc->rtoken[0].rmb_vaddr, llc->rtoken[0].rmb_key); @@ -539,7 +539,7 @@ static void smc_llc_rx_delete_rkey(struct smc_link *link, } else { max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX); for (i = 0; i < max; i++) { - if (smc_rtoken_delete(smc_get_lgr(link), llc->rkey[i])) + if (smc_rtoken_delete(link, llc->rkey[i])) err_mask |= 1 << (SMC_LLC_DEL_RKEY_MAX - 1 - i); } diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index 9f1ade86d70e..d74bfe6a90f1 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -269,19 +269,18 @@ static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset, int num_sges, struct ib_rdma_wr *rdma_wr) { struct smc_link_group *lgr = conn->lgr; - struct smc_link *link; + struct smc_link *link = conn->lnk; int rc; - link = &lgr->lnk[SMC_SINGLE_LINK]; rdma_wr->wr.wr_id = smc_wr_tx_get_next_wr_id(link); rdma_wr->wr.num_sge = num_sges; rdma_wr->remote_addr = - lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].dma_addr + + lgr->rtokens[conn->rtoken_idx][link->link_idx].dma_addr + /* RMBE within RMB */ conn->tx_off + /* offset within RMBE */ peer_rmbe_offset; - rdma_wr->rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey; + rdma_wr->rkey = lgr->rtokens[conn->rtoken_idx][link->link_idx].rkey; rc = ib_post_send(link->roce_qp, &rdma_wr->wr, NULL); if (rc) smc_lgr_terminate_sched(lgr); @@ -310,8 +309,10 @@ static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len, size_t dst_off, size_t dst_len, struct smc_rdma_wr *wr_rdma_buf) { + struct smc_link *link = conn->lnk; + dma_addr_t dma_addr = - sg_dma_address(conn->sndbuf_desc->sgt[SMC_SINGLE_LINK].sgl); + sg_dma_address(conn->sndbuf_desc->sgt[link->link_idx].sgl); int src_len_sum = src_len, dst_len_sum = dst_len; int sent_count = src_off; int srcchunk, dstchunk; @@ -507,7 +508,7 @@ static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn) if (!pflags->urg_data_present) { rc = smc_tx_rdma_writes(conn, wr_rdma_buf); if (rc) { - smc_wr_tx_put_slot(&conn->lgr->lnk[SMC_SINGLE_LINK], + smc_wr_tx_put_slot(conn->lnk, (struct smc_wr_tx_pend_priv *)pend); goto out_unlock; } From b9247544c1bccfe1b74ddf1dade719a69946cbb1 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Wed, 29 Apr 2020 17:10:41 +0200 Subject: [PATCH 05/13] net/smc: convert static link ID instances to support multiple links As a preparation for the support of multiple links remove the usage of a static link id (SMC_SINGLE_LINK) and allow dynamic link ids. Signed-off-by: Karsten Graul Reviewed-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 54 +++++--- net/smc/smc_clc.h | 1 + net/smc/smc_core.c | 338 +++++++++++++++++++++++++++++++-------------- net/smc/smc_core.h | 37 +++-- net/smc/smc_llc.c | 2 + 5 files changed, 294 insertions(+), 138 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 6e4bad8c64a8..890dc6422f8c 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -338,28 +338,48 @@ static void smc_copy_sock_settings_to_smc(struct smc_sock *smc) } /* register a new rmb, send confirm_rkey msg to register with peer */ -static int smc_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc, - bool conf_rkey) +static int smcr_link_reg_rmb(struct smc_link *link, + struct smc_buf_desc *rmb_desc, bool conf_rkey) { - if (!rmb_desc->wr_reg) { + if (!rmb_desc->is_reg_mr[link->link_idx]) { /* register memory region for new rmb */ if (smc_wr_reg_send(link, rmb_desc->mr_rx[link->link_idx])) { - rmb_desc->regerr = 1; + rmb_desc->is_reg_err = true; return -EFAULT; } - rmb_desc->wr_reg = 1; + rmb_desc->is_reg_mr[link->link_idx] = true; } if (!conf_rkey) return 0; + /* exchange confirm_rkey msg with peer */ - if (smc_llc_do_confirm_rkey(link, rmb_desc)) { - rmb_desc->regerr = 1; - return -EFAULT; + if (!rmb_desc->is_conf_rkey) { + if (smc_llc_do_confirm_rkey(link, rmb_desc)) { + rmb_desc->is_reg_err = true; + return -EFAULT; + } + rmb_desc->is_conf_rkey = true; } return 0; } -static int smc_clnt_conf_first_link(struct smc_sock *smc) +/* register the new rmb on all links */ +static int smcr_lgr_reg_rmbs(struct smc_link_group *lgr, + struct smc_buf_desc *rmb_desc) +{ + int i, rc; + + for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { + if (lgr->lnk[i].state != SMC_LNK_ACTIVE) + continue; + rc = smcr_link_reg_rmb(&lgr->lnk[i], rmb_desc, true); + if (rc) + return rc; + } + return 0; +} + +static int smcr_clnt_conf_first_link(struct smc_sock *smc) { struct net *net = sock_net(smc->clcsock->sk); struct smc_link *link = smc->conn.lnk; @@ -387,7 +407,7 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc) smc_wr_remember_qp_attr(link); - if (smc_reg_rmb(link, smc->conn.rmb_desc, false)) + if (smcr_link_reg_rmb(link, smc->conn.rmb_desc, false)) return SMC_CLC_DECL_ERR_REGRMB; /* send CONFIRM LINK response over RoCE fabric */ @@ -632,7 +652,7 @@ static int smc_connect_rdma(struct smc_sock *smc, return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RDYLNK, ini->cln_first_contact); } else { - if (smc_reg_rmb(link, smc->conn.rmb_desc, true)) + if (smcr_lgr_reg_rmbs(smc->conn.lgr, smc->conn.rmb_desc)) return smc_connect_abort(smc, SMC_CLC_DECL_ERR_REGRMB, ini->cln_first_contact); } @@ -647,7 +667,7 @@ static int smc_connect_rdma(struct smc_sock *smc, if (ini->cln_first_contact == SMC_FIRST_CONTACT) { /* QP confirmation over RoCE fabric */ - reason_code = smc_clnt_conf_first_link(smc); + reason_code = smcr_clnt_conf_first_link(smc); if (reason_code) return smc_connect_abort(smc, reason_code, ini->cln_first_contact); @@ -997,14 +1017,14 @@ void smc_close_non_accepted(struct sock *sk) sock_put(sk); /* final sock_put */ } -static int smc_serv_conf_first_link(struct smc_sock *smc) +static int smcr_serv_conf_first_link(struct smc_sock *smc) { struct net *net = sock_net(smc->clcsock->sk); struct smc_link *link = smc->conn.lnk; int rest; int rc; - if (smc_reg_rmb(link, smc->conn.rmb_desc, false)) + if (smcr_link_reg_rmb(link, smc->conn.rmb_desc, false)) return SMC_CLC_DECL_ERR_REGRMB; /* send CONFIRM LINK request to client over the RoCE fabric */ @@ -1189,10 +1209,10 @@ static int smc_listen_ism_init(struct smc_sock *new_smc, /* listen worker: register buffers */ static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact) { - struct smc_link *link = new_smc->conn.lnk; + struct smc_connection *conn = &new_smc->conn; if (local_contact != SMC_FIRST_CONTACT) { - if (smc_reg_rmb(link, new_smc->conn.rmb_desc, true)) + if (smcr_lgr_reg_rmbs(conn->lgr, conn->rmb_desc)) return SMC_CLC_DECL_ERR_REGRMB; } smc_rmb_sync_sg_for_device(&new_smc->conn); @@ -1222,7 +1242,7 @@ static int smc_listen_rdma_finish(struct smc_sock *new_smc, goto decline; } /* QP confirmation over RoCE fabric */ - reason_code = smc_serv_conf_first_link(new_smc); + reason_code = smcr_serv_conf_first_link(new_smc); if (reason_code) goto decline; } diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index ca209272e5fa..4f2e150a2be1 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -44,6 +44,7 @@ #define SMC_CLC_DECL_DIFFPREFIX 0x03070000 /* IP prefix / subnet mismatch */ #define SMC_CLC_DECL_GETVLANERR 0x03080000 /* err to get vlan id of ip device*/ #define SMC_CLC_DECL_ISMVLANERR 0x03090000 /* err to reg vlan id on ism dev */ +#define SMC_CLC_DECL_NOACTLINK 0x030a0000 /* no active smc-r link in lgr */ #define SMC_CLC_DECL_SYNCERR 0x04000000 /* synchronization error */ #define SMC_CLC_DECL_PEERDECL 0x05000000 /* peer declined during handshake */ #define SMC_CLC_DECL_INTERR 0x09990000 /* internal error */ diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 1d695093f205..5df3f8f41d19 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -116,7 +116,7 @@ static void smc_lgr_add_alert_token(struct smc_connection *conn) * Requires @conns_lock * Note that '0' is a reserved value and not assigned. */ -static void smc_lgr_register_conn(struct smc_connection *conn) +static int smc_lgr_register_conn(struct smc_connection *conn) { struct smc_sock *smc = container_of(conn, struct smc_sock, conn); static atomic_t nexttoken = ATOMIC_INIT(0); @@ -133,10 +133,22 @@ static void smc_lgr_register_conn(struct smc_connection *conn) smc_lgr_add_alert_token(conn); /* assign the new connection to a link */ - if (!conn->lgr->is_smcd) - conn->lnk = &conn->lgr->lnk[SMC_SINGLE_LINK]; + if (!conn->lgr->is_smcd) { + struct smc_link *lnk; + int i; + /* tbd - link balancing */ + for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { + lnk = &conn->lgr->lnk[i]; + if (lnk->state == SMC_LNK_ACTIVATING || + lnk->state == SMC_LNK_ACTIVE) + conn->lnk = lnk; + } + if (!conn->lnk) + return SMC_CLC_DECL_NOACTLINK; + } conn->lgr->conns_num++; + return 0; } /* Unregister connection and reset the alert token of the given connection< @@ -202,8 +214,8 @@ static void smc_lgr_free_work(struct work_struct *work) struct smc_link_group, free_work); spinlock_t *lgr_lock; - struct smc_link *lnk; bool conns; + int i; smc_lgr_list_head(lgr, &lgr_lock); spin_lock_bh(lgr_lock); @@ -220,25 +232,38 @@ static void smc_lgr_free_work(struct work_struct *work) } list_del_init(&lgr->list); /* remove from smc_lgr_list */ - lnk = &lgr->lnk[SMC_SINGLE_LINK]; if (!lgr->is_smcd && !lgr->terminating) { - /* try to send del link msg, on error free lgr immediately */ - if (lnk->state == SMC_LNK_ACTIVE && - !smcr_link_send_delete(lnk, true)) { - /* reschedule in case we never receive a response */ - smc_lgr_schedule_free_work(lgr); + bool do_wait = false; + + for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { + struct smc_link *lnk = &lgr->lnk[i]; + /* try to send del link msg, on err free immediately */ + if (lnk->state == SMC_LNK_ACTIVE && + !smcr_link_send_delete(lnk, true)) { + /* reschedule in case we never receive a resp */ + smc_lgr_schedule_free_work(lgr); + do_wait = true; + } + } + if (do_wait) { spin_unlock_bh(lgr_lock); - return; + return; /* wait for resp, see smc_llc_rx_delete_link */ } } lgr->freeing = 1; /* this instance does the freeing, no new schedule */ spin_unlock_bh(lgr_lock); cancel_delayed_work(&lgr->free_work); - if (!lgr->is_smcd && lnk->state != SMC_LNK_INACTIVE) - smc_llc_link_inactive(lnk); if (lgr->is_smcd && !lgr->terminating) smc_ism_signal_shutdown(lgr); + if (!lgr->is_smcd) { + for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { + struct smc_link *lnk = &lgr->lnk[i]; + + if (lnk->state != SMC_LNK_INACTIVE) + smc_llc_link_inactive(lnk); + } + } smc_lgr_free(lgr); } @@ -417,29 +442,37 @@ out: return rc; } +static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc, + struct smc_link *lnk) +{ + struct smc_link_group *lgr = lnk->lgr; + + if (rmb_desc->is_conf_rkey && !list_empty(&lgr->list)) { + /* unregister rmb with peer */ + smc_llc_do_delete_rkey(lnk, rmb_desc); + rmb_desc->is_conf_rkey = false; + } + if (rmb_desc->is_reg_err) { + /* buf registration failed, reuse not possible */ + write_lock_bh(&lgr->rmbs_lock); + list_del(&rmb_desc->list); + write_unlock_bh(&lgr->rmbs_lock); + + smc_buf_free(lgr, true, rmb_desc); + } else { + rmb_desc->used = 0; + } +} + static void smc_buf_unuse(struct smc_connection *conn, struct smc_link_group *lgr) { if (conn->sndbuf_desc) conn->sndbuf_desc->used = 0; - if (conn->rmb_desc) { - if (!conn->rmb_desc->regerr) { - if (!lgr->is_smcd && !list_empty(&lgr->list)) { - /* unregister rmb with peer */ - smc_llc_do_delete_rkey( - conn->lnk, - conn->rmb_desc); - } - conn->rmb_desc->used = 0; - } else { - /* buf registration failed, reuse not possible */ - write_lock_bh(&lgr->rmbs_lock); - list_del(&conn->rmb_desc->list); - write_unlock_bh(&lgr->rmbs_lock); - - smc_buf_free(lgr, true, conn->rmb_desc); - } - } + if (conn->rmb_desc && lgr->is_smcd) + conn->rmb_desc->used = 0; + else if (conn->rmb_desc) + smcr_buf_unuse(conn->rmb_desc, conn->lnk); } /* remove a finished connection from its link group */ @@ -467,6 +500,8 @@ void smc_conn_free(struct smc_connection *conn) static void smcr_link_clear(struct smc_link *lnk) { + if (lnk->peer_qpn == 0) + return; lnk->peer_qpn = 0; smc_llc_link_clear(lnk); smc_ib_modify_qp_reset(lnk); @@ -482,17 +517,23 @@ static void smcr_link_clear(struct smc_link *lnk) static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb, struct smc_buf_desc *buf_desc) { - struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK]; + struct smc_link *lnk; + int i; - if (is_rmb) { - if (buf_desc->mr_rx[lnk->link_idx]) - smc_ib_put_memory_region( - buf_desc->mr_rx[lnk->link_idx]); - smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE); - } else { - smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE); + for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { + lnk = &lgr->lnk[i]; + if (!buf_desc->is_map_ib[lnk->link_idx]) + continue; + if (is_rmb) { + if (buf_desc->mr_rx[lnk->link_idx]) + smc_ib_put_memory_region( + buf_desc->mr_rx[lnk->link_idx]); + smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE); + } else { + smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE); + } + sg_free_table(&buf_desc->sgt[lnk->link_idx]); } - sg_free_table(&buf_desc->sgt[lnk->link_idx]); if (buf_desc->pages) __free_pages(buf_desc->pages, buf_desc->order); @@ -551,6 +592,8 @@ static void smc_lgr_free_bufs(struct smc_link_group *lgr) /* remove a link group */ static void smc_lgr_free(struct smc_link_group *lgr) { + int i; + smc_lgr_free_bufs(lgr); if (lgr->is_smcd) { if (!lgr->terminating) { @@ -560,7 +603,11 @@ static void smc_lgr_free(struct smc_link_group *lgr) if (!atomic_dec_return(&lgr->smcd->lgr_cnt)) wake_up(&lgr->smcd->lgrs_deleted); } else { - smcr_link_clear(&lgr->lnk[SMC_SINGLE_LINK]); + for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { + if (lgr->lnk[i].state == SMC_LNK_INACTIVE) + continue; + smcr_link_clear(&lgr->lnk[i]); + } if (!atomic_dec_return(&lgr_cnt)) wake_up(&lgrs_deleted); } @@ -628,16 +675,20 @@ static void smc_conn_kill(struct smc_connection *conn, bool soft) static void smc_lgr_cleanup(struct smc_link_group *lgr) { + int i; + if (lgr->is_smcd) { smc_ism_signal_shutdown(lgr); smcd_unregister_all_dmbs(lgr); smc_ism_put_vlan(lgr->smcd, lgr->vlan_id); put_device(&lgr->smcd->dev); } else { - struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK]; + for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { + struct smc_link *lnk = &lgr->lnk[i]; - if (lnk->state != SMC_LNK_INACTIVE) - smc_llc_link_inactive(lnk); + if (lnk->state != SMC_LNK_INACTIVE) + smc_llc_link_inactive(lnk); + } } } @@ -650,6 +701,7 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft) struct smc_connection *conn; struct smc_sock *smc; struct rb_node *node; + int i; if (lgr->terminating) return; /* lgr already terminating */ @@ -657,7 +709,8 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft) cancel_delayed_work_sync(&lgr->free_work); lgr->terminating = 1; if (!lgr->is_smcd) - smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]); + for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) + smc_llc_link_inactive(&lgr->lnk[i]); /* kill remaining link group connections */ read_lock_bh(&lgr->conns_lock); @@ -703,14 +756,22 @@ void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport) { struct smc_link_group *lgr, *l; LIST_HEAD(lgr_free_list); + int i; spin_lock_bh(&smc_lgr_list.lock); list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) { - if (!lgr->is_smcd && - lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev && - lgr->lnk[SMC_SINGLE_LINK].ibport == ibport) { - list_move(&lgr->list, &lgr_free_list); - lgr->freeing = 1; + if (lgr->is_smcd) + continue; + /* tbd - terminate only when no more links are active */ + for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { + if (lgr->lnk[i].state == SMC_LNK_INACTIVE || + lgr->lnk[i].state == SMC_LNK_DELETING) + continue; + if (lgr->lnk[i].smcibdev == smcibdev && + lgr->lnk[i].ibport == ibport) { + list_move(&lgr->list, &lgr_free_list); + lgr->freeing = 1; + } } } spin_unlock_bh(&smc_lgr_list.lock); @@ -775,6 +836,7 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev) { struct smc_link_group *lgr, *lg; LIST_HEAD(lgr_free_list); + int i; spin_lock_bh(&smc_lgr_list.lock); if (!smcibdev) { @@ -783,9 +845,12 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev) lgr->freeing = 1; } else { list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) { - if (lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev) { - list_move(&lgr->list, &lgr_free_list); - lgr->freeing = 1; + for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { + if (lgr->lnk[i].smcibdev == smcibdev) { + list_move(&lgr->list, &lgr_free_list); + lgr->freeing = 1; + break; + } } } } @@ -857,15 +922,21 @@ static bool smcr_lgr_match(struct smc_link_group *lgr, struct smc_clc_msg_local *lcl, enum smc_lgr_role role, u32 clcqpn) { - return !memcmp(lgr->peer_systemid, lcl->id_for_peer, - SMC_SYSTEMID_LEN) && - !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_gid, &lcl->gid, - SMC_GID_SIZE) && - !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_mac, lcl->mac, - sizeof(lcl->mac)) && - lgr->role == role && - (lgr->role == SMC_SERV || - lgr->lnk[SMC_SINGLE_LINK].peer_qpn == clcqpn); + int i; + + if (memcmp(lgr->peer_systemid, lcl->id_for_peer, SMC_SYSTEMID_LEN) || + lgr->role != role) + return false; + + for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { + if (lgr->lnk[i].state != SMC_LNK_ACTIVE) + continue; + if ((lgr->role == SMC_SERV || lgr->lnk[i].peer_qpn == clcqpn) && + !memcmp(lgr->lnk[i].peer_gid, &lcl->gid, SMC_GID_SIZE) && + !memcmp(lgr->lnk[i].peer_mac, lcl->mac, sizeof(lcl->mac))) + return true; + } + return false; } static bool smcd_lgr_match(struct smc_link_group *lgr, @@ -906,15 +977,17 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini) /* link group found */ ini->cln_first_contact = SMC_REUSE_CONTACT; conn->lgr = lgr; - smc_lgr_register_conn(conn); /* add smc conn to lgr */ - if (delayed_work_pending(&lgr->free_work)) - cancel_delayed_work(&lgr->free_work); + rc = smc_lgr_register_conn(conn); /* add conn to lgr */ write_unlock_bh(&lgr->conns_lock); + if (!rc && delayed_work_pending(&lgr->free_work)) + cancel_delayed_work(&lgr->free_work); break; } write_unlock_bh(&lgr->conns_lock); } spin_unlock_bh(lgr_lock); + if (rc) + return rc; if (role == SMC_CLNT && !ini->srv_first_contact && ini->cln_first_contact == SMC_FIRST_CONTACT) { @@ -932,8 +1005,10 @@ create: goto out; lgr = conn->lgr; write_lock_bh(&lgr->conns_lock); - smc_lgr_register_conn(conn); /* add smc conn to lgr */ + rc = smc_lgr_register_conn(conn); /* add smc conn to lgr */ write_unlock_bh(&lgr->conns_lock); + if (rc) + goto out; } conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE; conn->local_tx_ctrl.len = SMC_WR_TX_SIZE; @@ -1006,12 +1081,55 @@ static inline int smc_rmb_wnd_update_limit(int rmbe_size) return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2); } +/* map an rmb buf to a link */ +static int smcr_buf_map_link(struct smc_buf_desc *buf_desc, bool is_rmb, + struct smc_link *lnk) +{ + int rc; + + if (buf_desc->is_map_ib[lnk->link_idx]) + return 0; + + rc = sg_alloc_table(&buf_desc->sgt[lnk->link_idx], 1, GFP_KERNEL); + if (rc) + return rc; + sg_set_buf(buf_desc->sgt[lnk->link_idx].sgl, + buf_desc->cpu_addr, buf_desc->len); + + /* map sg table to DMA address */ + rc = smc_ib_buf_map_sg(lnk, buf_desc, + is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE); + /* SMC protocol depends on mapping to one DMA address only */ + if (rc != 1) { + rc = -EAGAIN; + goto free_table; + } + + /* create a new memory region for the RMB */ + if (is_rmb) { + rc = smc_ib_get_memory_region(lnk->roce_pd, + IB_ACCESS_REMOTE_WRITE | + IB_ACCESS_LOCAL_WRITE, + buf_desc, lnk->link_idx); + if (rc) + goto buf_unmap; + smc_ib_sync_sg_for_device(lnk, buf_desc, DMA_FROM_DEVICE); + } + buf_desc->is_map_ib[lnk->link_idx] = true; + return 0; + +buf_unmap: + smc_ib_buf_unmap_sg(lnk, buf_desc, + is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE); +free_table: + sg_free_table(&buf_desc->sgt[lnk->link_idx]); + return rc; +} + static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr, bool is_rmb, int bufsize) { struct smc_buf_desc *buf_desc; - struct smc_link *lnk; - int rc; /* try to alloc a new buffer */ buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL); @@ -1028,42 +1146,34 @@ static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr, return ERR_PTR(-EAGAIN); } buf_desc->cpu_addr = (void *)page_address(buf_desc->pages); - - /* build the sg table from the pages */ - lnk = &lgr->lnk[SMC_SINGLE_LINK]; - rc = sg_alloc_table(&buf_desc->sgt[lnk->link_idx], 1, GFP_KERNEL); - if (rc) { - smc_buf_free(lgr, is_rmb, buf_desc); - return ERR_PTR(rc); - } - sg_set_buf(buf_desc->sgt[lnk->link_idx].sgl, - buf_desc->cpu_addr, bufsize); - - /* map sg table to DMA address */ - rc = smc_ib_buf_map_sg(lnk, buf_desc, - is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE); - /* SMC protocol depends on mapping to one DMA address only */ - if (rc != 1) { - smc_buf_free(lgr, is_rmb, buf_desc); - return ERR_PTR(-EAGAIN); - } - - /* create a new memory region for the RMB */ - if (is_rmb) { - rc = smc_ib_get_memory_region(lnk->roce_pd, - IB_ACCESS_REMOTE_WRITE | - IB_ACCESS_LOCAL_WRITE, - buf_desc, lnk->link_idx); - if (rc) { - smc_buf_free(lgr, is_rmb, buf_desc); - return ERR_PTR(rc); - } - } - buf_desc->len = bufsize; return buf_desc; } +/* map buf_desc on all usable links, + * unused buffers stay mapped as long as the link is up + */ +static int smcr_buf_map_usable_links(struct smc_link_group *lgr, + struct smc_buf_desc *buf_desc, bool is_rmb) +{ + int i, rc = 0; + + for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { + struct smc_link *lnk = &lgr->lnk[i]; + + if (lnk->state != SMC_LNK_ACTIVE && + lnk->state != SMC_LNK_ACTIVATING) + continue; + if (smcr_buf_map_link(buf_desc, is_rmb, lnk)) { + smcr_buf_unuse(buf_desc, lnk); + rc = -ENOMEM; + goto out; + } + } +out: + return rc; +} + #define SMCD_DMBE_SIZES 7 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */ static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr, @@ -1159,6 +1269,12 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) if (IS_ERR(buf_desc)) return -ENOMEM; + if (!is_smcd) { + if (smcr_buf_map_usable_links(lgr, buf_desc, is_rmb)) { + return -ENOMEM; + } + } + if (is_rmb) { conn->rmb_desc = buf_desc; conn->rmbe_size_short = bufsize_short; @@ -1192,22 +1308,32 @@ void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn) void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn) { - struct smc_link_group *lgr = conn->lgr; + int i; if (!conn->lgr || conn->lgr->is_smcd) return; - smc_ib_sync_sg_for_cpu(&lgr->lnk[SMC_SINGLE_LINK], - conn->rmb_desc, DMA_FROM_DEVICE); + for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { + if (conn->lgr->lnk[i].state != SMC_LNK_ACTIVE && + conn->lgr->lnk[i].state != SMC_LNK_ACTIVATING) + continue; + smc_ib_sync_sg_for_cpu(&conn->lgr->lnk[i], conn->rmb_desc, + DMA_FROM_DEVICE); + } } void smc_rmb_sync_sg_for_device(struct smc_connection *conn) { - struct smc_link_group *lgr = conn->lgr; + int i; if (!conn->lgr || conn->lgr->is_smcd) return; - smc_ib_sync_sg_for_device(&lgr->lnk[SMC_SINGLE_LINK], - conn->rmb_desc, DMA_FROM_DEVICE); + for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { + if (conn->lgr->lnk[i].state != SMC_LNK_ACTIVE && + conn->lgr->lnk[i].state != SMC_LNK_ACTIVATING) + continue; + smc_ib_sync_sg_for_device(&conn->lgr->lnk[i], conn->rmb_desc, + DMA_FROM_DEVICE); + } } /* create the send and receive buffer for an SMC socket; diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index c71c35a3596c..66753ba23bc6 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -152,25 +152,32 @@ struct smc_buf_desc { struct page *pages; int len; /* length of buffer */ u32 used; /* currently used / unused */ - u8 wr_reg : 1; /* mem region registered */ - u8 regerr : 1; /* err during registration */ union { struct { /* SMC-R */ - struct sg_table sgt[SMC_LINKS_PER_LGR_MAX]; - /* virtual buffer */ - struct ib_mr *mr_rx[SMC_LINKS_PER_LGR_MAX]; - /* for rmb only: memory region - * incl. rkey provided to peer - */ - u32 order; /* allocation order */ + struct sg_table sgt[SMC_LINKS_PER_LGR_MAX]; + /* virtual buffer */ + struct ib_mr *mr_rx[SMC_LINKS_PER_LGR_MAX]; + /* for rmb only: memory region + * incl. rkey provided to peer + */ + u32 order; /* allocation order */ + + u8 is_conf_rkey; + /* confirm_rkey done */ + u8 is_reg_mr[SMC_LINKS_PER_LGR_MAX]; + /* mem region registered */ + u8 is_map_ib[SMC_LINKS_PER_LGR_MAX]; + /* mem region mapped to lnk */ + u8 is_reg_err; + /* buffer registration err */ }; struct { /* SMC-D */ - unsigned short sba_idx; - /* SBA index number */ - u64 token; - /* DMB token number */ - dma_addr_t dma_addr; - /* DMA address */ + unsigned short sba_idx; + /* SBA index number */ + u64 token; + /* DMB token number */ + dma_addr_t dma_addr; + /* DMA address */ }; }; }; diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 34d0752ba6af..903ae068da3a 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -662,6 +662,8 @@ void smc_llc_link_deleting(struct smc_link *link) /* called in tasklet context */ void smc_llc_link_inactive(struct smc_link *link) { + if (link->state == SMC_LNK_INACTIVE) + return; link->state = SMC_LNK_INACTIVE; cancel_delayed_work(&link->llc_testlink_wrk); smc_wr_wakeup_reg_wait(link); From e07d31dc16b0d77ff6b3f71cafe3a825fb80bed4 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Wed, 29 Apr 2020 17:10:42 +0200 Subject: [PATCH 06/13] net/smc: multi-link support for smc_rmb_rtoken_handling() Extend smc_rmb_rtoken_handling() and smc_rtoken_delete() to support multiple links. Signed-off-by: Karsten Graul Reviewed-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 4 ++-- net/smc/smc_core.c | 14 ++++++++------ net/smc/smc_core.h | 2 +- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 890dc6422f8c..e39f6aedd3bd 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -640,7 +640,7 @@ static int smc_connect_rdma(struct smc_sock *smc, if (ini->cln_first_contact == SMC_FIRST_CONTACT) smc_link_save_peer_info(link, aclc); - if (smc_rmb_rtoken_handling(&smc->conn, aclc)) + if (smc_rmb_rtoken_handling(&smc->conn, link, aclc)) return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RTOK, ini->cln_first_contact); @@ -1231,7 +1231,7 @@ static int smc_listen_rdma_finish(struct smc_sock *new_smc, if (local_contact == SMC_FIRST_CONTACT) smc_link_save_peer_info(link, cclc); - if (smc_rmb_rtoken_handling(&new_smc->conn, cclc)) { + if (smc_rmb_rtoken_handling(&new_smc->conn, link, cclc)) { reason_code = SMC_CLC_DECL_ERR_RTOK; goto decline; } diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 5df3f8f41d19..e8897d60b27f 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -1392,19 +1392,20 @@ int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey) return i; } -/* delete an rtoken */ +/* delete an rtoken from all links */ int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey) { struct smc_link_group *lgr = smc_get_lgr(lnk); u32 rkey = ntohl(nw_rkey); - int i; + int i, j; for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) { if (lgr->rtokens[i][lnk->link_idx].rkey == rkey && test_bit(i, lgr->rtokens_used_mask)) { - lgr->rtokens[i][lnk->link_idx].rkey = 0; - lgr->rtokens[i][lnk->link_idx].dma_addr = 0; - + for (j = 0; j < SMC_LINKS_PER_LGR_MAX; j++) { + lgr->rtokens[i][j].rkey = 0; + lgr->rtokens[i][j].dma_addr = 0; + } clear_bit(i, lgr->rtokens_used_mask); return 0; } @@ -1414,9 +1415,10 @@ int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey) /* save rkey and dma_addr received from peer during clc handshake */ int smc_rmb_rtoken_handling(struct smc_connection *conn, + struct smc_link *lnk, struct smc_clc_msg_accept_confirm *clc) { - conn->rtoken_idx = smc_rtoken_add(conn->lnk, clc->rmb_dma_addr, + conn->rtoken_idx = smc_rtoken_add(lnk, clc->rmb_dma_addr, clc->rmb_rkey); if (conn->rtoken_idx < 0) return conn->rtoken_idx; diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 66753ba23bc6..f68ba187ecf8 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -309,7 +309,7 @@ void smc_smcd_terminate_all(struct smcd_dev *dev); void smc_smcr_terminate_all(struct smc_ib_device *smcibdev); int smc_buf_create(struct smc_sock *smc, bool is_smcd); int smc_uncompress_bufsize(u8 compressed); -int smc_rmb_rtoken_handling(struct smc_connection *conn, +int smc_rmb_rtoken_handling(struct smc_connection *conn, struct smc_link *link, struct smc_clc_msg_accept_confirm *clc); int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey); int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey); From d854fcbfaeda9748c85de296fbe07b7763a1939c Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Wed, 29 Apr 2020 17:10:43 +0200 Subject: [PATCH 07/13] net/smc: add new link state and related helpers Before a link can be reused it must have been cleared. Lowest current link state is INACTIVE, which does not mean that the link is already cleared. Add a new state UNUSED that is set when the link is cleared and can be reused. Add helper smc_llc_usable_link() to find an active link in a link group, and smc_link_usable() to determine if a link is usable. Signed-off-by: Karsten Graul Reviewed-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_core.c | 36 +++++++++++++++++++----------------- net/smc/smc_core.h | 9 +++++++++ net/smc/smc_llc.c | 4 ++-- net/smc/smc_llc.h | 11 +++++++++++ net/smc/smc_wr.c | 2 +- 5 files changed, 42 insertions(+), 20 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index e8897d60b27f..57890cbd4e8a 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -260,7 +260,7 @@ static void smc_lgr_free_work(struct work_struct *work) for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { struct smc_link *lnk = &lgr->lnk[i]; - if (lnk->state != SMC_LNK_INACTIVE) + if (smc_link_usable(lnk)) smc_llc_link_inactive(lnk); } } @@ -286,7 +286,7 @@ static u8 smcr_next_link_id(struct smc_link_group *lgr) if (!link_id) /* skip zero as link_id */ link_id = ++lgr->next_link_id; for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { - if (lgr->lnk[i].state != SMC_LNK_INACTIVE && + if (smc_link_usable(&lgr->lnk[i]) && lgr->lnk[i].link_id == link_id) continue; } @@ -350,6 +350,7 @@ clear_llc_lnk: out: put_device(&ini->ib_dev->ibdev->dev); memset(lnk, 0, sizeof(struct smc_link)); + lnk->state = SMC_LNK_UNUSED; if (!atomic_dec_return(&ini->ib_dev->lnk_cnt)) wake_up(&ini->ib_dev->lnks_deleted); return rc; @@ -500,6 +501,8 @@ void smc_conn_free(struct smc_connection *conn) static void smcr_link_clear(struct smc_link *lnk) { + struct smc_ib_device *smcibdev; + if (lnk->peer_qpn == 0) return; lnk->peer_qpn = 0; @@ -510,8 +513,11 @@ static void smcr_link_clear(struct smc_link *lnk) smc_ib_dealloc_protection_domain(lnk); smc_wr_free_link_mem(lnk); put_device(&lnk->smcibdev->ibdev->dev); - if (!atomic_dec_return(&lnk->smcibdev->lnk_cnt)) - wake_up(&lnk->smcibdev->lnks_deleted); + smcibdev = lnk->smcibdev; + memset(lnk, 0, sizeof(struct smc_link)); + lnk->state = SMC_LNK_UNUSED; + if (!atomic_dec_return(&smcibdev->lnk_cnt)) + wake_up(&smcibdev->lnks_deleted); } static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb, @@ -604,9 +610,8 @@ static void smc_lgr_free(struct smc_link_group *lgr) wake_up(&lgr->smcd->lgrs_deleted); } else { for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { - if (lgr->lnk[i].state == SMC_LNK_INACTIVE) - continue; - smcr_link_clear(&lgr->lnk[i]); + if (lgr->lnk[i].state != SMC_LNK_UNUSED) + smcr_link_clear(&lgr->lnk[i]); } if (!atomic_dec_return(&lgr_cnt)) wake_up(&lgrs_deleted); @@ -686,7 +691,7 @@ static void smc_lgr_cleanup(struct smc_link_group *lgr) for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { struct smc_link *lnk = &lgr->lnk[i]; - if (lnk->state != SMC_LNK_INACTIVE) + if (smc_link_usable(lnk)) smc_llc_link_inactive(lnk); } } @@ -764,7 +769,7 @@ void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport) continue; /* tbd - terminate only when no more links are active */ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { - if (lgr->lnk[i].state == SMC_LNK_INACTIVE || + if (!smc_link_usable(&lgr->lnk[i]) || lgr->lnk[i].state == SMC_LNK_DELETING) continue; if (lgr->lnk[i].smcibdev == smcibdev && @@ -1161,8 +1166,7 @@ static int smcr_buf_map_usable_links(struct smc_link_group *lgr, for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { struct smc_link *lnk = &lgr->lnk[i]; - if (lnk->state != SMC_LNK_ACTIVE && - lnk->state != SMC_LNK_ACTIVATING) + if (!smc_link_usable(lnk)) continue; if (smcr_buf_map_link(buf_desc, is_rmb, lnk)) { smcr_buf_unuse(buf_desc, lnk); @@ -1294,14 +1298,14 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn) { - if (!conn->lgr || conn->lgr->is_smcd) + if (!conn->lgr || conn->lgr->is_smcd || !smc_link_usable(conn->lnk)) return; smc_ib_sync_sg_for_cpu(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE); } void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn) { - if (!conn->lgr || conn->lgr->is_smcd) + if (!conn->lgr || conn->lgr->is_smcd || !smc_link_usable(conn->lnk)) return; smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE); } @@ -1313,8 +1317,7 @@ void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn) if (!conn->lgr || conn->lgr->is_smcd) return; for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { - if (conn->lgr->lnk[i].state != SMC_LNK_ACTIVE && - conn->lgr->lnk[i].state != SMC_LNK_ACTIVATING) + if (!smc_link_usable(&conn->lgr->lnk[i])) continue; smc_ib_sync_sg_for_cpu(&conn->lgr->lnk[i], conn->rmb_desc, DMA_FROM_DEVICE); @@ -1328,8 +1331,7 @@ void smc_rmb_sync_sg_for_device(struct smc_connection *conn) if (!conn->lgr || conn->lgr->is_smcd) return; for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { - if (conn->lgr->lnk[i].state != SMC_LNK_ACTIVE && - conn->lgr->lnk[i].state != SMC_LNK_ACTIVATING) + if (!smc_link_usable(&conn->lgr->lnk[i])) continue; smc_ib_sync_sg_for_device(&conn->lgr->lnk[i], conn->rmb_desc, DMA_FROM_DEVICE); diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index f68ba187ecf8..2b1960c8c8ce 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -32,6 +32,7 @@ enum smc_lgr_role { /* possible roles of a link group */ }; enum smc_link_state { /* possible states of a link */ + SMC_LNK_UNUSED, /* link is unused */ SMC_LNK_INACTIVE, /* link is inactive */ SMC_LNK_ACTIVATING, /* link is being activated */ SMC_LNK_ACTIVE, /* link is active */ @@ -295,6 +296,14 @@ static inline struct smc_connection *smc_lgr_find_conn( return res; } +/* returns true if the specified link is usable */ +static inline bool smc_link_usable(struct smc_link *lnk) +{ + if (lnk->state == SMC_LNK_UNUSED || lnk->state == SMC_LNK_INACTIVE) + return false; + return true; +} + struct smc_sock; struct smc_clc_msg_accept_confirm; struct smc_clc_msg_local; diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 903ae068da3a..c267f5006faa 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -372,7 +372,7 @@ static void smc_llc_send_message_work(struct work_struct *work) struct smc_wr_buf *wr_buf; int rc; - if (llcwrk->link->state == SMC_LNK_INACTIVE) + if (!smc_link_usable(llcwrk->link)) goto out; rc = smc_llc_add_pending_send(llcwrk->link, &wr_buf, &pend); if (rc) @@ -562,7 +562,7 @@ static void smc_llc_rx_handler(struct ib_wc *wc, void *buf) return; /* short message */ if (llc->raw.hdr.length != sizeof(*llc)) return; /* invalid message */ - if (link->state == SMC_LNK_INACTIVE) + if (!smc_link_usable(link)) return; /* link not active, drop msg */ switch (llc->raw.hdr.common.type) { diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h index 461c0c3ef76e..08171131110c 100644 --- a/net/smc/smc_llc.h +++ b/net/smc/smc_llc.h @@ -35,6 +35,17 @@ enum smc_llc_msg_type { SMC_LLC_DELETE_RKEY = 0x09, }; +/* returns a usable link of the link group, or NULL */ +static inline struct smc_link *smc_llc_usable_link(struct smc_link_group *lgr) +{ + int i; + + for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) + if (smc_link_usable(&lgr->lnk[i])) + return &lgr->lnk[i]; + return NULL; +} + /* transmit */ int smc_llc_send_confirm_link(struct smc_link *lnk, enum smc_llc_reqresp reqresp); diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index 337ee52ad3d3..93223628c002 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -207,7 +207,7 @@ int smc_wr_tx_get_free_slot(struct smc_link *link, } else { rc = wait_event_interruptible_timeout( link->wr_tx_wait, - link->state == SMC_LNK_INACTIVE || + !smc_link_usable(link) || lgr->terminating || (smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY), SMC_WR_TX_WAIT_FREE_SLOT_TIME); From 1020e1ef53ceef715f2bc144eebbfe01e88effcf Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Wed, 29 Apr 2020 17:10:44 +0200 Subject: [PATCH 08/13] net/smc: move testlink work to system work queue The testlink work waits for a response to the testlink request and blocks the single threaded llc_wq. This type of work does not have to be serialized and can be moved to the system work queue. Signed-off-by: Karsten Graul Reviewed-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_llc.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index c267f5006faa..69cc0d65b437 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -613,14 +613,15 @@ static void smc_llc_testlink_work(struct work_struct *work) /* receive TEST LINK response over RoCE fabric */ rc = wait_for_completion_interruptible_timeout(&link->llc_testlink_resp, SMC_LLC_WAIT_TIME); + if (link->state != SMC_LNK_ACTIVE) + return; /* link state changed */ if (rc <= 0) { smc_lgr_terminate_sched(smc_get_lgr(link)); return; } next_interval = link->llc_testlink_time; out: - queue_delayed_work(link->llc_wq, &link->llc_testlink_wrk, - next_interval); + schedule_delayed_work(&link->llc_testlink_wrk, next_interval); } int smc_llc_link_init(struct smc_link *link) @@ -648,8 +649,8 @@ void smc_llc_link_active(struct smc_link *link, int testlink_time) link->state = SMC_LNK_ACTIVE; if (testlink_time) { link->llc_testlink_time = testlink_time * HZ; - queue_delayed_work(link->llc_wq, &link->llc_testlink_wrk, - link->llc_testlink_time); + schedule_delayed_work(&link->llc_testlink_wrk, + link->llc_testlink_time); } } @@ -665,7 +666,7 @@ void smc_llc_link_inactive(struct smc_link *link) if (link->state == SMC_LNK_INACTIVE) return; link->state = SMC_LNK_INACTIVE; - cancel_delayed_work(&link->llc_testlink_wrk); + cancel_delayed_work_sync(&link->llc_testlink_wrk); smc_wr_wakeup_reg_wait(link); smc_wr_wakeup_tx_wait(link); } From 2140ac26f8f501d3cc8f1575e6419f1a50779496 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Wed, 29 Apr 2020 17:10:45 +0200 Subject: [PATCH 09/13] net/smc: simplify link deactivation Cancel the testlink worker during link clear processing and remove the extra function smc_llc_link_inactive(). Signed-off-by: Karsten Graul Reviewed-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_core.c | 8 ++------ net/smc/smc_llc.c | 15 ++++----------- net/smc/smc_llc.h | 1 - 3 files changed, 6 insertions(+), 18 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 57890cbd4e8a..78ccfbf6e4af 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -261,7 +261,7 @@ static void smc_lgr_free_work(struct work_struct *work) struct smc_link *lnk = &lgr->lnk[i]; if (smc_link_usable(lnk)) - smc_llc_link_inactive(lnk); + lnk->state = SMC_LNK_INACTIVE; } } smc_lgr_free(lgr); @@ -692,7 +692,7 @@ static void smc_lgr_cleanup(struct smc_link_group *lgr) struct smc_link *lnk = &lgr->lnk[i]; if (smc_link_usable(lnk)) - smc_llc_link_inactive(lnk); + lnk->state = SMC_LNK_INACTIVE; } } } @@ -706,16 +706,12 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft) struct smc_connection *conn; struct smc_sock *smc; struct rb_node *node; - int i; if (lgr->terminating) return; /* lgr already terminating */ if (!soft) cancel_delayed_work_sync(&lgr->free_work); lgr->terminating = 1; - if (!lgr->is_smcd) - for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) - smc_llc_link_inactive(&lgr->lnk[i]); /* kill remaining link group connections */ read_lock_bh(&lgr->conns_lock); diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 69cc0d65b437..2f03131c85fd 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -660,22 +660,15 @@ void smc_llc_link_deleting(struct smc_link *link) smc_wr_wakeup_tx_wait(link); } -/* called in tasklet context */ -void smc_llc_link_inactive(struct smc_link *link) -{ - if (link->state == SMC_LNK_INACTIVE) - return; - link->state = SMC_LNK_INACTIVE; - cancel_delayed_work_sync(&link->llc_testlink_wrk); - smc_wr_wakeup_reg_wait(link); - smc_wr_wakeup_tx_wait(link); -} - /* called in worker context */ void smc_llc_link_clear(struct smc_link *link) { flush_workqueue(link->llc_wq); destroy_workqueue(link->llc_wq); + complete(&link->llc_testlink_resp); + cancel_delayed_work_sync(&link->llc_testlink_wrk); + smc_wr_wakeup_reg_wait(link); + smc_wr_wakeup_tx_wait(link); } /* register a new rtoken at the remote peer */ diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h index 08171131110c..c2c9d48d079f 100644 --- a/net/smc/smc_llc.h +++ b/net/smc/smc_llc.h @@ -56,7 +56,6 @@ int smc_llc_send_delete_link(struct smc_link *link, int smc_llc_link_init(struct smc_link *link); void smc_llc_link_active(struct smc_link *link, int testlink_time); void smc_llc_link_deleting(struct smc_link *link); -void smc_llc_link_inactive(struct smc_link *link); void smc_llc_link_clear(struct smc_link *link); int smc_llc_do_confirm_rkey(struct smc_link *link, struct smc_buf_desc *rmb_desc); From 6c8968c421e0e6bea8a78ee4fdd043d850cd5b26 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Wed, 29 Apr 2020 17:10:46 +0200 Subject: [PATCH 10/13] net/smc: use worker to process incoming llc messages Incoming llc messages are processed in irq tasklet context, and a worker is used to send outgoing messages. The worker is needed because getting a send buffer could result in a wait for a free buffer. To make sure all incoming llc messages are processed in a serialized way introduce an event queue and create a new queue entry for each message which is queued to this event queue. A new worker processes the event queue entries in order. And remove the use of a separate worker to send outgoing llc messages because the messages are processed in worker context already. With this event queue the serialized llc_wq work queue is obsolete, remove it. Signed-off-by: Karsten Graul Reviewed-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_core.c | 4 +- net/smc/smc_core.h | 7 ++- net/smc/smc_llc.c | 142 +++++++++++++++++++++++++++------------------ net/smc/smc_llc.h | 1 + 4 files changed, 96 insertions(+), 58 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 78ccfbf6e4af..a1463da14614 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -412,7 +412,8 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT; memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer, SMC_SYSTEMID_LEN); - + INIT_LIST_HEAD(&lgr->llc_event_q); + spin_lock_init(&lgr->llc_event_q_lock); link_idx = SMC_SINGLE_LINK; lnk = &lgr->lnk[link_idx]; rc = smcr_link_init(lgr, lnk, link_idx, ini); @@ -613,6 +614,7 @@ static void smc_lgr_free(struct smc_link_group *lgr) if (lgr->lnk[i].state != SMC_LNK_UNUSED) smcr_link_clear(&lgr->lnk[i]); } + smc_llc_event_flush(lgr); if (!atomic_dec_return(&lgr_cnt)) wake_up(&lgrs_deleted); } diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 2b1960c8c8ce..6548e9a06f73 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -120,7 +120,6 @@ struct smc_link { struct smc_link_group *lgr; /* parent link group */ enum smc_link_state state; /* state of link */ - struct workqueue_struct *llc_wq; /* single thread work queue */ struct completion llc_confirm; /* wait for rx of conf link */ struct completion llc_confirm_resp; /* wait 4 rx of cnf lnk rsp */ int llc_confirm_rc; /* rc from confirm link msg */ @@ -233,6 +232,12 @@ struct smc_link_group { DECLARE_BITMAP(rtokens_used_mask, SMC_RMBS_PER_LGR_MAX); /* used rtoken elements */ u8 next_link_id; + struct list_head llc_event_q; + /* queue for llc events */ + spinlock_t llc_event_q_lock; + /* protects llc_event_q */ + struct work_struct llc_event_work; + /* llc event worker */ }; struct { /* SMC-D */ u64 peer_gid; diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 2f03131c85fd..be74876a36ae 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -134,6 +134,12 @@ union smc_llc_msg { #define SMC_LLC_FLAG_RESP 0x80 +struct smc_llc_qentry { + struct list_head list; + struct smc_link *link; + union smc_llc_msg msg; +}; + /********************************** send *************************************/ struct smc_llc_tx_pend { @@ -356,46 +362,20 @@ static int smc_llc_send_test_link(struct smc_link *link, u8 user_data[16]) return rc; } -struct smc_llc_send_work { - struct work_struct work; - struct smc_link *link; - int llclen; - union smc_llc_msg llcbuf; -}; - -/* worker that sends a prepared message */ -static void smc_llc_send_message_work(struct work_struct *work) +/* schedule an llc send on link, may wait for buffers */ +static int smc_llc_send_message(struct smc_link *link, void *llcbuf) { - struct smc_llc_send_work *llcwrk = container_of(work, - struct smc_llc_send_work, work); struct smc_wr_tx_pend_priv *pend; struct smc_wr_buf *wr_buf; int rc; - if (!smc_link_usable(llcwrk->link)) - goto out; - rc = smc_llc_add_pending_send(llcwrk->link, &wr_buf, &pend); + if (!smc_link_usable(link)) + return -ENOLINK; + rc = smc_llc_add_pending_send(link, &wr_buf, &pend); if (rc) - goto out; - memcpy(wr_buf, &llcwrk->llcbuf, llcwrk->llclen); - smc_wr_tx_send(llcwrk->link, pend); -out: - kfree(llcwrk); -} - -/* copy llcbuf and schedule an llc send on link */ -static int smc_llc_send_message(struct smc_link *link, void *llcbuf, int llclen) -{ - struct smc_llc_send_work *wrk = kmalloc(sizeof(*wrk), GFP_ATOMIC); - - if (!wrk) - return -ENOMEM; - INIT_WORK(&wrk->work, smc_llc_send_message_work); - wrk->link = link; - wrk->llclen = llclen; - memcpy(&wrk->llcbuf, llcbuf, llclen); - queue_work(link->llc_wq, &wrk->work); - return 0; + return rc; + memcpy(wr_buf, llcbuf, sizeof(union smc_llc_msg)); + return smc_wr_tx_send(link, pend); } /********************************* receive ***********************************/ @@ -452,7 +432,7 @@ static void smc_llc_rx_add_link(struct smc_link *link, link->smcibdev->mac[link->ibport - 1], link->gid, SMC_LLC_RESP); } - smc_llc_send_message(link, llc, sizeof(*llc)); + smc_llc_send_message(link, llc); } } @@ -474,7 +454,7 @@ static void smc_llc_rx_delete_link(struct smc_link *link, /* server requests to delete this link, send response */ smc_llc_prep_delete_link(llc, link, SMC_LLC_RESP, true); } - smc_llc_send_message(link, llc, sizeof(*llc)); + smc_llc_send_message(link, llc); smc_lgr_terminate_sched(lgr); } } @@ -487,7 +467,7 @@ static void smc_llc_rx_test_link(struct smc_link *link, complete(&link->llc_testlink_resp); } else { llc->hd.flags |= SMC_LLC_FLAG_RESP; - smc_llc_send_message(link, llc, sizeof(*llc)); + smc_llc_send_message(link, llc); } } @@ -510,7 +490,7 @@ static void smc_llc_rx_confirm_rkey(struct smc_link *link, llc->hd.flags |= SMC_LLC_FLAG_RESP; if (rc < 0) llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG; - smc_llc_send_message(link, llc, sizeof(*llc)); + smc_llc_send_message(link, llc); } } @@ -522,7 +502,7 @@ static void smc_llc_rx_confirm_rkey_cont(struct smc_link *link, } else { /* ignore rtokens for other links, we have only one link */ llc->hd.flags |= SMC_LLC_FLAG_RESP; - smc_llc_send_message(link, llc, sizeof(*llc)); + smc_llc_send_message(link, llc); } } @@ -549,21 +529,30 @@ static void smc_llc_rx_delete_rkey(struct smc_link *link, } llc->hd.flags |= SMC_LLC_FLAG_RESP; - smc_llc_send_message(link, llc, sizeof(*llc)); + smc_llc_send_message(link, llc); } } -static void smc_llc_rx_handler(struct ib_wc *wc, void *buf) +/* flush the llc event queue */ +void smc_llc_event_flush(struct smc_link_group *lgr) { - struct smc_link *link = (struct smc_link *)wc->qp->qp_context; - union smc_llc_msg *llc = buf; + struct smc_llc_qentry *qentry, *q; + + spin_lock_bh(&lgr->llc_event_q_lock); + list_for_each_entry_safe(qentry, q, &lgr->llc_event_q, list) { + list_del_init(&qentry->list); + kfree(qentry); + } + spin_unlock_bh(&lgr->llc_event_q_lock); +} + +static void smc_llc_event_handler(struct smc_llc_qentry *qentry) +{ + union smc_llc_msg *llc = &qentry->msg; + struct smc_link *link = qentry->link; - if (wc->byte_len < sizeof(*llc)) - return; /* short message */ - if (llc->raw.hdr.length != sizeof(*llc)) - return; /* invalid message */ if (!smc_link_usable(link)) - return; /* link not active, drop msg */ + goto out; switch (llc->raw.hdr.common.type) { case SMC_LLC_TEST_LINK: @@ -588,6 +577,54 @@ static void smc_llc_rx_handler(struct ib_wc *wc, void *buf) smc_llc_rx_delete_rkey(link, &llc->delete_rkey); break; } +out: + kfree(qentry); +} + +/* worker to process llc messages on the event queue */ +static void smc_llc_event_work(struct work_struct *work) +{ + struct smc_link_group *lgr = container_of(work, struct smc_link_group, + llc_event_work); + struct smc_llc_qentry *qentry; + +again: + spin_lock_bh(&lgr->llc_event_q_lock); + if (!list_empty(&lgr->llc_event_q)) { + qentry = list_first_entry(&lgr->llc_event_q, + struct smc_llc_qentry, list); + list_del_init(&qentry->list); + spin_unlock_bh(&lgr->llc_event_q_lock); + smc_llc_event_handler(qentry); + goto again; + } + spin_unlock_bh(&lgr->llc_event_q_lock); +} + +/* copy received msg and add it to the event queue */ +static void smc_llc_rx_handler(struct ib_wc *wc, void *buf) +{ + struct smc_link *link = (struct smc_link *)wc->qp->qp_context; + struct smc_link_group *lgr = link->lgr; + struct smc_llc_qentry *qentry; + union smc_llc_msg *llc = buf; + unsigned long flags; + + if (wc->byte_len < sizeof(*llc)) + return; /* short message */ + if (llc->raw.hdr.length != sizeof(*llc)) + return; /* invalid message */ + + qentry = kmalloc(sizeof(*qentry), GFP_ATOMIC); + if (!qentry) + return; + qentry->link = link; + INIT_LIST_HEAD(&qentry->list); + memcpy(&qentry->msg, llc, sizeof(union smc_llc_msg)); + spin_lock_irqsave(&lgr->llc_event_q_lock, flags); + list_add_tail(&qentry->list, &lgr->llc_event_q); + spin_unlock_irqrestore(&lgr->llc_event_q_lock, flags); + schedule_work(&link->lgr->llc_event_work); } /***************************** worker, utils *********************************/ @@ -626,12 +663,6 @@ out: int smc_llc_link_init(struct smc_link *link) { - struct smc_link_group *lgr = smc_get_lgr(link); - link->llc_wq = alloc_ordered_workqueue("llc_wq-%x:%x)", WQ_MEM_RECLAIM, - *((u32 *)lgr->id), - link->link_id); - if (!link->llc_wq) - return -ENOMEM; init_completion(&link->llc_confirm); init_completion(&link->llc_confirm_resp); init_completion(&link->llc_add); @@ -640,6 +671,7 @@ int smc_llc_link_init(struct smc_link *link) init_completion(&link->llc_delete_rkey); mutex_init(&link->llc_delete_rkey_mutex); init_completion(&link->llc_testlink_resp); + INIT_WORK(&link->lgr->llc_event_work, smc_llc_event_work); INIT_DELAYED_WORK(&link->llc_testlink_wrk, smc_llc_testlink_work); return 0; } @@ -663,8 +695,6 @@ void smc_llc_link_deleting(struct smc_link *link) /* called in worker context */ void smc_llc_link_clear(struct smc_link *link) { - flush_workqueue(link->llc_wq); - destroy_workqueue(link->llc_wq); complete(&link->llc_testlink_resp); cancel_delayed_work_sync(&link->llc_testlink_wrk); smc_wr_wakeup_reg_wait(link); diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h index c2c9d48d079f..9de83495ad14 100644 --- a/net/smc/smc_llc.h +++ b/net/smc/smc_llc.h @@ -61,6 +61,7 @@ int smc_llc_do_confirm_rkey(struct smc_link *link, struct smc_buf_desc *rmb_desc); int smc_llc_do_delete_rkey(struct smc_link *link, struct smc_buf_desc *rmb_desc); +void smc_llc_event_flush(struct smc_link_group *lgr); int smc_llc_init(void) __init; #endif /* SMC_LLC_H */ From ef79d439cd124d9fb7258bb35d44c71aec11b829 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Wed, 29 Apr 2020 17:10:47 +0200 Subject: [PATCH 11/13] net/smc: process llc responses in tasklet context When llc responses are received then possible waiters for this response are to be notified. This can be done in tasklet context, without to use a work in the llc work queue. Move all code that handles llc responses into smc_llc_rx_response(). Signed-off-by: Karsten Graul Reviewed-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_core.h | 8 +- net/smc/smc_llc.c | 222 +++++++++++++++++++++++---------------------- 2 files changed, 119 insertions(+), 111 deletions(-) diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 6548e9a06f73..d785656b3489 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -129,10 +129,10 @@ struct smc_link { struct delayed_work llc_testlink_wrk; /* testlink worker */ struct completion llc_testlink_resp; /* wait for rx of testlink */ int llc_testlink_time; /* testlink interval */ - struct completion llc_confirm_rkey; /* wait 4 rx of cnf rkey */ - int llc_confirm_rkey_rc; /* rc from cnf rkey msg */ - struct completion llc_delete_rkey; /* wait 4 rx of del rkey */ - int llc_delete_rkey_rc; /* rc from del rkey msg */ + struct completion llc_confirm_rkey_resp; /* w4 rx of cnf rkey */ + int llc_confirm_rkey_resp_rc; /* rc from cnf rkey */ + struct completion llc_delete_rkey_resp; /* w4 rx of del rkey */ + int llc_delete_rkey_resp_rc; /* rc from del rkey */ struct mutex llc_delete_rkey_mutex; /* serialize usage */ }; diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index be74876a36ae..265889c8b03b 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -384,27 +384,17 @@ static void smc_llc_rx_confirm_link(struct smc_link *link, struct smc_llc_msg_confirm_link *llc) { struct smc_link_group *lgr = smc_get_lgr(link); - int conf_rc; + int conf_rc = 0; /* RMBE eyecatchers are not supported */ - if (llc->hd.flags & SMC_LLC_FLAG_NO_RMBE_EYEC) - conf_rc = 0; - else + if (!(llc->hd.flags & SMC_LLC_FLAG_NO_RMBE_EYEC)) conf_rc = ENOTSUPP; - if (llc->hd.flags & SMC_LLC_FLAG_RESP) { - if (lgr->role == SMC_SERV && - link->state == SMC_LNK_ACTIVATING) { - link->llc_confirm_resp_rc = conf_rc; - complete(&link->llc_confirm_resp); - } - } else { - if (lgr->role == SMC_CLNT && - link->state == SMC_LNK_ACTIVATING) { - link->llc_confirm_rc = conf_rc; - link->link_id = llc->link_num; - complete(&link->llc_confirm); - } + if (lgr->role == SMC_CLNT && + link->state == SMC_LNK_ACTIVATING) { + link->llc_confirm_rc = conf_rc; + link->link_id = llc->link_num; + complete(&link->llc_confirm); } } @@ -413,27 +403,22 @@ static void smc_llc_rx_add_link(struct smc_link *link, { struct smc_link_group *lgr = smc_get_lgr(link); - if (llc->hd.flags & SMC_LLC_FLAG_RESP) { - if (link->state == SMC_LNK_ACTIVATING) - complete(&link->llc_add_resp); - } else { - if (link->state == SMC_LNK_ACTIVATING) { - complete(&link->llc_add); - return; - } - - if (lgr->role == SMC_SERV) { - smc_llc_prep_add_link(llc, link, - link->smcibdev->mac[link->ibport - 1], - link->gid, SMC_LLC_REQ); - - } else { - smc_llc_prep_add_link(llc, link, - link->smcibdev->mac[link->ibport - 1], - link->gid, SMC_LLC_RESP); - } - smc_llc_send_message(link, llc); + if (link->state == SMC_LNK_ACTIVATING) { + complete(&link->llc_add); + return; } + + if (lgr->role == SMC_SERV) { + smc_llc_prep_add_link(llc, link, + link->smcibdev->mac[link->ibport - 1], + link->gid, SMC_LLC_REQ); + + } else { + smc_llc_prep_add_link(llc, link, + link->smcibdev->mac[link->ibport - 1], + link->gid, SMC_LLC_RESP); + } + smc_llc_send_message(link, llc); } static void smc_llc_rx_delete_link(struct smc_link *link, @@ -441,34 +426,24 @@ static void smc_llc_rx_delete_link(struct smc_link *link, { struct smc_link_group *lgr = smc_get_lgr(link); - if (llc->hd.flags & SMC_LLC_FLAG_RESP) { - if (lgr->role == SMC_SERV) - smc_lgr_schedule_free_work_fast(lgr); + smc_lgr_forget(lgr); + smc_llc_link_deleting(link); + if (lgr->role == SMC_SERV) { + /* client asks to delete this link, send request */ + smc_llc_prep_delete_link(llc, link, SMC_LLC_REQ, true); } else { - smc_lgr_forget(lgr); - smc_llc_link_deleting(link); - if (lgr->role == SMC_SERV) { - /* client asks to delete this link, send request */ - smc_llc_prep_delete_link(llc, link, SMC_LLC_REQ, true); - } else { - /* server requests to delete this link, send response */ - smc_llc_prep_delete_link(llc, link, SMC_LLC_RESP, true); - } - smc_llc_send_message(link, llc); - smc_lgr_terminate_sched(lgr); + /* server requests to delete this link, send response */ + smc_llc_prep_delete_link(llc, link, SMC_LLC_RESP, true); } + smc_llc_send_message(link, llc); + smc_lgr_terminate_sched(lgr); } static void smc_llc_rx_test_link(struct smc_link *link, struct smc_llc_msg_test_link *llc) { - if (llc->hd.flags & SMC_LLC_FLAG_RESP) { - if (link->state == SMC_LNK_ACTIVE) - complete(&link->llc_testlink_resp); - } else { - llc->hd.flags |= SMC_LLC_FLAG_RESP; - smc_llc_send_message(link, llc); - } + llc->hd.flags |= SMC_LLC_FLAG_RESP; + smc_llc_send_message(link, llc); } static void smc_llc_rx_confirm_rkey(struct smc_link *link, @@ -476,34 +451,24 @@ static void smc_llc_rx_confirm_rkey(struct smc_link *link, { int rc; - if (llc->hd.flags & SMC_LLC_FLAG_RESP) { - link->llc_confirm_rkey_rc = llc->hd.flags & - SMC_LLC_FLAG_RKEY_NEG; - complete(&link->llc_confirm_rkey); - } else { - rc = smc_rtoken_add(link, - llc->rtoken[0].rmb_vaddr, - llc->rtoken[0].rmb_key); + rc = smc_rtoken_add(link, + llc->rtoken[0].rmb_vaddr, + llc->rtoken[0].rmb_key); - /* ignore rtokens for other links, we have only one link */ + /* ignore rtokens for other links, we have only one link */ - llc->hd.flags |= SMC_LLC_FLAG_RESP; - if (rc < 0) - llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG; - smc_llc_send_message(link, llc); - } + llc->hd.flags |= SMC_LLC_FLAG_RESP; + if (rc < 0) + llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG; + smc_llc_send_message(link, llc); } static void smc_llc_rx_confirm_rkey_cont(struct smc_link *link, struct smc_llc_msg_confirm_rkey_cont *llc) { - if (llc->hd.flags & SMC_LLC_FLAG_RESP) { - /* unused as long as we don't send this type of msg */ - } else { - /* ignore rtokens for other links, we have only one link */ - llc->hd.flags |= SMC_LLC_FLAG_RESP; - smc_llc_send_message(link, llc); - } + /* ignore rtokens for other links, we have only one link */ + llc->hd.flags |= SMC_LLC_FLAG_RESP; + smc_llc_send_message(link, llc); } static void smc_llc_rx_delete_rkey(struct smc_link *link, @@ -512,25 +477,19 @@ static void smc_llc_rx_delete_rkey(struct smc_link *link, u8 err_mask = 0; int i, max; - if (llc->hd.flags & SMC_LLC_FLAG_RESP) { - link->llc_delete_rkey_rc = llc->hd.flags & - SMC_LLC_FLAG_RKEY_NEG; - complete(&link->llc_delete_rkey); - } else { - max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX); - for (i = 0; i < max; i++) { - if (smc_rtoken_delete(link, llc->rkey[i])) - err_mask |= 1 << (SMC_LLC_DEL_RKEY_MAX - 1 - i); - } - - if (err_mask) { - llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG; - llc->err_mask = err_mask; - } - - llc->hd.flags |= SMC_LLC_FLAG_RESP; - smc_llc_send_message(link, llc); + max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX); + for (i = 0; i < max; i++) { + if (smc_rtoken_delete(link, llc->rkey[i])) + err_mask |= 1 << (SMC_LLC_DEL_RKEY_MAX - 1 - i); } + + if (err_mask) { + llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG; + llc->err_mask = err_mask; + } + + llc->hd.flags |= SMC_LLC_FLAG_RESP; + smc_llc_send_message(link, llc); } /* flush the llc event queue */ @@ -601,6 +560,49 @@ again: spin_unlock_bh(&lgr->llc_event_q_lock); } +/* process llc responses in tasklet context */ +static void smc_llc_rx_response(struct smc_link *link, union smc_llc_msg *llc) +{ + int rc = 0; + + switch (llc->raw.hdr.common.type) { + case SMC_LLC_TEST_LINK: + if (link->state == SMC_LNK_ACTIVE) + complete(&link->llc_testlink_resp); + break; + case SMC_LLC_CONFIRM_LINK: + if (!(llc->raw.hdr.flags & SMC_LLC_FLAG_NO_RMBE_EYEC)) + rc = ENOTSUPP; + if (link->lgr->role == SMC_SERV && + link->state == SMC_LNK_ACTIVATING) { + link->llc_confirm_resp_rc = rc; + complete(&link->llc_confirm_resp); + } + break; + case SMC_LLC_ADD_LINK: + if (link->state == SMC_LNK_ACTIVATING) + complete(&link->llc_add_resp); + break; + case SMC_LLC_DELETE_LINK: + if (link->lgr->role == SMC_SERV) + smc_lgr_schedule_free_work_fast(link->lgr); + break; + case SMC_LLC_CONFIRM_RKEY: + link->llc_confirm_rkey_resp_rc = llc->raw.hdr.flags & + SMC_LLC_FLAG_RKEY_NEG; + complete(&link->llc_confirm_rkey_resp); + break; + case SMC_LLC_CONFIRM_RKEY_CONT: + /* unused as long as we don't send this type of msg */ + break; + case SMC_LLC_DELETE_RKEY: + link->llc_delete_rkey_resp_rc = llc->raw.hdr.flags & + SMC_LLC_FLAG_RKEY_NEG; + complete(&link->llc_delete_rkey_resp); + break; + } +} + /* copy received msg and add it to the event queue */ static void smc_llc_rx_handler(struct ib_wc *wc, void *buf) { @@ -615,6 +617,12 @@ static void smc_llc_rx_handler(struct ib_wc *wc, void *buf) if (llc->raw.hdr.length != sizeof(*llc)) return; /* invalid message */ + /* process responses immediately */ + if (llc->raw.hdr.flags & SMC_LLC_FLAG_RESP) { + smc_llc_rx_response(link, llc); + return; + } + qentry = kmalloc(sizeof(*qentry), GFP_ATOMIC); if (!qentry) return; @@ -667,8 +675,8 @@ int smc_llc_link_init(struct smc_link *link) init_completion(&link->llc_confirm_resp); init_completion(&link->llc_add); init_completion(&link->llc_add_resp); - init_completion(&link->llc_confirm_rkey); - init_completion(&link->llc_delete_rkey); + init_completion(&link->llc_confirm_rkey_resp); + init_completion(&link->llc_delete_rkey_resp); mutex_init(&link->llc_delete_rkey_mutex); init_completion(&link->llc_testlink_resp); INIT_WORK(&link->lgr->llc_event_work, smc_llc_event_work); @@ -708,14 +716,14 @@ int smc_llc_do_confirm_rkey(struct smc_link *link, int rc; /* protected by mutex smc_create_lgr_pending */ - reinit_completion(&link->llc_confirm_rkey); + reinit_completion(&link->llc_confirm_rkey_resp); rc = smc_llc_send_confirm_rkey(link, rmb_desc); if (rc) return rc; /* receive CONFIRM RKEY response from server over RoCE fabric */ - rc = wait_for_completion_interruptible_timeout(&link->llc_confirm_rkey, - SMC_LLC_WAIT_TIME); - if (rc <= 0 || link->llc_confirm_rkey_rc) + rc = wait_for_completion_interruptible_timeout( + &link->llc_confirm_rkey_resp, SMC_LLC_WAIT_TIME); + if (rc <= 0 || link->llc_confirm_rkey_resp_rc) return -EFAULT; return 0; } @@ -729,14 +737,14 @@ int smc_llc_do_delete_rkey(struct smc_link *link, mutex_lock(&link->llc_delete_rkey_mutex); if (link->state != SMC_LNK_ACTIVE) goto out; - reinit_completion(&link->llc_delete_rkey); + reinit_completion(&link->llc_delete_rkey_resp); rc = smc_llc_send_delete_rkey(link, rmb_desc); if (rc) goto out; /* receive DELETE RKEY response from server over RoCE fabric */ - rc = wait_for_completion_interruptible_timeout(&link->llc_delete_rkey, - SMC_LLC_WAIT_TIME); - if (rc <= 0 || link->llc_delete_rkey_rc) + rc = wait_for_completion_interruptible_timeout( + &link->llc_delete_rkey_resp, SMC_LLC_WAIT_TIME); + if (rc <= 0 || link->llc_delete_rkey_resp_rc) rc = -EFAULT; else rc = 0; From faca536008375bece23783e7382b5d0356c13ba5 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Wed, 29 Apr 2020 17:10:48 +0200 Subject: [PATCH 12/13] net/smc: use mutex instead of rwlock_t to protect buffers The locks for sndbufs and rmbs are never used from atomic context. Using a mutex for these locks will allow to nest locks with other mutexes. Signed-off-by: Karsten Graul Reviewed-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_core.c | 22 +++++++++++----------- net/smc/smc_core.h | 4 ++-- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index a1463da14614..8a43d2948493 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -385,8 +385,8 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) lgr->freefast = 0; lgr->freeing = 0; lgr->vlan_id = ini->vlan_id; - rwlock_init(&lgr->sndbufs_lock); - rwlock_init(&lgr->rmbs_lock); + mutex_init(&lgr->sndbufs_lock); + mutex_init(&lgr->rmbs_lock); rwlock_init(&lgr->conns_lock); for (i = 0; i < SMC_RMBE_SIZES; i++) { INIT_LIST_HEAD(&lgr->sndbufs[i]); @@ -456,9 +456,9 @@ static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc, } if (rmb_desc->is_reg_err) { /* buf registration failed, reuse not possible */ - write_lock_bh(&lgr->rmbs_lock); + mutex_lock(&lgr->rmbs_lock); list_del(&rmb_desc->list); - write_unlock_bh(&lgr->rmbs_lock); + mutex_unlock(&lgr->rmbs_lock); smc_buf_free(lgr, true, rmb_desc); } else { @@ -1059,19 +1059,19 @@ int smc_uncompress_bufsize(u8 compressed) * buffer size; if not available, return NULL */ static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize, - rwlock_t *lock, + struct mutex *lock, struct list_head *buf_list) { struct smc_buf_desc *buf_slot; - read_lock_bh(lock); + mutex_lock(lock); list_for_each_entry(buf_slot, buf_list, list) { if (cmpxchg(&buf_slot->used, 0, 1) == 0) { - read_unlock_bh(lock); + mutex_unlock(lock); return buf_slot; } } - read_unlock_bh(lock); + mutex_unlock(lock); return NULL; } @@ -1220,8 +1220,8 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) struct smc_link_group *lgr = conn->lgr; struct list_head *buf_list; int bufsize, bufsize_short; + struct mutex *lock; /* lock buffer list */ int sk_buf_size; - rwlock_t *lock; if (is_rmb) /* use socket recv buffer size (w/o overhead) as start value */ @@ -1262,9 +1262,9 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) continue; buf_desc->used = 1; - write_lock_bh(lock); + mutex_lock(lock); list_add(&buf_desc->list, buf_list); - write_unlock_bh(lock); + mutex_unlock(lock); break; /* found */ } diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index d785656b3489..379ced490c49 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -205,9 +205,9 @@ struct smc_link_group { unsigned short vlan_id; /* vlan id of link group */ struct list_head sndbufs[SMC_RMBE_SIZES];/* tx buffers */ - rwlock_t sndbufs_lock; /* protects tx buffers */ + struct mutex sndbufs_lock; /* protects tx buffers */ struct list_head rmbs[SMC_RMBE_SIZES]; /* rx buffers */ - rwlock_t rmbs_lock; /* protects rx buffers */ + struct mutex rmbs_lock; /* protects rx buffers */ u8 id[SMC_LGR_ID_SIZE]; /* unique lgr id */ struct delayed_work free_work; /* delayed freeing of an lgr */ From 00a049cfde95931c6832edad19d9a4be441cacf5 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Wed, 29 Apr 2020 17:10:49 +0200 Subject: [PATCH 13/13] net/smc: move llc layer related init and clear into smc_llc.c Introduce smc_llc_lgr_init() and smc_llc_lgr_clear() to implement all llc layer specific initialization and cleanup in module smc_llc.c. Signed-off-by: Karsten Graul Reviewed-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 6 ++---- net/smc/smc_core.c | 6 +++--- net/smc/smc_core.h | 2 ++ net/smc/smc_llc.c | 26 +++++++++++++++++++++----- net/smc/smc_llc.h | 5 +++-- 5 files changed, 31 insertions(+), 14 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index e39f6aedd3bd..e859e3f420d9 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -381,7 +381,6 @@ static int smcr_lgr_reg_rmbs(struct smc_link_group *lgr, static int smcr_clnt_conf_first_link(struct smc_sock *smc) { - struct net *net = sock_net(smc->clcsock->sk); struct smc_link *link = smc->conn.lnk; int rest; int rc; @@ -433,7 +432,7 @@ static int smcr_clnt_conf_first_link(struct smc_sock *smc) if (rc < 0) return SMC_CLC_DECL_TIMEOUT_AL; - smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time); + smc_llc_link_active(link); return 0; } @@ -1019,7 +1018,6 @@ void smc_close_non_accepted(struct sock *sk) static int smcr_serv_conf_first_link(struct smc_sock *smc) { - struct net *net = sock_net(smc->clcsock->sk); struct smc_link *link = smc->conn.lnk; int rest; int rc; @@ -1065,7 +1063,7 @@ static int smcr_serv_conf_first_link(struct smc_sock *smc) return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_AL : rc; } - smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time); + smc_llc_link_active(link); return 0; } diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 8a43d2948493..db49f8cd5c95 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -412,8 +412,8 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT; memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer, SMC_SYSTEMID_LEN); - INIT_LIST_HEAD(&lgr->llc_event_q); - spin_lock_init(&lgr->llc_event_q_lock); + smc_llc_lgr_init(lgr, smc); + link_idx = SMC_SINGLE_LINK; lnk = &lgr->lnk[link_idx]; rc = smcr_link_init(lgr, lnk, link_idx, ini); @@ -614,7 +614,7 @@ static void smc_lgr_free(struct smc_link_group *lgr) if (lgr->lnk[i].state != SMC_LNK_UNUSED) smcr_link_clear(&lgr->lnk[i]); } - smc_llc_event_flush(lgr); + smc_llc_lgr_clear(lgr); if (!atomic_dec_return(&lgr_cnt)) wake_up(&lgrs_deleted); } diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 379ced490c49..b5781511063d 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -238,6 +238,8 @@ struct smc_link_group { /* protects llc_event_q */ struct work_struct llc_event_work; /* llc event worker */ + int llc_testlink_time; + /* link keep alive time */ }; struct { /* SMC-D */ u64 peer_gid; diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 265889c8b03b..e715dd6735ee 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -493,7 +493,7 @@ static void smc_llc_rx_delete_rkey(struct smc_link *link, } /* flush the llc event queue */ -void smc_llc_event_flush(struct smc_link_group *lgr) +static void smc_llc_event_flush(struct smc_link_group *lgr) { struct smc_llc_qentry *qentry, *q; @@ -669,6 +669,23 @@ out: schedule_delayed_work(&link->llc_testlink_wrk, next_interval); } +void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc) +{ + struct net *net = sock_net(smc->clcsock->sk); + + INIT_WORK(&lgr->llc_event_work, smc_llc_event_work); + INIT_LIST_HEAD(&lgr->llc_event_q); + spin_lock_init(&lgr->llc_event_q_lock); + lgr->llc_testlink_time = net->ipv4.sysctl_tcp_keepalive_time; +} + +/* called after lgr was removed from lgr_list */ +void smc_llc_lgr_clear(struct smc_link_group *lgr) +{ + smc_llc_event_flush(lgr); + cancel_work_sync(&lgr->llc_event_work); +} + int smc_llc_link_init(struct smc_link *link) { init_completion(&link->llc_confirm); @@ -679,16 +696,15 @@ int smc_llc_link_init(struct smc_link *link) init_completion(&link->llc_delete_rkey_resp); mutex_init(&link->llc_delete_rkey_mutex); init_completion(&link->llc_testlink_resp); - INIT_WORK(&link->lgr->llc_event_work, smc_llc_event_work); INIT_DELAYED_WORK(&link->llc_testlink_wrk, smc_llc_testlink_work); return 0; } -void smc_llc_link_active(struct smc_link *link, int testlink_time) +void smc_llc_link_active(struct smc_link *link) { link->state = SMC_LNK_ACTIVE; - if (testlink_time) { - link->llc_testlink_time = testlink_time * HZ; + if (link->lgr->llc_testlink_time) { + link->llc_testlink_time = link->lgr->llc_testlink_time * HZ; schedule_delayed_work(&link->llc_testlink_wrk, link->llc_testlink_time); } diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h index 9de83495ad14..66063f22166b 100644 --- a/net/smc/smc_llc.h +++ b/net/smc/smc_llc.h @@ -53,15 +53,16 @@ int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[], enum smc_llc_reqresp reqresp); int smc_llc_send_delete_link(struct smc_link *link, enum smc_llc_reqresp reqresp, bool orderly); +void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc); +void smc_llc_lgr_clear(struct smc_link_group *lgr); int smc_llc_link_init(struct smc_link *link); -void smc_llc_link_active(struct smc_link *link, int testlink_time); +void smc_llc_link_active(struct smc_link *link); void smc_llc_link_deleting(struct smc_link *link); void smc_llc_link_clear(struct smc_link *link); int smc_llc_do_confirm_rkey(struct smc_link *link, struct smc_buf_desc *rmb_desc); int smc_llc_do_delete_rkey(struct smc_link *link, struct smc_buf_desc *rmb_desc); -void smc_llc_event_flush(struct smc_link_group *lgr); int smc_llc_init(void) __init; #endif /* SMC_LLC_H */