NFS: pnfs IPv6 support

Handle IPv6 remote addresses from GETDEVICEINFO

 - supports netid "tcp" for IPv4 and "tcp6" for IPv6, as RFC 5665 specifies
 - added ds_remotestr to avoid having to handle different address families in every dprintk
 - tested against the pynfs 4.1 server; submitting an IPv6 support patch to pynfs
 - tested with IPv6 disabled: it compiles cleanly and relies on rpc_pton to
   refuse to accept IPv6 addresses

Signed-off-by: Weston Andros Adamson <dros@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
This commit is contained in:
Weston Andros Adamson 2011-05-31 18:48:56 -04:00 committed by Trond Myklebust
parent 82c2c8b861
commit c9895cb69b
3 changed files with 194 additions and 79 deletions

View File

@ -344,8 +344,7 @@ filelayout_read_pagelist(struct nfs_read_data *data)
set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
return PNFS_NOT_ATTEMPTED; return PNFS_NOT_ATTEMPTED;
} }
dprintk("%s USE DS:ip %x %hu\n", __func__, dprintk("%s USE DS: %s\n", __func__, ds->ds_remotestr);
ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
/* No multipath support. Use first DS */ /* No multipath support. Use first DS */
data->ds_clp = ds->ds_clp; data->ds_clp = ds->ds_clp;
@ -384,9 +383,9 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
return PNFS_NOT_ATTEMPTED; return PNFS_NOT_ATTEMPTED;
} }
dprintk("%s ino %lu sync %d req %Zu@%llu DS:%x:%hu\n", __func__, dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s\n", __func__,
data->inode->i_ino, sync, (size_t) data->args.count, offset, data->inode->i_ino, sync, (size_t) data->args.count, offset,
ntohl(ds->ds_ip_addr), ntohs(ds->ds_port)); ds->ds_remotestr);
data->write_done_cb = filelayout_write_done_cb; data->write_done_cb = filelayout_write_done_cb;
data->ds_clp = ds->ds_clp; data->ds_clp = ds->ds_clp;

View File

@ -49,8 +49,9 @@ enum stripetype4 {
/* Individual ip address */ /* Individual ip address */
struct nfs4_pnfs_ds { struct nfs4_pnfs_ds {
struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */ struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */
u32 ds_ip_addr; struct sockaddr_storage ds_addr;
u32 ds_port; size_t ds_addrlen;
char *ds_remotestr; /* human readable addr+port */
struct nfs_client *ds_clp; struct nfs_client *ds_clp;
atomic_t ds_count; atomic_t ds_count;
}; };

View File

@ -56,28 +56,56 @@ print_ds(struct nfs4_pnfs_ds *ds)
printk("%s NULL device\n", __func__); printk("%s NULL device\n", __func__);
return; return;
} }
printk(" ip_addr %x port %hu\n" printk(" ds %s\n"
" ref count %d\n" " ref count %d\n"
" client %p\n" " client %p\n"
" cl_exchange_flags %x\n", " cl_exchange_flags %x\n",
ntohl(ds->ds_ip_addr), ntohs(ds->ds_port), ds->ds_remotestr,
atomic_read(&ds->ds_count), ds->ds_clp, atomic_read(&ds->ds_count), ds->ds_clp,
ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0); ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0);
} }
/* nfs4_ds_cache_lock is held */ /* nfs4_ds_cache_lock is held */
static struct nfs4_pnfs_ds * static struct nfs4_pnfs_ds *
_data_server_lookup_locked(u32 ip_addr, u32 port) _data_server_lookup_locked(struct sockaddr *addr, size_t addrlen)
{ {
struct nfs4_pnfs_ds *ds; struct nfs4_pnfs_ds *ds;
struct sockaddr_in *a, *b;
dprintk("_data_server_lookup: ip_addr=%x port=%hu\n", struct sockaddr_in6 *a6, *b6;
ntohl(ip_addr), ntohs(port));
list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) { list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) {
if (ds->ds_ip_addr == ip_addr && if (addr->sa_family != ds->ds_addr.ss_family)
ds->ds_port == port) { continue;
return ds;
switch (addr->sa_family) {
case AF_INET:
a = (struct sockaddr_in *)addr;
b = (struct sockaddr_in *)&ds->ds_addr;
if (a->sin_addr.s_addr == b->sin_addr.s_addr &&
a->sin_port == b->sin_port)
return ds;
break;
case AF_INET6:
a6 = (struct sockaddr_in6 *)addr;
b6 = (struct sockaddr_in6 *)&ds->ds_addr;
/* LINKLOCAL addresses must have matching scope_id */
if (ipv6_addr_scope(&a6->sin6_addr) ==
IPV6_ADDR_SCOPE_LINKLOCAL &&
a6->sin6_scope_id != b6->sin6_scope_id)
continue;
if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) &&
a6->sin6_port == b6->sin6_port)
return ds;
break;
default:
dprintk("%s: unhandled address family: %u\n",
__func__, addr->sa_family);
return NULL;
} }
} }
return NULL; return NULL;
@ -91,19 +119,14 @@ static int
nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
{ {
struct nfs_client *clp; struct nfs_client *clp;
struct sockaddr_in sin;
int status = 0; int status = 0;
dprintk("--> %s ip:port %x:%hu au_flavor %d\n", __func__, dprintk("--> %s addr %s au_flavor %d\n", __func__, ds->ds_remotestr,
ntohl(ds->ds_ip_addr), ntohs(ds->ds_port),
mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor); mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor);
sin.sin_family = AF_INET; clp = nfs4_set_ds_client(mds_srv->nfs_client,
sin.sin_addr.s_addr = ds->ds_ip_addr; (struct sockaddr *)&ds->ds_addr,
sin.sin_port = ds->ds_port; ds->ds_addrlen, IPPROTO_TCP);
clp = nfs4_set_ds_client(mds_srv->nfs_client, (struct sockaddr *)&sin,
sizeof(sin), IPPROTO_TCP);
if (IS_ERR(clp)) { if (IS_ERR(clp)) {
status = PTR_ERR(clp); status = PTR_ERR(clp);
goto out; goto out;
@ -115,8 +138,8 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
goto out_put; goto out_put;
} }
ds->ds_clp = clp; ds->ds_clp = clp;
dprintk("%s [existing] ip=%x, port=%hu\n", __func__, dprintk("%s [existing] server=%s\n", __func__,
ntohl(ds->ds_ip_addr), ntohs(ds->ds_port)); ds->ds_remotestr);
goto out; goto out;
} }
@ -135,8 +158,7 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
goto out_put; goto out_put;
ds->ds_clp = clp; ds->ds_clp = clp;
dprintk("%s [new] ip=%x, port=%hu\n", __func__, ntohl(ds->ds_ip_addr), dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr);
ntohs(ds->ds_port));
out: out:
return status; return status;
out_put: out_put:
@ -153,6 +175,7 @@ destroy_ds(struct nfs4_pnfs_ds *ds)
if (ds->ds_clp) if (ds->ds_clp)
nfs_put_client(ds->ds_clp); nfs_put_client(ds->ds_clp);
kfree(ds->ds_remotestr);
kfree(ds); kfree(ds);
} }
@ -179,31 +202,85 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
kfree(dsaddr); kfree(dsaddr);
} }
static struct nfs4_pnfs_ds * /*
nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port, gfp_t gfp_flags) * Create a string with a human readable address and port to avoid
* complicated setup around many dprinks.
*/
static char *
nfs4_pnfs_remotestr(struct sockaddr *ds_addr, gfp_t gfp_flags)
{ {
struct nfs4_pnfs_ds *tmp_ds, *ds; char buf[INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN];
char *remotestr;
char *startsep = "";
char *endsep = "";
size_t len;
uint16_t port;
switch (ds_addr->sa_family) {
case AF_INET:
port = ((struct sockaddr_in *)ds_addr)->sin_port;
break;
case AF_INET6:
startsep = "[";
endsep = "]";
port = ((struct sockaddr_in6 *)ds_addr)->sin6_port;
break;
default:
dprintk("%s: Unknown address family %u\n",
__func__, ds_addr->sa_family);
return NULL;
}
if (!rpc_ntop((struct sockaddr *)ds_addr, buf, sizeof(buf))) {
dprintk("%s: error printing addr\n", __func__);
return NULL;
}
len = strlen(buf) + strlen(startsep) + strlen(endsep) + 1 + 5 + 1;
remotestr = kzalloc(len, gfp_flags);
if (unlikely(!remotestr)) {
dprintk("%s: couldn't alloc remotestr\n", __func__);
return NULL;
}
snprintf(remotestr, len, "%s%s%s:%u",
startsep, buf, endsep, ntohs(port));
return remotestr;
}
static struct nfs4_pnfs_ds *
nfs4_pnfs_ds_add(struct sockaddr *addr, size_t addrlen, gfp_t gfp_flags)
{
struct nfs4_pnfs_ds *tmp_ds, *ds = NULL;
char *remotestr;
ds = kzalloc(sizeof(*tmp_ds), gfp_flags); ds = kzalloc(sizeof(*tmp_ds), gfp_flags);
if (!ds) if (!ds)
goto out; goto out;
/* this is only used for debugging, so it's ok if its NULL */
remotestr = nfs4_pnfs_remotestr(addr, gfp_flags);
spin_lock(&nfs4_ds_cache_lock); spin_lock(&nfs4_ds_cache_lock);
tmp_ds = _data_server_lookup_locked(ip_addr, port); tmp_ds = _data_server_lookup_locked(addr, addrlen);
if (tmp_ds == NULL) { if (tmp_ds == NULL) {
ds->ds_ip_addr = ip_addr; memcpy(&ds->ds_addr, addr, addrlen);
ds->ds_port = port; ds->ds_addrlen = addrlen;
ds->ds_remotestr = remotestr;
atomic_set(&ds->ds_count, 1); atomic_set(&ds->ds_count, 1);
INIT_LIST_HEAD(&ds->ds_node); INIT_LIST_HEAD(&ds->ds_node);
ds->ds_clp = NULL; ds->ds_clp = NULL;
list_add(&ds->ds_node, &nfs4_data_server_cache); list_add(&ds->ds_node, &nfs4_data_server_cache);
dprintk("%s add new data server ip 0x%x\n", __func__, dprintk("%s add new data server %s\n", __func__,
ds->ds_ip_addr); ds->ds_remotestr);
} else { } else {
kfree(remotestr);
kfree(ds); kfree(ds);
atomic_inc(&tmp_ds->ds_count); atomic_inc(&tmp_ds->ds_count);
dprintk("%s data server found ip 0x%x, inc'ed ds_count to %d\n", dprintk("%s data server %s found, inc'ed ds_count to %d\n",
__func__, tmp_ds->ds_ip_addr, __func__, tmp_ds->ds_remotestr,
atomic_read(&tmp_ds->ds_count)); atomic_read(&tmp_ds->ds_count));
ds = tmp_ds; ds = tmp_ds;
} }
@ -213,18 +290,21 @@ out:
} }
/* /*
* Currently only support ipv4, and one multi-path address. * Currently only supports ipv4, ipv6 and one multi-path address.
*/ */
static struct nfs4_pnfs_ds * static struct nfs4_pnfs_ds *
decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_flags) decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_flags)
{ {
struct nfs4_pnfs_ds *ds = NULL; struct nfs4_pnfs_ds *ds = NULL;
char *buf; char *buf, *portstr;
const char *ipend, *pstr; struct sockaddr_storage ss;
u32 ip_addr, port; size_t sslen;
int nlen, rlen, i; u32 port;
int nlen, rlen;
int tmp[2]; int tmp[2];
__be32 *p; __be32 *p;
char *netid, *match_netid;
size_t match_netid_len;
/* r_netid */ /* r_netid */
p = xdr_inline_decode(streamp, 4); p = xdr_inline_decode(streamp, 4);
@ -236,62 +316,97 @@ decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_fla
if (unlikely(!p)) if (unlikely(!p))
goto out_err; goto out_err;
/* Check that netid is "tcp" */ netid = kmalloc(nlen+1, gfp_flags);
if (nlen != 3 || memcmp((char *)p, "tcp", 3)) { if (unlikely(!netid))
dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__);
goto out_err; goto out_err;
}
/* r_addr */ netid[nlen] = '\0';
memcpy(netid, p, nlen);
/* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */
p = xdr_inline_decode(streamp, 4); p = xdr_inline_decode(streamp, 4);
if (unlikely(!p)) if (unlikely(!p))
goto out_err; goto out_free_netid;
rlen = be32_to_cpup(p); rlen = be32_to_cpup(p);
p = xdr_inline_decode(streamp, rlen); p = xdr_inline_decode(streamp, rlen);
if (unlikely(!p)) if (unlikely(!p))
goto out_err; goto out_free_netid;
/* ipv6 length plus port is legal */ /* port is ".ABC.DEF", 8 chars max */
if (rlen > INET6_ADDRSTRLEN + 8) { if (rlen > INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN + 8) {
dprintk("%s: Invalid address, length %d\n", __func__, dprintk("%s: Invalid address, length %d\n", __func__,
rlen); rlen);
goto out_err; goto out_free_netid;
} }
buf = kmalloc(rlen + 1, gfp_flags); buf = kmalloc(rlen + 1, gfp_flags);
if (!buf) { if (!buf) {
dprintk("%s: Not enough memory\n", __func__); dprintk("%s: Not enough memory\n", __func__);
goto out_err; goto out_free_netid;
} }
buf[rlen] = '\0'; buf[rlen] = '\0';
memcpy(buf, p, rlen); memcpy(buf, p, rlen);
/* replace the port dots with dashes for the in4_pton() delimiter*/ /* replace port '.' with '-' */
for (i = 0; i < 2; i++) { portstr = strrchr(buf, '.');
char *res = strrchr(buf, '.'); if (!portstr) {
if (!res) { dprintk("%s: Failed finding expected dot in port\n",
dprintk("%s: Failed finding expected dots in port\n", __func__);
__func__); goto out_free_buf;
goto out_free; }
} *portstr = '-';
*res = '-';
/* find '.' between address and port */
portstr = strrchr(buf, '.');
if (!portstr) {
dprintk("%s: Failed finding expected dot between address and "
"port\n", __func__);
goto out_free_buf;
}
*portstr = '\0';
if (!rpc_pton(buf, portstr-buf, (struct sockaddr *)&ss, sizeof(ss))) {
dprintk("%s: Error parsing address %s\n", __func__, buf);
goto out_free_buf;
} }
/* Currently only support ipv4 address */ portstr++;
if (in4_pton(buf, rlen, (u8 *)&ip_addr, '-', &ipend) == 0) { sscanf(portstr, "%d-%d", &tmp[0], &tmp[1]);
dprintk("%s: Only ipv4 addresses supported\n", __func__);
goto out_free;
}
/* port */
pstr = ipend;
sscanf(pstr, "-%d-%d", &tmp[0], &tmp[1]);
port = htons((tmp[0] << 8) | (tmp[1])); port = htons((tmp[0] << 8) | (tmp[1]));
ds = nfs4_pnfs_ds_add(inode, ip_addr, port, gfp_flags); switch (ss.ss_family) {
dprintk("%s: Decoded address and port %s\n", __func__, buf); case AF_INET:
out_free: ((struct sockaddr_in *)&ss)->sin_port = port;
sslen = sizeof(struct sockaddr_in);
match_netid = "tcp";
match_netid_len = 3;
break;
case AF_INET6:
((struct sockaddr_in6 *)&ss)->sin6_port = port;
sslen = sizeof(struct sockaddr_in6);
match_netid = "tcp6";
match_netid_len = 4;
break;
default:
dprintk("%s: unsupported address family: %u\n",
__func__, ss.ss_family);
goto out_free_buf;
}
if (nlen != match_netid_len || strncmp(netid, match_netid, nlen)) {
dprintk("%s: ERROR: r_netid \"%s\" != \"%s\"\n",
__func__, netid, match_netid);
goto out_free_buf;
}
ds = nfs4_pnfs_ds_add((struct sockaddr *)&ss, sslen, gfp_flags);
dprintk("%s: Added DS %s\n", __func__, ds->ds_remotestr);
out_free_buf:
kfree(buf); kfree(buf);
out_free_netid:
kfree(netid);
out_err: out_err:
return ds; return ds;
} }
@ -591,13 +706,13 @@ nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j)
static void static void
filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr, filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr,
int err, u32 ds_addr) int err, const char *ds_remotestr)
{ {
u32 *p = (u32 *)&dsaddr->id_node.deviceid; u32 *p = (u32 *)&dsaddr->id_node.deviceid;
printk(KERN_ERR "NFS: data server %x connection error %d." printk(KERN_ERR "NFS: data server %s connection error %d."
" Deviceid [%x%x%x%x] marked out of use.\n", " Deviceid [%x%x%x%x] marked out of use.\n",
ds_addr, err, p[0], p[1], p[2], p[3]); ds_remotestr, err, p[0], p[1], p[2], p[3]);
spin_lock(&nfs4_ds_cache_lock); spin_lock(&nfs4_ds_cache_lock);
dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY; dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY;
@ -628,7 +743,7 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
err = nfs4_ds_connect(s, ds); err = nfs4_ds_connect(s, ds);
if (err) { if (err) {
filelayout_mark_devid_negative(dsaddr, err, filelayout_mark_devid_negative(dsaddr, err,
ntohl(ds->ds_ip_addr)); ds->ds_remotestr);
return NULL; return NULL;
} }
} }