mirror of
https://github.com/torvalds/linux.git
synced 2024-11-10 22:21:40 +00:00
Merge branch 'pnfs-submit' of git://git.open-osd.org/linux-open-osd
* 'pnfs-submit' of git://git.open-osd.org/linux-open-osd: (32 commits) pnfs-obj: pg_test check for max_io_size NFSv4.1: define nfs_generic_pg_test NFSv4.1: use pnfs_generic_pg_test directly by layout driver NFSv4.1: change pg_test return type to bool NFSv4.1: unify pnfs_pageio_init functions pnfs-obj: objlayout_encode_layoutcommit implementation pnfs: encode_layoutcommit pnfs-obj: report errors and .encode_layoutreturn Implementation. pnfs: encode_layoutreturn pnfs: layoutret_on_setattr pnfs: layoutreturn pnfs-obj: osd raid engine read/write implementation pnfs: support for non-rpc layout drivers pnfs-obj: define per-inode private structure pnfs: alloc and free layout_hdr layoutdriver methods pnfs-obj: objio_osd device information retrieval and caching pnfs-obj: decode layout, alloc/free lseg pnfs-obj: pnfs_osd XDR client implementation pnfs-obj: pnfs_osd XDR definitions pnfs-obj: objlayoutdriver module skeleton ...
This commit is contained in:
commit
cd1acdf172
@ -87,6 +87,16 @@ config NFS_V4_1
|
||||
config PNFS_FILE_LAYOUT
|
||||
tristate
|
||||
|
||||
config PNFS_OBJLAYOUT
|
||||
tristate "Provide support for the pNFS Objects Layout Driver for NFSv4.1 pNFS (EXPERIMENTAL)"
|
||||
depends on NFS_FS && NFS_V4_1 && SCSI_OSD_ULD
|
||||
help
|
||||
Say M here if you want your pNFS client to support the Objects Layout Driver.
|
||||
Requires the SCSI osd initiator library (SCSI_OSD_INITIATOR) and
|
||||
upper level driver (SCSI_OSD_ULD).
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config ROOT_NFS
|
||||
bool "Root file system on NFS"
|
||||
depends on NFS_FS=y && IP_PNP
|
||||
|
@ -15,9 +15,11 @@ nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \
|
||||
delegation.o idmap.o \
|
||||
callback.o callback_xdr.o callback_proc.o \
|
||||
nfs4namespace.o
|
||||
nfs-$(CONFIG_NFS_V4_1) += pnfs.o
|
||||
nfs-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o
|
||||
nfs-$(CONFIG_SYSCTL) += sysctl.o
|
||||
nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o
|
||||
|
||||
obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o
|
||||
nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o
|
||||
|
||||
obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/
|
||||
|
@ -167,6 +167,23 @@ extern unsigned nfs4_callback_layoutrecall(
|
||||
|
||||
extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses);
|
||||
extern void nfs4_cb_take_slot(struct nfs_client *clp);
|
||||
|
||||
/* One decoded device notification carried by a CB_NOTIFY_DEVICEID callback. */
struct cb_devicenotifyitem {
|
||||
uint32_t cbd_notify_type; /* NOTIFY_DEVICEID4_CHANGE or NOTIFY_DEVICEID4_DELETE; the decoder rejects anything else */
|
||||
uint32_t cbd_layout_type; /* compared against the layout driver id (pnfs_curr_ld->id) by the handler */
|
||||
struct nfs4_deviceid cbd_dev_id; /* device id copied verbatim from the wire */
|
||||
uint32_t cbd_immediate; /* decoded only for change notifications, 0 otherwise */
|
||||
};
|
||||
|
||||
/* Decoded argument list of a CB_NOTIFY_DEVICEID callback. */
struct cb_devicenotifyargs {
|
||||
int ndevs; /* number of successfully decoded entries in devs[] */
|
||||
struct cb_devicenotifyitem *devs; /* array allocated by the decoder; nfs4_callback_devicenotify() kfree()s it */
|
||||
};
|
||||
|
||||
extern __be32 nfs4_callback_devicenotify(
|
||||
struct cb_devicenotifyargs *args,
|
||||
void *dummy, struct cb_process_state *cps);
|
||||
|
||||
#endif /* CONFIG_NFS_V4_1 */
|
||||
extern int check_gss_callback_principal(struct nfs_client *, struct svc_rqst *);
|
||||
extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args,
|
||||
|
@ -139,7 +139,7 @@ static u32 initiate_file_draining(struct nfs_client *clp,
|
||||
spin_lock(&ino->i_lock);
|
||||
if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
|
||||
mark_matching_lsegs_invalid(lo, &free_me_list,
|
||||
args->cbl_range.iomode))
|
||||
&args->cbl_range))
|
||||
rv = NFS4ERR_DELAY;
|
||||
else
|
||||
rv = NFS4ERR_NOMATCHING_LAYOUT;
|
||||
@ -184,7 +184,7 @@ static u32 initiate_bulk_draining(struct nfs_client *clp,
|
||||
ino = lo->plh_inode;
|
||||
spin_lock(&ino->i_lock);
|
||||
set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
|
||||
if (mark_matching_lsegs_invalid(lo, &free_me_list, range.iomode))
|
||||
if (mark_matching_lsegs_invalid(lo, &free_me_list, &range))
|
||||
rv = NFS4ERR_DELAY;
|
||||
list_del_init(&lo->plh_bulk_recall);
|
||||
spin_unlock(&ino->i_lock);
|
||||
@ -241,6 +241,53 @@ static void pnfs_recall_all_layouts(struct nfs_client *clp)
|
||||
do_callback_layoutrecall(clp, &args);
|
||||
}
|
||||
|
||||
__be32 nfs4_callback_devicenotify(struct cb_devicenotifyargs *args,
|
||||
void *dummy, struct cb_process_state *cps)
|
||||
{
|
||||
int i;
|
||||
__be32 res = 0;
|
||||
struct nfs_client *clp = cps->clp;
|
||||
struct nfs_server *server = NULL;
|
||||
|
||||
dprintk("%s: -->\n", __func__);
|
||||
|
||||
if (!clp) {
|
||||
res = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION);
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (i = 0; i < args->ndevs; i++) {
|
||||
struct cb_devicenotifyitem *dev = &args->devs[i];
|
||||
|
||||
if (!server ||
|
||||
server->pnfs_curr_ld->id != dev->cbd_layout_type) {
|
||||
rcu_read_lock();
|
||||
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
|
||||
if (server->pnfs_curr_ld &&
|
||||
server->pnfs_curr_ld->id == dev->cbd_layout_type) {
|
||||
rcu_read_unlock();
|
||||
goto found;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
dprintk("%s: layout type %u not found\n",
|
||||
__func__, dev->cbd_layout_type);
|
||||
continue;
|
||||
}
|
||||
|
||||
found:
|
||||
if (dev->cbd_notify_type == NOTIFY_DEVICEID4_CHANGE)
|
||||
dprintk("%s: NOTIFY_DEVICEID4_CHANGE not supported, "
|
||||
"deleting instead\n", __func__);
|
||||
nfs4_delete_deviceid(server->pnfs_curr_ld, clp, &dev->cbd_dev_id);
|
||||
}
|
||||
|
||||
out:
|
||||
kfree(args->devs);
|
||||
dprintk("%s: exit with status = %u\n",
|
||||
__func__, be32_to_cpu(res));
|
||||
return res;
|
||||
}
|
||||
|
||||
int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid)
|
||||
{
|
||||
if (delegation == NULL)
|
||||
|
@ -25,6 +25,7 @@
|
||||
|
||||
#if defined(CONFIG_NFS_V4_1)
|
||||
#define CB_OP_LAYOUTRECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ)
|
||||
#define CB_OP_DEVICENOTIFY_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ)
|
||||
#define CB_OP_SEQUENCE_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \
|
||||
4 + 1 + 3)
|
||||
#define CB_OP_RECALLANY_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ)
|
||||
@ -284,6 +285,93 @@ out:
|
||||
return status;
|
||||
}
|
||||
|
||||
static
|
||||
__be32 decode_devicenotify_args(struct svc_rqst *rqstp,
|
||||
struct xdr_stream *xdr,
|
||||
struct cb_devicenotifyargs *args)
|
||||
{
|
||||
__be32 *p;
|
||||
__be32 status = 0;
|
||||
u32 tmp;
|
||||
int n, i;
|
||||
args->ndevs = 0;
|
||||
|
||||
/* Num of device notifications */
|
||||
p = read_buf(xdr, sizeof(uint32_t));
|
||||
if (unlikely(p == NULL)) {
|
||||
status = htonl(NFS4ERR_BADXDR);
|
||||
goto out;
|
||||
}
|
||||
n = ntohl(*p++);
|
||||
if (n <= 0)
|
||||
goto out;
|
||||
|
||||
args->devs = kmalloc(n * sizeof(*args->devs), GFP_KERNEL);
|
||||
if (!args->devs) {
|
||||
status = htonl(NFS4ERR_DELAY);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Decode each dev notification */
|
||||
for (i = 0; i < n; i++) {
|
||||
struct cb_devicenotifyitem *dev = &args->devs[i];
|
||||
|
||||
p = read_buf(xdr, (4 * sizeof(uint32_t)) + NFS4_DEVICEID4_SIZE);
|
||||
if (unlikely(p == NULL)) {
|
||||
status = htonl(NFS4ERR_BADXDR);
|
||||
goto err;
|
||||
}
|
||||
|
||||
tmp = ntohl(*p++); /* bitmap size */
|
||||
if (tmp != 1) {
|
||||
status = htonl(NFS4ERR_INVAL);
|
||||
goto err;
|
||||
}
|
||||
dev->cbd_notify_type = ntohl(*p++);
|
||||
if (dev->cbd_notify_type != NOTIFY_DEVICEID4_CHANGE &&
|
||||
dev->cbd_notify_type != NOTIFY_DEVICEID4_DELETE) {
|
||||
status = htonl(NFS4ERR_INVAL);
|
||||
goto err;
|
||||
}
|
||||
|
||||
tmp = ntohl(*p++); /* opaque size */
|
||||
if (((dev->cbd_notify_type == NOTIFY_DEVICEID4_CHANGE) &&
|
||||
(tmp != NFS4_DEVICEID4_SIZE + 8)) ||
|
||||
((dev->cbd_notify_type == NOTIFY_DEVICEID4_DELETE) &&
|
||||
(tmp != NFS4_DEVICEID4_SIZE + 4))) {
|
||||
status = htonl(NFS4ERR_INVAL);
|
||||
goto err;
|
||||
}
|
||||
dev->cbd_layout_type = ntohl(*p++);
|
||||
memcpy(dev->cbd_dev_id.data, p, NFS4_DEVICEID4_SIZE);
|
||||
p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE);
|
||||
|
||||
if (dev->cbd_layout_type == NOTIFY_DEVICEID4_CHANGE) {
|
||||
p = read_buf(xdr, sizeof(uint32_t));
|
||||
if (unlikely(p == NULL)) {
|
||||
status = htonl(NFS4ERR_BADXDR);
|
||||
goto err;
|
||||
}
|
||||
dev->cbd_immediate = ntohl(*p++);
|
||||
} else {
|
||||
dev->cbd_immediate = 0;
|
||||
}
|
||||
|
||||
args->ndevs++;
|
||||
|
||||
dprintk("%s: type %d layout 0x%x immediate %d\n",
|
||||
__func__, dev->cbd_notify_type, dev->cbd_layout_type,
|
||||
dev->cbd_immediate);
|
||||
}
|
||||
out:
|
||||
dprintk("%s: status %d ndevs %d\n",
|
||||
__func__, ntohl(status), args->ndevs);
|
||||
return status;
|
||||
err:
|
||||
kfree(args->devs);
|
||||
goto out;
|
||||
}
|
||||
|
||||
static __be32 decode_sessionid(struct xdr_stream *xdr,
|
||||
struct nfs4_sessionid *sid)
|
||||
{
|
||||
@ -639,10 +727,10 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op)
|
||||
case OP_CB_RECALL_ANY:
|
||||
case OP_CB_RECALL_SLOT:
|
||||
case OP_CB_LAYOUTRECALL:
|
||||
case OP_CB_NOTIFY_DEVICEID:
|
||||
*op = &callback_ops[op_nr];
|
||||
break;
|
||||
|
||||
case OP_CB_NOTIFY_DEVICEID:
|
||||
case OP_CB_NOTIFY:
|
||||
case OP_CB_PUSH_DELEG:
|
||||
case OP_CB_RECALLABLE_OBJ_AVAIL:
|
||||
@ -849,6 +937,12 @@ static struct callback_op callback_ops[] = {
|
||||
(callback_decode_arg_t)decode_layoutrecall_args,
|
||||
.res_maxsize = CB_OP_LAYOUTRECALL_RES_MAXSZ,
|
||||
},
|
||||
[OP_CB_NOTIFY_DEVICEID] = {
|
||||
.process_op = (callback_process_op_t)nfs4_callback_devicenotify,
|
||||
.decode_args =
|
||||
(callback_decode_arg_t)decode_devicenotify_args,
|
||||
.res_maxsize = CB_OP_DEVICENOTIFY_RES_MAXSZ,
|
||||
},
|
||||
[OP_CB_SEQUENCE] = {
|
||||
.process_op = (callback_process_op_t)nfs4_callback_sequence,
|
||||
.decode_args = (callback_decode_arg_t)decode_cb_sequence_args,
|
||||
|
@ -290,6 +290,8 @@ static void nfs_free_client(struct nfs_client *clp)
|
||||
if (clp->cl_machine_cred != NULL)
|
||||
put_rpccred(clp->cl_machine_cred);
|
||||
|
||||
nfs4_deviceid_purge_client(clp);
|
||||
|
||||
kfree(clp->cl_hostname);
|
||||
kfree(clp);
|
||||
|
||||
|
@ -512,12 +512,7 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
|
||||
struct page **xdr_pages, struct page *page, unsigned int buflen)
|
||||
{
|
||||
struct xdr_stream stream;
|
||||
struct xdr_buf buf = {
|
||||
.pages = xdr_pages,
|
||||
.page_len = buflen,
|
||||
.buflen = buflen,
|
||||
.len = buflen,
|
||||
};
|
||||
struct xdr_buf buf;
|
||||
struct page *scratch;
|
||||
struct nfs_cache_array *array;
|
||||
unsigned int count = 0;
|
||||
@ -527,7 +522,7 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
|
||||
if (scratch == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
xdr_init_decode(&stream, &buf, NULL);
|
||||
xdr_init_decode_pages(&stream, &buf, xdr_pages, buflen);
|
||||
xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
|
||||
|
||||
do {
|
||||
|
@ -1428,9 +1428,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
|
||||
*/
|
||||
void nfs4_evict_inode(struct inode *inode)
|
||||
{
|
||||
pnfs_destroy_layout(NFS_I(inode));
|
||||
truncate_inode_pages(&inode->i_data, 0);
|
||||
end_writeback(inode);
|
||||
pnfs_return_layout(inode);
|
||||
pnfs_destroy_layout(NFS_I(inode));
|
||||
/* If we are holding a delegation, return it! */
|
||||
nfs_inode_return_delegation_noreclaim(inode);
|
||||
/* First call standard NFS clear_inode() code */
|
||||
|
@ -310,6 +310,7 @@ extern int nfs_migrate_page(struct address_space *,
|
||||
#endif
|
||||
|
||||
/* nfs4proc.c */
|
||||
extern void __nfs4_read_done_cb(struct nfs_read_data *);
|
||||
extern void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data);
|
||||
extern int nfs4_init_client(struct nfs_client *clp,
|
||||
const struct rpc_timeout *timeparms,
|
||||
|
@ -421,6 +421,7 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
|
||||
struct nfs4_deviceid *id,
|
||||
gfp_t gfp_flags)
|
||||
{
|
||||
struct nfs4_deviceid_node *d;
|
||||
struct nfs4_file_layout_dsaddr *dsaddr;
|
||||
int status = -EINVAL;
|
||||
struct nfs_server *nfss = NFS_SERVER(lo->plh_inode);
|
||||
@ -428,7 +429,7 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
|
||||
dprintk("--> %s\n", __func__);
|
||||
|
||||
if (fl->pattern_offset > lgr->range.offset) {
|
||||
dprintk("%s pattern_offset %lld to large\n",
|
||||
dprintk("%s pattern_offset %lld too large\n",
|
||||
__func__, fl->pattern_offset);
|
||||
goto out;
|
||||
}
|
||||
@ -440,12 +441,14 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
|
||||
}
|
||||
|
||||
/* find and reference the deviceid */
|
||||
dsaddr = nfs4_fl_find_get_deviceid(id);
|
||||
if (dsaddr == NULL) {
|
||||
d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode)->pnfs_curr_ld,
|
||||
NFS_SERVER(lo->plh_inode)->nfs_client, id);
|
||||
if (d == NULL) {
|
||||
dsaddr = get_device_info(lo->plh_inode, id, gfp_flags);
|
||||
if (dsaddr == NULL)
|
||||
goto out;
|
||||
}
|
||||
} else
|
||||
dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
|
||||
fl->dsaddr = dsaddr;
|
||||
|
||||
if (fl->first_stripe_index < 0 ||
|
||||
@ -507,12 +510,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
|
||||
gfp_t gfp_flags)
|
||||
{
|
||||
struct xdr_stream stream;
|
||||
struct xdr_buf buf = {
|
||||
.pages = lgr->layoutp->pages,
|
||||
.page_len = lgr->layoutp->len,
|
||||
.buflen = lgr->layoutp->len,
|
||||
.len = lgr->layoutp->len,
|
||||
};
|
||||
struct xdr_buf buf;
|
||||
struct page *scratch;
|
||||
__be32 *p;
|
||||
uint32_t nfl_util;
|
||||
@ -524,7 +522,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
|
||||
if (!scratch)
|
||||
return -ENOMEM;
|
||||
|
||||
xdr_init_decode(&stream, &buf, NULL);
|
||||
xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages, lgr->layoutp->len);
|
||||
xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
|
||||
|
||||
/* 20 = ufl_util (4), first_stripe_index (4), pattern_offset (8),
|
||||
@ -535,7 +533,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
|
||||
|
||||
memcpy(id, p, sizeof(*id));
|
||||
p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE);
|
||||
print_deviceid(id);
|
||||
nfs4_print_deviceid(id);
|
||||
|
||||
nfl_util = be32_to_cpup(p++);
|
||||
if (nfl_util & NFL4_UFLG_COMMIT_THRU_MDS)
|
||||
@ -653,16 +651,19 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
|
||||
/*
|
||||
* filelayout_pg_test(). Called by nfs_can_coalesce_requests()
|
||||
*
|
||||
* return 1 : coalesce page
|
||||
* return 0 : don't coalesce page
|
||||
* return true : coalesce page
|
||||
* return false : don't coalesce page
|
||||
*/
|
||||
int
|
||||
bool
|
||||
filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
|
||||
struct nfs_page *req)
|
||||
{
|
||||
u64 p_stripe, r_stripe;
|
||||
u32 stripe_unit;
|
||||
|
||||
if (!pnfs_generic_pg_test(pgio, prev, req))
|
||||
return 0;
|
||||
|
||||
if (!pgio->pg_lseg)
|
||||
return 1;
|
||||
p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT;
|
||||
@ -860,6 +861,12 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
static void
|
||||
filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d)
|
||||
{
|
||||
nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node));
|
||||
}
|
||||
|
||||
static struct pnfs_layoutdriver_type filelayout_type = {
|
||||
.id = LAYOUT_NFSV4_1_FILES,
|
||||
.name = "LAYOUT_NFSV4_1_FILES",
|
||||
@ -872,6 +879,7 @@ static struct pnfs_layoutdriver_type filelayout_type = {
|
||||
.commit_pagelist = filelayout_commit_pagelist,
|
||||
.read_pagelist = filelayout_read_pagelist,
|
||||
.write_pagelist = filelayout_write_pagelist,
|
||||
.free_deviceid_node = filelayout_free_deveiceid_node,
|
||||
};
|
||||
|
||||
static int __init nfs4filelayout_init(void)
|
||||
|
@ -59,9 +59,7 @@ struct nfs4_pnfs_ds {
|
||||
#define NFS4_DEVICE_ID_NEG_ENTRY 0x00000001
|
||||
|
||||
struct nfs4_file_layout_dsaddr {
|
||||
struct hlist_node node;
|
||||
struct nfs4_deviceid deviceid;
|
||||
atomic_t ref;
|
||||
struct nfs4_deviceid_node id_node;
|
||||
unsigned long flags;
|
||||
u32 stripe_count;
|
||||
u8 *stripe_indices;
|
||||
@ -95,14 +93,12 @@ extern struct nfs_fh *
|
||||
nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j);
|
||||
|
||||
extern void print_ds(struct nfs4_pnfs_ds *ds);
|
||||
extern void print_deviceid(struct nfs4_deviceid *dev_id);
|
||||
u32 nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset);
|
||||
u32 nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j);
|
||||
struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg,
|
||||
u32 ds_idx);
|
||||
extern struct nfs4_file_layout_dsaddr *
|
||||
nfs4_fl_find_get_deviceid(struct nfs4_deviceid *dev_id);
|
||||
extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
|
||||
extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
|
||||
struct nfs4_file_layout_dsaddr *
|
||||
get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags);
|
||||
|
||||
|
@ -36,30 +36,6 @@
|
||||
|
||||
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
|
||||
|
||||
/*
|
||||
* Device ID RCU cache. A device ID is unique per client ID and layout type.
|
||||
*/
|
||||
#define NFS4_FL_DEVICE_ID_HASH_BITS 5
|
||||
#define NFS4_FL_DEVICE_ID_HASH_SIZE (1 << NFS4_FL_DEVICE_ID_HASH_BITS)
|
||||
#define NFS4_FL_DEVICE_ID_HASH_MASK (NFS4_FL_DEVICE_ID_HASH_SIZE - 1)
|
||||
|
||||
static inline u32
|
||||
nfs4_fl_deviceid_hash(struct nfs4_deviceid *id)
|
||||
{
|
||||
unsigned char *cptr = (unsigned char *)id->data;
|
||||
unsigned int nbytes = NFS4_DEVICEID4_SIZE;
|
||||
u32 x = 0;
|
||||
|
||||
while (nbytes--) {
|
||||
x *= 37;
|
||||
x += *cptr++;
|
||||
}
|
||||
return x & NFS4_FL_DEVICE_ID_HASH_MASK;
|
||||
}
|
||||
|
||||
static struct hlist_head filelayout_deviceid_cache[NFS4_FL_DEVICE_ID_HASH_SIZE];
|
||||
static DEFINE_SPINLOCK(filelayout_deviceid_lock);
|
||||
|
||||
/*
|
||||
* Data server cache
|
||||
*
|
||||
@ -89,27 +65,6 @@ print_ds(struct nfs4_pnfs_ds *ds)
|
||||
ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0);
|
||||
}
|
||||
|
||||
void
|
||||
print_ds_list(struct nfs4_file_layout_dsaddr *dsaddr)
|
||||
{
|
||||
int i;
|
||||
|
||||
ifdebug(FACILITY) {
|
||||
printk("%s dsaddr->ds_num %d\n", __func__,
|
||||
dsaddr->ds_num);
|
||||
for (i = 0; i < dsaddr->ds_num; i++)
|
||||
print_ds(dsaddr->ds_list[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void print_deviceid(struct nfs4_deviceid *id)
|
||||
{
|
||||
u32 *p = (u32 *)id;
|
||||
|
||||
dprintk("%s: device id= [%x%x%x%x]\n", __func__,
|
||||
p[0], p[1], p[2], p[3]);
|
||||
}
|
||||
|
||||
/* nfs4_ds_cache_lock is held */
|
||||
static struct nfs4_pnfs_ds *
|
||||
_data_server_lookup_locked(u32 ip_addr, u32 port)
|
||||
@ -201,13 +156,13 @@ destroy_ds(struct nfs4_pnfs_ds *ds)
|
||||
kfree(ds);
|
||||
}
|
||||
|
||||
static void
|
||||
void
|
||||
nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
|
||||
{
|
||||
struct nfs4_pnfs_ds *ds;
|
||||
int i;
|
||||
|
||||
print_deviceid(&dsaddr->deviceid);
|
||||
nfs4_print_deviceid(&dsaddr->id_node.deviceid);
|
||||
|
||||
for (i = 0; i < dsaddr->ds_num; i++) {
|
||||
ds = dsaddr->ds_list[i];
|
||||
@ -353,12 +308,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
|
||||
u8 max_stripe_index;
|
||||
struct nfs4_file_layout_dsaddr *dsaddr = NULL;
|
||||
struct xdr_stream stream;
|
||||
struct xdr_buf buf = {
|
||||
.pages = pdev->pages,
|
||||
.page_len = pdev->pglen,
|
||||
.buflen = pdev->pglen,
|
||||
.len = pdev->pglen,
|
||||
};
|
||||
struct xdr_buf buf;
|
||||
struct page *scratch;
|
||||
|
||||
/* set up xdr stream */
|
||||
@ -366,7 +316,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
|
||||
if (!scratch)
|
||||
goto out_err;
|
||||
|
||||
xdr_init_decode(&stream, &buf, NULL);
|
||||
xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen);
|
||||
xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
|
||||
|
||||
/* Get the stripe count (number of stripe index) */
|
||||
@ -431,8 +381,10 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
|
||||
dsaddr->stripe_indices = stripe_indices;
|
||||
stripe_indices = NULL;
|
||||
dsaddr->ds_num = num;
|
||||
|
||||
memcpy(&dsaddr->deviceid, &pdev->dev_id, sizeof(pdev->dev_id));
|
||||
nfs4_init_deviceid_node(&dsaddr->id_node,
|
||||
NFS_SERVER(ino)->pnfs_curr_ld,
|
||||
NFS_SERVER(ino)->nfs_client,
|
||||
&pdev->dev_id);
|
||||
|
||||
for (i = 0; i < dsaddr->ds_num; i++) {
|
||||
int j;
|
||||
@ -505,8 +457,8 @@ out_err:
|
||||
static struct nfs4_file_layout_dsaddr *
|
||||
decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_flags)
|
||||
{
|
||||
struct nfs4_file_layout_dsaddr *d, *new;
|
||||
long hash;
|
||||
struct nfs4_deviceid_node *d;
|
||||
struct nfs4_file_layout_dsaddr *n, *new;
|
||||
|
||||
new = decode_device(inode, dev, gfp_flags);
|
||||
if (!new) {
|
||||
@ -515,20 +467,13 @@ decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_fl
|
||||
return NULL;
|
||||
}
|
||||
|
||||
spin_lock(&filelayout_deviceid_lock);
|
||||
d = nfs4_fl_find_get_deviceid(&new->deviceid);
|
||||
if (d) {
|
||||
spin_unlock(&filelayout_deviceid_lock);
|
||||
d = nfs4_insert_deviceid_node(&new->id_node);
|
||||
n = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
|
||||
if (n != new) {
|
||||
nfs4_fl_free_deviceid(new);
|
||||
return d;
|
||||
return n;
|
||||
}
|
||||
|
||||
INIT_HLIST_NODE(&new->node);
|
||||
atomic_set(&new->ref, 1);
|
||||
hash = nfs4_fl_deviceid_hash(&new->deviceid);
|
||||
hlist_add_head_rcu(&new->node, &filelayout_deviceid_cache[hash]);
|
||||
spin_unlock(&filelayout_deviceid_lock);
|
||||
|
||||
return new;
|
||||
}
|
||||
|
||||
@ -600,35 +545,7 @@ out_free:
|
||||
void
|
||||
nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
|
||||
{
|
||||
if (atomic_dec_and_lock(&dsaddr->ref, &filelayout_deviceid_lock)) {
|
||||
hlist_del_rcu(&dsaddr->node);
|
||||
spin_unlock(&filelayout_deviceid_lock);
|
||||
|
||||
synchronize_rcu();
|
||||
nfs4_fl_free_deviceid(dsaddr);
|
||||
}
|
||||
}
|
||||
|
||||
struct nfs4_file_layout_dsaddr *
|
||||
nfs4_fl_find_get_deviceid(struct nfs4_deviceid *id)
|
||||
{
|
||||
struct nfs4_file_layout_dsaddr *d;
|
||||
struct hlist_node *n;
|
||||
long hash = nfs4_fl_deviceid_hash(id);
|
||||
|
||||
|
||||
rcu_read_lock();
|
||||
hlist_for_each_entry_rcu(d, n, &filelayout_deviceid_cache[hash], node) {
|
||||
if (!memcmp(&d->deviceid, id, sizeof(*id))) {
|
||||
if (!atomic_inc_not_zero(&d->ref))
|
||||
goto fail;
|
||||
rcu_read_unlock();
|
||||
return d;
|
||||
}
|
||||
}
|
||||
fail:
|
||||
rcu_read_unlock();
|
||||
return NULL;
|
||||
nfs4_put_deviceid_node(&dsaddr->id_node);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -676,15 +593,15 @@ static void
|
||||
filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr,
|
||||
int err, u32 ds_addr)
|
||||
{
|
||||
u32 *p = (u32 *)&dsaddr->deviceid;
|
||||
u32 *p = (u32 *)&dsaddr->id_node.deviceid;
|
||||
|
||||
printk(KERN_ERR "NFS: data server %x connection error %d."
|
||||
" Deviceid [%x%x%x%x] marked out of use.\n",
|
||||
ds_addr, err, p[0], p[1], p[2], p[3]);
|
||||
|
||||
spin_lock(&filelayout_deviceid_lock);
|
||||
spin_lock(&nfs4_ds_cache_lock);
|
||||
dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY;
|
||||
spin_unlock(&filelayout_deviceid_lock);
|
||||
spin_unlock(&nfs4_ds_cache_lock);
|
||||
}
|
||||
|
||||
struct nfs4_pnfs_ds *
|
||||
|
@ -2363,6 +2363,9 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
|
||||
struct nfs4_state *state = NULL;
|
||||
int status;
|
||||
|
||||
if (pnfs_ld_layoutret_on_setattr(inode))
|
||||
pnfs_return_layout(inode);
|
||||
|
||||
nfs_fattr_init(fattr);
|
||||
|
||||
/* Search for an existing open(O_WRITE) file */
|
||||
@ -3177,6 +3180,11 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
|
||||
return err;
|
||||
}
|
||||
|
||||
void __nfs4_read_done_cb(struct nfs_read_data *data)
|
||||
{
|
||||
nfs_invalidate_atime(data->inode);
|
||||
}
|
||||
|
||||
static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data)
|
||||
{
|
||||
struct nfs_server *server = NFS_SERVER(data->inode);
|
||||
@ -3186,7 +3194,7 @@ static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data)
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
nfs_invalidate_atime(data->inode);
|
||||
__nfs4_read_done_cb(data);
|
||||
if (task->tk_status > 0)
|
||||
renew_lease(server, data->timestamp);
|
||||
return 0;
|
||||
@ -3200,7 +3208,8 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
|
||||
if (!nfs4_sequence_done(task, &data->res.seq_res))
|
||||
return -EAGAIN;
|
||||
|
||||
return data->read_done_cb(task, data);
|
||||
return data->read_done_cb ? data->read_done_cb(task, data) :
|
||||
nfs4_read_done_cb(task, data);
|
||||
}
|
||||
|
||||
static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg)
|
||||
@ -3245,7 +3254,8 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
|
||||
{
|
||||
if (!nfs4_sequence_done(task, &data->res.seq_res))
|
||||
return -EAGAIN;
|
||||
return data->write_done_cb(task, data);
|
||||
return data->write_done_cb ? data->write_done_cb(task, data) :
|
||||
nfs4_write_done_cb(task, data);
|
||||
}
|
||||
|
||||
/* Reset the nfs_write_data to send the write to the MDS. */
|
||||
@ -5671,6 +5681,88 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp)
|
||||
return status;
|
||||
}
|
||||
|
||||
static void
|
||||
nfs4_layoutreturn_prepare(struct rpc_task *task, void *calldata)
|
||||
{
|
||||
struct nfs4_layoutreturn *lrp = calldata;
|
||||
|
||||
dprintk("--> %s\n", __func__);
|
||||
if (nfs41_setup_sequence(lrp->clp->cl_session, &lrp->args.seq_args,
|
||||
&lrp->res.seq_res, 0, task))
|
||||
return;
|
||||
rpc_call_start(task);
|
||||
}
|
||||
|
||||
static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
|
||||
{
|
||||
struct nfs4_layoutreturn *lrp = calldata;
|
||||
struct nfs_server *server;
|
||||
|
||||
dprintk("--> %s\n", __func__);
|
||||
|
||||
if (!nfs4_sequence_done(task, &lrp->res.seq_res))
|
||||
return;
|
||||
|
||||
server = NFS_SERVER(lrp->args.inode);
|
||||
if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) {
|
||||
nfs_restart_rpc(task, lrp->clp);
|
||||
return;
|
||||
}
|
||||
if (task->tk_status == 0) {
|
||||
struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout;
|
||||
|
||||
if (lrp->res.lrs_present) {
|
||||
spin_lock(&lo->plh_inode->i_lock);
|
||||
pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
|
||||
spin_unlock(&lo->plh_inode->i_lock);
|
||||
} else
|
||||
BUG_ON(!list_empty(&lo->plh_segs));
|
||||
}
|
||||
dprintk("<-- %s\n", __func__);
|
||||
}
|
||||
|
||||
static void nfs4_layoutreturn_release(void *calldata)
|
||||
{
|
||||
struct nfs4_layoutreturn *lrp = calldata;
|
||||
|
||||
dprintk("--> %s\n", __func__);
|
||||
put_layout_hdr(NFS_I(lrp->args.inode)->layout);
|
||||
kfree(calldata);
|
||||
dprintk("<-- %s\n", __func__);
|
||||
}
|
||||
|
||||
static const struct rpc_call_ops nfs4_layoutreturn_call_ops = {
|
||||
.rpc_call_prepare = nfs4_layoutreturn_prepare,
|
||||
.rpc_call_done = nfs4_layoutreturn_done,
|
||||
.rpc_release = nfs4_layoutreturn_release,
|
||||
};
|
||||
|
||||
int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp)
|
||||
{
|
||||
struct rpc_task *task;
|
||||
struct rpc_message msg = {
|
||||
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTRETURN],
|
||||
.rpc_argp = &lrp->args,
|
||||
.rpc_resp = &lrp->res,
|
||||
};
|
||||
struct rpc_task_setup task_setup_data = {
|
||||
.rpc_client = lrp->clp->cl_rpcclient,
|
||||
.rpc_message = &msg,
|
||||
.callback_ops = &nfs4_layoutreturn_call_ops,
|
||||
.callback_data = lrp,
|
||||
};
|
||||
int status;
|
||||
|
||||
dprintk("--> %s\n", __func__);
|
||||
task = rpc_run_task(&task_setup_data);
|
||||
if (IS_ERR(task))
|
||||
return PTR_ERR(task);
|
||||
status = task->tk_status;
|
||||
dprintk("<-- %s status=%d\n", __func__, status);
|
||||
rpc_put_task(task);
|
||||
return status;
|
||||
}
|
||||
|
||||
static int
|
||||
_nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev)
|
||||
{
|
||||
|
134
fs/nfs/nfs4xdr.c
134
fs/nfs/nfs4xdr.c
@ -338,7 +338,11 @@ static int nfs4_stat_to_errno(int);
|
||||
1 /* layoutupdate4 layout type */ + \
|
||||
1 /* NULL filelayout layoutupdate4 payload */)
|
||||
#define decode_layoutcommit_maxsz (op_decode_hdr_maxsz + 3)
|
||||
|
||||
#define encode_layoutreturn_maxsz (8 + op_encode_hdr_maxsz + \
|
||||
encode_stateid_maxsz + \
|
||||
1 /* FIXME: opaque lrf_body always empty at the moment */)
|
||||
#define decode_layoutreturn_maxsz (op_decode_hdr_maxsz + \
|
||||
1 + decode_stateid_maxsz)
|
||||
#else /* CONFIG_NFS_V4_1 */
|
||||
#define encode_sequence_maxsz 0
|
||||
#define decode_sequence_maxsz 0
|
||||
@ -760,7 +764,14 @@ static int nfs4_stat_to_errno(int);
|
||||
decode_putfh_maxsz + \
|
||||
decode_layoutcommit_maxsz + \
|
||||
decode_getattr_maxsz)
|
||||
|
||||
#define NFS4_enc_layoutreturn_sz (compound_encode_hdr_maxsz + \
|
||||
encode_sequence_maxsz + \
|
||||
encode_putfh_maxsz + \
|
||||
encode_layoutreturn_maxsz)
|
||||
#define NFS4_dec_layoutreturn_sz (compound_decode_hdr_maxsz + \
|
||||
decode_sequence_maxsz + \
|
||||
decode_putfh_maxsz + \
|
||||
decode_layoutreturn_maxsz)
|
||||
|
||||
const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
|
||||
compound_encode_hdr_maxsz +
|
||||
@ -1864,6 +1875,7 @@ encode_layoutget(struct xdr_stream *xdr,
|
||||
|
||||
static int
|
||||
encode_layoutcommit(struct xdr_stream *xdr,
|
||||
struct inode *inode,
|
||||
const struct nfs4_layoutcommit_args *args,
|
||||
struct compound_hdr *hdr)
|
||||
{
|
||||
@ -1872,7 +1884,7 @@ encode_layoutcommit(struct xdr_stream *xdr,
|
||||
dprintk("%s: lbw: %llu type: %d\n", __func__, args->lastbytewritten,
|
||||
NFS_SERVER(args->inode)->pnfs_curr_ld->id);
|
||||
|
||||
p = reserve_space(xdr, 48 + NFS4_STATEID_SIZE);
|
||||
p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE);
|
||||
*p++ = cpu_to_be32(OP_LAYOUTCOMMIT);
|
||||
/* Only whole file layouts */
|
||||
p = xdr_encode_hyper(p, 0); /* offset */
|
||||
@ -1883,12 +1895,49 @@ encode_layoutcommit(struct xdr_stream *xdr,
|
||||
p = xdr_encode_hyper(p, args->lastbytewritten);
|
||||
*p++ = cpu_to_be32(0); /* Never send time_modify_changed */
|
||||
*p++ = cpu_to_be32(NFS_SERVER(args->inode)->pnfs_curr_ld->id);/* type */
|
||||
*p++ = cpu_to_be32(0); /* no file layout payload */
|
||||
|
||||
if (NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit)
|
||||
NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit(
|
||||
NFS_I(inode)->layout, xdr, args);
|
||||
else {
|
||||
p = reserve_space(xdr, 4);
|
||||
*p = cpu_to_be32(0); /* no layout-type payload */
|
||||
}
|
||||
|
||||
hdr->nops++;
|
||||
hdr->replen += decode_layoutcommit_maxsz;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
encode_layoutreturn(struct xdr_stream *xdr,
|
||||
const struct nfs4_layoutreturn_args *args,
|
||||
struct compound_hdr *hdr)
|
||||
{
|
||||
__be32 *p;
|
||||
|
||||
p = reserve_space(xdr, 20);
|
||||
*p++ = cpu_to_be32(OP_LAYOUTRETURN);
|
||||
*p++ = cpu_to_be32(0); /* reclaim. always 0 for now */
|
||||
*p++ = cpu_to_be32(args->layout_type);
|
||||
*p++ = cpu_to_be32(IOMODE_ANY);
|
||||
*p = cpu_to_be32(RETURN_FILE);
|
||||
p = reserve_space(xdr, 16 + NFS4_STATEID_SIZE);
|
||||
p = xdr_encode_hyper(p, 0);
|
||||
p = xdr_encode_hyper(p, NFS4_MAX_UINT64);
|
||||
spin_lock(&args->inode->i_lock);
|
||||
xdr_encode_opaque_fixed(p, &args->stateid.data, NFS4_STATEID_SIZE);
|
||||
spin_unlock(&args->inode->i_lock);
|
||||
if (NFS_SERVER(args->inode)->pnfs_curr_ld->encode_layoutreturn) {
|
||||
NFS_SERVER(args->inode)->pnfs_curr_ld->encode_layoutreturn(
|
||||
NFS_I(args->inode)->layout, xdr, args);
|
||||
} else {
|
||||
p = reserve_space(xdr, 4);
|
||||
*p = cpu_to_be32(0);
|
||||
}
|
||||
hdr->nops++;
|
||||
hdr->replen += decode_layoutreturn_maxsz;
|
||||
}
|
||||
#endif /* CONFIG_NFS_V4_1 */
|
||||
|
||||
/*
|
||||
@ -2706,9 +2755,30 @@ static void nfs4_xdr_enc_layoutget(struct rpc_rqst *req,
|
||||
/*
|
||||
* Encode LAYOUTCOMMIT request
|
||||
*/
|
||||
static int nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req,
|
||||
struct xdr_stream *xdr,
|
||||
struct nfs4_layoutcommit_args *args)
|
||||
static void nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req,
|
||||
struct xdr_stream *xdr,
|
||||
struct nfs4_layoutcommit_args *args)
|
||||
{
|
||||
struct nfs4_layoutcommit_data *data =
|
||||
container_of(args, struct nfs4_layoutcommit_data, args);
|
||||
struct compound_hdr hdr = {
|
||||
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
|
||||
};
|
||||
|
||||
encode_compound_hdr(xdr, req, &hdr);
|
||||
encode_sequence(xdr, &args->seq_args, &hdr);
|
||||
encode_putfh(xdr, NFS_FH(args->inode), &hdr);
|
||||
encode_layoutcommit(xdr, data->args.inode, args, &hdr);
|
||||
encode_getfattr(xdr, args->bitmask, &hdr);
|
||||
encode_nops(&hdr);
|
||||
}
|
||||
|
||||
/*
|
||||
* Encode LAYOUTRETURN request
|
||||
*/
|
||||
static void nfs4_xdr_enc_layoutreturn(struct rpc_rqst *req,
|
||||
struct xdr_stream *xdr,
|
||||
struct nfs4_layoutreturn_args *args)
|
||||
{
|
||||
struct compound_hdr hdr = {
|
||||
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
|
||||
@ -2717,10 +2787,8 @@ static int nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req,
|
||||
encode_compound_hdr(xdr, req, &hdr);
|
||||
encode_sequence(xdr, &args->seq_args, &hdr);
|
||||
encode_putfh(xdr, NFS_FH(args->inode), &hdr);
|
||||
encode_layoutcommit(xdr, args, &hdr);
|
||||
encode_getfattr(xdr, args->bitmask, &hdr);
|
||||
encode_layoutreturn(xdr, args, &hdr);
|
||||
encode_nops(&hdr);
|
||||
return 0;
|
||||
}
|
||||
#endif /* CONFIG_NFS_V4_1 */
|
||||
|
||||
@ -5203,6 +5271,27 @@ out_overflow:
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
static int decode_layoutreturn(struct xdr_stream *xdr,
|
||||
struct nfs4_layoutreturn_res *res)
|
||||
{
|
||||
__be32 *p;
|
||||
int status;
|
||||
|
||||
status = decode_op_hdr(xdr, OP_LAYOUTRETURN);
|
||||
if (status)
|
||||
return status;
|
||||
p = xdr_inline_decode(xdr, 4);
|
||||
if (unlikely(!p))
|
||||
goto out_overflow;
|
||||
res->lrs_present = be32_to_cpup(p);
|
||||
if (res->lrs_present)
|
||||
status = decode_stateid(xdr, &res->stateid);
|
||||
return status;
|
||||
out_overflow:
|
||||
print_overflow_msg(__func__, xdr);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
static int decode_layoutcommit(struct xdr_stream *xdr,
|
||||
struct rpc_rqst *req,
|
||||
struct nfs4_layoutcommit_res *res)
|
||||
@ -6319,6 +6408,30 @@ out:
|
||||
return status;
|
||||
}
|
||||
|
||||
/*
|
||||
* Decode LAYOUTRETURN response
|
||||
*/
|
||||
static int nfs4_xdr_dec_layoutreturn(struct rpc_rqst *rqstp,
|
||||
struct xdr_stream *xdr,
|
||||
struct nfs4_layoutreturn_res *res)
|
||||
{
|
||||
struct compound_hdr hdr;
|
||||
int status;
|
||||
|
||||
status = decode_compound_hdr(xdr, &hdr);
|
||||
if (status)
|
||||
goto out;
|
||||
status = decode_sequence(xdr, &res->seq_res, rqstp);
|
||||
if (status)
|
||||
goto out;
|
||||
status = decode_putfh(xdr);
|
||||
if (status)
|
||||
goto out;
|
||||
status = decode_layoutreturn(xdr, res);
|
||||
out:
|
||||
return status;
|
||||
}
|
||||
|
||||
/*
|
||||
* Decode LAYOUTCOMMIT response
|
||||
*/
|
||||
@ -6547,6 +6660,7 @@ struct rpc_procinfo nfs4_procedures[] = {
|
||||
PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo),
|
||||
PROC(LAYOUTGET, enc_layoutget, dec_layoutget),
|
||||
PROC(LAYOUTCOMMIT, enc_layoutcommit, dec_layoutcommit),
|
||||
PROC(LAYOUTRETURN, enc_layoutreturn, dec_layoutreturn),
|
||||
#endif /* CONFIG_NFS_V4_1 */
|
||||
};
|
||||
|
||||
|
5
fs/nfs/objlayout/Kbuild
Normal file
5
fs/nfs/objlayout/Kbuild
Normal file
@ -0,0 +1,5 @@
|
||||
#
|
||||
# Makefile for the pNFS Objects Layout Driver kernel module
|
||||
#
|
||||
objlayoutdriver-y := objio_osd.o pnfs_osd_xdr_cli.o objlayout.o
|
||||
obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayoutdriver.o
|
1057
fs/nfs/objlayout/objio_osd.c
Normal file
1057
fs/nfs/objlayout/objio_osd.c
Normal file
File diff suppressed because it is too large
Load Diff
712
fs/nfs/objlayout/objlayout.c
Normal file
712
fs/nfs/objlayout/objlayout.c
Normal file
@ -0,0 +1,712 @@
|
||||
/*
|
||||
* pNFS Objects layout driver high level definitions
|
||||
*
|
||||
* Copyright (C) 2007 Panasas Inc. [year of first publication]
|
||||
* All rights reserved.
|
||||
*
|
||||
* Benny Halevy <bhalevy@panasas.com>
|
||||
* Boaz Harrosh <bharrosh@panasas.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2
|
||||
* See the file COPYING included with this distribution for more details.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. Neither the name of the Panasas company nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
|
||||
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <scsi/osd_initiator.h>
|
||||
#include "objlayout.h"
|
||||
|
||||
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
|
||||
/*
|
||||
* Create a objlayout layout structure for the given inode and return it.
|
||||
*/
|
||||
struct pnfs_layout_hdr *
|
||||
objlayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
|
||||
{
|
||||
struct objlayout *objlay;
|
||||
|
||||
objlay = kzalloc(sizeof(struct objlayout), gfp_flags);
|
||||
if (objlay) {
|
||||
spin_lock_init(&objlay->lock);
|
||||
INIT_LIST_HEAD(&objlay->err_list);
|
||||
}
|
||||
dprintk("%s: Return %p\n", __func__, objlay);
|
||||
return &objlay->pnfs_layout;
|
||||
}
|
||||
|
||||
/*
|
||||
* Free an objlayout layout structure
|
||||
*/
|
||||
void
|
||||
objlayout_free_layout_hdr(struct pnfs_layout_hdr *lo)
|
||||
{
|
||||
struct objlayout *objlay = OBJLAYOUT(lo);
|
||||
|
||||
dprintk("%s: objlay %p\n", __func__, objlay);
|
||||
|
||||
WARN_ON(!list_empty(&objlay->err_list));
|
||||
kfree(objlay);
|
||||
}
|
||||
|
||||
/*
|
||||
* Unmarshall layout and store it in pnfslay.
|
||||
*/
|
||||
struct pnfs_layout_segment *
|
||||
objlayout_alloc_lseg(struct pnfs_layout_hdr *pnfslay,
|
||||
struct nfs4_layoutget_res *lgr,
|
||||
gfp_t gfp_flags)
|
||||
{
|
||||
int status = -ENOMEM;
|
||||
struct xdr_stream stream;
|
||||
struct xdr_buf buf = {
|
||||
.pages = lgr->layoutp->pages,
|
||||
.page_len = lgr->layoutp->len,
|
||||
.buflen = lgr->layoutp->len,
|
||||
.len = lgr->layoutp->len,
|
||||
};
|
||||
struct page *scratch;
|
||||
struct pnfs_layout_segment *lseg;
|
||||
|
||||
dprintk("%s: Begin pnfslay %p\n", __func__, pnfslay);
|
||||
|
||||
scratch = alloc_page(gfp_flags);
|
||||
if (!scratch)
|
||||
goto err_nofree;
|
||||
|
||||
xdr_init_decode(&stream, &buf, NULL);
|
||||
xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
|
||||
|
||||
status = objio_alloc_lseg(&lseg, pnfslay, &lgr->range, &stream, gfp_flags);
|
||||
if (unlikely(status)) {
|
||||
dprintk("%s: objio_alloc_lseg Return err %d\n", __func__,
|
||||
status);
|
||||
goto err;
|
||||
}
|
||||
|
||||
__free_page(scratch);
|
||||
|
||||
dprintk("%s: Return %p\n", __func__, lseg);
|
||||
return lseg;
|
||||
|
||||
err:
|
||||
__free_page(scratch);
|
||||
err_nofree:
|
||||
dprintk("%s: Err Return=>%d\n", __func__, status);
|
||||
return ERR_PTR(status);
|
||||
}
|
||||
|
||||
/*
 * Free a layout segment. A NULL segment is tolerated and ignored.
 */
void
objlayout_free_lseg(struct pnfs_layout_segment *lseg)
{
	dprintk("%s: freeing layout segment %p\n", __func__, lseg);

	if (likely(lseg))
		objio_free_lseg(lseg);
}
|
||||
|
||||
/*
|
||||
* I/O Operations
|
||||
*/
|
||||
static inline u64
|
||||
end_offset(u64 start, u64 len)
|
||||
{
|
||||
u64 end;
|
||||
|
||||
end = start + len;
|
||||
return end >= start ? end : NFS4_MAX_UINT64;
|
||||
}
|
||||
|
||||
/* last octet in a range */
|
||||
static inline u64
|
||||
last_byte_offset(u64 start, u64 len)
|
||||
{
|
||||
u64 end;
|
||||
|
||||
BUG_ON(!len);
|
||||
end = start + len;
|
||||
return end > start ? end - 1 : NFS4_MAX_UINT64;
|
||||
}
|
||||
|
||||
static struct objlayout_io_state *
|
||||
objlayout_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type,
|
||||
struct page **pages,
|
||||
unsigned pgbase,
|
||||
loff_t offset,
|
||||
size_t count,
|
||||
struct pnfs_layout_segment *lseg,
|
||||
void *rpcdata,
|
||||
gfp_t gfp_flags)
|
||||
{
|
||||
struct objlayout_io_state *state;
|
||||
u64 lseg_end_offset;
|
||||
|
||||
dprintk("%s: allocating io_state\n", __func__);
|
||||
if (objio_alloc_io_state(lseg, &state, gfp_flags))
|
||||
return NULL;
|
||||
|
||||
BUG_ON(offset < lseg->pls_range.offset);
|
||||
lseg_end_offset = end_offset(lseg->pls_range.offset,
|
||||
lseg->pls_range.length);
|
||||
BUG_ON(offset >= lseg_end_offset);
|
||||
if (offset + count > lseg_end_offset) {
|
||||
count = lseg->pls_range.length -
|
||||
(offset - lseg->pls_range.offset);
|
||||
dprintk("%s: truncated count %Zd\n", __func__, count);
|
||||
}
|
||||
|
||||
if (pgbase > PAGE_SIZE) {
|
||||
pages += pgbase >> PAGE_SHIFT;
|
||||
pgbase &= ~PAGE_MASK;
|
||||
}
|
||||
|
||||
INIT_LIST_HEAD(&state->err_list);
|
||||
state->lseg = lseg;
|
||||
state->rpcdata = rpcdata;
|
||||
state->pages = pages;
|
||||
state->pgbase = pgbase;
|
||||
state->nr_pages = (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
||||
state->offset = offset;
|
||||
state->count = count;
|
||||
state->sync = 0;
|
||||
|
||||
return state;
|
||||
}
|
||||
|
||||
/* Release an io_state through the I/O engine; NULL is ignored */
static void
objlayout_free_io_state(struct objlayout_io_state *state)
{
	dprintk("%s: freeing io_state\n", __func__);

	if (likely(state))
		objio_free_io_state(state);
}
|
||||
|
||||
/*
|
||||
* I/O done common code
|
||||
*/
|
||||
static void
|
||||
objlayout_iodone(struct objlayout_io_state *state)
|
||||
{
|
||||
dprintk("%s: state %p status\n", __func__, state);
|
||||
|
||||
if (likely(state->status >= 0)) {
|
||||
objlayout_free_io_state(state);
|
||||
} else {
|
||||
struct objlayout *objlay = OBJLAYOUT(state->lseg->pls_layout);
|
||||
|
||||
spin_lock(&objlay->lock);
|
||||
objlay->delta_space_valid = OBJ_DSU_INVALID;
|
||||
list_add(&objlay->err_list, &state->err_list);
|
||||
spin_unlock(&objlay->lock);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* objlayout_io_set_result - Set an osd_error code on a specific osd comp.
|
||||
*
|
||||
* The @index component IO failed (error returned from target). Register
|
||||
* the error for later reporting at layout-return.
|
||||
*/
|
||||
void
|
||||
objlayout_io_set_result(struct objlayout_io_state *state, unsigned index,
|
||||
struct pnfs_osd_objid *pooid, int osd_error,
|
||||
u64 offset, u64 length, bool is_write)
|
||||
{
|
||||
struct pnfs_osd_ioerr *ioerr = &state->ioerrs[index];
|
||||
|
||||
BUG_ON(index >= state->num_comps);
|
||||
if (osd_error) {
|
||||
ioerr->oer_component = *pooid;
|
||||
ioerr->oer_comp_offset = offset;
|
||||
ioerr->oer_comp_length = length;
|
||||
ioerr->oer_iswrite = is_write;
|
||||
ioerr->oer_errno = osd_error;
|
||||
|
||||
dprintk("%s: err[%d]: errno=%d is_write=%d dev(%llx:%llx) "
|
||||
"par=0x%llx obj=0x%llx offset=0x%llx length=0x%llx\n",
|
||||
__func__, index, ioerr->oer_errno,
|
||||
ioerr->oer_iswrite,
|
||||
_DEVID_LO(&ioerr->oer_component.oid_device_id),
|
||||
_DEVID_HI(&ioerr->oer_component.oid_device_id),
|
||||
ioerr->oer_component.oid_partition_id,
|
||||
ioerr->oer_component.oid_object_id,
|
||||
ioerr->oer_comp_offset,
|
||||
ioerr->oer_comp_length);
|
||||
} else {
|
||||
/* User need not call if no error is reported */
|
||||
ioerr->oer_errno = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Function scheduled on rpc workqueue to call ->nfs_readlist_complete().
|
||||
* This is because the osd completion is called with ints-off from
|
||||
* the block layer
|
||||
*/
|
||||
static void _rpc_read_complete(struct work_struct *work)
|
||||
{
|
||||
struct rpc_task *task;
|
||||
struct nfs_read_data *rdata;
|
||||
|
||||
dprintk("%s enter\n", __func__);
|
||||
task = container_of(work, struct rpc_task, u.tk_work);
|
||||
rdata = container_of(task, struct nfs_read_data, task);
|
||||
|
||||
pnfs_ld_read_done(rdata);
|
||||
}
|
||||
|
||||
void
|
||||
objlayout_read_done(struct objlayout_io_state *state, ssize_t status, bool sync)
|
||||
{
|
||||
int eof = state->eof;
|
||||
struct nfs_read_data *rdata;
|
||||
|
||||
state->status = status;
|
||||
dprintk("%s: Begin status=%ld eof=%d\n", __func__, status, eof);
|
||||
rdata = state->rpcdata;
|
||||
rdata->task.tk_status = status;
|
||||
if (status >= 0) {
|
||||
rdata->res.count = status;
|
||||
rdata->res.eof = eof;
|
||||
}
|
||||
objlayout_iodone(state);
|
||||
/* must not use state after this point */
|
||||
|
||||
if (sync)
|
||||
pnfs_ld_read_done(rdata);
|
||||
else {
|
||||
INIT_WORK(&rdata->task.u.tk_work, _rpc_read_complete);
|
||||
schedule_work(&rdata->task.u.tk_work);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Perform sync or async reads.
|
||||
*/
|
||||
enum pnfs_try_status
|
||||
objlayout_read_pagelist(struct nfs_read_data *rdata)
|
||||
{
|
||||
loff_t offset = rdata->args.offset;
|
||||
size_t count = rdata->args.count;
|
||||
struct objlayout_io_state *state;
|
||||
ssize_t status = 0;
|
||||
loff_t eof;
|
||||
|
||||
dprintk("%s: Begin inode %p offset %llu count %d\n",
|
||||
__func__, rdata->inode, offset, (int)count);
|
||||
|
||||
eof = i_size_read(rdata->inode);
|
||||
if (unlikely(offset + count > eof)) {
|
||||
if (offset >= eof) {
|
||||
status = 0;
|
||||
rdata->res.count = 0;
|
||||
rdata->res.eof = 1;
|
||||
goto out;
|
||||
}
|
||||
count = eof - offset;
|
||||
}
|
||||
|
||||
state = objlayout_alloc_io_state(NFS_I(rdata->inode)->layout,
|
||||
rdata->args.pages, rdata->args.pgbase,
|
||||
offset, count,
|
||||
rdata->lseg, rdata,
|
||||
GFP_KERNEL);
|
||||
if (unlikely(!state)) {
|
||||
status = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
state->eof = state->offset + state->count >= eof;
|
||||
|
||||
status = objio_read_pagelist(state);
|
||||
out:
|
||||
dprintk("%s: Return status %Zd\n", __func__, status);
|
||||
rdata->pnfs_error = status;
|
||||
return PNFS_ATTEMPTED;
|
||||
}
|
||||
|
||||
/* Function scheduled on rpc workqueue to call ->nfs_writelist_complete().
|
||||
* This is because the osd completion is called with ints-off from
|
||||
* the block layer
|
||||
*/
|
||||
static void _rpc_write_complete(struct work_struct *work)
|
||||
{
|
||||
struct rpc_task *task;
|
||||
struct nfs_write_data *wdata;
|
||||
|
||||
dprintk("%s enter\n", __func__);
|
||||
task = container_of(work, struct rpc_task, u.tk_work);
|
||||
wdata = container_of(task, struct nfs_write_data, task);
|
||||
|
||||
pnfs_ld_write_done(wdata);
|
||||
}
|
||||
|
||||
void
|
||||
objlayout_write_done(struct objlayout_io_state *state, ssize_t status,
|
||||
bool sync)
|
||||
{
|
||||
struct nfs_write_data *wdata;
|
||||
|
||||
dprintk("%s: Begin\n", __func__);
|
||||
wdata = state->rpcdata;
|
||||
state->status = status;
|
||||
wdata->task.tk_status = status;
|
||||
if (status >= 0) {
|
||||
wdata->res.count = status;
|
||||
wdata->verf.committed = state->committed;
|
||||
dprintk("%s: Return status %d committed %d\n",
|
||||
__func__, wdata->task.tk_status,
|
||||
wdata->verf.committed);
|
||||
} else
|
||||
dprintk("%s: Return status %d\n",
|
||||
__func__, wdata->task.tk_status);
|
||||
objlayout_iodone(state);
|
||||
/* must not use state after this point */
|
||||
|
||||
if (sync)
|
||||
pnfs_ld_write_done(wdata);
|
||||
else {
|
||||
INIT_WORK(&wdata->task.u.tk_work, _rpc_write_complete);
|
||||
schedule_work(&wdata->task.u.tk_work);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Perform sync or async writes.
|
||||
*/
|
||||
enum pnfs_try_status
|
||||
objlayout_write_pagelist(struct nfs_write_data *wdata,
|
||||
int how)
|
||||
{
|
||||
struct objlayout_io_state *state;
|
||||
ssize_t status;
|
||||
|
||||
dprintk("%s: Begin inode %p offset %llu count %u\n",
|
||||
__func__, wdata->inode, wdata->args.offset, wdata->args.count);
|
||||
|
||||
state = objlayout_alloc_io_state(NFS_I(wdata->inode)->layout,
|
||||
wdata->args.pages,
|
||||
wdata->args.pgbase,
|
||||
wdata->args.offset,
|
||||
wdata->args.count,
|
||||
wdata->lseg, wdata,
|
||||
GFP_NOFS);
|
||||
if (unlikely(!state)) {
|
||||
status = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
state->sync = how & FLUSH_SYNC;
|
||||
|
||||
status = objio_write_pagelist(state, how & FLUSH_STABLE);
|
||||
out:
|
||||
dprintk("%s: Return status %Zd\n", __func__, status);
|
||||
wdata->pnfs_error = status;
|
||||
return PNFS_ATTEMPTED;
|
||||
}
|
||||
|
||||
void
|
||||
objlayout_encode_layoutcommit(struct pnfs_layout_hdr *pnfslay,
|
||||
struct xdr_stream *xdr,
|
||||
const struct nfs4_layoutcommit_args *args)
|
||||
{
|
||||
struct objlayout *objlay = OBJLAYOUT(pnfslay);
|
||||
struct pnfs_osd_layoutupdate lou;
|
||||
__be32 *start;
|
||||
|
||||
dprintk("%s: Begin\n", __func__);
|
||||
|
||||
spin_lock(&objlay->lock);
|
||||
lou.dsu_valid = (objlay->delta_space_valid == OBJ_DSU_VALID);
|
||||
lou.dsu_delta = objlay->delta_space_used;
|
||||
objlay->delta_space_used = 0;
|
||||
objlay->delta_space_valid = OBJ_DSU_INIT;
|
||||
lou.olu_ioerr_flag = !list_empty(&objlay->err_list);
|
||||
spin_unlock(&objlay->lock);
|
||||
|
||||
start = xdr_reserve_space(xdr, 4);
|
||||
|
||||
BUG_ON(pnfs_osd_xdr_encode_layoutupdate(xdr, &lou));
|
||||
|
||||
*start = cpu_to_be32((xdr->p - start - 1) * 4);
|
||||
|
||||
dprintk("%s: Return delta_space_used %lld err %d\n", __func__,
|
||||
lou.dsu_delta, lou.olu_ioerr_flag);
|
||||
}
|
||||
|
||||
static int
|
||||
err_prio(u32 oer_errno)
|
||||
{
|
||||
switch (oer_errno) {
|
||||
case 0:
|
||||
return 0;
|
||||
|
||||
case PNFS_OSD_ERR_RESOURCE:
|
||||
return OSD_ERR_PRI_RESOURCE;
|
||||
case PNFS_OSD_ERR_BAD_CRED:
|
||||
return OSD_ERR_PRI_BAD_CRED;
|
||||
case PNFS_OSD_ERR_NO_ACCESS:
|
||||
return OSD_ERR_PRI_NO_ACCESS;
|
||||
case PNFS_OSD_ERR_UNREACHABLE:
|
||||
return OSD_ERR_PRI_UNREACHABLE;
|
||||
case PNFS_OSD_ERR_NOT_FOUND:
|
||||
return OSD_ERR_PRI_NOT_FOUND;
|
||||
case PNFS_OSD_ERR_NO_SPACE:
|
||||
return OSD_ERR_PRI_NO_SPACE;
|
||||
default:
|
||||
WARN_ON(1);
|
||||
/* fallthrough */
|
||||
case PNFS_OSD_ERR_EIO:
|
||||
return OSD_ERR_PRI_EIO;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
merge_ioerr(struct pnfs_osd_ioerr *dest_err,
|
||||
const struct pnfs_osd_ioerr *src_err)
|
||||
{
|
||||
u64 dest_end, src_end;
|
||||
|
||||
if (!dest_err->oer_errno) {
|
||||
*dest_err = *src_err;
|
||||
/* accumulated device must be blank */
|
||||
memset(&dest_err->oer_component.oid_device_id, 0,
|
||||
sizeof(dest_err->oer_component.oid_device_id));
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
if (dest_err->oer_component.oid_partition_id !=
|
||||
src_err->oer_component.oid_partition_id)
|
||||
dest_err->oer_component.oid_partition_id = 0;
|
||||
|
||||
if (dest_err->oer_component.oid_object_id !=
|
||||
src_err->oer_component.oid_object_id)
|
||||
dest_err->oer_component.oid_object_id = 0;
|
||||
|
||||
if (dest_err->oer_comp_offset > src_err->oer_comp_offset)
|
||||
dest_err->oer_comp_offset = src_err->oer_comp_offset;
|
||||
|
||||
dest_end = end_offset(dest_err->oer_comp_offset,
|
||||
dest_err->oer_comp_length);
|
||||
src_end = end_offset(src_err->oer_comp_offset,
|
||||
src_err->oer_comp_length);
|
||||
if (dest_end < src_end)
|
||||
dest_end = src_end;
|
||||
|
||||
dest_err->oer_comp_length = dest_end - dest_err->oer_comp_offset;
|
||||
|
||||
if ((src_err->oer_iswrite == dest_err->oer_iswrite) &&
|
||||
(err_prio(src_err->oer_errno) > err_prio(dest_err->oer_errno))) {
|
||||
dest_err->oer_errno = src_err->oer_errno;
|
||||
} else if (src_err->oer_iswrite) {
|
||||
dest_err->oer_iswrite = true;
|
||||
dest_err->oer_errno = src_err->oer_errno;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
encode_accumulated_error(struct objlayout *objlay, __be32 *p)
|
||||
{
|
||||
struct objlayout_io_state *state, *tmp;
|
||||
struct pnfs_osd_ioerr accumulated_err = {.oer_errno = 0};
|
||||
|
||||
list_for_each_entry_safe(state, tmp, &objlay->err_list, err_list) {
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < state->num_comps; i++) {
|
||||
struct pnfs_osd_ioerr *ioerr = &state->ioerrs[i];
|
||||
|
||||
if (!ioerr->oer_errno)
|
||||
continue;
|
||||
|
||||
printk(KERN_ERR "%s: err[%d]: errno=%d is_write=%d "
|
||||
"dev(%llx:%llx) par=0x%llx obj=0x%llx "
|
||||
"offset=0x%llx length=0x%llx\n",
|
||||
__func__, i, ioerr->oer_errno,
|
||||
ioerr->oer_iswrite,
|
||||
_DEVID_LO(&ioerr->oer_component.oid_device_id),
|
||||
_DEVID_HI(&ioerr->oer_component.oid_device_id),
|
||||
ioerr->oer_component.oid_partition_id,
|
||||
ioerr->oer_component.oid_object_id,
|
||||
ioerr->oer_comp_offset,
|
||||
ioerr->oer_comp_length);
|
||||
|
||||
merge_ioerr(&accumulated_err, ioerr);
|
||||
}
|
||||
list_del(&state->err_list);
|
||||
objlayout_free_io_state(state);
|
||||
}
|
||||
|
||||
pnfs_osd_xdr_encode_ioerr(p, &accumulated_err);
|
||||
}
|
||||
|
||||
void
|
||||
objlayout_encode_layoutreturn(struct pnfs_layout_hdr *pnfslay,
|
||||
struct xdr_stream *xdr,
|
||||
const struct nfs4_layoutreturn_args *args)
|
||||
{
|
||||
struct objlayout *objlay = OBJLAYOUT(pnfslay);
|
||||
struct objlayout_io_state *state, *tmp;
|
||||
__be32 *start;
|
||||
|
||||
dprintk("%s: Begin\n", __func__);
|
||||
start = xdr_reserve_space(xdr, 4);
|
||||
BUG_ON(!start);
|
||||
|
||||
spin_lock(&objlay->lock);
|
||||
|
||||
list_for_each_entry_safe(state, tmp, &objlay->err_list, err_list) {
|
||||
__be32 *last_xdr = NULL, *p;
|
||||
unsigned i;
|
||||
int res = 0;
|
||||
|
||||
for (i = 0; i < state->num_comps; i++) {
|
||||
struct pnfs_osd_ioerr *ioerr = &state->ioerrs[i];
|
||||
|
||||
if (!ioerr->oer_errno)
|
||||
continue;
|
||||
|
||||
dprintk("%s: err[%d]: errno=%d is_write=%d "
|
||||
"dev(%llx:%llx) par=0x%llx obj=0x%llx "
|
||||
"offset=0x%llx length=0x%llx\n",
|
||||
__func__, i, ioerr->oer_errno,
|
||||
ioerr->oer_iswrite,
|
||||
_DEVID_LO(&ioerr->oer_component.oid_device_id),
|
||||
_DEVID_HI(&ioerr->oer_component.oid_device_id),
|
||||
ioerr->oer_component.oid_partition_id,
|
||||
ioerr->oer_component.oid_object_id,
|
||||
ioerr->oer_comp_offset,
|
||||
ioerr->oer_comp_length);
|
||||
|
||||
p = pnfs_osd_xdr_ioerr_reserve_space(xdr);
|
||||
if (unlikely(!p)) {
|
||||
res = -E2BIG;
|
||||
break; /* accumulated_error */
|
||||
}
|
||||
|
||||
last_xdr = p;
|
||||
pnfs_osd_xdr_encode_ioerr(p, &state->ioerrs[i]);
|
||||
}
|
||||
|
||||
/* TODO: use xdr_write_pages */
|
||||
if (unlikely(res)) {
|
||||
/* no space for even one error descriptor */
|
||||
BUG_ON(!last_xdr);
|
||||
|
||||
/* we've encountered a situation with lots and lots of
|
||||
* errors and no space to encode them all. Use the last
|
||||
* available slot to report the union of all the
|
||||
* remaining errors.
|
||||
*/
|
||||
encode_accumulated_error(objlay, last_xdr);
|
||||
goto loop_done;
|
||||
}
|
||||
list_del(&state->err_list);
|
||||
objlayout_free_io_state(state);
|
||||
}
|
||||
loop_done:
|
||||
spin_unlock(&objlay->lock);
|
||||
|
||||
*start = cpu_to_be32((xdr->p - start - 1) * 4);
|
||||
dprintk("%s: Return\n", __func__);
|
||||
}
|
||||
|
||||
|
||||
/*
 * Get Device Info API for io engines
 *
 * Wrapper allocated by objlayout_get_deviceinfo(). Callers only see the
 * embedded @da; objlayout_put_deviceinfo() recovers the wrapper from it
 * with container_of() to release both the page and the wrapper.
 */
struct objlayout_deviceinfo {
	struct page *page;	/* page handed to nfs4_proc_getdeviceinfo() */
	struct pnfs_osd_deviceaddr da; /* This must be last */
};
|
||||
|
||||
/* Initialize and call nfs_getdeviceinfo, then decode and return a
|
||||
* "struct pnfs_osd_deviceaddr *" Eventually objlayout_put_deviceinfo()
|
||||
* should be called.
|
||||
*/
|
||||
int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay,
|
||||
struct nfs4_deviceid *d_id, struct pnfs_osd_deviceaddr **deviceaddr,
|
||||
gfp_t gfp_flags)
|
||||
{
|
||||
struct objlayout_deviceinfo *odi;
|
||||
struct pnfs_device pd;
|
||||
struct super_block *sb;
|
||||
struct page *page, **pages;
|
||||
u32 *p;
|
||||
int err;
|
||||
|
||||
page = alloc_page(gfp_flags);
|
||||
if (!page)
|
||||
return -ENOMEM;
|
||||
|
||||
pages = &page;
|
||||
pd.pages = pages;
|
||||
|
||||
memcpy(&pd.dev_id, d_id, sizeof(*d_id));
|
||||
pd.layout_type = LAYOUT_OSD2_OBJECTS;
|
||||
pd.pages = &page;
|
||||
pd.pgbase = 0;
|
||||
pd.pglen = PAGE_SIZE;
|
||||
pd.mincount = 0;
|
||||
|
||||
sb = pnfslay->plh_inode->i_sb;
|
||||
err = nfs4_proc_getdeviceinfo(NFS_SERVER(pnfslay->plh_inode), &pd);
|
||||
dprintk("%s nfs_getdeviceinfo returned %d\n", __func__, err);
|
||||
if (err)
|
||||
goto err_out;
|
||||
|
||||
p = page_address(page);
|
||||
odi = kzalloc(sizeof(*odi), gfp_flags);
|
||||
if (!odi) {
|
||||
err = -ENOMEM;
|
||||
goto err_out;
|
||||
}
|
||||
pnfs_osd_xdr_decode_deviceaddr(&odi->da, p);
|
||||
odi->page = page;
|
||||
*deviceaddr = &odi->da;
|
||||
return 0;
|
||||
|
||||
err_out:
|
||||
__free_page(page);
|
||||
return err;
|
||||
}
|
||||
|
||||
void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr)
|
||||
{
|
||||
struct objlayout_deviceinfo *odi = container_of(deviceaddr,
|
||||
struct objlayout_deviceinfo,
|
||||
da);
|
||||
|
||||
__free_page(odi->page);
|
||||
kfree(odi);
|
||||
}
|
187
fs/nfs/objlayout/objlayout.h
Normal file
187
fs/nfs/objlayout/objlayout.h
Normal file
@ -0,0 +1,187 @@
|
||||
/*
|
||||
* Data types and function declarations for interfacing with the
|
||||
* pNFS standard object layout driver.
|
||||
*
|
||||
* Copyright (C) 2007 Panasas Inc. [year of first publication]
|
||||
* All rights reserved.
|
||||
*
|
||||
* Benny Halevy <bhalevy@panasas.com>
|
||||
* Boaz Harrosh <bharrosh@panasas.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2
|
||||
* See the file COPYING included with this distribution for more details.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. Neither the name of the Panasas company nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
|
||||
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef _OBJLAYOUT_H
|
||||
#define _OBJLAYOUT_H
|
||||
|
||||
#include <linux/nfs_fs.h>
|
||||
#include <linux/pnfs_osd_xdr.h>
|
||||
#include "../pnfs.h"
|
||||
|
||||
/*
 * per-inode layout
 *
 * Wraps the generic pnfs_layout_hdr; OBJLAYOUT() converts from the
 * embedded header back to this structure.
 */
struct objlayout {
	struct pnfs_layout_hdr pnfs_layout;

	 /* for layout_commit */
	enum osd_delta_space_valid_enum {
		OBJ_DSU_INIT = 0,	/* nothing accumulated since last commit */
		OBJ_DSU_VALID,		/* delta_space_used may be reported */
		OBJ_DSU_INVALID,	/* an I/O failed; do not report */
	} delta_space_valid;
	s64 delta_space_used;  /* consumed by write ops */

	 /* for layout_return */
	spinlock_t lock;		/* taken around the DSU fields and err_list */
	struct list_head err_list;	/* failed io_states awaiting report */
};
|
||||
|
||||
static inline struct objlayout *
|
||||
OBJLAYOUT(struct pnfs_layout_hdr *lo)
|
||||
{
|
||||
return container_of(lo, struct objlayout, pnfs_layout);
|
||||
}
|
||||
|
||||
/*
 * per-I/O operation state
 * embedded in objects provider io_state data structure
 */
struct objlayout_io_state {
	struct pnfs_layout_segment *lseg;	/* segment the I/O runs against */

	struct page **pages;	/* first page of the I/O */
	unsigned pgbase;	/* byte offset into the first page */
	unsigned nr_pages;	/* pages spanned by pgbase + count */
	unsigned long count;	/* bytes to transfer */
	loff_t offset;		/* file offset of the I/O */
	bool sync;

	void *rpcdata;		/* nfs_read_data or nfs_write_data of the request */
	int status;             /* res */
	int eof;                /* res */
	int committed;          /* res */

	/* Error reporting (layout_return) */
	struct list_head err_list;
	unsigned num_comps;
	/* Pointer to array of error descriptors of size num_comps.
	 * It should contain as many entries as devices in the osd_layout
	 * that participate in the I/O. It is up to the io_engine to allocate
	 * needed space and set num_comps.
	 */
	struct pnfs_osd_ioerr *ioerrs;
};
|
||||
|
||||
/*
|
||||
* Raid engine I/O API
|
||||
*/
|
||||
extern int objio_alloc_lseg(struct pnfs_layout_segment **outp,
|
||||
struct pnfs_layout_hdr *pnfslay,
|
||||
struct pnfs_layout_range *range,
|
||||
struct xdr_stream *xdr,
|
||||
gfp_t gfp_flags);
|
||||
extern void objio_free_lseg(struct pnfs_layout_segment *lseg);
|
||||
|
||||
extern int objio_alloc_io_state(
|
||||
struct pnfs_layout_segment *lseg,
|
||||
struct objlayout_io_state **outp,
|
||||
gfp_t gfp_flags);
|
||||
extern void objio_free_io_state(struct objlayout_io_state *state);
|
||||
|
||||
extern ssize_t objio_read_pagelist(struct objlayout_io_state *ol_state);
|
||||
extern ssize_t objio_write_pagelist(struct objlayout_io_state *ol_state,
|
||||
bool stable);
|
||||
|
||||
/*
|
||||
* callback API
|
||||
*/
|
||||
extern void objlayout_io_set_result(struct objlayout_io_state *state,
|
||||
unsigned index, struct pnfs_osd_objid *pooid,
|
||||
int osd_error, u64 offset, u64 length, bool is_write);
|
||||
|
||||
static inline void
|
||||
objlayout_add_delta_space_used(struct objlayout_io_state *state, s64 space_used)
|
||||
{
|
||||
struct objlayout *objlay = OBJLAYOUT(state->lseg->pls_layout);
|
||||
|
||||
/* If one of the I/Os errored out and the delta_space_used was
|
||||
* invalid we render the complete report as invalid. Protocol mandate
|
||||
* the DSU be accurate or not reported.
|
||||
*/
|
||||
spin_lock(&objlay->lock);
|
||||
if (objlay->delta_space_valid != OBJ_DSU_INVALID) {
|
||||
objlay->delta_space_valid = OBJ_DSU_VALID;
|
||||
objlay->delta_space_used += space_used;
|
||||
}
|
||||
spin_unlock(&objlay->lock);
|
||||
}
|
||||
|
||||
extern void objlayout_read_done(struct objlayout_io_state *state,
|
||||
ssize_t status, bool sync);
|
||||
extern void objlayout_write_done(struct objlayout_io_state *state,
|
||||
ssize_t status, bool sync);
|
||||
|
||||
extern int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay,
|
||||
struct nfs4_deviceid *d_id, struct pnfs_osd_deviceaddr **deviceaddr,
|
||||
gfp_t gfp_flags);
|
||||
extern void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr);
|
||||
|
||||
/*
|
||||
* exported generic objects function vectors
|
||||
*/
|
||||
|
||||
extern struct pnfs_layout_hdr *objlayout_alloc_layout_hdr(struct inode *, gfp_t gfp_flags);
|
||||
extern void objlayout_free_layout_hdr(struct pnfs_layout_hdr *);
|
||||
|
||||
extern struct pnfs_layout_segment *objlayout_alloc_lseg(
|
||||
struct pnfs_layout_hdr *,
|
||||
struct nfs4_layoutget_res *,
|
||||
gfp_t gfp_flags);
|
||||
extern void objlayout_free_lseg(struct pnfs_layout_segment *);
|
||||
|
||||
extern enum pnfs_try_status objlayout_read_pagelist(
|
||||
struct nfs_read_data *);
|
||||
|
||||
extern enum pnfs_try_status objlayout_write_pagelist(
|
||||
struct nfs_write_data *,
|
||||
int how);
|
||||
|
||||
extern void objlayout_encode_layoutcommit(
|
||||
struct pnfs_layout_hdr *,
|
||||
struct xdr_stream *,
|
||||
const struct nfs4_layoutcommit_args *);
|
||||
|
||||
extern void objlayout_encode_layoutreturn(
|
||||
struct pnfs_layout_hdr *,
|
||||
struct xdr_stream *,
|
||||
const struct nfs4_layoutreturn_args *);
|
||||
|
||||
#endif /* _OBJLAYOUT_H */
|
412
fs/nfs/objlayout/pnfs_osd_xdr_cli.c
Normal file
412
fs/nfs/objlayout/pnfs_osd_xdr_cli.c
Normal file
@ -0,0 +1,412 @@
|
||||
/*
|
||||
* Object-Based pNFS Layout XDR layer
|
||||
*
|
||||
* Copyright (C) 2007 Panasas Inc. [year of first publication]
|
||||
* All rights reserved.
|
||||
*
|
||||
* Benny Halevy <bhalevy@panasas.com>
|
||||
* Boaz Harrosh <bharrosh@panasas.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2
|
||||
* See the file COPYING included with this distribution for more details.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. Neither the name of the Panasas company nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
|
||||
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <linux/pnfs_osd_xdr.h>
|
||||
|
||||
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
|
||||
|
||||
/*
|
||||
* The following implementation is based on RFC5664
|
||||
*/
|
||||
|
||||
/*
|
||||
* struct pnfs_osd_objid {
|
||||
* struct nfs4_deviceid oid_device_id;
|
||||
* u64 oid_partition_id;
|
||||
* u64 oid_object_id;
|
||||
* }; // xdr size 32 bytes
|
||||
*/
|
||||
/*
 * Decode one pnfs_osd_objid starting at @p: the fixed-size device id
 * followed by the 64-bit partition id and the 64-bit object id.
 *
 * No bounds checking is done here; the caller must already have validated
 * that the whole object id is available at @p.
 * Returns the position just past the decoded object id.
 */
static __be32 *
_osd_xdr_decode_objid(__be32 *p, struct pnfs_osd_objid *objid)
{
	const unsigned int devid_len = sizeof(objid->oid_device_id.data);

	p = xdr_decode_opaque_fixed(p, objid->oid_device_id.data, devid_len);
	p = xdr_decode_hyper(p, &objid->oid_partition_id);
	return xdr_decode_hyper(p, &objid->oid_object_id);
}
|
||||
/*
|
||||
* struct pnfs_osd_opaque_cred {
|
||||
* u32 cred_len;
|
||||
* void *cred;
|
||||
* }; // xdr size [variable]
|
||||
* The return pointers are from the xdr buffer
|
||||
*/
|
||||
static int
|
||||
_osd_xdr_decode_opaque_cred(struct pnfs_osd_opaque_cred *opaque_cred,
|
||||
struct xdr_stream *xdr)
|
||||
{
|
||||
__be32 *p = xdr_inline_decode(xdr, 1);
|
||||
|
||||
if (!p)
|
||||
return -EINVAL;
|
||||
|
||||
opaque_cred->cred_len = be32_to_cpu(*p++);
|
||||
|
||||
p = xdr_inline_decode(xdr, opaque_cred->cred_len);
|
||||
if (!p)
|
||||
return -EINVAL;
|
||||
|
||||
opaque_cred->cred = p;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* struct pnfs_osd_object_cred {
|
||||
* struct pnfs_osd_objid oc_object_id;
|
||||
* u32 oc_osd_version;
|
||||
* u32 oc_cap_key_sec;
|
||||
* struct pnfs_osd_opaque_cred oc_cap_key
|
||||
* struct pnfs_osd_opaque_cred oc_cap;
|
||||
* }; // xdr size 32 + 4 + 4 + [variable] + [variable]
|
||||
*/
|
||||
static int
|
||||
_osd_xdr_decode_object_cred(struct pnfs_osd_object_cred *comp,
|
||||
struct xdr_stream *xdr)
|
||||
{
|
||||
__be32 *p = xdr_inline_decode(xdr, 32 + 4 + 4);
|
||||
int ret;
|
||||
|
||||
if (!p)
|
||||
return -EIO;
|
||||
|
||||
p = _osd_xdr_decode_objid(p, &comp->oc_object_id);
|
||||
comp->oc_osd_version = be32_to_cpup(p++);
|
||||
comp->oc_cap_key_sec = be32_to_cpup(p);
|
||||
|
||||
ret = _osd_xdr_decode_opaque_cred(&comp->oc_cap_key, xdr);
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
|
||||
ret = _osd_xdr_decode_opaque_cred(&comp->oc_cap, xdr);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* struct pnfs_osd_data_map {
|
||||
* u32 odm_num_comps;
|
||||
* u64 odm_stripe_unit;
|
||||
* u32 odm_group_width;
|
||||
* u32 odm_group_depth;
|
||||
* u32 odm_mirror_cnt;
|
||||
* u32 odm_raid_algorithm;
|
||||
* }; // xdr size 4 + 8 + 4 + 4 + 4 + 4
|
||||
*/
|
||||
/* Encoded XDR size, in bytes, of a struct pnfs_osd_data_map. */
static inline int
_osd_data_map_xdr_sz(void)
{
	const int u32_sz = 4;	/* one XDR-encoded u32 field */
	const int u64_sz = 8;	/* odm_stripe_unit */

	/* five u32 fields plus the 64-bit stripe unit */
	return 5 * u32_sz + u64_sz;
}
|
||||
|
||||
/*
 * Decode a pnfs_osd_data_map starting at @p, in wire order:
 * num_comps, stripe_unit (64 bit), group_width, group_depth, mirror_cnt,
 * raid_algorithm.  The decoded values are also logged through dprintk.
 *
 * No bounds checking is done; the caller must have validated that
 * _osd_data_map_xdr_sz() bytes are available at @p.
 * Returns the position just past the decoded map.
 */
static __be32 *
_osd_xdr_decode_data_map(__be32 *p, struct pnfs_osd_data_map *data_map)
{
	data_map->odm_num_comps = be32_to_cpup(p);
	p = xdr_decode_hyper(p + 1, &data_map->odm_stripe_unit);

	data_map->odm_group_width = be32_to_cpup(p);
	data_map->odm_group_depth = be32_to_cpup(p + 1);
	data_map->odm_mirror_cnt = be32_to_cpup(p + 2);
	data_map->odm_raid_algorithm = be32_to_cpup(p + 3);
	p += 4;

	dprintk("%s: odm_num_comps=%u odm_stripe_unit=%llu odm_group_width=%u "
		"odm_group_depth=%u odm_mirror_cnt=%u odm_raid_algorithm=%u\n",
		__func__,
		data_map->odm_num_comps,
		(unsigned long long)data_map->odm_stripe_unit,
		data_map->odm_group_width,
		data_map->odm_group_depth,
		data_map->odm_mirror_cnt,
		data_map->odm_raid_algorithm);
	return p;
}
|
||||
|
||||
int pnfs_osd_xdr_decode_layout_map(struct pnfs_osd_layout *layout,
|
||||
struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr)
|
||||
{
|
||||
__be32 *p;
|
||||
|
||||
memset(iter, 0, sizeof(*iter));
|
||||
|
||||
p = xdr_inline_decode(xdr, _osd_data_map_xdr_sz() + 4 + 4);
|
||||
if (unlikely(!p))
|
||||
return -EINVAL;
|
||||
|
||||
p = _osd_xdr_decode_data_map(p, &layout->olo_map);
|
||||
layout->olo_comps_index = be32_to_cpup(p++);
|
||||
layout->olo_num_comps = be32_to_cpup(p++);
|
||||
iter->total_comps = layout->olo_num_comps;
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool pnfs_osd_xdr_decode_layout_comp(struct pnfs_osd_object_cred *comp,
|
||||
struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr,
|
||||
int *err)
|
||||
{
|
||||
BUG_ON(iter->decoded_comps > iter->total_comps);
|
||||
if (iter->decoded_comps == iter->total_comps)
|
||||
return false;
|
||||
|
||||
*err = _osd_xdr_decode_object_cred(comp, xdr);
|
||||
if (unlikely(*err)) {
|
||||
dprintk("%s: _osd_xdr_decode_object_cred=>%d decoded_comps=%d "
|
||||
"total_comps=%d\n", __func__, *err,
|
||||
iter->decoded_comps, iter->total_comps);
|
||||
return false; /* stop the loop */
|
||||
}
|
||||
dprintk("%s: dev(%llx:%llx) par=0x%llx obj=0x%llx "
|
||||
"key_len=%u cap_len=%u\n",
|
||||
__func__,
|
||||
_DEVID_LO(&comp->oc_object_id.oid_device_id),
|
||||
_DEVID_HI(&comp->oc_object_id.oid_device_id),
|
||||
comp->oc_object_id.oid_partition_id,
|
||||
comp->oc_object_id.oid_object_id,
|
||||
comp->oc_cap_key.cred_len, comp->oc_cap.cred_len);
|
||||
|
||||
iter->decoded_comps++;
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get Device Information Decoding
|
||||
*
|
||||
* Note: since Device Information is currently done synchronously, all
|
||||
* variable strings fields are left inside the rpc buffer and are only
|
||||
* pointed to by the pnfs_osd_deviceaddr members. So the read buffer
|
||||
* should not be freed while the returned information is in use.
|
||||
*/
|
||||
/*
|
||||
*struct nfs4_string {
|
||||
* unsigned int len;
|
||||
* char *data;
|
||||
*}; // size [variable]
|
||||
* NOTE: Returned string points to inside the XDR buffer
|
||||
*/
|
||||
/*
 * Read a length-prefixed opaque byte string at @p into @str.
 *
 * str->data is set to point at the bytes inside the XDR buffer; nothing is
 * copied and no bounds checking is done on the decoded length.
 * Returns the position just past the string, rounded up to a whole number
 * of XDR words.
 */
static __be32 *
__read_u8_opaque(__be32 *p, struct nfs4_string *str)
{
	unsigned int len = be32_to_cpup(p++);

	str->len = len;
	str->data = (char *)p;

	return p + XDR_QUADLEN(len);
}
|
||||
|
||||
/*
|
||||
* struct pnfs_osd_targetid {
|
||||
* u32 oti_type;
|
||||
* struct nfs4_string oti_scsi_device_id;
|
||||
* };// size 4 + [variable]
|
||||
*/
|
||||
/*
 * Read a pnfs_osd_targetid at @p: the 32-bit target type and, for the SCSI
 * name / SCSI device-id types, the opaque identifier that follows it.
 *
 * For any other target type no identifier is read from the buffer and
 * oti_scsi_device_id is cleared, so callers never see a stale or
 * uninitialized string.
 *
 * No bounds checking is done.  Returns the position just past the decoded
 * target id.
 */
static __be32 *
__read_targetid(__be32 *p, struct pnfs_osd_targetid *targetid)
{
	u32 oti_type = be32_to_cpup(p++);

	targetid->oti_type = oti_type;

	switch (oti_type) {
	case OBJ_TARGET_SCSI_NAME:
	case OBJ_TARGET_SCSI_DEVICE_ID:
		p = __read_u8_opaque(p, &targetid->oti_scsi_device_id);
		break;
	default:
		/* nothing to read for this type: leave a well-defined empty id */
		targetid->oti_scsi_device_id.len = 0;
		targetid->oti_scsi_device_id.data = NULL;
		break;
	}

	return p;
}
|
||||
|
||||
/*
|
||||
* struct pnfs_osd_net_addr {
|
||||
* struct nfs4_string r_netid;
|
||||
* struct nfs4_string r_addr;
|
||||
* };
|
||||
*/
|
||||
/*
 * Read a pnfs_osd_net_addr at @p: the r_netid string followed by the
 * r_addr string.  Both end up pointing into the XDR buffer.
 * Returns the position just past the second string.
 */
static __be32 *
__read_net_addr(__be32 *p, struct pnfs_osd_net_addr *netaddr)
{
	__be32 *after_netid = __read_u8_opaque(p, &netaddr->r_netid);

	return __read_u8_opaque(after_netid, &netaddr->r_addr);
}
|
||||
|
||||
/*
|
||||
* struct pnfs_osd_targetaddr {
|
||||
* u32 ota_available;
|
||||
* struct pnfs_osd_net_addr ota_netaddr;
|
||||
* };
|
||||
*/
|
||||
/*
 * Read a pnfs_osd_targetaddr at @p: the ota_available word and, only when
 * it is non-zero, the network address that follows.  When it is zero
 * ota_netaddr is not written at all.
 * Returns the position just past whatever was read.
 */
static __be32 *
__read_targetaddr(__be32 *p, struct pnfs_osd_targetaddr *targetaddr)
{
	u32 available = be32_to_cpup(p++);

	targetaddr->ota_available = available;
	if (!available)
		return p;

	return __read_net_addr(p, &targetaddr->ota_netaddr);
}
|
||||
|
||||
/*
|
||||
* struct pnfs_osd_deviceaddr {
|
||||
* struct pnfs_osd_targetid oda_targetid;
|
||||
* struct pnfs_osd_targetaddr oda_targetaddr;
|
||||
* u8 oda_lun[8];
|
||||
* struct nfs4_string oda_systemid;
|
||||
* struct pnfs_osd_object_cred oda_root_obj_cred;
|
||||
* struct nfs4_string oda_osdname;
|
||||
* };
|
||||
*/
|
||||
|
||||
/* We need this version for the pnfs_osd_xdr_decode_deviceaddr which does
|
||||
* not have an xdr_stream
|
||||
*/
|
||||
/*
 * Raw-pointer variant of the opaque credential decoder, for callers that
 * have no xdr_stream.  The credential is left inside the buffer and only
 * pointed to; no bounds checking is done on the decoded length.
 * Returns the position just past the credential, rounded up to a whole
 * number of XDR words.
 */
static __be32 *
__read_opaque_cred(__be32 *p,
		   struct pnfs_osd_opaque_cred *opaque_cred)
{
	u32 len = be32_to_cpup(p++);

	opaque_cred->cred_len = len;
	opaque_cred->cred = p;

	return p + XDR_QUADLEN(len);
}
|
||||
|
||||
/*
 * Raw-pointer variant of the object credential decoder: object id, osd
 * version, cap-key security method, then the cap_key and cap opaque
 * credentials.  No bounds checking is done.
 * Returns the position just past the decoded credential.
 */
static __be32 *
__read_object_cred(__be32 *p, struct pnfs_osd_object_cred *comp)
{
	p = _osd_xdr_decode_objid(p, &comp->oc_object_id);

	comp->oc_osd_version = be32_to_cpup(p);
	comp->oc_cap_key_sec = be32_to_cpup(p + 1);
	p += 2;

	p = __read_opaque_cred(p, &comp->oc_cap_key);
	return __read_opaque_cred(p, &comp->oc_cap);
}
|
||||
|
||||
/*
 * Decode a complete pnfs_osd_deviceaddr from the raw buffer at @p, in wire
 * order: target id, target address, LUN, system id, root object
 * credential, osd name.
 *
 * All variable-length members are left inside the buffer and only pointed
 * to, so the buffer must stay alive while @deviceaddr is in use.  The osd
 * name is NUL-terminated in place, i.e. one byte is written into the
 * buffer right after the name.
 *
 * NOTE(review): nothing here checks the decoded lengths against the size
 * of the buffer, and the terminator is written at data[len] - confirm the
 * caller guarantees that byte is inside the buffer.
 */
void pnfs_osd_xdr_decode_deviceaddr(
	struct pnfs_osd_deviceaddr *deviceaddr, __be32 *p)
{
	struct nfs4_string *osdname = &deviceaddr->oda_osdname;

	p = __read_targetid(p, &deviceaddr->oda_targetid);
	p = __read_targetaddr(p, &deviceaddr->oda_targetaddr);
	p = xdr_decode_opaque_fixed(p, deviceaddr->oda_lun,
				    sizeof(deviceaddr->oda_lun));
	p = __read_u8_opaque(p, &deviceaddr->oda_systemid);
	p = __read_object_cred(p, &deviceaddr->oda_root_obj_cred);
	p = __read_u8_opaque(p, osdname);

	/* libosd likes this terminated in dbg. It's last, so no problems */
	osdname->data[osdname->len] = 0;
}
|
||||
|
||||
/*
|
||||
* struct pnfs_osd_layoutupdate {
|
||||
* u32 dsu_valid;
|
||||
* s64 dsu_delta;
|
||||
* u32 olu_ioerr_flag;
|
||||
* }; xdr size 4 + 8 + 4
|
||||
*/
|
||||
int
|
||||
pnfs_osd_xdr_encode_layoutupdate(struct xdr_stream *xdr,
|
||||
struct pnfs_osd_layoutupdate *lou)
|
||||
{
|
||||
__be32 *p = xdr_reserve_space(xdr, 4 + 8 + 4);
|
||||
|
||||
if (!p)
|
||||
return -E2BIG;
|
||||
|
||||
*p++ = cpu_to_be32(lou->dsu_valid);
|
||||
if (lou->dsu_valid)
|
||||
p = xdr_encode_hyper(p, lou->dsu_delta);
|
||||
*p++ = cpu_to_be32(lou->olu_ioerr_flag);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* struct pnfs_osd_objid {
|
||||
* struct nfs4_deviceid oid_device_id;
|
||||
* u64 oid_partition_id;
|
||||
* u64 oid_object_id;
|
||||
* }; // xdr size 32 bytes
|
||||
*/
|
||||
/*
 * Encode one pnfs_osd_objid at @p: the fixed-size device id followed by
 * the 64-bit partition id and the 64-bit object id.
 * The caller must have reserved the space.  Returns the position just past
 * the encoded object id.
 */
static inline __be32 *
pnfs_osd_xdr_encode_objid(__be32 *p, struct pnfs_osd_objid *object_id)
{
	const unsigned int devid_len = sizeof(object_id->oid_device_id.data);

	p = xdr_encode_opaque_fixed(p, &object_id->oid_device_id.data,
				    devid_len);
	p = xdr_encode_hyper(p, object_id->oid_partition_id);
	return xdr_encode_hyper(p, object_id->oid_object_id);
}
|
||||
|
||||
/*
|
||||
* struct pnfs_osd_ioerr {
|
||||
* struct pnfs_osd_objid oer_component;
|
||||
* u64 oer_comp_offset;
|
||||
* u64 oer_comp_length;
|
||||
* u32 oer_iswrite;
|
||||
* u32 oer_errno;
|
||||
* }; // xdr size 32 + 24 bytes
|
||||
*/
|
||||
/*
 * Encode one pnfs_osd_ioerr at @p: component object id, component offset,
 * component length, is-write flag, error number.
 * @p must point at space already reserved for the whole descriptor
 * (32 + 24 bytes, see pnfs_osd_xdr_ioerr_reserve_space()).
 */
void pnfs_osd_xdr_encode_ioerr(__be32 *p, struct pnfs_osd_ioerr *ioerr)
{
	p = pnfs_osd_xdr_encode_objid(p, &ioerr->oer_component);
	p = xdr_encode_hyper(p, ioerr->oer_comp_offset);
	p = xdr_encode_hyper(p, ioerr->oer_comp_length);

	p[0] = cpu_to_be32(ioerr->oer_iswrite);
	p[1] = cpu_to_be32(ioerr->oer_errno);
}
|
||||
|
||||
__be32 *pnfs_osd_xdr_ioerr_reserve_space(struct xdr_stream *xdr)
|
||||
{
|
||||
__be32 *p;
|
||||
|
||||
p = xdr_reserve_space(xdr, 32 + 24);
|
||||
if (unlikely(!p))
|
||||
dprintk("%s: out of xdr space\n", __func__);
|
||||
|
||||
return p;
|
||||
}
|
@ -204,6 +204,21 @@ nfs_wait_on_request(struct nfs_page *req)
|
||||
TASK_UNINTERRUPTIBLE);
|
||||
}
|
||||
|
||||
static bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req)
|
||||
{
|
||||
/*
|
||||
* FIXME: ideally we should be able to coalesce all requests
|
||||
* that are not block boundary aligned, but currently this
|
||||
* is problematic for the case of bsize < PAGE_CACHE_SIZE,
|
||||
* since nfs_flush_multi and nfs_pagein_multi assume you
|
||||
* can have only one struct nfs_page.
|
||||
*/
|
||||
if (desc->pg_bsize < PAGE_SIZE)
|
||||
return 0;
|
||||
|
||||
return desc->pg_count + req->wb_bytes <= desc->pg_bsize;
|
||||
}
|
||||
|
||||
/**
|
||||
* nfs_pageio_init - initialise a page io descriptor
|
||||
* @desc: pointer to descriptor
|
||||
@ -229,6 +244,8 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
|
||||
desc->pg_ioflags = io_flags;
|
||||
desc->pg_error = 0;
|
||||
desc->pg_lseg = NULL;
|
||||
desc->pg_test = nfs_generic_pg_test;
|
||||
pnfs_pageio_init(desc, inode);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -242,29 +259,23 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
|
||||
*
|
||||
* Return 'true' if this is the case, else return 'false'.
|
||||
*/
|
||||
static int nfs_can_coalesce_requests(struct nfs_page *prev,
|
||||
struct nfs_page *req,
|
||||
struct nfs_pageio_descriptor *pgio)
|
||||
static bool nfs_can_coalesce_requests(struct nfs_page *prev,
|
||||
struct nfs_page *req,
|
||||
struct nfs_pageio_descriptor *pgio)
|
||||
{
|
||||
if (req->wb_context->cred != prev->wb_context->cred)
|
||||
return 0;
|
||||
return false;
|
||||
if (req->wb_lock_context->lockowner != prev->wb_lock_context->lockowner)
|
||||
return 0;
|
||||
return false;
|
||||
if (req->wb_context->state != prev->wb_context->state)
|
||||
return 0;
|
||||
return false;
|
||||
if (req->wb_index != (prev->wb_index + 1))
|
||||
return 0;
|
||||
return false;
|
||||
if (req->wb_pgbase != 0)
|
||||
return 0;
|
||||
return false;
|
||||
if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
|
||||
return 0;
|
||||
/*
|
||||
* Non-whole file layouts need to check that req is inside of
|
||||
* pgio->pg_lseg.
|
||||
*/
|
||||
if (pgio->pg_test && !pgio->pg_test(pgio, prev, req))
|
||||
return 0;
|
||||
return 1;
|
||||
return false;
|
||||
return pgio->pg_test(pgio, prev, req);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -278,31 +289,18 @@ static int nfs_can_coalesce_requests(struct nfs_page *prev,
|
||||
static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
|
||||
struct nfs_page *req)
|
||||
{
|
||||
size_t newlen = req->wb_bytes;
|
||||
|
||||
if (desc->pg_count != 0) {
|
||||
struct nfs_page *prev;
|
||||
|
||||
/*
|
||||
* FIXME: ideally we should be able to coalesce all requests
|
||||
* that are not block boundary aligned, but currently this
|
||||
* is problematic for the case of bsize < PAGE_CACHE_SIZE,
|
||||
* since nfs_flush_multi and nfs_pagein_multi assume you
|
||||
* can have only one struct nfs_page.
|
||||
*/
|
||||
if (desc->pg_bsize < PAGE_SIZE)
|
||||
return 0;
|
||||
newlen += desc->pg_count;
|
||||
if (newlen > desc->pg_bsize)
|
||||
return 0;
|
||||
prev = nfs_list_entry(desc->pg_list.prev);
|
||||
if (!nfs_can_coalesce_requests(prev, req, desc))
|
||||
return 0;
|
||||
} else
|
||||
} else {
|
||||
desc->pg_base = req->wb_pgbase;
|
||||
}
|
||||
nfs_list_remove_request(req);
|
||||
nfs_list_add_request(req, &desc->pg_list);
|
||||
desc->pg_count = newlen;
|
||||
desc->pg_count += req->wb_bytes;
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
344
fs/nfs/pnfs.c
344
fs/nfs/pnfs.c
@ -177,13 +177,28 @@ get_layout_hdr(struct pnfs_layout_hdr *lo)
|
||||
atomic_inc(&lo->plh_refcount);
|
||||
}
|
||||
|
||||
static struct pnfs_layout_hdr *
|
||||
pnfs_alloc_layout_hdr(struct inode *ino, gfp_t gfp_flags)
|
||||
{
|
||||
struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
|
||||
return ld->alloc_layout_hdr ? ld->alloc_layout_hdr(ino, gfp_flags) :
|
||||
kzalloc(sizeof(struct pnfs_layout_hdr), gfp_flags);
|
||||
}
|
||||
|
||||
static void
|
||||
pnfs_free_layout_hdr(struct pnfs_layout_hdr *lo)
|
||||
{
|
||||
struct pnfs_layoutdriver_type *ld = NFS_SERVER(lo->plh_inode)->pnfs_curr_ld;
|
||||
return ld->alloc_layout_hdr ? ld->free_layout_hdr(lo) : kfree(lo);
|
||||
}
|
||||
|
||||
static void
|
||||
destroy_layout_hdr(struct pnfs_layout_hdr *lo)
|
||||
{
|
||||
dprintk("%s: freeing layout cache %p\n", __func__, lo);
|
||||
BUG_ON(!list_empty(&lo->plh_layouts));
|
||||
NFS_I(lo->plh_inode)->layout = NULL;
|
||||
kfree(lo);
|
||||
pnfs_free_layout_hdr(lo);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -228,7 +243,7 @@ put_lseg_common(struct pnfs_layout_segment *lseg)
|
||||
{
|
||||
struct inode *inode = lseg->pls_layout->plh_inode;
|
||||
|
||||
BUG_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
|
||||
WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
|
||||
list_del_init(&lseg->pls_list);
|
||||
if (list_empty(&lseg->pls_layout->plh_segs)) {
|
||||
set_bit(NFS_LAYOUT_DESTROYED, &lseg->pls_layout->plh_flags);
|
||||
@ -261,11 +276,72 @@ put_lseg(struct pnfs_layout_segment *lseg)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(put_lseg);
|
||||
|
||||
static bool
|
||||
should_free_lseg(u32 lseg_iomode, u32 recall_iomode)
|
||||
static inline u64
|
||||
end_offset(u64 start, u64 len)
|
||||
{
|
||||
return (recall_iomode == IOMODE_ANY ||
|
||||
lseg_iomode == recall_iomode);
|
||||
u64 end;
|
||||
|
||||
end = start + len;
|
||||
return end >= start ? end : NFS4_MAX_UINT64;
|
||||
}
|
||||
|
||||
/* last octet in a range */
|
||||
static inline u64
|
||||
last_byte_offset(u64 start, u64 len)
|
||||
{
|
||||
u64 end;
|
||||
|
||||
BUG_ON(!len);
|
||||
end = start + len;
|
||||
return end > start ? end - 1 : NFS4_MAX_UINT64;
|
||||
}
|
||||
|
||||
/*
|
||||
* is l2 fully contained in l1?
|
||||
* start1 end1
|
||||
* [----------------------------------)
|
||||
* start2 end2
|
||||
* [----------------)
|
||||
*/
|
||||
static inline int
|
||||
lo_seg_contained(struct pnfs_layout_range *l1,
|
||||
struct pnfs_layout_range *l2)
|
||||
{
|
||||
u64 start1 = l1->offset;
|
||||
u64 end1 = end_offset(start1, l1->length);
|
||||
u64 start2 = l2->offset;
|
||||
u64 end2 = end_offset(start2, l2->length);
|
||||
|
||||
return (start1 <= start2) && (end1 >= end2);
|
||||
}
|
||||
|
||||
/*
|
||||
* is l1 and l2 intersecting?
|
||||
* start1 end1
|
||||
* [----------------------------------)
|
||||
* start2 end2
|
||||
* [----------------)
|
||||
*/
|
||||
static inline int
|
||||
lo_seg_intersecting(struct pnfs_layout_range *l1,
|
||||
struct pnfs_layout_range *l2)
|
||||
{
|
||||
u64 start1 = l1->offset;
|
||||
u64 end1 = end_offset(start1, l1->length);
|
||||
u64 start2 = l2->offset;
|
||||
u64 end2 = end_offset(start2, l2->length);
|
||||
|
||||
return (end1 == NFS4_MAX_UINT64 || end1 > start2) &&
|
||||
(end2 == NFS4_MAX_UINT64 || end2 > start1);
|
||||
}
|
||||
|
||||
static bool
|
||||
should_free_lseg(struct pnfs_layout_range *lseg_range,
|
||||
struct pnfs_layout_range *recall_range)
|
||||
{
|
||||
return (recall_range->iomode == IOMODE_ANY ||
|
||||
lseg_range->iomode == recall_range->iomode) &&
|
||||
lo_seg_intersecting(lseg_range, recall_range);
|
||||
}
|
||||
|
||||
/* Returns 1 if lseg is removed from list, 0 otherwise */
|
||||
@ -296,7 +372,7 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
|
||||
int
|
||||
mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
|
||||
struct list_head *tmp_list,
|
||||
u32 iomode)
|
||||
struct pnfs_layout_range *recall_range)
|
||||
{
|
||||
struct pnfs_layout_segment *lseg, *next;
|
||||
int invalid = 0, removed = 0;
|
||||
@ -309,7 +385,8 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
|
||||
return 0;
|
||||
}
|
||||
list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
|
||||
if (should_free_lseg(lseg->pls_range.iomode, iomode)) {
|
||||
if (!recall_range ||
|
||||
should_free_lseg(&lseg->pls_range, recall_range)) {
|
||||
dprintk("%s: freeing lseg %p iomode %d "
|
||||
"offset %llu length %llu\n", __func__,
|
||||
lseg, lseg->pls_range.iomode, lseg->pls_range.offset,
|
||||
@ -358,7 +435,7 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
|
||||
lo = nfsi->layout;
|
||||
if (lo) {
|
||||
lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */
|
||||
mark_matching_lsegs_invalid(lo, &tmp_list, IOMODE_ANY);
|
||||
mark_matching_lsegs_invalid(lo, &tmp_list, NULL);
|
||||
}
|
||||
spin_unlock(&nfsi->vfs_inode.i_lock);
|
||||
pnfs_free_lseg_list(&tmp_list);
|
||||
@ -467,7 +544,7 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
|
||||
static struct pnfs_layout_segment *
|
||||
send_layoutget(struct pnfs_layout_hdr *lo,
|
||||
struct nfs_open_context *ctx,
|
||||
u32 iomode,
|
||||
struct pnfs_layout_range *range,
|
||||
gfp_t gfp_flags)
|
||||
{
|
||||
struct inode *ino = lo->plh_inode;
|
||||
@ -499,11 +576,11 @@ send_layoutget(struct pnfs_layout_hdr *lo,
|
||||
goto out_err_free;
|
||||
}
|
||||
|
||||
lgp->args.minlength = NFS4_MAX_UINT64;
|
||||
lgp->args.minlength = PAGE_CACHE_SIZE;
|
||||
if (lgp->args.minlength > range->length)
|
||||
lgp->args.minlength = range->length;
|
||||
lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
|
||||
lgp->args.range.iomode = iomode;
|
||||
lgp->args.range.offset = 0;
|
||||
lgp->args.range.length = NFS4_MAX_UINT64;
|
||||
lgp->args.range = *range;
|
||||
lgp->args.type = server->pnfs_curr_ld->id;
|
||||
lgp->args.inode = ino;
|
||||
lgp->args.ctx = get_nfs_open_context(ctx);
|
||||
@ -518,7 +595,7 @@ send_layoutget(struct pnfs_layout_hdr *lo,
|
||||
nfs4_proc_layoutget(lgp);
|
||||
if (!lseg) {
|
||||
/* remember that LAYOUTGET failed and suspend trying */
|
||||
set_bit(lo_fail_bit(iomode), &lo->plh_flags);
|
||||
set_bit(lo_fail_bit(range->iomode), &lo->plh_flags);
|
||||
}
|
||||
|
||||
/* free xdr pages */
|
||||
@ -542,6 +619,51 @@ out_err_free:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Initiates a LAYOUTRETURN(FILE) */
|
||||
int
|
||||
_pnfs_return_layout(struct inode *ino)
|
||||
{
|
||||
struct pnfs_layout_hdr *lo = NULL;
|
||||
struct nfs_inode *nfsi = NFS_I(ino);
|
||||
LIST_HEAD(tmp_list);
|
||||
struct nfs4_layoutreturn *lrp;
|
||||
nfs4_stateid stateid;
|
||||
int status = 0;
|
||||
|
||||
dprintk("--> %s\n", __func__);
|
||||
|
||||
spin_lock(&ino->i_lock);
|
||||
lo = nfsi->layout;
|
||||
if (!lo || !mark_matching_lsegs_invalid(lo, &tmp_list, NULL)) {
|
||||
spin_unlock(&ino->i_lock);
|
||||
dprintk("%s: no layout segments to return\n", __func__);
|
||||
goto out;
|
||||
}
|
||||
stateid = nfsi->layout->plh_stateid;
|
||||
/* Reference matched in nfs4_layoutreturn_release */
|
||||
get_layout_hdr(lo);
|
||||
spin_unlock(&ino->i_lock);
|
||||
pnfs_free_lseg_list(&tmp_list);
|
||||
|
||||
WARN_ON(test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags));
|
||||
|
||||
lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
|
||||
if (unlikely(lrp == NULL)) {
|
||||
status = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
lrp->args.stateid = stateid;
|
||||
lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id;
|
||||
lrp->args.inode = ino;
|
||||
lrp->clp = NFS_SERVER(ino)->nfs_client;
|
||||
|
||||
status = nfs4_proc_layoutreturn(lrp);
|
||||
out:
|
||||
dprintk("<-- %s status: %d\n", __func__, status);
|
||||
return status;
|
||||
}
|
||||
|
||||
bool pnfs_roc(struct inode *ino)
|
||||
{
|
||||
struct pnfs_layout_hdr *lo;
|
||||
@ -625,10 +747,23 @@ bool pnfs_roc_drain(struct inode *ino, u32 *barrier)
|
||||
* are seen first.
|
||||
*/
|
||||
static s64
|
||||
cmp_layout(u32 iomode1, u32 iomode2)
|
||||
cmp_layout(struct pnfs_layout_range *l1,
|
||||
struct pnfs_layout_range *l2)
|
||||
{
|
||||
s64 d;
|
||||
|
||||
/* high offset > low offset */
|
||||
d = l1->offset - l2->offset;
|
||||
if (d)
|
||||
return d;
|
||||
|
||||
/* short length > long length */
|
||||
d = l2->length - l1->length;
|
||||
if (d)
|
||||
return d;
|
||||
|
||||
/* read > read/write */
|
||||
return (int)(iomode2 == IOMODE_READ) - (int)(iomode1 == IOMODE_READ);
|
||||
return (int)(l1->iomode == IOMODE_READ) - (int)(l2->iomode == IOMODE_READ);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -636,13 +771,12 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo,
|
||||
struct pnfs_layout_segment *lseg)
|
||||
{
|
||||
struct pnfs_layout_segment *lp;
|
||||
int found = 0;
|
||||
|
||||
dprintk("%s:Begin\n", __func__);
|
||||
|
||||
assert_spin_locked(&lo->plh_inode->i_lock);
|
||||
list_for_each_entry(lp, &lo->plh_segs, pls_list) {
|
||||
if (cmp_layout(lp->pls_range.iomode, lseg->pls_range.iomode) > 0)
|
||||
if (cmp_layout(&lseg->pls_range, &lp->pls_range) > 0)
|
||||
continue;
|
||||
list_add_tail(&lseg->pls_list, &lp->pls_list);
|
||||
dprintk("%s: inserted lseg %p "
|
||||
@ -652,16 +786,14 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo,
|
||||
lseg->pls_range.offset, lseg->pls_range.length,
|
||||
lp, lp->pls_range.iomode, lp->pls_range.offset,
|
||||
lp->pls_range.length);
|
||||
found = 1;
|
||||
break;
|
||||
}
|
||||
if (!found) {
|
||||
list_add_tail(&lseg->pls_list, &lo->plh_segs);
|
||||
dprintk("%s: inserted lseg %p "
|
||||
"iomode %d offset %llu length %llu at tail\n",
|
||||
__func__, lseg, lseg->pls_range.iomode,
|
||||
lseg->pls_range.offset, lseg->pls_range.length);
|
||||
goto out;
|
||||
}
|
||||
list_add_tail(&lseg->pls_list, &lo->plh_segs);
|
||||
dprintk("%s: inserted lseg %p "
|
||||
"iomode %d offset %llu length %llu at tail\n",
|
||||
__func__, lseg, lseg->pls_range.iomode,
|
||||
lseg->pls_range.offset, lseg->pls_range.length);
|
||||
out:
|
||||
get_layout_hdr(lo);
|
||||
|
||||
dprintk("%s:Return\n", __func__);
|
||||
@ -672,7 +804,7 @@ alloc_init_layout_hdr(struct inode *ino, gfp_t gfp_flags)
|
||||
{
|
||||
struct pnfs_layout_hdr *lo;
|
||||
|
||||
lo = kzalloc(sizeof(struct pnfs_layout_hdr), gfp_flags);
|
||||
lo = pnfs_alloc_layout_hdr(ino, gfp_flags);
|
||||
if (!lo)
|
||||
return NULL;
|
||||
atomic_set(&lo->plh_refcount, 1);
|
||||
@ -705,7 +837,7 @@ pnfs_find_alloc_layout(struct inode *ino, gfp_t gfp_flags)
|
||||
if (likely(nfsi->layout == NULL)) /* Won the race? */
|
||||
nfsi->layout = new;
|
||||
else
|
||||
kfree(new);
|
||||
pnfs_free_layout_hdr(new);
|
||||
return nfsi->layout;
|
||||
}
|
||||
|
||||
@ -721,16 +853,28 @@ pnfs_find_alloc_layout(struct inode *ino, gfp_t gfp_flags)
|
||||
* READ RW true
|
||||
*/
|
||||
static int
|
||||
is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode)
|
||||
is_matching_lseg(struct pnfs_layout_range *ls_range,
|
||||
struct pnfs_layout_range *range)
|
||||
{
|
||||
return (iomode != IOMODE_RW || lseg->pls_range.iomode == IOMODE_RW);
|
||||
struct pnfs_layout_range range1;
|
||||
|
||||
if ((range->iomode == IOMODE_RW &&
|
||||
ls_range->iomode != IOMODE_RW) ||
|
||||
!lo_seg_intersecting(ls_range, range))
|
||||
return 0;
|
||||
|
||||
/* range1 covers only the first byte in the range */
|
||||
range1 = *range;
|
||||
range1.length = 1;
|
||||
return lo_seg_contained(ls_range, &range1);
|
||||
}
|
||||
|
||||
/*
|
||||
* lookup range in layout
|
||||
*/
|
||||
static struct pnfs_layout_segment *
|
||||
pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode)
|
||||
pnfs_find_lseg(struct pnfs_layout_hdr *lo,
|
||||
struct pnfs_layout_range *range)
|
||||
{
|
||||
struct pnfs_layout_segment *lseg, *ret = NULL;
|
||||
|
||||
@ -739,11 +883,11 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode)
|
||||
assert_spin_locked(&lo->plh_inode->i_lock);
|
||||
list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
|
||||
if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) &&
|
||||
is_matching_lseg(lseg, iomode)) {
|
||||
is_matching_lseg(&lseg->pls_range, range)) {
|
||||
ret = get_lseg(lseg);
|
||||
break;
|
||||
}
|
||||
if (cmp_layout(iomode, lseg->pls_range.iomode) > 0)
|
||||
if (cmp_layout(range, &lseg->pls_range) > 0)
|
||||
break;
|
||||
}
|
||||
|
||||
@ -759,9 +903,17 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode)
|
||||
struct pnfs_layout_segment *
|
||||
pnfs_update_layout(struct inode *ino,
|
||||
struct nfs_open_context *ctx,
|
||||
loff_t pos,
|
||||
u64 count,
|
||||
enum pnfs_iomode iomode,
|
||||
gfp_t gfp_flags)
|
||||
{
|
||||
struct pnfs_layout_range arg = {
|
||||
.iomode = iomode,
|
||||
.offset = pos,
|
||||
.length = count,
|
||||
};
|
||||
unsigned pg_offset;
|
||||
struct nfs_inode *nfsi = NFS_I(ino);
|
||||
struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
|
||||
struct pnfs_layout_hdr *lo;
|
||||
@ -789,7 +941,7 @@ pnfs_update_layout(struct inode *ino,
|
||||
goto out_unlock;
|
||||
|
||||
/* Check to see if the layout for the given range already exists */
|
||||
lseg = pnfs_find_lseg(lo, iomode);
|
||||
lseg = pnfs_find_lseg(lo, &arg);
|
||||
if (lseg)
|
||||
goto out_unlock;
|
||||
|
||||
@ -811,7 +963,14 @@ pnfs_update_layout(struct inode *ino,
|
||||
spin_unlock(&clp->cl_lock);
|
||||
}
|
||||
|
||||
lseg = send_layoutget(lo, ctx, iomode, gfp_flags);
|
||||
pg_offset = arg.offset & ~PAGE_CACHE_MASK;
|
||||
if (pg_offset) {
|
||||
arg.offset -= pg_offset;
|
||||
arg.length += pg_offset;
|
||||
}
|
||||
arg.length = PAGE_CACHE_ALIGN(arg.length);
|
||||
|
||||
lseg = send_layoutget(lo, ctx, &arg, gfp_flags);
|
||||
if (!lseg && first) {
|
||||
spin_lock(&clp->cl_lock);
|
||||
list_del_init(&lo->plh_layouts);
|
||||
@ -838,17 +997,6 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
|
||||
struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
|
||||
int status = 0;
|
||||
|
||||
/* Verify we got what we asked for.
|
||||
* Note that because the xdr parsing only accepts a single
|
||||
* element array, this can fail even if the server is behaving
|
||||
* correctly.
|
||||
*/
|
||||
if (lgp->args.range.iomode > res->range.iomode ||
|
||||
res->range.offset != 0 ||
|
||||
res->range.length != NFS4_MAX_UINT64) {
|
||||
status = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
/* Inject layout blob into I/O device driver */
|
||||
lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res, lgp->gfp_flags);
|
||||
if (!lseg || IS_ERR(lseg)) {
|
||||
@ -895,51 +1043,64 @@ out_forget_reply:
|
||||
goto out;
|
||||
}
|
||||
|
||||
static int pnfs_read_pg_test(struct nfs_pageio_descriptor *pgio,
|
||||
struct nfs_page *prev,
|
||||
struct nfs_page *req)
|
||||
bool
|
||||
pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
|
||||
struct nfs_page *req)
|
||||
{
|
||||
enum pnfs_iomode access_type;
|
||||
gfp_t gfp_flags;
|
||||
|
||||
/* We assume that pg_ioflags == 0 iff we're reading a page */
|
||||
if (pgio->pg_ioflags == 0) {
|
||||
access_type = IOMODE_READ;
|
||||
gfp_flags = GFP_KERNEL;
|
||||
} else {
|
||||
access_type = IOMODE_RW;
|
||||
gfp_flags = GFP_NOFS;
|
||||
}
|
||||
|
||||
if (pgio->pg_count == prev->wb_bytes) {
|
||||
/* This is the first coalesce call for a series of nfs_pages */
|
||||
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
|
||||
prev->wb_context,
|
||||
IOMODE_READ,
|
||||
GFP_KERNEL);
|
||||
req_offset(req),
|
||||
pgio->pg_count,
|
||||
access_type,
|
||||
gfp_flags);
|
||||
return true;
|
||||
}
|
||||
return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req);
|
||||
|
||||
if (pgio->pg_lseg &&
|
||||
req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset,
|
||||
pgio->pg_lseg->pls_range.length))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
|
||||
|
||||
void
|
||||
pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode)
|
||||
/*
|
||||
* Called by non rpc-based layout drivers
|
||||
*/
|
||||
int
|
||||
pnfs_ld_write_done(struct nfs_write_data *data)
|
||||
{
|
||||
struct pnfs_layoutdriver_type *ld;
|
||||
int status;
|
||||
|
||||
ld = NFS_SERVER(inode)->pnfs_curr_ld;
|
||||
pgio->pg_test = (ld && ld->pg_test) ? pnfs_read_pg_test : NULL;
|
||||
}
|
||||
|
||||
static int pnfs_write_pg_test(struct nfs_pageio_descriptor *pgio,
|
||||
struct nfs_page *prev,
|
||||
struct nfs_page *req)
|
||||
{
|
||||
if (pgio->pg_count == prev->wb_bytes) {
|
||||
/* This is first coelesce call for a series of nfs_pages */
|
||||
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
|
||||
prev->wb_context,
|
||||
IOMODE_RW,
|
||||
GFP_NOFS);
|
||||
if (!data->pnfs_error) {
|
||||
pnfs_set_layoutcommit(data);
|
||||
data->mds_ops->rpc_call_done(&data->task, data);
|
||||
data->mds_ops->rpc_release(data);
|
||||
return 0;
|
||||
}
|
||||
return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req);
|
||||
}
|
||||
|
||||
void
|
||||
pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode)
|
||||
{
|
||||
struct pnfs_layoutdriver_type *ld;
|
||||
|
||||
ld = NFS_SERVER(inode)->pnfs_curr_ld;
|
||||
pgio->pg_test = (ld && ld->pg_test) ? pnfs_write_pg_test : NULL;
|
||||
dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__,
|
||||
data->pnfs_error);
|
||||
status = nfs_initiate_write(data, NFS_CLIENT(data->inode),
|
||||
data->mds_ops, NFS_FILE_SYNC);
|
||||
return status ? : -EAGAIN;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
|
||||
|
||||
enum pnfs_try_status
|
||||
pnfs_try_to_write_data(struct nfs_write_data *wdata,
|
||||
@ -965,6 +1126,29 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata,
|
||||
return trypnfs;
|
||||
}
|
||||
|
||||
/*
|
||||
* Called by non rpc-based layout drivers
|
||||
*/
|
||||
int
|
||||
pnfs_ld_read_done(struct nfs_read_data *data)
|
||||
{
|
||||
int status;
|
||||
|
||||
if (!data->pnfs_error) {
|
||||
__nfs4_read_done_cb(data);
|
||||
data->mds_ops->rpc_call_done(&data->task, data);
|
||||
data->mds_ops->rpc_release(data);
|
||||
return 0;
|
||||
}
|
||||
|
||||
dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__,
|
||||
data->pnfs_error);
|
||||
status = nfs_initiate_read(data, NFS_CLIENT(data->inode),
|
||||
data->mds_ops);
|
||||
return status ? : -EAGAIN;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
|
||||
|
||||
/*
|
||||
* Call the appropriate parallel I/O subsystem read function.
|
||||
*/
|
||||
|
117
fs/nfs/pnfs.h
117
fs/nfs/pnfs.h
@ -30,6 +30,7 @@
|
||||
#ifndef FS_NFS_PNFS_H
|
||||
#define FS_NFS_PNFS_H
|
||||
|
||||
#include <linux/nfs_fs.h>
|
||||
#include <linux/nfs_page.h>
|
||||
|
||||
enum {
|
||||
@ -64,17 +65,29 @@ enum {
|
||||
NFS_LAYOUT_DESTROYED, /* no new use of layout allowed */
|
||||
};
|
||||
|
||||
enum layoutdriver_policy_flags {
|
||||
/* Should the pNFS client commit and return the layout upon a setattr */
|
||||
PNFS_LAYOUTRET_ON_SETATTR = 1 << 0,
|
||||
};
|
||||
|
||||
struct nfs4_deviceid_node;
|
||||
|
||||
/* Per-layout driver specific registration structure */
|
||||
struct pnfs_layoutdriver_type {
|
||||
struct list_head pnfs_tblid;
|
||||
const u32 id;
|
||||
const char *name;
|
||||
struct module *owner;
|
||||
unsigned flags;
|
||||
|
||||
struct pnfs_layout_hdr * (*alloc_layout_hdr) (struct inode *inode, gfp_t gfp_flags);
|
||||
void (*free_layout_hdr) (struct pnfs_layout_hdr *);
|
||||
|
||||
struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr, gfp_t gfp_flags);
|
||||
void (*free_lseg) (struct pnfs_layout_segment *lseg);
|
||||
|
||||
/* test for nfs page cache coalescing */
|
||||
int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
|
||||
bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
|
||||
|
||||
/* Returns true if layoutdriver wants to divert this request to
|
||||
* driver's commit routine.
|
||||
@ -89,6 +102,16 @@ struct pnfs_layoutdriver_type {
|
||||
*/
|
||||
enum pnfs_try_status (*read_pagelist) (struct nfs_read_data *nfs_data);
|
||||
enum pnfs_try_status (*write_pagelist) (struct nfs_write_data *nfs_data, int how);
|
||||
|
||||
void (*free_deviceid_node) (struct nfs4_deviceid_node *);
|
||||
|
||||
void (*encode_layoutreturn) (struct pnfs_layout_hdr *layoutid,
|
||||
struct xdr_stream *xdr,
|
||||
const struct nfs4_layoutreturn_args *args);
|
||||
|
||||
void (*encode_layoutcommit) (struct pnfs_layout_hdr *layoutid,
|
||||
struct xdr_stream *xdr,
|
||||
const struct nfs4_layoutcommit_args *args);
|
||||
};
|
||||
|
||||
struct pnfs_layout_hdr {
|
||||
@ -120,21 +143,22 @@ extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *);
|
||||
extern int nfs4_proc_getdeviceinfo(struct nfs_server *server,
|
||||
struct pnfs_device *dev);
|
||||
extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp);
|
||||
extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp);
|
||||
|
||||
/* pnfs.c */
|
||||
void get_layout_hdr(struct pnfs_layout_hdr *lo);
|
||||
void put_lseg(struct pnfs_layout_segment *lseg);
|
||||
struct pnfs_layout_segment *
|
||||
pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
|
||||
enum pnfs_iomode access_type, gfp_t gfp_flags);
|
||||
loff_t pos, u64 count, enum pnfs_iomode access_type,
|
||||
gfp_t gfp_flags);
|
||||
void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
|
||||
void unset_pnfs_layoutdriver(struct nfs_server *);
|
||||
enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *,
|
||||
const struct rpc_call_ops *, int);
|
||||
enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *,
|
||||
const struct rpc_call_ops *);
|
||||
void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *);
|
||||
void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *);
|
||||
bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req);
|
||||
int pnfs_layout_process(struct nfs4_layoutget *lgp);
|
||||
void pnfs_free_lseg_list(struct list_head *tmp_list);
|
||||
void pnfs_destroy_layout(struct nfs_inode *);
|
||||
@ -148,13 +172,37 @@ int pnfs_choose_layoutget_stateid(nfs4_stateid *dst,
|
||||
struct nfs4_state *open_state);
|
||||
int mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
|
||||
struct list_head *tmp_list,
|
||||
u32 iomode);
|
||||
struct pnfs_layout_range *recall_range);
|
||||
bool pnfs_roc(struct inode *ino);
|
||||
void pnfs_roc_release(struct inode *ino);
|
||||
void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
|
||||
bool pnfs_roc_drain(struct inode *ino, u32 *barrier);
|
||||
void pnfs_set_layoutcommit(struct nfs_write_data *wdata);
|
||||
int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
|
||||
int _pnfs_return_layout(struct inode *);
|
||||
int pnfs_ld_write_done(struct nfs_write_data *);
|
||||
int pnfs_ld_read_done(struct nfs_read_data *);
|
||||
|
||||
/* pnfs_dev.c */
|
||||
/*
 * One entry of the device ID cache implemented in pnfs_dev.c.
 * Lookups there match on the (ld, nfs_client, deviceid) triple.
 */
struct nfs4_deviceid_node {
|
||||
struct hlist_node node; /* linkage into the nfs4_deviceid_cache hash bucket */
|
||||
const struct pnfs_layoutdriver_type *ld; /* owning layout driver; supplies free_deviceid_node() */
|
||||
const struct nfs_client *nfs_client; /* client this device ID belongs to */
|
||||
struct nfs4_deviceid deviceid; /* the device ID itself (hash key) */
|
||||
atomic_t ref; /* reference count; set to 1 by nfs4_init_deviceid_node() */
|
||||
};
|
||||
|
||||
void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id);
|
||||
struct nfs4_deviceid_node *nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *);
|
||||
struct nfs4_deviceid_node *nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *);
|
||||
void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *);
|
||||
void nfs4_init_deviceid_node(struct nfs4_deviceid_node *,
|
||||
const struct pnfs_layoutdriver_type *,
|
||||
const struct nfs_client *,
|
||||
const struct nfs4_deviceid *);
|
||||
struct nfs4_deviceid_node *nfs4_insert_deviceid_node(struct nfs4_deviceid_node *);
|
||||
bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *);
|
||||
void nfs4_deviceid_purge_client(const struct nfs_client *);
|
||||
|
||||
static inline int lo_fail_bit(u32 iomode)
|
||||
{
|
||||
@ -223,6 +271,36 @@ static inline void pnfs_clear_request_commit(struct nfs_page *req)
|
||||
put_lseg(req->wb_commit_lseg);
|
||||
}
|
||||
|
||||
/* Should the pNFS client commit and return the layout upon a setattr */
|
||||
static inline bool
|
||||
pnfs_ld_layoutret_on_setattr(struct inode *inode)
|
||||
{
|
||||
if (!pnfs_enabled_sb(NFS_SERVER(inode)))
|
||||
return false;
|
||||
return NFS_SERVER(inode)->pnfs_curr_ld->flags &
|
||||
PNFS_LAYOUTRET_ON_SETATTR;
|
||||
}
|
||||
|
||||
static inline int pnfs_return_layout(struct inode *ino)
|
||||
{
|
||||
struct nfs_inode *nfsi = NFS_I(ino);
|
||||
struct nfs_server *nfss = NFS_SERVER(ino);
|
||||
|
||||
if (pnfs_enabled_sb(nfss) && nfsi->layout)
|
||||
return _pnfs_return_layout(ino);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio,
|
||||
struct inode *inode)
|
||||
{
|
||||
struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
|
||||
|
||||
if (ld)
|
||||
pgio->pg_test = ld->pg_test;
|
||||
}
|
||||
|
||||
#else /* CONFIG_NFS_V4_1 */
|
||||
|
||||
static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
|
||||
@ -245,7 +323,8 @@ static inline void put_lseg(struct pnfs_layout_segment *lseg)
|
||||
|
||||
static inline struct pnfs_layout_segment *
|
||||
pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
|
||||
enum pnfs_iomode access_type, gfp_t gfp_flags)
|
||||
loff_t pos, u64 count, enum pnfs_iomode access_type,
|
||||
gfp_t gfp_flags)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
@ -264,6 +343,17 @@ pnfs_try_to_write_data(struct nfs_write_data *data,
|
||||
return PNFS_NOT_ATTEMPTED;
|
||||
}
|
||||
|
||||
static inline int pnfs_return_layout(struct inode *ino)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
pnfs_ld_layoutret_on_setattr(struct inode *inode)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
pnfs_roc(struct inode *ino)
|
||||
{
|
||||
@ -294,16 +384,9 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void
|
||||
pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *ino)
|
||||
static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio,
|
||||
struct inode *inode)
|
||||
{
|
||||
pgio->pg_test = NULL;
|
||||
}
|
||||
|
||||
static inline void
|
||||
pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *ino)
|
||||
{
|
||||
pgio->pg_test = NULL;
|
||||
}
|
||||
|
||||
static inline void
|
||||
@ -331,6 +414,10 @@ static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * No-op stand-in used when CONFIG_NFS_V4_1 is disabled.  The parameter is
 * const-qualified so the signature matches the real prototype
 * (const struct nfs_client *) and callers compile identically in both
 * configurations.
 */
static inline void nfs4_deviceid_purge_client(const struct nfs_client *ncl)
{
}
|
||||
#endif /* CONFIG_NFS_V4_1 */
|
||||
|
||||
#endif /* FS_NFS_PNFS_H */
|
||||
|
270
fs/nfs/pnfs_dev.c
Normal file
270
fs/nfs/pnfs_dev.c
Normal file
@ -0,0 +1,270 @@
|
||||
/*
|
||||
* Device operations for the pnfs client.
|
||||
*
|
||||
* Copyright (c) 2002
|
||||
* The Regents of the University of Michigan
|
||||
* All Rights Reserved
|
||||
*
|
||||
* Dean Hildebrand <dhildebz@umich.edu>
|
||||
* Garth Goodson <Garth.Goodson@netapp.com>
|
||||
*
|
||||
* Permission is granted to use, copy, create derivative works, and
|
||||
* redistribute this software and such derivative works for any purpose,
|
||||
* so long as the name of the University of Michigan is not used in
|
||||
* any advertising or publicity pertaining to the use or distribution
|
||||
* of this software without specific, written prior authorization. If
|
||||
* the above copyright notice or any other identification of the
|
||||
* University of Michigan is included in any copy of any portion of
|
||||
* this software, then the disclaimer below must also be included.
|
||||
*
|
||||
* This software is provided as is, without representation or warranty
|
||||
* of any kind either express or implied, including without limitation
|
||||
* the implied warranties of merchantability, fitness for a particular
|
||||
* purpose, or noninfringement. The Regents of the University of
|
||||
* Michigan shall not be liable for any damages, including special,
|
||||
* indirect, incidental, or consequential damages, with respect to any
|
||||
* claim arising out of or in connection with the use of the software,
|
||||
* even if it has been or is hereafter advised of the possibility of
|
||||
* such damages.
|
||||
*/
|
||||
|
||||
#include "pnfs.h"
|
||||
|
||||
#define NFSDBG_FACILITY NFSDBG_PNFS
|
||||
|
||||
/*
|
||||
* Device ID RCU cache. A device ID is unique per server and layout type.
|
||||
*/
|
||||
#define NFS4_DEVICE_ID_HASH_BITS 5
|
||||
#define NFS4_DEVICE_ID_HASH_SIZE (1 << NFS4_DEVICE_ID_HASH_BITS)
|
||||
#define NFS4_DEVICE_ID_HASH_MASK (NFS4_DEVICE_ID_HASH_SIZE - 1)
|
||||
|
||||
static struct hlist_head nfs4_deviceid_cache[NFS4_DEVICE_ID_HASH_SIZE];
|
||||
static DEFINE_SPINLOCK(nfs4_deviceid_lock);
|
||||
|
||||
void
|
||||
nfs4_print_deviceid(const struct nfs4_deviceid *id)
|
||||
{
|
||||
u32 *p = (u32 *)id;
|
||||
|
||||
dprintk("%s: device id= [%x%x%x%x]\n", __func__,
|
||||
p[0], p[1], p[2], p[3]);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nfs4_print_deviceid);
|
||||
|
||||
static inline u32
|
||||
nfs4_deviceid_hash(const struct nfs4_deviceid *id)
|
||||
{
|
||||
unsigned char *cptr = (unsigned char *)id->data;
|
||||
unsigned int nbytes = NFS4_DEVICEID4_SIZE;
|
||||
u32 x = 0;
|
||||
|
||||
while (nbytes--) {
|
||||
x *= 37;
|
||||
x += *cptr++;
|
||||
}
|
||||
return x & NFS4_DEVICE_ID_HASH_MASK;
|
||||
}
|
||||
|
||||
static struct nfs4_deviceid_node *
|
||||
_lookup_deviceid(const struct pnfs_layoutdriver_type *ld,
|
||||
const struct nfs_client *clp, const struct nfs4_deviceid *id,
|
||||
long hash)
|
||||
{
|
||||
struct nfs4_deviceid_node *d;
|
||||
struct hlist_node *n;
|
||||
|
||||
hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node)
|
||||
if (d->ld == ld && d->nfs_client == clp &&
|
||||
!memcmp(&d->deviceid, id, sizeof(*id))) {
|
||||
if (atomic_read(&d->ref))
|
||||
return d;
|
||||
else
|
||||
continue;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Lookup a deviceid in cache and get a reference count on it if found
|
||||
*
|
||||
* @clp nfs_client associated with deviceid
|
||||
* @id deviceid to look up
|
||||
*/
|
||||
struct nfs4_deviceid_node *
|
||||
_find_get_deviceid(const struct pnfs_layoutdriver_type *ld,
|
||||
const struct nfs_client *clp, const struct nfs4_deviceid *id,
|
||||
long hash)
|
||||
{
|
||||
struct nfs4_deviceid_node *d;
|
||||
|
||||
rcu_read_lock();
|
||||
d = _lookup_deviceid(ld, clp, id, hash);
|
||||
if (d && !atomic_inc_not_zero(&d->ref))
|
||||
d = NULL;
|
||||
rcu_read_unlock();
|
||||
return d;
|
||||
}
|
||||
|
||||
/*
 * Exported lookup: hash @id, then find the matching cache entry and take
 * a reference on it.
 *
 * @ret the referenced node, or NULL when no live entry matches.
 */
struct nfs4_deviceid_node *
nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *ld,
		       const struct nfs_client *clp,
		       const struct nfs4_deviceid *id)
{
	long bucket = nfs4_deviceid_hash(id);

	return _find_get_deviceid(ld, clp, id, bucket);
}
EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid);
|
||||
|
||||
/*
|
||||
* Unhash and put deviceid
|
||||
*
|
||||
* @clp nfs_client associated with deviceid
|
||||
* @id the deviceid to unhash
|
||||
*
|
||||
* @ret the unhashed node, if found and dereferenced to zero, NULL otherwise.
|
||||
*/
|
||||
struct nfs4_deviceid_node *
|
||||
nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *ld,
|
||||
const struct nfs_client *clp, const struct nfs4_deviceid *id)
|
||||
{
|
||||
struct nfs4_deviceid_node *d;
|
||||
|
||||
spin_lock(&nfs4_deviceid_lock);
|
||||
rcu_read_lock();
|
||||
d = _lookup_deviceid(ld, clp, id, nfs4_deviceid_hash(id));
|
||||
rcu_read_unlock();
|
||||
if (!d) {
|
||||
spin_unlock(&nfs4_deviceid_lock);
|
||||
return NULL;
|
||||
}
|
||||
hlist_del_init_rcu(&d->node);
|
||||
spin_unlock(&nfs4_deviceid_lock);
|
||||
synchronize_rcu();
|
||||
|
||||
/* balance the initial ref set in pnfs_insert_deviceid */
|
||||
if (atomic_dec_and_test(&d->ref))
|
||||
return d;
|
||||
|
||||
return NULL;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nfs4_unhash_put_deviceid);
|
||||
|
||||
/*
|
||||
* Delete a deviceid from cache
|
||||
*
|
||||
* @clp struct nfs_client qualifying the deviceid
|
||||
* @id deviceid to delete
|
||||
*/
|
||||
void
|
||||
nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *ld,
|
||||
const struct nfs_client *clp, const struct nfs4_deviceid *id)
|
||||
{
|
||||
struct nfs4_deviceid_node *d;
|
||||
|
||||
d = nfs4_unhash_put_deviceid(ld, clp, id);
|
||||
if (!d)
|
||||
return;
|
||||
d->ld->free_deviceid_node(d);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nfs4_delete_deviceid);
|
||||
|
||||
void
|
||||
nfs4_init_deviceid_node(struct nfs4_deviceid_node *d,
|
||||
const struct pnfs_layoutdriver_type *ld,
|
||||
const struct nfs_client *nfs_client,
|
||||
const struct nfs4_deviceid *id)
|
||||
{
|
||||
INIT_HLIST_NODE(&d->node);
|
||||
d->ld = ld;
|
||||
d->nfs_client = nfs_client;
|
||||
d->deviceid = *id;
|
||||
atomic_set(&d->ref, 1);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nfs4_init_deviceid_node);
|
||||
|
||||
/*
|
||||
* Uniquely initialize and insert a deviceid node into cache
|
||||
*
|
||||
* @new new deviceid node
|
||||
* Note that the caller must set up the following members:
|
||||
* new->ld
|
||||
* new->nfs_client
|
||||
* new->deviceid
|
||||
*
|
||||
* @ret the inserted node, if none found, otherwise, the found entry.
|
||||
*/
|
||||
struct nfs4_deviceid_node *
|
||||
nfs4_insert_deviceid_node(struct nfs4_deviceid_node *new)
|
||||
{
|
||||
struct nfs4_deviceid_node *d;
|
||||
long hash;
|
||||
|
||||
spin_lock(&nfs4_deviceid_lock);
|
||||
hash = nfs4_deviceid_hash(&new->deviceid);
|
||||
d = _find_get_deviceid(new->ld, new->nfs_client, &new->deviceid, hash);
|
||||
if (d) {
|
||||
spin_unlock(&nfs4_deviceid_lock);
|
||||
return d;
|
||||
}
|
||||
|
||||
hlist_add_head_rcu(&new->node, &nfs4_deviceid_cache[hash]);
|
||||
spin_unlock(&nfs4_deviceid_lock);
|
||||
|
||||
return new;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nfs4_insert_deviceid_node);
|
||||
|
||||
/*
|
||||
* Dereference a deviceid node and delete it when its reference count drops
|
||||
* to zero.
|
||||
*
|
||||
* @d deviceid node to put
|
||||
*
|
||||
* @ret true iff the node was deleted
|
||||
*/
|
||||
bool
|
||||
nfs4_put_deviceid_node(struct nfs4_deviceid_node *d)
|
||||
{
|
||||
if (!atomic_dec_and_lock(&d->ref, &nfs4_deviceid_lock))
|
||||
return false;
|
||||
hlist_del_init_rcu(&d->node);
|
||||
spin_unlock(&nfs4_deviceid_lock);
|
||||
synchronize_rcu();
|
||||
d->ld->free_deviceid_node(d);
|
||||
return true;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nfs4_put_deviceid_node);
|
||||
|
||||
static void
|
||||
_deviceid_purge_client(const struct nfs_client *clp, long hash)
|
||||
{
|
||||
struct nfs4_deviceid_node *d;
|
||||
struct hlist_node *n, *next;
|
||||
HLIST_HEAD(tmp);
|
||||
|
||||
rcu_read_lock();
|
||||
hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node)
|
||||
if (d->nfs_client == clp && atomic_read(&d->ref)) {
|
||||
hlist_del_init_rcu(&d->node);
|
||||
hlist_add_head(&d->node, &tmp);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
if (hlist_empty(&tmp))
|
||||
return;
|
||||
|
||||
synchronize_rcu();
|
||||
hlist_for_each_entry_safe(d, n, next, &tmp, node)
|
||||
if (atomic_dec_and_test(&d->ref))
|
||||
d->ld->free_deviceid_node(d);
|
||||
}
|
||||
|
||||
void
|
||||
nfs4_deviceid_purge_client(const struct nfs_client *clp)
|
||||
{
|
||||
long h;
|
||||
|
||||
spin_lock(&nfs4_deviceid_lock);
|
||||
for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++)
|
||||
_deviceid_purge_client(clp, h);
|
||||
spin_unlock(&nfs4_deviceid_lock);
|
||||
}
|
@ -288,7 +288,9 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc)
|
||||
atomic_set(&req->wb_complete, requests);
|
||||
|
||||
BUG_ON(desc->pg_lseg != NULL);
|
||||
lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ, GFP_KERNEL);
|
||||
lseg = pnfs_update_layout(desc->pg_inode, req->wb_context,
|
||||
req_offset(req), desc->pg_count,
|
||||
IOMODE_READ, GFP_KERNEL);
|
||||
ClearPageError(page);
|
||||
offset = 0;
|
||||
nbytes = desc->pg_count;
|
||||
@ -351,7 +353,9 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc)
|
||||
}
|
||||
req = nfs_list_entry(data->pages.next);
|
||||
if ((!lseg) && list_is_singular(&data->pages))
|
||||
lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ, GFP_KERNEL);
|
||||
lseg = pnfs_update_layout(desc->pg_inode, req->wb_context,
|
||||
req_offset(req), desc->pg_count,
|
||||
IOMODE_READ, GFP_KERNEL);
|
||||
|
||||
ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count,
|
||||
0, lseg);
|
||||
@ -660,7 +664,6 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
|
||||
if (ret == 0)
|
||||
goto read_complete; /* all pages were read */
|
||||
|
||||
pnfs_pageio_init_read(&pgio, inode);
|
||||
if (rsize < PAGE_CACHE_SIZE)
|
||||
nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0);
|
||||
else
|
||||
|
@ -63,6 +63,7 @@
|
||||
#include "iostat.h"
|
||||
#include "internal.h"
|
||||
#include "fscache.h"
|
||||
#include "pnfs.h"
|
||||
|
||||
#define NFSDBG_FACILITY NFSDBG_VFS
|
||||
|
||||
@ -732,6 +733,28 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
|
||||
|
||||
return 0;
|
||||
}
|
||||
#ifdef CONFIG_NFS_V4_1
/*
 * Append ",sessions" to the output in @m when the server's client
 * uses a session.
 */
void show_sessions(struct seq_file *m, struct nfs_server *server)
{
	if (!nfs4_has_session(server->nfs_client))
		return;

	seq_printf(m, ",sessions");
}
#else
/* Without CONFIG_NFS_V4_1 there is nothing to report. */
void show_sessions(struct seq_file *m, struct nfs_server *server)
{
}
#endif
|
||||
|
||||
#ifdef CONFIG_NFS_V4_1
/*
 * Append ",pnfs=<name>" to the output in @m, where <name> is the name of
 * the server's current layout driver, or "not configured" when the server
 * has none.
 */
void show_pnfs(struct seq_file *m, struct nfs_server *server)
{
	struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;

	seq_printf(m, ",pnfs=");
	if (ld == NULL) {
		seq_printf(m, "not configured");
		return;
	}
	seq_printf(m, "%s", ld->name);
}
#else  /* CONFIG_NFS_V4_1 */
/* Without CONFIG_NFS_V4_1 there is nothing to report. */
void show_pnfs(struct seq_file *m, struct nfs_server *server)
{
}
#endif  /* CONFIG_NFS_V4_1 */
|
||||
|
||||
static int nfs_show_devname(struct seq_file *m, struct vfsmount *mnt)
|
||||
{
|
||||
@ -792,6 +815,8 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
|
||||
seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]);
|
||||
seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]);
|
||||
seq_printf(m, ",acl=0x%x", nfss->acl_bitmask);
|
||||
show_sessions(m, nfss);
|
||||
show_pnfs(m, nfss);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -939,7 +939,9 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
|
||||
atomic_set(&req->wb_complete, requests);
|
||||
|
||||
BUG_ON(desc->pg_lseg);
|
||||
lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW, GFP_NOFS);
|
||||
lseg = pnfs_update_layout(desc->pg_inode, req->wb_context,
|
||||
req_offset(req), desc->pg_count,
|
||||
IOMODE_RW, GFP_NOFS);
|
||||
ClearPageError(page);
|
||||
offset = 0;
|
||||
nbytes = desc->pg_count;
|
||||
@ -1013,7 +1015,9 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc)
|
||||
}
|
||||
req = nfs_list_entry(data->pages.next);
|
||||
if ((!lseg) && list_is_singular(&data->pages))
|
||||
lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW, GFP_NOFS);
|
||||
lseg = pnfs_update_layout(desc->pg_inode, req->wb_context,
|
||||
req_offset(req), desc->pg_count,
|
||||
IOMODE_RW, GFP_NOFS);
|
||||
|
||||
if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
|
||||
(desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit))
|
||||
@ -1032,8 +1036,6 @@ static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
|
||||
{
|
||||
size_t wsize = NFS_SERVER(inode)->wsize;
|
||||
|
||||
pnfs_pageio_init_write(pgio, inode);
|
||||
|
||||
if (wsize < PAGE_CACHE_SIZE)
|
||||
nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags);
|
||||
else
|
||||
|
@ -562,6 +562,7 @@ enum {
|
||||
NFSPROC4_CLNT_LAYOUTGET,
|
||||
NFSPROC4_CLNT_GETDEVICEINFO,
|
||||
NFSPROC4_CLNT_LAYOUTCOMMIT,
|
||||
NFSPROC4_CLNT_LAYOUTRETURN,
|
||||
};
|
||||
|
||||
/* nfs41 types */
|
||||
|
@ -68,7 +68,7 @@ struct nfs_pageio_descriptor {
|
||||
int pg_ioflags;
|
||||
int pg_error;
|
||||
struct pnfs_layout_segment *pg_lseg;
|
||||
int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
|
||||
bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
|
||||
};
|
||||
|
||||
#define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags))
|
||||
|
@ -269,6 +269,27 @@ struct nfs4_layoutcommit_data {
|
||||
struct nfs4_layoutcommit_res res;
|
||||
};
|
||||
|
||||
struct nfs4_layoutreturn_args {
|
||||
__u32 layout_type;
|
||||
struct inode *inode;
|
||||
nfs4_stateid stateid;
|
||||
struct nfs4_sequence_args seq_args;
|
||||
};
|
||||
|
||||
struct nfs4_layoutreturn_res {
|
||||
struct nfs4_sequence_res seq_res;
|
||||
u32 lrs_present;
|
||||
nfs4_stateid stateid;
|
||||
};
|
||||
|
||||
struct nfs4_layoutreturn {
|
||||
struct nfs4_layoutreturn_args args;
|
||||
struct nfs4_layoutreturn_res res;
|
||||
struct rpc_cred *cred;
|
||||
struct nfs_client *clp;
|
||||
int rpc_status;
|
||||
};
|
||||
|
||||
/*
|
||||
* Arguments to the open call.
|
||||
*/
|
||||
@ -1087,6 +1108,7 @@ struct nfs_read_data {
|
||||
const struct rpc_call_ops *mds_ops;
|
||||
int (*read_done_cb) (struct rpc_task *task, struct nfs_read_data *data);
|
||||
__u64 mds_offset;
|
||||
int pnfs_error;
|
||||
struct page *page_array[NFS_PAGEVEC_SIZE];
|
||||
};
|
||||
|
||||
@ -1112,6 +1134,7 @@ struct nfs_write_data {
|
||||
unsigned long timestamp; /* For lease renewal */
|
||||
#endif
|
||||
__u64 mds_offset; /* Filelayout dense stripe */
|
||||
int pnfs_error;
|
||||
struct page *page_array[NFS_PAGEVEC_SIZE];
|
||||
};
|
||||
|
||||
|
345
include/linux/pnfs_osd_xdr.h
Normal file
345
include/linux/pnfs_osd_xdr.h
Normal file
@ -0,0 +1,345 @@
|
||||
/*
|
||||
* pNFS-osd on-the-wire data structures
|
||||
*
|
||||
* Copyright (C) 2007 Panasas Inc. [year of first publication]
|
||||
* All rights reserved.
|
||||
*
|
||||
* Benny Halevy <bhalevy@panasas.com>
|
||||
* Boaz Harrosh <bharrosh@panasas.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2
|
||||
* See the file COPYING included with this distribution for more details.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. Neither the name of the Panasas company nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
|
||||
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
#ifndef __PNFS_OSD_XDR_H__
|
||||
#define __PNFS_OSD_XDR_H__
|
||||
|
||||
#include <linux/nfs_fs.h>
|
||||
#include <linux/nfs_page.h>
|
||||
#include <scsi/osd_protocol.h>
|
||||
|
||||
#define PNFS_OSD_OSDNAME_MAXSIZE 256
|
||||
|
||||
/*
|
||||
* draft-ietf-nfsv4-minorversion-22
|
||||
* draft-ietf-nfsv4-pnfs-obj-12
|
||||
*/
|
||||
|
||||
/* Layout Structure */
|
||||
|
||||
enum pnfs_osd_raid_algorithm4 {
|
||||
PNFS_OSD_RAID_0 = 1,
|
||||
PNFS_OSD_RAID_4 = 2,
|
||||
PNFS_OSD_RAID_5 = 3,
|
||||
PNFS_OSD_RAID_PQ = 4 /* Reed-Solomon P+Q */
|
||||
};
|
||||
|
||||
/* struct pnfs_osd_data_map4 {
|
||||
* uint32_t odm_num_comps;
|
||||
* length4 odm_stripe_unit;
|
||||
* uint32_t odm_group_width;
|
||||
* uint32_t odm_group_depth;
|
||||
* uint32_t odm_mirror_cnt;
|
||||
* pnfs_osd_raid_algorithm4 odm_raid_algorithm;
|
||||
* };
|
||||
*/
|
||||
/*
 * In-memory counterpart of the XDR pnfs_osd_data_map4 quoted in the comment
 * preceding this struct; members keep the XDR field names and order.
 */
struct pnfs_osd_data_map {
|
||||
u32 odm_num_comps;
|
||||
/* XDR type is length4; held here as a 64-bit value. */
u64 odm_stripe_unit;
|
||||
u32 odm_group_width;
|
||||
u32 odm_group_depth;
|
||||
u32 odm_mirror_cnt;
|
||||
/* XDR type is pnfs_osd_raid_algorithm4; held here as a plain u32. */
u32 odm_raid_algorithm;
|
||||
};
|
||||
|
||||
/* struct pnfs_osd_objid4 {
|
||||
* deviceid4 oid_device_id;
|
||||
* uint64_t oid_partition_id;
|
||||
* uint64_t oid_object_id;
|
||||
* };
|
||||
*/
|
||||
struct pnfs_osd_objid {
|
||||
struct nfs4_deviceid oid_device_id;
|
||||
u64 oid_partition_id;
|
||||
u64 oid_object_id;
|
||||
};
|
||||
|
||||
/* For printout. I use:
|
||||
* printk("dev(%llx:%llx)", _DEVID_LO(pointer), _DEVID_HI(pointer));
|
||||
* BE style
|
||||
*/
|
||||
/*
 * _DEVID_LO - first 64 bits of a device id, for printing.
 * @oid_device_id: pointer to an object that has a ->data member.
 *
 * Reads the first __be64 found at ->data, converts it from big-endian to
 * CPU byte order and casts the result to unsigned long long so it can be
 * passed to a "%llx" format.
 *
 * The whole expansion is parenthesized so that the macro always behaves as
 * a single expression, whatever operators surround it at the call site.
 */
#define _DEVID_LO(oid_device_id) \
	((unsigned long long)be64_to_cpup((__be64 *)(oid_device_id)->data))
|
||||
|
||||
/*
 * _DEVID_HI - second 64 bits of a device id, for printing.
 * @oid_device_id: pointer to an object that has a ->data member.
 *
 * Reads the second __be64 found at ->data (i.e. the one at index 1),
 * converts it from big-endian to CPU byte order and casts the result to
 * unsigned long long so it can be passed to a "%llx" format.
 *
 * The whole expansion is parenthesized so that the macro always behaves as
 * a single expression, whatever operators surround it at the call site.
 */
#define _DEVID_HI(oid_device_id) \
	((unsigned long long)be64_to_cpup(((__be64 *)(oid_device_id)->data) + 1))
|
||||
|
||||
static inline int
|
||||
pnfs_osd_objid_xdr_sz(void)
|
||||
{
|
||||
return (NFS4_DEVICEID4_SIZE / 4) + 2 + 2;
|
||||
}
|
||||
|
||||
enum pnfs_osd_version {
|
||||
PNFS_OSD_MISSING = 0,
|
||||
PNFS_OSD_VERSION_1 = 1,
|
||||
PNFS_OSD_VERSION_2 = 2
|
||||
};
|
||||
|
||||
struct pnfs_osd_opaque_cred {
|
||||
u32 cred_len;
|
||||
void *cred;
|
||||
};
|
||||
|
||||
enum pnfs_osd_cap_key_sec {
|
||||
PNFS_OSD_CAP_KEY_SEC_NONE = 0,
|
||||
PNFS_OSD_CAP_KEY_SEC_SSV = 1,
|
||||
};
|
||||
|
||||
/* struct pnfs_osd_object_cred4 {
|
||||
* pnfs_osd_objid4 oc_object_id;
|
||||
* pnfs_osd_version4 oc_osd_version;
|
||||
* pnfs_osd_cap_key_sec4 oc_cap_key_sec;
|
||||
* opaque oc_capability_key<>;
|
||||
* opaque oc_capability<>;
|
||||
* };
|
||||
*/
|
||||
struct pnfs_osd_object_cred {
|
||||
struct pnfs_osd_objid oc_object_id;
|
||||
u32 oc_osd_version;
|
||||
u32 oc_cap_key_sec;
|
||||
struct pnfs_osd_opaque_cred oc_cap_key;
|
||||
struct pnfs_osd_opaque_cred oc_cap;
|
||||
};
|
||||
|
||||
/* struct pnfs_osd_layout4 {
|
||||
* pnfs_osd_data_map4 olo_map;
|
||||
* uint32_t olo_comps_index;
|
||||
* pnfs_osd_object_cred4 olo_components<>;
|
||||
* };
|
||||
*/
|
||||
/*
 * In-memory counterpart of the XDR pnfs_osd_layout4 quoted in the comment
 * preceding this struct.
 *
 * The XDR variable-length array olo_components<> has no single member here;
 * presumably it is represented by the olo_num_comps / olo_comps pair
 * (count and element pointer) -- confirm against the decoder.
 */
struct pnfs_osd_layout {
|
||||
struct pnfs_osd_data_map olo_map;
|
||||
u32 olo_comps_index;
|
||||
u32 olo_num_comps;
|
||||
/*
 * Per the decoding notes later in this header,
 * pnfs_osd_xdr_decode_layout_map() leaves this set to NULL.
 */
struct pnfs_osd_object_cred *olo_comps;
|
||||
};
|
||||
|
||||
/* Device Address */
|
||||
enum pnfs_osd_targetid_type {
|
||||
OBJ_TARGET_ANON = 1,
|
||||
OBJ_TARGET_SCSI_NAME = 2,
|
||||
OBJ_TARGET_SCSI_DEVICE_ID = 3,
|
||||
};
|
||||
|
||||
/* union pnfs_osd_targetid4 switch (pnfs_osd_targetid_type4 oti_type) {
|
||||
* case OBJ_TARGET_SCSI_NAME:
|
||||
* string oti_scsi_name<>;
|
||||
*
|
||||
* case OBJ_TARGET_SCSI_DEVICE_ID:
|
||||
* opaque oti_scsi_device_id<>;
|
||||
*
|
||||
* default:
|
||||
* void;
|
||||
* };
|
||||
*
|
||||
* union pnfs_osd_targetaddr4 switch (bool ota_available) {
|
||||
* case TRUE:
|
||||
* netaddr4 ota_netaddr;
|
||||
* case FALSE:
|
||||
* void;
|
||||
* };
|
||||
*
|
||||
* struct pnfs_osd_deviceaddr4 {
|
||||
* pnfs_osd_targetid4 oda_targetid;
|
||||
* pnfs_osd_targetaddr4 oda_targetaddr;
|
||||
* uint64_t oda_lun;
|
||||
* opaque oda_systemid<>;
|
||||
* pnfs_osd_object_cred4 oda_root_obj_cred;
|
||||
* opaque oda_osdname<>;
|
||||
* };
|
||||
*/
|
||||
/*
 * In-memory counterpart of the XDR union pnfs_osd_targetid4 quoted in the
 * comment preceding this struct.
 *
 * NOTE(review): the XDR union has two string arms (oti_scsi_name and
 * oti_scsi_device_id) but only one string member is declared here;
 * presumably oti_scsi_device_id carries whichever arm oti_type selects --
 * confirm against the decoder.
 */
struct pnfs_osd_targetid {
|
||||
/* Union discriminant; XDR type is pnfs_osd_targetid_type4. */
u32 oti_type;
|
||||
struct nfs4_string oti_scsi_device_id;
|
||||
};
|
||||
|
||||
enum { PNFS_OSD_TARGETID_MAX = 1 + PNFS_OSD_OSDNAME_MAXSIZE / 4 };
|
||||
|
||||
/* struct netaddr4 {
|
||||
* // see struct rpcb in RFC1833
|
||||
* string r_netid<>; // network id
|
||||
* string r_addr<>; // universal address
|
||||
* };
|
||||
*/
|
||||
struct pnfs_osd_net_addr {
|
||||
struct nfs4_string r_netid;
|
||||
struct nfs4_string r_addr;
|
||||
};
|
||||
|
||||
struct pnfs_osd_targetaddr {
|
||||
u32 ota_available;
|
||||
struct pnfs_osd_net_addr ota_netaddr;
|
||||
};
|
||||
|
||||
enum {
|
||||
NETWORK_ID_MAX = 16 / 4,
|
||||
UNIVERSAL_ADDRESS_MAX = 64 / 4,
|
||||
PNFS_OSD_TARGETADDR_MAX = 3 + NETWORK_ID_MAX + UNIVERSAL_ADDRESS_MAX,
|
||||
};
|
||||
|
||||
/*
 * In-memory counterpart of the XDR pnfs_osd_deviceaddr4 quoted earlier in
 * this header; members keep the XDR field names and order.
 *
 * As stated next to pnfs_osd_xdr_decode_deviceaddr(), the strings in a
 * decoded instance point into the buffer that was decoded, so that buffer
 * must stay valid while the instance is in use.
 */
struct pnfs_osd_deviceaddr {
|
||||
struct pnfs_osd_targetid oda_targetid;
|
||||
struct pnfs_osd_targetaddr oda_targetaddr;
|
||||
/* XDR type is uint64_t; kept here as 8 individual bytes. */
u8 oda_lun[8];
|
||||
struct nfs4_string oda_systemid;
|
||||
struct pnfs_osd_object_cred oda_root_obj_cred;
|
||||
struct nfs4_string oda_osdname;
|
||||
};
|
||||
|
||||
/*
 * Upper bounds related to an encoded pnfs_osd_deviceaddr4.
 *
 * ODA_OSDNAME_MAX is PNFS_OSD_OSDNAME_MAXSIZE divided by 4, so the unit is
 * presumably 4-byte XDR words. PNFS_OSD_DEVICEADDR_MAX sums one term per
 * member; each "1 +" in front of a string/opaque term presumably accounts
 * for its length word.
 *
 * NOTE(review): OSD_SYSTEMID_LEN is added as-is while the other byte sizes
 * in this header are divided by 4 -- confirm its unit.
 * NOTE(review): there is no term for oda_root_obj_cred -- confirm that this
 * is intentional.
 */
enum {
|
||||
ODA_OSDNAME_MAX = PNFS_OSD_OSDNAME_MAXSIZE / 4,
|
||||
PNFS_OSD_DEVICEADDR_MAX =
|
||||
PNFS_OSD_TARGETID_MAX + PNFS_OSD_TARGETADDR_MAX +
|
||||
2 /*oda_lun*/ +
|
||||
1 + OSD_SYSTEMID_LEN +
|
||||
1 + ODA_OSDNAME_MAX,
|
||||
};
|
||||
|
||||
/* LAYOUTCOMMIT: layoutupdate */
|
||||
|
||||
/* union pnfs_osd_deltaspaceused4 switch (bool dsu_valid) {
|
||||
* case TRUE:
|
||||
* int64_t dsu_delta;
|
||||
* case FALSE:
|
||||
* void;
|
||||
* };
|
||||
*
|
||||
* struct pnfs_osd_layoutupdate4 {
|
||||
* pnfs_osd_deltaspaceused4 olu_delta_space_used;
|
||||
* bool olu_ioerr_flag;
|
||||
* };
|
||||
*/
|
||||
/*
 * In-memory counterpart of the XDR pnfs_osd_layoutupdate4 quoted in the
 * comment preceding this struct.
 *
 * The nested union pnfs_osd_deltaspaceused4 (olu_delta_space_used) is
 * flattened: its discriminant and its TRUE arm appear here directly as
 * dsu_valid and dsu_delta.
 */
struct pnfs_osd_layoutupdate {
|
||||
/* Union discriminant (XDR bool): TRUE means dsu_delta is present. */
u32 dsu_valid;
|
||||
/* XDR type is int64_t; only part of the encoding when dsu_valid is TRUE. */
s64 dsu_delta;
|
||||
/* XDR type is bool. */
u32 olu_ioerr_flag;
|
||||
};
|
||||
|
||||
/* LAYOUTRETURN: I/O Error Report */
|
||||
|
||||
enum pnfs_osd_errno {
|
||||
PNFS_OSD_ERR_EIO = 1,
|
||||
PNFS_OSD_ERR_NOT_FOUND = 2,
|
||||
PNFS_OSD_ERR_NO_SPACE = 3,
|
||||
PNFS_OSD_ERR_BAD_CRED = 4,
|
||||
PNFS_OSD_ERR_NO_ACCESS = 5,
|
||||
PNFS_OSD_ERR_UNREACHABLE = 6,
|
||||
PNFS_OSD_ERR_RESOURCE = 7
|
||||
};
|
||||
|
||||
/* struct pnfs_osd_ioerr4 {
|
||||
* pnfs_osd_objid4 oer_component;
|
||||
* length4 oer_comp_offset;
|
||||
* length4 oer_comp_length;
|
||||
* bool oer_iswrite;
|
||||
* pnfs_osd_errno4 oer_errno;
|
||||
* };
|
||||
*/
|
||||
struct pnfs_osd_ioerr {
|
||||
struct pnfs_osd_objid oer_component;
|
||||
u64 oer_comp_offset;
|
||||
u64 oer_comp_length;
|
||||
u32 oer_iswrite;
|
||||
u32 oer_errno;
|
||||
};
|
||||
|
||||
/* OSD XDR API */
|
||||
/* Layout helpers */
|
||||
/* Layout decoding is done in two parts:
|
||||
* 1. First Call pnfs_osd_xdr_decode_layout_map to read in only the header part
|
||||
* of the layout. @iter members need not be initialized.
|
||||
* Returned:
|
||||
* @layout members are set. (@layout->olo_comps set to NULL).
|
||||
*
|
||||
* Zero on success, or negative error if passed xdr is broken.
|
||||
*
|
||||
* 2. 2nd Call pnfs_osd_xdr_decode_layout_comp() in a loop until it returns
|
||||
* false, to decode the next component.
|
||||
* Returned:
|
||||
* true if there is more to decode or false if we are done or error.
|
||||
*
|
||||
* Example:
|
||||
* struct pnfs_osd_xdr_decode_layout_iter iter;
|
||||
* struct pnfs_osd_layout layout;
|
||||
* struct pnfs_osd_object_cred comp;
|
||||
* int status;
|
||||
*
|
||||
* status = pnfs_osd_xdr_decode_layout_map(&layout, &iter, xdr);
|
||||
* if (unlikely(status))
|
||||
* goto err;
|
||||
* while(pnfs_osd_xdr_decode_layout_comp(&comp, &iter, xdr, &status)) {
|
||||
* // All of @comp strings point to inside the xdr_buffer
|
||||
* // or scratch buffer. Copy them out to user memory eg.
|
||||
* copy_single_comp(dest_comp++, &comp);
|
||||
* }
|
||||
* if (unlikely(status))
|
||||
* goto err;
|
||||
*/
|
||||
|
||||
/*
 * Iteration state passed to both pnfs_osd_xdr_decode_layout_map() and
 * pnfs_osd_xdr_decode_layout_comp().
 *
 * Per the usage notes preceding this struct, callers do not need to
 * initialize the members before the first call to
 * pnfs_osd_xdr_decode_layout_map().
 */
struct pnfs_osd_xdr_decode_layout_iter {
|
||||
/* Presumably the number of components announced by the layout -- confirm. */
unsigned total_comps;
|
||||
/* Presumably the number of components decoded so far -- confirm. */
unsigned decoded_comps;
|
||||
};
|
||||
|
||||
extern int pnfs_osd_xdr_decode_layout_map(struct pnfs_osd_layout *layout,
|
||||
struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr);
|
||||
|
||||
extern bool pnfs_osd_xdr_decode_layout_comp(struct pnfs_osd_object_cred *comp,
|
||||
struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr,
|
||||
int *err);
|
||||
|
||||
/* Device Info helpers */
|
||||
|
||||
/* Note: All strings inside @deviceaddr point to space inside @p.
|
||||
* @p should stay valid while @deviceaddr is in use.
|
||||
*/
|
||||
extern void pnfs_osd_xdr_decode_deviceaddr(
|
||||
struct pnfs_osd_deviceaddr *deviceaddr, __be32 *p);
|
||||
|
||||
/* layoutupdate (layout_commit) xdr helpers */
|
||||
extern int
|
||||
pnfs_osd_xdr_encode_layoutupdate(struct xdr_stream *xdr,
|
||||
struct pnfs_osd_layoutupdate *lou);
|
||||
|
||||
/* osd_ioerror encoding/decoding (layout_return) */
|
||||
/* Client */
|
||||
extern __be32 *pnfs_osd_xdr_ioerr_reserve_space(struct xdr_stream *xdr);
|
||||
extern void pnfs_osd_xdr_encode_ioerr(__be32 *p, struct pnfs_osd_ioerr *ioerr);
|
||||
|
||||
#endif /* __PNFS_OSD_XDR_H__ */
|
@ -216,6 +216,8 @@ extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
|
||||
extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages,
|
||||
unsigned int base, unsigned int len);
|
||||
extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
|
||||
extern void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf,
|
||||
struct page **pages, unsigned int len);
|
||||
extern void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen);
|
||||
extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes);
|
||||
extern void xdr_read_pages(struct xdr_stream *xdr, unsigned int len);
|
||||
|
@ -638,6 +638,25 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xdr_init_decode);
|
||||
|
||||
/**
|
||||
* xdr_init_decode - Initialize an xdr_stream for decoding data.
|
||||
* @xdr: pointer to xdr_stream struct
|
||||
* @buf: pointer to XDR buffer from which to decode data
|
||||
* @pages: list of pages to decode into
|
||||
* @len: length in bytes of buffer in pages
|
||||
*/
|
||||
void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf,
|
||||
struct page **pages, unsigned int len)
|
||||
{
|
||||
memset(buf, 0, sizeof(*buf));
|
||||
buf->pages = pages;
|
||||
buf->page_len = len;
|
||||
buf->buflen = len;
|
||||
buf->len = len;
|
||||
xdr_init_decode(xdr, buf, NULL);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xdr_init_decode_pages);
|
||||
|
||||
static __be32 * __xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes)
|
||||
{
|
||||
__be32 *p = xdr->p;
|
||||
|
Loading…
Reference in New Issue
Block a user