nfs: remove the objlayout driver
The objlayout code has been in the tree, but it's been unmaintained and no server product for it actually ever shipped. Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
This commit is contained in:
parent
260f32adb8
commit
6d22323b2e
@ -2419,12 +2419,6 @@
|
||||
and gids from such clients. This is intended to ease
|
||||
migration from NFSv2/v3.
|
||||
|
||||
objlayoutdriver.osd_login_prog=
|
||||
[NFS] [OBJLAYOUT] sets the pathname to the program which
|
||||
is used to automatically discover and login into new
|
||||
osd-targets. Please see:
|
||||
Documentation/filesystems/pnfs.txt for more explanations
|
||||
|
||||
nmi_debug= [KNL,AVR32,SH] Specify one or more actions to take
|
||||
when a NMI is triggered.
|
||||
Format: [state][,regs][,debounce][,die]
|
||||
|
@ -64,46 +64,9 @@ table which are called by the nfs-client pnfs-core to implement the
|
||||
different layout types.
|
||||
|
||||
Files-layout-driver code is in: fs/nfs/filelayout/.. directory
|
||||
Objects-layout-driver code is in: fs/nfs/objlayout/.. directory
|
||||
Blocks-layout-driver code is in: fs/nfs/blocklayout/.. directory
|
||||
Flexfiles-layout-driver code is in: fs/nfs/flexfilelayout/.. directory
|
||||
|
||||
objects-layout setup
|
||||
--------------------
|
||||
|
||||
As part of the full STD implementation the objlayoutdriver.ko needs, at times,
|
||||
to automatically login to yet undiscovered iscsi/osd devices. For this the
|
||||
driver makes up-calls to a user-mode script called *osd_login*
|
||||
|
||||
The path_name of the script to use is by default:
|
||||
/sbin/osd_login.
|
||||
This name can be overridden by the Kernel module parameter:
|
||||
objlayoutdriver.osd_login_prog
|
||||
|
||||
If Kernel does not find the osd_login_prog path it will zero it out
|
||||
and will not attempt further logins. An admin can then write a new value
|
||||
to the objlayoutdriver.osd_login_prog Kernel parameter to re-enable it.
|
||||
|
||||
The /sbin/osd_login is part of the nfs-utils package, and should usually
|
||||
be installed on distributions that support this Kernel version.
|
||||
|
||||
The API to the login script is as follows:
|
||||
Usage: $0 -u <URI> -o <OSDNAME> -s <SYSTEMID>
|
||||
Options:
|
||||
-u target uri e.g. iscsi://<ip>:<port>
|
||||
(always exists)
|
||||
(More protocols can be defined in the future.
|
||||
The client does not interpret this string; it is
|
||||
passed unchanged as received from the Server)
|
||||
-o osdname of the requested target OSD
|
||||
(Might be empty)
|
||||
(A string which denotes the OSD name, there is a
|
||||
limit of 64 chars on this string)
|
||||
-s systemid of the requested target OSD
|
||||
(Might be empty)
|
||||
(This string, if not empty, is always a hex
|
||||
representation of the 20 bytes osd_system_id)
|
||||
|
||||
blocks-layout setup
|
||||
-------------------
|
||||
|
||||
|
@ -123,11 +123,6 @@ config PNFS_BLOCK
|
||||
depends on NFS_V4_1 && BLK_DEV_DM
|
||||
default NFS_V4
|
||||
|
||||
config PNFS_OBJLAYOUT
|
||||
tristate
|
||||
depends on NFS_V4_1 && SCSI_OSD_ULD
|
||||
default NFS_V4
|
||||
|
||||
config PNFS_FLEXFILE_LAYOUT
|
||||
tristate
|
||||
depends on NFS_V4_1 && NFS_V3
|
||||
|
@ -31,6 +31,5 @@ nfsv4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o pnfs_nfs.o
|
||||
nfsv4-$(CONFIG_NFS_V4_2) += nfs42proc.o
|
||||
|
||||
obj-$(CONFIG_PNFS_FILE_LAYOUT) += filelayout/
|
||||
obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/
|
||||
obj-$(CONFIG_PNFS_BLOCK) += blocklayout/
|
||||
obj-$(CONFIG_PNFS_FLEXFILE_LAYOUT) += flexfilelayout/
|
||||
|
@ -1,5 +0,0 @@
|
||||
#
|
||||
# Makefile for the pNFS Objects Layout Driver kernel module
|
||||
#
|
||||
objlayoutdriver-y := objio_osd.o pnfs_osd_xdr_cli.o objlayout.o
|
||||
obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayoutdriver.o
|
@ -1,675 +0,0 @@
|
||||
/*
|
||||
* pNFS Objects layout implementation over open-osd initiator library
|
||||
*
|
||||
* Copyright (C) 2009 Panasas Inc. [year of first publication]
|
||||
* All rights reserved.
|
||||
*
|
||||
* Benny Halevy <bhalevy@panasas.com>
|
||||
* Boaz Harrosh <ooo@electrozaur.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2
|
||||
* See the file COPYING included with this distribution for more details.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. Neither the name of the Panasas company nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
|
||||
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <scsi/osd_ore.h>
|
||||
|
||||
#include "objlayout.h"
|
||||
#include "../internal.h"
|
||||
|
||||
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
|
||||
|
||||
/*
 * Device table entry: an NFSv4 device-id node paired with the ORE device
 * it resolves to.
 * NOTE(review): objio_free_deviceid_node() hands the id_node pointer itself
 * to kfree_rcu(), so id_node presumably has to stay the first member.
 */
struct objio_dev_ent {
|
||||
struct nfs4_deviceid_node id_node;
|
||||
struct ore_dev od;
|
||||
};
|
||||
|
||||
static void
|
||||
objio_free_deviceid_node(struct nfs4_deviceid_node *d)
|
||||
{
|
||||
struct objio_dev_ent *de = container_of(d, struct objio_dev_ent, id_node);
|
||||
|
||||
dprintk("%s: free od=%p\n", __func__, de->od.od);
|
||||
osduld_put_device(de->od.od);
|
||||
kfree_rcu(d, rcu);
|
||||
}
|
||||
|
||||
/*
 * Driver-private layout segment: the generic pNFS segment plus the ORE
 * layout geometry and the component/device table used for I/O.
 * OBJIO_LSEG() converts from the embedded lseg back to this structure.
 */
struct objio_segment {
|
||||
struct pnfs_layout_segment lseg;
|
||||
|
||||
struct ore_layout layout;
|
||||
struct ore_components oc;
|
||||
};
|
||||
|
||||
static inline struct objio_segment *
|
||||
OBJIO_LSEG(struct pnfs_layout_segment *lseg)
|
||||
{
|
||||
return container_of(lseg, struct objio_segment, lseg);
|
||||
}
|
||||
|
||||
/*
 * Per-I/O state: the generic result record (oir), whether completion is
 * reported synchronously (sync), and the ORE I/O state driving the request.
 * objio_free_result() recovers this structure from @oir via container_of().
 */
struct objio_state {
|
||||
/* Generic layer */
|
||||
struct objlayout_io_res oir;
|
||||
|
||||
bool sync;
|
||||
/*FIXME: Support for extra_bytes at ore_get_rw_state() */
|
||||
struct ore_io_state *ios;
|
||||
};
|
||||
|
||||
/* Send and wait for a get_device_info of devices in the layout,
|
||||
then look them up with the osd_initiator library */
|
||||
struct nfs4_deviceid_node *
|
||||
objio_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
|
||||
gfp_t gfp_flags)
|
||||
{
|
||||
struct pnfs_osd_deviceaddr *deviceaddr;
|
||||
struct objio_dev_ent *ode = NULL;
|
||||
struct osd_dev *od;
|
||||
struct osd_dev_info odi;
|
||||
bool retry_flag = true;
|
||||
__be32 *p;
|
||||
int err;
|
||||
|
||||
deviceaddr = kzalloc(sizeof(*deviceaddr), gfp_flags);
|
||||
if (!deviceaddr)
|
||||
return NULL;
|
||||
|
||||
p = page_address(pdev->pages[0]);
|
||||
pnfs_osd_xdr_decode_deviceaddr(deviceaddr, p);
|
||||
|
||||
odi.systemid_len = deviceaddr->oda_systemid.len;
|
||||
if (odi.systemid_len > sizeof(odi.systemid)) {
|
||||
dprintk("%s: odi.systemid_len > sizeof(systemid=%zd)\n",
|
||||
__func__, sizeof(odi.systemid));
|
||||
err = -EINVAL;
|
||||
goto out;
|
||||
} else if (odi.systemid_len)
|
||||
memcpy(odi.systemid, deviceaddr->oda_systemid.data,
|
||||
odi.systemid_len);
|
||||
odi.osdname_len = deviceaddr->oda_osdname.len;
|
||||
odi.osdname = (u8 *)deviceaddr->oda_osdname.data;
|
||||
|
||||
if (!odi.osdname_len && !odi.systemid_len) {
|
||||
dprintk("%s: !odi.osdname_len && !odi.systemid_len\n",
|
||||
__func__);
|
||||
err = -ENODEV;
|
||||
goto out;
|
||||
}
|
||||
|
||||
retry_lookup:
|
||||
od = osduld_info_lookup(&odi);
|
||||
if (IS_ERR(od)) {
|
||||
err = PTR_ERR(od);
|
||||
dprintk("%s: osduld_info_lookup => %d\n", __func__, err);
|
||||
if (err == -ENODEV && retry_flag) {
|
||||
err = objlayout_autologin(deviceaddr);
|
||||
if (likely(!err)) {
|
||||
retry_flag = false;
|
||||
goto retry_lookup;
|
||||
}
|
||||
}
|
||||
goto out;
|
||||
}
|
||||
|
||||
dprintk("Adding new dev_id(%llx:%llx)\n",
|
||||
_DEVID_LO(&pdev->dev_id), _DEVID_HI(&pdev->dev_id));
|
||||
|
||||
ode = kzalloc(sizeof(*ode), gfp_flags);
|
||||
if (!ode) {
|
||||
dprintk("%s: -ENOMEM od=%p\n", __func__, od);
|
||||
goto out;
|
||||
}
|
||||
|
||||
nfs4_init_deviceid_node(&ode->id_node, server, &pdev->dev_id);
|
||||
kfree(deviceaddr);
|
||||
|
||||
ode->od.od = od;
|
||||
return &ode->id_node;
|
||||
|
||||
out:
|
||||
kfree(deviceaddr);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void copy_single_comp(struct ore_components *oc, unsigned c,
|
||||
struct pnfs_osd_object_cred *src_comp)
|
||||
{
|
||||
struct ore_comp *ocomp = &oc->comps[c];
|
||||
|
||||
WARN_ON(src_comp->oc_cap_key.cred_len > 0); /* libosd is NO_SEC only */
|
||||
WARN_ON(src_comp->oc_cap.cred_len > sizeof(ocomp->cred));
|
||||
|
||||
ocomp->obj.partition = src_comp->oc_object_id.oid_partition_id;
|
||||
ocomp->obj.id = src_comp->oc_object_id.oid_object_id;
|
||||
|
||||
memcpy(ocomp->cred, src_comp->oc_cap.cred, sizeof(ocomp->cred));
|
||||
}
|
||||
|
||||
/*
 * Allocate an objio_segment with room for @numdevs device pointers and
 * @numdevs components in one zeroed allocation.
 *
 * Memory layout of the allocation:
 *
 *	struct objio_segment	olseg;
 *	struct ore_dev		*ods[numdevs];
 *	struct ore_comp		comps[numdevs];
 *
 * The trailing arrays are carved out by hand rather than declared as
 * variable-length members of a wrapper struct.
 *
 * Returns 0 and stores the segment in *@pseg, or -ENOMEM.
 */
static int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags,
			     struct objio_segment **pseg)
{
	struct objio_segment *seg;
	size_t ods_bytes = numdevs * sizeof(seg->oc.ods[0]);
	size_t comps_bytes = numdevs * sizeof(*seg->oc.comps);
	size_t total = sizeof(*seg) + ods_bytes + comps_bytes;

	seg = kzalloc(total, gfp_flags);
	if (unlikely(!seg)) {
		dprintk("%s: Failed allocation numdevs=%d size=%zd\n", __func__,
			numdevs, total);
		return -ENOMEM;
	}

	seg->oc.numdevs = numdevs;
	seg->oc.single_comp = EC_MULTPLE_COMPS;
	/* ods[] starts right after the segment, comps[] right after ods[] */
	seg->oc.ods = (void *)(seg + 1);
	seg->oc.comps = (void *)(seg->oc.ods + numdevs);

	*pseg = seg;
	return 0;
}
|
||||
|
||||
int objio_alloc_lseg(struct pnfs_layout_segment **outp,
|
||||
struct pnfs_layout_hdr *pnfslay,
|
||||
struct pnfs_layout_range *range,
|
||||
struct xdr_stream *xdr,
|
||||
gfp_t gfp_flags)
|
||||
{
|
||||
struct nfs_server *server = NFS_SERVER(pnfslay->plh_inode);
|
||||
struct objio_segment *objio_seg;
|
||||
struct pnfs_osd_xdr_decode_layout_iter iter;
|
||||
struct pnfs_osd_layout layout;
|
||||
struct pnfs_osd_object_cred src_comp;
|
||||
unsigned cur_comp;
|
||||
int err;
|
||||
|
||||
err = pnfs_osd_xdr_decode_layout_map(&layout, &iter, xdr);
|
||||
if (unlikely(err))
|
||||
return err;
|
||||
|
||||
err = __alloc_objio_seg(layout.olo_num_comps, gfp_flags, &objio_seg);
|
||||
if (unlikely(err))
|
||||
return err;
|
||||
|
||||
objio_seg->layout.stripe_unit = layout.olo_map.odm_stripe_unit;
|
||||
objio_seg->layout.group_width = layout.olo_map.odm_group_width;
|
||||
objio_seg->layout.group_depth = layout.olo_map.odm_group_depth;
|
||||
objio_seg->layout.mirrors_p1 = layout.olo_map.odm_mirror_cnt + 1;
|
||||
objio_seg->layout.raid_algorithm = layout.olo_map.odm_raid_algorithm;
|
||||
|
||||
err = ore_verify_layout(layout.olo_map.odm_num_comps,
|
||||
&objio_seg->layout);
|
||||
if (unlikely(err))
|
||||
goto err;
|
||||
|
||||
objio_seg->oc.first_dev = layout.olo_comps_index;
|
||||
cur_comp = 0;
|
||||
while (pnfs_osd_xdr_decode_layout_comp(&src_comp, &iter, xdr, &err)) {
|
||||
struct nfs4_deviceid_node *d;
|
||||
struct objio_dev_ent *ode;
|
||||
|
||||
copy_single_comp(&objio_seg->oc, cur_comp, &src_comp);
|
||||
|
||||
d = nfs4_find_get_deviceid(server,
|
||||
&src_comp.oc_object_id.oid_device_id,
|
||||
pnfslay->plh_lc_cred, gfp_flags);
|
||||
if (!d) {
|
||||
err = -ENXIO;
|
||||
goto err;
|
||||
}
|
||||
|
||||
ode = container_of(d, struct objio_dev_ent, id_node);
|
||||
objio_seg->oc.ods[cur_comp++] = &ode->od;
|
||||
}
|
||||
/* pnfs_osd_xdr_decode_layout_comp returns false on error */
|
||||
if (unlikely(err))
|
||||
goto err;
|
||||
|
||||
*outp = &objio_seg->lseg;
|
||||
return 0;
|
||||
|
||||
err:
|
||||
kfree(objio_seg);
|
||||
dprintk("%s: Error: return %d\n", __func__, err);
|
||||
*outp = NULL;
|
||||
return err;
|
||||
}
|
||||
|
||||
void objio_free_lseg(struct pnfs_layout_segment *lseg)
|
||||
{
|
||||
int i;
|
||||
struct objio_segment *objio_seg = OBJIO_LSEG(lseg);
|
||||
|
||||
for (i = 0; i < objio_seg->oc.numdevs; i++) {
|
||||
struct ore_dev *od = objio_seg->oc.ods[i];
|
||||
struct objio_dev_ent *ode;
|
||||
|
||||
if (!od)
|
||||
break;
|
||||
ode = container_of(od, typeof(*ode), od);
|
||||
nfs4_put_deviceid_node(&ode->id_node);
|
||||
}
|
||||
kfree(objio_seg);
|
||||
}
|
||||
|
||||
static int
|
||||
objio_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type, bool is_reading,
|
||||
struct pnfs_layout_segment *lseg, struct page **pages, unsigned pgbase,
|
||||
loff_t offset, size_t count, void *rpcdata, gfp_t gfp_flags,
|
||||
struct objio_state **outp)
|
||||
{
|
||||
struct objio_segment *objio_seg = OBJIO_LSEG(lseg);
|
||||
struct ore_io_state *ios;
|
||||
int ret;
|
||||
struct __alloc_objio_state {
|
||||
struct objio_state objios;
|
||||
struct pnfs_osd_ioerr ioerrs[objio_seg->oc.numdevs];
|
||||
} *aos;
|
||||
|
||||
aos = kzalloc(sizeof(*aos), gfp_flags);
|
||||
if (unlikely(!aos))
|
||||
return -ENOMEM;
|
||||
|
||||
objlayout_init_ioerrs(&aos->objios.oir, objio_seg->oc.numdevs,
|
||||
aos->ioerrs, rpcdata, pnfs_layout_type);
|
||||
|
||||
ret = ore_get_rw_state(&objio_seg->layout, &objio_seg->oc, is_reading,
|
||||
offset, count, &ios);
|
||||
if (unlikely(ret)) {
|
||||
kfree(aos);
|
||||
return ret;
|
||||
}
|
||||
|
||||
ios->pages = pages;
|
||||
ios->pgbase = pgbase;
|
||||
ios->private = aos;
|
||||
BUG_ON(ios->nr_pages > (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT);
|
||||
|
||||
aos->objios.sync = 0;
|
||||
aos->objios.ios = ios;
|
||||
*outp = &aos->objios;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void objio_free_result(struct objlayout_io_res *oir)
|
||||
{
|
||||
struct objio_state *objios = container_of(oir, struct objio_state, oir);
|
||||
|
||||
ore_put_io_state(objios->ios);
|
||||
kfree(objios);
|
||||
}
|
||||
|
||||
static enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep)
|
||||
{
|
||||
switch (oep) {
|
||||
case OSD_ERR_PRI_NO_ERROR:
|
||||
return (enum pnfs_osd_errno)0;
|
||||
|
||||
case OSD_ERR_PRI_CLEAR_PAGES:
|
||||
BUG_ON(1);
|
||||
return 0;
|
||||
|
||||
case OSD_ERR_PRI_RESOURCE:
|
||||
return PNFS_OSD_ERR_RESOURCE;
|
||||
case OSD_ERR_PRI_BAD_CRED:
|
||||
return PNFS_OSD_ERR_BAD_CRED;
|
||||
case OSD_ERR_PRI_NO_ACCESS:
|
||||
return PNFS_OSD_ERR_NO_ACCESS;
|
||||
case OSD_ERR_PRI_UNREACHABLE:
|
||||
return PNFS_OSD_ERR_UNREACHABLE;
|
||||
case OSD_ERR_PRI_NOT_FOUND:
|
||||
return PNFS_OSD_ERR_NOT_FOUND;
|
||||
case OSD_ERR_PRI_NO_SPACE:
|
||||
return PNFS_OSD_ERR_NO_SPACE;
|
||||
default:
|
||||
WARN_ON(1);
|
||||
/* fallthrough */
|
||||
case OSD_ERR_PRI_EIO:
|
||||
return PNFS_OSD_ERR_EIO;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Per-device error callback passed to ore_check_io(): record the failing
 * object (device id, partition, object id), the translated error and the
 * affected device range in the I/O result.
 */
static void __on_dev_error(struct ore_io_state *ios,
	struct ore_dev *od, unsigned dev_index, enum osd_err_priority oep,
	u64 dev_offset, u64 dev_len)
{
	struct objio_state *state = ios->private;
	struct objio_dev_ent *ent = container_of(od, struct objio_dev_ent, od);
	/* FIXME: what to do with more-than-one-group layouts. We need to
	 * translate from ore_io_state index to oc->comps index
	 */
	unsigned comp = dev_index;
	struct pnfs_osd_objid objid;

	objid.oid_device_id = ent->id_node.deviceid;
	objid.oid_partition_id = ios->oc->comps[comp].obj.partition;
	objid.oid_object_id = ios->oc->comps[comp].obj.id;

	objlayout_io_set_result(&state->oir, comp, &objid,
				osd_pri_2_pnfs_err(oep),
				dev_offset, dev_len, !ios->reading);
}
|
||||
|
||||
/*
|
||||
* read
|
||||
*/
|
||||
static void _read_done(struct ore_io_state *ios, void *private)
|
||||
{
|
||||
struct objio_state *objios = private;
|
||||
ssize_t status;
|
||||
int ret = ore_check_io(ios, &__on_dev_error);
|
||||
|
||||
/* FIXME: _io_free(ios) can we dealocate the libosd resources; */
|
||||
|
||||
if (likely(!ret))
|
||||
status = ios->length;
|
||||
else
|
||||
status = ret;
|
||||
|
||||
objlayout_read_done(&objios->oir, status, objios->sync);
|
||||
}
|
||||
|
||||
int objio_read_pagelist(struct nfs_pgio_header *hdr)
|
||||
{
|
||||
struct objio_state *objios;
|
||||
int ret;
|
||||
|
||||
ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, true,
|
||||
hdr->lseg, hdr->args.pages, hdr->args.pgbase,
|
||||
hdr->args.offset, hdr->args.count, hdr,
|
||||
GFP_KERNEL, &objios);
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
|
||||
objios->ios->done = _read_done;
|
||||
dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
|
||||
hdr->args.offset, hdr->args.count);
|
||||
ret = ore_read(objios->ios);
|
||||
if (unlikely(ret))
|
||||
objio_free_result(&objios->oir);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* write
|
||||
*/
|
||||
static void _write_done(struct ore_io_state *ios, void *private)
|
||||
{
|
||||
struct objio_state *objios = private;
|
||||
ssize_t status;
|
||||
int ret = ore_check_io(ios, &__on_dev_error);
|
||||
|
||||
/* FIXME: _io_free(ios) can we dealocate the libosd resources; */
|
||||
|
||||
if (likely(!ret)) {
|
||||
/* FIXME: should be based on the OSD's persistence model
|
||||
* See OSD2r05 Section 4.13 Data persistence model */
|
||||
objios->oir.committed = NFS_FILE_SYNC;
|
||||
status = ios->length;
|
||||
} else {
|
||||
status = ret;
|
||||
}
|
||||
|
||||
objlayout_write_done(&objios->oir, status, objios->sync);
|
||||
}
|
||||
|
||||
/*
 * ORE read-for-write callback: return the page cache page covering @offset
 * of the inode being written.
 *
 * Offsets at or beyond i_size yield the shared zero page, flagged uptodate.
 * Otherwise the page is looked up (or created and unlocked) in the inode's
 * mapping and *@uptodate reflects its PageUptodate() state.
 * Returns NULL if the page could not be created.
 */
static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
{
	struct objio_state *state = priv;
	struct nfs_pgio_header *hdr = state->oir.rpcdata;
	struct address_space *mapping = hdr->inode->i_mapping;
	pgoff_t index = offset / PAGE_SIZE;
	loff_t isize = i_size_read(hdr->inode);
	struct page *page;

	if (offset >= isize) {
		*uptodate = true;
		dprintk("%s: g_zero_page index=0x%lx\n", __func__, index);
		return ZERO_PAGE(0);
	}

	page = find_get_page(mapping, index);
	if (page)
		goto found;

	page = find_or_create_page(mapping, index, GFP_NOFS);
	if (unlikely(!page)) {
		dprintk("%s: grab_cache_page Failed index=0x%lx\n",
			__func__, index);
		return NULL;
	}
	unlock_page(page);

found:
	*uptodate = PageUptodate(page);
	dprintk("%s: index=0x%lx uptodate=%d\n", __func__, index, *uptodate);
	return page;
}
|
||||
|
||||
static void __r4w_put_page(void *priv, struct page *page)
|
||||
{
|
||||
dprintk("%s: index=0x%lx\n", __func__,
|
||||
(page == ZERO_PAGE(0)) ? -1UL : page->index);
|
||||
if (ZERO_PAGE(0) != page)
|
||||
put_page(page);
|
||||
return;
|
||||
}
|
||||
|
||||
static const struct _ore_r4w_op _r4w_op = {
|
||||
.get_page = &__r4w_get_page,
|
||||
.put_page = &__r4w_put_page,
|
||||
};
|
||||
|
||||
int objio_write_pagelist(struct nfs_pgio_header *hdr, int how)
|
||||
{
|
||||
struct objio_state *objios;
|
||||
int ret;
|
||||
|
||||
ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, false,
|
||||
hdr->lseg, hdr->args.pages, hdr->args.pgbase,
|
||||
hdr->args.offset, hdr->args.count, hdr, GFP_NOFS,
|
||||
&objios);
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
|
||||
objios->sync = 0 != (how & FLUSH_SYNC);
|
||||
objios->ios->r4w = &_r4w_op;
|
||||
|
||||
if (!objios->sync)
|
||||
objios->ios->done = _write_done;
|
||||
|
||||
dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
|
||||
hdr->args.offset, hdr->args.count);
|
||||
ret = ore_write(objios->ios);
|
||||
if (unlikely(ret)) {
|
||||
objio_free_result(&objios->oir);
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (objios->sync)
|
||||
_write_done(objios->ios, objios);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
|
||||
* of bytes (maximum @req->wb_bytes) that can be coalesced.
|
||||
*/
|
||||
static size_t objio_pg_test(struct nfs_pageio_descriptor *pgio,
|
||||
struct nfs_page *prev, struct nfs_page *req)
|
||||
{
|
||||
struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(pgio);
|
||||
unsigned int size;
|
||||
|
||||
size = pnfs_generic_pg_test(pgio, prev, req);
|
||||
|
||||
if (!size || mirror->pg_count + req->wb_bytes >
|
||||
(unsigned long)pgio->pg_layout_private)
|
||||
return 0;
|
||||
|
||||
return min(size, req->wb_bytes);
|
||||
}
|
||||
|
||||
static void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
|
||||
{
|
||||
pnfs_generic_pg_init_read(pgio, req);
|
||||
if (unlikely(pgio->pg_lseg == NULL))
|
||||
return; /* Not pNFS */
|
||||
|
||||
pgio->pg_layout_private = (void *)
|
||||
OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length;
|
||||
}
|
||||
|
||||
/*
 * Check whether @offset falls on a stripe boundary of @layout.
 *
 * RAID-0 layouts always count as aligned.  Otherwise the stripe size is
 * stripe_unit * (group_width - parity); when @offset is not a multiple of
 * it, the bytes remaining to the end of the stripe are stored in
 * *@stripe_end and false is returned.  *@stripe_end is untouched when the
 * result is true.
 */
static bool aligned_on_raid_stripe(u64 offset, struct ore_layout *layout,
				   unsigned long *stripe_end)
{
	unsigned stripe_size;
	u32 within;

	if (layout->raid_algorithm == PNFS_OSD_RAID_0)
		return true;

	stripe_size = layout->stripe_unit *
				(layout->group_width - layout->parity);

	div_u64_rem(offset, stripe_size, &within);
	if (within) {
		*stripe_end = stripe_size - within;
		return false;
	}

	return true;
}
|
||||
|
||||
static void objio_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
|
||||
{
|
||||
unsigned long stripe_end = 0;
|
||||
u64 wb_size;
|
||||
|
||||
if (pgio->pg_dreq == NULL)
|
||||
wb_size = i_size_read(pgio->pg_inode) - req_offset(req);
|
||||
else
|
||||
wb_size = nfs_dreq_bytes_left(pgio->pg_dreq);
|
||||
|
||||
pnfs_generic_pg_init_write(pgio, req, wb_size);
|
||||
if (unlikely(pgio->pg_lseg == NULL))
|
||||
return; /* Not pNFS */
|
||||
|
||||
if (req->wb_offset ||
|
||||
!aligned_on_raid_stripe(req->wb_index * PAGE_SIZE,
|
||||
&OBJIO_LSEG(pgio->pg_lseg)->layout,
|
||||
&stripe_end)) {
|
||||
pgio->pg_layout_private = (void *)stripe_end;
|
||||
} else {
|
||||
pgio->pg_layout_private = (void *)
|
||||
OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length;
|
||||
}
|
||||
}
|
||||
|
||||
static const struct nfs_pageio_ops objio_pg_read_ops = {
|
||||
.pg_init = objio_init_read,
|
||||
.pg_test = objio_pg_test,
|
||||
.pg_doio = pnfs_generic_pg_readpages,
|
||||
.pg_cleanup = pnfs_generic_pg_cleanup,
|
||||
};
|
||||
|
||||
static const struct nfs_pageio_ops objio_pg_write_ops = {
|
||||
.pg_init = objio_init_write,
|
||||
.pg_test = objio_pg_test,
|
||||
.pg_doio = pnfs_generic_pg_writepages,
|
||||
.pg_cleanup = pnfs_generic_pg_cleanup,
|
||||
};
|
||||
|
||||
static struct pnfs_layoutdriver_type objlayout_type = {
|
||||
.id = LAYOUT_OSD2_OBJECTS,
|
||||
.name = "LAYOUT_OSD2_OBJECTS",
|
||||
.flags = PNFS_LAYOUTRET_ON_SETATTR |
|
||||
PNFS_LAYOUTRET_ON_ERROR,
|
||||
|
||||
.max_deviceinfo_size = PAGE_SIZE,
|
||||
.owner = THIS_MODULE,
|
||||
.alloc_layout_hdr = objlayout_alloc_layout_hdr,
|
||||
.free_layout_hdr = objlayout_free_layout_hdr,
|
||||
|
||||
.alloc_lseg = objlayout_alloc_lseg,
|
||||
.free_lseg = objlayout_free_lseg,
|
||||
|
||||
.read_pagelist = objlayout_read_pagelist,
|
||||
.write_pagelist = objlayout_write_pagelist,
|
||||
.pg_read_ops = &objio_pg_read_ops,
|
||||
.pg_write_ops = &objio_pg_write_ops,
|
||||
|
||||
.sync = pnfs_generic_sync,
|
||||
|
||||
.free_deviceid_node = objio_free_deviceid_node,
|
||||
|
||||
.encode_layoutcommit = objlayout_encode_layoutcommit,
|
||||
.encode_layoutreturn = objlayout_encode_layoutreturn,
|
||||
};
|
||||
|
||||
MODULE_DESCRIPTION("pNFS Layout Driver for OSD2 objects");
|
||||
MODULE_AUTHOR("Benny Halevy <bhalevy@panasas.com>");
|
||||
MODULE_LICENSE("GPL");
|
||||
|
||||
static int __init
|
||||
objlayout_init(void)
|
||||
{
|
||||
int ret = pnfs_register_layoutdriver(&objlayout_type);
|
||||
|
||||
if (ret)
|
||||
printk(KERN_INFO
|
||||
"NFS: %s: Registering OSD pNFS Layout Driver failed: error=%d\n",
|
||||
__func__, ret);
|
||||
else
|
||||
printk(KERN_INFO "NFS: %s: Registered OSD pNFS Layout Driver\n",
|
||||
__func__);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void __exit
|
||||
objlayout_exit(void)
|
||||
{
|
||||
pnfs_unregister_layoutdriver(&objlayout_type);
|
||||
printk(KERN_INFO "NFS: %s: Unregistered OSD pNFS Layout Driver\n",
|
||||
__func__);
|
||||
}
|
||||
|
||||
MODULE_ALIAS("nfs-layouttype4-2");
|
||||
|
||||
module_init(objlayout_init);
|
||||
module_exit(objlayout_exit);
|
@ -1,706 +0,0 @@
|
||||
/*
|
||||
* pNFS Objects layout driver high level definitions
|
||||
*
|
||||
* Copyright (C) 2007 Panasas Inc. [year of first publication]
|
||||
* All rights reserved.
|
||||
*
|
||||
* Benny Halevy <bhalevy@panasas.com>
|
||||
* Boaz Harrosh <ooo@electrozaur.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2
|
||||
* See the file COPYING included with this distribution for more details.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. Neither the name of the Panasas company nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
|
||||
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <linux/kmod.h>
|
||||
#include <linux/moduleparam.h>
|
||||
#include <linux/ratelimit.h>
|
||||
#include <scsi/osd_initiator.h>
|
||||
#include "objlayout.h"
|
||||
|
||||
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
|
||||
/*
|
||||
* Create a objlayout layout structure for the given inode and return it.
|
||||
*/
|
||||
struct pnfs_layout_hdr *
|
||||
objlayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
|
||||
{
|
||||
struct objlayout *objlay;
|
||||
|
||||
objlay = kzalloc(sizeof(struct objlayout), gfp_flags);
|
||||
if (!objlay)
|
||||
return NULL;
|
||||
spin_lock_init(&objlay->lock);
|
||||
INIT_LIST_HEAD(&objlay->err_list);
|
||||
dprintk("%s: Return %p\n", __func__, objlay);
|
||||
return &objlay->pnfs_layout;
|
||||
}
|
||||
|
||||
/*
|
||||
* Free an objlayout layout structure
|
||||
*/
|
||||
void
|
||||
objlayout_free_layout_hdr(struct pnfs_layout_hdr *lo)
|
||||
{
|
||||
struct objlayout *objlay = OBJLAYOUT(lo);
|
||||
|
||||
dprintk("%s: objlay %p\n", __func__, objlay);
|
||||
|
||||
WARN_ON(!list_empty(&objlay->err_list));
|
||||
kfree(objlay);
|
||||
}
|
||||
|
||||
/*
|
||||
* Unmarshall layout and store it in pnfslay.
|
||||
*/
|
||||
struct pnfs_layout_segment *
|
||||
objlayout_alloc_lseg(struct pnfs_layout_hdr *pnfslay,
|
||||
struct nfs4_layoutget_res *lgr,
|
||||
gfp_t gfp_flags)
|
||||
{
|
||||
int status = -ENOMEM;
|
||||
struct xdr_stream stream;
|
||||
struct xdr_buf buf = {
|
||||
.pages = lgr->layoutp->pages,
|
||||
.page_len = lgr->layoutp->len,
|
||||
.buflen = lgr->layoutp->len,
|
||||
.len = lgr->layoutp->len,
|
||||
};
|
||||
struct page *scratch;
|
||||
struct pnfs_layout_segment *lseg;
|
||||
|
||||
dprintk("%s: Begin pnfslay %p\n", __func__, pnfslay);
|
||||
|
||||
scratch = alloc_page(gfp_flags);
|
||||
if (!scratch)
|
||||
goto err_nofree;
|
||||
|
||||
xdr_init_decode(&stream, &buf, NULL);
|
||||
xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
|
||||
|
||||
status = objio_alloc_lseg(&lseg, pnfslay, &lgr->range, &stream, gfp_flags);
|
||||
if (unlikely(status)) {
|
||||
dprintk("%s: objio_alloc_lseg Return err %d\n", __func__,
|
||||
status);
|
||||
goto err;
|
||||
}
|
||||
|
||||
__free_page(scratch);
|
||||
|
||||
dprintk("%s: Return %p\n", __func__, lseg);
|
||||
return lseg;
|
||||
|
||||
err:
|
||||
__free_page(scratch);
|
||||
err_nofree:
|
||||
dprintk("%s: Err Return=>%d\n", __func__, status);
|
||||
return ERR_PTR(status);
|
||||
}
|
||||
|
||||
/*
 * Free a layout segment.  A NULL @lseg is logged and otherwise ignored.
 */
void
objlayout_free_lseg(struct pnfs_layout_segment *lseg)
{
	dprintk("%s: freeing layout segment %p\n", __func__, lseg);

	if (likely(lseg))
		objio_free_lseg(lseg);
}
|
||||
|
||||
/*
|
||||
* I/O Operations
|
||||
*/
|
||||
static inline u64
|
||||
end_offset(u64 start, u64 len)
|
||||
{
|
||||
u64 end;
|
||||
|
||||
end = start + len;
|
||||
return end >= start ? end : NFS4_MAX_UINT64;
|
||||
}
|
||||
|
||||
static void _fix_verify_io_params(struct pnfs_layout_segment *lseg,
|
||||
struct page ***p_pages, unsigned *p_pgbase,
|
||||
u64 offset, unsigned long count)
|
||||
{
|
||||
u64 lseg_end_offset;
|
||||
|
||||
BUG_ON(offset < lseg->pls_range.offset);
|
||||
lseg_end_offset = end_offset(lseg->pls_range.offset,
|
||||
lseg->pls_range.length);
|
||||
BUG_ON(offset >= lseg_end_offset);
|
||||
WARN_ON(offset + count > lseg_end_offset);
|
||||
|
||||
if (*p_pgbase > PAGE_SIZE) {
|
||||
dprintk("%s: pgbase(0x%x) > PAGE_SIZE\n", __func__, *p_pgbase);
|
||||
*p_pages += *p_pgbase >> PAGE_SHIFT;
|
||||
*p_pgbase &= ~PAGE_MASK;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* I/O done common code
|
||||
*/
|
||||
static void
|
||||
objlayout_iodone(struct objlayout_io_res *oir)
|
||||
{
|
||||
if (likely(oir->status >= 0)) {
|
||||
objio_free_result(oir);
|
||||
} else {
|
||||
struct objlayout *objlay = oir->objlay;
|
||||
|
||||
spin_lock(&objlay->lock);
|
||||
objlay->delta_space_valid = OBJ_DSU_INVALID;
|
||||
list_add(&objlay->err_list, &oir->err_list);
|
||||
spin_unlock(&objlay->lock);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* objlayout_io_set_result - Set an osd_error code on a specific osd comp.
|
||||
*
|
||||
* The @index component IO failed (error returned from target). Register
|
||||
* the error for later reporting at layout-return.
|
||||
*/
|
||||
void
|
||||
objlayout_io_set_result(struct objlayout_io_res *oir, unsigned index,
|
||||
struct pnfs_osd_objid *pooid, int osd_error,
|
||||
u64 offset, u64 length, bool is_write)
|
||||
{
|
||||
struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[index];
|
||||
|
||||
BUG_ON(index >= oir->num_comps);
|
||||
if (osd_error) {
|
||||
ioerr->oer_component = *pooid;
|
||||
ioerr->oer_comp_offset = offset;
|
||||
ioerr->oer_comp_length = length;
|
||||
ioerr->oer_iswrite = is_write;
|
||||
ioerr->oer_errno = osd_error;
|
||||
|
||||
dprintk("%s: err[%d]: errno=%d is_write=%d dev(%llx:%llx) "
|
||||
"par=0x%llx obj=0x%llx offset=0x%llx length=0x%llx\n",
|
||||
__func__, index, ioerr->oer_errno,
|
||||
ioerr->oer_iswrite,
|
||||
_DEVID_LO(&ioerr->oer_component.oid_device_id),
|
||||
_DEVID_HI(&ioerr->oer_component.oid_device_id),
|
||||
ioerr->oer_component.oid_partition_id,
|
||||
ioerr->oer_component.oid_object_id,
|
||||
ioerr->oer_comp_offset,
|
||||
ioerr->oer_comp_length);
|
||||
} else {
|
||||
/* User need not call if no error is reported */
|
||||
ioerr->oer_errno = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Function scheduled on rpc workqueue to call ->nfs_readlist_complete().
|
||||
* This is because the osd completion is called with ints-off from
|
||||
* the block layer
|
||||
*/
|
||||
static void _rpc_read_complete(struct work_struct *work)
|
||||
{
|
||||
struct rpc_task *task;
|
||||
struct nfs_pgio_header *hdr;
|
||||
|
||||
dprintk("%s enter\n", __func__);
|
||||
task = container_of(work, struct rpc_task, u.tk_work);
|
||||
hdr = container_of(task, struct nfs_pgio_header, task);
|
||||
|
||||
pnfs_ld_read_done(hdr);
|
||||
}
|
||||
|
||||
void
|
||||
objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
|
||||
{
|
||||
struct nfs_pgio_header *hdr = oir->rpcdata;
|
||||
|
||||
oir->status = hdr->task.tk_status = status;
|
||||
if (status >= 0)
|
||||
hdr->res.count = status;
|
||||
else
|
||||
hdr->pnfs_error = status;
|
||||
objlayout_iodone(oir);
|
||||
/* must not use oir after this point */
|
||||
|
||||
dprintk("%s: Return status=%zd eof=%d sync=%d\n", __func__,
|
||||
status, hdr->res.eof, sync);
|
||||
|
||||
if (sync)
|
||||
pnfs_ld_read_done(hdr);
|
||||
else {
|
||||
INIT_WORK(&hdr->task.u.tk_work, _rpc_read_complete);
|
||||
schedule_work(&hdr->task.u.tk_work);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Perform sync or async reads.
|
||||
*/
|
||||
enum pnfs_try_status
|
||||
objlayout_read_pagelist(struct nfs_pgio_header *hdr)
|
||||
{
|
||||
struct inode *inode = hdr->inode;
|
||||
loff_t offset = hdr->args.offset;
|
||||
size_t count = hdr->args.count;
|
||||
int err;
|
||||
loff_t eof;
|
||||
|
||||
eof = i_size_read(inode);
|
||||
if (unlikely(offset + count > eof)) {
|
||||
if (offset >= eof) {
|
||||
err = 0;
|
||||
hdr->res.count = 0;
|
||||
hdr->res.eof = 1;
|
||||
/*FIXME: do we need to call pnfs_ld_read_done() */
|
||||
goto out;
|
||||
}
|
||||
count = eof - offset;
|
||||
}
|
||||
|
||||
hdr->res.eof = (offset + count) >= eof;
|
||||
_fix_verify_io_params(hdr->lseg, &hdr->args.pages,
|
||||
&hdr->args.pgbase,
|
||||
hdr->args.offset, hdr->args.count);
|
||||
|
||||
dprintk("%s: inode(%lx) offset 0x%llx count 0x%zx eof=%d\n",
|
||||
__func__, inode->i_ino, offset, count, hdr->res.eof);
|
||||
|
||||
err = objio_read_pagelist(hdr);
|
||||
out:
|
||||
if (unlikely(err)) {
|
||||
hdr->pnfs_error = err;
|
||||
dprintk("%s: Returned Error %d\n", __func__, err);
|
||||
return PNFS_NOT_ATTEMPTED;
|
||||
}
|
||||
return PNFS_ATTEMPTED;
|
||||
}
|
||||
|
||||
/* Function scheduled on rpc workqueue to call ->nfs_writelist_complete().
|
||||
* This is because the osd completion is called with ints-off from
|
||||
* the block layer
|
||||
*/
|
||||
static void _rpc_write_complete(struct work_struct *work)
|
||||
{
|
||||
struct rpc_task *task;
|
||||
struct nfs_pgio_header *hdr;
|
||||
|
||||
dprintk("%s enter\n", __func__);
|
||||
task = container_of(work, struct rpc_task, u.tk_work);
|
||||
hdr = container_of(task, struct nfs_pgio_header, task);
|
||||
|
||||
pnfs_ld_write_done(hdr);
|
||||
}
|
||||
|
||||
void
|
||||
objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
|
||||
{
|
||||
struct nfs_pgio_header *hdr = oir->rpcdata;
|
||||
|
||||
oir->status = hdr->task.tk_status = status;
|
||||
if (status >= 0) {
|
||||
hdr->res.count = status;
|
||||
hdr->verf.committed = oir->committed;
|
||||
} else {
|
||||
hdr->pnfs_error = status;
|
||||
}
|
||||
objlayout_iodone(oir);
|
||||
/* must not use oir after this point */
|
||||
|
||||
dprintk("%s: Return status %zd committed %d sync=%d\n", __func__,
|
||||
status, hdr->verf.committed, sync);
|
||||
|
||||
if (sync)
|
||||
pnfs_ld_write_done(hdr);
|
||||
else {
|
||||
INIT_WORK(&hdr->task.u.tk_work, _rpc_write_complete);
|
||||
schedule_work(&hdr->task.u.tk_work);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Perform sync or async writes.
|
||||
*/
|
||||
enum pnfs_try_status
|
||||
objlayout_write_pagelist(struct nfs_pgio_header *hdr, int how)
|
||||
{
|
||||
int err;
|
||||
|
||||
_fix_verify_io_params(hdr->lseg, &hdr->args.pages,
|
||||
&hdr->args.pgbase,
|
||||
hdr->args.offset, hdr->args.count);
|
||||
|
||||
err = objio_write_pagelist(hdr, how);
|
||||
if (unlikely(err)) {
|
||||
hdr->pnfs_error = err;
|
||||
dprintk("%s: Returned Error %d\n", __func__, err);
|
||||
return PNFS_NOT_ATTEMPTED;
|
||||
}
|
||||
return PNFS_ATTEMPTED;
|
||||
}
|
||||
|
||||
void
|
||||
objlayout_encode_layoutcommit(struct pnfs_layout_hdr *pnfslay,
|
||||
struct xdr_stream *xdr,
|
||||
const struct nfs4_layoutcommit_args *args)
|
||||
{
|
||||
struct objlayout *objlay = OBJLAYOUT(pnfslay);
|
||||
struct pnfs_osd_layoutupdate lou;
|
||||
__be32 *start;
|
||||
|
||||
dprintk("%s: Begin\n", __func__);
|
||||
|
||||
spin_lock(&objlay->lock);
|
||||
lou.dsu_valid = (objlay->delta_space_valid == OBJ_DSU_VALID);
|
||||
lou.dsu_delta = objlay->delta_space_used;
|
||||
objlay->delta_space_used = 0;
|
||||
objlay->delta_space_valid = OBJ_DSU_INIT;
|
||||
lou.olu_ioerr_flag = !list_empty(&objlay->err_list);
|
||||
spin_unlock(&objlay->lock);
|
||||
|
||||
start = xdr_reserve_space(xdr, 4);
|
||||
|
||||
BUG_ON(pnfs_osd_xdr_encode_layoutupdate(xdr, &lou));
|
||||
|
||||
*start = cpu_to_be32((xdr->p - start - 1) * 4);
|
||||
|
||||
dprintk("%s: Return delta_space_used %lld err %d\n", __func__,
|
||||
lou.dsu_delta, lou.olu_ioerr_flag);
|
||||
}
|
||||
|
||||
static int
|
||||
err_prio(u32 oer_errno)
|
||||
{
|
||||
switch (oer_errno) {
|
||||
case 0:
|
||||
return 0;
|
||||
|
||||
case PNFS_OSD_ERR_RESOURCE:
|
||||
return OSD_ERR_PRI_RESOURCE;
|
||||
case PNFS_OSD_ERR_BAD_CRED:
|
||||
return OSD_ERR_PRI_BAD_CRED;
|
||||
case PNFS_OSD_ERR_NO_ACCESS:
|
||||
return OSD_ERR_PRI_NO_ACCESS;
|
||||
case PNFS_OSD_ERR_UNREACHABLE:
|
||||
return OSD_ERR_PRI_UNREACHABLE;
|
||||
case PNFS_OSD_ERR_NOT_FOUND:
|
||||
return OSD_ERR_PRI_NOT_FOUND;
|
||||
case PNFS_OSD_ERR_NO_SPACE:
|
||||
return OSD_ERR_PRI_NO_SPACE;
|
||||
default:
|
||||
WARN_ON(1);
|
||||
/* fallthrough */
|
||||
case PNFS_OSD_ERR_EIO:
|
||||
return OSD_ERR_PRI_EIO;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
merge_ioerr(struct pnfs_osd_ioerr *dest_err,
|
||||
const struct pnfs_osd_ioerr *src_err)
|
||||
{
|
||||
u64 dest_end, src_end;
|
||||
|
||||
if (!dest_err->oer_errno) {
|
||||
*dest_err = *src_err;
|
||||
/* accumulated device must be blank */
|
||||
memset(&dest_err->oer_component.oid_device_id, 0,
|
||||
sizeof(dest_err->oer_component.oid_device_id));
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
if (dest_err->oer_component.oid_partition_id !=
|
||||
src_err->oer_component.oid_partition_id)
|
||||
dest_err->oer_component.oid_partition_id = 0;
|
||||
|
||||
if (dest_err->oer_component.oid_object_id !=
|
||||
src_err->oer_component.oid_object_id)
|
||||
dest_err->oer_component.oid_object_id = 0;
|
||||
|
||||
if (dest_err->oer_comp_offset > src_err->oer_comp_offset)
|
||||
dest_err->oer_comp_offset = src_err->oer_comp_offset;
|
||||
|
||||
dest_end = end_offset(dest_err->oer_comp_offset,
|
||||
dest_err->oer_comp_length);
|
||||
src_end = end_offset(src_err->oer_comp_offset,
|
||||
src_err->oer_comp_length);
|
||||
if (dest_end < src_end)
|
||||
dest_end = src_end;
|
||||
|
||||
dest_err->oer_comp_length = dest_end - dest_err->oer_comp_offset;
|
||||
|
||||
if ((src_err->oer_iswrite == dest_err->oer_iswrite) &&
|
||||
(err_prio(src_err->oer_errno) > err_prio(dest_err->oer_errno))) {
|
||||
dest_err->oer_errno = src_err->oer_errno;
|
||||
} else if (src_err->oer_iswrite) {
|
||||
dest_err->oer_iswrite = true;
|
||||
dest_err->oer_errno = src_err->oer_errno;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
encode_accumulated_error(struct objlayout *objlay, __be32 *p)
|
||||
{
|
||||
struct objlayout_io_res *oir, *tmp;
|
||||
struct pnfs_osd_ioerr accumulated_err = {.oer_errno = 0};
|
||||
|
||||
list_for_each_entry_safe(oir, tmp, &objlay->err_list, err_list) {
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < oir->num_comps; i++) {
|
||||
struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[i];
|
||||
|
||||
if (!ioerr->oer_errno)
|
||||
continue;
|
||||
|
||||
printk(KERN_ERR "NFS: %s: err[%d]: errno=%d "
|
||||
"is_write=%d dev(%llx:%llx) par=0x%llx "
|
||||
"obj=0x%llx offset=0x%llx length=0x%llx\n",
|
||||
__func__, i, ioerr->oer_errno,
|
||||
ioerr->oer_iswrite,
|
||||
_DEVID_LO(&ioerr->oer_component.oid_device_id),
|
||||
_DEVID_HI(&ioerr->oer_component.oid_device_id),
|
||||
ioerr->oer_component.oid_partition_id,
|
||||
ioerr->oer_component.oid_object_id,
|
||||
ioerr->oer_comp_offset,
|
||||
ioerr->oer_comp_length);
|
||||
|
||||
merge_ioerr(&accumulated_err, ioerr);
|
||||
}
|
||||
list_del(&oir->err_list);
|
||||
objio_free_result(oir);
|
||||
}
|
||||
|
||||
pnfs_osd_xdr_encode_ioerr(p, &accumulated_err);
|
||||
}
|
||||
|
||||
void
|
||||
objlayout_encode_layoutreturn(struct xdr_stream *xdr,
|
||||
const struct nfs4_layoutreturn_args *args)
|
||||
{
|
||||
struct pnfs_layout_hdr *pnfslay = args->layout;
|
||||
struct objlayout *objlay = OBJLAYOUT(pnfslay);
|
||||
struct objlayout_io_res *oir, *tmp;
|
||||
__be32 *start;
|
||||
|
||||
dprintk("%s: Begin\n", __func__);
|
||||
start = xdr_reserve_space(xdr, 4);
|
||||
BUG_ON(!start);
|
||||
|
||||
spin_lock(&objlay->lock);
|
||||
|
||||
list_for_each_entry_safe(oir, tmp, &objlay->err_list, err_list) {
|
||||
__be32 *last_xdr = NULL, *p;
|
||||
unsigned i;
|
||||
int res = 0;
|
||||
|
||||
for (i = 0; i < oir->num_comps; i++) {
|
||||
struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[i];
|
||||
|
||||
if (!ioerr->oer_errno)
|
||||
continue;
|
||||
|
||||
dprintk("%s: err[%d]: errno=%d is_write=%d "
|
||||
"dev(%llx:%llx) par=0x%llx obj=0x%llx "
|
||||
"offset=0x%llx length=0x%llx\n",
|
||||
__func__, i, ioerr->oer_errno,
|
||||
ioerr->oer_iswrite,
|
||||
_DEVID_LO(&ioerr->oer_component.oid_device_id),
|
||||
_DEVID_HI(&ioerr->oer_component.oid_device_id),
|
||||
ioerr->oer_component.oid_partition_id,
|
||||
ioerr->oer_component.oid_object_id,
|
||||
ioerr->oer_comp_offset,
|
||||
ioerr->oer_comp_length);
|
||||
|
||||
p = pnfs_osd_xdr_ioerr_reserve_space(xdr);
|
||||
if (unlikely(!p)) {
|
||||
res = -E2BIG;
|
||||
break; /* accumulated_error */
|
||||
}
|
||||
|
||||
last_xdr = p;
|
||||
pnfs_osd_xdr_encode_ioerr(p, &oir->ioerrs[i]);
|
||||
}
|
||||
|
||||
/* TODO: use xdr_write_pages */
|
||||
if (unlikely(res)) {
|
||||
/* no space for even one error descriptor */
|
||||
BUG_ON(!last_xdr);
|
||||
|
||||
/* we've encountered a situation with lots and lots of
|
||||
* errors and no space to encode them all. Use the last
|
||||
* available slot to report the union of all the
|
||||
* remaining errors.
|
||||
*/
|
||||
encode_accumulated_error(objlay, last_xdr);
|
||||
goto loop_done;
|
||||
}
|
||||
list_del(&oir->err_list);
|
||||
objio_free_result(oir);
|
||||
}
|
||||
loop_done:
|
||||
spin_unlock(&objlay->lock);
|
||||
|
||||
*start = cpu_to_be32((xdr->p - start - 1) * 4);
|
||||
dprintk("%s: Return\n", __func__);
|
||||
}
|
||||
|
||||
enum {
|
||||
OBJLAYOUT_MAX_URI_LEN = 256, OBJLAYOUT_MAX_OSDNAME_LEN = 64,
|
||||
OBJLAYOUT_MAX_SYSID_HEX_LEN = OSD_SYSTEMID_LEN * 2 + 1,
|
||||
OSD_LOGIN_UPCALL_PATHLEN = 256
|
||||
};
|
||||
|
||||
static char osd_login_prog[OSD_LOGIN_UPCALL_PATHLEN] = "/sbin/osd_login";
|
||||
|
||||
module_param_string(osd_login_prog, osd_login_prog, sizeof(osd_login_prog),
|
||||
0600);
|
||||
MODULE_PARM_DESC(osd_login_prog, "Path to the osd_login upcall program");
|
||||
|
||||
struct __auto_login {
|
||||
char uri[OBJLAYOUT_MAX_URI_LEN];
|
||||
char osdname[OBJLAYOUT_MAX_OSDNAME_LEN];
|
||||
char systemid_hex[OBJLAYOUT_MAX_SYSID_HEX_LEN];
|
||||
};
|
||||
|
||||
static int __objlayout_upcall(struct __auto_login *login)
|
||||
{
|
||||
static char *envp[] = { "HOME=/",
|
||||
"TERM=linux",
|
||||
"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
|
||||
NULL
|
||||
};
|
||||
char *argv[8];
|
||||
int ret;
|
||||
|
||||
if (unlikely(!osd_login_prog[0])) {
|
||||
dprintk("%s: osd_login_prog is disabled\n", __func__);
|
||||
return -EACCES;
|
||||
}
|
||||
|
||||
dprintk("%s uri: %s\n", __func__, login->uri);
|
||||
dprintk("%s osdname %s\n", __func__, login->osdname);
|
||||
dprintk("%s systemid_hex %s\n", __func__, login->systemid_hex);
|
||||
|
||||
argv[0] = (char *)osd_login_prog;
|
||||
argv[1] = "-u";
|
||||
argv[2] = login->uri;
|
||||
argv[3] = "-o";
|
||||
argv[4] = login->osdname;
|
||||
argv[5] = "-s";
|
||||
argv[6] = login->systemid_hex;
|
||||
argv[7] = NULL;
|
||||
|
||||
ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
|
||||
/*
|
||||
* Disable the upcall mechanism if we're getting an ENOENT or
|
||||
* EACCES error. The admin can re-enable it on the fly by using
|
||||
* sysfs to set the objlayoutdriver.osd_login_prog module parameter once
|
||||
* the problem has been fixed.
|
||||
*/
|
||||
if (ret == -ENOENT || ret == -EACCES) {
|
||||
printk(KERN_ERR "PNFS-OBJ: %s was not found please set "
|
||||
"objlayoutdriver.osd_login_prog kernel parameter!\n",
|
||||
osd_login_prog);
|
||||
osd_login_prog[0] = '\0';
|
||||
}
|
||||
dprintk("%s %s return value: %d\n", __func__, osd_login_prog, ret);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Assume dest is all zeros */
|
||||
static void __copy_nfsS_and_zero_terminate(struct nfs4_string s,
|
||||
char *dest, int max_len,
|
||||
const char *var_name)
|
||||
{
|
||||
if (!s.len)
|
||||
return;
|
||||
|
||||
if (s.len >= max_len) {
|
||||
pr_warn_ratelimited(
|
||||
"objlayout_autologin: %s: s.len(%d) >= max_len(%d)",
|
||||
var_name, s.len, max_len);
|
||||
s.len = max_len - 1; /* space for null terminator */
|
||||
}
|
||||
|
||||
memcpy(dest, s.data, s.len);
|
||||
}
|
||||
|
||||
/* Assume sysid is all zeros */
|
||||
static void _sysid_2_hex(struct nfs4_string s,
|
||||
char sysid[OBJLAYOUT_MAX_SYSID_HEX_LEN])
|
||||
{
|
||||
int i;
|
||||
char *cur;
|
||||
|
||||
if (!s.len)
|
||||
return;
|
||||
|
||||
if (s.len != OSD_SYSTEMID_LEN) {
|
||||
pr_warn_ratelimited(
|
||||
"objlayout_autologin: systemid_len(%d) != OSD_SYSTEMID_LEN",
|
||||
s.len);
|
||||
if (s.len > OSD_SYSTEMID_LEN)
|
||||
s.len = OSD_SYSTEMID_LEN;
|
||||
}
|
||||
|
||||
cur = sysid;
|
||||
for (i = 0; i < s.len; i++)
|
||||
cur = hex_byte_pack(cur, s.data[i]);
|
||||
}
|
||||
|
||||
int objlayout_autologin(struct pnfs_osd_deviceaddr *deviceaddr)
|
||||
{
|
||||
int rc;
|
||||
struct __auto_login login;
|
||||
|
||||
if (!deviceaddr->oda_targetaddr.ota_netaddr.r_addr.len)
|
||||
return -ENODEV;
|
||||
|
||||
memset(&login, 0, sizeof(login));
|
||||
__copy_nfsS_and_zero_terminate(
|
||||
deviceaddr->oda_targetaddr.ota_netaddr.r_addr,
|
||||
login.uri, sizeof(login.uri), "URI");
|
||||
|
||||
__copy_nfsS_and_zero_terminate(
|
||||
deviceaddr->oda_osdname,
|
||||
login.osdname, sizeof(login.osdname), "OSDNAME");
|
||||
|
||||
_sysid_2_hex(deviceaddr->oda_systemid, login.systemid_hex);
|
||||
|
||||
rc = __objlayout_upcall(&login);
|
||||
if (rc > 0) /* script returns positive values */
|
||||
rc = -ENODEV;
|
||||
|
||||
return rc;
|
||||
}
|
@ -1,183 +0,0 @@
|
||||
/*
|
||||
 * Data types and function declarations for interfacing with the
|
||||
* pNFS standard object layout driver.
|
||||
*
|
||||
* Copyright (C) 2007 Panasas Inc. [year of first publication]
|
||||
* All rights reserved.
|
||||
*
|
||||
* Benny Halevy <bhalevy@panasas.com>
|
||||
* Boaz Harrosh <ooo@electrozaur.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2
|
||||
* See the file COPYING included with this distribution for more details.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. Neither the name of the Panasas company nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
|
||||
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef _OBJLAYOUT_H
|
||||
#define _OBJLAYOUT_H
|
||||
|
||||
#include <linux/nfs_fs.h>
|
||||
#include <linux/pnfs_osd_xdr.h>
|
||||
#include "../pnfs.h"
|
||||
|
||||
/*
|
||||
* per-inode layout
|
||||
*/
|
||||
struct objlayout {
|
||||
struct pnfs_layout_hdr pnfs_layout;
|
||||
|
||||
/* for layout_commit */
|
||||
enum osd_delta_space_valid_enum {
|
||||
OBJ_DSU_INIT = 0,
|
||||
OBJ_DSU_VALID,
|
||||
OBJ_DSU_INVALID,
|
||||
} delta_space_valid;
|
||||
s64 delta_space_used; /* consumed by write ops */
|
||||
|
||||
/* for layout_return */
|
||||
spinlock_t lock;
|
||||
struct list_head err_list;
|
||||
};
|
||||
|
||||
static inline struct objlayout *
|
||||
OBJLAYOUT(struct pnfs_layout_hdr *lo)
|
||||
{
|
||||
return container_of(lo, struct objlayout, pnfs_layout);
|
||||
}
|
||||
|
||||
/*
|
||||
* per-I/O operation state
|
||||
* embedded in objects provider io_state data structure
|
||||
*/
|
||||
struct objlayout_io_res {
|
||||
struct objlayout *objlay;
|
||||
|
||||
void *rpcdata;
|
||||
int status; /* res */
|
||||
int committed; /* res */
|
||||
|
||||
/* Error reporting (layout_return) */
|
||||
struct list_head err_list;
|
||||
unsigned num_comps;
|
||||
/* Pointer to array of error descriptors of size num_comps.
|
||||
* It should contain as many entries as devices in the osd_layout
|
||||
* that participate in the I/O. It is up to the io_engine to allocate
|
||||
* needed space and set num_comps.
|
||||
*/
|
||||
struct pnfs_osd_ioerr *ioerrs;
|
||||
};
|
||||
|
||||
static inline
|
||||
void objlayout_init_ioerrs(struct objlayout_io_res *oir, unsigned num_comps,
|
||||
struct pnfs_osd_ioerr *ioerrs, void *rpcdata,
|
||||
struct pnfs_layout_hdr *pnfs_layout_type)
|
||||
{
|
||||
oir->objlay = OBJLAYOUT(pnfs_layout_type);
|
||||
oir->rpcdata = rpcdata;
|
||||
INIT_LIST_HEAD(&oir->err_list);
|
||||
oir->num_comps = num_comps;
|
||||
oir->ioerrs = ioerrs;
|
||||
}
|
||||
|
||||
/*
|
||||
* Raid engine I/O API
|
||||
*/
|
||||
extern int objio_alloc_lseg(struct pnfs_layout_segment **outp,
|
||||
struct pnfs_layout_hdr *pnfslay,
|
||||
struct pnfs_layout_range *range,
|
||||
struct xdr_stream *xdr,
|
||||
gfp_t gfp_flags);
|
||||
extern void objio_free_lseg(struct pnfs_layout_segment *lseg);
|
||||
|
||||
/* objio_free_result will free these @oir structs received from
|
||||
* objlayout_{read,write}_done
|
||||
*/
|
||||
extern void objio_free_result(struct objlayout_io_res *oir);
|
||||
|
||||
extern int objio_read_pagelist(struct nfs_pgio_header *rdata);
|
||||
extern int objio_write_pagelist(struct nfs_pgio_header *wdata, int how);
|
||||
|
||||
/*
|
||||
* callback API
|
||||
*/
|
||||
extern void objlayout_io_set_result(struct objlayout_io_res *oir,
|
||||
unsigned index, struct pnfs_osd_objid *pooid,
|
||||
int osd_error, u64 offset, u64 length, bool is_write);
|
||||
|
||||
static inline void
|
||||
objlayout_add_delta_space_used(struct objlayout *objlay, s64 space_used)
|
||||
{
|
||||
/* If one of the I/Os errored out and the delta_space_used was
|
||||
* invalid we render the complete report as invalid. Protocol mandate
|
||||
* the DSU be accurate or not reported.
|
||||
*/
|
||||
spin_lock(&objlay->lock);
|
||||
if (objlay->delta_space_valid != OBJ_DSU_INVALID) {
|
||||
objlay->delta_space_valid = OBJ_DSU_VALID;
|
||||
objlay->delta_space_used += space_used;
|
||||
}
|
||||
spin_unlock(&objlay->lock);
|
||||
}
|
||||
|
||||
extern void objlayout_read_done(struct objlayout_io_res *oir,
|
||||
ssize_t status, bool sync);
|
||||
extern void objlayout_write_done(struct objlayout_io_res *oir,
|
||||
ssize_t status, bool sync);
|
||||
|
||||
/*
|
||||
* exported generic objects function vectors
|
||||
*/
|
||||
|
||||
extern struct pnfs_layout_hdr *objlayout_alloc_layout_hdr(struct inode *, gfp_t gfp_flags);
|
||||
extern void objlayout_free_layout_hdr(struct pnfs_layout_hdr *);
|
||||
|
||||
extern struct pnfs_layout_segment *objlayout_alloc_lseg(
|
||||
struct pnfs_layout_hdr *,
|
||||
struct nfs4_layoutget_res *,
|
||||
gfp_t gfp_flags);
|
||||
extern void objlayout_free_lseg(struct pnfs_layout_segment *);
|
||||
|
||||
extern enum pnfs_try_status objlayout_read_pagelist(
|
||||
struct nfs_pgio_header *);
|
||||
|
||||
extern enum pnfs_try_status objlayout_write_pagelist(
|
||||
struct nfs_pgio_header *,
|
||||
int how);
|
||||
|
||||
extern void objlayout_encode_layoutcommit(
|
||||
struct pnfs_layout_hdr *,
|
||||
struct xdr_stream *,
|
||||
const struct nfs4_layoutcommit_args *);
|
||||
|
||||
extern void objlayout_encode_layoutreturn(
|
||||
struct xdr_stream *,
|
||||
const struct nfs4_layoutreturn_args *);
|
||||
|
||||
extern int objlayout_autologin(struct pnfs_osd_deviceaddr *deviceaddr);
|
||||
|
||||
#endif /* _OBJLAYOUT_H */
|
@ -1,415 +0,0 @@
|
||||
/*
|
||||
* Object-Based pNFS Layout XDR layer
|
||||
*
|
||||
* Copyright (C) 2007 Panasas Inc. [year of first publication]
|
||||
* All rights reserved.
|
||||
*
|
||||
* Benny Halevy <bhalevy@panasas.com>
|
||||
* Boaz Harrosh <ooo@electrozaur.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2
|
||||
* See the file COPYING included with this distribution for more details.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. Neither the name of the Panasas company nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
|
||||
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <linux/pnfs_osd_xdr.h>
|
||||
|
||||
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
|
||||
|
||||
/*
|
||||
* The following implementation is based on RFC5664
|
||||
*/
|
||||
|
||||
/*
|
||||
* struct pnfs_osd_objid {
|
||||
* struct nfs4_deviceid oid_device_id;
|
||||
* u64 oid_partition_id;
|
||||
* u64 oid_object_id;
|
||||
* }; // xdr size 32 bytes
|
||||
*/
|
||||
static __be32 *
|
||||
_osd_xdr_decode_objid(__be32 *p, struct pnfs_osd_objid *objid)
|
||||
{
|
||||
p = xdr_decode_opaque_fixed(p, objid->oid_device_id.data,
|
||||
sizeof(objid->oid_device_id.data));
|
||||
|
||||
p = xdr_decode_hyper(p, &objid->oid_partition_id);
|
||||
p = xdr_decode_hyper(p, &objid->oid_object_id);
|
||||
return p;
|
||||
}
|
||||
/*
|
||||
* struct pnfs_osd_opaque_cred {
|
||||
* u32 cred_len;
|
||||
* void *cred;
|
||||
* }; // xdr size [variable]
|
||||
* The return pointers are from the xdr buffer
|
||||
*/
|
||||
static int
|
||||
_osd_xdr_decode_opaque_cred(struct pnfs_osd_opaque_cred *opaque_cred,
|
||||
struct xdr_stream *xdr)
|
||||
{
|
||||
__be32 *p = xdr_inline_decode(xdr, 1);
|
||||
|
||||
if (!p)
|
||||
return -EINVAL;
|
||||
|
||||
opaque_cred->cred_len = be32_to_cpu(*p++);
|
||||
|
||||
p = xdr_inline_decode(xdr, opaque_cred->cred_len);
|
||||
if (!p)
|
||||
return -EINVAL;
|
||||
|
||||
opaque_cred->cred = p;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* struct pnfs_osd_object_cred {
|
||||
* struct pnfs_osd_objid oc_object_id;
|
||||
* u32 oc_osd_version;
|
||||
* u32 oc_cap_key_sec;
|
||||
* struct pnfs_osd_opaque_cred oc_cap_key
|
||||
* struct pnfs_osd_opaque_cred oc_cap;
|
||||
* }; // xdr size 32 + 4 + 4 + [variable] + [variable]
|
||||
*/
|
||||
static int
|
||||
_osd_xdr_decode_object_cred(struct pnfs_osd_object_cred *comp,
|
||||
struct xdr_stream *xdr)
|
||||
{
|
||||
__be32 *p = xdr_inline_decode(xdr, 32 + 4 + 4);
|
||||
int ret;
|
||||
|
||||
if (!p)
|
||||
return -EIO;
|
||||
|
||||
p = _osd_xdr_decode_objid(p, &comp->oc_object_id);
|
||||
comp->oc_osd_version = be32_to_cpup(p++);
|
||||
comp->oc_cap_key_sec = be32_to_cpup(p);
|
||||
|
||||
ret = _osd_xdr_decode_opaque_cred(&comp->oc_cap_key, xdr);
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
|
||||
ret = _osd_xdr_decode_opaque_cred(&comp->oc_cap, xdr);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* struct pnfs_osd_data_map {
|
||||
* u32 odm_num_comps;
|
||||
* u64 odm_stripe_unit;
|
||||
* u32 odm_group_width;
|
||||
* u32 odm_group_depth;
|
||||
* u32 odm_mirror_cnt;
|
||||
* u32 odm_raid_algorithm;
|
||||
* }; // xdr size 4 + 8 + 4 + 4 + 4 + 4
|
||||
*/
|
||||
/* XDR size in bytes of struct pnfs_osd_data_map: five u32 plus one u64 */
static inline int
_osd_data_map_xdr_sz(void)
{
	int size = 4;	/* odm_num_comps */

	size += 8;	/* odm_stripe_unit */
	size += 4;	/* odm_group_width */
	size += 4;	/* odm_group_depth */
	size += 4;	/* odm_mirror_cnt */
	size += 4;	/* odm_raid_algorithm */
	return size;
}
|
||||
|
||||
/*
 * Decode a pnfs_osd_data_map from @p in wire order, log it, and return
 * the position just past it.
 */
static __be32 *
_osd_xdr_decode_data_map(__be32 *p, struct pnfs_osd_data_map *data_map)
{
	struct pnfs_osd_data_map *map = data_map;

	map->odm_num_comps = be32_to_cpup(p++);
	p = xdr_decode_hyper(p, &map->odm_stripe_unit);
	map->odm_group_width = be32_to_cpup(p++);
	map->odm_group_depth = be32_to_cpup(p++);
	map->odm_mirror_cnt = be32_to_cpup(p++);
	map->odm_raid_algorithm = be32_to_cpup(p++);

	dprintk("%s: odm_num_comps=%u odm_stripe_unit=%llu odm_group_width=%u "
		"odm_group_depth=%u odm_mirror_cnt=%u odm_raid_algorithm=%u\n",
		__func__,
		map->odm_num_comps,
		(unsigned long long)map->odm_stripe_unit,
		map->odm_group_width,
		map->odm_group_depth,
		map->odm_mirror_cnt,
		map->odm_raid_algorithm);

	return p;
}
|
||||
|
||||
int pnfs_osd_xdr_decode_layout_map(struct pnfs_osd_layout *layout,
|
||||
struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr)
|
||||
{
|
||||
__be32 *p;
|
||||
|
||||
memset(iter, 0, sizeof(*iter));
|
||||
|
||||
p = xdr_inline_decode(xdr, _osd_data_map_xdr_sz() + 4 + 4);
|
||||
if (unlikely(!p))
|
||||
return -EINVAL;
|
||||
|
||||
p = _osd_xdr_decode_data_map(p, &layout->olo_map);
|
||||
layout->olo_comps_index = be32_to_cpup(p++);
|
||||
layout->olo_num_comps = be32_to_cpup(p++);
|
||||
dprintk("%s: olo_comps_index=%d olo_num_comps=%d\n", __func__,
|
||||
layout->olo_comps_index, layout->olo_num_comps);
|
||||
|
||||
iter->total_comps = layout->olo_num_comps;
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool pnfs_osd_xdr_decode_layout_comp(struct pnfs_osd_object_cred *comp,
|
||||
struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr,
|
||||
int *err)
|
||||
{
|
||||
BUG_ON(iter->decoded_comps > iter->total_comps);
|
||||
if (iter->decoded_comps == iter->total_comps)
|
||||
return false;
|
||||
|
||||
*err = _osd_xdr_decode_object_cred(comp, xdr);
|
||||
if (unlikely(*err)) {
|
||||
dprintk("%s: _osd_xdr_decode_object_cred=>%d decoded_comps=%d "
|
||||
"total_comps=%d\n", __func__, *err,
|
||||
iter->decoded_comps, iter->total_comps);
|
||||
return false; /* stop the loop */
|
||||
}
|
||||
dprintk("%s: dev(%llx:%llx) par=0x%llx obj=0x%llx "
|
||||
"key_len=%u cap_len=%u\n",
|
||||
__func__,
|
||||
_DEVID_LO(&comp->oc_object_id.oid_device_id),
|
||||
_DEVID_HI(&comp->oc_object_id.oid_device_id),
|
||||
comp->oc_object_id.oid_partition_id,
|
||||
comp->oc_object_id.oid_object_id,
|
||||
comp->oc_cap_key.cred_len, comp->oc_cap.cred_len);
|
||||
|
||||
iter->decoded_comps++;
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get Device Information Decoding
|
||||
*
|
||||
* Note: since Device Information is currently done synchronously, all
|
||||
* variable strings fields are left inside the rpc buffer and are only
|
||||
* pointed to by the pnfs_osd_deviceaddr members. So the read buffer
|
||||
* should not be freed while the returned information is in use.
|
||||
*/
|
||||
/*
|
||||
*struct nfs4_string {
|
||||
* unsigned int len;
|
||||
* char *data;
|
||||
*}; // size [variable]
|
||||
* NOTE: Returned string points to inside the XDR buffer
|
||||
*/
|
||||
/*
 * Read a length-prefixed opaque at @p into @str without copying: str->data
 * points at the bytes in place. Returns the position past the padded data.
 */
static __be32 *
__read_u8_opaque(__be32 *p, struct nfs4_string *str)
{
	__be32 *payload = p + 1;

	str->len = be32_to_cpup(p);
	str->data = (char *)payload;

	return payload + XDR_QUADLEN(str->len);
}
|
||||
|
||||
/*
|
||||
* struct pnfs_osd_targetid {
|
||||
* u32 oti_type;
|
||||
* struct nfs4_string oti_scsi_device_id;
|
||||
* };// size 4 + [variable]
|
||||
*/
|
||||
static __be32 *
|
||||
__read_targetid(__be32 *p, struct pnfs_osd_targetid* targetid)
|
||||
{
|
||||
u32 oti_type;
|
||||
|
||||
oti_type = be32_to_cpup(p++);
|
||||
targetid->oti_type = oti_type;
|
||||
|
||||
switch (oti_type) {
|
||||
case OBJ_TARGET_SCSI_NAME:
|
||||
case OBJ_TARGET_SCSI_DEVICE_ID:
|
||||
p = __read_u8_opaque(p, &targetid->oti_scsi_device_id);
|
||||
}
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/*
|
||||
* struct pnfs_osd_net_addr {
|
||||
* struct nfs4_string r_netid;
|
||||
* struct nfs4_string r_addr;
|
||||
* };
|
||||
*/
|
||||
static __be32 *
|
||||
__read_net_addr(__be32 *p, struct pnfs_osd_net_addr* netaddr)
|
||||
{
|
||||
p = __read_u8_opaque(p, &netaddr->r_netid);
|
||||
p = __read_u8_opaque(p, &netaddr->r_addr);
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/*
|
||||
* struct pnfs_osd_targetaddr {
|
||||
* u32 ota_available;
|
||||
* struct pnfs_osd_net_addr ota_netaddr;
|
||||
* };
|
||||
*/
|
||||
static __be32 *
|
||||
__read_targetaddr(__be32 *p, struct pnfs_osd_targetaddr *targetaddr)
|
||||
{
|
||||
u32 ota_available;
|
||||
|
||||
ota_available = be32_to_cpup(p++);
|
||||
targetaddr->ota_available = ota_available;
|
||||
|
||||
if (ota_available)
|
||||
p = __read_net_addr(p, &targetaddr->ota_netaddr);
|
||||
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/*
|
||||
* struct pnfs_osd_deviceaddr {
|
||||
* struct pnfs_osd_targetid oda_targetid;
|
||||
* struct pnfs_osd_targetaddr oda_targetaddr;
|
||||
* u8 oda_lun[8];
|
||||
* struct nfs4_string oda_systemid;
|
||||
* struct pnfs_osd_object_cred oda_root_obj_cred;
|
||||
* struct nfs4_string oda_osdname;
|
||||
* };
|
||||
*/
|
||||
|
||||
/* We need this version for the pnfs_osd_xdr_decode_deviceaddr which does
|
||||
* not have an xdr_stream
|
||||
*/
|
||||
static __be32 *
|
||||
__read_opaque_cred(__be32 *p,
|
||||
struct pnfs_osd_opaque_cred *opaque_cred)
|
||||
{
|
||||
opaque_cred->cred_len = be32_to_cpu(*p++);
|
||||
opaque_cred->cred = p;
|
||||
return p + XDR_QUADLEN(opaque_cred->cred_len);
|
||||
}
|
||||
|
||||
/*
 * Raw-pointer decoder for a pnfs_osd_object_cred: object id, OSD version
 * word, capability-key security word, then the capability key and the
 * capability as two opaques.  Returns the position just past the last one.
 */
static __be32 *
__read_object_cred(__be32 *p, struct pnfs_osd_object_cred *comp)
{
	__be32 *cur = _osd_xdr_decode_objid(p, &comp->oc_object_id);

	/* Two consecutive 32-bit words follow the object id. */
	comp->oc_osd_version = be32_to_cpup(cur);
	comp->oc_cap_key_sec = be32_to_cpup(cur + 1);

	cur = __read_opaque_cred(cur + 2, &comp->oc_cap_key);
	return __read_opaque_cred(cur, &comp->oc_cap);
}
|
||||
|
||||
/*
 * Decode a complete pnfs_osd_deviceaddr from the raw XDR words at @p.
 *
 * Fields are consumed strictly in wire order: target id, target address,
 * LUN, system id, root object credential, OSD name.  The variable-length
 * members are not copied; they point into the buffer behind @p.
 *
 * NOTE(review): the final statement writes a NUL at osdname.data[len],
 * i.e. directly into the caller's buffer.  No buffer length is available
 * here, so this relies on that byte being writable -- confirm with callers.
 */
void pnfs_osd_xdr_decode_deviceaddr(
	struct pnfs_osd_deviceaddr *deviceaddr, __be32 *p)
{
	struct nfs4_string *osdname = &deviceaddr->oda_osdname;
	__be32 *cur;

	cur = __read_targetid(p, &deviceaddr->oda_targetid);
	cur = __read_targetaddr(cur, &deviceaddr->oda_targetaddr);
	cur = xdr_decode_opaque_fixed(cur, deviceaddr->oda_lun,
				      sizeof(deviceaddr->oda_lun));
	cur = __read_u8_opaque(cur, &deviceaddr->oda_systemid);
	cur = __read_object_cred(cur, &deviceaddr->oda_root_obj_cred);
	cur = __read_u8_opaque(cur, osdname);

	/* libosd likes this terminated in dbg. It's last, so no problems */
	osdname->data[osdname->len] = 0;
}
|
||||
|
||||
/*
|
||||
* struct pnfs_osd_layoutupdate {
|
||||
* u32 dsu_valid;
|
||||
* s64 dsu_delta;
|
||||
* u32 olu_ioerr_flag;
|
||||
* }; xdr size 4 + 8 + 4
|
||||
*/
|
||||
int
|
||||
pnfs_osd_xdr_encode_layoutupdate(struct xdr_stream *xdr,
|
||||
struct pnfs_osd_layoutupdate *lou)
|
||||
{
|
||||
__be32 *p = xdr_reserve_space(xdr, 4 + 8 + 4);
|
||||
|
||||
if (!p)
|
||||
return -E2BIG;
|
||||
|
||||
*p++ = cpu_to_be32(lou->dsu_valid);
|
||||
if (lou->dsu_valid)
|
||||
p = xdr_encode_hyper(p, lou->dsu_delta);
|
||||
*p++ = cpu_to_be32(lou->olu_ioerr_flag);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* struct pnfs_osd_objid {
|
||||
* struct nfs4_deviceid oid_device_id;
|
||||
* u64 oid_partition_id;
|
||||
* u64 oid_object_id;
|
||||
* }; // xdr size 32 bytes
|
||||
*/
|
||||
static inline __be32 *
|
||||
pnfs_osd_xdr_encode_objid(__be32 *p, struct pnfs_osd_objid *object_id)
|
||||
{
|
||||
p = xdr_encode_opaque_fixed(p, &object_id->oid_device_id.data,
|
||||
sizeof(object_id->oid_device_id.data));
|
||||
p = xdr_encode_hyper(p, object_id->oid_partition_id);
|
||||
p = xdr_encode_hyper(p, object_id->oid_object_id);
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/*
|
||||
* struct pnfs_osd_ioerr {
|
||||
* struct pnfs_osd_objid oer_component;
|
||||
* u64 oer_comp_offset;
|
||||
* u64 oer_comp_length;
|
||||
* u32 oer_iswrite;
|
||||
* u32 oer_errno;
|
||||
* }; // xdr size 32 + 24 bytes
|
||||
*/
|
||||
void pnfs_osd_xdr_encode_ioerr(__be32 *p, struct pnfs_osd_ioerr *ioerr)
|
||||
{
|
||||
p = pnfs_osd_xdr_encode_objid(p, &ioerr->oer_component);
|
||||
p = xdr_encode_hyper(p, ioerr->oer_comp_offset);
|
||||
p = xdr_encode_hyper(p, ioerr->oer_comp_length);
|
||||
*p++ = cpu_to_be32(ioerr->oer_iswrite);
|
||||
*p = cpu_to_be32(ioerr->oer_errno);
|
||||
}
|
||||
|
||||
__be32 *pnfs_osd_xdr_ioerr_reserve_space(struct xdr_stream *xdr)
|
||||
{
|
||||
__be32 *p;
|
||||
|
||||
p = xdr_reserve_space(xdr, 32 + 24);
|
||||
if (unlikely(!p))
|
||||
dprintk("%s: out of xdr space\n", __func__);
|
||||
|
||||
return p;
|
||||
}
|
Loading…
Reference in New Issue
Block a user