NFSv4/pNFS: Do layout state recovery upon reboot

Some pNFS implementations, such as flexible files, want the client to
send the layout stats and layout errors that may have occurred while the
metadata server was booting. To do so, the client sends a layoutreturn
with an all-zero stateid while the server is in grace during reboot
recovery.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
This commit is contained in:
Trond Myklebust 2024-06-13 01:00:55 -04:00 committed by Anna Schumaker
parent ad3c436dac
commit 5468fc8298
5 changed files with 110 additions and 9 deletions

View File

@ -2548,7 +2548,7 @@ ff_layout_set_layoutdriver(struct nfs_server *server,
const struct nfs_fh *dummy)
{
#if IS_ENABLED(CONFIG_NFS_V4_2)
server->caps |= NFS_CAP_LAYOUTSTATS;
server->caps |= NFS_CAP_LAYOUTSTATS | NFS_CAP_REBOOT_LAYOUTRETURN;
#endif
return 0;
}

View File

@ -1863,6 +1863,7 @@ static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp)
if (!nfs4_state_clear_reclaim_reboot(clp))
return;
pnfs_destroy_all_layouts(clp);
ops = clp->cl_mvops->reboot_recovery_ops;
cred = nfs4_get_clid_cred(clp);
err = nfs4_reclaim_complete(clp, ops, cred);
@ -2068,7 +2069,6 @@ static int nfs4_establish_lease(struct nfs_client *clp)
put_cred(cred);
if (status != 0)
return status;
pnfs_destroy_all_layouts(clp);
return 0;
}
@ -2680,6 +2680,8 @@ static void nfs4_state_manager(struct nfs_client *clp)
section = "reclaim reboot";
status = nfs4_do_reclaim(clp,
clp->cl_mvops->reboot_recovery_ops);
if (status == 0)
status = pnfs_layout_handle_reboot(clp);
if (status == -EAGAIN)
continue;
if (status < 0)

View File

@ -61,6 +61,7 @@ static void pnfs_free_returned_lsegs(struct pnfs_layout_hdr *lo,
u32 seq);
static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg,
struct list_head *tmp_list);
static int pnfs_layout_return_on_reboot(struct pnfs_layout_hdr *lo);
/* Return the registered pnfs layout driver module matching given id */
static struct pnfs_layoutdriver_type *
@ -937,25 +938,37 @@ restart:
return pnfs_layout_free_bulk_destroy_list(&layout_list, mode);
}
int pnfs_layout_destroy_byclid(struct nfs_client *clp,
enum pnfs_layout_destroy_mode mode)
static void pnfs_layout_build_destroy_list_byclient(struct nfs_client *clp,
struct list_head *list)
{
struct nfs_server *server;
LIST_HEAD(layout_list);
spin_lock(&clp->cl_lock);
rcu_read_lock();
restart:
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
if (pnfs_layout_bulk_destroy_byserver_locked(clp,
server,
&layout_list) != 0)
if (pnfs_layout_bulk_destroy_byserver_locked(clp, server,
list) != 0)
goto restart;
}
rcu_read_unlock();
spin_unlock(&clp->cl_lock);
}
return pnfs_layout_free_bulk_destroy_list(&layout_list, mode);
/*
 * Gather the layouts of every superblock belonging to @clp onto @list,
 * then hand the whole list to pnfs_layout_free_bulk_destroy_list() using
 * the requested @mode.
 *
 * @list may already contain entries supplied by the caller; they are
 * processed together with the ones collected here.
 *
 * Returns whatever pnfs_layout_free_bulk_destroy_list() reports.
 */
static int pnfs_layout_do_destroy_byclid(struct nfs_client *clp,
					 struct list_head *list,
					 enum pnfs_layout_destroy_mode mode)
{
	int status;

	pnfs_layout_build_destroy_list_byclient(clp, list);
	status = pnfs_layout_free_bulk_destroy_list(list, mode);

	return status;
}
int pnfs_layout_destroy_byclid(struct nfs_client *clp,
enum pnfs_layout_destroy_mode mode)
{
LIST_HEAD(layout_list);
return pnfs_layout_do_destroy_byclid(clp, &layout_list, mode);
}
/*
@ -971,6 +984,67 @@ pnfs_destroy_all_layouts(struct nfs_client *clp)
pnfs_layout_destroy_byclid(clp, PNFS_LAYOUT_INVALIDATE);
}
/*
 * Build the list of layouts that take part in reboot recovery for @clp.
 *
 * Walks clp->cl_superblocks and, for each server that has
 * NFS_CAP_REBOOT_LAYOUTRETURN set in server->caps, passes @list to
 * pnfs_layout_bulk_destroy_byserver_locked(). Servers without the
 * capability are skipped, so their layouts are not added here.
 *
 * The walk runs with clp->cl_lock held and inside an RCU read-side
 * section; both are released before returning.
 */
static void pnfs_layout_build_recover_list_byclient(struct nfs_client *clp,
struct list_head *list)
{
struct nfs_server *server;
spin_lock(&clp->cl_lock);
rcu_read_lock();
restart:
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
/* Only servers that opted in to layoutreturn-on-reboot are recovered. */
if (!(server->caps & NFS_CAP_REBOOT_LAYOUTRETURN))
continue;
/*
 * NOTE(review): a non-zero return presumably means the helper had to
 * drop the locks, invalidating the iteration, hence the walk starts
 * over from the head of the list -- confirm against the helper.
 */
if (pnfs_layout_bulk_destroy_byserver_locked(clp, server,
list) != 0)
goto restart;
}
rcu_read_unlock();
spin_unlock(&clp->cl_lock);
}
/*
 * Send a reboot-time layoutreturn for each layout header on @list
 * (linked through plh_bulk_destroy).
 *
 * The walk continues only while each layoutreturn succeeds. It stops at
 * the first non-zero result:
 *   -NFS4ERR_BAD_STATEID: clear NFS_CAP_REBOOT_LAYOUTRETURN on that
 *                         layout's server and report success.
 *   -NFS4ERR_NO_GRACE:    report success.
 *   anything else:        return that error to the caller.
 *
 * Returns 0 or the first unexpected error.
 */
static int pnfs_layout_bulk_list_reboot(struct list_head *list)
{
	struct pnfs_layout_hdr *lo;

	list_for_each_entry(lo, list, plh_bulk_destroy) {
		/* Look up the server before issuing the layoutreturn. */
		struct nfs_server *server = NFS_SERVER(lo->plh_inode);
		int status = pnfs_layout_return_on_reboot(lo);

		if (status == 0)
			continue;

		if (status == -NFS4ERR_BAD_STATEID) {
			server->caps &= ~NFS_CAP_REBOOT_LAYOUTRETURN;
			return 0;
		}
		if (status == -NFS4ERR_NO_GRACE)
			return 0;

		return status;
	}

	return 0;
}
int pnfs_layout_handle_reboot(struct nfs_client *clp)
{
LIST_HEAD(list);
int ret = 0, ret2;
pnfs_layout_build_recover_list_byclient(clp, &list);
if (!list_empty(&list))
ret = pnfs_layout_bulk_list_reboot(&list);
ret2 = pnfs_layout_do_destroy_byclid(clp, &list,
PNFS_LAYOUT_INVALIDATE);
if (!ret)
ret = ret2;
return (ret == 0) ? 0 : -EAGAIN;
}
static void
pnfs_set_layout_cred(struct pnfs_layout_hdr *lo, const struct cred *cred)
{
@ -1445,6 +1519,24 @@ pnfs_commit_and_return_layout(struct inode *inode)
return ret;
}
/*
 * Send a layoutreturn carrying the all-zero stateid for layout @lo.
 *
 * Under the inode's i_lock, checks that the layout is still valid; if it
 * is not, nothing is sent and 0 is returned. Otherwise a reference to the
 * layout credential and to the layout header is taken before the lock is
 * dropped, and the layoutreturn is issued for IOMODE_ANY with
 * PNFS_FL_LAYOUTRETURN_PRIVILEGED.
 *
 * NOTE(review): the references taken here are presumably consumed by
 * pnfs_send_layoutreturn() -- confirm against that function.
 *
 * Returns 0 if the layout was invalid, else pnfs_send_layoutreturn()'s
 * result.
 */
static int pnfs_layout_return_on_reboot(struct pnfs_layout_hdr *lo)
{
	struct inode *ino = lo->plh_inode;
	const struct cred *cred;

	spin_lock(&ino->i_lock);
	if (pnfs_layout_is_valid(lo)) {
		cred = get_cred(lo->plh_lc_cred);
		pnfs_get_layout_hdr(lo);
		spin_unlock(&ino->i_lock);

		return pnfs_send_layoutreturn(lo, &zero_stateid, &cred,
					      IOMODE_ANY,
					      PNFS_FL_LAYOUTRETURN_PRIVILEGED);
	}
	spin_unlock(&ino->i_lock);

	return 0;
}
bool pnfs_roc(struct inode *ino,
struct nfs4_layoutreturn_args *args,
struct nfs4_layoutreturn_res *res,

View File

@ -356,6 +356,7 @@ void pnfs_error_mark_layout_for_return(struct inode *inode,
struct pnfs_layout_segment *lseg);
void pnfs_layout_return_unused_byclid(struct nfs_client *clp,
enum pnfs_iomode iomode);
int pnfs_layout_handle_reboot(struct nfs_client *clp);
/* nfs4_deviceid_flags */
enum {
@ -737,6 +738,11 @@ static inline void pnfs_destroy_layout_final(struct nfs_inode *nfsi)
{
}
/*
 * No-op variant of pnfs_layout_handle_reboot(): does nothing with @clp
 * and always reports success.
 * NOTE(review): presumably the stub used when pNFS support is compiled
 * out -- confirm against the enclosing #if in this header.
 */
static inline int
pnfs_layout_handle_reboot(struct nfs_client *clp)
{
	return 0;
}
static inline struct pnfs_layout_segment *
pnfs_get_lseg(struct pnfs_layout_segment *lseg)
{

View File

@ -278,6 +278,7 @@ struct nfs_server {
#define NFS_CAP_LGOPEN (1U << 5)
#define NFS_CAP_CASE_INSENSITIVE (1U << 6)
#define NFS_CAP_CASE_PRESERVING (1U << 7)
#define NFS_CAP_REBOOT_LAYOUTRETURN (1U << 8)
#define NFS_CAP_OPEN_XOR (1U << 12)
#define NFS_CAP_DELEGTIME (1U << 13)
#define NFS_CAP_POSIX_LOCK (1U << 14)