linux/fs/afs/server.c
David Howells c435ee3455 afs: Overhaul the callback handling
Overhaul the AFS callback handling by the following means:

 (1) Don't give up callback promises on vnodes that we are no longer using,
     rather let them just expire on the server or let the server break
     them.  This is actually more efficient for the server as the callback
     lookup is expensive if there are lots of extant callbacks.

 (2) Only give up the callback promises we have from a server when the
     server record is destroyed.  Then we can just give up *all* the
     callback promises on it in one go.

 (3) Servers can end up being shared between cells if cells are aliased, so
     don't add all the vnodes being backed by a particular server into a
     big FID-indexed tree on that server as there may be duplicates.

     Instead have each volume instance (~= superblock) register an interest
     in a server as it starts to make use of it and use this to allow the
     processor for callbacks from the server to find the superblock and
     thence the inode corresponding to the FID being broken by means of
     ilookup_nowait().

 (4) Rather than iterating over the entire callback list when a mass-break
     comes in from the server, maintain a counter of mass-breaks in
     afs_server (cb_seq) and make afs_validate() check it against the copy
     in afs_vnode.

     It would be nice not to have to take a read_lock whilst doing this,
     but that's tricky without using RCU.

 (5) Save a ref on the fileserver we're using for a call in the afs_call
     struct so that we can access its cb_s_break during call decoding.

 (6) Write-lock around callback and status storage in a vnode and read-lock
     around getattr so that we don't see the status mid-update.

This has the following consequences:

 (1) Data invalidation isn't seen until someone calls afs_validate() on a
     vnode.  Unfortunately, we need to use a key to query the server, but
     getting one from a background thread is tricky without caching loads
     of keys all over the place.

 (2) Mass invalidation isn't seen until someone calls afs_validate().

 (3) Callback breaking is going to hit the inode_hash_lock quite a bit.
     Could this be replaced with rcu_read_lock() since inodes are destroyed
     under RCU conditions.

Signed-off-by: David Howells <dhowells@redhat.com>
2017-11-13 15:38:18 +00:00

349 lines
8.3 KiB
C

/* AFS server record management
*
* Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/sched.h>
#include <linux/slab.h>
#include "afs_fs.h"
#include "internal.h"
static unsigned afs_server_timeout = 10; /* server timeout in seconds */
static void afs_inc_servers_outstanding(struct afs_net *net)
{
atomic_inc(&net->servers_outstanding);
}
static void afs_dec_servers_outstanding(struct afs_net *net)
{
if (atomic_dec_and_test(&net->servers_outstanding))
wake_up_atomic_t(&net->servers_outstanding);
}
void afs_server_timer(struct timer_list *timer)
{
struct afs_net *net = container_of(timer, struct afs_net, server_timer);
if (!queue_work(afs_wq, &net->server_reaper))
afs_dec_servers_outstanding(net);
}
/*
* install a server record in the master tree
*/
static int afs_install_server(struct afs_server *server)
{
struct afs_server *xserver;
struct afs_net *net = server->cell->net;
struct rb_node **pp, *p;
int ret, diff;
_enter("%p", server);
write_lock(&net->servers_lock);
ret = -EEXIST;
pp = &net->servers.rb_node;
p = NULL;
while (*pp) {
p = *pp;
_debug("- consider %p", p);
xserver = rb_entry(p, struct afs_server, master_rb);
diff = memcmp(&server->addr, &xserver->addr, sizeof(server->addr));
if (diff < 0)
pp = &(*pp)->rb_left;
else if (diff > 0)
pp = &(*pp)->rb_right;
else
goto error;
}
rb_link_node(&server->master_rb, p, pp);
rb_insert_color(&server->master_rb, &net->servers);
ret = 0;
error:
write_unlock(&net->servers_lock);
return ret;
}
/*
* allocate a new server record
*/
static struct afs_server *afs_alloc_server(struct afs_cell *cell,
const struct sockaddr_rxrpc *addr)
{
struct afs_server *server;
_enter("");
server = kzalloc(sizeof(struct afs_server), GFP_KERNEL);
if (server) {
atomic_set(&server->usage, 1);
server->net = cell->net;
server->cell = cell;
INIT_LIST_HEAD(&server->link);
INIT_LIST_HEAD(&server->grave);
init_rwsem(&server->sem);
spin_lock_init(&server->fs_lock);
INIT_LIST_HEAD(&server->cb_interests);
rwlock_init(&server->cb_break_lock);
server->addr = *addr;
afs_inc_servers_outstanding(cell->net);
_leave(" = %p{%d}", server, atomic_read(&server->usage));
} else {
_leave(" = NULL [nomem]");
}
return server;
}
/*
* get an FS-server record for a cell
*/
struct afs_server *afs_lookup_server(struct afs_cell *cell,
struct sockaddr_rxrpc *addr)
{
struct afs_server *server, *candidate;
_enter("%p,%pIS", cell, &addr->transport);
/* quick scan of the list to see if we already have the server */
read_lock(&cell->servers_lock);
list_for_each_entry(server, &cell->servers, link) {
if (memcmp(&server->addr, addr, sizeof(*addr)) == 0)
goto found_server_quickly;
}
read_unlock(&cell->servers_lock);
candidate = afs_alloc_server(cell, addr);
if (!candidate) {
_leave(" = -ENOMEM");
return ERR_PTR(-ENOMEM);
}
write_lock(&cell->servers_lock);
/* check the cell's server list again */
list_for_each_entry(server, &cell->servers, link) {
if (memcmp(&server->addr, addr, sizeof(*addr)) == 0)
goto found_server;
}
_debug("new");
server = candidate;
if (afs_install_server(server) < 0)
goto server_in_two_cells;
afs_get_cell(cell);
list_add_tail(&server->link, &cell->servers);
write_unlock(&cell->servers_lock);
_leave(" = %p{%d}", server, atomic_read(&server->usage));
return server;
/* found a matching server quickly */
found_server_quickly:
_debug("found quickly");
afs_get_server(server);
read_unlock(&cell->servers_lock);
no_longer_unused:
if (!list_empty(&server->grave)) {
spin_lock(&cell->net->server_graveyard_lock);
list_del_init(&server->grave);
spin_unlock(&cell->net->server_graveyard_lock);
}
_leave(" = %p{%d}", server, atomic_read(&server->usage));
return server;
/* found a matching server on the second pass */
found_server:
_debug("found");
afs_get_server(server);
write_unlock(&cell->servers_lock);
kfree(candidate);
goto no_longer_unused;
/* found a server that seems to be in two cells */
server_in_two_cells:
write_unlock(&cell->servers_lock);
kfree(candidate);
afs_dec_servers_outstanding(cell->net);
printk(KERN_NOTICE "kAFS: Server %pI4 appears to be in two cells\n",
addr);
_leave(" = -EEXIST");
return ERR_PTR(-EEXIST);
}
/*
* look up a server by its IP address
*/
struct afs_server *afs_find_server(struct afs_net *net,
const struct sockaddr_rxrpc *srx)
{
struct afs_server *server = NULL;
struct rb_node *p;
int diff;
_enter("{%d,%pIS}", srx->transport.family, &srx->transport);
read_lock(&net->servers_lock);
p = net->servers.rb_node;
while (p) {
server = rb_entry(p, struct afs_server, master_rb);
_debug("- consider %p", p);
diff = memcmp(srx, &server->addr, sizeof(*srx));
if (diff < 0) {
p = p->rb_left;
} else if (diff > 0) {
p = p->rb_right;
} else {
afs_get_server(server);
goto found;
}
}
server = NULL;
found:
read_unlock(&net->servers_lock);
_leave(" = %p", server);
return server;
}
static void afs_set_server_timer(struct afs_net *net, time64_t delay)
{
afs_inc_servers_outstanding(net);
if (net->live) {
if (timer_reduce(&net->server_timer, jiffies + delay * HZ))
afs_dec_servers_outstanding(net);
} else {
if (!queue_work(afs_wq, &net->server_reaper))
afs_dec_servers_outstanding(net);
}
}
/*
* destroy a server record
* - removes from the cell list
*/
void afs_put_server(struct afs_net *net, struct afs_server *server)
{
if (!server)
return;
_enter("%p{%d}", server, atomic_read(&server->usage));
_debug("PUT SERVER %d", atomic_read(&server->usage));
ASSERTCMP(atomic_read(&server->usage), >, 0);
if (likely(!atomic_dec_and_test(&server->usage))) {
_leave("");
return;
}
spin_lock(&net->server_graveyard_lock);
if (atomic_read(&server->usage) == 0) {
list_move_tail(&server->grave, &net->server_graveyard);
server->time_of_death = ktime_get_real_seconds();
afs_set_server_timer(net, afs_server_timeout);
}
spin_unlock(&net->server_graveyard_lock);
_leave(" [dead]");
}
/*
* destroy a dead server
*/
static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
{
_enter("%p", server);
afs_fs_give_up_all_callbacks(server, NULL, false);
afs_put_cell(net, server->cell);
kfree(server);
afs_dec_servers_outstanding(net);
}
/*
* reap dead server records
*/
void afs_reap_server(struct work_struct *work)
{
LIST_HEAD(corpses);
struct afs_server *server;
struct afs_net *net = container_of(work, struct afs_net, server_reaper);
unsigned long delay, expiry;
time64_t now;
now = ktime_get_real_seconds();
spin_lock(&net->server_graveyard_lock);
while (!list_empty(&net->server_graveyard)) {
server = list_entry(net->server_graveyard.next,
struct afs_server, grave);
/* the queue is ordered most dead first */
if (net->live) {
expiry = server->time_of_death + afs_server_timeout;
if (expiry > now) {
delay = (expiry - now);
afs_set_server_timer(net, delay);
break;
}
}
write_lock(&server->cell->servers_lock);
write_lock(&net->servers_lock);
if (atomic_read(&server->usage) > 0) {
list_del_init(&server->grave);
} else {
list_move_tail(&server->grave, &corpses);
list_del_init(&server->link);
rb_erase(&server->master_rb, &net->servers);
}
write_unlock(&net->servers_lock);
write_unlock(&server->cell->servers_lock);
}
spin_unlock(&net->server_graveyard_lock);
/* now reap the corpses we've extracted */
while (!list_empty(&corpses)) {
server = list_entry(corpses.next, struct afs_server, grave);
list_del(&server->grave);
afs_destroy_server(net, server);
}
afs_dec_servers_outstanding(net);
}
/*
* Discard all the server records from a net namespace when it is destroyed or
* the afs module is removed.
*/
void __net_exit afs_purge_servers(struct afs_net *net)
{
if (del_timer_sync(&net->server_timer))
atomic_dec(&net->servers_outstanding);
afs_inc_servers_outstanding(net);
if (!queue_work(afs_wq, &net->server_reaper))
afs_dec_servers_outstanding(net);
wait_on_atomic_t(&net->servers_outstanding, atomic_t_wait,
TASK_UNINTERRUPTIBLE);
}