forked from Minki/linux
afs: Probe multiple fileservers simultaneously
Send probes to all the unprobed fileservers in a fileserver list on all addresses simultaneously in an attempt to find out the fastest route whilst not getting stuck for 20s on any server or address that we don't get a reply from. This alleviates the problem whereby attempting to access a new server can take a long time because the rotation algorithm ends up rotating through all servers and addresses until it finds one that responds. Signed-off-by: David Howells <dhowells@redhat.com>
This commit is contained in:
parent
18ac61853c
commit
3bf0fb6f33
@ -17,6 +17,7 @@ kafs-y := \
|
||||
file.o \
|
||||
flock.o \
|
||||
fsclient.o \
|
||||
fs_probe.o \
|
||||
inode.o \
|
||||
main.o \
|
||||
misc.o \
|
||||
@ -29,8 +30,9 @@ kafs-y := \
|
||||
super.o \
|
||||
netdevices.o \
|
||||
vlclient.o \
|
||||
vl_rotate.o \
|
||||
vl_list.o \
|
||||
vl_probe.o \
|
||||
vl_rotate.o \
|
||||
volume.o \
|
||||
write.o \
|
||||
xattr.o \
|
||||
|
@ -303,6 +303,8 @@ void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr, u16 port)
|
||||
sizeof(alist->addrs[0]) * (alist->nr_addrs - i));
|
||||
|
||||
srx = &alist->addrs[i];
|
||||
srx->srx_family = AF_RXRPC;
|
||||
srx->transport_type = SOCK_DGRAM;
|
||||
srx->transport_len = sizeof(srx->transport.sin);
|
||||
srx->transport.sin.sin_family = AF_INET;
|
||||
srx->transport.sin.sin_port = htons(port);
|
||||
@ -341,6 +343,8 @@ void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port)
|
||||
sizeof(alist->addrs[0]) * (alist->nr_addrs - i));
|
||||
|
||||
srx = &alist->addrs[i];
|
||||
srx->srx_family = AF_RXRPC;
|
||||
srx->transport_type = SOCK_DGRAM;
|
||||
srx->transport_len = sizeof(srx->transport.sin6);
|
||||
srx->transport.sin6.sin6_family = AF_INET6;
|
||||
srx->transport.sin6.sin6_port = htons(port);
|
||||
@ -353,23 +357,32 @@ void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port)
|
||||
*/
|
||||
bool afs_iterate_addresses(struct afs_addr_cursor *ac)
|
||||
{
|
||||
_enter("%hu+%hd", ac->start, (short)ac->index);
|
||||
unsigned long set, failed;
|
||||
int index;
|
||||
|
||||
if (!ac->alist)
|
||||
return false;
|
||||
|
||||
set = ac->alist->responded;
|
||||
failed = ac->alist->failed;
|
||||
_enter("%lx-%lx-%lx,%d", set, failed, ac->tried, ac->index);
|
||||
|
||||
ac->nr_iterations++;
|
||||
|
||||
if (ac->begun) {
|
||||
ac->index++;
|
||||
if (ac->index == ac->alist->nr_addrs)
|
||||
ac->index = 0;
|
||||
set &= ~(failed | ac->tried);
|
||||
|
||||
if (ac->index == ac->start)
|
||||
return false;
|
||||
}
|
||||
if (!set)
|
||||
return false;
|
||||
|
||||
ac->begun = true;
|
||||
index = READ_ONCE(ac->alist->preferred);
|
||||
if (test_bit(index, &set))
|
||||
goto selected;
|
||||
|
||||
index = __ffs(set);
|
||||
|
||||
selected:
|
||||
ac->index = index;
|
||||
set_bit(index, &ac->tried);
|
||||
ac->responded = false;
|
||||
return true;
|
||||
}
|
||||
@ -383,12 +396,13 @@ int afs_end_cursor(struct afs_addr_cursor *ac)
|
||||
|
||||
alist = ac->alist;
|
||||
if (alist) {
|
||||
if (ac->responded && ac->index != ac->start)
|
||||
WRITE_ONCE(alist->index, ac->index);
|
||||
if (ac->responded &&
|
||||
ac->index != alist->preferred &&
|
||||
test_bit(ac->alist->preferred, &ac->tried))
|
||||
WRITE_ONCE(alist->preferred, ac->index);
|
||||
afs_put_addrlist(alist);
|
||||
ac->alist = NULL;
|
||||
}
|
||||
|
||||
ac->alist = NULL;
|
||||
ac->begun = false;
|
||||
return ac->error;
|
||||
}
|
||||
|
@ -122,6 +122,8 @@ bool afs_cm_incoming_call(struct afs_call *call)
|
||||
{
|
||||
_enter("{%u, CB.OP %u}", call->service_id, call->operation_ID);
|
||||
|
||||
call->epoch = rxrpc_kernel_get_epoch(call->net->socket, call->rxcall);
|
||||
|
||||
switch (call->operation_ID) {
|
||||
case CBCallBack:
|
||||
call->type = &afs_SRXCBCallBack;
|
||||
@ -151,6 +153,91 @@ bool afs_cm_incoming_call(struct afs_call *call)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Record a probe to the cache manager from a server.
|
||||
*/
|
||||
static int afs_record_cm_probe(struct afs_call *call, struct afs_server *server)
|
||||
{
|
||||
_enter("");
|
||||
|
||||
if (test_bit(AFS_SERVER_FL_HAVE_EPOCH, &server->flags) &&
|
||||
!test_bit(AFS_SERVER_FL_PROBING, &server->flags)) {
|
||||
if (server->cm_epoch == call->epoch)
|
||||
return 0;
|
||||
|
||||
if (!server->probe.said_rebooted) {
|
||||
pr_notice("kAFS: FS rebooted %pU\n", &server->uuid);
|
||||
server->probe.said_rebooted = true;
|
||||
}
|
||||
}
|
||||
|
||||
spin_lock(&server->probe_lock);
|
||||
|
||||
if (!test_bit(AFS_SERVER_FL_HAVE_EPOCH, &server->flags)) {
|
||||
server->cm_epoch = call->epoch;
|
||||
server->probe.cm_epoch = call->epoch;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (server->probe.cm_probed &&
|
||||
call->epoch != server->probe.cm_epoch &&
|
||||
!server->probe.said_inconsistent) {
|
||||
pr_notice("kAFS: FS endpoints inconsistent %pU\n",
|
||||
&server->uuid);
|
||||
server->probe.said_inconsistent = true;
|
||||
}
|
||||
|
||||
if (!server->probe.cm_probed || call->epoch == server->cm_epoch)
|
||||
server->probe.cm_epoch = server->cm_epoch;
|
||||
|
||||
out:
|
||||
server->probe.cm_probed = true;
|
||||
spin_unlock(&server->probe_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Find the server record by peer address and record a probe to the cache
|
||||
* manager from a server.
|
||||
*/
|
||||
static int afs_find_cm_server_by_peer(struct afs_call *call)
|
||||
{
|
||||
struct sockaddr_rxrpc srx;
|
||||
struct afs_server *server;
|
||||
|
||||
rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
|
||||
|
||||
server = afs_find_server(call->net, &srx);
|
||||
if (!server) {
|
||||
trace_afs_cm_no_server(call, &srx);
|
||||
return 0;
|
||||
}
|
||||
|
||||
call->cm_server = server;
|
||||
return afs_record_cm_probe(call, server);
|
||||
}
|
||||
|
||||
/*
|
||||
* Find the server record by server UUID and record a probe to the cache
|
||||
* manager from a server.
|
||||
*/
|
||||
static int afs_find_cm_server_by_uuid(struct afs_call *call,
|
||||
struct afs_uuid *uuid)
|
||||
{
|
||||
struct afs_server *server;
|
||||
|
||||
rcu_read_lock();
|
||||
server = afs_find_server_by_uuid(call->net, call->request);
|
||||
rcu_read_unlock();
|
||||
if (!server) {
|
||||
trace_afs_cm_no_server_u(call, call->request);
|
||||
return 0;
|
||||
}
|
||||
|
||||
call->cm_server = server;
|
||||
return afs_record_cm_probe(call, server);
|
||||
}
|
||||
|
||||
/*
|
||||
* Clean up a cache manager call.
|
||||
*/
|
||||
@ -187,7 +274,6 @@ static void SRXAFSCB_CallBack(struct work_struct *work)
|
||||
static int afs_deliver_cb_callback(struct afs_call *call)
|
||||
{
|
||||
struct afs_callback_break *cb;
|
||||
struct sockaddr_rxrpc srx;
|
||||
__be32 *bp;
|
||||
int ret, loop;
|
||||
|
||||
@ -276,12 +362,7 @@ static int afs_deliver_cb_callback(struct afs_call *call)
|
||||
|
||||
/* we'll need the file server record as that tells us which set of
|
||||
* vnodes to operate upon */
|
||||
rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
|
||||
call->cm_server = afs_find_server(call->net, &srx);
|
||||
if (!call->cm_server)
|
||||
trace_afs_cm_no_server(call, &srx);
|
||||
|
||||
return afs_queue_call_work(call);
|
||||
return afs_find_cm_server_by_peer(call);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -305,13 +386,10 @@ static void SRXAFSCB_InitCallBackState(struct work_struct *work)
|
||||
*/
|
||||
static int afs_deliver_cb_init_call_back_state(struct afs_call *call)
|
||||
{
|
||||
struct sockaddr_rxrpc srx;
|
||||
int ret;
|
||||
|
||||
_enter("");
|
||||
|
||||
rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
|
||||
|
||||
afs_extract_discard(call, 0);
|
||||
ret = afs_extract_data(call, false);
|
||||
if (ret < 0)
|
||||
@ -319,11 +397,7 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call)
|
||||
|
||||
/* we'll need the file server record as that tells us which set of
|
||||
* vnodes to operate upon */
|
||||
call->cm_server = afs_find_server(call->net, &srx);
|
||||
if (!call->cm_server)
|
||||
trace_afs_cm_no_server(call, &srx);
|
||||
|
||||
return afs_queue_call_work(call);
|
||||
return afs_find_cm_server_by_peer(call);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -384,13 +458,7 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
|
||||
|
||||
/* we'll need the file server record as that tells us which set of
|
||||
* vnodes to operate upon */
|
||||
rcu_read_lock();
|
||||
call->cm_server = afs_find_server_by_uuid(call->net, call->request);
|
||||
rcu_read_unlock();
|
||||
if (!call->cm_server)
|
||||
trace_afs_cm_no_server_u(call, call->request);
|
||||
|
||||
return afs_queue_call_work(call);
|
||||
return afs_find_cm_server_by_uuid(call, call->request);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -422,8 +490,7 @@ static int afs_deliver_cb_probe(struct afs_call *call)
|
||||
|
||||
if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
|
||||
return afs_io_error(call, afs_io_error_cm_reply);
|
||||
|
||||
return afs_queue_call_work(call);
|
||||
return afs_find_cm_server_by_peer(call);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -503,8 +570,7 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call)
|
||||
|
||||
if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
|
||||
return afs_io_error(call, afs_io_error_cm_reply);
|
||||
|
||||
return afs_queue_call_work(call);
|
||||
return afs_find_cm_server_by_uuid(call, call->request);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -586,8 +652,7 @@ static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call)
|
||||
|
||||
if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
|
||||
return afs_io_error(call, afs_io_error_cm_reply);
|
||||
|
||||
return afs_queue_call_work(call);
|
||||
return afs_find_cm_server_by_peer(call);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -596,7 +661,6 @@ static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call)
|
||||
static int afs_deliver_yfs_cb_callback(struct afs_call *call)
|
||||
{
|
||||
struct afs_callback_break *cb;
|
||||
struct sockaddr_rxrpc srx;
|
||||
struct yfs_xdr_YFSFid *bp;
|
||||
size_t size;
|
||||
int ret, loop;
|
||||
@ -664,10 +728,5 @@ static int afs_deliver_yfs_cb_callback(struct afs_call *call)
|
||||
/* We'll need the file server record as that tells us which set of
|
||||
* vnodes to operate upon.
|
||||
*/
|
||||
rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
|
||||
call->cm_server = afs_find_server(call->net, &srx);
|
||||
if (!call->cm_server)
|
||||
trace_afs_cm_no_server(call, &srx);
|
||||
|
||||
return afs_queue_call_work(call);
|
||||
return afs_find_cm_server_by_peer(call);
|
||||
}
|
||||
|
270
fs/afs/fs_probe.c
Normal file
270
fs/afs/fs_probe.c
Normal file
@ -0,0 +1,270 @@
|
||||
/* AFS fileserver probing
|
||||
*
|
||||
* Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
|
||||
* Written by David Howells (dhowells@redhat.com)
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public Licence
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the Licence, or (at your option) any later version.
|
||||
*/
|
||||
|
||||
#include <linux/sched.h>
|
||||
#include <linux/slab.h>
|
||||
#include "afs_fs.h"
|
||||
#include "internal.h"
|
||||
#include "protocol_yfs.h"
|
||||
|
||||
static bool afs_fs_probe_done(struct afs_server *server)
|
||||
{
|
||||
if (!atomic_dec_and_test(&server->probe_outstanding))
|
||||
return false;
|
||||
|
||||
wake_up_var(&server->probe_outstanding);
|
||||
clear_bit_unlock(AFS_SERVER_FL_PROBING, &server->flags);
|
||||
wake_up_bit(&server->flags, AFS_SERVER_FL_PROBING);
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Process the result of probing a fileserver. This is called after successful
|
||||
* or failed delivery of an FS.GetCapabilities operation.
|
||||
*/
|
||||
void afs_fileserver_probe_result(struct afs_call *call)
|
||||
{
|
||||
struct afs_addr_list *alist = call->alist;
|
||||
struct afs_server *server = call->reply[0];
|
||||
unsigned int server_index = (long)call->reply[1];
|
||||
unsigned int index = call->addr_ix;
|
||||
unsigned int rtt = UINT_MAX;
|
||||
bool have_result = false;
|
||||
u64 _rtt;
|
||||
int ret = call->error;
|
||||
|
||||
_enter("%pU,%u", &server->uuid, index);
|
||||
|
||||
spin_lock(&server->probe_lock);
|
||||
|
||||
switch (ret) {
|
||||
case 0:
|
||||
server->probe.error = 0;
|
||||
goto responded;
|
||||
case -ECONNABORTED:
|
||||
if (!server->probe.responded) {
|
||||
server->probe.abort_code = call->abort_code;
|
||||
server->probe.error = ret;
|
||||
}
|
||||
goto responded;
|
||||
case -ENOMEM:
|
||||
case -ENONET:
|
||||
server->probe.local_failure = true;
|
||||
afs_io_error(call, afs_io_error_fs_probe_fail);
|
||||
goto out;
|
||||
case -ECONNRESET: /* Responded, but call expired. */
|
||||
case -ENETUNREACH:
|
||||
case -EHOSTUNREACH:
|
||||
case -ECONNREFUSED:
|
||||
case -ETIMEDOUT:
|
||||
case -ETIME:
|
||||
default:
|
||||
clear_bit(index, &alist->responded);
|
||||
set_bit(index, &alist->failed);
|
||||
if (!server->probe.responded &&
|
||||
(server->probe.error == 0 ||
|
||||
server->probe.error == -ETIMEDOUT ||
|
||||
server->probe.error == -ETIME))
|
||||
server->probe.error = ret;
|
||||
afs_io_error(call, afs_io_error_fs_probe_fail);
|
||||
goto out;
|
||||
}
|
||||
|
||||
responded:
|
||||
set_bit(index, &alist->responded);
|
||||
clear_bit(index, &alist->failed);
|
||||
|
||||
if (call->service_id == YFS_FS_SERVICE) {
|
||||
server->probe.is_yfs = true;
|
||||
set_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
|
||||
alist->addrs[index].srx_service = call->service_id;
|
||||
} else {
|
||||
server->probe.not_yfs = true;
|
||||
if (!server->probe.is_yfs) {
|
||||
clear_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
|
||||
alist->addrs[index].srx_service = call->service_id;
|
||||
}
|
||||
}
|
||||
|
||||
/* Get the RTT and scale it to fit into a 32-bit value that represents
|
||||
* over a minute of time so that we can access it with one instruction
|
||||
* on a 32-bit system.
|
||||
*/
|
||||
_rtt = rxrpc_kernel_get_rtt(call->net->socket, call->rxcall);
|
||||
_rtt /= 64;
|
||||
rtt = (_rtt > UINT_MAX) ? UINT_MAX : _rtt;
|
||||
if (rtt < server->probe.rtt) {
|
||||
server->probe.rtt = rtt;
|
||||
alist->preferred = index;
|
||||
have_result = true;
|
||||
}
|
||||
|
||||
smp_wmb(); /* Set rtt before responded. */
|
||||
server->probe.responded = true;
|
||||
set_bit(AFS_SERVER_FL_PROBED, &server->flags);
|
||||
out:
|
||||
spin_unlock(&server->probe_lock);
|
||||
|
||||
_debug("probe [%u][%u] %pISpc rtt=%u ret=%d",
|
||||
server_index, index, &alist->addrs[index].transport,
|
||||
(unsigned int)rtt, ret);
|
||||
|
||||
have_result |= afs_fs_probe_done(server);
|
||||
if (have_result) {
|
||||
server->probe.have_result = true;
|
||||
wake_up_var(&server->probe.have_result);
|
||||
wake_up_all(&server->probe_wq);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Probe all of a fileserver's addresses to find out the best route and to
|
||||
* query its capabilities.
|
||||
*/
|
||||
static int afs_do_probe_fileserver(struct afs_net *net,
|
||||
struct afs_server *server,
|
||||
struct key *key,
|
||||
unsigned int server_index)
|
||||
{
|
||||
struct afs_addr_cursor ac = {
|
||||
.index = 0,
|
||||
};
|
||||
int ret;
|
||||
|
||||
_enter("%pU", &server->uuid);
|
||||
|
||||
read_lock(&server->fs_lock);
|
||||
ac.alist = rcu_dereference_protected(server->addresses,
|
||||
lockdep_is_held(&server->fs_lock));
|
||||
read_unlock(&server->fs_lock);
|
||||
|
||||
atomic_set(&server->probe_outstanding, ac.alist->nr_addrs);
|
||||
memset(&server->probe, 0, sizeof(server->probe));
|
||||
server->probe.rtt = UINT_MAX;
|
||||
|
||||
for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++) {
|
||||
ret = afs_fs_get_capabilities(net, server, &ac, key, server_index,
|
||||
true);
|
||||
if (ret != -EINPROGRESS) {
|
||||
afs_fs_probe_done(server);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Send off probes to all unprobed servers.
|
||||
*/
|
||||
int afs_probe_fileservers(struct afs_net *net, struct key *key,
|
||||
struct afs_server_list *list)
|
||||
{
|
||||
struct afs_server *server;
|
||||
int i, ret;
|
||||
|
||||
for (i = 0; i < list->nr_servers; i++) {
|
||||
server = list->servers[i].server;
|
||||
if (test_bit(AFS_SERVER_FL_PROBED, &server->flags))
|
||||
continue;
|
||||
|
||||
if (!test_and_set_bit_lock(AFS_SERVER_FL_PROBING, &server->flags)) {
|
||||
ret = afs_do_probe_fileserver(net, server, key, i);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Wait for the first as-yet untried fileserver to respond.
|
||||
*/
|
||||
int afs_wait_for_fs_probes(struct afs_server_list *slist, unsigned long untried)
|
||||
{
|
||||
struct wait_queue_entry *waits;
|
||||
struct afs_server *server;
|
||||
unsigned int rtt = UINT_MAX;
|
||||
bool have_responders = false;
|
||||
int pref = -1, i;
|
||||
|
||||
_enter("%u,%lx", slist->nr_servers, untried);
|
||||
|
||||
/* Only wait for servers that have a probe outstanding. */
|
||||
for (i = 0; i < slist->nr_servers; i++) {
|
||||
if (test_bit(i, &untried)) {
|
||||
server = slist->servers[i].server;
|
||||
if (!test_bit(AFS_SERVER_FL_PROBING, &server->flags))
|
||||
__clear_bit(i, &untried);
|
||||
if (server->probe.responded)
|
||||
have_responders = true;
|
||||
}
|
||||
}
|
||||
if (have_responders || !untried)
|
||||
return 0;
|
||||
|
||||
waits = kmalloc(array_size(slist->nr_servers, sizeof(*waits)), GFP_KERNEL);
|
||||
if (!waits)
|
||||
return -ENOMEM;
|
||||
|
||||
for (i = 0; i < slist->nr_servers; i++) {
|
||||
if (test_bit(i, &untried)) {
|
||||
server = slist->servers[i].server;
|
||||
init_waitqueue_entry(&waits[i], current);
|
||||
add_wait_queue(&server->probe_wq, &waits[i]);
|
||||
}
|
||||
}
|
||||
|
||||
for (;;) {
|
||||
bool still_probing = false;
|
||||
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
for (i = 0; i < slist->nr_servers; i++) {
|
||||
if (test_bit(i, &untried)) {
|
||||
server = slist->servers[i].server;
|
||||
if (server->probe.responded)
|
||||
goto stop;
|
||||
if (test_bit(AFS_SERVER_FL_PROBING, &server->flags))
|
||||
still_probing = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!still_probing || unlikely(signal_pending(current)))
|
||||
goto stop;
|
||||
schedule();
|
||||
}
|
||||
|
||||
stop:
|
||||
set_current_state(TASK_RUNNING);
|
||||
|
||||
for (i = 0; i < slist->nr_servers; i++) {
|
||||
if (test_bit(i, &untried)) {
|
||||
server = slist->servers[i].server;
|
||||
if (server->probe.responded &&
|
||||
server->probe.rtt < rtt) {
|
||||
pref = i;
|
||||
rtt = server->probe.rtt;
|
||||
}
|
||||
|
||||
remove_wait_queue(&server->probe_wq, &waits[i]);
|
||||
}
|
||||
}
|
||||
|
||||
kfree(waits);
|
||||
|
||||
if (pref == -1 && signal_pending(current))
|
||||
return -ERESTARTSYS;
|
||||
|
||||
if (pref >= 0)
|
||||
slist->preferred = pref;
|
||||
return 0;
|
||||
}
|
@ -2006,7 +2006,6 @@ int afs_fs_give_up_all_callbacks(struct afs_net *net,
|
||||
*/
|
||||
static int afs_deliver_fs_get_capabilities(struct afs_call *call)
|
||||
{
|
||||
struct afs_server *server = call->reply[0];
|
||||
u32 count;
|
||||
int ret;
|
||||
|
||||
@ -2042,15 +2041,18 @@ static int afs_deliver_fs_get_capabilities(struct afs_call *call)
|
||||
break;
|
||||
}
|
||||
|
||||
if (call->service_id == YFS_FS_SERVICE)
|
||||
set_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
|
||||
else
|
||||
clear_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
|
||||
|
||||
_leave(" = 0 [done]");
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void afs_destroy_fs_get_capabilities(struct afs_call *call)
|
||||
{
|
||||
struct afs_server *server = call->reply[0];
|
||||
|
||||
afs_put_server(call->net, server);
|
||||
afs_flat_call_destructor(call);
|
||||
}
|
||||
|
||||
/*
|
||||
* FS.GetCapabilities operation type
|
||||
*/
|
||||
@ -2058,7 +2060,8 @@ static const struct afs_call_type afs_RXFSGetCapabilities = {
|
||||
.name = "FS.GetCapabilities",
|
||||
.op = afs_FS_GetCapabilities,
|
||||
.deliver = afs_deliver_fs_get_capabilities,
|
||||
.destructor = afs_flat_call_destructor,
|
||||
.done = afs_fileserver_probe_result,
|
||||
.destructor = afs_destroy_fs_get_capabilities,
|
||||
};
|
||||
|
||||
/*
|
||||
@ -2068,7 +2071,9 @@ static const struct afs_call_type afs_RXFSGetCapabilities = {
|
||||
int afs_fs_get_capabilities(struct afs_net *net,
|
||||
struct afs_server *server,
|
||||
struct afs_addr_cursor *ac,
|
||||
struct key *key)
|
||||
struct key *key,
|
||||
unsigned int server_index,
|
||||
bool async)
|
||||
{
|
||||
struct afs_call *call;
|
||||
__be32 *bp;
|
||||
@ -2080,8 +2085,10 @@ int afs_fs_get_capabilities(struct afs_net *net,
|
||||
return -ENOMEM;
|
||||
|
||||
call->key = key;
|
||||
call->reply[0] = server;
|
||||
call->reply[0] = afs_get_server(server);
|
||||
call->reply[1] = (void *)(long)server_index;
|
||||
call->upgrade = true;
|
||||
call->want_reply_time = true;
|
||||
|
||||
/* marshall the parameters */
|
||||
bp = call->request;
|
||||
@ -2089,7 +2096,7 @@ int afs_fs_get_capabilities(struct afs_net *net,
|
||||
|
||||
/* Can't take a ref on server */
|
||||
trace_afs_make_fs_call(call, NULL);
|
||||
return afs_make_call(ac, call, GFP_NOFS, false);
|
||||
return afs_make_call(ac, call, GFP_NOFS, async);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -76,12 +76,13 @@ struct afs_addr_list {
|
||||
u32 version; /* Version */
|
||||
unsigned char max_addrs;
|
||||
unsigned char nr_addrs;
|
||||
unsigned char index; /* Address currently in use */
|
||||
unsigned char preferred; /* Preferred address */
|
||||
unsigned char nr_ipv4; /* Number of IPv4 addresses */
|
||||
enum dns_record_source source:8;
|
||||
enum dns_lookup_status status:8;
|
||||
unsigned long probed; /* Mask of servers that have been probed */
|
||||
unsigned long yfs; /* Mask of servers that are YFS */
|
||||
unsigned long failed; /* Mask of addrs that failed locally/ICMP */
|
||||
unsigned long responded; /* Mask of addrs that responded */
|
||||
struct sockaddr_rxrpc addrs[];
|
||||
#define AFS_MAX_ADDRESSES ((unsigned int)(sizeof(unsigned long) * 8))
|
||||
};
|
||||
@ -91,6 +92,7 @@ struct afs_addr_list {
|
||||
*/
|
||||
struct afs_call {
|
||||
const struct afs_call_type *type; /* type of call */
|
||||
struct afs_addr_list *alist; /* Address is alist[addr_ix] */
|
||||
wait_queue_head_t waitq; /* processes awaiting completion */
|
||||
struct work_struct async_work; /* async I/O processor */
|
||||
struct work_struct work; /* actual work processor */
|
||||
@ -116,6 +118,7 @@ struct afs_call {
|
||||
spinlock_t state_lock;
|
||||
int error; /* error code */
|
||||
u32 abort_code; /* Remote abort ID or 0 */
|
||||
u32 epoch;
|
||||
unsigned request_size; /* size of request data */
|
||||
unsigned reply_max; /* maximum size of reply */
|
||||
unsigned first_offset; /* offset into mapping[first] */
|
||||
@ -125,13 +128,14 @@ struct afs_call {
|
||||
unsigned count2; /* count used in unmarshalling */
|
||||
};
|
||||
unsigned char unmarshall; /* unmarshalling phase */
|
||||
unsigned char addr_ix; /* Address in ->alist */
|
||||
bool incoming; /* T if incoming call */
|
||||
bool send_pages; /* T if data from mapping should be sent */
|
||||
bool need_attention; /* T if RxRPC poked us */
|
||||
bool async; /* T if asynchronous */
|
||||
bool ret_reply0; /* T if should return reply[0] on success */
|
||||
bool upgrade; /* T to request service upgrade */
|
||||
bool want_reply_time; /* T if want reply_time */
|
||||
bool want_reply_time; /* T if want reply_time */
|
||||
u16 service_id; /* Actual service ID (after upgrade) */
|
||||
unsigned int debug_id; /* Trace ID */
|
||||
u32 operation_ID; /* operation ID for an incoming call */
|
||||
@ -162,6 +166,9 @@ struct afs_call_type {
|
||||
|
||||
/* Work function */
|
||||
void (*work)(struct work_struct *work);
|
||||
|
||||
/* Call done function (gets called immediately on success or failure) */
|
||||
void (*done)(struct afs_call *call);
|
||||
};
|
||||
|
||||
/*
|
||||
@ -376,10 +383,27 @@ struct afs_vlserver {
|
||||
unsigned long flags;
|
||||
#define AFS_VLSERVER_FL_PROBED 0 /* The VL server has been probed */
|
||||
#define AFS_VLSERVER_FL_PROBING 1 /* VL server is being probed */
|
||||
#define AFS_VLSERVER_FL_IS_YFS 2 /* Server is YFS not AFS */
|
||||
rwlock_t lock; /* Lock on addresses */
|
||||
atomic_t usage;
|
||||
u16 name_len; /* Length of name */
|
||||
|
||||
/* Probe state */
|
||||
wait_queue_head_t probe_wq;
|
||||
atomic_t probe_outstanding;
|
||||
spinlock_t probe_lock;
|
||||
struct {
|
||||
unsigned int rtt; /* RTT as ktime/64 */
|
||||
u32 abort_code;
|
||||
short error;
|
||||
bool have_result;
|
||||
bool responded:1;
|
||||
bool is_yfs:1;
|
||||
bool not_yfs:1;
|
||||
bool local_failure:1;
|
||||
} probe;
|
||||
|
||||
u16 port;
|
||||
u16 name_len; /* Length of name */
|
||||
char name[]; /* Server name, case-flattened */
|
||||
};
|
||||
|
||||
@ -399,6 +423,7 @@ struct afs_vlserver_list {
|
||||
atomic_t usage;
|
||||
u8 nr_servers;
|
||||
u8 index; /* Server currently in use */
|
||||
u8 preferred; /* Preferred server */
|
||||
enum dns_record_source source:8;
|
||||
enum dns_lookup_status status:8;
|
||||
rwlock_t lock;
|
||||
@ -461,8 +486,10 @@ struct afs_server {
|
||||
#define AFS_SERVER_FL_MAY_HAVE_CB 8 /* May have callbacks on this fileserver */
|
||||
#define AFS_SERVER_FL_IS_YFS 9 /* Server is YFS not AFS */
|
||||
#define AFS_SERVER_FL_NO_RM2 10 /* Fileserver doesn't support YFS.RemoveFile2 */
|
||||
#define AFS_SERVER_FL_HAVE_EPOCH 11 /* ->epoch is valid */
|
||||
atomic_t usage;
|
||||
u32 addr_version; /* Address list version */
|
||||
u32 cm_epoch; /* Server RxRPC epoch */
|
||||
|
||||
/* file service access */
|
||||
rwlock_t fs_lock; /* access lock */
|
||||
@ -471,6 +498,26 @@ struct afs_server {
|
||||
struct hlist_head cb_volumes; /* List of volume interests on this server */
|
||||
unsigned cb_s_break; /* Break-everything counter. */
|
||||
rwlock_t cb_break_lock; /* Volume finding lock */
|
||||
|
||||
/* Probe state */
|
||||
wait_queue_head_t probe_wq;
|
||||
atomic_t probe_outstanding;
|
||||
spinlock_t probe_lock;
|
||||
struct {
|
||||
unsigned int rtt; /* RTT as ktime/64 */
|
||||
u32 abort_code;
|
||||
u32 cm_epoch;
|
||||
short error;
|
||||
bool have_result;
|
||||
bool responded:1;
|
||||
bool is_yfs:1;
|
||||
bool not_yfs:1;
|
||||
bool local_failure:1;
|
||||
bool no_epoch:1;
|
||||
bool cm_probed:1;
|
||||
bool said_rebooted:1;
|
||||
bool said_inconsistent:1;
|
||||
} probe;
|
||||
};
|
||||
|
||||
/*
|
||||
@ -505,8 +552,8 @@ struct afs_server_entry {
|
||||
|
||||
struct afs_server_list {
|
||||
refcount_t usage;
|
||||
unsigned short nr_servers;
|
||||
unsigned short index; /* Server currently in use */
|
||||
unsigned char nr_servers;
|
||||
unsigned char preferred; /* Preferred server */
|
||||
unsigned short vnovol_mask; /* Servers to be skipped due to VNOVOL */
|
||||
unsigned int seq; /* Set to ->servers_seq when installed */
|
||||
rwlock_t lock;
|
||||
@ -653,13 +700,12 @@ struct afs_interface {
|
||||
*/
|
||||
struct afs_addr_cursor {
|
||||
struct afs_addr_list *alist; /* Current address list (pins ref) */
|
||||
u32 abort_code;
|
||||
unsigned short start; /* Starting point in alist->addrs[] */
|
||||
unsigned short index; /* Wrapping offset from start to current addr */
|
||||
short error;
|
||||
bool begun; /* T if we've begun iteration */
|
||||
unsigned long tried; /* Tried addresses */
|
||||
signed char index; /* Current address */
|
||||
bool responded; /* T if the current address responded */
|
||||
unsigned short nr_iterations; /* Number of address iterations */
|
||||
short error;
|
||||
u32 abort_code;
|
||||
};
|
||||
|
||||
/*
|
||||
@ -669,9 +715,10 @@ struct afs_vl_cursor {
|
||||
struct afs_addr_cursor ac;
|
||||
struct afs_cell *cell; /* The cell we're querying */
|
||||
struct afs_vlserver_list *server_list; /* Current server list (pins ref) */
|
||||
struct afs_vlserver *server; /* Server on which this resides */
|
||||
struct key *key; /* Key for the server */
|
||||
unsigned char start; /* Initial index in server list */
|
||||
unsigned char index; /* Number of servers tried beyond start */
|
||||
unsigned long untried; /* Bitmask of untried servers */
|
||||
short index; /* Current server */
|
||||
short error;
|
||||
unsigned short flags;
|
||||
#define AFS_VL_CURSOR_STOP 0x0001 /* Set to cease iteration */
|
||||
@ -689,10 +736,10 @@ struct afs_fs_cursor {
|
||||
struct afs_server_list *server_list; /* Current server list (pins ref) */
|
||||
struct afs_cb_interest *cbi; /* Server on which this resides (pins ref) */
|
||||
struct key *key; /* Key for the server */
|
||||
unsigned long untried; /* Bitmask of untried servers */
|
||||
unsigned int cb_break; /* cb_break + cb_s_break before the call */
|
||||
unsigned int cb_break_2; /* cb_break + cb_s_break (2nd vnode) */
|
||||
unsigned char start; /* Initial index in server list */
|
||||
unsigned char index; /* Number of servers tried beyond start */
|
||||
short index; /* Current server */
|
||||
short error;
|
||||
unsigned short flags;
|
||||
#define AFS_FS_CURSOR_STOP 0x0001 /* Set to cease iteration */
|
||||
@ -888,7 +935,7 @@ extern int afs_fs_release_lock(struct afs_fs_cursor *);
|
||||
extern int afs_fs_give_up_all_callbacks(struct afs_net *, struct afs_server *,
|
||||
struct afs_addr_cursor *, struct key *);
|
||||
extern int afs_fs_get_capabilities(struct afs_net *, struct afs_server *,
|
||||
struct afs_addr_cursor *, struct key *);
|
||||
struct afs_addr_cursor *, struct key *, unsigned int, bool);
|
||||
extern int afs_fs_inline_bulk_status(struct afs_fs_cursor *, struct afs_net *,
|
||||
struct afs_fid *, struct afs_file_status *,
|
||||
struct afs_callback *, unsigned int,
|
||||
@ -897,6 +944,13 @@ extern int afs_fs_fetch_status(struct afs_fs_cursor *, struct afs_net *,
|
||||
struct afs_fid *, struct afs_file_status *,
|
||||
struct afs_callback *, struct afs_volsync *);
|
||||
|
||||
/*
|
||||
* fs_probe.c
|
||||
*/
|
||||
extern void afs_fileserver_probe_result(struct afs_call *);
|
||||
extern int afs_probe_fileservers(struct afs_net *, struct key *, struct afs_server_list *);
|
||||
extern int afs_wait_for_fs_probes(struct afs_server_list *, unsigned long);
|
||||
|
||||
/*
|
||||
* inode.c
|
||||
*/
|
||||
@ -1013,7 +1067,6 @@ extern int __net_init afs_open_socket(struct afs_net *);
|
||||
extern void __net_exit afs_close_socket(struct afs_net *);
|
||||
extern void afs_charge_preallocation(struct work_struct *);
|
||||
extern void afs_put_call(struct afs_call *);
|
||||
extern int afs_queue_call_work(struct afs_call *);
|
||||
extern long afs_make_call(struct afs_addr_cursor *, struct afs_call *, gfp_t, bool);
|
||||
extern struct afs_call *afs_alloc_flat_call(struct afs_net *,
|
||||
const struct afs_call_type *,
|
||||
@ -1130,7 +1183,6 @@ extern void afs_put_server(struct afs_net *, struct afs_server *);
|
||||
extern void afs_manage_servers(struct work_struct *);
|
||||
extern void afs_servers_timer(struct timer_list *);
|
||||
extern void __net_exit afs_purge_servers(struct afs_net *);
|
||||
extern bool afs_probe_fileserver(struct afs_fs_cursor *);
|
||||
extern bool afs_check_server_record(struct afs_fs_cursor *, struct afs_server *);
|
||||
|
||||
/*
|
||||
@ -1160,9 +1212,17 @@ extern void afs_fs_exit(void);
|
||||
extern struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_vl_cursor *,
|
||||
const char *, int);
|
||||
extern struct afs_addr_list *afs_vl_get_addrs_u(struct afs_vl_cursor *, const uuid_t *);
|
||||
extern int afs_vl_get_capabilities(struct afs_net *, struct afs_addr_cursor *, struct key *);
|
||||
extern int afs_vl_get_capabilities(struct afs_net *, struct afs_addr_cursor *, struct key *,
|
||||
struct afs_vlserver *, unsigned int, bool);
|
||||
extern struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *, const uuid_t *);
|
||||
|
||||
/*
|
||||
* vl_probe.c
|
||||
*/
|
||||
extern void afs_vlserver_probe_result(struct afs_call *);
|
||||
extern int afs_send_vl_probes(struct afs_net *, struct key *, struct afs_vlserver_list *);
|
||||
extern int afs_wait_for_vl_probes(struct afs_vlserver_list *, unsigned long);
|
||||
|
||||
/*
|
||||
* vl_rotate.c
|
||||
*/
|
||||
|
@ -312,7 +312,7 @@ static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v)
|
||||
if (alist) {
|
||||
for (i = 0; i < alist->nr_addrs; i++)
|
||||
seq_printf(m, " %c %pISpc\n",
|
||||
alist->index == i ? '>' : '-',
|
||||
alist->preferred == i ? '>' : '-',
|
||||
&alist->addrs[i].transport);
|
||||
}
|
||||
return 0;
|
||||
@ -391,11 +391,11 @@ static int afs_proc_servers_show(struct seq_file *m, void *v)
|
||||
&server->uuid,
|
||||
atomic_read(&server->usage),
|
||||
&alist->addrs[0].transport,
|
||||
alist->index == 0 ? "*" : "");
|
||||
alist->preferred == 0 ? "*" : "");
|
||||
for (i = 1; i < alist->nr_addrs; i++)
|
||||
seq_printf(m, " %pISpc%s\n",
|
||||
&alist->addrs[i].transport,
|
||||
alist->index == i ? "*" : "");
|
||||
alist->preferred == i ? "*" : "");
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
174
fs/afs/rotate.c
174
fs/afs/rotate.c
@ -18,14 +18,6 @@
|
||||
#include "internal.h"
|
||||
#include "afs_fs.h"
|
||||
|
||||
/*
|
||||
* Initialise a filesystem server cursor for iterating over FS servers.
|
||||
*/
|
||||
static void afs_init_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode)
|
||||
{
|
||||
memset(fc, 0, sizeof(*fc));
|
||||
}
|
||||
|
||||
/*
|
||||
* Begin an operation on the fileserver.
|
||||
*
|
||||
@ -35,7 +27,7 @@ static void afs_init_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode
|
||||
bool afs_begin_vnode_operation(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
|
||||
struct key *key)
|
||||
{
|
||||
afs_init_fs_cursor(fc, vnode);
|
||||
memset(fc, 0, sizeof(*fc));
|
||||
fc->vnode = vnode;
|
||||
fc->key = key;
|
||||
fc->ac.error = SHRT_MAX;
|
||||
@ -66,12 +58,15 @@ static bool afs_start_fs_iteration(struct afs_fs_cursor *fc,
|
||||
fc->server_list = afs_get_serverlist(vnode->volume->servers);
|
||||
read_unlock(&vnode->volume->servers_lock);
|
||||
|
||||
fc->untried = (1UL << fc->server_list->nr_servers) - 1;
|
||||
fc->index = READ_ONCE(fc->server_list->preferred);
|
||||
|
||||
cbi = vnode->cb_interest;
|
||||
if (cbi) {
|
||||
/* See if the vnode's preferred record is still available */
|
||||
for (i = 0; i < fc->server_list->nr_servers; i++) {
|
||||
if (fc->server_list->servers[i].cb_interest == cbi) {
|
||||
fc->start = i;
|
||||
fc->index = i;
|
||||
goto found_interest;
|
||||
}
|
||||
}
|
||||
@ -95,12 +90,9 @@ static bool afs_start_fs_iteration(struct afs_fs_cursor *fc,
|
||||
|
||||
afs_put_cb_interest(afs_v2net(vnode), cbi);
|
||||
cbi = NULL;
|
||||
} else {
|
||||
fc->start = READ_ONCE(fc->server_list->index);
|
||||
}
|
||||
|
||||
found_interest:
|
||||
fc->index = fc->start;
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -144,11 +136,12 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
|
||||
struct afs_addr_list *alist;
|
||||
struct afs_server *server;
|
||||
struct afs_vnode *vnode = fc->vnode;
|
||||
int error = fc->ac.error;
|
||||
u32 rtt, abort_code;
|
||||
int error = fc->ac.error, i;
|
||||
|
||||
_enter("%u/%u,%u/%u,%d,%d",
|
||||
fc->index, fc->start,
|
||||
fc->ac.index, fc->ac.start,
|
||||
_enter("%lx[%d],%lx[%d],%d,%d",
|
||||
fc->untried, fc->index,
|
||||
fc->ac.tried, fc->ac.index,
|
||||
error, fc->ac.abort_code);
|
||||
|
||||
if (fc->flags & AFS_FS_CURSOR_STOP) {
|
||||
@ -345,8 +338,50 @@ start:
|
||||
if (!afs_start_fs_iteration(fc, vnode))
|
||||
goto failed;
|
||||
|
||||
use_server:
|
||||
_debug("use");
|
||||
_debug("__ VOL %llx __", vnode->volume->vid);
|
||||
error = afs_probe_fileservers(afs_v2net(vnode), fc->key, fc->server_list);
|
||||
if (error < 0)
|
||||
goto failed_set_error;
|
||||
|
||||
pick_server:
|
||||
_debug("pick [%lx]", fc->untried);
|
||||
|
||||
error = afs_wait_for_fs_probes(fc->server_list, fc->untried);
|
||||
if (error < 0)
|
||||
goto failed_set_error;
|
||||
|
||||
/* Pick the untried server with the lowest RTT. If we have outstanding
|
||||
* callbacks, we stick with the server we're already using if we can.
|
||||
*/
|
||||
if (fc->cbi) {
|
||||
_debug("cbi %u", fc->index);
|
||||
if (test_bit(fc->index, &fc->untried))
|
||||
goto selected_server;
|
||||
afs_put_cb_interest(afs_v2net(vnode), fc->cbi);
|
||||
fc->cbi = NULL;
|
||||
_debug("nocbi");
|
||||
}
|
||||
|
||||
fc->index = -1;
|
||||
rtt = U32_MAX;
|
||||
for (i = 0; i < fc->server_list->nr_servers; i++) {
|
||||
struct afs_server *s = fc->server_list->servers[i].server;
|
||||
|
||||
if (!test_bit(i, &fc->untried) || !s->probe.responded)
|
||||
continue;
|
||||
if (s->probe.rtt < rtt) {
|
||||
fc->index = i;
|
||||
rtt = s->probe.rtt;
|
||||
}
|
||||
}
|
||||
|
||||
if (fc->index == -1)
|
||||
goto no_more_servers;
|
||||
|
||||
selected_server:
|
||||
_debug("use %d", fc->index);
|
||||
__clear_bit(fc->index, &fc->untried);
|
||||
|
||||
/* We're starting on a different fileserver from the list. We need to
|
||||
* check it, create a callback intercept, find its address list and
|
||||
* probe its capabilities before we use it.
|
||||
@ -379,60 +414,81 @@ use_server:
|
||||
|
||||
memset(&fc->ac, 0, sizeof(fc->ac));
|
||||
|
||||
/* Probe the current fileserver if we haven't done so yet. */
|
||||
if (!test_bit(AFS_SERVER_FL_PROBED, &server->flags)) {
|
||||
fc->ac.alist = afs_get_addrlist(alist);
|
||||
|
||||
if (!afs_probe_fileserver(fc)) {
|
||||
switch (fc->ac.error) {
|
||||
case -ENOMEM:
|
||||
case -ERESTARTSYS:
|
||||
case -EINTR:
|
||||
goto failed;
|
||||
default:
|
||||
goto next_server;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!fc->ac.alist)
|
||||
fc->ac.alist = alist;
|
||||
else
|
||||
afs_put_addrlist(alist);
|
||||
|
||||
fc->ac.start = READ_ONCE(alist->index);
|
||||
fc->ac.index = fc->ac.start;
|
||||
fc->ac.index = -1;
|
||||
|
||||
iterate_address:
|
||||
ASSERT(fc->ac.alist);
|
||||
_debug("iterate %d/%d", fc->ac.index, fc->ac.alist->nr_addrs);
|
||||
/* Iterate over the current server's address list to try and find an
|
||||
* address on which it will respond to us.
|
||||
*/
|
||||
if (!afs_iterate_addresses(&fc->ac))
|
||||
goto next_server;
|
||||
|
||||
_debug("address [%u] %u/%u", fc->index, fc->ac.index, fc->ac.alist->nr_addrs);
|
||||
|
||||
_leave(" = t");
|
||||
return true;
|
||||
|
||||
next_server:
|
||||
_debug("next");
|
||||
afs_end_cursor(&fc->ac);
|
||||
afs_put_cb_interest(afs_v2net(vnode), fc->cbi);
|
||||
fc->cbi = NULL;
|
||||
fc->index++;
|
||||
if (fc->index >= fc->server_list->nr_servers)
|
||||
fc->index = 0;
|
||||
if (fc->index != fc->start)
|
||||
goto use_server;
|
||||
goto pick_server;
|
||||
|
||||
no_more_servers:
|
||||
/* That's all the servers poked to no good effect. Try again if some
|
||||
* of them were busy.
|
||||
*/
|
||||
if (fc->flags & AFS_FS_CURSOR_VBUSY)
|
||||
goto restart_from_beginning;
|
||||
|
||||
goto failed;
|
||||
abort_code = 0;
|
||||
error = -EDESTADDRREQ;
|
||||
for (i = 0; i < fc->server_list->nr_servers; i++) {
|
||||
struct afs_server *s = fc->server_list->servers[i].server;
|
||||
int probe_error = READ_ONCE(s->probe.error);
|
||||
|
||||
switch (probe_error) {
|
||||
case 0:
|
||||
continue;
|
||||
default:
|
||||
if (error == -ETIMEDOUT ||
|
||||
error == -ETIME)
|
||||
continue;
|
||||
case -ETIMEDOUT:
|
||||
case -ETIME:
|
||||
if (error == -ENOMEM ||
|
||||
error == -ENONET)
|
||||
continue;
|
||||
case -ENOMEM:
|
||||
case -ENONET:
|
||||
if (error == -ENETUNREACH)
|
||||
continue;
|
||||
case -ENETUNREACH:
|
||||
if (error == -EHOSTUNREACH)
|
||||
continue;
|
||||
case -EHOSTUNREACH:
|
||||
if (error == -ECONNREFUSED)
|
||||
continue;
|
||||
case -ECONNREFUSED:
|
||||
if (error == -ECONNRESET)
|
||||
continue;
|
||||
case -ECONNRESET: /* Responded, but call expired. */
|
||||
if (error == -ECONNABORTED)
|
||||
continue;
|
||||
case -ECONNABORTED:
|
||||
abort_code = s->probe.abort_code;
|
||||
error = probe_error;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (error == -ECONNABORTED)
|
||||
error = afs_abort_to_error(abort_code);
|
||||
|
||||
failed_set_error:
|
||||
fc->error = error;
|
||||
@ -480,8 +536,7 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
|
||||
|
||||
memset(&fc->ac, 0, sizeof(fc->ac));
|
||||
fc->ac.alist = alist;
|
||||
fc->ac.start = READ_ONCE(alist->index);
|
||||
fc->ac.index = fc->ac.start;
|
||||
fc->ac.index = -1;
|
||||
goto iterate_address;
|
||||
|
||||
case 0:
|
||||
@ -538,13 +593,13 @@ static void afs_dump_edestaddrreq(const struct afs_fs_cursor *fc)
|
||||
pr_notice("EDESTADDR occurred\n");
|
||||
pr_notice("FC: cbb=%x cbb2=%x fl=%hx err=%hd\n",
|
||||
fc->cb_break, fc->cb_break_2, fc->flags, fc->error);
|
||||
pr_notice("FC: st=%u ix=%u ni=%u\n",
|
||||
fc->start, fc->index, fc->nr_iterations);
|
||||
pr_notice("FC: ut=%lx ix=%d ni=%u\n",
|
||||
fc->untried, fc->index, fc->nr_iterations);
|
||||
|
||||
if (fc->server_list) {
|
||||
const struct afs_server_list *sl = fc->server_list;
|
||||
pr_notice("FC: SL nr=%u ix=%u vnov=%hx\n",
|
||||
sl->nr_servers, sl->index, sl->vnovol_mask);
|
||||
pr_notice("FC: SL nr=%u pr=%u vnov=%hx\n",
|
||||
sl->nr_servers, sl->preferred, sl->vnovol_mask);
|
||||
for (i = 0; i < sl->nr_servers; i++) {
|
||||
const struct afs_server *s = sl->servers[i].server;
|
||||
pr_notice("FC: server fl=%lx av=%u %pU\n",
|
||||
@ -552,22 +607,21 @@ static void afs_dump_edestaddrreq(const struct afs_fs_cursor *fc)
|
||||
if (s->addresses) {
|
||||
const struct afs_addr_list *a =
|
||||
rcu_dereference(s->addresses);
|
||||
pr_notice("FC: - av=%u nr=%u/%u/%u ax=%u\n",
|
||||
pr_notice("FC: - av=%u nr=%u/%u/%u pr=%u\n",
|
||||
a->version,
|
||||
a->nr_ipv4, a->nr_addrs, a->max_addrs,
|
||||
a->index);
|
||||
pr_notice("FC: - pr=%lx yf=%lx\n",
|
||||
a->probed, a->yfs);
|
||||
a->preferred);
|
||||
pr_notice("FC: - pr=%lx R=%lx F=%lx\n",
|
||||
a->probed, a->responded, a->failed);
|
||||
if (a == fc->ac.alist)
|
||||
pr_notice("FC: - current\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pr_notice("AC: as=%u ax=%u ac=%d er=%d b=%u r=%u ni=%u\n",
|
||||
fc->ac.start, fc->ac.index, fc->ac.abort_code, fc->ac.error,
|
||||
fc->ac.begun, fc->ac.responded, fc->ac.nr_iterations);
|
||||
|
||||
pr_notice("AC: t=%lx ax=%u ac=%d er=%d r=%u ni=%u\n",
|
||||
fc->ac.tried, fc->ac.index, fc->ac.abort_code, fc->ac.error,
|
||||
fc->ac.responded, fc->ac.nr_iterations);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
|
@ -43,7 +43,6 @@ int afs_open_socket(struct afs_net *net)
|
||||
struct sockaddr_rxrpc srx;
|
||||
struct socket *socket;
|
||||
unsigned int min_level;
|
||||
u16 service_upgrade[2];
|
||||
int ret;
|
||||
|
||||
_enter("");
|
||||
@ -82,13 +81,12 @@ int afs_open_socket(struct afs_net *net)
|
||||
if (ret < 0)
|
||||
goto error_2;
|
||||
|
||||
service_upgrade[0] = CM_SERVICE;
|
||||
service_upgrade[1] = YFS_CM_SERVICE;
|
||||
ret = kernel_setsockopt(socket, SOL_RXRPC, RXRPC_UPGRADEABLE_SERVICE,
|
||||
(void *)service_upgrade, sizeof(service_upgrade));
|
||||
if (ret < 0)
|
||||
goto error_2;
|
||||
|
||||
/* Ideally, we'd turn on service upgrade here, but we can't because
|
||||
* OpenAFS is buggy and leaks the userStatus field from packet to
|
||||
* packet and between FS packets and CB packets - so if we try to do an
|
||||
* upgrade on an FS packet, OpenAFS will leak that into the CB packet
|
||||
* it sends back to us.
|
||||
*/
|
||||
|
||||
rxrpc_kernel_new_call_notification(socket, afs_rx_new_call,
|
||||
afs_rx_discard_new_call);
|
||||
@ -192,6 +190,7 @@ void afs_put_call(struct afs_call *call)
|
||||
|
||||
afs_put_server(call->net, call->cm_server);
|
||||
afs_put_cb_interest(call->net, call->cbi);
|
||||
afs_put_addrlist(call->alist);
|
||||
kfree(call->request);
|
||||
|
||||
trace_afs_call(call, afs_call_trace_free, 0, o,
|
||||
@ -205,21 +204,22 @@ void afs_put_call(struct afs_call *call)
|
||||
}
|
||||
|
||||
/*
|
||||
* Queue the call for actual work. Returns 0 unconditionally for convenience.
|
||||
* Queue the call for actual work.
|
||||
*/
|
||||
int afs_queue_call_work(struct afs_call *call)
|
||||
static void afs_queue_call_work(struct afs_call *call)
|
||||
{
|
||||
int u = atomic_inc_return(&call->usage);
|
||||
if (call->type->work) {
|
||||
int u = atomic_inc_return(&call->usage);
|
||||
|
||||
trace_afs_call(call, afs_call_trace_work, u,
|
||||
atomic_read(&call->net->nr_outstanding_calls),
|
||||
__builtin_return_address(0));
|
||||
trace_afs_call(call, afs_call_trace_work, u,
|
||||
atomic_read(&call->net->nr_outstanding_calls),
|
||||
__builtin_return_address(0));
|
||||
|
||||
INIT_WORK(&call->work, call->type->work);
|
||||
INIT_WORK(&call->work, call->type->work);
|
||||
|
||||
if (!queue_work(afs_wq, &call->work))
|
||||
afs_put_call(call);
|
||||
return 0;
|
||||
if (!queue_work(afs_wq, &call->work))
|
||||
afs_put_call(call);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@ -376,6 +376,8 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
|
||||
atomic_read(&call->net->nr_outstanding_calls));
|
||||
|
||||
call->async = async;
|
||||
call->addr_ix = ac->index;
|
||||
call->alist = afs_get_addrlist(ac->alist);
|
||||
|
||||
/* Work out the length we're going to transmit. This is awkward for
|
||||
* calls such as FS.StoreData where there's an extra injection of data
|
||||
@ -407,6 +409,7 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
|
||||
call->debug_id);
|
||||
if (IS_ERR(rxcall)) {
|
||||
ret = PTR_ERR(rxcall);
|
||||
call->error = ret;
|
||||
goto error_kill_call;
|
||||
}
|
||||
|
||||
@ -458,6 +461,8 @@ error_do_abort:
|
||||
call->error = ret;
|
||||
trace_afs_call_done(call);
|
||||
error_kill_call:
|
||||
if (call->type->done)
|
||||
call->type->done(call);
|
||||
afs_put_call(call);
|
||||
ac->error = ret;
|
||||
_leave(" = %d", ret);
|
||||
@ -509,6 +514,7 @@ static void afs_deliver_to_call(struct afs_call *call)
|
||||
state = READ_ONCE(call->state);
|
||||
switch (ret) {
|
||||
case 0:
|
||||
afs_queue_call_work(call);
|
||||
if (state == AFS_CALL_CL_PROC_REPLY) {
|
||||
if (call->cbi)
|
||||
set_bit(AFS_SERVER_FL_MAY_HAVE_CB,
|
||||
@ -546,6 +552,8 @@ static void afs_deliver_to_call(struct afs_call *call)
|
||||
}
|
||||
|
||||
done:
|
||||
if (call->type->done)
|
||||
call->type->done(call);
|
||||
if (state == AFS_CALL_COMPLETE && call->incoming)
|
||||
afs_put_call(call);
|
||||
out:
|
||||
|
109
fs/afs/server.c
109
fs/afs/server.c
@ -231,6 +231,8 @@ static struct afs_server *afs_alloc_server(struct afs_net *net,
|
||||
rwlock_init(&server->fs_lock);
|
||||
INIT_HLIST_HEAD(&server->cb_volumes);
|
||||
rwlock_init(&server->cb_break_lock);
|
||||
init_waitqueue_head(&server->probe_wq);
|
||||
spin_lock_init(&server->probe_lock);
|
||||
|
||||
afs_inc_servers_outstanding(net);
|
||||
_leave(" = %p", server);
|
||||
@ -254,7 +256,7 @@ static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell,
|
||||
ret = -ERESTARTSYS;
|
||||
if (afs_begin_vlserver_operation(&vc, cell, key)) {
|
||||
while (afs_select_vlserver(&vc)) {
|
||||
if (test_bit(vc.ac.index, &vc.ac.alist->yfs))
|
||||
if (test_bit(AFS_VLSERVER_FL_IS_YFS, &vc.server->flags))
|
||||
alist = afs_yfsvl_get_endpoints(&vc, uuid);
|
||||
else
|
||||
alist = afs_vl_get_addrs_u(&vc, uuid);
|
||||
@ -365,8 +367,7 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
|
||||
struct afs_addr_list *alist = rcu_access_pointer(server->addresses);
|
||||
struct afs_addr_cursor ac = {
|
||||
.alist = alist,
|
||||
.start = alist->index,
|
||||
.index = 0,
|
||||
.index = alist->preferred,
|
||||
.error = 0,
|
||||
};
|
||||
_enter("%p", server);
|
||||
@ -374,6 +375,9 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
|
||||
if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags))
|
||||
afs_fs_give_up_all_callbacks(net, server, &ac, NULL);
|
||||
|
||||
wait_var_event(&server->probe_outstanding,
|
||||
atomic_read(&server->probe_outstanding) == 0);
|
||||
|
||||
call_rcu(&server->rcu, afs_server_rcu);
|
||||
afs_dec_servers_outstanding(net);
|
||||
}
|
||||
@ -506,105 +510,6 @@ void afs_purge_servers(struct afs_net *net)
|
||||
_leave("");
|
||||
}
|
||||
|
||||
/*
 * Probe a fileserver to find its capabilities.
 *
 * The address cursor in fc->ac is rewound to the address list's current index
 * and afs_fs_get_capabilities() is tried on each address in turn.  The first
 * address that completes with no error marks the server as probed and, if the
 * server has been flagged as YFS, switches every address in the list over to
 * YFS_FS_SERVICE.  Unreachable/refused/timed-out addresses are skipped; any
 * other error stops the probe.
 *
 * Returns true if the server was successfully probed, false otherwise.
 *
 * TODO: Try service upgrade.
 */
static bool afs_do_probe_fileserver(struct afs_fs_cursor *fc)
{
	struct afs_addr_cursor *ac = &fc->ac;
	int addr;

	_enter("");

	ac->start = READ_ONCE(ac->alist->index);
	ac->index = ac->start;
	ac->error = 0;
	ac->begun = false;

	while (afs_iterate_addresses(ac)) {
		afs_fs_get_capabilities(afs_v2net(fc->vnode), fc->cbi->server,
					ac, fc->key);

		switch (ac->error) {
		case 0:
			goto probed;

		case -ECONNABORTED:
			/* The server answered with an abort; translate it. */
			ac->error = afs_abort_to_error(ac->abort_code);
			goto failed;

		case -ENOMEM:
		case -ENONET:
			goto failed;

		case -ENETUNREACH:
		case -EHOSTUNREACH:
		case -ECONNREFUSED:
		case -ETIMEDOUT:
		case -ETIME:
			/* This address is no good; move on to the next one. */
			continue;

		default:
			ac->error = afs_io_error(NULL, afs_io_error_fs_probe_fail);
			goto failed;
		}
	}

failed:
	afs_end_cursor(ac);
	return false;

probed:
	if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) {
		for (addr = 0; addr < ac->alist->nr_addrs; addr++)
			ac->alist->addrs[addr].srx_service = YFS_FS_SERVICE;
	}
	afs_end_cursor(ac);
	set_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags);
	return true;
}
|
||||
|
||||
/*
|
||||
* If we haven't already, try probing the fileserver to get its capabilities.
|
||||
* We try not to instigate parallel probes, but it's possible that the parallel
|
||||
* probes will fail due to authentication failure when ours would succeed.
|
||||
*
|
||||
* TODO: Try sending an anonymous probe if an authenticated probe fails.
|
||||
*/
|
||||
bool afs_probe_fileserver(struct afs_fs_cursor *fc)
|
||||
{
|
||||
bool success;
|
||||
int ret, retries = 0;
|
||||
|
||||
_enter("");
|
||||
|
||||
retry:
|
||||
if (test_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags)) {
|
||||
_leave(" = t");
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!test_and_set_bit_lock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags)) {
|
||||
success = afs_do_probe_fileserver(fc);
|
||||
clear_bit_unlock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags);
|
||||
wake_up_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING);
|
||||
_leave(" = t");
|
||||
return success;
|
||||
}
|
||||
|
||||
_debug("wait");
|
||||
ret = wait_on_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING,
|
||||
TASK_INTERRUPTIBLE);
|
||||
if (ret == -ERESTARTSYS) {
|
||||
fc->ac.error = ret;
|
||||
_leave(" = f [%d]", ret);
|
||||
return false;
|
||||
}
|
||||
|
||||
retries++;
|
||||
if (retries == 4) {
|
||||
fc->ac.error = -ESTALE;
|
||||
_leave(" = f [stale]");
|
||||
return false;
|
||||
}
|
||||
_debug("retry");
|
||||
goto retry;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get an update for a server's address list.
|
||||
*/
|
||||
|
@ -118,11 +118,11 @@ bool afs_annotate_server_list(struct afs_server_list *new,
|
||||
return false;
|
||||
|
||||
changed:
|
||||
/* Maintain the same current server as before if possible. */
|
||||
cur = old->servers[old->index].server;
|
||||
/* Maintain the same preferred server as before if possible. */
|
||||
cur = old->servers[old->preferred].server;
|
||||
for (j = 0; j < new->nr_servers; j++) {
|
||||
if (new->servers[j].server == cur) {
|
||||
new->index = j;
|
||||
new->preferred = j;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -23,6 +23,8 @@ struct afs_vlserver *afs_alloc_vlserver(const char *name, size_t name_len,
|
||||
if (vlserver) {
|
||||
atomic_set(&vlserver->usage, 1);
|
||||
rwlock_init(&vlserver->lock);
|
||||
init_waitqueue_head(&vlserver->probe_wq);
|
||||
spin_lock_init(&vlserver->probe_lock);
|
||||
vlserver->name_len = name_len;
|
||||
vlserver->port = port;
|
||||
memcpy(vlserver->name, name, name_len);
|
||||
@ -141,7 +143,7 @@ static struct afs_addr_list *afs_extract_vl_addrs(const u8 **_b, const u8 *end,
|
||||
|
||||
/* Start with IPv6 if available. */
|
||||
if (alist->nr_ipv4 < alist->nr_addrs)
|
||||
alist->index = alist->nr_ipv4;
|
||||
alist->preferred = alist->nr_ipv4;
|
||||
|
||||
*_b = b;
|
||||
return alist;
|
||||
@ -307,6 +309,8 @@ struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *cell,
|
||||
(vllist->nr_servers - j) * sizeof(struct afs_vlserver_entry));
|
||||
}
|
||||
|
||||
clear_bit(AFS_VLSERVER_FL_PROBED, &server->flags);
|
||||
|
||||
vllist->servers[j].priority = bs.priority;
|
||||
vllist->servers[j].weight = bs.weight;
|
||||
vllist->servers[j].server = server;
|
||||
|
273
fs/afs/vl_probe.c
Normal file
273
fs/afs/vl_probe.c
Normal file
@ -0,0 +1,273 @@
|
||||
/* AFS vlserver probing
|
||||
*
|
||||
* Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
|
||||
* Written by David Howells (dhowells@redhat.com)
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public Licence
|
||||
* as published by the Free Software Foundation; either version
|
||||
* 2 of the Licence, or (at your option) any later version.
|
||||
*/
|
||||
|
||||
#include <linux/sched.h>
|
||||
#include <linux/slab.h>
|
||||
#include "afs_fs.h"
|
||||
#include "internal.h"
|
||||
#include "protocol_yfs.h"
|
||||
|
||||
static bool afs_vl_probe_done(struct afs_vlserver *server)
|
||||
{
|
||||
if (!atomic_dec_and_test(&server->probe_outstanding))
|
||||
return false;
|
||||
|
||||
wake_up_var(&server->probe_outstanding);
|
||||
clear_bit_unlock(AFS_VLSERVER_FL_PROBING, &server->flags);
|
||||
wake_up_bit(&server->flags, AFS_VLSERVER_FL_PROBING);
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Process the result of probing a vlserver. This is called after successful
|
||||
* or failed delivery of an VL.GetCapabilities operation.
|
||||
*/
|
||||
void afs_vlserver_probe_result(struct afs_call *call)
|
||||
{
|
||||
struct afs_addr_list *alist = call->alist;
|
||||
struct afs_vlserver *server = call->reply[0];
|
||||
unsigned int server_index = (long)call->reply[1];
|
||||
unsigned int index = call->addr_ix;
|
||||
unsigned int rtt = UINT_MAX;
|
||||
bool have_result = false;
|
||||
u64 _rtt;
|
||||
int ret = call->error;
|
||||
|
||||
_enter("%s,%u,%u,%d,%d", server->name, server_index, index, ret, call->abort_code);
|
||||
|
||||
spin_lock(&server->probe_lock);
|
||||
|
||||
switch (ret) {
|
||||
case 0:
|
||||
server->probe.error = 0;
|
||||
goto responded;
|
||||
case -ECONNABORTED:
|
||||
if (!server->probe.responded) {
|
||||
server->probe.abort_code = call->abort_code;
|
||||
server->probe.error = ret;
|
||||
}
|
||||
goto responded;
|
||||
case -ENOMEM:
|
||||
case -ENONET:
|
||||
server->probe.local_failure = true;
|
||||
afs_io_error(call, afs_io_error_vl_probe_fail);
|
||||
goto out;
|
||||
case -ECONNRESET: /* Responded, but call expired. */
|
||||
case -ENETUNREACH:
|
||||
case -EHOSTUNREACH:
|
||||
case -ECONNREFUSED:
|
||||
case -ETIMEDOUT:
|
||||
case -ETIME:
|
||||
default:
|
||||
clear_bit(index, &alist->responded);
|
||||
set_bit(index, &alist->failed);
|
||||
if (!server->probe.responded &&
|
||||
(server->probe.error == 0 ||
|
||||
server->probe.error == -ETIMEDOUT ||
|
||||
server->probe.error == -ETIME))
|
||||
server->probe.error = ret;
|
||||
afs_io_error(call, afs_io_error_vl_probe_fail);
|
||||
goto out;
|
||||
}
|
||||
|
||||
responded:
|
||||
set_bit(index, &alist->responded);
|
||||
clear_bit(index, &alist->failed);
|
||||
|
||||
if (call->service_id == YFS_VL_SERVICE) {
|
||||
server->probe.is_yfs = true;
|
||||
set_bit(AFS_VLSERVER_FL_IS_YFS, &server->flags);
|
||||
alist->addrs[index].srx_service = call->service_id;
|
||||
} else {
|
||||
server->probe.not_yfs = true;
|
||||
if (!server->probe.is_yfs) {
|
||||
clear_bit(AFS_VLSERVER_FL_IS_YFS, &server->flags);
|
||||
alist->addrs[index].srx_service = call->service_id;
|
||||
}
|
||||
}
|
||||
|
||||
/* Get the RTT and scale it to fit into a 32-bit value that represents
|
||||
* over a minute of time so that we can access it with one instruction
|
||||
* on a 32-bit system.
|
||||
*/
|
||||
_rtt = rxrpc_kernel_get_rtt(call->net->socket, call->rxcall);
|
||||
_rtt /= 64;
|
||||
rtt = (_rtt > UINT_MAX) ? UINT_MAX : _rtt;
|
||||
if (rtt < server->probe.rtt) {
|
||||
server->probe.rtt = rtt;
|
||||
alist->preferred = index;
|
||||
have_result = true;
|
||||
}
|
||||
|
||||
smp_wmb(); /* Set rtt before responded. */
|
||||
server->probe.responded = true;
|
||||
set_bit(AFS_VLSERVER_FL_PROBED, &server->flags);
|
||||
out:
|
||||
spin_unlock(&server->probe_lock);
|
||||
|
||||
_debug("probe [%u][%u] %pISpc rtt=%u ret=%d",
|
||||
server_index, index, &alist->addrs[index].transport,
|
||||
(unsigned int)rtt, ret);
|
||||
|
||||
have_result |= afs_vl_probe_done(server);
|
||||
if (have_result) {
|
||||
server->probe.have_result = true;
|
||||
wake_up_var(&server->probe.have_result);
|
||||
wake_up_all(&server->probe_wq);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Probe all of a vlserver's addresses to find out the best route and to
|
||||
* query its capabilities.
|
||||
*/
|
||||
static int afs_do_probe_vlserver(struct afs_net *net,
|
||||
struct afs_vlserver *server,
|
||||
struct key *key,
|
||||
unsigned int server_index)
|
||||
{
|
||||
struct afs_addr_cursor ac = {
|
||||
.index = 0,
|
||||
};
|
||||
int ret;
|
||||
|
||||
_enter("%s", server->name);
|
||||
|
||||
read_lock(&server->lock);
|
||||
ac.alist = rcu_dereference_protected(server->addresses,
|
||||
lockdep_is_held(&server->lock));
|
||||
read_unlock(&server->lock);
|
||||
|
||||
atomic_set(&server->probe_outstanding, ac.alist->nr_addrs);
|
||||
memset(&server->probe, 0, sizeof(server->probe));
|
||||
server->probe.rtt = UINT_MAX;
|
||||
|
||||
for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++) {
|
||||
ret = afs_vl_get_capabilities(net, &ac, key, server,
|
||||
server_index, true);
|
||||
if (ret != -EINPROGRESS) {
|
||||
afs_vl_probe_done(server);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Send off probes to all unprobed servers.
|
||||
*/
|
||||
int afs_send_vl_probes(struct afs_net *net, struct key *key,
|
||||
struct afs_vlserver_list *vllist)
|
||||
{
|
||||
struct afs_vlserver *server;
|
||||
int i, ret;
|
||||
|
||||
for (i = 0; i < vllist->nr_servers; i++) {
|
||||
server = vllist->servers[i].server;
|
||||
if (test_bit(AFS_VLSERVER_FL_PROBED, &server->flags))
|
||||
continue;
|
||||
|
||||
if (!test_and_set_bit_lock(AFS_VLSERVER_FL_PROBING, &server->flags)) {
|
||||
ret = afs_do_probe_vlserver(net, server, key, i);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Wait for the first as-yet untried server to respond.
|
||||
*/
|
||||
int afs_wait_for_vl_probes(struct afs_vlserver_list *vllist,
|
||||
unsigned long untried)
|
||||
{
|
||||
struct wait_queue_entry *waits;
|
||||
struct afs_vlserver *server;
|
||||
unsigned int rtt = UINT_MAX;
|
||||
bool have_responders = false;
|
||||
int pref = -1, i;
|
||||
|
||||
_enter("%u,%lx", vllist->nr_servers, untried);
|
||||
|
||||
/* Only wait for servers that have a probe outstanding. */
|
||||
for (i = 0; i < vllist->nr_servers; i++) {
|
||||
if (test_bit(i, &untried)) {
|
||||
server = vllist->servers[i].server;
|
||||
if (!test_bit(AFS_VLSERVER_FL_PROBING, &server->flags))
|
||||
__clear_bit(i, &untried);
|
||||
if (server->probe.responded)
|
||||
have_responders = true;
|
||||
}
|
||||
}
|
||||
if (have_responders || !untried)
|
||||
return 0;
|
||||
|
||||
waits = kmalloc(array_size(vllist->nr_servers, sizeof(*waits)), GFP_KERNEL);
|
||||
if (!waits)
|
||||
return -ENOMEM;
|
||||
|
||||
for (i = 0; i < vllist->nr_servers; i++) {
|
||||
if (test_bit(i, &untried)) {
|
||||
server = vllist->servers[i].server;
|
||||
init_waitqueue_entry(&waits[i], current);
|
||||
add_wait_queue(&server->probe_wq, &waits[i]);
|
||||
}
|
||||
}
|
||||
|
||||
for (;;) {
|
||||
bool still_probing = false;
|
||||
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
for (i = 0; i < vllist->nr_servers; i++) {
|
||||
if (test_bit(i, &untried)) {
|
||||
server = vllist->servers[i].server;
|
||||
if (server->probe.responded)
|
||||
goto stop;
|
||||
if (test_bit(AFS_VLSERVER_FL_PROBING, &server->flags))
|
||||
still_probing = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!still_probing || unlikely(signal_pending(current)))
|
||||
goto stop;
|
||||
schedule();
|
||||
}
|
||||
|
||||
stop:
|
||||
set_current_state(TASK_RUNNING);
|
||||
|
||||
for (i = 0; i < vllist->nr_servers; i++) {
|
||||
if (test_bit(i, &untried)) {
|
||||
server = vllist->servers[i].server;
|
||||
if (server->probe.responded &&
|
||||
server->probe.rtt < rtt) {
|
||||
pref = i;
|
||||
rtt = server->probe.rtt;
|
||||
}
|
||||
|
||||
remove_wait_queue(&server->probe_wq, &waits[i]);
|
||||
}
|
||||
}
|
||||
|
||||
kfree(waits);
|
||||
|
||||
if (pref == -1 && signal_pending(current))
|
||||
return -ERESTARTSYS;
|
||||
|
||||
if (pref >= 0)
|
||||
vllist->preferred = pref;
|
||||
|
||||
_leave(" = 0 [%u]", pref);
|
||||
return 0;
|
||||
}
|
@ -58,8 +58,8 @@ static bool afs_start_vl_iteration(struct afs_vl_cursor *vc)
|
||||
if (!vc->server_list || !vc->server_list->nr_servers)
|
||||
return false;
|
||||
|
||||
vc->start = READ_ONCE(vc->server_list->index);
|
||||
vc->index = vc->start;
|
||||
vc->untried = (1UL << vc->server_list->nr_servers) - 1;
|
||||
vc->index = -1;
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -71,11 +71,12 @@ bool afs_select_vlserver(struct afs_vl_cursor *vc)
|
||||
{
|
||||
struct afs_addr_list *alist;
|
||||
struct afs_vlserver *vlserver;
|
||||
int error = vc->ac.error;
|
||||
u32 rtt;
|
||||
int error = vc->ac.error, abort_code, i;
|
||||
|
||||
_enter("%u/%u,%u/%u,%d,%d",
|
||||
vc->index, vc->start,
|
||||
vc->ac.index, vc->ac.start,
|
||||
_enter("%lx[%d],%lx[%d],%d,%d",
|
||||
vc->untried, vc->index,
|
||||
vc->ac.tried, vc->ac.index,
|
||||
error, vc->ac.abort_code);
|
||||
|
||||
if (vc->flags & AFS_VL_CURSOR_STOP) {
|
||||
@ -145,23 +146,52 @@ restart_from_beginning:
|
||||
start:
|
||||
_debug("start");
|
||||
|
||||
/* TODO: Consider checking the VL server list */
|
||||
|
||||
if (!afs_start_vl_iteration(vc))
|
||||
goto failed;
|
||||
|
||||
use_server:
|
||||
_debug("use");
|
||||
error = afs_send_vl_probes(vc->cell->net, vc->key, vc->server_list);
|
||||
if (error < 0)
|
||||
goto failed_set_error;
|
||||
|
||||
pick_server:
|
||||
_debug("pick [%lx]", vc->untried);
|
||||
|
||||
error = afs_wait_for_vl_probes(vc->server_list, vc->untried);
|
||||
if (error < 0)
|
||||
goto failed_set_error;
|
||||
|
||||
/* Pick the untried server with the lowest RTT. */
|
||||
vc->index = vc->server_list->preferred;
|
||||
if (test_bit(vc->index, &vc->untried))
|
||||
goto selected_server;
|
||||
|
||||
vc->index = -1;
|
||||
rtt = U32_MAX;
|
||||
for (i = 0; i < vc->server_list->nr_servers; i++) {
|
||||
struct afs_vlserver *s = vc->server_list->servers[i].server;
|
||||
|
||||
if (!test_bit(i, &vc->untried) || !s->probe.responded)
|
||||
continue;
|
||||
if (s->probe.rtt < rtt) {
|
||||
vc->index = i;
|
||||
rtt = s->probe.rtt;
|
||||
}
|
||||
}
|
||||
|
||||
if (vc->index == -1)
|
||||
goto no_more_servers;
|
||||
|
||||
selected_server:
|
||||
_debug("use %d", vc->index);
|
||||
__clear_bit(vc->index, &vc->untried);
|
||||
|
||||
/* We're starting on a different vlserver from the list. We need to
|
||||
* check it, find its address list and probe its capabilities before we
|
||||
* use it.
|
||||
*/
|
||||
ASSERTCMP(vc->ac.alist, ==, NULL);
|
||||
vlserver = vc->server_list->servers[vc->index].server;
|
||||
|
||||
// TODO: Check the vlserver occasionally
|
||||
//if (!afs_check_vlserver_record(vc, vlserver))
|
||||
// goto failed;
|
||||
vc->server = vlserver;
|
||||
|
||||
_debug("USING VLSERVER: %s", vlserver->name);
|
||||
|
||||
@ -173,62 +203,84 @@ use_server:
|
||||
|
||||
memset(&vc->ac, 0, sizeof(vc->ac));
|
||||
|
||||
/* Probe the current vlserver if we haven't done so yet. */
|
||||
#if 0 // TODO
|
||||
if (!test_bit(AFS_VLSERVER_FL_PROBED, &vlserver->flags)) {
|
||||
vc->ac.alist = afs_get_addrlist(alist);
|
||||
|
||||
if (!afs_probe_vlserver(vc)) {
|
||||
error = vc->ac.error;
|
||||
switch (error) {
|
||||
case -ENOMEM:
|
||||
case -ERESTARTSYS:
|
||||
case -EINTR:
|
||||
goto failed_set_error;
|
||||
default:
|
||||
goto next_server;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (!vc->ac.alist)
|
||||
vc->ac.alist = alist;
|
||||
else
|
||||
afs_put_addrlist(alist);
|
||||
|
||||
vc->ac.start = READ_ONCE(alist->index);
|
||||
vc->ac.index = vc->ac.start;
|
||||
vc->ac.index = -1;
|
||||
|
||||
iterate_address:
|
||||
ASSERT(vc->ac.alist);
|
||||
_debug("iterate %d/%d", vc->ac.index, vc->ac.alist->nr_addrs);
|
||||
/* Iterate over the current server's address list to try and find an
|
||||
* address on which it will respond to us.
|
||||
*/
|
||||
if (!afs_iterate_addresses(&vc->ac))
|
||||
goto next_server;
|
||||
|
||||
_debug("VL address %d/%d", vc->ac.index, vc->ac.alist->nr_addrs);
|
||||
|
||||
_leave(" = t %pISpc", &vc->ac.alist->addrs[vc->ac.index].transport);
|
||||
return true;
|
||||
|
||||
next_server:
|
||||
_debug("next");
|
||||
afs_end_cursor(&vc->ac);
|
||||
vc->index++;
|
||||
if (vc->index >= vc->server_list->nr_servers)
|
||||
vc->index = 0;
|
||||
if (vc->index != vc->start)
|
||||
goto use_server;
|
||||
goto pick_server;
|
||||
|
||||
no_more_servers:
|
||||
/* That's all the servers poked to no good effect. Try again if some
|
||||
* of them were busy.
|
||||
*/
|
||||
if (vc->flags & AFS_VL_CURSOR_RETRY)
|
||||
goto restart_from_beginning;
|
||||
|
||||
goto failed;
|
||||
abort_code = 0;
|
||||
error = -EDESTADDRREQ;
|
||||
for (i = 0; i < vc->server_list->nr_servers; i++) {
|
||||
struct afs_vlserver *s = vc->server_list->servers[i].server;
|
||||
int probe_error = READ_ONCE(s->probe.error);
|
||||
|
||||
switch (probe_error) {
|
||||
case 0:
|
||||
continue;
|
||||
default:
|
||||
if (error == -ETIMEDOUT ||
|
||||
error == -ETIME)
|
||||
continue;
|
||||
case -ETIMEDOUT:
|
||||
case -ETIME:
|
||||
if (error == -ENOMEM ||
|
||||
error == -ENONET)
|
||||
continue;
|
||||
case -ENOMEM:
|
||||
case -ENONET:
|
||||
if (error == -ENETUNREACH)
|
||||
continue;
|
||||
case -ENETUNREACH:
|
||||
if (error == -EHOSTUNREACH)
|
||||
continue;
|
||||
case -EHOSTUNREACH:
|
||||
if (error == -ECONNREFUSED)
|
||||
continue;
|
||||
case -ECONNREFUSED:
|
||||
if (error == -ECONNRESET)
|
||||
continue;
|
||||
case -ECONNRESET: /* Responded, but call expired. */
|
||||
if (error == -ECONNABORTED)
|
||||
continue;
|
||||
case -ECONNABORTED:
|
||||
abort_code = s->probe.abort_code;
|
||||
error = probe_error;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (error == -ECONNABORTED)
|
||||
error = afs_abort_to_error(abort_code);
|
||||
|
||||
failed_set_error:
|
||||
vc->error = error;
|
||||
failed:
|
||||
vc->flags |= AFS_VL_CURSOR_STOP;
|
||||
afs_end_cursor(&vc->ac);
|
||||
@ -250,8 +302,8 @@ static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc)
|
||||
|
||||
rcu_read_lock();
|
||||
pr_notice("EDESTADDR occurred\n");
|
||||
pr_notice("VC: st=%u ix=%u ni=%hu fl=%hx err=%hd\n",
|
||||
vc->start, vc->index, vc->nr_iterations, vc->flags, vc->error);
|
||||
pr_notice("VC: ut=%lx ix=%u ni=%hu fl=%hx err=%hd\n",
|
||||
vc->untried, vc->index, vc->nr_iterations, vc->flags, vc->error);
|
||||
|
||||
if (vc->server_list) {
|
||||
const struct afs_vlserver_list *sl = vc->server_list;
|
||||
@ -259,26 +311,25 @@ static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc)
|
||||
sl->nr_servers, sl->index);
|
||||
for (i = 0; i < sl->nr_servers; i++) {
|
||||
const struct afs_vlserver *s = sl->servers[i].server;
|
||||
pr_notice("VC: server fl=%lx %s+%hu\n",
|
||||
s->flags, s->name, s->port);
|
||||
pr_notice("VC: server %s+%hu fl=%lx E=%hd\n",
|
||||
s->name, s->port, s->flags, s->probe.error);
|
||||
if (s->addresses) {
|
||||
const struct afs_addr_list *a =
|
||||
rcu_dereference(s->addresses);
|
||||
pr_notice("VC: - av=%u nr=%u/%u/%u ax=%u\n",
|
||||
a->version,
|
||||
pr_notice("VC: - nr=%u/%u/%u pf=%u\n",
|
||||
a->nr_ipv4, a->nr_addrs, a->max_addrs,
|
||||
a->index);
|
||||
pr_notice("VC: - pr=%lx yf=%lx\n",
|
||||
a->probed, a->yfs);
|
||||
a->preferred);
|
||||
pr_notice("VC: - pr=%lx R=%lx F=%lx\n",
|
||||
a->probed, a->responded, a->failed);
|
||||
if (a == vc->ac.alist)
|
||||
pr_notice("VC: - current\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pr_notice("AC: as=%u ax=%u ac=%d er=%d b=%u r=%u ni=%hu\n",
|
||||
vc->ac.start, vc->ac.index, vc->ac.abort_code, vc->ac.error,
|
||||
vc->ac.begun, vc->ac.responded, vc->ac.nr_iterations);
|
||||
pr_notice("AC: t=%lx ax=%u ac=%d er=%d r=%u ni=%u\n",
|
||||
vc->ac.tried, vc->ac.index, vc->ac.abort_code, vc->ac.error,
|
||||
vc->ac.responded, vc->ac.nr_iterations);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
|
@ -348,12 +348,18 @@ static int afs_deliver_vl_get_capabilities(struct afs_call *call)
|
||||
break;
|
||||
}
|
||||
|
||||
call->reply[0] = (void *)(unsigned long)call->service_id;
|
||||
|
||||
_leave(" = 0 [done]");
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void afs_destroy_vl_get_capabilities(struct afs_call *call)
|
||||
{
|
||||
struct afs_vlserver *server = call->reply[0];
|
||||
|
||||
afs_put_vlserver(call->net, server);
|
||||
afs_flat_call_destructor(call);
|
||||
}
|
||||
|
||||
/*
|
||||
* VL.GetCapabilities operation type
|
||||
*/
|
||||
@ -361,7 +367,8 @@ static const struct afs_call_type afs_RXVLGetCapabilities = {
|
||||
.name = "VL.GetCapabilities",
|
||||
.op = afs_VL_GetCapabilities,
|
||||
.deliver = afs_deliver_vl_get_capabilities,
|
||||
.destructor = afs_flat_call_destructor,
|
||||
.done = afs_vlserver_probe_result,
|
||||
.destructor = afs_destroy_vl_get_capabilities,
|
||||
};
|
||||
|
||||
/*
|
||||
@ -371,8 +378,12 @@ static const struct afs_call_type afs_RXVLGetCapabilities = {
|
||||
* We use this to probe for service upgrade to determine what the server at the
|
||||
* other end supports.
|
||||
*/
|
||||
int afs_vl_get_capabilities(struct afs_net *net, struct afs_addr_cursor *ac,
|
||||
struct key *key)
|
||||
int afs_vl_get_capabilities(struct afs_net *net,
|
||||
struct afs_addr_cursor *ac,
|
||||
struct key *key,
|
||||
struct afs_vlserver *server,
|
||||
unsigned int server_index,
|
||||
bool async)
|
||||
{
|
||||
struct afs_call *call;
|
||||
__be32 *bp;
|
||||
@ -384,9 +395,10 @@ int afs_vl_get_capabilities(struct afs_net *net, struct afs_addr_cursor *ac,
|
||||
return -ENOMEM;
|
||||
|
||||
call->key = key;
|
||||
call->upgrade = true; /* Let's see if this is a YFS server */
|
||||
call->reply[0] = (void *)VLGETCAPABILITIES;
|
||||
call->ret_reply0 = true;
|
||||
call->reply[0] = afs_get_vlserver(server);
|
||||
call->reply[1] = (void *)(long)server_index;
|
||||
call->upgrade = true;
|
||||
call->want_reply_time = true;
|
||||
|
||||
/* marshall the parameters */
|
||||
bp = call->request;
|
||||
@ -394,7 +406,7 @@ int afs_vl_get_capabilities(struct afs_net *net, struct afs_addr_cursor *ac,
|
||||
|
||||
/* Can't take a ref on server */
|
||||
trace_afs_make_vl_call(call);
|
||||
return afs_make_call(ac, call, GFP_KERNEL, false);
|
||||
return afs_make_call(ac, call, GFP_KERNEL, async);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -591,11 +603,6 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
|
||||
}
|
||||
|
||||
alist = call->reply[0];
|
||||
|
||||
/* Start with IPv6 if available. */
|
||||
if (alist->nr_ipv4 < alist->nr_addrs)
|
||||
alist->index = alist->nr_ipv4;
|
||||
|
||||
_leave(" = 0 [done]");
|
||||
return 0;
|
||||
}
|
||||
|
@ -82,22 +82,6 @@ static struct afs_vldb_entry *afs_vl_lookup_vldb(struct afs_cell *cell,
|
||||
return ERR_PTR(-ERESTARTSYS);
|
||||
|
||||
while (afs_select_vlserver(&vc)) {
|
||||
if (!test_bit(vc.ac.index, &vc.ac.alist->probed)) {
|
||||
ret = afs_vl_get_capabilities(cell->net, &vc.ac, key);
|
||||
switch (ret) {
|
||||
case VL_SERVICE:
|
||||
clear_bit(vc.ac.index, &vc.ac.alist->yfs);
|
||||
set_bit(vc.ac.index, &vc.ac.alist->probed);
|
||||
vc.ac.alist->addrs[vc.ac.index].srx_service = ret;
|
||||
break;
|
||||
case YFS_VL_SERVICE:
|
||||
set_bit(vc.ac.index, &vc.ac.alist->yfs);
|
||||
set_bit(vc.ac.index, &vc.ac.alist->probed);
|
||||
vc.ac.alist->addrs[vc.ac.index].srx_service = ret;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
vldb = afs_vl_get_entry_by_name_u(&vc, volname, volnamesz);
|
||||
}
|
||||
|
||||
|
@ -137,6 +137,7 @@ enum afs_io_error {
|
||||
afs_io_error_extract,
|
||||
afs_io_error_fs_probe_fail,
|
||||
afs_io_error_vl_lookup_fail,
|
||||
afs_io_error_vl_probe_fail,
|
||||
};
|
||||
|
||||
enum afs_file_error {
|
||||
@ -261,7 +262,8 @@ enum afs_file_error {
|
||||
EM(afs_io_error_cm_reply, "CM_REPLY") \
|
||||
EM(afs_io_error_extract, "EXTRACT") \
|
||||
EM(afs_io_error_fs_probe_fail, "FS_PROBE_FAIL") \
|
||||
E_(afs_io_error_vl_lookup_fail, "VL_LOOKUP_FAIL")
|
||||
EM(afs_io_error_vl_lookup_fail, "VL_LOOKUP_FAIL") \
|
||||
E_(afs_io_error_vl_probe_fail, "VL_PROBE_FAIL")
|
||||
|
||||
#define afs_file_errors \
|
||||
EM(afs_file_error_dir_bad_magic, "DIR_BAD_MAGIC") \
|
||||
|
Loading…
Reference in New Issue
Block a user