summary | refs | log | tree | commit | diff
path: root/fs/afs/server.c
diff options
context:
space:
mode:
author: David Howells <dhowells@redhat.com> 2020-04-24 15:10:00 +0100
committer: David Howells <dhowells@redhat.com> 2020-05-31 15:19:51 +0100
commit: f6cbb368bcb0bc4fa7c11554d5293658bb4b26a2 (patch)
tree: 49b9ad0705135c3e985ce6560b793ab038a2b111 /fs/afs/server.c
parent: 977e5f8ed0ab2786755f8d2a96b78a3c7320f7c4 (diff)
afs: Actively poll fileservers to maintain NAT or firewall openings
When an AFS client accesses a file, it receives a limited-duration callback promise that the server will notify it if another client changes a file. This callback duration can be a few hours in length.

If a client mounts a volume and then an application prevents it from being unmounted, say by chdir'ing into it, but then does nothing for some time, the rxrpc_peer record will expire and rxrpc-level keepalive will cease.

If there is NAT or a firewall between the client and the server, the route back for the server may close after a comparatively short duration, meaning that attempts by the server to notify the client may then bounce.

The client, however, may (so far as it knows) still have a valid unexpired promise and will then rely on its cached data and will not see changes made on the server by a third party until it incidentally rechecks the status or the promise needs renewal.

To deal with this, the client needs to regularly probe the server. This has two effects: firstly, it keeps a route open back for the server, and secondly, it causes the server to disgorge any notifications that got queued up because they couldn't be sent.

Fix this by adding a mechanism to emit regular probes.

Two levels of probing are made available: Under normal circumstances the 'slow' queue will be used for a fileserver - this just probes the preferred address once every 5 mins or so; however, if the server fails to respond to any probes, the server will shift to the 'fast' queue from which all its interfaces will be probed every 30s. When it finally responds, the record will switch back to the slow queue.

Further notes:

(1) Probing is no longer driven from the fileserver rotation algorithm.

(2) Probes are dispatched to all interfaces on a fileserver when an afs_server object is set up to record it.

(3) The afs_server object is removed from the probe queues when we start to probe it. afs_is_probing_server() returns true if it's not listed - i.e. it's undergoing probing.
(4) The afs_server object is added back on to the probe queue when the final outstanding probe completes, but the probed_at time is set when we're about to launch a probe so that it's not dependent on the probe duration.

(5) The timer and the work item added for this must be handed a count on net->servers_outstanding, which they hand on or release. This makes sure that network namespace cleanup waits for them.

Fixes: d2ddc776a458 ("afs: Overhaul volume and server record caching and fileserver rotation")
Reported-by: Dave Botsch <botsch@cnf.cornell.edu>
Signed-off-by: David Howells <dhowells@redhat.com>
Diffstat (limited to 'fs/afs/server.c')
-rw-r--r-- fs/afs/server.c 19
1 files changed, 8 insertions, 11 deletions
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 4969a681f8f5..3f707b5ecb62 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -14,17 +14,6 @@
static unsigned afs_server_gc_delay = 10; /* Server record timeout in seconds */
static atomic_t afs_server_debug_id;
-static void afs_inc_servers_outstanding(struct afs_net *net)
-{
- atomic_inc(&net->servers_outstanding);
-}
-
-static void afs_dec_servers_outstanding(struct afs_net *net)
-{
- if (atomic_dec_and_test(&net->servers_outstanding))
- wake_up_var(&net->servers_outstanding);
-}
-
static struct afs_server *afs_maybe_use_server(struct afs_server *,
enum afs_server_trace);
static void __afs_put_server(struct afs_net *, struct afs_server *);
@@ -226,6 +215,7 @@ static struct afs_server *afs_alloc_server(struct afs_net *net,
INIT_HLIST_HEAD(&server->cb_volumes);
rwlock_init(&server->cb_break_lock);
init_waitqueue_head(&server->probe_wq);
+ INIT_LIST_HEAD(&server->probe_link);
spin_lock_init(&server->probe_lock);
afs_inc_servers_outstanding(net);
@@ -295,6 +285,12 @@ struct afs_server *afs_lookup_server(struct afs_cell *cell, struct key *key,
if (server != candidate) {
afs_put_addrlist(alist);
kfree(candidate);
+ } else {
+ /* Immediately dispatch an asynchronous probe to each interface
+ * on the fileserver. This will make sure the repeat-probing
+ * service is started.
+ */
+ afs_fs_probe_fileserver(cell->net, server, key, true);
}
return server;
@@ -464,6 +460,7 @@ static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list)
trace_afs_server(server, atomic_read(&server->ref),
active, afs_server_trace_gc);
rb_erase(&server->uuid_rb, &net->fs_servers);
+ list_del(&server->probe_link);
hlist_del_rcu(&server->proc_link);
if (!hlist_unhashed(&server->addr4_link))
hlist_del_rcu(&server->addr4_link);