From c5f9d9db83d9f84d2b4aae5a1b29d9b582ccff2f Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 4 May 2020 16:12:55 +0100 Subject: [PATCH 1/5] cachefiles: Fix corruption of the return value in cachefiles_read_or_alloc_pages() The patch which changed cachefiles from calling ->bmap() to using the bmap() wrapper overwrote the running return value with the result of calling bmap(). This causes an assertion failure elsewhere in the code. Fix this by using ret2 rather than ret to hold the return value. The oops looks like: kernel BUG at fs/nfs/fscache.c:468! ... RIP: 0010:__nfs_readpages_from_fscache+0x18b/0x190 [nfs] ... Call Trace: nfs_readpages+0xbf/0x1c0 [nfs] ? __alloc_pages_nodemask+0x16c/0x320 read_pages+0x67/0x1a0 __do_page_cache_readahead+0x1cf/0x1f0 ondemand_readahead+0x172/0x2b0 page_cache_async_readahead+0xaa/0xe0 generic_file_buffered_read+0x852/0xd50 ? mem_cgroup_commit_charge+0x6e/0x140 ? nfs4_have_delegation+0x19/0x30 [nfsv4] generic_file_read_iter+0x100/0x140 ? nfs_revalidate_mapping+0x176/0x2b0 [nfs] nfs_file_read+0x6d/0xc0 [nfs] new_sync_read+0x11a/0x1c0 __vfs_read+0x29/0x40 vfs_read+0x8e/0x140 ksys_read+0x61/0xd0 __x64_sys_read+0x1a/0x20 do_syscall_64+0x60/0x1e0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f5d148267e0 Fixes: 10d83e11a582 ("cachefiles: drop direct usage of ->bmap method.") Reported-by: David Wysochanski Signed-off-by: David Howells Tested-by: David Wysochanski cc: Carlos Maiolino --- fs/cachefiles/rdwr.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 1dc97f2d6201..d3d78176b23c 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -398,7 +398,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, struct inode *inode; sector_t block; unsigned shift; - int ret; + int ret, ret2; object = container_of(op->op.object, struct cachefiles_object, fscache); @@ -430,8 +430,8 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, block = page->index; block <<= shift; - ret = bmap(inode, &block); - ASSERT(ret < 0); + ret2 = bmap(inode, &block); + ASSERT(ret2 == 0); _debug("%llx -> %llx", (unsigned long long) (page->index << shift), @@ -739,8 +739,8 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op, block = page->index; block <<= shift; - ret = bmap(inode, &block); - ASSERT(!ret); + ret2 = bmap(inode, &block); + ASSERT(ret2 == 0); _debug("%llx -> %llx", (unsigned long long) (page->index << shift), From d9bfced1fbcb35b28d8fbed4e785d2807055ed2b Mon Sep 17 00:00:00 2001 From: Dave Wysochanski Date: Wed, 15 Apr 2020 16:14:41 -0400 Subject: [PATCH 2/5] NFS: Fix fscache super_cookie index_key from changing after umount Commit 402cb8dda949 ("fscache: Attach the index key and aux data to the cookie") added the index_key and index_key_len parameters to fscache_acquire_cookie(), and updated the callers in the NFS client. One of the callers was inside nfs_fscache_get_super_cookie() and was changed to use the full struct nfs_fscache_key as the index_key. However, a couple members of this structure contain pointers and thus will change each time the same NFS share is remounted. Since index_key is used for fscache_cookie->key_hash and this subsequently is used to compare cookies, the effectiveness of fscache with NFS is reduced to the point at which a umount occurs. Any subsequent remount of the same share will cause a unique NFS super_block index_key and key_hash to be generated for the same data, rendering any prior fscache data unable to be found. A simple reproducer demonstrates the problem. 1. Mount share with 'fsc', create a file, drop page cache systemctl start cachefilesd mount -o vers=3,fsc 127.0.0.1:/export /mnt dd if=/dev/zero of=/mnt/file1.bin bs=4096 count=1 echo 3 > /proc/sys/vm/drop_caches 2. Read file into page cache and fscache, then unmount dd if=/mnt/file1.bin of=/dev/null bs=4096 count=1 umount /mnt 3. Remount and re-read which should come from fscache mount -o vers=3,fsc 127.0.0.1:/export /mnt echo 3 > /proc/sys/vm/drop_caches dd if=/mnt/file1.bin of=/dev/null bs=4096 count=1 4. Check for READ ops in mountstats - there should be none grep READ: /proc/self/mountstats Looking at the history and the removed function, nfs_super_get_key(), we should only use nfs_fscache_key.key plus any uniquifier, for the fscache index_key. Fixes: 402cb8dda949 ("fscache: Attach the index key and aux data to the cookie") Signed-off-by: Dave Wysochanski Signed-off-by: David Howells --- fs/nfs/fscache.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index 1abf126c2df4..8eff1fd806b1 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -188,7 +188,8 @@ void nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq, int /* create a cache index for looking up filehandles */ nfss->fscache = fscache_acquire_cookie(nfss->nfs_client->fscache, &nfs_fscache_super_index_def, - key, sizeof(*key) + ulen, + &key->key, + sizeof(key->key) + ulen, NULL, 0, nfss, 0, true); dfprintk(FSCACHE, "NFS: get superblock cookie (0x%p/0x%p)\n", From 15751612734ca0c419ac43ce986c9badcb5e2829 Mon Sep 17 00:00:00 2001 From: Dave Wysochanski Date: Wed, 15 Apr 2020 16:14:42 -0400 Subject: [PATCH 3/5] NFS: Fix fscache super_cookie allocation Commit f2aedb713c28 ("NFS: Add fs_context support.") reworked NFS mount code paths for fs_context support which included super_block initialization. In the process there was an extra return left in the code and so we never call nfs_fscache_get_super_cookie even if 'fsc' is given on as mount option. In addition, there is an extra check inside nfs_fscache_get_super_cookie for the NFS_OPTION_FSCACHE which is unnecessary since the only caller nfs_get_cache_cookie checks this flag. Fixes: f2aedb713c28 ("NFS: Add fs_context support.") Signed-off-by: Dave Wysochanski Signed-off-by: David Howells --- fs/nfs/fscache.c | 2 -- fs/nfs/super.c | 1 - 2 files changed, 3 deletions(-) diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index 8eff1fd806b1..f51718415606 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -118,8 +118,6 @@ void nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq, int nfss->fscache_key = NULL; nfss->fscache = NULL; - if (!(nfss->options & NFS_OPTION_FSCACHE)) - return; if (!uniq) { uniq = ""; ulen = 1; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 59ef3b13ccca..cc34aa3a8ba4 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1189,7 +1189,6 @@ static void nfs_get_cache_cookie(struct super_block *sb, uniq = ctx->fscache_uniq; ulen = strlen(ctx->fscache_uniq); } - return; } nfs_fscache_get_super_cookie(sb, uniq, ulen); From 50eaa652b54df1e2b48dc398d9e6114c9ed080eb Mon Sep 17 00:00:00 2001 From: Dave Wysochanski Date: Thu, 16 Apr 2020 06:06:08 -0400 Subject: [PATCH 4/5] NFSv4: Fix fscache cookie aux_data to ensure change_attr is included Commit 402cb8dda949 ("fscache: Attach the index key and aux data to the cookie") added the aux_data and aux_data_len to parameters to fscache_acquire_cookie(), and updated the callers in the NFS client. In the process it modified the aux_data to include the change_attr, but missed adding change_attr to a couple places where aux_data was used. Specifically, when opening a file and the change_attr is not added, the following attempt to lookup an object will fail inside cachefiles_check_object_xattr() = -116 due to nfs_fscache_inode_check_aux() failing memcmp on auxdata and returning FSCACHE_CHECKAUX_OBSOLETE. Fix this by adding nfs_fscache_update_auxdata() to set the auxdata from all relevant fields in the inode, including the change_attr. Fixes: 402cb8dda949 ("fscache: Attach the index key and aux data to the cookie") Signed-off-by: Dave Wysochanski Signed-off-by: David Howells --- fs/nfs/fscache.c | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index f51718415606..a60df88efc40 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -225,6 +225,19 @@ void nfs_fscache_release_super_cookie(struct super_block *sb) } } +static void nfs_fscache_update_auxdata(struct nfs_fscache_inode_auxdata *auxdata, + struct nfs_inode *nfsi) +{ + memset(auxdata, 0, sizeof(*auxdata)); + auxdata->mtime_sec = nfsi->vfs_inode.i_mtime.tv_sec; + auxdata->mtime_nsec = nfsi->vfs_inode.i_mtime.tv_nsec; + auxdata->ctime_sec = nfsi->vfs_inode.i_ctime.tv_sec; + auxdata->ctime_nsec = nfsi->vfs_inode.i_ctime.tv_nsec; + + if (NFS_SERVER(&nfsi->vfs_inode)->nfs_client->rpc_ops->version == 4) + auxdata->change_attr = inode_peek_iversion_raw(&nfsi->vfs_inode); +} + /* * Initialise the per-inode cache cookie pointer for an NFS inode. */ @@ -238,14 +251,7 @@ void nfs_fscache_init_inode(struct inode *inode) if (!(nfss->fscache && S_ISREG(inode->i_mode))) return; - memset(&auxdata, 0, sizeof(auxdata)); - auxdata.mtime_sec = nfsi->vfs_inode.i_mtime.tv_sec; - auxdata.mtime_nsec = nfsi->vfs_inode.i_mtime.tv_nsec; - auxdata.ctime_sec = nfsi->vfs_inode.i_ctime.tv_sec; - auxdata.ctime_nsec = nfsi->vfs_inode.i_ctime.tv_nsec; - - if (NFS_SERVER(&nfsi->vfs_inode)->nfs_client->rpc_ops->version == 4) - auxdata.change_attr = inode_peek_iversion_raw(&nfsi->vfs_inode); + nfs_fscache_update_auxdata(&auxdata, nfsi); nfsi->fscache = fscache_acquire_cookie(NFS_SB(inode->i_sb)->fscache, &nfs_fscache_inode_object_def, @@ -265,11 +271,7 @@ void nfs_fscache_clear_inode(struct inode *inode) dfprintk(FSCACHE, "NFS: clear cookie (0x%p/0x%p)\n", nfsi, cookie); - memset(&auxdata, 0, sizeof(auxdata)); - auxdata.mtime_sec = nfsi->vfs_inode.i_mtime.tv_sec; - auxdata.mtime_nsec = nfsi->vfs_inode.i_mtime.tv_nsec; - auxdata.ctime_sec = nfsi->vfs_inode.i_ctime.tv_sec; - auxdata.ctime_nsec = nfsi->vfs_inode.i_ctime.tv_nsec; + nfs_fscache_update_auxdata(&auxdata, nfsi); fscache_relinquish_cookie(cookie, &auxdata, false); nfsi->fscache = NULL; } @@ -309,11 +311,7 @@ void nfs_fscache_open_file(struct inode *inode, struct file *filp) if (!fscache_cookie_valid(cookie)) return; - memset(&auxdata, 0, sizeof(auxdata)); - auxdata.mtime_sec = nfsi->vfs_inode.i_mtime.tv_sec; - auxdata.mtime_nsec = nfsi->vfs_inode.i_mtime.tv_nsec; - auxdata.ctime_sec = nfsi->vfs_inode.i_ctime.tv_sec; - auxdata.ctime_nsec = nfsi->vfs_inode.i_ctime.tv_nsec; + nfs_fscache_update_auxdata(&auxdata, nfsi); if (inode_is_open_for_write(inode)) { dfprintk(FSCACHE, "NFS: nfsi 0x%p disabling cache\n", nfsi); From 7bb0c5338436dae953622470d52689265867f032 Mon Sep 17 00:00:00 2001 From: Lei Xue Date: Thu, 7 May 2020 08:50:22 -0400 Subject: [PATCH 5/5] cachefiles: Fix race between read_waiter and read_copier involving op->to_do There is a potential race in fscache operation enqueuing for reading and copying multiple pages from cachefiles to netfs. The problem can be seen easily on a heavy loaded system (for example many processes reading files continually on an NFS share covered by fscache triggered this problem within a few minutes). The race is due to cachefiles_read_waiter() adding the op to the monitor to_do list and then then drop the object->work_lock spinlock before completing fscache_enqueue_operation(). Once the lock is dropped, cachefiles_read_copier() grabs the op, completes processing it, and makes it through fscache_retrieval_complete() which sets the op->state to the final state of FSCACHE_OP_ST_COMPLETE(4). When cachefiles_read_waiter() finally gets through the remainder of fscache_enqueue_operation() it sees the invalid state, and hits the ASSERTCMP and the following oops is seen: [ 2259.612361] FS-Cache: [ 2259.614785] FS-Cache: Assertion failed [ 2259.618639] FS-Cache: 4 == 5 is false [ 2259.622456] ------------[ cut here ]------------ [ 2259.627190] kernel BUG at fs/fscache/operation.c:70! ... [ 2259.791675] RIP: 0010:[] [] fscache_enqueue_operation+0xff/0x170 [fscache] [ 2259.802059] RSP: 0000:ffffa0263d543be0 EFLAGS: 00010046 [ 2259.807521] RAX: 0000000000000019 RBX: ffffa01a4d390480 RCX: 0000000000000006 [ 2259.814847] RDX: 0000000000000000 RSI: 0000000000000046 RDI: ffffa0263d553890 [ 2259.822176] RBP: ffffa0263d543be8 R08: 0000000000000000 R09: ffffa0263c2d8708 [ 2259.829502] R10: 0000000000001e7f R11: 0000000000000000 R12: ffffa01a4d390480 [ 2259.844483] R13: ffff9fa9546c5920 R14: ffffa0263d543c80 R15: ffffa0293ff9bf10 [ 2259.859554] FS: 00007f4b6efbd700(0000) GS:ffffa0263d540000(0000) knlGS:0000000000000000 [ 2259.875571] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 2259.889117] CR2: 00007f49e1624ff0 CR3: 0000012b38b38000 CR4: 00000000007607e0 [ 2259.904015] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 2259.918764] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 2259.933449] PKRU: 55555554 [ 2259.943654] Call Trace: [ 2259.953592] [ 2259.955577] [] cachefiles_read_waiter+0x92/0xf0 [cachefiles] [ 2259.978039] [] __wake_up_common+0x82/0x120 [ 2259.991392] [] __wake_up_common_lock+0x83/0xc0 [ 2260.004930] [] ? task_rq_unlock+0x20/0x20 [ 2260.017863] [] __wake_up+0x13/0x20 [ 2260.030230] [] __wake_up_bit+0x50/0x70 [ 2260.042535] [] unlock_page+0x2b/0x30 [ 2260.054495] [] page_endio+0x29/0x90 [ 2260.066184] [] mpage_end_io+0x51/0x80 CPU1 cachefiles_read_waiter() 20 static int cachefiles_read_waiter(wait_queue_entry_t *wait, unsigned mode, 21 int sync, void *_key) 22 { ... 61 spin_lock(&object->work_lock); 62 list_add_tail(&monitor->op_link, &op->to_do); 63 spin_unlock(&object->work_lock); 64 65 fscache_enqueue_retrieval(op); 182 static inline void fscache_enqueue_retrieval(struct fscache_retrieval *op) 183 { 184 fscache_enqueue_operation(&op->op); 185 } 58 void fscache_enqueue_operation(struct fscache_operation *op) 59 { 60 struct fscache_cookie *cookie = op->object->cookie; 61 62 _enter("{OBJ%x OP%x,%u}", 63 op->object->debug_id, op->debug_id, atomic_read(&op->usage)); 64 65 ASSERT(list_empty(&op->pend_link)); 66 ASSERT(op->processor != NULL); 67 ASSERT(fscache_object_is_available(op->object)); 68 ASSERTCMP(atomic_read(&op->usage), >, 0); CPU2 cachefiles_read_copier() 168 while (!list_empty(&op->to_do)) { ... 202 fscache_end_io(op, monitor->netfs_page, error); 203 put_page(monitor->netfs_page); 204 fscache_retrieval_complete(op, 1); CPU1 58 void fscache_enqueue_operation(struct fscache_operation *op) 59 { ... 69 ASSERTIFCMP(op->state != FSCACHE_OP_ST_IN_PROGRESS, 70 op->state, ==, FSCACHE_OP_ST_CANCELLED); Signed-off-by: Lei Xue Signed-off-by: Dave Wysochanski Signed-off-by: David Howells --- fs/cachefiles/rdwr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index d3d78176b23c..e7726f5f1241 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -60,9 +60,9 @@ static int cachefiles_read_waiter(wait_queue_entry_t *wait, unsigned mode, object = container_of(op->op.object, struct cachefiles_object, fscache); spin_lock(&object->work_lock); list_add_tail(&monitor->op_link, &op->to_do); + fscache_enqueue_retrieval(op); spin_unlock(&object->work_lock); - fscache_enqueue_retrieval(op); fscache_put_retrieval(op); return 0; }