forked from Minki/linux
027690c75e
I made every global per-network-namespace instead. But perhaps doing
that to this slab was a step too far.
The kmem_cache_create call in our net init method also seems to be
responsible for this lockdep warning:
[ 45.163710] Unable to find swap-space signature
[ 45.375718] trinity-c1 (855): attempted to duplicate a private mapping with mremap. This is not supported.
[ 46.055744] futex_wake_op: trinity-c1 tries to shift op by -209; fix this program
[ 51.011723]
[ 51.013378] ======================================================
[ 51.013875] WARNING: possible circular locking dependency detected
[ 51.014378] 5.2.0-rc2 #1 Not tainted
[ 51.014672] ------------------------------------------------------
[ 51.015182] trinity-c2/886 is trying to acquire lock:
[ 51.015593] 000000005405f099 (slab_mutex){+.+.}, at: slab_attr_store+0xa2/0x130
[ 51.016190]
[ 51.016190] but task is already holding lock:
[ 51.016652] 00000000ac662005 (kn->count#43){++++}, at: kernfs_fop_write+0x286/0x500
[ 51.017266]
[ 51.017266] which lock already depends on the new lock.
[ 51.017266]
[ 51.017909]
[ 51.017909] the existing dependency chain (in reverse order) is:
[ 51.018497]
[ 51.018497] -> #1 (kn->count#43){++++}:
[ 51.018956] __lock_acquire+0x7cf/0x1a20
[ 51.019317] lock_acquire+0x17d/0x390
[ 51.019658] __kernfs_remove+0x892/0xae0
[ 51.020020] kernfs_remove_by_name_ns+0x78/0x110
[ 51.020435] sysfs_remove_link+0x55/0xb0
[ 51.020832] sysfs_slab_add+0xc1/0x3e0
[ 51.021332] __kmem_cache_create+0x155/0x200
[ 51.021720] create_cache+0xf5/0x320
[ 51.022054] kmem_cache_create_usercopy+0x179/0x320
[ 51.022486] kmem_cache_create+0x1a/0x30
[ 51.022867] nfsd_reply_cache_init+0x278/0x560
[ 51.023266] nfsd_init_net+0x20f/0x5e0
[ 51.023623] ops_init+0xcb/0x4b0
[ 51.023928] setup_net+0x2fe/0x670
[ 51.024315] copy_net_ns+0x30a/0x3f0
[ 51.024653] create_new_namespaces+0x3c5/0x820
[ 51.025257] unshare_nsproxy_namespaces+0xd1/0x240
[ 51.025881] ksys_unshare+0x506/0x9c0
[ 51.026381] __x64_sys_unshare+0x3a/0x50
[ 51.026937] do_syscall_64+0x110/0x10b0
[ 51.027509] entry_SYSCALL_64_after_hwframe+0x49/0xbe
[ 51.028175]
[ 51.028175] -> #0 (slab_mutex){+.+.}:
[ 51.028817] validate_chain+0x1c51/0x2cc0
[ 51.029422] __lock_acquire+0x7cf/0x1a20
[ 51.029947] lock_acquire+0x17d/0x390
[ 51.030438] __mutex_lock+0x100/0xfa0
[ 51.030995] mutex_lock_nested+0x27/0x30
[ 51.031516] slab_attr_store+0xa2/0x130
[ 51.032020] sysfs_kf_write+0x11d/0x180
[ 51.032529] kernfs_fop_write+0x32a/0x500
[ 51.033056] do_loop_readv_writev+0x21d/0x310
[ 51.033627] do_iter_write+0x2e5/0x380
[ 51.034148] vfs_writev+0x170/0x310
[ 51.034616] do_pwritev+0x13e/0x160
[ 51.035100] __x64_sys_pwritev+0xa3/0x110
[ 51.035633] do_syscall_64+0x110/0x10b0
[ 51.036200] entry_SYSCALL_64_after_hwframe+0x49/0xbe
[ 51.036924]
[ 51.036924] other info that might help us debug this:
[ 51.036924]
[ 51.037876] Possible unsafe locking scenario:
[ 51.037876]
[ 51.038556] CPU0 CPU1
[ 51.039130] ---- ----
[ 51.039676] lock(kn->count#43);
[ 51.040084] lock(slab_mutex);
[ 51.040597] lock(kn->count#43);
[ 51.041062] lock(slab_mutex);
[ 51.041320]
[ 51.041320] *** DEADLOCK ***
[ 51.041320]
[ 51.041793] 3 locks held by trinity-c2/886:
[ 51.042128] #0: 000000001f55e152 (sb_writers#5){.+.+}, at: vfs_writev+0x2b9/0x310
[ 51.042739] #1: 00000000c7d6c034 (&of->mutex){+.+.}, at: kernfs_fop_write+0x25b/0x500
[ 51.043400] #2: 00000000ac662005 (kn->count#43){++++}, at: kernfs_fop_write+0x286/0x500
Reported-by: kernel test robot <lkp@intel.com>
Fixes: 3ba75830ce ("drc containerization")
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
90 lines
2.0 KiB
C
90 lines
2.0 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
/*
|
|
* Request reply cache. This was heavily inspired by the
|
|
* implementation in 4.3BSD/4.4BSD.
|
|
*
|
|
* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
|
|
*/
|
|
|
|
#ifndef NFSCACHE_H
|
|
#define NFSCACHE_H
|
|
|
|
#include <linux/sunrpc/svc.h>
|
|
#include "netns.h"
|
|
|
|
/*
|
|
* Representation of a reply cache entry.
|
|
*
|
|
* Note that we use a sockaddr_in6 to hold the address instead of the more
|
|
* typical sockaddr_storage. This is for space reasons, since sockaddr_storage
|
|
* is much larger than a sockaddr_in6.
|
|
*/
|
|
struct svc_cacherep {
|
|
/*
 * Identifying fields of the request this entry belongs to.
 * NOTE(review): presumably the lookup key compared by the cache code;
 * the comparison itself is not visible in this header.
 */
struct {
|
|
/* Keep often-read xid, csum in the same cache line: */
|
|
__be32 k_xid; /* big-endian 32-bit value; presumably the RPC transaction id */
|
|
__wsum k_csum; /* checksum value; presumably taken over the first RC_CSUMLEN of the request -- confirm */
|
|
u32 k_proc; /* presumably the RPC procedure number -- confirm against the code that fills it */
|
|
u32 k_prot; /* presumably the transport protocol -- confirm */
|
|
u32 k_vers; /* presumably the protocol version -- confirm */
|
|
unsigned int k_len; /* presumably the request length -- confirm */
|
|
struct sockaddr_in6 k_addr; /* address; sockaddr_in6 rather than sockaddr_storage to save space */
|
|
} c_key;
|
|
|
|
struct rb_node c_node; /* rbtree linkage; the tree that owns it is not visible in this header */
|
|
struct list_head c_lru; /* list linkage; by its name presumably an LRU list -- confirm */
|
|
unsigned char c_state, /* entry state: RC_UNUSED, RC_INPROG or RC_DONE */
|
|
c_type, /* what is cached (status, buffer); see the RC_NOCACHE/RC_REPLSTAT/RC_REPLBUFF enum */
|
|
c_secure : 1; /* single-bit flag: req came from port < 1024 */
|
|
unsigned long c_timestamp; /* NOTE(review): presumably jiffies, since RC_EXPIRE is expressed in HZ -- confirm */
|
|
/* Cached reply payload; members are reached through the c_replvec / c_replstat macros. */
union {
|
|
struct kvec u_vec; /* aliased as c_replvec */
|
|
__be32 u_status; /* aliased as c_replstat */
|
|
} c_u;
|
|
};
|
|
|
|
/*
 * Shorthand for the two members of the c_u union in struct svc_cacherep:
 * rp->c_replvec expands to rp->c_u.u_vec and rp->c_replstat expands to
 * rp->c_u.u_status.  Both name the same storage, so only one is meaningful
 * for a given entry.
 */
#define c_replvec c_u.u_vec
|
|
#define c_replstat c_u.u_status
|
|
|
|
/*
 * Cache entry states, matching the "unused, inprog, done" note on the
 * c_state field of struct svc_cacherep.  Values are implicit: 0, 1, 2.
 */
|
|
enum {
|
|
RC_UNUSED, /* 0 */
|
|
RC_INPROG, /* 1 */
|
|
RC_DONE /* 2 */
|
|
};
|
|
|
|
/*
 * Return values.
 * NOTE(review): the function returning these is not named here; presumably
 * nfsd_cache_lookup(), which returns int -- confirm in the implementation.
 * Values are implicit: 0, 1, 2.
 */
|
|
enum {
|
|
RC_DROPIT, /* 0; by its name, presumably "drop the request" -- confirm */
|
|
RC_REPLY, /* 1; by its name, presumably "send the cached reply" -- confirm */
|
|
RC_DOIT /* 2; by its name, presumably "process the request normally" -- confirm */
|
|
};
|
|
|
|
/*
|
|
* Cache types.
|
|
* We may want to add more types one day, e.g. for diropres and
|
|
* attrstat replies. Using cache entries with fixed length instead
|
|
* of buffer pointers may be more efficient.
|
|
*/
|
|
enum {
|
|
RC_NOCACHE, /* 0; by its name, presumably "do not cache this reply" -- confirm */
|
|
RC_REPLSTAT, /* 1; presumably a status-only reply, stored in c_replstat -- confirm */
|
|
RC_REPLBUFF, /* 2; presumably a buffered reply, stored in c_replvec -- confirm */
|
|
};
|
|
|
|
/*
 * Cache entries expire after this time period: 120 * HZ, i.e. 120 seconds
 * when measured in HZ ticks.
 */
|
|
#define RC_EXPIRE (120 * HZ)
|
|
|
|
/*
 * Checksum this amount of the request (unsigned constant 256).
 * NOTE(review): the unit is presumably bytes -- confirm at the use site.
 */
|
|
#define RC_CSUMLEN (256U)
|
|
|
|
/*
 * Reply cache entry points.  Only the declarations are visible here, so the
 * notes below are assumptions to confirm against the implementation.
 */
/* Takes no per-net argument; presumably creates the slab shared by all reply caches. */
int nfsd_drc_slab_create(void);
|
|
/* Counterpart of nfsd_drc_slab_create(); presumably destroys that slab. */
void nfsd_drc_slab_free(void);
|
|
/* Per-nfsd_net setup; presumably returns 0 or a negative errno -- confirm. */
int nfsd_reply_cache_init(struct nfsd_net *);
|
|
/* Per-nfsd_net teardown matching nfsd_reply_cache_init(). */
void nfsd_reply_cache_shutdown(struct nfsd_net *);
|
|
/* Presumably returns one of RC_DROPIT / RC_REPLY / RC_DOIT -- confirm. */
int nfsd_cache_lookup(struct svc_rqst *);
|
|
/* NOTE(review): the int is presumably a cache type (RC_NOCACHE / RC_REPLSTAT / RC_REPLBUFF) and the __be32 * the reply status -- confirm. */
void nfsd_cache_update(struct svc_rqst *, int, __be32 *);
|
|
/* Signature has the shape of a file_operations ->open handler; presumably backs a stats file -- confirm. */
int nfsd_reply_cache_stats_open(struct inode *, struct file *);
|
|
|
|
#endif /* NFSCACHE_H */
|