forked from Minki/linux
c755e25135
The xattr_sem deadlock problems fixed in commit 2e81a4eeed: "ext4: avoid deadlock when expanding inode size" didn't include the use of xattr_sem in fs/ext4/inline.c. With the addition of project quota which added a new extra inode field, this exposed deadlocks in the inline_data code similar to the ones fixed by 2e81a4eeed
. The deadlock can be reproduced via: dmesg -n 7 mke2fs -t ext4 -O inline_data -Fq -I 256 /dev/vdc 32768 mount -t ext4 -o debug_want_extra_isize=24 /dev/vdc /vdc mkdir /vdc/a umount /vdc mount -t ext4 /dev/vdc /vdc echo foo > /vdc/a/foo and looks like this: [ 11.158815] [ 11.160276] ============================================= [ 11.161960] [ INFO: possible recursive locking detected ] [ 11.161960] 4.10.0-rc3-00015-g011b30a8a3cf #160 Tainted: G W [ 11.161960] --------------------------------------------- [ 11.161960] bash/2519 is trying to acquire lock: [ 11.161960] (&ei->xattr_sem){++++..}, at: [<c1225a4b>] ext4_expand_extra_isize_ea+0x3d/0x4cd [ 11.161960] [ 11.161960] but task is already holding lock: [ 11.161960] (&ei->xattr_sem){++++..}, at: [<c1227941>] ext4_try_add_inline_entry+0x3a/0x152 [ 11.161960] [ 11.161960] other info that might help us debug this: [ 11.161960] Possible unsafe locking scenario: [ 11.161960] [ 11.161960] CPU0 [ 11.161960] ---- [ 11.161960] lock(&ei->xattr_sem); [ 11.161960] lock(&ei->xattr_sem); [ 11.161960] [ 11.161960] *** DEADLOCK *** [ 11.161960] [ 11.161960] May be due to missing lock nesting notation [ 11.161960] [ 11.161960] 4 locks held by bash/2519: [ 11.161960] #0: (sb_writers#3){.+.+.+}, at: [<c11a2414>] mnt_want_write+0x1e/0x3e [ 11.161960] #1: (&type->i_mutex_dir_key){++++++}, at: [<c119508b>] path_openat+0x338/0x67a [ 11.161960] #2: (jbd2_handle){++++..}, at: [<c123314a>] start_this_handle+0x582/0x622 [ 11.161960] #3: (&ei->xattr_sem){++++..}, at: [<c1227941>] ext4_try_add_inline_entry+0x3a/0x152 [ 11.161960] [ 11.161960] stack backtrace: [ 11.161960] CPU: 0 PID: 2519 Comm: bash Tainted: G W 4.10.0-rc3-00015-g011b30a8a3cf #160 [ 11.161960] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.1-1 04/01/2014 [ 11.161960] Call Trace: [ 11.161960] dump_stack+0x72/0xa3 [ 11.161960] __lock_acquire+0xb7c/0xcb9 [ 11.161960] ? kvm_clock_read+0x1f/0x29 [ 11.161960] ? __lock_is_held+0x36/0x66 [ 11.161960] ? 
__lock_is_held+0x36/0x66 [ 11.161960] lock_acquire+0x106/0x18a [ 11.161960] ? ext4_expand_extra_isize_ea+0x3d/0x4cd [ 11.161960] down_write+0x39/0x72 [ 11.161960] ? ext4_expand_extra_isize_ea+0x3d/0x4cd [ 11.161960] ext4_expand_extra_isize_ea+0x3d/0x4cd [ 11.161960] ? _raw_read_unlock+0x22/0x2c [ 11.161960] ? jbd2_journal_extend+0x1e2/0x262 [ 11.161960] ? __ext4_journal_get_write_access+0x3d/0x60 [ 11.161960] ext4_mark_inode_dirty+0x17d/0x26d [ 11.161960] ? ext4_add_dirent_to_inline.isra.12+0xa5/0xb2 [ 11.161960] ext4_add_dirent_to_inline.isra.12+0xa5/0xb2 [ 11.161960] ext4_try_add_inline_entry+0x69/0x152 [ 11.161960] ext4_add_entry+0xa3/0x848 [ 11.161960] ? __brelse+0x14/0x2f [ 11.161960] ? _raw_spin_unlock_irqrestore+0x44/0x4f [ 11.161960] ext4_add_nondir+0x17/0x5b [ 11.161960] ext4_create+0xcf/0x133 [ 11.161960] ? ext4_mknod+0x12f/0x12f [ 11.161960] lookup_open+0x39e/0x3fb [ 11.161960] ? __wake_up+0x1a/0x40 [ 11.161960] ? lock_acquire+0x11e/0x18a [ 11.161960] path_openat+0x35c/0x67a [ 11.161960] ? sched_clock_cpu+0xd7/0xf2 [ 11.161960] do_filp_open+0x36/0x7c [ 11.161960] ? _raw_spin_unlock+0x22/0x2c [ 11.161960] ? __alloc_fd+0x169/0x173 [ 11.161960] do_sys_open+0x59/0xcc [ 11.161960] SyS_open+0x1d/0x1f [ 11.161960] do_int80_syscall_32+0x4f/0x61 [ 11.161960] entry_INT80_32+0x2f/0x2f [ 11.161960] EIP: 0xb76ad469 [ 11.161960] EFLAGS: 00000286 CPU: 0 [ 11.161960] EAX: ffffffda EBX: 08168ac8 ECX: 00008241 EDX: 000001b6 [ 11.161960] ESI: b75e46bc EDI: b7755000 EBP: bfbdb108 ESP: bfbdafc0 [ 11.161960] DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 007b Cc: stable@vger.kernel.org # 3.10 (requires2e81a4eeed
as a prereq) Reported-by: George Spelvin <linux@sciencehorizons.net> Signed-off-by: Theodore Ts'o <tytso@mit.edu>
172 lines
5.6 KiB
C
172 lines
5.6 KiB
C
/*
|
|
File: fs/ext4/xattr.h
|
|
|
|
On-disk format of extended attributes for the ext4 filesystem.
|
|
|
|
(C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org>
|
|
*/
|
|
|
|
#include <linux/xattr.h>
|
|
|
|
/* Magic value that identifies extended attribute blocks. */
#define EXT4_XATTR_MAGIC		0xEA020000

/* Upper limit on the number of references to a single attribute block. */
#define EXT4_XATTR_REFCOUNT_MAX		1024
|
|
|
|
/*
 * Name indexes.
 *
 * Small integers that stand for an attribute name class; they are what
 * e_name_index in struct ext4_xattr_entry holds.
 */
#define EXT4_XATTR_INDEX_USER			1
#define EXT4_XATTR_INDEX_POSIX_ACL_ACCESS	2
#define EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT	3
#define EXT4_XATTR_INDEX_TRUSTED		4
#define EXT4_XATTR_INDEX_LUSTRE			5
#define EXT4_XATTR_INDEX_SECURITY		6
#define EXT4_XATTR_INDEX_SYSTEM			7
#define EXT4_XATTR_INDEX_RICHACL		8
#define EXT4_XATTR_INDEX_ENCRYPTION		9
#define EXT4_XATTR_INDEX_HURD			10 /* Reserved for Hurd */
|
|
|
|
struct ext4_xattr_header {
|
|
__le32 h_magic; /* magic number for identification */
|
|
__le32 h_refcount; /* reference count */
|
|
__le32 h_blocks; /* number of disk blocks used */
|
|
__le32 h_hash; /* hash value of all attributes */
|
|
__le32 h_checksum; /* crc32c(uuid+id+xattrblock) */
|
|
/* id = inum if refcount=1, blknum otherwise */
|
|
__u32 h_reserved[3]; /* zero right now */
|
|
};
|
|
|
|
struct ext4_xattr_ibody_header {
|
|
__le32 h_magic; /* magic number for identification */
|
|
};
|
|
|
|
struct ext4_xattr_entry {
|
|
__u8 e_name_len; /* length of name */
|
|
__u8 e_name_index; /* attribute name index */
|
|
__le16 e_value_offs; /* offset in disk block of value */
|
|
__le32 e_value_block; /* disk block attribute is stored on (n/i) */
|
|
__le32 e_value_size; /* size of attribute value */
|
|
__le32 e_hash; /* hash value of name and value */
|
|
char e_name[0]; /* attribute name */
|
|
};
|
|
|
|
/*
 * Alignment of xattr entries and values: sizes are rounded up to a multiple
 * of EXT4_XATTR_PAD (1 << EXT4_XATTR_PAD_BITS) bytes.  EXT4_XATTR_ROUND is
 * the matching low-bit mask.
 */
#define EXT4_XATTR_PAD_BITS		2
#define EXT4_XATTR_PAD			(1 << EXT4_XATTR_PAD_BITS)
#define EXT4_XATTR_ROUND		(EXT4_XATTR_PAD - 1)

/* Round @size up to the next multiple of EXT4_XATTR_PAD. */
#define EXT4_XATTR_SIZE(size) \
	(((size) + EXT4_XATTR_ROUND) & ~EXT4_XATTR_ROUND)

/*
 * Padded on-disk size of an entry whose name is @name_len bytes long:
 * the fixed part of struct ext4_xattr_entry plus the name, rounded up.
 */
#define EXT4_XATTR_LEN(name_len) \
	EXT4_XATTR_SIZE((name_len) + sizeof(struct ext4_xattr_entry))

/* Pointer to the entry stored immediately after @entry. */
#define EXT4_XATTR_NEXT(entry) \
	((struct ext4_xattr_entry *) \
	 ((char *)(entry) + EXT4_XATTR_LEN((entry)->e_name_len)))
|
|
|
|
/*
 * IHDR - locate the in-inode extended attribute header.
 * @inode:     in-memory inode; EXT4_I(inode)->i_extra_isize is read from it
 * @raw_inode: pointer to the start of the raw inode
 *
 * The header sits EXT4_GOOD_OLD_INODE_SIZE + i_extra_isize bytes past the
 * start of the raw inode.  @raw_inode is parenthesized before the cast so
 * that an expression argument (e.g. "cond ? a : b") is cast as a whole
 * rather than having the cast bind to its first operand only.
 */
#define IHDR(inode, raw_inode) \
	((struct ext4_xattr_ibody_header *) \
		((void *)(raw_inode) + \
		EXT4_GOOD_OLD_INODE_SIZE + \
		EXT4_I(inode)->i_extra_isize))
|
|
/* First entry of the in-inode xattr area: it starts right after @hdr. */
#define IFIRST(hdr)		((struct ext4_xattr_entry *)((hdr) + 1))

/* View the data of @bh (its b_data) as an xattr block header. */
#define BHDR(bh)		((struct ext4_xattr_header *)((bh)->b_data))

/* Reinterpret @ptr as a pointer to an xattr entry. */
#define ENTRY(ptr)		((struct ext4_xattr_entry *)(ptr))

/* First entry of an xattr block: it starts right after the block header. */
#define BFIRST(bh)		((struct ext4_xattr_entry *)(BHDR(bh) + 1))

/* True when the first 32 bits at @entry are zero, which ends the entry list. */
#define IS_LAST_ENTRY(entry)	(!*(__u32 *)(entry))
|
|
|
|
/*
 * Sentinel pointer that can never be a real buffer address.
 * NOTE(review): presumably passed as ext4_xattr_info.value to request a
 * zero-filled value -- confirm against the users in xattr.c / inline.c.
 */
#define EXT4_ZERO_XATTR_VALUE	((void *)-1)

/* Describes one attribute: its name index, name, and value buffer. */
struct ext4_xattr_info {
	int		name_index;	/* one of the EXT4_XATTR_INDEX_* values */
	const char	*name;		/* attribute name */
	const void	*value;		/* attribute value */
	size_t		value_len;	/* length of @value */
};
|
|
|
|
/*
 * Cursor state for a lookup within one xattr area.
 * NOTE(review): the field roles below are inferred from their names only;
 * confirm against the search code in xattr.c.
 */
struct ext4_xattr_search {
	struct ext4_xattr_entry	*first;		/* presumably first entry of the area */
	void			*base;		/* presumably base for value offsets */
	void			*end;		/* presumably end of the area */
	struct ext4_xattr_entry	*here;		/* presumably current/matching entry */
	int			not_found;	/* lookup result flag */
};
|
|
|
|
struct ext4_xattr_ibody_find {
|
|
struct ext4_xattr_search s;
|
|
struct ext4_iloc iloc;
|
|
};
|
|
|
|
/* xattr handler objects; they are defined outside this header. */
extern const struct xattr_handler	ext4_xattr_user_handler;
extern const struct xattr_handler	ext4_xattr_trusted_handler;
extern const struct xattr_handler	ext4_xattr_security_handler;

/* Attribute name used for the encryption context. */
#define EXT4_XATTR_NAME_ENCRYPTION_CONTEXT	"c"
|
|
|
|
/*
 * The EXT4_STATE_NO_EXPAND is overloaded and used for two purposes.
 * The first is to signal that the inline xattrs and data are
 * taking up so much space that we might as well not keep trying to
 * expand it.  The second is that xattr_sem is taken for writing, so
 * we shouldn't try to recurse into the inode expansion.  For this
 * second case, we need to make sure that we save and restore the
 * NO_EXPAND state flag appropriately.
 */
|
|
static inline void ext4_write_lock_xattr(struct inode *inode, int *save)
|
|
{
|
|
down_write(&EXT4_I(inode)->xattr_sem);
|
|
*save = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND);
|
|
ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
|
|
}
|
|
|
|
static inline int ext4_write_trylock_xattr(struct inode *inode, int *save)
|
|
{
|
|
if (down_write_trylock(&EXT4_I(inode)->xattr_sem) == 0)
|
|
return 0;
|
|
*save = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND);
|
|
ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
|
|
return 1;
|
|
}
|
|
|
|
static inline void ext4_write_unlock_xattr(struct inode *inode, int *save)
|
|
{
|
|
if (*save == 0)
|
|
ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
|
|
up_write(&EXT4_I(inode)->xattr_sem);
|
|
}
|
|
|
|
extern ssize_t ext4_listxattr(struct dentry *, char *, size_t);
|
|
|
|
extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t);
|
|
extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_t, int);
|
|
extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int);
|
|
|
|
extern void ext4_xattr_delete_inode(handle_t *, struct inode *);
|
|
|
|
extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
|
|
struct ext4_inode *raw_inode, handle_t *handle);
|
|
|
|
extern const struct xattr_handler *ext4_xattr_handlers[];
|
|
|
|
extern int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
|
|
struct ext4_xattr_ibody_find *is);
|
|
extern int ext4_xattr_ibody_get(struct inode *inode, int name_index,
|
|
const char *name,
|
|
void *buffer, size_t buffer_size);
|
|
extern int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
|
|
struct ext4_xattr_info *i,
|
|
struct ext4_xattr_ibody_find *is);
|
|
|
|
/* Creation and teardown of the mb_cache used for xattrs. */
extern struct mb_cache	*ext4_xattr_create_cache(void);
extern void		ext4_xattr_destroy_cache(struct mb_cache *);
|
|
|
|
#ifdef CONFIG_EXT4_FS_SECURITY
|
|
extern int ext4_init_security(handle_t *handle, struct inode *inode,
|
|
struct inode *dir, const struct qstr *qstr);
|
|
#else
|
|
static inline int ext4_init_security(handle_t *handle, struct inode *inode,
|
|
struct inode *dir, const struct qstr *qstr)
|
|
{
|
|
return 0;
|
|
}
|
|
#endif
|