mirror of
https://github.com/torvalds/linux.git
synced 2024-11-10 14:11:52 +00:00
overlayfs update for 4.19
This contains two new features: 1) Stack file operations: this allows removal of several hacks from the VFS, proper interaction of read-only open files with copy-up, possibility to implement fs modifying ioctls properly, and others. 2) Metadata only copy-up: when file is on lower layer and only metadata is modified (except size) then only copy up the metadata and continue to use the data from the lower file. -----BEGIN PGP SIGNATURE----- iHUEABYIAB0WIQSQHSd0lITzzeNWNm3h3BK/laaZPAUCW3srhAAKCRDh3BK/laaZ PC6tAQCP+KklcN+TvNp502f+O/kATahSpgnun4NY1/p4I8JV+AEAzdlkTN3+MiAO fn9brN6mBK7h59DO3hqedPLJy2vrgwg= =QDXH -----END PGP SIGNATURE----- Merge tag 'ovl-update-4.19' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs Pull overlayfs updates from Miklos Szeredi: "This contains two new features: - Stack file operations: this allows removal of several hacks from the VFS, proper interaction of read-only open files with copy-up, possibility to implement fs modifying ioctls properly, and others. - Metadata only copy-up: when file is on lower layer and only metadata is modified (except size) then only copy up the metadata and continue to use the data from the lower file" * tag 'ovl-update-4.19' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs: (66 commits) ovl: Enable metadata only feature ovl: Do not do metacopy only for ioctl modifying file attr ovl: Do not do metadata only copy-up for truncate operation ovl: add helper to force data copy-up ovl: Check redirect on index as well ovl: Set redirect on upper inode when it is linked ovl: Set redirect on metacopy files upon rename ovl: Do not set dentry type ORIGIN for broken hardlinks ovl: Add an inode flag OVL_CONST_INO ovl: Treat metacopy dentries as type OVL_PATH_MERGE ovl: Check redirects for metacopy files ovl: Move some dir related ovl_lookup_single() code in else block ovl: Do not expose metacopy only dentry from d_real() ovl: Open file with data except for the case of fsync ovl: Add helper ovl_inode_realdata() ovl: Store lower data inode in ovl_inode ovl: Fix ovl_getattr() to get number of blocks from lower ovl: Add helper ovl_dentry_lowerdata() to get lower data dentry ovl: Copy up meta inode data from lowest data inode ovl: Modify ovl_lookup() and friends to lookup metacopy dentry ...
This commit is contained in:
commit
d9a185f8b4
@ -21,8 +21,7 @@ prototypes:
|
||||
char *(*d_dname)((struct dentry *dentry, char *buffer, int buflen);
|
||||
struct vfsmount *(*d_automount)(struct path *path);
|
||||
int (*d_manage)(const struct path *, bool);
|
||||
struct dentry *(*d_real)(struct dentry *, const struct inode *,
|
||||
unsigned int, unsigned int);
|
||||
struct dentry *(*d_real)(struct dentry *, const struct inode *);
|
||||
|
||||
locking rules:
|
||||
rename_lock ->d_lock may block rcu-walk
|
||||
|
@ -10,10 +10,6 @@ union-filesystems). An overlay-filesystem tries to present a
|
||||
filesystem which is the result over overlaying one filesystem on top
|
||||
of the other.
|
||||
|
||||
The result will inevitably fail to look exactly like a normal
|
||||
filesystem for various technical reasons. The expectation is that
|
||||
many use cases will be able to ignore these differences.
|
||||
|
||||
|
||||
Overlay objects
|
||||
---------------
|
||||
@ -266,6 +262,30 @@ rightmost one and going left. In the above example lower1 will be the
|
||||
top, lower2 the middle and lower3 the bottom layer.
|
||||
|
||||
|
||||
Metadata only copy up
|
||||
--------------------
|
||||
|
||||
When metadata only copy up feature is enabled, overlayfs will only copy
|
||||
up metadata (as opposed to whole file), when a metadata specific operation
|
||||
like chown/chmod is performed. Full file will be copied up later when
|
||||
file is opened for WRITE operation.
|
||||
|
||||
In other words, this is delayed data copy up operation and data is copied
|
||||
up when there is a need to actually modify data.
|
||||
|
||||
There are multiple ways to enable/disable this feature. A config option
|
||||
CONFIG_OVERLAY_FS_METACOPY can be set/unset to enable/disable this feature
|
||||
by default. Or one can enable/disable it at module load time with module
|
||||
parameter metacopy=on/off. Lastly, there is also a per mount option
|
||||
metacopy=on/off to enable/disable this feature per mount.
|
||||
|
||||
Do not use metacopy=on with untrusted upper/lower directories. Otherwise
|
||||
it is possible that an attacker can create a handcrafted file with
|
||||
appropriate REDIRECT and METACOPY xattrs, and gain access to file on lower
|
||||
pointed by REDIRECT. This should not be possible on local system as setting
|
||||
"trusted." xattrs will require CAP_SYS_ADMIN. But it should be possible
|
||||
for untrusted layers like from a pen drive.
|
||||
|
||||
Sharing and copying layers
|
||||
--------------------------
|
||||
|
||||
@ -284,7 +304,7 @@ though it will not result in a crash or deadlock.
|
||||
Mounting an overlay using an upper layer path, where the upper layer path
|
||||
was previously used by another mounted overlay in combination with a
|
||||
different lower layer path, is allowed, unless the "inodes index" feature
|
||||
is enabled.
|
||||
or "metadata only copy up" feature is enabled.
|
||||
|
||||
With the "inodes index" feature, on the first time mount, an NFS file
|
||||
handle of the lower layer root directory, along with the UUID of the lower
|
||||
@ -297,6 +317,10 @@ lower root origin, mount will fail with ESTALE. An overlayfs mount with
|
||||
does not support NFS export, lower filesystem does not have a valid UUID or
|
||||
if the upper filesystem does not support extended attributes.
|
||||
|
||||
For "metadata only copy up" feature there is no verification mechanism at
|
||||
mount time. So if same upper is mounted with different set of lower, mount
|
||||
probably will succeed but expect the unexpected later on. So don't do it.
|
||||
|
||||
It is quite a common practice to copy overlay layers to a different
|
||||
directory tree on the same or different underlying filesystem, and even
|
||||
to a different machine. With the "inodes index" feature, trying to mount
|
||||
@ -306,27 +330,40 @@ the copied layers will fail the verification of the lower root file handle.
|
||||
Non-standard behavior
|
||||
---------------------
|
||||
|
||||
The copy_up operation essentially creates a new, identical file and
|
||||
moves it over to the old name. Any open files referring to this inode
|
||||
will access the old data.
|
||||
Overlayfs can now act as a POSIX compliant filesystem with the following
|
||||
features turned on:
|
||||
|
||||
The new file may be on a different filesystem, so both st_dev and st_ino
|
||||
of the real file may change. The values of st_dev and st_ino returned by
|
||||
stat(2) on an overlay object are often not the same as the real file
|
||||
stat(2) values to prevent the values from changing on copy_up.
|
||||
1) "redirect_dir"
|
||||
|
||||
Unless "xino" feature is enabled, when overlay layers are not all on the
|
||||
same underlying filesystem, the value of st_dev may be different for two
|
||||
non-directory objects in the same overlay filesystem and the value of
|
||||
st_ino for directory objects may be non persistent and could change even
|
||||
while the overlay filesystem is still mounted.
|
||||
Enabled with the mount option or module option: "redirect_dir=on" or with
|
||||
the kernel config option CONFIG_OVERLAY_FS_REDIRECT_DIR=y.
|
||||
|
||||
Unless "inode index" feature is enabled, if a file with multiple hard
|
||||
links is copied up, then this will "break" the link. Changes will not be
|
||||
propagated to other names referring to the same inode.
|
||||
If this feature is disabled, then rename(2) on a lower or merged directory
|
||||
will fail with EXDEV ("Invalid cross-device link").
|
||||
|
||||
Unless "redirect_dir" feature is enabled, rename(2) on a lower or merged
|
||||
directory will fail with EXDEV.
|
||||
2) "inode index"
|
||||
|
||||
Enabled with the mount option or module option "index=on" or with the
|
||||
kernel config option CONFIG_OVERLAY_FS_INDEX=y.
|
||||
|
||||
If this feature is disabled and a file with multiple hard links is copied
|
||||
up, then this will "break" the link. Changes will not be propagated to
|
||||
other names referring to the same inode.
|
||||
|
||||
3) "xino"
|
||||
|
||||
Enabled with the mount option "xino=auto" or "xino=on", with the module
|
||||
option "xino_auto=on" or with the kernel config option
|
||||
CONFIG_OVERLAY_FS_XINO_AUTO=y. Also implicitly enabled by using the same
|
||||
underlying filesystem for all layers making up the overlay.
|
||||
|
||||
If this feature is disabled or the underlying filesystem doesn't have
|
||||
enough free bits in the inode number, then overlayfs will not be able to
|
||||
guarantee that the values of st_ino and st_dev returned by stat(2) and the
|
||||
value of d_ino returned by readdir(3) will act like on a normal filesystem.
|
||||
E.g. the value of st_dev may be different for two objects in the same
|
||||
overlay filesystem and the value of st_ino for directory objects may not be
|
||||
persistent and could change even while the overlay filesystem is mounted.
|
||||
|
||||
|
||||
Changes to underlying filesystems
|
||||
|
@ -989,8 +989,7 @@ struct dentry_operations {
|
||||
char *(*d_dname)(struct dentry *, char *, int);
|
||||
struct vfsmount *(*d_automount)(struct path *);
|
||||
int (*d_manage)(const struct path *, bool);
|
||||
struct dentry *(*d_real)(struct dentry *, const struct inode *,
|
||||
unsigned int, unsigned int);
|
||||
struct dentry *(*d_real)(struct dentry *, const struct inode *);
|
||||
};
|
||||
|
||||
d_revalidate: called when the VFS needs to revalidate a dentry. This
|
||||
@ -1124,22 +1123,15 @@ struct dentry_operations {
|
||||
dentry being transited from.
|
||||
|
||||
d_real: overlay/union type filesystems implement this method to return one of
|
||||
the underlying dentries hidden by the overlay. It is used in three
|
||||
the underlying dentries hidden by the overlay. It is used in two
|
||||
different modes:
|
||||
|
||||
Called from open it may need to copy-up the file depending on the
|
||||
supplied open flags. This mode is selected with a non-zero flags
|
||||
argument. In this mode the d_real method can return an error.
|
||||
|
||||
Called from file_dentry() it returns the real dentry matching the inode
|
||||
argument. The real dentry may be from a lower layer already copied up,
|
||||
but still referenced from the file. This mode is selected with a
|
||||
non-NULL inode argument. This will always succeed.
|
||||
non-NULL inode argument.
|
||||
|
||||
With NULL inode and zero flags the topmost real underlying dentry is
|
||||
returned. This will always succeed.
|
||||
|
||||
This method is never called with both non-NULL inode and non-zero flags.
|
||||
With NULL inode the topmost real underlying dentry is returned.
|
||||
|
||||
Each dentry has a pointer to its parent dentry, as well as a hash list
|
||||
of child dentries. Child dentries are basically like files in a
|
||||
|
@ -3217,8 +3217,9 @@ void btrfs_get_block_group_info(struct list_head *groups_list,
|
||||
struct btrfs_ioctl_space_info *space);
|
||||
void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_ioctl_balance_args *bargs);
|
||||
ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
|
||||
struct file *dst_file, u64 dst_loff);
|
||||
int btrfs_dedupe_file_range(struct file *src_file, loff_t src_loff,
|
||||
struct file *dst_file, loff_t dst_loff,
|
||||
u64 olen);
|
||||
|
||||
/* file.c */
|
||||
int __init btrfs_auto_defrag_init(void);
|
||||
|
@ -3592,13 +3592,13 @@ out_unlock:
|
||||
return ret;
|
||||
}
|
||||
|
||||
ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
|
||||
struct file *dst_file, u64 dst_loff)
|
||||
int btrfs_dedupe_file_range(struct file *src_file, loff_t src_loff,
|
||||
struct file *dst_file, loff_t dst_loff,
|
||||
u64 olen)
|
||||
{
|
||||
struct inode *src = file_inode(src_file);
|
||||
struct inode *dst = file_inode(dst_file);
|
||||
u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
|
||||
ssize_t res;
|
||||
|
||||
if (WARN_ON_ONCE(bs < PAGE_SIZE)) {
|
||||
/*
|
||||
@ -3609,10 +3609,7 @@ ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
res = btrfs_extent_same(src, loff, olen, dst, dst_loff);
|
||||
if (res)
|
||||
return res;
|
||||
return olen;
|
||||
return btrfs_extent_same(src, src_loff, olen, dst, dst_loff);
|
||||
}
|
||||
|
||||
static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
|
||||
|
@ -52,7 +52,8 @@ static void file_free_rcu(struct rcu_head *head)
|
||||
static inline void file_free(struct file *f)
|
||||
{
|
||||
security_file_free(f);
|
||||
percpu_counter_dec(&nr_files);
|
||||
if (!(f->f_mode & FMODE_NOACCOUNT))
|
||||
percpu_counter_dec(&nr_files);
|
||||
call_rcu(&f->f_u.fu_rcuhead, file_free_rcu);
|
||||
}
|
||||
|
||||
@ -91,34 +92,11 @@ int proc_nr_files(struct ctl_table *table, int write,
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Find an unused file structure and return a pointer to it.
|
||||
* Returns an error pointer if some error happend e.g. we over file
|
||||
* structures limit, run out of memory or operation is not permitted.
|
||||
*
|
||||
* Be very careful using this. You are responsible for
|
||||
* getting write access to any mount that you might assign
|
||||
* to this filp, if it is opened for write. If this is not
|
||||
* done, you will imbalance int the mount's writer count
|
||||
* and a warning at __fput() time.
|
||||
*/
|
||||
struct file *alloc_empty_file(int flags, const struct cred *cred)
|
||||
static struct file *__alloc_file(int flags, const struct cred *cred)
|
||||
{
|
||||
static long old_max;
|
||||
struct file *f;
|
||||
int error;
|
||||
|
||||
/*
|
||||
* Privileged users can go above max_files
|
||||
*/
|
||||
if (get_nr_files() >= files_stat.max_files && !capable(CAP_SYS_ADMIN)) {
|
||||
/*
|
||||
* percpu_counters are inaccurate. Do an expensive check before
|
||||
* we go and fail.
|
||||
*/
|
||||
if (percpu_counter_sum_positive(&nr_files) >= files_stat.max_files)
|
||||
goto over;
|
||||
}
|
||||
|
||||
f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
|
||||
if (unlikely(!f))
|
||||
return ERR_PTR(-ENOMEM);
|
||||
@ -138,7 +116,41 @@ struct file *alloc_empty_file(int flags, const struct cred *cred)
|
||||
f->f_flags = flags;
|
||||
f->f_mode = OPEN_FMODE(flags);
|
||||
/* f->f_version: 0 */
|
||||
percpu_counter_inc(&nr_files);
|
||||
|
||||
return f;
|
||||
}
|
||||
|
||||
/* Find an unused file structure and return a pointer to it.
|
||||
* Returns an error pointer if some error happend e.g. we over file
|
||||
* structures limit, run out of memory or operation is not permitted.
|
||||
*
|
||||
* Be very careful using this. You are responsible for
|
||||
* getting write access to any mount that you might assign
|
||||
* to this filp, if it is opened for write. If this is not
|
||||
* done, you will imbalance int the mount's writer count
|
||||
* and a warning at __fput() time.
|
||||
*/
|
||||
struct file *alloc_empty_file(int flags, const struct cred *cred)
|
||||
{
|
||||
static long old_max;
|
||||
struct file *f;
|
||||
|
||||
/*
|
||||
* Privileged users can go above max_files
|
||||
*/
|
||||
if (get_nr_files() >= files_stat.max_files && !capable(CAP_SYS_ADMIN)) {
|
||||
/*
|
||||
* percpu_counters are inaccurate. Do an expensive check before
|
||||
* we go and fail.
|
||||
*/
|
||||
if (percpu_counter_sum_positive(&nr_files) >= files_stat.max_files)
|
||||
goto over;
|
||||
}
|
||||
|
||||
f = __alloc_file(flags, cred);
|
||||
if (!IS_ERR(f))
|
||||
percpu_counter_inc(&nr_files);
|
||||
|
||||
return f;
|
||||
|
||||
over:
|
||||
@ -150,6 +162,21 @@ over:
|
||||
return ERR_PTR(-ENFILE);
|
||||
}
|
||||
|
||||
/*
|
||||
* Variant of alloc_empty_file() that doesn't check and modify nr_files.
|
||||
*
|
||||
* Should not be used unless there's a very good reason to do so.
|
||||
*/
|
||||
struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred)
|
||||
{
|
||||
struct file *f = __alloc_file(flags, cred);
|
||||
|
||||
if (!IS_ERR(f))
|
||||
f->f_mode |= FMODE_NOACCOUNT;
|
||||
|
||||
return f;
|
||||
}
|
||||
|
||||
/**
|
||||
* alloc_file - allocate and initialize a 'struct file'
|
||||
*
|
||||
|
46
fs/inode.c
46
fs/inode.c
@ -1595,50 +1595,17 @@ sector_t bmap(struct inode *inode, sector_t block)
|
||||
}
|
||||
EXPORT_SYMBOL(bmap);
|
||||
|
||||
/*
|
||||
* Update times in overlayed inode from underlying real inode
|
||||
*/
|
||||
static void update_ovl_inode_times(struct dentry *dentry, struct inode *inode,
|
||||
bool rcu)
|
||||
{
|
||||
struct dentry *upperdentry;
|
||||
|
||||
/*
|
||||
* Nothing to do if in rcu or if non-overlayfs
|
||||
*/
|
||||
if (rcu || likely(!(dentry->d_flags & DCACHE_OP_REAL)))
|
||||
return;
|
||||
|
||||
upperdentry = d_real(dentry, NULL, 0, D_REAL_UPPER);
|
||||
|
||||
/*
|
||||
* If file is on lower then we can't update atime, so no worries about
|
||||
* stale mtime/ctime.
|
||||
*/
|
||||
if (upperdentry) {
|
||||
struct inode *realinode = d_inode(upperdentry);
|
||||
|
||||
if ((!timespec64_equal(&inode->i_mtime, &realinode->i_mtime) ||
|
||||
!timespec64_equal(&inode->i_ctime, &realinode->i_ctime))) {
|
||||
inode->i_mtime = realinode->i_mtime;
|
||||
inode->i_ctime = realinode->i_ctime;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* With relative atime, only update atime if the previous atime is
|
||||
* earlier than either the ctime or mtime or if at least a day has
|
||||
* passed since the last atime update.
|
||||
*/
|
||||
static int relatime_need_update(const struct path *path, struct inode *inode,
|
||||
struct timespec now, bool rcu)
|
||||
static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
|
||||
struct timespec now)
|
||||
{
|
||||
|
||||
if (!(path->mnt->mnt_flags & MNT_RELATIME))
|
||||
if (!(mnt->mnt_flags & MNT_RELATIME))
|
||||
return 1;
|
||||
|
||||
update_ovl_inode_times(path->dentry, inode, rcu);
|
||||
/*
|
||||
* Is mtime younger than atime? If yes, update atime:
|
||||
*/
|
||||
@ -1709,8 +1676,7 @@ static int update_time(struct inode *inode, struct timespec64 *time, int flags)
|
||||
* This function automatically handles read only file systems and media,
|
||||
* as well as the "noatime" flag and inode specific "noatime" markers.
|
||||
*/
|
||||
bool __atime_needs_update(const struct path *path, struct inode *inode,
|
||||
bool rcu)
|
||||
bool atime_needs_update(const struct path *path, struct inode *inode)
|
||||
{
|
||||
struct vfsmount *mnt = path->mnt;
|
||||
struct timespec64 now;
|
||||
@ -1736,7 +1702,7 @@ bool __atime_needs_update(const struct path *path, struct inode *inode,
|
||||
|
||||
now = current_time(inode);
|
||||
|
||||
if (!relatime_need_update(path, inode, timespec64_to_timespec(now), rcu))
|
||||
if (!relatime_need_update(mnt, inode, timespec64_to_timespec(now)))
|
||||
return false;
|
||||
|
||||
if (timespec64_equal(&inode->i_atime, &now))
|
||||
@ -1751,7 +1717,7 @@ void touch_atime(const struct path *path)
|
||||
struct inode *inode = d_inode(path->dentry);
|
||||
struct timespec64 now;
|
||||
|
||||
if (!__atime_needs_update(path, inode, false))
|
||||
if (!atime_needs_update(path, inode))
|
||||
return;
|
||||
|
||||
if (!sb_start_write_trylock(inode->i_sb))
|
||||
|
@ -82,10 +82,8 @@ extern void __init mnt_init(void);
|
||||
|
||||
extern int __mnt_want_write(struct vfsmount *);
|
||||
extern int __mnt_want_write_file(struct file *);
|
||||
extern int mnt_want_write_file_path(struct file *);
|
||||
extern void __mnt_drop_write(struct vfsmount *);
|
||||
extern void __mnt_drop_write_file(struct file *);
|
||||
extern void mnt_drop_write_file_path(struct file *);
|
||||
|
||||
/*
|
||||
* fs_struct.c
|
||||
@ -96,6 +94,7 @@ extern void chroot_fs_refs(const struct path *, const struct path *);
|
||||
* file_table.c
|
||||
*/
|
||||
extern struct file *alloc_empty_file(int, const struct cred *);
|
||||
extern struct file *alloc_empty_file_noaccount(int, const struct cred *);
|
||||
|
||||
/*
|
||||
* super.c
|
||||
@ -136,13 +135,6 @@ extern long prune_icache_sb(struct super_block *sb, struct shrink_control *sc);
|
||||
extern void inode_add_lru(struct inode *inode);
|
||||
extern int dentry_needs_remove_privs(struct dentry *dentry);
|
||||
|
||||
extern bool __atime_needs_update(const struct path *, struct inode *, bool);
|
||||
static inline bool atime_needs_update_rcu(const struct path *path,
|
||||
struct inode *inode)
|
||||
{
|
||||
return __atime_needs_update(path, inode, true);
|
||||
}
|
||||
|
||||
/*
|
||||
* fs-writeback.c
|
||||
*/
|
||||
@ -185,7 +177,6 @@ extern const struct dentry_operations ns_dentry_operations;
|
||||
*/
|
||||
extern int do_vfs_ioctl(struct file *file, unsigned int fd, unsigned int cmd,
|
||||
unsigned long arg);
|
||||
extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
|
||||
|
||||
/*
|
||||
* iomap support:
|
||||
|
@ -49,6 +49,7 @@ long vfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
|
||||
out:
|
||||
return error;
|
||||
}
|
||||
EXPORT_SYMBOL(vfs_ioctl);
|
||||
|
||||
static int ioctl_fibmap(struct file *filp, int __user *p)
|
||||
{
|
||||
|
20
fs/locks.c
20
fs/locks.c
@ -139,11 +139,6 @@
|
||||
#define IS_OFDLCK(fl) (fl->fl_flags & FL_OFDLCK)
|
||||
#define IS_REMOTELCK(fl) (fl->fl_pid <= 0)
|
||||
|
||||
static inline bool is_remote_lock(struct file *filp)
|
||||
{
|
||||
return likely(!(filp->f_path.dentry->d_sb->s_flags & SB_NOREMOTELOCK));
|
||||
}
|
||||
|
||||
static bool lease_breaking(struct file_lock *fl)
|
||||
{
|
||||
return fl->fl_flags & (FL_UNLOCK_PENDING | FL_DOWNGRADE_PENDING);
|
||||
@ -1651,8 +1646,7 @@ check_conflicting_open(const struct dentry *dentry, const long arg, int flags)
|
||||
if (flags & FL_LAYOUT)
|
||||
return 0;
|
||||
|
||||
if ((arg == F_RDLCK) &&
|
||||
(atomic_read(&d_real_inode(dentry)->i_writecount) > 0))
|
||||
if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
|
||||
return -EAGAIN;
|
||||
|
||||
if ((arg == F_WRLCK) && ((d_count(dentry) > 1) ||
|
||||
@ -1873,7 +1867,7 @@ EXPORT_SYMBOL(generic_setlease);
|
||||
int
|
||||
vfs_setlease(struct file *filp, long arg, struct file_lock **lease, void **priv)
|
||||
{
|
||||
if (filp->f_op->setlease && is_remote_lock(filp))
|
||||
if (filp->f_op->setlease)
|
||||
return filp->f_op->setlease(filp, arg, lease, priv);
|
||||
else
|
||||
return generic_setlease(filp, arg, lease, priv);
|
||||
@ -2020,7 +2014,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
|
||||
if (error)
|
||||
goto out_free;
|
||||
|
||||
if (f.file->f_op->flock && is_remote_lock(f.file))
|
||||
if (f.file->f_op->flock)
|
||||
error = f.file->f_op->flock(f.file,
|
||||
(can_sleep) ? F_SETLKW : F_SETLK,
|
||||
lock);
|
||||
@ -2046,7 +2040,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
|
||||
*/
|
||||
int vfs_test_lock(struct file *filp, struct file_lock *fl)
|
||||
{
|
||||
if (filp->f_op->lock && is_remote_lock(filp))
|
||||
if (filp->f_op->lock)
|
||||
return filp->f_op->lock(filp, F_GETLK, fl);
|
||||
posix_test_lock(filp, fl);
|
||||
return 0;
|
||||
@ -2196,7 +2190,7 @@ out:
|
||||
*/
|
||||
int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf)
|
||||
{
|
||||
if (filp->f_op->lock && is_remote_lock(filp))
|
||||
if (filp->f_op->lock)
|
||||
return filp->f_op->lock(filp, cmd, fl);
|
||||
else
|
||||
return posix_lock_file(filp, fl, conf);
|
||||
@ -2518,7 +2512,7 @@ locks_remove_flock(struct file *filp, struct file_lock_context *flctx)
|
||||
if (list_empty(&flctx->flc_flock))
|
||||
return;
|
||||
|
||||
if (filp->f_op->flock && is_remote_lock(filp))
|
||||
if (filp->f_op->flock)
|
||||
filp->f_op->flock(filp, F_SETLKW, &fl);
|
||||
else
|
||||
flock_lock_inode(inode, &fl);
|
||||
@ -2605,7 +2599,7 @@ EXPORT_SYMBOL(posix_unblock_lock);
|
||||
*/
|
||||
int vfs_cancel_lock(struct file *filp, struct file_lock *fl)
|
||||
{
|
||||
if (filp->f_op->lock && is_remote_lock(filp))
|
||||
if (filp->f_op->lock)
|
||||
return filp->f_op->lock(filp, F_CANCELLK, fl);
|
||||
return 0;
|
||||
}
|
||||
|
@ -1015,7 +1015,7 @@ const char *get_link(struct nameidata *nd)
|
||||
if (!(nd->flags & LOOKUP_RCU)) {
|
||||
touch_atime(&last->link);
|
||||
cond_resched();
|
||||
} else if (atime_needs_update_rcu(&last->link, inode)) {
|
||||
} else if (atime_needs_update(&last->link, inode)) {
|
||||
if (unlikely(unlazy_walk(nd)))
|
||||
return ERR_PTR(-ECHILD);
|
||||
touch_atime(&last->link);
|
||||
|
@ -430,75 +430,21 @@ int __mnt_want_write_file(struct file *file)
|
||||
return mnt_clone_write(file->f_path.mnt);
|
||||
}
|
||||
|
||||
/**
|
||||
* mnt_want_write_file_path - get write access to a file's mount
|
||||
* @file: the file who's mount on which to take a write
|
||||
*
|
||||
* This is like mnt_want_write, but it takes a file and can
|
||||
* do some optimisations if the file is open for write already
|
||||
*
|
||||
* Called by the vfs for cases when we have an open file at hand, but will do an
|
||||
* inode operation on it (important distinction for files opened on overlayfs,
|
||||
* since the file operations will come from the real underlying file, while
|
||||
* inode operations come from the overlay).
|
||||
*/
|
||||
int mnt_want_write_file_path(struct file *file)
|
||||
{
|
||||
int ret;
|
||||
|
||||
sb_start_write(file->f_path.mnt->mnt_sb);
|
||||
ret = __mnt_want_write_file(file);
|
||||
if (ret)
|
||||
sb_end_write(file->f_path.mnt->mnt_sb);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int may_write_real(struct file *file)
|
||||
{
|
||||
struct dentry *dentry = file->f_path.dentry;
|
||||
struct dentry *upperdentry;
|
||||
|
||||
/* Writable file? */
|
||||
if (file->f_mode & FMODE_WRITER)
|
||||
return 0;
|
||||
|
||||
/* Not overlayfs? */
|
||||
if (likely(!(dentry->d_flags & DCACHE_OP_REAL)))
|
||||
return 0;
|
||||
|
||||
/* File refers to upper, writable layer? */
|
||||
upperdentry = d_real(dentry, NULL, 0, D_REAL_UPPER);
|
||||
if (upperdentry &&
|
||||
(file_inode(file) == d_inode(upperdentry) ||
|
||||
file_inode(file) == d_inode(dentry)))
|
||||
return 0;
|
||||
|
||||
/* Lower layer: can't write to real file, sorry... */
|
||||
return -EPERM;
|
||||
}
|
||||
|
||||
/**
|
||||
* mnt_want_write_file - get write access to a file's mount
|
||||
* @file: the file who's mount on which to take a write
|
||||
*
|
||||
* This is like mnt_want_write, but it takes a file and can
|
||||
* do some optimisations if the file is open for write already
|
||||
*
|
||||
* Mostly called by filesystems from their ioctl operation before performing
|
||||
* modification. On overlayfs this needs to check if the file is on a read-only
|
||||
* lower layer and deny access in that case.
|
||||
*/
|
||||
int mnt_want_write_file(struct file *file)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = may_write_real(file);
|
||||
if (!ret) {
|
||||
sb_start_write(file_inode(file)->i_sb);
|
||||
ret = __mnt_want_write_file(file);
|
||||
if (ret)
|
||||
sb_end_write(file_inode(file)->i_sb);
|
||||
}
|
||||
sb_start_write(file_inode(file)->i_sb);
|
||||
ret = __mnt_want_write_file(file);
|
||||
if (ret)
|
||||
sb_end_write(file_inode(file)->i_sb);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(mnt_want_write_file);
|
||||
@ -538,14 +484,9 @@ void __mnt_drop_write_file(struct file *file)
|
||||
__mnt_drop_write(file->f_path.mnt);
|
||||
}
|
||||
|
||||
void mnt_drop_write_file_path(struct file *file)
|
||||
{
|
||||
mnt_drop_write(file->f_path.mnt);
|
||||
}
|
||||
|
||||
void mnt_drop_write_file(struct file *file)
|
||||
{
|
||||
__mnt_drop_write(file->f_path.mnt);
|
||||
__mnt_drop_write_file(file);
|
||||
sb_end_write(file_inode(file)->i_sb);
|
||||
}
|
||||
EXPORT_SYMBOL(mnt_drop_write_file);
|
||||
|
@ -2537,19 +2537,14 @@ static int ocfs2_file_clone_range(struct file *file_in,
|
||||
len, false);
|
||||
}
|
||||
|
||||
static ssize_t ocfs2_file_dedupe_range(struct file *src_file,
|
||||
u64 loff,
|
||||
u64 len,
|
||||
struct file *dst_file,
|
||||
u64 dst_loff)
|
||||
static int ocfs2_file_dedupe_range(struct file *file_in,
|
||||
loff_t pos_in,
|
||||
struct file *file_out,
|
||||
loff_t pos_out,
|
||||
u64 len)
|
||||
{
|
||||
int error;
|
||||
|
||||
error = ocfs2_reflink_remap_range(src_file, loff, dst_file, dst_loff,
|
||||
return ocfs2_reflink_remap_range(file_in, pos_in, file_out, pos_out,
|
||||
len, true);
|
||||
if (error)
|
||||
return error;
|
||||
return len;
|
||||
}
|
||||
|
||||
const struct inode_operations ocfs2_file_iops = {
|
||||
|
44
fs/open.c
44
fs/open.c
@ -68,7 +68,6 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
|
||||
long vfs_truncate(const struct path *path, loff_t length)
|
||||
{
|
||||
struct inode *inode;
|
||||
struct dentry *upperdentry;
|
||||
long error;
|
||||
|
||||
inode = path->dentry->d_inode;
|
||||
@ -91,17 +90,7 @@ long vfs_truncate(const struct path *path, loff_t length)
|
||||
if (IS_APPEND(inode))
|
||||
goto mnt_drop_write_and_out;
|
||||
|
||||
/*
|
||||
* If this is an overlayfs then do as if opening the file so we get
|
||||
* write access on the upper inode, not on the overlay inode. For
|
||||
* non-overlay filesystems d_real() is an identity function.
|
||||
*/
|
||||
upperdentry = d_real(path->dentry, NULL, O_WRONLY, 0);
|
||||
error = PTR_ERR(upperdentry);
|
||||
if (IS_ERR(upperdentry))
|
||||
goto mnt_drop_write_and_out;
|
||||
|
||||
error = get_write_access(upperdentry->d_inode);
|
||||
error = get_write_access(inode);
|
||||
if (error)
|
||||
goto mnt_drop_write_and_out;
|
||||
|
||||
@ -120,7 +109,7 @@ long vfs_truncate(const struct path *path, loff_t length)
|
||||
error = do_truncate(path->dentry, length, 0, NULL);
|
||||
|
||||
put_write_and_out:
|
||||
put_write_access(upperdentry->d_inode);
|
||||
put_write_access(inode);
|
||||
mnt_drop_write_and_out:
|
||||
mnt_drop_write(path->mnt);
|
||||
out:
|
||||
@ -707,12 +696,12 @@ int ksys_fchown(unsigned int fd, uid_t user, gid_t group)
|
||||
if (!f.file)
|
||||
goto out;
|
||||
|
||||
error = mnt_want_write_file_path(f.file);
|
||||
error = mnt_want_write_file(f.file);
|
||||
if (error)
|
||||
goto out_fput;
|
||||
audit_file(f.file);
|
||||
error = chown_common(&f.file->f_path, user, group);
|
||||
mnt_drop_write_file_path(f.file);
|
||||
mnt_drop_write_file(f.file);
|
||||
out_fput:
|
||||
fdput(f);
|
||||
out:
|
||||
@ -887,13 +876,8 @@ EXPORT_SYMBOL(file_path);
|
||||
*/
|
||||
int vfs_open(const struct path *path, struct file *file)
|
||||
{
|
||||
struct dentry *dentry = d_real(path->dentry, NULL, file->f_flags, 0);
|
||||
|
||||
if (IS_ERR(dentry))
|
||||
return PTR_ERR(dentry);
|
||||
|
||||
file->f_path = *path;
|
||||
return do_dentry_open(file, d_backing_inode(dentry), NULL);
|
||||
return do_dentry_open(file, d_backing_inode(path->dentry), NULL);
|
||||
}
|
||||
|
||||
struct file *dentry_open(const struct path *path, int flags,
|
||||
@ -919,6 +903,24 @@ struct file *dentry_open(const struct path *path, int flags,
|
||||
}
|
||||
EXPORT_SYMBOL(dentry_open);
|
||||
|
||||
struct file *open_with_fake_path(const struct path *path, int flags,
|
||||
struct inode *inode, const struct cred *cred)
|
||||
{
|
||||
struct file *f = alloc_empty_file_noaccount(flags, cred);
|
||||
if (!IS_ERR(f)) {
|
||||
int error;
|
||||
|
||||
f->f_path = *path;
|
||||
error = do_dentry_open(f, inode, NULL);
|
||||
if (error) {
|
||||
fput(f);
|
||||
f = ERR_PTR(error);
|
||||
}
|
||||
}
|
||||
return f;
|
||||
}
|
||||
EXPORT_SYMBOL(open_with_fake_path);
|
||||
|
||||
static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op)
|
||||
{
|
||||
int lookup_flags = 0;
|
||||
|
@ -64,6 +64,7 @@ config OVERLAY_FS_NFS_EXPORT
|
||||
bool "Overlayfs: turn on NFS export feature by default"
|
||||
depends on OVERLAY_FS
|
||||
depends on OVERLAY_FS_INDEX
|
||||
depends on !OVERLAY_FS_METACOPY
|
||||
help
|
||||
If this config option is enabled then overlay filesystems will use
|
||||
the index directory to decode overlay NFS file handles by default.
|
||||
@ -103,3 +104,21 @@ config OVERLAY_FS_XINO_AUTO
|
||||
For more information, see Documentation/filesystems/overlayfs.txt
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config OVERLAY_FS_METACOPY
|
||||
bool "Overlayfs: turn on metadata only copy up feature by default"
|
||||
depends on OVERLAY_FS
|
||||
select OVERLAY_FS_REDIRECT_DIR
|
||||
help
|
||||
If this config option is enabled then overlay filesystems will
|
||||
copy up only metadata where appropriate and data copy up will
|
||||
happen when a file is opened for WRITE operation. It is still
|
||||
possible to turn off this feature globally with the "metacopy=off"
|
||||
module option or on a filesystem instance basis with the
|
||||
"metacopy=off" mount option.
|
||||
|
||||
Note, that this feature is not backward compatible. That is,
|
||||
mounting an overlay which has metacopy only inodes on a kernel
|
||||
that doesn't support this feature will have unexpected results.
|
||||
|
||||
If unsure, say N.
|
||||
|
@ -4,5 +4,5 @@
|
||||
|
||||
obj-$(CONFIG_OVERLAY_FS) += overlay.o
|
||||
|
||||
overlay-objs := super.o namei.o util.o inode.o dir.o readdir.o copy_up.o \
|
||||
export.o
|
||||
overlay-objs := super.o namei.o util.o inode.o file.o dir.o readdir.o \
|
||||
copy_up.o export.o
|
||||
|
@ -25,35 +25,20 @@
|
||||
|
||||
#define OVL_COPY_UP_CHUNK_SIZE (1 << 20)
|
||||
|
||||
static bool __read_mostly ovl_check_copy_up;
|
||||
module_param_named(check_copy_up, ovl_check_copy_up, bool,
|
||||
S_IWUSR | S_IRUGO);
|
||||
MODULE_PARM_DESC(ovl_check_copy_up,
|
||||
"Warn on copy-up when causing process also has a R/O fd open");
|
||||
|
||||
static int ovl_check_fd(const void *data, struct file *f, unsigned int fd)
|
||||
static int ovl_ccup_set(const char *buf, const struct kernel_param *param)
|
||||
{
|
||||
const struct dentry *dentry = data;
|
||||
|
||||
if (file_inode(f) == d_inode(dentry))
|
||||
pr_warn_ratelimited("overlayfs: Warning: Copying up %pD, but open R/O on fd %u which will cease to be coherent [pid=%d %s]\n",
|
||||
f, fd, current->pid, current->comm);
|
||||
pr_warn("overlayfs: \"check_copy_up\" module option is obsolete\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check the fds open by this process and warn if something like the following
|
||||
* scenario is about to occur:
|
||||
*
|
||||
* fd1 = open("foo", O_RDONLY);
|
||||
* fd2 = open("foo", O_RDWR);
|
||||
*/
|
||||
static void ovl_do_check_copy_up(struct dentry *dentry)
|
||||
static int ovl_ccup_get(char *buf, const struct kernel_param *param)
|
||||
{
|
||||
if (ovl_check_copy_up)
|
||||
iterate_fd(current->files, 0, ovl_check_fd, dentry);
|
||||
return sprintf(buf, "N\n");
|
||||
}
|
||||
|
||||
module_param_call(check_copy_up, ovl_ccup_set, ovl_ccup_get, NULL, 0644);
|
||||
MODULE_PARM_DESC(ovl_check_copy_up, "Obsolete; does nothing");
|
||||
|
||||
int ovl_copy_xattr(struct dentry *old, struct dentry *new)
|
||||
{
|
||||
ssize_t list_size, size, value_size = 0;
|
||||
@ -195,6 +180,16 @@ out_fput:
|
||||
return error;
|
||||
}
|
||||
|
||||
static int ovl_set_size(struct dentry *upperdentry, struct kstat *stat)
|
||||
{
|
||||
struct iattr attr = {
|
||||
.ia_valid = ATTR_SIZE,
|
||||
.ia_size = stat->size,
|
||||
};
|
||||
|
||||
return notify_change(upperdentry, &attr, NULL);
|
||||
}
|
||||
|
||||
static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat)
|
||||
{
|
||||
struct iattr attr = {
|
||||
@ -403,6 +398,7 @@ struct ovl_copy_up_ctx {
|
||||
bool tmpfile;
|
||||
bool origin;
|
||||
bool indexed;
|
||||
bool metacopy;
|
||||
};
|
||||
|
||||
static int ovl_link_up(struct ovl_copy_up_ctx *c)
|
||||
@ -505,28 +501,10 @@ static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
|
||||
{
|
||||
int err;
|
||||
|
||||
if (S_ISREG(c->stat.mode)) {
|
||||
struct path upperpath;
|
||||
|
||||
ovl_path_upper(c->dentry, &upperpath);
|
||||
BUG_ON(upperpath.dentry != NULL);
|
||||
upperpath.dentry = temp;
|
||||
|
||||
err = ovl_copy_up_data(&c->lowerpath, &upperpath, c->stat.size);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
err = ovl_copy_xattr(c->lowerpath.dentry, temp);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
inode_lock(temp->d_inode);
|
||||
err = ovl_set_attr(temp, &c->stat);
|
||||
inode_unlock(temp->d_inode);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
/*
|
||||
* Store identifier of lower inode in upper inode xattr to
|
||||
* allow lookup of the copy up origin inode.
|
||||
@ -540,7 +518,34 @@ static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
|
||||
return err;
|
||||
}
|
||||
|
||||
return 0;
|
||||
if (S_ISREG(c->stat.mode) && !c->metacopy) {
|
||||
struct path upperpath, datapath;
|
||||
|
||||
ovl_path_upper(c->dentry, &upperpath);
|
||||
BUG_ON(upperpath.dentry != NULL);
|
||||
upperpath.dentry = temp;
|
||||
|
||||
ovl_path_lowerdata(c->dentry, &datapath);
|
||||
err = ovl_copy_up_data(&datapath, &upperpath, c->stat.size);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
if (c->metacopy) {
|
||||
err = ovl_check_setxattr(c->dentry, temp, OVL_XATTR_METACOPY,
|
||||
NULL, 0, -EOPNOTSUPP);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
inode_lock(temp->d_inode);
|
||||
if (c->metacopy)
|
||||
err = ovl_set_size(temp, &c->stat);
|
||||
if (!err)
|
||||
err = ovl_set_attr(temp, &c->stat);
|
||||
inode_unlock(temp->d_inode);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int ovl_copy_up_locked(struct ovl_copy_up_ctx *c)
|
||||
@ -575,6 +580,8 @@ static int ovl_copy_up_locked(struct ovl_copy_up_ctx *c)
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
if (!c->metacopy)
|
||||
ovl_set_upperdata(d_inode(c->dentry));
|
||||
inode = d_inode(c->dentry);
|
||||
ovl_inode_update(inode, newdentry);
|
||||
if (S_ISDIR(inode->i_mode))
|
||||
@ -677,6 +684,49 @@ out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static bool ovl_need_meta_copy_up(struct dentry *dentry, umode_t mode,
|
||||
int flags)
|
||||
{
|
||||
struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
|
||||
|
||||
if (!ofs->config.metacopy)
|
||||
return false;
|
||||
|
||||
if (!S_ISREG(mode))
|
||||
return false;
|
||||
|
||||
if (flags && ((OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC)))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Copy up data of an inode which was copied up metadata only in the past. */
|
||||
static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx *c)
|
||||
{
|
||||
struct path upperpath, datapath;
|
||||
int err;
|
||||
|
||||
ovl_path_upper(c->dentry, &upperpath);
|
||||
if (WARN_ON(upperpath.dentry == NULL))
|
||||
return -EIO;
|
||||
|
||||
ovl_path_lowerdata(c->dentry, &datapath);
|
||||
if (WARN_ON(datapath.dentry == NULL))
|
||||
return -EIO;
|
||||
|
||||
err = ovl_copy_up_data(&datapath, &upperpath, c->stat.size);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = vfs_removexattr(upperpath.dentry, OVL_XATTR_METACOPY);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
ovl_set_upperdata(d_inode(c->dentry));
|
||||
return err;
|
||||
}
|
||||
|
||||
static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
|
||||
int flags)
|
||||
{
|
||||
@ -698,6 +748,8 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
ctx.metacopy = ovl_need_meta_copy_up(dentry, ctx.stat.mode, flags);
|
||||
|
||||
if (parent) {
|
||||
ovl_path_upper(parent, &parentpath);
|
||||
ctx.destdir = parentpath.dentry;
|
||||
@ -719,9 +771,8 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
|
||||
if (IS_ERR(ctx.link))
|
||||
return PTR_ERR(ctx.link);
|
||||
}
|
||||
ovl_do_check_copy_up(ctx.lowerpath.dentry);
|
||||
|
||||
err = ovl_copy_up_start(dentry);
|
||||
err = ovl_copy_up_start(dentry, flags);
|
||||
/* err < 0: interrupted, err > 0: raced with another copy-up */
|
||||
if (unlikely(err)) {
|
||||
if (err > 0)
|
||||
@ -731,6 +782,8 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
|
||||
err = ovl_do_copy_up(&ctx);
|
||||
if (!err && parent && !ovl_dentry_has_upper_alias(dentry))
|
||||
err = ovl_link_up(&ctx);
|
||||
if (!err && ovl_dentry_needs_data_copy_up_locked(dentry, flags))
|
||||
err = ovl_copy_up_meta_inode_data(&ctx);
|
||||
ovl_copy_up_end(dentry);
|
||||
}
|
||||
do_delayed_call(&done);
|
||||
@ -756,21 +809,7 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags)
|
||||
struct dentry *next;
|
||||
struct dentry *parent = NULL;
|
||||
|
||||
/*
|
||||
* Check if copy-up has happened as well as for upper alias (in
|
||||
* case of hard links) is there.
|
||||
*
|
||||
* Both checks are lockless:
|
||||
* - false negatives: will recheck under oi->lock
|
||||
* - false positives:
|
||||
* + ovl_dentry_upper() uses memory barriers to ensure the
|
||||
* upper dentry is up-to-date
|
||||
* + ovl_dentry_has_upper_alias() relies on locking of
|
||||
* upper parent i_rwsem to prevent reordering copy-up
|
||||
* with rename.
|
||||
*/
|
||||
if (ovl_dentry_upper(dentry) &&
|
||||
(ovl_dentry_has_upper_alias(dentry) || disconnected))
|
||||
if (ovl_already_copied_up(dentry, flags))
|
||||
break;
|
||||
|
||||
next = dget(dentry);
|
||||
@ -795,6 +834,41 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags)
|
||||
return err;
|
||||
}
|
||||
|
||||
static bool ovl_open_need_copy_up(struct dentry *dentry, int flags)
|
||||
{
|
||||
/* Copy up of disconnected dentry does not set upper alias */
|
||||
if (ovl_already_copied_up(dentry, flags))
|
||||
return false;
|
||||
|
||||
if (special_file(d_inode(dentry)->i_mode))
|
||||
return false;
|
||||
|
||||
if (!ovl_open_flags_need_copy_up(flags))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
if (ovl_open_need_copy_up(dentry, file_flags)) {
|
||||
err = ovl_want_write(dentry);
|
||||
if (!err) {
|
||||
err = ovl_copy_up_flags(dentry, file_flags);
|
||||
ovl_drop_write(dentry);
|
||||
}
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
int ovl_copy_up_with_data(struct dentry *dentry)
|
||||
{
|
||||
return ovl_copy_up_flags(dentry, O_WRONLY);
|
||||
}
|
||||
|
||||
int ovl_copy_up(struct dentry *dentry)
|
||||
{
|
||||
return ovl_copy_up_flags(dentry, 0);
|
||||
|
@ -24,6 +24,8 @@ module_param_named(redirect_max, ovl_redirect_max, ushort, 0644);
|
||||
MODULE_PARM_DESC(ovl_redirect_max,
|
||||
"Maximum length of absolute redirect xattr value");
|
||||
|
||||
static int ovl_set_redirect(struct dentry *dentry, bool samedir);
|
||||
|
||||
int ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
|
||||
{
|
||||
int err;
|
||||
@ -242,7 +244,7 @@ static int ovl_instantiate(struct dentry *dentry, struct inode *inode,
|
||||
.newinode = inode,
|
||||
};
|
||||
|
||||
ovl_dentry_version_inc(dentry->d_parent, false);
|
||||
ovl_dir_modified(dentry->d_parent, false);
|
||||
ovl_dentry_set_upper_alias(dentry);
|
||||
if (!hardlink) {
|
||||
/*
|
||||
@ -657,6 +659,12 @@ static int ovl_link(struct dentry *old, struct inode *newdir,
|
||||
if (err)
|
||||
goto out_drop_write;
|
||||
|
||||
if (ovl_is_metacopy_dentry(old)) {
|
||||
err = ovl_set_redirect(old, false);
|
||||
if (err)
|
||||
goto out_drop_write;
|
||||
}
|
||||
|
||||
err = ovl_nlink_start(old, &locked);
|
||||
if (err)
|
||||
goto out_drop_write;
|
||||
@ -722,7 +730,7 @@ static int ovl_remove_and_whiteout(struct dentry *dentry,
|
||||
if (err)
|
||||
goto out_d_drop;
|
||||
|
||||
ovl_dentry_version_inc(dentry->d_parent, true);
|
||||
ovl_dir_modified(dentry->d_parent, true);
|
||||
out_d_drop:
|
||||
d_drop(dentry);
|
||||
out_dput_upper:
|
||||
@ -767,7 +775,7 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir,
|
||||
err = vfs_rmdir(dir, upper);
|
||||
else
|
||||
err = vfs_unlink(dir, upper, NULL);
|
||||
ovl_dentry_version_inc(dentry->d_parent, ovl_type_origin(dentry));
|
||||
ovl_dir_modified(dentry->d_parent, ovl_type_origin(dentry));
|
||||
|
||||
/*
|
||||
* Keeping this dentry hashed would mean having to release
|
||||
@ -797,6 +805,7 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir)
|
||||
int err;
|
||||
bool locked = false;
|
||||
const struct cred *old_cred;
|
||||
struct dentry *upperdentry;
|
||||
bool lower_positive = ovl_lower_positive(dentry);
|
||||
LIST_HEAD(list);
|
||||
|
||||
@ -832,6 +841,17 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir)
|
||||
drop_nlink(dentry->d_inode);
|
||||
}
|
||||
ovl_nlink_end(dentry, locked);
|
||||
|
||||
/*
|
||||
* Copy ctime
|
||||
*
|
||||
* Note: we fail to update ctime if there was no copy-up, only a
|
||||
* whiteout
|
||||
*/
|
||||
upperdentry = ovl_dentry_upper(dentry);
|
||||
if (upperdentry)
|
||||
ovl_copyattr(d_inode(upperdentry), d_inode(dentry));
|
||||
|
||||
out_drop_write:
|
||||
ovl_drop_write(dentry);
|
||||
out:
|
||||
@ -862,13 +882,13 @@ static bool ovl_can_move(struct dentry *dentry)
|
||||
!d_is_dir(dentry) || !ovl_type_merge_or_lower(dentry);
|
||||
}
|
||||
|
||||
static char *ovl_get_redirect(struct dentry *dentry, bool samedir)
|
||||
static char *ovl_get_redirect(struct dentry *dentry, bool abs_redirect)
|
||||
{
|
||||
char *buf, *ret;
|
||||
struct dentry *d, *tmp;
|
||||
int buflen = ovl_redirect_max + 1;
|
||||
|
||||
if (samedir) {
|
||||
if (!abs_redirect) {
|
||||
ret = kstrndup(dentry->d_name.name, dentry->d_name.len,
|
||||
GFP_KERNEL);
|
||||
goto out;
|
||||
@ -922,15 +942,43 @@ out:
|
||||
return ret ? ret : ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
static bool ovl_need_absolute_redirect(struct dentry *dentry, bool samedir)
|
||||
{
|
||||
struct dentry *lowerdentry;
|
||||
|
||||
if (!samedir)
|
||||
return true;
|
||||
|
||||
if (d_is_dir(dentry))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* For non-dir hardlinked files, we need absolute redirects
|
||||
* in general as two upper hardlinks could be in different
|
||||
* dirs. We could put a relative redirect now and convert
|
||||
* it to absolute redirect later. But when nlink > 1 and
|
||||
* indexing is on, that means relative redirect needs to be
|
||||
* converted to absolute during copy up of another lower
|
||||
* hardllink as well.
|
||||
*
|
||||
* So without optimizing too much, just check if lower is
|
||||
* a hard link or not. If lower is hard link, put absolute
|
||||
* redirect.
|
||||
*/
|
||||
lowerdentry = ovl_dentry_lower(dentry);
|
||||
return (d_inode(lowerdentry)->i_nlink > 1);
|
||||
}
|
||||
|
||||
static int ovl_set_redirect(struct dentry *dentry, bool samedir)
|
||||
{
|
||||
int err;
|
||||
const char *redirect = ovl_dentry_get_redirect(dentry);
|
||||
bool absolute_redirect = ovl_need_absolute_redirect(dentry, samedir);
|
||||
|
||||
if (redirect && (samedir || redirect[0] == '/'))
|
||||
if (redirect && (!absolute_redirect || redirect[0] == '/'))
|
||||
return 0;
|
||||
|
||||
redirect = ovl_get_redirect(dentry, samedir);
|
||||
redirect = ovl_get_redirect(dentry, absolute_redirect);
|
||||
if (IS_ERR(redirect))
|
||||
return PTR_ERR(redirect);
|
||||
|
||||
@ -1106,22 +1154,20 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
|
||||
goto out_dput;
|
||||
|
||||
err = 0;
|
||||
if (is_dir) {
|
||||
if (ovl_type_merge_or_lower(old))
|
||||
err = ovl_set_redirect(old, samedir);
|
||||
else if (!old_opaque && ovl_type_merge(new->d_parent))
|
||||
err = ovl_set_opaque_xerr(old, olddentry, -EXDEV);
|
||||
if (err)
|
||||
goto out_dput;
|
||||
}
|
||||
if (!overwrite && new_is_dir) {
|
||||
if (ovl_type_merge_or_lower(new))
|
||||
err = ovl_set_redirect(new, samedir);
|
||||
else if (!new_opaque && ovl_type_merge(old->d_parent))
|
||||
err = ovl_set_opaque_xerr(new, newdentry, -EXDEV);
|
||||
if (err)
|
||||
goto out_dput;
|
||||
}
|
||||
if (ovl_type_merge_or_lower(old))
|
||||
err = ovl_set_redirect(old, samedir);
|
||||
else if (is_dir && !old_opaque && ovl_type_merge(new->d_parent))
|
||||
err = ovl_set_opaque_xerr(old, olddentry, -EXDEV);
|
||||
if (err)
|
||||
goto out_dput;
|
||||
|
||||
if (!overwrite && ovl_type_merge_or_lower(new))
|
||||
err = ovl_set_redirect(new, samedir);
|
||||
else if (!overwrite && new_is_dir && !new_opaque &&
|
||||
ovl_type_merge(old->d_parent))
|
||||
err = ovl_set_opaque_xerr(new, newdentry, -EXDEV);
|
||||
if (err)
|
||||
goto out_dput;
|
||||
|
||||
err = ovl_do_rename(old_upperdir->d_inode, olddentry,
|
||||
new_upperdir->d_inode, newdentry, flags);
|
||||
@ -1138,10 +1184,15 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
|
||||
drop_nlink(d_inode(new));
|
||||
}
|
||||
|
||||
ovl_dentry_version_inc(old->d_parent, ovl_type_origin(old) ||
|
||||
(!overwrite && ovl_type_origin(new)));
|
||||
ovl_dentry_version_inc(new->d_parent, ovl_type_origin(old) ||
|
||||
(d_inode(new) && ovl_type_origin(new)));
|
||||
ovl_dir_modified(old->d_parent, ovl_type_origin(old) ||
|
||||
(!overwrite && ovl_type_origin(new)));
|
||||
ovl_dir_modified(new->d_parent, ovl_type_origin(old) ||
|
||||
(d_inode(new) && ovl_type_origin(new)));
|
||||
|
||||
/* copy ctime: */
|
||||
ovl_copyattr(d_inode(olddentry), d_inode(old));
|
||||
if (d_inode(new) && ovl_dentry_upper(new))
|
||||
ovl_copyattr(d_inode(newdentry), d_inode(new));
|
||||
|
||||
out_dput:
|
||||
dput(newdentry);
|
||||
|
@ -317,6 +317,9 @@ static struct dentry *ovl_obtain_alias(struct super_block *sb,
|
||||
return ERR_CAST(inode);
|
||||
}
|
||||
|
||||
if (upper)
|
||||
ovl_set_flag(OVL_UPPERDATA, inode);
|
||||
|
||||
dentry = d_find_any_alias(inode);
|
||||
if (!dentry) {
|
||||
dentry = d_alloc_anon(inode->i_sb);
|
||||
|
511
fs/overlayfs/file.c
Normal file
511
fs/overlayfs/file.c
Normal file
@ -0,0 +1,511 @@
|
||||
/*
|
||||
* Copyright (C) 2017 Red Hat, Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 as published by
|
||||
* the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/cred.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/mount.h>
|
||||
#include <linux/xattr.h>
|
||||
#include <linux/uio.h>
|
||||
#include "overlayfs.h"
|
||||
|
||||
static char ovl_whatisit(struct inode *inode, struct inode *realinode)
|
||||
{
|
||||
if (realinode != ovl_inode_upper(inode))
|
||||
return 'l';
|
||||
if (ovl_has_upperdata(inode))
|
||||
return 'u';
|
||||
else
|
||||
return 'm';
|
||||
}
|
||||
|
||||
static struct file *ovl_open_realfile(const struct file *file,
|
||||
struct inode *realinode)
|
||||
{
|
||||
struct inode *inode = file_inode(file);
|
||||
struct file *realfile;
|
||||
const struct cred *old_cred;
|
||||
|
||||
old_cred = ovl_override_creds(inode->i_sb);
|
||||
realfile = open_with_fake_path(&file->f_path, file->f_flags | O_NOATIME,
|
||||
realinode, current_cred());
|
||||
revert_creds(old_cred);
|
||||
|
||||
pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
|
||||
file, file, ovl_whatisit(inode, realinode), file->f_flags,
|
||||
realfile, IS_ERR(realfile) ? 0 : realfile->f_flags);
|
||||
|
||||
return realfile;
|
||||
}
|
||||
|
||||
#define OVL_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT)
|
||||
|
||||
static int ovl_change_flags(struct file *file, unsigned int flags)
|
||||
{
|
||||
struct inode *inode = file_inode(file);
|
||||
int err;
|
||||
|
||||
/* No atime modificaton on underlying */
|
||||
flags |= O_NOATIME;
|
||||
|
||||
/* If some flag changed that cannot be changed then something's amiss */
|
||||
if (WARN_ON((file->f_flags ^ flags) & ~OVL_SETFL_MASK))
|
||||
return -EIO;
|
||||
|
||||
flags &= OVL_SETFL_MASK;
|
||||
|
||||
if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
|
||||
return -EPERM;
|
||||
|
||||
if (flags & O_DIRECT) {
|
||||
if (!file->f_mapping->a_ops ||
|
||||
!file->f_mapping->a_ops->direct_IO)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (file->f_op->check_flags) {
|
||||
err = file->f_op->check_flags(flags);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
spin_lock(&file->f_lock);
|
||||
file->f_flags = (file->f_flags & ~OVL_SETFL_MASK) | flags;
|
||||
spin_unlock(&file->f_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ovl_real_fdget_meta(const struct file *file, struct fd *real,
|
||||
bool allow_meta)
|
||||
{
|
||||
struct inode *inode = file_inode(file);
|
||||
struct inode *realinode;
|
||||
|
||||
real->flags = 0;
|
||||
real->file = file->private_data;
|
||||
|
||||
if (allow_meta)
|
||||
realinode = ovl_inode_real(inode);
|
||||
else
|
||||
realinode = ovl_inode_realdata(inode);
|
||||
|
||||
/* Has it been copied up since we'd opened it? */
|
||||
if (unlikely(file_inode(real->file) != realinode)) {
|
||||
real->flags = FDPUT_FPUT;
|
||||
real->file = ovl_open_realfile(file, realinode);
|
||||
|
||||
return PTR_ERR_OR_ZERO(real->file);
|
||||
}
|
||||
|
||||
/* Did the flags change since open? */
|
||||
if (unlikely((file->f_flags ^ real->file->f_flags) & ~O_NOATIME))
|
||||
return ovl_change_flags(real->file, file->f_flags);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ovl_real_fdget(const struct file *file, struct fd *real)
|
||||
{
|
||||
return ovl_real_fdget_meta(file, real, false);
|
||||
}
|
||||
|
||||
static int ovl_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct dentry *dentry = file_dentry(file);
|
||||
struct file *realfile;
|
||||
int err;
|
||||
|
||||
err = ovl_open_maybe_copy_up(dentry, file->f_flags);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
/* No longer need these flags, so don't pass them on to underlying fs */
|
||||
file->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
|
||||
|
||||
realfile = ovl_open_realfile(file, ovl_inode_realdata(inode));
|
||||
if (IS_ERR(realfile))
|
||||
return PTR_ERR(realfile);
|
||||
|
||||
/* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO */
|
||||
file->f_mapping = realfile->f_mapping;
|
||||
|
||||
file->private_data = realfile;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ovl_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
fput(file->private_data);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
|
||||
{
|
||||
struct inode *realinode = ovl_inode_real(file_inode(file));
|
||||
|
||||
return generic_file_llseek_size(file, offset, whence,
|
||||
realinode->i_sb->s_maxbytes,
|
||||
i_size_read(realinode));
|
||||
}
|
||||
|
||||
static void ovl_file_accessed(struct file *file)
|
||||
{
|
||||
struct inode *inode, *upperinode;
|
||||
|
||||
if (file->f_flags & O_NOATIME)
|
||||
return;
|
||||
|
||||
inode = file_inode(file);
|
||||
upperinode = ovl_inode_upper(inode);
|
||||
|
||||
if (!upperinode)
|
||||
return;
|
||||
|
||||
if ((!timespec64_equal(&inode->i_mtime, &upperinode->i_mtime) ||
|
||||
!timespec64_equal(&inode->i_ctime, &upperinode->i_ctime))) {
|
||||
inode->i_mtime = upperinode->i_mtime;
|
||||
inode->i_ctime = upperinode->i_ctime;
|
||||
}
|
||||
|
||||
touch_atime(&file->f_path);
|
||||
}
|
||||
|
||||
static rwf_t ovl_iocb_to_rwf(struct kiocb *iocb)
|
||||
{
|
||||
int ifl = iocb->ki_flags;
|
||||
rwf_t flags = 0;
|
||||
|
||||
if (ifl & IOCB_NOWAIT)
|
||||
flags |= RWF_NOWAIT;
|
||||
if (ifl & IOCB_HIPRI)
|
||||
flags |= RWF_HIPRI;
|
||||
if (ifl & IOCB_DSYNC)
|
||||
flags |= RWF_DSYNC;
|
||||
if (ifl & IOCB_SYNC)
|
||||
flags |= RWF_SYNC;
|
||||
|
||||
return flags;
|
||||
}
|
||||
|
||||
static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
|
||||
{
|
||||
struct file *file = iocb->ki_filp;
|
||||
struct fd real;
|
||||
const struct cred *old_cred;
|
||||
ssize_t ret;
|
||||
|
||||
if (!iov_iter_count(iter))
|
||||
return 0;
|
||||
|
||||
ret = ovl_real_fdget(file, &real);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
old_cred = ovl_override_creds(file_inode(file)->i_sb);
|
||||
ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
|
||||
ovl_iocb_to_rwf(iocb));
|
||||
revert_creds(old_cred);
|
||||
|
||||
ovl_file_accessed(file);
|
||||
|
||||
fdput(real);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
|
||||
{
|
||||
struct file *file = iocb->ki_filp;
|
||||
struct inode *inode = file_inode(file);
|
||||
struct fd real;
|
||||
const struct cred *old_cred;
|
||||
ssize_t ret;
|
||||
|
||||
if (!iov_iter_count(iter))
|
||||
return 0;
|
||||
|
||||
inode_lock(inode);
|
||||
/* Update mode */
|
||||
ovl_copyattr(ovl_inode_real(inode), inode);
|
||||
ret = file_remove_privs(file);
|
||||
if (ret)
|
||||
goto out_unlock;
|
||||
|
||||
ret = ovl_real_fdget(file, &real);
|
||||
if (ret)
|
||||
goto out_unlock;
|
||||
|
||||
old_cred = ovl_override_creds(file_inode(file)->i_sb);
|
||||
ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
|
||||
ovl_iocb_to_rwf(iocb));
|
||||
revert_creds(old_cred);
|
||||
|
||||
/* Update size */
|
||||
ovl_copyattr(ovl_inode_real(inode), inode);
|
||||
|
||||
fdput(real);
|
||||
|
||||
out_unlock:
|
||||
inode_unlock(inode);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
|
||||
{
|
||||
struct fd real;
|
||||
const struct cred *old_cred;
|
||||
int ret;
|
||||
|
||||
ret = ovl_real_fdget_meta(file, &real, !datasync);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* Don't sync lower file for fear of receiving EROFS error */
|
||||
if (file_inode(real.file) == ovl_inode_upper(file_inode(file))) {
|
||||
old_cred = ovl_override_creds(file_inode(file)->i_sb);
|
||||
ret = vfs_fsync_range(real.file, start, end, datasync);
|
||||
revert_creds(old_cred);
|
||||
}
|
||||
|
||||
fdput(real);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
|
||||
{
|
||||
struct file *realfile = file->private_data;
|
||||
const struct cred *old_cred;
|
||||
int ret;
|
||||
|
||||
if (!realfile->f_op->mmap)
|
||||
return -ENODEV;
|
||||
|
||||
if (WARN_ON(file != vma->vm_file))
|
||||
return -EIO;
|
||||
|
||||
vma->vm_file = get_file(realfile);
|
||||
|
||||
old_cred = ovl_override_creds(file_inode(file)->i_sb);
|
||||
ret = call_mmap(vma->vm_file, vma);
|
||||
revert_creds(old_cred);
|
||||
|
||||
if (ret) {
|
||||
/* Drop reference count from new vm_file value */
|
||||
fput(realfile);
|
||||
} else {
|
||||
/* Drop reference count from previous vm_file value */
|
||||
fput(file);
|
||||
}
|
||||
|
||||
ovl_file_accessed(file);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
|
||||
{
|
||||
struct inode *inode = file_inode(file);
|
||||
struct fd real;
|
||||
const struct cred *old_cred;
|
||||
int ret;
|
||||
|
||||
ret = ovl_real_fdget(file, &real);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
old_cred = ovl_override_creds(file_inode(file)->i_sb);
|
||||
ret = vfs_fallocate(real.file, mode, offset, len);
|
||||
revert_creds(old_cred);
|
||||
|
||||
/* Update size */
|
||||
ovl_copyattr(ovl_inode_real(inode), inode);
|
||||
|
||||
fdput(real);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static long ovl_real_ioctl(struct file *file, unsigned int cmd,
|
||||
unsigned long arg)
|
||||
{
|
||||
struct fd real;
|
||||
const struct cred *old_cred;
|
||||
long ret;
|
||||
|
||||
ret = ovl_real_fdget(file, &real);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
old_cred = ovl_override_creds(file_inode(file)->i_sb);
|
||||
ret = vfs_ioctl(real.file, cmd, arg);
|
||||
revert_creds(old_cred);
|
||||
|
||||
fdput(real);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static long ovl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
|
||||
{
|
||||
long ret;
|
||||
struct inode *inode = file_inode(file);
|
||||
|
||||
switch (cmd) {
|
||||
case FS_IOC_GETFLAGS:
|
||||
ret = ovl_real_ioctl(file, cmd, arg);
|
||||
break;
|
||||
|
||||
case FS_IOC_SETFLAGS:
|
||||
if (!inode_owner_or_capable(inode))
|
||||
return -EACCES;
|
||||
|
||||
ret = mnt_want_write_file(file);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = ovl_copy_up_with_data(file_dentry(file));
|
||||
if (!ret) {
|
||||
ret = ovl_real_ioctl(file, cmd, arg);
|
||||
|
||||
inode_lock(inode);
|
||||
ovl_copyflags(ovl_inode_real(inode), inode);
|
||||
inode_unlock(inode);
|
||||
}
|
||||
|
||||
mnt_drop_write_file(file);
|
||||
break;
|
||||
|
||||
default:
|
||||
ret = -ENOTTY;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static long ovl_compat_ioctl(struct file *file, unsigned int cmd,
|
||||
unsigned long arg)
|
||||
{
|
||||
switch (cmd) {
|
||||
case FS_IOC32_GETFLAGS:
|
||||
cmd = FS_IOC_GETFLAGS;
|
||||
break;
|
||||
|
||||
case FS_IOC32_SETFLAGS:
|
||||
cmd = FS_IOC_SETFLAGS;
|
||||
break;
|
||||
|
||||
default:
|
||||
return -ENOIOCTLCMD;
|
||||
}
|
||||
|
||||
return ovl_ioctl(file, cmd, arg);
|
||||
}
|
||||
|
||||
enum ovl_copyop {
|
||||
OVL_COPY,
|
||||
OVL_CLONE,
|
||||
OVL_DEDUPE,
|
||||
};
|
||||
|
||||
static ssize_t ovl_copyfile(struct file *file_in, loff_t pos_in,
|
||||
struct file *file_out, loff_t pos_out,
|
||||
u64 len, unsigned int flags, enum ovl_copyop op)
|
||||
{
|
||||
struct inode *inode_out = file_inode(file_out);
|
||||
struct fd real_in, real_out;
|
||||
const struct cred *old_cred;
|
||||
ssize_t ret;
|
||||
|
||||
ret = ovl_real_fdget(file_out, &real_out);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = ovl_real_fdget(file_in, &real_in);
|
||||
if (ret) {
|
||||
fdput(real_out);
|
||||
return ret;
|
||||
}
|
||||
|
||||
old_cred = ovl_override_creds(file_inode(file_out)->i_sb);
|
||||
switch (op) {
|
||||
case OVL_COPY:
|
||||
ret = vfs_copy_file_range(real_in.file, pos_in,
|
||||
real_out.file, pos_out, len, flags);
|
||||
break;
|
||||
|
||||
case OVL_CLONE:
|
||||
ret = vfs_clone_file_range(real_in.file, pos_in,
|
||||
real_out.file, pos_out, len);
|
||||
break;
|
||||
|
||||
case OVL_DEDUPE:
|
||||
ret = vfs_dedupe_file_range_one(real_in.file, pos_in,
|
||||
real_out.file, pos_out, len);
|
||||
break;
|
||||
}
|
||||
revert_creds(old_cred);
|
||||
|
||||
/* Update size */
|
||||
ovl_copyattr(ovl_inode_real(inode_out), inode_out);
|
||||
|
||||
fdput(real_in);
|
||||
fdput(real_out);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in,
|
||||
struct file *file_out, loff_t pos_out,
|
||||
size_t len, unsigned int flags)
|
||||
{
|
||||
return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags,
|
||||
OVL_COPY);
|
||||
}
|
||||
|
||||
static int ovl_clone_file_range(struct file *file_in, loff_t pos_in,
|
||||
struct file *file_out, loff_t pos_out, u64 len)
|
||||
{
|
||||
return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, 0,
|
||||
OVL_CLONE);
|
||||
}
|
||||
|
||||
static int ovl_dedupe_file_range(struct file *file_in, loff_t pos_in,
|
||||
struct file *file_out, loff_t pos_out, u64 len)
|
||||
{
|
||||
/*
|
||||
* Don't copy up because of a dedupe request, this wouldn't make sense
|
||||
* most of the time (data would be duplicated instead of deduplicated).
|
||||
*/
|
||||
if (!ovl_inode_upper(file_inode(file_in)) ||
|
||||
!ovl_inode_upper(file_inode(file_out)))
|
||||
return -EPERM;
|
||||
|
||||
return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, 0,
|
||||
OVL_DEDUPE);
|
||||
}
|
||||
|
||||
const struct file_operations ovl_file_operations = {
|
||||
.open = ovl_open,
|
||||
.release = ovl_release,
|
||||
.llseek = ovl_llseek,
|
||||
.read_iter = ovl_read_iter,
|
||||
.write_iter = ovl_write_iter,
|
||||
.fsync = ovl_fsync,
|
||||
.mmap = ovl_mmap,
|
||||
.fallocate = ovl_fallocate,
|
||||
.unlocked_ioctl = ovl_ioctl,
|
||||
.compat_ioctl = ovl_compat_ioctl,
|
||||
|
||||
.copy_file_range = ovl_copy_file_range,
|
||||
.clone_file_range = ovl_clone_file_range,
|
||||
.dedupe_file_range = ovl_dedupe_file_range,
|
||||
};
|
@ -19,18 +19,10 @@
|
||||
int ovl_setattr(struct dentry *dentry, struct iattr *attr)
|
||||
{
|
||||
int err;
|
||||
bool full_copy_up = false;
|
||||
struct dentry *upperdentry;
|
||||
const struct cred *old_cred;
|
||||
|
||||
/*
|
||||
* Check for permissions before trying to copy-up. This is redundant
|
||||
* since it will be rechecked later by ->setattr() on upper dentry. But
|
||||
* without this, copy-up can be triggered by just about anybody.
|
||||
*
|
||||
* We don't initialize inode->size, which just means that
|
||||
* inode_newsize_ok() will always check against MAX_LFS_FILESIZE and not
|
||||
* check for a swapfile (which this won't be anyway).
|
||||
*/
|
||||
err = setattr_prepare(dentry, attr);
|
||||
if (err)
|
||||
return err;
|
||||
@ -39,10 +31,33 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr)
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
err = ovl_copy_up(dentry);
|
||||
if (attr->ia_valid & ATTR_SIZE) {
|
||||
struct inode *realinode = d_inode(ovl_dentry_real(dentry));
|
||||
|
||||
err = -ETXTBSY;
|
||||
if (atomic_read(&realinode->i_writecount) < 0)
|
||||
goto out_drop_write;
|
||||
|
||||
/* Truncate should trigger data copy up as well */
|
||||
full_copy_up = true;
|
||||
}
|
||||
|
||||
if (!full_copy_up)
|
||||
err = ovl_copy_up(dentry);
|
||||
else
|
||||
err = ovl_copy_up_with_data(dentry);
|
||||
if (!err) {
|
||||
struct inode *winode = NULL;
|
||||
|
||||
upperdentry = ovl_dentry_upper(dentry);
|
||||
|
||||
if (attr->ia_valid & ATTR_SIZE) {
|
||||
winode = d_inode(upperdentry);
|
||||
err = get_write_access(winode);
|
||||
if (err)
|
||||
goto out_drop_write;
|
||||
}
|
||||
|
||||
if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
|
||||
attr->ia_valid &= ~ATTR_MODE;
|
||||
|
||||
@ -53,7 +68,11 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr)
|
||||
if (!err)
|
||||
ovl_copyattr(upperdentry->d_inode, dentry->d_inode);
|
||||
inode_unlock(upperdentry->d_inode);
|
||||
|
||||
if (winode)
|
||||
put_write_access(winode);
|
||||
}
|
||||
out_drop_write:
|
||||
ovl_drop_write(dentry);
|
||||
out:
|
||||
return err;
|
||||
@ -133,6 +152,9 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
|
||||
bool samefs = ovl_same_sb(dentry->d_sb);
|
||||
struct ovl_layer *lower_layer = NULL;
|
||||
int err;
|
||||
bool metacopy_blocks = false;
|
||||
|
||||
metacopy_blocks = ovl_is_metacopy_dentry(dentry);
|
||||
|
||||
type = ovl_path_real(dentry, &realpath);
|
||||
old_cred = ovl_override_creds(dentry->d_sb);
|
||||
@ -154,7 +176,8 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
|
||||
lower_layer = ovl_layer_lower(dentry);
|
||||
} else if (OVL_TYPE_ORIGIN(type)) {
|
||||
struct kstat lowerstat;
|
||||
u32 lowermask = STATX_INO | (!is_dir ? STATX_NLINK : 0);
|
||||
u32 lowermask = STATX_INO | STATX_BLOCKS |
|
||||
(!is_dir ? STATX_NLINK : 0);
|
||||
|
||||
ovl_path_lower(dentry, &realpath);
|
||||
err = vfs_getattr(&realpath, &lowerstat,
|
||||
@ -183,6 +206,35 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
|
||||
stat->ino = lowerstat.ino;
|
||||
lower_layer = ovl_layer_lower(dentry);
|
||||
}
|
||||
|
||||
/*
|
||||
* If we are querying a metacopy dentry and lower
|
||||
* dentry is data dentry, then use the blocks we
|
||||
* queried just now. We don't have to do additional
|
||||
* vfs_getattr(). If lower itself is metacopy, then
|
||||
* additional vfs_getattr() is unavoidable.
|
||||
*/
|
||||
if (metacopy_blocks &&
|
||||
realpath.dentry == ovl_dentry_lowerdata(dentry)) {
|
||||
stat->blocks = lowerstat.blocks;
|
||||
metacopy_blocks = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (metacopy_blocks) {
|
||||
/*
|
||||
* If lower is not same as lowerdata or if there was
|
||||
* no origin on upper, we can end up here.
|
||||
*/
|
||||
struct kstat lowerdatastat;
|
||||
u32 lowermask = STATX_BLOCKS;
|
||||
|
||||
ovl_path_lowerdata(dentry, &realpath);
|
||||
err = vfs_getattr(&realpath, &lowerdatastat,
|
||||
lowermask, flags);
|
||||
if (err)
|
||||
goto out;
|
||||
stat->blocks = lowerdatastat.blocks;
|
||||
}
|
||||
}
|
||||
|
||||
@ -304,6 +356,9 @@ int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
|
||||
}
|
||||
revert_creds(old_cred);
|
||||
|
||||
/* copy c/mtime */
|
||||
ovl_copyattr(d_inode(realdentry), inode);
|
||||
|
||||
out_drop_write:
|
||||
ovl_drop_write(dentry);
|
||||
out:
|
||||
@ -384,38 +439,6 @@ struct posix_acl *ovl_get_acl(struct inode *inode, int type)
|
||||
return acl;
|
||||
}
|
||||
|
||||
static bool ovl_open_need_copy_up(struct dentry *dentry, int flags)
|
||||
{
|
||||
/* Copy up of disconnected dentry does not set upper alias */
|
||||
if (ovl_dentry_upper(dentry) &&
|
||||
(ovl_dentry_has_upper_alias(dentry) ||
|
||||
(dentry->d_flags & DCACHE_DISCONNECTED)))
|
||||
return false;
|
||||
|
||||
if (special_file(d_inode(dentry)->i_mode))
|
||||
return false;
|
||||
|
||||
if (!(OPEN_FMODE(flags) & FMODE_WRITE) && !(flags & O_TRUNC))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
if (ovl_open_need_copy_up(dentry, file_flags)) {
|
||||
err = ovl_want_write(dentry);
|
||||
if (!err) {
|
||||
err = ovl_copy_up_flags(dentry, file_flags);
|
||||
ovl_drop_write(dentry);
|
||||
}
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags)
|
||||
{
|
||||
if (flags & S_ATIME) {
|
||||
@ -433,6 +456,23 @@ int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ovl_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
|
||||
u64 start, u64 len)
|
||||
{
|
||||
int err;
|
||||
struct inode *realinode = ovl_inode_real(inode);
|
||||
const struct cred *old_cred;
|
||||
|
||||
if (!realinode->i_op->fiemap)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
old_cred = ovl_override_creds(inode->i_sb);
|
||||
err = realinode->i_op->fiemap(realinode, fieinfo, start, len);
|
||||
revert_creds(old_cred);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static const struct inode_operations ovl_file_inode_operations = {
|
||||
.setattr = ovl_setattr,
|
||||
.permission = ovl_permission,
|
||||
@ -440,6 +480,7 @@ static const struct inode_operations ovl_file_inode_operations = {
|
||||
.listxattr = ovl_listxattr,
|
||||
.get_acl = ovl_get_acl,
|
||||
.update_time = ovl_update_time,
|
||||
.fiemap = ovl_fiemap,
|
||||
};
|
||||
|
||||
static const struct inode_operations ovl_symlink_inode_operations = {
|
||||
@ -450,6 +491,15 @@ static const struct inode_operations ovl_symlink_inode_operations = {
|
||||
.update_time = ovl_update_time,
|
||||
};
|
||||
|
||||
static const struct inode_operations ovl_special_inode_operations = {
|
||||
.setattr = ovl_setattr,
|
||||
.permission = ovl_permission,
|
||||
.getattr = ovl_getattr,
|
||||
.listxattr = ovl_listxattr,
|
||||
.get_acl = ovl_get_acl,
|
||||
.update_time = ovl_update_time,
|
||||
};
|
||||
|
||||
/*
|
||||
* It is possible to stack overlayfs instance on top of another
|
||||
* overlayfs instance as lower layer. We need to annonate the
|
||||
@ -520,6 +570,7 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev,
|
||||
switch (mode & S_IFMT) {
|
||||
case S_IFREG:
|
||||
inode->i_op = &ovl_file_inode_operations;
|
||||
inode->i_fop = &ovl_file_operations;
|
||||
break;
|
||||
|
||||
case S_IFDIR:
|
||||
@ -532,7 +583,7 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev,
|
||||
break;
|
||||
|
||||
default:
|
||||
inode->i_op = &ovl_file_inode_operations;
|
||||
inode->i_op = &ovl_special_inode_operations;
|
||||
init_special_inode(inode, mode, rdev);
|
||||
break;
|
||||
}
|
||||
@ -769,8 +820,9 @@ struct inode *ovl_get_inode(struct super_block *sb,
|
||||
bool bylower = ovl_hash_bylower(sb, upperdentry, lowerdentry,
|
||||
oip->index);
|
||||
int fsid = bylower ? oip->lowerpath->layer->fsid : 0;
|
||||
bool is_dir;
|
||||
bool is_dir, metacopy = false;
|
||||
unsigned long ino = 0;
|
||||
int err = -ENOMEM;
|
||||
|
||||
if (!realinode)
|
||||
realinode = d_inode(lowerdentry);
|
||||
@ -787,7 +839,7 @@ struct inode *ovl_get_inode(struct super_block *sb,
|
||||
|
||||
inode = ovl_iget5(sb, oip->newinode, key);
|
||||
if (!inode)
|
||||
goto out_nomem;
|
||||
goto out_err;
|
||||
if (!(inode->i_state & I_NEW)) {
|
||||
/*
|
||||
* Verify that the underlying files stored in the inode
|
||||
@ -796,11 +848,12 @@ struct inode *ovl_get_inode(struct super_block *sb,
|
||||
if (!ovl_verify_inode(inode, lowerdentry, upperdentry,
|
||||
true)) {
|
||||
iput(inode);
|
||||
inode = ERR_PTR(-ESTALE);
|
||||
goto out;
|
||||
err = -ESTALE;
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
dput(upperdentry);
|
||||
kfree(oip->redirect);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -812,11 +865,13 @@ struct inode *ovl_get_inode(struct super_block *sb,
|
||||
} else {
|
||||
/* Lower hardlink that will be broken on copy up */
|
||||
inode = new_inode(sb);
|
||||
if (!inode)
|
||||
goto out_nomem;
|
||||
if (!inode) {
|
||||
err = -ENOMEM;
|
||||
goto out_err;
|
||||
}
|
||||
}
|
||||
ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev, ino, fsid);
|
||||
ovl_inode_init(inode, upperdentry, lowerdentry);
|
||||
ovl_inode_init(inode, upperdentry, lowerdentry, oip->lowerdata);
|
||||
|
||||
if (upperdentry && ovl_is_impuredir(upperdentry))
|
||||
ovl_set_flag(OVL_IMPURE, inode);
|
||||
@ -824,6 +879,20 @@ struct inode *ovl_get_inode(struct super_block *sb,
|
||||
if (oip->index)
|
||||
ovl_set_flag(OVL_INDEX, inode);
|
||||
|
||||
if (upperdentry) {
|
||||
err = ovl_check_metacopy_xattr(upperdentry);
|
||||
if (err < 0)
|
||||
goto out_err;
|
||||
metacopy = err;
|
||||
if (!metacopy)
|
||||
ovl_set_flag(OVL_UPPERDATA, inode);
|
||||
}
|
||||
|
||||
OVL_I(inode)->redirect = oip->redirect;
|
||||
|
||||
if (bylower)
|
||||
ovl_set_flag(OVL_CONST_INO, inode);
|
||||
|
||||
/* Check for non-merge dir that may have whiteouts */
|
||||
if (is_dir) {
|
||||
if (((upperdentry && lowerdentry) || oip->numlower > 1) ||
|
||||
@ -837,7 +906,7 @@ struct inode *ovl_get_inode(struct super_block *sb,
|
||||
out:
|
||||
return inode;
|
||||
|
||||
out_nomem:
|
||||
inode = ERR_PTR(-ENOMEM);
|
||||
out_err:
|
||||
inode = ERR_PTR(err);
|
||||
goto out;
|
||||
}
|
||||
|
@ -24,38 +24,20 @@ struct ovl_lookup_data {
|
||||
bool stop;
|
||||
bool last;
|
||||
char *redirect;
|
||||
bool metacopy;
|
||||
};
|
||||
|
||||
static int ovl_check_redirect(struct dentry *dentry, struct ovl_lookup_data *d,
|
||||
size_t prelen, const char *post)
|
||||
{
|
||||
int res;
|
||||
char *s, *next, *buf = NULL;
|
||||
char *buf;
|
||||
|
||||
res = vfs_getxattr(dentry, OVL_XATTR_REDIRECT, NULL, 0);
|
||||
if (res < 0) {
|
||||
if (res == -ENODATA || res == -EOPNOTSUPP)
|
||||
return 0;
|
||||
goto fail;
|
||||
}
|
||||
buf = kzalloc(prelen + res + strlen(post) + 1, GFP_KERNEL);
|
||||
if (!buf)
|
||||
return -ENOMEM;
|
||||
buf = ovl_get_redirect_xattr(dentry, prelen + strlen(post));
|
||||
if (IS_ERR_OR_NULL(buf))
|
||||
return PTR_ERR(buf);
|
||||
|
||||
if (res == 0)
|
||||
goto invalid;
|
||||
|
||||
res = vfs_getxattr(dentry, OVL_XATTR_REDIRECT, buf, res);
|
||||
if (res < 0)
|
||||
goto fail;
|
||||
if (res == 0)
|
||||
goto invalid;
|
||||
if (buf[0] == '/') {
|
||||
for (s = buf; *s++ == '/'; s = next) {
|
||||
next = strchrnul(s, '/');
|
||||
if (s == next)
|
||||
goto invalid;
|
||||
}
|
||||
/*
|
||||
* One of the ancestor path elements in an absolute path
|
||||
* lookup in ovl_lookup_layer() could have been opaque and
|
||||
@ -66,9 +48,7 @@ static int ovl_check_redirect(struct dentry *dentry, struct ovl_lookup_data *d,
|
||||
*/
|
||||
d->stop = false;
|
||||
} else {
|
||||
if (strchr(buf, '/') != NULL)
|
||||
goto invalid;
|
||||
|
||||
res = strlen(buf) + 1;
|
||||
memmove(buf + prelen, buf, res);
|
||||
memcpy(buf, d->name.name, prelen);
|
||||
}
|
||||
@ -80,16 +60,6 @@ static int ovl_check_redirect(struct dentry *dentry, struct ovl_lookup_data *d,
|
||||
d->name.len = strlen(d->redirect);
|
||||
|
||||
return 0;
|
||||
|
||||
err_free:
|
||||
kfree(buf);
|
||||
return 0;
|
||||
fail:
|
||||
pr_warn_ratelimited("overlayfs: failed to get redirect (%i)\n", res);
|
||||
goto err_free;
|
||||
invalid:
|
||||
pr_warn_ratelimited("overlayfs: invalid redirect (%s)\n", buf);
|
||||
goto err_free;
|
||||
}
|
||||
|
||||
static int ovl_acceptable(void *ctx, struct dentry *dentry)
|
||||
@ -252,28 +222,39 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
|
||||
d->stop = d->opaque = true;
|
||||
goto put_and_out;
|
||||
}
|
||||
if (!d_can_lookup(this)) {
|
||||
/*
|
||||
* This dentry should be a regular file if previous layer lookup
|
||||
* found a metacopy dentry.
|
||||
*/
|
||||
if (last_element && d->metacopy && !d_is_reg(this)) {
|
||||
d->stop = true;
|
||||
if (d->is_dir)
|
||||
goto put_and_out;
|
||||
|
||||
/*
|
||||
* NB: handle failure to lookup non-last element when non-dir
|
||||
* redirects become possible
|
||||
*/
|
||||
WARN_ON(!last_element);
|
||||
goto out;
|
||||
goto put_and_out;
|
||||
}
|
||||
if (last_element)
|
||||
d->is_dir = true;
|
||||
if (d->last)
|
||||
goto out;
|
||||
if (!d_can_lookup(this)) {
|
||||
if (d->is_dir || !last_element) {
|
||||
d->stop = true;
|
||||
goto put_and_out;
|
||||
}
|
||||
err = ovl_check_metacopy_xattr(this);
|
||||
if (err < 0)
|
||||
goto out_err;
|
||||
|
||||
if (ovl_is_opaquedir(this)) {
|
||||
d->stop = true;
|
||||
d->metacopy = err;
|
||||
d->stop = !d->metacopy;
|
||||
if (!d->metacopy || d->last)
|
||||
goto out;
|
||||
} else {
|
||||
if (last_element)
|
||||
d->opaque = true;
|
||||
goto out;
|
||||
d->is_dir = true;
|
||||
if (d->last)
|
||||
goto out;
|
||||
|
||||
if (ovl_is_opaquedir(this)) {
|
||||
d->stop = true;
|
||||
if (last_element)
|
||||
d->opaque = true;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
err = ovl_check_redirect(this, d, prelen, post);
|
||||
if (err)
|
||||
@ -823,7 +804,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
|
||||
struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
|
||||
struct ovl_entry *poe = dentry->d_parent->d_fsdata;
|
||||
struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata;
|
||||
struct ovl_path *stack = NULL;
|
||||
struct ovl_path *stack = NULL, *origin_path = NULL;
|
||||
struct dentry *upperdir, *upperdentry = NULL;
|
||||
struct dentry *origin = NULL;
|
||||
struct dentry *index = NULL;
|
||||
@ -834,6 +815,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
|
||||
struct dentry *this;
|
||||
unsigned int i;
|
||||
int err;
|
||||
bool metacopy = false;
|
||||
struct ovl_lookup_data d = {
|
||||
.name = dentry->d_name,
|
||||
.is_dir = false,
|
||||
@ -841,6 +823,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
|
||||
.stop = false,
|
||||
.last = ofs->config.redirect_follow ? false : !poe->numlower,
|
||||
.redirect = NULL,
|
||||
.metacopy = false,
|
||||
};
|
||||
|
||||
if (dentry->d_name.len > ofs->namelen)
|
||||
@ -859,7 +842,8 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
|
||||
goto out;
|
||||
}
|
||||
if (upperdentry && !d.is_dir) {
|
||||
BUG_ON(!d.stop || d.redirect);
|
||||
unsigned int origin_ctr = 0;
|
||||
|
||||
/*
|
||||
* Lookup copy up origin by decoding origin file handle.
|
||||
* We may get a disconnected dentry, which is fine,
|
||||
@ -870,9 +854,13 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
|
||||
* number - it's the same as if we held a reference
|
||||
* to a dentry in lower layer that was moved under us.
|
||||
*/
|
||||
err = ovl_check_origin(ofs, upperdentry, &stack, &ctr);
|
||||
err = ovl_check_origin(ofs, upperdentry, &origin_path,
|
||||
&origin_ctr);
|
||||
if (err)
|
||||
goto out_put_upper;
|
||||
|
||||
if (d.metacopy)
|
||||
metacopy = true;
|
||||
}
|
||||
|
||||
if (d.redirect) {
|
||||
@ -913,7 +901,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
|
||||
* If no origin fh is stored in upper of a merge dir, store fh
|
||||
* of lower dir and set upper parent "impure".
|
||||
*/
|
||||
if (upperdentry && !ctr && !ofs->noxattr) {
|
||||
if (upperdentry && !ctr && !ofs->noxattr && d.is_dir) {
|
||||
err = ovl_fix_origin(dentry, this, upperdentry);
|
||||
if (err) {
|
||||
dput(this);
|
||||
@ -925,18 +913,35 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
|
||||
* When "verify_lower" feature is enabled, do not merge with a
|
||||
* lower dir that does not match a stored origin xattr. In any
|
||||
* case, only verified origin is used for index lookup.
|
||||
*
|
||||
* For non-dir dentry, if index=on, then ensure origin
|
||||
* matches the dentry found using path based lookup,
|
||||
* otherwise error out.
|
||||
*/
|
||||
if (upperdentry && !ctr && ovl_verify_lower(dentry->d_sb)) {
|
||||
if (upperdentry && !ctr &&
|
||||
((d.is_dir && ovl_verify_lower(dentry->d_sb)) ||
|
||||
(!d.is_dir && ofs->config.index && origin_path))) {
|
||||
err = ovl_verify_origin(upperdentry, this, false);
|
||||
if (err) {
|
||||
dput(this);
|
||||
break;
|
||||
if (d.is_dir)
|
||||
break;
|
||||
goto out_put;
|
||||
}
|
||||
|
||||
/* Bless lower dir as verified origin */
|
||||
origin = this;
|
||||
}
|
||||
|
||||
if (d.metacopy)
|
||||
metacopy = true;
|
||||
/*
|
||||
* Do not store intermediate metacopy dentries in chain,
|
||||
* except top most lower metacopy dentry
|
||||
*/
|
||||
if (d.metacopy && ctr) {
|
||||
dput(this);
|
||||
continue;
|
||||
}
|
||||
|
||||
stack[ctr].dentry = this;
|
||||
stack[ctr].layer = lower.layer;
|
||||
ctr++;
|
||||
@ -968,13 +973,48 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
|
||||
}
|
||||
}
|
||||
|
||||
if (metacopy) {
|
||||
/*
|
||||
* Found a metacopy dentry but did not find corresponding
|
||||
* data dentry
|
||||
*/
|
||||
if (d.metacopy) {
|
||||
err = -EIO;
|
||||
goto out_put;
|
||||
}
|
||||
|
||||
err = -EPERM;
|
||||
if (!ofs->config.metacopy) {
|
||||
pr_warn_ratelimited("overlay: refusing to follow metacopy origin for (%pd2)\n",
|
||||
dentry);
|
||||
goto out_put;
|
||||
}
|
||||
} else if (!d.is_dir && upperdentry && !ctr && origin_path) {
|
||||
if (WARN_ON(stack != NULL)) {
|
||||
err = -EIO;
|
||||
goto out_put;
|
||||
}
|
||||
stack = origin_path;
|
||||
ctr = 1;
|
||||
origin_path = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Lookup index by lower inode and verify it matches upper inode.
|
||||
* We only trust dir index if we verified that lower dir matches
|
||||
* origin, otherwise dir index entries may be inconsistent and we
|
||||
* ignore them. Always lookup index of non-dir and non-upper.
|
||||
* ignore them.
|
||||
*
|
||||
* For non-dir upper metacopy dentry, we already set "origin" if we
|
||||
* verified that lower matched upper origin. If upper origin was
|
||||
* not present (because lower layer did not support fh encode/decode),
|
||||
* or indexing is not enabled, do not set "origin" and skip looking up
|
||||
* index. This case should be handled in same way as a non-dir upper
|
||||
* without ORIGIN is handled.
|
||||
*
|
||||
* Always lookup index of non-dir non-metacopy and non-upper.
|
||||
*/
|
||||
if (ctr && (!upperdentry || !d.is_dir))
|
||||
if (ctr && (!upperdentry || (!d.is_dir && !metacopy)))
|
||||
origin = stack[0].dentry;
|
||||
|
||||
if (origin && ovl_indexdir(dentry->d_sb) &&
|
||||
@ -1000,8 +1040,15 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
|
||||
|
||||
if (upperdentry)
|
||||
ovl_dentry_set_upper_alias(dentry);
|
||||
else if (index)
|
||||
else if (index) {
|
||||
upperdentry = dget(index);
|
||||
upperredirect = ovl_get_redirect_xattr(upperdentry, 0);
|
||||
if (IS_ERR(upperredirect)) {
|
||||
err = PTR_ERR(upperredirect);
|
||||
upperredirect = NULL;
|
||||
goto out_free_oe;
|
||||
}
|
||||
}
|
||||
|
||||
if (upperdentry || ctr) {
|
||||
struct ovl_inode_params oip = {
|
||||
@ -1009,22 +1056,22 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
|
||||
.lowerpath = stack,
|
||||
.index = index,
|
||||
.numlower = ctr,
|
||||
.redirect = upperredirect,
|
||||
.lowerdata = (ctr > 1 && !d.is_dir) ?
|
||||
stack[ctr - 1].dentry : NULL,
|
||||
};
|
||||
|
||||
inode = ovl_get_inode(dentry->d_sb, &oip);
|
||||
err = PTR_ERR(inode);
|
||||
if (IS_ERR(inode))
|
||||
goto out_free_oe;
|
||||
|
||||
/*
|
||||
* NB: handle redirected hard links when non-dir redirects
|
||||
* become possible
|
||||
*/
|
||||
WARN_ON(OVL_I(inode)->redirect);
|
||||
OVL_I(inode)->redirect = upperredirect;
|
||||
}
|
||||
|
||||
revert_creds(old_cred);
|
||||
if (origin_path) {
|
||||
dput(origin_path->dentry);
|
||||
kfree(origin_path);
|
||||
}
|
||||
dput(index);
|
||||
kfree(stack);
|
||||
kfree(d.redirect);
|
||||
@ -1039,6 +1086,10 @@ out_put:
|
||||
dput(stack[i].dentry);
|
||||
kfree(stack);
|
||||
out_put_upper:
|
||||
if (origin_path) {
|
||||
dput(origin_path->dentry);
|
||||
kfree(origin_path);
|
||||
}
|
||||
dput(upperdentry);
|
||||
kfree(upperredirect);
|
||||
out:
|
||||
|
@ -9,6 +9,7 @@
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/uuid.h>
|
||||
#include <linux/fs.h>
|
||||
#include "ovl_entry.h"
|
||||
|
||||
enum ovl_path_type {
|
||||
@ -28,6 +29,7 @@ enum ovl_path_type {
|
||||
#define OVL_XATTR_IMPURE OVL_XATTR_PREFIX "impure"
|
||||
#define OVL_XATTR_NLINK OVL_XATTR_PREFIX "nlink"
|
||||
#define OVL_XATTR_UPPER OVL_XATTR_PREFIX "upper"
|
||||
#define OVL_XATTR_METACOPY OVL_XATTR_PREFIX "metacopy"
|
||||
|
||||
enum ovl_inode_flag {
|
||||
/* Pure upper dir that may contain non pure upper entries */
|
||||
@ -35,6 +37,9 @@ enum ovl_inode_flag {
|
||||
/* Non-merge dir that may contain whiteout entries */
|
||||
OVL_WHITEOUTS,
|
||||
OVL_INDEX,
|
||||
OVL_UPPERDATA,
|
||||
/* Inode number will remain constant over copy up. */
|
||||
OVL_CONST_INO,
|
||||
};
|
||||
|
||||
enum ovl_entry_flag {
|
||||
@ -190,6 +195,14 @@ static inline struct dentry *ovl_do_tmpfile(struct dentry *dentry, umode_t mode)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline bool ovl_open_flags_need_copy_up(int flags)
|
||||
{
|
||||
if (!flags)
|
||||
return false;
|
||||
|
||||
return ((OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC));
|
||||
}
|
||||
|
||||
/* util.c */
|
||||
int ovl_want_write(struct dentry *dentry);
|
||||
void ovl_drop_write(struct dentry *dentry);
|
||||
@ -206,15 +219,19 @@ bool ovl_dentry_weird(struct dentry *dentry);
|
||||
enum ovl_path_type ovl_path_type(struct dentry *dentry);
|
||||
void ovl_path_upper(struct dentry *dentry, struct path *path);
|
||||
void ovl_path_lower(struct dentry *dentry, struct path *path);
|
||||
void ovl_path_lowerdata(struct dentry *dentry, struct path *path);
|
||||
enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path);
|
||||
struct dentry *ovl_dentry_upper(struct dentry *dentry);
|
||||
struct dentry *ovl_dentry_lower(struct dentry *dentry);
|
||||
struct dentry *ovl_dentry_lowerdata(struct dentry *dentry);
|
||||
struct ovl_layer *ovl_layer_lower(struct dentry *dentry);
|
||||
struct dentry *ovl_dentry_real(struct dentry *dentry);
|
||||
struct dentry *ovl_i_dentry_upper(struct inode *inode);
|
||||
struct inode *ovl_inode_upper(struct inode *inode);
|
||||
struct inode *ovl_inode_lower(struct inode *inode);
|
||||
struct inode *ovl_inode_lowerdata(struct inode *inode);
|
||||
struct inode *ovl_inode_real(struct inode *inode);
|
||||
struct inode *ovl_inode_realdata(struct inode *inode);
|
||||
struct ovl_dir_cache *ovl_dir_cache(struct inode *inode);
|
||||
void ovl_set_dir_cache(struct inode *inode, struct ovl_dir_cache *cache);
|
||||
void ovl_dentry_set_flag(unsigned long flag, struct dentry *dentry);
|
||||
@ -225,18 +242,23 @@ bool ovl_dentry_is_whiteout(struct dentry *dentry);
|
||||
void ovl_dentry_set_opaque(struct dentry *dentry);
|
||||
bool ovl_dentry_has_upper_alias(struct dentry *dentry);
|
||||
void ovl_dentry_set_upper_alias(struct dentry *dentry);
|
||||
bool ovl_dentry_needs_data_copy_up(struct dentry *dentry, int flags);
|
||||
bool ovl_dentry_needs_data_copy_up_locked(struct dentry *dentry, int flags);
|
||||
bool ovl_has_upperdata(struct inode *inode);
|
||||
void ovl_set_upperdata(struct inode *inode);
|
||||
bool ovl_redirect_dir(struct super_block *sb);
|
||||
const char *ovl_dentry_get_redirect(struct dentry *dentry);
|
||||
void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect);
|
||||
void ovl_inode_init(struct inode *inode, struct dentry *upperdentry,
|
||||
struct dentry *lowerdentry);
|
||||
struct dentry *lowerdentry, struct dentry *lowerdata);
|
||||
void ovl_inode_update(struct inode *inode, struct dentry *upperdentry);
|
||||
void ovl_dentry_version_inc(struct dentry *dentry, bool impurity);
|
||||
void ovl_dir_modified(struct dentry *dentry, bool impurity);
|
||||
u64 ovl_dentry_version_get(struct dentry *dentry);
|
||||
bool ovl_is_whiteout(struct dentry *dentry);
|
||||
struct file *ovl_path_open(struct path *path, int flags);
|
||||
int ovl_copy_up_start(struct dentry *dentry);
|
||||
int ovl_copy_up_start(struct dentry *dentry, int flags);
|
||||
void ovl_copy_up_end(struct dentry *dentry);
|
||||
bool ovl_already_copied_up(struct dentry *dentry, int flags);
|
||||
bool ovl_check_origin_xattr(struct dentry *dentry);
|
||||
bool ovl_check_dir_xattr(struct dentry *dentry, const char *name);
|
||||
int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry,
|
||||
@ -252,6 +274,9 @@ bool ovl_need_index(struct dentry *dentry);
|
||||
int ovl_nlink_start(struct dentry *dentry, bool *locked);
|
||||
void ovl_nlink_end(struct dentry *dentry, bool locked);
|
||||
int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir);
|
||||
int ovl_check_metacopy_xattr(struct dentry *dentry);
|
||||
bool ovl_is_metacopy_dentry(struct dentry *dentry);
|
||||
char *ovl_get_redirect_xattr(struct dentry *dentry, int padding);
|
||||
|
||||
static inline bool ovl_is_impuredir(struct dentry *dentry)
|
||||
{
|
||||
@ -324,7 +349,6 @@ int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name,
|
||||
void *value, size_t size);
|
||||
ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size);
|
||||
struct posix_acl *ovl_get_acl(struct inode *inode, int type);
|
||||
int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags);
|
||||
int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags);
|
||||
bool ovl_is_private_xattr(const char *name);
|
||||
|
||||
@ -334,6 +358,8 @@ struct ovl_inode_params {
|
||||
struct ovl_path *lowerpath;
|
||||
struct dentry *index;
|
||||
unsigned int numlower;
|
||||
char *redirect;
|
||||
struct dentry *lowerdata;
|
||||
};
|
||||
struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev);
|
||||
struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real,
|
||||
@ -348,6 +374,14 @@ static inline void ovl_copyattr(struct inode *from, struct inode *to)
|
||||
to->i_atime = from->i_atime;
|
||||
to->i_mtime = from->i_mtime;
|
||||
to->i_ctime = from->i_ctime;
|
||||
i_size_write(to, i_size_read(from));
|
||||
}
|
||||
|
||||
static inline void ovl_copyflags(struct inode *from, struct inode *to)
|
||||
{
|
||||
unsigned int mask = S_SYNC | S_IMMUTABLE | S_APPEND | S_NOATIME;
|
||||
|
||||
inode_set_flags(to, from->i_flags & mask, mask);
|
||||
}
|
||||
|
||||
/* dir.c */
|
||||
@ -368,9 +402,14 @@ struct dentry *ovl_create_real(struct inode *dir, struct dentry *newdentry,
|
||||
int ovl_cleanup(struct inode *dir, struct dentry *dentry);
|
||||
struct dentry *ovl_create_temp(struct dentry *workdir, struct ovl_cattr *attr);
|
||||
|
||||
/* file.c */
|
||||
extern const struct file_operations ovl_file_operations;
|
||||
|
||||
/* copy_up.c */
|
||||
int ovl_copy_up(struct dentry *dentry);
|
||||
int ovl_copy_up_with_data(struct dentry *dentry);
|
||||
int ovl_copy_up_flags(struct dentry *dentry, int flags);
|
||||
int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags);
|
||||
int ovl_copy_xattr(struct dentry *old, struct dentry *new);
|
||||
int ovl_set_attr(struct dentry *upper, struct kstat *stat);
|
||||
struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper);
|
||||
|
@ -19,6 +19,7 @@ struct ovl_config {
|
||||
bool index;
|
||||
bool nfs_export;
|
||||
int xino;
|
||||
bool metacopy;
|
||||
};
|
||||
|
||||
struct ovl_sb {
|
||||
@ -88,7 +89,10 @@ static inline struct ovl_entry *OVL_E(struct dentry *dentry)
|
||||
}
|
||||
|
||||
struct ovl_inode {
|
||||
struct ovl_dir_cache *cache;
|
||||
union {
|
||||
struct ovl_dir_cache *cache; /* directory */
|
||||
struct inode *lowerdata; /* regular file */
|
||||
};
|
||||
const char *redirect;
|
||||
u64 version;
|
||||
unsigned long flags;
|
||||
|
@ -668,6 +668,21 @@ static int ovl_fill_real(struct dir_context *ctx, const char *name,
|
||||
return orig_ctx->actor(orig_ctx, name, namelen, offset, ino, d_type);
|
||||
}
|
||||
|
||||
static bool ovl_is_impure_dir(struct file *file)
|
||||
{
|
||||
struct ovl_dir_file *od = file->private_data;
|
||||
struct inode *dir = d_inode(file->f_path.dentry);
|
||||
|
||||
/*
|
||||
* Only upper dir can be impure, but if we are in the middle of
|
||||
* iterating a lower real dir, dir could be copied up and marked
|
||||
* impure. We only want the impure cache if we started iterating
|
||||
* a real upper dir to begin with.
|
||||
*/
|
||||
return od->is_upper && ovl_test_flag(OVL_IMPURE, dir);
|
||||
|
||||
}
|
||||
|
||||
static int ovl_iterate_real(struct file *file, struct dir_context *ctx)
|
||||
{
|
||||
int err;
|
||||
@ -696,7 +711,7 @@ static int ovl_iterate_real(struct file *file, struct dir_context *ctx)
|
||||
rdt.parent_ino = stat.ino;
|
||||
}
|
||||
|
||||
if (ovl_test_flag(OVL_IMPURE, d_inode(dir))) {
|
||||
if (ovl_is_impure_dir(file)) {
|
||||
rdt.cache = ovl_cache_get_impure(&file->f_path);
|
||||
if (IS_ERR(rdt.cache))
|
||||
return PTR_ERR(rdt.cache);
|
||||
@ -727,7 +742,7 @@ static int ovl_iterate(struct file *file, struct dir_context *ctx)
|
||||
*/
|
||||
if (ovl_xino_bits(dentry->d_sb) ||
|
||||
(ovl_same_sb(dentry->d_sb) &&
|
||||
(ovl_test_flag(OVL_IMPURE, d_inode(dentry)) ||
|
||||
(ovl_is_impure_dir(file) ||
|
||||
OVL_TYPE_MERGE(ovl_path_type(dentry->d_parent))))) {
|
||||
return ovl_iterate_real(file, ctx);
|
||||
}
|
||||
|
@ -64,6 +64,11 @@ static void ovl_entry_stack_free(struct ovl_entry *oe)
|
||||
dput(oe->lowerstack[i].dentry);
|
||||
}
|
||||
|
||||
static bool ovl_metacopy_def = IS_ENABLED(CONFIG_OVERLAY_FS_METACOPY);
|
||||
module_param_named(metacopy, ovl_metacopy_def, bool, 0644);
|
||||
MODULE_PARM_DESC(ovl_metacopy_def,
|
||||
"Default to on or off for the metadata only copy up feature");
|
||||
|
||||
static void ovl_dentry_release(struct dentry *dentry)
|
||||
{
|
||||
struct ovl_entry *oe = dentry->d_fsdata;
|
||||
@ -74,31 +79,14 @@ static void ovl_dentry_release(struct dentry *dentry)
|
||||
}
|
||||
}
|
||||
|
||||
static int ovl_check_append_only(struct inode *inode, int flag)
|
||||
{
|
||||
/*
|
||||
* This test was moot in vfs may_open() because overlay inode does
|
||||
* not have the S_APPEND flag, so re-check on real upper inode
|
||||
*/
|
||||
if (IS_APPEND(inode)) {
|
||||
if ((flag & O_ACCMODE) != O_RDONLY && !(flag & O_APPEND))
|
||||
return -EPERM;
|
||||
if (flag & O_TRUNC)
|
||||
return -EPERM;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct dentry *ovl_d_real(struct dentry *dentry,
|
||||
const struct inode *inode,
|
||||
unsigned int open_flags, unsigned int flags)
|
||||
const struct inode *inode)
|
||||
{
|
||||
struct dentry *real;
|
||||
int err;
|
||||
|
||||
if (flags & D_REAL_UPPER)
|
||||
return ovl_dentry_upper(dentry);
|
||||
/* It's an overlay file */
|
||||
if (inode && d_inode(dentry) == inode)
|
||||
return dentry;
|
||||
|
||||
if (!d_is_reg(dentry)) {
|
||||
if (!inode || inode == d_inode(dentry))
|
||||
@ -106,28 +94,19 @@ static struct dentry *ovl_d_real(struct dentry *dentry,
|
||||
goto bug;
|
||||
}
|
||||
|
||||
if (open_flags) {
|
||||
err = ovl_open_maybe_copy_up(dentry, open_flags);
|
||||
if (err)
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
real = ovl_dentry_upper(dentry);
|
||||
if (real && (!inode || inode == d_inode(real))) {
|
||||
if (!inode) {
|
||||
err = ovl_check_append_only(d_inode(real), open_flags);
|
||||
if (err)
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
if (real && (inode == d_inode(real)))
|
||||
return real;
|
||||
}
|
||||
|
||||
real = ovl_dentry_lower(dentry);
|
||||
if (real && !inode && ovl_has_upperdata(d_inode(dentry)))
|
||||
return real;
|
||||
|
||||
real = ovl_dentry_lowerdata(dentry);
|
||||
if (!real)
|
||||
goto bug;
|
||||
|
||||
/* Handle recursion */
|
||||
real = d_real(real, inode, open_flags, 0);
|
||||
real = d_real(real, inode);
|
||||
|
||||
if (!inode || inode == d_inode(real))
|
||||
return real;
|
||||
@ -205,6 +184,7 @@ static struct inode *ovl_alloc_inode(struct super_block *sb)
|
||||
oi->flags = 0;
|
||||
oi->__upperdentry = NULL;
|
||||
oi->lower = NULL;
|
||||
oi->lowerdata = NULL;
|
||||
mutex_init(&oi->lock);
|
||||
|
||||
return &oi->vfs_inode;
|
||||
@ -223,8 +203,11 @@ static void ovl_destroy_inode(struct inode *inode)
|
||||
|
||||
dput(oi->__upperdentry);
|
||||
iput(oi->lower);
|
||||
if (S_ISDIR(inode->i_mode))
|
||||
ovl_dir_cache_free(inode);
|
||||
else
|
||||
iput(oi->lowerdata);
|
||||
kfree(oi->redirect);
|
||||
ovl_dir_cache_free(inode);
|
||||
mutex_destroy(&oi->lock);
|
||||
|
||||
call_rcu(&inode->i_rcu, ovl_i_callback);
|
||||
@ -376,6 +359,9 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
|
||||
"on" : "off");
|
||||
if (ofs->config.xino != ovl_xino_def())
|
||||
seq_printf(m, ",xino=%s", ovl_xino_str[ofs->config.xino]);
|
||||
if (ofs->config.metacopy != ovl_metacopy_def)
|
||||
seq_printf(m, ",metacopy=%s",
|
||||
ofs->config.metacopy ? "on" : "off");
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -413,6 +399,8 @@ enum {
|
||||
OPT_XINO_ON,
|
||||
OPT_XINO_OFF,
|
||||
OPT_XINO_AUTO,
|
||||
OPT_METACOPY_ON,
|
||||
OPT_METACOPY_OFF,
|
||||
OPT_ERR,
|
||||
};
|
||||
|
||||
@ -429,6 +417,8 @@ static const match_table_t ovl_tokens = {
|
||||
{OPT_XINO_ON, "xino=on"},
|
||||
{OPT_XINO_OFF, "xino=off"},
|
||||
{OPT_XINO_AUTO, "xino=auto"},
|
||||
{OPT_METACOPY_ON, "metacopy=on"},
|
||||
{OPT_METACOPY_OFF, "metacopy=off"},
|
||||
{OPT_ERR, NULL}
|
||||
};
|
||||
|
||||
@ -481,6 +471,7 @@ static int ovl_parse_redirect_mode(struct ovl_config *config, const char *mode)
|
||||
static int ovl_parse_opt(char *opt, struct ovl_config *config)
|
||||
{
|
||||
char *p;
|
||||
int err;
|
||||
|
||||
config->redirect_mode = kstrdup(ovl_redirect_mode_def(), GFP_KERNEL);
|
||||
if (!config->redirect_mode)
|
||||
@ -555,6 +546,14 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
|
||||
config->xino = OVL_XINO_AUTO;
|
||||
break;
|
||||
|
||||
case OPT_METACOPY_ON:
|
||||
config->metacopy = true;
|
||||
break;
|
||||
|
||||
case OPT_METACOPY_OFF:
|
||||
config->metacopy = false;
|
||||
break;
|
||||
|
||||
default:
|
||||
pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p);
|
||||
return -EINVAL;
|
||||
@ -569,7 +568,20 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
|
||||
config->workdir = NULL;
|
||||
}
|
||||
|
||||
return ovl_parse_redirect_mode(config, config->redirect_mode);
|
||||
err = ovl_parse_redirect_mode(config, config->redirect_mode);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
/* metacopy feature with upper requires redirect_dir=on */
|
||||
if (config->upperdir && config->metacopy && !config->redirect_dir) {
|
||||
pr_warn("overlayfs: metadata only copy up requires \"redirect_dir=on\", falling back to metacopy=off.\n");
|
||||
config->metacopy = false;
|
||||
} else if (config->metacopy && !config->redirect_follow) {
|
||||
pr_warn("overlayfs: metadata only copy up requires \"redirect_dir=follow\" on non-upper mount, falling back to metacopy=off.\n");
|
||||
config->metacopy = false;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define OVL_WORKDIR_NAME "work"
|
||||
@ -1042,7 +1054,8 @@ static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath)
|
||||
if (err) {
|
||||
ofs->noxattr = true;
|
||||
ofs->config.index = false;
|
||||
pr_warn("overlayfs: upper fs does not support xattr, falling back to index=off.\n");
|
||||
ofs->config.metacopy = false;
|
||||
pr_warn("overlayfs: upper fs does not support xattr, falling back to index=off and metacopy=off.\n");
|
||||
err = 0;
|
||||
} else {
|
||||
vfs_removexattr(ofs->workdir, OVL_XATTR_OPAQUE);
|
||||
@ -1064,7 +1077,6 @@ static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath)
|
||||
pr_warn("overlayfs: NFS export requires \"index=on\", falling back to nfs_export=off.\n");
|
||||
ofs->config.nfs_export = false;
|
||||
}
|
||||
|
||||
out:
|
||||
mnt_drop_write(mnt);
|
||||
return err;
|
||||
@ -1375,6 +1387,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
|
||||
ofs->config.index = ovl_index_def;
|
||||
ofs->config.nfs_export = ovl_nfs_export_def;
|
||||
ofs->config.xino = ovl_xino_def();
|
||||
ofs->config.metacopy = ovl_metacopy_def;
|
||||
err = ovl_parse_opt((char *) data, &ofs->config);
|
||||
if (err)
|
||||
goto out_err;
|
||||
@ -1445,6 +1458,11 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
|
||||
}
|
||||
}
|
||||
|
||||
if (ofs->config.metacopy && ofs->config.nfs_export) {
|
||||
pr_warn("overlayfs: NFS export is not supported with metadata only copy up, falling back to nfs_export=off.\n");
|
||||
ofs->config.nfs_export = false;
|
||||
}
|
||||
|
||||
if (ofs->config.nfs_export)
|
||||
sb->s_export_op = &ovl_export_operations;
|
||||
|
||||
@ -1455,7 +1473,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
|
||||
sb->s_op = &ovl_super_operations;
|
||||
sb->s_xattr = ovl_xattr_handlers;
|
||||
sb->s_fs_info = ofs;
|
||||
sb->s_flags |= SB_POSIXACL | SB_NOREMOTELOCK;
|
||||
sb->s_flags |= SB_POSIXACL;
|
||||
|
||||
err = -ENOMEM;
|
||||
root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, 0));
|
||||
@ -1474,8 +1492,9 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
|
||||
/* Root is always merge -> can have whiteouts */
|
||||
ovl_set_flag(OVL_WHITEOUTS, d_inode(root_dentry));
|
||||
ovl_dentry_set_flag(OVL_E_CONNECTED, root_dentry);
|
||||
ovl_set_upperdata(d_inode(root_dentry));
|
||||
ovl_inode_init(d_inode(root_dentry), upperpath.dentry,
|
||||
ovl_dentry_lower(root_dentry));
|
||||
ovl_dentry_lower(root_dentry), NULL);
|
||||
|
||||
sb->s_root = root_dentry;
|
||||
|
||||
|
@ -133,8 +133,10 @@ enum ovl_path_type ovl_path_type(struct dentry *dentry)
|
||||
* Non-dir dentry can hold lower dentry of its copy up origin.
|
||||
*/
|
||||
if (oe->numlower) {
|
||||
type |= __OVL_PATH_ORIGIN;
|
||||
if (d_is_dir(dentry))
|
||||
if (ovl_test_flag(OVL_CONST_INO, d_inode(dentry)))
|
||||
type |= __OVL_PATH_ORIGIN;
|
||||
if (d_is_dir(dentry) ||
|
||||
!ovl_has_upperdata(d_inode(dentry)))
|
||||
type |= __OVL_PATH_MERGE;
|
||||
}
|
||||
} else {
|
||||
@ -164,6 +166,18 @@ void ovl_path_lower(struct dentry *dentry, struct path *path)
|
||||
}
|
||||
}
|
||||
|
||||
void ovl_path_lowerdata(struct dentry *dentry, struct path *path)
|
||||
{
|
||||
struct ovl_entry *oe = dentry->d_fsdata;
|
||||
|
||||
if (oe->numlower) {
|
||||
path->mnt = oe->lowerstack[oe->numlower - 1].layer->mnt;
|
||||
path->dentry = oe->lowerstack[oe->numlower - 1].dentry;
|
||||
} else {
|
||||
*path = (struct path) { };
|
||||
}
|
||||
}
|
||||
|
||||
enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path)
|
||||
{
|
||||
enum ovl_path_type type = ovl_path_type(dentry);
|
||||
@ -195,6 +209,19 @@ struct ovl_layer *ovl_layer_lower(struct dentry *dentry)
|
||||
return oe->numlower ? oe->lowerstack[0].layer : NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* ovl_dentry_lower() could return either a data dentry or metacopy dentry
|
||||
* dependig on what is stored in lowerstack[0]. At times we need to find
|
||||
* lower dentry which has data (and not metacopy dentry). This helper
|
||||
* returns the lower data dentry.
|
||||
*/
|
||||
struct dentry *ovl_dentry_lowerdata(struct dentry *dentry)
|
||||
{
|
||||
struct ovl_entry *oe = dentry->d_fsdata;
|
||||
|
||||
return oe->numlower ? oe->lowerstack[oe->numlower - 1].dentry : NULL;
|
||||
}
|
||||
|
||||
struct dentry *ovl_dentry_real(struct dentry *dentry)
|
||||
{
|
||||
return ovl_dentry_upper(dentry) ?: ovl_dentry_lower(dentry);
|
||||
@ -222,6 +249,26 @@ struct inode *ovl_inode_real(struct inode *inode)
|
||||
return ovl_inode_upper(inode) ?: ovl_inode_lower(inode);
|
||||
}
|
||||
|
||||
/* Return inode which contains lower data. Do not return metacopy */
|
||||
struct inode *ovl_inode_lowerdata(struct inode *inode)
|
||||
{
|
||||
if (WARN_ON(!S_ISREG(inode->i_mode)))
|
||||
return NULL;
|
||||
|
||||
return OVL_I(inode)->lowerdata ?: ovl_inode_lower(inode);
|
||||
}
|
||||
|
||||
/* Return real inode which contains data. Does not return metacopy inode */
|
||||
struct inode *ovl_inode_realdata(struct inode *inode)
|
||||
{
|
||||
struct inode *upperinode;
|
||||
|
||||
upperinode = ovl_inode_upper(inode);
|
||||
if (upperinode && ovl_has_upperdata(inode))
|
||||
return upperinode;
|
||||
|
||||
return ovl_inode_lowerdata(inode);
|
||||
}
|
||||
|
||||
struct ovl_dir_cache *ovl_dir_cache(struct inode *inode)
|
||||
{
|
||||
@ -279,6 +326,62 @@ void ovl_dentry_set_upper_alias(struct dentry *dentry)
|
||||
ovl_dentry_set_flag(OVL_E_UPPER_ALIAS, dentry);
|
||||
}
|
||||
|
||||
static bool ovl_should_check_upperdata(struct inode *inode)
|
||||
{
|
||||
if (!S_ISREG(inode->i_mode))
|
||||
return false;
|
||||
|
||||
if (!ovl_inode_lower(inode))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ovl_has_upperdata(struct inode *inode)
|
||||
{
|
||||
if (!ovl_should_check_upperdata(inode))
|
||||
return true;
|
||||
|
||||
if (!ovl_test_flag(OVL_UPPERDATA, inode))
|
||||
return false;
|
||||
/*
|
||||
* Pairs with smp_wmb() in ovl_set_upperdata(). Main user of
|
||||
* ovl_has_upperdata() is ovl_copy_up_meta_inode_data(). Make sure
|
||||
* if setting of OVL_UPPERDATA is visible, then effects of writes
|
||||
* before that are visible too.
|
||||
*/
|
||||
smp_rmb();
|
||||
return true;
|
||||
}
|
||||
|
||||
void ovl_set_upperdata(struct inode *inode)
|
||||
{
|
||||
/*
|
||||
* Pairs with smp_rmb() in ovl_has_upperdata(). Make sure
|
||||
* if OVL_UPPERDATA flag is visible, then effects of write operations
|
||||
* before it are visible as well.
|
||||
*/
|
||||
smp_wmb();
|
||||
ovl_set_flag(OVL_UPPERDATA, inode);
|
||||
}
|
||||
|
||||
/* Caller should hold ovl_inode->lock */
|
||||
bool ovl_dentry_needs_data_copy_up_locked(struct dentry *dentry, int flags)
|
||||
{
|
||||
if (!ovl_open_flags_need_copy_up(flags))
|
||||
return false;
|
||||
|
||||
return !ovl_test_flag(OVL_UPPERDATA, d_inode(dentry));
|
||||
}
|
||||
|
||||
bool ovl_dentry_needs_data_copy_up(struct dentry *dentry, int flags)
|
||||
{
|
||||
if (!ovl_open_flags_need_copy_up(flags))
|
||||
return false;
|
||||
|
||||
return !ovl_has_upperdata(d_inode(dentry));
|
||||
}
|
||||
|
||||
bool ovl_redirect_dir(struct super_block *sb)
|
||||
{
|
||||
struct ovl_fs *ofs = sb->s_fs_info;
|
||||
@ -300,7 +403,7 @@ void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect)
|
||||
}
|
||||
|
||||
void ovl_inode_init(struct inode *inode, struct dentry *upperdentry,
|
||||
struct dentry *lowerdentry)
|
||||
struct dentry *lowerdentry, struct dentry *lowerdata)
|
||||
{
|
||||
struct inode *realinode = d_inode(upperdentry ?: lowerdentry);
|
||||
|
||||
@ -308,8 +411,11 @@ void ovl_inode_init(struct inode *inode, struct dentry *upperdentry,
|
||||
OVL_I(inode)->__upperdentry = upperdentry;
|
||||
if (lowerdentry)
|
||||
OVL_I(inode)->lower = igrab(d_inode(lowerdentry));
|
||||
if (lowerdata)
|
||||
OVL_I(inode)->lowerdata = igrab(d_inode(lowerdata));
|
||||
|
||||
ovl_copyattr(realinode, inode);
|
||||
ovl_copyflags(realinode, inode);
|
||||
if (!inode->i_ino)
|
||||
inode->i_ino = realinode->i_ino;
|
||||
}
|
||||
@ -333,7 +439,7 @@ void ovl_inode_update(struct inode *inode, struct dentry *upperdentry)
|
||||
}
|
||||
}
|
||||
|
||||
void ovl_dentry_version_inc(struct dentry *dentry, bool impurity)
|
||||
static void ovl_dentry_version_inc(struct dentry *dentry, bool impurity)
|
||||
{
|
||||
struct inode *inode = d_inode(dentry);
|
||||
|
||||
@ -348,6 +454,14 @@ void ovl_dentry_version_inc(struct dentry *dentry, bool impurity)
|
||||
OVL_I(inode)->version++;
|
||||
}
|
||||
|
||||
void ovl_dir_modified(struct dentry *dentry, bool impurity)
|
||||
{
|
||||
/* Copy mtime/ctime */
|
||||
ovl_copyattr(d_inode(ovl_dentry_upper(dentry)), d_inode(dentry));
|
||||
|
||||
ovl_dentry_version_inc(dentry, impurity);
|
||||
}
|
||||
|
||||
u64 ovl_dentry_version_get(struct dentry *dentry)
|
||||
{
|
||||
struct inode *inode = d_inode(dentry);
|
||||
@ -368,13 +482,51 @@ struct file *ovl_path_open(struct path *path, int flags)
|
||||
return dentry_open(path, flags | O_NOATIME, current_cred());
|
||||
}
|
||||
|
||||
int ovl_copy_up_start(struct dentry *dentry)
|
||||
/* Caller should hold ovl_inode->lock */
|
||||
static bool ovl_already_copied_up_locked(struct dentry *dentry, int flags)
|
||||
{
|
||||
bool disconnected = dentry->d_flags & DCACHE_DISCONNECTED;
|
||||
|
||||
if (ovl_dentry_upper(dentry) &&
|
||||
(ovl_dentry_has_upper_alias(dentry) || disconnected) &&
|
||||
!ovl_dentry_needs_data_copy_up_locked(dentry, flags))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool ovl_already_copied_up(struct dentry *dentry, int flags)
|
||||
{
|
||||
bool disconnected = dentry->d_flags & DCACHE_DISCONNECTED;
|
||||
|
||||
/*
|
||||
* Check if copy-up has happened as well as for upper alias (in
|
||||
* case of hard links) is there.
|
||||
*
|
||||
* Both checks are lockless:
|
||||
* - false negatives: will recheck under oi->lock
|
||||
* - false positives:
|
||||
* + ovl_dentry_upper() uses memory barriers to ensure the
|
||||
* upper dentry is up-to-date
|
||||
* + ovl_dentry_has_upper_alias() relies on locking of
|
||||
* upper parent i_rwsem to prevent reordering copy-up
|
||||
* with rename.
|
||||
*/
|
||||
if (ovl_dentry_upper(dentry) &&
|
||||
(ovl_dentry_has_upper_alias(dentry) || disconnected) &&
|
||||
!ovl_dentry_needs_data_copy_up(dentry, flags))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
int ovl_copy_up_start(struct dentry *dentry, int flags)
|
||||
{
|
||||
struct ovl_inode *oi = OVL_I(d_inode(dentry));
|
||||
int err;
|
||||
|
||||
err = mutex_lock_interruptible(&oi->lock);
|
||||
if (!err && ovl_dentry_has_upper_alias(dentry)) {
|
||||
if (!err && ovl_already_copied_up_locked(dentry, flags)) {
|
||||
err = 1; /* Already copied up */
|
||||
mutex_unlock(&oi->lock);
|
||||
}
|
||||
@ -675,3 +827,91 @@ err:
|
||||
pr_err("overlayfs: failed to lock workdir+upperdir\n");
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
/* err < 0, 0 if no metacopy xattr, 1 if metacopy xattr found */
|
||||
int ovl_check_metacopy_xattr(struct dentry *dentry)
|
||||
{
|
||||
int res;
|
||||
|
||||
/* Only regular files can have metacopy xattr */
|
||||
if (!S_ISREG(d_inode(dentry)->i_mode))
|
||||
return 0;
|
||||
|
||||
res = vfs_getxattr(dentry, OVL_XATTR_METACOPY, NULL, 0);
|
||||
if (res < 0) {
|
||||
if (res == -ENODATA || res == -EOPNOTSUPP)
|
||||
return 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
return 1;
|
||||
out:
|
||||
pr_warn_ratelimited("overlayfs: failed to get metacopy (%i)\n", res);
|
||||
return res;
|
||||
}
|
||||
|
||||
bool ovl_is_metacopy_dentry(struct dentry *dentry)
|
||||
{
|
||||
struct ovl_entry *oe = dentry->d_fsdata;
|
||||
|
||||
if (!d_is_reg(dentry))
|
||||
return false;
|
||||
|
||||
if (ovl_dentry_upper(dentry)) {
|
||||
if (!ovl_has_upperdata(d_inode(dentry)))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
return (oe->numlower > 1);
|
||||
}
|
||||
|
||||
char *ovl_get_redirect_xattr(struct dentry *dentry, int padding)
|
||||
{
|
||||
int res;
|
||||
char *s, *next, *buf = NULL;
|
||||
|
||||
res = vfs_getxattr(dentry, OVL_XATTR_REDIRECT, NULL, 0);
|
||||
if (res < 0) {
|
||||
if (res == -ENODATA || res == -EOPNOTSUPP)
|
||||
return NULL;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
buf = kzalloc(res + padding + 1, GFP_KERNEL);
|
||||
if (!buf)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
if (res == 0)
|
||||
goto invalid;
|
||||
|
||||
res = vfs_getxattr(dentry, OVL_XATTR_REDIRECT, buf, res);
|
||||
if (res < 0)
|
||||
goto fail;
|
||||
if (res == 0)
|
||||
goto invalid;
|
||||
|
||||
if (buf[0] == '/') {
|
||||
for (s = buf; *s++ == '/'; s = next) {
|
||||
next = strchrnul(s, '/');
|
||||
if (s == next)
|
||||
goto invalid;
|
||||
}
|
||||
} else {
|
||||
if (strchr(buf, '/') != NULL)
|
||||
goto invalid;
|
||||
}
|
||||
|
||||
return buf;
|
||||
|
||||
err_free:
|
||||
kfree(buf);
|
||||
return ERR_PTR(res);
|
||||
fail:
|
||||
pr_warn_ratelimited("overlayfs: failed to get redirect (%i)\n", res);
|
||||
goto err_free;
|
||||
invalid:
|
||||
pr_warn_ratelimited("overlayfs: invalid redirect (%s)\n", buf);
|
||||
res = -EINVAL;
|
||||
goto err_free;
|
||||
}
|
||||
|
@ -1964,6 +1964,44 @@ out_error:
|
||||
}
|
||||
EXPORT_SYMBOL(vfs_dedupe_file_range_compare);
|
||||
|
||||
int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
|
||||
struct file *dst_file, loff_t dst_pos, u64 len)
|
||||
{
|
||||
s64 ret;
|
||||
|
||||
ret = mnt_want_write_file(dst_file);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = clone_verify_area(dst_file, dst_pos, len, true);
|
||||
if (ret < 0)
|
||||
goto out_drop_write;
|
||||
|
||||
ret = -EINVAL;
|
||||
if (!(capable(CAP_SYS_ADMIN) || (dst_file->f_mode & FMODE_WRITE)))
|
||||
goto out_drop_write;
|
||||
|
||||
ret = -EXDEV;
|
||||
if (src_file->f_path.mnt != dst_file->f_path.mnt)
|
||||
goto out_drop_write;
|
||||
|
||||
ret = -EISDIR;
|
||||
if (S_ISDIR(file_inode(dst_file)->i_mode))
|
||||
goto out_drop_write;
|
||||
|
||||
ret = -EINVAL;
|
||||
if (!dst_file->f_op->dedupe_file_range)
|
||||
goto out_drop_write;
|
||||
|
||||
ret = dst_file->f_op->dedupe_file_range(src_file, src_pos,
|
||||
dst_file, dst_pos, len);
|
||||
out_drop_write:
|
||||
mnt_drop_write_file(dst_file);
|
||||
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(vfs_dedupe_file_range_one);
|
||||
|
||||
int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
|
||||
{
|
||||
struct file_dedupe_range_info *info;
|
||||
@ -1972,11 +2010,8 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
|
||||
u64 len;
|
||||
int i;
|
||||
int ret;
|
||||
bool is_admin = capable(CAP_SYS_ADMIN);
|
||||
u16 count = same->dest_count;
|
||||
struct file *dst_file;
|
||||
loff_t dst_off;
|
||||
ssize_t deduped;
|
||||
int deduped;
|
||||
|
||||
if (!(file->f_mode & FMODE_READ))
|
||||
return -EINVAL;
|
||||
@ -2003,6 +2038,9 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
|
||||
if (off + len > i_size_read(src))
|
||||
return -EINVAL;
|
||||
|
||||
/* Arbitrary 1G limit on a single dedupe request, can be raised. */
|
||||
len = min_t(u64, len, 1 << 30);
|
||||
|
||||
/* pre-format output fields to sane values */
|
||||
for (i = 0; i < count; i++) {
|
||||
same->info[i].bytes_deduped = 0ULL;
|
||||
@ -2010,54 +2048,28 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
|
||||
}
|
||||
|
||||
for (i = 0, info = same->info; i < count; i++, info++) {
|
||||
struct inode *dst;
|
||||
struct fd dst_fd = fdget(info->dest_fd);
|
||||
struct file *dst_file = dst_fd.file;
|
||||
|
||||
dst_file = dst_fd.file;
|
||||
if (!dst_file) {
|
||||
info->status = -EBADF;
|
||||
goto next_loop;
|
||||
}
|
||||
dst = file_inode(dst_file);
|
||||
|
||||
ret = mnt_want_write_file(dst_file);
|
||||
if (ret) {
|
||||
info->status = ret;
|
||||
goto next_fdput;
|
||||
}
|
||||
|
||||
dst_off = info->dest_offset;
|
||||
ret = clone_verify_area(dst_file, dst_off, len, true);
|
||||
if (ret < 0) {
|
||||
info->status = ret;
|
||||
goto next_file;
|
||||
}
|
||||
ret = 0;
|
||||
|
||||
if (info->reserved) {
|
||||
info->status = -EINVAL;
|
||||
} else if (!(is_admin || (dst_file->f_mode & FMODE_WRITE))) {
|
||||
info->status = -EINVAL;
|
||||
} else if (file->f_path.mnt != dst_file->f_path.mnt) {
|
||||
info->status = -EXDEV;
|
||||
} else if (S_ISDIR(dst->i_mode)) {
|
||||
info->status = -EISDIR;
|
||||
} else if (dst_file->f_op->dedupe_file_range == NULL) {
|
||||
info->status = -EINVAL;
|
||||
} else {
|
||||
deduped = dst_file->f_op->dedupe_file_range(file, off,
|
||||
len, dst_file,
|
||||
info->dest_offset);
|
||||
if (deduped == -EBADE)
|
||||
info->status = FILE_DEDUPE_RANGE_DIFFERS;
|
||||
else if (deduped < 0)
|
||||
info->status = deduped;
|
||||
else
|
||||
info->bytes_deduped += deduped;
|
||||
goto next_fdput;
|
||||
}
|
||||
|
||||
next_file:
|
||||
mnt_drop_write_file(dst_file);
|
||||
deduped = vfs_dedupe_file_range_one(file, off, dst_file,
|
||||
info->dest_offset, len);
|
||||
if (deduped == -EBADE)
|
||||
info->status = FILE_DEDUPE_RANGE_DIFFERS;
|
||||
else if (deduped < 0)
|
||||
info->status = deduped;
|
||||
else
|
||||
info->bytes_deduped = len;
|
||||
|
||||
next_fdput:
|
||||
fdput(dst_fd);
|
||||
next_loop:
|
||||
|
@ -23,7 +23,6 @@
|
||||
#include <linux/posix_acl_xattr.h>
|
||||
|
||||
#include <linux/uaccess.h>
|
||||
#include "internal.h"
|
||||
|
||||
static const char *
|
||||
strcmp_prefix(const char *a, const char *a_prefix)
|
||||
@ -501,10 +500,10 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name,
|
||||
if (!f.file)
|
||||
return error;
|
||||
audit_file(f.file);
|
||||
error = mnt_want_write_file_path(f.file);
|
||||
error = mnt_want_write_file(f.file);
|
||||
if (!error) {
|
||||
error = setxattr(f.file->f_path.dentry, name, value, size, flags);
|
||||
mnt_drop_write_file_path(f.file);
|
||||
mnt_drop_write_file(f.file);
|
||||
}
|
||||
fdput(f);
|
||||
return error;
|
||||
@ -733,10 +732,10 @@ SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name)
|
||||
if (!f.file)
|
||||
return error;
|
||||
audit_file(f.file);
|
||||
error = mnt_want_write_file_path(f.file);
|
||||
error = mnt_want_write_file(f.file);
|
||||
if (!error) {
|
||||
error = removexattr(f.file->f_path.dentry, name);
|
||||
mnt_drop_write_file_path(f.file);
|
||||
mnt_drop_write_file(f.file);
|
||||
}
|
||||
fdput(f);
|
||||
return error;
|
||||
|
@ -931,31 +931,16 @@ xfs_file_clone_range(
|
||||
len, false);
|
||||
}
|
||||
|
||||
STATIC ssize_t
|
||||
STATIC int
|
||||
xfs_file_dedupe_range(
|
||||
struct file *src_file,
|
||||
u64 loff,
|
||||
u64 len,
|
||||
struct file *dst_file,
|
||||
u64 dst_loff)
|
||||
struct file *file_in,
|
||||
loff_t pos_in,
|
||||
struct file *file_out,
|
||||
loff_t pos_out,
|
||||
u64 len)
|
||||
{
|
||||
struct inode *srci = file_inode(src_file);
|
||||
u64 max_dedupe;
|
||||
int error;
|
||||
|
||||
/*
|
||||
* Since we have to read all these pages in to compare them, cut
|
||||
* it off at MAX_RW_COUNT/2 rounded down to the nearest block.
|
||||
* That means we won't do more than MAX_RW_COUNT IO per request.
|
||||
*/
|
||||
max_dedupe = (MAX_RW_COUNT >> 1) & ~(i_blocksize(srci) - 1);
|
||||
if (len > max_dedupe)
|
||||
len = max_dedupe;
|
||||
error = xfs_reflink_remap_range(src_file, loff, dst_file, dst_loff,
|
||||
return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
|
||||
len, true);
|
||||
if (error)
|
||||
return error;
|
||||
return len;
|
||||
}
|
||||
|
||||
STATIC int
|
||||
|
@ -145,8 +145,7 @@ struct dentry_operations {
|
||||
char *(*d_dname)(struct dentry *, char *, int);
|
||||
struct vfsmount *(*d_automount)(struct path *);
|
||||
int (*d_manage)(const struct path *, bool);
|
||||
struct dentry *(*d_real)(struct dentry *, const struct inode *,
|
||||
unsigned int, unsigned int);
|
||||
struct dentry *(*d_real)(struct dentry *, const struct inode *);
|
||||
} ____cacheline_aligned;
|
||||
|
||||
/*
|
||||
@ -561,15 +560,10 @@ static inline struct dentry *d_backing_dentry(struct dentry *upper)
|
||||
return upper;
|
||||
}
|
||||
|
||||
/* d_real() flags */
|
||||
#define D_REAL_UPPER 0x2 /* return upper dentry or NULL if non-upper */
|
||||
|
||||
/**
|
||||
* d_real - Return the real dentry
|
||||
* @dentry: the dentry to query
|
||||
* @inode: inode to select the dentry from multiple layers (can be NULL)
|
||||
* @open_flags: open flags to control copy-up behavior
|
||||
* @flags: flags to control what is returned by this function
|
||||
*
|
||||
* If dentry is on a union/overlay, then return the underlying, real dentry.
|
||||
* Otherwise return the dentry itself.
|
||||
@ -577,11 +571,10 @@ static inline struct dentry *d_backing_dentry(struct dentry *upper)
|
||||
* See also: Documentation/filesystems/vfs.txt
|
||||
*/
|
||||
static inline struct dentry *d_real(struct dentry *dentry,
|
||||
const struct inode *inode,
|
||||
unsigned int open_flags, unsigned int flags)
|
||||
const struct inode *inode)
|
||||
{
|
||||
if (unlikely(dentry->d_flags & DCACHE_OP_REAL))
|
||||
return dentry->d_op->d_real(dentry, inode, open_flags, flags);
|
||||
return dentry->d_op->d_real(dentry, inode);
|
||||
else
|
||||
return dentry;
|
||||
}
|
||||
@ -596,7 +589,7 @@ static inline struct dentry *d_real(struct dentry *dentry,
|
||||
static inline struct inode *d_real_inode(const struct dentry *dentry)
|
||||
{
|
||||
/* This usage of d_real() results in const dentry */
|
||||
return d_backing_inode(d_real((struct dentry *) dentry, NULL, 0, 0));
|
||||
return d_backing_inode(d_real((struct dentry *) dentry, NULL));
|
||||
}
|
||||
|
||||
struct name_snapshot {
|
||||
|
@ -157,6 +157,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
|
||||
/* File is capable of returning -EAGAIN if I/O will block */
|
||||
#define FMODE_NOWAIT ((__force fmode_t)0x8000000)
|
||||
|
||||
/* File does not contribute to nr_files count */
|
||||
#define FMODE_NOACCOUNT ((__force fmode_t)0x20000000)
|
||||
|
||||
/*
|
||||
* Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector
|
||||
* that indicates that they should check the contents of the iovec are
|
||||
@ -1067,17 +1070,7 @@ struct file_lock_context {
|
||||
|
||||
extern void send_sigio(struct fown_struct *fown, int fd, int band);
|
||||
|
||||
/*
|
||||
* Return the inode to use for locking
|
||||
*
|
||||
* For overlayfs this should be the overlay inode, not the real inode returned
|
||||
* by file_inode(). For any other fs file_inode(filp) and locks_inode(filp) are
|
||||
* equal.
|
||||
*/
|
||||
static inline struct inode *locks_inode(const struct file *f)
|
||||
{
|
||||
return f->f_path.dentry->d_inode;
|
||||
}
|
||||
#define locks_inode(f) file_inode(f)
|
||||
|
||||
#ifdef CONFIG_FILE_LOCKING
|
||||
extern int fcntl_getlk(struct file *, unsigned int, struct flock *);
|
||||
@ -1262,7 +1255,7 @@ static inline struct inode *file_inode(const struct file *f)
|
||||
|
||||
static inline struct dentry *file_dentry(const struct file *file)
|
||||
{
|
||||
return d_real(file->f_path.dentry, file_inode(file), 0, 0);
|
||||
return d_real(file->f_path.dentry, file_inode(file));
|
||||
}
|
||||
|
||||
static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl)
|
||||
@ -1318,7 +1311,6 @@ extern int send_sigurg(struct fown_struct *fown);
|
||||
|
||||
/* These sb flags are internal to the kernel */
|
||||
#define SB_SUBMOUNT (1<<26)
|
||||
#define SB_NOREMOTELOCK (1<<27)
|
||||
#define SB_NOSEC (1<<28)
|
||||
#define SB_BORN (1<<29)
|
||||
#define SB_ACTIVE (1<<30)
|
||||
@ -1647,6 +1639,8 @@ int vfs_mkobj(struct dentry *, umode_t,
|
||||
int (*f)(struct dentry *, umode_t, void *),
|
||||
void *);
|
||||
|
||||
extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
|
||||
|
||||
/*
|
||||
* VFS file helper functions.
|
||||
*/
|
||||
@ -1765,7 +1759,7 @@ struct file_operations {
|
||||
loff_t, size_t, unsigned int);
|
||||
int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t,
|
||||
u64);
|
||||
ssize_t (*dedupe_file_range)(struct file *, u64, u64, struct file *,
|
||||
int (*dedupe_file_range)(struct file *, loff_t, struct file *, loff_t,
|
||||
u64);
|
||||
} __randomize_layout;
|
||||
|
||||
@ -1838,6 +1832,10 @@ extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
|
||||
loff_t len, bool *is_same);
|
||||
extern int vfs_dedupe_file_range(struct file *file,
|
||||
struct file_dedupe_range *same);
|
||||
extern int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
|
||||
struct file *dst_file, loff_t dst_pos,
|
||||
u64 len);
|
||||
|
||||
|
||||
struct super_operations {
|
||||
struct inode *(*alloc_inode)(struct super_block *sb);
|
||||
@ -2096,6 +2094,7 @@ enum file_time_flags {
|
||||
S_VERSION = 8,
|
||||
};
|
||||
|
||||
extern bool atime_needs_update(const struct path *, struct inode *);
|
||||
extern void touch_atime(const struct path *);
|
||||
static inline void file_accessed(struct file *file)
|
||||
{
|
||||
@ -2441,6 +2440,8 @@ extern struct file *filp_open(const char *, int, umode_t);
|
||||
extern struct file *file_open_root(struct dentry *, struct vfsmount *,
|
||||
const char *, int, umode_t);
|
||||
extern struct file * dentry_open(const struct path *, int, const struct cred *);
|
||||
extern struct file * open_with_fake_path(const struct path *, int,
|
||||
struct inode*, const struct cred *);
|
||||
static inline struct file *file_clone_open(struct file *file)
|
||||
{
|
||||
return dentry_open(&file->f_path, file->f_flags, file->f_cred);
|
||||
|
@ -30,11 +30,7 @@ static inline int fsnotify_parent(const struct path *path, struct dentry *dentry
|
||||
static inline int fsnotify_perm(struct file *file, int mask)
|
||||
{
|
||||
const struct path *path = &file->f_path;
|
||||
/*
|
||||
* Do not use file_inode() here or anywhere in this file to get the
|
||||
* inode. That would break *notity on overlayfs.
|
||||
*/
|
||||
struct inode *inode = path->dentry->d_inode;
|
||||
struct inode *inode = file_inode(file);
|
||||
__u32 fsnotify_mask = 0;
|
||||
int ret;
|
||||
|
||||
@ -178,7 +174,7 @@ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry)
|
||||
static inline void fsnotify_access(struct file *file)
|
||||
{
|
||||
const struct path *path = &file->f_path;
|
||||
struct inode *inode = path->dentry->d_inode;
|
||||
struct inode *inode = file_inode(file);
|
||||
__u32 mask = FS_ACCESS;
|
||||
|
||||
if (S_ISDIR(inode->i_mode))
|
||||
@ -196,7 +192,7 @@ static inline void fsnotify_access(struct file *file)
|
||||
static inline void fsnotify_modify(struct file *file)
|
||||
{
|
||||
const struct path *path = &file->f_path;
|
||||
struct inode *inode = path->dentry->d_inode;
|
||||
struct inode *inode = file_inode(file);
|
||||
__u32 mask = FS_MODIFY;
|
||||
|
||||
if (S_ISDIR(inode->i_mode))
|
||||
@ -214,7 +210,7 @@ static inline void fsnotify_modify(struct file *file)
|
||||
static inline void fsnotify_open(struct file *file)
|
||||
{
|
||||
const struct path *path = &file->f_path;
|
||||
struct inode *inode = path->dentry->d_inode;
|
||||
struct inode *inode = file_inode(file);
|
||||
__u32 mask = FS_OPEN;
|
||||
|
||||
if (S_ISDIR(inode->i_mode))
|
||||
@ -230,7 +226,7 @@ static inline void fsnotify_open(struct file *file)
|
||||
static inline void fsnotify_close(struct file *file)
|
||||
{
|
||||
const struct path *path = &file->f_path;
|
||||
struct inode *inode = path->dentry->d_inode;
|
||||
struct inode *inode = file_inode(file);
|
||||
fmode_t mode = file->f_mode;
|
||||
__u32 mask = (mode & FMODE_WRITE) ? FS_CLOSE_WRITE : FS_CLOSE_NOWRITE;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user