diff --git a/arch/cris/kernel/process.c b/arch/cris/kernel/process.c index 60816e876455..4df0b320d524 100644 --- a/arch/cris/kernel/process.c +++ b/arch/cris/kernel/process.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 64f068540d0d..706eb5c7e2ee 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -635,7 +635,7 @@ long spufs_create(struct nameidata *nd, unsigned int flags, mode_t mode, if (dentry->d_inode) goto out_dput; - mode &= ~current->fs->umask; + mode &= ~current_umask(); if (flags & SPU_CREATE_GANG) ret = spufs_create_gang(nd->path.dentry->d_inode, diff --git a/drivers/char/tty_audit.c b/drivers/char/tty_audit.c index 34ab6d798f81..55ba6f142883 100644 --- a/drivers/char/tty_audit.c +++ b/drivers/char/tty_audit.c @@ -10,8 +10,6 @@ */ #include -#include -#include #include struct tty_audit_buf { diff --git a/drivers/char/tty_ldisc.c b/drivers/char/tty_ldisc.c index 7a84b406a952..f78f5b0127a8 100644 --- a/drivers/char/tty_ldisc.c +++ b/drivers/char/tty_ldisc.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/Makefile b/fs/Makefile index 6e82a307bcd4..b5cd8e18dd9f 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -11,7 +11,7 @@ obj-y := open.o read_write.o file_table.o super.o \ attr.o bad_inode.o file.o filesystems.o namespace.o \ seq_file.o xattr.o libfs.o fs-writeback.o \ pnode.o drop_caches.o splice.o sync.o utimes.o \ - stack.o + stack.o fs_struct.o ifeq ($(CONFIG_BLOCK),y) obj-y += buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 33b7235f853b..40381df34869 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -12,8 +12,6 @@ #include #include #include -#include -#include #include #include #include @@ -21,20 +19,15 @@ #include #include #include -#include -#include #include -#include #include #include #include #include -#include #include #include #include #include -#include #include #include #include @@ -576,7 +569,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) unsigned long error; struct elf_phdr *elf_ppnt, *elf_phdata; unsigned long elf_bss, elf_brk; - int elf_exec_fileno; int retval, i; unsigned int size; unsigned long elf_entry; @@ -631,12 +623,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) goto out_free_ph; } - retval = get_unused_fd(); - if (retval < 0) - goto out_free_ph; - get_file(bprm->file); - fd_install(elf_exec_fileno = retval, bprm->file); - elf_ppnt = elf_phdata; elf_bss = 0; elf_brk = 0; @@ -655,13 +641,13 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) retval = -ENOEXEC; if (elf_ppnt->p_filesz > PATH_MAX || elf_ppnt->p_filesz < 2) - goto out_free_file; + goto out_free_ph; retval = -ENOMEM; elf_interpreter = kmalloc(elf_ppnt->p_filesz, GFP_KERNEL); if (!elf_interpreter) - goto out_free_file; + goto out_free_ph; retval = kernel_read(bprm->file, elf_ppnt->p_offset, elf_interpreter, @@ -956,8 +942,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) kfree(elf_phdata); - sys_close(elf_exec_fileno); - set_binfmt(&elf_format); #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES @@ -1028,8 +1012,6 @@ out_free_dentry: fput(interpreter); out_free_interp: kfree(elf_interpreter); -out_free_file: - sys_close(elf_exec_fileno); out_free_ph: kfree(elf_phdata); goto out; diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c index 08644a61616e..eff74b9c9e77 100644 --- a/fs/binfmt_som.c +++ b/fs/binfmt_som.c @@ -188,7 +188,6 @@ out: static int load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs) { - int som_exec_fileno; int retval; unsigned int size; unsigned long som_entry; @@ -220,12 +219,6 @@ load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs) goto out_free; } - retval = get_unused_fd(); - if (retval < 0) - goto out_free; - get_file(bprm->file); - fd_install(som_exec_fileno = retval, bprm->file); - /* Flush all traces of the currently running executable */ retval = flush_old_exec(bprm); if (retval) diff --git a/fs/block_dev.c b/fs/block_dev.c index 8c3c6899ccf3..f45dbc18dd17 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -204,6 +204,7 @@ int fsync_bdev(struct block_device *bdev) } return sync_blockdev(bdev); } +EXPORT_SYMBOL(fsync_bdev); /** * freeze_bdev -- lock a filesystem and force it into a consistent state diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 1d53b62dbba5..7fdd184a528d 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -256,7 +256,7 @@ int btrfs_init_acl(struct inode *inode, struct inode *dir) } if (!acl) - inode->i_mode &= ~current->fs->umask; + inode->i_mode &= ~current_umask(); } if (IS_POSIXACL(dir) && acl) { diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index bca729fc80c8..7594bec1be10 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -267,7 +267,7 @@ static noinline int btrfs_mksubvol(struct path *parent, char *name, goto out_dput; if (!IS_POSIXACL(parent->dentry->d_inode)) - mode &= ~current->fs->umask; + mode &= ~current_umask(); error = mnt_want_write(parent->mnt); if (error) diff --git a/fs/buffer.c b/fs/buffer.c index 2963858f0f31..c2fa1be4923d 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3315,7 +3315,6 @@ EXPORT_SYMBOL(cont_write_begin); EXPORT_SYMBOL(end_buffer_read_sync); EXPORT_SYMBOL(end_buffer_write_sync); EXPORT_SYMBOL(file_fsync); -EXPORT_SYMBOL(fsync_bdev); EXPORT_SYMBOL(generic_block_bmap); EXPORT_SYMBOL(generic_cont_expand_simple); EXPORT_SYMBOL(init_buffer); diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 2f35cccfcd8d..54dce78fbb73 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -254,7 +254,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, return -ENOMEM; } - mode &= ~current->fs->umask; + mode &= ~current_umask(); if (oplockEnabled) oplock = REQ_OPLOCK; @@ -479,7 +479,7 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, rc = -ENOMEM; else if (pTcon->unix_ext) { struct cifs_unix_set_info_args args = { - .mode = mode & ~current->fs->umask, + .mode = mode & ~current_umask(), .ctime = NO_CHANGE_64, .atime = NO_CHANGE_64, .mtime = NO_CHANGE_64, diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index a8797cc60805..f121a80fdd6f 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1125,7 +1125,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) goto mkdir_out; } - mode &= ~current->fs->umask; + mode &= ~current_umask(); rc = CIFSPOSIXCreate(xid, pTcon, SMB_O_DIRECTORY | SMB_O_CREAT, mode, NULL /* netfid */, pInfo, &oplock, full_path, cifs_sb->local_nls, @@ -1204,7 +1204,7 @@ mkdir_get_info: if ((direntry->d_inode) && (direntry->d_inode->i_nlink < 2)) direntry->d_inode->i_nlink = 2; - mode &= ~current->fs->umask; + mode &= ~current_umask(); /* must turn on setgid bit if parent dir has it */ if (inode->i_mode & S_ISGID) mode |= S_ISGID; diff --git a/fs/compat.c b/fs/compat.c index 440a019256dd..1c859dae758f 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include @@ -1502,12 +1503,15 @@ int compat_do_execve(char * filename, bprm->cred = prepare_exec_creds(); if (!bprm->cred) goto out_unlock; - check_unsafe_exec(bprm); + + retval = check_unsafe_exec(bprm); + if (retval) + goto out_unlock; file = open_exec(filename); retval = PTR_ERR(file); if (IS_ERR(file)) - goto out_unlock; + goto out_unmark; sched_exec(); @@ -1549,6 +1553,9 @@ int compat_do_execve(char * filename, goto out; /* execve succeeded */ + write_lock(¤t->fs->lock); + current->fs->in_exec = 0; + write_unlock(¤t->fs->lock); current->in_execve = 0; mutex_unlock(¤t->cred_exec_mutex); acct_update_integrals(current); @@ -1567,6 +1574,11 @@ out_file: fput(bprm->file); } +out_unmark: + write_lock(¤t->fs->lock); + current->fs->in_exec = 0; + write_unlock(¤t->fs->lock); + out_unlock: current->in_execve = 0; mutex_unlock(¤t->cred_exec_mutex); diff --git a/fs/dcache.c b/fs/dcache.c index 90bbd7e1b116..761d30be2683 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -32,6 +31,7 @@ #include #include #include +#include #include "internal.h" int sysctl_vfs_cache_pressure __read_mostly = 100; diff --git a/fs/exec.c b/fs/exec.c index c5128fbc9165..052a961e41aa 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -53,6 +53,7 @@ #include #include #include +#include #include #include @@ -1056,28 +1057,35 @@ EXPORT_SYMBOL(install_exec_creds); * - the caller must hold current->cred_exec_mutex to protect against * PTRACE_ATTACH */ -void check_unsafe_exec(struct linux_binprm *bprm) +int check_unsafe_exec(struct linux_binprm *bprm) { struct task_struct *p = current, *t; unsigned long flags; - unsigned n_fs, n_sighand; + unsigned n_fs; + int res = 0; bprm->unsafe = tracehook_unsafe_exec(p); n_fs = 1; - n_sighand = 1; + write_lock(&p->fs->lock); lock_task_sighand(p, &flags); for (t = next_thread(p); t != p; t = next_thread(t)) { if (t->fs == p->fs) n_fs++; - n_sighand++; } - if (atomic_read(&p->fs->count) > n_fs || - atomic_read(&p->sighand->count) > n_sighand) + if (p->fs->users > n_fs) { bprm->unsafe |= LSM_UNSAFE_SHARE; + } else { + if (p->fs->in_exec) + res = -EAGAIN; + p->fs->in_exec = 1; + } unlock_task_sighand(p, &flags); + write_unlock(&p->fs->lock); + + return res; } /* @@ -1296,12 +1304,15 @@ int do_execve(char * filename, bprm->cred = prepare_exec_creds(); if (!bprm->cred) goto out_unlock; - check_unsafe_exec(bprm); + + retval = check_unsafe_exec(bprm); + if (retval) + goto out_unlock; file = open_exec(filename); retval = PTR_ERR(file); if (IS_ERR(file)) - goto out_unlock; + goto out_unmark; sched_exec(); @@ -1344,6 +1355,9 @@ int do_execve(char * filename, goto out; /* execve succeeded */ + write_lock(¤t->fs->lock); + current->fs->in_exec = 0; + write_unlock(¤t->fs->lock); current->in_execve = 0; mutex_unlock(¤t->cred_exec_mutex); acct_update_integrals(current); @@ -1362,6 +1376,11 @@ out_file: fput(bprm->file); } +out_unmark: + write_lock(¤t->fs->lock); + current->fs->in_exec = 0; + write_unlock(¤t->fs->lock); + out_unlock: current->in_execve = 0; mutex_unlock(¤t->cred_exec_mutex); diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c index ae8c4f850b27..d46e38cb85c5 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c @@ -318,7 +318,7 @@ ext2_init_acl(struct inode *inode, struct inode *dir) return PTR_ERR(acl); } if (!acl) - inode->i_mode &= ~current->fs->umask; + inode->i_mode &= ~current_umask(); } if (test_opt(inode->i_sb, POSIX_ACL) && acl) { struct posix_acl *clone; diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c index b60bb241880c..d81ef2fdb08e 100644 --- a/fs/ext3/acl.c +++ b/fs/ext3/acl.c @@ -323,7 +323,7 @@ ext3_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) return PTR_ERR(acl); } if (!acl) - inode->i_mode &= ~current->fs->umask; + inode->i_mode &= ~current_umask(); } if (test_opt(inode->i_sb, POSIX_ACL) && acl) { struct posix_acl *clone; diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index 694ed6fadcc8..647e0d65a284 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -323,7 +323,7 @@ ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) return PTR_ERR(acl); } if (!acl) - inode->i_mode &= ~current->fs->umask; + inode->i_mode &= ~current_umask(); } if (test_opt(inode->i_sb, POSIX_ACL) && acl) { struct posix_acl *clone; diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 2cc952e4c3dc..296785a0dec8 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -934,7 +934,7 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug, opts->fs_uid = current_uid(); opts->fs_gid = current_gid(); - opts->fs_fmask = opts->fs_dmask = current->fs->umask; + opts->fs_fmask = current_umask(); opts->allow_utime = -1; opts->codepage = fat_default_codepage; opts->iocharset = fat_default_iocharset; diff --git a/fs/fs_struct.c b/fs/fs_struct.c new file mode 100644 index 000000000000..eee059052db5 --- /dev/null +++ b/fs/fs_struct.c @@ -0,0 +1,177 @@ +#include +#include +#include +#include +#include +#include + +/* + * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values. + * It can block. + */ +void set_fs_root(struct fs_struct *fs, struct path *path) +{ + struct path old_root; + + write_lock(&fs->lock); + old_root = fs->root; + fs->root = *path; + path_get(path); + write_unlock(&fs->lock); + if (old_root.dentry) + path_put(&old_root); +} + +/* + * Replace the fs->{pwdmnt,pwd} with {mnt,dentry}. Put the old values. + * It can block. + */ +void set_fs_pwd(struct fs_struct *fs, struct path *path) +{ + struct path old_pwd; + + write_lock(&fs->lock); + old_pwd = fs->pwd; + fs->pwd = *path; + path_get(path); + write_unlock(&fs->lock); + + if (old_pwd.dentry) + path_put(&old_pwd); +} + +void chroot_fs_refs(struct path *old_root, struct path *new_root) +{ + struct task_struct *g, *p; + struct fs_struct *fs; + int count = 0; + + read_lock(&tasklist_lock); + do_each_thread(g, p) { + task_lock(p); + fs = p->fs; + if (fs) { + write_lock(&fs->lock); + if (fs->root.dentry == old_root->dentry + && fs->root.mnt == old_root->mnt) { + path_get(new_root); + fs->root = *new_root; + count++; + } + if (fs->pwd.dentry == old_root->dentry + && fs->pwd.mnt == old_root->mnt) { + path_get(new_root); + fs->pwd = *new_root; + count++; + } + write_unlock(&fs->lock); + } + task_unlock(p); + } while_each_thread(g, p); + read_unlock(&tasklist_lock); + while (count--) + path_put(old_root); +} + +void free_fs_struct(struct fs_struct *fs) +{ + path_put(&fs->root); + path_put(&fs->pwd); + kmem_cache_free(fs_cachep, fs); +} + +void exit_fs(struct task_struct *tsk) +{ + struct fs_struct *fs = tsk->fs; + + if (fs) { + int kill; + task_lock(tsk); + write_lock(&fs->lock); + tsk->fs = NULL; + kill = !--fs->users; + write_unlock(&fs->lock); + task_unlock(tsk); + if (kill) + free_fs_struct(fs); + } +} + +struct fs_struct *copy_fs_struct(struct fs_struct *old) +{ + struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL); + /* We don't need to lock fs - think why ;-) */ + if (fs) { + fs->users = 1; + fs->in_exec = 0; + rwlock_init(&fs->lock); + fs->umask = old->umask; + read_lock(&old->lock); + fs->root = old->root; + path_get(&old->root); + fs->pwd = old->pwd; + path_get(&old->pwd); + read_unlock(&old->lock); + } + return fs; +} + +int unshare_fs_struct(void) +{ + struct fs_struct *fs = current->fs; + struct fs_struct *new_fs = copy_fs_struct(fs); + int kill; + + if (!new_fs) + return -ENOMEM; + + task_lock(current); + write_lock(&fs->lock); + kill = !--fs->users; + current->fs = new_fs; + write_unlock(&fs->lock); + task_unlock(current); + + if (kill) + free_fs_struct(fs); + + return 0; +} +EXPORT_SYMBOL_GPL(unshare_fs_struct); + +int current_umask(void) +{ + return current->fs->umask; +} +EXPORT_SYMBOL(current_umask); + +/* to be mentioned only in INIT_TASK */ +struct fs_struct init_fs = { + .users = 1, + .lock = __RW_LOCK_UNLOCKED(init_fs.lock), + .umask = 0022, +}; + +void daemonize_fs_struct(void) +{ + struct fs_struct *fs = current->fs; + + if (fs) { + int kill; + + task_lock(current); + + write_lock(&init_fs.lock); + init_fs.users++; + write_unlock(&init_fs.lock); + + write_lock(&fs->lock); + current->fs = &init_fs; + kill = !--fs->users; + write_unlock(&fs->lock); + + task_unlock(current); + if (kill) + free_fs_struct(fs); + } +} diff --git a/fs/generic_acl.c b/fs/generic_acl.c index 995d63b2e747..e0b53aa7bbec 100644 --- a/fs/generic_acl.c +++ b/fs/generic_acl.c @@ -134,7 +134,7 @@ generic_acl_init(struct inode *inode, struct inode *dir, mode_t mode = inode->i_mode; int error; - inode->i_mode = mode & ~current->fs->umask; + inode->i_mode = mode & ~current_umask(); if (!S_ISLNK(inode->i_mode)) acl = ops->getacl(dir, ACL_TYPE_DEFAULT); if (acl) { diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index 43764f4fa763..fa881bdc3d85 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -215,7 +215,7 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip) if (error) return error; if (!acl) { - mode &= ~current->fs->umask; + mode &= ~current_umask(); if (mode != ip->i_inode.i_mode) error = munge_mode(ip, mode); return error; diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c index bab7f8d1bdfa..3fcbb0e1f6fc 100644 --- a/fs/hfsplus/options.c +++ b/fs/hfsplus/options.c @@ -48,7 +48,7 @@ void hfsplus_fill_defaults(struct hfsplus_sb_info *opts) opts->creator = HFSPLUS_DEF_CR_TYPE; opts->type = HFSPLUS_DEF_CR_TYPE; - opts->umask = current->fs->umask; + opts->umask = current_umask(); opts->uid = current_uid(); opts->gid = current_gid(); opts->part = -1; diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index c40f6e242444..fecf402d7b8a 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -480,7 +480,7 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) uid = current_uid(); gid = current_gid(); - umask = current->fs->umask; + umask = current_umask(); lowercase = 0; conv = CONV_BINARY; eas = 2; diff --git a/fs/internal.h b/fs/internal.h index 53af885f1732..b4dac4fb6b61 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -11,6 +11,7 @@ struct super_block; struct linux_binprm; +struct path; /* * block_dev.c @@ -43,7 +44,7 @@ extern void __init chrdev_init(void); /* * exec.c */ -extern void check_unsafe_exec(struct linux_binprm *); +extern int check_unsafe_exec(struct linux_binprm *); /* * namespace.c @@ -60,3 +61,8 @@ extern void umount_tree(struct vfsmount *, int, struct list_head *); extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int); extern void __init mnt_init(void); + +/* + * fs_struct.c + */ +extern void chroot_fs_refs(struct path *, struct path *); diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c index d98713777a1b..77ccf8cb0823 100644 --- a/fs/jffs2/acl.c +++ b/fs/jffs2/acl.c @@ -336,7 +336,7 @@ int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode) return PTR_ERR(acl); if (!acl) { - *i_mode &= ~current->fs->umask; + *i_mode &= ~current_umask(); } else { if (S_ISDIR(*i_mode)) jffs2_iset_acl(inode, &f->i_acl_default, acl); diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c index a166c1669e82..06ca1b8d2054 100644 --- a/fs/jfs/acl.c +++ b/fs/jfs/acl.c @@ -182,7 +182,7 @@ int jfs_init_acl(tid_t tid, struct inode *inode, struct inode *dir) cleanup: posix_acl_release(acl); } else - inode->i_mode &= ~current->fs->umask; + inode->i_mode &= ~current_umask(); JFS_IP(inode)->mode2 = (JFS_IP(inode)->mode2 & 0xffff0000) | inode->i_mode; diff --git a/fs/mpage.c b/fs/mpage.c index 16c3ef37eae3..680ba60863ff 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -82,7 +82,7 @@ static void mpage_end_io_write(struct bio *bio, int err) bio_put(bio); } -struct bio *mpage_bio_submit(int rw, struct bio *bio) +static struct bio *mpage_bio_submit(int rw, struct bio *bio) { bio->bi_end_io = mpage_end_io_read; if (rw == WRITE) @@ -90,7 +90,6 @@ struct bio *mpage_bio_submit(int rw, struct bio *bio) submit_bio(rw, bio); return NULL; } -EXPORT_SYMBOL(mpage_bio_submit); static struct bio * mpage_alloc(struct block_device *bdev, @@ -439,7 +438,14 @@ EXPORT_SYMBOL(mpage_readpage); * just allocate full-size (16-page) BIOs. */ -int __mpage_writepage(struct page *page, struct writeback_control *wbc, +struct mpage_data { + struct bio *bio; + sector_t last_block_in_bio; + get_block_t *get_block; + unsigned use_writepage; +}; + +static int __mpage_writepage(struct page *page, struct writeback_control *wbc, void *data) { struct mpage_data *mpd = data; @@ -648,7 +654,6 @@ out: mpd->bio = bio; return ret; } -EXPORT_SYMBOL(__mpage_writepage); /** * mpage_writepages - walk the list of dirty pages of the given address space & writepage() all of them diff --git a/fs/namei.c b/fs/namei.c index d040ce11785d..b8433ebfae05 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE]) @@ -1578,7 +1579,7 @@ static int __open_namei_create(struct nameidata *nd, struct path *path, struct dentry *dir = nd->path.dentry; if (!IS_POSIXACL(dir->d_inode)) - mode &= ~current->fs->umask; + mode &= ~current_umask(); error = security_path_mknod(&nd->path, path->dentry, mode, 0); if (error) goto out_unlock; @@ -1989,7 +1990,7 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, int, mode, goto out_unlock; } if (!IS_POSIXACL(nd.path.dentry->d_inode)) - mode &= ~current->fs->umask; + mode &= ~current_umask(); error = may_mknod(mode); if (error) goto out_dput; @@ -2067,7 +2068,7 @@ SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, int, mode) goto out_unlock; if (!IS_POSIXACL(nd.path.dentry->d_inode)) - mode &= ~current->fs->umask; + mode &= ~current_umask(); error = mnt_want_write(nd.path.mnt); if (error) goto out_dput; @@ -2897,10 +2898,3 @@ EXPORT_SYMBOL(vfs_symlink); EXPORT_SYMBOL(vfs_unlink); EXPORT_SYMBOL(dentry_unhash); EXPORT_SYMBOL(generic_readlink); - -/* to be mentioned only in INIT_TASK */ -struct fs_struct init_fs = { - .count = ATOMIC_INIT(1), - .lock = __RW_LOCK_UNLOCKED(init_fs.lock), - .umask = 0022, -}; diff --git a/fs/namespace.c b/fs/namespace.c index 0a42e0e96027..c6f54e4c4290 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include "pnode.h" @@ -2092,66 +2093,6 @@ out1: return retval; } -/* - * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values. - * It can block. Requires the big lock held. - */ -void set_fs_root(struct fs_struct *fs, struct path *path) -{ - struct path old_root; - - write_lock(&fs->lock); - old_root = fs->root; - fs->root = *path; - path_get(path); - write_unlock(&fs->lock); - if (old_root.dentry) - path_put(&old_root); -} - -/* - * Replace the fs->{pwdmnt,pwd} with {mnt,dentry}. Put the old values. - * It can block. Requires the big lock held. - */ -void set_fs_pwd(struct fs_struct *fs, struct path *path) -{ - struct path old_pwd; - - write_lock(&fs->lock); - old_pwd = fs->pwd; - fs->pwd = *path; - path_get(path); - write_unlock(&fs->lock); - - if (old_pwd.dentry) - path_put(&old_pwd); -} - -static void chroot_fs_refs(struct path *old_root, struct path *new_root) -{ - struct task_struct *g, *p; - struct fs_struct *fs; - - read_lock(&tasklist_lock); - do_each_thread(g, p) { - task_lock(p); - fs = p->fs; - if (fs) { - atomic_inc(&fs->count); - task_unlock(p); - if (fs->root.dentry == old_root->dentry - && fs->root.mnt == old_root->mnt) - set_fs_root(fs, new_root); - if (fs->pwd.dentry == old_root->dentry - && fs->pwd.mnt == old_root->mnt) - set_fs_pwd(fs, new_root); - put_fs_struct(fs); - } else - task_unlock(p); - } while_each_thread(g, p); - read_unlock(&tasklist_lock); -} - /* * pivot_root Semantics: * Moves the root file system of the current process to the directory put_old, diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index b82fe6847f14..d0cc5ce0edfe 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -328,7 +328,7 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, data->arg.create.verifier[1] = current->pid; } - sattr->ia_mode &= ~current->fs->umask; + sattr->ia_mode &= ~current_umask(); for (;;) { status = nfs3_do_create(dir, dentry, data); @@ -528,7 +528,7 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) dprintk("NFS call mkdir %s\n", dentry->d_name.name); - sattr->ia_mode &= ~current->fs->umask; + sattr->ia_mode &= ~current_umask(); data = nfs3_alloc_createdata(); if (data == NULL) @@ -639,7 +639,7 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, dprintk("NFS call mknod %s %u:%u\n", dentry->d_name.name, MAJOR(rdev), MINOR(rdev)); - sattr->ia_mode &= ~current->fs->umask; + sattr->ia_mode &= ~current_umask(); data = nfs3_alloc_createdata(); if (data == NULL) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 97bacccff579..a4d242680299 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1501,7 +1501,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) attr.ia_mode = nd->intent.open.create_mode; attr.ia_valid = ATTR_MODE; if (!IS_POSIXACL(dir)) - attr.ia_mode &= ~current->fs->umask; + attr.ia_mode &= ~current_umask(); } else { attr.ia_valid = 0; BUG_ON(nd->intent.open.flags & O_CREAT); diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index bc3567bab8c4..7c09852be713 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -403,7 +403,6 @@ static int nfsd(void *vrqstp) { struct svc_rqst *rqstp = (struct svc_rqst *) vrqstp; - struct fs_struct *fsp; int err, preverr = 0; /* Lock module and set up kernel thread */ @@ -412,13 +411,11 @@ nfsd(void *vrqstp) /* At this point, the thread shares current->fs * with the init process. We need to create files with a * umask of 0 instead of init's umask. */ - fsp = copy_fs_struct(current->fs); - if (!fsp) { + if (unshare_fs_struct() < 0) { printk("Unable to start nfsd thread: out of memory\n"); goto out; } - exit_fs(current); - current->fs = fsp; + current->fs->umask = 0; /* diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index 12dfb44c22e5..fbeaec762103 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c @@ -296,7 +296,7 @@ int ocfs2_init_acl(handle_t *handle, return PTR_ERR(acl); } if (!acl) - inode->i_mode &= ~current->fs->umask; + inode->i_mode &= ~current_umask(); } if ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) && acl) { struct posix_acl *clone; diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c index d79e808fd028..379ae5fb4411 100644 --- a/fs/omfs/inode.c +++ b/fs/omfs/inode.c @@ -426,7 +426,7 @@ static int omfs_fill_super(struct super_block *sb, void *data, int silent) sbi->s_uid = current_uid(); sbi->s_gid = current_gid(); - sbi->s_dmask = sbi->s_fmask = current->fs->umask; + sbi->s_dmask = sbi->s_fmask = current_umask(); if (!parse_options((char *) data, sbi)) goto end; diff --git a/fs/open.c b/fs/open.c index 75b61677daaf..377eb25b6abf 100644 --- a/fs/open.c +++ b/fs/open.c @@ -29,6 +29,7 @@ #include #include #include +#include int vfs_statfs(struct dentry *dentry, struct kstatfs *buf) { diff --git a/fs/proc/base.c b/fs/proc/base.c index e0afd326b688..f71559784bfb 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -80,6 +80,7 @@ #include #include #include +#include #include "internal.h" /* NOTE: diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 370be0a2c909..863464d5519c 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -49,7 +50,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) else bytes += kobjsize(mm); - if (current->fs && atomic_read(¤t->fs->count) > 1) + if (current->fs && current->fs->users > 1) sbytes += kobjsize(current->fs); else bytes += kobjsize(current->fs); diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c index d423416d93d1..c303c426fe2b 100644 --- a/fs/reiserfs/xattr_acl.c +++ b/fs/reiserfs/xattr_acl.c @@ -428,7 +428,7 @@ reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th, } else { apply_umask: /* no ACL, apply umask */ - inode->i_mode &= ~current->fs->umask; + inode->i_mode &= ~current_umask(); } return err; diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 7aa53fefc67f..2940612e3aeb 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -227,7 +227,7 @@ xfs_vn_mknod( xfs_dentry_to_name(&name, dentry); if (IS_POSIXACL(dir) && !default_acl) - mode &= ~current->fs->umask; + mode &= ~current_umask(); switch (mode & S_IFMT) { case S_IFCHR: @@ -416,7 +416,7 @@ xfs_vn_symlink( mode_t mode; mode = S_IFLNK | - (irix_symlink_mode ? 0777 & ~current->fs->umask : S_IRWXUGO); + (irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO); xfs_dentry_to_name(&name, dentry); error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip, NULL); diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 3d7bcde2e332..7b73bb8f1970 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -332,22 +332,10 @@ extern int __set_page_dirty_buffers(struct page *page); static inline void buffer_init(void) {} static inline int try_to_free_buffers(struct page *page) { return 1; } -static inline int sync_blockdev(struct block_device *bdev) { return 0; } static inline int inode_has_buffers(struct inode *inode) { return 0; } static inline void invalidate_inode_buffers(struct inode *inode) {} static inline int remove_inode_buffers(struct inode *inode) { return 1; } static inline int sync_mapping_buffers(struct address_space *mapping) { return 0; } -static inline void invalidate_bdev(struct block_device *bdev) {} - -static inline struct super_block *freeze_bdev(struct block_device *sb) -{ - return NULL; -} - -static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb) -{ - return 0; -} #endif /* CONFIG_BLOCK */ #endif /* _LINUX_BUFFER_HEAD_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 61211ad823fe..a09e17c8f5fd 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1741,6 +1741,8 @@ extern void drop_collected_mounts(struct vfsmount *); extern int vfs_statfs(struct dentry *, struct kstatfs *); +extern int current_umask(void); + /* /sys/fs */ extern struct kobject *fs_kobj; @@ -1885,6 +1887,18 @@ extern int fsync_super(struct super_block *); extern int fsync_no_super(struct block_device *); #else static inline void bd_forget(struct inode *inode) {} +static inline int sync_blockdev(struct block_device *bdev) { return 0; } +static inline void invalidate_bdev(struct block_device *bdev) {} + +static inline struct super_block *freeze_bdev(struct block_device *sb) +{ + return NULL; +} + +static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb) +{ + return 0; +} #endif extern const struct file_operations def_blk_fops; extern const struct file_operations def_chr_fops; diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h index 18b467dbe278..78a05bfcd8eb 100644 --- a/include/linux/fs_struct.h +++ b/include/linux/fs_struct.h @@ -4,12 +4,10 @@ #include struct fs_struct { - atomic_t count; /* This usage count is used by check_unsafe_exec() for - * security checking purposes - therefore it may not be - * incremented, except by clone(CLONE_FS). - */ + int users; rwlock_t lock; int umask; + int in_exec; struct path root, pwd; }; @@ -19,6 +17,8 @@ extern void exit_fs(struct task_struct *); extern void set_fs_root(struct fs_struct *, struct path *); extern void set_fs_pwd(struct fs_struct *, struct path *); extern struct fs_struct *copy_fs_struct(struct fs_struct *); -extern void put_fs_struct(struct fs_struct *); +extern void free_fs_struct(struct fs_struct *); +extern void daemonize_fs_struct(void); +extern int unshare_fs_struct(void); #endif /* _LINUX_FS_STRUCT_H */ diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h index 830bbcd449d6..3a059298cc19 100644 --- a/include/linux/mnt_namespace.h +++ b/include/linux/mnt_namespace.h @@ -22,6 +22,8 @@ struct proc_mounts { int event; }; +struct fs_struct; + extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *, struct fs_struct *); extern void __put_mnt_ns(struct mnt_namespace *ns); diff --git a/include/linux/mpage.h b/include/linux/mpage.h index 5c42821da2d1..068a0c9946af 100644 --- a/include/linux/mpage.h +++ b/include/linux/mpage.h @@ -11,21 +11,11 @@ */ #ifdef CONFIG_BLOCK -struct mpage_data { - struct bio *bio; - sector_t last_block_in_bio; - get_block_t *get_block; - unsigned use_writepage; -}; - struct writeback_control; -struct bio *mpage_bio_submit(int rw, struct bio *bio); int mpage_readpages(struct address_space *mapping, struct list_head *pages, unsigned nr_pages, get_block_t get_block); int mpage_readpage(struct page *page, get_block_t get_block); -int __mpage_writepage(struct page *page, struct writeback_control *wbc, - void *data); int mpage_writepages(struct address_space *mapping, struct writeback_control *wbc, get_block_t get_block); int mpage_writepage(struct page *page, get_block_t *get_block, diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index afad7dec1b36..7b370c7cfeff 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -8,6 +8,7 @@ struct mnt_namespace; struct uts_namespace; struct ipc_namespace; struct pid_namespace; +struct fs_struct; /* * A structure to contain pointers to all per-process diff --git a/include/linux/sched.h b/include/linux/sched.h index 206ac003e8c0..9da5aa0771ef 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -68,7 +68,7 @@ struct sched_param { #include #include #include -#include +#include #include #include #include @@ -97,6 +97,7 @@ struct futex_pi_state; struct robust_list_head; struct bio; struct bts_tracer; +struct fs_struct; /* * List of flags we want to share for kernel threads, diff --git a/init/do_mounts.c b/init/do_mounts.c index 8d4ff5afc1d8..dd7ee5f203f3 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include diff --git a/ipc/mqueue.c b/ipc/mqueue.c index a8ddadbc7459..916785363f0f 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -602,7 +602,7 @@ static struct file *do_create(struct dentry *dir, struct dentry *dentry, dentry->d_fsdata = attr; } - mode &= ~current->fs->umask; + mode &= ~current_umask(); ret = mnt_want_write(mqueue_mnt); if (ret) goto out; diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 8cbddff6c283..2bfc64786765 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -66,6 +66,7 @@ #include #include #include +#include #include "audit.h" diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c index 667c841c2952..c35452cadded 100644 --- a/kernel/exec_domain.c +++ b/kernel/exec_domain.c @@ -18,6 +18,7 @@ #include #include #include +#include static void default_handler(int, struct pt_regs *); @@ -145,28 +146,6 @@ __set_personality(u_long personality) return 0; } - if (atomic_read(¤t->fs->count) != 1) { - struct fs_struct *fsp, *ofsp; - - fsp = copy_fs_struct(current->fs); - if (fsp == NULL) { - module_put(ep->module); - return -ENOMEM; - } - - task_lock(current); - ofsp = current->fs; - current->fs = fsp; - task_unlock(current); - - put_fs_struct(ofsp); - } - - /* - * At that point we are guaranteed to be the sole owner of - * current->fs. - */ - current->personality = personality; oep = current_thread_info()->exec_domain; current_thread_info()->exec_domain = ep; diff --git a/kernel/exit.c b/kernel/exit.c index 3bec141c82f6..6686ed1e4aa3 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -420,7 +421,6 @@ EXPORT_SYMBOL(disallow_signal); void daemonize(const char *name, ...) { va_list args; - struct fs_struct *fs; sigset_t blocked; va_start(args, name); @@ -453,11 +453,7 @@ void daemonize(const char *name, ...) /* Become as one with the init task */ - exit_fs(current); /* current->fs->count--; */ - fs = init_task.fs; - current->fs = fs; - atomic_inc(&fs->count); - + daemonize_fs_struct(); exit_files(current); current->files = init_task.files; atomic_inc(¤t->files->count); @@ -556,30 +552,6 @@ void exit_files(struct task_struct *tsk) } } -void put_fs_struct(struct fs_struct *fs) -{ - /* No need to hold fs->lock if we are killing it */ - if (atomic_dec_and_test(&fs->count)) { - path_put(&fs->root); - path_put(&fs->pwd); - kmem_cache_free(fs_cachep, fs); - } -} - -void exit_fs(struct task_struct *tsk) -{ - struct fs_struct * fs = tsk->fs; - - if (fs) { - task_lock(tsk); - tsk->fs = NULL; - task_unlock(tsk); - put_fs_struct(fs); - } -} - -EXPORT_SYMBOL_GPL(exit_fs); - #ifdef CONFIG_MM_OWNER /* * Task p is exiting and it owned mm, lets find a new owner for it diff --git a/kernel/fork.c b/kernel/fork.c index f74458231449..660c2b8765bc 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -60,6 +60,7 @@ #include #include #include +#include #include #include @@ -681,38 +682,21 @@ fail_nomem: return retval; } -static struct fs_struct *__copy_fs_struct(struct fs_struct *old) -{ - struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL); - /* We don't need to lock fs - think why ;-) */ - if (fs) { - atomic_set(&fs->count, 1); - rwlock_init(&fs->lock); - fs->umask = old->umask; - read_lock(&old->lock); - fs->root = old->root; - path_get(&old->root); - fs->pwd = old->pwd; - path_get(&old->pwd); - read_unlock(&old->lock); - } - return fs; -} - -struct fs_struct *copy_fs_struct(struct fs_struct *old) -{ - return __copy_fs_struct(old); -} - -EXPORT_SYMBOL_GPL(copy_fs_struct); - static int copy_fs(unsigned long clone_flags, struct task_struct *tsk) { + struct fs_struct *fs = current->fs; if (clone_flags & CLONE_FS) { - atomic_inc(¤t->fs->count); + /* tsk->fs is already what we want */ + write_lock(&fs->lock); + if (fs->in_exec) { + write_unlock(&fs->lock); + return -EAGAIN; + } + fs->users++; + write_unlock(&fs->lock); return 0; } - tsk->fs = __copy_fs_struct(current->fs); + tsk->fs = copy_fs_struct(fs); if (!tsk->fs) return -ENOMEM; return 0; @@ -1544,12 +1528,16 @@ static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp) { struct fs_struct *fs = current->fs; - if ((unshare_flags & CLONE_FS) && - (fs && atomic_read(&fs->count) > 1)) { - *new_fsp = __copy_fs_struct(current->fs); - if (!*new_fsp) - return -ENOMEM; - } + if (!(unshare_flags & CLONE_FS) || !fs) + return 0; + + /* don't need lock here; in the worst case we'll do useless copy */ + if (fs->users == 1) + return 0; + + *new_fsp = copy_fs_struct(fs); + if (!*new_fsp) + return -ENOMEM; return 0; } @@ -1665,8 +1653,13 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) if (new_fs) { fs = current->fs; + write_lock(&fs->lock); current->fs = new_fs; - new_fs = fs; + if (--fs->users) + new_fs = NULL; + else + new_fs = fs; + write_unlock(&fs->lock); } if (new_mm) { @@ -1705,7 +1698,7 @@ bad_unshare_cleanup_sigh: bad_unshare_cleanup_fs: if (new_fs) - put_fs_struct(new_fs); + free_fs_struct(new_fs); bad_unshare_cleanup_thread: bad_unshare_out: diff --git a/kernel/sys.c b/kernel/sys.c index 742cefa527e6..51dbb55604e8 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index baac91049b0e..9dcc6e7f96ec 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -832,7 +832,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) * All right, let's create it. */ mode = S_IFSOCK | - (SOCK_INODE(sock)->i_mode & ~current->fs->umask); + (SOCK_INODE(sock)->i_mode & ~current_umask()); err = mnt_want_write(nd.path.mnt); if (err) goto out_mknod_dput; diff --git a/security/tomoyo/realpath.c b/security/tomoyo/realpath.c index d47f16b844b2..3bbe01a7a4b5 100644 --- a/security/tomoyo/realpath.c +++ b/security/tomoyo/realpath.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "common.h" #include "realpath.h"