LWN.net Logo

[PATCH] allowing user mounts (updated for 2.6.5)

From:  Miklos Szeredi <mszeredi@inf.bme.hu>
To:  linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org
Subject:  [RFC] [PATCH] allowing user mounts (updated for 2.6.5)
Date:  Mon, 5 Apr 2004 12:38:28 +0200 (MET DST)

This patch allows the use of the mount syscall by non-root users in a
controlled and secure way.  For a detailed explanation of what this
patch does, see the earlier release notes:

  http://marc.theaimsgroup.com/?l=linux-kernel&m=107701231914499&w=2

Comments and suggestions are welcome.

Miklos

diff -rup linux-2.6.5.orig/fs/filesystems.c linux-2.6.5/fs/filesystems.c
--- linux-2.6.5.orig/fs/filesystems.c	2003-12-18 03:59:18.000000000 +0100
+++ linux-2.6.5/fs/filesystems.c	2004-04-05 10:54:37.000000000 +0200
@@ -222,7 +222,8 @@ struct file_system_type *get_fs_type(con
 	if (fs && !try_module_get(fs->owner))
 		fs = NULL;
 	read_unlock(&file_systems_lock);
-	if (!fs && (request_module("%s", name) == 0)) {
+	if (!fs && capable(CAP_SYS_ADMIN) && 
+	    (request_module("%s", name) == 0)) {
 		read_lock(&file_systems_lock);
 		fs = *(find_filesystem(name));
 		if (fs && !try_module_get(fs->owner))
diff -rup linux-2.6.5.orig/fs/namespace.c linux-2.6.5/fs/namespace.c
--- linux-2.6.5.orig/fs/namespace.c	2004-04-05 10:50:46.000000000 +0200
+++ linux-2.6.5/fs/namespace.c	2004-04-05 10:54:37.000000000 +0200
@@ -25,13 +25,16 @@
 
 extern int __init init_rootfs(void);
 extern int __init sysfs_init(void);
+extern void put_filesystem(struct file_system_type *fs);
+
+#define MAX_MOUNTS 256
 
 /* spinlock for vfsmount related operations, inplace of dcache_lock */
 spinlock_t vfsmount_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
 static struct list_head *mount_hashtable;
 static int hash_mask, hash_bits;
 static kmem_cache_t *mnt_cache; 
-
+struct mounts_stat_struct mounts_stat = { .max_mounts = MAX_MOUNTS };
 static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
 {
 	unsigned long tmp = ((unsigned long) mnt / L1_CACHE_BYTES);
@@ -40,10 +43,38 @@ static inline unsigned long hash(struct 
 	return tmp & hash_mask;
 }
 
+static inline int inc_nr_mounts(void)
+{
+	int err = 0;
+	spin_lock(&vfsmount_lock);
+	if (capable(CAP_SYS_ADMIN) ||
+	    mounts_stat.nr_mounts < mounts_stat.max_mounts)
+		mounts_stat.nr_mounts++;
+	else
+		err = mounts_stat.max_mounts ? -EMFILE : -EPERM;
+	spin_unlock(&vfsmount_lock);
+	return err;
+}
+
+static inline void dec_nr_mounts(void)
+{
+	spin_lock(&vfsmount_lock);
+	mounts_stat.nr_mounts--;
+	spin_unlock(&vfsmount_lock);		
+}
+
 struct vfsmount *alloc_vfsmnt(const char *name)
 {
-	struct vfsmount *mnt = kmem_cache_alloc(mnt_cache, GFP_KERNEL); 
-	if (mnt) {
+	struct vfsmount *mnt;
+	int err = inc_nr_mounts();
+	if (err)
+		return ERR_PTR(err);
+
+	mnt = kmem_cache_alloc(mnt_cache, GFP_KERNEL); 
+	if (!mnt) {
+		dec_nr_mounts();
+		return ERR_PTR(-ENOMEM);
+	} else {
 		memset(mnt, 0, sizeof(struct vfsmount));
 		atomic_set(&mnt->mnt_count,1);
 		INIT_LIST_HEAD(&mnt->mnt_hash);
@@ -66,6 +97,7 @@ void free_vfsmnt(struct vfsmount *mnt)
 {
 	kfree(mnt->mnt_devname);
 	kmem_cache_free(mnt_cache, mnt);
+	dec_nr_mounts();
 }
 
 /*
@@ -147,13 +179,14 @@ clone_mnt(struct vfsmount *old, struct d
 	struct super_block *sb = old->mnt_sb;
 	struct vfsmount *mnt = alloc_vfsmnt(old->mnt_devname);
 
-	if (mnt) {
+	if (!IS_ERR(mnt)) {
 		mnt->mnt_flags = old->mnt_flags;
 		atomic_inc(&sb->s_active);
 		mnt->mnt_sb = sb;
 		mnt->mnt_root = dget(root);
 		mnt->mnt_mountpoint = mnt->mnt_root;
 		mnt->mnt_parent = mnt;
+		mnt->user = capable(CAP_SYS_ADMIN) ? 0 : current->fsuid;
 	}
 	return mnt;
 }
@@ -238,6 +271,8 @@ static int show_vfsmnt(struct seq_file *
 		if (mnt->mnt_flags & fs_infop->flag)
 			seq_puts(m, fs_infop->str);
 	}
+	if (mnt->user)
+		seq_printf(m, ",user=%i", mnt->user);
 	if (mnt->mnt_sb->s_op->show_options)
 		err = mnt->mnt_sb->s_op->show_options(m, mnt);
 	seq_puts(m, " 0 0\n");
@@ -388,8 +423,10 @@ asmlinkage long sys_umount(char __user *
 		goto dput_and_out;
 
 	retval = -EPERM;
-	if (!capable(CAP_SYS_ADMIN))
-		goto dput_and_out;
+	if (!capable(CAP_SYS_ADMIN)) {
+		if(nd.mnt->user != current->fsuid || (flags & MNT_FORCE))
+			goto dput_and_out;
+	}
 
 	retval = do_umount(nd.mnt, flags);
 dput_and_out:
@@ -409,20 +446,15 @@ asmlinkage long sys_oldumount(char __use
 
 static int mount_is_safe(struct nameidata *nd)
 {
-	if (capable(CAP_SYS_ADMIN))
-		return 0;
-	return -EPERM;
-#ifdef notyet
-	if (S_ISLNK(nd->dentry->d_inode->i_mode))
-		return -EPERM;
-	if (nd->dentry->d_inode->i_mode & S_ISVTX) {
-		if (current->uid != nd->dentry->d_inode->i_uid)
+	if (!capable(CAP_SYS_ADMIN)) {
+		if (!S_ISDIR(nd->dentry->d_inode->i_mode) &&
+		    !S_ISREG(nd->dentry->d_inode->i_mode))
+			return -EPERM;
+		if (current->fsuid != nd->dentry->d_inode->i_uid ||
+		    permission(nd->dentry->d_inode, MAY_WRITE, nd))
 			return -EPERM;
 	}
-	if (permission(nd->dentry->d_inode, MAY_WRITE, nd))
-		return -EPERM;
 	return 0;
-#endif
 }
 
 static int
@@ -444,8 +476,8 @@ static struct vfsmount *copy_tree(struct
 	struct nameidata nd;
 
 	res = q = clone_mnt(mnt, dentry);
-	if (!q)
-		goto Enomem;
+	if (IS_ERR(q))
+		goto out_error;
 	q->mnt_mountpoint = mnt->mnt_mountpoint;
 
 	p = mnt;
@@ -463,8 +495,8 @@ static struct vfsmount *copy_tree(struct
 			nd.mnt = q;
 			nd.dentry = p->mnt_mountpoint;
 			q = clone_mnt(p, p->mnt_root);
-			if (!q)
-				goto Enomem;
+			if (IS_ERR(q))
+				goto out_error;
 			spin_lock(&vfsmount_lock);
 			list_add_tail(&q->mnt_list, &res->mnt_list);
 			attach_mnt(q, &nd);
@@ -472,13 +504,13 @@ static struct vfsmount *copy_tree(struct
 		}
 	}
 	return res;
- Enomem:
+ out_error:
 	if (res) {
 		spin_lock(&vfsmount_lock);
 		umount_tree(res);
 		spin_unlock(&vfsmount_lock);
 	}
-	return NULL;
+	return q;
 }
 
 static int graft_tree(struct vfsmount *mnt, struct nameidata *nd)
@@ -538,11 +570,14 @@ static int do_loopback(struct nameidata 
 	down_write(&current->namespace->sem);
 	err = -EINVAL;
 	if (check_mnt(nd->mnt) && (!recurse || check_mnt(old_nd.mnt))) {
-		err = -ENOMEM;
 		if (recurse)
 			mnt = copy_tree(old_nd.mnt, old_nd.dentry);
 		else
 			mnt = clone_mnt(old_nd.mnt, old_nd.dentry);
+		if (IS_ERR(mnt)) {
+			err = PTR_ERR(mnt);
+			goto out;
+		}
 	}
 
 	if (mnt) {
@@ -555,6 +590,7 @@ static int do_loopback(struct nameidata 
 			mntput(mnt);
 	}
 
+ out:
 	up_write(&current->namespace->sem);
 	path_release(&old_nd);
 	return err;
@@ -654,14 +690,28 @@ static int do_add_mount(struct nameidata
 			int mnt_flags, char *name, void *data)
 {
 	struct vfsmount *mnt;
-	int err;
+	int err = mount_is_safe(nd);
+	if(err)
+		return err;
 
 	if (!type || !memchr(type, 0, PAGE_SIZE))
 		return -EINVAL;
 
 	/* we need capabilities... */
-	if (!capable(CAP_SYS_ADMIN))
-		return -EPERM;
+	if (!capable(CAP_SYS_ADMIN)) {
+		/* but allow "safe" filesystems anyway */
+		int issafe = 0;
+		struct file_system_type *t = get_fs_type(type);
+		if(t) {
+			issafe = t->fs_flags & FS_SAFE;
+			put_filesystem(t);
+		}
+		if(!issafe)
+			return -EPERM;
+
+		/* users should not have suid or dev files */
+		mnt_flags |= (MNT_NOSUID | MNT_NODEV);
+	}
 
 	mnt = do_kern_mount(type, flags, name, data);
 	err = PTR_ERR(mnt);
@@ -801,6 +851,7 @@ int copy_namespace(int flags, struct tas
 	struct namespace *new_ns;
 	struct vfsmount *rootmnt = NULL, *pwdmnt = NULL, *altrootmnt = NULL;
 	struct fs_struct *fs = tsk->fs;
+	int err;
 
 	if (!namespace)
 		return 0;
@@ -810,11 +861,7 @@ int copy_namespace(int flags, struct tas
 	if (!(flags & CLONE_NEWNS))
 		return 0;
 
-	if (!capable(CAP_SYS_ADMIN)) {
-		put_namespace(namespace);
-		return -EPERM;
-	}
-
+	err = -ENOMEM;
 	new_ns = kmalloc(sizeof(struct namespace), GFP_KERNEL);
 	if (!new_ns)
 		goto out;
@@ -826,7 +873,8 @@ int copy_namespace(int flags, struct tas
 	down_write(&tsk->namespace->sem);
 	/* First pass: copy the tree topology */
 	new_ns->root = copy_tree(namespace->root, namespace->root->mnt_root);
-	if (!new_ns->root) {
+	if (IS_ERR(new_ns->root)) {
+		err = PTR_ERR(new_ns->root);
 		up_write(&tsk->namespace->sem);
 		kfree(new_ns);
 		goto out;
@@ -876,7 +924,7 @@ int copy_namespace(int flags, struct tas
 
 out:
 	put_namespace(namespace);
-	return -ENOMEM;
+	return err;
 }
 
 asmlinkage long sys_mount(char __user * dev_name, char __user * dir_name,
diff -rup linux-2.6.5.orig/fs/super.c linux-2.6.5/fs/super.c
--- linux-2.6.5.orig/fs/super.c	2004-04-05 10:50:46.000000000 +0200
+++ linux-2.6.5/fs/super.c	2004-04-05 11:02:10.000000000 +0200
@@ -726,7 +726,7 @@ struct vfsmount *
 do_kern_mount(const char *fstype, int flags, const char *name, void *data)
 {
 	struct file_system_type *type = get_fs_type(fstype);
-	struct super_block *sb = ERR_PTR(-ENOMEM);
+	struct super_block *sb;
 	struct vfsmount *mnt;
 	int error;
 	char *secdata = NULL;
@@ -735,24 +735,23 @@ do_kern_mount(const char *fstype, int fl
 		return ERR_PTR(-ENODEV);
 
 	mnt = alloc_vfsmnt(name);
-	if (!mnt)
+	error = PTR_ERR(mnt);
+	if (IS_ERR(mnt))
 		goto out;
 
 	if (data) {
 		secdata = alloc_secdata();
-		if (!secdata) {
-			sb = ERR_PTR(-ENOMEM);
+		error = -ENOMEM;
+		if (!secdata)
 			goto out_mnt;
-		}
 
 		error = security_sb_copy_data(type, data, secdata);
-		if (error) {
-			sb = ERR_PTR(error);
+		if (error)
 			goto out_free_secdata;
-		}
 	}
 
 	sb = type->get_sb(type, flags, name, data);
+	error = PTR_ERR(sb);
 	if (IS_ERR(sb))
 		goto out_free_secdata;
  	error = security_sb_kern_mount(sb, secdata);
@@ -762,20 +761,20 @@ do_kern_mount(const char *fstype, int fl
 	mnt->mnt_root = dget(sb->s_root);
 	mnt->mnt_mountpoint = sb->s_root;
 	mnt->mnt_parent = mnt;
+	mnt->user = capable(CAP_SYS_ADMIN) ? 0 : current->fsuid;
 	up_write(&sb->s_umount);
 	put_filesystem(type);
 	return mnt;
 out_sb:
 	up_write(&sb->s_umount);
 	deactivate_super(sb);
-	sb = ERR_PTR(error);
 out_free_secdata:
 	free_secdata(secdata);
 out_mnt:
 	free_vfsmnt(mnt);
 out:
 	put_filesystem(type);
-	return (struct vfsmount *)sb;
+	return ERR_PTR(error);
 }
 
 struct vfsmount *kern_mount(struct file_system_type *type)
diff -rup linux-2.6.5.orig/include/linux/fs.h linux-2.6.5/include/linux/fs.h
--- linux-2.6.5.orig/include/linux/fs.h	2004-04-05 10:50:50.000000000 +0200
+++ linux-2.6.5/include/linux/fs.h	2004-04-05 11:03:55.000000000 +0200
@@ -55,6 +55,12 @@ struct files_stat_struct {
 };
 extern struct files_stat_struct files_stat;
 
+struct mounts_stat_struct {
+	int nr_mounts;
+	int max_mounts;
+};
+extern struct mounts_stat_struct mounts_stat;
+
 struct inodes_stat_t {
 	int nr_inodes;
 	int nr_unused;
@@ -90,6 +96,7 @@ extern int leases_enable, dir_notify_ena
 /* public flags for file_system_type */
 #define FS_REQUIRES_DEV 1 
 #define FS_BINARY_MOUNTDATA 2
+#define FS_SAFE		4	/* Safe to mount by user */
 #define FS_REVAL_DOT	16384	/* Check the paths ".", ".." for staleness */
 #define FS_ODD_RENAME	32768	/* Temporary stuff; will go away as soon
 				  * as nfs_rename() will be cleaned up
diff -rup linux-2.6.5.orig/include/linux/mount.h linux-2.6.5/include/linux/mount.h
--- linux-2.6.5.orig/include/linux/mount.h	2003-12-18 03:58:08.000000000 +0100
+++ linux-2.6.5/include/linux/mount.h	2004-04-05 10:54:37.000000000 +0200
@@ -30,6 +30,7 @@ struct vfsmount
 	atomic_t mnt_count;
 	int mnt_flags;
 	char *mnt_devname;		/* Name of device e.g. /dev/dsk/hda1 */
+	uid_t user;
 	struct list_head mnt_list;
 };
 
diff -rup linux-2.6.5.orig/include/linux/sysctl.h linux-2.6.5/include/linux/sysctl.h
--- linux-2.6.5.orig/include/linux/sysctl.h	2004-04-05 10:50:50.000000000 +0200
+++ linux-2.6.5/include/linux/sysctl.h	2004-04-05 10:54:37.000000000 +0200
@@ -624,8 +624,8 @@ enum
 	FS_NRFILE=6,	/* int:current number of allocated filedescriptors */
 	FS_MAXFILE=7,	/* int:maximum number of filedescriptors that can be allocated */
 	FS_DENTRY=8,
-	FS_NRSUPER=9,	/* int:current number of allocated super_blocks */
-	FS_MAXSUPER=10,	/* int:maximum number of super_blocks that can be allocated */
+	FS_NRMOUNT=9,	/* int:current number of mounts */
+	FS_MAXMOUNT=10,	/* int:maximum number of mounts allowed */
 	FS_OVERFLOWUID=11,	/* int: overflow UID */
 	FS_OVERFLOWGID=12,	/* int: overflow GID */
 	FS_LEASES=13,	/* int: leases enabled */
diff -rup linux-2.6.5.orig/kernel/sysctl.c linux-2.6.5/kernel/sysctl.c
--- linux-2.6.5.orig/kernel/sysctl.c	2004-04-05 10:50:51.000000000 +0200
+++ linux-2.6.5/kernel/sysctl.c	2004-04-05 10:54:37.000000000 +0200
@@ -793,6 +793,22 @@ static ctl_table fs_table[] = {
 		.proc_handler	= &proc_dointvec,
 	},
 	{
+		.ctl_name	= FS_NRMOUNT,
+		.procname	= "mount-nr",
+		.data		= &mounts_stat.nr_mounts,
+		.maxlen		= sizeof(int),
+		.mode		= 0444,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
+		.ctl_name	= FS_MAXMOUNT,
+		.procname	= "mount-max",
+		.data		= &mounts_stat.max_mounts,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+	{
 		.ctl_name	= FS_OVERFLOWUID,
 		.procname	= "overflowuid",
 		.data		= &fs_overflowuid,

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Copyright © 2004, Eklektix, Inc.
Comments and public postings are copyrighted by their creators.
Linux is a registered trademark of Linus Torvalds