User: Password:
|
|
Subscribe / Log in / New account

ioctl rework #2

From:  Linux Kernel Mailing List <linux-kernel@vger.kernel.org>
To:  bk-commits-head@vger.kernel.org
Subject:  [PATCH] ioctl rework #2
Date:  Sat, 15 Jan 2005 23:38:01 +0000
Archive-link:  Article, Thread

ChangeSet 1.2423, 2005/01/15 15:38:01-08:00, hch@infradead.org

	[PATCH] ioctl rework #2
	
	- add ->unlocked_ioctl method and a do_ioctl wrapper in ioctl.c so all
	  places calling ->ioctl get it.  THis provides us a patch to migrate away
	  from holding bkl across ioctl implementations.
	
	- add ->compat_ioctl method and call it in compat_sys_ioctl before doing
	  the hash lookup for registered handlers.
	
	- streamline compat_sys_ioctl and move the complex error reporting into a
	  function of its own
	
	From: "Michael S. Tsirkin" <mst@mellanox.co.il>
	
	Handle generic ioctl commands by falling back on static conversion
	functions in fs/compat_ioctl.c on -ENOIOCTLCMD code.
	
	From: "Michael S. Tsirkin" <mst@mellanox.co.il>
	
	With new unlocked_ioctl and ioctl_compat, ioctls can now be as fast as
	read/write.  So lets use fget_light/fput_light there, to get some speedup
	in common case on SMP.
	
	Signed-off-by: Michael s. Tsirkin <mst@mellanox.co.il>
	Signed-off-by: Andrew Morton <akpm@osdl.org>
	Signed-off-by: Linus Torvalds <torvalds@osdl.org>



 Documentation/filesystems/Locking |    7 ++
 fs/compat.c                       |  127
++++++++++++++++++++------------------
 fs/ioctl.c                        |   64 +++++++++++++------
 include/linux/fs.h                |    6 +
 4 files changed, 126 insertions(+), 78 deletions(-)


diff -Nru a/Documentation/filesystems/Locking
b/Documentation/filesystems/Locking
--- a/Documentation/filesystems/Locking	2005-01-15 16:32:43 -08:00
+++ b/Documentation/filesystems/Locking	2005-01-15 16:32:43 -08:00
@@ -350,6 +350,8 @@
 	unsigned int (*poll) (struct file *, struct poll_table_struct *);
 	int (*ioctl) (struct inode *, struct file *, unsigned int,
 			unsigned long);
+	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
+	long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
 	int (*mmap) (struct file *, struct vm_area_struct *);
 	int (*open) (struct inode *, struct file *);
 	int (*flush) (struct file *);
@@ -383,6 +385,8 @@
 readdir: 		no
 poll:			no
 ioctl:			yes	(see below)
+unlocked_ioctl:		no	(see below)
+compat_ioctl:		no
 mmap:			no
 open:			maybe	(see below)
 flush:			no
@@ -427,6 +431,9 @@
 ->ioctl() or kill the latter completely. One of the problems is that for
 anything that resembles union-mount we won't have a struct file for all
 components. And there are other reasons why the current interface is a
mess...
+
+->ioctl() on regular files is superceded by the ->unlocked_ioctl() that
+doesn't take the BKL.
 
 ->read on directories probably must go away - we should just enforce -EISDIR
 in sys_read() and friends.
diff -Nru a/fs/compat.c b/fs/compat.c
--- a/fs/compat.c	2005-01-15 16:32:43 -08:00
+++ b/fs/compat.c	2005-01-15 16:32:43 -08:00
@@ -397,77 +397,90 @@
 }
 EXPORT_SYMBOL(unregister_ioctl32_conversion); 
 
+static void compat_ioctl_error(struct file *filp, unsigned int fd,
+		unsigned int cmd, unsigned long arg)
+{
+	char buf[10];
+	char *fn = "?";
+	char *path;
+
+	/* find the name of the device. */
+	path = (char *)__get_free_page(GFP_KERNEL);
+	if (path) {
+		fn = d_path(filp->f_dentry, filp->f_vfsmnt, path, PAGE_SIZE);
+		if (IS_ERR(fn))
+			fn = "?";
+	}
+
+	sprintf(buf,"'%c'", (cmd>>24) & 0x3f);
+	if (!isprint(buf[1]))
+		sprintf(buf, "%02x", buf[1]);
+	printk("ioctl32(%s:%d): Unknown cmd fd(%d) "
+			"cmd(%08x){%s} arg(%08x) on %s\n",
+			current->comm, current->pid,
+			(int)fd, (unsigned int)cmd, buf,
+			(unsigned int)arg, fn);
+
+	if (path)
+		free_page((unsigned long)path);
+}
+
 asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
 				unsigned long arg)
 {
-	struct file * filp;
+	struct file *filp;
 	int error = -EBADF;
 	struct ioctl_trans *t;
+	int fput_needed;
 
-	filp = fget(fd);
-	if(!filp)
-		goto out2;
-
-	if (!filp->f_op || !filp->f_op->ioctl) {
-		error = sys_ioctl (fd, cmd, arg);
+	filp = fget_light(fd, &fput_needed);
+	if (!filp)
 		goto out;
+
+	if (filp->f_op && filp->f_op->compat_ioctl) {
+		error = filp->f_op->compat_ioctl(filp, cmd, arg);
+		if (error != -ENOIOCTLCMD)
+			goto out_fput;
 	}
 
-	down_read(&ioctl32_sem);
+	if (!filp->f_op ||
+	    (!filp->f_op->ioctl && !filp->f_op->unlocked_ioctl))
+		goto do_ioctl;
 
-	t = ioctl32_hash_table[ioctl32_hash (cmd)];
+	down_read(&ioctl32_sem);
+	for (t = ioctl32_hash_table[ioctl32_hash(cmd)]; t; t = t->next) {
+		if (t->cmd == cmd)
+			goto found_handler;
+	}
+	up_read(&ioctl32_sem);
 
-	while (t && t->cmd != cmd)
-		t = t->next;
-	if (t) {
-		if (t->handler) { 
-			lock_kernel();
-			error = t->handler(fd, cmd, arg, filp);
-			unlock_kernel();
-			up_read(&ioctl32_sem);
-		} else {
-			up_read(&ioctl32_sem);
-			error = sys_ioctl(fd, cmd, arg);
-		}
+	if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
+		error = siocdevprivate_ioctl(fd, cmd, arg);
 	} else {
+		static int count;
+
+		if (++count <= 50)
+			compat_ioctl_error(filp, fd, cmd, arg);
+		error = -EINVAL;
+	}
+
+	goto out_fput;
+
+ found_handler:
+	if (t->handler) {
+		lock_kernel();
+		error = t->handler(fd, cmd, arg, filp);
+		unlock_kernel();
 		up_read(&ioctl32_sem);
-		if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
-			error = siocdevprivate_ioctl(fd, cmd, arg);
-		} else {
-			static int count;
-			if (++count <= 50) {
-				char buf[10];
-				char *fn = "?";
-				char *path;
-
-				path = (char *)__get_free_page(GFP_KERNEL);
-
-				/* find the name of the device. */
-				if (path) {
-			       		fn = d_path(filp->f_dentry,
-						filp->f_vfsmnt, path,
-						PAGE_SIZE);
-					if (IS_ERR(fn))
-						fn = "?";
-				}
-
-				sprintf(buf,"'%c'", (cmd>>24) & 0x3f);
-				if (!isprint(buf[1]))
-				    sprintf(buf, "%02x", buf[1]);
-				printk("ioctl32(%s:%d): Unknown cmd fd(%d) "
-					"cmd(%08x){%s} arg(%08x) on %s\n",
-					current->comm, current->pid,
-					(int)fd, (unsigned int)cmd, buf,
-					(unsigned int)arg, fn);
-				if (path)
-					free_page((unsigned long)path);
-			}
-			error = -EINVAL;
-		}
+		goto out_fput;
 	}
-out:
-	fput(filp);
-out2:
+
+	up_read(&ioctl32_sem);
+ do_ioctl:
+	error = sys_ioctl(fd, cmd, arg);
+ out_fput:
+	fput_light(filp, fput_needed);
+ out:
 	return error;
 }
 
diff -Nru a/fs/ioctl.c b/fs/ioctl.c
--- a/fs/ioctl.c	2005-01-15 16:32:43 -08:00
+++ b/fs/ioctl.c	2005-01-15 16:32:43 -08:00
@@ -16,7 +16,32 @@
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
 
-static int file_ioctl(struct file *filp,unsigned int cmd,unsigned long arg)
+static long do_ioctl(struct file *filp, unsigned int cmd,
+		unsigned long arg)
+{
+	int error = -ENOTTY;
+
+	if (!filp->f_op)
+		goto out;
+
+	if (filp->f_op->unlocked_ioctl) {
+		error = filp->f_op->unlocked_ioctl(filp, cmd, arg);
+		if (error == -ENOIOCTLCMD)
+			error = -EINVAL;
+		goto out;
+	} else if (filp->f_op->ioctl) {
+		lock_kernel();
+		error = filp->f_op->ioctl(filp->f_dentry->d_inode,
+					  filp, cmd, arg);
+		unlock_kernel();
+	}
+
+ out:
+	return error;
+}
+
+static int file_ioctl(struct file *filp, unsigned int cmd,
+		unsigned long arg)
 {
 	int error;
 	int block;
@@ -36,7 +61,9 @@
 			if ((error = get_user(block, p)) != 0)
 				return error;
 
+			lock_kernel();
 			res = mapping->a_ops->bmap(mapping, block);
+			unlock_kernel();
 			return put_user(res, p);
 		}
 		case FIGETBSZ:
@@ -46,29 +73,26 @@
 		case FIONREAD:
 			return put_user(i_size_read(inode) - filp->f_pos, p);
 	}
-	if (filp->f_op && filp->f_op->ioctl)
-		return filp->f_op->ioctl(inode, filp, cmd, arg);
-	return -ENOTTY;
+
+	return do_ioctl(filp, cmd, arg);
 }
 
 
 asmlinkage long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long
arg)
-{	
+{
 	struct file * filp;
 	unsigned int flag;
 	int on, error = -EBADF;
+	int fput_needed;
 
-	filp = fget(fd);
+	filp = fget_light(fd, &fput_needed);
 	if (!filp)
 		goto out;
 
 	error = security_file_ioctl(filp, cmd, arg);
-	if (error) {
-                fput(filp);
-                goto out;
-        }
+	if (error)
+		goto out_fput;
 
-	lock_kernel();
 	switch (cmd) {
 		case FIOCLEX:
 			set_close_on_exec(fd, 1);
@@ -100,8 +124,11 @@
 
 			/* Did FASYNC state change ? */
 			if ((flag ^ filp->f_flags) & FASYNC) {
-				if (filp->f_op && filp->f_op->fasync)
+				if (filp->f_op && filp->f_op->fasync) {
+					lock_kernel();
 					error = filp->f_op->fasync(fd, filp, on);
+					unlock_kernel();
+				}
 				else error = -ENOTTY;
 			}
 			if (error != 0)
@@ -124,16 +151,15 @@
 				error = -ENOTTY;
 			break;
 		default:
-			error = -ENOTTY;
 			if (S_ISREG(filp->f_dentry->d_inode->i_mode))
 				error = file_ioctl(filp, cmd, arg);
-			else if (filp->f_op && filp->f_op->ioctl)
-				error = filp->f_op->ioctl(filp->f_dentry->d_inode, filp, cmd, arg);
+			else
+				error = do_ioctl(filp, cmd, arg);
+			break;
 	}
-	unlock_kernel();
-	fput(filp);
-
-out:
+ out_fput:
+	fput_light(filp, fput_needed);
+ out:
 	return error;
 }
 
diff -Nru a/include/linux/fs.h b/include/linux/fs.h
--- a/include/linux/fs.h	2005-01-15 16:32:43 -08:00
+++ b/include/linux/fs.h	2005-01-15 16:32:43 -08:00
@@ -907,8 +907,8 @@
 
 /*
  * NOTE:
- * read, write, poll, fsync, readv, writev can be called
- *   without the big kernel lock held in all filesystems.
+ * read, write, poll, fsync, readv, writev, unlocked_ioctl and compat_ioctl
+ * can be called without the big kernel lock held in all filesystems.
  */
 struct file_operations {
 	struct module *owner;
@@ -920,6 +920,8 @@
 	int (*readdir) (struct file *, void *, filldir_t);
 	unsigned int (*poll) (struct file *, struct poll_table_struct *);
 	int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long);
+	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
+	long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
 	int (*mmap) (struct file *, struct vm_area_struct *);
 	int (*open) (struct inode *, struct file *);
 	int (*flush) (struct file *);


Copyright © 2005, Eklektix, Inc.
Comments and public postings are copyrighted by their creators.
Linux is a registered trademark of Linus Torvalds