LWN.net Logo

Quota SMP locking

From:  Jan Kara <jack@suse.cz>
To:  torvalds@transmeta.com
Subject:  [PATCH] Quota SMP locking
Date:  Thu, 7 Nov 2002 19:59:47 +0100
Cc:  linux-kernel@vger.kernel.org

  Hi Linus,

  I'm sending you a patch with a rewrite of the SMP locking in the quota
  code in the kernel - the patch removes the BKL and replaces it with two
  spinlocks protecting the quota lists and the data stored in dquot
  structures. Also, non-SMP locking was changed a bit to make SMP locking
  easier (e.g. we got rid of the not very nice dq_dup_ref counters). Would
  you please apply the patch? (I hope this kind of patch won't fall into
  the 'frozen' category :)).

  								Honza
-- 
Jan Kara <jack@suse.cz>
SuSE CR Labs

diff -ruNX /home/jack/.kerndiffexclude linux-2.5.45/fs/dquot.c linux-2.5.45-1-smplocks/fs/dquot.c
--- linux-2.5.45/fs/dquot.c	Thu Oct 17 11:13:14 2002
+++ linux-2.5.45-1-smplocks/fs/dquot.c	Thu Nov  7 10:44:11 2002
@@ -49,6 +49,9 @@
  *		formats registering.
  *		Jan Kara, <jack@suse.cz>, 2001,2002
  *
+ *		New SMP locking.
+ *		Jan Kara, <jack@suse.cz>, 10/2002
+ *
  * (C) Copyright 1994 - 1997 Marco van Wieringen 
  */
 
@@ -72,15 +75,32 @@
 
 #include <asm/uaccess.h>
 
+#define __DQUOT_PARANOIA
+
+/*
+ * There are two quota SMP locks. dq_list_lock protects all lists with quotas
+ * and quota formats and also dqstats structure containing statistics about the
+ * lists. dq_data_lock protects data from dq_dqb and also mem_dqinfo structures
+ * and also guards consistency of dquot->dq_dqb with inode->i_blocks, i_bytes.
+ * Note that we don't have to do the locking of i_blocks and i_bytes when the
+ * quota is disabled - i_sem should serialize the access. dq_data_lock should
+ * be always grabbed before dq_list_lock.
+ *
+ * Note that some things (eg. sb pointer, type, id) doesn't change during
+ * the life of the dquot structure and so needn't to be protected by a lock
+ */
+spinlock_t dq_list_lock = SPIN_LOCK_UNLOCKED;
+spinlock_t dq_data_lock = SPIN_LOCK_UNLOCKED;
+
 static char *quotatypes[] = INITQFNAMES;
 static struct quota_format_type *quota_formats;	/* List of registered formats */
 
 int register_quota_format(struct quota_format_type *fmt)
 {
-	lock_kernel();
+	spin_lock(&dq_list_lock);
 	fmt->qf_next = quota_formats;
 	quota_formats = fmt;
-	unlock_kernel();
+	spin_unlock(&dq_list_lock);
 	return 0;
 }
 
@@ -88,22 +108,22 @@
 {
 	struct quota_format_type **actqf;
 
-	lock_kernel();
+	spin_lock(&dq_list_lock);
 	for (actqf = &quota_formats; *actqf && *actqf != fmt; actqf = &(*actqf)->qf_next);
 	if (*actqf)
 		*actqf = (*actqf)->qf_next;
-	unlock_kernel();
+	spin_unlock(&dq_list_lock);
 }
 
 static struct quota_format_type *find_quota_format(int id)
 {
 	struct quota_format_type *actqf;
 
-	lock_kernel();
+	spin_lock(&dq_list_lock);
 	for (actqf = quota_formats; actqf && actqf->qf_fmt_id != id; actqf = actqf->qf_next);
 	if (actqf && !try_inc_mod_count(actqf->qf_owner))
 		actqf = NULL;
-	unlock_kernel();
+	spin_unlock(&dq_list_lock);
 	return actqf;
 }
 
@@ -135,18 +155,20 @@
  */
 
 /*
- * Note that any operation which operates on dquot data (ie. dq_dqb) mustn't
- * block while it's updating/reading it. Otherwise races would occur.
+ * Note that any operation which operates on dquot data (ie. dq_dqb) must
+ * hold dq_data_lock.
  *
- * Locked dquots might not be referenced in inodes - operations like
- * add_dquot_space() does dqduplicate() and would complain. Currently
- * dquot it locked only once in its existence - when it's being read
- * to memory on first dqget() and at that time it can't be referenced
- * from inode. Write operations on dquots don't hold dquot lock as they
- * copy data to internal buffers before writing anyway and copying as well
- * as any data update should be atomic. Also nobody can change used
- * entries in dquot structure as this is done only when quota is destroyed
- * and invalidate_dquots() waits for dquot to have dq_count == 0.
+ * Any operation working with dquots must hold dqoff_sem. If operation is
+ * just reading pointers from inodes than read lock is enough. If pointers
+ * are altered function must hold write lock.
+ *
+ * Locked dquots might not be referenced in inodes. Currently dquot it locked
+ * only once in its existence - when it's being read to memory on first dqget()
+ * and at that time it can't be referenced from inode. Write operations on
+ * dquots don't hold dquot lock as they copy data to internal buffers before
+ * writing anyway and copying as well as any data update should be atomic. Also
+ * nobody can change used entries in dquot structure as this is done only when
+ * quota is destroyed and invalidate_dquots() is called only when dq_count == 0.
  */
 
 static LIST_HEAD(inuse_list);
@@ -155,34 +177,14 @@
 
 struct dqstats dqstats;
 
-static void dqput(struct dquot *);
-static struct dquot *dqduplicate(struct dquot *);
-
-static inline void get_dquot_ref(struct dquot *dquot)
-{
-	dquot->dq_count++;
-}
-
-static inline void put_dquot_ref(struct dquot *dquot)
-{
-	dquot->dq_count--;
-}
-
-static inline void get_dquot_dup_ref(struct dquot *dquot)
-{
-	dquot->dq_dup_ref++;
-}
-
-static inline void put_dquot_dup_ref(struct dquot *dquot)
-{
-	dquot->dq_dup_ref--;
-}
-
 static inline int const hashfn(struct super_block *sb, unsigned int id, int type)
 {
 	return((((unsigned long)sb>>L1_CACHE_SHIFT) ^ id) * (MAXQUOTAS - type)) % NR_DQHASH;
 }
 
+/*
+ * Following list functions expect dq_list_lock to be held
+ */
 static inline void insert_dquot_hash(struct dquot *dquot)
 {
 	struct list_head *head = dquot_hash + hashfn(dquot->dq_sb, dquot->dq_id, dquot->dq_type);
@@ -207,13 +209,6 @@
 	return NODQUOT;
 }
 
-/* Add a dquot to the head of the free list */
-static inline void put_dquot_head(struct dquot *dquot)
-{
-	list_add(&dquot->dq_free, &free_dquots);
-	dqstats.free_dquots++;
-}
-
 /* Add a dquot to the tail of the free list */
 static inline void put_dquot_last(struct dquot *dquot)
 {
@@ -221,13 +216,6 @@
 	dqstats.free_dquots++;
 }
 
-/* Move dquot to the head of free list (it must be already on it) */
-static inline void move_dquot_head(struct dquot *dquot)
-{
-	list_del(&dquot->dq_free);
-	list_add(&dquot->dq_free, &free_dquots);
-}
-
 static inline void remove_free_dquot(struct dquot *dquot)
 {
 	if (list_empty(&dquot->dq_free))
@@ -250,69 +238,10 @@
 	list_del(&dquot->dq_inuse);
 }
 
-static void __wait_on_dquot(struct dquot *dquot)
-{
-	DECLARE_WAITQUEUE(wait, current);
-
-	add_wait_queue(&dquot->dq_wait_lock, &wait);
-repeat:
-	set_current_state(TASK_UNINTERRUPTIBLE);
-	if (dquot->dq_flags & DQ_LOCKED) {
-		schedule();
-		goto repeat;
-	}
-	remove_wait_queue(&dquot->dq_wait_lock, &wait);
-	current->state = TASK_RUNNING;
-}
-
-static inline void wait_on_dquot(struct dquot *dquot)
-{
-	if (dquot->dq_flags & DQ_LOCKED)
-		__wait_on_dquot(dquot);
-}
-
-static inline void lock_dquot(struct dquot *dquot)
-{
-	wait_on_dquot(dquot);
-	dquot->dq_flags |= DQ_LOCKED;
-}
-
-static inline void unlock_dquot(struct dquot *dquot)
-{
-	dquot->dq_flags &= ~DQ_LOCKED;
-	wake_up(&dquot->dq_wait_lock);
-}
-
-/* Wait for dquot to be unused */
-static void __wait_dquot_unused(struct dquot *dquot)
-{
-	DECLARE_WAITQUEUE(wait, current);
-
-	add_wait_queue(&dquot->dq_wait_free, &wait);
-repeat:
-	set_current_state(TASK_UNINTERRUPTIBLE);
-	if (dquot->dq_count) {
-		schedule();
-		goto repeat;
-	}
-	remove_wait_queue(&dquot->dq_wait_free, &wait);
-	current->state = TASK_RUNNING;
-}
-
-/* Wait for all duplicated dquot references to be dropped */
-static void __wait_dup_drop(struct dquot *dquot)
+static void wait_on_dquot(struct dquot *dquot)
 {
-	DECLARE_WAITQUEUE(wait, current);
-
-	add_wait_queue(&dquot->dq_wait_free, &wait);
-repeat:
-	set_current_state(TASK_UNINTERRUPTIBLE);
-	if (dquot->dq_dup_ref) {
-		schedule();
-		goto repeat;
-	}
-	remove_wait_queue(&dquot->dq_wait_free, &wait);
-	current->state = TASK_RUNNING;
+	down(&dquot->dq_lock);
+	up(&dquot->dq_lock);
 }
 
 static int read_dqblk(struct dquot *dquot)
@@ -320,11 +249,11 @@
 	int ret;
 	struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);
 
-	lock_dquot(dquot);
+	down(&dquot->dq_lock);
 	down(&dqopt->dqio_sem);
 	ret = dqopt->ops[dquot->dq_type]->read_dqblk(dquot);
 	up(&dqopt->dqio_sem);
-	unlock_dquot(dquot);
+	up(&dquot->dq_lock);
 	return ret;
 }
 
@@ -339,36 +268,35 @@
 	return ret;
 }
 
-/* Invalidate all dquots on the list, wait for all users. Note that this function is called
- * after quota is disabled so no new quota might be created. As we only insert to the end of
- * inuse list, we don't have to restart searching... */
+/* Invalidate all dquots on the list. Note that this function is called after
+ * quota is disabled so no new quota might be created. Because we hold dqoff_sem
+ * for writing and pointers were already removed from inodes we actually know that
+ * no quota for this sb+type should be held. */
 static void invalidate_dquots(struct super_block *sb, int type)
 {
 	struct dquot *dquot;
 	struct list_head *head;
 
-restart:
-	list_for_each(head, &inuse_list) {
+	spin_lock(&dq_list_lock);
+	for (head = inuse_list.next; head != &inuse_list;) {
 		dquot = list_entry(head, struct dquot, dq_inuse);
+		head = head->next;
 		if (dquot->dq_sb != sb)
 			continue;
 		if (dquot->dq_type != type)
 			continue;
-		dquot->dq_flags |= DQ_INVAL;
-		if (dquot->dq_count)
-			/*
-			 *  Wait for any users of quota. As we have already cleared the flags in
-			 *  superblock and cleared all pointers from inodes we are assured
-			 *  that there will be no new users of this quota.
-			 */
-			__wait_dquot_unused(dquot);
+#ifdef __DQUOT_PARANOIA	
+		/* There should be no users of quota - we hold dqoff_sem for writing */
+		if (atomic_read(&dquot->dq_count))
+			BUG();
+#endif
 		/* Quota now have no users and it has been written on last dqput() */
 		remove_dquot_hash(dquot);
 		remove_free_dquot(dquot);
 		remove_inuse(dquot);
 		kmem_cache_free(dquot_cachep, dquot);
-		goto restart;
 	}
+	spin_unlock(&dq_list_lock);
 }
 
 static int vfs_quota_sync(struct super_block *sb, int type)
@@ -378,7 +306,14 @@
 	struct quota_info *dqopt = sb_dqopt(sb);
 	int cnt;
 
+	down_read(&dqopt->dqoff_sem);
 restart:
+	/* At this point any dirty dquot will definitely be written so we can clear
+	   dirty flag from info */
+	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+		if ((cnt == type || type == -1) && sb_has_quota_enabled(sb, cnt))
+			clear_bit(DQF_ANY_DQUOT_DIRTY_B, &dqopt->info[cnt].dqi_flags);
+	spin_lock(&dq_list_lock);
 	list_for_each(head, &inuse_list) {
 		dquot = list_entry(head, struct dquot, dq_inuse);
 		if (sb && dquot->dq_sb != sb)
@@ -387,26 +322,24 @@
 			continue;
 		if (!dquot->dq_sb)	/* Invalidated? */
 			continue;
-		if (!dquot_dirty(dquot) && !(dquot->dq_flags & DQ_LOCKED))
+		if (!dquot_dirty(dquot))
 			continue;
-		/* Get reference to quota so it won't be invalidated. get_dquot_ref()
-		 * is enough since if dquot is locked/modified it can't be
-		 * on the free list */
-		get_dquot_ref(dquot);
-		if (dquot->dq_flags & DQ_LOCKED)
-			wait_on_dquot(dquot);
-		if (dquot_dirty(dquot))
-			commit_dqblk(dquot);
-		dqput(dquot);
+		spin_unlock(&dq_list_lock);
+		commit_dqblk(dquot);
 		goto restart;
 	}
+	spin_unlock(&dq_list_lock);
+
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
-		if ((cnt == type || type == -1) && sb_has_quota_enabled(sb, cnt))
-			dqopt->info[cnt].dqi_flags &= ~DQF_ANY_DQUOT_DIRTY;
-	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
-		if ((cnt == type || type == -1) && sb_has_quota_enabled(sb, cnt) && info_dirty(&dqopt->info[cnt]))
+		if ((cnt == type || type == -1) && sb_has_quota_enabled(sb, cnt) && info_dirty(&dqopt->info[cnt])) {
+			down(&dqopt->dqio_sem);
 			dqopt->ops[cnt]->write_file_info(sb, cnt);
+			up(&dqopt->dqio_sem);
+		}
+	spin_lock(&dq_list_lock);
 	dqstats.syncs++;
+	spin_unlock(&dq_list_lock);
+	up_read(&dqopt->dqoff_sem);
 
 	return 0;
 }
@@ -423,7 +356,7 @@
 
 		for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++)
 			if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt)
-			    && sb_dqopt(sb)->info[cnt].dqi_flags & DQF_ANY_DQUOT_DIRTY)
+			    && info_any_dquot_dirty(&sb_dqopt(sb)->info[cnt]))
 				dirty = 1;
 		if (!dirty)
 			continue;
@@ -443,17 +376,13 @@
 void sync_dquots(struct super_block *sb, int type)
 {
 	if (sb) {
-		lock_kernel();
 		if (sb->s_qcop->quota_sync)
 			sb->s_qcop->quota_sync(sb, type);
-		unlock_kernel();
 	}
 	else {
 		while ((sb = get_super_to_sync(type))) {
-			lock_kernel();
 			if (sb->s_qcop->quota_sync)
 				sb->s_qcop->quota_sync(sb, type);
-			unlock_kernel();
 			drop_super(sb);
 		}
 	}
@@ -484,60 +413,60 @@
 
 static int shrink_dqcache_memory(int nr, unsigned int gfp_mask)
 {
-	if (nr) {
-		lock_kernel();
+	int ret;
+
+	spin_lock(&dq_list_lock);
+	if (nr)
 		prune_dqcache(nr);
-		unlock_kernel();
-	}
-	return dqstats.allocated_dquots;
+	ret = dqstats.allocated_dquots;
+	spin_unlock(&dq_list_lock);
+	return ret;
 }
 
 /*
  * Put reference to dquot
  * NOTE: If you change this function please check whether dqput_blocks() works right...
+ * MUST be called with dqoff_sem held
  */
 static void dqput(struct dquot *dquot)
 {
 	if (!dquot)
 		return;
 #ifdef __DQUOT_PARANOIA
-	if (!dquot->dq_count) {
+	if (!atomic_read(&dquot->dq_count)) {
 		printk("VFS: dqput: trying to free free dquot\n");
 		printk("VFS: device %s, dquot of %s %d\n",
 			dquot->dq_sb->s_id,
 			quotatypes[dquot->dq_type],
 			dquot->dq_id);
-		return;
+		BUG();
 	}
 #endif
-
+	
+	spin_lock(&dq_list_lock);
 	dqstats.drops++;
+	spin_unlock(&dq_list_lock);
 we_slept:
-	if (dquot->dq_dup_ref && dquot->dq_count - dquot->dq_dup_ref <= 1) {	/* Last unduplicated reference? */
-		__wait_dup_drop(dquot);
-		goto we_slept;
-	}
-	if (dquot->dq_count > 1) {
-		/* We have more than one user... We can simply decrement use count */
-		put_dquot_ref(dquot);
+	spin_lock(&dq_list_lock);
+	if (atomic_read(&dquot->dq_count) > 1) {
+		/* We have more than one user... nothing to do */
+		atomic_dec(&dquot->dq_count);
+		spin_unlock(&dq_list_lock);
 		return;
 	}
 	if (dquot_dirty(dquot)) {
+		spin_unlock(&dq_list_lock);
 		commit_dqblk(dquot);
 		goto we_slept;
 	}
-
+	atomic_dec(&dquot->dq_count);
+#ifdef __DQUOT_PARANOIA
 	/* sanity check */
-	if (!list_empty(&dquot->dq_free)) {
-		printk(KERN_ERR "dqput: dquot already on free list??\n");
-		put_dquot_ref(dquot);
-		return;
-	}
-	put_dquot_ref(dquot);
-	/* If dquot is going to be invalidated invalidate_dquots() is going to free it so */
-	if (!(dquot->dq_flags & DQ_INVAL))
-		put_dquot_last(dquot);	/* Place at end of LRU free queue */
-	wake_up(&dquot->dq_wait_free);
+	if (!list_empty(&dquot->dq_free))
+		BUG();
+#endif
+	put_dquot_last(dquot);
+	spin_unlock(&dq_list_lock);
 }
 
 static struct dquot *get_empty_dquot(struct super_block *sb, int type)
@@ -549,99 +478,66 @@
 		return NODQUOT;
 
 	memset((caddr_t)dquot, 0, sizeof(struct dquot));
-	init_waitqueue_head(&dquot->dq_wait_free);
-	init_waitqueue_head(&dquot->dq_wait_lock);
+	sema_init(&dquot->dq_lock, 1);
 	INIT_LIST_HEAD(&dquot->dq_free);
 	INIT_LIST_HEAD(&dquot->dq_inuse);
 	INIT_LIST_HEAD(&dquot->dq_hash);
 	dquot->dq_sb = sb;
 	dquot->dq_type = type;
-	dquot->dq_count = 1;
-	/* all dquots go on the inuse_list */
-	put_inuse(dquot);
+	atomic_set(&dquot->dq_count, 1);
 
 	return dquot;
 }
 
+/*
+ * Get reference to dquot
+ * MUST be called with dqoff_sem held
+ */
 static struct dquot *dqget(struct super_block *sb, unsigned int id, int type)
 {
 	unsigned int hashent = hashfn(sb, id, type);
 	struct dquot *dquot, *empty = NODQUOT;
-	struct quota_info *dqopt = sb_dqopt(sb);
 
+        if (!sb_has_quota_enabled(sb, type))
+		return NODQUOT;
 we_slept:
-        if (!is_enabled(dqopt, type)) {
-		if (empty)
-			dqput(empty);
-                return NODQUOT;
-	}
-
+	spin_lock(&dq_list_lock);
 	if ((dquot = find_dquot(hashent, sb, id, type)) == NODQUOT) {
 		if (empty == NODQUOT) {
+			spin_unlock(&dq_list_lock);
 			if ((empty = get_empty_dquot(sb, type)) == NODQUOT)
 				schedule();	/* Try to wait for a moment... */
 			goto we_slept;
 		}
 		dquot = empty;
 		dquot->dq_id = id;
+		/* all dquots go on the inuse_list */
+		put_inuse(dquot);
 		/* hash it first so it can be found */
 		insert_dquot_hash(dquot);
+		dqstats.lookups++;
+		spin_unlock(&dq_list_lock);
 		read_dqblk(dquot);
 	} else {
-		if (!dquot->dq_count)
+		if (!atomic_read(&dquot->dq_count))
 			remove_free_dquot(dquot);
-		get_dquot_ref(dquot);
+		atomic_inc(&dquot->dq_count);
 		dqstats.cache_hits++;
+		dqstats.lookups++;
+		spin_unlock(&dq_list_lock);
 		wait_on_dquot(dquot);
 		if (empty)
-			dqput(empty);
-	}
-
-	if (!dquot->dq_sb) {	/* Has somebody invalidated entry under us? */
-		printk(KERN_ERR "VFS: dqget(): Quota invalidated in dqget()!\n");
-		dqput(dquot);
-		return NODQUOT;
+			kmem_cache_free(dquot_cachep, empty);
 	}
-	++dquot->dq_referenced;
-	dqstats.lookups++;
 
-	return dquot;
-}
-
-/* Duplicate reference to dquot got from inode */
-static struct dquot *dqduplicate(struct dquot *dquot)
-{
-	if (dquot == NODQUOT)
-		return NODQUOT;
-	get_dquot_ref(dquot);
-	if (!dquot->dq_sb) {
-		printk(KERN_ERR "VFS: dqduplicate(): Invalidated quota to be duplicated!\n");
-		put_dquot_ref(dquot);
-		return NODQUOT;
-	}
-	if (dquot->dq_flags & DQ_LOCKED)
-		printk(KERN_ERR "VFS: dqduplicate(): Locked quota to be duplicated!\n");
-	get_dquot_dup_ref(dquot);
-	dquot->dq_referenced++;
-	dqstats.lookups++;
+#ifdef __DQUOT_PARANOIA
+	if (!dquot->dq_sb)	/* Has somebody invalidated entry under us? */
+		BUG();
+#endif
 
 	return dquot;
 }
 
-/* Put duplicated reference */
-static void dqputduplicate(struct dquot *dquot)
-{
-	if (!dquot->dq_dup_ref) {
-		printk(KERN_ERR "VFS: dqputduplicate(): Duplicated dquot put without duplicate reference.\n");
-		return;
-	}
-	put_dquot_dup_ref(dquot);
-	if (!dquot->dq_dup_ref)
-		wake_up(&dquot->dq_wait_free);
-	put_dquot_ref(dquot);
-	dqstats.drops++;
-}
-
 static int dqinit_needed(struct inode *inode, int type)
 {
 	int cnt;
@@ -656,6 +552,7 @@
 	return 0;
 }
 
+/* This routine is guarded by dqoff_sem semaphore */
 static void add_dquot_ref(struct super_block *sb, int type)
 {
 	struct list_head *p;
@@ -682,14 +579,13 @@
 /* Return 0 if dqput() won't block (note that 1 doesn't necessarily mean blocking) */
 static inline int dqput_blocks(struct dquot *dquot)
 {
-	if (dquot->dq_dup_ref && dquot->dq_count - dquot->dq_dup_ref <= 1)
-		return 1;
-	if (dquot->dq_count <= 1 && dquot->dq_flags & DQ_MOD)
+	if (atomic_read(&dquot->dq_count) <= 1 && dquot_dirty(dquot))
 		return 1;
 	return 0;
 }
 
 /* Remove references to dquots from inode - add dquot to list for freeing if needed */
+/* We can't race with anybody because we hold dqoff_sem for writing... */
 int remove_inode_dquot_ref(struct inode *inode, int type, struct list_head *tofree_head)
 {
 	struct dquot *dquot = inode->i_dquot[type];
@@ -705,9 +601,13 @@
 put_it:
 	if (dquot != NODQUOT) {
 		if (dqput_blocks(dquot)) {
-			if (dquot->dq_count != 1)
-				printk(KERN_WARNING "VFS: Adding dquot with dq_count %d to dispose list.\n", dquot->dq_count);
+#ifdef __DQUOT_PARANOIA
+			if (atomic_read(&dquot->dq_count) != 1)
+				printk(KERN_WARNING "VFS: Adding dquot with dq_count %d to dispose list.\n", atomic_read(&dquot->dq_count));
+#endif
+			spin_lock(&dq_list_lock);
 			list_add(&dquot->dq_free, tofree_head);	/* As dquot must have currently users it can't be on the free list... */
+			spin_unlock(&dq_list_lock);
 			return 1;
 		}
 		else
@@ -717,12 +617,12 @@
 }
 
 /* Free list of dquots - called from inode.c */
+/* dquots are removed from inodes, no new references can be got so we are the only ones holding reference */
 void put_dquot_list(struct list_head *tofree_head)
 {
 	struct list_head *act_head;
 	struct dquot *dquot;
 
-	lock_kernel();
 	act_head = tofree_head->next;
 	/* So now we have dquots on the list... Just free them */
 	while (act_head != tofree_head) {
@@ -731,7 +631,6 @@
 		list_del_init(&dquot->dq_free);	/* Remove dquot from the list so we won't have problems... */
 		dqput(dquot);
 	}
-	unlock_kernel();
 }
 
 static inline void dquot_incr_inodes(struct dquot *dquot, unsigned long number)
@@ -754,7 +653,7 @@
 		dquot->dq_dqb.dqb_curinodes = 0;
 	if (dquot->dq_dqb.dqb_curinodes < dquot->dq_dqb.dqb_isoftlimit)
 		dquot->dq_dqb.dqb_itime = (time_t) 0;
-	dquot->dq_flags &= ~DQ_INODES;
+	clear_bit(DQ_INODES_B, &dquot->dq_flags);
 	mark_dquot_dirty(dquot);
 }
 
@@ -766,17 +665,17 @@
 		dquot->dq_dqb.dqb_curspace = 0;
 	if (toqb(dquot->dq_dqb.dqb_curspace) < dquot->dq_dqb.dqb_bsoftlimit)
 		dquot->dq_dqb.dqb_btime = (time_t) 0;
-	dquot->dq_flags &= ~DQ_BLKS;
+	clear_bit(DQ_BLKS_B, &dquot->dq_flags);
 	mark_dquot_dirty(dquot);
 }
 
-static inline int need_print_warning(struct dquot *dquot, int flag)
+static inline int need_print_warning(struct dquot *dquot)
 {
 	switch (dquot->dq_type) {
 		case USRQUOTA:
-			return current->fsuid == dquot->dq_id && !(dquot->dq_flags & flag);
+			return current->fsuid == dquot->dq_id;
 		case GRPQUOTA:
-			return in_group_p(dquot->dq_id) && !(dquot->dq_flags & flag);
+			return in_group_p(dquot->dq_id);
 	}
 	return 0;
 }
@@ -794,12 +693,11 @@
 static void print_warning(struct dquot *dquot, const char warntype)
 {
 	char *msg = NULL;
-	int flag = (warntype == BHARDWARN || warntype == BSOFTLONGWARN) ? DQ_BLKS :
-	  ((warntype == IHARDWARN || warntype == ISOFTLONGWARN) ? DQ_INODES : 0);
+	int flag = (warntype == BHARDWARN || warntype == BSOFTLONGWARN) ? DQ_BLKS_B :
+	  ((warntype == IHARDWARN || warntype == ISOFTLONGWARN) ? DQ_INODES_B : 0);
 
-	if (!need_print_warning(dquot, flag))
+	if (!need_print_warning(dquot) || (flag && test_and_set_bit(flag, &dquot->dq_flags)))
 		return;
-	dquot->dq_flags |= flag;
 	tty_write_message(current->tty, dquot->dq_sb->s_id);
 	if (warntype == ISOFTWARN || warntype == BSOFTWARN)
 		tty_write_message(current->tty, ": warning, ");
@@ -846,10 +744,11 @@
 	    (info->dqi_format->qf_fmt_id != QFMT_VFS_OLD || !(info->dqi_flags & V1_DQF_RSQUASH));
 }
 
+/* needs dq_data_lock */
 static int check_idq(struct dquot *dquot, ulong inodes, char *warntype)
 {
 	*warntype = NOWARN;
-	if (inodes <= 0 || dquot->dq_flags & DQ_FAKE)
+	if (inodes <= 0 || test_bit(DQ_FAKE_B, &dquot->dq_flags))
 		return QUOTA_OK;
 
 	if (dquot->dq_dqb.dqb_ihardlimit &&
@@ -877,10 +776,11 @@
 	return QUOTA_OK;
 }
 
+/* needs dq_data_lock */
 static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *warntype)
 {
 	*warntype = 0;
-	if (space <= 0 || dquot->dq_flags & DQ_FAKE)
+	if (space <= 0 || test_bit(DQ_FAKE_B, &dquot->dq_flags))
 		return QUOTA_OK;
 
 	if (dquot->dq_dqb.dqb_bhardlimit &&
@@ -925,19 +825,19 @@
  */
 void dquot_initialize(struct inode *inode, int type)
 {
-	struct dquot *dquot[MAXQUOTAS];
 	unsigned int id = 0;
 	int cnt;
 
-	if (IS_NOQUOTA(inode))
+	down_write(&sb_dqopt(inode->i_sb)->dqoff_sem);
+	/* Having dqoff lock we know NOQUOTA flags can't be altered... */
+	if (IS_NOQUOTA(inode)) {
+		up_write(&sb_dqopt(inode->i_sb)->dqoff_sem);
 		return;
-	/* Build list of quotas to initialize... We can block here */
+	}
+	/* Build list of quotas to initialize... */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		dquot[cnt] = NODQUOT;
 		if (type != -1 && cnt != type)
 			continue;
-		if (!sb_has_quota_enabled(inode->i_sb, cnt))
-			continue;
 		if (inode->i_dquot[cnt] == NODQUOT) {
 			switch (cnt) {
 				case USRQUOTA:
@@ -947,22 +847,12 @@
 					id = inode->i_gid;
 					break;
 			}
-			dquot[cnt] = dqget(inode->i_sb, id, cnt);
+			inode->i_dquot[cnt] = dqget(inode->i_sb, id, cnt);
+			if (inode->i_dquot[cnt])
+				inode->i_flags |= S_QUOTA;
 		}
 	}
-	/* NOBLOCK START: Here we shouldn't block */
-	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (dquot[cnt] == NODQUOT || !sb_has_quota_enabled(inode->i_sb, cnt) || inode->i_dquot[cnt] != NODQUOT)
-			continue;
-		inode->i_dquot[cnt] = dquot[cnt];
-		dquot[cnt] = NODQUOT;
-		inode->i_flags |= S_QUOTA;
-	}
-	/* NOBLOCK END */
-	/* Put quotas which we didn't use */
-	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
-		if (dquot[cnt] != NODQUOT)
-			dqput(dquot[cnt]);
+	up_write(&sb_dqopt(inode->i_sb)->dqoff_sem);
 }
 
 /*
@@ -970,57 +860,56 @@
  *
  * Note: this is a blocking operation.
  */
-void dquot_drop(struct inode *inode)
+static void dquot_drop_nolock(struct inode *inode)
 {
-	struct dquot *dquot;
 	int cnt;
 
 	inode->i_flags &= ~S_QUOTA;
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		if (inode->i_dquot[cnt] == NODQUOT)
 			continue;
-		dquot = inode->i_dquot[cnt];
+		dqput(inode->i_dquot[cnt]);
 		inode->i_dquot[cnt] = NODQUOT;
-		dqput(dquot);
 	}
 }
 
+void dquot_drop(struct inode *inode)
+{
+	down_write(&sb_dqopt(inode->i_sb)->dqoff_sem);
+	dquot_drop_nolock(inode);
+	up_write(&sb_dqopt(inode->i_sb)->dqoff_sem);
+}
+
 /*
  * This operation can block, but only after everything is updated
  */
 int dquot_alloc_space(struct inode *inode, qsize_t number, int warn)
 {
 	int cnt, ret = NO_QUOTA;
-	struct dquot *dquot[MAXQUOTAS];
 	char warntype[MAXQUOTAS];
 
-	lock_kernel();
-	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		dquot[cnt] = NODQUOT;
+	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
 		warntype[cnt] = NOWARN;
-	}
-	/* NOBLOCK Start */
+
+	down_read(&sb_dqopt(inode->i_sb)->dqoff_sem);
+	spin_lock(&dq_data_lock);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		dquot[cnt] = dqduplicate(inode->i_dquot[cnt]);
-		if (dquot[cnt] == NODQUOT)
+		if (inode->i_dquot[cnt] == NODQUOT)
 			continue;
-		if (check_bdq(dquot[cnt], number, warn, warntype+cnt) == NO_QUOTA)
+		if (check_bdq(inode->i_dquot[cnt], number, warn, warntype+cnt) == NO_QUOTA)
 			goto warn_put_all;
 	}
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (dquot[cnt] == NODQUOT)
+		if (inode->i_dquot[cnt] == NODQUOT)
 			continue;
-		dquot_incr_space(dquot[cnt], number);
+		dquot_incr_space(inode->i_dquot[cnt], number);
 	}
 	inode_add_bytes(inode, number);
-	/* NOBLOCK End */
 	ret = QUOTA_OK;
 warn_put_all:
-	flush_warnings(dquot, warntype);
-	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
-		if (dquot[cnt] != NODQUOT)
-			dqputduplicate(dquot[cnt]);
-	unlock_kernel();
+	spin_unlock(&dq_data_lock);
+	flush_warnings(inode->i_dquot, warntype);
+	up_read(&sb_dqopt(inode->i_sb)->dqoff_sem);
 	return ret;
 }
 
@@ -1030,36 +919,29 @@
 int dquot_alloc_inode(const struct inode *inode, unsigned long number)
 {
 	int cnt, ret = NO_QUOTA;
-	struct dquot *dquot[MAXQUOTAS];
 	char warntype[MAXQUOTAS];
 
-	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		dquot[cnt] = NODQUOT;
+	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
 		warntype[cnt] = NOWARN;
-	}
-	/* NOBLOCK Start */
-	lock_kernel();
+	down_read(&sb_dqopt(inode->i_sb)->dqoff_sem);
+	spin_lock(&dq_data_lock);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		dquot[cnt] = dqduplicate(inode -> i_dquot[cnt]);
-		if (dquot[cnt] == NODQUOT)
+		if (inode->i_dquot[cnt] == NODQUOT)
 			continue;
-		if (check_idq(dquot[cnt], number, warntype+cnt) == NO_QUOTA)
+		if (check_idq(inode->i_dquot[cnt], number, warntype+cnt) == NO_QUOTA)
 			goto warn_put_all;
 	}
 
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (dquot[cnt] == NODQUOT)
+		if (inode->i_dquot[cnt] == NODQUOT)
 			continue;
-		dquot_incr_inodes(dquot[cnt], number);
+		dquot_incr_inodes(inode->i_dquot[cnt], number);
 	}
-	/* NOBLOCK End */
 	ret = QUOTA_OK;
 warn_put_all:
-	flush_warnings(dquot, warntype);
-	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
-		if (dquot[cnt] != NODQUOT)
-			dqputduplicate(dquot[cnt]);
-	unlock_kernel();
+	spin_unlock(&dq_data_lock);
+	flush_warnings((struct dquot **)inode->i_dquot, warntype);
+	up_read(&sb_dqopt(inode->i_sb)->dqoff_sem);
 	return ret;
 }
 
@@ -1069,20 +951,17 @@
 void dquot_free_space(struct inode *inode, qsize_t number)
 {
 	unsigned int cnt;
-	struct dquot *dquot;
 
-	/* NOBLOCK Start */
-	lock_kernel();
+	down_read(&sb_dqopt(inode->i_sb)->dqoff_sem);
+	spin_lock(&dq_data_lock);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		dquot = dqduplicate(inode->i_dquot[cnt]);
-		if (dquot == NODQUOT)
+		if (inode->i_dquot[cnt] == NODQUOT)
 			continue;
-		dquot_decr_space(dquot, number);
-		dqputduplicate(dquot);
+		dquot_decr_space(inode->i_dquot[cnt], number);
 	}
 	inode_sub_bytes(inode, number);
-	unlock_kernel();
-	/* NOBLOCK End */
+	spin_unlock(&dq_data_lock);
+	up_read(&sb_dqopt(inode->i_sb)->dqoff_sem);
 }
 
 /*
@@ -1091,19 +970,16 @@
 void dquot_free_inode(const struct inode *inode, unsigned long number)
 {
 	unsigned int cnt;
-	struct dquot *dquot;
 
-	/* NOBLOCK Start */
-	lock_kernel();
+	down_read(&sb_dqopt(inode->i_sb)->dqoff_sem);
+	spin_lock(&dq_data_lock);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		dquot = dqduplicate(inode->i_dquot[cnt]);
-		if (dquot == NODQUOT)
+		if (inode->i_dquot[cnt] == NODQUOT)
 			continue;
-		dquot_decr_inodes(dquot, number);
-		dqputduplicate(dquot);
+		dquot_decr_inodes(inode->i_dquot[cnt], number);
 	}
-	unlock_kernel();
-	/* NOBLOCK End */
+	spin_unlock(&dq_data_lock);
+	up_read(&sb_dqopt(inode->i_sb)->dqoff_sem);
 }
 
 /*
@@ -1125,10 +1001,11 @@
 		transfer_to[cnt] = transfer_from[cnt] = NODQUOT;
 		warntype[cnt] = NOWARN;
 	}
+	down_write(&sb_dqopt(inode->i_sb)->dqoff_sem);
+	if (IS_NOQUOTA(inode))	/* File without quota accounting? */
+		goto warn_put_all;
 	/* First build the transfer_to list - here we can block on reading of dquots... */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (!sb_has_quota_enabled(inode->i_sb, cnt))
-			continue;
 		switch (cnt) {
 			case USRQUOTA:
 				if (!chuid)
@@ -1142,16 +1019,13 @@
 				break;
 		}
 	}
-	/* NOBLOCK START: From now on we shouldn't block */
+	spin_lock(&dq_data_lock);
 	space = inode_get_bytes(inode);
 	/* Build the transfer_from list and check the limits */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		/* The second test can fail when quotaoff is in progress... */
-		if (transfer_to[cnt] == NODQUOT || !sb_has_quota_enabled(inode->i_sb, cnt))
-			continue;
-		transfer_from[cnt] = dqduplicate(inode->i_dquot[cnt]);
-		if (transfer_from[cnt] == NODQUOT)	/* Can happen on quotafiles (quota isn't initialized on them)... */
+		if (transfer_to[cnt] == NODQUOT)
 			continue;
+		transfer_from[cnt] = inode->i_dquot[cnt];
 		if (check_idq(transfer_to[cnt], 1, warntype+cnt) == NO_QUOTA ||
 		    check_bdq(transfer_to[cnt], space, 0, warntype+cnt) == NO_QUOTA)
 			goto warn_put_all;
@@ -1162,9 +1036,9 @@
 	 */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		/*
-		 * Skip changes for same uid or gid or for non-existing quota-type.
+		 * Skip changes for same uid or gid or for turned off quota-type.
 		 */
-		if (transfer_from[cnt] == NODQUOT || transfer_to[cnt] == NODQUOT)
+		if (transfer_to[cnt] == NODQUOT)
 			continue;
 
 		dquot_decr_inodes(transfer_from[cnt], 1);
@@ -1173,26 +1047,17 @@
 		dquot_incr_inodes(transfer_to[cnt], 1);
 		dquot_incr_space(transfer_to[cnt], space);
 
-		if (inode->i_dquot[cnt] == NODQUOT)
-			BUG();
 		inode->i_dquot[cnt] = transfer_to[cnt];
-		/*
-		 * We've got to release transfer_from[] twice - once for dquot_transfer() and
-		 * once for inode. We don't want to release transfer_to[] as it's now placed in inode
-		 */
-		transfer_to[cnt] = transfer_from[cnt];
 	}
-	/* NOBLOCK END. From now on we can block as we wish */
 	ret = QUOTA_OK;
 warn_put_all:
+	spin_unlock(&dq_data_lock);
 	flush_warnings(transfer_to, warntype);
-	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		/* First we must put duplicate - otherwise we might deadlock */
-		if (transfer_to[cnt] != NODQUOT)
-			dqputduplicate(transfer_to[cnt]);
+	
+	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
 		if (transfer_from[cnt] != NODQUOT)
 			dqput(transfer_from[cnt]);
-	}
+	up_write(&sb_dqopt(inode->i_sb)->dqoff_sem);
 	return ret;
 }
 
@@ -1244,24 +1109,30 @@
 	int cnt;
 	struct quota_info *dqopt = sb_dqopt(sb);
 
-	lock_kernel();
 	if (!sb)
 		goto out;
 
 	/* We need to serialize quota_off() for device */
-	down(&dqopt->dqoff_sem);
+	down_write(&dqopt->dqoff_sem);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		if (type != -1 && cnt != type)
 			continue;
-		if (!is_enabled(dqopt, cnt))
+		if (!sb_has_quota_enabled(sb, cnt))
 			continue;
 		reset_enable_flags(dqopt, cnt);
 
 		/* Note: these are blocking operations */
 		remove_dquot_ref(sb, cnt);
 		invalidate_dquots(sb, cnt);
-		if (info_dirty(&dqopt->info[cnt]))
+		/*
+		 * Now all dquots should be invalidated and all writes done, so we should be
+		 * the only users of the info. No locks needed.
+		 */
+		if (info_dirty(&dqopt->info[cnt])) {
+			down(&dqopt->dqio_sem);
 			dqopt->ops[cnt]->write_file_info(sb, cnt);
+			up(&dqopt->dqio_sem);
+		}
 		if (dqopt->ops[cnt]->free_file_info)
 			dqopt->ops[cnt]->free_file_info(sb, cnt);
 		put_quota_format(dqopt->info[cnt].dqi_format);
@@ -1273,15 +1144,14 @@
 		dqopt->info[cnt].dqi_bgrace = 0;
 		dqopt->ops[cnt] = NULL;
 	}
-	up(&dqopt->dqoff_sem);
+	up_write(&dqopt->dqoff_sem);
 out:
-	unlock_kernel();
 	return 0;
 }
 
 int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path)
 {
-	struct file *f = NULL;
+	struct file *f;
 	struct inode *inode;
 	struct quota_info *dqopt = sb_dqopt(sb);
 	struct quota_format_type *fmt = find_quota_format(format_id);
@@ -1289,19 +1159,11 @@
 
 	if (!fmt)
 		return -ESRCH;
-	if (is_enabled(dqopt, type)) {
-		error = -EBUSY;
+	f = filp_open(path, O_RDWR, 0600);
+	if (IS_ERR(f)) {
+		error = PTR_ERR(f);
 		goto out_fmt;
 	}
-
-	down(&dqopt->dqoff_sem);
-
-	f = filp_open(path, O_RDWR, 0600);
-
-	error = PTR_ERR(f);
-	if (IS_ERR(f))
-		goto out_lock;
-	dqopt->files[type] = f;
 	error = -EIO;
 	if (!f->f_op || !f->f_op->read || !f->f_op->write)
 		goto out_f;
@@ -1312,30 +1174,41 @@
 	error = -EACCES;
 	if (!S_ISREG(inode->i_mode))
 		goto out_f;
+
+	down_write(&dqopt->dqoff_sem);
+	if (sb_has_quota_enabled(sb, type)) {
+		error = -EBUSY;
+		goto out_lock;
+	}
+	dqopt->files[type] = f;
 	error = -EINVAL;
 	if (!fmt->qf_ops->check_quota_file(sb, type))
-		goto out_f;
+		goto out_lock;
 	/* We don't want quota on quota files */
-	dquot_drop(inode);
+	dquot_drop_nolock(inode);
 	inode->i_flags |= S_NOQUOTA;
 
 	dqopt->ops[type] = fmt->qf_ops;
 	dqopt->info[type].dqi_format = fmt;
-	if ((error = dqopt->ops[type]->read_file_info(sb, type)) < 0)
-		goto out_f;
+	down(&dqopt->dqio_sem);
+	if ((error = dqopt->ops[type]->read_file_info(sb, type)) < 0) {
+		up(&dqopt->dqio_sem);
+		goto out_lock;
+	}
+	up(&dqopt->dqio_sem);
 	set_enable_flags(dqopt, type);
 
 	add_dquot_ref(sb, type);
 
-	up(&dqopt->dqoff_sem);
+	up_write(&dqopt->dqoff_sem);
 	return 0;
 
-out_f:
-	if (f)
-		filp_close(f, NULL);
-	dqopt->files[type] = NULL;
 out_lock:
-	up(&dqopt->dqoff_sem);
+	inode->i_flags &= ~S_NOQUOTA;
+	dqopt->files[type] = NULL;
+	up_write(&dqopt->dqoff_sem);
+out_f:
+	filp_close(f, NULL);
 out_fmt:
 	put_quota_format(fmt);
 
@@ -1347,6 +1220,7 @@
 {
 	struct mem_dqblk *dm = &dquot->dq_dqb;
 
+	spin_lock(&dq_data_lock);
 	di->dqb_bhardlimit = dm->dqb_bhardlimit;
 	di->dqb_bsoftlimit = dm->dqb_bsoftlimit;
 	di->dqb_curspace = dm->dqb_curspace;
@@ -1356,16 +1230,21 @@
 	di->dqb_btime = dm->dqb_btime;
 	di->dqb_itime = dm->dqb_itime;
 	di->dqb_valid = QIF_ALL;
+	spin_unlock(&dq_data_lock);
 }
 
 int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di)
 {
-	struct dquot *dquot = dqget(sb, id, type);
+	struct dquot *dquot;
 
-	if (!dquot)
-		return -EINVAL;
+	down_read(&sb_dqopt(sb)->dqoff_sem);
+	if (!(dquot = dqget(sb, id, type))) {
+		up_read(&sb_dqopt(sb)->dqoff_sem);
+		return -ESRCH;
+	}
 	do_get_dqblk(dquot, di);
 	dqput(dquot);
+	up_read(&sb_dqopt(sb)->dqoff_sem);
 	return 0;
 }
 
@@ -1375,6 +1254,7 @@
 	struct mem_dqblk *dm = &dquot->dq_dqb;
 	int check_blim = 0, check_ilim = 0;
 
+	spin_lock(&dq_data_lock);
 	if (di->dqb_valid & QIF_SPACE) {
 		dm->dqb_curspace = di->dqb_curspace;
 		check_blim = 1;
@@ -1401,7 +1281,7 @@
 	if (check_blim) {
 		if (!dm->dqb_bsoftlimit || toqb(dm->dqb_curspace) < dm->dqb_bsoftlimit) {
 			dm->dqb_btime = 0;
-			dquot->dq_flags &= ~DQ_BLKS;
+			clear_bit(DQ_BLKS_B, &dquot->dq_flags);
 		}
 		else if (!(di->dqb_valid & QIF_BTIME))	/* Set grace only if user hasn't provided his own... */
 			dm->dqb_btime = CURRENT_TIME + sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_bgrace;
@@ -1409,46 +1289,67 @@
 	if (check_ilim) {
 		if (!dm->dqb_isoftlimit || dm->dqb_curinodes < dm->dqb_isoftlimit) {
 			dm->dqb_itime = 0;
-			dquot->dq_flags &= ~DQ_INODES;
+			clear_bit(DQ_INODES_B, &dquot->dq_flags);
 		}
 		else if (!(di->dqb_valid & QIF_ITIME))	/* Set grace only if user hasn't provided his own... */
 			dm->dqb_itime = CURRENT_TIME + sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_igrace;
 	}
 	if (dm->dqb_bhardlimit || dm->dqb_bsoftlimit || dm->dqb_ihardlimit || dm->dqb_isoftlimit)
-		dquot->dq_flags &= ~DQ_FAKE;
+		clear_bit(DQ_FAKE_B, &dquot->dq_flags);
 	else
-		dquot->dq_flags |= DQ_FAKE;
-	dquot->dq_flags |= DQ_MOD;
+		set_bit(DQ_FAKE_B, &dquot->dq_flags);
+	mark_dquot_dirty(dquot);
+	spin_unlock(&dq_data_lock);
 }
 
 int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di)
 {
-	struct dquot *dquot = dqget(sb, id, type);
+	struct dquot *dquot;
 
-	if (!dquot)
-		return -EINVAL;
+	down_read(&sb_dqopt(sb)->dqoff_sem);
+	if (!(dquot = dqget(sb, id, type))) {
+		up_read(&sb_dqopt(sb)->dqoff_sem);
+		return -ESRCH;
+	}
 	do_set_dqblk(dquot, di);
 	dqput(dquot);
+	up_read(&sb_dqopt(sb)->dqoff_sem);
 	return 0;
 }
 
 /* Generic routine for getting common part of quota file information */
 int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
 {
-	struct mem_dqinfo *mi = sb_dqopt(sb)->info + type;
+	struct mem_dqinfo *mi;
 
+	down_read(&sb_dqopt(sb)->dqoff_sem);
+	if (!sb_has_quota_enabled(sb, type)) {
+		up_read(&sb_dqopt(sb)->dqoff_sem);
+		return -ESRCH;
+	}
+	mi = sb_dqopt(sb)->info + type;
+	spin_lock(&dq_data_lock);
 	ii->dqi_bgrace = mi->dqi_bgrace;
 	ii->dqi_igrace = mi->dqi_igrace;
 	ii->dqi_flags = mi->dqi_flags & DQF_MASK;
 	ii->dqi_valid = IIF_ALL;
+	spin_unlock(&dq_data_lock);
+	up_read(&sb_dqopt(sb)->dqoff_sem);
 	return 0;
 }
 
 /* Generic routine for setting common part of quota file information */
 int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
 {
-	struct mem_dqinfo *mi = sb_dqopt(sb)->info + type;
+	struct mem_dqinfo *mi;
 
+	down_read(&sb_dqopt(sb)->dqoff_sem);
+	if (!sb_has_quota_enabled(sb, type)) {
+		up_read(&sb_dqopt(sb)->dqoff_sem);
+		return -ESRCH;
+	}
+	mi = sb_dqopt(sb)->info + type;
+	spin_lock(&dq_data_lock);
 	if (ii->dqi_valid & IIF_BGRACE)
 		mi->dqi_bgrace = ii->dqi_bgrace;
 	if (ii->dqi_valid & IIF_IGRACE)
@@ -1456,6 +1357,8 @@
 	if (ii->dqi_valid & IIF_FLAGS)
 		mi->dqi_flags = (mi->dqi_flags & ~DQF_MASK) | (ii->dqi_flags & DQF_MASK);
 	mark_info_dirty(mi);
+	spin_unlock(&dq_data_lock);
+	up_read(&sb_dqopt(sb)->dqoff_sem);
 	return 0;
 }
 
@@ -1501,7 +1404,7 @@
 	register_sysctl_table(sys_table, 0);
 	for (i = 0; i < NR_DQHASH; i++)
 		INIT_LIST_HEAD(dquot_hash + i);
-	printk(KERN_NOTICE "VFS: Disk quotas v%s\n", __DQUOT_VERSION__);
+	printk(KERN_NOTICE "VFS: Disk quotas %s\n", __DQUOT_VERSION__);
 
 	dquot_cachep = kmem_cache_create("dquot", 
 			sizeof(struct dquot), sizeof(unsigned long) * 4,
@@ -1518,3 +1421,5 @@
 EXPORT_SYMBOL(register_quota_format);
 EXPORT_SYMBOL(unregister_quota_format);
 EXPORT_SYMBOL(dqstats);
+EXPORT_SYMBOL(dq_list_lock);
+EXPORT_SYMBOL(dq_data_lock);
diff -ruNX /home/jack/.kerndiffexclude linux-2.5.45/fs/inode.c linux-2.5.45-1-smplocks/fs/inode.c
--- linux-2.5.45/fs/inode.c	Thu Oct 17 11:13:14 2002
+++ linux-2.5.45-1-smplocks/fs/inode.c	Fri Nov  1 22:32:10 2002
@@ -1094,9 +1094,8 @@
 
 	if (!sb->dq_op)
 		return;	/* nothing to do */
-	/* We have to be protected against other CPUs */
-	lock_kernel();		/* This lock is for quota code */
 	spin_lock(&inode_lock);	/* This lock is for inodes code */
+	/* We don't have to lock against quota code - the IS_QUOTAINIT test is just a speedup... */
  
 	list_for_each(act_head, &inode_in_use) {
 		inode = list_entry(act_head, struct inode, i_list);
@@ -1119,7 +1118,6 @@
 			remove_inode_dquot_ref(inode, type, &tofree_head);
 	}
 	spin_unlock(&inode_lock);
-	unlock_kernel();
 
 	put_dquot_list(&tofree_head);
 }
diff -ruNX /home/jack/.kerndiffexclude linux-2.5.45/fs/quota.c linux-2.5.45-1-smplocks/fs/quota.c
--- linux-2.5.45/fs/quota.c	Sat Oct 12 06:22:45 2002
+++ linux-2.5.45-1-smplocks/fs/quota.c	Fri Nov  1 22:32:10 2002
@@ -84,6 +84,7 @@
 		case Q_SETINFO:
 		case Q_SETQUOTA:
 		case Q_GETQUOTA:
+			/* This is just an informative test, so we are satisfied without a lock */
 			if (!sb_has_quota_enabled(sb, type))
 				return -ESRCH;
 	}
@@ -151,7 +152,13 @@
 		case Q_GETFMT: {
 			__u32 fmt;
 
+			down_read(&sb_dqopt(sb)->dqoff_sem);
+			if (!sb_has_quota_enabled(sb, type)) {
+				up_read(&sb_dqopt(sb)->dqoff_sem);
+				return -ESRCH;
+			}
 			fmt = sb_dqopt(sb)->info[type].dqi_format->qf_fmt_id;
+			up_read(&sb_dqopt(sb)->dqoff_sem);
 			if (copy_to_user(addr, &fmt, sizeof(fmt)))
 				return -EFAULT;
 			return 0;
@@ -244,7 +251,6 @@
 	struct super_block *sb = NULL;
 	int ret = -EINVAL;
 
-	lock_kernel();
 	cmds = cmd >> SUBCMDSHIFT;
 	type = cmd & SUBCMDMASK;
 
@@ -259,6 +265,5 @@
 out:
 	if (sb)
 		drop_super(sb);
-	unlock_kernel();
 	return ret;
 }
diff -ruNX /home/jack/.kerndiffexclude linux-2.5.45/fs/super.c linux-2.5.45-1-smplocks/fs/super.c
--- linux-2.5.45/fs/super.c	Fri Nov  1 22:20:34 2002
+++ linux-2.5.45-1-smplocks/fs/super.c	Fri Nov  1 22:32:10 2002
@@ -67,7 +67,7 @@
 		atomic_set(&s->s_active, 1);
 		sema_init(&s->s_vfs_rename_sem,1);
 		sema_init(&s->s_dquot.dqio_sem, 1);
-		sema_init(&s->s_dquot.dqoff_sem, 1);
+		init_rwsem(&s->s_dquot.dqoff_sem);
 		s->s_maxbytes = MAX_NON_LFS;
 		s->dq_op = sb_dquot_ops;
 		s->s_qcop = sb_quotactl_ops;
diff -ruNX /home/jack/.kerndiffexclude linux-2.5.45/include/linux/quota.h linux-2.5.45-1-smplocks/include/linux/quota.h
--- linux-2.5.45/include/linux/quota.h	Sat Oct 12 06:22:15 2002
+++ linux-2.5.45-1-smplocks/include/linux/quota.h	Fri Nov  1 22:32:10 2002
@@ -37,6 +37,7 @@
 
 #include <linux/errno.h>
 #include <linux/types.h>
+#include <linux/spinlock.h>
 
 #define __DQUOT_VERSION__	"dquot_6.5.1"
 #define __DQUOT_NUM_VERSION__	6*10000+5*100+1
@@ -44,6 +45,9 @@
 typedef __kernel_uid32_t qid_t; /* Type in which we store ids in memory */
 typedef __u64 qsize_t;          /* Type in which we store sizes */
 
+extern spinlock_t dq_list_lock;
+extern spinlock_t dq_data_lock;
+
 /* Size of blocks in which are counted size limits */
 #define QUOTABLOCK_BITS 10
 #define QUOTABLOCK_SIZE (1 << QUOTABLOCK_BITS)
@@ -155,7 +159,7 @@
 
 struct mem_dqinfo {
 	struct quota_format_type *dqi_format;
-	int dqi_flags;
+	unsigned long dqi_flags;
 	unsigned int dqi_bgrace;
 	unsigned int dqi_igrace;
 	union {
@@ -165,18 +169,19 @@
 };
 
 #define DQF_MASK 0xffff		/* Mask for format specific flags */
-#define DQF_INFO_DIRTY 0x10000  /* Is info dirty? */
-#define DQF_ANY_DQUOT_DIRTY 0x20000	/* Is any dquot dirty? */
+#define DQF_INFO_DIRTY_B 16
+#define DQF_ANY_DQUOT_DIRTY_B 17
+#define DQF_INFO_DIRTY (1 << DQF_INFO_DIRTY_B)	/* Is info dirty? */
+#define DQF_ANY_DQUOT_DIRTY (1 << DQF_ANY_DQUOT_DIRTY_B)	/* Is any dquot dirty? */
 
 extern inline void mark_info_dirty(struct mem_dqinfo *info)
 {
-	info->dqi_flags |= DQF_INFO_DIRTY;
+	set_bit(DQF_INFO_DIRTY_B, &info->dqi_flags);
 }
 
-#define info_dirty(info) ((info)->dqi_flags & DQF_INFO_DIRTY)
-
-#define info_any_dirty(info) ((info)->dqi_flags & DQF_INFO_DIRTY ||\
-			      (info)->dqi_flags & DQF_ANY_DQUOT_DIRTY)
+#define info_dirty(info) test_bit(DQF_INFO_DIRTY_B, &(info)->dqi_flags)
+#define info_any_dquot_dirty(info) test_bit(DQF_ANY_DQUOT_DIRTY_B, &(info)->dqi_flags)
+#define info_any_dirty(info) (info_dirty(info) || info_any_dquot_dirty(info))
 
 #define sb_dqopt(sb) (&(sb)->s_dquot)
 
@@ -195,30 +200,29 @@
 
 #define NR_DQHASH 43            /* Just an arbitrary number */
 
-#define DQ_LOCKED     0x01	/* dquot under IO */
-#define DQ_MOD        0x02	/* dquot modified since read */
-#define DQ_BLKS       0x10	/* uid/gid has been warned about blk limit */
-#define DQ_INODES     0x20	/* uid/gid has been warned about inode limit */
-#define DQ_FAKE       0x40	/* no limits only usage */
-#define DQ_INVAL      0x80	/* dquot is going to be invalidated */
+#define DQ_MOD_B	0
+#define DQ_BLKS_B	1
+#define DQ_INODES_B	2
+#define DQ_FAKE_B	3
+
+#define DQ_MOD        (1 << DQ_MOD_B)	/* dquot modified since read */
+#define DQ_BLKS       (1 << DQ_BLKS_B)	/* uid/gid has been warned about blk limit */
+#define DQ_INODES     (1 << DQ_INODES_B)	/* uid/gid has been warned about inode limit */
+#define DQ_FAKE       (1 << DQ_FAKE_B)	/* no limits only usage */
 
 struct dquot {
 	struct list_head dq_hash;	/* Hash list in memory */
 	struct list_head dq_inuse;	/* List of all quotas */
 	struct list_head dq_free;	/* Free list element */
-	wait_queue_head_t dq_wait_lock;	/* Pointer to waitqueue on dquot lock */
-	wait_queue_head_t dq_wait_free;	/* Pointer to waitqueue for quota to be unused */
-	int dq_count;			/* Use count */
-	int dq_dup_ref;			/* Number of duplicated refences */
+	struct semaphore dq_lock;	/* dquot IO lock */
+	atomic_t dq_count;		/* Use count */
 
 	/* fields after this point are cleared when invalidating */
 	struct super_block *dq_sb;	/* superblock this applies to */
 	unsigned int dq_id;		/* ID this applies to (uid, gid) */
 	loff_t dq_off;			/* Offset of dquot on disk */
+	unsigned long dq_flags;		/* See DQ_* */
 	short dq_type;			/* Type of quota */
-	short dq_flags;			/* See DQ_* */
-	unsigned long dq_referenced;	/* Number of times this dquot was 
-					   referenced during its lifetime */
 	struct mem_dqblk dq_dqb;	/* Diskquota usage */
 };
 
@@ -276,7 +280,7 @@
 struct quota_info {
 	unsigned int flags;			/* Flags for diskquotas on this device */
 	struct semaphore dqio_sem;		/* lock device while I/O in progress */
-	struct semaphore dqoff_sem;		/* serialize quota_off() and quota_on() on device */
+	struct rw_semaphore dqoff_sem;		/* serialize quota_off() and quota_on() on device and ops using quota_info struct, pointers from inode to dquots */
 	struct file *files[MAXQUOTAS];		/* fp's to quotafiles */
 	struct mem_dqinfo info[MAXQUOTAS];	/* Information for each quota type */
 	struct quota_format_ops *ops[MAXQUOTAS];	/* Operations for each type */
@@ -284,26 +288,17 @@
 
 /* Inline would be better but we need to dereference super_block which is not defined yet */
 #define mark_dquot_dirty(dquot) do {\
-	dquot->dq_flags |= DQ_MOD;\
-	sb_dqopt(dquot->dq_sb)->info[dquot->dq_type].dqi_flags |= DQF_ANY_DQUOT_DIRTY;\
+	set_bit(DQF_ANY_DQUOT_DIRTY_B, &(sb_dqopt((dquot)->dq_sb)->info[(dquot)->dq_type].dqi_flags));\
+	set_bit(DQ_MOD_B, &(dquot)->dq_flags);\
 } while (0)
 
-#define dquot_dirty(dquot) ((dquot)->dq_flags & DQ_MOD)
-
-static inline int is_enabled(struct quota_info *dqopt, int type)
-{
-	switch (type) {
-		case USRQUOTA:
-			return dqopt->flags & DQUOT_USR_ENABLED;
-		case GRPQUOTA:
-			return dqopt->flags & DQUOT_GRP_ENABLED;
-	}
-	return 0;
-}
+#define dquot_dirty(dquot) test_bit(DQ_MOD_B, &(dquot)->dq_flags)
 
-#define sb_any_quota_enabled(sb) (is_enabled(sb_dqopt(sb), USRQUOTA) | is_enabled(sb_dqopt(sb), GRPQUOTA))
+#define sb_has_quota_enabled(sb, type) ((type)==USRQUOTA ? \
+	(sb_dqopt(sb)->flags & DQUOT_USR_ENABLED) : (sb_dqopt(sb)->flags & DQUOT_GRP_ENABLED))
 
-#define sb_has_quota_enabled(sb, type) (is_enabled(sb_dqopt(sb), type))
+#define sb_any_quota_enabled(sb) (sb_has_quota_enabled(sb, USRQUOTA) | \
+				  sb_has_quota_enabled(sb, GRPQUOTA))
 
 int register_quota_format(struct quota_format_type *fmt);
 void unregister_quota_format(struct quota_format_type *fmt);
diff -ruNX /home/jack/.kerndiffexclude linux-2.5.45/include/linux/quotaops.h linux-2.5.45-1-smplocks/include/linux/quotaops.h
--- linux-2.5.45/include/linux/quotaops.h	Sat Oct 12 06:21:36 2002
+++ linux-2.5.45-1-smplocks/include/linux/quotaops.h	Fri Nov  1 22:32:10 2002
@@ -46,36 +46,31 @@
 {
 	if (!inode->i_sb)
 		BUG();
-	lock_kernel();
 	if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode))
 		inode->i_sb->dq_op->initialize(inode, -1);
-	unlock_kernel();
 }
 
 static __inline__ void DQUOT_DROP(struct inode *inode)
 {
-	lock_kernel();
 	if (IS_QUOTAINIT(inode)) {
 		if (!inode->i_sb)
 			BUG();
 		inode->i_sb->dq_op->drop(inode);	/* Ops must be set when there's any quota... */
 	}
-	unlock_kernel();
 }
 
 static __inline__ int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
 {
-	lock_kernel();
 	if (sb_any_quota_enabled(inode->i_sb)) {
 		/* Used space is updated in alloc_space() */
-		if (inode->i_sb->dq_op->alloc_space(inode, nr, 1) == NO_QUOTA) {
-			unlock_kernel();
+		if (inode->i_sb->dq_op->alloc_space(inode, nr, 1) == NO_QUOTA)
 			return 1;
-		}
 	}
-	else
+	else {
+		spin_lock(&dq_data_lock);
 		inode_add_bytes(inode, nr);
-	unlock_kernel();
+		spin_unlock(&dq_data_lock);
+	}
 	return 0;
 }
 
@@ -89,17 +84,16 @@
 
 static __inline__ int DQUOT_ALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
 {
-	lock_kernel();
 	if (sb_any_quota_enabled(inode->i_sb)) {
 		/* Used space is updated in alloc_space() */
-		if (inode->i_sb->dq_op->alloc_space(inode, nr, 0) == NO_QUOTA) {
-			unlock_kernel();
+		if (inode->i_sb->dq_op->alloc_space(inode, nr, 0) == NO_QUOTA)
 			return 1;
-		}
 	}
-	else
+	else {
+		spin_lock(&dq_data_lock);
 		inode_add_bytes(inode, nr);
-	unlock_kernel();
+		spin_unlock(&dq_data_lock);
+	}
 	return 0;
 }
 
@@ -113,26 +107,23 @@
 
 static __inline__ int DQUOT_ALLOC_INODE(struct inode *inode)
 {
-	lock_kernel();
 	if (sb_any_quota_enabled(inode->i_sb)) {
 		DQUOT_INIT(inode);
-		if (inode->i_sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) {
-			unlock_kernel();
+		if (inode->i_sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA)
 			return 1;
-		}
 	}
-	unlock_kernel();
 	return 0;
 }
 
 static __inline__ void DQUOT_FREE_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
 {
-	lock_kernel();
 	if (sb_any_quota_enabled(inode->i_sb))
 		inode->i_sb->dq_op->free_space(inode, nr);
-	else
+	else {
+		spin_lock(&dq_data_lock);
 		inode_sub_bytes(inode, nr);
-	unlock_kernel();
+		spin_unlock(&dq_data_lock);
+	}
 }
 
 static __inline__ void DQUOT_FREE_SPACE(struct inode *inode, qsize_t nr)
@@ -143,23 +134,17 @@
 
 static __inline__ void DQUOT_FREE_INODE(struct inode *inode)
 {
-	lock_kernel();
 	if (sb_any_quota_enabled(inode->i_sb))
 		inode->i_sb->dq_op->free_inode(inode, 1);
-	unlock_kernel();
 }
 
 static __inline__ int DQUOT_TRANSFER(struct inode *inode, struct iattr *iattr)
 {
-	lock_kernel();
 	if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode)) {
 		DQUOT_INIT(inode);
-		if (inode->i_sb->dq_op->transfer(inode, iattr) == NO_QUOTA) {
-			unlock_kernel();
+		if (inode->i_sb->dq_op->transfer(inode, iattr) == NO_QUOTA)
 			return 1;
-		}
 	}
-	unlock_kernel();
 	return 0;
 }
 
@@ -169,10 +154,8 @@
 {
 	int ret = -ENOSYS;
 
-	lock_kernel();
 	if (sb->s_qcop && sb->s_qcop->quota_off)
 		ret = sb->s_qcop->quota_off(sb, -1);
-	unlock_kernel();
 	return ret;
 }
 
@@ -192,9 +175,7 @@
 #define DQUOT_TRANSFER(inode, iattr)		(0)
 extern __inline__ int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
 {
-	lock_kernel();
 	inode_add_bytes(inode, nr);
-	unlock_kernel();
 	return 0;
 }
 
@@ -207,9 +188,7 @@
 
 extern __inline__ int DQUOT_ALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
 {
-	lock_kernel();
 	inode_add_bytes(inode, nr);
-	unlock_kernel();
 	return 0;
 }
 
@@ -222,9 +201,7 @@
 
 extern __inline__ void DQUOT_FREE_SPACE_NODIRTY(struct inode *inode, qsize_t nr)
 {
-	lock_kernel();
 	inode_sub_bytes(inode, nr);
-	unlock_kernel();
 }
 
 extern __inline__ void DQUOT_FREE_SPACE(struct inode *inode, qsize_t nr)
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Copyright © 2002, Eklektix, Inc.
Comments and public postings are copyrighted by their creators.
Linux is a registered trademark of Linus Torvalds