All the mail mirrored from lore.kernel.org
 help / color / mirror / Atom feed
From: Matthew Wilcox <matthew@wil.cx>
To: Ingo Molnar <mingo@elte.hu>, "J. Bruce Fields" <bfields@citi.umich.edu>
Cc: "Zhang, Yanmin" <yanmin_zhang@linux.intel.com>,
	LKML <linux-kernel@vger.kernel.org>,
	Alexander Viro <viro@ftp.linux.org.uk>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	linux-fsdevel@vger.kernel.org
Subject: Re: AIM7 40% regression with 2.6.26-rc1
Date: Tue, 6 May 2008 10:23:32 -0600	[thread overview]
Message-ID: <20080506162332.GI19219@parisc-linux.org> (raw)
In-Reply-To: <20080506120934.GH19219@parisc-linux.org>

On Tue, May 06, 2008 at 06:09:34AM -0600, Matthew Wilcox wrote:
> So the only likely things I can see are:
> 
>  - file locks
>  - fasync

I've wanted to fix file locks for a while.  Here's a first attempt.
It was done quickly, so I concede that it may well have bugs in it.
I found (and fixed) one with LTP.

It takes *no account* of nfsd, nor remote filesystems.  We need to have
a serious discussion about their requirements.

diff --git a/fs/locks.c b/fs/locks.c
index 663c069..cb09765 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -140,6 +140,8 @@ int lease_break_time = 45;
 #define for_each_lock(inode, lockp) \
 	for (lockp = &inode->i_flock; *lockp != NULL; lockp = &(*lockp)->fl_next)
 
+static DEFINE_SPINLOCK(file_lock_lock);
+
 static LIST_HEAD(file_lock_list);
 static LIST_HEAD(blocked_list);
 
@@ -510,9 +512,9 @@ static void __locks_delete_block(struct file_lock *waiter)
  */
 static void locks_delete_block(struct file_lock *waiter)
 {
-	lock_kernel();
+	spin_lock(&file_lock_lock);
 	__locks_delete_block(waiter);
-	unlock_kernel();
+	spin_unlock(&file_lock_lock);
 }
 
 /* Insert waiter into blocker's block list.
@@ -649,7 +651,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
 {
 	struct file_lock *cfl;
 
-	lock_kernel();
+	spin_lock(&file_lock_lock);
 	for (cfl = filp->f_path.dentry->d_inode->i_flock; cfl; cfl = cfl->fl_next) {
 		if (!IS_POSIX(cfl))
 			continue;
@@ -662,7 +664,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
 			fl->fl_pid = pid_vnr(cfl->fl_nspid);
 	} else
 		fl->fl_type = F_UNLCK;
-	unlock_kernel();
+	spin_unlock(&file_lock_lock);
 	return;
 }
 EXPORT_SYMBOL(posix_test_lock);
@@ -735,18 +737,21 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
 	int error = 0;
 	int found = 0;
 
-	lock_kernel();
-	if (request->fl_flags & FL_ACCESS)
+	if (request->fl_flags & FL_ACCESS) {
+		spin_lock(&file_lock_lock);
 		goto find_conflict;
+	}
 
 	if (request->fl_type != F_UNLCK) {
 		error = -ENOMEM;
+		
 		new_fl = locks_alloc_lock();
 		if (new_fl == NULL)
-			goto out;
+			goto out_unlocked;
 		error = 0;
 	}
 
+	spin_lock(&file_lock_lock);
 	for_each_lock(inode, before) {
 		struct file_lock *fl = *before;
 		if (IS_POSIX(fl))
@@ -772,10 +777,13 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
 	 * If a higher-priority process was blocked on the old file lock,
 	 * give it the opportunity to lock the file.
 	 */
-	if (found)
+	if (found) {
+		spin_unlock(&file_lock_lock);
 		cond_resched();
+		spin_lock(&file_lock_lock);
+	}
 
-find_conflict:
+ find_conflict:
 	for_each_lock(inode, before) {
 		struct file_lock *fl = *before;
 		if (IS_POSIX(fl))
@@ -796,8 +804,9 @@ find_conflict:
 	new_fl = NULL;
 	error = 0;
 
-out:
-	unlock_kernel();
+ out:
+	spin_unlock(&file_lock_lock);
+ out_unlocked:
 	if (new_fl)
 		locks_free_lock(new_fl);
 	return error;
@@ -826,7 +835,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
 		new_fl2 = locks_alloc_lock();
 	}
 
-	lock_kernel();
+	spin_lock(&file_lock_lock);
 	if (request->fl_type != F_UNLCK) {
 		for_each_lock(inode, before) {
 			fl = *before;
@@ -994,7 +1003,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
 		locks_wake_up_blocks(left);
 	}
  out:
-	unlock_kernel();
+	spin_unlock(&file_lock_lock);
 	/*
 	 * Free any unused locks.
 	 */
@@ -1069,14 +1078,14 @@ int locks_mandatory_locked(struct inode *inode)
 	/*
 	 * Search the lock list for this inode for any POSIX locks.
 	 */
-	lock_kernel();
+	spin_lock(&file_lock_lock);
 	for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
 		if (!IS_POSIX(fl))
 			continue;
 		if (fl->fl_owner != owner)
 			break;
 	}
-	unlock_kernel();
+	spin_unlock(&file_lock_lock);
 	return fl ? -EAGAIN : 0;
 }
 
@@ -1190,7 +1199,7 @@ int __break_lease(struct inode *inode, unsigned int mode)
 
 	new_fl = lease_alloc(NULL, mode & FMODE_WRITE ? F_WRLCK : F_RDLCK);
 
-	lock_kernel();
+	spin_lock(&file_lock_lock);
 
 	time_out_leases(inode);
 
@@ -1251,8 +1260,10 @@ restart:
 			break_time++;
 	}
 	locks_insert_block(flock, new_fl);
+	spin_unlock(&file_lock_lock);
 	error = wait_event_interruptible_timeout(new_fl->fl_wait,
 						!new_fl->fl_next, break_time);
+	spin_lock(&file_lock_lock);
 	__locks_delete_block(new_fl);
 	if (error >= 0) {
 		if (error == 0)
@@ -1266,8 +1277,8 @@ restart:
 		error = 0;
 	}
 
-out:
-	unlock_kernel();
+ out:
+	spin_unlock(&file_lock_lock);
 	if (!IS_ERR(new_fl))
 		locks_free_lock(new_fl);
 	return error;
@@ -1323,7 +1334,7 @@ int fcntl_getlease(struct file *filp)
 	struct file_lock *fl;
 	int type = F_UNLCK;
 
-	lock_kernel();
+	spin_lock(&file_lock_lock);
 	time_out_leases(filp->f_path.dentry->d_inode);
 	for (fl = filp->f_path.dentry->d_inode->i_flock; fl && IS_LEASE(fl);
 			fl = fl->fl_next) {
@@ -1332,7 +1343,7 @@ int fcntl_getlease(struct file *filp)
 			break;
 		}
 	}
-	unlock_kernel();
+	spin_unlock(&file_lock_lock);
 	return type;
 }
 
@@ -1363,6 +1374,7 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
 	if (error)
 		return error;
 
+	spin_lock(&file_lock_lock);
 	time_out_leases(inode);
 
 	BUG_ON(!(*flp)->fl_lmops->fl_break);
@@ -1370,10 +1382,11 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
 	lease = *flp;
 
 	if (arg != F_UNLCK) {
+		spin_unlock(&file_lock_lock);
 		error = -ENOMEM;
 		new_fl = locks_alloc_lock();
 		if (new_fl == NULL)
-			goto out;
+			goto out_unlocked;
 
 		error = -EAGAIN;
 		if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
@@ -1382,6 +1395,7 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
 		    && ((atomic_read(&dentry->d_count) > 1)
 			|| (atomic_read(&inode->i_count) > 1)))
 			goto out;
+		spin_lock(&file_lock_lock);
 	}
 
 	/*
@@ -1429,11 +1443,14 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
 
 	locks_copy_lock(new_fl, lease);
 	locks_insert_lock(before, new_fl);
+	spin_unlock(&file_lock_lock);
 
 	*flp = new_fl;
 	return 0;
 
-out:
+ out:
+	spin_unlock(&file_lock_lock);
+ out_unlocked:
 	if (new_fl != NULL)
 		locks_free_lock(new_fl);
 	return error;
@@ -1471,12 +1488,10 @@ int vfs_setlease(struct file *filp, long arg, struct file_lock **lease)
 {
 	int error;
 
-	lock_kernel();
 	if (filp->f_op && filp->f_op->setlease)
 		error = filp->f_op->setlease(filp, arg, lease);
 	else
 		error = generic_setlease(filp, arg, lease);
-	unlock_kernel();
 
 	return error;
 }
@@ -1503,12 +1518,11 @@ int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
 	if (error)
 		return error;
 
-	lock_kernel();
-
 	error = vfs_setlease(filp, arg, &flp);
 	if (error || arg == F_UNLCK)
-		goto out_unlock;
+		return error;
 
+	lock_kernel();
 	error = fasync_helper(fd, filp, 1, &flp->fl_fasync);
 	if (error < 0) {
 		/* remove lease just inserted by setlease */
@@ -1519,7 +1533,7 @@ int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
 	}
 
 	error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
-out_unlock:
+ out_unlock:
 	unlock_kernel();
 	return error;
 }
@@ -2024,7 +2038,7 @@ void locks_remove_flock(struct file *filp)
 			fl.fl_ops->fl_release_private(&fl);
 	}
 
-	lock_kernel();
+	spin_lock(&file_lock_lock);
 	before = &inode->i_flock;
 
 	while ((fl = *before) != NULL) {
@@ -2042,7 +2056,7 @@ void locks_remove_flock(struct file *filp)
  		}
 		before = &fl->fl_next;
 	}
-	unlock_kernel();
+	spin_unlock(&file_lock_lock);
 }
 
 /**
@@ -2057,12 +2071,12 @@ posix_unblock_lock(struct file *filp, struct file_lock *waiter)
 {
 	int status = 0;
 
-	lock_kernel();
+	spin_lock(&file_lock_lock);
 	if (waiter->fl_next)
 		__locks_delete_block(waiter);
 	else
 		status = -ENOENT;
-	unlock_kernel();
+	spin_unlock(&file_lock_lock);
 	return status;
 }
 
@@ -2175,7 +2189,7 @@ static int locks_show(struct seq_file *f, void *v)
 
 static void *locks_start(struct seq_file *f, loff_t *pos)
 {
-	lock_kernel();
+	spin_lock(&file_lock_lock);
 	f->private = (void *)1;
 	return seq_list_start(&file_lock_list, *pos);
 }
@@ -2187,7 +2201,7 @@ static void *locks_next(struct seq_file *f, void *v, loff_t *pos)
 
 static void locks_stop(struct seq_file *f, void *v)
 {
-	unlock_kernel();
+	spin_unlock(&file_lock_lock);
 }
 
 struct seq_operations locks_seq_operations = {
@@ -2215,7 +2229,7 @@ int lock_may_read(struct inode *inode, loff_t start, unsigned long len)
 {
 	struct file_lock *fl;
 	int result = 1;
-	lock_kernel();
+	spin_lock(&file_lock_lock);
 	for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
 		if (IS_POSIX(fl)) {
 			if (fl->fl_type == F_RDLCK)
@@ -2232,7 +2246,7 @@ int lock_may_read(struct inode *inode, loff_t start, unsigned long len)
 		result = 0;
 		break;
 	}
-	unlock_kernel();
+	spin_unlock(&file_lock_lock);
 	return result;
 }
 
@@ -2255,7 +2269,7 @@ int lock_may_write(struct inode *inode, loff_t start, unsigned long len)
 {
 	struct file_lock *fl;
 	int result = 1;
-	lock_kernel();
+	spin_lock(&file_lock_lock);
 	for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
 		if (IS_POSIX(fl)) {
 			if ((fl->fl_end < start) || (fl->fl_start > (start + len)))
@@ -2270,7 +2284,7 @@ int lock_may_write(struct inode *inode, loff_t start, unsigned long len)
 		result = 0;
 		break;
 	}
-	unlock_kernel();
+	spin_unlock(&file_lock_lock);
 	return result;
 }
 

-- 
Intel are signing my paycheques ... these opinions are still mine
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours.  We can't possibly take such
a retrograde step."

  reply	other threads:[~2008-05-06 16:23 UTC|newest]

Thread overview: 140+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-05-06  5:48 AIM7 40% regression with 2.6.26-rc1 Zhang, Yanmin
2008-05-06 11:18 ` Matthew Wilcox
2008-05-06 11:44 ` Ingo Molnar
2008-05-06 12:09   ` Matthew Wilcox
2008-05-06 16:23     ` Matthew Wilcox [this message]
2008-05-06 16:36       ` Linus Torvalds
2008-05-06 16:42         ` Matthew Wilcox
2008-05-06 16:39           ` Alan Cox
2008-05-06 16:51             ` Matthew Wilcox
2008-05-06 16:45               ` Alan Cox
2008-05-06 17:42               ` Linus Torvalds
2008-05-06 20:28           ` Linus Torvalds
2008-05-06 16:44         ` J. Bruce Fields
2008-05-06 17:21       ` Andrew Morton
2008-05-06 17:31         ` Matthew Wilcox
2008-05-06 17:49           ` Ingo Molnar
2008-05-06 18:07             ` Andrew Morton
2008-05-11 11:11               ` Matthew Wilcox
2008-05-06 17:39         ` Ingo Molnar
2008-05-07  6:49           ` Zhang, Yanmin
2008-05-06 17:45         ` Linus Torvalds
2008-05-07 16:38         ` Matthew Wilcox
2008-05-07 16:55           ` Linus Torvalds
2008-05-07 17:08             ` Linus Torvalds
2008-05-07 17:16               ` Andrew Morton
2008-05-07 17:27                 ` Linus Torvalds
2008-05-07 17:22               ` Ingo Molnar
2008-05-07 17:25                 ` Ingo Molnar
2008-05-07 17:31                 ` Linus Torvalds
2008-05-07 17:47                   ` Linus Torvalds
2008-05-07 17:49                   ` Ingo Molnar
2008-05-07 18:02                     ` Linus Torvalds
2008-05-07 18:17                       ` Ingo Molnar
2008-05-07 18:27                         ` Linus Torvalds
2008-05-07 18:43                           ` Ingo Molnar
2008-05-07 19:01                             ` Linus Torvalds
2008-05-07 19:09                               ` Ingo Molnar
2008-05-07 19:24                               ` Matthew Wilcox
2008-05-07 19:44                                 ` Linus Torvalds
2008-05-07 20:00                                   ` Oi. NFS people. Read this Matthew Wilcox
2008-05-07 22:10                                     ` Trond Myklebust
2008-05-09  1:43                                       ` J. Bruce Fields
2008-05-08  3:24       ` AIM7 40% regression with 2.6.26-rc1 Zhang, Yanmin
2008-05-08  3:34         ` Linus Torvalds
2008-05-08  4:37           ` Zhang, Yanmin
2008-05-08 14:58             ` Linus Torvalds
2008-05-07  2:11   ` Zhang, Yanmin
2008-05-07  3:41     ` Zhang, Yanmin
2008-05-07  3:59       ` Andrew Morton
2008-05-07  4:46         ` Zhang, Yanmin
2008-05-07  6:26       ` Ingo Molnar
2008-05-07  6:28         ` Ingo Molnar
2008-05-07  7:05           ` Zhang, Yanmin
2008-05-07 11:00       ` Andi Kleen
2008-05-07 11:46         ` Matthew Wilcox
2008-05-07 12:21           ` Andi Kleen
2008-05-07 14:36             ` Linus Torvalds
2008-05-07 14:35               ` Alan Cox
2008-05-07 15:00                 ` Linus Torvalds
2008-05-07 15:02                   ` Linus Torvalds
2008-05-07 14:57               ` Andi Kleen
2008-05-07 15:31                 ` Andrew Morton
2008-05-07 16:22                   ` Matthew Wilcox
2008-05-07 15:19               ` Linus Torvalds
2008-05-07 17:14                 ` Ingo Molnar
2008-05-08  2:44                 ` Zhang, Yanmin
2008-05-08  3:29                   ` Linus Torvalds
2008-05-08  4:08                     ` Zhang, Yanmin
2008-05-08  4:17                       ` Linus Torvalds
2008-05-08 12:01                         ` [patch] speed up / fix the new generic semaphore code (fix AIM7 40% regression with 2.6.26-rc1) Ingo Molnar
2008-05-08 12:28                           ` Ingo Molnar
2008-05-08 14:43                             ` Ingo Molnar
2008-05-08 15:10                               ` [git pull] scheduler fixes Ingo Molnar
2008-05-08 15:33                                 ` Adrian Bunk
2008-05-08 15:41                                   ` Ingo Molnar
2008-05-08 19:42                                     ` Adrian Bunk
2008-05-11 11:03                                 ` Matthew Wilcox
2008-05-11 11:14                                   ` Matthew Wilcox
2008-05-11 11:48                                   ` Matthew Wilcox
2008-05-11 12:50                                     ` Ingo Molnar
2008-05-11 12:52                                       ` Ingo Molnar
2008-05-11 13:02                                         ` Matthew Wilcox
2008-05-11 13:26                                           ` Matthew Wilcox
2008-05-11 14:00                                             ` Ingo Molnar
2008-05-11 14:18                                               ` Matthew Wilcox
2008-05-11 14:42                                                 ` Ingo Molnar
2008-05-11 14:48                                                   ` Matthew Wilcox
2008-05-11 15:19                                                     ` Ingo Molnar
2008-05-11 15:29                                                       ` Matthew Wilcox
2008-05-13 14:11                                                         ` Ingo Molnar
2008-05-13 14:21                                                           ` Matthew Wilcox
2008-05-13 14:42                                                             ` Ingo Molnar
2008-05-13 15:28                                                               ` Matthew Wilcox
2008-05-13 17:13                                                                 ` Ingo Molnar
2008-05-13 17:22                                                                   ` Linus Torvalds
2008-05-13 21:05                                                                     ` Ingo Molnar
2008-05-11 13:54                                           ` Ingo Molnar
2008-05-11 14:22                                             ` Matthew Wilcox
2008-05-11 14:32                                               ` Ingo Molnar
2008-05-11 14:46                                                 ` Matthew Wilcox
2008-05-11 16:47                                                 ` Linus Torvalds
2008-05-11 13:01                                   ` Ingo Molnar
2008-05-11 13:06                                     ` Matthew Wilcox
2008-05-11 13:45                                       ` Ingo Molnar
2008-05-11 14:10                                   ` Sven Wegener
2008-05-08 16:02                             ` [patch] speed up / fix the new generic semaphore code (fix AIM7 40% regression with 2.6.26-rc1) Linus Torvalds
2008-05-08 18:30                               ` Linus Torvalds
2008-05-08 20:19                                 ` Ingo Molnar
2008-05-08 20:27                                   ` Linus Torvalds
2008-05-08 21:45                                     ` Ingo Molnar
2008-05-08 22:02                                       ` Ingo Molnar
2008-05-08 22:55                                       ` Linus Torvalds
2008-05-08 23:07                                         ` Linus Torvalds
2008-05-08 23:14                                           ` Linus Torvalds
2008-05-08 23:16                                         ` Alan Cox
2008-05-08 23:33                                           ` Linus Torvalds
2008-05-08 23:27                                             ` Alan Cox
2008-05-09  6:50                                             ` Ingo Molnar
2008-05-09  8:29                                             ` Andi Kleen
2008-05-08 13:20                           ` Matthew Wilcox
2008-05-08 15:01                             ` Ingo Molnar
2008-05-08 13:56                           ` Arjan van de Ven
2008-05-08  6:43                   ` AIM7 40% regression with 2.6.26-rc1 Ingo Molnar
2008-05-08  6:48                     ` Andrew Morton
2008-05-08  7:14                     ` Zhang, Yanmin
2008-05-08  7:39                       ` Ingo Molnar
2008-05-08  8:44                         ` Zhang, Yanmin
2008-05-08  9:21                           ` Ingo Molnar
2008-05-08  9:29                             ` Ingo Molnar
2008-05-08  9:30                             ` Zhang, Yanmin
2008-05-07 16:20               ` Ingo Molnar
2008-05-07 16:35                 ` Linus Torvalds
2008-05-07 17:05                   ` Ingo Molnar
2008-05-07 17:24                     ` Linus Torvalds
2008-05-07 17:36                       ` Ingo Molnar
2008-05-07 17:55                         ` Linus Torvalds
2008-05-07 17:59                           ` Matthew Wilcox
2008-05-07 18:17                             ` Linus Torvalds
2008-05-07 18:49                               ` Ingo Molnar
2008-05-07 13:59         ` Alan Cox

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080506162332.GI19219@parisc-linux.org \
    --to=matthew@wil.cx \
    --cc=akpm@linux-foundation.org \
    --cc=bfields@citi.umich.edu \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=torvalds@linux-foundation.org \
    --cc=viro@ftp.linux.org.uk \
    --cc=yanmin_zhang@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.