Linux-ext4 Archive mirror
 help / color / mirror / Atom feed
* [PATCH] ext2/3/4: change i_mutex usage on lseek
@ 2009-01-15  0:32 Hisashi Hifumi
  2009-01-15  4:32 ` Dave Kleikamp
  0 siblings, 1 reply; 5+ messages in thread
From: Hisashi Hifumi @ 2009-01-15  0:32 UTC (permalink / raw
  To: akpm, linux-ext4, linux-fsdevel

Hi.

I wrote some patch that changed a range of i_mutex on ext2/3/4's lseek.
Ext2/3/4 uses generic_file_llseek, this function is inside i_mutex.
I think there is room for optimization in some cases.
When SEEK_END is specified from caller, in this case we should handle
inode->i_size so i_mutex is needed. But in other cases such as SEEK_CUR or
SEEK_SET, i_mutex is not needed because just changing file->f_pos value without
touching i_size.

I did some test to measure i_mutex contention.
This test do:
	1. make an 128MB file.
	2. fork 100 processes. repeat 10000000 times lseeking randomly on each process to this file.
	3, gauge seconds between start and end of this test.

The result was:

	-2.6.29-rc1
	# time ./lseek_test
	315 sec

	real    5m15.407s
	user    1m19.128s
	sys     5m38.884s

	-2.6.29-rc1-patched
	# time ./lseek_test
	13 sec

	real    0m13.039s
	user    1m14.730s
	sys     2m9.633s 

Hardware environment:
CPU 2.4GHz(Quad Core) *4
Memory 64GB

This improvement is derived from just removal of lseek's i_mutex contention.
There is i_mutex contention not only around lseek, but also fsync or write.
So,  I think we also can mitigate i_mutex contention between fsync and lseek.

Thanks.

Signed-off-by: Hisashi Hifumi <hifumi.hisashi@oss.ntt.co.jp>

diff -Nrup linux-2.6.29-rc1.org/fs/ext2/file.c linux-2.6.29-rc1/fs/ext2/file.c
--- linux-2.6.29-rc1.org/fs/ext2/file.c	2008-12-25 08:26:37.000000000 +0900
+++ linux-2.6.29-rc1/fs/ext2/file.c	2009-01-13 11:58:16.000000000 +0900
@@ -38,12 +38,24 @@ static int ext2_release_file (struct ino
 	return 0;
 }
 
+static loff_t ext2_file_llseek(struct file *file, loff_t offset, int origin)
+{
+	loff_t retval;
+
+	if (origin == SEEK_END)
+		retval = generic_file_llseek(file, offset, origin);
+	else
+		retval = generic_file_llseek_unlocked(file, offset, origin);
+
+	return retval;
+}
+
 /*
  * We have mostly NULL's here: the current defaults are ok for
  * the ext2 filesystem.
  */
 const struct file_operations ext2_file_operations = {
-	.llseek		= generic_file_llseek,
+	.llseek		= ext2_file_llseek,
 	.read		= do_sync_read,
 	.write		= do_sync_write,
 	.aio_read	= generic_file_aio_read,
@@ -62,7 +74,7 @@ const struct file_operations ext2_file_o
 
 #ifdef CONFIG_EXT2_FS_XIP
 const struct file_operations ext2_xip_file_operations = {
-	.llseek		= generic_file_llseek,
+	.llseek		= ext2_file_llseek,
 	.read		= xip_file_read,
 	.write		= xip_file_write,
 	.unlocked_ioctl = ext2_ioctl,
diff -Nrup linux-2.6.29-rc1.org/fs/ext3/file.c linux-2.6.29-rc1/fs/ext3/file.c
--- linux-2.6.29-rc1.org/fs/ext3/file.c	2008-12-25 08:26:37.000000000 +0900
+++ linux-2.6.29-rc1/fs/ext3/file.c	2009-01-13 11:58:16.000000000 +0900
@@ -106,8 +106,20 @@ force_commit:
 	return ret;
 }
 
+static loff_t ext3_file_llseek(struct file *file, loff_t offset, int origin)
+{
+	loff_t retval;
+
+	if (origin == SEEK_END)
+		retval = generic_file_llseek(file, offset, origin);
+	else
+		retval = generic_file_llseek_unlocked(file, offset, origin);
+
+	return retval;
+}
+
 const struct file_operations ext3_file_operations = {
-	.llseek		= generic_file_llseek,
+	.llseek		= ext3_file_llseek,
 	.read		= do_sync_read,
 	.write		= do_sync_write,
 	.aio_read	= generic_file_aio_read,
diff -Nrup linux-2.6.29-rc1.org/fs/ext4/file.c linux-2.6.29-rc1/fs/ext4/file.c
--- linux-2.6.29-rc1.org/fs/ext4/file.c	2009-01-13 11:55:09.000000000 +0900
+++ linux-2.6.29-rc1/fs/ext4/file.c	2009-01-13 12:09:59.000000000 +0900
@@ -140,8 +140,20 @@ static int ext4_file_mmap(struct file *f
 	return 0;
 }
 
+static loff_t ext4_file_llseek(struct file *file, loff_t offset, int origin)
+{
+	loff_t retval;
+
+	if (origin == SEEK_END)
+		retval = generic_file_llseek(file, offset, origin);
+	else
+		retval = generic_file_llseek_unlocked(file, offset, origin);
+
+	return retval;
+}
+
 const struct file_operations ext4_file_operations = {
-	.llseek		= generic_file_llseek,
+	.llseek		= ext4_file_llseek,
 	.read		= do_sync_read,
 	.write		= do_sync_write,
 	.aio_read	= generic_file_aio_read,




Following is the test program "lseek_test.c".

#include <stdio.h>
#include <sched.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <signal.h>
#define NUM 100
#define LEN 4096
#define LOOP 32*1024

int num;

void catch_SIGCHLD(int signo)
{
	pid_t child_pid = 0;
	do {
		int child_ret;
		child_pid = waitpid(-1, &child_ret, WNOHANG);
		if (child_pid > 0)
			num++;
	} while (child_pid > 0);
}
main()
{
        int  i, pid;
	char buf[LEN];
	unsigned long offset, filesize;
	time_t t1, t2;
	struct sigaction act;
	memset(buf, 0, LEN);
	memset(&act, 0, sizeof(act));
	act.sa_handler = catch_SIGCHLD;
	act.sa_flags = SA_NOCLDSTOP|SA_RESTART;
	sigemptyset(&act.sa_mask);
	sigaction(SIGCHLD, &act, NULL);	

	filesize = LEN * LOOP;
	int fd = open("targetfile1",O_RDWR|O_CREAT);	

	/* create a 128MB file */
	for(i = 0; i < LOOP; i++)
		write(fd, buf, LEN);
	fsync(fd);
	close(fd);
	
	time(&t1);	
	for (i = 0; i < NUM; i++) {	
		pid = fork();
		if(pid == 0){
		/* child */
			int fd = open("targetfile1", O_RDWR);	
			int j;
			for (j = 0; j < 10000000; j++) {
				offset = (random() % filesize);
				lseek(fd, offset, SEEK_SET);
			}
			close(fd);
			exit(0);
		}
	}
	while(num < NUM)
		sleep(1);
	time(&t2);	
	printf("%d sec\n",t2-t1);
}



^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] ext2/3/4: change i_mutex usage on lseek
  2009-01-15  0:32 [PATCH] ext2/3/4: change i_mutex usage on lseek Hisashi Hifumi
@ 2009-01-15  4:32 ` Dave Kleikamp
  2009-01-15  4:40   ` Hisashi Hifumi
  2009-01-15 13:01   ` Jamie Lokier
  0 siblings, 2 replies; 5+ messages in thread
From: Dave Kleikamp @ 2009-01-15  4:32 UTC (permalink / raw
  To: Hisashi Hifumi; +Cc: akpm, linux-ext4, linux-fsdevel

On Thu, 2009-01-15 at 09:32 +0900, Hisashi Hifumi wrote:
> Hi.
> 
> I wrote some patch that changed a range of i_mutex on ext2/3/4's lseek.
> Ext2/3/4 uses generic_file_llseek, this function is inside i_mutex.
> I think there is room for optimization in some cases.
> When SEEK_END is specified from caller, in this case we should handle
> inode->i_size so i_mutex is needed. But in other cases such as SEEK_CUR or
> SEEK_SET, i_mutex is not needed because just changing file->f_pos value without
> touching i_size.

Is there any reason you couldn't have just changed generic_file_llseek()
to do this rather than making identical changes to the individual file
systems.  I would think this optimization would be safe for any file
system.

> I did some test to measure i_mutex contention.
> This test do:
> 	1. make an 128MB file.
> 	2. fork 100 processes. repeat 10000000 times lseeking randomly on each process to this file.
> 	3, gauge seconds between start and end of this test.
> 
> The result was:
> 
> 	-2.6.29-rc1
> 	# time ./lseek_test
> 	315 sec
> 
> 	real    5m15.407s
> 	user    1m19.128s
> 	sys     5m38.884s
> 
> 	-2.6.29-rc1-patched
> 	# time ./lseek_test
> 	13 sec
> 
> 	real    0m13.039s
> 	user    1m14.730s
> 	sys     2m9.633s 
> 
> Hardware environment:
> CPU 2.4GHz(Quad Core) *4
> Memory 64GB
> 
> This improvement is derived from just removal of lseek's i_mutex contention.
> There is i_mutex contention not only around lseek, but also fsync or write.
> So,  I think we also can mitigate i_mutex contention between fsync and lseek.
> 
> Thanks.
> 
> Signed-off-by: Hisashi Hifumi <hifumi.hisashi@oss.ntt.co.jp>
> 
> diff -Nrup linux-2.6.29-rc1.org/fs/ext2/file.c linux-2.6.29-rc1/fs/ext2/file.c
> --- linux-2.6.29-rc1.org/fs/ext2/file.c	2008-12-25 08:26:37.000000000 +0900
> +++ linux-2.6.29-rc1/fs/ext2/file.c	2009-01-13 11:58:16.000000000 +0900
> @@ -38,12 +38,24 @@ static int ext2_release_file (struct ino
>  	return 0;
>  }
> 
> +static loff_t ext2_file_llseek(struct file *file, loff_t offset, int origin)
> +{
> +	loff_t retval;
> +
> +	if (origin == SEEK_END)
> +		retval = generic_file_llseek(file, offset, origin);
> +	else
> +		retval = generic_file_llseek_unlocked(file, offset, origin);
> +
> +	return retval;
> +}
> +
>  /*
>   * We have mostly NULL's here: the current defaults are ok for
>   * the ext2 filesystem.
>   */
>  const struct file_operations ext2_file_operations = {
> -	.llseek		= generic_file_llseek,
> +	.llseek		= ext2_file_llseek,
>  	.read		= do_sync_read,
>  	.write		= do_sync_write,
>  	.aio_read	= generic_file_aio_read,
> @@ -62,7 +74,7 @@ const struct file_operations ext2_file_o
> 
>  #ifdef CONFIG_EXT2_FS_XIP
>  const struct file_operations ext2_xip_file_operations = {
> -	.llseek		= generic_file_llseek,
> +	.llseek		= ext2_file_llseek,
>  	.read		= xip_file_read,
>  	.write		= xip_file_write,
>  	.unlocked_ioctl = ext2_ioctl,
> diff -Nrup linux-2.6.29-rc1.org/fs/ext3/file.c linux-2.6.29-rc1/fs/ext3/file.c
> --- linux-2.6.29-rc1.org/fs/ext3/file.c	2008-12-25 08:26:37.000000000 +0900
> +++ linux-2.6.29-rc1/fs/ext3/file.c	2009-01-13 11:58:16.000000000 +0900
> @@ -106,8 +106,20 @@ force_commit:
>  	return ret;
>  }
> 
> +static loff_t ext3_file_llseek(struct file *file, loff_t offset, int origin)
> +{
> +	loff_t retval;
> +
> +	if (origin == SEEK_END)
> +		retval = generic_file_llseek(file, offset, origin);
> +	else
> +		retval = generic_file_llseek_unlocked(file, offset, origin);
> +
> +	return retval;
> +}
> +
>  const struct file_operations ext3_file_operations = {
> -	.llseek		= generic_file_llseek,
> +	.llseek		= ext3_file_llseek,
>  	.read		= do_sync_read,
>  	.write		= do_sync_write,
>  	.aio_read	= generic_file_aio_read,
> diff -Nrup linux-2.6.29-rc1.org/fs/ext4/file.c linux-2.6.29-rc1/fs/ext4/file.c
> --- linux-2.6.29-rc1.org/fs/ext4/file.c	2009-01-13 11:55:09.000000000 +0900
> +++ linux-2.6.29-rc1/fs/ext4/file.c	2009-01-13 12:09:59.000000000 +0900
> @@ -140,8 +140,20 @@ static int ext4_file_mmap(struct file *f
>  	return 0;
>  }
> 
> +static loff_t ext4_file_llseek(struct file *file, loff_t offset, int origin)
> +{
> +	loff_t retval;
> +
> +	if (origin == SEEK_END)
> +		retval = generic_file_llseek(file, offset, origin);
> +	else
> +		retval = generic_file_llseek_unlocked(file, offset, origin);
> +
> +	return retval;
> +}
> +
>  const struct file_operations ext4_file_operations = {
> -	.llseek		= generic_file_llseek,
> +	.llseek		= ext4_file_llseek,
>  	.read		= do_sync_read,
>  	.write		= do_sync_write,
>  	.aio_read	= generic_file_aio_read,

-- 
David Kleikamp
IBM Linux Technology Center

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] ext2/3/4: change i_mutex usage on lseek
  2009-01-15  4:32 ` Dave Kleikamp
@ 2009-01-15  4:40   ` Hisashi Hifumi
  2009-01-15 13:01   ` Jamie Lokier
  1 sibling, 0 replies; 5+ messages in thread
From: Hisashi Hifumi @ 2009-01-15  4:40 UTC (permalink / raw
  To: Dave Kleikamp; +Cc: akpm, linux-ext4, linux-fsdevel

Hi.
Thank you for your comment.

At 13:32 09/01/15, Dave Kleikamp wrote:
>On Thu, 2009-01-15 at 09:32 +0900, Hisashi Hifumi wrote:
>> Hi.
>> 
>> I wrote some patch that changed a range of i_mutex on ext2/3/4's lseek.
>> Ext2/3/4 uses generic_file_llseek, this function is inside i_mutex.
>> I think there is room for optimization in some cases.
>> When SEEK_END is specified from caller, in this case we should handle
>> inode->i_size so i_mutex is needed. But in other cases such as SEEK_CUR or
>> SEEK_SET, i_mutex is not needed because just changing file->f_pos value without
>> touching i_size.
>
>Is there any reason you couldn't have just changed generic_file_llseek()
>to do this rather than making identical changes to the individual file
>systems.  I would think this optimization would be safe for any file
>system.
>

I agree with you. I think changing generic_file_llseek() directly does not hurt
file system functionality.
I will send a patch changing generic_file_llseek().



>> I did some test to measure i_mutex contention.
>> This test do:
>> 	1. make an 128MB file.
>> 	2. fork 100 processes. repeat 10000000 times lseeking randomly on each 
>process to this file.
>> 	3, gauge seconds between start and end of this test.
>> 
>> The result was:
>> 
>> 	-2.6.29-rc1
>> 	# time ./lseek_test
>> 	315 sec
>> 
>> 	real    5m15.407s
>> 	user    1m19.128s
>> 	sys     5m38.884s
>> 
>> 	-2.6.29-rc1-patched
>> 	# time ./lseek_test
>> 	13 sec
>> 
>> 	real    0m13.039s
>> 	user    1m14.730s
>> 	sys     2m9.633s 
>> 
>> Hardware environment:
>> CPU 2.4GHz(Quad Core) *4
>> Memory 64GB
>> 
>> This improvement is derived from just removal of lseek's i_mutex contention.
>> There is i_mutex contention not only around lseek, but also fsync or write.
>> So,  I think we also can mitigate i_mutex contention between fsync and lseek.
>> 
>> Thanks.
>> 
>> Signed-off-by: Hisashi Hifumi <hifumi.hisashi@oss.ntt.co.jp>
>> 
>> diff -Nrup linux-2.6.29-rc1.org/fs/ext2/file.c linux-2.6.29-rc1/fs/ext2/file.c
>> --- linux-2.6.29-rc1.org/fs/ext2/file.c	2008-12-25 08:26:37.000000000 +0900
>> +++ linux-2.6.29-rc1/fs/ext2/file.c	2009-01-13 11:58:16.000000000 +0900
>> @@ -38,12 +38,24 @@ static int ext2_release_file (struct ino
>>  	return 0;
>>  }
>> 
>> +static loff_t ext2_file_llseek(struct file *file, loff_t offset, int origin)
>> +{
>> +	loff_t retval;
>> +
>> +	if (origin == SEEK_END)
>> +		retval = generic_file_llseek(file, offset, origin);
>> +	else
>> +		retval = generic_file_llseek_unlocked(file, offset, origin);
>> +
>> +	return retval;
>> +}
>> +
>>  /*
>>   * We have mostly NULL's here: the current defaults are ok for
>>   * the ext2 filesystem.
>>   */
>>  const struct file_operations ext2_file_operations = {
>> -	.llseek		= generic_file_llseek,
>> +	.llseek		= ext2_file_llseek,
>>  	.read		= do_sync_read,
>>  	.write		= do_sync_write,
>>  	.aio_read	= generic_file_aio_read,
>> @@ -62,7 +74,7 @@ const struct file_operations ext2_file_o
>> 
>>  #ifdef CONFIG_EXT2_FS_XIP
>>  const struct file_operations ext2_xip_file_operations = {
>> -	.llseek		= generic_file_llseek,
>> +	.llseek		= ext2_file_llseek,
>>  	.read		= xip_file_read,
>>  	.write		= xip_file_write,
>>  	.unlocked_ioctl = ext2_ioctl,
>> diff -Nrup linux-2.6.29-rc1.org/fs/ext3/file.c linux-2.6.29-rc1/fs/ext3/file.c
>> --- linux-2.6.29-rc1.org/fs/ext3/file.c	2008-12-25 08:26:37.000000000 +0900
>> +++ linux-2.6.29-rc1/fs/ext3/file.c	2009-01-13 11:58:16.000000000 +0900
>> @@ -106,8 +106,20 @@ force_commit:
>>  	return ret;
>>  }
>> 
>> +static loff_t ext3_file_llseek(struct file *file, loff_t offset, int origin)
>> +{
>> +	loff_t retval;
>> +
>> +	if (origin == SEEK_END)
>> +		retval = generic_file_llseek(file, offset, origin);
>> +	else
>> +		retval = generic_file_llseek_unlocked(file, offset, origin);
>> +
>> +	return retval;
>> +}
>> +
>>  const struct file_operations ext3_file_operations = {
>> -	.llseek		= generic_file_llseek,
>> +	.llseek		= ext3_file_llseek,
>>  	.read		= do_sync_read,
>>  	.write		= do_sync_write,
>>  	.aio_read	= generic_file_aio_read,
>> diff -Nrup linux-2.6.29-rc1.org/fs/ext4/file.c linux-2.6.29-rc1/fs/ext4/file.c
>> --- linux-2.6.29-rc1.org/fs/ext4/file.c	2009-01-13 11:55:09.000000000 +0900
>> +++ linux-2.6.29-rc1/fs/ext4/file.c	2009-01-13 12:09:59.000000000 +0900
>> @@ -140,8 +140,20 @@ static int ext4_file_mmap(struct file *f
>>  	return 0;
>>  }
>> 
>> +static loff_t ext4_file_llseek(struct file *file, loff_t offset, int origin)
>> +{
>> +	loff_t retval;
>> +
>> +	if (origin == SEEK_END)
>> +		retval = generic_file_llseek(file, offset, origin);
>> +	else
>> +		retval = generic_file_llseek_unlocked(file, offset, origin);
>> +
>> +	return retval;
>> +}
>> +
>>  const struct file_operations ext4_file_operations = {
>> -	.llseek		= generic_file_llseek,
>> +	.llseek		= ext4_file_llseek,
>>  	.read		= do_sync_read,
>>  	.write		= do_sync_write,
>>  	.aio_read	= generic_file_aio_read,
>
>-- 
>David Kleikamp
>IBM Linux Technology Center


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] ext2/3/4: change i_mutex usage on lseek
  2009-01-15  4:32 ` Dave Kleikamp
  2009-01-15  4:40   ` Hisashi Hifumi
@ 2009-01-15 13:01   ` Jamie Lokier
  2009-01-15 13:35     ` Theodore Tso
  1 sibling, 1 reply; 5+ messages in thread
From: Jamie Lokier @ 2009-01-15 13:01 UTC (permalink / raw
  To: Dave Kleikamp; +Cc: Hisashi Hifumi, akpm, linux-ext4, linux-fsdevel

Dave Kleikamp wrote:
> Is there any reason you couldn't have just changed generic_file_llseek()
> to do this rather than making identical changes to the individual file
> systems.  I would think this optimization would be safe for any file
> system.

Is it safe on 32-bit systems where 64-bit updates are not atomic?

You may say that doing multiple parallel lseek() calls is undefined
behaviour, so it's ok to end up with file position that none of the
individual lseek() calls asked for.

But if it's undefined behaviour, no programs should be doing parallel
lseek() calls on the same open file, so why optimise it at all?

-- Jamie

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] ext2/3/4: change i_mutex usage on lseek
  2009-01-15 13:01   ` Jamie Lokier
@ 2009-01-15 13:35     ` Theodore Tso
  0 siblings, 0 replies; 5+ messages in thread
From: Theodore Tso @ 2009-01-15 13:35 UTC (permalink / raw
  To: Jamie Lokier
  Cc: Dave Kleikamp, Hisashi Hifumi, akpm, linux-ext4, linux-fsdevel

On Thu, Jan 15, 2009 at 01:01:54PM +0000, Jamie Lokier wrote:
> Dave Kleikamp wrote:
> > Is there any reason you couldn't have just changed generic_file_llseek()
> > to do this rather than making identical changes to the individual file
> > systems.  I would think this optimization would be safe for any file
> > system.
> 
> Is it safe on 32-bit systems where 64-bit updates are not atomic?

Protection of 64-bit updates of i_size where 32-bit systems do not
have atomic 64-bit updates is done via i_size_seqcount, which is done
automatically as long as the code in question uses i_size_write(), an
inline function defined in include/linux/fs.h.

> You may say that doing multiple parallel lseek() calls is undefined
> behaviour, so it's ok to end up with file position that none of the
> individual lseek() calls asked for.
> 
> But if it's undefined behaviour, no programs should be doing parallel
> lseek() calls on the same open file, so why optimise it at all?

i_mutex gets used for a lot more than protecting i_size updates for
lseek(), so removing the need for i_mutex could very well be helpful
for non-micro-benchmark tests.  As Hirashi-san mentioned in his patch,
this could avoid contention between lseek() and write() (possibly via
a different file descriptor, so it wouldn't be an undefined race
condition) and fsync().

I do agree this is an optimization that should be done in
generic_file_llseek(), though.

Regards,

						- Ted

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2009-01-15 13:35 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-01-15  0:32 [PATCH] ext2/3/4: change i_mutex usage on lseek Hisashi Hifumi
2009-01-15  4:32 ` Dave Kleikamp
2009-01-15  4:40   ` Hisashi Hifumi
2009-01-15 13:01   ` Jamie Lokier
2009-01-15 13:35     ` Theodore Tso

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).