All the mail mirrored from lore.kernel.org
 help / color / mirror / Atom feed
* Removing swap lockmap...
@ 1999-01-18 15:12 Zlatko Calusic
  1999-01-18 20:26 ` Andrea Arcangeli
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Zlatko Calusic @ 1999-01-18 15:12 UTC (permalink / raw
  To: Linux-MM List, Linux Kernel List

[-- Attachment #1: Type: text/plain, Size: 458 bytes --]

I removed swap lockmap all together and, to my surprise, I can't
produce any ill behaviour on my system, not even under very heavy
swapping (in low memory condition).

I remember there were some issues when swap lockmap was removed in
2.1.89, so it was reintroduced later (processes were dying randomly).

Question is, why is everything running so smoothly now, even without
swap lockmap?

See for yourself, patch is attached (it is against testing/pre-8).


[-- Attachment #2: no-swap-lockmap --]
[-- Type: text/plain, Size: 8780 bytes --]

Index: 2207.4/include/linux/swap.h
--- 2207.4/include/linux/swap.h Thu, 14 Jan 1999 00:48:18 +0100 zcalusic (linux-2.1/w/b/28_swap.h 1.1.2.1.6.1.1.1.4.1.5.1 644)
+++ 2207.6/include/linux/swap.h Mon, 18 Jan 1999 13:07:04 +0100 zcalusic (linux-2.1/w/b/28_swap.h 1.1.2.1.6.1.1.1.4.1.5.1.1.1 644)
@@ -43,7 +43,6 @@
 	kdev_t swap_device;
 	struct dentry * swap_file;
 	unsigned short * swap_map;
-	unsigned char * swap_lockmap;
 	unsigned int lowest_bit;
 	unsigned int highest_bit;
 	unsigned int cluster_next;
@@ -79,7 +78,6 @@
 extern void rw_swap_page(int, unsigned long, char *, int);
 extern void rw_swap_page_nocache(int, unsigned long, char *);
 extern void rw_swap_page_nolock(int, unsigned long, char *, int);
-extern void swap_after_unlock_page (unsigned long entry);
 
 /* linux/mm/page_alloc.c */
 extern void swap_in(struct task_struct *, struct vm_area_struct *,
Index: 2207.4/include/linux/mm.h
--- 2207.4/include/linux/mm.h Mon, 18 Jan 1999 09:00:38 +0100 zcalusic (linux-2.1/z/b/9_mm.h 1.1.5.2.4.1.2.1.3.1.1.1 644)
+++ 2207.6/include/linux/mm.h Mon, 18 Jan 1999 13:07:04 +0100 zcalusic (linux-2.1/z/b/9_mm.h 1.1.5.2.4.1.2.1.3.1.1.2 644)
@@ -141,7 +141,7 @@
 #define PG_uptodate		 4
 #define PG_free_after		 5
 #define PG_decr_after		 6
-#define PG_swap_unlock_after	 7
+/* Unused			 7 */
 #define PG_DMA			 8
 #define PG_Slab			 9
 #define PG_swap_cache		10
@@ -156,7 +156,6 @@
 #define PageUptodate(page)	(test_bit(PG_uptodate, &(page)->flags))
 #define PageFreeAfter(page)	(test_bit(PG_free_after, &(page)->flags))
 #define PageDecrAfter(page)	(test_bit(PG_decr_after, &(page)->flags))
-#define PageSwapUnlockAfter(page) (test_bit(PG_swap_unlock_after, &(page)->flags))
 #define PageDMA(page)		(test_bit(PG_DMA, &(page)->flags))
 #define PageSlab(page)		(test_bit(PG_Slab, &(page)->flags))
 #define PageSwapCache(page)	(test_bit(PG_swap_cache, &(page)->flags))
Index: 2207.4/mm/swapfile.c
--- 2207.4/mm/swapfile.c Thu, 14 Jan 1999 00:48:18 +0100 zcalusic (linux-2.1/z/b/20_swapfile.c 1.2.7.1 644)
+++ 2207.6/mm/swapfile.c Mon, 18 Jan 1999 13:07:04 +0100 zcalusic (linux-2.1/z/b/20_swapfile.c 1.2.7.1.1.1 644)
@@ -41,8 +41,6 @@
 			offset = si->cluster_next++;
 			if (si->swap_map[offset])
 				continue;
-			if (test_bit(offset, si->swap_lockmap))
-				continue;
 			si->cluster_nr--;
 			goto got_page;
 		}
@@ -51,8 +49,6 @@
 	for (offset = si->lowest_bit; offset <= si->highest_bit ; offset++) {
 		if (si->swap_map[offset])
 			continue;
-		if (test_bit(offset, si->swap_lockmap))
-			continue;
 		si->lowest_bit = offset;
 got_page:
 		si->swap_map[offset] = 1;
@@ -423,8 +419,6 @@
 	p->swap_device = 0;
 	vfree(p->swap_map);
 	p->swap_map = NULL;
-	vfree(p->swap_lockmap);
-	p->swap_lockmap = NULL;
 	p->flags = 0;
 	err = 0;
 
@@ -489,9 +483,7 @@
 	static int least_priority = 0;
 	union swap_header *swap_header = 0;
 	int swap_header_version;
-	int lock_map_size = PAGE_SIZE;
 	int nr_good_pages = 0;
-	unsigned long tmp_lock_map = 0;
 	
 	lock_kernel();
 	if (!capable(CAP_SYS_ADMIN))
@@ -509,7 +501,6 @@
 	p->swap_file = NULL;
 	p->swap_device = 0;
 	p->swap_map = NULL;
-	p->swap_lockmap = NULL;
 	p->lowest_bit = 0;
 	p->highest_bit = 0;
 	p->cluster_nr = 0;
@@ -569,9 +560,7 @@
 		goto bad_swap;
 	}
 
-	p->swap_lockmap = (char *) &tmp_lock_map;
 	rw_swap_page_nocache(READ, SWP_ENTRY(type,0), (char *) swap_header);
-	p->swap_lockmap = NULL;
 
 	if (!memcmp("SWAP-SPACE",swap_header->magic.magic,10))
 		swap_header_version = 1;
@@ -649,7 +638,6 @@
 				p->swap_map[page] = SWAP_MAP_BAD;
 		}
 		nr_good_pages = swap_header->info.last_page - i;
-		lock_map_size = (p->max + 7) / 8;
 		if (error) 
 			goto bad_swap;
 	}
@@ -660,11 +648,6 @@
 		goto bad_swap;
 	}
 	p->swap_map[0] = SWAP_MAP_BAD;
-	if (!(p->swap_lockmap = vmalloc (lock_map_size))) {
-		error = -ENOMEM;
-		goto bad_swap;
-	}
-	memset(p->swap_lockmap,0,lock_map_size);
 	p->flags = SWP_WRITEOK;
 	p->pages = nr_good_pages;
 	nr_swap_pages += nr_good_pages;
@@ -691,15 +674,12 @@
 	if(filp.f_op && filp.f_op->release)
 		filp.f_op->release(filp.f_dentry->d_inode,&filp);
 bad_swap_2:
-	if (p->swap_lockmap)
-		vfree(p->swap_lockmap);
 	if (p->swap_map)
 		vfree(p->swap_map);
 	dput(p->swap_file);
 	p->swap_device = 0;
 	p->swap_file = NULL;
 	p->swap_map = NULL;
-	p->swap_lockmap = NULL;
 	p->flags = 0;
 out:
 	if (swap_header)
Index: 2207.4/mm/page_io.c
--- 2207.4/mm/page_io.c Tue, 29 Dec 1998 15:51:24 +0100 zcalusic (linux-2.1/z/b/22_page_io.c 1.2.6.1.1.1 644)
+++ 2207.6/mm/page_io.c Mon, 18 Jan 1999 13:07:04 +0100 zcalusic (linux-2.1/z/b/22_page_io.c 1.2.6.1.1.1.6.1 644)
@@ -18,8 +18,6 @@
 
 #include <asm/pgtable.h>
 
-static struct wait_queue * lock_queue = NULL;
-
 /*
  * Reads or writes a swap page.
  * wait=1: start I/O and wait for completion. wait=0: start asynchronous I/O.
@@ -85,12 +83,6 @@
 	}
 
 	if (PageSwapCache(page)) {
-		/* Make sure we are the only process doing I/O with this swap page. */
-		while (test_and_set_bit(offset,p->swap_lockmap)) {
-			run_task_queue(&tq_disk);
-			sleep_on(&lock_queue);
-		}
-
 		/* 
 		 * Make sure that we have a swap cache association for this
 		 * page.  We need this to find which swap page to unlock once
@@ -162,11 +154,6 @@
 		/* Do some cleaning up so if this ever happens we can hopefully
 		 * trigger controlled shutdown.
 		 */
-		if (PageSwapCache(page)) {
-			if (!test_and_clear_bit(offset,p->swap_lockmap))
-				printk("swap_after_unlock_page: lock already cleared\n");
-			wake_up(&lock_queue);
-		}
 		atomic_dec(&page->count);
 		return;
 	}
@@ -174,19 +161,11 @@
  		set_bit(PG_decr_after, &page->flags);
  		atomic_inc(&nr_async_pages);
  	}
- 	if (PageSwapCache(page)) {
- 		/* only lock/unlock swap cache pages! */
- 		set_bit(PG_swap_unlock_after, &page->flags);
- 	}
  	set_bit(PG_free_after, &page->flags);
 
  	/* block_size == PAGE_SIZE/zones_used */
  	brw_page(rw, page, dev, zones, block_size, 0);
  
- 	/* Note! For consistency we do all of the logic,
- 	 * decrementing the page count, and unlocking the page in the
- 	 * swap lock map - in the IO completion handler.
- 	 */
  	if (!wait) 
  		return;
  	wait_on_page(page);
@@ -202,34 +181,6 @@
 #endif
 }
 
-/* Note: We could remove this totally asynchronous function,
- * and improve swap performance, and remove the need for the swap lock map,
- * by not removing pages from the swap cache until after I/O has been
- * processed and letting remove_from_page_cache decrement the swap count
- * just before it removes the page from the page cache.
- */
-/* This is run when asynchronous page I/O has completed. */
-void swap_after_unlock_page (unsigned long entry)
-{
-	unsigned long type, offset;
-	struct swap_info_struct * p;
-
-	type = SWP_TYPE(entry);
-	if (type >= nr_swapfiles) {
-		printk("swap_after_unlock_page: bad swap-device\n");
-		return;
-	}
-	p = &swap_info[type];
-	offset = SWP_OFFSET(entry);
-	if (offset >= p->max) {
-		printk("swap_after_unlock_page: weirdness\n");
-		return;
-	}
-	if (!test_and_clear_bit(offset,p->swap_lockmap))
-		printk("swap_after_unlock_page: lock already cleared\n");
-	wake_up(&lock_queue);
-}
-
 /* A simple wrapper so the base function doesn't need to enforce
  * that all swap pages go through the swap cache!
  */
@@ -287,12 +238,6 @@
 	clear_bit(PG_swap_cache, &page->flags);
 }
 
-/*
- * shmfs needs a version that doesn't put the page in the page cache!
- * The swap lock map insists that pages be in the page cache!
- * Therefore we can't use it.  Later when we can remove the need for the
- * lock map and we can reduce the number of functions exported.
- */
 void rw_swap_page_nolock(int rw, unsigned long entry, char *buffer, int wait)
 {
 	struct page *page = mem_map + MAP_NR((unsigned long) buffer);
Index: 2207.4/mm/page_alloc.c
--- 2207.4/mm/page_alloc.c Thu, 14 Jan 1999 00:48:18 +0100 zcalusic (linux-2.1/z/b/26_page_alloc 1.2.6.1.1.2.4.1.1.1.3.1 644)
+++ 2207.6/mm/page_alloc.c Mon, 18 Jan 1999 13:07:04 +0100 zcalusic (linux-2.1/z/b/26_page_alloc 1.2.6.1.1.2.4.1.1.1.3.2 644)
@@ -368,8 +368,6 @@
 			break;
 		if (swapdev->swap_map[offset] == SWAP_MAP_BAD)
 			break;
-		if (test_bit(offset, swapdev->swap_lockmap))
-			break;
 
 		/* Ok, do the async read-ahead now */
 		new_page = read_swap_cache_async(SWP_ENTRY(SWP_TYPE(entry), offset), 0);
Index: 2207.4/fs/buffer.c
--- 2207.4/fs/buffer.c Mon, 18 Jan 1999 09:00:38 +0100 zcalusic (linux-2.1/G/b/41_buffer.c 1.1.1.1.1.3.2.1.2.1.3.1 644)
+++ 2207.6/fs/buffer.c Mon, 18 Jan 1999 13:07:04 +0100 zcalusic (linux-2.1/G/b/41_buffer.c 1.1.1.1.1.3.2.1.2.1.3.2 644)
@@ -1141,8 +1141,6 @@
 			atomic_read(&nr_async_pages));
 #endif
 	}
-	if (test_and_clear_bit(PG_swap_unlock_after, &page->flags))
-		swap_after_unlock_page(page->offset);
 	if (test_and_clear_bit(PG_free_after, &page->flags))
 		__free_page(page);
 }

[-- Attachment #3: Type: text/plain, Size: 12 bytes --]


-- 
Zlatko

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: Removing swap lockmap...
  1999-01-18 15:12 Removing swap lockmap Zlatko Calusic
@ 1999-01-18 20:26 ` Andrea Arcangeli
  1999-01-18 22:24   ` BUG: deadlock in swap lockmap handling Alan Cox
  1999-01-18 21:46 ` Removing swap lockmap Stephen C. Tweedie
  1999-01-19  0:34 ` Eric W. Biederman
  2 siblings, 1 reply; 10+ messages in thread
From: Andrea Arcangeli @ 1999-01-18 20:26 UTC (permalink / raw
  To: Zlatko Calusic, Stephen C. Tweedie, Linus Torvalds
  Cc: Linux-MM List, Linux Kernel List

On 18 Jan 1999, Zlatko Calusic wrote:

> I removed swap lockmap all together and, to my surprise, I can't
> produce any ill behaviour on my system, not even under very heavy
> swapping (in low memory condition).

Looking at your patch (and so looking at the swap_lockmap code) I found a
potential deadlock in the current swap_lockmap handling: 

	task A				task B
	----------			-------------
	rw_swap_page_base()
	
	...if (test_and_set_bit(lockmap))
		... run_task_queue()
					swap_after_unlock_page()
						... clear_bit(lockmap)
						.... wakeup(&lock_queue)
		...sleep_on(&lock_queue);
		deadlocked

I think it will not harm too much because the window is not too big (but
not small) and because usually one of the process not yet deadlocked will
generate IO and will wakeup also the deadlocked process at I/O
completation time. A very lazy ;) but at the same time obviosly right
(that should not harm performances at all) fix could be to replace the
sleep_on() with a sleep_on_timeout(..., 1).

Index: page_io.c
===================================================================
RCS file: /var/cvs/linux/mm/page_io.c,v
retrieving revision 1.1.2.1
diff -u -r1.1.2.1 page_io.c
--- page_io.c	1999/01/18 01:32:53	1.1.2.1
+++ linux/mm/page_io.c	1999/01/18 20:21:41
@@ -88,7 +88,7 @@
 		/* Make sure we are the only process doing I/O with this swap page. */
 		while (test_and_set_bit(offset,p->swap_lockmap)) {
 			run_task_queue(&tq_disk);
-			sleep_on(&lock_queue);
+			sleep_on_timeout(&lock_queue, 1);
 		}
 
 		/* 


I think we need the swap_lockmap in the shm case because without swap
cache a swapin could happen at the same time of the swapout because
find_in_swap_cache() won't work there. 

Andrea Arcangeli

--
This is a majordomo managed list.  To unsubscribe, send a message with
the body 'unsubscribe linux-mm me@address' to: majordomo@kvack.org

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: Removing swap lockmap...
  1999-01-18 15:12 Removing swap lockmap Zlatko Calusic
  1999-01-18 20:26 ` Andrea Arcangeli
@ 1999-01-18 21:46 ` Stephen C. Tweedie
  1999-01-19  1:33   ` Zlatko Calusic
  1999-01-19  0:34 ` Eric W. Biederman
  2 siblings, 1 reply; 10+ messages in thread
From: Stephen C. Tweedie @ 1999-01-18 21:46 UTC (permalink / raw
  To: Zlatko.Calusic; +Cc: Linux-MM List, Linux Kernel List, Stephen Tweedie

Hi,

In article <87iue47gy4.fsf@atlas.CARNet.hr>, Zlatko Calusic
<Zlatko.Calusic@CARNet.hr> writes:

> I removed swap lockmap all together and, to my surprise, I can't
> produce any ill behaviour on my system, not even under very heavy
> swapping (in low memory condition).

Just because you can't reproduce it doesn't mean it works perfectly.
There was a very good reason why the swap lock map was still required
until recently.  The race condition it fixed wass an obscure one but
still important.  However, very recent VM changes make me wonder if it
is still absolutely necessary.  

The problem was that if we swapped out a page, we might sometimes remove
the swap cache for the page before the IO was complete.  If we can
_guarantee_ that the swap cache will persist until after the IO is
complete, then any future attempt to use that swap page will find that
the page is locked and will wait for the IO to complete.

However, if in fact the swap cache for the page _ever_ gets removed
before the IO completes, then a future read in of the page might start
before the current write had completed.  This has been observed in
practice.  The swap lock protects against this.

Now that we always keep the swap cache intact in mm/vmscan.c and only
reclaim it in mm/filemap.c, we might in fact be safe omiting the swap
lock.  I'd be nervous about it without a _thorough_ audit of the code,
though, as this particular race is hard to reproduce.

--Stephen
--
This is a majordomo managed list.  To unsubscribe, send a message with
the body 'unsubscribe linux-mm me@address' to: majordomo@kvack.org

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: BUG: deadlock in swap lockmap handling
  1999-01-18 20:26 ` Andrea Arcangeli
@ 1999-01-18 22:24   ` Alan Cox
  0 siblings, 0 replies; 10+ messages in thread
From: Alan Cox @ 1999-01-18 22:24 UTC (permalink / raw
  To: andrea; +Cc: Zlatko.Calusic, sct, torvalds, linux-mm, linux-kernel

> I think it will not harm too much because the window is not too big (but
> not small) and because usually one of the process not yet deadlocked will
> generate IO and will wakeup also the deadlocked process at I/O
> completation time. A very lazy ;) but at the same time obviosly right

Take it from me - the scenario you give will cause deadlocks and problems.
There were other "generating an I/O would have cleaned up" type problems in
2.0.x < .35/6. They caused a lot of grief with installers where that 
I/O assumption is not true. Another classic case is large fsck's during
boot up.

So its not just a trivial irrelevant fix.

--
This is a majordomo managed list.  To unsubscribe, send a message with
the body 'unsubscribe linux-mm me@address' to: majordomo@kvack.org

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: Removing swap lockmap...
  1999-01-18 15:12 Removing swap lockmap Zlatko Calusic
  1999-01-18 20:26 ` Andrea Arcangeli
  1999-01-18 21:46 ` Removing swap lockmap Stephen C. Tweedie
@ 1999-01-19  0:34 ` Eric W. Biederman
  1999-01-19  1:37   ` Zlatko Calusic
  2 siblings, 1 reply; 10+ messages in thread
From: Eric W. Biederman @ 1999-01-19  0:34 UTC (permalink / raw
  To: Zlatko.Calusic; +Cc: Linux-MM List, Linux Kernel List

>>>>> "ZC" == Zlatko Calusic <Zlatko.Calusic@CARNet.hr> writes:


ZC> I removed swap lockmap all together and, to my surprise, I can't
ZC> produce any ill behaviour on my system, not even under very heavy
ZC> swapping (in low memory condition).

ZC> I remember there were some issues when swap lockmap was removed in
ZC> 2.1.89, so it was reintroduced later (processes were dying randomly).


ZC> Question is, why is everything running so smoothly now, even without
ZC> swap lockmap?

For this patch to be safe we need to 
A) Fix sysv shm to use the swap cache.
B) garantee that shrink_mmap is the only place that
   removes a page from the swap cache, and that it never removes
   a page while I/O is in progress, (as Stephen said).

This means a lot of the current cases like delete_from_swap_cache,
and free_page_and_swap_cache need to be removed.

And we need to be very carefull when we break down the swap cache,
and make it an unshared page.

The change to normally remove pages with shrink_mmap is what makes it
mostly safe now.

For 2.3 it should go.  For 2.2 it should stay.

Eric
--
This is a majordomo managed list.  To unsubscribe, send a message with
the body 'unsubscribe linux-mm me@address' to: majordomo@kvack.org

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: Removing swap lockmap...
  1999-01-18 21:46 ` Removing swap lockmap Stephen C. Tweedie
@ 1999-01-19  1:33   ` Zlatko Calusic
  0 siblings, 0 replies; 10+ messages in thread
From: Zlatko Calusic @ 1999-01-19  1:33 UTC (permalink / raw
  To: Stephen C. Tweedie; +Cc: Linux-MM List, Linux Kernel List

"Stephen C. Tweedie" <sct@redhat.com> writes:

> Hi,
> 
> In article <87iue47gy4.fsf@atlas.CARNet.hr>, Zlatko Calusic
> <Zlatko.Calusic@CARNet.hr> writes:
> 
> > I removed swap lockmap all together and, to my surprise, I can't
> > produce any ill behaviour on my system, not even under very heavy
> > swapping (in low memory condition).
> 
> Just because you can't reproduce it doesn't mean it works perfectly.
> There was a very good reason why the swap lock map was still required
> until recently.  The race condition it fixed wass an obscure one but
> still important.  However, very recent VM changes make me wonder if it
> is still absolutely necessary.  
> 
> The problem was that if we swapped out a page, we might sometimes remove
> the swap cache for the page before the IO was complete.  If we can
> _guarantee_ that the swap cache will persist until after the IO is
> complete, then any future attempt to use that swap page will find that
> the page is locked and will wait for the IO to complete.
> 
> However, if in fact the swap cache for the page _ever_ gets removed
> before the IO completes, then a future read in of the page might start
> before the current write had completed.  This has been observed in
> practice.  The swap lock protects against this.

Yes, this is what I observed by reading some older articles from
linux-mm list (mostly conversation between you and Eric). So, I
decided to remove swap lockmap, reproduce problems and then try to fix
them. There is even one Eric's useful comment (removed in my patch)
that is useful in deciding what should be done to prevent problems.

But, interesting thing is that whatever I do, I CAN'T get into trouble 
and that is interesting part. :-)

> 
> Now that we always keep the swap cache intact in mm/vmscan.c and only
> reclaim it in mm/filemap.c, we might in fact be safe omiting the swap
> lock.  I'd be nervous about it without a _thorough_ audit of the code,
> though, as this particular race is hard to reproduce.
> 

Yes, I have the same worries now so close to real 2.2.0, that's why I
see this patch strictly experimental (it is not sent to Linus,
directly!). Maybe, someone of you could try it for yourself, and if
enough people test it, and if we try hard to understand why it seems
to not make thing worse, maybe there's a slight chance it could go in
for 2.2, or at least as a first thing in 2.3.0, so it gets heavily
tested and debugged.

But, I'm also nervous about it...
I rememeber there really were some nasty silent deaths while lockmap was
removed in some revisions during 2.1.x. :-(

Thanks for comments.
-- 
Zlatko
--
This is a majordomo managed list.  To unsubscribe, send a message with
the body 'unsubscribe linux-mm me@address' to: majordomo@kvack.org

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: Removing swap lockmap...
  1999-01-19  0:34 ` Eric W. Biederman
@ 1999-01-19  1:37   ` Zlatko Calusic
  1999-01-19 18:15     ` Andrea Arcangeli
  0 siblings, 1 reply; 10+ messages in thread
From: Zlatko Calusic @ 1999-01-19  1:37 UTC (permalink / raw
  To: Eric W. Biederman; +Cc: Linux-MM List, Linux Kernel List

ebiederm+eric@ccr.net (Eric W. Biederman) writes:

> >>>>> "ZC" == Zlatko Calusic <Zlatko.Calusic@CARNet.hr> writes:
> 
> 
> ZC> I removed swap lockmap all together and, to my surprise, I can't
> ZC> produce any ill behaviour on my system, not even under very heavy
> ZC> swapping (in low memory condition).
> 
> ZC> I remember there were some issues when swap lockmap was removed in
> ZC> 2.1.89, so it was reintroduced later (processes were dying randomly).
> 
> 
> ZC> Question is, why is everything running so smoothly now, even without
> ZC> swap lockmap?
> 
> For this patch to be safe we need to 
> A) Fix sysv shm to use the swap cache.

Yes, this case probably doesn't get enough testing with my current
setup, so it is quite hard (for me) to prove removing lockmap is
no-no. Problem is that I don't understand shm swapping very well, it
is piggybacked on "regular" MM as a special case, and I didn't had
time to go through it, yet.

> B) garantee that shrink_mmap is the only place that
>    removes a page from the swap cache, and that it never removes
>    a page while I/O is in progress, (as Stephen said).
> 
> This means a lot of the current cases like delete_from_swap_cache,
> and free_page_and_swap_cache need to be removed.

Yes, I believe you on this one.

> 
> And we need to be very carefull when we break down the swap cache,
> and make it an unshared page.
> 
> The change to normally remove pages with shrink_mmap is what makes it
> mostly safe now.
> 
> For 2.3 it should go.  For 2.2 it should stay.
> 

I'll still be testing it, nevertheless. Maybe after some time I get in 
some trouble, and then at least I'll know what to do. :-)

Regards,
-- 
Zlatko
--
This is a majordomo managed list.  To unsubscribe, send a message with
the body 'unsubscribe linux-mm me@address' to: majordomo@kvack.org

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: Removing swap lockmap...
  1999-01-19  1:37   ` Zlatko Calusic
@ 1999-01-19 18:15     ` Andrea Arcangeli
  1999-01-20 17:09       ` Stephen C. Tweedie
  0 siblings, 1 reply; 10+ messages in thread
From: Andrea Arcangeli @ 1999-01-19 18:15 UTC (permalink / raw
  To: Zlatko Calusic; +Cc: Eric W. Biederman, Linux-MM List, Linux Kernel List

On 19 Jan 1999, Zlatko Calusic wrote:

> Yes, this case probably doesn't get enough testing with my current
> setup, so it is quite hard (for me) to prove removing lockmap is
> no-no. Problem is that I don't understand shm swapping very well

Launch some time this proggy to try out shm swapping:

/*
 * Copyright (C) 1999  Andrea Arcangeli
 * shm swapout test
 */

#include <sys/ipc.h>
#include <sys/shm.h>

#define SIZE 16000000

main()
{
	int shmid;
	char *addr, *p;
	if ((shmid = shmget(IPC_PRIVATE, SIZE, IPC_CREAT | 0644)) < 0)
		perror("shmget");
	if ((addr = shmat(shmid, NULL, 0)) < 0)
		perror("shmat");
	for (p = addr; p < addr + SIZE; p+=4096)
		*p = 0;
}

To know if the lockmap is needed you can also reinsert the code and add a
printk() in the test_and_set_bit() path.

Andrea Arcangeli

--
This is a majordomo managed list.  To unsubscribe, send a message with
the body 'unsubscribe linux-mm me@address' to: majordomo@kvack.org

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: Removing swap lockmap...
  1999-01-19 18:15     ` Andrea Arcangeli
@ 1999-01-20 17:09       ` Stephen C. Tweedie
  1999-01-20 18:14         ` Zlatko Calusic
  0 siblings, 1 reply; 10+ messages in thread
From: Stephen C. Tweedie @ 1999-01-20 17:09 UTC (permalink / raw
  To: Andrea Arcangeli
  Cc: Zlatko Calusic, Eric W. Biederman, Linux-MM List,
	Linux Kernel List

Hi,

On Tue, 19 Jan 1999 19:15:51 +0100 (CET), Andrea Arcangeli
<andrea@e-mind.com> said:

> On 19 Jan 1999, Zlatko Calusic wrote:
>> Yes, this case probably doesn't get enough testing with my current
>> setup, so it is quite hard (for me) to prove removing lockmap is
>> no-no. Problem is that I don't understand shm swapping very well

> Launch some time this proggy to try out shm swapping:

I have a shared memory stresser anyway if you want.  However, when we
originally took out the lock map, it proved _very_ hard to reproduce the
race, and only a couple of people reported problems.  Plain stress
testing with random access patterns over a day or more did not show up
the problem.  

These races can be very nasty and hard to trigger.  You really do need
to think the change through rather than just try-it-and-see.

--Stephen
--
This is a majordomo managed list.  To unsubscribe, send a message with
the body 'unsubscribe linux-mm me@address' to: majordomo@kvack.org

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: Removing swap lockmap...
  1999-01-20 17:09       ` Stephen C. Tweedie
@ 1999-01-20 18:14         ` Zlatko Calusic
  0 siblings, 0 replies; 10+ messages in thread
From: Zlatko Calusic @ 1999-01-20 18:14 UTC (permalink / raw
  To: Stephen C. Tweedie
  Cc: Andrea Arcangeli, Eric W. Biederman, Linux-MM List,
	Linux Kernel List

"Stephen C. Tweedie" <sct@redhat.com> writes:

> Hi,
> 
> On Tue, 19 Jan 1999 19:15:51 +0100 (CET), Andrea Arcangeli
> <andrea@e-mind.com> said:
> 
> > On 19 Jan 1999, Zlatko Calusic wrote:
> >> Yes, this case probably doesn't get enough testing with my current
> >> setup, so it is quite hard (for me) to prove removing lockmap is
> >> no-no. Problem is that I don't understand shm swapping very well
> 
> > Launch some time this proggy to try out shm swapping:
> 
> I have a shared memory stresser anyway if you want.  However, when we
> originally took out the lock map, it proved _very_ hard to reproduce the
> race, and only a couple of people reported problems.  Plain stress
> testing with random access patterns over a day or more did not show up
> the problem.  
> 
> These races can be very nasty and hard to trigger.  You really do need
> to think the change through rather than just try-it-and-see.
> 

Yes, after some time, I agree completely.

So I decided to take the path you're suggesting, but of course, it'll
take some time to prove anything (if at all).

Anecdote:

In fact removing swap lockmap produced very interesting situation for
me. 2.1.89 introduced the biggest improvements in MM code I saw in
2.1.x series, but also that subtle race condition which decided to
reveal itself slightly later.

At some time, let's say around 2.1.95 :), I decided to buy another
32MB of RAM, to let my Netscape and XEmacs work together more
smoothly. And there came my friend telling me he's just about to buy
128 MB of RAM, and is thus willing to give me his 2 x 16MB SIMM-s (to
make space in his computer), with adequately lower price than in the
stores. I was slightly worried that maybe he's trying to get rid of
bad SIMM-s, but eventually I agreed and made a deal (after all, what
kind of friend would he be, selling broken things to his buddy :)).

And then it started. My daemons started to dissapear misteriously (but
very rarely, once in a few days or so). Few times, while stressing the
system, X died in a horrible death taking away all my applications,
and nothing like that didn't happen for a long time before that.

So what would you think in such a situation? :)

Well, thanks to few very useful mails exchanged between you and Eric
(that appeared on the linux-mm list), my friendship was saved :), and
I understood that my problems were not due to a bad memory, but a
stubborn race condition, hard to track, and hard to reproduce as we
know now. Not to mention that it behaved much like a bad memory.

In fact, all that was a RL "race condition", testing new memory and
beta code at the same time. That's what you get living on the bleeding 
edge. :)

Sorry for spamming the list(s) with long stories like this.

Regards,
-- 
Zlatko
--
This is a majordomo managed list.  To unsubscribe, send a message with
the body 'unsubscribe linux-mm me@address' to: majordomo@kvack.org

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~1999-01-20 18:15 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
1999-01-18 15:12 Removing swap lockmap Zlatko Calusic
1999-01-18 20:26 ` Andrea Arcangeli
1999-01-18 22:24   ` BUG: deadlock in swap lockmap handling Alan Cox
1999-01-18 21:46 ` Removing swap lockmap Stephen C. Tweedie
1999-01-19  1:33   ` Zlatko Calusic
1999-01-19  0:34 ` Eric W. Biederman
1999-01-19  1:37   ` Zlatko Calusic
1999-01-19 18:15     ` Andrea Arcangeli
1999-01-20 17:09       ` Stephen C. Tweedie
1999-01-20 18:14         ` Zlatko Calusic

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.