From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from penguin.e-mind.com (penguin.e-mind.com [195.223.140.120]) by kvack.org (8.8.7/8.8.7) with ESMTP id HAA25112 for ; Thu, 14 Jan 1999 07:34:00 -0500 Date: Thu, 14 Jan 1999 13:30:44 +0100 (CET) From: Andrea Arcangeli Reply-To: Andrea Arcangeli Subject: Re: [patch] arca-vm-19 [Re: Results: Zlatko's new vm patch] In-Reply-To: Message-ID: MIME-Version: 1.0 Content-Type: TEXT/PLAIN; charset=US-ASCII Sender: owner-linux-mm@kvack.org To: Steve Bergman , dlux@dlux.sch.bme.hu, "Nicholas J. Leon" Cc: Linus Torvalds , brent verner , "Garst R. Reese" , Kalle Andersson , Zlatko Calusic , Ben McCann , bredelin@ucsd.edu, linux-kernel@vger.rutgers.edu, linux-mm@kvack.org, Alan Cox , "Stephen C. Tweedie" , Heinz Mauelshagen List-ID: On Wed, 13 Jan 1999, Andrea Arcangeli wrote: > I produced a new arca-vm-19. I would like if you could try it. I don't It seems that the best algorithm I have been able to invent is the growing_swap_cache one (the one in arca-vm-16). Steve, could you try this new patch (arca-vm-20) against real 2.2.0-pre7? I think that it should still be better than arca-vm-16 + SWAP_CLUSTER_MAX=512. If it is not very good, could you do: echo 8 2 4 512 512 512 > /proc/sys/vm/pager and try again? (These numbers should be the same as setting SWAP_CLUSTER_MAX in arca-vm-16, but by default only max_async_pages is set to 512 because I think it is the only one that made a difference.) If this is still not the best, could you apply the filemap.c patch I sent you in the last email (the one that goes back to making the shrink_mmap() weight increase exponentially as a function of priority) and try again? Many thanks! 
Andrea Arcangeli Here arca-vm-20 against 2.2.0-pre7: Index: linux/mm/filemap.c diff -u linux/mm/filemap.c:1.1.1.9 linux/mm/filemap.c:1.1.1.1.2.48 --- linux/mm/filemap.c:1.1.1.9 Thu Jan 7 12:21:35 1999 +++ linux/mm/filemap.c Thu Jan 14 13:15:32 1999 @@ -121,14 +125,11 @@ int shrink_mmap(int priority, int gfp_mask) { static unsigned long clock = 0; - unsigned long limit = num_physpages; struct page * page; - int count; - - count = (limit << 1) >> priority; + unsigned long count = num_physpages / (priority+1); page = mem_map + clock; - do { + while (count-- != 0) { int referenced; /* This works even in the presence of PageSkip because @@ -147,7 +148,6 @@ clock = page->map_nr; } - count--; referenced = test_and_clear_bit(PG_referenced, &page->flags); if (PageLocked(page)) @@ -160,21 +160,6 @@ if (atomic_read(&page->count) != 1) continue; - /* - * Is it a page swap page? If so, we want to - * drop it if it is no longer used, even if it - * were to be marked referenced.. - */ - if (PageSwapCache(page)) { - if (referenced && swap_count(page->offset) != 1) - continue; - delete_from_swap_cache(page); - return 1; - } - - if (referenced) - continue; - /* Is it a buffer page? */ if (page->buffers) { if (buffer_under_min()) @@ -184,6 +169,14 @@ return 1; } + if (referenced) + continue; + + if (PageSwapCache(page)) { + delete_from_swap_cache(page); + return 1; + } + /* is it a page-cache page? 
*/ if (page->inode) { if (pgcache_under_min()) @@ -191,8 +184,7 @@ remove_inode_page(page); return 1; } - - } while (count > 0); + } return 0; } Index: linux/mm/mmap.c diff -u linux/mm/mmap.c:1.1.1.2 linux/mm/mmap.c:1.1.1.1.2.12 --- linux/mm/mmap.c:1.1.1.2 Fri Nov 27 11:19:10 1998 +++ linux/mm/mmap.c Wed Jan 13 21:23:38 1999 @@ -66,7 +66,7 @@ free += page_cache_size; free += nr_free_pages; free += nr_swap_pages; - free -= (page_cache.min_percent + buffer_mem.min_percent + 2)*num_physpages/100; + free -= (pager_daemon.cache_min_percent + pager_daemon.buffer_min_percent + 2)*num_physpages/100; return free > pages; } Index: linux/mm/page_alloc.c diff -u linux/mm/page_alloc.c:1.1.1.9 linux/mm/page_alloc.c:1.1.1.1.2.31 --- linux/mm/page_alloc.c:1.1.1.9 Thu Jan 14 12:32:57 1999 +++ linux/mm/page_alloc.c Thu Jan 14 12:42:59 1999 @@ -124,7 +124,6 @@ if (!PageReserved(page) && atomic_dec_and_test(&page->count)) { if (PageSwapCache(page)) panic ("Freeing swap cache page"); - page->flags &= ~(1 << PG_referenced); free_pages_ok(page->map_nr, 0); return; } @@ -141,7 +140,6 @@ if (atomic_dec_and_test(&map->count)) { if (PageSwapCache(map)) panic ("Freeing swap cache pages"); - map->flags &= ~(1 << PG_referenced); free_pages_ok(map_nr, order); return; } @@ -212,19 +210,18 @@ * further thought. 
*/ if (!(current->flags & PF_MEMALLOC)) { - static int trashing = 0; int freed; if (nr_free_pages > freepages.min) { - if (!trashing) + if (!current->trashing) goto ok_to_allocate; if (nr_free_pages > freepages.low) { - trashing = 0; + current->trashing = 0; goto ok_to_allocate; } } - trashing = 1; + current->trashing = 1; current->flags |= PF_MEMALLOC; freed = try_to_free_pages(gfp_mask); current->flags &= ~PF_MEMALLOC; @@ -361,7 +358,7 @@ if (offset >= swapdev->max) break; /* Don't block on I/O for read-ahead */ - if (atomic_read(&nr_async_pages) >= pager_daemon.swap_cluster) + if (atomic_read(&nr_async_pages) >= pager_daemon.max_async_pages) break; /* Don't read in bad or busy pages */ if (!swapdev->swap_map[offset]) Index: linux/mm/page_io.c diff -u linux/mm/page_io.c:1.1.1.4 linux/mm/page_io.c:1.1.1.1.2.6 --- linux/mm/page_io.c:1.1.1.4 Tue Dec 29 01:39:20 1998 +++ linux/mm/page_io.c Wed Jan 13 00:00:04 1999 @@ -58,7 +58,7 @@ } /* Don't allow too many pending pages in flight.. */ - if (atomic_read(&nr_async_pages) > pager_daemon.swap_cluster) + if (atomic_read(&nr_async_pages) > pager_daemon.max_async_pages) wait = 1; p = &swap_info[type]; Index: linux/mm/swap.c diff -u linux/mm/swap.c:1.1.1.6 linux/mm/swap.c:1.1.1.1.2.14 --- linux/mm/swap.c:1.1.1.6 Mon Jan 11 22:24:24 1999 +++ linux/mm/swap.c Thu Jan 14 13:15:32 1999 @@ -40,41 +40,17 @@ }; /* How many pages do we try to swap or page in/out together? 
*/ -int page_cluster = 4; /* Default value modified in swap_setup() */ +int page_cluster = 5; /* Default readahead 32 pages every time */ /* We track the number of pages currently being asynchronously swapped out, so that we don't try to swap TOO many pages out at once */ atomic_t nr_async_pages = ATOMIC_INIT(0); -buffer_mem_t buffer_mem = { - 2, /* minimum percent buffer */ - 10, /* borrow percent buffer */ - 60 /* maximum percent buffer */ -}; - -buffer_mem_t page_cache = { - 2, /* minimum percent page cache */ - 15, /* borrow percent page cache */ - 75 /* maximum */ -}; - pager_daemon_t pager_daemon = { - 512, /* base number for calculating the number of tries */ - SWAP_CLUSTER_MAX, /* minimum number of tries */ - SWAP_CLUSTER_MAX, /* do swap I/O in clusters of this size */ + 8, /* starting priority of try_to_free_pages() */ + 2, /* minimum percent buffer */ + 4, /* minimum percent page cache */ + 32, /* number of tries we do on every try_to_free_pages() */ + 128, /* do swap I/O in clusters of this size */ + 512 /* max number of async swapped-out pages on the fly */ }; - -/* - * Perform any setup for the swap system - */ - -void __init swap_setup(void) -{ - /* Use a smaller cluster for memory <16MB or <32MB */ - if (num_physpages < ((16 * 1024 * 1024) >> PAGE_SHIFT)) - page_cluster = 2; - else if (num_physpages < ((32 * 1024 * 1024) >> PAGE_SHIFT)) - page_cluster = 3; - else - page_cluster = 4; -} Index: linux/mm/swapfile.c diff -u linux/mm/swapfile.c:1.1.1.3 linux/mm/swapfile.c:1.1.1.1.2.6 --- linux/mm/swapfile.c:1.1.1.3 Mon Jan 11 22:24:24 1999 +++ linux/mm/swapfile.c Wed Jan 13 00:00:04 1999 @@ -23,7 +23,6 @@ struct swap_info_struct swap_info[MAX_SWAPFILES]; -#define SWAPFILE_CLUSTER 256 static inline int scan_swap_map(struct swap_info_struct *si) { @@ -31,7 +30,7 @@ /* * We try to cluster swap pages by allocating them * sequentially in swap. 
Once we've allocated - * SWAPFILE_CLUSTER pages this way, however, we resort to + * SWAP_CLUSTER pages this way, however, we resort to * first-free allocation, starting a new cluster. This * prevents us from scattering swap pages all over the entire * swap partition, so that we reduce overall disk seek times @@ -47,7 +46,7 @@ goto got_page; } } - si->cluster_nr = SWAPFILE_CLUSTER; + si->cluster_nr = SWAP_CLUSTER; for (offset = si->lowest_bit; offset <= si->highest_bit ; offset++) { if (si->swap_map[offset]) continue; Index: linux/mm/vmscan.c diff -u linux/mm/vmscan.c:1.1.1.12 linux/mm/vmscan.c:1.1.1.1.2.93 --- linux/mm/vmscan.c:1.1.1.12 Mon Jan 11 22:24:24 1999 +++ linux/mm/vmscan.c Thu Jan 14 13:15:32 1999 @@ -10,6 +10,11 @@ * Version: $Id: vmscan.c,v 1.5 1998/02/23 22:14:28 sct Exp $ */ +/* + * free_user_and_cache() and always async swapout original idea. + * Copyright (C) 1999 Andrea Arcangeli + */ + #include #include #include @@ -20,6 +25,8 @@ #include +int swapout_interval = HZ; + /* * The swap-out functions return 1 if they successfully * threw something out, and we got a free page. It returns @@ -71,6 +78,21 @@ * memory, and we should just continue our scan. */ if (PageSwapCache(page_map)) { + if (pte_write(pte)) + { + struct page *found; + printk ("VM: Found a writable swap-cached page!\n"); + /* Try to diagnose the problem ... */ + found = find_page(&swapper_inode, page_map->offset); + if (found) { + printk("page=%p@%08lx, found=%p, count=%d\n", + page_map, page_map->offset, + found, atomic_read(&found->count)); + __free_page(found); + } else + printk ("Spurious, page not in cache\n"); + return 0; + } entry = page_map->offset; swap_duplicate(entry); set_pte(page_table, __pte(entry)); @@ -199,7 +221,7 @@ do { int result; - tsk->swap_address = address + PAGE_SIZE; + tsk->mm->swap_address = address + PAGE_SIZE; result = try_to_swap_out(tsk, vma, address, pte, gfp_mask); if (result) return result; @@ -271,7 +293,7 @@ /* * Go through process' page directory. 
*/ - address = p->swap_address; + address = p->mm->swap_address; /* * Find the proper vm-area @@ -293,8 +315,8 @@ } /* We didn't find anything for the process */ - p->swap_cnt = 0; - p->swap_address = 0; + p->mm->swap_cnt = 0; + p->mm->swap_address = 0; return 0; } @@ -306,7 +328,8 @@ static int swap_out(unsigned int priority, int gfp_mask) { struct task_struct * p, * pbest; - int counter, assign, max_cnt; + int counter, assign; + unsigned long max_cnt; /* * We make one or two passes through the task list, indexed by @@ -325,7 +348,7 @@ counter = nr_tasks / (priority+1); if (counter < 1) counter = 1; - if (counter > nr_tasks) + else if (counter > nr_tasks) counter = nr_tasks; for (; counter >= 0; counter--) { @@ -338,13 +361,13 @@ for (; p != &init_task; p = p->next_task) { if (!p->swappable) continue; - if (p->mm->rss <= 0) + if (p->mm->rss == 0) continue; /* Refresh swap_cnt? */ if (assign) - p->swap_cnt = p->mm->rss; - if (p->swap_cnt > max_cnt) { - max_cnt = p->swap_cnt; + p->mm->swap_cnt = p->mm->rss; + if (p->mm->swap_cnt > max_cnt) { + max_cnt = p->mm->swap_cnt; pbest = p; } } @@ -375,8 +398,6 @@ int i; char *revision="$Revision: 1.5 $", *s, *e; - swap_setup(); - if ((s = strchr(revision, ':')) && (e = strchr(s, '$'))) s++, i = e - s; @@ -430,7 +451,7 @@ break; current->state = TASK_INTERRUPTIBLE; run_task_queue(&tq_disk); - schedule_timeout(HZ); + schedule_timeout(swapout_interval); /* * kswapd isn't even meant to keep up with anything, @@ -438,13 +459,36 @@ * point is to make sure that the system doesn't stay * forever in a really bad memory squeeze. 
*/ - if (nr_free_pages < freepages.high) + if (nr_free_pages < freepages.min) try_to_free_pages(GFP_KSWAPD); } return 0; } +static int free_user_and_cache(int priority, int gfp_mask) +{ + static unsigned long grow_swap_cache = 0; + + if (!shrink_mmap(priority, gfp_mask)) + grow_swap_cache = 1; + + switch (grow_swap_cache) + { + case 0: + return 1; + default: + if (grow_swap_cache++ >= freepages.high) + grow_swap_cache = 0; + } + + if (swap_out(priority, gfp_mask)) + return 1; + + grow_swap_cache = 0; + return 0; +} + /* * We need to make the locks finer granularity, but right * now we need this so that we can do page allocations @@ -457,34 +501,35 @@ int try_to_free_pages(unsigned int gfp_mask) { int priority; - int count = SWAP_CLUSTER_MAX; + static int state = 0; + int count = pager_daemon.tries; lock_kernel(); /* Always trim SLAB caches when memory gets low. */ kmem_cache_reap(gfp_mask); - - priority = 6; - do { - while (shrink_mmap(priority, gfp_mask)) { - if (!--count) - goto done; - } - /* Try to get rid of some shared memory pages.. */ - while (shm_swap(priority, gfp_mask)) { - if (!--count) - goto done; - } - - /* Then, try to page stuff out.. */ - while (swap_out(priority, gfp_mask)) { - if (!--count) - goto done; - } + priority = pager_daemon.priority; + switch (state) + { + do { + case 0: + while (free_user_and_cache(priority, gfp_mask)) { + if (!--count) + goto done; + } + state = 1; + case 1: + /* Try to get rid of some shared memory pages.. 
*/ + while (shm_swap(priority, gfp_mask)) { + if (!--count) + goto done; + } + state = 0; - shrink_dcache_memory(priority, gfp_mask); - } while (--priority >= 0); + shrink_dcache_memory(priority, gfp_mask); + } while (--priority >= 0); + } done: unlock_kernel(); Index: linux/kernel/fork.c diff -u linux/kernel/fork.c:1.1.1.6 linux/kernel/fork.c:1.1.1.1.2.10 --- linux/kernel/fork.c:1.1.1.6 Mon Jan 11 22:24:21 1999 +++ linux/kernel/fork.c Mon Jan 11 22:56:09 1999 @@ -511,6 +514,7 @@ p->did_exec = 0; p->swappable = 0; + p->trashing = 0; p->state = TASK_UNINTERRUPTIBLE; copy_flags(clone_flags, p); Index: linux/kernel/sysctl.c diff -u linux/kernel/sysctl.c:1.1.1.6 linux/kernel/sysctl.c:1.1.1.1.2.12 --- linux/kernel/sysctl.c:1.1.1.6 Mon Jan 11 22:24:22 1999 +++ linux/kernel/sysctl.c Wed Jan 13 21:23:38 1999 @@ -32,7 +32,7 @@ /* External variables not in a header file. */ extern int panic_timeout; -extern int console_loglevel, C_A_D; +extern int console_loglevel, C_A_D, swapout_interval; extern int bdf_prm[], bdflush_min[], bdflush_max[]; extern char binfmt_java_interpreter[], binfmt_java_appletviewer[]; extern int sysctl_overcommit_memory; @@ -216,6 +216,8 @@ }; static ctl_table vm_table[] = { + {VM_SWAPOUT, "swapout_interval", + &swapout_interval, sizeof(int), 0644, NULL, &proc_dointvec}, {VM_FREEPG, "freepages", &freepages, sizeof(freepages_t), 0644, NULL, &proc_dointvec}, {VM_BDFLUSH, "bdflush", &bdf_prm, 9*sizeof(int), 0600, NULL, @@ -223,11 +225,7 @@ &bdflush_min, &bdflush_max}, {VM_OVERCOMMIT_MEMORY, "overcommit_memory", &sysctl_overcommit_memory, sizeof(sysctl_overcommit_memory), 0644, NULL, &proc_dointvec}, - {VM_BUFFERMEM, "buffermem", - &buffer_mem, sizeof(buffer_mem_t), 0644, NULL, &proc_dointvec}, - {VM_PAGECACHE, "pagecache", - &page_cache, sizeof(buffer_mem_t), 0644, NULL, &proc_dointvec}, - {VM_PAGERDAEMON, "kswapd", + {VM_PAGERDAEMON, "pager", &pager_daemon, sizeof(pager_daemon_t), 0644, NULL, &proc_dointvec}, {VM_PGT_CACHE, "pagetable_cache", 
&pgt_cache_water, 2*sizeof(int), 0600, NULL, &proc_dointvec}, Index: linux/include/linux/mm.h diff -u linux/include/linux/mm.h:1.1.1.6 linux/include/linux/mm.h:1.1.1.1.2.21 --- linux/include/linux/mm.h:1.1.1.6 Mon Jan 11 22:23:57 1999 +++ linux/include/linux/mm.h Thu Jan 14 13:15:31 1999 @@ -118,7 +118,6 @@ unsigned long offset; struct page *next_hash; atomic_t count; - unsigned int unused; unsigned long flags; /* atomic flags, some possibly updated asynchronously */ struct wait_queue *wait; struct page **pprev_hash; @@ -302,8 +301,7 @@ /* filemap.c */ extern void remove_inode_page(struct page *); -extern unsigned long page_unuse(struct page *); -extern int shrink_mmap(int, int); +extern int FASTCALL(shrink_mmap(int, int)); extern void truncate_inode_pages(struct inode *, unsigned long); extern unsigned long get_cached_page(struct inode *, unsigned long, int); extern void put_cached_page(unsigned long); @@ -387,9 +385,9 @@ } #define buffer_under_min() ((buffermem >> PAGE_SHIFT) * 100 < \ - buffer_mem.min_percent * num_physpages) -#define pgcache_under_min() (page_cache_size * 100 < \ - page_cache.min_percent * num_physpages) + pager_daemon.buffer_min_percent * num_physpages) +#define pgcache_under_min() ((page_cache_size-swapper_inode.i_nrpages) * 100 < \ + pager_daemon.cache_min_percent * num_physpages) #endif /* __KERNEL__ */ Index: linux/include/linux/sched.h diff -u linux/include/linux/sched.h:1.1.1.6 linux/include/linux/sched.h:1.1.1.1.2.13 --- linux/include/linux/sched.h:1.1.1.6 Mon Jan 11 22:24:03 1999 +++ linux/include/linux/sched.h Thu Jan 14 12:42:58 1999 @@ -169,6 +174,7 @@ unsigned long rss, total_vm, locked_vm; unsigned long def_flags; unsigned long cpu_vm_mask; + unsigned long swap_cnt, swap_address; /* * This is an architecture-specific pointer: the portable * part of Linux does not know about any segments. 
@@ -177,15 +183,17 @@ }; #define INIT_MM { \ - &init_mmap, NULL, swapper_pg_dir, \ + &init_mmap, NULL, swapper_pg_dir, \ ATOMIC_INIT(1), 1, \ MUTEX, \ 0, \ 0, 0, 0, 0, \ - 0, 0, 0, \ + 0, 0, 0, \ 0, 0, 0, 0, \ 0, 0, 0, \ - 0, 0, NULL } + 0, 0, \ + 0, 0, \ + NULL } struct signal_struct { atomic_t count; @@ -270,8 +278,7 @@ /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */ unsigned long min_flt, maj_flt, nswap, cmin_flt, cmaj_flt, cnswap; int swappable:1; - unsigned long swap_address; - unsigned long swap_cnt; /* number of pages to swap on next pass */ + int trashing:1; /* process credentials */ uid_t uid,euid,suid,fsuid; gid_t gid,egid,sgid,fsgid; @@ -355,7 +362,7 @@ /* utime */ {0,0,0,0},0, \ /* per CPU times */ {0, }, {0, }, \ /* flt */ 0,0,0,0,0,0, \ -/* swp */ 0,0,0, \ +/* swp */ 0,0, \ /* process credentials */ \ /* uid etc */ 0,0,0,0,0,0,0,0, \ /* suppl grps*/ 0, {0,}, \ Index: linux/include/linux/swap.h diff -u linux/include/linux/swap.h:1.1.1.6 linux/include/linux/swap.h:1.1.1.1.2.17 --- linux/include/linux/swap.h:1.1.1.6 Mon Jan 11 22:24:05 1999 +++ linux/include/linux/swap.h Wed Jan 13 21:28:52 1999 @@ -33,7 +33,7 @@ #define SWP_USED 1 #define SWP_WRITEOK 3 -#define SWAP_CLUSTER_MAX 32 +#define SWAP_CLUSTER (pager_daemon.swap_cluster) #define SWAP_MAP_MAX 0x7fff #define SWAP_MAP_BAD 0x8000 @@ -68,9 +68,6 @@ /* linux/ipc/shm.c */ extern int shm_swap (int, int); - -/* linux/mm/swap.c */ -extern void swap_setup (void); /* linux/mm/vmscan.c */ extern int try_to_free_pages(unsigned int gfp_mask); Index: linux/include/linux/swapctl.h diff -u linux/include/linux/swapctl.h:1.1.1.4 linux/include/linux/swapctl.h:1.1.1.1.2.6 --- linux/include/linux/swapctl.h:1.1.1.4 Mon Jan 11 22:24:05 1999 +++ linux/include/linux/swapctl.h Thu Jan 14 13:15:31 1999 @@ -4,32 +4,23 @@ #include #include -typedef struct buffer_mem_v1 +typedef struct freepages_s { - unsigned int min_percent; - unsigned int borrow_percent; - unsigned int 
max_percent; -} buffer_mem_v1; -typedef buffer_mem_v1 buffer_mem_t; -extern buffer_mem_t buffer_mem; -extern buffer_mem_t page_cache; - -typedef struct freepages_v1 -{ unsigned int min; unsigned int low; unsigned int high; -} freepages_v1; -typedef freepages_v1 freepages_t; +} freepages_t; extern freepages_t freepages; -typedef struct pager_daemon_v1 +typedef struct pager_daemon_s { - unsigned int tries_base; - unsigned int tries_min; + unsigned int priority; + unsigned int buffer_min_percent; + unsigned int cache_min_percent; + unsigned int tries; unsigned int swap_cluster; -} pager_daemon_v1; -typedef pager_daemon_v1 pager_daemon_t; + unsigned int max_async_pages; +} pager_daemon_t; extern pager_daemon_t pager_daemon; #endif /* _LINUX_SWAPCTL_H */ -- This is a majordomo managed list. To unsubscribe, send a message with the body 'unsubscribe linux-mm me@address' to: majordomo@kvack.org