Skip to content

Commit

Permalink
[PATCH] swap: scan_swap_map drop swap_device_lock
Browse files Browse the repository at this point in the history
get_swap_page has often shown up on latency traces, doing lengthy scans while
holding two spinlocks.  swap_list_lock is already dropped; now make
scan_swap_map drop swap_device_lock before scanning the swap_map.

While scanning for an empty cluster, don't worry that racing tasks may
allocate what was free and free what was allocated; but when allocating an
entry, check it's still free after retaking the lock.  Avoid dropping the lock
in the expected common path.  No barriers beyond the locks, just let the
cookie crumble; highest_bit limit is volatile, but benign.

Guard against swapoff: scan_swap_map must check SWP_WRITEOK before allocating,
and must raise the SWP_SCANNING reference count while it runs; swapoff waits
for that count to fall - just using schedule_timeout, since we don't want to
burden scan_swap_map itself, and it's very unlikely that anyone can really
still be in scan_swap_map once swapoff gets this far.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
  • Loading branch information
Hugh Dickins authored and Linus Torvalds committed Sep 5, 2005
1 parent 7dfad41 commit 52b7efd
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 7 deletions.
2 changes: 2 additions & 0 deletions include/linux/swap.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,8 @@ enum {
SWP_USED = (1 << 0), /* is slot in swap_info[] used? */
SWP_WRITEOK = (1 << 1), /* ok to write to this swap? */
SWP_ACTIVE = (SWP_USED | SWP_WRITEOK),
/* add others here before... */
SWP_SCANNING = (1 << 8), /* refcount in scan_swap_map */
};

#define SWAP_CLUSTER_MAX 32
Expand Down
42 changes: 35 additions & 7 deletions mm/swapfile.c
Original file line number Diff line number Diff line change
Expand Up @@ -98,10 +98,12 @@ static inline unsigned long scan_swap_map(struct swap_info_struct *si)
* But we do now try to find an empty cluster. -Andrea
*/

si->flags += SWP_SCANNING;
if (unlikely(!si->cluster_nr)) {
si->cluster_nr = SWAPFILE_CLUSTER - 1;
if (si->pages - si->inuse_pages < SWAPFILE_CLUSTER)
goto lowest;
swap_device_unlock(si);

offset = si->lowest_bit;
last_in_cluster = offset + SWAPFILE_CLUSTER - 1;
Expand All @@ -111,10 +113,12 @@ static inline unsigned long scan_swap_map(struct swap_info_struct *si)
if (si->swap_map[offset])
last_in_cluster = offset + SWAPFILE_CLUSTER;
else if (offset == last_in_cluster) {
swap_device_lock(si);
si->cluster_next = offset-SWAPFILE_CLUSTER-1;
goto cluster;
}
}
swap_device_lock(si);
goto lowest;
}

Expand All @@ -123,10 +127,12 @@ static inline unsigned long scan_swap_map(struct swap_info_struct *si)
offset = si->cluster_next;
if (offset > si->highest_bit)
lowest: offset = si->lowest_bit;
checks: if (!(si->flags & SWP_WRITEOK))
goto no_page;
if (!si->highest_bit)
goto no_page;
if (!si->swap_map[offset]) {
got_page: if (offset == si->lowest_bit)
if (offset == si->lowest_bit)
si->lowest_bit++;
if (offset == si->highest_bit)
si->highest_bit--;
Expand All @@ -137,16 +143,22 @@ got_page: if (offset == si->lowest_bit)
}
si->swap_map[offset] = 1;
si->cluster_next = offset + 1;
si->flags -= SWP_SCANNING;
return offset;
}

swap_device_unlock(si);
while (++offset <= si->highest_bit) {
if (!si->swap_map[offset])
goto got_page;
if (!si->swap_map[offset]) {
swap_device_lock(si);
goto checks;
}
}
swap_device_lock(si);
goto lowest;

no_page:
si->flags -= SWP_SCANNING;
return 0;
}

Expand Down Expand Up @@ -1111,10 +1123,6 @@ asmlinkage long sys_swapoff(const char __user * specialfile)
err = try_to_unuse(type);
current->flags &= ~PF_SWAPOFF;

/* wait for any unplug function to finish */
down_write(&swap_unplug_sem);
up_write(&swap_unplug_sem);

if (err) {
/* re-insert swap space back into swap_list */
swap_list_lock();
Expand All @@ -1128,10 +1136,28 @@ asmlinkage long sys_swapoff(const char __user * specialfile)
swap_info[prev].next = p - swap_info;
nr_swap_pages += p->pages;
total_swap_pages += p->pages;
swap_device_lock(p);
p->flags |= SWP_WRITEOK;
swap_device_unlock(p);
swap_list_unlock();
goto out_dput;
}

/* wait for any unplug function to finish */
down_write(&swap_unplug_sem);
up_write(&swap_unplug_sem);

/* wait for anyone still in scan_swap_map */
swap_device_lock(p);
p->highest_bit = 0; /* cuts scans short */
while (p->flags >= SWP_SCANNING) {
swap_device_unlock(p);
set_current_state(TASK_UNINTERRUPTIBLE);
schedule_timeout(1);
swap_device_lock(p);
}
swap_device_unlock(p);

destroy_swap_extents(p);
down(&swapon_sem);
swap_list_lock();
Expand Down Expand Up @@ -1431,6 +1457,8 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
}

p->lowest_bit = 1;
p->cluster_next = 1;

/*
* Find out how many pages are allowed for a single swap
* device. There are two limiting factors: 1) the number of
Expand Down

0 comments on commit 52b7efd

Please sign in to comment.