Skip to content

Commit

Permalink
---
Browse files Browse the repository at this point in the history
yaml
---
r: 126226
b: refs/heads/master
c: 7992fde
h: refs/heads/master
v: v3
  • Loading branch information
Hugh Dickins authored and Linus Torvalds committed Jan 6, 2009
1 parent 474f109 commit ff50e99
Show file tree
Hide file tree
Showing 3 changed files with 122 additions and 2 deletions.
2 changes: 1 addition & 1 deletion [refs]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
---
refs/heads/master: 6a6ba83175c029c7820765bae44692266b29e67a
refs/heads/master: 7992fde72ce06c73280a1939b7a1e903bc95ef85
3 changes: 3 additions & 0 deletions trunk/include/linux/swap.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ enum {
SWP_USED = (1 << 0), /* is slot in swap_info[] used? */
SWP_WRITEOK = (1 << 1), /* ok to write to this swap? */
SWP_DISCARDABLE = (1 << 2), /* blkdev supports discard */
SWP_DISCARDING = (1 << 3), /* now discarding a free cluster */
/* add others here before... */
SWP_SCANNING = (1 << 8), /* refcount in scan_swap_map */
};
Expand All @@ -144,6 +145,8 @@ struct swap_info_struct {
unsigned short *swap_map;
unsigned int lowest_bit;
unsigned int highest_bit;
unsigned int lowest_alloc; /* while preparing discard cluster */
unsigned int highest_alloc; /* while preparing discard cluster */
unsigned int cluster_next;
unsigned int cluster_nr;
unsigned int pages;
Expand Down
119 changes: 118 additions & 1 deletion trunk/mm/swapfile.c
Original file line number Diff line number Diff line change
Expand Up @@ -115,14 +115,62 @@ static int discard_swap(struct swap_info_struct *si)
return err; /* That will often be -EOPNOTSUPP */
}

/*
 * swap allocation tells the device that a cluster of swap pages can now be
 * discarded, to allow the swap device to optimize its wear-levelling.
 */
static void discard_swap_cluster(struct swap_info_struct *si,
				 pgoff_t start_page, pgoff_t nr_pages)
{
	/* Start the extent walk from the cached extent of the last lookup */
	struct swap_extent *se = si->curr_swap_extent;
	int found_extent = 0;	/* set once the first covering extent is found */

	while (nr_pages) {
		struct list_head *lh;

		/* Does this extent cover start_page? */
		if (se->start_page <= start_page &&
		    start_page < se->start_page + se->nr_pages) {
			pgoff_t offset = start_page - se->start_page;
			sector_t start_block = se->start_block + offset;
			/* pages remaining in this extent from offset onward */
			pgoff_t nr_blocks = se->nr_pages - offset;

			if (nr_blocks > nr_pages)
				nr_blocks = nr_pages;
			start_page += nr_blocks;
			nr_pages -= nr_blocks;

			/* Cache the first matching extent to seed the next call */
			if (!found_extent++)
				si->curr_swap_extent = se;

			/* Convert page units to 512-byte sectors for the blkdev */
			start_block <<= PAGE_SHIFT - 9;
			nr_blocks <<= PAGE_SHIFT - 9;
			if (blkdev_issue_discard(si->bdev, start_block,
						nr_blocks, GFP_NOIO))
				break;	/* device refused/failed; give up quietly */
		}

		/* Advance circularly, skipping the list head sentinel */
		lh = se->list.next;
		if (lh == &si->extent_list)
			lh = lh->next;
		se = list_entry(lh, struct swap_extent, list);
	}
}

/*
 * Action routine passed to wait_on_bit() while SWP_DISCARDING is set:
 * yield the CPU, then return 0 so the wait loop re-tests the bit.
 */
static int wait_for_discard(void *word)
{
	schedule();
	return 0;
}

/* Size, in pages, of the clusters scan_swap_map() tries to allocate within */
#define SWAPFILE_CLUSTER	256
/* Entries scanned before dropping swap_lock to limit scheduling latency */
#define LATENCY_LIMIT		256

static inline unsigned long scan_swap_map(struct swap_info_struct *si)
{
unsigned long offset;
unsigned long last_in_cluster;
unsigned long last_in_cluster = 0;
int latency_ration = LATENCY_LIMIT;
int found_free_cluster = 0;

/*
* We try to cluster swap pages by allocating them sequentially
Expand All @@ -142,6 +190,19 @@ static inline unsigned long scan_swap_map(struct swap_info_struct *si)
si->cluster_nr = SWAPFILE_CLUSTER - 1;
goto checks;
}
if (si->flags & SWP_DISCARDABLE) {
/*
* Start range check on racing allocations, in case
* they overlap the cluster we eventually decide on
* (we scan without swap_lock to allow preemption).
* It's hardly conceivable that cluster_nr could be
* wrapped during our scan, but don't depend on it.
*/
if (si->lowest_alloc)
goto checks;
si->lowest_alloc = si->max;
si->highest_alloc = 0;
}
spin_unlock(&swap_lock);

offset = si->lowest_bit;
Expand All @@ -156,6 +217,7 @@ static inline unsigned long scan_swap_map(struct swap_info_struct *si)
offset -= SWAPFILE_CLUSTER - 1;
si->cluster_next = offset;
si->cluster_nr = SWAPFILE_CLUSTER - 1;
found_free_cluster = 1;
goto checks;
}
if (unlikely(--latency_ration < 0)) {
Expand All @@ -167,6 +229,7 @@ static inline unsigned long scan_swap_map(struct swap_info_struct *si)
offset = si->lowest_bit;
spin_lock(&swap_lock);
si->cluster_nr = SWAPFILE_CLUSTER - 1;
si->lowest_alloc = 0;
}

checks:
Expand All @@ -191,6 +254,60 @@ static inline unsigned long scan_swap_map(struct swap_info_struct *si)
si->swap_map[offset] = 1;
si->cluster_next = offset + 1;
si->flags -= SWP_SCANNING;

if (si->lowest_alloc) {
/*
* Only set when SWP_DISCARDABLE, and there's a scan
* for a free cluster in progress or just completed.
*/
if (found_free_cluster) {
/*
* To optimize wear-levelling, discard the
* old data of the cluster, taking care not to
* discard any of its pages that have already
* been allocated by racing tasks (offset has
* already stepped over any at the beginning).
*/
if (offset < si->highest_alloc &&
si->lowest_alloc <= last_in_cluster)
last_in_cluster = si->lowest_alloc - 1;
si->flags |= SWP_DISCARDING;
spin_unlock(&swap_lock);

if (offset < last_in_cluster)
discard_swap_cluster(si, offset,
last_in_cluster - offset + 1);

spin_lock(&swap_lock);
si->lowest_alloc = 0;
si->flags &= ~SWP_DISCARDING;

smp_mb(); /* wake_up_bit advises this */
wake_up_bit(&si->flags, ilog2(SWP_DISCARDING));

} else if (si->flags & SWP_DISCARDING) {
/*
* Delay using pages allocated by racing tasks
* until the whole discard has been issued. We
* could defer that delay until swap_writepage,
* but it's easier to keep this self-contained.
*/
spin_unlock(&swap_lock);
wait_on_bit(&si->flags, ilog2(SWP_DISCARDING),
wait_for_discard, TASK_UNINTERRUPTIBLE);
spin_lock(&swap_lock);
} else {
/*
* Note pages allocated by racing tasks while
* scan for a free cluster is in progress, so
* that its final discard can exclude them.
*/
if (offset < si->lowest_alloc)
si->lowest_alloc = offset;
if (offset > si->highest_alloc)
si->highest_alloc = offset;
}
}
return offset;

scan:
Expand Down

0 comments on commit ff50e99

Please sign in to comment.