Skip to content

Commit

Permalink
xfs: convert grant head manipulations to lockless algorithm
Browse files Browse the repository at this point in the history
The only thing that the grant lock remains to protect is the grant head
manipulations when adding or removing space from the log. These calculations
are already based on atomic variables, so we can already update them safely
without locks. However, the grant head manipulations require atomic multi-step
calculations to be executed, which the algorithms currently don't allow.

To make these multi-step calculations atomic, convert the algorithms to
compare-and-exchange loops on the atomic variables. That is, we sample the old
value, perform the calculation and use atomic64_cmpxchg() to attempt to update
the head with the new value. If the head has not changed since we sampled it,
it will succeed and we are done. Otherwise, we rerun the calculation from
a new sample of the head.

This allows us to remove the grant lock from around all the grant head space
manipulations, and that effectively removes the grant lock from the log
completely. Hence we can remove the grant lock completely from the log at this
point.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
  • Loading branch information
Dave Chinner authored and Dave Chinner committed Dec 21, 2010
1 parent 3f16b98 commit d0eb2f3
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 77 deletions.
103 changes: 34 additions & 69 deletions fs/xfs/xfs_log.c
Original file line number Diff line number Diff line change
Expand Up @@ -81,15 +81,13 @@ STATIC void xlog_ungrant_log_space(xlog_t *log,

#if defined(DEBUG)
STATIC void xlog_verify_dest_ptr(xlog_t *log, char *ptr);
STATIC void xlog_verify_grant_head(xlog_t *log, int equals);
STATIC void xlog_verify_grant_tail(struct log *log);
STATIC void xlog_verify_iclog(xlog_t *log, xlog_in_core_t *iclog,
int count, boolean_t syncing);
STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog,
xfs_lsn_t tail_lsn);
#else
#define xlog_verify_dest_ptr(a,b)
#define xlog_verify_grant_head(a,b)
#define xlog_verify_grant_tail(a)
#define xlog_verify_iclog(a,b,c,d)
#define xlog_verify_tail_lsn(a,b,c)
Expand All @@ -103,17 +101,24 @@ xlog_grant_sub_space(
atomic64_t *head,
int bytes)
{
int cycle, space;
int64_t head_val = atomic64_read(head);
int64_t new, old;

xlog_crack_grant_head(head, &cycle, &space);
do {
int cycle, space;

space -= bytes;
if (space < 0) {
space += log->l_logsize;
cycle--;
}
xlog_crack_grant_head_val(head_val, &cycle, &space);

xlog_assign_grant_head(head, cycle, space);
space -= bytes;
if (space < 0) {
space += log->l_logsize;
cycle--;
}

old = head_val;
new = xlog_assign_grant_head_val(cycle, space);
head_val = atomic64_cmpxchg(head, old, new);
} while (head_val != old);
}

static void
Expand All @@ -122,20 +127,27 @@ xlog_grant_add_space(
atomic64_t *head,
int bytes)
{
int tmp;
int cycle, space;
int64_t head_val = atomic64_read(head);
int64_t new, old;

xlog_crack_grant_head(head, &cycle, &space);
do {
int tmp;
int cycle, space;

tmp = log->l_logsize - space;
if (tmp > bytes)
space += bytes;
else {
space = bytes - tmp;
cycle++;
}
xlog_crack_grant_head_val(head_val, &cycle, &space);

xlog_assign_grant_head(head, cycle, space);
tmp = log->l_logsize - space;
if (tmp > bytes)
space += bytes;
else {
space = bytes - tmp;
cycle++;
}

old = head_val;
new = xlog_assign_grant_head_val(cycle, space);
head_val = atomic64_cmpxchg(head, old, new);
} while (head_val != old);
}

static void
Expand Down Expand Up @@ -318,9 +330,7 @@ xfs_log_reserve(

trace_xfs_log_reserve(log, internal_ticket);

spin_lock(&log->l_grant_lock);
xlog_grant_push_ail(log, internal_ticket->t_unit_res);
spin_unlock(&log->l_grant_lock);
retval = xlog_regrant_write_log_space(log, internal_ticket);
} else {
/* may sleep if need to allocate more tickets */
Expand All @@ -334,11 +344,9 @@ xfs_log_reserve(

trace_xfs_log_reserve(log, internal_ticket);

spin_lock(&log->l_grant_lock);
xlog_grant_push_ail(log,
(internal_ticket->t_unit_res *
internal_ticket->t_cnt));
spin_unlock(&log->l_grant_lock);
retval = xlog_grant_log_space(log, internal_ticket);
}

Expand Down Expand Up @@ -1057,7 +1065,6 @@ xlog_alloc_log(xfs_mount_t *mp,
log->l_xbuf = bp;

spin_lock_init(&log->l_icloglock);
spin_lock_init(&log->l_grant_lock);
init_waitqueue_head(&log->l_flush_wait);

/* log record size must be multiple of BBSIZE; see xlog_rec_header_t */
Expand Down Expand Up @@ -1135,7 +1142,6 @@ xlog_alloc_log(xfs_mount_t *mp,
kmem_free(iclog);
}
spinlock_destroy(&log->l_icloglock);
spinlock_destroy(&log->l_grant_lock);
xfs_buf_free(log->l_xbuf);
out_free_log:
kmem_free(log);
Expand Down Expand Up @@ -1331,10 +1337,8 @@ xlog_sync(xlog_t *log,
roundoff < BBTOB(1)));

/* move grant heads by roundoff in sync */
spin_lock(&log->l_grant_lock);
xlog_grant_add_space(log, &log->l_grant_reserve_head, roundoff);
xlog_grant_add_space(log, &log->l_grant_write_head, roundoff);
spin_unlock(&log->l_grant_lock);

/* put cycle number in every block */
xlog_pack_data(log, iclog, roundoff);
Expand Down Expand Up @@ -1455,7 +1459,6 @@ xlog_dealloc_log(xlog_t *log)
iclog = next_iclog;
}
spinlock_destroy(&log->l_icloglock);
spinlock_destroy(&log->l_grant_lock);

xfs_buf_free(log->l_xbuf);
log->l_mp->m_log = NULL;
Expand Down Expand Up @@ -2574,13 +2577,10 @@ xlog_grant_log_space(xlog_t *log,
}

/* we've got enough space */
spin_lock(&log->l_grant_lock);
xlog_grant_add_space(log, &log->l_grant_reserve_head, need_bytes);
xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes);
trace_xfs_log_grant_exit(log, tic);
xlog_verify_grant_head(log, 1);
xlog_verify_grant_tail(log);
spin_unlock(&log->l_grant_lock);
return 0;

error_return_unlocked:
Expand Down Expand Up @@ -2694,12 +2694,9 @@ xlog_regrant_write_log_space(xlog_t *log,
}

/* we've got enough space */
spin_lock(&log->l_grant_lock);
xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes);
trace_xfs_log_regrant_write_exit(log, tic);
xlog_verify_grant_head(log, 1);
xlog_verify_grant_tail(log);
spin_unlock(&log->l_grant_lock);
return 0;


Expand Down Expand Up @@ -2737,7 +2734,6 @@ xlog_regrant_reserve_log_space(xlog_t *log,
if (ticket->t_cnt > 0)
ticket->t_cnt--;

spin_lock(&log->l_grant_lock);
xlog_grant_sub_space(log, &log->l_grant_reserve_head,
ticket->t_curr_res);
xlog_grant_sub_space(log, &log->l_grant_write_head,
Expand All @@ -2747,21 +2743,15 @@ xlog_regrant_reserve_log_space(xlog_t *log,

trace_xfs_log_regrant_reserve_sub(log, ticket);

xlog_verify_grant_head(log, 1);

/* just return if we still have some of the pre-reserved space */
if (ticket->t_cnt > 0) {
spin_unlock(&log->l_grant_lock);
if (ticket->t_cnt > 0)
return;
}

xlog_grant_add_space(log, &log->l_grant_reserve_head,
ticket->t_unit_res);

trace_xfs_log_regrant_reserve_exit(log, ticket);

xlog_verify_grant_head(log, 0);
spin_unlock(&log->l_grant_lock);
ticket->t_curr_res = ticket->t_unit_res;
xlog_tic_reset_res(ticket);
} /* xlog_regrant_reserve_log_space */
Expand Down Expand Up @@ -2790,7 +2780,6 @@ xlog_ungrant_log_space(xlog_t *log,
if (ticket->t_cnt > 0)
ticket->t_cnt--;

spin_lock(&log->l_grant_lock);
trace_xfs_log_ungrant_enter(log, ticket);
trace_xfs_log_ungrant_sub(log, ticket);

Expand All @@ -2809,8 +2798,6 @@ xlog_ungrant_log_space(xlog_t *log,

trace_xfs_log_ungrant_exit(log, ticket);

xlog_verify_grant_head(log, 1);
spin_unlock(&log->l_grant_lock);
xfs_log_move_tail(log->l_mp, 1);
} /* xlog_ungrant_log_space */

Expand Down Expand Up @@ -3428,28 +3415,6 @@ xlog_verify_dest_ptr(
xlog_panic("xlog_verify_dest_ptr: invalid ptr");
}

/*
 * Debug-only consistency check of the reserve grant head against the write
 * grant head. Both heads are cracked into (cycle, space) pairs and compared:
 *
 *  - same cycle: the reserve space must be >= the write space when @equals is
 *    non-zero, and strictly greater when @equals is zero;
 *  - different cycles: the reserve head must be exactly one cycle ahead of the
 *    write head, and the write space must be >= the reserve space.
 *
 * Each head is sampled separately, so the two samples are not taken as a
 * single atomic snapshot.
 */
STATIC void
xlog_verify_grant_head(xlog_t *log, int equals)
{
int reserve_cycle, reserve_space;
int write_cycle, write_space;

xlog_crack_grant_head(&log->l_grant_reserve_head,
&reserve_cycle, &reserve_space);
xlog_crack_grant_head(&log->l_grant_write_head,
&write_cycle, &write_space);

if (reserve_cycle == write_cycle) {
/* @equals selects whether the two heads may coincide */
if (equals)
ASSERT(reserve_space >= write_space);
else
ASSERT(reserve_space > write_space);
} else {
/* reserve head has wrapped into the next cycle ahead of write */
ASSERT(reserve_cycle - 1 == write_cycle);
ASSERT(write_space >= reserve_space);
}
}

STATIC void
xlog_verify_grant_tail(
struct log *log)
Expand Down
23 changes: 15 additions & 8 deletions fs/xfs/xfs_log_priv.h
Original file line number Diff line number Diff line change
Expand Up @@ -510,9 +510,6 @@ typedef struct log {
int l_curr_block; /* current logical log block */
int l_prev_block; /* previous logical log block */

/* The following block of fields are changed while holding grant_lock */
spinlock_t l_grant_lock ____cacheline_aligned_in_smp;

/*
* l_last_sync_lsn and l_tail_lsn are atomics so they can be set and
* read without needing to hold specific locks. To avoid operations
Expand Down Expand Up @@ -599,23 +596,33 @@ xlog_assign_atomic_lsn(atomic64_t *lsn, uint cycle, uint block)
}

/*
* When we crack the grrant head, we sample it first so that the value will not
* When we crack the grant head, we sample it first so that the value will not
* change while we are cracking it into the component values. This means we
* will always get consistent component values to work from.
*/
static inline void
xlog_crack_grant_head(atomic64_t *head, int *cycle, int *space)
xlog_crack_grant_head_val(int64_t val, int *cycle, int *space)
{
int64_t val = atomic64_read(head);

*cycle = val >> 32;
*space = val & 0xffffffff;
}

/*
 * Read the grant head once and split that single sample into its cycle and
 * space components, so both outputs come from the same value of the head.
 */
static inline void
xlog_crack_grant_head(atomic64_t *head, int *cycle, int *space)
{
	int64_t	sample = atomic64_read(head);

	xlog_crack_grant_head_val(sample, cycle, space);
}

/*
 * Pack a cycle number and a byte count ("space") into one 64-bit grant head
 * value: cycle in the upper 32 bits, space in the lower 32 bits. This is the
 * inverse of xlog_crack_grant_head_val().
 *
 * space is masked to its low 32 bits before being merged. Without the mask a
 * negative space is sign-extended to 64 bits by the OR and overwrites the
 * cycle half of the value.
 */
static inline int64_t
xlog_assign_grant_head_val(int cycle, int space)
{
	return ((int64_t)cycle << 32) | ((int64_t)space & 0xffffffffLL);
}

static inline void
xlog_assign_grant_head(atomic64_t *head, int cycle, int space)
{
atomic64_set(head, ((int64_t)cycle << 32) | space);
atomic64_set(head, xlog_assign_grant_head_val(cycle, space));
}

/*
Expand Down

0 comments on commit d0eb2f3

Please sign in to comment.