Skip to content

Commit

Permalink
xfs: use xfs_defer_pending objects to recover intent items
Browse files Browse the repository at this point in the history
One thing I never quite got around to doing is porting the log intent
item recovery code to reconstruct the deferred pending work state.  As a
result, each intent item open codes xfs_defer_finish_one in its recovery
method, because that's what the EFI code did before xfs_defer.c even
existed.

This is a gross thing to have left unfixed -- if an EFI cannot proceed
due to busy extents, we end up creating separate new EFIs for each
unfinished work item, which is a change in behavior from what runtime
would have done.

Worse yet, Long Li pointed out that there's a UAF in the recovery code.
The ->commit_pass2 function adds the intent item to the AIL and drops
the refcount.  The one remaining refcount is now owned by the recovery
mechanism (aka the log intent items in the AIL) with the intent of
giving the refcount to the intent done item in the ->iop_recover
function.

However, if something fails later in recovery, xlog_recover_finish will
walk the recovered intent items in the AIL and release them.  If the CIL
hasn't been pushed before that point (which is possible since we don't
force the log until later) then the intent done release will try to free
its associated intent, which has already been freed.

This patch starts to address this mess by having the ->commit_pass2
functions recreate the xfs_defer_pending state.  The next few patches
will fix the recovery functions.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
  • Loading branch information
Darrick J. Wong committed Dec 7, 2023
1 parent 07bcbdf commit 03f7767
Show file tree
Hide file tree
Showing 11 changed files with 158 additions and 116 deletions.
105 changes: 77 additions & 28 deletions fs/xfs/libxfs/xfs_defer.c
Original file line number Diff line number Diff line change
Expand Up @@ -245,23 +245,53 @@ xfs_defer_create_intents(
return ret;
}

STATIC void
static inline void
xfs_defer_pending_abort(
struct xfs_mount *mp,
struct xfs_defer_pending *dfp)
{
const struct xfs_defer_op_type *ops = defer_op_types[dfp->dfp_type];

trace_xfs_defer_pending_abort(mp, dfp);

if (dfp->dfp_intent && !dfp->dfp_done) {
ops->abort_intent(dfp->dfp_intent);
dfp->dfp_intent = NULL;
}
}

static inline void
xfs_defer_pending_cancel_work(
struct xfs_mount *mp,
struct xfs_defer_pending *dfp)
{
const struct xfs_defer_op_type *ops = defer_op_types[dfp->dfp_type];
struct list_head *pwi;
struct list_head *n;

trace_xfs_defer_cancel_list(mp, dfp);

list_del(&dfp->dfp_list);
list_for_each_safe(pwi, n, &dfp->dfp_work) {
list_del(pwi);
dfp->dfp_count--;
trace_xfs_defer_cancel_item(mp, dfp, pwi);
ops->cancel_item(pwi);
}
ASSERT(dfp->dfp_count == 0);
kmem_cache_free(xfs_defer_pending_cache, dfp);
}

STATIC void
xfs_defer_pending_abort_list(
struct xfs_mount *mp,
struct list_head *dop_list)
{
struct xfs_defer_pending *dfp;
const struct xfs_defer_op_type *ops;

/* Abort intent items that don't have a done item. */
list_for_each_entry(dfp, dop_list, dfp_list) {
ops = defer_op_types[dfp->dfp_type];
trace_xfs_defer_pending_abort(mp, dfp);
if (dfp->dfp_intent && !dfp->dfp_done) {
ops->abort_intent(dfp->dfp_intent);
dfp->dfp_intent = NULL;
}
}
list_for_each_entry(dfp, dop_list, dfp_list)
xfs_defer_pending_abort(mp, dfp);
}

/* Abort all the intents that were committed. */
Expand All @@ -271,7 +301,7 @@ xfs_defer_trans_abort(
struct list_head *dop_pending)
{
trace_xfs_defer_trans_abort(tp, _RET_IP_);
xfs_defer_pending_abort(tp->t_mountp, dop_pending);
xfs_defer_pending_abort_list(tp->t_mountp, dop_pending);
}

/*
Expand Down Expand Up @@ -389,27 +419,13 @@ xfs_defer_cancel_list(
{
struct xfs_defer_pending *dfp;
struct xfs_defer_pending *pli;
struct list_head *pwi;
struct list_head *n;
const struct xfs_defer_op_type *ops;

/*
* Free the pending items. Caller should already have arranged
* for the intent items to be released.
*/
list_for_each_entry_safe(dfp, pli, dop_list, dfp_list) {
ops = defer_op_types[dfp->dfp_type];
trace_xfs_defer_cancel_list(mp, dfp);
list_del(&dfp->dfp_list);
list_for_each_safe(pwi, n, &dfp->dfp_work) {
list_del(pwi);
dfp->dfp_count--;
trace_xfs_defer_cancel_item(mp, dfp, pwi);
ops->cancel_item(pwi);
}
ASSERT(dfp->dfp_count == 0);
kmem_cache_free(xfs_defer_pending_cache, dfp);
}
list_for_each_entry_safe(dfp, pli, dop_list, dfp_list)
xfs_defer_pending_cancel_work(mp, dfp);
}

/*
Expand Down Expand Up @@ -665,6 +681,39 @@ xfs_defer_add(
dfp->dfp_count++;
}

/*
* Create a pending deferred work item to replay the recovered intent item
* and add it to the list.
*/
void
xfs_defer_start_recovery(
struct xfs_log_item *lip,
enum xfs_defer_ops_type dfp_type,
struct list_head *r_dfops)
{
struct xfs_defer_pending *dfp;

dfp = kmem_cache_zalloc(xfs_defer_pending_cache,
GFP_NOFS | __GFP_NOFAIL);
dfp->dfp_type = dfp_type;
dfp->dfp_intent = lip;
INIT_LIST_HEAD(&dfp->dfp_work);
list_add_tail(&dfp->dfp_list, r_dfops);
}

/*
* Cancel a deferred work item created to recover a log intent item. @dfp
* will be freed after this function returns.
*/
void
xfs_defer_cancel_recovery(
struct xfs_mount *mp,
struct xfs_defer_pending *dfp)
{
xfs_defer_pending_abort(mp, dfp);
xfs_defer_pending_cancel_work(mp, dfp);
}

/*
* Move deferred ops from one transaction to another and reset the source to
* initial state. This is primarily used to carry state forward across
Expand Down Expand Up @@ -769,7 +818,7 @@ xfs_defer_ops_capture_abort(
{
unsigned short i;

xfs_defer_pending_abort(mp, &dfc->dfc_dfops);
xfs_defer_pending_abort_list(mp, &dfc->dfc_dfops);
xfs_defer_cancel_list(mp, &dfc->dfc_dfops);

for (i = 0; i < dfc->dfc_held.dr_bufs; i++)
Expand Down
5 changes: 5 additions & 0 deletions fs/xfs/libxfs/xfs_defer.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,11 @@ void xfs_defer_ops_capture_abort(struct xfs_mount *mp,
struct xfs_defer_capture *d);
void xfs_defer_resources_rele(struct xfs_defer_resources *dres);

void xfs_defer_start_recovery(struct xfs_log_item *lip,
enum xfs_defer_ops_type dfp_type, struct list_head *r_dfops);
void xfs_defer_cancel_recovery(struct xfs_mount *mp,
struct xfs_defer_pending *dfp);

int __init xfs_defer_init_item_caches(void);
void xfs_defer_destroy_item_caches(void);

Expand Down
3 changes: 3 additions & 0 deletions fs/xfs/libxfs/xfs_log_recover.h
Original file line number Diff line number Diff line change
Expand Up @@ -153,4 +153,7 @@ xlog_recover_resv(const struct xfs_trans_res *r)
return ret;
}

void xlog_recover_intent_item(struct xlog *log, struct xfs_log_item *lip,
xfs_lsn_t lsn, unsigned int dfp_type);

#endif /* __XFS_LOG_RECOVER_H__ */
10 changes: 2 additions & 8 deletions fs/xfs/xfs_attr_item.c
Original file line number Diff line number Diff line change
Expand Up @@ -772,14 +772,8 @@ xlog_recover_attri_commit_pass2(
attrip = xfs_attri_init(mp, nv);
memcpy(&attrip->attri_format, attri_formatp, len);

/*
* The ATTRI has two references. One for the ATTRD and one for ATTRI to
* ensure it makes it into the AIL. Insert the ATTRI into the AIL
* directly and drop the ATTRI reference. Note that
* xfs_trans_ail_update() drops the AIL lock.
*/
xfs_trans_ail_insert(log->l_ailp, &attrip->attri_item, lsn);
xfs_attri_release(attrip);
xlog_recover_intent_item(log, &attrip->attri_item, lsn,
XFS_DEFER_OPS_TYPE_ATTR);
xfs_attri_log_nameval_put(nv);
return 0;
}
Expand Down
9 changes: 3 additions & 6 deletions fs/xfs/xfs_bmap_item.c
Original file line number Diff line number Diff line change
Expand Up @@ -681,12 +681,9 @@ xlog_recover_bui_commit_pass2(
buip = xfs_bui_init(mp);
xfs_bui_copy_format(&buip->bui_format, bui_formatp);
atomic_set(&buip->bui_next_extent, bui_formatp->bui_nextents);
/*
* Insert the intent into the AIL directly and drop one reference so
* that finishing or canceling the work will drop the other.
*/
xfs_trans_ail_insert(log->l_ailp, &buip->bui_item, lsn);
xfs_bui_release(buip);

xlog_recover_intent_item(log, &buip->bui_item, lsn,
XFS_DEFER_OPS_TYPE_BMAP);
return 0;
}

Expand Down
9 changes: 3 additions & 6 deletions fs/xfs/xfs_extfree_item.c
Original file line number Diff line number Diff line change
Expand Up @@ -820,12 +820,9 @@ xlog_recover_efi_commit_pass2(
return error;
}
atomic_set(&efip->efi_next_extent, efi_formatp->efi_nextents);
/*
* Insert the intent into the AIL directly and drop one reference so
* that finishing or canceling the work will drop the other.
*/
xfs_trans_ail_insert(log->l_ailp, &efip->efi_item, lsn);
xfs_efi_release(efip);

xlog_recover_intent_item(log, &efip->efi_item, lsn,
XFS_DEFER_OPS_TYPE_FREE);
return 0;
}

Expand Down
1 change: 1 addition & 0 deletions fs/xfs/xfs_log.c
Original file line number Diff line number Diff line change
Expand Up @@ -1542,6 +1542,7 @@ xlog_alloc_log(
log->l_covered_state = XLOG_STATE_COVER_IDLE;
set_bit(XLOG_ACTIVE_RECOVERY, &log->l_opstate);
INIT_DELAYED_WORK(&log->l_work, xfs_log_worker);
INIT_LIST_HEAD(&log->r_dfops);

log->l_prev_block = -1;
/* log->l_tail_lsn = 0x100000000LL; cycle = 1; current block = 0 */
Expand Down
1 change: 1 addition & 0 deletions fs/xfs/xfs_log_priv.h
Original file line number Diff line number Diff line change
Expand Up @@ -407,6 +407,7 @@ struct xlog {
long l_opstate; /* operational state */
uint l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */
struct list_head *l_buf_cancel_table;
struct list_head r_dfops; /* recovered log intent items */
int l_iclog_hsize; /* size of iclog header */
int l_iclog_heads; /* # of iclog header sectors */
uint l_sectBBsize; /* sector size in BBs (2^n) */
Expand Down
Loading

0 comments on commit 03f7767

Please sign in to comment.