diff --git a/[refs] b/[refs] index 38a477622075..c2149b0454bc 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: 316ce2ba8e74a7bb9153b9f93adc883cb1ceb9fd +refs/heads/master: 4cbe4249d6586d5d88ef271e07302407a14c8443 diff --git a/trunk/fs/ocfs2/alloc.c b/trunk/fs/ocfs2/alloc.c index 7e9cb753fba7..0cb2945eb817 100644 --- a/trunk/fs/ocfs2/alloc.c +++ b/trunk/fs/ocfs2/alloc.c @@ -2209,8 +2209,8 @@ static int ocfs2_rotate_subtree_right(handle_t *handle, * * Will return zero if the path passed in is already the leftmost path. */ -int ocfs2_find_cpos_for_left_leaf(struct super_block *sb, - struct ocfs2_path *path, u32 *cpos) +static int ocfs2_find_cpos_for_left_leaf(struct super_block *sb, + struct ocfs2_path *path, u32 *cpos) { int i, j, ret = 0; u64 blkno; @@ -5587,97 +5587,19 @@ int ocfs2_remove_extent(handle_t *handle, return ret; } -/* - * ocfs2_reserve_blocks_for_rec_trunc() would look basically the - * same as ocfs2_lock_alloctors(), except for it accepts a blocks - * number to reserve some extra blocks, and it only handles meta - * data allocations. - * - * Currently, only ocfs2_remove_btree_range() uses it for truncating - * and punching holes. - */ -static int ocfs2_reserve_blocks_for_rec_trunc(struct inode *inode, - struct ocfs2_extent_tree *et, - u32 extents_to_split, - struct ocfs2_alloc_context **ac, - int extra_blocks) -{ - int ret = 0, num_free_extents; - unsigned int max_recs_needed = 2 * extents_to_split; - struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - - *ac = NULL; - - num_free_extents = ocfs2_num_free_extents(osb, et); - if (num_free_extents < 0) { - ret = num_free_extents; - mlog_errno(ret); - goto out; - } - - if (!num_free_extents || - (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) - extra_blocks += ocfs2_extend_meta_needed(et->et_root_el); - - if (extra_blocks) { - ret = ocfs2_reserve_new_metadata_blocks(osb, extra_blocks, ac); - if (ret < 0) { - if (ret != -ENOSPC) - mlog_errno(ret); - goto out; - } - } - -out: - if (ret) { - if (*ac) { - ocfs2_free_alloc_context(*ac); - *ac = NULL; - } - } - - return ret; -} - int ocfs2_remove_btree_range(struct inode *inode, struct ocfs2_extent_tree *et, - u32 cpos, u32 phys_cpos, u32 len, int flags, - struct ocfs2_cached_dealloc_ctxt *dealloc, - u64 refcount_loc) + u32 cpos, u32 phys_cpos, u32 len, + struct ocfs2_cached_dealloc_ctxt *dealloc) { - int ret, credits = 0, extra_blocks = 0; + int ret; u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct inode *tl_inode = osb->osb_tl_inode; handle_t *handle; struct ocfs2_alloc_context *meta_ac = NULL; - struct ocfs2_refcount_tree *ref_tree = NULL; - - if ((flags & OCFS2_EXT_REFCOUNTED) && len) { - BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & - OCFS2_HAS_REFCOUNT_FL)); - - ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1, - &ref_tree, NULL); - if (ret) { - mlog_errno(ret); - goto out; - } - - ret = ocfs2_prepare_refcount_change_for_del(inode, - refcount_loc, - phys_blkno, - len, - &credits, - &extra_blocks); - if (ret < 0) { - mlog_errno(ret); - goto out; - } - } - ret = ocfs2_reserve_blocks_for_rec_trunc(inode, et, 1, &meta_ac, - extra_blocks); + ret = ocfs2_lock_allocators(inode, et, 0, 1, NULL, &meta_ac); if (ret) { mlog_errno(ret); return ret; @@ -5693,8 +5615,7 @@ int ocfs2_remove_btree_range(struct inode *inode, } } - handle = ocfs2_start_trans(osb, - ocfs2_remove_extent_credits(osb->sb) + credits); + handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb)); if (IS_ERR(handle)) { ret = PTR_ERR(handle); mlog_errno(ret); @@ -5721,20 +5642,9 @@ int ocfs2_remove_btree_range(struct inode *inode, ocfs2_journal_dirty(handle, et->et_root_bh); - if (phys_blkno) { - if (flags & OCFS2_EXT_REFCOUNTED) - ret = ocfs2_decrease_refcount(inode, handle, - ocfs2_blocks_to_clusters(osb->sb, - phys_blkno), - len, meta_ac, - dealloc, 1); - else - ret = ocfs2_truncate_log_append(osb, handle, - phys_blkno, len); - if (ret) - mlog_errno(ret); - - } + ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len); + if (ret) + mlog_errno(ret); out_commit: ocfs2_commit_trans(osb, handle); @@ -5744,9 +5654,6 @@ int ocfs2_remove_btree_range(struct inode *inode, if (meta_ac) ocfs2_free_alloc_context(meta_ac); - if (ref_tree) - ocfs2_unlock_refcount_tree(osb, ref_tree, 1); - return ret; } @@ -6574,6 +6481,417 @@ static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt, le16_to_cpu(eb->h_suballoc_bit)); } +/* This function will figure out whether the currently last extent + * block will be deleted, and if it will, what the new last extent + * block will be so we can update his h_next_leaf_blk field, as well + * as the dinodes i_last_eb_blk */ +static int ocfs2_find_new_last_ext_blk(struct inode *inode, + unsigned int clusters_to_del, + struct ocfs2_path *path, + struct buffer_head **new_last_eb) +{ + int next_free, ret = 0; + u32 cpos; + struct ocfs2_extent_rec *rec; + struct ocfs2_extent_block *eb; + struct ocfs2_extent_list *el; + struct buffer_head *bh = NULL; + + *new_last_eb = NULL; + + /* we have no tree, so of course, no last_eb. */ + if (!path->p_tree_depth) + goto out; + + /* trunc to zero special case - this makes tree_depth = 0 + * regardless of what it is. */ + if (OCFS2_I(inode)->ip_clusters == clusters_to_del) + goto out; + + el = path_leaf_el(path); + BUG_ON(!el->l_next_free_rec); + + /* + * Make sure that this extent list will actually be empty + * after we clear away the data. We can shortcut out if + * there's more than one non-empty extent in the + * list. Otherwise, a check of the remaining extent is + * necessary. + */ + next_free = le16_to_cpu(el->l_next_free_rec); + rec = NULL; + if (ocfs2_is_empty_extent(&el->l_recs[0])) { + if (next_free > 2) + goto out; + + /* We may have a valid extent in index 1, check it. */ + if (next_free == 2) + rec = &el->l_recs[1]; + + /* + * Fall through - no more nonempty extents, so we want + * to delete this leaf. + */ + } else { + if (next_free > 1) + goto out; + + rec = &el->l_recs[0]; + } + + if (rec) { + /* + * Check it we'll only be trimming off the end of this + * cluster. + */ + if (le16_to_cpu(rec->e_leaf_clusters) > clusters_to_del) + goto out; + } + + ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, path, &cpos); + if (ret) { + mlog_errno(ret); + goto out; + } + + ret = ocfs2_find_leaf(INODE_CACHE(inode), path_root_el(path), cpos, &bh); + if (ret) { + mlog_errno(ret); + goto out; + } + + eb = (struct ocfs2_extent_block *) bh->b_data; + el = &eb->h_list; + + /* ocfs2_find_leaf() gets the eb from ocfs2_read_extent_block(). + * Any corruption is a code bug. */ + BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb)); + + *new_last_eb = bh; + get_bh(*new_last_eb); + mlog(0, "returning block %llu, (cpos: %u)\n", + (unsigned long long)le64_to_cpu(eb->h_blkno), cpos); +out: + brelse(bh); + + return ret; +} + +/* + * Trim some clusters off the rightmost edge of a tree. Only called + * during truncate. + * + * The caller needs to: + * - start journaling of each path component. + * - compute and fully set up any new last ext block + */ +static int ocfs2_trim_tree(struct inode *inode, struct ocfs2_path *path, + handle_t *handle, struct ocfs2_truncate_context *tc, + u32 clusters_to_del, u64 *delete_start, u8 *flags) +{ + int ret, i, index = path->p_tree_depth; + u32 new_edge = 0; + u64 deleted_eb = 0; + struct buffer_head *bh; + struct ocfs2_extent_list *el; + struct ocfs2_extent_rec *rec; + + *delete_start = 0; + *flags = 0; + + while (index >= 0) { + bh = path->p_node[index].bh; + el = path->p_node[index].el; + + mlog(0, "traveling tree (index = %d, block = %llu)\n", + index, (unsigned long long)bh->b_blocknr); + + BUG_ON(le16_to_cpu(el->l_next_free_rec) == 0); + + if (index != + (path->p_tree_depth - le16_to_cpu(el->l_tree_depth))) { + ocfs2_error(inode->i_sb, + "Inode %lu has invalid ext. block %llu", + inode->i_ino, + (unsigned long long)bh->b_blocknr); + ret = -EROFS; + goto out; + } + +find_tail_record: + i = le16_to_cpu(el->l_next_free_rec) - 1; + rec = &el->l_recs[i]; + + mlog(0, "Extent list before: record %d: (%u, %u, %llu), " + "next = %u\n", i, le32_to_cpu(rec->e_cpos), + ocfs2_rec_clusters(el, rec), + (unsigned long long)le64_to_cpu(rec->e_blkno), + le16_to_cpu(el->l_next_free_rec)); + + BUG_ON(ocfs2_rec_clusters(el, rec) < clusters_to_del); + + if (le16_to_cpu(el->l_tree_depth) == 0) { + /* + * If the leaf block contains a single empty + * extent and no records, we can just remove + * the block. + */ + if (i == 0 && ocfs2_is_empty_extent(rec)) { + memset(rec, 0, + sizeof(struct ocfs2_extent_rec)); + el->l_next_free_rec = cpu_to_le16(0); + + goto delete; + } + + /* + * Remove any empty extents by shifting things + * left. That should make life much easier on + * the code below. This condition is rare + * enough that we shouldn't see a performance + * hit. + */ + if (ocfs2_is_empty_extent(&el->l_recs[0])) { + le16_add_cpu(&el->l_next_free_rec, -1); + + for(i = 0; + i < le16_to_cpu(el->l_next_free_rec); i++) + el->l_recs[i] = el->l_recs[i + 1]; + + memset(&el->l_recs[i], 0, + sizeof(struct ocfs2_extent_rec)); + + /* + * We've modified our extent list. The + * simplest way to handle this change + * is to being the search from the + * start again. + */ + goto find_tail_record; + } + + le16_add_cpu(&rec->e_leaf_clusters, -clusters_to_del); + + /* + * We'll use "new_edge" on our way back up the + * tree to know what our rightmost cpos is. + */ + new_edge = le16_to_cpu(rec->e_leaf_clusters); + new_edge += le32_to_cpu(rec->e_cpos); + + /* + * The caller will use this to delete data blocks. + */ + *delete_start = le64_to_cpu(rec->e_blkno) + + ocfs2_clusters_to_blocks(inode->i_sb, + le16_to_cpu(rec->e_leaf_clusters)); + *flags = rec->e_flags; + + /* + * If it's now empty, remove this record. + */ + if (le16_to_cpu(rec->e_leaf_clusters) == 0) { + memset(rec, 0, + sizeof(struct ocfs2_extent_rec)); + le16_add_cpu(&el->l_next_free_rec, -1); + } + } else { + if (le64_to_cpu(rec->e_blkno) == deleted_eb) { + memset(rec, 0, + sizeof(struct ocfs2_extent_rec)); + le16_add_cpu(&el->l_next_free_rec, -1); + + goto delete; + } + + /* Can this actually happen? */ + if (le16_to_cpu(el->l_next_free_rec) == 0) + goto delete; + + /* + * We never actually deleted any clusters + * because our leaf was empty. There's no + * reason to adjust the rightmost edge then. + */ + if (new_edge == 0) + goto delete; + + rec->e_int_clusters = cpu_to_le32(new_edge); + le32_add_cpu(&rec->e_int_clusters, + -le32_to_cpu(rec->e_cpos)); + + /* + * A deleted child record should have been + * caught above. + */ + BUG_ON(le32_to_cpu(rec->e_int_clusters) == 0); + } + +delete: + ocfs2_journal_dirty(handle, bh); + + mlog(0, "extent list container %llu, after: record %d: " + "(%u, %u, %llu), next = %u.\n", + (unsigned long long)bh->b_blocknr, i, + le32_to_cpu(rec->e_cpos), ocfs2_rec_clusters(el, rec), + (unsigned long long)le64_to_cpu(rec->e_blkno), + le16_to_cpu(el->l_next_free_rec)); + + /* + * We must be careful to only attempt delete of an + * extent block (and not the root inode block). + */ + if (index > 0 && le16_to_cpu(el->l_next_free_rec) == 0) { + struct ocfs2_extent_block *eb = + (struct ocfs2_extent_block *)bh->b_data; + + /* + * Save this for use when processing the + * parent block. + */ + deleted_eb = le64_to_cpu(eb->h_blkno); + + mlog(0, "deleting this extent block.\n"); + + ocfs2_remove_from_cache(INODE_CACHE(inode), bh); + + BUG_ON(ocfs2_rec_clusters(el, &el->l_recs[0])); + BUG_ON(le32_to_cpu(el->l_recs[0].e_cpos)); + BUG_ON(le64_to_cpu(el->l_recs[0].e_blkno)); + + ret = ocfs2_cache_extent_block_free(&tc->tc_dealloc, eb); + /* An error here is not fatal. */ + if (ret < 0) + mlog_errno(ret); + } else { + deleted_eb = 0; + } + + index--; + } + + ret = 0; +out: + return ret; +} + +static int ocfs2_do_truncate(struct ocfs2_super *osb, + unsigned int clusters_to_del, + struct inode *inode, + struct buffer_head *fe_bh, + handle_t *handle, + struct ocfs2_truncate_context *tc, + struct ocfs2_path *path, + struct ocfs2_alloc_context *meta_ac) +{ + int status; + struct ocfs2_dinode *fe; + struct ocfs2_extent_block *last_eb = NULL; + struct ocfs2_extent_list *el; + struct buffer_head *last_eb_bh = NULL; + u64 delete_blk = 0; + u8 rec_flags; + + fe = (struct ocfs2_dinode *) fe_bh->b_data; + + status = ocfs2_find_new_last_ext_blk(inode, clusters_to_del, + path, &last_eb_bh); + if (status < 0) { + mlog_errno(status); + goto bail; + } + + /* + * Each component will be touched, so we might as well journal + * here to avoid having to handle errors later. + */ + status = ocfs2_journal_access_path(INODE_CACHE(inode), handle, path); + if (status < 0) { + mlog_errno(status); + goto bail; + } + + if (last_eb_bh) { + status = ocfs2_journal_access_eb(handle, INODE_CACHE(inode), last_eb_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (status < 0) { + mlog_errno(status); + goto bail; + } + + last_eb = (struct ocfs2_extent_block *) last_eb_bh->b_data; + } + + el = &(fe->id2.i_list); + + /* + * Lower levels depend on this never happening, but it's best + * to check it up here before changing the tree. + */ + if (el->l_tree_depth && el->l_recs[0].e_int_clusters == 0) { + ocfs2_error(inode->i_sb, + "Inode %lu has an empty extent record, depth %u\n", + inode->i_ino, le16_to_cpu(el->l_tree_depth)); + status = -EROFS; + goto bail; + } + + dquot_free_space_nodirty(inode, + ocfs2_clusters_to_bytes(osb->sb, clusters_to_del)); + spin_lock(&OCFS2_I(inode)->ip_lock); + OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters) - + clusters_to_del; + spin_unlock(&OCFS2_I(inode)->ip_lock); + le32_add_cpu(&fe->i_clusters, -clusters_to_del); + inode->i_blocks = ocfs2_inode_sector_count(inode); + + status = ocfs2_trim_tree(inode, path, handle, tc, + clusters_to_del, &delete_blk, &rec_flags); + if (status) { + mlog_errno(status); + goto bail; + } + + if (le32_to_cpu(fe->i_clusters) == 0) { + /* trunc to zero is a special case. */ + el->l_tree_depth = 0; + fe->i_last_eb_blk = 0; + } else if (last_eb) + fe->i_last_eb_blk = last_eb->h_blkno; + + ocfs2_journal_dirty(handle, fe_bh); + + if (last_eb) { + /* If there will be a new last extent block, then by + * definition, there cannot be any leaves to the right of + * him. */ + last_eb->h_next_leaf_blk = 0; + ocfs2_journal_dirty(handle, last_eb_bh); + } + + if (delete_blk) { + if (rec_flags & OCFS2_EXT_REFCOUNTED) + status = ocfs2_decrease_refcount(inode, handle, + ocfs2_blocks_to_clusters(osb->sb, + delete_blk), + clusters_to_del, meta_ac, + &tc->tc_dealloc, 1); + else + status = ocfs2_truncate_log_append(osb, handle, + delete_blk, + clusters_to_del); + if (status < 0) { + mlog_errno(status); + goto bail; + } + } + status = 0; +bail: + brelse(last_eb_bh); + mlog_exit(status); + return status; +} + static int ocfs2_zero_func(handle_t *handle, struct buffer_head *bh) { set_buffer_uptodate(bh); @@ -6982,29 +7300,26 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, */ int ocfs2_commit_truncate(struct ocfs2_super *osb, struct inode *inode, - struct buffer_head *di_bh) + struct buffer_head *fe_bh, + struct ocfs2_truncate_context *tc) { - int status = 0, i, flags = 0; - u32 new_highest_cpos, range, trunc_cpos, trunc_len, phys_cpos, coff; + int status, i, credits, tl_sem = 0; + u32 clusters_to_del, new_highest_cpos, range; u64 blkno = 0; struct ocfs2_extent_list *el; - struct ocfs2_extent_rec *rec; + handle_t *handle = NULL; + struct inode *tl_inode = osb->osb_tl_inode; struct ocfs2_path *path = NULL; - struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; - struct ocfs2_extent_list *root_el = &(di->id2.i_list); - u64 refcount_loc = le64_to_cpu(di->i_refcount_loc); - struct ocfs2_extent_tree et; - struct ocfs2_cached_dealloc_ctxt dealloc; + struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data; + struct ocfs2_alloc_context *meta_ac = NULL; + struct ocfs2_refcount_tree *ref_tree = NULL; mlog_entry_void(); - ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh); - ocfs2_init_dealloc_ctxt(&dealloc); - new_highest_cpos = ocfs2_clusters_for_bytes(osb->sb, i_size_read(inode)); - path = ocfs2_new_path(di_bh, &di->id2.i_list, + path = ocfs2_new_path(fe_bh, &di->id2.i_list, ocfs2_journal_access_di); if (!path) { status = -ENOMEM; @@ -7023,6 +7338,8 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb, goto bail; } + credits = 0; + /* * Truncate always works against the rightmost tree branch. */ @@ -7057,62 +7374,101 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb, } i = le16_to_cpu(el->l_next_free_rec) - 1; - rec = &el->l_recs[i]; - flags = rec->e_flags; - range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec); - - if (i == 0 && ocfs2_is_empty_extent(rec)) { - /* - * Lower levels depend on this never happening, but it's best - * to check it up here before changing the tree. - */ - if (root_el->l_tree_depth && rec->e_int_clusters == 0) { - ocfs2_error(inode->i_sb, "Inode %lu has an empty " - "extent record, depth %u\n", inode->i_ino, - le16_to_cpu(root_el->l_tree_depth)); - status = -EROFS; - goto bail; - } - trunc_cpos = le32_to_cpu(rec->e_cpos); - trunc_len = 0; - blkno = 0; - } else if (le32_to_cpu(rec->e_cpos) >= new_highest_cpos) { - /* - * Truncate entire record. - */ - trunc_cpos = le32_to_cpu(rec->e_cpos); - trunc_len = ocfs2_rec_clusters(el, rec); - blkno = le64_to_cpu(rec->e_blkno); + range = le32_to_cpu(el->l_recs[i].e_cpos) + + ocfs2_rec_clusters(el, &el->l_recs[i]); + if (i == 0 && ocfs2_is_empty_extent(&el->l_recs[i])) { + clusters_to_del = 0; + } else if (le32_to_cpu(el->l_recs[i].e_cpos) >= new_highest_cpos) { + clusters_to_del = ocfs2_rec_clusters(el, &el->l_recs[i]); + blkno = le64_to_cpu(el->l_recs[i].e_blkno); } else if (range > new_highest_cpos) { - /* - * Partial truncate. it also should be - * the last truncate we're doing. - */ - trunc_cpos = new_highest_cpos; - trunc_len = range - new_highest_cpos; - coff = new_highest_cpos - le32_to_cpu(rec->e_cpos); - blkno = le64_to_cpu(rec->e_blkno) + - ocfs2_clusters_to_blocks(inode->i_sb, coff); + clusters_to_del = (ocfs2_rec_clusters(el, &el->l_recs[i]) + + le32_to_cpu(el->l_recs[i].e_cpos)) - + new_highest_cpos; + blkno = le64_to_cpu(el->l_recs[i].e_blkno) + + ocfs2_clusters_to_blocks(inode->i_sb, + ocfs2_rec_clusters(el, &el->l_recs[i]) - + clusters_to_del); } else { - /* - * Truncate completed, leave happily. - */ status = 0; goto bail; } - phys_cpos = ocfs2_blocks_to_clusters(inode->i_sb, blkno); + mlog(0, "clusters_to_del = %u in this pass, tail blk=%llu\n", + clusters_to_del, (unsigned long long)path_leaf_bh(path)->b_blocknr); + + if (el->l_recs[i].e_flags & OCFS2_EXT_REFCOUNTED && clusters_to_del) { + BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & + OCFS2_HAS_REFCOUNT_FL)); + + status = ocfs2_lock_refcount_tree(osb, + le64_to_cpu(di->i_refcount_loc), + 1, &ref_tree, NULL); + if (status) { + mlog_errno(status); + goto bail; + } + + status = ocfs2_prepare_refcount_change_for_del(inode, fe_bh, + blkno, + clusters_to_del, + &credits, + &meta_ac); + if (status < 0) { + mlog_errno(status); + goto bail; + } + } + + mutex_lock(&tl_inode->i_mutex); + tl_sem = 1; + /* ocfs2_truncate_log_needs_flush guarantees us at least one + * record is free for use. If there isn't any, we flush to get + * an empty truncate log. */ + if (ocfs2_truncate_log_needs_flush(osb)) { + status = __ocfs2_flush_truncate_log(osb); + if (status < 0) { + mlog_errno(status); + goto bail; + } + } + + credits += ocfs2_calc_tree_trunc_credits(osb->sb, clusters_to_del, + (struct ocfs2_dinode *)fe_bh->b_data, + el); + handle = ocfs2_start_trans(osb, credits); + if (IS_ERR(handle)) { + status = PTR_ERR(handle); + handle = NULL; + mlog_errno(status); + goto bail; + } - status = ocfs2_remove_btree_range(inode, &et, trunc_cpos, - phys_cpos, trunc_len, flags, &dealloc, - refcount_loc); + status = ocfs2_do_truncate(osb, clusters_to_del, inode, fe_bh, handle, + tc, path, meta_ac); if (status < 0) { mlog_errno(status); goto bail; } + mutex_unlock(&tl_inode->i_mutex); + tl_sem = 0; + + ocfs2_commit_trans(osb, handle); + handle = NULL; + ocfs2_reinit_path(path, 1); + if (meta_ac) { + ocfs2_free_alloc_context(meta_ac); + meta_ac = NULL; + } + + if (ref_tree) { + ocfs2_unlock_refcount_tree(osb, ref_tree, 1); + ref_tree = NULL; + } + /* * The check above will catch the case where we've truncated * away all allocation. @@ -7123,10 +7479,25 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb, ocfs2_schedule_truncate_log_flush(osb, 1); - ocfs2_run_deallocs(osb, &dealloc); + if (tl_sem) + mutex_unlock(&tl_inode->i_mutex); + + if (handle) + ocfs2_commit_trans(osb, handle); + + if (meta_ac) + ocfs2_free_alloc_context(meta_ac); + + if (ref_tree) + ocfs2_unlock_refcount_tree(osb, ref_tree, 1); + + ocfs2_run_deallocs(osb, &tc->tc_dealloc); ocfs2_free_path(path); + /* This will drop the ext_alloc cluster lock for us */ + ocfs2_free_truncate_context(tc); + mlog_exit(status); return status; } diff --git a/trunk/fs/ocfs2/alloc.h b/trunk/fs/ocfs2/alloc.h index a55a27bb96a5..1db4359ccb90 100644 --- a/trunk/fs/ocfs2/alloc.h +++ b/trunk/fs/ocfs2/alloc.h @@ -140,9 +140,8 @@ int ocfs2_remove_extent(handle_t *handle, struct ocfs2_extent_tree *et, struct ocfs2_cached_dealloc_ctxt *dealloc); int ocfs2_remove_btree_range(struct inode *inode, struct ocfs2_extent_tree *et, - u32 cpos, u32 phys_cpos, u32 len, int flags, - struct ocfs2_cached_dealloc_ctxt *dealloc, - u64 refcount_loc); + u32 cpos, u32 phys_cpos, u32 len, + struct ocfs2_cached_dealloc_ctxt *dealloc); int ocfs2_num_free_extents(struct ocfs2_super *osb, struct ocfs2_extent_tree *et); @@ -234,7 +233,8 @@ int ocfs2_prepare_truncate(struct ocfs2_super *osb, struct ocfs2_truncate_context **tc); int ocfs2_commit_truncate(struct ocfs2_super *osb, struct inode *inode, - struct buffer_head *di_bh); + struct buffer_head *fe_bh, + struct ocfs2_truncate_context *tc); int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh, unsigned int start, unsigned int end, int trunc); @@ -319,8 +319,6 @@ int ocfs2_journal_access_path(struct ocfs2_caching_info *ci, struct ocfs2_path *path); int ocfs2_find_cpos_for_right_leaf(struct super_block *sb, struct ocfs2_path *path, u32 *cpos); -int ocfs2_find_cpos_for_left_leaf(struct super_block *sb, - struct ocfs2_path *path, u32 *cpos); int ocfs2_find_subtree_root(struct ocfs2_extent_tree *et, struct ocfs2_path *left, struct ocfs2_path *right); diff --git a/trunk/fs/ocfs2/dir.c b/trunk/fs/ocfs2/dir.c index 4a75c2e2f855..6c9a28a2d3ae 100644 --- a/trunk/fs/ocfs2/dir.c +++ b/trunk/fs/ocfs2/dir.c @@ -4526,8 +4526,8 @@ int ocfs2_dx_dir_truncate(struct inode *dir, struct buffer_head *di_bh) p_cpos = ocfs2_blocks_to_clusters(dir->i_sb, blkno); - ret = ocfs2_remove_btree_range(dir, &et, cpos, p_cpos, clen, 0, - &dealloc, 0); + ret = ocfs2_remove_btree_range(dir, &et, cpos, p_cpos, clen, + &dealloc); if (ret) { mlog_errno(ret); goto out; diff --git a/trunk/fs/ocfs2/dlm/dlmdomain.c b/trunk/fs/ocfs2/dlm/dlmdomain.c index 6b5a492e1749..e82c0537eff9 100644 --- a/trunk/fs/ocfs2/dlm/dlmdomain.c +++ b/trunk/fs/ocfs2/dlm/dlmdomain.c @@ -1523,7 +1523,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, goto leave; } - dlm->name = kstrdup(domain, GFP_KERNEL); + dlm->name = kmalloc(strlen(domain) + 1, GFP_KERNEL); if (dlm->name == NULL) { mlog_errno(-ENOMEM); kfree(dlm); @@ -1557,6 +1557,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, for (i = 0; i < DLM_HASH_BUCKETS; i++) INIT_HLIST_HEAD(dlm_master_hash(dlm, i)); + strcpy(dlm->name, domain); dlm->key = key; dlm->node_num = o2nm_this_node(); diff --git a/trunk/fs/ocfs2/dlm/dlmlock.c b/trunk/fs/ocfs2/dlm/dlmlock.c index 69cf369961c4..f1fba2a6a8fe 100644 --- a/trunk/fs/ocfs2/dlm/dlmlock.c +++ b/trunk/fs/ocfs2/dlm/dlmlock.c @@ -431,7 +431,7 @@ struct dlm_lock * dlm_new_lock(int type, u8 node, u64 cookie, struct dlm_lock *lock; int kernel_allocated = 0; - lock = kmem_cache_zalloc(dlm_lock_cache, GFP_NOFS); + lock = (struct dlm_lock *) kmem_cache_zalloc(dlm_lock_cache, GFP_NOFS); if (!lock) return NULL; diff --git a/trunk/fs/ocfs2/dlm/dlmmaster.c b/trunk/fs/ocfs2/dlm/dlmmaster.c index b01e34819a09..3114de2e74c7 100644 --- a/trunk/fs/ocfs2/dlm/dlmmaster.c +++ b/trunk/fs/ocfs2/dlm/dlmmaster.c @@ -617,11 +617,13 @@ struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm, { struct dlm_lock_resource *res = NULL; - res = kmem_cache_zalloc(dlm_lockres_cache, GFP_NOFS); + res = (struct dlm_lock_resource *) + kmem_cache_zalloc(dlm_lockres_cache, GFP_NOFS); if (!res) goto error; - res->lockname.name = kmem_cache_zalloc(dlm_lockname_cache, GFP_NOFS); + res->lockname.name = (char *) + kmem_cache_zalloc(dlm_lockname_cache, GFP_NOFS); if (!res->lockname.name) goto error; @@ -755,7 +757,8 @@ struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm, spin_unlock(&dlm->spinlock); mlog(0, "allocating a new resource\n"); /* nothing found and we need to allocate one. */ - alloc_mle = kmem_cache_alloc(dlm_mle_cache, GFP_NOFS); + alloc_mle = (struct dlm_master_list_entry *) + kmem_cache_alloc(dlm_mle_cache, GFP_NOFS); if (!alloc_mle) goto leave; res = dlm_new_lockres(dlm, lockid, namelen); @@ -1539,7 +1542,8 @@ int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data, spin_unlock(&dlm->master_lock); spin_unlock(&dlm->spinlock); - mle = kmem_cache_alloc(dlm_mle_cache, GFP_NOFS); + mle = (struct dlm_master_list_entry *) + kmem_cache_alloc(dlm_mle_cache, GFP_NOFS); if (!mle) { response = DLM_MASTER_RESP_ERROR; mlog_errno(-ENOMEM); @@ -2454,7 +2458,8 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm, goto leave; } - mle = kmem_cache_alloc(dlm_mle_cache, GFP_NOFS); + mle = (struct dlm_master_list_entry *) kmem_cache_alloc(dlm_mle_cache, + GFP_NOFS); if (!mle) { mlog_errno(ret); goto leave; @@ -3036,7 +3041,8 @@ int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data, hash = dlm_lockid_hash(name, namelen); /* preallocate.. if this fails, abort */ - mle = kmem_cache_alloc(dlm_mle_cache, GFP_NOFS); + mle = (struct dlm_master_list_entry *) kmem_cache_alloc(dlm_mle_cache, + GFP_NOFS); if (!mle) { ret = -ENOMEM; diff --git a/trunk/fs/ocfs2/file.c b/trunk/fs/ocfs2/file.c index 9c1047c2e44e..19d16f2ef81e 100644 --- a/trunk/fs/ocfs2/file.c +++ b/trunk/fs/ocfs2/file.c @@ -444,6 +444,7 @@ static int ocfs2_truncate_file(struct inode *inode, int status = 0; struct ocfs2_dinode *fe = NULL; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct ocfs2_truncate_context *tc = NULL; mlog_entry("(inode = %llu, new_i_size = %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno, @@ -514,7 +515,13 @@ static int ocfs2_truncate_file(struct inode *inode, goto bail_unlock_sem; } - status = ocfs2_commit_truncate(osb, inode, di_bh); + status = ocfs2_prepare_truncate(osb, inode, di_bh, &tc); + if (status < 0) { + mlog_errno(status); + goto bail_unlock_sem; + } + + status = ocfs2_commit_truncate(osb, inode, di_bh, tc); if (status < 0) { mlog_errno(status); goto bail_unlock_sem; @@ -1418,90 +1425,16 @@ static int ocfs2_zero_partial_clusters(struct inode *inode, return ret; } -static int ocfs2_find_rec(struct ocfs2_extent_list *el, u32 pos) -{ - int i; - struct ocfs2_extent_rec *rec = NULL; - - for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) { - - rec = &el->l_recs[i]; - - if (le32_to_cpu(rec->e_cpos) < pos) - break; - } - - return i; -} - -/* - * Helper to calculate the punching pos and length in one run, we handle the - * following three cases in order: - * - * - remove the entire record - * - remove a partial record - * - no record needs to be removed (hole-punching completed) -*/ -static void ocfs2_calc_trunc_pos(struct inode *inode, - struct ocfs2_extent_list *el, - struct ocfs2_extent_rec *rec, - u32 trunc_start, u32 *trunc_cpos, - u32 *trunc_len, u32 *trunc_end, - u64 *blkno, int *done) -{ - int ret = 0; - u32 coff, range; - - range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec); - - if (le32_to_cpu(rec->e_cpos) >= trunc_start) { - *trunc_cpos = le32_to_cpu(rec->e_cpos); - /* - * Skip holes if any. - */ - if (range < *trunc_end) - *trunc_end = range; - *trunc_len = *trunc_end - le32_to_cpu(rec->e_cpos); - *blkno = le64_to_cpu(rec->e_blkno); - *trunc_end = le32_to_cpu(rec->e_cpos); - } else if (range > trunc_start) { - *trunc_cpos = trunc_start; - *trunc_len = *trunc_end - trunc_start; - coff = trunc_start - le32_to_cpu(rec->e_cpos); - *blkno = le64_to_cpu(rec->e_blkno) + - ocfs2_clusters_to_blocks(inode->i_sb, coff); - *trunc_end = trunc_start; - } else { - /* - * It may have two following possibilities: - * - * - last record has been removed - * - trunc_start was within a hole - * - * both two cases mean the completion of hole punching. - */ - ret = 1; - } - - *done = ret; -} - static int ocfs2_remove_inode_range(struct inode *inode, struct buffer_head *di_bh, u64 byte_start, u64 byte_len) { - int ret = 0, flags = 0, done = 0, i; - u32 trunc_start, trunc_len, trunc_end, trunc_cpos, phys_cpos; - u32 cluster_in_el; + int ret = 0; + u32 trunc_start, trunc_len, cpos, phys_cpos, alloc_size; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_cached_dealloc_ctxt dealloc; struct address_space *mapping = inode->i_mapping; struct ocfs2_extent_tree et; - struct ocfs2_path *path = NULL; - struct ocfs2_extent_list *el = NULL; - struct ocfs2_extent_rec *rec = NULL; - struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; - u64 blkno, refcount_loc = le64_to_cpu(di->i_refcount_loc); ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh); ocfs2_init_dealloc_ctxt(&dealloc); @@ -1527,35 +1460,17 @@ static int ocfs2_remove_inode_range(struct inode *inode, goto out; } - /* - * For reflinks, we may need to CoW 2 clusters which might be - * partially zero'd later, if hole's start and end offset were - * within one cluster(means is not exactly aligned to clustersize). - */ - - if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL) { - - ret = ocfs2_cow_file_pos(inode, di_bh, byte_start); - if (ret) { - mlog_errno(ret); - goto out; - } - - ret = ocfs2_cow_file_pos(inode, di_bh, byte_start + byte_len); - if (ret) { - mlog_errno(ret); - goto out; - } - } - trunc_start = ocfs2_clusters_for_bytes(osb->sb, byte_start); - trunc_end = (byte_start + byte_len) >> osb->s_clustersize_bits; - cluster_in_el = trunc_end; + trunc_len = (byte_start + byte_len) >> osb->s_clustersize_bits; + if (trunc_len >= trunc_start) + trunc_len -= trunc_start; + else + trunc_len = 0; - mlog(0, "Inode: %llu, start: %llu, len: %llu, cstart: %u, cend: %u\n", + mlog(0, "Inode: %llu, start: %llu, len: %llu, cstart: %u, clen: %u\n", (unsigned long long)OCFS2_I(inode)->ip_blkno, (unsigned long long)byte_start, - (unsigned long long)byte_len, trunc_start, trunc_end); + (unsigned long long)byte_len, trunc_start, trunc_len); ret = ocfs2_zero_partial_clusters(inode, byte_start, byte_len); if (ret) { @@ -1563,79 +1478,31 @@ static int ocfs2_remove_inode_range(struct inode *inode, goto out; } - path = ocfs2_new_path_from_et(&et); - if (!path) { - ret = -ENOMEM; - mlog_errno(ret); - goto out; - } - - while (trunc_end > trunc_start) { - - ret = ocfs2_find_path(INODE_CACHE(inode), path, - cluster_in_el); + cpos = trunc_start; + while (trunc_len) { + ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, + &alloc_size, NULL); if (ret) { mlog_errno(ret); goto out; } - el = path_leaf_el(path); - - i = ocfs2_find_rec(el, trunc_end); - /* - * Need to go to previous extent block. - */ - if (i < 0) { - if (path->p_tree_depth == 0) - break; + if (alloc_size > trunc_len) + alloc_size = trunc_len; - ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, - path, - &cluster_in_el); + /* Only do work for non-holes */ + if (phys_cpos != 0) { + ret = ocfs2_remove_btree_range(inode, &et, cpos, + phys_cpos, alloc_size, + &dealloc); if (ret) { mlog_errno(ret); goto out; } - - /* - * We've reached the leftmost extent block, - * it's safe to leave. - */ - if (cluster_in_el == 0) - break; - - /* - * The 'pos' searched for previous extent block is - * always one cluster less than actual trunc_end. - */ - trunc_end = cluster_in_el + 1; - - ocfs2_reinit_path(path, 1); - - continue; - - } else - rec = &el->l_recs[i]; - - ocfs2_calc_trunc_pos(inode, el, rec, trunc_start, &trunc_cpos, - &trunc_len, &trunc_end, &blkno, &done); - if (done) - break; - - flags = rec->e_flags; - phys_cpos = ocfs2_blocks_to_clusters(inode->i_sb, blkno); - - ret = ocfs2_remove_btree_range(inode, &et, trunc_cpos, - phys_cpos, trunc_len, flags, - &dealloc, refcount_loc); - if (ret < 0) { - mlog_errno(ret); - goto out; } - cluster_in_el = trunc_end; - - ocfs2_reinit_path(path, 1); + cpos += alloc_size; + trunc_len -= alloc_size; } ocfs2_truncate_cluster_pages(inode, byte_start, byte_len); diff --git a/trunk/fs/ocfs2/inode.c b/trunk/fs/ocfs2/inode.c index 9a17251f3d9e..9ee13f70da57 100644 --- a/trunk/fs/ocfs2/inode.c +++ b/trunk/fs/ocfs2/inode.c @@ -544,6 +544,7 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb, struct buffer_head *fe_bh) { int status = 0; + struct ocfs2_truncate_context *tc = NULL; struct ocfs2_dinode *fe; handle_t *handle = NULL; @@ -585,7 +586,13 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb, ocfs2_commit_trans(osb, handle); handle = NULL; - status = ocfs2_commit_truncate(osb, inode, fe_bh); + status = ocfs2_prepare_truncate(osb, inode, fe_bh, &tc); + if (status < 0) { + mlog_errno(status); + goto out; + } + + status = ocfs2_commit_truncate(osb, inode, fe_bh, tc); if (status < 0) { mlog_errno(status); goto out; @@ -950,7 +957,7 @@ static void ocfs2_cleanup_delete_inode(struct inode *inode, void ocfs2_delete_inode(struct inode *inode) { int wipe, status; - sigset_t oldset; + sigset_t blocked, oldset; struct buffer_head *di_bh = NULL; mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino); @@ -977,7 +984,13 @@ void ocfs2_delete_inode(struct inode *inode) * messaging paths may return us -ERESTARTSYS. Which would * cause us to exit early, resulting in inodes being orphaned * forever. */ - ocfs2_block_signals(&oldset); + sigfillset(&blocked); + status = sigprocmask(SIG_BLOCK, &blocked, &oldset); + if (status < 0) { + mlog_errno(status); + ocfs2_cleanup_delete_inode(inode, 1); + goto bail; + } /* * Synchronize us against ocfs2_get_dentry. We take this in @@ -1051,7 +1064,9 @@ void ocfs2_delete_inode(struct inode *inode) ocfs2_nfs_sync_unlock(OCFS2_SB(inode->i_sb), 0); bail_unblock: - ocfs2_unblock_signals(&oldset); + status = sigprocmask(SIG_SETMASK, &oldset, NULL); + if (status < 0) + mlog_errno(status); bail: clear_inode(inode); mlog_exit_void(); diff --git a/trunk/fs/ocfs2/localalloc.c b/trunk/fs/ocfs2/localalloc.c index 63c41e206792..9538bbe028d4 100644 --- a/trunk/fs/ocfs2/localalloc.c +++ b/trunk/fs/ocfs2/localalloc.c @@ -122,7 +122,7 @@ unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb) struct super_block *sb = osb->sb; gd_mb = ocfs2_clusters_to_megabytes(osb->sb, - 8 * ocfs2_group_bitmap_size(sb)); + 8 * ocfs2_group_bitmap_size(sb, 0)); /* * This takes care of files systems with very small group diff --git a/trunk/fs/ocfs2/mmap.c b/trunk/fs/ocfs2/mmap.c index a61809f8eab5..39737613424a 100644 --- a/trunk/fs/ocfs2/mmap.c +++ b/trunk/fs/ocfs2/mmap.c @@ -42,20 +42,44 @@ #include "file.h" #include "inode.h" #include "mmap.h" -#include "super.h" +static inline int ocfs2_vm_op_block_sigs(sigset_t *blocked, sigset_t *oldset) +{ + /* The best way to deal with signals in the vm path is + * to block them upfront, rather than allowing the + * locking paths to return -ERESTARTSYS. */ + sigfillset(blocked); + + /* We should technically never get a bad return value + * from sigprocmask */ + return sigprocmask(SIG_BLOCK, blocked, oldset); +} + +static inline int ocfs2_vm_op_unblock_sigs(sigset_t *oldset) +{ + return sigprocmask(SIG_SETMASK, oldset, NULL); +} static int ocfs2_fault(struct vm_area_struct *area, struct vm_fault *vmf) { - sigset_t oldset; - int ret; + sigset_t blocked, oldset; + int error, ret; mlog_entry("(area=%p, page offset=%lu)\n", area, vmf->pgoff); - ocfs2_block_signals(&oldset); + error = ocfs2_vm_op_block_sigs(&blocked, &oldset); + if (error < 0) { + mlog_errno(error); + ret = VM_FAULT_SIGBUS; + goto out; + } + ret = filemap_fault(area, vmf); - ocfs2_unblock_signals(&oldset); + error = ocfs2_vm_op_unblock_sigs(&oldset); + if (error < 0) + mlog_errno(error); +out: mlog_exit_ptr(vmf->page); return ret; } @@ -135,10 +159,14 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) struct page *page = vmf->page; struct inode *inode = vma->vm_file->f_path.dentry->d_inode; struct buffer_head *di_bh = NULL; - sigset_t oldset; - int ret; + sigset_t blocked, oldset; + int ret, ret2; - ocfs2_block_signals(&oldset); + ret = ocfs2_vm_op_block_sigs(&blocked, &oldset); + if (ret < 0) { + mlog_errno(ret); + return ret; + } /* * The cluster locks taken will block a truncate from another @@ -166,7 +194,9 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) ocfs2_inode_unlock(inode, 1); out: - ocfs2_unblock_signals(&oldset); + ret2 = ocfs2_vm_op_unblock_sigs(&oldset); + if (ret2 < 0) + mlog_errno(ret2); if (ret) ret = VM_FAULT_SIGBUS; return ret; diff --git a/trunk/fs/ocfs2/namei.c b/trunk/fs/ocfs2/namei.c index 607084b349d4..21d4a33d0f0e 100644 --- a/trunk/fs/ocfs2/namei.c +++ b/trunk/fs/ocfs2/namei.c @@ -239,8 +239,6 @@ static int ocfs2_mknod(struct inode *dir, }; int did_quota_inode = 0; struct ocfs2_dir_lookup_result lookup = { NULL, }; - sigset_t oldset; - int did_block_signals = 0; mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode, (unsigned long)dev, dentry->d_name.len, @@ -352,10 +350,6 @@ static int ocfs2_mknod(struct inode *dir, goto leave; } - /* Starting to change things, restart is no longer possible. */ - ocfs2_block_signals(&oldset); - did_block_signals = 1; - status = dquot_alloc_inode(inode); if (status) goto leave; @@ -436,8 +430,6 @@ static int ocfs2_mknod(struct inode *dir, ocfs2_commit_trans(osb, handle); ocfs2_inode_unlock(dir, 1); - if (did_block_signals) - ocfs2_unblock_signals(&oldset); if (status == -ENOSPC) mlog(0, "Disk is full\n"); @@ -626,7 +618,6 @@ static int ocfs2_link(struct dentry *old_dentry, struct ocfs2_dinode *fe = NULL; struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); struct ocfs2_dir_lookup_result lookup = { NULL, }; - sigset_t oldset; mlog_entry("(inode=%lu, old='%.*s' new='%.*s')\n", inode->i_ino, old_dentry->d_name.len, old_dentry->d_name.name, @@ -683,9 +674,6 @@ static int ocfs2_link(struct dentry *old_dentry, goto out_unlock_inode; } - /* Starting to change things, restart is no longer possible. */ - ocfs2_block_signals(&oldset); - err = ocfs2_journal_access_di(handle, INODE_CACHE(inode), fe_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (err < 0) { @@ -722,7 +710,6 @@ static int ocfs2_link(struct dentry *old_dentry, out_commit: ocfs2_commit_trans(osb, handle); - ocfs2_unblock_signals(&oldset); out_unlock_inode: ocfs2_inode_unlock(inode, 1); @@ -1581,8 +1568,6 @@ static int ocfs2_symlink(struct inode *dir, }; int did_quota = 0, did_quota_inode = 0; struct ocfs2_dir_lookup_result lookup = { NULL, }; - sigset_t oldset; - int did_block_signals = 0; mlog_entry("(0x%p, 0x%p, symname='%s' actual='%.*s')\n", dir, dentry, symname, dentry->d_name.len, dentry->d_name.name); @@ -1678,10 +1663,6 @@ static int ocfs2_symlink(struct inode *dir, goto bail; } - /* Starting to change things, restart is no longer possible. */ - ocfs2_block_signals(&oldset); - did_block_signals = 1; - status = dquot_alloc_inode(inode); if (status) goto bail; @@ -1785,8 +1766,6 @@ static int ocfs2_symlink(struct inode *dir, ocfs2_commit_trans(osb, handle); ocfs2_inode_unlock(dir, 1); - if (did_block_signals) - ocfs2_unblock_signals(&oldset); brelse(new_fe_bh); brelse(parent_fe_bh); diff --git a/trunk/fs/ocfs2/ocfs2_fs.h b/trunk/fs/ocfs2/ocfs2_fs.h index d61a1521b10e..448aa8d11a97 100644 --- a/trunk/fs/ocfs2/ocfs2_fs.h +++ b/trunk/fs/ocfs2/ocfs2_fs.h @@ -165,6 +165,9 @@ /* Refcount tree support */ #define OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE 0x1000 +/* Discontigous block groups */ +#define OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG 0x2000 + /* * backup superblock flag is used to indicate that this volume * has backup superblocks. @@ -831,6 +834,13 @@ struct ocfs2_dx_leaf { struct ocfs2_dx_entry_list dl_list; }; +/* + * Largest bitmap for a block (suballocator) group in bytes. This limit + * does not affect cluster groups (global allocator). Cluster group + * bitmaps run to the end of the block. + */ +#define OCFS2_MAX_BG_BITMAP_SIZE 256 + /* * On disk allocator group structure for OCFS2 */ @@ -852,7 +862,29 @@ struct ocfs2_group_desc __le64 bg_blkno; /* Offset on disk, in blocks */ /*30*/ struct ocfs2_block_check bg_check; /* Error checking */ __le64 bg_reserved2; -/*40*/ __u8 bg_bitmap[0]; +/*40*/ union { + __u8 bg_bitmap[0]; + struct { + /* + * Block groups may be discontiguous when + * OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG is set. + * The extents of a discontigous block group are + * stored in bg_list. It is a flat list. + * l_tree_depth must always be zero. A + * discontiguous group is signified by a non-zero + * bg_list->l_next_free_rec. Only block groups + * can be discontiguous; Cluster groups cannot. + * We've never made a block group with more than + * 2048 blocks (256 bytes of bg_bitmap). This + * codifies that limit so that we can fit bg_list. + * bg_size of a discontiguous block group will + * be 256 to match bg_bitmap_filler. + */ + __u8 bg_bitmap_filler[OCFS2_MAX_BG_BITMAP_SIZE]; +/*140*/ struct ocfs2_extent_list bg_list; + }; + }; +/* Actual on-disk size is one block */ }; struct ocfs2_refcount_rec { @@ -1276,12 +1308,16 @@ static inline u16 ocfs2_local_alloc_size(struct super_block *sb) return size; } -static inline int ocfs2_group_bitmap_size(struct super_block *sb) +static inline int ocfs2_group_bitmap_size(struct super_block *sb, + int suballocator) { int size; - size = sb->s_blocksize - - offsetof(struct ocfs2_group_desc, bg_bitmap); + if (suballocator) + size = OCFS2_MAX_BG_BITMAP_SIZE; + else + size = sb->s_blocksize - + offsetof(struct ocfs2_group_desc, bg_bitmap); return size; } @@ -1404,12 +1440,15 @@ static inline int ocfs2_local_alloc_size(int blocksize) return size; } -static inline int ocfs2_group_bitmap_size(int blocksize) +static inline int ocfs2_group_bitmap_size(int blocksize, int suballocator) { int size; - size = blocksize - - offsetof(struct ocfs2_group_desc, bg_bitmap); + if (suballocator) + size = OCFS2_MAX_BG_BITMAP_SIZE; + else + size = blocksize - + offsetof(struct ocfs2_group_desc, bg_bitmap); return size; } diff --git a/trunk/fs/ocfs2/refcounttree.c b/trunk/fs/ocfs2/refcounttree.c index 6fab28921f3d..33dd2a18cb74 100644 --- a/trunk/fs/ocfs2/refcounttree.c +++ b/trunk/fs/ocfs2/refcounttree.c @@ -2509,19 +2509,20 @@ static int ocfs2_calc_refcount_meta_credits(struct super_block *sb, * * Normally the refcount blocks store these refcount should be * contiguous also, so that we can get the number easily. - * We will at most add split 2 refcount records and 2 more - * refcount blocks, so just check it in a rough way. + * As for meta_ac, we will at most add split 2 refcount record and + * 2 more refcount block, so just check it in a rough way. * * Caller must hold refcount tree lock. */ int ocfs2_prepare_refcount_change_for_del(struct inode *inode, - u64 refcount_loc, + struct buffer_head *di_bh, u64 phys_blkno, u32 clusters, int *credits, - int *ref_blocks) + struct ocfs2_alloc_context **meta_ac) { - int ret; + int ret, ref_blocks = 0; + struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; struct ocfs2_inode_info *oi = OCFS2_I(inode); struct buffer_head *ref_root_bh = NULL; struct ocfs2_refcount_tree *tree; @@ -2538,13 +2539,14 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode, BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)); ret = ocfs2_get_refcount_tree(OCFS2_SB(inode->i_sb), - refcount_loc, &tree); + le64_to_cpu(di->i_refcount_loc), &tree); if (ret) { mlog_errno(ret); goto out; } - ret = ocfs2_read_refcount_block(&tree->rf_ci, refcount_loc, + ret = ocfs2_read_refcount_block(&tree->rf_ci, + le64_to_cpu(di->i_refcount_loc), &ref_root_bh); if (ret) { mlog_errno(ret); @@ -2555,14 +2557,21 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode, &tree->rf_ci, ref_root_bh, start_cpos, clusters, - ref_blocks, credits); + &ref_blocks, credits); if (ret) { mlog_errno(ret); goto out; } - mlog(0, "reserve new metadata %d blocks, credits = %d\n", - *ref_blocks, *credits); + mlog(0, "reserve new metadata %d, credits = %d\n", + ref_blocks, *credits); + + if (ref_blocks) { + ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb), + ref_blocks, meta_ac); + if (ret) + mlog_errno(ret); + } out: brelse(ref_root_bh); diff --git a/trunk/fs/ocfs2/refcounttree.h b/trunk/fs/ocfs2/refcounttree.h index 9983ba1570e2..c1d19b1d3ecc 100644 --- a/trunk/fs/ocfs2/refcounttree.h +++ b/trunk/fs/ocfs2/refcounttree.h @@ -47,11 +47,11 @@ int ocfs2_decrease_refcount(struct inode *inode, struct ocfs2_cached_dealloc_ctxt *dealloc, int delete); int ocfs2_prepare_refcount_change_for_del(struct inode *inode, - u64 refcount_loc, + struct buffer_head *di_bh, u64 phys_blkno, u32 clusters, int *credits, - int *ref_blocks); + struct ocfs2_alloc_context **meta_ac); int ocfs2_refcount_cow(struct inode *inode, struct buffer_head *di_bh, u32 cpos, u32 write_len, u32 max_cpos); diff --git a/trunk/fs/ocfs2/resize.c b/trunk/fs/ocfs2/resize.c index a821f667b5c4..5bbfc123781f 100644 --- a/trunk/fs/ocfs2/resize.c +++ b/trunk/fs/ocfs2/resize.c @@ -315,7 +315,7 @@ int ocfs2_group_extend(struct inode * inode, int new_clusters) BUG_ON(!OCFS2_IS_VALID_DINODE(fe)); if (le16_to_cpu(fe->id2.i_chain.cl_cpg) != - ocfs2_group_bitmap_size(osb->sb) * 8) { + ocfs2_group_bitmap_size(osb->sb, 0) * 8) { mlog(ML_ERROR, "The disk is too old and small. " "Force to do offline resize."); ret = -EINVAL; @@ -496,7 +496,7 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input) fe = (struct ocfs2_dinode *)main_bm_bh->b_data; if (le16_to_cpu(fe->id2.i_chain.cl_cpg) != - ocfs2_group_bitmap_size(osb->sb) * 8) { + ocfs2_group_bitmap_size(osb->sb, 0) * 8) { mlog(ML_ERROR, "The disk is too old and small." " Force to do offline resize."); ret = -EINVAL; diff --git a/trunk/fs/ocfs2/suballoc.c b/trunk/fs/ocfs2/suballoc.c index 667d622b3659..1070f79fa068 100644 --- a/trunk/fs/ocfs2/suballoc.c +++ b/trunk/fs/ocfs2/suballoc.c @@ -360,7 +360,7 @@ static int ocfs2_block_group_fill(handle_t *handle, memset(bg, 0, sb->s_blocksize); strcpy(bg->bg_signature, OCFS2_GROUP_DESC_SIGNATURE); bg->bg_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation); - bg->bg_size = cpu_to_le16(ocfs2_group_bitmap_size(sb)); + bg->bg_size = cpu_to_le16(ocfs2_group_bitmap_size(sb, 1)); bg->bg_bits = cpu_to_le16(ocfs2_bits_per_group(cl)); bg->bg_chain = cpu_to_le16(my_chain); bg->bg_next_group = cl->cl_recs[my_chain].c_blkno; diff --git a/trunk/fs/ocfs2/super.c b/trunk/fs/ocfs2/super.c index cf6d87b57450..59930ee4fe2e 100644 --- a/trunk/fs/ocfs2/super.c +++ b/trunk/fs/ocfs2/super.c @@ -2277,7 +2277,7 @@ static int ocfs2_initialize_super(struct super_block *sb, osb->osb_clusters_at_boot = OCFS2_I(inode)->ip_clusters; iput(inode); - osb->bitmap_cpg = ocfs2_group_bitmap_size(sb) * 8; + osb->bitmap_cpg = ocfs2_group_bitmap_size(sb, 0) * 8; status = ocfs2_init_slot_info(osb); if (status < 0) { @@ -2560,25 +2560,5 @@ void __ocfs2_abort(struct super_block* sb, ocfs2_handle_error(sb); } -/* - * Void signal blockers, because in-kernel sigprocmask() only fails - * when SIG_* is wrong. - */ -void ocfs2_block_signals(sigset_t *oldset) -{ - int rc; - sigset_t blocked; - - sigfillset(&blocked); - rc = sigprocmask(SIG_BLOCK, &blocked, oldset); - BUG_ON(rc); -} - -void ocfs2_unblock_signals(sigset_t *oldset) -{ - int rc = sigprocmask(SIG_SETMASK, oldset, NULL); - BUG_ON(rc); -} - module_init(ocfs2_init); module_exit(ocfs2_exit); diff --git a/trunk/fs/ocfs2/super.h b/trunk/fs/ocfs2/super.h index 40c7de084c10..783f5270f2a1 100644 --- a/trunk/fs/ocfs2/super.h +++ b/trunk/fs/ocfs2/super.h @@ -45,11 +45,4 @@ void __ocfs2_abort(struct super_block *sb, #define ocfs2_abort(sb, fmt, args...) __ocfs2_abort(sb, __PRETTY_FUNCTION__, fmt, ##args) -/* - * Void signal blockers, because in-kernel sigprocmask() only fails - * when SIG_* is wrong. - */ -void ocfs2_block_signals(sigset_t *oldset); -void ocfs2_unblock_signals(sigset_t *oldset); - #endif /* OCFS2_SUPER_H */