Skip to content

Commit

Permalink
Merge tag 'gfs2-merge-window' of git://git.kernel.org/pub/scm/linux/k…
Browse files Browse the repository at this point in the history
…ernel/git/steve/gfs2-3.0-nmw

Pull GFS2 updates from Steven Whitehouse:
 "One of the main highlights this time is not the patches themselves
  but instead the widening contributor base.  It is good to see that
  interest is increasing in GFS2, and I'd like to thank all the
  contributors to this patch set.

  In addition to the usual set of bug fixes and clean ups, there are
  patches to improve inode creation performance when xattrs are required
  and some improvements to the transaction code which are intended to
  help improve scalability after further changes in due course.

  Journal extent mapping is also updated to make it more efficient and
  again, this is a foundation for future work in this area.

  The maximum number of ACLs has been increased to 300 (for a 4k block
  size) which means that even with a few additional xattrs from selinux,
  everything should fit within a single fs block.

  There is also a patch to bring GFS2's own copy of the writepages code
  up to the same level as the core VFS.  Eventually we may be able to
  merge some of this code, since it is fairly similar.

  The other major change this time is bringing consistency to the
  printing of messages via fs_<level>, pr_<level> macros"

* tag 'gfs2-merge-window' of git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-3.0-nmw: (29 commits)
  GFS2: Fix address space from page function
  GFS2: Fix uninitialized VFS inode in gfs2_create_inode
  GFS2: Fix return value in slot_get()
  GFS2: inline function gfs2_set_mode
  GFS2: Remove extraneous function gfs2_security_init
  GFS2: Increase the max number of ACLs
  GFS2: Re-add a call to log_flush_wait when flushing the journal
  GFS2: Ensure workqueue is scheduled after noexp request
  GFS2: check NULL return value in gfs2_ok_to_move
  GFS2: Convert gfs2_lm_withdraw to use fs_err
  GFS2: Use fs_<level> more often
  GFS2: Use pr_<level> more consistently
  GFS2: Move recovery variables to journal structure in memory
  GFS2: global conversion to pr_foo()
  GFS2: return -E2BIG if hit the maximum limits of ACLs
  GFS2: Clean up journal extent mapping
  GFS2: replace kmalloc - __vmalloc / memset 0
  GFS2: Remove extra "if" in gfs2_log_flush()
  fs: NULL dereference in posix_acl_to_xattr()
  GFS2: Move log buffer accounting to transaction
  ...
  • Loading branch information
Linus Torvalds committed Apr 4, 2014
2 parents f7789dc + 1b2ad41 commit 34917f9
Show file tree
Hide file tree
Showing 31 changed files with 618 additions and 442 deletions.
2 changes: 2 additions & 0 deletions fs/fs-writeback.c
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ static inline struct inode *wb_inode(struct list_head *head)
#define CREATE_TRACE_POINTS
#include <trace/events/writeback.h>

EXPORT_TRACEPOINT_SYMBOL_GPL(wbc_writepage);

static void bdi_wakeup_thread(struct backing_dev_info *bdi)
{
spin_lock_bh(&bdi->wb_lock);
Expand Down
23 changes: 6 additions & 17 deletions fs/gfs2/acl.c
Original file line number Diff line number Diff line change
Expand Up @@ -64,18 +64,6 @@ struct posix_acl *gfs2_get_acl(struct inode *inode, int type)
return acl;
}

/**
 * gfs2_set_mode - Store a new mode in an inode if it differs
 * @inode: The inode to update
 * @mode: The mode to place in @inode->i_mode
 *
 * The inode is written to and marked dirty only when @mode is not
 * already the inode's current mode.
 *
 * Returns: always 0
 */
static int gfs2_set_mode(struct inode *inode, umode_t mode)
{
	/* Nothing to do when the mode is already up to date */
	if (inode->i_mode == mode)
		return 0;

	inode->i_mode = mode;
	mark_inode_dirty(inode);
	return 0;
}

int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
{
int error;
Expand All @@ -85,8 +73,8 @@ int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type)

BUG_ON(name == NULL);

if (acl->a_count > GFS2_ACL_MAX_ENTRIES)
return -EINVAL;
if (acl->a_count > GFS2_ACL_MAX_ENTRIES(GFS2_SB(inode)))
return -E2BIG;

if (type == ACL_TYPE_ACCESS) {
umode_t mode = inode->i_mode;
Expand All @@ -98,9 +86,10 @@ int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
if (error == 0)
acl = NULL;

error = gfs2_set_mode(inode, mode);
if (error)
return error;
if (mode != inode->i_mode) {
inode->i_mode = mode;
mark_inode_dirty(inode);
}
}

if (acl) {
Expand Down
2 changes: 1 addition & 1 deletion fs/gfs2/acl.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

#define GFS2_POSIX_ACL_ACCESS "posix_acl_access"
#define GFS2_POSIX_ACL_DEFAULT "posix_acl_default"
#define GFS2_ACL_MAX_ENTRIES 25
#define GFS2_ACL_MAX_ENTRIES(sdp) ((300 << (sdp)->sd_sb.sb_bsize_shift) >> 12)

extern struct posix_acl *gfs2_get_acl(struct inode *inode, int type);
extern int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type);
Expand Down
132 changes: 96 additions & 36 deletions fs/gfs2/aops.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include <linux/gfs2_ondisk.h>
#include <linux/backing-dev.h>
#include <linux/aio.h>
#include <trace/events/writeback.h>

#include "gfs2.h"
#include "incore.h"
Expand Down Expand Up @@ -230,13 +231,11 @@ static int gfs2_writepages(struct address_space *mapping,
static int gfs2_write_jdata_pagevec(struct address_space *mapping,
struct writeback_control *wbc,
struct pagevec *pvec,
int nr_pages, pgoff_t end)
int nr_pages, pgoff_t end,
pgoff_t *done_index)
{
struct inode *inode = mapping->host;
struct gfs2_sbd *sdp = GFS2_SB(inode);
loff_t i_size = i_size_read(inode);
pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
unsigned offset = i_size & (PAGE_CACHE_SIZE-1);
unsigned nrblocks = nr_pages * (PAGE_CACHE_SIZE/inode->i_sb->s_blocksize);
int i;
int ret;
Expand All @@ -248,40 +247,83 @@ static int gfs2_write_jdata_pagevec(struct address_space *mapping,
for(i = 0; i < nr_pages; i++) {
struct page *page = pvec->pages[i];

/*
* At this point, the page may be truncated or
* invalidated (changing page->mapping to NULL), or
* even swizzled back from swapper_space to tmpfs file
* mapping. However, page->index will not change
* because we have a reference on the page.
*/
if (page->index > end) {
/*
* can't be range_cyclic (1st pass) because
* end == -1 in that case.
*/
ret = 1;
break;
}

*done_index = page->index;

lock_page(page);

if (unlikely(page->mapping != mapping)) {
continue_unlock:
unlock_page(page);
continue;
}

if (!wbc->range_cyclic && page->index > end) {
ret = 1;
unlock_page(page);
continue;
if (!PageDirty(page)) {
/* someone wrote it for us */
goto continue_unlock;
}

if (wbc->sync_mode != WB_SYNC_NONE)
wait_on_page_writeback(page);

if (PageWriteback(page) ||
!clear_page_dirty_for_io(page)) {
unlock_page(page);
continue;
if (PageWriteback(page)) {
if (wbc->sync_mode != WB_SYNC_NONE)
wait_on_page_writeback(page);
else
goto continue_unlock;
}

/* Is the page fully outside i_size? (truncate in progress) */
if (page->index > end_index || (page->index == end_index && !offset)) {
page->mapping->a_ops->invalidatepage(page, 0,
PAGE_CACHE_SIZE);
unlock_page(page);
continue;
}
BUG_ON(PageWriteback(page));
if (!clear_page_dirty_for_io(page))
goto continue_unlock;

trace_wbc_writepage(wbc, mapping->backing_dev_info);

ret = __gfs2_jdata_writepage(page, wbc);
if (unlikely(ret)) {
if (ret == AOP_WRITEPAGE_ACTIVATE) {
unlock_page(page);
ret = 0;
} else {

/*
* done_index is set past this page,
* so media errors will not choke
* background writeout for the entire
* file. This has consequences for
* range_cyclic semantics (ie. it may
* not be suitable for data integrity
* writeout).
*/
*done_index = page->index + 1;
ret = 1;
break;
}
}

if (ret || (--(wbc->nr_to_write) <= 0))
/*
* We stop writing back only if we are not doing
* integrity sync. In case of integrity sync we have to
* keep going until we have written all the pages
* we tagged for writeback prior to entering this loop.
*/
if (--wbc->nr_to_write <= 0 && wbc->sync_mode == WB_SYNC_NONE) {
ret = 1;
break;
}

}
gfs2_trans_end(sdp);
return ret;
Expand All @@ -306,51 +348,69 @@ static int gfs2_write_cache_jdata(struct address_space *mapping,
int done = 0;
struct pagevec pvec;
int nr_pages;
pgoff_t uninitialized_var(writeback_index);
pgoff_t index;
pgoff_t end;
int scanned = 0;
pgoff_t done_index;
int cycled;
int range_whole = 0;
int tag;

pagevec_init(&pvec, 0);
if (wbc->range_cyclic) {
index = mapping->writeback_index; /* Start from prev offset */
writeback_index = mapping->writeback_index; /* prev offset */
index = writeback_index;
if (index == 0)
cycled = 1;
else
cycled = 0;
end = -1;
} else {
index = wbc->range_start >> PAGE_CACHE_SHIFT;
end = wbc->range_end >> PAGE_CACHE_SHIFT;
if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
range_whole = 1;
scanned = 1;
cycled = 1; /* ignore range_cyclic tests */
}
if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
tag = PAGECACHE_TAG_TOWRITE;
else
tag = PAGECACHE_TAG_DIRTY;

retry:
while (!done && (index <= end) &&
(nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
PAGECACHE_TAG_DIRTY,
min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
scanned = 1;
ret = gfs2_write_jdata_pagevec(mapping, wbc, &pvec, nr_pages, end);
if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
tag_pages_for_writeback(mapping, index, end);
done_index = index;
while (!done && (index <= end)) {
nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
if (nr_pages == 0)
break;

ret = gfs2_write_jdata_pagevec(mapping, wbc, &pvec, nr_pages, end, &done_index);
if (ret)
done = 1;
if (ret > 0)
ret = 0;

pagevec_release(&pvec);
cond_resched();
}

if (!scanned && !done) {
if (!cycled && !done) {
/*
* range_cyclic:
* We hit the last page and there is more work to be done: wrap
* back to the start of the file
*/
scanned = 1;
cycled = 1;
index = 0;
end = writeback_index - 1;
goto retry;
}

if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
mapping->writeback_index = index;
mapping->writeback_index = done_index;

return ret;
}

Expand Down
115 changes: 115 additions & 0 deletions fs/gfs2/bmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -1327,6 +1327,121 @@ int gfs2_file_dealloc(struct gfs2_inode *ip)
return trunc_dealloc(ip, 0);
}

/**
* gfs2_free_journal_extents - Free cached journal bmap info
* @jd: The journal
*
*/

void gfs2_free_journal_extents(struct gfs2_jdesc *jd)
{
struct gfs2_journal_extent *jext;

while(!list_empty(&jd->extent_list)) {
jext = list_entry(jd->extent_list.next, struct gfs2_journal_extent, list);
list_del(&jext->list);
kfree(jext);
}
}

/**
 * gfs2_add_jextent - Add or merge a new extent to extent cache
 * @jd: The journal descriptor
 * @lblock: The logical block at start of new extent
 * @dblock: The physical (disk) block at start of new extent
 * @blocks: Size of extent in fs blocks
 *
 * When the new extent starts at the disk block directly after the end
 * of the last cached extent, that extent is simply grown by @blocks.
 * Otherwise a new entry is allocated, appended to @jd->extent_list and
 * @jd->nr_extents is incremented.
 *
 * Returns: 0 on success or -ENOMEM
 */

static int gfs2_add_jextent(struct gfs2_jdesc *jd, u64 lblock, u64 dblock, u64 blocks)
{
	struct list_head *head = &jd->extent_list;
	struct gfs2_journal_extent *ext;

	if (!list_empty(head)) {
		struct gfs2_journal_extent *tail;

		/* Only the most recently added extent is a merge candidate */
		tail = list_entry(head->prev, struct gfs2_journal_extent, list);
		if (dblock == tail->dblock + tail->blocks) {
			tail->blocks += blocks;
			return 0;
		}
	}

	ext = kzalloc(sizeof(*ext), GFP_NOFS);
	if (!ext)
		return -ENOMEM;

	ext->lblock = lblock;
	ext->dblock = dblock;
	ext->blocks = blocks;
	list_add_tail(&ext->list, head);
	jd->nr_extents++;
	return 0;
}

/**
 * gfs2_map_journal_extents - Cache journal bmap info
 * @sdp: The super block
 * @jd: The journal to map
 *
 * Create a reusable "extent" mapping from all logical
 * blocks to all physical blocks for the given journal.  This will save
 * us time when writing journal blocks.  Most journals will have only one
 * extent that maps all their logical blocks.  That's because mkfs.gfs2
 * arranges the journal blocks sequentially to maximize performance.
 * So the extent would map the first block for the entire file length.
 * However, gfs2_jadd can happen while file activity is happening, so
 * those journals may not be sequential.  Less likely is the case where
 * the users created their own journals by mounting the metafs and
 * laying it out.  But it's still possible.  These journals might have
 * several extents.
 *
 * On failure any extents cached so far are freed again.
 *
 * Returns: 0 on success, or error on failure.  If gfs2_block_map()
 *          reports success but leaves the buffer unmapped, -EIO is
 *          returned.
 */

int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd)
{
	u64 lblock = 0;
	u64 lblock_stop;
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct buffer_head bh;
	unsigned int shift = sdp->sd_sb.sb_bsize_shift;
	u64 size;
	int rc;

	lblock_stop = i_size_read(jd->jd_inode) >> shift;
	size = (lblock_stop - lblock) << shift;
	jd->nr_extents = 0;
	WARN_ON(!list_empty(&jd->extent_list));

	do {
		/* Ask for everything that is left; b_size comes back as
		 * the length actually mapped in one contiguous run.
		 */
		bh.b_state = 0;
		bh.b_blocknr = 0;
		bh.b_size = size;
		rc = gfs2_block_map(jd->jd_inode, lblock, &bh, 0);
		if (rc || !buffer_mapped(&bh))
			goto fail;
		rc = gfs2_add_jextent(jd, lblock, bh.b_blocknr, bh.b_size >> shift);
		if (rc)
			goto fail;
		size -= bh.b_size;
		lblock += (bh.b_size >> ip->i_inode.i_blkbits);
	} while(size > 0);

	fs_info(sdp, "journal %d mapped with %u extents\n", jd->jd_jid,
		jd->nr_extents);
	return 0;

fail:
	fs_warn(sdp, "error %d mapping journal %u at offset %llu (extent %u)\n",
		rc, jd->jd_jid,
		(unsigned long long)(i_size_read(jd->jd_inode) - size),
		jd->nr_extents);
	fs_warn(sdp, "bmap=%d lblock=%llu block=%llu, state=0x%08lx, size=%llu\n",
		rc, (unsigned long long)lblock, (unsigned long long)bh.b_blocknr,
		bh.b_state, (unsigned long long)bh.b_size);
	gfs2_free_journal_extents(jd);
	/*
	 * We also get here when gfs2_block_map() returned 0 but the buffer
	 * was not mapped.  The extent cache has just been thrown away, so
	 * never report success from this path.  The conversion is done
	 * after the warnings so the logged bmap result stays accurate.
	 */
	if (rc == 0)
		rc = -EIO;
	return rc;
}

/**
* gfs2_write_alloc_required - figure out if a write will require an allocation
* @ip: the file being written to
Expand Down
Loading

0 comments on commit 34917f9

Please sign in to comment.