Skip to content

Commit

Permalink
fs: Convert nr_inodes and nr_unused to per-cpu counters
Browse files Browse the repository at this point in the history
The number of inodes allocated does not need to be tied to the
addition or removal of an inode to/from a list. If we are not tied
to a list lock, we could update the counters when inodes are
initialised or destroyed, but to do that we need to convert the
counters to be per-cpu (i.e. independent of a lock). This means that
we have the freedom to change the list/locking implementation
without needing to care about the counters.

Based on a patch originally from Eric Dumazet.

[AV: cleaned up a bit, fixed build breakage on weird configs]

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
  • Loading branch information
Dave Chinner authored and Al Viro committed Oct 26, 2010
1 parent be1a16a commit cffbc8a
Show file tree
Hide file tree
Showing 5 changed files with 52 additions and 25 deletions.
5 changes: 2 additions & 3 deletions fs/fs-writeback.c
Original file line number Diff line number Diff line change
Expand Up @@ -723,7 +723,7 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
wb->last_old_flush = jiffies;
nr_pages = global_page_state(NR_FILE_DIRTY) +
global_page_state(NR_UNSTABLE_NFS) +
(inodes_stat.nr_inodes - inodes_stat.nr_unused);
get_nr_dirty_inodes();

if (nr_pages) {
struct wb_writeback_work work = {
Expand Down Expand Up @@ -1090,8 +1090,7 @@ void writeback_inodes_sb(struct super_block *sb)

WARN_ON(!rwsem_is_locked(&sb->s_umount));

work.nr_pages = nr_dirty + nr_unstable +
(inodes_stat.nr_inodes - inodes_stat.nr_unused);
work.nr_pages = nr_dirty + nr_unstable + get_nr_dirty_inodes();

bdi_queue_work(sb->s_bdi, &work);
wait_for_completion(&done);
Expand Down
64 changes: 45 additions & 19 deletions fs/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,41 @@ static DECLARE_RWSEM(iprune_sem);
*/
struct inodes_stat_t inodes_stat;

static struct percpu_counter nr_inodes __cacheline_aligned_in_smp;
static struct percpu_counter nr_inodes_unused __cacheline_aligned_in_smp;

static struct kmem_cache *inode_cachep __read_mostly;

/*
 * Return the current number of allocated inodes, obtained by summing the
 * nr_inodes per-cpu counter. The sum is narrowed to int on return.
 */
static inline int get_nr_inodes(void)
{
	int total = percpu_counter_sum_positive(&nr_inodes);

	return total;
}

/*
 * Return the current number of unused inodes, obtained by summing the
 * nr_inodes_unused per-cpu counter. The sum is narrowed to int on return.
 */
static inline int get_nr_inodes_unused(void)
{
	int unused = percpu_counter_sum_positive(&nr_inodes_unused);

	return unused;
}

/*
 * Return the number of allocated inodes minus the number of unused inodes,
 * never less than zero.
 *
 * Despite the name, this does not count inodes that are actually dirty; it
 * is the difference of the two counters above.
 * NOTE(review): the two counters are summed separately, so the difference
 * can presumably be transiently negative - hence the clamp.
 */
int get_nr_dirty_inodes(void)
{
	int in_use = get_nr_inodes() - get_nr_inodes_unused();

	if (in_use <= 0)
		return 0;
	return in_use;
}

/*
* Handle nr_inode sysctl
*/
#ifdef CONFIG_SYSCTL
/*
 * sysctl handler for the entries backed by inodes_stat.
 *
 * inodes_stat is no longer updated as inodes come and go, so refresh its
 * nr_inodes and nr_unused fields from the per-cpu counters first, then let
 * proc_dointvec() do the actual transfer with the caller's arguments.
 *
 * Returns whatever proc_dointvec() returns.
 */
int proc_nr_inodes(ctl_table *table, int write,
		   void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int ret;

	/* Take a fresh snapshot of both counters before reporting. */
	inodes_stat.nr_unused = get_nr_inodes_unused();
	inodes_stat.nr_inodes = get_nr_inodes();

	ret = proc_dointvec(table, write, buffer, lenp, ppos);
	return ret;
}
#endif

static void wake_up_inode(struct inode *inode)
{
/*
Expand Down Expand Up @@ -192,6 +225,8 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
inode->i_fsnotify_mask = 0;
#endif

percpu_counter_inc(&nr_inodes);

return 0;
out:
return -ENOMEM;
Expand Down Expand Up @@ -232,6 +267,7 @@ void __destroy_inode(struct inode *inode)
if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED)
posix_acl_release(inode->i_default_acl);
#endif
percpu_counter_dec(&nr_inodes);
}
EXPORT_SYMBOL(__destroy_inode);

Expand Down Expand Up @@ -286,7 +322,7 @@ void __iget(struct inode *inode)

if (!(inode->i_state & (I_DIRTY|I_SYNC)))
list_move(&inode->i_list, &inode_in_use);
inodes_stat.nr_unused--;
percpu_counter_dec(&nr_inodes_unused);
}

void end_writeback(struct inode *inode)
Expand Down Expand Up @@ -327,8 +363,6 @@ static void evict(struct inode *inode)
*/
static void dispose_list(struct list_head *head)
{
int nr_disposed = 0;

while (!list_empty(head)) {
struct inode *inode;

Expand All @@ -344,11 +378,7 @@ static void dispose_list(struct list_head *head)

wake_up_inode(inode);
destroy_inode(inode);
nr_disposed++;
}
spin_lock(&inode_lock);
inodes_stat.nr_inodes -= nr_disposed;
spin_unlock(&inode_lock);
}

/*
Expand All @@ -357,7 +387,7 @@ static void dispose_list(struct list_head *head)
static int invalidate_list(struct list_head *head, struct list_head *dispose)
{
struct list_head *next;
int busy = 0, count = 0;
int busy = 0;

next = head->next;
for (;;) {
Expand All @@ -383,13 +413,11 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose)
list_move(&inode->i_list, dispose);
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_FREEING;
count++;
percpu_counter_dec(&nr_inodes_unused);
continue;
}
busy = 1;
}
/* only unused inodes may be cached with i_count zero */
inodes_stat.nr_unused -= count;
return busy;
}

Expand Down Expand Up @@ -447,7 +475,6 @@ static int can_unuse(struct inode *inode)
static void prune_icache(int nr_to_scan)
{
LIST_HEAD(freeable);
int nr_pruned = 0;
int nr_scanned;
unsigned long reap = 0;

Expand Down Expand Up @@ -483,9 +510,8 @@ static void prune_icache(int nr_to_scan)
list_move(&inode->i_list, &freeable);
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_FREEING;
nr_pruned++;
percpu_counter_dec(&nr_inodes_unused);
}
inodes_stat.nr_unused -= nr_pruned;
if (current_is_kswapd())
__count_vm_events(KSWAPD_INODESTEAL, reap);
else
Expand Down Expand Up @@ -517,7 +543,7 @@ static int shrink_icache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
return -1;
prune_icache(nr);
}
return (inodes_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
return (get_nr_inodes_unused() / 100) * sysctl_vfs_cache_pressure;
}

static struct shrinker icache_shrinker = {
Expand Down Expand Up @@ -594,7 +620,6 @@ static inline void
__inode_add_to_lists(struct super_block *sb, struct hlist_head *head,
struct inode *inode)
{
inodes_stat.nr_inodes++;
list_add(&inode->i_list, &inode_in_use);
list_add(&inode->i_sb_list, &sb->s_inodes);
if (head)
Expand Down Expand Up @@ -1214,7 +1239,7 @@ static void iput_final(struct inode *inode)
if (!drop) {
if (!(inode->i_state & (I_DIRTY|I_SYNC)))
list_move(&inode->i_list, &inode_unused);
inodes_stat.nr_unused++;
percpu_counter_inc(&nr_inodes_unused);
if (sb->s_flags & MS_ACTIVE) {
spin_unlock(&inode_lock);
return;
Expand All @@ -1226,14 +1251,13 @@ static void iput_final(struct inode *inode)
spin_lock(&inode_lock);
WARN_ON(inode->i_state & I_NEW);
inode->i_state &= ~I_WILL_FREE;
inodes_stat.nr_unused--;
percpu_counter_dec(&nr_inodes_unused);
hlist_del_init(&inode->i_hash);
}
list_del_init(&inode->i_list);
list_del_init(&inode->i_sb_list);
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_FREEING;
inodes_stat.nr_inodes--;
spin_unlock(&inode_lock);
evict(inode);
spin_lock(&inode_lock);
Expand Down Expand Up @@ -1502,6 +1526,8 @@ void __init inode_init(void)
SLAB_MEM_SPREAD),
init_once);
register_shrinker(&icache_shrinker);
percpu_counter_init(&nr_inodes, 0);
percpu_counter_init(&nr_inodes_unused, 0);

/* Hash may have been set up in inode_init_early */
if (!hashdist)
Expand Down
1 change: 1 addition & 0 deletions fs/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,4 +105,5 @@ extern void release_open_intent(struct nameidata *);
/*
* inode.c
*/
extern int get_nr_dirty_inodes(void);
extern int invalidate_inodes(struct super_block *);
3 changes: 2 additions & 1 deletion include/linux/fs.h
Original file line number Diff line number Diff line change
Expand Up @@ -2486,7 +2486,8 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf,
struct ctl_table;
int proc_nr_files(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);

int proc_nr_inodes(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
int __init get_filesystem_list(char *buf);

#define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE])
Expand Down
4 changes: 2 additions & 2 deletions kernel/sysctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -1340,14 +1340,14 @@ static struct ctl_table fs_table[] = {
.data = &inodes_stat,
.maxlen = 2*sizeof(int),
.mode = 0444,
.proc_handler = proc_dointvec,
.proc_handler = proc_nr_inodes,
},
{
.procname = "inode-state",
.data = &inodes_stat,
.maxlen = 7*sizeof(int),
.mode = 0444,
.proc_handler = proc_dointvec,
.proc_handler = proc_nr_inodes,
},
{
.procname = "file-nr",
Expand Down

0 comments on commit cffbc8a

Please sign in to comment.