Skip to content

Commit

Permalink
fs: allow for more than 2^31 files
Browse files Browse the repository at this point in the history
Andrew,

Could you please review this patch, you probably are the right guy to
take it, because it crosses fs and net trees.

Note : /proc/sys/fs/file-nr is a read-only file, so this patch doesnt
depend on previous patch (sysctl: fix min/max handling in
__do_proc_doulongvec_minmax())

Thanks !

[PATCH V4] fs: allow for more than 2^31 files

Robin Holt tried to boot a 16TB system and found af_unix was overflowing
a 32bit value :

<quote>

We were seeing a failure which prevented boot.  The kernel was incapable
of creating either a named pipe or unix domain socket.  This comes down
to a common kernel function called unix_create1() which does:

        atomic_inc(&unix_nr_socks);
        if (atomic_read(&unix_nr_socks) > 2 * get_max_files())
                goto out;

The function get_max_files() is a simple return of files_stat.max_files.
files_stat.max_files is a signed integer and is computed in
fs/file_table.c's files_init().

        n = (mempages * (PAGE_SIZE / 1024)) / 10;
        files_stat.max_files = n;

In our case, mempages (total_ram_pages) is approx 3,758,096,384
(0xe0000000).  That leaves max_files at approximately 1,503,238,553.
This causes 2 * get_max_files() to integer overflow.

</quote>

Fix is to let /proc/sys/fs/file-nr & /proc/sys/fs/file-max use long
integers, and change af_unix to use an atomic_long_t instead of
atomic_t.

get_max_files() is changed to return an unsigned long.
get_nr_files() is changed to return a long.

unix_nr_socks is changed from atomic_t to atomic_long_t, while not
strictly needed to address Robin problem.

Before patch (on a 64bit kernel) :
# echo 2147483648 >/proc/sys/fs/file-max
# cat /proc/sys/fs/file-max
-18446744071562067968

After patch:
# echo 2147483648 >/proc/sys/fs/file-max
# cat /proc/sys/fs/file-max
2147483648
# cat /proc/sys/fs/file-nr
704     0       2147483648

Reported-by: Robin Holt <holt@sgi.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: David Miller <davem@davemloft.net>
Reviewed-by: Robin Holt <holt@sgi.com>
Tested-by: Robin Holt <holt@sgi.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
  • Loading branch information
Eric Dumazet authored and Al Viro committed Oct 26, 2010
1 parent fde214d commit 7e360c3
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 24 deletions.
17 changes: 7 additions & 10 deletions fs/file_table.c
Original file line number Diff line number Diff line change
Expand Up @@ -60,15 +60,15 @@ static inline void file_free(struct file *f)
/*
* Return the total number of open files in the system
*/
static int get_nr_files(void)
static long get_nr_files(void)
{
return percpu_counter_read_positive(&nr_files);
}

/*
* Return the maximum number of open files in the system
*/
int get_max_files(void)
unsigned long get_max_files(void)
{
return files_stat.max_files;
}
Expand All @@ -82,7 +82,7 @@ int proc_nr_files(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
files_stat.nr_files = get_nr_files();
return proc_dointvec(table, write, buffer, lenp, ppos);
return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
}
#else
int proc_nr_files(ctl_table *table, int write,
Expand All @@ -105,7 +105,7 @@ int proc_nr_files(ctl_table *table, int write,
struct file *get_empty_filp(void)
{
const struct cred *cred = current_cred();
static int old_max;
static long old_max;
struct file * f;

/*
Expand Down Expand Up @@ -140,8 +140,7 @@ struct file *get_empty_filp(void)
over:
/* Ran out of filps - report that */
if (get_nr_files() > old_max) {
printk(KERN_INFO "VFS: file-max limit %d reached\n",
get_max_files());
pr_info("VFS: file-max limit %lu reached\n", get_max_files());
old_max = get_nr_files();
}
goto fail;
Expand Down Expand Up @@ -487,7 +486,7 @@ void mark_files_ro(struct super_block *sb)

void __init files_init(unsigned long mempages)
{
int n;
unsigned long n;

filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
Expand All @@ -498,9 +497,7 @@ void __init files_init(unsigned long mempages)
*/

n = (mempages * (PAGE_SIZE / 1024)) / 10;
files_stat.max_files = n;
if (files_stat.max_files < NR_FILE)
files_stat.max_files = NR_FILE;
files_stat.max_files = max_t(unsigned long, n, NR_FILE);
files_defer_init();
lg_lock_init(files_lglock);
percpu_counter_init(&nr_files, 0);
Expand Down
8 changes: 4 additions & 4 deletions include/linux/fs.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,9 @@

/* And dynamically-tunable limits and defaults: */
struct files_stat_struct {
int nr_files; /* read only */
int nr_free_files; /* read only */
int max_files; /* tunable */
unsigned long nr_files; /* read only */
unsigned long nr_free_files; /* read only */
unsigned long max_files; /* tunable */
};

struct inodes_stat_t {
Expand Down Expand Up @@ -400,7 +400,7 @@ extern void __init inode_init_early(void);
extern void __init files_init(unsigned long);

extern struct files_stat_struct files_stat;
extern int get_max_files(void);
extern unsigned long get_max_files(void);
extern int sysctl_nr_open;
extern struct inodes_stat_t inodes_stat;
extern int leases_enable, lease_break_time;
Expand Down
6 changes: 3 additions & 3 deletions kernel/sysctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -1352,16 +1352,16 @@ static struct ctl_table fs_table[] = {
{
.procname = "file-nr",
.data = &files_stat,
.maxlen = 3*sizeof(int),
.maxlen = sizeof(files_stat),
.mode = 0444,
.proc_handler = proc_nr_files,
},
{
.procname = "file-max",
.data = &files_stat.max_files,
.maxlen = sizeof(int),
.maxlen = sizeof(files_stat.max_files),
.mode = 0644,
.proc_handler = proc_dointvec,
.proc_handler = proc_doulongvec_minmax,
},
{
.procname = "nr_open",
Expand Down
14 changes: 7 additions & 7 deletions net/unix/af_unix.c
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@

static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
static DEFINE_SPINLOCK(unix_table_lock);
static atomic_t unix_nr_socks = ATOMIC_INIT(0);
static atomic_long_t unix_nr_socks;

#define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE])

Expand Down Expand Up @@ -360,13 +360,13 @@ static void unix_sock_destructor(struct sock *sk)
if (u->addr)
unix_release_addr(u->addr);

atomic_dec(&unix_nr_socks);
atomic_long_dec(&unix_nr_socks);
local_bh_disable();
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
local_bh_enable();
#ifdef UNIX_REFCNT_DEBUG
printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk,
atomic_read(&unix_nr_socks));
printk(KERN_DEBUG "UNIX %p is destroyed, %ld are still alive.\n", sk,
atomic_long_read(&unix_nr_socks));
#endif
}

Expand Down Expand Up @@ -606,8 +606,8 @@ static struct sock *unix_create1(struct net *net, struct socket *sock)
struct sock *sk = NULL;
struct unix_sock *u;

atomic_inc(&unix_nr_socks);
if (atomic_read(&unix_nr_socks) > 2 * get_max_files())
atomic_long_inc(&unix_nr_socks);
if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
goto out;

sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
Expand All @@ -632,7 +632,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock)
unix_insert_socket(unix_sockets_unbound, sk);
out:
if (sk == NULL)
atomic_dec(&unix_nr_socks);
atomic_long_dec(&unix_nr_socks);
else {
local_bh_disable();
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
Expand Down

0 comments on commit 7e360c3

Please sign in to comment.