Skip to content

Commit

Permalink
---
Browse files Browse the repository at this point in the history
yaml
---
r: 90963
b: refs/heads/master
c: 3d73363
h: refs/heads/master
i:
  90961: 8ea551c
  90959: f220275
v: v3
  • Loading branch information
Dave Hansen authored and Al Viro committed Apr 19, 2008
1 parent 3e798ca commit c2f30c0
Show file tree
Hide file tree
Showing 3 changed files with 245 additions and 16 deletions.
2 changes: 1 addition & 1 deletion [refs]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
---
refs/heads/master: 2c463e95480829a2fe8f386589516e13b1289db6
refs/heads/master: 3d733633a633065729c9e4e254b2e5442c00ef7e
252 changes: 237 additions & 15 deletions trunk/fs/namespace.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <linux/quotaops.h>
#include <linux/acct.h>
#include <linux/capability.h>
#include <linux/cpumask.h>
#include <linux/module.h>
#include <linux/sysfs.h>
#include <linux/seq_file.h>
Expand Down Expand Up @@ -55,6 +56,8 @@ static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
return tmp & (HASH_SIZE - 1);
}

#define MNT_WRITER_UNDERFLOW_LIMIT -(1<<16)

struct vfsmount *alloc_vfsmnt(const char *name)
{
struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
Expand All @@ -68,6 +71,7 @@ struct vfsmount *alloc_vfsmnt(const char *name)
INIT_LIST_HEAD(&mnt->mnt_share);
INIT_LIST_HEAD(&mnt->mnt_slave_list);
INIT_LIST_HEAD(&mnt->mnt_slave);
atomic_set(&mnt->__mnt_writers, 0);
if (name) {
int size = strlen(name) + 1;
char *newname = kmalloc(size, GFP_KERNEL);
Expand All @@ -80,6 +84,92 @@ struct vfsmount *alloc_vfsmnt(const char *name)
return mnt;
}

/*
* Most r/o checks on a fs are for operations that take
* discrete amounts of time, like a write() or unlink().
* We must keep track of when those operations start
* (for permission checks) and when they end, so that
* we can determine when writes are able to occur to
* a filesystem.
*/
/*
* __mnt_is_readonly: check whether a mount is read-only
* @mnt: the mount to check for its write status
*
* This shouldn't be used directly ouside of the VFS.
* It does not guarantee that the filesystem will stay
* r/w, just that it is right *now*. This can not and
* should not be used in place of IS_RDONLY(inode).
* mnt_want/drop_write() will _keep_ the filesystem
* r/w.
*/
int __mnt_is_readonly(struct vfsmount *mnt)
{
return (mnt->mnt_sb->s_flags & MS_RDONLY);
}
EXPORT_SYMBOL_GPL(__mnt_is_readonly);

struct mnt_writer {
/*
* If holding multiple instances of this lock, they
* must be ordered by cpu number.
*/
spinlock_t lock;
struct lock_class_key lock_class; /* compiles out with !lockdep */
unsigned long count;
struct vfsmount *mnt;
} ____cacheline_aligned_in_smp;
static DEFINE_PER_CPU(struct mnt_writer, mnt_writers);

static int __init init_mnt_writers(void)
{
int cpu;
for_each_possible_cpu(cpu) {
struct mnt_writer *writer = &per_cpu(mnt_writers, cpu);
spin_lock_init(&writer->lock);
lockdep_set_class(&writer->lock, &writer->lock_class);
writer->count = 0;
}
return 0;
}
fs_initcall(init_mnt_writers);

static void unlock_mnt_writers(void)
{
int cpu;
struct mnt_writer *cpu_writer;

for_each_possible_cpu(cpu) {
cpu_writer = &per_cpu(mnt_writers, cpu);
spin_unlock(&cpu_writer->lock);
}
}

static inline void __clear_mnt_count(struct mnt_writer *cpu_writer)
{
if (!cpu_writer->mnt)
return;
/*
* This is in case anyone ever leaves an invalid,
* old ->mnt and a count of 0.
*/
if (!cpu_writer->count)
return;
atomic_add(cpu_writer->count, &cpu_writer->mnt->__mnt_writers);
cpu_writer->count = 0;
}
/*
* must hold cpu_writer->lock
*/
static inline void use_cpu_writer_for_mount(struct mnt_writer *cpu_writer,
struct vfsmount *mnt)
{
if (cpu_writer->mnt == mnt)
return;
__clear_mnt_count(cpu_writer);
cpu_writer->mnt = mnt;
}

/*
* Most r/o checks on a fs are for operations that take
* discrete amounts of time, like a write() or unlink().
Expand All @@ -100,12 +190,77 @@ struct vfsmount *alloc_vfsmnt(const char *name)
*/
int mnt_want_write(struct vfsmount *mnt)
{
if (__mnt_is_readonly(mnt))
return -EROFS;
return 0;
int ret = 0;
struct mnt_writer *cpu_writer;

cpu_writer = &get_cpu_var(mnt_writers);
spin_lock(&cpu_writer->lock);
if (__mnt_is_readonly(mnt)) {
ret = -EROFS;
goto out;
}
use_cpu_writer_for_mount(cpu_writer, mnt);
cpu_writer->count++;
out:
spin_unlock(&cpu_writer->lock);
put_cpu_var(mnt_writers);
return ret;
}
EXPORT_SYMBOL_GPL(mnt_want_write);

static void lock_mnt_writers(void)
{
int cpu;
struct mnt_writer *cpu_writer;

for_each_possible_cpu(cpu) {
cpu_writer = &per_cpu(mnt_writers, cpu);
spin_lock(&cpu_writer->lock);
__clear_mnt_count(cpu_writer);
cpu_writer->mnt = NULL;
}
}

/*
* These per-cpu write counts are not guaranteed to have
* matched increments and decrements on any given cpu.
* A file open()ed for write on one cpu and close()d on
* another cpu will imbalance this count. Make sure it
* does not get too far out of whack.
*/
static void handle_write_count_underflow(struct vfsmount *mnt)
{
if (atomic_read(&mnt->__mnt_writers) >=
MNT_WRITER_UNDERFLOW_LIMIT)
return;
/*
* It isn't necessary to hold all of the locks
* at the same time, but doing it this way makes
* us share a lot more code.
*/
lock_mnt_writers();
/*
* vfsmount_lock is for mnt_flags.
*/
spin_lock(&vfsmount_lock);
/*
* If coalescing the per-cpu writer counts did not
* get us back to a positive writer count, we have
* a bug.
*/
if ((atomic_read(&mnt->__mnt_writers) < 0) &&
!(mnt->mnt_flags & MNT_IMBALANCED_WRITE_COUNT)) {
printk(KERN_DEBUG "leak detected on mount(%p) writers "
"count: %d\n",
mnt, atomic_read(&mnt->__mnt_writers));
WARN_ON(1);
/* use the flag to keep the dmesg spam down */
mnt->mnt_flags |= MNT_IMBALANCED_WRITE_COUNT;
}
spin_unlock(&vfsmount_lock);
unlock_mnt_writers();
}

/**
* mnt_drop_write - give up write access to a mount
* @mnt: the mount on which to give up write access
Expand All @@ -116,23 +271,61 @@ EXPORT_SYMBOL_GPL(mnt_want_write);
*/
void mnt_drop_write(struct vfsmount *mnt)
{
int must_check_underflow = 0;
struct mnt_writer *cpu_writer;

cpu_writer = &get_cpu_var(mnt_writers);
spin_lock(&cpu_writer->lock);

use_cpu_writer_for_mount(cpu_writer, mnt);
if (cpu_writer->count > 0) {
cpu_writer->count--;
} else {
must_check_underflow = 1;
atomic_dec(&mnt->__mnt_writers);
}

spin_unlock(&cpu_writer->lock);
/*
* Logically, we could call this each time,
* but the __mnt_writers cacheline tends to
* be cold, and makes this expensive.
*/
if (must_check_underflow)
handle_write_count_underflow(mnt);
/*
* This could be done right after the spinlock
* is taken because the spinlock keeps us on
* the cpu, and disables preemption. However,
* putting it here bounds the amount that
* __mnt_writers can underflow. Without it,
* we could theoretically wrap __mnt_writers.
*/
put_cpu_var(mnt_writers);
}
EXPORT_SYMBOL_GPL(mnt_drop_write);

/*
* __mnt_is_readonly: check whether a mount is read-only
* @mnt: the mount to check for its write status
*
* This shouldn't be used directly ouside of the VFS.
* It does not guarantee that the filesystem will stay
* r/w, just that it is right *now*. This can not and
* should not be used in place of IS_RDONLY(inode).
*/
int __mnt_is_readonly(struct vfsmount *mnt)
int mnt_make_readonly(struct vfsmount *mnt)
{
return (mnt->mnt_sb->s_flags & MS_RDONLY);
int ret = 0;

lock_mnt_writers();
/*
* With all the locks held, this value is stable
*/
if (atomic_read(&mnt->__mnt_writers) > 0) {
ret = -EBUSY;
goto out;
}
/*
* actually set mount's r/o flag here to make
* __mnt_is_readonly() true, which keeps anyone
* from doing a successful mnt_want_write().
*/
out:
unlock_mnt_writers();
return ret;
}
EXPORT_SYMBOL_GPL(__mnt_is_readonly);

int simple_set_mnt(struct vfsmount *mnt, struct super_block *sb)
{
Expand Down Expand Up @@ -325,7 +518,36 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,

static inline void __mntput(struct vfsmount *mnt)
{
int cpu;
struct super_block *sb = mnt->mnt_sb;
/*
* We don't have to hold all of the locks at the
* same time here because we know that we're the
* last reference to mnt and that no new writers
* can come in.
*/
for_each_possible_cpu(cpu) {
struct mnt_writer *cpu_writer = &per_cpu(mnt_writers, cpu);
if (cpu_writer->mnt != mnt)
continue;
spin_lock(&cpu_writer->lock);
atomic_add(cpu_writer->count, &mnt->__mnt_writers);
cpu_writer->count = 0;
/*
* Might as well do this so that no one
* ever sees the pointer and expects
* it to be valid.
*/
cpu_writer->mnt = NULL;
spin_unlock(&cpu_writer->lock);
}
/*
* This probably indicates that somebody messed
* up a mnt_want/drop_write() pair. If this
* happens, the filesystem was probably unable
* to make r/w->r/o transitions.
*/
WARN_ON(atomic_read(&mnt->__mnt_writers));
dput(mnt->mnt_root);
free_vfsmnt(mnt);
deactivate_super(sb);
Expand Down
7 changes: 7 additions & 0 deletions trunk/include/linux/mount.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

#include <linux/types.h>
#include <linux/list.h>
#include <linux/nodemask.h>
#include <linux/spinlock.h>
#include <asm/atomic.h>

Expand All @@ -30,6 +31,7 @@ struct mnt_namespace;
#define MNT_RELATIME 0x20

#define MNT_SHRINKABLE 0x100
#define MNT_IMBALANCED_WRITE_COUNT 0x200 /* just for debugging */

#define MNT_SHARED 0x1000 /* if the vfsmount is a shared mount */
#define MNT_UNBINDABLE 0x2000 /* if the vfsmount is a unbindable mount */
Expand Down Expand Up @@ -62,6 +64,11 @@ struct vfsmount {
int mnt_expiry_mark; /* true if marked for expiry */
int mnt_pinned;
int mnt_ghosts;
/*
* This value is not stable unless all of the mnt_writers[] spinlocks
* are held, and all mnt_writer[]s on this mount have 0 as their ->count
*/
atomic_t __mnt_writers;
};

static inline struct vfsmount *mntget(struct vfsmount *mnt)
Expand Down

0 comments on commit c2f30c0

Please sign in to comment.