Skip to content

Commit

Permalink
[PATCH] inotify: lock avoidance with parent watch status in dentry
Browse files Browse the repository at this point in the history
Previous inotify work avoidance is good when inotify is completely unused,
but it breaks down if even a single watch is in place anywhere in the
system.  Robin Holt notices that udev is one such culprit - it slows down a
512-thread application on a 512 CPU system from 6 seconds to 22 minutes.

Solve this by adding a flag in the dentry that tells inotify whether or not
its parent inode has a watch on it.  Event queueing to parent will skip
taking locks if this flag is cleared.  Setting and clearing of this flag on
all child dentries versus event delivery: this is no in terms of race
cases, and that was shown to be equivalent to always performing the check.

The essential behaviour is that activity occuring _after_ a watch has been
added and _before_ it has been removed, will generate events.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Robert Love <rml@novell.com>
Cc: John McCutchan <ttb@tentacle.dhs.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
  • Loading branch information
Nick Piggin authored and Linus Torvalds committed Mar 25, 2006
1 parent bf36b90 commit c32ccd8
Show file tree
Hide file tree
Showing 5 changed files with 117 additions and 10 deletions.
8 changes: 8 additions & 0 deletions fs/dcache.c
Original file line number Diff line number Diff line change
Expand Up @@ -802,6 +802,7 @@ void d_instantiate(struct dentry *entry, struct inode * inode)
if (inode)
list_add(&entry->d_alias, &inode->i_dentry);
entry->d_inode = inode;
fsnotify_d_instantiate(entry, inode);
spin_unlock(&dcache_lock);
security_d_instantiate(entry, inode);
}
Expand Down Expand Up @@ -853,6 +854,7 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
list_add(&entry->d_alias, &inode->i_dentry);
do_negative:
entry->d_inode = inode;
fsnotify_d_instantiate(entry, inode);
spin_unlock(&dcache_lock);
security_d_instantiate(entry, inode);
return NULL;
Expand Down Expand Up @@ -983,6 +985,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
new = __d_find_alias(inode, 1);
if (new) {
BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
fsnotify_d_instantiate(new, inode);
spin_unlock(&dcache_lock);
security_d_instantiate(new, inode);
d_rehash(dentry);
Expand All @@ -992,6 +995,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
/* d_instantiate takes dcache_lock, so we do it by hand */
list_add(&dentry->d_alias, &inode->i_dentry);
dentry->d_inode = inode;
fsnotify_d_instantiate(dentry, inode);
spin_unlock(&dcache_lock);
security_d_instantiate(dentry, inode);
d_rehash(dentry);
Expand Down Expand Up @@ -1176,6 +1180,9 @@ void d_delete(struct dentry * dentry)
spin_lock(&dentry->d_lock);
isdir = S_ISDIR(dentry->d_inode->i_mode);
if (atomic_read(&dentry->d_count) == 1) {
/* remove this and other inotify debug checks after 2.6.18 */
dentry->d_flags &= ~DCACHE_INOTIFY_PARENT_WATCHED;

dentry_iput(dentry);
fsnotify_nameremove(dentry, isdir);
return;
Expand Down Expand Up @@ -1342,6 +1349,7 @@ void d_move(struct dentry * dentry, struct dentry * target)

list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
spin_unlock(&target->d_lock);
fsnotify_d_move(dentry);
spin_unlock(&dentry->d_lock);
write_sequnlock(&rename_lock);
spin_unlock(&dcache_lock);
Expand Down
87 changes: 77 additions & 10 deletions fs/inotify.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@
#include <asm/ioctls.h>

static atomic_t inotify_cookie;
static atomic_t inotify_watches;

static kmem_cache_t *watch_cachep;
static kmem_cache_t *event_cachep;
Expand Down Expand Up @@ -380,6 +379,48 @@ static int find_inode(const char __user *dirname, struct nameidata *nd,
return error;
}

/*
* inotify_inode_watched - returns nonzero if there are watches on this inode
* and zero otherwise. We call this lockless, we do not care if we race.
*/
static inline int inotify_inode_watched(struct inode *inode)
{
return !list_empty(&inode->inotify_watches);
}

/*
* Get child dentry flag into synch with parent inode.
* Flag should always be clear for negative dentrys.
*/
static void set_dentry_child_flags(struct inode *inode, int watched)
{
struct dentry *alias;

spin_lock(&dcache_lock);
list_for_each_entry(alias, &inode->i_dentry, d_alias) {
struct dentry *child;

list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) {
if (!child->d_inode) {
WARN_ON(child->d_flags & DCACHE_INOTIFY_PARENT_WATCHED);
continue;
}
spin_lock(&child->d_lock);
if (watched) {
WARN_ON(child->d_flags &
DCACHE_INOTIFY_PARENT_WATCHED);
child->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
} else {
WARN_ON(!(child->d_flags &
DCACHE_INOTIFY_PARENT_WATCHED));
child->d_flags&=~DCACHE_INOTIFY_PARENT_WATCHED;
}
spin_unlock(&child->d_lock);
}
}
spin_unlock(&dcache_lock);
}

/*
* create_watch - creates a watch on the given device.
*
Expand Down Expand Up @@ -426,7 +467,6 @@ static struct inotify_watch *create_watch(struct inotify_device *dev,
get_inotify_watch(watch);

atomic_inc(&dev->user->inotify_watches);
atomic_inc(&inotify_watches);

return watch;
}
Expand Down Expand Up @@ -458,8 +498,10 @@ static void remove_watch_no_event(struct inotify_watch *watch,
list_del(&watch->i_list);
list_del(&watch->d_list);

if (!inotify_inode_watched(watch->inode))
set_dentry_child_flags(watch->inode, 0);

atomic_dec(&dev->user->inotify_watches);
atomic_dec(&inotify_watches);
idr_remove(&dev->idr, watch->wd);
put_inotify_watch(watch);
}
Expand All @@ -481,16 +523,39 @@ static void remove_watch(struct inotify_watch *watch,struct inotify_device *dev)
remove_watch_no_event(watch, dev);
}

/* Kernel API */

/*
* inotify_inode_watched - returns nonzero if there are watches on this inode
* and zero otherwise. We call this lockless, we do not care if we race.
* inotify_d_instantiate - instantiate dcache entry for inode
*/
static inline int inotify_inode_watched(struct inode *inode)
void inotify_d_instantiate(struct dentry *entry, struct inode *inode)
{
return !list_empty(&inode->inotify_watches);
struct dentry *parent;

if (!inode)
return;

WARN_ON(entry->d_flags & DCACHE_INOTIFY_PARENT_WATCHED);
spin_lock(&entry->d_lock);
parent = entry->d_parent;
if (inotify_inode_watched(parent->d_inode))
entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
spin_unlock(&entry->d_lock);
}

/* Kernel API */
/*
* inotify_d_move - dcache entry has been moved
*/
void inotify_d_move(struct dentry *entry)
{
struct dentry *parent;

parent = entry->d_parent;
if (inotify_inode_watched(parent->d_inode))
entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
else
entry->d_flags &= ~DCACHE_INOTIFY_PARENT_WATCHED;
}

/**
* inotify_inode_queue_event - queue an event to all watches on this inode
Expand Down Expand Up @@ -538,7 +603,7 @@ void inotify_dentry_parent_queue_event(struct dentry *dentry, u32 mask,
struct dentry *parent;
struct inode *inode;

if (!atomic_read (&inotify_watches))
if (!(dentry->d_flags & DCACHE_INOTIFY_PARENT_WATCHED))
return;

spin_lock(&dentry->d_lock);
Expand Down Expand Up @@ -993,6 +1058,9 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
goto out;
}

if (!inotify_inode_watched(inode))
set_dentry_child_flags(inode, 1);

/* Add the watch to the device's and the inode's list */
list_add(&watch->d_list, &dev->watches);
list_add(&watch->i_list, &inode->inotify_watches);
Expand Down Expand Up @@ -1065,7 +1133,6 @@ static int __init inotify_setup(void)
inotify_max_user_watches = 8192;

atomic_set(&inotify_cookie, 0);
atomic_set(&inotify_watches, 0);

watch_cachep = kmem_cache_create("inotify_watch_cache",
sizeof(struct inotify_watch),
Expand Down
2 changes: 2 additions & 0 deletions include/linux/dcache.h
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,8 @@ d_iput: no no no yes
#define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */
#define DCACHE_UNHASHED 0x0010

#define DCACHE_INOTIFY_PARENT_WATCHED 0x0020 /* Parent inode is watched */

extern spinlock_t dcache_lock;

/**
Expand Down
19 changes: 19 additions & 0 deletions include/linux/fsnotify.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,25 @@
#include <linux/dnotify.h>
#include <linux/inotify.h>

/*
* fsnotify_d_instantiate - instantiate a dentry for inode
* Called with dcache_lock held.
*/
static inline void fsnotify_d_instantiate(struct dentry *entry,
struct inode *inode)
{
inotify_d_instantiate(entry, inode);
}

/*
* fsnotify_d_move - entry has been moved
* Called with dcache_lock and entry->d_lock held.
*/
static inline void fsnotify_d_move(struct dentry *entry)
{
inotify_d_move(entry);
}

/*
* fsnotify_move - file old_name at old_dir was moved to new_name at new_dir
*/
Expand Down
11 changes: 11 additions & 0 deletions include/linux/inotify.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ struct inotify_event {

#ifdef CONFIG_INOTIFY

extern void inotify_d_instantiate(struct dentry *, struct inode *);
extern void inotify_d_move(struct dentry *);
extern void inotify_inode_queue_event(struct inode *, __u32, __u32,
const char *);
extern void inotify_dentry_parent_queue_event(struct dentry *, __u32, __u32,
Expand All @@ -81,6 +83,15 @@ extern u32 inotify_get_cookie(void);

#else

static inline void inotify_d_instantiate(struct dentry *dentry,
struct inode *inode)
{
}

static inline void inotify_d_move(struct dentry *dentry)
{
}

static inline void inotify_inode_queue_event(struct inode *inode,
__u32 mask, __u32 cookie,
const char *filename)
Expand Down

0 comments on commit c32ccd8

Please sign in to comment.