Skip to content

Commit

Permalink
Merge branch 'dentry-cleanups' (dcache access cleanups and optimizati…
Browse files Browse the repository at this point in the history
…ons)

This branch simplifies and clarifies the dcache lookup, and allows us to
do certain nice optimizations when comparing dentries.  It also cleans
up the interface to __d_lookup_rcu(), especially around passing the
inode information around.

* dentry-cleanups:
  vfs: make it possible to access the dentry hash/len as one 64-bit entry
  vfs: move dentry name length comparison from dentry_cmp() into callers
  vfs: do the careful dentry name access for all dentry_cmp cases
  vfs: remove unnecessary d_unhashed() check from __d_lookup_rcu
  vfs: clean up __d_lookup_rcu() and dentry_cmp() interfaces
  • Loading branch information
Linus Torvalds committed May 21, 2012
2 parents 7e5cb5e + 26fe575 commit 31ed8e6
Show file tree
Hide file tree
Showing 19 changed files with 167 additions and 103 deletions.
173 changes: 113 additions & 60 deletions fs/dcache.c
Original file line number Diff line number Diff line change
Expand Up @@ -153,16 +153,12 @@ int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
* In contrast, 'ct' and 'tcount' can be from a pathname, and do
* need the careful unaligned handling.
*/
static inline int dentry_cmp(const unsigned char *cs, size_t scount,
const unsigned char *ct, size_t tcount)
static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
{
unsigned long a,b,mask;

if (unlikely(scount != tcount))
return 1;

for (;;) {
a = load_unaligned_zeropad(cs);
a = *(unsigned long *)cs;
b = load_unaligned_zeropad(ct);
if (tcount < sizeof(unsigned long))
break;
Expand All @@ -180,12 +176,8 @@ static inline int dentry_cmp(const unsigned char *cs, size_t scount,

#else

static inline int dentry_cmp(const unsigned char *cs, size_t scount,
const unsigned char *ct, size_t tcount)
static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
{
if (scount != tcount)
return 1;

do {
if (*cs != *ct)
return 1;
Expand All @@ -198,6 +190,27 @@ static inline int dentry_cmp(const unsigned char *cs, size_t scount,

#endif

static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *ct, unsigned tcount)
{
/*
* Be careful about RCU walk racing with rename:
* use ACCESS_ONCE to fetch the name pointer.
*
* NOTE! Even if a rename will mean that the length
* was not loaded atomically, we don't care. The
* RCU walk will check the sequence count eventually,
* and catch it. And we won't overrun the buffer,
* because we're reading the name pointer atomically,
* and a dentry name is guaranteed to be properly
* terminated with a NUL byte.
*
* End result: even if 'len' is wrong, we'll exit
* early because the data cannot match (there can
* be no NUL in the ct/tcount data)
*/
return dentry_string_cmp(ACCESS_ONCE(dentry->d_name.name), ct, tcount);
}

static void __d_free(struct rcu_head *head)
{
struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
Expand Down Expand Up @@ -1439,18 +1452,18 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry,
}

list_for_each_entry(alias, &inode->i_dentry, d_alias) {
struct qstr *qstr = &alias->d_name;

/*
* Don't need alias->d_lock here, because aliases with
* d_parent == entry->d_parent are not subject to name or
* parent changes, because the parent inode i_mutex is held.
*/
if (qstr->hash != hash)
if (alias->d_name.hash != hash)
continue;
if (alias->d_parent != entry->d_parent)
continue;
if (dentry_cmp(qstr->name, qstr->len, name, len))
if (alias->d_name.len != len)
continue;
if (dentry_cmp(alias, name, len))
continue;
__dget(alias);
return alias;
Expand Down Expand Up @@ -1489,7 +1502,7 @@ struct dentry *d_make_root(struct inode *root_inode)
struct dentry *res = NULL;

if (root_inode) {
static const struct qstr name = { .name = "/", .len = 1 };
static const struct qstr name = QSTR_INIT("/", 1);

res = __d_alloc(root_inode->i_sb, &name);
if (res)
Expand Down Expand Up @@ -1727,6 +1740,48 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
}
EXPORT_SYMBOL(d_add_ci);

/*
* Do the slow-case of the dentry name compare.
*
* Unlike the dentry_cmp() function, we need to atomically
* load the name, length and inode information, so that the
* filesystem can rely on them, and can use the 'name' and
* 'len' information without worrying about walking off the
* end of memory etc.
*
* Thus the read_seqcount_retry() and the "duplicate" info
* in arguments (the low-level filesystem should not look
* at the dentry inode or name contents directly, since
* rename can change them while we're in RCU mode).
*/
enum slow_d_compare {
D_COMP_OK,
D_COMP_NOMATCH,
D_COMP_SEQRETRY,
};

static noinline enum slow_d_compare slow_dentry_cmp(
const struct dentry *parent,
struct inode *inode,
struct dentry *dentry,
unsigned int seq,
const struct qstr *name)
{
int tlen = dentry->d_name.len;
const char *tname = dentry->d_name.name;
struct inode *i = dentry->d_inode;

if (read_seqcount_retry(&dentry->d_seq, seq)) {
cpu_relax();
return D_COMP_SEQRETRY;
}
if (parent->d_op->d_compare(parent, inode,
dentry, i,
tlen, tname, name))
return D_COMP_NOMATCH;
return D_COMP_OK;
}

/**
* __d_lookup_rcu - search for a dentry (racy, store-free)
* @parent: parent dentry
Expand All @@ -1753,15 +1808,17 @@ EXPORT_SYMBOL(d_add_ci);
* the returned dentry, so long as its parent's seqlock is checked after the
* child is looked up. Thus, an interlocking stepping of sequence lock checks
* is formed, giving integrity down the path walk.
*
* NOTE! The caller *has* to check the resulting dentry against the sequence
* number we've returned before using any of the resulting dentry state!
*/
struct dentry *__d_lookup_rcu(const struct dentry *parent,
const struct qstr *name,
unsigned *seqp, struct inode **inode)
unsigned *seqp, struct inode *inode)
{
unsigned int len = name->len;
unsigned int hash = name->hash;
u64 hashlen = name->hash_len;
const unsigned char *str = name->name;
struct hlist_bl_head *b = d_hash(parent, hash);
struct hlist_bl_head *b = d_hash(parent, hashlen_hash(hashlen));
struct hlist_bl_node *node;
struct dentry *dentry;

Expand All @@ -1787,49 +1844,45 @@ struct dentry *__d_lookup_rcu(const struct dentry *parent,
*/
hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
unsigned seq;
struct inode *i;
const char *tname;
int tlen;

if (dentry->d_name.hash != hash)
continue;

seqretry:
seq = read_seqcount_begin(&dentry->d_seq);
if (dentry->d_parent != parent)
continue;
if (d_unhashed(dentry))
continue;
tlen = dentry->d_name.len;
tname = dentry->d_name.name;
i = dentry->d_inode;
prefetch(tname);
/*
* This seqcount check is required to ensure name and
* len are loaded atomically, so as not to walk off the
* edge of memory when walking. If we could load this
* atomically some other way, we could drop this check.
* The dentry sequence count protects us from concurrent
* renames, and thus protects inode, parent and name fields.
*
* The caller must perform a seqcount check in order
* to do anything useful with the returned dentry,
* including using the 'd_inode' pointer.
*
* NOTE! We do a "raw" seqcount_begin here. That means that
* we don't wait for the sequence count to stabilize if it
* is in the middle of a sequence change. If we do the slow
* dentry compare, we will do seqretries until it is stable,
* and if we end up with a successful lookup, we actually
* want to exit RCU lookup anyway.
*/
if (read_seqcount_retry(&dentry->d_seq, seq))
goto seqretry;
seq = raw_seqcount_begin(&dentry->d_seq);
if (dentry->d_parent != parent)
continue;
*seqp = seq;

if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) {
if (parent->d_op->d_compare(parent, *inode,
dentry, i,
tlen, tname, name))
if (dentry->d_name.hash != hashlen_hash(hashlen))
continue;
} else {
if (dentry_cmp(tname, tlen, str, len))
switch (slow_dentry_cmp(parent, inode, dentry, seq, name)) {
case D_COMP_OK:
return dentry;
case D_COMP_NOMATCH:
continue;
default:
goto seqretry;
}
}
/*
* No extra seqcount check is required after the name
* compare. The caller must perform a seqcount check in
* order to do anything useful with the returned dentry
* anyway.
*/
*seqp = seq;
*inode = i;
return dentry;

if (dentry->d_name.hash_len != hashlen)
continue;
if (!dentry_cmp(dentry, str, hashlen_len(hashlen)))
return dentry;
}
return NULL;
}
Expand Down Expand Up @@ -1908,8 +1961,6 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
rcu_read_lock();

hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
const char *tname;
int tlen;

if (dentry->d_name.hash != hash)
continue;
Expand All @@ -1924,15 +1975,17 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
* It is safe to compare names since d_move() cannot
* change the qstr (protected by d_lock).
*/
tlen = dentry->d_name.len;
tname = dentry->d_name.name;
if (parent->d_flags & DCACHE_OP_COMPARE) {
int tlen = dentry->d_name.len;
const char *tname = dentry->d_name.name;
if (parent->d_op->d_compare(parent, parent->d_inode,
dentry, dentry->d_inode,
tlen, tname, name))
goto next;
} else {
if (dentry_cmp(tname, tlen, str, len))
if (dentry->d_name.len != len)
goto next;
if (dentry_cmp(dentry, str, len))
goto next;
}

Expand Down
2 changes: 1 addition & 1 deletion fs/ext2/namei.c
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, str

struct dentry *ext2_get_parent(struct dentry *child)
{
struct qstr dotdot = {.name = "..", .len = 2};
struct qstr dotdot = QSTR_INIT("..", 2);
unsigned long ino = ext2_inode_by_name(child->d_inode, &dotdot);
if (!ino)
return ERR_PTR(-ENOENT);
Expand Down
2 changes: 1 addition & 1 deletion fs/ext3/namei.c
Original file line number Diff line number Diff line change
Expand Up @@ -1045,7 +1045,7 @@ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, str
struct dentry *ext3_get_parent(struct dentry *child)
{
unsigned long ino;
struct qstr dotdot = {.name = "..", .len = 2};
struct qstr dotdot = QSTR_INIT("..", 2);
struct ext3_dir_entry_2 * de;
struct buffer_head *bh;

Expand Down
5 changes: 1 addition & 4 deletions fs/ext4/namei.c
Original file line number Diff line number Diff line change
Expand Up @@ -1052,10 +1052,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru
struct dentry *ext4_get_parent(struct dentry *child)
{
__u32 ino;
static const struct qstr dotdot = {
.name = "..",
.len = 2,
};
static const struct qstr dotdot = QSTR_INIT("..", 2);
struct ext4_dir_entry_2 * de;
struct buffer_head *bh;

Expand Down
2 changes: 1 addition & 1 deletion fs/gfs2/dir.c
Original file line number Diff line number Diff line change
Expand Up @@ -821,7 +821,7 @@ static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh,
struct buffer_head *bh;
struct gfs2_leaf *leaf;
struct gfs2_dirent *dent;
struct qstr name = { .name = "", .len = 0, .hash = 0 };
struct qstr name = { .name = "" };

error = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
if (error)
Expand Down
4 changes: 2 additions & 2 deletions fs/libfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, struct na

int dcache_dir_open(struct inode *inode, struct file *file)
{
static struct qstr cursor_name = {.len = 1, .name = "."};
static struct qstr cursor_name = QSTR_INIT(".", 1);

file->private_data = d_alloc(file->f_path.dentry, &cursor_name);

Expand Down Expand Up @@ -225,7 +225,7 @@ struct dentry *mount_pseudo(struct file_system_type *fs_type, char *name,
struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL);
struct dentry *dentry;
struct inode *root;
struct qstr d_name = {.name = name, .len = strlen(name)};
struct qstr d_name = QSTR_INIT(name, strlen(name));

if (IS_ERR(s))
return ERR_CAST(s);
Expand Down
19 changes: 16 additions & 3 deletions fs/namei.c
Original file line number Diff line number Diff line change
Expand Up @@ -1144,12 +1144,25 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
*/
if (nd->flags & LOOKUP_RCU) {
unsigned seq;
*inode = nd->inode;
dentry = __d_lookup_rcu(parent, name, &seq, inode);
dentry = __d_lookup_rcu(parent, name, &seq, nd->inode);
if (!dentry)
goto unlazy;

/* Memory barrier in read_seqcount_begin of child is enough */
/*
* This sequence count validates that the inode matches
* the dentry name information from lookup.
*/
*inode = dentry->d_inode;
if (read_seqcount_retry(&dentry->d_seq, seq))
return -ECHILD;

/*
* This sequence count validates that the parent had no
* changes while we did the lookup of the dentry above.
*
* The memory barrier in read_seqcount_begin of child is
* enough, we can use __read_seqcount_retry here.
*/
if (__read_seqcount_retry(&parent->d_seq, nd->seq))
return -ECHILD;
nd->seq = seq;
Expand Down
5 changes: 1 addition & 4 deletions fs/nfs/dir.c
Original file line number Diff line number Diff line change
Expand Up @@ -477,10 +477,7 @@ int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry)
static
void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
{
struct qstr filename = {
.len = entry->len,
.name = entry->name,
};
struct qstr filename = QSTR_INIT(entry->name, entry->len);
struct dentry *dentry;
struct dentry *alias;
struct inode *dir = parent->d_inode;
Expand Down
3 changes: 1 addition & 2 deletions fs/nfs/nfs3proc.c
Original file line number Diff line number Diff line change
Expand Up @@ -398,8 +398,7 @@ nfs3_proc_remove(struct inode *dir, struct qstr *name)
{
struct nfs_removeargs arg = {
.fh = NFS_FH(dir),
.name.len = name->len,
.name.name = name->name,
.name = *name,
};
struct nfs_removeres res;
struct rpc_message msg = {
Expand Down
3 changes: 1 addition & 2 deletions fs/nfs/nfs4proc.c
Original file line number Diff line number Diff line change
Expand Up @@ -2782,8 +2782,7 @@ static int _nfs4_proc_remove(struct inode *dir, struct qstr *name)
struct nfs_server *server = NFS_SERVER(dir);
struct nfs_removeargs args = {
.fh = NFS_FH(dir),
.name.len = name->len,
.name.name = name->name,
.name = *name,
.bitmask = server->attr_bitmask,
};
struct nfs_removeres res = {
Expand Down
Loading

0 comments on commit 31ed8e6

Please sign in to comment.