Skip to content

Commit

Permalink
kernfs: Use RCU to access kernfs_node::name.
Browse files Browse the repository at this point in the history
Using RCU lifetime rules to access kernfs_node::name can avoid the
trouble with kernfs_rename_lock in kernfs_name() and kernfs_path_from_node()
if the fs was created with KERNFS_ROOT_INVARIANT_PARENT. This is usefull
as it allows to implement kernfs_path_from_node() only with RCU
protection and avoiding kernfs_rename_lock. The lock is only required if
the __parent node can be changed and the function requires an unchanged
hierarchy while it iterates from the node to its parent.
The change is needed to allow the lookup of the node's path
(kernfs_path_from_node()) from context which runs always with disabled
preemption and or interrutps even on PREEMPT_RT. The problem is that
kernfs_rename_lock becomes a sleeping lock on PREEMPT_RT.

I went through all ::name users and added the required access for the lookup
with a few extensions:
- rdtgroup_pseudo_lock_create() drops all locks and then uses the name
  later on. resctrl supports rename with different parents. Here I made
  a temporal copy of the name while it is used outside of the lock.

- kernfs_rename_ns() accepts NULL as new_parent. This simplifies
  sysfs_move_dir_ns() where it can set NULL in order to reuse the current
  name.

- kernfs_rename_ns() is only using kernfs_rename_lock if the parents are
  different. All users use either kernfs_rwsem (for stable path view) or
  just RCU for the lookup. The ::name uses always RCU free.

Use RCU lifetime guarantees to access kernfs_node::name.

Suggested-by: Tejun Heo <tj@kernel.org>
Acked-by: Tejun Heo <tj@kernel.org>
Reported-by: syzbot+6ea37e2e6ffccf41a7e6@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/lkml/67251dc6.050a0220.529b6.015e.GAE@google.com/
Reported-by: Hillf Danton <hdanton@sina.com>
Closes: https://lore.kernel.org/20241102001224.2789-1-hdanton@sina.com
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Link: https://lore.kernel.org/r/20250213145023.2820193-7-bigeasy@linutronix.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
  • Loading branch information
Sebastian Andrzej Siewior authored and Greg Kroah-Hartman committed Feb 15, 2025
1 parent 6334889 commit 741c10b
Show file tree
Hide file tree
Showing 11 changed files with 105 additions and 71 deletions.
5 changes: 5 additions & 0 deletions arch/x86/kernel/cpu/resctrl/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -507,6 +507,11 @@ int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s,

extern struct mutex rdtgroup_mutex;

static inline const char *rdt_kn_name(const struct kernfs_node *kn)
{
return rcu_dereference_check(kn->name, lockdep_is_held(&rdtgroup_mutex));
}

extern struct rdt_hw_resource rdt_resources_all[];
extern struct rdtgroup rdtgroup_default;
extern struct dentry *debugfs_resctrl;
Expand Down
14 changes: 10 additions & 4 deletions arch/x86/kernel/cpu/resctrl/pseudo_lock.c
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@ static char *pseudo_lock_devnode(const struct device *dev, umode_t *mode)
rdtgrp = dev_get_drvdata(dev);
if (mode)
*mode = 0600;
return kasprintf(GFP_KERNEL, "pseudo_lock/%s", rdtgrp->kn->name);
guard(mutex)(&rdtgroup_mutex);
return kasprintf(GFP_KERNEL, "pseudo_lock/%s", rdt_kn_name(rdtgrp->kn));
}

static const struct class pseudo_lock_class = {
Expand Down Expand Up @@ -1293,6 +1294,7 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp)
struct task_struct *thread;
unsigned int new_minor;
struct device *dev;
char *kn_name __free(kfree) = NULL;
int ret;

ret = pseudo_lock_region_alloc(plr);
Expand All @@ -1304,6 +1306,11 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp)
ret = -EINVAL;
goto out_region;
}
kn_name = kstrdup(rdt_kn_name(rdtgrp->kn), GFP_KERNEL);
if (!kn_name) {
ret = -ENOMEM;
goto out_cstates;
}

plr->thread_done = 0;

Expand Down Expand Up @@ -1348,8 +1355,7 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp)
mutex_unlock(&rdtgroup_mutex);

if (!IS_ERR_OR_NULL(debugfs_resctrl)) {
plr->debugfs_dir = debugfs_create_dir(rdtgrp->kn->name,
debugfs_resctrl);
plr->debugfs_dir = debugfs_create_dir(kn_name, debugfs_resctrl);
if (!IS_ERR_OR_NULL(plr->debugfs_dir))
debugfs_create_file("pseudo_lock_measure", 0200,
plr->debugfs_dir, rdtgrp,
Expand All @@ -1358,7 +1364,7 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp)

dev = device_create(&pseudo_lock_class, NULL,
MKDEV(pseudo_lock_major, new_minor),
rdtgrp, "%s", rdtgrp->kn->name);
rdtgrp, "%s", kn_name);

mutex_lock(&rdtgroup_mutex);

Expand Down
10 changes: 5 additions & 5 deletions arch/x86/kernel/cpu/resctrl/rdtgroup.c
Original file line number Diff line number Diff line change
Expand Up @@ -916,14 +916,14 @@ int proc_resctrl_show(struct seq_file *s, struct pid_namespace *ns,
continue;

seq_printf(s, "res:%s%s\n", (rdtg == &rdtgroup_default) ? "/" : "",
rdtg->kn->name);
rdt_kn_name(rdtg->kn));
seq_puts(s, "mon:");
list_for_each_entry(crg, &rdtg->mon.crdtgrp_list,
mon.crdtgrp_list) {
if (!resctrl_arch_match_rmid(tsk, crg->mon.parent->closid,
crg->mon.rmid))
continue;
seq_printf(s, "%s", crg->kn->name);
seq_printf(s, "%s", rdt_kn_name(crg->kn));
break;
}
seq_putc(s, '\n');
Expand Down Expand Up @@ -3675,7 +3675,7 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
*/
static bool is_mon_groups(struct kernfs_node *kn, const char *name)
{
return (!strcmp(kn->name, "mon_groups") &&
return (!strcmp(rdt_kn_name(kn), "mon_groups") &&
strcmp(name, "mon_groups"));
}

Expand Down Expand Up @@ -3824,7 +3824,7 @@ static int rdtgroup_rmdir(struct kernfs_node *kn)
ret = rdtgroup_rmdir_ctrl(rdtgrp, tmpmask);
}
} else if (rdtgrp->type == RDTMON_GROUP &&
is_mon_groups(parent_kn, kn->name)) {
is_mon_groups(parent_kn, rdt_kn_name(kn))) {
ret = rdtgroup_rmdir_mon(rdtgrp, tmpmask);
} else {
ret = -EPERM;
Expand Down Expand Up @@ -3912,7 +3912,7 @@ static int rdtgroup_rename(struct kernfs_node *kn,

kn_parent = rdt_kn_parent(kn);
if (rdtgrp->type != RDTMON_GROUP || !kn_parent ||
!is_mon_groups(kn_parent, kn->name)) {
!is_mon_groups(kn_parent, rdt_kn_name(kn))) {
rdt_last_cmd_puts("Source must be a MON group\n");
ret = -EPERM;
goto out;
Expand Down
113 changes: 62 additions & 51 deletions fs/kernfs/dir.c
Original file line number Diff line number Diff line change
Expand Up @@ -51,14 +51,6 @@ static bool kernfs_lockdep(struct kernfs_node *kn)
#endif
}

static int kernfs_name_locked(struct kernfs_node *kn, char *buf, size_t buflen)
{
if (!kn)
return strscpy(buf, "(null)", buflen);

return strscpy(buf, rcu_access_pointer(kn->__parent) ? kn->name : "/", buflen);
}

/* kernfs_node_depth - compute depth from @from to @to */
static size_t kernfs_depth(struct kernfs_node *from, struct kernfs_node *to)
{
Expand Down Expand Up @@ -168,11 +160,13 @@ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to,

/* Calculate how many bytes we need for the rest */
for (i = depth_to - 1; i >= 0; i--) {
const char *name;

for (kn = kn_to, j = 0; j < i; j++)
kn = rcu_dereference(kn->__parent);

len += scnprintf(buf + len, buflen - len, "/%s", kn->name);
name = rcu_dereference(kn->name);
len += scnprintf(buf + len, buflen - len, "/%s", name);
}

return len;
Expand All @@ -196,13 +190,18 @@ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to,
*/
int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen)
{
unsigned long flags;
int ret;
struct kernfs_node *kn_parent;

read_lock_irqsave(&kernfs_rename_lock, flags);
ret = kernfs_name_locked(kn, buf, buflen);
read_unlock_irqrestore(&kernfs_rename_lock, flags);
return ret;
if (!kn)
return strscpy(buf, "(null)", buflen);

guard(rcu)();
/*
* KERNFS_ROOT_INVARIANT_PARENT is ignored here. The name is RCU freed and
* the parent is either existing or not.
*/
kn_parent = rcu_dereference(kn->__parent);
return strscpy(buf, kn_parent ? rcu_dereference(kn->name) : "/", buflen);
}

/**
Expand All @@ -224,14 +223,17 @@ int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen)
int kernfs_path_from_node(struct kernfs_node *to, struct kernfs_node *from,
char *buf, size_t buflen)
{
unsigned long flags;
int ret;
struct kernfs_root *root;

guard(rcu)();
read_lock_irqsave(&kernfs_rename_lock, flags);
ret = kernfs_path_from_node_locked(to, from, buf, buflen);
read_unlock_irqrestore(&kernfs_rename_lock, flags);
return ret;
if (to) {
root = kernfs_root(to);
if (!(root->flags & KERNFS_ROOT_INVARIANT_PARENT)) {
guard(read_lock_irqsave)(&kernfs_rename_lock);
return kernfs_path_from_node_locked(to, from, buf, buflen);
}
}
return kernfs_path_from_node_locked(to, from, buf, buflen);
}
EXPORT_SYMBOL_GPL(kernfs_path_from_node);

Expand Down Expand Up @@ -338,13 +340,13 @@ static int kernfs_name_compare(unsigned int hash, const char *name,
return -1;
if (ns > kn->ns)
return 1;
return strcmp(name, kn->name);
return strcmp(name, kernfs_rcu_name(kn));
}

static int kernfs_sd_compare(const struct kernfs_node *left,
const struct kernfs_node *right)
{
return kernfs_name_compare(left->hash, left->name, left->ns, right);
return kernfs_name_compare(left->hash, kernfs_rcu_name(left), left->ns, right);
}

/**
Expand Down Expand Up @@ -542,7 +544,8 @@ static void kernfs_free_rcu(struct rcu_head *rcu)
{
struct kernfs_node *kn = container_of(rcu, struct kernfs_node, rcu);

kfree_const(kn->name);
/* If the whole node goes away, then name can't be used outside */
kfree_const(rcu_access_pointer(kn->name));

if (kn->iattr) {
simple_xattrs_free(&kn->iattr->xattrs, NULL);
Expand Down Expand Up @@ -575,7 +578,8 @@ void kernfs_put(struct kernfs_node *kn)

WARN_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS,
"kernfs_put: %s/%s: released with incorrect active_ref %d\n",
parent ? parent->name : "", kn->name, atomic_read(&kn->active));
parent ? rcu_dereference(parent->name) : "",
rcu_dereference(kn->name), atomic_read(&kn->active));

if (kernfs_type(kn) == KERNFS_LINK)
kernfs_put(kn->symlink.target_kn);
Expand Down Expand Up @@ -652,7 +656,7 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
atomic_set(&kn->active, KN_DEACTIVATED_BIAS);
RB_CLEAR_NODE(&kn->rb);

kn->name = name;
rcu_assign_pointer(kn->name, name);
kn->mode = mode;
kn->flags = flags;

Expand Down Expand Up @@ -790,7 +794,8 @@ int kernfs_add_one(struct kernfs_node *kn)
ret = -EINVAL;
has_ns = kernfs_ns_enabled(parent);
if (WARN(has_ns != (bool)kn->ns, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
has_ns ? "required" : "invalid", parent->name, kn->name))
has_ns ? "required" : "invalid",
kernfs_rcu_name(parent), kernfs_rcu_name(kn)))
goto out_unlock;

if (kernfs_type(parent) != KERNFS_DIR)
Expand All @@ -800,7 +805,7 @@ int kernfs_add_one(struct kernfs_node *kn)
if (parent->flags & (KERNFS_REMOVING | KERNFS_EMPTY_DIR))
goto out_unlock;

kn->hash = kernfs_name_hash(kn->name, kn->ns);
kn->hash = kernfs_name_hash(kernfs_rcu_name(kn), kn->ns);

ret = kernfs_link_sibling(kn);
if (ret)
Expand Down Expand Up @@ -856,7 +861,7 @@ static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent,

if (has_ns != (bool)ns) {
WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
has_ns ? "required" : "invalid", parent->name, name);
has_ns ? "required" : "invalid", kernfs_rcu_name(parent), name);
return NULL;
}

Expand Down Expand Up @@ -1135,8 +1140,6 @@ static int kernfs_dop_revalidate(struct inode *dir, const struct qstr *name,

/* Negative hashed dentry? */
if (d_really_is_negative(dentry)) {
struct kernfs_node *parent;

/* If the kernfs parent node has changed discard and
* proceed to ->lookup.
*
Expand Down Expand Up @@ -1184,7 +1187,7 @@ static int kernfs_dop_revalidate(struct inode *dir, const struct qstr *name,
goto out_bad;

/* The kernfs node has been renamed */
if (strcmp(dentry->d_name.name, kn->name) != 0)
if (strcmp(dentry->d_name.name, kernfs_rcu_name(kn)) != 0)
goto out_bad;

/* The kernfs node has been moved to a different namespace */
Expand Down Expand Up @@ -1478,7 +1481,7 @@ static void __kernfs_remove(struct kernfs_node *kn)
if (kernfs_parent(kn) && RB_EMPTY_NODE(&kn->rb))
return;

pr_debug("kernfs %s: removing\n", kn->name);
pr_debug("kernfs %s: removing\n", kernfs_rcu_name(kn));

/* prevent new usage by marking all nodes removing and deactivating */
pos = NULL;
Expand Down Expand Up @@ -1734,7 +1737,7 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
{
struct kernfs_node *old_parent;
struct kernfs_root *root;
const char *old_name = NULL;
const char *old_name;
int error;

/* can't move or rename root */
Expand All @@ -1757,16 +1760,19 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
}

error = 0;
old_name = kernfs_rcu_name(kn);
if (!new_name)
new_name = old_name;
if ((old_parent == new_parent) && (kn->ns == new_ns) &&
(strcmp(kn->name, new_name) == 0))
(strcmp(old_name, new_name) == 0))
goto out; /* nothing to rename */

error = -EEXIST;
if (kernfs_find_ns(new_parent, new_name, new_ns))
goto out;

/* rename kernfs_node */
if (strcmp(kn->name, new_name) != 0) {
if (strcmp(old_name, new_name) != 0) {
error = -ENOMEM;
new_name = kstrdup_const(new_name, GFP_KERNEL);
if (!new_name)
Expand All @@ -1779,27 +1785,32 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
* Move to the appropriate place in the appropriate directories rbtree.
*/
kernfs_unlink_sibling(kn);
kernfs_get(new_parent);

/* rename_lock protects ->parent and ->name accessors */
write_lock_irq(&kernfs_rename_lock);
/* rename_lock protects ->parent accessors */
if (old_parent != new_parent) {
kernfs_get(new_parent);
write_lock_irq(&kernfs_rename_lock);

old_parent = kernfs_parent(kn);
rcu_assign_pointer(kn->__parent, new_parent);
rcu_assign_pointer(kn->__parent, new_parent);

kn->ns = new_ns;
if (new_name) {
old_name = kn->name;
kn->name = new_name;
}
kn->ns = new_ns;
if (new_name)
rcu_assign_pointer(kn->name, new_name);

write_unlock_irq(&kernfs_rename_lock);
write_unlock_irq(&kernfs_rename_lock);
kernfs_put(old_parent);
} else {
/* name assignment is RCU protected, parent is the same */
kn->ns = new_ns;
if (new_name)
rcu_assign_pointer(kn->name, new_name);
}

kn->hash = kernfs_name_hash(kn->name, kn->ns);
kn->hash = kernfs_name_hash(new_name ?: old_name, kn->ns);
kernfs_link_sibling(kn);

kernfs_put(old_parent);
kfree_const(old_name);
if (new_name && !is_kernel_rodata((unsigned long)old_name))
kfree_rcu_mightsleep(old_name);

error = 0;
out:
Expand Down Expand Up @@ -1884,7 +1895,7 @@ static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx)
for (pos = kernfs_dir_pos(ns, parent, ctx->pos, pos);
pos;
pos = kernfs_dir_next_pos(ns, parent, ctx->pos, pos)) {
const char *name = pos->name;
const char *name = kernfs_rcu_name(pos);
unsigned int type = fs_umode_to_dtype(pos->mode);
int len = strlen(name);
ino_t ino = kernfs_ino(pos);
Expand Down
4 changes: 3 additions & 1 deletion fs/kernfs/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -915,6 +915,7 @@ static void kernfs_notify_workfn(struct work_struct *work)
list_for_each_entry(info, &kernfs_root(kn)->supers, node) {
struct kernfs_node *parent;
struct inode *p_inode = NULL;
const char *kn_name;
struct inode *inode;
struct qstr name;

Expand All @@ -928,7 +929,8 @@ static void kernfs_notify_workfn(struct work_struct *work)
if (!inode)
continue;

name = QSTR(kn->name);
kn_name = kernfs_rcu_name(kn);
name = QSTR(kn_name);
parent = kernfs_get_parent(kn);
if (parent) {
p_inode = ilookup(info->sb, kernfs_ino(parent));
Expand Down
Loading

0 comments on commit 741c10b

Please sign in to comment.