Skip to content

Commit

Permalink
sysfs: implement sysfs_dirent active reference and immediate disconnect
Browse files Browse the repository at this point in the history
sysfs: implement sysfs_dirent active reference and immediate disconnect

Opening a sysfs node references its associated kobject, so userland
can arbitrarily prolong lifetime of a kobject which complicates
lifetime rules in drivers.  This patch implements active reference and
makes the association between kobject and sysfs immediately breakable.

Now each sysfs_dirent has two reference counts - s_count and s_active.
s_count is a regular reference count which guarantees that the
containing sysfs_dirent is accessible.  As long as s_count reference
is held, all sysfs internal fields in sysfs_dirent are accessible
including s_parent and s_name.

The newly added s_active is active reference count.  This is acquired
by invoking sysfs_get_active() and it's the caller's responsibility to
ensure sysfs_dirent itself is accessible (should be holding s_count
one way or the other).  Dereferencing sysfs_dirent to access objects
out of sysfs proper requires active reference.  This includes access
to the associated kobjects, attributes and ops.

The active references can be drained and denied by calling
sysfs_deactivate().  All active sysfs_dirents must be deactivated
after deletion but before the default reference is dropped.  This
enables immediate disconnect of sysfs nodes.  Once a sysfs_dirent is
deleted, it won't access any entity external to sysfs proper.

Because attr/bin_attr ops access both the node itself and its parent
for kobject, they need to hold active references to both.
sysfs_get/put_active_two() helpers are provided to help grabbing both
references.  Parent's is acquired first and released last.

Unlike other operations, mmapped area lingers on after mmap() is
finished and the module implement implementing it and kobj need to
stay referenced till all the mapped pages are gone.  This is
accomplished by holding one set of active references to the bin_attr
and its parent if there have been any mmap during lifetime of an
openfile.  The references are dropped when the openfile is released.

This change makes sysfs lifetime rules independent from both kobject's
and module's.  It not only fixes several race conditions caused by
sysfs not holding onto the proper module when referencing kobject, but
also helps fixing and simplifying lifetime management in driver model
and drivers by taking sysfs out of the equation.

Please read the following message for more info.

  http://article.gmane.org/gmane.linux.kernel/510293

Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
  • Loading branch information
Tejun Heo authored and Greg Kroah-Hartman committed Jul 11, 2007
1 parent eb36165 commit 0ab6608
Show file tree
Hide file tree
Showing 5 changed files with 271 additions and 113 deletions.
95 changes: 62 additions & 33 deletions fs/sysfs/bin.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,19 +23,28 @@
struct bin_buffer {
struct mutex mutex;
void *buffer;
int mmapped;
};

static int
fill_read(struct dentry *dentry, char *buffer, loff_t off, size_t count)
{
struct sysfs_dirent *attr_sd = dentry->d_fsdata;
struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr;
struct kobject * kobj = to_kobj(dentry->d_parent);
struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
int rc;

/* need attr_sd for attr, its parent for kobj */
if (!sysfs_get_active_two(attr_sd))
return -ENODEV;

if (!attr->read)
return -EIO;
rc = -EIO;
if (attr->read)
rc = attr->read(kobj, buffer, off, count);

return attr->read(kobj, buffer, off, count);
sysfs_put_active_two(attr_sd);

return rc;
}

static ssize_t
Expand Down Expand Up @@ -79,12 +88,20 @@ flush_write(struct dentry *dentry, char *buffer, loff_t offset, size_t count)
{
struct sysfs_dirent *attr_sd = dentry->d_fsdata;
struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr;
struct kobject *kobj = to_kobj(dentry->d_parent);
struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
int rc;

/* need attr_sd for attr, its parent for kobj */
if (!sysfs_get_active_two(attr_sd))
return -ENODEV;

if (!attr->write)
return -EIO;
rc = -EIO;
if (attr->write)
rc = attr->write(kobj, buffer, offset, count);

return attr->write(kobj, buffer, offset, count);
sysfs_put_active_two(attr_sd);

return rc;
}

static ssize_t write(struct file *file, const char __user *userbuf,
Expand Down Expand Up @@ -124,73 +141,85 @@ static int mmap(struct file *file, struct vm_area_struct *vma)
struct bin_buffer *bb = file->private_data;
struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr;
struct kobject *kobj = to_kobj(file->f_path.dentry->d_parent);
struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
int rc;

if (!attr->mmap)
return -EINVAL;

mutex_lock(&bb->mutex);
rc = attr->mmap(kobj, attr, vma);

/* need attr_sd for attr, its parent for kobj */
if (!sysfs_get_active_two(attr_sd))
return -ENODEV;

rc = -EINVAL;
if (attr->mmap)
rc = attr->mmap(kobj, attr, vma);

if (rc == 0 && !bb->mmapped)
bb->mmapped = 1;
else
sysfs_put_active_two(attr_sd);

mutex_unlock(&bb->mutex);

return rc;
}

static int open(struct inode * inode, struct file * file)
{
struct kobject *kobj = sysfs_get_kobject(file->f_path.dentry->d_parent);
struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr;
struct bin_buffer *bb = NULL;
int error = -EINVAL;
int error;

if (!kobj || !attr)
goto Done;
/* need attr_sd for attr */
if (!sysfs_get_active(attr_sd))
return -ENODEV;

/* Grab the module reference for this attribute if we have one */
/* Grab the module reference for this attribute */
error = -ENODEV;
if (!try_module_get(attr->attr.owner))
goto Done;
if (!try_module_get(attr->attr.owner))
goto err_sput;

error = -EACCES;
if ((file->f_mode & FMODE_WRITE) && !(attr->write || attr->mmap))
goto Error;
goto err_mput;
if ((file->f_mode & FMODE_READ) && !(attr->read || attr->mmap))
goto Error;
goto err_mput;

error = -ENOMEM;
bb = kzalloc(sizeof(*bb), GFP_KERNEL);
if (!bb)
goto Error;
goto err_mput;

bb->buffer = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (!bb->buffer)
goto Error;
goto err_mput;

mutex_init(&bb->mutex);
file->private_data = bb;

error = 0;
goto Done;
/* open succeeded, put active reference and pin attr_sd */
sysfs_put_active(attr_sd);
sysfs_get(attr_sd);
return 0;

Error:
kfree(bb);
err_mput:
module_put(attr->attr.owner);
Done:
if (error)
kobject_put(kobj);
err_sput:
sysfs_put_active(attr_sd);
kfree(bb);
return error;
}

static int release(struct inode * inode, struct file * file)
{
struct kobject * kobj = to_kobj(file->f_path.dentry->d_parent);
struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr;
struct bin_buffer *bb = file->private_data;

kobject_put(kobj);
if (bb->mmapped)
sysfs_put_active_two(attr_sd);
sysfs_put(attr_sd);
module_put(attr->attr.owner);
kfree(bb->buffer);
kfree(bb);
Expand Down
28 changes: 25 additions & 3 deletions fs/sysfs/dir.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,19 @@ void release_sysfs_dirent(struct sysfs_dirent * sd)
repeat:
parent_sd = sd->s_parent;

/* If @sd is being released after deletion, s_active is write
* locked. If @sd is cursor for directory walk or being
* released prematurely, s_active has no reader or writer.
*
* sysfs_deactivate() lies to lockdep that s_active is
* unlocked immediately. Lie one more time to cover the
* previous lie.
*/
if (!down_write_trylock(&sd->s_active))
rwsem_acquire(&sd->s_active.dep_map,
SYSFS_S_ACTIVE_DEACTIVATE, 0, _RET_IP_);
up_write(&sd->s_active);

if (sd->s_type & SYSFS_KOBJ_LINK)
sysfs_put(sd->s_elem.symlink.target_sd);
if (sd->s_type & SYSFS_COPY_NAME)
Expand Down Expand Up @@ -113,6 +126,7 @@ struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)

atomic_set(&sd->s_count, 1);
atomic_set(&sd->s_event, 1);
init_rwsem(&sd->s_active);
INIT_LIST_HEAD(&sd->s_children);
INIT_LIST_HEAD(&sd->s_sibling);

Expand Down Expand Up @@ -371,7 +385,6 @@ static void remove_dir(struct dentry * d)
d_delete(d);
sd = d->d_fsdata;
list_del_init(&sd->s_sibling);
sysfs_put(sd);
if (d->d_inode)
simple_rmdir(parent->d_inode,d);

Expand All @@ -380,6 +393,9 @@ static void remove_dir(struct dentry * d)

mutex_unlock(&parent->d_inode->i_mutex);
dput(parent);

sysfs_deactivate(sd);
sysfs_put(sd);
}

void sysfs_remove_subdir(struct dentry * d)
Expand All @@ -390,6 +406,7 @@ void sysfs_remove_subdir(struct dentry * d)

static void __sysfs_remove_dir(struct dentry *dentry)
{
LIST_HEAD(removed);
struct sysfs_dirent * parent_sd;
struct sysfs_dirent * sd, * tmp;

Expand All @@ -403,12 +420,17 @@ static void __sysfs_remove_dir(struct dentry *dentry)
list_for_each_entry_safe(sd, tmp, &parent_sd->s_children, s_sibling) {
if (!sd->s_type || !(sd->s_type & SYSFS_NOT_PINNED))
continue;
list_del_init(&sd->s_sibling);
list_move(&sd->s_sibling, &removed);
sysfs_drop_dentry(sd, dentry);
sysfs_put(sd);
}
mutex_unlock(&dentry->d_inode->i_mutex);

list_for_each_entry_safe(sd, tmp, &removed, s_sibling) {
list_del_init(&sd->s_sibling);
sysfs_deactivate(sd);
sysfs_put(sd);
}

remove_dir(dentry);
/**
* Drop reference from dget() on entrance.
Expand Down
Loading

0 comments on commit 0ab6608

Please sign in to comment.