Skip to content

Commit

Permalink
Merge patch series "file: remove f_version"
Browse files Browse the repository at this point in the history
Christian Brauner <brauner@kernel.org> says:

The f_version member in struct file isn't particularly well-defined. It
is mainly used as a cookie to detect concurrent seeks when iterating
directories. But it is also abused by some subsystems for completely
unrelated things.

It is mostly a directory specific thing that doesn't really need to live
in struct file and with its wonky semantics it really lacks a specific
function.

For pipes, f_version is (ab)used to defer poll notifications until a
write has happened. And struct pipe_inode_info is used by multiple
struct files in their ->private_data so there's no chance of pushing
that down into file->private_data without introducing another pointer
indirection.

But this should be a solvable problem. Only regular files with
FMODE_ATOMIC_POS and directories require f_pos_lock. Pipes and other
files don't. So this adds a union into struct file encompassing
f_pos_lock and a pipe specific f_pipe member that pipes can use. This
union of course can be extended to other file types and is similar to
what we do in struct inode already.

* patches from https://lore.kernel.org/r/20240830-vfs-file-f_version-v1-0-6d3e4816aa7b@kernel.org:
  fs: remove f_version
  pipe: use f_pipe
  fs: add f_pipe
  ubifs: store cookie in private data
  ufs: store cookie in private data
  udf: store cookie in private data
  proc: store cookie in private data
  ocfs2: store cookie in private data
  input: remove f_version abuse
  ext4: store cookie in private data
  ext2: store cookie in private data
  affs: store cookie in private data
  fs: add generic_llseek_cookie()
  fs: use must_set_pos()
  fs: add must_set_pos()
  fs: add vfs_setpos_cookie()
  s390: remove unused f_version
  ceph: remove unused f_version
  adi: remove unused f_version
  file: remove pointless comment

Link: https://lore.kernel.org/r/20240830-vfs-file-f_version-v1-0-6d3e4816aa7b@kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
  • Loading branch information
Christian Brauner committed Sep 12, 2024
2 parents 0f389ad + 11068e0 commit 24a988f
Show file tree
Hide file tree
Showing 20 changed files with 405 additions and 143 deletions.
1 change: 0 additions & 1 deletion drivers/char/adi.c
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,6 @@ static loff_t adi_llseek(struct file *file, loff_t offset, int whence)

if (offset != file->f_pos) {
file->f_pos = offset;
file->f_version = 0;
ret = offset;
}

Expand Down
47 changes: 22 additions & 25 deletions drivers/input/input.c
Original file line number Diff line number Diff line change
Expand Up @@ -1079,33 +1079,31 @@ static inline void input_wakeup_procfs_readers(void)
wake_up(&input_devices_poll_wait);
}

struct input_seq_state {
unsigned short pos;
bool mutex_acquired;
int input_devices_state;
};

static __poll_t input_proc_devices_poll(struct file *file, poll_table *wait)
{
struct seq_file *seq = file->private_data;
struct input_seq_state *state = seq->private;

poll_wait(file, &input_devices_poll_wait, wait);
if (file->f_version != input_devices_state) {
file->f_version = input_devices_state;
if (state->input_devices_state != input_devices_state) {
state->input_devices_state = input_devices_state;
return EPOLLIN | EPOLLRDNORM;
}

return 0;
}

union input_seq_state {
struct {
unsigned short pos;
bool mutex_acquired;
};
void *p;
};

static void *input_devices_seq_start(struct seq_file *seq, loff_t *pos)
{
union input_seq_state *state = (union input_seq_state *)&seq->private;
struct input_seq_state *state = seq->private;
int error;

/* We need to fit into seq->private pointer */
BUILD_BUG_ON(sizeof(union input_seq_state) != sizeof(seq->private));

error = mutex_lock_interruptible(&input_mutex);
if (error) {
state->mutex_acquired = false;
Expand All @@ -1124,7 +1122,7 @@ static void *input_devices_seq_next(struct seq_file *seq, void *v, loff_t *pos)

static void input_seq_stop(struct seq_file *seq, void *v)
{
union input_seq_state *state = (union input_seq_state *)&seq->private;
struct input_seq_state *state = seq->private;

if (state->mutex_acquired)
mutex_unlock(&input_mutex);
Expand Down Expand Up @@ -1210,25 +1208,23 @@ static const struct seq_operations input_devices_seq_ops = {

static int input_proc_devices_open(struct inode *inode, struct file *file)
{
return seq_open(file, &input_devices_seq_ops);
return seq_open_private(file, &input_devices_seq_ops,
sizeof(struct input_seq_state));
}

static const struct proc_ops input_devices_proc_ops = {
.proc_open = input_proc_devices_open,
.proc_poll = input_proc_devices_poll,
.proc_read = seq_read,
.proc_lseek = seq_lseek,
.proc_release = seq_release,
.proc_release = seq_release_private,
};

static void *input_handlers_seq_start(struct seq_file *seq, loff_t *pos)
{
union input_seq_state *state = (union input_seq_state *)&seq->private;
struct input_seq_state *state = seq->private;
int error;

/* We need to fit into seq->private pointer */
BUILD_BUG_ON(sizeof(union input_seq_state) != sizeof(seq->private));

error = mutex_lock_interruptible(&input_mutex);
if (error) {
state->mutex_acquired = false;
Expand All @@ -1243,7 +1239,7 @@ static void *input_handlers_seq_start(struct seq_file *seq, loff_t *pos)

static void *input_handlers_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
union input_seq_state *state = (union input_seq_state *)&seq->private;
struct input_seq_state *state = seq->private;

state->pos = *pos + 1;
return seq_list_next(v, &input_handler_list, pos);
Expand All @@ -1252,7 +1248,7 @@ static void *input_handlers_seq_next(struct seq_file *seq, void *v, loff_t *pos)
static int input_handlers_seq_show(struct seq_file *seq, void *v)
{
struct input_handler *handler = container_of(v, struct input_handler, node);
union input_seq_state *state = (union input_seq_state *)&seq->private;
struct input_seq_state *state = seq->private;

seq_printf(seq, "N: Number=%u Name=%s", state->pos, handler->name);
if (handler->filter)
Expand All @@ -1273,14 +1269,15 @@ static const struct seq_operations input_handlers_seq_ops = {

static int input_proc_handlers_open(struct inode *inode, struct file *file)
{
return seq_open(file, &input_handlers_seq_ops);
return seq_open_private(file, &input_handlers_seq_ops,
sizeof(struct input_seq_state));
}

static const struct proc_ops input_handlers_proc_ops = {
.proc_open = input_proc_handlers_open,
.proc_read = seq_read,
.proc_lseek = seq_lseek,
.proc_release = seq_release,
.proc_release = seq_release_private,
};

static int __init input_proc_init(void)
Expand Down
3 changes: 0 additions & 3 deletions drivers/s390/char/hmcdrv_dev.c
Original file line number Diff line number Diff line change
Expand Up @@ -186,9 +186,6 @@ static loff_t hmcdrv_dev_seek(struct file *fp, loff_t pos, int whence)
if (pos < 0)
return -EINVAL;

if (fp->f_pos != pos)
++fp->f_version;

fp->f_pos = pos;
return pos;
}
Expand Down
44 changes: 38 additions & 6 deletions fs/affs/dir.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,44 @@
#include <linux/iversion.h>
#include "affs.h"

struct affs_dir_data {
unsigned long ino;
u64 cookie;
};

static int affs_readdir(struct file *, struct dir_context *);

static loff_t affs_dir_llseek(struct file *file, loff_t offset, int whence)
{
struct affs_dir_data *data = file->private_data;

return generic_llseek_cookie(file, offset, whence, &data->cookie);
}

static int affs_dir_open(struct inode *inode, struct file *file)
{
struct affs_dir_data *data;

data = kzalloc(sizeof(struct affs_dir_data), GFP_KERNEL);
if (!data)
return -ENOMEM;
file->private_data = data;
return 0;
}

static int affs_dir_release(struct inode *inode, struct file *file)
{
kfree(file->private_data);
return 0;
}

const struct file_operations affs_dir_operations = {
.open = affs_dir_open,
.read = generic_read_dir,
.llseek = generic_file_llseek,
.llseek = affs_dir_llseek,
.iterate_shared = affs_readdir,
.fsync = affs_file_fsync,
.release = affs_dir_release,
};

/*
Expand All @@ -45,6 +76,7 @@ static int
affs_readdir(struct file *file, struct dir_context *ctx)
{
struct inode *inode = file_inode(file);
struct affs_dir_data *data = file->private_data;
struct super_block *sb = inode->i_sb;
struct buffer_head *dir_bh = NULL;
struct buffer_head *fh_bh = NULL;
Expand All @@ -59,7 +91,7 @@ affs_readdir(struct file *file, struct dir_context *ctx)
pr_debug("%s(ino=%lu,f_pos=%llx)\n", __func__, inode->i_ino, ctx->pos);

if (ctx->pos < 2) {
file->private_data = (void *)0;
data->ino = 0;
if (!dir_emit_dots(file, ctx))
return 0;
}
Expand All @@ -80,8 +112,8 @@ affs_readdir(struct file *file, struct dir_context *ctx)
/* If the directory hasn't changed since the last call to readdir(),
* we can jump directly to where we left off.
*/
ino = (u32)(long)file->private_data;
if (ino && inode_eq_iversion(inode, file->f_version)) {
ino = data->ino;
if (ino && inode_eq_iversion(inode, data->cookie)) {
pr_debug("readdir() left off=%d\n", ino);
goto inside;
}
Expand Down Expand Up @@ -131,8 +163,8 @@ affs_readdir(struct file *file, struct dir_context *ctx)
} while (ino);
}
done:
file->f_version = inode_query_iversion(inode);
file->private_data = (void *)(long)ino;
data->cookie = inode_query_iversion(inode);
data->ino = ino;
affs_brelse(fh_bh);

out_brelse_dir:
Expand Down
1 change: 0 additions & 1 deletion fs/ceph/dir.c
Original file line number Diff line number Diff line change
Expand Up @@ -707,7 +707,6 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)

if (offset != file->f_pos) {
file->f_pos = offset;
file->f_version = 0;
dfi->file_info.flags &= ~CEPH_F_ATEND;
}
retval = offset;
Expand Down
28 changes: 25 additions & 3 deletions fs/ext2/dir.c
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ ext2_readdir(struct file *file, struct dir_context *ctx)
unsigned long n = pos >> PAGE_SHIFT;
unsigned long npages = dir_pages(inode);
unsigned chunk_mask = ~(ext2_chunk_size(inode)-1);
bool need_revalidate = !inode_eq_iversion(inode, file->f_version);
bool need_revalidate = !inode_eq_iversion(inode, *(u64 *)file->private_data);
bool has_filetype;

if (pos > inode->i_size - EXT2_DIR_REC_LEN(1))
Expand All @@ -290,7 +290,7 @@ ext2_readdir(struct file *file, struct dir_context *ctx)
offset = ext2_validate_entry(kaddr, offset, chunk_mask);
ctx->pos = (n<<PAGE_SHIFT) + offset;
}
file->f_version = inode_query_iversion(inode);
*(u64 *)file->private_data = inode_query_iversion(inode);
need_revalidate = false;
}
de = (ext2_dirent *)(kaddr+offset);
Expand Down Expand Up @@ -703,8 +703,30 @@ int ext2_empty_dir(struct inode *inode)
return 0;
}

static int ext2_dir_open(struct inode *inode, struct file *file)
{
file->private_data = kzalloc(sizeof(u64), GFP_KERNEL);
if (!file->private_data)
return -ENOMEM;
return 0;
}

static int ext2_dir_release(struct inode *inode, struct file *file)
{
kfree(file->private_data);
return 0;
}

static loff_t ext2_dir_llseek(struct file *file, loff_t offset, int whence)
{
return generic_llseek_cookie(file, offset, whence,
(u64 *)file->private_data);
}

const struct file_operations ext2_dir_operations = {
.llseek = generic_file_llseek,
.open = ext2_dir_open,
.release = ext2_dir_release,
.llseek = ext2_dir_llseek,
.read = generic_read_dir,
.iterate_shared = ext2_readdir,
.unlocked_ioctl = ext2_ioctl,
Expand Down
50 changes: 28 additions & 22 deletions fs/ext4/dir.c
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
struct super_block *sb = inode->i_sb;
struct buffer_head *bh = NULL;
struct fscrypt_str fstr = FSTR_INIT(NULL, 0);
struct dir_private_info *info = file->private_data;

err = fscrypt_prepare_readdir(inode);
if (err)
Expand Down Expand Up @@ -229,7 +230,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
* readdir(2), then we might be pointing to an invalid
* dirent right now. Scan from the start of the block
* to make sure. */
if (!inode_eq_iversion(inode, file->f_version)) {
if (!inode_eq_iversion(inode, info->cookie)) {
for (i = 0; i < sb->s_blocksize && i < offset; ) {
de = (struct ext4_dir_entry_2 *)
(bh->b_data + i);
Expand All @@ -249,7 +250,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
offset = i;
ctx->pos = (ctx->pos & ~(sb->s_blocksize - 1))
| offset;
file->f_version = inode_query_iversion(inode);
info->cookie = inode_query_iversion(inode);
}

while (ctx->pos < inode->i_size
Expand Down Expand Up @@ -384,6 +385,7 @@ static inline loff_t ext4_get_htree_eof(struct file *filp)
static loff_t ext4_dir_llseek(struct file *file, loff_t offset, int whence)
{
struct inode *inode = file->f_mapping->host;
struct dir_private_info *info = file->private_data;
int dx_dir = is_dx_dir(inode);
loff_t ret, htree_max = ext4_get_htree_eof(file);

Expand All @@ -392,7 +394,7 @@ static loff_t ext4_dir_llseek(struct file *file, loff_t offset, int whence)
htree_max, htree_max);
else
ret = ext4_llseek(file, offset, whence);
file->f_version = inode_peek_iversion(inode) - 1;
info->cookie = inode_peek_iversion(inode) - 1;
return ret;
}

Expand Down Expand Up @@ -429,18 +431,15 @@ static void free_rb_tree_fname(struct rb_root *root)
*root = RB_ROOT;
}


static struct dir_private_info *ext4_htree_create_dir_info(struct file *filp,
loff_t pos)
static void ext4_htree_init_dir_info(struct file *filp, loff_t pos)
{
struct dir_private_info *p;

p = kzalloc(sizeof(*p), GFP_KERNEL);
if (!p)
return NULL;
p->curr_hash = pos2maj_hash(filp, pos);
p->curr_minor_hash = pos2min_hash(filp, pos);
return p;
struct dir_private_info *p = filp->private_data;

if (is_dx_dir(file_inode(filp)) && !p->initialized) {
p->curr_hash = pos2maj_hash(filp, pos);
p->curr_minor_hash = pos2min_hash(filp, pos);
p->initialized = true;
}
}

void ext4_htree_free_dir_info(struct dir_private_info *p)
Expand Down Expand Up @@ -552,12 +551,7 @@ static int ext4_dx_readdir(struct file *file, struct dir_context *ctx)
struct fname *fname;
int ret = 0;

if (!info) {
info = ext4_htree_create_dir_info(file, ctx->pos);
if (!info)
return -ENOMEM;
file->private_data = info;
}
ext4_htree_init_dir_info(file, ctx->pos);

if (ctx->pos == ext4_get_htree_eof(file))
return 0; /* EOF */
Expand Down Expand Up @@ -590,10 +584,10 @@ static int ext4_dx_readdir(struct file *file, struct dir_context *ctx)
* cached entries.
*/
if ((!info->curr_node) ||
!inode_eq_iversion(inode, file->f_version)) {
!inode_eq_iversion(inode, info->cookie)) {
info->curr_node = NULL;
free_rb_tree_fname(&info->root);
file->f_version = inode_query_iversion(inode);
info->cookie = inode_query_iversion(inode);
ret = ext4_htree_fill_tree(file, info->curr_hash,
info->curr_minor_hash,
&info->next_hash);
Expand Down Expand Up @@ -664,7 +658,19 @@ int ext4_check_all_de(struct inode *dir, struct buffer_head *bh, void *buf,
return 0;
}

static int ext4_dir_open(struct inode *inode, struct file *file)
{
struct dir_private_info *info;

info = kzalloc(sizeof(*info), GFP_KERNEL);
if (!info)
return -ENOMEM;
file->private_data = info;
return 0;
}

const struct file_operations ext4_dir_operations = {
.open = ext4_dir_open,
.llseek = ext4_dir_llseek,
.read = generic_read_dir,
.iterate_shared = ext4_readdir,
Expand Down
Loading

0 comments on commit 24a988f

Please sign in to comment.