Skip to content
Donald Buczek edited this page Jun 9, 2020 · 13 revisions

https://elixir.bootlin.com/linux/v5.7/source/fs/read_write.c#L596

/*
 * read(2) system call entry point.
 *
 * Carries no logic of its own: the three user-supplied arguments are handed
 * unchanged to ksys_read() and its result becomes the syscall return value.
 */
SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
{
	ssize_t nread = ksys_read(fd, buf, count);

	return nread;
}

https://elixir.bootlin.com/linux/v5.7/source/fs/read_write.c#L577

/*
 * In-kernel implementation behind the read system call.
 *
 * Resolves @fd to a struct fd via fdget_pos(), performs the read through
 * vfs_read() and releases the descriptor again with fdput_pos().
 *
 * Returns the value produced by vfs_read(), or -EBADF when @fd does not
 * resolve to a file.
 */
ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count)
{
	struct fd f = fdget_pos(fd);
	loff_t local_pos;
	loff_t *pos_ptr;
	ssize_t nread;

	if (!f.file)
		return -EBADF;

	/*
	 * When the file has a position, vfs_read() operates on a local copy
	 * so the file's own offset is only touched after a successful read.
	 */
	pos_ptr = file_ppos(f.file);
	if (pos_ptr) {
		local_pos = *pos_ptr;
		pos_ptr = &local_pos;
	}

	nread = vfs_read(f.file, buf, count, pos_ptr);

	/* Publish the advanced offset only if the read did not fail. */
	if (pos_ptr && nread >= 0)
		f.file->f_pos = local_pos;

	fdput_pos(f);
	return nread;
}

https://elixir.bootlin.com/linux/v5.7/source/include/linux/file.h#L72

/*
 * Look up @fd through __fdget_pos() and convert the tagged unsigned long it
 * returns into a struct fd using __to_fd().
 */
static inline struct fd fdget_pos(int fd)
{
	unsigned long tagged = __fdget_pos(fd);

	return __to_fd(tagged);
}

https://elixir.bootlin.com/linux/v5.7/source/fs/file.c#L818

/*
 * Look up @fd via __fdget() and, where required, take the file's position
 * lock.
 *
 * The value returned by __fdget() carries flag bits in its two lowest bits;
 * masking them off yields the struct file pointer.  If the file has
 * FMODE_ATOMIC_POS set and file_count() reports more than one reference,
 * f_pos_lock is acquired and FDPUT_POS_UNLOCK is OR-ed into the returned
 * value (presumably so the matching put knows to drop the lock again).
 */
unsigned long __fdget_pos(unsigned int fd)
{
	unsigned long tagged = __fdget(fd);
	struct file *filp = (struct file *)(tagged & ~3);

	/* Lookup failed: nothing to lock. */
	if (!filp)
		return tagged;

	/* File does not request atomic position updates. */
	if (!(filp->f_mode & FMODE_ATOMIC_POS))
		return tagged;

	/* With a single reference the lock is skipped. */
	if (file_count(filp) <= 1)
		return tagged;

	mutex_lock(&filp->f_pos_lock);
	return tagged | FDPUT_POS_UNLOCK;
}

https://elixir.bootlin.com/linux/v5.7/source/fs/file.c#L807

/*
 * Lightweight lookup of @fd: forwards to __fget_light() with FMODE_PATH as
 * the mode mask, so files that have FMODE_PATH set are not returned.
 */
unsigned long __fdget(unsigned int fd)
{
	unsigned long tagged = __fget_light(fd, FMODE_PATH);

	return tagged;
}

https://elixir.bootlin.com/linux/v5.7/source/fs/file.c#L774

/*
 * Lightweight file lookup - no refcnt increment if fd table isn't shared.
 *
 * You can use this instead of fget if you satisfy all of the following
 * conditions:
 * 1) You must call fput_light before exiting the syscall and returning control
 *    to userspace (i.e. you cannot remember the returned struct file * after
 *    returning to userspace).
 * 2) You must not call filp_close on the returned struct file * in between
 *    calls to fget_light and fput_light.
 * 3) You must not clone the current task in between the calls to fget_light
 *    and fput_light.
 *
 * The fput_needed flag returned by fget_light should be passed to the
 * corresponding fput_light.
 */

static unsigned long __fget_light(unsigned int fd, fmode_t mask)
{
	struct files_struct *files = current->files;
	struct file *file;

	if (atomic_read(&files->count) != 1) {
		/*
		 * The descriptor table has more than one user: go through
		 * __fget() and tag the result with FDPUT_FPUT.
		 */
		file = __fget(fd, mask, 1);
		return file ? (FDPUT_FPUT | (unsigned long)file) : 0;
	}

	/* Sole user of the table: plain lookup, result returned untagged. */
	file = __fcheck_files(files, fd);
	if (!file || unlikely(file->f_mode & mask))
		return 0;

	return (unsigned long)file;
}

https://elixir.bootlin.com/linux/v5.7/source/fs/read_write.c#L447

/*
 * Read up to @count bytes from @file into the user buffer @buf.
 *
 * Returns:
 *   -EBADF   if the file was not opened with FMODE_READ,
 *   -EINVAL  if the file lacks FMODE_CAN_READ,
 *   -EFAULT  if access_ok() rejects the user buffer,
 *   the non-zero result of rw_verify_area() if that check fails,
 *   otherwise whatever __vfs_read() returns.
 *
 * The request is clamped to MAX_RW_COUNT before it is passed down.
 */
ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
{
	ssize_t ret;

	if (!(file->f_mode & FMODE_READ))
		return -EBADF;
	if (!(file->f_mode & FMODE_CAN_READ))
		return -EINVAL;
	if (unlikely(!access_ok(buf, count)))
		return -EFAULT;

	ret = rw_verify_area(READ, file, pos, count);
	if (ret)
		return ret;

	if (count > MAX_RW_COUNT)
		count = MAX_RW_COUNT;

	ret = __vfs_read(file, buf, count, pos);
	if (ret > 0) {
		/* Notification and byte accounting only when data was read. */
		fsnotify_access(file);
		add_rchar(current, ret);
	}

	/* The syscall counter is bumped whether or not data was returned. */
	inc_syscr(current);

	return ret;
}

https://elixir.bootlin.com/linux/v5.7/source/fs/read_write.c#L422

/*
 * Dispatch a read to the file's operations table.
 *
 * The ->read method is preferred; if it is absent but ->read_iter exists,
 * the request goes through new_sync_read().  A file offering neither
 * method yields -EINVAL.
 */
ssize_t __vfs_read(struct file *file, char __user *buf, size_t count,
		   loff_t *pos)
{
	if (file->f_op->read)
		return file->f_op->read(file, buf, count, pos);

	if (file->f_op->read_iter)
		return new_sync_read(file, buf, count, pos);

	return -EINVAL;
}

https://elixir.bootlin.com/linux/v5.7/source/fs/read_write.c#L404

/*
 * Perform a read through the file's read_iter path.
 *
 * Wraps the user buffer in a single-segment iovec, sets up a kiocb
 * starting at *@ppos (or 0 when @ppos is NULL), and invokes
 * call_read_iter().  On return the kiocb's position is written back
 * through @ppos if one was supplied.
 */
static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
{
	struct iovec vec = { .iov_base = buf, .iov_len = len };
	struct iov_iter it;
	struct kiocb req;
	ssize_t nread;

	init_sync_kiocb(&req, filp);
	if (ppos)
		req.ki_pos = *ppos;
	else
		req.ki_pos = 0;
	iov_iter_init(&it, READ, &vec, 1, len);

	nread = call_read_iter(filp, &req, &it);

	/* This path must never see a queued (-EIOCBQUEUED) result. */
	BUG_ON(nread == -EIOCBQUEUED);

	if (ppos)
		*ppos = req.ki_pos;

	return nread;
}

https://elixir.bootlin.com/linux/v5.7/source/include/linux/fs.h#L2076

/*
 * Initialise @kiocb for an I/O request on @filp.
 *
 * The whole structure is overwritten with a compound literal, so every
 * member not named below is zero-initialised.  Flags are derived from the
 * file via iocb_flags(), the write hint comes from file_write_hint()
 * filtered through ki_hint_validate(), and the I/O priority is taken from
 * get_current_ioprio().
 */
static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
{
	*kiocb = (struct kiocb) {
		.ki_filp = filp,
		.ki_flags = iocb_flags(filp),
		.ki_hint = ki_hint_validate(file_write_hint(filp)),
		.ki_ioprio = get_current_ioprio(),
	};
}

https://elixir.bootlin.com/linux/v5.7/source/include/linux/fs.h#L3421

/*
 * Build the IOCB_* flag set for a request on @file from the file's open
 * flags and related state.
 *
 * Returns the OR of every IOCB_* bit whose condition holds, or 0 if none do.
 */
static inline int iocb_flags(struct file *file)
{
	int flags = 0;

	/* O_APPEND on the file -> IOCB_APPEND */
	if (file->f_flags & O_APPEND)
		flags |= IOCB_APPEND;

	/* io_is_direct() reports direct I/O -> IOCB_DIRECT */
	if (io_is_direct(file))
		flags |= IOCB_DIRECT;

	/* O_DSYNC on the file, or IS_SYNC() on the mapping's host -> IOCB_DSYNC */
	if ((file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host))
		flags |= IOCB_DSYNC;

	/* __O_SYNC on the file -> IOCB_SYNC */
	if (file->f_flags & __O_SYNC)
		flags |= IOCB_SYNC;

	return flags;
}

https://elixir.bootlin.com/linux/v5.7/source/include/linux/fs.h#L2057

/*
 * Return the write hint that applies to @file: the file's own
 * f_write_hint if one has been set, otherwise the i_write_hint of the
 * inode returned by file_inode().
 */
static inline enum rw_hint file_write_hint(struct file *file)
{
	/* No per-file hint: fall back to the inode's hint. */
	if (file->f_write_hint == WRITE_LIFE_NOT_SET)
		return file_inode(file)->i_write_hint;

	return file->f_write_hint;
}

https://elixir.bootlin.com/linux/v5.7/source/include/linux/fs.h#L2067

static inline u16 ki_hint_validate(enum rw_hint hint)
{
	typeof(((struct kiocb *)0)->ki_hint) max_hint = -1;

	if (hint <= max_hint)
		return hint;
	return 0;
}

https://elixir.bootlin.com/linux/v5.7/source/include/linux/ioprio.h#L77

/*
 * If the calling process has set an I/O priority, use that. Otherwise, return
 * the default I/O priority.
 */
static inline int get_current_ioprio(void)
{
	struct io_context *ioc = current->io_context;

	/* No I/O context on the current task: report class NONE, data 0. */
	if (!ioc)
		return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0);

	return ioc->ioprio;
}

Links