Skip to content

Commit

Permalink
erofs: support unencoded inodes for fileio
Browse files Browse the repository at this point in the history
Since EROFS only needs to handle read requests in simple contexts,
Just directly use vfs_iocb_iter_read() for data I/Os.

Reviewed-by: Sandeep Dhavale <dhavale@google.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Link: https://lore.kernel.org/r/20240905093031.2745929-1-hsiangkao@linux.alibaba.com
  • Loading branch information
Gao Xiang committed Sep 10, 2024
1 parent fb17675 commit ce63cb6
Show file tree
Hide file tree
Showing 6 changed files with 248 additions and 51 deletions.
1 change: 1 addition & 0 deletions fs/erofs/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@ erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o zutil.o
erofs-$(CONFIG_EROFS_FS_ZIP_LZMA) += decompressor_lzma.o
erofs-$(CONFIG_EROFS_FS_ZIP_DEFLATE) += decompressor_deflate.o
erofs-$(CONFIG_EROFS_FS_ZIP_ZSTD) += decompressor_zstd.o
erofs-$(CONFIG_EROFS_FS_BACKED_BY_FILE) += fileio.o
erofs-$(CONFIG_EROFS_FS_ONDEMAND) += fscache.o
50 changes: 46 additions & 4 deletions fs/erofs/data.c
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map)
if (map->m_la >= inode->i_size) {
/* leave out-of-bound access unmapped */
map->m_flags = 0;
map->m_plen = 0;
map->m_plen = map->m_llen;
goto out;
}

Expand Down Expand Up @@ -197,8 +197,13 @@ static void erofs_fill_from_devinfo(struct erofs_map_dev *map,
struct erofs_device_info *dif)
{
map->m_bdev = NULL;
if (dif->file && S_ISBLK(file_inode(dif->file)->i_mode))
map->m_bdev = file_bdev(dif->file);
map->m_fp = NULL;
if (dif->file) {
if (S_ISBLK(file_inode(dif->file)->i_mode))
map->m_bdev = file_bdev(dif->file);
else
map->m_fp = dif->file;
}
map->m_daxdev = dif->dax_dev;
map->m_dax_part_off = dif->dax_part_off;
map->m_fscache = dif->fscache;
Expand All @@ -215,6 +220,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
map->m_daxdev = EROFS_SB(sb)->dax_dev;
map->m_dax_part_off = EROFS_SB(sb)->dax_part_off;
map->m_fscache = EROFS_SB(sb)->s_fscache;
map->m_fp = EROFS_SB(sb)->fdev;

if (map->m_deviceid) {
down_read(&devs->rwsem);
Expand Down Expand Up @@ -250,6 +256,42 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
return 0;
}

/*
* bit 30: I/O error occurred on this folio
* bit 0 - 29: remaining parts to complete this folio
*/
#define EROFS_ONLINEFOLIO_EIO (1 << 30)

void erofs_onlinefolio_init(struct folio *folio)
{
union {
atomic_t o;
void *v;
} u = { .o = ATOMIC_INIT(1) };

folio->private = u.v; /* valid only if file-backed folio is locked */
}

void erofs_onlinefolio_split(struct folio *folio)
{
atomic_inc((atomic_t *)&folio->private);
}

void erofs_onlinefolio_end(struct folio *folio, int err)
{
int orig, v;

do {
orig = atomic_read((atomic_t *)&folio->private);
v = (orig - 1) | (err ? EROFS_ONLINEFOLIO_EIO : 0);
} while (atomic_cmpxchg((atomic_t *)&folio->private, orig, v) != orig);

if (v & ~EROFS_ONLINEFOLIO_EIO)
return;
folio->private = 0;
folio_end_read(folio, !(v & EROFS_ONLINEFOLIO_EIO));
}

static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
{
Expand Down Expand Up @@ -399,7 +441,7 @@ static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
}

/* for uncompressed (aligned) files and raw access for other files */
const struct address_space_operations erofs_raw_access_aops = {
const struct address_space_operations erofs_aops = {
.read_folio = erofs_read_folio,
.readahead = erofs_readahead,
.bmap = erofs_bmap,
Expand Down
178 changes: 178 additions & 0 deletions fs/erofs/fileio.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2024, Alibaba Cloud
*/
#include "internal.h"
#include <trace/events/erofs.h>

struct erofs_fileio_rq {
struct bio_vec bvecs[BIO_MAX_VECS];
struct bio bio;
struct kiocb iocb;
};

struct erofs_fileio {
struct erofs_map_blocks map;
struct erofs_map_dev dev;
struct erofs_fileio_rq *rq;
};

static void erofs_fileio_ki_complete(struct kiocb *iocb, long ret)
{
struct erofs_fileio_rq *rq =
container_of(iocb, struct erofs_fileio_rq, iocb);
struct folio_iter fi;

DBG_BUGON(rq->bio.bi_end_io);
if (ret > 0) {
if (ret != rq->bio.bi_iter.bi_size) {
bio_advance(&rq->bio, ret);
zero_fill_bio(&rq->bio);
}
ret = 0;
}
bio_for_each_folio_all(fi, &rq->bio) {
DBG_BUGON(folio_test_uptodate(fi.folio));
erofs_onlinefolio_end(fi.folio, ret);
}
bio_uninit(&rq->bio);
kfree(rq);
}

static void erofs_fileio_rq_submit(struct erofs_fileio_rq *rq)
{
struct iov_iter iter;
int ret;

if (!rq)
return;
rq->iocb.ki_pos = rq->bio.bi_iter.bi_sector << SECTOR_SHIFT;
rq->iocb.ki_ioprio = get_current_ioprio();
rq->iocb.ki_complete = erofs_fileio_ki_complete;
rq->iocb.ki_flags = (rq->iocb.ki_filp->f_mode & FMODE_CAN_ODIRECT) ?
IOCB_DIRECT : 0;
iov_iter_bvec(&iter, ITER_DEST, rq->bvecs, rq->bio.bi_vcnt,
rq->bio.bi_iter.bi_size);
ret = vfs_iocb_iter_read(rq->iocb.ki_filp, &rq->iocb, &iter);
if (ret != -EIOCBQUEUED)
erofs_fileio_ki_complete(&rq->iocb, ret);
}

static struct erofs_fileio_rq *erofs_fileio_rq_alloc(struct erofs_map_dev *mdev)
{
struct erofs_fileio_rq *rq = kzalloc(sizeof(*rq),
GFP_KERNEL | __GFP_NOFAIL);

bio_init(&rq->bio, NULL, rq->bvecs, BIO_MAX_VECS, REQ_OP_READ);
rq->iocb.ki_filp = mdev->m_fp;
return rq;
}

static int erofs_fileio_scan_folio(struct erofs_fileio *io, struct folio *folio)
{
struct inode *inode = folio_inode(folio);
struct erofs_map_blocks *map = &io->map;
unsigned int cur = 0, end = folio_size(folio), len, attached = 0;
loff_t pos = folio_pos(folio), ofs;
struct iov_iter iter;
struct bio_vec bv;
int err = 0;

erofs_onlinefolio_init(folio);
while (cur < end) {
if (!in_range(pos + cur, map->m_la, map->m_llen)) {
map->m_la = pos + cur;
map->m_llen = end - cur;
err = erofs_map_blocks(inode, map);
if (err)
break;
}

ofs = folio_pos(folio) + cur - map->m_la;
len = min_t(loff_t, map->m_llen - ofs, end - cur);
if (map->m_flags & EROFS_MAP_META) {
struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
void *src;

src = erofs_read_metabuf(&buf, inode->i_sb,
map->m_pa + ofs, EROFS_KMAP);
if (IS_ERR(src)) {
err = PTR_ERR(src);
break;
}
bvec_set_folio(&bv, folio, len, cur);
iov_iter_bvec(&iter, ITER_DEST, &bv, 1, len);
if (copy_to_iter(src, len, &iter) != len) {
erofs_put_metabuf(&buf);
err = -EIO;
break;
}
erofs_put_metabuf(&buf);
} else if (!(map->m_flags & EROFS_MAP_MAPPED)) {
folio_zero_segment(folio, cur, cur + len);
attached = 0;
} else {
if (io->rq && (map->m_pa + ofs != io->dev.m_pa ||
map->m_deviceid != io->dev.m_deviceid)) {
io_retry:
erofs_fileio_rq_submit(io->rq);
io->rq = NULL;
}

if (!io->rq) {
io->dev = (struct erofs_map_dev) {
.m_pa = io->map.m_pa + ofs,
.m_deviceid = io->map.m_deviceid,
};
err = erofs_map_dev(inode->i_sb, &io->dev);
if (err)
break;
io->rq = erofs_fileio_rq_alloc(&io->dev);
io->rq->bio.bi_iter.bi_sector = io->dev.m_pa >> 9;
attached = 0;
}
if (!attached++)
erofs_onlinefolio_split(folio);
if (!bio_add_folio(&io->rq->bio, folio, len, cur))
goto io_retry;
io->dev.m_pa += len;
}
cur += len;
}
erofs_onlinefolio_end(folio, err);
return err;
}

static int erofs_fileio_read_folio(struct file *file, struct folio *folio)
{
struct erofs_fileio io = {};
int err;

trace_erofs_read_folio(folio, true);
err = erofs_fileio_scan_folio(&io, folio);
erofs_fileio_rq_submit(io.rq);
return err;
}

static void erofs_fileio_readahead(struct readahead_control *rac)
{
struct inode *inode = rac->mapping->host;
struct erofs_fileio io = {};
struct folio *folio;
int err;

trace_erofs_readpages(inode, readahead_index(rac),
readahead_count(rac), true);
while ((folio = readahead_folio(rac))) {
err = erofs_fileio_scan_folio(&io, folio);
if (err && err != -EINTR)
erofs_err(inode->i_sb, "readahead error at folio %lu @ nid %llu",
folio->index, EROFS_I(inode)->nid);
}
erofs_fileio_rq_submit(io.rq);
}

const struct address_space_operations erofs_fileio_aops = {
.read_folio = erofs_fileio_read_folio,
.readahead = erofs_fileio_readahead,
};
17 changes: 12 additions & 5 deletions fs/erofs/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -250,11 +250,14 @@ static int erofs_fill_inode(struct inode *inode)
}

mapping_set_large_folios(inode->i_mapping);
if (erofs_is_fileio_mode(EROFS_SB(inode->i_sb))) {
/* XXX: data I/Os will be implemented in the following patches */
err = -EOPNOTSUPP;
} else if (erofs_inode_is_data_compressed(vi->datalayout)) {
if (erofs_inode_is_data_compressed(vi->datalayout)) {
#ifdef CONFIG_EROFS_FS_ZIP
#ifdef CONFIG_EROFS_FS_BACKED_BY_FILE
if (erofs_is_fileio_mode(EROFS_SB(inode->i_sb))) {
err = -EOPNOTSUPP;
goto out_unlock;
}
#endif
DO_ONCE_LITE_IF(inode->i_blkbits != PAGE_SHIFT,
erofs_info, inode->i_sb,
"EXPERIMENTAL EROFS subpage compressed block support in use. Use at your own risk!");
Expand All @@ -263,10 +266,14 @@ static int erofs_fill_inode(struct inode *inode)
err = -EOPNOTSUPP;
#endif
} else {
inode->i_mapping->a_ops = &erofs_raw_access_aops;
inode->i_mapping->a_ops = &erofs_aops;
#ifdef CONFIG_EROFS_FS_ONDEMAND
if (erofs_is_fscache_mode(inode->i_sb))
inode->i_mapping->a_ops = &erofs_fscache_access_aops;
#endif
#ifdef CONFIG_EROFS_FS_BACKED_BY_FILE
if (erofs_is_fileio_mode(EROFS_SB(inode->i_sb)))
inode->i_mapping->a_ops = &erofs_fileio_aops;
#endif
}
out_unlock:
Expand Down
7 changes: 6 additions & 1 deletion fs/erofs/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,7 @@ struct erofs_map_dev {
struct erofs_fscache *m_fscache;
struct block_device *m_bdev;
struct dax_device *m_daxdev;
struct file *m_fp;
u64 m_dax_part_off;

erofs_off_t m_pa;
Expand All @@ -380,7 +381,8 @@ struct erofs_map_dev {

extern const struct super_operations erofs_sops;

extern const struct address_space_operations erofs_raw_access_aops;
extern const struct address_space_operations erofs_aops;
extern const struct address_space_operations erofs_fileio_aops;
extern const struct address_space_operations z_erofs_aops;
extern const struct address_space_operations erofs_fscache_access_aops;

Expand Down Expand Up @@ -411,6 +413,9 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *dev);
int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len);
int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map);
void erofs_onlinefolio_init(struct folio *folio);
void erofs_onlinefolio_split(struct folio *folio);
void erofs_onlinefolio_end(struct folio *folio, int err);
struct inode *erofs_iget(struct super_block *sb, erofs_nid_t nid);
int erofs_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask,
Expand Down
Loading

0 comments on commit ce63cb6

Please sign in to comment.