Skip to content

Commit

Permalink
erofs: support on-disk compressed fragments data
Browse files Browse the repository at this point in the history
Introduce on-disk compressed fragments data feature.

This approach adds a new field called `h_fragmentoff` to the per-file
compression header. It records the offset, within the special packed
inode, of the fragment data: either the file's tail pcluster or the
whole file.

Similar to ztailpacking, it also finds and records the 'headlcn' of the
tail pcluster when initializing the per-inode zmap, which makes
follow-on requests easier.

Signed-off-by: Yue Hu <huyue2@coolpad.com>
Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Link: https://lore.kernel.org/r/YzHKxcFTlHGgXeH9@B-P7TQMD6M-0146.local
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
  • Loading branch information
Yue Hu authored and Gao Xiang committed Sep 26, 2022
1 parent fdffc09 commit b15b2e3
Show file tree
Hide file tree
Showing 6 changed files with 152 additions and 17 deletions.
33 changes: 27 additions & 6 deletions fs/erofs/erofs_fs.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,16 @@
#define EROFS_FEATURE_INCOMPAT_DEVICE_TABLE 0x00000008
#define EROFS_FEATURE_INCOMPAT_COMPR_HEAD2 0x00000008
#define EROFS_FEATURE_INCOMPAT_ZTAILPACKING 0x00000010
#define EROFS_FEATURE_INCOMPAT_FRAGMENTS 0x00000020
#define EROFS_ALL_FEATURE_INCOMPAT \
(EROFS_FEATURE_INCOMPAT_ZERO_PADDING | \
EROFS_FEATURE_INCOMPAT_COMPR_CFGS | \
EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER | \
EROFS_FEATURE_INCOMPAT_CHUNKED_FILE | \
EROFS_FEATURE_INCOMPAT_DEVICE_TABLE | \
EROFS_FEATURE_INCOMPAT_COMPR_HEAD2 | \
EROFS_FEATURE_INCOMPAT_ZTAILPACKING)
EROFS_FEATURE_INCOMPAT_ZTAILPACKING | \
EROFS_FEATURE_INCOMPAT_FRAGMENTS)

#define EROFS_SB_EXTSLOT_SIZE 16

Expand Down Expand Up @@ -71,7 +73,9 @@ struct erofs_super_block {
} __packed u1;
__le16 extra_devices; /* # of devices besides the primary device */
__le16 devt_slotoff; /* startoff = devt_slotoff * devt_slotsize */
__u8 reserved2[38];
__u8 reserved[6];
__le64 packed_nid; /* nid of the special packed inode */
__u8 reserved2[24];
};

/*
Expand Down Expand Up @@ -296,17 +300,26 @@ struct z_erofs_lzma_cfgs {
* bit 2 : HEAD2 big pcluster (0 - off; 1 - on)
* bit 3 : tailpacking inline pcluster (0 - off; 1 - on)
* bit 4 : interlaced plain pcluster (0 - off; 1 - on)
* bit 5 : fragment pcluster (0 - off; 1 - on)
*/
#define Z_EROFS_ADVISE_COMPACTED_2B 0x0001
#define Z_EROFS_ADVISE_BIG_PCLUSTER_1 0x0002
#define Z_EROFS_ADVISE_BIG_PCLUSTER_2 0x0004
#define Z_EROFS_ADVISE_INLINE_PCLUSTER 0x0008
#define Z_EROFS_ADVISE_INTERLACED_PCLUSTER 0x0010
#define Z_EROFS_ADVISE_FRAGMENT_PCLUSTER 0x0020

#define Z_EROFS_FRAGMENT_INODE_BIT 7
struct z_erofs_map_header {
__le16 h_reserved1;
/* indicates the encoded size of tailpacking data */
__le16 h_idata_size;
union {
/* fragment data offset in the packed inode */
__le32 h_fragmentoff;
struct {
__le16 h_reserved1;
/* indicates the encoded size of tailpacking data */
__le16 h_idata_size;
};
};
__le16 h_advise;
/*
* bit 0-3 : algorithm type of head 1 (logical cluster type 01);
Expand All @@ -315,7 +328,8 @@ struct z_erofs_map_header {
__u8 h_algorithmtype;
/*
* bit 0-2 : logical cluster bits - 12, e.g. 0 for 4096;
* bit 3-7 : reserved.
* bit 3-6 : reserved;
* bit 7 : move the whole file into packed inode or not.
*/
__u8 h_clusterbits;
};
Expand Down Expand Up @@ -404,6 +418,10 @@ struct erofs_dirent {
/* check the EROFS on-disk layout strictly at compile time */
static inline void erofs_check_ondisk_layout_definitions(void)
{
const __le64 fmh = *(__le64 *)&(struct z_erofs_map_header) {
.h_clusterbits = 1 << Z_EROFS_FRAGMENT_INODE_BIT
};

BUILD_BUG_ON(sizeof(struct erofs_super_block) != 128);
BUILD_BUG_ON(sizeof(struct erofs_inode_compact) != 32);
BUILD_BUG_ON(sizeof(struct erofs_inode_extended) != 64);
Expand All @@ -421,6 +439,9 @@ static inline void erofs_check_ondisk_layout_definitions(void)

BUILD_BUG_ON(BIT(Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS) <
Z_EROFS_VLE_CLUSTER_TYPE_MAX - 1);
/* exclude old compiler versions like gcc 7.5.0 */
BUILD_BUG_ON(__builtin_constant_p(fmh) ?
fmh != cpu_to_le64(1ULL << 63) : 0);
}

#endif
16 changes: 13 additions & 3 deletions fs/erofs/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ struct erofs_sb_info {
struct inode *managed_cache;

struct erofs_sb_lz4_info lz4;
struct inode *packed_inode;
#endif /* CONFIG_EROFS_FS_ZIP */
struct erofs_dev_context *devs;
struct dax_device *dax_dev;
Expand Down Expand Up @@ -289,6 +290,7 @@ EROFS_FEATURE_FUNCS(chunked_file, incompat, INCOMPAT_CHUNKED_FILE)
EROFS_FEATURE_FUNCS(device_table, incompat, INCOMPAT_DEVICE_TABLE)
EROFS_FEATURE_FUNCS(compr_head2, incompat, INCOMPAT_COMPR_HEAD2)
EROFS_FEATURE_FUNCS(ztailpacking, incompat, INCOMPAT_ZTAILPACKING)
EROFS_FEATURE_FUNCS(fragments, incompat, INCOMPAT_FRAGMENTS)
EROFS_FEATURE_FUNCS(sb_chksum, compat, COMPAT_SB_CHKSUM)

/* atomic flag definitions */
Expand Down Expand Up @@ -324,8 +326,13 @@ struct erofs_inode {
unsigned char z_algorithmtype[2];
unsigned char z_logical_clusterbits;
unsigned long z_tailextent_headlcn;
erofs_off_t z_idataoff;
unsigned short z_idata_size;
union {
struct {
erofs_off_t z_idataoff;
unsigned short z_idata_size;
};
erofs_off_t z_fragmentoff;
};
};
#endif /* CONFIG_EROFS_FS_ZIP */
};
Expand Down Expand Up @@ -384,6 +391,7 @@ extern const struct address_space_operations z_erofs_aops;
/*
 * Bit numbers for the EROFS_MAP_* flags, allocated from BH_PrivateStart
 * upwards; each corresponding flag is defined as (1 << bit number).
 */
enum {
BH_Encoded = BH_PrivateStart,
BH_FullMapped,
BH_Fragment, /* backs EROFS_MAP_FRAGMENT: located in the packed inode */
};

/* Has a disk mapping */
Expand All @@ -394,6 +402,8 @@ enum {
#define EROFS_MAP_ENCODED (1 << BH_Encoded)
/* The length of extent is full */
#define EROFS_MAP_FULL_MAPPED (1 << BH_FullMapped)
/* Located in the special packed inode */
#define EROFS_MAP_FRAGMENT (1 << BH_Fragment)

struct erofs_map_blocks {
struct erofs_buf buf;
Expand All @@ -415,7 +425,7 @@ struct erofs_map_blocks {
#define EROFS_GET_BLOCKS_FIEMAP 0x0002
/* Used to map the whole extent if non-negligible data is requested for LZMA */
#define EROFS_GET_BLOCKS_READMORE 0x0004
/* Used to map tail extent for tailpacking inline pcluster */
/* Used to map tail extent for tailpacking inline or fragment pcluster */
#define EROFS_GET_BLOCKS_FINDTAIL 0x0008

enum {
Expand Down
15 changes: 15 additions & 0 deletions fs/erofs/super.c
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,17 @@ static int erofs_read_superblock(struct super_block *sb)
#endif
sbi->islotbits = ilog2(sizeof(struct erofs_inode_compact));
sbi->root_nid = le16_to_cpu(dsb->root_nid);
#ifdef CONFIG_EROFS_FS_ZIP
sbi->packed_inode = NULL;
if (erofs_sb_has_fragments(sbi) && dsb->packed_nid) {
sbi->packed_inode =
erofs_iget(sb, le64_to_cpu(dsb->packed_nid), false);
if (IS_ERR(sbi->packed_inode)) {
ret = PTR_ERR(sbi->packed_inode);
goto out;
}
}
#endif
sbi->inos = le64_to_cpu(dsb->inos);

sbi->build_time = le64_to_cpu(dsb->build_time);
Expand Down Expand Up @@ -411,6 +422,8 @@ static int erofs_read_superblock(struct super_block *sb)
erofs_info(sb, "EXPERIMENTAL compressed inline data feature in use. Use at your own risk!");
if (erofs_is_fscache_mode(sb))
erofs_info(sb, "EXPERIMENTAL fscache-based on-demand read feature in use. Use at your own risk!");
if (erofs_sb_has_fragments(sbi))
erofs_info(sb, "EXPERIMENTAL compressed fragments feature in use. Use at your own risk!");
out:
erofs_put_metabuf(&buf);
return ret;
Expand Down Expand Up @@ -947,6 +960,8 @@ static void erofs_put_super(struct super_block *sb)
#ifdef CONFIG_EROFS_FS_ZIP
iput(sbi->managed_cache);
sbi->managed_cache = NULL;
iput(sbi->packed_inode);
sbi->packed_inode = NULL;
#endif
erofs_fscache_unregister_fs(sb);
}
Expand Down
2 changes: 2 additions & 0 deletions fs/erofs/sysfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ EROFS_ATTR_FEATURE(device_table);
EROFS_ATTR_FEATURE(compr_head2);
EROFS_ATTR_FEATURE(sb_chksum);
EROFS_ATTR_FEATURE(ztailpacking);
EROFS_ATTR_FEATURE(fragments);

static struct attribute *erofs_feat_attrs[] = {
ATTR_LIST(zero_padding),
Expand All @@ -86,6 +87,7 @@ static struct attribute *erofs_feat_attrs[] = {
ATTR_LIST(compr_head2),
ATTR_LIST(sb_chksum),
ATTR_LIST(ztailpacking),
ATTR_LIST(fragments),
NULL,
};
ATTRIBUTE_GROUPS(erofs_feat);
Expand Down
50 changes: 49 additions & 1 deletion fs/erofs/zdata.c
Original file line number Diff line number Diff line change
Expand Up @@ -650,6 +650,35 @@ static bool should_alloc_managed_pages(struct z_erofs_decompress_frontend *fe,
la < fe->headoffset;
}

/*
 * Copy @len bytes of fragment data into @page, starting at byte @pageofs
 * within the page.
 *
 * The bytes come from the superblock's special packed inode.  @pos is given
 * relative to the start of this inode's fragment; z_fragmentoff is added
 * here to obtain the position inside the packed inode.  Each loop step
 * copies at most up to the end of the current EROFS block, so a request
 * that crosses block boundaries is served in several chunks.
 *
 * Returns 0 on success, -EUCLEAN if the filesystem has no packed inode, or
 * the error encoded in the pointer returned by erofs_bread().
 */
static int z_erofs_read_fragment(struct inode *inode, erofs_off_t pos,
				 struct page *page, unsigned int pageofs,
				 unsigned int len)
{
	struct inode *packed_inode = EROFS_I_SB(inode)->packed_inode;
	struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
	u8 *src, *dst;
	unsigned int i, cnt;

	/*
	 * The packed inode pointer may be NULL (e.g. the superblock carries
	 * no packed_nid) while an inode still claims to have fragment data.
	 * Treat that as a corrupted image rather than dereferencing NULL.
	 */
	if (!packed_inode)
		return -EUCLEAN;

	pos += EROFS_I(inode)->z_fragmentoff;
	for (i = 0; i < len; i += cnt) {
		/* never read past the end of the current block */
		cnt = min_t(unsigned int, len - i,
			    EROFS_BLKSIZ - erofs_blkoff(pos));
		src = erofs_bread(&buf, packed_inode,
				  erofs_blknr(pos), EROFS_KMAP);
		if (IS_ERR(src)) {
			erofs_put_metabuf(&buf);
			return PTR_ERR(src);
		}

		dst = kmap_local_page(page);
		memcpy(dst + pageofs + i, src + erofs_blkoff(pos), cnt);
		kunmap_local(dst);
		pos += cnt;
	}
	erofs_put_metabuf(&buf);
	return 0;
}

static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
struct page *page, struct page **pagepool)
{
Expand Down Expand Up @@ -688,7 +717,8 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
/* didn't get a valid pcluster previously (very rare) */
}

if (!(map->m_flags & EROFS_MAP_MAPPED))
if (!(map->m_flags & EROFS_MAP_MAPPED) ||
map->m_flags & EROFS_MAP_FRAGMENT)
goto hitted;

err = z_erofs_collector_begin(fe);
Expand Down Expand Up @@ -735,6 +765,24 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
zero_user_segment(page, cur, end);
goto next_part;
}
if (map->m_flags & EROFS_MAP_FRAGMENT) {
unsigned int pageofs, skip, len;

if (offset > map->m_la) {
pageofs = 0;
skip = offset - map->m_la;
} else {
pageofs = map->m_la & ~PAGE_MASK;
skip = 0;
}
len = min_t(unsigned int, map->m_llen - skip, end - cur);
err = z_erofs_read_fragment(inode, skip, page, pageofs, len);
if (err)
goto out;
++spiltted;
tight = false;
goto next_part;
}

exclusive = (!cur && (!spiltted || tight));
if (cur)
Expand Down
53 changes: 46 additions & 7 deletions fs/erofs/zmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ int z_erofs_fill_inode(struct inode *inode)
struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb);

if (!erofs_sb_has_big_pcluster(sbi) &&
!erofs_sb_has_ztailpacking(sbi) &&
!erofs_sb_has_ztailpacking(sbi) && !erofs_sb_has_fragments(sbi) &&
vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY) {
vi->z_advise = 0;
vi->z_algorithmtype[0] = 0;
Expand Down Expand Up @@ -55,10 +55,6 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags))
goto out_unlock;

DBG_BUGON(!erofs_sb_has_big_pcluster(EROFS_SB(sb)) &&
!erofs_sb_has_ztailpacking(EROFS_SB(sb)) &&
vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY);

pos = ALIGN(iloc(EROFS_SB(sb), vi->nid) + vi->inode_isize +
vi->xattr_isize, 8);
kaddr = erofs_read_metabuf(&buf, sb, erofs_blknr(pos),
Expand All @@ -69,6 +65,16 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
}

h = kaddr + erofs_blkoff(pos);
/*
* if the highest bit of the 8-byte map header is set, the whole file
* is stored in the packed inode. The remaining bits keep z_fragmentoff.
*/
if (h->h_clusterbits >> Z_EROFS_FRAGMENT_INODE_BIT) {
vi->z_advise = Z_EROFS_ADVISE_FRAGMENT_PCLUSTER;
vi->z_fragmentoff = le64_to_cpu(*(__le64 *)h) ^ (1ULL << 63);
vi->z_tailextent_headlcn = 0;
goto unmap_done;
}
vi->z_advise = le16_to_cpu(h->h_advise);
vi->z_algorithmtype[0] = h->h_algorithmtype & 15;
vi->z_algorithmtype[1] = h->h_algorithmtype >> 4;
Expand Down Expand Up @@ -123,6 +129,20 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
if (err < 0)
goto out_unlock;
}

if (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER &&
!(h->h_clusterbits >> Z_EROFS_FRAGMENT_INODE_BIT)) {
struct erofs_map_blocks map = {
.buf = __EROFS_BUF_INITIALIZER
};

vi->z_fragmentoff = le32_to_cpu(h->h_fragmentoff);
err = z_erofs_do_map_blocks(inode, &map,
EROFS_GET_BLOCKS_FINDTAIL);
erofs_put_metabuf(&map.buf);
if (err < 0)
goto out_unlock;
}
/* paired with smp_mb() at the beginning of the function */
smp_mb();
set_bit(EROFS_I_Z_INITED_BIT, &vi->flags);
Expand Down Expand Up @@ -598,6 +618,7 @@ static int z_erofs_do_map_blocks(struct inode *inode,
{
struct erofs_inode *const vi = EROFS_I(inode);
bool ztailpacking = vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER;
bool fragment = vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER;
struct z_erofs_maprecorder m = {
.inode = inode,
.map = map,
Expand Down Expand Up @@ -666,12 +687,19 @@ static int z_erofs_do_map_blocks(struct inode *inode,

map->m_llen = end - map->m_la;

if (flags & EROFS_GET_BLOCKS_FINDTAIL)
if (flags & EROFS_GET_BLOCKS_FINDTAIL) {
vi->z_tailextent_headlcn = m.lcn;
/* for non-compact indexes, fragmentoff is 64 bits */
if (fragment &&
vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY)
vi->z_fragmentoff |= (u64)m.pblk << 32;
}
if (ztailpacking && m.lcn == vi->z_tailextent_headlcn) {
map->m_flags |= EROFS_MAP_META;
map->m_pa = vi->z_idataoff;
map->m_plen = vi->z_idata_size;
} else if (fragment && m.lcn == vi->z_tailextent_headlcn) {
map->m_flags |= EROFS_MAP_FRAGMENT;
} else {
map->m_pa = blknr_to_addr(m.pblk);
err = z_erofs_get_extent_compressedlen(&m, initial_lcn);
Expand Down Expand Up @@ -715,6 +743,7 @@ int z_erofs_map_blocks_iter(struct inode *inode,
struct erofs_map_blocks *map,
int flags)
{
struct erofs_inode *const vi = EROFS_I(inode);
int err = 0;

trace_z_erofs_map_blocks_iter_enter(inode, map, flags);
Expand All @@ -731,6 +760,15 @@ int z_erofs_map_blocks_iter(struct inode *inode,
if (err)
goto out;

if ((vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER) &&
!vi->z_tailextent_headlcn) {
map->m_la = 0;
map->m_llen = inode->i_size;
map->m_flags = EROFS_MAP_MAPPED | EROFS_MAP_FULL_MAPPED |
EROFS_MAP_FRAGMENT;
goto out;
}

err = z_erofs_do_map_blocks(inode, map, flags);
out:
trace_z_erofs_map_blocks_iter_exit(inode, map, flags, err);
Expand All @@ -757,7 +795,8 @@ static int z_erofs_iomap_begin_report(struct inode *inode, loff_t offset,
iomap->length = map.m_llen;
if (map.m_flags & EROFS_MAP_MAPPED) {
iomap->type = IOMAP_MAPPED;
iomap->addr = map.m_pa;
iomap->addr = map.m_flags & EROFS_MAP_FRAGMENT ?
IOMAP_NULL_ADDR : map.m_pa;
} else {
iomap->type = IOMAP_HOLE;
iomap->addr = IOMAP_NULL_ADDR;
Expand Down

0 comments on commit b15b2e3

Please sign in to comment.