Skip to content

Commit

Permalink
ext4: xattr-in-inode support
Browse files Browse the repository at this point in the history
Large xattr support is implemented for EXT4_FEATURE_INCOMPAT_EA_INODE.

If the size of an xattr value is larger than will fit in a single
external block, then the xattr value will be saved into the body
of an external xattr inode.

This also helps support a larger number of xattrs, since only the headers
will be stored in the in-inode space or the single external block.

The inode is referenced from the xattr header via "e_value_inum",
which was formerly "e_value_block", but that field was never used.
The e_value_size still contains the xattr size so that listing
xattrs does not need to look up the inode if the data is not accessed.

struct ext4_xattr_entry {
        __u8    e_name_len;     /* length of name */
        __u8    e_name_index;   /* attribute name index */
        __le16  e_value_offs;   /* offset in disk block of value */
        __le32  e_value_inum;   /* inode in which value is stored */
        __le32  e_value_size;   /* size of attribute value */
        __le32  e_hash;         /* hash value of name and value */
        char    e_name[0];      /* attribute name */
};

The xattr inode is marked with the EXT4_EA_INODE_FL flag and also
holds a back-reference to the owning inode in its i_mtime field,
allowing ext4 and e2fsck to verify that the correct inode is accessed.

[ Applied fix by Dan Carpenter to avoid freeing an ERR_PTR. ]

Lustre-Jira: https://jira.hpdd.intel.com/browse/LU-80
Lustre-bugzilla: https://bugzilla.lustre.org/show_bug.cgi?id=4424
Signed-off-by: Kalpak Shah <kalpak.shah@sun.com>
Signed-off-by: James Simmons <uja.ornl@gmail.com>
Signed-off-by: Andreas Dilger <andreas.dilger@intel.com>
Signed-off-by: Tahsin Erdogan <tahsin@google.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
  • Loading branch information
Andreas Dilger authored and Theodore Ts'o committed Jun 22, 2017
1 parent e08ac99 commit e50e512
Show file tree
Hide file tree
Showing 6 changed files with 604 additions and 56 deletions.
12 changes: 12 additions & 0 deletions fs/ext4/ext4.h
Original file line number Diff line number Diff line change
Expand Up @@ -1797,6 +1797,7 @@ EXT4_FEATURE_INCOMPAT_FUNCS(encrypt, ENCRYPT)
EXT4_FEATURE_INCOMPAT_EXTENTS| \
EXT4_FEATURE_INCOMPAT_64BIT| \
EXT4_FEATURE_INCOMPAT_FLEX_BG| \
EXT4_FEATURE_INCOMPAT_EA_INODE| \
EXT4_FEATURE_INCOMPAT_MMP | \
EXT4_FEATURE_INCOMPAT_INLINE_DATA | \
EXT4_FEATURE_INCOMPAT_ENCRYPT | \
Expand Down Expand Up @@ -2230,6 +2231,12 @@ struct mmpd_data {
*/
#define EXT4_MMP_MAX_CHECK_INTERVAL 300UL

/*
* Maximum size of xattr attributes for FEATURE_INCOMPAT_EA_INODE: 1 MiB.
* This limit is arbitrary, but is reasonable for the xattr API.
*/
#define EXT4_XATTR_MAX_LARGE_EA_SIZE (1024 * 1024)

/*
* Function prototypes
*/
Expand All @@ -2242,6 +2249,10 @@ struct mmpd_data {
# define ATTRIB_NORET __attribute__((noreturn))
# define NORET_AND noreturn,

/*
 * Counted, variable-length array of unsigned ints.
 *
 * xia_inodes is a zero-length trailing array, so the struct itself reserves
 * no storage for the elements; whoever allocates the object must size the
 * allocation for the number of entries it needs.
 *
 * NOTE(review): judging by the field names, the entries are presumably the
 * inode numbers of xattr inodes -- confirm against the code that fills it.
 */
struct ext4_xattr_ino_array {
unsigned int xia_count; /* number of used items in the array */
unsigned int xia_inodes[0]; /* trailing element storage (zero-length array) */
};
/* bitmap.c */
extern unsigned int ext4_count_free(char *bitmap, unsigned numchars);
void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
Expand Down Expand Up @@ -2489,6 +2500,7 @@ extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks);
extern void ext4_set_inode_flags(struct inode *);
extern int ext4_alloc_da_blocks(struct inode *inode);
extern void ext4_set_aops(struct inode *inode);
extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int chunk);
extern int ext4_writepage_trans_blocks(struct inode *);
extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
Expand Down
1 change: 0 additions & 1 deletion fs/ext4/ialloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,6 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
* as writing the quota to disk may need the lock as well.
*/
dquot_initialize(inode);
ext4_xattr_delete_inode(handle, inode);
dquot_free_inode(inode);
dquot_drop(inode);

Expand Down
2 changes: 1 addition & 1 deletion fs/ext4/inline.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ static int get_max_inline_xattr_value_size(struct inode *inode,

/* Compute min_offs. */
for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) {
if (!entry->e_value_block && entry->e_value_size) {
if (!entry->e_value_inum && entry->e_value_size) {
size_t offs = le16_to_cpu(entry->e_value_offs);
if (offs < min_offs)
min_offs = offs;
Expand Down
49 changes: 40 additions & 9 deletions fs/ext4/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,6 @@ static void ext4_invalidatepage(struct page *page, unsigned int offset,
unsigned int length);
static int __ext4_journalled_writepage(struct page *page, unsigned int len);
static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh);
static int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
int pextents);

/*
* Test whether an inode is a fast symlink.
Expand Down Expand Up @@ -189,6 +187,8 @@ void ext4_evict_inode(struct inode *inode)
{
handle_t *handle;
int err;
int extra_credits = 3;
struct ext4_xattr_ino_array *lea_ino_array = NULL;

trace_ext4_evict_inode(inode);

Expand Down Expand Up @@ -238,8 +238,8 @@ void ext4_evict_inode(struct inode *inode)
* protection against it
*/
sb_start_intwrite(inode->i_sb);
handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE,
ext4_blocks_for_truncate(inode)+3);

handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, extra_credits);
if (IS_ERR(handle)) {
ext4_std_error(inode->i_sb, PTR_ERR(handle));
/*
Expand All @@ -251,9 +251,36 @@ void ext4_evict_inode(struct inode *inode)
sb_end_intwrite(inode->i_sb);
goto no_delete;
}

if (IS_SYNC(inode))
ext4_handle_sync(handle);

/*
* Delete xattr inode before deleting the main inode.
*/
err = ext4_xattr_delete_inode(handle, inode, &lea_ino_array);
if (err) {
ext4_warning(inode->i_sb,
"couldn't delete inode's xattr (err %d)", err);
goto stop_handle;
}

if (!IS_NOQUOTA(inode))
extra_credits += 2 * EXT4_QUOTA_DEL_BLOCKS(inode->i_sb);

if (!ext4_handle_has_enough_credits(handle,
ext4_blocks_for_truncate(inode) + extra_credits)) {
err = ext4_journal_extend(handle,
ext4_blocks_for_truncate(inode) + extra_credits);
if (err > 0)
err = ext4_journal_restart(handle,
ext4_blocks_for_truncate(inode) + extra_credits);
if (err != 0) {
ext4_warning(inode->i_sb,
"couldn't extend journal (err %d)", err);
goto stop_handle;
}
}

inode->i_size = 0;
err = ext4_mark_inode_dirty(handle, inode);
if (err) {
Expand All @@ -277,10 +304,10 @@ void ext4_evict_inode(struct inode *inode)
* enough credits left in the handle to remove the inode from
* the orphan list and set the dtime field.
*/
if (!ext4_handle_has_enough_credits(handle, 3)) {
err = ext4_journal_extend(handle, 3);
if (!ext4_handle_has_enough_credits(handle, extra_credits)) {
err = ext4_journal_extend(handle, extra_credits);
if (err > 0)
err = ext4_journal_restart(handle, 3);
err = ext4_journal_restart(handle, extra_credits);
if (err != 0) {
ext4_warning(inode->i_sb,
"couldn't extend journal (err %d)", err);
Expand Down Expand Up @@ -315,8 +342,12 @@ void ext4_evict_inode(struct inode *inode)
ext4_clear_inode(inode);
else
ext4_free_inode(handle, inode);

ext4_journal_stop(handle);
sb_end_intwrite(inode->i_sb);

if (lea_ino_array != NULL)
ext4_xattr_inode_array_free(inode, lea_ino_array);
return;
no_delete:
ext4_clear_inode(inode); /* We must guarantee clearing of inode... */
Expand Down Expand Up @@ -5504,7 +5535,7 @@ static int ext4_index_trans_blocks(struct inode *inode, int lblocks,
*
* Also account for superblock, inode, quota and xattr blocks
*/
static int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
int pextents)
{
ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb);
Expand Down
Loading

0 comments on commit e50e512

Please sign in to comment.