Skip to content
Navigation Menu
Toggle navigation
Sign in
In this repository
All GitHub Enterprise
↵
Jump to
↵
No suggested jump to results
In this repository
All GitHub Enterprise
↵
Jump to
↵
In this organization
All GitHub Enterprise
↵
Jump to
↵
In this repository
All GitHub Enterprise
↵
Jump to
↵
Sign in
Reseting focus
You signed in with another tab or window.
Reload
to refresh your session.
You signed out in another tab or window.
Reload
to refresh your session.
You switched accounts on another tab or window.
Reload
to refresh your session.
Dismiss alert
{{ message }}
mariux64
/
linux
Public
Notifications
You must be signed in to change notification settings
Fork
0
Star
0
Code
Issues
2
Pull requests
0
Actions
Projects
0
Wiki
Security
Insights
Additional navigation options
Code
Issues
Pull requests
Actions
Projects
Wiki
Security
Insights
Files
3b421b8
Documentation
arch
block
crypto
drivers
firmware
fs
9p
adfs
affs
afs
autofs4
befs
bfs
btrfs
cachefiles
ceph
cifs
coda
configfs
cramfs
debugfs
devpts
dlm
ecryptfs
efivarfs
efs
exofs
exportfs
ext2
ext3
ext4
Kconfig
Makefile
acl.c
acl.h
balloc.c
bitmap.c
block_validity.c
dir.c
ext4.h
ext4_extents.h
ext4_jbd2.c
ext4_jbd2.h
extents.c
extents_status.c
extents_status.h
file.c
fsync.c
hash.c
ialloc.c
indirect.c
inline.c
inode.c
ioctl.c
mballoc.c
mballoc.h
migrate.c
mmp.c
move_extent.c
namei.c
page-io.c
resize.c
super.c
symlink.c
truncate.h
xattr.c
xattr.h
xattr_security.c
xattr_trusted.c
xattr_user.c
f2fs
fat
freevxfs
fscache
fuse
gfs2
hfs
hfsplus
hostfs
hpfs
hppfs
hugetlbfs
isofs
jbd
jbd2
jffs2
jfs
kernfs
lockd
logfs
minix
ncpfs
nfs
nfs_common
nfsd
nilfs2
nls
notify
ntfs
ocfs2
omfs
openpromfs
overlayfs
proc
pstore
qnx4
qnx6
quota
ramfs
reiserfs
romfs
squashfs
sysfs
sysv
ubifs
udf
ufs
xfs
Kconfig
Kconfig.binfmt
Makefile
aio.c
anon_inodes.c
attr.c
bad_inode.c
binfmt_aout.c
binfmt_elf.c
binfmt_elf_fdpic.c
binfmt_em86.c
binfmt_flat.c
binfmt_misc.c
binfmt_script.c
binfmt_som.c
block_dev.c
buffer.c
char_dev.c
compat.c
compat_binfmt_elf.c
compat_ioctl.c
coredump.c
dcache.c
dcookies.c
direct-io.c
drop_caches.c
eventfd.c
eventpoll.c
exec.c
fcntl.c
fhandle.c
file.c
file_table.c
filesystems.c
fs-writeback.c
fs_pin.c
fs_struct.c
inode.c
internal.h
ioctl.c
libfs.c
locks.c
mbcache.c
mount.h
mpage.c
namei.c
namespace.c
no-block.c
nsfs.c
open.c
pipe.c
pnode.c
pnode.h
posix_acl.c
proc_namespace.c
read_write.c
readdir.c
select.c
seq_file.c
signalfd.c
splice.c
stack.c
stat.c
statfs.c
super.c
sync.c
timerfd.c
utimes.c
xattr.c
include
init
ipc
kernel
lib
mm
net
samples
scripts
security
sound
tools
usr
virt
.gitignore
.mailmap
COPYING
CREDITS
Kbuild
Kconfig
MAINTAINERS
Makefile
README
REPORTING-BUGS
Breadcrumbs
linux
/
fs
/
ext4
/
file.c
Copy path
Blame
Blame
Latest commit
Theodore Ts'o
Revert "ext4: fix suboptimal seek_{data,hole} extents traversial"
Jan 2, 2015
ad7fefb
·
Jan 2, 2015
History
History
614 lines (538 loc) · 15.3 KB
Breadcrumbs
linux
/
fs
/
ext4
/
file.c
Top
File metadata and controls
Code
Blame
614 lines (538 loc) · 15.3 KB
Raw
/* * linux/fs/ext4/file.c * * Copyright (C) 1992, 1993, 1994, 1995 * Remy Card (card@masi.ibp.fr) * Laboratoire MASI - Institut Blaise Pascal * Universite Pierre et Marie Curie (Paris VI) * * from * * linux/fs/minix/file.c * * Copyright (C) 1991, 1992 Linus Torvalds * * ext4 fs regular file handling primitives * * 64-bit file support on 64-bit platforms by Jakub Jelinek * (jj@sunsite.ms.mff.cuni.cz) */ #include <linux/time.h> #include <linux/fs.h> #include <linux/jbd2.h> #include <linux/mount.h> #include <linux/path.h> #include <linux/aio.h> #include <linux/quotaops.h> #include <linux/pagevec.h> #include "ext4.h" #include "ext4_jbd2.h" #include "xattr.h" #include "acl.h" /* * Called when an inode is released. Note that this is different * from ext4_file_open: open gets called at every open, but release * gets called only when /all/ the files are closed. */ static int ext4_release_file(struct inode *inode, struct file *filp) { if (ext4_test_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE)) { ext4_alloc_da_blocks(inode); ext4_clear_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE); } /* if we are the last writer on the inode, drop the block reservation */ if ((filp->f_mode & FMODE_WRITE) && (atomic_read(&inode->i_writecount) == 1) && !EXT4_I(inode)->i_reserved_data_blocks) { down_write(&EXT4_I(inode)->i_data_sem); ext4_discard_preallocations(inode); up_write(&EXT4_I(inode)->i_data_sem); } if (is_dx(inode) && filp->private_data) ext4_htree_free_dir_info(filp->private_data); return 0; } static void ext4_unwritten_wait(struct inode *inode) { wait_queue_head_t *wq = ext4_ioend_wq(inode); wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_unwritten) == 0)); } /* * This tests whether the IO in question is block-aligned or not. * Ext4 utilizes unwritten extents when hole-filling during direct IO, and they * are converted to written only after the IO is complete. Until they are * mapped, these blocks appear as holes, so dio_zero_block() will assume that * it needs to zero out portions of the start and/or end block. If 2 AIO * threads are at work on the same unwritten block, they must be synchronized * or one thread will zero the other's data, causing corruption. */ static int ext4_unaligned_aio(struct inode *inode, struct iov_iter *from, loff_t pos) { struct super_block *sb = inode->i_sb; int blockmask = sb->s_blocksize - 1; if (pos >= i_size_read(inode)) return 0; if ((pos | iov_iter_alignment(from)) & blockmask) return 1; return 0; } static ssize_t ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(iocb->ki_filp); struct mutex *aio_mutex = NULL; struct blk_plug plug; int o_direct = file->f_flags & O_DIRECT; int overwrite = 0; size_t length = iov_iter_count(from); ssize_t ret; loff_t pos = iocb->ki_pos; /* * Unaligned direct AIO must be serialized; see comment above * In the case of O_APPEND, assume that we must always serialize */ if (o_direct && ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) && !is_sync_kiocb(iocb) && (file->f_flags & O_APPEND || ext4_unaligned_aio(inode, from, pos))) { aio_mutex = ext4_aio_mutex(inode); mutex_lock(aio_mutex); ext4_unwritten_wait(inode); } mutex_lock(&inode->i_mutex); if (file->f_flags & O_APPEND) iocb->ki_pos = pos = i_size_read(inode); /* * If we have encountered a bitmap-format file, the size limit * is smaller than s_maxbytes, which is for extent-mapped files. */ if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); if ((pos > sbi->s_bitmap_maxbytes) || (pos == sbi->s_bitmap_maxbytes && length > 0)) { mutex_unlock(&inode->i_mutex); ret = -EFBIG; goto errout; } if (pos + length > sbi->s_bitmap_maxbytes) iov_iter_truncate(from, sbi->s_bitmap_maxbytes - pos); } iocb->private = &overwrite; if (o_direct) { blk_start_plug(&plug); /* check whether we do a DIO overwrite or not */ if (ext4_should_dioread_nolock(inode) && !aio_mutex && !file->f_mapping->nrpages && pos + length <= i_size_read(inode)) { struct ext4_map_blocks map; unsigned int blkbits = inode->i_blkbits; int err, len; map.m_lblk = pos >> blkbits; map.m_len = (EXT4_BLOCK_ALIGN(pos + length, blkbits) >> blkbits) - map.m_lblk; len = map.m_len; err = ext4_map_blocks(NULL, inode, &map, 0); /* * 'err==len' means that all of blocks has * been preallocated no matter they are * initialized or not. For excluding * unwritten extents, we need to check * m_flags. There are two conditions that * indicate for initialized extents. 1) If we * hit extent cache, EXT4_MAP_MAPPED flag is * returned; 2) If we do a real lookup, * non-flags are returned. So we should check * these two conditions. */ if (err == len && (map.m_flags & EXT4_MAP_MAPPED)) overwrite = 1; } } ret = __generic_file_write_iter(iocb, from); mutex_unlock(&inode->i_mutex); if (ret > 0) { ssize_t err; err = generic_write_sync(file, iocb->ki_pos - ret, ret); if (err < 0) ret = err; } if (o_direct) blk_finish_plug(&plug); errout: if (aio_mutex) mutex_unlock(aio_mutex); return ret; } static const struct vm_operations_struct ext4_file_vm_ops = { .fault = filemap_fault, .map_pages = filemap_map_pages, .page_mkwrite = ext4_page_mkwrite, .remap_pages = generic_file_remap_pages, }; static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) { file_accessed(file); vma->vm_ops = &ext4_file_vm_ops; return 0; } static int ext4_file_open(struct inode * inode, struct file * filp) { struct super_block *sb = inode->i_sb; struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct vfsmount *mnt = filp->f_path.mnt; struct path path; char buf[64], *cp; if (unlikely(!(sbi->s_mount_flags & EXT4_MF_MNTDIR_SAMPLED) && !(sb->s_flags & MS_RDONLY))) { sbi->s_mount_flags |= EXT4_MF_MNTDIR_SAMPLED; /* * Sample where the filesystem has been mounted and * store it in the superblock for sysadmin convenience * when trying to sort through large numbers of block * devices or filesystem images. */ memset(buf, 0, sizeof(buf)); path.mnt = mnt; path.dentry = mnt->mnt_root; cp = d_path(&path, buf, sizeof(buf)); if (!IS_ERR(cp)) { handle_t *handle; int err; handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 1); if (IS_ERR(handle)) return PTR_ERR(handle); BUFFER_TRACE(sbi->s_sbh, "get_write_access"); err = ext4_journal_get_write_access(handle, sbi->s_sbh); if (err) { ext4_journal_stop(handle); return err; } strlcpy(sbi->s_es->s_last_mounted, cp, sizeof(sbi->s_es->s_last_mounted)); ext4_handle_dirty_super(handle, sb); ext4_journal_stop(handle); } } /* * Set up the jbd2_inode if we are opening the inode for * writing and the journal is present */ if (filp->f_mode & FMODE_WRITE) { int ret = ext4_inode_attach_jinode(inode); if (ret < 0) return ret; } return dquot_file_open(inode, filp); } /* * Here we use ext4_map_blocks() to get a block mapping for a extent-based * file rather than ext4_ext_walk_space() because we can introduce * SEEK_DATA/SEEK_HOLE for block-mapped and extent-mapped file at the same * function. When extent status tree has been fully implemented, it will * track all extent status for a file and we can directly use it to * retrieve the offset for SEEK_DATA/SEEK_HOLE. */ /* * When we retrieve the offset for SEEK_DATA/SEEK_HOLE, we would need to * lookup page cache to check whether or not there has some data between * [startoff, endoff] because, if this range contains an unwritten extent, * we determine this extent as a data or a hole according to whether the * page cache has data or not. */ static int ext4_find_unwritten_pgoff(struct inode *inode, int whence, struct ext4_map_blocks *map, loff_t *offset) { struct pagevec pvec; unsigned int blkbits; pgoff_t index; pgoff_t end; loff_t endoff; loff_t startoff; loff_t lastoff; int found = 0; blkbits = inode->i_sb->s_blocksize_bits; startoff = *offset; lastoff = startoff; endoff = (loff_t)(map->m_lblk + map->m_len) << blkbits; index = startoff >> PAGE_CACHE_SHIFT; end = endoff >> PAGE_CACHE_SHIFT; pagevec_init(&pvec, 0); do { int i, num; unsigned long nr_pages; num = min_t(pgoff_t, end - index, PAGEVEC_SIZE); nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index, (pgoff_t)num); if (nr_pages == 0) { if (whence == SEEK_DATA) break; BUG_ON(whence != SEEK_HOLE); /* * If this is the first time to go into the loop and * offset is not beyond the end offset, it will be a * hole at this offset */ if (lastoff == startoff || lastoff < endoff) found = 1; break; } /* * If this is the first time to go into the loop and * offset is smaller than the first page offset, it will be a * hole at this offset. */ if (lastoff == startoff && whence == SEEK_HOLE && lastoff < page_offset(pvec.pages[0])) { found = 1; break; } for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; struct buffer_head *bh, *head; /* * If the current offset is not beyond the end of given * range, it will be a hole. */ if (lastoff < endoff && whence == SEEK_HOLE && page->index > end) { found = 1; *offset = lastoff; goto out; } lock_page(page); if (unlikely(page->mapping != inode->i_mapping)) { unlock_page(page); continue; } if (!page_has_buffers(page)) { unlock_page(page); continue; } if (page_has_buffers(page)) { lastoff = page_offset(page); bh = head = page_buffers(page); do { if (buffer_uptodate(bh) || buffer_unwritten(bh)) { if (whence == SEEK_DATA) found = 1; } else { if (whence == SEEK_HOLE) found = 1; } if (found) { *offset = max_t(loff_t, startoff, lastoff); unlock_page(page); goto out; } lastoff += bh->b_size; bh = bh->b_this_page; } while (bh != head); } lastoff = page_offset(page) + PAGE_SIZE; unlock_page(page); } /* * The no. of pages is less than our desired, that would be a * hole in there. */ if (nr_pages < num && whence == SEEK_HOLE) { found = 1; *offset = lastoff; break; } index = pvec.pages[i - 1]->index + 1; pagevec_release(&pvec); } while (index <= end); out: pagevec_release(&pvec); return found; } /* * ext4_seek_data() retrieves the offset for SEEK_DATA. */ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) { struct inode *inode = file->f_mapping->host; struct ext4_map_blocks map; struct extent_status es; ext4_lblk_t start, last, end; loff_t dataoff, isize; int blkbits; int ret = 0; mutex_lock(&inode->i_mutex); isize = i_size_read(inode); if (offset >= isize) { mutex_unlock(&inode->i_mutex); return -ENXIO; } blkbits = inode->i_sb->s_blocksize_bits; start = offset >> blkbits; last = start; end = isize >> blkbits; dataoff = offset; do { map.m_lblk = last; map.m_len = end - last + 1; ret = ext4_map_blocks(NULL, inode, &map, 0); if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) { if (last != start) dataoff = (loff_t)last << blkbits; break; } /* * If there is a delay extent at this offset, * it will be as a data. */ ext4_es_find_delayed_extent_range(inode, last, last, &es); if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) { if (last != start) dataoff = (loff_t)last << blkbits; break; } /* * If there is a unwritten extent at this offset, * it will be as a data or a hole according to page * cache that has data or not. */ if (map.m_flags & EXT4_MAP_UNWRITTEN) { int unwritten; unwritten = ext4_find_unwritten_pgoff(inode, SEEK_DATA, &map, &dataoff); if (unwritten) break; } last++; dataoff = (loff_t)last << blkbits; } while (last <= end); mutex_unlock(&inode->i_mutex); if (dataoff > isize) return -ENXIO; return vfs_setpos(file, dataoff, maxsize); } /* * ext4_seek_hole() retrieves the offset for SEEK_HOLE. */ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) { struct inode *inode = file->f_mapping->host; struct ext4_map_blocks map; struct extent_status es; ext4_lblk_t start, last, end; loff_t holeoff, isize; int blkbits; int ret = 0; mutex_lock(&inode->i_mutex); isize = i_size_read(inode); if (offset >= isize) { mutex_unlock(&inode->i_mutex); return -ENXIO; } blkbits = inode->i_sb->s_blocksize_bits; start = offset >> blkbits; last = start; end = isize >> blkbits; holeoff = offset; do { map.m_lblk = last; map.m_len = end - last + 1; ret = ext4_map_blocks(NULL, inode, &map, 0); if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) { last += ret; holeoff = (loff_t)last << blkbits; continue; } /* * If there is a delay extent at this offset, * we will skip this extent. */ ext4_es_find_delayed_extent_range(inode, last, last, &es); if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) { last = es.es_lblk + es.es_len; holeoff = (loff_t)last << blkbits; continue; } /* * If there is a unwritten extent at this offset, * it will be as a data or a hole according to page * cache that has data or not. */ if (map.m_flags & EXT4_MAP_UNWRITTEN) { int unwritten; unwritten = ext4_find_unwritten_pgoff(inode, SEEK_HOLE, &map, &holeoff); if (!unwritten) { last += ret; holeoff = (loff_t)last << blkbits; continue; } } /* find a hole */ break; } while (last <= end); mutex_unlock(&inode->i_mutex); if (holeoff > isize) holeoff = isize; return vfs_setpos(file, holeoff, maxsize); } /* * ext4_llseek() handles both block-mapped and extent-mapped maxbytes values * by calling generic_file_llseek_size() with the appropriate maxbytes * value for each. */ loff_t ext4_llseek(struct file *file, loff_t offset, int whence) { struct inode *inode = file->f_mapping->host; loff_t maxbytes; if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) maxbytes = EXT4_SB(inode->i_sb)->s_bitmap_maxbytes; else maxbytes = inode->i_sb->s_maxbytes; switch (whence) { case SEEK_SET: case SEEK_CUR: case SEEK_END: return generic_file_llseek_size(file, offset, whence, maxbytes, i_size_read(inode)); case SEEK_DATA: return ext4_seek_data(file, offset, maxbytes); case SEEK_HOLE: return ext4_seek_hole(file, offset, maxbytes); } return -EINVAL; } const struct file_operations ext4_file_operations = { .llseek = ext4_llseek, .read = new_sync_read, .write = new_sync_write, .read_iter = generic_file_read_iter, .write_iter = ext4_file_write_iter, .unlocked_ioctl = ext4_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ext4_compat_ioctl, #endif .mmap = ext4_file_mmap, .open = ext4_file_open, .release = ext4_release_file, .fsync = ext4_sync_file, .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, .fallocate = ext4_fallocate, }; const struct inode_operations ext4_file_inode_operations = { .setattr = ext4_setattr, .getattr = ext4_getattr, .setxattr = generic_setxattr, .getxattr = generic_getxattr, .listxattr = ext4_listxattr, .removexattr = generic_removexattr, .get_acl = ext4_get_acl, .set_acl = ext4_set_acl, .fiemap = ext4_fiemap, };
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
You can’t perform that action at this time.