Skip to content

Commit

Permalink
direct-io: inline the complete submission path
Browse files Browse the repository at this point in the history
Add inlines to all the submission path functions. While this increases
code size, it also gives gcc a lot of optimization opportunities
in this critical hot path.

In particular -- together with some other changes -- this
allows gcc to get rid of the unnecessary clearing of
sdio at the beginning and optimize the messy parameter passing.
Failing to inline any function that takes an sdio parameter
would break this optimization, because gcc cannot perform it once the
address of the structure is taken.

Note that benefits are only seen with CONFIG_OPTIMIZE_INLINING
and CONFIG_CC_OPTIMIZE_FOR_SIZE both set to off.

This gives about 2.2% improvement on a large database benchmark
with a high IOPS rate.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
  • Loading branch information
Andi Kleen authored and root committed Oct 28, 2011
1 parent 1877264 commit ba253fb
Showing 1 changed file with 21 additions and 15 deletions.
36 changes: 21 additions & 15 deletions fs/direct-io.c
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ static inline unsigned dio_pages_present(struct dio_submit *sdio)
/*
* Go grab and pin some userspace pages. Typically we'll get 64 at a time.
*/
static int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
{
int ret;
int nr_pages;
Expand Down Expand Up @@ -245,7 +245,8 @@ static int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
* decent number of pages, less frequently. To provide nicer use of the
* L1 cache.
*/
static struct page *dio_get_page(struct dio *dio, struct dio_submit *sdio)
static inline struct page *dio_get_page(struct dio *dio,
struct dio_submit *sdio)
{
if (dio_pages_present(sdio) == 0) {
int ret;
Expand Down Expand Up @@ -376,7 +377,7 @@ void dio_end_io(struct bio *bio, int error)
}
EXPORT_SYMBOL_GPL(dio_end_io);

static void
static inline void
dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
struct block_device *bdev,
sector_t first_sector, int nr_vecs)
Expand Down Expand Up @@ -407,7 +408,7 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
*
* bios hold a dio reference between submit_bio and ->end_io.
*/
static void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
{
struct bio *bio = sdio->bio;
unsigned long flags;
Expand Down Expand Up @@ -435,7 +436,7 @@ static void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
/*
* Release any resources in case of a failure
*/
static void dio_cleanup(struct dio *dio, struct dio_submit *sdio)
static inline void dio_cleanup(struct dio *dio, struct dio_submit *sdio)
{
while (dio_pages_present(sdio))
page_cache_release(dio_get_page(dio, sdio));
Expand Down Expand Up @@ -528,7 +529,7 @@ static void dio_await_completion(struct dio *dio)
*
* This also helps to limit the peak amount of pinned userspace memory.
*/
static int dio_bio_reap(struct dio *dio, struct dio_submit *sdio)
static inline int dio_bio_reap(struct dio *dio, struct dio_submit *sdio)
{
int ret = 0;

Expand Down Expand Up @@ -631,8 +632,8 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio,
/*
* There is no bio. Make one now.
*/
static int dio_new_bio(struct dio *dio, struct dio_submit *sdio,
sector_t start_sector, struct buffer_head *map_bh)
static inline int dio_new_bio(struct dio *dio, struct dio_submit *sdio,
sector_t start_sector, struct buffer_head *map_bh)
{
sector_t sector;
int ret, nr_pages;
Expand All @@ -657,7 +658,7 @@ static int dio_new_bio(struct dio *dio, struct dio_submit *sdio,
*
* Return zero on success. Non-zero means the caller needs to start a new BIO.
*/
static int dio_bio_add_page(struct dio_submit *sdio)
static inline int dio_bio_add_page(struct dio_submit *sdio)
{
int ret;

Expand Down Expand Up @@ -689,8 +690,8 @@ static int dio_bio_add_page(struct dio_submit *sdio)
* The caller of this function is responsible for removing cur_page from the
* dio, and for dropping the refcount which came from that presence.
*/
static int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio,
struct buffer_head *map_bh)
static inline int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio,
struct buffer_head *map_bh)
{
int ret = 0;

Expand Down Expand Up @@ -759,7 +760,7 @@ static int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio,
* If that doesn't work out then we put the old page into the bio and add this
* page to the dio instead.
*/
static int
static inline int
submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
unsigned offset, unsigned len, sector_t blocknr,
struct buffer_head *map_bh)
Expand Down Expand Up @@ -842,8 +843,8 @@ static void clean_blockdev_aliases(struct dio *dio, struct buffer_head *map_bh)
* `end' is zero if we're doing the start of the IO, 1 at the end of the
* IO.
*/
static void dio_zero_block(struct dio *dio, struct dio_submit *sdio, int end,
struct buffer_head *map_bh)
static inline void dio_zero_block(struct dio *dio, struct dio_submit *sdio,
int end, struct buffer_head *map_bh)
{
unsigned dio_blocks_per_fs_block;
unsigned this_chunk_blocks; /* In dio_blocks */
Expand Down Expand Up @@ -1042,7 +1043,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
return ret;
}

static ssize_t
static inline ssize_t
direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
const struct iovec *iov, loff_t offset, unsigned long nr_segs,
unsigned blkbits, get_block_t get_block, dio_iodone_t end_io,
Expand Down Expand Up @@ -1216,6 +1217,11 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
* expected that filesystem provide exclusion between new direct I/O
* and truncates. For DIO_LOCKING filesystems this is done by i_mutex,
* but other filesystems need to take care of this on their own.
*
* NOTE: if you pass "sdio" to anything by pointer make sure that function
* is always inlined. Otherwise gcc is unable to split the structure into
* individual fields and will generate much worse code. This is important
* for the whole file.
*/
ssize_t
__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
Expand Down

0 comments on commit ba253fb

Please sign in to comment.