From ad56255feea91862de033fb83eba8833ff8b132d Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 11 Jan 2011 10:22:40 +1100 Subject: [PATCH] --- yaml --- r: 231261 b: refs/heads/master c: eda77982729b7170bdc9e8855f0682edf322d277 h: refs/heads/master i: 231259: 84d029ebc59c864628a004a78aba668b59fc7a6c v: v3 --- [refs] | 2 +- trunk/fs/xfs/linux-2.6/xfs_file.c | 38 +++++++++++++++++++++++-------- 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/[refs] b/[refs] index 6a9fb4ed139d..c77dcde448ad 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: 4d8d15812fd9bc96d0da11467d23e0373feae933 +refs/heads/master: eda77982729b7170bdc9e8855f0682edf322d277 diff --git a/trunk/fs/xfs/linux-2.6/xfs_file.c b/trunk/fs/xfs/linux-2.6/xfs_file.c index 5863dd8f448c..ef51eb43e137 100644 --- a/trunk/fs/xfs/linux-2.6/xfs_file.c +++ b/trunk/fs/xfs/linux-2.6/xfs_file.c @@ -684,9 +684,24 @@ xfs_file_aio_write_checks( * xfs_file_dio_aio_write - handle direct IO writes * * Lock the inode appropriately to prepare for and issue a direct IO write. - * By spearating it from the buffered write path we remove all the tricky to + * By separating it from the buffered write path we remove all the tricky to * follow locking changes and looping. * + * If there are cached pages or we're extending the file, we need IOLOCK_EXCL + * until we're sure the bytes at the new EOF have been zeroed and/or the cached + * pages are flushed out. + * + * In most cases the direct IO writes will be done holding IOLOCK_SHARED + * allowing them to be done in parallel with reads and other direct IO writes. + * However, if the IO is not aligned to filesystem blocks, the direct IO layer + * needs to do sub-block zeroing and that requires serialisation against other + * direct IOs to the same block. In this case we need to serialise the + * submission of the unaligned IOs so that we don't get racing block zeroing in + * the dio layer. To avoid the problem with aio, we also need to wait for + * outstanding IOs to complete so that unwritten extent conversion is completed + * before we try to map the overlapping block. This is currently implemented by + * hitting it with a big hammer (i.e. xfs_ioend_wait()). + * * Returns with locks held indicated by @iolock and errors indicated by * negative return values. */ @@ -706,6 +721,7 @@ xfs_file_dio_aio_write( struct xfs_mount *mp = ip->i_mount; ssize_t ret = 0; size_t count = ocount; + int unaligned_io = 0; struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ? mp->m_rtdev_targp : mp->m_ddev_targp; @@ -713,13 +729,10 @@ xfs_file_dio_aio_write( if ((pos & target->bt_smask) || (count & target->bt_smask)) return -XFS_ERROR(EINVAL); - /* - * For direct I/O, if there are cached pages or we're extending - * the file, we need IOLOCK_EXCL until we're sure the bytes at - * the new EOF have been zeroed and/or the cached pages are - * flushed out. - */ - if (mapping->nrpages || pos > ip->i_size) + if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask)) + unaligned_io = 1; + + if (unaligned_io || mapping->nrpages || pos > ip->i_size) *iolock = XFS_IOLOCK_EXCL; else *iolock = XFS_IOLOCK_SHARED; @@ -737,8 +750,13 @@ xfs_file_dio_aio_write( return ret; } - if (*iolock == XFS_IOLOCK_EXCL) { - /* demote the lock now the cached pages are gone */ + /* + * If we are doing unaligned IO, wait for all other IO to drain, + * otherwise demote the lock if we had to flush cached pages + */ + if (unaligned_io) + xfs_ioend_wait(ip); + else if (*iolock == XFS_IOLOCK_EXCL) { xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); *iolock = XFS_IOLOCK_SHARED; }