summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- fs/xfs/xfs_reflink.c 66
-rw-r--r-- fs/xfs/xfs_rtalloc.c 2
-rw-r--r-- fs/xfs/xfs_super.c 21
-rw-r--r-- fs/xfs/xfs_trace.h 1
4 files changed, 81 insertions, 9 deletions
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 8073d2a42143..c0a8383681fd 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1350,6 +1350,13 @@ xfs_reflink_remap_blocks(
len = min_t(xfs_filblks_t, XFS_B_TO_FSB(mp, remap_len),
XFS_MAX_FILEOFF);
+ /*
+ * Make sure the end is aligned with a rt extent (if desired), since
+ * the end of the range could be EOF.
+ */
+ if (xfs_inode_has_bigrtextents(dest))
+ len = roundup_64(len, mp->m_sb.sb_rextsize);
+
trace_xfs_reflink_remap_blocks(src, srcoff, len, dest, destoff);
while (len > 0) {
@@ -1423,6 +1430,50 @@ xfs_reflink_zero_posteof(
&xfs_buffered_write_iomap_ops);
}
+/*
+ * Adjust the length of the remap operation to end on a rt extent boundary.
+ *
+ * @len is a byte count and is updated in place when it has to be shortened.
+ * Returns 0 if the length is acceptable as-is or was shortened, or -EINVAL
+ * if the length is unaligned, the source range does not end at the source
+ * file's EOF, and the caller did not set REMAP_FILE_CAN_SHORTEN in
+ * @remap_flags.  @dest and @pos_out are only handed to the tracepoint.
+ */
+STATIC int
+xfs_reflink_remap_adjust_rtlen(
+ struct xfs_inode *src,
+ loff_t pos_in,
+ struct xfs_inode *dest,
+ loff_t pos_out,
+ loff_t *len,
+ unsigned int remap_flags)
+{
+ struct xfs_mount *mp = src->i_mount;
+ uint32_t mod;
+
+ div_u64_rem(*len, XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize), &mod);
+
+ /*
+ * At this point mod holds the remainder of the byte length divided by
+ * the rt extent size (sb_rextsize converted to bytes), i.e. how far
+ * the length extends past the last rt extent boundary.
+ *
+ * We previously checked the rtextent alignment of both offsets, so we
+ * now have to check the alignment of the length. The VFS remap prep
+ * function can change the length on us, so we can only make length
+ * adjustments after that. If the length is aligned to an rtextent,
+ * we're trivially good to go.
+ *
+ * Otherwise, the length is not aligned to an rt extent. If the source
+ * file's range ends at EOF, the VFS ensured that the dest file's range
+ * also ends at EOF. The actual remap function will round the (byte)
+ * length up to the nearest rtextent unit, so we're ok here too.
+ */
+ if (mod == 0 || pos_in + *len == i_size_read(VFS_I(src)))
+ return 0;
+
+ /*
+ * Otherwise, the only thing we can do is round the request length down
+ * to an rt extent boundary. If the caller doesn't allow that, we are
+ * finished.
+ */
+ if (!(remap_flags & REMAP_FILE_CAN_SHORTEN))
+ return -EINVAL;
+
+ /*
+ * Trim off the unaligned remainder (always less than one rt extent) so
+ * that the length becomes a multiple of the rt extent size.
+ */
+ (*len) -= mod;
+ trace_xfs_reflink_remap_adjust_rtlen(src, pos_in, *len, dest, pos_out);
+ return 0;
+}
+
/*
* Prepare two files for range cloning. Upon a successful return both inodes
* will have the iolock and mmaplock held, the page cache of the out file will
@@ -1482,11 +1533,22 @@ xfs_reflink_remap_prep(
if (IS_DAX(inode_in) || IS_DAX(inode_out))
goto out_unlock;
- ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out,
- len, remap_flags);
+ ASSERT(is_power_of_2(xfs_inode_alloc_unitsize(dest)));
+
+ ret = __generic_remap_file_range_prep(file_in, pos_in, file_out,
+ pos_out, len, remap_flags,
+ xfs_inode_alloc_unitsize(dest));
if (ret || *len == 0)
goto out_unlock;
+ /* Make sure the end is aligned with a rt extent. */
+ if (xfs_inode_has_bigrtextents(src)) {
+ ret = xfs_reflink_remap_adjust_rtlen(src, pos_in, dest,
+ pos_out, len, remap_flags);
+ if (ret || *len == 0)
+ goto out_unlock;
+ }
+
/* Attach dquots to dest inode before changing block map */
ret = xfs_qm_dqattach(dest);
if (ret)
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index efd54174a99e..8b8f83118ca2 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -1063,7 +1063,7 @@ xfs_growfs_rt(
if (!xfs_has_metadir(mp) && (xfs_has_rmapbt(mp) || xfs_has_reflink(mp)))
return -EOPNOTSUPP;
- if (xfs_has_reflink(mp) && in->extsize != 1)
+ if (xfs_has_reflink(mp) && !is_power_of_2(mp->m_sb.sb_rextsize))
return -EOPNOTSUPP;
nrblocks = in->newblocks;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 6d3f8df845f4..0c825fe1f9bb 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1683,13 +1683,23 @@ xfs_fs_fill_super(
if (xfs_has_reflink(mp)) {
/*
- * Reflink doesn't support rt extent sizes larger than a single
- * block because we would have to perform unshare-around for
- * rtext-unaligned write requests.
+ * Reflink doesn't support pagecache pages that span multiple
+ * realtime extents because iomap doesn't track subpage dirty
+ * state. This means that we cannot dirty all the pages
+ * backing an rt extent without dirtying the adjoining rt
+ * extents. If those rt extents are shared and extend into
+ * other pages, this leads to crazy write amplification. The
+ * VFS remap_range checks assume power-of-two block sizes, so
+ * we don't support that either.
+ *
+ * Hence we only support rt extent sizes that are an integer
+ * power of two because we know those will align with the page
+ * size.
*/
- if (xfs_has_realtime(mp) && mp->m_sb.sb_rextsize != 1) {
+ if (xfs_has_realtime(mp) &&
+ !is_power_of_2(mp->m_sb.sb_rextsize)) {
xfs_alert(mp,
- "reflink not compatible with realtime extent size %u!",
+ "reflink not compatible with non-power-of-2 realtime extent size %u!",
mp->m_sb.sb_rextsize);
error = -EINVAL;
goto out_filestream_unmount;
@@ -1708,7 +1718,6 @@ xfs_fs_fill_super(
}
}
-
error = xfs_mountfs(mp);
if (error)
goto out_filestream_unmount;
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 1cc5c4380f9b..7abc426a46db 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3635,6 +3635,7 @@ TRACE_EVENT(xfs_reflink_remap_blocks,
__entry->dest_lblk)
);
DEFINE_DOUBLE_IO_EVENT(xfs_reflink_remap_range);
+DEFINE_DOUBLE_IO_EVENT(xfs_reflink_remap_adjust_rtlen);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_remap_range_error);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_set_inode_flag_error);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_update_inode_size_error);