summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Darrick J. Wong <djwong@kernel.org> 2022-07-14 11:16:17 -0700
committer Darrick J. Wong <djwong@kernel.org> 2022-10-14 14:17:27 -0700
commit 21a7ebc29db770dc8446a10d5ac93b5253bccc4d (patch)
tree 1902ffd93134466ff633e65ab0a186eab37a5d89
parent 83e52ec0caf61182e6a89bcad12f69a96e17c062 (diff)
xfs: support realtime reflink with an extent size that isn't a power of 2
realtime-reflink-extsize_2022-10-14
Add the necessary alignment checking code to the reflink remap code to ensure that remap requests are aligned to rt extent boundaries if the realtime extent size isn't a power of two. The VFS helpers assume that they can use the usual (blocksize - 1) masking to avoid slow 64-bit division, but since XFS is special we won't make everyone pay that cost for our weird edge case.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
-rw-r--r-- fs/xfs/xfs_reflink.c 92
-rw-r--r-- fs/xfs/xfs_rtalloc.c 3
-rw-r--r-- fs/xfs/xfs_super.c 12
3 files changed, 97 insertions, 10 deletions
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 0a010870f380..437ce227368e 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1659,6 +1659,83 @@ xfs_reflink_remap_adjust_rtlen(
#endif /* CONFIG_XFS_RT */
/*
+ * Check the alignment of a remap request when the rt extent size isn't a
+ * power of two, using 64-bit division instead of the VFS helpers' bitmasks.
+ * Returns 0 (possibly shortening *req_len) or a negative errno.
+ */
+static int
+xfs_reflink_remap_check_rtalign(
+ struct xfs_inode *ip_in,
+ loff_t pos_in,
+ struct xfs_inode *ip_out,
+ loff_t pos_out,
+ loff_t *req_len,
+ unsigned int remap_flags)
+{
+ struct xfs_mount *mp = ip_in->i_mount;
+ uint32_t rextbytes;
+ loff_t in_size, out_size;
+ loff_t new_length, length = *req_len;
+ loff_t blen;
+
+ rextbytes = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize);
+ in_size = i_size_read(VFS_I(ip_in));
+ out_size = i_size_read(VFS_I(ip_out));
+
+ /* Both starting offsets must be aligned to an rt extent boundary. */
+ if (!isaligned_64(pos_in, rextbytes) ||
+ !isaligned_64(pos_out, rextbytes))
+ return -EINVAL;
+
+ if (length == 0)
+ length = in_size - pos_in;
+
+ /*
+ * If the request runs exactly to the infile's EOF, round the length up
+ * to the next rt extent boundary for the overlap check below.
+ *
+ * Otherwise, round the length down to whole rt extents; an unaligned
+ * tail is handled after the overlap check. Start offsets are aligned.
+ */
+ if (pos_in + length == in_size)
+ blen = roundup_64(in_size, rextbytes) - pos_in;
+ else
+ blen = rounddown_64(length, rextbytes);
+
+ /* Don't allow overlapped remappings within the same file. */
+ if (ip_in == ip_out &&
+ pos_out + blen > pos_in &&
+ pos_in + blen > pos_out)
+ return -EINVAL;
+
+ /*
+ * Ensure that we don't remap a partial EOF rt extent into the middle
+ * of the destination file. Aligned lengths need no adjustment.
+ */
+ if (isaligned_64(length, rextbytes))
+ return 0;
+
+ new_length = length;
+ if (pos_out + length < out_size)
+ new_length = rounddown_64(new_length, rextbytes);
+
+ if (new_length == length)
+ return 0;
+
+ /*
+ * Return the shortened request if the caller permits it. A request
+ * shortened to zero rt extents was never valid, so it falls through
+ * to the error return: -EBADE for dedupe, -EINVAL otherwise.
+ */
+ if ((remap_flags & REMAP_FILE_CAN_SHORTEN) && new_length > 0) {
+ *req_len = new_length;
+ return 0;
+ }
+
+ return (remap_flags & REMAP_FILE_DEDUP) ? -EBADE : -EINVAL;
+}
+
+/*
* Prepare two files for range cloning. Upon a successful return both inodes
* will have the iolock and mmaplock held, the page cache of the out file will
* be truncated, and any leases on the out file will have been broken. This
@@ -1701,6 +1778,7 @@ xfs_reflink_remap_prep(
struct inode *inode_out = file_inode(file_out);
struct xfs_inode *dest = XFS_I(inode_out);
const struct iomap_ops *dax_read_ops = NULL;
+ unsigned int alloc_unit = xfs_inode_alloc_unitsize(dest);
int ret;
/* Lock both files against IO */
@@ -1718,14 +1796,22 @@ xfs_reflink_remap_prep(
if (IS_DAX(inode_in) != IS_DAX(inode_out))
goto out_unlock;
- ASSERT(is_power_of_2(xfs_inode_alloc_unitsize(dest)));
+ /* Check non-power of two alignment issues, if necessary. */
+ if (XFS_IS_REALTIME_INODE(dest) && !is_power_of_2(alloc_unit)) {
+ ret = xfs_reflink_remap_check_rtalign(src, pos_in, dest,
+ pos_out, len, remap_flags);
+ if (ret)
+ goto out_unlock;
+
+ /* Do the VFS checks with the regular block alignment. */
+ alloc_unit = src->i_mount->m_sb.sb_blocksize;
+ }
if (IS_DAX(inode_in))
dax_read_ops = &xfs_read_iomap_ops;
ret = __generic_remap_file_range_prep(file_in, pos_in, file_out,
- pos_out, len, remap_flags, dax_read_ops,
- xfs_inode_alloc_unitsize(dest));
+ pos_out, len, remap_flags, dax_read_ops, alloc_unit);
if (ret || *len == 0)
goto out_unlock;
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 7c1edd5c2554..5e27cb7fce36 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -1312,7 +1312,8 @@ xfs_growfs_rt(
return -EOPNOTSUPP;
if (xfs_has_quota(mp))
return -EOPNOTSUPP;
- if (xfs_has_reflink(mp) && !is_power_of_2(mp->m_sb.sb_rextsize))
+ if (xfs_has_reflink(mp) && !is_power_of_2(mp->m_sb.sb_rextsize) &&
+ (XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize) & ~PAGE_MASK))
return -EOPNOTSUPP;
nrblocks = in->newblocks;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index ba35232dbfed..94c893d48bcd 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1656,17 +1656,17 @@ xfs_fs_fill_super(
* state. This means that we cannot dirty all the pages
* backing an rt extent without dirtying the adjoining rt
* extents. If those rt extents are shared and extend into
- * other pages, this leads to crazy write amplification. The
- * VFS remap_range checks assume power-of-two block sizes.
+ * other pages, this leads to crazy write amplification.
*
* Hence we only support rt extent sizes that are an integer
- * power of two because we know those will align with the page
- * size.
+ * power of two or an integer multiple of the page size because
+ * we know those will align with the page size.
*/
if (xfs_has_realtime(mp) &&
- !is_power_of_2(mp->m_sb.sb_rextsize)) {
+ !is_power_of_2(mp->m_sb.sb_rextsize) &&
+ (XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize) & ~PAGE_MASK)) {
xfs_alert(mp,
- "reflink not compatible with non-power-of-2 realtime extent size %u!",
+ "reflink not compatible with realtime extent size %u!",
mp->m_sb.sb_rextsize);
error = -EINVAL;
goto out_filestream_unmount;