summaryrefslogtreecommitdiff
path: root/fs/xfs/libxfs/xfs_inode_buf.c
diff options
context:
space:
mode:
authorDave Chinner <david@fromorbit.com>2022-04-21 16:46:17 +1000
committerDave Chinner <david@fromorbit.com>2022-04-21 16:46:17 +1000
commita44a027a8b2a20fec30e0e9c99b0eb41c03e7420 (patch)
tree2cce770cced23924fd30b40601b52ffb609baab5 /fs/xfs/libxfs/xfs_inode_buf.c
parent463260d7670566c357dfa2c38bc3124c98b646bc (diff)
parent973ac0eb3a7dfedecd385bd2b48b12e62a0492f2 (diff)
Merge tag 'large-extent-counters-v9' of https://github.com/chandanr/linux into xfs-5.19-for-next
xfs: Large extent counters The commit xfs: fix inode fork extent count overflow (3f8a4f1d876d3e3e49e50b0396eaffcc4ba71b08) mentions that 10 billion data fork extents should be possible to create. However the corresponding on-disk field has a signed 32-bit type. Hence this patchset extends the per-inode data fork extent counter to 64 bits (out of which 48 bits are used to store the extent count). Also, XFS has an attribute fork extent counter which is 16 bits wide. A workload that, 1. Creates 1 million 255-byte sized xattrs, 2. Deletes 50% of these xattrs in an alternating manner, 3. Tries to insert 400,000 new 255-byte sized xattrs causes the xattr extent counter to overflow. Dave tells me that there are instances where a single file has more than 100 million hardlinks. With parent pointers being stored in xattrs, we will overflow the signed 16-bits wide attribute extent counter when large number of hardlinks are created. Hence this patchset extends the on-disk field to 32-bits. The following changes are made to accomplish this, 1. A 64-bit inode field is carved out of existing di_pad and di_flushiter fields to hold the 64-bit data fork extent counter. 2. The existing 32-bit inode data fork extent counter will be used to hold the attribute fork extent counter. 3. A new incompat superblock flag to prevent older kernels from mounting the filesystem. Signed-off-by: Chandan Babu R <chandan.babu@oracle.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
Diffstat (limited to 'fs/xfs/libxfs/xfs_inode_buf.c')
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c83
1 files changed, 67 insertions, 16 deletions
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index cae9708c8587..74b82ec80f8e 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -279,6 +279,25 @@ xfs_inode_to_disk_ts(
return ts;
}
+static inline void
+xfs_inode_to_disk_iext_counters(
+ struct xfs_inode *ip,
+ struct xfs_dinode *to)
+{
+ if (xfs_inode_has_large_extent_counts(ip)) {
+ to->di_big_nextents = cpu_to_be64(xfs_ifork_nextents(&ip->i_df));
+ to->di_big_anextents = cpu_to_be32(xfs_ifork_nextents(ip->i_afp));
+ /*
+ * We might be upgrading the inode to use larger extent counters
+ * than was previously used. Hence zero the unused field.
+ */
+ to->di_nrext64_pad = cpu_to_be16(0);
+ } else {
+ to->di_nextents = cpu_to_be32(xfs_ifork_nextents(&ip->i_df));
+ to->di_anextents = cpu_to_be16(xfs_ifork_nextents(ip->i_afp));
+ }
+}
+
void
xfs_inode_to_disk(
struct xfs_inode *ip,
@@ -296,7 +315,6 @@ xfs_inode_to_disk(
to->di_projid_lo = cpu_to_be16(ip->i_projid & 0xffff);
to->di_projid_hi = cpu_to_be16(ip->i_projid >> 16);
- memset(to->di_pad, 0, sizeof(to->di_pad));
to->di_atime = xfs_inode_to_disk_ts(ip, inode->i_atime);
to->di_mtime = xfs_inode_to_disk_ts(ip, inode->i_mtime);
to->di_ctime = xfs_inode_to_disk_ts(ip, inode->i_ctime);
@@ -307,8 +325,6 @@ xfs_inode_to_disk(
to->di_size = cpu_to_be64(ip->i_disk_size);
to->di_nblocks = cpu_to_be64(ip->i_nblocks);
to->di_extsize = cpu_to_be32(ip->i_extsize);
- to->di_nextents = cpu_to_be32(xfs_ifork_nextents(&ip->i_df));
- to->di_anextents = cpu_to_be16(xfs_ifork_nextents(ip->i_afp));
to->di_forkoff = ip->i_forkoff;
to->di_aformat = xfs_ifork_format(ip->i_afp);
to->di_flags = cpu_to_be16(ip->i_diflags);
@@ -323,11 +339,14 @@ xfs_inode_to_disk(
to->di_lsn = cpu_to_be64(lsn);
memset(to->di_pad2, 0, sizeof(to->di_pad2));
uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid);
- to->di_flushiter = 0;
+ to->di_v3_pad = 0;
} else {
to->di_version = 2;
to->di_flushiter = cpu_to_be16(ip->i_flushiter);
+ memset(to->di_v2_pad, 0, sizeof(to->di_v2_pad));
}
+
+ xfs_inode_to_disk_iext_counters(ip, to);
}
static xfs_failaddr_t
@@ -336,7 +355,10 @@ xfs_dinode_verify_fork(
struct xfs_mount *mp,
int whichfork)
{
- uint32_t di_nextents = XFS_DFORK_NEXTENTS(dip, whichfork);
+ xfs_extnum_t di_nextents;
+ xfs_extnum_t max_extents;
+
+ di_nextents = xfs_dfork_nextents(dip, whichfork);
switch (XFS_DFORK_FORMAT(dip, whichfork)) {
case XFS_DINODE_FMT_LOCAL:
@@ -358,12 +380,11 @@ xfs_dinode_verify_fork(
return __this_address;
break;
case XFS_DINODE_FMT_BTREE:
- if (whichfork == XFS_ATTR_FORK) {
- if (di_nextents > MAXAEXTNUM)
- return __this_address;
- } else if (di_nextents > MAXEXTNUM) {
+ max_extents = xfs_iext_max_nextents(
+ xfs_dinode_has_large_extent_counts(dip),
+ whichfork);
+ if (di_nextents > max_extents)
return __this_address;
- }
break;
default:
return __this_address;
@@ -396,6 +417,24 @@ xfs_dinode_verify_forkoff(
return NULL;
}
+static xfs_failaddr_t
+xfs_dinode_verify_nrext64(
+ struct xfs_mount *mp,
+ struct xfs_dinode *dip)
+{
+ if (xfs_dinode_has_large_extent_counts(dip)) {
+ if (!xfs_has_large_extent_counts(mp))
+ return __this_address;
+ if (dip->di_nrext64_pad != 0)
+ return __this_address;
+ } else if (dip->di_version >= 3) {
+ if (dip->di_v3_pad != 0)
+ return __this_address;
+ }
+
+ return NULL;
+}
+
xfs_failaddr_t
xfs_dinode_verify(
struct xfs_mount *mp,
@@ -407,6 +446,9 @@ xfs_dinode_verify(
uint16_t flags;
uint64_t flags2;
uint64_t di_size;
+ xfs_extnum_t nextents;
+ xfs_extnum_t naextents;
+ xfs_filblks_t nblocks;
if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))
return __this_address;
@@ -437,10 +479,19 @@ xfs_dinode_verify(
if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0)
return __this_address;
+ fa = xfs_dinode_verify_nrext64(mp, dip);
+ if (fa)
+ return fa;
+
+ nextents = xfs_dfork_data_extents(dip);
+ naextents = xfs_dfork_attr_extents(dip);
+ nblocks = be64_to_cpu(dip->di_nblocks);
+
/* Fork checks carried over from xfs_iformat_fork */
- if (mode &&
- be32_to_cpu(dip->di_nextents) + be16_to_cpu(dip->di_anextents) >
- be64_to_cpu(dip->di_nblocks))
+ if (mode && nextents + naextents > nblocks)
+ return __this_address;
+
+ if (S_ISDIR(mode) && nextents > mp->m_dir_geo->max_extents)
return __this_address;
if (mode && XFS_DFORK_BOFF(dip) > mp->m_sb.sb_inodesize)
@@ -497,7 +548,7 @@ xfs_dinode_verify(
default:
return __this_address;
}
- if (dip->di_anextents)
+ if (naextents)
return __this_address;
}
@@ -639,7 +690,7 @@ xfs_inode_validate_extsize(
if (extsize_bytes % blocksize_bytes)
return __this_address;
- if (extsize > MAXEXTLEN)
+ if (extsize > XFS_MAX_BMBT_EXTLEN)
return __this_address;
if (!rt_flag && extsize > mp->m_sb.sb_agblocks / 2)
@@ -696,7 +747,7 @@ xfs_inode_validate_cowextsize(
if (cowextsize_bytes % mp->m_sb.sb_blocksize)
return __this_address;
- if (cowextsize > MAXEXTLEN)
+ if (cowextsize > XFS_MAX_BMBT_EXTLEN)
return __this_address;
if (cowextsize > mp->m_sb.sb_agblocks / 2)