From: "Darrick J. Wong" djwong@kernel.org
maillist inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9VTE3 CVE: NA
Reference: https://lore.kernel.org/all/20240326133813.3224593-1-john.g.garry@oracle.com...
--------------------------------
Add a new inode flag to require that all file data extent mappings must be aligned (both the file offset range and the allocated space itself) to the extent size hint. Having a separate COW extent size hint is no longer allowed.
The goal here is to enable sysadmins and users to mandate that all space mappings in a file must have a startoff/blockcount that are aligned to (say) a 2MB alignment and that the startblock/blockcount will follow the same alignment.
jpg: Enforce extsize is a power-of-2 for forcealign Signed-off-by: "Darrick J. Wong" djwong@kernel.org Co-developed-by: John Garry john.g.garry@oracle.com Signed-off-by: John Garry john.g.garry@oracle.com Signed-off-by: Long Li leo.lilong@huawei.com --- fs/xfs/libxfs/xfs_format.h | 6 +++++- fs/xfs/libxfs/xfs_inode_buf.c | 40 +++++++++++++++++++++++++++++++++++ fs/xfs/libxfs/xfs_inode_buf.h | 3 +++ fs/xfs/libxfs/xfs_sb.c | 2 ++ fs/xfs/xfs_inode.c | 14 ++++++++++++ fs/xfs/xfs_inode.h | 4 ++++ fs/xfs/xfs_ioctl.c | 32 ++++++++++++++++++++++++++++ fs/xfs/xfs_mount.h | 2 ++ fs/xfs/xfs_super.c | 5 +++++ include/uapi/linux/fs.h | 2 ++ 10 files changed, 109 insertions(+), 1 deletion(-)
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 54832df8540f..fb537bb7dba5 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -353,6 +353,7 @@ xfs_sb_has_compat_feature( #define XFS_SB_FEAT_RO_COMPAT_RMAPBT (1 << 1) /* reverse map btree */ #define XFS_SB_FEAT_RO_COMPAT_REFLINK (1 << 2) /* reflinked files */ #define XFS_SB_FEAT_RO_COMPAT_INOBTCNT (1 << 3) /* inobt block counts */ +#define XFS_SB_FEAT_RO_COMPAT_FORCEALIGN (1 << 30) /* aligned file data extents */ #define XFS_SB_FEAT_RO_COMPAT_ALL \ (XFS_SB_FEAT_RO_COMPAT_FINOBT | \ XFS_SB_FEAT_RO_COMPAT_RMAPBT | \ @@ -972,15 +973,18 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev) #define XFS_DIFLAG2_REFLINK_BIT 1 /* file's blocks may be shared */ #define XFS_DIFLAG2_COWEXTSIZE_BIT 2 /* copy on write extent size hint */ #define XFS_DIFLAG2_BIGTIME_BIT 3 /* big timestamps */ +/* data extent mappings for regular files must be aligned to extent size hint */ +#define XFS_DIFLAG2_FORCEALIGN_BIT 5
#define XFS_DIFLAG2_DAX (1 << XFS_DIFLAG2_DAX_BIT) #define XFS_DIFLAG2_REFLINK (1 << XFS_DIFLAG2_REFLINK_BIT) #define XFS_DIFLAG2_COWEXTSIZE (1 << XFS_DIFLAG2_COWEXTSIZE_BIT) #define XFS_DIFLAG2_BIGTIME (1 << XFS_DIFLAG2_BIGTIME_BIT) +#define XFS_DIFLAG2_FORCEALIGN (1 << XFS_DIFLAG2_FORCEALIGN_BIT)
#define XFS_DIFLAG2_ANY \ (XFS_DIFLAG2_DAX | XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE | \ - XFS_DIFLAG2_BIGTIME) + XFS_DIFLAG2_BIGTIME | XFS_DIFLAG2_FORCEALIGN)
static inline bool xfs_dinode_has_bigtime(const struct xfs_dinode *dip) { diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 0970ae3fe538..dd9e5de65d52 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -574,6 +574,14 @@ xfs_dinode_verify( !xfs_has_bigtime(mp)) return __this_address;
+ if (flags2 & XFS_DIFLAG2_FORCEALIGN) { + fa = xfs_inode_validate_forcealign(mp, mode, flags, + be32_to_cpu(dip->di_extsize), + be32_to_cpu(dip->di_cowextsize)); + if (fa) + return fa; + } + return NULL; }
@@ -699,3 +707,35 @@ xfs_inode_validate_cowextsize(
return NULL; } + +/* Validate the forcealign inode flag */ +xfs_failaddr_t +xfs_inode_validate_forcealign( + struct xfs_mount *mp, + uint16_t mode, + uint16_t flags, + uint32_t extsize, + uint32_t cowextsize) +{ + /* superblock rocompat feature flag */ + if (!xfs_has_forcealign(mp)) + return __this_address; + + /* Only regular files and directories */ + if (!S_ISDIR(mode) && !S_ISREG(mode)) + return __this_address; + + /* Doesn't apply to realtime files */ + if (flags & XFS_DIFLAG_REALTIME) + return __this_address; + + /* Requires a non-zero power-of-2 extent size hint */ + if (extsize == 0 || !is_power_of_2(extsize)) + return __this_address; + + /* Requires no cow extent size hint */ + if (cowextsize != 0) + return __this_address; + + return NULL; +} diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h index 05c3640e135a..1bcf1415a4b5 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.h +++ b/fs/xfs/libxfs/xfs_inode_buf.h @@ -62,6 +62,9 @@ xfs_failaddr_t xfs_inode_validate_extsize(struct xfs_mount *mp, xfs_failaddr_t xfs_inode_validate_cowextsize(struct xfs_mount *mp, uint32_t cowextsize, uint16_t mode, uint16_t flags, uint64_t flags2); +xfs_failaddr_t xfs_inode_validate_forcealign(struct xfs_mount *mp, + uint16_t mode, uint16_t flags, uint32_t extsize, + uint32_t cowextsize);
static inline uint64_t xfs_inode_encode_bigtime(struct timespec64 tv) { diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index c099ccf2787d..a016ba008019 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -116,6 +116,8 @@ xfs_sb_version_to_features( features |= XFS_FEAT_REFLINK; if (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_INOBTCNT) features |= XFS_FEAT_INOBTCNT; + if (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FORCEALIGN) + features |= XFS_FEAT_FORCEALIGN; if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_FTYPE) features |= XFS_FEAT_FTYPE; if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_SPINODES) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 268bbc2d978b..d858f0b1ca02 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -643,6 +643,8 @@ _xfs_dic2xflags( flags |= FS_XFLAG_DAX; if (di_flags2 & XFS_DIFLAG2_COWEXTSIZE) flags |= FS_XFLAG_COWEXTSIZE; + if (di_flags2 & XFS_DIFLAG2_FORCEALIGN) + flags |= FS_XFLAG_FORCEALIGN; }
if (has_attr) @@ -759,6 +761,18 @@ xfs_inode_inherit_flags2( } if (pip->i_d.di_flags2 & XFS_DIFLAG2_DAX) ip->i_d.di_flags2 |= XFS_DIFLAG2_DAX; + if (pip->i_d.di_flags2 & XFS_DIFLAG2_FORCEALIGN) + ip->i_d.di_flags2 |= XFS_DIFLAG2_FORCEALIGN; + + if (ip->i_d.di_flags2 & XFS_DIFLAG2_FORCEALIGN) { + xfs_failaddr_t failaddr; + + failaddr = xfs_inode_validate_forcealign(ip->i_mount, + VFS_I(ip)->i_mode, ip->i_d.di_flags, ip->i_d.di_extsize, + ip->i_d.di_cowextsize); + if (failaddr) + ip->i_d.di_flags2 &= ~XFS_DIFLAG2_FORCEALIGN; + } }
/* diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index b552daae323f..53a2b90486f6 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -268,6 +268,10 @@ static inline bool xfs_inode_has_bigtime(struct xfs_inode *ip) return ip->i_d.di_flags2 & XFS_DIFLAG2_BIGTIME; }
+static inline bool xfs_inode_forcealign(struct xfs_inode *ip) +{ + return ip->i_d.di_flags2 & XFS_DIFLAG2_FORCEALIGN; +} /* * Return the buftarg used for data allocations on a given inode. */ diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 2337eb272235..a53038b8f736 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1198,6 +1198,8 @@ xfs_flags2diflags2( di_flags2 |= XFS_DIFLAG2_DAX; if (xflags & FS_XFLAG_COWEXTSIZE) di_flags2 |= XFS_DIFLAG2_COWEXTSIZE; + if (xflags & FS_XFLAG_FORCEALIGN) + di_flags2 |= XFS_DIFLAG2_FORCEALIGN;
return di_flags2; } @@ -1236,6 +1238,22 @@ xfs_ioctl_setattr_xflags( if (di_flags2 && !xfs_has_v3inodes(mp)) return -EINVAL;
+ /* + * Force-align requires a nonzero extent size hint and a zero cow + * extent size hint. It doesn't apply to realtime files. + */ + if (fa->fsx_xflags & FS_XFLAG_FORCEALIGN) { + if (!xfs_has_forcealign(mp)) + return -EINVAL; + if (fa->fsx_xflags & FS_XFLAG_COWEXTSIZE) + return -EINVAL; + if (!(fa->fsx_xflags & (FS_XFLAG_EXTSIZE | + FS_XFLAG_EXTSZINHERIT))) + return -EINVAL; + if (fa->fsx_xflags & FS_XFLAG_REALTIME) + return -EINVAL; + } + ip->i_d.di_flags = xfs_flags2diflags(ip, fa->fsx_xflags); ip->i_d.di_flags2 = di_flags2;
@@ -1339,6 +1357,9 @@ xfs_ioctl_setattr_check_extsize( struct xfs_mount *mp = ip->i_mount; xfs_extlen_t size; xfs_fsblock_t extsize_fsb; + xfs_failaddr_t failaddr; + uint16_t new_diflags; + uint16_t new_diflags2;
if (S_ISREG(VFS_I(ip)->i_mode) && ip->i_df.if_nextents && ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) != fa->fsx_extsize)) @@ -1363,6 +1384,17 @@ xfs_ioctl_setattr_check_extsize( if (fa->fsx_extsize % size) return -EINVAL;
+ new_diflags = xfs_flags2diflags(ip, fa->fsx_xflags); + new_diflags2 = xfs_flags2diflags2(ip, fa->fsx_xflags); + if (new_diflags2 & XFS_DIFLAG2_FORCEALIGN) { + failaddr = xfs_inode_validate_forcealign(ip->i_mount, + VFS_I(ip)->i_mode, new_diflags, + XFS_B_TO_FSB(mp, fa->fsx_extsize), + XFS_B_TO_FSB(mp, fa->fsx_cowextsize)); + if (failaddr) + return -EINVAL; + } + return 0; }
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 21547ff97b5a..bc0ed9247f47 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -274,6 +274,7 @@ typedef struct xfs_mount { #define XFS_FEAT_INOBTCNT (1ULL << 23) /* inobt block counts */ #define XFS_FEAT_BIGTIME (1ULL << 24) /* large timestamps */ #define XFS_FEAT_NEEDSREPAIR (1ULL << 25) /* needs xfs_repair */ +#define XFS_FEAT_FORCEALIGN (1ULL << 27) /* aligned file data extents */
/* Mount features */ #define XFS_FEAT_NOATTR2 (1ULL << 48) /* disable attr2 creation */ @@ -336,6 +337,7 @@ __XFS_HAS_FEAT(realtime, REALTIME) __XFS_HAS_FEAT(inobtcounts, INOBTCNT) __XFS_HAS_FEAT(bigtime, BIGTIME) __XFS_HAS_FEAT(needsrepair, NEEDSREPAIR) +__XFS_HAS_FEAT(forcealign, FORCEALIGN)
/* * Mount features diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 502fb08bfd38..cc7962438f26 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1639,6 +1639,7 @@ xfs_fc_fill_super( "DAX unsupported by block device. Turning off DAX."); xfs_mount_set_dax_mode(mp, XFS_DAX_NEVER); } + if (xfs_has_reflink(mp)) { xfs_alert(mp, "DAX and reflink cannot be used together!"); @@ -1657,6 +1658,10 @@ xfs_fc_fill_super( } }
+ if (xfs_has_forcealign(mp)) + xfs_warn(mp, +"EXPERIMENTAL forced data extent alignment feature in use. Use at your own risk!"); + if (xfs_has_reflink(mp)) { if (mp->m_sb.sb_rblocks) { xfs_alert(mp, diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 78f4091ed188..822c9f4c4ce3 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -140,6 +140,8 @@ struct fsxattr { #define FS_XFLAG_FILESTREAM 0x00004000 /* use filestream allocator */ #define FS_XFLAG_DAX 0x00008000 /* use DAX for IO */ #define FS_XFLAG_COWEXTSIZE 0x00010000 /* CoW extent size allocator hint */ +/* data extent mappings for regular files must be aligned to extent size hint */ +#define FS_XFLAG_FORCEALIGN 0x00020000 #define FS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */
/* the read-only stuff doesn't really belong here, but any other place is