diff options
author | Joshua Ashton <joshua@froggi.es> | 2023-08-13 18:34:17 +0100 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@linux.dev> | 2023-12-26 18:13:20 -0500 |
commit | ff3c05d660610ec088c4e4222404eddb92795458 (patch) | |
tree | f61fed18478f595ef24c9070b81721003b0fe231 | |
parent | f5ddd17a1be1cab7a7637e41223429237e934306 (diff) |
bcachefs: Implement casefoldingcasefold
This patch implements support for case-insensitive file name lookups
in bcachefs.
The implementation uses the same UTF-8 lowering and normalization that
ext4 and f2fs is using.
More information is provided in Documentation/bcachefs/casefolding.rst
Signed-off-by: Joshua Ashton <joshua@froggi.es>
Cc: André Almeida <andrealmeid@igalia.com>
Cc: Gabriel Krisman Bertazi <krisman@suse.de>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r-- | Documentation/filesystems/bcachefs/casefolding.rst | 87 | ||||
-rw-r--r-- | fs/bcachefs/bcachefs.h | 6 | ||||
-rw-r--r-- | fs/bcachefs/bcachefs_format.h | 26 | ||||
-rw-r--r-- | fs/bcachefs/dirent.c | 199 | ||||
-rw-r--r-- | fs/bcachefs/fs-common.c | 4 | ||||
-rw-r--r-- | fs/bcachefs/fs-ioctl.c | 22 | ||||
-rw-r--r-- | fs/bcachefs/fs-ioctl.h | 20 | ||||
-rw-r--r-- | fs/bcachefs/fs.c | 17 | ||||
-rw-r--r-- | fs/bcachefs/sb-errors.h | 4 | ||||
-rw-r--r-- | fs/bcachefs/str_hash.h | 4 | ||||
-rw-r--r-- | fs/bcachefs/super.c | 19 |
11 files changed, 358 insertions, 50 deletions
diff --git a/Documentation/filesystems/bcachefs/casefolding.rst b/Documentation/filesystems/bcachefs/casefolding.rst new file mode 100644 index 000000000000..6546aa4f7a86 --- /dev/null +++ b/Documentation/filesystems/bcachefs/casefolding.rst @@ -0,0 +1,87 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Casefolding +=========== + +bcachefs has support for case-insensitive file and directory +lookups using the regular `chattr +F` (`S_CASEFOLD`, `FS_CASEFOLD_FL`) +casefolding attributes. + +The main usecase for casefolding is compatibility with software written +against other filesystems that rely on casefolded lookups +(eg. NTFS and Wine/Proton). +Taking advantage of file-system level casefolding can lead to great +loading time gains in many applications and games. + +Casefolding support requires a kernel with the `CONFIG_UNICODE` enabled. +Once a directory has been flagged for casefolding, a feature bit +is enabled on the superblock which marks the filesystem as using +casefolding. +When the feature bit for casefolding is enabled, it is no longer possible +to mount that filesystem on kernels without `CONFIG_UNICODE` enabled. + +On the lookup/query side: casefolding is implemented by allocating a new +string of `BCH_NAME_MAX` length using the `utf8_casefold` function to +casefold the query string. + +On the dirent side: casefolding is implemented by ensuring the `bkey`'s +hash is made from the casefolded string and storing the cached casefolded +name with the regular name in the dirent. + +The structure looks like this: + +Regular: [dirent data][regular name][nul][nul]... +Casefolded: [dirent data][reg len][cf len][regular name][casefolded name][nul][nul]... + +(Do note, the number of `NUL`s here is merely for illustration, they count can vary + per-key, and they may not even be present if the key is aligned to `sizeof(u64)`.) + +This is efficient as it means that for all file lookups that require casefolding, +it has identical performance to a regular lookup: +a hash comparison and a `memcmp` of the name. + +Rationale +--------- + +Several designs were considered for this system: +One was to introduce a dirent_v2, however that would be painful especially as +the hash system only has support for a single key type. This would also need +`BCH_NAME_MAX` to change between versions, and a new feature bit. + +Another option was to store without the two lengths, and just take the length of +the regular name and casefolded name contiguously / 2 as the length. This would +assume that the regular length == casefolded length, but that could potentially +not be true, if the uppercase unicode glyph had a different UTF-8 encoding than +the lowercase unicode glyph. +It would be possible to disregard the casefold cache for those cases, but it was +decided to simply encode the two string lengths in the key to avoid random +performance issues if this edgecase was ever hit. + +The option settled on was to use a free-bit in d_type to mark a dirent as having +a casefold cache, and then treat the first 4 bytes the name block as lengths. +You can see this in the `d_cf_name_block` member of union in `bch_dirent`. + +The feature bit was used to allow casefolding support to be enabled for the majority +of users, but some allow users who have no need for the feature to still use bcachefs as +`CONFIG_UNICODE` can increase the kernel side a significant amount due to the tables used, +which may be decider between using bcachefs for eg. embedded platforms. + +Other filesystems like ext4 and f2fs have a super-block level option for casefolding +encoding, but bcachefs currently does not provide this. ext4 and f2fs do not expose +any encodings than a single UTF-8 version. When future encodings are desirable, +they will be added trivially using the opts mechanism. + +dentry/dcache considerations +--------- + +Currently, in casefolded directories, bcachefs (like other filesystems) will not cache +negative dentry's. + +This is because currently doing so presents a problem in the following scenario: + - Lookup file "blAH" in a casefolded directory + - Creation of file "BLAH" in a casefolded directory + - Lookup file "blAH" in a casefolded directory +This would fail if negative dentry's were cached. + +This is slightly suboptimal, but could be fixed in future with some vfs work. + diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 3926e9e3ba96..305797a7dd6b 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -203,6 +203,7 @@ #include <linux/types.h> #include <linux/workqueue.h> #include <linux/zstd.h> +#include <linux/unicode.h> #include "bcachefs_format.h" #include "errcode.h" @@ -723,6 +724,8 @@ enum bch_write_ref { BCH_WRITE_REF_NR, }; +#define BCH_FS_DEFAULT_UTF8_ENCODING UNICODE_AGE(12, 1, 0) + struct bch_fs { struct closure cl; @@ -798,6 +801,9 @@ struct bch_fs { u64 compat; } sb; +#if IS_ENABLED(CONFIG_UNICODE) + struct unicode_map *cf_encoding; +#endif struct bch_sb_handle disk_sb; diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 2105198daf3b..5a9263720609 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -844,7 +844,8 @@ enum inode_opt_id { x(i_size_dirty, 5) \ x(i_sectors_dirty, 6) \ x(unlinked, 7) \ - x(backptr_untrusted, 8) + x(backptr_untrusted, 8) \ + x(casefolded, 9) /* bits 20+ reserved for packed fields below: */ @@ -903,9 +904,25 @@ struct bch_dirent { * Copy of mode bits 12-15 from the target inode - so userspace can get * the filetype without having to do a stat() */ - __u8 d_type; +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u8 d_type:5, + d_unused:2, + d_casefold:1; +#elif defined(__BIG_ENDIAN_BITFIELD) + __u8 d_casefold:1, + d_unused:2, + d_type:5; +#endif - __u8 d_name[]; + union { + struct { + __u8 d_pad; + __le16 d_name_len; + __le16 d_cf_name_len; + __u8 d_names[0]; + } d_cf_name_block __packed; + __u8 d_name[0]; + } __packed; } __packed __aligned(8); #define DT_SUBVOL 16 @@ -1919,7 +1936,8 @@ static inline void SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE(struct bch_sb *sb, __u x(new_varint, 15) \ x(journal_no_flush, 16) \ x(alloc_v2, 17) \ - x(extents_across_btree_nodes, 18) + x(extents_across_btree_nodes, 18) \ + x(casefolding, 19) #define BCH_SB_FEATURES_ALWAYS \ ((1ULL << BCH_FEATURE_new_extent_overwrite)| \ diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index 41f88666219c..20bad64fd5d6 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -13,8 +13,39 @@ #include <linux/dcache.h> -static inline unsigned dirent_val_u64s(unsigned len) +static inline int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info, + const struct qstr *str, struct qstr *out_cf) { +#if IS_ENABLED(CONFIG_UNICODE) + unsigned char *buf = NULL; + int ret; + + *out_cf = (struct qstr) QSTR_INIT(NULL, 0); + + buf = bch2_trans_kmalloc(trans, BCH_NAME_MAX + 1); + ret = PTR_ERR_OR_ZERO(buf); + if (ret) + return ret; + + ret = utf8_casefold(info->cf_encoding, str, buf, BCH_NAME_MAX + 1); + if (ret <= 0) + return ret; + + *out_cf = (struct qstr) QSTR_INIT(buf, ret); + return 0; +#else + *out_cf = (struct qstr) QSTR_INIT(NULL, 0); + + return -EOPNOTSUPP; +#endif +} + +static inline unsigned dirent_val_u64s(unsigned len, unsigned cf_len) +{ + if (cf_len > 0) { + return DIV_ROUND_UP(offsetof(struct bch_dirent, d_cf_name_block.d_names) + len + cf_len, + sizeof(u64)); + } return DIV_ROUND_UP(offsetof(struct bch_dirent, d_name) + len, sizeof(u64)); } @@ -31,13 +62,38 @@ static unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d) #endif return bkey_bytes - - offsetof(struct bch_dirent, d_name) - + (d.v->d_casefold + ? offsetof(struct bch_dirent, d_cf_name_block.d_names) + : offsetof(struct bch_dirent, d_name)) - trailing_nuls; } struct qstr bch2_dirent_get_name(struct bkey_s_c_dirent d) { - return (struct qstr) QSTR_INIT(d.v->d_name, bch2_dirent_name_bytes(d)); + if (d.v->d_casefold) { + unsigned name_len = le16_to_cpu(d.v->d_cf_name_block.d_name_len); + return (struct qstr) QSTR_INIT(&d.v->d_cf_name_block.d_names[0], name_len); + } else { + return (struct qstr) QSTR_INIT(d.v->d_name, bch2_dirent_name_bytes(d)); + } +} + +static struct qstr bch2_dirent_get_casefold_name(struct bkey_s_c_dirent d) +{ + if (d.v->d_casefold) { + unsigned name_len = le16_to_cpu(d.v->d_cf_name_block.d_name_len); + unsigned cf_name_len = le16_to_cpu(d.v->d_cf_name_block.d_cf_name_len); + return (struct qstr) QSTR_INIT(&d.v->d_cf_name_block.d_names[name_len], cf_name_len); + } else { + return (struct qstr) QSTR_INIT(NULL, 0); + } +} + +static inline struct qstr bch2_dirent_get_lookup_name(struct bkey_s_c_dirent d) +{ + return d.v->d_casefold + ? bch2_dirent_get_casefold_name(d) + : bch2_dirent_get_name(d); } static u64 bch2_dirent_hash(const struct bch_hash_info *info, @@ -60,7 +116,7 @@ static u64 dirent_hash_key(const struct bch_hash_info *info, const void *key) static u64 dirent_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k) { struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); - struct qstr name = bch2_dirent_get_name(d); + struct qstr name = bch2_dirent_get_lookup_name(d); return bch2_dirent_hash(info, &name); } @@ -68,7 +124,7 @@ static u64 dirent_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k) static bool dirent_cmp_key(struct bkey_s_c _l, const void *_r) { struct bkey_s_c_dirent l = bkey_s_c_to_dirent(_l); - const struct qstr l_name = bch2_dirent_get_name(l); + const struct qstr l_name = bch2_dirent_get_lookup_name(l); const struct qstr *r_name = _r; return !qstr_eq(l_name, *r_name); @@ -78,8 +134,8 @@ static bool dirent_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r) { struct bkey_s_c_dirent l = bkey_s_c_to_dirent(_l); struct bkey_s_c_dirent r = bkey_s_c_to_dirent(_r); - const struct qstr l_name = bch2_dirent_get_name(l); - const struct qstr r_name = bch2_dirent_get_name(r); + const struct qstr l_name = bch2_dirent_get_lookup_name(l); + const struct qstr r_name = bch2_dirent_get_lookup_name(r); return !qstr_eq(l_name, r_name); } @@ -108,44 +164,57 @@ int bch2_dirent_invalid(struct bch_fs *c, struct bkey_s_c k, struct printbuf *err) { struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); + unsigned name_block_len = bch2_dirent_name_bytes(d); struct qstr d_name = bch2_dirent_get_name(d); + struct qstr d_cf_name = bch2_dirent_get_casefold_name(d); int ret = 0; - bkey_fsck_err_on(!d_name.len, c, err, - dirent_empty_name, + bkey_fsck_err_on(!d_name.len, + c, err, dirent_empty_name, "empty name"); - bkey_fsck_err_on(bkey_val_u64s(k.k) > dirent_val_u64s(d_name.len), c, err, - dirent_val_too_big, - "value too big (%zu > %u)", - bkey_val_u64s(k.k), dirent_val_u64s(d_name.len)); + bkey_fsck_err_on(d_name.len + d_cf_name.len > name_block_len, + c, err, dirent_val_too_big, + "dirent names exceed bkey size (%d + %d > %d)", + d_name.len, d_cf_name.len, name_block_len); /* * Check new keys don't exceed the max length * (older keys may be larger.) */ - bkey_fsck_err_on((flags & BKEY_INVALID_COMMIT) && d_name.len > BCH_NAME_MAX, c, err, - dirent_name_too_long, + bkey_fsck_err_on((flags & BKEY_INVALID_COMMIT) && d_name.len > BCH_NAME_MAX, + c, err, dirent_name_too_long, "dirent name too big (%u > %u)", d_name.len, BCH_NAME_MAX); - bkey_fsck_err_on(d_name.len != strnlen(d_name.name, d_name.len), c, err, - dirent_name_embedded_nul, + bkey_fsck_err_on(d_name.len != strnlen(d_name.name, d_name.len), + c, err, dirent_name_embedded_nul, "dirent has stray data after name's NUL"); bkey_fsck_err_on((d_name.len == 1 && !memcmp(d_name.name, ".", 1)) || - (d_name.len == 2 && !memcmp(d_name.name, "..", 2)), c, err, - dirent_name_dot_or_dotdot, + (d_name.len == 2 && !memcmp(d_name.name, "..", 2)), + c, err, dirent_name_dot_or_dotdot, "invalid name"); - bkey_fsck_err_on(memchr(d_name.name, '/', d_name.len), c, err, - dirent_name_has_slash, + bkey_fsck_err_on(memchr(d_name.name, '/', d_name.len), + c, err, dirent_name_has_slash, "name with /"); bkey_fsck_err_on(d.v->d_type != DT_SUBVOL && - le64_to_cpu(d.v->d_inum) == d.k->p.inode, c, err, - dirent_to_itself, + le64_to_cpu(d.v->d_inum) == d.k->p.inode, + c, err, dirent_to_itself, "dirent points to own directory"); + + if (d.v->d_casefold) { + bkey_fsck_err_on((flags & BKEY_INVALID_COMMIT) && d_cf_name.len > BCH_NAME_MAX, + c, err, dirent_cf_name_too_big, + "dirent w/ cf name too big (%u > %u)", + d_cf_name.len, BCH_NAME_MAX); + + bkey_fsck_err_on(d_cf_name.len != strnlen(d_cf_name.name, d_cf_name.len), + c, err, dirent_stray_data_after_cf_name, + "dirent has stray data after cf name's NUL"); + } fsck_err: return ret; } @@ -168,10 +237,11 @@ void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c, static struct bkey_i_dirent *dirent_alloc_key(struct btree_trans *trans, subvol_inum dir, u8 type, - int name_len, u64 dst) + int name_len, int cf_name_len, + u64 dst) { struct bkey_i_dirent *dirent; - unsigned u64s = BKEY_U64s + dirent_val_u64s(name_len); + unsigned u64s = BKEY_U64s + dirent_val_u64s(name_len, cf_name_len); BUG_ON(u64s > U8_MAX); @@ -190,6 +260,8 @@ static struct bkey_i_dirent *dirent_alloc_key(struct btree_trans *trans, } dirent->v.d_type = type; + dirent->v.d_unused = 0; + dirent->v.d_casefold = cf_name_len ? 1 : 0; return dirent; } @@ -197,6 +269,8 @@ static struct bkey_i_dirent *dirent_alloc_key(struct btree_trans *trans, static void dirent_init_regular_name(struct bkey_i_dirent *dirent, const struct qstr *name) { + EBUG_ON(dirent->v.d_casefold); + memcpy(&dirent->v.d_name[0], name->name, name->len); memset(&dirent->v.d_name[name->len], 0, bkey_val_bytes(&dirent->k) - @@ -204,10 +278,30 @@ static void dirent_init_regular_name(struct bkey_i_dirent *dirent, name->len); } +static void dirent_init_casefolded_name(struct bkey_i_dirent *dirent, + const struct qstr *name, + const struct qstr *cf_name) +{ + EBUG_ON(!dirent->v.d_casefold); + EBUG_ON(!cf_name->len); + + dirent->v.d_cf_name_block.d_name_len = name->len; + dirent->v.d_cf_name_block.d_cf_name_len = cf_name->len; + memcpy(&dirent->v.d_cf_name_block.d_names[0], name->name, name->len); + memcpy(&dirent->v.d_cf_name_block.d_names[name->len], cf_name->name, cf_name->len); + memset(&dirent->v.d_cf_name_block.d_names[name->len + cf_name->len], 0, + bkey_val_bytes(&dirent->k) - + offsetof(struct bch_dirent, d_cf_name_block.d_names) - + name->len + cf_name->len); + + EBUG_ON(bch2_dirent_get_casefold_name(dirent_i_to_s_c(dirent)).len != cf_name->len); +} + static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans, subvol_inum dir, u8 type, const struct qstr *name, + const struct qstr *cf_name, u64 dst) { struct bkey_i_dirent *dirent; @@ -215,13 +309,16 @@ static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans, if (name->len > BCH_NAME_MAX) return ERR_PTR(-ENAMETOOLONG); - dirent = dirent_alloc_key(trans, dir, type, name->len, dst); + dirent = dirent_alloc_key(trans, dir, type, name->len, cf_name ? cf_name->len : 0, dst); if (IS_ERR(dirent)) return dirent; - dirent_init_regular_name(dirent, name); + if (cf_name) + dirent_init_casefolded_name(dirent, name, cf_name); + else + dirent_init_regular_name(dirent, name); - EBUG_ON(bch2_dirent_name_bytes(dirent_i_to_s_c(dirent)) != name->len); + EBUG_ON(bch2_dirent_get_name(dirent_i_to_s_c(dirent)).len != name->len); return dirent; } @@ -237,7 +334,7 @@ int bch2_dirent_create_snapshot(struct btree_trans *trans, struct bkey_i_dirent *dirent; int ret; - dirent = dirent_create_key(trans, zero_inum, type, name, dst_inum); + dirent = dirent_create_key(trans, zero_inum, type, name, NULL, dst_inum); ret = PTR_ERR_OR_ZERO(dirent); if (ret) return ret; @@ -263,7 +360,16 @@ int bch2_dirent_create(struct btree_trans *trans, subvol_inum dir, struct bkey_i_dirent *dirent; int ret; - dirent = dirent_create_key(trans, dir, type, name, dst_inum); + if (hash_info->cf_encoding) { + struct qstr cf_name; + ret = bch2_casefold(trans, hash_info, name, &cf_name); + if (ret) + return ret; + dirent = dirent_create_key(trans, dir, type, name, &cf_name, dst_inum); + } else { + dirent = dirent_create_key(trans, dir, type, name, NULL, dst_inum); + } + ret = PTR_ERR_OR_ZERO(dirent); if (ret) return ret; @@ -313,6 +419,8 @@ int bch2_dirent_rename(struct btree_trans *trans, const struct qstr *dst_name, subvol_inum *dst_inum, u64 *dst_offset, enum bch_rename_mode mode) { + struct qstr src_name_lookup = *src_name; + struct qstr dst_name_lookup = *dst_name; struct btree_iter src_iter = { NULL }; struct btree_iter dst_iter = { NULL }; struct bkey_s_c old_src, old_dst = bkey_s_c_null; @@ -329,8 +437,13 @@ int bch2_dirent_rename(struct btree_trans *trans, memset(dst_inum, 0, sizeof(*dst_inum)); /* Lookup src: */ + ret = src_hash->cf_encoding + ? bch2_casefold(trans, src_hash, src_name, &src_name_lookup) + : 0; + if (ret) + goto out; ret = bch2_hash_lookup(trans, &src_iter, bch2_dirent_hash_desc, - src_hash, src_dir, src_name, + src_hash, src_dir, &src_name_lookup, BTREE_ITER_INTENT); if (ret) goto out; @@ -352,6 +465,11 @@ int bch2_dirent_rename(struct btree_trans *trans, /* Lookup dst: */ + ret = dst_hash->cf_encoding + ? bch2_casefold(trans, dst_hash, dst_name, &dst_name_lookup) + : 0; + if (ret) + goto out; if (mode == BCH_RENAME) { /* * Note that we're _not_ checking if the target already exists - @@ -359,12 +477,12 @@ int bch2_dirent_rename(struct btree_trans *trans, * correctness: */ ret = bch2_hash_hole(trans, &dst_iter, bch2_dirent_hash_desc, - dst_hash, dst_dir, dst_name); + dst_hash, dst_dir, &dst_name_lookup); if (ret) goto out; } else { ret = bch2_hash_lookup(trans, &dst_iter, bch2_dirent_hash_desc, - dst_hash, dst_dir, dst_name, + dst_hash, dst_dir, &dst_name_lookup, BTREE_ITER_INTENT); if (ret) goto out; @@ -389,7 +507,8 @@ int bch2_dirent_rename(struct btree_trans *trans, *src_offset = dst_iter.pos.offset; /* Create new dst key: */ - new_dst = dirent_create_key(trans, dst_dir, 0, dst_name, 0); + new_dst = dirent_create_key(trans, dst_dir, 0, dst_name, + dst_hash->cf_encoding ? &dst_name_lookup : NULL, 0); ret = PTR_ERR_OR_ZERO(new_dst); if (ret) goto out; @@ -399,7 +518,8 @@ int bch2_dirent_rename(struct btree_trans *trans, /* Create new src key: */ if (mode == BCH_RENAME_EXCHANGE) { - new_src = dirent_create_key(trans, src_dir, 0, src_name, 0); + new_src = dirent_create_key(trans, src_dir, 0, src_name, + src_hash->cf_encoding ? &src_name_lookup : NULL, 0); ret = PTR_ERR_OR_ZERO(new_src); if (ret) goto out; @@ -491,6 +611,7 @@ int __bch2_dirent_lookup_trans(struct btree_trans *trans, const struct qstr *name, subvol_inum *inum, unsigned flags) { + struct qstr lookup_name = *name; struct bkey_s_c k; struct bkey_s_c_dirent d; u32 snapshot; @@ -500,8 +621,14 @@ int __bch2_dirent_lookup_trans(struct btree_trans *trans, if (ret) return ret; + ret = hash_info->cf_encoding + ? bch2_casefold(trans, hash_info, name, &lookup_name) + : 0; + if (ret) + return ret; + ret = bch2_hash_lookup(trans, iter, bch2_dirent_hash_desc, - hash_info, dir, name, flags); + hash_info, dir, &lookup_name, flags); if (ret) return ret; diff --git a/fs/bcachefs/fs-common.c b/fs/bcachefs/fs-common.c index 4496cf91a4c1..1f53efb0ac35 100644 --- a/fs/bcachefs/fs-common.c +++ b/fs/bcachefs/fs-common.c @@ -46,6 +46,10 @@ int bch2_create_trans(struct btree_trans *trans, if (ret) goto err; + /* Inherit casefold state from parent. */ + if (S_ISDIR(mode)) + new_inode->bi_flags |= dir_u->bi_flags & BCH_INODE_casefolded; + if (!(flags & BCH_CREATE_SNAPSHOT)) { /* Normal create path - allocate a new inode: */ bch2_inode_init_late(new_inode, now, uid, gid, mode, rdev, dir_u); diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c index 8098a3a299d1..8ea1eddbb994 100644 --- a/fs/bcachefs/fs-ioctl.c +++ b/fs/bcachefs/fs-ioctl.c @@ -54,6 +54,28 @@ static int bch2_inode_flags_set(struct btree_trans *trans, (newflags & (BCH_INODE_nodump|BCH_INODE_noatime)) != newflags) return -EINVAL; + if ((newflags ^ oldflags) & BCH_INODE_casefolded) { +#if IS_ENABLED(CONFIG_UNICODE) + int ret = 0; + /* Not supported on individual files. */ + if (!S_ISDIR(bi->bi_mode)) + return -EOPNOTSUPP; + + /* + * Make sure the dir is empty, as otherwise we'd need to + * rehash everything and update the dirent keys. + */ + ret = bch2_empty_dir_trans(trans, inode_inum(inode)); + if (ret < 0) + return ret; + + bch2_check_set_feature(c, BCH_FEATURE_casefolding); +#else + printk(KERN_ERR "Cannot use casefolding on a kernel without CONFIG_UNICODE\n"); + return -EOPNOTSUPP; +#endif + } + if (s->set_projinherit) { bi->bi_fields_set &= ~(1 << Inode_opt_project); bi->bi_fields_set |= ((int) s->projinherit << Inode_opt_project); diff --git a/fs/bcachefs/fs-ioctl.h b/fs/bcachefs/fs-ioctl.h index d30f9bb056fd..ecd3bfdcde21 100644 --- a/fs/bcachefs/fs-ioctl.h +++ b/fs/bcachefs/fs-ioctl.h @@ -6,19 +6,21 @@ /* bcachefs inode flags -> vfs inode flags: */ static const __maybe_unused unsigned bch_flags_to_vfs[] = { - [__BCH_INODE_sync] = S_SYNC, - [__BCH_INODE_immutable] = S_IMMUTABLE, - [__BCH_INODE_append] = S_APPEND, - [__BCH_INODE_noatime] = S_NOATIME, + [__BCH_INODE_sync] = S_SYNC, + [__BCH_INODE_immutable] = S_IMMUTABLE, + [__BCH_INODE_append] = S_APPEND, + [__BCH_INODE_noatime] = S_NOATIME, + [__BCH_INODE_casefolded] = S_CASEFOLD, }; /* bcachefs inode flags -> FS_IOC_GETFLAGS: */ static const __maybe_unused unsigned bch_flags_to_uflags[] = { - [__BCH_INODE_sync] = FS_SYNC_FL, - [__BCH_INODE_immutable] = FS_IMMUTABLE_FL, - [__BCH_INODE_append] = FS_APPEND_FL, - [__BCH_INODE_nodump] = FS_NODUMP_FL, - [__BCH_INODE_noatime] = FS_NOATIME_FL, + [__BCH_INODE_sync] = FS_SYNC_FL, + [__BCH_INODE_immutable] = FS_IMMUTABLE_FL, + [__BCH_INODE_append] = FS_APPEND_FL, + [__BCH_INODE_nodump] = FS_NODUMP_FL, + [__BCH_INODE_noatime] = FS_NOATIME_FL, + [__BCH_INODE_casefolded] = FS_CASEFOLD_FL, }; /* bcachefs inode flags -> FS_IOC_FSGETXATTR: */ diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 34cb22a9c05d..9e31ec7fdce6 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -367,6 +367,23 @@ static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry, if (!ret) vinode = bch2_vfs_inode_get(c, inum); +#if IS_ENABLED(CONFIG_UNICODE) + if (!vinode && IS_CASEFOLDED(vdir)) { + /* + * Do not cache a negative dentry in casefolded directories + * as it would need to be invalidated in the following situation: + * - Lookup file "blAH" in a casefolded directory + * - Creation of file "BLAH" in a casefolded directory + * - Lookup file "blAH" in a casefolded directory + * which would fail if we had a negative dentry. + * + * We should come back to this when VFS has a method to handle + * this edgecase. + */ + return NULL; + } +#endif + return d_splice_alias(vinode, dentry); } diff --git a/fs/bcachefs/sb-errors.h b/fs/bcachefs/sb-errors.h index 92289ce62cbf..934ee34538a3 100644 --- a/fs/bcachefs/sb-errors.h +++ b/fs/bcachefs/sb-errors.h @@ -249,7 +249,9 @@ x(dir_loop, 241) \ x(hash_table_key_duplicate, 242) \ x(hash_table_key_wrong_offset, 243) \ - x(unlinked_inode_not_on_deleted_list, 244) + x(unlinked_inode_not_on_deleted_list, 244) \ + x(dirent_cf_name_too_big, 245) \ + x(dirent_stray_data_after_cf_name, 246) enum bch_sb_error_id { #define x(t, n) BCH_FSCK_ERR_##t = n, diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h index 89fdb7c21134..f6d6668e8a73 100644 --- a/fs/bcachefs/str_hash.h +++ b/fs/bcachefs/str_hash.h @@ -44,6 +44,7 @@ bch2_str_hash_opt_to_type(struct bch_fs *c, enum bch_str_hash_opts opt) struct bch_hash_info { u8 type; + struct unicode_map *cf_encoding; /* * For crc32 or crc64 string hashes the first key value of * the siphash_key (k0) is used as the key. @@ -58,6 +59,9 @@ bch2_hash_info_init(struct bch_fs *c, const struct bch_inode_unpacked *bi) struct bch_hash_info info = { .type = (bi->bi_flags >> INODE_STR_HASH_OFFSET) & ~(~0U << INODE_STR_HASH_BITS), +#if IS_ENABLED(CONFIG_UNICODE) + .cf_encoding = !!(bi->bi_flags & BCH_INODE_casefolded) ? c->cf_encoding : NULL, +#endif .siphash_key = { .k0 = bi->bi_hash_seed } }; diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 0f3a924ca1f9..b1edf8e9f80b 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -820,6 +820,25 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) if (ret) goto err; +#if IS_ENABLED(CONFIG_UNICODE) + /* Default encoding until we can potentially have more as an option. */ + c->cf_encoding = utf8_load(BCH_FS_DEFAULT_UTF8_ENCODING); + if (IS_ERR(c->cf_encoding)) { + printk(KERN_ERR "Cannot load UTF-8 encoding for filesystem. Version: %u.%u.%u", + unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING), + unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING), + unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING)); + ret = -EINVAL; + goto err; + } +#else + if (c->sb.features & (1ULL << BCH_FEATURE_casefolding)) { + printk(KERN_ERR "Cannot mount a filesystem with casefolding on a kernel without CONFIG_UNICODE\n"); + ret = -EINVAL; + goto err; + } +#endif + pr_uuid(&name, c->sb.user_uuid.b); strscpy(c->name, name.buf, sizeof(c->name)); printbuf_exit(&name); |