summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoshua Ashton <joshua@froggi.es>2023-08-13 18:34:17 +0100
committerKent Overstreet <kent.overstreet@linux.dev>2023-12-26 18:13:20 -0500
commitff3c05d660610ec088c4e4222404eddb92795458 (patch)
treef61fed18478f595ef24c9070b81721003b0fe231
parentf5ddd17a1be1cab7a7637e41223429237e934306 (diff)
bcachefs: Implement casefoldingcasefold
This patch implements support for case-insensitive file name lookups in bcachefs. The implementation uses the same UTF-8 lowering and normalization that ext4 and f2fs is using. More information is provided in Documentation/bcachefs/casefolding.rst Signed-off-by: Joshua Ashton <joshua@froggi.es> Cc: André Almeida <andrealmeid@igalia.com> Cc: Gabriel Krisman Bertazi <krisman@suse.de> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r--Documentation/filesystems/bcachefs/casefolding.rst87
-rw-r--r--fs/bcachefs/bcachefs.h6
-rw-r--r--fs/bcachefs/bcachefs_format.h26
-rw-r--r--fs/bcachefs/dirent.c199
-rw-r--r--fs/bcachefs/fs-common.c4
-rw-r--r--fs/bcachefs/fs-ioctl.c22
-rw-r--r--fs/bcachefs/fs-ioctl.h20
-rw-r--r--fs/bcachefs/fs.c17
-rw-r--r--fs/bcachefs/sb-errors.h4
-rw-r--r--fs/bcachefs/str_hash.h4
-rw-r--r--fs/bcachefs/super.c19
11 files changed, 358 insertions, 50 deletions
diff --git a/Documentation/filesystems/bcachefs/casefolding.rst b/Documentation/filesystems/bcachefs/casefolding.rst
new file mode 100644
index 000000000000..6546aa4f7a86
--- /dev/null
+++ b/Documentation/filesystems/bcachefs/casefolding.rst
@@ -0,0 +1,87 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Casefolding
+===========
+
+bcachefs has support for case-insensitive file and directory
+lookups using the regular `chattr +F` (`S_CASEFOLD`, `FS_CASEFOLD_FL`)
+casefolding attributes.
+
+The main usecase for casefolding is compatibility with software written
+against other filesystems that rely on casefolded lookups
+(eg. NTFS and Wine/Proton).
+Taking advantage of file-system level casefolding can lead to great
+loading time gains in many applications and games.
+
+Casefolding support requires a kernel with the `CONFIG_UNICODE` enabled.
+Once a directory has been flagged for casefolding, a feature bit
+is enabled on the superblock which marks the filesystem as using
+casefolding.
+When the feature bit for casefolding is enabled, it is no longer possible
+to mount that filesystem on kernels without `CONFIG_UNICODE` enabled.
+
+On the lookup/query side: casefolding is implemented by allocating a new
+string of `BCH_NAME_MAX` length using the `utf8_casefold` function to
+casefold the query string.
+
+On the dirent side: casefolding is implemented by ensuring the `bkey`'s
+hash is made from the casefolded string and storing the cached casefolded
+name with the regular name in the dirent.
+
+The structure looks like this:
+
+Regular: [dirent data][regular name][nul][nul]...
+Casefolded: [dirent data][reg len][cf len][regular name][casefolded name][nul][nul]...
+
+(Do note, the number of `NUL`s here is merely for illustration, they count can vary
+ per-key, and they may not even be present if the key is aligned to `sizeof(u64)`.)
+
+This is efficient as it means that for all file lookups that require casefolding,
+it has identical performance to a regular lookup:
+a hash comparison and a `memcmp` of the name.
+
+Rationale
+---------
+
+Several designs were considered for this system:
+One was to introduce a dirent_v2, however that would be painful especially as
+the hash system only has support for a single key type. This would also need
+`BCH_NAME_MAX` to change between versions, and a new feature bit.
+
+Another option was to store without the two lengths, and just take the length of
+the regular name and casefolded name contiguously / 2 as the length. This would
+assume that the regular length == casefolded length, but that could potentially
+not be true, if the uppercase unicode glyph had a different UTF-8 encoding than
+the lowercase unicode glyph.
+It would be possible to disregard the casefold cache for those cases, but it was
+decided to simply encode the two string lengths in the key to avoid random
+performance issues if this edgecase was ever hit.
+
+The option settled on was to use a free-bit in d_type to mark a dirent as having
+a casefold cache, and then treat the first 4 bytes the name block as lengths.
+You can see this in the `d_cf_name_block` member of union in `bch_dirent`.
+
+The feature bit was used to allow casefolding support to be enabled for the majority
+of users, but some allow users who have no need for the feature to still use bcachefs as
+`CONFIG_UNICODE` can increase the kernel side a significant amount due to the tables used,
+which may be decider between using bcachefs for eg. embedded platforms.
+
+Other filesystems like ext4 and f2fs have a super-block level option for casefolding
+encoding, but bcachefs currently does not provide this. ext4 and f2fs do not expose
+any encodings than a single UTF-8 version. When future encodings are desirable,
+they will be added trivially using the opts mechanism.
+
+dentry/dcache considerations
+---------
+
+Currently, in casefolded directories, bcachefs (like other filesystems) will not cache
+negative dentry's.
+
+This is because currently doing so presents a problem in the following scenario:
+ - Lookup file "blAH" in a casefolded directory
+ - Creation of file "BLAH" in a casefolded directory
+ - Lookup file "blAH" in a casefolded directory
+This would fail if negative dentry's were cached.
+
+This is slightly suboptimal, but could be fixed in future with some vfs work.
+
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 3926e9e3ba96..305797a7dd6b 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -203,6 +203,7 @@
#include <linux/types.h>
#include <linux/workqueue.h>
#include <linux/zstd.h>
+#include <linux/unicode.h>
#include "bcachefs_format.h"
#include "errcode.h"
@@ -723,6 +724,8 @@ enum bch_write_ref {
BCH_WRITE_REF_NR,
};
+#define BCH_FS_DEFAULT_UTF8_ENCODING UNICODE_AGE(12, 1, 0)
+
struct bch_fs {
struct closure cl;
@@ -798,6 +801,9 @@ struct bch_fs {
u64 compat;
} sb;
+#if IS_ENABLED(CONFIG_UNICODE)
+ struct unicode_map *cf_encoding;
+#endif
struct bch_sb_handle disk_sb;
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index 2105198daf3b..5a9263720609 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -844,7 +844,8 @@ enum inode_opt_id {
x(i_size_dirty, 5) \
x(i_sectors_dirty, 6) \
x(unlinked, 7) \
- x(backptr_untrusted, 8)
+ x(backptr_untrusted, 8) \
+ x(casefolded, 9)
/* bits 20+ reserved for packed fields below: */
@@ -903,9 +904,25 @@ struct bch_dirent {
* Copy of mode bits 12-15 from the target inode - so userspace can get
* the filetype without having to do a stat()
*/
- __u8 d_type;
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ __u8 d_type:5,
+ d_unused:2,
+ d_casefold:1;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+ __u8 d_casefold:1,
+ d_unused:2,
+ d_type:5;
+#endif
- __u8 d_name[];
+ union {
+ struct {
+ __u8 d_pad;
+ __le16 d_name_len;
+ __le16 d_cf_name_len;
+ __u8 d_names[0];
+ } d_cf_name_block __packed;
+ __u8 d_name[0];
+ } __packed;
} __packed __aligned(8);
#define DT_SUBVOL 16
@@ -1919,7 +1936,8 @@ static inline void SET_BCH_SB_BACKGROUND_COMPRESSION_TYPE(struct bch_sb *sb, __u
x(new_varint, 15) \
x(journal_no_flush, 16) \
x(alloc_v2, 17) \
- x(extents_across_btree_nodes, 18)
+ x(extents_across_btree_nodes, 18) \
+ x(casefolding, 19)
#define BCH_SB_FEATURES_ALWAYS \
((1ULL << BCH_FEATURE_new_extent_overwrite)| \
diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c
index 41f88666219c..20bad64fd5d6 100644
--- a/fs/bcachefs/dirent.c
+++ b/fs/bcachefs/dirent.c
@@ -13,8 +13,39 @@
#include <linux/dcache.h>
-static inline unsigned dirent_val_u64s(unsigned len)
+static inline int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info,
+ const struct qstr *str, struct qstr *out_cf)
{
+#if IS_ENABLED(CONFIG_UNICODE)
+ unsigned char *buf = NULL;
+ int ret;
+
+ *out_cf = (struct qstr) QSTR_INIT(NULL, 0);
+
+ buf = bch2_trans_kmalloc(trans, BCH_NAME_MAX + 1);
+ ret = PTR_ERR_OR_ZERO(buf);
+ if (ret)
+ return ret;
+
+ ret = utf8_casefold(info->cf_encoding, str, buf, BCH_NAME_MAX + 1);
+ if (ret <= 0)
+ return ret;
+
+ *out_cf = (struct qstr) QSTR_INIT(buf, ret);
+ return 0;
+#else
+ *out_cf = (struct qstr) QSTR_INIT(NULL, 0);
+
+ return -EOPNOTSUPP;
+#endif
+}
+
+static inline unsigned dirent_val_u64s(unsigned len, unsigned cf_len)
+{
+ if (cf_len > 0) {
+ return DIV_ROUND_UP(offsetof(struct bch_dirent, d_cf_name_block.d_names) + len + cf_len,
+ sizeof(u64));
+ }
return DIV_ROUND_UP(offsetof(struct bch_dirent, d_name) + len,
sizeof(u64));
}
@@ -31,13 +62,38 @@ static unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d)
#endif
return bkey_bytes -
- offsetof(struct bch_dirent, d_name) -
+ (d.v->d_casefold
+ ? offsetof(struct bch_dirent, d_cf_name_block.d_names)
+ : offsetof(struct bch_dirent, d_name)) -
trailing_nuls;
}
struct qstr bch2_dirent_get_name(struct bkey_s_c_dirent d)
{
- return (struct qstr) QSTR_INIT(d.v->d_name, bch2_dirent_name_bytes(d));
+ if (d.v->d_casefold) {
+ unsigned name_len = le16_to_cpu(d.v->d_cf_name_block.d_name_len);
+ return (struct qstr) QSTR_INIT(&d.v->d_cf_name_block.d_names[0], name_len);
+ } else {
+ return (struct qstr) QSTR_INIT(d.v->d_name, bch2_dirent_name_bytes(d));
+ }
+}
+
+static struct qstr bch2_dirent_get_casefold_name(struct bkey_s_c_dirent d)
+{
+ if (d.v->d_casefold) {
+ unsigned name_len = le16_to_cpu(d.v->d_cf_name_block.d_name_len);
+ unsigned cf_name_len = le16_to_cpu(d.v->d_cf_name_block.d_cf_name_len);
+ return (struct qstr) QSTR_INIT(&d.v->d_cf_name_block.d_names[name_len], cf_name_len);
+ } else {
+ return (struct qstr) QSTR_INIT(NULL, 0);
+ }
+}
+
+static inline struct qstr bch2_dirent_get_lookup_name(struct bkey_s_c_dirent d)
+{
+ return d.v->d_casefold
+ ? bch2_dirent_get_casefold_name(d)
+ : bch2_dirent_get_name(d);
}
static u64 bch2_dirent_hash(const struct bch_hash_info *info,
@@ -60,7 +116,7 @@ static u64 dirent_hash_key(const struct bch_hash_info *info, const void *key)
static u64 dirent_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k)
{
struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
- struct qstr name = bch2_dirent_get_name(d);
+ struct qstr name = bch2_dirent_get_lookup_name(d);
return bch2_dirent_hash(info, &name);
}
@@ -68,7 +124,7 @@ static u64 dirent_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k)
static bool dirent_cmp_key(struct bkey_s_c _l, const void *_r)
{
struct bkey_s_c_dirent l = bkey_s_c_to_dirent(_l);
- const struct qstr l_name = bch2_dirent_get_name(l);
+ const struct qstr l_name = bch2_dirent_get_lookup_name(l);
const struct qstr *r_name = _r;
return !qstr_eq(l_name, *r_name);
@@ -78,8 +134,8 @@ static bool dirent_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r)
{
struct bkey_s_c_dirent l = bkey_s_c_to_dirent(_l);
struct bkey_s_c_dirent r = bkey_s_c_to_dirent(_r);
- const struct qstr l_name = bch2_dirent_get_name(l);
- const struct qstr r_name = bch2_dirent_get_name(r);
+ const struct qstr l_name = bch2_dirent_get_lookup_name(l);
+ const struct qstr r_name = bch2_dirent_get_lookup_name(r);
return !qstr_eq(l_name, r_name);
}
@@ -108,44 +164,57 @@ int bch2_dirent_invalid(struct bch_fs *c, struct bkey_s_c k,
struct printbuf *err)
{
struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
+ unsigned name_block_len = bch2_dirent_name_bytes(d);
struct qstr d_name = bch2_dirent_get_name(d);
+ struct qstr d_cf_name = bch2_dirent_get_casefold_name(d);
int ret = 0;
- bkey_fsck_err_on(!d_name.len, c, err,
- dirent_empty_name,
+ bkey_fsck_err_on(!d_name.len,
+ c, err, dirent_empty_name,
"empty name");
- bkey_fsck_err_on(bkey_val_u64s(k.k) > dirent_val_u64s(d_name.len), c, err,
- dirent_val_too_big,
- "value too big (%zu > %u)",
- bkey_val_u64s(k.k), dirent_val_u64s(d_name.len));
+ bkey_fsck_err_on(d_name.len + d_cf_name.len > name_block_len,
+ c, err, dirent_val_too_big,
+ "dirent names exceed bkey size (%d + %d > %d)",
+ d_name.len, d_cf_name.len, name_block_len);
/*
* Check new keys don't exceed the max length
* (older keys may be larger.)
*/
- bkey_fsck_err_on((flags & BKEY_INVALID_COMMIT) && d_name.len > BCH_NAME_MAX, c, err,
- dirent_name_too_long,
+ bkey_fsck_err_on((flags & BKEY_INVALID_COMMIT) && d_name.len > BCH_NAME_MAX,
+ c, err, dirent_name_too_long,
"dirent name too big (%u > %u)",
d_name.len, BCH_NAME_MAX);
- bkey_fsck_err_on(d_name.len != strnlen(d_name.name, d_name.len), c, err,
- dirent_name_embedded_nul,
+ bkey_fsck_err_on(d_name.len != strnlen(d_name.name, d_name.len),
+ c, err, dirent_name_embedded_nul,
"dirent has stray data after name's NUL");
bkey_fsck_err_on((d_name.len == 1 && !memcmp(d_name.name, ".", 1)) ||
- (d_name.len == 2 && !memcmp(d_name.name, "..", 2)), c, err,
- dirent_name_dot_or_dotdot,
+ (d_name.len == 2 && !memcmp(d_name.name, "..", 2)),
+ c, err, dirent_name_dot_or_dotdot,
"invalid name");
- bkey_fsck_err_on(memchr(d_name.name, '/', d_name.len), c, err,
- dirent_name_has_slash,
+ bkey_fsck_err_on(memchr(d_name.name, '/', d_name.len),
+ c, err, dirent_name_has_slash,
"name with /");
bkey_fsck_err_on(d.v->d_type != DT_SUBVOL &&
- le64_to_cpu(d.v->d_inum) == d.k->p.inode, c, err,
- dirent_to_itself,
+ le64_to_cpu(d.v->d_inum) == d.k->p.inode,
+ c, err, dirent_to_itself,
"dirent points to own directory");
+
+ if (d.v->d_casefold) {
+ bkey_fsck_err_on((flags & BKEY_INVALID_COMMIT) && d_cf_name.len > BCH_NAME_MAX,
+ c, err, dirent_cf_name_too_big,
+ "dirent w/ cf name too big (%u > %u)",
+ d_cf_name.len, BCH_NAME_MAX);
+
+ bkey_fsck_err_on(d_cf_name.len != strnlen(d_cf_name.name, d_cf_name.len),
+ c, err, dirent_stray_data_after_cf_name,
+ "dirent has stray data after cf name's NUL");
+ }
fsck_err:
return ret;
}
@@ -168,10 +237,11 @@ void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c,
static struct bkey_i_dirent *dirent_alloc_key(struct btree_trans *trans,
subvol_inum dir,
u8 type,
- int name_len, u64 dst)
+ int name_len, int cf_name_len,
+ u64 dst)
{
struct bkey_i_dirent *dirent;
- unsigned u64s = BKEY_U64s + dirent_val_u64s(name_len);
+ unsigned u64s = BKEY_U64s + dirent_val_u64s(name_len, cf_name_len);
BUG_ON(u64s > U8_MAX);
@@ -190,6 +260,8 @@ static struct bkey_i_dirent *dirent_alloc_key(struct btree_trans *trans,
}
dirent->v.d_type = type;
+ dirent->v.d_unused = 0;
+ dirent->v.d_casefold = cf_name_len ? 1 : 0;
return dirent;
}
@@ -197,6 +269,8 @@ static struct bkey_i_dirent *dirent_alloc_key(struct btree_trans *trans,
static void dirent_init_regular_name(struct bkey_i_dirent *dirent,
const struct qstr *name)
{
+ EBUG_ON(dirent->v.d_casefold);
+
memcpy(&dirent->v.d_name[0], name->name, name->len);
memset(&dirent->v.d_name[name->len], 0,
bkey_val_bytes(&dirent->k) -
@@ -204,10 +278,30 @@ static void dirent_init_regular_name(struct bkey_i_dirent *dirent,
name->len);
}
+static void dirent_init_casefolded_name(struct bkey_i_dirent *dirent,
+ const struct qstr *name,
+ const struct qstr *cf_name)
+{
+ EBUG_ON(!dirent->v.d_casefold);
+ EBUG_ON(!cf_name->len);
+
+ dirent->v.d_cf_name_block.d_name_len = name->len;
+ dirent->v.d_cf_name_block.d_cf_name_len = cf_name->len;
+ memcpy(&dirent->v.d_cf_name_block.d_names[0], name->name, name->len);
+ memcpy(&dirent->v.d_cf_name_block.d_names[name->len], cf_name->name, cf_name->len);
+ memset(&dirent->v.d_cf_name_block.d_names[name->len + cf_name->len], 0,
+ bkey_val_bytes(&dirent->k) -
+ offsetof(struct bch_dirent, d_cf_name_block.d_names) -
+ name->len + cf_name->len);
+
+ EBUG_ON(bch2_dirent_get_casefold_name(dirent_i_to_s_c(dirent)).len != cf_name->len);
+}
+
static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans,
subvol_inum dir,
u8 type,
const struct qstr *name,
+ const struct qstr *cf_name,
u64 dst)
{
struct bkey_i_dirent *dirent;
@@ -215,13 +309,16 @@ static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans,
if (name->len > BCH_NAME_MAX)
return ERR_PTR(-ENAMETOOLONG);
- dirent = dirent_alloc_key(trans, dir, type, name->len, dst);
+ dirent = dirent_alloc_key(trans, dir, type, name->len, cf_name ? cf_name->len : 0, dst);
if (IS_ERR(dirent))
return dirent;
- dirent_init_regular_name(dirent, name);
+ if (cf_name)
+ dirent_init_casefolded_name(dirent, name, cf_name);
+ else
+ dirent_init_regular_name(dirent, name);
- EBUG_ON(bch2_dirent_name_bytes(dirent_i_to_s_c(dirent)) != name->len);
+ EBUG_ON(bch2_dirent_get_name(dirent_i_to_s_c(dirent)).len != name->len);
return dirent;
}
@@ -237,7 +334,7 @@ int bch2_dirent_create_snapshot(struct btree_trans *trans,
struct bkey_i_dirent *dirent;
int ret;
- dirent = dirent_create_key(trans, zero_inum, type, name, dst_inum);
+ dirent = dirent_create_key(trans, zero_inum, type, name, NULL, dst_inum);
ret = PTR_ERR_OR_ZERO(dirent);
if (ret)
return ret;
@@ -263,7 +360,16 @@ int bch2_dirent_create(struct btree_trans *trans, subvol_inum dir,
struct bkey_i_dirent *dirent;
int ret;
- dirent = dirent_create_key(trans, dir, type, name, dst_inum);
+ if (hash_info->cf_encoding) {
+ struct qstr cf_name;
+ ret = bch2_casefold(trans, hash_info, name, &cf_name);
+ if (ret)
+ return ret;
+ dirent = dirent_create_key(trans, dir, type, name, &cf_name, dst_inum);
+ } else {
+ dirent = dirent_create_key(trans, dir, type, name, NULL, dst_inum);
+ }
+
ret = PTR_ERR_OR_ZERO(dirent);
if (ret)
return ret;
@@ -313,6 +419,8 @@ int bch2_dirent_rename(struct btree_trans *trans,
const struct qstr *dst_name, subvol_inum *dst_inum, u64 *dst_offset,
enum bch_rename_mode mode)
{
+ struct qstr src_name_lookup = *src_name;
+ struct qstr dst_name_lookup = *dst_name;
struct btree_iter src_iter = { NULL };
struct btree_iter dst_iter = { NULL };
struct bkey_s_c old_src, old_dst = bkey_s_c_null;
@@ -329,8 +437,13 @@ int bch2_dirent_rename(struct btree_trans *trans,
memset(dst_inum, 0, sizeof(*dst_inum));
/* Lookup src: */
+ ret = src_hash->cf_encoding
+ ? bch2_casefold(trans, src_hash, src_name, &src_name_lookup)
+ : 0;
+ if (ret)
+ goto out;
ret = bch2_hash_lookup(trans, &src_iter, bch2_dirent_hash_desc,
- src_hash, src_dir, src_name,
+ src_hash, src_dir, &src_name_lookup,
BTREE_ITER_INTENT);
if (ret)
goto out;
@@ -352,6 +465,11 @@ int bch2_dirent_rename(struct btree_trans *trans,
/* Lookup dst: */
+ ret = dst_hash->cf_encoding
+ ? bch2_casefold(trans, dst_hash, dst_name, &dst_name_lookup)
+ : 0;
+ if (ret)
+ goto out;
if (mode == BCH_RENAME) {
/*
* Note that we're _not_ checking if the target already exists -
@@ -359,12 +477,12 @@ int bch2_dirent_rename(struct btree_trans *trans,
* correctness:
*/
ret = bch2_hash_hole(trans, &dst_iter, bch2_dirent_hash_desc,
- dst_hash, dst_dir, dst_name);
+ dst_hash, dst_dir, &dst_name_lookup);
if (ret)
goto out;
} else {
ret = bch2_hash_lookup(trans, &dst_iter, bch2_dirent_hash_desc,
- dst_hash, dst_dir, dst_name,
+ dst_hash, dst_dir, &dst_name_lookup,
BTREE_ITER_INTENT);
if (ret)
goto out;
@@ -389,7 +507,8 @@ int bch2_dirent_rename(struct btree_trans *trans,
*src_offset = dst_iter.pos.offset;
/* Create new dst key: */
- new_dst = dirent_create_key(trans, dst_dir, 0, dst_name, 0);
+ new_dst = dirent_create_key(trans, dst_dir, 0, dst_name,
+ dst_hash->cf_encoding ? &dst_name_lookup : NULL, 0);
ret = PTR_ERR_OR_ZERO(new_dst);
if (ret)
goto out;
@@ -399,7 +518,8 @@ int bch2_dirent_rename(struct btree_trans *trans,
/* Create new src key: */
if (mode == BCH_RENAME_EXCHANGE) {
- new_src = dirent_create_key(trans, src_dir, 0, src_name, 0);
+ new_src = dirent_create_key(trans, src_dir, 0, src_name,
+ src_hash->cf_encoding ? &src_name_lookup : NULL, 0);
ret = PTR_ERR_OR_ZERO(new_src);
if (ret)
goto out;
@@ -491,6 +611,7 @@ int __bch2_dirent_lookup_trans(struct btree_trans *trans,
const struct qstr *name, subvol_inum *inum,
unsigned flags)
{
+ struct qstr lookup_name = *name;
struct bkey_s_c k;
struct bkey_s_c_dirent d;
u32 snapshot;
@@ -500,8 +621,14 @@ int __bch2_dirent_lookup_trans(struct btree_trans *trans,
if (ret)
return ret;
+ ret = hash_info->cf_encoding
+ ? bch2_casefold(trans, hash_info, name, &lookup_name)
+ : 0;
+ if (ret)
+ return ret;
+
ret = bch2_hash_lookup(trans, iter, bch2_dirent_hash_desc,
- hash_info, dir, name, flags);
+ hash_info, dir, &lookup_name, flags);
if (ret)
return ret;
diff --git a/fs/bcachefs/fs-common.c b/fs/bcachefs/fs-common.c
index 4496cf91a4c1..1f53efb0ac35 100644
--- a/fs/bcachefs/fs-common.c
+++ b/fs/bcachefs/fs-common.c
@@ -46,6 +46,10 @@ int bch2_create_trans(struct btree_trans *trans,
if (ret)
goto err;
+ /* Inherit casefold state from parent. */
+ if (S_ISDIR(mode))
+ new_inode->bi_flags |= dir_u->bi_flags & BCH_INODE_casefolded;
+
if (!(flags & BCH_CREATE_SNAPSHOT)) {
/* Normal create path - allocate a new inode: */
bch2_inode_init_late(new_inode, now, uid, gid, mode, rdev, dir_u);
diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c
index 8098a3a299d1..8ea1eddbb994 100644
--- a/fs/bcachefs/fs-ioctl.c
+++ b/fs/bcachefs/fs-ioctl.c
@@ -54,6 +54,28 @@ static int bch2_inode_flags_set(struct btree_trans *trans,
(newflags & (BCH_INODE_nodump|BCH_INODE_noatime)) != newflags)
return -EINVAL;
+ if ((newflags ^ oldflags) & BCH_INODE_casefolded) {
+#if IS_ENABLED(CONFIG_UNICODE)
+ int ret = 0;
+ /* Not supported on individual files. */
+ if (!S_ISDIR(bi->bi_mode))
+ return -EOPNOTSUPP;
+
+ /*
+ * Make sure the dir is empty, as otherwise we'd need to
+ * rehash everything and update the dirent keys.
+ */
+ ret = bch2_empty_dir_trans(trans, inode_inum(inode));
+ if (ret < 0)
+ return ret;
+
+ bch2_check_set_feature(c, BCH_FEATURE_casefolding);
+#else
+ printk(KERN_ERR "Cannot use casefolding on a kernel without CONFIG_UNICODE\n");
+ return -EOPNOTSUPP;
+#endif
+ }
+
if (s->set_projinherit) {
bi->bi_fields_set &= ~(1 << Inode_opt_project);
bi->bi_fields_set |= ((int) s->projinherit << Inode_opt_project);
diff --git a/fs/bcachefs/fs-ioctl.h b/fs/bcachefs/fs-ioctl.h
index d30f9bb056fd..ecd3bfdcde21 100644
--- a/fs/bcachefs/fs-ioctl.h
+++ b/fs/bcachefs/fs-ioctl.h
@@ -6,19 +6,21 @@
/* bcachefs inode flags -> vfs inode flags: */
static const __maybe_unused unsigned bch_flags_to_vfs[] = {
- [__BCH_INODE_sync] = S_SYNC,
- [__BCH_INODE_immutable] = S_IMMUTABLE,
- [__BCH_INODE_append] = S_APPEND,
- [__BCH_INODE_noatime] = S_NOATIME,
+ [__BCH_INODE_sync] = S_SYNC,
+ [__BCH_INODE_immutable] = S_IMMUTABLE,
+ [__BCH_INODE_append] = S_APPEND,
+ [__BCH_INODE_noatime] = S_NOATIME,
+ [__BCH_INODE_casefolded] = S_CASEFOLD,
};
/* bcachefs inode flags -> FS_IOC_GETFLAGS: */
static const __maybe_unused unsigned bch_flags_to_uflags[] = {
- [__BCH_INODE_sync] = FS_SYNC_FL,
- [__BCH_INODE_immutable] = FS_IMMUTABLE_FL,
- [__BCH_INODE_append] = FS_APPEND_FL,
- [__BCH_INODE_nodump] = FS_NODUMP_FL,
- [__BCH_INODE_noatime] = FS_NOATIME_FL,
+ [__BCH_INODE_sync] = FS_SYNC_FL,
+ [__BCH_INODE_immutable] = FS_IMMUTABLE_FL,
+ [__BCH_INODE_append] = FS_APPEND_FL,
+ [__BCH_INODE_nodump] = FS_NODUMP_FL,
+ [__BCH_INODE_noatime] = FS_NOATIME_FL,
+ [__BCH_INODE_casefolded] = FS_CASEFOLD_FL,
};
/* bcachefs inode flags -> FS_IOC_FSGETXATTR: */
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index 34cb22a9c05d..9e31ec7fdce6 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -367,6 +367,23 @@ static struct dentry *bch2_lookup(struct inode *vdir, struct dentry *dentry,
if (!ret)
vinode = bch2_vfs_inode_get(c, inum);
+#if IS_ENABLED(CONFIG_UNICODE)
+ if (!vinode && IS_CASEFOLDED(vdir)) {
+ /*
+ * Do not cache a negative dentry in casefolded directories
+ * as it would need to be invalidated in the following situation:
+ * - Lookup file "blAH" in a casefolded directory
+ * - Creation of file "BLAH" in a casefolded directory
+ * - Lookup file "blAH" in a casefolded directory
+ * which would fail if we had a negative dentry.
+ *
+ * We should come back to this when VFS has a method to handle
+ * this edgecase.
+ */
+ return NULL;
+ }
+#endif
+
return d_splice_alias(vinode, dentry);
}
diff --git a/fs/bcachefs/sb-errors.h b/fs/bcachefs/sb-errors.h
index 92289ce62cbf..934ee34538a3 100644
--- a/fs/bcachefs/sb-errors.h
+++ b/fs/bcachefs/sb-errors.h
@@ -249,7 +249,9 @@
x(dir_loop, 241) \
x(hash_table_key_duplicate, 242) \
x(hash_table_key_wrong_offset, 243) \
- x(unlinked_inode_not_on_deleted_list, 244)
+ x(unlinked_inode_not_on_deleted_list, 244) \
+ x(dirent_cf_name_too_big, 245) \
+ x(dirent_stray_data_after_cf_name, 246)
enum bch_sb_error_id {
#define x(t, n) BCH_FSCK_ERR_##t = n,
diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h
index 89fdb7c21134..f6d6668e8a73 100644
--- a/fs/bcachefs/str_hash.h
+++ b/fs/bcachefs/str_hash.h
@@ -44,6 +44,7 @@ bch2_str_hash_opt_to_type(struct bch_fs *c, enum bch_str_hash_opts opt)
struct bch_hash_info {
u8 type;
+ struct unicode_map *cf_encoding;
/*
* For crc32 or crc64 string hashes the first key value of
* the siphash_key (k0) is used as the key.
@@ -58,6 +59,9 @@ bch2_hash_info_init(struct bch_fs *c, const struct bch_inode_unpacked *bi)
struct bch_hash_info info = {
.type = (bi->bi_flags >> INODE_STR_HASH_OFFSET) &
~(~0U << INODE_STR_HASH_BITS),
+#if IS_ENABLED(CONFIG_UNICODE)
+ .cf_encoding = !!(bi->bi_flags & BCH_INODE_casefolded) ? c->cf_encoding : NULL,
+#endif
.siphash_key = { .k0 = bi->bi_hash_seed }
};
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 0f3a924ca1f9..b1edf8e9f80b 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -820,6 +820,25 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
if (ret)
goto err;
+#if IS_ENABLED(CONFIG_UNICODE)
+ /* Default encoding until we can potentially have more as an option. */
+ c->cf_encoding = utf8_load(BCH_FS_DEFAULT_UTF8_ENCODING);
+ if (IS_ERR(c->cf_encoding)) {
+ printk(KERN_ERR "Cannot load UTF-8 encoding for filesystem. Version: %u.%u.%u",
+ unicode_major(BCH_FS_DEFAULT_UTF8_ENCODING),
+ unicode_minor(BCH_FS_DEFAULT_UTF8_ENCODING),
+ unicode_rev(BCH_FS_DEFAULT_UTF8_ENCODING));
+ ret = -EINVAL;
+ goto err;
+ }
+#else
+ if (c->sb.features & (1ULL << BCH_FEATURE_casefolding)) {
+ printk(KERN_ERR "Cannot mount a filesystem with casefolding on a kernel without CONFIG_UNICODE\n");
+ ret = -EINVAL;
+ goto err;
+ }
+#endif
+
pr_uuid(&name, c->sb.user_uuid.b);
strscpy(c->name, name.buf, sizeof(c->name));
printbuf_exit(&name);