summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- .bcachefs_revision 2
-rw-r--r-- Kbuild.include 319
-rw-r--r-- Makefile 6
-rw-r--r-- Makefile.compiler 74
-rw-r--r-- include/trace/events/bcachefs.h 22
-rw-r--r-- libbcachefs/acl.c 21
-rw-r--r-- libbcachefs/alloc_background.c 2
-rw-r--r-- libbcachefs/bkey_methods.c 2
-rw-r--r-- libbcachefs/btree_cache.c 66
-rw-r--r-- libbcachefs/btree_cache.h 6
-rw-r--r-- libbcachefs/btree_iter.c 166
-rw-r--r-- libbcachefs/btree_iter.h 28
-rw-r--r-- libbcachefs/btree_key_cache.c 54
-rw-r--r-- libbcachefs/btree_types.h 7
-rw-r--r-- libbcachefs/btree_update.h 14
-rw-r--r-- libbcachefs/btree_update_interior.c 33
-rw-r--r-- libbcachefs/btree_update_leaf.c 127
-rw-r--r-- libbcachefs/dirent.c 18
-rw-r--r-- libbcachefs/fs-common.c 4
-rw-r--r-- libbcachefs/fs-io.c 21
-rw-r--r-- libbcachefs/fs.c 14
-rw-r--r-- libbcachefs/fsck.c 16
-rw-r--r-- libbcachefs/inode.c 2
-rw-r--r-- libbcachefs/io.c 23
-rw-r--r-- libbcachefs/move.c 9
-rw-r--r-- libbcachefs/opts.h 5
-rw-r--r-- libbcachefs/quota.c 2
-rw-r--r-- libbcachefs/reflink.c 9
-rw-r--r-- libbcachefs/str_hash.h 2
-rw-r--r-- libbcachefs/xattr.c 31
30 files changed, 442 insertions, 663 deletions
diff --git a/.bcachefs_revision b/.bcachefs_revision
index 8d922cc5..f6620a21 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-0a9be96b5087967ac57007a131614ab6b4cadce4
+b964c6cba873eb5d2ebd0174876b664730e69a73
diff --git a/Kbuild.include b/Kbuild.include
deleted file mode 100644
index 509e0856..00000000
--- a/Kbuild.include
+++ /dev/null
@@ -1,319 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-####
-# kbuild: Generic definitions
-
-# Convenient variables
-comma := ,
-quote := "
-squote := '
-empty :=
-space := $(empty) $(empty)
-space_escape := _-_SPACE_-_
-pound := \#
-
-###
-# Name of target with a '.' as filename prefix. foo/bar.o => foo/.bar.o
-dot-target = $(dir $@).$(notdir $@)
-
-###
-# The temporary file to save gcc -MMD generated dependencies must not
-# contain a comma
-depfile = $(subst $(comma),_,$(dot-target).d)
-
-###
-# filename of target with directory and extension stripped
-basetarget = $(basename $(notdir $@))
-
-###
-# real prerequisites without phony targets
-real-prereqs = $(filter-out $(PHONY), $^)
-
-###
-# Escape single quote for use in echo statements
-escsq = $(subst $(squote),'\$(squote)',$1)
-
-###
-# Quote a string to pass it to C files. foo => '"foo"'
-stringify = $(squote)$(quote)$1$(quote)$(squote)
-
-###
-# Easy method for doing a status message
- kecho := :
- quiet_kecho := echo
-silent_kecho := :
-kecho := $($(quiet)kecho)
-
-###
-# filechk is used to check if the content of a generated file is updated.
-# Sample usage:
-#
-# filechk_sample = echo $(KERNELRELEASE)
-# version.h: FORCE
-# $(call filechk,sample)
-#
-# The rule defined shall write to stdout the content of the new file.
-# The existing file will be compared with the new one.
-# - If no file exist it is created
-# - If the content differ the new file is used
-# - If they are equal no change, and no timestamp update
-define filechk
- $(Q)set -e; \
- mkdir -p $(dir $@); \
- trap "rm -f $(dot-target).tmp" EXIT; \
- { $(filechk_$(1)); } > $(dot-target).tmp; \
- if [ ! -r $@ ] || ! cmp -s $@ $(dot-target).tmp; then \
- $(kecho) ' UPD $@'; \
- mv -f $(dot-target).tmp $@; \
- fi
-endef
-
-######
-# gcc support functions
-# See documentation in Documentation/kbuild/makefiles.rst
-
-# cc-cross-prefix
-# Usage: CROSS_COMPILE := $(call cc-cross-prefix, m68k-linux-gnu- m68k-linux-)
-# Return first <prefix> where a <prefix>gcc is found in PATH.
-# If no gcc found in PATH with listed prefixes return nothing
-#
-# Note: '2>/dev/null' is here to force Make to invoke a shell. Otherwise, it
-# would try to directly execute the shell builtin 'command'. This workaround
-# should be kept for a long time since this issue was fixed only after the
-# GNU Make 4.2.1 release.
-cc-cross-prefix = $(firstword $(foreach c, $(1), \
- $(if $(shell command -v -- $(c)gcc 2>/dev/null), $(c))))
-
-# output directory for tests below
-TMPOUT = $(if $(KBUILD_EXTMOD),$(firstword $(KBUILD_EXTMOD))/).tmp_$$$$
-
-# try-run
-# Usage: option = $(call try-run, $(CC)...-o "$$TMP",option-ok,otherwise)
-# Exit code chooses option. "$$TMP" serves as a temporary file and is
-# automatically cleaned up.
-try-run = $(shell set -e; \
- TMP=$(TMPOUT)/tmp; \
- TMPO=$(TMPOUT)/tmp.o; \
- mkdir -p $(TMPOUT); \
- trap "rm -rf $(TMPOUT)" EXIT; \
- if ($(1)) >/dev/null 2>&1; \
- then echo "$(2)"; \
- else echo "$(3)"; \
- fi)
-
-# as-option
-# Usage: cflags-y += $(call as-option,-Wa$(comma)-isa=foo,)
-
-as-option = $(call try-run,\
- $(CC) $(KBUILD_CFLAGS) $(1) -c -x assembler /dev/null -o "$$TMP",$(1),$(2))
-
-# as-instr
-# Usage: cflags-y += $(call as-instr,instr,option1,option2)
-
-as-instr = $(call try-run,\
- printf "%b\n" "$(1)" | $(CC) $(KBUILD_AFLAGS) -c -x assembler -o "$$TMP" -,$(2),$(3))
-
-# __cc-option
-# Usage: MY_CFLAGS += $(call __cc-option,$(CC),$(MY_CFLAGS),-march=winchip-c6,-march=i586)
-__cc-option = $(call try-run,\
- $(1) -Werror $(2) $(3) -c -x c /dev/null -o "$$TMP",$(3),$(4))
-
-# cc-option
-# Usage: cflags-y += $(call cc-option,-march=winchip-c6,-march=i586)
-
-cc-option = $(call __cc-option, $(CC),\
- $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS),$(1),$(2))
-
-# cc-option-yn
-# Usage: flag := $(call cc-option-yn,-march=winchip-c6)
-cc-option-yn = $(call try-run,\
- $(CC) -Werror $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",y,n)
-
-# cc-disable-warning
-# Usage: cflags-y += $(call cc-disable-warning,unused-but-set-variable)
-cc-disable-warning = $(call try-run,\
- $(CC) -Werror $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1)))
-
-# cc-ifversion
-# Usage: EXTRA_CFLAGS += $(call cc-ifversion, -lt, 0402, -O1)
-cc-ifversion = $(shell [ $(CONFIG_GCC_VERSION)0 $(1) $(2)000 ] && echo $(3) || echo $(4))
-
-# ld-option
-# Usage: KBUILD_LDFLAGS += $(call ld-option, -X, -Y)
-ld-option = $(call try-run, $(LD) $(KBUILD_LDFLAGS) $(1) -v,$(1),$(2),$(3))
-
-# ld-ifversion
-# Usage: $(call ld-ifversion, -ge, 22252, y)
-ld-ifversion = $(shell [ $(CONFIG_LD_VERSION)0 $(1) $(2)0 ] && echo $(3) || echo $(4))
-
-######
-
-###
-# Shorthand for $(Q)$(MAKE) -f scripts/Makefile.build obj=
-# Usage:
-# $(Q)$(MAKE) $(build)=dir
-build := -f $(srctree)/scripts/Makefile.build obj
-
-###
-# Shorthand for $(Q)$(MAKE) -f scripts/Makefile.dtbinst obj=
-# Usage:
-# $(Q)$(MAKE) $(dtbinst)=dir
-dtbinst := -f $(srctree)/scripts/Makefile.dtbinst obj
-
-###
-# Shorthand for $(Q)$(MAKE) -f scripts/Makefile.clean obj=
-# Usage:
-# $(Q)$(MAKE) $(clean)=dir
-clean := -f $(srctree)/scripts/Makefile.clean obj
-
-# echo command.
-# Short version is used, if $(quiet) equals `quiet_', otherwise full one.
-echo-cmd = $(if $($(quiet)cmd_$(1)),\
- echo ' $(call escsq,$($(quiet)cmd_$(1)))$(echo-why)';)
-
-# printing commands
-cmd = @set -e; $(echo-cmd) $(cmd_$(1))
-
-###
-# if_changed - execute command if any prerequisite is newer than
-# target, or command line has changed
-# if_changed_dep - as if_changed, but uses fixdep to reveal dependencies
-# including used config symbols
-# if_changed_rule - as if_changed but execute rule instead
-# See Documentation/kbuild/makefiles.rst for more info
-
-ifneq ($(KBUILD_NOCMDDEP),1)
-# Check if both commands are the same including their order. Result is empty
-# string if equal. User may override this check using make KBUILD_NOCMDDEP=1
-cmd-check = $(filter-out $(subst $(space),$(space_escape),$(strip $(cmd_$@))), \
- $(subst $(space),$(space_escape),$(strip $(cmd_$1))))
-else
-cmd-check = $(if $(strip $(cmd_$@)),,1)
-endif
-
-# Replace >$< with >$$< to preserve $ when reloading the .cmd file
-# (needed for make)
-# Replace >#< with >$(pound)< to avoid starting a comment in the .cmd file
-# (needed for make)
-# Replace >'< with >'\''< to be able to enclose the whole string in '...'
-# (needed for the shell)
-make-cmd = $(call escsq,$(subst $(pound),$$(pound),$(subst $$,$$$$,$(cmd_$(1)))))
-
-# Find any prerequisites that are newer than target or that do not exist.
-# (This is not true for now; $? should contain any non-existent prerequisites,
-# but it does not work as expected when .SECONDARY is present. This seems a bug
-# of GNU Make.)
-# PHONY targets skipped in both cases.
-newer-prereqs = $(filter-out $(PHONY),$?)
-
-# Execute command if command has changed or prerequisite(s) are updated.
-if_changed = $(if $(newer-prereqs)$(cmd-check), \
- $(cmd); \
- printf '%s\n' 'cmd_$@ := $(make-cmd)' > $(dot-target).cmd, @:)
-
-# Execute the command and also postprocess generated .d dependencies file.
-if_changed_dep = $(if $(newer-prereqs)$(cmd-check),$(cmd_and_fixdep),@:)
-
-cmd_and_fixdep = \
- $(cmd); \
- scripts/basic/fixdep $(depfile) $@ '$(make-cmd)' > $(dot-target).cmd;\
- rm -f $(depfile)
-
-# Usage: $(call if_changed_rule,foo)
-# Will check if $(cmd_foo) or any of the prerequisites changed,
-# and if so will execute $(rule_foo).
-if_changed_rule = $(if $(newer-prereqs)$(cmd-check),$(rule_$(1)),@:)
-
-###
-# why - tell why a target got built
-# enabled by make V=2
-# Output (listed in the order they are checked):
-# (1) - due to target is PHONY
-# (2) - due to target missing
-# (3) - due to: file1.h file2.h
-# (4) - due to command line change
-# (5) - due to missing .cmd file
-# (6) - due to target not in $(targets)
-# (1) PHONY targets are always build
-# (2) No target, so we better build it
-# (3) Prerequisite is newer than target
-# (4) The command line stored in the file named dir/.target.cmd
-# differed from actual command line. This happens when compiler
-# options changes
-# (5) No dir/.target.cmd file (used to store command line)
-# (6) No dir/.target.cmd file and target not listed in $(targets)
-# This is a good hint that there is a bug in the kbuild file
-ifeq ($(KBUILD_VERBOSE),2)
-why = \
- $(if $(filter $@, $(PHONY)),- due to target is PHONY, \
- $(if $(wildcard $@), \
- $(if $(newer-prereqs),- due to: $(newer-prereqs), \
- $(if $(cmd-check), \
- $(if $(cmd_$@),- due to command line change, \
- $(if $(filter $@, $(targets)), \
- - due to missing .cmd file, \
- - due to $(notdir $@) not in $$(targets) \
- ) \
- ) \
- ) \
- ), \
- - due to target missing \
- ) \
- )
-
-echo-why = $(call escsq, $(strip $(why)))
-endif
-
-###############################################################################
-#
-# When a Kconfig string contains a filename, it is suitable for
-# passing to shell commands. It is surrounded by double-quotes, and
-# any double-quotes or backslashes within it are escaped by
-# backslashes.
-#
-# This is no use for dependencies or $(wildcard). We need to strip the
-# surrounding quotes and the escaping from quotes and backslashes, and
-# we *do* need to escape any spaces in the string. So, for example:
-#
-# Usage: $(eval $(call config_filename,FOO))
-#
-# Defines FOO_FILENAME based on the contents of the CONFIG_FOO option,
-# transformed as described above to be suitable for use within the
-# makefile.
-#
-# Also, if the filename is a relative filename and exists in the source
-# tree but not the build tree, define FOO_SRCPREFIX as $(srctree)/ to
-# be prefixed to *both* command invocation and dependencies.
-#
-# Note: We also print the filenames in the quiet_cmd_foo text, and
-# perhaps ought to have a version specially escaped for that purpose.
-# But it's only cosmetic, and $(patsubst "%",%,$(CONFIG_FOO)) is good
-# enough. It'll strip the quotes in the common case where there's no
-# space and it's a simple filename, and it'll retain the quotes when
-# there's a space. There are some esoteric cases in which it'll print
-# the wrong thing, but we don't really care. The actual dependencies
-# and commands *do* get it right, with various combinations of single
-# and double quotes, backslashes and spaces in the filenames.
-#
-###############################################################################
-#
-define config_filename
-ifneq ($$(CONFIG_$(1)),"")
-$(1)_FILENAME := $$(subst \\,\,$$(subst \$$(quote),$$(quote),$$(subst $$(space_escape),\$$(space),$$(patsubst "%",%,$$(subst $$(space),$$(space_escape),$$(CONFIG_$(1)))))))
-ifneq ($$(patsubst /%,%,$$(firstword $$($(1)_FILENAME))),$$(firstword $$($(1)_FILENAME)))
-else
-ifeq ($$(wildcard $$($(1)_FILENAME)),)
-ifneq ($$(wildcard $$(srctree)/$$($(1)_FILENAME)),)
-$(1)_SRCPREFIX := $(srctree)/
-endif
-endif
-endif
-endif
-endef
-#
-###############################################################################
-
-# delete partially updated (i.e. corrupted) files on error
-.DELETE_ON_ERROR:
-
-# do not delete intermediate files automatically
-.SECONDARY:
diff --git a/Makefile b/Makefile
index 596357c7..23e05085 100644
--- a/Makefile
+++ b/Makefile
@@ -22,7 +22,7 @@ LDFLAGS+=$(CFLAGS) $(EXTRA_LDFLAGS)
VERSION?=$(shell git describe --dirty=+ 2>/dev/null || echo v0.1-nogit)
-include Kbuild.include
+include Makefile.compiler
CFLAGS+=$(call cc-disable-warning, unused-but-set-variable)
CFLAGS+=$(call cc-disable-warning, stringop-overflow)
@@ -178,8 +178,8 @@ update-bcachefs-sources:
git add include/linux/list_nulls.h
cp $(LINUX_DIR)/include/linux/poison.h include/linux/
git add include/linux/poison.h
- cp $(LINUX_DIR)/scripts/Kbuild.include ./
- git add Kbuild.include
+ cp $(LINUX_DIR)/scripts/Makefile.compiler ./
+ git add Makefile.compiler
$(RM) libbcachefs/*.mod.c
git -C $(LINUX_DIR) rev-parse HEAD | tee .bcachefs_revision
git add .bcachefs_revision
diff --git a/Makefile.compiler b/Makefile.compiler
new file mode 100644
index 00000000..86ecd2ac
--- /dev/null
+++ b/Makefile.compiler
@@ -0,0 +1,74 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+# cc-cross-prefix
+# Usage: CROSS_COMPILE := $(call cc-cross-prefix, m68k-linux-gnu- m68k-linux-)
+# Return first <prefix> where a <prefix>gcc is found in PATH.
+# If no gcc found in PATH with listed prefixes return nothing
+#
+# Note: '2>/dev/null' is here to force Make to invoke a shell. Otherwise, it
+# would try to directly execute the shell builtin 'command'. This workaround
+# should be kept for a long time since this issue was fixed only after the
+# GNU Make 4.2.1 release.
+cc-cross-prefix = $(firstword $(foreach c, $(1), \
+ $(if $(shell command -v -- $(c)gcc 2>/dev/null), $(c))))
+
+# output directory for tests below
+TMPOUT = $(if $(KBUILD_EXTMOD),$(firstword $(KBUILD_EXTMOD))/).tmp_$$$$
+
+# try-run
+# Usage: option = $(call try-run, $(CC)...-o "$$TMP",option-ok,otherwise)
+# Exit code chooses option. "$$TMP" serves as a temporary file and is
+# automatically cleaned up.
+try-run = $(shell set -e; \
+ TMP=$(TMPOUT)/tmp; \
+ mkdir -p $(TMPOUT); \
+ trap "rm -rf $(TMPOUT)" EXIT; \
+ if ($(1)) >/dev/null 2>&1; \
+ then echo "$(2)"; \
+ else echo "$(3)"; \
+ fi)
+
+# as-option
+# Usage: cflags-y += $(call as-option,-Wa$(comma)-isa=foo,)
+
+as-option = $(call try-run,\
+ $(CC) $(KBUILD_CFLAGS) $(1) -c -x assembler /dev/null -o "$$TMP",$(1),$(2))
+
+# as-instr
+# Usage: cflags-y += $(call as-instr,instr,option1,option2)
+
+as-instr = $(call try-run,\
+ printf "%b\n" "$(1)" | $(CC) $(KBUILD_AFLAGS) -c -x assembler -o "$$TMP" -,$(2),$(3))
+
+# __cc-option
+# Usage: MY_CFLAGS += $(call __cc-option,$(CC),$(MY_CFLAGS),-march=winchip-c6,-march=i586)
+__cc-option = $(call try-run,\
+ $(1) -Werror $(2) $(3) -c -x c /dev/null -o "$$TMP",$(3),$(4))
+
+# cc-option
+# Usage: cflags-y += $(call cc-option,-march=winchip-c6,-march=i586)
+
+cc-option = $(call __cc-option, $(CC),\
+ $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS),$(1),$(2))
+
+# cc-option-yn
+# Usage: flag := $(call cc-option-yn,-march=winchip-c6)
+cc-option-yn = $(call try-run,\
+ $(CC) -Werror $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",y,n)
+
+# cc-disable-warning
+# Usage: cflags-y += $(call cc-disable-warning,unused-but-set-variable)
+cc-disable-warning = $(call try-run,\
+ $(CC) -Werror $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1)))
+
+# cc-ifversion
+# Usage: EXTRA_CFLAGS += $(call cc-ifversion, -lt, 0402, -O1)
+cc-ifversion = $(shell [ $(CONFIG_GCC_VERSION)0 $(1) $(2)000 ] && echo $(3) || echo $(4))
+
+# ld-option
+# Usage: KBUILD_LDFLAGS += $(call ld-option, -X, -Y)
+ld-option = $(call try-run, $(LD) $(KBUILD_LDFLAGS) $(1) -v,$(1),$(2),$(3))
+
+# ld-ifversion
+# Usage: $(call ld-ifversion, -ge, 22252, y)
+ld-ifversion = $(shell [ $(CONFIG_LD_VERSION)0 $(1) $(2)0 ] && echo $(3) || echo $(4))
diff --git a/include/trace/events/bcachefs.h b/include/trace/events/bcachefs.h
index 4c0d9b76..a11bb5f7 100644
--- a/include/trace/events/bcachefs.h
+++ b/include/trace/events/bcachefs.h
@@ -775,14 +775,16 @@ DEFINE_EVENT(transaction_restart_iter, trans_restart_traverse,
TRACE_EVENT(iter_traverse,
TP_PROTO(unsigned long trans_ip,
unsigned long caller_ip,
+ bool key_cache,
enum btree_id btree_id,
struct bpos *pos,
int ret),
- TP_ARGS(trans_ip, caller_ip, btree_id, pos, ret),
+ TP_ARGS(trans_ip, caller_ip, key_cache, btree_id, pos, ret),
TP_STRUCT__entry(
__field(unsigned long, trans_ip )
__field(unsigned long, caller_ip )
+ __field(u8, key_cache )
__field(u8, btree_id )
__field(u64, pos_inode )
__field(u64, pos_offset )
@@ -793,6 +795,7 @@ TRACE_EVENT(iter_traverse,
TP_fast_assign(
__entry->trans_ip = trans_ip;
__entry->caller_ip = caller_ip;
+ __entry->key_cache = key_cache;
__entry->btree_id = btree_id;
__entry->pos_inode = pos->inode;
__entry->pos_offset = pos->offset;
@@ -800,9 +803,10 @@ TRACE_EVENT(iter_traverse,
__entry->ret = ret;
),
- TP_printk("%ps %pS pos %u %llu:%llu:%u ret %i",
+ TP_printk("%ps %pS key cache %u btree %u %llu:%llu:%u ret %i",
(void *) __entry->trans_ip,
(void *) __entry->caller_ip,
+ __entry->key_cache,
__entry->btree_id,
__entry->pos_inode,
__entry->pos_offset,
@@ -953,15 +957,17 @@ TRACE_EVENT(trans_restart_mem_realloced,
DECLARE_EVENT_CLASS(node_lock_fail,
TP_PROTO(unsigned long trans_ip,
unsigned long caller_ip,
+ bool key_cache,
enum btree_id btree_id,
struct bpos *pos,
unsigned level, u32 iter_seq, unsigned node, u32 node_seq),
- TP_ARGS(trans_ip, caller_ip, btree_id, pos,
+ TP_ARGS(trans_ip, caller_ip, key_cache, btree_id, pos,
level, iter_seq, node, node_seq),
TP_STRUCT__entry(
__field(unsigned long, trans_ip )
__field(unsigned long, caller_ip )
+ __field(u8, key_cache )
__field(u8, btree_id )
__field(u64, pos_inode )
__field(u64, pos_offset )
@@ -975,6 +981,7 @@ DECLARE_EVENT_CLASS(node_lock_fail,
TP_fast_assign(
__entry->trans_ip = trans_ip;
__entry->caller_ip = caller_ip;
+ __entry->key_cache = key_cache;
__entry->btree_id = btree_id;
__entry->pos_inode = pos->inode;
__entry->pos_offset = pos->offset;
@@ -985,9 +992,10 @@ DECLARE_EVENT_CLASS(node_lock_fail,
__entry->node_seq = node_seq;
),
- TP_printk("%ps %pS btree %u pos %llu:%llu:%u level %u iter seq %u node %u node seq %u",
+ TP_printk("%ps %pS key cache %u btree %u pos %llu:%llu:%u level %u iter seq %u node %u node seq %u",
(void *) __entry->trans_ip,
(void *) __entry->caller_ip,
+ __entry->key_cache,
__entry->btree_id,
__entry->pos_inode,
__entry->pos_offset,
@@ -999,20 +1007,22 @@ DECLARE_EVENT_CLASS(node_lock_fail,
DEFINE_EVENT(node_lock_fail, node_upgrade_fail,
TP_PROTO(unsigned long trans_ip,
unsigned long caller_ip,
+ bool key_cache,
enum btree_id btree_id,
struct bpos *pos,
unsigned level, u32 iter_seq, unsigned node, u32 node_seq),
- TP_ARGS(trans_ip, caller_ip, btree_id, pos,
+ TP_ARGS(trans_ip, caller_ip, key_cache, btree_id, pos,
level, iter_seq, node, node_seq)
);
DEFINE_EVENT(node_lock_fail, node_relock_fail,
TP_PROTO(unsigned long trans_ip,
unsigned long caller_ip,
+ bool key_cache,
enum btree_id btree_id,
struct bpos *pos,
unsigned level, u32 iter_seq, unsigned node, u32 node_seq),
- TP_ARGS(trans_ip, caller_ip, btree_id, pos,
+ TP_ARGS(trans_ip, caller_ip, key_cache, btree_id, pos,
level, iter_seq, node, node_seq)
);
diff --git a/libbcachefs/acl.c b/libbcachefs/acl.c
index 5408a922..eb907e5d 100644
--- a/libbcachefs/acl.c
+++ b/libbcachefs/acl.c
@@ -221,6 +221,8 @@ struct posix_acl *bch2_get_acl(struct inode *vinode, int type)
struct btree_iter *iter;
struct bkey_s_c_xattr xattr;
struct posix_acl *acl = NULL;
+ struct bkey_s_c k;
+ int ret;
bch2_trans_init(&trans, c, 0, 0);
retry:
@@ -239,7 +241,14 @@ retry:
goto out;
}
- xattr = bkey_s_c_to_xattr(bch2_btree_iter_peek_slot(iter));
+ k = bch2_btree_iter_peek_slot(iter);
+ ret = bkey_err(k);
+ if (ret) {
+ acl = ERR_PTR(ret);
+ goto out;
+ }
+
+ xattr = bkey_s_c_to_xattr(k);
acl = bch2_acl_from_disk(xattr_val(xattr.v),
le16_to_cpu(xattr.v->x_val_len));
@@ -325,8 +334,7 @@ retry:
ret = bch2_inode_write(&trans, inode_iter, &inode_u) ?:
bch2_trans_commit(&trans, NULL,
- &inode->ei_journal_seq,
- BTREE_INSERT_NOUNLOCK);
+ &inode->ei_journal_seq, 0);
btree_err:
bch2_trans_iter_put(&trans, inode_iter);
@@ -356,6 +364,7 @@ int bch2_acl_chmod(struct btree_trans *trans,
struct bkey_s_c_xattr xattr;
struct bkey_i_xattr *new;
- struct posix_acl *acl;
+ struct posix_acl *acl = NULL; /* goto err below can run before acl is assigned */
+ struct bkey_s_c k;
int ret;
iter = bch2_hash_lookup(trans, bch2_xattr_hash_desc,
@@ -366,7 +375,13 @@ int bch2_acl_chmod(struct btree_trans *trans,
if (ret)
return ret == -ENOENT ? 0 : ret;
- xattr = bkey_s_c_to_xattr(bch2_btree_iter_peek_slot(iter));
+ k = bch2_btree_iter_peek_slot(iter);
+ /* peek_slot may hand back an error key: check it before touching k */
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+
+ xattr = bkey_s_c_to_xattr(k);
acl = bch2_acl_from_disk(xattr_val(xattr.v),
le16_to_cpu(xattr.v->x_val_len));
ret = PTR_ERR_OR_ZERO(acl);
diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c
index fff85c17..886861a0 100644
--- a/libbcachefs/alloc_background.c
+++ b/libbcachefs/alloc_background.c
@@ -374,7 +374,7 @@ int bch2_alloc_write(struct bch_fs *c, unsigned flags)
percpu_ref_put(&ca->ref);
goto err;
}
- bch2_btree_iter_next_slot(iter);
+ bch2_btree_iter_advance(iter);
}
}
err:
diff --git a/libbcachefs/bkey_methods.c b/libbcachefs/bkey_methods.c
index ff9d770a..f8adbf43 100644
--- a/libbcachefs/bkey_methods.c
+++ b/libbcachefs/bkey_methods.c
@@ -269,7 +269,7 @@ void bch2_bkey_val_to_text(struct printbuf *out, struct bch_fs *c,
{
bch2_bkey_to_text(out, k.k);
- if (k.k) {
+ if (bkey_val_bytes(k.k)) {
pr_buf(out, ": ");
bch2_val_to_text(out, c, k);
}
diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c
index ed448276..cd0c5009 100644
--- a/libbcachefs/btree_cache.c
+++ b/libbcachefs/btree_cache.c
@@ -648,8 +648,10 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
* Parent node must be locked, else we could read in a btree node that's
* been freed:
*/
- if (iter && !bch2_btree_node_relock(iter, level + 1))
+ if (iter && !bch2_btree_node_relock(iter, level + 1)) {
+ btree_trans_restart(iter->trans);
return ERR_PTR(-EINTR);
+ }
b = bch2_btree_node_mem_alloc(c);
if (IS_ERR(b))
@@ -686,18 +688,19 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c,
if (!sync)
return NULL;
- /*
- * XXX: this will probably always fail because btree_iter_relock()
- * currently fails for iterators that aren't pointed at a valid btree
- * node
- */
if (iter &&
(!bch2_trans_relock(iter->trans) ||
- !bch2_btree_iter_relock(iter, _THIS_IP_)))
+ !bch2_btree_iter_relock_intent(iter))) {
+ BUG_ON(!iter->trans->restarted);
return ERR_PTR(-EINTR);
+ }
- if (!six_relock_type(&b->c.lock, lock_type, seq))
+ if (!six_relock_type(&b->c.lock, lock_type, seq)) {
+ /* iter is optional here (see the iter && test above) */
+ if (iter)
+ btree_trans_restart(iter->trans);
return ERR_PTR(-EINTR);
+ }
return b;
}
@@ -718,6 +719,7 @@ static noinline void btree_bad_header(struct bch_fs *c, struct btree *b)
return;
bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(&b->key));
+ bch2_bpos_to_text(&PBUF(buf2), b->data->min_key);
bch2_bpos_to_text(&PBUF(buf3), b->data->max_key);
bch2_fs_inconsistent(c, "btree node header doesn't match ptr\n"
@@ -752,20 +754,23 @@ static inline void btree_check_header(struct bch_fs *c, struct btree *b)
* The btree node will have either a read or a write lock held, depending on
* the @write parameter.
*/
-struct btree *bch2_btree_node_get(struct bch_fs *c, struct btree_iter *iter,
+struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_iter *iter,
const struct bkey_i *k, unsigned level,
enum six_lock_type lock_type,
unsigned long trace_ip)
{
+ struct bch_fs *c = trans->c;
struct btree_cache *bc = &c->btree_cache;
struct btree *b;
struct bset_tree *t;
EBUG_ON(level >= BTREE_MAX_DEPTH);
- b = btree_node_mem_ptr(k);
- if (b)
- goto lock_node;
+ if (c->opts.btree_node_mem_ptr_optimization) {
+ b = btree_node_mem_ptr(k);
+ if (b)
+ goto lock_node;
+ }
retry:
b = btree_cache_find(bc, k);
if (unlikely(!b)) {
@@ -818,7 +823,7 @@ lock_node:
if (!btree_node_lock(b, k->k.p, level, iter, lock_type,
lock_node_check_fn, (void *) k, trace_ip)) {
- if (b->hash_val != btree_ptr_hash_val(k))
+ if (!trans->restarted)
goto retry;
return ERR_PTR(-EINTR);
}
@@ -830,10 +835,11 @@ lock_node:
if (bch2_btree_node_relock(iter, level + 1))
goto retry;
- trace_trans_restart_btree_node_reused(iter->trans->ip,
+ trace_trans_restart_btree_node_reused(trans->ip,
trace_ip,
iter->btree_id,
&iter->real_pos);
+ btree_trans_restart(trans);
return ERR_PTR(-EINTR);
}
}
@@ -842,19 +848,20 @@ lock_node:
u32 seq = b->c.lock.state.seq;
six_unlock_type(&b->c.lock, lock_type);
- bch2_trans_unlock(iter->trans);
+ bch2_trans_unlock(trans);
bch2_btree_node_wait_on_read(b);
/*
- * XXX: check if this always fails - btree_iter_relock()
- * currently fails for iterators that aren't pointed at a valid
- * btree node
+ * should_be_locked is not set on this iterator yet, so we need
+ * to relock it specifically:
*/
if (iter &&
- (!bch2_trans_relock(iter->trans) ||
- !bch2_btree_iter_relock(iter, _THIS_IP_)))
+ (!bch2_trans_relock(trans) ||
+ !bch2_btree_iter_relock_intent(iter))) {
+ BUG_ON(!trans->restarted);
return ERR_PTR(-EINTR);
+ }
if (!six_relock_type(&b->c.lock, lock_type, seq))
goto retry;
@@ -899,9 +906,11 @@ struct btree *bch2_btree_node_get_noiter(struct bch_fs *c,
EBUG_ON(level >= BTREE_MAX_DEPTH);
- b = btree_node_mem_ptr(k);
- if (b)
- goto lock_node;
+ if (c->opts.btree_node_mem_ptr_optimization) {
+ b = btree_node_mem_ptr(k);
+ if (b)
+ goto lock_node;
+ }
retry:
b = btree_cache_find(bc, k);
if (unlikely(!b)) {
@@ -966,9 +975,9 @@ out:
return b;
}
-void bch2_btree_node_prefetch(struct bch_fs *c, struct btree_iter *iter,
- const struct bkey_i *k,
- enum btree_id btree_id, unsigned level)
+int bch2_btree_node_prefetch(struct bch_fs *c, struct btree_iter *iter,
+ const struct bkey_i *k,
+ enum btree_id btree_id, unsigned level)
{
struct btree_cache *bc = &c->btree_cache;
struct btree *b;
@@ -978,9 +987,10 @@ void bch2_btree_node_prefetch(struct bch_fs *c, struct btree_iter *iter,
b = btree_cache_find(bc, k);
if (b)
- return;
+ return 0;
- bch2_btree_node_fill(c, iter, k, btree_id, level, SIX_LOCK_read, false);
+ b = bch2_btree_node_fill(c, iter, k, btree_id, level, SIX_LOCK_read, false);
+ return PTR_ERR_OR_ZERO(b);
}
void bch2_btree_node_evict(struct bch_fs *c, const struct bkey_i *k)
diff --git a/libbcachefs/btree_cache.h b/libbcachefs/btree_cache.h
index fd5026c9..5032293e 100644
--- a/libbcachefs/btree_cache.h
+++ b/libbcachefs/btree_cache.h
@@ -22,15 +22,15 @@ int bch2_btree_cache_cannibalize_lock(struct bch_fs *, struct closure *);
struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *);
struct btree *bch2_btree_node_mem_alloc(struct bch_fs *);
-struct btree *bch2_btree_node_get(struct bch_fs *, struct btree_iter *,
+struct btree *bch2_btree_node_get(struct btree_trans *, struct btree_iter *,
const struct bkey_i *, unsigned,
enum six_lock_type, unsigned long);
struct btree *bch2_btree_node_get_noiter(struct bch_fs *, const struct bkey_i *,
enum btree_id, unsigned, bool);
-void bch2_btree_node_prefetch(struct bch_fs *, struct btree_iter *,
- const struct bkey_i *, enum btree_id, unsigned);
+int bch2_btree_node_prefetch(struct bch_fs *, struct btree_iter *,
+ const struct bkey_i *, enum btree_id, unsigned);
void bch2_btree_node_evict(struct bch_fs *, const struct bkey_i *);
diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c
index c7473981..3af00e24 100644
--- a/libbcachefs/btree_iter.c
+++ b/libbcachefs/btree_iter.c
@@ -189,6 +189,7 @@ static inline bool btree_iter_get_locks(struct btree_iter *iter, bool upgrade,
(upgrade
? trace_node_upgrade_fail
: trace_node_relock_fail)(iter->trans->ip, trace_ip,
+ btree_iter_type(iter) == BTREE_ITER_CACHED,
iter->btree_id, &iter->real_pos,
l, iter->l[l].lock_seq,
is_btree_node(iter, l)
@@ -197,7 +198,6 @@ static inline bool btree_iter_get_locks(struct btree_iter *iter, bool upgrade,
is_btree_node(iter, l)
? iter->l[l].b->c.lock.state.seq
: 0);
-
fail_idx = l;
btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
}
@@ -309,7 +309,7 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
}
if (unlikely(deadlock_iter)) {
- trace_trans_restart_would_deadlock(iter->trans->ip, ip,
+ trace_trans_restart_would_deadlock(trans->ip, ip,
trans->in_traverse_all, reason,
deadlock_iter->btree_id,
btree_iter_type(deadlock_iter),
@@ -317,6 +317,7 @@ bool __bch2_btree_node_lock(struct btree *b, struct bpos pos,
iter->btree_id,
btree_iter_type(iter),
&pos);
+ btree_trans_restart(trans);
return false;
}
@@ -375,10 +376,44 @@ void bch2_btree_trans_verify_locks(struct btree_trans *trans)
static inline void bch2_btree_iter_verify_locks(struct btree_iter *iter) {}
#endif
+/*
+ * Only for btree_cache.c - only relocks intent locks; on the first level that fails to relock, marks the iterator NEED_TRAVERSE, restarts the transaction and returns false
+ */
+bool bch2_btree_iter_relock_intent(struct btree_iter *iter)
+{
+ unsigned l;
+
+ for (l = iter->level;
+ l < iter->locks_want && btree_iter_node(iter, l);
+ l++) {
+ if (!bch2_btree_node_relock(iter, l)) {
+ trace_node_relock_fail(iter->trans->ip, _RET_IP_,
+ btree_iter_type(iter) == BTREE_ITER_CACHED,
+ iter->btree_id, &iter->real_pos,
+ l, iter->l[l].lock_seq,
+ is_btree_node(iter, l)
+ ? 0
+ : (unsigned long) iter->l[l].b,
+ is_btree_node(iter, l)
+ ? iter->l[l].b->c.lock.state.seq
+ : 0);
+ btree_iter_set_dirty(iter, BTREE_ITER_NEED_TRAVERSE);
+ btree_trans_restart(iter->trans);
+ return false;
+ }
+ }
+
+ return true;
+}
+
__flatten
bool bch2_btree_iter_relock(struct btree_iter *iter, unsigned long trace_ip)
{
- return btree_iter_get_locks(iter, false, trace_ip);
+ bool ret = btree_iter_get_locks(iter, false, trace_ip);
+
+ if (!ret)
+ btree_trans_restart(iter->trans);
+ return ret;
}
bool __bch2_btree_iter_upgrade(struct btree_iter *iter,
@@ -421,6 +456,8 @@ bool __bch2_btree_iter_upgrade(struct btree_iter *iter,
btree_iter_get_locks(linked, true, _THIS_IP_);
}
+ if (iter->should_be_locked)
+ btree_trans_restart(iter->trans);
return false;
}
@@ -459,8 +496,7 @@ void bch2_trans_downgrade(struct btree_trans *trans)
/* Btree transaction locking: */
-static inline bool btree_iter_should_be_locked(struct btree_trans *trans,
- struct btree_iter *iter)
+static inline bool btree_iter_should_be_locked(struct btree_iter *iter)
{
return (iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT) ||
iter->should_be_locked;
@@ -470,11 +506,15 @@ bool bch2_trans_relock(struct btree_trans *trans)
{
struct btree_iter *iter;
+ if (unlikely(trans->restarted))
+ return false;
+
trans_for_each_iter(trans, iter)
- if (!bch2_btree_iter_relock(iter, _RET_IP_) &&
- btree_iter_should_be_locked(trans, iter)) {
+ if (btree_iter_should_be_locked(iter) &&
+ !bch2_btree_iter_relock(iter, _RET_IP_)) {
trace_trans_restart_relock(trans->ip, _RET_IP_,
iter->btree_id, &iter->real_pos);
+ BUG_ON(!trans->restarted);
return false;
}
return true;
@@ -1055,11 +1095,12 @@ static int lock_root_check_fn(struct six_lock *lock, void *p)
return b == *rootp ? 0 : -1;
}
-static inline int btree_iter_lock_root(struct btree_iter *iter,
+static inline int btree_iter_lock_root(struct btree_trans *trans,
+ struct btree_iter *iter,
unsigned depth_want,
unsigned long trace_ip)
{
- struct bch_fs *c = iter->trans->c;
+ struct bch_fs *c = trans->c;
struct btree *b, **rootp = &c->btree_roots[iter->btree_id].b;
enum six_lock_type lock_type;
unsigned i;
@@ -1087,8 +1128,11 @@ static inline int btree_iter_lock_root(struct btree_iter *iter,
if (unlikely(!btree_node_lock(b, SPOS_MAX, iter->level,
iter, lock_type,
lock_root_check_fn, rootp,
- trace_ip)))
- return -EINTR;
+ trace_ip))) {
+ if (trans->restarted)
+ return -EINTR;
+ continue;
+ }
if (likely(b == READ_ONCE(*rootp) &&
b->c.level == iter->level &&
@@ -1109,7 +1153,7 @@ static inline int btree_iter_lock_root(struct btree_iter *iter,
}
noinline
-static void btree_iter_prefetch(struct btree_iter *iter)
+static int btree_iter_prefetch(struct btree_iter *iter)
{
struct bch_fs *c = iter->trans->c;
struct btree_iter_level *l = &iter->l[iter->level];
@@ -1120,10 +1164,11 @@ static void btree_iter_prefetch(struct btree_iter *iter)
? (iter->level > 1 ? 0 : 2)
: (iter->level > 1 ? 1 : 16);
bool was_locked = btree_node_locked(iter, iter->level);
+ int ret = 0;
bch2_bkey_buf_init(&tmp);
- while (nr) {
+ while (nr && !ret) {
if (!bch2_btree_node_relock(iter, iter->level))
break;
@@ -1133,14 +1178,15 @@ static void btree_iter_prefetch(struct btree_iter *iter)
break;
bch2_bkey_buf_unpack(&tmp, c, l->b, k);
- bch2_btree_node_prefetch(c, iter, tmp.k, iter->btree_id,
- iter->level - 1);
+ ret = bch2_btree_node_prefetch(c, iter, tmp.k, iter->btree_id,
+ iter->level - 1);
}
if (!was_locked)
btree_node_unlock(iter, iter->level);
bch2_bkey_buf_exit(&tmp, c);
+ return ret;
}
static noinline void btree_node_mem_ptr_set(struct btree_iter *iter,
@@ -1164,10 +1210,11 @@ static noinline void btree_node_mem_ptr_set(struct btree_iter *iter,
btree_node_unlock(iter, plevel);
}
-static __always_inline int btree_iter_down(struct btree_iter *iter,
+static __always_inline int btree_iter_down(struct btree_trans *trans,
+ struct btree_iter *iter,
unsigned long trace_ip)
{
- struct bch_fs *c = iter->trans->c;
+ struct bch_fs *c = trans->c;
struct btree_iter_level *l = &iter->l[iter->level];
struct btree *b;
unsigned level = iter->level - 1;
@@ -1181,7 +1228,7 @@ static __always_inline int btree_iter_down(struct btree_iter *iter,
bch2_bkey_buf_unpack(&tmp, c, l->b,
bch2_btree_node_iter_peek(&l->iter, l->b));
- b = bch2_btree_node_get(c, iter, tmp.k, level, lock_type, trace_ip);
+ b = bch2_btree_node_get(trans, iter, tmp.k, level, lock_type, trace_ip);
ret = PTR_ERR_OR_ZERO(b);
if (unlikely(ret))
goto err;
@@ -1194,7 +1241,7 @@ static __always_inline int btree_iter_down(struct btree_iter *iter,
btree_node_mem_ptr_set(iter, level + 1, b);
if (iter->flags & BTREE_ITER_PREFETCH)
- btree_iter_prefetch(iter);
+ ret = btree_iter_prefetch(iter);
if (btree_node_read_locked(iter, level + 1))
btree_node_unlock(iter, level + 1);
@@ -1215,25 +1262,19 @@ static int __btree_iter_traverse_all(struct btree_trans *trans, int ret,
struct btree_iter *iter;
u8 sorted[BTREE_ITER_MAX];
int i, nr_sorted = 0;
- bool relock_fail;
if (trans->in_traverse_all)
return -EINTR;
trans->in_traverse_all = true;
retry_all:
+ trans->restarted = false;
+
nr_sorted = 0;
- relock_fail = false;
trans_for_each_iter(trans, iter) {
- if (!bch2_btree_iter_relock(iter, _THIS_IP_))
- relock_fail = true;
sorted[nr_sorted++] = iter->idx;
- }
-
- if (!relock_fail) {
- trans->in_traverse_all = false;
- return 0;
+ iter->should_be_locked = false;
}
#define btree_iter_cmp_by_idx(_l, _r) \
@@ -1289,15 +1330,6 @@ retry_all:
if (ret)
goto retry_all;
}
-
- if (hweight64(trans->iters_live) > 1)
- ret = -EINTR;
- else
- trans_for_each_iter(trans, iter)
- if (iter->flags & BTREE_ITER_KEEP_UNTIL_COMMIT) {
- ret = -EINTR;
- break;
- }
out:
bch2_btree_cache_cannibalize_unlock(c);
@@ -1307,7 +1339,7 @@ out:
return ret;
}
-int bch2_btree_iter_traverse_all(struct btree_trans *trans)
+static int bch2_btree_iter_traverse_all(struct btree_trans *trans)
{
return __btree_iter_traverse_all(trans, 0, _RET_IP_);
}
@@ -1353,9 +1385,19 @@ static inline unsigned btree_iter_up_until_good_node(struct btree_iter *iter,
static int btree_iter_traverse_one(struct btree_iter *iter,
unsigned long trace_ip)
{
+ struct btree_trans *trans = iter->trans;
unsigned l, depth_want = iter->level;
int ret = 0;
+ /*
+ * Ensure we obey iter->should_be_locked: if it's set, we can't unlock
+ * and re-traverse the iterator without a transaction restart:
+ */
+ if (iter->should_be_locked) {
+ ret = bch2_btree_iter_relock(iter, trace_ip) ? 0 : -EINTR;
+ goto out;
+ }
+
if (btree_iter_type(iter) == BTREE_ITER_CACHED) {
ret = bch2_btree_iter_traverse_cached(iter);
goto out;
@@ -1385,8 +1427,8 @@ static int btree_iter_traverse_one(struct btree_iter *iter,
*/
while (iter->level > depth_want) {
ret = btree_iter_node(iter, iter->level)
- ? btree_iter_down(iter, trace_ip)
- : btree_iter_lock_root(iter, depth_want, trace_ip);
+ ? btree_iter_down(trans, iter, trace_ip)
+ : btree_iter_lock_root(trans, iter, depth_want, trace_ip);
if (unlikely(ret)) {
if (ret == 1) {
/*
@@ -1414,7 +1456,9 @@ static int btree_iter_traverse_one(struct btree_iter *iter,
iter->uptodate = BTREE_ITER_NEED_PEEK;
out:
- trace_iter_traverse(iter->trans->ip, trace_ip,
+ BUG_ON((ret == -EINTR) != !!trans->restarted);
+ trace_iter_traverse(trans->ip, trace_ip,
+ btree_iter_type(iter) == BTREE_ITER_CACHED,
iter->btree_id, &iter->real_pos, ret);
bch2_btree_iter_verify(iter);
return ret;
@@ -1427,8 +1471,10 @@ static int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter)
ret = bch2_trans_cond_resched(trans) ?:
btree_iter_traverse_one(iter, _RET_IP_);
- if (unlikely(ret))
+ if (unlikely(ret) && hweight64(trans->iters_linked) == 1) {
ret = __btree_iter_traverse_all(trans, ret, _RET_IP_);
+ BUG_ON(ret == -EINTR);
+ }
return ret;
}
@@ -1559,6 +1605,8 @@ static void btree_iter_set_search_pos(struct btree_iter *iter, struct bpos new_p
int cmp = bpos_cmp(new_pos, iter->real_pos);
unsigned l = iter->level;
+ EBUG_ON(iter->trans->restarted);
+
if (!cmp)
goto out;
@@ -2118,6 +2166,8 @@ struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans,
struct btree_iter *iter, *best = NULL;
struct bpos real_pos, pos_min = POS_MIN;
+ EBUG_ON(trans->restarted);
+
if ((flags & BTREE_ITER_TYPE) != BTREE_ITER_NODES &&
btree_node_type_is_extents(btree_id) &&
!(flags & BTREE_ITER_NOT_EXTENTS) &&
@@ -2282,6 +2332,7 @@ void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
if (old_bytes) {
trace_trans_restart_mem_realloced(trans->ip, _RET_IP_, new_bytes);
+ btree_trans_restart(trans);
return ERR_PTR(-EINTR);
}
}
@@ -2307,33 +2358,26 @@ inline void bch2_trans_unlink_iters(struct btree_trans *trans)
}
/**
- * bch2_trans_reset() - reset a transaction after a interrupted attempt
+ * bch2_trans_begin() - reset a transaction after an interrupted attempt
* @trans: transaction to reset
- * @flags: transaction reset flags.
*
* While iterating over nodes or updating nodes a attempt to lock a btree
* node may return EINTR when the trylock fails. When this occurs
- * bch2_trans_reset() or bch2_trans_begin() should be called and the
- * transaction retried.
- *
- * Transaction reset flags include:
- *
- * - TRANS_RESET_NOUNLOCK - Do not attempt to unlock and reschedule the
- * transaction.
- * - TRANS_RESET_NOTRAVERSE - Do not traverse all linked iters.
+ * bch2_trans_begin() should be called and the transaction retried.
*/
-void bch2_trans_reset(struct btree_trans *trans, unsigned flags)
+void bch2_trans_begin(struct btree_trans *trans)
{
struct btree_iter *iter;
- trans_for_each_iter(trans, iter) {
+ trans_for_each_iter(trans, iter)
iter->flags &= ~(BTREE_ITER_KEEP_UNTIL_COMMIT|
BTREE_ITER_SET_POS_AFTER_COMMIT);
- iter->should_be_locked = false;
- }
+ /*
+ * XXX: we shouldn't be doing this if the transaction was restarted, but
+ * currently we still overflow transaction iterators if we do that
+ * */
bch2_trans_unlink_iters(trans);
-
trans->iters_touched &= trans->iters_live;
trans->extra_journal_res = 0;
@@ -2351,12 +2395,12 @@ void bch2_trans_reset(struct btree_trans *trans, unsigned flags)
(void *) &trans->fs_usage_deltas->memset_start);
}
- if (!(flags & TRANS_RESET_NOUNLOCK))
- bch2_trans_cond_resched(trans);
+ bch2_trans_cond_resched(trans);
- if (!(flags & TRANS_RESET_NOTRAVERSE) &&
- trans->iters_linked)
+ if (trans->restarted)
bch2_btree_iter_traverse_all(trans);
+
+ trans->restarted = false;
}
static void bch2_trans_alloc_iters(struct btree_trans *trans, struct bch_fs *c)
diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h
index 7385cca4..aeabc07d 100644
--- a/libbcachefs/btree_iter.h
+++ b/libbcachefs/btree_iter.h
@@ -111,11 +111,20 @@ void bch2_btree_node_iter_fix(struct btree_iter *, struct btree *,
struct btree_node_iter *, struct bkey_packed *,
unsigned, unsigned);
+bool bch2_btree_iter_relock_intent(struct btree_iter *);
bool bch2_btree_iter_relock(struct btree_iter *, unsigned long);
bool bch2_trans_relock(struct btree_trans *);
void bch2_trans_unlock(struct btree_trans *);
+__always_inline
+static inline int btree_trans_restart(struct btree_trans *trans)
+{
+ trans->restarted = true;
+ bch2_trans_unlock(trans);
+ return -EINTR;
+}
+
bool __bch2_btree_iter_upgrade(struct btree_iter *, unsigned);
static inline bool bch2_btree_iter_upgrade(struct btree_iter *iter,
@@ -147,8 +156,6 @@ void bch2_btree_iter_reinit_node(struct btree_iter *, struct btree *);
int __must_check bch2_btree_iter_traverse(struct btree_iter *);
-int bch2_btree_iter_traverse_all(struct btree_trans *);
-
struct btree *bch2_btree_iter_peek_node(struct btree_iter *);
struct btree *bch2_btree_iter_next_node(struct btree_iter *);
@@ -316,22 +323,7 @@ static inline void set_btree_iter_dontneed(struct btree_trans *trans, struct btr
trans->iters_touched &= ~(1ULL << iter->idx);
}
-#define TRANS_RESET_NOTRAVERSE (1 << 0)
-#define TRANS_RESET_NOUNLOCK (1 << 1)
-
-void bch2_trans_reset(struct btree_trans *, unsigned);
-
-/**
- * bch2_trans_begin() - ensure lock consistency of transaction on retry
- * @trans: transaction to prepare
- *
- * Ensure lock ordering is correct before potentially retrying a transaction
- * after a failed trylock.
- */
-static inline void bch2_trans_begin(struct btree_trans *trans)
-{
- return bch2_trans_reset(trans, 0);
-}
+void bch2_trans_begin(struct btree_trans *);
void *bch2_trans_kmalloc(struct btree_trans *, size_t);
void bch2_trans_init(struct btree_trans *, struct bch_fs *, unsigned, size_t);
diff --git a/libbcachefs/btree_key_cache.c b/libbcachefs/btree_key_cache.c
index 7f47ef33..e327ef39 100644
--- a/libbcachefs/btree_key_cache.c
+++ b/libbcachefs/btree_key_cache.c
@@ -214,7 +214,7 @@ static int btree_key_cache_fill(struct btree_trans *trans,
if (!bch2_btree_node_relock(ck_iter, 0)) {
trace_transaction_restart_ip(trans->ip, _THIS_IP_);
- ret = -EINTR;
+ ret = btree_trans_restart(trans);
goto err;
}
@@ -233,6 +233,10 @@ static int btree_key_cache_fill(struct btree_trans *trans,
}
}
+ /*
+ * XXX: not allowed to be holding read locks when we take a write lock,
+ * currently
+ */
bch2_btree_node_lock_write(ck_iter->l[0].b, ck_iter);
if (new_k) {
kfree(ck->k);
@@ -299,10 +303,8 @@ retry:
if (!btree_node_lock((void *) ck, iter->pos, 0, iter, lock_want,
bkey_cached_check_fn, iter, _THIS_IP_)) {
- if (ck->key.btree_id != iter->btree_id ||
- bpos_cmp(ck->key.pos, iter->pos)) {
+ if (!trans->restarted)
goto retry;
- }
trace_transaction_restart_ip(trans->ip, _THIS_IP_);
ret = -EINTR;
@@ -322,10 +324,10 @@ retry:
iter->l[0].b = (void *) ck;
fill:
if (!ck->valid && !(iter->flags & BTREE_ITER_CACHED_NOFILL)) {
- if (!btree_node_intent_locked(iter, 0))
- bch2_btree_iter_upgrade(iter, 1);
- if (!btree_node_intent_locked(iter, 0)) {
+ if (!iter->locks_want &&
+ !!__bch2_btree_iter_upgrade(iter, 1)) {
trace_transaction_restart_ip(trans->ip, _THIS_IP_);
+ BUG_ON(!trans->restarted);
ret = -EINTR;
goto err;
}
@@ -340,13 +342,14 @@ fill:
iter->uptodate = BTREE_ITER_NEED_PEEK;
- if (!(iter->flags & BTREE_ITER_INTENT))
- bch2_btree_iter_downgrade(iter);
- else if (!iter->locks_want) {
- if (!__bch2_btree_iter_upgrade(iter, 1))
- ret = -EINTR;
+ if ((iter->flags & BTREE_ITER_INTENT) &&
+ !bch2_btree_iter_upgrade(iter, 1)) {
+ BUG_ON(!trans->restarted);
+ ret = -EINTR;
}
+ BUG_ON(!ret && !btree_node_locked(iter, 0));
+
return ret;
err:
if (ret != -EINTR) {
@@ -377,10 +380,9 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
BTREE_ITER_CACHED_NOFILL|
BTREE_ITER_CACHED_NOCREATE|
BTREE_ITER_INTENT);
-retry:
ret = bch2_btree_iter_traverse(c_iter);
if (ret)
- goto err;
+ goto out;
ck = (void *) c_iter->l[0].b;
if (!ck ||
@@ -399,9 +401,10 @@ retry:
* to be using alloc reserves:
* */
ret = bch2_btree_iter_traverse(b_iter) ?:
- bch2_trans_update(trans, b_iter, ck->k, BTREE_TRIGGER_NORUN) ?:
+ bch2_trans_update(trans, b_iter, ck->k,
+ BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|
+ BTREE_TRIGGER_NORUN) ?:
bch2_trans_commit(trans, NULL, NULL,
- BTREE_INSERT_NOUNLOCK|
BTREE_INSERT_NOCHECK_RW|
BTREE_INSERT_NOFAIL|
BTREE_INSERT_USE_RESERVE|
@@ -409,15 +412,10 @@ retry:
? BTREE_INSERT_JOURNAL_RESERVED
: 0)|
commit_flags);
-err:
- if (ret == -EINTR)
- goto retry;
-
- if (ret == -EAGAIN)
- goto out;
-
if (ret) {
- bch2_fs_fatal_err_on(!bch2_journal_error(j), c,
+ bch2_fs_fatal_err_on(ret != -EINTR &&
+ ret != -EAGAIN &&
+ !bch2_journal_error(j), c,
"error flushing key cache: %i", ret);
goto out;
}
@@ -465,7 +463,6 @@ int bch2_btree_key_cache_journal_flush(struct journal *j,
struct bkey_cached *ck =
container_of(pin, struct bkey_cached, journal);
struct bkey_cached_key key;
- struct btree_trans trans;
int ret = 0;
int srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
@@ -480,10 +477,9 @@ int bch2_btree_key_cache_journal_flush(struct journal *j,
}
six_unlock_read(&ck->c.lock);
- bch2_trans_init(&trans, c, 0, 0);
- ret = btree_key_cache_flush_pos(&trans, key, seq,
- BTREE_INSERT_JOURNAL_RECLAIM, false);
- bch2_trans_exit(&trans);
+ ret = bch2_trans_do(c, NULL, NULL, 0,
+ btree_key_cache_flush_pos(&trans, key, seq,
+ BTREE_INSERT_JOURNAL_RECLAIM, false));
unlock:
srcu_read_unlock(&c->btree_trans_barrier, srcu_idx);
diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h
index 07c9ba4e..6882873d 100644
--- a/libbcachefs/btree_types.h
+++ b/libbcachefs/btree_types.h
@@ -380,9 +380,10 @@ struct btree_trans {
int srcu_idx;
u8 nr_updates;
- unsigned used_mempool:1;
- unsigned error:1;
- unsigned in_traverse_all:1;
+ bool used_mempool:1;
+ bool error:1;
+ bool in_traverse_all:1;
+ bool restarted:1;
/*
* For when bch2_trans_update notices we'll be splitting a compressed
* extent:
diff --git a/libbcachefs/btree_update.h b/libbcachefs/btree_update.h
index bab135fa..217b52e1 100644
--- a/libbcachefs/btree_update.h
+++ b/libbcachefs/btree_update.h
@@ -15,7 +15,6 @@ bool bch2_btree_bset_insert_key(struct btree_iter *, struct btree *,
void bch2_btree_add_journal_pin(struct bch_fs *, struct btree *, u64);
enum btree_insert_flags {
- __BTREE_INSERT_NOUNLOCK,
__BTREE_INSERT_NOFAIL,
__BTREE_INSERT_NOCHECK_RW,
__BTREE_INSERT_LAZY_RW,
@@ -29,11 +28,6 @@ enum btree_insert_flags {
__BCH_HASH_SET_MUST_REPLACE,
};
-/*
- * Don't drop locks _after_ successfully updating btree:
- */
-#define BTREE_INSERT_NOUNLOCK (1 << __BTREE_INSERT_NOUNLOCK)
-
/* Don't check for -ENOSPC: */
#define BTREE_INSERT_NOFAIL (1 << __BTREE_INSERT_NOFAIL)
@@ -110,12 +104,10 @@ static inline int bch2_trans_commit(struct btree_trans *trans,
({ \
int _ret; \
\
- while (1) { \
+ do { \
+ bch2_trans_begin(_trans); \
_ret = (_do); \
- if (_ret != -EINTR) \
- break; \
- bch2_trans_reset(_trans, 0); \
- } \
+ } while (_ret == -EINTR); \
\
_ret; \
})
diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c
index a2542408..c8c3382f 100644
--- a/libbcachefs/btree_update_interior.c
+++ b/libbcachefs/btree_update_interior.c
@@ -960,9 +960,6 @@ retry:
if (flags & BTREE_INSERT_GC_LOCK_HELD)
lockdep_assert_held(&c->gc_lock);
else if (!down_read_trylock(&c->gc_lock)) {
- if (flags & BTREE_INSERT_NOUNLOCK)
- return ERR_PTR(-EINTR);
-
bch2_trans_unlock(trans);
down_read(&c->gc_lock);
if (!bch2_trans_relock(trans)) {
@@ -1005,20 +1002,11 @@ retry:
BTREE_UPDATE_JOURNAL_RES,
journal_flags|JOURNAL_RES_GET_NONBLOCK);
if (ret == -EAGAIN) {
- /*
- * this would be cleaner if bch2_journal_preres_get() took a
- * closure argument
- */
- if (flags & BTREE_INSERT_NOUNLOCK) {
- trace_trans_restart_journal_preres_get(trans->ip, _RET_IP_);
- ret = -EINTR;
- goto err;
- }
-
bch2_trans_unlock(trans);
if (flags & BTREE_INSERT_JOURNAL_RECLAIM) {
bch2_btree_update_free(as);
+ btree_trans_restart(trans);
return ERR_PTR(ret);
}
@@ -1043,8 +1031,7 @@ retry:
if (ret)
goto err;
- ret = bch2_btree_reserve_get(as, nr_nodes, flags,
- !(flags & BTREE_INSERT_NOUNLOCK) ? &cl : NULL);
+ ret = bch2_btree_reserve_get(as, nr_nodes, flags, &cl);
if (ret)
goto err;
@@ -1057,8 +1044,6 @@ err:
bch2_btree_update_free(as);
if (ret == -EAGAIN) {
- BUG_ON(flags & BTREE_INSERT_NOUNLOCK);
-
bch2_trans_unlock(trans);
closure_sync(&cl);
ret = -EINTR;
@@ -1593,12 +1578,12 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
size_t sib_u64s;
int ret = 0, ret2 = 0;
- BUG_ON(!btree_node_locked(iter, level));
retry:
ret = bch2_btree_iter_traverse(iter);
if (ret)
- goto err;
+ return ret;
+ BUG_ON(!iter->should_be_locked);
BUG_ON(!btree_node_locked(iter, level));
b = iter->l[level].b;
@@ -1751,13 +1736,6 @@ err:
if (ret == -EINTR && bch2_trans_relock(trans))
goto retry;
- if (ret == -EINTR && !(flags & BTREE_INSERT_NOUNLOCK)) {
- ret2 = ret;
- ret = bch2_btree_iter_traverse_all(trans);
- if (!ret)
- goto retry;
- }
-
goto out;
}
@@ -1949,8 +1927,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_NOCHECK_RW|
BTREE_INSERT_JOURNAL_RECLAIM|
- BTREE_INSERT_JOURNAL_RESERVED|
- BTREE_INSERT_NOUNLOCK);
+ BTREE_INSERT_JOURNAL_RESERVED);
if (ret)
goto err;
diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c
index d319e27a..e9e54226 100644
--- a/libbcachefs/btree_update_leaf.c
+++ b/libbcachefs/btree_update_leaf.c
@@ -348,11 +348,6 @@ static inline void do_btree_insert_one(struct btree_trans *trans,
}
}
-static noinline void bch2_btree_iter_unlock_noinline(struct btree_iter *iter)
-{
- __bch2_btree_iter_unlock(iter);
-}
-
static noinline void bch2_trans_mark_gc(struct btree_trans *trans)
{
struct bch_fs *c = trans->c;
@@ -384,6 +379,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
if (race_fault()) {
trace_trans_restart_fault_inject(trans->ip, trace_ip);
+ trans->restarted = true;
return -EINTR;
}
@@ -520,10 +516,11 @@ static noinline int maybe_do_btree_merge(struct btree_trans *trans, struct btree
u64s_delta -= !bkey_deleted(old.k) ? old.k->u64s : 0;
}
- return u64s_delta <= 0
- ? (bch2_foreground_maybe_merge(trans, iter, iter->level,
- trans->flags & ~BTREE_INSERT_NOUNLOCK) ?: -EINTR)
- : 0;
+ if (u64s_delta > 0)
+ return 0;
+
+ return bch2_foreground_maybe_merge(trans, iter,
+ iter->level, trans->flags);
}
/*
@@ -580,20 +577,15 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
* or anything else that might call bch2_trans_relock(), since that
* would just retake the read locks:
*/
- trans_for_each_iter(trans, iter) {
- if (iter->nodes_locked != iter->nodes_intent_locked) {
- if (btree_iter_keep(trans, iter)) {
- if (!bch2_btree_iter_upgrade(iter, 1)) {
- trace_trans_restart_upgrade(trans->ip, trace_ip,
- iter->btree_id,
- &iter->real_pos);
- return -EINTR;
- }
- } else {
- bch2_btree_iter_unlock_noinline(iter);
- }
+ trans_for_each_iter(trans, iter)
+ if (iter->nodes_locked != iter->nodes_intent_locked &&
+ !bch2_btree_iter_upgrade(iter, 1)) {
+ trace_trans_restart_upgrade(trans->ip, trace_ip,
+ iter->btree_id,
+ &iter->real_pos);
+ trans->restarted = true;
+ return -EINTR;
}
- }
trans_for_each_update(trans, i) {
const char *invalid = bch2_bkey_invalid(c,
@@ -655,56 +647,24 @@ int bch2_trans_commit_error(struct btree_trans *trans,
int ret, unsigned long trace_ip)
{
struct bch_fs *c = trans->c;
- unsigned flags = trans->flags;
-
- /*
- * BTREE_INSERT_NOUNLOCK means don't unlock _after_ successful btree
- * update; if we haven't done anything yet it doesn't apply
- */
- flags &= ~BTREE_INSERT_NOUNLOCK;
switch (ret) {
case BTREE_INSERT_BTREE_NODE_FULL:
- ret = bch2_btree_split_leaf(trans, i->iter, flags);
-
- /*
- * if the split succeeded without dropping locks the insert will
- * still be atomic (what the caller peeked() and is overwriting
- * won't have changed)
- */
-#if 0
- /*
- * XXX:
- * split -> btree node merging (of parent node) might still drop
- * locks when we're not passing it BTREE_INSERT_NOUNLOCK
- *
- * we don't want to pass BTREE_INSERT_NOUNLOCK to split as that
- * will inhibit merging - but we don't have a reliable way yet
- * (do we?) of checking if we dropped locks in this path
- */
+ ret = bch2_btree_split_leaf(trans, i->iter, trans->flags);
if (!ret)
- goto retry;
-#endif
+ return 0;
- /*
- * don't care if we got ENOSPC because we told split it
- * couldn't block:
- */
- if (!ret ||
- ret == -EINTR ||
- (flags & BTREE_INSERT_NOUNLOCK)) {
+ if (ret == -EINTR)
trace_trans_restart_btree_node_split(trans->ip, trace_ip,
i->iter->btree_id,
&i->iter->real_pos);
- ret = -EINTR;
- }
break;
case BTREE_INSERT_NEED_MARK_REPLICAS:
bch2_trans_unlock(trans);
ret = bch2_replicas_delta_list_mark(c, trans->fs_usage_deltas);
if (ret)
- return ret;
+ break;
if (bch2_trans_relock(trans))
return 0;
@@ -716,12 +676,15 @@ int bch2_trans_commit_error(struct btree_trans *trans,
bch2_trans_unlock(trans);
if ((trans->flags & BTREE_INSERT_JOURNAL_RECLAIM) &&
- !(trans->flags & BTREE_INSERT_JOURNAL_RESERVED))
- return -EAGAIN;
+ !(trans->flags & BTREE_INSERT_JOURNAL_RESERVED)) {
+ trans->restarted = true;
+ ret = -EAGAIN;
+ break;
+ }
ret = bch2_trans_journal_res_get(trans, JOURNAL_RES_GET_CHECK);
if (ret)
- return ret;
+ break;
if (bch2_trans_relock(trans))
return 0;
@@ -737,7 +700,7 @@ int bch2_trans_commit_error(struct btree_trans *trans,
wait_event_freezable(c->journal.reclaim_wait,
(ret = journal_reclaim_wait_done(c)));
if (ret < 0)
- return ret;
+ break;
if (bch2_trans_relock(trans))
return 0;
@@ -750,7 +713,8 @@ int bch2_trans_commit_error(struct btree_trans *trans,
break;
}
- BUG_ON(ret == -ENOSPC && (flags & BTREE_INSERT_NOFAIL));
+ BUG_ON((ret == -EINTR || ret == -EAGAIN) && !trans->restarted);
+ BUG_ON(ret == -ENOSPC && (trans->flags & BTREE_INSERT_NOFAIL));
return ret;
}
@@ -839,8 +803,10 @@ static int extent_handle_overwrites(struct btree_trans *trans,
BTREE_ITER_NOT_EXTENTS|
BTREE_ITER_INTENT);
ret = bch2_btree_iter_traverse(update_iter);
- if (ret)
+ if (ret) {
+ bch2_trans_iter_put(trans, update_iter);
goto out;
+ }
bch2_trans_update(trans, update_iter, update,
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|
@@ -887,7 +853,7 @@ int __bch2_trans_commit(struct btree_trans *trans)
struct btree_insert_entry *i = NULL;
struct btree_iter *iter;
bool trans_trigger_run;
- unsigned u64s, reset_flags = 0;
+ unsigned u64s;
int ret = 0;
if (!trans->nr_updates &&
@@ -944,18 +910,13 @@ int __bch2_trans_commit(struct btree_trans *trans)
} while (trans_trigger_run);
trans_for_each_update(trans, i) {
- ret = bch2_btree_iter_traverse(i->iter);
- if (unlikely(ret)) {
- trace_trans_restart_traverse(trans->ip, _RET_IP_,
- i->iter->btree_id,
- &i->iter->pos);
- goto out;
- }
+ BUG_ON(!i->iter->should_be_locked);
if (unlikely(!bch2_btree_iter_upgrade(i->iter, i->level + 1))) {
trace_trans_restart_upgrade(trans->ip, _RET_IP_,
i->iter->btree_id,
&i->iter->pos);
+ trans->restarted = true;
ret = -EINTR;
goto out;
}
@@ -978,6 +939,7 @@ int __bch2_trans_commit(struct btree_trans *trans)
goto err;
}
retry:
+ BUG_ON(trans->restarted);
memset(&trans->journal_res, 0, sizeof(trans->journal_res));
ret = do_bch2_trans_commit(trans, &i, _RET_IP_);
@@ -998,11 +960,18 @@ out:
if (likely(!(trans->flags & BTREE_INSERT_NOCHECK_RW)))
percpu_ref_put(&trans->c->writes);
out_reset:
- if (!ret)
- reset_flags |= TRANS_RESET_NOTRAVERSE;
- if (!ret && (trans->flags & BTREE_INSERT_NOUNLOCK))
- reset_flags |= TRANS_RESET_NOUNLOCK;
- bch2_trans_reset(trans, reset_flags);
+ trans->extra_journal_res = 0;
+ trans->nr_updates = 0;
+ trans->hooks = NULL;
+ trans->extra_journal_entries = NULL;
+ trans->extra_journal_entry_u64s = 0;
+
+ if (trans->fs_usage_deltas) {
+ trans->fs_usage_deltas->used = 0;
+ memset(&trans->fs_usage_deltas->memset_start, 0,
+ (void *) &trans->fs_usage_deltas->memset_end -
+ (void *) &trans->fs_usage_deltas->memset_start);
+ }
return ret;
err:
@@ -1050,7 +1019,11 @@ int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
n.iter = bch2_trans_get_iter(trans, n.btree_id, n.k->k.p,
BTREE_ITER_INTENT|
BTREE_ITER_NOT_EXTENTS);
+ ret = bch2_btree_iter_traverse(n.iter);
bch2_trans_iter_put(trans, n.iter);
+
+ if (ret)
+ return ret;
}
BUG_ON(n.iter->flags & BTREE_ITER_IS_EXTENTS);
diff --git a/libbcachefs/dirent.c b/libbcachefs/dirent.c
index a95165b8..02b29681 100644
--- a/libbcachefs/dirent.c
+++ b/libbcachefs/dirent.c
@@ -210,6 +210,9 @@ int bch2_dirent_rename(struct btree_trans *trans,
goto out;
old_dst = bch2_btree_iter_peek_slot(dst_iter);
+ ret = bkey_err(old_dst);
+ if (ret)
+ goto out;
if (mode != BCH_RENAME)
*dst_inum = le64_to_cpu(bkey_s_c_to_dirent(old_dst).v->d_inum);
@@ -225,6 +228,10 @@ int bch2_dirent_rename(struct btree_trans *trans,
goto out;
old_src = bch2_btree_iter_peek_slot(src_iter);
+ ret = bkey_err(old_src);
+ if (ret)
+ goto out;
+
*src_inum = le64_to_cpu(bkey_s_c_to_dirent(old_src).v->d_inum);
/* Create new dst key: */
@@ -329,20 +336,25 @@ u64 bch2_dirent_lookup(struct bch_fs *c, u64 dir_inum,
struct btree_iter *iter;
struct bkey_s_c k;
u64 inum = 0;
+ int ret = 0;
bch2_trans_init(&trans, c, 0, 0);
iter = __bch2_dirent_lookup_trans(&trans, dir_inum,
hash_info, name, 0);
- if (IS_ERR(iter)) {
- BUG_ON(PTR_ERR(iter) == -EINTR);
+ ret = PTR_ERR_OR_ZERO(iter);
+ if (ret)
goto out;
- }
k = bch2_btree_iter_peek_slot(iter);
+ ret = bkey_err(k);
+ if (ret)
+ goto out;
+
inum = le64_to_cpu(bkey_s_c_to_dirent(k).v->d_inum);
bch2_trans_iter_put(&trans, iter);
out:
+ BUG_ON(ret == -EINTR);
bch2_trans_exit(&trans);
return inum;
}
diff --git a/libbcachefs/fs-common.c b/libbcachefs/fs-common.c
index 60c54438..2189a11c 100644
--- a/libbcachefs/fs-common.c
+++ b/libbcachefs/fs-common.c
@@ -168,6 +168,10 @@ int bch2_unlink_trans(struct btree_trans *trans,
goto err;
k = bch2_btree_iter_peek_slot(dirent_iter);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+
inum = le64_to_cpu(bkey_s_c_to_dirent(k).v->d_inum);
inode_iter = bch2_inode_peek(trans, inode_u, inum, BTREE_ITER_INTENT);
diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c
index ae55453b..1ac99f37 100644
--- a/libbcachefs/fs-io.c
+++ b/libbcachefs/fs-io.c
@@ -801,6 +801,8 @@ static void bchfs_read(struct btree_trans *trans, struct btree_iter *iter,
bch2_bkey_buf_init(&sk);
retry:
+ bch2_trans_begin(trans);
+
while (1) {
struct bkey_s_c k;
unsigned bytes, sectors, offset_into_extent;
@@ -2301,8 +2303,6 @@ int bch2_truncate(struct user_namespace *mnt_userns,
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct address_space *mapping = inode->v.i_mapping;
struct bch_inode_unpacked inode_u;
- struct btree_trans trans;
- struct btree_iter *iter;
u64 new_i_size = iattr->ia_size;
s64 i_sectors_delta = 0;
int ret = 0;
@@ -2323,16 +2323,7 @@ int bch2_truncate(struct user_namespace *mnt_userns,
inode_dio_wait(&inode->v);
bch2_pagecache_block_get(&inode->ei_pagecache_lock);
- /*
- * fetch current on disk i_size: inode is locked, i_size can only
- * increase underneath us:
- */
- bch2_trans_init(&trans, c, 0, 0);
- iter = bch2_inode_peek(&trans, &inode_u, inode->v.i_ino, 0);
- ret = PTR_ERR_OR_ZERO(iter);
- bch2_trans_iter_put(&trans, iter);
- bch2_trans_exit(&trans);
-
+ ret = bch2_inode_find_by_inum(c, inode->v.i_ino, &inode_u);
if (ret)
goto err;
@@ -2557,6 +2548,8 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
struct bpos atomic_end;
unsigned trigger_flags = 0;
+ bch2_trans_begin(&trans);
+
k = insert
? bch2_btree_iter_peek_prev(src)
: bch2_btree_iter_peek(src);
@@ -2684,13 +2677,13 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
/* already reserved */
if (k.k->type == KEY_TYPE_reservation &&
bkey_s_c_to_reservation(k).v->nr_replicas >= replicas) {
- bch2_btree_iter_next_slot(iter);
+ bch2_btree_iter_advance(iter);
continue;
}
if (bkey_extent_is_data(k.k) &&
!(mode & FALLOC_FL_ZERO_RANGE)) {
- bch2_btree_iter_next_slot(iter);
+ bch2_btree_iter_advance(iter);
continue;
}
diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c
index bc7cea0d..631fb87b 100644
--- a/libbcachefs/fs.c
+++ b/libbcachefs/fs.c
@@ -156,7 +156,6 @@ retry:
bch2_inode_write(&trans, iter, &inode_u) ?:
bch2_trans_commit(&trans, NULL,
&inode->ei_journal_seq,
- BTREE_INSERT_NOUNLOCK|
BTREE_INSERT_NOFAIL);
/*
@@ -294,8 +293,7 @@ retry:
if (unlikely(ret))
goto err_before_quota;
- ret = bch2_trans_commit(&trans, NULL, &journal_seq,
- BTREE_INSERT_NOUNLOCK);
+ ret = bch2_trans_commit(&trans, NULL, &journal_seq, 0);
if (unlikely(ret)) {
bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1,
KEY_TYPE_QUOTA_WARN);
@@ -416,8 +414,7 @@ static int __bch2_link(struct bch_fs *c,
mutex_lock(&inode->ei_update_lock);
bch2_trans_init(&trans, c, 4, 1024);
- ret = __bch2_trans_do(&trans, NULL, &inode->ei_journal_seq,
- BTREE_INSERT_NOUNLOCK,
+ ret = __bch2_trans_do(&trans, NULL, &inode->ei_journal_seq, 0,
bch2_link_trans(&trans,
dir->v.i_ino,
inode->v.i_ino, &dir_u, &inode_u,
@@ -469,7 +466,6 @@ static int bch2_unlink(struct inode *vdir, struct dentry *dentry)
bch2_trans_init(&trans, c, 4, 1024);
ret = __bch2_trans_do(&trans, NULL, &dir->ei_journal_seq,
- BTREE_INSERT_NOUNLOCK|
BTREE_INSERT_NOFAIL,
bch2_unlink_trans(&trans,
dir->v.i_ino, &dir_u,
@@ -590,8 +586,7 @@ static int bch2_rename2(struct user_namespace *mnt_userns,
goto err;
}
- ret = __bch2_trans_do(&trans, NULL, &journal_seq,
- BTREE_INSERT_NOUNLOCK,
+ ret = __bch2_trans_do(&trans, NULL, &journal_seq, 0,
bch2_rename_trans(&trans,
src_dir->v.i_ino, &src_dir_u,
dst_dir->v.i_ino, &dst_dir_u,
@@ -734,7 +729,6 @@ retry:
ret = bch2_inode_write(&trans, inode_iter, &inode_u) ?:
bch2_trans_commit(&trans, NULL,
&inode->ei_journal_seq,
- BTREE_INSERT_NOUNLOCK|
BTREE_INSERT_NOFAIL);
btree_err:
bch2_trans_iter_put(&trans, inode_iter);
@@ -909,6 +903,8 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
iter = bch2_trans_get_iter(&trans, BTREE_ID_extents,
POS(ei->v.i_ino, start >> 9), 0);
retry:
+ bch2_trans_begin(&trans);
+
while ((k = bch2_btree_iter_peek(iter)).k &&
!(ret = bkey_err(k)) &&
bkey_cmp(iter->pos, end) < 0) {
diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c
index bedfd348..36eba46d 100644
--- a/libbcachefs/fsck.c
+++ b/libbcachefs/fsck.c
@@ -727,7 +727,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
k = bch2_btree_iter_peek(iter);
if (!k.k)
- return 1;
+ return 0;
ret = bkey_err(k);
if (ret)
@@ -803,8 +803,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
ret = __write_inode(trans, &target, target_snapshot) ?:
bch2_trans_commit(trans, NULL, NULL,
BTREE_INSERT_NOFAIL|
- BTREE_INSERT_LAZY_RW|
- BTREE_INSERT_NOUNLOCK);
+ BTREE_INSERT_LAZY_RW);
if (ret)
return ret;
return -EINTR;
@@ -904,19 +903,12 @@ static int check_dirents(struct bch_fs *c)
BTREE_ITER_INTENT|
BTREE_ITER_PREFETCH);
- while (1) {
+ do {
ret = lockrestart_do(&trans,
check_dirent(&trans, iter, &hash_info, &w, &nr_subdirs));
- if (ret == 1) {
- /* at end */
- ret = 0;
- break;
- }
if (ret)
break;
-
- bch2_btree_iter_advance(iter);
- }
+ } while (bch2_btree_iter_advance(iter));
bch2_trans_iter_put(&trans, iter);
return bch2_trans_exit(&trans) ?: ret;
diff --git a/libbcachefs/inode.c b/libbcachefs/inode.c
index 67983ff4..25607b58 100644
--- a/libbcachefs/inode.c
+++ b/libbcachefs/inode.c
@@ -519,7 +519,7 @@ again:
if (k.k->p.snapshot == snapshot &&
k.k->type != KEY_TYPE_inode &&
!bch2_btree_key_cache_find(c, BTREE_ID_inodes, SPOS(0, pos, snapshot))) {
- bch2_btree_iter_next(iter);
+ bch2_btree_iter_advance(iter);
continue;
}
diff --git a/libbcachefs/io.c b/libbcachefs/io.c
index 092ece2c..a59b291a 100644
--- a/libbcachefs/io.c
+++ b/libbcachefs/io.c
@@ -235,8 +235,12 @@ int bch2_sum_sector_overwrites(struct btree_trans *trans,
* writing to, because i_size could be up to one block
* less:
*/
- if (!bkey_cmp(old.k->p, new->k.p))
+ if (!bkey_cmp(old.k->p, new->k.p)) {
old = bch2_btree_iter_next(iter);
+ ret = bkey_err(old);
+ if (ret)
+ break;
+ }
if (old.k && !bkey_err(old) &&
old.k->p.inode == extent_iter->pos.inode &&
@@ -362,14 +366,13 @@ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter,
struct bkey_s_c k;
int ret = 0, ret2 = 0;
- while ((k = bch2_btree_iter_peek(iter)).k &&
+ while ((bch2_trans_begin(trans),
+ (k = bch2_btree_iter_peek(iter)).k) &&
bkey_cmp(iter->pos, end) < 0) {
struct disk_reservation disk_res =
bch2_disk_reservation_init(c, 0);
struct bkey_i delete;
- bch2_trans_begin(trans);
-
ret = bkey_err(k);
if (ret)
goto btree_err;
@@ -2270,12 +2273,13 @@ void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
bch2_bkey_buf_init(&sk);
bch2_trans_init(&trans, c, 0, 0);
-retry:
- bch2_trans_begin(&trans);
iter = bch2_trans_get_iter(&trans, BTREE_ID_extents,
POS(inode, bvec_iter.bi_sector),
BTREE_ITER_SLOTS);
+retry:
+ bch2_trans_begin(&trans);
+
while (1) {
unsigned bytes, sectors, offset_into_extent;
enum btree_id data_btree = BTREE_ID_extents;
@@ -2331,19 +2335,20 @@ retry:
swap(bvec_iter.bi_size, bytes);
bio_advance_iter(&rbio->bio, &bvec_iter, bytes);
}
- bch2_trans_iter_put(&trans, iter);
if (ret == -EINTR || ret == READ_RETRY || ret == READ_RETRY_AVOID)
goto retry;
+ bch2_trans_iter_put(&trans, iter);
+ bch2_trans_exit(&trans);
+ bch2_bkey_buf_exit(&sk, c);
+
if (ret) {
bch_err_inum_ratelimited(c, inode,
"read error %i from btree lookup", ret);
rbio->bio.bi_status = BLK_STS_IOERR;
bch2_rbio_done(rbio);
}
- bch2_trans_exit(&trans);
- bch2_bkey_buf_exit(&sk, c);
}
void bch2_fs_io_exit(struct bch_fs *c)
diff --git a/libbcachefs/move.c b/libbcachefs/move.c
index 80a54e17..ee0f155f 100644
--- a/libbcachefs/move.c
+++ b/libbcachefs/move.c
@@ -84,7 +84,7 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
bool extending = false, should_check_enospc;
s64 i_sectors_delta = 0, disk_sectors_delta = 0;
- bch2_trans_reset(&trans, 0);
+ bch2_trans_begin(&trans);
k = bch2_btree_iter_peek_slot(iter);
ret = bkey_err(k);
@@ -191,7 +191,7 @@ nomatch:
}
atomic_long_inc(&c->extent_migrate_raced);
trace_move_race(&new->k);
- bch2_btree_iter_next_slot(iter);
+ bch2_btree_iter_advance(iter);
goto next;
}
out:
@@ -597,6 +597,8 @@ static int __bch2_move_data(struct bch_fs *c,
}
} while (delay);
+ bch2_trans_begin(&trans);
+
k = bch2_btree_iter_peek(iter);
stats->pos = iter->pos;
@@ -652,8 +654,7 @@ static int __bch2_move_data(struct bch_fs *c,
data_cmd, data_opts);
if (ret2) {
if (ret2 == -EINTR) {
- bch2_trans_reset(&trans, 0);
- bch2_trans_cond_resched(&trans);
+ bch2_trans_begin(&trans);
continue;
}
diff --git a/libbcachefs/opts.h b/libbcachefs/opts.h
index ed505857..003c00f2 100644
--- a/libbcachefs/opts.h
+++ b/libbcachefs/opts.h
@@ -178,6 +178,11 @@ enum opt_type {
OPT_BOOL(), \
BCH_SB_INODES_USE_KEY_CACHE, true, \
NULL, "Use the btree key cache for the inodes btree") \
+ x(btree_node_mem_ptr_optimization, u8, \
+ OPT_MOUNT|OPT_RUNTIME, \
+ OPT_BOOL(), \
+ NO_SB_OPT, true, \
+ NULL, "Stash pointer to in memory btree node in btree ptr")\
x(gc_reserve_percent, u8, \
OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
OPT_UINT(5, 21), \
diff --git a/libbcachefs/quota.c b/libbcachefs/quota.c
index 35b409e0..7861781a 100644
--- a/libbcachefs/quota.c
+++ b/libbcachefs/quota.c
@@ -760,7 +760,7 @@ static int bch2_set_quota(struct super_block *sb, struct kqid qid,
bkey_quota_init(&new_quota.k_i);
new_quota.k.p = POS(qid.type, from_kqid(&init_user_ns, qid));
- ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOUNLOCK,
+ ret = bch2_trans_do(c, NULL, NULL, 0,
bch2_set_quota_trans(&trans, &new_quota, qdq)) ?:
__bch2_quota_set(c, bkey_i_to_s_c(&new_quota.k_i));
diff --git a/libbcachefs/reflink.c b/libbcachefs/reflink.c
index ebf39124..3d9c5c5b 100644
--- a/libbcachefs/reflink.c
+++ b/libbcachefs/reflink.c
@@ -192,8 +192,9 @@ static struct bkey_s_c get_next_src(struct btree_iter *iter, struct bpos end)
return k;
}
- bch2_btree_iter_set_pos(iter, end);
- return bkey_s_c_null;
+ if (bkey_cmp(iter->pos, end) >= 0)
+ bch2_btree_iter_set_pos(iter, end);
+ return ret ? bkey_s_c_err(ret) : bkey_s_c_null;
}
s64 bch2_remap_range(struct bch_fs *c,
@@ -304,12 +305,12 @@ s64 bch2_remap_range(struct bch_fs *c,
dst_done = dst_iter->pos.offset - dst_start.offset;
new_i_size = min(dst_iter->pos.offset << 9, new_i_size);
- bch2_trans_begin(&trans);
-
do {
struct bch_inode_unpacked inode_u;
struct btree_iter *inode_iter;
+ bch2_trans_begin(&trans);
+
inode_iter = bch2_inode_peek(&trans, &inode_u,
dst_start.inode, BTREE_ITER_INTENT);
ret2 = PTR_ERR_OR_ZERO(inode_iter);
diff --git a/libbcachefs/str_hash.h b/libbcachefs/str_hash.h
index 2ff8e5bd..23602349 100644
--- a/libbcachefs/str_hash.h
+++ b/libbcachefs/str_hash.h
@@ -209,7 +209,7 @@ int bch2_hash_needs_whiteout(struct btree_trans *trans,
iter = bch2_trans_copy_iter(trans, start);
- bch2_btree_iter_next_slot(iter);
+ bch2_btree_iter_advance(iter);
for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, k, ret) {
if (k.k->type != desc.key_type &&
diff --git a/libbcachefs/xattr.c b/libbcachefs/xattr.c
index 8bd7553b..e4d400b1 100644
--- a/libbcachefs/xattr.c
+++ b/libbcachefs/xattr.c
@@ -118,18 +118,16 @@ void bch2_xattr_to_text(struct printbuf *out, struct bch_fs *c,
le16_to_cpu(xattr.v->x_val_len));
}
-int bch2_xattr_get(struct bch_fs *c, struct bch_inode_info *inode,
- const char *name, void *buffer, size_t size, int type)
+static int bch2_xattr_get_trans(struct btree_trans *trans, struct bch_inode_info *inode,
+ const char *name, void *buffer, size_t size, int type)
{
- struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode);
- struct btree_trans trans;
+ struct bch_hash_info hash = bch2_hash_info_init(trans->c, &inode->ei_inode);
struct btree_iter *iter;
struct bkey_s_c_xattr xattr;
+ struct bkey_s_c k;
int ret;
- bch2_trans_init(&trans, c, 0, 0);
-
- iter = bch2_hash_lookup(&trans, bch2_xattr_hash_desc, &hash,
+ iter = bch2_hash_lookup(trans, bch2_xattr_hash_desc, &hash,
inode->v.i_ino,
&X_SEARCH(type, name, strlen(name)),
0);
@@ -137,7 +135,12 @@ int bch2_xattr_get(struct bch_fs *c, struct bch_inode_info *inode,
if (ret)
goto err;
- xattr = bkey_s_c_to_xattr(bch2_btree_iter_peek_slot(iter));
+ k = bch2_btree_iter_peek_slot(iter);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+
+ xattr = bkey_s_c_to_xattr(k);
ret = le16_to_cpu(xattr.v->x_val_len);
if (buffer) {
if (ret > size)
@@ -145,14 +148,18 @@ int bch2_xattr_get(struct bch_fs *c, struct bch_inode_info *inode,
else
memcpy(buffer, xattr_val(xattr.v), ret);
}
- bch2_trans_iter_put(&trans, iter);
+ bch2_trans_iter_put(trans, iter);
err:
- bch2_trans_exit(&trans);
-
- BUG_ON(ret == -EINTR);
return ret == -ENOENT ? -ENODATA : ret;
}
+int bch2_xattr_get(struct bch_fs *c, struct bch_inode_info *inode,
+ const char *name, void *buffer, size_t size, int type)
+{
+ return bch2_trans_do(c, NULL, NULL, 0,
+ bch2_xattr_get_trans(&trans, inode, name, buffer, size, type));
+}
+
int bch2_xattr_set(struct btree_trans *trans, u64 inum,
const struct bch_hash_info *hash_info,
const char *name, const void *value, size_t size,