summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKent Overstreet <kent.overstreet@linux.dev>2023-02-12 23:18:22 -0500
committerKent Overstreet <kent.overstreet@linux.dev>2023-03-17 10:32:50 -0400
commitba83cd8fe637896b9b2c7832d3d55cbf79e564c4 (patch)
tree096e1a819ddf543481aeae790d308b18e7d1e4b0
parentf6f9564145dce548d1aa918e241e9702e962cb13 (diff)
bcachefs: Improved transaction restart debugging XXX
In debug mode, on transaction restart we now save the full backtrace. We're chasing a bug where we lose an a transaction restart error - this will make it easy to track down. ------------[ cut here ]------------ Feb 17 15:57:47 extravaganza.localdomain 4,2210,1695278469,-;do not call blocking ops when !TASK_RUNNING; state=2 set at [<00000000f728d589>] __six_lock_type_slowpath.constprop.0+0x407/0x7a0 Feb 17 15:57:47 extravaganza.localdomain 4,2211,1695278530,-;WARNING: CPU: 4 PID: 76 at kernel/sched/core.c:9862 __might_sleep+0xd9/0xe0 Feb 17 15:57:47 extravaganza.localdomain 4,2212,1695278578,-;Modules linked in: netconsole nfnetlink snd_seq_dummy snd_hrtimer binfmt_misc zfs(PO) zunicode(PO) zzstd(O) zlua(O) zavl(PO) icp(PO) zcommon(PO) znvpair(PO) spl(O) intel_rapl_msr intel_rapl_common intel_tcc_cooling x86_pkg_temp_thermal intel_powerclamp coretemp snd_hda_codec_realtek kvm_intel snd_hda_codec_generic mei_pxp snd_hda_codec_hdmi mei_hdcp snd_hda_intel kvm snd_intel_dspcfg snd_intel_sdw_acpi snd_hda_codec snd_seq_midi eeepc_wmi rapl snd_hda_core snd_seq_midi_event wmi_bmof intel_cstate joydev serio_raw snd_hwdep snd_rawmidi snd_pcm ee1004 input_leds snd_seq snd_seq_device snd_timer snd mei_me soundcore mei mac_hid acpi_pad msr parport_pc ppdev lp parport pstore_blk ramoops reed_solomon pstore_zone efi_pstore ip_tables x_tables autofs4 overlay isofs nls_iso8859_1 dm_mirror dm_region_hash dm_log amdgpu iommu_v2 gpu_sched drm_buddy hid_logitech_hidpp hid_logitech_dj hid_generic uas usbhid hid usb_storage nouveau radeon i2c_algo_bit drm_ttm_helper ttm Feb 17 15:57:47 extravaganza.localdomain 4,2213,1695278892,c; drm_display_helper crct10dif_pclmul crc32_pclmul cec polyval_clmulni rc_core polyval_generic ghash_clmulni_intel drm_kms_helper syscopyarea nvme sysfillrect sha512_ssse3 sysimgblt aesni_intel fb_sys_fops crypto_simd mfd_aaeon cryptd i2c_i801 nvme_core psmouse drm e1000e asus_wmi xhci_pci ledtrig_audio i2c_smbus sparse_keymap nvme_common ahci xhci_pci_renesas libahci platform_profile mxm_wmi video wmi Feb 17 15:57:47 extravaganza.localdomain 4,2214,1695279104,-;CPU: 4 PID: 76 Comm: kworker/4:1 Tainted: P W O 6.1.12+bcachefs.git20230214.375685a54-1-debug #1 Feb 17 15:57:47 extravaganza.localdomain 4,2215,1695279151,-;Hardware name: System manufacturer System Product Name/Z170 PRO GAMING, BIOS 1904 07/05/2016 Feb 17 15:57:47 extravaganza.localdomain 4,2216,1695279192,-;Workqueue: bcachefs_btree_io btree_node_write_work Feb 17 15:57:47 extravaganza.localdomain 4,2217,1695279243,-;RIP: 0010:__might_sleep+0xd9/0xe0 Feb 17 15:57:47 extravaganza.localdomain 4,2218,1695279279,-;Code: a0 14 00 00 4c 89 ff 48 89 4d d0 e8 61 b9 43 00 49 8b 95 a0 14 00 00 48 8b 4d d0 44 89 f6 48 c7 c7 a0 65 6a a7 e8 a2 bf 6e 01 <0f> 0b eb 88 0f 1f 00 0f 1f 44 00 00 55 48 89 e5 41 55 41 54 53 48 Feb 17 15:57:47 extravaganza.localdomain 4,2219,1695279331,-;RSP: 0018:ffffc900005b6f40 EFLAGS: 00010246 Feb 17 15:57:47 extravaganza.localdomain 4,2220,1695279377,-;RAX: 0000000000000000 RBX: ffffffffa7f2a392 RCX: 0000000000000000 Feb 17 15:57:47 extravaganza.localdomain 4,2221,1695279418,-;RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 Feb 17 15:57:47 extravaganza.localdomain 4,2222,1695279464,-;RBP: ffffc900005b6f70 R08: 0000000000000000 R09: 0000000000000000 Feb 17 15:57:47 extravaganza.localdomain 4,2223,1695279497,-;R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000112 Feb 17 15:57:47 extravaganza.localdomain 4,2224,1695279538,-;R13: ffff888102c30000 R14: 0000000000000002 R15: ffff888102c314a0 Feb 17 15:57:47 extravaganza.localdomain 4,2225,1695279584,-;FS: 0000000000000000(0000) GS:ffff8887d4e00000(0000) knlGS:0000000000000000 Feb 17 15:57:47 extravaganza.localdomain 4,2226,1695279627,-;CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 Feb 17 15:57:47 extravaganza.localdomain 4,2227,1695279668,-;CR2: 00005572cc5e70d8 CR3: 0000000114978005 CR4: 00000000003706e0 Feb 17 15:57:47 extravaganza.localdomain 4,2228,1695279708,-;DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 Feb 17 15:57:47 extravaganza.localdomain 4,2229,1695279753,-;DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Feb 17 15:57:47 extravaganza.localdomain 4,2230,1695279801,-;Call Trace: Feb 17 15:57:47 extravaganza.localdomain 4,2231,1695279845,-; <TASK> Feb 17 15:57:47 extravaganza.localdomain 4,2232,1695279888,-; ? __six_lock_type_slowpath.constprop.0+0x407/0x7a0 Feb 17 15:57:47 extravaganza.localdomain 4,2233,1695279928,-; ? bch2_save_backtrace+0x5b/0x210 Feb 17 15:57:47 extravaganza.localdomain 4,2234,1695279980,-; __kmem_cache_alloc_node+0x290/0x2f0 Feb 17 15:57:47 extravaganza.localdomain 4,2235,1695280030,-; ? load_balance+0x904/0xc20 Feb 17 15:57:47 extravaganza.localdomain 4,2236,1695280074,-; ? bch2_save_backtrace+0x5b/0x210 Feb 17 15:57:47 extravaganza.localdomain 4,2237,1695280122,-; ? bch2_save_backtrace+0x5b/0x210 Feb 17 15:57:47 extravaganza.localdomain 4,2238,1695280172,-; __kmalloc_node_track_caller+0x51/0xf0 Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r--fs/bcachefs/btree_iter.c31
-rw-r--r--fs/bcachefs/btree_iter.h4
-rw-r--r--fs/bcachefs/btree_types.h3
3 files changed, 31 insertions, 7 deletions
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index 8f7d37697281..eb1f6f867212 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -1175,10 +1175,17 @@ int bch2_btree_path_traverse_one(struct btree_trans *trans,
path->uptodate = BTREE_ITER_UPTODATE;
out:
- if (bch2_err_matches(ret, BCH_ERR_transaction_restart) != !!trans->restarted)
- panic("ret %s (%i) trans->restarted %s (%i)\n",
- bch2_err_str(ret), ret,
- bch2_err_str(trans->restarted), trans->restarted);
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart) != !!trans->restarted) {
+ struct printbuf buf = PRINTBUF;
+
+ prt_printf(&buf, "ret %s (%i) trans->restarted %s (%i)\n",
+ bch2_err_str(ret), ret,
+ bch2_err_str(trans->restarted), trans->restarted);
+#ifdef CONFIG_BCACHEFS_DEBUG
+ bch2_prt_backtrace(&buf, &trans->last_restarted);
+#endif
+ panic("%s", buf.buf);
+ }
bch2_btree_path_verify(trans, path);
return ret;
}
@@ -1366,9 +1373,15 @@ void bch2_trans_restart_error(struct btree_trans *trans, u32 restart_count)
void bch2_trans_in_restart_error(struct btree_trans *trans)
{
- panic("in transaction restart: %s, last restarted by %pS\n",
- bch2_err_str(trans->restarted),
- (void *) trans->last_restarted_ip);
+ struct printbuf buf = PRINTBUF;
+
+ prt_printf(&buf, "in transaction restart: %s, last restarted by %pS\n",
+ bch2_err_str(trans->restarted),
+ (void *) trans->last_restarted_ip);
+#ifdef CONFIG_BCACHEFS_DEBUG
+ bch2_prt_backtrace(&buf, &trans->last_restarted);
+#endif
+ panic("%s", buf.buf);
}
noinline __cold
@@ -3079,6 +3092,10 @@ void bch2_trans_exit(struct btree_trans *trans)
if (trans->paths)
mempool_free(trans->paths, &c->btree_paths_pool);
+#ifdef CONFIG_BCACHEFS_DEBUG
+ darray_exit(&trans->last_restarted);
+#endif
+
trans->mem = (void *) 0x1;
trans->paths = (void *) 0x1;
}
diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h
index 6b7cef145ced..4db24c9baa26 100644
--- a/fs/bcachefs/btree_iter.h
+++ b/fs/bcachefs/btree_iter.h
@@ -260,6 +260,10 @@ static inline int btree_trans_restart_nounlock(struct btree_trans *trans, int er
BUG_ON(err <= 0);
BUG_ON(!bch2_err_matches(err, BCH_ERR_transaction_restart));
+#ifdef CONFIG_BCACHEFS_DEBUG
+ bch2_save_backtrace(&trans->last_restarted, current);
+#endif
+
trans->restarted = err;
trans->last_restarted_ip = _THIS_IP_;
return -err;
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index 6250f34fe561..f1d94138d457 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -440,6 +440,9 @@ struct btree_trans {
u32 restart_count;
unsigned long last_begin_ip;
unsigned long last_restarted_ip;
+#ifdef CONFIG_BCACHEFS_DEBUG
+ bch_stacktrace last_restarted;
+#endif
unsigned long srcu_lock_time;
/*