diff options
author | Kent Overstreet <kent.overstreet@linux.dev> | 2023-02-12 23:18:22 -0500 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@linux.dev> | 2023-03-17 10:32:50 -0400 |
commit | ba83cd8fe637896b9b2c7832d3d55cbf79e564c4 (patch) | |
tree | 096e1a819ddf543481aeae790d308b18e7d1e4b0 | |
parent | f6f9564145dce548d1aa918e241e9702e962cb13 (diff) |
bcachefs: Improved transaction restart debugging XXX
In debug mode, on transaction restart we now save the full backtrace.
We're chasing a bug where we lose an a transaction restart error - this
will make it easy to track down.
------------[ cut here ]------------
Feb 17 15:57:47 extravaganza.localdomain 4,2210,1695278469,-;do not call blocking ops when !TASK_RUNNING; state=2 set at [<00000000f728d589>] __six_lock_type_slowpath.constprop.0+0x407/0x7a0
Feb 17 15:57:47 extravaganza.localdomain 4,2211,1695278530,-;WARNING: CPU: 4 PID: 76 at kernel/sched/core.c:9862 __might_sleep+0xd9/0xe0
Feb 17 15:57:47 extravaganza.localdomain 4,2212,1695278578,-;Modules linked in: netconsole nfnetlink snd_seq_dummy snd_hrtimer binfmt_misc zfs(PO) zunicode(PO) zzstd(O) zlua(O) zavl(PO) icp(PO) zcommon(PO) znvpair(PO) spl(O) intel_rapl_msr intel_rapl_common intel_tcc_cooling x86_pkg_temp_thermal intel_powerclamp coretemp snd_hda_codec_realtek kvm_intel snd_hda_codec_generic mei_pxp snd_hda_codec_hdmi mei_hdcp snd_hda_intel kvm snd_intel_dspcfg snd_intel_sdw_acpi snd_hda_codec snd_seq_midi eeepc_wmi rapl snd_hda_core snd_seq_midi_event wmi_bmof intel_cstate joydev serio_raw snd_hwdep snd_rawmidi snd_pcm ee1004 input_leds snd_seq snd_seq_device snd_timer snd mei_me soundcore mei mac_hid acpi_pad msr parport_pc ppdev lp parport pstore_blk ramoops reed_solomon pstore_zone efi_pstore ip_tables x_tables autofs4 overlay isofs nls_iso8859_1 dm_mirror dm_region_hash dm_log amdgpu iommu_v2 gpu_sched drm_buddy hid_logitech_hidpp hid_logitech_dj hid_generic uas usbhid hid usb_storage nouveau radeon i2c_algo_bit drm_ttm_helper ttm
Feb 17 15:57:47 extravaganza.localdomain 4,2213,1695278892,c; drm_display_helper crct10dif_pclmul crc32_pclmul cec polyval_clmulni rc_core polyval_generic ghash_clmulni_intel drm_kms_helper syscopyarea nvme sysfillrect sha512_ssse3 sysimgblt aesni_intel fb_sys_fops crypto_simd mfd_aaeon cryptd i2c_i801 nvme_core psmouse drm e1000e asus_wmi xhci_pci ledtrig_audio i2c_smbus sparse_keymap nvme_common ahci xhci_pci_renesas libahci platform_profile mxm_wmi video wmi
Feb 17 15:57:47 extravaganza.localdomain 4,2214,1695279104,-;CPU: 4 PID: 76 Comm: kworker/4:1 Tainted: P W O 6.1.12+bcachefs.git20230214.375685a54-1-debug #1
Feb 17 15:57:47 extravaganza.localdomain 4,2215,1695279151,-;Hardware name: System manufacturer System Product Name/Z170 PRO GAMING, BIOS 1904 07/05/2016
Feb 17 15:57:47 extravaganza.localdomain 4,2216,1695279192,-;Workqueue: bcachefs_btree_io btree_node_write_work
Feb 17 15:57:47 extravaganza.localdomain 4,2217,1695279243,-;RIP: 0010:__might_sleep+0xd9/0xe0
Feb 17 15:57:47 extravaganza.localdomain 4,2218,1695279279,-;Code: a0 14 00 00 4c 89 ff 48 89 4d d0 e8 61 b9 43 00 49 8b 95 a0 14 00 00 48 8b 4d d0 44 89 f6 48 c7 c7 a0 65 6a a7 e8 a2 bf 6e 01 <0f> 0b eb 88 0f 1f 00 0f 1f 44 00 00 55 48 89 e5 41 55 41 54 53 48
Feb 17 15:57:47 extravaganza.localdomain 4,2219,1695279331,-;RSP: 0018:ffffc900005b6f40 EFLAGS: 00010246
Feb 17 15:57:47 extravaganza.localdomain 4,2220,1695279377,-;RAX: 0000000000000000 RBX: ffffffffa7f2a392 RCX: 0000000000000000
Feb 17 15:57:47 extravaganza.localdomain 4,2221,1695279418,-;RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
Feb 17 15:57:47 extravaganza.localdomain 4,2222,1695279464,-;RBP: ffffc900005b6f70 R08: 0000000000000000 R09: 0000000000000000
Feb 17 15:57:47 extravaganza.localdomain 4,2223,1695279497,-;R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000112
Feb 17 15:57:47 extravaganza.localdomain 4,2224,1695279538,-;R13: ffff888102c30000 R14: 0000000000000002 R15: ffff888102c314a0
Feb 17 15:57:47 extravaganza.localdomain 4,2225,1695279584,-;FS: 0000000000000000(0000) GS:ffff8887d4e00000(0000) knlGS:0000000000000000
Feb 17 15:57:47 extravaganza.localdomain 4,2226,1695279627,-;CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
Feb 17 15:57:47 extravaganza.localdomain 4,2227,1695279668,-;CR2: 00005572cc5e70d8 CR3: 0000000114978005 CR4: 00000000003706e0
Feb 17 15:57:47 extravaganza.localdomain 4,2228,1695279708,-;DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
Feb 17 15:57:47 extravaganza.localdomain 4,2229,1695279753,-;DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Feb 17 15:57:47 extravaganza.localdomain 4,2230,1695279801,-;Call Trace:
Feb 17 15:57:47 extravaganza.localdomain 4,2231,1695279845,-; <TASK>
Feb 17 15:57:47 extravaganza.localdomain 4,2232,1695279888,-; ? __six_lock_type_slowpath.constprop.0+0x407/0x7a0
Feb 17 15:57:47 extravaganza.localdomain 4,2233,1695279928,-; ? bch2_save_backtrace+0x5b/0x210
Feb 17 15:57:47 extravaganza.localdomain 4,2234,1695279980,-; __kmem_cache_alloc_node+0x290/0x2f0
Feb 17 15:57:47 extravaganza.localdomain 4,2235,1695280030,-; ? load_balance+0x904/0xc20
Feb 17 15:57:47 extravaganza.localdomain 4,2236,1695280074,-; ? bch2_save_backtrace+0x5b/0x210
Feb 17 15:57:47 extravaganza.localdomain 4,2237,1695280122,-; ? bch2_save_backtrace+0x5b/0x210
Feb 17 15:57:47 extravaganza.localdomain 4,2238,1695280172,-; __kmalloc_node_track_caller+0x51/0xf0
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r-- | fs/bcachefs/btree_iter.c | 31 | ||||
-rw-r--r-- | fs/bcachefs/btree_iter.h | 4 | ||||
-rw-r--r-- | fs/bcachefs/btree_types.h | 3 |
3 files changed, 31 insertions, 7 deletions
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 8f7d37697281..eb1f6f867212 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -1175,10 +1175,17 @@ int bch2_btree_path_traverse_one(struct btree_trans *trans, path->uptodate = BTREE_ITER_UPTODATE; out: - if (bch2_err_matches(ret, BCH_ERR_transaction_restart) != !!trans->restarted) - panic("ret %s (%i) trans->restarted %s (%i)\n", - bch2_err_str(ret), ret, - bch2_err_str(trans->restarted), trans->restarted); + if (bch2_err_matches(ret, BCH_ERR_transaction_restart) != !!trans->restarted) { + struct printbuf buf = PRINTBUF; + + prt_printf(&buf, "ret %s (%i) trans->restarted %s (%i)\n", + bch2_err_str(ret), ret, + bch2_err_str(trans->restarted), trans->restarted); +#ifdef CONFIG_BCACHEFS_DEBUG + bch2_prt_backtrace(&buf, &trans->last_restarted); +#endif + panic("%s", buf.buf); + } bch2_btree_path_verify(trans, path); return ret; } @@ -1366,9 +1373,15 @@ void bch2_trans_restart_error(struct btree_trans *trans, u32 restart_count) void bch2_trans_in_restart_error(struct btree_trans *trans) { - panic("in transaction restart: %s, last restarted by %pS\n", - bch2_err_str(trans->restarted), - (void *) trans->last_restarted_ip); + struct printbuf buf = PRINTBUF; + + prt_printf(&buf, "in transaction restart: %s, last restarted by %pS\n", + bch2_err_str(trans->restarted), + (void *) trans->last_restarted_ip); +#ifdef CONFIG_BCACHEFS_DEBUG + bch2_prt_backtrace(&buf, &trans->last_restarted); +#endif + panic("%s", buf.buf); } noinline __cold @@ -3079,6 +3092,10 @@ void bch2_trans_exit(struct btree_trans *trans) if (trans->paths) mempool_free(trans->paths, &c->btree_paths_pool); +#ifdef CONFIG_BCACHEFS_DEBUG + darray_exit(&trans->last_restarted); +#endif + trans->mem = (void *) 0x1; trans->paths = (void *) 0x1; } diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 6b7cef145ced..4db24c9baa26 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -260,6 +260,10 @@ static inline int btree_trans_restart_nounlock(struct btree_trans *trans, int er BUG_ON(err <= 0); BUG_ON(!bch2_err_matches(err, BCH_ERR_transaction_restart)); +#ifdef CONFIG_BCACHEFS_DEBUG + bch2_save_backtrace(&trans->last_restarted, current); +#endif + trans->restarted = err; trans->last_restarted_ip = _THIS_IP_; return -err; diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index 6250f34fe561..f1d94138d457 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -440,6 +440,9 @@ struct btree_trans { u32 restart_count; unsigned long last_begin_ip; unsigned long last_restarted_ip; +#ifdef CONFIG_BCACHEFS_DEBUG + bch_stacktrace last_restarted; +#endif unsigned long srcu_lock_time; /* |