Diffstat (limited to 'fs')
202 files changed, 4240 insertions, 4631 deletions
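Note: the bulk of the bcachefs churn below is a mechanical conversion from explicit lock/unlock and get/put pairs to the kernel's scope-based cleanup helpers — guard(), scoped_guard() and CLASS() from include/linux/cleanup.h — so error paths can simply return instead of jumping to err:/out: labels. As a rough, hedged illustration of the underlying mechanism (the compiler's cleanup attribute, which those macros wrap), here is a minimal userspace sketch; the pthread-based mutex_unguard/guard_mutex names are invented for this example and are not kernel API:

	#include <pthread.h>
	#include <stdio.h>

	/* Cleanup handler: runs when the guard variable leaves scope. */
	static inline void mutex_unguard(pthread_mutex_t **m)
	{
		if (*m)
			pthread_mutex_unlock(*m);
	}

	/* Rough userspace analogue of the kernel's guard(mutex)(&lock). */
	#define guard_mutex(m) \
		pthread_mutex_t *_guard_ __attribute__((cleanup(mutex_unguard))) = \
			(pthread_mutex_lock(m), (m))

	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
	static int counter;

	static int increment_or_fail(int fail)
	{
		guard_mutex(&lock);	/* released on every return path below */

		if (fail)
			return -1;	/* early return: no goto-unlock dance */

		counter++;
		return 0;
	}

	int main(void)
	{
		increment_or_fail(0);
		increment_or_fail(1);
		printf("counter = %d\n", counter);	/* prints: counter = 1 */
		return 0;
	}

CLASS() works the same way for constructor/destructor pairs rather than locks: CLASS(btree_trans, trans)(c) in the hunks below ties bch2_trans_get()/bch2_trans_put() to the enclosing scope, which is why the explicit bch2_trans_put() and printbuf_exit() calls disappear from the error paths.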
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index e51e7d88980a..1d847a939f29 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -98,14 +98,25 @@ static struct file_system_type anon_inode_fs_type = {
 	.kill_sb = kill_anon_super,
 };
 
-static struct inode *anon_inode_make_secure_inode(
-	const char *name,
-	const struct inode *context_inode)
+/**
+ * anon_inode_make_secure_inode - allocate an anonymous inode with security context
+ * @sb:		[in]	Superblock to allocate from
+ * @name:	[in]	Name of the class of the newfile (e.g., "secretmem")
+ * @context_inode:
+ *		[in]	Optional parent inode for security inheritance
+ *
+ * The function ensures proper security initialization through the LSM hook
+ * security_inode_init_security_anon().
+ *
+ * Return:	Pointer to new inode on success, ERR_PTR on failure.
+ */
+struct inode *anon_inode_make_secure_inode(struct super_block *sb, const char *name,
+					   const struct inode *context_inode)
 {
 	struct inode *inode;
 	int error;
 
-	inode = alloc_anon_inode(anon_inode_mnt->mnt_sb);
+	inode = alloc_anon_inode(sb);
 	if (IS_ERR(inode))
 		return inode;
 	inode->i_flags &= ~S_PRIVATE;
@@ -118,6 +129,7 @@ static struct inode *anon_inode_make_secure_inode(
 	}
 	return inode;
 }
+EXPORT_SYMBOL_GPL_FOR_MODULES(anon_inode_make_secure_inode, "kvm");
 
 static struct file *__anon_inode_getfile(const char *name,
 					 const struct file_operations *fops,
@@ -132,7 +144,8 @@ static struct file *__anon_inode_getfile(const char *name,
 		return ERR_PTR(-ENOENT);
 
 	if (make_inode) {
-		inode = anon_inode_make_secure_inode(name, context_inode);
+		inode = anon_inode_make_secure_inode(anon_inode_mnt->mnt_sb,
+						     name, context_inode);
 		if (IS_ERR(inode)) {
 			file = ERR_CAST(inode);
 			goto err;
diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c
index d03adc36100e..307824d6eccb 100644
--- a/fs/bcachefs/acl.c
+++ b/fs/bcachefs/acl.c
@@ -279,7 +279,7 @@ struct posix_acl *bch2_get_acl(struct inode *vinode, int type, bool rcu)
 	if (rcu)
 		return ERR_PTR(-ECHILD);
 
-	struct btree_trans *trans = bch2_trans_get(c);
+	CLASS(btree_trans, trans)(c);
 retry:
 	bch2_trans_begin(trans);
 
@@ -304,7 +304,6 @@ err:
 	set_cached_acl(&inode->v, type, acl);
 
 	bch2_trans_iter_exit(trans, &iter);
-	bch2_trans_put(trans);
 	return acl;
 }
 
@@ -350,8 +349,8 @@ int bch2_set_acl(struct mnt_idmap *idmap,
 	umode_t mode;
 	int ret;
 
-	mutex_lock(&inode->ei_update_lock);
-	struct btree_trans *trans = bch2_trans_get(c);
+	guard(mutex)(&inode->ei_update_lock);
+	CLASS(btree_trans, trans)(c);
retry:
 	bch2_trans_begin(trans);
 	acl = _acl;
@@ -385,17 +384,13 @@ btree_err:
 	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
 		goto retry;
 	if (unlikely(ret))
-		goto err;
+		return ret;
 
 	bch2_inode_update_after_write(trans, inode, &inode_u,
 				      ATTR_CTIME|ATTR_MODE);
 
 	set_cached_acl(&inode->v, type, acl);
-err:
-	bch2_trans_put(trans);
-	mutex_unlock(&inode->ei_update_lock);
-
-	return ret;
+	return 0;
 }
 
 int bch2_acl_chmod(struct btree_trans *trans, subvol_inum inum,
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index d64839c756bc..4c1604fd80f9 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -565,11 +565,11 @@ void bch2_bucket_gens_to_text(struct printbuf *out, struct bch_fs *c, struct bke
 
 int bch2_bucket_gens_init(struct bch_fs *c)
 {
-	struct btree_trans *trans = bch2_trans_get(c);
 	struct bkey_i_bucket_gens g;
 	bool have_bucket_gens_key = false;
 	int ret;
 
+	CLASS(btree_trans, trans)(c);
 	ret = for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN,
 				 BTREE_ITER_prefetch, k, ({
 		/*
@@ -609,17 +609,14 @@ iter_err:
 					  BCH_TRANS_COMMIT_no_enospc,
 			bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0));
 
-	bch2_trans_put(trans);
-
-	bch_err_fn(c, ret);
 	return ret;
 }
 
 int bch2_alloc_read(struct bch_fs *c)
 {
-	down_read(&c->state_lock);
+	guard(rwsem_read)(&c->state_lock);
 
-	struct btree_trans *trans = bch2_trans_get(c);
+	CLASS(btree_trans, trans)(c);
 	struct bch_dev *ca = NULL;
 	int ret;
 
@@ -680,10 +677,6 @@ int bch2_alloc_read(struct bch_fs *c)
 	}
 
 	bch2_dev_put(ca);
-	bch2_trans_put(trans);
-
-	up_read(&c->state_lock);
-	bch_err_fn(c, ret);
 	return ret;
 }
 
@@ -699,7 +692,7 @@ static int __need_discard_or_freespace_err(struct btree_trans *trans,
 		? BCH_FSCK_ERR_need_discard_key_wrong
 		: BCH_FSCK_ERR_freespace_key_wrong;
 	enum btree_id btree = discard ? BTREE_ID_need_discard : BTREE_ID_freespace;
-	struct printbuf buf = PRINTBUF;
+	CLASS(printbuf, buf)();
 
 	bch2_bkey_val_to_text(&buf, c, alloc_k);
 
@@ -711,8 +704,6 @@ static int __need_discard_or_freespace_err(struct btree_trans *trans,
 	if (bch2_err_matches(ret, BCH_ERR_fsck_ignore) ||
 	    bch2_err_matches(ret, BCH_ERR_fsck_errors_not_fixed))
 		ret = 0;
-
-	printbuf_exit(&buf);
 	return ret;
 }
 
@@ -860,10 +851,10 @@ int bch2_trigger_alloc(struct btree_trans *trans,
 			enum btree_iter_update_trigger_flags flags)
 {
 	struct bch_fs *c = trans->c;
-	struct printbuf buf = PRINTBUF;
+	CLASS(printbuf, buf)();
 	int ret = 0;
 
-	struct bch_dev *ca = bch2_dev_bucket_tryget(c, new.k->p);
+	CLASS(bch2_dev_bucket_tryget, ca)(c, new.k->p);
 	if (!ca)
 		return bch_err_throw(c, trigger_alloc);
 
@@ -879,7 +870,7 @@ int bch2_trigger_alloc(struct btree_trans *trans,
 		struct bkey_i_alloc_v4 *new_ka = bch2_alloc_to_v4_mut_inlined(trans, new.s_c);
 		ret = PTR_ERR_OR_ZERO(new_ka);
 		if (unlikely(ret))
-			goto err;
+			return ret;
 		new_a = &new_ka->v;
 	}
 
@@ -913,7 +904,7 @@ int bch2_trigger_alloc(struct btree_trans *trans,
 			ret = bch2_bucket_do_index(trans, ca, old, old_a, false) ?:
 				bch2_bucket_do_index(trans, ca, new.s_c, new_a, true);
 			if (ret)
-				goto err;
+				return ret;
 		}
 
 		if (new_a->data_type == BCH_DATA_cached &&
@@ -925,7 +916,7 @@ int bch2_trigger_alloc(struct btree_trans *trans,
 					alloc_lru_idx_read(*old_a),
 					alloc_lru_idx_read(*new_a));
 			if (ret)
-				goto err;
+				return ret;
 
 			ret = bch2_lru_change(trans,
 					BCH_LRU_BUCKET_FRAGMENTATION,
@@ -933,17 +924,17 @@ int bch2_trigger_alloc(struct btree_trans *trans,
 					alloc_lru_idx_fragmentation(*old_a, ca),
 					alloc_lru_idx_fragmentation(*new_a, ca));
 			if (ret)
-				goto err;
+				return ret;
 		}
 
 		if (old_a->gen != new_a->gen) {
 			ret = bch2_bucket_gen_update(trans, new.k->p, new_a->gen);
 			if (ret)
-				goto err;
+				return ret;
 		}
 
 		ret = bch2_alloc_key_to_dev_counters(trans, ca, old_a, new_a, flags);
 		if (ret)
-			goto err;
+			return ret;
 	}
 
 	if ((flags & BTREE_TRIGGER_atomic) && (flags & BTREE_TRIGGER_insert)) {
@@ -994,7 +985,7 @@ int bch2_trigger_alloc(struct btree_trans *trans,
 			if (bch2_fs_fatal_err_on(ret, c,
 					"setting bucket_needs_journal_commit: %s",
 					bch2_err_str(ret)))
-				goto err;
+				return ret;
 		}
 	}
 
@@ -1036,16 +1027,12 @@ int bch2_trigger_alloc(struct btree_trans *trans,
 			g->gen_valid = 1;
 			g->gen = new_a->gen;
 		}
-err:
 fsck_err:
-	printbuf_exit(&buf);
-	bch2_dev_put(ca);
 	return ret;
 invalid_bucket:
 	bch2_fs_inconsistent(c, "reference to invalid bucket\n%s",
 			     (bch2_bkey_val_to_text(&buf, c, new.s_c), buf.buf));
-	ret = bch_err_throw(c, trigger_alloc);
-	goto err;
+	return bch_err_throw(c, trigger_alloc);
 }
 
 /*
@@ -1164,10 +1151,10 @@ int bch2_check_alloc_key(struct btree_trans *trans,
 	const struct bch_alloc_v4 *a;
 	unsigned gens_offset;
 	struct bkey_s_c k;
-	struct printbuf buf = PRINTBUF;
+	CLASS(printbuf, buf)();
 	int ret = 0;
 
-	struct bch_dev *ca = bch2_dev_bucket_tryget_noerror(c, alloc_k.k->p);
+	CLASS(bch2_dev_bucket_tryget_noerror, ca)(c, alloc_k.k->p);
 	if (fsck_err_on(!ca,
 			trans, alloc_key_to_missing_dev_bucket,
 			"alloc key for invalid device:bucket %llu:%llu",
@@ -1177,7 +1164,7 @@ int bch2_check_alloc_key(struct btree_trans *trans,
 		return ret;
 
 	if (!ca->mi.freespace_initialized)
-		goto out;
+		return 0;
 
 	a = bch2_alloc_to_v4(alloc_k, &a_convert);
 
@@ -1185,35 +1172,35 @@
 	k = bch2_btree_iter_peek_slot(trans, discard_iter);
 	ret = bkey_err(k);
 	if (ret)
-		goto err;
+		return ret;
 
 	bool is_discarded = a->data_type == BCH_DATA_need_discard;
 	if (need_discard_or_freespace_err_on(!!k.k->type != is_discarded,
 					     trans, alloc_k, !is_discarded, true, true)) {
 		ret = bch2_btree_bit_mod_iter(trans, discard_iter, is_discarded);
 		if (ret)
-			goto err;
+			return ret;
 	}
 
 	bch2_btree_iter_set_pos(trans, freespace_iter, alloc_freespace_pos(alloc_k.k->p, *a));
 	k = bch2_btree_iter_peek_slot(trans, freespace_iter);
 	ret = bkey_err(k);
 	if (ret)
-		goto err;
+		return ret;
 
 	bool is_free = a->data_type == BCH_DATA_free;
 	if (need_discard_or_freespace_err_on(!!k.k->type != is_free,
 					     trans, alloc_k, !is_free, false, true)) {
 		ret = bch2_btree_bit_mod_iter(trans, freespace_iter, is_free);
 		if (ret)
-			goto err;
+			return ret;
 	}
 
 	bch2_btree_iter_set_pos(trans, bucket_gens_iter, alloc_gens_pos(alloc_k.k->p, &gens_offset));
 	k = bch2_btree_iter_peek_slot(trans, bucket_gens_iter);
 	ret = bkey_err(k);
 	if (ret)
-		goto err;
+		return ret;
 
 	if (fsck_err_on(a->gen != alloc_gen(k, gens_offset),
 			trans, bucket_gens_key_wrong,
@@ -1226,7 +1213,7 @@ int bch2_check_alloc_key(struct btree_trans *trans,
 
 		ret = PTR_ERR_OR_ZERO(g);
 		if (ret)
-			goto err;
+			return ret;
 
 		if (k.k->type == KEY_TYPE_bucket_gens) {
 			bkey_reassemble(&g->k_i, k);
@@ -1239,13 +1226,9 @@ int bch2_check_alloc_key(struct btree_trans *trans,
 
 		ret = bch2_trans_update(trans, bucket_gens_iter, &g->k_i, 0);
 		if (ret)
-			goto err;
+			return ret;
 	}
-out:
-err:
 fsck_err:
-	bch2_dev_put(ca);
-	printbuf_exit(&buf);
 	return ret;
 }
 
@@ -1257,7 +1240,7 @@ int bch2_check_alloc_hole_freespace(struct btree_trans *trans,
 				    struct btree_iter *freespace_iter)
 {
 	struct bkey_s_c k;
-	struct printbuf buf = PRINTBUF;
+	CLASS(printbuf, buf)();
 	int ret;
 
 	if (!ca->mi.freespace_initialized)
@@ -1268,7 +1251,7 @@ int bch2_check_alloc_hole_freespace(struct btree_trans *trans,
 	k = bch2_btree_iter_peek_slot(trans, freespace_iter);
 	ret = bkey_err(k);
 	if (ret)
-		goto err;
+		return ret;
 
 	*end = bkey_min(k.k->p, *end);
 
@@ -1281,10 +1264,9 @@ int bch2_check_alloc_hole_freespace(struct btree_trans *trans,
 			end->offset)) {
 		struct bkey_i *update =
 			bch2_trans_kmalloc(trans, sizeof(*update));
-
 		ret = PTR_ERR_OR_ZERO(update);
 		if (ret)
-			goto err;
+			return ret;
 
 		bkey_init(&update->k);
 		update->k.type = KEY_TYPE_set;
@@ -1295,11 +1277,9 @@ int bch2_check_alloc_hole_freespace(struct btree_trans *trans,
 
 		ret = bch2_trans_update(trans, freespace_iter, update, 0);
 		if (ret)
-			goto err;
+			return ret;
 	}
-err:
 fsck_err:
-	printbuf_exit(&buf);
 	return ret;
 }
 
@@ -1310,7 +1290,7 @@ int bch2_check_alloc_hole_bucket_gens(struct btree_trans *trans,
 				      struct btree_iter *bucket_gens_iter)
 {
 	struct bkey_s_c k;
-	struct printbuf buf = PRINTBUF;
+	CLASS(printbuf, buf)();
 	unsigned i, gens_offset, gens_end_offset;
 	int ret;
 
@@ -1319,7 +1299,7 @@ int bch2_check_alloc_hole_bucket_gens(struct btree_trans *trans,
 	k = bch2_btree_iter_peek_slot(trans, bucket_gens_iter);
 	ret = bkey_err(k);
 	if (ret)
-		goto err;
+		return ret;
 
 	if (bkey_cmp(alloc_gens_pos(start, &gens_offset),
 		     alloc_gens_pos(*end, &gens_end_offset)))
@@ -1345,23 +1325,20 @@ int bch2_check_alloc_hole_bucket_gens(struct btree_trans *trans,
 		if (need_update) {
 			struct bkey_i *u = bch2_trans_kmalloc(trans, sizeof(g));
-
 			ret = PTR_ERR_OR_ZERO(u);
 			if (ret)
-				goto err;
+				return ret;
 
 			memcpy(u, &g, sizeof(g));
 
 			ret = bch2_trans_update(trans, bucket_gens_iter, u, 0);
 			if (ret)
-				goto err;
+				return ret;
 		}
 	}
 
 	*end = bkey_min(*end, bucket_gens_pos_to_alloc(bpos_nosnap_successor(k.k->p), 0));
-err:
 fsck_err:
-	printbuf_exit(&buf);
 	return ret;
 }
 
@@ -1404,7 +1381,7 @@ int __bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_i
 	enum bch_data_type state = iter->btree_id == BTREE_ID_need_discard
 		? BCH_DATA_need_discard
 		: BCH_DATA_free;
-	struct printbuf buf = PRINTBUF;
+	CLASS(printbuf, buf)();
 
 	bool async_repair = fsck_flags & FSCK_ERR_NO_LOG;
 	fsck_flags |= FSCK_CAN_FIX|FSCK_CAN_IGNORE;
@@ -1456,7 +1433,6 @@ out:
fsck_err:
 	bch2_set_btree_iter_dontneed(trans, &alloc_iter);
 	bch2_trans_iter_exit(trans, &alloc_iter);
-	printbuf_exit(&buf);
 	return ret;
delete:
 	if (!async_repair) {
@@ -1513,19 +1489,19 @@ int bch2_check_bucket_gens_key(struct btree_trans *trans,
 	u64 end = bucket_gens_pos_to_alloc(bpos_nosnap_successor(k.k->p), 0).offset;
 	u64 b;
 	bool need_update = false;
-	struct printbuf buf = PRINTBUF;
+	CLASS(printbuf, buf)();
 	int ret = 0;
 
 	BUG_ON(k.k->type != KEY_TYPE_bucket_gens);
 	bkey_reassemble(&g.k_i, k);
 
-	struct bch_dev *ca = bch2_dev_tryget_noerror(c, k.k->p.inode);
+	CLASS(bch2_dev_tryget_noerror, ca)(c, k.k->p.inode);
 	if (!ca) {
 		if (fsck_err(trans, bucket_gens_to_invalid_dev,
 			     "bucket_gens key for invalid device:\n%s",
 			     (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
-			ret = bch2_btree_delete_at(trans, iter, 0);
-		goto out;
+			return bch2_btree_delete_at(trans, iter, 0);
+		return 0;
 	}
 
 	if (fsck_err_on(end <= ca->mi.first_bucket ||
@@ -1533,8 +1509,7 @@ int bch2_check_bucket_gens_key(struct btree_trans *trans,
 			trans, bucket_gens_to_invalid_buckets,
 			"bucket_gens key for invalid buckets:\n%s",
 			(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
-		ret = bch2_btree_delete_at(trans, iter, 0);
-		goto out;
+		return bch2_btree_delete_at(trans, iter, 0);
 	}
 
 	for (b = start; b < ca->mi.first_bucket; b++)
@@ -1555,30 +1530,26 @@ int bch2_check_bucket_gens_key(struct btree_trans *trans,
 	if (need_update) {
 		struct bkey_i *u = bch2_trans_kmalloc(trans, sizeof(g));
-
 		ret = PTR_ERR_OR_ZERO(u);
 		if (ret)
-			goto out;
+			return ret;
 
 		memcpy(u, &g, sizeof(g));
-		ret = bch2_trans_update(trans, iter, u, 0);
+		return bch2_trans_update(trans, iter, u, 0);
 	}
-out:
fsck_err:
-	bch2_dev_put(ca);
-	printbuf_exit(&buf);
 	return ret;
 }
 
 int bch2_check_alloc_info(struct bch_fs *c)
 {
-	struct btree_trans *trans = bch2_trans_get(c);
 	struct btree_iter iter, discard_iter, freespace_iter, bucket_gens_iter;
 	struct bch_dev *ca = NULL;
 	struct bkey hole;
 	struct bkey_s_c k;
 	int ret = 0;
 
+	CLASS(btree_trans, trans)(c);
 	bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, POS_MIN,
 			     BTREE_ITER_prefetch);
 	bch2_trans_iter_init(trans, &discard_iter, BTREE_ID_need_discard, POS_MIN,
@@ -1646,14 +1617,14 @@ bkey_err:
 	ca = NULL;
 
 	if (ret < 0)
-		goto err;
+		return ret;
 
 	ret = for_each_btree_key(trans, iter,
 			BTREE_ID_need_discard, POS_MIN,
 			BTREE_ITER_prefetch, k,
 		bch2_check_discard_freespace_key(trans, &iter));
 	if (ret)
-		goto err;
+		return ret;
 
 	bch2_trans_iter_init(trans, &iter, BTREE_ID_freespace, POS_MIN,
 			     BTREE_ITER_prefetch);
@@ -1670,11 +1641,9 @@ bkey_err:
 			continue;
 		}
 		if (ret) {
-			struct printbuf buf = PRINTBUF;
+			CLASS(printbuf, buf)();
 			bch2_bkey_val_to_text(&buf, c, k);
-
 			bch_err(c, "while checking %s", buf.buf);
-			printbuf_exit(&buf);
 			break;
 		}
 
@@ -1682,16 +1651,14 @@ bkey_err:
 	}
 	bch2_trans_iter_exit(trans, &iter);
 	if (ret)
-		goto err;
+		return ret;
 
 	ret = for_each_btree_key_commit(trans, iter,
 			BTREE_ID_bucket_gens, POS_MIN,
 			BTREE_ITER_prefetch, k,
 			NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
 		bch2_check_bucket_gens_key(trans, &iter, k));
-err:
-	bch2_trans_put(trans);
-	bch_err_fn(c, ret);
+
 	return ret;
 }
 
@@ -1703,7 +1670,7 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
 	struct bch_alloc_v4 a_convert;
 	const struct bch_alloc_v4 *a;
 	struct bkey_s_c alloc_k;
-	struct printbuf buf = PRINTBUF;
+	CLASS(printbuf, buf)();
 	int ret;
 
 	alloc_k = bch2_btree_iter_peek(trans, alloc_iter);
@@ -1714,7 +1681,7 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
 	if (ret)
 		return ret;
 
-	struct bch_dev *ca = bch2_dev_tryget_noerror(c, alloc_k.k->p.inode);
+	CLASS(bch2_dev_tryget_noerror, ca)(c, alloc_k.k->p.inode);
 	if (!ca)
 		return 0;
 
@@ -1726,95 +1693,80 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
 				       bucket_to_u64(alloc_k.k->p),
 				       lru_idx, alloc_k, last_flushed);
 		if (ret)
-			goto err;
+			return ret;
 	}
 
-	if (a->data_type != BCH_DATA_cached)
-		goto err;
+	if (a->data_type == BCH_DATA_cached) {
+		if (fsck_err_on(!a->io_time[READ],
+				trans, alloc_key_cached_but_read_time_zero,
+				"cached bucket with read_time 0\n%s",
+				(printbuf_reset(&buf),
+				 bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
+			struct bkey_i_alloc_v4 *a_mut =
+				bch2_alloc_to_v4_mut(trans, alloc_k);
+			ret = PTR_ERR_OR_ZERO(a_mut);
+			if (ret)
+				return ret;
 
-	if (fsck_err_on(!a->io_time[READ],
-			trans, alloc_key_cached_but_read_time_zero,
-			"cached bucket with read_time 0\n%s",
-			(printbuf_reset(&buf),
-			 bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
-		struct bkey_i_alloc_v4 *a_mut =
-			bch2_alloc_to_v4_mut(trans, alloc_k);
-		ret = PTR_ERR_OR_ZERO(a_mut);
-		if (ret)
-			goto err;
+			a_mut->v.io_time[READ] = bch2_current_io_time(c, READ);
+			ret = bch2_trans_update(trans, alloc_iter,
+						&a_mut->k_i, BTREE_TRIGGER_norun);
+			if (ret)
+				return ret;
 
-		a_mut->v.io_time[READ] = bch2_current_io_time(c, READ);
-		ret = bch2_trans_update(trans, alloc_iter,
-					&a_mut->k_i, BTREE_TRIGGER_norun);
-		if (ret)
-			goto err;
+			a = &a_mut->v;
+		}
 
-		a = &a_mut->v;
+		ret = bch2_lru_check_set(trans, alloc_k.k->p.inode,
+					 bucket_to_u64(alloc_k.k->p),
+					 a->io_time[READ],
+					 alloc_k, last_flushed);
 	}
-
-	ret = bch2_lru_check_set(trans, alloc_k.k->p.inode,
-				 bucket_to_u64(alloc_k.k->p),
-				 a->io_time[READ],
-				 alloc_k, last_flushed);
-	if (ret)
-		goto err;
-err:
fsck_err:
-	bch2_dev_put(ca);
-	printbuf_exit(&buf);
 	return ret;
 }
 
 int bch2_check_alloc_to_lru_refs(struct bch_fs *c)
 {
 	struct bkey_buf last_flushed;
-
 	bch2_bkey_buf_init(&last_flushed);
 	bkey_init(&last_flushed.k->k);
 
-	int ret = bch2_trans_run(c,
-		for_each_btree_key_commit(trans, iter, BTREE_ID_alloc,
+	CLASS(btree_trans, trans)(c);
+	int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_alloc,
 				POS_MIN, BTREE_ITER_prefetch, k,
 				NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
-			bch2_check_alloc_to_lru_ref(trans, &iter, &last_flushed))) ?:
-		bch2_check_stripe_to_lru_refs(c);
+			bch2_check_alloc_to_lru_ref(trans, &iter, &last_flushed)) ?:
+		bch2_check_stripe_to_lru_refs(trans);
 
 	bch2_bkey_buf_exit(&last_flushed, c);
-	bch_err_fn(c, ret);
 	return ret;
 }
 
 static int discard_in_flight_add(struct bch_dev *ca, u64 bucket, bool in_progress)
 {
 	struct bch_fs *c = ca->fs;
-	int ret;
-
-	mutex_lock(&ca->discard_buckets_in_flight_lock);
+	guard(mutex)(&ca->discard_buckets_in_flight_lock);
 	struct discard_in_flight *i =
 		darray_find_p(ca->discard_buckets_in_flight, i, i->bucket == bucket);
-	if (i) {
-		ret = bch_err_throw(c, EEXIST_discard_in_flight_add);
-		goto out;
-	}
+	if (i)
		return bch_err_throw(c, EEXIST_discard_in_flight_add);
 
-	ret = darray_push(&ca->discard_buckets_in_flight, ((struct discard_in_flight) {
+	return darray_push(&ca->discard_buckets_in_flight, ((struct discard_in_flight) {
		.in_progress = in_progress,
		.bucket = bucket,
	}));
-out:
-	mutex_unlock(&ca->discard_buckets_in_flight_lock);
-	return ret;
 }
 
 static void discard_in_flight_remove(struct bch_dev *ca, u64 bucket)
 {
-	mutex_lock(&ca->discard_buckets_in_flight_lock);
+	guard(mutex)(&ca->discard_buckets_in_flight_lock);
 	struct discard_in_flight *i =
		darray_find_p(ca->discard_buckets_in_flight, i, i->bucket == bucket);
	BUG_ON(!i || !i->in_progress);
 
	darray_remove_item(&ca->discard_buckets_in_flight, i);
-	mutex_unlock(&ca->discard_buckets_in_flight_lock);
 }
 
 struct discard_buckets_state {
@@ -1836,7 +1788,7 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
 	struct btree_iter iter = {};
 	struct bkey_s_c k;
 	struct bkey_i_alloc_v4 *a;
-	struct printbuf buf = PRINTBUF;
+	CLASS(printbuf, buf)();
 	bool discard_locked = false;
 	int ret = 0;
 
@@ -1927,7 +1879,6 @@ fsck_err:
 	if (!ret)
 		s->seen++;
 	bch2_trans_iter_exit(trans, &iter);
-	printbuf_exit(&buf);
 	return ret;
 }
 
@@ -2024,17 +1975,16 @@ static void bch2_do_discards_fast_work(struct work_struct *work)
 		bool got_bucket = false;
 		u64 bucket;
 
-		mutex_lock(&ca->discard_buckets_in_flight_lock);
-		darray_for_each(ca->discard_buckets_in_flight, i) {
-			if (i->in_progress)
-				continue;
+		scoped_guard(mutex, &ca->discard_buckets_in_flight_lock)
+			darray_for_each(ca->discard_buckets_in_flight, i) {
+				if (i->in_progress)
+					continue;
 
-			got_bucket = true;
-			bucket = i->bucket;
-			i->in_progress = true;
-			break;
-		}
-		mutex_unlock(&ca->discard_buckets_in_flight_lock);
+				got_bucket = true;
+				bucket = i->bucket;
+				i->in_progress = true;
+				break;
+			}
 
 		if (!got_bucket)
 			break;
@@ -2142,7 +2092,7 @@ static int invalidate_one_bucket(struct btree_trans *trans,
 				 s64 *nr_to_invalidate)
 {
 	struct bch_fs *c = trans->c;
-	struct printbuf buf = PRINTBUF;
+	CLASS(printbuf, buf)();
 	struct bpos bucket = u64_to_bucket(lru_k.k->p.offset);
 	struct btree_iter alloc_iter = {};
 	int ret = 0;
 
@@ -2203,7 +2153,6 @@ static int invalidate_one_bucket(struct btree_trans *trans,
out:
fsck_err:
 	bch2_trans_iter_exit(trans, &alloc_iter);
-	printbuf_exit(&buf);
 	return ret;
 }
 
@@ -2226,7 +2175,7 @@ static void bch2_do_invalidates_work(struct work_struct *work)
 {
 	struct bch_dev *ca = container_of(work, struct bch_dev, invalidate_work);
 	struct bch_fs *c = ca->fs;
-	struct btree_trans *trans = bch2_trans_get(c);
+	CLASS(btree_trans, trans)(c);
 	int ret = 0;
 
 	struct bkey_buf last_flushed;
@@ -2268,7 +2217,6 @@ restart_err:
 	}
 	bch2_trans_iter_exit(trans, &iter);
err:
-	bch2_trans_put(trans);
 	bch2_bkey_buf_exit(&last_flushed, c);
 	enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_do_invalidates);
 	enumerated_ref_put(&c->writes, BCH_WRITE_REF_invalidate);
@@ -2301,18 +2249,17 @@ void bch2_do_invalidates(struct bch_fs *c)
 int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca,
			    u64 bucket_start, u64 bucket_end)
 {
-	struct btree_trans *trans = bch2_trans_get(c);
 	struct btree_iter iter;
 	struct bkey_s_c k;
 	struct bkey hole;
 	struct bpos end = POS(ca->dev_idx, bucket_end);
-	struct bch_member *m;
 	unsigned long last_updated = jiffies;
 	int ret;
 
 	BUG_ON(bucket_start > bucket_end);
 	BUG_ON(bucket_end > ca->mi.nbuckets);
 
+	CLASS(btree_trans, trans)(c);
 	bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
		POS(ca->dev_idx, max_t(u64, ca->mi.first_bucket, bucket_start)),
		BTREE_ITER_prefetch);
@@ -2383,17 +2330,16 @@ bkey_err:
 	}
 
 	bch2_trans_iter_exit(trans, &iter);
-	bch2_trans_put(trans);
 
 	if (ret < 0) {
 		bch_err_msg(ca, ret, "initializing free space");
 		return ret;
 	}
 
-	mutex_lock(&c->sb_lock);
-	m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
-	SET_BCH_MEMBER_FREESPACE_INITIALIZED(m, true);
-	mutex_unlock(&c->sb_lock);
+	scoped_guard(mutex, &c->sb_lock) {
+		struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
+		SET_BCH_MEMBER_FREESPACE_INITIALIZED(m, true);
+	}
 
 	return 0;
 }
 
@@ -2403,7 +2349,6 @@ int bch2_fs_freespace_init(struct bch_fs *c)
 	if (c->sb.features & BIT_ULL(BCH_FEATURE_small_image))
 		return 0;
 
-
 	/*
	 * We can crash during the device add path, so we need to check this on
	 * every mount:
@@ -2428,9 +2373,8 @@ int bch2_fs_freespace_init(struct bch_fs *c)
 	}
 
 	if (doing_init) {
-		mutex_lock(&c->sb_lock);
+		guard(mutex)(&c->sb_lock);
 		bch2_write_super(c);
-		mutex_unlock(&c->sb_lock);
 
 		bch_verbose(c, "done initializing freespace");
 	}
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
index 23a9fbb36f49..fd1415524e46 100644
--- a/fs/bcachefs/alloc_foreground.c
+++ b/fs/bcachefs/alloc_foreground.c
@@ -106,20 +106,20 @@ void __bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob)
 		return;
 	}
 
-	spin_lock(&ob->lock);
-	ob->valid = false;
-	ob->data_type = 0;
-	spin_unlock(&ob->lock);
+	scoped_guard(spinlock, &ob->lock) {
+		ob->valid = false;
+		ob->data_type = 0;
+	}
 
-	spin_lock(&c->freelist_lock);
-	bch2_open_bucket_hash_remove(c, ob);
+	scoped_guard(spinlock, &c->freelist_lock) {
+		bch2_open_bucket_hash_remove(c, ob);
 
-	ob->freelist = c->open_buckets_freelist;
-	c->open_buckets_freelist = ob - c->open_buckets;
+		ob->freelist = c->open_buckets_freelist;
+		c->open_buckets_freelist = ob - c->open_buckets;
 
-	c->open_buckets_nr_free++;
-	ca->nr_open_buckets--;
-	spin_unlock(&c->freelist_lock);
+		c->open_buckets_nr_free++;
+		ca->nr_open_buckets--;
+	}
 
 	closure_wake_up(&c->open_buckets_wait);
 }
@@ -164,14 +164,14 @@ static void open_bucket_free_unused(struct bch_fs *c, struct open_bucket *ob)
 	BUG_ON(c->open_buckets_partial_nr >=
	       ARRAY_SIZE(c->open_buckets_partial));
 
-	spin_lock(&c->freelist_lock);
-	scoped_guard(rcu)
+	scoped_guard(spinlock, &c->freelist_lock) {
+		guard(rcu)();
 		bch2_dev_rcu(c, ob->dev)->nr_partial_buckets++;
 
-	ob->on_partial_list = true;
-	c->open_buckets_partial[c->open_buckets_partial_nr++] =
-		ob - c->open_buckets;
-	spin_unlock(&c->freelist_lock);
+		ob->on_partial_list = true;
+		c->open_buckets_partial[c->open_buckets_partial_nr++] =
+			ob - c->open_buckets;
+	}
 
 	closure_wake_up(&c->open_buckets_wait);
 	closure_wake_up(&c->freelist_wait);
@@ -219,33 +219,31 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c,
 		return NULL;
 	}
 
-	spin_lock(&c->freelist_lock);
+	guard(spinlock)(&c->freelist_lock);
 
 	if (unlikely(c->open_buckets_nr_free <= bch2_open_buckets_reserved(req->watermark))) {
 		if (cl)
 			closure_wait(&c->open_buckets_wait, cl);
 
 		track_event_change(&c->times[BCH_TIME_blocked_allocate_open_bucket], true);
-		spin_unlock(&c->freelist_lock);
 		return ERR_PTR(bch_err_throw(c, open_buckets_empty));
 	}
 
 	/* Recheck under lock: */
 	if (bch2_bucket_is_open(c, ca->dev_idx, bucket)) {
-		spin_unlock(&c->freelist_lock);
 		req->counters.skipped_open++;
 		return NULL;
 	}
 
 	struct open_bucket *ob = bch2_open_bucket_alloc(c);
 
-	spin_lock(&ob->lock);
-	ob->valid = true;
-	ob->sectors_free = ca->mi.bucket_size;
-	ob->dev = ca->dev_idx;
-	ob->gen = gen;
-	ob->bucket = bucket;
-	spin_unlock(&ob->lock);
+	scoped_guard(spinlock, &ob->lock) {
+		ob->valid = true;
+		ob->sectors_free = ca->mi.bucket_size;
+		ob->dev = ca->dev_idx;
+		ob->gen = gen;
+		ob->bucket = bucket;
+	}
 
 	ca->nr_open_buckets++;
 	bch2_open_bucket_hash_add(c, ob);
@@ -253,7 +251,6 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c,
 	track_event_change(&c->times[BCH_TIME_blocked_allocate_open_bucket], false);
 	track_event_change(&c->times[BCH_TIME_blocked_allocate], false);
 
-	spin_unlock(&c->freelist_lock);
 	return ob;
 }
 
@@ -453,7 +450,7 @@ static noinline void trace_bucket_alloc2(struct bch_fs *c,
				       struct closure *cl,
				       struct open_bucket *ob)
 {
-	struct printbuf buf = PRINTBUF;
+	CLASS(printbuf, buf)();
 
 	printbuf_tabstop_push(&buf, 24);
 
@@ -480,8 +477,6 @@ static noinline void trace_bucket_alloc2(struct bch_fs *c,
 		prt_printf(&buf, "err\t%s\n", bch2_err_str(PTR_ERR(ob)));
 		trace_bucket_alloc_fail(c, buf.buf);
 	}
-
-	printbuf_exit(&buf);
 }
 
 /**
@@ -511,7 +506,8 @@ again:
 		bch2_dev_usage_read_fast(ca, &req->usage);
 		avail = dev_buckets_free(ca, req->usage, req->watermark);
 
-		if (req->usage.buckets[BCH_DATA_need_discard] > avail)
+		if (req->usage.buckets[BCH_DATA_need_discard] >
+		    min(avail, ca->mi.nbuckets >> 7))
 			bch2_dev_do_discards(ca);
 
 		if (req->usage.buckets[BCH_DATA_need_gc_gens] > avail)
@@ -588,7 +584,8 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
		.ca = ca,
	};
 
-	bch2_trans_do(c,
+	CLASS(btree_trans, trans)(c);
+	lockrestart_do(trans,
		      PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(trans, &req, cl, false)));
 	return ob;
 }
 
@@ -847,17 +844,15 @@ static int bucket_alloc_set_writepoint(struct bch_fs *c,
 static int bucket_alloc_set_partial(struct bch_fs *c,
				    struct alloc_request *req)
 {
-	int i, ret = 0;
-
 	if (!c->open_buckets_partial_nr)
 		return 0;
 
-	spin_lock(&c->freelist_lock);
+	guard(spinlock)(&c->freelist_lock);
 
 	if (!c->open_buckets_partial_nr)
-		goto unlock;
+		return 0;
 
-	for (i = c->open_buckets_partial_nr - 1; i >= 0; --i) {
+	for (int i = c->open_buckets_partial_nr - 1; i >= 0; --i) {
 		struct open_bucket *ob = c->open_buckets + c->open_buckets_partial[i];
 
 		if (want_bucket(c, req, ob)) {
@@ -877,14 +872,13 @@ static int bucket_alloc_set_partial(struct bch_fs *c,
 			scoped_guard(rcu)
 				bch2_dev_rcu(c, ob->dev)->nr_partial_buckets--;
 
-			ret = add_new_bucket(c, req, ob);
+			int ret = add_new_bucket(c, req, ob);
 			if (ret)
-				break;
+				return ret;
 		}
 	}
-unlock:
-	spin_unlock(&c->freelist_lock);
-	return ret;
+
+	return 0;
 }
 
 static int __open_bucket_add_buckets(struct btree_trans *trans,
@@ -980,23 +974,18 @@ static bool should_drop_bucket(struct open_bucket *ob, struct bch_fs *c,
 		return ob->ec != NULL;
 	} else if (ca) {
 		bool drop = ob->dev == ca->dev_idx;
-		struct open_bucket *ob2;
-		unsigned i;
 
 		if (!drop && ob->ec) {
-			unsigned nr_blocks;
+			guard(mutex)(&ob->ec->lock);
+			unsigned nr_blocks = bkey_i_to_stripe(&ob->ec->new_stripe.key)->v.nr_blocks;
 
-			mutex_lock(&ob->ec->lock);
-			nr_blocks = bkey_i_to_stripe(&ob->ec->new_stripe.key)->v.nr_blocks;
-
-			for (i = 0; i < nr_blocks; i++) {
+			for (unsigned i = 0; i < nr_blocks; i++) {
 				if (!ob->ec->blocks[i])
 					continue;
 
-				ob2 = c->open_buckets + ob->ec->blocks[i];
+				struct open_bucket *ob2 = c->open_buckets + ob->ec->blocks[i];
 				drop |= ob2->dev == ca->dev_idx;
 			}
-			mutex_unlock(&ob->ec->lock);
 		}
 
 		return drop;
@@ -1012,14 +1001,13 @@ static void bch2_writepoint_stop(struct bch_fs *c, struct bch_dev *ca,
 	struct open_bucket *ob;
 	unsigned i;
 
-	mutex_lock(&wp->lock);
+	guard(mutex)(&wp->lock);
 	open_bucket_for_each(c, &wp->ptrs, ob, i)
 		if (should_drop_bucket(ob, c, ca, ec))
 			bch2_open_bucket_put(c, ob);
 		else
 			ob_push(c, &ptrs, ob);
 	wp->ptrs = ptrs;
-	mutex_unlock(&wp->lock);
 }
 
 void bch2_open_buckets_stop(struct bch_fs *c, struct bch_dev *ca,
@@ -1035,39 +1023,37 @@ void bch2_open_buckets_stop(struct bch_fs *c, struct bch_dev *ca,
 	bch2_writepoint_stop(c, ca, ec, &c->rebalance_write_point);
 	bch2_writepoint_stop(c, ca, ec, &c->btree_write_point);
 
-	mutex_lock(&c->btree_reserve_cache_lock);
-	while (c->btree_reserve_cache_nr) {
-		struct btree_alloc *a =
-			&c->btree_reserve_cache[--c->btree_reserve_cache_nr];
+	scoped_guard(mutex, &c->btree_reserve_cache_lock)
+		while (c->btree_reserve_cache_nr) {
+			struct btree_alloc *a =
+				&c->btree_reserve_cache[--c->btree_reserve_cache_nr];
 
-		bch2_open_buckets_put(c, &a->ob);
-	}
-	mutex_unlock(&c->btree_reserve_cache_lock);
+			bch2_open_buckets_put(c, &a->ob);
+		}
 
-	spin_lock(&c->freelist_lock);
 	i = 0;
-	while (i < c->open_buckets_partial_nr) {
-		struct open_bucket *ob =
-			c->open_buckets + c->open_buckets_partial[i];
-
-		if (should_drop_bucket(ob, c, ca, ec)) {
-			--c->open_buckets_partial_nr;
-			swap(c->open_buckets_partial[i],
-			     c->open_buckets_partial[c->open_buckets_partial_nr]);
-
-			ob->on_partial_list = false;
-
-			scoped_guard(rcu)
-				bch2_dev_rcu(c, ob->dev)->nr_partial_buckets--;
-
-			spin_unlock(&c->freelist_lock);
-			bch2_open_bucket_put(c, ob);
-			spin_lock(&c->freelist_lock);
-		} else {
-			i++;
+	scoped_guard(spinlock, &c->freelist_lock)
+		while (i < c->open_buckets_partial_nr) {
+			struct open_bucket *ob =
+				c->open_buckets + c->open_buckets_partial[i];
+
+			if (should_drop_bucket(ob, c, ca, ec)) {
+				--c->open_buckets_partial_nr;
+				swap(c->open_buckets_partial[i],
+				     c->open_buckets_partial[c->open_buckets_partial_nr]);
+
+				ob->on_partial_list = false;
+
+				scoped_guard(rcu)
+					bch2_dev_rcu(c, ob->dev)->nr_partial_buckets--;
+
+				spin_unlock(&c->freelist_lock);
+				bch2_open_bucket_put(c, ob);
+				spin_lock(&c->freelist_lock);
+			} else {
+				i++;
+			}
 		}
-	}
-	spin_unlock(&c->freelist_lock);
 
 	bch2_ec_stop_dev(c, ca);
 }
@@ -1121,22 +1107,17 @@ static noinline bool try_decrease_writepoints(struct btree_trans *trans, unsigne
 	struct open_bucket *ob;
 	unsigned i;
 
-	mutex_lock(&c->write_points_hash_lock);
-	if (c->write_points_nr < old_nr) {
-		mutex_unlock(&c->write_points_hash_lock);
-		return true;
-	}
-
-	if (c->write_points_nr == 1 ||
-	    !too_many_writepoints(c, 8)) {
-		mutex_unlock(&c->write_points_hash_lock);
-		return false;
-	}
+	scoped_guard(mutex, &c->write_points_hash_lock) {
+		if (c->write_points_nr < old_nr)
+			return true;
 
-	wp = c->write_points + --c->write_points_nr;
+		if (c->write_points_nr == 1 ||
+		    !too_many_writepoints(c, 8))
+			return false;
 
-	hlist_del_rcu(&wp->node);
-	mutex_unlock(&c->write_points_hash_lock);
+		wp = c->write_points + --c->write_points_nr;
+		hlist_del_rcu(&wp->node);
+	}
 
 	bch2_trans_mutex_lock_norelock(trans, &wp->lock);
 	open_bucket_for_each(c, &wp->ptrs, ob, i)
@@ -1470,35 +1451,25 @@ void bch2_open_bucket_to_text(struct printbuf *out, struct bch_fs *c, struct ope
 void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c,
			       struct bch_dev *ca)
 {
-	struct open_bucket *ob;
-
-	out->atomic++;
+	guard(printbuf_atomic)(out);
 
-	for (ob = c->open_buckets;
+	for (struct open_bucket *ob = c->open_buckets;
	     ob < c->open_buckets + ARRAY_SIZE(c->open_buckets);
	     ob++) {
-		spin_lock(&ob->lock);
+		guard(spinlock)(&ob->lock);
 		if (ob->valid && (!ca || ob->dev == ca->dev_idx))
 			bch2_open_bucket_to_text(out, c, ob);
-		spin_unlock(&ob->lock);
 	}
-
-	--out->atomic;
 }
 
 void bch2_open_buckets_partial_to_text(struct printbuf *out, struct bch_fs *c)
 {
-	unsigned i;
-
-	out->atomic++;
-	spin_lock(&c->freelist_lock);
+	guard(printbuf_atomic)(out);
+	guard(spinlock)(&c->freelist_lock);
 
-	for (i = 0; i < c->open_buckets_partial_nr; i++)
+	for (unsigned i = 0; i < c->open_buckets_partial_nr; i++)
 		bch2_open_bucket_to_text(out, c,
				c->open_buckets + c->open_buckets_partial[i]);
-
-	spin_unlock(&c->freelist_lock);
-	--out->atomic;
 }
 
 static const char * const bch2_write_point_states[] = {
@@ -1514,7 +1485,7 @@ static void bch2_write_point_to_text(struct printbuf *out, struct bch_fs *c,
 	struct open_bucket *ob;
 	unsigned i;
 
-	mutex_lock(&wp->lock);
+	guard(mutex)(&wp->lock);
 
 	prt_printf(out, "%lu: ", wp->write_point);
 	prt_human_readable_u64(out, wp->sectors_allocated << 9);
@@ -1533,8 +1504,6 @@ static void bch2_write_point_to_text(struct printbuf *out, struct bch_fs *c,
 	open_bucket_for_each(c, &wp->ptrs, ob, i)
 		bch2_open_bucket_to_text(out, c, ob);
 	printbuf_indent_sub(out, 2);
-
-	mutex_unlock(&wp->lock);
 }
 
 void bch2_write_points_to_text(struct printbuf *out, struct bch_fs *c)
@@ -1621,7 +1590,7 @@ void bch2_dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
 
 static noinline void bch2_print_allocator_stuck(struct bch_fs *c)
 {
-	struct printbuf buf = PRINTBUF;
+	CLASS(printbuf, buf)();
 
 	prt_printf(&buf, "Allocator stuck? Waited for %u seconds\n",
		   c->opts.allocator_stuck_timeout);
@@ -1634,8 +1603,8 @@ static noinline void bch2_print_allocator_stuck(struct bch_fs *c)
 
 	bch2_printbuf_make_room(&buf, 4096);
 
-	buf.atomic++;
-	scoped_guard(rcu)
+	scoped_guard(rcu) {
+		guard(printbuf_atomic)(&buf);
 		for_each_online_member_rcu(c, ca) {
 			prt_printf(&buf, "Dev %u:\n", ca->dev_idx);
 			printbuf_indent_add(&buf, 2);
@@ -1643,7 +1612,7 @@ static noinline void bch2_print_allocator_stuck(struct bch_fs *c)
 			printbuf_indent_sub(&buf, 2);
 			prt_newline(&buf);
 		}
-	--buf.atomic;
+	}
 
 	prt_printf(&buf, "Copygc debug:\n");
 	printbuf_indent_add(&buf, 2);
@@ -1657,7 +1626,6 @@ static noinline void bch2_print_allocator_stuck(struct bch_fs *c)
 	printbuf_indent_sub(&buf, 2);
 
 	bch2_print_str(c, KERN_ERR, buf.buf);
-	printbuf_exit(&buf);
 }
 
 static inline unsigned allocator_wait_timeout(struct bch_fs *c)
diff --git a/fs/bcachefs/alloc_foreground.h b/fs/bcachefs/alloc_foreground.h
index 1b3fc8460096..02aef66859c3 100644
--- a/fs/bcachefs/alloc_foreground.h
+++ b/fs/bcachefs/alloc_foreground.h
@@ -210,16 +210,11 @@ static inline bool bch2_bucket_is_open(struct bch_fs *c, unsigned dev, u64 bucke
 
 static inline bool bch2_bucket_is_open_safe(struct bch_fs *c, unsigned dev, u64 bucket)
 {
-	bool ret;
-
 	if (bch2_bucket_is_open(c, dev, bucket))
 		return true;
 
-	spin_lock(&c->freelist_lock);
-	ret = bch2_bucket_is_open(c, dev, bucket);
-	spin_unlock(&c->freelist_lock);
-
-	return ret;
+	guard(spinlock)(&c->freelist_lock);
+	return bch2_bucket_is_open(c, dev, bucket);
 }
 
 enum bch_write_flags;
diff --git a/fs/bcachefs/async_objs.c b/fs/bcachefs/async_objs.c
index a7cd1f0f0964..ad04e5f0f056 100644
--- a/fs/bcachefs/async_objs.c
+++ b/fs/bcachefs/async_objs.c
@@ -13,28 +13,38 @@
 
 #include <linux/debugfs.h>
 
-static void promote_obj_to_text(struct printbuf *out, void *obj)
+static void promote_obj_to_text(struct printbuf *out,
+				struct bch_fs *c,
+				void *obj)
 {
-	bch2_promote_op_to_text(out, obj);
+	bch2_promote_op_to_text(out, c, obj);
 }
 
-static void rbio_obj_to_text(struct printbuf *out, void *obj)
+static void rbio_obj_to_text(struct printbuf *out,
+			     struct bch_fs *c,
+			     void *obj)
 {
-	bch2_read_bio_to_text(out, obj);
+	bch2_read_bio_to_text(out, c, obj);
 }
 
-static void write_op_obj_to_text(struct printbuf *out, void *obj)
+static void write_op_obj_to_text(struct printbuf *out,
+				 struct bch_fs *c,
+				 void *obj)
 {
 	bch2_write_op_to_text(out, obj);
 }
 
-static void btree_read_bio_obj_to_text(struct printbuf *out, void *obj)
+static void btree_read_bio_obj_to_text(struct printbuf *out,
+				       struct bch_fs *c,
+				       void *obj)
 {
 	struct btree_read_bio *rbio = obj;
 	bch2_btree_read_bio_to_text(out, rbio);
 }
 
-static void btree_write_bio_obj_to_text(struct printbuf *out, void *obj)
+static void btree_write_bio_obj_to_text(struct printbuf *out,
+					struct bch_fs *c,
+					void *obj)
 {
 	struct btree_write_bio *wbio = obj;
 	bch2_bio_to_text(out, &wbio->wbio.bio);
@@ -79,13 +89,12 @@ static ssize_t bch2_async_obj_list_read(struct file *file, char __user *buf,
 		if (!i->size)
 			break;
 
-		list->obj_to_text(&i->buf, obj);
+		list->obj_to_text(&i->buf, i->c, obj);
+		i->iter = iter.pos;
 	}
 
 	if (i->buf.allocation_failure)
 		ret = -ENOMEM;
-	else
-		i->iter = iter.pos;
 
 	if (!ret)
 		ret = bch2_debugfs_flush_buf(i);
diff --git a/fs/bcachefs/async_objs.h b/fs/bcachefs/async_objs.h
index cd6489b8cf76..451db4c51fb2 100644
--- a/fs/bcachefs/async_objs.h
+++ b/fs/bcachefs/async_objs.h
@@ -3,9 +3,10 @@
 #define _BCACHEFS_ASYNC_OBJS_H
 
 #ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS
-static inline void __async_object_list_del(struct fast_list *head, unsigned idx)
+static inline void __async_object_list_del(struct fast_list *head, unsigned *idx)
 {
-	fast_list_remove(head, idx);
+	fast_list_remove(head, *idx);
+	*idx = 0;
 }
 
 static inline int __async_object_list_add(struct fast_list *head, void *obj, unsigned *idx)
@@ -16,7 +17,7 @@ static inline int __async_object_list_add(struct fast_list *head, void *obj, uns
 }
 
 #define async_object_list_del(_c, _list, idx)			\
-	__async_object_list_del(&(_c)->async_objs[BCH_ASYNC_OBJ_LIST_##_list].list, idx)
+	__async_object_list_del(&(_c)->async_objs[BCH_ASYNC_OBJ_LIST_##_list].list, &idx)
 
 #define async_object_list_add(_c, _list, obj, idx)		\
 	__async_object_list_add(&(_c)->async_objs[BCH_ASYNC_OBJ_LIST_##_list].list, obj, idx)
diff --git a/fs/bcachefs/async_objs_types.h b/fs/bcachefs/async_objs_types.h
index 8d713c0f5841..ed262c874ad0 100644
--- a/fs/bcachefs/async_objs_types.h
+++ b/fs/bcachefs/async_objs_types.h
@@ -18,7 +18,7 @@ enum bch_async_obj_lists {
 
 struct async_obj_list {
	struct fast_list	list;
-	void			(*obj_to_text)(struct printbuf *, void *);
+	void			(*obj_to_text)(struct printbuf *, struct bch_fs *, void *);
	unsigned		idx;
 };
 
diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c
index bc277f42cf5f..bd26ab3e6812 100644
--- a/fs/bcachefs/backpointers.c
+++ b/fs/bcachefs/backpointers.c
@@ -108,7 +108,7 @@ static noinline int backpointer_mod_err(struct btree_trans *trans,
				 bool insert)
 {
 	struct bch_fs *c = trans->c;
-	struct printbuf buf = PRINTBUF;
+	CLASS(printbuf, buf)();
 	bool will_check = c->recovery.passes_to_run &
		BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers);
 	int ret = 0;
 
@@ -146,7 +146,6 @@ static noinline int backpointer_mod_err(struct btree_trans *trans,
 
 	if (buf.buf)
 		bch_err(c, "%s", buf.buf);
-	printbuf_exit(&buf);
 	return ret;
 }
 
@@ -209,7 +208,7 @@ static int backpointer_target_not_found(struct btree_trans *trans,
				bool commit)
 {
 	struct bch_fs *c = trans->c;
-	struct printbuf buf = PRINTBUF;
+	CLASS(printbuf, buf)();
 	int ret = 0;
 
 	/*
@@ -245,7 +244,7 @@ static int backpointer_target_not_found(struct btree_trans *trans,
			"%s", buf.buf)) {
 		ret = bch2_backpointer_del(trans, bp.k->p);
 		if (ret || !commit)
-			goto out;
+			return ret;
 
 		/*
		 * Normally, on transaction commit from inside a transaction,
@@ -263,9 +262,7 @@ static int backpointer_target_not_found(struct btree_trans *trans,
		 */
 		ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
 	}
-out:
fsck_err:
-	printbuf_exit(&buf);
 	return ret;
 }
 
@@ -389,7 +386,7 @@ static int bch2_check_backpointer_has_valid_bucket(struct btree_trans *trans, st
 	struct bch_fs *c = trans->c;
 	struct btree_iter alloc_iter = {};
 	struct bkey_s_c alloc_k;
-	struct printbuf buf = PRINTBUF;
+	CLASS(printbuf, buf)();
 	int ret = 0;
 
 	struct bpos bucket;
@@ -424,7 +421,6 @@ static int bch2_check_backpointer_has_valid_bucket(struct btree_trans *trans, st
out:
fsck_err:
 	bch2_trans_iter_exit(trans, &alloc_iter);
-	printbuf_exit(&buf);
 	return ret;
 }
 
@@ -435,14 +431,13 @@ int bch2_check_btree_backpointers(struct bch_fs *c)
 	bch2_bkey_buf_init(&last_flushed);
 	bkey_init(&last_flushed.k->k);
 
-	int ret = bch2_trans_run(c,
-		for_each_btree_key_commit(trans, iter,
+	CLASS(btree_trans, trans)(c);
+	int ret = for_each_btree_key_commit(trans, iter,
			BTREE_ID_backpointers, POS_MIN, 0, k,
			NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
-		  bch2_check_backpointer_has_valid_bucket(trans, k, &last_flushed)));
+		  bch2_check_backpointer_has_valid_bucket(trans, k, &last_flushed));
 
 	bch2_bkey_buf_exit(&last_flushed, c);
-	bch_err_fn(c, ret);
 	return ret;
 }
 
@@ -472,7 +467,7 @@ static int check_extent_checksum(struct btree_trans *trans,
 	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(extent);
 	const union bch_extent_entry *entry;
 	struct extent_ptr_decoded p;
-	struct printbuf buf = PRINTBUF;
+	CLASS(printbuf, buf)();
 	void *data_buf = NULL;
 	struct bio *bio = NULL;
 	size_t bytes;
 
@@ -531,7 +526,6 @@ err:
 	kvfree(data_buf);
 	enumerated_ref_put(&ca->io_ref[READ],
			   BCH_DEV_READ_REF_check_extent_checksums);
-	printbuf_exit(&buf);
 	return ret;
 }
 
@@ -542,7 +536,7 @@ static int check_bp_exists(struct btree_trans *trans,
 {
 	struct bch_fs *c = trans->c;
 	struct btree_iter other_extent_iter = {};
-	struct printbuf buf = PRINTBUF;
+	CLASS(printbuf, buf)();
 
 	if (bpos_lt(bp->k.p, s->bp_start) ||
	    bpos_gt(bp->k.p, s->bp_end))
@@ -567,7 +561,6 @@ err:
fsck_err:
 	bch2_trans_iter_exit(trans, &other_extent_iter);
 	bch2_trans_iter_exit(trans, &bp_iter);
-	printbuf_exit(&buf);
 	return ret;
check_existing_bp:
 	/* Do we have a backpointer for a different extent? */
@@ -896,7 +889,7 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b
 	u32 sectors[ALLOC_SECTORS_NR];
 	memset(sectors, 0, sizeof(sectors));
 
-	struct bch_dev *ca = bch2_dev_bucket_tryget_noerror(trans->c, alloc_k.k->p);
+	CLASS(bch2_dev_bucket_tryget_noerror, ca)(trans->c, alloc_k.k->p);
 	if (!ca)
 		return 0;
 
@@ -933,12 +926,12 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b
 	};
 	bch2_trans_iter_exit(trans, &iter);
 	if (ret)
-		goto err;
+		return ret;
 
 	if (need_commit) {
 		ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
 		if (ret)
-			goto err;
+			return ret;
 	}
 
 	if (sectors[ALLOC_dirty] != a->dirty_sectors ||
@@ -947,15 +940,14 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b
 		if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_backpointer_bucket_gen) {
 			ret = bch2_backpointers_maybe_flush(trans, alloc_k, last_flushed);
 			if (ret)
-				goto err;
+				return ret;
 		}
 
 		if (sectors[ALLOC_dirty] > a->dirty_sectors ||
		    sectors[ALLOC_cached] > a->cached_sectors ||
		    sectors[ALLOC_stripe] > a->stripe_sectors) {
-			ret = check_bucket_backpointers_to_extents(trans, ca, alloc_k.k->p) ?:
+			return check_bucket_backpointers_to_extents(trans, ca, alloc_k.k->p) ?:
				bch_err_throw(c, transaction_restart_nested);
-			goto err;
 		}
 
 		bool empty = (sectors[ALLOC_dirty] +
@@ -971,9 +963,8 @@ static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct b
 		*had_mismatch = true;
 	}
 
-err:
-	bch2_dev_put(ca);
-	return ret;
+
+	return 0;
 }
 
 static bool backpointer_node_has_missing(struct bch_fs *c, struct bkey_s_c k)
@@ -1108,7 +1099,7 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
 {
 	int ret = 0;
 
-	struct btree_trans *trans = bch2_trans_get(c);
+	CLASS(btree_trans, trans)(c);
 	struct extents_to_bp_state s = { .bp_start = POS_MIN };
 
 	bch2_bkey_buf_init(&s.last_flushed);
@@ -1147,7 +1138,7 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
 
 		if (!bpos_eq(s.bp_start, POS_MIN) ||
		    !bpos_eq(s.bp_end, SPOS_MAX)) {
-			struct printbuf buf = PRINTBUF;
+			CLASS(printbuf, buf)();
 
 			prt_str(&buf, "check_extents_to_backpointers(): ");
 			bch2_bpos_to_text(&buf, s.bp_start);
@@ -1155,7 +1146,6 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
 			bch2_bpos_to_text(&buf, s.bp_end);
 
 			bch_verbose(c, "%s", buf.buf);
-			printbuf_exit(&buf);
 		}
 
 		ret = bch2_check_extents_to_backpointers_pass(trans, &s);
@@ -1170,11 +1160,8 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
 		bch2_bucket_bitmap_free(&ca->bucket_backpointer_empty);
 	}
err:
-	bch2_trans_put(trans);
 	bch2_bkey_buf_exit(&s.last_flushed, c);
 	bch2_btree_cache_unpin(c);
-
-	bch_err_fn(c, ret);
 	return ret;
 }
 
@@ -1212,7 +1199,7 @@ int bch2_check_bucket_backpointer_mismatch(struct btree_trans *trans,
 	u64 nr = ca->bucket_backpointer_mismatch.nr;
 	u64 allowed = copygc ? ca->mi.nbuckets >> 7 : 0;
 
-	struct printbuf buf = PRINTBUF;
+	CLASS(printbuf, buf)();
 	__bch2_log_msg_start(ca->name, &buf);
 
 	prt_printf(&buf, "Detected missing backpointers in bucket %llu, now have %llu/%llu with missing\n",
@@ -1223,7 +1210,6 @@ int bch2_check_bucket_backpointer_mismatch(struct btree_trans *trans,
			    nr < allowed ? RUN_RECOVERY_PASS_ratelimit : 0);
 
 	bch2_print_str(c, KERN_ERR, buf.buf);
-	printbuf_exit(&buf);
 	return 0;
 }
 
@@ -1300,7 +1286,7 @@ static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
 
 int bch2_check_backpointers_to_extents(struct bch_fs *c)
 {
-	struct btree_trans *trans = bch2_trans_get(c);
+	CLASS(btree_trans, trans)(c);
 	struct bbpos start = (struct bbpos) { .btree = 0, .pos = POS_MIN, }, end;
 	int ret;
 
@@ -1320,7 +1306,7 @@ int bch2_check_backpointers_to_extents(struct bch_fs *c)
 
 		if (bbpos_cmp(start, BBPOS_MIN) ||
		    bbpos_cmp(end, BBPOS_MAX)) {
-			struct printbuf buf = PRINTBUF;
+			CLASS(printbuf, buf)();
 
 			prt_str(&buf, "check_backpointers_to_extents(): ");
 			bch2_bbpos_to_text(&buf, start);
@@ -1328,7 +1314,6 @@ int bch2_check_backpointers_to_extents(struct bch_fs *c)
 			bch2_bbpos_to_text(&buf, end);
 
 			bch_verbose(c, "%s", buf.buf);
-			printbuf_exit(&buf);
 		}
 
 		ret = bch2_check_backpointers_to_extents_pass(trans, start, end);
@@ -1337,11 +1322,8 @@ int bch2_check_backpointers_to_extents(struct bch_fs *c)
 		start = bbpos_successor(end);
 	}
 
-	bch2_trans_put(trans);
 	bch2_btree_cache_unpin(c);
-
-	bch_err_fn(c, ret);
 	return ret;
 }
 
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index fb3156ed7f0b..8a6f886b5bf2 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -329,19 +329,21 @@ do { \
	bch2_print_str(_c, __VA_ARGS__); \
 } while (0)
 
-#define bch_info(c, fmt, ...) \
-	bch2_print(c, KERN_INFO bch2_fmt(c, fmt), ##__VA_ARGS__)
-#define bch_info_ratelimited(c, fmt, ...) \
-	bch2_print_ratelimited(c, KERN_INFO bch2_fmt(c, fmt), ##__VA_ARGS__)
-#define bch_notice(c, fmt, ...) \
-	bch2_print(c, KERN_NOTICE bch2_fmt(c, fmt), ##__VA_ARGS__)
-#define bch_warn(c, fmt, ...) \
-	bch2_print(c, KERN_WARNING bch2_fmt(c, fmt), ##__VA_ARGS__)
-#define bch_warn_ratelimited(c, fmt, ...) \
-	bch2_print_ratelimited(c, KERN_WARNING bch2_fmt(c, fmt), ##__VA_ARGS__)
-
-#define bch_err(c, fmt, ...) \
-	bch2_print(c, KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__)
+#define bch_log(c, loglevel, fmt, ...) \
+	bch2_print(c, loglevel bch2_fmt(c, fmt), ##__VA_ARGS__)
+#define bch_log_ratelimited(c, loglevel, fmt, ...) \
+	bch2_print_ratelimited(c, loglevel bch2_fmt(c, fmt), ##__VA_ARGS__)
+
+#define bch_err(c, ...)			bch_log(c, KERN_ERR, __VA_ARGS__)
+#define bch_err_ratelimited(c, ...)	bch_log_ratelimited(c, KERN_ERR, __VA_ARGS__)
+#define bch_warn(c, ...)		bch_log(c, KERN_WARNING, __VA_ARGS__)
+#define bch_warn_ratelimited(c, ...)	bch_log_ratelimited(c, KERN_WARNING, __VA_ARGS__)
+#define bch_notice(c, ...)		bch_log(c, KERN_NOTICE, __VA_ARGS__)
+#define bch_info(c, ...)		bch_log(c, KERN_INFO, __VA_ARGS__)
+#define bch_info_ratelimited(c, ...)	bch_log_ratelimited(c, KERN_INFO, __VA_ARGS__)
+#define bch_verbose(c, ...)		bch_log(c, KERN_DEBUG, __VA_ARGS__)
+#define bch_verbose_ratelimited(c, ...)	bch_log_ratelimited(c, KERN_DEBUG, __VA_ARGS__)
+
 #define bch_err_dev(ca, fmt, ...) \
	bch2_print(c, KERN_ERR bch2_fmt_dev(ca, fmt), ##__VA_ARGS__)
 #define bch_err_dev_offset(ca, _offset, fmt, ...) \
@@ -351,8 +353,6 @@ do { \
 #define bch_err_inum_offset(c, _inum, _offset, fmt, ...) \
	bch2_print(c, KERN_ERR bch2_fmt_inum_offset(c, _inum, _offset, fmt), ##__VA_ARGS__)
 
-#define bch_err_ratelimited(c, fmt, ...) \
-	bch2_print_ratelimited(c, KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__)
 #define bch_err_dev_ratelimited(ca, fmt, ...) \
	bch2_print_ratelimited(ca, KERN_ERR bch2_fmt_dev(ca, fmt), ##__VA_ARGS__)
 #define bch_err_dev_offset_ratelimited(ca, _offset, fmt, ...) \
@@ -386,24 +386,6 @@ do { \
			##__VA_ARGS__, bch2_err_str(_ret)); \
 } while (0)
 
-#define bch_verbose(c, fmt, ...) \
-do { \
-	if ((c)->opts.verbose) \
-		bch_info(c, fmt, ##__VA_ARGS__); \
-} while (0)
-
-#define bch_verbose_ratelimited(c, fmt, ...) \
-do { \
-	if ((c)->opts.verbose) \
-		bch_info_ratelimited(c, fmt, ##__VA_ARGS__); \
-} while (0)
-
-#define pr_verbose_init(opts, fmt, ...) \
-do { \
-	if (opt_get(opts, verbose)) \
-		pr_info(fmt, ##__VA_ARGS__); \
-} while (0)
-
 static inline int __bch2_err_trace(struct bch_fs *c, int err)
 {
	trace_error_throw(c, err, _THIS_IP_);
@@ -833,6 +815,8 @@ struct bch_fs {
	struct bch_disk_groups_cpu __rcu *disk_groups;
 
	struct bch_opts		opts;
+	unsigned		loglevel;
+	unsigned		prev_loglevel;
 
	/* Updated by bch2_sb_update():*/
	struct {
@@ -1181,7 +1165,7 @@ static inline bool bch2_ro_ref_tryget(struct bch_fs *c)
 
 static inline void bch2_ro_ref_put(struct bch_fs *c)
 {
-	if (refcount_dec_and_test(&c->ro_ref))
+	if (c && refcount_dec_and_test(&c->ro_ref))
		wake_up(&c->ro_ref_wait);
 }
 
diff --git a/fs/bcachefs/bkey.c b/fs/bcachefs/bkey.c
index ee823c640642..67e39f835b96 100644
--- a/fs/bcachefs/bkey.c
+++ b/fs/bcachefs/bkey.c
@@ -624,10 +624,8 @@ struct bkey_format bch2_bkey_format_done(struct bkey_format_state *s)
	}
 
	if (static_branch_unlikely(&bch2_debug_check_bkey_unpack)) {
-		struct printbuf buf = PRINTBUF;
-
+		CLASS(printbuf, buf)();
		BUG_ON(bch2_bkey_format_invalid(NULL, &ret, 0, &buf));
-		printbuf_exit(&buf);
	}
 
	return ret;
diff --git a/fs/bcachefs/bset.c b/fs/bcachefs/bset.c
index 32841f762eb2..72698c0d9f0e 100644
--- a/fs/bcachefs/bset.c
+++ b/fs/bcachefs/bset.c
@@ -58,7 +58,7 @@ void bch2_dump_bset(struct bch_fs *c, struct btree *b,
	struct bkey_packed *_k, *_n;
	struct bkey uk, n;
	struct bkey_s_c k;
-	struct printbuf buf = PRINTBUF;
+	CLASS(printbuf, buf)();
 
	if (!i->u64s)
		return;
@@ -97,8 +97,6 @@ void bch2_dump_bset(struct bch_fs *c, struct btree *b,
		if (!bkey_deleted(k.k) && bpos_eq(n.p, k.k->p))
			printk(KERN_ERR "Duplicate keys\n");
	}
-
-	printbuf_exit(&buf);
 }
 
 void bch2_dump_btree_node(struct bch_fs *c, struct btree *b)
@@ -113,7 +111,7 @@ void bch2_dump_btree_node_iter(struct btree *b,
			      struct btree_node_iter *iter)
 {
	struct btree_node_iter_set *set;
-	struct printbuf buf = PRINTBUF;
+	CLASS(printbuf, buf)();
 
	printk(KERN_ERR "btree node iter with %u/%u sets:\n",
	       __btree_node_iter_used(iter), b->nsets);
@@ -128,8 +126,6 @@ void bch2_dump_btree_node_iter(struct btree *b,
		printk(KERN_ERR "set %zu key %u: %s\n",
		       t - b->set, set->k, buf.buf);
	}
-
-	printbuf_exit(&buf);
 }
 
 struct btree_nr_keys bch2_btree_node_count_keys(struct btree *b)
@@ -362,27 +358,6 @@ static struct bkey_float *bkey_float(const struct btree *b,
	return ro_aux_tree_base(b, t)->f + idx;
 }
 
-static void __bset_aux_tree_verify(struct btree *b)
-{
-	for_each_bset(b, t) {
-		if (t->aux_data_offset == U16_MAX)
-			continue;
-
-		BUG_ON(t != b->set &&
-		       t[-1].aux_data_offset == U16_MAX);
-
-		BUG_ON(t->aux_data_offset < bset_aux_tree_buf_start(b, t));
-		BUG_ON(t->aux_data_offset > btree_aux_data_u64s(b));
-		BUG_ON(bset_aux_tree_buf_end(t) > btree_aux_data_u64s(b));
-	}
-}
-
-static inline void bset_aux_tree_verify(struct btree *b)
-{
-	if (static_branch_unlikely(&bch2_debug_check_bset_lookups))
-		__bset_aux_tree_verify(b);
-}
-
 void bch2_btree_keys_init(struct btree *b)
 {
	unsigned i;
@@ -538,6 +513,51 @@ static inline void bch2_bset_verify_rw_aux_tree(struct btree *b,
		__bch2_bset_verify_rw_aux_tree(b, t);
 }
 
+static void __bset_aux_tree_verify_ro(struct btree *b, struct bset_tree *t)
+{
+	struct bkey_packed *k = btree_bkey_first(b, t);
+
+	eytzinger1_for_each(j, t->size - 1) {
+		while (tree_to_bkey(b, t, j) > k &&
+		       k != btree_bkey_last(b, t))
+			k = bkey_p_next(k);
+
+		BUG_ON(tree_to_bkey(b, t, j) != k);
+	}
+}
+
+static void __bset_aux_tree_verify(struct btree *b)
+{
+	for_each_bset(b, t) {
+		if (t->aux_data_offset == U16_MAX)
+			continue;
+
+		BUG_ON(t != b->set &&
+		       t[-1].aux_data_offset == U16_MAX);
+
+		BUG_ON(t->aux_data_offset < bset_aux_tree_buf_start(b, t));
+		BUG_ON(t->aux_data_offset > btree_aux_data_u64s(b));
+		BUG_ON(bset_aux_tree_buf_end(t) > btree_aux_data_u64s(b));
+
+		switch (bset_aux_tree_type(t)) {
+		case BSET_RO_AUX_TREE:
+			__bset_aux_tree_verify_ro(b, t);
+			break;
+		case BSET_RW_AUX_TREE:
+			__bch2_bset_verify_rw_aux_tree(b, t);
+			break;
+		default:
+			break;
+		}
+	}
+}
+
+static inline void bset_aux_tree_verify(struct btree *b)
+{
+	if (static_branch_unlikely(&bch2_debug_check_bset_lookups))
+		__bset_aux_tree_verify(b);
+}
+
 /* returns idx of first entry >= offset: */
 static unsigned rw_aux_tree_bsearch(struct btree *b,
				    struct bset_tree *t,
diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c
index 49505653fe12..23ed7393f07f 100644
--- a/fs/bcachefs/btree_cache.c
+++ b/fs/bcachefs/btree_cache.c
@@ -78,9 +78,8 @@ void bch2_btree_node_to_freelist(struct bch_fs *c, struct btree *b)
 {
	struct btree_cache *bc = &c->btree_cache;
 
-	mutex_lock(&bc->lock);
-	__bch2_btree_node_to_freelist(bc, b);
-	mutex_unlock(&bc->lock);
+	scoped_guard(mutex, &bc->lock)
+		__bch2_btree_node_to_freelist(bc, b);
 
	six_unlock_write(&b->c.lock);
	six_unlock_intent(&b->c.lock);
@@ -215,14 +214,13 @@ void bch2_node_pin(struct bch_fs *c, struct btree *b)
 {
	struct btree_cache *bc = &c->btree_cache;
 
-	mutex_lock(&bc->lock);
+	guard(mutex)(&bc->lock);
	if (b != btree_node_root(c, b) && !btree_node_pinned(b)) {
		set_btree_node_pinned(b);
		list_move(&b->list, &bc->live[1].list);
		bc->live[0].nr--;
		bc->live[1].nr++;
	}
-	mutex_unlock(&bc->lock);
 }
 
 void bch2_btree_cache_unpin(struct bch_fs *c)
@@ -230,7 +228,7 @@ void bch2_btree_cache_unpin(struct bch_fs *c)
	struct btree_cache *bc = &c->btree_cache;
	struct btree *b, *n;
 
-	mutex_lock(&bc->lock);
+	guard(mutex)(&bc->lock);
	c->btree_cache.pinned_nodes_mask[0] = 0;
	c->btree_cache.pinned_nodes_mask[1] = 0;
@@ -240,8 +238,6 @@ void bch2_btree_cache_unpin(struct bch_fs *c)
		bc->live[0].nr++;
		bc->live[1].nr--;
	}
-
-	mutex_unlock(&bc->lock);
 }
 
 /* Btree in memory cache - hash table */
@@ -296,11 +292,8 @@ int bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b,
	b->c.level	= level;
	b->c.btree_id	= id;
 
-	mutex_lock(&bc->lock);
-	int ret = __bch2_btree_node_hash_insert(bc, b);
-	mutex_unlock(&bc->lock);
-
-	return ret;
+	guard(mutex)(&bc->lock);
+	return __bch2_btree_node_hash_insert(bc, b);
 }
 
 void bch2_btree_node_update_key_early(struct btree_trans *trans,
@@ -317,7 +310,7 @@ void bch2_btree_node_update_key_early(struct btree_trans *trans,
	b = bch2_btree_node_get_noiter(trans, tmp.k, btree, level, true);
	if (!IS_ERR_OR_NULL(b)) {
-		mutex_lock(&c->btree_cache.lock);
+		guard(mutex)(&c->btree_cache.lock);
 
		__bch2_btree_node_hash_remove(&c->btree_cache, b);
 
@@ -325,7 +318,6 @@ void bch2_btree_node_update_key_early(struct btree_trans *trans,
		ret = __bch2_btree_node_hash_insert(&c->btree_cache, b);
		BUG_ON(ret);
-		mutex_unlock(&c->btree_cache.lock);
 
		six_unlock_read(&b->c.lock);
	}
@@ -930,20 +922,18 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
	}
 
	if (unlikely(!bkey_is_btree_ptr(&k->k))) {
-		struct printbuf buf = PRINTBUF;
+		CLASS(printbuf, buf)();
		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k));
		int ret = bch2_fs_topology_error(c, "attempting to get btree node with non-btree key %s", buf.buf);
-		printbuf_exit(&buf);
		return ERR_PTR(ret);
	}
 
	if (unlikely(k->k.u64s > BKEY_BTREE_PTR_U64s_MAX)) {
-		struct printbuf buf = PRINTBUF;
+		CLASS(printbuf, buf)();
		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k));
		int ret = bch2_fs_topology_error(c, "attempting to get btree node with too big key %s", buf.buf);
-		printbuf_exit(&buf);
		return ERR_PTR(ret);
	}
 
@@ -1018,11 +1008,10 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
 
 static noinline void btree_bad_header(struct bch_fs *c, struct btree *b)
 {
-	struct printbuf buf = PRINTBUF;
-
	if (c->recovery.pass_done < BCH_RECOVERY_PASS_check_allocations)
		return;
 
+	CLASS(printbuf, buf)();
	prt_printf(&buf, "btree node header doesn't match ptr: ");
	bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level);
@@ -1038,8 +1027,6 @@ static noinline void btree_bad_header(struct bch_fs *c, struct btree *b)
	bch2_bpos_to_text(&buf, b->data->max_key);
 
	bch2_fs_topology_error(c, "%s", buf.buf);
-
-	printbuf_exit(&buf);
 }
 
 static inline void btree_check_header(struct bch_fs *c, struct btree *b)
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 7269490a5d9a..34cb8a4324dc 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -95,11 +95,10 @@ static struct bkey_s unsafe_bkey_s_c_to_s(struct bkey_s_c k)
 
 static inline void __gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
 {
-	preempt_disable();
+	guard(preempt)();
	write_seqcount_begin(&c->gc_pos_lock);
	c->gc_pos = new_pos;
	write_seqcount_end(&c->gc_pos_lock);
-	preempt_enable();
 }
 
 static inline void gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
@@ -138,14 +137,13 @@ static int set_node_min(struct bch_fs *c, struct btree *b, struct bpos new_min)
	int ret;
 
	if (c->opts.verbose) {
-		struct printbuf buf = PRINTBUF;
+		CLASS(printbuf, buf)();
 
		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
		prt_str(&buf, " -> ");
		bch2_bpos_to_text(&buf, new_min);
 
		bch_info(c, "%s(): %s", __func__, buf.buf);
-		printbuf_exit(&buf);
	}
 
	new = kmalloc_array(BKEY_BTREE_PTR_U64s_MAX, sizeof(u64), GFP_KERNEL);
@@ -174,14 +172,13 @@ static int set_node_max(struct bch_fs *c, struct btree *b, struct bpos new_max)
	int ret;
 
	if (c->opts.verbose) {
-		struct printbuf buf = PRINTBUF;
+		CLASS(printbuf, buf)();
 
		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
		prt_str(&buf, " -> ");
		bch2_bpos_to_text(&buf, new_max);
 
		bch_info(c, "%s(): %s", __func__, buf.buf);
-		printbuf_exit(&buf);
	}
 
	ret = bch2_journal_key_delete(c, b->c.btree_id, b->c.level + 1, b->key.k.p);
@@ -205,13 +202,12 @@ static int set_node_max(struct bch_fs *c, struct btree *b, struct bpos new_max)
 
	bch2_btree_node_drop_keys_outside_node(b);
 
-	mutex_lock(&c->btree_cache.lock);
+	guard(mutex)(&c->btree_cache.lock);
	__bch2_btree_node_hash_remove(&c->btree_cache, b);
 
	bkey_copy(&b->key, &new->k_i);
	ret = __bch2_btree_node_hash_insert(&c->btree_cache, b);
	BUG_ON(ret);
-	mutex_unlock(&c->btree_cache.lock);
	return 0;
 }
 
@@ -223,7 +219,7 @@ static int btree_check_node_boundaries(struct btree_trans *trans, struct btree *
	struct bpos expected_start = !prev
		? b->data->min_key
		: bpos_successor(prev->key.k.p);
-	struct printbuf buf = PRINTBUF;
+	CLASS(printbuf, buf)();
	int ret = 0;
 
	BUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
@@ -253,7 +249,7 @@ static int btree_check_node_boundaries(struct btree_trans *trans, struct btree *
						expected_start,
						bpos_predecessor(cur->data->min_key));
			if (ret)
-				goto err;
+				return ret;
 
			*pulled_from_scan = cur->data->min_key;
			ret = DID_FILL_FROM_SCAN;
@@ -286,9 +282,7 @@ static int btree_check_node_boundaries(struct btree_trans *trans, struct btree *
			}
		}
	}
-err:
fsck_err:
-	printbuf_exit(&buf);
	return ret;
 }
 
@@ -296,7 +290,7 @@ static int btree_repair_node_end(struct btree_trans *trans, struct btree *b,
				 struct btree *child, struct bpos *pulled_from_scan)
 {
	struct bch_fs *c = trans->c;
-	struct printbuf buf = PRINTBUF;
+	CLASS(printbuf, buf)();
	int ret = 0;
 
	if (bpos_eq(child->key.k.p, b->key.k.p))
@@ -317,7 +311,7 @@ static int btree_repair_node_end(struct btree_trans *trans, struct btree *b,
		ret = bch2_get_scanned_nodes(c, b->c.btree_id, 0,
					     bpos_successor(child->key.k.p), b->key.k.p);
		if (ret)
-			goto err;
+			return ret;
 
		*pulled_from_scan = b->key.k.p;
		ret = DID_FILL_FROM_SCAN;
@@ -325,9 +319,7 @@
		ret = set_node_max(c, child, b->key.k.p);
	}
 }
-err:
fsck_err:
-	printbuf_exit(&buf);
	return ret;
 }
 
@@ -340,7 +332,7 @@ static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct
	struct bkey_buf prev_k, cur_k;
	struct btree *prev = NULL, *cur = NULL;
	bool have_child, new_pass = false;
-	struct printbuf buf = PRINTBUF;
+	CLASS(printbuf, buf)();
	int ret = 0;
 
	if (!b->c.level)
@@ -529,7 +521,6 @@ fsck_err:
 
	bch2_bkey_buf_exit(&prev_k, c);
	bch2_bkey_buf_exit(&cur_k, c);
-	printbuf_exit(&buf);
	bch_err_fn(c, ret);
	return ret;
 }
 
@@ -539,7 +530,7 @@ static int bch2_check_root(struct btree_trans *trans, enum btree_id btree,
 {
	struct bch_fs *c = trans->c;
	struct btree_root *r = bch2_btree_id_root(c, btree);
-	struct printbuf buf = PRINTBUF;
+	CLASS(printbuf, buf)();
	int ret = 0;
 
	bch2_btree_id_to_text(&buf, btree);
@@ -568,21 +559,20 @@ static int bch2_check_root(struct btree_trans *trans, enum btree_id btree,
		bch2_shoot_down_journal_keys(c, btree, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
		ret = bch2_get_scanned_nodes(c, btree, 0, POS_MIN, SPOS_MAX);
		if (ret)
-			goto err;
+			return ret;
	}
 
		*reconstructed_root = true;
	}
err:
fsck_err:
-	printbuf_exit(&buf);
	bch_err_fn(c, ret);
	return ret;
 }
 
 int bch2_check_topology(struct bch_fs *c)
 {
-	struct btree_trans *trans = bch2_trans_get(c);
+	CLASS(btree_trans, trans)(c);
	struct bpos pulled_from_scan = POS_MIN;
	int ret = 0;
 
@@ -603,9 +593,8 @@ recover:
		six_unlock_read(&b->c.lock);
 
		if (ret == DROP_THIS_NODE) {
-			mutex_lock(&c->btree_cache.lock);
-			bch2_btree_node_hash_remove(&c->btree_cache, b);
-			mutex_unlock(&c->btree_cache.lock);
+			scoped_guard(mutex, &c->btree_cache.lock)
+				bch2_btree_node_hash_remove(&c->btree_cache, b);
 
			r->b = NULL;
 
@@ -614,17 +603,15 @@ recover:
			goto recover;
		}
 
-		struct printbuf buf = PRINTBUF;
+		CLASS(printbuf, buf)();
		bch2_btree_id_to_text(&buf, i);
		bch_err(c, "empty btree root %s", buf.buf);
-		printbuf_exit(&buf);
		bch2_btree_root_alloc_fake_trans(trans, i, 0);
		r->alive = false;
		ret = 0;
	}
	}
-	bch2_trans_put(trans);
	return ret;
 }
 
@@ -651,7 +638,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
	struct bkey deleted = KEY(0, 0, 0);
	struct bkey_s_c old = (struct bkey_s_c) { &deleted, NULL };
-	struct printbuf buf = PRINTBUF;
+	CLASS(printbuf, buf)();
	int
ret = 0; deleted.p = k.k->p; @@ -675,10 +662,9 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id, (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); bch2_dev_btree_bitmap_mark(c, k); bch2_write_super(c); - mutex_unlock(&c->sb_lock); } /* @@ -703,7 +689,6 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id, BTREE_TRIGGER_gc|BTREE_TRIGGER_insert|flags); out: fsck_err: - printbuf_exit(&buf); bch_err_fn(c, ret); return ret; } @@ -771,8 +756,8 @@ static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r) static int bch2_gc_btrees(struct bch_fs *c) { - struct btree_trans *trans = bch2_trans_get(c); - struct printbuf buf = PRINTBUF; + CLASS(btree_trans, trans)(c); + CLASS(printbuf, buf)(); int ret = 0; struct progress_indicator_state progress; @@ -792,8 +777,6 @@ static int bch2_gc_btrees(struct bch_fs *c) ret = bch2_gc_btree(trans, &progress, btree, true); } - printbuf_exit(&buf); - bch2_trans_put(trans); bch_err_fn(c, ret); return ret; } @@ -945,16 +928,16 @@ fsck_err: static int bch2_gc_alloc_done(struct bch_fs *c) { + CLASS(btree_trans, trans)(c); int ret = 0; for_each_member_device(c, ca) { - ret = bch2_trans_run(c, - for_each_btree_key_max_commit(trans, iter, BTREE_ID_alloc, + ret = for_each_btree_key_max_commit(trans, iter, BTREE_ID_alloc, POS(ca->dev_idx, ca->mi.first_bucket), POS(ca->dev_idx, ca->mi.nbuckets - 1), BTREE_ITER_slots|BTREE_ITER_prefetch, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - bch2_alloc_write_key(trans, &iter, ca, k))); + bch2_alloc_write_key(trans, &iter, ca, k)); if (ret) { bch2_dev_put(ca); break; @@ -987,7 +970,7 @@ static int bch2_gc_write_stripes_key(struct btree_trans *trans, struct bkey_s_c k) { struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); const struct bch_stripe *s; struct gc_stripe *m; bool bad = false; @@ -1032,18 +1015,17 @@ static int bch2_gc_write_stripes_key(struct btree_trans *trans, ret = bch2_trans_update(trans, iter, &new->k_i, 0); } fsck_err: - printbuf_exit(&buf); return ret; } static int bch2_gc_stripes_done(struct bch_fs *c) { - return bch2_trans_run(c, - for_each_btree_key_commit(trans, iter, + CLASS(btree_trans, trans)(c); + return for_each_btree_key_commit(trans, iter, BTREE_ID_stripes, POS_MIN, BTREE_ITER_prefetch, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - bch2_gc_write_stripes_key(trans, &iter, k))); + bch2_gc_write_stripes_key(trans, &iter, k)); } /** @@ -1072,8 +1054,8 @@ int bch2_check_allocations(struct bch_fs *c) { int ret; - down_read(&c->state_lock); - down_write(&c->gc_lock); + guard(rwsem_read)(&c->state_lock); + guard(rwsem_write)(&c->gc_lock); bch2_btree_interior_updates_flush(c); @@ -1102,15 +1084,11 @@ int bch2_check_allocations(struct bch_fs *c) bch2_gc_stripes_done(c) ?: bch2_gc_reflink_done(c); out: - percpu_down_write(&c->mark_lock); - /* Indicates that gc is no longer in progress: */ - __gc_pos_set(c, gc_phase(GC_PHASE_not_running)); - - bch2_gc_free(c); - percpu_up_write(&c->mark_lock); - - up_write(&c->gc_lock); - up_read(&c->state_lock); + scoped_guard(percpu_write, &c->mark_lock) { + /* Indicates that gc is no longer in progress: */ + __gc_pos_set(c, gc_phase(GC_PHASE_not_running)); + bch2_gc_free(c); + } /* * At startup, allocations can happen directly instead of via the @@ -1121,7 +1099,6 @@ out: if (!ret && !test_bit(BCH_FS_errors_not_fixed, &c->flags)) bch2_sb_members_clean_deleted(c); - bch_err_fn(c, ret); return ret; } 
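Aside (illustrative sketch, not part of the patch): the recurring transformation in the hunks above replaces open-coded acquire/release pairs (mutex_lock()/mutex_unlock(), percpu_down_read()/percpu_up_read(), preempt_disable()/preempt_enable()) and init/exit pairs (struct printbuf buf = PRINTBUF; ... printbuf_exit(&buf)) with the scope-based cleanup helpers guard(), scoped_guard() and CLASS() from <linux/cleanup.h>, so the release runs automatically on every return path and the goto-unwind labels can be deleted. A minimal sketch of the pattern, using a made-up struct counter rather than any bcachefs type:

#include <linux/cleanup.h>
#include <linux/mutex.h>
#include <linux/types.h>

struct counter {
	struct mutex	lock;
	u64		val;
};

static u64 counter_read(struct counter *c)
{
	guard(mutex)(&c->lock);		/* mutex_unlock() runs automatically at every return */
	return c->val;
}

static void counter_add(struct counter *c, u64 n)
{
	scoped_guard(mutex, &c->lock)	/* lock held only for this statement/block */
		c->val += n;
	/* lock already released here */
}

This is why error paths throughout the diff collapse from "goto err; ... err: unlock; return ret;" into plain early returns: the destructor emitted at scope exit does the unwinding.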
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 8924dae15d41..c0fba9016d6a 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -592,7 +592,7 @@ static int __btree_err(int ret, !(test_bit(BCH_FS_in_fsck, &c->flags) && c->opts.fix_errors == FSCK_FIX_ask); - struct printbuf out = PRINTBUF; + CLASS(printbuf, out)(); bch2_log_msg_start(c, &out); if (!print_deferred) @@ -619,13 +619,13 @@ static int __btree_err(int ret, if (!have_retry) ret = bch_err_throw(c, fsck_fix); - goto out; + return ret; case -BCH_ERR_btree_node_read_err_bad_node: prt_str(&out, ", "); break; } - goto out; + return ret; } if (rw == WRITE) { @@ -647,16 +647,14 @@ static int __btree_err(int ret, if (!have_retry) ret = bch_err_throw(c, fsck_fix); - goto out; + return ret; case -BCH_ERR_btree_node_read_err_bad_node: prt_str(&out, ", "); break; } print: bch2_print_str(c, KERN_ERR, out.buf); -out: fsck_err: - printbuf_exit(&out); return ret; } @@ -735,8 +733,8 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, struct printbuf *err_msg) { unsigned version = le16_to_cpu(i->version); - struct printbuf buf1 = PRINTBUF; - struct printbuf buf2 = PRINTBUF; + CLASS(printbuf, buf1)(); + CLASS(printbuf, buf2)(); int ret = 0; btree_err_on(!bch2_version_compatible(version), @@ -755,10 +753,9 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, "bset version %u older than superblock version_min %u", version, c->sb.version_min)) { if (bch2_version_compatible(version)) { - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); c->disk_sb.sb->version_min = cpu_to_le16(version); bch2_write_super(c); - mutex_unlock(&c->sb_lock); } else { /* We have no idea what's going on: */ i->version = cpu_to_le16(c->sb.version); @@ -772,10 +769,9 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, btree_node_bset_newer_than_sb, "bset version %u newer than superblock version %u", version, c->sb.version)) { - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); c->disk_sb.sb->version = cpu_to_le16(version); bch2_write_super(c); - mutex_unlock(&c->sb_lock); } btree_err_on(BSET_SEPARATE_WHITEOUTS(i), @@ -875,8 +871,6 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, &bn->format); } fsck_err: - printbuf_exit(&buf2); - printbuf_exit(&buf1); return ret; } @@ -946,7 +940,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, { unsigned version = le16_to_cpu(i->version); struct bkey_packed *k, *prev = NULL; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bool updated_range = b->key.k.type == KEY_TYPE_btree_ptr_v2 && BTREE_PTR_RANGE_UPDATED(&bkey_i_to_btree_ptr_v2(&b->key)->v); int ret = 0; @@ -1051,7 +1045,6 @@ got_good_key: set_btree_node_need_rewrite_error(b); } fsck_err: - printbuf_exit(&buf); return ret; } @@ -1070,7 +1063,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, BTREE_PTR_RANGE_UPDATED(&bkey_i_to_btree_ptr_v2(&b->key)->v); unsigned ptr_written = btree_ptr_sectors_written(bkey_i_to_s_c(&b->key)); u64 max_journal_seq = 0; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); int ret = 0, write = READ; u64 start_time = local_clock(); @@ -1302,9 +1295,6 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, btree_bounce_free(c, btree_buf_bytes(b), used_mempool, sorted); - if (updated_range) - bch2_btree_node_drop_keys_outside_node(b); - i = &b->data->keys; for (k = i->start; k != vstruct_last(i);) { struct bkey tmp; @@ -1342,6 +1332,9 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct 
bch_dev *ca, btree_node_reset_sib_u64s(b); + if (updated_range) + bch2_btree_node_drop_keys_outside_node(b); + /* * XXX: * @@ -1385,7 +1378,6 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, } fsck_err: mempool_free(iter, &c->fill_iter); - printbuf_exit(&buf); bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read_done], start_time); return ret; } @@ -1401,7 +1393,7 @@ static void btree_node_read_work(struct work_struct *work) struct bch_io_failures failed = { .nr = 0 }; int ret = 0; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_log_msg_start(c, &buf); prt_printf(&buf, "btree node read error at btree "); @@ -1493,7 +1485,6 @@ start: bch2_time_stats_update(&c->times[BCH_TIME_btree_node_read], rb->start_time); bio_put(&rb->bio); - printbuf_exit(&buf); clear_btree_node_read_in_flight(b); smp_mb__after_atomic(); wake_up_bit(&b->flags, BTREE_NODE_read_in_flight); @@ -1575,7 +1566,7 @@ static CLOSURE_CALLBACK(btree_node_read_all_replicas_done) closure_type(ra, struct btree_node_read_all, cl); struct bch_fs *c = ra->c; struct btree *b = ra->b; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bool dump_bset_maps = false; int ret = 0, best = -1, write = READ; unsigned i, written = 0, written2 = 0; @@ -1684,11 +1675,10 @@ fsck_err: if (ret) { set_btree_node_read_error(b); - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_btree_lost_data(c, &buf, b->c.btree_id); if (buf.pos) bch_err(c, "%s", buf.buf); - printbuf_exit(&buf); } else if (*saw_error) bch2_btree_node_rewrite_async(c, b); @@ -1699,7 +1689,6 @@ fsck_err: closure_debug_destroy(&ra->cl); kfree(ra); - printbuf_exit(&buf); clear_btree_node_read_in_flight(b); smp_mb__after_atomic(); @@ -1819,7 +1808,7 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b, if (ret <= 0) { bool ratelimit = true; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_log_msg_start(c, &buf); prt_str(&buf, "btree node read error: no device to read from\n at "); @@ -1836,7 +1825,6 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b, DEFAULT_RATELIMIT_BURST); if (!ratelimit || __ratelimit(&rs)) bch2_print_str(c, KERN_ERR, buf.buf); - printbuf_exit(&buf); set_btree_node_read_error(b); clear_btree_node_read_in_flight(b); @@ -1918,9 +1906,8 @@ static int __bch2_btree_root_read(struct btree_trans *trans, enum btree_id id, bch2_btree_node_read(trans, b, true); if (btree_node_read_error(b)) { - mutex_lock(&c->btree_cache.lock); - bch2_btree_node_hash_remove(&c->btree_cache, b); - mutex_unlock(&c->btree_cache.lock); + scoped_guard(mutex, &c->btree_cache.lock) + bch2_btree_node_hash_remove(&c->btree_cache, b); ret = bch_err_throw(c, btree_node_read_error); goto err; @@ -1937,7 +1924,8 @@ err: int bch2_btree_root_read(struct bch_fs *c, enum btree_id id, const struct bkey_i *k, unsigned level) { - return bch2_trans_run(c, __bch2_btree_root_read(trans, id, k, level)); + CLASS(btree_trans, trans)(c); + return __bch2_btree_root_read(trans, id, k, level); } struct btree_node_scrub { @@ -2016,7 +2004,7 @@ static void btree_node_scrub_work(struct work_struct *work) { struct btree_node_scrub *scrub = container_of(work, struct btree_node_scrub, work); struct bch_fs *c = scrub->c; - struct printbuf err = PRINTBUF; + CLASS(printbuf, err)(); __bch2_btree_pos_to_text(&err, c, scrub->btree, scrub->level, bkey_i_to_s_c(scrub->key.k)); @@ -2031,7 +2019,6 @@ static void btree_node_scrub_work(struct work_struct *work) bch_err_fn_ratelimited(c, ret); } - printbuf_exit(&err); 
bch2_bkey_buf_exit(&scrub->key, c); btree_bounce_free(c, c->opts.btree_node_size, scrub->used_mempool, scrub->buf); enumerated_ref_put(&scrub->ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_scrub); @@ -2212,7 +2199,8 @@ static void btree_node_write_work(struct work_struct *work) } } else { - ret = bch2_trans_do(c, + CLASS(btree_trans, trans)(c); + ret = lockrestart_do(trans, bch2_btree_node_update_key_get_iter(trans, b, &wbio->key, BCH_WATERMARK_interior_updates| BCH_TRANS_COMMIT_journal_reclaim| @@ -2231,11 +2219,10 @@ err: set_btree_node_noevict(b); if (!bch2_err_matches(ret, EROFS)) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_printf(&buf, "writing btree node: %s\n ", bch2_err_str(ret)); bch2_btree_pos_to_text(&buf, c, b); bch2_fs_fatal_error(c, "%s", buf.buf); - printbuf_exit(&buf); } goto out; } @@ -2254,13 +2241,12 @@ static void btree_node_write_endio(struct bio *bio) wbio->submit_time, !bio->bi_status); if (ca && bio->bi_status) { - struct printbuf buf = PRINTBUF; - buf.atomic++; + CLASS(printbuf, buf)(); + guard(printbuf_atomic)(&buf); prt_printf(&buf, "btree write error: %s\n ", bch2_blk_status_to_str(bio->bi_status)); bch2_btree_pos_to_text(&buf, c, b); bch_err_dev_ratelimited(ca, "%s", buf.buf); - printbuf_exit(&buf); } if (bio->bi_status) { diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 7463946898c0..cc771affa511 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -903,7 +903,7 @@ static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans, k = bch2_btree_and_journal_iter_peek(&jiter); if (!k.k) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_str(&buf, "node not found at pos "); bch2_bpos_to_text(&buf, path->pos); @@ -911,7 +911,6 @@ static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans, bch2_btree_pos_to_text(&buf, c, l->b); ret = bch2_fs_topology_error(c, "%s", buf.buf); - printbuf_exit(&buf); goto err; } @@ -930,7 +929,7 @@ static noinline_for_stack int btree_node_missing_err(struct btree_trans *trans, struct btree_path *path) { struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_str(&buf, "node not found at pos "); bch2_bpos_to_text(&buf, path->pos); @@ -1451,7 +1450,7 @@ void __noreturn bch2_trans_restart_error(struct btree_trans *trans, u32 restart_ static void __noreturn bch2_trans_in_restart_error(struct btree_trans *trans) { #ifdef CONFIG_BCACHEFS_DEBUG - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_prt_backtrace(&buf, &trans->last_restarted_trace); panic("in transaction restart: %s, last restarted by\n%s", bch2_err_str(trans->restarted), @@ -1601,13 +1600,13 @@ void bch2_trans_paths_to_text(struct printbuf *out, struct btree_trans *trans) static noinline __cold void __bch2_dump_trans_paths_updates(struct btree_trans *trans, bool nosort) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); + bch2_log_msg_start(trans->c, &buf); __bch2_trans_paths_to_text(&buf, trans, nosort); bch2_trans_updates_to_text(&buf, trans); bch2_print_str(trans->c, KERN_ERR, buf.buf); - printbuf_exit(&buf); } noinline __cold @@ -1620,22 +1619,19 @@ noinline __cold static void bch2_trans_update_max_paths(struct btree_trans *trans) { struct btree_transaction_stats *s = btree_trans_stats(trans); - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); size_t nr = bitmap_weight(trans->paths_allocated, trans->nr_paths); bch2_trans_paths_to_text(&buf, trans); if (!buf.allocation_failure) { -
mutex_lock(&s->lock); + guard(mutex)(&s->lock); if (nr > s->nr_max_paths) { s->nr_max_paths = nr; swap(s->max_paths_text, buf.buf); } - mutex_unlock(&s->lock); } - printbuf_exit(&buf); - trans->nr_paths_max = nr; } @@ -1643,11 +1639,10 @@ noinline __cold int __bch2_btree_trans_too_many_iters(struct btree_trans *trans) { if (trace_trans_restart_too_many_iters_enabled()) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_trans_paths_to_text(&buf, trans); trace_trans_restart_too_many_iters(trans, _THIS_IP_, buf.buf); - printbuf_exit(&buf); } count_event(trans->c, trans_restart_too_many_iters); @@ -3196,14 +3191,13 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size, unsigned long if (WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX)) { #ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_log_msg_start(c, &buf); prt_printf(&buf, "bump allocator exceeded BTREE_TRANS_MEM_MAX (%u)\n", BTREE_TRANS_MEM_MAX); bch2_trans_kmalloc_trace_to_text(&buf, &trans->trans_kmalloc_trace); bch2_print_str(c, KERN_ERR, buf.buf); - printbuf_exit(&buf); #endif } @@ -3213,7 +3207,7 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size, unsigned long struct btree_transaction_stats *s = btree_trans_stats(trans); if (new_bytes > s->max_mem) { - mutex_lock(&s->lock); + guard(mutex)(&s->lock); #ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE darray_resize(&s->trans_kmalloc_trace, trans->trans_kmalloc_trace.nr); s->trans_kmalloc_trace.nr = min(s->trans_kmalloc_trace.size, @@ -3225,7 +3219,6 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size, unsigned long s->trans_kmalloc_trace.nr); #endif s->max_mem = new_bytes; - mutex_unlock(&s->lock); } if (trans->used_mempool || new_bytes > BTREE_TRANS_MEM_MAX) { @@ -3535,7 +3528,7 @@ static void check_btree_paths_leaked(struct btree_trans *trans) struct btree_path *path; unsigned i; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_log_msg_start(c, &buf); prt_printf(&buf, "btree paths leaked from %s!\n", trans->fn); @@ -3547,7 +3540,6 @@ static void check_btree_paths_leaked(struct btree_trans *trans) bch2_fs_emergency_read_only2(c, &buf); bch2_print_str(c, KERN_ERR, buf.buf); - printbuf_exit(&buf); } } #else @@ -3672,11 +3664,11 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans) /* trans->paths is rcu protected vs. 
freeing */ guard(rcu)(); - out->atomic++; + guard(printbuf_atomic)(out); struct btree_path *paths = rcu_dereference(trans->paths); if (!paths) - goto out; + return; unsigned long *paths_allocated = trans_paths_allocated(paths); @@ -3712,8 +3704,6 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans) bch2_btree_bkey_cached_common_to_text(out, b); prt_newline(out); } -out: - --out->atomic; } void bch2_fs_btree_iter_exit(struct bch_fs *c) diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index cc2c6bb6b6a8..53074ed62e09 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -1007,13 +1007,19 @@ static inline void class_btree_trans_destructor(struct btree_trans **p) #define class_btree_trans_constructor(_c) bch2_trans_get(_c) +/* deprecated, prefer CLASS(btree_trans) */ #define bch2_trans_run(_c, _do) \ ({ \ CLASS(btree_trans, trans)(_c); \ (_do); \ }) -#define bch2_trans_do(_c, _do) bch2_trans_run(_c, lockrestart_do(trans, _do)) +/* deprecated, prefer CLASS(btree_trans) */ +#define bch2_trans_do(_c, _do) \ +({ \ + CLASS(btree_trans, trans)(_c); \ + lockrestart_do(trans, _do); \ +}) void bch2_btree_trans_to_text(struct printbuf *, struct btree_trans *); diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c index ea839560a136..24f2fbe84ad7 100644 --- a/fs/bcachefs/btree_journal_iter.c +++ b/fs/bcachefs/btree_journal_iter.c @@ -5,6 +5,7 @@ #include "bset.h" #include "btree_cache.h" #include "btree_journal_iter.h" +#include "disk_accounting.h" #include "journal_io.h" #include <linux/sort.h> @@ -278,12 +279,23 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id, if (idx < keys->size && journal_key_cmp(&n, &keys->data[idx]) == 0) { + struct bkey_i *o = keys->data[idx].k; + + if (k->k.type == KEY_TYPE_accounting && + o->k.type == KEY_TYPE_accounting) { + if (!keys->data[idx].allocated) + goto insert; + + bch2_accounting_accumulate(bkey_i_to_accounting(k), + bkey_i_to_s_c_accounting(o)); + } + if (keys->data[idx].allocated) kfree(keys->data[idx].k); keys->data[idx] = n; return 0; } - +insert: if (idx > keys->gap) idx -= keys->size - keys->nr; @@ -450,9 +462,8 @@ void bch2_journal_key_overwritten(struct bch_fs *c, enum btree_id btree, keys->data[idx].level == level && bpos_eq(keys->data[idx].k->k.p, pos) && !keys->data[idx].overwritten) { - mutex_lock(&keys->overwrite_lock); + guard(mutex)(&keys->overwrite_lock); __bch2_journal_key_overwritten(keys, idx); - mutex_unlock(&keys->overwrite_lock); } } @@ -803,7 +814,7 @@ void bch2_shoot_down_journal_keys(struct bch_fs *c, enum btree_id btree, void bch2_journal_keys_dump(struct bch_fs *c) { struct journal_keys *keys = &c->journal_keys; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); pr_info("%zu keys:", keys->nr); @@ -817,7 +828,6 @@ void bch2_journal_keys_dump(struct bch_fs *c) bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(i->k)); pr_err("%s", buf.buf); } - printbuf_exit(&buf); } void bch2_fs_journal_keys_init(struct bch_fs *c) diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index 19d1bb806395..ebba14da92b4 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -301,13 +301,12 @@ static noinline_for_stack void do_trace_key_cache_fill(struct btree_trans *trans struct btree_path *ck_path, struct bkey_s_c k) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_bpos_to_text(&buf, ck_path->pos); prt_char(&buf, ' '); bch2_bkey_val_to_text(&buf, trans->c, k); 
trace_key_cache_fill(trans, buf.buf); - printbuf_exit(&buf); } static noinline int btree_key_cache_fill(struct btree_trans *trans, @@ -540,10 +539,10 @@ int bch2_btree_key_cache_journal_flush(struct journal *j, struct bkey_cached *ck = container_of(pin, struct bkey_cached, journal); struct bkey_cached_key key; - struct btree_trans *trans = bch2_trans_get(c); int srcu_idx = srcu_read_lock(&c->btree_trans_barrier); int ret = 0; + CLASS(btree_trans, trans)(c); btree_node_lock_nopath_nofail(trans, &ck->c, SIX_LOCK_read); key = ck->key; @@ -566,8 +565,6 @@ int bch2_btree_key_cache_journal_flush(struct journal *j, BCH_TRANS_COMMIT_journal_reclaim, false)); unlock: srcu_read_unlock(&c->btree_trans_barrier, srcu_idx); - - bch2_trans_put(trans); return ret; } diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c index bed2b4b6ffb9..38c5643e8a78 100644 --- a/fs/bcachefs/btree_locking.c +++ b/fs/bcachefs/btree_locking.c @@ -159,13 +159,11 @@ static void trace_would_deadlock(struct lock_graph *g, struct btree_trans *trans count_event(c, trans_restart_would_deadlock); if (trace_trans_restart_would_deadlock_enabled()) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); + guard(printbuf_atomic)(&buf); - buf.atomic++; print_cycle(&buf, g); - trace_trans_restart_would_deadlock(trans, buf.buf); - printbuf_exit(&buf); } } @@ -196,8 +194,8 @@ static int btree_trans_abort_preference(struct btree_trans *trans) static noinline __noreturn void break_cycle_fail(struct lock_graph *g) { - struct printbuf buf = PRINTBUF; - buf.atomic++; + CLASS(printbuf, buf)(); + guard(printbuf_atomic)(&buf); prt_printf(&buf, bch2_fmt(g->g->trans->c, "cycle of nofail locks")); @@ -214,7 +212,6 @@ static noinline __noreturn void break_cycle_fail(struct lock_graph *g) } bch2_print_str(g->g->trans->c, KERN_ERR, buf.buf); - printbuf_exit(&buf); BUG(); } @@ -692,7 +689,7 @@ int __bch2_btree_path_upgrade(struct btree_trans *trans, count_event(trans->c, trans_restart_upgrade); if (trace_trans_restart_upgrade_enabled()) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_printf(&buf, "%s %pS\n", trans->fn, (void *) _RET_IP_); prt_printf(&buf, "btree %s pos\n", bch2_btree_id_str(path->btree_id)); @@ -708,7 +705,6 @@ int __bch2_btree_path_upgrade(struct btree_trans *trans, path->l[f.l].lock_seq); trace_trans_restart_upgrade(trans->c, buf.buf); - printbuf_exit(&buf); } out: bch2_trans_verify_locks(trans); @@ -777,7 +773,7 @@ static noinline __cold void bch2_trans_relock_fail(struct btree_trans *trans, st goto out; if (trace_trans_restart_relock_enabled()) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_bpos_to_text(&buf, path->pos); prt_printf(&buf, " %s l=%u seq=%u node seq=", @@ -797,7 +793,6 @@ static noinline __cold void bch2_trans_relock_fail(struct btree_trans *trans, st } trace_trans_restart_relock(trans, ip, buf.buf); - printbuf_exit(&buf); } count_event(trans->c, trans_restart_relock); diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c index 42c9eb2c786e..d997e3818c30 100644 --- a/fs/bcachefs/btree_node_scan.c +++ b/fs/bcachefs/btree_node_scan.c @@ -65,16 +65,6 @@ static void found_btree_node_to_key(struct bkey_i *k, const struct found_btree_n memcpy(bp->v.start, f->ptrs, sizeof(struct bch_extent_ptr) * f->nr_ptrs); } -static inline u64 bkey_journal_seq(struct bkey_s_c k) -{ - switch (k.k->type) { - case KEY_TYPE_inode_v3: - return le64_to_cpu(bkey_s_c_to_inode_v3(k).v->bi_journal_seq); - default: - return 0; - } -} - static int 
found_btree_node_cmp_cookie(const void *_l, const void *_r) { const struct found_btree_node *l = _l; @@ -206,17 +196,15 @@ static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca, n.journal_seq = le64_to_cpu(bn->keys.journal_seq), n.sectors_written = b->written; - mutex_lock(&f->lock); + guard(mutex)(&f->lock); if (BSET_BIG_ENDIAN(&bn->keys) != CPU_BIG_ENDIAN) { bch_err(c, "try_read_btree_node() can't handle endian conversion"); f->ret = -EINVAL; - goto unlock; + return; } if (darray_push(&f->nodes, n)) f->ret = -ENOMEM; -unlock: - mutex_unlock(&f->lock); } } @@ -371,7 +359,7 @@ static int handle_overwrites(struct bch_fs *c, int bch2_scan_for_btree_nodes(struct bch_fs *c) { struct find_btree_nodes *f = &c->found_btree_nodes; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); found_btree_nodes nodes_heap = {}; size_t dst; int ret = 0; @@ -478,7 +466,6 @@ int bch2_scan_for_btree_nodes(struct bch_fs *c) eytzinger0_sort(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_pos, NULL); err: darray_exit(&nodes_heap); - printbuf_exit(&buf); return ret; } @@ -550,7 +537,7 @@ int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree, return ret; if (c->opts.verbose) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_str(&buf, "recovery "); bch2_btree_id_level_to_text(&buf, btree, level); @@ -560,7 +547,6 @@ int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree, bch2_bpos_to_text(&buf, node_max); bch_info(c, "%s(): %s", __func__, buf.buf); - printbuf_exit(&buf); } struct found_btree_node search = { @@ -584,10 +570,9 @@ int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree, found_btree_node_to_key(&tmp.k, &n); if (c->opts.verbose) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&tmp.k)); bch_verbose(c, "%s(): recovering %s", __func__, buf.buf); - printbuf_exit(&buf); } BUG_ON(bch2_bkey_validate(c, bkey_i_to_s_c(&tmp.k), diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index a7e9d8916848..1f9965ae610c 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -235,10 +235,10 @@ static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin, struct bch_fs *c = container_of(j, struct bch_fs, journal); struct btree_write *w = container_of(pin, struct btree_write, journal); struct btree *b = container_of(w, struct btree, writes[i]); - struct btree_trans *trans = bch2_trans_get(c); unsigned long old, new; unsigned idx = w - b->writes; + CLASS(btree_trans, trans)(c); btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read); old = READ_ONCE(b->flags); @@ -257,8 +257,6 @@ static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin, btree_node_write_if_need(trans, b, SIX_LOCK_read); six_unlock_read(&b->c.lock); - - bch2_trans_put(trans); return 0; } @@ -591,7 +589,8 @@ static noinline int bch2_trans_commit_run_gc_triggers(struct btree_trans *trans) } static inline int -bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, +bch2_trans_commit_write_locked(struct btree_trans *trans, + enum bch_trans_commit_flags flags, struct btree_insert_entry **stopped_at, unsigned long trace_ip) { @@ -673,16 +672,20 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, struct bkey_i *accounting; - percpu_down_read(&c->mark_lock); - for (accounting = btree_trans_subbuf_base(trans, &trans->accounting); - accounting != 
btree_trans_subbuf_top(trans, &trans->accounting); - accounting = bkey_next(accounting)) { - ret = bch2_accounting_trans_commit_hook(trans, - bkey_i_to_accounting(accounting), flags); - if (ret) - goto revert_fs_usage; - } - percpu_up_read(&c->mark_lock); + scoped_guard(percpu_read, &c->mark_lock) + for (accounting = btree_trans_subbuf_base(trans, &trans->accounting); + accounting != btree_trans_subbuf_top(trans, &trans->accounting); + accounting = bkey_next(accounting)) { + ret = bch2_accounting_trans_commit_hook(trans, + bkey_i_to_accounting(accounting), flags); + if (unlikely(ret)) { + for (struct bkey_i *i = btree_trans_subbuf_base(trans, &trans->accounting); + i != accounting; + i = bkey_next(i)) + bch2_accounting_trans_commit_revert(trans, bkey_i_to_accounting(i), flags); + return ret; + } + } /* XXX: we only want to run this if deltas are nonzero */ bch2_trans_account_disk_usage_change(trans); @@ -794,13 +797,6 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, return 0; fatal_err: bch2_fs_fatal_error(c, "fatal error in transaction commit: %s", bch2_err_str(ret)); - percpu_down_read(&c->mark_lock); -revert_fs_usage: - for (struct bkey_i *i = btree_trans_subbuf_base(trans, &trans->accounting); - i != accounting; - i = bkey_next(i)) - bch2_accounting_trans_commit_revert(trans, bkey_i_to_accounting(i), flags); - percpu_up_read(&c->mark_lock); return ret; } @@ -826,7 +822,8 @@ static int bch2_trans_commit_journal_pin_flush(struct journal *j, /* * Get journal reservation, take write locks, and attempt to do btree update(s): */ -static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags, +static inline int do_bch2_trans_commit(struct btree_trans *trans, + enum bch_trans_commit_flags flags, struct btree_insert_entry **stopped_at, unsigned long trace_ip) { @@ -962,16 +959,33 @@ out: * do. */ static noinline int -do_bch2_trans_commit_to_journal_replay(struct btree_trans *trans) +do_bch2_trans_commit_to_journal_replay(struct btree_trans *trans, + enum bch_trans_commit_flags flags) { struct bch_fs *c = trans->c; + int ret = 0; BUG_ON(current != c->recovery_task); + struct bkey_i *accounting; + + percpu_down_read(&c->mark_lock); + for (accounting = btree_trans_subbuf_base(trans, &trans->accounting); + accounting != btree_trans_subbuf_top(trans, &trans->accounting); + accounting = bkey_next(accounting)) { + ret = likely(!(flags & BCH_TRANS_COMMIT_skip_accounting_apply)) + ? 
bch2_accounting_mem_mod_locked(trans, bkey_i_to_s_c_accounting(accounting), + BCH_ACCOUNTING_normal, false) + : 0; + if (ret) + goto revert_fs_usage; + } + percpu_up_read(&c->mark_lock); + trans_for_each_update(trans, i) { - int ret = bch2_journal_key_insert(c, i->btree_id, i->level, i->k); + ret = bch2_journal_key_insert(c, i->btree_id, i->level, i->k); if (ret) - return ret; + goto fatal_err; } for (struct jset_entry *i = btree_trans_journal_entries_start(trans); @@ -980,9 +994,9 @@ do_bch2_trans_commit_to_journal_replay(struct btree_trans *trans) if (i->type == BCH_JSET_ENTRY_btree_keys || i->type == BCH_JSET_ENTRY_write_buffer_keys) { jset_entry_for_each_key(i, k) { - int ret = bch2_journal_key_insert(c, i->btree_id, i->level, k); + ret = bch2_journal_key_insert(c, i->btree_id, i->level, k); if (ret) - return ret; + goto fatal_err; } } @@ -1000,12 +1014,24 @@ do_bch2_trans_commit_to_journal_replay(struct btree_trans *trans) for (struct bkey_i *i = btree_trans_subbuf_base(trans, &trans->accounting); i != btree_trans_subbuf_top(trans, &trans->accounting); i = bkey_next(i)) { - int ret = bch2_journal_key_insert(c, BTREE_ID_accounting, 0, i); + ret = bch2_journal_key_insert(c, BTREE_ID_accounting, 0, i); if (ret) - return ret; + goto fatal_err; } return 0; +fatal_err: + bch2_fs_fatal_error(c, "fatal error in transaction commit: %s", bch2_err_str(ret)); + percpu_down_read(&c->mark_lock); +revert_fs_usage: + BUG(); + /* error path not handled by __bch2_trans_commit() */ + for (struct bkey_i *i = btree_trans_subbuf_base(trans, &trans->accounting); + i != accounting; + i = bkey_next(i)) + bch2_accounting_trans_commit_revert(trans, bkey_i_to_accounting(i), flags); + percpu_up_read(&c->mark_lock); + return ret; } int __bch2_trans_commit(struct btree_trans *trans, enum bch_trans_commit_flags flags) @@ -1031,7 +1057,7 @@ int __bch2_trans_commit(struct btree_trans *trans, enum bch_trans_commit_flags f if (!(flags & BCH_TRANS_COMMIT_no_check_rw) && unlikely(!enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_trans))) { if (unlikely(!test_bit(BCH_FS_may_go_rw, &c->flags))) - ret = do_bch2_trans_commit_to_journal_replay(trans); + ret = do_bch2_trans_commit_to_journal_replay(trans, flags); else ret = bch_err_throw(c, erofs_trans_commit); goto out_reset; diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c index 7983c4940b3b..f514a8ad7a89 100644 --- a/fs/bcachefs/btree_update.c +++ b/fs/bcachefs/btree_update.c @@ -671,8 +671,9 @@ int bch2_btree_insert(struct bch_fs *c, enum btree_id id, struct bkey_i *k, enum bch_trans_commit_flags commit_flags, enum btree_iter_update_trigger_flags iter_flags) { - return bch2_trans_commit_do(c, disk_res, NULL, commit_flags, - bch2_btree_insert_trans(trans, id, k, iter_flags)); + CLASS(btree_trans, trans)(c); + return commit_do(trans, disk_res, NULL, commit_flags, + bch2_btree_insert_trans(trans, id, k, iter_flags)); } int bch2_btree_delete_at(struct btree_trans *trans, struct btree_iter *iter, @@ -781,9 +782,8 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id, enum btree_iter_update_trigger_flags flags, u64 *journal_seq) { - int ret = bch2_trans_run(c, - bch2_btree_delete_range_trans(trans, id, start, end, - flags, journal_seq)); + CLASS(btree_trans, trans)(c); + int ret = bch2_btree_delete_range_trans(trans, id, start, end, flags, journal_seq); if (ret == -BCH_ERR_transaction_restart_nested) ret = 0; return ret; @@ -877,31 +877,31 @@ static int __bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt, va_list args) { - 
struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_vprintf(&buf, fmt, args); unsigned u64s = DIV_ROUND_UP(buf.pos, sizeof(u64)); int ret = buf.allocation_failure ? -BCH_ERR_ENOMEM_trans_log_msg : 0; if (ret) - goto err; + return ret; if (!test_bit(JOURNAL_running, &c->journal.flags)) { ret = darray_make_room(&c->journal.early_journal_entries, jset_u64s(u64s)); if (ret) - goto err; + return ret; struct jset_entry_log *l = (void *) &darray_top(c->journal.early_journal_entries); journal_entry_init(&l->entry, BCH_JSET_ENTRY_log, 0, 1, u64s); memcpy_and_pad(l->d, u64s * sizeof(u64), buf.buf, buf.pos, 0); c->journal.early_journal_entries.nr += jset_u64s(u64s); } else { - ret = bch2_trans_commit_do(c, NULL, NULL, commit_flags, - bch2_trans_log_msg(trans, &buf)); + CLASS(btree_trans, trans)(c); + ret = commit_do(trans, NULL, NULL, commit_flags, + bch2_trans_log_msg(trans, &buf)); } -err: - printbuf_exit(&buf); - return ret; + + return ret; } __printf(2, 3) diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h index 222a9f8ffbd5..633de3b3ac28 100644 --- a/fs/bcachefs/btree_update.h +++ b/fs/bcachefs/btree_update.h @@ -191,19 +191,29 @@ int bch2_btree_insert_clone_trans(struct btree_trans *, enum btree_id, struct bk int bch2_btree_write_buffer_insert_err(struct bch_fs *, enum btree_id, struct bkey_i *); +static inline int bch2_btree_write_buffer_insert_checks(struct bch_fs *c, enum btree_id btree, + struct bkey_i *k) +{ + if (unlikely(!btree_type_uses_write_buffer(btree) || + k->k.u64s > BTREE_WRITE_BUFERED_U64s_MAX)) { + int ret = bch2_btree_write_buffer_insert_err(c, btree, k); + dump_stack(); + return ret; + } + + return 0; +} + static inline int __must_check bch2_trans_update_buffered(struct btree_trans *trans, enum btree_id btree, struct bkey_i *k) { kmsan_check_memory(k, bkey_bytes(&k->k)); - EBUG_ON(k->k.u64s > BTREE_WRITE_BUFERED_U64s_MAX); - - if (unlikely(!btree_type_uses_write_buffer(btree))) { - int ret = bch2_btree_write_buffer_insert_err(trans->c, btree, k); - dump_stack(); + int ret = bch2_btree_write_buffer_insert_checks(trans->c, btree, k); + if (unlikely(ret)) return ret; - } + /* * Most updates skip the btree write buffer until journal replay is * finished because synchronization with journal replay relies on having @@ -220,7 +230,7 @@ static inline int __must_check bch2_trans_update_buffered(struct btree_trans *tr return bch2_btree_insert_clone_trans(trans, btree, k); struct jset_entry *e = bch2_trans_jset_entry_alloc(trans, jset_u64s(k->k.u64s)); - int ret = PTR_ERR_OR_ZERO(e); + ret = PTR_ERR_OR_ZERO(e); if (ret) return ret; @@ -268,6 +278,7 @@ static inline int bch2_trans_commit(struct btree_trans *trans, nested_lockrestart_do(_trans, _do ?: bch2_trans_commit(_trans, (_disk_res),\ (_journal_seq), (_flags))) +/* deprecated, prefer CLASS(btree_trans) */ #define bch2_trans_commit_do(_c, _disk_res, _journal_seq, _flags, _do) \ bch2_trans_run(_c, commit_do(trans, _disk_res, _journal_seq, _flags, _do)) diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index ebdb4d2f1be9..312ef203b27b 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -53,7 +53,7 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b) : b->data->min_key; struct btree_and_journal_iter iter; struct bkey_s_c k; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); struct bkey_buf prev; int ret = 0; @@ -133,7 +133,6 @@ int bch2_btree_node_check_topology(struct btree_trans *trans, struct
btree *b) out: bch2_btree_and_journal_iter_exit(&iter); bch2_bkey_buf_exit(&prev, c); - printbuf_exit(&buf); return ret; err: bch2_btree_id_level_to_text(&buf, b->c.btree_id, b->c.level); @@ -240,9 +239,8 @@ static void bch2_btree_node_free_inmem(struct btree_trans *trans, __btree_node_free(trans, b); - mutex_lock(&c->btree_cache.lock); - bch2_btree_node_hash_remove(&c->btree_cache, b); - mutex_unlock(&c->btree_cache.lock); + scoped_guard(mutex, &c->btree_cache.lock) + bch2_btree_node_hash_remove(&c->btree_cache, b); six_unlock_write(&b->c.lock); mark_btree_node_locked_noreset(path, b->c.level, BTREE_NODE_INTENT_LOCKED); @@ -268,9 +266,8 @@ static void bch2_btree_node_free_never_used(struct btree_update *as, clear_btree_node_dirty_acct(c, b); clear_btree_node_need_write(b); - mutex_lock(&c->btree_cache.lock); - __bch2_btree_node_hash_remove(&c->btree_cache, b); - mutex_unlock(&c->btree_cache.lock); + scoped_guard(mutex, &c->btree_cache.lock) + __bch2_btree_node_hash_remove(&c->btree_cache, b); BUG_ON(p->nr >= ARRAY_SIZE(p->b)); p->b[p->nr++] = b; @@ -560,7 +557,8 @@ static void bch2_btree_update_free(struct btree_update *as, struct btree_trans * bch2_time_stats_update(&c->times[BCH_TIME_btree_interior_update_total], as->start_time); - mutex_lock(&c->btree_interior_update_lock); + guard(mutex)(&c->btree_interior_update_lock); + list_del(&as->unwritten_list); list_del(&as->list); @@ -572,8 +570,6 @@ static void bch2_btree_update_free(struct btree_update *as, struct btree_trans * * since being on btree_interior_update_list is our ref on @c: */ closure_wake_up(&c->btree_interior_update_wait); - - mutex_unlock(&c->btree_interior_update_lock); } static void btree_update_add_key(struct btree_update *as, @@ -602,12 +598,11 @@ static void btree_update_new_nodes_mark_sb(struct btree_update *as) { struct bch_fs *c = as->c; - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); for_each_keylist_key(&as->new_keys, k) bch2_dev_btree_bitmap_mark(c, bkey_i_to_s_c(k)); bch2_write_super(c); - mutex_unlock(&c->sb_lock); } /* @@ -659,7 +654,7 @@ static void btree_update_nodes_written(struct btree_update *as) { struct bch_fs *c = as->c; struct btree *b; - struct btree_trans *trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); u64 journal_seq = 0; unsigned i; int ret; @@ -818,15 +813,15 @@ err: bch2_journal_pin_drop(&c->journal, &as->journal); - mutex_lock(&c->btree_interior_update_lock); - for (i = 0; i < as->nr_new_nodes; i++) { - b = as->new_nodes[i]; + scoped_guard(mutex, &c->btree_interior_update_lock) { + for (i = 0; i < as->nr_new_nodes; i++) { + b = as->new_nodes[i]; - BUG_ON(b->will_make_reachable != (unsigned long) as); - b->will_make_reachable = 0; - clear_btree_node_will_make_reachable(b); + BUG_ON(b->will_make_reachable != (unsigned long) as); + b->will_make_reachable = 0; + clear_btree_node_will_make_reachable(b); + } } - mutex_unlock(&c->btree_interior_update_lock); for (i = 0; i < as->nr_new_nodes; i++) { b = as->new_nodes[i]; @@ -840,7 +835,6 @@ err: bch2_open_bucket_put(c, c->open_buckets + as->open_buckets[i]); bch2_btree_update_free(as, trans); - bch2_trans_put(trans); } static void btree_interior_update_work(struct work_struct *work) @@ -850,12 +844,12 @@ static void btree_interior_update_work(struct work_struct *work) struct btree_update *as; while (1) { - mutex_lock(&c->btree_interior_update_lock); - as = list_first_entry_or_null(&c->btree_interior_updates_unwritten, - struct btree_update, unwritten_list); - if (as && !as->nodes_written) - as = NULL; - 
mutex_unlock(&c->btree_interior_update_lock); + scoped_guard(mutex, &c->btree_interior_update_lock) { + as = list_first_entry_or_null(&c->btree_interior_updates_unwritten, + struct btree_update, unwritten_list); + if (as && !as->nodes_written) + as = NULL; + } if (!as) break; @@ -869,9 +863,8 @@ static CLOSURE_CALLBACK(btree_update_set_nodes_written) closure_type(as, struct btree_update, cl); struct bch_fs *c = as->c; - mutex_lock(&c->btree_interior_update_lock); - as->nodes_written = true; - mutex_unlock(&c->btree_interior_update_lock); + scoped_guard(mutex, &c->btree_interior_update_lock) + as->nodes_written = true; queue_work(c->btree_interior_update_worker, &c->btree_interior_update_work); } @@ -889,7 +882,7 @@ static void btree_update_updated_node(struct btree_update *as, struct btree *b) BUG_ON(!btree_node_dirty(b)); BUG_ON(!b->c.level); - mutex_lock(&c->btree_interior_update_lock); + guard(mutex)(&c->btree_interior_update_lock); list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten); as->mode = BTREE_UPDATE_node; @@ -898,8 +891,6 @@ static void btree_update_updated_node(struct btree_update *as, struct btree *b) set_btree_node_write_blocked(b); list_add(&as->write_blocked_list, &b->write_blocked); - - mutex_unlock(&c->btree_interior_update_lock); } static int bch2_update_reparent_journal_pin_flush(struct journal *j, @@ -938,11 +929,11 @@ static void btree_update_updated_root(struct btree_update *as, struct btree *b) b->c.btree_id, b->c.level, insert, insert->k.u64s); - mutex_lock(&c->btree_interior_update_lock); - list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten); + scoped_guard(mutex, &c->btree_interior_update_lock) { + list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten); - as->mode = BTREE_UPDATE_root; - mutex_unlock(&c->btree_interior_update_lock); + as->mode = BTREE_UPDATE_root; + } } /* @@ -963,7 +954,8 @@ static void bch2_btree_update_add_new_node(struct btree_update *as, struct btree closure_get(&as->cl); - mutex_lock(&c->btree_interior_update_lock); + guard(mutex)(&c->btree_interior_update_lock); + BUG_ON(as->nr_new_nodes >= ARRAY_SIZE(as->new_nodes)); BUG_ON(b->will_make_reachable); @@ -971,8 +963,6 @@ static void bch2_btree_update_add_new_node(struct btree_update *as, struct btree b->will_make_reachable = 1UL|(unsigned long) as; set_btree_node_will_make_reachable(b); - mutex_unlock(&c->btree_interior_update_lock); - btree_update_add_key(as, &as->new_keys, b); if (b->key.k.type == KEY_TYPE_btree_ptr_v2) { @@ -991,31 +981,29 @@ static void btree_update_drop_new_node(struct bch_fs *c, struct btree *b) { struct btree_update *as; unsigned long v; - unsigned i; - mutex_lock(&c->btree_interior_update_lock); - /* - * When b->will_make_reachable != 0, it owns a ref on as->cl that's - * dropped when it gets written by bch2_btree_complete_write - the - * xchg() is for synchronization with bch2_btree_complete_write: - */ - v = xchg(&b->will_make_reachable, 0); - clear_btree_node_will_make_reachable(b); - as = (struct btree_update *) (v & ~1UL); + scoped_guard(mutex, &c->btree_interior_update_lock) { + /* + * When b->will_make_reachable != 0, it owns a ref on as->cl that's + * dropped when it gets written by bch2_btree_complete_write - the + * xchg() is for synchronization with bch2_btree_complete_write: + */ + v = xchg(&b->will_make_reachable, 0); + clear_btree_node_will_make_reachable(b); + as = (struct btree_update *) (v & ~1UL); - if (!as) { - mutex_unlock(&c->btree_interior_update_lock); - return; - } + if (!as) + 
return; - for (i = 0; i < as->nr_new_nodes; i++) - if (as->new_nodes[i] == b) - goto found; + unsigned i; + for (i = 0; i < as->nr_new_nodes; i++) + if (as->new_nodes[i] == b) + goto found; - BUG(); -found: - array_remove_item(as->new_nodes, as->nr_new_nodes, i); - mutex_unlock(&c->btree_interior_update_lock); + BUG(); + found: + array_remove_item(as->new_nodes, as->nr_new_nodes, i); + } if (v & 1) closure_put(&as->cl); @@ -1232,9 +1220,8 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, bch2_keylist_init(&as->new_keys, as->_new_keys); bch2_keylist_init(&as->parent_keys, as->inline_keys); - mutex_lock(&c->btree_interior_update_lock); - list_add_tail(&as->list, &c->btree_interior_update_list); - mutex_unlock(&c->btree_interior_update_lock); + scoped_guard(mutex, &c->btree_interior_update_lock) + list_add_tail(&as->list, &c->btree_interior_update_list); struct btree *b = btree_path_node(path, path->level); as->node_start = b->data->min_key; @@ -1318,13 +1305,11 @@ err: static void bch2_btree_set_root_inmem(struct bch_fs *c, struct btree *b) { /* Root nodes cannot be reaped */ - mutex_lock(&c->btree_cache.lock); - list_del_init(&b->list); - mutex_unlock(&c->btree_cache.lock); + scoped_guard(mutex, &c->btree_cache.lock) + list_del_init(&b->list); - mutex_lock(&c->btree_root_lock); - bch2_btree_id_root(c, b->c.btree_id)->b = b; - mutex_unlock(&c->btree_root_lock); + scoped_guard(mutex, &c->btree_root_lock) + bch2_btree_id_root(c, b->c.btree_id)->b = b; bch2_recalc_btree_reserve(c); } @@ -1379,7 +1364,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, { struct bch_fs *c = as->c; struct bkey_packed *k; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); unsigned long old, new; BUG_ON(insert->k.type == KEY_TYPE_btree_ptr_v2 && @@ -1424,8 +1409,6 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, new |= BTREE_WRITE_interior; new |= 1 << BTREE_NODE_need_write; } while (!try_cmpxchg(&b->flags, &old, new)); - - printbuf_exit(&buf); } static int @@ -1452,7 +1435,7 @@ bch2_btree_insert_keys_interior(struct btree_update *as, int ret = bch2_btree_node_check_topology(trans, b); if (ret) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); for (struct bkey_i *k = keys->keys; k != insert; @@ -1839,7 +1822,7 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t bch2_verify_keylist_sorted(keys); if (!btree_node_intent_locked(path, b->c.level)) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_log_msg_start(c, &buf); prt_printf(&buf, "%s(): node not locked at level %u\n", __func__, b->c.level); @@ -1848,7 +1831,6 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t bch2_fs_emergency_read_only2(c, &buf); bch2_print_str(c, KERN_ERR, buf.buf); - printbuf_exit(&buf); return -EIO; } @@ -1971,9 +1953,8 @@ static void __btree_increase_depth(struct btree_update *as, struct btree_trans * bch2_trans_node_add(trans, path, n); six_unlock_intent(&n->c.lock); - mutex_lock(&c->btree_cache.lock); - list_add_tail(&b->list, &c->btree_cache.live[btree_node_pinned(b)].list); - mutex_unlock(&c->btree_cache.lock); + scoped_guard(mutex, &c->btree_cache.lock) + list_add_tail(&b->list, &c->btree_cache.live[btree_node_pinned(b)].list); bch2_trans_verify_locks(trans); } @@ -2073,7 +2054,7 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, } if (!bpos_eq(bpos_successor(prev->data->max_key), next->data->min_key)) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, 
buf)(); printbuf_indent_add_nextline(&buf, 2); prt_printf(&buf, "%s(): ", __func__); @@ -2088,7 +2069,6 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans, bch2_bpos_to_text(&buf, next->data->min_key); bch_err(c, "%s", buf.buf); - printbuf_exit(&buf); goto err; } @@ -2371,9 +2351,8 @@ static void async_btree_node_rewrite_work(struct work_struct *work) !bch2_err_matches(ret, EROFS)) bch_err_fn_ratelimited(c, ret); - spin_lock(&c->btree_node_rewrites_lock); - list_del(&a->list); - spin_unlock(&c->btree_node_rewrites_lock); + scoped_guard(spinlock, &c->btree_node_rewrites_lock) + list_del(&a->list); closure_wake_up(&c->btree_node_rewrites_wait); @@ -2398,16 +2377,16 @@ void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b) bool now = false, pending = false; - spin_lock(&c->btree_node_rewrites_lock); - if (c->recovery.passes_complete & BIT_ULL(BCH_RECOVERY_PASS_journal_replay) && - enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_node_rewrite)) { - list_add(&a->list, &c->btree_node_rewrites); - now = true; - } else if (!test_bit(BCH_FS_may_go_rw, &c->flags)) { - list_add(&a->list, &c->btree_node_rewrites_pending); - pending = true; + scoped_guard(spinlock, &c->btree_node_rewrites_lock) { + if (c->recovery.passes_complete & BIT_ULL(BCH_RECOVERY_PASS_journal_replay) && + enumerated_ref_tryget(&c->writes, BCH_WRITE_REF_node_rewrite)) { + list_add(&a->list, &c->btree_node_rewrites); + now = true; + } else if (!test_bit(BCH_FS_may_go_rw, &c->flags)) { + list_add(&a->list, &c->btree_node_rewrites_pending); + pending = true; + } } - spin_unlock(&c->btree_node_rewrites_lock); if (now) { queue_work(c->btree_node_rewrite_worker, &a->work); @@ -2428,13 +2407,14 @@ void bch2_async_btree_node_rewrites_flush(struct bch_fs *c) void bch2_do_pending_node_rewrites(struct bch_fs *c) { while (1) { - spin_lock(&c->btree_node_rewrites_lock); - struct async_btree_rewrite *a = - list_pop_entry(&c->btree_node_rewrites_pending, - struct async_btree_rewrite, list); - if (a) - list_add(&a->list, &c->btree_node_rewrites); - spin_unlock(&c->btree_node_rewrites_lock); + struct async_btree_rewrite *a; + + scoped_guard(spinlock, &c->btree_node_rewrites_lock) { + a = list_pop_entry(&c->btree_node_rewrites_pending, + struct async_btree_rewrite, list); + if (a) + list_add(&a->list, &c->btree_node_rewrites); + } if (!a) break; @@ -2447,11 +2427,11 @@ void bch2_do_pending_node_rewrites(struct bch_fs *c) void bch2_free_pending_node_rewrites(struct bch_fs *c) { while (1) { - spin_lock(&c->btree_node_rewrites_lock); - struct async_btree_rewrite *a = - list_pop_entry(&c->btree_node_rewrites_pending, - struct async_btree_rewrite, list); - spin_unlock(&c->btree_node_rewrites_lock); + struct async_btree_rewrite *a; + + scoped_guard(spinlock, &c->btree_node_rewrites_lock) + a = list_pop_entry(&c->btree_node_rewrites_pending, + struct async_btree_rewrite, list); if (!a) break; @@ -2533,7 +2513,7 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans, bch2_btree_node_lock_write_nofail(trans, btree_iter_path(trans, iter), &b->c); if (new_hash) { - mutex_lock(&c->btree_cache.lock); + guard(mutex)(&c->btree_cache.lock); bch2_btree_node_hash_remove(&c->btree_cache, new_hash); __bch2_btree_node_hash_remove(&c->btree_cache, b); @@ -2541,7 +2521,6 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans, bkey_copy(&b->key, new_key); ret = __bch2_btree_node_hash_insert(&c->btree_cache, b); BUG_ON(ret); - mutex_unlock(&c->btree_cache.lock); } else { bkey_copy(&b->key, new_key); } @@ -2552,9 +2531,8 
@@ out: return ret; err: if (new_hash) { - mutex_lock(&c->btree_cache.lock); + guard(mutex)(&c->btree_cache.lock); bch2_btree_node_hash_remove(&c->btree_cache, b); - mutex_unlock(&c->btree_cache.lock); } goto out; } @@ -2689,7 +2667,8 @@ int bch2_btree_root_alloc_fake_trans(struct btree_trans *trans, enum btree_id id void bch2_btree_root_alloc_fake(struct bch_fs *c, enum btree_id id, unsigned level) { - bch2_trans_run(c, lockrestart_do(trans, bch2_btree_root_alloc_fake_trans(trans, id, level))); + CLASS(btree_trans, trans)(c); + lockrestart_do(trans, bch2_btree_root_alloc_fake_trans(trans, id, level)); } static void bch2_btree_update_to_text(struct printbuf *out, struct btree_update *as) @@ -2722,21 +2701,15 @@ void bch2_btree_updates_to_text(struct printbuf *out, struct bch_fs *c) { struct btree_update *as; - mutex_lock(&c->btree_interior_update_lock); + guard(mutex)(&c->btree_interior_update_lock); list_for_each_entry(as, &c->btree_interior_update_list, list) bch2_btree_update_to_text(out, as); - mutex_unlock(&c->btree_interior_update_lock); } static bool bch2_btree_interior_updates_pending(struct bch_fs *c) { - bool ret; - - mutex_lock(&c->btree_interior_update_lock); - ret = !list_empty(&c->btree_interior_update_list); - mutex_unlock(&c->btree_interior_update_lock); - - return ret; + guard(mutex)(&c->btree_interior_update_lock); + return !list_empty(&c->btree_interior_update_list); } bool bch2_btree_interior_updates_flush(struct bch_fs *c) @@ -2753,13 +2726,11 @@ void bch2_journal_entry_to_btree_root(struct bch_fs *c, struct jset_entry *entry { struct btree_root *r = bch2_btree_id_root(c, entry->btree_id); - mutex_lock(&c->btree_root_lock); + guard(mutex)(&c->btree_interior_update_lock); r->level = entry->level; r->alive = true; bkey_copy(&r->key, (struct bkey_i *) entry->start); - - mutex_unlock(&c->btree_root_lock); } struct jset_entry * @@ -2767,11 +2738,9 @@ bch2_btree_roots_to_journal_entries(struct bch_fs *c, struct jset_entry *end, unsigned long skip) { - unsigned i; - - mutex_lock(&c->btree_root_lock); + guard(mutex)(&c->btree_interior_update_lock); - for (i = 0; i < btree_id_nr_alive(c); i++) { + for (unsigned i = 0; i < btree_id_nr_alive(c); i++) { struct btree_root *r = bch2_btree_id_root(c, i); if (r->alive && !test_bit(i, &skip)) { @@ -2781,8 +2750,6 @@ bch2_btree_roots_to_journal_entries(struct bch_fs *c, } } - mutex_unlock(&c->btree_root_lock); - return end; } diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c index 4b095235a0d2..9cfc3edce39a 100644 --- a/fs/bcachefs/btree_write_buffer.c +++ b/fs/bcachefs/btree_write_buffer.c @@ -259,9 +259,8 @@ out: bch2_btree_write_buffer_journal_flush); if (j->watermark) { - spin_lock(&j->lock); + guard(spinlock)(&j->lock); bch2_journal_set_watermark(j); - spin_unlock(&j->lock); } BUG_ON(wb->sorted.size < wb->flushing.keys.nr); @@ -270,7 +269,7 @@ out: int bch2_btree_write_buffer_insert_err(struct bch_fs *c, enum btree_id btree, struct bkey_i *k) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_printf(&buf, "attempting to do write buffer update on non wb btree="); bch2_btree_id_to_text(&buf, btree); @@ -278,7 +277,6 @@ int bch2_btree_write_buffer_insert_err(struct bch_fs *c, bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k)); bch2_fs_inconsistent(c, "%s", buf.buf); - printbuf_exit(&buf); return -EROFS; } @@ -300,9 +298,8 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) bch2_trans_unlock(trans); bch2_trans_begin(trans); - mutex_lock(&wb->inc.lock); - 
move_keys_from_inc_to_flushing(wb); - mutex_unlock(&wb->inc.lock); + scoped_guard(mutex, &wb->inc.lock) + move_keys_from_inc_to_flushing(wb); for (size_t i = 0; i < wb->flushing.keys.nr; i++) { wb->sorted.data[i].idx = i; @@ -330,10 +327,9 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) darray_for_each(wb->sorted, i) { struct btree_write_buffered_key *k = &wb->flushing.keys.data[i->idx]; - if (unlikely(!btree_type_uses_write_buffer(k->btree))) { - ret = bch2_btree_write_buffer_insert_err(trans->c, k->btree, &k->k); + ret = bch2_btree_write_buffer_insert_checks(c, k->btree, &k->k); + if (unlikely(ret)) goto err; - } for (struct wb_key_ref *n = i + 1; n < min(i + 4, &darray_top(wb->sorted)); n++) prefetch(&wb->flushing.keys.data[n->idx]); @@ -534,9 +530,8 @@ static int fetch_wb_keys_from_journal(struct bch_fs *c, u64 max_seq) ret = bch2_journal_keys_to_write_buffer(c, buf); if (!blocked && !ret) { - spin_lock(&j->lock); + guard(spinlock)(&j->lock); buf->need_flush_to_write_buffer = false; - spin_unlock(&j->lock); } mutex_unlock(&j->buf_lock); @@ -568,9 +563,8 @@ static int btree_write_buffer_flush_seq(struct btree_trans *trans, u64 max_seq, * On memory allocation failure, bch2_btree_write_buffer_flush_locked() * is not guaranteed to empty wb->inc: */ - mutex_lock(&wb->flushing.lock); - ret = bch2_btree_write_buffer_flush_locked(trans); - mutex_unlock(&wb->flushing.lock); + scoped_guard(mutex, &wb->flushing.lock) + ret = bch2_btree_write_buffer_flush_locked(trans); } while (!ret && (fetch_from_journal_err || (wb->inc.pin.seq && wb->inc.pin.seq <= max_seq) || @@ -583,9 +577,10 @@ static int bch2_btree_write_buffer_journal_flush(struct journal *j, struct journal_entry_pin *_pin, u64 seq) { struct bch_fs *c = container_of(j, struct bch_fs, journal); + CLASS(btree_trans, trans)(c); bool did_work = false; - return bch2_trans_run(c, btree_write_buffer_flush_seq(trans, seq, &did_work)); + return btree_write_buffer_flush_seq(trans, seq, &did_work); } int bch2_btree_write_buffer_flush_sync(struct btree_trans *trans) @@ -607,9 +602,9 @@ bool bch2_btree_write_buffer_flush_going_ro(struct bch_fs *c) if (bch2_journal_error(&c->journal)) return false; + CLASS(btree_trans, trans)(c); bool did_work = false; - bch2_trans_run(c, btree_write_buffer_flush_seq(trans, - journal_cur_seq(&c->journal), &did_work)); + btree_write_buffer_flush_seq(trans, journal_cur_seq(&c->journal), &did_work); return did_work; } @@ -656,11 +651,10 @@ int bch2_btree_write_buffer_maybe_flush(struct btree_trans *trans, if (!bkey_and_val_eq(referring_k, bkey_i_to_s_c(last_flushed->k))) { if (trace_write_buffer_maybe_flush_enabled()) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_bkey_val_to_text(&buf, c, referring_k); trace_write_buffer_maybe_flush(trans, _RET_IP_, buf.buf); - printbuf_exit(&buf); } bch2_bkey_buf_reassemble(&tmp, c, referring_k); @@ -691,11 +685,12 @@ static void bch2_btree_write_buffer_flush_work(struct work_struct *work) struct btree_write_buffer *wb = &c->btree_write_buffer; int ret; - mutex_lock(&wb->flushing.lock); - do { - ret = bch2_trans_run(c, bch2_btree_write_buffer_flush_locked(trans)); - } while (!ret && bch2_btree_write_buffer_should_flush(c)); - mutex_unlock(&wb->flushing.lock); + scoped_guard(mutex, &wb->flushing.lock) { + CLASS(btree_trans, trans)(c); + do { + ret = bch2_btree_write_buffer_flush_locked(trans); + } while (!ret && bch2_btree_write_buffer_should_flush(c)); + } enumerated_ref_put(&c->writes, BCH_WRITE_REF_btree_write_buffer); } diff --git 
a/fs/bcachefs/btree_write_buffer.h b/fs/bcachefs/btree_write_buffer.h index c351d21aca0b..e484cd6b90b0 100644 --- a/fs/bcachefs/btree_write_buffer.h +++ b/fs/bcachefs/btree_write_buffer.h @@ -89,11 +89,9 @@ static inline int bch2_journal_key_to_wb(struct bch_fs *c, struct journal_keys_to_wb *dst, enum btree_id btree, struct bkey_i *k) { - if (unlikely(!btree_type_uses_write_buffer(btree))) { - int ret = bch2_btree_write_buffer_insert_err(c, btree, k); - dump_stack(); + int ret = bch2_btree_write_buffer_insert_checks(c, btree, k); + if (unlikely(ret)) return ret; - } EBUG_ON(!dst->seq); diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index f25903c10e8a..5aab527e3e7c 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -71,13 +71,8 @@ __bch2_fs_usage_read_short(struct bch_fs *c) struct bch_fs_usage_short bch2_fs_usage_read_short(struct bch_fs *c) { - struct bch_fs_usage_short ret; - - percpu_down_read(&c->mark_lock); - ret = __bch2_fs_usage_read_short(c); - percpu_up_read(&c->mark_lock); - - return ret; + guard(percpu_read)(&c->mark_lock); + return __bch2_fs_usage_read_short(c); } void bch2_dev_usage_to_text(struct printbuf *out, @@ -113,10 +108,10 @@ static int bch2_check_fix_ptr(struct btree_trans *trans, bool *do_update) { struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); int ret = 0; - struct bch_dev *ca = bch2_dev_tryget(c, p.ptr.dev); + CLASS(bch2_dev_tryget, ca)(c, p.ptr.dev); if (!ca) { if (fsck_err_on(p.ptr.dev != BCH_SB_MEMBER_INVALID, trans, ptr_to_invalid_device, @@ -138,7 +133,7 @@ static int bch2_check_fix_ptr(struct btree_trans *trans, (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, k), buf.buf))) *do_update = true; - goto out; + return 0; } enum bch_data_type data_type = bch2_bkey_ptr_data_type(k, p, entry); @@ -158,7 +153,7 @@ static int bch2_check_fix_ptr(struct btree_trans *trans, } else { /* this pointer will be dropped */ *do_update = true; - goto out; + return 0; } } @@ -208,7 +203,7 @@ static int bch2_check_fix_ptr(struct btree_trans *trans, *do_update = true; if (data_type != BCH_DATA_btree && p.ptr.gen != g->gen) - goto out; + return 0; if (fsck_err_on(bucket_data_type_mismatch(g->data_type, data_type), trans, ptr_bucket_data_type_mismatch, @@ -224,14 +219,13 @@ static int bch2_check_fix_ptr(struct btree_trans *trans, switch (g->data_type) { case BCH_DATA_sb: bch_err(c, "btree and superblock in the same bucket - cannot repair"); - ret = bch_err_throw(c, fsck_repair_unimplemented); - goto out; + return bch_err_throw(c, fsck_repair_unimplemented); case BCH_DATA_journal: ret = bch2_dev_journal_bucket_delete(ca, PTR_BUCKET_NR(ca, &p.ptr)); bch_err_msg(c, ret, "error deleting journal bucket %zu", PTR_BUCKET_NR(ca, &p.ptr)); if (ret) - goto out; + return ret; break; } @@ -265,10 +259,7 @@ static int bch2_check_fix_ptr(struct btree_trans *trans, bch2_bkey_val_to_text(&buf, c, k), buf.buf))) *do_update = true; } -out: fsck_err: - bch2_dev_put(ca); - printbuf_exit(&buf); return ret; } @@ -281,7 +272,7 @@ int bch2_check_fix_ptrs(struct btree_trans *trans, const union bch_extent_entry *entry_c; struct extent_ptr_decoded p = { 0 }; bool do_update = false; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); int ret = 0; /* We don't yet do btree key updates correctly for when we're RW */ @@ -290,14 +281,14 @@ int bch2_check_fix_ptrs(struct btree_trans *trans, bkey_for_each_ptr_decode(k.k, ptrs_c, p, entry_c) { ret = bch2_check_fix_ptr(trans, k, p, entry_c, &do_update); if (ret) - goto err; + return ret; } 
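A recurring pattern in these bcachefs hunks is replacing open-coded mutex_lock()/mutex_unlock() and spin_lock()/spin_unlock() pairs with guard() and scoped_guard() from <linux/cleanup.h>, so error paths can simply return instead of jumping to an unlock label. The standalone sketch below re-derives the mechanism in userspace C under stated assumptions: DEFINE_GUARD mirrors the shape of the kernel macro, but the flattened guard(name, var, lock) spelling and the pthread names are illustrative, not the kernel API.

	#include <pthread.h>
	#include <stdio.h>

	/* same shape as DEFINE_GUARD() in <linux/cleanup.h> */
	#define DEFINE_GUARD(name, type, lock, unlock)			\
		typedef type guard_##name##_t;				\
		static inline type guard_##name##_init(type _T)		\
		{ lock; return _T; }					\
		static inline void guard_##name##_exit(type *t)		\
		{ type _T = *t; unlock; }

	/* the kernel spells this guard(mutex)(&m); one macro layer is elided here */
	#define guard(name, var, l)					\
		guard_##name##_t var					\
			__attribute__((cleanup(guard_##name##_exit))) =	\
			guard_##name##_init(l)

	DEFINE_GUARD(mutex, pthread_mutex_t *,
		     pthread_mutex_lock(_T), pthread_mutex_unlock(_T))

	static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
	static int table_size = 16;

	static int table_lookup(int key)
	{
		guard(mutex, g, &table_lock);

		if (key < 0)
			return -1;		/* unlocked here by the cleanup handler */
		return key < table_size;	/* ...and here */
	}

	int main(void)
	{
		printf("%d %d\n", table_lookup(-1), table_lookup(3));
		return 0;
	}

scoped_guard(type, args) { ... } is the block-scoped variant seen in async_btree_node_rewrite_work() above: the lock covers only the braced region rather than the rest of the function.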
if (do_update) { struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k); ret = PTR_ERR_OR_ZERO(new); if (ret) - goto err; + return ret; scoped_guard(rcu) bch2_bkey_drop_ptrs(bkey_i_to_s(new), ptr, !bch2_dev_exists(c, ptr->dev)); @@ -387,7 +378,7 @@ found: BTREE_TRIGGER_norun); bch2_trans_iter_exit(trans, &iter); if (ret) - goto err; + return ret; if (level) bch2_btree_node_update_key_early(trans, btree, level - 1, k, new); @@ -396,7 +387,7 @@ found: jset_u64s(new->k.u64s)); ret = PTR_ERR_OR_ZERO(e); if (ret) - goto err; + return ret; journal_entry_set(e, BCH_JSET_ENTRY_btree_root, @@ -413,9 +404,8 @@ found: bkey_copy(&b->key, new); } } -err: - printbuf_exit(&buf); - return ret; + + return 0; } static int bucket_ref_update_err(struct btree_trans *trans, struct printbuf *buf, @@ -460,9 +450,8 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, { struct bch_fs *c = trans->c; size_t bucket_nr = PTR_BUCKET_NR(ca, ptr); - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bool inserting = sectors > 0; - int ret = 0; BUG_ON(!sectors); @@ -474,9 +463,8 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, bch2_data_type_str(bucket_data_type ?: ptr_data_type), ptr->gen); - ret = bucket_ref_update_err(trans, &buf, k, inserting, - BCH_FSCK_ERR_ptr_gen_newer_than_bucket_gen); - goto out; + return bucket_ref_update_err(trans, &buf, k, inserting, + BCH_FSCK_ERR_ptr_gen_newer_than_bucket_gen); } if (unlikely(gen_cmp(b_gen, ptr->gen) > BUCKET_GC_GEN_MAX)) { @@ -487,15 +475,12 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, bch2_data_type_str(bucket_data_type ?: ptr_data_type), ptr->gen); - ret = bucket_ref_update_err(trans, &buf, k, inserting, - BCH_FSCK_ERR_ptr_too_stale); - goto out; + return bucket_ref_update_err(trans, &buf, k, inserting, + BCH_FSCK_ERR_ptr_too_stale); } - if (b_gen != ptr->gen && ptr->cached) { - ret = 1; - goto out; - } + if (b_gen != ptr->gen && ptr->cached) + return 1; if (unlikely(b_gen != ptr->gen)) { bch2_log_msg_start(c, &buf); @@ -506,9 +491,8 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, bch2_data_type_str(bucket_data_type ?: ptr_data_type), ptr->gen); - ret = bucket_ref_update_err(trans, &buf, k, inserting, - BCH_FSCK_ERR_stale_dirty_ptr); - goto out; + return bucket_ref_update_err(trans, &buf, k, inserting, + BCH_FSCK_ERR_stale_dirty_ptr); } if (unlikely(bucket_data_type_mismatch(bucket_data_type, ptr_data_type))) { @@ -518,9 +502,8 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, bch2_data_type_str(bucket_data_type), bch2_data_type_str(ptr_data_type)); - ret = bucket_ref_update_err(trans, &buf, k, inserting, + return bucket_ref_update_err(trans, &buf, k, inserting, BCH_FSCK_ERR_ptr_bucket_data_type_mismatch); - goto out; } if (unlikely((u64) *bucket_sectors + sectors > U32_MAX)) { @@ -531,16 +514,13 @@ int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, bch2_data_type_str(bucket_data_type ?: ptr_data_type), *bucket_sectors, sectors); - ret = bucket_ref_update_err(trans, &buf, k, inserting, - BCH_FSCK_ERR_bucket_sector_count_overflow); sectors = -*bucket_sectors; - goto out; + return bucket_ref_update_err(trans, &buf, k, inserting, + BCH_FSCK_ERR_bucket_sector_count_overflow); } *bucket_sectors += sectors; -out: - printbuf_exit(&buf); - return ret; + return 0; } void bch2_trans_account_disk_usage_change(struct btree_trans *trans) @@ -550,7 +530,7 @@ void bch2_trans_account_disk_usage_change(struct 
btree_trans *trans) static int warned_disk_usage = 0; bool warn = false; - percpu_down_read(&c->mark_lock); + guard(percpu_read)(&c->mark_lock); struct bch_fs_usage_base *src = &trans->fs_usage_delta; s64 added = src->btree + src->data + src->reserved; @@ -578,11 +558,10 @@ void bch2_trans_account_disk_usage_change(struct btree_trans *trans) this_cpu_sub(*c->online_reserved, added); } - preempt_disable(); - struct bch_fs_usage_base *dst = this_cpu_ptr(c->usage); - acc_u64s((u64 *) dst, (u64 *) src, sizeof(*src) / sizeof(u64)); - preempt_enable(); - percpu_up_read(&c->mark_lock); + scoped_guard(preempt) { + struct bch_fs_usage_base *dst = this_cpu_ptr(c->usage); + acc_u64s((u64 *) dst, (u64 *) src, sizeof(*src) / sizeof(u64)); + } if (unlikely(warn) && !xchg(&warned_disk_usage, 1)) bch2_trans_inconsistent(trans, @@ -621,40 +600,34 @@ static int bch2_trigger_pointer(struct btree_trans *trans, { struct bch_fs *c = trans->c; bool insert = !(flags & BTREE_TRIGGER_overwrite); - struct printbuf buf = PRINTBUF; - int ret = 0; + CLASS(printbuf, buf)(); struct bkey_i_backpointer bp; bch2_extent_ptr_to_bp(c, btree_id, level, k, p, entry, &bp); *sectors = insert ? bp.v.bucket_len : -(s64) bp.v.bucket_len; - struct bch_dev *ca = bch2_dev_tryget(c, p.ptr.dev); + CLASS(bch2_dev_tryget, ca)(c, p.ptr.dev); if (unlikely(!ca)) { if (insert && p.ptr.dev != BCH_SB_MEMBER_INVALID) - ret = bch_err_throw(c, trigger_pointer); - goto err; + return bch_err_throw(c, trigger_pointer); + return 0; } struct bpos bucket = PTR_BUCKET_POS(ca, &p.ptr); if (!bucket_valid(ca, bucket.offset)) { if (insert) { bch2_dev_bucket_missing(ca, bucket.offset); - ret = bch_err_throw(c, trigger_pointer); + return bch_err_throw(c, trigger_pointer); } - goto err; + return 0; } if (flags & BTREE_TRIGGER_transactional) { struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update(trans, bucket, 0); - ret = PTR_ERR_OR_ZERO(a) ?: - __mark_pointer(trans, ca, k, &p, *sectors, bp.v.data_type, &a->v, insert); - if (ret) - goto err; - - ret = bch2_bucket_backpointer_mod(trans, k, &bp, insert); - if (ret) - goto err; + return PTR_ERR_OR_ZERO(a) ?: + __mark_pointer(trans, ca, k, &p, *sectors, bp.v.data_type, &a->v, insert) ?: + bch2_bucket_backpointer_mod(trans, k, &bp, insert); } if (flags & BTREE_TRIGGER_gc) { @@ -662,23 +635,22 @@ static int bch2_trigger_pointer(struct btree_trans *trans, if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u\n %s", p.ptr.dev, (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { - ret = bch_err_throw(c, trigger_pointer); - goto err; + return bch_err_throw(c, trigger_pointer); } bucket_lock(g); struct bch_alloc_v4 old = bucket_m_to_alloc(*g), new = old; - ret = __mark_pointer(trans, ca, k, &p, *sectors, bp.v.data_type, &new, insert); + int ret = __mark_pointer(trans, ca, k, &p, *sectors, bp.v.data_type, &new, insert); alloc_to_bucket(g, new); bucket_unlock(g); - if (!ret) - ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags); + if (ret) + return ret; + + return bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags); } -err: - bch2_dev_put(ca); - printbuf_exit(&buf); - return ret; + + return 0; } static int bch2_trigger_stripe_ptr(struct btree_trans *trans, @@ -738,14 +710,13 @@ err: if (!m || !m->alive) { gc_stripe_unlock(m); - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_log_msg_start(c, &buf); prt_printf(&buf, "pointer to nonexistent stripe %llu\n while marking ", (u64) p.ec.idx); bch2_bkey_val_to_text(&buf, c, k); __bch2_inconsistent_error(c, &buf); 
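The trigger path above leans on CLASS(bch2_dev_tryget, ca)(c, p.ptr.dev): the constructor takes the device ref and the destructor drops it on every return path. The chardev.c hunk further down defines DEFINE_CLASS(bch2_device_lookup, ...) the same way. Below is a userspace re-derivation of the macro shape from <linux/cleanup.h>; the toy struct dev, dev_tryget() and dev_put() are stand-ins, and the NULL check in the destructor is an assumption added to match tryget semantics (the real bch2_dev_put() tolerates NULL itself).

	#include <stdio.h>

	/* same shape as DEFINE_CLASS() in <linux/cleanup.h> */
	#define DEFINE_CLASS(name, type, exit, init, init_args...)	\
		typedef type class_##name##_t;				\
		static inline void class_##name##_destructor(type *p)	\
		{ type _T = *p; if (_T) exit; }	/* NULL check: sketch-only */ \
		static inline type class_##name##_constructor(init_args) \
		{ return init; }

	#define CLASS(name, var)					\
		class_##name##_t var					\
			__attribute__((cleanup(class_##name##_destructor))) = \
			class_##name##_constructor

	struct dev { int refs; int idx; };

	static struct dev devs[2] = { { 1, 0 }, { 1, 1 } };

	static struct dev *dev_tryget(int i)
	{
		if (i < 0 || i >= 2)
			return NULL;
		devs[i].refs++;
		return &devs[i];
	}

	static void dev_put(struct dev *d)
	{
		printf("put dev %d, refs now %d\n", d->idx, --d->refs);
	}

	DEFINE_CLASS(dev_tryget, struct dev *, dev_put(_T), dev_tryget(i), int i)

	static int use_device(int i)
	{
		CLASS(dev_tryget, d)(i);	/* takes the ref */

		if (!d)
			return -1;
		return d->idx;			/* ref dropped on the way out */
	}

	int main(void)
	{
		printf("%d\n", use_device(1));
		printf("%d\n", use_device(7));
		return 0;
	}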
bch2_print_str(c, KERN_ERR, buf.buf); - printbuf_exit(&buf); return bch_err_throw(c, trigger_stripe_pointer); } @@ -996,7 +967,7 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, return PTR_ERR(a); if (a->v.data_type && type && a->v.data_type != type) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_log_msg_start(c, &buf); prt_printf(&buf, "bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n" "while marking %s\n", @@ -1012,7 +983,6 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, /* Always print, this is always fatal */ bch2_print_str(c, KERN_ERR, buf.buf); - printbuf_exit(&buf); if (!ret) ret = bch_err_throw(c, metadata_bucket_inconsistency); goto err; @@ -1034,7 +1004,6 @@ static int bch2_mark_metadata_bucket(struct btree_trans *trans, struct bch_dev * enum btree_iter_update_trigger_flags flags) { struct bch_fs *c = trans->c; - int ret = 0; struct bucket *g = gc_bucket(ca, b); if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u when marking metadata type %s", @@ -1062,8 +1031,7 @@ static int bch2_mark_metadata_bucket(struct btree_trans *trans, struct bch_dev * g->dirty_sectors += sectors; struct bch_alloc_v4 new = bucket_m_to_alloc(*g); bucket_unlock(g); - ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags); - return ret; + return bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags); err_unlock: bucket_unlock(g); err: @@ -1125,10 +1093,10 @@ static int __bch2_trans_mark_dev_sb(struct btree_trans *trans, struct bch_dev *c enum btree_iter_update_trigger_flags flags) { struct bch_fs *c = trans->c; + struct bch_sb_layout layout; - mutex_lock(&c->sb_lock); - struct bch_sb_layout layout = ca->disk_sb.sb->layout; - mutex_unlock(&c->sb_lock); + scoped_guard(mutex, &c->sb_lock) + layout = ca->disk_sb.sb->layout; u64 bucket = 0; unsigned i, bucket_sectors = 0; @@ -1173,8 +1141,8 @@ static int __bch2_trans_mark_dev_sb(struct btree_trans *trans, struct bch_dev *c int bch2_trans_mark_dev_sb(struct bch_fs *c, struct bch_dev *ca, enum btree_iter_update_trigger_flags flags) { - int ret = bch2_trans_run(c, - __bch2_trans_mark_dev_sb(trans, ca, flags)); + CLASS(btree_trans, trans)(c); + int ret = __bch2_trans_mark_dev_sb(trans, ca, flags); bch_err_fn(c, ret); return ret; } @@ -1227,15 +1195,38 @@ bool bch2_is_superblock_bucket(struct bch_dev *ca, u64 b) #define SECTORS_CACHE 1024 +static int disk_reservation_recalc_sectors_available(struct bch_fs *c, + struct disk_reservation *res, + u64 sectors, enum bch_reservation_flags flags) +{ + guard(mutex)(&c->sectors_available_lock); + + percpu_u64_set(&c->pcpu->sectors_available, 0); + u64 sectors_available = avail_factor(__bch2_fs_usage_read_short(c).free); + + if (sectors_available && (flags & BCH_DISK_RESERVATION_PARTIAL)) + sectors = min(sectors, sectors_available); + + if (sectors <= sectors_available || + (flags & BCH_DISK_RESERVATION_NOFAIL)) { + atomic64_set(&c->sectors_available, + max_t(s64, 0, sectors_available - sectors)); + this_cpu_add(*c->online_reserved, sectors); + res->sectors += sectors; + return 0; + } else { + atomic64_set(&c->sectors_available, sectors_available); + return bch_err_throw(c, ENOSPC_disk_reservation); + } +} + int __bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res, u64 sectors, enum bch_reservation_flags flags) { struct bch_fs_pcpu *pcpu; u64 old, get; - u64 sectors_available; - int ret; - percpu_down_read(&c->mark_lock); + guard(percpu_read)(&c->mark_lock); 
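After this refactor, __bch2_disk_reservation_add() is a pure fast path: satisfy the request from a per-CPU cache of reserved sectors, refill the cache in SECTORS_CACHE-sized grabs with a compare-exchange loop on the global c->sectors_available, and only fall back to disk_reservation_recalc_sectors_available() (which recounts free space under c->sectors_available_lock) when the grab cannot cover the request. A self-contained C11 sketch of that two-level scheme, with thread-local storage standing in for per-CPU data and the slow path stubbed to simply report no-space:

	#include <stdatomic.h>
	#include <stdint.h>
	#include <stdio.h>

	#define SECTORS_CACHE	1024

	static _Atomic uint64_t sectors_available;
	static _Thread_local uint64_t pcpu_sectors_available;

	/* stand-in for disk_reservation_recalc_sectors_available(): the real
	 * one recomputes free space under a mutex and may partially succeed */
	static int reservation_recalc(uint64_t sectors)
	{
		(void) sectors;
		return -1;
	}

	static int reservation_add(uint64_t sectors)
	{
		if (sectors <= pcpu_sectors_available)
			goto out;

		uint64_t old = atomic_load(&sectors_available);
		uint64_t get;
		do {
			/* refill the local cache in one grab */
			get = sectors + SECTORS_CACHE < old ? sectors + SECTORS_CACHE : old;
			if (get < sectors)
				return reservation_recalc(sectors);
		} while (!atomic_compare_exchange_weak(&sectors_available, &old, old - get));

		pcpu_sectors_available += get;
	out:
		pcpu_sectors_available -= sectors;
		return 0;
	}

	int main(void)
	{
		atomic_store(&sectors_available, 8192);
		printf("%d %d %d\n", reservation_add(100),
		       reservation_add(100), reservation_add(1 << 20));
		return 0;
	}

The kernel version additionally disables preemption while touching the per-CPU counter and holds c->mark_lock for reading across the whole operation, as the surrounding hunks show.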
preempt_disable(); pcpu = this_cpu_ptr(c->pcpu); @@ -1246,9 +1237,10 @@ int __bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res, do { get = min((u64) sectors + SECTORS_CACHE, old); - if (get < sectors) { + if (unlikely(get < sectors)) { preempt_enable(); - goto recalculate; + return disk_reservation_recalc_sectors_available(c, + res, sectors, flags); } } while (!atomic64_try_cmpxchg(&c->sectors_available, &old, old - get)); @@ -1259,36 +1251,9 @@ out: pcpu->sectors_available -= sectors; this_cpu_add(*c->online_reserved, sectors); res->sectors += sectors; preempt_enable(); - percpu_up_read(&c->mark_lock); return 0; - -recalculate: - mutex_lock(&c->sectors_available_lock); - - percpu_u64_set(&c->pcpu->sectors_available, 0); - sectors_available = avail_factor(__bch2_fs_usage_read_short(c).free); - - if (sectors_available && (flags & BCH_DISK_RESERVATION_PARTIAL)) - sectors = min(sectors, sectors_available); - - if (sectors <= sectors_available || - (flags & BCH_DISK_RESERVATION_NOFAIL)) { - atomic64_set(&c->sectors_available, - max_t(s64, 0, sectors_available - sectors)); - this_cpu_add(*c->online_reserved, sectors); - res->sectors += sectors; - ret = 0; - } else { - atomic64_set(&c->sectors_available, sectors_available); - ret = bch_err_throw(c, ENOSPC_disk_reservation); - } - - mutex_unlock(&c->sectors_available_lock); - percpu_up_read(&c->mark_lock); - - return ret; } /* Startup/shutdown: */ diff --git a/fs/bcachefs/buckets_waiting_for_journal.c b/fs/bcachefs/buckets_waiting_for_journal.c index 832eff93acb6..ca341586920b 100644 --- a/fs/bcachefs/buckets_waiting_for_journal.c +++ b/fs/bcachefs/buckets_waiting_for_journal.c @@ -25,25 +25,20 @@ static void bucket_table_init(struct buckets_waiting_for_journal_table *t, size_ u64 bch2_bucket_journal_seq_ready(struct buckets_waiting_for_journal *b, unsigned dev, u64 bucket) { - struct buckets_waiting_for_journal_table *t; u64 dev_bucket = (u64) dev << 56 | bucket; - u64 ret = 0; - mutex_lock(&b->lock); - t = b->t; + guard(mutex)(&b->lock); + + struct buckets_waiting_for_journal_table *t = b->t; for (unsigned i = 0; i < ARRAY_SIZE(t->hash_seeds); i++) { struct bucket_hashed *h = bucket_hash(t, i, dev_bucket); - if (h->dev_bucket == dev_bucket) { - ret = h->journal_seq; - break; - } + if (h->dev_bucket == dev_bucket) + return h->journal_seq; } - mutex_unlock(&b->lock); - - return ret; + return 0; } static bool bucket_table_insert(struct buckets_waiting_for_journal_table *t, @@ -92,12 +87,11 @@ int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b, .journal_seq = journal_seq, }; size_t i, size, new_bits, nr_elements = 1, nr_rehashes = 0, nr_rehashes_this_size = 0; - int ret = 0; - mutex_lock(&b->lock); + guard(mutex)(&b->lock); if (likely(bucket_table_insert(b->t, &new, flushed_seq))) - goto out; + return 0; t = b->t; size = 1UL << t->bits; @@ -109,8 +103,7 @@ realloc: n = kvmalloc(sizeof(*n) + (sizeof(n->d[0]) << new_bits), GFP_KERNEL); if (!n) { struct bch_fs *c = container_of(b, struct bch_fs, buckets_waiting_for_journal); - ret = bch_err_throw(c, ENOMEM_buckets_waiting_for_journal_set); - goto out; + return bch_err_throw(c, ENOMEM_buckets_waiting_for_journal_set); } retry_rehash: @@ -143,10 +136,7 @@ retry_rehash: pr_debug("took %zu rehashes, table at %zu/%lu elements", nr_rehashes, nr_elements, 1UL << b->t->bits); -out: - mutex_unlock(&b->lock); - - return ret; + return 0; } void bch2_fs_buckets_waiting_for_journal_exit(struct bch_fs *c) diff --git a/fs/bcachefs/chardev.c
b/fs/bcachefs/chardev.c index 5ea89aa2b0c4..467fc45e84fe 100644 --- a/fs/bcachefs/chardev.c +++ b/fs/bcachefs/chardev.c @@ -52,6 +52,11 @@ static struct bch_dev *bch2_device_lookup(struct bch_fs *c, u64 dev, return ca; } +DEFINE_CLASS(bch2_device_lookup, struct bch_dev *, + bch2_dev_put(_T), + bch2_device_lookup(c, dev, flags), + struct bch_fs *c, u64 dev, unsigned flags); + #if 0 static long bch2_ioctl_assemble(struct bch_ioctl_assemble __user *user_arg) { @@ -207,8 +212,6 @@ static long bch2_ioctl_disk_add(struct bch_fs *c, struct bch_ioctl_disk arg) static long bch2_ioctl_disk_remove(struct bch_fs *c, struct bch_ioctl_disk arg) { - struct bch_dev *ca; - if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -219,7 +222,7 @@ static long bch2_ioctl_disk_remove(struct bch_fs *c, struct bch_ioctl_disk arg) arg.pad) return -EINVAL; - ca = bch2_device_lookup(c, arg.dev, arg.flags); + struct bch_dev *ca = bch2_device_lookup(c, arg.dev, arg.flags); if (IS_ERR(ca)) return PTR_ERR(ca); @@ -249,9 +252,6 @@ static long bch2_ioctl_disk_online(struct bch_fs *c, struct bch_ioctl_disk arg) static long bch2_ioctl_disk_offline(struct bch_fs *c, struct bch_ioctl_disk arg) { - struct bch_dev *ca; - int ret; - if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -262,21 +262,16 @@ static long bch2_ioctl_disk_offline(struct bch_fs *c, struct bch_ioctl_disk arg) arg.pad) return -EINVAL; - ca = bch2_device_lookup(c, arg.dev, arg.flags); + CLASS(bch2_device_lookup, ca)(c, arg.dev, arg.flags); if (IS_ERR(ca)) return PTR_ERR(ca); - ret = bch2_dev_offline(c, ca, arg.flags); - bch2_dev_put(ca); - return ret; + return bch2_dev_offline(c, ca, arg.flags); } static long bch2_ioctl_disk_set_state(struct bch_fs *c, struct bch_ioctl_disk_set_state arg) { - struct bch_dev *ca; - int ret; - if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -288,15 +283,12 @@ static long bch2_ioctl_disk_set_state(struct bch_fs *c, arg.new_state >= BCH_MEMBER_STATE_NR) return -EINVAL; - ca = bch2_device_lookup(c, arg.dev, arg.flags); + CLASS(bch2_device_lookup, ca)(c, arg.dev, arg.flags); if (IS_ERR(ca)) return PTR_ERR(ca); - ret = bch2_dev_set_state(c, ca, arg.new_state, arg.flags); - if (ret) - bch_err(c, "Error setting device state: %s", bch2_err_str(ret)); - - bch2_dev_put(ca); + int ret = bch2_dev_set_state(c, ca, arg.new_state, arg.flags); + bch_err_msg(ca, ret, "setting device state"); return ret; } @@ -312,7 +304,7 @@ static int bch2_data_thread(void *arg) { struct bch_data_ctx *ctx = container_of(arg, struct bch_data_ctx, thr); - ctx->thr.ret = bch2_data_job(ctx->c, &ctx->stats, ctx->arg); + ctx->thr.ret = bch2_data_job(ctx->c, &ctx->stats, &ctx->arg); if (ctx->thr.ret == -BCH_ERR_device_offline) ctx->stats.ret = BCH_IOCTL_DATA_EVENT_RET_device_offline; else { @@ -349,14 +341,13 @@ static ssize_t bch2_data_job_read(struct file *file, char __user *buf, }; if (ctx->arg.op == BCH_DATA_OP_scrub) { - struct bch_dev *ca = bch2_dev_tryget(c, ctx->arg.scrub.dev); + CLASS(bch2_dev_tryget_noerror, ca)(c, ctx->arg.scrub.dev); if (ca) { struct bch_dev_usage_full u; bch2_dev_usage_full_read_fast(ca, &u); for (unsigned i = BCH_DATA_btree; i < ARRAY_SIZE(u.d); i++) if (ctx->arg.scrub.data_types & BIT(i)) e.p.sectors_total += u.d[i].sectors; - bch2_dev_put(ca); } } else { e.p.sectors_total = bch2_fs_usage_read_short(c).used; @@ -418,9 +409,8 @@ static noinline_for_stack long bch2_ioctl_fs_usage(struct bch_fs *c, struct bch_ioctl_fs_usage __user *user_arg) { struct bch_ioctl_fs_usage arg = {}; - darray_char replicas = {}; + CLASS(darray_char, replicas)(); u32 
replica_entries_bytes; - int ret = 0; if (!test_bit(BCH_FS_started, &c->flags)) return -EINVAL; @@ -428,11 +418,11 @@ static noinline_for_stack long bch2_ioctl_fs_usage(struct bch_fs *c, if (get_user(replica_entries_bytes, &user_arg->replica_entries_bytes)) return -EFAULT; - ret = bch2_fs_replicas_usage_read(c, &replicas) ?: + int ret = bch2_fs_replicas_usage_read(c, &replicas) ?: (replica_entries_bytes < replicas.nr ? -ERANGE : 0) ?: copy_to_user_errcode(&user_arg->replicas, replicas.data, replicas.nr); if (ret) - goto err; + return ret; struct bch_fs_usage_short u = bch2_fs_usage_read_short(c); arg.capacity = c->capacity; @@ -449,52 +439,41 @@ static noinline_for_stack long bch2_ioctl_fs_usage(struct bch_fs *c, &arg.persistent_reserved[i], 1); } - ret = copy_to_user_errcode(user_arg, &arg, sizeof(arg)); -err: - darray_exit(&replicas); - return ret; + return copy_to_user_errcode(user_arg, &arg, sizeof(arg)); } static long bch2_ioctl_query_accounting(struct bch_fs *c, struct bch_ioctl_query_accounting __user *user_arg) { struct bch_ioctl_query_accounting arg; - darray_char accounting = {}; - int ret = 0; + CLASS(darray_char, accounting)(); if (!test_bit(BCH_FS_started, &c->flags)) return -EINVAL; - ret = copy_from_user_errcode(&arg, user_arg, sizeof(arg)) ?: + int ret = copy_from_user_errcode(&arg, user_arg, sizeof(arg)) ?: bch2_fs_accounting_read(c, &accounting, arg.accounting_types_mask) ?: (arg.accounting_u64s * sizeof(u64) < accounting.nr ? -ERANGE : 0) ?: copy_to_user_errcode(&user_arg->accounting, accounting.data, accounting.nr); if (ret) - goto err; + return ret; arg.capacity = c->capacity; arg.used = bch2_fs_usage_read_short(c).used; arg.online_reserved = percpu_u64_get(c->online_reserved); arg.accounting_u64s = accounting.nr / sizeof(u64); - ret = copy_to_user_errcode(user_arg, &arg, sizeof(arg)); -err: - darray_exit(&accounting); - return ret; + return copy_to_user_errcode(user_arg, &arg, sizeof(arg)); } /* obsolete, didn't allow for new data types: */ static noinline_for_stack long bch2_ioctl_dev_usage(struct bch_fs *c, struct bch_ioctl_dev_usage __user *user_arg) { - struct bch_ioctl_dev_usage arg; - struct bch_dev_usage_full src; - struct bch_dev *ca; - unsigned i; - if (!test_bit(BCH_FS_started, &c->flags)) return -EINVAL; + struct bch_ioctl_dev_usage arg; if (copy_from_user(&arg, user_arg, sizeof(arg))) return -EFAULT; @@ -504,38 +483,32 @@ static noinline_for_stack long bch2_ioctl_dev_usage(struct bch_fs *c, arg.pad[2]) return -EINVAL; - ca = bch2_device_lookup(c, arg.dev, arg.flags); + CLASS(bch2_device_lookup, ca)(c, arg.dev, arg.flags); if (IS_ERR(ca)) return PTR_ERR(ca); - src = bch2_dev_usage_full_read(ca); + struct bch_dev_usage_full src = bch2_dev_usage_full_read(ca); arg.state = ca->mi.state; arg.bucket_size = ca->mi.bucket_size; arg.nr_buckets = ca->mi.nbuckets - ca->mi.first_bucket; - for (i = 0; i < ARRAY_SIZE(arg.d); i++) { + for (unsigned i = 0; i < ARRAY_SIZE(arg.d); i++) { arg.d[i].buckets = src.d[i].buckets; arg.d[i].sectors = src.d[i].sectors; arg.d[i].fragmented = src.d[i].fragmented; } - bch2_dev_put(ca); - return copy_to_user_errcode(user_arg, &arg, sizeof(arg)); } static long bch2_ioctl_dev_usage_v2(struct bch_fs *c, struct bch_ioctl_dev_usage_v2 __user *user_arg) { - struct bch_ioctl_dev_usage_v2 arg; - struct bch_dev_usage_full src; - struct bch_dev *ca; - int ret = 0; - if (!test_bit(BCH_FS_started, &c->flags)) return -EINVAL; + struct bch_ioctl_dev_usage_v2 arg; if (copy_from_user(&arg, user_arg, sizeof(arg))) return -EFAULT; @@ -545,20 +518,20 @@ 
static long bch2_ioctl_dev_usage_v2(struct bch_fs *c, arg.pad[2]) return -EINVAL; - ca = bch2_device_lookup(c, arg.dev, arg.flags); + CLASS(bch2_device_lookup, ca)(c, arg.dev, arg.flags); if (IS_ERR(ca)) return PTR_ERR(ca); - src = bch2_dev_usage_full_read(ca); + struct bch_dev_usage_full src = bch2_dev_usage_full_read(ca); arg.state = ca->mi.state; arg.bucket_size = ca->mi.bucket_size; arg.nr_data_types = min(arg.nr_data_types, BCH_DATA_NR); arg.nr_buckets = ca->mi.nbuckets - ca->mi.first_bucket; - ret = copy_to_user_errcode(user_arg, &arg, sizeof(arg)); + int ret = copy_to_user_errcode(user_arg, &arg, sizeof(arg)); if (ret) - goto err; + return ret; for (unsigned i = 0; i < arg.nr_data_types; i++) { struct bch_ioctl_dev_usage_type t = { @@ -569,11 +542,10 @@ static long bch2_ioctl_dev_usage_v2(struct bch_fs *c, ret = copy_to_user_errcode(&user_arg->d[i], &t, sizeof(t)); if (ret) - goto err; + return ret; } -err: - bch2_dev_put(ca); - return ret; + + return 0; } static long bch2_ioctl_read_super(struct bch_fs *c, @@ -590,13 +562,13 @@ static long bch2_ioctl_read_super(struct bch_fs *c, arg.pad) return -EINVAL; - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); if (arg.flags & BCH_READ_DEV) { ca = bch2_device_lookup(c, arg.dev, arg.flags); ret = PTR_ERR_OR_ZERO(ca); if (ret) - goto err_unlock; + return ret; sb = ca->disk_sb.sb; } else { @@ -612,8 +584,6 @@ static long bch2_ioctl_read_super(struct bch_fs *c, vstruct_bytes(sb)); err: bch2_dev_put(ca); -err_unlock: - mutex_unlock(&c->sb_lock); return ret; } @@ -639,9 +609,6 @@ static long bch2_ioctl_disk_get_idx(struct bch_fs *c, static long bch2_ioctl_disk_resize(struct bch_fs *c, struct bch_ioctl_disk_resize arg) { - struct bch_dev *ca; - int ret; - if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -649,22 +616,16 @@ static long bch2_ioctl_disk_resize(struct bch_fs *c, arg.pad) return -EINVAL; - ca = bch2_device_lookup(c, arg.dev, arg.flags); + CLASS(bch2_device_lookup, ca)(c, arg.dev, arg.flags); if (IS_ERR(ca)) return PTR_ERR(ca); - ret = bch2_dev_resize(c, ca, arg.nbuckets); - - bch2_dev_put(ca); - return ret; + return bch2_dev_resize(c, ca, arg.nbuckets); } static long bch2_ioctl_disk_resize_journal(struct bch_fs *c, struct bch_ioctl_disk_resize_journal arg) { - struct bch_dev *ca; - int ret; - if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -675,14 +636,11 @@ static long bch2_ioctl_disk_resize_journal(struct bch_fs *c, if (arg.nbuckets > U32_MAX) return -EINVAL; - ca = bch2_device_lookup(c, arg.dev, arg.flags); + CLASS(bch2_device_lookup, ca)(c, arg.dev, arg.flags); if (IS_ERR(ca)) return PTR_ERR(ca); - ret = bch2_set_nr_journal_buckets(c, ca, arg.nbuckets); - - bch2_dev_put(ca); - return ret; + return bch2_set_nr_journal_buckets(c, ca, arg.nbuckets); } #define BCH_IOCTL(_name, _argtype) \ diff --git a/fs/bcachefs/checksum.c b/fs/bcachefs/checksum.c index a6795e73f0b9..b1ec38992852 100644 --- a/fs/bcachefs/checksum.c +++ b/fs/bcachefs/checksum.c @@ -361,7 +361,7 @@ int bch2_rechecksum_bio(struct bch_fs *c, struct bio *bio, extent_nonce(version, crc_old), bio); if (bch2_crc_cmp(merged, crc_old.csum) && !c->opts.no_data_io) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_printf(&buf, "checksum error in %s() (memory corruption or bug?)\n" " expected %0llx:%0llx got %0llx:%0llx (old type ", __func__, @@ -374,7 +374,6 @@ int bch2_rechecksum_bio(struct bch_fs *c, struct bio *bio, bch2_prt_csum_type(&buf, new_csum_type); prt_str(&buf, ")"); WARN_RATELIMIT(1, "%s", buf.buf); - printbuf_exit(&buf); return bch_err_throw(c, 
recompute_checksum); } @@ -438,23 +437,21 @@ const struct bch_sb_field_ops bch_sb_field_ops_crypt = { #ifdef __KERNEL__ static int __bch2_request_key(char *key_description, struct bch_key *key) { - struct key *keyring_key; - const struct user_key_payload *ukp; int ret; - keyring_key = request_key(&key_type_user, key_description, NULL); + struct key *keyring_key = request_key(&key_type_user, key_description, NULL); if (IS_ERR(keyring_key)) return PTR_ERR(keyring_key); - down_read(&keyring_key->sem); - ukp = dereference_key_locked(keyring_key); - if (ukp->datalen == sizeof(*key)) { - memcpy(key, ukp->data, ukp->datalen); - ret = 0; - } else { - ret = -EINVAL; + scoped_guard(rwsem_read, &keyring_key->sem) { + const struct user_key_payload *ukp = dereference_key_locked(keyring_key); + if (ukp->datalen == sizeof(*key)) { + memcpy(key, ukp->data, ukp->datalen); + ret = 0; + } else { + ret = -EINVAL; + } } - up_read(&keyring_key->sem); key_put(keyring_key); return ret; @@ -495,14 +492,13 @@ got_key: int bch2_request_key(struct bch_sb *sb, struct bch_key *key) { - struct printbuf key_description = PRINTBUF; + CLASS(printbuf, key_description)(); int ret; prt_printf(&key_description, "bcachefs:"); pr_uuid(&key_description, sb->user_uuid.b); ret = __bch2_request_key(key_description.buf, key); - printbuf_exit(&key_description); #ifndef __KERNEL__ if (ret) { @@ -524,13 +520,12 @@ int bch2_request_key(struct bch_sb *sb, struct bch_key *key) int bch2_revoke_key(struct bch_sb *sb) { key_serial_t key_id; - struct printbuf key_description = PRINTBUF; + CLASS(printbuf, key_description)(); prt_printf(&key_description, "bcachefs:"); pr_uuid(&key_description, sb->user_uuid.b); key_id = request_key("user", key_description.buf, NULL, KEY_SPEC_USER_KEYRING); - printbuf_exit(&key_description); if (key_id < 0) return errno; @@ -584,34 +579,27 @@ err: */ int bch2_disable_encryption(struct bch_fs *c) { - struct bch_sb_field_crypt *crypt; - struct bch_key key; - int ret = -EINVAL; - - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); - crypt = bch2_sb_field_get(c->disk_sb.sb, crypt); + struct bch_sb_field_crypt *crypt = bch2_sb_field_get(c->disk_sb.sb, crypt); if (!crypt) - goto out; + return -EINVAL; /* is key encrypted? */ - ret = 0; if (bch2_key_is_encrypted(&crypt->key)) - goto out; + return 0; - ret = bch2_decrypt_sb_key(c, crypt, &key); + struct bch_key key; + int ret = bch2_decrypt_sb_key(c, crypt, &key); if (ret) - goto out; + return ret; crypt->key.magic = cpu_to_le64(BCH_KEY_MAGIC); crypt->key.key = key; SET_BCH_SB_ENCRYPTION_TYPE(c->disk_sb.sb, 0); bch2_write_super(c); -out: - mutex_unlock(&c->sb_lock); - - return ret; + return 0; } /* @@ -625,7 +614,7 @@ int bch2_enable_encryption(struct bch_fs *c, bool keyed) struct bch_sb_field_crypt *crypt; int ret = -EINVAL; - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); /* Do we already have an encryption key?
*/ if (bch2_sb_field_get(c->disk_sb.sb, crypt)) @@ -669,7 +658,6 @@ int bch2_enable_encryption(struct bch_fs *c, bool keyed) SET_BCH_SB_ENCRYPTION_TYPE(c->disk_sb.sb, 1); bch2_write_super(c); err: - mutex_unlock(&c->sb_lock); memzero_explicit(&user_key, sizeof(user_key)); memzero_explicit(&key, sizeof(key)); return ret; diff --git a/fs/bcachefs/clock.c b/fs/bcachefs/clock.c index 8e9264b5a84e..1c6d0cdca3c5 100644 --- a/fs/bcachefs/clock.c +++ b/fs/bcachefs/clock.c @@ -40,15 +40,13 @@ out: void bch2_io_timer_del(struct io_clock *clock, struct io_timer *timer) { - spin_lock(&clock->timer_lock); + guard(spinlock)(&clock->timer_lock); for (size_t i = 0; i < clock->timers.nr; i++) if (clock->timers.data[i] == timer) { min_heap_del(&clock->timers, i, &callbacks, NULL); - break; + return; } - - spin_unlock(&clock->timer_lock); } struct io_clock_wait { @@ -133,28 +131,27 @@ void __bch2_increment_clock(struct io_clock *clock, u64 sectors) struct io_timer *timer; u64 now = atomic64_add_return(sectors, &clock->now); - spin_lock(&clock->timer_lock); + guard(spinlock)(&clock->timer_lock); + while ((timer = get_expired_timer(clock, now))) timer->fn(timer); - spin_unlock(&clock->timer_lock); } void bch2_io_timers_to_text(struct printbuf *out, struct io_clock *clock) { - out->atomic++; - spin_lock(&clock->timer_lock); u64 now = atomic64_read(&clock->now); printbuf_tabstop_push(out, 40); prt_printf(out, "current time:\t%llu\n", now); + guard(printbuf_atomic)(out); + guard(spinlock)(&clock->timer_lock); + for (unsigned i = 0; i < clock->timers.nr; i++) prt_printf(out, "%ps %ps:\t%llu\n", clock->timers.data[i]->fn, clock->timers.data[i]->fn2, clock->timers.data[i]->expire); - spin_unlock(&clock->timer_lock); - --out->atomic; } void bch2_io_clock_exit(struct io_clock *clock) diff --git a/fs/bcachefs/compress.c b/fs/bcachefs/compress.c index b37b1f325f0a..aeb9b9bd7d33 100644 --- a/fs/bcachefs/compress.c +++ b/fs/bcachefs/compress.c @@ -336,7 +336,7 @@ static int attempt_compress(struct bch_fs *c, void *workspace, void *dst, size_t dst_len, void *src, size_t src_len, - struct bch_compression_opt compression) + union bch_compression_opt compression) { enum bch_compression_type compression_type = __bch2_compression_opt_to_type[compression.type]; @@ -426,7 +426,7 @@ static int attempt_compress(struct bch_fs *c, static unsigned __bio_compress(struct bch_fs *c, struct bio *dst, size_t *dst_len, struct bio *src, size_t *src_len, - struct bch_compression_opt compression) + union bch_compression_opt compression) { struct bbuf src_data = { NULL }, dst_data = { NULL }; void *workspace; @@ -553,7 +553,7 @@ unsigned bch2_bio_compress(struct bch_fs *c, compression_type = __bio_compress(c, dst, dst_len, src, src_len, - bch2_compression_decode(compression_opt)); + (union bch_compression_opt){ .value = compression_opt }); dst->bi_iter.bi_size = orig_dst; src->bi_iter.bi_size = orig_src; @@ -579,30 +579,25 @@ static int __bch2_check_set_has_compressed_data(struct bch_fs *c, u64 f) if ((c->sb.features & f) == f) return 0; - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); - if ((c->sb.features & f) == f) { - mutex_unlock(&c->sb_lock); + if ((c->sb.features & f) == f) return 0; - } ret = __bch2_fs_compress_init(c, c->sb.features|f); - if (ret) { - mutex_unlock(&c->sb_lock); + if (ret) return ret; - } c->disk_sb.sb->features[0] |= cpu_to_le64(f); bch2_write_super(c); - mutex_unlock(&c->sb_lock); - return 0; } int bch2_check_set_has_compressed_data(struct bch_fs *c, unsigned compression_opt) { - unsigned compression_type = 
bch2_compression_decode(compression_opt).type; + unsigned int compression_type = ((union bch_compression_opt){ .value = compression_opt }) + .type; BUG_ON(compression_type >= ARRAY_SIZE(bch2_compression_opt_to_feature)); @@ -683,7 +678,7 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features) static u64 compression_opt_to_feature(unsigned v) { - unsigned type = bch2_compression_decode(v).type; + unsigned int type = ((union bch_compression_opt){ .value = v }).type; return BIT_ULL(bch2_compression_opt_to_feature[type]); } @@ -703,7 +698,7 @@ int bch2_opt_compression_parse(struct bch_fs *c, const char *_val, u64 *res, { char *val = kstrdup(_val, GFP_KERNEL); char *p = val, *type_str, *level_str; - struct bch_compression_opt opt = { 0 }; + union bch_compression_opt opt = { 0 }; int ret; if (!val) @@ -736,7 +731,7 @@ int bch2_opt_compression_parse(struct bch_fs *c, const char *_val, u64 *res, opt.level = level; } - *res = bch2_compression_encode(opt); + *res = opt.value; err: kfree(val); return ret; @@ -744,7 +739,7 @@ err: void bch2_compression_opt_to_text(struct printbuf *out, u64 v) { - struct bch_compression_opt opt = bch2_compression_decode(v); + union bch_compression_opt opt = { .value = v }; if (opt.type < BCH_COMPRESSION_OPT_NR) prt_str(out, bch2_compression_opts[opt.type]); diff --git a/fs/bcachefs/compress.h b/fs/bcachefs/compress.h index bec2f05bfd52..667ddb91d47a 100644 --- a/fs/bcachefs/compress.h +++ b/fs/bcachefs/compress.h @@ -10,41 +10,27 @@ static const unsigned __bch2_compression_opt_to_type[] = { #undef x }; -struct bch_compression_opt { - u8 type:4, - level:4; -}; - -static inline struct bch_compression_opt __bch2_compression_decode(unsigned v) -{ - return (struct bch_compression_opt) { - .type = v & 15, - .level = v >> 4, +union bch_compression_opt { + u8 value; + struct { +#if defined(__LITTLE_ENDIAN_BITFIELD) + u8 type:4, level:4; +#elif defined(__BIG_ENDIAN_BITFIELD) + u8 level:4, type:4; +#endif }; -} +}; static inline bool bch2_compression_opt_valid(unsigned v) { - struct bch_compression_opt opt = __bch2_compression_decode(v); + union bch_compression_opt opt = { .value = v }; return opt.type < ARRAY_SIZE(__bch2_compression_opt_to_type) && !(!opt.type && opt.level); } -static inline struct bch_compression_opt bch2_compression_decode(unsigned v) -{ - return bch2_compression_opt_valid(v) - ? 
__bch2_compression_decode(v) - : (struct bch_compression_opt) { 0 }; -} - -static inline unsigned bch2_compression_encode(struct bch_compression_opt opt) -{ - return opt.type|(opt.level << 4); -} - static inline enum bch_compression_type bch2_compression_opt_to_type(unsigned v) { - return __bch2_compression_opt_to_type[bch2_compression_decode(v).type]; + return __bch2_compression_opt_to_type[((union bch_compression_opt){ .value = v }).type]; } struct bch_write_op; diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 3968f3be7f3b..ccedc93fe0ef 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -115,7 +115,7 @@ static void trace_io_move_finish2(struct data_update *u, struct bkey_i *insert) { struct bch_fs *c = u->op.c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_newline(&buf); @@ -131,7 +131,6 @@ static void trace_io_move_finish2(struct data_update *u, prt_newline(&buf); trace_io_move_finish(c, buf.buf); - printbuf_exit(&buf); } noinline_for_stack @@ -143,7 +142,7 @@ static void trace_io_move_fail2(struct data_update *m, { struct bch_fs *c = m->op.c; struct bkey_s_c old = bkey_i_to_s_c(m->k.k); - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); unsigned rewrites_found = 0; if (!trace_io_move_fail_enabled()) @@ -187,7 +186,6 @@ static void trace_io_move_fail2(struct data_update *m, } trace_io_move_fail(c, buf.buf); - printbuf_exit(&buf); } noinline_for_stack @@ -196,7 +194,7 @@ static void trace_data_update2(struct data_update *m, struct bkey_i *insert) { struct bch_fs *c = m->op.c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_str(&buf, "\nold: "); bch2_bkey_val_to_text(&buf, c, old); @@ -206,7 +204,6 @@ static void trace_data_update2(struct data_update *m, bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert)); trace_data_update(c, buf.buf); - printbuf_exit(&buf); } noinline_for_stack @@ -215,7 +212,7 @@ static void trace_io_move_created_rebalance2(struct data_update *m, struct bkey_i *insert) { struct bch_fs *c = m->op.c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_data_update_opts_to_text(&buf, c, &m->op.opts, &m->data_opts); @@ -227,7 +224,6 @@ static void trace_io_move_created_rebalance2(struct data_update *m, bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert)); trace_io_move_created_rebalance(c, buf.buf); - printbuf_exit(&buf); this_cpu_inc(c->counters[BCH_COUNTER_io_move_created_rebalance]); } @@ -238,7 +234,7 @@ static int data_update_invalid_bkey(struct data_update *m, struct bkey_i *insert) { struct bch_fs *c = m->op.c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_log_msg_start(c, &buf); prt_str(&buf, "about to insert invalid key in data update path"); @@ -254,7 +250,6 @@ static int data_update_invalid_bkey(struct data_update *m, bch2_fs_emergency_read_only2(c, &buf); bch2_print_str(c, KERN_ERR, buf.buf); - printbuf_exit(&buf); return bch_err_throw(c, invalid_bkey); } @@ -499,7 +494,8 @@ out: int bch2_data_update_index_update(struct bch_write_op *op) { - return bch2_trans_run(op->c, __bch2_data_update_index_update(trans, op)); + CLASS(btree_trans, trans)(op->c); + return __bch2_data_update_index_update(trans, op); } void bch2_data_update_read_done(struct data_update *m) @@ -675,7 +671,7 @@ void bch2_data_update_inflight_to_text(struct printbuf *out, struct data_update if (!m->read_done) { prt_printf(out, "read:\n"); printbuf_indent_add(out, 2); - bch2_read_bio_to_text(out, &m->rbio); + bch2_read_bio_to_text(out, m->op.c, &m->rbio); } else { 
prt_printf(out, "write:\n"); printbuf_indent_add(out, 2); @@ -784,8 +780,8 @@ static int can_write_extent(struct bch_fs *c, struct data_update *m) __clear_bit(*i, devs.d); CLASS(printbuf, buf)(); - buf.atomic++; + guard(printbuf_atomic)(&buf); guard(rcu)(); unsigned nr_replicas = 0, i; diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index 07c2a0f73cc2..33cb94f70b19 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -141,7 +141,7 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b) return; bch2_btree_node_io_lock(b); - mutex_lock(&c->verify_lock); + guard(mutex)(&c->verify_lock); if (!c->verify_ondisk) { c->verify_ondisk = kvmalloc(btree_buf_bytes(b), GFP_KERNEL); @@ -172,14 +172,11 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b) failed |= bch2_btree_verify_replica(c, b, p); if (failed) { - struct printbuf buf = PRINTBUF; - + CLASS(printbuf, buf)(); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); bch2_fs_fatal_error(c, ": btree node verify failed for: %s\n", buf.buf); - printbuf_exit(&buf); } out: - mutex_unlock(&c->verify_lock); bch2_btree_node_io_unlock(b); } @@ -367,17 +364,17 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf, i->size = size; i->ret = 0; + CLASS(btree_trans, trans)(i->c); return bch2_debugfs_flush_buf(i) ?: - bch2_trans_run(i->c, - for_each_btree_key(trans, iter, i->id, i->from, - BTREE_ITER_prefetch| - BTREE_ITER_all_snapshots, k, ({ - bch2_bkey_val_to_text(&i->buf, i->c, k); - prt_newline(&i->buf); - bch2_trans_unlock(trans); - i->from = bpos_successor(iter.pos); - bch2_debugfs_flush_buf(i); - }))) ?: + for_each_btree_key(trans, iter, i->id, i->from, + BTREE_ITER_prefetch| + BTREE_ITER_all_snapshots, k, ({ + bch2_bkey_val_to_text(&i->buf, i->c, k); + prt_newline(&i->buf); + bch2_trans_unlock(trans); + i->from = bpos_successor(iter.pos); + bch2_debugfs_flush_buf(i); + })) ?: i->ret; } @@ -404,15 +401,15 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf, if (bpos_eq(SPOS_MAX, i->from)) return i->ret; - return bch2_trans_run(i->c, - for_each_btree_node(trans, iter, i->id, i->from, 0, b, ({ - bch2_btree_node_to_text(&i->buf, i->c, b); - i->from = !bpos_eq(SPOS_MAX, b->key.k.p) - ? bpos_successor(b->key.k.p) - : b->key.k.p; + CLASS(btree_trans, trans)(i->c); + return for_each_btree_node(trans, iter, i->id, i->from, 0, b, ({ + bch2_btree_node_to_text(&i->buf, i->c, b); + i->from = !bpos_eq(SPOS_MAX, b->key.k.p) + ? 
bpos_successor(b->key.k.p) + : b->key.k.p; - drop_locks_do(trans, bch2_debugfs_flush_buf(i)); - }))) ?: i->ret; + drop_locks_do(trans, bch2_debugfs_flush_buf(i)); + })) ?: i->ret; } static const struct file_operations btree_format_debug_ops = { @@ -431,27 +428,27 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf, i->size = size; i->ret = 0; + CLASS(btree_trans, trans)(i->c); return bch2_debugfs_flush_buf(i) ?: - bch2_trans_run(i->c, - for_each_btree_key(trans, iter, i->id, i->from, - BTREE_ITER_prefetch| - BTREE_ITER_all_snapshots, k, ({ - struct btree_path_level *l = - &btree_iter_path(trans, &iter)->l[0]; - struct bkey_packed *_k = - bch2_btree_node_iter_peek(&l->iter, l->b); - - if (bpos_gt(l->b->key.k.p, i->prev_node)) { - bch2_btree_node_to_text(&i->buf, i->c, l->b); - i->prev_node = l->b->key.k.p; - } - - bch2_bfloat_to_text(&i->buf, l->b, _k); - bch2_trans_unlock(trans); - i->from = bpos_successor(iter.pos); - bch2_debugfs_flush_buf(i); - }))) ?: - i->ret; + for_each_btree_key(trans, iter, i->id, i->from, + BTREE_ITER_prefetch| + BTREE_ITER_all_snapshots, k, ({ + struct btree_path_level *l = + &btree_iter_path(trans, &iter)->l[0]; + struct bkey_packed *_k = + bch2_btree_node_iter_peek(&l->iter, l->b); + + if (bpos_gt(l->b->key.k.p, i->prev_node)) { + bch2_btree_node_to_text(&i->buf, i->c, l->b); + i->prev_node = l->b->key.k.p; + } + + bch2_bfloat_to_text(&i->buf, l->b, _k); + bch2_trans_unlock(trans); + i->from = bpos_successor(iter.pos); + bch2_debugfs_flush_buf(i); + })) ?: + i->ret; } static const struct file_operations bfloat_failed_debug_ops = { @@ -465,7 +462,7 @@ static void bch2_cached_btree_node_to_text(struct printbuf *out, struct bch_fs * struct btree *b) { if (!out->nr_tabstops) - printbuf_tabstop_push(out, 32); + printbuf_tabstop_push(out, 36); prt_printf(out, "%px ", b); bch2_btree_id_level_to_text(out, b->c.btree_id, b->c.level); @@ -512,8 +509,8 @@ static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf, if (ret) return ret; - i->buf.atomic++; scoped_guard(rcu) { + guard(printbuf_atomic)(&i->buf); struct bucket_table *tbl = rht_dereference_rcu(c->btree_cache.table.tbl, &c->btree_cache.table); @@ -528,7 +525,6 @@ static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf, done = true; } } - --i->buf.atomic; } while (!done); if (i->buf.allocation_failure) @@ -771,7 +767,7 @@ static ssize_t btree_transaction_stats_read(struct file *file, char __user *buf, prt_printf(&i->buf, "%s:\n", bch2_btree_transaction_fns[i->iter]); printbuf_indent_add(&i->buf, 2); - mutex_lock(&s->lock); + guard(mutex)(&s->lock); prt_printf(&i->buf, "Max mem used: %u\n", s->max_mem); #ifdef CONFIG_BCACHEFS_TRANS_KMALLOC_TRACE @@ -802,8 +798,6 @@ static ssize_t btree_transaction_stats_read(struct file *file, char __user *buf, printbuf_indent_sub(&i->buf, 2); } - mutex_unlock(&s->lock); - printbuf_indent_sub(&i->buf, 2); prt_newline(&i->buf); i->iter++; diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index ccbb0127b724..e27cf62d3a5e 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -13,6 +13,7 @@ #include <linux/dcache.h> +#if IS_ENABLED(CONFIG_UNICODE) int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info, const struct qstr *str, struct qstr *out_cf) { @@ -34,6 +35,7 @@ int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info, *out_cf = (struct qstr) QSTR_INIT(buf, ret); return 0; } +#endif static unsigned bch2_dirent_name_bytes(struct bkey_s_c_dirent d) { @@ 
-256,6 +258,7 @@ int bch2_dirent_init_name(struct bch_fs *c, if (ret) return ret; +#if IS_ENABLED(CONFIG_UNICODE) memcpy(&dirent->v.d_cf_name_block.d_names[0], name->name, name->len); char *cf_out = &dirent->v.d_cf_name_block.d_names[name->len]; @@ -281,6 +284,7 @@ int bch2_dirent_init_name(struct bch_fs *c, dirent->v.d_cf_name_block.d_cf_name_len = cpu_to_le16(cf_len); EBUG_ON(bch2_dirent_get_casefold_name(dirent_i_to_s_c(dirent)).len != cf_len); +#endif } unsigned u64s = dirent_val_u64s(name->len, cf_len); @@ -617,13 +621,12 @@ u64 bch2_dirent_lookup(struct bch_fs *c, subvol_inum dir, const struct bch_hash_info *hash_info, const struct qstr *name, subvol_inum *inum) { - struct btree_trans *trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); struct btree_iter iter = {}; int ret = lockrestart_do(trans, bch2_dirent_lookup_trans(trans, &iter, dir, hash_info, name, inum, 0)); bch2_trans_iter_exit(trans, &iter); - bch2_trans_put(trans); return ret; } @@ -683,8 +686,8 @@ int bch2_readdir(struct bch_fs *c, subvol_inum inum, struct bkey_buf sk; bch2_bkey_buf_init(&sk); - int ret = bch2_trans_run(c, - for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_dirents, + CLASS(btree_trans, trans)(c); + int ret = for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_dirents, POS(inum.inum, ctx->pos), POS(inum.inum, U64_MAX), inum.subvol, 0, k, ({ @@ -705,7 +708,7 @@ int bch2_readdir(struct bch_fs *c, subvol_inum inum, continue; ret2 ?: (bch2_trans_unlock(trans), bch2_dir_emit(ctx, dirent, target)); - }))); + })); bch2_bkey_buf_exit(&sk, c); diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h index 1e17199cc5c7..efb58d2dcf68 100644 --- a/fs/bcachefs/dirent.h +++ b/fs/bcachefs/dirent.h @@ -23,8 +23,16 @@ struct bch_fs; struct bch_hash_info; struct bch_inode_info; +#if IS_ENABLED(CONFIG_UNICODE) int bch2_casefold(struct btree_trans *, const struct bch_hash_info *, const struct qstr *, struct qstr *); +#else +static inline int bch2_casefold(struct btree_trans *trans, const struct bch_hash_info *info, + const struct qstr *str, struct qstr *out_cf) +{ + return bch_err_throw(trans->c, no_casefolding_without_utf8); +} +#endif static inline int bch2_maybe_casefold(struct btree_trans *trans, const struct bch_hash_info *info, diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index f7528cd69c73..219e37738aee 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -380,11 +380,10 @@ static int __bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accoun accounting_pos_cmp, NULL); if (trace_accounting_mem_insert_enabled()) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_accounting_to_text(&buf, c, a.s_c); trace_accounting_mem_insert(c, buf.buf); - printbuf_exit(&buf); } return 0; err: @@ -404,9 +403,9 @@ int bch2_accounting_mem_insert(struct bch_fs *c, struct bkey_s_c_accounting a, return bch_err_throw(c, btree_insert_need_mark_replicas); percpu_up_read(&c->mark_lock); - percpu_down_write(&c->mark_lock); - int ret = __bch2_accounting_mem_insert(c, a); - percpu_up_write(&c->mark_lock); + int ret; + scoped_guard(percpu_write, &c->mark_lock) + ret = __bch2_accounting_mem_insert(c, a); percpu_down_read(&c->mark_lock); return ret; } @@ -438,7 +437,7 @@ void bch2_accounting_mem_gc(struct bch_fs *c) { struct bch_accounting_mem *acc = &c->accounting; - percpu_down_write(&c->mark_lock); + guard(percpu_write)(&c->mark_lock); struct accounting_mem_entry *dst = acc->k.data; darray_for_each(acc->k, src) { @@ -453,7 +452,6 @@ void 
bch2_accounting_mem_gc(struct bch_fs *c) acc->k.nr = dst - acc->k.data; eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), accounting_pos_cmp, NULL); - percpu_up_write(&c->mark_lock); } /* @@ -471,7 +469,7 @@ int bch2_fs_replicas_usage_read(struct bch_fs *c, darray_char *usage) darray_init(usage); - percpu_down_read(&c->mark_lock); + guard(percpu_read)(&c->mark_lock); darray_for_each(acc->k, i) { union { u8 bytes[struct_size_t(struct bch_replicas_usage, r.devs, @@ -494,7 +492,6 @@ int bch2_fs_replicas_usage_read(struct bch_fs *c, darray_char *usage) memcpy(&darray_top(*usage), &u.r, replicas_usage_bytes(&u.r)); usage->nr += replicas_usage_bytes(&u.r); } - percpu_up_read(&c->mark_lock); if (ret) darray_exit(usage); @@ -509,7 +506,7 @@ int bch2_fs_accounting_read(struct bch_fs *c, darray_char *out_buf, unsigned acc darray_init(out_buf); - percpu_down_read(&c->mark_lock); + guard(percpu_read)(&c->mark_lock); darray_for_each(acc->k, i) { struct disk_accounting_pos a_p; bpos_to_disk_accounting_pos(&a_p, i->pos); @@ -533,8 +530,6 @@ int bch2_fs_accounting_read(struct bch_fs *c, darray_char *out_buf, unsigned acc out_buf->nr += bkey_bytes(&a_out->k); } - percpu_up_read(&c->mark_lock); - if (ret) darray_exit(out_buf); return ret; @@ -553,7 +548,7 @@ int bch2_gc_accounting_start(struct bch_fs *c) struct bch_accounting_mem *acc = &c->accounting; int ret = 0; - percpu_down_write(&c->mark_lock); + guard(percpu_write)(&c->mark_lock); darray_for_each(acc->k, e) { e->v[1] = __alloc_percpu_gfp(e->nr_counters * sizeof(u64), sizeof(u64), GFP_KERNEL); @@ -565,20 +560,18 @@ int bch2_gc_accounting_start(struct bch_fs *c) } acc->gc_running = !ret; - percpu_up_write(&c->mark_lock); - return ret; } int bch2_gc_accounting_done(struct bch_fs *c) { struct bch_accounting_mem *acc = &c->accounting; - struct btree_trans *trans = bch2_trans_get(c); - struct printbuf buf = PRINTBUF; + CLASS(btree_trans, trans)(c); + CLASS(printbuf, buf)(); struct bpos pos = POS_MIN; int ret = 0; - percpu_down_write(&c->mark_lock); + guard(percpu_write)(&c->mark_lock); while (1) { unsigned idx = eytzinger0_find_ge(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), accounting_pos_cmp, &pos); @@ -622,7 +615,8 @@ int bch2_gc_accounting_done(struct bch_fs *c) if (fsck_err(c, accounting_mismatch, "%s", buf.buf)) { percpu_up_write(&c->mark_lock); - ret = commit_do(trans, NULL, NULL, 0, + ret = commit_do(trans, NULL, NULL, + BCH_TRANS_COMMIT_skip_accounting_apply, bch2_disk_accounting_mod(trans, &acc_k, src_v, nr, false)); percpu_down_write(&c->mark_lock); if (ret) @@ -637,20 +631,16 @@ int bch2_gc_accounting_done(struct bch_fs *c) bkey_i_to_s_c_accounting(&k_i.k), BCH_ACCOUNTING_normal, true); - preempt_disable(); + guard(preempt)(); struct bch_fs_usage_base *dst = this_cpu_ptr(c->usage); struct bch_fs_usage_base *src = &trans->fs_usage_delta; acc_u64s((u64 *) dst, (u64 *) src, sizeof(*src) / sizeof(u64)); - preempt_enable(); } } } } err: fsck_err: - percpu_up_write(&c->mark_lock); - printbuf_exit(&buf); - bch2_trans_put(trans); bch_err_fn(c, ret); return ret; } @@ -662,11 +652,9 @@ static int accounting_read_key(struct btree_trans *trans, struct bkey_s_c k) if (k.k->type != KEY_TYPE_accounting) return 0; - percpu_down_read(&c->mark_lock); - int ret = bch2_accounting_mem_mod_locked(trans, bkey_s_c_to_accounting(k), - BCH_ACCOUNTING_read, false); - percpu_up_read(&c->mark_lock); - return ret; + guard(percpu_read)(&c->mark_lock); + return bch2_accounting_mem_mod_locked(trans, bkey_s_c_to_accounting(k), + BCH_ACCOUNTING_read, false); } 
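Note the shape bch2_accounting_mem_insert() keeps in the disk_accounting.c hunks above even after the guard conversion: the function is called with the read side of mark_lock held, and a percpu_rw_semaphore, like any rwlock, cannot be upgraded in place, so it drops the read lock, takes the write side inside a scoped_guard for the insert, then retakes the read lock so the caller's locking state is unchanged on return. A minimal sketch of that dance, with a pthread rwlock assumed as a stand-in for the percpu_rw_semaphore:

	#include <pthread.h>
	#include <stdio.h>

	static pthread_rwlock_t mark_lock = PTHREAD_RWLOCK_INITIALIZER;
	static int nr_entries;

	static int __mem_insert(void)
	{
		return ++nr_entries;	/* the real work happens write-locked */
	}

	/* called, and returns, with the read lock held */
	static int mem_insert(void)
	{
		pthread_rwlock_unlock(&mark_lock);	/* percpu_up_read() */

		pthread_rwlock_wrlock(&mark_lock);	/* scoped_guard(percpu_write, ...) */
		int ret = __mem_insert();
		pthread_rwlock_unlock(&mark_lock);

		pthread_rwlock_rdlock(&mark_lock);	/* percpu_down_read() */
		return ret;
	}

	int main(void)
	{
		pthread_rwlock_rdlock(&mark_lock);
		printf("inserted, nr = %d\n", mem_insert());
		pthread_rwlock_unlock(&mark_lock);
		return 0;
	}

Preserving the read-locked calling convention is what lets the call sites stay untouched in the hunks above.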
static int bch2_disk_accounting_validate_late(struct btree_trans *trans, @@ -674,7 +662,7 @@ static int bch2_disk_accounting_validate_late(struct btree_trans *trans, u64 *v, unsigned nr) { struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); int ret = 0, invalid_dev = -1; switch (acc->type) { @@ -723,7 +711,6 @@ static int bch2_disk_accounting_validate_late(struct btree_trans *trans, } fsck_err: - printbuf_exit(&buf); return ret; invalid_device: if (fsck_err(trans, accounting_to_invalid_device, @@ -751,8 +738,8 @@ invalid_device: int bch2_accounting_read(struct bch_fs *c) { struct bch_accounting_mem *acc = &c->accounting; - struct btree_trans *trans = bch2_trans_get(c); - struct printbuf buf = PRINTBUF; + CLASS(btree_trans, trans)(c); + CLASS(printbuf, buf)(); /* * We might run more than once if we rewind to start topology repair or @@ -761,13 +748,13 @@ int bch2_accounting_read(struct bch_fs *c) * * Instead, zero out any accounting we have: */ - percpu_down_write(&c->mark_lock); - darray_for_each(acc->k, e) - percpu_memset(e->v[0], 0, sizeof(u64) * e->nr_counters); - for_each_member_device(c, ca) - percpu_memset(ca->usage, 0, sizeof(*ca->usage)); - percpu_memset(c->usage, 0, sizeof(*c->usage)); - percpu_up_write(&c->mark_lock); + scoped_guard(percpu_write, &c->mark_lock) { + darray_for_each(acc->k, e) + percpu_memset(e->v[0], 0, sizeof(u64) * e->nr_counters); + for_each_member_device(c, ca) + percpu_memset(ca->usage, 0, sizeof(*ca->usage)); + percpu_memset(c->usage, 0, sizeof(*c->usage)); + } struct btree_iter iter; bch2_trans_iter_init(trans, &iter, BTREE_ID_accounting, POS_MIN, @@ -798,7 +785,7 @@ int bch2_accounting_read(struct bch_fs *c) accounting_read_key(trans, k); })); if (ret) - goto err; + return ret; struct journal_keys *keys = &c->journal_keys; struct journal_key *dst = keys->data; @@ -837,14 +824,14 @@ int bch2_accounting_read(struct bch_fs *c) ret = accounting_read_key(trans, k); if (ret) - goto err; + return ret; } *dst++ = *i; } keys->gap = keys->nr = dst - keys->data; - percpu_down_write(&c->mark_lock); + guard(percpu_write)(&c->mark_lock); darray_for_each_reverse(acc->k, i) { struct disk_accounting_pos acc_k; @@ -876,60 +863,55 @@ int bch2_accounting_read(struct bch_fs *c) } if (ret) - goto fsck_err; + return ret; } eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), accounting_pos_cmp, NULL); - preempt_disable(); - struct bch_fs_usage_base *usage = this_cpu_ptr(c->usage); + scoped_guard(preempt) { + struct bch_fs_usage_base *usage = this_cpu_ptr(c->usage); - for (unsigned i = 0; i < acc->k.nr; i++) { - struct disk_accounting_pos k; - bpos_to_disk_accounting_pos(&k, acc->k.data[i].pos); + for (unsigned i = 0; i < acc->k.nr; i++) { + struct disk_accounting_pos k; + bpos_to_disk_accounting_pos(&k, acc->k.data[i].pos); - u64 v[BCH_ACCOUNTING_MAX_COUNTERS]; - bch2_accounting_mem_read_counters(acc, i, v, ARRAY_SIZE(v), false); + u64 v[BCH_ACCOUNTING_MAX_COUNTERS]; + bch2_accounting_mem_read_counters(acc, i, v, ARRAY_SIZE(v), false); - switch (k.type) { - case BCH_DISK_ACCOUNTING_persistent_reserved: - usage->reserved += v[0] * k.persistent_reserved.nr_replicas; - break; - case BCH_DISK_ACCOUNTING_replicas: - fs_usage_data_type_to_base(usage, k.replicas.data_type, v[0]); - break; - case BCH_DISK_ACCOUNTING_dev_data_type: { - guard(rcu)(); - struct bch_dev *ca = bch2_dev_rcu_noerror(c, k.dev_data_type.dev); - if (ca) { - struct bch_dev_usage_type __percpu *d = &ca->usage->d[k.dev_data_type.data_type]; - percpu_u64_set(&d->buckets, 
v[0]); - percpu_u64_set(&d->sectors, v[1]); - percpu_u64_set(&d->fragmented, v[2]); - - if (k.dev_data_type.data_type == BCH_DATA_sb || - k.dev_data_type.data_type == BCH_DATA_journal) - usage->hidden += v[0] * ca->mi.bucket_size; + switch (k.type) { + case BCH_DISK_ACCOUNTING_persistent_reserved: + usage->reserved += v[0] * k.persistent_reserved.nr_replicas; + break; + case BCH_DISK_ACCOUNTING_replicas: + fs_usage_data_type_to_base(usage, k.replicas.data_type, v[0]); + break; + case BCH_DISK_ACCOUNTING_dev_data_type: { + guard(rcu)(); + struct bch_dev *ca = bch2_dev_rcu_noerror(c, k.dev_data_type.dev); + if (ca) { + struct bch_dev_usage_type __percpu *d = &ca->usage->d[k.dev_data_type.data_type]; + percpu_u64_set(&d->buckets, v[0]); + percpu_u64_set(&d->sectors, v[1]); + percpu_u64_set(&d->fragmented, v[2]); + + if (k.dev_data_type.data_type == BCH_DATA_sb || + k.dev_data_type.data_type == BCH_DATA_journal) + usage->hidden += v[0] * ca->mi.bucket_size; + } + break; + } } - break; - } } } - preempt_enable(); -fsck_err: - percpu_up_write(&c->mark_lock); -err: - printbuf_exit(&buf); - bch2_trans_put(trans); - bch_err_fn(c, ret); + return ret; } int bch2_dev_usage_remove(struct bch_fs *c, unsigned dev) { - return bch2_trans_run(c, - bch2_btree_write_buffer_flush_sync(trans) ?: + CLASS(btree_trans, trans)(c); + return bch2_btree_write_buffer_flush_sync(trans) ?: for_each_btree_key_commit(trans, iter, BTREE_ID_accounting, POS_MIN, BTREE_ITER_all_snapshots, k, NULL, NULL, 0, ({ struct disk_accounting_pos acc; @@ -940,15 +922,16 @@ int bch2_dev_usage_remove(struct bch_fs *c, unsigned dev) ? bch2_btree_bit_mod_buffered(trans, BTREE_ID_accounting, k.k->p, 0) : 0; })) ?: - bch2_btree_write_buffer_flush_sync(trans)); + bch2_btree_write_buffer_flush_sync(trans); } int bch2_dev_usage_init(struct bch_dev *ca, bool gc) { struct bch_fs *c = ca->fs; + CLASS(btree_trans, trans)(c); u64 v[3] = { ca->mi.nbuckets - ca->mi.first_bucket, 0, 0 }; - int ret = bch2_trans_do(c, ({ + int ret = lockrestart_do(trans, ({ bch2_disk_accounting_mod2(trans, gc, v, dev_data_type, .dev = ca->dev_idx, @@ -964,78 +947,77 @@ void bch2_verify_accounting_clean(struct bch_fs *c) bool mismatch = false; struct bch_fs_usage_base base = {}, base_inmem = {}; - bch2_trans_run(c, - for_each_btree_key(trans, iter, - BTREE_ID_accounting, POS_MIN, - BTREE_ITER_all_snapshots, k, ({ - u64 v[BCH_ACCOUNTING_MAX_COUNTERS]; - struct bkey_s_c_accounting a = bkey_s_c_to_accounting(k); - unsigned nr = bch2_accounting_counters(k.k); + CLASS(btree_trans, trans)(c); + for_each_btree_key(trans, iter, + BTREE_ID_accounting, POS_MIN, + BTREE_ITER_all_snapshots, k, ({ + u64 v[BCH_ACCOUNTING_MAX_COUNTERS]; + struct bkey_s_c_accounting a = bkey_s_c_to_accounting(k); + unsigned nr = bch2_accounting_counters(k.k); - struct disk_accounting_pos acc_k; - bpos_to_disk_accounting_pos(&acc_k, k.k->p); + struct disk_accounting_pos acc_k; + bpos_to_disk_accounting_pos(&acc_k, k.k->p); - if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR) - break; + if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR) + break; - if (!bch2_accounting_is_mem(&acc_k)) { - struct disk_accounting_pos next; - memset(&next, 0, sizeof(next)); - next.type = acc_k.type + 1; - bch2_btree_iter_set_pos(trans, &iter, disk_accounting_pos_to_bpos(&next)); - continue; - } + if (!bch2_accounting_is_mem(&acc_k)) { + struct disk_accounting_pos next; + memset(&next, 0, sizeof(next)); + next.type = acc_k.type + 1; + bch2_btree_iter_set_pos(trans, &iter, disk_accounting_pos_to_bpos(&next)); + continue; + } - 
bch2_accounting_mem_read(c, k.k->p, v, nr); + bch2_accounting_mem_read(c, k.k->p, v, nr); - if (memcmp(a.v->d, v, nr * sizeof(u64))) { - struct printbuf buf = PRINTBUF; + if (memcmp(a.v->d, v, nr * sizeof(u64))) { + CLASS(printbuf, buf)(); - bch2_bkey_val_to_text(&buf, c, k); - prt_str(&buf, " !="); - for (unsigned j = 0; j < nr; j++) - prt_printf(&buf, " %llu", v[j]); + bch2_bkey_val_to_text(&buf, c, k); + prt_str(&buf, " !="); + for (unsigned j = 0; j < nr; j++) + prt_printf(&buf, " %llu", v[j]); - pr_err("%s", buf.buf); - printbuf_exit(&buf); - mismatch = true; - } + pr_err("%s", buf.buf); + mismatch = true; + } - switch (acc_k.type) { - case BCH_DISK_ACCOUNTING_persistent_reserved: - base.reserved += acc_k.persistent_reserved.nr_replicas * a.v->d[0]; - break; - case BCH_DISK_ACCOUNTING_replicas: - fs_usage_data_type_to_base(&base, acc_k.replicas.data_type, a.v->d[0]); - break; - case BCH_DISK_ACCOUNTING_dev_data_type: - { - guard(rcu)(); /* scoped guard is a loop, and doesn't play nicely with continue */ - struct bch_dev *ca = bch2_dev_rcu_noerror(c, acc_k.dev_data_type.dev); - if (!ca) - continue; - - v[0] = percpu_u64_get(&ca->usage->d[acc_k.dev_data_type.data_type].buckets); - v[1] = percpu_u64_get(&ca->usage->d[acc_k.dev_data_type.data_type].sectors); - v[2] = percpu_u64_get(&ca->usage->d[acc_k.dev_data_type.data_type].fragmented); - } + switch (acc_k.type) { + case BCH_DISK_ACCOUNTING_persistent_reserved: + base.reserved += acc_k.persistent_reserved.nr_replicas * a.v->d[0]; + break; + case BCH_DISK_ACCOUNTING_replicas: + fs_usage_data_type_to_base(&base, acc_k.replicas.data_type, a.v->d[0]); + break; + case BCH_DISK_ACCOUNTING_dev_data_type: { + { + guard(rcu)(); /* scoped guard is a loop, and doesn't play nicely with continue */ + struct bch_dev *ca = bch2_dev_rcu_noerror(c, acc_k.dev_data_type.dev); + if (!ca) + continue; + + v[0] = percpu_u64_get(&ca->usage->d[acc_k.dev_data_type.data_type].buckets); + v[1] = percpu_u64_get(&ca->usage->d[acc_k.dev_data_type.data_type].sectors); + v[2] = percpu_u64_get(&ca->usage->d[acc_k.dev_data_type.data_type].fragmented); + } - if (memcmp(a.v->d, v, 3 * sizeof(u64))) { - struct printbuf buf = PRINTBUF; + if (memcmp(a.v->d, v, 3 * sizeof(u64))) { + CLASS(printbuf, buf)(); - bch2_bkey_val_to_text(&buf, c, k); - prt_str(&buf, " in mem"); - for (unsigned j = 0; j < nr; j++) - prt_printf(&buf, " %llu", v[j]); + bch2_bkey_val_to_text(&buf, c, k); + prt_str(&buf, " in mem"); + for (unsigned j = 0; j < nr; j++) + prt_printf(&buf, " %llu", v[j]); - pr_err("dev accounting mismatch: %s", buf.buf); - printbuf_exit(&buf); - mismatch = true; - } + pr_err("dev accounting mismatch: %s", buf.buf); + mismatch = true; } + } + } - 0; - }))); + 0; + })); acc_u64s_percpu(&base_inmem.hidden, &c->usage->hidden, sizeof(base_inmem) / sizeof(u64)); diff --git a/fs/bcachefs/disk_accounting.h b/fs/bcachefs/disk_accounting.h index d61abebf3e0b..43f4b21d0aab 100644 --- a/fs/bcachefs/disk_accounting.h +++ b/fs/bcachefs/disk_accounting.h @@ -211,10 +211,8 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, static inline int bch2_accounting_mem_add(struct btree_trans *trans, struct bkey_s_c_accounting a, bool gc) { - percpu_down_read(&trans->c->mark_lock); - int ret = bch2_accounting_mem_mod_locked(trans, a, gc ? BCH_ACCOUNTING_gc : BCH_ACCOUNTING_normal, false); - percpu_up_read(&trans->c->mark_lock); - return ret; + guard(percpu_read)(&trans->c->mark_lock); + return bch2_accounting_mem_mod_locked(trans, a, gc ? 
BCH_ACCOUNTING_gc : BCH_ACCOUNTING_normal, false); } static inline void bch2_accounting_mem_read_counters(struct bch_accounting_mem *acc, @@ -236,13 +234,12 @@ static inline void bch2_accounting_mem_read_counters(struct bch_accounting_mem * static inline void bch2_accounting_mem_read(struct bch_fs *c, struct bpos p, u64 *v, unsigned nr) { - percpu_down_read(&c->mark_lock); + guard(percpu_read)(&c->mark_lock); struct bch_accounting_mem *acc = &c->accounting; unsigned idx = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), accounting_pos_cmp, &p); bch2_accounting_mem_read_counters(acc, idx, v, nr, false); - percpu_up_read(&c->mark_lock); } static inline struct bversion journal_pos_to_bversion(struct journal_res *res, unsigned offset) diff --git a/fs/bcachefs/disk_groups.c b/fs/bcachefs/disk_groups.c index cde842ac1886..293e47268508 100644 --- a/fs/bcachefs/disk_groups.c +++ b/fs/bcachefs/disk_groups.c @@ -375,7 +375,7 @@ void bch2_disk_groups_to_text(struct printbuf *out, struct bch_fs *c) { bch2_printbuf_make_room(out, 4096); - out->atomic++; + guard(printbuf_atomic)(out); guard(rcu)(); struct bch_disk_groups_cpu *g = rcu_dereference(c->disk_groups); @@ -396,16 +396,13 @@ void bch2_disk_groups_to_text(struct printbuf *out, struct bch_fs *c) next: prt_newline(out); } - - out->atomic--; } void bch2_disk_path_to_text(struct printbuf *out, struct bch_fs *c, unsigned v) { - out->atomic++; + guard(printbuf_atomic)(out); guard(rcu)(); - __bch2_disk_path_to_text(out, rcu_dereference(c->disk_groups), v), - --out->atomic; + __bch2_disk_path_to_text(out, rcu_dereference(c->disk_groups), v); } void bch2_disk_path_to_text_sb(struct printbuf *out, struct bch_sb *sb, unsigned v) @@ -471,14 +468,9 @@ int __bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *name) int bch2_dev_group_set(struct bch_fs *c, struct bch_dev *ca, const char *name) { - int ret; - - mutex_lock(&c->sb_lock); - ret = __bch2_dev_group_set(c, ca, name) ?: + guard(mutex)(&c->sb_lock); + return __bch2_dev_group_set(c, ca, name) ?: bch2_write_super(c); - mutex_unlock(&c->sb_lock); - - return ret; } int bch2_opt_target_parse(struct bch_fs *c, const char *val, u64 *res, @@ -506,9 +498,8 @@ int bch2_opt_target_parse(struct bch_fs *c, const char *val, u64 *res, return 0; } - mutex_lock(&c->sb_lock); - g = bch2_disk_path_find(&c->disk_sb, val); - mutex_unlock(&c->sb_lock); + scoped_guard(mutex, &c->sb_lock) + g = bch2_disk_path_find(&c->disk_sb, val); if (g >= 0) { *res = group_to_target(g); @@ -527,7 +518,7 @@ void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v) prt_printf(out, "none"); return; case TARGET_DEV: { - out->atomic++; + guard(printbuf_atomic)(out); guard(rcu)(); struct bch_dev *ca = t.dev < c->sb.nr_devices ? rcu_dereference(c->devs[t.dev]) @@ -539,8 +530,6 @@ void bch2_target_to_text(struct printbuf *out, struct bch_fs *c, unsigned v) prt_printf(out, "offline device %u", t.dev); else prt_printf(out, "invalid device %u", t.dev); - - out->atomic--; return; } case TARGET_GROUP: diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 687c3ba98095..62dda821247e 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -197,8 +197,7 @@ static int __mark_stripe_bucket(struct btree_trans *trans, bool parity = ptr_idx >= nr_data; enum bch_data_type data_type = parity ? BCH_DATA_parity : BCH_DATA_stripe; s64 sectors = parity ? 
le16_to_cpu(s.v->sectors) : 0; - struct printbuf buf = PRINTBUF; - int ret = 0; + CLASS(printbuf, buf)(); struct bch_fs *c = trans->c; if (deleting) @@ -212,10 +211,8 @@ static int __mark_stripe_bucket(struct btree_trans *trans, bch2_data_type_str(a->data_type), a->dirty_sectors, a->stripe, s.k->p.offset, - (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) { - ret = bch_err_throw(c, mark_stripe); - goto err; - } + (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) + return bch_err_throw(c, mark_stripe); if (bch2_trans_inconsistent_on(parity && bch2_bucket_sectors_total(*a), trans, "bucket %llu:%llu gen %u data type %s dirty_sectors %u cached_sectors %u: data already in parity bucket\n%s", @@ -223,30 +220,24 @@ static int __mark_stripe_bucket(struct btree_trans *trans, bch2_data_type_str(a->data_type), a->dirty_sectors, a->cached_sectors, - (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) { - ret = bch_err_throw(c, mark_stripe); - goto err; - } + (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) + return bch_err_throw(c, mark_stripe); } else { if (bch2_trans_inconsistent_on(a->stripe != s.k->p.offset || a->stripe_redundancy != s.v->nr_redundant, trans, "bucket %llu:%llu gen %u: not marked as stripe when deleting stripe (got %u)\n%s", bucket.inode, bucket.offset, a->gen, a->stripe, - (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) { - ret = bch_err_throw(c, mark_stripe); - goto err; - } + (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) + return bch_err_throw(c, mark_stripe); if (bch2_trans_inconsistent_on(a->data_type != data_type, trans, "bucket %llu:%llu gen %u data type %s: wrong data type when stripe, should be %s\n%s", bucket.inode, bucket.offset, a->gen, bch2_data_type_str(a->data_type), bch2_data_type_str(data_type), - (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) { - ret = bch_err_throw(c, mark_stripe); - goto err; - } + (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) + return bch_err_throw(c, mark_stripe); if (bch2_trans_inconsistent_on(parity && (a->dirty_sectors != -sectors || @@ -255,17 +246,15 @@ static int __mark_stripe_bucket(struct btree_trans *trans, bucket.inode, bucket.offset, a->gen, a->dirty_sectors, a->cached_sectors, - (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) { - ret = bch_err_throw(c, mark_stripe); - goto err; - } + (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) + return bch_err_throw(c, mark_stripe); } if (sectors) { - ret = bch2_bucket_ref_update(trans, ca, s.s_c, ptr, sectors, data_type, - a->gen, a->data_type, &a->dirty_sectors); + int ret = bch2_bucket_ref_update(trans, ca, s.s_c, ptr, sectors, data_type, + a->gen, a->data_type, &a->dirty_sectors); if (ret) - goto err; + return ret; } if (!deleting) { @@ -277,9 +266,8 @@ static int __mark_stripe_bucket(struct btree_trans *trans, a->stripe_redundancy = 0; alloc_data_type_set(a, BCH_DATA_user); } -err: - printbuf_exit(&buf); - return ret; + + return 0; } static int mark_stripe_bucket(struct btree_trans *trans, @@ -289,14 +277,13 @@ static int mark_stripe_bucket(struct btree_trans *trans, { struct bch_fs *c = trans->c; const struct bch_extent_ptr *ptr = s.v->ptrs + ptr_idx; - struct printbuf buf = PRINTBUF; - int ret = 0; + CLASS(printbuf, buf)(); - struct bch_dev *ca = bch2_dev_tryget(c, ptr->dev); + CLASS(bch2_dev_tryget, ca)(c, ptr->dev); if (unlikely(!ca)) { if (ptr->dev != BCH_SB_MEMBER_INVALID && !(flags & BTREE_TRIGGER_overwrite)) - ret = bch_err_throw(c, mark_stripe); - goto err; + return bch_err_throw(c, mark_stripe); + return 0; } struct bpos bucket = PTR_BUCKET_POS(ca, 
ptr); @@ -312,36 +299,34 @@ static int mark_stripe_bucket, struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update(trans, bucket, 0); - ret = PTR_ERR_OR_ZERO(a) ?: + int ret = PTR_ERR_OR_ZERO(a) ?: __mark_stripe_bucket(trans, ca, s, ptr_idx, deleting, bucket, &a->v, flags) ?: bch2_bucket_backpointer_mod(trans, s.s_c, &bp, !(flags & BTREE_TRIGGER_overwrite)); if (ret) - goto err; + return ret; } if (flags & BTREE_TRIGGER_gc) { struct bucket *g = gc_bucket(ca, bucket.offset); if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u\n%s", ptr->dev, - (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) { - ret = bch_err_throw(c, mark_stripe); - goto err; - } + (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) + return bch_err_throw(c, mark_stripe); bucket_lock(g); struct bch_alloc_v4 old = bucket_m_to_alloc(*g), new = old; - ret = __mark_stripe_bucket(trans, ca, s, ptr_idx, deleting, bucket, &new, flags); + int ret = __mark_stripe_bucket(trans, ca, s, ptr_idx, deleting, bucket, &new, flags); alloc_to_bucket(g, new); bucket_unlock(g); if (!ret) ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags); + if (ret) + return ret; } -err: - bch2_dev_put(ca); - printbuf_exit(&buf); - return ret; + + return 0; } static int mark_stripe_buckets(struct btree_trans *trans, @@ -630,16 +613,15 @@ static void ec_validate_checksums(struct bch_fs *c, struct ec_stripe_buf *buf) struct bch_csum got = ec_block_checksum(buf, i, offset); if (bch2_crc_cmp(want, got)) { - struct bch_dev *ca = bch2_dev_tryget(c, v->ptrs[i].dev); + CLASS(bch2_dev_tryget, ca)(c, v->ptrs[i].dev); if (ca) { - struct printbuf err = PRINTBUF; + CLASS(printbuf, err)(); prt_str(&err, "stripe "); bch2_csum_err_msg(&err, v->csum_type, want, got); prt_printf(&err, " for %ps at %u of\n ", (void *) _RET_IP_, i); bch2_bkey_val_to_text(&err, c, bkey_i_to_s_c(&buf->key)); bch_err_ratelimited(ca, "%s", err.buf); - printbuf_exit(&err); bch2_io_error(ca, BCH_MEMBER_ERROR_checksum); } @@ -703,8 +685,8 @@ static void ec_block_endio(struct bio *bio) struct closure *cl = bio->bi_private; int rw = ec_bio->rw; unsigned ref = rw == READ - ? BCH_DEV_READ_REF_ec_block - : BCH_DEV_WRITE_REF_ec_block; + ? (unsigned) BCH_DEV_READ_REF_ec_block + : (unsigned) BCH_DEV_WRITE_REF_ec_block; bch2_account_io_completion(ca, bio_data_dir(bio), ec_bio->submit_time, !bio->bi_status); @@ -741,8 +723,8 @@ static void ec_block_io(struct bch_fs *c, struct ec_stripe_buf *buf, : BCH_DATA_parity; int rw = op_is_write(opf); unsigned ref = rw == READ - ? BCH_DEV_READ_REF_ec_block - : BCH_DEV_WRITE_REF_ec_block; + ? 
(unsigned) BCH_DEV_READ_REF_ec_block + : (unsigned) BCH_DEV_WRITE_REF_ec_block; struct bch_dev *ca = bch2_dev_get_ioref(c, ptr->dev, rw, ref); if (!ca) { @@ -832,7 +814,7 @@ int bch2_ec_read_extent(struct btree_trans *trans, struct bch_read_bio *rbio, struct bch_stripe *v; unsigned i, offset; const char *msg = NULL; - struct printbuf msgbuf = PRINTBUF; + CLASS(printbuf, msgbuf)(); int ret = 0; closure_init_stack(&cl); @@ -894,7 +876,6 @@ err: bch2_bkey_val_to_text(&msgbuf, c, orig_k); bch_err_ratelimited(c, "error doing reconstruct read: %s\n %s", msg, msgbuf.buf); - printbuf_exit(&msgbuf); ret = bch_err_throw(c, stripe_reconstruct); goto out; } @@ -936,31 +917,22 @@ static bool __bch2_stripe_is_open(struct bch_fs *c, u64 idx) static bool bch2_stripe_is_open(struct bch_fs *c, u64 idx) { - bool ret = false; - - spin_lock(&c->ec_stripes_new_lock); - ret = __bch2_stripe_is_open(c, idx); - spin_unlock(&c->ec_stripes_new_lock); - - return ret; + guard(spinlock)(&c->ec_stripes_new_lock); + return __bch2_stripe_is_open(c, idx); } static bool bch2_try_open_stripe(struct bch_fs *c, struct ec_stripe_new *s, u64 idx) { - bool ret; - - spin_lock(&c->ec_stripes_new_lock); - ret = !__bch2_stripe_is_open(c, idx); + guard(spinlock)(&c->ec_stripes_new_lock); + bool ret = !__bch2_stripe_is_open(c, idx); if (ret) { unsigned hash = hash_64(idx, ilog2(ARRAY_SIZE(c->ec_stripes_new))); s->idx = idx; hlist_add_head(&s->hash, &c->ec_stripes_new[hash]); } - spin_unlock(&c->ec_stripes_new_lock); - return ret; } @@ -968,9 +940,8 @@ static void bch2_stripe_close(struct bch_fs *c, struct ec_stripe_new *s) { BUG_ON(!s->idx); - spin_lock(&c->ec_stripes_new_lock); + guard(spinlock)(&c->ec_stripes_new_lock); hlist_del_init(&s->hash); - spin_unlock(&c->ec_stripes_new_lock); s->idx = 0; } @@ -1063,7 +1034,7 @@ static int ec_stripe_key_update(struct btree_trans *trans, unsigned sectors = stripe_blockcount_get(v, i); if (!bch2_extent_ptr_eq(old->v.ptrs[i], new->v.ptrs[i]) && sectors) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_printf(&buf, "stripe changed nonempty block %u", i); prt_str(&buf, "\nold: "); @@ -1071,7 +1042,6 @@ static int ec_stripe_key_update(struct btree_trans *trans, prt_str(&buf, "\nnew: "); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&new->k_i)); bch2_fs_inconsistent(c, "%s", buf.buf); - printbuf_exit(&buf); ret = -EINVAL; goto err; } @@ -1115,21 +1085,18 @@ static int ec_stripe_update_extent(struct btree_trans *trans, int ret, dev, block; if (bp.v->level) { - struct printbuf buf = PRINTBUF; struct btree_iter node_iter; - struct btree *b; - - b = bch2_backpointer_get_node(trans, bp, &node_iter, last_flushed); + struct btree *b = bch2_backpointer_get_node(trans, bp, &node_iter, last_flushed); bch2_trans_iter_exit(trans, &node_iter); if (!b) return 0; + CLASS(printbuf, buf)(); prt_printf(&buf, "found btree node in erasure coded bucket: b=%px\n", b); bch2_bkey_val_to_text(&buf, c, bp.s_c); bch2_fs_inconsistent(c, "%s", buf.buf); - printbuf_exit(&buf); return bch_err_throw(c, erasure_coding_found_btree_node); } @@ -1194,7 +1161,7 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b struct bch_extent_ptr ptr = v->ptrs[block]; int ret = 0; - struct bch_dev *ca = bch2_dev_tryget(c, ptr.dev); + CLASS(bch2_dev_tryget, ca)(c, ptr.dev); if (!ca) return bch_err_throw(c, ENOENT_dev_not_found); @@ -1225,28 +1192,26 @@ static int ec_stripe_update_bucket(struct btree_trans *trans, struct ec_stripe_b })); bch2_bkey_buf_exit(&last_flushed, c); - bch2_dev_put(ca); return 
ret; } static int ec_stripe_update_extents(struct bch_fs *c, struct ec_stripe_buf *s) { - struct btree_trans *trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); struct bch_stripe *v = &bkey_i_to_stripe(&s->key)->v; unsigned nr_data = v->nr_blocks - v->nr_redundant; int ret = bch2_btree_write_buffer_flush_sync(trans); if (ret) - goto err; + return ret; for (unsigned i = 0; i < nr_data; i++) { ret = ec_stripe_update_bucket(trans, s, i); if (ret) - break; + return ret; } -err: - bch2_trans_put(trans); - return ret; + + return 0; } static void zero_out_rest_of_ec_bucket(struct bch_fs *c, @@ -1385,9 +1350,8 @@ err: } } - mutex_lock(&c->ec_stripe_new_lock); - list_del(&s->list); - mutex_unlock(&c->ec_stripe_new_lock); + scoped_guard(mutex, &c->ec_stripe_new_lock) + list_del(&s->list); wake_up(&c->ec_stripe_new_wait); ec_stripe_buf_exit(&s->existing_stripe); @@ -1401,15 +1365,11 @@ static struct ec_stripe_new *get_pending_stripe(struct bch_fs *c) { struct ec_stripe_new *s; - mutex_lock(&c->ec_stripe_new_lock); + guard(mutex)(&c->ec_stripe_new_lock); list_for_each_entry(s, &c->ec_stripe_new_list, list) if (!atomic_read(&s->ref[STRIPE_REF_io])) - goto out; - s = NULL; -out: - mutex_unlock(&c->ec_stripe_new_lock); - - return s; + return s; + return NULL; } static void ec_stripe_create_work(struct work_struct *work) @@ -1443,9 +1403,8 @@ static void ec_stripe_new_set_pending(struct bch_fs *c, struct ec_stripe_head *h h->s = NULL; s->pending = true; - mutex_lock(&c->ec_stripe_new_lock); - list_add(&s->list, &c->ec_stripe_new_list); - mutex_unlock(&c->ec_stripe_new_lock); + scoped_guard(mutex, &c->ec_stripe_new_lock) + list_add(&s->list, &c->ec_stripe_new_list); ec_stripe_new_put(c, s, STRIPE_REF_io); } @@ -2199,13 +2158,13 @@ static int bch2_invalidate_stripe_to_dev_from_alloc(struct btree_trans *trans, s int bch2_dev_remove_stripes(struct bch_fs *c, unsigned dev_idx, unsigned flags) { - int ret = bch2_trans_run(c, - for_each_btree_key_max_commit(trans, iter, + CLASS(btree_trans, trans)(c); + int ret = for_each_btree_key_max_commit(trans, iter, BTREE_ID_alloc, POS(dev_idx, 0), POS(dev_idx, U64_MAX), BTREE_ITER_intent, k, NULL, NULL, 0, ({ bch2_invalidate_stripe_to_dev_from_alloc(trans, k, flags); - }))); + })); bch_err_fn(c, ret); return ret; } @@ -2215,33 +2174,28 @@ int bch2_dev_remove_stripes(struct bch_fs *c, unsigned dev_idx, unsigned flags) static void __bch2_ec_stop(struct bch_fs *c, struct bch_dev *ca) { struct ec_stripe_head *h; - struct open_bucket *ob; - unsigned i; - mutex_lock(&c->ec_stripe_head_lock); + guard(mutex)(&c->ec_stripe_head_lock); list_for_each_entry(h, &c->ec_stripe_head_list, list) { - mutex_lock(&h->lock); + guard(mutex)(&h->lock); if (!h->s) - goto unlock; + continue; if (!ca) goto found; - for (i = 0; i < bkey_i_to_stripe(&h->s->new_stripe.key)->v.nr_blocks; i++) { + for (unsigned i = 0; i < bkey_i_to_stripe(&h->s->new_stripe.key)->v.nr_blocks; i++) { if (!h->s->blocks[i]) continue; - ob = c->open_buckets + h->s->blocks[i]; + struct open_bucket *ob = c->open_buckets + h->s->blocks[i]; if (ob->dev == ca->dev_idx) goto found; } - goto unlock; + continue; found: ec_stripe_new_cancel(c, h, -BCH_ERR_erofs_no_writes); -unlock: - mutex_unlock(&h->lock); } - mutex_unlock(&c->ec_stripe_head_lock); } void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca) @@ -2258,11 +2212,8 @@ static bool bch2_fs_ec_flush_done(struct bch_fs *c) { sched_annotate_sleep(); - mutex_lock(&c->ec_stripe_new_lock); - bool ret = list_empty(&c->ec_stripe_new_list); - 
mutex_unlock(&c->ec_stripe_new_lock); - - return ret; + guard(mutex)(&c->ec_stripe_new_lock); + return list_empty(&c->ec_stripe_new_list); } void bch2_fs_ec_flush(struct bch_fs *c) @@ -2299,41 +2250,40 @@ void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c) struct ec_stripe_head *h; struct ec_stripe_new *s; - mutex_lock(&c->ec_stripe_head_lock); - list_for_each_entry(h, &c->ec_stripe_head_list, list) { - prt_printf(out, "disk label %u algo %u redundancy %u %s nr created %llu:\n", - h->disk_label, h->algo, h->redundancy, - bch2_watermarks[h->watermark], - h->nr_created); + scoped_guard(mutex, &c->ec_stripe_head_lock) + list_for_each_entry(h, &c->ec_stripe_head_list, list) { + prt_printf(out, "disk label %u algo %u redundancy %u %s nr created %llu:\n", + h->disk_label, h->algo, h->redundancy, + bch2_watermarks[h->watermark], + h->nr_created); - if (h->s) - bch2_new_stripe_to_text(out, c, h->s); - } - mutex_unlock(&c->ec_stripe_head_lock); + if (h->s) + bch2_new_stripe_to_text(out, c, h->s); + } prt_printf(out, "in flight:\n"); - mutex_lock(&c->ec_stripe_new_lock); - list_for_each_entry(s, &c->ec_stripe_new_list, list) - bch2_new_stripe_to_text(out, c, s); - mutex_unlock(&c->ec_stripe_new_lock); + scoped_guard(mutex, &c->ec_stripe_new_lock) + list_for_each_entry(s, &c->ec_stripe_new_list, list) + bch2_new_stripe_to_text(out, c, s); } void bch2_fs_ec_exit(struct bch_fs *c) { - struct ec_stripe_head *h; - unsigned i; while (1) { - mutex_lock(&c->ec_stripe_head_lock); - h = list_pop_entry(&c->ec_stripe_head_list, struct ec_stripe_head, list); - mutex_unlock(&c->ec_stripe_head_lock); + struct ec_stripe_head *h; + + scoped_guard(mutex, &c->ec_stripe_head_lock) + h = list_pop_entry(&c->ec_stripe_head_list, struct ec_stripe_head, list); if (!h) break; if (h->s) { - for (i = 0; i < bkey_i_to_stripe(&h->s->new_stripe.key)->v.nr_blocks; i++) + for (unsigned i = 0; + i < bkey_i_to_stripe(&h->s->new_stripe.key)->v.nr_blocks; + i++) BUG_ON(h->s->blocks[i]); kfree(h->s); @@ -2386,20 +2336,18 @@ static int bch2_check_stripe_to_lru_ref(struct btree_trans *trans, return 0; } -int bch2_check_stripe_to_lru_refs(struct bch_fs *c) +int bch2_check_stripe_to_lru_refs(struct btree_trans *trans) { struct bkey_buf last_flushed; - bch2_bkey_buf_init(&last_flushed); bkey_init(&last_flushed.k->k); - int ret = bch2_trans_run(c, - for_each_btree_key_commit(trans, iter, BTREE_ID_stripes, + int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_stripes, POS_MIN, BTREE_ITER_prefetch, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - bch2_check_stripe_to_lru_ref(trans, k, &last_flushed))); + bch2_check_stripe_to_lru_ref(trans, k, &last_flushed)); - bch2_bkey_buf_exit(&last_flushed, c); - bch_err_fn(c, ret); + bch2_bkey_buf_exit(&last_flushed, trans->c); + bch_err_fn(trans->c, ret); return ret; } diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h index 548048adf0d5..e807e7027d7a 100644 --- a/fs/bcachefs/ec.h +++ b/fs/bcachefs/ec.h @@ -304,6 +304,6 @@ void bch2_fs_ec_exit(struct bch_fs *); void bch2_fs_ec_init_early(struct bch_fs *); int bch2_fs_ec_init(struct bch_fs *); -int bch2_check_stripe_to_lru_refs(struct bch_fs *); +int bch2_check_stripe_to_lru_refs(struct btree_trans *); #endif /* _BCACHEFS_EC_H */ diff --git a/fs/bcachefs/enumerated_ref.c b/fs/bcachefs/enumerated_ref.c index 56ab430f209f..2ded74135977 100644 --- a/fs/bcachefs/enumerated_ref.c +++ b/fs/bcachefs/enumerated_ref.c @@ -75,13 +75,11 @@ void enumerated_ref_stop(struct enumerated_ref *ref, { enumerated_ref_stop_async(ref); while 
(!wait_for_completion_timeout(&ref->stop_complete, HZ * 10)) { - struct printbuf buf = PRINTBUF; - + CLASS(printbuf, buf)(); prt_str(&buf, "Waited for 10 seconds to shutdown enumerated ref\n"); prt_str(&buf, "Outstanding refs:\n"); enumerated_ref_to_text(&buf, ref, names); printk(KERN_ERR "%s", buf.buf); - printbuf_exit(&buf); } } diff --git a/fs/bcachefs/errcode.c b/fs/bcachefs/errcode.c index c39cf304c681..86264b8c343c 100644 --- a/fs/bcachefs/errcode.c +++ b/fs/bcachefs/errcode.c @@ -26,7 +26,8 @@ const char *bch2_err_str(int err) err = abs(err); - BUG_ON(err >= BCH_ERR_MAX); + if (err >= BCH_ERR_MAX) + return "(Invalid error)"; if (err >= BCH_ERR_START) errstr = bch2_errcode_strs[err - BCH_ERR_START]; diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index 71649b4164b8..32a286b3a74e 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -42,15 +42,14 @@ bool __bch2_inconsistent_error(struct bch_fs *c, struct printbuf *out) bool bch2_inconsistent_error(struct bch_fs *c) { - struct printbuf buf = PRINTBUF; - buf.atomic++; + CLASS(printbuf, buf)(); + guard(printbuf_atomic)(&buf); printbuf_indent_add_nextline(&buf, 2); bool ret = __bch2_inconsistent_error(c, &buf); if (ret) bch_err(c, "%s", buf.buf); - printbuf_exit(&buf); return ret; } @@ -58,8 +57,8 @@ __printf(3, 0) static bool bch2_fs_trans_inconsistent(struct bch_fs *c, struct btree_trans *trans, const char *fmt, va_list args) { - struct printbuf buf = PRINTBUF; - buf.atomic++; + CLASS(printbuf, buf)(); + guard(printbuf_atomic)(&buf); bch2_log_msg_start(c, &buf); @@ -70,8 +69,6 @@ static bool bch2_fs_trans_inconsistent(struct bch_fs *c, struct btree_trans *tra bch2_trans_updates_to_text(&buf, trans); bool ret = __bch2_inconsistent_error(c, &buf); bch2_print_str(c, KERN_ERR, buf.buf); - - printbuf_exit(&buf); return ret; } @@ -109,8 +106,7 @@ int __bch2_topology_error(struct bch_fs *c, struct printbuf *out) int bch2_fs_topology_error(struct bch_fs *c, const char *fmt, ...) { - struct printbuf buf = PRINTBUF; - + CLASS(printbuf, buf)(); bch2_log_msg_start(c, &buf); va_list args; @@ -120,8 +116,6 @@ int bch2_fs_topology_error(struct bch_fs *c, const char *fmt, ...) 
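/*
 * Editor's note: the printbuf conversions in error.c above follow the
 * constructor/destructor shape that DEFINE_CLASS() generates; once the
 * destructor is attached to the variable, early returns no longer need a
 * printbuf_exit() on every path.  Hypothetical userspace analogue, with a
 * malloc'd buffer standing in for struct printbuf:
 */
#include <stdio.h>
#include <stdlib.h>

struct buf { char *s; };

static struct buf buf_ctor(void)
{
	return (struct buf) { .s = calloc(1, 256) };
}

static void buf_dtor(struct buf *b)
{
	free(b->s);		/* stands in for printbuf_exit() */
}

#define CLASS_buf(var)							\
	struct buf var __attribute__((cleanup(buf_dtor))) = buf_ctor()

static int report(int err)
{
	CLASS_buf(buf);
	if (!buf.s)
		return -1;	/* early return: dtor still runs, free(NULL) is fine */

	snprintf(buf.s, 256, "error %d", err);
	fprintf(stderr, "%s\n", buf.s);
	return 0;		/* no explicit cleanup on the success path either */
}

int main(void)
{
	return report(42);
}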
int ret = __bch2_topology_error(c, &buf); bch2_print_str(c, KERN_ERR, buf.buf); - - printbuf_exit(&buf); return ret; } @@ -138,18 +132,18 @@ void bch2_io_error_work(struct work_struct *work) /* XXX: if it's reads or checksums that are failing, set it to failed */ - down_write(&c->state_lock); + guard(rwsem_write)(&c->state_lock); unsigned long write_errors_start = READ_ONCE(ca->write_errors_start); if (write_errors_start && time_after(jiffies, write_errors_start + c->opts.write_error_timeout * HZ)) { if (ca->mi.state >= BCH_MEMBER_STATE_ro) - goto out; + return; bool dev = !__bch2_dev_set_state(c, ca, BCH_MEMBER_STATE_ro, BCH_FORCE_IF_DEGRADED); - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); __bch2_log_msg_start(ca->name, &buf); prt_printf(&buf, "writes erroring for %u seconds, setting %s ro", @@ -159,10 +153,7 @@ void bch2_io_error_work(struct work_struct *work) bch2_fs_emergency_read_only2(c, &buf); bch2_print_str(c, KERN_ERR, buf.buf); - printbuf_exit(&buf); } -out: - up_write(&c->state_lock); } void bch2_io_error(struct bch_dev *ca, enum bch_member_error_type type) @@ -382,11 +373,10 @@ bool __bch2_count_fsck_err(struct bch_fs *c, { bch2_sb_error_count(c, id); - mutex_lock(&c->fsck_error_msgs_lock); bool print = true, repeat = false, suppress = false; - count_fsck_err_locked(c, id, msg->buf, &repeat, &print, &suppress); - mutex_unlock(&c->fsck_error_msgs_lock); + scoped_guard(mutex, &c->fsck_error_msgs_lock) + count_fsck_err_locked(c, id, msg->buf, &repeat, &print, &suppress); if (suppress) prt_printf(msg, "Ratelimiting new instances of previous error\n"); @@ -444,7 +434,8 @@ int __bch2_fsck_err(struct bch_fs *c, const char *fmt, ...) { va_list args; - struct printbuf buf = PRINTBUF, *out = &buf; + CLASS(printbuf, buf)(); + struct printbuf *out = &buf; int ret = 0; const char *action_orig = "fix?", *action = action_orig; @@ -637,7 +628,9 @@ err: * log_fsck_err()s: that would require us to track for every error type * which recovery pass corrects it, to get the fsck exit status correct: */ - if (bch2_err_matches(ret, BCH_ERR_fsck_fix)) { + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) { + /* nothing */ + } else if (bch2_err_matches(ret, BCH_ERR_fsck_fix)) { set_bit(BCH_FS_errors_fixed, &c->flags); } else { set_bit(BCH_FS_errors_not_fixed, &c->flags); @@ -646,7 +639,6 @@ err: if (action != action_orig) kfree(action); - printbuf_exit(&buf); BUG_ON(!ret); return ret; @@ -678,7 +670,7 @@ int __bch2_bkey_fsck_err(struct bch_fs *c, if (!WARN_ON(err >= ARRAY_SIZE(fsck_flags_extra))) fsck_flags |= fsck_flags_extra[err]; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_printf(&buf, "invalid bkey in %s", bch2_bkey_validate_contexts[from.from]); @@ -699,7 +691,6 @@ int __bch2_bkey_fsck_err(struct bch_fs *c, va_end(args); int ret = __bch2_fsck_err(c, NULL, fsck_flags, err, "%s, delete?", buf.buf); - printbuf_exit(&buf); return ret; } @@ -707,7 +698,7 @@ static void __bch2_flush_fsck_errs(struct bch_fs *c, bool print) { struct fsck_err_state *s, *n; - mutex_lock(&c->fsck_error_msgs_lock); + guard(mutex)(&c->fsck_error_msgs_lock); list_for_each_entry_safe(s, n, &c->fsck_error_msgs, list) { if (print && s->ratelimited && s->last_msg) @@ -717,8 +708,6 @@ static void __bch2_flush_fsck_errs(struct bch_fs *c, bool print) kfree(s->last_msg); kfree(s); } - - mutex_unlock(&c->fsck_error_msgs_lock); } void bch2_flush_fsck_errs(struct bch_fs *c) @@ -752,7 +741,8 @@ int bch2_inum_offset_err_msg_trans(struct btree_trans *trans, struct printbuf *o void 
bch2_inum_offset_err_msg(struct bch_fs *c, struct printbuf *out, subvol_inum inum, u64 offset) { - bch2_trans_do(c, bch2_inum_offset_err_msg_trans(trans, out, inum, offset)); + CLASS(btree_trans, trans)(c); + lockrestart_do(trans, bch2_inum_offset_err_msg_trans(trans, out, inum, offset)); } int bch2_inum_snap_offset_err_msg_trans(struct btree_trans *trans, struct printbuf *out, @@ -769,5 +759,6 @@ int bch2_inum_snap_offset_err_msg_trans(struct btree_trans *trans, struct printb void bch2_inum_snap_offset_err_msg(struct bch_fs *c, struct printbuf *out, struct bpos pos) { - bch2_trans_do(c, bch2_inum_snap_offset_err_msg_trans(trans, out, pos)); + CLASS(btree_trans, trans)(c); + lockrestart_do(trans, bch2_inum_snap_offset_err_msg_trans(trans, out, pos)); } diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index ec0951fbddea..b36ecfc0ab9d 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -63,15 +63,14 @@ void bch2_io_failures_to_text(struct printbuf *out, ((!!f->failed_ec) << 3); bch2_printbuf_make_room(out, 1024); - out->atomic++; scoped_guard(rcu) { + guard(printbuf_atomic)(out); struct bch_dev *ca = bch2_dev_rcu_noerror(c, f->dev); if (ca) prt_str(out, ca->name); else prt_printf(out, "(invalid device %u)", f->dev); } - --out->atomic; prt_char(out, ' '); @@ -1237,7 +1236,7 @@ restart_drop_ptrs: void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *c, const struct bch_extent_ptr *ptr) { - out->atomic++; + guard(printbuf_atomic)(out); guard(rcu)(); struct bch_dev *ca = bch2_dev_rcu_noerror(c, ptr->dev); if (!ca) { @@ -1262,7 +1261,6 @@ void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *c, const struc else if (stale) prt_printf(out, " invalid"); } - --out->atomic; } void bch2_extent_crc_unpacked_to_text(struct printbuf *out, struct bch_extent_crc_unpacked *crc) @@ -1524,7 +1522,7 @@ int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k, const struct bch_extent_rebalance *r = &entry->rebalance; if (!bch2_compression_opt_valid(r->compression)) { - struct bch_compression_opt opt = __bch2_compression_decode(r->compression); + union bch_compression_opt opt = { .value = r->compression }; prt_printf(err, "invalid compression opt %u:%u", opt.type, opt.level); return bch_err_throw(c, invalid_bkey); diff --git a/fs/bcachefs/fast_list.c b/fs/bcachefs/fast_list.c index b52f264318d8..6be2a45be1dd 100644 --- a/fs/bcachefs/fast_list.c +++ b/fs/bcachefs/fast_list.c @@ -138,8 +138,21 @@ void fast_list_remove(struct fast_list *l, unsigned idx) void fast_list_exit(struct fast_list *l) { - /* XXX: warn if list isn't empty */ - free_percpu(l->buffer); + if (l->buffer) { + int cpu; + for_each_possible_cpu(cpu) { + struct fast_list_pcpu *lp = per_cpu_ptr(l->buffer, cpu); + + while (lp->nr) + ida_free(&l->slots_allocated, lp->entries[--lp->nr]); + } + + free_percpu(l->buffer); + } + + WARN(ida_find_first(&l->slots_allocated) >= 0, + "fast_list still has objects on exit\n"); + ida_destroy(&l->slots_allocated); genradix_free(&l->items); } diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c index 4e82dfa6c03f..f2389054693a 100644 --- a/fs/bcachefs/fs-io-buffered.c +++ b/fs/bcachefs/fs-io-buffered.c @@ -254,12 +254,11 @@ err: bch2_trans_iter_exit(trans, &iter); if (ret) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); lockrestart_do(trans, bch2_inum_offset_err_msg_trans(trans, &buf, inum, iter.pos.offset << 9)); prt_printf(&buf, "read error %s from btree lookup", bch2_err_str(ret)); bch_err_ratelimited(c, "%s", buf.buf); - 
printbuf_exit(&buf); rbio->bio.bi_status = BLK_STS_IOERR; bio_endio(&rbio->bio); @@ -425,27 +424,23 @@ static void bch2_writepage_io_done(struct bch_write_op *op) set_bit(EI_INODE_ERROR, &io->inode->ei_flags); bio_for_each_folio_all(fi, bio) { - struct bch_folio *s; - mapping_set_error(fi.folio->mapping, -EIO); - s = __bch2_folio(fi.folio); - spin_lock(&s->lock); + struct bch_folio *s = __bch2_folio(fi.folio); + guard(spinlock)(&s->lock); + for (i = 0; i < folio_sectors(fi.folio); i++) s->s[i].nr_replicas = 0; - spin_unlock(&s->lock); } } if (io->op.flags & BCH_WRITE_wrote_data_inline) { bio_for_each_folio_all(fi, bio) { - struct bch_folio *s; + struct bch_folio *s = __bch2_folio(fi.folio); + guard(spinlock)(&s->lock); - s = __bch2_folio(fi.folio); - spin_lock(&s->lock); for (i = 0; i < folio_sectors(fi.folio); i++) s->s[i].nr_replicas = 0; - spin_unlock(&s->lock); } } @@ -571,30 +566,30 @@ do_io: BUG_ON(ret); /* Before unlocking the page, get copy of reservations: */ - spin_lock(&s->lock); - memcpy(w->tmp, s->s, sizeof(struct bch_folio_sector) * f_sectors); + scoped_guard(spinlock, &s->lock) { + memcpy(w->tmp, s->s, sizeof(struct bch_folio_sector) * f_sectors); - for (i = 0; i < f_sectors; i++) { - if (s->s[i].state < SECTOR_dirty) - continue; + for (i = 0; i < f_sectors; i++) { + if (s->s[i].state < SECTOR_dirty) + continue; - nr_replicas_this_write = - min_t(unsigned, nr_replicas_this_write, - s->s[i].nr_replicas + - s->s[i].replicas_reserved); - } + nr_replicas_this_write = + min_t(unsigned, nr_replicas_this_write, + s->s[i].nr_replicas + + s->s[i].replicas_reserved); + } - for (i = 0; i < f_sectors; i++) { - if (s->s[i].state < SECTOR_dirty) - continue; + for (i = 0; i < f_sectors; i++) { + if (s->s[i].state < SECTOR_dirty) + continue; - s->s[i].nr_replicas = w->opts.compression - ? 0 : nr_replicas_this_write; + s->s[i].nr_replicas = w->opts.compression + ? 
0 : nr_replicas_this_write; - s->s[i].replicas_reserved = 0; - bch2_folio_sector_set(folio, s, i, SECTOR_allocated); + s->s[i].replicas_reserved = 0; + bch2_folio_sector_set(folio, s, i, SECTOR_allocated); + } } - spin_unlock(&s->lock); BUG_ON(atomic_read(&s->write_count)); atomic_set(&s->write_count, 1); @@ -780,10 +775,9 @@ int bch2_write_end(struct file *file, struct address_space *mapping, copied = 0; } - spin_lock(&inode->v.i_lock); - if (pos + copied > inode->v.i_size) - i_size_write(&inode->v, pos + copied); - spin_unlock(&inode->v.i_lock); + scoped_guard(spinlock, &inode->v.i_lock) + if (pos + copied > inode->v.i_size) + i_size_write(&inode->v, pos + copied); if (copied) { if (!folio_test_uptodate(folio)) @@ -942,10 +936,9 @@ static int __bch2_buffered_write(struct bch_inode_info *inode, end = pos + copied; - spin_lock(&inode->v.i_lock); - if (end > inode->v.i_size) - i_size_write(&inode->v, end); - spin_unlock(&inode->v.i_lock); + scoped_guard(spinlock, &inode->v.i_lock) + if (end > inode->v.i_size) + i_size_write(&inode->v, end); f_pos = pos; f_offset = pos - folio_pos(darray_first(fs)); diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c index 1f5154d9676b..73d44875faf2 100644 --- a/fs/bcachefs/fs-io-direct.c +++ b/fs/bcachefs/fs-io-direct.c @@ -252,7 +252,7 @@ static bool bch2_check_range_allocated(struct bch_fs *c, subvol_inum inum, u64 offset, u64 size, unsigned nr_replicas, bool compressed) { - struct btree_trans *trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); struct btree_iter iter; struct bkey_s_c k; u64 end = offset + size; @@ -285,7 +285,6 @@ retry: err: if (bch2_err_matches(err, BCH_ERR_transaction_restart)) goto retry; - bch2_trans_put(trans); return err ? false : ret; } @@ -428,17 +427,15 @@ static __always_inline void bch2_dio_write_end(struct dio_write *dio) dio->written += dio->op.written; if (dio->extending) { - spin_lock(&inode->v.i_lock); + guard(spinlock)(&inode->v.i_lock); if (req->ki_pos > inode->v.i_size) i_size_write(&inode->v, req->ki_pos); - spin_unlock(&inode->v.i_lock); } if (dio->op.i_sectors_delta || dio->quota_res.sectors) { - mutex_lock(&inode->ei_quota_lock); + guard(mutex)(&inode->ei_quota_lock); __bch2_i_sectors_acct(c, inode, &dio->quota_res, dio->op.i_sectors_delta); __bch2_quota_reservation_put(c, inode, &dio->quota_res); - mutex_unlock(&inode->ei_quota_lock); } bio_release_pages(bio, false); diff --git a/fs/bcachefs/fs-io-pagecache.c b/fs/bcachefs/fs-io-pagecache.c index c2cc405822f2..2a6705186c44 100644 --- a/fs/bcachefs/fs-io-pagecache.c +++ b/fs/bcachefs/fs-io-pagecache.c @@ -125,11 +125,9 @@ folio_sector_reserve(enum bch_folio_sector_state state) /* for newly allocated folios: */ struct bch_folio *__bch2_folio_create(struct folio *folio, gfp_t gfp) { - struct bch_folio *s; - - s = kzalloc(sizeof(*s) + - sizeof(struct bch_folio_sector) * - folio_sectors(folio), gfp); + struct bch_folio *s = kzalloc(sizeof(*s) + + sizeof(struct bch_folio_sector) * + folio_sectors(folio), gfp); if (!s) return NULL; @@ -162,7 +160,7 @@ static void __bch2_folio_set(struct folio *folio, BUG_ON(pg_offset >= sectors); BUG_ON(pg_offset + pg_len > sectors); - spin_lock(&s->lock); + guard(spinlock)(&s->lock); for (i = pg_offset; i < pg_offset + pg_len; i++) { s->s[i].nr_replicas = nr_ptrs; @@ -171,8 +169,6 @@ static void __bch2_folio_set(struct folio *folio, if (i == sectors) s->uptodate = true; - - spin_unlock(&s->lock); } /* @@ -276,10 +272,9 @@ void bch2_mark_pagecache_unallocated(struct bch_inode_info *inode, s = bch2_folio(folio); 
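/*
 * Editor's note: the i_size updates above use scoped_guard(spinlock, ...)
 * so the lock covers exactly one statement or block.  Sketch of the
 * one-shot for-loop trick behind scoped_guard() in include/linux/cleanup.h,
 * with a pthread spinlock as a stand-in:
 */
#include <pthread.h>

static pthread_spinlock_t lock;
static long i_size;

struct spin_guard { pthread_spinlock_t *l; };

static struct spin_guard spin_ctor(pthread_spinlock_t *l)
{
	pthread_spin_lock(l);
	return (struct spin_guard) { .l = l };
}

static void spin_dtor(struct spin_guard *g)
{
	pthread_spin_unlock(g->l);
}

/* runs its body once; the guard unlocks when the loop scope is left */
#define scoped_spin_guard(l)						\
	for (struct spin_guard						\
	     g __attribute__((cleanup(spin_dtor))) = spin_ctor(l),	\
	     *done = NULL; !done; done = (void *)1)

static void extend_size(long end)
{
	scoped_spin_guard(&lock)
		if (end > i_size)
			i_size = end;	/* unlocked as soon as the block ends */
}

int main(void)
{
	pthread_spin_init(&lock, PTHREAD_PROCESS_PRIVATE);
	extend_size(4096);
	return i_size != 4096;
}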
if (s) { - spin_lock(&s->lock); + guard(spinlock)(&s->lock); for (j = folio_offset; j < folio_offset + folio_len; j++) s->s[j].nr_replicas = 0; - spin_unlock(&s->lock); } folio_unlock(folio); @@ -330,13 +325,12 @@ int bch2_mark_pagecache_reserved(struct bch_inode_info *inode, unsigned folio_offset = max(*start, folio_start) - folio_start; unsigned folio_len = min(end, folio_end) - folio_offset - folio_start; - spin_lock(&s->lock); + guard(spinlock)(&s->lock); for (unsigned j = folio_offset; j < folio_offset + folio_len; j++) { i_sectors_delta -= s->s[j].state == SECTOR_dirty; bch2_folio_sector_set(folio, s, j, folio_sector_reserve(s->s[j].state)); } - spin_unlock(&s->lock); } folio_unlock(folio); @@ -529,29 +523,26 @@ void bch2_set_folio_dirty(struct bch_fs *c, BUG_ON(!s->uptodate); - spin_lock(&s->lock); - - for (i = round_down(offset, block_bytes(c)) >> 9; - i < round_up(offset + len, block_bytes(c)) >> 9; - i++) { - unsigned sectors = sectors_to_reserve(&s->s[i], - res->disk.nr_replicas); - - /* - * This can happen if we race with the error path in - * bch2_writepage_io_done(): - */ - sectors = min_t(unsigned, sectors, res->disk.sectors); + scoped_guard(spinlock, &s->lock) + for (i = round_down(offset, block_bytes(c)) >> 9; + i < round_up(offset + len, block_bytes(c)) >> 9; + i++) { + unsigned sectors = sectors_to_reserve(&s->s[i], + res->disk.nr_replicas); - s->s[i].replicas_reserved += sectors; - res->disk.sectors -= sectors; + /* + * This can happen if we race with the error path in + * bch2_writepage_io_done(): + */ + sectors = min_t(unsigned, sectors, res->disk.sectors); - dirty_sectors += s->s[i].state == SECTOR_unallocated; + s->s[i].replicas_reserved += sectors; + res->disk.sectors -= sectors; - bch2_folio_sector_set(folio, s, i, folio_sector_dirty(s->s[i].state)); - } + dirty_sectors += s->s[i].state == SECTOR_unallocated; - spin_unlock(&s->lock); + bch2_folio_sector_set(folio, s, i, folio_sector_dirty(s->s[i].state)); + } bch2_i_sectors_acct(c, inode, &res->quota, dirty_sectors); diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index dc5f713e209c..93ad33f0953a 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -148,7 +148,7 @@ void __bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, struct quota_res *quota_res, s64 sectors) { if (unlikely((s64) inode->v.i_blocks + sectors < 0)) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_log_msg_start(c, &buf); prt_printf(&buf, "inode %lu i_blocks underflow: %llu + %lli < 0 (ondisk %lli)", inode->v.i_ino, (u64) inode->v.i_blocks, sectors, @@ -157,7 +157,6 @@ void __bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, bool print = bch2_count_fsck_err(c, vfs_inode_i_blocks_underflow, &buf); if (print) bch2_print_str(c, KERN_ERR, buf.buf); - printbuf_exit(&buf); if (sectors < 0) sectors = -inode->v.i_blocks; @@ -187,7 +186,6 @@ void __bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, static int bch2_get_inode_journal_seq_trans(struct btree_trans *trans, subvol_inum inum, u64 *seq) { - struct printbuf buf = PRINTBUF; struct bch_inode_unpacked u; struct btree_iter iter; int ret = bch2_inode_peek(trans, &iter, &u, inum, 0); @@ -197,6 +195,7 @@ static int bch2_get_inode_journal_seq_trans(struct btree_trans *trans, subvol_in u64 cur_seq = journal_cur_seq(&trans->c->journal); *seq = min(cur_seq, u.bi_journal_seq); + CLASS(printbuf, buf)(); if (fsck_err_on(u.bi_journal_seq > cur_seq, trans, inode_journal_seq_in_future, "inode journal seq in future (currently at 
%llu)\n%s", @@ -208,7 +207,6 @@ static int bch2_get_inode_journal_seq_trans(struct btree_trans *trans, subvol_in } fsck_err: bch2_trans_iter_exit(trans, &iter); - printbuf_exit(&buf); return ret; } @@ -227,7 +225,7 @@ static int bch2_flush_inode(struct bch_fs *c, u64 seq; int ret = bch2_trans_commit_do(c, NULL, NULL, 0, - bch2_get_inode_journal_seq_trans(trans, inode_inum(inode), &seq)) ?: + bch2_get_inode_journal_seq_trans(trans, inode_inum(inode), &seq)) ?: bch2_journal_flush_seq(&c->journal, seq, TASK_INTERRUPTIBLE) ?: bch2_inode_flush_nocow_writes(c, inode); enumerated_ref_put(&c->writes, BCH_WRITE_REF_fsync); @@ -267,11 +265,11 @@ static inline int range_has_data(struct bch_fs *c, u32 subvol, struct bpos start, struct bpos end) { - return bch2_trans_run(c, - for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_extents, start, end, + CLASS(btree_trans, trans)(c); + return for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_extents, start, end, subvol, 0, k, ({ - bkey_extent_is_data(k.k) && !bkey_extent_is_unwritten(k); - }))); + bkey_extent_is_data(k.k) && !bkey_extent_is_unwritten(k); + })); } static int __bch2_truncate_folio(struct bch_inode_info *inode, @@ -521,7 +519,7 @@ int bchfs_truncate(struct mnt_idmap *idmap, if (unlikely(!inode->v.i_size && inode->v.i_blocks && !bch2_journal_error(&c->journal))) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_log_msg_start(c, &buf); prt_printf(&buf, "inode %lu truncated to 0 but i_blocks %llu (ondisk %lli)", @@ -531,7 +529,6 @@ int bchfs_truncate(struct mnt_idmap *idmap, bool print = bch2_count_fsck_err(c, vfs_inode_i_blocks_not_zero_at_truncate, &buf); if (print) bch2_print_str(c, KERN_ERR, buf.buf); - printbuf_exit(&buf); } ret = bch2_setattr_nonsize(idmap, inode, iattr); @@ -559,11 +556,10 @@ static noinline long bchfs_fpunch(struct bch_inode_info *inode, loff_t offset, l u64 block_start = round_up(offset, block_bytes(c)); u64 block_end = round_down(end, block_bytes(c)); bool truncated_last_page; - int ret = 0; - ret = bch2_truncate_folios(inode, offset, end); + int ret = bch2_truncate_folios(inode, offset, end); if (unlikely(ret < 0)) - goto err; + return ret; truncated_last_page = ret; @@ -576,19 +572,18 @@ static noinline long bchfs_fpunch(struct bch_inode_info *inode, loff_t offset, l block_start >> 9, block_end >> 9, &i_sectors_delta); bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta); + + if (ret) + return ret; } - mutex_lock(&inode->ei_update_lock); - if (end >= inode->v.i_size && !truncated_last_page) { - ret = bch2_write_inode_size(c, inode, inode->v.i_size, - ATTR_MTIME|ATTR_CTIME); - } else { - ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL, + guard(mutex)(&inode->ei_update_lock); + if (end >= inode->v.i_size && !truncated_last_page) + return bch2_write_inode_size(c, inode, inode->v.i_size, + ATTR_MTIME|ATTR_CTIME); + else + return bch2_write_inode(c, inode, inode_update_times_fn, NULL, ATTR_MTIME|ATTR_CTIME); - } - mutex_unlock(&inode->ei_update_lock); -err: - return ret; } static noinline long bchfs_fcollapse_finsert(struct bch_inode_info *inode, @@ -631,7 +626,7 @@ static noinline int __bchfs_fallocate(struct bch_inode_info *inode, int mode, u64 start_sector, u64 end_sector) { struct bch_fs *c = inode->v.i_sb->s_fs_info; - struct btree_trans *trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); struct btree_iter iter; struct bpos end_pos = POS(inode->v.i_ino, end_sector); struct bch_io_opts opts; @@ -753,7 +748,6 @@ bkey_err: } bch2_trans_iter_exit(trans, &iter); - 
bch2_trans_put(trans); return ret; } @@ -802,13 +796,11 @@ static noinline long bchfs_fallocate(struct bch_inode_info *inode, int mode, if (end >= inode->v.i_size && (((mode & FALLOC_FL_ZERO_RANGE) && !truncated_last_page) || !(mode & FALLOC_FL_KEEP_SIZE))) { - spin_lock(&inode->v.i_lock); - i_size_write(&inode->v, end); - spin_unlock(&inode->v.i_lock); + scoped_guard(spinlock, &inode->v.i_lock) + i_size_write(&inode->v, end); - mutex_lock(&inode->ei_update_lock); - ret2 = bch2_write_inode_size(c, inode, end, 0); - mutex_unlock(&inode->ei_update_lock); + scoped_guard(mutex, &inode->ei_update_lock) + ret2 = bch2_write_inode_size(c, inode, end, 0); } return ret ?: ret2; @@ -861,8 +853,8 @@ static int quota_reserve_range(struct bch_inode_info *inode, struct bch_fs *c = inode->v.i_sb->s_fs_info; u64 sectors = end - start; - int ret = bch2_trans_run(c, - for_each_btree_key_in_subvolume_max(trans, iter, + CLASS(btree_trans, trans)(c); + int ret = for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_extents, POS(inode->v.i_ino, start), POS(inode->v.i_ino, end - 1), @@ -875,7 +867,7 @@ static int quota_reserve_range(struct bch_inode_info *inode, } 0; - }))); + })); return ret ?: bch2_quota_reservation_add(c, inode, res, sectors, true); } @@ -955,10 +947,9 @@ loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src, bch2_i_sectors_acct(c, dst, "a_res, i_sectors_delta); - spin_lock(&dst->v.i_lock); - if (pos_dst + ret > dst->v.i_size) - i_size_write(&dst->v, pos_dst + ret); - spin_unlock(&dst->v.i_lock); + scoped_guard(spinlock, &dst->v.i_lock) + if (pos_dst + ret > dst->v.i_size) + i_size_write(&dst->v, pos_dst + ret); if ((file_dst->f_flags & (__O_SYNC | O_DSYNC)) || IS_SYNC(file_inode(file_dst))) @@ -1020,38 +1011,38 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset) if (offset >= isize) return -ENXIO; - int ret = bch2_trans_run(c, - for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_extents, + CLASS(btree_trans, trans)(c); + int ret = for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_extents, POS(inode->v.i_ino, offset >> 9), POS(inode->v.i_ino, U64_MAX), inum.subvol, BTREE_ITER_slots, k, ({ - if (k.k->p.inode != inode->v.i_ino || - !bkey_extent_is_data(k.k)) { - loff_t start_offset = k.k->p.inode == inode->v.i_ino - ? max(offset, bkey_start_offset(k.k) << 9) - : offset; - loff_t end_offset = k.k->p.inode == inode->v.i_ino - ? MAX_LFS_FILESIZE - : k.k->p.offset << 9; - - /* - * Found a hole in the btree, now make sure it's - * a hole in the pagecache. We might have to - * keep searching if this hole is entirely dirty - * in the page cache: - */ - bch2_trans_unlock(trans); - loff_t pagecache_hole = bch2_seek_pagecache_hole(&inode->v, - start_offset, end_offset, 0, false); - if (pagecache_hole < end_offset) { - next_hole = pagecache_hole; - break; - } - } else { - offset = max(offset, bkey_start_offset(k.k) << 9); + if (k.k->p.inode != inode->v.i_ino || + !bkey_extent_is_data(k.k)) { + loff_t start_offset = k.k->p.inode == inode->v.i_ino + ? max(offset, bkey_start_offset(k.k) << 9) + : offset; + loff_t end_offset = k.k->p.inode == inode->v.i_ino + ? MAX_LFS_FILESIZE + : k.k->p.offset << 9; + + /* + * Found a hole in the btree, now make sure it's + * a hole in the pagecache. 
We might have to + * keep searching if this hole is entirely dirty + * in the page cache: + */ + bch2_trans_unlock(trans); + loff_t pagecache_hole = bch2_seek_pagecache_hole(&inode->v, + start_offset, end_offset, 0, false); + if (pagecache_hole < end_offset) { + next_hole = pagecache_hole; + break; } - 0; - }))); + } else { + offset = max(offset, bkey_start_offset(k.k) << 9); + } + 0; + })); if (ret) return ret; diff --git a/fs/bcachefs/fs-io.h b/fs/bcachefs/fs-io.h index ca70346e68dc..d229f7225da1 100644 --- a/fs/bcachefs/fs-io.h +++ b/fs/bcachefs/fs-io.h @@ -77,9 +77,8 @@ static inline void bch2_quota_reservation_put(struct bch_fs *c, struct quota_res *res) { if (res->sectors) { - mutex_lock(&inode->ei_quota_lock); + guard(mutex)(&inode->ei_quota_lock); __bch2_quota_reservation_put(c, inode, res); - mutex_unlock(&inode->ei_quota_lock); } } @@ -94,16 +93,15 @@ static inline int bch2_quota_reservation_add(struct bch_fs *c, if (test_bit(EI_INODE_SNAPSHOT, &inode->ei_flags)) return 0; - mutex_lock(&inode->ei_quota_lock); + guard(mutex)(&inode->ei_quota_lock); ret = bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, check_enospc ? KEY_TYPE_QUOTA_PREALLOC : KEY_TYPE_QUOTA_NOCHECK); - if (likely(!ret)) { - inode->ei_quota_reserved += sectors; - res->sectors += sectors; - } - mutex_unlock(&inode->ei_quota_lock); + if (ret) + return ret; - return ret; + inode->ei_quota_reserved += sectors; + res->sectors += sectors; + return 0; } #else @@ -134,9 +132,8 @@ static inline void bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info * struct quota_res *quota_res, s64 sectors) { if (sectors) { - mutex_lock(&inode->ei_quota_lock); + guard(mutex)(&inode->ei_quota_lock); __bch2_i_sectors_acct(c, inode, quota_res, sectors); - mutex_unlock(&inode->ei_quota_lock); } } diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c index 4e72e654da96..8b9d3c7d1f57 100644 --- a/fs/bcachefs/fs-ioctl.c +++ b/fs/bcachefs/fs-ioctl.c @@ -111,9 +111,8 @@ static int bch2_ioc_getlabel(struct bch_fs *c, char __user *user_label) BUILD_BUG_ON(BCH_SB_LABEL_SIZE >= FSLABEL_MAX); - mutex_lock(&c->sb_lock); - memcpy(label, c->disk_sb.sb->label, BCH_SB_LABEL_SIZE); - mutex_unlock(&c->sb_lock); + scoped_guard(mutex, &c->sb_lock) + memcpy(label, c->disk_sb.sb->label, BCH_SB_LABEL_SIZE); len = strnlen(label, BCH_SB_LABEL_SIZE); if (len == BCH_SB_LABEL_SIZE) { @@ -152,10 +151,10 @@ static int bch2_ioc_setlabel(struct bch_fs *c, if (ret) return ret; - mutex_lock(&c->sb_lock); - strscpy(c->disk_sb.sb->label, label, BCH_SB_LABEL_SIZE); - ret = bch2_write_super(c); - mutex_unlock(&c->sb_lock); + scoped_guard(mutex, &c->sb_lock) { + strscpy(c->disk_sb.sb->label, label, BCH_SB_LABEL_SIZE); + ret = bch2_write_super(c); + } mnt_drop_write_file(file); return ret; @@ -172,7 +171,7 @@ static int bch2_ioc_goingdown(struct bch_fs *c, u32 __user *arg) if (get_user(flags, arg)) return -EFAULT; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_log_msg_start(c, &buf); prt_printf(&buf, "shutdown by ioctl type %u", flags); @@ -193,13 +192,10 @@ static int bch2_ioc_goingdown(struct bch_fs *c, u32 __user *arg) bch2_fs_emergency_read_only2(c, &buf); break; default: - ret = -EINVAL; - goto noprint; + return -EINVAL; } bch2_print_str(c, KERN_ERR, buf.buf); -noprint: - printbuf_exit(&buf); return ret; } @@ -234,9 +230,8 @@ static long bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp, if (arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) { /* sync_inodes_sb enforce s_umount is locked */ - down_read(&c->vfs_sb->s_umount); + 
guard(rwsem_read)(&c->vfs_sb->s_umount); sync_inodes_sb(c->vfs_sb); - up_read(&c->vfs_sb->s_umount); } if (arg.src_ptr) { @@ -301,12 +296,10 @@ static long bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp, !arg.src_ptr) snapshot_src.subvol = inode_inum(to_bch_ei(dir)).subvol; - down_write(&c->snapshot_create_lock); - inode = __bch2_create(file_mnt_idmap(filp), to_bch_ei(dir), - dst_dentry, arg.mode|S_IFDIR, - 0, snapshot_src, create_flags); - up_write(&c->snapshot_create_lock); - + scoped_guard(rwsem_write, &c->snapshot_create_lock) + inode = __bch2_create(file_mnt_idmap(filp), to_bch_ei(dir), + dst_dentry, arg.mode|S_IFDIR, + 0, snapshot_src, create_flags); error = PTR_ERR_OR_ZERO(inode); if (error) goto err3; diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 3b0783f117ae..2789b30add10 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -106,14 +106,13 @@ int __must_check bch2_write_inode(struct bch_fs *c, inode_set_fn set, void *p, unsigned fields) { - struct btree_trans *trans = bch2_trans_get(c); - struct btree_iter iter = {}; - struct bch_inode_unpacked inode_u; - int ret; + CLASS(btree_trans, trans)(c); retry: bch2_trans_begin(trans); - ret = bch2_inode_peek(trans, &iter, &inode_u, inode_inum(inode), BTREE_ITER_intent); + struct btree_iter iter = {}; + struct bch_inode_unpacked inode_u; + int ret = bch2_inode_peek(trans, &iter, &inode_u, inode_inum(inode), BTREE_ITER_intent); if (ret) goto err; @@ -156,7 +155,6 @@ err: inode_inum(inode).subvol, inode_inum(inode).inum); - bch2_trans_put(trans); return ret < 0 ? ret : 0; } @@ -166,32 +164,27 @@ int bch2_fs_quota_transfer(struct bch_fs *c, unsigned qtypes, enum quota_acct_mode mode) { - unsigned i; - int ret; - qtypes &= enabled_qtypes(c); - for (i = 0; i < QTYP_NR; i++) + for (unsigned i = 0; i < QTYP_NR; i++) if (new_qid.q[i] == inode->ei_qid.q[i]) qtypes &= ~(1U << i); if (!qtypes) return 0; - mutex_lock(&inode->ei_quota_lock); + guard(mutex)(&inode->ei_quota_lock); - ret = bch2_quota_transfer(c, qtypes, new_qid, + int ret = bch2_quota_transfer(c, qtypes, new_qid, inode->ei_qid, inode->v.i_blocks + inode->ei_quota_reserved, mode); if (!ret) - for (i = 0; i < QTYP_NR; i++) + for (unsigned i = 0; i < QTYP_NR; i++) if (qtypes & (1 << i)) inode->ei_qid.q[i] = new_qid.q[i]; - mutex_unlock(&inode->ei_quota_lock); - return ret; } @@ -241,7 +234,7 @@ int bch2_inode_or_descendents_is_open(struct btree_trans *trans, struct bpos p) struct bch_fs *c = trans->c; struct rhltable *ht = &c->vfs_inodes_by_inum_table; u64 inum = p.offset; - DARRAY(u32) subvols; + CLASS(darray_u32, subvols)(); int ret = 0; if (!test_bit(BCH_FS_started, &c->flags)) @@ -280,7 +273,7 @@ restart: rcu_read_unlock(); ret = darray_make_room(&subvols, 1); if (ret) - goto err; + return ret; subvols.nr = 0; goto restart_from_top; } @@ -303,14 +296,13 @@ restart: u32 snap; ret = bch2_subvolume_get_snapshot(trans, *i, &snap); if (ret) - goto err; + return ret; ret = bch2_snapshot_is_ancestor(c, snap, p.snapshot); if (ret) break; } -err: - darray_exit(&subvols); + return ret; } @@ -367,9 +359,9 @@ repeat: static void bch2_inode_hash_remove(struct bch_fs *c, struct bch_inode_info *inode) { - spin_lock(&inode->v.i_lock); - bool remove = test_and_clear_bit(EI_INODE_HASHED, &inode->ei_flags); - spin_unlock(&inode->v.i_lock); + bool remove; + scoped_guard(spinlock, &inode->v.i_lock) + remove = test_and_clear_bit(EI_INODE_HASHED, &inode->ei_flags); if (remove) { int ret = rhltable_remove(&c->vfs_inodes_by_inum_table, @@ -430,9 +422,8 @@ retry: 
inode_sb_list_add(&inode->v); - mutex_lock(&c->vfs_inodes_lock); - list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list); - mutex_unlock(&c->vfs_inodes_lock); + scoped_guard(mutex, &c->vfs_inodes_lock) + list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list); return inode; } } @@ -514,7 +505,7 @@ struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum) if (inode) return &inode->v; - struct btree_trans *trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); struct bch_inode_unpacked inode_u; struct bch_subvolume subvol; @@ -522,7 +513,6 @@ struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum) bch2_subvolume_get(trans, inum.subvol, true, &subvol) ?: bch2_inode_find_by_inum_trans(trans, inum, &inode_u)) ?: PTR_ERR_OR_ZERO(inode = bch2_inode_hash_init_insert(trans, inum, &inode_u, &subvol)); - bch2_trans_put(trans); return ret ? ERR_PTR(ret) : &inode->v; } @@ -534,7 +524,6 @@ __bch2_create(struct mnt_idmap *idmap, unsigned flags) { struct bch_fs *c = dir->v.i_sb->s_fs_info; - struct btree_trans *trans; struct bch_inode_unpacked dir_u; struct bch_inode_info *inode; struct bch_inode_unpacked inode_u; @@ -555,18 +544,23 @@ __bch2_create(struct mnt_idmap *idmap, if (ret) return ERR_PTR(ret); #endif + inode = __bch2_new_inode(c, GFP_NOFS); if (unlikely(!inode)) { - inode = ERR_PTR(-ENOMEM); - goto err; + posix_acl_release(default_acl); + posix_acl_release(acl); + return ERR_PTR(-ENOMEM); } bch2_inode_init_early(c, &inode_u); if (!(flags & BCH_CREATE_TMPFILE)) mutex_lock(&dir->ei_update_lock); - - trans = bch2_trans_get(c); + /* + * posix_acl_create() calls get_acl -> btree transaction, don't start + * ours until after, ei->update_lock must also be taken first: + */ + CLASS(btree_trans, trans)(c); retry: bch2_trans_begin(trans); @@ -625,7 +619,6 @@ err_before_quota: * restart here. 
*/ inode = bch2_inode_hash_insert(c, NULL, inode); - bch2_trans_put(trans); err: posix_acl_release(default_acl); posix_acl_release(acl); @@ -634,7 +627,6 @@ err_trans: if (!(flags & BCH_CREATE_TMPFILE)) mutex_unlock(&dir->ei_update_lock); - bch2_trans_put(trans); make_bad_inode(&inode->v); iput(&inode->v); inode = ERR_PTR(ret); @@ -649,7 +641,7 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans, { struct bch_fs *c = trans->c; subvol_inum inum = {}; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); struct qstr lookup_name; int ret = bch2_maybe_casefold(trans, dir_hash_info, name, &lookup_name); @@ -701,7 +693,6 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans, goto err; out: bch2_trans_iter_exit(trans, &dirent_iter); - printbuf_exit(&buf); return inode; err: inode = ERR_PTR(ret); @@ -770,8 +761,8 @@ static int __bch2_link(struct bch_fs *c, struct bch_inode_unpacked dir_u, inode_u; int ret; - mutex_lock(&inode->ei_update_lock); - struct btree_trans *trans = bch2_trans_get(c); + guard(mutex)(&inode->ei_update_lock); + CLASS(btree_trans, trans)(c); ret = commit_do(trans, NULL, NULL, 0, bch2_link_trans(trans, @@ -785,8 +776,6 @@ static int __bch2_link(struct bch_fs *c, bch2_inode_update_after_write(trans, inode, &inode_u, ATTR_CTIME); } - bch2_trans_put(trans); - mutex_unlock(&inode->ei_update_lock); return ret; } @@ -821,8 +810,7 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry, int ret; bch2_lock_inodes(INODE_UPDATE_LOCK, dir, inode); - - struct btree_trans *trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, @@ -849,7 +837,6 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry, if (IS_CASEFOLDED(vdir)) d_invalidate(dentry); err: - bch2_trans_put(trans); bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode); return ret; @@ -918,7 +905,6 @@ static int bch2_rename2(struct mnt_idmap *idmap, struct bch_inode_info *dst_inode = to_bch_ei(dst_dentry->d_inode); struct bch_inode_unpacked dst_dir_u, src_dir_u; struct bch_inode_unpacked src_inode_u, dst_inode_u, *whiteout_inode_u; - struct btree_trans *trans; enum bch_rename_mode mode = flags & RENAME_EXCHANGE ? 
BCH_RENAME_EXCHANGE : dst_dentry->d_inode @@ -942,7 +928,7 @@ static int bch2_rename2(struct mnt_idmap *idmap, src_inode, dst_inode); - trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); ret = bch2_subvol_is_ro_trans(trans, src_dir->ei_inum.subvol) ?: bch2_subvol_is_ro_trans(trans, dst_dir->ei_inum.subvol); @@ -1028,8 +1014,6 @@ err_tx_restart: bch2_inode_update_after_write(trans, dst_inode, &dst_inode_u, ATTR_CTIME); err: - bch2_trans_put(trans); - bch2_fs_quota_transfer(c, src_inode, bch_qid(&src_inode->ei_inode), 1 << QTYP_PRJ, @@ -1097,7 +1081,6 @@ int bch2_setattr_nonsize(struct mnt_idmap *idmap, { struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_qid qid; - struct btree_trans *trans; struct btree_iter inode_iter = {}; struct bch_inode_unpacked inode_u; struct posix_acl *acl = NULL; @@ -1105,7 +1088,7 @@ int bch2_setattr_nonsize(struct mnt_idmap *idmap, kgid_t kgid; int ret; - mutex_lock(&inode->ei_update_lock); + guard(mutex)(&inode->ei_update_lock); qid = inode->ei_qid; @@ -1122,9 +1105,9 @@ int bch2_setattr_nonsize(struct mnt_idmap *idmap, ret = bch2_fs_quota_transfer(c, inode, qid, ~0, KEY_TYPE_QUOTA_PREALLOC); if (ret) - goto err; + return ret; - trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); retry: bch2_trans_begin(trans); kfree(acl); @@ -1153,18 +1136,13 @@ btree_err: if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto retry; if (unlikely(ret)) - goto err_trans; + return ret; bch2_inode_update_after_write(trans, inode, &inode_u, attr->ia_valid); if (acl) set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl); -err_trans: - bch2_trans_put(trans); -err: - mutex_unlock(&inode->ei_update_lock); - - return bch2_err_class(ret); + return 0; } static int bch2_getattr(struct mnt_idmap *idmap, @@ -1228,18 +1206,16 @@ static int bch2_setattr(struct mnt_idmap *idmap, { struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); struct bch_fs *c = inode->v.i_sb->s_fs_info; - int ret; lockdep_assert_held(&inode->v.i_rwsem); - ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?: - setattr_prepare(idmap, dentry, iattr); - if (ret) - return ret; + int ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?: + setattr_prepare(idmap, dentry, iattr) ?: + (iattr->ia_valid & ATTR_SIZE + ? bchfs_truncate(idmap, inode, iattr) + : bch2_setattr_nonsize(idmap, inode, iattr)); - return iattr->ia_valid & ATTR_SIZE - ? 
bchfs_truncate(idmap, inode, iattr) - : bch2_setattr_nonsize(idmap, inode, iattr); + return bch2_err_class(ret); } static int bch2_tmpfile(struct mnt_idmap *idmap, @@ -1487,7 +1463,6 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, { struct bch_fs *c = vinode->i_sb->s_fs_info; struct bch_inode_info *ei = to_bch_ei(vinode); - struct btree_trans *trans; struct bch_fiemap_extent cur, prev; int ret = 0; @@ -1505,7 +1480,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, bch2_bkey_buf_init(&prev.kbuf); bkey_init(&prev.kbuf.k->k); - trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); while (start < end) { ret = lockrestart_do(trans, @@ -1538,7 +1513,6 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, ret = bch2_fill_extent(c, info, &prev); } err: - bch2_trans_put(trans); bch2_bkey_buf_exit(&cur.kbuf, c); bch2_bkey_buf_exit(&prev.kbuf, c); @@ -1968,7 +1942,6 @@ static int bch2_get_name(struct dentry *parent, char *name, struct dentry *child struct bch_inode_info *inode = to_bch_ei(child->d_inode); struct bch_inode_info *dir = to_bch_ei(parent->d_inode); struct bch_fs *c = inode->v.i_sb->s_fs_info; - struct btree_trans *trans; struct btree_iter iter1; struct btree_iter iter2; struct bkey_s_c k; @@ -1983,8 +1956,7 @@ static int bch2_get_name(struct dentry *parent, char *name, struct dentry *child if (!S_ISDIR(dir->v.i_mode)) return -EINVAL; - trans = bch2_trans_get(c); - + CLASS(btree_trans, trans)(c); bch2_trans_iter_init(trans, &iter1, BTREE_ID_dirents, POS(dir->ei_inode.bi_inum, 0), 0); bch2_trans_iter_init(trans, &iter2, BTREE_ID_dirents, @@ -2063,8 +2035,6 @@ err: bch2_trans_iter_exit(trans, &iter1); bch2_trans_iter_exit(trans, &iter2); - bch2_trans_put(trans); - return ret; } @@ -2148,12 +2118,11 @@ static int bch2_vfs_write_inode(struct inode *vinode, { struct bch_fs *c = vinode->i_sb->s_fs_info; struct bch_inode_info *inode = to_bch_ei(vinode); - int ret; - mutex_lock(&inode->ei_update_lock); - ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL, - ATTR_ATIME|ATTR_MTIME|ATTR_CTIME); - mutex_unlock(&inode->ei_update_lock); + guard(mutex)(&inode->ei_update_lock); + + int ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL, + ATTR_ATIME|ATTR_MTIME|ATTR_CTIME); return bch2_err_class(ret); } @@ -2200,9 +2169,8 @@ static void bch2_evict_inode(struct inode *vinode) bch2_inode_hash_remove(c, inode); } - mutex_lock(&c->vfs_inodes_lock); - list_del_init(&inode->ei_vfs_inode_list); - mutex_unlock(&c->vfs_inodes_lock); + scoped_guard(mutex, &c->vfs_inodes_lock) + list_del_init(&inode->ei_vfs_inode_list); } void bch2_evict_subvolume_inodes(struct bch_fs *c, snapshot_id_list *s) @@ -2352,16 +2320,14 @@ static int bch2_show_devname(struct seq_file *seq, struct dentry *root) static int bch2_show_options(struct seq_file *seq, struct dentry *root) { struct bch_fs *c = root->d_sb->s_fs_info; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_opts_to_text(&buf, c->opts, c, c->disk_sb.sb, OPT_MOUNT, OPT_HIDDEN, OPT_SHOW_MOUNT_STYLE); printbuf_nul_terminate(&buf); seq_printf(seq, ",%s", buf.buf); - int ret = buf.allocation_failure ? -ENOMEM : 0; - printbuf_exit(&buf); - return ret; + return buf.allocation_failure ? 
-ENOMEM : 0; } static void bch2_put_super(struct super_block *sb) @@ -2383,24 +2349,20 @@ static int bch2_freeze(struct super_block *sb) { struct bch_fs *c = sb->s_fs_info; - down_write(&c->state_lock); + guard(rwsem_write)(&c->state_lock); bch2_fs_read_only(c); - up_write(&c->state_lock); return 0; } static int bch2_unfreeze(struct super_block *sb) { struct bch_fs *c = sb->s_fs_info; - int ret; if (test_bit(BCH_FS_emergency_ro, &c->flags)) return 0; - down_write(&c->state_lock); - ret = bch2_fs_read_write(c); - up_write(&c->state_lock); - return ret; + guard(rwsem_write)(&c->state_lock); + return bch2_fs_read_write(c); } static const struct super_operations bch_super_operations = { @@ -2671,7 +2633,7 @@ static int bch2_fs_reconfigure(struct fs_context *fc) opt_set(opts->opts, read_only, (fc->sb_flags & SB_RDONLY) != 0); if (opts->opts.read_only != c->opts.read_only) { - down_write(&c->state_lock); + guard(rwsem_write)(&c->state_lock); if (opts->opts.read_only) { bch2_fs_read_only(c); @@ -2681,22 +2643,18 @@ static int bch2_fs_reconfigure(struct fs_context *fc) ret = bch2_fs_read_write(c); if (ret) { bch_err(c, "error going rw: %i", ret); - up_write(&c->state_lock); - ret = -EINVAL; - goto err; + return -EINVAL; } sb->s_flags &= ~SB_RDONLY; } c->opts.read_only = opts->opts.read_only; - - up_write(&c->state_lock); } if (opt_defined(opts->opts, errors)) c->opts.errors = opts->opts.errors; -err: + return bch2_err_class(ret); } diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 471e93a3f00c..df0aa2522b18 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -53,10 +53,9 @@ static int dirent_points_to_inode(struct bch_fs *c, { int ret = dirent_points_to_inode_nowarn(c, dirent, inode); if (ret) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); dirent_inode_mismatch_msg(&buf, c, dirent, inode); bch_warn(c, "%s", buf.buf); - printbuf_exit(&buf); } return ret; } @@ -253,14 +252,13 @@ create_lostfound: * XXX: we could have a nicer log message here if we had a nice way to * walk backpointers to print a path */ - struct printbuf path = PRINTBUF; + CLASS(printbuf, path)(); ret = bch2_inum_to_path(trans, root_inum, &path); if (ret) goto err; bch_notice(c, "creating %s/lost+found in subvol %llu snapshot %u", path.buf, root_inum.subvol, snapshot); - printbuf_exit(&path); u64 now = bch2_current_time(c); u64 cpu = raw_smp_processor_id(); @@ -455,7 +453,7 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked * * whiteouts for the dirent we just created. 
*/ if (!inode->bi_subvol && bch2_snapshot_is_leaf(c, inode->bi_snapshot) <= 0) { - snapshot_id_list whiteouts_done; + CLASS(snapshot_id_list, whiteouts_done)(); struct btree_iter iter; struct bkey_s_c k; @@ -499,7 +497,6 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked * break; } } - darray_exit(&whiteouts_done); bch2_trans_iter_exit(trans, &iter); } @@ -683,11 +680,15 @@ static inline void snapshots_seen_exit(struct snapshots_seen *s) darray_exit(&s->ids); } -static inline void snapshots_seen_init(struct snapshots_seen *s) +static inline struct snapshots_seen snapshots_seen_init(void) { - memset(s, 0, sizeof(*s)); + return (struct snapshots_seen) {}; } +DEFINE_CLASS(snapshots_seen, struct snapshots_seen, + snapshots_seen_exit(&_T), + snapshots_seen_init(), void) + static int snapshots_seen_add_inorder(struct bch_fs *c, struct snapshots_seen *s, u32 id) { u32 *i; @@ -815,9 +816,13 @@ static void inode_walker_exit(struct inode_walker *w) static struct inode_walker inode_walker_init(void) { - return (struct inode_walker) { 0, }; + return (struct inode_walker) {}; } +DEFINE_CLASS(inode_walker, struct inode_walker, + inode_walker_exit(&_T), + inode_walker_init(), void) + static int add_inode(struct bch_fs *c, struct inode_walker *w, struct bkey_s_c inode) { @@ -917,7 +922,7 @@ lookup_inode_for_snapshot(struct btree_trans *trans, struct inode_walker *w, str if (!i) return NULL; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); int ret = 0; if (fsck_err_on(k.k->p.snapshot != i->inode.bi_snapshot, @@ -967,10 +972,8 @@ lookup_inode_for_snapshot(struct btree_trans *trans, struct inode_walker *w, str goto fsck_err; } - printbuf_exit(&buf); return i; fsck_err: - printbuf_exit(&buf); return ERR_PTR(ret); } @@ -1004,27 +1007,25 @@ int bch2_fsck_update_backpointers(struct btree_trans *trans, return 0; struct bkey_i_dirent *d = bkey_i_to_dirent(new); - struct inode_walker target = inode_walker_init(); - int ret = 0; + CLASS(inode_walker, target)(); if (d->v.d_type == DT_SUBVOL) { bch_err(trans->c, "%s does not support DT_SUBVOL", __func__); - ret = -BCH_ERR_fsck_repair_unimplemented; + return bch_err_throw(trans->c, fsck_repair_unimplemented); } else { - ret = get_visible_inodes(trans, &target, s, le64_to_cpu(d->v.d_inum)); + int ret = get_visible_inodes(trans, &target, s, le64_to_cpu(d->v.d_inum)); if (ret) - goto err; + return ret; darray_for_each(target.inodes, i) { i->inode.bi_dir_offset = d->k.p.offset; ret = __bch2_fsck_write_inode(trans, &i->inode); if (ret) - goto err; + return ret; } + + return 0; } -err: - inode_walker_exit(&target); - return ret; } static struct bkey_s_c_dirent inode_get_dirent(struct btree_trans *trans, @@ -1056,7 +1057,7 @@ static int check_inode_dirent_inode(struct btree_trans *trans, bool *write_inode) { struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); u32 inode_snapshot = inode->bi_snapshot; struct btree_iter dirent_iter = {}; @@ -1106,7 +1107,6 @@ out: ret = 0; fsck_err: bch2_trans_iter_exit(trans, &dirent_iter); - printbuf_exit(&buf); bch_err_fn(c, ret); return ret; } @@ -1118,7 +1118,7 @@ static int check_inode(struct btree_trans *trans, struct snapshots_seen *s) { struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); struct bch_inode_unpacked u; bool do_update = false; int ret; @@ -1234,7 +1234,7 @@ static int check_inode(struct btree_trans *trans, */ ret = check_inode_deleted_list(trans, k.k->p); if (ret < 0) - goto err_noprint; + return ret; 
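/*
 * An illustrative sketch, not part of the patch: the snapshots_seen and
 * inode_walker conversions above use the scope-based cleanup machinery
 * from <linux/cleanup.h>. DEFINE_CLASS() pairs a constructor expression
 * with a destructor, and CLASS(name, var)() declares a variable whose
 * destructor runs automatically on every path out of the scope, which is
 * what makes the plain early returns above safe without goto-based unwind
 * labels. Minimal sketch of the pattern, using a hypothetical foo_buf
 * type introduced only for illustration:
 */
#include <linux/cleanup.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/string.h>

struct foo_buf { char *p; };

static inline void foo_buf_exit(struct foo_buf *b)
{
	kfree(b->p);			/* kfree(NULL) is a no-op */
}

static inline struct foo_buf foo_buf_init(void)
{
	return (struct foo_buf) {};
}

/* DEFINE_CLASS(name, type, destructor expr, constructor expr, ctor args) */
DEFINE_CLASS(foo_buf, struct foo_buf,
	     foo_buf_exit(&_T),
	     foo_buf_init(), void)

static int foo_use(void)
{
	CLASS(foo_buf, buf)();		/* foo_buf_exit() runs at scope exit */

	buf.p = kstrdup("example", GFP_KERNEL);
	if (!buf.p)
		return -ENOMEM;		/* no unwind label needed */

	return 0;
}
/*
 * guard(mutex)(...) and scoped_guard(spinlock, ...) seen throughout this
 * series are the same mechanism specialized for locks.
 */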
fsck_err_on(!ret, trans, unlinked_inode_not_on_deleted_list, @@ -1255,7 +1255,7 @@ static int check_inode(struct btree_trans *trans, u.bi_inum, u.bi_snapshot)) { ret = bch2_inode_rm_snapshot(trans, u.bi_inum, iter->pos.snapshot); bch_err_msg(c, ret, "in fsck deleting inode"); - goto err_noprint; + return ret; } ret = 0; } @@ -1316,33 +1316,26 @@ do_update: ret = __bch2_fsck_write_inode(trans, &u); bch_err_msg(c, ret, "in fsck updating inode"); if (ret) - goto err_noprint; + return ret; } err: fsck_err: bch_err_fn(c, ret); -err_noprint: - printbuf_exit(&buf); return ret; } int bch2_check_inodes(struct bch_fs *c) { struct bch_inode_unpacked snapshot_root = {}; - struct snapshots_seen s; - snapshots_seen_init(&s); + CLASS(btree_trans, trans)(c); + CLASS(snapshots_seen, s)(); - int ret = bch2_trans_run(c, - for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, + return for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, POS_MIN, BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - check_inode(trans, &iter, k, &snapshot_root, &s))); - - snapshots_seen_exit(&s); - bch_err_fn(c, ret); - return ret; + check_inode(trans, &iter, k, &snapshot_root, &s)); } static int find_oldest_inode_needs_reattach(struct btree_trans *trans, @@ -1390,7 +1383,7 @@ static int check_unreachable_inode(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); int ret = 0; if (!bkey_is_inode(k.k)) @@ -1414,7 +1407,6 @@ static int check_unreachable_inode(struct btree_trans *trans, buf.buf))) ret = reattach_inode(trans, &inode); fsck_err: - printbuf_exit(&buf); return ret; } @@ -1430,14 +1422,12 @@ fsck_err: */ int bch2_check_unreachable_inodes(struct bch_fs *c) { - int ret = bch2_trans_run(c, - for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, + CLASS(btree_trans, trans)(c); + return for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, POS_MIN, BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - check_unreachable_inode(trans, &iter, k))); - bch_err_fn(c, ret); - return ret; + check_unreachable_inode(trans, &iter, k)); } static inline bool btree_matches_i_mode(enum btree_id btree, unsigned mode) @@ -1461,7 +1451,7 @@ static int check_key_has_inode(struct btree_trans *trans, struct bkey_s_c k) { struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); struct btree_iter iter2 = {}; int ret = PTR_ERR_OR_ZERO(i); if (ret) @@ -1557,7 +1547,6 @@ out: err: fsck_err: bch2_trans_iter_exit(trans, &iter2); - printbuf_exit(&buf); bch_err_fn(c, ret); return ret; delete: @@ -1627,23 +1616,28 @@ static int check_i_sectors_notnested(struct btree_trans *trans, struct inode_wal if (i->inode.bi_sectors == i->count) continue; + CLASS(printbuf, buf)(); + lockrestart_do(trans, + bch2_inum_snapshot_to_path(trans, + i->inode.bi_inum, + i->inode.bi_snapshot, NULL, &buf)); + count2 = bch2_count_inode_sectors(trans, w->last_pos.inode, i->inode.bi_snapshot); if (w->recalculate_sums) i->count = count2; if (i->count != count2) { - bch_err_ratelimited(c, "fsck counted i_sectors wrong for inode %llu:%u: got %llu should be %llu", - w->last_pos.inode, i->inode.bi_snapshot, i->count, count2); + bch_err_ratelimited(c, "fsck counted i_sectors wrong: got %llu should be %llu\n%s", + i->count, count2, buf.buf); i->count = count2; } if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_sectors_dirty) && i->inode.bi_sectors != i->count, trans, inode_i_sectors_wrong, - 
"inode %llu:%u has incorrect i_sectors: got %llu, should be %llu", - w->last_pos.inode, i->inode.bi_snapshot, - i->inode.bi_sectors, i->count)) { + "incorrect i_sectors: got %llu, should be %llu\n%s", + i->inode.bi_sectors, i->count, buf.buf)) { i->inode.bi_sectors = i->count; ret = bch2_fsck_write_inode(trans, &i->inode); if (ret) @@ -1686,11 +1680,15 @@ static void extent_ends_exit(struct extent_ends *extent_ends) darray_exit(&extent_ends->e); } -static void extent_ends_init(struct extent_ends *extent_ends) +static struct extent_ends extent_ends_init(void) { - memset(extent_ends, 0, sizeof(*extent_ends)); + return (struct extent_ends) {}; } +DEFINE_CLASS(extent_ends, struct extent_ends, + extent_ends_exit(&_T), + extent_ends_init(), void) + static int extent_ends_at(struct bch_fs *c, struct extent_ends *extent_ends, struct snapshots_seen *seen, @@ -1730,7 +1728,7 @@ static int overlapping_extents_found(struct btree_trans *trans, struct extent_end *extent_end) { struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); struct btree_iter iter1, iter2 = {}; struct bkey_s_c k1, k2; int ret; @@ -1836,7 +1834,6 @@ fsck_err: err: bch2_trans_iter_exit(trans, &iter2); bch2_trans_iter_exit(trans, &iter1); - printbuf_exit(&buf); return ret; } @@ -1893,11 +1890,10 @@ static int check_extent_overbig(struct btree_trans *trans, struct btree_iter *it bkey_for_each_crc(k.k, ptrs, crc, i) if (crc_is_encoded(crc) && crc.uncompressed_size > encoded_extent_max_sectors) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_bkey_val_to_text(&buf, c, k); bch_err(c, "overbig encoded extent, please report this:\n %s", buf.buf); - printbuf_exit(&buf); } return 0; @@ -1911,7 +1907,7 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, struct disk_reservation *res) { struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); int ret = 0; ret = bch2_check_key_has_snapshot(trans, iter, k); @@ -2004,7 +2000,6 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, out: err: fsck_err: - printbuf_exit(&buf); bch_err_fn(c, ret); return ret; } @@ -2015,49 +2010,41 @@ fsck_err: */ int bch2_check_extents(struct bch_fs *c) { - struct inode_walker w = inode_walker_init(); - struct snapshots_seen s; - struct extent_ends extent_ends; struct disk_reservation res = { 0 }; - snapshots_seen_init(&s); - extent_ends_init(&extent_ends); + CLASS(btree_trans, trans)(c); + CLASS(snapshots_seen, s)(); + CLASS(inode_walker, w)(); + CLASS(extent_ends, extent_ends)(); - int ret = bch2_trans_run(c, - for_each_btree_key(trans, iter, BTREE_ID_extents, + int ret = for_each_btree_key(trans, iter, BTREE_ID_extents, POS(BCACHEFS_ROOT_INO, 0), BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, ({ bch2_disk_reservation_put(c, &res); check_extent(trans, &iter, k, &w, &s, &extent_ends, &res) ?: check_extent_overbig(trans, &iter, k); })) ?: - check_i_sectors_notnested(trans, &w)); + check_i_sectors_notnested(trans, &w); bch2_disk_reservation_put(c, &res); - extent_ends_exit(&extent_ends); - inode_walker_exit(&w); - snapshots_seen_exit(&s); - - bch_err_fn(c, ret); return ret; } int bch2_check_indirect_extents(struct bch_fs *c) { + CLASS(btree_trans, trans)(c); struct disk_reservation res = { 0 }; - int ret = bch2_trans_run(c, - for_each_btree_key_commit(trans, iter, BTREE_ID_reflink, + int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_reflink, POS_MIN, BTREE_ITER_prefetch, k, &res, NULL, BCH_TRANS_COMMIT_no_enospc, ({ 
bch2_disk_reservation_put(c, &res); check_extent_overbig(trans, &iter, k); - }))); + })); bch2_disk_reservation_put(c, &res); - bch_err_fn(c, ret); return ret; } @@ -2150,7 +2137,7 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter * u32 parent_snapshot; u32 new_parent_subvol = 0; u64 parent_inum; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); int ret = 0; ret = subvol_lookup(trans, parent_subvol, &parent_snapshot, &parent_inum); @@ -2274,7 +2261,6 @@ out: err: fsck_err: bch2_trans_iter_exit(trans, &subvol_iter); - printbuf_exit(&buf); return ret; } @@ -2288,39 +2274,37 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, { struct bch_fs *c = trans->c; struct inode_walker_entry *i; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); int ret = 0; ret = bch2_check_key_has_snapshot(trans, iter, k); - if (ret) { - ret = ret < 0 ? ret : 0; - goto out; - } + if (ret) + return ret < 0 ? ret : 0; ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p); if (ret) - goto err; + return ret; if (k.k->type == KEY_TYPE_whiteout) - goto out; + return 0; if (dir->last_pos.inode != k.k->p.inode && dir->have_inodes) { ret = check_subdir_dirents_count(trans, dir); if (ret) - goto err; + return ret; } i = walk_inode(trans, dir, k); ret = PTR_ERR_OR_ZERO(i); - if (ret < 0) - goto err; + if (ret) + return ret; ret = check_key_has_inode(trans, iter, dir, i, k); if (ret) - goto err; + return ret; if (!i || i->whiteout) - goto out; + return 0; if (dir->first_this_inode) *hash_info = bch2_hash_info_init(c, &i->inode); @@ -2331,15 +2315,11 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, ret = bch2_str_hash_check_key(trans, s, &bch2_dirent_hash_desc, hash_info, iter, k, need_second_pass); if (ret < 0) - goto err; - if (ret) { - /* dirent has been deleted */ - ret = 0; - goto out; - } - + return ret; + if (ret) + return 0; /* dirent has been deleted */ if (k.k->type != KEY_TYPE_dirent) - goto out; + return 0; struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); @@ -2364,13 +2344,13 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, d.v->d_type, &name, NULL, target); ret = PTR_ERR_OR_ZERO(new_d); if (ret) - goto out; + return ret; new_d->k.p.inode = d.k->p.inode; new_d->k.p.snapshot = d.k->p.snapshot; struct btree_iter dup_iter = {}; - ret = bch2_hash_delete_at(trans, + return bch2_hash_delete_at(trans, bch2_dirent_hash_desc, hash_info, iter, BTREE_UPDATE_internal_snapshot_node) ?: bch2_str_hash_repair_key(trans, s, @@ -2378,17 +2358,16 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, iter, bkey_i_to_s_c(&new_d->k_i), &dup_iter, bkey_s_c_null, need_second_pass); - goto out; } if (d.v->d_type == DT_SUBVOL) { ret = check_dirent_to_subvol(trans, iter, d); if (ret) - goto err; + return ret; } else { ret = get_visible_inodes(trans, target, s, le64_to_cpu(d.v->d_inum)); if (ret) - goto err; + return ret; if (!target->inodes.nr) { ret = maybe_reconstruct_inum(trans, le64_to_cpu(d.v->d_inum), @@ -2405,13 +2384,13 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, buf.buf))) { ret = bch2_fsck_remove_dirent(trans, d.k->p); if (ret) - goto err; + return ret; } darray_for_each(target->inodes, i) { ret = bch2_check_dirent_target(trans, iter, d, &i->inode, true); if (ret) - goto err; + return ret; } darray_for_each(target->deletes, i) @@ -2434,24 +2413,27 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, 
BTREE_UPDATE_internal_snapshot_node); bch2_trans_iter_exit(trans, &delete_iter); if (ret) - goto err; + return ret; } } + /* + * Cannot access key values after doing a transaction commit without + * revalidating: + */ + bool have_dir = d.v->d_type == DT_DIR; + ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); if (ret) - goto err; + return ret; for_each_visible_inode(c, s, dir, d.k->p.snapshot, i) { - if (d.v->d_type == DT_DIR) + if (have_dir) i->count++; i->i_size += bkey_bytes(d.k); } -out: -err: fsck_err: - printbuf_exit(&buf); return ret; } @@ -2461,23 +2443,21 @@ fsck_err: */ int bch2_check_dirents(struct bch_fs *c) { - struct inode_walker dir = inode_walker_init(); - struct inode_walker target = inode_walker_init(); - struct snapshots_seen s; struct bch_hash_info hash_info; + CLASS(btree_trans, trans)(c); + CLASS(snapshots_seen, s)(); + CLASS(inode_walker, dir)(); + CLASS(inode_walker, target)(); bool need_second_pass = false, did_second_pass = false; int ret; - - snapshots_seen_init(&s); again: - ret = bch2_trans_run(c, - for_each_btree_key_commit(trans, iter, BTREE_ID_dirents, + ret = for_each_btree_key_commit(trans, iter, BTREE_ID_dirents, POS(BCACHEFS_ROOT_INO, 0), BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, check_dirent(trans, &iter, k, &hash_info, &dir, &target, &s, &need_second_pass)) ?: - check_subdir_count_notnested(trans, &dir)); + check_subdir_count_notnested(trans, &dir); if (!ret && need_second_pass && !did_second_pass) { bch_info(c, "check_dirents requires second pass"); @@ -2490,10 +2470,6 @@ again: ret = -EINVAL; } - snapshots_seen_exit(&s); - inode_walker_exit(&dir); - inode_walker_exit(&target); - bch_err_fn(c, ret); return ret; } @@ -2536,21 +2512,17 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter, */ int bch2_check_xattrs(struct bch_fs *c) { - struct inode_walker inode = inode_walker_init(); struct bch_hash_info hash_info; - int ret = 0; + CLASS(btree_trans, trans)(c); + CLASS(inode_walker, inode)(); - ret = bch2_trans_run(c, - for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs, + int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs, POS(BCACHEFS_ROOT_INO, 0), BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - check_xattr(trans, &iter, k, &hash_info, &inode))); - - inode_walker_exit(&inode); - bch_err_fn(c, ret); + check_xattr(trans, &iter, k, &hash_info, &inode)); return ret; } @@ -2615,18 +2587,17 @@ fsck_err: /* Get root directory, create if it doesn't exist: */ int bch2_check_root(struct bch_fs *c) { - int ret = bch2_trans_commit_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - check_root_trans(trans)); - bch_err_fn(c, ret); - return ret; + CLASS(btree_trans, trans)(c); + return commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, + check_root_trans(trans)); } static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k) { struct bch_fs *c = trans->c; struct btree_iter parent_iter = {}; - darray_u32 subvol_path = {}; - struct printbuf buf = PRINTBUF; + CLASS(darray_u32, subvol_path)(); + CLASS(printbuf, buf)(); int ret = 0; if (k.k->type != KEY_TYPE_subvolume) @@ -2686,21 +2657,17 @@ static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter, } fsck_err: err: - printbuf_exit(&buf); - darray_exit(&subvol_path); bch2_trans_iter_exit(trans, &parent_iter); return ret; } int bch2_check_subvolume_structure(struct bch_fs *c) { - int ret = bch2_trans_run(c, - 
for_each_btree_key_commit(trans, iter, + CLASS(btree_trans, trans)(c); + return for_each_btree_key_commit(trans, iter, BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_prefetch, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - check_subvol_path(trans, &iter, k))); - bch_err_fn(c, ret); - return ret; + check_subvol_path(trans, &iter, k)); } static int bch2_bi_depth_renumber_one(struct btree_trans *trans, @@ -2751,8 +2718,8 @@ static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k) { struct bch_fs *c = trans->c; struct btree_iter inode_iter = {}; - darray_u64 path = {}; - struct printbuf buf = PRINTBUF; + CLASS(darray_u64, path)(); + CLASS(printbuf, buf)(); u32 snapshot = inode_k.k->p.snapshot; bool redo_bi_depth = false; u32 min_bi_depth = U32_MAX; @@ -2858,8 +2825,6 @@ static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k) out: fsck_err: bch2_trans_iter_exit(trans, &inode_iter); - darray_exit(&path); - printbuf_exit(&buf); bch_err_fn(c, ret); return ret; } @@ -2870,8 +2835,8 @@ fsck_err: */ int bch2_check_directory_structure(struct bch_fs *c) { - int ret = bch2_trans_run(c, - for_each_btree_key_reverse_commit(trans, iter, BTREE_ID_inodes, POS_MIN, + CLASS(btree_trans, trans)(c); + return for_each_btree_key_reverse_commit(trans, iter, BTREE_ID_inodes, POS_MIN, BTREE_ITER_intent| BTREE_ITER_prefetch| BTREE_ITER_all_snapshots, k, @@ -2883,10 +2848,7 @@ int bch2_check_directory_structure(struct bch_fs *c) continue; check_path_loop(trans, k); - }))); - - bch_err_fn(c, ret); - return ret; + })); } struct nlink_table { @@ -2970,8 +2932,8 @@ static int check_nlinks_find_hardlinks(struct bch_fs *c, struct nlink_table *t, u64 start, u64 *end) { - int ret = bch2_trans_run(c, - for_each_btree_key(trans, iter, BTREE_ID_inodes, + CLASS(btree_trans, trans)(c); + int ret = for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, start), BTREE_ITER_intent| BTREE_ITER_prefetch| @@ -3006,7 +2968,7 @@ static int check_nlinks_find_hardlinks(struct bch_fs *c, break; } 0; - }))); + })); bch_err_fn(c, ret); return ret; @@ -3016,12 +2978,10 @@ noinline_for_stack static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links, u64 range_start, u64 range_end) { - struct snapshots_seen s; - - snapshots_seen_init(&s); + CLASS(btree_trans, trans)(c); + CLASS(snapshots_seen, s)(); - int ret = bch2_trans_run(c, - for_each_btree_key(trans, iter, BTREE_ID_dirents, POS_MIN, + int ret = for_each_btree_key(trans, iter, BTREE_ID_dirents, POS_MIN, BTREE_ITER_intent| BTREE_ITER_prefetch| BTREE_ITER_all_snapshots, k, ({ @@ -3038,9 +2998,7 @@ static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links le64_to_cpu(d.v->d_inum), d.k->p.snapshot); } 0; - }))); - - snapshots_seen_exit(&s); + })); bch_err_fn(c, ret); return ret; @@ -3094,14 +3052,14 @@ static int check_nlinks_update_hardlinks(struct bch_fs *c, struct nlink_table *links, u64 range_start, u64 range_end) { + CLASS(btree_trans, trans)(c); size_t idx = 0; - int ret = bch2_trans_run(c, - for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, + int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, POS(0, range_start), BTREE_ITER_intent|BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - check_nlinks_update_inode(trans, &iter, k, links, &idx, range_end))); + check_nlinks_update_inode(trans, &iter, k, links, &idx, range_end)); if (ret < 0) { bch_err(c, "error in fsck walking inodes: %s", bch2_err_str(ret)); return ret; @@ -3140,7 +3098,6 @@ int 
bch2_check_nlinks(struct bch_fs *c) } while (next_iter_range_start != U64_MAX); kvfree(links.d); - bch_err_fn(c, ret); return ret; } @@ -3175,15 +3132,13 @@ int bch2_fix_reflink_p(struct bch_fs *c) if (c->sb.version >= bcachefs_metadata_version_reflink_p_fix) return 0; - int ret = bch2_trans_run(c, - for_each_btree_key_commit(trans, iter, + CLASS(btree_trans, trans)(c); + return for_each_btree_key_commit(trans, iter, BTREE_ID_extents, POS_MIN, BTREE_ITER_intent|BTREE_ITER_prefetch| BTREE_ITER_all_snapshots, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - fix_reflink_p_key(trans, &iter, k))); - bch_err_fn(c, ret); - return ret; + fix_reflink_p_key(trans, &iter, k)); } #ifndef NO_BCACHEFS_CHARDEV @@ -3209,6 +3164,8 @@ static int bch2_fsck_offline_thread_fn(struct thread_with_stdio *stdio) if (ret) return ret; + thr->c->recovery_task = current; + ret = bch2_fs_start(thr->c); if (ret) goto err; diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 307fb0c95656..4a9725f30c4f 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -417,7 +417,8 @@ int bch2_inode_find_by_inum_trans(struct btree_trans *trans, int bch2_inode_find_by_inum(struct bch_fs *c, subvol_inum inum, struct bch_inode_unpacked *inode) { - return bch2_trans_do(c, bch2_inode_find_by_inum_trans(trans, inum, inode)); + CLASS(btree_trans, trans)(c); + return lockrestart_do(trans, bch2_inode_find_by_inum_trans(trans, inum, inode)); } int bch2_inode_find_snapshot_root(struct btree_trans *trans, u64 inum, @@ -1132,7 +1133,7 @@ err: int bch2_inode_rm(struct bch_fs *c, subvol_inum inum) { - struct btree_trans *trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); struct btree_iter iter = {}; struct bkey_s_c k; struct bch_inode_unpacked inode; @@ -1141,7 +1142,7 @@ int bch2_inode_rm(struct bch_fs *c, subvol_inum inum) ret = lockrestart_do(trans, may_delete_deleted_inum(trans, inum, &inode)); if (ret) - goto err2; + return ret; /* * If this was a directory, there shouldn't be any real dirents left - @@ -1156,7 +1157,7 @@ int bch2_inode_rm(struct bch_fs *c, subvol_inum inum) : bch2_inode_delete_keys(trans, inum, BTREE_ID_dirents)) ?: bch2_inode_delete_keys(trans, inum, BTREE_ID_xattrs); if (ret) - goto err2; + return ret; retry: bch2_trans_begin(trans); @@ -1188,12 +1189,9 @@ err: goto retry; if (ret) - goto err2; + return ret; - ret = delete_ancestor_snapshot_inodes(trans, SPOS(0, inum.inum, snapshot)); -err2: - bch2_trans_put(trans); - return ret; + return delete_ancestor_snapshot_inodes(trans, SPOS(0, inum.inum, snapshot)); } int bch2_inode_nlink_inc(struct bch_inode_unpacked *bi) @@ -1413,7 +1411,7 @@ static int may_delete_deleted_inode(struct btree_trans *trans, struct bpos pos, struct bch_fs *c = trans->c; struct btree_iter inode_iter; struct bkey_s_c k; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); int ret; k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes, pos, BTREE_ITER_cached); @@ -1506,7 +1504,6 @@ static int may_delete_deleted_inode(struct btree_trans *trans, struct bpos pos, out: fsck_err: bch2_trans_iter_exit(trans, &inode_iter); - printbuf_exit(&buf); return ret; delete: ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_deleted_inodes, pos, false); @@ -1524,29 +1521,23 @@ static int may_delete_deleted_inum(struct btree_trans *trans, subvol_inum inum, int bch2_delete_dead_inodes(struct bch_fs *c) { - struct btree_trans *trans = bch2_trans_get(c); - int ret; - + CLASS(btree_trans, trans)(c); /* * if we ran check_inodes() unlinked inodes will have already been * cleaned up but the write 
buffer will be out of sync; therefore we * always need a write buffer flush - */ - ret = bch2_btree_write_buffer_flush_sync(trans); - if (ret) - goto err; - - /* + * * Weird transaction restart handling here because on successful delete, * bch2_inode_rm_snapshot() will return a nested transaction restart, * but we can't retry because the btree write buffer won't have been * flushed and we'd spin: */ - ret = for_each_btree_key_commit(trans, iter, BTREE_ID_deleted_inodes, POS_MIN, + return bch2_btree_write_buffer_flush_sync(trans) ?: + for_each_btree_key_commit(trans, iter, BTREE_ID_deleted_inodes, POS_MIN, BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ struct bch_inode_unpacked inode; - ret = may_delete_deleted_inode(trans, k.k->p, &inode, true); + int ret = may_delete_deleted_inode(trans, k.k->p, &inode, true); if (ret > 0) { bch_verbose_ratelimited(c, "deleting unlinked inode %llu:%u", k.k->p.offset, k.k->p.snapshot); @@ -1567,8 +1558,4 @@ int bch2_delete_dead_inodes(struct bch_fs *c) ret; })); -err: - bch2_trans_put(trans); - bch_err_fn(c, ret); - return ret; } diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c index 07023667a475..5d6681c070ba 100644 --- a/fs/bcachefs/io_misc.c +++ b/fs/bcachefs/io_misc.c @@ -114,12 +114,11 @@ err: if (!ret && sectors_allocated) bch2_increment_clock(c, sectors_allocated, WRITE); if (should_print_err(ret)) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); lockrestart_do(trans, bch2_inum_offset_err_msg_trans(trans, &buf, inum, iter->pos.offset << 9)); prt_printf(&buf, "fallocate error: %s", bch2_err_str(ret)); bch_err_ratelimited(c, "%s", buf.buf); - printbuf_exit(&buf); } err_noprint: bch2_open_buckets_put(c, &open_buckets); @@ -222,23 +221,18 @@ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter, int bch2_fpunch(struct bch_fs *c, subvol_inum inum, u64 start, u64 end, s64 *i_sectors_delta) { - struct btree_trans *trans = bch2_trans_get(c); - struct btree_iter iter; - int ret; + CLASS(btree_trans, trans)(c); + struct btree_iter iter; bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, POS(inum.inum, start), BTREE_ITER_intent); - ret = bch2_fpunch_at(trans, &iter, inum, end, i_sectors_delta); + int ret = bch2_fpunch_at(trans, &iter, inum, end, i_sectors_delta); bch2_trans_iter_exit(trans, &iter); - bch2_trans_put(trans); - - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - ret = 0; - return ret; + return bch2_err_matches(ret, BCH_ERR_transaction_restart) ?
0 : ret; } /* truncate: */ @@ -319,17 +313,13 @@ int bch2_truncate(struct bch_fs *c, subvol_inum inum, u64 new_i_size, u64 *i_sec * snapshot while they're in progress, then crashing, will result in the * resume only proceeding in one of the snapshots */ - down_read(&c->snapshot_create_lock); - struct btree_trans *trans = bch2_trans_get(c); + guard(rwsem_read)(&c->snapshot_create_lock); + CLASS(btree_trans, trans)(c); int ret = bch2_logged_op_start(trans, &op.k_i); if (ret) - goto out; + return ret; ret = __bch2_resume_logged_op_truncate(trans, &op.k_i, i_sectors_delta); ret = bch2_logged_op_finish(trans, &op.k_i) ?: ret; -out: - bch2_trans_put(trans); - up_read(&c->snapshot_create_lock); - return ret; } @@ -555,16 +545,12 @@ int bch2_fcollapse_finsert(struct bch_fs *c, subvol_inum inum, * snapshot while they're in progress, then crashing, will result in the * resume only proceeding in one of the snapshots */ - down_read(&c->snapshot_create_lock); - struct btree_trans *trans = bch2_trans_get(c); + guard(rwsem_read)(&c->snapshot_create_lock); + CLASS(btree_trans, trans)(c); int ret = bch2_logged_op_start(trans, &op.k_i); if (ret) - goto out; + return ret; ret = __bch2_resume_logged_op_finsert(trans, &op.k_i, i_sectors_delta); ret = bch2_logged_op_finish(trans, &op.k_i) ?: ret; -out: - bch2_trans_put(trans); - up_read(&c->snapshot_create_lock); - return ret; } diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index fa56ff67803c..b8ccd8c930e1 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -39,38 +39,73 @@ MODULE_PARM_DESC(read_corrupt_ratio, ""); #ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT +static inline u32 bch2_dev_congested_read(struct bch_dev *ca, u64 now) +{ + s64 congested = atomic_read(&ca->congested); + u64 last = READ_ONCE(ca->congested_last); + if (time_after64(now, last)) + congested -= (now - last) >> 12; + + return clamp(congested, 0LL, CONGESTED_MAX); +} + static bool bch2_target_congested(struct bch_fs *c, u16 target) { const struct bch_devs_mask *devs; unsigned d, nr = 0, total = 0; - u64 now = local_clock(), last; - s64 congested; - struct bch_dev *ca; - - if (!target) - return false; + u64 now = local_clock(); guard(rcu)(); devs = bch2_target_to_mask(c, target) ?: &c->rw_devs[BCH_DATA_user]; for_each_set_bit(d, devs->d, BCH_SB_MEMBERS_MAX) { - ca = rcu_dereference(c->devs[d]); + struct bch_dev *ca = rcu_dereference(c->devs[d]); if (!ca) continue; - congested = atomic_read(&ca->congested); - last = READ_ONCE(ca->congested_last); - if (time_after64(now, last)) - congested -= (now - last) >> 12; - - total += max(congested, 0LL); + total += bch2_dev_congested_read(ca, now); nr++; } return get_random_u32_below(nr * CONGESTED_MAX) < total; } +void bch2_dev_congested_to_text(struct printbuf *out, struct bch_dev *ca) +{ + printbuf_tabstop_push(out, 32); + + prt_printf(out, "current:\t%u%%\n", + bch2_dev_congested_read(ca, local_clock()) * + 100 / CONGESTED_MAX); + + prt_printf(out, "raw:\t%i/%u\n", atomic_read(&ca->congested), CONGESTED_MAX); + + prt_printf(out, "last io over threshold:\t"); + bch2_pr_time_units(out, local_clock() - ca->congested_last); + prt_newline(out); + + prt_printf(out, "read latency threshold:\t"); + bch2_pr_time_units(out, + ca->io_latency[READ].quantiles.entries[QUANTILE_IDX(1)].m << 2); + prt_newline(out); + + prt_printf(out, "median read latency:\t"); + bch2_pr_time_units(out, + ca->io_latency[READ].quantiles.entries[QUANTILE_IDX(7)].m); + prt_newline(out); + + prt_printf(out, "write latency threshold:\t"); + bch2_pr_time_units(out, 
+ ca->io_latency[WRITE].quantiles.entries[QUANTILE_IDX(1)].m << 3); + prt_newline(out); + + prt_printf(out, "median write latency:\t"); + bch2_pr_time_units(out, + ca->io_latency[WRITE].quantiles.entries[QUANTILE_IDX(7)].m); + prt_newline(out); +} + #else static bool bch2_target_congested(struct bch_fs *c, u16 target) @@ -130,22 +165,32 @@ static inline int should_promote(struct bch_fs *c, struct bkey_s_c k, if (!have_io_error(failed)) { BUG_ON(!opts.promote_target); - if (!(flags & BCH_READ_may_promote)) + if (!(flags & BCH_READ_may_promote)) { + count_event(c, io_read_nopromote_may_not); return bch_err_throw(c, nopromote_may_not); + } - if (bch2_bkey_has_target(c, k, opts.promote_target)) + if (bch2_bkey_has_target(c, k, opts.promote_target)) { + count_event(c, io_read_nopromote_already_promoted); return bch_err_throw(c, nopromote_already_promoted); + } - if (bkey_extent_is_unwritten(k)) + if (bkey_extent_is_unwritten(k)) { + count_event(c, io_read_nopromote_unwritten); return bch_err_throw(c, nopromote_unwritten); + } - if (bch2_target_congested(c, opts.promote_target)) + if (bch2_target_congested(c, opts.promote_target)) { + count_event(c, io_read_nopromote_congested); return bch_err_throw(c, nopromote_congested); + } } if (rhashtable_lookup_fast(&c->promote_table, &pos, - bch_promote_params)) + bch_promote_params)) { + count_event(c, io_read_nopromote_in_flight); return bch_err_throw(c, nopromote_in_flight); + } return 0; } @@ -160,6 +205,7 @@ static noinline void promote_free(struct bch_read_bio *rbio) BUG_ON(ret); async_object_list_del(c, promote, op->list_idx); + async_object_list_del(c, rbio, rbio->list_idx); bch2_data_update_exit(&op->write); @@ -356,12 +402,14 @@ nopromote: return NULL; } -void bch2_promote_op_to_text(struct printbuf *out, struct promote_op *op) +void bch2_promote_op_to_text(struct printbuf *out, + struct bch_fs *c, + struct promote_op *op) { if (!op->write.read_done) { prt_printf(out, "parent read: %px\n", op->write.rbio.parent); printbuf_indent_add(out, 2); - bch2_read_bio_to_text(out, op->write.rbio.parent); + bch2_read_bio_to_text(out, c, op->write.rbio.parent); printbuf_indent_sub(out, 2); } @@ -389,7 +437,8 @@ static int bch2_read_err_msg_trans(struct btree_trans *trans, struct printbuf *o static void bch2_read_err_msg(struct bch_fs *c, struct printbuf *out, struct bch_read_bio *rbio, struct bpos read_pos) { - bch2_trans_run(c, bch2_read_err_msg_trans(trans, out, rbio, read_pos)); + CLASS(btree_trans, trans)(c); + bch2_read_err_msg_trans(trans, out, rbio, read_pos); } enum rbio_context { @@ -459,6 +508,10 @@ static void bch2_rbio_done(struct bch_read_bio *rbio) if (rbio->start_time) bch2_time_stats_update(&rbio->c->times[BCH_TIME_data_read], rbio->start_time); +#ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS + if (rbio->list_idx) + async_object_list_del(rbio->c, rbio, rbio->list_idx); +#endif bio_endio(&rbio->bio); } @@ -586,7 +639,7 @@ static void bch2_rbio_retry(struct work_struct *work) }; struct bch_io_failures failed = { .nr = 0 }; - struct btree_trans *trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); struct bkey_buf sk; bch2_bkey_buf_init(&sk); @@ -628,7 +681,7 @@ static void bch2_rbio_retry(struct work_struct *work) } if (failed.nr || ret) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_log_msg_start(c, &buf); lockrestart_do(trans, @@ -656,12 +709,10 @@ static void bch2_rbio_retry(struct work_struct *work) bch2_io_failures_to_text(&buf, c, &failed); bch2_print_str_ratelimited(c, KERN_ERR, buf.buf); - printbuf_exit(&buf); } 
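/*
 * An illustrative sketch, not part of the patch: in the congestion rework
 * earlier in this io_read.c diff, bch2_dev_congested_read() decays a
 * device's score by one unit per 4096 ns since the last over-threshold
 * I/O (the (now - last) >> 12) and clamps it to [0, CONGESTED_MAX];
 * bch2_target_congested() then sums the per-device scores and compares
 * them to a uniform random draw. The final decision alone, assuming the
 * scores are already decayed and clamped; the CONGESTED_MAX value below
 * is assumed for illustration:
 */
#include <linux/random.h>
#include <linux/types.h>

#define CONGESTED_MAX	1024	/* assumed value, for illustration only */

/*
 * True with probability total / (nr * CONGESTED_MAX), for nr > 0:
 * fully congested targets are skipped almost always, idle ones almost
 * never, with a smooth ramp in between.
 */
static bool target_congested_sketch(const u32 *scores, unsigned nr)
{
	u32 total = 0;

	for (unsigned i = 0; i < nr; i++)
		total += scores[i];	/* each in [0, CONGESTED_MAX] */

	return get_random_u32_below(nr * CONGESTED_MAX) < total;
}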
bch2_rbio_done(rbio); bch2_bkey_buf_exit(&sk, c); - bch2_trans_put(trans); } static void bch2_rbio_error(struct bch_read_bio *rbio, @@ -749,8 +800,9 @@ out: static noinline void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio) { - bch2_trans_commit_do(rbio->c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - __bch2_rbio_narrow_crcs(trans, rbio)); + CLASS(btree_trans, trans)(rbio->c); + commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, + __bch2_rbio_narrow_crcs(trans, rbio)); } static void bch2_read_decompress_err(struct work_struct *work) @@ -758,7 +810,7 @@ static void bch2_read_decompress_err(struct work_struct *work) struct bch_read_bio *rbio = container_of(work, struct bch_read_bio, work); struct bch_fs *c = rbio->c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_read_err_msg(c, &buf, rbio, rbio->read_pos); prt_str(&buf, "decompression error"); @@ -770,7 +822,6 @@ static void bch2_read_decompress_err(struct work_struct *work) bch_err_ratelimited(c, "%s", buf.buf); bch2_rbio_error(rbio, -BCH_ERR_data_read_decompress_err, BLK_STS_IOERR); - printbuf_exit(&buf); } static void bch2_read_decrypt_err(struct work_struct *work) @@ -778,7 +829,7 @@ static void bch2_read_decrypt_err(struct work_struct *work) struct bch_read_bio *rbio = container_of(work, struct bch_read_bio, work); struct bch_fs *c = rbio->c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_read_err_msg(c, &buf, rbio, rbio->read_pos); prt_str(&buf, "decrypt error"); @@ -790,7 +841,6 @@ static void bch2_read_decrypt_err(struct work_struct *work) bch_err_ratelimited(c, "%s", buf.buf); bch2_rbio_error(rbio, -BCH_ERR_data_read_decrypt_err, BLK_STS_IOERR); - printbuf_exit(&buf); } /* Inner part that may run in process context */ @@ -972,7 +1022,7 @@ static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct btree_iter iter; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); int ret; bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, @@ -1009,7 +1059,6 @@ static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans, bch2_fs_inconsistent(c, "%s", buf.buf); bch2_trans_iter_exit(trans, &iter); - printbuf_exit(&buf); } int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, @@ -1061,25 +1110,22 @@ retry_pick: trace_and_count(c, io_read_fail_and_poison, &orig->bio); } - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_read_err_msg_trans(trans, &buf, orig, read_pos); prt_printf(&buf, "%s\n ", bch2_err_str(ret)); bch2_bkey_val_to_text(&buf, c, k); - bch_err_ratelimited(c, "%s", buf.buf); - printbuf_exit(&buf); goto err; } if (unlikely(bch2_csum_type_is_encryption(pick.crc.csum_type)) && !c->chacha20_key_set) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_read_err_msg_trans(trans, &buf, orig, read_pos); prt_printf(&buf, "attempting to read encrypted data without encryption key\n "); bch2_bkey_val_to_text(&buf, c, k); bch_err_ratelimited(c, "%s", buf.buf); - printbuf_exit(&buf); ret = bch_err_throw(c, data_read_no_encryption_key); goto err; } @@ -1448,13 +1494,12 @@ err: if (unlikely(ret)) { if (ret != -BCH_ERR_extent_poisoned) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); lockrestart_do(trans, bch2_inum_offset_err_msg_trans(trans, &buf, inum, bvec_iter.bi_sector << 9)); prt_printf(&buf, "data read error: %s", bch2_err_str(ret)); bch_err_ratelimited(c, "%s", buf.buf); - printbuf_exit(&buf); } rbio->bio.bi_status = BLK_STS_IOERR; @@ -1476,19 +1521,34 @@ static 
const char * const bch2_read_bio_flags[] = { NULL }; -void bch2_read_bio_to_text(struct printbuf *out, struct bch_read_bio *rbio) +void bch2_read_bio_to_text(struct printbuf *out, + struct bch_fs *c, + struct bch_read_bio *rbio) { + if (!out->nr_tabstops) + printbuf_tabstop_push(out, 20); + + bch2_read_err_msg(c, out, rbio, rbio->read_pos); + prt_newline(out); + + /* Are we in a retry? */ + + printbuf_indent_add(out, 2); + u64 now = local_clock(); - prt_printf(out, "start_time:\t%llu\n", rbio->start_time ? now - rbio->start_time : 0); - prt_printf(out, "submit_time:\t%llu\n", rbio->submit_time ? now - rbio->submit_time : 0); + prt_printf(out, "start_time:\t"); + bch2_pr_time_units(out, max_t(s64, 0, now - rbio->start_time)); + prt_newline(out); + + prt_printf(out, "submit_time:\t"); + bch2_pr_time_units(out, max_t(s64, 0, now - rbio->submit_time)); + prt_newline(out); if (!rbio->split) prt_printf(out, "end_io:\t%ps\n", rbio->end_io); else prt_printf(out, "parent:\t%px\n", rbio->parent); - prt_printf(out, "bi_end_io:\t%ps\n", rbio->bio.bi_end_io); - prt_printf(out, "promote:\t%u\n", rbio->promote); prt_printf(out, "bounce:\t%u\n", rbio->bounce); prt_printf(out, "split:\t%u\n", rbio->split); @@ -1507,6 +1567,7 @@ void bch2_read_bio_to_text(struct printbuf *out, struct bch_read_bio *rbio) prt_newline(out); bch2_bio_to_text(out, &rbio->bio); + printbuf_indent_sub(out, 2); } void bch2_fs_io_read_exit(struct bch_fs *c) diff --git a/fs/bcachefs/io_read.h b/fs/bcachefs/io_read.h index cfc8ef35b14d..9d63d5914b20 100644 --- a/fs/bcachefs/io_read.h +++ b/fs/bcachefs/io_read.h @@ -7,6 +7,10 @@ #include "extents_types.h" #include "reflink.h" +#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT +void bch2_dev_congested_to_text(struct printbuf *, struct bch_dev *); +#endif + struct bch_read_bio { struct bch_fs *c; u64 start_time; @@ -161,11 +165,11 @@ static inline void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, rbio->subvol = inum.subvol; - bch2_trans_run(c, - __bch2_read(trans, rbio, rbio->bio.bi_iter, inum, NULL, NULL, - BCH_READ_retry_if_stale| - BCH_READ_may_promote| - BCH_READ_user_mapped)); + CLASS(btree_trans, trans)(c); + __bch2_read(trans, rbio, rbio->bio.bi_iter, inum, NULL, NULL, + BCH_READ_retry_if_stale| + BCH_READ_may_promote| + BCH_READ_user_mapped); } static inline struct bch_read_bio *rbio_init_fragment(struct bio *bio, @@ -207,8 +211,8 @@ static inline struct bch_read_bio *rbio_init(struct bio *bio, } struct promote_op; -void bch2_promote_op_to_text(struct printbuf *, struct promote_op *); -void bch2_read_bio_to_text(struct printbuf *, struct bch_read_bio *); +void bch2_promote_op_to_text(struct printbuf *, struct bch_fs *, struct promote_op *); +void bch2_read_bio_to_text(struct printbuf *, struct bch_fs *, struct bch_read_bio *); void bch2_fs_io_read_exit(struct bch_fs *); int bch2_fs_io_read_init(struct bch_fs *); diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index fa077341d2ef..d7620138e038 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -55,14 +55,9 @@ static inline void bch2_congested_acct(struct bch_dev *ca, u64 io_latency, s64 latency_over = io_latency - latency_threshold; if (latency_threshold && latency_over > 0) { - /* - * bump up congested by approximately latency_over * 4 / - * latency_threshold - we don't need much accuracy here so don't - * bother with the divide: - */ if (atomic_read(&ca->congested) < CONGESTED_MAX) - atomic_add(latency_over >> - max_t(int, ilog2(latency_threshold) - 2, 0), + atomic_add((u32) min(U32_MAX, io_latency * 
2) / + (u32) min(U32_MAX, latency_threshold), &ca->congested); ca->congested_last = now; @@ -261,7 +256,7 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans, s64 bi_sectors = le64_to_cpu(inode->v.bi_sectors); if (unlikely(bi_sectors + i_sectors_delta < 0)) { struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_log_msg_start(c, &buf); prt_printf(&buf, "inode %llu i_sectors underflow: %lli + %lli < 0", extent_iter->pos.inode, bi_sectors, i_sectors_delta); @@ -269,7 +264,6 @@ static inline int bch2_extent_update_i_size_sectors(struct btree_trans *trans, bool print = bch2_count_fsck_err(c, inode_i_sectors_underflow, &buf); if (print) bch2_print_str(c, KERN_ERR, buf.buf); - printbuf_exit(&buf); if (i_sectors_delta < 0) i_sectors_delta = -bi_sectors; @@ -375,7 +369,6 @@ static int bch2_write_index_default(struct bch_write_op *op) struct bkey_buf sk; struct keylist *keys = &op->insert_keys; struct bkey_i *k = bch2_keylist_front(keys); - struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; subvol_inum inum = { .subvol = op->subvol, @@ -385,6 +378,7 @@ static int bch2_write_index_default(struct bch_write_op *op) BUG_ON(!inum.subvol); + CLASS(btree_trans, trans)(c); bch2_bkey_buf_init(&sk); do { @@ -421,7 +415,6 @@ static int bch2_write_index_default(struct bch_write_op *op) bch2_cut_front(iter.pos, k); } while (!bch2_keylist_empty(keys)); - bch2_trans_put(trans); bch2_bkey_buf_exit(&sk, c); return ret; @@ -431,7 +424,7 @@ static int bch2_write_index_default(struct bch_write_op *op) void bch2_write_op_error(struct bch_write_op *op, u64 offset, const char *fmt, ...) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); if (op->subvol) { bch2_inum_offset_err_msg(op->c, &buf, @@ -458,7 +451,6 @@ void bch2_write_op_error(struct bch_write_op *op, u64 offset, const char *fmt, . } bch_err_ratelimited(op->c, "%s", buf.buf); - printbuf_exit(&buf); } void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c, @@ -470,8 +462,8 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c, struct bch_write_bio *n; unsigned ref_rw = type == BCH_DATA_btree ? READ : WRITE; unsigned ref_idx = type == BCH_DATA_btree - ? BCH_DEV_READ_REF_btree_node_write - : BCH_DEV_WRITE_REF_io_write; + ? 
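
The rewritten bch2_congested_acct() above trades the old shift-based approximation (latency_over >> (ilog2(latency_threshold) - 2), i.e. roughly latency_over * 4 / latency_threshold without a divide) for an explicit bounded divide that scales the whole io_latency rather than just the overage. A standalone sketch of the two scalings, assuming nothing beyond the arithmetic visible in the hunk:

#include <stdint.h>
#include <stdio.h>

static unsigned ilog2_64(uint64_t v) { unsigned r = 0; while (v >>= 1) r++; return r; }

/* old: approximate latency_over * 4 / threshold with a shift, avoiding a divide */
static uint32_t bump_old(uint64_t io_latency, uint64_t threshold)
{
	int64_t over = (int64_t)(io_latency - threshold);
	if (over <= 0)
		return 0;
	int sh = (int)ilog2_64(threshold) - 2;
	return (uint32_t)(over >> (sh > 0 ? sh : 0));
}

/* new: clamp both operands to 32 bits, then scale: io_latency * 2 / threshold
 * (the caller checks threshold != 0, as in the hunk above) */
static uint32_t bump_new(uint64_t io_latency, uint64_t threshold)
{
	uint64_t l = io_latency * 2;
	uint32_t lc = l > UINT32_MAX ? UINT32_MAX : (uint32_t)l;
	uint32_t tc = threshold > UINT32_MAX ? UINT32_MAX : (uint32_t)threshold;
	return lc / tc;
}

int main(void)
{
	printf("old=%u new=%u\n", bump_old(5000, 1000), bump_new(5000, 1000));
	return 0;
}
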
(unsigned) BCH_DEV_READ_REF_btree_node_write + : (unsigned) BCH_DEV_WRITE_REF_io_write; BUG_ON(c->opts.nochanges); @@ -1223,6 +1215,7 @@ static bool bch2_extent_is_writeable(struct bch_write_op *op, static int bch2_nocow_write_convert_one_unwritten(struct btree_trans *trans, struct btree_iter *iter, + struct bch_write_op *op, struct bkey_i *orig, struct bkey_s_c k, u64 new_i_size) @@ -1232,11 +1225,13 @@ static int bch2_nocow_write_convert_one_unwritten(struct btree_trans *trans, return 0; } - struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k); + struct bkey_i *new = bch2_trans_kmalloc_nomemzero(trans, + bkey_bytes(k.k) + sizeof(struct bch_extent_rebalance)); int ret = PTR_ERR_OR_ZERO(new); if (ret) return ret; + bkey_reassemble(new, k); bch2_cut_front(bkey_start_pos(&orig->k), new); bch2_cut_back(orig->k.p, new); @@ -1244,6 +1239,8 @@ static int bch2_nocow_write_convert_one_unwritten(struct btree_trans *trans, bkey_for_each_ptr(ptrs, ptr) ptr->unwritten = 0; + bch2_bkey_set_needs_rebalance(op->c, &op->opts, new); + /* * Note that we're not calling bch2_subvol_get_snapshot() in this path - * that was done when we kicked off the write, and here it's important @@ -1268,7 +1265,7 @@ static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op) bkey_start_pos(&orig->k), orig->k.p, BTREE_ITER_intent, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ - bch2_nocow_write_convert_one_unwritten(trans, &iter, orig, k, op->new_i_size); + bch2_nocow_write_convert_one_unwritten(trans, &iter, op, orig, k, op->new_i_size); })); if (ret) break; @@ -1477,7 +1474,7 @@ err_bucket_stale: break; } - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); if (bch2_fs_inconsistent_on(stale < 0, c, "pointer to invalid bucket in nocow path on device %llu\n %s", stale_at->b.inode, @@ -1487,7 +1484,6 @@ err_bucket_stale: /* We can retry this: */ ret = bch_err_throw(c, transaction_restart); } - printbuf_exit(&buf); goto err_get_ioref; } @@ -1531,7 +1527,7 @@ again: * freeing up space on specific disks, which means that * allocations for specific disks may hang arbitrarily long: */ - ret = bch2_trans_run(c, lockrestart_do(trans, + ret = bch2_trans_do(c, bch2_alloc_sectors_start_trans(trans, op->target, op->opts.erasure_code && !(op->flags & BCH_WRITE_cached), @@ -1541,7 +1537,7 @@ again: op->nr_replicas_required, op->watermark, op->flags, - &op->cl, &wp))); + &op->cl, &wp)); if (unlikely(ret)) { if (bch2_err_matches(ret, BCH_ERR_operation_blocked)) break; diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 93794d276a7f..3ba1f9fd3402 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -88,7 +88,7 @@ static void bch2_journal_buf_to_text(struct printbuf *out, struct journal *j, u6 static void bch2_journal_bufs_to_text(struct printbuf *out, struct journal *j) { lockdep_assert_held(&j->lock); - out->atomic++; + guard(printbuf_atomic)(out); if (!out->nr_tabstops) printbuf_tabstop_push(out, 24); @@ -98,8 +98,6 @@ static void bch2_journal_bufs_to_text(struct printbuf *out, struct journal *j) seq++) bch2_journal_buf_to_text(out, j, seq); prt_printf(out, "last buf %s\n", journal_entry_is_open(j) ? 
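
Most hunks in this series replace open-coded lock/unlock pairs and printbuf_exit() calls with the kernel's scope-based cleanup helpers (guard(), scoped_guard(), CLASS()) from include/linux/cleanup.h, which are built on the compiler's cleanup attribute. A simplified userspace model of that mechanism; guard_mutex here is an illustrative macro, not the kernel's:

#include <pthread.h>
#include <stdio.h>

static void unlock_cleanup(pthread_mutex_t **m) { pthread_mutex_unlock(*m); }

/* one guard per scope; the unlock runs when the variable goes out of scope */
#define guard_mutex(m) \
	pthread_mutex_t *_guard_ __attribute__((cleanup(unlock_cleanup))) = (m); \
	pthread_mutex_lock(_guard_)

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int counter;

static int bump(void)
{
	guard_mutex(&lock);
	if (counter > 100)
		return -1;	/* early return: the unlock still runs */
	return ++counter;
}

int main(void) { printf("%d\n", bump()); return 0; }

This is why the converted functions above can simply return from inside the critical section: the goto-unlock-return boilerplate the hunks delete is now generated by the scope exit.
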
"open" : "closed"); - - --out->atomic; } static inline struct journal_buf * @@ -140,9 +138,9 @@ journal_error_check_stuck(struct journal *j, int error, unsigned flags) { struct bch_fs *c = container_of(j, struct bch_fs, journal); bool stuck = false; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); - buf.atomic++; + guard(printbuf_atomic)(&buf); if (!(error == -BCH_ERR_journal_full || error == -BCH_ERR_journal_pin_full) || @@ -150,28 +148,24 @@ journal_error_check_stuck(struct journal *j, int error, unsigned flags) (flags & BCH_WATERMARK_MASK) != BCH_WATERMARK_reclaim) return stuck; - spin_lock(&j->lock); + scoped_guard(spinlock, &j->lock) { + if (j->can_discard) + return stuck; - if (j->can_discard) { - spin_unlock(&j->lock); - return stuck; - } + stuck = true; - stuck = true; + /* + * The journal shutdown path will set ->err_seq, but do it here first to + * serialize against concurrent failures and avoid duplicate error + * reports. + */ + if (j->err_seq) + return stuck; - /* - * The journal shutdown path will set ->err_seq, but do it here first to - * serialize against concurrent failures and avoid duplicate error - * reports. - */ - if (j->err_seq) { - spin_unlock(&j->lock); - return stuck; - } - j->err_seq = journal_cur_seq(j); + j->err_seq = journal_cur_seq(j); - __bch2_journal_debug_to_text(&buf, j); - spin_unlock(&j->lock); + __bch2_journal_debug_to_text(&buf, j); + } prt_printf(&buf, bch2_fmt(c, "Journal stuck! Hava a pre-reservation but journal full (error %s)"), bch2_err_str(error)); bch2_print_str(c, KERN_ERR, buf.buf); @@ -179,7 +173,6 @@ journal_error_check_stuck(struct journal *j, int error, unsigned flags) printbuf_reset(&buf); bch2_journal_pins_to_text(&buf, j); bch_err(c, "Journal pins:\n%s", buf.buf); - printbuf_exit(&buf); bch2_fatal_error(c); dump_stack(); @@ -269,22 +262,21 @@ static void __journal_entry_close(struct journal *j, unsigned closed_val, bool t buf->data->u64s = cpu_to_le32(old.cur_entry_offset); if (trace_journal_entry_close_enabled() && trace) { - struct printbuf pbuf = PRINTBUF; - pbuf.atomic++; - - prt_str(&pbuf, "entry size: "); - prt_human_readable_u64(&pbuf, vstruct_bytes(buf->data)); - prt_newline(&pbuf); - bch2_prt_task_backtrace(&pbuf, current, 1, GFP_NOWAIT); - trace_journal_entry_close(c, pbuf.buf); - printbuf_exit(&pbuf); + CLASS(printbuf, err)(); + guard(printbuf_atomic)(&err); + + prt_str(&err, "entry size: "); + prt_human_readable_u64(&err, vstruct_bytes(buf->data)); + prt_newline(&err); + bch2_prt_task_backtrace(&err, current, 1, GFP_NOWAIT); + trace_journal_entry_close(c, err.buf); } sectors = vstruct_blocks_plus(buf->data, c->block_bits, buf->u64s_reserved) << c->block_bits; if (unlikely(sectors > buf->sectors)) { - struct printbuf err = PRINTBUF; - err.atomic++; + CLASS(printbuf, err)(); + guard(printbuf_atomic)(&err); prt_printf(&err, "journal entry overran reserved space: %u > %u\n", sectors, buf->sectors); @@ -296,7 +288,6 @@ static void __journal_entry_close(struct journal *j, unsigned closed_val, bool t bch2_journal_halt_locked(j); bch_err(c, "%s", err.buf); - printbuf_exit(&err); return; } @@ -344,9 +335,8 @@ void bch2_journal_halt_locked(struct journal *j) void bch2_journal_halt(struct journal *j) { - spin_lock(&j->lock); + guard(spinlock)(&j->lock); bch2_journal_halt_locked(j); - spin_unlock(&j->lock); } static bool journal_entry_want_write(struct journal *j) @@ -371,13 +361,8 @@ static bool journal_entry_want_write(struct journal *j) bool bch2_journal_entry_close(struct journal *j) { - bool ret; - - 
spin_lock(&j->lock); - ret = journal_entry_want_write(j); - spin_unlock(&j->lock); - - return ret; + guard(spinlock)(&j->lock); + return journal_entry_want_write(j); } /* @@ -394,7 +379,7 @@ static int journal_entry_open(struct journal *j) lockdep_assert_held(&j->lock); BUG_ON(journal_entry_is_open(j)); - BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb)); + BUG_ON(c->sb.clean); if (j->blocked) return bch_err_throw(c, journal_blocked); @@ -542,7 +527,7 @@ static void journal_write_work(struct work_struct *work) { struct journal *j = container_of(work, struct journal, write_work.work); - spin_lock(&j->lock); + guard(spinlock)(&j->lock); if (__journal_entry_is_open(j->reservations)) { long delta = journal_cur_buf(j)->expires - jiffies; @@ -551,7 +536,6 @@ static void journal_write_work(struct work_struct *work) else __journal_entry_close(j, JOURNAL_ENTRY_CLOSED_VAL, true); } - spin_unlock(&j->lock); } static void journal_buf_prealloc(struct journal *j) @@ -652,34 +636,32 @@ out: if (ret == -BCH_ERR_journal_max_in_flight && track_event_change(&c->times[BCH_TIME_blocked_journal_max_in_flight], true) && trace_journal_entry_full_enabled()) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_printbuf_make_room(&buf, 4096); - spin_lock(&j->lock); - prt_printf(&buf, "seq %llu\n", journal_cur_seq(j)); - bch2_journal_bufs_to_text(&buf, j); - spin_unlock(&j->lock); + scoped_guard(spinlock, &j->lock) { + prt_printf(&buf, "seq %llu\n", journal_cur_seq(j)); + bch2_journal_bufs_to_text(&buf, j); + } trace_journal_entry_full(c, buf.buf); - printbuf_exit(&buf); count_event(c, journal_entry_full); } if (ret == -BCH_ERR_journal_max_open && track_event_change(&c->times[BCH_TIME_blocked_journal_max_open], true) && trace_journal_entry_full_enabled()) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_printbuf_make_room(&buf, 4096); - spin_lock(&j->lock); - prt_printf(&buf, "seq %llu\n", journal_cur_seq(j)); - bch2_journal_bufs_to_text(&buf, j); - spin_unlock(&j->lock); + scoped_guard(spinlock, &j->lock) { + prt_printf(&buf, "seq %llu\n", journal_cur_seq(j)); + bch2_journal_bufs_to_text(&buf, j); + } trace_journal_entry_full(c, buf.buf); - printbuf_exit(&buf); count_event(c, journal_entry_full); } @@ -751,11 +733,10 @@ int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res, remaining_wait)) return ret; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_journal_debug_to_text(&buf, j); bch2_print_str(c, KERN_ERR, buf.buf); prt_printf(&buf, bch2_fmt(c, "Journal stuck? 
Waited for 10 seconds, err %s"), bch2_err_str(ret)); - printbuf_exit(&buf); closure_wait_event(&j->async_wait, !bch2_err_matches(ret = __journal_res_get(j, res, flags), BCH_ERR_operation_blocked) || @@ -772,11 +753,13 @@ void bch2_journal_entry_res_resize(struct journal *j, union journal_res_state state; int d = new_u64s - res->u64s; - spin_lock(&j->lock); + guard(spinlock)(&j->lock); + + j->entry_u64s_reserved += d; + res->u64s += d; - j->entry_u64s_reserved += d; if (d <= 0) - goto out; + return; j->cur_entry_u64s = max_t(int, 0, j->cur_entry_u64s - d); state = READ_ONCE(j->reservations); @@ -791,9 +774,6 @@ void bch2_journal_entry_res_resize(struct journal *j, } else { journal_cur_buf(j)->u64s_reserved += d; } -out: - spin_unlock(&j->lock); - res->u64s += d; } /* journal flushing: */ @@ -944,7 +924,6 @@ bool bch2_journal_noflush_seq(struct journal *j, u64 start, u64 end) { struct bch_fs *c = container_of(j, struct bch_fs, journal); u64 unwritten_seq; - bool ret = false; if (!(c->sb.features & (1ULL << BCH_FEATURE_journal_no_flush))) return false; @@ -952,9 +931,10 @@ bool bch2_journal_noflush_seq(struct journal *j, u64 start, u64 end) if (c->journal.flushed_seq_ondisk >= start) return false; - spin_lock(&j->lock); + guard(spinlock)(&j->lock); + if (c->journal.flushed_seq_ondisk >= start) - goto out; + return false; for (unwritten_seq = journal_last_unwritten_seq(j); unwritten_seq < end; @@ -963,15 +943,12 @@ bool bch2_journal_noflush_seq(struct journal *j, u64 start, u64 end) /* journal flush already in flight, or flush requseted */ if (buf->must_flush) - goto out; + return false; buf->noflush = true; } - ret = true; -out: - spin_unlock(&j->lock); - return ret; + return true; } static int __bch2_journal_meta(struct journal *j) @@ -1010,19 +987,18 @@ int bch2_journal_meta(struct journal *j) void bch2_journal_unblock(struct journal *j) { - spin_lock(&j->lock); - if (!--j->blocked && - j->cur_entry_offset_if_blocked < JOURNAL_ENTRY_CLOSED_VAL && - j->reservations.cur_entry_offset == JOURNAL_ENTRY_BLOCKED_VAL) { - union journal_res_state old, new; - - old.v = atomic64_read(&j->reservations.counter); - do { - new.v = old.v; - new.cur_entry_offset = j->cur_entry_offset_if_blocked; - } while (!atomic64_try_cmpxchg(&j->reservations.counter, &old.v, new.v)); - } - spin_unlock(&j->lock); + scoped_guard(spinlock, &j->lock) + if (!--j->blocked && + j->cur_entry_offset_if_blocked < JOURNAL_ENTRY_CLOSED_VAL && + j->reservations.cur_entry_offset == JOURNAL_ENTRY_BLOCKED_VAL) { + union journal_res_state old, new; + + old.v = atomic64_read(&j->reservations.counter); + do { + new.v = old.v; + new.cur_entry_offset = j->cur_entry_offset_if_blocked; + } while (!atomic64_try_cmpxchg(&j->reservations.counter, &old.v, new.v)); + } journal_wake(j); } @@ -1050,9 +1026,8 @@ static void __bch2_journal_block(struct journal *j) void bch2_journal_block(struct journal *j) { - spin_lock(&j->lock); - __bch2_journal_block(j); - spin_unlock(&j->lock); + scoped_guard(spinlock, &j->lock) + __bch2_journal_block(j); journal_quiesce(j); } @@ -1065,7 +1040,7 @@ static struct journal_buf *__bch2_next_write_buffer_flush_journal_buf(struct jou /* We're inside wait_event(), but using mutex_lock(: */ sched_annotate_sleep(); mutex_lock(&j->buf_lock); - spin_lock(&j->lock); + guard(spinlock)(&j->lock); max_seq = min(max_seq, journal_cur_seq(j)); for (u64 seq = journal_last_unwritten_seq(j); @@ -1092,7 +1067,6 @@ static struct journal_buf *__bch2_next_write_buffer_flush_journal_buf(struct jou } } - spin_unlock(&j->lock); if 
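
bch2_journal_unblock() above keeps its lock-free update loop inside the new scoped_guard: read the packed reservation word, rewrite one field, and retry the compare-exchange until no other CPU has raced in between. A standalone sketch of that pattern with C11 atomics; the packed-field layout here is illustrative, not the journal's actual encoding:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* illustrative packed state word: low 32 bits = entry offset, high 32 = count */
static _Atomic uint64_t reservations;

static void set_entry_offset(uint32_t offset)
{
	uint64_t old = atomic_load(&reservations), new;

	do {
		new = (old & 0xffffffff00000000ull) | offset;
		/* on failure, 'old' is reloaded with the current value */
	} while (!atomic_compare_exchange_weak(&reservations, &old, new));
}

int main(void)
{
	atomic_store(&reservations, 0x0000000500000000ull);
	set_entry_offset(42);
	printf("%#llx\n", (unsigned long long)atomic_load(&reservations));
	return 0;
}
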
(IS_ERR_OR_NULL(ret)) mutex_unlock(&j->buf_lock); return ret; @@ -1147,16 +1121,14 @@ static int bch2_set_nr_journal_buckets_iter(struct bch_dev *ca, unsigned nr, if (ret) break; - if (!new_fs) { - ret = bch2_trans_run(c, - bch2_trans_mark_metadata_bucket(trans, ca, - ob[nr_got]->bucket, BCH_DATA_journal, - ca->mi.bucket_size, BTREE_TRIGGER_transactional)); - if (ret) { - bch2_open_bucket_put(c, ob[nr_got]); - bch_err_msg(c, ret, "marking new journal buckets"); - break; - } + CLASS(btree_trans, trans)(c); + ret = bch2_trans_mark_metadata_bucket(trans, ca, + ob[nr_got]->bucket, BCH_DATA_journal, + ca->mi.bucket_size, BTREE_TRIGGER_transactional); + if (ret) { + bch2_open_bucket_put(c, ob[nr_got]); + bch_err_msg(c, ret, "marking new journal buckets"); + break; } bu[nr_got] = ob[nr_got]->bucket; @@ -1226,12 +1198,13 @@ err_unblock: mutex_unlock(&c->sb_lock); } - if (ret && !new_fs) + if (ret) { + CLASS(btree_trans, trans)(c); for (i = 0; i < nr_got; i++) - bch2_trans_run(c, - bch2_trans_mark_metadata_bucket(trans, ca, + bch2_trans_mark_metadata_bucket(trans, ca, bu[i], BCH_DATA_free, 0, - BTREE_TRIGGER_transactional)); + BTREE_TRIGGER_transactional); + } err_free: for (i = 0; i < nr_got; i++) bch2_open_bucket_put(c, ob[i]); @@ -1296,10 +1269,8 @@ static int bch2_set_nr_journal_buckets_loop(struct bch_fs *c, struct bch_dev *ca int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca, unsigned nr) { - down_write(&c->state_lock); + guard(rwsem_write)(&c->state_lock); int ret = bch2_set_nr_journal_buckets_loop(c, ca, nr, false); - up_write(&c->state_lock); - bch_err_fn(c, ret); return ret; } @@ -1423,21 +1394,18 @@ int bch2_fs_journal_alloc(struct bch_fs *c) static bool bch2_journal_writing_to_device(struct journal *j, unsigned dev_idx) { - bool ret = false; - u64 seq; + guard(spinlock)(&j->lock); - spin_lock(&j->lock); - for (seq = journal_last_unwritten_seq(j); - seq <= journal_cur_seq(j) && !ret; + for (u64 seq = journal_last_unwritten_seq(j); + seq <= journal_cur_seq(j); seq++) { struct journal_buf *buf = journal_seq_to_buf(j, seq); if (bch2_bkey_has_device_c(bkey_i_to_s_c(&buf->key), dev_idx)) - ret = true; + return true; } - spin_unlock(&j->lock); - return ret; + return false; } void bch2_dev_journal_stop(struct journal *j, struct bch_dev *ca) @@ -1556,13 +1524,11 @@ int bch2_fs_journal_start(struct journal *j, u64 last_seq, u64 cur_seq) if (!had_entries) j->last_empty_seq = cur_seq - 1; /* to match j->seq */ - spin_lock(&j->lock); - j->last_flush_write = jiffies; - - j->reservations.idx = journal_cur_seq(j); - - c->last_bucket_seq_cleanup = journal_cur_seq(j); - spin_unlock(&j->lock); + scoped_guard(spinlock, &j->lock) { + j->last_flush_write = jiffies; + j->reservations.idx = journal_cur_seq(j); + c->last_bucket_seq_cleanup = journal_cur_seq(j); + } return 0; } @@ -1573,13 +1539,12 @@ void bch2_journal_set_replay_done(struct journal *j) * journal_space_available must happen before setting JOURNAL_running * JOURNAL_running must happen before JOURNAL_replay_done */ - spin_lock(&j->lock); + guard(spinlock)(&j->lock); bch2_journal_space_available(j); set_bit(JOURNAL_need_flush_write, &j->flags); set_bit(JOURNAL_running, &j->flags); set_bit(JOURNAL_replay_done, &j->flags); - spin_unlock(&j->lock); } /* init/exit: */ @@ -1736,9 +1701,10 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) printbuf_tabstops_reset(out); printbuf_tabstop_push(out, 28); - out->atomic++; + guard(printbuf_atomic)(out); guard(rcu)(); + s = READ_ONCE(j->reservations); 
prt_printf(out, "flags:\t"); @@ -1828,13 +1794,10 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) } prt_printf(out, "replicas want %u need %u\n", c->opts.metadata_replicas, c->opts.metadata_replicas_required); - - --out->atomic; } void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) { - spin_lock(&j->lock); + guard(spinlock)(&j->lock); __bch2_journal_debug_to_text(out, j); - spin_unlock(&j->lock); } diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h index 977907038d98..b46b9718d841 100644 --- a/fs/bcachefs/journal.h +++ b/fs/bcachefs/journal.h @@ -297,9 +297,8 @@ static inline void bch2_journal_buf_put(struct journal *j, u64 seq) s = journal_state_buf_put(j, idx); if (!journal_state_count(s, idx)) { - spin_lock(&j->lock); + guard(spinlock)(&j->lock); bch2_journal_buf_put_final(j, seq); - spin_unlock(&j->lock); } else if (unlikely(s.cur_entry_offset == JOURNAL_ENTRY_BLOCKED_VAL)) wake_up(&j->wait); } diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 2d6ce4348a22..94a065ee0b40 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -35,7 +35,8 @@ void bch2_journal_pos_from_member_info_set(struct bch_fs *c) void bch2_journal_pos_from_member_info_resume(struct bch_fs *c) { - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); + for_each_member_device(c, ca) { struct bch_member m = bch2_sb_member_get(c->disk_sb.sb, ca->dev_idx); @@ -46,16 +47,14 @@ void bch2_journal_pos_from_member_info_resume(struct bch_fs *c) if (offset <= ca->mi.bucket_size) ca->journal.sectors_free = ca->mi.bucket_size - offset; } - mutex_unlock(&c->sb_lock); } static void bch2_journal_ptr_to_text(struct printbuf *out, struct bch_fs *c, struct journal_ptr *p) { - struct bch_dev *ca = bch2_dev_tryget_noerror(c, p->dev); + CLASS(bch2_dev_tryget_noerror, ca)(c, p->dev); prt_printf(out, "%s %u:%u:%u (sector %llu)", ca ? ca->name : "(invalid dev)", p->dev, p->bucket, p->bucket_offset, p->sector); - bch2_dev_put(ca); } void bch2_journal_ptrs_to_text(struct printbuf *out, struct bch_fs *c, struct journal_replay *j) @@ -157,7 +156,7 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca, struct journal_replay **_i, *i, *dup; size_t bytes = vstruct_bytes(j); u64 last_seq = !JSET_NO_FLUSH(j) ? le64_to_cpu(j->last_seq) : 0; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); int ret = JOURNAL_ENTRY_ADD_OK; if (last_seq && c->opts.journal_rewind) @@ -223,7 +222,7 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca, ret = darray_push(&dup->ptrs, entry_ptr); if (ret) - goto out; + return ret; bch2_journal_replay_to_text(&buf, c, dup); @@ -240,7 +239,7 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca, if (entry_ptr.csum_good && !identical) goto replace; - goto out; + return ret; } replace: i = kvmalloc(offsetof(struct journal_replay, j) + bytes, GFP_KERNEL); @@ -263,9 +262,7 @@ replace: } *_i = i; -out: fsck_err: - printbuf_exit(&buf); return ret; } @@ -312,7 +309,7 @@ static void journal_entry_err_msg(struct printbuf *out, #define journal_entry_err(c, version, jset, entry, _err, msg, ...) 
\ ({ \ - struct printbuf _buf = PRINTBUF; \ + CLASS(printbuf, _buf)(); \ \ journal_entry_err_msg(&_buf, version, jset, entry); \ prt_printf(&_buf, msg, ##__VA_ARGS__); \ @@ -331,7 +328,6 @@ static void journal_entry_err_msg(struct printbuf *out, break; \ } \ \ - printbuf_exit(&_buf); \ true; \ }) @@ -617,7 +613,7 @@ static int journal_entry_data_usage_validate(struct bch_fs *c, struct jset_entry_data_usage *u = container_of(entry, struct jset_entry_data_usage, entry); unsigned bytes = jset_u64s(le16_to_cpu(entry->u64s)) * sizeof(u64); - struct printbuf err = PRINTBUF; + CLASS(printbuf, err)(); int ret = 0; if (journal_entry_err_on(bytes < sizeof(*u) || @@ -626,7 +622,7 @@ static int journal_entry_data_usage_validate(struct bch_fs *c, journal_entry_data_usage_bad_size, "invalid journal entry usage: bad size")) { journal_entry_null_range(entry, vstruct_next(entry)); - goto out; + return 0; } if (journal_entry_err_on(bch2_replicas_entry_validate(&u->r, c, &err), @@ -634,11 +630,9 @@ static int journal_entry_data_usage_validate(struct bch_fs *c, journal_entry_data_usage_bad_size, "invalid journal entry usage: %s", err.buf)) { journal_entry_null_range(entry, vstruct_next(entry)); - goto out; + return 0; } -out: fsck_err: - printbuf_exit(&err); return ret; } @@ -1165,17 +1159,16 @@ reread: vstruct_end(j) - (void *) j->encrypted_start); bch2_fs_fatal_err_on(ret, c, "decrypting journal entry: %s", bch2_err_str(ret)); - mutex_lock(&jlist->lock); - ret = journal_entry_add(c, ca, (struct journal_ptr) { - .csum_good = csum_good, - .csum = csum, - .dev = ca->dev_idx, - .bucket = bucket, - .bucket_offset = offset - - bucket_to_sector(ca, ja->buckets[bucket]), - .sector = offset, - }, jlist, j); - mutex_unlock(&jlist->lock); + scoped_guard(mutex, &jlist->lock) + ret = journal_entry_add(c, ca, (struct journal_ptr) { + .csum_good = csum_good, + .csum = csum, + .dev = ca->dev_idx, + .bucket = bucket, + .bucket_offset = offset - + bucket_to_sector(ca, ja->buckets[bucket]), + .sector = offset, + }, jlist, j); switch (ret) { case JOURNAL_ENTRY_ADD_OK: @@ -1235,16 +1228,15 @@ out: closure_return(cl); return; err: - mutex_lock(&jlist->lock); - jlist->ret = ret; - mutex_unlock(&jlist->lock); + scoped_guard(mutex, &jlist->lock) + jlist->ret = ret; goto out; } noinline_for_stack static void bch2_journal_print_checksum_error(struct bch_fs *c, struct journal_replay *j) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_log_msg_start(c, &buf); enum bch_csum_type csum_type = JSET_CSUM_TYPE(&j->j); @@ -1271,7 +1263,6 @@ static void bch2_journal_print_checksum_error(struct bch_fs *c, struct journal_r prt_printf(&buf, "\n(had good copy on another device)"); bch2_print_str(c, KERN_ERR, buf.buf); - printbuf_exit(&buf); } struct u64_range bch2_journal_entry_missing_range(struct bch_fs *c, u64 start, u64 end) @@ -1299,7 +1290,6 @@ struct u64_range bch2_journal_entry_missing_range(struct bch_fs *c, u64 start, u noinline_for_stack static int bch2_journal_check_for_missing(struct bch_fs *c, u64 start_seq, u64 end_seq) { - struct printbuf buf = PRINTBUF; int ret = 0; struct genradix_iter radix_iter; @@ -1318,7 +1308,7 @@ static int bch2_journal_check_for_missing(struct bch_fs *c, u64 start_seq, u64 e struct u64_range missing; while ((missing = bch2_journal_entry_missing_range(c, seq, le64_to_cpu(i->j.seq))).start) { - printbuf_reset(&buf); + CLASS(printbuf, buf)(); prt_printf(&buf, "journal entries %llu-%llu missing! 
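
The journal_entry_err() macro above is a GCC/Clang statement expression: the ({ ... }) block runs its statements and evaluates to its last expression, which lets a validation macro both log a message and yield a truth value inside a single if () condition. A minimal model of the idiom; check_even() is hypothetical:

#include <stdio.h>

/* log and yield a boolean from one expression, as journal_entry_err() does */
#define check_even(x)						\
({								\
	int _bad = (x) & 1;					\
	if (_bad)						\
		fprintf(stderr, "odd value: %d\n", (x));	\
	_bad;							\
})

int main(void)
{
	if (check_even(3))
		puts("handled error path");
	return 0;
}
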
(replaying %llu-%llu)", missing.start, missing.end - 1, start_seq, end_seq); @@ -1342,7 +1332,6 @@ static int bch2_journal_check_for_missing(struct bch_fs *c, u64 start_seq, u64 e seq = le64_to_cpu(i->j.seq) + 1; } fsck_err: - printbuf_exit(&buf); return ret; } @@ -1354,7 +1343,6 @@ int bch2_journal_read(struct bch_fs *c, struct journal_list jlist; struct journal_replay *i, **_i; struct genradix_iter radix_iter; - struct printbuf buf = PRINTBUF; bool degraded = false, last_write_torn = false; u64 seq; int ret = 0; @@ -1443,24 +1431,27 @@ int bch2_journal_read(struct bch_fs *c, return 0; } - printbuf_reset(&buf); - prt_printf(&buf, "journal read done, replaying entries %llu-%llu", - *last_seq, *blacklist_seq - 1); - - /* - * Drop blacklisted entries and entries older than last_seq (or start of - * journal rewind: - */ u64 drop_before = *last_seq; - if (c->opts.journal_rewind) { - drop_before = min(drop_before, c->opts.journal_rewind); - prt_printf(&buf, " (rewinding from %llu)", c->opts.journal_rewind); + { + CLASS(printbuf, buf)(); + prt_printf(&buf, "journal read done, replaying entries %llu-%llu", + *last_seq, *blacklist_seq - 1); + + /* + * Drop blacklisted entries and entries older than last_seq (or start of + * journal rewind: + */ + if (c->opts.journal_rewind) { + drop_before = min(drop_before, c->opts.journal_rewind); + prt_printf(&buf, " (rewinding from %llu)", c->opts.journal_rewind); + } + + *last_seq = drop_before; + if (*start_seq != *blacklist_seq) + prt_printf(&buf, " (unflushed %llu-%llu)", *blacklist_seq, *start_seq - 1); + bch_info(c, "%s", buf.buf); } - *last_seq = drop_before; - if (*start_seq != *blacklist_seq) - prt_printf(&buf, " (unflushed %llu-%llu)", *blacklist_seq, *start_seq - 1); - bch_info(c, "%s", buf.buf); genradix_for_each(&c->journal_entries, radix_iter, _i) { i = *_i; @@ -1483,7 +1474,7 @@ int bch2_journal_read(struct bch_fs *c, ret = bch2_journal_check_for_missing(c, drop_before, *blacklist_seq - 1); if (ret) - goto err; + return ret; genradix_for_each(&c->journal_entries, radix_iter, _i) { union bch_replicas_padded replicas = { @@ -1512,14 +1503,14 @@ int bch2_journal_read(struct bch_fs *c, i->ptrs.data[0].sector, READ); if (ret) - goto err; + return ret; darray_for_each(i->ptrs, ptr) replicas_entry_add_dev(&replicas.e, ptr->dev); bch2_replicas_entry_sort(&replicas.e); - printbuf_reset(&buf); + CLASS(printbuf, buf)(); bch2_replicas_entry_to_text(&buf, &replicas.e); if (!degraded && @@ -1530,12 +1521,10 @@ int bch2_journal_read(struct bch_fs *c, le64_to_cpu(i->j.seq), buf.buf))) { ret = bch2_mark_replicas(c, &replicas.e); if (ret) - goto err; + return ret; } } -err: fsck_err: - printbuf_exit(&buf); return ret; } @@ -1695,10 +1684,10 @@ static void journal_buf_realloc(struct journal *j, struct journal_buf *buf) memcpy(new_buf, buf->data, buf->buf_size); - spin_lock(&j->lock); - swap(buf->data, new_buf); - swap(buf->buf_size, new_size); - spin_unlock(&j->lock); + scoped_guard(spinlock, &j->lock) { + swap(buf->data, new_buf); + swap(buf->buf_size, new_size); + } kvfree(new_buf); } @@ -1725,7 +1714,7 @@ static CLOSURE_CALLBACK(journal_write_done) } if (err && !bch2_journal_error(j)) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_log_msg_start(c, &buf); if (err == -BCH_ERR_journal_write_err) @@ -1737,7 +1726,6 @@ static CLOSURE_CALLBACK(journal_write_done) bch2_fs_emergency_read_only2(c, &buf); bch2_print_str(c, KERN_ERR, buf.buf); - printbuf_exit(&buf); } closure_debug_destroy(cl); @@ -1780,6 +1768,7 @@ static 
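
The bch2_journal_read() hunk above wraps the "journal read done" message in an explicit { } block so the CLASS(printbuf) destructor runs as soon as the message has been printed, rather than at the end of the long function. The same lifetime-narrowing in the userspace cleanup model used earlier; free_cleanup() and msg are illustrative:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static void free_cleanup(char **p) { free(*p); }

int main(void)
{
	/* the buffer lives only inside this block; cleanup runs at the '}' */
	{
		char *msg __attribute__((cleanup(free_cleanup))) = strdup("journal read done");
		printf("%s\n", msg);
	}
	/* msg is out of scope here and its allocation has been freed */
	return 0;
}
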
CLOSURE_CALLBACK(journal_write_done) closure_wake_up(&c->freelist_wait); bch2_reset_alloc_cursors(c); + do_discards = true; } j->seq_ondisk = seq; @@ -2018,9 +2007,8 @@ static int bch2_journal_write_prep(struct journal *j, struct journal_buf *w) } } - spin_lock(&c->journal.lock); - w->need_flush_to_write_buffer = false; - spin_unlock(&c->journal.lock); + scoped_guard(spinlock, &c->journal.lock) + w->need_flush_to_write_buffer = false; start = end = vstruct_last(jset); @@ -2158,21 +2146,21 @@ CLOSURE_CALLBACK(bch2_journal_write) j->write_start_time = local_clock(); - spin_lock(&j->lock); - if (nr_rw_members > 1) - w->separate_flush = true; + scoped_guard(spinlock, &j->lock) { + if (nr_rw_members > 1) + w->separate_flush = true; - ret = bch2_journal_write_pick_flush(j, w); - spin_unlock(&j->lock); + ret = bch2_journal_write_pick_flush(j, w); + } if (unlikely(ret)) goto err; - mutex_lock(&j->buf_lock); - journal_buf_realloc(j, w); + scoped_guard(mutex, &j->buf_lock) { + journal_buf_realloc(j, w); - ret = bch2_journal_write_prep(j, w); - mutex_unlock(&j->buf_lock); + ret = bch2_journal_write_prep(j, w); + } if (unlikely(ret)) goto err; @@ -2193,22 +2181,22 @@ CLOSURE_CALLBACK(bch2_journal_write) if (unlikely(ret)) goto err; - spin_lock(&j->lock); - /* - * write is allocated, no longer need to account for it in - * bch2_journal_space_available(): - */ - w->sectors = 0; - w->write_allocated = true; - j->entry_bytes_written += vstruct_bytes(w->data); + scoped_guard(spinlock, &j->lock) { + /* + * write is allocated, no longer need to account for it in + * bch2_journal_space_available(): + */ + w->sectors = 0; + w->write_allocated = true; + j->entry_bytes_written += vstruct_bytes(w->data); - /* - * journal entry has been compacted and allocated, recalculate space - * available: - */ - bch2_journal_space_available(j); - bch2_journal_do_writes(j); - spin_unlock(&j->lock); + /* + * journal entry has been compacted and allocated, recalculate space + * available: + */ + bch2_journal_space_available(j); + bch2_journal_do_writes(j); + } w->devs_written = bch2_bkey_devs(bkey_i_to_s_c(&w->key)); @@ -2232,7 +2220,7 @@ CLOSURE_CALLBACK(bch2_journal_write) return; err_allocate_write: if (!bch2_journal_error(j)) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_journal_debug_to_text(&buf, j); prt_printf(&buf, bch2_fmt(c, "Unable to allocate journal write at seq %llu for %zu sectors: %s"), @@ -2240,7 +2228,6 @@ err_allocate_write: vstruct_sectors(w->data, c->block_bits), bch2_err_str(ret)); bch2_print_str(c, KERN_ERR, buf.buf); - printbuf_exit(&buf); } err: bch2_fatal_error(c); diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index 0042d43b8e57..be50455c7f13 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -221,8 +221,8 @@ void bch2_journal_space_available(struct journal *j) if (nr_online < metadata_replicas_required(c)) { if (!(c->sb.features & BIT_ULL(BCH_FEATURE_small_image))) { - struct printbuf buf = PRINTBUF; - buf.atomic++; + CLASS(printbuf, buf)(); + guard(printbuf_atomic)(&buf); prt_printf(&buf, "insufficient writeable journal devices available: have %u, need %u\n" "rw journal devs:", nr_online, metadata_replicas_required(c)); @@ -230,7 +230,6 @@ void bch2_journal_space_available(struct journal *j) prt_printf(&buf, " %s", ca->name); bch_err(c, "%s", buf.buf); - printbuf_exit(&buf); } ret = bch_err_throw(c, insufficient_journal_devices); goto out; @@ -280,11 +279,8 @@ static bool __should_discard_bucket(struct journal *j, struct 
journal_device *ja static bool should_discard_bucket(struct journal *j, struct journal_device *ja) { - spin_lock(&j->lock); - bool ret = __should_discard_bucket(j, ja); - spin_unlock(&j->lock); - - return ret; + guard(spinlock)(&j->lock); + return __should_discard_bucket(j, ja); } /* @@ -295,7 +291,7 @@ void bch2_journal_do_discards(struct journal *j) { struct bch_fs *c = container_of(j, struct bch_fs, journal); - mutex_lock(&j->discard_lock); + guard(mutex)(&j->discard_lock); for_each_rw_member(c, ca, BCH_DEV_WRITE_REF_journal_do_discards) { struct journal_device *ja = &ca->journal; @@ -309,15 +305,12 @@ void bch2_journal_do_discards(struct journal *j) ja->buckets[ja->discard_idx]), ca->mi.bucket_size, GFP_NOFS); - spin_lock(&j->lock); - ja->discard_idx = (ja->discard_idx + 1) % ja->nr; - - bch2_journal_space_available(j); - spin_unlock(&j->lock); + scoped_guard(spinlock, &j->lock) { + ja->discard_idx = (ja->discard_idx + 1) % ja->nr; + bch2_journal_space_available(j); + } } } - - mutex_unlock(&j->discard_lock); } /* @@ -358,9 +351,8 @@ bool __bch2_journal_pin_put(struct journal *j, u64 seq) void bch2_journal_pin_put(struct journal *j, u64 seq) { if (__bch2_journal_pin_put(j, seq)) { - spin_lock(&j->lock); + guard(spinlock)(&j->lock); bch2_journal_reclaim_fast(j); - spin_unlock(&j->lock); } } @@ -393,10 +385,9 @@ static inline bool __journal_pin_drop(struct journal *j, void bch2_journal_pin_drop(struct journal *j, struct journal_entry_pin *pin) { - spin_lock(&j->lock); + guard(spinlock)(&j->lock); if (__journal_pin_drop(j, pin)) bch2_journal_reclaim_fast(j); - spin_unlock(&j->lock); } static enum journal_pin_type journal_pin_type(struct journal_entry_pin *pin, @@ -443,7 +434,7 @@ void bch2_journal_pin_copy(struct journal *j, struct journal_entry_pin *src, journal_pin_flush_fn flush_fn) { - spin_lock(&j->lock); + guard(spinlock)(&j->lock); u64 seq = READ_ONCE(src->seq); @@ -454,7 +445,6 @@ void bch2_journal_pin_copy(struct journal *j, * longer to exist, but that means there's no longer anything to * copy and we can bail out here: */ - spin_unlock(&j->lock); return; } @@ -471,31 +461,32 @@ void bch2_journal_pin_copy(struct journal *j, */ if (seq == journal_last_seq(j)) journal_wake(j); - spin_unlock(&j->lock); } void bch2_journal_pin_set(struct journal *j, u64 seq, struct journal_entry_pin *pin, journal_pin_flush_fn flush_fn) { - spin_lock(&j->lock); + bool wake; - BUG_ON(seq < journal_last_seq(j)); + scoped_guard(spinlock, &j->lock) { + BUG_ON(seq < journal_last_seq(j)); - bool reclaim = __journal_pin_drop(j, pin); + bool reclaim = __journal_pin_drop(j, pin); - bch2_journal_pin_set_locked(j, seq, pin, flush_fn, journal_pin_type(pin, flush_fn)); + bch2_journal_pin_set_locked(j, seq, pin, flush_fn, journal_pin_type(pin, flush_fn)); - if (reclaim) - bch2_journal_reclaim_fast(j); - /* - * If the journal is currently full, we might want to call flush_fn - * immediately: - */ - if (seq == journal_last_seq(j)) - journal_wake(j); + if (reclaim) + bch2_journal_reclaim_fast(j); + /* + * If the journal is currently full, we might want to call flush_fn + * immediately: + */ + wake = seq == journal_last_seq(j); + } - spin_unlock(&j->lock); + if (wake) + journal_wake(j); } /** @@ -580,17 +571,17 @@ static size_t journal_flush_pins(struct journal *j, j->last_flushed = jiffies; - spin_lock(&j->lock); - pin = journal_get_next_pin(j, seq_to_flush, - allowed_below, - allowed_above, &seq); - if (pin) { - BUG_ON(j->flush_in_progress); - j->flush_in_progress = pin; - j->flush_in_progress_dropped = false; - 
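
bch2_journal_pin_set() above now records whether a wakeup is needed inside the critical section and issues journal_wake() only after the spinlock is dropped, so waiters are never woken while the lock is still held. A pthread sketch of the same shape; the names are illustrative:

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t waiters = PTHREAD_COND_INITIALIZER;
static unsigned long last_seq;

static void pin_set(unsigned long seq)
{
	bool wake;

	pthread_mutex_lock(&lock);
	last_seq = seq > last_seq ? seq : last_seq;
	wake = (seq == last_seq);	/* decide under the lock... */
	pthread_mutex_unlock(&lock);

	if (wake)			/* ...notify after dropping it */
		pthread_cond_broadcast(&waiters);
}

int main(void) { pin_set(1); return 0; }
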
flush_fn = pin->flush; + scoped_guard(spinlock, &j->lock) { + pin = journal_get_next_pin(j, seq_to_flush, + allowed_below, + allowed_above, &seq); + if (pin) { + BUG_ON(j->flush_in_progress); + j->flush_in_progress = pin; + j->flush_in_progress_dropped = false; + flush_fn = pin->flush; + } } - spin_unlock(&j->lock); if (!pin) break; @@ -603,13 +594,13 @@ static size_t journal_flush_pins(struct journal *j, err = flush_fn(j, pin, seq); - spin_lock(&j->lock); - /* Pin might have been dropped or rearmed: */ - if (likely(!err && !j->flush_in_progress_dropped)) - list_move(&pin->list, &journal_seq_pin(j, seq)->flushed[journal_pin_type(pin, flush_fn)]); - j->flush_in_progress = NULL; - j->flush_in_progress_dropped = false; - spin_unlock(&j->lock); + scoped_guard(spinlock, &j->lock) { + /* Pin might have been dropped or rearmed: */ + if (likely(!err && !j->flush_in_progress_dropped)) + list_move(&pin->list, &journal_seq_pin(j, seq)->flushed[journal_pin_type(pin, flush_fn)]); + j->flush_in_progress = NULL; + j->flush_in_progress_dropped = false; + } wake_up(&j->pin_flush_wait); @@ -770,9 +761,8 @@ static int bch2_journal_reclaim_thread(void *arg) j->reclaim_kicked = false; - mutex_lock(&j->reclaim_lock); - ret = __bch2_journal_reclaim(j, false, kicked); - mutex_unlock(&j->reclaim_lock); + scoped_guard(mutex, &j->reclaim_lock) + ret = __bch2_journal_reclaim(j, false, kicked); now = jiffies; delay = msecs_to_jiffies(c->opts.journal_reclaim_delay); @@ -788,9 +778,8 @@ static int bch2_journal_reclaim_thread(void *arg) if (j->reclaim_kicked) break; - spin_lock(&j->lock); - journal_empty = fifo_empty(&j->pin); - spin_unlock(&j->lock); + scoped_guard(spinlock, &j->lock) + journal_empty = fifo_empty(&j->pin); long timeout = j->next_reclaim - jiffies; @@ -844,10 +833,10 @@ int bch2_journal_reclaim_start(struct journal *j) static bool journal_pins_still_flushing(struct journal *j, u64 seq_to_flush, unsigned types) { + guard(spinlock)(&j->lock); + struct journal_entry_pin_list *pin_list; u64 seq; - - spin_lock(&j->lock); fifo_for_each_entry_ptr(pin_list, &j->pin, seq) { if (seq > seq_to_flush) break; @@ -855,12 +844,9 @@ static bool journal_pins_still_flushing(struct journal *j, u64 seq_to_flush, for (unsigned i = 0; i < JOURNAL_PIN_TYPE_NR; i++) if ((BIT(i) & types) && (!list_empty(&pin_list->unflushed[i]) || - !list_empty(&pin_list->flushed[i]))) { - spin_unlock(&j->lock); + !list_empty(&pin_list->flushed[i]))) return true; - } } - spin_unlock(&j->lock); return false; } @@ -881,32 +867,27 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush, if (ret) return ret; - mutex_lock(&j->reclaim_lock); + guard(mutex)(&j->reclaim_lock); for (int type = JOURNAL_PIN_TYPE_NR - 1; type >= 0; --type) if (journal_flush_pins_or_still_flushing(j, seq_to_flush, BIT(type))) { *did_work = true; - goto unlock; + return ret; } if (seq_to_flush > journal_cur_seq(j)) bch2_journal_entry_close(j); - spin_lock(&j->lock); /* * If journal replay hasn't completed, the unreplayed journal entries * hold refs on their corresponding sequence numbers */ + guard(spinlock)(&j->lock); ret = !test_bit(JOURNAL_replay_done, &j->flags) || journal_last_seq(j) > seq_to_flush || !fifo_used(&j->pin); - - spin_unlock(&j->lock); -unlock: - mutex_unlock(&j->reclaim_lock); - return ret; } @@ -931,13 +912,12 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx) u64 iter, seq = 0; int ret = 0; - spin_lock(&j->lock); - fifo_for_each_entry_ptr(p, &j->pin, iter) - if (dev_idx >= 0 - ? 
bch2_dev_list_has_dev(p->devs, dev_idx) - : p->devs.nr < c->opts.metadata_replicas) - seq = iter; - spin_unlock(&j->lock); + scoped_guard(spinlock, &j->lock) + fifo_for_each_entry_ptr(p, &j->pin, iter) + if (dev_idx >= 0 + ? bch2_dev_list_has_dev(p->devs, dev_idx) + : p->devs.nr < c->opts.metadata_replicas) + seq = iter; bch2_journal_flush_pins(j, seq); @@ -945,7 +925,7 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx) if (ret) return ret; - mutex_lock(&c->replicas_gc_lock); + guard(mutex)(&c->replicas_gc_lock); bch2_replicas_gc_start(c, 1 << BCH_DATA_journal); /* @@ -960,29 +940,25 @@ int bch2_journal_flush_device_pins(struct journal *j, int dev_idx) goto err; seq = 0; - spin_lock(&j->lock); - while (!ret) { - union bch_replicas_padded replicas; + scoped_guard(spinlock, &j->lock) + while (!ret) { + union bch_replicas_padded replicas; - seq = max(seq, journal_last_seq(j)); - if (seq >= j->pin.back) - break; - bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal, - journal_seq_pin(j, seq)->devs); - seq++; + seq = max(seq, journal_last_seq(j)); + if (seq >= j->pin.back) + break; + bch2_devlist_to_replicas(&replicas.e, BCH_DATA_journal, + journal_seq_pin(j, seq)->devs); + seq++; - if (replicas.e.nr_devs) { - spin_unlock(&j->lock); - ret = bch2_mark_replicas(c, &replicas.e); - spin_lock(&j->lock); + if (replicas.e.nr_devs) { + spin_unlock(&j->lock); + ret = bch2_mark_replicas(c, &replicas.e); + spin_lock(&j->lock); + } } - } - spin_unlock(&j->lock); err: - ret = bch2_replicas_gc_end(c, ret); - mutex_unlock(&c->replicas_gc_lock); - - return ret; + return bch2_replicas_gc_end(c, ret); } bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64 *seq) @@ -990,20 +966,16 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64 struct journal_entry_pin_list *pin_list; struct journal_entry_pin *pin; - spin_lock(&j->lock); - if (!test_bit(JOURNAL_running, &j->flags)) { - spin_unlock(&j->lock); + guard(spinlock)(&j->lock); + guard(printbuf_atomic)(out); + + if (!test_bit(JOURNAL_running, &j->flags)) return true; - } *seq = max(*seq, j->pin.front); - if (*seq >= j->pin.back) { - spin_unlock(&j->lock); + if (*seq >= j->pin.back) return true; - } - - out->atomic++; pin_list = journal_seq_pin(j, *seq); @@ -1022,9 +994,6 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64 printbuf_indent_sub(out, 2); - --out->atomic; - spin_unlock(&j->lock); - return false; } diff --git a/fs/bcachefs/journal_seq_blacklist.c b/fs/bcachefs/journal_seq_blacklist.c index 6361809b5e2e..399db5b77d9f 100644 --- a/fs/bcachefs/journal_seq_blacklist.c +++ b/fs/bcachefs/journal_seq_blacklist.c @@ -49,7 +49,7 @@ int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end) unsigned i = 0, nr; int ret = 0; - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); bl = bch2_sb_field_get(c->disk_sb.sb, journal_seq_blacklist); nr = blacklist_nr_entries(bl); @@ -77,10 +77,8 @@ int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end) bl = bch2_sb_field_resize(&c->disk_sb, journal_seq_blacklist, sb_blacklist_u64s(nr + 1)); - if (!bl) { - ret = bch_err_throw(c, ENOSPC_sb_journal_seq_blacklist); - goto out; - } + if (!bl) + return bch_err_throw(c, ENOSPC_sb_journal_seq_blacklist); array_insert_item(bl->start, nr, i, ((struct journal_seq_blacklist_entry) { .start = cpu_to_le64(start), @@ -89,8 +87,6 @@ int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end) c->disk_sb.sb->features[0] |= cpu_to_le64(1ULL 
<< BCH_FEATURE_journal_seq_blacklist_v3); ret = bch2_write_super(c); -out: - mutex_unlock(&c->sb_lock); return ret ?: bch2_blacklist_table_initialize(c); } diff --git a/fs/bcachefs/logged_ops.c b/fs/bcachefs/logged_ops.c index 75f27ec26f85..0367ea37e857 100644 --- a/fs/bcachefs/logged_ops.c +++ b/fs/bcachefs/logged_ops.c @@ -35,7 +35,7 @@ static int resume_logged_op(struct btree_trans *trans, struct btree_iter *iter, { struct bch_fs *c = trans->c; u32 restart_count = trans->restart_count; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); int ret = 0; fsck_err_on(test_bit(BCH_FS_clean_recovery, &c->flags), @@ -56,21 +56,18 @@ static int resume_logged_op(struct btree_trans *trans, struct btree_iter *iter, bch2_bkey_buf_exit(&sk, c); fsck_err: - printbuf_exit(&buf); return ret ?: trans_was_restarted(trans, restart_count); } int bch2_resume_logged_ops(struct bch_fs *c) { - int ret = bch2_trans_run(c, - for_each_btree_key_max(trans, iter, + CLASS(btree_trans, trans)(c); + return for_each_btree_key_max(trans, iter, BTREE_ID_logged_ops, POS(LOGGED_OPS_INUM_logged_ops, 0), POS(LOGGED_OPS_INUM_logged_ops, U64_MAX), BTREE_ITER_prefetch, k, - resume_logged_op(trans, &iter, k))); - bch_err_fn(c, ret); - return ret; + resume_logged_op(trans, &iter, k)); } static int __bch2_logged_op_start(struct btree_trans *trans, struct bkey_i *k) @@ -107,12 +104,11 @@ int bch2_logged_op_finish(struct btree_trans *trans, struct bkey_i *k) */ if (ret) { struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k)); bch2_fs_fatal_error(c, "deleting logged operation %s: %s", buf.buf, bch2_err_str(ret)); - printbuf_exit(&buf); } return ret; diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c index 57b5b3263b08..ee14656c3fdd 100644 --- a/fs/bcachefs/lru.c +++ b/fs/bcachefs/lru.c @@ -86,7 +86,7 @@ int bch2_lru_check_set(struct btree_trans *trans, struct bkey_buf *last_flushed) { struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); struct btree_iter lru_iter; struct bkey_s_c lru_k = bch2_bkey_get_iter(trans, &lru_iter, BTREE_ID_lru, @@ -112,7 +112,6 @@ int bch2_lru_check_set(struct btree_trans *trans, err: fsck_err: bch2_trans_iter_exit(trans, &lru_iter); - printbuf_exit(&buf); return ret; } @@ -166,8 +165,8 @@ static int bch2_check_lru_key(struct btree_trans *trans, struct bkey_buf *last_flushed) { struct bch_fs *c = trans->c; - struct printbuf buf1 = PRINTBUF; - struct printbuf buf2 = PRINTBUF; + CLASS(printbuf, buf1)(); + CLASS(printbuf, buf2)(); struct bbpos bp = lru_pos_to_bp(lru_k); @@ -198,8 +197,6 @@ static int bch2_check_lru_key(struct btree_trans *trans, err: fsck_err: bch2_trans_iter_exit(trans, &iter); - printbuf_exit(&buf2); - printbuf_exit(&buf1); return ret; } @@ -210,14 +207,13 @@ int bch2_check_lrus(struct bch_fs *c) bch2_bkey_buf_init(&last_flushed); bkey_init(&last_flushed.k->k); - int ret = bch2_trans_run(c, - for_each_btree_key_commit(trans, iter, + CLASS(btree_trans, trans)(c); + int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_lru, POS_MIN, BTREE_ITER_prefetch, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - bch2_check_lru_key(trans, &iter, k, &last_flushed))); + bch2_check_lru_key(trans, &iter, k, &last_flushed)); bch2_bkey_buf_exit(&last_flushed, c); - bch_err_fn(c, ret); return ret; } diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c index f296cce95338..bd1e54e0efd5 100644 --- a/fs/bcachefs/migrate.c +++ b/fs/bcachefs/migrate.c @@ -119,34 +119,29 @@ static int 
bch2_dev_usrdata_drop(struct bch_fs *c, struct progress_indicator_state *progress, unsigned dev_idx, unsigned flags) { - struct btree_trans *trans = bch2_trans_get(c); - enum btree_id id; - int ret = 0; + CLASS(btree_trans, trans)(c); - for (id = 0; id < BTREE_ID_NR; id++) { + for (unsigned id = 0; id < BTREE_ID_NR; id++) { if (!btree_type_has_ptrs(id)) continue; - ret = for_each_btree_key_commit(trans, iter, id, POS_MIN, + int ret = for_each_btree_key_commit(trans, iter, id, POS_MIN, BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ bch2_progress_update_iter(trans, progress, &iter, "dropping user data"); bch2_dev_usrdata_drop_key(trans, &iter, k, dev_idx, flags); })); if (ret) - break; + return ret; } - bch2_trans_put(trans); - - return ret; + return 0; } static int bch2_dev_metadata_drop(struct bch_fs *c, struct progress_indicator_state *progress, unsigned dev_idx, unsigned flags) { - struct btree_trans *trans; struct btree_iter iter; struct closure cl; struct btree *b; @@ -158,7 +153,7 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, if (flags & BCH_FORCE_IF_METADATA_LOST) return bch_err_throw(c, remove_with_metadata_missing_unimplemented); - trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); bch2_bkey_buf_init(&k); closure_init_stack(&cl); @@ -199,7 +194,6 @@ next: ret = 0; err: bch2_bkey_buf_exit(&k, c); - bch2_trans_put(trans); BUG_ON(bch2_err_matches(ret, BCH_ERR_transaction_restart)); @@ -240,7 +234,7 @@ out: int bch2_dev_data_drop_by_backpointers(struct bch_fs *c, unsigned dev_idx, unsigned flags) { - struct btree_trans *trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); struct bkey_buf last_flushed; bch2_bkey_buf_init(&last_flushed); @@ -260,7 +254,6 @@ int bch2_dev_data_drop_by_backpointers(struct bch_fs *c, unsigned dev_idx, unsig })); bch2_bkey_buf_exit(&last_flushed, trans->c); - bch2_trans_put(trans); bch_err_fn(c, ret); return ret; } diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index eec591e947bd..3f44bb54f91a 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -54,22 +54,20 @@ trace_io_move2(struct bch_fs *c, struct bkey_s_c k, struct bch_io_opts *io_opts, struct data_update_opts *data_opts) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_bkey_val_to_text(&buf, c, k); prt_newline(&buf); bch2_data_update_opts_to_text(&buf, c, io_opts, data_opts); trace_io_move(c, buf.buf); - printbuf_exit(&buf); } static noinline void trace_io_move_read2(struct bch_fs *c, struct bkey_s_c k) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_bkey_val_to_text(&buf, c, k); trace_io_move_read(c, buf.buf); - printbuf_exit(&buf); } static noinline void @@ -78,7 +76,7 @@ trace_io_move_pred2(struct bch_fs *c, struct bkey_s_c k, struct data_update_opts *data_opts, move_pred_fn pred, void *_arg, bool p) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_printf(&buf, "%ps: %u", pred, p); @@ -92,7 +90,6 @@ trace_io_move_pred2(struct bch_fs *c, struct bkey_s_c k, prt_newline(&buf); bch2_data_update_opts_to_text(&buf, c, io_opts, data_opts); trace_io_move_pred(c, buf.buf); - printbuf_exit(&buf); } static noinline void @@ -128,10 +125,9 @@ static void move_free(struct moving_io *io) if (io->b) atomic_dec(&io->b->count); - mutex_lock(&ctxt->lock); - list_del(&io->io_list); + scoped_guard(mutex, &ctxt->lock) + list_del(&io->io_list); wake_up(&ctxt->wait); - mutex_unlock(&ctxt->lock); if (!io->write.data_opts.scrub) { bch2_data_update_exit(&io->write); @@ -150,11 +146,9 @@ static 
void move_write_done(struct bch_write_op *op) if (op->error) { if (trace_io_move_write_fail_enabled()) { - struct printbuf buf = PRINTBUF; - + CLASS(printbuf, buf)(); bch2_write_op_to_text(&buf, op); trace_io_move_write_fail(c, buf.buf); - printbuf_exit(&buf); } this_cpu_inc(c->counters[BCH_COUNTER_io_move_write_fail]); @@ -203,11 +197,9 @@ static void move_write(struct moving_io *io) } if (trace_io_move_write_enabled()) { - struct printbuf buf = PRINTBUF; - + CLASS(printbuf, buf)(); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(io->write.k.k)); trace_io_move_write(c, buf.buf); - printbuf_exit(&buf); } closure_get(&io->write.ctxt->cl); @@ -276,9 +268,8 @@ void bch2_moving_ctxt_exit(struct moving_context *ctxt) EBUG_ON(atomic_read(&ctxt->read_sectors)); EBUG_ON(atomic_read(&ctxt->read_ios)); - mutex_lock(&c->moving_context_lock); - list_del(&ctxt->list); - mutex_unlock(&c->moving_context_lock); + scoped_guard(mutex, &c->moving_context_lock) + list_del(&ctxt->list); /* * Generally, releasing a transaction within a transaction restart means @@ -314,9 +305,8 @@ void bch2_moving_ctxt_init(struct moving_context *ctxt, INIT_LIST_HEAD(&ctxt->ios); init_waitqueue_head(&ctxt->wait); - mutex_lock(&c->moving_context_lock); - list_add(&ctxt->list, &c->moving_context_list); - mutex_unlock(&c->moving_context_lock); + scoped_guard(mutex, &c->moving_context_lock) + list_add(&ctxt->list, &c->moving_context_list); } void bch2_move_stats_exit(struct bch_move_stats *stats, struct bch_fs *c) @@ -412,13 +402,13 @@ int bch2_move_extent(struct moving_context *ctxt, if (trace_io_move_read_enabled()) trace_io_move_read2(c, k); - mutex_lock(&ctxt->lock); - atomic_add(io->read_sectors, &ctxt->read_sectors); - atomic_inc(&ctxt->read_ios); + scoped_guard(mutex, &ctxt->lock) { + atomic_add(io->read_sectors, &ctxt->read_sectors); + atomic_inc(&ctxt->read_ios); - list_add_tail(&io->read_list, &ctxt->reads); - list_add_tail(&io->io_list, &ctxt->ios); - mutex_unlock(&ctxt->lock); + list_add_tail(&io->read_list, &ctxt->reads); + list_add_tail(&io->io_list, &ctxt->ios); + } /* * dropped by move_read_endio() - guards against use after free of @@ -443,13 +433,11 @@ err: count_event(c, io_move_start_fail); if (trace_io_move_start_fail_enabled()) { - struct printbuf buf = PRINTBUF; - + CLASS(printbuf, buf)(); bch2_bkey_val_to_text(&buf, c, k); prt_str(&buf, ": "); prt_str(&buf, bch2_err_str(ret)); trace_io_move_start_fail(c, buf.buf); - printbuf_exit(&buf); } if (bch2_err_matches(ret, BCH_ERR_data_update_done)) @@ -468,7 +456,7 @@ struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans, struct bch_io_opts *opts_ret = &io_opts->fs_io_opts; int ret = 0; - if (extent_iter->min_depth) + if (btree_iter_path(trans, extent_iter)->level) return opts_ret; if (extent_k.k->type == KEY_TYPE_reflink_v) @@ -672,8 +660,7 @@ retry_root: k = bkey_i_to_s_c(&b->key); - io_opts = bch2_move_get_io_opts(trans, &snapshot_io_opts, - iter.pos, &iter, k); + io_opts = &snapshot_io_opts.fs_io_opts; ret = PTR_ERR_OR_ZERO(io_opts); if (ret) goto root_err; @@ -875,7 +862,7 @@ static int __bch2_move_data_phys(struct moving_context *ctxt, u64 check_mismatch_done = bucket_start; int ret = 0; - struct bch_dev *ca = bch2_dev_tryget(c, dev); + CLASS(bch2_dev_tryget, ca)(c, dev); if (!ca) return 0; @@ -1014,7 +1001,6 @@ err: bch2_trans_iter_exit(trans, &bp_iter); bch2_bkey_buf_exit(&sk, c); bch2_bkey_buf_exit(&last_flushed, c); - bch2_dev_put(ca); return ret; } @@ -1031,9 +1017,9 @@ int bch2_move_data_phys(struct bch_fs *c, { struct moving_context ctxt; - 
bch2_trans_run(c, bch2_btree_write_buffer_flush_sync(trans)); - bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc); + bch2_btree_write_buffer_flush_sync(ctxt.trans); + if (ctxt.stats) { ctxt.stats->phys = true; ctxt.stats->data_type = (int) DATA_PROGRESS_DATA_TYPE_phys; @@ -1268,12 +1254,11 @@ int bch2_scan_old_btree_nodes(struct bch_fs *c, struct bch_move_stats *stats) BBPOS_MAX, rewrite_old_nodes_pred, c, stats); if (!ret) { - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done); c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done); c->disk_sb.sb->version_min = c->disk_sb.sb->version; bch2_write_super(c); - mutex_unlock(&c->sb_lock); } bch_err_fn(c, ret); @@ -1343,18 +1328,18 @@ static bool scrub_pred(struct bch_fs *c, void *_arg, int bch2_data_job(struct bch_fs *c, struct bch_move_stats *stats, - struct bch_ioctl_data op) + struct bch_ioctl_data *op) { - struct bbpos start = BBPOS(op.start_btree, op.start_pos); - struct bbpos end = BBPOS(op.end_btree, op.end_pos); + struct bbpos start = BBPOS(op->start_btree, op->start_pos); + struct bbpos end = BBPOS(op->end_btree, op->end_pos); int ret = 0; - if (op.op >= BCH_DATA_OP_NR) + if (op->op >= BCH_DATA_OP_NR) return -EINVAL; - bch2_move_stats_init(stats, bch2_data_ops_strs[op.op]); + bch2_move_stats_init(stats, bch2_data_ops_strs[op->op]); - switch (op.op) { + switch (op->op) { case BCH_DATA_OP_scrub: /* * prevent tests from spuriously failing, make sure we see all @@ -1362,13 +1347,13 @@ int bch2_data_job(struct bch_fs *c, */ bch2_btree_interior_updates_flush(c); - ret = bch2_move_data_phys(c, op.scrub.dev, 0, U64_MAX, - op.scrub.data_types, + ret = bch2_move_data_phys(c, op->scrub.dev, 0, U64_MAX, + op->scrub.data_types, NULL, stats, writepoint_hashed((unsigned long) current), false, - scrub_pred, &op) ?: ret; + scrub_pred, op) ?: ret; break; case BCH_DATA_OP_rereplicate: @@ -1385,18 +1370,18 @@ int bch2_data_job(struct bch_fs *c, ret = bch2_replicas_gc2(c) ?: ret; break; case BCH_DATA_OP_migrate: - if (op.migrate.dev >= c->sb.nr_devices) + if (op->migrate.dev >= c->sb.nr_devices) return -EINVAL; stats->data_type = BCH_DATA_journal; - ret = bch2_journal_flush_device_pins(&c->journal, op.migrate.dev); - ret = bch2_move_data_phys(c, op.migrate.dev, 0, U64_MAX, + ret = bch2_journal_flush_device_pins(&c->journal, op->migrate.dev); + ret = bch2_move_data_phys(c, op->migrate.dev, 0, U64_MAX, ~0, NULL, stats, writepoint_hashed((unsigned long) current), true, - migrate_pred, &op) ?: ret; + migrate_pred, op) ?: ret; bch2_btree_interior_updates_flush(c); ret = bch2_replicas_gc2(c) ?: ret; break; @@ -1468,11 +1453,11 @@ static void bch2_moving_ctxt_to_text(struct printbuf *out, struct bch_fs *c, str printbuf_indent_add(out, 2); - mutex_lock(&ctxt->lock); - struct moving_io *io; - list_for_each_entry(io, &ctxt->ios, io_list) - bch2_data_update_inflight_to_text(out, &io->write); - mutex_unlock(&ctxt->lock); + scoped_guard(mutex, &ctxt->lock) { + struct moving_io *io; + list_for_each_entry(io, &ctxt->ios, io_list) + bch2_data_update_inflight_to_text(out, &io->write); + } printbuf_indent_sub(out, 4); } @@ -1481,10 +1466,9 @@ void bch2_fs_moving_ctxts_to_text(struct printbuf *out, struct bch_fs *c) { struct moving_context *ctxt; - mutex_lock(&c->moving_context_lock); - list_for_each_entry(ctxt, &c->moving_context_list, list) - bch2_moving_ctxt_to_text(out, c, ctxt); - mutex_unlock(&c->moving_context_lock); + 
scoped_guard(mutex, &c->moving_context_lock) + list_for_each_entry(ctxt, &c->moving_context_list, list) + bch2_moving_ctxt_to_text(out, c, ctxt); } void bch2_fs_move_init(struct bch_fs *c) diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h index 86b80499ac55..fe92ca6d418d 100644 --- a/fs/bcachefs/move.h +++ b/fs/bcachefs/move.h @@ -152,7 +152,7 @@ int bch2_evacuate_bucket(struct moving_context *, struct data_update_opts); int bch2_data_job(struct bch_fs *, struct bch_move_stats *, - struct bch_ioctl_data); + struct bch_ioctl_data *); void bch2_move_stats_to_text(struct printbuf *, struct bch_move_stats *); void bch2_move_stats_exit(struct bch_move_stats *, struct bch_fs *); diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index 27e68d470ad0..9192b1fc3594 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -71,7 +71,7 @@ static int bch2_bucket_is_movable(struct btree_trans *trans, if (ret) return ret; - struct bch_dev *ca = bch2_dev_tryget(c, k.k->p.inode); + CLASS(bch2_dev_bucket_tryget, ca)(c, k.k->p); if (!ca) goto out; @@ -90,7 +90,6 @@ static int bch2_bucket_is_movable(struct btree_trans *trans, ret = lru_idx && lru_idx <= time; out: - bch2_dev_put(ca); bch2_trans_iter_exit(trans, &iter); return ret; } @@ -320,8 +319,8 @@ void bch2_copygc_wait_to_text(struct printbuf *out, struct bch_fs *c) bch2_printbuf_make_room(out, 4096); struct task_struct *t; - out->atomic++; scoped_guard(rcu) { + guard(printbuf_atomic)(out); prt_printf(out, "Currently calculated wait:\n"); for_each_rw_member_rcu(c, ca) { prt_printf(out, " %s:\t", ca->name); @@ -333,7 +332,6 @@ void bch2_copygc_wait_to_text(struct printbuf *out, struct bch_fs *c) if (t) get_task_struct(t); } - --out->atomic; if (t) { bch2_prt_task_backtrace(out, t, 0, GFP_KERNEL); diff --git a/fs/bcachefs/namei.c b/fs/bcachefs/namei.c index 3e2b41babc26..8fa108880f58 100644 --- a/fs/bcachefs/namei.c +++ b/fs/bcachefs/namei.c @@ -99,7 +99,9 @@ int bch2_create_trans(struct btree_trans *trans, * If we're not root, we have to own the subvolume being * snapshotted: */ - if (uid && new_inode->bi_uid != uid) { + if (uid && + !capable(CAP_FOWNER) && + new_inode->bi_uid != uid) { ret = -EPERM; goto err; } @@ -727,7 +729,7 @@ static int bch2_check_dirent_inode_dirent(struct btree_trans *trans, bool in_fsck) { struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); struct btree_iter bp_iter = {}; int ret = 0; @@ -835,7 +837,6 @@ out: err: fsck_err: bch2_trans_iter_exit(trans, &bp_iter); - printbuf_exit(&buf); bch_err_fn(c, ret); return ret; } @@ -847,7 +848,7 @@ int __bch2_check_dirent_target(struct btree_trans *trans, bool in_fsck) { struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); int ret = 0; ret = bch2_check_dirent_inode_dirent(trans, d, target, in_fsck); @@ -882,7 +883,6 @@ int __bch2_check_dirent_target(struct btree_trans *trans, } err: fsck_err: - printbuf_exit(&buf); bch_err_fn(c, ret); return ret; } @@ -940,7 +940,7 @@ int bch2_check_inode_has_case_insensitive(struct btree_trans *trans, snapshot_id_list *snapshot_overwrites, bool *do_update) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bool repairing_parents = false; int ret = 0; @@ -967,7 +967,7 @@ int bch2_check_inode_has_case_insensitive(struct btree_trans *trans, ret = bch2_inum_snapshot_to_path(trans, inode->bi_inum, inode->bi_snapshot, snapshot_overwrites, &buf); if (ret) - goto err; + return ret; if (fsck_err(trans, inode_has_case_insensitive_not_set, "%s", buf.buf)) { 
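
The conversions above replace open-coded mutex_lock()/mutex_unlock() pairs with guard() and scoped_guard() from <linux/cleanup.h>, which release the lock automatically when the scope ends. A minimal userspace model of the idea, using pthreads and the GCC/Clang cleanup attribute (illustration only — the demo_* names are invented, not kernel API):

	#include <pthread.h>
	#include <stdio.h>

	static pthread_mutex_t demo_lock = PTHREAD_MUTEX_INITIALIZER;
	static int counter;

	/* cleanup() callbacks receive a pointer to the annotated variable: */
	static void demo_unlock(pthread_mutex_t **l)
	{
		pthread_mutex_unlock(*l);
	}

	/* lock now, unlock automatically when 'name' goes out of scope: */
	#define DEMO_GUARD(name, l)						\
		pthread_mutex_t *name __attribute__((cleanup(demo_unlock))) =	\
			(pthread_mutex_lock(l), (l))

	static int bump(void)
	{
		DEMO_GUARD(g, &demo_lock);

		if (counter > 100)
			return -1;	/* early return still unlocks */
		counter++;
		return 0;
	}

	int main(void)
	{
		bump();
		printf("counter = %d\n", counter);
		return 0;
	}

The kernel macros add type checking and conditional variants on top, but the lifetime rule is the same: acquisition at the declaration, release at every scope exit, including early returns.
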
inode->bi_flags |= BCH_INODE_has_case_insensitive; @@ -986,14 +986,14 @@ int bch2_check_inode_has_case_insensitive(struct btree_trans *trans, if (dir.bi_parent_subvol) { ret = bch2_subvolume_get_snapshot(trans, dir.bi_parent_subvol, &snapshot); if (ret) - goto err; + return ret; snapshot_overwrites = NULL; } ret = bch2_inode_find_by_inum_snapshot(trans, dir.bi_dir, snapshot, &dir, 0); if (ret) - goto err; + return ret; if (!(dir.bi_flags & BCH_INODE_has_case_insensitive)) { prt_printf(&buf, "parent of casefolded dir with has_case_insensitive not set\n"); @@ -1001,13 +1001,13 @@ int bch2_check_inode_has_case_insensitive(struct btree_trans *trans, ret = bch2_inum_snapshot_to_path(trans, dir.bi_inum, dir.bi_snapshot, snapshot_overwrites, &buf); if (ret) - goto err; + return ret; if (fsck_err(trans, inode_parent_has_case_insensitive_not_set, "%s", buf.buf)) { dir.bi_flags |= BCH_INODE_has_case_insensitive; ret = __bch2_fsck_write_inode(trans, &dir); if (ret) - goto err; + return ret; } } @@ -1019,9 +1019,7 @@ int bch2_check_inode_has_case_insensitive(struct btree_trans *trans, break; } out: -err: fsck_err: - printbuf_exit(&buf); if (ret) return ret; diff --git a/fs/bcachefs/nocow_locking.c b/fs/bcachefs/nocow_locking.c index 962218fa68ec..58cfd540c6d6 100644 --- a/fs/bcachefs/nocow_locking.c +++ b/fs/bcachefs/nocow_locking.c @@ -47,7 +47,7 @@ bool __bch2_bucket_nocow_trylock(struct nocow_lock_bucket *l, int v, lock_val = flags ? 1 : -1; unsigned i; - spin_lock(&l->lock); + guard(spinlock)(&l->lock); for (i = 0; i < ARRAY_SIZE(l->b); i++) if (l->b[i] == dev_bucket) @@ -58,21 +58,19 @@ bool __bch2_bucket_nocow_trylock(struct nocow_lock_bucket *l, l->b[i] = dev_bucket; goto take_lock; } -fail: - spin_unlock(&l->lock); + return false; got_entry: v = atomic_read(&l->l[i]); if (lock_val > 0 ? v < 0 : v > 0) - goto fail; + return false; take_lock: v = atomic_read(&l->l[i]); /* Overflow? 
*/ if (v && sign(v + lock_val) != sign(v)) - goto fail; + return false; atomic_add(lock_val, &l->l[i]); - spin_unlock(&l->lock); return true; } diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c index b1cf88905b81..921f9049912d 100644 --- a/fs/bcachefs/opts.c +++ b/fs/bcachefs/opts.c @@ -584,7 +584,7 @@ void bch2_opt_hook_post_set(struct bch_fs *c, struct bch_dev *ca, u64 inum, break; case Opt_discard: if (!ca) { - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); for_each_member_device(c, ca) { struct bch_member *m = bch2_members_v2_get_mut(ca->disk_sb.sb, ca->dev_idx); @@ -592,7 +592,6 @@ void bch2_opt_hook_post_set(struct bch_fs *c, struct bch_dev *ca, u64 inum, } bch2_write_super(c); - mutex_unlock(&c->sb_lock); } break; case Opt_version_upgrade: @@ -613,7 +612,6 @@ int bch2_parse_one_mount_opt(struct bch_fs *c, struct bch_opts *opts, struct printbuf *parse_later, const char *name, const char *val) { - struct printbuf err = PRINTBUF; u64 v; int ret, id; @@ -638,46 +636,36 @@ int bch2_parse_one_mount_opt(struct bch_fs *c, struct bch_opts *opts, val = bch2_opt_val_synonym_lookup(name, val); if (!(bch2_opt_table[id].flags & OPT_MOUNT)) - goto bad_opt; + return -BCH_ERR_option_name; if (id == Opt_acl && !IS_ENABLED(CONFIG_BCACHEFS_POSIX_ACL)) - goto bad_opt; + return -BCH_ERR_option_name; if ((id == Opt_usrquota || id == Opt_grpquota) && !IS_ENABLED(CONFIG_BCACHEFS_QUOTA)) - goto bad_opt; + return -BCH_ERR_option_name; + CLASS(printbuf, err)(); ret = bch2_opt_parse(c, &bch2_opt_table[id], val, &v, &err); if (ret == -BCH_ERR_option_needs_open_fs) { - ret = 0; - if (parse_later) { prt_printf(parse_later, "%s=%s,", name, val); if (parse_later->allocation_failure) - ret = -ENOMEM; + return -ENOMEM; } - goto out; + return 0; } if (ret < 0) - goto bad_val; + return -BCH_ERR_option_value; if (opts) bch2_opt_set_by_id(opts, id, v); - ret = 0; -out: - printbuf_exit(&err); - return ret; -bad_opt: - ret = -BCH_ERR_option_name; - goto out; -bad_val: - ret = -BCH_ERR_option_value; - goto out; + return 0; } int bch2_parse_mount_opts(struct bch_fs *c, struct bch_opts *opts, @@ -805,11 +793,10 @@ bool __bch2_opt_set_sb(struct bch_sb *sb, int dev_idx, bool bch2_opt_set_sb(struct bch_fs *c, struct bch_dev *ca, const struct bch_option *opt, u64 v) { - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); bool changed = __bch2_opt_set_sb(c->disk_sb.sb, ca ? 
ca->dev_idx : -1, opt, v); if (changed) bch2_write_super(c); - mutex_unlock(&c->sb_lock); return changed; } diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index 63f8e254495c..84ce69a7f131 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -150,12 +150,12 @@ enum fsck_err_opts { NULL, "Number of consecutive write errors allowed before kicking out a device")\ x(metadata_replicas, u8, \ OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ - OPT_UINT(1, BCH_REPLICAS_MAX), \ + OPT_UINT(1, BCH_REPLICAS_MAX + 1), \ BCH_SB_META_REPLICAS_WANT, 1, \ "#", "Number of metadata replicas") \ x(data_replicas, u8, \ OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ - OPT_UINT(1, BCH_REPLICAS_MAX), \ + OPT_UINT(1, BCH_REPLICAS_MAX + 1), \ BCH_SB_DATA_REPLICAS_WANT, 1, \ "#", "Number of data replicas") \ x(metadata_replicas_required, u8, \ @@ -165,7 +165,7 @@ enum fsck_err_opts { "#", NULL) \ x(data_replicas_required, u8, \ OPT_FS|OPT_FORMAT|OPT_MOUNT, \ - OPT_UINT(1, BCH_REPLICAS_MAX), \ + OPT_UINT(1, BCH_REPLICAS_MAX + 1), \ BCH_SB_DATA_REPLICAS_REQ, 1, \ "#", NULL) \ x(encoded_extent_max, u32, \ @@ -529,7 +529,7 @@ enum fsck_err_opts { "size", "Specifies the bucket size; must be greater than the btree node size")\ x(durability, u8, \ OPT_DEVICE|OPT_RUNTIME|OPT_SB_FIELD_ONE_BIAS, \ - OPT_UINT(0, BCH_REPLICAS_MAX), \ + OPT_UINT(0, BCH_REPLICAS_MAX + 1), \ BCH_MEMBER_DURABILITY, 1, \ "n", "Data written to this device will be considered\n"\ "to have already been replicated n times") \ diff --git a/fs/bcachefs/printbuf.h b/fs/bcachefs/printbuf.h index 8f4e28d440ac..907e5c97550b 100644 --- a/fs/bcachefs/printbuf.h +++ b/fs/bcachefs/printbuf.h @@ -295,4 +295,8 @@ static inline void printbuf_atomic_dec(struct printbuf *buf) buf->atomic--; } +DEFINE_GUARD(printbuf_atomic, struct printbuf *, + printbuf_atomic_inc(_T), + printbuf_atomic_dec(_T)); + #endif /* _BCACHEFS_PRINTBUF_H */ diff --git a/fs/bcachefs/progress.c b/fs/bcachefs/progress.c index d09898566abe..42353067ba28 100644 --- a/fs/bcachefs/progress.c +++ b/fs/bcachefs/progress.c @@ -46,7 +46,7 @@ void bch2_progress_update_iter(struct btree_trans *trans, s->last_node = b; if (progress_update_p(s)) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); unsigned percent = s->nodes_total ? 
div64_u64(s->nodes_seen * 100, s->nodes_total) : 0; @@ -56,6 +56,5 @@ void bch2_progress_update_iter(struct btree_trans *trans, bch2_bbpos_to_text(&buf, BBPOS(iter->btree_id, iter->pos)); bch_info(c, "%s", buf.buf); - printbuf_exit(&buf); } } diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c index f241efb1fb50..5f1eff591b29 100644 --- a/fs/bcachefs/quota.c +++ b/fs/bcachefs/quota.c @@ -394,12 +394,10 @@ static int __bch2_quota_set(struct bch_fs *c, struct bkey_s_c k, dq = bkey_s_c_to_quota(k); q = &c->quotas[k.k->p.inode]; - mutex_lock(&q->lock); + guard(mutex)(&q->lock); mq = genradix_ptr_alloc(&q->table, k.k->p.offset, GFP_KERNEL); - if (!mq) { - mutex_unlock(&q->lock); + if (!mq) return -ENOMEM; - } for (i = 0; i < Q_COUNTERS; i++) { mq->c[i].hardlimit = le64_to_cpu(dq.v->c[i].hardlimit); @@ -414,8 +412,6 @@ static int __bch2_quota_set(struct bch_fs *c, struct bkey_s_c k, mq->c[Q_INO].timer = qdq->d_ino_timer; if (qdq && qdq->d_fieldmask & QC_INO_WARNS) mq->c[Q_INO].warns = qdq->d_ino_warns; - - mutex_unlock(&q->lock); } return 0; @@ -522,24 +518,21 @@ advance: int bch2_fs_quota_read(struct bch_fs *c) { + scoped_guard(mutex, &c->sb_lock) { + struct bch_sb_field_quota *sb_quota = bch2_sb_get_or_create_quota(&c->disk_sb); + if (!sb_quota) + return bch_err_throw(c, ENOSPC_sb_quota); - mutex_lock(&c->sb_lock); - struct bch_sb_field_quota *sb_quota = bch2_sb_get_or_create_quota(&c->disk_sb); - if (!sb_quota) { - mutex_unlock(&c->sb_lock); - return bch_err_throw(c, ENOSPC_sb_quota); + bch2_sb_quota_read(c); } - bch2_sb_quota_read(c); - mutex_unlock(&c->sb_lock); - - int ret = bch2_trans_run(c, - for_each_btree_key(trans, iter, BTREE_ID_quotas, POS_MIN, + CLASS(btree_trans, trans)(c); + int ret = for_each_btree_key(trans, iter, BTREE_ID_quotas, POS_MIN, BTREE_ITER_prefetch, k, __bch2_quota_set(c, k, NULL)) ?: for_each_btree_key(trans, iter, BTREE_ID_inodes, POS_MIN, BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, - bch2_fs_quota_read_inode(trans, &iter, k))); + bch2_fs_quota_read_inode(trans, &iter, k)); bch_err_fn(c, ret); return ret; } @@ -550,7 +543,6 @@ static int bch2_quota_enable(struct super_block *sb, unsigned uflags) { struct bch_fs *c = sb->s_fs_info; struct bch_sb_field_quota *sb_quota; - int ret = 0; if (sb->s_flags & SB_RDONLY) return -EROFS; @@ -569,11 +561,12 @@ static int bch2_quota_enable(struct super_block *sb, unsigned uflags) if (uflags & FS_QUOTA_PDQ_ENFD && !c->opts.prjquota) return -EINVAL; - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); sb_quota = bch2_sb_get_or_create_quota(&c->disk_sb); if (!sb_quota) { - ret = bch_err_throw(c, ENOSPC_sb_quota); - goto unlock; + int ret = bch_err_throw(c, ENOSPC_sb_quota); + bch_err_fn(c, ret); + return ret; } if (uflags & FS_QUOTA_UDQ_ENFD) @@ -586,10 +579,7 @@ static int bch2_quota_enable(struct super_block *sb, unsigned uflags) SET_BCH_SB_PRJQUOTA(c->disk_sb.sb, true); bch2_write_super(c); -unlock: - mutex_unlock(&c->sb_lock); - - return bch2_err_class(ret); + return 0; } static int bch2_quota_disable(struct super_block *sb, unsigned uflags) @@ -599,7 +589,7 @@ static int bch2_quota_disable(struct super_block *sb, unsigned uflags) if (sb->s_flags & SB_RDONLY) return -EROFS; - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); if (uflags & FS_QUOTA_UDQ_ENFD) SET_BCH_SB_USRQUOTA(c->disk_sb.sb, false); @@ -610,8 +600,6 @@ static int bch2_quota_disable(struct super_block *sb, unsigned uflags) SET_BCH_SB_PRJQUOTA(c->disk_sb.sb, false); bch2_write_super(c); - mutex_unlock(&c->sb_lock); - return 0; } @@ -700,14 +688,12 @@ 
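
The DEFINE_GUARD(printbuf_atomic, ...) hunk above turns the manual out->atomic++ / --out->atomic bookkeeping into a scope-bound pair. Assuming the stock <linux/cleanup.h> semantics, a hypothetical caller would look like:

	static void demo_to_text(struct printbuf *out)
	{
		guard(printbuf_atomic)(out);	/* printbuf_atomic_inc(out) runs here */

		prt_printf(out, "...");
	}					/* printbuf_atomic_dec(out) runs here */

which is the shape bch2_copygc_wait_to_text() now takes inside its scoped_guard(rcu) block.
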
static int bch2_quota_set_info(struct super_block *sb, int type, { struct bch_fs *c = sb->s_fs_info; struct bch_sb_field_quota *sb_quota; - int ret = 0; if (0) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); qc_info_to_text(&buf, info); pr_info("setting:\n%s", buf.buf); - printbuf_exit(&buf); } if (sb->s_flags & SB_RDONLY) @@ -723,11 +709,12 @@ static int bch2_quota_set_info(struct super_block *sb, int type, ~(QC_SPC_TIMER|QC_INO_TIMER|QC_SPC_WARNS|QC_INO_WARNS)) return -EINVAL; - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); sb_quota = bch2_sb_get_or_create_quota(&c->disk_sb); if (!sb_quota) { - ret = bch_err_throw(c, ENOSPC_sb_quota); - goto unlock; + int ret = bch_err_throw(c, ENOSPC_sb_quota); + bch_err_fn(c, ret); + return bch2_err_class(ret); } if (info->i_fieldmask & QC_SPC_TIMER) @@ -749,10 +736,7 @@ static int bch2_quota_set_info(struct super_block *sb, int type, bch2_sb_quota_read(c); bch2_write_super(c); -unlock: - mutex_unlock(&c->sb_lock); - - return bch2_err_class(ret); + return 0; } /* Get/set individual quotas: */ @@ -778,15 +762,13 @@ static int bch2_get_quota(struct super_block *sb, struct kqid kqid, struct bch_fs *c = sb->s_fs_info; struct bch_memquota_type *q = &c->quotas[kqid.type]; qid_t qid = from_kqid(&init_user_ns, kqid); - struct bch_memquota *mq; memset(qdq, 0, sizeof(*qdq)); - mutex_lock(&q->lock); - mq = genradix_ptr(&q->table, qid); + guard(mutex)(&q->lock); + struct bch_memquota *mq = genradix_ptr(&q->table, qid); if (mq) __bch2_quota_get(qdq, mq); - mutex_unlock(&q->lock); return 0; } @@ -799,21 +781,17 @@ static int bch2_get_next_quota(struct super_block *sb, struct kqid *kqid, qid_t qid = from_kqid(&init_user_ns, *kqid); struct genradix_iter iter; struct bch_memquota *mq; - int ret = 0; - mutex_lock(&q->lock); + guard(mutex)(&q->lock); genradix_for_each_from(&q->table, iter, mq, qid) if (memcmp(mq, page_address(ZERO_PAGE(0)), sizeof(*mq))) { __bch2_quota_get(qdq, mq); *kqid = make_kqid(current_user_ns(), kqid->type, iter.pos); - goto found; + return 0; } - ret = -ENOENT; -found: - mutex_unlock(&q->lock); - return bch2_err_class(ret); + return -ENOENT; } static int bch2_set_quota_trans(struct btree_trans *trans, @@ -821,12 +799,10 @@ static int bch2_set_quota_trans(struct btree_trans *trans, struct qc_dqblk *qdq) { struct btree_iter iter; - struct bkey_s_c k; - int ret; - - k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_quotas, new_quota->k.p, - BTREE_ITER_slots|BTREE_ITER_intent); - ret = bkey_err(k); + struct bkey_s_c k = + bch2_bkey_get_iter(trans, &iter, BTREE_ID_quotas, new_quota->k.p, + BTREE_ITER_slots|BTREE_ITER_intent); + int ret = bkey_err(k); if (unlikely(ret)) return ret; @@ -852,24 +828,22 @@ static int bch2_set_quota(struct super_block *sb, struct kqid qid, struct qc_dqblk *qdq) { struct bch_fs *c = sb->s_fs_info; - struct bkey_i_quota new_quota; - int ret; if (0) { - struct printbuf buf = PRINTBUF; - + CLASS(printbuf, buf)(); qc_dqblk_to_text(&buf, qdq); pr_info("setting:\n%s", buf.buf); - printbuf_exit(&buf); } if (sb->s_flags & SB_RDONLY) return -EROFS; + struct bkey_i_quota new_quota; bkey_quota_init(&new_quota.k_i); new_quota.k.p = POS(qid.type, from_kqid(&init_user_ns, qid)); - ret = bch2_trans_commit_do(c, NULL, NULL, 0, + CLASS(btree_trans, trans)(c); + int ret = commit_do(trans, NULL, NULL, 0, bch2_set_quota_trans(trans, &new_quota, qdq)) ?: __bch2_quota_set(c, bkey_i_to_s_c(&new_quota.k_i), qdq); diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c index 73b463c94966..32fa7cf90b63 100644 --- 
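
Several hunks in this file switch from bch2_trans_get()/bch2_trans_put() to CLASS(btree_trans, trans)(c), which binds the transaction's lifetime to the enclosing scope. A standalone model of the constructor/destructor pairing (toy code — trans_get()/trans_put() here are stand-ins, not the bcachefs functions):

	#include <stdio.h>
	#include <stdlib.h>

	struct trans { int id; };

	static struct trans *trans_get(int id)
	{
		struct trans *t = malloc(sizeof(*t));

		if (!t)
			abort();
		t->id = id;
		return t;
	}

	static void trans_put(struct trans **tp)
	{
		printf("put %d\n", (*tp)->id);
		free(*tp);
	}

	/* CLASS(name, var)(args): construct at the declaration, destruct at scope end */
	#define TRANS_CLASS(var, id)						\
		struct trans *var __attribute__((cleanup(trans_put))) = trans_get(id)

	int main(void)
	{
		TRANS_CLASS(trans, 1);

		printf("using %d\n", trans->id);
		return 0;		/* trans_put() runs automatically here */
	}

One consequence visible above: error paths inside the scope can simply return, as bch2_set_quota() now does, because the destructor runs on every exit.
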
a/fs/bcachefs/rebalance.c +++ b/fs/bcachefs/rebalance.c @@ -235,24 +235,19 @@ static const char * const bch2_rebalance_state_strs[] = { int bch2_set_rebalance_needs_scan_trans(struct btree_trans *trans, u64 inum) { struct btree_iter iter; - struct bkey_s_c k; - struct bkey_i_cookie *cookie; - u64 v; - int ret; - bch2_trans_iter_init(trans, &iter, BTREE_ID_rebalance_work, SPOS(inum, REBALANCE_WORK_SCAN_OFFSET, U32_MAX), BTREE_ITER_intent); - k = bch2_btree_iter_peek_slot(trans, &iter); - ret = bkey_err(k); + struct bkey_s_c k = bch2_btree_iter_peek_slot(trans, &iter); + int ret = bkey_err(k); if (ret) goto err; - v = k.k->type == KEY_TYPE_cookie + u64 v = k.k->type == KEY_TYPE_cookie ? le64_to_cpu(bkey_s_c_to_cookie(k).v->cookie) : 0; - cookie = bch2_trans_kmalloc(trans, sizeof(*cookie)); + struct bkey_i_cookie *cookie = bch2_trans_kmalloc(trans, sizeof(*cookie)); ret = PTR_ERR_OR_ZERO(cookie); if (ret) goto err; @@ -269,8 +264,8 @@ err: int bch2_set_rebalance_needs_scan(struct bch_fs *c, u64 inum) { - int ret = bch2_trans_commit_do(c, NULL, NULL, - BCH_TRANS_COMMIT_no_enospc, + CLASS(btree_trans, trans)(c); + int ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, bch2_set_rebalance_needs_scan_trans(trans, inum)); bch2_rebalance_wakeup(c); return ret; @@ -284,19 +279,15 @@ int bch2_set_fs_needs_rebalance(struct bch_fs *c) static int bch2_clear_rebalance_needs_scan(struct btree_trans *trans, u64 inum, u64 cookie) { struct btree_iter iter; - struct bkey_s_c k; - u64 v; - int ret; - bch2_trans_iter_init(trans, &iter, BTREE_ID_rebalance_work, SPOS(inum, REBALANCE_WORK_SCAN_OFFSET, U32_MAX), BTREE_ITER_intent); - k = bch2_btree_iter_peek_slot(trans, &iter); - ret = bkey_err(k); + struct bkey_s_c k = bch2_btree_iter_peek_slot(trans, &iter); + int ret = bkey_err(k); if (ret) goto err; - v = k.k->type == KEY_TYPE_cookie + u64 v = k.k->type == KEY_TYPE_cookie ? 
le64_to_cpu(bkey_s_c_to_cookie(k).v->cookie) : 0; @@ -373,7 +364,7 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans, } if (trace_rebalance_extent_enabled()) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_bkey_val_to_text(&buf, c, k); prt_newline(&buf); @@ -399,7 +390,6 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans, } trace_rebalance_extent(c, buf.buf); - printbuf_exit(&buf); } return k; @@ -713,17 +703,15 @@ void bch2_rebalance_stop(struct bch_fs *c) int bch2_rebalance_start(struct bch_fs *c) { - struct task_struct *p; - int ret; - if (c->rebalance.thread) return 0; if (c->opts.nochanges) return 0; - p = kthread_create(bch2_rebalance_thread, c, "bch-rebalance/%s", c->name); - ret = PTR_ERR_OR_ZERO(p); + struct task_struct *p = + kthread_create(bch2_rebalance_thread, c, "bch-rebalance/%s", c->name); + int ret = PTR_ERR_OR_ZERO(p); bch_err_msg(c, ret, "creating rebalance thread"); if (ret) return ret; @@ -779,7 +767,7 @@ static int check_rebalance_work_one(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct bkey_s_c extent_k, rebalance_k; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); int ret = bkey_err(extent_k = bch2_btree_iter_peek(trans, extent_iter)) ?: bkey_err(rebalance_k = bch2_btree_iter_peek(trans, rebalance_iter)); @@ -833,7 +821,7 @@ static int check_rebalance_work_one(struct btree_trans *trans, ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_rebalance_work, extent_k.k->p, false); if (ret) - goto err; + return ret; } if (fsck_err_on(should_have_rebalance && !have_rebalance, @@ -842,22 +830,20 @@ static int check_rebalance_work_one(struct btree_trans *trans, ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_rebalance_work, extent_k.k->p, true); if (ret) - goto err; + return ret; } if (cmp <= 0) bch2_btree_iter_advance(trans, extent_iter); if (cmp >= 0) bch2_btree_iter_advance(trans, rebalance_iter); -err: fsck_err: - printbuf_exit(&buf); return ret; } int bch2_check_rebalance_work(struct bch_fs *c) { - struct btree_trans *trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); struct btree_iter rebalance_iter, extent_iter; int ret = 0; @@ -884,6 +870,5 @@ int bch2_check_rebalance_work(struct bch_fs *c) bch2_bkey_buf_exit(&last_flushed, c); bch2_trans_iter_exit(trans, &extent_iter); bch2_trans_iter_exit(trans, &rebalance_iter); - bch2_trans_put(trans); return ret < 0 ? 
ret : 0; } diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 0def4ecb7f88..a8eea4787a3e 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -40,7 +40,7 @@ int bch2_btree_lost_data(struct bch_fs *c, u64 b = BIT_ULL(btree); int ret = 0; - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); if (!(c->sb.btrees_lost_data & b)) { @@ -109,8 +109,6 @@ int bch2_btree_lost_data(struct bch_fs *c, } out: bch2_write_super(c); - mutex_unlock(&c->sb_lock); - return ret; } @@ -123,7 +121,7 @@ static void kill_btree(struct bch_fs *c, enum btree_id btree) /* for -o reconstruct_alloc: */ void bch2_reconstruct_alloc(struct bch_fs *c) { - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); __set_bit_le64(BCH_RECOVERY_PASS_STABLE_check_allocations, ext->recovery_passes_required); @@ -167,7 +165,6 @@ void bch2_reconstruct_alloc(struct bch_fs *c) c->disk_sb.sb->features[0] &= ~cpu_to_le64(BIT_ULL(BCH_FEATURE_no_alloc_info)); bch2_write_super(c); - mutex_unlock(&c->sb_lock); for (unsigned i = 0; i < btree_id_nr_alive(c); i++) if (btree_id_is_alloc(i)) @@ -339,14 +336,15 @@ static int journal_sort_seq_cmp(const void *_l, const void *_r) return cmp_int(l->journal_seq - 1, r->journal_seq - 1); } +DEFINE_DARRAY_NAMED(darray_journal_keys, struct journal_key *) + int bch2_journal_replay(struct bch_fs *c) { struct journal_keys *keys = &c->journal_keys; - DARRAY(struct journal_key *) keys_sorted = { 0 }; + CLASS(darray_journal_keys, keys_sorted)(); struct journal *j = &c->journal; u64 start_seq = c->journal_replay_seq_start; u64 end_seq = c->journal_replay_seq_start; - struct btree_trans *trans = NULL; bool immediate_flush = false; int ret = 0; @@ -354,13 +352,13 @@ int bch2_journal_replay(struct bch_fs *c) ret = bch2_journal_log_msg(c, "Starting journal replay (%zu keys in entries %llu-%llu)", keys->nr, start_seq, end_seq); if (ret) - goto err; + return ret; } BUG_ON(!atomic_read(&keys->ref)); move_gap(keys, keys->nr); - trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); /* * Replay accounting keys first: we can't allow the write buffer to @@ -380,7 +378,7 @@ int bch2_journal_replay(struct bch_fs *c) BCH_WATERMARK_reclaim, bch2_journal_replay_accounting_key(trans, k)); if (bch2_fs_fatal_err_on(ret, c, "error replaying accounting; %s", bch2_err_str(ret))) - goto err; + return ret; k->overwritten = true; } @@ -414,7 +412,7 @@ int bch2_journal_replay(struct bch_fs *c) if (ret) { ret = darray_push(&keys_sorted, k); if (ret) - goto err; + return ret; } } @@ -445,22 +443,16 @@ int bch2_journal_replay(struct bch_fs *c) : 0), bch2_journal_replay_key(trans, k)); if (ret) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_btree_id_level_to_text(&buf, k->btree_id, k->level); bch_err_msg(c, ret, "while replaying key at %s:", buf.buf); - printbuf_exit(&buf); - goto err; + return ret; } BUG_ON(k->btree_id != BTREE_ID_accounting && !k->overwritten); } - /* - * We need to put our btree_trans before calling flush_all_pins(), since - * that will use a btree_trans internally - */ - bch2_trans_put(trans); - trans = NULL; + bch2_trans_unlock_long(trans); if (!c->opts.retain_recovery_info && c->recovery.pass_done >= BCH_RECOVERY_PASS_journal_replay) @@ -479,12 +471,7 @@ int bch2_journal_replay(struct bch_fs *c) if (keys->nr) bch2_journal_log_msg(c, "journal replay finished"); -err: - if (trans) - bch2_trans_put(trans); - 
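
The DEFINE_DARRAY_NAMED() line above gives the journal-key pointer array a named type so that it, too, can be scope-managed via CLASS(). The underlying darray is a simple growable array; roughly (an approximation for illustration, not the bcachefs implementation):

	#include <errno.h>
	#include <stdlib.h>

	struct demo_darray {
		void	**data;
		size_t	nr;		/* elements in use */
		size_t	size;		/* elements allocated */
	};

	static int demo_darray_push(struct demo_darray *d, void *v)
	{
		if (d->nr == d->size) {
			size_t new_size = d->size ? d->size * 2 : 8;
			void **p = realloc(d->data, new_size * sizeof(*p));

			if (!p)
				return -ENOMEM;
			d->data = p;
			d->size = new_size;
		}
		d->data[d->nr++] = v;
		return 0;
	}

	static void demo_darray_exit(struct demo_darray *d)
	{
		free(d->data);
		d->data = NULL;
		d->nr = d->size = 0;
	}

With a named type, pairing the exit function with a cleanup attribute is mechanical, which is what lets bch2_journal_replay() drop its explicit darray_exit() call along with the shared err: label.
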
darray_exit(&keys_sorted); - bch_err_fn(c, ret); - return ret; + return 0; } /* journal replay early: */ @@ -596,7 +583,7 @@ static int journal_replay_early(struct bch_fs *c, static int read_btree_roots(struct bch_fs *c) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); int ret = 0; for (unsigned i = 0; i < btree_id_nr_alive(c); i++) { @@ -632,7 +619,6 @@ static int read_btree_roots(struct bch_fs *c) } } fsck_err: - printbuf_exit(&buf); return ret; } @@ -666,7 +652,7 @@ static bool check_version_upgrade(struct bch_fs *c) } if (new_version > old_version) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); if (old_version < bcachefs_metadata_required_upgrade_below) prt_str(&buf, "Version upgrade required:\n"); @@ -699,14 +685,12 @@ static bool check_version_upgrade(struct bch_fs *c) } bch_notice(c, "%s", buf.buf); - printbuf_exit(&buf); - ret = true; } if (new_version > c->sb.version_incompat_allowed && c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_str(&buf, "Now allowing incompatible features up to "); bch2_version_to_text(&buf, new_version); @@ -715,8 +699,6 @@ static bool check_version_upgrade(struct bch_fs *c) prt_newline(&buf); bch_notice(c, "%s", buf.buf); - printbuf_exit(&buf); - ret = true; } @@ -796,15 +778,14 @@ int bch2_fs_recovery(struct bch_fs *c) u64 sb_passes = bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0])); if (sb_passes) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_str(&buf, "superblock requires following recovery passes to be run:\n "); prt_bitflags(&buf, bch2_recovery_passes, sb_passes); bch_info(c, "%s", buf.buf); - printbuf_exit(&buf); } if (bch2_check_version_downgrade(c)) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_str(&buf, "Version downgrade required:"); @@ -820,7 +801,6 @@ int bch2_fs_recovery(struct bch_fs *c) } bch_info(c, "%s", buf.buf); - printbuf_exit(&buf); write_sb = true; } @@ -937,11 +917,10 @@ use_clean: if (ret) goto err; - ret = bch2_fs_resize_on_mount(c); - if (ret) { - up_write(&c->state_lock); + scoped_guard(rwsem_write, &c->state_lock) + ret = bch2_fs_resize_on_mount(c); + if (ret) goto err; - } if (c->sb.features & BIT_ULL(BCH_FEATURE_small_image)) { bch_info(c, "filesystem is an unresized image file, mounting ro"); @@ -1119,10 +1098,9 @@ use_clean: bch2_move_stats_init(&stats, "recovery"); - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_version_to_text(&buf, c->sb.version_min); bch_info(c, "scanning for old btree nodes: min_version %s", buf.buf); - printbuf_exit(&buf); ret = bch2_fs_read_write_early(c) ?: bch2_scan_old_btree_nodes(c, &stats); @@ -1150,14 +1128,13 @@ final_out: err: fsck_err: { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_log_msg_start(c, &buf); prt_printf(&buf, "error in recovery: %s\n", bch2_err_str(ret)); bch2_fs_emergency_read_only2(c, &buf); bch2_print_str(c, KERN_ERR, buf.buf); - printbuf_exit(&buf); } goto final_out; } @@ -1167,40 +1144,54 @@ int bch2_fs_initialize(struct bch_fs *c) struct bch_inode_unpacked root_inode, lostfound_inode; struct bkey_inode_buf packed_inode; struct qstr lostfound = QSTR("lost+found"); - struct bch_member *m; int ret; bch_notice(c, "initializing new filesystem"); set_bit(BCH_FS_new_fs, &c->flags); - mutex_lock(&c->sb_lock); - c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done); - c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << 
BCH_COMPAT_bformat_overflow_done); + scoped_guard(mutex, &c->sb_lock) { + c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done); + c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done); - bch2_check_version_downgrade(c); + bch2_check_version_downgrade(c); - if (c->opts.version_upgrade != BCH_VERSION_UPGRADE_none) { - bch2_sb_upgrade(c, bcachefs_metadata_version_current, false); - SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, bcachefs_metadata_version_current); - bch2_write_super(c); - } - - for_each_member_device(c, ca) { - m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); - SET_BCH_MEMBER_FREESPACE_INITIALIZED(m, false); - } + if (c->opts.version_upgrade != BCH_VERSION_UPGRADE_none) { + bch2_sb_upgrade(c, bcachefs_metadata_version_current, false); + SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, bcachefs_metadata_version_current); + bch2_write_super(c); + } - bch2_sb_members_to_cpu(c); + for_each_member_device(c, ca) { + struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); + SET_BCH_MEMBER_FREESPACE_INITIALIZED(m, false); + } - bch2_write_super(c); - mutex_unlock(&c->sb_lock); + bch2_write_super(c); + } set_bit(BCH_FS_btree_running, &c->flags); - set_bit(BCH_FS_may_go_rw, &c->flags); for (unsigned i = 0; i < BTREE_ID_NR; i++) bch2_btree_root_alloc_fake(c, i, 0); + for_each_member_device(c, ca) { + ret = bch2_dev_usage_init(ca, false); + if (ret) { + bch2_dev_put(ca); + goto err; + } + } + + /* + * Write out the superblock and journal buckets, now that we can do + * btree updates + */ + bch_verbose(c, "marking superblocks"); + ret = bch2_trans_mark_dev_sbs(c); + bch_err_msg(c, ret, "marking superblocks"); + if (ret) + goto err; + ret = bch2_fs_journal_alloc(c); if (ret) goto err; @@ -1213,28 +1204,12 @@ int bch2_fs_initialize(struct bch_fs *c) if (ret) goto err; + set_bit(BCH_FS_may_go_rw, &c->flags); ret = bch2_fs_read_write_early(c); if (ret) goto err; - set_bit(BCH_FS_accounting_replay_done, &c->flags); - bch2_journal_set_replay_done(&c->journal); - - for_each_member_device(c, ca) { - ret = bch2_dev_usage_init(ca, false); - if (ret) { - bch2_dev_put(ca); - goto err; - } - } - - /* - * Write out the superblock and journal buckets, now that we can do - * btree updates - */ - bch_verbose(c, "marking superblocks"); - ret = bch2_trans_mark_dev_sbs(c); - bch_err_msg(c, ret, "marking superblocks"); + ret = bch2_journal_replay(c); if (ret) goto err; @@ -1292,12 +1267,11 @@ int bch2_fs_initialize(struct bch_fs *c) if (ret) goto err; - mutex_lock(&c->sb_lock); - SET_BCH_SB_INITIALIZED(c->disk_sb.sb, true); - SET_BCH_SB_CLEAN(c->disk_sb.sb, false); - - bch2_write_super(c); - mutex_unlock(&c->sb_lock); + scoped_guard(mutex, &c->sb_lock) { + SET_BCH_SB_INITIALIZED(c->disk_sb.sb, true); + SET_BCH_SB_CLEAN(c->disk_sb.sb, false); + bch2_write_super(c); + } c->recovery.curr_pass = BCH_RECOVERY_PASS_NR; return 0; diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c index 6a039e011064..f9d1c4921392 100644 --- a/fs/bcachefs/recovery_passes.c +++ b/fs/bcachefs/recovery_passes.c @@ -237,19 +237,21 @@ static int bch2_lookup_root_inode(struct bch_fs *c) subvol_inum inum = BCACHEFS_ROOT_SUBVOL_INUM; struct bch_inode_unpacked inode_u; struct bch_subvolume subvol; + CLASS(btree_trans, trans)(c); - return bch2_trans_do(c, + return lockrestart_do(trans, bch2_subvolume_get(trans, inum.subvol, true, &subvol) ?: bch2_inode_find_by_inum_trans(trans, inum, &inode_u)); } struct recovery_pass_fn 
{ int (*fn)(struct bch_fs *); + const char *name; unsigned when; }; static struct recovery_pass_fn recovery_pass_fns[] = { -#define x(_fn, _id, _when) { .fn = bch2_##_fn, .when = _when }, +#define x(_fn, _id, _when) { .fn = bch2_##_fn, .name = #_fn, .when = _when }, BCH_RECOVERY_PASSES() #undef x }; @@ -346,13 +348,11 @@ int __bch2_run_explicit_recovery_pass(struct bch_fs *c, lockdep_assert_held(&c->sb_lock); bch2_printbuf_make_room(out, 1024); - out->atomic++; - - unsigned long lockflags; - spin_lock_irqsave(&r->lock, lockflags); + guard(printbuf_atomic)(out); + guard(spinlock_irq)(&r->lock); if (!recovery_pass_needs_set(c, pass, &flags)) - goto out; + return 0; bool in_recovery = test_bit(BCH_FS_in_recovery, &c->flags); bool rewind = in_recovery && @@ -369,8 +369,7 @@ int __bch2_run_explicit_recovery_pass(struct bch_fs *c, (!in_recovery || r->curr_pass >= BCH_RECOVERY_PASS_set_may_go_rw)) { prt_printf(out, "need recovery pass %s (%u), but already rw\n", bch2_recovery_passes[pass], pass); - ret = bch_err_throw(c, cannot_rewind_recovery); - goto out; + return bch_err_throw(c, cannot_rewind_recovery); } if (ratelimit) @@ -400,9 +399,7 @@ int __bch2_run_explicit_recovery_pass(struct bch_fs *c, if (p->when & PASS_ONLINE) bch2_run_async_recovery_passes(c); } -out: - spin_unlock_irqrestore(&r->lock, lockflags); - --out->atomic; + return ret; } @@ -458,16 +455,14 @@ int bch2_run_print_explicit_recovery_pass(struct bch_fs *c, enum bch_recovery_pa if (!recovery_pass_needs_set(c, pass, &flags)) return 0; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_log_msg_start(c, &buf); - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); int ret = __bch2_run_explicit_recovery_pass(c, &buf, pass, RUN_RECOVERY_PASS_nopersistent); - mutex_unlock(&c->sb_lock); bch2_print_str(c, KERN_NOTICE, buf.buf); - printbuf_exit(&buf); return ret; } @@ -486,6 +481,7 @@ static int bch2_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass) r->passes_to_run &= ~BIT_ULL(pass); if (ret) { + bch_err(c, "%s(): error %s", p->name, bch2_err_str(ret)); r->passes_failing |= BIT_ULL(pass); return ret; } diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index 8d8e045b6bd5..60abd89d7c9f 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -183,7 +183,7 @@ static int bch2_indirect_extent_missing_error(struct btree_trans *trans, u64 live_end = REFLINK_P_IDX(p.v) + p.k->size; u64 refd_start = live_start - le32_to_cpu(p.v->front_pad); u64 refd_end = live_end + le32_to_cpu(p.v->back_pad); - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); int ret = 0; BUG_ON(missing_start < refd_start); @@ -195,7 +195,7 @@ static int bch2_indirect_extent_missing_error(struct btree_trans *trans, prt_printf(&buf, "pointer to missing indirect extent in "); ret = bch2_inum_snap_offset_err_msg_trans(trans, &buf, missing_pos); if (ret) - goto err; + return ret; prt_printf(&buf, "-%llu\n", (missing_pos.offset + (missing_end - missing_start)) << 9); bch2_bkey_val_to_text(&buf, c, p.s_c); @@ -207,7 +207,7 @@ static int bch2_indirect_extent_missing_error(struct btree_trans *trans, struct bkey_i_reflink_p *new = bch2_bkey_make_mut_noupdate_typed(trans, p.s_c, reflink_p); ret = PTR_ERR_OR_ZERO(new); if (ret) - goto err; + return ret; /* * Is the missing range not actually needed? 
@@ -238,15 +238,13 @@ static int bch2_indirect_extent_missing_error(struct btree_trans *trans, ret = bch2_btree_insert_trans(trans, BTREE_ID_extents, &new->k_i, BTREE_TRIGGER_norun); if (ret) - goto err; + return ret; if (should_commit) ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?: bch_err_throw(c, transaction_restart_nested); } -err: fsck_err: - printbuf_exit(&buf); return ret; } @@ -301,7 +299,7 @@ static int trans_trigger_reflink_p_segment(struct btree_trans *trans, enum btree_iter_update_trigger_flags flags) { struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); s64 offset_into_extent = *idx - REFLINK_P_IDX(p.v); struct btree_iter iter; @@ -360,7 +358,6 @@ next: err: fsck_err: bch2_trans_iter_exit(trans, &iter); - printbuf_exit(&buf); return ret; } @@ -374,7 +371,7 @@ static s64 gc_trigger_reflink_p_segment(struct btree_trans *trans, int add = !(flags & BTREE_TRIGGER_overwrite) ? 1 : -1; u64 next_idx = REFLINK_P_IDX(p.v) + p.k->size + le32_to_cpu(p.v->back_pad); s64 ret = 0; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); if (r_idx >= c->reflink_gc_nr) goto not_found; @@ -394,12 +391,10 @@ not_found: if (flags & BTREE_TRIGGER_check_repair) { ret = bch2_indirect_extent_missing_error(trans, p, *idx, next_idx, false); if (ret) - goto err; + return ret; } *idx = next_idx; -err: - printbuf_exit(&buf); return ret; } @@ -498,20 +493,15 @@ static int bch2_make_extent_indirect(struct btree_trans *trans, bool reflink_p_may_update_opts_field) { struct bch_fs *c = trans->c; - struct btree_iter reflink_iter = {}; - struct bkey_s_c k; - struct bkey_i *r_v; - struct bkey_i_reflink_p *r_p; - __le64 *refcount; - int ret; if (orig->k.type == KEY_TYPE_inline_data) bch2_check_set_feature(c, BCH_FEATURE_reflink_inline_data); + struct btree_iter reflink_iter; bch2_trans_iter_init(trans, &reflink_iter, BTREE_ID_reflink, POS_MAX, BTREE_ITER_intent); - k = bch2_btree_iter_peek_prev(trans, &reflink_iter); - ret = bkey_err(k); + struct bkey_s_c k = bch2_btree_iter_peek_prev(trans, &reflink_iter); + int ret = bkey_err(k); if (ret) goto err; @@ -523,7 +513,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans, if (bkey_ge(reflink_iter.pos, POS(0, REFLINK_P_IDX_MAX - orig->k.size))) return -ENOSPC; - r_v = bch2_trans_kmalloc(trans, sizeof(__le64) + bkey_bytes(&orig->k)); + struct bkey_i *r_v = bch2_trans_kmalloc(trans, sizeof(__le64) + bkey_bytes(&orig->k)); ret = PTR_ERR_OR_ZERO(r_v); if (ret) goto err; @@ -536,7 +526,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans, set_bkey_val_bytes(&r_v->k, sizeof(__le64) + bkey_val_bytes(&orig->k)); - refcount = bkey_refcount(bkey_i_to_s(r_v)); + __le64 *refcount = bkey_refcount(bkey_i_to_s(r_v)); *refcount = 0; memcpy(refcount + 1, &orig->v, bkey_val_bytes(&orig->k)); @@ -549,7 +539,8 @@ static int bch2_make_extent_indirect(struct btree_trans *trans, * so we know it will be big enough: */ orig->k.type = KEY_TYPE_reflink_p; - r_p = bkey_i_to_reflink_p(orig); + + struct bkey_i_reflink_p *r_p = bkey_i_to_reflink_p(orig); set_bkey_val_bytes(&r_p->k, sizeof(r_p->v)); /* FORTIFY_SOURCE is broken here, and doesn't provide unsafe_memset() */ @@ -598,7 +589,6 @@ s64 bch2_remap_range(struct bch_fs *c, u64 new_i_size, s64 *i_sectors_delta, bool may_change_src_io_path_opts) { - struct btree_trans *trans; struct btree_iter dst_iter, src_iter; struct bkey_s_c src_k; struct bkey_buf new_dst, new_src; @@ -623,7 +613,7 @@ s64 bch2_remap_range(struct bch_fs *c, 
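
All of the struct printbuf buf = PRINTBUF / printbuf_exit(&buf) pairs in this file collapse into CLASS(printbuf, buf)(). The class definition backing that is presumably a one-liner in the bcachefs headers tying the two together; the expected shape, not quoted from the tree:

	DEFINE_CLASS(printbuf, struct printbuf,
		     printbuf_exit(&_T),	/* runs at scope exit */
		     PRINTBUF,			/* initial value */
		     void)

After that, every early return in functions like bch2_indirect_extent_missing_error() frees the buffer without funnelling through a common err: label.
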
bch2_bkey_buf_init(&new_dst); bch2_bkey_buf_init(&new_src); - trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); ret = bch2_inum_opts_get(trans, src_inum, &opts); if (ret) @@ -761,7 +751,6 @@ s64 bch2_remap_range(struct bch_fs *c, bch2_trans_iter_exit(trans, &inode_iter); } while (bch2_err_matches(ret2, BCH_ERR_transaction_restart)); err: - bch2_trans_put(trans); bch2_bkey_buf_exit(&new_src, c); bch2_bkey_buf_exit(&new_dst, c); @@ -779,7 +768,7 @@ static int bch2_gc_write_reflink_key(struct btree_trans *trans, { struct bch_fs *c = trans->c; const __le64 *refcount = bkey_refcount_c(k); - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); struct reflink_gc *r; int ret = 0; @@ -807,7 +796,7 @@ static int bch2_gc_write_reflink_key(struct btree_trans *trans, struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k); ret = PTR_ERR_OR_ZERO(new); if (ret) - goto out; + return ret; if (!r->refcount) new->k.type = KEY_TYPE_deleted; @@ -815,32 +804,30 @@ static int bch2_gc_write_reflink_key(struct btree_trans *trans, *bkey_refcount(bkey_i_to_s(new)) = cpu_to_le64(r->refcount); ret = bch2_trans_update(trans, iter, new, 0); } -out: fsck_err: - printbuf_exit(&buf); return ret; } int bch2_gc_reflink_done(struct bch_fs *c) { + CLASS(btree_trans, trans)(c); size_t idx = 0; - int ret = bch2_trans_run(c, - for_each_btree_key_commit(trans, iter, + int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_reflink, POS_MIN, BTREE_ITER_prefetch, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - bch2_gc_write_reflink_key(trans, &iter, k, &idx))); + bch2_gc_write_reflink_key(trans, &iter, k, &idx)); c->reflink_gc_nr = 0; return ret; } int bch2_gc_reflink_start(struct bch_fs *c) { + CLASS(btree_trans, trans)(c); c->reflink_gc_nr = 0; - int ret = bch2_trans_run(c, - for_each_btree_key(trans, iter, BTREE_ID_reflink, POS_MIN, + int ret = for_each_btree_key(trans, iter, BTREE_ID_reflink, POS_MIN, BTREE_ITER_prefetch, k, ({ const __le64 *refcount = bkey_refcount_c(k); @@ -858,7 +845,7 @@ int bch2_gc_reflink_start(struct bch_fs *c) r->size = k.k->size; r->refcount = 0; 0; - }))); + })); bch_err_fn(c, ret); return ret; diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c index 8383bd7fdb3f..0784283ce78c 100644 --- a/fs/bcachefs/replicas.c +++ b/fs/bcachefs/replicas.c @@ -286,11 +286,8 @@ bool bch2_replicas_marked_locked(struct bch_fs *c, bool bch2_replicas_marked(struct bch_fs *c, struct bch_replicas_entry_v1 *search) { - percpu_down_read(&c->mark_lock); - bool ret = bch2_replicas_marked_locked(c, search); - percpu_up_read(&c->mark_lock); - - return ret; + guard(percpu_read)(&c->mark_lock); + return bch2_replicas_marked_locked(c, search); } noinline @@ -305,14 +302,14 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c, memset(&new_r, 0, sizeof(new_r)); memset(&new_gc, 0, sizeof(new_gc)); - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); if (c->replicas_gc.entries && !__replicas_has_entry(&c->replicas_gc, new_entry)) { new_gc = cpu_replicas_add_entry(c, &c->replicas_gc, new_entry); if (!new_gc.entries) { ret = bch_err_throw(c, ENOMEM_cpu_replicas); - goto err; + goto out; } } @@ -320,12 +317,12 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c, new_r = cpu_replicas_add_entry(c, &c->replicas, new_entry); if (!new_r.entries) { ret = bch_err_throw(c, ENOMEM_cpu_replicas); - goto err; + goto out; } ret = bch2_cpu_replicas_to_sb_replicas(c, &new_r); if (ret) - goto err; + goto out; } if (!new_r.entries && @@ -338,22 +335,18 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c, 
bch2_write_super(c); /* don't update in memory replicas until changes are persistent */ - percpu_down_write(&c->mark_lock); - if (new_r.entries) - swap(c->replicas, new_r); - if (new_gc.entries) - swap(new_gc, c->replicas_gc); - percpu_up_write(&c->mark_lock); + scoped_guard(percpu_write, &c->mark_lock) { + if (new_r.entries) + swap(c->replicas, new_r); + if (new_gc.entries) + swap(new_gc, c->replicas_gc); + } out: - mutex_unlock(&c->sb_lock); - kfree(new_r.entries); kfree(new_gc.entries); - return ret; -err: bch_err_msg(c, ret, "adding replicas entry"); - goto out; + return ret; } int bch2_mark_replicas(struct bch_fs *c, struct bch_replicas_entry_v1 *r) @@ -371,24 +364,20 @@ int bch2_replicas_gc_end(struct bch_fs *c, int ret) { lockdep_assert_held(&c->replicas_gc_lock); - mutex_lock(&c->sb_lock); - percpu_down_write(&c->mark_lock); - - ret = ret ?: - bch2_cpu_replicas_to_sb_replicas(c, &c->replicas_gc); - if (!ret) - swap(c->replicas, c->replicas_gc); - - kfree(c->replicas_gc.entries); - c->replicas_gc.entries = NULL; + guard(mutex)(&c->sb_lock); + scoped_guard(percpu_write, &c->mark_lock) { + ret = ret ?: + bch2_cpu_replicas_to_sb_replicas(c, &c->replicas_gc); + if (!ret) + swap(c->replicas, c->replicas_gc); - percpu_up_write(&c->mark_lock); + kfree(c->replicas_gc.entries); + c->replicas_gc.entries = NULL; + } if (!ret) bch2_write_super(c); - mutex_unlock(&c->sb_lock); - return ret; } @@ -399,7 +388,7 @@ int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask) lockdep_assert_held(&c->replicas_gc_lock); - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); BUG_ON(c->replicas_gc.entries); c->replicas_gc.nr = 0; @@ -420,7 +409,6 @@ int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask) c->replicas_gc.entry_size, GFP_KERNEL); if (!c->replicas_gc.entries) { - mutex_unlock(&c->sb_lock); bch_err(c, "error allocating c->replicas_gc"); return bch_err_throw(c, ENOMEM_replicas_gc); } @@ -432,8 +420,6 @@ int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask) e, c->replicas_gc.entry_size); bch2_cpu_replicas_sort(&c->replicas_gc); - mutex_unlock(&c->sb_lock); - return 0; } @@ -461,55 +447,48 @@ retry: return bch_err_throw(c, ENOMEM_replicas_gc); } - mutex_lock(&c->sb_lock); - percpu_down_write(&c->mark_lock); - - if (nr != c->replicas.nr || - new.entry_size != c->replicas.entry_size) { - percpu_up_write(&c->mark_lock); - mutex_unlock(&c->sb_lock); - kfree(new.entries); - goto retry; - } - - for (unsigned i = 0; i < c->replicas.nr; i++) { - struct bch_replicas_entry_v1 *e = - cpu_replicas_entry(&c->replicas, i); + guard(mutex)(&c->sb_lock); + scoped_guard(percpu_write, &c->mark_lock) { + if (nr != c->replicas.nr || + new.entry_size != c->replicas.entry_size) { + kfree(new.entries); + goto retry; + } - struct disk_accounting_pos k = { - .type = BCH_DISK_ACCOUNTING_replicas, - }; + for (unsigned i = 0; i < c->replicas.nr; i++) { + struct bch_replicas_entry_v1 *e = + cpu_replicas_entry(&c->replicas, i); - unsafe_memcpy(&k.replicas, e, replicas_entry_bytes(e), - "embedded variable length struct"); + struct disk_accounting_pos k = { + .type = BCH_DISK_ACCOUNTING_replicas, + }; - struct bpos p = disk_accounting_pos_to_bpos(&k); + unsafe_memcpy(&k.replicas, e, replicas_entry_bytes(e), + "embedded variable length struct"); - struct bch_accounting_mem *acc = &c->accounting; - bool kill = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), - accounting_pos_cmp, &p) >= acc->k.nr; + struct bpos p = disk_accounting_pos_to_bpos(&k); - if (e->data_type == BCH_DATA_journal || 
!kill) - memcpy(cpu_replicas_entry(&new, new.nr++), - e, new.entry_size); - } + struct bch_accounting_mem *acc = &c->accounting; + bool kill = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), + accounting_pos_cmp, &p) >= acc->k.nr; - bch2_cpu_replicas_sort(&new); + if (e->data_type == BCH_DATA_journal || !kill) + memcpy(cpu_replicas_entry(&new, new.nr++), + e, new.entry_size); + } - ret = bch2_cpu_replicas_to_sb_replicas(c, &new); + bch2_cpu_replicas_sort(&new); - if (!ret) - swap(c->replicas, new); + ret = bch2_cpu_replicas_to_sb_replicas(c, &new); - kfree(new.entries); + if (!ret) + swap(c->replicas, new); - percpu_up_write(&c->mark_lock); + kfree(new.entries); + } if (!ret) bch2_write_super(c); - - mutex_unlock(&c->sb_lock); - return ret; } @@ -597,9 +576,8 @@ int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c) bch2_cpu_replicas_sort(&new_r); - percpu_down_write(&c->mark_lock); + guard(percpu_write)(&c->mark_lock); swap(c->replicas, new_r); - percpu_up_write(&c->mark_lock); kfree(new_r.entries); @@ -809,9 +787,8 @@ bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs, unsigned flags, bool print) { struct bch_replicas_entry_v1 *e; - bool ret = true; - percpu_down_read(&c->mark_lock); + guard(percpu_read)(&c->mark_lock); for_each_cpu_replicas_entry(&c->replicas, e) { unsigned nr_online = 0, nr_failed = 0, dflags = 0; bool metadata = e->data_type < BCH_DATA_user; @@ -847,21 +824,18 @@ bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs, if (dflags & ~flags) { if (print) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_replicas_entry_to_text(&buf, e); bch_err(c, "insufficient devices online (%u) for replicas entry %s", nr_online, buf.buf); - printbuf_exit(&buf); } - ret = false; - break; + return false; } } - percpu_up_read(&c->mark_lock); - return ret; + return true; } unsigned bch2_sb_dev_has_data(struct bch_sb *sb, unsigned dev) @@ -904,11 +878,8 @@ unsigned bch2_sb_dev_has_data(struct bch_sb *sb, unsigned dev) unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca) { - mutex_lock(&c->sb_lock); - unsigned ret = bch2_sb_dev_has_data(c->disk_sb.sb, ca->dev_idx); - mutex_unlock(&c->sb_lock); - - return ret; + guard(mutex)(&c->sb_lock); + return bch2_sb_dev_has_data(c->disk_sb.sb, ca->dev_idx); } void bch2_fs_replicas_exit(struct bch_fs *c) diff --git a/fs/bcachefs/sb-clean.c b/fs/bcachefs/sb-clean.c index 59c8770e4a0e..a5916984565e 100644 --- a/fs/bcachefs/sb-clean.c +++ b/fs/bcachefs/sb-clean.c @@ -89,8 +89,8 @@ int bch2_verify_superblock_clean(struct bch_fs *c, { unsigned i; struct bch_sb_field_clean *clean = *cleanp; - struct printbuf buf1 = PRINTBUF; - struct printbuf buf2 = PRINTBUF; + CLASS(printbuf, buf1)(); + CLASS(printbuf, buf2)(); int ret = 0; if (mustfix_fsck_err_on(j->seq != clean->journal_seq, c, @@ -140,8 +140,6 @@ int bch2_verify_superblock_clean(struct bch_fs *c, l2, buf2.buf); } fsck_err: - printbuf_exit(&buf2); - printbuf_exit(&buf1); return ret; } @@ -150,7 +148,7 @@ struct bch_sb_field_clean *bch2_read_superblock_clean(struct bch_fs *c) struct bch_sb_field_clean *clean, *sb_clean; int ret; - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); sb_clean = bch2_sb_field_get(c->disk_sb.sb, clean); if (fsck_err_on(!sb_clean, c, @@ -158,29 +156,22 @@ struct bch_sb_field_clean *bch2_read_superblock_clean(struct bch_fs *c) "superblock marked clean but clean section not present")) { SET_BCH_SB_CLEAN(c->disk_sb.sb, false); c->sb.clean = false; - mutex_unlock(&c->sb_lock); return 
ERR_PTR(-BCH_ERR_invalid_sb_clean); } clean = kmemdup(sb_clean, vstruct_bytes(&sb_clean->field), GFP_KERNEL); - if (!clean) { - mutex_unlock(&c->sb_lock); + if (!clean) return ERR_PTR(-BCH_ERR_ENOMEM_read_superblock_clean); - } ret = bch2_sb_clean_validate_late(c, clean, READ); if (ret) { kfree(clean); - mutex_unlock(&c->sb_lock); return ERR_PTR(ret); } - mutex_unlock(&c->sb_lock); - return clean; fsck_err: - mutex_unlock(&c->sb_lock); return ERR_PTR(ret); } @@ -265,21 +256,16 @@ const struct bch_sb_field_ops bch_sb_field_ops_clean = { int bch2_fs_mark_dirty(struct bch_fs *c) { - int ret; - /* * Unconditionally write superblock, to verify it hasn't changed before * we go rw: */ - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); SET_BCH_SB_CLEAN(c->disk_sb.sb, false); c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALWAYS); - ret = bch2_write_super(c); - mutex_unlock(&c->sb_lock); - - return ret; + return bch2_write_super(c); } void bch2_fs_mark_clean(struct bch_fs *c) @@ -289,9 +275,9 @@ void bch2_fs_mark_clean(struct bch_fs *c) unsigned u64s; int ret; - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); if (BCH_SB_CLEAN(c->disk_sb.sb)) - goto out; + return; SET_BCH_SB_CLEAN(c->disk_sb.sb, true); @@ -305,7 +291,7 @@ void bch2_fs_mark_clean(struct bch_fs *c) sb_clean = bch2_sb_field_resize(&c->disk_sb, clean, u64s); if (!sb_clean) { bch_err(c, "error resizing superblock while setting filesystem clean"); - goto out; + return; } sb_clean->flags = 0; @@ -329,12 +315,10 @@ void bch2_fs_mark_clean(struct bch_fs *c) ret = bch2_sb_clean_validate_late(c, sb_clean, WRITE); if (ret) { bch_err(c, "error writing marking filesystem clean: validate error"); - goto out; + return; } bch2_journal_pos_from_member_info_set(c); bch2_write_super(c); -out: - mutex_unlock(&c->sb_lock); } diff --git a/fs/bcachefs/sb-counters_format.h b/fs/bcachefs/sb-counters_format.h index a59b2a10659e..2e3a56bfd085 100644 --- a/fs/bcachefs/sb-counters_format.h +++ b/fs/bcachefs/sb-counters_format.h @@ -13,6 +13,11 @@ enum counters_flags { x(io_read_hole, 81, TYPE_SECTORS) \ x(io_read_promote, 30, TYPE_COUNTER) \ x(io_read_nopromote, 85, TYPE_COUNTER) \ + x(io_read_nopromote_may_not, 86, TYPE_COUNTER) \ + x(io_read_nopromote_already_promoted, 87, TYPE_COUNTER) \ + x(io_read_nopromote_unwritten, 88, TYPE_COUNTER) \ + x(io_read_nopromote_congested, 89, TYPE_COUNTER) \ + x(io_read_nopromote_in_flight, 90, TYPE_COUNTER) \ x(io_read_bounce, 31, TYPE_COUNTER) \ x(io_read_split, 33, TYPE_COUNTER) \ x(io_read_reuse_race, 34, TYPE_COUNTER) \ diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c index 1506d05e0665..de56a1ee79db 100644 --- a/fs/bcachefs/sb-downgrade.c +++ b/fs/bcachefs/sb-downgrade.c @@ -191,7 +191,7 @@ int bch2_sb_set_upgrade_extra(struct bch_fs *c) bool write_sb = false; int ret = 0; - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); if (old_version < bcachefs_metadata_version_bucket_stripe_sectors && @@ -205,7 +205,6 @@ int bch2_sb_set_upgrade_extra(struct bch_fs *c) if (write_sb) bch2_write_super(c); - mutex_unlock(&c->sb_lock); return ret < 0 ? 
ret : 0; } @@ -372,7 +371,7 @@ int bch2_sb_downgrade_update(struct bch_fs *c) if (!test_bit(BCH_FS_btree_running, &c->flags)) return 0; - darray_char table = {}; + CLASS(darray_char, table)(); int ret = 0; for (const struct upgrade_downgrade_entry *src = downgrade_table; @@ -389,7 +388,7 @@ int bch2_sb_downgrade_update(struct bch_fs *c) ret = darray_make_room(&table, bytes); if (ret) - goto out; + return ret; dst = (void *) &darray_top(table); dst->version = cpu_to_le16(src->version); @@ -401,7 +400,7 @@ int bch2_sb_downgrade_update(struct bch_fs *c) ret = downgrade_table_extra(c, &table); if (ret) - goto out; + return ret; if (!dst->recovery_passes[0] && !dst->recovery_passes[1] && @@ -416,18 +415,14 @@ int bch2_sb_downgrade_update(struct bch_fs *c) unsigned sb_u64s = DIV_ROUND_UP(sizeof(*d) + table.nr, sizeof(u64)); if (d && le32_to_cpu(d->field.u64s) > sb_u64s) - goto out; + return 0; d = bch2_sb_field_resize(&c->disk_sb, downgrade, sb_u64s); - if (!d) { - ret = bch_err_throw(c, ENOSPC_sb_downgrade); - goto out; - } + if (!d) + return bch_err_throw(c, ENOSPC_sb_downgrade); memcpy(d->entries, table.data, table.nr); memset_u64s_tail(d->entries, 0, table.nr); -out: - darray_exit(&table); return ret; } diff --git a/fs/bcachefs/sb-errors.c b/fs/bcachefs/sb-errors.c index 48853efdc105..41a259eab4fb 100644 --- a/fs/bcachefs/sb-errors.c +++ b/fs/bcachefs/sb-errors.c @@ -110,75 +110,66 @@ void bch2_sb_error_count(struct bch_fs *c, enum bch_sb_error_id err) }; unsigned i; - mutex_lock(&c->fsck_error_counts_lock); + guard(mutex)(&c->fsck_error_counts_lock); + for (i = 0; i < e->nr; i++) { if (err == e->data[i].id) { e->data[i].nr++; e->data[i].last_error_time = n.last_error_time; - goto out; + return; } if (err < e->data[i].id) break; } if (darray_make_room(e, 1)) - goto out; + return; darray_insert_item(e, i, n); -out: - mutex_unlock(&c->fsck_error_counts_lock); } void bch2_sb_errors_from_cpu(struct bch_fs *c) { - bch_sb_errors_cpu *src = &c->fsck_error_counts; - struct bch_sb_field_errors *dst; - unsigned i; - - mutex_lock(&c->fsck_error_counts_lock); - - dst = bch2_sb_field_resize(&c->disk_sb, errors, - bch2_sb_field_errors_u64s(src->nr)); + guard(mutex)(&c->fsck_error_counts_lock); + bch_sb_errors_cpu *src = &c->fsck_error_counts; + struct bch_sb_field_errors *dst = + bch2_sb_field_resize(&c->disk_sb, errors, + bch2_sb_field_errors_u64s(src->nr)); if (!dst) - goto err; + return; - for (i = 0; i < src->nr; i++) { + for (unsigned i = 0; i < src->nr; i++) { SET_BCH_SB_ERROR_ENTRY_ID(&dst->entries[i], src->data[i].id); SET_BCH_SB_ERROR_ENTRY_NR(&dst->entries[i], src->data[i].nr); dst->entries[i].last_error_time = cpu_to_le64(src->data[i].last_error_time); } - -err: - mutex_unlock(&c->fsck_error_counts_lock); } static int bch2_sb_errors_to_cpu(struct bch_fs *c) { + guard(mutex)(&c->fsck_error_counts_lock); + struct bch_sb_field_errors *src = bch2_sb_field_get(c->disk_sb.sb, errors); bch_sb_errors_cpu *dst = &c->fsck_error_counts; - unsigned i, nr = bch2_sb_field_errors_nr_entries(src); - int ret; + unsigned nr = bch2_sb_field_errors_nr_entries(src); if (!nr) return 0; - mutex_lock(&c->fsck_error_counts_lock); - ret = darray_make_room(dst, nr); + int ret = darray_make_room(dst, nr); if (ret) - goto err; + return ret; dst->nr = nr; - for (i = 0; i < nr; i++) { + for (unsigned i = 0; i < nr; i++) { dst->data[i].id = BCH_SB_ERROR_ENTRY_ID(&src->entries[i]); dst->data[i].nr = BCH_SB_ERROR_ENTRY_NR(&src->entries[i]); dst->data[i].last_error_time = le64_to_cpu(src->entries[i].last_error_time); } -err: - 
mutex_unlock(&c->fsck_error_counts_lock); - return ret; + return 0; } void bch2_fs_sb_errors_exit(struct bch_fs *c) diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c index 340d4fb7f9b6..0573c7b00151 100644 --- a/fs/bcachefs/sb-members.c +++ b/fs/bcachefs/sb-members.c @@ -12,7 +12,7 @@ int bch2_dev_missing_bkey(struct bch_fs *c, struct bkey_s_c k, unsigned dev) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_log_msg_start(c, &buf); bool removed = test_bit(dev, c->devs_removed.d); @@ -31,7 +31,6 @@ int bch2_dev_missing_bkey(struct bch_fs *c, struct bkey_s_c k, unsigned dev) if (print) bch2_print_str(c, KERN_ERR, buf.buf); - printbuf_exit(&buf); return ret; } @@ -442,9 +441,8 @@ void bch2_dev_io_errors_to_text(struct printbuf *out, struct bch_dev *ca) struct bch_fs *c = ca->fs; struct bch_member m; - mutex_lock(&ca->fs->sb_lock); - m = bch2_sb_member_get(c->disk_sb.sb, ca->dev_idx); - mutex_unlock(&ca->fs->sb_lock); + scoped_guard(mutex, &ca->fs->sb_lock) + m = bch2_sb_member_get(c->disk_sb.sb, ca->dev_idx); printbuf_tabstop_push(out, 12); @@ -471,16 +469,15 @@ void bch2_dev_io_errors_to_text(struct printbuf *out, struct bch_dev *ca) void bch2_dev_errors_reset(struct bch_dev *ca) { struct bch_fs *c = ca->fs; - struct bch_member *m; - mutex_lock(&c->sb_lock); - m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); + guard(mutex)(&c->sb_lock); + + struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); for (unsigned i = 0; i < ARRAY_SIZE(m->errors_at_reset); i++) m->errors_at_reset[i] = cpu_to_le64(atomic64_read(&ca->errors[i])); m->errors_reset_time = cpu_to_le64(ktime_get_real_seconds()); bch2_write_super(c); - mutex_unlock(&c->sb_lock); } /* @@ -612,7 +609,7 @@ have_slot: void bch2_sb_members_clean_deleted(struct bch_fs *c) { - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); bool write_sb = false; for (unsigned i = 0; i < c->sb.nr_devices; i++) { @@ -626,5 +623,4 @@ void bch2_sb_members_clean_deleted(struct bch_fs *c) if (write_sb) bch2_write_super(c); - mutex_unlock(&c->sb_lock); } diff --git a/fs/bcachefs/sb-members.h b/fs/bcachefs/sb-members.h index 0d363a1cdd47..35d4ab9b6197 100644 --- a/fs/bcachefs/sb-members.h +++ b/fs/bcachefs/sb-members.h @@ -133,7 +133,7 @@ static inline void __bch2_dev_put(struct bch_dev *ca) static inline void bch2_dev_put(struct bch_dev *ca) { - if (ca) + if (!IS_ERR_OR_NULL(ca)) __bch2_dev_put(ca); } diff --git a/fs/bcachefs/six.c b/fs/bcachefs/six.c index 538c324f4765..08083d6ca8bc 100644 --- a/fs/bcachefs/six.c +++ b/fs/bcachefs/six.c @@ -152,16 +152,16 @@ static int __do_six_trylock(struct six_lock *lock, enum six_lock_type type, * here. 
*/ if (type == SIX_LOCK_read && lock->readers) { - preempt_disable(); - this_cpu_inc(*lock->readers); /* signal that we own lock */ + scoped_guard(preempt) { + this_cpu_inc(*lock->readers); /* signal that we own lock */ - smp_mb(); + smp_mb(); - old = atomic_read(&lock->state); - ret = !(old & l[type].lock_fail); + old = atomic_read(&lock->state); + ret = !(old & l[type].lock_fail); - this_cpu_sub(*lock->readers, !ret); - preempt_enable(); + this_cpu_sub(*lock->readers, !ret); + } if (!ret) { smp_mb(); @@ -360,7 +360,7 @@ static inline bool six_optimistic_spin(struct six_lock *lock, if (atomic_read(&lock->state) & SIX_LOCK_NOSPIN) return false; - preempt_disable(); + guard(preempt)(); end_time = sched_clock() + 10 * NSEC_PER_USEC; while (!need_resched() && six_owner_running(lock)) { @@ -369,10 +369,8 @@ static inline bool six_optimistic_spin(struct six_lock *lock, * wait->lock_acquired: pairs with the smp_store_release in * __six_lock_wakeup */ - if (smp_load_acquire(&wait->lock_acquired)) { - preempt_enable(); + if (smp_load_acquire(&wait->lock_acquired)) return true; - } if (!(++loop & 0xf) && (time_after64(sched_clock(), end_time))) { six_set_bitmask(lock, SIX_LOCK_NOSPIN); @@ -388,7 +386,6 @@ static inline bool six_optimistic_spin(struct six_lock *lock, cpu_relax(); } - preempt_enable(); return false; } diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index 4c43d2a2c1f5..7a801513b134 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -284,12 +284,10 @@ fsck_err: static int bch2_snapshot_table_make_room(struct bch_fs *c, u32 id) { - mutex_lock(&c->snapshot_table_lock); - int ret = snapshot_t_mut(c, id) + guard(mutex)(&c->snapshot_table_lock); + return snapshot_t_mut(c, id) ? 0 : bch_err_throw(c, ENOMEM_mark_snapshot); - mutex_unlock(&c->snapshot_table_lock); - return ret; } static int __bch2_mark_snapshot(struct btree_trans *trans, @@ -300,15 +298,12 @@ static int __bch2_mark_snapshot(struct btree_trans *trans, struct bch_fs *c = trans->c; struct snapshot_t *t; u32 id = new.k->p.offset; - int ret = 0; - mutex_lock(&c->snapshot_table_lock); + guard(mutex)(&c->snapshot_table_lock); t = snapshot_t_mut(c, id); - if (!t) { - ret = bch_err_throw(c, ENOMEM_mark_snapshot); - goto err; - } + if (!t) + return bch_err_throw(c, ENOMEM_mark_snapshot); if (new.k->type == KEY_TYPE_snapshot) { struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(new); @@ -348,9 +343,8 @@ static int __bch2_mark_snapshot(struct btree_trans *trans, } else { memset(t, 0, sizeof(*t)); } -err: - mutex_unlock(&c->snapshot_table_lock); - return ret; + + return 0; } int bch2_mark_snapshot(struct btree_trans *trans, @@ -481,7 +475,7 @@ static int check_snapshot_tree(struct btree_trans *trans, struct bkey_s_c_snapshot_tree st; struct bch_snapshot s; struct bch_subvolume subvol; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); struct btree_iter snapshot_iter = {}; u32 root_id; int ret; @@ -567,7 +561,6 @@ out: err: fsck_err: bch2_trans_iter_exit(trans, &snapshot_iter); - printbuf_exit(&buf); return ret; } @@ -580,14 +573,12 @@ fsck_err: */ int bch2_check_snapshot_trees(struct bch_fs *c) { - int ret = bch2_trans_run(c, - for_each_btree_key_commit(trans, iter, + CLASS(btree_trans, trans)(c); + return for_each_btree_key_commit(trans, iter, BTREE_ID_snapshot_trees, POS_MIN, BTREE_ITER_prefetch, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - check_snapshot_tree(trans, &iter, k))); - bch_err_fn(c, ret); - return ret; + check_snapshot_tree(trans, &iter, k)); } /* @@ -706,7 +697,7 @@ static int 
check_snapshot(struct btree_trans *trans, struct bkey_i_snapshot *u; u32 parent_id = bch2_snapshot_parent_early(c, k.k->p.offset); u32 real_depth; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); u32 i, id; int ret = 0; @@ -839,7 +830,6 @@ static int check_snapshot(struct btree_trans *trans, ret = 0; err: fsck_err: - printbuf_exit(&buf); return ret; } @@ -849,14 +839,12 @@ int bch2_check_snapshots(struct bch_fs *c) * We iterate backwards as checking/fixing the depth field requires that * the parent's depth already be correct: */ - int ret = bch2_trans_run(c, - for_each_btree_key_reverse_commit(trans, iter, + CLASS(btree_trans, trans)(c); + return for_each_btree_key_reverse_commit(trans, iter, BTREE_ID_snapshots, POS_MAX, BTREE_ITER_prefetch, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - check_snapshot(trans, &iter, k))); - bch_err_fn(c, ret); - return ret; + check_snapshot(trans, &iter, k)); } static int check_snapshot_exists(struct btree_trans *trans, u32 id) @@ -980,8 +968,8 @@ static int get_snapshot_trees(struct bch_fs *c, struct snapshot_tree_reconstruct int bch2_reconstruct_snapshots(struct bch_fs *c) { - struct btree_trans *trans = bch2_trans_get(c); - struct printbuf buf = PRINTBUF; + CLASS(btree_trans, trans)(c); + CLASS(printbuf, buf)(); struct snapshot_tree_reconstruct r = {}; int ret = 0; @@ -1023,10 +1011,7 @@ int bch2_reconstruct_snapshots(struct bch_fs *c) } fsck_err: err: - bch2_trans_put(trans); snapshot_tree_reconstruct_exit(&r); - printbuf_exit(&buf); - bch_err_fn(c, ret); return ret; } @@ -1035,7 +1020,7 @@ int __bch2_check_key_has_snapshot(struct btree_trans *trans, struct bkey_s_c k) { struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); int ret = 0; enum snapshot_id_state state = bch2_snapshot_id_state(c, k.k->p.snapshot); @@ -1083,7 +1068,6 @@ int __bch2_check_key_has_snapshot(struct btree_trans *trans, } } fsck_err: - printbuf_exit(&buf); return ret; } @@ -1693,7 +1677,7 @@ static int check_should_delete_snapshot(struct btree_trans *trans, struct bkey_s if (BCH_SNAPSHOT_DELETED(s.v)) return 0; - mutex_lock(&d->progress_lock); + guard(mutex)(&d->progress_lock); for (unsigned i = 0; i < 2; i++) { u32 child = le32_to_cpu(s.v->children[i]); @@ -1720,7 +1704,6 @@ static int check_should_delete_snapshot(struct btree_trans *trans, struct bkey_s darray_push(&d->delete_interior, n); } } - mutex_unlock(&d->progress_lock); return ret; } @@ -1825,10 +1808,12 @@ int __bch2_delete_dead_snapshots(struct bch_fs *c) if (!mutex_trylock(&d->lock)) return 0; - if (!test_and_clear_bit(BCH_FS_need_delete_dead_snapshots, &c->flags)) - goto out_unlock; + if (!test_and_clear_bit(BCH_FS_need_delete_dead_snapshots, &c->flags)) { + mutex_unlock(&d->lock); + return 0; + } - struct btree_trans *trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); /* * For every snapshot node: If we have no live children and it's not @@ -1848,11 +1833,10 @@ int __bch2_delete_dead_snapshots(struct bch_fs *c) goto err; { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_snapshot_delete_nodes_to_text(&buf, d); ret = commit_do(trans, NULL, NULL, 0, bch2_trans_log_msg(trans, &buf)); - printbuf_exit(&buf); if (ret) goto err; } @@ -1895,19 +1879,16 @@ int __bch2_delete_dead_snapshots(struct bch_fs *c) goto err; } err: - mutex_lock(&d->progress_lock); - darray_exit(&d->deleting_from_trees); - darray_exit(&d->delete_interior); - darray_exit(&d->delete_leaves); - d->running = false; - mutex_unlock(&d->progress_lock); - bch2_trans_put(trans); + 
scoped_guard(mutex, &d->progress_lock) { + darray_exit(&d->deleting_from_trees); + darray_exit(&d->delete_interior); + darray_exit(&d->delete_leaves); + d->running = false; + } bch2_recovery_pass_set_no_ratelimit(c, BCH_RECOVERY_PASS_check_snapshots); -out_unlock: + mutex_unlock(&d->lock); - if (!bch2_err_matches(ret, EROFS)) - bch_err_fn(c, ret); return ret; } @@ -1952,11 +1933,10 @@ void bch2_snapshot_delete_status_to_text(struct printbuf *out, struct bch_fs *c) return; } - mutex_lock(&d->progress_lock); - bch2_snapshot_delete_nodes_to_text(out, d); - - bch2_bbpos_to_text(out, d->pos); - mutex_unlock(&d->progress_lock); + scoped_guard(mutex, &d->progress_lock) { + bch2_snapshot_delete_nodes_to_text(out, d); + bch2_bbpos_to_text(out, d->pos); + } } int __bch2_key_has_snapshot_overwrites(struct btree_trans *trans, @@ -2010,11 +1990,11 @@ int bch2_snapshots_read(struct bch_fs *c) * Initializing the is_ancestor bitmaps requires ancestors to already be * initialized - so mark in reverse: */ - int ret = bch2_trans_run(c, - for_each_btree_key_reverse(trans, iter, BTREE_ID_snapshots, + CLASS(btree_trans, trans)(c); + int ret = for_each_btree_key_reverse(trans, iter, BTREE_ID_snapshots, POS_MAX, 0, k, __bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0, bkey_s_c_null, k, 0) ?: - bch2_check_snapshot_needs_deletion(trans, k))); + bch2_check_snapshot_needs_deletion(trans, k)); bch_err_fn(c, ret); /* diff --git a/fs/bcachefs/snapshot_types.h b/fs/bcachefs/snapshot_types.h index 0ab698f13e5c..a826c9c83c11 100644 --- a/fs/bcachefs/snapshot_types.h +++ b/fs/bcachefs/snapshot_types.h @@ -6,7 +6,7 @@ #include "darray.h" #include "subvolume_types.h" -typedef DARRAY(u32) snapshot_id_list; +DEFINE_DARRAY_NAMED(snapshot_id_list, u32); #define IS_ANCESTOR_BITMAP 128 diff --git a/fs/bcachefs/str_hash.c b/fs/bcachefs/str_hash.c index d39fd4261e1b..dfe4b6ae0733 100644 --- a/fs/bcachefs/str_hash.c +++ b/fs/bcachefs/str_hash.c @@ -125,7 +125,7 @@ int bch2_repair_inode_hash_info(struct btree_trans *trans, struct bch_fs *c = trans->c; struct btree_iter iter; struct bkey_s_c k; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bool need_commit = false; int ret = 0; @@ -183,7 +183,7 @@ int bch2_repair_inode_hash_info(struct btree_trans *trans, goto err; if (!need_commit) { - struct printbuf buf = PRINTBUF; + printbuf_reset(&buf); bch2_log_msg_start(c, &buf); prt_printf(&buf, "inode %llu hash info mismatch with root, but mismatch not found\n", @@ -198,7 +198,6 @@ int bch2_repair_inode_hash_info(struct btree_trans *trans, prt_printf(&buf, " %llx %llx", hash_info->siphash_key.k0, hash_info->siphash_key.k1); #endif bch2_print_str(c, KERN_ERR, buf.buf); - printbuf_exit(&buf); ret = bch_err_throw(c, fsck_repair_unimplemented); goto err; } @@ -207,7 +206,6 @@ int bch2_repair_inode_hash_info(struct btree_trans *trans, bch_err_throw(c, transaction_restart_nested); err: fsck_err: - printbuf_exit(&buf); bch2_trans_iter_exit(trans, &iter); return ret; } @@ -244,7 +242,7 @@ int bch2_str_hash_repair_key(struct btree_trans *trans, bool *updated_before_k_pos) { struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bool free_snapshots_seen = false; int ret = 0; @@ -346,7 +344,7 @@ int __bch2_str_hash_check_key(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct btree_iter iter = {}; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); struct bkey_s_c k; int ret = 0; @@ -375,9 +373,7 @@ int __bch2_str_hash_check_key(struct btree_trans *trans, goto bad_hash; } 
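
A note on the pattern running through these hunks: guard() and scoped_guard() come from the kernel's scope-based cleanup support in <linux/cleanup.h>. guard(mutex)(&m) acquires the mutex and registers the unlock via the compiler's cleanup attribute, so every return path releases it; scoped_guard(mutex, &m) { ... } does the same for one explicit block. That is what lets error paths such as the bch2_sb_error_count() hunk above collapse from "goto out; ... mutex_unlock()" into plain returns. A minimal standalone sketch; struct err_counts and record_error() are illustrative only, not part of this series:

	#include <linux/cleanup.h>
	#include <linux/mutex.h>

	struct err_counts {		/* invented example type */
		struct mutex	lock;
		unsigned	nr;
	};

	static void record_error(struct err_counts *e)
	{
		guard(mutex)(&e->lock);	/* mutex_unlock() runs at every scope exit */

		if (e->nr >= 128)
			return;		/* early return still drops the lock */
		e->nr++;
	}
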
bch2_trans_iter_exit(trans, &iter); -out: fsck_err: - printbuf_exit(&buf); return ret; bad_hash: bch2_trans_iter_exit(trans, &iter); @@ -386,7 +382,7 @@ bad_hash: */ ret = check_inode_hash_info_matches_root(trans, hash_k.k->p.inode, hash_info); if (ret) - goto out; + return ret; if (fsck_err(trans, hash_table_key_wrong_offset, "hash table key at wrong offset: should be at %llu\n%s", @@ -396,5 +392,5 @@ bad_hash: k_iter, hash_k, &iter, bkey_s_c_null, updated_before_k_pos); - goto out; + return ret; } diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index 353df662a9b5..2d2d6b22df88 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -17,7 +17,7 @@ static int bch2_subvolume_delete(struct btree_trans *, u32); static int bch2_subvolume_missing(struct bch_fs *c, u32 subvolid) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_log_msg_start(c, &buf); prt_printf(&buf, "missing subvolume %u", subvolid); @@ -27,7 +27,6 @@ static int bch2_subvolume_missing(struct bch_fs *c, u32 subvolid) BCH_RECOVERY_PASS_check_inodes, 0); if (print) bch2_print_str(c, KERN_ERR, buf.buf); - printbuf_exit(&buf); return ret; } @@ -47,18 +46,18 @@ static int check_subvol(struct btree_trans *trans, struct bkey_s_c k) { struct bch_fs *c = trans->c; - struct bkey_s_c_subvolume subvol; struct btree_iter subvol_children_iter = {}; + struct bch_subvolume subvol; struct bch_snapshot snapshot; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); unsigned snapid; int ret = 0; if (k.k->type != KEY_TYPE_subvolume) return 0; - subvol = bkey_s_c_to_subvolume(k); - snapid = le32_to_cpu(subvol.v->snapshot); + bkey_val_copy(&subvol, bkey_s_c_to_subvolume(k)); + snapid = le32_to_cpu(subvol.snapshot); ret = bch2_snapshot_lookup(trans, snapid, &snapshot); if (bch2_err_matches(ret, ENOENT)) @@ -67,19 +66,19 @@ static int check_subvol(struct btree_trans *trans, if (ret) return ret; - if (BCH_SUBVOLUME_UNLINKED(subvol.v)) { + if (BCH_SUBVOLUME_UNLINKED(&subvol)) { ret = bch2_subvolume_delete(trans, iter->pos.offset); bch_err_msg(c, ret, "deleting subvolume %llu", iter->pos.offset); return ret ?: bch_err_throw(c, transaction_restart_nested); } - if (fsck_err_on(subvol.k->p.offset == BCACHEFS_ROOT_SUBVOL && - subvol.v->fs_path_parent, + if (fsck_err_on(k.k->p.offset == BCACHEFS_ROOT_SUBVOL && + subvol.fs_path_parent, trans, subvol_root_fs_path_parent_nonzero, "root subvolume has nonzero fs_path_parent\n%s", (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { struct bkey_i_subvolume *n = - bch2_bkey_make_mut_typed(trans, iter, &subvol.s_c, 0, subvolume); + bch2_bkey_make_mut_typed(trans, iter, &k, 0, subvolume); ret = PTR_ERR_OR_ZERO(n); if (ret) goto err; @@ -87,7 +86,7 @@ static int check_subvol(struct btree_trans *trans, n->v.fs_path_parent = 0; } - if (subvol.v->fs_path_parent) { + if (subvol.fs_path_parent) { struct bpos pos = subvolume_children_pos(k); struct bkey_s_c subvol_children_k = @@ -111,16 +110,16 @@ static int check_subvol(struct btree_trans *trans, struct bch_inode_unpacked inode; ret = bch2_inode_find_by_inum_nowarn_trans(trans, - (subvol_inum) { k.k->p.offset, le64_to_cpu(subvol.v->inode) }, + (subvol_inum) { k.k->p.offset, le64_to_cpu(subvol.inode) }, &inode); if (!ret) { - if (fsck_err_on(inode.bi_subvol != subvol.k->p.offset, + if (fsck_err_on(inode.bi_subvol != k.k->p.offset, trans, subvol_root_wrong_bi_subvol, "subvol root %llu:%u has wrong bi_subvol field: got %u, should be %llu", inode.bi_inum, inode.bi_snapshot, - inode.bi_subvol, subvol.k->p.offset)) { - 
inode.bi_subvol = subvol.k->p.offset; - inode.bi_snapshot = le32_to_cpu(subvol.v->snapshot); + inode.bi_subvol, k.k->p.offset)) { + inode.bi_subvol = k.k->p.offset; + inode.bi_snapshot = le32_to_cpu(subvol.snapshot); ret = __bch2_fsck_write_inode(trans, &inode); if (ret) goto err; @@ -128,8 +127,8 @@ static int check_subvol(struct btree_trans *trans, } else if (bch2_err_matches(ret, ENOENT)) { if (fsck_err(trans, subvol_to_missing_root, "subvolume %llu points to missing subvolume root %llu:%u", - k.k->p.offset, le64_to_cpu(subvol.v->inode), - le32_to_cpu(subvol.v->snapshot))) { + k.k->p.offset, le64_to_cpu(subvol.inode), + le32_to_cpu(subvol.snapshot))) { /* * Recreate - any contents that are still disconnected * will then get reattached under lost+found @@ -137,10 +136,10 @@ static int check_subvol(struct btree_trans *trans, bch2_inode_init_early(c, &inode); bch2_inode_init_late(c, &inode, bch2_current_time(c), 0, 0, S_IFDIR|0700, 0, NULL); - inode.bi_inum = le64_to_cpu(subvol.v->inode); - inode.bi_snapshot = le32_to_cpu(subvol.v->snapshot); + inode.bi_inum = le64_to_cpu(subvol.inode); + inode.bi_snapshot = le32_to_cpu(subvol.snapshot); inode.bi_subvol = k.k->p.offset; - inode.bi_parent_subvol = le32_to_cpu(subvol.v->fs_path_parent); + inode.bi_parent_subvol = le32_to_cpu(subvol.fs_path_parent); ret = __bch2_fsck_write_inode(trans, &inode); if (ret) goto err; @@ -149,8 +148,8 @@ static int check_subvol(struct btree_trans *trans, goto err; } - if (!BCH_SUBVOLUME_SNAP(subvol.v)) { - u32 snapshot_root = bch2_snapshot_root(c, le32_to_cpu(subvol.v->snapshot)); + if (!BCH_SUBVOLUME_SNAP(&subvol)) { + u32 snapshot_root = bch2_snapshot_root(c, le32_to_cpu(subvol.snapshot)); u32 snapshot_tree = bch2_snapshot_tree(c, snapshot_root); struct bch_snapshot_tree st; @@ -162,12 +161,12 @@ static int check_subvol(struct btree_trans *trans, if (ret) goto err; - if (fsck_err_on(le32_to_cpu(st.master_subvol) != subvol.k->p.offset, + if (fsck_err_on(le32_to_cpu(st.master_subvol) != k.k->p.offset, trans, subvol_not_master_and_not_snapshot, "subvolume %llu is not set as snapshot but is not master subvolume", k.k->p.offset)) { struct bkey_i_subvolume *s = - bch2_bkey_make_mut_typed(trans, iter, &subvol.s_c, 0, subvolume); + bch2_bkey_make_mut_typed(trans, iter, &k, 0, subvolume); ret = PTR_ERR_OR_ZERO(s); if (ret) goto err; @@ -178,19 +177,16 @@ static int check_subvol(struct btree_trans *trans, err: fsck_err: bch2_trans_iter_exit(trans, &subvol_children_iter); - printbuf_exit(&buf); return ret; } int bch2_check_subvols(struct bch_fs *c) { - int ret = bch2_trans_run(c, - for_each_btree_key_commit(trans, iter, + CLASS(btree_trans, trans)(c); + return for_each_btree_key_commit(trans, iter, BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_prefetch, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - check_subvol(trans, &iter, k))); - bch_err_fn(c, ret); - return ret; + check_subvol(trans, &iter, k)); } static int check_subvol_child(struct btree_trans *trans, @@ -219,13 +215,11 @@ fsck_err: int bch2_check_subvol_children(struct bch_fs *c) { - int ret = bch2_trans_run(c, - for_each_btree_key_commit(trans, iter, + CLASS(btree_trans, trans)(c); + return for_each_btree_key_commit(trans, iter, BTREE_ID_subvolume_children, POS_MIN, BTREE_ITER_prefetch, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - check_subvol_child(trans, &iter, k))); - bch_err_fn(c, ret); - return 0; + check_subvol_child(trans, &iter, k)); } /* Subvolumes: */ @@ -348,7 +342,8 @@ int bch2_subvol_is_ro_trans(struct btree_trans *trans, u32 subvol) int 
bch2_subvol_is_ro(struct bch_fs *c, u32 subvol) { - return bch2_trans_do(c, bch2_subvol_is_ro_trans(trans, subvol)); + CLASS(btree_trans, trans)(c); + return lockrestart_do(trans, bch2_subvol_is_ro_trans(trans, subvol)); } int bch2_snapshot_get_subvol(struct btree_trans *trans, u32 snapshot, @@ -514,18 +509,22 @@ static void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *wor int ret = 0; while (!ret) { - mutex_lock(&c->snapshots_unlinked_lock); - snapshot_id_list s = c->snapshots_unlinked; - darray_init(&c->snapshots_unlinked); - mutex_unlock(&c->snapshots_unlinked_lock); + snapshot_id_list s; + + scoped_guard(mutex, &c->snapshots_unlinked_lock) { + s = c->snapshots_unlinked; + darray_init(&c->snapshots_unlinked); + } if (!s.nr) break; bch2_evict_subvolume_inodes(c, &s); + CLASS(btree_trans, trans)(c); + darray_for_each(s, id) { - ret = bch2_trans_run(c, bch2_subvolume_delete(trans, *id)); + ret = bch2_subvolume_delete(trans, *id); bch_err_msg(c, ret, "deleting subvolume %u", *id); if (ret) break; @@ -549,10 +548,9 @@ static int bch2_subvolume_wait_for_pagecache_and_delete_hook(struct btree_trans struct bch_fs *c = trans->c; int ret = 0; - mutex_lock(&c->snapshots_unlinked_lock); - if (!snapshot_list_has_id(&c->snapshots_unlinked, h->subvol)) - ret = snapshot_list_add(c, &c->snapshots_unlinked, h->subvol); - mutex_unlock(&c->snapshots_unlinked_lock); + scoped_guard(mutex, &c->snapshots_unlinked_lock) + if (!snapshot_list_has_id(&c->snapshots_unlinked, h->subvol)) + ret = snapshot_list_add(c, &c->snapshots_unlinked, h->subvol); if (ret) return ret; @@ -677,7 +675,6 @@ int bch2_initialize_subvolumes(struct bch_fs *c) struct bkey_i_snapshot_tree root_tree; struct bkey_i_snapshot root_snapshot; struct bkey_i_subvolume root_volume; - int ret; bkey_snapshot_tree_init(&root_tree.k_i); root_tree.k.p.offset = 1; @@ -698,11 +695,9 @@ int bch2_initialize_subvolumes(struct bch_fs *c) root_volume.v.snapshot = cpu_to_le32(U32_MAX); root_volume.v.inode = cpu_to_le64(BCACHEFS_ROOT_INO); - ret = bch2_btree_insert(c, BTREE_ID_snapshot_trees, &root_tree.k_i, NULL, 0, 0) ?: + return bch2_btree_insert(c, BTREE_ID_snapshot_trees, &root_tree.k_i, NULL, 0, 0) ?: bch2_btree_insert(c, BTREE_ID_snapshots, &root_snapshot.k_i, NULL, 0, 0) ?: bch2_btree_insert(c, BTREE_ID_subvolumes, &root_volume.k_i, NULL, 0, 0); - bch_err_fn(c, ret); - return ret; } static int __bch2_fs_upgrade_for_subvolumes(struct btree_trans *trans) @@ -739,10 +734,9 @@ err: /* set bi_subvol on root inode */ int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c) { - int ret = bch2_trans_commit_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - __bch2_fs_upgrade_for_subvolumes(trans)); - bch_err_fn(c, ret); - return ret; + CLASS(btree_trans, trans)(c); + return commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, + __bch2_fs_upgrade_for_subvolumes(trans)); } void bch2_fs_subvolumes_init_early(struct bch_fs *c) diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 85e460d10e9d..820cb0f4fe57 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -68,23 +68,21 @@ enum bcachefs_metadata_version bch2_latest_compatible_version(enum bcachefs_meta int bch2_set_version_incompat(struct bch_fs *c, enum bcachefs_metadata_version version) { - int ret = ((c->sb.features & BIT_ULL(BCH_FEATURE_incompat_version_field)) && - version <= c->sb.version_incompat_allowed) - ? 
0 - : -BCH_ERR_may_not_use_incompat_feature; + guard(mutex)(&c->sb_lock); - mutex_lock(&c->sb_lock); - if (!ret) { + if (((c->sb.features & BIT_ULL(BCH_FEATURE_incompat_version_field)) && + version <= c->sb.version_incompat_allowed)) { SET_BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb, max(BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb), version)); bch2_write_super(c); + return 0; } else { darray_for_each(c->incompat_versions_requested, i) if (version == *i) - goto out; + return -BCH_ERR_may_not_use_incompat_feature; darray_push(&c->incompat_versions_requested, version); - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_str(&buf, "requested incompat feature "); bch2_version_to_text(&buf, version); prt_str(&buf, " currently not enabled, allowed up to "); @@ -92,13 +90,8 @@ int bch2_set_version_incompat(struct bch_fs *c, enum bcachefs_metadata_version v prt_printf(&buf, "\n set version_upgrade=incompat to enable"); bch_notice(c, "%s", buf.buf); - printbuf_exit(&buf); + return -BCH_ERR_may_not_use_incompat_feature; } - -out: - mutex_unlock(&c->sb_lock); - - return ret; } const char * const bch2_sb_fields[] = { @@ -203,12 +196,11 @@ int bch2_sb_realloc(struct bch_sb_handle *sb, unsigned u64s) u64 max_bytes = 512 << sb->sb->layout.sb_max_size_bits; if (new_bytes > max_bytes) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_bdevname(&buf, sb->bdev); prt_printf(&buf, ": superblock too big: want %zu but have %llu", new_bytes, max_bytes); pr_err("%s", buf.buf); - printbuf_exit(&buf); return -BCH_ERR_ENOSPC_sb; } } @@ -783,8 +775,8 @@ static int __bch2_read_super(const char *path, struct bch_opts *opts, { u64 offset = opt_get(*opts, sb); struct bch_sb_layout layout; - struct printbuf err = PRINTBUF; - struct printbuf err2 = PRINTBUF; + CLASS(printbuf, err)(); + CLASS(printbuf, err2)(); __le64 *i; int ret; #ifndef __KERNEL__ @@ -859,7 +851,6 @@ retry: else bch2_print_opts(opts, KERN_ERR "%s", err2.buf); - printbuf_exit(&err2); printbuf_reset(&err); /* @@ -925,15 +916,14 @@ got_super: path, err.buf); goto err_no_print; } -out: - printbuf_exit(&err); - return ret; + + return 0; err: bch2_print_opts(opts, KERN_ERR "bcachefs (%s): error reading superblock: %s\n", path, err.buf); err_no_print: bch2_free_super(sb); - goto out; + return ret; } int bch2_read_super(const char *path, struct bch_opts *opts, @@ -1019,7 +1009,7 @@ static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx) int bch2_write_super(struct bch_fs *c) { struct closure *cl = &c->sb_write; - struct printbuf err = PRINTBUF; + CLASS(printbuf, err)(); unsigned sb = 0, nr_wrote; struct bch_devs_mask sb_written; bool wrote, can_mount_without_written, can_mount_with_written; @@ -1101,14 +1091,13 @@ int bch2_write_super(struct bch_fs *c) goto out; if (le16_to_cpu(c->disk_sb.sb->version) > bcachefs_metadata_version_current) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_printf(&buf, "attempting to write superblock that wasn't version downgraded ("); bch2_version_to_text(&buf, le16_to_cpu(c->disk_sb.sb->version)); prt_str(&buf, " > "); bch2_version_to_text(&buf, bcachefs_metadata_version_current); prt_str(&buf, ")"); bch2_fs_fatal_error(c, ": %s", buf.buf); - printbuf_exit(&buf); ret = bch_err_throw(c, sb_not_downgraded); goto out; } @@ -1129,7 +1118,7 @@ int bch2_write_super(struct bch_fs *c) continue; if (le64_to_cpu(ca->sb_read_scratch->seq) < ca->disk_sb.seq) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_char(&buf, ' '); prt_bdevname(&buf, ca->disk_sb.bdev); 
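
A note on the CLASS(printbuf, buf)() conversions in this file: CLASS() is the companion of DEFINE_CLASS() in <linux/cleanup.h>. DEFINE_CLASS(name, type, exit, init, args...) pairs a constructor expression with a destructor, and CLASS(name, var)(args) declares a variable whose destructor runs automatically when the scope exits; here that destructor is presumably printbuf_exit(), which is why the explicit printbuf_exit() calls disappear. A self-contained sketch of the mechanism, using an invented scratch-buffer type rather than bcachefs's printbuf:

	#include <linux/cleanup.h>
	#include <linux/slab.h>
	#include <linux/string.h>

	struct scratch {		/* invented stand-in for struct printbuf */
		char	*p;
		size_t	size;
	};

	static void scratch_free(struct scratch s)
	{
		kfree(s.p);		/* kfree(NULL) is a safe no-op */
	}

	DEFINE_CLASS(scratch, struct scratch,
		     scratch_free(_T),					/* destructor */
		     ((struct scratch) { kmalloc(size, GFP_KERNEL), size }),	/* constructor */
		     size_t size)

	static int fill(void)
	{
		CLASS(scratch, buf)(64);	/* scratch_free(buf) queued for scope exit */

		if (!buf.p)
			return -ENOMEM;		/* no manual free needed on this path either */
		strscpy(buf.p, "hello", buf.size);
		return 0;
	}

The DEFINE_CLASS(bdev_get_fs, ...) added to super.c later in this series is the same construction wrapped around a ref-counted lookup.
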
prt_printf(&buf, @@ -1144,12 +1133,10 @@ int bch2_write_super(struct bch_fs *c) } else { bch_err(c, "%s", buf.buf); } - - printbuf_exit(&buf); } if (le64_to_cpu(ca->sb_read_scratch->seq) > ca->disk_sb.seq) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_char(&buf, ' '); prt_bdevname(&buf, ca->disk_sb.bdev); prt_printf(&buf, @@ -1157,7 +1144,6 @@ int bch2_write_super(struct bch_fs *c) le64_to_cpu(ca->sb_read_scratch->seq), ca->disk_sb.seq); bch2_fs_fatal_error(c, "%s", buf.buf); - printbuf_exit(&buf); ret = bch_err_throw(c, erofs_sb_err); } } @@ -1219,19 +1205,17 @@ out: darray_for_each(online_devices, ca) enumerated_ref_put(&(*ca)->io_ref[READ], BCH_DEV_READ_REF_write_super); darray_exit(&online_devices); - printbuf_exit(&err); return ret; } void __bch2_check_set_feature(struct bch_fs *c, unsigned feat) { - mutex_lock(&c->sb_lock); - if (!(c->sb.features & (1ULL << feat))) { - c->disk_sb.sb->features[0] |= cpu_to_le64(1ULL << feat); + guard(mutex)(&c->sb_lock); + if (!(c->sb.features & BIT_ULL(feat))) { + c->disk_sb.sb->features[0] |= cpu_to_le64(BIT_ULL(feat)); bch2_write_super(c); } - mutex_unlock(&c->sb_lock); } /* Downgrade if superblock is at a higher version than currently supported: */ @@ -1279,11 +1263,12 @@ void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version, bool incompat) void bch2_sb_upgrade_incompat(struct bch_fs *c) { - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); + if (c->sb.version == c->sb.version_incompat_allowed) - goto unlock; + return; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_str(&buf, "Now allowing incompatible features up to "); bch2_version_to_text(&buf, c->sb.version); @@ -1292,14 +1277,11 @@ void bch2_sb_upgrade_incompat(struct bch_fs *c) prt_newline(&buf); bch_notice(c, "%s", buf.buf); - printbuf_exit(&buf); c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL); SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb, max(BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb), c->sb.version)); bch2_write_super(c); -unlock: - mutex_unlock(&c->sb_lock); } static int bch2_sb_ext_validate(struct bch_sb *sb, struct bch_sb_field *f, @@ -1365,7 +1347,7 @@ static int bch2_sb_field_validate(struct bch_sb *sb, struct bch_sb_field *f, enum bch_validate_flags flags, struct printbuf *err) { unsigned type = le32_to_cpu(f->type); - struct printbuf field_err = PRINTBUF; + CLASS(printbuf, field_err)(); const struct bch_sb_field_ops *ops = bch2_sb_field_type_ops(type); int ret; @@ -1377,7 +1359,6 @@ static int bch2_sb_field_validate(struct bch_sb *sb, struct bch_sb_field *f, bch2_sb_field_to_text(err, sb, f); } - printbuf_exit(&field_err); return ret; } diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index a3438b0dc0a9..4e038f655f83 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -103,9 +103,32 @@ const char * const bch2_dev_write_refs[] = { }; #undef x -static void __bch2_print_str(struct bch_fs *c, const char *prefix, - const char *str) +static bool should_print_loglevel(struct bch_fs *c, const char *fmt) { + unsigned loglevel_opt = c->loglevel ?: c->opts.verbose ? 7: 6; + + bool have_soh = fmt[0] == KERN_SOH[0]; + bool have_loglevel = have_soh && fmt[1] >= '0' && fmt[1] <= '9'; + + unsigned loglevel = have_loglevel + ? 
fmt[1] - '0' + : c->prev_loglevel; + + if (have_loglevel) + c->prev_loglevel = loglevel; + + return loglevel <= loglevel_opt; +} + +void bch2_print_str(struct bch_fs *c, const char *prefix, const char *str) +{ + if (!should_print_loglevel(c, prefix)) + return; + +#ifndef __KERNEL__ + prefix = ""; +#endif + #ifdef __KERNEL__ struct stdio_redirect *stdio = bch2_fs_stdio_redirect(c); @@ -114,12 +137,7 @@ static void __bch2_print_str(struct bch_fs *c, const char *prefix, return; } #endif - bch2_print_string_as_lines(KERN_ERR, str); -} - -void bch2_print_str(struct bch_fs *c, const char *prefix, const char *str) -{ - __bch2_print_str(c, prefix, str); + bch2_print_string_as_lines(prefix, str); } __printf(2, 0) @@ -149,6 +167,14 @@ void bch2_print_opts(struct bch_opts *opts, const char *fmt, ...) void __bch2_print(struct bch_fs *c, const char *fmt, ...) { + if (!should_print_loglevel(c, fmt)) + return; + +#ifndef __KERNEL__ + if (fmt[0] == KERN_SOH[0]) + fmt += 2; +#endif + struct stdio_redirect *stdio = bch2_fs_stdio_redirect(c); va_list args; @@ -241,14 +267,11 @@ static struct bch_fs *__bch2_uuid_to_fs(__uuid_t uuid) struct bch_fs *bch2_uuid_to_fs(__uuid_t uuid) { - struct bch_fs *c; + guard(mutex)(&bch_fs_list_lock); - mutex_lock(&bch_fs_list_lock); - c = __bch2_uuid_to_fs(uuid); + struct bch_fs *c = __bch2_uuid_to_fs(uuid); if (c) closure_get(&c->cl); - mutex_unlock(&bch_fs_list_lock); - return c; } @@ -392,9 +415,8 @@ void bch2_fs_read_only(struct bch_fs *c) bch2_fs_mark_clean(c); } else { /* Make sure error counts/counters are persisted */ - mutex_lock(&c->sb_lock); + guard(mutex)(&c->sb_lock); bch2_write_super(c); - mutex_unlock(&c->sb_lock); bch_verbose(c, "done going read-only, filesystem not clean"); } @@ -405,9 +427,8 @@ static void bch2_fs_read_only_work(struct work_struct *work) struct bch_fs *c = container_of(work, struct bch_fs, read_only_work); - down_write(&c->state_lock); + guard(rwsem_write)(&c->state_lock); bch2_fs_read_only(c); - up_write(&c->state_lock); } static void bch2_fs_read_only_async(struct bch_fs *c) @@ -487,11 +508,11 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) ret = bch2_fs_init_rw(c); if (ret) - goto err; + return ret; ret = bch2_sb_members_v2_init(c); if (ret) - goto err; + return ret; clear_bit(BCH_FS_clean_shutdown, &c->flags); @@ -510,15 +531,20 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) * overwriting whatever was there previously, and there must always be * at least one non-flush write in the journal or recovery will fail: */ - spin_lock(&c->journal.lock); - set_bit(JOURNAL_need_flush_write, &c->journal.flags); - set_bit(JOURNAL_running, &c->journal.flags); - bch2_journal_space_available(&c->journal); - spin_unlock(&c->journal.lock); + scoped_guard(spinlock, &c->journal.lock) { + set_bit(JOURNAL_need_flush_write, &c->journal.flags); + set_bit(JOURNAL_running, &c->journal.flags); + bch2_journal_space_available(&c->journal); + } ret = bch2_fs_mark_dirty(c); if (ret) - goto err; + return ret; + + /* + * Don't jump to our error path and call bch2_fs_read_only() unless we + * successfully marked the filesystem dirty + */ ret = bch2_journal_reclaim_start(&c->journal); if (ret) @@ -571,11 +597,8 @@ int bch2_fs_read_write(struct bch_fs *c) int bch2_fs_read_write_early(struct bch_fs *c) { - down_write(&c->state_lock); - int ret = __bch2_fs_read_write(c, true); - up_write(&c->state_lock); - - return ret; + guard(rwsem_write)(&c->state_lock); + return __bch2_fs_read_write(c, true); } /* Filesystem startup/shutdown: */ @@
-673,9 +696,8 @@ void __bch2_fs_stop(struct bch_fs *c) set_bit(BCH_FS_stopping, &c->flags); - down_write(&c->state_lock); - bch2_fs_read_only(c); - up_write(&c->state_lock); + scoped_guard(rwsem_write, &c->state_lock) + bch2_fs_read_only(c); for (unsigned i = 0; i < c->sb.nr_devices; i++) { struct bch_dev *ca = rcu_dereference_protected(c->devs[i], true); @@ -711,9 +733,8 @@ void __bch2_fs_stop(struct bch_fs *c) void bch2_fs_free(struct bch_fs *c) { - mutex_lock(&bch_fs_list_lock); - list_del(&c->list); - mutex_unlock(&bch_fs_list_lock); + scoped_guard(mutex, &bch_fs_list_lock) + list_del(&c->list); closure_sync(&c->cl); closure_debug_destroy(&c->cl); @@ -775,21 +796,19 @@ static int bch2_fs_online(struct bch_fs *c) return ret; } - down_write(&c->state_lock); + guard(rwsem_write)(&c->state_lock); for_each_member_device(c, ca) { ret = bch2_dev_sysfs_online(c, ca); if (ret) { bch_err(c, "error creating sysfs objects"); bch2_dev_put(ca); - goto err; + return ret; } } BUG_ON(!list_empty(&c->list)); list_add(&c->list, &bch_fs_list); -err: - up_write(&c->state_lock); return ret; } @@ -826,8 +845,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts, bch_sb_handles *sbs) { struct bch_fs *c; - struct printbuf name = PRINTBUF; unsigned i, iter_size; + CLASS(printbuf, name)(); int ret = 0; c = kvmalloc(sizeof(struct bch_fs), GFP_KERNEL|__GFP_ZERO); @@ -914,9 +933,8 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts, if (ret) goto err; - mutex_lock(&c->sb_lock); - ret = bch2_sb_to_fs(c, sb); - mutex_unlock(&c->sb_lock); + scoped_guard(mutex, &c->sb_lock) + ret = bch2_sb_to_fs(c, sb); if (ret) goto err; @@ -968,7 +986,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts, goto err; strscpy(c->name, name.buf, sizeof(c->name)); - printbuf_exit(&name); iter_size = sizeof(struct sort_iter) + (btree_blocks(c) + 1) * 2 * @@ -1060,12 +1077,13 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts, &c->clock_journal_res, (sizeof(struct jset_entry_clock) / sizeof(u64)) * 2); - mutex_lock(&bch_fs_list_lock); - ret = bch2_fs_online(c); - mutex_unlock(&bch_fs_list_lock); + scoped_guard(mutex, &bch_fs_list_lock) + ret = bch2_fs_online(c); if (ret) goto err; + + c->recovery_task = current; out: return c; err: @@ -1133,8 +1151,8 @@ static bool bch2_fs_may_start(struct bch_fs *c) case BCH_DEGRADED_yes: flags |= BCH_FORCE_IF_DEGRADED; break; - default: - mutex_lock(&c->sb_lock); + default: { + guard(mutex)(&c->sb_lock); for (unsigned i = 0; i < c->disk_sb.sb->nr_devices; i++) { if (!bch2_member_exists(c->disk_sb.sb, i)) continue; @@ -1143,13 +1161,11 @@ static bool bch2_fs_may_start(struct bch_fs *c) if (!bch2_dev_is_online(ca) && (ca->mi.state == BCH_MEMBER_STATE_rw || - ca->mi.state == BCH_MEMBER_STATE_ro)) { - mutex_unlock(&c->sb_lock); + ca->mi.state == BCH_MEMBER_STATE_ro)) return false; - } } - mutex_unlock(&c->sb_lock); break; + } } return bch2_have_enough_devs(c, c->online_devs, flags, true); @@ -1160,6 +1176,8 @@ int bch2_fs_start(struct bch_fs *c) time64_t now = ktime_get_real_seconds(); int ret = 0; + BUG_ON(test_bit(BCH_FS_started, &c->flags)); + print_mount_opts(c); if (c->cf_encoding) @@ -1171,44 +1189,29 @@ int bch2_fs_start(struct bch_fs *c) if (!bch2_fs_may_start(c)) return bch_err_throw(c, insufficient_devices_to_start); - down_write(&c->state_lock); - mutex_lock(&c->sb_lock); + scoped_guard(rwsem_write, &c->state_lock) { + guard(mutex)(&c->sb_lock); + if 
(!bch2_sb_field_get_minsize(&c->disk_sb, ext, + sizeof(struct bch_sb_field_ext) / sizeof(u64))) { + ret = bch_err_throw(c, ENOSPC_sb); + goto err; + } - BUG_ON(test_bit(BCH_FS_started, &c->flags)); + ret = bch2_sb_members_v2_init(c); + if (ret) + goto err; - if (!bch2_sb_field_get_minsize(&c->disk_sb, ext, - sizeof(struct bch_sb_field_ext) / sizeof(u64))) { - mutex_unlock(&c->sb_lock); - up_write(&c->state_lock); - ret = bch_err_throw(c, ENOSPC_sb); - goto err; - } + scoped_guard(rcu) + for_each_online_member_rcu(c, ca) { + bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount = + cpu_to_le64(now); + if (ca->mi.state == BCH_MEMBER_STATE_rw) + bch2_dev_allocator_add(c, ca); + } - ret = bch2_sb_members_v2_init(c); - if (ret) { - mutex_unlock(&c->sb_lock); - up_write(&c->state_lock); - goto err; + bch2_recalc_capacity(c); } - scoped_guard(rcu) - for_each_online_member_rcu(c, ca) - bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount = - cpu_to_le64(now); - - /* - * Dno't write superblock yet: recovery might have to downgrade - */ - mutex_unlock(&c->sb_lock); - - scoped_guard(rcu) - for_each_online_member_rcu(c, ca) - if (ca->mi.state == BCH_MEMBER_STATE_rw) - bch2_dev_allocator_add(c, ca); - bch2_recalc_capacity(c); - up_write(&c->state_lock); - - c->recovery_task = current; ret = BCH_SB_INITIALIZED(c->disk_sb.sb) ? bch2_fs_recovery(c) : bch2_fs_initialize(c); @@ -1229,13 +1232,12 @@ int bch2_fs_start(struct bch_fs *c) set_bit(BCH_FS_started, &c->flags); wake_up(&c->ro_ref_wait); - down_write(&c->state_lock); - if (c->opts.read_only) - bch2_fs_read_only(c); - else if (!test_bit(BCH_FS_rw, &c->flags)) - ret = bch2_fs_read_write(c); - up_write(&c->state_lock); - + scoped_guard(rwsem_write, &c->state_lock) { + if (c->opts.read_only) + bch2_fs_read_only(c); + else if (!test_bit(BCH_FS_rw, &c->flags)) + ret = bch2_fs_read_write(c); + } err: if (ret) bch_err_msg(c, ret, "starting filesystem"); @@ -1280,7 +1282,7 @@ static int bch2_dev_in_fs(struct bch_sb_handle *fs, if (fs->sb->seq == sb->sb->seq && fs->sb->write_time != sb->sb->write_time) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_str(&buf, "Split brain detected between "); prt_bdevname(&buf, sb->bdev); @@ -1305,7 +1307,6 @@ static int bch2_dev_in_fs(struct bch_sb_handle *fs, prt_printf(&buf, "Not using older sb"); pr_err("%s", buf.buf); - printbuf_exit(&buf); if (!opts->no_splitbrain_check) return -BCH_ERR_device_splitbrain; @@ -1316,7 +1317,7 @@ static int bch2_dev_in_fs(struct bch_sb_handle *fs, u64 seq_from_member = le64_to_cpu(sb->sb->seq); if (seq_from_fs && seq_from_fs < seq_from_member) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); prt_str(&buf, "Split brain detected between "); prt_bdevname(&buf, sb->bdev); @@ -1338,7 +1339,6 @@ static int bch2_dev_in_fs(struct bch_sb_handle *fs, } pr_err("%s", buf.buf); - printbuf_exit(&buf); if (!opts->no_splitbrain_check) return -BCH_ERR_device_splitbrain; @@ -1553,18 +1553,16 @@ static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx) struct bch_dev *ca = NULL; if (bch2_fs_init_fault("dev_alloc")) - goto err; + return bch_err_throw(c, ENOMEM_dev_alloc); ca = __bch2_dev_alloc(c, &member); if (!ca) - goto err; + return bch_err_throw(c, ENOMEM_dev_alloc); ca->fs = c; bch2_dev_attach(c, ca, dev_idx); return 0; -err: - return bch_err_throw(c, ENOMEM_dev_alloc); } static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb) @@ -1579,7 +1577,10 @@ static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle 
*sb) if (get_capacity(sb->bdev->bd_disk) < ca->mi.bucket_size * ca->mi.nbuckets) { - bch_err(ca, "cannot online: device too small"); + bch_err(ca, "cannot online: device too small (capacity %llu filesystem size %llu nbuckets %llu)", + get_capacity(sb->bdev->bd_disk), + ca->mi.bucket_size * ca->mi.nbuckets, + ca->mi.nbuckets); return bch_err_throw(ca->fs, device_size_too_small); } @@ -1590,10 +1591,9 @@ static int __bch2_dev_attach_bdev(struct bch_dev *ca, struct bch_sb_handle *sb) if (ret) return ret; - struct printbuf name = PRINTBUF; + CLASS(printbuf, name)(); prt_bdevname(&name, sb->bdev); strscpy(ca->name, name.buf, sizeof(ca->name)); - printbuf_exit(&name); /* Commit: */ ca->disk_sb = *sb; @@ -1725,7 +1725,6 @@ static void __bch2_dev_read_write(struct bch_fs *c, struct bch_dev *ca) int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca, enum bch_member_state new_state, int flags) { - struct bch_member *m; int ret = 0; if (ca->mi.state == new_state) @@ -1739,11 +1738,11 @@ int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca, bch_notice(ca, "%s", bch2_member_states[new_state]); - mutex_lock(&c->sb_lock); - m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); - SET_BCH_MEMBER_STATE(m, new_state); - bch2_write_super(c); - mutex_unlock(&c->sb_lock); + scoped_guard(mutex, &c->sb_lock) { + struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); + SET_BCH_MEMBER_STATE(m, new_state); + bch2_write_super(c); + } if (new_state == BCH_MEMBER_STATE_rw) __bch2_dev_read_write(c, ca); @@ -1756,26 +1755,20 @@ int __bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca, int bch2_dev_set_state(struct bch_fs *c, struct bch_dev *ca, enum bch_member_state new_state, int flags) { - int ret; - - down_write(&c->state_lock); - ret = __bch2_dev_set_state(c, ca, new_state, flags); - up_write(&c->state_lock); - - return ret; + guard(rwsem_write)(&c->state_lock); + return __bch2_dev_set_state(c, ca, new_state, flags); } /* Device add/removal: */ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) { - struct bch_member *m; unsigned dev_idx = ca->dev_idx, data; bool fast_device_removal = !bch2_request_incompat_feature(c, bcachefs_metadata_version_fast_device_removal); int ret; - down_write(&c->state_lock); + guard(rwsem_write)(&c->state_lock); /* * We consume a reference to ca->ref, regardless of whether we succeed @@ -1842,20 +1835,17 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) data = bch2_dev_has_data(c, ca); if (data) { - struct printbuf data_has = PRINTBUF; - + CLASS(printbuf, data_has)(); prt_bitflags(&data_has, __bch2_data_types, data); bch_err(ca, "Remove failed, still has data (%s)", data_has.buf); - printbuf_exit(&data_has); ret = -EBUSY; goto err; } __bch2_dev_offline(c, ca); - mutex_lock(&c->sb_lock); - rcu_assign_pointer(c->devs[ca->dev_idx], NULL); - mutex_unlock(&c->sb_lock); + scoped_guard(mutex, &c->sb_lock) + rcu_assign_pointer(c->devs[ca->dev_idx], NULL); #ifndef CONFIG_BCACHEFS_DEBUG percpu_ref_kill(&ca->ref); @@ -1871,25 +1861,23 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags) * Free this device's slot in the bch_member array - all pointers to * this device must be gone: */ - mutex_lock(&c->sb_lock); - m = bch2_members_v2_get_mut(c->disk_sb.sb, dev_idx); + scoped_guard(mutex, &c->sb_lock) { + struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, dev_idx); - if (fast_device_removal) - m->uuid = BCH_SB_MEMBER_DELETED_UUID; - else - memset(&m->uuid, 0, sizeof(m->uuid)); + if 
(fast_device_removal) + m->uuid = BCH_SB_MEMBER_DELETED_UUID; + else + memset(&m->uuid, 0, sizeof(m->uuid)); - bch2_write_super(c); + bch2_write_super(c); + } - mutex_unlock(&c->sb_lock); - up_write(&c->state_lock); return 0; err: if (test_bit(BCH_FS_rw, &c->flags) && ca->mi.state == BCH_MEMBER_STATE_rw && !enumerated_ref_is_zero(&ca->io_ref[READ])) __bch2_dev_read_write(c, ca); - up_write(&c->state_lock); return ret; } @@ -1899,8 +1887,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path) struct bch_opts opts = bch2_opts_empty(); struct bch_sb_handle sb = {}; struct bch_dev *ca = NULL; - struct printbuf errbuf = PRINTBUF; - struct printbuf label = PRINTBUF; + CLASS(printbuf, label)(); int ret = 0; ret = bch2_read_super(path, &opts, &sb); @@ -1919,12 +1906,12 @@ int bch2_dev_add(struct bch_fs *c, const char *path) } if (list_empty(&c->list)) { - mutex_lock(&bch_fs_list_lock); - if (__bch2_uuid_to_fs(c->sb.uuid)) - ret = bch_err_throw(c, filesystem_uuid_already_open); - else - list_add(&c->list, &bch_fs_list); - mutex_unlock(&bch_fs_list_lock); + scoped_guard(mutex, &bch_fs_list_lock) { + if (__bch2_uuid_to_fs(c->sb.uuid)) + ret = bch_err_throw(c, filesystem_uuid_already_open); + else + list_add(&c->list, &bch_fs_list); + } if (ret) { bch_err(c, "filesystem UUID already open"); @@ -1946,105 +1933,95 @@ int bch2_dev_add(struct bch_fs *c, const char *path) if (ret) goto err; - down_write(&c->state_lock); - mutex_lock(&c->sb_lock); - SET_BCH_SB_MULTI_DEVICE(c->disk_sb.sb, true); + scoped_guard(rwsem_write, &c->state_lock) { + scoped_guard(mutex, &c->sb_lock) { + SET_BCH_SB_MULTI_DEVICE(c->disk_sb.sb, true); - ret = bch2_sb_from_fs(c, ca); - bch_err_msg(c, ret, "setting up new superblock"); - if (ret) - goto err_unlock; + ret = bch2_sb_from_fs(c, ca); + bch_err_msg(c, ret, "setting up new superblock"); + if (ret) + goto err; - if (dynamic_fault("bcachefs:add:no_slot")) - goto err_unlock; + if (dynamic_fault("bcachefs:add:no_slot")) + goto err; - ret = bch2_sb_member_alloc(c); - if (ret < 0) { - bch_err_msg(c, ret, "setting up new superblock"); - goto err_unlock; - } - unsigned dev_idx = ret; - ret = 0; + ret = bch2_sb_member_alloc(c); + if (ret < 0) { + bch_err_msg(c, ret, "setting up new superblock"); + goto err; + } + unsigned dev_idx = ret; + ret = 0; - /* success: */ + /* success: */ - dev_mi.last_mount = cpu_to_le64(ktime_get_real_seconds()); - *bch2_members_v2_get_mut(c->disk_sb.sb, dev_idx) = dev_mi; + dev_mi.last_mount = cpu_to_le64(ktime_get_real_seconds()); + *bch2_members_v2_get_mut(c->disk_sb.sb, dev_idx) = dev_mi; - ca->disk_sb.sb->dev_idx = dev_idx; - bch2_dev_attach(c, ca, dev_idx); + ca->disk_sb.sb->dev_idx = dev_idx; + bch2_dev_attach(c, ca, dev_idx); - set_bit(ca->dev_idx, c->online_devs.d); + set_bit(ca->dev_idx, c->online_devs.d); - if (BCH_MEMBER_GROUP(&dev_mi)) { - ret = __bch2_dev_group_set(c, ca, label.buf); - bch_err_msg(c, ret, "creating new label"); - if (ret) { - mutex_unlock(&c->sb_lock); - goto err_late; - } - } + if (BCH_MEMBER_GROUP(&dev_mi)) { + ret = __bch2_dev_group_set(c, ca, label.buf); + bch_err_msg(c, ret, "creating new label"); + if (ret) + goto err_late; + } - bch2_write_super(c); - mutex_unlock(&c->sb_lock); + bch2_write_super(c); + } - if (test_bit(BCH_FS_started, &c->flags)) { ret = bch2_dev_usage_init(ca, false); if (ret) goto err_late; - ret = bch2_trans_mark_dev_sb(c, ca, BTREE_TRIGGER_transactional); - bch_err_msg(ca, ret, "marking new superblock"); - if (ret) - goto err_late; - - ret = bch2_fs_freespace_init(c); - bch_err_msg(ca, ret, 
"initializing free space"); - if (ret) - goto err_late; + if (test_bit(BCH_FS_started, &c->flags)) { + ret = bch2_trans_mark_dev_sb(c, ca, BTREE_TRIGGER_transactional); + bch_err_msg(ca, ret, "marking new superblock"); + if (ret) + goto err_late; - if (ca->mi.state == BCH_MEMBER_STATE_rw) - __bch2_dev_read_write(c, ca); + ret = bch2_fs_freespace_init(c); + bch_err_msg(ca, ret, "initializing free space"); + if (ret) + goto err_late; - ret = bch2_dev_journal_alloc(ca, false); - bch_err_msg(c, ret, "allocating journal"); - if (ret) - goto err_late; - } + if (ca->mi.state == BCH_MEMBER_STATE_rw) + __bch2_dev_read_write(c, ca); - /* - * We just changed the superblock UUID, invalidate cache and send a - * uevent to update /dev/disk/by-uuid - */ - invalidate_bdev(ca->disk_sb.bdev); + ret = bch2_dev_journal_alloc(ca, false); + bch_err_msg(c, ret, "allocating journal"); + if (ret) + goto err_late; + } - char uuid_str[37]; - snprintf(uuid_str, sizeof(uuid_str), "UUID=%pUb", &c->sb.uuid); + /* + * We just changed the superblock UUID, invalidate cache and send a + * uevent to update /dev/disk/by-uuid + */ + invalidate_bdev(ca->disk_sb.bdev); - char *envp[] = { - "CHANGE=uuid", - uuid_str, - NULL, - }; - kobject_uevent_env(&ca->disk_sb.bdev->bd_device.kobj, KOBJ_CHANGE, envp); + char uuid_str[37]; + snprintf(uuid_str, sizeof(uuid_str), "UUID=%pUb", &c->sb.uuid); - up_write(&c->state_lock); + char *envp[] = { + "CHANGE=uuid", + uuid_str, + NULL, + }; + kobject_uevent_env(&ca->disk_sb.bdev->bd_device.kobj, KOBJ_CHANGE, envp); + } out: - printbuf_exit(&label); - printbuf_exit(&errbuf); bch_err_fn(c, ret); return ret; - -err_unlock: - mutex_unlock(&c->sb_lock); - up_write(&c->state_lock); err: if (ca) bch2_dev_free(ca); bch2_free_super(&sb); goto out; err_late: - up_write(&c->state_lock); ca = NULL; goto err; } @@ -2058,13 +2035,11 @@ int bch2_dev_online(struct bch_fs *c, const char *path) unsigned dev_idx; int ret; - down_write(&c->state_lock); + guard(rwsem_write)(&c->state_lock); ret = bch2_read_super(path, &opts, &sb); - if (ret) { - up_write(&c->state_lock); + if (ret) return ret; - } dev_idx = sb.sb->dev_idx; @@ -2101,39 +2076,33 @@ int bch2_dev_online(struct bch_fs *c, const char *path) goto err; } - mutex_lock(&c->sb_lock); - bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount = - cpu_to_le64(ktime_get_real_seconds()); - bch2_write_super(c); - mutex_unlock(&c->sb_lock); + scoped_guard(mutex, &c->sb_lock) { + bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount = + cpu_to_le64(ktime_get_real_seconds()); + bch2_write_super(c); + } - up_write(&c->state_lock); return 0; err: - up_write(&c->state_lock); bch2_free_super(&sb); return ret; } int bch2_dev_offline(struct bch_fs *c, struct bch_dev *ca, int flags) { - down_write(&c->state_lock); + guard(rwsem_write)(&c->state_lock); if (!bch2_dev_is_online(ca)) { bch_err(ca, "Already offline"); - up_write(&c->state_lock); return 0; } if (!bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_failed, flags)) { bch_err(ca, "Cannot offline required disk"); - up_write(&c->state_lock); return bch_err_throw(c, device_state_not_allowed); } __bch2_dev_offline(c, ca); - - up_write(&c->state_lock); return 0; } @@ -2151,60 +2120,54 @@ static int __bch2_dev_resize_alloc(struct bch_dev *ca, u64 old_nbuckets, u64 new int bch2_dev_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) { - struct bch_member *m; u64 old_nbuckets; int ret = 0; - down_write(&c->state_lock); + guard(rwsem_write)(&c->state_lock); old_nbuckets = ca->mi.nbuckets; if (nbuckets < 
ca->mi.nbuckets) { bch_err(ca, "Cannot shrink yet"); - ret = -EINVAL; - goto err; + return -EINVAL; } if (nbuckets > BCH_MEMBER_NBUCKETS_MAX) { bch_err(ca, "New device size too big (%llu greater than max %u)", nbuckets, BCH_MEMBER_NBUCKETS_MAX); - ret = bch_err_throw(c, device_size_too_big); - goto err; + return bch_err_throw(c, device_size_too_big); } if (bch2_dev_is_online(ca) && get_capacity(ca->disk_sb.bdev->bd_disk) < ca->mi.bucket_size * nbuckets) { bch_err(ca, "New size larger than device"); - ret = bch_err_throw(c, device_size_too_small); - goto err; + return bch_err_throw(c, device_size_too_small); } ret = bch2_dev_buckets_resize(c, ca, nbuckets); bch_err_msg(ca, ret, "resizing buckets"); if (ret) - goto err; + return ret; ret = bch2_trans_mark_dev_sb(c, ca, BTREE_TRIGGER_transactional); if (ret) - goto err; + return ret; - mutex_lock(&c->sb_lock); - m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); - m->nbuckets = cpu_to_le64(nbuckets); + scoped_guard(mutex, &c->sb_lock) { + struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); + m->nbuckets = cpu_to_le64(nbuckets); - bch2_write_super(c); - mutex_unlock(&c->sb_lock); + bch2_write_super(c); + } if (ca->mi.freespace_initialized) { ret = __bch2_dev_resize_alloc(ca, old_nbuckets, nbuckets); if (ret) - goto err; + return ret; } bch2_recalc_capacity(c); -err: - up_write(&c->state_lock); - return ret; + return 0; } int bch2_fs_resize_on_mount(struct bch_fs *c) @@ -2222,26 +2185,24 @@ int bch2_fs_resize_on_mount(struct bch_fs *c) if (ret) { enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_fs_resize_on_mount); - up_write(&c->state_lock); return ret; } - mutex_lock(&c->sb_lock); - struct bch_member *m = - bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); - m->nbuckets = cpu_to_le64(new_nbuckets); - SET_BCH_MEMBER_RESIZE_ON_MOUNT(m, false); + scoped_guard(mutex, &c->sb_lock) { + struct bch_member *m = + bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); + m->nbuckets = cpu_to_le64(new_nbuckets); + SET_BCH_MEMBER_RESIZE_ON_MOUNT(m, false); - c->disk_sb.sb->features[0] &= ~cpu_to_le64(BIT_ULL(BCH_FEATURE_small_image)); - bch2_write_super(c); - mutex_unlock(&c->sb_lock); + c->disk_sb.sb->features[0] &= ~cpu_to_le64(BIT_ULL(BCH_FEATURE_small_image)); + bch2_write_super(c); + } if (ca->mi.freespace_initialized) { ret = __bch2_dev_resize_alloc(ca, old_nbuckets, new_nbuckets); if (ret) { enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_fs_resize_on_mount); - up_write(&c->state_lock); return ret; } } @@ -2280,6 +2241,10 @@ static struct bch_fs *bdev_get_fs(struct block_device *bdev) return c; } +DEFINE_CLASS(bdev_get_fs, struct bch_fs *, + bch2_ro_ref_put(_T), bdev_get_fs(bdev), + struct block_device *bdev); + /* returns with ref on ca->ref */ static struct bch_dev *bdev_to_bch_dev(struct bch_fs *c, struct block_device *bdev) { @@ -2291,7 +2256,7 @@ static struct bch_dev *bdev_to_bch_dev(struct bch_fs *c, struct block_device *bd static void bch2_fs_bdev_mark_dead(struct block_device *bdev, bool surprise) { - struct bch_fs *c = bdev_get_fs(bdev); + CLASS(bdev_get_fs, c)(bdev); if (!c) return; @@ -2305,48 +2270,45 @@ static void bch2_fs_bdev_mark_dead(struct block_device *bdev, bool surprise) down_read(&sb->s_umount); } - down_write(&c->state_lock); + guard(rwsem_write)(&c->state_lock); + struct bch_dev *ca = bdev_to_bch_dev(c, bdev); - if (!ca) - goto unlock; + if (ca) { + bool dev = bch2_dev_state_allowed(c, ca, + BCH_MEMBER_STATE_failed, + BCH_FORCE_IF_DEGRADED); + + if (!dev && sb) { + if (!surprise) 
+ sync_filesystem(sb); + shrink_dcache_sb(sb); + evict_inodes(sb); + } - bool dev = bch2_dev_state_allowed(c, ca, - BCH_MEMBER_STATE_failed, - BCH_FORCE_IF_DEGRADED); + CLASS(printbuf, buf)(); + __bch2_log_msg_start(ca->name, &buf); - if (!dev && sb) { - if (!surprise) - sync_filesystem(sb); - shrink_dcache_sb(sb); - evict_inodes(sb); - } + prt_printf(&buf, "offline from block layer"); - struct printbuf buf = PRINTBUF; - __bch2_log_msg_start(ca->name, &buf); + if (dev) { + __bch2_dev_offline(c, ca); + } else { + bch2_journal_flush(&c->journal); + bch2_fs_emergency_read_only2(c, &buf); + } - prt_printf(&buf, "offline from block layer"); + bch2_print_str(c, KERN_ERR, buf.buf); - if (dev) { - __bch2_dev_offline(c, ca); - } else { - bch2_journal_flush(&c->journal); - bch2_fs_emergency_read_only2(c, &buf); + bch2_dev_put(ca); } - bch2_print_str(c, KERN_ERR, buf.buf); - printbuf_exit(&buf); - - bch2_dev_put(ca); -unlock: if (sb) up_read(&sb->s_umount); - up_write(&c->state_lock); - bch2_ro_ref_put(c); } static void bch2_fs_bdev_sync(struct block_device *bdev) { - struct bch_fs *c = bdev_get_fs(bdev); + CLASS(bdev_get_fs, c)(bdev); if (!c) return; @@ -2357,12 +2319,9 @@ static void bch2_fs_bdev_sync(struct block_device *bdev) * unmounted - we only take this to avoid a warning in * sync_filesystem: */ - down_read(&sb->s_umount); + guard(rwsem_read)(&sb->s_umount); sync_filesystem(sb); - up_read(&sb->s_umount); } - - bch2_ro_ref_put(c); } const struct blk_holder_ops bch2_sb_handle_bdev_ops = { @@ -2384,7 +2343,6 @@ struct bch_fs *bch2_fs_open(darray_const_str *devices, bch_sb_handles sbs = {}; struct bch_fs *c = NULL; struct bch_sb_handle *best = NULL; - struct printbuf errbuf = PRINTBUF; int ret = 0; if (!try_module_get(THIS_MODULE)) @@ -2439,15 +2397,12 @@ struct bch_fs *bch2_fs_open(darray_const_str *devices, if (ret) goto err; - down_write(&c->state_lock); - darray_for_each(sbs, sb) { - ret = bch2_dev_attach_bdev(c, sb); - if (ret) { - up_write(&c->state_lock); - goto err; + scoped_guard(rwsem_write, &c->state_lock) + darray_for_each(sbs, sb) { + ret = bch2_dev_attach_bdev(c, sb); + if (ret) + goto err; } - } - up_write(&c->state_lock); if (!c->opts.nostart) { ret = bch2_fs_start(c); @@ -2458,7 +2413,6 @@ out: darray_for_each(sbs, sb) bch2_free_super(sb); darray_exit(&sbs); - printbuf_exit(&errbuf); module_put(THIS_MODULE); return c; err_print: diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index 50cf51654265..158f526e3dcc 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -62,7 +62,7 @@ static ssize_t fn ## _to_text(struct printbuf *, \ static ssize_t fn ## _show(struct kobject *kobj, struct attribute *attr,\ char *buf) \ { \ - struct printbuf out = PRINTBUF; \ + CLASS(printbuf, out)(); \ ssize_t ret = fn ## _to_text(&out, kobj, attr); \ \ if (out.pos && out.buf[out.pos - 1] != '\n') \ @@ -75,7 +75,6 @@ static ssize_t fn ## _show(struct kobject *kobj, struct attribute *attr,\ ret = min_t(size_t, out.pos, PAGE_SIZE - 1); \ memcpy(buf, out.buf, ret); \ } \ - printbuf_exit(&out); \ return bch2_err_class(ret); \ } \ \ @@ -172,7 +171,9 @@ read_attribute(io_latency_read); read_attribute(io_latency_write); read_attribute(io_latency_stats_read); read_attribute(io_latency_stats_write); +#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT read_attribute(congested); +#endif read_attribute(btree_write_stats); @@ -233,14 +234,13 @@ static size_t bch2_btree_cache_size(struct bch_fs *c) size_t ret = 0; struct btree *b; - mutex_lock(&bc->lock); + guard(mutex)(&bc->lock); list_for_each_entry(b, 
&bc->live[0].list, list) ret += btree_buf_bytes(b); list_for_each_entry(b, &bc->live[1].list, list) ret += btree_buf_bytes(b); list_for_each_entry(b, &bc->freeable, list) ret += btree_buf_bytes(b); - mutex_unlock(&bc->lock); return ret; } @@ -563,9 +563,8 @@ STORE(bch2_fs) closure_wake_up(&c->freelist_wait); if (attr == &sysfs_trigger_recalc_capacity) { - down_read(&c->state_lock); + guard(rwsem_read)(&c->state_lock); bch2_recalc_capacity(c); - up_read(&c->state_lock); } if (attr == &sysfs_trigger_delete_dead_snapshots) @@ -942,9 +941,10 @@ SHOW(bch2_dev) if (attr == &sysfs_io_latency_stats_write) bch2_time_stats_to_text(out, &ca->io_latency[WRITE].stats); - sysfs_printf(congested, "%u%%", - clamp(atomic_read(&ca->congested), 0, CONGESTED_MAX) - * 100 / CONGESTED_MAX); +#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT + if (attr == &sysfs_congested) + bch2_dev_congested_to_text(out, ca); +#endif if (attr == &sysfs_alloc_debug) bch2_dev_alloc_debug_to_text(out, ca); @@ -1015,7 +1015,9 @@ struct attribute *bch2_dev_files[] = { &sysfs_io_latency_write, &sysfs_io_latency_stats_read, &sysfs_io_latency_stats_write, +#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT &sysfs_congested, +#endif &sysfs_read_fua_test, diff --git a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c index 782a05fe7656..ea27df30cfcb 100644 --- a/fs/bcachefs/tests.c +++ b/fs/bcachefs/tests.c @@ -31,7 +31,7 @@ static void delete_test_keys(struct bch_fs *c) static int test_delete(struct bch_fs *c, u64 nr) { - struct btree_trans *trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); struct btree_iter iter; struct bkey_i_cookie k; int ret; @@ -66,13 +66,12 @@ static int test_delete(struct bch_fs *c, u64 nr) goto err; err: bch2_trans_iter_exit(trans, &iter); - bch2_trans_put(trans); return ret; } static int test_delete_written(struct bch_fs *c, u64 nr) { - struct btree_trans *trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); struct btree_iter iter; struct bkey_i_cookie k; int ret; @@ -101,7 +100,6 @@ static int test_delete_written(struct bch_fs *c, u64 nr) goto err; err: bch2_trans_iter_exit(trans, &iter); - bch2_trans_put(trans); return ret; } @@ -130,13 +128,14 @@ static int test_iterate(struct bch_fs *c, u64 nr) pr_info("iterating forwards"); i = 0; - ret = bch2_trans_run(c, - for_each_btree_key_max(trans, iter, BTREE_ID_xattrs, - SPOS(0, 0, U32_MAX), POS(0, U64_MAX), - 0, k, ({ + CLASS(btree_trans, trans)(c); + + ret = for_each_btree_key_max(trans, iter, BTREE_ID_xattrs, + SPOS(0, 0, U32_MAX), POS(0, U64_MAX), + 0, k, ({ BUG_ON(k.k->p.offset != i++); 0; - }))); + })); bch_err_msg(c, ret, "error iterating forwards"); if (ret) return ret; @@ -145,12 +144,11 @@ static int test_iterate(struct bch_fs *c, u64 nr) pr_info("iterating backwards"); - ret = bch2_trans_run(c, - for_each_btree_key_reverse(trans, iter, BTREE_ID_xattrs, + ret = for_each_btree_key_reverse(trans, iter, BTREE_ID_xattrs, SPOS(0, U64_MAX, U32_MAX), 0, k, ({ BUG_ON(k.k->p.offset != --i); 0; - }))); + })); bch_err_msg(c, ret, "error iterating backwards"); if (ret) return ret; @@ -185,14 +183,15 @@ static int test_iterate_extents(struct bch_fs *c, u64 nr) pr_info("iterating forwards"); i = 0; - ret = bch2_trans_run(c, - for_each_btree_key_max(trans, iter, BTREE_ID_extents, - SPOS(0, 0, U32_MAX), POS(0, U64_MAX), - 0, k, ({ + CLASS(btree_trans, trans)(c); + + ret = for_each_btree_key_max(trans, iter, BTREE_ID_extents, + SPOS(0, 0, U32_MAX), POS(0, U64_MAX), + 0, k, ({ BUG_ON(bkey_start_offset(k.k) != i); i = k.k->p.offset; 0; - }))); + })); bch_err_msg(c, ret, "error iterating 
forwards"); if (ret) return ret; @@ -201,13 +200,12 @@ static int test_iterate_extents(struct bch_fs *c, u64 nr) pr_info("iterating backwards"); - ret = bch2_trans_run(c, - for_each_btree_key_reverse(trans, iter, BTREE_ID_extents, + ret = for_each_btree_key_reverse(trans, iter, BTREE_ID_extents, SPOS(0, U64_MAX, U32_MAX), 0, k, ({ BUG_ON(k.k->p.offset != i); i = bkey_start_offset(k.k); 0; - }))); + })); bch_err_msg(c, ret, "error iterating backwards"); if (ret) return ret; @@ -241,14 +239,15 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr) pr_info("iterating forwards"); i = 0; - ret = bch2_trans_run(c, - for_each_btree_key_max(trans, iter, BTREE_ID_xattrs, - SPOS(0, 0, U32_MAX), POS(0, U64_MAX), - 0, k, ({ + CLASS(btree_trans, trans)(c); + + ret = for_each_btree_key_max(trans, iter, BTREE_ID_xattrs, + SPOS(0, 0, U32_MAX), POS(0, U64_MAX), + 0, k, ({ BUG_ON(k.k->p.offset != i); i += 2; 0; - }))); + })); bch_err_msg(c, ret, "error iterating forwards"); if (ret) return ret; @@ -258,10 +257,9 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr) pr_info("iterating forwards by slots"); i = 0; - ret = bch2_trans_run(c, - for_each_btree_key_max(trans, iter, BTREE_ID_xattrs, - SPOS(0, 0, U32_MAX), POS(0, U64_MAX), - BTREE_ITER_slots, k, ({ + ret = for_each_btree_key_max(trans, iter, BTREE_ID_xattrs, + SPOS(0, 0, U32_MAX), POS(0, U64_MAX), + BTREE_ITER_slots, k, ({ if (i >= nr * 2) break; @@ -270,7 +268,7 @@ static int test_iterate_slots(struct bch_fs *c, u64 nr) i++; 0; - }))); + })); bch_err_msg(c, ret, "error iterating forwards by slots"); return ret; } @@ -301,15 +299,16 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr) pr_info("iterating forwards"); i = 0; - ret = bch2_trans_run(c, - for_each_btree_key_max(trans, iter, BTREE_ID_extents, - SPOS(0, 0, U32_MAX), POS(0, U64_MAX), - 0, k, ({ + CLASS(btree_trans, trans)(c); + + ret = for_each_btree_key_max(trans, iter, BTREE_ID_extents, + SPOS(0, 0, U32_MAX), POS(0, U64_MAX), + 0, k, ({ BUG_ON(bkey_start_offset(k.k) != i + 8); BUG_ON(k.k->size != 8); i += 16; 0; - }))); + })); bch_err_msg(c, ret, "error iterating forwards"); if (ret) return ret; @@ -319,10 +318,9 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr) pr_info("iterating forwards by slots"); i = 0; - ret = bch2_trans_run(c, - for_each_btree_key_max(trans, iter, BTREE_ID_extents, - SPOS(0, 0, U32_MAX), POS(0, U64_MAX), - BTREE_ITER_slots, k, ({ + ret = for_each_btree_key_max(trans, iter, BTREE_ID_extents, + SPOS(0, 0, U32_MAX), POS(0, U64_MAX), + BTREE_ITER_slots, k, ({ if (i == nr) break; BUG_ON(bkey_deleted(k.k) != !(i % 16)); @@ -331,7 +329,7 @@ static int test_iterate_slots_extents(struct bch_fs *c, u64 nr) BUG_ON(k.k->size != 8); i = k.k->p.offset; 0; - }))); + })); bch_err_msg(c, ret, "error iterating forwards by slots"); return ret; } @@ -344,7 +342,7 @@ static int test_peek_end(struct bch_fs *c, u64 nr) { delete_test_keys(c); - struct btree_trans *trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); struct btree_iter iter; struct bkey_s_c k; @@ -358,7 +356,6 @@ static int test_peek_end(struct bch_fs *c, u64 nr) BUG_ON(k.k); bch2_trans_iter_exit(trans, &iter); - bch2_trans_put(trans); return 0; } @@ -366,7 +363,7 @@ static int test_peek_end_extents(struct bch_fs *c, u64 nr) { delete_test_keys(c); - struct btree_trans *trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); struct btree_iter iter; struct bkey_s_c k; @@ -380,7 +377,6 @@ static int test_peek_end_extents(struct bch_fs *c, u64 nr) BUG_ON(k.k); 
bch2_trans_iter_exit(trans, &iter); - bch2_trans_put(trans); return 0; } @@ -392,15 +388,13 @@ static int insert_test_extent(struct bch_fs *c, u64 start, u64 end) { struct bkey_i_cookie k; - int ret; - bkey_cookie_init(&k.k_i); k.k_i.k.p.offset = end; k.k_i.k.p.snapshot = U32_MAX; k.k_i.k.size = end - start; k.k_i.k.bversion.lo = test_version++; - ret = bch2_btree_insert(c, BTREE_ID_extents, &k.k_i, NULL, 0, 0); + int ret = bch2_btree_insert(c, BTREE_ID_extents, &k.k_i, NULL, 0, 0); bch_err_fn(c, ret); return ret; } @@ -446,15 +440,14 @@ static int test_extent_overwrite_all(struct bch_fs *c, u64 nr) static int insert_test_overlapping_extent(struct bch_fs *c, u64 inum, u64 start, u32 len, u32 snapid) { struct bkey_i_cookie k; - int ret; - bkey_cookie_init(&k.k_i); k.k_i.k.p.inode = inum; k.k_i.k.p.offset = start + len; k.k_i.k.p.snapshot = snapid; k.k_i.k.size = len; - ret = bch2_trans_commit_do(c, NULL, NULL, 0, + CLASS(btree_trans, trans)(c); + int ret = commit_do(trans, NULL, NULL, 0, bch2_btree_insert_nonextent(trans, BTREE_ID_extents, &k.k_i, BTREE_UPDATE_internal_snapshot_node)); bch_err_fn(c, ret); @@ -477,7 +470,6 @@ static int test_extent_create_overlapping(struct bch_fs *c, u64 inum) /* Test skipping over keys in unrelated snapshots: */ static int test_snapshot_filter(struct bch_fs *c, u32 snapid_lo, u32 snapid_hi) { - struct btree_trans *trans; struct btree_iter iter; struct bkey_s_c k; struct bkey_i_cookie cookie; @@ -489,7 +481,7 @@ static int test_snapshot_filter(struct bch_fs *c, u32 snapid_lo, u32 snapid_hi) if (ret) return ret; - trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, SPOS(0, 0, snapid_lo), 0); lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_max(trans, &iter, POS(0, U64_MAX)))); @@ -497,28 +489,28 @@ static int test_snapshot_filter(struct bch_fs *c, u32 snapid_lo, u32 snapid_hi) BUG_ON(k.k->p.snapshot != U32_MAX); bch2_trans_iter_exit(trans, &iter); - bch2_trans_put(trans); return ret; } static int test_snapshots(struct bch_fs *c, u64 nr) { struct bkey_i_cookie cookie; - u32 snapids[2]; - u32 snapid_subvols[2] = { 1, 1 }; - int ret; - bkey_cookie_init(&cookie.k_i); cookie.k.p.snapshot = U32_MAX; - ret = bch2_btree_insert(c, BTREE_ID_xattrs, &cookie.k_i, NULL, 0, 0); + + int ret = bch2_btree_insert(c, BTREE_ID_xattrs, &cookie.k_i, NULL, 0, 0); if (ret) return ret; - ret = bch2_trans_commit_do(c, NULL, NULL, 0, - bch2_snapshot_node_create(trans, U32_MAX, - snapids, - snapid_subvols, - 2)); + u32 snapids[2]; + u32 snapid_subvols[2] = { 1, 1 }; + + CLASS(btree_trans, trans)(c); + ret = commit_do(trans, NULL, NULL, 0, + bch2_snapshot_node_create(trans, U32_MAX, + snapids, + snapid_subvols, + 2)); if (ret) return ret; @@ -542,42 +534,37 @@ static u64 test_rand(void) static int rand_insert(struct bch_fs *c, u64 nr) { - struct btree_trans *trans = bch2_trans_get(c); - struct bkey_i_cookie k; - int ret = 0; - u64 i; + CLASS(btree_trans, trans)(c); - for (i = 0; i < nr; i++) { + for (u64 i = 0; i < nr; i++) { + struct bkey_i_cookie k; bkey_cookie_init(&k.k_i); k.k.p.offset = test_rand(); k.k.p.snapshot = U32_MAX; - ret = commit_do(trans, NULL, NULL, 0, + int ret = commit_do(trans, NULL, NULL, 0, bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k.k_i, 0)); if (ret) - break; + return ret; } - bch2_trans_put(trans); - return ret; + return 0; } static int rand_insert_multi(struct bch_fs *c, u64 nr) { - struct btree_trans *trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); struct bkey_i_cookie k[8]; - 
int ret = 0; unsigned j; - u64 i; - for (i = 0; i < nr; i += ARRAY_SIZE(k)) { + for (u64 i = 0; i < nr; i += ARRAY_SIZE(k)) { for (j = 0; j < ARRAY_SIZE(k); j++) { bkey_cookie_init(&k[j].k_i); k[j].k.p.offset = test_rand(); k[j].k.p.snapshot = U32_MAX; } - ret = commit_do(trans, NULL, NULL, 0, + int ret = commit_do(trans, NULL, NULL, 0, bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[0].k_i, 0) ?: bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[1].k_i, 0) ?: bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[2].k_i, 0) ?: @@ -587,25 +574,23 @@ static int rand_insert_multi(struct bch_fs *c, u64 nr) bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[6].k_i, 0) ?: bch2_btree_insert_trans(trans, BTREE_ID_xattrs, &k[7].k_i, 0)); if (ret) - break; + return ret; } - bch2_trans_put(trans); - return ret; + return 0; } static int rand_lookup(struct bch_fs *c, u64 nr) { - struct btree_trans *trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); struct btree_iter iter; struct bkey_s_c k; int ret = 0; - u64 i; bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, SPOS(0, 0, U32_MAX), 0); - for (i = 0; i < nr; i++) { + for (u64 i = 0; i < nr; i++) { bch2_btree_iter_set_pos(trans, &iter, SPOS(0, test_rand(), U32_MAX)); lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek(trans, &iter))); @@ -615,7 +600,6 @@ static int rand_lookup(struct bch_fs *c, u64 nr) } bch2_trans_iter_exit(trans, &iter); - bch2_trans_put(trans); return ret; } @@ -646,17 +630,16 @@ static int rand_mixed_trans(struct btree_trans *trans, static int rand_mixed(struct bch_fs *c, u64 nr) { - struct btree_trans *trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); struct btree_iter iter; struct bkey_i_cookie cookie; int ret = 0; - u64 i, rand; bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, SPOS(0, 0, U32_MAX), 0); - for (i = 0; i < nr; i++) { - rand = test_rand(); + for (u64 i = 0; i < nr; i++) { + u64 rand = test_rand(); ret = commit_do(trans, NULL, NULL, 0, rand_mixed_trans(trans, &iter, &cookie, i, rand)); if (ret) @@ -664,7 +647,6 @@ static int rand_mixed(struct bch_fs *c, u64 nr) } bch2_trans_iter_exit(trans, &iter); - bch2_trans_put(trans); return ret; } @@ -692,31 +674,27 @@ err: static int rand_delete(struct bch_fs *c, u64 nr) { - struct btree_trans *trans = bch2_trans_get(c); - int ret = 0; - u64 i; + CLASS(btree_trans, trans)(c); - for (i = 0; i < nr; i++) { + for (u64 i = 0; i < nr; i++) { struct bpos pos = SPOS(0, test_rand(), U32_MAX); - ret = commit_do(trans, NULL, NULL, 0, + int ret = commit_do(trans, NULL, NULL, 0, __do_delete(trans, pos)); if (ret) - break; + return ret; } - bch2_trans_put(trans); - return ret; + return 0; } static int seq_insert(struct bch_fs *c, u64 nr) { struct bkey_i_cookie insert; - bkey_cookie_init(&insert.k_i); - return bch2_trans_run(c, - for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs, + CLASS(btree_trans, trans)(c); + return for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs, SPOS(0, 0, U32_MAX), BTREE_ITER_slots|BTREE_ITER_intent, k, NULL, NULL, 0, ({ @@ -724,22 +702,22 @@ static int seq_insert(struct bch_fs *c, u64 nr) break; insert.k.p = iter.pos; bch2_trans_update(trans, &iter, &insert.k_i, 0); - }))); + })); } static int seq_lookup(struct bch_fs *c, u64 nr) { - return bch2_trans_run(c, - for_each_btree_key_max(trans, iter, BTREE_ID_xattrs, + CLASS(btree_trans, trans)(c); + return for_each_btree_key_max(trans, iter, BTREE_ID_xattrs, SPOS(0, 0, U32_MAX), POS(0, U64_MAX), 0, k, - 0)); + 0); } static int seq_overwrite(struct bch_fs *c, u64 nr) { - return 
bch2_trans_run(c, - for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs, + CLASS(btree_trans, trans)(c); + return for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs, SPOS(0, 0, U32_MAX), BTREE_ITER_intent, k, NULL, NULL, 0, ({ @@ -747,7 +725,7 @@ static int seq_overwrite(struct bch_fs *c, u64 nr) bkey_reassemble(&u.k_i, k); bch2_trans_update(trans, &iter, &u.k_i, 0); - }))); + })); } static int seq_delete(struct bch_fs *c, u64 nr) @@ -808,8 +786,8 @@ int bch2_btree_perf_test(struct bch_fs *c, const char *testname, { struct test_job j = { .c = c, .nr = nr, .nr_threads = nr_threads }; char name_buf[20]; - struct printbuf nr_buf = PRINTBUF; - struct printbuf per_sec_buf = PRINTBUF; + CLASS(printbuf, nr_buf)(); + CLASS(printbuf, per_sec_buf)(); unsigned i; u64 time; @@ -883,8 +861,6 @@ int bch2_btree_perf_test(struct bch_fs *c, const char *testname, div_u64(time, NSEC_PER_SEC), div_u64(time * nr_threads, nr), per_sec_buf.buf); - printbuf_exit(&per_sec_buf); - printbuf_exit(&nr_buf); return j.ret; } diff --git a/fs/bcachefs/thread_with_file.c b/fs/bcachefs/thread_with_file.c index 314a24d15d4e..c2eae0ab7765 100644 --- a/fs/bcachefs/thread_with_file.c +++ b/fs/bcachefs/thread_with_file.c @@ -60,8 +60,7 @@ int bch2_run_thread_with_file(struct thread_with_file *thr, err: if (fd >= 0) put_unused_fd(fd); - if (thr->task) - kthread_stop(thr->task); + kthread_stop(thr->task); return ret; } @@ -185,23 +184,23 @@ static ssize_t thread_with_stdio_write(struct file *file, const char __user *ubu break; } - spin_lock(&buf->lock); - size_t makeroom = b; - if (!buf->waiting_for_line || memchr(buf->buf.data, '\n', buf->buf.nr)) - makeroom = min_t(ssize_t, makeroom, - max_t(ssize_t, STDIO_REDIRECT_BUFSIZE - buf->buf.nr, - 0)); - darray_make_room_gfp(&buf->buf, makeroom, GFP_NOWAIT); - - b = min(len, darray_room(buf->buf)); - - if (b && !copy_from_user_nofault(&darray_top(buf->buf), ubuf, b)) { - buf->buf.nr += b; - ubuf += b; - len -= b; - copied += b; + scoped_guard(spinlock, &buf->lock) { + size_t makeroom = b; + if (!buf->waiting_for_line || memchr(buf->buf.data, '\n', buf->buf.nr)) + makeroom = min_t(ssize_t, makeroom, + max_t(ssize_t, STDIO_REDIRECT_BUFSIZE - buf->buf.nr, + 0)); + darray_make_room_gfp(&buf->buf, makeroom, GFP_NOWAIT); + + b = min(len, darray_room(buf->buf)); + + if (b && !copy_from_user_nofault(&darray_top(buf->buf), ubuf, b)) { + buf->buf.nr += b; + ubuf += b; + len -= b; + copied += b; + } } - spin_unlock(&buf->lock); if (b) { wake_up(&buf->wait); @@ -349,14 +348,15 @@ int bch2_stdio_redirect_read(struct stdio_redirect *stdio, char *ubuf, size_t le if (stdio->done) return -1; - spin_lock(&buf->lock); - int ret = min(len, buf->buf.nr); - buf->buf.nr -= ret; - memcpy(ubuf, buf->buf.data, ret); - memmove(buf->buf.data, - buf->buf.data + ret, - buf->buf.nr); - spin_unlock(&buf->lock); + int ret; + scoped_guard(spinlock, &buf->lock) { + ret = min(len, buf->buf.nr); + buf->buf.nr -= ret; + memcpy(ubuf, buf->buf.data, ret); + memmove(buf->buf.data, + buf->buf.data + ret, + buf->buf.nr); + } wake_up(&buf->wait); return ret; diff --git a/fs/bcachefs/time_stats.c b/fs/bcachefs/time_stats.c index 2c34fe4be912..7b5fa44807d7 100644 --- a/fs/bcachefs/time_stats.c +++ b/fs/bcachefs/time_stats.c @@ -138,10 +138,8 @@ void __bch2_time_stats_update(struct bch2_time_stats *stats, u64 start, u64 end) GFP_ATOMIC); spin_unlock_irqrestore(&stats->lock, flags); } else { - struct time_stat_buffer *b; - - preempt_disable(); - b = this_cpu_ptr(stats->buffer); + guard(preempt)(); + struct time_stat_buffer *b 
= this_cpu_ptr(stats->buffer); BUG_ON(b->nr >= ARRAY_SIZE(b->entries)); b->entries[b->nr++] = (struct time_stat_buffer_entry) { @@ -151,7 +149,6 @@ void __bch2_time_stats_update(struct bch2_time_stats *stats, u64 start, u64 end) if (unlikely(b->nr == ARRAY_SIZE(b->entries))) time_stats_clear_buffer(stats, b); - preempt_enable(); } } diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index 7a4436fd4441..2ded7f3c835f 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -321,11 +321,10 @@ void bch2_prt_backtrace(struct printbuf *out, bch_stacktrace *stack) int bch2_prt_task_backtrace(struct printbuf *out, struct task_struct *task, unsigned skipnr, gfp_t gfp) { - bch_stacktrace stack = { 0 }; + CLASS(bch_stacktrace, stack)(); int ret = bch2_save_backtrace(&stack, task, skipnr + 1, gfp); bch2_prt_backtrace(out, &stack); - darray_exit(&stack); return ret; } @@ -982,9 +981,8 @@ u64 *bch2_acc_percpu_u64s(u64 __percpu *p, unsigned nr) int cpu; /* access to pcpu vars has to be blocked by other locking */ - preempt_disable(); - ret = this_cpu_ptr(p); - preempt_enable(); + scoped_guard(preempt) + ret = this_cpu_ptr(p); for_each_possible_cpu(cpu) { u64 *i = per_cpu_ptr(p, cpu); diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index 6488f098d140..768528c2bae7 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -216,7 +216,8 @@ void bch2_prt_u64_base2(struct printbuf *, u64); void bch2_print_string_as_lines(const char *, const char *); -typedef DARRAY(unsigned long) bch_stacktrace; +DEFINE_DARRAY_NAMED(bch_stacktrace, unsigned long); + int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *, unsigned, gfp_t); void bch2_prt_backtrace(struct printbuf *, bch_stacktrace *); int bch2_prt_task_backtrace(struct printbuf *, struct task_struct *, unsigned, gfp_t); diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index 627f153798c6..903e20cd34fa 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -313,8 +313,8 @@ ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) struct xattr_buf buf = { .buf = buffer, .len = buffer_size }; u64 offset = 0, inum = inode->ei_inode.bi_inum; - int ret = bch2_trans_run(c, - for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_xattrs, + CLASS(btree_trans, trans)(c); + int ret = for_each_btree_key_in_subvolume_max(trans, iter, BTREE_ID_xattrs, POS(inum, offset), POS(inum, U64_MAX), inode->ei_inum.subvol, 0, k, ({ @@ -322,7 +322,7 @@ ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) continue; bch2_xattr_emit(dentry, bkey_s_c_to_xattr(k).v, &buf); - }))) ?: + })) ?: bch2_xattr_list_bcachefs(c, &inode->ei_inode, &buf, false) ?: bch2_xattr_list_bcachefs(c, &inode->ei_inode, &buf, true); @@ -335,9 +335,10 @@ static int bch2_xattr_get_handler(const struct xattr_handler *handler, { struct bch_inode_info *inode = to_bch_ei(vinode); struct bch_fs *c = inode->v.i_sb->s_fs_info; - int ret = bch2_trans_do(c, - bch2_xattr_get_trans(trans, inode, name, buffer, size, handler->flags)); + CLASS(btree_trans, trans)(c); + int ret = lockrestart_do(trans, + bch2_xattr_get_trans(trans, inode, name, buffer, size, handler->flags)); if (ret < 0 && bch2_err_matches(ret, ENOENT)) ret = -ENODATA; @@ -356,12 +357,12 @@ static int bch2_xattr_set_handler(const struct xattr_handler *handler, struct bch_inode_unpacked inode_u; int ret; - ret = bch2_trans_run(c, - commit_do(trans, NULL, NULL, 0, + CLASS(btree_trans, trans)(c); + ret = commit_do(trans, NULL, NULL, 0, bch2_xattr_set(trans, inode_inum(inode), 
&inode_u, &hash, name, value, size, handler->flags, flags)) ?: - (bch2_inode_update_after_write(trans, inode, &inode_u, ATTR_CTIME), 0)); + (bch2_inode_update_after_write(trans, inode, &inode_u, ATTR_CTIME), 0); return bch2_err_class(ret); } @@ -418,7 +419,6 @@ static int __bch2_xattr_bcachefs_get(const struct xattr_handler *handler, bch2_inode_opts_to_opts(&inode->ei_inode); const struct bch_option *opt; int id, inode_opt_id; - struct printbuf out = PRINTBUF; int ret; u64 v; @@ -439,6 +439,7 @@ static int __bch2_xattr_bcachefs_get(const struct xattr_handler *handler, !(inode->ei_inode.bi_fields_set & (1 << inode_opt_id))) return -ENODATA; + CLASS(printbuf, out)(); v = bch2_opt_get_by_id(&opts, id); bch2_opt_to_text(&out, c, c->disk_sb.sb, opt, v, 0); @@ -453,7 +454,6 @@ static int __bch2_xattr_bcachefs_get(const struct xattr_handler *handler, memcpy(buffer, out.buf, out.pos); } - printbuf_exit(&out); return ret; } @@ -532,11 +532,11 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler, kfree(buf); if (ret < 0) - goto err_class_exit; + goto err; ret = bch2_opt_hook_pre_set(c, NULL, opt_id, v); if (ret < 0) - goto err_class_exit; + goto err; s.v = v + 1; s.defined = true; @@ -548,7 +548,7 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler, * rename() also has to deal with keeping inherited options up * to date - see bch2_reinherit_attrs() */ - spin_lock(&dentry->d_lock); + guard(spinlock)(&dentry->d_lock); if (!IS_ROOT(dentry)) { struct bch_inode_info *dir = to_bch_ei(d_inode(dentry->d_parent)); @@ -557,26 +557,24 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler, } else { s.v = 0; } - spin_unlock(&dentry->d_lock); s.defined = false; } - mutex_lock(&inode->ei_update_lock); - if (inode_opt_id == Inode_opt_project) { - /* - * inode fields accessible via the xattr interface are stored - * with a +1 bias, so that 0 means unset: - */ - ret = bch2_set_projid(c, inode, s.v ? s.v - 1 : 0); - if (ret) - goto err; - } + scoped_guard(mutex, &inode->ei_update_lock) { + if (inode_opt_id == Inode_opt_project) { + /* + * inode fields accessible via the xattr interface are stored + * with a +1 bias, so that 0 means unset: + */ + ret = bch2_set_projid(c, inode, s.v ? s.v - 1 : 0); + if (ret) + goto err; + } - ret = bch2_write_inode(c, inode, inode_opt_set_fn, &s, 0); + ret = bch2_write_inode(c, inode, inode_opt_set_fn, &s, 0); + } err: - mutex_unlock(&inode->ei_update_lock); -err_class_exit: return bch2_err_class(ret); } diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h index 9de356bcb411..aa176cc9a324 100644 --- a/fs/btrfs/block-group.h +++ b/fs/btrfs/block-group.h @@ -83,6 +83,8 @@ enum btrfs_block_group_flags { BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, /* Does the block group need to be added to the free space tree? */ BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, + /* Set after we add a new block group to the free space tree. 
*/ + BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, /* Indicate that the block group is placed on a sequential zone */ BLOCK_GROUP_FLAG_SEQUENTIAL_ZONE, /* diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index c7cc24a5dd5e..8c597fa60523 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1377,7 +1377,10 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root, void btrfs_assert_delayed_root_empty(struct btrfs_fs_info *fs_info) { - WARN_ON(btrfs_first_delayed_node(fs_info->delayed_root)); + struct btrfs_delayed_node *node = btrfs_first_delayed_node(fs_info->delayed_root); + + if (WARN_ON(node)) + refcount_dec(&node->refs); } static bool could_end_wait(struct btrfs_delayed_root *delayed_root, int seq) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1beb9458f622..0d6ad7512f21 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1835,6 +1835,8 @@ void btrfs_put_root(struct btrfs_root *root) if (refcount_dec_and_test(&root->refs)) { if (WARN_ON(!xa_empty(&root->inodes))) xa_destroy(&root->inodes); + if (WARN_ON(!xa_empty(&root->delayed_nodes))) + xa_destroy(&root->delayed_nodes); WARN_ON(test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state)); if (root->anon_dev) free_anon_bdev(root->anon_dev); @@ -2156,8 +2158,7 @@ static int load_global_roots_objectid(struct btrfs_root *tree_root, found = true; root = read_tree_root_path(tree_root, path, &key); if (IS_ERR(root)) { - if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) - ret = PTR_ERR(root); + ret = PTR_ERR(root); break; } set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); @@ -4310,8 +4311,8 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info) * * So wait for all ongoing ordered extents to complete and then run * delayed iputs. This works because once we reach this point no one - * can either create new ordered extents nor create delayed iputs - * through some other means. + * can create new ordered extents, but delayed iputs can still be added + * by a reclaim worker (see comments further below). * * Also note that btrfs_wait_ordered_roots() is not safe here, because * it waits for BTRFS_ORDERED_COMPLETE to be set on an ordered extent, @@ -4322,15 +4323,29 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info) btrfs_flush_workqueue(fs_info->endio_write_workers); /* Ordered extents for free space inodes. */ btrfs_flush_workqueue(fs_info->endio_freespace_worker); + /* + * Run delayed iputs in case an async reclaim worker is waiting for them + * to be run as mentioned above. + */ btrfs_run_delayed_iputs(fs_info); - /* There should be no more workload to generate new delayed iputs. */ - set_bit(BTRFS_FS_STATE_NO_DELAYED_IPUT, &fs_info->fs_state); cancel_work_sync(&fs_info->async_reclaim_work); cancel_work_sync(&fs_info->async_data_reclaim_work); cancel_work_sync(&fs_info->preempt_reclaim_work); cancel_work_sync(&fs_info->em_shrinker_work); + /* + * Run delayed iputs again because an async reclaim worker may have + * added new ones if it was flushing delalloc: + * + * shrink_delalloc() -> btrfs_start_delalloc_roots() -> + * start_delalloc_inodes() -> btrfs_add_delayed_iput() + */ + btrfs_run_delayed_iputs(fs_info); + + /* There should be no more workload to generate new delayed iputs. 
*/ + set_bit(BTRFS_FS_STATE_NO_DELAYED_IPUT, &fs_info->fs_state); + /* Cancel or finish ongoing discard work */ btrfs_discard_cleanup(fs_info); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 849199768664..1dc931c4937f 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4312,7 +4312,6 @@ static int try_release_subpage_extent_buffer(struct folio *folio) spin_unlock(&eb->refs_lock); continue; } - xa_unlock_irq(&fs_info->buffer_tree); /* * If tree ref isn't set then we know the ref on this eb is a @@ -4329,6 +4328,7 @@ static int try_release_subpage_extent_buffer(struct folio *folio) * check the folio private at the end. And * release_extent_buffer() will release the refs_lock. */ + xa_unlock_irq(&fs_info->buffer_tree); release_extent_buffer(eb); xa_lock_irq(&fs_info->buffer_tree); } diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index 0c573d46639a..a83c268f7f87 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -1115,11 +1115,21 @@ static int populate_free_space_tree(struct btrfs_trans_handle *trans, ret = btrfs_search_slot_for_read(extent_root, &key, path, 1, 0); if (ret < 0) goto out_locked; - ASSERT(ret == 0); + /* + * If ret is 1 (no key found), it means this is an empty block group, + * without any extents allocated from it and there's no block group + * item (key BTRFS_BLOCK_GROUP_ITEM_KEY) located in the extent tree + * because we are using the block group tree feature, so block group + * items are stored in the block group tree. It also means there are no + * extents allocated for block groups with a start offset beyond this + * block group's end offset (this is the last, highest, block group). + */ + if (!btrfs_fs_compat_ro(trans->fs_info, BLOCK_GROUP_TREE)) + ASSERT(ret == 0); start = block_group->start; end = block_group->start + block_group->length; - while (1) { + while (ret == 0) { btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); if (key.type == BTRFS_EXTENT_ITEM_KEY || @@ -1149,8 +1159,6 @@ static int populate_free_space_tree(struct btrfs_trans_handle *trans, ret = btrfs_next_item(extent_root, path); if (ret < 0) goto out_locked; - if (ret) - break; } if (start < end) { ret = __add_to_free_space_tree(trans, block_group, path2, @@ -1233,6 +1241,7 @@ static int clear_free_space_tree(struct btrfs_trans_handle *trans, { BTRFS_PATH_AUTO_FREE(path); struct btrfs_key key; + struct rb_node *node; int nr; int ret; @@ -1261,6 +1270,16 @@ static int clear_free_space_tree(struct btrfs_trans_handle *trans, btrfs_release_path(path); } + node = rb_first_cached(&trans->fs_info->block_group_cache_tree); + while (node) { + struct btrfs_block_group *bg; + + bg = rb_entry(node, struct btrfs_block_group, cache_node); + clear_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &bg->runtime_flags); + node = rb_next(node); + cond_resched(); + } + return 0; } @@ -1350,12 +1369,18 @@ int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info) block_group = rb_entry(node, struct btrfs_block_group, cache_node); + + if (test_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, + &block_group->runtime_flags)) + goto next; + ret = populate_free_space_tree(trans, block_group); if (ret) { btrfs_abort_transaction(trans, ret); btrfs_end_transaction(trans); return ret; } +next: if (btrfs_should_end_transaction(trans)) { btrfs_end_transaction(trans); trans = btrfs_start_transaction(free_space_root, 1); @@ -1382,6 +1407,29 @@ static int __add_block_group_free_space(struct btrfs_trans_handle *trans, clear_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, 
&block_group->runtime_flags); + /* + * While rebuilding the free space tree we may allocate new metadata + * block groups while modifying the free space tree. + * + * Because during the rebuild (at btrfs_rebuild_free_space_tree()) we + * can use multiple transactions, every time btrfs_end_transaction() is + * called at btrfs_rebuild_free_space_tree() we finish the creation of + * new block groups by calling btrfs_create_pending_block_groups(), and + * that in turn calls us, through add_block_group_free_space(), to add + * a free space info item and a free space extent item for the block + * group. + * + * Then later btrfs_rebuild_free_space_tree() may find such new block + * groups and processes them with populate_free_space_tree(), which can + * fail with EEXIST since there are already items for the block group in + * the free space tree. Notice that we say "may find" because a new + * block group may be added to the block groups rbtree in a node before + * or after the block group currently being processed by the rebuild + * process. So signal the rebuild process to skip such new block groups + * if it finds them. + */ + set_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &block_group->runtime_flags); + ret = add_new_free_space_info(trans, block_group, path); if (ret) return ret; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c0c778243bf1..fc66872b4c74 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4250,9 +4250,9 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans, ret = btrfs_del_inode_ref(trans, root, name, ino, dir_ino, &index); if (ret) { - btrfs_info(fs_info, - "failed to delete reference to %.*s, inode %llu parent %llu", - name->len, name->name, ino, dir_ino); + btrfs_crit(fs_info, + "failed to delete reference to %.*s, root %llu inode %llu parent %llu", + name->len, name->name, btrfs_root_id(root), ino, dir_ino); btrfs_abort_transaction(trans, ret); goto err; } @@ -4710,7 +4710,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; int ret = 0; struct btrfs_trans_handle *trans; - u64 last_unlink_trans; struct fscrypt_name fname; if (inode->i_size > BTRFS_EMPTY_DIR_SIZE) @@ -4736,6 +4735,23 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) goto out_notrans; } + /* + * Propagate the last_unlink_trans value of the deleted dir to its + * parent directory. This is to prevent an unrecoverable log tree in the + * case we do something like this: + * 1) create dir foo + * 2) create snapshot under dir foo + * 3) delete the snapshot + * 4) rmdir foo + * 5) mkdir foo + * 6) fsync foo or some file inside foo + * + * This is because we can't unlink other roots when replaying the dir + * deletes for directory foo. + */ + if (BTRFS_I(inode)->last_unlink_trans >= trans->transid) + btrfs_record_snapshot_destroy(trans, BTRFS_I(dir)); + if (unlikely(btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { ret = btrfs_unlink_subvol(trans, BTRFS_I(dir), dentry); goto out; @@ -4745,27 +4761,11 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) if (ret) goto out; - last_unlink_trans = BTRFS_I(inode)->last_unlink_trans; - /* now the directory is empty */ ret = btrfs_unlink_inode(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)), &fname.disk_name); - if (!ret) { + if (!ret) btrfs_i_size_write(BTRFS_I(inode), 0); - /* - * Propagate the last_unlink_trans value of the deleted dir to - * its parent directory. 
This is to prevent an unrecoverable - * log tree in the case we do something like this: - * 1) create dir foo - * 2) create snapshot under dir foo - * 3) delete the snapshot - * 4) rmdir foo - * 5) mkdir foo - * 6) fsync foo or some file inside foo - */ - if (last_unlink_trans >= trans->transid) - BTRFS_I(dir)->last_unlink_trans = last_unlink_trans; - } out: btrfs_end_transaction(trans); out_notrans: @@ -8059,6 +8059,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, int ret; int ret2; bool need_abort = false; + bool logs_pinned = false; struct fscrypt_name old_fname, new_fname; struct fscrypt_str *old_name, *new_name; @@ -8182,6 +8183,31 @@ static int btrfs_rename_exchange(struct inode *old_dir, inode_inc_iversion(new_inode); simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry); + if (old_ino != BTRFS_FIRST_FREE_OBJECTID && + new_ino != BTRFS_FIRST_FREE_OBJECTID) { + /* + * If we are renaming in the same directory (and it's not for + * root entries) pin the log early to prevent any concurrent + * task from logging the directory after we removed the old + * entries and before we add the new entries, otherwise that + * task can sync a log without any entry for the inodes we are + * renaming and therefore replaying that log, if a power failure + * happens after syncing the log, would result in deleting the + * inodes. + * + * If the rename affects two different directories, we want to + * make sure that there's no log commit that contains + * updates for only one of the directories but not for the + * other. + * + * If we are renaming an entry for a root, we don't care about + * log updates since we called btrfs_set_log_full_commit(). + */ + btrfs_pin_log_trans(root); + btrfs_pin_log_trans(dest); + logs_pinned = true; + } + if (old_dentry->d_parent != new_dentry->d_parent) { btrfs_record_unlink_dir(trans, BTRFS_I(old_dir), BTRFS_I(old_inode), true); @@ -8253,30 +8279,23 @@ static int btrfs_rename_exchange(struct inode *old_dir, BTRFS_I(new_inode)->dir_index = new_idx; /* - * Now pin the logs of the roots. We do it to ensure that no other task - * can sync the logs while we are in progress with the rename, because - * that could result in an inconsistency in case any of the inodes that - * are part of this rename operation were logged before. + * Do the log updates for all inodes. + * + * If either entry is for a root we don't need to update the logs since + * we've called btrfs_set_log_full_commit() before. */ - if (old_ino != BTRFS_FIRST_FREE_OBJECTID) - btrfs_pin_log_trans(root); - if (new_ino != BTRFS_FIRST_FREE_OBJECTID) - btrfs_pin_log_trans(dest); - - /* Do the log updates for all inodes. */ - if (old_ino != BTRFS_FIRST_FREE_OBJECTID) + if (logs_pinned) { btrfs_log_new_name(trans, old_dentry, BTRFS_I(old_dir), old_rename_ctx.index, new_dentry->d_parent); - if (new_ino != BTRFS_FIRST_FREE_OBJECTID) btrfs_log_new_name(trans, new_dentry, BTRFS_I(new_dir), new_rename_ctx.index, old_dentry->d_parent); + } - /* Now unpin the logs. */ - if (old_ino != BTRFS_FIRST_FREE_OBJECTID) +out_fail: + if (logs_pinned) { btrfs_end_log_trans(root); - if (new_ino != BTRFS_FIRST_FREE_OBJECTID) btrfs_end_log_trans(dest); -out_fail: + } ret2 = btrfs_end_transaction(trans); ret = ret ?
ret : ret2; out_notrans: @@ -8326,6 +8345,7 @@ static int btrfs_rename(struct mnt_idmap *idmap, int ret2; u64 old_ino = btrfs_ino(BTRFS_I(old_inode)); struct fscrypt_name old_fname, new_fname; + bool logs_pinned = false; if (btrfs_ino(BTRFS_I(new_dir)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) return -EPERM; @@ -8460,6 +8480,29 @@ static int btrfs_rename(struct mnt_idmap *idmap, inode_inc_iversion(old_inode); simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry); + if (old_ino != BTRFS_FIRST_FREE_OBJECTID) { + /* + * If we are renaming in the same directory (and it's not a + * root entry) pin the log to prevent any concurrent task from + * logging the directory after we removed the old entry and + * before we add the new entry, otherwise that task can sync + * a log without any entry for the inode we are renaming and + * therefore replaying that log, if a power failure happens + * after syncing the log, would result in deleting the inode. + * + * If the rename affects two different directories, we want to + * make sure that there's no log commit that contains + * updates for only one of the directories but not for the + * other. + * + * If we are renaming an entry for a root, we don't care about + * log updates since we called btrfs_set_log_full_commit(). + */ + btrfs_pin_log_trans(root); + btrfs_pin_log_trans(dest); + logs_pinned = true; + } + if (old_dentry->d_parent != new_dentry->d_parent) btrfs_record_unlink_dir(trans, BTRFS_I(old_dir), BTRFS_I(old_inode), true); @@ -8524,7 +8567,7 @@ static int btrfs_rename(struct mnt_idmap *idmap, if (old_inode->i_nlink == 1) BTRFS_I(old_inode)->dir_index = index; - if (old_ino != BTRFS_FIRST_FREE_OBJECTID) + if (logs_pinned) btrfs_log_new_name(trans, old_dentry, BTRFS_I(old_dir), rename_ctx.index, new_dentry->d_parent); @@ -8540,6 +8583,10 @@ static int btrfs_rename(struct mnt_idmap *idmap, } } out_fail: + if (logs_pinned) { + btrfs_end_log_trans(root); + btrfs_end_log_trans(dest); + } ret2 = btrfs_end_transaction(trans); ret = ret ?
ret : ret2; out_notrans: diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 913acef3f0a9..8a60983a697c 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -666,14 +666,14 @@ static noinline int create_subvol(struct mnt_idmap *idmap, goto out; } + btrfs_record_new_subvolume(trans, BTRFS_I(dir)); + ret = btrfs_create_new_inode(trans, &new_inode_args); if (ret) { btrfs_abort_transaction(trans, ret); goto out; } - btrfs_record_new_subvolume(trans, BTRFS_I(dir)); - d_instantiate_new(dentry, new_inode_args.inode); new_inode_args.inode = NULL; @@ -3139,7 +3139,7 @@ static long btrfs_ioctl_scrub(struct file *file, void __user *arg) return -EPERM; if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) { - btrfs_err(fs_info, "scrub is not supported on extent tree v2 yet"); + btrfs_err(fs_info, "scrub: extent tree v2 not yet supported"); return -EINVAL; } diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index ce36fafc771e..7cd5e76a783c 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -557,7 +557,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 num_bytes, */ for (i = 0; i < ipath->fspath->elem_cnt; ++i) btrfs_warn_in_rcu(fs_info, -"%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu, length %u, links %u (path: %s)", +"scrub: %s at logical %llu on dev %s, physical %llu root %llu inode %llu offset %llu length %u links %u (path: %s)", swarn->errstr, swarn->logical, btrfs_dev_name(swarn->dev), swarn->physical, @@ -571,7 +571,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 num_bytes, err: btrfs_warn_in_rcu(fs_info, - "%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu: path resolving failed with ret=%d", + "scrub: %s at logical %llu on dev %s, physical %llu root %llu inode %llu offset %llu: path resolving failed with ret=%d", swarn->errstr, swarn->logical, btrfs_dev_name(swarn->dev), swarn->physical, @@ -596,7 +596,7 @@ static void scrub_print_common_warning(const char *errstr, struct btrfs_device * /* Super block error, no need to search extent tree. */ if (is_super) { - btrfs_warn_in_rcu(fs_info, "%s on device %s, physical %llu", + btrfs_warn_in_rcu(fs_info, "scrub: %s on device %s, physical %llu", errstr, btrfs_dev_name(dev), physical); return; } @@ -631,14 +631,14 @@ static void scrub_print_common_warning(const char *errstr, struct btrfs_device * &ref_level); if (ret < 0) { btrfs_warn(fs_info, - "failed to resolve tree backref for logical %llu: %d", - swarn.logical, ret); + "scrub: failed to resolve tree backref for logical %llu: %d", + swarn.logical, ret); break; } if (ret > 0) break; btrfs_warn_in_rcu(fs_info, -"%s at logical %llu on dev %s, physical %llu: metadata %s (level %d) in tree %llu", +"scrub: %s at logical %llu on dev %s, physical %llu: metadata %s (level %d) in tree %llu", errstr, swarn.logical, btrfs_dev_name(dev), swarn.physical, (ref_level ? 
"node" : "leaf"), ref_level, ref_root); @@ -718,7 +718,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr scrub_bitmap_set_meta_error(stripe, sector_nr, sectors_per_tree); scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree); btrfs_warn_rl(fs_info, - "tree block %llu mirror %u has bad bytenr, has %llu want %llu", + "scrub: tree block %llu mirror %u has bad bytenr, has %llu want %llu", logical, stripe->mirror_num, btrfs_stack_header_bytenr(header), logical); return; @@ -728,7 +728,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr scrub_bitmap_set_meta_error(stripe, sector_nr, sectors_per_tree); scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree); btrfs_warn_rl(fs_info, - "tree block %llu mirror %u has bad fsid, has %pU want %pU", + "scrub: tree block %llu mirror %u has bad fsid, has %pU want %pU", logical, stripe->mirror_num, header->fsid, fs_info->fs_devices->fsid); return; @@ -738,7 +738,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr scrub_bitmap_set_meta_error(stripe, sector_nr, sectors_per_tree); scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree); btrfs_warn_rl(fs_info, - "tree block %llu mirror %u has bad chunk tree uuid, has %pU want %pU", + "scrub: tree block %llu mirror %u has bad chunk tree uuid, has %pU want %pU", logical, stripe->mirror_num, header->chunk_tree_uuid, fs_info->chunk_tree_uuid); return; @@ -760,7 +760,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr scrub_bitmap_set_meta_error(stripe, sector_nr, sectors_per_tree); scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree); btrfs_warn_rl(fs_info, - "tree block %llu mirror %u has bad csum, has " CSUM_FMT " want " CSUM_FMT, +"scrub: tree block %llu mirror %u has bad csum, has " CSUM_FMT " want " CSUM_FMT, logical, stripe->mirror_num, CSUM_FMT_VALUE(fs_info->csum_size, on_disk_csum), CSUM_FMT_VALUE(fs_info->csum_size, calculated_csum)); @@ -771,7 +771,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr scrub_bitmap_set_meta_gen_error(stripe, sector_nr, sectors_per_tree); scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree); btrfs_warn_rl(fs_info, - "tree block %llu mirror %u has bad generation, has %llu want %llu", + "scrub: tree block %llu mirror %u has bad generation, has %llu want %llu", logical, stripe->mirror_num, btrfs_stack_header_generation(header), stripe->sectors[sector_nr].generation); @@ -814,7 +814,7 @@ static void scrub_verify_one_sector(struct scrub_stripe *stripe, int sector_nr) */ if (unlikely(sector_nr + sectors_per_tree > stripe->nr_sectors)) { btrfs_warn_rl(fs_info, - "tree block at %llu crosses stripe boundary %llu", + "scrub: tree block at %llu crosses stripe boundary %llu", stripe->logical + (sector_nr << fs_info->sectorsize_bits), stripe->logical); @@ -1046,12 +1046,12 @@ skip: if (repaired) { if (dev) { btrfs_err_rl_in_rcu(fs_info, - "fixed up error at logical %llu on dev %s physical %llu", + "scrub: fixed up error at logical %llu on dev %s physical %llu", stripe->logical, btrfs_dev_name(dev), physical); } else { btrfs_err_rl_in_rcu(fs_info, - "fixed up error at logical %llu on mirror %u", + "scrub: fixed up error at logical %llu on mirror %u", stripe->logical, stripe->mirror_num); } continue; @@ -1060,12 +1060,12 @@ skip: /* The remaining are all for unrepaired. 
*/ if (dev) { btrfs_err_rl_in_rcu(fs_info, - "unable to fixup (regular) error at logical %llu on dev %s physical %llu", +"scrub: unable to fixup (regular) error at logical %llu on dev %s physical %llu", stripe->logical, btrfs_dev_name(dev), physical); } else { btrfs_err_rl_in_rcu(fs_info, - "unable to fixup (regular) error at logical %llu on mirror %u", + "scrub: unable to fixup (regular) error at logical %llu on mirror %u", stripe->logical, stripe->mirror_num); } @@ -1593,8 +1593,7 @@ static int sync_write_pointer_for_zoned(struct scrub_ctx *sctx, u64 logical, physical, sctx->write_pointer); if (ret) - btrfs_err(fs_info, - "zoned: failed to recover write pointer"); + btrfs_err(fs_info, "scrub: zoned: failed to recover write pointer"); } mutex_unlock(&sctx->wr_lock); btrfs_dev_clear_zone_empty(sctx->wr_tgtdev, physical); @@ -1658,7 +1657,7 @@ static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg, int ret; if (unlikely(!extent_root || !csum_root)) { - btrfs_err(fs_info, "no valid extent or csum root for scrub"); + btrfs_err(fs_info, "scrub: no valid extent or csum root found"); return -EUCLEAN; } memset(stripe->sectors, 0, sizeof(struct scrub_sector_verification) * @@ -1907,7 +1906,7 @@ static bool stripe_has_metadata_error(struct scrub_stripe *stripe) struct btrfs_fs_info *fs_info = stripe->bg->fs_info; btrfs_err(fs_info, - "stripe %llu has unrepaired metadata sector at %llu", + "scrub: stripe %llu has unrepaired metadata sector at logical %llu", stripe->logical, stripe->logical + (i << fs_info->sectorsize_bits)); return true; @@ -2167,7 +2166,7 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx, bitmap_and(&error, &error, &has_extent, stripe->nr_sectors); if (!bitmap_empty(&error, stripe->nr_sectors)) { btrfs_err(fs_info, -"unrepaired sectors detected, full stripe %llu data stripe %u errors %*pbl", +"scrub: unrepaired sectors detected, full stripe %llu data stripe %u errors %*pbl", full_stripe_start, i, stripe->nr_sectors, &error); ret = -EIO; @@ -2789,14 +2788,14 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, ro_set = 0; } else if (ret == -ETXTBSY) { btrfs_warn(fs_info, - "skipping scrub of block group %llu due to active swapfile", + "scrub: skipping scrub of block group %llu due to active swapfile", cache->start); scrub_pause_off(fs_info); ret = 0; goto skip_unfreeze; } else { - btrfs_warn(fs_info, - "failed setting block group ro: %d", ret); + btrfs_warn(fs_info, "scrub: failed setting block group ro: %d", + ret); btrfs_unfreeze_block_group(cache); btrfs_put_block_group(cache); scrub_pause_off(fs_info); @@ -2892,13 +2891,13 @@ static int scrub_one_super(struct scrub_ctx *sctx, struct btrfs_device *dev, ret = btrfs_check_super_csum(fs_info, sb); if (ret != 0) { btrfs_err_rl(fs_info, - "super block at physical %llu devid %llu has bad csum", + "scrub: super block at physical %llu devid %llu has bad csum", physical, dev->devid); return -EIO; } if (btrfs_super_generation(sb) != generation) { btrfs_err_rl(fs_info, -"super block at physical %llu devid %llu has bad generation %llu expect %llu", +"scrub: super block at physical %llu devid %llu has bad generation %llu expect %llu", physical, dev->devid, btrfs_super_generation(sb), generation); return -EUCLEAN; @@ -3059,7 +3058,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) { mutex_unlock(&fs_info->fs_devices->device_list_mutex); btrfs_err_in_rcu(fs_info, - "scrub on devid %llu: filesystem on %s is not writable", + "scrub: 
devid %llu: filesystem on %s is not writable", devid, btrfs_dev_name(dev)); ret = -EROFS; goto out; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 97e933113b82..cea8a7e9d6d3 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -143,6 +143,9 @@ static struct btrfs_inode *btrfs_iget_logging(u64 objectid, struct btrfs_root *r unsigned int nofs_flag; struct btrfs_inode *inode; + /* Only meant to be called for subvolume roots and not for log roots. */ + ASSERT(is_fstree(btrfs_root_id(root))); + /* * We're holding a transaction handle whether we are logging or * replaying a log tree, so we must make sure NOFS semantics apply @@ -604,21 +607,6 @@ static int read_alloc_one_name(struct extent_buffer *eb, void *start, int len, return 0; } -/* - * simple helper to read an inode off the disk from a given root - * This can only be called for subvolume roots and not for the log - */ -static noinline struct btrfs_inode *read_one_inode(struct btrfs_root *root, - u64 objectid) -{ - struct btrfs_inode *inode; - - inode = btrfs_iget_logging(objectid, root); - if (IS_ERR(inode)) - return NULL; - return inode; -} - /* replays a single extent in 'eb' at 'slot' with 'key' into the * subvolume 'root'. path is released on entry and should be released * on exit. @@ -668,15 +656,15 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, extent_end = ALIGN(start + size, fs_info->sectorsize); } else { - ret = 0; - goto out; + btrfs_err(fs_info, + "unexpected extent type=%d root=%llu inode=%llu offset=%llu", + found_type, btrfs_root_id(root), key->objectid, key->offset); + return -EUCLEAN; } - inode = read_one_inode(root, key->objectid); - if (!inode) { - ret = -EIO; - goto out; - } + inode = btrfs_iget_logging(key->objectid, root); + if (IS_ERR(inode)) + return PTR_ERR(inode); /* * first check to see if we already have this extent in the @@ -948,9 +936,10 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, btrfs_release_path(path); - inode = read_one_inode(root, location.objectid); - if (!inode) { - ret = -EIO; + inode = btrfs_iget_logging(location.objectid, root); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); + inode = NULL; goto out; } @@ -961,7 +950,8 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, ret = unlink_inode_for_log_replay(trans, dir, inode, &name); out: kfree(name.name); - iput(&inode->vfs_inode); + if (inode) + iput(&inode->vfs_inode); return ret; } @@ -1072,7 +1062,9 @@ again: search_key.type = BTRFS_INODE_REF_KEY; search_key.offset = parent_objectid; ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); - if (ret == 0) { + if (ret < 0) { + return ret; + } else if (ret == 0) { struct btrfs_inode_ref *victim_ref; unsigned long ptr; unsigned long ptr_end; @@ -1145,13 +1137,13 @@ again: struct fscrypt_str victim_name; extref = (struct btrfs_inode_extref *)(base + cur_offset); + victim_name.len = btrfs_inode_extref_name_len(leaf, extref); if (btrfs_inode_extref_parent(leaf, extref) != parent_objectid) goto next; ret = read_alloc_one_name(leaf, &extref->name, - btrfs_inode_extref_name_len(leaf, extref), - &victim_name); + victim_name.len, &victim_name); if (ret) return ret; @@ -1166,18 +1158,18 @@ again: kfree(victim_name.name); return ret; } else if (!ret) { - ret = -ENOENT; - victim_parent = read_one_inode(root, - parent_objectid); - if (victim_parent) { + victim_parent = btrfs_iget_logging(parent_objectid, root); + if (IS_ERR(victim_parent)) { + ret = PTR_ERR(victim_parent); + } else { 
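/*
 * [Editor's aside, not part of the patch] The read_one_inode() ->
 * btrfs_iget_logging() conversion in this file also changes the error
 * convention from "NULL on any failure" to ERR_PTR semantics, so callers
 * propagate the real errno instead of collapsing every failure to -EIO or
 * -ENOENT. The resulting caller pattern, sketched:
 *
 *	inode = btrfs_iget_logging(objectid, root);
 *	if (IS_ERR(inode))
 *		return PTR_ERR(inode);
 *
 * Callers whose common exit path does iput(&inode->vfs_inode) now reset the
 * pointer to NULL on failure first, since calling iput() on an ERR_PTR would
 * dereference an invalid pointer.
 */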
inc_nlink(&inode->vfs_inode); btrfs_release_path(path); ret = unlink_inode_for_log_replay(trans, victim_parent, inode, &victim_name); + iput(&victim_parent->vfs_inode); } - iput(&victim_parent->vfs_inode); kfree(victim_name.name); if (ret) return ret; @@ -1314,9 +1306,9 @@ again: struct btrfs_inode *dir; btrfs_release_path(path); - dir = read_one_inode(root, parent_id); - if (!dir) { - ret = -ENOENT; + dir = btrfs_iget_logging(parent_id, root); + if (IS_ERR(dir)) { + ret = PTR_ERR(dir); kfree(name.name); goto out; } @@ -1388,15 +1380,17 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, * copy the back ref in. The link count fixup code will take * care of the rest */ - dir = read_one_inode(root, parent_objectid); - if (!dir) { - ret = -ENOENT; + dir = btrfs_iget_logging(parent_objectid, root); + if (IS_ERR(dir)) { + ret = PTR_ERR(dir); + dir = NULL; goto out; } - inode = read_one_inode(root, inode_objectid); - if (!inode) { - ret = -EIO; + inode = btrfs_iget_logging(inode_objectid, root); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); + inode = NULL; goto out; } @@ -1408,11 +1402,13 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, * parent object can change from one array * item to another. */ - if (!dir) - dir = read_one_inode(root, parent_objectid); if (!dir) { - ret = -ENOENT; - goto out; + dir = btrfs_iget_logging(parent_objectid, root); + if (IS_ERR(dir)) { + ret = PTR_ERR(dir); + dir = NULL; + goto out; + } } } else { ret = ref_get_fields(eb, ref_ptr, &name, &ref_index); @@ -1681,9 +1677,9 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans, break; btrfs_release_path(path); - inode = read_one_inode(root, key.offset); - if (!inode) { - ret = -EIO; + inode = btrfs_iget_logging(key.offset, root); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); break; } @@ -1719,9 +1715,9 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans, struct btrfs_inode *inode; struct inode *vfs_inode; - inode = read_one_inode(root, objectid); - if (!inode) - return -EIO; + inode = btrfs_iget_logging(objectid, root); + if (IS_ERR(inode)) + return PTR_ERR(inode); vfs_inode = &inode->vfs_inode; key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID; @@ -1760,14 +1756,14 @@ static noinline int insert_one_name(struct btrfs_trans_handle *trans, struct btrfs_inode *dir; int ret; - inode = read_one_inode(root, location->objectid); - if (!inode) - return -ENOENT; + inode = btrfs_iget_logging(location->objectid, root); + if (IS_ERR(inode)) + return PTR_ERR(inode); - dir = read_one_inode(root, dirid); - if (!dir) { + dir = btrfs_iget_logging(dirid, root); + if (IS_ERR(dir)) { iput(&inode->vfs_inode); - return -EIO; + return PTR_ERR(dir); } ret = btrfs_add_link(trans, dir, inode, name, 1, index); @@ -1844,9 +1840,9 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, bool update_size = true; bool name_added = false; - dir = read_one_inode(root, key->objectid); - if (!dir) - return -EIO; + dir = btrfs_iget_logging(key->objectid, root); + if (IS_ERR(dir)) + return PTR_ERR(dir); ret = read_alloc_one_name(eb, di + 1, btrfs_dir_name_len(eb, di), &name); if (ret) @@ -2146,9 +2142,10 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans, btrfs_dir_item_key_to_cpu(eb, di, &location); btrfs_release_path(path); btrfs_release_path(log_path); - inode = read_one_inode(root, location.objectid); - if (!inode) { - ret = -EIO; + inode = btrfs_iget_logging(location.objectid, root); + if (IS_ERR(inode)) { + ret = 
PTR_ERR(inode); + inode = NULL; goto out; } @@ -2300,14 +2297,17 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, if (!log_path) return -ENOMEM; - dir = read_one_inode(root, dirid); - /* it isn't an error if the inode isn't there, that can happen - * because we replay the deletes before we copy in the inode item - * from the log + dir = btrfs_iget_logging(dirid, root); + /* + * It isn't an error if the inode isn't there, that can happen because + * we replay the deletes before we copy in the inode item from the log. */ - if (!dir) { + if (IS_ERR(dir)) { btrfs_free_path(log_path); - return 0; + ret = PTR_ERR(dir); + if (ret == -ENOENT) + ret = 0; + return ret; } range_start = 0; @@ -2466,9 +2466,9 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, struct btrfs_inode *inode; u64 from; - inode = read_one_inode(root, key.objectid); - if (!inode) { - ret = -EIO; + inode = btrfs_iget_logging(key.objectid, root); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); break; } from = ALIGN(i_size_read(&inode->vfs_inode), @@ -7447,6 +7447,8 @@ void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans, * full log sync. * Also we don't need to worry with renames, since btrfs_rename() marks the log * for full commit when renaming a subvolume. + * + * Must be called before creating the subvolume entry in its parent directory. */ void btrfs_record_new_subvolume(const struct btrfs_trans_handle *trans, struct btrfs_inode *dir) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 89835071cfea..f475b4b7c457 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3282,6 +3282,12 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset) device->bytes_used - dev_extent_len); atomic64_add(dev_extent_len, &fs_info->free_chunk_space); btrfs_clear_space_info_full(fs_info); + + if (list_empty(&device->post_commit_list)) { + list_add_tail(&device->post_commit_list, + &trans->transaction->dev_update_list); + } + mutex_unlock(&fs_info->chunk_mutex); } } diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index b5b0156d5b95..9430b34d3cbb 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1403,7 +1403,8 @@ static int btrfs_load_block_group_single(struct btrfs_block_group *bg, static int btrfs_load_block_group_dup(struct btrfs_block_group *bg, struct btrfs_chunk_map *map, struct zone_info *zone_info, - unsigned long *active) + unsigned long *active, + u64 last_alloc) { struct btrfs_fs_info *fs_info = bg->fs_info; @@ -1426,6 +1427,13 @@ static int btrfs_load_block_group_dup(struct btrfs_block_group *bg, zone_info[1].physical); return -EIO; } + + if (zone_info[0].alloc_offset == WP_CONVENTIONAL) + zone_info[0].alloc_offset = last_alloc; + + if (zone_info[1].alloc_offset == WP_CONVENTIONAL) + zone_info[1].alloc_offset = last_alloc; + if (zone_info[0].alloc_offset != zone_info[1].alloc_offset) { btrfs_err(bg->fs_info, "zoned: write pointer offset mismatch of zones in DUP profile"); @@ -1446,7 +1454,8 @@ static int btrfs_load_block_group_dup(struct btrfs_block_group *bg, static int btrfs_load_block_group_raid1(struct btrfs_block_group *bg, struct btrfs_chunk_map *map, struct zone_info *zone_info, - unsigned long *active) + unsigned long *active, + u64 last_alloc) { struct btrfs_fs_info *fs_info = bg->fs_info; int i; @@ -1461,10 +1470,12 @@ static int btrfs_load_block_group_raid1(struct btrfs_block_group *bg, bg->zone_capacity = min_not_zero(zone_info[0].capacity, zone_info[1].capacity); for (i = 0; i < map->num_stripes; i++) { - if 
(zone_info[i].alloc_offset == WP_MISSING_DEV || - zone_info[i].alloc_offset == WP_CONVENTIONAL) + if (zone_info[i].alloc_offset == WP_MISSING_DEV) continue; + if (zone_info[i].alloc_offset == WP_CONVENTIONAL) + zone_info[i].alloc_offset = last_alloc; + if ((zone_info[0].alloc_offset != zone_info[i].alloc_offset) && !btrfs_test_opt(fs_info, DEGRADED)) { btrfs_err(fs_info, @@ -1494,7 +1505,8 @@ static int btrfs_load_block_group_raid1(struct btrfs_block_group *bg, static int btrfs_load_block_group_raid0(struct btrfs_block_group *bg, struct btrfs_chunk_map *map, struct zone_info *zone_info, - unsigned long *active) + unsigned long *active, + u64 last_alloc) { struct btrfs_fs_info *fs_info = bg->fs_info; @@ -1505,10 +1517,29 @@ static int btrfs_load_block_group_raid0(struct btrfs_block_group *bg, } for (int i = 0; i < map->num_stripes; i++) { - if (zone_info[i].alloc_offset == WP_MISSING_DEV || - zone_info[i].alloc_offset == WP_CONVENTIONAL) + if (zone_info[i].alloc_offset == WP_MISSING_DEV) continue; + if (zone_info[i].alloc_offset == WP_CONVENTIONAL) { + u64 stripe_nr, full_stripe_nr; + u64 stripe_offset; + int stripe_index; + + stripe_nr = div64_u64(last_alloc, map->stripe_size); + stripe_offset = stripe_nr * map->stripe_size; + full_stripe_nr = div_u64(stripe_nr, map->num_stripes); + div_u64_rem(stripe_nr, map->num_stripes, &stripe_index); + + zone_info[i].alloc_offset = + full_stripe_nr * map->stripe_size; + + if (stripe_index > i) + zone_info[i].alloc_offset += map->stripe_size; + else if (stripe_index == i) + zone_info[i].alloc_offset += + (last_alloc - stripe_offset); + } + if (test_bit(0, active) != test_bit(i, active)) { if (!btrfs_zone_activate(bg)) return -EIO; @@ -1526,7 +1557,8 @@ static int btrfs_load_block_group_raid0(struct btrfs_block_group *bg, static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg, struct btrfs_chunk_map *map, struct zone_info *zone_info, - unsigned long *active) + unsigned long *active, + u64 last_alloc) { struct btrfs_fs_info *fs_info = bg->fs_info; @@ -1537,8 +1569,7 @@ static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg, } for (int i = 0; i < map->num_stripes; i++) { - if (zone_info[i].alloc_offset == WP_MISSING_DEV || - zone_info[i].alloc_offset == WP_CONVENTIONAL) + if (zone_info[i].alloc_offset == WP_MISSING_DEV) continue; if (test_bit(0, active) != test_bit(i, active)) { @@ -1549,6 +1580,29 @@ static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg, set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags); } + if (zone_info[i].alloc_offset == WP_CONVENTIONAL) { + u64 stripe_nr, full_stripe_nr; + u64 stripe_offset; + int stripe_index; + + stripe_nr = div64_u64(last_alloc, map->stripe_size); + stripe_offset = stripe_nr * map->stripe_size; + full_stripe_nr = div_u64(stripe_nr, + map->num_stripes / map->sub_stripes); + div_u64_rem(stripe_nr, + (map->num_stripes / map->sub_stripes), + &stripe_index); + + zone_info[i].alloc_offset = + full_stripe_nr * map->stripe_size; + + if (stripe_index > (i / map->sub_stripes)) + zone_info[i].alloc_offset += map->stripe_size; + else if (stripe_index == (i / map->sub_stripes)) + zone_info[i].alloc_offset += + (last_alloc - stripe_offset); + } + if ((i % map->sub_stripes) == 0) { bg->zone_capacity += zone_info[i].capacity; bg->alloc_offset += zone_info[i].alloc_offset; @@ -1637,18 +1691,22 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) ret = btrfs_load_block_group_single(cache, &zone_info[0], active); break; case BTRFS_BLOCK_GROUP_DUP: 
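/*
 * [Editor's aside -- worked example, not part of the patch] The RAID0
 * conventional-zone branch above spreads last_alloc (the block-group-relative
 * allocation point) across the stripes. With stripe_size = 64K,
 * num_stripes = 2 and last_alloc = 196K (three full 64K stripes plus 4K):
 *
 *	stripe_nr      = 200704 / 65536 = 3
 *	stripe_offset  = 3 * 65536      = 196608 (192K)
 *	full_stripe_nr = 3 / 2          = 1
 *	stripe_index   = 3 % 2          = 1
 *
 * Device 0 (i < stripe_index):  alloc_offset = 65536 + 65536 = 131072 (128K)
 * Device 1 (i == stripe_index): alloc_offset = 65536 + (200704 - 196608)
 *                                            = 69632 (68K)
 *
 * 131072 + 69632 = 200704 = last_alloc, as expected. The RAID10 branch is the
 * same computation with num_stripes / sub_stripes effective stripes.
 */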
- ret = btrfs_load_block_group_dup(cache, map, zone_info, active); + ret = btrfs_load_block_group_dup(cache, map, zone_info, active, + last_alloc); break; case BTRFS_BLOCK_GROUP_RAID1: case BTRFS_BLOCK_GROUP_RAID1C3: case BTRFS_BLOCK_GROUP_RAID1C4: - ret = btrfs_load_block_group_raid1(cache, map, zone_info, active); + ret = btrfs_load_block_group_raid1(cache, map, zone_info, + active, last_alloc); break; case BTRFS_BLOCK_GROUP_RAID0: - ret = btrfs_load_block_group_raid0(cache, map, zone_info, active); + ret = btrfs_load_block_group_raid0(cache, map, zone_info, + active, last_alloc); break; case BTRFS_BLOCK_GROUP_RAID10: - ret = btrfs_load_block_group_raid10(cache, map, zone_info, active); + ret = btrfs_load_block_group_raid10(cache, map, zone_info, + active, last_alloc); break; case BTRFS_BLOCK_GROUP_RAID5: case BTRFS_BLOCK_GROUP_RAID6: diff --git a/fs/erofs/data.c b/fs/erofs/data.c index 6a329c329f43..16e4a6bd9b97 100644 --- a/fs/erofs/data.c +++ b/fs/erofs/data.c @@ -214,9 +214,11 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map) /* * bit 30: I/O error occurred on this folio + * bit 29: CPU has dirty data in D-cache (needs aliasing handling); * bit 0 - 29: remaining parts to complete this folio */ -#define EROFS_ONLINEFOLIO_EIO (1 << 30) +#define EROFS_ONLINEFOLIO_EIO 30 +#define EROFS_ONLINEFOLIO_DIRTY 29 void erofs_onlinefolio_init(struct folio *folio) { @@ -233,19 +235,23 @@ void erofs_onlinefolio_split(struct folio *folio) atomic_inc((atomic_t *)&folio->private); } -void erofs_onlinefolio_end(struct folio *folio, int err) +void erofs_onlinefolio_end(struct folio *folio, int err, bool dirty) { int orig, v; do { orig = atomic_read((atomic_t *)&folio->private); - v = (orig - 1) | (err ? EROFS_ONLINEFOLIO_EIO : 0); + DBG_BUGON(orig <= 0); + v = dirty << EROFS_ONLINEFOLIO_DIRTY; + v |= (orig - 1) | (!!err << EROFS_ONLINEFOLIO_EIO); } while (atomic_cmpxchg((atomic_t *)&folio->private, orig, v) != orig); - if (v & ~EROFS_ONLINEFOLIO_EIO) + if (v & (BIT(EROFS_ONLINEFOLIO_DIRTY) - 1)) return; folio->private = 0; - folio_end_read(folio, !(v & EROFS_ONLINEFOLIO_EIO)); + if (v & BIT(EROFS_ONLINEFOLIO_DIRTY)) + flush_dcache_folio(folio); + folio_end_read(folio, !(v & BIT(EROFS_ONLINEFOLIO_EIO))); } static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, @@ -351,11 +357,16 @@ int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, */ static int erofs_read_folio(struct file *file, struct folio *folio) { + trace_erofs_read_folio(folio, true); + return iomap_read_folio(folio, &erofs_iomap_ops); } static void erofs_readahead(struct readahead_control *rac) { + trace_erofs_readahead(rac->mapping->host, readahead_index(rac), + readahead_count(rac), true); + return iomap_readahead(rac, &erofs_iomap_ops); } diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c index bf62e2836b60..358061d7b660 100644 --- a/fs/erofs/decompressor.c +++ b/fs/erofs/decompressor.c @@ -301,13 +301,11 @@ static int z_erofs_transform_plain(struct z_erofs_decompress_req *rq, cur = min(cur, rq->outputsize); if (cur && rq->out[0]) { kin = kmap_local_page(rq->in[nrpages_in - 1]); - if (rq->out[0] == rq->in[nrpages_in - 1]) { + if (rq->out[0] == rq->in[nrpages_in - 1]) memmove(kin + rq->pageofs_out, kin + pi, cur); - flush_dcache_page(rq->out[0]); - } else { + else memcpy_to_page(rq->out[0], rq->pageofs_out, kin + pi, cur); - } kunmap_local(kin); } rq->outputsize -= cur; @@ -325,14 +323,12 @@ static int z_erofs_transform_plain(struct z_erofs_decompress_req *rq, po 
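[Note: the erofs change above repacks folio->private as a single atomic word: bits 0-28 count the remaining parts, bit 29 latches "dirty" (D-cache aliasing work pending), bit 30 latches an I/O error. Below is a minimal userspace model of the update loop, with C11 atomics standing in for the kernel's cmpxchg; it is a sketch, not the kernel implementation.]

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define ONLINE_EIO	(1u << 30)
#define ONLINE_DIRTY	(1u << 29)
#define ONLINE_PARTS	(ONLINE_DIRTY - 1)	/* bits 0-28: outstanding parts */

static void onlinefolio_end(atomic_uint *priv, int err, bool dirty)
{
	unsigned int orig, v;

	do {		/* decrement the part count, accumulate the sticky bits */
		orig = atomic_load(priv);
		v = (orig - 1) | (dirty ? ONLINE_DIRTY : 0) | (err ? ONLINE_EIO : 0);
	} while (!atomic_compare_exchange_weak(priv, &orig, v));

	if (v & ONLINE_PARTS)		/* other parts still in flight */
		return;
	if (v & ONLINE_DIRTY)		/* kernel: flush_dcache_folio() */
		puts("flush dcache");
	printf("end read, uptodate=%d\n", !(v & ONLINE_EIO));
}
```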
= (rq->pageofs_out + cur + pi) & ~PAGE_MASK; DBG_BUGON(no >= nrpages_out); cnt = min(insz - pi, PAGE_SIZE - po); - if (rq->out[no] == rq->in[ni]) { + if (rq->out[no] == rq->in[ni]) memmove(kin + po, kin + rq->pageofs_in + pi, cnt); - flush_dcache_page(rq->out[no]); - } else if (rq->out[no]) { + else if (rq->out[no]) memcpy_to_page(rq->out[no], po, kin + rq->pageofs_in + pi, cnt); - } pi += cnt; } while (pi < insz); kunmap_local(kin); diff --git a/fs/erofs/dir.c b/fs/erofs/dir.c index 2fae209d0274..3e4b38bec0aa 100644 --- a/fs/erofs/dir.c +++ b/fs/erofs/dir.c @@ -58,6 +58,11 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx) struct erofs_dirent *de; unsigned int nameoff, maxsize; + if (fatal_signal_pending(current)) { + err = -ERESTARTSYS; + break; + } + de = erofs_bread(&buf, dbstart, true); if (IS_ERR(de)) { erofs_err(sb, "failed to readdir of logical block %llu of nid %llu", @@ -88,6 +93,7 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx) break; ctx->pos = dbstart + maxsize; ofs = 0; + cond_resched(); } erofs_put_metabuf(&buf); if (EROFS_I(dir)->dot_omitted && ctx->pos == dir->i_size) { diff --git a/fs/erofs/fileio.c b/fs/erofs/fileio.c index 7d81f504bff0..91781718199e 100644 --- a/fs/erofs/fileio.c +++ b/fs/erofs/fileio.c @@ -38,7 +38,7 @@ static void erofs_fileio_ki_complete(struct kiocb *iocb, long ret) } else { bio_for_each_folio_all(fi, &rq->bio) { DBG_BUGON(folio_test_uptodate(fi.folio)); - erofs_onlinefolio_end(fi.folio, ret); + erofs_onlinefolio_end(fi.folio, ret, false); } } bio_uninit(&rq->bio); @@ -47,6 +47,7 @@ static void erofs_fileio_ki_complete(struct kiocb *iocb, long ret) static void erofs_fileio_rq_submit(struct erofs_fileio_rq *rq) { + const struct cred *old_cred; struct iov_iter iter; int ret; @@ -60,7 +61,9 @@ static void erofs_fileio_rq_submit(struct erofs_fileio_rq *rq) rq->iocb.ki_flags = IOCB_DIRECT; iov_iter_bvec(&iter, ITER_DEST, rq->bvecs, rq->bio.bi_vcnt, rq->bio.bi_iter.bi_size); + old_cred = override_creds(rq->iocb.ki_filp->f_cred); ret = vfs_iocb_iter_read(rq->iocb.ki_filp, &rq->iocb, &iter); + revert_creds(old_cred); if (ret != -EIOCBQUEUED) erofs_fileio_ki_complete(&rq->iocb, ret); } @@ -93,8 +96,6 @@ static int erofs_fileio_scan_folio(struct erofs_fileio *io, struct folio *folio) struct erofs_map_blocks *map = &io->map; unsigned int cur = 0, end = folio_size(folio), len, attached = 0; loff_t pos = folio_pos(folio), ofs; - struct iov_iter iter; - struct bio_vec bv; int err = 0; erofs_onlinefolio_init(folio); @@ -119,13 +120,7 @@ static int erofs_fileio_scan_folio(struct erofs_fileio *io, struct folio *folio) err = PTR_ERR(src); break; } - bvec_set_folio(&bv, folio, len, cur); - iov_iter_bvec(&iter, ITER_DEST, &bv, 1, len); - if (copy_to_iter(src, len, &iter) != len) { - erofs_put_metabuf(&buf); - err = -EIO; - break; - } + memcpy_to_folio(folio, cur, src, len); erofs_put_metabuf(&buf); } else if (!(map->m_flags & EROFS_MAP_MAPPED)) { folio_zero_segment(folio, cur, cur + len); @@ -159,7 +154,7 @@ io_retry: } cur += len; } - erofs_onlinefolio_end(folio, err); + erofs_onlinefolio_end(folio, err, false); return err; } diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index a32c03a80c70..06b867d2fc3b 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -315,10 +315,12 @@ static inline struct folio *erofs_grab_folio_nowait(struct address_space *as, /* The length of extent is full */ #define EROFS_MAP_FULL_MAPPED 0x0008 /* Located in the special packed inode */ -#define EROFS_MAP_FRAGMENT 0x0010 +#define 
__EROFS_MAP_FRAGMENT 0x0010 /* The extent refers to partial decompressed data */ #define EROFS_MAP_PARTIAL_REF 0x0020 +#define EROFS_MAP_FRAGMENT (EROFS_MAP_MAPPED | __EROFS_MAP_FRAGMENT) + struct erofs_map_blocks { struct erofs_buf buf; @@ -390,7 +392,7 @@ int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map); void erofs_onlinefolio_init(struct folio *folio); void erofs_onlinefolio_split(struct folio *folio); -void erofs_onlinefolio_end(struct folio *folio, int err); +void erofs_onlinefolio_end(struct folio *folio, int err, bool dirty); struct inode *erofs_iget(struct super_block *sb, erofs_nid_t nid); int erofs_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index fe8071844724..e3f28a1bb945 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -1034,7 +1034,7 @@ static int z_erofs_scan_folio(struct z_erofs_frontend *f, if (!(map->m_flags & EROFS_MAP_MAPPED)) { folio_zero_segment(folio, cur, end); tight = false; - } else if (map->m_flags & EROFS_MAP_FRAGMENT) { + } else if (map->m_flags & __EROFS_MAP_FRAGMENT) { erofs_off_t fpos = offset + cur - map->m_la; err = z_erofs_read_fragment(inode->i_sb, folio, cur, @@ -1091,7 +1091,7 @@ static int z_erofs_scan_folio(struct z_erofs_frontend *f, tight = (bs == PAGE_SIZE); } } while ((end = cur) > 0); - erofs_onlinefolio_end(folio, err); + erofs_onlinefolio_end(folio, err, false); return err; } @@ -1196,7 +1196,7 @@ static void z_erofs_fill_other_copies(struct z_erofs_backend *be, int err) cur += len; } kunmap_local(dst); - erofs_onlinefolio_end(page_folio(bvi->bvec.page), err); + erofs_onlinefolio_end(page_folio(bvi->bvec.page), err, true); list_del(p); kfree(bvi); } @@ -1355,7 +1355,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_backend *be, int err) DBG_BUGON(z_erofs_page_is_invalidated(page)); if (!z_erofs_is_shortlived_page(page)) { - erofs_onlinefolio_end(page_folio(page), err); + erofs_onlinefolio_end(page_folio(page), err, true); continue; } if (pcl->algorithmformat != Z_EROFS_COMPRESSION_LZ4) { diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index 14ea47f954f5..f1a15ff22147 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -413,8 +413,7 @@ static int z_erofs_map_blocks_fo(struct inode *inode, !vi->z_tailextent_headlcn) { map->m_la = 0; map->m_llen = inode->i_size; - map->m_flags = EROFS_MAP_MAPPED | - EROFS_MAP_FULL_MAPPED | EROFS_MAP_FRAGMENT; + map->m_flags = EROFS_MAP_FRAGMENT; return 0; } initial_lcn = ofs >> lclusterbits; @@ -489,7 +488,7 @@ static int z_erofs_map_blocks_fo(struct inode *inode, goto unmap_out; } } else if (fragment && m.lcn == vi->z_tailextent_headlcn) { - map->m_flags |= EROFS_MAP_FRAGMENT; + map->m_flags = EROFS_MAP_FRAGMENT; } else { map->m_pa = erofs_pos(sb, m.pblk); err = z_erofs_get_extent_compressedlen(&m, initial_lcn); @@ -597,6 +596,10 @@ static int z_erofs_map_blocks_ext(struct inode *inode, if (la > map->m_la) { r = mid; + if (la > lend) { + DBG_BUGON(1); + return -EFSCORRUPTED; + } lend = la; } else { l = mid + 1; @@ -613,7 +616,7 @@ static int z_erofs_map_blocks_ext(struct inode *inode, if (lstart < lend) { map->m_la = lstart; if (last && (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER)) { - map->m_flags |= EROFS_MAP_MAPPED | EROFS_MAP_FRAGMENT; + map->m_flags = EROFS_MAP_FRAGMENT; vi->z_fragmentoff = map->m_plen; if (recsz > offsetof(struct z_erofs_extent, pstart_lo)) vi->z_fragmentoff |= map->m_pa << 
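[Note: since EROFS_MAP_FRAGMENT is now the composite (EROFS_MAP_MAPPED | __EROFS_MAP_FRAGMENT), assignments set both bits in one go, but the tests above had to move to the raw bit. A self-contained illustration follows; the flag values are assumed to match the header.]

```c
#define MAP_MAPPED	0x0001	/* assumed value of EROFS_MAP_MAPPED */
#define __MAP_FRAGMENT	0x0010
#define MAP_FRAGMENT	(MAP_MAPPED | __MAP_FRAGMENT)

/* Right: a plain mapped extent (0x0001) tests false here. */
static int is_fragment(unsigned int m_flags)
{
	return m_flags & __MAP_FRAGMENT;
}

/* Wrong: any mapped extent would test true, because the composite
 * mask shares the MAPPED bit. */
static int is_fragment_buggy(unsigned int m_flags)
{
	return m_flags & MAP_FRAGMENT;
}
```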
32; @@ -635,12 +638,6 @@ static int z_erofs_map_blocks_ext(struct inode *inode, } } map->m_llen = lend - map->m_la; - if (!last && map->m_llen < sb->s_blocksize) { - erofs_err(sb, "extent too small %llu @ offset %llu of nid %llu", - map->m_llen, map->m_la, vi->nid); - DBG_BUGON(1); - return -EFSCORRUPTED; - } return 0; } @@ -799,7 +796,7 @@ static int z_erofs_iomap_begin_report(struct inode *inode, loff_t offset, iomap->length = map.m_llen; if (map.m_flags & EROFS_MAP_MAPPED) { iomap->type = IOMAP_MAPPED; - iomap->addr = map.m_flags & EROFS_MAP_FRAGMENT ? + iomap->addr = map.m_flags & __EROFS_MAP_FRAGMENT ? IOMAP_NULL_ADDR : map.m_pa; } else { iomap->type = IOMAP_HOLE; diff --git a/fs/eventpoll.c b/fs/eventpoll.c index d4dbffdedd08..0fbf5dfedb24 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -883,7 +883,7 @@ static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force) kfree_rcu(epi, rcu); percpu_counter_dec(&ep->user->epoll_watches); - return ep_refcount_dec_and_test(ep); + return true; } /* @@ -891,14 +891,14 @@ static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force) */ static void ep_remove_safe(struct eventpoll *ep, struct epitem *epi) { - WARN_ON_ONCE(__ep_remove(ep, epi, false)); + if (__ep_remove(ep, epi, false)) + WARN_ON_ONCE(ep_refcount_dec_and_test(ep)); } static void ep_clear_and_put(struct eventpoll *ep) { struct rb_node *rbp, *next; struct epitem *epi; - bool dispose; /* We need to release all tasks waiting for these file */ if (waitqueue_active(&ep->poll_wait)) @@ -931,10 +931,8 @@ static void ep_clear_and_put(struct eventpoll *ep) cond_resched(); } - dispose = ep_refcount_dec_and_test(ep); mutex_unlock(&ep->mtx); - - if (dispose) + if (ep_refcount_dec_and_test(ep)) ep_free(ep); } @@ -1137,7 +1135,7 @@ again: dispose = __ep_remove(ep, epi, true); mutex_unlock(&ep->mtx); - if (dispose) + if (dispose && ep_refcount_dec_and_test(ep)) ep_free(ep); goto again; } diff --git a/fs/exec.c b/fs/exec.c index 1f5fdd2e096e..ba400aafd640 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -114,6 +114,9 @@ static inline void put_binfmt(struct linux_binfmt * fmt) bool path_noexec(const struct path *path) { + /* If it's an anonymous inode make sure that we catch any shenanigans. */ + VFS_WARN_ON_ONCE(IS_ANON_FILE(d_inode(path->dentry)) && + !(path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC)); return (path->mnt->mnt_flags & MNT_NOEXEC) || (path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC); } @@ -781,13 +784,15 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags) if (IS_ERR(file)) return file; + if (path_noexec(&file->f_path)) + return ERR_PTR(-EACCES); + /* * In the past the regular type check was here. It moved to may_open() in * 633fb6ac3980 ("exec: move S_ISREG() check earlier"). Since then it is * an invariant that all non-regular files error out before we get here. 
*/ - if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode)) || - path_noexec(&file->f_path)) + if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode))) return ERR_PTR(-EACCES); err = exe_file_deny_write_access(file); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 6bd3de64f2a8..696131e655ed 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -35,6 +35,17 @@ #include <trace/events/f2fs.h> #include <uapi/linux/f2fs.h> +static void f2fs_zero_post_eof_page(struct inode *inode, loff_t new_size) +{ + loff_t old_size = i_size_read(inode); + + if (old_size >= new_size) + return; + + /* zero or drop pages only in range of [old_size, new_size] */ + truncate_pagecache(inode, old_size); +} + static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf) { struct inode *inode = file_inode(vmf->vma->vm_file); @@ -103,8 +114,13 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) f2fs_bug_on(sbi, f2fs_has_inline_data(inode)); + filemap_invalidate_lock(inode->i_mapping); + f2fs_zero_post_eof_page(inode, (folio->index + 1) << PAGE_SHIFT); + filemap_invalidate_unlock(inode->i_mapping); + file_update_time(vmf->vma->vm_file); filemap_invalidate_lock_shared(inode->i_mapping); + folio_lock(folio); if (unlikely(folio->mapping != inode->i_mapping || folio_pos(folio) > i_size_read(inode) || @@ -1109,6 +1125,8 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, f2fs_down_write(&fi->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); + if (attr->ia_size > old_size) + f2fs_zero_post_eof_page(inode, attr->ia_size); truncate_setsize(inode, attr->ia_size); if (attr->ia_size <= old_size) @@ -1227,6 +1245,10 @@ static int f2fs_punch_hole(struct inode *inode, loff_t offset, loff_t len) if (ret) return ret; + filemap_invalidate_lock(inode->i_mapping); + f2fs_zero_post_eof_page(inode, offset + len); + filemap_invalidate_unlock(inode->i_mapping); + pg_start = ((unsigned long long) offset) >> PAGE_SHIFT; pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT; @@ -1510,6 +1532,8 @@ static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len) f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); + f2fs_zero_post_eof_page(inode, offset + len); + f2fs_lock_op(sbi); f2fs_drop_extent_tree(inode); truncate_pagecache(inode, offset); @@ -1631,6 +1655,10 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, if (ret) return ret; + filemap_invalidate_lock(mapping); + f2fs_zero_post_eof_page(inode, offset + len); + filemap_invalidate_unlock(mapping); + pg_start = ((unsigned long long) offset) >> PAGE_SHIFT; pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT; @@ -1762,6 +1790,8 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) /* avoid gc operation during block exchange */ f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); filemap_invalidate_lock(mapping); + + f2fs_zero_post_eof_page(inode, offset + len); truncate_pagecache(inode, offset); while (!ret && idx > pg_start) { @@ -1819,6 +1849,10 @@ static int f2fs_expand_inode_data(struct inode *inode, loff_t offset, if (err) return err; + filemap_invalidate_lock(inode->i_mapping); + f2fs_zero_post_eof_page(inode, offset + len); + filemap_invalidate_unlock(inode->i_mapping); + f2fs_balance_fs(sbi, true); pg_start = ((unsigned long long)offset) >> PAGE_SHIFT; @@ -4860,6 +4894,10 @@ static ssize_t f2fs_write_checks(struct kiocb *iocb, struct iov_iter *from) err = file_modified(file); if (err) return err; + + 
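[Note: every size-extending f2fs path above now calls f2fs_zero_post_eof_page() under the invalidate lock; the helper drops cached pages between the old and new EOF so stale bytes past the old size cannot surface as file data. A userspace analogy over a plain buffer, where explicit zeroing stands in for dropping the cached pages:]

```c
#include <stddef.h>
#include <string.h>

struct filebuf {
	unsigned char data[1 << 16];
	size_t size;	/* logical EOF */
};

static void extend(struct filebuf *f, size_t new_size)
{
	if (new_size <= f->size)
		return;		/* only growth needs scrubbing */
	/* Kernel analogue: truncate_pagecache(inode, old_size). */
	memset(f->data + f->size, 0, new_size - f->size);
	f->size = new_size;
}
```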
filemap_invalidate_lock(inode->i_mapping); + f2fs_zero_post_eof_page(inode, iocb->ki_pos + iov_iter_count(from)); + filemap_invalidate_unlock(inode->i_mapping); return count; } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 1cb4cba7f961..bfe104db284e 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2078,7 +2078,6 @@ write_node: if (!__write_node_folio(folio, false, &submitted, wbc, do_balance, io_type, NULL)) { - folio_unlock(folio); folio_batch_release(&fbatch); ret = -EIO; goto out; diff --git a/fs/file.c b/fs/file.c index 3a3146664cf3..b6db031545e6 100644 --- a/fs/file.c +++ b/fs/file.c @@ -1198,8 +1198,12 @@ bool file_seek_cur_needs_f_lock(struct file *file) if (!(file->f_mode & FMODE_ATOMIC_POS) && !file->f_op->iterate_shared) return false; - VFS_WARN_ON_ONCE((file_count(file) > 1) && - !mutex_is_locked(&file->f_pos_lock)); + /* + * Note that we are not guaranteed to be called after fdget_pos() on + * this file obj, in which case the caller is expected to provide the + * appropriate locking. + */ + return true; } diff --git a/fs/fuse/file.c b/fs/fuse/file.c index f102afc03359..47006d0753f1 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1147,7 +1147,7 @@ static ssize_t fuse_send_write_pages(struct fuse_io_args *ia, static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia, struct address_space *mapping, struct iov_iter *ii, loff_t pos, - unsigned int max_pages) + unsigned int max_folios) { struct fuse_args_pages *ap = &ia->ap; struct fuse_conn *fc = get_fuse_conn(mapping->host); @@ -1157,12 +1157,11 @@ static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia, int err = 0; num = min(iov_iter_count(ii), fc->max_write); - num = min(num, max_pages << PAGE_SHIFT); ap->args.in_pages = true; ap->descs[0].offset = offset; - while (num) { + while (num && ap->num_folios < max_folios) { size_t tmp; struct folio *folio; pgoff_t index = pos >> PAGE_SHIFT; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index bfe8d8af46f3..9572bdef49ee 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -9,6 +9,7 @@ #include "fuse_i.h" #include "dev_uring_i.h" +#include <linux/dax.h> #include <linux/pagemap.h> #include <linux/slab.h> #include <linux/file.h> @@ -162,6 +163,9 @@ static void fuse_evict_inode(struct inode *inode) /* Will write inode on close/munmap and in all other dirtiers */ WARN_ON(inode->i_state & I_DIRTY_INODE); + if (FUSE_IS_DAX(inode)) + dax_break_layout_final(inode); + truncate_inode_pages_final(&inode->i_data); clear_inode(inode); if (inode->i_sb->s_flags & SB_ACTIVE) { diff --git a/fs/libfs.c b/fs/libfs.c index 9ea0ecc325a8..6f487fc6be34 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -1649,12 +1649,10 @@ struct inode *alloc_anon_inode(struct super_block *s) */ inode->i_state = I_DIRTY; /* - * Historically anonymous inodes didn't have a type at all and - * userspace has come to rely on this. Internally they're just - * regular files but S_IFREG is masked off when reporting - * information to userspace. + * Historically anonymous inodes don't have a type at all and + * userspace has come to rely on this. 
*/ - inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR; + inode->i_mode = S_IRUSR | S_IWUSR; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); inode->i_flags |= S_PRIVATE | S_ANON_INODE; diff --git a/fs/namei.c b/fs/namei.c index 4bb889fc980b..c26a7ee42184 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2917,7 +2917,8 @@ static int lookup_one_common(struct mnt_idmap *idmap, * @base: base directory to lookup from * * Look up a dentry by name in the dcache, returning NULL if it does not - * currently exist. The function does not try to create a dentry. + * currently exist. The function does not try to create a dentry and if one + * is found it doesn't try to revalidate it. * * Note that this routine is purely a helper for filesystem usage and should * not be called by generic code. It does no permission checking. @@ -2933,7 +2934,7 @@ struct dentry *try_lookup_noperm(struct qstr *name, struct dentry *base) if (err) return ERR_PTR(err); - return lookup_dcache(name, base, 0); + return d_lookup(base, name); } EXPORT_SYMBOL(try_lookup_noperm); @@ -3057,14 +3058,22 @@ EXPORT_SYMBOL(lookup_one_positive_unlocked); * Note that this routine is purely a helper for filesystem usage and should * not be called by generic code. It does no permission checking. * - * Unlike lookup_noperm, it should be called without the parent + * Unlike lookup_noperm(), it should be called without the parent * i_rwsem held, and will take the i_rwsem itself if necessary. + * + * Unlike try_lookup_noperm() it *does* revalidate the dentry if it already + * existed. */ struct dentry *lookup_noperm_unlocked(struct qstr *name, struct dentry *base) { struct dentry *ret; + int err; - ret = try_lookup_noperm(name, base); + err = lookup_noperm_common(name, base); + if (err) + return ERR_PTR(err); + + ret = lookup_dcache(name, base, 0); if (!ret) ret = lookup_slow(name, base, 0); return ret; @@ -3471,7 +3480,7 @@ static int may_open(struct mnt_idmap *idmap, const struct path *path, return -EACCES; break; default: - VFS_BUG_ON_INODE(1, inode); + VFS_BUG_ON_INODE(!IS_ANON_FILE(inode), inode); } error = inode_permission(idmap, inode, MAY_OPEN | acc_mode); diff --git a/fs/namespace.c b/fs/namespace.c index e13d9ab4f564..54c59e091919 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2310,21 +2310,62 @@ out: return dst_mnt; } -/* Caller should check returned pointer for errors */ +static inline bool extend_array(struct path **res, struct path **to_free, + unsigned n, unsigned *count, unsigned new_count) +{ + struct path *p; -struct vfsmount *collect_mounts(const struct path *path) + if (likely(n < *count)) + return true; + p = kmalloc_array(new_count, sizeof(struct path), GFP_KERNEL); + if (p && *count) + memcpy(p, *res, *count * sizeof(struct path)); + *count = new_count; + kfree(*to_free); + *to_free = *res = p; + return p; +} + +struct path *collect_paths(const struct path *path, + struct path *prealloc, unsigned count) { - struct mount *tree; - namespace_lock(); - if (!check_mnt(real_mount(path->mnt))) - tree = ERR_PTR(-EINVAL); - else - tree = copy_tree(real_mount(path->mnt), path->dentry, - CL_COPY_ALL | CL_PRIVATE); - namespace_unlock(); - if (IS_ERR(tree)) - return ERR_CAST(tree); - return &tree->mnt; + struct mount *root = real_mount(path->mnt); + struct mount *child; + struct path *res = prealloc, *to_free = NULL; + unsigned n = 0; + + guard(rwsem_read)(&namespace_sem); + + if (!check_mnt(root)) + return ERR_PTR(-EINVAL); + if (!extend_array(&res, &to_free, 0, &count, 32)) + return ERR_PTR(-ENOMEM); + res[n++] = *path; + 
list_for_each_entry(child, &root->mnt_mounts, mnt_child) { + if (!is_subdir(child->mnt_mountpoint, path->dentry)) + continue; + for (struct mount *m = child; m; m = next_mnt(m, child)) { + if (!extend_array(&res, &to_free, n, &count, 2 * count)) + return ERR_PTR(-ENOMEM); + res[n].mnt = &m->mnt; + res[n].dentry = m->mnt.mnt_root; + n++; + } + } + if (!extend_array(&res, &to_free, n, &count, count + 1)) + return ERR_PTR(-ENOMEM); + memset(res + n, 0, (count - n) * sizeof(struct path)); + for (struct path *p = res; p->mnt; p++) + path_get(p); + return res; +} + +void drop_collected_paths(struct path *paths, struct path *prealloc) +{ + for (struct path *p = paths; p->mnt; p++) + path_put(p); + if (paths != prealloc) + kfree(paths); } static void free_mnt_ns(struct mnt_namespace *); @@ -2401,15 +2442,6 @@ void dissolve_on_fput(struct vfsmount *mnt) free_mnt_ns(ns); } -void drop_collected_mounts(struct vfsmount *mnt) -{ - namespace_lock(); - lock_mount_hash(); - umount_tree(real_mount(mnt), 0); - unlock_mount_hash(); - namespace_unlock(); -} - static bool __has_locked_children(struct mount *mnt, struct dentry *dentry) { struct mount *child; @@ -2511,21 +2543,6 @@ struct vfsmount *clone_private_mount(const struct path *path) } EXPORT_SYMBOL_GPL(clone_private_mount); -int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg, - struct vfsmount *root) -{ - struct mount *mnt; - int res = f(root, arg); - if (res) - return res; - list_for_each_entry(mnt, &real_mount(root)->mnt_list, mnt_list) { - res = f(&mnt->mnt, arg); - if (res) - return res; - } - return 0; -} - static void lock_mnt_tree(struct mount *mnt) { struct mount *p; @@ -2751,14 +2768,14 @@ static int attach_recursive_mnt(struct mount *source_mnt, hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) { struct mount *q; hlist_del_init(&child->mnt_hash); - q = __lookup_mnt(&child->mnt_parent->mnt, - child->mnt_mountpoint); - if (q) - mnt_change_mountpoint(child, smp, q); /* Notice when we are propagating across user namespaces */ if (child->mnt_parent->mnt_ns->user_ns != user_ns) lock_mnt_tree(child); child->mnt.mnt_flags &= ~MNT_LOCKED; + q = __lookup_mnt(&child->mnt_parent->mnt, + child->mnt_mountpoint); + if (q) + mnt_change_mountpoint(child, smp, q); commit_tree(child); } put_mountpoint(smp); @@ -5290,16 +5307,12 @@ SYSCALL_DEFINE5(open_tree_attr, int, dfd, const char __user *, filename, kattr.kflags |= MOUNT_KATTR_RECURSE; ret = wants_mount_setattr(uattr, usize, &kattr); - if (ret < 0) - return ret; - - if (ret) { + if (ret > 0) { ret = do_mount_setattr(&file->f_path, &kattr); - if (ret) - return ret; - finish_mount_kattr(&kattr); } + if (ret) + return ret; } fd = get_unused_fd_flags(flags & O_CLOEXEC); @@ -6262,7 +6275,11 @@ void put_mnt_ns(struct mnt_namespace *ns) { if (!refcount_dec_and_test(&ns->ns.count)) return; - drop_collected_mounts(&ns->root->mnt); + namespace_lock(); + lock_mount_hash(); + umount_tree(ns->root, 0); + unlock_mount_hash(); + namespace_unlock(); free_mnt_ns(ns); } diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c index 72a3e6db2524..f27ea5099a68 100644 --- a/fs/netfs/buffered_write.c +++ b/fs/netfs/buffered_write.c @@ -53,30 +53,40 @@ static struct folio *netfs_grab_folio_for_write(struct address_space *mapping, * data written into the pagecache until we can find out from the server what * the values actually are. 
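[Note: collect_paths() above replaces the old copy_tree()-based collect_mounts() with a flat, zero-terminated array grown on demand from an optional caller preallocation. A userspace sketch of that growth discipline follows; names are illustrative, and the kernel version additionally zero-fills the terminator and takes path references.]

```c
#include <stdlib.h>
#include <string.h>

/* Ensure room for index `need`; returns the (possibly moved) buffer. */
static void *grow(void **buf, void *prealloc, size_t elem,
		  unsigned int *cap, unsigned int need)
{
	unsigned int new_cap;
	void *p;

	if (need < *cap)
		return *buf;
	new_cap = *cap ? *cap * 2 : 32;
	if (new_cap <= need)
		new_cap = need + 1;
	p = malloc((size_t)new_cap * elem);
	if (!p)
		return NULL;
	if (*cap)
		memcpy(p, *buf, (size_t)*cap * elem);
	if (*buf != prealloc)
		free(*buf);	/* heap-grown buffers are replaced wholesale */
	*buf = p;
	*cap = new_cap;
	return p;
}
```

The matching teardown mirrors drop_collected_paths(): walk entries until the zeroed terminator, drop each reference, and free the array only if it is not the caller's preallocation.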
*/ -static void netfs_update_i_size(struct netfs_inode *ctx, struct inode *inode, - loff_t i_size, loff_t pos, size_t copied) +void netfs_update_i_size(struct netfs_inode *ctx, struct inode *inode, + loff_t pos, size_t copied) { + loff_t i_size, end = pos + copied; blkcnt_t add; size_t gap; + if (end <= i_size_read(inode)) + return; + if (ctx->ops->update_i_size) { - ctx->ops->update_i_size(inode, pos); + ctx->ops->update_i_size(inode, end); return; } - i_size_write(inode, pos); + spin_lock(&inode->i_lock); + + i_size = i_size_read(inode); + if (end > i_size) { + i_size_write(inode, end); #if IS_ENABLED(CONFIG_FSCACHE) - fscache_update_cookie(ctx->cache, NULL, &pos); + fscache_update_cookie(ctx->cache, NULL, &end); #endif - gap = SECTOR_SIZE - (i_size & (SECTOR_SIZE - 1)); - if (copied > gap) { - add = DIV_ROUND_UP(copied - gap, SECTOR_SIZE); + gap = SECTOR_SIZE - (i_size & (SECTOR_SIZE - 1)); + if (copied > gap) { + add = DIV_ROUND_UP(copied - gap, SECTOR_SIZE); - inode->i_blocks = min_t(blkcnt_t, - DIV_ROUND_UP(pos, SECTOR_SIZE), - inode->i_blocks + add); + inode->i_blocks = min_t(blkcnt_t, + DIV_ROUND_UP(end, SECTOR_SIZE), + inode->i_blocks + add); + } } + spin_unlock(&inode->i_lock); } /** @@ -111,7 +121,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, struct folio *folio = NULL, *writethrough = NULL; unsigned int bdp_flags = (iocb->ki_flags & IOCB_NOWAIT) ? BDP_ASYNC : 0; ssize_t written = 0, ret, ret2; - loff_t i_size, pos = iocb->ki_pos; + loff_t pos = iocb->ki_pos; size_t max_chunk = mapping_max_folio_size(mapping); bool maybe_trouble = false; @@ -344,10 +354,8 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, flush_dcache_folio(folio); /* Update the inode size if we moved the EOF marker */ + netfs_update_i_size(ctx, inode, pos, copied); pos += copied; - i_size = i_size_read(inode); - if (pos > i_size) - netfs_update_i_size(ctx, inode, i_size, pos, copied); written += copied; if (likely(!wreq)) { diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c index fa9a5bf3c6d5..a16660ab7f83 100644 --- a/fs/netfs/direct_write.c +++ b/fs/netfs/direct_write.c @@ -9,20 +9,6 @@ #include <linux/uio.h> #include "internal.h" -static void netfs_cleanup_dio_write(struct netfs_io_request *wreq) -{ - struct inode *inode = wreq->inode; - unsigned long long end = wreq->start + wreq->transferred; - - if (!wreq->error && - i_size_read(inode) < end) { - if (wreq->netfs_ops->update_i_size) - wreq->netfs_ops->update_i_size(inode, end); - else - i_size_write(inode, end); - } -} - /* * Perform an unbuffered write where we may have to do an RMW operation on an * encrypted file. This can also be used for direct I/O writes. 
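[Note: the i_blocks estimate in netfs_update_i_size() above only charges sectors beyond the partially used final one, then clamps to what the new EOF could possibly occupy. A userspace restatement of that arithmetic, assuming SECTOR_SIZE is the kernel's 512-byte i_blocks unit:]

```c
#include <stddef.h>
#include <stdint.h>

#define SECTOR_SIZE	512u
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

static uint64_t new_i_blocks(uint64_t i_size, uint64_t i_blocks,
			     uint64_t end, size_t copied)
{
	/* Bytes still free in the sector holding the old EOF. */
	uint64_t gap = SECTOR_SIZE - (i_size & (SECTOR_SIZE - 1));

	if (copied > gap) {
		uint64_t add = DIV_ROUND_UP(copied - gap, SECTOR_SIZE);
		uint64_t cap = DIV_ROUND_UP(end, SECTOR_SIZE);

		return i_blocks + add < cap ? i_blocks + add : cap;
	}
	return i_blocks;
}
```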
@@ -98,7 +84,6 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter * if (async) wreq->iocb = iocb; wreq->len = iov_iter_count(&wreq->buffer.iter); - wreq->cleanup = netfs_cleanup_dio_write; ret = netfs_unbuffered_write(wreq, is_sync_kiocb(iocb), wreq->len); if (ret < 0) { _debug("begin = %zd", ret); @@ -106,7 +91,6 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter * } if (!async) { - trace_netfs_rreq(wreq, netfs_rreq_trace_wait_ip); ret = netfs_wait_for_write(wreq); if (ret > 0) iocb->ki_pos += ret; diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h index e2ee9183392b..d4f16fefd965 100644 --- a/fs/netfs/internal.h +++ b/fs/netfs/internal.h @@ -28,6 +28,12 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio, size_t offset, size_t len); /* + * buffered_write.c + */ +void netfs_update_i_size(struct netfs_inode *ctx, struct inode *inode, + loff_t pos, size_t copied); + +/* * main.c */ extern unsigned int netfs_debug; @@ -267,14 +273,32 @@ static inline void netfs_wake_rreq_flag(struct netfs_io_request *rreq, enum netfs_rreq_trace trace) { if (test_bit(rreq_flag, &rreq->flags)) { - trace_netfs_rreq(rreq, trace); clear_bit_unlock(rreq_flag, &rreq->flags); smp_mb__after_atomic(); /* Set flag before task state */ + trace_netfs_rreq(rreq, trace); wake_up(&rreq->waitq); } } /* + * Test the NETFS_RREQ_IN_PROGRESS flag, inserting an appropriate barrier. + */ +static inline bool netfs_check_rreq_in_progress(const struct netfs_io_request *rreq) +{ + /* Order read of flags before read of anything else, such as error. */ + return test_bit_acquire(NETFS_RREQ_IN_PROGRESS, &rreq->flags); +} + +/* + * Test the NETFS_SREQ_IN_PROGRESS flag, inserting an appropriate barrier. + */ +static inline bool netfs_check_subreq_in_progress(const struct netfs_io_subrequest *subreq) +{ + /* Order read of flags before read of anything else, such as error. 
*/ + return test_bit_acquire(NETFS_SREQ_IN_PROGRESS, &subreq->flags); +} + +/* * fscache-cache.c */ #ifdef CONFIG_PROC_FS diff --git a/fs/netfs/main.c b/fs/netfs/main.c index 3db401d269e7..73da6c9f5777 100644 --- a/fs/netfs/main.c +++ b/fs/netfs/main.c @@ -58,15 +58,15 @@ static int netfs_requests_seq_show(struct seq_file *m, void *v) if (v == &netfs_io_requests) { seq_puts(m, - "REQUEST OR REF FL ERR OPS COVERAGE\n" - "======== == === == ==== === =========\n" + "REQUEST OR REF FLAG ERR OPS COVERAGE\n" + "======== == === ==== ==== === =========\n" ); return 0; } rreq = list_entry(v, struct netfs_io_request, proc_link); seq_printf(m, - "%08x %s %3d %2lx %4ld %3d @%04llx %llx/%llx", + "%08x %s %3d %4lx %4ld %3d @%04llx %llx/%llx", rreq->debug_id, netfs_origins[rreq->origin], refcount_read(&rreq->ref), diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c index 43b67a28a8fa..20748bcfbf59 100644 --- a/fs/netfs/misc.c +++ b/fs/netfs/misc.c @@ -356,22 +356,22 @@ void netfs_wait_for_in_progress_stream(struct netfs_io_request *rreq, DEFINE_WAIT(myself); list_for_each_entry(subreq, &stream->subrequests, rreq_link) { - if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags)) + if (!netfs_check_subreq_in_progress(subreq)) continue; - trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue); + trace_netfs_rreq(rreq, netfs_rreq_trace_wait_quiesce); for (;;) { prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE); - if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags)) + if (!netfs_check_subreq_in_progress(subreq)) break; trace_netfs_sreq(subreq, netfs_sreq_trace_wait_for); schedule(); - trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue); } } + trace_netfs_rreq(rreq, netfs_rreq_trace_waited_quiesce); finish_wait(&rreq->waitq, &myself); } @@ -381,7 +381,12 @@ void netfs_wait_for_in_progress_stream(struct netfs_io_request *rreq, static int netfs_collect_in_app(struct netfs_io_request *rreq, bool (*collector)(struct netfs_io_request *rreq)) { - bool need_collect = false, inactive = true; + bool need_collect = false, inactive = true, done = true; + + if (!netfs_check_rreq_in_progress(rreq)) { + trace_netfs_rreq(rreq, netfs_rreq_trace_recollect); + return 1; /* Done */ + } for (int i = 0; i < NR_IO_STREAMS; i++) { struct netfs_io_subrequest *subreq; @@ -395,14 +400,16 @@ static int netfs_collect_in_app(struct netfs_io_request *rreq, struct netfs_io_subrequest, rreq_link); if (subreq && - (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags) || + (!netfs_check_subreq_in_progress(subreq) || test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags))) { need_collect = true; break; } + if (subreq || !test_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags)) + done = false; } - if (!need_collect && !inactive) + if (!need_collect && !inactive && !done) return 0; /* Sleep */ __set_current_state(TASK_RUNNING); @@ -423,14 +430,13 @@ static int netfs_collect_in_app(struct netfs_io_request *rreq, /* * Wait for a request to complete, successfully or otherwise. 
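[Note: the two netfs_check_*_in_progress() helpers above exist so that every "is it done?" test is an acquire load, pairing with the release-ordered clear in netfs_wake_rreq_flag(); reads of error or transferred issued after the check can then never be reordered before it. A userspace model with C11 atomics, illustrative only:]

```c
#include <stdatomic.h>
#include <stdbool.h>

struct req {
	atomic_bool in_progress;
	int error;		/* written before in_progress is cleared */
};

static void complete(struct req *r, int err)
{
	r->error = err;		/* publish the result ... */
	atomic_store_explicit(&r->in_progress, false,
			      memory_order_release);	/* ... then the flag */
}

static bool check_in_progress(struct req *r)
{
	return atomic_load_explicit(&r->in_progress, memory_order_acquire);
}

static int wait_for_done(struct req *r)
{
	while (check_in_progress(r))
		;		/* the kernel sleeps on a waitqueue instead */
	return r->error;	/* safe: acquire pairs with the release above */
}
```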
*/ -static ssize_t netfs_wait_for_request(struct netfs_io_request *rreq, - bool (*collector)(struct netfs_io_request *rreq)) +static ssize_t netfs_wait_for_in_progress(struct netfs_io_request *rreq, + bool (*collector)(struct netfs_io_request *rreq)) { DEFINE_WAIT(myself); ssize_t ret; for (;;) { - trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue); prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE); if (!test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags)) { @@ -440,18 +446,22 @@ static ssize_t netfs_wait_for_request(struct netfs_io_request *rreq, case 1: goto all_collected; case 2: + if (!netfs_check_rreq_in_progress(rreq)) + break; + cond_resched(); continue; } } - if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) + if (!netfs_check_rreq_in_progress(rreq)) break; + trace_netfs_rreq(rreq, netfs_rreq_trace_wait_ip); schedule(); - trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue); } all_collected: + trace_netfs_rreq(rreq, netfs_rreq_trace_waited_ip); finish_wait(&rreq->waitq, &myself); ret = rreq->error; @@ -478,12 +488,12 @@ all_collected: ssize_t netfs_wait_for_read(struct netfs_io_request *rreq) { - return netfs_wait_for_request(rreq, netfs_read_collection); + return netfs_wait_for_in_progress(rreq, netfs_read_collection); } ssize_t netfs_wait_for_write(struct netfs_io_request *rreq) { - return netfs_wait_for_request(rreq, netfs_write_collection); + return netfs_wait_for_in_progress(rreq, netfs_write_collection); } /* @@ -494,10 +504,8 @@ static void netfs_wait_for_pause(struct netfs_io_request *rreq, { DEFINE_WAIT(myself); - trace_netfs_rreq(rreq, netfs_rreq_trace_wait_pause); - for (;;) { - trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue); + trace_netfs_rreq(rreq, netfs_rreq_trace_wait_pause); prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE); if (!test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags)) { @@ -507,19 +515,23 @@ static void netfs_wait_for_pause(struct netfs_io_request *rreq, case 1: goto all_collected; case 2: + if (!netfs_check_rreq_in_progress(rreq) || + !test_bit(NETFS_RREQ_PAUSE, &rreq->flags)) + break; + cond_resched(); continue; } } - if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags) || + if (!netfs_check_rreq_in_progress(rreq) || !test_bit(NETFS_RREQ_PAUSE, &rreq->flags)) break; schedule(); - trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue); } all_collected: + trace_netfs_rreq(rreq, netfs_rreq_trace_waited_pause); finish_wait(&rreq->waitq, &myself); } diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c index 96ee18af28ef..3e804da1e1eb 100644 --- a/fs/netfs/read_collect.c +++ b/fs/netfs/read_collect.c @@ -218,7 +218,7 @@ reassess: stream->collected_to = front->start; } - if (test_bit(NETFS_SREQ_IN_PROGRESS, &front->flags)) + if (netfs_check_subreq_in_progress(front)) notes |= HIT_PENDING; smp_rmb(); /* Read counters after IN_PROGRESS flag. */ transferred = READ_ONCE(front->transferred); @@ -293,7 +293,9 @@ reassess: spin_lock(&rreq->lock); remove = front; - trace_netfs_sreq(front, netfs_sreq_trace_discard); + trace_netfs_sreq(front, + notes & ABANDON_SREQ ? 
+ netfs_sreq_trace_abandoned : netfs_sreq_trace_consumed); list_del_init(&front->rreq_link); front = list_first_entry_or_null(&stream->subrequests, struct netfs_io_subrequest, rreq_link); @@ -353,9 +355,11 @@ static void netfs_rreq_assess_dio(struct netfs_io_request *rreq) if (rreq->iocb) { rreq->iocb->ki_pos += rreq->transferred; - if (rreq->iocb->ki_complete) + if (rreq->iocb->ki_complete) { + trace_netfs_rreq(rreq, netfs_rreq_trace_ki_complete); rreq->iocb->ki_complete( rreq->iocb, rreq->error ? rreq->error : rreq->transferred); + } } if (rreq->netfs_ops->done) rreq->netfs_ops->done(rreq); @@ -379,9 +383,11 @@ static void netfs_rreq_assess_single(struct netfs_io_request *rreq) if (rreq->iocb) { rreq->iocb->ki_pos += rreq->transferred; - if (rreq->iocb->ki_complete) + if (rreq->iocb->ki_complete) { + trace_netfs_rreq(rreq, netfs_rreq_trace_ki_complete); rreq->iocb->ki_complete( rreq->iocb, rreq->error ? rreq->error : rreq->transferred); + } } if (rreq->netfs_ops->done) rreq->netfs_ops->done(rreq); @@ -445,7 +451,7 @@ void netfs_read_collection_worker(struct work_struct *work) struct netfs_io_request *rreq = container_of(work, struct netfs_io_request, work); netfs_see_request(rreq, netfs_rreq_trace_see_work); - if (test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) { + if (netfs_check_rreq_in_progress(rreq)) { if (netfs_read_collection(rreq)) /* Drop the ref from the IN_PROGRESS flag. */ netfs_put_request(rreq, netfs_rreq_trace_put_work_ip); diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c index e2b102ffb768..0f3a36852a4d 100644 --- a/fs/netfs/write_collect.c +++ b/fs/netfs/write_collect.c @@ -240,7 +240,7 @@ reassess_streams: } /* Stall if the front is still undergoing I/O. */ - if (test_bit(NETFS_SREQ_IN_PROGRESS, &front->flags)) { + if (netfs_check_subreq_in_progress(front)) { notes |= HIT_PENDING; break; } @@ -393,8 +393,10 @@ bool netfs_write_collection(struct netfs_io_request *wreq) ictx->ops->invalidate_cache(wreq); } - if (wreq->cleanup) - wreq->cleanup(wreq); + if ((wreq->origin == NETFS_UNBUFFERED_WRITE || + wreq->origin == NETFS_DIO_WRITE) && + !wreq->error) + netfs_update_i_size(ictx, &ictx->inode, wreq->start, wreq->transferred); if (wreq->origin == NETFS_DIO_WRITE && wreq->mapping->nrpages) { @@ -419,9 +421,11 @@ bool netfs_write_collection(struct netfs_io_request *wreq) if (wreq->iocb) { size_t written = min(wreq->transferred, wreq->len); wreq->iocb->ki_pos += written; - if (wreq->iocb->ki_complete) + if (wreq->iocb->ki_complete) { + trace_netfs_rreq(wreq, netfs_rreq_trace_ki_complete); wreq->iocb->ki_complete( wreq->iocb, wreq->error ? wreq->error : written); + } wreq->iocb = VFS_PTR_POISON; } @@ -434,7 +438,7 @@ void netfs_write_collection_worker(struct work_struct *work) struct netfs_io_request *rreq = container_of(work, struct netfs_io_request, work); netfs_see_request(rreq, netfs_rreq_trace_see_work); - if (test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) { + if (netfs_check_rreq_in_progress(rreq)) { if (netfs_write_collection(rreq)) /* Drop the ref from the IN_PROGRESS flag. 
*/ netfs_put_request(rreq, netfs_rreq_trace_put_work_ip); diff --git a/fs/netfs/write_retry.c b/fs/netfs/write_retry.c index 9d1d8a8bab72..fc9c3e0d34d8 100644 --- a/fs/netfs/write_retry.c +++ b/fs/netfs/write_retry.c @@ -146,14 +146,13 @@ static void netfs_retry_write_stream(struct netfs_io_request *wreq, subreq = netfs_alloc_subrequest(wreq); subreq->source = to->source; subreq->start = start; - subreq->debug_index = atomic_inc_return(&wreq->subreq_counter); subreq->stream_nr = to->stream_nr; subreq->retry_count = 1; trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index, refcount_read(&subreq->ref), netfs_sreq_trace_new); - netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit); + trace_netfs_sreq(subreq, netfs_sreq_trace_split); list_add(&subreq->rreq_link, &to->rreq_link); to = list_next_entry(to, rreq_link); diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index df4807460596..4bea008dbebd 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1105,6 +1105,7 @@ static void ff_layout_reset_read(struct nfs_pgio_header *hdr) } static int ff_layout_async_handle_error_v4(struct rpc_task *task, + u32 op_status, struct nfs4_state *state, struct nfs_client *clp, struct pnfs_layout_segment *lseg, @@ -1115,34 +1116,42 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx); struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table; - switch (task->tk_status) { - case -NFS4ERR_BADSESSION: - case -NFS4ERR_BADSLOT: - case -NFS4ERR_BAD_HIGH_SLOT: - case -NFS4ERR_DEADSESSION: - case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: - case -NFS4ERR_SEQ_FALSE_RETRY: - case -NFS4ERR_SEQ_MISORDERED: + switch (op_status) { + case NFS4_OK: + case NFS4ERR_NXIO: + break; + case NFSERR_PERM: + if (!task->tk_xprt) + break; + xprt_force_disconnect(task->tk_xprt); + goto out_retry; + case NFS4ERR_BADSESSION: + case NFS4ERR_BADSLOT: + case NFS4ERR_BAD_HIGH_SLOT: + case NFS4ERR_DEADSESSION: + case NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + case NFS4ERR_SEQ_FALSE_RETRY: + case NFS4ERR_SEQ_MISORDERED: dprintk("%s ERROR %d, Reset session. 
Exchangeid " "flags 0x%x\n", __func__, task->tk_status, clp->cl_exchange_flags); nfs4_schedule_session_recovery(clp->cl_session, task->tk_status); - break; - case -NFS4ERR_DELAY: + goto out_retry; + case NFS4ERR_DELAY: nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY); fallthrough; - case -NFS4ERR_GRACE: + case NFS4ERR_GRACE: rpc_delay(task, FF_LAYOUT_POLL_RETRY_MAX); - break; - case -NFS4ERR_RETRY_UNCACHED_REP: - break; + goto out_retry; + case NFS4ERR_RETRY_UNCACHED_REP: + goto out_retry; /* Invalidate Layout errors */ - case -NFS4ERR_PNFS_NO_LAYOUT: - case -ESTALE: /* mapped NFS4ERR_STALE */ - case -EBADHANDLE: /* mapped NFS4ERR_BADHANDLE */ - case -EISDIR: /* mapped NFS4ERR_ISDIR */ - case -NFS4ERR_FHEXPIRED: - case -NFS4ERR_WRONG_TYPE: + case NFS4ERR_PNFS_NO_LAYOUT: + case NFS4ERR_STALE: + case NFS4ERR_BADHANDLE: + case NFS4ERR_ISDIR: + case NFS4ERR_FHEXPIRED: + case NFS4ERR_WRONG_TYPE: dprintk("%s Invalid layout error %d\n", __func__, task->tk_status); /* @@ -1155,6 +1164,11 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, pnfs_destroy_layout(NFS_I(inode)); rpc_wake_up(&tbl->slot_tbl_waitq); goto reset; + default: + break; + } + + switch (task->tk_status) { /* RPC connection errors */ case -ENETDOWN: case -ENETUNREACH: @@ -1174,27 +1188,56 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, nfs4_delete_deviceid(devid->ld, devid->nfs_client, &devid->deviceid); rpc_wake_up(&tbl->slot_tbl_waitq); - fallthrough; + break; default: - if (ff_layout_avoid_mds_available_ds(lseg)) - return -NFS4ERR_RESET_TO_PNFS; -reset: - dprintk("%s Retry through MDS. Error %d\n", __func__, - task->tk_status); - return -NFS4ERR_RESET_TO_MDS; + break; } + + if (ff_layout_avoid_mds_available_ds(lseg)) + return -NFS4ERR_RESET_TO_PNFS; +reset: + dprintk("%s Retry through MDS. Error %d\n", __func__, + task->tk_status); + return -NFS4ERR_RESET_TO_MDS; + +out_retry: task->tk_status = 0; return -EAGAIN; } /* Retry all errors through either pNFS or MDS except for -EJUKEBOX */ static int ff_layout_async_handle_error_v3(struct rpc_task *task, + u32 op_status, struct nfs_client *clp, struct pnfs_layout_segment *lseg, u32 idx) { struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx); + switch (op_status) { + case NFS_OK: + case NFSERR_NXIO: + break; + case NFSERR_PERM: + if (!task->tk_xprt) + break; + xprt_force_disconnect(task->tk_xprt); + goto out_retry; + case NFSERR_ACCES: + case NFSERR_BADHANDLE: + case NFSERR_FBIG: + case NFSERR_IO: + case NFSERR_NOSPC: + case NFSERR_ROFS: + case NFSERR_STALE: + goto out_reset_to_pnfs; + case NFSERR_JUKEBOX: + nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY); + goto out_retry; + default: + break; + } + switch (task->tk_status) { /* File access problems. Don't mark the device as unavailable */ case -EACCES: @@ -1218,6 +1261,7 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task, nfs4_delete_deviceid(devid->ld, devid->nfs_client, &devid->deviceid); } +out_reset_to_pnfs: /* FIXME: Need to prevent infinite looping here. 
*/ return -NFS4ERR_RESET_TO_PNFS; out_retry: @@ -1228,6 +1272,7 @@ out_retry: } static int ff_layout_async_handle_error(struct rpc_task *task, + u32 op_status, struct nfs4_state *state, struct nfs_client *clp, struct pnfs_layout_segment *lseg, @@ -1246,10 +1291,11 @@ static int ff_layout_async_handle_error(struct rpc_task *task, switch (vers) { case 3: - return ff_layout_async_handle_error_v3(task, clp, lseg, idx); - case 4: - return ff_layout_async_handle_error_v4(task, state, clp, + return ff_layout_async_handle_error_v3(task, op_status, clp, lseg, idx); + case 4: + return ff_layout_async_handle_error_v4(task, op_status, state, + clp, lseg, idx); default: /* should never happen */ WARN_ON_ONCE(1); @@ -1302,6 +1348,7 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg, switch (status) { case NFS4ERR_DELAY: case NFS4ERR_GRACE: + case NFS4ERR_PERM: break; case NFS4ERR_NXIO: ff_layout_mark_ds_unreachable(lseg, idx); @@ -1334,7 +1381,8 @@ static int ff_layout_read_done_cb(struct rpc_task *task, trace_ff_layout_read_error(hdr, task->tk_status); } - err = ff_layout_async_handle_error(task, hdr->args.context->state, + err = ff_layout_async_handle_error(task, hdr->res.op_status, + hdr->args.context->state, hdr->ds_clp, hdr->lseg, hdr->pgio_mirror_idx); @@ -1507,7 +1555,8 @@ static int ff_layout_write_done_cb(struct rpc_task *task, trace_ff_layout_write_error(hdr, task->tk_status); } - err = ff_layout_async_handle_error(task, hdr->args.context->state, + err = ff_layout_async_handle_error(task, hdr->res.op_status, + hdr->args.context->state, hdr->ds_clp, hdr->lseg, hdr->pgio_mirror_idx); @@ -1556,8 +1605,9 @@ static int ff_layout_commit_done_cb(struct rpc_task *task, trace_ff_layout_commit_error(data, task->tk_status); } - err = ff_layout_async_handle_error(task, NULL, data->ds_clp, - data->lseg, data->ds_commit_index); + err = ff_layout_async_handle_error(task, data->res.op_status, + NULL, data->ds_clp, data->lseg, + data->ds_commit_index); trace_nfs4_pnfs_commit_ds(data, err); switch (err) { diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 8ab7868807a7..a2fa6bc4d74e 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -2589,15 +2589,26 @@ EXPORT_SYMBOL_GPL(nfs_net_id); static int nfs_net_init(struct net *net) { struct nfs_net *nn = net_generic(net, nfs_net_id); + int err; nfs_clients_init(net); if (!rpc_proc_register(net, &nn->rpcstats)) { - nfs_clients_exit(net); - return -ENOMEM; + err = -ENOMEM; + goto err_proc_rpc; } - return nfs_fs_proc_net_init(net); + err = nfs_fs_proc_net_init(net); + if (err) + goto err_proc_nfs; + + return 0; + +err_proc_nfs: + rpc_proc_unregister(net, "nfs"); +err_proc_rpc: + nfs_clients_exit(net); + return err; } static void nfs_net_exit(struct net *net) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 3adb7d0dbec7..1a7ec68bde15 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2059,8 +2059,10 @@ static void nfs_layoutget_begin(struct pnfs_layout_hdr *lo) static void nfs_layoutget_end(struct pnfs_layout_hdr *lo) { if (atomic_dec_and_test(&lo->plh_outstanding) && - test_and_clear_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags)) + test_and_clear_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags)) { + smp_mb__after_atomic(); wake_up_bit(&lo->plh_flags, NFS_LAYOUT_DRAIN); + } } static bool pnfs_is_first_layoutget(struct pnfs_layout_hdr *lo) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index ccb00aa93be0..e00b2aea8da2 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -1409,6 +1409,7 @@ void nfsd41_cb_referring_call(struct nfsd4_callback 
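[Note: the flexfiles rework above splits error handling into two levels: the NFS-level op_status from the server reply is classified first, and only then does the RPC-level task status (transport errors) get a say. A compressed sketch of that shape, with illustrative constants standing in for the kernel's error codes:]

```c
enum action { RETRY_DS, RESET_TO_PNFS, RESET_TO_MDS };

static enum action handle_ds_error(int op_status, int rpc_status)
{
	switch (op_status) {		/* protocol result from the server */
	case 0:				/* NFS4_OK: no protocol error */
		break;
	case 10008:			/* NFS4ERR_DELAY: back off and retry */
		return RETRY_DS;
	default:
		break;			/* unclassified: consult the transport */
	}

	switch (rpc_status) {		/* transport result seen by the client */
	case -111:			/* -ECONNREFUSED: mark this DS unavailable */
		return RESET_TO_PNFS;
	default:
		return RESET_TO_MDS;	/* last resort: retry through the MDS */
	}
}
```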
*cb, out: if (!rcl->__nr_referring_calls) { cb->cb_nr_referring_call_list--; + list_del(&rcl->__list); kfree(rcl); } } diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 3f3e9f6c4250..6a42cc7a845a 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1611,7 +1611,7 @@ out_unlock: */ int nfsd_nl_threads_set_doit(struct sk_buff *skb, struct genl_info *info) { - int *nthreads, count = 0, nrpools, i, ret = -EOPNOTSUPP, rem; + int *nthreads, nrpools = 0, i, ret = -EOPNOTSUPP, rem; struct net *net = genl_info_net(info); struct nfsd_net *nn = net_generic(net, nfsd_net_id); const struct nlattr *attr; @@ -1623,12 +1623,11 @@ int nfsd_nl_threads_set_doit(struct sk_buff *skb, struct genl_info *info) /* count number of SERVER_THREADS values */ nlmsg_for_each_attr(attr, info->nlhdr, GENL_HDRLEN, rem) { if (nla_type(attr) == NFSD_A_SERVER_THREADS) - count++; + nrpools++; } mutex_lock(&nfsd_mutex); - nrpools = max(count, nfsd_nrpools(net)); nthreads = kcalloc(nrpools, sizeof(int), GFP_KERNEL); if (!nthreads) { ret = -ENOMEM; diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 1fc68b59fa5d..76d6248b625e 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -1418,7 +1418,7 @@ out: bool ovl_lower_positive(struct dentry *dentry) { struct ovl_entry *poe = OVL_E(dentry->d_parent); - struct qstr *name = &dentry->d_name; + const struct qstr *name = &dentry->d_name; const struct cred *old_cred; unsigned int i; bool positive = false; @@ -1441,9 +1441,15 @@ bool ovl_lower_positive(struct dentry *dentry) struct dentry *this; struct ovl_path *parentpath = &ovl_lowerstack(poe)[i]; + /* + * We need to make a non-const copy of dentry->d_name, + * because lookup_one_positive_unlocked() will hash name + * with parentpath base, which is on another (lower fs). 
+ */ this = lookup_one_positive_unlocked( mnt_idmap(parentpath->layer->mnt), - name, parentpath->dentry); + &QSTR_LEN(name->name, name->len), + parentpath->dentry); if (IS_ERR(this)) { switch (PTR_ERR(this)) { case -ENOENT: diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 239a9c3642c0..55806bd36faa 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -246,9 +246,11 @@ static inline struct dentry *ovl_do_mkdir(struct ovl_fs *ofs, struct dentry *dentry, umode_t mode) { - dentry = vfs_mkdir(ovl_upper_mnt_idmap(ofs), dir, dentry, mode); - pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, PTR_ERR_OR_ZERO(dentry)); - return dentry; + struct dentry *ret; + + ret = vfs_mkdir(ovl_upper_mnt_idmap(ofs), dir, dentry, mode); + pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, PTR_ERR_OR_ZERO(ret)); + return ret; } static inline int ovl_do_mknod(struct ovl_fs *ofs, diff --git a/fs/pidfs.c b/fs/pidfs.c index c1f0a067be40..69919be1c9d8 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -366,7 +366,7 @@ static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg) kinfo.pid = task_pid_vnr(task); kinfo.mask |= PIDFD_INFO_PID; - if (kinfo.pid == 0 || kinfo.tgid == 0 || (kinfo.ppid == 0 && kinfo.pid != 1)) + if (kinfo.pid == 0 || kinfo.tgid == 0) return -ESRCH; copy_out: diff --git a/fs/pnode.h b/fs/pnode.h index 34b6247af01d..2d026fb98b18 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -28,8 +28,6 @@ #define CL_SHARED_TO_SLAVE 0x20 #define CL_COPY_MNT_NS_FILE 0x40 -#define CL_COPY_ALL (CL_COPY_UNBINDABLE | CL_COPY_MNT_NS_FILE) - static inline void set_mnt_shared(struct mount *mnt) { mnt->mnt.mnt_flags &= ~MNT_SHARED_MASK; diff --git a/fs/proc/inode.c b/fs/proc/inode.c index a3eb3b740f76..3604b616311c 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -42,7 +42,7 @@ static void proc_evict_inode(struct inode *inode) head = ei->sysctl; if (head) { - RCU_INIT_POINTER(ei->sysctl, NULL); + WRITE_ONCE(ei->sysctl, NULL); proc_sys_evict_inode(inode, head); } } diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index cc9d74a06ff0..08b78150cdde 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -918,17 +918,21 @@ static int proc_sys_compare(const struct dentry *dentry, struct ctl_table_header *head; struct inode *inode; - /* Although proc doesn't have negative dentries, rcu-walk means - * that inode here can be NULL */ - /* AV: can it, indeed? 
*/ - inode = d_inode_rcu(dentry); - if (!inode) - return 1; if (name->len != len) return 1; if (memcmp(name->name, str, len)) return 1; - head = rcu_dereference(PROC_I(inode)->sysctl); + + // false positive is fine here - we'll recheck anyway + if (d_in_lookup(dentry)) + return 0; + + inode = d_inode_rcu(dentry); + // we just might have run into dentry in the middle of __dentry_kill() + if (!inode) + return 1; + + head = READ_ONCE(PROC_I(inode)->sysctl); return !head || !sysctl_is_seen(head); } diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 27972c0749e7..751479eb128f 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -36,9 +36,9 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) unsigned long text, lib, swap, anon, file, shmem; unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss; - anon = get_mm_counter(mm, MM_ANONPAGES); - file = get_mm_counter(mm, MM_FILEPAGES); - shmem = get_mm_counter(mm, MM_SHMEMPAGES); + anon = get_mm_counter_sum(mm, MM_ANONPAGES); + file = get_mm_counter_sum(mm, MM_FILEPAGES); + shmem = get_mm_counter_sum(mm, MM_SHMEMPAGES); /* * Note: to minimize their overhead, mm maintains hiwater_vm and @@ -59,7 +59,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) text = min(text, mm->exec_vm << PAGE_SHIFT); lib = (mm->exec_vm << PAGE_SHIFT) - text; - swap = get_mm_counter(mm, MM_SWAPENTS); + swap = get_mm_counter_sum(mm, MM_SWAPENTS); SEQ_PUT_DEC("VmPeak:\t", hiwater_vm); SEQ_PUT_DEC(" kB\nVmSize:\t", total_vm); SEQ_PUT_DEC(" kB\nVmLck:\t", mm->locked_vm); @@ -92,12 +92,12 @@ unsigned long task_statm(struct mm_struct *mm, unsigned long *shared, unsigned long *text, unsigned long *data, unsigned long *resident) { - *shared = get_mm_counter(mm, MM_FILEPAGES) + - get_mm_counter(mm, MM_SHMEMPAGES); + *shared = get_mm_counter_sum(mm, MM_FILEPAGES) + + get_mm_counter_sum(mm, MM_SHMEMPAGES); *text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> PAGE_SHIFT; *data = mm->data_vm + mm->stack_vm; - *resident = *shared + get_mm_counter(mm, MM_ANONPAGES); + *resident = *shared + get_mm_counter_sum(mm, MM_ANONPAGES); return mm->total_vm; } @@ -2182,7 +2182,7 @@ static unsigned long pagemap_thp_category(struct pagemap_scan_private *p, categories |= PAGE_IS_FILE; } - if (is_zero_pfn(pmd_pfn(pmd))) + if (is_huge_zero_pmd(pmd)) categories |= PAGE_IS_PFNZERO; if (pmd_soft_dirty(pmd)) categories |= PAGE_IS_SOFT_DIRTY; diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c index 6ed2dfd4dbbd..d98e0d2de09f 100644 --- a/fs/resctrl/ctrlmondata.c +++ b/fs/resctrl/ctrlmondata.c @@ -594,9 +594,10 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) struct rmid_read rr = {0}; struct rdt_mon_domain *d; struct rdtgroup *rdtgrp; + int domid, cpu, ret = 0; struct rdt_resource *r; + struct cacheinfo *ci; struct mon_data *md; - int domid, ret = 0; rdtgrp = rdtgroup_kn_lock_live(of->kn); if (!rdtgrp) { @@ -623,10 +624,14 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) * one that matches this cache id. 
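[Note: the proc hunks above pair WRITE_ONCE() in proc_evict_inode() with READ_ONCE() in proc_sys_compare(), so the lockless d_compare path sees either the old header pointer or NULL, never a torn or compiler-cached value. In userspace terms this is a relaxed atomic pointer handoff; the model below is illustrative only:]

```c
#include <stdatomic.h>
#include <stddef.h>

struct ctl_header { int seen; };

static _Atomic(struct ctl_header *) sysctl;	/* models ei->sysctl */

static void evict(void)			/* models WRITE_ONCE(ei->sysctl, NULL) */
{
	atomic_store_explicit(&sysctl, NULL, memory_order_relaxed);
}

static int compare_mismatch(void)	/* models the READ_ONCE() reader */
{
	struct ctl_header *head =
		atomic_load_explicit(&sysctl, memory_order_relaxed);

	return !head || !head->seen;	/* 1: treat the dentry as a mismatch */
}
```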
*/ list_for_each_entry(d, &r->mon_domains, hdr.list) { - if (d->ci->id == domid) { - rr.ci = d->ci; + if (d->ci_id == domid) { + rr.ci_id = d->ci_id; + cpu = cpumask_any(&d->hdr.cpu_mask); + ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE); + if (!ci) + continue; mon_event_read(&rr, r, NULL, rdtgrp, - &d->ci->shared_cpu_map, evtid, false); + &ci->shared_cpu_map, evtid, false); goto checkresult; } } diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h index 9a8cf6f11151..0a1eedba2b03 100644 --- a/fs/resctrl/internal.h +++ b/fs/resctrl/internal.h @@ -98,7 +98,7 @@ struct mon_data { * domains in @r sharing L3 @ci.id * @evtid: Which monitor event to read. * @first: Initialize MBM counter when true. - * @ci: Cacheinfo for L3. Only set when @d is NULL. Used when summing domains. + * @ci_id: Cacheinfo id for L3. Only set when @d is NULL. Used when summing domains. * @err: Error encountered when reading counter. * @val: Returned value of event counter. If @rgrp is a parent resource group, * @val includes the sum of event counts from its child resource groups. @@ -112,7 +112,7 @@ struct rmid_read { struct rdt_mon_domain *d; enum resctrl_event_id evtid; bool first; - struct cacheinfo *ci; + unsigned int ci_id; int err; u64 val; void *arch_mon_ctx; diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index bde2801289d3..f5637855c3ac 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -361,6 +361,7 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr) { int cpu = smp_processor_id(); struct rdt_mon_domain *d; + struct cacheinfo *ci; struct mbm_state *m; int err, ret; u64 tval = 0; @@ -388,7 +389,8 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr) } /* Summing domains that share a cache, must be on a CPU for that cache. */ - if (!cpumask_test_cpu(cpu, &rr->ci->shared_cpu_map)) + ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE); + if (!ci || ci->id != rr->ci_id) return -EINVAL; /* @@ -400,7 +402,7 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr) */ ret = -EINVAL; list_for_each_entry(d, &rr->r->mon_domains, hdr.list) { - if (d->ci->id != rr->ci->id) + if (d->ci_id != rr->ci_id) continue; err = resctrl_arch_rmid_read(rr->r, d, closid, rmid, rr->evtid, &tval, rr->arch_mon_ctx); diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c index 1beb124e25f6..77d08229d855 100644 --- a/fs/resctrl/rdtgroup.c +++ b/fs/resctrl/rdtgroup.c @@ -3036,7 +3036,7 @@ static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, char name[32]; snc_mode = r->mon_scope == RESCTRL_L3_NODE; - sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id); + sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci_id : d->hdr.id); if (snc_mode) sprintf(subname, "mon_sub_%s_%02d", r->name, d->hdr.id); @@ -3061,7 +3061,7 @@ static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d, return -EPERM; list_for_each_entry(mevt, &r->evt_list, list) { - domid = do_sum ? d->ci->id : d->hdr.id; + domid = do_sum ? d->ci_id : d->hdr.id; priv = mon_get_kn_priv(r->rid, domid, mevt, do_sum); if (WARN_ON_ONCE(!priv)) return -EINVAL; @@ -3089,7 +3089,7 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, lockdep_assert_held(&rdtgroup_mutex); snc_mode = r->mon_scope == RESCTRL_L3_NODE; - sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id); + sprintf(name, "mon_%s_%02d", r->name, snc_mode ? 
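[Note: after the change above, struct rmid_read carries only the L3 cache id rather than a cacheinfo pointer that could go stale. Below is a condensed, kernel-style restatement of the guard that __mon_event_count() now applies; this is a sketch of the hunk above, not a new API.]

```c
/* Sketch: summing reads must run on a CPU under the same L3 instance. */
static int reader_on_summed_cache(unsigned int ci_id)
{
	struct cacheinfo *ci = get_cpu_cacheinfo_level(smp_processor_id(),
						       RESCTRL_L3_CACHE);

	return (ci && ci->id == ci_id) ? 0 : -EINVAL;
}
```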
d->ci_id : d->hdr.id); kn = kernfs_find_and_get(parent_kn, name); if (kn) { /* diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index 5200a0f3cafc..368e870624da 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -509,8 +509,17 @@ void close_all_cached_dirs(struct cifs_sb_info *cifs_sb) spin_lock(&cfids->cfid_list_lock); list_for_each_entry(cfid, &cfids->entries, entry) { tmp_list = kmalloc(sizeof(*tmp_list), GFP_ATOMIC); - if (tmp_list == NULL) - break; + if (tmp_list == NULL) { + /* + * If the malloc() fails, we won't drop all + * dentries, and unmounting is likely to trigger + * a 'Dentry still in use' error. + */ + cifs_tcon_dbg(VFS, "Out of memory while dropping dentries\n"); + spin_unlock(&cfids->cfid_list_lock); + spin_unlock(&cifs_sb->tlink_tree_lock); + goto done; + } spin_lock(&cfid->fid_lock); tmp_list->dentry = cfid->dentry; cfid->dentry = NULL; @@ -522,6 +531,7 @@ void close_all_cached_dirs(struct cifs_sb_info *cifs_sb) } spin_unlock(&cifs_sb->tlink_tree_lock); +done: list_for_each_entry_safe(tmp_list, q, &entry, entry) { list_del(&tmp_list->entry); dput(tmp_list->dentry); diff --git a/fs/smb/client/cached_dir.h b/fs/smb/client/cached_dir.h index bc8a812ff95f..a28f7cae3caa 100644 --- a/fs/smb/client/cached_dir.h +++ b/fs/smb/client/cached_dir.h @@ -26,7 +26,7 @@ struct cached_dirents { * open file instance. */ struct mutex de_mutex; - int pos; /* Expected ctx->pos */ + loff_t pos; /* Expected ctx->pos */ struct list_head entries; }; diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index c0196be0e65f..3fdf75737d43 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -1105,7 +1105,7 @@ static ssize_t cifs_security_flags_proc_write(struct file *file, if ((count < 1) || (count > 11)) return -EINVAL; - memset(flags_string, 0, 12); + memset(flags_string, 0, sizeof(flags_string)); if (copy_from_user(flags_string, buffer, count)) return -EFAULT; diff --git a/fs/smb/client/cifs_ioctl.h b/fs/smb/client/cifs_ioctl.h index 26327442e383..b51ce64fcccf 100644 --- a/fs/smb/client/cifs_ioctl.h +++ b/fs/smb/client/cifs_ioctl.h @@ -61,7 +61,7 @@ struct smb_query_info { struct smb3_key_debug_info { __u64 Suid; __u16 cipher_type; - __u8 auth_key[16]; /* SMB2_NTLMV2_SESSKEY_SIZE */ + __u8 auth_key[SMB2_NTLMV2_SESSKEY_SIZE]; __u8 smb3encryptionkey[SMB3_SIGN_KEY_SIZE]; __u8 smb3decryptionkey[SMB3_SIGN_KEY_SIZE]; } __packed; diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 45e94e18f4d5..89160bc34d35 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -709,6 +709,7 @@ inc_rfc1001_len(void *buf, int count) struct TCP_Server_Info { struct list_head tcp_ses_list; struct list_head smb_ses_list; + struct list_head rlist; /* reconnect list */ spinlock_t srv_lock; /* protect anything here that is not protected */ __u64 conn_id; /* connection identifier (useful for debugging) */ int srv_count; /* reference counter */ @@ -776,6 +777,7 @@ struct TCP_Server_Info { __le32 session_key_id; /* retrieved from negotiate response and send in session setup request */ struct session_key session_key; unsigned long lstrp; /* when we got last response from this server */ + unsigned long neg_start; /* when negotiate started (jiffies) */ struct cifs_secmech secmech; /* crypto sec mech functs, descriptors */ #define CIFS_NEGFLAVOR_UNENCAP 1 /* wct == 17, but no ext_sec */ #define CIFS_NEGFLAVOR_EXTENDED 2 /* wct == 17, ext_sec bit set */ @@ -1302,6 +1304,7 @@ struct cifs_tcon { bool use_persistent:1; 
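The close_all_cached_dirs() change above is an instance of the collect-under-lock, act-after-unlock pattern: dput() can sleep, so dentry references are moved onto a private list while the spinlocks are held and only dropped afterwards, and the allocation needed inside the lock must be GFP_ATOMIC and may fail. A compressed, self-contained sketch with invented names (struct cfid stands in for the real cached_fid):

#include <linux/dcache.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct cfid {			/* stand-in for the real cached_fid */
	struct dentry *dentry;
	struct list_head entry;
};

struct dref {
	struct dentry *dentry;
	struct list_head entry;
};

static void drop_all(spinlock_t *lock, struct list_head *cfids)
{
	struct dref *ref, *tmp;
	struct cfid *cfid;
	LIST_HEAD(pending);

	spin_lock(lock);
	list_for_each_entry(cfid, cfids, entry) {
		ref = kmalloc(sizeof(*ref), GFP_ATOMIC);
		if (!ref)
			break;		/* can't sleep here; drain what we have */
		ref->dentry = cfid->dentry;	/* steal the reference */
		cfid->dentry = NULL;
		list_add_tail(&ref->entry, &pending);
	}
	spin_unlock(lock);

	list_for_each_entry_safe(ref, tmp, &pending, entry) {
		list_del(&ref->entry);
		dput(ref->dentry);	/* safe: no spinlocks held */
		kfree(ref);
	}
}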
/* use persistent instead of durable handles */ bool no_lease:1; /* Do not request leases on files or directories */ bool use_witness:1; /* use witness protocol */ + bool dummy:1; /* dummy tcon used for reconnecting channels */ __le32 capabilities; __u32 share_flags; __u32 maximal_access; diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index 66093fa78aed..045227ed4efc 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -136,6 +136,7 @@ extern int SendReceiveBlockingLock(const unsigned int xid, struct smb_hdr *out_buf, int *bytes_returned); +void smb2_query_server_interfaces(struct work_struct *work); void cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server, bool all_channels); diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c index 7216fcec79e8..75142f49d65d 100644 --- a/fs/smb/client/cifssmb.c +++ b/fs/smb/client/cifssmb.c @@ -1334,7 +1334,12 @@ cifs_readv_callback(struct mid_q_entry *mid) cifs_stats_bytes_read(tcon, rdata->got_bytes); break; case MID_REQUEST_SUBMITTED: + trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_req_submitted); + goto do_retry; case MID_RETRY_NEEDED: + trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_retry_needed); +do_retry: + __set_bit(NETFS_SREQ_NEED_RETRY, &rdata->subreq.flags); rdata->result = -EAGAIN; if (server->sign && rdata->got_bytes) /* reset bytes number since we can not check a sign */ @@ -1343,8 +1348,14 @@ cifs_readv_callback(struct mid_q_entry *mid) task_io_account_read(rdata->got_bytes); cifs_stats_bytes_read(tcon, rdata->got_bytes); break; + case MID_RESPONSE_MALFORMED: + trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_malformed); + rdata->result = -EIO; + break; default: + trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_unknown); rdata->result = -EIO; + break; } if (rdata->result == -ENODATA) { @@ -1713,10 +1724,21 @@ cifs_writev_callback(struct mid_q_entry *mid) } break; case MID_REQUEST_SUBMITTED: + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_req_submitted); + __set_bit(NETFS_SREQ_NEED_RETRY, &wdata->subreq.flags); + result = -EAGAIN; + break; case MID_RETRY_NEEDED: + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_retry_needed); + __set_bit(NETFS_SREQ_NEED_RETRY, &wdata->subreq.flags); result = -EAGAIN; break; + case MID_RESPONSE_MALFORMED: + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_malformed); + result = -EIO; + break; default: + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_unknown); result = -EIO; break; } diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index c4fb80b37738..205f547ca49e 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -97,7 +97,7 @@ static int reconn_set_ipaddr_from_hostname(struct TCP_Server_Info *server) return rc; } -static void smb2_query_server_interfaces(struct work_struct *work) +void smb2_query_server_interfaces(struct work_struct *work) { int rc; int xid; @@ -124,6 +124,14 @@ static void smb2_query_server_interfaces(struct work_struct *work) (SMB_INTERFACE_POLL_INTERVAL * HZ)); } +#define set_need_reco(server) \ +do { \ + spin_lock(&server->srv_lock); \ + if (server->tcpStatus != CifsExiting) \ + server->tcpStatus = CifsNeedReconnect; \ + spin_unlock(&server->srv_lock); \ +} while (0) + /* * Update the tcpStatus for the server. 
* This is used to signal the cifsd thread to call cifs_reconnect @@ -137,39 +145,45 @@ void cifs_signal_cifsd_for_reconnect(struct TCP_Server_Info *server, bool all_channels) { - struct TCP_Server_Info *pserver; + struct TCP_Server_Info *nserver; struct cifs_ses *ses; + LIST_HEAD(reco); int i; - /* If server is a channel, select the primary channel */ - pserver = SERVER_IS_CHAN(server) ? server->primary_server : server; - /* if we need to signal just this channel */ if (!all_channels) { - spin_lock(&server->srv_lock); - if (server->tcpStatus != CifsExiting) - server->tcpStatus = CifsNeedReconnect; - spin_unlock(&server->srv_lock); + set_need_reco(server); return; } - spin_lock(&cifs_tcp_ses_lock); - list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { - if (cifs_ses_exiting(ses)) - continue; - spin_lock(&ses->chan_lock); - for (i = 0; i < ses->chan_count; i++) { - if (!ses->chans[i].server) + if (SERVER_IS_CHAN(server)) + server = server->primary_server; + scoped_guard(spinlock, &cifs_tcp_ses_lock) { + set_need_reco(server); + list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { + spin_lock(&ses->ses_lock); + if (ses->ses_status == SES_EXITING) { + spin_unlock(&ses->ses_lock); continue; - - spin_lock(&ses->chans[i].server->srv_lock); - if (ses->chans[i].server->tcpStatus != CifsExiting) - ses->chans[i].server->tcpStatus = CifsNeedReconnect; - spin_unlock(&ses->chans[i].server->srv_lock); + } + spin_lock(&ses->chan_lock); + for (i = 1; i < ses->chan_count; i++) { + nserver = ses->chans[i].server; + if (!nserver) + continue; + nserver->srv_count++; + list_add(&nserver->rlist, &reco); + } + spin_unlock(&ses->chan_lock); + spin_unlock(&ses->ses_lock); } - spin_unlock(&ses->chan_lock); } - spin_unlock(&cifs_tcp_ses_lock); + + list_for_each_entry_safe(server, nserver, &reco, rlist) { + list_del_init(&server->rlist); + set_need_reco(server); + cifs_put_tcp_session(server, 0); + } } /* @@ -665,12 +679,12 @@ server_unresponsive(struct TCP_Server_Info *server) /* * If we're in the process of mounting a share or reconnecting a session * and the server abruptly shut down (e.g. socket wasn't closed, packet - * had been ACK'ed but no SMB response), don't wait longer than 20s to - * negotiate protocol. + * had been ACK'ed but no SMB response), don't wait longer than 20s from + * when negotiate actually started. 
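The rewritten cifs_signal_cifsd_for_reconnect() above leans on the <linux/cleanup.h> guard helpers: scoped_guard() releases the lock on every exit path of its block, which is what makes the early-return and continue style inside it safe. A minimal standalone sketch; struct foo and its fields are invented:

#include <linux/cleanup.h>
#include <linux/errno.h>
#include <linux/spinlock.h>

struct foo {
	spinlock_t lock;
	bool dying;
	int state;
};

static int bump_state(struct foo *f)
{
	scoped_guard(spinlock, &f->lock) {
		if (f->dying)
			return -ENODEV;	/* lock dropped automatically */
		f->state++;
	}
	return 0;
}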
*/ spin_lock(&server->srv_lock); if (server->tcpStatus == CifsInNegotiate && - time_after(jiffies, server->lstrp + 20 * HZ)) { + time_after(jiffies, server->neg_start + 20 * HZ)) { spin_unlock(&server->srv_lock); cifs_reconnect(server, false); return true; @@ -2866,20 +2880,14 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx) tcon->max_cached_dirs = ctx->max_cached_dirs; tcon->nodelete = ctx->nodelete; tcon->local_lease = ctx->local_lease; - INIT_LIST_HEAD(&tcon->pending_opens); tcon->status = TID_GOOD; - INIT_DELAYED_WORK(&tcon->query_interfaces, - smb2_query_server_interfaces); if (ses->server->dialect >= SMB30_PROT_ID && (ses->server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL)) { /* schedule query interfaces poll */ queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, (SMB_INTERFACE_POLL_INTERVAL * HZ)); } -#ifdef CONFIG_CIFS_DFS_UPCALL - INIT_DELAYED_WORK(&tcon->dfs_cache_work, dfs_cache_refresh); -#endif spin_lock(&cifs_tcp_ses_lock); list_add(&tcon->tcon_list, &ses->tcon_list); spin_unlock(&cifs_tcp_ses_lock); @@ -4199,7 +4207,9 @@ retry: return 0; } + server->lstrp = jiffies; server->tcpStatus = CifsInNegotiate; + server->neg_start = jiffies; spin_unlock(&server->srv_lock); rc = server->ops->negotiate(xid, ses, server); diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index 9835672267d2..e9212da32f01 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -52,6 +52,7 @@ static void cifs_prepare_write(struct netfs_io_subrequest *subreq) struct netfs_io_stream *stream = &req->rreq.io_streams[subreq->stream_nr]; struct TCP_Server_Info *server; struct cifsFileInfo *open_file = req->cfile; + struct cifs_sb_info *cifs_sb = CIFS_SB(wdata->rreq->inode->i_sb); size_t wsize = req->rreq.wsize; int rc; @@ -63,6 +64,10 @@ static void cifs_prepare_write(struct netfs_io_subrequest *subreq) server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses); wdata->server = server; + if (cifs_sb->ctx->wsize == 0) + cifs_negotiate_wsize(server, cifs_sb->ctx, + tlink_tcon(req->cfile->tlink)); + retry: if (open_file->invalidHandle) { rc = cifs_reopen_file(open_file, false); @@ -160,10 +165,9 @@ static int cifs_prepare_read(struct netfs_io_subrequest *subreq) server = cifs_pick_channel(tlink_tcon(req->cfile->tlink)->ses); rdata->server = server; - if (cifs_sb->ctx->rsize == 0) { + if (cifs_sb->ctx->rsize == 0) cifs_negotiate_rsize(server, cifs_sb->ctx, tlink_tcon(req->cfile->tlink)); - } rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, &size, &rdata->credits); diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index a634a34d4086..59ccc2229ab3 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -1824,10 +1824,14 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, cifs_errorf(fc, "symlinkroot mount options must be absolute path\n"); goto cifs_parse_mount_err; } - kfree(ctx->symlinkroot); - ctx->symlinkroot = kstrdup(param->string, GFP_KERNEL); - if (!ctx->symlinkroot) + if (strnlen(param->string, PATH_MAX) == PATH_MAX) { + cifs_errorf(fc, "symlinkroot path too long (max path length: %u)\n", + PATH_MAX - 1); goto cifs_parse_mount_err; + } + kfree(ctx->symlinkroot); + ctx->symlinkroot = param->string; + param->string = NULL; break; } /* case Opt_ignore: - is ignored as expected ... */ @@ -1837,13 +1841,6 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, goto cifs_parse_mount_err; } - /* - * By default resolve all native absolute symlinks relative to "/mnt/". 
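The neg_start change above is the standard jiffies timeout idiom: record the start of the phase exactly once, then compare with time_after(), which stays correct across jiffies wraparound, instead of reusing an unrelated timestamp like lstrp that other paths keep refreshing. Sketch:

#include <linux/jiffies.h>

/* True once more than 20s of wall clock has passed since the phase began. */
static bool negotiate_timed_out(unsigned long neg_start)
{
	return time_after(jiffies, neg_start + 20 * HZ);
}

The caller records the start under the server lock, as the hunk above does with server->neg_start = jiffies before flipping tcpStatus to CifsInNegotiate.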
- * Same default has drvfs driver running in WSL for resolving SMB shares. - */ - if (!ctx->symlinkroot) - ctx->symlinkroot = kstrdup("/mnt/", GFP_KERNEL); - return 0; cifs_parse_mount_err: diff --git a/fs/smb/client/ioctl.c b/fs/smb/client/ioctl.c index 56439da4f119..0a9935ce05a5 100644 --- a/fs/smb/client/ioctl.c +++ b/fs/smb/client/ioctl.c @@ -506,7 +506,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) le16_to_cpu(tcon->ses->server->cipher_type); pkey_inf.Suid = tcon->ses->Suid; memcpy(pkey_inf.auth_key, tcon->ses->auth_key.response, - 16 /* SMB2_NTLMV2_SESSKEY_SIZE */); + SMB2_NTLMV2_SESSKEY_SIZE); memcpy(pkey_inf.smb3decryptionkey, tcon->ses->smb3decryptionkey, SMB3_SIGN_KEY_SIZE); memcpy(pkey_inf.smb3encryptionkey, diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c index e77017f47084..da23cc12a52c 100644 --- a/fs/smb/client/misc.c +++ b/fs/smb/client/misc.c @@ -151,6 +151,12 @@ tcon_info_alloc(bool dir_leases_enabled, enum smb3_tcon_ref_trace trace) #ifdef CONFIG_CIFS_DFS_UPCALL INIT_LIST_HEAD(&ret_buf->dfs_ses_list); #endif + INIT_LIST_HEAD(&ret_buf->pending_opens); + INIT_DELAYED_WORK(&ret_buf->query_interfaces, + smb2_query_server_interfaces); +#ifdef CONFIG_CIFS_DFS_UPCALL + INIT_DELAYED_WORK(&ret_buf->dfs_cache_work, dfs_cache_refresh); +#endif return ret_buf; } diff --git a/fs/smb/client/readdir.c b/fs/smb/client/readdir.c index ba0193cf9033..4e5460206397 100644 --- a/fs/smb/client/readdir.c +++ b/fs/smb/client/readdir.c @@ -264,7 +264,7 @@ cifs_posix_to_fattr(struct cifs_fattr *fattr, struct smb2_posix_info *info, /* The Mode field in the response can now include the file type as well */ fattr->cf_mode = wire_mode_to_posix(le32_to_cpu(info->Mode), fattr->cf_cifsattrs & ATTR_DIRECTORY); - fattr->cf_dtype = S_DT(le32_to_cpu(info->Mode)); + fattr->cf_dtype = S_DT(fattr->cf_mode); switch (fattr->cf_mode & S_IFMT) { case S_IFLNK: diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c index bb25e77c5540..5fa29a97ac15 100644 --- a/fs/smb/client/reparse.c +++ b/fs/smb/client/reparse.c @@ -57,6 +57,7 @@ static int create_native_symlink(const unsigned int xid, struct inode *inode, struct reparse_symlink_data_buffer *buf = NULL; struct cifs_open_info_data data = {}; struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + const char *symroot = cifs_sb->ctx->symlinkroot; struct inode *new; struct kvec iov; __le16 *path = NULL; @@ -82,7 +83,8 @@ static int create_native_symlink(const unsigned int xid, struct inode *inode, .symlink_target = symlink_target, }; - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) && symname[0] == '/') { + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) && + symroot && symname[0] == '/') { /* * This is a request to create an absolute symlink on the server * which does not support POSIX paths, and expects symlink in @@ -92,7 +94,7 @@ static int create_native_symlink(const unsigned int xid, struct inode *inode, * ensure compatibility of this symlink stored in absolute form * on the SMB server. 
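The symlinkroot parsing hunk above also switches from kstrdup() to taking ownership of the fs_context parameter string, a common idiom: steal param->string and NULL it out so the VFS core does not free it again. A hedged sketch, with struct my_ctx and the option name invented:

#include <linux/fs_context.h>
#include <linux/limits.h>
#include <linux/slab.h>
#include <linux/string.h>

struct my_ctx {
	char *path;
};

static int parse_path_param(struct my_ctx *ctx, struct fs_parameter *param)
{
	if (strnlen(param->string, PATH_MAX) == PATH_MAX)
		return -EINVAL;		/* reject overlong paths up front */

	kfree(ctx->path);		/* option may be given more than once */
	ctx->path = param->string;	/* take ownership of the string... */
	param->string = NULL;		/* ...so the core won't double-free */
	return 0;
}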
*/ - if (!strstarts(symname, cifs_sb->ctx->symlinkroot)) { + if (!strstarts(symname, symroot)) { /* * If the absolute Linux symlink target path is not * inside "symlinkroot" location then there is no way @@ -101,12 +103,12 @@ static int create_native_symlink(const unsigned int xid, struct inode *inode, cifs_dbg(VFS, "absolute symlink '%s' cannot be converted to NT format " "because it is outside of symlinkroot='%s'\n", - symname, cifs_sb->ctx->symlinkroot); + symname, symroot); rc = -EINVAL; goto out; } - len = strlen(cifs_sb->ctx->symlinkroot); - if (cifs_sb->ctx->symlinkroot[len-1] != '/') + len = strlen(symroot); + if (symroot[len - 1] != '/') len++; if (symname[len] >= 'a' && symname[len] <= 'z' && (symname[len+1] == '/' || symname[len+1] == '\0')) { @@ -782,6 +784,7 @@ int smb2_parse_native_symlink(char **target, const char *buf, unsigned int len, const char *full_path, struct cifs_sb_info *cifs_sb) { + const char *symroot = cifs_sb->ctx->symlinkroot; char sep = CIFS_DIR_SEP(cifs_sb); char *linux_target = NULL; char *smb_target = NULL; @@ -815,7 +818,8 @@ int smb2_parse_native_symlink(char **target, const char *buf, unsigned int len, goto out; } - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) && !relative) { + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) && + symroot && !relative) { /* * This is an absolute symlink from the server which does not * support POSIX paths, so the symlink is in NT-style path. @@ -875,15 +879,8 @@ globalroot: abs_path += sizeof("\\DosDevices\\")-1; else if (strstarts(abs_path, "\\GLOBAL??\\")) abs_path += sizeof("\\GLOBAL??\\")-1; - else { - /* Unhandled absolute symlink, points outside of DOS/Win32 */ - cifs_dbg(VFS, - "absolute symlink '%s' cannot be converted from NT format " - "because points to unknown target\n", - smb_target); - rc = -EIO; - goto out; - } + else + goto out_unhandled_target; /* Sometimes path separator after \?? is double backslash */ if (abs_path[0] == '\\') @@ -910,25 +907,19 @@ globalroot: abs_path++; abs_path[0] = drive_letter; } else { - /* Unhandled absolute symlink. Report an error. */ - cifs_dbg(VFS, - "absolute symlink '%s' cannot be converted from NT format " - "because points to unknown target\n", - smb_target); - rc = -EIO; - goto out; + goto out_unhandled_target; } abs_path_len = strlen(abs_path)+1; - symlinkroot_len = strlen(cifs_sb->ctx->symlinkroot); - if (cifs_sb->ctx->symlinkroot[symlinkroot_len-1] == '/') + symlinkroot_len = strlen(symroot); + if (symroot[symlinkroot_len - 1] == '/') symlinkroot_len--; linux_target = kmalloc(symlinkroot_len + 1 + abs_path_len, GFP_KERNEL); if (!linux_target) { rc = -ENOMEM; goto out; } - memcpy(linux_target, cifs_sb->ctx->symlinkroot, symlinkroot_len); + memcpy(linux_target, symroot, symlinkroot_len); linux_target[symlinkroot_len] = '/'; memcpy(linux_target + symlinkroot_len + 1, abs_path, abs_path_len); } else if (smb_target[0] == sep && relative) { @@ -966,6 +957,7 @@ globalroot: * These paths have same format as Linux symlinks, so no * conversion is needed. 
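The symroot handling in create_native_symlink() and smb2_parse_native_symlink() above hinges on one piece of prefix arithmetic: symlinkroot may or may not carry a trailing '/', so the number of bytes to strip from the Linux path is normalized before splicing. A hedged distillation; the helper name is invented:

#include <linux/string.h>

static size_t symroot_prefix_len(const char *symroot, const char *symname)
{
	size_t len;

	if (!strstarts(symname, symroot))
		return 0;	/* target lies outside symlinkroot */

	len = strlen(symroot);
	if (symroot[len - 1] != '/')
		len++;		/* also swallow the path separator */
	return len;
}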
*/ +out_unhandled_target: linux_target = smb_target; smb_target = NULL; } @@ -1172,7 +1164,6 @@ out: if (!have_xattr_dev && (tag == IO_REPARSE_TAG_LX_CHR || tag == IO_REPARSE_TAG_LX_BLK)) return false; - fattr->cf_dtype = S_DT(fattr->cf_mode); return true; } diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c index ec0db32c7d98..330bc3d25bad 100644 --- a/fs/smb/client/sess.c +++ b/fs/smb/client/sess.c @@ -498,8 +498,7 @@ cifs_ses_add_channel(struct cifs_ses *ses, ctx->domainauto = ses->domainAuto; ctx->domainname = ses->domainName; - /* no hostname for extra channels */ - ctx->server_hostname = ""; + ctx->server_hostname = ses->server->hostname; ctx->username = ses->user_name; ctx->password = ses->password; diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index a717be1626a3..2df93a75e3b8 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -424,9 +424,9 @@ skip_sess_setup: free_xid(xid); ses->flags &= ~CIFS_SES_FLAGS_PENDING_QUERY_INTERFACES; - /* regardless of rc value, setup polling */ - queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, - (SMB_INTERFACE_POLL_INTERVAL * HZ)); + if (!tcon->ipc && !tcon->dummy) + queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, + (SMB_INTERFACE_POLL_INTERVAL * HZ)); mutex_unlock(&ses->session_mutex); @@ -4229,10 +4229,8 @@ void smb2_reconnect_server(struct work_struct *work) } goto done; } - tcon->status = TID_GOOD; - tcon->retry = false; - tcon->need_reconnect = false; + tcon->dummy = true; /* now reconnect sessions for necessary channels */ list_for_each_entry_safe(ses, ses2, &tmp_ses_list, rlist) { @@ -4567,7 +4565,11 @@ smb2_readv_callback(struct mid_q_entry *mid) cifs_stats_bytes_read(tcon, rdata->got_bytes); break; case MID_REQUEST_SUBMITTED: + trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_req_submitted); + goto do_retry; case MID_RETRY_NEEDED: + trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_retry_needed); +do_retry: __set_bit(NETFS_SREQ_NEED_RETRY, &rdata->subreq.flags); rdata->result = -EAGAIN; if (server->sign && rdata->got_bytes) @@ -4578,11 +4580,15 @@ smb2_readv_callback(struct mid_q_entry *mid) cifs_stats_bytes_read(tcon, rdata->got_bytes); break; case MID_RESPONSE_MALFORMED: + trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_malformed); credits.value = le16_to_cpu(shdr->CreditRequest); credits.instance = server->reconnect_instance; - fallthrough; + rdata->result = -EIO; + break; default: + trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_unknown); rdata->result = -EIO; + break; } #ifdef CONFIG_CIFS_SMB_DIRECT /* @@ -4835,11 +4841,14 @@ smb2_writev_callback(struct mid_q_entry *mid) switch (mid->mid_state) { case MID_RESPONSE_RECEIVED: + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_progress); credits.value = le16_to_cpu(rsp->hdr.CreditRequest); credits.instance = server->reconnect_instance; result = smb2_check_receive(mid, server, 0); - if (result != 0) + if (result != 0) { + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_bad); break; + } written = le32_to_cpu(rsp->DataLength); /* @@ -4861,14 +4870,23 @@ smb2_writev_callback(struct mid_q_entry *mid) } break; case MID_REQUEST_SUBMITTED: + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_req_submitted); + __set_bit(NETFS_SREQ_NEED_RETRY, &wdata->subreq.flags); + result = -EAGAIN; + break; case MID_RETRY_NEEDED: + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_retry_needed); + __set_bit(NETFS_SREQ_NEED_RETRY, &wdata->subreq.flags); result = -EAGAIN; break; case MID_RESPONSE_MALFORMED: + 
trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_malformed); credits.value = le16_to_cpu(rsp->hdr.CreditRequest); credits.instance = server->reconnect_instance; - fallthrough; + result = -EIO; + break; default: + trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_unknown); result = -EIO; break; } @@ -4908,7 +4926,6 @@ smb2_writev_callback(struct mid_q_entry *mid) server->credits, server->in_flight, 0, cifs_trace_rw_credits_write_response_clear); wdata->credits.value = 0; - trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_progress); cifs_write_subrequest_terminated(wdata, result ?: written); release_mid(mid); trace_smb3_rw_credits(rreq_debug_id, subreq_debug_index, 0, diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 5ae847919da5..754e94a0e07f 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -907,8 +907,10 @@ wait_send_queue: .local_dma_lkey = sc->ib.pd->local_dma_lkey, .direction = DMA_TO_DEVICE, }; + size_t payload_len = umin(*_remaining_data_length, + sp->max_send_size - sizeof(*packet)); - rc = smb_extract_iter_to_rdma(iter, *_remaining_data_length, + rc = smb_extract_iter_to_rdma(iter, payload_len, &extract); if (rc < 0) goto err_dma; @@ -1013,6 +1015,27 @@ static int smbd_post_send_empty(struct smbd_connection *info) return smbd_post_send_iter(info, NULL, &remaining_data_length); } +static int smbd_post_send_full_iter(struct smbd_connection *info, + struct iov_iter *iter, + int *_remaining_data_length) +{ + int rc = 0; + + /* + * smbd_post_send_iter() respects the + * negotiated max_send_size, so we need to + * loop until the full iter is posted + */ + + while (iov_iter_count(iter) > 0) { + rc = smbd_post_send_iter(info, iter, _remaining_data_length); + if (rc < 0) + break; + } + + return rc; +} + /* * Post a receive request to the transport * The remote peer can only send data when a receive request is posted @@ -1452,6 +1475,9 @@ static int allocate_caches_and_workqueue(struct smbd_connection *info) char name[MAX_NAME_LEN]; int rc; + if (WARN_ON_ONCE(sp->max_recv_size < sizeof(struct smbdirect_data_transfer))) + return -ENOMEM; + scnprintf(name, MAX_NAME_LEN, "smbd_request_%p", info); info->request_cache = kmem_cache_create( @@ -1469,12 +1495,17 @@ static int allocate_caches_and_workqueue(struct smbd_connection *info) goto out1; scnprintf(name, MAX_NAME_LEN, "smbd_response_%p", info); + + struct kmem_cache_args response_args = { + .align = __alignof__(struct smbd_response), + .useroffset = (offsetof(struct smbd_response, packet) + + sizeof(struct smbdirect_data_transfer)), + .usersize = sp->max_recv_size - sizeof(struct smbdirect_data_transfer), + }; info->response_cache = - kmem_cache_create( - name, - sizeof(struct smbd_response) + - sp->max_recv_size, - 0, SLAB_HWCACHE_ALIGN, NULL); + kmem_cache_create(name, + sizeof(struct smbd_response) + sp->max_recv_size, + &response_args, SLAB_HWCACHE_ALIGN); if (!info->response_cache) goto out2; @@ -1747,35 +1778,39 @@ try_again: } /* - * Receive data from receive reassembly queue + * Receive data from the transport's receive reassembly queue * All the incoming data packets are placed in reassembly queue - * buf: the buffer to read data into + * iter: the buffer to read data into * size: the length of data to read * return value: actual data read - * Note: this implementation copies the data from reassebmly queue to receive + * + * Note: this implementation copies the data from reassembly queue to receive * buffers used by upper layer. This is not the optimal code path. 
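The response-cache hunk above moves to the struct kmem_cache_args flavour of kmem_cache_create(), whose useroffset/usersize pair declares the only region of each object that hardened usercopy may move to or from user space. A small self-contained sketch of the same shape; struct msg is invented:

#include <linux/slab.h>
#include <linux/stddef.h>
#include <linux/types.h>

struct msg {
	u32 hdr;		/* kernel-private, excluded from usercopy */
	char payload[256];	/* the only user-visible region */
};

static struct kmem_cache *msg_cache;

static int msg_cache_init(void)
{
	struct kmem_cache_args args = {
		.align      = __alignof__(struct msg),
		.useroffset = offsetof(struct msg, payload),
		.usersize   = sizeof_field(struct msg, payload),
	};

	msg_cache = kmem_cache_create("msg", sizeof(struct msg), &args,
				      SLAB_HWCACHE_ALIGN);
	return msg_cache ? 0 : -ENOMEM;
}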
A better way * to do it is to not have upper layer allocate its receive buffers but rather * borrow the buffer from reassembly queue, and return it after data is * consumed. But this will require more changes to upper layer code, and also * need to consider packet boundaries while they still being reassembled. */ -static int smbd_recv_buf(struct smbd_connection *info, char *buf, - unsigned int size) +int smbd_recv(struct smbd_connection *info, struct msghdr *msg) { struct smbdirect_socket *sc = &info->socket; struct smbd_response *response; struct smbdirect_data_transfer *data_transfer; + size_t size = iov_iter_count(&msg->msg_iter); int to_copy, to_read, data_read, offset; u32 data_length, remaining_data_length, data_offset; int rc; + if (WARN_ON_ONCE(iov_iter_rw(&msg->msg_iter) == WRITE)) + return -EINVAL; /* It's a bug in upper layer to get there */ + again: /* * No need to hold the reassembly queue lock all the time as we are * the only one reading from the front of the queue. The transport * may add more entries to the back of the queue at the same time */ - log_read(INFO, "size=%d info->reassembly_data_length=%d\n", size, + log_read(INFO, "size=%zd info->reassembly_data_length=%d\n", size, info->reassembly_data_length); if (info->reassembly_data_length >= size) { int queue_length; @@ -1813,7 +1848,10 @@ again: if (response->first_segment && size == 4) { unsigned int rfc1002_len = data_length + remaining_data_length; - *((__be32 *)buf) = cpu_to_be32(rfc1002_len); + __be32 rfc1002_hdr = cpu_to_be32(rfc1002_len); + if (copy_to_iter(&rfc1002_hdr, sizeof(rfc1002_hdr), + &msg->msg_iter) != sizeof(rfc1002_hdr)) + return -EFAULT; data_read = 4; response->first_segment = false; log_read(INFO, "returning rfc1002 length %d\n", @@ -1822,10 +1860,9 @@ again: } to_copy = min_t(int, data_length - offset, to_read); - memcpy( - buf + data_read, - (char *)data_transfer + data_offset + offset, - to_copy); + if (copy_to_iter((char *)data_transfer + data_offset + offset, + to_copy, &msg->msg_iter) != to_copy) + return -EFAULT; /* move on to the next buffer? */ if (to_copy == data_length - offset) { @@ -1891,90 +1928,6 @@ read_rfc1002_done: } /* - * Receive a page from receive reassembly queue - * page: the page to read data into - * to_read: the length of data to read - * return value: actual data read - */ -static int smbd_recv_page(struct smbd_connection *info, - struct page *page, unsigned int page_offset, - unsigned int to_read) -{ - struct smbdirect_socket *sc = &info->socket; - int ret; - char *to_address; - void *page_address; - - /* make sure we have the page ready for read */ - ret = wait_event_interruptible( - info->wait_reassembly_queue, - info->reassembly_data_length >= to_read || - sc->status != SMBDIRECT_SOCKET_CONNECTED); - if (ret) - return ret; - - /* now we can read from reassembly queue and not sleep */ - page_address = kmap_atomic(page); - to_address = (char *) page_address + page_offset; - - log_read(INFO, "reading from page=%p address=%p to_read=%d\n", - page, to_address, to_read); - - ret = smbd_recv_buf(info, to_address, to_read); - kunmap_atomic(page_address); - - return ret; -} - -/* - * Receive data from transport - * msg: a msghdr point to the buffer, can be ITER_KVEC or ITER_BVEC - * return: total bytes read, or 0. SMB Direct will not do partial read. 
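Replacing smbd_recv_buf()/smbd_recv_page() with a single msghdr-based smbd_recv(), as the hunks below do, works because copy_to_iter() handles ITER_KVEC and ITER_BVEC destinations uniformly, including the kmap details the old page path did by hand. Sketch of the checked-copy shape; recv_chunk() is an invented helper:

#include <linux/socket.h>
#include <linux/uio.h>

static int recv_chunk(struct msghdr *msg, const void *src, size_t len)
{
	/* A short copy means the iterator is exhausted or faulted. */
	if (copy_to_iter(src, len, &msg->msg_iter) != len)
		return -EFAULT;
	return 0;
}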
- */ -int smbd_recv(struct smbd_connection *info, struct msghdr *msg) -{ - char *buf; - struct page *page; - unsigned int to_read, page_offset; - int rc; - - if (iov_iter_rw(&msg->msg_iter) == WRITE) { - /* It's a bug in upper layer to get there */ - cifs_dbg(VFS, "Invalid msg iter dir %u\n", - iov_iter_rw(&msg->msg_iter)); - rc = -EINVAL; - goto out; - } - - switch (iov_iter_type(&msg->msg_iter)) { - case ITER_KVEC: - buf = msg->msg_iter.kvec->iov_base; - to_read = msg->msg_iter.kvec->iov_len; - rc = smbd_recv_buf(info, buf, to_read); - break; - - case ITER_BVEC: - page = msg->msg_iter.bvec->bv_page; - page_offset = msg->msg_iter.bvec->bv_offset; - to_read = msg->msg_iter.bvec->bv_len; - rc = smbd_recv_page(info, page, page_offset, to_read); - break; - - default: - /* It's a bug in upper layer to get there */ - cifs_dbg(VFS, "Invalid msg type %d\n", - iov_iter_type(&msg->msg_iter)); - rc = -EINVAL; - } - -out: - /* SMBDirect will read it all or nothing */ - if (rc > 0) - msg->msg_iter.count = 0; - return rc; -} - -/* * Send data to transport * Each rqst is transported as a SMBDirect payload * rqst: the data to write @@ -2032,14 +1985,14 @@ int smbd_send(struct TCP_Server_Info *server, klen += rqst->rq_iov[i].iov_len; iov_iter_kvec(&iter, ITER_SOURCE, rqst->rq_iov, rqst->rq_nvec, klen); - rc = smbd_post_send_iter(info, &iter, &remaining_data_length); + rc = smbd_post_send_full_iter(info, &iter, &remaining_data_length); if (rc < 0) break; if (iov_iter_count(&rqst->rq_iter) > 0) { /* And then the data pages if there are any */ - rc = smbd_post_send_iter(info, &rqst->rq_iter, - &remaining_data_length); + rc = smbd_post_send_full_iter(info, &rqst->rq_iter, + &remaining_data_length); if (rc < 0) break; } @@ -2589,13 +2542,14 @@ static ssize_t smb_extract_folioq_to_rdma(struct iov_iter *iter, size_t fsize = folioq_folio_size(folioq, slot); if (offset < fsize) { - size_t part = umin(maxsize - ret, fsize - offset); + size_t part = umin(maxsize, fsize - offset); if (!smb_set_sge(rdma, folio_page(folio, 0), offset, part)) return -EIO; offset += part; ret += part; + maxsize -= part; } if (offset >= fsize) { @@ -2610,7 +2564,7 @@ static ssize_t smb_extract_folioq_to_rdma(struct iov_iter *iter, slot = 0; } } - } while (rdma->nr_sge < rdma->max_sge || maxsize > 0); + } while (rdma->nr_sge < rdma->max_sge && maxsize > 0); iter->folioq = folioq; iter->folioq_slot = slot; diff --git a/fs/smb/client/trace.h b/fs/smb/client/trace.h index 52bcb55d9952..93e5b2bb9f28 100644 --- a/fs/smb/client/trace.h +++ b/fs/smb/client/trace.h @@ -140,7 +140,7 @@ DECLARE_EVENT_CLASS(smb3_rw_err_class, __entry->len = len; __entry->rc = rc; ), - TP_printk("\tR=%08x[%x] xid=%u sid=0x%llx tid=0x%x fid=0x%llx offset=0x%llx len=0x%x rc=%d", + TP_printk("R=%08x[%x] xid=%u sid=0x%llx tid=0x%x fid=0x%llx offset=0x%llx len=0x%x rc=%d", __entry->rreq_debug_id, __entry->rreq_debug_index, __entry->xid, __entry->sesid, __entry->tid, __entry->fid, __entry->offset, __entry->len, __entry->rc) @@ -190,7 +190,7 @@ DECLARE_EVENT_CLASS(smb3_other_err_class, __entry->len = len; __entry->rc = rc; ), - TP_printk("\txid=%u sid=0x%llx tid=0x%x fid=0x%llx offset=0x%llx len=0x%x rc=%d", + TP_printk("xid=%u sid=0x%llx tid=0x%x fid=0x%llx offset=0x%llx len=0x%x rc=%d", __entry->xid, __entry->sesid, __entry->tid, __entry->fid, __entry->offset, __entry->len, __entry->rc) ) @@ -247,7 +247,7 @@ DECLARE_EVENT_CLASS(smb3_copy_range_err_class, __entry->len = len; __entry->rc = rc; ), - TP_printk("\txid=%u sid=0x%llx tid=0x%x source fid=0x%llx source 
offset=0x%llx target fid=0x%llx target offset=0x%llx len=0x%x rc=%d", + TP_printk("xid=%u sid=0x%llx tid=0x%x source fid=0x%llx source offset=0x%llx target fid=0x%llx target offset=0x%llx len=0x%x rc=%d", __entry->xid, __entry->sesid, __entry->tid, __entry->target_fid, __entry->src_offset, __entry->target_fid, __entry->target_offset, __entry->len, __entry->rc) ) @@ -298,7 +298,7 @@ DECLARE_EVENT_CLASS(smb3_copy_range_done_class, __entry->target_offset = target_offset; __entry->len = len; ), - TP_printk("\txid=%u sid=0x%llx tid=0x%x source fid=0x%llx source offset=0x%llx target fid=0x%llx target offset=0x%llx len=0x%x", + TP_printk("xid=%u sid=0x%llx tid=0x%x source fid=0x%llx source offset=0x%llx target fid=0x%llx target offset=0x%llx len=0x%x", __entry->xid, __entry->sesid, __entry->tid, __entry->target_fid, __entry->src_offset, __entry->target_fid, __entry->target_offset, __entry->len) ) @@ -482,7 +482,7 @@ DECLARE_EVENT_CLASS(smb3_fd_class, __entry->tid = tid; __entry->sesid = sesid; ), - TP_printk("\txid=%u sid=0x%llx tid=0x%x fid=0x%llx", + TP_printk("xid=%u sid=0x%llx tid=0x%x fid=0x%llx", __entry->xid, __entry->sesid, __entry->tid, __entry->fid) ) @@ -521,7 +521,7 @@ DECLARE_EVENT_CLASS(smb3_fd_err_class, __entry->sesid = sesid; __entry->rc = rc; ), - TP_printk("\txid=%u sid=0x%llx tid=0x%x fid=0x%llx rc=%d", + TP_printk("xid=%u sid=0x%llx tid=0x%x fid=0x%llx rc=%d", __entry->xid, __entry->sesid, __entry->tid, __entry->fid, __entry->rc) ) @@ -794,7 +794,7 @@ DECLARE_EVENT_CLASS(smb3_cmd_err_class, __entry->status = status; __entry->rc = rc; ), - TP_printk("\tsid=0x%llx tid=0x%x cmd=%u mid=%llu status=0x%x rc=%d", + TP_printk("sid=0x%llx tid=0x%x cmd=%u mid=%llu status=0x%x rc=%d", __entry->sesid, __entry->tid, __entry->cmd, __entry->mid, __entry->status, __entry->rc) ) @@ -829,7 +829,7 @@ DECLARE_EVENT_CLASS(smb3_cmd_done_class, __entry->cmd = cmd; __entry->mid = mid; ), - TP_printk("\tsid=0x%llx tid=0x%x cmd=%u mid=%llu", + TP_printk("sid=0x%llx tid=0x%x cmd=%u mid=%llu", __entry->sesid, __entry->tid, __entry->cmd, __entry->mid) ) @@ -867,7 +867,7 @@ DECLARE_EVENT_CLASS(smb3_mid_class, __entry->when_sent = when_sent; __entry->when_received = when_received; ), - TP_printk("\tcmd=%u mid=%llu pid=%u, when_sent=%lu when_rcv=%lu", + TP_printk("cmd=%u mid=%llu pid=%u, when_sent=%lu when_rcv=%lu", __entry->cmd, __entry->mid, __entry->pid, __entry->when_sent, __entry->when_received) ) @@ -898,7 +898,7 @@ DECLARE_EVENT_CLASS(smb3_exit_err_class, __assign_str(func_name); __entry->rc = rc; ), - TP_printk("\t%s: xid=%u rc=%d", + TP_printk("%s: xid=%u rc=%d", __get_str(func_name), __entry->xid, __entry->rc) ) @@ -924,7 +924,7 @@ DECLARE_EVENT_CLASS(smb3_sync_err_class, __entry->ino = ino; __entry->rc = rc; ), - TP_printk("\tino=%lu rc=%d", + TP_printk("ino=%lu rc=%d", __entry->ino, __entry->rc) ) @@ -950,7 +950,7 @@ DECLARE_EVENT_CLASS(smb3_enter_exit_class, __entry->xid = xid; __assign_str(func_name); ), - TP_printk("\t%s: xid=%u", + TP_printk("%s: xid=%u", __get_str(func_name), __entry->xid) ) diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c index 83764c230e9d..3f04a2977ba8 100644 --- a/fs/smb/server/connection.c +++ b/fs/smb/server/connection.c @@ -40,7 +40,7 @@ void ksmbd_conn_free(struct ksmbd_conn *conn) kvfree(conn->request_buf); kfree(conn->preauth_info); if (atomic_dec_and_test(&conn->refcnt)) { - ksmbd_free_transport(conn->transport); + conn->transport->ops->free_transport(conn->transport); kfree(conn); } } diff --git a/fs/smb/server/connection.h 
b/fs/smb/server/connection.h index 6efed923bd68..dd3e0e3f7bf0 100644 --- a/fs/smb/server/connection.h +++ b/fs/smb/server/connection.h @@ -133,6 +133,7 @@ struct ksmbd_transport_ops { void *buf, unsigned int len, struct smb2_buffer_desc_v1 *desc, unsigned int desc_len); + void (*free_transport)(struct ksmbd_transport *kt); }; struct ksmbd_transport { diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 1a308171b599..63d17cea2e95 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -1607,17 +1607,18 @@ static int krb5_authenticate(struct ksmbd_work *work, out_len = work->response_sz - (le16_to_cpu(rsp->SecurityBufferOffset) + 4); - /* Check previous session */ - prev_sess_id = le64_to_cpu(req->PreviousSessionId); - if (prev_sess_id && prev_sess_id != sess->id) - destroy_previous_session(conn, sess->user, prev_sess_id); - retval = ksmbd_krb5_authenticate(sess, in_blob, in_len, out_blob, &out_len); if (retval) { ksmbd_debug(SMB, "krb5 authentication failed\n"); return -EINVAL; } + + /* Check previous session */ + prev_sess_id = le64_to_cpu(req->PreviousSessionId); + if (prev_sess_id && prev_sess_id != sess->id) + destroy_previous_session(conn, sess->user, prev_sess_id); + rsp->SecurityBufferLength = cpu_to_le16(out_len); if ((conn->sign || server_conf.enforced_signing) || @@ -4871,8 +4872,13 @@ static int get_file_standard_info(struct smb2_query_info_rsp *rsp, sinfo = (struct smb2_file_standard_info *)rsp->Buffer; delete_pending = ksmbd_inode_pending_delete(fp); - sinfo->AllocationSize = cpu_to_le64(stat.blocks << 9); - sinfo->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size); + if (ksmbd_stream_fd(fp) == false) { + sinfo->AllocationSize = cpu_to_le64(stat.blocks << 9); + sinfo->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size); + } else { + sinfo->AllocationSize = cpu_to_le64(fp->stream.size); + sinfo->EndOfFile = cpu_to_le64(fp->stream.size); + } sinfo->NumberOfLinks = cpu_to_le32(get_nlink(&stat) - delete_pending); sinfo->DeletePending = delete_pending; sinfo->Directory = S_ISDIR(stat.mode) ? 1 : 0; @@ -4935,9 +4941,14 @@ static int get_file_all_info(struct ksmbd_work *work, file_info->ChangeTime = cpu_to_le64(time); file_info->Attributes = fp->f_ci->m_fattr; file_info->Pad1 = 0; - file_info->AllocationSize = - cpu_to_le64(stat.blocks << 9); - file_info->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size); + if (ksmbd_stream_fd(fp) == false) { + file_info->AllocationSize = + cpu_to_le64(stat.blocks << 9); + file_info->EndOfFile = S_ISDIR(stat.mode) ? 
0 : cpu_to_le64(stat.size); + } else { + file_info->AllocationSize = cpu_to_le64(fp->stream.size); + file_info->EndOfFile = cpu_to_le64(fp->stream.size); + } file_info->NumberOfLinks = cpu_to_le32(get_nlink(&stat) - delete_pending); file_info->DeletePending = delete_pending; @@ -4946,7 +4957,10 @@ static int get_file_all_info(struct ksmbd_work *work, file_info->IndexNumber = cpu_to_le64(stat.ino); file_info->EASize = 0; file_info->AccessFlags = fp->daccess; - file_info->CurrentByteOffset = cpu_to_le64(fp->filp->f_pos); + if (ksmbd_stream_fd(fp) == false) + file_info->CurrentByteOffset = cpu_to_le64(fp->filp->f_pos); + else + file_info->CurrentByteOffset = cpu_to_le64(fp->stream.pos); file_info->Mode = fp->coption; file_info->AlignmentRequirement = 0; conv_len = smbConvertToUTF16((__le16 *)file_info->FileName, filename, @@ -5134,8 +5148,13 @@ static int get_file_network_open_info(struct smb2_query_info_rsp *rsp, time = ksmbd_UnixTimeToNT(stat.ctime); file_info->ChangeTime = cpu_to_le64(time); file_info->Attributes = fp->f_ci->m_fattr; - file_info->AllocationSize = cpu_to_le64(stat.blocks << 9); - file_info->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size); + if (ksmbd_stream_fd(fp) == false) { + file_info->AllocationSize = cpu_to_le64(stat.blocks << 9); + file_info->EndOfFile = S_ISDIR(stat.mode) ? 0 : cpu_to_le64(stat.size); + } else { + file_info->AllocationSize = cpu_to_le64(fp->stream.size); + file_info->EndOfFile = cpu_to_le64(fp->stream.size); + } file_info->Reserved = cpu_to_le32(0); rsp->OutputBufferLength = cpu_to_le32(sizeof(struct smb2_file_ntwrk_info)); @@ -5158,7 +5177,11 @@ static void get_file_position_info(struct smb2_query_info_rsp *rsp, struct smb2_file_pos_info *file_info; file_info = (struct smb2_file_pos_info *)rsp->Buffer; - file_info->CurrentByteOffset = cpu_to_le64(fp->filp->f_pos); + if (ksmbd_stream_fd(fp) == false) + file_info->CurrentByteOffset = cpu_to_le64(fp->filp->f_pos); + else + file_info->CurrentByteOffset = cpu_to_le64(fp->stream.pos); + rsp->OutputBufferLength = cpu_to_le32(sizeof(struct smb2_file_pos_info)); } @@ -5247,8 +5270,13 @@ static int find_file_posix_info(struct smb2_query_info_rsp *rsp, file_info->ChangeTime = cpu_to_le64(time); file_info->DosAttributes = fp->f_ci->m_fattr; file_info->Inode = cpu_to_le64(stat.ino); - file_info->EndOfFile = cpu_to_le64(stat.size); - file_info->AllocationSize = cpu_to_le64(stat.blocks << 9); + if (ksmbd_stream_fd(fp) == false) { + file_info->EndOfFile = cpu_to_le64(stat.size); + file_info->AllocationSize = cpu_to_le64(stat.blocks << 9); + } else { + file_info->EndOfFile = cpu_to_le64(fp->stream.size); + file_info->AllocationSize = cpu_to_le64(fp->stream.size); + } file_info->HardLinks = cpu_to_le32(stat.nlink); file_info->Mode = cpu_to_le32(stat.mode & 0777); switch (stat.mode & S_IFMT) { @@ -6190,6 +6218,9 @@ static int set_file_allocation_info(struct ksmbd_work *work, if (!(fp->daccess & FILE_WRITE_DATA_LE)) return -EACCES; + if (ksmbd_stream_fd(fp) == true) + return 0; + rc = vfs_getattr(&fp->filp->f_path, &stat, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT); if (rc) @@ -6248,7 +6279,8 @@ static int set_end_of_file_info(struct ksmbd_work *work, struct ksmbd_file *fp, * truncate of some filesystem like FAT32 fill zero data in * truncated range. 
*/ - if (inode->i_sb->s_magic != MSDOS_SUPER_MAGIC) { + if (inode->i_sb->s_magic != MSDOS_SUPER_MAGIC && + ksmbd_stream_fd(fp) == false) { ksmbd_debug(SMB, "truncated to newsize %lld\n", newsize); rc = ksmbd_vfs_truncate(work, fp, newsize); if (rc) { @@ -6321,7 +6353,13 @@ static int set_file_position_info(struct ksmbd_file *fp, return -EINVAL; } - fp->filp->f_pos = current_byte_offset; + if (ksmbd_stream_fd(fp) == false) + fp->filp->f_pos = current_byte_offset; + else { + if (current_byte_offset > XATTR_SIZE_MAX) + current_byte_offset = XATTR_SIZE_MAX; + fp->stream.pos = current_byte_offset; + } return 0; } @@ -8535,11 +8573,6 @@ static void smb20_oplock_break_ack(struct ksmbd_work *work) goto err_out; } - opinfo->op_state = OPLOCK_STATE_NONE; - wake_up_interruptible_all(&opinfo->oplock_q); - opinfo_put(opinfo); - ksmbd_fd_put(work, fp); - rsp->StructureSize = cpu_to_le16(24); rsp->OplockLevel = rsp_oplevel; rsp->Reserved = 0; @@ -8547,16 +8580,15 @@ static void smb20_oplock_break_ack(struct ksmbd_work *work) rsp->VolatileFid = volatile_id; rsp->PersistentFid = persistent_id; ret = ksmbd_iov_pin_rsp(work, rsp, sizeof(struct smb2_oplock_break)); - if (!ret) - return; - + if (ret) { err_out: + smb2_set_err_rsp(work); + } + opinfo->op_state = OPLOCK_STATE_NONE; wake_up_interruptible_all(&opinfo->oplock_q); - opinfo_put(opinfo); ksmbd_fd_put(work, fp); - smb2_set_err_rsp(work); } static int check_lease_state(struct lease *lease, __le32 req_state) @@ -8686,11 +8718,6 @@ static void smb21_lease_break_ack(struct ksmbd_work *work) } lease_state = lease->state; - opinfo->op_state = OPLOCK_STATE_NONE; - wake_up_interruptible_all(&opinfo->oplock_q); - atomic_dec(&opinfo->breaking_cnt); - wake_up_interruptible_all(&opinfo->oplock_brk); - opinfo_put(opinfo); rsp->StructureSize = cpu_to_le16(36); rsp->Reserved = 0; @@ -8699,16 +8726,16 @@ static void smb21_lease_break_ack(struct ksmbd_work *work) rsp->LeaseState = lease_state; rsp->LeaseDuration = 0; ret = ksmbd_iov_pin_rsp(work, rsp, sizeof(struct smb2_lease_ack)); - if (!ret) - return; - + if (ret) { err_out: + smb2_set_err_rsp(work); + } + + opinfo->op_state = OPLOCK_STATE_NONE; wake_up_interruptible_all(&opinfo->oplock_q); atomic_dec(&opinfo->breaking_cnt); wake_up_interruptible_all(&opinfo->oplock_brk); - opinfo_put(opinfo); - smb2_set_err_rsp(work); } /** diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 4998df04ab95..c6cbe0d56e32 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -159,7 +159,8 @@ struct smb_direct_transport { }; #define KSMBD_TRANS(t) ((struct ksmbd_transport *)&((t)->transport)) - +#define SMBD_TRANS(t) ((struct smb_direct_transport *)container_of(t, \ + struct smb_direct_transport, transport)) enum { SMB_DIRECT_MSG_NEGOTIATE_REQ = 0, SMB_DIRECT_MSG_DATA_TRANSFER @@ -410,6 +411,11 @@ err: return NULL; } +static void smb_direct_free_transport(struct ksmbd_transport *kt) +{ + kfree(SMBD_TRANS(kt)); +} + static void free_transport(struct smb_direct_transport *t) { struct smb_direct_recvmsg *recvmsg; @@ -427,7 +433,8 @@ static void free_transport(struct smb_direct_transport *t) if (t->qp) { ib_drain_qp(t->qp); ib_mr_pool_destroy(t->qp, &t->qp->rdma_mrs); - ib_destroy_qp(t->qp); + t->qp = NULL; + rdma_destroy_qp(t->cm_id); } ksmbd_debug(RDMA, "drain the reassembly queue\n"); @@ -455,7 +462,6 @@ static void free_transport(struct smb_direct_transport *t) smb_direct_destroy_pools(t); ksmbd_conn_free(KSMBD_TRANS(t)->conn); - kfree(t); } static struct smb_direct_sendmsg @@ 
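The new ->free_transport() operation wired up below for both the TCP and RDMA transports lets ksmbd_conn_free() stay ignorant of the concrete transport type: each implementation recovers its containing object with container_of() and frees it. A generic sketch of that vtable shape, with invented type names:

#include <linux/container_of.h>
#include <linux/slab.h>

struct transport;

struct transport_ops {
	void (*free_transport)(struct transport *t);
};

struct transport {
	const struct transport_ops *ops;
};

struct tcp_transport {
	struct transport t;
	/* transport-private state ... */
};

static void tcp_free_transport(struct transport *t)
{
	kfree(container_of(t, struct tcp_transport, t));
}

static const struct transport_ops tcp_ops = {
	.free_transport = tcp_free_transport,
};

/* generic teardown never names the concrete type: t->ops->free_transport(t); */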
-1935,8 +1941,8 @@ static int smb_direct_create_qpair(struct smb_direct_transport *t, return 0; err: if (t->qp) { - ib_destroy_qp(t->qp); t->qp = NULL; + rdma_destroy_qp(t->cm_id); } if (t->recv_cq) { ib_destroy_cq(t->recv_cq); @@ -2281,4 +2287,5 @@ static const struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops = { .read = smb_direct_read, .rdma_read = smb_direct_rdma_read, .rdma_write = smb_direct_rdma_write, + .free_transport = smb_direct_free_transport, }; diff --git a/fs/smb/server/transport_tcp.c b/fs/smb/server/transport_tcp.c index abedf510899a..4e9f98db9ff4 100644 --- a/fs/smb/server/transport_tcp.c +++ b/fs/smb/server/transport_tcp.c @@ -93,7 +93,7 @@ static struct tcp_transport *alloc_transport(struct socket *client_sk) return t; } -void ksmbd_free_transport(struct ksmbd_transport *kt) +static void ksmbd_tcp_free_transport(struct ksmbd_transport *kt) { struct tcp_transport *t = TCP_TRANS(kt); @@ -656,4 +656,5 @@ static const struct ksmbd_transport_ops ksmbd_tcp_transport_ops = { .read = ksmbd_tcp_read, .writev = ksmbd_tcp_writev, .disconnect = ksmbd_tcp_disconnect, + .free_transport = ksmbd_tcp_free_transport, }; diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index ba45e809555a..d3437f6644e3 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -293,6 +293,7 @@ static int ksmbd_vfs_stream_read(struct ksmbd_file *fp, char *buf, loff_t *pos, if (v_len - *pos < count) count = v_len - *pos; + fp->stream.pos = v_len; memcpy(buf, &stream_buf[*pos], count); @@ -456,8 +457,8 @@ static int ksmbd_vfs_stream_write(struct ksmbd_file *fp, char *buf, loff_t *pos, true); if (err < 0) goto out; - - fp->filp->f_pos = *pos; + else + fp->stream.pos = size; err = 0; out: kvfree(stream_buf); @@ -1281,6 +1282,7 @@ out1: err = ksmbd_vfs_lock_parent(parent_path->dentry, path->dentry); if (err) { + mnt_drop_write(parent_path->mnt); path_put(path); path_put(parent_path); } diff --git a/fs/smb/server/vfs_cache.h b/fs/smb/server/vfs_cache.h index 5bbb179736c2..0708155b5caf 100644 --- a/fs/smb/server/vfs_cache.h +++ b/fs/smb/server/vfs_cache.h @@ -44,6 +44,7 @@ struct ksmbd_lock { struct stream { char *name; ssize_t size; + loff_t pos; }; struct ksmbd_inode { diff --git a/fs/super.c b/fs/super.c index 21799e213fd7..80418ca8e215 100644 --- a/fs/super.c +++ b/fs/super.c @@ -964,8 +964,10 @@ void iterate_supers_type(struct file_system_type *type, spin_unlock(&sb_lock); locked = super_lock_shared(sb); - if (locked) + if (locked) { f(sb, arg); + super_unlock_shared(sb); + } spin_lock(&sb_lock); if (p) diff --git a/fs/xattr.c b/fs/xattr.c index 8ec5b0204bfd..600ae97969cf 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -1479,6 +1479,7 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs, buffer += err; } remaining_size -= err; + err = 0; read_lock(&xattrs->lock); for (rbp = rb_first(&xattrs->rb_root); rbp; rbp = rb_next(rbp)) { diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 7839efe050bf..000cc7f4a3ce 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -3444,16 +3444,41 @@ xfs_alloc_read_agf( set_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate); } + #ifdef DEBUG - else if (!xfs_is_shutdown(mp)) { - ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks)); - ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks)); - ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount)); - ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest)); - ASSERT(pag->pagf_bno_level == be32_to_cpu(agf->agf_bno_level)); - 
ASSERT(pag->pagf_cnt_level == be32_to_cpu(agf->agf_cnt_level)); + /* + * It's possible for the AGF to be out of sync if the block device is + * silently dropping writes. This can happen in fstests with dmflakey + * enabled, which allows the buffer to be cleaned and reclaimed by + * memory pressure and then re-read from disk here. We will get a + * stale version of the AGF from disk, and nothing good can happen from + * here. Hence if we detect this situation, immediately shut down the + * filesystem. + * + * This can also happen if we are already in the middle of a forced + * shutdown, so don't bother checking if we are already shut down. + */ + if (!xfs_is_shutdown(pag_mount(pag))) { + bool ok = true; + + ok &= pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks); + ok &= pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks); + ok &= pag->pagf_flcount == be32_to_cpu(agf->agf_flcount); + ok &= pag->pagf_longest == be32_to_cpu(agf->agf_longest); + ok &= pag->pagf_bno_level == be32_to_cpu(agf->agf_bno_level); + ok &= pag->pagf_cnt_level == be32_to_cpu(agf->agf_cnt_level); + + if (XFS_IS_CORRUPT(pag_mount(pag), !ok)) { + xfs_ag_mark_sick(pag, XFS_SICK_AG_AGF); + xfs_trans_brelse(tp, agfbp); + xfs_force_shutdown(pag_mount(pag), + SHUTDOWN_CORRUPT_ONDISK); + return -EFSCORRUPTED; + } } -#endif +#endif /* DEBUG */ + if (agfbpp) *agfbpp = agfbp; else diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 0c47b5c6ca7d..750111634d9f 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -2801,12 +2801,35 @@ xfs_ialloc_read_agi( set_bit(XFS_AGSTATE_AGI_INIT, &pag->pag_opstate); } +#ifdef DEBUG /* - * It's possible for these to be out of sync if - * we are in the middle of a forced shutdown. + * It's possible for the AGI to be out of sync if the block device is + * silently dropping writes. This can happen in fstests with dmflakey + * enabled, which allows the buffer to be cleaned and reclaimed by + * memory pressure and then re-read from disk here. We will get a + * stale version of the AGI from disk, and nothing good can happen from + * here. Hence if we detect this situation, immediately shut down the + * filesystem. + * + * This can also happen if we are already in the middle of a forced + * shutdown, so don't bother checking if we are already shut down. */ - ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) || - xfs_is_shutdown(pag_mount(pag))); + if (!xfs_is_shutdown(pag_mount(pag))) { + bool ok = true; + + ok &= pag->pagi_freecount == be32_to_cpu(agi->agi_freecount); + ok &= pag->pagi_count == be32_to_cpu(agi->agi_count); + + if (XFS_IS_CORRUPT(pag_mount(pag), !ok)) { + xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI); + xfs_trans_brelse(tp, agibp); + xfs_force_shutdown(pag_mount(pag), + SHUTDOWN_CORRUPT_ONDISK); + return -EFSCORRUPTED; + } + } +#endif /* DEBUG */ + if (agibpp) *agibpp = agibp; else diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 8af83bd161f9..ba5bd6031ece 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -2082,44 +2082,6 @@ xfs_buf_delwri_submit( return error; } -/* - * Push a single buffer on a delwri queue. - * - * The purpose of this function is to submit a single buffer of a delwri queue - * and return with the buffer still on the original queue. - * - * The buffer locking and queue management logic between _delwri_pushbuf() and - * _delwri_queue() guarantee that the buffer cannot be queued to another list - * before returning.
- */ -int -xfs_buf_delwri_pushbuf( - struct xfs_buf *bp, - struct list_head *buffer_list) -{ - int error; - - ASSERT(bp->b_flags & _XBF_DELWRI_Q); - - trace_xfs_buf_delwri_pushbuf(bp, _RET_IP_); - - xfs_buf_lock(bp); - bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC); - bp->b_flags |= XBF_WRITE; - xfs_buf_submit(bp); - - /* - * The buffer is now locked, under I/O but still on the original delwri - * queue. Wait for I/O completion, restore the DELWRI_Q flag and - * return with the buffer unlocked and still on the original queue. - */ - error = xfs_buf_iowait(bp); - bp->b_flags |= _XBF_DELWRI_Q; - xfs_buf_unlock(bp); - - return error; -} - void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref) { /* diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 9d2ab567cf81..15fc56948346 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -326,7 +326,6 @@ extern bool xfs_buf_delwri_queue(struct xfs_buf *, struct list_head *); void xfs_buf_delwri_queue_here(struct xfs_buf *bp, struct list_head *bl); extern int xfs_buf_delwri_submit(struct list_head *); extern int xfs_buf_delwri_submit_nowait(struct list_head *); -extern int xfs_buf_delwri_pushbuf(struct xfs_buf *, struct list_head *); static inline xfs_daddr_t xfs_buf_daddr(struct xfs_buf *bp) { diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 90139e0f3271..7fc54725c5f6 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -32,6 +32,61 @@ static inline struct xfs_buf_log_item *BUF_ITEM(struct xfs_log_item *lip) return container_of(lip, struct xfs_buf_log_item, bli_item); } +static void +xfs_buf_item_get_format( + struct xfs_buf_log_item *bip, + int count) +{ + ASSERT(bip->bli_formats == NULL); + bip->bli_format_count = count; + + if (count == 1) { + bip->bli_formats = &bip->__bli_format; + return; + } + + bip->bli_formats = kzalloc(count * sizeof(struct xfs_buf_log_format), + GFP_KERNEL | __GFP_NOFAIL); +} + +static void +xfs_buf_item_free_format( + struct xfs_buf_log_item *bip) +{ + if (bip->bli_formats != &bip->__bli_format) { + kfree(bip->bli_formats); + bip->bli_formats = NULL; + } +} + +static void +xfs_buf_item_free( + struct xfs_buf_log_item *bip) +{ + xfs_buf_item_free_format(bip); + kvfree(bip->bli_item.li_lv_shadow); + kmem_cache_free(xfs_buf_item_cache, bip); +} + +/* + * xfs_buf_item_relse() is called when the buf log item is no longer needed. + */ +static void +xfs_buf_item_relse( + struct xfs_buf_log_item *bip) +{ + struct xfs_buf *bp = bip->bli_buf; + + trace_xfs_buf_item_relse(bp, _RET_IP_); + + ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags)); + ASSERT(atomic_read(&bip->bli_refcount) == 0); + + bp->b_log_item = NULL; + xfs_buf_rele(bp); + xfs_buf_item_free(bip); +} + /* Is this log iovec plausibly large enough to contain the buffer log format? */ bool xfs_buf_log_check_iovec( @@ -390,6 +445,42 @@ xfs_buf_item_pin( } /* + * For a stale BLI, process all the necessary completions that must be + * performed when the final BLI reference goes away. The buffer will be + * referenced and locked here - we return to the caller with the buffer still + * referenced and locked for them to finalise processing of the buffer. 
+ */ +static void +xfs_buf_item_finish_stale( + struct xfs_buf_log_item *bip) +{ + struct xfs_buf *bp = bip->bli_buf; + struct xfs_log_item *lip = &bip->bli_item; + + ASSERT(bip->bli_flags & XFS_BLI_STALE); + ASSERT(xfs_buf_islocked(bp)); + ASSERT(bp->b_flags & XBF_STALE); + ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL); + ASSERT(list_empty(&lip->li_trans)); + ASSERT(!bp->b_transp); + + if (bip->bli_flags & XFS_BLI_STALE_INODE) { + xfs_buf_item_done(bp); + xfs_buf_inode_iodone(bp); + ASSERT(list_empty(&bp->b_li_list)); + return; + } + + /* + * We may or may not be on the AIL here, xfs_trans_ail_delete() will do + * the right thing regardless of the situation in which we are called. + */ + xfs_trans_ail_delete(lip, SHUTDOWN_LOG_IO_ERROR); + xfs_buf_item_relse(bip); + ASSERT(bp->b_log_item == NULL); +} + +/* * This is called to unpin the buffer associated with the buf log item which was * previously pinned with a call to xfs_buf_item_pin(). We enter this function * with a buffer pin count, a buffer reference and a BLI reference. @@ -438,13 +529,6 @@ xfs_buf_item_unpin( } if (stale) { - ASSERT(bip->bli_flags & XFS_BLI_STALE); - ASSERT(xfs_buf_islocked(bp)); - ASSERT(bp->b_flags & XBF_STALE); - ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL); - ASSERT(list_empty(&lip->li_trans)); - ASSERT(!bp->b_transp); - trace_xfs_buf_item_unpin_stale(bip); /* @@ -455,22 +539,7 @@ xfs_buf_item_unpin( * processing is complete. */ xfs_buf_rele(bp); - - /* - * If we get called here because of an IO error, we may or may - * not have the item on the AIL. xfs_trans_ail_delete() will - * take care of that situation. xfs_trans_ail_delete() drops - * the AIL lock. - */ - if (bip->bli_flags & XFS_BLI_STALE_INODE) { - xfs_buf_item_done(bp); - xfs_buf_inode_iodone(bp); - ASSERT(list_empty(&bp->b_li_list)); - } else { - xfs_trans_ail_delete(lip, SHUTDOWN_LOG_IO_ERROR); - xfs_buf_item_relse(bp); - ASSERT(bp->b_log_item == NULL); - } + xfs_buf_item_finish_stale(bip); xfs_buf_relse(bp); return; } @@ -543,43 +612,42 @@ xfs_buf_item_push( * Drop the buffer log item refcount and take appropriate action. This helper * determines whether the bli must be freed or not, since a decrement to zero * does not necessarily mean the bli is unused. - * - * Return true if the bli is freed, false otherwise. */ -bool +void xfs_buf_item_put( struct xfs_buf_log_item *bip) { - struct xfs_log_item *lip = &bip->bli_item; - bool aborted; - bool dirty; + + ASSERT(xfs_buf_islocked(bip->bli_buf)); /* drop the bli ref and return if it wasn't the last one */ if (!atomic_dec_and_test(&bip->bli_refcount)) - return false; + return; - /* - * We dropped the last ref and must free the item if clean or aborted. - * If the bli is dirty and non-aborted, the buffer was clean in the - * transaction but still awaiting writeback from previous changes. In - * that case, the bli is freed on buffer writeback completion. - */ - aborted = test_bit(XFS_LI_ABORTED, &lip->li_flags) || - xlog_is_shutdown(lip->li_log); - dirty = bip->bli_flags & XFS_BLI_DIRTY; - if (dirty && !aborted) - return false; + /* If the BLI is in the AIL, then it is still dirty and in use */ + if (test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags)) { + ASSERT(bip->bli_flags & XFS_BLI_DIRTY); + return; + } /* - * The bli is aborted or clean. An aborted item may be in the AIL - * regardless of dirty state. For example, consider an aborted - * transaction that invalidated a dirty bli and cleared the dirty - * state. 
+	 * In shutdown conditions, we can be asked to free a dirty BLI that
+	 * isn't in the AIL. This can occur due to a checkpoint aborting a BLI
+	 * instead of inserting it into the AIL at checkpoint IO completion. If
+	 * there's another bli reference (e.g. a btree cursor holds a clean
+	 * reference) and it is released via xfs_trans_brelse(), we can get here
+	 * with that aborted, dirty BLI. In this case, it is safe to free the
+	 * dirty BLI immediately, as it is not in the AIL and there are no
+	 * other references to it.
+	 *
+	 * We should never get here with a stale BLI via that path as
+	 * xfs_trans_brelse() specifically holds onto stale buffers rather than
+	 * releasing them.
	 */
-	if (aborted)
-		xfs_trans_ail_delete(lip, 0);
-	xfs_buf_item_relse(bip->bli_buf);
-	return true;
+	ASSERT(!(bip->bli_flags & XFS_BLI_DIRTY) ||
+	       test_bit(XFS_LI_ABORTED, &bip->bli_item.li_flags));
+	ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
+	xfs_buf_item_relse(bip);
 }
 
 /*
@@ -600,6 +668,15 @@ xfs_buf_item_put(
  * if necessary but do not unlock the buffer. This is for support of
  * xfs_trans_bhold(). Make sure the XFS_BLI_HOLD field is cleared if we don't
  * free the item.
+ *
+ * If the XFS_BLI_STALE flag is set, the last reference to the BLI *must*
+ * perform a completion abort of any objects attached to the buffer for IO
+ * tracking purposes. This generally only happens in shutdown situations;
+ * normally, xfs_buf_item_unpin() will drop the last BLI reference and perform
+ * completion processing. However, because transaction completion can race with
+ * checkpoint completion during a shutdown, this release context may end up
+ * being the last active reference to the BLI and so needs to perform this
+ * cleanup.
 */
 STATIC void
 xfs_buf_item_release(
@@ -607,18 +684,19 @@ xfs_buf_item_release(
 {
 	struct xfs_buf_log_item	*bip = BUF_ITEM(lip);
 	struct xfs_buf		*bp = bip->bli_buf;
-	bool			released;
 	bool			hold = bip->bli_flags & XFS_BLI_HOLD;
 	bool			stale = bip->bli_flags & XFS_BLI_STALE;
-#if defined(DEBUG) || defined(XFS_WARN)
-	bool			ordered = bip->bli_flags & XFS_BLI_ORDERED;
-	bool			dirty = bip->bli_flags & XFS_BLI_DIRTY;
 	bool			aborted = test_bit(XFS_LI_ABORTED, &lip->li_flags);
+	bool			dirty = bip->bli_flags & XFS_BLI_DIRTY;
+#if defined(DEBUG) || defined(XFS_WARN)
+	bool			ordered = bip->bli_flags & XFS_BLI_ORDERED;
 #endif
 
 	trace_xfs_buf_item_release(bip);
 
+	ASSERT(xfs_buf_islocked(bp));
+
 	/*
 	 * The bli dirty state should match whether the blf has logged segments
 	 * except for ordered buffers, where only the bli should be dirty.
@@ -634,16 +712,56 @@ xfs_buf_item_release(
 	bp->b_transp = NULL;
 	bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_HOLD | XFS_BLI_ORDERED);
 
+	/* If there are other references, then we have nothing to do. */
+	if (!atomic_dec_and_test(&bip->bli_refcount))
+		goto out_release;
+
+	/*
+	 * Stale buffer completion frees the BLI, unlocks and releases the
+	 * buffer. Neither the BLI nor the buffer is safe to reference after
+	 * this call, so there's nothing more we need to do here.
+	 *
+	 * If we get here with a stale buffer and references to the BLI remain,
+	 * we must not unlock the buffer as the last BLI reference owns lock
+	 * context, not us.
+	 */
+	if (stale) {
+		xfs_buf_item_finish_stale(bip);
+		xfs_buf_relse(bp);
+		ASSERT(!hold);
+		return;
+	}
+
 	/*
-	 * Unref the item and unlock the buffer unless held or stale. Stale
-	 * buffers remain locked until final unpin unless the bli is freed by
-	 * the unref call. The latter implies shutdown because buffer
-	 * invalidation dirties the bli and transaction.
+ * Dirty or clean, aborted items are done and need to be removed from + * the AIL and released. This frees the BLI, but leaves the buffer + * locked and referenced. */ - released = xfs_buf_item_put(bip); - if (hold || (stale && !released)) + if (aborted || xlog_is_shutdown(lip->li_log)) { + ASSERT(list_empty(&bip->bli_buf->b_li_list)); + xfs_buf_item_done(bp); + goto out_release; + } + + /* + * Clean, unreferenced BLIs can be immediately freed, leaving the buffer + * locked and referenced. + * + * Dirty, unreferenced BLIs *must* be in the AIL awaiting writeback. + */ + if (!dirty) + xfs_buf_item_relse(bip); + else + ASSERT(test_bit(XFS_LI_IN_AIL, &lip->li_flags)); + + /* Not safe to reference the BLI from here */ +out_release: + /* + * If we get here with a stale buffer, we must not unlock the + * buffer as the last BLI reference owns lock context, not us. + */ + if (stale || hold) return; - ASSERT(!stale || aborted); xfs_buf_relse(bp); } @@ -729,33 +847,6 @@ static const struct xfs_item_ops xfs_buf_item_ops = { .iop_push = xfs_buf_item_push, }; -STATIC void -xfs_buf_item_get_format( - struct xfs_buf_log_item *bip, - int count) -{ - ASSERT(bip->bli_formats == NULL); - bip->bli_format_count = count; - - if (count == 1) { - bip->bli_formats = &bip->__bli_format; - return; - } - - bip->bli_formats = kzalloc(count * sizeof(struct xfs_buf_log_format), - GFP_KERNEL | __GFP_NOFAIL); -} - -STATIC void -xfs_buf_item_free_format( - struct xfs_buf_log_item *bip) -{ - if (bip->bli_formats != &bip->__bli_format) { - kfree(bip->bli_formats); - bip->bli_formats = NULL; - } -} - /* * Allocate a new buf log item to go with the given buffer. * Set the buffer's b_log_item field to point to the new @@ -976,34 +1067,6 @@ xfs_buf_item_dirty_format( return false; } -STATIC void -xfs_buf_item_free( - struct xfs_buf_log_item *bip) -{ - xfs_buf_item_free_format(bip); - kvfree(bip->bli_item.li_lv_shadow); - kmem_cache_free(xfs_buf_item_cache, bip); -} - -/* - * xfs_buf_item_relse() is called when the buf log item is no longer needed. - */ -void -xfs_buf_item_relse( - struct xfs_buf *bp) -{ - struct xfs_buf_log_item *bip = bp->b_log_item; - - trace_xfs_buf_item_relse(bp, _RET_IP_); - ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags)); - - if (atomic_read(&bip->bli_refcount)) - return; - bp->b_log_item = NULL; - xfs_buf_rele(bp); - xfs_buf_item_free(bip); -} - void xfs_buf_item_done( struct xfs_buf *bp) @@ -1023,5 +1086,5 @@ xfs_buf_item_done( xfs_trans_ail_delete(&bp->b_log_item->bli_item, (bp->b_flags & _XBF_LOGRECOVERY) ? 
 				0 : SHUTDOWN_CORRUPT_INCORE);
-	xfs_buf_item_relse(bp);
+	xfs_buf_item_relse(bp->b_log_item);
 }
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index e10e324cd245..416890b84f8c 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -49,8 +49,7 @@ struct xfs_buf_log_item {
 int	xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *);
 void	xfs_buf_item_done(struct xfs_buf *bp);
-void	xfs_buf_item_relse(struct xfs_buf *);
-bool	xfs_buf_item_put(struct xfs_buf_log_item *);
+void	xfs_buf_item_put(struct xfs_buf_log_item *bip);
 void	xfs_buf_item_log(struct xfs_buf_log_item *, uint, uint);
 bool	xfs_buf_item_dirty_format(struct xfs_buf_log_item *);
 void	xfs_buf_inode_iodone(struct xfs_buf *);
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index b4e32f0860b7..0bd8022e47b4 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -1398,11 +1398,9 @@ xfs_qm_dqflush(
 	ASSERT(XFS_DQ_IS_LOCKED(dqp));
 	ASSERT(!completion_done(&dqp->q_flush));
+	ASSERT(atomic_read(&dqp->q_pincount) == 0);
 
 	trace_xfs_dqflush(dqp);
-
-	xfs_qm_dqunpin_wait(dqp);
-
 	fa = xfs_qm_dqflush_check(dqp);
 	if (fa) {
 		xfs_alert(mp, "corrupt dquot ID 0x%x in memory at %pS",
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 48254a72071b..0b41b18debf3 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1335,9 +1335,10 @@ xfs_falloc_allocate_range(
 }
 
 #define XFS_FALLOC_FL_SUPPORTED \
-	(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \
-	 FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | \
-	 FALLOC_FL_INSERT_RANGE | FALLOC_FL_UNSHARE_RANGE)
+	(FALLOC_FL_ALLOCATE_RANGE | FALLOC_FL_KEEP_SIZE | \
+	 FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE | \
+	 FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE | \
+	 FALLOC_FL_UNSHARE_RANGE)
 
 STATIC long
 __xfs_file_fallocate(
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 726e29b837e6..bbc2f2973dcc 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -979,7 +979,15 @@ xfs_reclaim_inode(
 	 */
 	if (xlog_is_shutdown(ip->i_mount->m_log)) {
 		xfs_iunpin_wait(ip);
+		/*
+		 * Avoid an ABBA deadlock on the inode cluster buffer vs
+		 * concurrent xfs_ifree_cluster() trying to mark the inode
+		 * stale. We don't need the inode locked to run the flush abort
+		 * code, but the flush abort needs to lock the cluster buffer.
+		 */
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
 		xfs_iflush_shutdown_abort(ip);
+		xfs_ilock(ip, XFS_ILOCK_EXCL);
 		goto reclaim;
 	}
 	if (xfs_ipincount(ip))
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index ee3e0f284287..761a996a857c 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1635,7 +1635,7 @@ retry:
 	iip = ip->i_itemp;
 	if (__xfs_iflags_test(ip, XFS_IFLUSHING)) {
 		ASSERT(!list_empty(&iip->ili_item.li_bio_list));
-		ASSERT(iip->ili_last_fields);
+		ASSERT(iip->ili_last_fields || xlog_is_shutdown(mp->m_log));
 		goto out_iunlock;
 	}
 
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index c6cb0b6b9e46..285e27ff89e2 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -758,11 +758,14 @@ xfs_inode_item_push(
 	 * completed and items removed from the AIL before the next push
 	 * attempt.
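+	 *
+	 * Trace this state so that a stuck AIL push caused by stale, pinned
+	 * inodes can be diagnosed from the trace buffer.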
 	 */
+		trace_xfs_inode_push_stale(ip, _RET_IP_);
 		return XFS_ITEM_PINNED;
 	}
 
-	if (xfs_ipincount(ip) > 0 || xfs_buf_ispinned(bp))
+	if (xfs_ipincount(ip) > 0 || xfs_buf_ispinned(bp)) {
+		trace_xfs_inode_push_pinned(ip, _RET_IP_);
 		return XFS_ITEM_PINNED;
+	}
 
 	if (xfs_iflags_test(ip, XFS_IFLUSHING))
 		return XFS_ITEM_FLUSHING;
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index f66d2d430e4f..a80cb6b9969a 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -793,8 +793,10 @@ xlog_cil_ail_insert(
 		struct xfs_log_item	*lip = lv->lv_item;
 		xfs_lsn_t		item_lsn;
 
-		if (aborted)
+		if (aborted) {
+			trace_xlog_ail_insert_abort(lip);
 			set_bit(XFS_LI_ABORTED, &lip->li_flags);
+		}
 
 		if (lip->li_ops->flags & XFS_ITEM_RELEASE_WHEN_COMMITTED) {
 			lip->li_ops->iop_release(lip);
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index 08443ceec329..866c71d9fbae 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -320,7 +320,7 @@ xfs_mru_cache_create(
 	xfs_mru_cache_free_func_t free_func)
 {
 	struct xfs_mru_cache	*mru = NULL;
-	int			err = 0, grp;
+	int			grp;
 	unsigned int		grp_time;
 
 	if (mrup)
@@ -341,8 +341,8 @@ xfs_mru_cache_create(
 	mru->lists = kzalloc(mru->grp_count * sizeof(*mru->lists),
 			GFP_KERNEL | __GFP_NOFAIL);
 	if (!mru->lists) {
-		err = -ENOMEM;
-		goto exit;
+		kfree(mru);
+		return -ENOMEM;
 	}
 
 	for (grp = 0; grp < mru->grp_count; grp++)
@@ -361,14 +361,7 @@
 	mru->free_func = free_func;
 	mru->data = data;
 	*mrup = mru;
-
-exit:
-	if (err && mru && mru->lists)
-		kfree(mru->lists);
-	if (err && mru)
-		kfree(mru);
-
-	return err;
+	return 0;
 }
 
 /*
@@ -425,10 +418,6 @@ xfs_mru_cache_insert(
 {
 	int			error = -EINVAL;
 
-	ASSERT(mru && mru->lists);
-	if (!mru || !mru->lists)
-		goto out_free;
-
 	error = -ENOMEM;
 	if (radix_tree_preload(GFP_KERNEL))
 		goto out_free;
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 417439b58785..fa135ac26471 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -134,6 +134,7 @@ xfs_qm_dqpurge(
 
 	dqp->q_flags |= XFS_DQFLAG_FREEING;
 
+	xfs_qm_dqunpin_wait(dqp);
 	xfs_dqflock(dqp);
 
 	/*
@@ -465,6 +466,7 @@ xfs_qm_dquot_isolate(
 	struct xfs_dquot	*dqp = container_of(item,
						struct xfs_dquot, q_lru);
 	struct xfs_qm_isolate	*isol = arg;
+	enum lru_status		ret = LRU_SKIP;
 
 	if (!xfs_dqlock_nowait(dqp))
 		goto out_miss_busy;
@@ -478,6 +480,16 @@ xfs_qm_dquot_isolate(
 		goto out_miss_unlock;
 
 	/*
+	 * If the dquot is pinned or dirty, rotate it to the end of the LRU to
+	 * give some time for it to be cleaned before we try to isolate it
+	 * again.
+	 */
+	ret = LRU_ROTATE;
+	if (XFS_DQ_IS_DIRTY(dqp) || atomic_read(&dqp->q_pincount) > 0) {
+		goto out_miss_unlock;
+	}
+
+	/*
 	 * This dquot has acquired a reference in the meantime; remove it from
 	 * the freelist and try again.
 	 */
@@ -492,41 +504,14 @@ xfs_qm_dquot_isolate(
 	}
 
 	/*
-	 * If the dquot is dirty, flush it. If it's already being flushed, just
-	 * skip it so there is time for the IO to complete before we try to
-	 * reclaim it again on the next LRU pass.
+	 * The dquot may still be under IO, in which case the flush lock will be
+	 * held. If we can't get the flush lock now, just skip over the dquot as
+	 * if it was dirty.
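+	 *
+	 * If we do get the flush lock, the dquot must already be clean (dirty
+	 * or pinned dquots were rotated away above), so detach its backing
+	 * buffer and fall through to reclaim it.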
*/ if (!xfs_dqflock_nowait(dqp)) goto out_miss_unlock; - if (XFS_DQ_IS_DIRTY(dqp)) { - struct xfs_buf *bp = NULL; - int error; - - trace_xfs_dqreclaim_dirty(dqp); - - /* we have to drop the LRU lock to flush the dquot */ - spin_unlock(&lru->lock); - - error = xfs_dquot_use_attached_buf(dqp, &bp); - if (!bp || error == -EAGAIN) { - xfs_dqfunlock(dqp); - goto out_unlock_dirty; - } - - /* - * dqflush completes dqflock on error, and the delwri ioend - * does it on success. - */ - error = xfs_qm_dqflush(dqp, bp); - if (error) - goto out_unlock_dirty; - - xfs_buf_delwri_queue(bp, &isol->buffers); - xfs_buf_relse(bp); - goto out_unlock_dirty; - } - + ASSERT(!XFS_DQ_IS_DIRTY(dqp)); xfs_dquot_detach_buf(dqp); xfs_dqfunlock(dqp); @@ -548,13 +533,7 @@ out_miss_unlock: out_miss_busy: trace_xfs_dqreclaim_busy(dqp); XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses); - return LRU_SKIP; - -out_unlock_dirty: - trace_xfs_dqreclaim_busy(dqp); - XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses); - xfs_dqunlock(dqp); - return LRU_RETRY; + return ret; } static unsigned long @@ -1486,7 +1465,6 @@ xfs_qm_flush_one( struct xfs_dquot *dqp, void *data) { - struct xfs_mount *mp = dqp->q_mount; struct list_head *buffer_list = data; struct xfs_buf *bp = NULL; int error = 0; @@ -1497,34 +1475,8 @@ xfs_qm_flush_one( if (!XFS_DQ_IS_DIRTY(dqp)) goto out_unlock; - /* - * The only way the dquot is already flush locked by the time quotacheck - * gets here is if reclaim flushed it before the dqadjust walk dirtied - * it for the final time. Quotacheck collects all dquot bufs in the - * local delwri queue before dquots are dirtied, so reclaim can't have - * possibly queued it for I/O. The only way out is to push the buffer to - * cycle the flush lock. - */ - if (!xfs_dqflock_nowait(dqp)) { - /* buf is pinned in-core by delwri list */ - error = xfs_buf_incore(mp->m_ddev_targp, dqp->q_blkno, - mp->m_quotainfo->qi_dqchunklen, 0, &bp); - if (error) - goto out_unlock; - - if (!(bp->b_flags & _XBF_DELWRI_Q)) { - error = -EAGAIN; - xfs_buf_relse(bp); - goto out_unlock; - } - xfs_buf_unlock(bp); - - xfs_buf_delwri_pushbuf(bp, buffer_list); - xfs_buf_rele(bp); - - error = -EAGAIN; - goto out_unlock; - } + xfs_qm_dqunpin_wait(dqp); + xfs_dqflock(dqp); error = xfs_dquot_use_attached_buf(dqp, &bp); if (error) diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 6484c596ecea..736eb0924573 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -1259,6 +1259,8 @@ xfs_growfs_check_rtgeom( kfree(nmp); + trace_xfs_growfs_check_rtgeom(mp, min_logfsbs); + if (min_logfsbs > mp->m_sb.sb_logblocks) return -EINVAL; diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 0bc4b5489078..bb0a82635a77 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -2020,14 +2020,13 @@ xfs_remount_rw( int error; if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp && - bdev_read_only(mp->m_logdev_targp->bt_bdev)) { + xfs_readonly_buftarg(mp->m_logdev_targp)) { xfs_warn(mp, "ro->rw transition prohibited by read-only logdev"); return -EACCES; } - if (mp->m_rtdev_targp && - bdev_read_only(mp->m_rtdev_targp->bt_bdev)) { + if (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp)) { xfs_warn(mp, "ro->rw transition prohibited by read-only rtdev"); return -EACCES; diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 01d284a1c759..ba45d801df1c 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -778,7 +778,6 @@ DEFINE_BUF_EVENT(xfs_buf_iowait_done); DEFINE_BUF_EVENT(xfs_buf_delwri_queue); 
DEFINE_BUF_EVENT(xfs_buf_delwri_queued); DEFINE_BUF_EVENT(xfs_buf_delwri_split); -DEFINE_BUF_EVENT(xfs_buf_delwri_pushbuf); DEFINE_BUF_EVENT(xfs_buf_get_uncached); DEFINE_BUF_EVENT(xfs_buf_item_relse); DEFINE_BUF_EVENT(xfs_buf_iodone_async); @@ -1147,6 +1146,7 @@ DECLARE_EVENT_CLASS(xfs_iref_class, __field(xfs_ino_t, ino) __field(int, count) __field(int, pincount) + __field(unsigned long, iflags) __field(unsigned long, caller_ip) ), TP_fast_assign( @@ -1154,13 +1154,15 @@ DECLARE_EVENT_CLASS(xfs_iref_class, __entry->ino = ip->i_ino; __entry->count = atomic_read(&VFS_I(ip)->i_count); __entry->pincount = atomic_read(&ip->i_pincount); + __entry->iflags = ip->i_flags; __entry->caller_ip = caller_ip; ), - TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %pS", + TP_printk("dev %d:%d ino 0x%llx count %d pincount %d iflags 0x%lx caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->count, __entry->pincount, + __entry->iflags, (char *)__entry->caller_ip) ) @@ -1250,6 +1252,8 @@ DEFINE_IREF_EVENT(xfs_irele); DEFINE_IREF_EVENT(xfs_inode_pin); DEFINE_IREF_EVENT(xfs_inode_unpin); DEFINE_IREF_EVENT(xfs_inode_unpin_nowait); +DEFINE_IREF_EVENT(xfs_inode_push_pinned); +DEFINE_IREF_EVENT(xfs_inode_push_stale); DECLARE_EVENT_CLASS(xfs_namespace_class, TP_PROTO(struct xfs_inode *dp, const struct xfs_name *name), @@ -1654,6 +1658,8 @@ DEFINE_LOG_ITEM_EVENT(xfs_ail_flushing); DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_mark); DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_skip); DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_unpin); +DEFINE_LOG_ITEM_EVENT(xlog_ail_insert_abort); +DEFINE_LOG_ITEM_EVENT(xfs_trans_free_abort); DECLARE_EVENT_CLASS(xfs_ail_class, TP_PROTO(struct xfs_log_item *lip, xfs_lsn_t old_lsn, xfs_lsn_t new_lsn), diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index c6657072361a..b4a07af513ba 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -742,8 +742,10 @@ xfs_trans_free_items( list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) { xfs_trans_del_item(lip); - if (abort) + if (abort) { + trace_xfs_trans_free_abort(lip); set_bit(XFS_LI_ABORTED, &lip->li_flags); + } if (lip->li_ops->iop_release) lip->li_ops->iop_release(lip); } diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c index 80add26c0111..01315ed75502 100644 --- a/fs/xfs/xfs_zone_alloc.c +++ b/fs/xfs/xfs_zone_alloc.c @@ -727,7 +727,7 @@ xfs_select_zone( for (;;) { prepare_to_wait(&zi->zi_zone_wait, &wait, TASK_UNINTERRUPTIBLE); oz = xfs_select_zone_nowait(mp, write_hint, pack_tight); - if (oz) + if (oz || xfs_is_shutdown(mp)) break; schedule(); } @@ -777,26 +777,6 @@ xfs_mark_rtg_boundary( ioend->io_flags |= IOMAP_IOEND_BOUNDARY; } -static void -xfs_submit_zoned_bio( - struct iomap_ioend *ioend, - struct xfs_open_zone *oz, - bool is_seq) -{ - ioend->io_bio.bi_iter.bi_sector = ioend->io_sector; - ioend->io_private = oz; - atomic_inc(&oz->oz_ref); /* for xfs_zoned_end_io */ - - if (is_seq) { - ioend->io_bio.bi_opf &= ~REQ_OP_WRITE; - ioend->io_bio.bi_opf |= REQ_OP_ZONE_APPEND; - } else { - xfs_mark_rtg_boundary(ioend); - } - - submit_bio(&ioend->io_bio); -} - /* * Cache the last zone written to for an inode so that it is considered first * for subsequent writes. 
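Aside on the xfs_select_zone() hunk above: adding xfs_is_shutdown() to the wakeup condition keeps a TASK_UNINTERRUPTIBLE waiter from sleeping forever once a shutdown guarantees that no open zone will ever become available. Below is a minimal sketch of that open-coded waiter pattern; struct pool, try_get_resource() and the dead flag are illustrative stand-ins, not the real XFS zone allocator symbols.

#include <linux/sched.h>
#include <linux/wait.h>

/* Illustrative stand-ins, not the real XFS zone allocator types. */
struct pool {
	wait_queue_head_t	wait;
	bool			dead;	/* set once at shutdown, then wake_up_all() */
};

static void *try_get_resource(struct pool *p);	/* assumed non-blocking, NULL if empty */

static void *
wait_for_resource(
	struct pool	*p)
{
	void		*res;
	DEFINE_WAIT(wait);

	for (;;) {
		prepare_to_wait(&p->wait, &wait, TASK_UNINTERRUPTIBLE);
		/* re-check the condition only after we are queued */
		res = try_get_resource(p);
		if (res || READ_ONCE(p->dead))
			break;
		schedule();
	}
	finish_wait(&p->wait, &wait);
	return res;	/* NULL means we bailed out on shutdown */
}

Because prepare_to_wait() publishes the sleeping state before the condition is re-tested, a wakeup from either the free path or the shutdown path cannot be lost; callers only need to handle the NULL return, which mirrors the xfs_is_shutdown() test added above.
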
@@ -891,6 +871,26 @@ xfs_zone_cache_create_association( xfs_mru_cache_insert(mp->m_zone_cache, ip->i_ino, &item->mru); } +static void +xfs_submit_zoned_bio( + struct iomap_ioend *ioend, + struct xfs_open_zone *oz, + bool is_seq) +{ + ioend->io_bio.bi_iter.bi_sector = ioend->io_sector; + ioend->io_private = oz; + atomic_inc(&oz->oz_ref); /* for xfs_zoned_end_io */ + + if (is_seq) { + ioend->io_bio.bi_opf &= ~REQ_OP_WRITE; + ioend->io_bio.bi_opf |= REQ_OP_ZONE_APPEND; + } else { + xfs_mark_rtg_boundary(ioend); + } + + submit_bio(&ioend->io_bio); +} + void xfs_zone_alloc_and_submit( struct iomap_ioend *ioend, |
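
A closing observation on the buf log item rework above: xfs_buf_item_put() and xfs_buf_item_release() now share a single rule: whichever context drops the final BLI reference owns teardown, except that a dirty item still tracked in the AIL must be left for writeback completion to free. A minimal sketch of that reference-drop discipline, using a hypothetical struct item rather than the real struct xfs_buf_log_item:

#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/bug.h>
#include <linux/slab.h>

/* Hypothetical log-item analogue; not the real struct xfs_buf_log_item. */
struct item {
	atomic_t	refcount;
	unsigned long	flags;
};
#define ITEM_DIRTY	0	/* modified since last writeback */
#define ITEM_IN_AIL	1	/* tracked for deferred writeback */

static void
item_put(
	struct item	*ip)
{
	/* Only the last reference may tear the item down. */
	if (!atomic_dec_and_test(&ip->refcount))
		return;

	/*
	 * A dirty item still tracked for writeback is owned by the writeback
	 * machinery now; it gets freed at IO completion, not here.
	 */
	if (test_bit(ITEM_IN_AIL, &ip->flags)) {
		WARN_ON_ONCE(!test_bit(ITEM_DIRTY, &ip->flags));
		return;
	}

	kfree(ip);
}

In the patch the AIL-resident case is caught by the XFS_LI_IN_AIL test in xfs_buf_item_put(), and the stale case adds the further twist that the last reference also owns the buffer lock.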