diff options
Diffstat (limited to 'fs/bcachefs/io_read.c')
-rw-r--r-- | fs/bcachefs/io_read.c | 162 |
1 files changed, 116 insertions, 46 deletions
diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index 210b6adc359f..b8ccd8c930e1 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -39,38 +39,73 @@ MODULE_PARM_DESC(read_corrupt_ratio, ""); #ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT +static inline u32 bch2_dev_congested_read(struct bch_dev *ca, u64 now) +{ + s64 congested = atomic_read(&ca->congested); + u64 last = READ_ONCE(ca->congested_last); + if (time_after64(now, last)) + congested -= (now - last) >> 12; + + return clamp(congested, 0LL, CONGESTED_MAX); +} + static bool bch2_target_congested(struct bch_fs *c, u16 target) { const struct bch_devs_mask *devs; unsigned d, nr = 0, total = 0; - u64 now = local_clock(), last; - s64 congested; - struct bch_dev *ca; - - if (!target) - return false; + u64 now = local_clock(); guard(rcu)(); devs = bch2_target_to_mask(c, target) ?: &c->rw_devs[BCH_DATA_user]; for_each_set_bit(d, devs->d, BCH_SB_MEMBERS_MAX) { - ca = rcu_dereference(c->devs[d]); + struct bch_dev *ca = rcu_dereference(c->devs[d]); if (!ca) continue; - congested = atomic_read(&ca->congested); - last = READ_ONCE(ca->congested_last); - if (time_after64(now, last)) - congested -= (now - last) >> 12; - - total += max(congested, 0LL); + total += bch2_dev_congested_read(ca, now); nr++; } return get_random_u32_below(nr * CONGESTED_MAX) < total; } +void bch2_dev_congested_to_text(struct printbuf *out, struct bch_dev *ca) +{ + printbuf_tabstop_push(out, 32); + + prt_printf(out, "current:\t%u%%\n", + bch2_dev_congested_read(ca, local_clock()) * + 100 / CONGESTED_MAX); + + prt_printf(out, "raw:\t%i/%u\n", atomic_read(&ca->congested), CONGESTED_MAX); + + prt_printf(out, "last io over threshold:\t"); + bch2_pr_time_units(out, local_clock() - ca->congested_last); + prt_newline(out); + + prt_printf(out, "read latency threshold:\t"); + bch2_pr_time_units(out, + ca->io_latency[READ].quantiles.entries[QUANTILE_IDX(1)].m << 2); + prt_newline(out); + + prt_printf(out, "median read latency:\t"); + bch2_pr_time_units(out, + ca->io_latency[READ].quantiles.entries[QUANTILE_IDX(7)].m); + prt_newline(out); + + prt_printf(out, "write latency threshold:\t"); + bch2_pr_time_units(out, + ca->io_latency[WRITE].quantiles.entries[QUANTILE_IDX(1)].m << 3); + prt_newline(out); + + prt_printf(out, "median write latency:\t"); + bch2_pr_time_units(out, + ca->io_latency[WRITE].quantiles.entries[QUANTILE_IDX(7)].m); + prt_newline(out); +} + #else static bool bch2_target_congested(struct bch_fs *c, u16 target) @@ -130,22 +165,32 @@ static inline int should_promote(struct bch_fs *c, struct bkey_s_c k, if (!have_io_error(failed)) { BUG_ON(!opts.promote_target); - if (!(flags & BCH_READ_may_promote)) + if (!(flags & BCH_READ_may_promote)) { + count_event(c, io_read_nopromote_may_not); return bch_err_throw(c, nopromote_may_not); + } - if (bch2_bkey_has_target(c, k, opts.promote_target)) + if (bch2_bkey_has_target(c, k, opts.promote_target)) { + count_event(c, io_read_nopromote_already_promoted); return bch_err_throw(c, nopromote_already_promoted); + } - if (bkey_extent_is_unwritten(k)) + if (bkey_extent_is_unwritten(k)) { + count_event(c, io_read_nopromote_unwritten); return bch_err_throw(c, nopromote_unwritten); + } - if (bch2_target_congested(c, opts.promote_target)) + if (bch2_target_congested(c, opts.promote_target)) { + count_event(c, io_read_nopromote_congested); return bch_err_throw(c, nopromote_congested); + } } if (rhashtable_lookup_fast(&c->promote_table, &pos, - bch_promote_params)) + bch_promote_params)) { + count_event(c, io_read_nopromote_in_flight); return bch_err_throw(c, nopromote_in_flight); + } return 0; } @@ -160,6 +205,7 @@ static noinline void promote_free(struct bch_read_bio *rbio) BUG_ON(ret); async_object_list_del(c, promote, op->list_idx); + async_object_list_del(c, rbio, rbio->list_idx); bch2_data_update_exit(&op->write); @@ -343,16 +389,27 @@ static struct bch_read_bio *promote_alloc(struct btree_trans *trans, return promote; nopromote: - trace_io_read_nopromote(c, ret); + if (trace_io_read_nopromote_enabled()) { + CLASS(printbuf, buf)(); + printbuf_indent_add_nextline(&buf, 2); + prt_printf(&buf, "%s\n", bch2_err_str(ret)); + bch2_bkey_val_to_text(&buf, c, k); + + trace_io_read_nopromote(c, buf.buf); + } + count_event(c, io_read_nopromote); + return NULL; } -void bch2_promote_op_to_text(struct printbuf *out, struct promote_op *op) +void bch2_promote_op_to_text(struct printbuf *out, + struct bch_fs *c, + struct promote_op *op) { if (!op->write.read_done) { prt_printf(out, "parent read: %px\n", op->write.rbio.parent); printbuf_indent_add(out, 2); - bch2_read_bio_to_text(out, op->write.rbio.parent); + bch2_read_bio_to_text(out, c, op->write.rbio.parent); printbuf_indent_sub(out, 2); } @@ -380,7 +437,8 @@ static int bch2_read_err_msg_trans(struct btree_trans *trans, struct printbuf *o static void bch2_read_err_msg(struct bch_fs *c, struct printbuf *out, struct bch_read_bio *rbio, struct bpos read_pos) { - bch2_trans_run(c, bch2_read_err_msg_trans(trans, out, rbio, read_pos)); + CLASS(btree_trans, trans)(c); + bch2_read_err_msg_trans(trans, out, rbio, read_pos); } enum rbio_context { @@ -450,6 +508,10 @@ static void bch2_rbio_done(struct bch_read_bio *rbio) if (rbio->start_time) bch2_time_stats_update(&rbio->c->times[BCH_TIME_data_read], rbio->start_time); +#ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS + if (rbio->list_idx) + async_object_list_del(rbio->c, rbio, rbio->list_idx); +#endif bio_endio(&rbio->bio); } @@ -577,7 +639,7 @@ static void bch2_rbio_retry(struct work_struct *work) }; struct bch_io_failures failed = { .nr = 0 }; - struct btree_trans *trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); struct bkey_buf sk; bch2_bkey_buf_init(&sk); @@ -619,7 +681,7 @@ static void bch2_rbio_retry(struct work_struct *work) } if (failed.nr || ret) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_log_msg_start(c, &buf); lockrestart_do(trans, @@ -647,12 +709,10 @@ static void bch2_rbio_retry(struct work_struct *work) bch2_io_failures_to_text(&buf, c, &failed); bch2_print_str_ratelimited(c, KERN_ERR, buf.buf); - printbuf_exit(&buf); } bch2_rbio_done(rbio); bch2_bkey_buf_exit(&sk, c); - bch2_trans_put(trans); } static void bch2_rbio_error(struct bch_read_bio *rbio, @@ -740,8 +800,9 @@ out: static noinline void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio) { - bch2_trans_commit_do(rbio->c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - __bch2_rbio_narrow_crcs(trans, rbio)); + CLASS(btree_trans, trans)(rbio->c); + commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, + __bch2_rbio_narrow_crcs(trans, rbio)); } static void bch2_read_decompress_err(struct work_struct *work) @@ -749,7 +810,7 @@ static void bch2_read_decompress_err(struct work_struct *work) struct bch_read_bio *rbio = container_of(work, struct bch_read_bio, work); struct bch_fs *c = rbio->c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_read_err_msg(c, &buf, rbio, rbio->read_pos); prt_str(&buf, "decompression error"); @@ -761,7 +822,6 @@ static void bch2_read_decompress_err(struct work_struct *work) bch_err_ratelimited(c, "%s", buf.buf); bch2_rbio_error(rbio, -BCH_ERR_data_read_decompress_err, BLK_STS_IOERR); - printbuf_exit(&buf); } static void bch2_read_decrypt_err(struct work_struct *work) @@ -769,7 +829,7 @@ static void bch2_read_decrypt_err(struct work_struct *work) struct bch_read_bio *rbio = container_of(work, struct bch_read_bio, work); struct bch_fs *c = rbio->c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_read_err_msg(c, &buf, rbio, rbio->read_pos); prt_str(&buf, "decrypt error"); @@ -781,7 +841,6 @@ static void bch2_read_decrypt_err(struct work_struct *work) bch_err_ratelimited(c, "%s", buf.buf); bch2_rbio_error(rbio, -BCH_ERR_data_read_decrypt_err, BLK_STS_IOERR); - printbuf_exit(&buf); } /* Inner part that may run in process context */ @@ -963,7 +1022,7 @@ static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct btree_iter iter; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); int ret; bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, @@ -1000,7 +1059,6 @@ static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans, bch2_fs_inconsistent(c, "%s", buf.buf); bch2_trans_iter_exit(trans, &iter); - printbuf_exit(&buf); } int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, @@ -1052,25 +1110,22 @@ retry_pick: trace_and_count(c, io_read_fail_and_poison, &orig->bio); } - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_read_err_msg_trans(trans, &buf, orig, read_pos); prt_printf(&buf, "%s\n ", bch2_err_str(ret)); bch2_bkey_val_to_text(&buf, c, k); - bch_err_ratelimited(c, "%s", buf.buf); - printbuf_exit(&buf); goto err; } if (unlikely(bch2_csum_type_is_encryption(pick.crc.csum_type)) && !c->chacha20_key_set) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_read_err_msg_trans(trans, &buf, orig, read_pos); prt_printf(&buf, "attempting to read encrypted data without encryption key\n "); bch2_bkey_val_to_text(&buf, c, k); bch_err_ratelimited(c, "%s", buf.buf); - printbuf_exit(&buf); ret = bch_err_throw(c, data_read_no_encryption_key); goto err; } @@ -1439,13 +1494,12 @@ err: if (unlikely(ret)) { if (ret != -BCH_ERR_extent_poisoned) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); lockrestart_do(trans, bch2_inum_offset_err_msg_trans(trans, &buf, inum, bvec_iter.bi_sector << 9)); prt_printf(&buf, "data read error: %s", bch2_err_str(ret)); bch_err_ratelimited(c, "%s", buf.buf); - printbuf_exit(&buf); } rbio->bio.bi_status = BLK_STS_IOERR; @@ -1467,19 +1521,34 @@ static const char * const bch2_read_bio_flags[] = { NULL }; -void bch2_read_bio_to_text(struct printbuf *out, struct bch_read_bio *rbio) +void bch2_read_bio_to_text(struct printbuf *out, + struct bch_fs *c, + struct bch_read_bio *rbio) { + if (!out->nr_tabstops) + printbuf_tabstop_push(out, 20); + + bch2_read_err_msg(c, out, rbio, rbio->read_pos); + prt_newline(out); + + /* Are we in a retry? */ + + printbuf_indent_add(out, 2); + u64 now = local_clock(); - prt_printf(out, "start_time:\t%llu\n", rbio->start_time ? now - rbio->start_time : 0); - prt_printf(out, "submit_time:\t%llu\n", rbio->submit_time ? now - rbio->submit_time : 0); + prt_printf(out, "start_time:\t"); + bch2_pr_time_units(out, max_t(s64, 0, now - rbio->start_time)); + prt_newline(out); + + prt_printf(out, "submit_time:\t"); + bch2_pr_time_units(out, max_t(s64, 0, now - rbio->submit_time)); + prt_newline(out); if (!rbio->split) prt_printf(out, "end_io:\t%ps\n", rbio->end_io); else prt_printf(out, "parent:\t%px\n", rbio->parent); - prt_printf(out, "bi_end_io:\t%ps\n", rbio->bio.bi_end_io); - prt_printf(out, "promote:\t%u\n", rbio->promote); prt_printf(out, "bounce:\t%u\n", rbio->bounce); prt_printf(out, "split:\t%u\n", rbio->split); @@ -1498,6 +1567,7 @@ void bch2_read_bio_to_text(struct printbuf *out, struct bch_read_bio *rbio) prt_newline(out); bch2_bio_to_text(out, &rbio->bio); + printbuf_indent_sub(out, 2); } void bch2_fs_io_read_exit(struct bch_fs *c) |