path: root/fs/bcachefs/io_read.c
Diffstat (limited to 'fs/bcachefs/io_read.c')
-rw-r--r--  fs/bcachefs/io_read.c  162
1 file changed, 116 insertions, 46 deletions
diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c
index 210b6adc359f..b8ccd8c930e1 100644
--- a/fs/bcachefs/io_read.c
+++ b/fs/bcachefs/io_read.c
@@ -39,38 +39,73 @@ MODULE_PARM_DESC(read_corrupt_ratio, "");
#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT
+static inline u32 bch2_dev_congested_read(struct bch_dev *ca, u64 now)
+{
+ s64 congested = atomic_read(&ca->congested);
+ u64 last = READ_ONCE(ca->congested_last);
+ if (time_after64(now, last))
+ congested -= (now - last) >> 12;
+
+ return clamp(congested, 0LL, CONGESTED_MAX);
+}
+
static bool bch2_target_congested(struct bch_fs *c, u16 target)
{
const struct bch_devs_mask *devs;
unsigned d, nr = 0, total = 0;
- u64 now = local_clock(), last;
- s64 congested;
- struct bch_dev *ca;
-
- if (!target)
- return false;
+ u64 now = local_clock();
guard(rcu)();
devs = bch2_target_to_mask(c, target) ?:
&c->rw_devs[BCH_DATA_user];
for_each_set_bit(d, devs->d, BCH_SB_MEMBERS_MAX) {
- ca = rcu_dereference(c->devs[d]);
+ struct bch_dev *ca = rcu_dereference(c->devs[d]);
if (!ca)
continue;
- congested = atomic_read(&ca->congested);
- last = READ_ONCE(ca->congested_last);
- if (time_after64(now, last))
- congested -= (now - last) >> 12;
-
- total += max(congested, 0LL);
+ total += bch2_dev_congested_read(ca, now);
nr++;
}
return get_random_u32_below(nr * CONGESTED_MAX) < total;
}
+void bch2_dev_congested_to_text(struct printbuf *out, struct bch_dev *ca)
+{
+ printbuf_tabstop_push(out, 32);
+
+ prt_printf(out, "current:\t%u%%\n",
+ bch2_dev_congested_read(ca, local_clock()) *
+ 100 / CONGESTED_MAX);
+
+ prt_printf(out, "raw:\t%i/%u\n", atomic_read(&ca->congested), CONGESTED_MAX);
+
+ prt_printf(out, "last io over threshold:\t");
+ bch2_pr_time_units(out, local_clock() - ca->congested_last);
+ prt_newline(out);
+
+ prt_printf(out, "read latency threshold:\t");
+ bch2_pr_time_units(out,
+ ca->io_latency[READ].quantiles.entries[QUANTILE_IDX(1)].m << 2);
+ prt_newline(out);
+
+ prt_printf(out, "median read latency:\t");
+ bch2_pr_time_units(out,
+ ca->io_latency[READ].quantiles.entries[QUANTILE_IDX(7)].m);
+ prt_newline(out);
+
+ prt_printf(out, "write latency threshold:\t");
+ bch2_pr_time_units(out,
+ ca->io_latency[WRITE].quantiles.entries[QUANTILE_IDX(1)].m << 3);
+ prt_newline(out);
+
+ prt_printf(out, "median write latency:\t");
+ bch2_pr_time_units(out,
+ ca->io_latency[WRITE].quantiles.entries[QUANTILE_IDX(7)].m);
+ prt_newline(out);
+}
+
#else
static bool bch2_target_congested(struct bch_fs *c, u16 target)
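The refactored helper is worth a closer look: ca->congested is a raw counter bumped when an I/O exceeds the device's latency threshold, and it decays by one unit per 4096 ns ('>> 12') of local_clock() time since the last over-threshold I/O, clamped to [0, CONGESTED_MAX]. With that, bch2_target_congested() is a simple probabilistic throttle: a read is declared congested with probability total / (nr * CONGESTED_MAX). A standalone model of the decay arithmetic (CONGESTED_MAX == 1024 is an assumption here; check the bcachefs headers for the real value):

    /* Standalone model of bch2_dev_congested_read()'s decay. */
    #include <stdint.h>
    #include <stdio.h>

    #define CONGESTED_MAX 1024      /* assumed value, for illustration only */

    static uint32_t congested_read(int64_t congested, uint64_t now_ns,
                                   uint64_t last_ns)
    {
            /* one unit of congestion drains every 4096 ns */
            if (now_ns > last_ns)
                    congested -= (int64_t)((now_ns - last_ns) >> 12);

            return congested < 0 ? 0 :
                   congested > CONGESTED_MAX ? CONGESTED_MAX :
                   (uint32_t)congested;
    }

    int main(void)
    {
            /* 512 units read 2 ms later: 2000000 >> 12 = 488, so 24 remain */
            printf("%u\n", congested_read(512, 3000000, 1000000));
            return 0;
    }

So a fully saturated counter drains in roughly CONGESTED_MAX * 4096 ns, about 4 ms under the assumed maximum.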
@@ -130,22 +165,32 @@ static inline int should_promote(struct bch_fs *c, struct bkey_s_c k,
if (!have_io_error(failed)) {
BUG_ON(!opts.promote_target);
- if (!(flags & BCH_READ_may_promote))
+ if (!(flags & BCH_READ_may_promote)) {
+ count_event(c, io_read_nopromote_may_not);
return bch_err_throw(c, nopromote_may_not);
+ }
- if (bch2_bkey_has_target(c, k, opts.promote_target))
+ if (bch2_bkey_has_target(c, k, opts.promote_target)) {
+ count_event(c, io_read_nopromote_already_promoted);
return bch_err_throw(c, nopromote_already_promoted);
+ }
- if (bkey_extent_is_unwritten(k))
+ if (bkey_extent_is_unwritten(k)) {
+ count_event(c, io_read_nopromote_unwritten);
return bch_err_throw(c, nopromote_unwritten);
+ }
- if (bch2_target_congested(c, opts.promote_target))
+ if (bch2_target_congested(c, opts.promote_target)) {
+ count_event(c, io_read_nopromote_congested);
return bch_err_throw(c, nopromote_congested);
+ }
}
if (rhashtable_lookup_fast(&c->promote_table, &pos,
- bch_promote_params))
+ bch_promote_params)) {
+ count_event(c, io_read_nopromote_in_flight);
return bch_err_throw(c, nopromote_in_flight);
+ }
return 0;
}
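Each early return in should_promote() now increments a dedicated event counter, so the distribution of "why didn't we promote?" becomes observable instead of being folded into one opaque error. A minimal userspace model of the pattern (the per-cpu plumbing and counter names of the real count_event() live in the bcachefs headers; this only shows the shape):

    #include <stdatomic.h>

    enum nopromote_reason {
            NOPROMOTE_MAY_NOT,
            NOPROMOTE_ALREADY_PROMOTED,
            NOPROMOTE_UNWRITTEN,
            NOPROMOTE_CONGESTED,
            NOPROMOTE_IN_FLIGHT,
            NOPROMOTE_NR,
    };

    static _Atomic unsigned long nopromote_counts[NOPROMOTE_NR];

    /* Bump the per-reason counter, then hand back the error, mirroring
     * the count_event() + bch_err_throw() pairs in the hunk above. */
    static inline int bail(enum nopromote_reason reason, int err)
    {
            atomic_fetch_add_explicit(&nopromote_counts[reason], 1,
                                      memory_order_relaxed);
            return err;
    }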
@@ -160,6 +205,7 @@ static noinline void promote_free(struct bch_read_bio *rbio)
BUG_ON(ret);
async_object_list_del(c, promote, op->list_idx);
+ async_object_list_del(c, rbio, rbio->list_idx);
bch2_data_update_exit(&op->write);
@@ -343,16 +389,27 @@ static struct bch_read_bio *promote_alloc(struct btree_trans *trans,
return promote;
nopromote:
- trace_io_read_nopromote(c, ret);
+ if (trace_io_read_nopromote_enabled()) {
+ CLASS(printbuf, buf)();
+ printbuf_indent_add_nextline(&buf, 2);
+ prt_printf(&buf, "%s\n", bch2_err_str(ret));
+ bch2_bkey_val_to_text(&buf, c, k);
+
+ trace_io_read_nopromote(c, buf.buf);
+ }
+ count_event(c, io_read_nopromote);
+
return NULL;
}
-void bch2_promote_op_to_text(struct printbuf *out, struct promote_op *op)
+void bch2_promote_op_to_text(struct printbuf *out,
+ struct bch_fs *c,
+ struct promote_op *op)
{
if (!op->write.read_done) {
prt_printf(out, "parent read: %px\n", op->write.rbio.parent);
printbuf_indent_add(out, 2);
- bch2_read_bio_to_text(out, op->write.rbio.parent);
+ bch2_read_bio_to_text(out, c, op->write.rbio.parent);
printbuf_indent_sub(out, 2);
}
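promote_alloc() now builds its trace string only inside a trace_io_read_nopromote_enabled() check. Tracepoints generate that _enabled() helper as a static-key branch, so the printbuf allocation and bkey formatting cost nothing unless a tracer is attached. A userspace analogue of the guard (the plain bool stands in for the static key):

    #define _GNU_SOURCE
    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>

    static bool trace_enabled;      /* stand-in for the tracepoint static key */

    static void emit_nopromote(int err)
    {
            if (!trace_enabled)
                    return;         /* fast path: no allocation, no formatting */

            char *buf;
            if (asprintf(&buf, "nopromote: err=%d", err) >= 0) {
                    fprintf(stderr, "%s\n", buf);
                    free(buf);
            }
    }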
@@ -380,7 +437,8 @@ static int bch2_read_err_msg_trans(struct btree_trans *trans, struct printbuf *o
static void bch2_read_err_msg(struct bch_fs *c, struct printbuf *out,
struct bch_read_bio *rbio, struct bpos read_pos)
{
- bch2_trans_run(c, bch2_read_err_msg_trans(trans, out, rbio, read_pos));
+ CLASS(btree_trans, trans)(c);
+ bch2_read_err_msg_trans(trans, out, rbio, read_pos);
}
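Several hunks in this patch replace bch2_trans_run()/bch2_trans_get()/bch2_trans_put() and PRINTBUF/printbuf_exit() pairs with CLASS(...) declarations from <linux/cleanup.h>, which attach a destructor to the variable via __attribute__((cleanup)) so every scope exit, early returns included, releases the resource. A minimal userspace model of the mechanism (names here are illustrative, not the bcachefs definitions):

    #include <stdio.h>
    #include <stdlib.h>

    struct trans { int id; };

    static struct trans *trans_get(int id)
    {
            struct trans *t = malloc(sizeof(*t));
            t->id = id;
            return t;
    }

    /* cleanup handlers receive a pointer to the variable leaving scope */
    static void trans_put(struct trans **t)
    {
            printf("put trans %d\n", (*t)->id);
            free(*t);
    }

    #define TRANS_CLASS(var, id) \
            struct trans *var __attribute__((cleanup(trans_put))) = trans_get(id)

    int main(void)
    {
            TRANS_CLASS(trans, 1);
            printf("using trans %d\n", trans->id);
            return 0;               /* trans_put() runs here automatically */
    }

This is why the explicit bch2_trans_put() and printbuf_exit() calls disappear throughout the rest of the diff.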
enum rbio_context {
@@ -450,6 +508,10 @@ static void bch2_rbio_done(struct bch_read_bio *rbio)
if (rbio->start_time)
bch2_time_stats_update(&rbio->c->times[BCH_TIME_data_read],
rbio->start_time);
+#ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS
+ if (rbio->list_idx)
+ async_object_list_del(rbio->c, rbio, rbio->list_idx);
+#endif
bio_endio(&rbio->bio);
}
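With CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS, in-flight read bios register on a debug list so they can be enumerated while outstanding; this hunk (paired with the extra async_object_list_del() in promote_free() above) makes completion unregister them. The idea, modeled in userspace with illustrative names only, where index 0 means "not listed", matching the 'if (rbio->list_idx)' check above:

    #include <pthread.h>
    #include <stddef.h>

    #define MAX_OBJS 128

    static pthread_mutex_t objs_lock = PTHREAD_MUTEX_INITIALIZER;
    static void *objs[MAX_OBJS];

    /* register an in-flight object; returns slot + 1 so 0 means "unlisted" */
    static unsigned obj_list_add(void *obj)
    {
            unsigned idx = 0;
            pthread_mutex_lock(&objs_lock);
            for (unsigned i = 0; i < MAX_OBJS; i++)
                    if (!objs[i]) {
                            objs[i] = obj;
                            idx = i + 1;
                            break;
                    }
            pthread_mutex_unlock(&objs_lock);
            return idx;
    }

    static void obj_list_del(unsigned idx)
    {
            if (!idx)
                    return;         /* was never registered */
            pthread_mutex_lock(&objs_lock);
            objs[idx - 1] = NULL;
            pthread_mutex_unlock(&objs_lock);
    }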
@@ -577,7 +639,7 @@ static void bch2_rbio_retry(struct work_struct *work)
};
struct bch_io_failures failed = { .nr = 0 };
- struct btree_trans *trans = bch2_trans_get(c);
+ CLASS(btree_trans, trans)(c);
struct bkey_buf sk;
bch2_bkey_buf_init(&sk);
@@ -619,7 +681,7 @@ static void bch2_rbio_retry(struct work_struct *work)
}
if (failed.nr || ret) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_log_msg_start(c, &buf);
lockrestart_do(trans,
@@ -647,12 +709,10 @@ static void bch2_rbio_retry(struct work_struct *work)
bch2_io_failures_to_text(&buf, c, &failed);
bch2_print_str_ratelimited(c, KERN_ERR, buf.buf);
- printbuf_exit(&buf);
}
bch2_rbio_done(rbio);
bch2_bkey_buf_exit(&sk, c);
- bch2_trans_put(trans);
}
static void bch2_rbio_error(struct bch_read_bio *rbio,
@@ -740,8 +800,9 @@ out:
static noinline void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio)
{
- bch2_trans_commit_do(rbio->c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
- __bch2_rbio_narrow_crcs(trans, rbio));
+ CLASS(btree_trans, trans)(rbio->c);
+ commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+ __bch2_rbio_narrow_crcs(trans, rbio));
}
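bch2_trans_commit_do() bundled transaction creation, the commit loop, and teardown; with the transaction now supplied by CLASS(btree_trans, trans), the plain commit_do() retry loop is enough. Roughly this shape (a hedged sketch, not the real macro, which lives in the bcachefs btree headers):

    /* sketch only: run the body, then commit, restarting the transaction
     * whenever the body or the commit asks for a restart */
    #define commit_do_sketch(trans, disk_res, journal_seq, flags, do_stmt)  \
    ({                                                                      \
            int _ret;                                                       \
            do {                                                            \
                    bch2_trans_begin(trans);                                \
                    _ret = (do_stmt) ?:                                     \
                           bch2_trans_commit(trans, disk_res,               \
                                             journal_seq, flags);           \
            } while (bch2_err_matches(_ret, BCH_ERR_transaction_restart));  \
            _ret;                                                           \
    })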
static void bch2_read_decompress_err(struct work_struct *work)
@@ -749,7 +810,7 @@ static void bch2_read_decompress_err(struct work_struct *work)
struct bch_read_bio *rbio =
container_of(work, struct bch_read_bio, work);
struct bch_fs *c = rbio->c;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_read_err_msg(c, &buf, rbio, rbio->read_pos);
prt_str(&buf, "decompression error");
@@ -761,7 +822,6 @@ static void bch2_read_decompress_err(struct work_struct *work)
bch_err_ratelimited(c, "%s", buf.buf);
bch2_rbio_error(rbio, -BCH_ERR_data_read_decompress_err, BLK_STS_IOERR);
- printbuf_exit(&buf);
}
static void bch2_read_decrypt_err(struct work_struct *work)
@@ -769,7 +829,7 @@ static void bch2_read_decrypt_err(struct work_struct *work)
struct bch_read_bio *rbio =
container_of(work, struct bch_read_bio, work);
struct bch_fs *c = rbio->c;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_read_err_msg(c, &buf, rbio, rbio->read_pos);
prt_str(&buf, "decrypt error");
@@ -781,7 +841,6 @@ static void bch2_read_decrypt_err(struct work_struct *work)
bch_err_ratelimited(c, "%s", buf.buf);
bch2_rbio_error(rbio, -BCH_ERR_data_read_decrypt_err, BLK_STS_IOERR);
- printbuf_exit(&buf);
}
/* Inner part that may run in process context */
@@ -963,7 +1022,7 @@ static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
struct btree_iter iter;
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
int ret;
bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
@@ -1000,7 +1059,6 @@ static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans,
bch2_fs_inconsistent(c, "%s", buf.buf);
bch2_trans_iter_exit(trans, &iter);
- printbuf_exit(&buf);
}
int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
@@ -1052,25 +1110,22 @@ retry_pick:
trace_and_count(c, io_read_fail_and_poison, &orig->bio);
}
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_read_err_msg_trans(trans, &buf, orig, read_pos);
prt_printf(&buf, "%s\n ", bch2_err_str(ret));
bch2_bkey_val_to_text(&buf, c, k);
-
bch_err_ratelimited(c, "%s", buf.buf);
- printbuf_exit(&buf);
goto err;
}
if (unlikely(bch2_csum_type_is_encryption(pick.crc.csum_type)) &&
!c->chacha20_key_set) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
bch2_read_err_msg_trans(trans, &buf, orig, read_pos);
prt_printf(&buf, "attempting to read encrypted data without encryption key\n ");
bch2_bkey_val_to_text(&buf, c, k);
bch_err_ratelimited(c, "%s", buf.buf);
- printbuf_exit(&buf);
ret = bch_err_throw(c, data_read_no_encryption_key);
goto err;
}
@@ -1439,13 +1494,12 @@ err:
if (unlikely(ret)) {
if (ret != -BCH_ERR_extent_poisoned) {
- struct printbuf buf = PRINTBUF;
+ CLASS(printbuf, buf)();
lockrestart_do(trans,
bch2_inum_offset_err_msg_trans(trans, &buf, inum,
bvec_iter.bi_sector << 9));
prt_printf(&buf, "data read error: %s", bch2_err_str(ret));
bch_err_ratelimited(c, "%s", buf.buf);
- printbuf_exit(&buf);
}
rbio->bio.bi_status = BLK_STS_IOERR;
@@ -1467,19 +1521,34 @@ static const char * const bch2_read_bio_flags[] = {
NULL
};
-void bch2_read_bio_to_text(struct printbuf *out, struct bch_read_bio *rbio)
+void bch2_read_bio_to_text(struct printbuf *out,
+ struct bch_fs *c,
+ struct bch_read_bio *rbio)
{
+ if (!out->nr_tabstops)
+ printbuf_tabstop_push(out, 20);
+
+ bch2_read_err_msg(c, out, rbio, rbio->read_pos);
+ prt_newline(out);
+
+ /* Are we in a retry? */
+
+ printbuf_indent_add(out, 2);
+
u64 now = local_clock();
- prt_printf(out, "start_time:\t%llu\n", rbio->start_time ? now - rbio->start_time : 0);
- prt_printf(out, "submit_time:\t%llu\n", rbio->submit_time ? now - rbio->submit_time : 0);
+ prt_printf(out, "start_time:\t");
+ bch2_pr_time_units(out, max_t(s64, 0, now - rbio->start_time));
+ prt_newline(out);
+
+ prt_printf(out, "submit_time:\t");
+ bch2_pr_time_units(out, max_t(s64, 0, now - rbio->submit_time));
+ prt_newline(out);
if (!rbio->split)
prt_printf(out, "end_io:\t%ps\n", rbio->end_io);
else
prt_printf(out, "parent:\t%px\n", rbio->parent);
- prt_printf(out, "bi_end_io:\t%ps\n", rbio->bio.bi_end_io);
-
prt_printf(out, "promote:\t%u\n", rbio->promote);
prt_printf(out, "bounce:\t%u\n", rbio->bounce);
prt_printf(out, "split:\t%u\n", rbio->split);
@@ -1498,6 +1567,7 @@ void bch2_read_bio_to_text(struct printbuf *out, struct bch_read_bio *rbio)
prt_newline(out);
bch2_bio_to_text(out, &rbio->bio);
+ printbuf_indent_sub(out, 2);
}
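The reworked bch2_read_bio_to_text() leans on printbuf tabstops: printbuf_tabstop_push(out, 20) makes each '\t' in prt_printf() pad the current line out to column 20, so the key/value pairs line up. The effect, approximated in userspace with fixed-width padding:

    #include <stdio.h>

    /* approximate a 20-column tabstop with a printf field width */
    static void prt_kv(const char *key, const char *val)
    {
            printf("%-20s%s\n", key, val);
    }

    int main(void)
    {
            prt_kv("start_time:", "1.2 ms");
            prt_kv("submit_time:", "1.1 ms");
            prt_kv("promote:", "0");
            return 0;
    }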
void bch2_fs_io_read_exit(struct bch_fs *c)