diff options
author | Kent Overstreet <kent.overstreet@gmail.com> | 2018-05-01 14:21:01 -0400 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@gmail.com> | 2018-05-04 13:47:54 -0400 |
commit | d5e73ca148dc8ceb9c2125f4817674a2f5c7df60 (patch) | |
tree | e0e788aa40f9bd0771a3a00a204a673ff7192005 | |
parent | fc837b568a5dea0215ea5c5ce271eb3c1c70b956 (diff) |
bcachefs: Don't promote when target is congested (tag: bcachefs-v4.15)
-rw-r--r-- | fs/bcachefs/bcachefs.h | 5 | ||||
-rw-r--r-- | fs/bcachefs/io.c | 71 | ||||
-rw-r--r-- | fs/bcachefs/sysfs.c | 6 | ||||
-rw-r--r-- | fs/bcachefs/util.c | 13 | ||||
-rw-r--r-- | fs/bcachefs/util.h | 11 |
5 files changed, 96 insertions, 10 deletions
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 1b71ae08f7f4..206c30f4b516 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -279,6 +279,7 @@ do { \ BCH_TIME_STAT(btree_read) \ BCH_TIME_STAT(data_write) \ BCH_TIME_STAT(data_read) \ + BCH_TIME_STAT(data_promote) \ BCH_TIME_STAT(journal_write) \ BCH_TIME_STAT(journal_delay) \ BCH_TIME_STAT(journal_blocked) \ @@ -420,6 +421,10 @@ struct bch_dev { atomic64_t cur_latency[2]; struct time_stats io_latency[2]; +#define CONGESTED_MAX 1024 + atomic_t congested; + u64 congested_last; + struct io_count __percpu *io_done; }; diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c index 069c3bb84815..bb6565226846 100644 --- a/fs/bcachefs/io.c +++ b/fs/bcachefs/io.c @@ -14,6 +14,7 @@ #include "compress.h" #include "clock.h" #include "debug.h" +#include "disk_groups.h" #include "error.h" #include "extents.h" #include "io.h" @@ -30,6 +31,63 @@ #include <trace/events/bcachefs.h> +static bool bch2_target_congested(struct bch_fs *c, u16 target) +{ + const struct bch_devs_mask *devs; + unsigned d, nr = 0, total = 0; + u64 now = local_clock(), last; + s64 congested; + struct bch_dev *ca; + + if (!target) + return false; + + rcu_read_lock(); + devs = bch2_target_to_mask(c, target); + for_each_set_bit(d, devs->d, BCH_SB_MEMBERS_MAX) { + ca = rcu_dereference(c->devs[d]); + if (!ca) + continue; + + congested = atomic_read(&ca->congested); + last = READ_ONCE(ca->congested_last); + if (time_after64(now, last)) + congested -= (now - last) >> 12; + + total += max(congested, 0LL); + nr++; + } + rcu_read_unlock(); + + return bch2_rand_range(nr * CONGESTED_MAX) < total; +} + +static inline void bch2_congested_acct(struct bch_dev *ca, u64 io_latency, + u64 now, int rw) +{ + u64 latency_capable = + ca->io_latency[rw].quantiles.entries[QUANTILE_IDX(1)].m; + /* ideally we'd be taking into account the device's variance here: */ + u64 latency_threshold = latency_capable << (rw == READ ? 
2 : 3); + s64 latency_over = io_latency - latency_threshold; + + if (latency_threshold && latency_over > 0) { + /* + * bump up congested by approximately latency_over * 4 / + * latency_threshold - we don't need much accuracy here so don't + * bother with the divide: + */ + if (atomic_read(&ca->congested) < CONGESTED_MAX) + atomic_add(latency_over >> + max_t(int, ilog2(latency_threshold) - 2, 0), + &ca->congested); + + ca->congested_last = now; + } else if (atomic_read(&ca->congested) > 0) { + atomic_dec(&ca->congested); + } +} + void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw) { atomic64_t *latency = &ca->cur_latency[rw]; @@ -51,10 +109,12 @@ void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw) now & ~(~0 << 5)) break; - new = ewma_add(old, io_latency, 6); + new = ewma_add(old, io_latency, 5); } while ((v = atomic64_cmpxchg(latency, old, new)) != old); - bch2_time_stats_update(&ca->io_latency[rw], submit_time); + bch2_congested_acct(ca, io_latency, now, rw); + + __bch2_time_stats_update(&ca->io_latency[rw], submit_time, now); } /* Allocate, free from mempool: */ @@ -852,6 +912,7 @@ void bch2_write(struct closure *cl) struct promote_op { struct closure cl; + u64 start_time; struct rhash_head hash; struct bpos pos; @@ -886,6 +947,9 @@ static inline bool should_promote(struct bch_fs *c, struct bkey_s_c k, if (bch2_extent_has_target(c, bkey_s_c_to_extent(k), opts.promote_target)) return false; + if (bch2_target_congested(c, opts.promote_target)) + return false; + if (rhashtable_lookup_fast(&c->promote_table, &pos, bch_promote_params)) return false; @@ -910,6 +974,8 @@ static void promote_done(struct closure *cl) container_of(cl, struct promote_op, cl); struct bch_fs *c = op->write.op.c; + bch2_time_stats_update(&c->data_promote_time, op->start_time); + bch2_bio_free_pages_pool(c, &op->write.op.wbio.bio); promote_free(c, op); } @@ -961,6 +1027,7 @@ static struct promote_op *__promote_alloc(struct bch_fs *c, if (!op) goto err; + 
op->start_time = local_clock(); op->pos = pos; /* diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index e4381bb5da19..65345d80e4f4 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -147,6 +147,7 @@ read_attribute(io_latency_read); read_attribute(io_latency_write); read_attribute(io_latency_stats_read); read_attribute(io_latency_stats_write); +read_attribute(congested); read_attribute(bucket_quantiles_last_read); read_attribute(bucket_quantiles_last_write); @@ -874,6 +875,10 @@ SHOW(bch2_dev) if (attr == &sysfs_io_latency_stats_write) return bch2_time_stats_print(&ca->io_latency[WRITE], buf, PAGE_SIZE); + sysfs_printf(congested, "%u%%", + clamp(atomic_read(&ca->congested), 0, CONGESTED_MAX) + * 100 / CONGESTED_MAX); + if (attr == &sysfs_bucket_quantiles_last_read) return show_quantiles(c, ca, buf, bucket_last_io_fn, (void *) 0); if (attr == &sysfs_bucket_quantiles_last_write) @@ -970,6 +975,7 @@ struct attribute *bch2_dev_files[] = { &sysfs_io_latency_write, &sysfs_io_latency_stats_read, &sysfs_io_latency_stats_write, + &sysfs_congested, /* alloc info - other stats: */ &sysfs_bucket_quantiles_last_read, diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index 33e2d1fa795d..2e89a9755930 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -233,8 +233,8 @@ void bch2_quantiles_update(struct quantiles *q, u64 v) /* time stats: */ -static void __bch2_time_stats_update(struct time_stats *stats, - u64 start, u64 end) +static void bch2_time_stats_update_one(struct time_stats *stats, + u64 start, u64 end) { u64 duration, freq; @@ -260,14 +260,13 @@ static void __bch2_time_stats_update(struct time_stats *stats, bch2_quantiles_update(&stats->quantiles, duration); } -void bch2_time_stats_update(struct time_stats *stats, u64 start) +void __bch2_time_stats_update(struct time_stats *stats, u64 start, u64 end) { - u64 end = local_clock(); unsigned long flags; if (!stats->buffer) { spin_lock_irqsave(&stats->lock, flags); - __bch2_time_stats_update(stats, 
start, end); + bch2_time_stats_update_one(stats, start, end); if (stats->average_frequency < 32 && stats->count > 1024) @@ -285,7 +284,7 @@ void bch2_time_stats_update(struct time_stats *stats, u64 start) BUG_ON(b->nr >= ARRAY_SIZE(b->entries)); b->entries[b->nr++] = (struct time_stat_buffer_entry) { .start = start, - .end = end + .end = end }; if (b->nr == ARRAY_SIZE(b->entries)) { @@ -293,7 +292,7 @@ void bch2_time_stats_update(struct time_stats *stats, u64 start) for (i = b->entries; i < b->entries + ARRAY_SIZE(b->entries); i++) - __bch2_time_stats_update(stats, i->start, i->end); + bch2_time_stats_update_one(stats, i->start, i->end); spin_unlock_irqrestore(&stats->lock, flags); b->nr = 0; diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h index 97e4e6d00a37..7c7264f4f595 100644 --- a/fs/bcachefs/util.h +++ b/fs/bcachefs/util.h @@ -372,6 +372,9 @@ ssize_t bch2_scnprint_flag_list(char *, size_t, const char * const[], u64); u64 bch2_read_flag_list(char *, const char * const[]); #define NR_QUANTILES 15 +#define QUANTILE_IDX(i) inorder_to_eytzinger0(i, NR_QUANTILES) +#define QUANTILE_FIRST eytzinger0_first(NR_QUANTILES) +#define QUANTILE_LAST eytzinger0_last(NR_QUANTILES) struct quantiles { struct quantile_entry { @@ -401,7 +404,13 @@ struct time_stats { struct time_stat_buffer __percpu *buffer; }; -void bch2_time_stats_update(struct time_stats *stats, u64 time); +void __bch2_time_stats_update(struct time_stats *stats, u64, u64); + +static inline void bch2_time_stats_update(struct time_stats *stats, u64 start) +{ + __bch2_time_stats_update(stats, start, local_clock()); +} + size_t bch2_time_stats_print(struct time_stats *, char *, size_t); void bch2_time_stats_exit(struct time_stats *); |