summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Kent Overstreet <kent.overstreet@gmail.com> 2018-05-01 14:21:01 -0400
committer: Kent Overstreet <kent.overstreet@gmail.com> 2018-05-04 13:47:54 -0400
commit: d5e73ca148dc8ceb9c2125f4817674a2f5c7df60 (patch)
tree: e0e788aa40f9bd0771a3a00a204a673ff7192005
parent: fc837b568a5dea0215ea5c5ce271eb3c1c70b956 (diff)
bcachefs: Don't promote when target is congested (branch/tag: bcachefs-v4.15)
-rw-r--r-- fs/bcachefs/bcachefs.h 5
-rw-r--r-- fs/bcachefs/io.c 71
-rw-r--r-- fs/bcachefs/sysfs.c 6
-rw-r--r-- fs/bcachefs/util.c 13
-rw-r--r-- fs/bcachefs/util.h 11
5 files changed, 96 insertions, 10 deletions
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 1b71ae08f7f4..206c30f4b516 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -279,6 +279,7 @@ do { \
BCH_TIME_STAT(btree_read) \
BCH_TIME_STAT(data_write) \
BCH_TIME_STAT(data_read) \
+ BCH_TIME_STAT(data_promote) \
BCH_TIME_STAT(journal_write) \
BCH_TIME_STAT(journal_delay) \
BCH_TIME_STAT(journal_blocked) \
@@ -420,6 +421,10 @@ struct bch_dev {
atomic64_t cur_latency[2];
struct time_stats io_latency[2];
+#define CONGESTED_MAX 1024
+ atomic_t congested;
+ u64 congested_last;
+
struct io_count __percpu *io_done;
};
diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c
index 069c3bb84815..bb6565226846 100644
--- a/fs/bcachefs/io.c
+++ b/fs/bcachefs/io.c
@@ -14,6 +14,7 @@
#include "compress.h"
#include "clock.h"
#include "debug.h"
+#include "disk_groups.h"
#include "error.h"
#include "extents.h"
#include "io.h"
@@ -30,6 +31,63 @@
#include <trace/events/bcachefs.h>
+/*
+ * Decide, probabilistically, whether @target currently looks congested.
+ *
+ * For every device in the target's mask that is present in c->devs[], the
+ * device's congested counter is read and reduced by (now - last) >> 12, where
+ * last is the time the counter was last bumped (congested_last) and now is
+ * local_clock(); a result below zero counts as zero.  The summed level is then
+ * compared against bch2_rand_range(nr * CONGESTED_MAX) - presumably a uniform
+ * draw in [0, nr * CONGESTED_MAX) - so the chance of returning true grows with
+ * the average congestion of the target's devices.
+ *
+ * Returns false when @target is 0 or when none of its devices are present.
+ */
+static bool bch2_target_congested(struct bch_fs *c, u16 target)
+{
+	const struct bch_devs_mask *devs;
+	unsigned d, nr = 0, total = 0;
+	u64 now = local_clock(), last;
+	s64 congested;
+	struct bch_dev *ca;
+
+	if (!target)
+		return false;
+
+	rcu_read_lock();
+	/*
+	 * NOTE(review): devs is dereferenced unconditionally below; this
+	 * assumes bch2_target_to_mask() never returns NULL - confirm.
+	 */
+	devs = bch2_target_to_mask(c, target);
+	for_each_set_bit(d, devs->d, BCH_SB_MEMBERS_MAX) {
+		ca = rcu_dereference(c->devs[d]);
+		if (!ca)
+			continue;
+
+		congested = atomic_read(&ca->congested);
+		last = READ_ONCE(ca->congested_last);
+		/* decay the counter by the time elapsed since the last bump: */
+		if (time_after64(now, last))
+			congested -= (now - last) >> 12;
+
+		total += max(congested, 0LL);
+		nr++;
+	}
+	rcu_read_unlock();
+
+	/*
+	 * No devices were found: there is nothing to be congested, and we must
+	 * not ask for a random number from an empty range.
+	 */
+	if (!nr)
+		return false;
+
+	return bch2_rand_range(nr * CONGESTED_MAX) < total;
+}
+
+/*
+ * Update @ca's congestion counter after an IO of direction @rw completed.
+ *
+ * @io_latency is the latency of the IO just completed and @now the timestamp
+ * it completed at.  The threshold is the quantile entry QUANTILE_IDX(1) of the
+ * device's latency stats for @rw, multiplied by 4 for reads and 8 for writes.
+ * An IO slower than the threshold bumps the counter (unless it already
+ * reached CONGESTED_MAX) and records @now in congested_last; any other IO
+ * decrements a positive counter by one.
+ */
+static inline void bch2_congested_acct(struct bch_dev *ca, u64 io_latency,
+				       u64 now, int rw)
+{
+	u64 latency_capable =
+		ca->io_latency[rw].quantiles.entries[QUANTILE_IDX(1)].m;
+	/* ideally we'd be taking into account the device's variance here: */
+	u64 latency_threshold = latency_capable << (rw == READ ? 2 : 3);
+	s64 latency_over = io_latency - latency_threshold;
+
+	if (latency_threshold && latency_over > 0) {
+		/*
+		 * bump up congested by approximately latency_over * 4 /
+		 * latency_threshold - we don't need much accuracy here so don't
+		 * bother with the divide:
+		 */
+		if (atomic_read(&ca->congested) < CONGESTED_MAX)
+			atomic_add(latency_over >>
+				   max_t(int, ilog2(latency_threshold) - 2, 0),
+				   &ca->congested);
+
+		/*
+		 * congested_last is read without locking via READ_ONCE() in
+		 * bch2_target_congested(); mark the store to match:
+		 */
+		WRITE_ONCE(ca->congested_last, now);
+	} else if (atomic_read(&ca->congested) > 0) {
+		atomic_dec(&ca->congested);
+	}
+}
+
void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw)
{
atomic64_t *latency = &ca->cur_latency[rw];
@@ -51,10 +109,12 @@ void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw)
now & ~(~0 << 5))
break;
- new = ewma_add(old, io_latency, 6);
+ new = ewma_add(old, io_latency, 5);
} while ((v = atomic64_cmpxchg(latency, old, new)) != old);
- bch2_time_stats_update(&ca->io_latency[rw], submit_time);
+ bch2_congested_acct(ca, io_latency, now, rw);
+
+ __bch2_time_stats_update(&ca->io_latency[rw], submit_time, now);
}
/* Allocate, free from mempool: */
@@ -852,6 +912,7 @@ void bch2_write(struct closure *cl)
struct promote_op {
struct closure cl;
+ u64 start_time;
struct rhash_head hash;
struct bpos pos;
@@ -886,6 +947,9 @@ static inline bool should_promote(struct bch_fs *c, struct bkey_s_c k,
if (bch2_extent_has_target(c, bkey_s_c_to_extent(k), opts.promote_target))
return false;
+ if (bch2_target_congested(c, opts.promote_target))
+ return false;
+
if (rhashtable_lookup_fast(&c->promote_table, &pos,
bch_promote_params))
return false;
@@ -910,6 +974,8 @@ static void promote_done(struct closure *cl)
container_of(cl, struct promote_op, cl);
struct bch_fs *c = op->write.op.c;
+ bch2_time_stats_update(&c->data_promote_time, op->start_time);
+
bch2_bio_free_pages_pool(c, &op->write.op.wbio.bio);
promote_free(c, op);
}
@@ -961,6 +1027,7 @@ static struct promote_op *__promote_alloc(struct bch_fs *c,
if (!op)
goto err;
+ op->start_time = local_clock();
op->pos = pos;
/*
diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c
index e4381bb5da19..65345d80e4f4 100644
--- a/fs/bcachefs/sysfs.c
+++ b/fs/bcachefs/sysfs.c
@@ -147,6 +147,7 @@ read_attribute(io_latency_read);
read_attribute(io_latency_write);
read_attribute(io_latency_stats_read);
read_attribute(io_latency_stats_write);
+read_attribute(congested);
read_attribute(bucket_quantiles_last_read);
read_attribute(bucket_quantiles_last_write);
@@ -874,6 +875,10 @@ SHOW(bch2_dev)
if (attr == &sysfs_io_latency_stats_write)
return bch2_time_stats_print(&ca->io_latency[WRITE], buf, PAGE_SIZE);
+ sysfs_printf(congested, "%u%%",
+ clamp(atomic_read(&ca->congested), 0, CONGESTED_MAX)
+ * 100 / CONGESTED_MAX);
+
if (attr == &sysfs_bucket_quantiles_last_read)
return show_quantiles(c, ca, buf, bucket_last_io_fn, (void *) 0);
if (attr == &sysfs_bucket_quantiles_last_write)
@@ -970,6 +975,7 @@ struct attribute *bch2_dev_files[] = {
&sysfs_io_latency_write,
&sysfs_io_latency_stats_read,
&sysfs_io_latency_stats_write,
+ &sysfs_congested,
/* alloc info - other stats: */
&sysfs_bucket_quantiles_last_read,
diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c
index 33e2d1fa795d..2e89a9755930 100644
--- a/fs/bcachefs/util.c
+++ b/fs/bcachefs/util.c
@@ -233,8 +233,8 @@ void bch2_quantiles_update(struct quantiles *q, u64 v)
/* time stats: */
-static void __bch2_time_stats_update(struct time_stats *stats,
- u64 start, u64 end)
+static void bch2_time_stats_update_one(struct time_stats *stats,
+ u64 start, u64 end)
{
u64 duration, freq;
@@ -260,14 +260,13 @@ static void __bch2_time_stats_update(struct time_stats *stats,
bch2_quantiles_update(&stats->quantiles, duration);
}
-void bch2_time_stats_update(struct time_stats *stats, u64 start)
+void __bch2_time_stats_update(struct time_stats *stats, u64 start, u64 end)
{
- u64 end = local_clock();
unsigned long flags;
if (!stats->buffer) {
spin_lock_irqsave(&stats->lock, flags);
- __bch2_time_stats_update(stats, start, end);
+ bch2_time_stats_update_one(stats, start, end);
if (stats->average_frequency < 32 &&
stats->count > 1024)
@@ -285,7 +284,7 @@ void bch2_time_stats_update(struct time_stats *stats, u64 start)
BUG_ON(b->nr >= ARRAY_SIZE(b->entries));
b->entries[b->nr++] = (struct time_stat_buffer_entry) {
.start = start,
- .end = end
+ .end = end
};
if (b->nr == ARRAY_SIZE(b->entries)) {
@@ -293,7 +292,7 @@ void bch2_time_stats_update(struct time_stats *stats, u64 start)
for (i = b->entries;
i < b->entries + ARRAY_SIZE(b->entries);
i++)
- __bch2_time_stats_update(stats, i->start, i->end);
+ bch2_time_stats_update_one(stats, i->start, i->end);
spin_unlock_irqrestore(&stats->lock, flags);
b->nr = 0;
diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
index 97e4e6d00a37..7c7264f4f595 100644
--- a/fs/bcachefs/util.h
+++ b/fs/bcachefs/util.h
@@ -372,6 +372,9 @@ ssize_t bch2_scnprint_flag_list(char *, size_t, const char * const[], u64);
u64 bch2_read_flag_list(char *, const char * const[]);
#define NR_QUANTILES 15
+#define QUANTILE_IDX(i) inorder_to_eytzinger0(i, NR_QUANTILES)
+#define QUANTILE_FIRST eytzinger0_first(NR_QUANTILES)
+#define QUANTILE_LAST eytzinger0_last(NR_QUANTILES)
struct quantiles {
struct quantile_entry {
@@ -401,7 +404,13 @@ struct time_stats {
struct time_stat_buffer __percpu *buffer;
};
-void bch2_time_stats_update(struct time_stats *stats, u64 time);
+void __bch2_time_stats_update(struct time_stats *stats, u64, u64);
+
+/*
+ * Record an event that began at @start and finishes now: the end time is
+ * sampled from local_clock() and both timestamps are handed to
+ * __bch2_time_stats_update().
+ */
+static inline void bch2_time_stats_update(struct time_stats *stats, u64 start)
+{
+	u64 end = local_clock();
+
+	__bch2_time_stats_update(stats, start, end);
+}
+
size_t bch2_time_stats_print(struct time_stats *, char *, size_t);
void bch2_time_stats_exit(struct time_stats *);