author | Kent Overstreet <kent.overstreet@gmail.com> | 2015-04-20 19:48:02 -0700
---|---|---
committer | Kent Overstreet <kent.overstreet@gmail.com> | 2016-10-07 12:33:56 -0800
commit | 31f03656f111a425d1562b7e91870787bff083f6 (patch)
tree | 38abccacdea22a6294309371838a4e07bd99ee6d
parent | e5cd95164188607e80cb45d3363ea6c38c457de1 (diff)
bcache: Lockless journal res get/put
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
-rw-r--r-- | drivers/md/bcache/journal.c | 234
-rw-r--r-- | drivers/md/bcache/journal_types.h | 41
2 files changed, 204 insertions, 71 deletions
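
The heart of the patch is the new `union journal_res_state` in journal_types.h below: the spinlock-protected pair `res_count`/`cur_entry_offset` is collapsed into a single 64-bit word, so reservations can be taken and released with compare-and-exchange on `j->reservations` instead of taking `j->lock`. As a rough illustration of that packed representation, here is a standalone userspace sketch; the names mirror the patch, but C11 atomics stand in for the kernel's `atomic64_t`/`cmpxchg()`, and the field layout (count in the low half) assumes little-endian, so treat it as a model rather than the kernel code:

```c
/* Userspace model of the patch's packed reservation state. */
#include <stdatomic.h>
#include <stdint.h>

#define CUR_ENTRY_CLOSED INT32_MAX      /* sentinel offset: entry is closed */

union res_state {
        _Atomic uint64_t counter;       /* atomic view, for CAS/add/sub */
        uint64_t         v;             /* raw view, for building values */
        struct {                        /* field view (little-endian) */
                uint32_t count;             /* outstanding reservations */
                uint32_t cur_entry_offset;  /* u64s already claimed */
        };
};

/* Mirrors journal_res_state(): pack the two fields into one value. */
static inline union res_state res_state(uint32_t count, uint32_t offset)
{
        return (union res_state) {
                .count            = count,
                .cur_entry_offset = offset,
        };
}
```

Writing `S32_MAX` into `cur_entry_offset` is how the patch marks an entry closed: any subsequent `offset + u64s` check against the entry size then fails, so no new reservation can sneak in while the write is being issued.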
```diff
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 2ec4ae3ae602..2a1f6ee8dc88 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -17,7 +17,7 @@
 
 #include <trace/events/bcache.h>
 
-static void bch_journal_next_entry(struct journal *);
+static void __bch_journal_next_entry(struct journal *);
 
 /* Sequence number of oldest dirty journal entry */
@@ -768,12 +768,53 @@ void bch_journal_mark(struct cache_set *c, struct list_head *list)
 	}
 }
 
+static union journal_res_state journal_res_state(unsigned count,
+						 unsigned entry_offset)
+{
+	return (union journal_res_state) {
+		.count			= count,
+		.cur_entry_offset	= entry_offset,
+	};
+}
+
+static bool journal_entry_is_open(struct journal *j)
+{
+	return j->reservations.cur_entry_offset < S32_MAX;
+}
+
+/*
+ * Closes the current journal entry so that new reservations cannot be taken on
+ * it - returns true if the count of outstanding reservations is 0.
+ */
+static bool journal_entry_close(struct journal *j)
+{
+	union journal_res_state old, new;
+	u64 v = atomic64_read(&j->reservations.counter);
+
+	do {
+		old.v = new.v = v;
+		if (old.cur_entry_offset == S32_MAX)
+			return old.count == 0;
+
+		new.cur_entry_offset = S32_MAX;
+	} while ((v = cmpxchg(&j->reservations.v, old.v, new.v)) != old.v);
+
+	j->cur->data->u64s = old.cur_entry_offset;
+
+	return old.count == 0;
+}
+
 /* Number of u64s we can write to the current journal bucket */
 
 static void journal_calc_entry_size(struct journal *j)
 {
-	ssize_t u64s = (min_t(size_t,
-			      j->sectors_free,
-			      JSET_SECTORS) << 9) / sizeof(u64);
+	ssize_t u64s;
+
+	BUG_ON(journal_entry_is_open(j) ||
+	       test_bit(JOURNAL_DIRTY, &j->flags));
+
+	u64s = (min_t(size_t,
+		      j->sectors_free,
+		      JSET_SECTORS) << 9) / sizeof(u64);
 
 	/* Subtract the journal header */
 	u64s -= sizeof(struct jset) / sizeof(u64);
@@ -784,11 +825,16 @@ static void journal_calc_entry_size(struct journal *j)
 	 */
 	u64s -= BTREE_ID_NR * (JSET_KEYS_U64s + BKEY_EXTENT_MAX_U64s);
 	u64s -= JSET_KEYS_U64s + j->nr_prio_buckets;
+	u64s = max_t(ssize_t, 0L, u64s);
 
-	j->cur_entry_u64s = max_t(ssize_t, 0L, u64s);
+	if (u64s > j->cur->data->u64s) {
+		j->cur_entry_u64s = max_t(ssize_t, 0L, u64s);
 
-	/* Handle any already added entries */
-	j->cur_entry_offset = j->cur->data->u64s;
+		/* Handle any already added entries */
+		atomic64_set(&j->reservations.counter,
+			     journal_res_state(0, j->cur->data->u64s).v);
+		wake_up(&j->wait);
+	}
 }
 
 void bch_journal_start(struct cache_set *c)
@@ -813,7 +859,7 @@ void bch_journal_start(struct cache_set *c)
 		j->seq++;
 	}
 
-	bch_journal_next_entry(j);
+	__bch_journal_next_entry(j);
 
 	/*
	 * Adding entries to the next journal entry before allocating space on
@@ -1067,7 +1113,7 @@ static void journal_reclaim_work(struct work_struct *work)
 		ja->last_idx = (ja->last_idx + 1) % nr;
 		spin_unlock_irq(&j->lock);
 
-		wake_up(&c->journal.wait);
+		wake_up(&j->wait);
 	}
 
 	/*
@@ -1229,7 +1275,7 @@ static void journal_next_bucket(struct cache_set *c)
 		queue_work(system_long_wq, &j->reclaim_work);
 }
 
-static void bch_journal_next_entry(struct journal *j)
+static void __bch_journal_next_entry(struct journal *j)
 {
 	struct journal_entry_pin_list pin_list, *p;
 
@@ -1252,6 +1298,11 @@ static void bch_journal_next_entry(struct journal *j)
 
 	j->cur->data->seq = ++j->seq;
 	j->cur->data->u64s = 0;
+}
+
+static void bch_journal_next_entry(struct journal *j)
+{
+	__bch_journal_next_entry(j);
 
 	journal_calc_entry_size(j);
 }
@@ -1284,7 +1335,7 @@ static void journal_write_done(struct closure *cl)
 
 	__closure_wake_up(&w->wait);
 
-	atomic_set(&j->in_flight, 0);
+	clear_bit(JOURNAL_IO_IN_FLIGHT, &j->flags);
 	wake_up(&j->wait);
 
 	if (test_bit(JOURNAL_NEED_WRITE, &j->flags))
@@ -1309,15 +1360,14 @@ static void journal_write_locked(struct closure *cl)
 	struct bio_list list;
 	bio_list_init(&list);
 
-	BUG_ON(j->res_count);
+	BUG_ON(j->reservations.count);
 	BUG_ON(journal_full(j));
+	BUG_ON(!test_bit(JOURNAL_DIRTY, &j->flags));
 
 	clear_bit(JOURNAL_NEED_WRITE, &j->flags);
 	clear_bit(JOURNAL_DIRTY, &j->flags);
 	cancel_delayed_work(&j->write_work);
 
-	w->data->u64s = j->cur_entry_offset;
-
 	spin_lock(&c->btree_root_lock);
 
 	for (i = 0; i < BTREE_ID_NR; i++) {
@@ -1333,7 +1383,7 @@ static void journal_write_locked(struct closure *cl)
 	bch_journal_add_prios(j, w->data);
 
 	/* So last_seq is up to date */
-	journal_reclaim_fast(&c->journal);
+	journal_reclaim_fast(j);
 
 	w->data->read_clock = c->prio_clock[READ].hand;
 	w->data->write_clock = c->prio_clock[WRITE].hand;
@@ -1421,20 +1471,23 @@ static void journal_write_locked(struct closure *cl)
 static bool __journal_write(struct journal *j)
 	__releases(j->lock)
 {
-	BUG_ON(!j->res_count &&
-	       !test_bit(JOURNAL_DIRTY, &j->flags));
+	struct cache_set *c = container_of(j, struct cache_set, journal);
 
-	if (!j->res_count &&
-	    !atomic_xchg(&j->in_flight, 1)) {
-		struct cache_set *c =
-			container_of(j, struct cache_set, journal);
+	EBUG_ON(!j->reservations.count &&
+		!test_bit(JOURNAL_DIRTY, &j->flags));
 
-		closure_call(&j->io, journal_write_locked, NULL, &c->cl);
-		return true;
-	} else {
-		spin_unlock_irq(&j->lock);
-		return false;
-	}
+	if (test_bit(JOURNAL_IO_IN_FLIGHT, &j->flags) ||
+	    !journal_entry_close(j))
+		goto nowrite;
+
+	set_bit(JOURNAL_IO_IN_FLIGHT, &j->flags);
+
+	__set_current_state(TASK_RUNNING);
+	closure_call(&j->io, journal_write_locked, NULL, &c->cl);
+	return true;
+nowrite:
+	spin_unlock_irq(&j->lock);
+	return false;
 }
 
 static bool journal_try_write(struct journal *j)
@@ -1486,9 +1539,13 @@ void bch_journal_res_put(struct cache_set *c,
 			 struct closure *parent)
 {
 	struct journal *j = &c->journal;
+	union journal_res_state s;
+	bool do_write = false;
 
 	BUG_ON(!res->ref);
 
+	res->ref = false;
+
 	while (res->u64s) {
 		unsigned actual = jset_u64s(0);
@@ -1498,18 +1555,32 @@
 		res->u64s -= actual;
 	}
 
-	spin_lock_irq(&j->lock);
-	--j->res_count;
-	res->ref = 0;
-	res->u64s = 0;
+	bch_journal_set_dirty(c);
 
 	if (parent && test_bit(JOURNAL_DIRTY, &j->flags)) {
 		BUG_ON(!closure_wait(&j->cur->wait, parent));
 		set_bit(JOURNAL_NEED_WRITE, &j->flags);
 	}
 
-	bch_journal_set_dirty(c);
-	journal_unlock(j);
+	if (test_bit(JOURNAL_NEED_WRITE, &j->flags) &&
+	    !test_bit(JOURNAL_IO_IN_FLIGHT, &j->flags)) {
+		journal_entry_close(j);
+		do_write = true;
+	}
+
+	s.v = atomic64_sub_return(journal_res_state(1, 0).v,
+				  &j->reservations.counter);
+	BUG_ON((int) s.count < 0);
+
+	if (!s.count) {
+		if (do_write) {
+			spin_lock_irq(&j->lock);
+			journal_unlock(j);
+		}
+
+		if (!list_empty_careful(&j->wait.task_list))
+			wake_up(&j->wait);
+	}
 }
 
 static inline bool journal_bucket_has_room(struct journal *j)
 {
@@ -1517,34 +1588,56 @@ static inline bool journal_bucket_has_room(struct journal *j)
 	return (j->sectors_free && fifo_free(&j->pin) > 1);
 }
 
+static inline bool journal_res_get_fast(struct journal *j,
+					struct journal_res *res,
+					unsigned u64s_min,
+					unsigned u64s_max)
+{
+	union journal_res_state old, new;
+	u64 v = atomic64_read(&j->reservations.counter);
+
+	do {
+		old.v = new.v = v;
+
+		/*
+		 * Check if there is still room in the current journal
+		 * entry:
+		 */
+		if (old.cur_entry_offset + u64s_min > j->cur_entry_u64s)
+			return false;
+
+		res->offset = old.cur_entry_offset;
+		res->u64s = min(u64s_max, j->cur_entry_u64s -
+				old.cur_entry_offset);
+
+		new.cur_entry_offset += res->u64s;
+		new.count++;
+	} while ((v = cmpxchg(&j->reservations.v,
+			      old.v, new.v)) != old.v);
+
+	res->ref = true;
+	return true;
+}
+
 static bool __journal_res_get(struct cache_set *c, struct journal_res *res,
 			      unsigned u64s_min, unsigned u64s_max,
 			      u64 *start_time)
 {
 	struct journal *j = &c->journal;
 
-	BUG_ON(res->ref);
-	BUG_ON(u64s_max < u64s_min);
-
-	spin_lock_irq(&j->lock);
-	BUG_ON(!j->cur);
-
 	while (1) {
-		/* Check if there is still room in the current journal entry */
-		if (j->cur_entry_offset + u64s_min <= j->cur_entry_u64s) {
-			res->ref = true;
-			res->offset = j->cur_entry_offset;
-			res->u64s = min(u64s_max, j->cur_entry_u64s -
-					j->cur_entry_offset);
-
-			j->cur_entry_offset += res->u64s;
-			j->res_count++;
-			spin_unlock_irq(&j->lock);
+		if (journal_res_get_fast(j, res, u64s_min, u64s_max))
+			return true;
 
-			if (*start_time)
-				bch_time_stats_update(&j->full_time,
-						      *start_time);
+		spin_lock_irq(&j->lock);
+		/*
+		 * Recheck after taking the lock, so we don't race with another
+		 * thread that just did journal_calc_entry_size() and call
+		 * journal_entry_close() unnecessarily
+		 */
+		if (journal_res_get_fast(j, res, u64s_min, u64s_max)) {
+			spin_unlock_irq(&j->lock);
 			return true;
 		}
@@ -1552,6 +1645,11 @@ static bool __journal_res_get(struct cache_set *c, struct journal_res *res,
 		if (*start_time == 0)
 			*start_time = local_clock();
 
+		if (!journal_entry_close(j)) {
+			spin_unlock_irq(&j->lock);
+			return false;
+		}
+
 		if (test_bit(JOURNAL_DIRTY, &j->flags)) {
 			/*
			 * If the current journal entry isn't empty, try to
@@ -1559,13 +1657,10 @@ static bool __journal_res_get(struct cache_set *c, struct journal_res *res,
 			 * flight, we'll have to wait:
 			 */
 
-			__set_current_state(TASK_RUNNING);
 			if (!journal_try_write(j)) {
 				trace_bcache_journal_entry_full(c);
 				return false;
 			}
-
-			spin_lock_irq(&j->lock);
 		} else {
 			/* Try to get a new journal bucket */
 			journal_next_bucket(c);
@@ -1576,6 +1671,8 @@ static bool __journal_res_get(struct cache_set *c, struct journal_res *res,
 				trace_bcache_journal_full(c);
 				return false;
 			}
+
+			spin_unlock_irq(&j->lock);
 		}
 	}
 }
@@ -1593,10 +1690,20 @@ static bool __journal_res_get(struct cache_set *c, struct journal_res *res,
 void bch_journal_res_get(struct cache_set *c, struct journal_res *res,
 			 unsigned u64s_min, unsigned u64s_max)
 {
+	struct journal *j = &c->journal;
 	u64 start_time = 0;
 
-	wait_event(c->journal.wait,
+	BUG_ON(res->ref);
+	BUG_ON(u64s_max < u64s_min);
+	BUG_ON(!j->cur);
+
+	wait_event(j->wait,
 		   __journal_res_get(c, res, u64s_min, u64s_max,
 				     &start_time));
+
+	BUG_ON(!res->ref);
+
+	if (start_time)
+		bch_time_stats_update(&j->full_time, start_time);
 }
 
 void bch_journal_set_dirty(struct cache_set *c)
@@ -1638,7 +1745,7 @@ void bch_journal_push_seq(struct cache_set *c, u64 seq, struct closure *parent)
 		BUG_ON(!closure_wait(&j->cur->wait, parent));
 		set_bit(JOURNAL_NEED_WRITE, &j->flags);
 	} else if (seq + 1 == j->seq &&
-		   atomic_read(&j->in_flight)) {
+		   test_bit(JOURNAL_IO_IN_FLIGHT, &j->flags)) {
 		struct journal_write *w = (j->cur == j->w)
 			? &j->w[1] : &j->w[0];
@@ -1672,6 +1779,9 @@ int bch_journal_alloc(struct cache_set *c)
 
 	bkey_extent_init(&j->key);
 
+	atomic64_set(&j->reservations.counter,
+		     journal_res_state(0, S32_MAX).v);
+
 	j->w[0].c = c;
 	j->w[1].c = c;
@@ -1699,6 +1809,8 @@ ssize_t bch_journal_print_debug(struct journal *j, char *buf)
 		       "last_seq:\t\t%llu\n"
 		       "last_seq_ondisk:\t%llu\n"
 		       "reservation count:\t%u\n"
+		       "reservation offset:\t%u\n"
+		       "current entry u64s:\t%u\n"
 		       "io in flight:\t\t%i\n"
 		       "need write:\t\t%i\n"
 		       "dirty:\t\t\t%i\n"
@@ -1707,10 +1819,12 @@ ssize_t bch_journal_print_debug(struct journal *j, char *buf)
 		       j->seq,
 		       last_seq(j),
 		       j->last_seq_ondisk,
-		       j->res_count,
-		       atomic_read(&j->in_flight),
+		       j->reservations.count,
+		       j->reservations.cur_entry_offset,
+		       j->cur_entry_u64s,
+		       test_bit(JOURNAL_IO_IN_FLIGHT, &j->flags),
 		       test_bit(JOURNAL_NEED_WRITE, &j->flags),
-		       test_bit(JOURNAL_DIRTY, &j->flags),
+		       test_bit(JOURNAL_DIRTY, &j->flags),
 		       test_bit(JOURNAL_REPLAY_DONE, &j->flags));
 
 	group_for_each_cache_rcu(ca, &c->cache_tiers[0], iter) {
diff --git a/drivers/md/bcache/journal_types.h b/drivers/md/bcache/journal_types.h
index 6519cc40dde0..65935b0b226b 100644
--- a/drivers/md/bcache/journal_types.h
+++ b/drivers/md/bcache/journal_types.h
@@ -1,6 +1,7 @@
 #ifndef _BCACHE_JOURNAL_TYPES_H
 #define _BCACHE_JOURNAL_TYPES_H
 
+#include <linux/cache.h>
 #include <linux/workqueue.h>
 
 #include "fifo.h"
@@ -48,20 +49,44 @@ struct journal_seq_blacklist {
 	struct list_head nodes;
 };
 
+union journal_res_state {
+	struct {
+		atomic64_t counter;
+	};
+
+	struct {
+		u64 v;
+	};
+
+	struct {
+		unsigned count;
+		unsigned cur_entry_offset;
+	};
+};
+
 /* Embedded in struct cache_set */
 struct journal {
+	/* Fastpath stuff up front: */
+
 	unsigned long flags;
 #define JOURNAL_NEED_WRITE	0
 #define JOURNAL_DIRTY		1
 #define JOURNAL_REPLAY_DONE	2
-	atomic_t in_flight;
+#define JOURNAL_IO_IN_FLIGHT	3
 
-	spinlock_t lock;
-
-	unsigned res_count;
-	unsigned cur_entry_offset;
+	union journal_res_state reservations;
 	unsigned cur_entry_u64s;
+	struct journal_write *cur;
+
+	/*
+	 * Two journal entries -- one is currently open for new entries, the
+	 * other is possibly being written out.
+	 */
+	struct journal_write w[2];
+
+	spinlock_t lock;
 
 	/* minimum sectors free in the bucket(s) we're currently writing to */
 	unsigned sectors_free;
@@ -105,12 +130,6 @@ struct journal {
 
 	struct work_struct reclaim_work;
 
-	/*
-	 * Two journal entries -- one is currently open for new entries, the
-	 * other is possibly being written out.
-	 */
-	struct journal_write w[2], *cur;
-
 	u64 prio_buckets[MAX_CACHES_PER_SET];
 	unsigned nr_prio_buckets;
```
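
For reference, the reservation lifecycle introduced above boils down to three lockless operations: a CAS loop that claims space in the open entry (`journal_res_get_fast()`), a CAS loop that closes the entry (`journal_entry_close()`), and an atomic subtract that drops a reference (the fast path of `bch_journal_res_put()`). The sketch below models that protocol on top of the `res_state` union from the earlier example, under the same userspace assumptions; the slow path, wakeups, and the actual I/O are omitted, and the helper names (`res_get_fast`, `entry_close`, `res_put`) are illustrative rather than the kernel's:

```c
#include <stdbool.h>

struct res {
        uint32_t offset;        /* where in the entry our u64s start */
        uint32_t u64s;          /* how many u64s we claimed */
};

/* Model of journal_res_get_fast(): claim space with one CAS loop. */
static bool res_get_fast(union res_state *s, struct res *res,
                         uint32_t u64s, uint32_t entry_u64s)
{
        union res_state old, new;
        uint64_t v = atomic_load(&s->counter);

        do {
                old.v = new.v = v;

                /* Also fails for a closed entry: offset is then INT32_MAX. */
                if (old.cur_entry_offset + u64s > entry_u64s)
                        return false;

                res->offset = old.cur_entry_offset;
                res->u64s   = u64s;

                new.cur_entry_offset += u64s;
                new.count++;
                /* on CAS failure, v is reloaded with the current value */
        } while (!atomic_compare_exchange_weak(&s->counter, &v, new.v));

        return true;
}

/* Model of journal_entry_close(): forbid new reservations; true if none left. */
static bool entry_close(union res_state *s)
{
        union res_state old, new;
        uint64_t v = atomic_load(&s->counter);

        do {
                old.v = new.v = v;
                if (old.cur_entry_offset == CUR_ENTRY_CLOSED)
                        return old.count == 0;

                new.cur_entry_offset = CUR_ENTRY_CLOSED;
        } while (!atomic_compare_exchange_weak(&s->counter, &v, new.v));

        return old.count == 0;
}

/*
 * Put side: count lives in the low bits, so subtracting res_state(1, 0).v
 * decrements it without disturbing the offset (no borrow while count > 0).
 * Returns the number of reservations still outstanding.
 */
static uint32_t res_put(union res_state *s)
{
        union res_state prev = {
                .v = atomic_fetch_sub(&s->counter, res_state(1, 0).v),
        };

        return prev.count - 1;
}
```

This is why the patch can drop `j->lock` from the hot path: `bch_journal_res_get()` falls back to the locked slow path only after `journal_res_get_fast()` fails, and the put side only takes the spinlock when the atomic decrement shows the count just reached zero and a write needs issuing.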