summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Kent Overstreet <kent.overstreet@gmail.com> 2015-10-12 21:41:23 -0800
committer Kent Overstreet <kent.overstreet@gmail.com> 2017-01-18 21:35:43 -0900
commit ffc121ef9c4f1f47e23c416921413b4f5ab58e9d (patch)
tree 4474783ddd99e8b349cdffd5bb128c8c738cbf32
parent a110043e8efc692df120c41cc5bcdb60dbb975d7 (diff)
bcache: Persistent reservations
-rw-r--r-- drivers/md/bcache/bcache.h 4
-rw-r--r-- drivers/md/bcache/bkey_methods.h 17
-rw-r--r-- drivers/md/bcache/btree_gc.c 67
-rw-r--r-- drivers/md/bcache/btree_gc.h 5
-rw-r--r-- drivers/md/bcache/btree_types.h 12
-rw-r--r-- drivers/md/bcache/buckets.h 2
-rw-r--r-- drivers/md/bcache/debug.c 3
-rw-r--r-- drivers/md/bcache/extents.c 52
-rw-r--r-- drivers/md/bcache/fs.c 18
-rw-r--r-- drivers/md/bcache/journal.c 12
-rw-r--r-- drivers/md/bcache/sysfs.c 6
-rw-r--r-- include/uapi/linux/bcache.h 14
12 files changed, 161 insertions, 51 deletions
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 82aa2172f048..342dd72948bd 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -590,8 +590,8 @@ struct cache_set {
struct cache_group cache_tiers[CACHE_TIERS];
u64 capacity; /* sectors */
- atomic_long_t sectors_reserved;
- atomic_long_t sectors_reserved_cache;
+ atomic64_t sectors_reserved;
+ atomic64_t sectors_reserved_cache;
struct mutex bucket_lock;
diff --git a/drivers/md/bcache/bkey_methods.h b/drivers/md/bcache/bkey_methods.h
index a21325b87208..a01de6d45be3 100644
--- a/drivers/md/bcache/bkey_methods.h
+++ b/drivers/md/bcache/bkey_methods.h
@@ -8,7 +8,22 @@ enum bkey_type {
BKEY_TYPE_BTREE,
};
-#define btree_node_type(_b) ((_b)->level ? BKEY_TYPE_BTREE : (_b)->btree_id)
+/*
+ * Type of a key in btree @id at level @level: every nonzero level holds
+ * BKEY_TYPE_BTREE keys, level 0 holds keys whose type is the btree id itself.
+ */
+static inline enum bkey_type bkey_type(unsigned level, enum btree_id id)
+{
+	if (level)
+		return BKEY_TYPE_BTREE;
+
+	return (enum bkey_type) id;
+}
+
+/* True only for BKEY_TYPE_BTREE and BKEY_TYPE_EXTENTS, false for all others */
+static inline bool btree_type_has_ptrs(enum bkey_type type)
+{
+	return type == BKEY_TYPE_BTREE || type == BKEY_TYPE_EXTENTS;
+}
struct cache_set;
struct btree;
diff --git a/drivers/md/bcache/btree_gc.c b/drivers/md/bcache/btree_gc.c
index 5c9772986108..bf33efbc50a0 100644
--- a/drivers/md/bcache/btree_gc.c
+++ b/drivers/md/bcache/btree_gc.c
@@ -118,30 +118,62 @@ u8 bch_btree_key_recalc_oldest_gen(struct cache_set *c, struct bkey_s_c k)
return max_stale;
}
-u8 __bch_btree_mark_key(struct cache_set *c, int level, struct bkey_s_c k)
+/*
+ * For runtime mark and sweep:
+ *
+ * Handles keys of type BKEY_TYPE_BTREE and BKEY_TYPE_EXTENTS only; any other
+ * @type is a BUG().  If @k is a data extent its pointers are marked via
+ * bch_mark_pointers(), using CACHE_BTREE_NODE_SIZE() as the size for btree
+ * node keys and the key's own size otherwise.  Returns the result of
+ * bch_btree_key_recalc_oldest_gen() for @k.
+ */
+static u8 __bch_btree_mark_key(struct cache_set *c, enum bkey_type type,
+ struct bkey_s_c k)
{
- if (bkey_extent_is_data(k.k)) {
- struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
+ switch (type) {
+ case BKEY_TYPE_BTREE:
+ case BKEY_TYPE_EXTENTS:
+ if (bkey_extent_is_data(k.k)) {
+ struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
+
+ bch_mark_pointers(c, e,
+ type == BKEY_TYPE_BTREE
+ ? CACHE_BTREE_NODE_SIZE(&c->sb)
+ : e.k->size, false,
+ type == BKEY_TYPE_BTREE,
+ true, GC_POS_MIN);
+ }
- bch_mark_pointers(c, e, level
- ? CACHE_BTREE_NODE_SIZE(&c->sb)
- : e.k->size, false, level != 0,
- true, GC_POS_MIN);
+ return bch_btree_key_recalc_oldest_gen(c, k);
+ default:
+ BUG();
}
-
- return bch_btree_key_recalc_oldest_gen(c, k);
}
static u8 btree_mark_key(struct cache_set *c, struct btree *b,
struct bkey_s_c k)
{
- return __bch_btree_mark_key(c, b->level, k);
+ return __bch_btree_mark_key(c, btree_node_type(b), k); /* mark @k using the key type of the node @b it lives in */
+}
+
+/*
+ * For initial cache set bringup:
+ *
+ * Does what __bch_btree_mark_key() does, but first adds the size of a
+ * BCH_RESERVATION key to c->sectors_reserved.  Only BKEY_TYPE_BTREE and
+ * BKEY_TYPE_EXTENTS are accepted; any other @type is a BUG().
+ */
+u8 __bch_btree_mark_key_initial(struct cache_set *c, enum bkey_type type,
+				struct bkey_s_c k)
+{
+	bool has_ptrs = type == BKEY_TYPE_BTREE || type == BKEY_TYPE_EXTENTS;
+
+	if (!has_ptrs)
+		BUG();
+
+	if (k.k->type == BCH_RESERVATION)
+		atomic64_add(k.k->size, &c->sectors_reserved);
+
+	return __bch_btree_mark_key(c, type, k);
}
-/* Only the extent btree has leafs whose keys point to data */
-static inline bool btree_node_has_ptrs(struct btree *b)
+static u8 btree_mark_key_initial(struct cache_set *c, struct btree *b,
+ struct bkey_s_c k)
{
- return b->btree_id == BTREE_ID_EXTENTS || b->level > 0;
+ return __bch_btree_mark_key_initial(c, btree_node_type(b), k); /* bringup variant: key type is taken from node @b */
}
static bool btree_gc_mark_node(struct cache_set *c, struct btree *b)
@@ -218,7 +250,7 @@ static int bch_gc_btree(struct cache_set *c, enum btree_id btree_id)
spin_lock(&c->btree_root_lock);
b = c->btree_roots[btree_id];
- __bch_btree_mark_key(c, b->level + 1, bkey_i_to_s_c(&b->key));
+ __bch_btree_mark_key(c, BKEY_TYPE_BTREE, bkey_i_to_s_c(&b->key));
gc_pos_set(c, gc_pos_btree_root(b->btree_id));
spin_unlock(&c->btree_root_lock);
@@ -828,11 +860,12 @@ static void bch_initial_gc_btree(struct cache_set *c, enum btree_id id)
for_each_btree_node_key_unpack(&b->keys, &tup,
&node_iter)
- btree_mark_key(c, b, bkey_tup_to_s_c(&tup));
+ btree_mark_key_initial(c, b,
+ bkey_tup_to_s_c(&tup));
}
- __bch_btree_mark_key(c, iter.level + 1,
- bkey_i_to_s_c(&b->key));
+ __bch_btree_mark_key_initial(c, BKEY_TYPE_BTREE,
+ bkey_i_to_s_c(&b->key));
bch_btree_iter_cond_resched(&iter);
}
diff --git a/drivers/md/bcache/btree_gc.h b/drivers/md/bcache/btree_gc.h
index 40b160e715fd..50db7b4613d8 100644
--- a/drivers/md/bcache/btree_gc.h
+++ b/drivers/md/bcache/btree_gc.h
@@ -3,12 +3,15 @@
#include "btree_types.h"
+enum bkey_type;
+
void bch_gc(struct cache_set *);
void bch_gc_thread_stop(struct cache_set *);
int bch_gc_thread_start(struct cache_set *);
int bch_initial_gc(struct cache_set *, struct list_head *);
u8 bch_btree_key_recalc_oldest_gen(struct cache_set *, struct bkey_s_c);
-u8 __bch_btree_mark_key(struct cache_set *, int, struct bkey_s_c);
+u8 __bch_btree_mark_key_initial(struct cache_set *, enum bkey_type,
+ struct bkey_s_c);
/*
* For concurrent mark and sweep (with other index updates), we define a total
diff --git a/drivers/md/bcache/btree_types.h b/drivers/md/bcache/btree_types.h
index 75e320e4466f..0385f7107a4d 100644
--- a/drivers/md/bcache/btree_types.h
+++ b/drivers/md/bcache/btree_types.h
@@ -7,6 +7,7 @@
#include <linux/semaphore.h>
#include <linux/workqueue.h>
+#include "bkey_methods.h"
#include "bset.h"
#include "journal_types.h"
#include "six.h"
@@ -122,4 +123,15 @@ static inline unsigned bset_byte_offset(struct btree *b, void *i)
return i - (void *) b->data;
}
+/*
+ * Type of keys @b contains: BKEY_TYPE_BTREE when the node's level is nonzero,
+ * otherwise the type given by the node's btree id.
+ */
+static inline enum bkey_type btree_node_type(struct btree *b)
+{
+	if (b->level)
+		return BKEY_TYPE_BTREE;
+
+	return (enum bkey_type) b->btree_id;
+}
+
+/* Whether the keys held by @b are of a type btree_type_has_ptrs() accepts */
+static inline bool btree_node_has_ptrs(struct btree *b)
+{
+	enum bkey_type type = btree_node_type(b);
+
+	return btree_type_has_ptrs(type);
+}
+
#endif /* _BCACHE_BTREE_TYPES_H */
diff --git a/drivers/md/bcache/buckets.h b/drivers/md/bcache/buckets.h
index c470adcd04b8..7ea15ec4f9c7 100644
--- a/drivers/md/bcache/buckets.h
+++ b/drivers/md/bcache/buckets.h
@@ -230,7 +230,7 @@ static inline u64 __cache_set_sectors_used(struct cache_set *c)
}
rcu_read_unlock();
- return used + atomic_long_read(&c->sectors_reserved);
+ return used + atomic64_read(&c->sectors_reserved);
}
static inline u64 cache_set_sectors_used(struct cache_set *c)
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index 9f0a42f1b3ee..64667b116331 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -192,7 +192,8 @@ void bch_verify_inode_refs(struct cache_set *c)
for_each_btree_key(&iter, c, BTREE_ID_EXTENTS,
POS(BCACHE_ROOT_INO, 0), k) {
- if (k.k->type == KEY_TYPE_DISCARD)
+ if (k.k->type == KEY_TYPE_DISCARD ||
+ k.k->type == BCH_RESERVATION)
continue;
if (k.k->p.inode != cur_inum &&
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
index f61388bb734d..005b50163a16 100644
--- a/drivers/md/bcache/extents.c
+++ b/drivers/md/bcache/extents.c
@@ -775,9 +775,12 @@ struct btree_nr_keys bch_extent_sort_fix_overlapping(struct btree_keys *b,
static int bch_add_sectors(struct cache_set *c, struct btree *b,
struct bkey_s_c k, u64 offset,
- int sectors, bool fail_if_stale)
+ s64 sectors, bool fail_if_stale)
{
- if (sectors && bkey_extent_is_data(k.k)) {
+ if (!sectors)
+ return 0;
+
+ if (bkey_extent_is_data(k.k)) {
struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
int ret;
@@ -790,13 +793,15 @@ static int bch_add_sectors(struct cache_set *c, struct btree *b,
if (!bkey_extent_is_cached(e.k))
bcache_dev_sectors_dirty_add(c, e.k->p.inode,
offset, sectors);
+ } else if (k.k->type == BCH_RESERVATION) {
+ atomic64_add_bug(sectors, &c->sectors_reserved);
}
return 0;
}
static void bch_subtract_sectors(struct cache_set *c, struct btree *b,
- struct bkey_s_c k, u64 offset, int sectors)
+ struct bkey_s_c k, u64 offset, s64 sectors)
{
bch_add_sectors(c, b, k, offset, -sectors, false); /* same accounting with a negated count; fail_if_stale is false and the return value is ignored */
}
@@ -1362,15 +1367,17 @@ invalid:
return reason;
}
+ case BCH_RESERVATION:
+ return NULL;
+
default:
return "invalid value type";
}
}
-static void bch_extent_debugcheck(struct cache_set *c, struct btree *b,
- struct bkey_s_c k)
+static void bch_extent_debugcheck_extent(struct cache_set *c, struct btree *b,
+ struct bkey_s_c_extent e)
{
- struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
const struct bch_extent_ptr *ptr;
struct cache_member_rcu *mi;
struct cache *ca;
@@ -1462,7 +1469,8 @@ static void bch_extent_debugcheck(struct cache_set *c, struct btree *b,
if (!bkey_extent_is_cached(e.k) &&
replicas < CACHE_SET_DATA_REPLICAS_HAVE(&c->sb)) {
- bch_bkey_val_to_text(c, btree_node_type(b), buf, sizeof(buf), k);
+ bch_bkey_val_to_text(c, btree_node_type(b), buf,
+ sizeof(buf), e.s_c);
cache_set_bug(c,
"extent key bad (too few replicas, %u < %llu): %s",
replicas, CACHE_SET_DATA_REPLICAS_HAVE(&c->sb), buf);
@@ -1474,7 +1482,8 @@ static void bch_extent_debugcheck(struct cache_set *c, struct btree *b,
*/
for (i = 0; i < CACHE_TIERS; i++)
if (ptrs_per_tier[i] > CACHE_SET_DATA_REPLICAS_WANT(&c->sb)) {
- bch_bkey_val_to_text(c, btree_node_type(b), buf, sizeof(buf), k);
+ bch_bkey_val_to_text(c, btree_node_type(b), buf,
+ sizeof(buf), e.s_c);
cache_set_bug(c,
"extent key bad (too many tier %u replicas): %s",
i, buf);
@@ -1484,14 +1493,16 @@ static void bch_extent_debugcheck(struct cache_set *c, struct btree *b,
return;
bad_device:
- bch_bkey_val_to_text(c, btree_node_type(b), buf, sizeof(buf), k);
+ bch_bkey_val_to_text(c, btree_node_type(b), buf,
+ sizeof(buf), e.s_c);
cache_set_bug(c, "extent pointer to dev %u missing device: %s",
ptr->dev, buf);
cache_member_info_put();
return;
bad_ptr:
- bch_bkey_val_to_text(c, btree_node_type(b), buf, sizeof(buf), k);
+ bch_bkey_val_to_text(c, btree_node_type(b), buf,
+ sizeof(buf), e.s_c);
cache_set_bug(c, "extent pointer bad gc mark: %s:\nbucket %zu prio %i "
"gen %i last_gc %i mark 0x%08x",
buf, PTR_BUCKET_NR(ca, ptr),
@@ -1501,6 +1512,20 @@ bad_ptr:
return;
}
+/*
+ * Debug-check dispatcher keyed on the type of @k: BCH_EXTENT and
+ * BCH_EXTENT_CACHED keys are checked by bch_extent_debugcheck_extent(),
+ * BCH_RESERVATION keys are accepted without further checks, and any other
+ * key type is a BUG().
+ */
+static void bch_extent_debugcheck(struct cache_set *c, struct btree *b,
+				  struct bkey_s_c k)
+{
+	switch (k.k->type) {
+	case BCH_EXTENT:
+	case BCH_EXTENT_CACHED:
+		bch_extent_debugcheck_extent(c, b, bkey_s_c_to_extent(k));
+		/* explicit break: do not rely on falling into the next case */
+		break;
+	case BCH_RESERVATION:
+		break;
+	default:
+		BUG();
+	}
+}
+
static void bch_extent_to_text(struct cache_set *c, char *buf,
size_t size, struct bkey_s_c k)
{
@@ -1656,6 +1681,8 @@ bool bch_extent_normalize(struct cache_set *c, struct bkey_s k)
}
return false;
+ case BCH_RESERVATION:
+ return false;
default:
BUG();
}
@@ -1715,6 +1742,10 @@ void bch_extent_pick_ptr_avoiding(struct cache_set *c, struct bkey_s_c k,
rcu_read_unlock();
return;
+ case BCH_RESERVATION:
+ ret->ca = NULL;
+ return;
+
default:
BUG();
}
@@ -1747,6 +1778,7 @@ static enum merge_result bch_extent_merge(struct btree_keys *bk,
case KEY_TYPE_DELETED:
case KEY_TYPE_DISCARD:
case KEY_TYPE_ERROR:
+ case BCH_RESERVATION:
/* These types are mergeable, and no val to check */
break;
diff --git a/drivers/md/bcache/fs.c b/drivers/md/bcache/fs.c
index 4a913660696a..770690f60b21 100644
--- a/drivers/md/bcache/fs.c
+++ b/drivers/md/bcache/fs.c
@@ -54,19 +54,21 @@ static int bch_read_single_page(struct page *, struct address_space *);
static int reserve_sectors(struct cache_set *c, unsigned sectors)
{
- if (likely(atomic_long_sub_return(sectors,
- &c->sectors_reserved_cache) >= 0))
+ u64 sectors_to_get = SECTORS_CACHE + sectors; /* amount taken on the slow path: the request plus SECTORS_CACHE extra */
+
+ if (likely(atomic64_sub_return(sectors,
+ &c->sectors_reserved_cache) >= 0)) /* fast path: the cached count stays non-negative after taking @sectors */
return 0;
- atomic_long_add(SECTORS_CACHE, &c->sectors_reserved);
+ atomic64_add(sectors_to_get, &c->sectors_reserved); /* slow path: bump sectors_reserved before checking for fullness */
if (likely(!cache_set_full(c))) {
- atomic_long_add(SECTORS_CACHE, &c->sectors_reserved_cache);
+ atomic64_add(sectors_to_get, &c->sectors_reserved_cache); /* not full: credit the cache with the same amount */
return 0;
}
- atomic_long_sub_bug(SECTORS_CACHE, &c->sectors_reserved);
- atomic_long_add(sectors, &c->sectors_reserved_cache);
+ atomic64_sub_bug(sectors_to_get, &c->sectors_reserved); /* full: undo the sectors_reserved increase */
+ atomic64_add(sectors, &c->sectors_reserved_cache); /* and give back what the fast-path attempt subtracted */
return -ENOSPC;
}
@@ -92,7 +94,7 @@ static void bch_clear_page_bits(struct cache_set *c, struct bch_inode_info *ei,
EBUG_ON(!PageLocked(page));
if (PageAllocated(page)) {
- atomic_long_sub_bug(PAGE_SECTORS, &c->sectors_reserved);
+ atomic64_sub_bug(PAGE_SECTORS, &c->sectors_reserved);
ClearPageAllocated(page);
}
@@ -1744,7 +1746,7 @@ static void bch_writepage_io_done(struct closure *cl)
struct bio_vec *bvec;
unsigned i;
- atomic_long_sub_bug(io->sectors_reserved, &c->sectors_reserved);
+ atomic64_sub_bug(io->sectors_reserved, &c->sectors_reserved);
for (i = 0; i < ARRAY_SIZE(io->i_size_update_count); i++)
i_size_update_put(c, ei, i, io->i_size_update_count[i]);
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index a1225705affd..65142eb49069 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -756,12 +756,12 @@ void bch_journal_mark(struct cache_set *c, struct list_head *list)
list_for_each_entry(r, list, list)
for_each_jset_key(k, n, j, &r->j) {
- if ((j->level || j->btree_id == BTREE_ID_EXTENTS) &&
- !bkey_invalid(c, j->level
- ? BKEY_TYPE_BTREE : j->btree_id,
- bkey_i_to_s_c(k)))
- __bch_btree_mark_key(c, j->level,
- bkey_i_to_s_c(k));
+ enum bkey_type type = bkey_type(j->level, j->btree_id);
+
+ if (btree_type_has_ptrs(type) &&
+ !bkey_invalid(c, type, bkey_i_to_s_c(k)))
+ __bch_btree_mark_key_initial(c, type,
+ bkey_i_to_s_c(k));
}
}
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 809d5a01ba8a..5ce8590e39f5 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -589,8 +589,10 @@ static ssize_t show_cache_set_alloc_debug(struct cache_set *c, char *buf)
return scnprintf(buf, PAGE_SIZE,
"capacity:\t\t%llu\n"
"meta sectors:\t\t%llu\n"
- "dirty sectors:\t\t%llu\n",
- c->capacity, meta, dirty);
+ "dirty sectors:\t\t%llu\n"
+ "reserved sectors:\t%llu\n",
+ c->capacity, meta, dirty,
+ /* atomic64_read()'s return type differs between architectures; cast so it always matches %llu */
+ (u64) atomic64_read(&c->sectors_reserved));
}
static ssize_t bch_compression_stats(struct cache_set *c, char *buf)
diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h
index 181686996c05..e6fec099efa3 100644
--- a/include/uapi/linux/bcache.h
+++ b/include/uapi/linux/bcache.h
@@ -364,17 +364,22 @@ struct bch_extent_crc64 {
#define CRC64_EXTENT_SIZE_MAX (1U << 17)
+/*
+ * @reservation - pointer hasn't been written to, just reserved
+ */
struct bch_extent_ptr {
#if defined(__LITTLE_ENDIAN_BITFIELD)
__u64 type:2,
erasure_coded:1,
- offset:45, /* 16 petabytes */
+ reservation:1,
+ offset:44, /* 8 petabytes */
dev:8,
gen:8;
#elif defined (__BIG_ENDIAN_BITFIELD)
__u64 gen:8,
dev:8,
- offset:45,
+ offset:44,
+ reservation:1,
erasure_coded:1,
type:2;
#endif
@@ -396,6 +401,11 @@ enum {
* have the same value type:
*/
BCH_EXTENT_CACHED = 129,
+
+ /*
+ * Persistent reservation:
+ */
+ BCH_RESERVATION = 130,
};
struct bch_extent {