summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Kent Overstreet <kent.overstreet@gmail.com> 2019-03-03 15:15:55 -0500
committer: Kent Overstreet <kent.overstreet@gmail.com> 2019-04-03 12:44:08 -0400
commit: 89c9412c5d108b329d100716896a91983788a1c2 (patch)
tree: f41997072c9d3494903b151ca72b53bcc8ca878a
parent: e2ebb8d05816452ed4c80f49c9d5cde1d77b2895 (diff)
bcachefs: ja->discard_idx, ja->dirty_idx
-rw-r--r--  fs/bcachefs/journal.c          | 42
-rw-r--r--  fs/bcachefs/journal_io.c       | 12
-rw-r--r--  fs/bcachefs/journal_reclaim.c  | 63
-rw-r--r--  fs/bcachefs/journal_types.h    | 24
4 files changed, 83 insertions, 58 deletions
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index f108a28233c7..16ec417910da 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -760,6 +760,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
while (ja->nr < nr) {
struct open_bucket *ob = NULL;
+ unsigned pos;
long bucket;
if (new_fs) {
@@ -786,20 +787,24 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
preempt_disable();
}
- __array_insert_item(ja->buckets, ja->nr, ja->last_idx);
- __array_insert_item(ja->bucket_seq, ja->nr, ja->last_idx);
- __array_insert_item(journal_buckets->buckets, ja->nr, ja->last_idx);
+ pos = ja->nr ? (ja->cur_idx + 1) % ja->nr : 0;
+ __array_insert_item(ja->buckets, ja->nr, pos);
+ __array_insert_item(ja->bucket_seq, ja->nr, pos);
+ __array_insert_item(journal_buckets->buckets, ja->nr, pos);
+ ja->nr++;
- ja->buckets[ja->last_idx] = bucket;
- ja->bucket_seq[ja->last_idx] = 0;
- journal_buckets->buckets[ja->last_idx] = cpu_to_le64(bucket);
+ ja->buckets[pos] = bucket;
+ ja->bucket_seq[pos] = 0;
+ journal_buckets->buckets[pos] = cpu_to_le64(bucket);
- if (ja->last_idx < ja->nr) {
- if (ja->cur_idx >= ja->last_idx)
- ja->cur_idx++;
- ja->last_idx++;
- }
- ja->nr++;
+ if (pos <= ja->discard_idx)
+ ja->discard_idx = (ja->discard_idx + 1) % ja->nr;
+ if (pos <= ja->dirty_idx_ondisk)
+ ja->dirty_idx_ondisk = (ja->dirty_idx_ondisk + 1) % ja->nr;
+ if (pos <= ja->dirty_idx)
+ ja->dirty_idx = (ja->dirty_idx + 1) % ja->nr;
+ if (pos <= ja->cur_idx)
+ ja->cur_idx = (ja->cur_idx + 1) % ja->nr;
bch2_mark_metadata_bucket(c, ca, bucket, BCH_DATA_JOURNAL,
ca->mi.bucket_size,
@@ -1039,6 +1044,7 @@ int bch2_fs_journal_init(struct journal *j)
mutex_init(&j->blacklist_lock);
INIT_LIST_HEAD(&j->seq_blacklist);
mutex_init(&j->reclaim_lock);
+ mutex_init(&j->discard_lock);
lockdep_init_map(&j->res_map, "journal res", &res_key, 0);
@@ -1135,13 +1141,17 @@ ssize_t bch2_journal_print_debug(struct journal *j, char *buf)
"dev %u:\n"
"\tnr\t\t%u\n"
"\tavailable\t%u:%u\n"
- "\tcur_idx\t\t%u (seq %llu)\n"
- "\tlast_idx\t%u (seq %llu)\n",
+ "\tdiscard_idx\t\t%u\n"
+ "\tdirty_idx_ondisk\t%u (seq %llu)\n"
+ "\tdirty_idx\t\t%u (seq %llu)\n"
+ "\tcur_idx\t\t%u (seq %llu)\n",
iter, ja->nr,
bch2_journal_dev_buckets_available(j, ja),
ja->sectors_free,
- ja->cur_idx, ja->bucket_seq[ja->cur_idx],
- ja->last_idx, ja->bucket_seq[ja->last_idx]);
+ ja->discard_idx,
+ ja->dirty_idx_ondisk, ja->bucket_seq[ja->dirty_idx_ondisk],
+ ja->dirty_idx, ja->bucket_seq[ja->dirty_idx],
+ ja->cur_idx, ja->bucket_seq[ja->cur_idx]);
}
spin_unlock(&j->lock);
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index 16cb6be87cbf..8a540c90a2ca 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -625,11 +625,12 @@ static void bch2_journal_read_device(struct closure *cl)
ja->sectors_free = 0;
/*
- * Set last_idx to indicate the entire journal is full and needs to be
+ * Set dirty_idx to indicate the entire journal is full and needs to be
* reclaimed - journal reclaim will immediately reclaim whatever isn't
* pinned when it first runs:
*/
- ja->last_idx = (ja->cur_idx + 1) % ja->nr;
+ ja->discard_idx = ja->dirty_idx_ondisk =
+ ja->dirty_idx = (ja->cur_idx + 1) % ja->nr;
out:
kvpfree(buf.data, buf.size);
percpu_ref_put(&ca->io_ref);
@@ -1069,12 +1070,13 @@ static void journal_write_done(struct closure *cl)
goto err;
spin_lock(&j->lock);
- j->seq_ondisk = seq;
- j->last_seq_ondisk = last_seq;
-
if (seq >= j->pin.front)
journal_seq_pin(j, seq)->devs = devs;
+ j->seq_ondisk = seq;
+ j->last_seq_ondisk = last_seq;
+ bch2_journal_space_available(j);
+
/*
* Updating last_seq_ondisk may let bch2_journal_reclaim_work() discard
* more buckets:
diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c
index b8603a1ff1f2..37275ff57b31 100644
--- a/fs/bcachefs/journal_reclaim.c
+++ b/fs/bcachefs/journal_reclaim.c
@@ -13,22 +13,20 @@ unsigned bch2_journal_dev_buckets_available(struct journal *j,
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
unsigned next = (ja->cur_idx + 1) % ja->nr;
- unsigned available = (ja->last_idx + ja->nr - next) % ja->nr;
+ unsigned available = (ja->discard_idx + ja->nr - next) % ja->nr;
/*
* Allocator startup needs some journal space before we can do journal
* replay:
*/
- if (available &&
- test_bit(BCH_FS_ALLOCATOR_STARTED, &c->flags))
- available--;
+ if (available && test_bit(BCH_FS_ALLOCATOR_STARTED, &c->flags))
+ --available;
/*
* Don't use the last bucket unless writing the new last_seq
* will make another bucket available:
*/
- if (available &&
- journal_last_seq(j) <= ja->bucket_seq[ja->last_idx])
+ if (available && ja->dirty_idx_ondisk == ja->dirty_idx)
--available;
return available;
@@ -54,12 +52,34 @@ void bch2_journal_space_available(struct journal *j)
for_each_member_device_rcu(ca, c, i,
&c->rw_devs[BCH_DATA_JOURNAL]) {
struct journal_device *ja = &ca->journal;
- unsigned buckets_this_device, sectors_this_device;
if (!ja->nr)
continue;
+ while (ja->dirty_idx != ja->cur_idx &&
+ ja->bucket_seq[ja->dirty_idx] < journal_last_seq(j))
+ ja->dirty_idx = (ja->dirty_idx + 1) % ja->nr;
+
+ while (ja->dirty_idx_ondisk != ja->dirty_idx &&
+ ja->bucket_seq[ja->dirty_idx_ondisk] < j->last_seq_ondisk)
+ ja->dirty_idx_ondisk = (ja->dirty_idx_ondisk + 1) % ja->nr;
+
nr_online++;
+ }
+
+ if (nr_online < c->opts.metadata_replicas_required) {
+ ret = -EROFS;
+ sectors_next_entry = 0;
+ goto out;
+ }
+
+ for_each_member_device_rcu(ca, c, i,
+ &c->rw_devs[BCH_DATA_JOURNAL]) {
+ struct journal_device *ja = &ca->journal;
+ unsigned buckets_this_device, sectors_this_device;
+
+ if (!ja->nr)
+ continue;
buckets_this_device = bch2_journal_dev_buckets_available(j, ja);
sectors_this_device = ja->sectors_free;
@@ -99,20 +119,17 @@ void bch2_journal_space_available(struct journal *j)
nr_devs++;
}
- rcu_read_unlock();
- if (nr_online < c->opts.metadata_replicas_required) {
- ret = -EROFS;
- sectors_next_entry = 0;
- } else if (!sectors_next_entry ||
- nr_devs < min_t(unsigned, nr_online,
- c->opts.metadata_replicas)) {
+ if (!sectors_next_entry ||
+ nr_devs < min_t(unsigned, nr_online, c->opts.metadata_replicas)) {
ret = -ENOSPC;
sectors_next_entry = 0;
} else if (!fifo_free(&j->pin)) {
ret = -ENOSPC;
sectors_next_entry = 0;
}
+out:
+ rcu_read_unlock();
j->cur_entry_sectors = sectors_next_entry;
j->cur_entry_error = ret;
@@ -128,25 +145,23 @@ static bool should_discard_bucket(struct journal *j, struct journal_device *ja)
bool ret;
spin_lock(&j->lock);
- ret = ja->nr &&
- ja->last_idx != ja->cur_idx &&
- ja->bucket_seq[ja->last_idx] < j->last_seq_ondisk;
+ ret = ja->discard_idx != ja->dirty_idx_ondisk;
spin_unlock(&j->lock);
return ret;
}
/*
- * Advance ja->last_idx as long as it points to buckets that are no longer
+ * Advance ja->discard_idx as long as it points to buckets that are no longer
* dirty, issuing discards if necessary:
*/
-static void journal_do_discards(struct journal *j)
+static void bch2_journal_do_discards(struct journal *j)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct bch_dev *ca;
unsigned iter;
- mutex_lock(&j->reclaim_lock);
+ mutex_lock(&j->discard_lock);
for_each_rw_member(ca, c, iter) {
struct journal_device *ja = &ca->journal;
@@ -156,18 +171,18 @@ static void journal_do_discards(struct journal *j)
blk_queue_discard(bdev_get_queue(ca->disk_sb.bdev)))
blkdev_issue_discard(ca->disk_sb.bdev,
bucket_to_sector(ca,
- ja->buckets[ja->last_idx]),
+ ja->buckets[ja->discard_idx]),
ca->mi.bucket_size, GFP_NOIO, 0);
spin_lock(&j->lock);
- ja->last_idx = (ja->last_idx + 1) % ja->nr;
+ ja->discard_idx = (ja->discard_idx + 1) % ja->nr;
bch2_journal_space_available(j);
spin_unlock(&j->lock);
}
}
- mutex_unlock(&j->reclaim_lock);
+ mutex_unlock(&j->discard_lock);
}
/*
@@ -398,7 +413,7 @@ void bch2_journal_reclaim_work(struct work_struct *work)
unsigned iter, bucket_to_flush, min_nr = 0;
u64 seq_to_flush = 0;
- journal_do_discards(j);
+ bch2_journal_do_discards(j);
mutex_lock(&j->reclaim_lock);
spin_lock(&j->lock);
diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h
index 8772e53fb64c..9f1db6d28cd9 100644
--- a/fs/bcachefs/journal_types.h
+++ b/fs/bcachefs/journal_types.h
@@ -192,9 +192,6 @@ struct journal {
struct journal_entry_pin_list *data;
} pin;
- struct journal_entry_pin *flush_in_progress;
- wait_queue_head_t pin_flush_wait;
-
u64 replay_journal_seq;
struct mutex blacklist_lock;
@@ -205,10 +202,13 @@ struct journal {
spinlock_t err_lock;
struct delayed_work reclaim_work;
+ struct mutex reclaim_lock;
unsigned long last_flushed;
+ struct journal_entry_pin *flush_in_progress;
+ wait_queue_head_t pin_flush_wait;
- /* protects advancing ja->last_idx: */
- struct mutex reclaim_lock;
+ /* protects advancing ja->discard_idx: */
+ struct mutex discard_lock;
unsigned write_delay_ms;
unsigned reclaim_delay_ms;
@@ -239,17 +239,15 @@ struct journal_device {
unsigned sectors_free;
- /* Journal bucket we're currently writing to */
- unsigned cur_idx;
-
- /* Last journal bucket that still contains an open journal entry */
-
/*
- * j->lock and j->reclaim_lock must both be held to modify, j->lock
- * sufficient to read:
+ * discard_idx <= dirty_idx_ondisk <= dirty_idx <= cur_idx:
*/
- unsigned last_idx;
+ unsigned discard_idx; /* Next bucket to discard */
+ unsigned dirty_idx_ondisk;
+ unsigned dirty_idx;
+ unsigned cur_idx; /* Journal bucket we're currently writing to */
unsigned nr;
+
u64 *buckets;
/* Bio for journal reads/writes to this device */