diff options
author | Kent Overstreet <kent.overstreet@gmail.com> | 2018-01-19 13:11:12 -0500 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@gmail.com> | 2018-01-30 20:41:29 -0500 |
commit | 3273d2aab6aa37689133d07835b34d5dd9bc1778 (patch) | |
tree | baa46026be9e537d227b241a81f36d5b8380ca7b | |
parent | e7e7616e13f7eb452a8e03cb50e9cb879f271560 (diff) |
bcachefs: fixes for buffered io & replication
-rw-r--r-- | fs/bcachefs/btree_update_interior.c | 7 | ||||
-rw-r--r-- | fs/bcachefs/buckets.c | 13 | ||||
-rw-r--r-- | fs/bcachefs/buckets.h | 23 | ||||
-rw-r--r-- | fs/bcachefs/fs-io.c | 153 | ||||
-rw-r--r-- | fs/bcachefs/journal.c | 6 |
5 files changed, 96 insertions, 106 deletions
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 73c27290c167..e92a6a3a1649 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -514,8 +514,7 @@ static struct btree_reserve *bch2_btree_reserve_get(struct bch_fs *c, struct btree *b; struct disk_reservation disk_res = { 0, 0 }; unsigned sectors = nr_nodes * c->opts.btree_node_size; - int ret, disk_res_flags = BCH_DISK_RESERVATION_GC_LOCK_HELD| - BCH_DISK_RESERVATION_METADATA; + int ret, disk_res_flags = BCH_DISK_RESERVATION_GC_LOCK_HELD; if (flags & BTREE_INSERT_NOFAIL) disk_res_flags |= BCH_DISK_RESERVATION_NOFAIL; @@ -528,7 +527,9 @@ static struct btree_reserve *bch2_btree_reserve_get(struct bch_fs *c, if (ret) return ERR_PTR(ret); - if (bch2_disk_reservation_get(c, &disk_res, sectors, disk_res_flags)) + if (bch2_disk_reservation_get(c, &disk_res, sectors, + c->opts.metadata_replicas, + disk_res_flags)) return ERR_PTR(-ENOSPC); BUG_ON(nr_nodes > BTREE_RESERVE_MAX); diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 43133cbb50e9..f60cd53baf5d 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -788,19 +788,6 @@ recalculate: return ret; } -int bch2_disk_reservation_get(struct bch_fs *c, - struct disk_reservation *res, - unsigned sectors, int flags) -{ - res->sectors = 0; - res->gen = c->capacity_gen; - res->nr_replicas = (flags & BCH_DISK_RESERVATION_METADATA) - ? c->opts.metadata_replicas - : c->opts.data_replicas; - - return bch2_disk_reservation_add(c, res, sectors, flags); -} - /* Startup/shutdown: */ static void buckets_free_rcu(struct rcu_head *rcu) diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index 86e72829c430..a42c064bbc40 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -230,16 +230,27 @@ static inline void bch2_disk_reservation_put(struct bch_fs *c, } #define BCH_DISK_RESERVATION_NOFAIL (1 << 0) -#define BCH_DISK_RESERVATION_METADATA (1 << 1) -#define BCH_DISK_RESERVATION_GC_LOCK_HELD (1 << 2) -#define BCH_DISK_RESERVATION_BTREE_LOCKS_HELD (1 << 3) +#define BCH_DISK_RESERVATION_GC_LOCK_HELD (1 << 1) +#define BCH_DISK_RESERVATION_BTREE_LOCKS_HELD (1 << 2) int bch2_disk_reservation_add(struct bch_fs *, struct disk_reservation *, unsigned, int); -int bch2_disk_reservation_get(struct bch_fs *, - struct disk_reservation *, - unsigned, int); + +static inline int bch2_disk_reservation_get(struct bch_fs *c, + struct disk_reservation *res, + unsigned sectors, + unsigned nr_replicas, + int flags) +{ + *res = (struct disk_reservation) { + .sectors = 0, + .gen = c->capacity_gen, + .nr_replicas = nr_replicas, + }; + + return bch2_disk_reservation_add(c, res, sectors, flags); +} int bch2_dev_buckets_resize(struct bch_fs *, struct bch_dev *, u64); void bch2_dev_buckets_free(struct bch_dev *); diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index e1ad5aaa20eb..eb0939ad2817 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -446,23 +446,20 @@ static inline struct bch_io_opts io_opts(struct bch_fs *c, struct bch_inode_info */ struct bch_page_state { union { struct { - /* - * page is _fully_ written on disk, and not compressed - which means to - * write this page we don't have to reserve space (the new write will - * never take up more space on disk than what it's overwriting) - */ - unsigned allocated:1; + /* existing data: */ + unsigned sectors:PAGE_SECTOR_SHIFT + 1; + unsigned nr_replicas:4; + unsigned compressed:1; /* Owns PAGE_SECTORS sized reservation: */ unsigned reserved:1; - unsigned nr_replicas:4; + unsigned reservation_replicas:4; /* * Number of sectors on disk - for i_blocks * Uncompressed size, not compressed size: */ - u8 sectors; - u8 dirty_sectors; + unsigned dirty_sectors:PAGE_SECTOR_SHIFT + 1; }; /* for cmpxchg: */ unsigned long v; @@ -497,9 +494,17 @@ static inline struct bch_page_state *page_state(struct page *page) return s; } +static void __bch2_put_page_reservation(struct bch_fs *c, struct bch_page_state s) +{ + struct disk_reservation res = { + .sectors = s.reserved ? PAGE_SECTORS * s.reservation_replicas : 0 + }; + + bch2_disk_reservation_put(c, &res); +} + static void bch2_put_page_reservation(struct bch_fs *c, struct page *page) { - struct disk_reservation res = { .sectors = PAGE_SECTORS }; struct bch_page_state s; s = page_state_cmpxchg(page_state(page), s, { @@ -508,35 +513,40 @@ static void bch2_put_page_reservation(struct bch_fs *c, struct page *page) s.reserved = 0; }); - bch2_disk_reservation_put(c, &res); + __bch2_put_page_reservation(c, s); } static int bch2_get_page_reservation(struct bch_fs *c, struct page *page, - bool check_enospc) + bool check_enospc) { - struct bch_page_state *s = page_state(page), new; + struct bch_page_state *s = page_state(page), new, old; + unsigned replicas = READ_ONCE(c->opts.data_replicas); struct disk_reservation res; int ret = 0; - BUG_ON(s->allocated && s->sectors != PAGE_SECTORS); + if (s->reserved) { + if (s->reservation_replicas >= replicas) + return 0; - if (s->allocated || s->reserved) - return 0; + } else { + if (s->sectors == PAGE_SECTORS && + s->nr_replicas >= replicas && + !s->compressed) + return 0; + } - ret = bch2_disk_reservation_get(c, &res, PAGE_SECTORS, !check_enospc - ? BCH_DISK_RESERVATION_NOFAIL : 0); + ret = bch2_disk_reservation_get(c, &res, PAGE_SECTORS, replicas, + !check_enospc + ? BCH_DISK_RESERVATION_NOFAIL : 0); if (ret) return ret; - page_state_cmpxchg(s, new, { - if (new.reserved) { - bch2_disk_reservation_put(c, &res); - return 0; - } - new.reserved = 1; - new.nr_replicas = res.nr_replicas; + old = page_state_cmpxchg(s, new, { + new.reserved = 1; + new.reservation_replicas = res.nr_replicas; }); + __bch2_put_page_reservation(c, old); return 0; } @@ -729,16 +739,22 @@ struct readpages_iter { unsigned nr_pages; }; -static int readpage_add_page(struct readpages_iter *iter, struct page *page) +static inline void page_state_init_for_read(struct page *page) { struct bch_page_state *s = page_state(page); - int ret; BUG_ON(s->reserved); - s->allocated = 1; - s->sectors = 0; + s->sectors = 0; + s->compressed = 0; +} + +static int readpage_add_page(struct readpages_iter *iter, struct page *page) +{ + int ret; prefetchw(&page->flags); + page_state_init_for_read(page); + ret = add_to_page_cache_lru(page, iter->mapping, page->index, GFP_NOFS); put_page(page); @@ -766,19 +782,12 @@ static inline struct page *readpage_iter_next(struct readpages_iter *iter) for (; \ ((_page) = __readpage_next_page(&(_iter)));) \ -static void bch2_mark_pages_unalloc(struct bio *bio) -{ - struct bvec_iter iter; - struct bio_vec bv; - - bio_for_each_segment(bv, bio, iter) - page_state(bv.bv_page)->allocated = 0; -} - static void bch2_add_page_sectors(struct bio *bio, struct bkey_s_c k) { struct bvec_iter iter; struct bio_vec bv; + bool compressed = bch2_extent_is_compressed(k); + unsigned nr_ptrs = bch2_extent_nr_dirty_ptrs(k); bio_for_each_segment(bv, bio, iter) { struct bch_page_state *s = page_state(bv.bv_page); @@ -788,14 +797,14 @@ static void bch2_add_page_sectors(struct bio *bio, struct bkey_s_c k) unsigned page_sectors = min(bv.bv_len >> 9, k_sectors); - if (!s->sectors) - s->nr_replicas = bch2_extent_nr_dirty_ptrs(k); - else - s->nr_replicas = min_t(unsigned, s->nr_replicas, - bch2_extent_nr_dirty_ptrs(k)); + s->nr_replicas = !s->sectors + ? nr_ptrs + : min_t(unsigned, s->nr_replicas, nr_ptrs); BUG_ON(s->sectors + page_sectors > PAGE_SECTORS); s->sectors += page_sectors; + + s->compressed |= compressed; } } @@ -901,9 +910,6 @@ static void bchfs_read(struct bch_fs *c, struct btree_iter *iter, if (bkey_extent_is_allocation(k.k)) bch2_add_page_sectors(bio, k); - if (!bch2_extent_is_fully_allocated(k)) - bch2_mark_pages_unalloc(bio); - if (pick.ca) { if (!is_last) { bio_inc_remaining(&rbio->bio); @@ -975,22 +981,7 @@ static void __bchfs_readpage(struct bch_fs *c, struct bch_read_bio *rbio, { struct btree_iter iter; - /* - * Initialize page state: - * If a page is partly allocated and partly a hole, we want it to be - * marked BCH_PAGE_UNALLOCATED - so we initially mark all pages - * allocated and then mark them unallocated as we find holes: - * - * Note that the bio hasn't been split yet - it's the only bio that - * points to these pages. As we walk extents and split @bio, that - * necessarily be true, the splits won't necessarily be on page - * boundaries: - */ - struct bch_page_state *s = page_state(page); - - EBUG_ON(s->reserved); - s->allocated = 1; - s->sectors = 0; + page_state_init_for_read(page); bio_set_op_attrs(&rbio->bio, REQ_OP_READ, REQ_SYNC); bio_add_page_contig(&rbio->bio, page); @@ -1142,7 +1133,7 @@ static void bch2_writepage_io_alloc(struct bch_fs *c, struct bch_writepage_state *w, struct bch_inode_info *inode, struct page *page, - struct bch_page_state s) + unsigned nr_replicas) { struct bch_write_op *op; u64 offset = (u64) page->index << PAGE_SECTOR_SHIFT; @@ -1156,8 +1147,8 @@ static void bch2_writepage_io_alloc(struct bch_fs *c, closure_init(&w->io->cl, NULL); bch2_fswrite_op_init(&w->io->op, c, inode, w->opts, false); - op->nr_replicas = s.nr_replicas; - op->res.nr_replicas = s.nr_replicas; + op->nr_replicas = nr_replicas; + op->res.nr_replicas = nr_replicas; op->write_point = writepoint_hashed(inode->ei_last_dirtied); op->pos = POS(inode->v.i_ino, offset); op->wbio.bio.bi_iter.bi_sector = offset; @@ -1199,28 +1190,27 @@ do_io: old = page_state_cmpxchg(page_state(page), new, { EBUG_ON(!new.reserved && (new.sectors != PAGE_SECTORS || - !new.allocated)); + new.compressed)); - if (new.allocated && w->opts.compression) - new.allocated = 0; - else if (!new.reserved) - break; + if (new.reserved) + new.nr_replicas = new.reservation_replicas; + new.compressed |= w->opts.compression != 0; new.reserved = 0; }); if (w->io && - (w->io->op.op.res.nr_replicas != old.nr_replicas || + (w->io->op.op.res.nr_replicas != new.nr_replicas || !bio_can_add_page_contig(&w->io->op.op.wbio.bio, page))) bch2_writepage_do_io(w); if (!w->io) - bch2_writepage_io_alloc(c, w, inode, page, old); + bch2_writepage_io_alloc(c, w, inode, page, new.nr_replicas); BUG_ON(inode != w->io->op.inode); BUG_ON(bio_add_page_contig(&w->io->op.op.wbio.bio, page)); if (old.reserved) - w->io->op.op.res.sectors += old.nr_replicas * PAGE_SECTORS; + w->io->op.op.res.sectors += old.reservation_replicas * PAGE_SECTORS; /* while page is locked: */ w->io->op.new_i_size = i_size; @@ -1631,9 +1621,8 @@ static void bch2_dio_write_loop_async(struct closure *); static long bch2_dio_write_loop(struct dio_write *dio) { struct kiocb *req = dio->req; - struct file *file = req->ki_filp; - struct address_space *mapping = file->f_mapping; - struct bch_inode_info *inode = file_bch_inode(file); + struct address_space *mapping = req->ki_filp->f_mapping; + struct bch_inode_info *inode = dio->iop.inode; struct bio *bio = &dio->iop.op.wbio.bio; struct bio_vec *bv; bool sync; @@ -1777,7 +1766,8 @@ static int bch2_direct_IO_write(struct kiocb *req, !c->opts.journal_flush_disabled) dio->iop.op.flags |= BCH_WRITE_FLUSH; - ret = bch2_disk_reservation_get(c, &dio->iop.op.res, iter->count >> 9, 0); + ret = bch2_disk_reservation_get(c, &dio->iop.op.res, iter->count >> 9, + c->opts.data_replicas, 0); if (unlikely(ret)) { if (bch2_check_range_allocated(c, POS(inode->v.i_ino, offset >> 9), @@ -2083,7 +2073,7 @@ static long bch2_fpunch(struct bch_inode_info *inode, loff_t offset, loff_t len) * problem with truncate because truncate will never split an * extent, only truncate it... */ - ret = bch2_disk_reservation_get(c, &disk_res, 0, 0); + ret = bch2_disk_reservation_get(c, &disk_res, 0, 0, 0); BUG_ON(ret); ret = bch2_btree_delete_range(c, @@ -2182,7 +2172,8 @@ static long bch2_fcollapse(struct bch_inode_info *inode, BUG_ON(bkey_cmp(dst.pos, bkey_start_pos(©.k.k))); ret = bch2_disk_reservation_get(c, &disk_res, copy.k.k.size, - BCH_DISK_RESERVATION_NOFAIL); + bch2_extent_nr_dirty_ptrs(bkey_i_to_s_c(©.k)), + BCH_DISK_RESERVATION_NOFAIL); BUG_ON(ret); ret = bch2_btree_insert_at(c, &disk_res, &i_sectors_hook.hook, @@ -2320,8 +2311,8 @@ static long bch2_fallocate(struct bch_inode_info *inode, int mode, if (reservation.v.nr_replicas < replicas || bch2_extent_is_compressed(k)) { - ret = bch2_disk_reservation_get(c, &disk_res, - sectors, 0); + ret = bch2_disk_reservation_get(c, &disk_res, sectors, + replicas, 0); if (ret) goto err_put_sectors_dirty; diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 3701e580c542..74f884b86e42 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -1505,7 +1505,6 @@ int bch2_journal_replay(struct bch_fs *c, struct list_head *list) journal_seq_pin(j, le64_to_cpu(i->j.seq)); for_each_jset_key(k, _n, entry, &i->j) { - struct disk_reservation disk_res; if (entry->btree_id == BTREE_ID_ALLOC) { /* @@ -1514,13 +1513,14 @@ int bch2_journal_replay(struct bch_fs *c, struct list_head *list) */ ret = bch2_alloc_replay_key(c, k->k.p); } else { + struct disk_reservation disk_res; /* * We might cause compressed extents to be * split, so we need to pass in a * disk_reservation: */ - BUG_ON(bch2_disk_reservation_get(c, &disk_res, 0, 0)); + BUG_ON(bch2_disk_reservation_get(c, &disk_res, 0, 0, 0)); ret = bch2_btree_insert(c, entry->btree_id, k, &disk_res, NULL, NULL, @@ -1580,7 +1580,7 @@ static int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca, */ if (bch2_disk_reservation_get(c, &disk_res, - bucket_to_sector(ca, nr - ja->nr), 0)) + bucket_to_sector(ca, nr - ja->nr), 1, 0)) return -ENOSPC; mutex_lock(&c->sb_lock); |