Diffstat (limited to 'fs/btrfs/zoned.c')
-rw-r--r--	fs/btrfs/zoned.c	191
1 file changed, 98 insertions(+), 93 deletions(-)
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 1f503e8e42d4..45d04092f2f8 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -17,6 +17,7 @@
#include "space-info.h"
#include "fs.h"
#include "accessors.h"
+#include "bio.h"
/* Maximum number of zones to report per blkdev_report_zones() call */
#define BTRFS_REPORT_NR_ZONES 4096
@@ -160,7 +161,7 @@ static int sb_write_pointer(struct block_device *bdev, struct blk_zone *zones,
*/
static inline u32 sb_zone_number(int shift, int mirror)
{
- u64 zone;
+ u64 zone = U64_MAX;
ASSERT(mirror < BTRFS_SUPER_MIRROR_MAX);
switch (mirror) {
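
The poison initializer matters because the switch only assigns zone for the three known mirror copies; with U64_MAX as the default, a bogus mirror index now trips the range assertion instead of silently returning an uninitialized value. A small userspace model of the mapping, with the 512 GiB and 4 TiB superblock log offsets assumed from the constants elsewhere in this file:

#include <stdint.h>
#include <stdio.h>

/*
 * Userspace model of sb_zone_number(): mirror 0 lives in zone 0,
 * mirrors 1 and 2 at fixed 512 GiB / 4 TiB log offsets (assumed),
 * scaled down by the device's zone size.
 */
static uint64_t sb_zone_number_model(int zone_size_shift, int mirror)
{
	const int first_shift = 39;	/* ilog2(512 GiB), assumed */
	const int second_shift = 42;	/* ilog2(4 TiB), assumed */
	uint64_t zone = UINT64_MAX;	/* poison value, as in the patch */

	switch (mirror) {
	case 0: zone = 0; break;
	case 1: zone = 1ULL << (first_shift - zone_size_shift); break;
	case 2: zone = 1ULL << (second_shift - zone_size_shift); break;
	}
	return zone;
}

int main(void)
{
	/* 256 MiB zones (shift 28): mirrors land in zones 0, 2048, 16384 */
	for (int m = 0; m < 3; m++)
		printf("mirror %d -> zone %llu\n", m,
		       (unsigned long long)sb_zone_number_model(28, m));
	return 0;
}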
@@ -220,7 +221,6 @@ static int btrfs_get_dev_zones(struct btrfs_device *device, u64 pos,
struct blk_zone *zones, unsigned int *nr_zones)
{
struct btrfs_zoned_device_info *zinfo = device->zone_info;
- u32 zno;
int ret;
if (!*nr_zones)
@@ -235,6 +235,7 @@ static int btrfs_get_dev_zones(struct btrfs_device *device, u64 pos,
/* Check cache */
if (zinfo->zone_cache) {
unsigned int i;
+ u32 zno;
ASSERT(IS_ALIGNED(pos, zinfo->zone_size));
zno = pos >> zinfo->zone_size_shift;
@@ -274,9 +275,12 @@ static int btrfs_get_dev_zones(struct btrfs_device *device, u64 pos,
return -EIO;
/* Populate cache */
- if (zinfo->zone_cache)
+ if (zinfo->zone_cache) {
+ u32 zno = pos >> zinfo->zone_size_shift;
+
memcpy(zinfo->zone_cache + zno, zones,
sizeof(*zinfo->zone_cache) * *nr_zones);
+ }
return 0;
}
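
The zone cache is a flat array indexed by zone number, so the lookup path and the populate path must derive the slot the same way; narrowing zno into each branch makes that symmetry explicit. A worked model of the index math, with the zone geometry assumed:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Assumed geometry: 256 MiB zones, so zone_size_shift = 28. */
	const unsigned int zone_size_shift = 28;
	const uint64_t pos = 1ULL << 30;	/* report starts at 1 GiB */

	/* Same slot computation as both the lookup and populate paths. */
	uint64_t zno = pos >> zone_size_shift;
	printf("zone cache slot = %llu\n", (unsigned long long)zno); /* 4 */
	return 0;
}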
@@ -417,25 +421,6 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
nr_sectors = bdev_nr_sectors(bdev);
zone_info->zone_size_shift = ilog2(zone_info->zone_size);
zone_info->nr_zones = nr_sectors >> ilog2(zone_sectors);
- /*
- * We limit max_zone_append_size also by max_segments *
- * PAGE_SIZE. Technically, we can have multiple pages per segment. But,
- * since btrfs adds the pages one by one to a bio, and btrfs cannot
- * increase the metadata reservation even if it increases the number of
- * extents, it is safe to stick with the limit.
- *
- * With the zoned emulation, we can have non-zoned device on the zoned
- * mode. In this case, we don't have a valid max zone append size. So,
- * use max_segments * PAGE_SIZE as the pseudo max_zone_append_size.
- */
- if (bdev_is_zoned(bdev)) {
- zone_info->max_zone_append_size = min_t(u64,
- (u64)bdev_max_zone_append_sectors(bdev) << SECTOR_SHIFT,
- (u64)bdev_max_segments(bdev) << PAGE_SHIFT);
- } else {
- zone_info->max_zone_append_size =
- (u64)bdev_max_segments(bdev) << PAGE_SHIFT;
- }
if (!IS_ALIGNED(nr_sectors, zone_sectors))
zone_info->nr_zones++;
@@ -539,8 +524,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
}
atomic_set(&zone_info->active_zones_left,
max_active_zones - nactive);
- /* Overcommit does not work well with active zone tacking. */
- set_bit(BTRFS_FS_NO_OVERCOMMIT, &fs_info->flags);
+ set_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags);
}
/* Validate superblock log */
@@ -715,9 +699,9 @@ static int btrfs_check_for_zoned_device(struct btrfs_fs_info *fs_info)
int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
{
+ struct queue_limits *lim = &fs_info->limits;
struct btrfs_device *device;
u64 zone_size = 0;
- u64 max_zone_append_size = 0;
int ret;
/*
@@ -727,6 +711,8 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
if (!btrfs_fs_incompat(fs_info, ZONED))
return btrfs_check_for_zoned_device(fs_info);
+ blk_set_stacking_limits(lim);
+
list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) {
struct btrfs_zoned_device_info *zone_info = device->zone_info;
@@ -741,10 +727,17 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
zone_info->zone_size, zone_size);
return -EINVAL;
}
- if (!max_zone_append_size ||
- (zone_info->max_zone_append_size &&
- zone_info->max_zone_append_size < max_zone_append_size))
- max_zone_append_size = zone_info->max_zone_append_size;
+
+ /*
+ * With the zoned emulation, a non-zoned device can be used in the
+ * zoned mode. In this case, it does not have a valid max zone
+ * append size, so there is nothing to stack for it.
+ */
+ if (bdev_is_zoned(device->bdev)) {
+ blk_stack_limits(lim,
+ &bdev_get_queue(device->bdev)->limits,
+ 0);
+ }
}
/*
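
blk_set_stacking_limits() starts the filesystem-wide limits out fully permissive, and each blk_stack_limits() call tightens them to the most restrictive zoned device. A conceptual userspace model of that combining step (the struct and field names mirror queue_limits but this is not the block layer code):

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for the queue_limits fields used here. */
struct limits {
	uint32_t max_zone_append_sectors;
	uint32_t max_sectors;
	uint16_t max_segments;
};

#define MIN(a, b) ((a) < (b) ? (a) : (b))

/* Conceptual model of blk_stack_limits(): keep the tighter value. */
static void stack(struct limits *top, const struct limits *dev)
{
	top->max_zone_append_sectors =
		MIN(top->max_zone_append_sectors, dev->max_zone_append_sectors);
	top->max_sectors = MIN(top->max_sectors, dev->max_sectors);
	top->max_segments = MIN(top->max_segments, dev->max_segments);
}

int main(void)
{
	struct limits fs = { UINT32_MAX, UINT32_MAX, UINT16_MAX }; /* permissive start */
	struct limits dev_a = { 128, 2560, 128 };	/* assumed devices */
	struct limits dev_b = { 256, 1280, 64 };

	stack(&fs, &dev_a);
	stack(&fs, &dev_b);
	/* -> 128 append sectors, 1280 sectors, 64 segments */
	printf("%u %u %u\n", fs.max_zone_append_sectors,
	       fs.max_sectors, fs.max_segments);
	return 0;
}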
@@ -765,8 +758,18 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
}
fs_info->zone_size = zone_size;
- fs_info->max_zone_append_size = ALIGN_DOWN(max_zone_append_size,
- fs_info->sectorsize);
+ /*
+ * Also limit max_zone_append_size by max_segments * PAGE_SIZE.
+ * Technically, we can have multiple pages per segment. But, since
+ * we add pages to a bio one by one and cannot increase the metadata
+ * reservation even if that increases the number of extents, it is
+ * safe to stick with this limit.
+ */
+ fs_info->max_zone_append_size = ALIGN_DOWN(
+ min3((u64)lim->max_zone_append_sectors << SECTOR_SHIFT,
+ (u64)lim->max_sectors << SECTOR_SHIFT,
+ (u64)lim->max_segments << PAGE_SHIFT),
+ fs_info->sectorsize);
fs_info->fs_devices->chunk_alloc_policy = BTRFS_CHUNK_ALLOC_ZONED;
if (fs_info->max_zone_append_size < fs_info->max_extent_size)
fs_info->max_extent_size = fs_info->max_zone_append_size;
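
With the stacked limits from the device loop above, the final clamp is a plain min3 followed by sector alignment. A worked example under assumed limits:

#include <stdint.h>
#include <stdio.h>

#define SECTOR_SHIFT	9
#define PAGE_SHIFT	12	/* 4 KiB pages, assumed */
#define ALIGN_DOWN(x, a) ((x) & ~((uint64_t)(a) - 1))

int main(void)
{
	/* Assumed stacked limits for the whole filesystem. */
	uint64_t max_zone_append_sectors = 128;	/* 64 KiB */
	uint64_t max_sectors = 2560;		/* 1280 KiB */
	uint64_t max_segments = 64;		/* 256 KiB of single pages */
	uint64_t sectorsize = 4096;

	uint64_t a = max_zone_append_sectors << SECTOR_SHIFT;
	uint64_t b = max_sectors << SECTOR_SHIFT;
	uint64_t c = max_segments << PAGE_SHIFT;
	uint64_t m = a < b ? (a < c ? a : c) : (b < c ? b : c);

	/* min3(64 KiB, 1280 KiB, 256 KiB) = 64 KiB, already aligned */
	printf("max_zone_append_size = %llu\n",
	       (unsigned long long)ALIGN_DOWN(m, sectorsize));
	return 0;
}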
@@ -1577,9 +1580,19 @@ void btrfs_calc_zone_unusable(struct btrfs_block_group *cache)
return;
WARN_ON(cache->bytes_super != 0);
- unusable = (cache->alloc_offset - cache->used) +
- (cache->length - cache->zone_capacity);
- free = cache->zone_capacity - cache->alloc_offset;
+
+ /* Check for block groups that never get activated */
+ if (test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &cache->fs_info->flags) &&
+ cache->flags & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM) &&
+ !test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags) &&
+ cache->alloc_offset == 0) {
+ unusable = cache->length;
+ free = 0;
+ } else {
+ unusable = (cache->alloc_offset - cache->used) +
+ (cache->length - cache->zone_capacity);
+ free = cache->zone_capacity - cache->alloc_offset;
+ }
/* We only need ->free_space in ALLOC_SEQ block groups */
cache->cached = BTRFS_CACHE_FINISHED;
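
The two branches account differently: a metadata or system block group that was never activated cannot be written at all under active zone tracking, so its entire length counts as unusable; otherwise the pre-existing formula applies. Worked numbers under an assumed geometry:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Assumed block group geometry (MiB). */
	uint64_t length = 256, zone_capacity = 224;
	uint64_t alloc_offset = 96, used = 64;

	/* Activated block group: the pre-existing formula. */
	uint64_t unusable = (alloc_offset - used) + (length - zone_capacity);
	uint64_t free_space = zone_capacity - alloc_offset;
	printf("active:   unusable=%llu free=%llu\n",	/* 64 and 128 */
	       (unsigned long long)unusable, (unsigned long long)free_space);

	/* Never-activated metadata/system block group: all unusable. */
	printf("inactive: unusable=%llu free=0\n", (unsigned long long)length);
	return 0;
}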
@@ -1623,8 +1636,10 @@ void btrfs_free_redirty_list(struct btrfs_transaction *trans)
spin_unlock(&trans->releasing_ebs_lock);
}
-bool btrfs_use_zone_append(struct btrfs_inode *inode, u64 start)
+bool btrfs_use_zone_append(struct btrfs_bio *bbio)
{
+ u64 start = (bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT);
+ struct btrfs_inode *inode = bbio->inode;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_block_group *cache;
bool ret = false;
@@ -1635,6 +1650,9 @@ bool btrfs_use_zone_append(struct btrfs_inode *inode, u64 start)
if (!is_data_inode(&inode->vfs_inode))
return false;
+ if (btrfs_op(&bbio->bio) != BTRFS_MAP_WRITE)
+ return false;
+
/*
* Using REQ_OP_ZONE_APPEND for relocation can break assumptions on the
* extent layout the relocation code has.
@@ -1657,22 +1675,16 @@ bool btrfs_use_zone_append(struct btrfs_inode *inode, u64 start)
return ret;
}
-void btrfs_record_physical_zoned(struct inode *inode, u64 file_offset,
- struct bio *bio)
+void btrfs_record_physical_zoned(struct btrfs_bio *bbio)
{
+ const u64 physical = bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT;
struct btrfs_ordered_extent *ordered;
- const u64 physical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
-
- if (bio_op(bio) != REQ_OP_ZONE_APPEND)
- return;
- ordered = btrfs_lookup_ordered_extent(BTRFS_I(inode), file_offset);
+ ordered = btrfs_lookup_ordered_extent(bbio->inode, bbio->file_offset);
if (WARN_ON(!ordered))
return;
ordered->physical = physical;
- ordered->bdev = bio->bi_bdev;
-
btrfs_put_ordered_extent(ordered);
}
@@ -1684,43 +1696,46 @@ void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered)
struct extent_map *em;
struct btrfs_ordered_sum *sum;
u64 orig_logical = ordered->disk_bytenr;
- u64 *logical = NULL;
- int nr, stripe_len;
+ struct map_lookup *map;
+ u64 physical = ordered->physical;
+ u64 chunk_start_phys;
+ u64 logical;
- /* Zoned devices should not have partitions. So, we can assume it is 0 */
- ASSERT(!bdev_is_partition(ordered->bdev));
- if (WARN_ON(!ordered->bdev))
+ em = btrfs_get_chunk_map(fs_info, orig_logical, 1);
+ if (IS_ERR(em))
return;
+ map = em->map_lookup;
+ chunk_start_phys = map->stripes[0].physical;
- if (WARN_ON(btrfs_rmap_block(fs_info, orig_logical, ordered->bdev,
- ordered->physical, &logical, &nr,
- &stripe_len)))
- goto out;
-
- WARN_ON(nr != 1);
+ if (WARN_ON_ONCE(map->num_stripes > 1) ||
+ WARN_ON_ONCE((map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) != 0) ||
+ WARN_ON_ONCE(physical < chunk_start_phys) ||
+ WARN_ON_ONCE(physical > chunk_start_phys + em->orig_block_len)) {
+ free_extent_map(em);
+ return;
+ }
+ logical = em->start + (physical - map->stripes[0].physical);
+ free_extent_map(em);
- if (orig_logical == *logical)
- goto out;
+ if (orig_logical == logical)
+ return;
- ordered->disk_bytenr = *logical;
+ ordered->disk_bytenr = logical;
em_tree = &inode->extent_tree;
write_lock(&em_tree->lock);
em = search_extent_mapping(em_tree, ordered->file_offset,
ordered->num_bytes);
- em->block_start = *logical;
+ em->block_start = logical;
free_extent_map(em);
write_unlock(&em_tree->lock);
list_for_each_entry(sum, &ordered->list, list) {
- if (*logical < orig_logical)
- sum->bytenr -= orig_logical - *logical;
+ if (logical < orig_logical)
+ sum->bytenr -= orig_logical - logical;
else
- sum->bytenr += *logical - orig_logical;
+ sum->bytenr += logical - orig_logical;
}
-
-out:
- kfree(logical);
}
bool btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
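
Because zoned mode only supports single-stripe, non-striped chunks here (enforced by the WARN_ON_ONCE checks), the physical-to-logical translation is a single offset shift within the chunk, with no need for btrfs_rmap_block(). A worked example with assumed addresses:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Assumed chunk map: logical chunk start and its stripe 0 physical. */
	uint64_t em_start = 1ULL << 30;			/* chunk logical: 1 GiB */
	uint64_t stripe0_physical = 512ULL << 20;	/* 512 MiB on disk */

	/* Where the zone append actually landed (assumed). */
	uint64_t physical = stripe0_physical + (64 << 10);

	/* Same translation as the rewritten btrfs_rewrite_logical_zoned(). */
	uint64_t logical = em_start + (physical - stripe0_physical);
	printf("logical = %llu (1 GiB + 64 KiB)\n", (unsigned long long)logical);
	return 0;
}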
@@ -1845,26 +1860,6 @@ int btrfs_sync_zone_write_pointer(struct btrfs_device *tgt_dev, u64 logical,
return btrfs_zoned_issue_zeroout(tgt_dev, physical_pos, length);
}
-struct btrfs_device *btrfs_zoned_get_device(struct btrfs_fs_info *fs_info,
- u64 logical, u64 length)
-{
- struct btrfs_device *device;
- struct extent_map *em;
- struct map_lookup *map;
-
- em = btrfs_get_chunk_map(fs_info, logical, length);
- if (IS_ERR(em))
- return ERR_CAST(em);
-
- map = em->map_lookup;
- /* We only support single profile for now */
- device = map->stripes[0].dev;
-
- free_extent_map(em);
-
- return device;
-}
-
/*
* Activate block group and underlying device zones
*
@@ -1916,7 +1911,11 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
/* Successfully activated all the zones */
set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags);
- space_info->active_total_bytes += block_group->length;
+ WARN_ON(block_group->alloc_offset != 0);
+ if (block_group->zone_unusable == block_group->length) {
+ block_group->zone_unusable = block_group->length - block_group->zone_capacity;
+ space_info->bytes_zone_unusable -= block_group->zone_capacity;
+ }
spin_unlock(&block_group->lock);
btrfs_try_granting_tickets(fs_info, space_info);
spin_unlock(&space_info->lock);
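
Activation undoes the pessimistic accounting from btrfs_calc_zone_unusable(): if the whole length was counted unusable, only the capacity gap stays unusable afterwards, and the capacity is credited back to the space_info. Continuing the numbers from the earlier sketch:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Assumed geometry, matching the earlier sketch (MiB). */
	uint64_t length = 256, zone_capacity = 224;
	uint64_t zone_unusable = length;	/* never-activated accounting */
	uint64_t bytes_zone_unusable = 1024;	/* space_info total, assumed */

	if (zone_unusable == length) {
		zone_unusable = length - zone_capacity;	/* -> 32 */
		bytes_zone_unusable -= zone_capacity;	/* -> 800 */
	}
	printf("bg unusable=%llu space_info unusable=%llu\n",
	       (unsigned long long)zone_unusable,
	       (unsigned long long)bytes_zone_unusable);
	return 0;
}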
@@ -2100,11 +2099,21 @@ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags)
if (!device->bdev)
continue;
- if (!zinfo->max_active_zones ||
- atomic_read(&zinfo->active_zones_left)) {
+ if (!zinfo->max_active_zones) {
ret = true;
break;
}
+
+ switch (flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
+ case 0: /* single */
+ ret = (atomic_read(&zinfo->active_zones_left) >= 1);
+ break;
+ case BTRFS_BLOCK_GROUP_DUP:
+ ret = (atomic_read(&zinfo->active_zones_left) >= 2);
+ break;
+ }
+ if (ret)
+ break;
}
mutex_unlock(&fs_info->chunk_mutex);
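
The profile now decides how many free active zones a single device must still have: DUP places both copies on one device, so it needs two, while single needs one; other profiles fall through and leave ret false. A small model of that check (the flag value is a stand-in, not the real BTRFS_BLOCK_GROUP_DUP bit):

#include <stdint.h>
#include <stdio.h>

#define BG_DUP	(1 << 0)	/* stand-in for BTRFS_BLOCK_GROUP_DUP */

/* Active zones one device must have left to start a block group. */
static int zones_needed(uint64_t profile)
{
	return (profile & BG_DUP) ? 2 : 1;	/* DUP: both copies on one device */
}

int main(void)
{
	int active_zones_left = 1;	/* assumed device state */

	printf("single: %s\n", active_zones_left >= zones_needed(0) ? "ok" : "no");
	printf("dup:    %s\n", active_zones_left >= zones_needed(BG_DUP) ? "ok" : "no");
	return 0;
}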
@@ -2270,7 +2279,7 @@ int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info)
u64 avail;
spin_lock(&block_group->lock);
- if (block_group->reserved ||
+ if (block_group->reserved || block_group->alloc_offset == 0 ||
(block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM)) {
spin_unlock(&block_group->lock);
continue;
@@ -2307,10 +2316,6 @@ int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
if (!btrfs_is_zoned(fs_info) || (space_info->flags & BTRFS_BLOCK_GROUP_DATA))
return 0;
- /* No more block groups to activate */
- if (space_info->active_total_bytes == space_info->total_bytes)
- return 0;
-
for (;;) {
int ret;
bool need_finish = false;