From 7768ed33ccdc02801c4483fc5682dc66ace14aea Mon Sep 17 00:00:00 2001 From: Alasdair G Kergon Date: Fri, 27 Jul 2012 15:07:57 +0100 Subject: dm thin: reduce endio_hook pool size Reduce the slab size used for the dm_thin_endio_hook mempool. Allocation has been seen to fail on machines with smaller amounts of memory due to fragmentation. lvm: page allocation failure. order:5, mode:0xd0 device-mapper: table: 253:38: thin-pool: Error creating pool's endio_hook mempool Cc: stable@vger.kernel.org Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 68694da0d21d..18f87b0def12 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -19,7 +19,7 @@ /* * Tunable constants */ -#define ENDIO_HOOK_POOL_SIZE 10240 +#define ENDIO_HOOK_POOL_SIZE 1024 #define DEFERRED_SET_SIZE 64 #define MAPPING_POOL_SIZE 1024 #define PRISON_CELLS 1024 -- cgit v1.2.3 From 17b7d63f7ed10376e762fdfadbc65da6687d569a Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 27 Jul 2012 15:07:57 +0100 Subject: dm thin: clean up compiler warning Clean up "warning: dubious: !x & y". Also make it clear that __snapshotted_since() returns a bool and that dm_thin_lookup_result's 'shared' member is a flag. Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin-metadata.c | 2 +- drivers/md/dm-thin-metadata.h | 2 +- drivers/md/dm-thin.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 3e2907f0bc46..c858931d2dcb 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -1262,7 +1262,7 @@ dm_thin_id dm_thin_dev_id(struct dm_thin_device *td) return td->id; } -static int __snapshotted_since(struct dm_thin_device *td, uint32_t time) +static bool __snapshotted_since(struct dm_thin_device *td, uint32_t time) { return td->snapshotted_time > time; } diff --git a/drivers/md/dm-thin-metadata.h b/drivers/md/dm-thin-metadata.h index b88918ccdaf6..7b47c0a9a8e3 100644 --- a/drivers/md/dm-thin-metadata.h +++ b/drivers/md/dm-thin-metadata.h @@ -119,7 +119,7 @@ dm_thin_id dm_thin_dev_id(struct dm_thin_device *td); struct dm_thin_lookup_result { dm_block_t block; - int shared; + unsigned shared:1; }; /* diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 18f87b0def12..0bb9e646e215 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -1218,7 +1218,7 @@ static void process_discard(struct thin_c *tc, struct bio *bio) */ m = get_next_mapping(pool); m->tc = tc; - m->pass_discard = (!lookup_result.shared) & pool->pf.discard_passdown; + m->pass_discard = (!lookup_result.shared) && pool->pf.discard_passdown; m->virt_block = block; m->data_block = lookup_result.block; m->cell = cell; -- cgit v1.2.3 From af7346ebbda5f4a95da71359231d32cb136bd246 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 27 Jul 2012 15:07:58 +0100 Subject: dm: remove unused flush target method Remove unused dm_flush_fn .flush target method from header. This was left-over from the FLUSH/FUA conversion and is no longer used. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- include/linux/device-mapper.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 98f34b886f95..d70cbb2ada25 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -66,7 +66,6 @@ typedef int (*dm_request_endio_fn) (struct dm_target *ti, struct request *clone, int error, union map_info *map_context); -typedef void (*dm_flush_fn) (struct dm_target *ti); typedef void (*dm_presuspend_fn) (struct dm_target *ti); typedef void (*dm_postsuspend_fn) (struct dm_target *ti); typedef int (*dm_preresume_fn) (struct dm_target *ti); @@ -139,7 +138,6 @@ struct target_type { dm_map_request_fn map_rq; dm_endio_fn end_io; dm_request_endio_fn rq_end_io; - dm_flush_fn flush; dm_presuspend_fn presuspend; dm_postsuspend_fn postsuspend; dm_preresume_fn preresume; -- cgit v1.2.3 From 3caf6d73d4dc163b2d135e0b52b052a2b63e5216 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 27 Jul 2012 15:07:58 +0100 Subject: dm persistent data: remove debug space map checker Remove debug space map checker from dm persistent data. The space map checker is a wrapper for other space maps that double checks the reference counts are correct. It holds all these reference counts in memory rather than on disk, so uses a lot of memory and is thus restricted to small pools. As yet, this checker hasn't found any issues, but has caused a few of its own due to people turning it on by default with larger pools. Removing. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/Kconfig | 9 - drivers/md/persistent-data/Makefile | 1 - drivers/md/persistent-data/dm-space-map-checker.c | 446 --------------------- drivers/md/persistent-data/dm-space-map-checker.h | 26 -- drivers/md/persistent-data/dm-space-map-disk.c | 34 +- .../md/persistent-data/dm-transaction-manager.c | 29 +- 6 files changed, 11 insertions(+), 534 deletions(-) delete mode 100644 drivers/md/persistent-data/dm-space-map-checker.c delete mode 100644 drivers/md/persistent-data/dm-space-map-checker.h diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 10f122a3a856..1eee45b69b71 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -260,15 +260,6 @@ config DM_DEBUG_BLOCK_STACK_TRACING If unsure, say N. -config DM_DEBUG_SPACE_MAPS - boolean "Extra validation for thin provisioning space maps" - depends on DM_THIN_PROVISIONING - ---help--- - Enable this for messages that may help debug problems with the - space maps used by thin provisioning. - - If unsure, say N. - config DM_MIRROR tristate "Mirror target" depends on BLK_DEV_DM diff --git a/drivers/md/persistent-data/Makefile b/drivers/md/persistent-data/Makefile index cfa95f662230..d8e7cb767c1e 100644 --- a/drivers/md/persistent-data/Makefile +++ b/drivers/md/persistent-data/Makefile @@ -1,7 +1,6 @@ obj-$(CONFIG_DM_PERSISTENT_DATA) += dm-persistent-data.o dm-persistent-data-objs := \ dm-block-manager.o \ - dm-space-map-checker.o \ dm-space-map-common.o \ dm-space-map-disk.o \ dm-space-map-metadata.o \ diff --git a/drivers/md/persistent-data/dm-space-map-checker.c b/drivers/md/persistent-data/dm-space-map-checker.c deleted file mode 100644 index fc90c11620ad..000000000000 --- a/drivers/md/persistent-data/dm-space-map-checker.c +++ /dev/null @@ -1,446 +0,0 @@ -/* - * Copyright (C) 2011 Red Hat, Inc. - * - * This file is released under the GPL. - */ - -#include "dm-space-map-checker.h" - -#include -#include -#include - -#ifdef CONFIG_DM_DEBUG_SPACE_MAPS - -#define DM_MSG_PREFIX "space map checker" - -/*----------------------------------------------------------------*/ - -struct count_array { - dm_block_t nr; - dm_block_t nr_free; - - uint32_t *counts; -}; - -static int ca_get_count(struct count_array *ca, dm_block_t b, uint32_t *count) -{ - if (b >= ca->nr) - return -EINVAL; - - *count = ca->counts[b]; - return 0; -} - -static int ca_count_more_than_one(struct count_array *ca, dm_block_t b, int *r) -{ - if (b >= ca->nr) - return -EINVAL; - - *r = ca->counts[b] > 1; - return 0; -} - -static int ca_set_count(struct count_array *ca, dm_block_t b, uint32_t count) -{ - uint32_t old_count; - - if (b >= ca->nr) - return -EINVAL; - - old_count = ca->counts[b]; - - if (!count && old_count) - ca->nr_free++; - - else if (count && !old_count) - ca->nr_free--; - - ca->counts[b] = count; - return 0; -} - -static int ca_inc_block(struct count_array *ca, dm_block_t b) -{ - if (b >= ca->nr) - return -EINVAL; - - ca_set_count(ca, b, ca->counts[b] + 1); - return 0; -} - -static int ca_dec_block(struct count_array *ca, dm_block_t b) -{ - if (b >= ca->nr) - return -EINVAL; - - BUG_ON(ca->counts[b] == 0); - ca_set_count(ca, b, ca->counts[b] - 1); - return 0; -} - -static int ca_create(struct count_array *ca, struct dm_space_map *sm) -{ - int r; - dm_block_t nr_blocks; - - r = dm_sm_get_nr_blocks(sm, &nr_blocks); - if (r) - return r; - - ca->nr = nr_blocks; - ca->nr_free = nr_blocks; - - if (!nr_blocks) - ca->counts = NULL; - else { - ca->counts = vzalloc(sizeof(*ca->counts) * nr_blocks); - if (!ca->counts) - return -ENOMEM; - } - - return 0; -} - -static void ca_destroy(struct count_array *ca) -{ - vfree(ca->counts); -} - -static int ca_load(struct count_array *ca, struct dm_space_map *sm) -{ - int r; - uint32_t count; - dm_block_t nr_blocks, i; - - r = dm_sm_get_nr_blocks(sm, &nr_blocks); - if (r) - return r; - - BUG_ON(ca->nr != nr_blocks); - - DMWARN("Loading debug space map from disk. This may take some time"); - for (i = 0; i < nr_blocks; i++) { - r = dm_sm_get_count(sm, i, &count); - if (r) { - DMERR("load failed"); - return r; - } - - ca_set_count(ca, i, count); - } - DMWARN("Load complete"); - - return 0; -} - -static int ca_extend(struct count_array *ca, dm_block_t extra_blocks) -{ - dm_block_t nr_blocks = ca->nr + extra_blocks; - uint32_t *counts = vzalloc(sizeof(*counts) * nr_blocks); - if (!counts) - return -ENOMEM; - - if (ca->counts) { - memcpy(counts, ca->counts, sizeof(*counts) * ca->nr); - ca_destroy(ca); - } - ca->nr = nr_blocks; - ca->nr_free += extra_blocks; - ca->counts = counts; - return 0; -} - -static int ca_commit(struct count_array *old, struct count_array *new) -{ - if (old->nr != new->nr) { - BUG_ON(old->nr > new->nr); - ca_extend(old, new->nr - old->nr); - } - - BUG_ON(old->nr != new->nr); - old->nr_free = new->nr_free; - memcpy(old->counts, new->counts, sizeof(*old->counts) * old->nr); - return 0; -} - -/*----------------------------------------------------------------*/ - -struct sm_checker { - struct dm_space_map sm; - - struct count_array old_counts; - struct count_array counts; - - struct dm_space_map *real_sm; -}; - -static void sm_checker_destroy(struct dm_space_map *sm) -{ - struct sm_checker *smc = container_of(sm, struct sm_checker, sm); - - dm_sm_destroy(smc->real_sm); - ca_destroy(&smc->old_counts); - ca_destroy(&smc->counts); - kfree(smc); -} - -static int sm_checker_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count) -{ - struct sm_checker *smc = container_of(sm, struct sm_checker, sm); - int r = dm_sm_get_nr_blocks(smc->real_sm, count); - if (!r) - BUG_ON(smc->old_counts.nr != *count); - return r; -} - -static int sm_checker_get_nr_free(struct dm_space_map *sm, dm_block_t *count) -{ - struct sm_checker *smc = container_of(sm, struct sm_checker, sm); - int r = dm_sm_get_nr_free(smc->real_sm, count); - if (!r) { - /* - * Slow, but we know it's correct. - */ - dm_block_t b, n = 0; - for (b = 0; b < smc->old_counts.nr; b++) - if (smc->old_counts.counts[b] == 0 && - smc->counts.counts[b] == 0) - n++; - - if (n != *count) - DMERR("free block counts differ, checker %u, sm-disk:%u", - (unsigned) n, (unsigned) *count); - } - return r; -} - -static int sm_checker_new_block(struct dm_space_map *sm, dm_block_t *b) -{ - struct sm_checker *smc = container_of(sm, struct sm_checker, sm); - int r = dm_sm_new_block(smc->real_sm, b); - - if (!r) { - BUG_ON(*b >= smc->old_counts.nr); - BUG_ON(smc->old_counts.counts[*b] != 0); - BUG_ON(*b >= smc->counts.nr); - BUG_ON(smc->counts.counts[*b] != 0); - ca_set_count(&smc->counts, *b, 1); - } - - return r; -} - -static int sm_checker_inc_block(struct dm_space_map *sm, dm_block_t b) -{ - struct sm_checker *smc = container_of(sm, struct sm_checker, sm); - int r = dm_sm_inc_block(smc->real_sm, b); - int r2 = ca_inc_block(&smc->counts, b); - BUG_ON(r != r2); - return r; -} - -static int sm_checker_dec_block(struct dm_space_map *sm, dm_block_t b) -{ - struct sm_checker *smc = container_of(sm, struct sm_checker, sm); - int r = dm_sm_dec_block(smc->real_sm, b); - int r2 = ca_dec_block(&smc->counts, b); - BUG_ON(r != r2); - return r; -} - -static int sm_checker_get_count(struct dm_space_map *sm, dm_block_t b, uint32_t *result) -{ - struct sm_checker *smc = container_of(sm, struct sm_checker, sm); - uint32_t result2 = 0; - int r = dm_sm_get_count(smc->real_sm, b, result); - int r2 = ca_get_count(&smc->counts, b, &result2); - - BUG_ON(r != r2); - if (!r) - BUG_ON(*result != result2); - return r; -} - -static int sm_checker_count_more_than_one(struct dm_space_map *sm, dm_block_t b, int *result) -{ - struct sm_checker *smc = container_of(sm, struct sm_checker, sm); - int result2 = 0; - int r = dm_sm_count_is_more_than_one(smc->real_sm, b, result); - int r2 = ca_count_more_than_one(&smc->counts, b, &result2); - - BUG_ON(r != r2); - if (!r) - BUG_ON(!(*result) && result2); - return r; -} - -static int sm_checker_set_count(struct dm_space_map *sm, dm_block_t b, uint32_t count) -{ - struct sm_checker *smc = container_of(sm, struct sm_checker, sm); - uint32_t old_rc; - int r = dm_sm_set_count(smc->real_sm, b, count); - int r2; - - BUG_ON(b >= smc->counts.nr); - old_rc = smc->counts.counts[b]; - r2 = ca_set_count(&smc->counts, b, count); - BUG_ON(r != r2); - - return r; -} - -static int sm_checker_commit(struct dm_space_map *sm) -{ - struct sm_checker *smc = container_of(sm, struct sm_checker, sm); - int r; - - r = dm_sm_commit(smc->real_sm); - if (r) - return r; - - r = ca_commit(&smc->old_counts, &smc->counts); - if (r) - return r; - - return 0; -} - -static int sm_checker_extend(struct dm_space_map *sm, dm_block_t extra_blocks) -{ - struct sm_checker *smc = container_of(sm, struct sm_checker, sm); - int r = dm_sm_extend(smc->real_sm, extra_blocks); - if (r) - return r; - - return ca_extend(&smc->counts, extra_blocks); -} - -static int sm_checker_root_size(struct dm_space_map *sm, size_t *result) -{ - struct sm_checker *smc = container_of(sm, struct sm_checker, sm); - return dm_sm_root_size(smc->real_sm, result); -} - -static int sm_checker_copy_root(struct dm_space_map *sm, void *copy_to_here_le, size_t len) -{ - struct sm_checker *smc = container_of(sm, struct sm_checker, sm); - return dm_sm_copy_root(smc->real_sm, copy_to_here_le, len); -} - -/*----------------------------------------------------------------*/ - -static struct dm_space_map ops_ = { - .destroy = sm_checker_destroy, - .get_nr_blocks = sm_checker_get_nr_blocks, - .get_nr_free = sm_checker_get_nr_free, - .inc_block = sm_checker_inc_block, - .dec_block = sm_checker_dec_block, - .new_block = sm_checker_new_block, - .get_count = sm_checker_get_count, - .count_is_more_than_one = sm_checker_count_more_than_one, - .set_count = sm_checker_set_count, - .commit = sm_checker_commit, - .extend = sm_checker_extend, - .root_size = sm_checker_root_size, - .copy_root = sm_checker_copy_root -}; - -struct dm_space_map *dm_sm_checker_create(struct dm_space_map *sm) -{ - int r; - struct sm_checker *smc; - - if (IS_ERR_OR_NULL(sm)) - return ERR_PTR(-EINVAL); - - smc = kmalloc(sizeof(*smc), GFP_KERNEL); - if (!smc) - return ERR_PTR(-ENOMEM); - - memcpy(&smc->sm, &ops_, sizeof(smc->sm)); - r = ca_create(&smc->old_counts, sm); - if (r) { - kfree(smc); - return ERR_PTR(r); - } - - r = ca_create(&smc->counts, sm); - if (r) { - ca_destroy(&smc->old_counts); - kfree(smc); - return ERR_PTR(r); - } - - smc->real_sm = sm; - - r = ca_load(&smc->counts, sm); - if (r) { - ca_destroy(&smc->counts); - ca_destroy(&smc->old_counts); - kfree(smc); - return ERR_PTR(r); - } - - r = ca_commit(&smc->old_counts, &smc->counts); - if (r) { - ca_destroy(&smc->counts); - ca_destroy(&smc->old_counts); - kfree(smc); - return ERR_PTR(r); - } - - return &smc->sm; -} -EXPORT_SYMBOL_GPL(dm_sm_checker_create); - -struct dm_space_map *dm_sm_checker_create_fresh(struct dm_space_map *sm) -{ - int r; - struct sm_checker *smc; - - if (IS_ERR_OR_NULL(sm)) - return ERR_PTR(-EINVAL); - - smc = kmalloc(sizeof(*smc), GFP_KERNEL); - if (!smc) - return ERR_PTR(-ENOMEM); - - memcpy(&smc->sm, &ops_, sizeof(smc->sm)); - r = ca_create(&smc->old_counts, sm); - if (r) { - kfree(smc); - return ERR_PTR(r); - } - - r = ca_create(&smc->counts, sm); - if (r) { - ca_destroy(&smc->old_counts); - kfree(smc); - return ERR_PTR(r); - } - - smc->real_sm = sm; - return &smc->sm; -} -EXPORT_SYMBOL_GPL(dm_sm_checker_create_fresh); - -/*----------------------------------------------------------------*/ - -#else - -struct dm_space_map *dm_sm_checker_create(struct dm_space_map *sm) -{ - return sm; -} -EXPORT_SYMBOL_GPL(dm_sm_checker_create); - -struct dm_space_map *dm_sm_checker_create_fresh(struct dm_space_map *sm) -{ - return sm; -} -EXPORT_SYMBOL_GPL(dm_sm_checker_create_fresh); - -/*----------------------------------------------------------------*/ - -#endif diff --git a/drivers/md/persistent-data/dm-space-map-checker.h b/drivers/md/persistent-data/dm-space-map-checker.h deleted file mode 100644 index 444dccf6688c..000000000000 --- a/drivers/md/persistent-data/dm-space-map-checker.h +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Copyright (C) 2011 Red Hat, Inc. - * - * This file is released under the GPL. - */ - -#ifndef SNAPSHOTS_SPACE_MAP_CHECKER_H -#define SNAPSHOTS_SPACE_MAP_CHECKER_H - -#include "dm-space-map.h" - -/*----------------------------------------------------------------*/ - -/* - * This space map wraps a real on-disk space map, and verifies all of its - * operations. It uses a lot of memory, so only use if you have a specific - * problem that you're debugging. - * - * Ownership of @sm passes. - */ -struct dm_space_map *dm_sm_checker_create(struct dm_space_map *sm); -struct dm_space_map *dm_sm_checker_create_fresh(struct dm_space_map *sm); - -/*----------------------------------------------------------------*/ - -#endif diff --git a/drivers/md/persistent-data/dm-space-map-disk.c b/drivers/md/persistent-data/dm-space-map-disk.c index 3d0ed5332883..f6d29e614ab7 100644 --- a/drivers/md/persistent-data/dm-space-map-disk.c +++ b/drivers/md/persistent-data/dm-space-map-disk.c @@ -4,7 +4,6 @@ * This file is released under the GPL. */ -#include "dm-space-map-checker.h" #include "dm-space-map-common.h" #include "dm-space-map-disk.h" #include "dm-space-map.h" @@ -252,9 +251,8 @@ static struct dm_space_map ops = { .copy_root = sm_disk_copy_root }; -static struct dm_space_map *dm_sm_disk_create_real( - struct dm_transaction_manager *tm, - dm_block_t nr_blocks) +struct dm_space_map *dm_sm_disk_create(struct dm_transaction_manager *tm, + dm_block_t nr_blocks) { int r; struct sm_disk *smd; @@ -285,27 +283,10 @@ bad: kfree(smd); return ERR_PTR(r); } - -struct dm_space_map *dm_sm_disk_create(struct dm_transaction_manager *tm, - dm_block_t nr_blocks) -{ - struct dm_space_map *sm = dm_sm_disk_create_real(tm, nr_blocks); - struct dm_space_map *smc; - - if (IS_ERR_OR_NULL(sm)) - return sm; - - smc = dm_sm_checker_create_fresh(sm); - if (IS_ERR(smc)) - dm_sm_destroy(sm); - - return smc; -} EXPORT_SYMBOL_GPL(dm_sm_disk_create); -static struct dm_space_map *dm_sm_disk_open_real( - struct dm_transaction_manager *tm, - void *root_le, size_t len) +struct dm_space_map *dm_sm_disk_open(struct dm_transaction_manager *tm, + void *root_le, size_t len) { int r; struct sm_disk *smd; @@ -332,13 +313,6 @@ bad: kfree(smd); return ERR_PTR(r); } - -struct dm_space_map *dm_sm_disk_open(struct dm_transaction_manager *tm, - void *root_le, size_t len) -{ - return dm_sm_checker_create( - dm_sm_disk_open_real(tm, root_le, len)); -} EXPORT_SYMBOL_GPL(dm_sm_disk_open); /*----------------------------------------------------------------*/ diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c index e5604b32d91f..86c3705052a4 100644 --- a/drivers/md/persistent-data/dm-transaction-manager.c +++ b/drivers/md/persistent-data/dm-transaction-manager.c @@ -5,7 +5,6 @@ */ #include "dm-transaction-manager.h" #include "dm-space-map.h" -#include "dm-space-map-checker.h" #include "dm-space-map-disk.h" #include "dm-space-map-metadata.h" #include "dm-persistent-data-internal.h" @@ -319,15 +318,14 @@ static int dm_tm_create_internal(struct dm_block_manager *bm, int create) { int r; - struct dm_space_map *inner; - inner = dm_sm_metadata_init(); - if (IS_ERR(inner)) - return PTR_ERR(inner); + *sm = dm_sm_metadata_init(); + if (IS_ERR(*sm)) + return PTR_ERR(*sm); - *tm = dm_tm_create(bm, inner); + *tm = dm_tm_create(bm, *sm); if (IS_ERR(*tm)) { - dm_sm_destroy(inner); + dm_sm_destroy(*sm); return PTR_ERR(*tm); } @@ -339,19 +337,13 @@ static int dm_tm_create_internal(struct dm_block_manager *bm, goto bad1; } - r = dm_sm_metadata_create(inner, *tm, dm_bm_nr_blocks(bm), + r = dm_sm_metadata_create(*sm, *tm, dm_bm_nr_blocks(bm), sb_location); if (r) { DMERR("couldn't create metadata space map"); goto bad2; } - *sm = dm_sm_checker_create(inner); - if (IS_ERR(*sm)) { - r = PTR_ERR(*sm); - goto bad2; - } - } else { r = dm_bm_write_lock(dm_tm_get_bm(*tm), sb_location, sb_validator, sblock); @@ -360,19 +352,13 @@ static int dm_tm_create_internal(struct dm_block_manager *bm, goto bad1; } - r = dm_sm_metadata_open(inner, *tm, + r = dm_sm_metadata_open(*sm, *tm, dm_block_data(*sblock) + root_offset, root_max_len); if (r) { DMERR("couldn't open metadata space map"); goto bad2; } - - *sm = dm_sm_checker_create(inner); - if (IS_ERR(*sm)) { - r = PTR_ERR(*sm); - goto bad2; - } } return 0; @@ -381,7 +367,6 @@ bad2: dm_tm_unlock(*tm, *sblock); bad1: dm_tm_destroy(*tm); - dm_sm_destroy(inner); return r; } -- cgit v1.2.3 From d973ac196b7668c198f3c1338d8b07c13a3e7713 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 27 Jul 2012 15:07:58 +0100 Subject: dm thin metadata: remove pointless label from __commit_transaction Remove the pointless label 'out' from __commit_transaction in dm-thin-metadata.c Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin-metadata.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index c858931d2dcb..acd89ec825d5 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -597,31 +597,31 @@ static int __commit_transaction(struct dm_pool_metadata *pmd) r = __write_changed_details(pmd); if (r < 0) - goto out; + return r; if (!pmd->need_commit) - goto out; + return r; r = dm_sm_commit(pmd->data_sm); if (r < 0) - goto out; + return r; r = dm_tm_pre_commit(pmd->tm); if (r < 0) - goto out; + return r; r = dm_sm_root_size(pmd->metadata_sm, &metadata_len); if (r < 0) - goto out; + return r; r = dm_sm_root_size(pmd->data_sm, &data_len); if (r < 0) - goto out; + return r; r = dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, &sb_validator, &sblock); if (r) - goto out; + return r; disk_super = dm_block_data(sblock); disk_super->time = cpu_to_le32(pmd->time); @@ -644,7 +644,6 @@ static int __commit_transaction(struct dm_pool_metadata *pmd) if (!r) pmd->need_commit = 0; -out: return r; out_locked: -- cgit v1.2.3 From 8c971178a788c70e8d6db5c3a813de1a1f54e5b7 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 27 Jul 2012 15:07:58 +0100 Subject: dm thin metadata: introduce THIN_MAX_CONCURRENT_LOCKS Introduce THIN_MAX_CONCURRENT_LOCKS into dm-thin-metadata to give a name to an otherwise "magic" number. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin-metadata.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index acd89ec825d5..31f9827dfb56 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -80,6 +80,12 @@ #define THIN_METADATA_CACHE_SIZE 64 #define SECTOR_TO_BLOCK_SHIFT 3 +/* + * 3 for btree insert + + * 2 for btree lookup used within space map + */ +#define THIN_MAX_CONCURRENT_LOCKS 5 + /* This should be plenty */ #define SPACE_MAP_ROOT_SIZE 128 @@ -668,13 +674,9 @@ struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, return ERR_PTR(-ENOMEM); } - /* - * Max hex locks: - * 3 for btree insert + - * 2 for btree lookup used within space map - */ bm = dm_block_manager_create(bdev, THIN_METADATA_BLOCK_SIZE, - THIN_METADATA_CACHE_SIZE, 5); + THIN_METADATA_CACHE_SIZE, + THIN_MAX_CONCURRENT_LOCKS); if (!bm) { DMERR("could not create block manager"); kfree(pmd); -- cgit v1.2.3 From 70c48611024791ccf83aca6195b58a5db9325485 Mon Sep 17 00:00:00 2001 From: Alasdair G Kergon Date: Fri, 27 Jul 2012 15:07:59 +0100 Subject: dm snapshot: remove redundant assignment in merge fn Remove redundant bvm->bi_sector self-assignment in dm snapshot's origin_merge(). Signed-off-by: Alasdair G Kergon --- drivers/md/dm-snap.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 6f758870fc19..a228c20e40b3 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -2176,7 +2176,6 @@ static int origin_merge(struct dm_target *ti, struct bvec_merge_data *bvm, return max_size; bvm->bi_bdev = dev->bdev; - bvm->bi_sector = bvm->bi_sector; return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); } -- cgit v1.2.3 From 1a66a08ae82b16eb40705ad112ff28873981af92 Mon Sep 17 00:00:00 2001 From: majianpeng Date: Fri, 27 Jul 2012 15:07:59 +0100 Subject: dm: replace simple_strtoul Replace obsolete simple_strtoul() with kstrtou8/kstrtouint. Signed-off-by: majianpeng Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-crypt.c | 5 +---- drivers/md/dm-exception-store.c | 13 ++++--------- drivers/md/dm-stripe.c | 7 ++----- 3 files changed, 7 insertions(+), 18 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 3f06df59fd82..e2b32401ecc7 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1241,7 +1241,6 @@ static void kcryptd_queue_crypt(struct dm_crypt_io *io) static int crypt_decode_key(u8 *key, char *hex, unsigned int size) { char buffer[3]; - char *endp; unsigned int i; buffer[2] = '\0'; @@ -1250,9 +1249,7 @@ static int crypt_decode_key(u8 *key, char *hex, unsigned int size) buffer[0] = *hex++; buffer[1] = *hex++; - key[i] = (u8)simple_strtoul(buffer, &endp, 16); - - if (endp != &buffer[2]) + if (kstrtou8(buffer, 16, &key[i])) return -EINVAL; } diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c index aa70f7d43a1a..ebaa4f803eec 100644 --- a/drivers/md/dm-exception-store.c +++ b/drivers/md/dm-exception-store.c @@ -142,24 +142,19 @@ EXPORT_SYMBOL(dm_exception_store_type_unregister); static int set_chunk_size(struct dm_exception_store *store, const char *chunk_size_arg, char **error) { - unsigned long chunk_size_ulong; - char *value; + unsigned chunk_size; - chunk_size_ulong = simple_strtoul(chunk_size_arg, &value, 10); - if (*chunk_size_arg == '\0' || *value != '\0' || - chunk_size_ulong > UINT_MAX) { + if (kstrtouint(chunk_size_arg, 10, &chunk_size)) { *error = "Invalid chunk size"; return -EINVAL; } - if (!chunk_size_ulong) { + if (!chunk_size) { store->chunk_size = store->chunk_mask = store->chunk_shift = 0; return 0; } - return dm_exception_store_set_chunk_size(store, - (unsigned) chunk_size_ulong, - error); + return dm_exception_store_set_chunk_size(store, chunk_size, error); } int dm_exception_store_set_chunk_size(struct dm_exception_store *store, diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 35c94ff24ad5..183db5d3e48e 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -99,7 +99,6 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) sector_t width; uint32_t stripes; uint32_t chunk_size; - char *end; int r; unsigned int i; @@ -108,14 +107,12 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) return -EINVAL; } - stripes = simple_strtoul(argv[0], &end, 10); - if (!stripes || *end) { + if (kstrtouint(argv[0], 10, &stripes) || !stripes) { ti->error = "Invalid stripe count"; return -EINVAL; } - chunk_size = simple_strtoul(argv[1], &end, 10); - if (*end) { + if (kstrtouint(argv[1], 10, &chunk_size)) { ti->error = "Invalid chunk_size"; return -EINVAL; } -- cgit v1.2.3 From f09996c993e256fce4b920588959866998d51250 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 27 Jul 2012 15:07:59 +0100 Subject: dm thin: provide specific errors for two table load failure cases Provide specific error message strings for two pool_ctr() failure cases that currently give just "Unknown error". Reference: test_two_pools_pointing_to_the_same_metadata_fails and test_different_pool_cant_replace_pool in thinp-test-suite. Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 0bb9e646e215..e89f8e7d8a33 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -1828,15 +1828,19 @@ static struct pool *__pool_find(struct mapped_device *pool_md, struct pool *pool = __pool_table_lookup_metadata_dev(metadata_dev); if (pool) { - if (pool->pool_md != pool_md) + if (pool->pool_md != pool_md) { + *error = "metadata device already in use by a pool"; return ERR_PTR(-EBUSY); + } __pool_inc(pool); } else { pool = __pool_table_lookup(pool_md); if (pool) { - if (pool->md_dev != metadata_dev) + if (pool->md_dev != metadata_dev) { + *error = "different pool cannot replace a pool"; return ERR_PTR(-EINVAL); + } __pool_inc(pool); } else { -- cgit v1.2.3 From f14fa693c93078444b5e95d7cad78ead0383ad50 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 27 Jul 2012 15:08:00 +0100 Subject: dm stripe: fix size test dm-stripe is supposed to ensure that all the space allocated to the stripes is fully used and that all stripes are the same size. This patch fixes the test. It checks that device length is divisible by the chunk size and checks that the resulting quotient is divisible by the number of stripes (which is equivalent to testing if device length is divisible by chunk_size * stripes). Previously, the code only tested that the number of sectors in the target was divisible by each of the chunk size and the number of stripes separately, which could leave entire stripes unused. (A setup that genuinely needs some stripes to be shorter than others can be created by concatenating striped targets.) Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- Documentation/device-mapper/striped.txt | 3 +-- drivers/md/dm-stripe.c | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/Documentation/device-mapper/striped.txt b/Documentation/device-mapper/striped.txt index f34d3236b9da..953fe1d68068 100644 --- a/Documentation/device-mapper/striped.txt +++ b/Documentation/device-mapper/striped.txt @@ -16,8 +16,7 @@ Parameters: [ ]+ : Starting sector within the device. One or more underlying devices can be specified. The striped device size must -be a multiple of the chunk size and a multiple of the number of underlying -devices. +be a multiple of the chunk size multiplied by the number of underlying devices. Example scripts diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 183db5d3e48e..1f6316ed2b31 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -132,7 +132,6 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) return -EINVAL; } - width = ti->len; if (sector_div(width, stripes)) { ti->error = "Target length not divisible by " "number of stripes"; -- cgit v1.2.3 From 1df05483d758ea43abc375869fbe06be506ba827 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 27 Jul 2012 15:08:00 +0100 Subject: dm stripe: remove stripes_mask The structure stripe_c contains a stripes_mask field. This field is useless because it can be trivially calculated by subtracting one from stripes. It is used only at one place. This patch removes it. The patch also changes ffs(stripes) - 1 to __ffs(stripes). Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-stripe.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 1f6316ed2b31..6931bd18b615 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -26,7 +26,6 @@ struct stripe { struct stripe_c { uint32_t stripes; int stripes_shift; - sector_t stripes_mask; /* The size of this target / num. stripes */ sector_t stripe_width; @@ -163,10 +162,8 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (stripes & (stripes - 1)) sc->stripes_shift = -1; - else { - sc->stripes_shift = ffs(stripes) - 1; - sc->stripes_mask = ((sector_t) stripes) - 1; - } + else + sc->stripes_shift = __ffs(stripes); ti->split_io = chunk_size; ti->num_flush_requests = stripes; @@ -218,7 +215,7 @@ static void stripe_map_sector(struct stripe_c *sc, sector_t sector, if (sc->stripes_shift < 0) *stripe = sector_div(chunk, sc->stripes); else { - *stripe = chunk & sc->stripes_mask; + *stripe = chunk & (sc->stripes - 1); chunk >>= sc->stripes_shift; } -- cgit v1.2.3 From 542f90381422676544382d4071ba44a2de90a0c1 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 27 Jul 2012 15:08:00 +0100 Subject: dm: support non power of two target max_io_len Remove the restriction that limits a target's specified maximum incoming I/O size to be a power of 2. Rename this setting from 'split_io' to the less-ambiguous 'max_io_len'. Change it from sector_t to uint32_t, which is plenty big enough, and introduce a wrapper function dm_set_target_max_io_len() to set it. Use sector_div() to process it now that it is not necessarily a power of 2. Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-raid.c | 11 +++++------ drivers/md/dm-raid1.c | 6 +++++- drivers/md/dm-snap.c | 27 +++++++++++++++------------ drivers/md/dm-stripe.c | 5 ++++- drivers/md/dm-thin.c | 5 ++++- drivers/md/dm.c | 35 +++++++++++++++++++++++++++-------- include/linux/device-mapper.h | 9 +++++++-- include/linux/dm-ioctl.h | 4 ++-- 8 files changed, 69 insertions(+), 33 deletions(-) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 017c34d78d61..858a8b70811c 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -353,6 +353,7 @@ static int parse_raid_params(struct raid_set *rs, char **argv, { unsigned i, rebuild_cnt = 0; unsigned long value, region_size = 0; + sector_t max_io_len; char *key; /* @@ -522,14 +523,12 @@ static int parse_raid_params(struct raid_set *rs, char **argv, return -EINVAL; if (rs->md.chunk_sectors) - rs->ti->split_io = rs->md.chunk_sectors; + max_io_len = rs->md.chunk_sectors; else - rs->ti->split_io = region_size; + max_io_len = region_size; - if (rs->md.chunk_sectors) - rs->ti->split_io = rs->md.chunk_sectors; - else - rs->ti->split_io = region_size; + if (dm_set_target_max_io_len(rs->ti, max_io_len)) + return -EINVAL; /* Assume there are no metadata devices until the drives are parsed */ rs->md.persistent = 0; diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index b58b7a33914a..819ccba65912 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -1081,7 +1081,11 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) } ti->private = ms; - ti->split_io = dm_rh_get_region_size(ms->rh); + + r = dm_set_target_max_io_len(ti, dm_rh_get_region_size(ms->rh)); + if (r) + goto err_free_context; + ti->num_flush_requests = 1; ti->num_discard_requests = 1; ti->discard_zeroes_data_unsupported = 1; diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index a228c20e40b3..6c0f3e33923a 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -691,7 +691,7 @@ static int dm_add_exception(void *context, chunk_t old, chunk_t new) * Return a minimum chunk size of all snapshots that have the specified origin. * Return zero if the origin has no snapshots. */ -static sector_t __minimum_chunk_size(struct origin *o) +static uint32_t __minimum_chunk_size(struct origin *o) { struct dm_snapshot *snap; unsigned chunk_size = 0; @@ -701,7 +701,7 @@ static sector_t __minimum_chunk_size(struct origin *o) chunk_size = min_not_zero(chunk_size, snap->store->chunk_size); - return chunk_size; + return (uint32_t) chunk_size; } /* @@ -1172,7 +1172,10 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->error = "Chunk size not set"; goto bad_read_metadata; } - ti->split_io = s->store->chunk_size; + + r = dm_set_target_max_io_len(ti, s->store->chunk_size); + if (r) + goto bad_read_metadata; return 0; @@ -1239,7 +1242,7 @@ static void __handover_exceptions(struct dm_snapshot *snap_src, snap_dest->store->snap = snap_dest; snap_src->store->snap = snap_src; - snap_dest->ti->split_io = snap_dest->store->chunk_size; + snap_dest->ti->max_io_len = snap_dest->store->chunk_size; snap_dest->valid = snap_src->valid; /* @@ -1817,9 +1820,9 @@ static void snapshot_resume(struct dm_target *ti) up_write(&s->lock); } -static sector_t get_origin_minimum_chunksize(struct block_device *bdev) +static uint32_t get_origin_minimum_chunksize(struct block_device *bdev) { - sector_t min_chunksize; + uint32_t min_chunksize; down_read(&_origins_lock); min_chunksize = __minimum_chunk_size(__lookup_origin(bdev)); @@ -1838,9 +1841,9 @@ static void snapshot_merge_resume(struct dm_target *ti) snapshot_resume(ti); /* - * snapshot-merge acts as an origin, so set ti->split_io + * snapshot-merge acts as an origin, so set ti->max_io_len */ - ti->split_io = get_origin_minimum_chunksize(s->origin->bdev); + ti->max_io_len = get_origin_minimum_chunksize(s->origin->bdev); start_merge(s); } @@ -2073,12 +2076,12 @@ static int origin_write_extent(struct dm_snapshot *merging_snap, struct origin *o; /* - * The origin's __minimum_chunk_size() got stored in split_io + * The origin's __minimum_chunk_size() got stored in max_io_len * by snapshot_merge_resume(). */ down_read(&_origins_lock); o = __lookup_origin(merging_snap->origin->bdev); - for (n = 0; n < size; n += merging_snap->ti->split_io) + for (n = 0; n < size; n += merging_snap->ti->max_io_len) if (__origin_write(&o->snapshots, sector + n, NULL) == DM_MAPIO_SUBMITTED) must_wait = 1; @@ -2138,14 +2141,14 @@ static int origin_map(struct dm_target *ti, struct bio *bio, } /* - * Set the target "split_io" field to the minimum of all the snapshots' + * Set the target "max_io_len" field to the minimum of all the snapshots' * chunk sizes. */ static void origin_resume(struct dm_target *ti) { struct dm_dev *dev = ti->private; - ti->split_io = get_origin_minimum_chunksize(dev->bdev); + ti->max_io_len = get_origin_minimum_chunksize(dev->bdev); } static int origin_status(struct dm_target *ti, status_type_t type, char *result, diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 6931bd18b615..992c9d4c3bd9 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -165,7 +165,10 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) else sc->stripes_shift = __ffs(stripes); - ti->split_io = chunk_size; + r = dm_set_target_max_io_len(ti, chunk_size); + if (r) + return r; + ti->num_flush_requests = stripes; ti->num_discard_requests = stripes; diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index e89f8e7d8a33..350bcf40485e 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -2628,7 +2628,10 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad_thin_open; } - ti->split_io = tc->pool->sectors_per_block; + r = dm_set_target_max_io_len(ti, tc->pool->sectors_per_block); + if (r) + goto bad_thin_open; + ti->num_flush_requests = 1; /* In case the pool supports discards, pass them on. */ diff --git a/drivers/md/dm.c b/drivers/md/dm.c index e24143cc2040..415c2803c0c9 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -968,22 +968,41 @@ static sector_t max_io_len_target_boundary(sector_t sector, struct dm_target *ti static sector_t max_io_len(sector_t sector, struct dm_target *ti) { sector_t len = max_io_len_target_boundary(sector, ti); + sector_t offset, max_len; /* - * Does the target need to split even further ? + * Does the target need to split even further? */ - if (ti->split_io) { - sector_t boundary; - sector_t offset = dm_target_offset(ti, sector); - boundary = ((offset + ti->split_io) & ~(ti->split_io - 1)) - - offset; - if (len > boundary) - len = boundary; + if (ti->max_io_len) { + offset = dm_target_offset(ti, sector); + if (unlikely(ti->max_io_len & (ti->max_io_len - 1))) + max_len = sector_div(offset, ti->max_io_len); + else + max_len = offset & (ti->max_io_len - 1); + max_len = ti->max_io_len - max_len; + + if (len > max_len) + len = max_len; } return len; } +int dm_set_target_max_io_len(struct dm_target *ti, sector_t len) +{ + if (len > UINT_MAX) { + DMERR("Specified maximum size of target IO (%llu) exceeds limit (%u)", + (unsigned long long)len, UINT_MAX); + ti->error = "Maximum size of target IO is too large"; + return -EINVAL; + } + + ti->max_io_len = (uint32_t) len; + + return 0; +} +EXPORT_SYMBOL_GPL(dm_set_target_max_io_len); + static void __map_bio(struct dm_target *ti, struct bio *clone, struct dm_target_io *tio) { diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index d70cbb2ada25..b19c1e189a68 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -186,8 +186,8 @@ struct dm_target { sector_t begin; sector_t len; - /* Always a power of 2 */ - sector_t split_io; + /* If non-zero, maximum size of I/O submitted to a target. */ + uint32_t max_io_len; /* * A number of zero-length barrier requests that will be submitted @@ -357,6 +357,11 @@ void dm_table_add_target_callbacks(struct dm_table *t, struct dm_target_callback */ int dm_table_complete(struct dm_table *t); +/* + * Target may require that it is never sent I/O larger than len. + */ +int __must_check dm_set_target_max_io_len(struct dm_target *ti, sector_t len); + /* * Table reference counting. */ diff --git a/include/linux/dm-ioctl.h b/include/linux/dm-ioctl.h index 75fd5573516e..3ece4eee84cb 100644 --- a/include/linux/dm-ioctl.h +++ b/include/linux/dm-ioctl.h @@ -268,8 +268,8 @@ enum { #define DM_VERSION_MAJOR 4 #define DM_VERSION_MINOR 22 -#define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2011-10-19)" +#define DM_VERSION_PATCHLEVEL 1 +#define DM_VERSION_EXTRA "-ioctl (2012-06-01)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ -- cgit v1.2.3 From eb850de608cc22e0199b3797cd5c0076bae6cda0 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 27 Jul 2012 15:08:01 +0100 Subject: dm stripe: support for non power of 2 chunksize Support non-power-of-2 chunk sizes with dm striping for proper alignment of stripe IO on storage that has non-power-of-2 optimal IO sizes (e.g. RAID6 10+2). Signed-off-by: Mike Snitzer Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- Documentation/device-mapper/striped.txt | 4 +-- drivers/md/dm-stripe.c | 44 ++++++++++++++------------------- 2 files changed, 21 insertions(+), 27 deletions(-) diff --git a/Documentation/device-mapper/striped.txt b/Documentation/device-mapper/striped.txt index 953fe1d68068..45f3b91ea4c3 100644 --- a/Documentation/device-mapper/striped.txt +++ b/Documentation/device-mapper/striped.txt @@ -9,8 +9,8 @@ devices in parallel. Parameters: [ ]+ : Number of underlying devices. - : Size of each chunk of data. Must be a power-of-2 and at - least as large as the system's PAGE_SIZE. + : Size of each chunk of data. Must be at least as + large as the system's PAGE_SIZE. : Full pathname to the underlying block-device, or a "major:minor" device-number. : Starting sector within the device. diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 992c9d4c3bd9..9690daa11c83 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -30,9 +30,7 @@ struct stripe_c { /* The size of this target / num. stripes */ sector_t stripe_width; - /* stripe chunk size */ - uint32_t chunk_shift; - sector_t chunk_mask; + uint32_t chunk_size; /* Needed for handling events */ struct dm_target *ti; @@ -90,7 +88,7 @@ static int get_stripe(struct dm_target *ti, struct stripe_c *sc, /* * Construct a striped mapping. - * [ ]+ + * [ ]+ */ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) { @@ -111,21 +109,14 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) return -EINVAL; } - if (kstrtouint(argv[1], 10, &chunk_size)) { - ti->error = "Invalid chunk_size"; - return -EINVAL; - } - - /* - * chunk_size is a power of two - */ - if (!is_power_of_2(chunk_size) || + if (kstrtouint(argv[1], 10, &chunk_size) || (chunk_size < (PAGE_SIZE >> SECTOR_SHIFT))) { - ti->error = "Invalid chunk size"; + ti->error = "Invalid chunk_size"; return -EINVAL; } - if (ti->len & (chunk_size - 1)) { + width = ti->len; + if (sector_div(width, chunk_size)) { ti->error = "Target length not divisible by " "chunk size"; return -EINVAL; @@ -172,8 +163,7 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->num_flush_requests = stripes; ti->num_discard_requests = stripes; - sc->chunk_shift = ffs(chunk_size) - 1; - sc->chunk_mask = ((sector_t) chunk_size) - 1; + sc->chunk_size = chunk_size; /* * Get the stripe destinations. @@ -212,8 +202,8 @@ static void stripe_dtr(struct dm_target *ti) static void stripe_map_sector(struct stripe_c *sc, sector_t sector, uint32_t *stripe, sector_t *result) { - sector_t offset = dm_target_offset(sc->ti, sector); - sector_t chunk = offset >> sc->chunk_shift; + sector_t chunk = dm_target_offset(sc->ti, sector); + sector_t chunk_offset = sector_div(chunk, sc->chunk_size); if (sc->stripes_shift < 0) *stripe = sector_div(chunk, sc->stripes); @@ -222,7 +212,7 @@ static void stripe_map_sector(struct stripe_c *sc, sector_t sector, chunk >>= sc->stripes_shift; } - *result = (chunk << sc->chunk_shift) | (offset & sc->chunk_mask); + *result = (chunk * sc->chunk_size) + chunk_offset; } static void stripe_map_range_sector(struct stripe_c *sc, sector_t sector, @@ -233,9 +223,13 @@ static void stripe_map_range_sector(struct stripe_c *sc, sector_t sector, stripe_map_sector(sc, sector, &stripe, result); if (stripe == target_stripe) return; - *result &= ~sc->chunk_mask; /* round down */ + + /* round down */ + sector = *result; + *result -= sector_div(sector, sc->chunk_size); + if (target_stripe < stripe) - *result += sc->chunk_mask + 1; /* next chunk */ + *result += sc->chunk_size; /* next chunk */ } static int stripe_map_discard(struct stripe_c *sc, struct bio *bio, @@ -320,7 +314,7 @@ static int stripe_status(struct dm_target *ti, case STATUSTYPE_TABLE: DMEMIT("%d %llu", sc->stripes, - (unsigned long long)sc->chunk_mask + 1); + (unsigned long long)sc->chunk_size); for (i = 0; i < sc->stripes; i++) DMEMIT(" %s %llu", sc->stripe[i].dev->name, (unsigned long long)sc->stripe[i].physical_start); @@ -387,7 +381,7 @@ static void stripe_io_hints(struct dm_target *ti, struct queue_limits *limits) { struct stripe_c *sc = ti->private; - unsigned chunk_size = (sc->chunk_mask + 1) << 9; + unsigned chunk_size = sc->chunk_size << SECTOR_SHIFT; blk_limits_io_min(limits, chunk_size); blk_limits_io_opt(limits, chunk_size * sc->stripes); @@ -415,7 +409,7 @@ static int stripe_merge(struct dm_target *ti, struct bvec_merge_data *bvm, static struct target_type stripe_target = { .name = "striped", - .version = {1, 4, 0}, + .version = {1, 5, 0}, .module = THIS_MODULE, .ctr = stripe_ctr, .dtr = stripe_dtr, -- cgit v1.2.3 From 8f069b41bce79b0c4b6076acd0f3d15df0a232ed Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 27 Jul 2012 15:08:01 +0100 Subject: dm stripe: remove minimum stripe size There is no technical limitation in device mapper that would prevent the dm-stripe target from using a stripe size smaller than page size. This patch removes the limit and makes stripe volumes portable across architectures with different page size. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-stripe.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 9690daa11c83..6a20e9ea0bfb 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -109,8 +109,7 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) return -EINVAL; } - if (kstrtouint(argv[1], 10, &chunk_size) || - (chunk_size < (PAGE_SIZE >> SECTOR_SHIFT))) { + if (kstrtouint(argv[1], 10, &chunk_size) || !chunk_size) { ti->error = "Invalid chunk_size"; return -EINVAL; } -- cgit v1.2.3 From 33d07c0dfab902a7c5420587984497dc05ab5c9c Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 27 Jul 2012 15:08:02 +0100 Subject: dm stripe: optimize chunk_size calculations dm-stripe is usually used with a chunk size that is a power of two. Use faster shifts and bit masks in such cases. stripe_width is already optimized in a similar way. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-stripe.c | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 6a20e9ea0bfb..9e8f4cc63d6c 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -31,6 +31,7 @@ struct stripe_c { sector_t stripe_width; uint32_t chunk_size; + int chunk_size_shift; /* Needed for handling events */ struct dm_target *ti; @@ -163,6 +164,10 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->num_discard_requests = stripes; sc->chunk_size = chunk_size; + if (chunk_size & (chunk_size - 1)) + sc->chunk_size_shift = -1; + else + sc->chunk_size_shift = __ffs(chunk_size); /* * Get the stripe destinations. @@ -202,7 +207,14 @@ static void stripe_map_sector(struct stripe_c *sc, sector_t sector, uint32_t *stripe, sector_t *result) { sector_t chunk = dm_target_offset(sc->ti, sector); - sector_t chunk_offset = sector_div(chunk, sc->chunk_size); + sector_t chunk_offset; + + if (sc->chunk_size_shift < 0) + chunk_offset = sector_div(chunk, sc->chunk_size); + else { + chunk_offset = chunk & (sc->chunk_size - 1); + chunk >>= sc->chunk_size_shift; + } if (sc->stripes_shift < 0) *stripe = sector_div(chunk, sc->stripes); @@ -211,7 +223,12 @@ static void stripe_map_sector(struct stripe_c *sc, sector_t sector, chunk >>= sc->stripes_shift; } - *result = (chunk * sc->chunk_size) + chunk_offset; + if (sc->chunk_size_shift < 0) + chunk *= sc->chunk_size; + else + chunk <<= sc->chunk_size_shift; + + *result = chunk + chunk_offset; } static void stripe_map_range_sector(struct stripe_c *sc, sector_t sector, @@ -225,7 +242,10 @@ static void stripe_map_range_sector(struct stripe_c *sc, sector_t sector, /* round down */ sector = *result; - *result -= sector_div(sector, sc->chunk_size); + if (sc->chunk_size_shift < 0) + *result -= sector_div(sector, sc->chunk_size); + else + *result = sector & ~(sector_t)(sc->chunk_size - 1); if (target_stripe < stripe) *result += sc->chunk_size; /* next chunk */ -- cgit v1.2.3 From 55f2b8bdb0c7387eb2dc645b9ecbe5d0faa6b54e Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 27 Jul 2012 15:08:02 +0100 Subject: dm thin: support for non power of 2 pool blocksize Non power of 2 blocksize support is needed to properly align thinp IO on storage that has non power of 2 optimal IO sizes (e.g. RAID6 10+2). Use sector_div to support non power of 2 blocksize for the pool's data device. This provides comparable performance to the power of 2 math that was performed until now (as tested on modern x86_64 hardware). The kernel currently assumes that limits->discard_granularity is a power of two so the thin target only enables discard support if the block size is a power of two. Eliminate pool structure's 'block_shift', 'offset_mask' and remaining 4 byte holes. Signed-off-by: Mike Snitzer Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin.c | 59 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 37 insertions(+), 22 deletions(-) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 350bcf40485e..f21d318d98f0 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -510,10 +510,8 @@ struct pool { struct block_device *md_dev; struct dm_pool_metadata *pmd; - uint32_t sectors_per_block; - unsigned block_shift; - dm_block_t offset_mask; dm_block_t low_water_blocks; + uint32_t sectors_per_block; struct pool_features pf; unsigned low_water_triggered:1; /* A dm event has been sent */ @@ -526,8 +524,8 @@ struct pool { struct work_struct worker; struct delayed_work waker; - unsigned ref_count; unsigned long last_commit_jiffies; + unsigned ref_count; spinlock_t lock; struct bio_list deferred_bios; @@ -679,16 +677,21 @@ static void requeue_io(struct thin_c *tc) static dm_block_t get_bio_block(struct thin_c *tc, struct bio *bio) { - return bio->bi_sector >> tc->pool->block_shift; + sector_t block_nr = bio->bi_sector; + + (void) sector_div(block_nr, tc->pool->sectors_per_block); + + return block_nr; } static void remap(struct thin_c *tc, struct bio *bio, dm_block_t block) { struct pool *pool = tc->pool; + sector_t bi_sector = bio->bi_sector; bio->bi_bdev = tc->pool_dev->bdev; - bio->bi_sector = (block << pool->block_shift) + - (bio->bi_sector & pool->offset_mask); + bio->bi_sector = (block * pool->sectors_per_block) + + sector_div(bi_sector, pool->sectors_per_block); } static void remap_to_origin(struct thin_c *tc, struct bio *bio) @@ -933,9 +936,10 @@ static void process_prepared(struct pool *pool, struct list_head *head, */ static int io_overlaps_block(struct pool *pool, struct bio *bio) { - return !(bio->bi_sector & pool->offset_mask) && - (bio->bi_size == (pool->sectors_per_block << SECTOR_SHIFT)); + sector_t bi_sector = bio->bi_sector; + return !sector_div(bi_sector, pool->sectors_per_block) && + (bio->bi_size == (pool->sectors_per_block << SECTOR_SHIFT)); } static int io_overwrites_block(struct pool *pool, struct bio *bio) @@ -1239,8 +1243,8 @@ static void process_discard(struct thin_c *tc, struct bio *bio) * part of the discard that is in a subsequent * block. */ - sector_t offset = bio->bi_sector - (block << pool->block_shift); - unsigned remaining = (pool->sectors_per_block - offset) << 9; + sector_t offset = bio->bi_sector - (block * pool->sectors_per_block); + unsigned remaining = (pool->sectors_per_block - offset) << SECTOR_SHIFT; bio->bi_size = min(bio->bi_size, remaining); cell_release_singleton(cell, bio); @@ -1722,8 +1726,6 @@ static struct pool *pool_create(struct mapped_device *pool_md, pool->pmd = pmd; pool->sectors_per_block = block_size; - pool->block_shift = ffs(block_size) - 1; - pool->offset_mask = block_size - 1; pool->low_water_blocks = 0; pool_features_init(&pool->pf); pool->prison = prison_create(PRISON_CELLS); @@ -1971,7 +1973,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) if (kstrtoul(argv[2], 10, &block_size) || !block_size || block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS || block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS || - !is_power_of_2(block_size)) { + block_size & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1)) { ti->error = "Invalid block size"; r = -EINVAL; goto out; @@ -2018,6 +2020,15 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) goto out_flags_changed; } + /* + * The block layer requires discard_granularity to be a power of 2. + */ + if (pf.discard_enabled && !is_power_of_2(block_size)) { + ti->error = "Discard support must be disabled when the block size is not a power of 2"; + r = -EINVAL; + goto out_flags_changed; + } + pt->pool = pool; pt->ti = ti; pt->metadata_dev = metadata_dev; @@ -2097,7 +2108,8 @@ static int pool_preresume(struct dm_target *ti) int r; struct pool_c *pt = ti->private; struct pool *pool = pt->pool; - dm_block_t data_size, sb_data_size; + sector_t data_size = ti->len; + dm_block_t sb_data_size; /* * Take control of the pool object. @@ -2106,7 +2118,8 @@ static int pool_preresume(struct dm_target *ti) if (r) return r; - data_size = ti->len >> pool->block_shift; + (void) sector_div(data_size, pool->sectors_per_block); + r = dm_pool_get_data_dev_size(pool->pmd, &sb_data_size); if (r) { DMERR("failed to retrieve data device size"); @@ -2115,7 +2128,7 @@ static int pool_preresume(struct dm_target *ti) if (data_size < sb_data_size) { DMERR("pool target too small, is %llu blocks (expected %llu)", - data_size, sb_data_size); + (unsigned long long)data_size, sb_data_size); return -EINVAL; } else if (data_size > sb_data_size) { @@ -2764,19 +2777,21 @@ static int thin_status(struct dm_target *ti, status_type_t type, static int thin_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data) { - dm_block_t blocks; + sector_t blocks; struct thin_c *tc = ti->private; + struct pool *pool = tc->pool; /* * We can't call dm_pool_get_data_dev_size() since that blocks. So * we follow a more convoluted path through to the pool's target. */ - if (!tc->pool->ti) + if (!pool->ti) return 0; /* nothing is bound */ - blocks = tc->pool->ti->len >> tc->pool->block_shift; + blocks = pool->ti->len; + (void) sector_div(blocks, pool->sectors_per_block); if (blocks) - return fn(ti, tc->pool_dev, 0, tc->pool->sectors_per_block * blocks, data); + return fn(ti, tc->pool_dev, 0, pool->sectors_per_block * blocks, data); return 0; } @@ -2793,7 +2808,7 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type thin_target = { .name = "thin", - .version = {1, 1, 0}, + .version = {1, 2, 0}, .module = THIS_MODULE, .ctr = thin_ctr, .dtr = thin_dtr, -- cgit v1.2.3 From 7acf0277cea0f2da89ffffcc9892bea23f618e63 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 27 Jul 2012 15:08:03 +0100 Subject: dm: introduce split_discard_requests This patch introduces a new variable split_discard_requests. It can be set by targets so that discard requests are split on max_io_len boundaries. When split_discard_requests is not set, discard requests are only split on boundaries between targets, as was the case before this patch. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm.c | 5 ++++- include/linux/device-mapper.h | 6 ++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 415c2803c0c9..4e09b6ff5b49 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1215,7 +1215,10 @@ static int __clone_and_map_discard(struct clone_info *ci) if (!ti->num_discard_requests) return -EOPNOTSUPP; - len = min(ci->sector_count, max_io_len_target_boundary(ci->sector, ti)); + if (!ti->split_discard_requests) + len = min(ci->sector_count, max_io_len_target_boundary(ci->sector, ti)); + else + len = min(ci->sector_count, max_io_len(ci->sector, ti)); __issue_target_requests(ci, ti, ti->num_discard_requests, len); diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index b19c1e189a68..8bdbbfce759a 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -217,6 +217,12 @@ struct dm_target { */ unsigned discards_supported:1; + /* + * Set if the target required discard request to be split + * on max_io_len boundary. + */ + unsigned split_discard_requests:1; + /* * Set if this target does not return zeroes on discarded blocks. */ -- cgit v1.2.3 From 4929630901100fdbfa19186ecf5ea2706f57719b Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 27 Jul 2012 15:08:03 +0100 Subject: dm thin: split discards on block boundary This patch sets the variable "ti->split_discard_requests" for the dm thin target so that device mapper core splits discard requests on a block boundary. Consequently, a discard request that spans multiple blocks is never sent to dm-thin. The patch also removes some code in process_discard that deals with discards that span multiple blocks. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index f21d318d98f0..828649256902 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -1238,15 +1238,10 @@ static void process_discard(struct thin_c *tc, struct bio *bio) } } else { /* - * This path is hit if people are ignoring - * limits->discard_granularity. It ignores any - * part of the discard that is in a subsequent - * block. + * The DM core makes sure that the discard doesn't span + * a block boundary. So we submit the discard of a + * partial block appropriately. */ - sector_t offset = bio->bi_sector - (block * pool->sectors_per_block); - unsigned remaining = (pool->sectors_per_block - offset) << SECTOR_SHIFT; - bio->bi_size = min(bio->bi_size, remaining); - cell_release_singleton(cell, bio); cell_release_singleton(cell2, bio); if ((!lookup_result.shared) && pool->pf.discard_passdown) @@ -2509,7 +2504,8 @@ static void set_discard_limits(struct pool *pool, struct queue_limits *limits) /* * This is just a hint, and not enforced. We have to cope with - * bios that overlap 2 blocks. + * bios that cover a block partially. A discard that spans a block + * boundary is not sent to this target. */ limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT; limits->discard_zeroes_data = pool->pf.zero_new_blocks; @@ -2652,6 +2648,8 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) ti->discards_supported = 1; ti->num_discard_requests = 1; ti->discard_zeroes_data_unsupported = 1; + /* Discard requests must be split on a block boundary */ + ti->split_discard_requests = 1; } dm_put(pool_md); -- cgit v1.2.3 From f9a8e0cd261fc05820539b0ea613686a32795406 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 27 Jul 2012 15:08:03 +0100 Subject: dm thin: optimize power of two block size dm-thin will be most likely used with a block size that is a power of two. So it should be optimized for this case. This patch changes division and modulo operations to shifts and bit masks if block size is a power of two. A test that bi_sector is divisible by a block size is removed from io_overlaps_block. Device mapper never sends bios that span a block boundary. Consequently, if we tested that bi_size is equivalent to block size, bi_sector must already be on a block boundary. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 828649256902..93e3e542cff9 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -512,6 +512,7 @@ struct pool { dm_block_t low_water_blocks; uint32_t sectors_per_block; + int sectors_per_block_shift; struct pool_features pf; unsigned low_water_triggered:1; /* A dm event has been sent */ @@ -679,7 +680,10 @@ static dm_block_t get_bio_block(struct thin_c *tc, struct bio *bio) { sector_t block_nr = bio->bi_sector; - (void) sector_div(block_nr, tc->pool->sectors_per_block); + if (tc->pool->sectors_per_block_shift < 0) + (void) sector_div(block_nr, tc->pool->sectors_per_block); + else + block_nr >>= tc->pool->sectors_per_block_shift; return block_nr; } @@ -690,8 +694,12 @@ static void remap(struct thin_c *tc, struct bio *bio, dm_block_t block) sector_t bi_sector = bio->bi_sector; bio->bi_bdev = tc->pool_dev->bdev; - bio->bi_sector = (block * pool->sectors_per_block) + - sector_div(bi_sector, pool->sectors_per_block); + if (tc->pool->sectors_per_block_shift < 0) + bio->bi_sector = (block * pool->sectors_per_block) + + sector_div(bi_sector, pool->sectors_per_block); + else + bio->bi_sector = (block << pool->sectors_per_block_shift) | + (bi_sector & (pool->sectors_per_block - 1)); } static void remap_to_origin(struct thin_c *tc, struct bio *bio) @@ -936,10 +944,7 @@ static void process_prepared(struct pool *pool, struct list_head *head, */ static int io_overlaps_block(struct pool *pool, struct bio *bio) { - sector_t bi_sector = bio->bi_sector; - - return !sector_div(bi_sector, pool->sectors_per_block) && - (bio->bi_size == (pool->sectors_per_block << SECTOR_SHIFT)); + return bio->bi_size == (pool->sectors_per_block << SECTOR_SHIFT); } static int io_overwrites_block(struct pool *pool, struct bio *bio) @@ -1721,6 +1726,10 @@ static struct pool *pool_create(struct mapped_device *pool_md, pool->pmd = pmd; pool->sectors_per_block = block_size; + if (block_size & (block_size - 1)) + pool->sectors_per_block_shift = -1; + else + pool->sectors_per_block_shift = __ffs(block_size); pool->low_water_blocks = 0; pool_features_init(&pool->pf); pool->prison = prison_create(PRISON_CELLS); -- cgit v1.2.3 From a58a935d5a1b2ad267017a68c3a1bca26226cc76 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 27 Jul 2012 15:08:04 +0100 Subject: dm mpath: add retain_attached_hw_handler feature A SCSI device handler might get attached to a device during the initial device scan. We do not necessarily want to override this when loading a multipath table, so this patch adds a new multipath feature argument "retain_attached_hw_handler". During SCSI device scan all loaded SCSI device handlers will be consulted for a match (via scsi_dh's provided .match). If a match is found that device handler will be attached. We need a way to have userspace multipathd's provided 'hw_handler' not override the already attached hardware handler. When specifying the new feature 'retain_attached_hw_handler' multipath will use the currently attached hardware handler instead of trying to attach the one specified during table load. If no hardware handler is attached the specified hardware handler will still be used. Leverages scsi_dh_attach's ability to increment the scsi_dh's reference count if the same scsi_dh name is provided when attaching - currently attached scsi_dh name is determined with scsi_dh_attached_handler_name. Depends upon commit 7e8a74b177f17d100916b6ad415450f7c9508691 ("[SCSI] scsi_dh: add scsi_dh_attached_handler_name"). Signed-off-by: Mike Snitzer Tested-by: Babu Moger Reviewed-by: Chandra Seetharaman Acked-by: Hannes Reinecke Signed-off-by: Alasdair G Kergon --- drivers/md/dm-mpath.c | 47 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 6 deletions(-) diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 638dae048b4f..8a3b2d53f81b 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -85,6 +85,7 @@ struct multipath { unsigned queue_io:1; /* Must we queue all I/O? */ unsigned queue_if_no_path:1; /* Queue I/O if last path fails? */ unsigned saved_queue_if_no_path:1; /* Saved state during suspension */ + unsigned retain_attached_hw_handler:1; /* If there's already a hw_handler present, don't change it. */ unsigned pg_init_retries; /* Number of times to retry pg_init */ unsigned pg_init_count; /* Number of times pg_init called */ @@ -568,6 +569,8 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps int r; struct pgpath *p; struct multipath *m = ti->private; + struct request_queue *q = NULL; + const char *attached_handler_name; /* we need at least a path arg */ if (as->argc < 1) { @@ -586,13 +589,37 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps goto bad; } - if (m->hw_handler_name) { - struct request_queue *q = bdev_get_queue(p->path.dev->bdev); + if (m->retain_attached_hw_handler || m->hw_handler_name) + q = bdev_get_queue(p->path.dev->bdev); + + if (m->retain_attached_hw_handler) { + attached_handler_name = scsi_dh_attached_handler_name(q, GFP_KERNEL); + if (attached_handler_name) { + /* + * Reset hw_handler_name to match the attached handler + * and clear any hw_handler_params associated with the + * ignored handler. + * + * NB. This modifies the table line to show the actual + * handler instead of the original table passed in. + */ + kfree(m->hw_handler_name); + m->hw_handler_name = attached_handler_name; + + kfree(m->hw_handler_params); + m->hw_handler_params = NULL; + } + } + if (m->hw_handler_name) { + /* + * Increments scsi_dh reference, even when using an + * already-attached handler. + */ r = scsi_dh_attach(q, m->hw_handler_name); if (r == -EBUSY) { /* - * Already attached to different hw_handler, + * Already attached to different hw_handler: * try to reattach with correct one. */ scsi_dh_detach(q); @@ -760,7 +787,7 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m) const char *arg_name; static struct dm_arg _args[] = { - {0, 5, "invalid number of feature args"}, + {0, 6, "invalid number of feature args"}, {1, 50, "pg_init_retries must be between 1 and 50"}, {0, 60000, "pg_init_delay_msecs must be between 0 and 60000"}, }; @@ -781,6 +808,11 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m) continue; } + if (!strcasecmp(arg_name, "retain_attached_hw_handler")) { + m->retain_attached_hw_handler = 1; + continue; + } + if (!strcasecmp(arg_name, "pg_init_retries") && (argc >= 1)) { r = dm_read_arg(_args + 1, as, &m->pg_init_retries, &ti->error); @@ -1364,13 +1396,16 @@ static int multipath_status(struct dm_target *ti, status_type_t type, else { DMEMIT("%u ", m->queue_if_no_path + (m->pg_init_retries > 0) * 2 + - (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT) * 2); + (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT) * 2 + + m->retain_attached_hw_handler); if (m->queue_if_no_path) DMEMIT("queue_if_no_path "); if (m->pg_init_retries) DMEMIT("pg_init_retries %u ", m->pg_init_retries); if (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT) DMEMIT("pg_init_delay_msecs %u ", m->pg_init_delay_msecs); + if (m->retain_attached_hw_handler) + DMEMIT("retain_attached_hw_handler "); } if (!m->hw_handler_name || type == STATUSTYPE_INFO) @@ -1656,7 +1691,7 @@ out: *---------------------------------------------------------------*/ static struct target_type multipath_target = { .name = "multipath", - .version = {1, 4, 0}, + .version = {1, 5, 0}, .module = THIS_MODULE, .ctr = multipath_ctr, .dtr = multipath_dtr, -- cgit v1.2.3 From f999e8fe70bd0b8faa27ccdac14b5942999c6e78 Mon Sep 17 00:00:00 2001 From: Jonathan E Brassow Date: Fri, 27 Jul 2012 15:08:04 +0100 Subject: dm raid: restructure parse_raid_params In preparation for RAID10 addition to dm-raid, we change an 'if' conditional to a 'switch' conditional to make it easier to see what is being checked for each RAID type. Signed-off-by: Jonathan Brassow Signed-off-by: Alasdair G Kergon --- drivers/md/dm-raid.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 858a8b70811c..1717ed33dd7f 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -430,13 +430,28 @@ static int parse_raid_params(struct raid_set *rs, char **argv, if (!strcasecmp(key, "rebuild")) { rebuild_cnt++; - if (((rs->raid_type->level != 1) && - (rebuild_cnt > rs->raid_type->parity_devs)) || - ((rs->raid_type->level == 1) && - (rebuild_cnt > (rs->md.raid_disks - 1)))) { - rs->ti->error = "Too many rebuild devices specified for given RAID type"; + + switch (rs->raid_type->level) { + case 1: + if (rebuild_cnt >= rs->md.raid_disks) { + rs->ti->error = "Too many rebuild devices specified"; + return -EINVAL; + } + break; + case 4: + case 5: + case 6: + if (rebuild_cnt > rs->raid_type->parity_devs) { + rs->ti->error = "Too many rebuild devices specified for given RAID type"; + return -EINVAL; + } + break; + default: + DMERR("The rebuild parameter is not supported for %s", rs->raid_type->name); + rs->ti->error = "Rebuild not supported for this RAID type"; return -EINVAL; } + if (value > rs->md.raid_disks) { rs->ti->error = "Invalid rebuild index given"; return -EINVAL; -- cgit v1.2.3 From c039c332f23e794deb6d6f37b9f07ff3b27fb2cf Mon Sep 17 00:00:00 2001 From: Jonathan E Brassow Date: Fri, 27 Jul 2012 15:08:04 +0100 Subject: dm raid: move sectors_per_dev calculation In preparation for RAID10 inclusion in dm-raid, we move the sectors_per_dev calculation later in the device creation process. This is because we won't know up-front how many stripes vs how many mirrors there are which will change the calculation. Signed-off-by: Jonathan Brassow Signed-off-by: Alasdair G Kergon --- drivers/md/dm-raid.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 1717ed33dd7f..f4275a8e860c 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -101,20 +101,12 @@ static struct raid_set *context_alloc(struct dm_target *ti, struct raid_type *ra { unsigned i; struct raid_set *rs; - sector_t sectors_per_dev; if (raid_devs <= raid_type->parity_devs) { ti->error = "Insufficient number of devices"; return ERR_PTR(-EINVAL); } - sectors_per_dev = ti->len; - if ((raid_type->level > 1) && - sector_div(sectors_per_dev, (raid_devs - raid_type->parity_devs))) { - ti->error = "Target length not divisible by number of data devices"; - return ERR_PTR(-EINVAL); - } - rs = kzalloc(sizeof(*rs) + raid_devs * sizeof(rs->dev[0]), GFP_KERNEL); if (!rs) { ti->error = "Cannot allocate raid context"; @@ -128,7 +120,6 @@ static struct raid_set *context_alloc(struct dm_target *ti, struct raid_type *ra rs->md.raid_disks = raid_devs; rs->md.level = raid_type->level; rs->md.new_level = rs->md.level; - rs->md.dev_sectors = sectors_per_dev; rs->md.layout = raid_type->algorithm; rs->md.new_layout = rs->md.layout; rs->md.delta_disks = 0; @@ -143,6 +134,7 @@ static struct raid_set *context_alloc(struct dm_target *ti, struct raid_type *ra * rs->md.external * rs->md.chunk_sectors * rs->md.new_chunk_sectors + * rs->md.dev_sectors */ return rs; @@ -353,6 +345,7 @@ static int parse_raid_params(struct raid_set *rs, char **argv, { unsigned i, rebuild_cnt = 0; unsigned long value, region_size = 0; + sector_t sectors_per_dev = rs->ti->len; sector_t max_io_len; char *key; @@ -545,6 +538,13 @@ static int parse_raid_params(struct raid_set *rs, char **argv, if (dm_set_target_max_io_len(rs->ti, max_io_len)) return -EINVAL; + if ((rs->raid_type->level > 1) && + sector_div(sectors_per_dev, (rs->md.raid_disks - rs->raid_type->parity_devs))) { + rs->ti->error = "Target length not divisible by number of data devices"; + return -EINVAL; + } + rs->md.dev_sectors = sectors_per_dev; + /* Assume there are no metadata devices until the drives are parsed */ rs->md.persistent = 0; rs->md.external = 1; -- cgit v1.2.3 From 40b6229b69211e24b46868675d4db1dab1a51d96 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 27 Jul 2012 15:08:04 +0100 Subject: dm crypt: rename pending field There are two dm crypt structures that have a field called "pending". This patch renames them to "cc_pending" and "io_pending" to reduce confusion and ease searching the code. Also remove unnecessary initialisation of r in crypt_convert_block(). Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-crypt.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index e2b32401ecc7..d7f0e0191c01 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -43,7 +43,7 @@ struct convert_context { unsigned int idx_in; unsigned int idx_out; sector_t sector; - atomic_t pending; + atomic_t cc_pending; }; /* @@ -56,7 +56,7 @@ struct dm_crypt_io { struct convert_context ctx; - atomic_t pending; + atomic_t io_pending; int error; sector_t sector; struct dm_crypt_io *base_io; @@ -695,7 +695,7 @@ static int crypt_convert_block(struct crypt_config *cc, struct bio_vec *bv_out = bio_iovec_idx(ctx->bio_out, ctx->idx_out); struct dm_crypt_request *dmreq; u8 *iv; - int r = 0; + int r; dmreq = dmreq_of_req(cc, req); iv = iv_of_dmreq(cc, dmreq); @@ -769,14 +769,14 @@ static int crypt_convert(struct crypt_config *cc, struct crypt_cpu *this_cc = this_crypt_config(cc); int r; - atomic_set(&ctx->pending, 1); + atomic_set(&ctx->cc_pending, 1); while(ctx->idx_in < ctx->bio_in->bi_vcnt && ctx->idx_out < ctx->bio_out->bi_vcnt) { crypt_alloc_req(cc, ctx); - atomic_inc(&ctx->pending); + atomic_inc(&ctx->cc_pending); r = crypt_convert_block(cc, ctx, this_cc->req); @@ -793,14 +793,14 @@ static int crypt_convert(struct crypt_config *cc, /* sync */ case 0: - atomic_dec(&ctx->pending); + atomic_dec(&ctx->cc_pending); ctx->sector++; cond_resched(); continue; /* error */ default: - atomic_dec(&ctx->pending); + atomic_dec(&ctx->cc_pending); return r; } } @@ -896,14 +896,14 @@ static struct dm_crypt_io *crypt_io_alloc(struct dm_target *ti, io->sector = sector; io->error = 0; io->base_io = NULL; - atomic_set(&io->pending, 0); + atomic_set(&io->io_pending, 0); return io; } static void crypt_inc_pending(struct dm_crypt_io *io) { - atomic_inc(&io->pending); + atomic_inc(&io->io_pending); } /* @@ -918,7 +918,7 @@ static void crypt_dec_pending(struct dm_crypt_io *io) struct dm_crypt_io *base_io = io->base_io; int error = io->error; - if (!atomic_dec_and_test(&io->pending)) + if (!atomic_dec_and_test(&io->io_pending)) return; mempool_free(io, cc->io_pool); @@ -1107,7 +1107,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) if (r < 0) io->error = -EIO; - crypt_finished = atomic_dec_and_test(&io->ctx.pending); + crypt_finished = atomic_dec_and_test(&io->ctx.cc_pending); /* Encryption was already finished, submit io now */ if (crypt_finished) { @@ -1181,7 +1181,7 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io) if (r < 0) io->error = -EIO; - if (atomic_dec_and_test(&io->ctx.pending)) + if (atomic_dec_and_test(&io->ctx.cc_pending)) kcryptd_crypt_read_done(io); crypt_dec_pending(io); @@ -1208,7 +1208,7 @@ static void kcryptd_async_done(struct crypto_async_request *async_req, mempool_free(req_of_dmreq(cc, dmreq), cc->req_pool); - if (!atomic_dec_and_test(&ctx->pending)) + if (!atomic_dec_and_test(&ctx->cc_pending)) return; if (bio_data_dir(io->base_bio) == READ) -- cgit v1.2.3 From fd2d231faf3ca25584d2320fdcd5a8b202342e46 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 27 Jul 2012 15:08:05 +0100 Subject: dm crypt: move cipher data out of per_cpu struct Move static dm-crypt cipher data out of per-cpu structure. Cipher information is static, so it does not have to be in a per-cpu structure. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-crypt.c | 134 ++++++++++++++++++++++++-------------------------- 1 file changed, 65 insertions(+), 69 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index d7f0e0191c01..711e0ac58665 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -109,9 +109,6 @@ enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID }; */ struct crypt_cpu { struct ablkcipher_request *req; - /* ESSIV: struct crypto_cipher *essiv_tfm */ - void *iv_private; - struct crypto_ablkcipher *tfms[0]; }; /* @@ -151,6 +148,10 @@ struct crypt_config { * per_cpu_ptr() only. */ struct crypt_cpu __percpu *cpu; + + /* ESSIV: struct crypto_cipher *essiv_tfm */ + void *iv_private; + struct crypto_ablkcipher **tfms; unsigned tfms_count; /* @@ -193,7 +194,7 @@ static struct crypt_cpu *this_crypt_config(struct crypt_config *cc) */ static struct crypto_ablkcipher *any_tfm(struct crypt_config *cc) { - return __this_cpu_ptr(cc->cpu)->tfms[0]; + return cc->tfms[0]; } /* @@ -258,7 +259,7 @@ static int crypt_iv_essiv_init(struct crypt_config *cc) struct hash_desc desc; struct scatterlist sg; struct crypto_cipher *essiv_tfm; - int err, cpu; + int err; sg_init_one(&sg, cc->key, cc->key_size); desc.tfm = essiv->hash_tfm; @@ -268,14 +269,12 @@ static int crypt_iv_essiv_init(struct crypt_config *cc) if (err) return err; - for_each_possible_cpu(cpu) { - essiv_tfm = per_cpu_ptr(cc->cpu, cpu)->iv_private, + essiv_tfm = cc->iv_private; - err = crypto_cipher_setkey(essiv_tfm, essiv->salt, - crypto_hash_digestsize(essiv->hash_tfm)); - if (err) - return err; - } + err = crypto_cipher_setkey(essiv_tfm, essiv->salt, + crypto_hash_digestsize(essiv->hash_tfm)); + if (err) + return err; return 0; } @@ -286,16 +285,14 @@ static int crypt_iv_essiv_wipe(struct crypt_config *cc) struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv; unsigned salt_size = crypto_hash_digestsize(essiv->hash_tfm); struct crypto_cipher *essiv_tfm; - int cpu, r, err = 0; + int r, err = 0; memset(essiv->salt, 0, salt_size); - for_each_possible_cpu(cpu) { - essiv_tfm = per_cpu_ptr(cc->cpu, cpu)->iv_private; - r = crypto_cipher_setkey(essiv_tfm, essiv->salt, salt_size); - if (r) - err = r; - } + essiv_tfm = cc->iv_private; + r = crypto_cipher_setkey(essiv_tfm, essiv->salt, salt_size); + if (r) + err = r; return err; } @@ -335,8 +332,6 @@ static struct crypto_cipher *setup_essiv_cpu(struct crypt_config *cc, static void crypt_iv_essiv_dtr(struct crypt_config *cc) { - int cpu; - struct crypt_cpu *cpu_cc; struct crypto_cipher *essiv_tfm; struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv; @@ -346,15 +341,12 @@ static void crypt_iv_essiv_dtr(struct crypt_config *cc) kzfree(essiv->salt); essiv->salt = NULL; - for_each_possible_cpu(cpu) { - cpu_cc = per_cpu_ptr(cc->cpu, cpu); - essiv_tfm = cpu_cc->iv_private; + essiv_tfm = cc->iv_private; - if (essiv_tfm) - crypto_free_cipher(essiv_tfm); + if (essiv_tfm) + crypto_free_cipher(essiv_tfm); - cpu_cc->iv_private = NULL; - } + cc->iv_private = NULL; } static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, @@ -363,7 +355,7 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, struct crypto_cipher *essiv_tfm = NULL; struct crypto_hash *hash_tfm = NULL; u8 *salt = NULL; - int err, cpu; + int err; if (!opts) { ti->error = "Digest algorithm missing for ESSIV mode"; @@ -388,15 +380,13 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti, cc->iv_gen_private.essiv.salt = salt; cc->iv_gen_private.essiv.hash_tfm = hash_tfm; - for_each_possible_cpu(cpu) { - essiv_tfm = setup_essiv_cpu(cc, ti, salt, - crypto_hash_digestsize(hash_tfm)); - if (IS_ERR(essiv_tfm)) { - crypt_iv_essiv_dtr(cc); - return PTR_ERR(essiv_tfm); - } - per_cpu_ptr(cc->cpu, cpu)->iv_private = essiv_tfm; + essiv_tfm = setup_essiv_cpu(cc, ti, salt, + crypto_hash_digestsize(hash_tfm)); + if (IS_ERR(essiv_tfm)) { + crypt_iv_essiv_dtr(cc); + return PTR_ERR(essiv_tfm); } + cc->iv_private = essiv_tfm; return 0; @@ -410,7 +400,7 @@ bad: static int crypt_iv_essiv_gen(struct crypt_config *cc, u8 *iv, struct dm_crypt_request *dmreq) { - struct crypto_cipher *essiv_tfm = this_crypt_config(cc)->iv_private; + struct crypto_cipher *essiv_tfm = cc->iv_private; memset(iv, 0, cc->iv_size); *(__le64 *)iv = cpu_to_le64(dmreq->iv_sector); @@ -754,7 +744,7 @@ static void crypt_alloc_req(struct crypt_config *cc, if (!this_cc->req) this_cc->req = mempool_alloc(cc->req_pool, GFP_NOIO); - ablkcipher_request_set_tfm(this_cc->req, this_cc->tfms[key_index]); + ablkcipher_request_set_tfm(this_cc->req, cc->tfms[key_index]); ablkcipher_request_set_callback(this_cc->req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, kcryptd_async_done, dmreq_of_req(cc, this_cc->req)); @@ -1273,29 +1263,38 @@ static void crypt_encode_key(char *hex, u8 *key, unsigned int size) } } -static void crypt_free_tfms(struct crypt_config *cc, int cpu) +static void crypt_free_tfms(struct crypt_config *cc) { - struct crypt_cpu *cpu_cc = per_cpu_ptr(cc->cpu, cpu); unsigned i; + if (!cc->tfms) + return; + for (i = 0; i < cc->tfms_count; i++) - if (cpu_cc->tfms[i] && !IS_ERR(cpu_cc->tfms[i])) { - crypto_free_ablkcipher(cpu_cc->tfms[i]); - cpu_cc->tfms[i] = NULL; + if (cc->tfms[i] && !IS_ERR(cc->tfms[i])) { + crypto_free_ablkcipher(cc->tfms[i]); + cc->tfms[i] = NULL; } + + kfree(cc->tfms); + cc->tfms = NULL; } -static int crypt_alloc_tfms(struct crypt_config *cc, int cpu, char *ciphermode) +static int crypt_alloc_tfms(struct crypt_config *cc, char *ciphermode) { - struct crypt_cpu *cpu_cc = per_cpu_ptr(cc->cpu, cpu); unsigned i; int err; + cc->tfms = kmalloc(cc->tfms_count * sizeof(struct crypto_ablkcipher *), + GFP_KERNEL); + if (!cc->tfms) + return -ENOMEM; + for (i = 0; i < cc->tfms_count; i++) { - cpu_cc->tfms[i] = crypto_alloc_ablkcipher(ciphermode, 0, 0); - if (IS_ERR(cpu_cc->tfms[i])) { - err = PTR_ERR(cpu_cc->tfms[i]); - crypt_free_tfms(cc, cpu); + cc->tfms[i] = crypto_alloc_ablkcipher(ciphermode, 0, 0); + if (IS_ERR(cc->tfms[i])) { + err = PTR_ERR(cc->tfms[i]); + crypt_free_tfms(cc); return err; } } @@ -1306,15 +1305,14 @@ static int crypt_alloc_tfms(struct crypt_config *cc, int cpu, char *ciphermode) static int crypt_setkey_allcpus(struct crypt_config *cc) { unsigned subkey_size = cc->key_size >> ilog2(cc->tfms_count); - int cpu, err = 0, i, r; - - for_each_possible_cpu(cpu) { - for (i = 0; i < cc->tfms_count; i++) { - r = crypto_ablkcipher_setkey(per_cpu_ptr(cc->cpu, cpu)->tfms[i], - cc->key + (i * subkey_size), subkey_size); - if (r) - err = r; - } + int err = 0, i, r; + + for (i = 0; i < cc->tfms_count; i++) { + r = crypto_ablkcipher_setkey(cc->tfms[i], + cc->key + (i * subkey_size), + subkey_size); + if (r) + err = r; } return err; @@ -1376,9 +1374,10 @@ static void crypt_dtr(struct dm_target *ti) cpu_cc = per_cpu_ptr(cc->cpu, cpu); if (cpu_cc->req) mempool_free(cpu_cc->req, cc->req_pool); - crypt_free_tfms(cc, cpu); } + crypt_free_tfms(cc); + if (cc->bs) bioset_free(cc->bs); @@ -1411,7 +1410,7 @@ static int crypt_ctr_cipher(struct dm_target *ti, struct crypt_config *cc = ti->private; char *tmp, *cipher, *chainmode, *ivmode, *ivopts, *keycount; char *cipher_api = NULL; - int cpu, ret = -EINVAL; + int ret = -EINVAL; char dummy; /* Convert to crypto api definition? */ @@ -1452,8 +1451,7 @@ static int crypt_ctr_cipher(struct dm_target *ti, if (tmp) DMWARN("Ignoring unexpected additional cipher options"); - cc->cpu = __alloc_percpu(sizeof(*(cc->cpu)) + - cc->tfms_count * sizeof(*(cc->cpu->tfms)), + cc->cpu = __alloc_percpu(sizeof(*(cc->cpu)), __alignof__(struct crypt_cpu)); if (!cc->cpu) { ti->error = "Cannot allocate per cpu state"; @@ -1486,12 +1484,10 @@ static int crypt_ctr_cipher(struct dm_target *ti, } /* Allocate cipher */ - for_each_possible_cpu(cpu) { - ret = crypt_alloc_tfms(cc, cpu, cipher_api); - if (ret < 0) { - ti->error = "Error allocating crypto tfm"; - goto bad; - } + ret = crypt_alloc_tfms(cc, cipher_api); + if (ret < 0) { + ti->error = "Error allocating crypto tfm"; + goto bad; } /* Initialize and set key */ -- cgit v1.2.3 From 49a8a9204bb17296725058bbc7f31092d256be6e Mon Sep 17 00:00:00 2001 From: Alasdair G Kergon Date: Fri, 27 Jul 2012 15:08:05 +0100 Subject: dm crypt: store crypt_config instead of dm_target struct Store the crypt_config struct pointer directly in struct dm_crypt_io instead of the dm_target struct pointer. Target information is never used - only target->private is referenced, thus we can change it to point directly to struct crypt_config. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-crypt.c | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 711e0ac58665..8298eaee2c5a 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -50,7 +50,7 @@ struct convert_context { * per bio private data */ struct dm_crypt_io { - struct dm_target *target; + struct crypt_config *cc; struct bio *base_bio; struct work_struct work; @@ -801,7 +801,7 @@ static int crypt_convert(struct crypt_config *cc, static void dm_crypt_bio_destructor(struct bio *bio) { struct dm_crypt_io *io = bio->bi_private; - struct crypt_config *cc = io->target->private; + struct crypt_config *cc = io->cc; bio_free(bio, cc->bs); } @@ -815,7 +815,7 @@ static void dm_crypt_bio_destructor(struct bio *bio) static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size, unsigned *out_of_pages) { - struct crypt_config *cc = io->target->private; + struct crypt_config *cc = io->cc; struct bio *clone; unsigned int nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; gfp_t gfp_mask = GFP_NOIO | __GFP_HIGHMEM; @@ -874,14 +874,13 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone) } } -static struct dm_crypt_io *crypt_io_alloc(struct dm_target *ti, +static struct dm_crypt_io *crypt_io_alloc(struct crypt_config *cc, struct bio *bio, sector_t sector) { - struct crypt_config *cc = ti->private; struct dm_crypt_io *io; io = mempool_alloc(cc->io_pool, GFP_NOIO); - io->target = ti; + io->cc = cc; io->base_bio = bio; io->sector = sector; io->error = 0; @@ -903,7 +902,7 @@ static void crypt_inc_pending(struct dm_crypt_io *io) */ static void crypt_dec_pending(struct dm_crypt_io *io) { - struct crypt_config *cc = io->target->private; + struct crypt_config *cc = io->cc; struct bio *base_bio = io->base_bio; struct dm_crypt_io *base_io = io->base_io; int error = io->error; @@ -942,7 +941,7 @@ static void crypt_dec_pending(struct dm_crypt_io *io) static void crypt_endio(struct bio *clone, int error) { struct dm_crypt_io *io = clone->bi_private; - struct crypt_config *cc = io->target->private; + struct crypt_config *cc = io->cc; unsigned rw = bio_data_dir(clone); if (unlikely(!bio_flagged(clone, BIO_UPTODATE) && !error)) @@ -969,7 +968,7 @@ static void crypt_endio(struct bio *clone, int error) static void clone_init(struct dm_crypt_io *io, struct bio *clone) { - struct crypt_config *cc = io->target->private; + struct crypt_config *cc = io->cc; clone->bi_private = io; clone->bi_end_io = crypt_endio; @@ -980,7 +979,7 @@ static void clone_init(struct dm_crypt_io *io, struct bio *clone) static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp) { - struct crypt_config *cc = io->target->private; + struct crypt_config *cc = io->cc; struct bio *base_bio = io->base_bio; struct bio *clone; @@ -1028,7 +1027,7 @@ static void kcryptd_io(struct work_struct *work) static void kcryptd_queue_io(struct dm_crypt_io *io) { - struct crypt_config *cc = io->target->private; + struct crypt_config *cc = io->cc; INIT_WORK(&io->work, kcryptd_io); queue_work(cc->io_queue, &io->work); @@ -1037,7 +1036,7 @@ static void kcryptd_queue_io(struct dm_crypt_io *io) static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async) { struct bio *clone = io->ctx.bio_out; - struct crypt_config *cc = io->target->private; + struct crypt_config *cc = io->cc; if (unlikely(io->error < 0)) { crypt_free_buffer_pages(cc, clone); @@ -1059,7 +1058,7 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async) static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) { - struct crypt_config *cc = io->target->private; + struct crypt_config *cc = io->cc; struct bio *clone; struct dm_crypt_io *new_io; int crypt_finished; @@ -1125,7 +1124,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) * between fragments, so switch to a new dm_crypt_io structure. */ if (unlikely(!crypt_finished && remaining)) { - new_io = crypt_io_alloc(io->target, io->base_bio, + new_io = crypt_io_alloc(io->cc, io->base_bio, sector); crypt_inc_pending(new_io); crypt_convert_init(cc, &new_io->ctx, NULL, @@ -1159,7 +1158,7 @@ static void kcryptd_crypt_read_done(struct dm_crypt_io *io) static void kcryptd_crypt_read_convert(struct dm_crypt_io *io) { - struct crypt_config *cc = io->target->private; + struct crypt_config *cc = io->cc; int r = 0; crypt_inc_pending(io); @@ -1183,7 +1182,7 @@ static void kcryptd_async_done(struct crypto_async_request *async_req, struct dm_crypt_request *dmreq = async_req->data; struct convert_context *ctx = dmreq->ctx; struct dm_crypt_io *io = container_of(ctx, struct dm_crypt_io, ctx); - struct crypt_config *cc = io->target->private; + struct crypt_config *cc = io->cc; if (error == -EINPROGRESS) { complete(&ctx->restart); @@ -1219,7 +1218,7 @@ static void kcryptd_crypt(struct work_struct *work) static void kcryptd_queue_crypt(struct dm_crypt_io *io) { - struct crypt_config *cc = io->target->private; + struct crypt_config *cc = io->cc; INIT_WORK(&io->work, kcryptd_crypt); queue_work(cc->crypt_queue, &io->work); @@ -1708,7 +1707,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio, union map_info *map_context) { struct dm_crypt_io *io; - struct crypt_config *cc; + struct crypt_config *cc = ti->private; /* * If bio is REQ_FLUSH or REQ_DISCARD, just bypass crypt queues. @@ -1716,14 +1715,13 @@ static int crypt_map(struct dm_target *ti, struct bio *bio, * - for REQ_DISCARD caller must use flush if IO ordering matters */ if (unlikely(bio->bi_rw & (REQ_FLUSH | REQ_DISCARD))) { - cc = ti->private; bio->bi_bdev = cc->dev->bdev; if (bio_sectors(bio)) bio->bi_sector = cc->start + dm_target_offset(ti, bio->bi_sector); return DM_MAPIO_REMAPPED; } - io = crypt_io_alloc(ti, bio, dm_target_offset(ti, bio->bi_sector)); + io = crypt_io_alloc(cc, bio, dm_target_offset(ti, bio->bi_sector)); if (bio_data_dir(io->base_bio) == READ) { if (kcryptd_io_read(io, GFP_NOWAIT)) -- cgit v1.2.3 From c66029f4d4226452e9a7f468650aa1d553525b81 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 27 Jul 2012 15:08:05 +0100 Subject: dm crypt: rename struct convert_context sector field Rename sector to cc_sector in dm-crypt's convert_context struct. This is preparation for a future patch that merges dm_io and convert_context which both have a "sector" field. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-crypt.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 8298eaee2c5a..330f1eaedc42 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -42,7 +42,7 @@ struct convert_context { unsigned int offset_out; unsigned int idx_in; unsigned int idx_out; - sector_t sector; + sector_t cc_sector; atomic_t cc_pending; }; @@ -654,7 +654,7 @@ static void crypt_convert_init(struct crypt_config *cc, ctx->offset_out = 0; ctx->idx_in = bio_in ? bio_in->bi_idx : 0; ctx->idx_out = bio_out ? bio_out->bi_idx : 0; - ctx->sector = sector + cc->iv_offset; + ctx->cc_sector = sector + cc->iv_offset; init_completion(&ctx->restart); } @@ -690,7 +690,7 @@ static int crypt_convert_block(struct crypt_config *cc, dmreq = dmreq_of_req(cc, req); iv = iv_of_dmreq(cc, dmreq); - dmreq->iv_sector = ctx->sector; + dmreq->iv_sector = ctx->cc_sector; dmreq->ctx = ctx; sg_init_table(&dmreq->sg_in, 1); sg_set_page(&dmreq->sg_in, bv_in->bv_page, 1 << SECTOR_SHIFT, @@ -739,7 +739,7 @@ static void crypt_alloc_req(struct crypt_config *cc, struct convert_context *ctx) { struct crypt_cpu *this_cc = this_crypt_config(cc); - unsigned key_index = ctx->sector & (cc->tfms_count - 1); + unsigned key_index = ctx->cc_sector & (cc->tfms_count - 1); if (!this_cc->req) this_cc->req = mempool_alloc(cc->req_pool, GFP_NOIO); @@ -778,13 +778,13 @@ static int crypt_convert(struct crypt_config *cc, /* fall through*/ case -EINPROGRESS: this_cc->req = NULL; - ctx->sector++; + ctx->cc_sector++; continue; /* sync */ case 0: atomic_dec(&ctx->cc_pending); - ctx->sector++; + ctx->cc_sector++; cond_resched(); continue; -- cgit v1.2.3 From 905386f82d08f66726912f303f3e6605248c60a3 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 27 Jul 2012 15:08:05 +0100 Subject: dm thin: fix memory leak in process_prepared_mapping error paths Fix memory leak in process_prepared_mapping by always freeing the dm_thin_new_mapping structs from the mapping_pool mempool on the error paths. Signed-off-by: Joe Thornber Cc: stable@vger.kernel.org Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 93e3e542cff9..423df92f1c19 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -870,7 +870,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) if (m->err) { cell_error(m->cell); - return; + goto out; } /* @@ -882,7 +882,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) if (r) { DMERR("dm_thin_insert_block() failed"); cell_error(m->cell); - return; + goto out; } /* @@ -897,6 +897,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) } else cell_defer(tc, m->cell, m->data_block); +out: list_del(&m->list); mempool_free(m, tc->pool->mapping_pool); } -- cgit v1.2.3 From 600497013687516153cb82637acfe56f7eb9308a Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 27 Jul 2012 15:08:06 +0100 Subject: dm thin: avoid unnecessarily breaking sharing for flushes There's no need to break sharing, triggering a copy, for a write that has no data (i.e. a flush). Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 423df92f1c19..7510cb3d571c 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -1315,7 +1315,7 @@ static void process_shared_bio(struct thin_c *tc, struct bio *bio, if (bio_detain(pool->prison, &key, bio, &cell)) return; - if (bio_data_dir(bio) == WRITE) + if (bio_data_dir(bio) == WRITE && bio->bi_size) break_sharing(tc, bio, block, &key, lookup_result, cell); else { struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; -- cgit v1.2.3 From 8d44c98aac540cdf3cb5385bc6ef8d56930c7d70 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 27 Jul 2012 15:08:06 +0100 Subject: dm persistent data: always unlock superblock in dm_bm_flush_and_unlock Unlock the superblock even if initial dm_bufio_write_dirty_buffers fails. Also, remove redundant flush calls. dm_bm_flush_and_unlock's calls to dm_bufio_write_dirty_buffers already result in dm_bufio_issue_flush being called. This avoids warnings about unflushed dirty buffers from bufio. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/persistent-data/dm-block-manager.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c index 0317ecdc6e53..8ec46dd511f9 100644 --- a/drivers/md/persistent-data/dm-block-manager.c +++ b/drivers/md/persistent-data/dm-block-manager.c @@ -587,22 +587,14 @@ int dm_bm_flush_and_unlock(struct dm_block_manager *bm, int r; r = dm_bufio_write_dirty_buffers(to_bufio(bm)); - if (unlikely(r)) - return r; - r = dm_bufio_issue_flush(to_bufio(bm)); - if (unlikely(r)) + if (unlikely(r)) { + dm_bm_unlock(superblock); return r; + } dm_bm_unlock(superblock); - r = dm_bufio_write_dirty_buffers(to_bufio(bm)); - if (unlikely(r)) - return r; - r = dm_bufio_issue_flush(to_bufio(bm)); - if (unlikely(r)) - return r; - - return 0; + return dm_bufio_write_dirty_buffers(to_bufio(bm)); } u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor) -- cgit v1.2.3 From f4b90369d3a9ffe0f48f373c566578c142e95bf2 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 27 Jul 2012 15:08:06 +0100 Subject: dm persistent data: only commit space map if index changed Introduce bitmap_index_changed to track whether or not the index changed then only commit a space map if it did. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/persistent-data/dm-space-map-common.c | 12 +++++++++++- drivers/md/persistent-data/dm-space-map-common.h | 1 + 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c index ff3beed6ad2d..d77602d63c83 100644 --- a/drivers/md/persistent-data/dm-space-map-common.c +++ b/drivers/md/persistent-data/dm-space-map-common.c @@ -224,6 +224,7 @@ static int sm_ll_init(struct ll_disk *ll, struct dm_transaction_manager *tm) ll->nr_blocks = 0; ll->bitmap_root = 0; ll->ref_count_root = 0; + ll->bitmap_index_changed = false; return 0; } @@ -476,7 +477,15 @@ int sm_ll_dec(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev) int sm_ll_commit(struct ll_disk *ll) { - return ll->commit(ll); + int r = 0; + + if (ll->bitmap_index_changed) { + r = ll->commit(ll); + if (!r) + ll->bitmap_index_changed = false; + } + + return r; } /*----------------------------------------------------------------*/ @@ -491,6 +500,7 @@ static int metadata_ll_load_ie(struct ll_disk *ll, dm_block_t index, static int metadata_ll_save_ie(struct ll_disk *ll, dm_block_t index, struct disk_index_entry *ie) { + ll->bitmap_index_changed = true; memcpy(ll->mi_le.index + index, ie, sizeof(*ie)); return 0; } diff --git a/drivers/md/persistent-data/dm-space-map-common.h b/drivers/md/persistent-data/dm-space-map-common.h index 8f220821a9a9..b3078d5eda0c 100644 --- a/drivers/md/persistent-data/dm-space-map-common.h +++ b/drivers/md/persistent-data/dm-space-map-common.h @@ -78,6 +78,7 @@ struct ll_disk { open_index_fn open_index; max_index_entries_fn max_entries; commit_fn commit; + bool bitmap_index_changed:1; }; struct disk_sm_root { -- cgit v1.2.3 From 0e9c24ed7443d090e369a2eddfa13f7f0b5afbaf Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 27 Jul 2012 15:08:07 +0100 Subject: dm: allow targets to request flushes regardless of underlying device support Allow targets to override the 'supports flush' calculation. Set 'flush_supported' if a target needs to receive flushes regardless of whether or not its underlying devices have support. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-table.c | 3 +++ include/linux/device-mapper.h | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 2e227fbf1622..f90069029aae 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1319,6 +1319,9 @@ static bool dm_table_supports_flush(struct dm_table *t, unsigned flush) if (!ti->num_flush_requests) continue; + if (ti->flush_supported) + return 1; + if (ti->type->iterate_devices && ti->type->iterate_devices(ti, device_flush_capable, &flush)) return 1; diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 8bdbbfce759a..bdd65e97a129 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -211,6 +211,12 @@ struct dm_target { /* Used to provide an error string from the ctr */ char *error; + /* + * Set if this target needs to receive flushes regardless of + * whether or not its underlying devices have support. + */ + bool flush_supported:1; + /* * Set if this target needs to receive discards regardless of * whether or not its underlying devices have support. -- cgit v1.2.3 From 16ad3d103dbec4fe6c5950d5a6c383ba9a5296ef Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 27 Jul 2012 15:08:07 +0100 Subject: dm thin: set flush_supported The thin provisioning target commits internal metadata on flush. So it should receive flushes regardless of whether the underlying devices support them. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 7510cb3d571c..ee5a3fc93b0e 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -2652,6 +2652,7 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad_thin_open; ti->num_flush_requests = 1; + ti->flush_supported = true; /* In case the pool supports discards, pass them on. */ if (tc->pool->pf.discard_enabled) { -- cgit v1.2.3 From 0ac55489d9e3898987b2ae305844cf2af86e6b8d Mon Sep 17 00:00:00 2001 From: Alasdair G Kergon Date: Fri, 27 Jul 2012 15:08:08 +0100 Subject: dm: use bool bitfields in struct dm_target Use boolean bit fields for flags in struct dm_target. Signed-off-by: Alasdair G Kergon --- drivers/md/dm-crypt.c | 2 +- drivers/md/dm-raid1.c | 2 +- drivers/md/dm-thin.c | 8 ++++---- include/linux/device-mapper.h | 6 +++--- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 330f1eaedc42..00a25ab987c9 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1694,7 +1694,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) } ti->num_flush_requests = 1; - ti->discard_zeroes_data_unsupported = 1; + ti->discard_zeroes_data_unsupported = true; return 0; diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 819ccba65912..596a3a1164a7 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -1088,7 +1088,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->num_flush_requests = 1; ti->num_discard_requests = 1; - ti->discard_zeroes_data_unsupported = 1; + ti->discard_zeroes_data_unsupported = true; ms->kmirrord_wq = alloc_workqueue("kmirrord", WQ_NON_REENTRANT | WQ_MEM_RECLAIM, 0); diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index ee5a3fc93b0e..026215566abd 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -2053,7 +2053,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) * stacking of discard limits (this keeps the pool and * thin devices' discard limits consistent). */ - ti->discards_supported = 1; + ti->discards_supported = true; } ti->private = pt; @@ -2656,11 +2656,11 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) /* In case the pool supports discards, pass them on. */ if (tc->pool->pf.discard_enabled) { - ti->discards_supported = 1; + ti->discards_supported = true; ti->num_discard_requests = 1; - ti->discard_zeroes_data_unsupported = 1; + ti->discard_zeroes_data_unsupported = true; /* Discard requests must be split on a block boundary */ - ti->split_discard_requests = 1; + ti->split_discard_requests = true; } dm_put(pool_md); diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index bdd65e97a129..eb753633b576 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -221,18 +221,18 @@ struct dm_target { * Set if this target needs to receive discards regardless of * whether or not its underlying devices have support. */ - unsigned discards_supported:1; + bool discards_supported:1; /* * Set if the target required discard request to be split * on max_io_len boundary. */ - unsigned split_discard_requests:1; + bool split_discard_requests:1; /* * Set if this target does not return zeroes on discarded blocks. */ - unsigned discard_zeroes_data_unsupported:1; + bool discard_zeroes_data_unsupported:1; }; /* Each target can link one of these into the table */ -- cgit v1.2.3 From 41675aea32949786cf002c4ab179b85144d3423b Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 27 Jul 2012 15:08:08 +0100 Subject: dm thin metadata: factor __setup_btree_details out of init_pmd Factor __setup_btree_details out of init_pmd in dm-thin-metadata. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin-metadata.c | 74 +++++++++++++++++++++++-------------------- 1 file changed, 39 insertions(+), 35 deletions(-) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 31f9827dfb56..82b82c318f14 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -371,6 +371,44 @@ static int superblock_all_zeroes(struct dm_block_manager *bm, int *result) return dm_bm_unlock(b); } +static void __setup_btree_details(struct dm_pool_metadata *pmd) +{ + pmd->info.tm = pmd->tm; + pmd->info.levels = 2; + pmd->info.value_type.context = pmd->data_sm; + pmd->info.value_type.size = sizeof(__le64); + pmd->info.value_type.inc = data_block_inc; + pmd->info.value_type.dec = data_block_dec; + pmd->info.value_type.equal = data_block_equal; + + memcpy(&pmd->nb_info, &pmd->info, sizeof(pmd->nb_info)); + pmd->nb_info.tm = pmd->nb_tm; + + pmd->tl_info.tm = pmd->tm; + pmd->tl_info.levels = 1; + pmd->tl_info.value_type.context = &pmd->info; + pmd->tl_info.value_type.size = sizeof(__le64); + pmd->tl_info.value_type.inc = subtree_inc; + pmd->tl_info.value_type.dec = subtree_dec; + pmd->tl_info.value_type.equal = subtree_equal; + + pmd->bl_info.tm = pmd->tm; + pmd->bl_info.levels = 1; + pmd->bl_info.value_type.context = pmd->data_sm; + pmd->bl_info.value_type.size = sizeof(__le64); + pmd->bl_info.value_type.inc = data_block_inc; + pmd->bl_info.value_type.dec = data_block_dec; + pmd->bl_info.value_type.equal = data_block_equal; + + pmd->details_info.tm = pmd->tm; + pmd->details_info.levels = 1; + pmd->details_info.value_type.context = NULL; + pmd->details_info.value_type.size = sizeof(struct disk_device_details); + pmd->details_info.value_type.inc = NULL; + pmd->details_info.value_type.dec = NULL; + pmd->details_info.value_type.equal = NULL; +} + static int init_pmd(struct dm_pool_metadata *pmd, struct dm_block_manager *bm, dm_block_t nr_blocks, int create) @@ -436,41 +474,7 @@ static int init_pmd(struct dm_pool_metadata *pmd, goto bad_data_sm; } - pmd->info.tm = tm; - pmd->info.levels = 2; - pmd->info.value_type.context = pmd->data_sm; - pmd->info.value_type.size = sizeof(__le64); - pmd->info.value_type.inc = data_block_inc; - pmd->info.value_type.dec = data_block_dec; - pmd->info.value_type.equal = data_block_equal; - - memcpy(&pmd->nb_info, &pmd->info, sizeof(pmd->nb_info)); - pmd->nb_info.tm = pmd->nb_tm; - - pmd->tl_info.tm = tm; - pmd->tl_info.levels = 1; - pmd->tl_info.value_type.context = &pmd->info; - pmd->tl_info.value_type.size = sizeof(__le64); - pmd->tl_info.value_type.inc = subtree_inc; - pmd->tl_info.value_type.dec = subtree_dec; - pmd->tl_info.value_type.equal = subtree_equal; - - pmd->bl_info.tm = tm; - pmd->bl_info.levels = 1; - pmd->bl_info.value_type.context = pmd->data_sm; - pmd->bl_info.value_type.size = sizeof(__le64); - pmd->bl_info.value_type.inc = data_block_inc; - pmd->bl_info.value_type.dec = data_block_dec; - pmd->bl_info.value_type.equal = data_block_equal; - - pmd->details_info.tm = tm; - pmd->details_info.levels = 1; - pmd->details_info.value_type.context = NULL; - pmd->details_info.value_type.size = sizeof(struct disk_device_details); - pmd->details_info.value_type.inc = NULL; - pmd->details_info.value_type.dec = NULL; - pmd->details_info.value_type.equal = NULL; - + __setup_btree_details(pmd); pmd->root = 0; init_rwsem(&pmd->root_lock); -- cgit v1.2.3 From 51a0f659c03ccc8ec407c7a9f9701d1b4618d515 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 27 Jul 2012 15:08:08 +0100 Subject: dm persistent data: create new dm_block_manager struct This patch introduces a separate struct for the block_manager. It also uses IS_ERR to check the return value of dm_bufio_client_create instead of testing incorrectly for NULL. Prior to this patch a struct dm_block_manager was really an alias for a struct dm_bufio_client. We want to add some functionality to the block manager that will require extra fields, so this one to one mapping is no longer valid. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin-metadata.c | 6 +-- drivers/md/persistent-data/dm-block-manager.c | 58 ++++++++++++++++++--------- 2 files changed, 41 insertions(+), 23 deletions(-) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 82b82c318f14..afd84cacca4b 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -681,10 +681,11 @@ struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, bm = dm_block_manager_create(bdev, THIN_METADATA_BLOCK_SIZE, THIN_METADATA_CACHE_SIZE, THIN_MAX_CONCURRENT_LOCKS); - if (!bm) { + if (IS_ERR(bm)) { + r = PTR_ERR(bm); DMERR("could not create block manager"); kfree(pmd); - return ERR_PTR(-ENOMEM); + return ERR_PTR(r); } r = superblock_all_zeroes(bm, &create); @@ -694,7 +695,6 @@ struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, return ERR_PTR(r); } - r = init_pmd(pmd, bm, 0, create); if (r) { dm_block_manager_destroy(bm); diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c index 8ec46dd511f9..c70ad6e303d3 100644 --- a/drivers/md/persistent-data/dm-block-manager.c +++ b/drivers/md/persistent-data/dm-block-manager.c @@ -325,11 +325,6 @@ static struct dm_buffer *to_buffer(struct dm_block *b) return (struct dm_buffer *) b; } -static struct dm_bufio_client *to_bufio(struct dm_block_manager *bm) -{ - return (struct dm_bufio_client *) bm; -} - dm_block_t dm_block_location(struct dm_block *b) { return dm_bufio_get_block_number(to_buffer(b)); @@ -367,34 +362,57 @@ static void dm_block_manager_write_callback(struct dm_buffer *buf) /*---------------------------------------------------------------- * Public interface *--------------------------------------------------------------*/ +struct dm_block_manager { + struct dm_bufio_client *bufio; +}; + struct dm_block_manager *dm_block_manager_create(struct block_device *bdev, unsigned block_size, unsigned cache_size, unsigned max_held_per_thread) { - return (struct dm_block_manager *) - dm_bufio_client_create(bdev, block_size, max_held_per_thread, - sizeof(struct buffer_aux), - dm_block_manager_alloc_callback, - dm_block_manager_write_callback); + int r; + struct dm_block_manager *bm; + + bm = kmalloc(sizeof(*bm), GFP_KERNEL); + if (!bm) { + r = -ENOMEM; + goto bad; + } + + bm->bufio = dm_bufio_client_create(bdev, block_size, max_held_per_thread, + sizeof(struct buffer_aux), + dm_block_manager_alloc_callback, + dm_block_manager_write_callback); + if (IS_ERR(bm->bufio)) { + r = PTR_ERR(bm->bufio); + kfree(bm); + goto bad; + } + + return bm; + +bad: + return ERR_PTR(r); } EXPORT_SYMBOL_GPL(dm_block_manager_create); void dm_block_manager_destroy(struct dm_block_manager *bm) { - return dm_bufio_client_destroy(to_bufio(bm)); + dm_bufio_client_destroy(bm->bufio); + kfree(bm); } EXPORT_SYMBOL_GPL(dm_block_manager_destroy); unsigned dm_bm_block_size(struct dm_block_manager *bm) { - return dm_bufio_get_block_size(to_bufio(bm)); + return dm_bufio_get_block_size(bm->bufio); } EXPORT_SYMBOL_GPL(dm_bm_block_size); dm_block_t dm_bm_nr_blocks(struct dm_block_manager *bm) { - return dm_bufio_get_device_size(to_bufio(bm)); + return dm_bufio_get_device_size(bm->bufio); } static int dm_bm_validate_buffer(struct dm_block_manager *bm, @@ -406,7 +424,7 @@ static int dm_bm_validate_buffer(struct dm_block_manager *bm, int r; if (!v) return 0; - r = v->check(v, (struct dm_block *) buf, dm_bufio_get_block_size(to_bufio(bm))); + r = v->check(v, (struct dm_block *) buf, dm_bufio_get_block_size(bm->bufio)); if (unlikely(r)) return r; aux->validator = v; @@ -430,7 +448,7 @@ int dm_bm_read_lock(struct dm_block_manager *bm, dm_block_t b, void *p; int r; - p = dm_bufio_read(to_bufio(bm), b, (struct dm_buffer **) result); + p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result); if (unlikely(IS_ERR(p))) return PTR_ERR(p); @@ -463,7 +481,7 @@ int dm_bm_write_lock(struct dm_block_manager *bm, void *p; int r; - p = dm_bufio_read(to_bufio(bm), b, (struct dm_buffer **) result); + p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result); if (unlikely(IS_ERR(p))) return PTR_ERR(p); @@ -496,7 +514,7 @@ int dm_bm_read_try_lock(struct dm_block_manager *bm, void *p; int r; - p = dm_bufio_get(to_bufio(bm), b, (struct dm_buffer **) result); + p = dm_bufio_get(bm->bufio, b, (struct dm_buffer **) result); if (unlikely(IS_ERR(p))) return PTR_ERR(p); if (unlikely(!p)) @@ -529,7 +547,7 @@ int dm_bm_write_lock_zero(struct dm_block_manager *bm, struct buffer_aux *aux; void *p; - p = dm_bufio_new(to_bufio(bm), b, (struct dm_buffer **) result); + p = dm_bufio_new(bm->bufio, b, (struct dm_buffer **) result); if (unlikely(IS_ERR(p))) return PTR_ERR(p); @@ -586,7 +604,7 @@ int dm_bm_flush_and_unlock(struct dm_block_manager *bm, { int r; - r = dm_bufio_write_dirty_buffers(to_bufio(bm)); + r = dm_bufio_write_dirty_buffers(bm->bufio); if (unlikely(r)) { dm_bm_unlock(superblock); return r; @@ -594,7 +612,7 @@ int dm_bm_flush_and_unlock(struct dm_block_manager *bm, dm_bm_unlock(superblock); - return dm_bufio_write_dirty_buffers(to_bufio(bm)); + return dm_bufio_write_dirty_buffers(bm->bufio); } u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor) -- cgit v1.2.3 From eb04cf634fc2d5e3bc8fe88fbf434eda4921d875 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 27 Jul 2012 15:08:08 +0100 Subject: dm thin metadata: stop tracking need for commit Remove an optimisation that tracks whether or not a thin metadata commit is needed. If dm_pool_commit_metadata() is called and no changes have been made to the metadata then this optimisation avoided writing to disk. Removing because we're going to do something better later. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin-metadata.c | 40 ++-------------------------------------- 1 file changed, 2 insertions(+), 38 deletions(-) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index afd84cacca4b..c8f36227fd7b 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -178,7 +178,6 @@ struct dm_pool_metadata { struct rw_semaphore root_lock; uint32_t time; - int need_commit; dm_block_t root; dm_block_t details_root; struct list_head thin_devices; @@ -479,7 +478,6 @@ static int init_pmd(struct dm_pool_metadata *pmd, init_rwsem(&pmd->root_lock); pmd->time = 0; - pmd->need_commit = 0; pmd->details_root = 0; pmd->trans_id = 0; pmd->flags = 0; @@ -503,11 +501,6 @@ static int __begin_transaction(struct dm_pool_metadata *pmd) struct thin_disk_superblock *disk_super; struct dm_block *sblock; - /* - * __maybe_commit_transaction() resets these - */ - WARN_ON(pmd->need_commit); - /* * We re-read the superblock every time. Shouldn't need to do this * really. @@ -583,8 +576,6 @@ static int __write_changed_details(struct dm_pool_metadata *pmd) list_del(&td->list); kfree(td); } - - pmd->need_commit = 1; } return 0; @@ -609,9 +600,6 @@ static int __commit_transaction(struct dm_pool_metadata *pmd) if (r < 0) return r; - if (!pmd->need_commit) - return r; - r = dm_sm_commit(pmd->data_sm); if (r < 0) return r; @@ -650,11 +638,7 @@ static int __commit_transaction(struct dm_pool_metadata *pmd) if (r < 0) goto out_locked; - r = dm_tm_commit(pmd->tm, sblock); - if (!r) - pmd->need_commit = 0; - - return r; + return dm_tm_commit(pmd->tm, sblock); out_locked: dm_bm_unlock(sblock); @@ -744,7 +728,6 @@ struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, } pmd->flags = 0; - pmd->need_commit = 1; r = dm_pool_commit_metadata(pmd); if (r < 0) { DMERR("%s: dm_pool_commit_metadata() failed, error = %d", @@ -1042,8 +1025,6 @@ static int __delete_device(struct dm_pool_metadata *pmd, dm_thin_id dev) if (r) return r; - pmd->need_commit = 1; - return 0; } @@ -1071,7 +1052,6 @@ int dm_pool_set_metadata_transaction_id(struct dm_pool_metadata *pmd, } pmd->trans_id = new_id; - pmd->need_commit = 1; up_write(&pmd->root_lock); return 0; @@ -1113,8 +1093,6 @@ static int __reserve_metadata_snap(struct dm_pool_metadata *pmd) dm_tm_dec(pmd->tm, held_root); dm_tm_unlock(pmd->tm, copy); - pmd->need_commit = 1; - return -EBUSY; } @@ -1140,16 +1118,12 @@ static int __reserve_metadata_snap(struct dm_pool_metadata *pmd) &sb_validator, &sblock); if (r) { dm_tm_dec(pmd->tm, held_root); - pmd->need_commit = 1; return r; } disk_super = dm_block_data(sblock); disk_super->held_root = cpu_to_le64(held_root); dm_bm_unlock(sblock); - - pmd->need_commit = 1; - return 0; } @@ -1179,7 +1153,6 @@ static int __release_metadata_snap(struct dm_pool_metadata *pmd) disk_super = dm_block_data(sblock); held_root = le64_to_cpu(disk_super->held_root); disk_super->held_root = cpu_to_le64(0); - pmd->need_commit = 1; dm_bm_unlock(sblock); @@ -1317,7 +1290,6 @@ static int __insert(struct dm_thin_device *td, dm_block_t block, struct dm_pool_metadata *pmd = td->pmd; dm_block_t keys[2] = { td->id, block }; - pmd->need_commit = 1; value = cpu_to_le64(pack_block_time(data_block, pmd->time)); __dm_bless_for_disk(&value); @@ -1358,7 +1330,6 @@ static int __remove(struct dm_thin_device *td, dm_block_t block) td->mapped_blocks--; td->changed = 1; - pmd->need_commit = 1; return 0; } @@ -1379,10 +1350,7 @@ int dm_pool_alloc_data_block(struct dm_pool_metadata *pmd, dm_block_t *result) int r; down_write(&pmd->root_lock); - r = dm_sm_new_block(pmd->data_sm, result); - pmd->need_commit = 1; - up_write(&pmd->root_lock); return r; @@ -1519,11 +1487,7 @@ static int __resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count) return -EINVAL; } - r = dm_sm_extend(pmd->data_sm, new_count - old_count); - if (!r) - pmd->need_commit = 1; - - return r; + return dm_sm_extend(pmd->data_sm, new_count - old_count); } int dm_pool_resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count) -- cgit v1.2.3 From 384ef0e62e409e52c80adef5b1ff83075377c19e Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 27 Jul 2012 15:08:09 +0100 Subject: dm persistent data: tidy transaction manager creation fns Tidy the transaction manager creation functions. They no longer lock the superblock. Superblock locking is pulled out to the caller. Also export dm_bm_write_lock_zero. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin-metadata.c | 42 ++++++++++++------- drivers/md/persistent-data/dm-block-manager.c | 1 + .../md/persistent-data/dm-transaction-manager.c | 47 ++++++---------------- .../md/persistent-data/dm-transaction-manager.h | 11 ++--- 4 files changed, 46 insertions(+), 55 deletions(-) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index c8f36227fd7b..4aadeae3af67 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -418,8 +418,7 @@ static int init_pmd(struct dm_pool_metadata *pmd, struct dm_block *sblock; if (create) { - r = dm_tm_create_with_sm(bm, THIN_SUPERBLOCK_LOCATION, - &sb_validator, &tm, &sm, &sblock); + r = dm_tm_create_with_sm(bm, THIN_SUPERBLOCK_LOCATION, &tm, &sm); if (r < 0) { DMERR("tm_create_with_sm failed"); return r; @@ -428,38 +427,51 @@ static int init_pmd(struct dm_pool_metadata *pmd, data_sm = dm_sm_disk_create(tm, nr_blocks); if (IS_ERR(data_sm)) { DMERR("sm_disk_create failed"); - dm_tm_unlock(tm, sblock); r = PTR_ERR(data_sm); goto bad; } + + /* + * We cycle the superblock to let the validator do its stuff. + */ + r = dm_bm_write_lock_zero(bm, THIN_SUPERBLOCK_LOCATION, &sb_validator, &sblock); + if (r < 0) { + DMERR("couldn't lock superblock"); + goto bad; + } + + dm_bm_unlock(sblock); + } else { - struct thin_disk_superblock *disk_super = NULL; - size_t space_map_root_offset = - offsetof(struct thin_disk_superblock, metadata_space_map_root); + struct thin_disk_superblock *disk_super; + + r = dm_bm_read_lock(bm, THIN_SUPERBLOCK_LOCATION, &sb_validator, &sblock); + if (r < 0) { + DMERR("couldn't read superblock"); + return r; + } + disk_super = dm_block_data(sblock); r = dm_tm_open_with_sm(bm, THIN_SUPERBLOCK_LOCATION, - &sb_validator, space_map_root_offset, - SPACE_MAP_ROOT_SIZE, &tm, &sm, &sblock); + disk_super->metadata_space_map_root, + sizeof(disk_super->metadata_space_map_root), + &tm, &sm); if (r < 0) { DMERR("tm_open_with_sm failed"); + dm_bm_unlock(sblock); return r; } - disk_super = dm_block_data(sblock); data_sm = dm_sm_disk_open(tm, disk_super->data_space_map_root, sizeof(disk_super->data_space_map_root)); if (IS_ERR(data_sm)) { DMERR("sm_disk_open failed"); + dm_bm_unlock(sblock); r = PTR_ERR(data_sm); goto bad; } - } - - r = dm_tm_unlock(tm, sblock); - if (r < 0) { - DMERR("couldn't unlock superblock"); - goto bad_data_sm; + dm_bm_unlock(sblock); } pmd->bm = bm; diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c index c70ad6e303d3..4b5c504f47af 100644 --- a/drivers/md/persistent-data/dm-block-manager.c +++ b/drivers/md/persistent-data/dm-block-manager.c @@ -565,6 +565,7 @@ int dm_bm_write_lock_zero(struct dm_block_manager *bm, return 0; } +EXPORT_SYMBOL_GPL(dm_bm_write_lock_zero); int dm_bm_unlock(struct dm_block *b) { diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c index 86c3705052a4..b4f05830af07 100644 --- a/drivers/md/persistent-data/dm-transaction-manager.c +++ b/drivers/md/persistent-data/dm-transaction-manager.c @@ -310,12 +310,10 @@ struct dm_block_manager *dm_tm_get_bm(struct dm_transaction_manager *tm) static int dm_tm_create_internal(struct dm_block_manager *bm, dm_block_t sb_location, - struct dm_block_validator *sb_validator, - size_t root_offset, size_t root_max_len, struct dm_transaction_manager **tm, struct dm_space_map **sm, - struct dm_block **sblock, - int create) + int create, + void *sm_root, size_t sm_len) { int r; @@ -330,64 +328,43 @@ static int dm_tm_create_internal(struct dm_block_manager *bm, } if (create) { - r = dm_bm_write_lock_zero(dm_tm_get_bm(*tm), sb_location, - sb_validator, sblock); - if (r < 0) { - DMERR("couldn't lock superblock"); - goto bad1; - } - r = dm_sm_metadata_create(*sm, *tm, dm_bm_nr_blocks(bm), sb_location); if (r) { DMERR("couldn't create metadata space map"); - goto bad2; + goto bad; } } else { - r = dm_bm_write_lock(dm_tm_get_bm(*tm), sb_location, - sb_validator, sblock); - if (r < 0) { - DMERR("couldn't lock superblock"); - goto bad1; - } - - r = dm_sm_metadata_open(*sm, *tm, - dm_block_data(*sblock) + root_offset, - root_max_len); + r = dm_sm_metadata_open(*sm, *tm, sm_root, sm_len); if (r) { DMERR("couldn't open metadata space map"); - goto bad2; + goto bad; } } return 0; -bad2: - dm_tm_unlock(*tm, *sblock); -bad1: +bad: dm_tm_destroy(*tm); + dm_sm_destroy(*sm); return r; } int dm_tm_create_with_sm(struct dm_block_manager *bm, dm_block_t sb_location, - struct dm_block_validator *sb_validator, struct dm_transaction_manager **tm, - struct dm_space_map **sm, struct dm_block **sblock) + struct dm_space_map **sm) { - return dm_tm_create_internal(bm, sb_location, sb_validator, - 0, 0, tm, sm, sblock, 1); + return dm_tm_create_internal(bm, sb_location, tm, sm, 1, NULL, 0); } EXPORT_SYMBOL_GPL(dm_tm_create_with_sm); int dm_tm_open_with_sm(struct dm_block_manager *bm, dm_block_t sb_location, - struct dm_block_validator *sb_validator, - size_t root_offset, size_t root_max_len, + void *sm_root, size_t root_len, struct dm_transaction_manager **tm, - struct dm_space_map **sm, struct dm_block **sblock) + struct dm_space_map **sm) { - return dm_tm_create_internal(bm, sb_location, sb_validator, root_offset, - root_max_len, tm, sm, sblock, 0); + return dm_tm_create_internal(bm, sb_location, tm, sm, 0, sm_root, root_len); } EXPORT_SYMBOL_GPL(dm_tm_open_with_sm); diff --git a/drivers/md/persistent-data/dm-transaction-manager.h b/drivers/md/persistent-data/dm-transaction-manager.h index 6da784871db4..b5b139076ca5 100644 --- a/drivers/md/persistent-data/dm-transaction-manager.h +++ b/drivers/md/persistent-data/dm-transaction-manager.h @@ -115,16 +115,17 @@ struct dm_block_manager *dm_tm_get_bm(struct dm_transaction_manager *tm); * * Returns a tm that has an open transaction to write the new disk sm. * Caller should store the new sm root and commit. + * + * The superblock location is passed so the metadata space map knows it + * shouldn't be used. */ int dm_tm_create_with_sm(struct dm_block_manager *bm, dm_block_t sb_location, - struct dm_block_validator *sb_validator, struct dm_transaction_manager **tm, - struct dm_space_map **sm, struct dm_block **sblock); + struct dm_space_map **sm); int dm_tm_open_with_sm(struct dm_block_manager *bm, dm_block_t sb_location, - struct dm_block_validator *sb_validator, - size_t root_offset, size_t root_max_len, + void *sm_root, size_t root_len, struct dm_transaction_manager **tm, - struct dm_space_map **sm, struct dm_block **sblock); + struct dm_space_map **sm); #endif /* _LINUX_DM_TRANSACTION_MANAGER_H */ -- cgit v1.2.3 From 3c9ad9bd87b03032999ddbeb44bdf7938f7dbd57 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 27 Jul 2012 15:08:09 +0100 Subject: dm persistent data: stop using dm_bm_unlock_move when shadowing blocks in tm Stop using dm_bm_unlock_move when shadowing blocks in the transaction manager as an optimisation and remove the function as it is then no longer used. Some code, such as the space maps, keeps using on-disk data structures from the previous transaction. It can do this because blocks won't be reallocated until the subsequent transaction. Using dm_bm_unlock_move to copy blocks sounds like a win, but it forces a synchronous read should the old block be accessed. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/persistent-data/dm-block-manager.c | 16 ---------------- drivers/md/persistent-data/dm-block-manager.h | 8 -------- drivers/md/persistent-data/dm-transaction-manager.c | 17 ++++++++++++++--- 3 files changed, 14 insertions(+), 27 deletions(-) diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c index 4b5c504f47af..ad1712e802f6 100644 --- a/drivers/md/persistent-data/dm-block-manager.c +++ b/drivers/md/persistent-data/dm-block-manager.c @@ -584,22 +584,6 @@ int dm_bm_unlock(struct dm_block *b) } EXPORT_SYMBOL_GPL(dm_bm_unlock); -int dm_bm_unlock_move(struct dm_block *b, dm_block_t n) -{ - struct buffer_aux *aux; - - aux = dm_bufio_get_aux_data(to_buffer(b)); - - if (aux->write_locked) { - dm_bufio_mark_buffer_dirty(to_buffer(b)); - bl_up_write(&aux->lock); - } else - bl_up_read(&aux->lock); - - dm_bufio_release_move(to_buffer(b), n); - return 0; -} - int dm_bm_flush_and_unlock(struct dm_block_manager *bm, struct dm_block *superblock) { diff --git a/drivers/md/persistent-data/dm-block-manager.h b/drivers/md/persistent-data/dm-block-manager.h index 924833d2dfa6..327885566631 100644 --- a/drivers/md/persistent-data/dm-block-manager.h +++ b/drivers/md/persistent-data/dm-block-manager.h @@ -96,14 +96,6 @@ int dm_bm_write_lock_zero(struct dm_block_manager *bm, dm_block_t b, int dm_bm_unlock(struct dm_block *b); -/* - * An optimisation; we often want to copy a block's contents to a new - * block. eg, as part of the shadowing operation. It's far better for - * bufio to do this move behind the scenes than hold 2 locks and memcpy the - * data. - */ -int dm_bm_unlock_move(struct dm_block *b, dm_block_t n); - /* * It's a common idiom to have a superblock that should be committed last. * diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c index b4f05830af07..d247a35da3c6 100644 --- a/drivers/md/persistent-data/dm-transaction-manager.c +++ b/drivers/md/persistent-data/dm-transaction-manager.c @@ -219,13 +219,24 @@ static int __shadow_block(struct dm_transaction_manager *tm, dm_block_t orig, if (r < 0) return r; - r = dm_bm_unlock_move(orig_block, new); - if (r < 0) { + /* + * It would be tempting to use dm_bm_unlock_move here, but some + * code, such as the space maps, keeps using the old data structures + * secure in the knowledge they won't be changed until the next + * transaction. Using unlock_move would force a synchronous read + * since the old block would no longer be in the cache. + */ + r = dm_bm_write_lock_zero(tm->bm, new, v, result); + if (r) { dm_bm_unlock(orig_block); return r; } - return dm_bm_write_lock(tm->bm, new, v, result); + memcpy(dm_block_data(*result), dm_block_data(orig_block), + dm_bm_block_size(tm->bm)); + + dm_bm_unlock(orig_block); + return r; } int dm_tm_shadow_block(struct dm_transaction_manager *tm, dm_block_t orig, -- cgit v1.2.3 From 2597119206405b9dbaee491e7c2389b7570b3227 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 27 Jul 2012 15:08:09 +0100 Subject: dm thin metadata: wrap superblock locking Introduce wrappers to handle write locking the superblock appropriately. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin-metadata.c | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 4aadeae3af67..dd3a89b1bf6f 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -343,6 +343,20 @@ static int subtree_equal(void *context, void *value1_le, void *value2_le) /*----------------------------------------------------------------*/ +static int superblock_lock_zero(struct dm_pool_metadata *pmd, + struct dm_block **sblock) +{ + return dm_bm_write_lock_zero(pmd->bm, THIN_SUPERBLOCK_LOCATION, + &sb_validator, sblock); +} + +static int superblock_lock(struct dm_pool_metadata *pmd, + struct dm_block **sblock) +{ + return dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, + &sb_validator, sblock); +} + static int superblock_all_zeroes(struct dm_block_manager *bm, int *result) { int r; @@ -430,22 +444,11 @@ static int init_pmd(struct dm_pool_metadata *pmd, r = PTR_ERR(data_sm); goto bad; } - - /* - * We cycle the superblock to let the validator do its stuff. - */ - r = dm_bm_write_lock_zero(bm, THIN_SUPERBLOCK_LOCATION, &sb_validator, &sblock); - if (r < 0) { - DMERR("couldn't lock superblock"); - goto bad; - } - - dm_bm_unlock(sblock); - } else { struct thin_disk_superblock *disk_super; - r = dm_bm_read_lock(bm, THIN_SUPERBLOCK_LOCATION, &sb_validator, &sblock); + r = dm_bm_read_lock(bm, THIN_SUPERBLOCK_LOCATION, + &sb_validator, &sblock); if (r < 0) { DMERR("couldn't read superblock"); return r; @@ -628,8 +631,7 @@ static int __commit_transaction(struct dm_pool_metadata *pmd) if (r < 0) return r; - r = dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, - &sb_validator, &sblock); + r = superblock_lock(pmd, &sblock); if (r) return r; @@ -709,8 +711,7 @@ struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, /* * Create. */ - r = dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, - &sb_validator, &sblock); + r = superblock_lock_zero(pmd, &sblock); if (r) goto bad; @@ -1126,8 +1127,7 @@ static int __reserve_metadata_snap(struct dm_pool_metadata *pmd) /* * Write the held root into the superblock. */ - r = dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, - &sb_validator, &sblock); + r = superblock_lock(pmd, &sblock); if (r) { dm_tm_dec(pmd->tm, held_root); return r; @@ -1157,8 +1157,7 @@ static int __release_metadata_snap(struct dm_pool_metadata *pmd) struct dm_block *sblock, *copy; dm_block_t held_root; - r = dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, - &sb_validator, &sblock); + r = superblock_lock(pmd, &sblock); if (r) return r; -- cgit v1.2.3 From 77f49a402769f7db022935ef317f1c12c1c06319 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 27 Jul 2012 15:08:10 +0100 Subject: dm thin metadata: rename init_pmd to __create_persistent_data_objects Rename init_pmd to __create_persistent_data_objects in dm-thin-metadata. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin-metadata.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index dd3a89b1bf6f..0ef10f70565e 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -422,9 +422,9 @@ static void __setup_btree_details(struct dm_pool_metadata *pmd) pmd->details_info.value_type.equal = NULL; } -static int init_pmd(struct dm_pool_metadata *pmd, - struct dm_block_manager *bm, - dm_block_t nr_blocks, int create) +static int __create_persistent_data_objects(struct dm_pool_metadata *pmd, + struct dm_block_manager *bm, + dm_block_t nr_blocks, int create) { int r; struct dm_space_map *sm, *data_sm; @@ -693,7 +693,7 @@ struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, return ERR_PTR(r); } - r = init_pmd(pmd, bm, 0, create); + r = __create_persistent_data_objects(pmd, bm, 0, create); if (r) { dm_block_manager_destroy(bm); kfree(pmd); -- cgit v1.2.3 From 332627db00a982b395f5b141d50298866bfec98c Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 27 Jul 2012 15:08:10 +0100 Subject: dm thin metadata: move bm creation code into create_persistent_data_objects Move block manager creation and the check for unformatted metadata into __create_persistent_data_objects(). Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin-metadata.c | 56 ++++++++++++++++++++++++------------------- 1 file changed, 32 insertions(+), 24 deletions(-) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 0ef10f70565e..dab2109ec40a 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -357,7 +357,7 @@ static int superblock_lock(struct dm_pool_metadata *pmd, &sb_validator, sblock); } -static int superblock_all_zeroes(struct dm_block_manager *bm, int *result) +static int __superblock_all_zeroes(struct dm_block_manager *bm, int *result) { int r; unsigned i; @@ -422,9 +422,9 @@ static void __setup_btree_details(struct dm_pool_metadata *pmd) pmd->details_info.value_type.equal = NULL; } -static int __create_persistent_data_objects(struct dm_pool_metadata *pmd, - struct dm_block_manager *bm, - dm_block_t nr_blocks, int create) +static int __open_or_format_metadata(struct dm_pool_metadata *pmd, + struct dm_block_manager *bm, + dm_block_t nr_blocks, int create) { int r; struct dm_space_map *sm, *data_sm; @@ -509,6 +509,32 @@ bad: return r; } +static int __create_persistent_data_objects(struct dm_pool_metadata *pmd, + dm_block_t nr_blocks, int *create) +{ + int r; + + pmd->bm = dm_block_manager_create(pmd->bdev, THIN_METADATA_BLOCK_SIZE, + THIN_METADATA_CACHE_SIZE, + THIN_MAX_CONCURRENT_LOCKS); + if (IS_ERR(pmd->bm)) { + DMERR("could not create block manager"); + return PTR_ERR(pmd->bm); + } + + r = __superblock_all_zeroes(pmd->bm, create); + if (r) { + dm_block_manager_destroy(pmd->bm); + return r; + } + + r = __open_or_format_metadata(pmd, pmd->bm, nr_blocks, *create); + if (r) + dm_block_manager_destroy(pmd->bm); + + return r; +} + static int __begin_transaction(struct dm_pool_metadata *pmd) { int r; @@ -666,7 +692,6 @@ struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, struct thin_disk_superblock *disk_super; struct dm_pool_metadata *pmd; sector_t bdev_size = i_size_read(bdev->bd_inode) >> SECTOR_SHIFT; - struct dm_block_manager *bm; int create; struct dm_block *sblock; @@ -676,30 +701,13 @@ struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, return ERR_PTR(-ENOMEM); } - bm = dm_block_manager_create(bdev, THIN_METADATA_BLOCK_SIZE, - THIN_METADATA_CACHE_SIZE, - THIN_MAX_CONCURRENT_LOCKS); - if (IS_ERR(bm)) { - r = PTR_ERR(bm); - DMERR("could not create block manager"); - kfree(pmd); - return ERR_PTR(r); - } - - r = superblock_all_zeroes(bm, &create); - if (r) { - dm_block_manager_destroy(bm); - kfree(pmd); - return ERR_PTR(r); - } + pmd->bdev = bdev; - r = __create_persistent_data_objects(pmd, bm, 0, create); + r = __create_persistent_data_objects(pmd, 0, &create); if (r) { - dm_block_manager_destroy(bm); kfree(pmd); return ERR_PTR(r); } - pmd->bdev = bdev; if (!create) { r = __begin_transaction(pmd); -- cgit v1.2.3 From f9dd9352b92cd66729fcb0f0e32f647e811d9083 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 27 Jul 2012 15:08:10 +0100 Subject: dm thin metadata: factor __destroy_persistent_data out of dm_pool_metadata_close Factor __destroy_persistent_data_objects out of dm_pool_metadata_close. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin-metadata.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index dab2109ec40a..b646a71f6da9 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -535,6 +535,15 @@ static int __create_persistent_data_objects(struct dm_pool_metadata *pmd, return r; } +static void __destroy_persistent_data_objects(struct dm_pool_metadata *pmd) +{ + dm_sm_destroy(pmd->data_sm); + dm_sm_destroy(pmd->metadata_sm); + dm_tm_destroy(pmd->nb_tm); + dm_tm_destroy(pmd->tm); + dm_block_manager_destroy(pmd->bm); +} + static int __begin_transaction(struct dm_pool_metadata *pmd) { int r; @@ -792,11 +801,7 @@ int dm_pool_metadata_close(struct dm_pool_metadata *pmd) DMWARN("%s: __commit_transaction() failed, error = %d", __func__, r); - dm_tm_destroy(pmd->tm); - dm_tm_destroy(pmd->nb_tm); - dm_block_manager_destroy(pmd->bm); - dm_sm_destroy(pmd->metadata_sm); - dm_sm_destroy(pmd->data_sm); + __destroy_persistent_data_objects(pmd); kfree(pmd); return 0; -- cgit v1.2.3 From 6a0ebd31b6b504621254f5d453d618d36b0179a1 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 27 Jul 2012 15:08:10 +0100 Subject: dm thin metadata: lift some initialisation out of __open_or_format_metadata Lift some initialisation out of __open_or_format_metadata in dm-thin-metadata. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin-metadata.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index b646a71f6da9..ce487aada0d8 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -489,14 +489,11 @@ static int __open_or_format_metadata(struct dm_pool_metadata *pmd, } __setup_btree_details(pmd); - pmd->root = 0; - init_rwsem(&pmd->root_lock); - pmd->time = 0; + pmd->root = 0; pmd->details_root = 0; pmd->trans_id = 0; pmd->flags = 0; - INIT_LIST_HEAD(&pmd->thin_devices); return 0; @@ -710,6 +707,9 @@ struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, return ERR_PTR(-ENOMEM); } + init_rwsem(&pmd->root_lock); + pmd->time = 0; + INIT_LIST_HEAD(&pmd->thin_devices); pmd->bdev = bdev; r = __create_persistent_data_objects(pmd, 0, &create); -- cgit v1.2.3 From 9cb6653f9a515f6cb0eaabfb27a2204b0aaf4e48 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 27 Jul 2012 15:08:10 +0100 Subject: dm thin metadata: factor out __write_initial_superblock Factor out __write_initial_superblock and also pull some other initial creation code out of dm_pool_metadata_open. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin-metadata.c | 109 ++++++++++++++++++++++-------------------- 1 file changed, 58 insertions(+), 51 deletions(-) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index ce487aada0d8..5ead655706d4 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -422,6 +422,41 @@ static void __setup_btree_details(struct dm_pool_metadata *pmd) pmd->details_info.value_type.equal = NULL; } +static int __write_initial_superblock(struct dm_pool_metadata *pmd) +{ + int r; + struct dm_block *sblock; + struct thin_disk_superblock *disk_super; + sector_t bdev_size = i_size_read(pmd->bdev->bd_inode) >> SECTOR_SHIFT; + + if (bdev_size > THIN_METADATA_MAX_SECTORS) + bdev_size = THIN_METADATA_MAX_SECTORS; + + r = superblock_lock_zero(pmd, &sblock); + if (r) + return r; + + disk_super = dm_block_data(sblock); + disk_super->magic = cpu_to_le64(THIN_SUPERBLOCK_MAGIC); + disk_super->version = cpu_to_le32(THIN_VERSION); + disk_super->time = 0; + disk_super->metadata_block_size = cpu_to_le32(THIN_METADATA_BLOCK_SIZE >> SECTOR_SHIFT); + disk_super->metadata_nr_blocks = cpu_to_le64(bdev_size >> SECTOR_TO_BLOCK_SHIFT); + disk_super->data_block_size = cpu_to_le32(pmd->data_block_size); + + r = dm_bm_unlock(sblock); + if (r) + return r; + + pmd->flags = 0; + r = dm_pool_commit_metadata(pmd); + if (r < 0) + DMERR("%s: dm_pool_commit_metadata() failed, error = %d", + __func__, r); + + return r; +} + static int __open_or_format_metadata(struct dm_pool_metadata *pmd, struct dm_block_manager *bm, dm_block_t nr_blocks, int create) @@ -495,6 +530,23 @@ static int __open_or_format_metadata(struct dm_pool_metadata *pmd, pmd->trans_id = 0; pmd->flags = 0; + if (!create) + return 0; + + r = dm_btree_empty(&pmd->info, &pmd->root); + if (r < 0) + goto bad_data_sm; + + r = dm_btree_empty(&pmd->details_info, &pmd->details_root); + if (r < 0) { + DMERR("couldn't create devices root"); + goto bad_data_sm; + } + + r = __write_initial_superblock(pmd); + if (r) + goto bad_data_sm; + return 0; bad_data_sm: @@ -695,11 +747,8 @@ struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, sector_t data_block_size) { int r; - struct thin_disk_superblock *disk_super; struct dm_pool_metadata *pmd; - sector_t bdev_size = i_size_read(bdev->bd_inode) >> SECTOR_SHIFT; int create; - struct dm_block *sblock; pmd = kmalloc(sizeof(*pmd), GFP_KERNEL); if (!pmd) { @@ -711,6 +760,7 @@ struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, pmd->time = 0; INIT_LIST_HEAD(&pmd->thin_devices); pmd->bdev = bdev; + pmd->data_block_size = data_block_size; r = __create_persistent_data_objects(pmd, 0, &create); if (r) { @@ -720,57 +770,14 @@ struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, if (!create) { r = __begin_transaction(pmd); - if (r < 0) - goto bad; - return pmd; - } - - /* - * Create. - */ - r = superblock_lock_zero(pmd, &sblock); - if (r) - goto bad; - - if (bdev_size > THIN_METADATA_MAX_SECTORS) - bdev_size = THIN_METADATA_MAX_SECTORS; - - disk_super = dm_block_data(sblock); - disk_super->magic = cpu_to_le64(THIN_SUPERBLOCK_MAGIC); - disk_super->version = cpu_to_le32(THIN_VERSION); - disk_super->time = 0; - disk_super->metadata_block_size = cpu_to_le32(THIN_METADATA_BLOCK_SIZE >> SECTOR_SHIFT); - disk_super->metadata_nr_blocks = cpu_to_le64(bdev_size >> SECTOR_TO_BLOCK_SHIFT); - disk_super->data_block_size = cpu_to_le32(data_block_size); - - r = dm_bm_unlock(sblock); - if (r < 0) - goto bad; - - r = dm_btree_empty(&pmd->info, &pmd->root); - if (r < 0) - goto bad; - - r = dm_btree_empty(&pmd->details_info, &pmd->details_root); - if (r < 0) { - DMERR("couldn't create devices root"); - goto bad; - } - - pmd->flags = 0; - r = dm_pool_commit_metadata(pmd); - if (r < 0) { - DMERR("%s: dm_pool_commit_metadata() failed, error = %d", - __func__, r); - goto bad; + if (r < 0) { + if (dm_pool_metadata_close(pmd) < 0) + DMWARN("%s: dm_pool_metadata_close() failed.", __func__); + return ERR_PTR(r); + } } return pmd; - -bad: - if (dm_pool_metadata_close(pmd) < 0) - DMWARN("%s: dm_pool_metadata_close() failed.", __func__); - return ERR_PTR(r); } int dm_pool_metadata_close(struct dm_pool_metadata *pmd) -- cgit v1.2.3 From 10d2a9ff7c37635619c4d03ca672ffbd58b9c3fa Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 27 Jul 2012 15:08:11 +0100 Subject: dm thin metadata: move dm_commit_pool_metadata into __write_initial_superblock Move dm_commit_pool_metadata inline into __write_initial_superblock in dm-thin-metadata. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin-metadata.c | 44 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 6 deletions(-) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 5ead655706d4..2c44c5c10987 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -422,38 +422,70 @@ static void __setup_btree_details(struct dm_pool_metadata *pmd) pmd->details_info.value_type.equal = NULL; } +static int __begin_transaction(struct dm_pool_metadata *pmd); static int __write_initial_superblock(struct dm_pool_metadata *pmd) { int r; struct dm_block *sblock; + size_t metadata_len, data_len; struct thin_disk_superblock *disk_super; sector_t bdev_size = i_size_read(pmd->bdev->bd_inode) >> SECTOR_SHIFT; if (bdev_size > THIN_METADATA_MAX_SECTORS) bdev_size = THIN_METADATA_MAX_SECTORS; + r = dm_sm_root_size(pmd->metadata_sm, &metadata_len); + if (r < 0) + return r; + + r = dm_sm_root_size(pmd->data_sm, &data_len); + if (r < 0) + return r; + + r = dm_sm_commit(pmd->data_sm); + if (r < 0) + return r; + + r = dm_tm_pre_commit(pmd->tm); + if (r < 0) + return r; + r = superblock_lock_zero(pmd, &sblock); if (r) return r; disk_super = dm_block_data(sblock); + disk_super->flags = 0; disk_super->magic = cpu_to_le64(THIN_SUPERBLOCK_MAGIC); disk_super->version = cpu_to_le32(THIN_VERSION); disk_super->time = 0; + disk_super->trans_id = 0; + disk_super->held_root = 0; + + r = dm_sm_copy_root(pmd->metadata_sm, &disk_super->metadata_space_map_root, + metadata_len); + if (r < 0) + goto bad_locked; + + r = dm_sm_copy_root(pmd->data_sm, &disk_super->data_space_map_root, + data_len); + if (r < 0) + goto bad_locked; + + disk_super->data_mapping_root = cpu_to_le64(pmd->root); + disk_super->device_details_root = cpu_to_le64(pmd->details_root); disk_super->metadata_block_size = cpu_to_le32(THIN_METADATA_BLOCK_SIZE >> SECTOR_SHIFT); disk_super->metadata_nr_blocks = cpu_to_le64(bdev_size >> SECTOR_TO_BLOCK_SHIFT); disk_super->data_block_size = cpu_to_le32(pmd->data_block_size); - r = dm_bm_unlock(sblock); + r = dm_tm_commit(pmd->tm, sblock); if (r) return r; - pmd->flags = 0; - r = dm_pool_commit_metadata(pmd); - if (r < 0) - DMERR("%s: dm_pool_commit_metadata() failed, error = %d", - __func__, r); + return __begin_transaction(pmd); +bad_locked: + dm_bm_unlock(sblock); return r; } -- cgit v1.2.3 From 270938bac53b03b83b4b0b65b760f32975df72b1 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 27 Jul 2012 15:08:11 +0100 Subject: dm thin metadata: lift __begin_transaction out of __write_initial_superblock Lift the call to __begin_transaction out of __write_initial_superblock in dm-thin-metadata. Called higher up the call chain now. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin-metadata.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 2c44c5c10987..934b3fc7bfc9 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -422,7 +422,6 @@ static void __setup_btree_details(struct dm_pool_metadata *pmd) pmd->details_info.value_type.equal = NULL; } -static int __begin_transaction(struct dm_pool_metadata *pmd); static int __write_initial_superblock(struct dm_pool_metadata *pmd) { int r; @@ -478,11 +477,7 @@ static int __write_initial_superblock(struct dm_pool_metadata *pmd) disk_super->metadata_nr_blocks = cpu_to_le64(bdev_size >> SECTOR_TO_BLOCK_SHIFT); disk_super->data_block_size = cpu_to_le32(pmd->data_block_size); - r = dm_tm_commit(pmd->tm, sblock); - if (r) - return r; - - return __begin_transaction(pmd); + return dm_tm_commit(pmd->tm, sblock); bad_locked: dm_bm_unlock(sblock); @@ -800,13 +795,11 @@ struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, return ERR_PTR(r); } - if (!create) { - r = __begin_transaction(pmd); - if (r < 0) { - if (dm_pool_metadata_close(pmd) < 0) - DMWARN("%s: dm_pool_metadata_close() failed.", __func__); - return ERR_PTR(r); - } + r = __begin_transaction(pmd); + if (r < 0) { + if (dm_pool_metadata_close(pmd) < 0) + DMWARN("%s: dm_pool_metadata_close() failed.", __func__); + return ERR_PTR(r); } return pmd; -- cgit v1.2.3 From 583ceee2ed1d171718aee348163f2ed25d461772 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 27 Jul 2012 15:08:11 +0100 Subject: dm thin metadata: zero unused superblock uuid Zero the unused uuid when initialising the metadata superblock. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin-metadata.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 934b3fc7bfc9..9f7c1e01dda2 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -455,6 +455,7 @@ static int __write_initial_superblock(struct dm_pool_metadata *pmd) disk_super = dm_block_data(sblock); disk_super->flags = 0; + memset(disk_super->uuid, 0, sizeof(disk_super->uuid)); disk_super->magic = cpu_to_le64(THIN_SUPERBLOCK_MAGIC); disk_super->version = cpu_to_le32(THIN_VERSION); disk_super->time = 0; -- cgit v1.2.3 From d6332814e343c76097ff10ed8225069e6392022f Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 27 Jul 2012 15:08:12 +0100 Subject: dm thin metadata: use struct dm_pool_metadata members in __open_or_format_metadata Clean up __open_or_format_metadata in dm-thin-metadata by using struct dm_pool_metadata members to replace local variables. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin-metadata.c | 40 +++++++++++++++++----------------------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 9f7c1e01dda2..dceec441ef30 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -486,31 +486,29 @@ bad_locked: } static int __open_or_format_metadata(struct dm_pool_metadata *pmd, - struct dm_block_manager *bm, dm_block_t nr_blocks, int create) { int r; - struct dm_space_map *sm, *data_sm; - struct dm_transaction_manager *tm; struct dm_block *sblock; if (create) { - r = dm_tm_create_with_sm(bm, THIN_SUPERBLOCK_LOCATION, &tm, &sm); + r = dm_tm_create_with_sm(pmd->bm, THIN_SUPERBLOCK_LOCATION, + &pmd->tm, &pmd->metadata_sm); if (r < 0) { DMERR("tm_create_with_sm failed"); return r; } - data_sm = dm_sm_disk_create(tm, nr_blocks); - if (IS_ERR(data_sm)) { + pmd->data_sm = dm_sm_disk_create(pmd->tm, nr_blocks); + if (IS_ERR(pmd->data_sm)) { DMERR("sm_disk_create failed"); - r = PTR_ERR(data_sm); + r = PTR_ERR(pmd->data_sm); goto bad; } } else { struct thin_disk_superblock *disk_super; - r = dm_bm_read_lock(bm, THIN_SUPERBLOCK_LOCATION, + r = dm_bm_read_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, &sb_validator, &sblock); if (r < 0) { DMERR("couldn't read superblock"); @@ -518,33 +516,29 @@ static int __open_or_format_metadata(struct dm_pool_metadata *pmd, } disk_super = dm_block_data(sblock); - r = dm_tm_open_with_sm(bm, THIN_SUPERBLOCK_LOCATION, + r = dm_tm_open_with_sm(pmd->bm, THIN_SUPERBLOCK_LOCATION, disk_super->metadata_space_map_root, sizeof(disk_super->metadata_space_map_root), - &tm, &sm); + &pmd->tm, &pmd->metadata_sm); if (r < 0) { DMERR("tm_open_with_sm failed"); dm_bm_unlock(sblock); return r; } - data_sm = dm_sm_disk_open(tm, disk_super->data_space_map_root, - sizeof(disk_super->data_space_map_root)); - if (IS_ERR(data_sm)) { + pmd->data_sm = dm_sm_disk_open(pmd->tm, disk_super->data_space_map_root, + sizeof(disk_super->data_space_map_root)); + if (IS_ERR(pmd->data_sm)) { DMERR("sm_disk_open failed"); dm_bm_unlock(sblock); - r = PTR_ERR(data_sm); + r = PTR_ERR(pmd->data_sm); goto bad; } dm_bm_unlock(sblock); } - pmd->bm = bm; - pmd->metadata_sm = sm; - pmd->data_sm = data_sm; - pmd->tm = tm; - pmd->nb_tm = dm_tm_create_non_blocking_clone(tm); + pmd->nb_tm = dm_tm_create_non_blocking_clone(pmd->tm); if (!pmd->nb_tm) { DMERR("could not create clone tm"); r = -ENOMEM; @@ -578,10 +572,10 @@ static int __open_or_format_metadata(struct dm_pool_metadata *pmd, return 0; bad_data_sm: - dm_sm_destroy(data_sm); + dm_sm_destroy(pmd->data_sm); bad: - dm_tm_destroy(tm); - dm_sm_destroy(sm); + dm_tm_destroy(pmd->tm); + dm_sm_destroy(pmd->metadata_sm); return r; } @@ -605,7 +599,7 @@ static int __create_persistent_data_objects(struct dm_pool_metadata *pmd, return r; } - r = __open_or_format_metadata(pmd, pmd->bm, nr_blocks, *create); + r = __open_or_format_metadata(pmd, nr_blocks, *create); if (r) dm_block_manager_destroy(pmd->bm); -- cgit v1.2.3 From e4d2205cdf2094eb9f7246a4474f8df63806c1d3 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 27 Jul 2012 15:08:12 +0100 Subject: dm thin metadata: split __open or format metadata Split __open_or_format_metadata into __format_metadata and __open_metadata in dm-thin-metadata. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin-metadata.c | 124 +++++++++++++++++++++++++----------------- 1 file changed, 74 insertions(+), 50 deletions(-) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index dceec441ef30..31d21bfc8cb4 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -485,57 +485,22 @@ bad_locked: return r; } -static int __open_or_format_metadata(struct dm_pool_metadata *pmd, - dm_block_t nr_blocks, int create) +static int __format_metadata(struct dm_pool_metadata *pmd, dm_block_t nr_blocks) { int r; - struct dm_block *sblock; - if (create) { - r = dm_tm_create_with_sm(pmd->bm, THIN_SUPERBLOCK_LOCATION, - &pmd->tm, &pmd->metadata_sm); - if (r < 0) { - DMERR("tm_create_with_sm failed"); - return r; - } - - pmd->data_sm = dm_sm_disk_create(pmd->tm, nr_blocks); - if (IS_ERR(pmd->data_sm)) { - DMERR("sm_disk_create failed"); - r = PTR_ERR(pmd->data_sm); - goto bad; - } - } else { - struct thin_disk_superblock *disk_super; - - r = dm_bm_read_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, - &sb_validator, &sblock); - if (r < 0) { - DMERR("couldn't read superblock"); - return r; - } - - disk_super = dm_block_data(sblock); - r = dm_tm_open_with_sm(pmd->bm, THIN_SUPERBLOCK_LOCATION, - disk_super->metadata_space_map_root, - sizeof(disk_super->metadata_space_map_root), - &pmd->tm, &pmd->metadata_sm); - if (r < 0) { - DMERR("tm_open_with_sm failed"); - dm_bm_unlock(sblock); - return r; - } - - pmd->data_sm = dm_sm_disk_open(pmd->tm, disk_super->data_space_map_root, - sizeof(disk_super->data_space_map_root)); - if (IS_ERR(pmd->data_sm)) { - DMERR("sm_disk_open failed"); - dm_bm_unlock(sblock); - r = PTR_ERR(pmd->data_sm); - goto bad; - } + r = dm_tm_create_with_sm(pmd->bm, THIN_SUPERBLOCK_LOCATION, + &pmd->tm, &pmd->metadata_sm); + if (r < 0) { + DMERR("tm_create_with_sm failed"); + return r; + } - dm_bm_unlock(sblock); + pmd->data_sm = dm_sm_disk_create(pmd->tm, nr_blocks); + if (IS_ERR(pmd->data_sm)) { + DMERR("sm_disk_create failed"); + r = PTR_ERR(pmd->data_sm); + goto bad; } pmd->nb_tm = dm_tm_create_non_blocking_clone(pmd->tm); @@ -552,9 +517,6 @@ static int __open_or_format_metadata(struct dm_pool_metadata *pmd, pmd->trans_id = 0; pmd->flags = 0; - if (!create) - return 0; - r = dm_btree_empty(&pmd->info, &pmd->root); if (r < 0) goto bad_data_sm; @@ -580,6 +542,68 @@ bad: return r; } +static int __open_metadata(struct dm_pool_metadata *pmd) +{ + int r; + struct dm_block *sblock; + struct thin_disk_superblock *disk_super; + + r = dm_bm_read_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, + &sb_validator, &sblock); + if (r < 0) { + DMERR("couldn't read superblock"); + return r; + } + + disk_super = dm_block_data(sblock); + r = dm_tm_open_with_sm(pmd->bm, THIN_SUPERBLOCK_LOCATION, + disk_super->metadata_space_map_root, + sizeof(disk_super->metadata_space_map_root), + &pmd->tm, &pmd->metadata_sm); + if (r < 0) { + DMERR("tm_open_with_sm failed"); + dm_bm_unlock(sblock); + return r; + } + + pmd->data_sm = dm_sm_disk_open(pmd->tm, disk_super->data_space_map_root, + sizeof(disk_super->data_space_map_root)); + if (IS_ERR(pmd->data_sm)) { + DMERR("sm_disk_open failed"); + dm_bm_unlock(sblock); + r = PTR_ERR(pmd->data_sm); + goto bad; + } + + dm_bm_unlock(sblock); + + pmd->nb_tm = dm_tm_create_non_blocking_clone(pmd->tm); + if (!pmd->nb_tm) { + DMERR("could not create clone tm"); + r = -ENOMEM; + goto bad_data_sm; + } + + __setup_btree_details(pmd); + +bad_data_sm: + dm_sm_destroy(pmd->data_sm); +bad: + dm_tm_destroy(pmd->tm); + dm_sm_destroy(pmd->metadata_sm); + + return r; +} + +static int __open_or_format_metadata(struct dm_pool_metadata *pmd, + dm_block_t nr_blocks, int create) +{ + if (create) + return __format_metadata(pmd, nr_blocks); + else + return __open_metadata(pmd); +} + static int __create_persistent_data_objects(struct dm_pool_metadata *pmd, dm_block_t nr_blocks, int *create) { -- cgit v1.2.3 From a97e5e6fd027840fc28c72470c581ba68ee9732a Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 27 Jul 2012 15:08:12 +0100 Subject: dm thin metadata: remove nr_blocks arg from __create_persistent_data_objects Remove nr_blocks arg from __create_persistent_data_objects in dm-thin-metadata. It was always passed as zero. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin-metadata.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 31d21bfc8cb4..83ce2fde8ed7 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -485,7 +485,7 @@ bad_locked: return r; } -static int __format_metadata(struct dm_pool_metadata *pmd, dm_block_t nr_blocks) +static int __format_metadata(struct dm_pool_metadata *pmd) { int r; @@ -496,7 +496,7 @@ static int __format_metadata(struct dm_pool_metadata *pmd, dm_block_t nr_blocks) return r; } - pmd->data_sm = dm_sm_disk_create(pmd->tm, nr_blocks); + pmd->data_sm = dm_sm_disk_create(pmd->tm, 0); if (IS_ERR(pmd->data_sm)) { DMERR("sm_disk_create failed"); r = PTR_ERR(pmd->data_sm); @@ -596,16 +596,16 @@ bad: } static int __open_or_format_metadata(struct dm_pool_metadata *pmd, - dm_block_t nr_blocks, int create) + int create) { if (create) - return __format_metadata(pmd, nr_blocks); + return __format_metadata(pmd); else return __open_metadata(pmd); } static int __create_persistent_data_objects(struct dm_pool_metadata *pmd, - dm_block_t nr_blocks, int *create) + int *create) { int r; @@ -623,7 +623,7 @@ static int __create_persistent_data_objects(struct dm_pool_metadata *pmd, return r; } - r = __open_or_format_metadata(pmd, nr_blocks, *create); + r = __open_or_format_metadata(pmd, *create); if (r) dm_block_manager_destroy(pmd->bm); @@ -808,7 +808,7 @@ struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, pmd->bdev = bdev; pmd->data_block_size = data_block_size; - r = __create_persistent_data_objects(pmd, 0, &create); + r = __create_persistent_data_objects(pmd, &create); if (r) { kfree(pmd); return ERR_PTR(r); -- cgit v1.2.3 From 237074c0a30ac017faaf5c3dfab01aff0c6bb03a Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 27 Jul 2012 15:08:13 +0100 Subject: dm thin metadata: move __superblock_all_zeroes to __open_or_format_metadata Move the check for __superblock_all_zeroes from __create_persistent_data_objects() down to __open_or_format_metadata in dm-thin-metadata. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin-metadata.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 83ce2fde8ed7..e7b8ad78af47 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -596,9 +596,15 @@ bad: } static int __open_or_format_metadata(struct dm_pool_metadata *pmd, - int create) + int *create) { - if (create) + int r; + + r = __superblock_all_zeroes(pmd->bm, create); + if (r) + return r; + + if (*create) return __format_metadata(pmd); else return __open_metadata(pmd); @@ -617,13 +623,7 @@ static int __create_persistent_data_objects(struct dm_pool_metadata *pmd, return PTR_ERR(pmd->bm); } - r = __superblock_all_zeroes(pmd->bm, create); - if (r) { - dm_block_manager_destroy(pmd->bm); - return r; - } - - r = __open_or_format_metadata(pmd, *create); + r = __open_or_format_metadata(pmd, create); if (r) dm_block_manager_destroy(pmd->bm); -- cgit v1.2.3 From 8801e06945b6fec9927d5f7a04890a20061340c2 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 27 Jul 2012 15:08:13 +0100 Subject: dm thin metadata: remove create parameter from __create_persistent_data_objects Remove 'create' parameter from __create_persistent_data_objects() in dm-thin-metadata. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin-metadata.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index e7b8ad78af47..7317f1b4ab15 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -595,23 +595,21 @@ bad: return r; } -static int __open_or_format_metadata(struct dm_pool_metadata *pmd, - int *create) +static int __open_or_format_metadata(struct dm_pool_metadata *pmd) { - int r; + int r, unformatted; - r = __superblock_all_zeroes(pmd->bm, create); + r = __superblock_all_zeroes(pmd->bm, &unformatted); if (r) return r; - if (*create) + if (unformatted) return __format_metadata(pmd); else return __open_metadata(pmd); } -static int __create_persistent_data_objects(struct dm_pool_metadata *pmd, - int *create) +static int __create_persistent_data_objects(struct dm_pool_metadata *pmd) { int r; @@ -623,7 +621,7 @@ static int __create_persistent_data_objects(struct dm_pool_metadata *pmd, return PTR_ERR(pmd->bm); } - r = __open_or_format_metadata(pmd, create); + r = __open_or_format_metadata(pmd); if (r) dm_block_manager_destroy(pmd->bm); @@ -794,7 +792,6 @@ struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, { int r; struct dm_pool_metadata *pmd; - int create; pmd = kmalloc(sizeof(*pmd), GFP_KERNEL); if (!pmd) { @@ -808,7 +805,7 @@ struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, pmd->bdev = bdev; pmd->data_block_size = data_block_size; - r = __create_persistent_data_objects(pmd, &create); + r = __create_persistent_data_objects(pmd); if (r) { kfree(pmd); return ERR_PTR(r); -- cgit v1.2.3 From b79399510888998778ea6a3a281e30cbe59fdb37 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 27 Jul 2012 15:08:13 +0100 Subject: dm thin metadata: remove duplicate pmd initialisation Remove some duplicate initialisation of struct dm_pool_metadata. These pmd fields are initialised by both: __format_metadata's calls to dm_btree_empty __write_initial_superblock + __begin_transaction Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin-metadata.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 7317f1b4ab15..51b97f07aca3 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -512,11 +512,6 @@ static int __format_metadata(struct dm_pool_metadata *pmd) __setup_btree_details(pmd); - pmd->root = 0; - pmd->details_root = 0; - pmd->trans_id = 0; - pmd->flags = 0; - r = dm_btree_empty(&pmd->info, &pmd->root); if (r < 0) goto bad_data_sm; -- cgit v1.2.3 From d73ec52538041f29a8ae22bc72521222279439b9 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 27 Jul 2012 15:08:13 +0100 Subject: dm thin metadata: only check incompat features on open Factor out __check_incompat_features and only call it once when we open the metadata device rather than at the beginning of every transaction. Signed-off-by: Mike Snitzer Signed-off-by: Joe Thornber Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin-metadata.c | 62 +++++++++++++++++++++++++------------------ 1 file changed, 36 insertions(+), 26 deletions(-) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 51b97f07aca3..867ee52121d4 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -537,6 +537,34 @@ bad: return r; } +static int __check_incompat_features(struct thin_disk_superblock *disk_super, + struct dm_pool_metadata *pmd) +{ + uint32_t features; + + features = le32_to_cpu(disk_super->incompat_flags) & ~THIN_FEATURE_INCOMPAT_SUPP; + if (features) { + DMERR("could not access metadata due to unsupported optional features (%lx).", + (unsigned long)features); + return -EINVAL; + } + + /* + * Check for read-only metadata to skip the following RDWR checks. + */ + if (get_disk_ro(pmd->bdev->bd_disk)) + return 0; + + features = le32_to_cpu(disk_super->compat_ro_flags) & ~THIN_FEATURE_COMPAT_RO_SUPP; + if (features) { + DMERR("could not access metadata RDWR due to unsupported optional features (%lx).", + (unsigned long)features); + return -EINVAL; + } + + return 0; +} + static int __open_metadata(struct dm_pool_metadata *pmd) { int r; @@ -551,6 +579,13 @@ static int __open_metadata(struct dm_pool_metadata *pmd) } disk_super = dm_block_data(sblock); + + r = __check_incompat_features(disk_super, pmd); + if (r < 0) { + dm_bm_unlock(sblock); + return r; + } + r = dm_tm_open_with_sm(pmd->bm, THIN_SUPERBLOCK_LOCATION, disk_super->metadata_space_map_root, sizeof(disk_super->metadata_space_map_root), @@ -635,7 +670,6 @@ static void __destroy_persistent_data_objects(struct dm_pool_metadata *pmd) static int __begin_transaction(struct dm_pool_metadata *pmd) { int r; - u32 features; struct thin_disk_superblock *disk_super; struct dm_block *sblock; @@ -656,32 +690,8 @@ static int __begin_transaction(struct dm_pool_metadata *pmd) pmd->flags = le32_to_cpu(disk_super->flags); pmd->data_block_size = le32_to_cpu(disk_super->data_block_size); - features = le32_to_cpu(disk_super->incompat_flags) & ~THIN_FEATURE_INCOMPAT_SUPP; - if (features) { - DMERR("could not access metadata due to " - "unsupported optional features (%lx).", - (unsigned long)features); - r = -EINVAL; - goto out; - } - - /* - * Check for read-only metadata to skip the following RDWR checks. - */ - if (get_disk_ro(pmd->bdev->bd_disk)) - goto out; - - features = le32_to_cpu(disk_super->compat_ro_flags) & ~THIN_FEATURE_COMPAT_RO_SUPP; - if (features) { - DMERR("could not access metadata RDWR due to " - "unsupported optional features (%lx).", - (unsigned long)features); - r = -EINVAL; - } - -out: dm_bm_unlock(sblock); - return r; + return 0; } static int __write_changed_details(struct dm_pool_metadata *pmd) -- cgit v1.2.3 From 0fa5b17b08baa37919ddd21529081e3090d47de5 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 27 Jul 2012 15:08:14 +0100 Subject: dm thin metadata: tidy up open and format error paths Tidy up error path in __open_metadata and __format_metadata in dm-thin-metadata. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin-metadata.c | 43 +++++++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 867ee52121d4..1b6d2857b3f1 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -500,37 +500,39 @@ static int __format_metadata(struct dm_pool_metadata *pmd) if (IS_ERR(pmd->data_sm)) { DMERR("sm_disk_create failed"); r = PTR_ERR(pmd->data_sm); - goto bad; + goto bad_cleanup_tm; } pmd->nb_tm = dm_tm_create_non_blocking_clone(pmd->tm); if (!pmd->nb_tm) { - DMERR("could not create clone tm"); + DMERR("could not create non-blocking clone tm"); r = -ENOMEM; - goto bad_data_sm; + goto bad_cleanup_data_sm; } __setup_btree_details(pmd); r = dm_btree_empty(&pmd->info, &pmd->root); if (r < 0) - goto bad_data_sm; + goto bad_cleanup_nb_tm; r = dm_btree_empty(&pmd->details_info, &pmd->details_root); if (r < 0) { DMERR("couldn't create devices root"); - goto bad_data_sm; + goto bad_cleanup_nb_tm; } r = __write_initial_superblock(pmd); if (r) - goto bad_data_sm; + goto bad_cleanup_nb_tm; return 0; -bad_data_sm: +bad_cleanup_nb_tm: + dm_tm_destroy(pmd->nb_tm); +bad_cleanup_data_sm: dm_sm_destroy(pmd->data_sm); -bad: +bad_cleanup_tm: dm_tm_destroy(pmd->tm); dm_sm_destroy(pmd->metadata_sm); @@ -581,10 +583,8 @@ static int __open_metadata(struct dm_pool_metadata *pmd) disk_super = dm_block_data(sblock); r = __check_incompat_features(disk_super, pmd); - if (r < 0) { - dm_bm_unlock(sblock); - return r; - } + if (r < 0) + goto bad_unlock_sblock; r = dm_tm_open_with_sm(pmd->bm, THIN_SUPERBLOCK_LOCATION, disk_super->metadata_space_map_root, @@ -592,35 +592,34 @@ static int __open_metadata(struct dm_pool_metadata *pmd) &pmd->tm, &pmd->metadata_sm); if (r < 0) { DMERR("tm_open_with_sm failed"); - dm_bm_unlock(sblock); - return r; + goto bad_unlock_sblock; } pmd->data_sm = dm_sm_disk_open(pmd->tm, disk_super->data_space_map_root, sizeof(disk_super->data_space_map_root)); if (IS_ERR(pmd->data_sm)) { DMERR("sm_disk_open failed"); - dm_bm_unlock(sblock); r = PTR_ERR(pmd->data_sm); - goto bad; + goto bad_cleanup_tm; } - dm_bm_unlock(sblock); - pmd->nb_tm = dm_tm_create_non_blocking_clone(pmd->tm); if (!pmd->nb_tm) { - DMERR("could not create clone tm"); + DMERR("could not create non-blocking clone tm"); r = -ENOMEM; - goto bad_data_sm; + goto bad_cleanup_data_sm; } __setup_btree_details(pmd); + return dm_bm_unlock(sblock); -bad_data_sm: +bad_cleanup_data_sm: dm_sm_destroy(pmd->data_sm); -bad: +bad_cleanup_tm: dm_tm_destroy(pmd->tm); dm_sm_destroy(pmd->metadata_sm); +bad_unlock_sblock: + dm_bm_unlock(sblock); return r; } -- cgit v1.2.3 From 66b1edc05e3f4c1227f51e40f2ead3e973c00cb0 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 27 Jul 2012 15:08:14 +0100 Subject: dm thin metadata: add format option to dm_pool_metadata_open Add a parameter to dm_pool_metadata_open to indicate whether or not an unformatted metadata area should be formatted. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin-metadata.c | 17 +++++++++-------- drivers/md/dm-thin-metadata.h | 3 ++- drivers/md/dm-thin.c | 2 +- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 1b6d2857b3f1..5ae31185e47c 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -624,7 +624,7 @@ bad_unlock_sblock: return r; } -static int __open_or_format_metadata(struct dm_pool_metadata *pmd) +static int __open_or_format_metadata(struct dm_pool_metadata *pmd, bool format_device) { int r, unformatted; @@ -633,12 +633,12 @@ static int __open_or_format_metadata(struct dm_pool_metadata *pmd) return r; if (unformatted) - return __format_metadata(pmd); - else - return __open_metadata(pmd); + return format_device ? __format_metadata(pmd) : -EPERM; + + return __open_metadata(pmd); } -static int __create_persistent_data_objects(struct dm_pool_metadata *pmd) +static int __create_persistent_data_objects(struct dm_pool_metadata *pmd, bool format_device) { int r; @@ -650,7 +650,7 @@ static int __create_persistent_data_objects(struct dm_pool_metadata *pmd) return PTR_ERR(pmd->bm); } - r = __open_or_format_metadata(pmd); + r = __open_or_format_metadata(pmd, format_device); if (r) dm_block_manager_destroy(pmd->bm); @@ -792,7 +792,8 @@ out_locked: } struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, - sector_t data_block_size) + sector_t data_block_size, + bool format_device) { int r; struct dm_pool_metadata *pmd; @@ -809,7 +810,7 @@ struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, pmd->bdev = bdev; pmd->data_block_size = data_block_size; - r = __create_persistent_data_objects(pmd); + r = __create_persistent_data_objects(pmd, format_device); if (r) { kfree(pmd); return ERR_PTR(r); diff --git a/drivers/md/dm-thin-metadata.h b/drivers/md/dm-thin-metadata.h index 7b47c0a9a8e3..7e6656c49bd5 100644 --- a/drivers/md/dm-thin-metadata.h +++ b/drivers/md/dm-thin-metadata.h @@ -38,7 +38,8 @@ typedef uint64_t dm_thin_id; * Reopens or creates a new, empty metadata volume. */ struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, - sector_t data_block_size); + sector_t data_block_size, + bool format_device); int dm_pool_metadata_close(struct dm_pool_metadata *pmd); diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 026215566abd..bc9d08c0218b 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -1712,7 +1712,7 @@ static struct pool *pool_create(struct mapped_device *pool_md, struct pool *pool; struct dm_pool_metadata *pmd; - pmd = dm_pool_metadata_open(metadata_dev, block_size); + pmd = dm_pool_metadata_open(metadata_dev, block_size, true); if (IS_ERR(pmd)) { *error = "Error creating metadata object"; return (struct pool *)pmd; -- cgit v1.2.3 From 40db5a537655aa0b17a82a4b5596f8d27400edd8 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 27 Jul 2012 15:08:14 +0100 Subject: dm thin metadata: add dm_thin_changed_this_transaction Introduce dm_thin_changed_this_transaction to dm-thin-metadata to publish a useful bit of information we're already tracking. This will help dm thin decide when to commit. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin-metadata.c | 16 +++++++++++++--- drivers/md/dm-thin-metadata.h | 2 ++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 5ae31185e47c..4f5b11c195f4 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -1375,10 +1375,9 @@ static int __insert(struct dm_thin_device *td, dm_block_t block, if (r) return r; - if (inserted) { + td->changed = 1; + if (inserted) td->mapped_blocks++; - td->changed = 1; - } return 0; } @@ -1422,6 +1421,17 @@ int dm_thin_remove_block(struct dm_thin_device *td, dm_block_t block) return r; } +bool dm_thin_changed_this_transaction(struct dm_thin_device *td) +{ + int r; + + down_read(&td->pmd->root_lock); + r = td->changed; + up_read(&td->pmd->root_lock); + + return r; +} + int dm_pool_alloc_data_block(struct dm_pool_metadata *pmd, dm_block_t *result) { int r; diff --git a/drivers/md/dm-thin-metadata.h b/drivers/md/dm-thin-metadata.h index 7e6656c49bd5..1632fec1660d 100644 --- a/drivers/md/dm-thin-metadata.h +++ b/drivers/md/dm-thin-metadata.h @@ -148,6 +148,8 @@ int dm_thin_remove_block(struct dm_thin_device *td, dm_block_t block); /* * Queries. */ +bool dm_thin_changed_this_transaction(struct dm_thin_device *td); + int dm_thin_get_highest_mapped_block(struct dm_thin_device *td, dm_block_t *highest_mapped); -- cgit v1.2.3 From 4afdd680f7c106e1c87b6dd7af9751c282ecb4b0 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 27 Jul 2012 15:08:14 +0100 Subject: dm thin: reduce number of metadata commits Reduce the number of metadata commits by using dm_thin_changed_this_transaction to check if metadata was changed on a per thin device granularity. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index bc9d08c0218b..98c50f9626d8 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -707,6 +707,12 @@ static void remap_to_origin(struct thin_c *tc, struct bio *bio) bio->bi_bdev = tc->origin_dev->bdev; } +static int bio_triggers_commit(struct thin_c *tc, struct bio *bio) +{ + return (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && + dm_thin_changed_this_transaction(tc->td); +} + static void issue(struct thin_c *tc, struct bio *bio) { struct pool *pool = tc->pool; @@ -716,7 +722,7 @@ static void issue(struct thin_c *tc, struct bio *bio) * Batch together any FUA/FLUSH bios we find and then issue * a single commit for them in process_deferred_bios(). */ - if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) { + if (bio_triggers_commit(tc, bio)) { spin_lock_irqsave(&pool->lock, flags); bio_list_add(&pool->deferred_flush_bios, bio); spin_unlock_irqrestore(&pool->lock, flags); -- cgit v1.2.3 From 310975573ba40ab526b0d76d28225d8d0999d72b Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 27 Jul 2012 15:08:15 +0100 Subject: dm persistent data: introduce dm_bm_set_read_only Introduce dm_bm_set_read_only to switch the block manager into a read-only mode. To be used when dm-thin degrades due to io errors on the metadata device. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/persistent-data/dm-block-manager.c | 18 ++++++++++++++++++ drivers/md/persistent-data/dm-block-manager.h | 13 +++++++++++++ 2 files changed, 31 insertions(+) diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c index ad1712e802f6..5ba277768d99 100644 --- a/drivers/md/persistent-data/dm-block-manager.c +++ b/drivers/md/persistent-data/dm-block-manager.c @@ -364,6 +364,7 @@ static void dm_block_manager_write_callback(struct dm_buffer *buf) *--------------------------------------------------------------*/ struct dm_block_manager { struct dm_bufio_client *bufio; + bool read_only:1; }; struct dm_block_manager *dm_block_manager_create(struct block_device *bdev, @@ -390,6 +391,8 @@ struct dm_block_manager *dm_block_manager_create(struct block_device *bdev, goto bad; } + bm->read_only = false; + return bm; bad: @@ -481,6 +484,9 @@ int dm_bm_write_lock(struct dm_block_manager *bm, void *p; int r; + if (bm->read_only) + return -EPERM; + p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result); if (unlikely(IS_ERR(p))) return PTR_ERR(p); @@ -547,6 +553,9 @@ int dm_bm_write_lock_zero(struct dm_block_manager *bm, struct buffer_aux *aux; void *p; + if (bm->read_only) + return -EPERM; + p = dm_bufio_new(bm->bufio, b, (struct dm_buffer **) result); if (unlikely(IS_ERR(p))) return PTR_ERR(p); @@ -589,6 +598,9 @@ int dm_bm_flush_and_unlock(struct dm_block_manager *bm, { int r; + if (bm->read_only) + return -EPERM; + r = dm_bufio_write_dirty_buffers(bm->bufio); if (unlikely(r)) { dm_bm_unlock(superblock); @@ -600,6 +612,12 @@ int dm_bm_flush_and_unlock(struct dm_block_manager *bm, return dm_bufio_write_dirty_buffers(bm->bufio); } +void dm_bm_set_read_only(struct dm_block_manager *bm) +{ + bm->read_only = true; +} +EXPORT_SYMBOL_GPL(dm_bm_set_read_only); + u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor) { return crc32c(~(u32) 0, data, len) ^ init_xor; diff --git a/drivers/md/persistent-data/dm-block-manager.h b/drivers/md/persistent-data/dm-block-manager.h index 327885566631..be5bff61be28 100644 --- a/drivers/md/persistent-data/dm-block-manager.h +++ b/drivers/md/persistent-data/dm-block-manager.h @@ -108,6 +108,19 @@ int dm_bm_unlock(struct dm_block *b); int dm_bm_flush_and_unlock(struct dm_block_manager *bm, struct dm_block *superblock); +/* + * Switches the bm to a read only mode. Once read-only mode + * has been entered the following functions will return -EPERM. + * + * dm_bm_write_lock + * dm_bm_write_lock_zero + * dm_bm_flush_and_unlock + * + * Additionally you should not use dm_bm_unlock_move, however no error will + * be returned if you do. + */ +void dm_bm_set_read_only(struct dm_block_manager *bm); + u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor); /*----------------------------------------------------------------*/ -- cgit v1.2.3 From 12ba58af46e5973822f1cfaaf5f29a2a17a523bf Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 27 Jul 2012 15:08:15 +0100 Subject: dm thin metadata: introduce dm_pool_metadata_set_read_only Introduce dm_pool_metadata_set_read_only to put the underlying block manager into read-only mode. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin-metadata.c | 20 ++++++++++++++++---- drivers/md/dm-thin-metadata.h | 6 ++++++ 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 4f5b11c195f4..d30bc09fd7c4 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -184,6 +184,7 @@ struct dm_pool_metadata { uint64_t trans_id; unsigned long flags; sector_t data_block_size; + bool read_only:1; }; struct dm_thin_device { @@ -807,6 +808,7 @@ struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, init_rwsem(&pmd->root_lock); pmd->time = 0; INIT_LIST_HEAD(&pmd->thin_devices); + pmd->read_only = false; pmd->bdev = bdev; pmd->data_block_size = data_block_size; @@ -849,10 +851,12 @@ int dm_pool_metadata_close(struct dm_pool_metadata *pmd) return -EBUSY; } - r = __commit_transaction(pmd); - if (r < 0) - DMWARN("%s: __commit_transaction() failed, error = %d", - __func__, r); + if (!pmd->read_only) { + r = __commit_transaction(pmd); + if (r < 0) + DMWARN("%s: __commit_transaction() failed, error = %d", + __func__, r); + } __destroy_persistent_data_objects(pmd); kfree(pmd); @@ -1587,3 +1591,11 @@ int dm_pool_resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count) return r; } + +void dm_pool_metadata_read_only(struct dm_pool_metadata *pmd) +{ + down_write(&pmd->root_lock); + pmd->read_only = true; + dm_bm_set_read_only(pmd->bm); + up_write(&pmd->root_lock); +} diff --git a/drivers/md/dm-thin-metadata.h b/drivers/md/dm-thin-metadata.h index 1632fec1660d..2bbe622f299d 100644 --- a/drivers/md/dm-thin-metadata.h +++ b/drivers/md/dm-thin-metadata.h @@ -174,6 +174,12 @@ int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result); */ int dm_pool_resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_size); +/* + * Flicks the underlying block manager into read only mode, so you know + * that nothing is changing. + */ +void dm_pool_metadata_read_only(struct dm_pool_metadata *pmd); + /*----------------------------------------------------------------*/ #endif -- cgit v1.2.3 From da105ed5fd7edcc5e0df7dbacef4dadda74e7ebe Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 27 Jul 2012 15:08:15 +0100 Subject: dm thin metadata: introduce dm_pool_abort_metadata Introduce dm_pool_abort_metadata to abort the current metadata transaction. Generally this will only be called when bad things are happening and dm-thin is trying to roll back to a good state for read-only mode. It's complicated by the fact that the metadata device may have failed completely causing the abort to be unable to read the old transaction. In this case the metadata object is placed in a 'fail' mode and everything fails apart from destroying it. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin-metadata.c | 207 +++++++++++++++++++++++++++++------------- drivers/md/dm-thin-metadata.h | 12 +++ 2 files changed, 158 insertions(+), 61 deletions(-) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index d30bc09fd7c4..693e149e9727 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011 Red Hat, Inc. + * Copyright (C) 2011-2012 Red Hat, Inc. * * This file is released under the GPL. */ @@ -185,6 +185,13 @@ struct dm_pool_metadata { unsigned long flags; sector_t data_block_size; bool read_only:1; + + /* + * Set if a transaction has to be aborted but the attempt to roll back + * to the previous (good) transaction failed. The only pool metadata + * operation possible in this state is the closing of the device. + */ + bool fail_io:1; }; struct dm_thin_device { @@ -193,7 +200,8 @@ struct dm_thin_device { dm_thin_id id; int open_count; - int changed; + bool changed:1; + bool aborted_with_changes:1; uint64_t mapped_blocks; uint64_t transaction_id; uint32_t creation_time; @@ -731,9 +739,6 @@ static int __write_changed_details(struct dm_pool_metadata *pmd) static int __commit_transaction(struct dm_pool_metadata *pmd) { - /* - * FIXME: Associated pool should be made read-only on failure. - */ int r; size_t metadata_len, data_len; struct thin_disk_superblock *disk_super; @@ -809,6 +814,7 @@ struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, pmd->time = 0; INIT_LIST_HEAD(&pmd->thin_devices); pmd->read_only = false; + pmd->fail_io = false; pmd->bdev = bdev; pmd->data_block_size = data_block_size; @@ -851,16 +857,17 @@ int dm_pool_metadata_close(struct dm_pool_metadata *pmd) return -EBUSY; } - if (!pmd->read_only) { + if (!pmd->read_only && !pmd->fail_io) { r = __commit_transaction(pmd); if (r < 0) DMWARN("%s: __commit_transaction() failed, error = %d", __func__, r); } - __destroy_persistent_data_objects(pmd); - kfree(pmd); + if (!pmd->fail_io) + __destroy_persistent_data_objects(pmd); + kfree(pmd); return 0; } @@ -921,6 +928,7 @@ static int __open_device(struct dm_pool_metadata *pmd, (*td)->id = dev; (*td)->open_count = 1; (*td)->changed = changed; + (*td)->aborted_with_changes = false; (*td)->mapped_blocks = le64_to_cpu(details_le.mapped_blocks); (*td)->transaction_id = le64_to_cpu(details_le.transaction_id); (*td)->creation_time = le32_to_cpu(details_le.creation_time); @@ -982,10 +990,11 @@ static int __create_thin(struct dm_pool_metadata *pmd, int dm_pool_create_thin(struct dm_pool_metadata *pmd, dm_thin_id dev) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); - r = __create_thin(pmd, dev); + if (!pmd->fail_io) + r = __create_thin(pmd, dev); up_write(&pmd->root_lock); return r; @@ -1072,10 +1081,11 @@ int dm_pool_create_snap(struct dm_pool_metadata *pmd, dm_thin_id dev, dm_thin_id origin) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); - r = __create_snap(pmd, dev, origin); + if (!pmd->fail_io) + r = __create_snap(pmd, dev, origin); up_write(&pmd->root_lock); return r; @@ -1114,10 +1124,11 @@ static int __delete_device(struct dm_pool_metadata *pmd, dm_thin_id dev) int dm_pool_delete_thin_device(struct dm_pool_metadata *pmd, dm_thin_id dev) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); - r = __delete_device(pmd, dev); + if (!pmd->fail_io) + r = __delete_device(pmd, dev); up_write(&pmd->root_lock); return r; @@ -1127,27 +1138,40 @@ int dm_pool_set_metadata_transaction_id(struct dm_pool_metadata *pmd, uint64_t current_id, uint64_t new_id) { + int r = -EINVAL; + down_write(&pmd->root_lock); + + if (pmd->fail_io) + goto out; + if (pmd->trans_id != current_id) { - up_write(&pmd->root_lock); DMERR("mismatched transaction id"); - return -EINVAL; + goto out; } pmd->trans_id = new_id; + r = 0; + +out: up_write(&pmd->root_lock); - return 0; + return r; } int dm_pool_get_metadata_transaction_id(struct dm_pool_metadata *pmd, uint64_t *result) { + int r = -EINVAL; + down_read(&pmd->root_lock); - *result = pmd->trans_id; + if (!pmd->fail_io) { + *result = pmd->trans_id; + r = 0; + } up_read(&pmd->root_lock); - return 0; + return r; } static int __reserve_metadata_snap(struct dm_pool_metadata *pmd) @@ -1211,10 +1235,11 @@ static int __reserve_metadata_snap(struct dm_pool_metadata *pmd) int dm_pool_reserve_metadata_snap(struct dm_pool_metadata *pmd) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); - r = __reserve_metadata_snap(pmd); + if (!pmd->fail_io) + r = __reserve_metadata_snap(pmd); up_write(&pmd->root_lock); return r; @@ -1256,10 +1281,11 @@ static int __release_metadata_snap(struct dm_pool_metadata *pmd) int dm_pool_release_metadata_snap(struct dm_pool_metadata *pmd) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); - r = __release_metadata_snap(pmd); + if (!pmd->fail_io) + r = __release_metadata_snap(pmd); up_write(&pmd->root_lock); return r; @@ -1286,10 +1312,11 @@ static int __get_metadata_snap(struct dm_pool_metadata *pmd, int dm_pool_get_metadata_snap(struct dm_pool_metadata *pmd, dm_block_t *result) { - int r; + int r = -EINVAL; down_read(&pmd->root_lock); - r = __get_metadata_snap(pmd, result); + if (!pmd->fail_io) + r = __get_metadata_snap(pmd, result); up_read(&pmd->root_lock); return r; @@ -1298,10 +1325,11 @@ int dm_pool_get_metadata_snap(struct dm_pool_metadata *pmd, int dm_pool_open_thin_device(struct dm_pool_metadata *pmd, dm_thin_id dev, struct dm_thin_device **td) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); - r = __open_device(pmd, dev, 0, td); + if (!pmd->fail_io) + r = __open_device(pmd, dev, 0, td); up_write(&pmd->root_lock); return r; @@ -1329,28 +1357,31 @@ static bool __snapshotted_since(struct dm_thin_device *td, uint32_t time) int dm_thin_find_block(struct dm_thin_device *td, dm_block_t block, int can_block, struct dm_thin_lookup_result *result) { - int r; + int r = -EINVAL; uint64_t block_time = 0; __le64 value; struct dm_pool_metadata *pmd = td->pmd; dm_block_t keys[2] = { td->id, block }; + struct dm_btree_info *info; if (can_block) { down_read(&pmd->root_lock); - r = dm_btree_lookup(&pmd->info, pmd->root, keys, &value); - if (!r) - block_time = le64_to_cpu(value); - up_read(&pmd->root_lock); - - } else if (down_read_trylock(&pmd->root_lock)) { - r = dm_btree_lookup(&pmd->nb_info, pmd->root, keys, &value); - if (!r) - block_time = le64_to_cpu(value); - up_read(&pmd->root_lock); - - } else + info = &pmd->info; + } else if (down_read_trylock(&pmd->root_lock)) + info = &pmd->nb_info; + else return -EWOULDBLOCK; + if (pmd->fail_io) + goto out; + + r = dm_btree_lookup(info, pmd->root, keys, &value); + if (!r) + block_time = le64_to_cpu(value); + +out: + up_read(&pmd->root_lock); + if (!r) { dm_block_t exception_block; uint32_t exception_time; @@ -1389,10 +1420,11 @@ static int __insert(struct dm_thin_device *td, dm_block_t block, int dm_thin_insert_block(struct dm_thin_device *td, dm_block_t block, dm_block_t data_block) { - int r; + int r = -EINVAL; down_write(&td->pmd->root_lock); - r = __insert(td, block, data_block); + if (!td->pmd->fail_io) + r = __insert(td, block, data_block); up_write(&td->pmd->root_lock); return r; @@ -1416,10 +1448,11 @@ static int __remove(struct dm_thin_device *td, dm_block_t block) int dm_thin_remove_block(struct dm_thin_device *td, dm_block_t block) { - int r; + int r = -EINVAL; down_write(&td->pmd->root_lock); - r = __remove(td, block); + if (!td->pmd->fail_io) + r = __remove(td, block); up_write(&td->pmd->root_lock); return r; @@ -1436,12 +1469,24 @@ bool dm_thin_changed_this_transaction(struct dm_thin_device *td) return r; } +bool dm_thin_aborted_changes(struct dm_thin_device *td) +{ + bool r; + + down_read(&td->pmd->root_lock); + r = td->aborted_with_changes; + up_read(&td->pmd->root_lock); + + return r; +} + int dm_pool_alloc_data_block(struct dm_pool_metadata *pmd, dm_block_t *result) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); - r = dm_sm_new_block(pmd->data_sm, result); + if (!pmd->fail_io) + r = dm_sm_new_block(pmd->data_sm, result); up_write(&pmd->root_lock); return r; @@ -1449,9 +1494,11 @@ int dm_pool_alloc_data_block(struct dm_pool_metadata *pmd, dm_block_t *result) int dm_pool_commit_metadata(struct dm_pool_metadata *pmd) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); + if (pmd->fail_io) + goto out; r = __commit_transaction(pmd); if (r <= 0) @@ -1466,12 +1513,41 @@ out: return r; } +static void __set_abort_with_changes_flags(struct dm_pool_metadata *pmd) +{ + struct dm_thin_device *td; + + list_for_each_entry(td, &pmd->thin_devices, list) + td->aborted_with_changes = td->changed; +} + +int dm_pool_abort_metadata(struct dm_pool_metadata *pmd) +{ + int r = -EINVAL; + + down_write(&pmd->root_lock); + if (pmd->fail_io) + goto out; + + __set_abort_with_changes_flags(pmd); + __destroy_persistent_data_objects(pmd); + r = __create_persistent_data_objects(pmd, false); + if (r) + pmd->fail_io = true; + +out: + up_write(&pmd->root_lock); + + return r; +} + int dm_pool_get_free_block_count(struct dm_pool_metadata *pmd, dm_block_t *result) { - int r; + int r = -EINVAL; down_read(&pmd->root_lock); - r = dm_sm_get_nr_free(pmd->data_sm, result); + if (!pmd->fail_io) + r = dm_sm_get_nr_free(pmd->data_sm, result); up_read(&pmd->root_lock); return r; @@ -1480,10 +1556,11 @@ int dm_pool_get_free_block_count(struct dm_pool_metadata *pmd, dm_block_t *resul int dm_pool_get_free_metadata_block_count(struct dm_pool_metadata *pmd, dm_block_t *result) { - int r; + int r = -EINVAL; down_read(&pmd->root_lock); - r = dm_sm_get_nr_free(pmd->metadata_sm, result); + if (!pmd->fail_io) + r = dm_sm_get_nr_free(pmd->metadata_sm, result); up_read(&pmd->root_lock); return r; @@ -1492,10 +1569,11 @@ int dm_pool_get_free_metadata_block_count(struct dm_pool_metadata *pmd, int dm_pool_get_metadata_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result) { - int r; + int r = -EINVAL; down_read(&pmd->root_lock); - r = dm_sm_get_nr_blocks(pmd->metadata_sm, result); + if (!pmd->fail_io) + r = dm_sm_get_nr_blocks(pmd->metadata_sm, result); up_read(&pmd->root_lock); return r; @@ -1512,10 +1590,11 @@ int dm_pool_get_data_block_size(struct dm_pool_metadata *pmd, sector_t *result) int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result) { - int r; + int r = -EINVAL; down_read(&pmd->root_lock); - r = dm_sm_get_nr_blocks(pmd->data_sm, result); + if (!pmd->fail_io) + r = dm_sm_get_nr_blocks(pmd->data_sm, result); up_read(&pmd->root_lock); return r; @@ -1523,13 +1602,17 @@ int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result) int dm_thin_get_mapped_count(struct dm_thin_device *td, dm_block_t *result) { + int r = -EINVAL; struct dm_pool_metadata *pmd = td->pmd; down_read(&pmd->root_lock); - *result = td->mapped_blocks; + if (!pmd->fail_io) { + *result = td->mapped_blocks; + r = 0; + } up_read(&pmd->root_lock); - return 0; + return r; } static int __highest_block(struct dm_thin_device *td, dm_block_t *result) @@ -1551,11 +1634,12 @@ static int __highest_block(struct dm_thin_device *td, dm_block_t *result) int dm_thin_get_highest_mapped_block(struct dm_thin_device *td, dm_block_t *result) { - int r; + int r = -EINVAL; struct dm_pool_metadata *pmd = td->pmd; down_read(&pmd->root_lock); - r = __highest_block(td, result); + if (!pmd->fail_io) + r = __highest_block(td, result); up_read(&pmd->root_lock); return r; @@ -1583,10 +1667,11 @@ static int __resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count) int dm_pool_resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); - r = __resize_data_dev(pmd, new_count); + if (!pmd->fail_io) + r = __resize_data_dev(pmd, new_count); up_write(&pmd->root_lock); return r; diff --git a/drivers/md/dm-thin-metadata.h b/drivers/md/dm-thin-metadata.h index 2bbe622f299d..0cecc3702885 100644 --- a/drivers/md/dm-thin-metadata.h +++ b/drivers/md/dm-thin-metadata.h @@ -79,6 +79,16 @@ int dm_pool_delete_thin_device(struct dm_pool_metadata *pmd, */ int dm_pool_commit_metadata(struct dm_pool_metadata *pmd); +/* + * Discards all uncommitted changes. Rereads the superblock, rolling back + * to the last good transaction. Thin devices remain open. + * dm_thin_aborted_changes() tells you if they had uncommitted changes. + * + * If this call fails it's only useful to call dm_pool_metadata_close(). + * All other methods will fail with -EINVAL. + */ +int dm_pool_abort_metadata(struct dm_pool_metadata *pmd); + /* * Set/get userspace transaction id. */ @@ -150,6 +160,8 @@ int dm_thin_remove_block(struct dm_thin_device *td, dm_block_t block); */ bool dm_thin_changed_this_transaction(struct dm_thin_device *td); +bool dm_thin_aborted_changes(struct dm_thin_device *td); + int dm_thin_get_highest_mapped_block(struct dm_thin_device *td, dm_block_t *highest_mapped); -- cgit v1.2.3 From e49e582965b3694f07a106adc83ddb44aa4f0890 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 27 Jul 2012 15:08:16 +0100 Subject: dm thin: add read only and fail io modes Add read-only and fail-io modes to thin provisioning. If a transaction commit fails the pool's metadata device will transition to "read-only" mode. If a commit fails once already in read-only mode the transition to "fail-io" mode occurs. Once in fail-io mode the pool and all associated thin devices will report a status of "Fail". Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- Documentation/device-mapper/thin-provisioning.txt | 24 +- drivers/md/dm-thin.c | 416 +++++++++++++++++----- 2 files changed, 344 insertions(+), 96 deletions(-) diff --git a/Documentation/device-mapper/thin-provisioning.txt b/Documentation/device-mapper/thin-provisioning.txt index f5cfc62b7ad3..30b8b83bd333 100644 --- a/Documentation/device-mapper/thin-provisioning.txt +++ b/Documentation/device-mapper/thin-provisioning.txt @@ -231,6 +231,9 @@ i) Constructor no_discard_passdown: Don't pass discards down to the underlying data device, but just remove the mapping. + read_only: Don't allow any changes to be made to the pool + metadata. + Data block size must be between 64KB (128 sectors) and 1GB (2097152 sectors) inclusive. @@ -239,7 +242,7 @@ ii) Status / / - + [no_]discard_passdown ro|rw transaction id: A 64-bit number used by userspace to help synchronise with metadata @@ -257,6 +260,21 @@ ii) Status held root. This feature is not yet implemented so '-' is always returned. + discard_passdown|no_discard_passdown + Whether or not discards are actually being passed down to the + underlying device. When this is enabled when loading the table, + it can get disabled if the underlying device doesn't support it. + + ro|rw + If the pool encounters certain types of device failures it will + drop into a read-only metadata mode in which no changes to + the pool metadata (like allocating new blocks) are permitted. + + In serious cases where even a read-only mode is deemed unsafe + no further I/O will be permitted and the status will just + contain the string 'Fail'. The userspace recovery tools + should then be used. + iii) Messages create_thin @@ -329,3 +347,7 @@ regain some space then send the 'trim' message to the pool. ii) Status + + If the pool has encountered device errors and failed, the status + will just contain the string 'Fail'. The userspace recovery + tools should then be used. diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 98c50f9626d8..087e9b34d290 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011 Red Hat UK. + * Copyright (C) 2011-2012 Red Hat UK. * * This file is released under the GPL. */ @@ -496,12 +496,27 @@ static void build_virtual_key(struct dm_thin_device *td, dm_block_t b, */ struct dm_thin_new_mapping; +/* + * The pool runs in 3 modes. Ordered in degraded order for comparisons. + */ +enum pool_mode { + PM_WRITE, /* metadata may be changed */ + PM_READ_ONLY, /* metadata may not be changed */ + PM_FAIL, /* all I/O fails */ +}; + struct pool_features { + enum pool_mode mode; + unsigned zero_new_blocks:1; unsigned discard_enabled:1; unsigned discard_passdown:1; }; +struct thin_c; +typedef void (*process_bio_fn)(struct thin_c *tc, struct bio *bio); +typedef void (*process_mapping_fn)(struct dm_thin_new_mapping *m); + struct pool { struct list_head list; struct dm_target *ti; /* Only set if a pool target is bound */ @@ -542,8 +557,17 @@ struct pool { struct dm_thin_new_mapping *next_mapping; mempool_t *mapping_pool; mempool_t *endio_hook_pool; + + process_bio_fn process_bio; + process_bio_fn process_discard; + + process_mapping_fn process_prepared_mapping; + process_mapping_fn process_prepared_discard; }; +static enum pool_mode get_pool_mode(struct pool *pool); +static void set_pool_mode(struct pool *pool, enum pool_mode mode); + /* * Target context for a pool. */ @@ -718,16 +742,28 @@ static void issue(struct thin_c *tc, struct bio *bio) struct pool *pool = tc->pool; unsigned long flags; + if (!bio_triggers_commit(tc, bio)) { + generic_make_request(bio); + return; + } + /* - * Batch together any FUA/FLUSH bios we find and then issue - * a single commit for them in process_deferred_bios(). + * Complete bio with an error if earlier I/O caused changes to + * the metadata that can't be committed e.g, due to I/O errors + * on the metadata device. */ - if (bio_triggers_commit(tc, bio)) { - spin_lock_irqsave(&pool->lock, flags); - bio_list_add(&pool->deferred_flush_bios, bio); - spin_unlock_irqrestore(&pool->lock, flags); - } else - generic_make_request(bio); + if (dm_thin_aborted_changes(tc->td)) { + bio_io_error(bio); + return; + } + + /* + * Batch together any bios that trigger commits and then issue a + * single commit for them in process_deferred_bios(). + */ + spin_lock_irqsave(&pool->lock, flags); + bio_list_add(&pool->deferred_flush_bios, bio); + spin_unlock_irqrestore(&pool->lock, flags); } static void remap_to_origin_and_issue(struct thin_c *tc, struct bio *bio) @@ -864,6 +900,14 @@ static void cell_defer_except(struct thin_c *tc, struct dm_bio_prison_cell *cell wake_worker(pool); } +static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m) +{ + if (m->bio) + m->bio->bi_end_io = m->saved_bi_end_io; + cell_error(m->cell); + list_del(&m->list); + mempool_free(m, m->tc->pool->mapping_pool); +} static void process_prepared_mapping(struct dm_thin_new_mapping *m) { struct thin_c *tc = m->tc; @@ -908,18 +952,20 @@ out: mempool_free(m, tc->pool->mapping_pool); } -static void process_prepared_discard(struct dm_thin_new_mapping *m) +static void process_prepared_discard_fail(struct dm_thin_new_mapping *m) { - int r; struct thin_c *tc = m->tc; - r = dm_thin_remove_block(tc->td, m->virt_block); - if (r) - DMERR("dm_thin_remove_block() failed"); + bio_io_error(m->bio); + cell_defer_except(tc, m->cell); + cell_defer_except(tc, m->cell2); + mempool_free(m, tc->pool->mapping_pool); +} + +static void process_prepared_discard_passdown(struct dm_thin_new_mapping *m) +{ + struct thin_c *tc = m->tc; - /* - * Pass the discard down to the underlying device? - */ if (m->pass_discard) remap_and_issue(tc, m->bio, m->data_block); else @@ -930,8 +976,20 @@ static void process_prepared_discard(struct dm_thin_new_mapping *m) mempool_free(m, tc->pool->mapping_pool); } +static void process_prepared_discard(struct dm_thin_new_mapping *m) +{ + int r; + struct thin_c *tc = m->tc; + + r = dm_thin_remove_block(tc->td, m->virt_block); + if (r) + DMERR("dm_thin_remove_block() failed"); + + process_prepared_discard_passdown(m); +} + static void process_prepared(struct pool *pool, struct list_head *head, - void (*fn)(struct dm_thin_new_mapping *)) + process_mapping_fn *fn) { unsigned long flags; struct list_head maps; @@ -943,7 +1001,7 @@ static void process_prepared(struct pool *pool, struct list_head *head, spin_unlock_irqrestore(&pool->lock, flags); list_for_each_entry_safe(m, tmp, &maps, list) - fn(m); + (*fn)(m); } /* @@ -1109,6 +1167,35 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block, } } +static int commit(struct pool *pool) +{ + int r; + + r = dm_pool_commit_metadata(pool->pmd); + if (r) + DMERR("commit failed, error = %d", r); + + return r; +} + +/* + * A non-zero return indicates read_only or fail_io mode. + * Many callers don't care about the return value. + */ +static int commit_or_fallback(struct pool *pool) +{ + int r; + + if (get_pool_mode(pool) != PM_WRITE) + return -EINVAL; + + r = commit(pool); + if (r) + set_pool_mode(pool, PM_READ_ONLY); + + return r; +} + static int alloc_data_block(struct thin_c *tc, dm_block_t *result) { int r; @@ -1137,12 +1224,7 @@ static int alloc_data_block(struct thin_c *tc, dm_block_t *result) * Try to commit to see if that will free up some * more space. */ - r = dm_pool_commit_metadata(pool->pmd); - if (r) { - DMERR("%s: dm_pool_commit_metadata() failed, error = %d", - __func__, r); - return r; - } + (void) commit_or_fallback(pool); r = dm_pool_get_free_block_count(pool->pmd, &free_blocks); if (r) @@ -1373,6 +1455,7 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block default: DMERR("%s: alloc_data_block() failed, error = %d", __func__, r); + set_pool_mode(tc->pool, PM_READ_ONLY); cell_error(cell); break; } @@ -1430,6 +1513,49 @@ static void process_bio(struct thin_c *tc, struct bio *bio) } } +static void process_bio_read_only(struct thin_c *tc, struct bio *bio) +{ + int r; + int rw = bio_data_dir(bio); + dm_block_t block = get_bio_block(tc, bio); + struct dm_thin_lookup_result lookup_result; + + r = dm_thin_find_block(tc->td, block, 1, &lookup_result); + switch (r) { + case 0: + if (lookup_result.shared && (rw == WRITE) && bio->bi_size) + bio_io_error(bio); + else + remap_and_issue(tc, bio, lookup_result.block); + break; + + case -ENODATA: + if (rw != READ) { + bio_io_error(bio); + break; + } + + if (tc->origin_dev) { + remap_to_origin_and_issue(tc, bio); + break; + } + + zero_fill_bio(bio); + bio_endio(bio, 0); + break; + + default: + DMERR("dm_thin_find_block() failed, error = %d", r); + bio_io_error(bio); + break; + } +} + +static void process_bio_fail(struct thin_c *tc, struct bio *bio) +{ + bio_io_error(bio); +} + static int need_commit_due_to_time(struct pool *pool) { return jiffies < pool->last_commit_jiffies || @@ -1441,7 +1567,6 @@ static void process_deferred_bios(struct pool *pool) unsigned long flags; struct bio *bio; struct bio_list bios; - int r; bio_list_init(&bios); @@ -1468,9 +1593,9 @@ static void process_deferred_bios(struct pool *pool) } if (bio->bi_rw & REQ_DISCARD) - process_discard(tc, bio); + pool->process_discard(tc, bio); else - process_bio(tc, bio); + pool->process_bio(tc, bio); } /* @@ -1486,10 +1611,7 @@ static void process_deferred_bios(struct pool *pool) if (bio_list_empty(&bios) && !need_commit_due_to_time(pool)) return; - r = dm_pool_commit_metadata(pool->pmd); - if (r) { - DMERR("%s: dm_pool_commit_metadata() failed, error = %d", - __func__, r); + if (commit_or_fallback(pool)) { while ((bio = bio_list_pop(&bios))) bio_io_error(bio); return; @@ -1504,8 +1626,8 @@ static void do_worker(struct work_struct *ws) { struct pool *pool = container_of(ws, struct pool, worker); - process_prepared(pool, &pool->prepared_mappings, process_prepared_mapping); - process_prepared(pool, &pool->prepared_discards, process_prepared_discard); + process_prepared(pool, &pool->prepared_mappings, &pool->process_prepared_mapping); + process_prepared(pool, &pool->prepared_discards, &pool->process_prepared_discard); process_deferred_bios(pool); } @@ -1522,6 +1644,52 @@ static void do_waker(struct work_struct *ws) /*----------------------------------------------------------------*/ +static enum pool_mode get_pool_mode(struct pool *pool) +{ + return pool->pf.mode; +} + +static void set_pool_mode(struct pool *pool, enum pool_mode mode) +{ + int r; + + pool->pf.mode = mode; + + switch (mode) { + case PM_FAIL: + DMERR("switching pool to failure mode"); + pool->process_bio = process_bio_fail; + pool->process_discard = process_bio_fail; + pool->process_prepared_mapping = process_prepared_mapping_fail; + pool->process_prepared_discard = process_prepared_discard_fail; + break; + + case PM_READ_ONLY: + DMERR("switching pool to read-only mode"); + r = dm_pool_abort_metadata(pool->pmd); + if (r) { + DMERR("aborting transaction failed"); + set_pool_mode(pool, PM_FAIL); + } else { + dm_pool_metadata_read_only(pool->pmd); + pool->process_bio = process_bio_read_only; + pool->process_discard = process_discard; + pool->process_prepared_mapping = process_prepared_mapping_fail; + pool->process_prepared_discard = process_prepared_discard_passdown; + } + break; + + case PM_WRITE: + pool->process_bio = process_bio; + pool->process_discard = process_discard; + pool->process_prepared_mapping = process_prepared_mapping; + pool->process_prepared_discard = process_prepared_discard; + break; + } +} + +/*----------------------------------------------------------------*/ + /* * Mapping functions. */ @@ -1567,6 +1735,12 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio, struct dm_thin_lookup_result result; map_context->ptr = thin_hook_bio(tc, bio); + + if (get_pool_mode(tc->pool) == PM_FAIL) { + bio_io_error(bio); + return DM_MAPIO_SUBMITTED; + } + if (bio->bi_rw & (REQ_DISCARD | REQ_FLUSH | REQ_FUA)) { thin_defer_bio(tc, bio); return DM_MAPIO_SUBMITTED; @@ -1603,14 +1777,35 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio, break; case -ENODATA: + if (get_pool_mode(tc->pool) == PM_READ_ONLY) { + /* + * This block isn't provisioned, and we have no way + * of doing so. Just error it. + */ + bio_io_error(bio); + r = DM_MAPIO_SUBMITTED; + break; + } + /* fall through */ + + case -EWOULDBLOCK: /* * In future, the failed dm_thin_find_block above could * provide the hint to load the metadata into cache. */ - case -EWOULDBLOCK: thin_defer_bio(tc, bio); r = DM_MAPIO_SUBMITTED; break; + + default: + /* + * Must always call bio_io_error on failure. + * dm_thin_find_block can fail with -EINVAL if the + * pool is switched to fail-io mode. + */ + bio_io_error(bio); + r = DM_MAPIO_SUBMITTED; + break; } return r; @@ -1647,15 +1842,26 @@ static int bind_control_target(struct pool *pool, struct dm_target *ti) { struct pool_c *pt = ti->private; + /* + * We want to make sure that degraded pools are never upgraded. + */ + enum pool_mode old_mode = pool->pf.mode; + enum pool_mode new_mode = pt->pf.mode; + + if (old_mode > new_mode) + new_mode = old_mode; + pool->ti = ti; pool->low_water_blocks = pt->low_water_blocks; pool->pf = pt->pf; + set_pool_mode(pool, new_mode); /* * If discard_passdown was enabled verify that the data device * supports discards. Disable discard_passdown if not; otherwise * -EOPNOTSUPP will be returned. */ + /* FIXME: pull this out into a sep fn. */ if (pt->pf.discard_passdown) { struct request_queue *q = bdev_get_queue(pt->data_dev->bdev); if (!q || !blk_queue_discard(q)) { @@ -1681,6 +1887,7 @@ static void unbind_control_target(struct pool *pool, struct dm_target *ti) /* Initialize pool features. */ static void pool_features_init(struct pool_features *pf) { + pf->mode = PM_WRITE; pf->zero_new_blocks = 1; pf->discard_enabled = 1; pf->discard_passdown = 1; @@ -1711,14 +1918,16 @@ static struct kmem_cache *_endio_hook_cache; static struct pool *pool_create(struct mapped_device *pool_md, struct block_device *metadata_dev, - unsigned long block_size, char **error) + unsigned long block_size, + int read_only, char **error) { int r; void *err_p; struct pool *pool; struct dm_pool_metadata *pmd; + bool format_device = read_only ? false : true; - pmd = dm_pool_metadata_open(metadata_dev, block_size, true); + pmd = dm_pool_metadata_open(metadata_dev, block_size, format_device); if (IS_ERR(pmd)) { *error = "Error creating metadata object"; return (struct pool *)pmd; @@ -1835,8 +2044,8 @@ static void __pool_dec(struct pool *pool) static struct pool *__pool_find(struct mapped_device *pool_md, struct block_device *metadata_dev, - unsigned long block_size, char **error, - int *created) + unsigned long block_size, int read_only, + char **error, int *created) { struct pool *pool = __pool_table_lookup_metadata_dev(metadata_dev); @@ -1857,7 +2066,7 @@ static struct pool *__pool_find(struct mapped_device *pool_md, __pool_inc(pool); } else { - pool = pool_create(pool_md, metadata_dev, block_size, error); + pool = pool_create(pool_md, metadata_dev, block_size, read_only, error); *created = 1; } } @@ -1908,19 +2117,23 @@ static int parse_pool_features(struct dm_arg_set *as, struct pool_features *pf, arg_name = dm_shift_arg(as); argc--; - if (!strcasecmp(arg_name, "skip_block_zeroing")) { + if (!strcasecmp(arg_name, "skip_block_zeroing")) pf->zero_new_blocks = 0; - continue; - } else if (!strcasecmp(arg_name, "ignore_discard")) { + + else if (!strcasecmp(arg_name, "ignore_discard")) pf->discard_enabled = 0; - continue; - } else if (!strcasecmp(arg_name, "no_discard_passdown")) { + + else if (!strcasecmp(arg_name, "no_discard_passdown")) pf->discard_passdown = 0; - continue; - } - ti->error = "Unrecognised pool feature requested"; - r = -EINVAL; + else if (!strcasecmp(arg_name, "read_only")) + pf->mode = PM_READ_ONLY; + + else { + ti->error = "Unrecognised pool feature requested"; + r = -EINVAL; + break; + } } return r; @@ -2013,7 +2226,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) } pool = __pool_find(dm_table_get_md(ti->table), metadata_dev->bdev, - block_size, &ti->error, &pool_created); + block_size, pf.mode == PM_READ_ONLY, &ti->error, &pool_created); if (IS_ERR(pool)) { r = PTR_ERR(pool); goto out_free_pt; @@ -2146,15 +2359,12 @@ static int pool_preresume(struct dm_target *ti) r = dm_pool_resize_data_dev(pool->pmd, data_size); if (r) { DMERR("failed to resize data device"); + /* FIXME Stricter than necessary: Rollback transaction instead here */ + set_pool_mode(pool, PM_READ_ONLY); return r; } - r = dm_pool_commit_metadata(pool->pmd); - if (r) { - DMERR("%s: dm_pool_commit_metadata() failed, error = %d", - __func__, r); - return r; - } + (void) commit_or_fallback(pool); } return 0; @@ -2177,19 +2387,12 @@ static void pool_resume(struct dm_target *ti) static void pool_postsuspend(struct dm_target *ti) { - int r; struct pool_c *pt = ti->private; struct pool *pool = pt->pool; cancel_delayed_work(&pool->waker); flush_workqueue(pool->wq); - - r = dm_pool_commit_metadata(pool->pmd); - if (r < 0) { - DMERR("%s: dm_pool_commit_metadata() failed, error = %d", - __func__, r); - /* FIXME: invalidate device? error the next FUA or FLUSH bio ?*/ - } + (void) commit_or_fallback(pool); } static int check_arg_count(unsigned argc, unsigned args_required) @@ -2323,12 +2526,7 @@ static int process_reserve_metadata_snap_mesg(unsigned argc, char **argv, struct if (r) return r; - r = dm_pool_commit_metadata(pool->pmd); - if (r) { - DMERR("%s: dm_pool_commit_metadata() failed, error = %d", - __func__, r); - return r; - } + (void) commit_or_fallback(pool); r = dm_pool_reserve_metadata_snap(pool->pmd); if (r) @@ -2389,16 +2587,32 @@ static int pool_message(struct dm_target *ti, unsigned argc, char **argv) else DMWARN("Unrecognised thin pool target message received: %s", argv[0]); - if (!r) { - r = dm_pool_commit_metadata(pool->pmd); - if (r) - DMERR("%s message: dm_pool_commit_metadata() failed, error = %d", - argv[0], r); - } + if (!r) + (void) commit_or_fallback(pool); return r; } +static void emit_flags(struct pool_features *pf, char *result, + unsigned sz, unsigned maxlen) +{ + unsigned count = !pf->zero_new_blocks + !pf->discard_enabled + + !pf->discard_passdown + (pf->mode == PM_READ_ONLY); + DMEMIT("%u ", count); + + if (!pf->zero_new_blocks) + DMEMIT("skip_block_zeroing "); + + if (!pf->discard_enabled) + DMEMIT("ignore_discard "); + + if (!pf->discard_passdown) + DMEMIT("no_discard_passdown "); + + if (pf->mode == PM_READ_ONLY) + DMEMIT("read_only "); +} + /* * Status line is: * / @@ -2407,7 +2621,7 @@ static int pool_message(struct dm_target *ti, unsigned argc, char **argv) static int pool_status(struct dm_target *ti, status_type_t type, char *result, unsigned maxlen) { - int r, count; + int r; unsigned sz = 0; uint64_t transaction_id; dm_block_t nr_free_blocks_data; @@ -2422,6 +2636,11 @@ static int pool_status(struct dm_target *ti, status_type_t type, switch (type) { case STATUSTYPE_INFO: + if (get_pool_mode(pool) == PM_FAIL) { + DMEMIT("Fail"); + break; + } + r = dm_pool_get_metadata_transaction_id(pool->pmd, &transaction_id); if (r) @@ -2457,9 +2676,19 @@ static int pool_status(struct dm_target *ti, status_type_t type, (unsigned long long)nr_blocks_data); if (held_root) - DMEMIT("%llu", held_root); + DMEMIT("%llu ", held_root); + else + DMEMIT("- "); + + if (pool->pf.mode == PM_READ_ONLY) + DMEMIT("ro "); else - DMEMIT("-"); + DMEMIT("rw "); + + if (pool->pf.discard_enabled && pool->pf.discard_passdown) + DMEMIT("discard_passdown"); + else + DMEMIT("no_discard_passdown"); break; @@ -2469,20 +2698,7 @@ static int pool_status(struct dm_target *ti, status_type_t type, format_dev_t(buf2, pt->data_dev->bdev->bd_dev), (unsigned long)pool->sectors_per_block, (unsigned long long)pt->low_water_blocks); - - count = !pool->pf.zero_new_blocks + !pool->pf.discard_enabled + - !pt->pf.discard_passdown; - DMEMIT("%u ", count); - - if (!pool->pf.zero_new_blocks) - DMEMIT("skip_block_zeroing "); - - if (!pool->pf.discard_enabled) - DMEMIT("ignore_discard "); - - if (!pt->pf.discard_passdown) - DMEMIT("no_discard_passdown "); - + emit_flags(&pt->pf, result, sz, maxlen); break; } @@ -2542,7 +2758,7 @@ static struct target_type pool_target = { .name = "thin-pool", .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | DM_TARGET_IMMUTABLE, - .version = {1, 2, 0}, + .version = {1, 3, 0}, .module = THIS_MODULE, .ctr = pool_ctr, .dtr = pool_dtr, @@ -2647,6 +2863,11 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) } __pool_inc(tc->pool); + if (get_pool_mode(tc->pool) == PM_FAIL) { + ti->error = "Couldn't open thin device, Pool is in fail mode"; + goto bad_thin_open; + } + r = dm_pool_open_thin_device(tc->pool->pmd, tc->dev_id, &tc->td); if (r) { ti->error = "Couldn't open thin internal device"; @@ -2755,6 +2976,11 @@ static int thin_status(struct dm_target *ti, status_type_t type, char buf[BDEVNAME_SIZE]; struct thin_c *tc = ti->private; + if (get_pool_mode(tc->pool) == PM_FAIL) { + DMEMIT("Fail"); + return 0; + } + if (!tc->td) DMEMIT("-"); else { @@ -2823,7 +3049,7 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type thin_target = { .name = "thin", - .version = {1, 2, 0}, + .version = {1, 3, 0}, .module = THIS_MODULE, .ctr = thin_ctr, .dtr = thin_dtr, -- cgit v1.2.3 From 1f4e0ff07980820977f45d6a5dbc81d3bb9ce4d3 Mon Sep 17 00:00:00 2001 From: Alasdair G Kergon Date: Fri, 27 Jul 2012 15:08:16 +0100 Subject: dm thin: commit before gathering status Commit outstanding metadata before returning the status for a dm thin pool so that the numbers reported are as up-to-date as possible. The commit is not performed if the device is suspended or if the DM_NOFLUSH_FLAG is supplied by userspace and passed to the target through a new 'status_flags' parameter in the target's dm_status_fn. The userspace dmsetup tool will support the --noflush flag with the 'dmsetup status' and 'dmsetup wait' commands from version 1.02.76 onwards. Tested-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-crypt.c | 2 +- drivers/md/dm-delay.c | 2 +- drivers/md/dm-flakey.c | 2 +- drivers/md/dm-ioctl.c | 5 ++++- drivers/md/dm-linear.c | 2 +- drivers/md/dm-mpath.c | 2 +- drivers/md/dm-raid.c | 2 +- drivers/md/dm-raid1.c | 2 +- drivers/md/dm-snap.c | 6 +++--- drivers/md/dm-stripe.c | 4 ++-- drivers/md/dm-thin.c | 9 +++++++-- drivers/md/dm-verity.c | 2 +- drivers/md/dm.h | 5 +++++ include/linux/device-mapper.h | 2 +- include/linux/dm-ioctl.h | 8 +++++--- 15 files changed, 35 insertions(+), 20 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 00a25ab987c9..664743d6a6cd 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1733,7 +1733,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio, } static int crypt_status(struct dm_target *ti, status_type_t type, - char *result, unsigned int maxlen) + unsigned status_flags, char *result, unsigned maxlen) { struct crypt_config *cc = ti->private; unsigned int sz = 0; diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index 2dc22dddb2ae..f53846f9ab50 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -295,7 +295,7 @@ static int delay_map(struct dm_target *ti, struct bio *bio, } static int delay_status(struct dm_target *ti, status_type_t type, - char *result, unsigned maxlen) + unsigned status_flags, char *result, unsigned maxlen) { struct delay_c *dc = ti->private; int sz = 0; diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index ac49c01f1a44..cc15543a6ad7 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -333,7 +333,7 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio, } static int flakey_status(struct dm_target *ti, status_type_t type, - char *result, unsigned int maxlen) + unsigned status_flags, char *result, unsigned maxlen) { unsigned sz = 0; struct flakey_c *fc = ti->private; diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index a1a3e6df17b8..afd95986d099 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1054,6 +1054,7 @@ static void retrieve_status(struct dm_table *table, char *outbuf, *outptr; status_type_t type; size_t remaining, len, used = 0; + unsigned status_flags = 0; outptr = outbuf = get_result_buffer(param, param_size, &len); @@ -1090,7 +1091,9 @@ static void retrieve_status(struct dm_table *table, /* Get the status/table string from the target driver */ if (ti->type->status) { - if (ti->type->status(ti, type, outptr, remaining)) { + if (param->flags & DM_NOFLUSH_FLAG) + status_flags |= DM_STATUS_NOFLUSH_FLAG; + if (ti->type->status(ti, type, status_flags, outptr, remaining)) { param->flags |= DM_BUFFER_FULL_FLAG; break; } diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 3639eeab6042..1bf19a93eef0 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -96,7 +96,7 @@ static int linear_map(struct dm_target *ti, struct bio *bio, } static int linear_status(struct dm_target *ti, status_type_t type, - char *result, unsigned int maxlen) + unsigned status_flags, char *result, unsigned maxlen) { struct linear_c *lc = (struct linear_c *) ti->private; diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 8a3b2d53f81b..d8abb90a6c2f 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1378,7 +1378,7 @@ static void multipath_resume(struct dm_target *ti) * num_paths num_selector_args [path_dev [selector_args]* ]+ ]+ */ static int multipath_status(struct dm_target *ti, status_type_t type, - char *result, unsigned int maxlen) + unsigned status_flags, char *result, unsigned maxlen) { int sz = 0; unsigned long flags; diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index f4275a8e860c..f2f29c526544 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -1081,7 +1081,7 @@ static int raid_map(struct dm_target *ti, struct bio *bio, union map_info *map_c } static int raid_status(struct dm_target *ti, status_type_t type, - char *result, unsigned maxlen) + unsigned status_flags, char *result, unsigned maxlen) { struct raid_set *rs = ti->private; unsigned raid_param_cnt = 1; /* at least 1 for chunksize */ diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 596a3a1164a7..bc5ddba8045b 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -1367,7 +1367,7 @@ static char device_status_char(struct mirror *m) static int mirror_status(struct dm_target *ti, status_type_t type, - char *result, unsigned int maxlen) + unsigned status_flags, char *result, unsigned maxlen) { unsigned int m, sz = 0; struct mirror_set *ms = (struct mirror_set *) ti->private; diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 6c0f3e33923a..a143921feaf6 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1849,7 +1849,7 @@ static void snapshot_merge_resume(struct dm_target *ti) } static int snapshot_status(struct dm_target *ti, status_type_t type, - char *result, unsigned int maxlen) + unsigned status_flags, char *result, unsigned maxlen) { unsigned sz = 0; struct dm_snapshot *snap = ti->private; @@ -2151,8 +2151,8 @@ static void origin_resume(struct dm_target *ti) ti->max_io_len = get_origin_minimum_chunksize(dev->bdev); } -static int origin_status(struct dm_target *ti, status_type_t type, char *result, - unsigned int maxlen) +static int origin_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) { struct dm_dev *dev = ti->private; diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 9e8f4cc63d6c..a087bf2a8d66 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -311,8 +311,8 @@ static int stripe_map(struct dm_target *ti, struct bio *bio, * */ -static int stripe_status(struct dm_target *ti, - status_type_t type, char *result, unsigned int maxlen) +static int stripe_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) { struct stripe_c *sc = (struct stripe_c *) ti->private; char buffer[sc->stripes + 1]; diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 087e9b34d290..af1fc3b2c2ad 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -5,6 +5,7 @@ */ #include "dm-thin-metadata.h" +#include "dm.h" #include #include @@ -2619,7 +2620,7 @@ static void emit_flags(struct pool_features *pf, char *result, * / */ static int pool_status(struct dm_target *ti, status_type_t type, - char *result, unsigned maxlen) + unsigned status_flags, char *result, unsigned maxlen) { int r; unsigned sz = 0; @@ -2641,6 +2642,10 @@ static int pool_status(struct dm_target *ti, status_type_t type, break; } + /* Commit to ensure statistics aren't out-of-date */ + if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti)) + (void) commit_or_fallback(pool); + r = dm_pool_get_metadata_transaction_id(pool->pmd, &transaction_id); if (r) @@ -2968,7 +2973,7 @@ static void thin_postsuspend(struct dm_target *ti) * */ static int thin_status(struct dm_target *ti, status_type_t type, - char *result, unsigned maxlen) + unsigned status_flags, char *result, unsigned maxlen) { int r; ssize_t sz = 0; diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c index fa365d39b612..254d19268ad2 100644 --- a/drivers/md/dm-verity.c +++ b/drivers/md/dm-verity.c @@ -515,7 +515,7 @@ static int verity_map(struct dm_target *ti, struct bio *bio, * Status: V (valid) or C (corruption found) */ static int verity_status(struct dm_target *ti, status_type_t type, - char *result, unsigned maxlen) + unsigned status_flags, char *result, unsigned maxlen) { struct dm_verity *v = ti->private; unsigned sz = 0; diff --git a/drivers/md/dm.h b/drivers/md/dm.h index b7dacd59d8d7..52eef493d266 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -22,6 +22,11 @@ #define DM_SUSPEND_LOCKFS_FLAG (1 << 0) #define DM_SUSPEND_NOFLUSH_FLAG (1 << 1) +/* + * Status feature flags + */ +#define DM_STATUS_NOFLUSH_FLAG (1 << 0) + /* * Type of table and mapped_device's mempool */ diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index eb753633b576..38d27a10aa5d 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -72,7 +72,7 @@ typedef int (*dm_preresume_fn) (struct dm_target *ti); typedef void (*dm_resume_fn) (struct dm_target *ti); typedef int (*dm_status_fn) (struct dm_target *ti, status_type_t status_type, - char *result, unsigned int maxlen); + unsigned status_flags, char *result, unsigned maxlen); typedef int (*dm_message_fn) (struct dm_target *ti, unsigned argc, char **argv); diff --git a/include/linux/dm-ioctl.h b/include/linux/dm-ioctl.h index 3ece4eee84cb..91e3a360f611 100644 --- a/include/linux/dm-ioctl.h +++ b/include/linux/dm-ioctl.h @@ -267,9 +267,9 @@ enum { #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) #define DM_VERSION_MAJOR 4 -#define DM_VERSION_MINOR 22 -#define DM_VERSION_PATCHLEVEL 1 -#define DM_VERSION_EXTRA "-ioctl (2012-06-01)" +#define DM_VERSION_MINOR 23 +#define DM_VERSION_PATCHLEVEL 0 +#define DM_VERSION_EXTRA "-ioctl (2012-07-25)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ @@ -307,6 +307,8 @@ enum { /* * Set this to suspend without flushing queued ios. + * Also disables flushing uncommitted changes in the thin target before + * generating statistics for DM_TABLE_STATUS and DM_DEV_WAIT. */ #define DM_NOFLUSH_FLAG (1 << 11) /* In */ -- cgit v1.2.3