summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJens Axboe <axboe@kernel.dk>2020-08-03 06:38:44 -0600
committerJens Axboe <axboe@kernel.dk>2020-08-03 06:38:44 -0600
commitf59589fc89665102923725e80e12f782d5f74f67 (patch)
tree412d26f3b5d9f5958a3678c77b07a7efc96ca619
parenta9e8e18aaf914bf273ed63480c4c3d8dd626c95f (diff)
parent45a4d8fd6c7926e7991a1b29233d725fe12935da (diff)
Merge branch 'md-next' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md into for-5.9/driversfor-5.9/drivers-20200803
Pull MD fixes from Song. * 'md-next' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md: md/raid5: Allow degraded raid6 to do rmw md/raid5: Fix Force reconstruct-write io stuck in degraded raid5 raid5: don't duplicate code for different paths in handle_stripe raid5-cache: hold spinlock instead of mutex in r5c_journal_mode_show md: print errno in super_written md/raid5: remove the redundant setting of STRIPE_HANDLE md: register new md sysfs file 'uuid' read-only md: fix max sectors calculation for super 1.0
-rw-r--r--Documentation/admin-guide/md.rst4
-rw-r--r--drivers/md/md.c47
-rw-r--r--drivers/md/raid5-cache.c9
-rw-r--r--drivers/md/raid5.c40
4 files changed, 71 insertions, 29 deletions
diff --git a/Documentation/admin-guide/md.rst b/Documentation/admin-guide/md.rst
index d973d469ffc4..cc8781b96b4d 100644
--- a/Documentation/admin-guide/md.rst
+++ b/Documentation/admin-guide/md.rst
@@ -426,6 +426,10 @@ All md devices contain:
The accepted values when writing to this file are ``ppl`` and ``resync``,
used to enable and disable PPL.
+ uuid
+ This indicates the UUID of the array in the following format:
+ xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
+
As component devices are added to an md array, they appear in the ``md``
directory as new directories named::
diff --git a/drivers/md/md.c b/drivers/md/md.c
index ea48bc25cce1..9c69084cae73 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -978,7 +978,8 @@ static void super_written(struct bio *bio)
struct mddev *mddev = rdev->mddev;
if (bio->bi_status) {
- pr_err("md: super_written gets error=%d\n", bio->bi_status);
+ pr_err("md: %s gets error=%d\n", __func__,
+ blk_status_to_errno(bio->bi_status));
md_error(mddev, rdev);
if (!test_bit(Faulty, &rdev->flags)
&& (bio->bi_opf & MD_FAILFAST)) {
@@ -2193,6 +2194,24 @@ retry:
sb->sb_csum = calc_sb_1_csum(sb);
}
+static sector_t super_1_choose_bm_space(sector_t dev_size)
+{
+ sector_t bm_space;
+
+ /* if the device is bigger than 8Gig, save 64k for bitmap
+ * usage, if bigger than 200Gig, save 128k
+ */
+ if (dev_size < 64*2)
+ bm_space = 0;
+ else if (dev_size - 64*2 >= 200*1024*1024*2)
+ bm_space = 128*2;
+ else if (dev_size - 4*2 > 8*1024*1024*2)
+ bm_space = 64*2;
+ else
+ bm_space = 4*2;
+ return bm_space;
+}
+
static unsigned long long
super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
{
@@ -2213,13 +2232,22 @@ super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
return 0;
} else {
/* minor version 0; superblock after data */
- sector_t sb_start;
- sb_start = (i_size_read(rdev->bdev->bd_inode) >> 9) - 8*2;
+ sector_t sb_start, bm_space;
+ sector_t dev_size = i_size_read(rdev->bdev->bd_inode) >> 9;
+
+ /* 8K is for superblock */
+ sb_start = dev_size - 8*2;
sb_start &= ~(sector_t)(4*2 - 1);
- max_sectors = rdev->sectors + sb_start - rdev->sb_start;
+
+ bm_space = super_1_choose_bm_space(dev_size);
+
+ /* Space that can be used to store date needs to decrease
+ * superblock bitmap space and bad block space(4K)
+ */
+ max_sectors = sb_start - bm_space - 4*2;
+
if (!num_sectors || num_sectors > max_sectors)
num_sectors = max_sectors;
- rdev->sb_start = sb_start;
}
sb = page_address(rdev->sb_page);
sb->data_size = cpu_to_le64(num_sectors);
@@ -4226,6 +4254,14 @@ static struct md_sysfs_entry md_raid_disks =
__ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store);
static ssize_t
+uuid_show(struct mddev *mddev, char *page)
+{
+ return sprintf(page, "%pU\n", mddev->uuid);
+}
+static struct md_sysfs_entry md_uuid =
+__ATTR(uuid, S_IRUGO, uuid_show, NULL);
+
+static ssize_t
chunk_size_show(struct mddev *mddev, char *page)
{
if (mddev->reshape_position != MaxSector &&
@@ -5481,6 +5517,7 @@ static struct attribute *md_default_attrs[] = {
&md_level.attr,
&md_layout.attr,
&md_raid_disks.attr,
+ &md_uuid.attr,
&md_chunk_size.attr,
&md_size.attr,
&md_resync_start.attr,
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 82eb4a906e31..4337ae0e6af2 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -2537,13 +2537,10 @@ static ssize_t r5c_journal_mode_show(struct mddev *mddev, char *page)
struct r5conf *conf;
int ret;
- ret = mddev_lock(mddev);
- if (ret)
- return ret;
-
+ spin_lock(&mddev->lock);
conf = mddev->private;
if (!conf || !conf->log) {
- mddev_unlock(mddev);
+ spin_unlock(&mddev->lock);
return 0;
}
@@ -2563,7 +2560,7 @@ static ssize_t r5c_journal_mode_show(struct mddev *mddev, char *page)
default:
ret = 0;
}
- mddev_unlock(mddev);
+ spin_unlock(&mddev->lock);
return ret;
}
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index a6ff6e1e039b..657634a7e8d1 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3557,6 +3557,7 @@ static int need_this_block(struct stripe_head *sh, struct stripe_head_state *s,
struct r5dev *fdev[2] = { &sh->dev[s->failed_num[0]],
&sh->dev[s->failed_num[1]] };
int i;
+ bool force_rcw = (sh->raid_conf->rmw_level == PARITY_DISABLE_RMW);
if (test_bit(R5_LOCKED, &dev->flags) ||
@@ -3615,17 +3616,27 @@ static int need_this_block(struct stripe_head *sh, struct stripe_head_state *s,
* devices must be read.
*/
return 1;
+
+ if (s->failed >= 2 &&
+ (fdev[i]->towrite ||
+ s->failed_num[i] == sh->pd_idx ||
+ s->failed_num[i] == sh->qd_idx) &&
+ !test_bit(R5_UPTODATE, &fdev[i]->flags))
+ /* In max degraded raid6, If the failed disk is P, Q,
+ * or we want to read the failed disk, we need to do
+ * reconstruct-write.
+ */
+ force_rcw = true;
}
- /* If we are forced to do a reconstruct-write, either because
- * the current RAID6 implementation only supports that, or
- * because parity cannot be trusted and we are currently
- * recovering it, there is extra need to be careful.
+ /* If we are forced to do a reconstruct-write, because parity
+ * cannot be trusted and we are currently recovering it, there
+ * is extra need to be careful.
* If one of the devices that we would need to read, because
* it is not being overwritten (and maybe not written at all)
* is missing/faulty, then we need to read everything we can.
*/
- if (sh->raid_conf->level != 6 &&
+ if (!force_rcw &&
sh->sector < sh->raid_conf->mddev->recovery_cp)
/* reconstruct-write isn't being forced */
return 0;
@@ -3995,10 +4006,8 @@ static int handle_stripe_dirtying(struct r5conf *conf,
set_bit(R5_LOCKED, &dev->flags);
set_bit(R5_Wantread, &dev->flags);
s->locked++;
- } else {
+ } else
set_bit(STRIPE_DELAYED, &sh->state);
- set_bit(STRIPE_HANDLE, &sh->state);
- }
}
}
}
@@ -4023,10 +4032,8 @@ static int handle_stripe_dirtying(struct r5conf *conf,
set_bit(R5_Wantread, &dev->flags);
s->locked++;
qread++;
- } else {
+ } else
set_bit(STRIPE_DELAYED, &sh->state);
- set_bit(STRIPE_HANDLE, &sh->state);
- }
}
}
if (rcw && conf->mddev->queue)
@@ -4866,7 +4873,7 @@ static void handle_stripe(struct stripe_head *sh)
* or to load a block that is being partially written.
*/
if (s.to_read || s.non_overwrite
- || (conf->level == 6 && s.to_write && s.failed)
+ || (s.to_write && s.failed)
|| (s.syncing && (s.uptodate + s.compute < disks))
|| s.replacing
|| s.expanding)
@@ -4970,14 +4977,11 @@ static void handle_stripe(struct stripe_head *sh)
if (!test_bit(R5_ReWrite, &dev->flags)) {
set_bit(R5_Wantwrite, &dev->flags);
set_bit(R5_ReWrite, &dev->flags);
- set_bit(R5_LOCKED, &dev->flags);
- s.locked++;
- } else {
+ } else
/* let's read it back */
set_bit(R5_Wantread, &dev->flags);
- set_bit(R5_LOCKED, &dev->flags);
- s.locked++;
- }
+ set_bit(R5_LOCKED, &dev->flags);
+ s.locked++;
}
}