-rw-r--r--  block/Makefile                        |   3
-rw-r--r--  block/badblocks.c                     | 327
-rw-r--r--  block/bio-integrity-auto.c            | 191
-rw-r--r--  block/bio-integrity.c                 | 266
-rw-r--r--  block/bio.c                           |  17
-rw-r--r--  block/blk-core.c                      |   1
-rw-r--r--  block/blk-flush.c                     |  10
-rw-r--r--  block/blk-iocost.c                    |   2
-rw-r--r--  block/blk-mq-sched.c                  |   2
-rw-r--r--  block/blk-mq-sysfs.c                  |   4
-rw-r--r--  block/blk-mq-tag.c                    |   3
-rw-r--r--  block/blk-mq.c                        |  22
-rw-r--r--  block/blk-mq.h                        |   4
-rw-r--r--  block/blk-settings.c                  |  58
-rw-r--r--  block/blk-sysfs.c                     | 309
-rw-r--r--  block/blk-throttle.c                  |  82
-rw-r--r--  block/blk-throttle.h                  |   7
-rw-r--r--  block/blk-wbt.c                       |  17
-rw-r--r--  block/blk.h                           |   2
-rw-r--r--  block/bounce.c                        |   2
-rw-r--r--  block/elevator.c                      |  43
-rw-r--r--  block/elevator.h                      |   2
-rw-r--r--  block/genhd.c                         |   9
-rw-r--r--  block/kyber-iosched.c                 |   2
-rw-r--r--  block/partitions/sgi.c                |   2
-rw-r--r--  block/partitions/sun.c                |   2
-rw-r--r--  block/t10-pi.c                        |   6
-rw-r--r--  drivers/block/loop.c                  |  88
-rw-r--r--  drivers/block/null_blk/main.c         | 177
-rw-r--r--  drivers/block/null_blk/null_blk.h     |   6
-rw-r--r--  drivers/block/null_blk/zoned.c        |  20
-rw-r--r--  drivers/block/ublk_drv.c              |  69
-rw-r--r--  drivers/md/dm-integrity.c             |  12
-rw-r--r--  drivers/md/dm-table.c                 |   6
-rw-r--r--  drivers/md/md.c                       |  61
-rw-r--r--  drivers/md/md.h                       |  14
-rw-r--r--  drivers/md/raid1-10.c                 |   2
-rw-r--r--  drivers/md/raid1.c                    |  10
-rw-r--r--  drivers/md/raid10.c                   |  14
-rw-r--r--  drivers/nvdimm/badrange.c             |   2
-rw-r--r--  drivers/nvdimm/nd.h                   |   2
-rw-r--r--  drivers/nvdimm/pfn_devs.c             |   7
-rw-r--r--  drivers/nvdimm/pmem.c                 |   2
-rw-r--r--  drivers/target/target_core_iblock.c   |  12
-rw-r--r--  include/linux/badblocks.h             |  10
-rw-r--r--  include/linux/bio-integrity.h         |  25
-rw-r--r--  include/linux/bio.h                   |   4
-rw-r--r--  include/linux/blkdev.h                |  15
-rw-r--r--  include/uapi/linux/ublk_cmd.h         |   7
49 files changed, 995 insertions, 965 deletions
diff --git a/block/Makefile b/block/Makefile
index 33748123710b..3a941dc0d27f 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -26,7 +26,8 @@ obj-$(CONFIG_MQ_IOSCHED_KYBER) += kyber-iosched.o
bfq-y := bfq-iosched.o bfq-wf2q.o bfq-cgroup.o
obj-$(CONFIG_IOSCHED_BFQ) += bfq.o
-obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o
+obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o \
+ bio-integrity-auto.o
obj-$(CONFIG_BLK_DEV_ZONED) += blk-zoned.o
obj-$(CONFIG_BLK_WBT) += blk-wbt.o
obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o
diff --git a/block/badblocks.c b/block/badblocks.c
index db4ec8b9b2a8..ece64e76fe8f 100644
--- a/block/badblocks.c
+++ b/block/badblocks.c
@@ -528,51 +528,6 @@ out:
}
/*
- * Return 'true' if the range indicated by 'bad' can be backward merged
- * with the bad range (from the bad table) index by 'behind'.
- */
-static bool can_merge_behind(struct badblocks *bb,
- struct badblocks_context *bad, int behind)
-{
- sector_t sectors = bad->len;
- sector_t s = bad->start;
- u64 *p = bb->page;
-
- if ((s < BB_OFFSET(p[behind])) &&
- ((s + sectors) >= BB_OFFSET(p[behind])) &&
- ((BB_END(p[behind]) - s) <= BB_MAX_LEN) &&
- BB_ACK(p[behind]) == bad->ack)
- return true;
- return false;
-}
-
-/*
- * Do backward merge for range indicated by 'bad' and the bad range
- * (from the bad table) indexed by 'behind'. The return value is merged
- * sectors from bad->len.
- */
-static int behind_merge(struct badblocks *bb, struct badblocks_context *bad,
- int behind)
-{
- sector_t sectors = bad->len;
- sector_t s = bad->start;
- u64 *p = bb->page;
- int merged = 0;
-
- WARN_ON(s >= BB_OFFSET(p[behind]));
- WARN_ON((s + sectors) < BB_OFFSET(p[behind]));
-
- if (s < BB_OFFSET(p[behind])) {
- merged = BB_OFFSET(p[behind]) - s;
- p[behind] = BB_MAKE(s, BB_LEN(p[behind]) + merged, bad->ack);
-
- WARN_ON((BB_LEN(p[behind]) + merged) >= BB_MAX_LEN);
- }
-
- return merged;
-}
-
-/*
* Return 'true' if the range indicated by 'bad' can be forward
* merged with the bad range (from the bad table) indexed by 'prev'.
*/
@@ -745,7 +700,7 @@ static bool can_front_overwrite(struct badblocks *bb, int prev,
*extra = 2;
}
- if ((bb->count + (*extra)) >= MAX_BADBLOCKS)
+ if ((bb->count + (*extra)) > MAX_BADBLOCKS)
return false;
return true;
@@ -855,40 +810,60 @@ static void badblocks_update_acked(struct badblocks *bb)
bb->unacked_exist = 0;
}
+/*
+ * Try to combine the bad range (from the bad table) indexed by 'prev'
+ * with the one that follows it: if the two are adjacent, share the same
+ * ack state and fit within BB_MAX_LEN, merge them and return 'true'.
+ */
+static bool try_adjacent_combine(struct badblocks *bb, int prev)
+{
+ u64 *p = bb->page;
+
+ if (prev >= 0 && (prev + 1) < bb->count &&
+ BB_END(p[prev]) == BB_OFFSET(p[prev + 1]) &&
+ (BB_LEN(p[prev]) + BB_LEN(p[prev + 1])) <= BB_MAX_LEN &&
+ BB_ACK(p[prev]) == BB_ACK(p[prev + 1])) {
+ p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
+ BB_LEN(p[prev]) + BB_LEN(p[prev + 1]),
+ BB_ACK(p[prev]));
+
+ if ((prev + 2) < bb->count)
+ memmove(p + prev + 1, p + prev + 2,
+ (bb->count - (prev + 2)) * 8);
+ bb->count--;
+ return true;
+ }
+ return false;
+}
+
/* Do exact work to set bad block range into the bad block table */
-static int _badblocks_set(struct badblocks *bb, sector_t s, int sectors,
- int acknowledged)
+static bool _badblocks_set(struct badblocks *bb, sector_t s, sector_t sectors,
+ int acknowledged)
{
- int retried = 0, space_desired = 0;
- int orig_len, len = 0, added = 0;
+ int len = 0, added = 0;
struct badblocks_context bad;
int prev = -1, hint = -1;
- sector_t orig_start;
unsigned long flags;
- int rv = 0;
u64 *p;
if (bb->shift < 0)
/* badblocks are disabled */
- return 1;
+ return false;
if (sectors == 0)
/* Invalid sectors number */
- return 1;
+ return false;
if (bb->shift) {
/* round the start down, and the end up */
sector_t next = s + sectors;
- rounddown(s, bb->shift);
- roundup(next, bb->shift);
+ rounddown(s, 1 << bb->shift);
+ roundup(next, 1 << bb->shift);
sectors = next - s;
}
write_seqlock_irqsave(&bb->lock, flags);
- orig_start = s;
- orig_len = sectors;
bad.ack = acknowledged;
p = bb->page;
@@ -897,6 +872,9 @@ re_insert:
bad.len = sectors;
len = 0;
+ if (badblocks_full(bb))
+ goto out;
+
if (badblocks_empty(bb)) {
len = insert_at(bb, 0, &bad);
bb->count++;
@@ -908,32 +886,14 @@ re_insert:
/* start before all badblocks */
if (prev < 0) {
- if (!badblocks_full(bb)) {
- /* insert on the first */
- if (bad.len > (BB_OFFSET(p[0]) - bad.start))
- bad.len = BB_OFFSET(p[0]) - bad.start;
- len = insert_at(bb, 0, &bad);
- bb->count++;
- added++;
- hint = 0;
- goto update_sectors;
- }
-
- /* No sapce, try to merge */
- if (overlap_behind(bb, &bad, 0)) {
- if (can_merge_behind(bb, &bad, 0)) {
- len = behind_merge(bb, &bad, 0);
- added++;
- } else {
- len = BB_OFFSET(p[0]) - s;
- space_desired = 1;
- }
- hint = 0;
- goto update_sectors;
- }
-
- /* no table space and give up */
- goto out;
+ /* insert on the first */
+ if (bad.len > (BB_OFFSET(p[0]) - bad.start))
+ bad.len = BB_OFFSET(p[0]) - bad.start;
+ len = insert_at(bb, 0, &bad);
+ bb->count++;
+ added++;
+ hint = ++prev;
+ goto update_sectors;
}
/* in case p[prev-1] can be merged with p[prev] */
@@ -945,33 +905,6 @@ re_insert:
goto update_sectors;
}
- if (overlap_front(bb, prev, &bad)) {
- if (can_merge_front(bb, prev, &bad)) {
- len = front_merge(bb, prev, &bad);
- added++;
- } else {
- int extra = 0;
-
- if (!can_front_overwrite(bb, prev, &bad, &extra)) {
- len = min_t(sector_t,
- BB_END(p[prev]) - s, sectors);
- hint = prev;
- goto update_sectors;
- }
-
- len = front_overwrite(bb, prev, &bad, extra);
- added++;
- bb->count += extra;
-
- if (can_combine_front(bb, prev, &bad)) {
- front_combine(bb, prev);
- bb->count--;
- }
- }
- hint = prev;
- goto update_sectors;
- }
-
if (can_merge_front(bb, prev, &bad)) {
len = front_merge(bb, prev, &bad);
added++;
@@ -979,21 +912,29 @@ re_insert:
goto update_sectors;
}
- /* if no space in table, still try to merge in the covered range */
- if (badblocks_full(bb)) {
- /* skip the cannot-merge range */
- if (((prev + 1) < bb->count) &&
- overlap_behind(bb, &bad, prev + 1) &&
- ((s + sectors) >= BB_END(p[prev + 1]))) {
- len = BB_END(p[prev + 1]) - s;
- hint = prev + 1;
+ if (overlap_front(bb, prev, &bad)) {
+ int extra = 0;
+
+ if (!can_front_overwrite(bb, prev, &bad, &extra)) {
+ if (extra > 0)
+ goto out;
+
+ len = min_t(sector_t,
+ BB_END(p[prev]) - s, sectors);
+ hint = prev;
goto update_sectors;
}
- /* no retry any more */
- len = sectors;
- space_desired = 1;
- hint = -1;
+ len = front_overwrite(bb, prev, &bad, extra);
+ added++;
+ bb->count += extra;
+
+ if (can_combine_front(bb, prev, &bad)) {
+ front_combine(bb, prev);
+ bb->count--;
+ }
+
+ hint = prev;
goto update_sectors;
}
@@ -1006,7 +947,7 @@ re_insert:
len = insert_at(bb, prev + 1, &bad);
bb->count++;
added++;
- hint = prev + 1;
+ hint = ++prev;
update_sectors:
s += len;
@@ -1015,35 +956,12 @@ update_sectors:
if (sectors > 0)
goto re_insert;
- WARN_ON(sectors < 0);
-
/*
* Check whether the following already set range can be
* merged. (prev < 0) condition is not handled here,
* because it's already complicated enough.
*/
- if (prev >= 0 &&
- (prev + 1) < bb->count &&
- BB_END(p[prev]) == BB_OFFSET(p[prev + 1]) &&
- (BB_LEN(p[prev]) + BB_LEN(p[prev + 1])) <= BB_MAX_LEN &&
- BB_ACK(p[prev]) == BB_ACK(p[prev + 1])) {
- p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
- BB_LEN(p[prev]) + BB_LEN(p[prev + 1]),
- BB_ACK(p[prev]));
-
- if ((prev + 2) < bb->count)
- memmove(p + prev + 1, p + prev + 2,
- (bb->count - (prev + 2)) * 8);
- bb->count--;
- }
-
- if (space_desired && !badblocks_full(bb)) {
- s = orig_start;
- sectors = orig_len;
- space_desired = 0;
- if (retried++ < 3)
- goto re_insert;
- }
+ try_adjacent_combine(bb, prev);
out:
if (added) {
@@ -1057,10 +975,7 @@ out:
write_sequnlock_irqrestore(&bb->lock, flags);
- if (!added)
- rv = 1;
-
- return rv;
+ return sectors == 0;
}
/*
@@ -1131,21 +1046,20 @@ static int front_splitting_clear(struct badblocks *bb, int prev,
}
/* Do the exact work to clear bad block range from the bad block table */
-static int _badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
+static bool _badblocks_clear(struct badblocks *bb, sector_t s, sector_t sectors)
{
struct badblocks_context bad;
int prev = -1, hint = -1;
int len = 0, cleared = 0;
- int rv = 0;
u64 *p;
if (bb->shift < 0)
/* badblocks are disabled */
- return 1;
+ return false;
if (sectors == 0)
/* Invalid sectors number */
- return 1;
+ return false;
if (bb->shift) {
sector_t target;
@@ -1157,8 +1071,8 @@ static int _badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
* isn't than to think a block is not bad when it is.
*/
target = s + sectors;
- roundup(s, bb->shift);
- rounddown(target, bb->shift);
+ roundup(s, 1 << bb->shift);
+ rounddown(target, 1 << bb->shift);
sectors = target - s;
}
@@ -1214,7 +1128,7 @@ re_clear:
if ((BB_OFFSET(p[prev]) < bad.start) &&
(BB_END(p[prev]) > (bad.start + bad.len))) {
/* Splitting */
- if ((bb->count + 1) < MAX_BADBLOCKS) {
+ if ((bb->count + 1) <= MAX_BADBLOCKS) {
len = front_splitting_clear(bb, prev, &bad);
bb->count += 1;
cleared++;
@@ -1255,8 +1169,6 @@ update_sectors:
if (sectors > 0)
goto re_clear;
- WARN_ON(sectors < 0);
-
if (cleared) {
badblocks_update_acked(bb);
set_changed(bb);
@@ -1265,40 +1177,21 @@ update_sectors:
write_sequnlock_irq(&bb->lock);
if (!cleared)
- rv = 1;
+ return false;
- return rv;
+ return true;
}
/* Do the exact work to check bad blocks range from the bad block table */
-static int _badblocks_check(struct badblocks *bb, sector_t s, int sectors,
- sector_t *first_bad, int *bad_sectors)
+static int _badblocks_check(struct badblocks *bb, sector_t s, sector_t sectors,
+ sector_t *first_bad, sector_t *bad_sectors)
{
- int unacked_badblocks, acked_badblocks;
int prev = -1, hint = -1, set = 0;
struct badblocks_context bad;
- unsigned int seq;
+ int unacked_badblocks = 0;
+ int acked_badblocks = 0;
+ u64 *p = bb->page;
int len, rv;
- u64 *p;
-
- WARN_ON(bb->shift < 0 || sectors == 0);
-
- if (bb->shift > 0) {
- sector_t target;
-
- /* round the start down, and the end up */
- target = s + sectors;
- rounddown(s, bb->shift);
- roundup(target, bb->shift);
- sectors = target - s;
- }
-
-retry:
- seq = read_seqbegin(&bb->lock);
-
- p = bb->page;
- unacked_badblocks = 0;
- acked_badblocks = 0;
re_check:
bad.start = s;
@@ -1349,14 +1242,15 @@ re_check:
len = sectors;
update_sectors:
+ /* This situation should never happen */
+ WARN_ON(sectors < len);
+
s += len;
sectors -= len;
if (sectors > 0)
goto re_check;
- WARN_ON(sectors < 0);
-
if (unacked_badblocks > 0)
rv = -1;
else if (acked_badblocks > 0)
@@ -1364,9 +1258,6 @@ update_sectors:
else
rv = 0;
- if (read_seqretry(&bb->lock, seq))
- goto retry;
-
return rv;
}
@@ -1404,10 +1295,30 @@ update_sectors:
* -1: there are bad blocks which have not yet been acknowledged in metadata.
* plus the start/length of the first bad section we overlap.
*/
-int badblocks_check(struct badblocks *bb, sector_t s, int sectors,
- sector_t *first_bad, int *bad_sectors)
+int badblocks_check(struct badblocks *bb, sector_t s, sector_t sectors,
+ sector_t *first_bad, sector_t *bad_sectors)
{
- return _badblocks_check(bb, s, sectors, first_bad, bad_sectors);
+ unsigned int seq;
+ int rv;
+
+ WARN_ON(bb->shift < 0 || sectors == 0);
+
+ if (bb->shift > 0) {
+ /* round the start down, and the end up */
+ sector_t target = s + sectors;
+
+ rounddown(s, 1 << bb->shift);
+ roundup(target, 1 << bb->shift);
+ sectors = target - s;
+ }
+
+retry:
+ seq = read_seqbegin(&bb->lock);
+ rv = _badblocks_check(bb, s, sectors, first_bad, bad_sectors);
+ if (read_seqretry(&bb->lock, seq))
+ goto retry;
+
+ return rv;
}
EXPORT_SYMBOL_GPL(badblocks_check);
@@ -1423,11 +1334,12 @@ EXPORT_SYMBOL_GPL(badblocks_check);
* decide how best to handle it.
*
* Return:
- * 0: success
- * 1: failed to set badblocks (out of space)
+ * true: success
+ * false: failed to set badblocks (out of space). Partial setting will be
+ * treated as failure.
*/
-int badblocks_set(struct badblocks *bb, sector_t s, int sectors,
- int acknowledged)
+bool badblocks_set(struct badblocks *bb, sector_t s, sector_t sectors,
+ int acknowledged)
{
return _badblocks_set(bb, s, sectors, acknowledged);
}
@@ -1444,10 +1356,10 @@ EXPORT_SYMBOL_GPL(badblocks_set);
* drop the remove request.
*
* Return:
- * 0: success
- * 1: failed to clear badblocks
+ * true: success
+ * false: failed to clear badblocks
*/
-int badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
+bool badblocks_clear(struct badblocks *bb, sector_t s, sector_t sectors)
{
return _badblocks_clear(bb, s, sectors);
}
@@ -1479,6 +1391,11 @@ void ack_all_badblocks(struct badblocks *bb)
p[i] = BB_MAKE(start, len, 1);
}
}
+
+ for (i = 0; i < bb->count; i++)
+ while (try_adjacent_combine(bb, i))
+ ;
+
bb->unacked_exist = 0;
}
write_sequnlock_irq(&bb->lock);
@@ -1564,10 +1481,10 @@ ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len,
return -EINVAL;
}
- if (badblocks_set(bb, sector, length, !unack))
+ if (!badblocks_set(bb, sector, length, !unack))
return -ENOSPC;
- else
- return len;
+
+ return len;
}
EXPORT_SYMBOL_GPL(badblocks_store);
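
With the return type flipped to bool, call sites now read as predicates instead of 0/1 error codes. A minimal caller sketch (hypothetical helper, not part of this patch):

	#include <linux/badblocks.h>

	static void mark_bad_region(struct badblocks *bb, sector_t start,
				    sector_t nr_sectors)
	{
		/*
		 * badblocks_set() now returns true on success and false when
		 * the table is full; per the updated kerneldoc above, a
		 * partial set counts as failure.
		 */
		if (!badblocks_set(bb, start, nr_sectors, 1 /* acknowledged */))
			pr_warn("badblocks: no space to record %llu+%llu\n",
				(unsigned long long)start,
				(unsigned long long)nr_sectors);
	}
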
diff --git a/block/bio-integrity-auto.c b/block/bio-integrity-auto.c
new file mode 100644
index 000000000000..e524c609be50
--- /dev/null
+++ b/block/bio-integrity-auto.c
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2007, 2008, 2009 Oracle Corporation
+ * Written by: Martin K. Petersen <martin.petersen@oracle.com>
+ *
+ * Automatically generate and verify integrity data on PI capable devices if the
+ * bio submitter didn't provide PI itself. This ensures that kernel verifies
+ * data integrity even if the file system (or other user of the block device) is
+ * not aware of PI.
+ */
+#include <linux/blk-integrity.h>
+#include <linux/workqueue.h>
+#include "blk.h"
+
+struct bio_integrity_data {
+ struct bio *bio;
+ struct bvec_iter saved_bio_iter;
+ struct work_struct work;
+ struct bio_integrity_payload bip;
+ struct bio_vec bvec;
+};
+
+static struct kmem_cache *bid_slab;
+static mempool_t bid_pool;
+static struct workqueue_struct *kintegrityd_wq;
+
+static void bio_integrity_finish(struct bio_integrity_data *bid)
+{
+ bid->bio->bi_integrity = NULL;
+ bid->bio->bi_opf &= ~REQ_INTEGRITY;
+ kfree(bvec_virt(bid->bip.bip_vec));
+ mempool_free(bid, &bid_pool);
+}
+
+static void bio_integrity_verify_fn(struct work_struct *work)
+{
+ struct bio_integrity_data *bid =
+ container_of(work, struct bio_integrity_data, work);
+ struct bio *bio = bid->bio;
+
+ blk_integrity_verify_iter(bio, &bid->saved_bio_iter);
+ bio_integrity_finish(bid);
+ bio_endio(bio);
+}
+
+/**
+ * __bio_integrity_endio - Integrity I/O completion function
+ * @bio: Protected bio
+ *
+ * Normally I/O completion is done in interrupt context. However, verifying I/O
+ * integrity is a time-consuming task which must be run in process context.
+ *
+ * This function postpones completion accordingly.
+ */
+bool __bio_integrity_endio(struct bio *bio)
+{
+ struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
+ struct bio_integrity_payload *bip = bio_integrity(bio);
+ struct bio_integrity_data *bid =
+ container_of(bip, struct bio_integrity_data, bip);
+
+ if (bio_op(bio) == REQ_OP_READ && !bio->bi_status && bi->csum_type) {
+ INIT_WORK(&bid->work, bio_integrity_verify_fn);
+ queue_work(kintegrityd_wq, &bid->work);
+ return false;
+ }
+
+ bio_integrity_finish(bid);
+ return true;
+}
+
+/**
+ * bio_integrity_prep - Prepare bio for integrity I/O
+ * @bio: bio to prepare
+ *
+ * Checks if the bio already has an integrity payload attached. If it does, the
+ * payload has been generated by another kernel subsystem, and we just pass it
+ * through.
+ * Otherwise allocates an integrity payload. For writes, the integrity
+ * metadata will be generated; for reads, the completion handler will
+ * verify it.
+ */
+bool bio_integrity_prep(struct bio *bio)
+{
+ struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
+ struct bio_integrity_data *bid;
+ gfp_t gfp = GFP_NOIO;
+ unsigned int len;
+ void *buf;
+
+ if (!bi)
+ return true;
+
+ if (!bio_sectors(bio))
+ return true;
+
+ /* Already protected? */
+ if (bio_integrity(bio))
+ return true;
+
+ switch (bio_op(bio)) {
+ case REQ_OP_READ:
+ if (bi->flags & BLK_INTEGRITY_NOVERIFY)
+ return true;
+ break;
+ case REQ_OP_WRITE:
+ if (bi->flags & BLK_INTEGRITY_NOGENERATE)
+ return true;
+
+ /*
+ * Zero the memory allocated to not leak uninitialized kernel
+ * memory to disk for non-integrity metadata where nothing else
+ * initializes the memory.
+ */
+ if (bi->csum_type == BLK_INTEGRITY_CSUM_NONE)
+ gfp |= __GFP_ZERO;
+ break;
+ default:
+ return true;
+ }
+
+ if (WARN_ON_ONCE(bio_has_crypt_ctx(bio)))
+ return true;
+
+ /* Allocate kernel buffer for protection data */
+ len = bio_integrity_bytes(bi, bio_sectors(bio));
+ buf = kmalloc(len, gfp);
+ if (!buf)
+ goto err_end_io;
+ bid = mempool_alloc(&bid_pool, GFP_NOIO);
+ if (!bid)
+ goto err_free_buf;
+ bio_integrity_init(bio, &bid->bip, &bid->bvec, 1);
+
+ bid->bio = bio;
+
+ bid->bip.bip_flags |= BIP_BLOCK_INTEGRITY;
+ bip_set_seed(&bid->bip, bio->bi_iter.bi_sector);
+
+ if (bi->csum_type == BLK_INTEGRITY_CSUM_IP)
+ bid->bip.bip_flags |= BIP_IP_CHECKSUM;
+ if (bi->csum_type)
+ bid->bip.bip_flags |= BIP_CHECK_GUARD;
+ if (bi->flags & BLK_INTEGRITY_REF_TAG)
+ bid->bip.bip_flags |= BIP_CHECK_REFTAG;
+
+ if (bio_integrity_add_page(bio, virt_to_page(buf), len,
+ offset_in_page(buf)) < len)
+ goto err_end_io;
+
+ /* Auto-generate integrity metadata if this is a write */
+ if (bio_data_dir(bio) == WRITE)
+ blk_integrity_generate(bio);
+ else
+ bid->saved_bio_iter = bio->bi_iter;
+ return true;
+
+err_free_buf:
+ kfree(buf);
+err_end_io:
+ bio->bi_status = BLK_STS_RESOURCE;
+ bio_endio(bio);
+ return false;
+}
+EXPORT_SYMBOL(bio_integrity_prep);
+
+void blk_flush_integrity(void)
+{
+ flush_workqueue(kintegrityd_wq);
+}
+
+static int __init blk_integrity_auto_init(void)
+{
+ bid_slab = kmem_cache_create("bio_integrity_data",
+ sizeof(struct bio_integrity_data), 0,
+ SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
+
+ if (mempool_init_slab_pool(&bid_pool, BIO_POOL_SIZE, bid_slab))
+ panic("bio: can't create integrity pool\n");
+
+ /*
+ * kintegrityd won't block much but may burn a lot of CPU cycles.
+ * Make it highpri CPU intensive wq with max concurrency of 1.
+ */
+ kintegrityd_wq = alloc_workqueue("kintegrityd", WQ_MEM_RECLAIM |
+ WQ_HIGHPRI | WQ_CPU_INTENSIVE, 1);
+ if (!kintegrityd_wq)
+ panic("Failed to create kintegrityd\n");
+ return 0;
+}
+subsys_initcall(blk_integrity_auto_init);
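
The new file takes over the prep/verify logic that used to live in bio-integrity.c; from a submission path the interaction looks roughly like this (illustrative sketch only, assuming the usual blk-mq call site):

	#include <linux/bio.h>

	static void submit_with_auto_pi(struct bio *bio)
	{
		/*
		 * bio_integrity_prep() returns false only after it has
		 * already completed the bio with BLK_STS_RESOURCE. On
		 * success, a write carries freshly generated PI, and a read
		 * will be verified on completion by __bio_integrity_endio()
		 * via the kintegrityd workqueue.
		 */
		if (!bio_integrity_prep(bio))
			return;
		submit_bio_noacct(bio);
	}
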
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 5d81ad9a3d20..608594a154a5 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -7,20 +7,12 @@
*/
#include <linux/blk-integrity.h>
-#include <linux/mempool.h>
-#include <linux/export.h>
-#include <linux/bio.h>
-#include <linux/workqueue.h>
-#include <linux/slab.h>
#include "blk.h"
-static struct kmem_cache *bip_slab;
-static struct workqueue_struct *kintegrityd_wq;
-
-void blk_flush_integrity(void)
-{
- flush_workqueue(kintegrityd_wq);
-}
+struct bio_integrity_alloc {
+ struct bio_integrity_payload bip;
+ struct bio_vec bvecs[];
+};
/**
* bio_integrity_free - Free bio integrity payload
@@ -30,21 +22,23 @@ void blk_flush_integrity(void)
*/
void bio_integrity_free(struct bio *bio)
{
- struct bio_integrity_payload *bip = bio_integrity(bio);
- struct bio_set *bs = bio->bi_pool;
-
- if (bs && mempool_initialized(&bs->bio_integrity_pool)) {
- if (bip->bip_vec)
- bvec_free(&bs->bvec_integrity_pool, bip->bip_vec,
- bip->bip_max_vcnt);
- mempool_free(bip, &bs->bio_integrity_pool);
- } else {
- kfree(bip);
- }
+ kfree(bio_integrity(bio));
bio->bi_integrity = NULL;
bio->bi_opf &= ~REQ_INTEGRITY;
}
+void bio_integrity_init(struct bio *bio, struct bio_integrity_payload *bip,
+ struct bio_vec *bvecs, unsigned int nr_vecs)
+{
+ memset(bip, 0, sizeof(*bip));
+ bip->bip_max_vcnt = nr_vecs;
+ if (nr_vecs)
+ bip->bip_vec = bvecs;
+
+ bio->bi_integrity = bip;
+ bio->bi_opf |= REQ_INTEGRITY;
+}
+
/**
* bio_integrity_alloc - Allocate integrity payload and attach it to bio
* @bio: bio to attach integrity metadata to
@@ -59,48 +53,16 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
gfp_t gfp_mask,
unsigned int nr_vecs)
{
- struct bio_integrity_payload *bip;
- struct bio_set *bs = bio->bi_pool;
- unsigned inline_vecs;
+ struct bio_integrity_alloc *bia;
if (WARN_ON_ONCE(bio_has_crypt_ctx(bio)))
return ERR_PTR(-EOPNOTSUPP);
- if (!bs || !mempool_initialized(&bs->bio_integrity_pool)) {
- bip = kmalloc(struct_size(bip, bip_inline_vecs, nr_vecs), gfp_mask);
- inline_vecs = nr_vecs;
- } else {
- bip = mempool_alloc(&bs->bio_integrity_pool, gfp_mask);
- inline_vecs = BIO_INLINE_VECS;
- }
-
- if (unlikely(!bip))
+ bia = kmalloc(struct_size(bia, bvecs, nr_vecs), gfp_mask);
+ if (unlikely(!bia))
return ERR_PTR(-ENOMEM);
-
- memset(bip, 0, sizeof(*bip));
-
- /* always report as many vecs as asked explicitly, not inline vecs */
- bip->bip_max_vcnt = nr_vecs;
- if (nr_vecs > inline_vecs) {
- bip->bip_vec = bvec_alloc(&bs->bvec_integrity_pool,
- &bip->bip_max_vcnt, gfp_mask);
- if (!bip->bip_vec)
- goto err;
- } else if (nr_vecs) {
- bip->bip_vec = bip->bip_inline_vecs;
- }
-
- bip->bip_bio = bio;
- bio->bi_integrity = bip;
- bio->bi_opf |= REQ_INTEGRITY;
-
- return bip;
-err:
- if (bs && mempool_initialized(&bs->bio_integrity_pool))
- mempool_free(bip, &bs->bio_integrity_pool);
- else
- kfree(bip);
- return ERR_PTR(-ENOMEM);
+ bio_integrity_init(bio, &bia->bip, bia->bvecs, nr_vecs);
+ return &bia->bip;
}
EXPORT_SYMBOL(bio_integrity_alloc);
@@ -414,149 +376,6 @@ int bio_integrity_map_iter(struct bio *bio, struct uio_meta *meta)
}
/**
- * bio_integrity_prep - Prepare bio for integrity I/O
- * @bio: bio to prepare
- *
- * Description: Checks if the bio already has an integrity payload attached.
- * If it does, the payload has been generated by another kernel subsystem,
- * and we just pass it through. Otherwise allocates integrity payload.
- * The bio must have data direction, target device and start sector set priot
- * to calling. In the WRITE case, integrity metadata will be generated using
- * the block device's integrity function. In the READ case, the buffer
- * will be prepared for DMA and a suitable end_io handler set up.
- */
-bool bio_integrity_prep(struct bio *bio)
-{
- struct bio_integrity_payload *bip;
- struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
- unsigned int len;
- void *buf;
- gfp_t gfp = GFP_NOIO;
-
- if (!bi)
- return true;
-
- if (!bio_sectors(bio))
- return true;
-
- /* Already protected? */
- if (bio_integrity(bio))
- return true;
-
- switch (bio_op(bio)) {
- case REQ_OP_READ:
- if (bi->flags & BLK_INTEGRITY_NOVERIFY)
- return true;
- break;
- case REQ_OP_WRITE:
- if (bi->flags & BLK_INTEGRITY_NOGENERATE)
- return true;
-
- /*
- * Zero the memory allocated to not leak uninitialized kernel
- * memory to disk for non-integrity metadata where nothing else
- * initializes the memory.
- */
- if (bi->csum_type == BLK_INTEGRITY_CSUM_NONE)
- gfp |= __GFP_ZERO;
- break;
- default:
- return true;
- }
-
- /* Allocate kernel buffer for protection data */
- len = bio_integrity_bytes(bi, bio_sectors(bio));
- buf = kmalloc(len, gfp);
- if (unlikely(buf == NULL)) {
- goto err_end_io;
- }
-
- bip = bio_integrity_alloc(bio, GFP_NOIO, 1);
- if (IS_ERR(bip)) {
- kfree(buf);
- goto err_end_io;
- }
-
- bip->bip_flags |= BIP_BLOCK_INTEGRITY;
- bip_set_seed(bip, bio->bi_iter.bi_sector);
-
- if (bi->csum_type == BLK_INTEGRITY_CSUM_IP)
- bip->bip_flags |= BIP_IP_CHECKSUM;
-
- /* describe what tags to check in payload */
- if (bi->csum_type)
- bip->bip_flags |= BIP_CHECK_GUARD;
- if (bi->flags & BLK_INTEGRITY_REF_TAG)
- bip->bip_flags |= BIP_CHECK_REFTAG;
- if (bio_integrity_add_page(bio, virt_to_page(buf), len,
- offset_in_page(buf)) < len) {
- printk(KERN_ERR "could not attach integrity payload\n");
- goto err_end_io;
- }
-
- /* Auto-generate integrity metadata if this is a write */
- if (bio_data_dir(bio) == WRITE)
- blk_integrity_generate(bio);
- else
- bip->bio_iter = bio->bi_iter;
- return true;
-
-err_end_io:
- bio->bi_status = BLK_STS_RESOURCE;
- bio_endio(bio);
- return false;
-}
-EXPORT_SYMBOL(bio_integrity_prep);
-
-/**
- * bio_integrity_verify_fn - Integrity I/O completion worker
- * @work: Work struct stored in bio to be verified
- *
- * Description: This workqueue function is called to complete a READ
- * request. The function verifies the transferred integrity metadata
- * and then calls the original bio end_io function.
- */
-static void bio_integrity_verify_fn(struct work_struct *work)
-{
- struct bio_integrity_payload *bip =
- container_of(work, struct bio_integrity_payload, bip_work);
- struct bio *bio = bip->bip_bio;
-
- blk_integrity_verify(bio);
-
- kfree(bvec_virt(bip->bip_vec));
- bio_integrity_free(bio);
- bio_endio(bio);
-}
-
-/**
- * __bio_integrity_endio - Integrity I/O completion function
- * @bio: Protected bio
- *
- * Description: Completion for integrity I/O
- *
- * Normally I/O completion is done in interrupt context. However,
- * verifying I/O integrity is a time-consuming task which must be run
- * in process context. This function postpones completion
- * accordingly.
- */
-bool __bio_integrity_endio(struct bio *bio)
-{
- struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
- struct bio_integrity_payload *bip = bio_integrity(bio);
-
- if (bio_op(bio) == REQ_OP_READ && !bio->bi_status && bi->csum_type) {
- INIT_WORK(&bip->bip_work, bio_integrity_verify_fn);
- queue_work(kintegrityd_wq, &bip->bip_work);
- return false;
- }
-
- kfree(bvec_virt(bip->bip_vec));
- bio_integrity_free(bio);
- return true;
-}
-
-/**
* bio_integrity_advance - Advance integrity vector
* @bio: bio whose integrity vector to update
* @bytes_done: number of data bytes that have been completed
@@ -617,44 +436,3 @@ int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
return 0;
}
-
-int bioset_integrity_create(struct bio_set *bs, int pool_size)
-{
- if (mempool_initialized(&bs->bio_integrity_pool))
- return 0;
-
- if (mempool_init_slab_pool(&bs->bio_integrity_pool,
- pool_size, bip_slab))
- return -1;
-
- if (biovec_init_pool(&bs->bvec_integrity_pool, pool_size)) {
- mempool_exit(&bs->bio_integrity_pool);
- return -1;
- }
-
- return 0;
-}
-EXPORT_SYMBOL(bioset_integrity_create);
-
-void bioset_integrity_free(struct bio_set *bs)
-{
- mempool_exit(&bs->bio_integrity_pool);
- mempool_exit(&bs->bvec_integrity_pool);
-}
-
-void __init bio_integrity_init(void)
-{
- /*
- * kintegrityd won't block much but may burn a lot of CPU cycles.
- * Make it highpri CPU intensive wq with max concurrency of 1.
- */
- kintegrityd_wq = alloc_workqueue("kintegrityd", WQ_MEM_RECLAIM |
- WQ_HIGHPRI | WQ_CPU_INTENSIVE, 1);
- if (!kintegrityd_wq)
- panic("Failed to create kintegrityd\n");
-
- bip_slab = kmem_cache_create("bio_integrity_payload",
- sizeof(struct bio_integrity_payload) +
- sizeof(struct bio_vec) * BIO_INLINE_VECS,
- 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
-}
diff --git a/block/bio.c b/block/bio.c
index f0c416e5931d..3761600f3e04 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1026,9 +1026,10 @@ EXPORT_SYMBOL(bio_add_page);
void bio_add_folio_nofail(struct bio *bio, struct folio *folio, size_t len,
size_t off)
{
+ unsigned long nr = off / PAGE_SIZE;
+
WARN_ON_ONCE(len > UINT_MAX);
- WARN_ON_ONCE(off > UINT_MAX);
- __bio_add_page(bio, &folio->page, len, off);
+ __bio_add_page(bio, folio_page(folio, nr), len, off % PAGE_SIZE);
}
EXPORT_SYMBOL_GPL(bio_add_folio_nofail);
@@ -1049,9 +1050,11 @@ EXPORT_SYMBOL_GPL(bio_add_folio_nofail);
bool bio_add_folio(struct bio *bio, struct folio *folio, size_t len,
size_t off)
{
- if (len > UINT_MAX || off > UINT_MAX)
+ unsigned long nr = off / PAGE_SIZE;
+
+ if (len > UINT_MAX)
return false;
- return bio_add_page(bio, &folio->page, len, off) > 0;
+ return bio_add_page(bio, folio_page(folio, nr), len, off % PAGE_SIZE) > 0;
}
EXPORT_SYMBOL(bio_add_folio);
@@ -1657,7 +1660,6 @@ void bioset_exit(struct bio_set *bs)
mempool_exit(&bs->bio_pool);
mempool_exit(&bs->bvec_pool);
- bioset_integrity_free(bs);
if (bs->bio_slab)
bio_put_slab(bs);
bs->bio_slab = NULL;
@@ -1737,8 +1739,6 @@ static int __init init_bio(void)
BUILD_BUG_ON(BIO_FLAG_LAST > 8 * sizeof_field(struct bio, bi_flags));
- bio_integrity_init();
-
for (i = 0; i < ARRAY_SIZE(bvec_slabs); i++) {
struct biovec_slab *bvs = bvec_slabs + i;
@@ -1754,9 +1754,6 @@ static int __init init_bio(void)
BIOSET_NEED_BVECS | BIOSET_PERCPU_CACHE))
panic("bio: can't allocate bios\n");
- if (bioset_integrity_create(&fs_bio_set, BIO_POOL_SIZE))
- panic("bio: can't create integrity pool\n");
-
return 0;
}
subsys_initcall(init_bio);
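
The folio helpers now map a byte offset to the correct sub-page of a large folio instead of applying it to the head page. A hypothetical caller sketch:

	#include <linux/bio.h>
	#include <linux/sizes.h>

	static bool add_folio_tail(struct bio *bio, struct folio *folio)
	{
		size_t off = folio_size(folio) - SZ_4K;	/* last 4K of the folio */

		/*
		 * off / PAGE_SIZE now selects the right sub-page internally;
		 * previously the offset was only range-checked against
		 * UINT_MAX and applied to the head page.
		 */
		return bio_add_folio(bio, folio, SZ_4K, off);
	}
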
diff --git a/block/blk-core.c b/block/blk-core.c
index d6c4fa3943b5..362d0a55b07a 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -429,6 +429,7 @@ struct request_queue *blk_alloc_queue(struct queue_limits *lim, int node_id)
refcount_set(&q->refs, 1);
mutex_init(&q->debugfs_mutex);
+ mutex_init(&q->elevator_lock);
mutex_init(&q->sysfs_lock);
mutex_init(&q->limits_lock);
mutex_init(&q->rq_qos_mutex);
diff --git a/block/blk-flush.c b/block/blk-flush.c
index a72e2a83d075..43d6152897a4 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -95,9 +95,9 @@ static void blk_kick_flush(struct request_queue *q,
struct blk_flush_queue *fq, blk_opf_t flags);
static inline struct blk_flush_queue *
-blk_get_flush_queue(struct request_queue *q, struct blk_mq_ctx *ctx)
+blk_get_flush_queue(struct blk_mq_ctx *ctx)
{
- return blk_mq_map_queue(q, REQ_OP_FLUSH, ctx)->fq;
+ return blk_mq_map_queue(REQ_OP_FLUSH, ctx)->fq;
}
static unsigned int blk_flush_cur_seq(struct request *rq)
@@ -205,7 +205,7 @@ static enum rq_end_io_ret flush_end_io(struct request *flush_rq,
struct list_head *running;
struct request *rq, *n;
unsigned long flags = 0;
- struct blk_flush_queue *fq = blk_get_flush_queue(q, flush_rq->mq_ctx);
+ struct blk_flush_queue *fq = blk_get_flush_queue(flush_rq->mq_ctx);
/* release the tag's ownership to the req cloned from */
spin_lock_irqsave(&fq->mq_flush_lock, flags);
@@ -341,7 +341,7 @@ static enum rq_end_io_ret mq_flush_data_end_io(struct request *rq,
struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
struct blk_mq_ctx *ctx = rq->mq_ctx;
unsigned long flags;
- struct blk_flush_queue *fq = blk_get_flush_queue(q, ctx);
+ struct blk_flush_queue *fq = blk_get_flush_queue(ctx);
if (q->elevator) {
WARN_ON(rq->tag < 0);
@@ -382,7 +382,7 @@ static void blk_rq_init_flush(struct request *rq)
bool blk_insert_flush(struct request *rq)
{
struct request_queue *q = rq->q;
- struct blk_flush_queue *fq = blk_get_flush_queue(q, rq->mq_ctx);
+ struct blk_flush_queue *fq = blk_get_flush_queue(rq->mq_ctx);
bool supports_fua = q->limits.features & BLK_FEAT_FUA;
unsigned int policy = 0;
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 6be46e28459b..38e7bf3c3b4f 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -3248,6 +3248,7 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
}
memflags = blk_mq_freeze_queue(disk->queue);
+ mutex_lock(&disk->queue->elevator_lock);
blk_mq_quiesce_queue(disk->queue);
spin_lock_irq(&ioc->lock);
@@ -3355,6 +3356,7 @@ einval:
spin_unlock_irq(&ioc->lock);
blk_mq_unquiesce_queue(disk->queue);
+ mutex_unlock(&disk->queue->elevator_lock);
blk_mq_unfreeze_queue(disk->queue, memflags);
ret = -EINVAL;
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 7442ca27c2bf..109611445d40 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -349,7 +349,7 @@ bool blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
}
ctx = blk_mq_get_ctx(q);
- hctx = blk_mq_map_queue(q, bio->bi_opf, ctx);
+ hctx = blk_mq_map_queue(bio->bi_opf, ctx);
type = hctx->type;
if (list_empty_careful(&ctx->rq_lists[type]))
goto out_put;
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index 3feeeccf8a99..24656980f443 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -61,9 +61,9 @@ static ssize_t blk_mq_hw_sysfs_show(struct kobject *kobj,
if (!entry->show)
return -EIO;
- mutex_lock(&q->sysfs_lock);
+ mutex_lock(&q->elevator_lock);
res = entry->show(hctx, page);
- mutex_unlock(&q->sysfs_lock);
+ mutex_unlock(&q->elevator_lock);
return res;
}
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index b9f417d980b4..d880c50629d6 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -190,8 +190,7 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
sbitmap_finish_wait(bt, ws, &wait);
data->ctx = blk_mq_get_ctx(data->q);
- data->hctx = blk_mq_map_queue(data->q, data->cmd_flags,
- data->ctx);
+ data->hctx = blk_mq_map_queue(data->cmd_flags, data->ctx);
tags = blk_mq_tags_from_data(data);
if (data->flags & BLK_MQ_REQ_RESERVED)
bt = &tags->breserved_tags;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 40490ac88045..ae8494d88897 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -508,7 +508,7 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
retry:
data->ctx = blk_mq_get_ctx(q);
- data->hctx = blk_mq_map_queue(q, data->cmd_flags, data->ctx);
+ data->hctx = blk_mq_map_queue(data->cmd_flags, data->ctx);
if (q->elevator) {
/*
@@ -3314,6 +3314,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
rq->special_vec = rq_src->special_vec;
}
rq->nr_phys_segments = rq_src->nr_phys_segments;
+ rq->nr_integrity_segments = rq_src->nr_integrity_segments;
if (rq->bio && blk_crypto_rq_bio_prep(rq, rq->bio, gfp_mask) < 0)
goto free_and_out;
@@ -4094,6 +4095,8 @@ static void blk_mq_map_swqueue(struct request_queue *q)
struct blk_mq_ctx *ctx;
struct blk_mq_tag_set *set = q->tag_set;
+ mutex_lock(&q->elevator_lock);
+
queue_for_each_hw_ctx(q, hctx, i) {
cpumask_clear(hctx->cpumask);
hctx->nr_ctx = 0;
@@ -4198,6 +4201,8 @@ static void blk_mq_map_swqueue(struct request_queue *q)
hctx->next_cpu = blk_mq_first_mapped_cpu(hctx);
hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
}
+
+ mutex_unlock(&q->elevator_lock);
}
/*
@@ -4467,7 +4472,7 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
unsigned long i, j;
/* protect against switching io scheduler */
- mutex_lock(&q->sysfs_lock);
+ mutex_lock(&q->elevator_lock);
for (i = 0; i < set->nr_hw_queues; i++) {
int old_node;
int node = blk_mq_get_hctx_node(set, i);
@@ -4500,7 +4505,7 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
xa_for_each_start(&q->hctx_table, j, hctx, j)
blk_mq_exit_hctx(q, set, hctx, j);
- mutex_unlock(&q->sysfs_lock);
+ mutex_unlock(&q->elevator_lock);
/* unregister cpuhp callbacks for exited hctxs */
blk_mq_remove_hw_queues_cpuhp(q);
@@ -4933,10 +4938,9 @@ static bool blk_mq_elv_switch_none(struct list_head *head,
if (!qe)
return false;
- /* q->elevator needs protection from ->sysfs_lock */
- mutex_lock(&q->sysfs_lock);
+ /* Accessing q->elevator needs protection from ->elevator_lock. */
+ mutex_lock(&q->elevator_lock);
- /* the check has to be done with holding sysfs_lock */
if (!q->elevator) {
kfree(qe);
goto unlock;
@@ -4950,7 +4954,7 @@ static bool blk_mq_elv_switch_none(struct list_head *head,
list_add(&qe->node, head);
elevator_disable(q);
unlock:
- mutex_unlock(&q->sysfs_lock);
+ mutex_unlock(&q->elevator_lock);
return true;
}
@@ -4980,11 +4984,11 @@ static void blk_mq_elv_switch_back(struct list_head *head,
list_del(&qe->node);
kfree(qe);
- mutex_lock(&q->sysfs_lock);
+ mutex_lock(&q->elevator_lock);
elevator_switch(q, t);
/* drop the reference acquired in blk_mq_elv_switch_none */
elevator_put(t);
- mutex_unlock(&q->sysfs_lock);
+ mutex_unlock(&q->elevator_lock);
}
static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 44979e92b79f..3011a78cf16a 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -100,12 +100,10 @@ static inline enum hctx_type blk_mq_get_hctx_type(blk_opf_t opf)
/*
* blk_mq_map_queue() - map (cmd_flags,type) to hardware queue
- * @q: request queue
* @opf: operation type (REQ_OP_*) and flags (e.g. REQ_POLLED).
* @ctx: software queue cpu ctx
*/
-static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q,
- blk_opf_t opf,
+static inline struct blk_mq_hw_ctx *blk_mq_map_queue(blk_opf_t opf,
struct blk_mq_ctx *ctx)
{
return ctx->hctxs[blk_mq_get_hctx_type(opf)];
diff --git a/block/blk-settings.c b/block/blk-settings.c
index c44dadc35e1e..3c3e87bad6d3 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -21,7 +21,7 @@
void blk_queue_rq_timeout(struct request_queue *q, unsigned int timeout)
{
- q->rq_timeout = timeout;
+ WRITE_ONCE(q->rq_timeout, timeout);
}
EXPORT_SYMBOL_GPL(blk_queue_rq_timeout);
@@ -114,9 +114,15 @@ static int blk_validate_integrity_limits(struct queue_limits *lim)
pr_warn("invalid PI settings.\n");
return -EINVAL;
}
+ bi->flags |= BLK_INTEGRITY_NOGENERATE | BLK_INTEGRITY_NOVERIFY;
return 0;
}
+ if (lim->features & BLK_FEAT_BOUNCE_HIGH) {
+ pr_warn("no bounce buffer support for integrity metadata\n");
+ return -EINVAL;
+ }
+
if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY)) {
pr_warn("integrity support disabled.\n");
return -EINVAL;
@@ -859,36 +865,28 @@ bool queue_limits_stack_integrity(struct queue_limits *t,
if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY))
return true;
- if (!ti->tuple_size) {
- /* inherit the settings from the first underlying device */
- if (!(ti->flags & BLK_INTEGRITY_STACKED)) {
- ti->flags = BLK_INTEGRITY_DEVICE_CAPABLE |
- (bi->flags & BLK_INTEGRITY_REF_TAG);
- ti->csum_type = bi->csum_type;
- ti->tuple_size = bi->tuple_size;
- ti->pi_offset = bi->pi_offset;
- ti->interval_exp = bi->interval_exp;
- ti->tag_size = bi->tag_size;
- goto done;
- }
- if (!bi->tuple_size)
- goto done;
+ if (ti->flags & BLK_INTEGRITY_STACKED) {
+ if (ti->tuple_size != bi->tuple_size)
+ goto incompatible;
+ if (ti->interval_exp != bi->interval_exp)
+ goto incompatible;
+ if (ti->tag_size != bi->tag_size)
+ goto incompatible;
+ if (ti->csum_type != bi->csum_type)
+ goto incompatible;
+ if ((ti->flags & BLK_INTEGRITY_REF_TAG) !=
+ (bi->flags & BLK_INTEGRITY_REF_TAG))
+ goto incompatible;
+ } else {
+ ti->flags = BLK_INTEGRITY_STACKED;
+ ti->flags |= (bi->flags & BLK_INTEGRITY_DEVICE_CAPABLE) |
+ (bi->flags & BLK_INTEGRITY_REF_TAG);
+ ti->csum_type = bi->csum_type;
+ ti->tuple_size = bi->tuple_size;
+ ti->pi_offset = bi->pi_offset;
+ ti->interval_exp = bi->interval_exp;
+ ti->tag_size = bi->tag_size;
}
-
- if (ti->tuple_size != bi->tuple_size)
- goto incompatible;
- if (ti->interval_exp != bi->interval_exp)
- goto incompatible;
- if (ti->tag_size != bi->tag_size)
- goto incompatible;
- if (ti->csum_type != bi->csum_type)
- goto incompatible;
- if ((ti->flags & BLK_INTEGRITY_REF_TAG) !=
- (bi->flags & BLK_INTEGRITY_REF_TAG))
- goto incompatible;
-
-done:
- ti->flags |= BLK_INTEGRITY_STACKED;
return true;
incompatible:
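
After the restructure the rule is: the first device stacked seeds the integrity profile (marking it BLK_INTEGRITY_STACKED), and every later device must match it field for field or control falls to the incompatible: path. A hypothetical md/dm-style caller sketch:

	#include <linux/blk-integrity.h>

	static bool stack_member_integrity(struct queue_limits *t,
					   struct gendisk *member)
	{
		/* returns false on any field mismatch between stacked members */
		return queue_limits_stack_integrity(t, &member->queue->limits);
	}
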
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 6f548a4376aa..d584461a1d84 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -23,10 +23,11 @@
struct queue_sysfs_entry {
struct attribute attr;
ssize_t (*show)(struct gendisk *disk, char *page);
+ ssize_t (*show_limit)(struct gendisk *disk, char *page);
+
ssize_t (*store)(struct gendisk *disk, const char *page, size_t count);
int (*store_limit)(struct gendisk *disk, const char *page,
size_t count, struct queue_limits *lim);
- void (*load_module)(struct gendisk *disk, const char *page, size_t count);
};
static ssize_t
@@ -52,7 +53,12 @@ queue_var_store(unsigned long *var, const char *page, size_t count)
static ssize_t queue_requests_show(struct gendisk *disk, char *page)
{
- return queue_var_show(disk->queue->nr_requests, page);
+ ssize_t ret;
+
+ mutex_lock(&disk->queue->elevator_lock);
+ ret = queue_var_show(disk->queue->nr_requests, page);
+ mutex_unlock(&disk->queue->elevator_lock);
+ return ret;
}
static ssize_t
@@ -60,27 +66,38 @@ queue_requests_store(struct gendisk *disk, const char *page, size_t count)
{
unsigned long nr;
int ret, err;
+ unsigned int memflags;
+ struct request_queue *q = disk->queue;
- if (!queue_is_mq(disk->queue))
+ if (!queue_is_mq(q))
return -EINVAL;
ret = queue_var_store(&nr, page, count);
if (ret < 0)
return ret;
+ memflags = blk_mq_freeze_queue(q);
+ mutex_lock(&q->elevator_lock);
if (nr < BLKDEV_MIN_RQ)
nr = BLKDEV_MIN_RQ;
err = blk_mq_update_nr_requests(disk->queue, nr);
if (err)
- return err;
-
+ ret = err;
+ mutex_unlock(&q->elevator_lock);
+ blk_mq_unfreeze_queue(q, memflags);
return ret;
}
static ssize_t queue_ra_show(struct gendisk *disk, char *page)
{
- return queue_var_show(disk->bdi->ra_pages << (PAGE_SHIFT - 10), page);
+ ssize_t ret;
+
+ mutex_lock(&disk->queue->limits_lock);
+ ret = queue_var_show(disk->bdi->ra_pages << (PAGE_SHIFT - 10), page);
+ mutex_unlock(&disk->queue->limits_lock);
+
+ return ret;
}
static ssize_t
@@ -88,11 +105,22 @@ queue_ra_store(struct gendisk *disk, const char *page, size_t count)
{
unsigned long ra_kb;
ssize_t ret;
+ unsigned int memflags;
+ struct request_queue *q = disk->queue;
ret = queue_var_store(&ra_kb, page, count);
if (ret < 0)
return ret;
+ /*
+ * ->ra_pages is protected by ->limits_lock because it is usually
+ * calculated from the queue limits by queue_limits_commit_update.
+ */
+ mutex_lock(&q->limits_lock);
+ memflags = blk_mq_freeze_queue(q);
disk->bdi->ra_pages = ra_kb >> (PAGE_SHIFT - 10);
+ mutex_unlock(&q->limits_lock);
+ blk_mq_unfreeze_queue(q, memflags);
+
return ret;
}
@@ -238,8 +266,9 @@ static ssize_t queue_poll_show(struct gendisk *disk, char *page)
{
if (queue_is_mq(disk->queue))
return sysfs_emit(page, "%u\n", blk_mq_can_poll(disk->queue));
+
return sysfs_emit(page, "%u\n",
- !!(disk->queue->limits.features & BLK_FEAT_POLL));
+ !!(disk->queue->limits.features & BLK_FEAT_POLL));
}
static ssize_t queue_zoned_show(struct gendisk *disk, char *page)
@@ -286,17 +315,21 @@ static ssize_t queue_nomerges_store(struct gendisk *disk, const char *page,
size_t count)
{
unsigned long nm;
+ unsigned int memflags;
+ struct request_queue *q = disk->queue;
ssize_t ret = queue_var_store(&nm, page, count);
if (ret < 0)
return ret;
- blk_queue_flag_clear(QUEUE_FLAG_NOMERGES, disk->queue);
- blk_queue_flag_clear(QUEUE_FLAG_NOXMERGES, disk->queue);
+ memflags = blk_mq_freeze_queue(q);
+ blk_queue_flag_clear(QUEUE_FLAG_NOMERGES, q);
+ blk_queue_flag_clear(QUEUE_FLAG_NOXMERGES, q);
if (nm == 2)
- blk_queue_flag_set(QUEUE_FLAG_NOMERGES, disk->queue);
+ blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q);
else if (nm)
- blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, disk->queue);
+ blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
+ blk_mq_unfreeze_queue(q, memflags);
return ret;
}
@@ -316,11 +349,19 @@ queue_rq_affinity_store(struct gendisk *disk, const char *page, size_t count)
#ifdef CONFIG_SMP
struct request_queue *q = disk->queue;
unsigned long val;
+ unsigned int memflags;
ret = queue_var_store(&val, page, count);
if (ret < 0)
return ret;
+ /*
+ * Here we update two queue flags, each using atomic bitops. Although
+ * updating the two flags isn't atomic as a whole, it should be harmless
+ * because the flags are read individually with atomic test_bit
+ * operations, so we don't grab any lock while updating them.
+ */
+ memflags = blk_mq_freeze_queue(q);
if (val == 2) {
blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
blk_queue_flag_set(QUEUE_FLAG_SAME_FORCE, q);
@@ -331,6 +372,7 @@ queue_rq_affinity_store(struct gendisk *disk, const char *page, size_t count)
blk_queue_flag_clear(QUEUE_FLAG_SAME_COMP, q);
blk_queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q);
}
+ blk_mq_unfreeze_queue(q, memflags);
#endif
return ret;
}
@@ -344,29 +386,43 @@ static ssize_t queue_poll_delay_store(struct gendisk *disk, const char *page,
static ssize_t queue_poll_store(struct gendisk *disk, const char *page,
size_t count)
{
- if (!(disk->queue->limits.features & BLK_FEAT_POLL))
- return -EINVAL;
+ unsigned int memflags;
+ ssize_t ret = count;
+ struct request_queue *q = disk->queue;
+
+ memflags = blk_mq_freeze_queue(q);
+ if (!(q->limits.features & BLK_FEAT_POLL)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
pr_info_ratelimited("writes to the poll attribute are ignored.\n");
pr_info_ratelimited("please use driver specific parameters instead.\n");
- return count;
+out:
+ blk_mq_unfreeze_queue(q, memflags);
+ return ret;
}
static ssize_t queue_io_timeout_show(struct gendisk *disk, char *page)
{
- return sysfs_emit(page, "%u\n", jiffies_to_msecs(disk->queue->rq_timeout));
+ return sysfs_emit(page, "%u\n",
+ jiffies_to_msecs(READ_ONCE(disk->queue->rq_timeout)));
}
static ssize_t queue_io_timeout_store(struct gendisk *disk, const char *page,
size_t count)
{
- unsigned int val;
+ unsigned int val, memflags;
int err;
+ struct request_queue *q = disk->queue;
err = kstrtou32(page, 10, &val);
if (err || val == 0)
return -EINVAL;
- blk_queue_rq_timeout(disk->queue, msecs_to_jiffies(val));
+ memflags = blk_mq_freeze_queue(q);
+ blk_queue_rq_timeout(q, msecs_to_jiffies(val));
+ blk_mq_unfreeze_queue(q, memflags);
return count;
}
@@ -412,57 +468,55 @@ static struct queue_sysfs_entry _prefix##_entry = { \
.store = _prefix##_store, \
};
-#define QUEUE_LIM_RW_ENTRY(_prefix, _name) \
+#define QUEUE_LIM_RO_ENTRY(_prefix, _name) \
static struct queue_sysfs_entry _prefix##_entry = { \
- .attr = { .name = _name, .mode = 0644 }, \
- .show = _prefix##_show, \
- .store_limit = _prefix##_store, \
+ .attr = { .name = _name, .mode = 0444 }, \
+ .show_limit = _prefix##_show, \
}
-#define QUEUE_RW_LOAD_MODULE_ENTRY(_prefix, _name) \
-static struct queue_sysfs_entry _prefix##_entry = { \
+#define QUEUE_LIM_RW_ENTRY(_prefix, _name) \
+static struct queue_sysfs_entry _prefix##_entry = { \
.attr = { .name = _name, .mode = 0644 }, \
- .show = _prefix##_show, \
- .load_module = _prefix##_load_module, \
- .store = _prefix##_store, \
+ .show_limit = _prefix##_show, \
+ .store_limit = _prefix##_store, \
}
QUEUE_RW_ENTRY(queue_requests, "nr_requests");
QUEUE_RW_ENTRY(queue_ra, "read_ahead_kb");
QUEUE_LIM_RW_ENTRY(queue_max_sectors, "max_sectors_kb");
-QUEUE_RO_ENTRY(queue_max_hw_sectors, "max_hw_sectors_kb");
-QUEUE_RO_ENTRY(queue_max_segments, "max_segments");
-QUEUE_RO_ENTRY(queue_max_integrity_segments, "max_integrity_segments");
-QUEUE_RO_ENTRY(queue_max_segment_size, "max_segment_size");
-QUEUE_RW_LOAD_MODULE_ENTRY(elv_iosched, "scheduler");
-
-QUEUE_RO_ENTRY(queue_logical_block_size, "logical_block_size");
-QUEUE_RO_ENTRY(queue_physical_block_size, "physical_block_size");
-QUEUE_RO_ENTRY(queue_chunk_sectors, "chunk_sectors");
-QUEUE_RO_ENTRY(queue_io_min, "minimum_io_size");
-QUEUE_RO_ENTRY(queue_io_opt, "optimal_io_size");
-
-QUEUE_RO_ENTRY(queue_max_discard_segments, "max_discard_segments");
-QUEUE_RO_ENTRY(queue_discard_granularity, "discard_granularity");
-QUEUE_RO_ENTRY(queue_max_hw_discard_sectors, "discard_max_hw_bytes");
+QUEUE_LIM_RO_ENTRY(queue_max_hw_sectors, "max_hw_sectors_kb");
+QUEUE_LIM_RO_ENTRY(queue_max_segments, "max_segments");
+QUEUE_LIM_RO_ENTRY(queue_max_integrity_segments, "max_integrity_segments");
+QUEUE_LIM_RO_ENTRY(queue_max_segment_size, "max_segment_size");
+QUEUE_RW_ENTRY(elv_iosched, "scheduler");
+
+QUEUE_LIM_RO_ENTRY(queue_logical_block_size, "logical_block_size");
+QUEUE_LIM_RO_ENTRY(queue_physical_block_size, "physical_block_size");
+QUEUE_LIM_RO_ENTRY(queue_chunk_sectors, "chunk_sectors");
+QUEUE_LIM_RO_ENTRY(queue_io_min, "minimum_io_size");
+QUEUE_LIM_RO_ENTRY(queue_io_opt, "optimal_io_size");
+
+QUEUE_LIM_RO_ENTRY(queue_max_discard_segments, "max_discard_segments");
+QUEUE_LIM_RO_ENTRY(queue_discard_granularity, "discard_granularity");
+QUEUE_LIM_RO_ENTRY(queue_max_hw_discard_sectors, "discard_max_hw_bytes");
QUEUE_LIM_RW_ENTRY(queue_max_discard_sectors, "discard_max_bytes");
QUEUE_RO_ENTRY(queue_discard_zeroes_data, "discard_zeroes_data");
-QUEUE_RO_ENTRY(queue_atomic_write_max_sectors, "atomic_write_max_bytes");
-QUEUE_RO_ENTRY(queue_atomic_write_boundary_sectors,
+QUEUE_LIM_RO_ENTRY(queue_atomic_write_max_sectors, "atomic_write_max_bytes");
+QUEUE_LIM_RO_ENTRY(queue_atomic_write_boundary_sectors,
"atomic_write_boundary_bytes");
-QUEUE_RO_ENTRY(queue_atomic_write_unit_max, "atomic_write_unit_max_bytes");
-QUEUE_RO_ENTRY(queue_atomic_write_unit_min, "atomic_write_unit_min_bytes");
+QUEUE_LIM_RO_ENTRY(queue_atomic_write_unit_max, "atomic_write_unit_max_bytes");
+QUEUE_LIM_RO_ENTRY(queue_atomic_write_unit_min, "atomic_write_unit_min_bytes");
QUEUE_RO_ENTRY(queue_write_same_max, "write_same_max_bytes");
-QUEUE_RO_ENTRY(queue_max_write_zeroes_sectors, "write_zeroes_max_bytes");
-QUEUE_RO_ENTRY(queue_max_zone_append_sectors, "zone_append_max_bytes");
-QUEUE_RO_ENTRY(queue_zone_write_granularity, "zone_write_granularity");
+QUEUE_LIM_RO_ENTRY(queue_max_write_zeroes_sectors, "write_zeroes_max_bytes");
+QUEUE_LIM_RO_ENTRY(queue_max_zone_append_sectors, "zone_append_max_bytes");
+QUEUE_LIM_RO_ENTRY(queue_zone_write_granularity, "zone_write_granularity");
-QUEUE_RO_ENTRY(queue_zoned, "zoned");
+QUEUE_LIM_RO_ENTRY(queue_zoned, "zoned");
QUEUE_RO_ENTRY(queue_nr_zones, "nr_zones");
-QUEUE_RO_ENTRY(queue_max_open_zones, "max_open_zones");
-QUEUE_RO_ENTRY(queue_max_active_zones, "max_active_zones");
+QUEUE_LIM_RO_ENTRY(queue_max_open_zones, "max_open_zones");
+QUEUE_LIM_RO_ENTRY(queue_max_active_zones, "max_active_zones");
QUEUE_RW_ENTRY(queue_nomerges, "nomerges");
QUEUE_LIM_RW_ENTRY(queue_iostats_passthrough, "iostats_passthrough");
@@ -470,16 +524,16 @@ QUEUE_RW_ENTRY(queue_rq_affinity, "rq_affinity");
QUEUE_RW_ENTRY(queue_poll, "io_poll");
QUEUE_RW_ENTRY(queue_poll_delay, "io_poll_delay");
QUEUE_LIM_RW_ENTRY(queue_wc, "write_cache");
-QUEUE_RO_ENTRY(queue_fua, "fua");
-QUEUE_RO_ENTRY(queue_dax, "dax");
+QUEUE_LIM_RO_ENTRY(queue_fua, "fua");
+QUEUE_LIM_RO_ENTRY(queue_dax, "dax");
QUEUE_RW_ENTRY(queue_io_timeout, "io_timeout");
-QUEUE_RO_ENTRY(queue_virt_boundary_mask, "virt_boundary_mask");
-QUEUE_RO_ENTRY(queue_dma_alignment, "dma_alignment");
+QUEUE_LIM_RO_ENTRY(queue_virt_boundary_mask, "virt_boundary_mask");
+QUEUE_LIM_RO_ENTRY(queue_dma_alignment, "dma_alignment");
/* legacy alias for logical_block_size: */
static struct queue_sysfs_entry queue_hw_sector_size_entry = {
- .attr = {.name = "hw_sector_size", .mode = 0444 },
- .show = queue_logical_block_size_show,
+ .attr = {.name = "hw_sector_size", .mode = 0444 },
+ .show_limit = queue_logical_block_size_show,
};
QUEUE_LIM_RW_ENTRY(queue_rotational, "rotational");
@@ -503,14 +557,24 @@ static ssize_t queue_var_store64(s64 *var, const char *page)
static ssize_t queue_wb_lat_show(struct gendisk *disk, char *page)
{
- if (!wbt_rq_qos(disk->queue))
- return -EINVAL;
+ ssize_t ret;
+ struct request_queue *q = disk->queue;
+
+ mutex_lock(&q->elevator_lock);
+ if (!wbt_rq_qos(q)) {
+ ret = -EINVAL;
+ goto out;
+ }
- if (wbt_disabled(disk->queue))
- return sysfs_emit(page, "0\n");
+ if (wbt_disabled(q)) {
+ ret = sysfs_emit(page, "0\n");
+ goto out;
+ }
- return sysfs_emit(page, "%llu\n",
- div_u64(wbt_get_min_lat(disk->queue), 1000));
+ ret = sysfs_emit(page, "%llu\n", div_u64(wbt_get_min_lat(q), 1000));
+out:
+ mutex_unlock(&q->elevator_lock);
+ return ret;
}
static ssize_t queue_wb_lat_store(struct gendisk *disk, const char *page,
@@ -520,6 +584,7 @@ static ssize_t queue_wb_lat_store(struct gendisk *disk, const char *page,
struct rq_qos *rqos;
ssize_t ret;
s64 val;
+ unsigned int memflags;
ret = queue_var_store64(&val, page);
if (ret < 0)
@@ -527,20 +592,24 @@ static ssize_t queue_wb_lat_store(struct gendisk *disk, const char *page,
if (val < -1)
return -EINVAL;
+ memflags = blk_mq_freeze_queue(q);
+ mutex_lock(&q->elevator_lock);
+
rqos = wbt_rq_qos(q);
if (!rqos) {
ret = wbt_init(disk);
if (ret)
- return ret;
+ goto out;
}
+ ret = count;
if (val == -1)
val = wbt_default_latency_nsec(q);
else if (val >= 0)
val *= 1000ULL;
if (wbt_get_min_lat(q) == val)
- return count;
+ goto out;
/*
* Ensure that the queue is idled, in case the latency update
@@ -552,8 +621,11 @@ static ssize_t queue_wb_lat_store(struct gendisk *disk, const char *page,
wbt_set_min_lat(q, val);
blk_mq_unquiesce_queue(q);
+out:
+ mutex_unlock(&q->elevator_lock);
+ blk_mq_unfreeze_queue(q, memflags);
- return count;
+ return ret;
}
QUEUE_RW_ENTRY(queue_wb_lat, "wbt_lat_usec");
@@ -561,7 +633,9 @@ QUEUE_RW_ENTRY(queue_wb_lat, "wbt_lat_usec");
/* Common attributes for bio-based and request-based queues. */
static struct attribute *queue_attrs[] = {
- &queue_ra_entry.attr,
+ /*
+ * Attributes which are protected with q->limits_lock.
+ */
&queue_max_hw_sectors_entry.attr,
&queue_max_sectors_entry.attr,
&queue_max_segments_entry.attr,
@@ -577,44 +651,63 @@ static struct attribute *queue_attrs[] = {
&queue_discard_granularity_entry.attr,
&queue_max_discard_sectors_entry.attr,
&queue_max_hw_discard_sectors_entry.attr,
- &queue_discard_zeroes_data_entry.attr,
&queue_atomic_write_max_sectors_entry.attr,
&queue_atomic_write_boundary_sectors_entry.attr,
&queue_atomic_write_unit_min_entry.attr,
&queue_atomic_write_unit_max_entry.attr,
- &queue_write_same_max_entry.attr,
&queue_max_write_zeroes_sectors_entry.attr,
&queue_max_zone_append_sectors_entry.attr,
&queue_zone_write_granularity_entry.attr,
&queue_rotational_entry.attr,
&queue_zoned_entry.attr,
- &queue_nr_zones_entry.attr,
&queue_max_open_zones_entry.attr,
&queue_max_active_zones_entry.attr,
- &queue_nomerges_entry.attr,
&queue_iostats_passthrough_entry.attr,
&queue_iostats_entry.attr,
&queue_stable_writes_entry.attr,
&queue_add_random_entry.attr,
- &queue_poll_entry.attr,
&queue_wc_entry.attr,
&queue_fua_entry.attr,
&queue_dax_entry.attr,
- &queue_poll_delay_entry.attr,
&queue_virt_boundary_mask_entry.attr,
&queue_dma_alignment_entry.attr,
+
+ /*
+ * Attributes which require some form of locking other than
+ * q->sysfs_lock.
+ */
+ &queue_ra_entry.attr,
+
+ /*
+ * Attributes which don't require locking.
+ */
+ &queue_discard_zeroes_data_entry.attr,
+ &queue_write_same_max_entry.attr,
+ &queue_nr_zones_entry.attr,
+ &queue_nomerges_entry.attr,
+ &queue_poll_entry.attr,
+ &queue_poll_delay_entry.attr,
+
NULL,
};
/* Request-based queue attributes that are not relevant for bio-based queues. */
static struct attribute *blk_mq_queue_attrs[] = {
- &queue_requests_entry.attr,
+ /*
+ * Attributes which require some form of locking other than
+ * q->sysfs_lock.
+ */
&elv_iosched_entry.attr,
- &queue_rq_affinity_entry.attr,
- &queue_io_timeout_entry.attr,
+ &queue_requests_entry.attr,
#ifdef CONFIG_BLK_WBT
&queue_wb_lat_entry.attr,
#endif
+ /*
+ * Attributes which don't require locking.
+ */
+ &queue_rq_affinity_entry.attr,
+ &queue_io_timeout_entry.attr,
+
NULL,
};
@@ -664,14 +757,20 @@ queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
{
struct queue_sysfs_entry *entry = to_queue(attr);
struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj);
- ssize_t res;
- if (!entry->show)
+ if (!entry->show && !entry->show_limit)
return -EIO;
- mutex_lock(&disk->queue->sysfs_lock);
- res = entry->show(disk, page);
- mutex_unlock(&disk->queue->sysfs_lock);
- return res;
+
+ if (entry->show_limit) {
+ ssize_t res;
+
+ mutex_lock(&disk->queue->limits_lock);
+ res = entry->show_limit(disk, page);
+ mutex_unlock(&disk->queue->limits_lock);
+ return res;
+ }
+
+ return entry->show(disk, page);
}
static ssize_t
@@ -681,21 +780,13 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr,
struct queue_sysfs_entry *entry = to_queue(attr);
struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj);
struct request_queue *q = disk->queue;
- unsigned int memflags;
- ssize_t res;
if (!entry->store_limit && !entry->store)
return -EIO;
- /*
- * If the attribute needs to load a module, do it before freezing the
- * queue to ensure that the module file can be read when the request
- * queue is the one for the device storing the module file.
- */
- if (entry->load_module)
- entry->load_module(disk, page, length);
-
if (entry->store_limit) {
+ ssize_t res;
+
struct queue_limits lim = queue_limits_start_update(q);
res = entry->store_limit(disk, page, length, &lim);
@@ -710,12 +801,7 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr,
return length;
}
- mutex_lock(&q->sysfs_lock);
- memflags = blk_mq_freeze_queue(q);
- res = entry->store(disk, page, length);
- blk_mq_unfreeze_queue(q, memflags);
- mutex_unlock(&q->sysfs_lock);
- return res;
+ return entry->store(disk, page, length);
}
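With show/store now split from show_limit/store_limit, a limit-backed attribute no longer takes sysfs_lock or freezes the queue itself: queue_attr_store() opens the limits update, passes &lim to the handler, and commits it under q->limits_lock (the commit call falls outside the quoted hunk). A minimal sketch of such a handler, assuming the queue_sysfs_entry layout used in blk-sysfs.c; the attribute name "foo" and the choice of the io_opt field are invented for illustration:

	static ssize_t queue_foo_store(struct gendisk *disk, const char *page,
				       size_t count, struct queue_limits *lim)
	{
		unsigned long val;
		int err = kstrtoul(page, 10, &val);

		if (err)
			return err;
		lim->io_opt = val;	/* illustrative field only */
		return count;
	}

	static struct queue_sysfs_entry queue_foo_entry = {
		.attr		= { .name = "foo", .mode = 0644 },
		.store_limit	= queue_foo_store,
	};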
static const struct sysfs_ops queue_sysfs_ops = {
@@ -784,18 +870,22 @@ int blk_register_queue(struct gendisk *disk)
if (ret)
goto out_debugfs_remove;
+ ret = blk_crypto_sysfs_register(disk);
+ if (ret)
+ goto out_unregister_ia_ranges;
+
+ mutex_lock(&q->elevator_lock);
if (q->elevator) {
ret = elv_register_queue(q, false);
- if (ret)
- goto out_unregister_ia_ranges;
+ if (ret) {
+ mutex_unlock(&q->elevator_lock);
+ goto out_crypto_sysfs_unregister;
+ }
}
-
- ret = blk_crypto_sysfs_register(disk);
- if (ret)
- goto out_elv_unregister;
+ wbt_enable_default(disk);
+ mutex_unlock(&q->elevator_lock);
blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q);
- wbt_enable_default(disk);
/* Now everything is ready and send out KOBJ_ADD uevent */
kobject_uevent(&disk->queue_kobj, KOBJ_ADD);
@@ -817,8 +907,8 @@ int blk_register_queue(struct gendisk *disk)
return ret;
-out_elv_unregister:
- elv_unregister_queue(q);
+out_crypto_sysfs_unregister:
+ blk_crypto_sysfs_unregister(disk);
out_unregister_ia_ranges:
disk_unregister_independent_access_ranges(disk);
out_debugfs_remove:
@@ -864,8 +954,11 @@ void blk_unregister_queue(struct gendisk *disk)
blk_mq_sysfs_unregister(disk);
blk_crypto_sysfs_unregister(disk);
- mutex_lock(&q->sysfs_lock);
+ mutex_lock(&q->elevator_lock);
elv_unregister_queue(q);
+ mutex_unlock(&q->elevator_lock);
+
+ mutex_lock(&q->sysfs_lock);
disk_unregister_independent_access_ranges(disk);
mutex_unlock(&q->sysfs_lock);
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 8d149aff9fd0..91dab43c65ab 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -478,8 +478,6 @@ static inline void throtl_start_new_slice_with_credit(struct throtl_grp *tg,
{
tg->bytes_disp[rw] = 0;
tg->io_disp[rw] = 0;
- tg->carryover_bytes[rw] = 0;
- tg->carryover_ios[rw] = 0;
/*
* Previous slice has expired. We must have trimmed it after last
@@ -498,16 +496,14 @@ static inline void throtl_start_new_slice_with_credit(struct throtl_grp *tg,
}
static inline void throtl_start_new_slice(struct throtl_grp *tg, bool rw,
- bool clear_carryover)
+ bool clear)
{
- tg->bytes_disp[rw] = 0;
- tg->io_disp[rw] = 0;
+ if (clear) {
+ tg->bytes_disp[rw] = 0;
+ tg->io_disp[rw] = 0;
+ }
tg->slice_start[rw] = jiffies;
tg->slice_end[rw] = jiffies + tg->td->throtl_slice;
- if (clear_carryover) {
- tg->carryover_bytes[rw] = 0;
- tg->carryover_ios[rw] = 0;
- }
throtl_log(&tg->service_queue,
"[%c] new slice start=%lu end=%lu jiffies=%lu",
@@ -599,29 +595,34 @@ static inline void throtl_trim_slice(struct throtl_grp *tg, bool rw)
* sooner, then we need to reduce slice_end. A high bogus slice_end
* is bad because it does not allow new slice to start.
*/
-
throtl_set_slice_end(tg, rw, jiffies + tg->td->throtl_slice);
time_elapsed = rounddown(jiffies - tg->slice_start[rw],
tg->td->throtl_slice);
- if (!time_elapsed)
+ /* Don't trim slice until at least 2 slices are used */
+ if (time_elapsed < tg->td->throtl_slice * 2)
return;
+ /*
+ * The bio submission time may be a few jiffies more than the expected
+ * waiting time, because 'extra_bytes' can't be divided evenly in
+ * tg_within_bps_limit(), and because of timer wakeup delay. In that
+ * case, adjusting slice_start would discard the extra wait time and
+ * cause a lower rate than expected. Therefore, in addition to the
+ * rounddown above, one extra slice is preserved to absorb the
+ * deviation.
+ time_elapsed -= tg->td->throtl_slice;
bytes_trim = calculate_bytes_allowed(tg_bps_limit(tg, rw),
- time_elapsed) +
- tg->carryover_bytes[rw];
- io_trim = calculate_io_allowed(tg_iops_limit(tg, rw), time_elapsed) +
- tg->carryover_ios[rw];
+ time_elapsed);
+ io_trim = calculate_io_allowed(tg_iops_limit(tg, rw), time_elapsed);
if (bytes_trim <= 0 && io_trim <= 0)
return;
- tg->carryover_bytes[rw] = 0;
if ((long long)tg->bytes_disp[rw] >= bytes_trim)
tg->bytes_disp[rw] -= bytes_trim;
else
tg->bytes_disp[rw] = 0;
- tg->carryover_ios[rw] = 0;
if ((int)tg->io_disp[rw] >= io_trim)
tg->io_disp[rw] -= io_trim;
else
@@ -636,7 +637,8 @@ static inline void throtl_trim_slice(struct throtl_grp *tg, bool rw)
jiffies);
}
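A quick worked example of the new trim arithmetic, with invented numbers:

	/*
	 * Assume throtl_slice = 8 jiffies, slice_start[rw] = 100 and
	 * jiffies = 119: time_elapsed = rounddown(119 - 100, 8) = 16,
	 * which passes the "at least 2 slices" check (16 >= 2 * 8).
	 * One slice is then held back for deviation (time_elapsed -= 8),
	 * so only 8 jiffies' worth of bytes and IOs are trimmed from the
	 * dispatch counters; the remainder of throtl_trim_slice() (not
	 * shown in this hunk) advances slice_start accordingly.
	 */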
-static void __tg_update_carryover(struct throtl_grp *tg, bool rw)
+static void __tg_update_carryover(struct throtl_grp *tg, bool rw,
+ long long *bytes, int *ios)
{
unsigned long jiffy_elapsed = jiffies - tg->slice_start[rw];
u64 bps_limit = tg_bps_limit(tg, rw);
@@ -649,26 +651,28 @@ static void __tg_update_carryover(struct throtl_grp *tg, bool rw)
* configuration.
*/
if (bps_limit != U64_MAX)
- tg->carryover_bytes[rw] +=
- calculate_bytes_allowed(bps_limit, jiffy_elapsed) -
+ *bytes = calculate_bytes_allowed(bps_limit, jiffy_elapsed) -
tg->bytes_disp[rw];
if (iops_limit != UINT_MAX)
- tg->carryover_ios[rw] +=
- calculate_io_allowed(iops_limit, jiffy_elapsed) -
+ *ios = calculate_io_allowed(iops_limit, jiffy_elapsed) -
tg->io_disp[rw];
+ tg->bytes_disp[rw] -= *bytes;
+ tg->io_disp[rw] -= *ios;
}
static void tg_update_carryover(struct throtl_grp *tg)
{
+ long long bytes[2] = {0};
+ int ios[2] = {0};
+
if (tg->service_queue.nr_queued[READ])
- __tg_update_carryover(tg, READ);
+ __tg_update_carryover(tg, READ, &bytes[READ], &ios[READ]);
if (tg->service_queue.nr_queued[WRITE])
- __tg_update_carryover(tg, WRITE);
+ __tg_update_carryover(tg, WRITE, &bytes[WRITE], &ios[WRITE]);
/* see comments in struct throtl_grp for meaning of these fields. */
throtl_log(&tg->service_queue, "%s: %lld %lld %d %d\n", __func__,
- tg->carryover_bytes[READ], tg->carryover_bytes[WRITE],
- tg->carryover_ios[READ], tg->carryover_ios[WRITE]);
+ bytes[READ], bytes[WRITE], ios[READ], ios[WRITE]);
}
static unsigned long tg_within_iops_limit(struct throtl_grp *tg, struct bio *bio,
@@ -686,8 +690,7 @@ static unsigned long tg_within_iops_limit(struct throtl_grp *tg, struct bio *bio
/* Round up to the next throttle slice, wait time must be nonzero */
jiffy_elapsed_rnd = roundup(jiffy_elapsed + 1, tg->td->throtl_slice);
- io_allowed = calculate_io_allowed(iops_limit, jiffy_elapsed_rnd) +
- tg->carryover_ios[rw];
+ io_allowed = calculate_io_allowed(iops_limit, jiffy_elapsed_rnd);
if (io_allowed > 0 && tg->io_disp[rw] + 1 <= io_allowed)
return 0;
@@ -720,8 +723,7 @@ static unsigned long tg_within_bps_limit(struct throtl_grp *tg, struct bio *bio,
jiffy_elapsed_rnd = tg->td->throtl_slice;
jiffy_elapsed_rnd = roundup(jiffy_elapsed_rnd, tg->td->throtl_slice);
- bytes_allowed = calculate_bytes_allowed(bps_limit, jiffy_elapsed_rnd) +
- tg->carryover_bytes[rw];
+ bytes_allowed = calculate_bytes_allowed(bps_limit, jiffy_elapsed_rnd);
if (bytes_allowed > 0 && tg->bytes_disp[rw] + bio_size <= bytes_allowed)
return 0;
@@ -810,13 +812,10 @@ static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
unsigned int bio_size = throtl_bio_data_size(bio);
/* Charge the bio to the group */
- if (!bio_flagged(bio, BIO_BPS_THROTTLED)) {
+ if (!bio_flagged(bio, BIO_BPS_THROTTLED))
tg->bytes_disp[rw] += bio_size;
- tg->last_bytes_disp[rw] += bio_size;
- }
tg->io_disp[rw]++;
- tg->last_io_disp[rw]++;
}
/**
@@ -1614,13 +1613,6 @@ static bool tg_within_limit(struct throtl_grp *tg, struct bio *bio, bool rw)
return tg_may_dispatch(tg, bio, NULL);
}
-static void tg_dispatch_in_debt(struct throtl_grp *tg, struct bio *bio, bool rw)
-{
- if (!bio_flagged(bio, BIO_BPS_THROTTLED))
- tg->carryover_bytes[rw] -= throtl_bio_data_size(bio);
- tg->carryover_ios[rw]--;
-}
-
bool __blk_throtl_bio(struct bio *bio)
{
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
@@ -1657,10 +1649,12 @@ bool __blk_throtl_bio(struct bio *bio)
/*
* IOs which may cause priority inversions are
* dispatched directly, even if they're over limit.
- * Debts are handled by carryover_bytes/ios while
- * calculating wait time.
+ *
+ * Charge and dispatch such IOs directly: the throttle
+ * control algorithm is adaptive, and the extra IO bytes
+ * will be throttled later to pay back the debt.
*/
- tg_dispatch_in_debt(tg, bio, rw);
+ throtl_charge_bio(tg, bio);
} else {
/* if above limits, break to queue */
break;
diff --git a/block/blk-throttle.h b/block/blk-throttle.h
index 1a36d1278eea..7964cc041e06 100644
--- a/block/blk-throttle.h
+++ b/block/blk-throttle.h
@@ -102,12 +102,9 @@ struct throtl_grp {
unsigned int iops[2];
/* Number of bytes dispatched in current slice */
- uint64_t bytes_disp[2];
+ int64_t bytes_disp[2];
/* Number of bio's dispatched in current slice */
- unsigned int io_disp[2];
-
- uint64_t last_bytes_disp[2];
- unsigned int last_io_disp[2];
+ int io_disp[2];
/*
* The following two fields are updated when new configuration is
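With the separate carryover fields gone, debt from over-limit dispatch is folded directly into the dispatch counters, which is why bytes_disp/io_disp become signed above and why the trim path compares them with explicit casts. A numeric illustration, values invented:

	/*
	 * With an 8 MiB/s bps limit and a 100 ms throtl_slice,
	 * calculate_bytes_allowed() grants roughly 800 KiB per slice.
	 * If a 2 MiB priority-inversion bio is charged directly via
	 * throtl_charge_bio(), bytes_disp[rw] overshoots the budget by
	 * about 1.2 MiB, and later slices dispatch nothing until that
	 * debt is consumed; carryover adjustments can likewise drive
	 * the counters negative, hence the signed types.
	 */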
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 6dfc659d22e2..f1754d07f7e0 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -136,8 +136,9 @@ enum {
RWB_MIN_WRITE_SAMPLES = 3,
/*
- * If we have this number of consecutive windows with not enough
- * information to scale up or down, scale up.
+ * If we have this number of consecutive windows without enough
+ * information to scale up or down, slowly return to center state
+ * (step == 0).
*/
RWB_UNKNOWN_BUMP = 5,
};
@@ -446,9 +447,9 @@ static void wb_timer_fn(struct blk_stat_callback *cb)
break;
case LAT_UNKNOWN_WRITES:
/*
- * We started a the center step, but don't have a valid
- * read/write sample, but we do have writes going on.
- * Allow step to go negative, to increase write perf.
+ * We don't have a valid read/write sample, but we do have
+ * writes going on. Allow step to go negative, to increase
+ * write performance.
*/
scale_up(rwb);
break;
@@ -638,11 +639,7 @@ static void wbt_cleanup(struct rq_qos *rqos, struct bio *bio)
__wbt_done(rqos, flags);
}
-/*
- * May sleep, if we have exceeded the writeback limits. Caller can pass
- * in an irq held spinlock, if it holds one when calling this function.
- * If we do sleep, we'll release and re-grab it.
- */
+/* May sleep, if we have exceeded the writeback limits. */
static void wbt_wait(struct rq_qos *rqos, struct bio *bio)
{
struct rq_wb *rwb = RQWB(rqos);
diff --git a/block/blk.h b/block/blk.h
index 90fa5f28ccab..8f5554a6989e 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -710,7 +710,7 @@ int bdev_open(struct block_device *bdev, blk_mode_t mode, void *holder,
int bdev_permission(dev_t dev, blk_mode_t mode, void *holder);
void blk_integrity_generate(struct bio *bio);
-void blk_integrity_verify(struct bio *bio);
+void blk_integrity_verify_iter(struct bio *bio, struct bvec_iter *saved_iter);
void blk_integrity_prepare(struct request *rq);
void blk_integrity_complete(struct request *rq, unsigned int nr_bytes);
diff --git a/block/bounce.c b/block/bounce.c
index 0d898cd5ec49..09a9616cf209 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -41,8 +41,6 @@ static void init_bounce_bioset(void)
ret = bioset_init(&bounce_bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
BUG_ON(ret);
- if (bioset_integrity_create(&bounce_bio_set, BIO_POOL_SIZE))
- BUG_ON(1);
ret = bioset_init(&bounce_bio_split, BIO_POOL_SIZE, 0, 0);
BUG_ON(ret);
diff --git a/block/elevator.c b/block/elevator.c
index cd2ce4921601..b4d08026b02c 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -457,7 +457,7 @@ int elv_register_queue(struct request_queue *q, bool uevent)
struct elevator_queue *e = q->elevator;
int error;
- lockdep_assert_held(&q->sysfs_lock);
+ lockdep_assert_held(&q->elevator_lock);
error = kobject_add(&e->kobj, &q->disk->queue_kobj, "iosched");
if (!error) {
@@ -481,7 +481,7 @@ void elv_unregister_queue(struct request_queue *q)
{
struct elevator_queue *e = q->elevator;
- lockdep_assert_held(&q->sysfs_lock);
+ lockdep_assert_held(&q->elevator_lock);
if (e && test_and_clear_bit(ELEVATOR_FLAG_REGISTERED, &e->flags)) {
kobject_uevent(&e->kobj, KOBJ_REMOVE);
@@ -618,7 +618,7 @@ int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
unsigned int memflags;
int ret;
- lockdep_assert_held(&q->sysfs_lock);
+ lockdep_assert_held(&q->elevator_lock);
memflags = blk_mq_freeze_queue(q);
blk_mq_quiesce_queue(q);
@@ -655,7 +655,7 @@ void elevator_disable(struct request_queue *q)
{
unsigned int memflags;
- lockdep_assert_held(&q->sysfs_lock);
+ lockdep_assert_held(&q->elevator_lock);
memflags = blk_mq_freeze_queue(q);
blk_mq_quiesce_queue(q);
@@ -700,34 +700,44 @@ static int elevator_change(struct request_queue *q, const char *elevator_name)
return ret;
}
-void elv_iosched_load_module(struct gendisk *disk, const char *buf,
- size_t count)
+static void elv_iosched_load_module(char *elevator_name)
{
- char elevator_name[ELV_NAME_MAX];
struct elevator_type *found;
- const char *name;
-
- strscpy(elevator_name, buf, sizeof(elevator_name));
- name = strstrip(elevator_name);
spin_lock(&elv_list_lock);
- found = __elevator_find(name);
+ found = __elevator_find(elevator_name);
spin_unlock(&elv_list_lock);
if (!found)
- request_module("%s-iosched", name);
+ request_module("%s-iosched", elevator_name);
}
ssize_t elv_iosched_store(struct gendisk *disk, const char *buf,
size_t count)
{
char elevator_name[ELV_NAME_MAX];
+ char *name;
int ret;
+ unsigned int memflags;
+ struct request_queue *q = disk->queue;
+ /*
+ * If the attribute needs to load a module, do it before freezing the
+ * queue to ensure that the module file can be read when the request
+ * queue is the one for the device storing the module file.
+ */
strscpy(elevator_name, buf, sizeof(elevator_name));
- ret = elevator_change(disk->queue, strstrip(elevator_name));
+ name = strstrip(elevator_name);
+
+ elv_iosched_load_module(name);
+
+ memflags = blk_mq_freeze_queue(q);
+ mutex_lock(&q->elevator_lock);
+ ret = elevator_change(q, name);
if (!ret)
- return count;
+ ret = count;
+ mutex_unlock(&q->elevator_lock);
+ blk_mq_unfreeze_queue(q, memflags);
return ret;
}
@@ -738,6 +748,7 @@ ssize_t elv_iosched_show(struct gendisk *disk, char *name)
struct elevator_type *cur = NULL, *e;
int len = 0;
+ mutex_lock(&q->elevator_lock);
if (!q->elevator) {
len += sprintf(name+len, "[none] ");
} else {
@@ -755,6 +766,8 @@ ssize_t elv_iosched_show(struct gendisk *disk, char *name)
spin_unlock(&elv_list_lock);
len += sprintf(name+len, "\n");
+ mutex_unlock(&q->elevator_lock);
+
return len;
}
diff --git a/block/elevator.h b/block/elevator.h
index e526662c5dbb..e4e44dfac503 100644
--- a/block/elevator.h
+++ b/block/elevator.h
@@ -148,8 +148,6 @@ extern void elv_unregister(struct elevator_type *);
* io scheduler sysfs switching
*/
ssize_t elv_iosched_show(struct gendisk *disk, char *page);
-void elv_iosched_load_module(struct gendisk *disk, const char *page,
- size_t count);
ssize_t elv_iosched_store(struct gendisk *disk, const char *page, size_t count);
extern bool elv_bio_merge_ok(struct request *, struct bio *);
diff --git a/block/genhd.c b/block/genhd.c
index e9375e20d866..c2bd86cd09de 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -565,8 +565,11 @@ out_free_ext_minor:
if (disk->major == BLOCK_EXT_MAJOR)
blk_free_ext_minor(disk->first_minor);
out_exit_elevator:
- if (disk->queue->elevator)
+ if (disk->queue->elevator) {
+ mutex_lock(&disk->queue->elevator_lock);
elevator_exit(disk->queue);
+ mutex_unlock(&disk->queue->elevator_lock);
+ }
return ret;
}
EXPORT_SYMBOL_GPL(add_disk_fwnode);
@@ -742,9 +745,9 @@ void del_gendisk(struct gendisk *disk)
blk_mq_quiesce_queue(q);
if (q->elevator) {
- mutex_lock(&q->sysfs_lock);
+ mutex_lock(&q->elevator_lock);
elevator_exit(q);
- mutex_unlock(&q->sysfs_lock);
+ mutex_unlock(&q->elevator_lock);
}
rq_qos_exit(q);
blk_mq_unquiesce_queue(q);
diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
index dc31f2dfa414..0f0f8452609a 100644
--- a/block/kyber-iosched.c
+++ b/block/kyber-iosched.c
@@ -568,7 +568,7 @@ static bool kyber_bio_merge(struct request_queue *q, struct bio *bio,
unsigned int nr_segs)
{
struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
- struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, bio->bi_opf, ctx);
+ struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(bio->bi_opf, ctx);
struct kyber_hctx_data *khd = hctx->sched_data;
struct kyber_ctx_queue *kcq = &khd->kcqs[ctx->index_hw[hctx->type]];
unsigned int sched_domain = kyber_sched_domain(bio->bi_opf);
diff --git a/block/partitions/sgi.c b/block/partitions/sgi.c
index 9cc6b8c1eea4..b5ecddd5181a 100644
--- a/block/partitions/sgi.c
+++ b/block/partitions/sgi.c
@@ -50,8 +50,6 @@ int sgi_partition(struct parsed_partitions *state)
p = &label->partitions[0];
magic = label->magic_mushroom;
if(be32_to_cpu(magic) != SGI_LABEL_MAGIC) {
- /*printk("Dev %s SGI disklabel: bad magic %08x\n",
- state->disk->disk_name, be32_to_cpu(magic));*/
put_dev_sector(sect);
return 0;
}
diff --git a/block/partitions/sun.c b/block/partitions/sun.c
index ddf9e6def4b2..2419af76120f 100644
--- a/block/partitions/sun.c
+++ b/block/partitions/sun.c
@@ -74,8 +74,6 @@ int sun_partition(struct parsed_partitions *state)
p = label->partitions;
if (be16_to_cpu(label->magic) != SUN_LABEL_MAGIC) {
-/* printk(KERN_INFO "Dev %s Sun disklabel: bad magic %04x\n",
- state->disk->disk_name, be16_to_cpu(label->magic)); */
put_dev_sector(sect);
return 0;
}
diff --git a/block/t10-pi.c b/block/t10-pi.c
index 2d05421f0fa5..de172d56b1f3 100644
--- a/block/t10-pi.c
+++ b/block/t10-pi.c
@@ -404,7 +404,7 @@ void blk_integrity_generate(struct bio *bio)
}
}
-void blk_integrity_verify(struct bio *bio)
+void blk_integrity_verify_iter(struct bio *bio, struct bvec_iter *saved_iter)
{
struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
struct bio_integrity_payload *bip = bio_integrity(bio);
@@ -418,9 +418,9 @@ void blk_integrity_verify(struct bio *bio)
*/
iter.disk_name = bio->bi_bdev->bd_disk->disk_name;
iter.interval = 1 << bi->interval_exp;
- iter.seed = bip->bio_iter.bi_sector;
+ iter.seed = saved_iter->bi_sector;
iter.prot_buf = bvec_virt(bip->bip_vec);
- __bio_for_each_segment(bv, bio, bviter, bip->bio_iter) {
+ __bio_for_each_segment(bv, bio, bviter, *saved_iter) {
void *kaddr = bvec_kmap_local(&bv);
blk_status_t ret = BLK_STS_OK;
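blk_integrity_verify_iter() takes the iterator as an argument because bio->bi_iter has usually been advanced to completion by the time verification runs; the caller snapshots the iterator at submission and hands the copy back here. A hedged sketch of that calling pattern, with names loosely modeled on the new bio-integrity-auto.c rather than copied from it:

	struct bio_verify_ctx {
		struct bio *bio;
		struct bvec_iter saved_iter;	/* snapshot of bio->bi_iter at submit */
		struct work_struct work;
	};

	static void verify_work_fn(struct work_struct *work)
	{
		struct bio_verify_ctx *ctx =
			container_of(work, struct bio_verify_ctx, work);

		/* walk the data exactly as it looked when submitted */
		blk_integrity_verify_iter(ctx->bio, &ctx->saved_iter);
		bio_endio(ctx->bio);
	}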
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 68c943a77e41..8ca092303794 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -45,8 +45,6 @@ enum {
Lo_deleting,
};
-struct loop_func_table;
-
struct loop_device {
int lo_number;
loff_t lo_offset;
@@ -54,7 +52,8 @@ struct loop_device {
int lo_flags;
char lo_file_name[LO_NAME_SIZE];
- struct file * lo_backing_file;
+ struct file *lo_backing_file;
+ unsigned int lo_min_dio_size;
struct block_device *lo_device;
gfp_t old_gfp_mask;
@@ -169,29 +168,14 @@ static loff_t get_loop_size(struct loop_device *lo, struct file *file)
* of backing device, and the logical block size of loop is bigger than that of
* the backing device.
*/
-static bool lo_bdev_can_use_dio(struct loop_device *lo,
- struct block_device *backing_bdev)
-{
- unsigned int sb_bsize = bdev_logical_block_size(backing_bdev);
-
- if (queue_logical_block_size(lo->lo_queue) < sb_bsize)
- return false;
- if (lo->lo_offset & (sb_bsize - 1))
- return false;
- return true;
-}
-
static bool lo_can_use_dio(struct loop_device *lo)
{
- struct inode *inode = lo->lo_backing_file->f_mapping->host;
-
if (!(lo->lo_backing_file->f_mode & FMODE_CAN_ODIRECT))
return false;
-
- if (S_ISBLK(inode->i_mode))
- return lo_bdev_can_use_dio(lo, I_BDEV(inode));
- if (inode->i_sb->s_bdev)
- return lo_bdev_can_use_dio(lo, inode->i_sb->s_bdev);
+ if (queue_logical_block_size(lo->lo_queue) < lo->lo_min_dio_size)
+ return false;
+ if (lo->lo_offset & (lo->lo_min_dio_size - 1))
+ return false;
return true;
}
@@ -211,8 +195,6 @@ static inline void loop_update_dio(struct loop_device *lo)
WARN_ON_ONCE(lo->lo_state == Lo_bound &&
lo->lo_queue->mq_freeze_depth == 0);
- if (lo->lo_backing_file->f_flags & O_DIRECT)
- lo->lo_flags |= LO_FLAGS_DIRECT_IO;
if ((lo->lo_flags & LO_FLAGS_DIRECT_IO) && !lo_can_use_dio(lo))
lo->lo_flags &= ~LO_FLAGS_DIRECT_IO;
@@ -541,6 +523,28 @@ static void loop_reread_partitions(struct loop_device *lo)
__func__, lo->lo_number, lo->lo_file_name, rc);
}
+static unsigned int loop_query_min_dio_size(struct loop_device *lo)
+{
+ struct file *file = lo->lo_backing_file;
+ struct block_device *sb_bdev = file->f_mapping->host->i_sb->s_bdev;
+ struct kstat st;
+
+ /*
+ * Use the minimum dio alignment of the file system if provided.
+ */
+ if (!vfs_getattr(&file->f_path, &st, STATX_DIOALIGN, 0) &&
+ (st.result_mask & STATX_DIOALIGN))
+ return st.dio_offset_align;
+
+ /*
+ * In a perfect world this wouldn't be needed, but as of Linux 6.13 only
+ * a handful of file systems support the STATX_DIOALIGN flag.
+ */
+ if (sb_bdev)
+ return bdev_logical_block_size(sb_bdev);
+ return SECTOR_SIZE;
+}
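For reference, the same STATX_DIOALIGN information is available to userspace; a small standalone sketch (assumes a libc and kernel recent enough to expose statx() and STATX_DIOALIGN):

	#define _GNU_SOURCE
	#include <fcntl.h>		/* AT_FDCWD */
	#include <stdio.h>
	#include <sys/stat.h>		/* statx(), struct statx */

	int main(int argc, char **argv)
	{
		struct statx stx;

		if (argc < 2 || statx(AT_FDCWD, argv[1], 0, STATX_DIOALIGN, &stx)) {
			perror("statx");
			return 1;
		}
		if (stx.stx_mask & STATX_DIOALIGN)
			printf("dio mem align %u, dio offset align %u\n",
			       stx.stx_dio_mem_align, stx.stx_dio_offset_align);
		else
			puts("filesystem does not report STATX_DIOALIGN");
		return 0;
	}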
+
static inline int is_loop_device(struct file *file)
{
struct inode *i = file->f_mapping->host;
@@ -573,6 +577,17 @@ static int loop_validate_file(struct file *file, struct block_device *bdev)
return 0;
}
+static void loop_assign_backing_file(struct loop_device *lo, struct file *file)
+{
+ lo->lo_backing_file = file;
+ lo->old_gfp_mask = mapping_gfp_mask(file->f_mapping);
+ mapping_set_gfp_mask(file->f_mapping,
+ lo->old_gfp_mask & ~(__GFP_IO | __GFP_FS));
+ if (lo->lo_backing_file->f_flags & O_DIRECT)
+ lo->lo_flags |= LO_FLAGS_DIRECT_IO;
+ lo->lo_min_dio_size = loop_query_min_dio_size(lo);
+}
+
/*
* loop_change_fd switched the backing store of a loopback device to
* a new file. This is useful for operating system installers to free up
@@ -626,10 +641,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
disk_force_media_change(lo->lo_disk);
memflags = blk_mq_freeze_queue(lo->lo_queue);
mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask);
- lo->lo_backing_file = file;
- lo->old_gfp_mask = mapping_gfp_mask(file->f_mapping);
- mapping_set_gfp_mask(file->f_mapping,
- lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
+ loop_assign_backing_file(lo, file);
loop_update_dio(lo);
blk_mq_unfreeze_queue(lo->lo_queue, memflags);
partscan = lo->lo_flags & LO_FLAGS_PARTSCAN;
@@ -894,8 +906,8 @@ queue_work:
cmd_list = &lo->rootcg_cmd_list;
}
list_add_tail(&cmd->list_entry, cmd_list);
- spin_unlock_irq(&lo->lo_work_lock);
queue_work(lo->workqueue, work);
+ spin_unlock_irq(&lo->lo_work_lock);
}
static void loop_set_timer(struct loop_device *lo)
@@ -971,12 +983,11 @@ loop_set_status_from_info(struct loop_device *lo,
return 0;
}
-static unsigned int loop_default_blocksize(struct loop_device *lo,
- struct block_device *backing_bdev)
+static unsigned int loop_default_blocksize(struct loop_device *lo)
{
- /* In case of direct I/O, match underlying block size */
- if ((lo->lo_backing_file->f_flags & O_DIRECT) && backing_bdev)
- return bdev_logical_block_size(backing_bdev);
+ /* In case of direct I/O, match underlying minimum I/O size */
+ if (lo->lo_flags & LO_FLAGS_DIRECT_IO)
+ return lo->lo_min_dio_size;
return SECTOR_SIZE;
}
@@ -994,7 +1005,7 @@ static void loop_update_limits(struct loop_device *lo, struct queue_limits *lim,
backing_bdev = inode->i_sb->s_bdev;
if (!bsize)
- bsize = loop_default_blocksize(lo, backing_bdev);
+ bsize = loop_default_blocksize(lo);
loop_get_discard_config(lo, &granularity, &max_discard_sectors);
@@ -1019,7 +1030,6 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode,
const struct loop_config *config)
{
struct file *file = fget(config->fd);
- struct address_space *mapping;
struct queue_limits lim;
int error;
loff_t size;
@@ -1055,8 +1065,6 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode,
if (error)
goto out_unlock;
- mapping = file->f_mapping;
-
if ((config->info.lo_flags & ~LOOP_CONFIGURE_SETTABLE_FLAGS) != 0) {
error = -EINVAL;
goto out_unlock;
@@ -1088,9 +1096,7 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode,
set_disk_ro(lo->lo_disk, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0);
lo->lo_device = bdev;
- lo->lo_backing_file = file;
- lo->old_gfp_mask = mapping_gfp_mask(mapping);
- mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
+ loop_assign_backing_file(lo, file);
lim = queue_limits_start_update(lo->lo_queue);
loop_update_limits(lo, &lim, config->block_size);
diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index d94ef37480bd..4a5cd288dc07 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -473,6 +473,8 @@ NULLB_DEVICE_ATTR(shared_tags, bool, NULL);
NULLB_DEVICE_ATTR(shared_tag_bitmap, bool, NULL);
NULLB_DEVICE_ATTR(fua, bool, NULL);
NULLB_DEVICE_ATTR(rotational, bool, NULL);
+NULLB_DEVICE_ATTR(badblocks_once, bool, NULL);
+NULLB_DEVICE_ATTR(badblocks_partial_io, bool, NULL);
static ssize_t nullb_device_power_show(struct config_item *item, char *page)
{
@@ -559,14 +561,14 @@ static ssize_t nullb_device_badblocks_store(struct config_item *item,
goto out;
/* enable badblocks */
cmpxchg(&t_dev->badblocks.shift, -1, 0);
- if (buf[0] == '+')
- ret = badblocks_set(&t_dev->badblocks, start,
- end - start + 1, 1);
- else
- ret = badblocks_clear(&t_dev->badblocks, start,
- end - start + 1);
- if (ret == 0)
+ if (buf[0] == '+') {
+ if (badblocks_set(&t_dev->badblocks, start,
+ end - start + 1, 1))
+ ret = count;
+ } else if (badblocks_clear(&t_dev->badblocks, start,
+ end - start + 1)) {
ret = count;
+ }
out:
kfree(orig);
return ret;
@@ -592,41 +594,43 @@ static ssize_t nullb_device_zone_offline_store(struct config_item *item,
CONFIGFS_ATTR_WO(nullb_device_, zone_offline);
static struct configfs_attribute *nullb_device_attrs[] = {
- &nullb_device_attr_size,
+ &nullb_device_attr_badblocks,
+ &nullb_device_attr_badblocks_once,
+ &nullb_device_attr_badblocks_partial_io,
+ &nullb_device_attr_blocking,
+ &nullb_device_attr_blocksize,
+ &nullb_device_attr_cache_size,
&nullb_device_attr_completion_nsec,
- &nullb_device_attr_submit_queues,
- &nullb_device_attr_poll_queues,
+ &nullb_device_attr_discard,
+ &nullb_device_attr_fua,
&nullb_device_attr_home_node,
- &nullb_device_attr_queue_mode,
- &nullb_device_attr_blocksize,
- &nullb_device_attr_max_sectors,
- &nullb_device_attr_irqmode,
&nullb_device_attr_hw_queue_depth,
&nullb_device_attr_index,
- &nullb_device_attr_blocking,
- &nullb_device_attr_use_per_node_hctx,
- &nullb_device_attr_power,
- &nullb_device_attr_memory_backed,
- &nullb_device_attr_discard,
+ &nullb_device_attr_irqmode,
+ &nullb_device_attr_max_sectors,
&nullb_device_attr_mbps,
- &nullb_device_attr_cache_size,
- &nullb_device_attr_badblocks,
- &nullb_device_attr_zoned,
- &nullb_device_attr_zone_size,
+ &nullb_device_attr_memory_backed,
+ &nullb_device_attr_no_sched,
+ &nullb_device_attr_poll_queues,
+ &nullb_device_attr_power,
+ &nullb_device_attr_queue_mode,
+ &nullb_device_attr_rotational,
+ &nullb_device_attr_shared_tag_bitmap,
+ &nullb_device_attr_shared_tags,
+ &nullb_device_attr_size,
+ &nullb_device_attr_submit_queues,
+ &nullb_device_attr_use_per_node_hctx,
+ &nullb_device_attr_virt_boundary,
+ &nullb_device_attr_zone_append_max_sectors,
&nullb_device_attr_zone_capacity,
- &nullb_device_attr_zone_nr_conv,
- &nullb_device_attr_zone_max_open,
+ &nullb_device_attr_zone_full,
&nullb_device_attr_zone_max_active,
- &nullb_device_attr_zone_append_max_sectors,
- &nullb_device_attr_zone_readonly,
+ &nullb_device_attr_zone_max_open,
+ &nullb_device_attr_zone_nr_conv,
&nullb_device_attr_zone_offline,
- &nullb_device_attr_zone_full,
- &nullb_device_attr_virt_boundary,
- &nullb_device_attr_no_sched,
- &nullb_device_attr_shared_tags,
- &nullb_device_attr_shared_tag_bitmap,
- &nullb_device_attr_fua,
- &nullb_device_attr_rotational,
+ &nullb_device_attr_zone_readonly,
+ &nullb_device_attr_zone_size,
+ &nullb_device_attr_zoned,
NULL,
};
@@ -704,16 +708,28 @@ nullb_group_drop_item(struct config_group *group, struct config_item *item)
static ssize_t memb_group_features_show(struct config_item *item, char *page)
{
- return snprintf(page, PAGE_SIZE,
- "badblocks,blocking,blocksize,cache_size,fua,"
- "completion_nsec,discard,home_node,hw_queue_depth,"
- "irqmode,max_sectors,mbps,memory_backed,no_sched,"
- "poll_queues,power,queue_mode,shared_tag_bitmap,"
- "shared_tags,size,submit_queues,use_per_node_hctx,"
- "virt_boundary,zoned,zone_capacity,zone_max_active,"
- "zone_max_open,zone_nr_conv,zone_offline,zone_readonly,"
- "zone_size,zone_append_max_sectors,zone_full,"
- "rotational\n");
+
+ struct configfs_attribute **entry;
+ char delimiter = ',';
+ size_t left = PAGE_SIZE;
+ size_t written = 0;
+ int ret;
+
+ for (entry = &nullb_device_attrs[0]; *entry && left > 0; entry++) {
+ if (!*(entry + 1))
+ delimiter = '\n';
+ ret = snprintf(page + written, left, "%s%c", (*entry)->ca_name,
+ delimiter);
+ if (ret >= left) {
+ WARN_ONCE(1, "Too many null_blk features to print\n");
+ memzero_explicit(page, PAGE_SIZE);
+ return -ENOBUFS;
+ }
+ left -= ret;
+ written += ret;
+ }
+
+ return written;
}
CONFIGFS_ATTR_RO(memb_group_, features);
@@ -1249,25 +1265,37 @@ static int null_transfer(struct nullb *nullb, struct page *page,
return err;
}
-static blk_status_t null_handle_rq(struct nullb_cmd *cmd)
+/*
+ * Transfer data for the given request. The transfer size is capped with the
+ * nr_sectors argument.
+ */
+static blk_status_t null_handle_data_transfer(struct nullb_cmd *cmd,
+ sector_t nr_sectors)
{
struct request *rq = blk_mq_rq_from_pdu(cmd);
struct nullb *nullb = cmd->nq->dev->nullb;
int err = 0;
unsigned int len;
sector_t sector = blk_rq_pos(rq);
+ unsigned int max_bytes = nr_sectors << SECTOR_SHIFT;
+ unsigned int transferred_bytes = 0;
struct req_iterator iter;
struct bio_vec bvec;
spin_lock_irq(&nullb->lock);
rq_for_each_segment(bvec, rq, iter) {
len = bvec.bv_len;
+ if (transferred_bytes + len > max_bytes)
+ len = max_bytes - transferred_bytes;
err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset,
op_is_write(req_op(rq)), sector,
rq->cmd_flags & REQ_FUA);
if (err)
break;
sector += len >> SECTOR_SHIFT;
+ transferred_bytes += len;
+ if (transferred_bytes >= max_bytes)
+ break;
}
spin_unlock_irq(&nullb->lock);
@@ -1295,31 +1323,51 @@ static inline blk_status_t null_handle_throttled(struct nullb_cmd *cmd)
return sts;
}
-static inline blk_status_t null_handle_badblocks(struct nullb_cmd *cmd,
- sector_t sector,
- sector_t nr_sectors)
+/*
+ * Check if the command should fail because it targets bad blocks. If so,
+ * return BLK_STS_IOERR and update *nr_sectors to the number of sectors that
+ * can still be partially written or read, which may be less than requested.
+ *
+ * @cmd: The command to handle.
+ * @sector: The start sector for I/O.
+ * @nr_sectors: Specifies the number of sectors to write or read; on return,
+ * the number of sectors actually to be written or read.
+ */
+blk_status_t null_handle_badblocks(struct nullb_cmd *cmd, sector_t sector,
+ unsigned int *nr_sectors)
{
struct badblocks *bb = &cmd->nq->dev->badblocks;
- sector_t first_bad;
- int bad_sectors;
+ struct nullb_device *dev = cmd->nq->dev;
+ unsigned int block_sectors = dev->blocksize >> SECTOR_SHIFT;
+ sector_t first_bad, bad_sectors;
+ unsigned int partial_io_sectors = 0;
- if (badblocks_check(bb, sector, nr_sectors, &first_bad, &bad_sectors))
- return BLK_STS_IOERR;
+ if (!badblocks_check(bb, sector, *nr_sectors, &first_bad, &bad_sectors))
+ return BLK_STS_OK;
- return BLK_STS_OK;
+ if (cmd->nq->dev->badblocks_once)
+ badblocks_clear(bb, first_bad, bad_sectors);
+
+ if (cmd->nq->dev->badblocks_partial_io) {
+ if (!IS_ALIGNED(first_bad, block_sectors))
+ first_bad = ALIGN_DOWN(first_bad, block_sectors);
+ if (sector < first_bad)
+ partial_io_sectors = first_bad - sector;
+ }
+ *nr_sectors = partial_io_sectors;
+
+ return BLK_STS_IOERR;
}
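A worked example of the partial-I/O math above, with invented values:

	/*
	 * Assume blocksize 4096 (block_sectors == 8), a READ at sector 0
	 * with *nr_sectors == 16, and a bad range starting at sector 10.
	 * With badblocks_partial_io set, first_bad is aligned down to 8
	 * and partial_io_sectors = 8 - 0 = 8: the first 8 sectors are
	 * still transferred while the command completes with
	 * BLK_STS_IOERR. If badblocks_once is also set, the bad range is
	 * cleared first, so retrying the same I/O then succeeds.
	 */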
-static inline blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd,
- enum req_op op,
- sector_t sector,
- sector_t nr_sectors)
+blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd, enum req_op op,
+ sector_t sector, sector_t nr_sectors)
{
struct nullb_device *dev = cmd->nq->dev;
if (op == REQ_OP_DISCARD)
return null_handle_discard(dev, sector, nr_sectors);
- return null_handle_rq(cmd);
+ return null_handle_data_transfer(cmd, nr_sectors);
}
static void nullb_zero_read_cmd_buffer(struct nullb_cmd *cmd)
@@ -1366,18 +1414,19 @@ blk_status_t null_process_cmd(struct nullb_cmd *cmd, enum req_op op,
sector_t sector, unsigned int nr_sectors)
{
struct nullb_device *dev = cmd->nq->dev;
+ blk_status_t badblocks_ret = BLK_STS_OK;
blk_status_t ret;
- if (dev->badblocks.shift != -1) {
- ret = null_handle_badblocks(cmd, sector, nr_sectors);
+ if (dev->badblocks.shift != -1)
+ badblocks_ret = null_handle_badblocks(cmd, sector, &nr_sectors);
+
+ if (dev->memory_backed && nr_sectors) {
+ ret = null_handle_memory_backed(cmd, op, sector, nr_sectors);
if (ret != BLK_STS_OK)
return ret;
}
- if (dev->memory_backed)
- return null_handle_memory_backed(cmd, op, sector, nr_sectors);
-
- return BLK_STS_OK;
+ return badblocks_ret;
}
static void null_handle_cmd(struct nullb_cmd *cmd, sector_t sector,
diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h
index 6f9fe6171087..7bb6128dbaaf 100644
--- a/drivers/block/null_blk/null_blk.h
+++ b/drivers/block/null_blk/null_blk.h
@@ -63,6 +63,8 @@ struct nullb_device {
unsigned long flags; /* device flags */
unsigned int curr_cache;
struct badblocks badblocks;
+ bool badblocks_once;
+ bool badblocks_partial_io;
unsigned int nr_zones;
unsigned int nr_zones_imp_open;
@@ -131,6 +133,10 @@ blk_status_t null_handle_discard(struct nullb_device *dev, sector_t sector,
sector_t nr_sectors);
blk_status_t null_process_cmd(struct nullb_cmd *cmd, enum req_op op,
sector_t sector, unsigned int nr_sectors);
+blk_status_t null_handle_badblocks(struct nullb_cmd *cmd, sector_t sector,
+ unsigned int *nr_sectors);
+blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd, enum req_op op,
+ sector_t sector, sector_t nr_sectors);
#ifdef CONFIG_BLK_DEV_ZONED
int null_init_zoned_dev(struct nullb_device *dev, struct queue_limits *lim);
diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c
index 0d5f9bf95229..4e5728f45989 100644
--- a/drivers/block/null_blk/zoned.c
+++ b/drivers/block/null_blk/zoned.c
@@ -353,6 +353,7 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
struct nullb_device *dev = cmd->nq->dev;
unsigned int zno = null_zone_no(dev, sector);
struct nullb_zone *zone = &dev->zones[zno];
+ blk_status_t badblocks_ret = BLK_STS_OK;
blk_status_t ret;
trace_nullb_zone_op(cmd, zno, zone->cond);
@@ -412,9 +413,20 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
zone->cond = BLK_ZONE_COND_IMP_OPEN;
}
- ret = null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors);
- if (ret != BLK_STS_OK)
- goto unlock_zone;
+ if (dev->badblocks.shift != -1) {
+ badblocks_ret = null_handle_badblocks(cmd, sector, &nr_sectors);
+ if (badblocks_ret != BLK_STS_OK && !nr_sectors) {
+ ret = badblocks_ret;
+ goto unlock_zone;
+ }
+ }
+
+ if (dev->memory_backed) {
+ ret = null_handle_memory_backed(cmd, REQ_OP_WRITE, sector,
+ nr_sectors);
+ if (ret != BLK_STS_OK)
+ goto unlock_zone;
+ }
zone->wp += nr_sectors;
if (zone->wp == zone->start + zone->capacity) {
@@ -429,7 +441,7 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
zone->cond = BLK_ZONE_COND_FULL;
}
- ret = BLK_STS_OK;
+ ret = badblocks_ret;
unlock_zone:
null_unlock_zone(dev, zone);
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 529085181f35..22d52d3fabf0 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -70,7 +70,8 @@
/* All UBLK_PARAM_TYPE_* should be included here */
#define UBLK_PARAM_TYPE_ALL \
(UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD | \
- UBLK_PARAM_TYPE_DEVT | UBLK_PARAM_TYPE_ZONED)
+ UBLK_PARAM_TYPE_DEVT | UBLK_PARAM_TYPE_ZONED | \
+ UBLK_PARAM_TYPE_DMA_ALIGN)
struct ublk_rq_data {
struct llist_node node;
@@ -489,15 +490,17 @@ static wait_queue_head_t ublk_idr_wq; /* wait until one idr is freed */
static DEFINE_MUTEX(ublk_ctl_mutex);
+
+#define UBLK_MAX_UBLKS UBLK_MINORS
+
/*
- * Max ublk devices allowed to add
+ * Max unprivileged ublk devices allowed to add
*
* It can be extended to one per-user limit in future or even controlled
* by cgroup.
*/
-#define UBLK_MAX_UBLKS UBLK_MINORS
-static unsigned int ublks_max = 64;
-static unsigned int ublks_added; /* protected by ublk_ctl_mutex */
+static unsigned int unprivileged_ublks_max = 64;
+static unsigned int unprivileged_ublks_added; /* protected by ublk_ctl_mutex */
static struct miscdevice ublk_misc;
@@ -568,6 +571,16 @@ static int ublk_validate_params(const struct ublk_device *ub)
else if (ublk_dev_is_zoned(ub))
return -EINVAL;
+ if (ub->params.types & UBLK_PARAM_TYPE_DMA_ALIGN) {
+ const struct ublk_param_dma_align *p = &ub->params.dma;
+
+ if (p->alignment >= PAGE_SIZE)
+ return -EINVAL;
+
+ if (!is_power_of_2(p->alignment + 1))
+ return -EINVAL;
+ }
+
return 0;
}
@@ -1866,10 +1879,9 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
return -EIOCBQUEUED;
out:
- io_uring_cmd_done(cmd, ret, 0, issue_flags);
pr_devel("%s: complete: cmd op %d, tag %d ret %x io_flags %x\n",
__func__, cmd_op, tag, ret, io->flags);
- return -EIOCBQUEUED;
+ return ret;
}
static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub,
@@ -1925,7 +1937,10 @@ static inline int ublk_ch_uring_cmd_local(struct io_uring_cmd *cmd,
static void ublk_ch_uring_cmd_cb(struct io_uring_cmd *cmd,
unsigned int issue_flags)
{
- ublk_ch_uring_cmd_local(cmd, issue_flags);
+ int ret = ublk_ch_uring_cmd_local(cmd, issue_flags);
+
+ if (ret != -EIOCBQUEUED)
+ io_uring_cmd_done(cmd, ret, 0, issue_flags);
}
static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
@@ -2190,7 +2205,8 @@ static int ublk_add_chdev(struct ublk_device *ub)
if (ret)
goto fail;
- ublks_added++;
+ if (ub->dev_info.flags & UBLK_F_UNPRIVILEGED_DEV)
+ unprivileged_ublks_added++;
return 0;
fail:
put_device(dev);
@@ -2219,11 +2235,16 @@ static int ublk_add_tag_set(struct ublk_device *ub)
static void ublk_remove(struct ublk_device *ub)
{
+ bool unprivileged;
+
ublk_stop_dev(ub);
cancel_work_sync(&ub->nosrv_work);
cdev_device_del(&ub->cdev, &ub->cdev_dev);
+ unprivileged = ub->dev_info.flags & UBLK_F_UNPRIVILEGED_DEV;
ublk_put_device(ub);
- ublks_added--;
+
+ if (unprivileged)
+ unprivileged_ublks_added--;
}
static struct ublk_device *ublk_get_device_from_id(int idx)
@@ -2298,6 +2319,9 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd)
if (ub->params.basic.attrs & UBLK_ATTR_ROTATIONAL)
lim.features |= BLK_FEAT_ROTATIONAL;
+ if (ub->params.types & UBLK_PARAM_TYPE_DMA_ALIGN)
+ lim.dma_alignment = ub->params.dma.alignment;
+
if (wait_for_completion_interruptible(&ub->completion) != 0)
return -EINTR;
@@ -2485,7 +2509,8 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
return ret;
ret = -EACCES;
- if (ublks_added >= ublks_max)
+ if ((info.flags & UBLK_F_UNPRIVILEGED_DEV) &&
+ unprivileged_ublks_added >= unprivileged_ublks_max)
goto out_unlock;
ret = -ENOMEM;
@@ -3056,10 +3081,9 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd,
if (ub)
ublk_put_device(ub);
out:
- io_uring_cmd_done(cmd, ret, 0, issue_flags);
pr_devel("%s: cmd done ret %d cmd_op %x, dev id %d qid %d\n",
__func__, ret, cmd->cmd_op, header->dev_id, header->queue_id);
- return -EIOCBQUEUED;
+ return ret;
}
static const struct file_operations ublk_ctl_fops = {
@@ -3123,23 +3147,26 @@ static void __exit ublk_exit(void)
module_init(ublk_init);
module_exit(ublk_exit);
-static int ublk_set_max_ublks(const char *buf, const struct kernel_param *kp)
+static int ublk_set_max_unprivileged_ublks(const char *buf,
+ const struct kernel_param *kp)
{
return param_set_uint_minmax(buf, kp, 0, UBLK_MAX_UBLKS);
}
-static int ublk_get_max_ublks(char *buf, const struct kernel_param *kp)
+static int ublk_get_max_unprivileged_ublks(char *buf,
+ const struct kernel_param *kp)
{
- return sysfs_emit(buf, "%u\n", ublks_max);
+ return sysfs_emit(buf, "%u\n", unprivileged_ublks_max);
}
-static const struct kernel_param_ops ublk_max_ublks_ops = {
- .set = ublk_set_max_ublks,
- .get = ublk_get_max_ublks,
+static const struct kernel_param_ops ublk_max_unprivileged_ublks_ops = {
+ .set = ublk_set_max_unprivileged_ublks,
+ .get = ublk_get_max_unprivileged_ublks,
};
-module_param_cb(ublks_max, &ublk_max_ublks_ops, &ublks_max, 0644);
-MODULE_PARM_DESC(ublks_max, "max number of ublk devices allowed to add(default: 64)");
+module_param_cb(ublks_max, &ublk_max_unprivileged_ublks_ops,
+ &unprivileged_ublks_max, 0644);
+MODULE_PARM_DESC(ublks_max, "max number of unprivileged ublk devices allowed to add (default: 64)");
MODULE_AUTHOR("Ming Lei <ming.lei@redhat.com>");
MODULE_DESCRIPTION("Userspace block device");
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index ee9f7cecd78e..e743657379f7 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -4808,23 +4808,11 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned int argc, char **argv
ti->error = "Cannot allocate bio set";
goto bad;
}
- r = bioset_integrity_create(&ic->recheck_bios, RECHECK_POOL_SIZE);
- if (r) {
- ti->error = "Cannot allocate bio integrity set";
- r = -ENOMEM;
- goto bad;
- }
r = bioset_init(&ic->recalc_bios, 1, 0, BIOSET_NEED_BVECS);
if (r) {
ti->error = "Cannot allocate bio set";
goto bad;
}
- r = bioset_integrity_create(&ic->recalc_bios, 1);
- if (r) {
- ti->error = "Cannot allocate bio integrity set";
- r = -ENOMEM;
- goto bad;
- }
}
ic->metadata_wq = alloc_workqueue("dm-integrity-metadata",
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index bf9a61191e9a..453803f1edf5 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1081,15 +1081,9 @@ static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *
__alignof__(struct dm_io)) + DM_IO_BIO_OFFSET;
if (bioset_init(&pools->io_bs, pool_size, io_front_pad, bioset_flags))
goto out_free_pools;
- if (mempool_needs_integrity &&
- bioset_integrity_create(&pools->io_bs, pool_size))
- goto out_free_pools;
init_bs:
if (bioset_init(&pools->bs, pool_size, front_pad, 0))
goto out_free_pools;
- if (mempool_needs_integrity &&
- bioset_integrity_create(&pools->bs, pool_size))
- goto out_free_pools;
t->mempools = pools;
return 0;
diff --git a/drivers/md/md.c b/drivers/md/md.c
index cefa9cba711b..438e71e45c16 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1772,7 +1772,7 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
count <<= sb->bblog_shift;
if (bb + 1 == 0)
break;
- if (badblocks_set(&rdev->badblocks, sector, count, 1))
+ if (!badblocks_set(&rdev->badblocks, sector, count, 1))
return -EINVAL;
}
} else if (sb->bblog_offset != 0)
@@ -2383,19 +2383,6 @@ int md_integrity_register(struct mddev *mddev)
return 0; /* shouldn't register */
pr_debug("md: data integrity enabled on %s\n", mdname(mddev));
- if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE) ||
- (mddev->level != 1 && mddev->level != 10 &&
- bioset_integrity_create(&mddev->io_clone_set, BIO_POOL_SIZE))) {
- /*
- * No need to handle the failure of bioset_integrity_create,
- * because the function is called by md_run() -> pers->run(),
- * md_run calls bioset_exit -> bioset_integrity_free in case
- * of failure case.
- */
- pr_err("md: failed to create integrity pool for %s\n",
- mdname(mddev));
- return -EINVAL;
- }
return 0;
}
EXPORT_SYMBOL(md_integrity_register);
@@ -9838,12 +9825,11 @@ EXPORT_SYMBOL(md_finish_reshape);
/* Bad block management */
-/* Returns 1 on success, 0 on failure */
-int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
- int is_new)
+/* Returns true on success, false on failure */
+bool rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
+ int is_new)
{
struct mddev *mddev = rdev->mddev;
- int rv;
/*
* Recording new badblocks for faulty rdev will force unnecessary
@@ -9853,39 +9839,40 @@ int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
* avoid it.
*/
if (test_bit(Faulty, &rdev->flags))
- return 1;
+ return true;
if (is_new)
s += rdev->new_data_offset;
else
s += rdev->data_offset;
- rv = badblocks_set(&rdev->badblocks, s, sectors, 0);
- if (rv == 0) {
- /* Make sure they get written out promptly */
- if (test_bit(ExternalBbl, &rdev->flags))
- sysfs_notify_dirent_safe(rdev->sysfs_unack_badblocks);
- sysfs_notify_dirent_safe(rdev->sysfs_state);
- set_mask_bits(&mddev->sb_flags, 0,
- BIT(MD_SB_CHANGE_CLEAN) | BIT(MD_SB_CHANGE_PENDING));
- md_wakeup_thread(rdev->mddev->thread);
- return 1;
- } else
- return 0;
+
+ if (!badblocks_set(&rdev->badblocks, s, sectors, 0))
+ return false;
+
+ /* Make sure they get written out promptly */
+ if (test_bit(ExternalBbl, &rdev->flags))
+ sysfs_notify_dirent_safe(rdev->sysfs_unack_badblocks);
+ sysfs_notify_dirent_safe(rdev->sysfs_state);
+ set_mask_bits(&mddev->sb_flags, 0,
+ BIT(MD_SB_CHANGE_CLEAN) | BIT(MD_SB_CHANGE_PENDING));
+ md_wakeup_thread(rdev->mddev->thread);
+ return true;
}
EXPORT_SYMBOL_GPL(rdev_set_badblocks);
-int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
- int is_new)
+void rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
+ int is_new)
{
- int rv;
if (is_new)
s += rdev->new_data_offset;
else
s += rdev->data_offset;
- rv = badblocks_clear(&rdev->badblocks, s, sectors);
- if ((rv == 0) && test_bit(ExternalBbl, &rdev->flags))
+
+ if (!badblocks_clear(&rdev->badblocks, s, sectors))
+ return;
+
+ if (test_bit(ExternalBbl, &rdev->flags))
sysfs_notify_dirent_safe(rdev->sysfs_badblocks);
- return rv;
}
EXPORT_SYMBOL_GPL(rdev_clear_badblocks);
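Callers can now branch on the boolean result directly instead of comparing an int return code; one plausible (illustrative, not quoted from any call site) reaction to a failed set:

	if (!rdev_set_badblocks(rdev, sector, sectors, 0))
		md_error(mddev, rdev);	/* could not record the range */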
diff --git a/drivers/md/md.h b/drivers/md/md.h
index dd6a28f5d8e6..1cf00a04bcdd 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -292,8 +292,8 @@ enum flag_bits {
Nonrot, /* non-rotational device (SSD) */
};
-static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors,
- sector_t *first_bad, int *bad_sectors)
+static inline int is_badblock(struct md_rdev *rdev, sector_t s, sector_t sectors,
+ sector_t *first_bad, sector_t *bad_sectors)
{
if (unlikely(rdev->badblocks.count)) {
int rv = badblocks_check(&rdev->badblocks, rdev->data_offset + s,
@@ -310,15 +310,15 @@ static inline int rdev_has_badblock(struct md_rdev *rdev, sector_t s,
int sectors)
{
sector_t first_bad;
- int bad_sectors;
+ sector_t bad_sectors;
return is_badblock(rdev, s, sectors, &first_bad, &bad_sectors);
}
-extern int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
- int is_new);
-extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
- int is_new);
+extern bool rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
+ int is_new);
+extern void rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
+ int is_new);
struct md_cluster_info;
struct md_cluster_operations;
diff --git a/drivers/md/raid1-10.c b/drivers/md/raid1-10.c
index 4a51b151a981..c7efd8aab675 100644
--- a/drivers/md/raid1-10.c
+++ b/drivers/md/raid1-10.c
@@ -247,7 +247,7 @@ static inline int raid1_check_read_range(struct md_rdev *rdev,
sector_t this_sector, int *len)
{
sector_t first_bad;
- int bad_sectors;
+ sector_t bad_sectors;
/* no bad block overlap */
if (!is_badblock(rdev, this_sector, *len, &first_bad, &bad_sectors))
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index e366d0bba792..f06effec0a15 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1536,7 +1536,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
atomic_inc(&rdev->nr_pending);
if (test_bit(WriteErrorSeen, &rdev->flags)) {
sector_t first_bad;
- int bad_sectors;
+ sector_t bad_sectors;
int is_bad;
is_bad = is_badblock(rdev, r1_bio->sector, max_sectors,
@@ -2483,7 +2483,7 @@ static void fix_read_error(struct r1conf *conf, struct r1bio *r1_bio)
}
}
-static int narrow_write_error(struct r1bio *r1_bio, int i)
+static bool narrow_write_error(struct r1bio *r1_bio, int i)
{
struct mddev *mddev = r1_bio->mddev;
struct r1conf *conf = mddev->private;
@@ -2504,10 +2504,10 @@ static int narrow_write_error(struct r1bio *r1_bio, int i)
sector_t sector;
int sectors;
int sect_to_write = r1_bio->sectors;
- int ok = 1;
+ bool ok = true;
if (rdev->badblocks.shift < 0)
- return 0;
+ return false;
block_sectors = roundup(1 << rdev->badblocks.shift,
bdev_logical_block_size(rdev->bdev) >> 9);
@@ -2883,7 +2883,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
} else {
/* may need to read from here */
sector_t first_bad = MaxSector;
- int bad_sectors;
+ sector_t bad_sectors;
if (is_badblock(rdev, sector_nr, good_sectors,
&first_bad, &bad_sectors)) {
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 9d8516acf2fd..d412aeeafc18 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -748,7 +748,7 @@ static struct md_rdev *read_balance(struct r10conf *conf,
for (slot = 0; slot < conf->copies ; slot++) {
sector_t first_bad;
- int bad_sectors;
+ sector_t bad_sectors;
sector_t dev_sector;
unsigned int pending;
bool nonrot;
@@ -1431,7 +1431,7 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
if (rdev && test_bit(WriteErrorSeen, &rdev->flags)) {
sector_t first_bad;
sector_t dev_sector = r10_bio->devs[i].addr;
- int bad_sectors;
+ sector_t bad_sectors;
int is_bad;
is_bad = is_badblock(rdev, dev_sector, max_sectors,
@@ -2778,7 +2778,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
}
}
-static int narrow_write_error(struct r10bio *r10_bio, int i)
+static bool narrow_write_error(struct r10bio *r10_bio, int i)
{
struct bio *bio = r10_bio->master_bio;
struct mddev *mddev = r10_bio->mddev;
@@ -2799,10 +2799,10 @@ static int narrow_write_error(struct r10bio *r10_bio, int i)
sector_t sector;
int sectors;
int sect_to_write = r10_bio->sectors;
- int ok = 1;
+ bool ok = true;
if (rdev->badblocks.shift < 0)
- return 0;
+ return false;
block_sectors = roundup(1 << rdev->badblocks.shift,
bdev_logical_block_size(rdev->bdev) >> 9);
@@ -3405,7 +3405,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
sector_t from_addr, to_addr;
struct md_rdev *rdev = conf->mirrors[d].rdev;
sector_t sector, first_bad;
- int bad_sectors;
+ sector_t bad_sectors;
if (!rdev ||
!test_bit(In_sync, &rdev->flags))
continue;
@@ -3601,7 +3601,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
for (i = 0; i < conf->copies; i++) {
int d = r10_bio->devs[i].devnum;
sector_t first_bad, sector;
- int bad_sectors;
+ sector_t bad_sectors;
struct md_rdev *rdev;
if (r10_bio->devs[i].repl_bio)
diff --git a/drivers/nvdimm/badrange.c b/drivers/nvdimm/badrange.c
index a002ea6fdd84..ee478ccde7c6 100644
--- a/drivers/nvdimm/badrange.c
+++ b/drivers/nvdimm/badrange.c
@@ -167,7 +167,7 @@ static void set_badblock(struct badblocks *bb, sector_t s, int num)
dev_dbg(bb->dev, "Found a bad range (0x%llx, 0x%llx)\n",
(u64) s * 512, (u64) num * 512);
/* this isn't an error as the hardware will still throw an exception */
- if (badblocks_set(bb, s, num, 1))
+ if (!badblocks_set(bb, s, num, 1))
dev_info_once(bb->dev, "%s: failed for sector %llx\n",
__func__, (u64) s);
}
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index 5ca06e9a2d29..cc5c8f3f81e8 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -673,7 +673,7 @@ static inline bool is_bad_pmem(struct badblocks *bb, sector_t sector,
{
if (bb->count) {
sector_t first_bad;
- int num_bad;
+ sector_t num_bad;
return !!badblocks_check(bb, sector, len / 512, &first_bad,
&num_bad);
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index cfdfe0eaa512..8f3e816e805d 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -367,9 +367,10 @@ static int nd_pfn_clear_memmap_errors(struct nd_pfn *nd_pfn)
struct nd_namespace_common *ndns = nd_pfn->ndns;
void *zero_page = page_address(ZERO_PAGE(0));
struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
- int num_bad, meta_num, rc, bb_present;
+ int meta_num, rc, bb_present;
sector_t first_bad, meta_start;
struct nd_namespace_io *nsio;
+ sector_t num_bad;
if (nd_pfn->mode != PFN_MODE_PMEM)
return 0;
@@ -394,7 +395,7 @@ static int nd_pfn_clear_memmap_errors(struct nd_pfn *nd_pfn)
bb_present = badblocks_check(&nd_region->bb, meta_start,
meta_num, &first_bad, &num_bad);
if (bb_present) {
- dev_dbg(&nd_pfn->dev, "meta: %x badblocks at %llx\n",
+ dev_dbg(&nd_pfn->dev, "meta: %llx badblocks at %llx\n",
num_bad, first_bad);
nsoff = ALIGN_DOWN((nd_region->ndr_start
+ (first_bad << 9)) - nsio->res.start,
@@ -413,7 +414,7 @@ static int nd_pfn_clear_memmap_errors(struct nd_pfn *nd_pfn)
}
if (rc) {
dev_err(&nd_pfn->dev,
- "error clearing %x badblocks at %llx\n",
+ "error clearing %llx badblocks at %llx\n",
num_bad, first_bad);
return rc;
}
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index d81faa9d89c9..43156e1576c9 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -249,7 +249,7 @@ __weak long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff,
unsigned int num = PFN_PHYS(nr_pages) >> SECTOR_SHIFT;
struct badblocks *bb = &pmem->bb;
sector_t first_bad;
- int num_bad;
+ sector_t num_bad;
if (kaddr)
*kaddr = pmem->virt_addr + offset;
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index c8dc92a7d63e..73564efd11d2 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -167,18 +167,6 @@ static int iblock_configure_device(struct se_device *dev)
break;
}
- if (dev->dev_attrib.pi_prot_type) {
- struct bio_set *bs = &ib_dev->ibd_bio_set;
-
- if (bioset_integrity_create(bs, IBLOCK_BIO_POOL_SIZE) < 0) {
- pr_err("Unable to allocate bioset for PI\n");
- ret = -ENOMEM;
- goto out_blkdev_put;
- }
- pr_debug("IBLOCK setup BIP bs->bio_integrity_pool: %p\n",
- &bs->bio_integrity_pool);
- }
-
dev->dev_attrib.hw_pi_prot_type = dev->dev_attrib.pi_prot_type;
return 0;
diff --git a/include/linux/badblocks.h b/include/linux/badblocks.h
index 670f2dae692f..996493917f36 100644
--- a/include/linux/badblocks.h
+++ b/include/linux/badblocks.h
@@ -48,11 +48,11 @@ struct badblocks_context {
int ack;
};
-int badblocks_check(struct badblocks *bb, sector_t s, int sectors,
- sector_t *first_bad, int *bad_sectors);
-int badblocks_set(struct badblocks *bb, sector_t s, int sectors,
- int acknowledged);
-int badblocks_clear(struct badblocks *bb, sector_t s, int sectors);
+int badblocks_check(struct badblocks *bb, sector_t s, sector_t sectors,
+ sector_t *first_bad, sector_t *bad_sectors);
+bool badblocks_set(struct badblocks *bb, sector_t s, sector_t sectors,
+ int acknowledged);
+bool badblocks_clear(struct badblocks *bb, sector_t s, sector_t sectors);
void ack_all_badblocks(struct badblocks *bb);
ssize_t badblocks_show(struct badblocks *bb, char *page, int unack);
ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len,
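An illustrative caller under the new prototypes, showing the bool results and the sector_t out-parameters; this sketch is not taken from any one call site:

	sector_t first_bad, bad_sectors;

	if (!badblocks_set(bb, sector, nr_sectors, 1))
		pr_warn("badblocks table full, range not recorded\n");

	if (badblocks_check(bb, sector, nr_sectors, &first_bad, &bad_sectors))
		pr_info("bad range at %llu (+%llu sectors)\n",
			(unsigned long long)first_bad,
			(unsigned long long)bad_sectors);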
diff --git a/include/linux/bio-integrity.h b/include/linux/bio-integrity.h
index 802f52e38efd..0a25716820fe 100644
--- a/include/linux/bio-integrity.h
+++ b/include/linux/bio-integrity.h
@@ -16,8 +16,6 @@ enum bip_flags {
};
struct bio_integrity_payload {
- struct bio *bip_bio; /* parent bio */
-
struct bvec_iter bip_iter;
unsigned short bip_vcnt; /* # of integrity bio_vecs */
@@ -25,12 +23,7 @@ struct bio_integrity_payload {
unsigned short bip_flags; /* control flags */
u16 app_tag; /* application tag value */
- struct bvec_iter bio_iter; /* for rewinding parent bio */
-
- struct work_struct bip_work; /* I/O completion */
-
struct bio_vec *bip_vec;
- struct bio_vec bip_inline_vecs[];/* embedded bvec array */
};
#define BIP_CLONE_FLAGS (BIP_MAPPED_INTEGRITY | BIP_IP_CHECKSUM | \
@@ -74,6 +67,8 @@ static inline void bip_set_seed(struct bio_integrity_payload *bip,
bip->bip_iter.bi_sector = seed;
}
+void bio_integrity_init(struct bio *bio, struct bio_integrity_payload *bip,
+ struct bio_vec *bvecs, unsigned int nr_vecs);
struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, gfp_t gfp,
unsigned int nr);
int bio_integrity_add_page(struct bio *bio, struct page *page, unsigned int len,
@@ -85,9 +80,6 @@ bool bio_integrity_prep(struct bio *bio);
void bio_integrity_advance(struct bio *bio, unsigned int bytes_done);
void bio_integrity_trim(struct bio *bio);
int bio_integrity_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp_mask);
-int bioset_integrity_create(struct bio_set *bs, int pool_size);
-void bioset_integrity_free(struct bio_set *bs);
-void bio_integrity_init(void);
#else /* CONFIG_BLK_DEV_INTEGRITY */
@@ -96,15 +88,6 @@ static inline struct bio_integrity_payload *bio_integrity(struct bio *bio)
return NULL;
}
-static inline int bioset_integrity_create(struct bio_set *bs, int pool_size)
-{
- return 0;
-}
-
-static inline void bioset_integrity_free(struct bio_set *bs)
-{
-}
-
static inline int bio_integrity_map_user(struct bio *bio, struct iov_iter *iter)
{
return -EINVAL;
@@ -139,10 +122,6 @@ static inline void bio_integrity_trim(struct bio *bio)
{
}
-static inline void bio_integrity_init(void)
-{
-}
-
static inline bool bio_integrity_flagged(struct bio *bio, enum bip_flags flag)
{
return false;
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 4b79bf50f4f0..cafc7c215de8 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -625,10 +625,6 @@ struct bio_set {
mempool_t bio_pool;
mempool_t bvec_pool;
-#if defined(CONFIG_BLK_DEV_INTEGRITY)
- mempool_t bio_integrity_pool;
- mempool_t bvec_integrity_pool;
-#endif
unsigned int back_pad;
/*
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 248416ecd01c..dcf8fce15e23 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -560,7 +560,22 @@ struct request_queue {
struct blk_flush_queue *fq;
struct list_head flush_list;
+ /*
+ * Protects against I/O scheduler switching, particularly when updating
+ * q->elevator. Since the elevator update code path may also modify
+ * q->nr_requests and wbt latency, this lock also protects the sysfs
+ * attrs nr_requests and wbt_lat_usec. Additionally the nr_hw_queues
+ * update may modify hctx tags, reserved-tags and cpumask, so this lock
+ * also helps protect the hctx attrs. To ensure proper locking order
+ * during an elevator or nr_hw_queues update, first freeze the queue,
+ * acquire ->elevator_lock.
+ */
+ struct mutex elevator_lock;
+
struct mutex sysfs_lock;
+ /*
+ * Protects queue limits and also sysfs attribute read_ahead_kb.
+ */
struct mutex limits_lock;
/*
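The freeze-then-lock ordering documented above is the pattern the store paths in this series follow; schematically, for an update touching the elevator, nr_requests or wbt latency:

	unsigned int memflags;

	memflags = blk_mq_freeze_queue(q);	/* 1: freeze the queue first */
	mutex_lock(&q->elevator_lock);		/* 2: then take elevator_lock */

	/* ... switch elevator / update nr_requests / wbt latency ... */

	mutex_unlock(&q->elevator_lock);
	blk_mq_unfreeze_queue(q, memflags);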
diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h
index a8bc98bb69fc..8093acdeaa11 100644
--- a/include/uapi/linux/ublk_cmd.h
+++ b/include/uapi/linux/ublk_cmd.h
@@ -401,6 +401,11 @@ struct ublk_param_zoned {
__u8 reserved[20];
};
+struct ublk_param_dma_align {
+ __u32 alignment;
+ __u8 pad[4];
+};
+
struct ublk_params {
/*
* Total length of parameters, userspace has to set 'len' for both
@@ -413,12 +418,14 @@ struct ublk_params {
#define UBLK_PARAM_TYPE_DISCARD (1 << 1)
#define UBLK_PARAM_TYPE_DEVT (1 << 2)
#define UBLK_PARAM_TYPE_ZONED (1 << 3)
+#define UBLK_PARAM_TYPE_DMA_ALIGN (1 << 4)
__u32 types; /* types of parameter included */
struct ublk_param_basic basic;
struct ublk_param_discard discard;
struct ublk_param_devt devt;
struct ublk_param_zoned zoned;
+ struct ublk_param_dma_align dma;
};
#endif
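A hedged userspace sketch of opting in to the new parameter: a 512-byte DMA alignment is expressed as alignment = 511, since ublk_validate_params() requires alignment < PAGE_SIZE and alignment + 1 to be a power of two. The other fields a real ublk server must fill (basic geometry, queue depth, and so on) are omitted here:

	struct ublk_params p = {
		.len	= sizeof(p),
		.types	= UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DMA_ALIGN,
		.dma	= { .alignment = 511 },	/* 512-byte alignment */
	};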