* [PATCH v2 0/9] dm: changes staged in linux-next for 5.1 so far
From: Mike Snitzer @ 2019-02-20 21:44 UTC
To: dm-devel
This v2 eliminates the dm-mpath local cache for per-bio-data and
switches to allocating the per-bio-data the target requires as part of
the 'struct dm_noclone' allocation -- special thanks to Mikulas for his
ideas and patch (#6) that went into this v2.
Test results show a slight improvement for bio-based dm-multipath (but
not the kind of win dm-linear gets from switching to noclone):
linear: raw=6087MB/s clone=4241MB/s noclone=5148MB/s
mpath: raw=6087MB/s clone=3331MB/s noclone=3420MB/s
More work is needed to improve DM multipath performance (story of my
life)... but at least this shifts the focus away from DM core and back
to the dm-multipath target code.
Mike Snitzer (6):
dm: update dm_process_bio() to split bio if in ->make_request_fn()
dm: eliminate 'split_discard_bios' flag from DM target interface
dm: improve noclone bio support
dm: improve noclone_endio() to support multipath target
dm mpath: enable noclone support for bio-based
dm: remove unused _rq_tio_cache and _rq_cache
Mikulas Patocka (3):
dm: refactor start_io_acct and end_io_acct
dm: implement noclone optimization for bio-based
dm: add per-bio-data support to noclone bio
drivers/md/dm-cache-target.c | 1 -
drivers/md/dm-linear.c | 3 +-
drivers/md/dm-mpath.c | 13 +-
drivers/md/dm-raid.c | 14 +-
drivers/md/dm-rq.c | 16 ++
drivers/md/dm-rq.h | 16 --
drivers/md/dm-stripe.c | 3 +-
drivers/md/dm-table.c | 11 ++
drivers/md/dm-thin.c | 1 -
drivers/md/dm-zero.c | 1 +
drivers/md/dm-zoned-target.c | 1 -
drivers/md/dm.c | 406 ++++++++++++++++++++++++++++--------------
drivers/md/dm.h | 1 +
include/linux/device-mapper.h | 9 +-
include/uapi/linux/dm-ioctl.h | 4 +-
15 files changed, 330 insertions(+), 170 deletions(-)
--
2.15.0
* [PATCH v2 1/9] dm: update dm_process_bio() to split bio if in ->make_request_fn()
From: Mike Snitzer @ 2019-02-20 21:44 UTC
To: dm-devel
blk_queue_split() must be called, otherwise the queue_limits for
abnormal requests (e.g. discard, writesame, etc.) won't be imposed.
In addition, add dm_queue_split() to simplify the DM-specific splitting
that is needed for targets that impose ti->max_io_len.
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
drivers/md/dm.c | 93 ++++++++++++++++++++++++++++++++++++++++-----------------
1 file changed, 66 insertions(+), 27 deletions(-)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 515e6af9bed2..7a774fcd0194 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1533,6 +1533,22 @@ static int __send_write_zeroes(struct clone_info *ci, struct dm_target *ti)
return __send_changing_extent_only(ci, ti, get_num_write_zeroes_bios(ti), false);
}
+static bool is_abnormal_io(struct bio *bio)
+{
+ bool r = false;
+
+ switch (bio_op(bio)) {
+ case REQ_OP_DISCARD:
+ case REQ_OP_SECURE_ERASE:
+ case REQ_OP_WRITE_SAME:
+ case REQ_OP_WRITE_ZEROES:
+ r = true;
+ break;
+ }
+
+ return r;
+}
+
static bool __process_abnormal_io(struct clone_info *ci, struct dm_target *ti,
int *result)
{
@@ -1565,7 +1581,7 @@ static int __split_and_process_non_flush(struct clone_info *ci)
if (!dm_target_is_valid(ti))
return -EIO;
- if (unlikely(__process_abnormal_io(ci, ti, &r)))
+ if (__process_abnormal_io(ci, ti, &r))
return r;
len = min_t(sector_t, max_io_len(ci->sector, ti), ci->sector_count);
@@ -1601,13 +1617,6 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md,
blk_qc_t ret = BLK_QC_T_NONE;
int error = 0;
- if (unlikely(!map)) {
- bio_io_error(bio);
- return ret;
- }
-
- blk_queue_split(md->queue, &bio);
-
init_clone_info(&ci, md, map, bio);
if (bio->bi_opf & REQ_PREFLUSH) {
@@ -1675,18 +1684,13 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md,
* Optimized variant of __split_and_process_bio that leverages the
* fact that targets that use it do _not_ have a need to split bios.
*/
-static blk_qc_t __process_bio(struct mapped_device *md,
- struct dm_table *map, struct bio *bio)
+static blk_qc_t __process_bio(struct mapped_device *md, struct dm_table *map,
+ struct bio *bio, struct dm_target *ti)
{
struct clone_info ci;
blk_qc_t ret = BLK_QC_T_NONE;
int error = 0;
- if (unlikely(!map)) {
- bio_io_error(bio);
- return ret;
- }
-
init_clone_info(&ci, md, map, bio);
if (bio->bi_opf & REQ_PREFLUSH) {
@@ -1704,21 +1708,11 @@ static blk_qc_t __process_bio(struct mapped_device *md,
error = __send_empty_flush(&ci);
/* dec_pending submits any data associated with flush */
} else {
- struct dm_target *ti = md->immutable_target;
struct dm_target_io *tio;
- /*
- * Defend against IO still getting in during teardown
- * - as was seen for a time with nvme-fcloop
- */
- if (WARN_ON_ONCE(!ti || !dm_target_is_valid(ti))) {
- error = -EIO;
- goto out;
- }
-
ci.bio = bio;
ci.sector_count = bio_sectors(bio);
- if (unlikely(__process_abnormal_io(&ci, ti, &error)))
+ if (__process_abnormal_io(&ci, ti, &error))
goto out;
tio = alloc_tio(&ci, ti, 0, GFP_NOIO);
@@ -1730,11 +1724,56 @@ static blk_qc_t __process_bio(struct mapped_device *md,
return ret;
}
+static void dm_queue_split(struct mapped_device *md, struct dm_target *ti, struct bio **bio)
+{
+ unsigned len, sector_count;
+
+ sector_count = bio_sectors(*bio);
+ len = min_t(sector_t, max_io_len((*bio)->bi_iter.bi_sector, ti), sector_count);
+
+ if (sector_count > len) {
+ struct bio *split = bio_split(*bio, len, GFP_NOIO, &md->queue->bio_split);
+
+ bio_chain(split, *bio);
+ trace_block_split(md->queue, split, (*bio)->bi_iter.bi_sector);
+ generic_make_request(*bio);
+ *bio = split;
+ }
+}
+
static blk_qc_t dm_process_bio(struct mapped_device *md,
struct dm_table *map, struct bio *bio)
{
+ blk_qc_t ret = BLK_QC_T_NONE;
+ struct dm_target *ti = md->immutable_target;
+
+ if (unlikely(!map)) {
+ bio_io_error(bio);
+ return ret;
+ }
+
+ if (!ti) {
+ ti = dm_table_find_target(map, bio->bi_iter.bi_sector);
+ if (unlikely(!ti || !dm_target_is_valid(ti))) {
+ bio_io_error(bio);
+ return ret;
+ }
+ }
+
+ /*
+ * If in ->make_request_fn we need to use blk_queue_split(), otherwise
+ * queue_limits for abnormal requests (e.g. discard, writesame, etc)
+ * won't be imposed.
+ */
+ if (current->bio_list) {
+ if (is_abnormal_io(bio))
+ blk_queue_split(md->queue, &bio);
+ else
+ dm_queue_split(md, ti, &bio);
+ }
+
if (dm_get_md_type(md) == DM_TYPE_NVME_BIO_BASED)
- return __process_bio(md, map, bio);
+ return __process_bio(md, map, bio, ti);
else
return __split_and_process_bio(md, map, bio);
}
--
2.15.0
* [PATCH v2 2/9] dm: eliminate 'split_discard_bios' flag from DM target interface
From: Mike Snitzer @ 2019-02-20 21:44 UTC
To: dm-devel
There is no need to have DM core split discards on behalf of a DM target
now that blk_queue_split() handles splitting discards based on the
queue_limits. A DM target just needs to set max_discard_sectors,
discard_granularity, etc., in queue_limits.
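For illustration, a target's ->io_hints hook would advertise its discard
limits roughly like this (hypothetical sketch -- the names example_io_hints
and chunk_sectors are made up; the dm-raid hunk below shows a real instance):

static void example_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
	/* assumed per-target chunk size; illustrative value only */
	unsigned chunk_sectors = 8;

	/* discard_granularity is in bytes, max_discard_sectors in sectors */
	limits->discard_granularity = chunk_sectors << SECTOR_SHIFT;
	limits->max_discard_sectors = chunk_sectors;
}

With such limits set, blk_queue_split() will split incoming discards so the
target never sees one that crosses its chunk boundaries.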
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
drivers/md/dm-cache-target.c | 1 -
drivers/md/dm-raid.c | 14 +++++++++-----
drivers/md/dm-thin.c | 1 -
drivers/md/dm-zoned-target.c | 1 -
drivers/md/dm.c | 28 ++++++----------------------
include/linux/device-mapper.h | 6 ------
include/uapi/linux/dm-ioctl.h | 4 ++--
7 files changed, 17 insertions(+), 38 deletions(-)
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index b29a8327eed1..adc529f12b6b 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -2496,7 +2496,6 @@ static int cache_create(struct cache_args *ca, struct cache **result)
ti->num_discard_bios = 1;
ti->discards_supported = true;
- ti->split_discard_bios = false;
ti->per_io_data_size = sizeof(struct per_bio_data);
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index adcfe8ae10aa..9fdef6897316 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -2986,11 +2986,6 @@ static void configure_discard_support(struct raid_set *rs)
}
}
- /*
- * RAID1 and RAID10 personalities require bio splitting,
- * RAID0/4/5/6 don't and process large discard bios properly.
- */
- ti->split_discard_bios = !!(rs_is_raid1(rs) || rs_is_raid10(rs));
ti->num_discard_bios = 1;
}
@@ -3747,6 +3742,15 @@ static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits)
blk_limits_io_min(limits, chunk_size);
blk_limits_io_opt(limits, chunk_size * mddev_data_stripes(rs));
+
+ /*
+ * RAID1 and RAID10 personalities require bio splitting,
+ * RAID0/4/5/6 don't and process large discard bios properly.
+ */
+ if (rs_is_raid1(rs) || rs_is_raid10(rs)) {
+ limits->discard_granularity = chunk_size;
+ limits->max_discard_sectors = chunk_size;
+ }
}
static void raid_postsuspend(struct dm_target *ti)
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index e83b63608262..0d9ded0f5e50 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -4227,7 +4227,6 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
if (tc->pool->pf.discard_enabled) {
ti->discards_supported = true;
ti->num_discard_bios = 1;
- ti->split_discard_bios = false;
}
mutex_unlock(&dm_thin_pool_table.mutex);
diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c
index 6af5babe6837..8865c1709e16 100644
--- a/drivers/md/dm-zoned-target.c
+++ b/drivers/md/dm-zoned-target.c
@@ -727,7 +727,6 @@ static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ti->per_io_data_size = sizeof(struct dmz_bioctx);
ti->flush_supported = true;
ti->discards_supported = true;
- ti->split_discard_bios = true;
/* The exposed capacity is the number of chunks that can be mapped */
ti->len = (sector_t)dmz_nr_chunks(dmz->metadata) << dev->zone_nr_sectors_shift;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 7a774fcd0194..b988e178a523 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1478,17 +1478,10 @@ static unsigned get_num_write_zeroes_bios(struct dm_target *ti)
return ti->num_write_zeroes_bios;
}
-typedef bool (*is_split_required_fn)(struct dm_target *ti);
-
-static bool is_split_required_for_discard(struct dm_target *ti)
-{
- return ti->split_discard_bios;
-}
-
static int __send_changing_extent_only(struct clone_info *ci, struct dm_target *ti,
- unsigned num_bios, bool is_split_required)
+ unsigned num_bios)
{
- unsigned len;
+ unsigned len = ci->sector_count;
/*
* Even though the device advertised support for this type of
@@ -1499,38 +1492,29 @@ static int __send_changing_extent_only(struct clone_info *ci, struct dm_target *
if (!num_bios)
return -EOPNOTSUPP;
- if (!is_split_required)
- len = min((sector_t)ci->sector_count, max_io_len_target_boundary(ci->sector, ti));
- else
- len = min((sector_t)ci->sector_count, max_io_len(ci->sector, ti));
-
__send_duplicate_bios(ci, ti, num_bios, &len);
- ci->sector += len;
- ci->sector_count -= len;
-
return 0;
}
static int __send_discard(struct clone_info *ci, struct dm_target *ti)
{
- return __send_changing_extent_only(ci, ti, get_num_discard_bios(ti),
- is_split_required_for_discard(ti));
+ return __send_changing_extent_only(ci, ti, get_num_discard_bios(ti));
}
static int __send_secure_erase(struct clone_info *ci, struct dm_target *ti)
{
- return __send_changing_extent_only(ci, ti, get_num_secure_erase_bios(ti), false);
+ return __send_changing_extent_only(ci, ti, get_num_secure_erase_bios(ti));
}
static int __send_write_same(struct clone_info *ci, struct dm_target *ti)
{
- return __send_changing_extent_only(ci, ti, get_num_write_same_bios(ti), false);
+ return __send_changing_extent_only(ci, ti, get_num_write_same_bios(ti));
}
static int __send_write_zeroes(struct clone_info *ci, struct dm_target *ti)
{
- return __send_changing_extent_only(ci, ti, get_num_write_zeroes_bios(ti), false);
+ return __send_changing_extent_only(ci, ti, get_num_write_zeroes_bios(ti));
}
static bool is_abnormal_io(struct bio *bio)
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index e528baebad69..0f5b3d7c6cb3 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -315,12 +315,6 @@ struct dm_target {
* whether or not its underlying devices have support.
*/
bool discards_supported:1;
-
- /*
- * Set if the target required discard bios to be split
- * on max_io_len boundary.
- */
- bool split_discard_bios:1;
};
/* Each target can link one of these into the table */
diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h
index d1e49514977b..f396a82dfd3e 100644
--- a/include/uapi/linux/dm-ioctl.h
+++ b/include/uapi/linux/dm-ioctl.h
@@ -270,9 +270,9 @@ enum {
#define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
#define DM_VERSION_MAJOR 4
-#define DM_VERSION_MINOR 39
+#define DM_VERSION_MINOR 40
#define DM_VERSION_PATCHLEVEL 0
-#define DM_VERSION_EXTRA "-ioctl (2018-04-03)"
+#define DM_VERSION_EXTRA "-ioctl (2019-01-18)"
/* Status bits */
#define DM_READONLY_FLAG (1 << 0) /* In/Out */
--
2.15.0
* [PATCH v2 3/9] dm: refactor start_io_acct and end_io_acct
From: Mike Snitzer @ 2019-02-20 21:44 UTC
To: dm-devel
From: Mikulas Patocka <mpatocka@redhat.com>
This refactoring of start_io_acct() and end_io_acct() eliminates their
dependency on 'struct dm_io', which will not be created for all IO (a
following commit introduces the ability for targets to avoid cloning
READ and WRITE bios).
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
drivers/md/dm.c | 68 +++++++++++++++++++++++++--------------------------------
1 file changed, 30 insertions(+), 38 deletions(-)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index b988e178a523..1b87d20041e7 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -580,7 +580,19 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
return r;
}
-static void start_io_acct(struct dm_io *io);
+static void start_io_acct(struct mapped_device *md, struct bio *bio)
+{
+ generic_start_io_acct(md->queue, bio_op(bio), bio_sectors(bio), &dm_disk(md)->part0);
+}
+
+static void end_io_acct(struct mapped_device *md, struct bio *bio, unsigned long start_time)
+{
+ generic_end_io_acct(md->queue, bio_op(bio), &dm_disk(md)->part0, start_time);
+
+ /* nudge anyone waiting on suspend queue */
+ if (unlikely(wq_has_sleeper(&md->wait)))
+ wake_up(&md->wait);
+}
static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
{
@@ -604,7 +616,14 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
io->md = md;
spin_lock_init(&io->endio_lock);
- start_io_acct(io);
+ io->start_time = jiffies;
+
+ start_io_acct(md, bio);
+
+ if (unlikely(dm_stats_used(&md->stats)))
+ dm_stats_account_io(&md->stats, bio_data_dir(bio),
+ bio->bi_iter.bi_sector, bio_sectors(bio),
+ false, 0, &io->stats_aux);
return io;
}
@@ -668,41 +687,6 @@ static bool md_in_flight(struct mapped_device *md)
return md_in_flight_bios(md);
}
-static void start_io_acct(struct dm_io *io)
-{
- struct mapped_device *md = io->md;
- struct bio *bio = io->orig_bio;
-
- io->start_time = jiffies;
-
- generic_start_io_acct(md->queue, bio_op(bio), bio_sectors(bio),
- &dm_disk(md)->part0);
-
- if (unlikely(dm_stats_used(&md->stats)))
- dm_stats_account_io(&md->stats, bio_data_dir(bio),
- bio->bi_iter.bi_sector, bio_sectors(bio),
- false, 0, &io->stats_aux);
-}
-
-static void end_io_acct(struct dm_io *io)
-{
- struct mapped_device *md = io->md;
- struct bio *bio = io->orig_bio;
- unsigned long duration = jiffies - io->start_time;
-
- generic_end_io_acct(md->queue, bio_op(bio), &dm_disk(md)->part0,
- io->start_time);
-
- if (unlikely(dm_stats_used(&md->stats)))
- dm_stats_account_io(&md->stats, bio_data_dir(bio),
- bio->bi_iter.bi_sector, bio_sectors(bio),
- true, duration, &io->stats_aux);
-
- /* nudge anyone waiting on suspend queue */
- if (unlikely(wq_has_sleeper(&md->wait)))
- wake_up(&md->wait);
-}
-
/*
* Add the bio to the list of deferred io.
*/
@@ -941,7 +925,15 @@ static void dec_pending(struct dm_io *io, blk_status_t error)
io_error = io->status;
bio = io->orig_bio;
- end_io_acct(io);
+
+ if (unlikely(dm_stats_used(&md->stats))) {
+ unsigned long duration = jiffies - io->start_time;
+ dm_stats_account_io(&md->stats, bio_data_dir(bio),
+ bio->bi_iter.bi_sector, bio_sectors(bio),
+ true, duration, &io->stats_aux);
+ }
+
+ end_io_acct(md, bio, io->start_time);
free_io(md, io);
if (io_error == BLK_STS_DM_REQUEUE)
--
2.15.0
* [PATCH v2 4/9] dm: implement noclone optimization for bio-based
From: Mike Snitzer @ 2019-02-20 21:44 UTC
To: dm-devel
From: Mikulas Patocka <mpatocka@redhat.com>
Until now bio-based DM core has always cloned an incoming bio, remapped
the clone bio to a lower layer, dealt with the clone's completion and
then finally completed the original bio. This cloning can be avoided
for READ and WRITE bios if the target opts in by setting ti->no_clone.
Avoiding cloning for READ and WRITE bios improves the performance of
targets that do very little work in response to each bio (e.g. dm-linear
and dm-striped).
The improvement is accomplished by changing DM core to allocate a small
'dm_noclone' structure instead of cloning the bio. The bio's bi_end_io
and bi_private are saved in the 'dm_noclone' before they are overwritten
and the bio is passed to the lower block device. When the bio finishes,
noclone_endio() restores bi_end_io and bi_private and passes the bio to
the original bi_end_io function.
If the allocation of the 'struct dm_noclone' fails then bio-based DM
falls back to the traditional bio cloning IO path that is backed by
mempool reservations.
Performance improvement for dm-linear:
x86-64, 2x six-core
/dev/ram0 2449MiB/s
/dev/mapper/lin 5.0-rc without optimization 1970MiB/s
/dev/mapper/lin 5.0-rc with optimization 2238MiB/s
arm64, quad core:
/dev/ram0 457MiB/s
/dev/mapper/lin 5.0-rc without optimization 325MiB/s
/dev/mapper/lin 5.0-rc with optimization 364MiB/s
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
drivers/md/dm-linear.c | 3 +-
drivers/md/dm-stripe.c | 3 +-
drivers/md/dm-table.c | 11 ++++++++
drivers/md/dm-zero.c | 1 +
drivers/md/dm.c | 64 +++++++++++++++++++++++++++++++++++++++++++
drivers/md/dm.h | 1 +
include/linux/device-mapper.h | 9 ++++++
7 files changed, 90 insertions(+), 2 deletions(-)
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index ad980a38fb1e..573ee0c5e83a 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -62,6 +62,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ti->num_secure_erase_bios = 1;
ti->num_write_same_bios = 1;
ti->num_write_zeroes_bios = 1;
+ ti->no_clone = true;
ti->private = lc;
return 0;
@@ -216,7 +217,7 @@ static size_t linear_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff,
static struct target_type linear_target = {
.name = "linear",
- .version = {1, 4, 0},
+ .version = {1, 5, 0},
#ifdef CONFIG_BLK_DEV_ZONED
.features = DM_TARGET_PASSES_INTEGRITY | DM_TARGET_ZONED_HM,
.report_zones = linear_report_zones,
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 8547d7594338..0081bfe03e64 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -172,6 +172,7 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ti->num_secure_erase_bios = stripes;
ti->num_write_same_bios = stripes;
ti->num_write_zeroes_bios = stripes;
+ ti->no_clone = true;
sc->chunk_size = chunk_size;
if (chunk_size & (chunk_size - 1))
@@ -486,7 +487,7 @@ static void stripe_io_hints(struct dm_target *ti,
static struct target_type stripe_target = {
.name = "striped",
- .version = {1, 6, 0},
+ .version = {1, 7, 0},
.features = DM_TARGET_PASSES_INTEGRITY,
.module = THIS_MODULE,
.ctr = stripe_ctr,
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 4b1be754cc41..6a3e23faeb7d 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -47,6 +47,7 @@ struct dm_table {
bool integrity_supported:1;
bool singleton:1;
+ bool no_clone:1;
unsigned integrity_added:1;
/*
@@ -191,6 +192,8 @@ int dm_table_create(struct dm_table **result, fmode_t mode,
if (!t)
return -ENOMEM;
+ t->no_clone = true;
+
INIT_LIST_HEAD(&t->devices);
INIT_LIST_HEAD(&t->target_callbacks);
@@ -789,6 +792,9 @@ int dm_table_add_target(struct dm_table *t, const char *type,
if (r)
goto bad;
+ if (!tgt->no_clone)
+ t->no_clone = false;
+
t->highs[t->num_targets++] = tgt->begin + tgt->len - 1;
if (!tgt->num_discard_bios && tgt->discards_supported)
@@ -1376,6 +1382,11 @@ static int count_device(struct dm_target *ti, struct dm_dev *dev,
return 0;
}
+bool dm_table_supports_noclone(struct dm_table *table)
+{
+ return table->no_clone;
+}
+
/*
* Check whether a table has no data devices attached using each
* target's iterate_devices method.
diff --git a/drivers/md/dm-zero.c b/drivers/md/dm-zero.c
index b65ca8dcfbdc..436a5ee89698 100644
--- a/drivers/md/dm-zero.c
+++ b/drivers/md/dm-zero.c
@@ -26,6 +26,7 @@ static int zero_ctr(struct dm_target *ti, unsigned int argc, char **argv)
* Silently drop discards, avoiding -EOPNOTSUPP.
*/
ti->num_discard_bios = 1;
+ ti->no_clone = true;
return 0;
}
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 1b87d20041e7..57919f211acc 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -102,6 +102,16 @@ struct dm_io {
struct dm_target_io tio;
};
+/*
+ * One of these is allocated per noclone bio.
+ */
+struct dm_noclone {
+ struct mapped_device *md;
+ bio_end_io_t *orig_bi_end_io;
+ void *orig_bi_private;
+ unsigned long start_time;
+};
+
void *dm_per_bio_data(struct bio *bio, size_t data_size)
{
struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
@@ -1009,6 +1019,20 @@ static void clone_endio(struct bio *bio)
dec_pending(io, error);
}
+static void noclone_endio(struct bio *bio)
+{
+ struct dm_noclone *noclone = bio->bi_private;
+ struct mapped_device *md = noclone->md;
+
+ end_io_acct(md, bio, noclone->start_time);
+
+ bio->bi_end_io = noclone->orig_bi_end_io;
+ bio->bi_private = noclone->orig_bi_private;
+ kfree(noclone);
+
+ bio_endio(bio);
+}
+
/*
* Return maximum size of I/O possible at the supplied sector up to the current
* target boundary.
@@ -1774,8 +1798,48 @@ static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio)
return ret;
}
+ if (dm_table_supports_noclone(map) &&
+ (bio_op(bio) == REQ_OP_READ || bio_op(bio) == REQ_OP_WRITE) &&
+ likely(!(bio->bi_opf & REQ_PREFLUSH)) &&
+ !bio_flagged(bio, BIO_CHAIN) &&
+ likely(!bio_integrity(bio)) &&
+ likely(!dm_stats_used(&md->stats))) {
+ int r;
+ struct dm_noclone *noclone;
+ struct dm_target *ti = dm_table_find_target(map, bio->bi_iter.bi_sector);
+ if (unlikely(!dm_target_is_valid(ti)))
+ goto no_fast_path;
+ if (unlikely(bio_sectors(bio) > max_io_len(bio->bi_iter.bi_sector, ti)))
+ goto no_fast_path;
+ noclone = kmalloc_node(sizeof(*noclone), GFP_NOWAIT, md->numa_node_id);
+ if (unlikely(!noclone))
+ goto no_fast_path;
+ noclone->md = md;
+ noclone->start_time = jiffies;
+ noclone->orig_bi_end_io = bio->bi_end_io;
+ noclone->orig_bi_private = bio->bi_private;
+ bio->bi_end_io = noclone_endio;
+ bio->bi_private = noclone;
+ start_io_acct(md, bio);
+ r = ti->type->map(ti, bio);
+ ret = BLK_QC_T_NONE;
+ if (likely(r == DM_MAPIO_REMAPPED)) {
+ ret = generic_make_request(bio);
+ } else if (likely(r == DM_MAPIO_SUBMITTED)) {
+ } else if (r == DM_MAPIO_KILL) {
+ bio->bi_status = BLK_STS_IOERR;
+ noclone_endio(bio);
+ } else {
+ DMWARN("unimplemented target map return value: %d", r);
+ BUG();
+ }
+ goto put_table_ret;
+ }
+
+no_fast_path:
ret = dm_process_bio(md, map, bio);
+put_table_ret:
dm_put_live_table(md, srcu_idx);
return ret;
}
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 2d539b82ec08..c3c78123dfb3 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -53,6 +53,7 @@ void dm_table_event_callback(struct dm_table *t,
void (*fn)(void *), void *context);
struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index);
struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector);
+bool dm_table_supports_noclone(struct dm_table *t);
bool dm_table_has_no_data_devices(struct dm_table *table);
int dm_calculate_queue_limits(struct dm_table *table,
struct queue_limits *limits);
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 0f5b3d7c6cb3..d38306476c0b 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -315,6 +315,15 @@ struct dm_target {
* whether or not its underlying devices have support.
*/
bool discards_supported:1;
+
+ /*
+ * Set if this target can process bios without cloning them.
+ * The target's per bio processing must be fast enough that DM core's
+ * cloning is not dwarfed by per-bio work in the target.
+ * This also implies the target is sufficiently simple so as not to
+ * require complex block capabilities (e.g. integrity, cloning, etc).
+ */
+ bool no_clone:1;
};
/* Each target can link one of these into the table */
--
2.15.0
* [PATCH v2 5/9] dm: improve noclone bio support
From: Mike Snitzer @ 2019-02-20 21:44 UTC
To: dm-devel
Leverage the fact that dm_queue_split() enables use of noclone support
even for targets that require additional splitting (via ti->max_io_len).
To achieve this, move noclone processing from dm_make_request() to
dm_process_bio() and check if bio->bi_end_io is already set to
noclone_endio. This also fixes dm_wq_work() so it can support noclone
bios (because it calls dm_process_bio()).
While at it, clean up ->map() return processing to be comparable to
__map_bio() and add some code comments that explain why certain
conditionals exist to prevent the use of noclone.
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
drivers/md/dm.c | 103 ++++++++++++++++++++++++++++++++++----------------------
1 file changed, 62 insertions(+), 41 deletions(-)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 57919f211acc..d84735be6927 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1764,14 +1764,75 @@ static blk_qc_t dm_process_bio(struct mapped_device *md,
* If in ->make_request_fn we need to use blk_queue_split(), otherwise
* queue_limits for abnormal requests (e.g. discard, writesame, etc)
* won't be imposed.
+ *
+ * For normal READ and WRITE requests we benefit from upfront splitting
+ * via dm_queue_split() because we can then leverage noclone support below.
*/
- if (current->bio_list) {
+ if (current->bio_list && (bio->bi_end_io != noclone_endio)) {
+ /*
+ * It is fine to use {blk,dm}_queue_split() before opting to use noclone
+ * because any ->bi_private and ->bi_end_io will get saved off below.
+ * Once noclone_endio() is called the bio_chain's endio will kick in.
+ * - __split_and_process_bio() can split further, but any targets that
+ * require late _DM_ initiated splitting (e.g. dm_accept_partial_bio()
+ * callers) shouldn't set ti->no_clone.
+ */
if (is_abnormal_io(bio))
blk_queue_split(md->queue, &bio);
else
dm_queue_split(md, ti, &bio);
}
+ if (dm_table_supports_noclone(map) &&
+ (bio_op(bio) == REQ_OP_READ || bio_op(bio) == REQ_OP_WRITE) &&
+ likely(!(bio->bi_opf & REQ_PREFLUSH)) &&
+ likely(!bio_integrity(bio)) && /* integrity requires specialized processing */
+ likely(!dm_stats_used(&md->stats))) { /* noclone doesn't support dm-stats */
+ int r;
+ struct dm_noclone *noclone;
+
+ /* Already been here if bio->bi_end_io is noclone_endio, reentered via dm_wq_work() */
+ if (bio->bi_end_io != noclone_endio) {
+ noclone = kmalloc_node(sizeof(*noclone), GFP_NOWAIT, md->numa_node_id);
+ if (unlikely(!noclone))
+ goto no_fast_path;
+
+ noclone->md = md;
+ noclone->start_time = jiffies;
+ noclone->orig_bi_end_io = bio->bi_end_io;
+ noclone->orig_bi_private = bio->bi_private;
+ bio->bi_end_io = noclone_endio;
+ bio->bi_private = noclone;
+ } else
+ noclone = bio->bi_private;
+
+ start_io_acct(md, bio);
+ r = ti->type->map(ti, bio);
+ switch (r) {
+ case DM_MAPIO_SUBMITTED:
+ break;
+ case DM_MAPIO_REMAPPED:
+ /* the bio has been remapped so dispatch it */
+ if (md->type == DM_TYPE_NVME_BIO_BASED)
+ ret = direct_make_request(bio);
+ else
+ ret = generic_make_request(bio);
+ break;
+ case DM_MAPIO_KILL:
+ bio_io_error(bio);
+ break;
+ case DM_MAPIO_REQUEUE:
+ bio->bi_status = BLK_STS_DM_REQUEUE;
+ noclone_endio(bio);
+ break;
+ default:
+ DMWARN("unimplemented target map return value: %d", r);
+ BUG();
+ }
+
+ return ret;
+ }
+no_fast_path:
if (dm_get_md_type(md) == DM_TYPE_NVME_BIO_BASED)
return __process_bio(md, map, bio, ti);
else
@@ -1798,48 +1859,8 @@ static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio)
return ret;
}
- if (dm_table_supports_noclone(map) &&
- (bio_op(bio) == REQ_OP_READ || bio_op(bio) == REQ_OP_WRITE) &&
- likely(!(bio->bi_opf & REQ_PREFLUSH)) &&
- !bio_flagged(bio, BIO_CHAIN) &&
- likely(!bio_integrity(bio)) &&
- likely(!dm_stats_used(&md->stats))) {
- int r;
- struct dm_noclone *noclone;
- struct dm_target *ti = dm_table_find_target(map, bio->bi_iter.bi_sector);
- if (unlikely(!dm_target_is_valid(ti)))
- goto no_fast_path;
- if (unlikely(bio_sectors(bio) > max_io_len(bio->bi_iter.bi_sector, ti)))
- goto no_fast_path;
- noclone = kmalloc_node(sizeof(*noclone), GFP_NOWAIT, md->numa_node_id);
- if (unlikely(!noclone))
- goto no_fast_path;
- noclone->md = md;
- noclone->start_time = jiffies;
- noclone->orig_bi_end_io = bio->bi_end_io;
- noclone->orig_bi_private = bio->bi_private;
- bio->bi_end_io = noclone_endio;
- bio->bi_private = noclone;
- start_io_acct(md, bio);
- r = ti->type->map(ti, bio);
- ret = BLK_QC_T_NONE;
- if (likely(r == DM_MAPIO_REMAPPED)) {
- ret = generic_make_request(bio);
- } else if (likely(r == DM_MAPIO_SUBMITTED)) {
- } else if (r == DM_MAPIO_KILL) {
- bio->bi_status = BLK_STS_IOERR;
- noclone_endio(bio);
- } else {
- DMWARN("unimplemented target map return value: %d", r);
- BUG();
- }
- goto put_table_ret;
- }
-
-no_fast_path:
ret = dm_process_bio(md, map, bio);
-put_table_ret:
dm_put_live_table(md, srcu_idx);
return ret;
}
--
2.15.0
* [PATCH v2 6/9] dm: add per-bio-data support to noclone bio
From: Mike Snitzer @ 2019-02-20 21:44 UTC
To: dm-devel
From: Mikulas Patocka <mpatocka@redhat.com>
Allocate additional space after the 'struct dm_noclone' for the
target-requested per_io_data_size and place DM_NOCLONE_MAGIC at the end.
Update both dm_per_bio_data() and dm_bio_from_per_bio_data() to branch
based on whether the bio/data comes from the clone or noclone path.
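The resulting allocation layout is therefore (sketch based on the
description above and the hunk below):

/*
 *   +-------------------+------------------------+------------------+
 *   | struct dm_noclone | per-bio-data           | DM_NOCLONE_MAGIC |
 *   |                   | (ti->per_io_data_size) | (unsigned)       |
 *   +-------------------+------------------------+------------------+
 *
 * dm_per_bio_data() returns 'noclone + 1'; dm_bio_from_per_bio_data()
 * reads the magic just past the per-bio-data to tell a noclone
 * allocation apart from a cloned dm_io/dm_target_io.
 */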
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
drivers/md/dm.c | 32 ++++++++++++++++++++++++++++----
1 file changed, 28 insertions(+), 4 deletions(-)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index d84735be6927..7e022f840419 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -26,6 +26,8 @@
#include <linux/pr.h>
#include <linux/refcount.h>
+#include <asm/unaligned.h>
+
#define DM_MSG_PREFIX "core"
/*
@@ -105,16 +107,29 @@ struct dm_io {
/*
* One of these is allocated per noclone bio.
*/
+#define DM_NOCLONE_MAGIC 9693664
struct dm_noclone {
struct mapped_device *md;
+ struct bio *bio;
bio_end_io_t *orig_bi_end_io;
void *orig_bi_private;
unsigned long start_time;
+ /* ... per-bio-data ... */
+ /* DM_NOCLONE_MAGIC */
};
+static void noclone_endio(struct bio *bio);
+
void *dm_per_bio_data(struct bio *bio, size_t data_size)
{
- struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
+ struct dm_target_io *tio;
+
+ if (bio->bi_end_io == noclone_endio) {
+ struct dm_noclone *noclone = bio->bi_private;
+ return noclone + 1;
+ }
+
+ tio = container_of(bio, struct dm_target_io, clone);
if (!tio->inside_dm_io)
return (char *)bio - offsetof(struct dm_target_io, clone) - data_size;
return (char *)bio - offsetof(struct dm_target_io, clone) - offsetof(struct dm_io, tio) - data_size;
@@ -124,9 +139,14 @@ EXPORT_SYMBOL_GPL(dm_per_bio_data);
struct bio *dm_bio_from_per_bio_data(void *data, size_t data_size)
{
struct dm_io *io = (struct dm_io *)((char *)data + data_size);
- if (io->magic == DM_IO_MAGIC)
+ unsigned magic = get_unaligned(&io->magic);
+ if (magic == DM_NOCLONE_MAGIC) {
+ struct dm_noclone *noclone = (struct dm_noclone *)data - 1;
+ return noclone->bio;
+ }
+ if (magic == DM_IO_MAGIC)
return (struct bio *)((char *)io + offsetof(struct dm_io, tio) + offsetof(struct dm_target_io, clone));
- BUG_ON(io->magic != DM_TIO_MAGIC);
+ BUG_ON(magic != DM_TIO_MAGIC);
return (struct bio *)((char *)io + offsetof(struct dm_target_io, clone));
}
EXPORT_SYMBOL_GPL(dm_bio_from_per_bio_data);
@@ -1793,14 +1813,18 @@ static blk_qc_t dm_process_bio(struct mapped_device *md,
/* Already been here if bio->bi_end_io is noclone_endio, reentered via dm_wq_work() */
if (bio->bi_end_io != noclone_endio) {
- noclone = kmalloc_node(sizeof(*noclone), GFP_NOWAIT, md->numa_node_id);
+ noclone = kmalloc_node(sizeof(*noclone) + ti->per_io_data_size + sizeof(unsigned),
+ GFP_NOWAIT, md->numa_node_id);
if (unlikely(!noclone))
goto no_fast_path;
noclone->md = md;
noclone->start_time = jiffies;
+ noclone->bio = bio;
noclone->orig_bi_end_io = bio->bi_end_io;
noclone->orig_bi_private = bio->bi_private;
+ put_unaligned(DM_NOCLONE_MAGIC,
+ (unsigned *)((char *)noclone + sizeof(*noclone) + ti->per_io_data_size));
bio->bi_end_io = noclone_endio;
bio->bi_private = noclone;
} else
--
2.15.0
* [PATCH v2 7/9] dm: improve noclone_endio() to support multipath target
From: Mike Snitzer @ 2019-02-20 21:44 UTC
To: dm-devel
noclone_endio() must call the target's ->end_io method if it exists.
It must also handle the various DM_ENDIO_* returns that are possible.
Factor out dm_bio_pushback_needed() for both dec_pending() and
noclone_endio() to use when handling BLK_STS_DM_REQUEUE.
Lastly, there is no need to conditionally set md->immutable_target in
__bind(). If the device only uses a single immutable target, that
should always be reflected in md->immutable_target. This is important
because noclone_endio() relies on it to access the multipath dm_target
without needing to add another member to the 'dm_noclone' structure.
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
drivers/md/dm.c | 80 ++++++++++++++++++++++++++++++++++++++++-----------------
1 file changed, 57 insertions(+), 23 deletions(-)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 7e022f840419..0943913bceca 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -919,6 +919,28 @@ static int __noflush_suspending(struct mapped_device *md)
return test_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
}
+static bool dm_bio_pushback_needed(struct mapped_device *md,
+ struct bio *bio, blk_status_t *error)
+{
+ unsigned long flags;
+
+ /*
+ * Target requested pushing back the I/O.
+ */
+ if (__noflush_suspending(md)) {
+ spin_lock_irqsave(&md->deferred_lock, flags);
+ // FIXME: using bio_list_add_head() creates LIFO
+ bio_list_add_head(&md->deferred, bio);
+ spin_unlock_irqrestore(&md->deferred_lock, flags);
+ return true;
+ } else {
+ /* noflush suspend was interrupted. */
+ *error = BLK_STS_IOERR;
+ }
+
+ return false;
+}
+
/*
* Decrements the number of outstanding ios that a bio has been
* cloned into, completing the original io if necc.
@@ -939,22 +961,12 @@ static void dec_pending(struct dm_io *io, blk_status_t error)
}
if (atomic_dec_and_test(&io->io_count)) {
- if (io->status == BLK_STS_DM_REQUEUE) {
- /*
- * Target requested pushing back the I/O.
- */
- spin_lock_irqsave(&md->deferred_lock, flags);
- if (__noflush_suspending(md))
- /* NOTE early return due to BLK_STS_DM_REQUEUE below */
- bio_list_add_head(&md->deferred, io->orig_bio);
- else
- /* noflush suspend was interrupted. */
- io->status = BLK_STS_IOERR;
- spin_unlock_irqrestore(&md->deferred_lock, flags);
- }
+ bio = io->orig_bio;
+
+ if (unlikely(io->status == BLK_STS_DM_REQUEUE))
+ (void) dm_bio_pushback_needed(md, bio, &io->status);
io_error = io->status;
- bio = io->orig_bio;
if (unlikely(dm_stats_used(&md->stats))) {
unsigned long duration = jiffies - io->start_time;
@@ -1041,8 +1053,33 @@ static void clone_endio(struct bio *bio)
static void noclone_endio(struct bio *bio)
{
+ blk_status_t error = bio->bi_status;
struct dm_noclone *noclone = bio->bi_private;
struct mapped_device *md = noclone->md;
+ struct dm_target *ti = NULL;
+ dm_endio_fn endio = NULL;
+
+ if (md->immutable_target) {
+ ti = md->immutable_target;
+ endio = ti->type->end_io;
+ }
+
+ if (endio) {
+ int r = endio(ti, bio, &error);
+ switch (r) {
+ case DM_ENDIO_REQUEUE:
+ error = BLK_STS_DM_REQUEUE;
+ /*FALLTHRU*/
+ case DM_ENDIO_DONE:
+ break;
+ case DM_ENDIO_INCOMPLETE:
+ /* The target will handle the io */
+ return;
+ default:
+ DMWARN("unimplemented target endio return value: %d", r);
+ BUG();
+ }
+ }
end_io_acct(md, bio, noclone->start_time);
@@ -1050,6 +1087,11 @@ static void noclone_endio(struct bio *bio)
bio->bi_private = noclone->orig_bi_private;
kfree(noclone);
+ if (unlikely(error == BLK_STS_DM_REQUEUE)) {
+ if (dm_bio_pushback_needed(md, bio, &bio->bi_status))
+ return;
+ }
+
bio_endio(bio);
}
@@ -2249,15 +2291,7 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
if (request_based)
dm_stop_queue(q);
- if (request_based || md->type == DM_TYPE_NVME_BIO_BASED) {
- /*
- * Leverage the fact that request-based DM targets and
- * NVMe bio based targets are immutable singletons
- * - used to optimize both dm_request_fn and dm_mq_queue_rq;
- * and __process_bio.
- */
- md->immutable_target = dm_table_get_immutable_target(t);
- }
+ md->immutable_target = dm_table_get_immutable_target(t);
ret = __bind_mempools(md, t);
if (ret) {
--
2.15.0
* [PATCH v2 8/9] dm mpath: enable noclone support for bio-based
From: Mike Snitzer @ 2019-02-20 21:44 UTC
To: dm-devel
Avoiding bio cloning (aka noclone) offers a slight advantage over the
bio-cloning path for this read workload:
fio --ioengine=psync --iodepth=1 --rw=read --bs=512 --direct=1 \
--numjobs=12 --time_based --runtime=10 --group_reporting --name=/dev/mapper/mpath
clone 3331MB/s 3337MB/s 3355MB/s
noclone 3418MB/s 3407MB/s 3420MB/s
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
drivers/md/dm-mpath.c | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 2ee5e357a0a7..579330cd7a6a 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -1178,9 +1178,10 @@ static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv)
ti->num_discard_bios = 1;
ti->num_write_same_bios = 1;
ti->num_write_zeroes_bios = 1;
- if (m->queue_mode == DM_TYPE_BIO_BASED)
+ if (m->queue_mode == DM_TYPE_BIO_BASED) {
+ ti->no_clone = true;
ti->per_io_data_size = multipath_per_bio_data_size();
- else
+ } else
ti->per_io_data_size = sizeof(struct dm_mpath_io);
return 0;
@@ -1584,11 +1585,11 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone,
return r;
}
-static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone,
+static int multipath_end_io_bio(struct dm_target *ti, struct bio *bio,
blk_status_t *error)
{
struct multipath *m = ti->private;
- struct dm_mpath_io *mpio = get_mpio_from_bio(clone);
+ struct dm_mpath_io *mpio = get_mpio_from_bio(bio);
struct pgpath *pgpath = mpio->pgpath;
unsigned long flags;
int r = DM_ENDIO_DONE;
@@ -1611,7 +1612,7 @@ static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone,
}
spin_lock_irqsave(&m->lock, flags);
- bio_list_add(&m->queued_bios, clone);
+ bio_list_add(&m->queued_bios, bio);
spin_unlock_irqrestore(&m->lock, flags);
if (!test_bit(MPATHF_QUEUE_IO, &m->flags))
queue_work(kmultipathd, &m->process_queued_bios);
@@ -2011,7 +2012,7 @@ static int multipath_busy(struct dm_target *ti)
*---------------------------------------------------------------*/
static struct target_type multipath_target = {
.name = "multipath",
- .version = {1, 13, 0},
+ .version = {1, 14, 0},
.features = DM_TARGET_SINGLETON | DM_TARGET_IMMUTABLE |
DM_TARGET_PASSES_INTEGRITY,
.module = THIS_MODULE,
--
2.15.0
* [PATCH v2 9/9] dm: remove unused _rq_tio_cache and _rq_cache
From: Mike Snitzer @ 2019-02-20 21:44 UTC
To: dm-devel
Also move the dm_rq_target_io structure definition from dm-rq.h to dm-rq.c.
Fixes: 6a23e05c2fe3c6 ("dm: remove legacy request-based IO path")
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
drivers/md/dm-rq.c | 16 ++++++++++++++++
drivers/md/dm-rq.h | 16 ----------------
drivers/md/dm.c | 22 ++--------------------
3 files changed, 18 insertions(+), 36 deletions(-)
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index a20531e5f3b4..9428cd951e3b 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -12,6 +12,22 @@
#define DM_MSG_PREFIX "core-rq"
+/*
+ * One of these is allocated per request.
+ */
+struct dm_rq_target_io {
+ struct mapped_device *md;
+ struct dm_target *ti;
+ struct request *orig, *clone;
+ struct kthread_work work;
+ blk_status_t error;
+ union map_info info;
+ struct dm_stats_aux stats_aux;
+ unsigned long duration_jiffies;
+ unsigned n_sectors;
+ unsigned completed;
+};
+
#define DM_MQ_NR_HW_QUEUES 1
#define DM_MQ_QUEUE_DEPTH 2048
static unsigned dm_mq_nr_hw_queues = DM_MQ_NR_HW_QUEUES;
diff --git a/drivers/md/dm-rq.h b/drivers/md/dm-rq.h
index b39245545229..1eea0da641db 100644
--- a/drivers/md/dm-rq.h
+++ b/drivers/md/dm-rq.h
@@ -16,22 +16,6 @@
struct mapped_device;
-/*
- * One of these is allocated per request.
- */
-struct dm_rq_target_io {
- struct mapped_device *md;
- struct dm_target *ti;
- struct request *orig, *clone;
- struct kthread_work work;
- blk_status_t error;
- union map_info info;
- struct dm_stats_aux stats_aux;
- unsigned long duration_jiffies;
- unsigned n_sectors;
- unsigned completed;
-};
-
/*
* For request-based dm - the bio clones we allocate are embedded in these
* structs.
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 0943913bceca..a58f35d31fd0 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -188,9 +188,6 @@ struct table_device {
struct dm_dev dm_dev;
};
-static struct kmem_cache *_rq_tio_cache;
-static struct kmem_cache *_rq_cache;
-
/*
* Bio-based DM's mempools' reserved IOs set by the user.
*/
@@ -252,20 +249,11 @@ static unsigned dm_get_numa_node(void)
static int __init local_init(void)
{
- int r = -ENOMEM;
-
- _rq_tio_cache = KMEM_CACHE(dm_rq_target_io, 0);
- if (!_rq_tio_cache)
- return r;
-
- _rq_cache = kmem_cache_create("dm_old_clone_request", sizeof(struct request),
- __alignof__(struct request), 0, NULL);
- if (!_rq_cache)
- goto out_free_rq_tio_cache;
+ int r;
r = dm_uevent_init();
if (r)
- goto out_free_rq_cache;
+ return r;
deferred_remove_workqueue = alloc_workqueue("kdmremove", WQ_UNBOUND, 1);
if (!deferred_remove_workqueue) {
@@ -287,10 +275,6 @@ static int __init local_init(void)
destroy_workqueue(deferred_remove_workqueue);
out_uevent_exit:
dm_uevent_exit();
-out_free_rq_cache:
- kmem_cache_destroy(_rq_cache);
-out_free_rq_tio_cache:
- kmem_cache_destroy(_rq_tio_cache);
return r;
}
@@ -300,8 +284,6 @@ static void local_exit(void)
flush_scheduled_work();
destroy_workqueue(deferred_remove_workqueue);
- kmem_cache_destroy(_rq_cache);
- kmem_cache_destroy(_rq_tio_cache);
unregister_blkdev(_major, _name);
dm_uevent_exit();
--
2.15.0
* Re: [PATCH v2 2/9] dm: eliminate 'split_discard_bios' flag from DM target interface
From: Mike Snitzer @ 2019-02-21 4:36 UTC
To: dm-devel; +Cc: Ming Lei
On Wed, Feb 20 2019 at 4:44pm -0500,
Mike Snitzer <snitzer@redhat.com> wrote:
> There is no need to have DM core split discards on behalf of a DM target
> now that blk_queue_split() handles splitting discards based on the
> queue_limits. A DM target just needs to set max_discard_sectors,
> discard_granularity, etc, in queue_limits.
>
> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
> ---
> drivers/md/dm-cache-target.c | 1 -
> drivers/md/dm-raid.c | 14 +++++++++-----
> drivers/md/dm-thin.c | 1 -
> drivers/md/dm-zoned-target.c | 1 -
> drivers/md/dm.c | 28 ++++++----------------------
> include/linux/device-mapper.h | 6 ------
> include/uapi/linux/dm-ioctl.h | 4 ++--
> 7 files changed, 17 insertions(+), 38 deletions(-)
>
...
> diff --git a/drivers/md/dm.c b/drivers/md/dm.c
> index 7a774fcd0194..b988e178a523 100644
> --- a/drivers/md/dm.c
> +++ b/drivers/md/dm.c
> @@ -1478,17 +1478,10 @@ static unsigned get_num_write_zeroes_bios(struct dm_target *ti)
> return ti->num_write_zeroes_bios;
> }
>
> -typedef bool (*is_split_required_fn)(struct dm_target *ti);
> -
> -static bool is_split_required_for_discard(struct dm_target *ti)
> -{
> - return ti->split_discard_bios;
> -}
> -
> static int __send_changing_extent_only(struct clone_info *ci, struct dm_target *ti,
> - unsigned num_bios, bool is_split_required)
> + unsigned num_bios)
> {
> - unsigned len;
> + unsigned len = ci->sector_count;
>
> /*
> * Even though the device advertised support for this type of
> @@ -1499,38 +1492,29 @@ static int __send_changing_extent_only(struct clone_info *ci, struct dm_target *
> if (!num_bios)
> return -EOPNOTSUPP;
>
> - if (!is_split_required)
> - len = min((sector_t)ci->sector_count, max_io_len_target_boundary(ci->sector, ti));
> - else
> - len = min((sector_t)ci->sector_count, max_io_len(ci->sector, ti));
> -
> __send_duplicate_bios(ci, ti, num_bios, &len);
>
> - ci->sector += len;
> - ci->sector_count -= len;
> -
> return 0;
> }
The above was bogus: ci->sector and ci->sector_count must still be
updated. Reintroducing the adjustments based on 'len' fixed a discard
crash Ming reported.
Now fixed in linux-next.
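The fixed helper ends up looking something like this (sketch inferred from
the description above; the exact linux-next commit may differ):

static int __send_changing_extent_only(struct clone_info *ci, struct dm_target *ti,
				       unsigned num_bios)
{
	unsigned len;

	if (!num_bios)
		return -EOPNOTSUPP;

	len = min((sector_t)ci->sector_count,
		  max_io_len_target_boundary(ci->sector, ti));

	__send_duplicate_bios(ci, ti, num_bios, &len);

	/* the reintroduced adjustments: consume the extent just issued */
	ci->sector += len;
	ci->sector_count -= len;

	return 0;
}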
Mike
* Re: [PATCH v2 5/9] dm: improve noclone bio support
From: Mikulas Patocka @ 2019-02-22 10:59 UTC
To: Mike Snitzer; +Cc: dm-devel
Hi
I've found out that this patch causes a timeout in the lvm test
shell/activate-missing-segment.sh and some others.
Mikulas
On Wed, 20 Feb 2019, Mike Snitzer wrote:
> Leverage fact that dm_queue_split() enables use of noclone support even
> for targets that require additional splitting (via ti->max_io_len).
>
> To achieve this move noclone processing from dm_make_request() to
> dm_process_bio() and check if bio->bi_end_io is already set to
> noclone_endio. This also fixes dm_wq_work() to be able to support
> noclone bios (because it calls dm_process_bio()).
>
> While at it, clean up ->map() return processing to be comparable to
> __map_bio() and add some code comments that explain why certain
> conditionals exist to prevent the use of noclone.
>
> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
> ---
> drivers/md/dm.c | 103 ++++++++++++++++++++++++++++++++++----------------------
> 1 file changed, 62 insertions(+), 41 deletions(-)
>
> diff --git a/drivers/md/dm.c b/drivers/md/dm.c
> index 57919f211acc..d84735be6927 100644
> --- a/drivers/md/dm.c
> +++ b/drivers/md/dm.c
> @@ -1764,14 +1764,75 @@ static blk_qc_t dm_process_bio(struct mapped_device *md,
> * If in ->make_request_fn we need to use blk_queue_split(), otherwise
> * queue_limits for abnormal requests (e.g. discard, writesame, etc)
> * won't be imposed.
> + *
> + * For normal READ and WRITE requests we benefit from upfront splitting
> + * via dm_queue_split() because we can then leverage noclone support below.
> */
> - if (current->bio_list) {
> + if (current->bio_list && (bio->bi_end_io != noclone_endio)) {
> + /*
> + * It is fine to use {blk,dm}_queue_split() before opting to use noclone
> + * because any ->bi_private and ->bi_end_io will get saved off below.
> + * Once noclone_endio() is called the bio_chain's endio will kick in.
> + * - __split_and_process_bio() can split further, but any targets that
> + * require late _DM_ initiated splitting (e.g. dm_accept_partial_bio()
> + * callers) shouldn't set ti->no_clone.
> + */
> if (is_abnormal_io(bio))
> blk_queue_split(md->queue, &bio);
> else
> dm_queue_split(md, ti, &bio);
> }
>
> + if (dm_table_supports_noclone(map) &&
> + (bio_op(bio) == REQ_OP_READ || bio_op(bio) == REQ_OP_WRITE) &&
> + likely(!(bio->bi_opf & REQ_PREFLUSH)) &&
> + likely(!bio_integrity(bio)) && /* integrity requires specialized processing */
> + likely(!dm_stats_used(&md->stats))) { /* noclone doesn't support dm-stats */
> + int r;
> + struct dm_noclone *noclone;
> +
> + /* Already been here if bio->bi_end_io is noclone_endio, reentered via dm_wq_work() */
> + if (bio->bi_end_io != noclone_endio) {
> + noclone = kmalloc_node(sizeof(*noclone), GFP_NOWAIT, md->numa_node_id);
> + if (unlikely(!noclone))
> + goto no_fast_path;
> +
> + noclone->md = md;
> + noclone->start_time = jiffies;
> + noclone->orig_bi_end_io = bio->bi_end_io;
> + noclone->orig_bi_private = bio->bi_private;
> + bio->bi_end_io = noclone_endio;
> + bio->bi_private = noclone;
> + } else
> + noclone = bio->bi_private;
> +
> + start_io_acct(md, bio);
> + r = ti->type->map(ti, bio);
> + switch (r) {
> + case DM_MAPIO_SUBMITTED:
> + break;
> + case DM_MAPIO_REMAPPED:
> + /* the bio has been remapped so dispatch it */
> + if (md->type == DM_TYPE_NVME_BIO_BASED)
> + ret = direct_make_request(bio);
> + else
> + ret = generic_make_request(bio);
> + break;
> + case DM_MAPIO_KILL:
> + bio_io_error(bio);
> + break;
> + case DM_MAPIO_REQUEUE:
> + bio->bi_status = BLK_STS_DM_REQUEUE;
> + noclone_endio(bio);
> + break;
> + default:
> + DMWARN("unimplemented target map return value: %d", r);
> + BUG();
> + }
> +
> + return ret;
> + }
> +no_fast_path:
> if (dm_get_md_type(md) == DM_TYPE_NVME_BIO_BASED)
> return __process_bio(md, map, bio, ti);
> else
> @@ -1798,48 +1859,8 @@ static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio)
> return ret;
> }
>
> - if (dm_table_supports_noclone(map) &&
> - (bio_op(bio) == REQ_OP_READ || bio_op(bio) == REQ_OP_WRITE) &&
> - likely(!(bio->bi_opf & REQ_PREFLUSH)) &&
> - !bio_flagged(bio, BIO_CHAIN) &&
> - likely(!bio_integrity(bio)) &&
> - likely(!dm_stats_used(&md->stats))) {
> - int r;
> - struct dm_noclone *noclone;
> - struct dm_target *ti = dm_table_find_target(map, bio->bi_iter.bi_sector);
> - if (unlikely(!dm_target_is_valid(ti)))
> - goto no_fast_path;
> - if (unlikely(bio_sectors(bio) > max_io_len(bio->bi_iter.bi_sector, ti)))
> - goto no_fast_path;
> - noclone = kmalloc_node(sizeof(*noclone), GFP_NOWAIT, md->numa_node_id);
> - if (unlikely(!noclone))
> - goto no_fast_path;
> - noclone->md = md;
> - noclone->start_time = jiffies;
> - noclone->orig_bi_end_io = bio->bi_end_io;
> - noclone->orig_bi_private = bio->bi_private;
> - bio->bi_end_io = noclone_endio;
> - bio->bi_private = noclone;
> - start_io_acct(md, bio);
> - r = ti->type->map(ti, bio);
> - ret = BLK_QC_T_NONE;
> - if (likely(r == DM_MAPIO_REMAPPED)) {
> - ret = generic_make_request(bio);
> - } else if (likely(r == DM_MAPIO_SUBMITTED)) {
> - } else if (r == DM_MAPIO_KILL) {
> - bio->bi_status = BLK_STS_IOERR;
> - noclone_endio(bio);
> - } else {
> - DMWARN("unimplemented target map return value: %d", r);
> - BUG();
> - }
> - goto put_table_ret;
> - }
> -
> -no_fast_path:
> ret = dm_process_bio(md, map, bio);
>
> -put_table_ret:
> dm_put_live_table(md, srcu_idx);
> return ret;
> }
> --
> 2.15.0
>
* Re: [PATCH v2 5/9] dm: improve noclone bio support
From: Mike Snitzer @ 2019-02-22 15:22 UTC
To: Mikulas Patocka; +Cc: dm-devel
On Fri, Feb 22 2019 at 5:59am -0500,
Mikulas Patocka <mpatocka@redhat.com> wrote:
> Hi
>
> I've fonud out that this patch causes timeout in the lvm test
> shell/activate-missing-segment.sh and some others.
OK, I can reproduce with:
make check_local T=shell/activate-missing-segment.sh
Interestingly, it isn't hung in the kernel; the test can be interrupted.
And it is hanging at shell/activate-missing-segment.sh's: aux disable_dev "$dev1"
Other tests with "aux disable_dev" hang too
(e.g. shell/lvcreate-repair.sh, shell/vgreduce-usage.sh).
continued below:
> On Wed, 20 Feb 2019, Mike Snitzer wrote:
>
> > Leverage the fact that dm_queue_split() enables use of noclone support even
> > for targets that require additional splitting (via ti->max_io_len).
> >
> > To achieve this, move noclone processing from dm_make_request() to
> > dm_process_bio() and check whether bio->bi_end_io is already set to
> > noclone_endio. This also fixes dm_wq_work() to be able to support
> > noclone bios (because it calls dm_process_bio()).
> >
> > While at it, clean up ->map() return processing to be comparable to
> > __map_bio() and add some code comments that explain why certain
> > conditionals exist to prevent the use of noclone.
> >
> > Signed-off-by: Mike Snitzer <snitzer@redhat.com>
> > ---
> > drivers/md/dm.c | 103 ++++++++++++++++++++++++++++++++++----------------------
> > 1 file changed, 62 insertions(+), 41 deletions(-)
> >
> > diff --git a/drivers/md/dm.c b/drivers/md/dm.c
> > index 57919f211acc..d84735be6927 100644
> > --- a/drivers/md/dm.c
> > +++ b/drivers/md/dm.c
> > @@ -1764,14 +1764,75 @@ static blk_qc_t dm_process_bio(struct mapped_device *md,
> > * If in ->make_request_fn we need to use blk_queue_split(), otherwise
> > * queue_limits for abnormal requests (e.g. discard, writesame, etc)
> > * won't be imposed.
> > + *
> > + * For normal READ and WRITE requests we benefit from upfront splitting
> > + * via dm_queue_split() because we can then leverage noclone support below.
> > */
> > - if (current->bio_list) {
> > + if (current->bio_list && (bio->bi_end_io != noclone_endio)) {
> > + /*
> > + * It is fine to use {blk,dm}_queue_split() before opting to use noclone
> > + * because any ->bi_private and ->bi_end_io will get saved off below.
> > + * Once noclone_endio() is called the bio_chain's endio will kick in.
> > + * - __split_and_process_bio() can split further, but any targets that
> > + * require late _DM_ initiated splitting (e.g. dm_accept_partial_bio()
> > + * callers) shouldn't set ti->no_clone.
> > + */
> > if (is_abnormal_io(bio))
> > blk_queue_split(md->queue, &bio);
> > else
> > dm_queue_split(md, ti, &bio);
> > }
> >
> > + if (dm_table_supports_noclone(map) &&
> > + (bio_op(bio) == REQ_OP_READ || bio_op(bio) == REQ_OP_WRITE) &&
> > + likely(!(bio->bi_opf & REQ_PREFLUSH)) &&
> > + likely(!bio_integrity(bio)) && /* integrity requires specialized processing */
> > + likely(!dm_stats_used(&md->stats))) { /* noclone doesn't support dm-stats */
> > + int r;
> > + struct dm_noclone *noclone;
> > +
> > + /* Already been here if bio->bi_end_io is noclone_endio, reentered via dm_wq_work() */
> > + if (bio->bi_end_io != noclone_endio) {
This conditional is causing it ^
Very strange...
> > + noclone = kmalloc_node(sizeof(*noclone), GFP_NOWAIT, md->numa_node_id);
> > + if (unlikely(!noclone))
> > + goto no_fast_path;
> > +
> > + noclone->md = md;
> > + noclone->start_time = jiffies;
> > + noclone->orig_bi_end_io = bio->bi_end_io;
> > + noclone->orig_bi_private = bio->bi_private;
> > + bio->bi_end_io = noclone_endio;
> > + bio->bi_private = noclone;
> > + } else
> > + noclone = bio->bi_private;
> > +
> > + start_io_acct(md, bio);
> > + r = ti->type->map(ti, bio);
> > + switch (r) {
> > + case DM_MAPIO_SUBMITTED:
> > + break;
> > + case DM_MAPIO_REMAPPED:
> > + /* the bio has been remapped so dispatch it */
> > + if (md->type == DM_TYPE_NVME_BIO_BASED)
> > + ret = direct_make_request(bio);
> > + else
> > + ret = generic_make_request(bio);
> > + break;
> > + case DM_MAPIO_KILL:
> > + bio_io_error(bio);
> > + break;
> > + case DM_MAPIO_REQUEUE:
> > + bio->bi_status = BLK_STS_DM_REQUEUE;
> > + noclone_endio(bio);
> > + break;
> > + default:
> > + DMWARN("unimplemented target map return value: %d", r);
> > + BUG();
> > + }
> > +
> > + return ret;
> > + }
> > +no_fast_path:
> > if (dm_get_md_type(md) == DM_TYPE_NVME_BIO_BASED)
> > return __process_bio(md, map, bio, ti);
> > else
* Re: [PATCH v2 5/9] dm: improve noclone bio support
2019-02-22 15:22 ` Mike Snitzer
@ 2019-02-22 16:56 ` Mike Snitzer
0 siblings, 0 replies; 14+ messages in thread
From: Mike Snitzer @ 2019-02-22 16:56 UTC (permalink / raw)
To: Mikulas Patocka; +Cc: dm-devel
On Fri, Feb 22 2019 at 10:22am -0500,
Mike Snitzer <snitzer@redhat.com> wrote:
> On Fri, Feb 22 2019 at 5:59am -0500,
> Mikulas Patocka <mpatocka@redhat.com> wrote:
>
> > Hi
> >
> > I've found out that this patch causes a timeout in the lvm test
> > shell/activate-missing-segment.sh and some others.
>
> OK, I can reproduce with:
> make check_local T=shell/activate-missing-segment.sh
>
> Interestingly, it isn't hung in the kernel; the test can be interrupted.
> And it is hanging at shell/activate-missing-segment.sh's: aux disable_dev "$dev1"
>
> Other tests that use "aux disable_dev" hang too
> (e.g. shell/lvcreate-repair.sh, shell/vgreduce-usage.sh).
>
> continued below:
>
> > On Wed, 20 Feb 2019, Mike Snitzer wrote:
> >
> > > Leverage the fact that dm_queue_split() enables use of noclone support even
> > > for targets that require additional splitting (via ti->max_io_len).
> > >
> > > To achieve this, move noclone processing from dm_make_request() to
> > > dm_process_bio() and check whether bio->bi_end_io is already set to
> > > noclone_endio. This also fixes dm_wq_work() to be able to support
> > > noclone bios (because it calls dm_process_bio()).
> > >
> > > While at it, clean up ->map() return processing to be comparable to
> > > __map_bio() and add some code comments that explain why certain
> > > conditionals exist to prevent the use of noclone.
> > >
> > > Signed-off-by: Mike Snitzer <snitzer@redhat.com>
> > > ---
> > > drivers/md/dm.c | 103 ++++++++++++++++++++++++++++++++++----------------------
> > > 1 file changed, 62 insertions(+), 41 deletions(-)
> > >
> > > diff --git a/drivers/md/dm.c b/drivers/md/dm.c
> > > index 57919f211acc..d84735be6927 100644
> > > --- a/drivers/md/dm.c
> > > +++ b/drivers/md/dm.c
> > > @@ -1764,14 +1764,75 @@ static blk_qc_t dm_process_bio(struct mapped_device *md,
> > > * If in ->make_request_fn we need to use blk_queue_split(), otherwise
> > > * queue_limits for abnormal requests (e.g. discard, writesame, etc)
> > > * won't be imposed.
> > > + *
> > > + * For normal READ and WRITE requests we benefit from upfront splitting
> > > + * via dm_queue_split() because we can then leverage noclone support below.
> > > */
> > > - if (current->bio_list) {
> > > + if (current->bio_list && (bio->bi_end_io != noclone_endio)) {
> > > + /*
> > > + * It is fine to use {blk,dm}_queue_split() before opting to use noclone
> > > + * because any ->bi_private and ->bi_end_io will get saved off below.
> > > + * Once noclone_endio() is called the bio_chain's endio will kick in.
> > > + * - __split_and_process_bio() can split further, but any targets that
> > > + * require late _DM_ initiated splitting (e.g. dm_accept_partial_bio()
> > > + * callers) shouldn't set ti->no_clone.
> > > + */
> > > if (is_abnormal_io(bio))
> > > blk_queue_split(md->queue, &bio);
> > > else
> > > dm_queue_split(md, ti, &bio);
> > > }
> > >
> > > + if (dm_table_supports_noclone(map) &&
> > > + (bio_op(bio) == REQ_OP_READ || bio_op(bio) == REQ_OP_WRITE) &&
> > > + likely(!(bio->bi_opf & REQ_PREFLUSH)) &&
> > > + likely(!bio_integrity(bio)) && /* integrity requires specialized processing */
> > > + likely(!dm_stats_used(&md->stats))) { /* noclone doesn't support dm-stats */
> > > + int r;
> > > + struct dm_noclone *noclone;
> > > +
> > > + /* Already been here if bio->bi_end_io is noclone_endio, reentered via dm_wq_work() */
> > > + if (bio->bi_end_io != noclone_endio) {
>
> This conditional is causing it ^
>
> Very strange...
Added some debugging:
Feb 22 10:46:28 thegoat kernel: device-mapper: core: dm_process_bio: 253:2: bio=00000000c70f3c09 bio->bi_iter.bi_sector=0 len=256 bio->bi_end_io != noclone_endio
Feb 22 10:46:28 thegoat kernel: device-mapper: core: dm_process_bio: 253:0: bio=00000000c70f3c09 bio->bi_iter.bi_sector=2048 len=256 bio->bi_end_io = noclone_endio
...
Feb 22 10:46:28 thegoat kernel: device-mapper: core: dm_process_bio: 253:2: bio=00000000da9cadf3 bio->bi_iter.bi_sector=0 len=256 bio->bi_end_io != noclone_endio
Feb 22 10:46:28 thegoat kernel: device-mapper: core: dm_process_bio: 253:0: bio=00000000da9cadf3 bio->bi_iter.bi_sector=2048 len=256 bio->bi_end_io = noclone_endio
So the same noclone bio is entering successive devices in the stacked DM setup
(perfectly normal for stacked DM), and by the time it reaches the lower device
its bi_end_io is already noclone_endio.
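There are two ways a bio can arrive at the fast path with bi_end_io already
set to noclone_endio, and the conditional only anticipated one of them. A
sketch of my reading (the annotations are illustrative, not code from the
patch):

	/*
	 * 1) Reentry via dm_wq_work(): same md, so the noclone hanging
	 *    off bio->bi_private is our own -- reusing it is correct.
	 *
	 * 2) Stacked DM: an upper device allocated the noclone and
	 *    remapped the bio into this device via generic_make_request(),
	 *    so the noclone in bio->bi_private belongs to the *upper* md.
	 *    Reusing it skips this device's own noclone allocation, and
	 *    its io accounting presumably never balances.
	 *
	 * The old test cannot tell these two cases apart:
	 */
	if (bio->bi_end_io != noclone_endio) {
		/* allocate and initialize a new noclone */
	}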
This fixes the issue:
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index a9856f1986df..8fbe4d5ec8e4 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1831,15 +1831,16 @@ static blk_qc_t dm_process_bio(struct mapped_device *md,
likely(!bio_integrity(bio)) && /* integrity requires specialized processing */
likely(!dm_stats_used(&md->stats))) { /* noclone doesn't support dm-stats */
int r;
- struct dm_noclone *noclone;
-
- /* Already been here if bio->bi_end_io is noclone_endio, reentered via dm_wq_work() */
- if (bio->bi_end_io != noclone_endio) {
+ /*
+ * Only allocate noclone if in ->make_request_fn, otherwise
+ * leak could occur due to reentering (e.g. from dm_wq_work)
+ */
+ if (current->bio_list) {
+ struct dm_noclone *noclone;
noclone = kmalloc_node(sizeof(*noclone) + ti->per_io_data_size + sizeof(unsigned),
GFP_NOWAIT, md->numa_node_id);
if (unlikely(!noclone))
goto no_fast_path;
-
noclone->md = md;
noclone->start_time = jiffies;
noclone->orig_bi_end_io = bio->bi_end_io;
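current->bio_list works as the discriminator because it is only non-NULL while
inside generic_make_request() recursion. A simplified sketch of what
block/blk-core.c does in this era (not an exact copy):

	blk_qc_t generic_make_request(struct bio *bio)
	{
		struct request_queue *q = bio->bi_disk->queue;
		struct bio_list bio_list_on_stack[2];
		blk_qc_t ret;
		/* ... */
		current->bio_list = bio_list_on_stack;	/* non-NULL from here... */
		ret = q->make_request_fn(q, bio);	/* -> dm_make_request() -> dm_process_bio() */
		/* ... */
		current->bio_list = NULL;		/* ...until here */
		return ret;
	}

dm_wq_work() runs from md->wq (workqueue context), where current->bio_list is
NULL, so the requeue path won't allocate a second noclone for a bio that
already has one.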
That said, I've reasoned through other edge cases that need to be dealt
with and will layer other fixes in addition to this one.
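One note on the allocation size in the fix above: with patch 6/9 applied, the
target's per-bio-data travels in the same kmalloc as the noclone. My assumption
about the intended layout (the trailing unsigned is allocated too, but its use
isn't visible in this excerpt):

	/*
	 *   +-------------------+----------------------+------------+
	 *   | struct dm_noclone | ti->per_io_data_size | (unsigned) |
	 *   +-------------------+----------------------+------------+
	 *
	 * A target could then reach its per-bio-data without a second
	 * allocation, e.g. via a hypothetical helper:
	 */
	static inline void *noclone_per_bio_data(struct dm_noclone *noclone)
	{
		return noclone + 1;	/* memory immediately after the struct */
	}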
Thread overview: 14+ messages:
2019-02-20 21:44 [PATCH v2 0/9] dm: changes staged in linux-next for 5.1 so far Mike Snitzer
2019-02-20 21:44 ` [PATCH v2 1/9] dm: update dm_process_bio() to split bio if in ->make_request_fn() Mike Snitzer
2019-02-20 21:44 ` [PATCH v2 2/9] dm: eliminate 'split_discard_bios' flag from DM target interface Mike Snitzer
2019-02-21 4:36 ` Mike Snitzer
2019-02-20 21:44 ` [PATCH v2 3/9] dm: refactor start_io_acct and end_io_acct Mike Snitzer
2019-02-20 21:44 ` [PATCH v2 4/9] dm: implement noclone optimization for bio-based Mike Snitzer
2019-02-20 21:44 ` [PATCH v2 5/9] dm: improve noclone bio support Mike Snitzer
2019-02-22 10:59 ` Mikulas Patocka
2019-02-22 15:22 ` Mike Snitzer
2019-02-22 16:56 ` Mike Snitzer
2019-02-20 21:44 ` [PATCH v2 6/9] dm: add per-bio-data support to noclone bio Mike Snitzer
2019-02-20 21:44 ` [PATCH v2 7/9] dm: improve noclone_endio() to support multipath target Mike Snitzer
2019-02-20 21:44 ` [PATCH v2 8/9] dm mpath: enable noclone support for bio-based Mike Snitzer
2019-02-20 21:44 ` [PATCH v2 9/9] dm: remove unused _rq_tio_cache and _rq_cache Mike Snitzer