QEMU-Devel Archive mirror
 help / color / mirror / Atom feed
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
To: Emanuele Giuseppe Esposito <eesposit@redhat.com>, qemu-block@nongnu.org
Cc: John Snow <jsnow@redhat.com>, Kevin Wolf <kwolf@redhat.com>,
	Max Reitz <mreitz@redhat.com>,
	Paolo Bonzini <pbonzini@redhat.com>,
	Stefan Hajnoczi <stefanha@redhat.com>,
	qemu-devel@nongnu.org
Subject: Re: [PATCH v3 1/5] block-copy: streamline choice of copy_range vs. read/write
Date: Wed, 9 Jun 2021 11:51:21 +0300	[thread overview]
Message-ID: <ddc23736-d6ce-cdde-21b8-f809ef65ea65@virtuozzo.com> (raw)
In-Reply-To: <20210608073344.53637-2-eesposit@redhat.com>

08.06.2021 10:33, Emanuele Giuseppe Esposito wrote:
> From: Paolo Bonzini <pbonzini@redhat.com>
> 
> Put the logic to determine the copy size in a separate function, so
> that there is a simple state machine for the possible methods of
> copying data from one BlockDriverState to the other.
> 
> Use .method instead of .copy_range as in-out argument, and
> include also .zeroes as an additional copy method.
> 
> While at it, store the common computation of block_copy_max_transfer
> into a new field of BlockCopyState, and make sure that we always
> obey max_transfer; that's more efficient even for the
> COPY_RANGE_READ_WRITE case.
> 
> Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

In general looks OK to me, I mostly have style remarks, see below.

> ---
>   block/block-copy.c | 171 ++++++++++++++++++++++-----------------------
>   1 file changed, 85 insertions(+), 86 deletions(-)
> 
> diff --git a/block/block-copy.c b/block/block-copy.c
> index 943e30b7e6..d58051288b 100644
> --- a/block/block-copy.c
> +++ b/block/block-copy.c
> @@ -28,6 +28,14 @@
>   #define BLOCK_COPY_MAX_WORKERS 64
>   #define BLOCK_COPY_SLICE_TIME 100000000ULL /* ns */
>   
> +typedef enum {
> +    COPY_READ_WRITE_CLUSTER,
> +    COPY_READ_WRITE,
> +    COPY_WRITE_ZEROES,
> +    COPY_RANGE_SMALL,
> +    COPY_RANGE_FULL
> +} BlockCopyMethod;
> +
>   static coroutine_fn int block_copy_task_entry(AioTask *task);
>   
>   typedef struct BlockCopyCallState {
> @@ -64,8 +72,7 @@ typedef struct BlockCopyTask {
>       BlockCopyCallState *call_state;
>       int64_t offset;
>       int64_t bytes;
> -    bool zeroes;
> -    bool copy_range;
> +    BlockCopyMethod method;
>       QLIST_ENTRY(BlockCopyTask) list;
>       CoQueue wait_queue; /* coroutines blocked on this task */
>   } BlockCopyTask;
> @@ -86,8 +93,8 @@ typedef struct BlockCopyState {
>       BdrvDirtyBitmap *copy_bitmap;
>       int64_t in_flight_bytes;
>       int64_t cluster_size;
> -    bool use_copy_range;
> -    int64_t copy_size;
> +    BlockCopyMethod method;
> +    int64_t max_transfer;
>       uint64_t len;
>       QLIST_HEAD(, BlockCopyTask) tasks; /* All tasks from all block-copy calls */
>       QLIST_HEAD(, BlockCopyCallState) calls;
> @@ -149,6 +156,24 @@ static bool coroutine_fn block_copy_wait_one(BlockCopyState *s, int64_t offset,
>       return true;
>   }
>   
> +static int64_t block_copy_chunk_size(BlockCopyState *s)
> +{
> +    switch (s->method) {
> +    case COPY_READ_WRITE_CLUSTER:
> +        return s->cluster_size;
> +    case COPY_READ_WRITE:
> +    case COPY_RANGE_SMALL:
> +        return MIN(MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER),
> +                   s->max_transfer);
> +    case COPY_RANGE_FULL:
> +        return MIN(MAX(s->cluster_size, BLOCK_COPY_MAX_COPY_RANGE),
> +                   s->max_transfer);
> +    default:
> +        /* Cannot have COPY_WRITE_ZEROES here.  */
> +        abort();
> +    }
> +}
> +
>   /*
>    * Search for the first dirty area in offset/bytes range and create task at
>    * the beginning of it.
> @@ -158,8 +183,9 @@ static BlockCopyTask *block_copy_task_create(BlockCopyState *s,
>                                                int64_t offset, int64_t bytes)
>   {
>       BlockCopyTask *task;
> -    int64_t max_chunk = MIN_NON_ZERO(s->copy_size, call_state->max_chunk);
> +    int64_t max_chunk = block_copy_chunk_size(s);
>   
> +    max_chunk = MIN_NON_ZERO(max_chunk, call_state->max_chunk);
>       if (!bdrv_dirty_bitmap_next_dirty_area(s->copy_bitmap,
>                                              offset, offset + bytes,
>                                              max_chunk, &offset, &bytes))
> @@ -183,7 +209,7 @@ static BlockCopyTask *block_copy_task_create(BlockCopyState *s,
>           .call_state = call_state,
>           .offset = offset,
>           .bytes = bytes,
> -        .copy_range = s->use_copy_range,
> +        .method = s->method,
>       };
>       qemu_co_queue_init(&task->wait_queue);
>       QLIST_INSERT_HEAD(&s->tasks, task, list);
> @@ -267,28 +293,27 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
>           .len = bdrv_dirty_bitmap_size(copy_bitmap),
>           .write_flags = write_flags,
>           .mem = shres_create(BLOCK_COPY_MAX_MEM),
> +        .max_transfer = QEMU_ALIGN_DOWN(block_copy_max_transfer(source, target)
> +                                        , cluster_size),

It seems unusual to not keep comma on the previous line (and it's actually fit into 80 characters).

I've grepped several places with '^\s*,' pattern, for example in target/mips/tcg/msa_helper.c. But at least in this file there is no such practice, let's be consistent.

>       };
>   
> -    if (block_copy_max_transfer(source, target) < cluster_size) {
> +    if (s->max_transfer < cluster_size) {
>           /*
>            * copy_range does not respect max_transfer. We don't want to bother
>            * with requests smaller than block-copy cluster size, so fallback to
>            * buffered copying (read and write respect max_transfer on their
>            * behalf).
>            */
> -        s->use_copy_range = false;
> -        s->copy_size = cluster_size;
> +        s->method = COPY_READ_WRITE_CLUSTER;
>       } else if (write_flags & BDRV_REQ_WRITE_COMPRESSED) {
>           /* Compression supports only cluster-size writes and no copy-range. */
> -        s->use_copy_range = false;
> -        s->copy_size = cluster_size;
> +        s->method = COPY_READ_WRITE_CLUSTER;
>       } else {
>           /*
>            * We enable copy-range, but keep small copy_size, until first
>            * successful copy_range (look at block_copy_do_copy).
>            */
> -        s->use_copy_range = use_copy_range;
> -        s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER);
> +        s->method = use_copy_range ? COPY_RANGE_SMALL : COPY_READ_WRITE;
>       }
>   
>       ratelimit_init(&s->rate_limit);
> @@ -343,17 +368,14 @@ static coroutine_fn int block_copy_task_run(AioTaskPool *pool,
>    *
>    * No sync here: nor bitmap neighter intersecting requests handling, only copy.
>    *
> - * @copy_range is an in-out argument: if *copy_range is false, copy_range is not
> - * done. If *copy_range is true, copy_range is attempted. If the copy_range
> - * attempt fails, the function falls back to the usual read+write and
> - * *copy_range is set to false. *copy_range and zeroes must not be true
> - * simultaneously.
> - *
> + * @method is an in-out argument, so that copy_range can be either extended to
> + * a full-size buffer or disabled if the copy_range attempt fails.  The output
> + * value of @method should be used for subsequent tasks.
>    * Returns 0 on success.
>    */
>   static int coroutine_fn block_copy_do_copy(BlockCopyState *s,
>                                              int64_t offset, int64_t bytes,
> -                                           bool zeroes, bool *copy_range,
> +                                           BlockCopyMethod *method,
>                                              bool *error_is_read)
>   {
>       int ret;
> @@ -367,9 +389,9 @@ static int coroutine_fn block_copy_do_copy(BlockCopyState *s,
>       assert(offset + bytes <= s->len ||
>              offset + bytes == QEMU_ALIGN_UP(s->len, s->cluster_size));
>       assert(nbytes < INT_MAX);
> -    assert(!(*copy_range && zeroes));
>   
> -    if (zeroes) {
> +    switch (*method) {
> +    case COPY_WRITE_ZEROES:
>           ret = bdrv_co_pwrite_zeroes(s->target, offset, nbytes, s->write_flags &
>                                       ~BDRV_REQ_WRITE_COMPRESSED);
>           if (ret < 0) {
> @@ -377,89 +399,67 @@ static int coroutine_fn block_copy_do_copy(BlockCopyState *s,
>               *error_is_read = false;
>           }
>           return ret;
> -    }
>   
> -    if (*copy_range) {
> +    case COPY_RANGE_SMALL:
> +    case COPY_RANGE_FULL:
>           ret = bdrv_co_copy_range(s->source, offset, s->target, offset, nbytes,
>                                    0, s->write_flags);
> -        if (ret < 0) {
> -            trace_block_copy_copy_range_fail(s, offset, ret);
> -            *copy_range = false;
> -            /* Fallback to read+write with allocated buffer */
> -        } else {
> +        if (ret >= 0) {
> +            /* Successful copy-range, increase copy_size.  */
> +            *method = COPY_RANGE_FULL;
>               return 0;
>           }
> -    }
> -
> -    /*
> -     * In case of failed copy_range request above, we may proceed with buffered
> -     * request larger than BLOCK_COPY_MAX_BUFFER. Still, further requests will
> -     * be properly limited, so don't care too much. Moreover the most likely
> -     * case (copy_range is unsupported for the configuration, so the very first
> -     * copy_range request fails) is handled by setting large copy_size only
> -     * after first successful copy_range.
> -     */
>   
> -    bounce_buffer = qemu_blockalign(s->source->bs, nbytes);
> +        trace_block_copy_copy_range_fail(s, offset, ret);
> +        *method = COPY_READ_WRITE;
> +        /* Fall through to read+write with allocated buffer */
>   
> -    ret = bdrv_co_pread(s->source, offset, nbytes, bounce_buffer, 0);
> -    if (ret < 0) {
> -        trace_block_copy_read_fail(s, offset, ret);
> -        *error_is_read = true;
> -        goto out;
> -    }
> +    default:

It would be safer to explicitly write remaining cases here and then abort() in default:.

> +        /*
> +         * In case of failed copy_range request above, we may proceed with
> +         * buffered request larger than BLOCK_COPY_MAX_BUFFER.
> +         * Still, further requests will be properly limited, so don't care too
> +         * much. Moreover the most likely case (copy_range is unsupported for
> +         * the configuration, so the very first copy_range request fails)
> +         * is handled by setting large copy_size only after first successful
> +         * copy_range.
> +         */
>   
> -    ret = bdrv_co_pwrite(s->target, offset, nbytes, bounce_buffer,
> -                         s->write_flags);
> -    if (ret < 0) {
> -        trace_block_copy_write_fail(s, offset, ret);
> -        *error_is_read = false;
> -        goto out;
> -    }
> +        bounce_buffer = qemu_blockalign(s->source->bs, nbytes);
>   
> -out:
> -    qemu_vfree(bounce_buffer);
> +        ret = bdrv_co_pread(s->source, offset, nbytes, bounce_buffer, 0);
> +        if (ret < 0) {
> +            trace_block_copy_read_fail(s, offset, ret);
> +            *error_is_read = true;
> +            goto out;
> +        }
>   
> -    return ret;
> -}
> +        ret = bdrv_co_pwrite(s->target, offset, nbytes, bounce_buffer,
> +                            s->write_flags);

alignment broken

> +        if (ret < 0) {
> +            trace_block_copy_write_fail(s, offset, ret);
> +            *error_is_read = false;
> +            goto out;
> +        }
>   
> -static void block_copy_handle_copy_range_result(BlockCopyState *s,
> -                                                bool is_success)
> -{
> -    if (!s->use_copy_range) {
> -        /* already disabled */
> -        return;
> +out:
> +        qemu_vfree(bounce_buffer);

label inside switch operator? Rather unusual. Please, let's avoid it and just keep out: after switch operator.

>       }
>   
> -    if (is_success) {
> -        /*
> -         * Successful copy-range. Now increase copy_size.  copy_range
> -         * does not respect max_transfer (it's a TODO), so we factor
> -         * that in here.
> -         */
> -        s->copy_size =
> -                MIN(MAX(s->cluster_size, BLOCK_COPY_MAX_COPY_RANGE),
> -                    QEMU_ALIGN_DOWN(block_copy_max_transfer(s->source,
> -                                                            s->target),
> -                                    s->cluster_size));
> -    } else {
> -        /* Copy-range failed, disable it. */
> -        s->use_copy_range = false;
> -        s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER);
> -    }
> +    return ret;
>   }
>   
>   static coroutine_fn int block_copy_task_entry(AioTask *task)
>   {
>       BlockCopyTask *t = container_of(task, BlockCopyTask, task);
> +    BlockCopyState *s = t->s;
>       bool error_is_read = false;
> -    bool copy_range = t->copy_range;
> +    BlockCopyMethod method = t->method;
>       int ret;
>   
> -    ret = block_copy_do_copy(t->s, t->offset, t->bytes, t->zeroes,
> -                             &copy_range, &error_is_read);
> -    if (t->copy_range) {
> -        block_copy_handle_copy_range_result(t->s, copy_range);
> +    ret = block_copy_do_copy(s, t->offset, t->bytes, &method, &error_is_read);
> +    if (s->method == t->method) {
> +        s->method = method;

you leave another t->s occurrences in the function untouched. It's somehow inconsistent. Could we just use t->s in this patch, and refactor with a follow-up patch (or as preparing patch)?

>       }
>       if (ret < 0) {
>           if (!t->call_state->ret) {
> @@ -642,8 +642,7 @@ block_copy_dirty_clusters(BlockCopyCallState *call_state)
>               continue;
>           }
>           if (ret & BDRV_BLOCK_ZERO) {
> -            task->zeroes = true;
> -            task->copy_range = false;
> +            task->method = COPY_WRITE_ZEROES;
>           }
>   
>           if (!call_state->ignore_ratelimit) {
> 



-- 
Best regards,
Vladimir


  reply	other threads:[~2021-06-09  8:52 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-06-08  7:33 [PATCH v3 0/5] block-copy: protect block-copy internal structures Emanuele Giuseppe Esposito
2021-06-08  7:33 ` [PATCH v3 1/5] block-copy: streamline choice of copy_range vs. read/write Emanuele Giuseppe Esposito
2021-06-09  8:51   ` Vladimir Sementsov-Ogievskiy [this message]
2021-06-09  9:33     ` Paolo Bonzini
2021-06-09 10:09       ` Vladimir Sementsov-Ogievskiy
2021-06-09 10:54       ` Vladimir Sementsov-Ogievskiy
2021-06-08  7:33 ` [PATCH v3 2/5] block-copy: improve comments of BlockCopyTask and BlockCopyState types and functions Emanuele Giuseppe Esposito
2021-06-09  9:12   ` Vladimir Sementsov-Ogievskiy
2021-06-10 10:14     ` Emanuele Giuseppe Esposito
2021-06-10 10:27       ` Vladimir Sementsov-Ogievskiy
2021-06-10 10:46         ` Emanuele Giuseppe Esposito
2021-06-10 11:12           ` Vladimir Sementsov-Ogievskiy
2021-06-10 14:21             ` Emanuele Giuseppe Esposito
2021-06-10 15:05               ` Vladimir Sementsov-Ogievskiy
2021-06-08  7:33 ` [PATCH v3 3/5] block-copy: move progress_set_remaining in block_copy_task_end Emanuele Giuseppe Esposito
2021-06-08  7:33 ` [PATCH v3 4/5] block-copy: add a CoMutex Emanuele Giuseppe Esposito
2021-06-09 12:25   ` Vladimir Sementsov-Ogievskiy
2021-06-10 14:49     ` Emanuele Giuseppe Esposito
2021-06-08  7:33 ` [PATCH v3 5/5] block-copy: atomic .cancelled and .finished fields in BlockCopyCallState Emanuele Giuseppe Esposito

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ddc23736-d6ce-cdde-21b8-f809ef65ea65@virtuozzo.com \
    --to=vsementsov@virtuozzo.com \
    --cc=eesposit@redhat.com \
    --cc=jsnow@redhat.com \
    --cc=kwolf@redhat.com \
    --cc=mreitz@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=qemu-block@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=stefanha@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).