From: "Maciej S. Szmigiero" <mail@maciej.szmigiero.name>
To: Peter Xu <peterx@redhat.com>, Fabiano Rosas <farosas@suse.de>
Cc: "Alex Williamson" <alex.williamson@redhat.com>,
"Cédric Le Goater" <clg@redhat.com>,
"Eric Blake" <eblake@redhat.com>,
"Markus Armbruster" <armbru@redhat.com>,
"Avihai Horon" <avihaih@nvidia.com>,
"Joao Martins" <joao.m.martins@oracle.com>,
qemu-devel@nongnu.org
Subject: [PATCH RFC 21/26] migration/multifd: Device state transfer support - receive side
Date: Tue, 16 Apr 2024 16:43:00 +0200 [thread overview]
Message-ID: <a7a20b0de049b53d27b4ce8363fc68e601155104.1713269378.git.maciej.szmigiero@oracle.com> (raw)
In-Reply-To: <cover.1713269378.git.maciej.szmigiero@oracle.com>
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
Add basic support for receiving device state via multifd channels -
either dedicated ones or ones shared with RAM transfer.
To differentiate between a device state and a RAM packet the packet
header is read first.
Depending on whether the MULTIFD_FLAG_DEVICE_STATE flag is present in the
packet header, either device state (MultiFDPacketDeviceState_t) or RAM
data (existing MultiFDPacket_t) is then read.
The received device state data is provided to the
qemu_loadvm_load_state_buffer() function for processing in the
device's load_state_buffer handler.
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
---
migration/migration.c | 7 +-
migration/multifd.c | 146 ++++++++++++++++++++++++++++++++++++------
migration/multifd.h | 34 +++++++++-
3 files changed, 163 insertions(+), 24 deletions(-)
diff --git a/migration/migration.c b/migration/migration.c
index e4f82695a338..ea2c8a043a77 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -987,7 +987,7 @@ static void migration_ioc_process_incoming_no_header(QIOChannel *ioc,
/* Multiple connections */
assert(migration_needs_multiple_sockets());
if (migrate_multifd()) {
- multifd_recv_new_channel(ioc, &local_err);
+ multifd_recv_new_channel(ioc, false, &local_err);
} else {
assert(migrate_postcopy_preempt());
f = qemu_file_new_input(ioc);
@@ -1031,6 +1031,7 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
postcopy_preempt_new_channel(migration_incoming_get_current(), f);
break;
case MIG_CHANNEL_TYPE_MULTIFD:
+ case MIG_CHANNEL_TYPE_MULTIFD_DEVICE_STATE:
{
Error *local_err = NULL;
@@ -1039,7 +1040,9 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
return;
}
- multifd_recv_new_channel(ioc, &local_err);
+ multifd_recv_new_channel(ioc,
+ header.channel_type == MIG_CHANNEL_TYPE_MULTIFD_DEVICE_STATE,
+ &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
diff --git a/migration/multifd.c b/migration/multifd.c
index 7118c69a4d49..a26418d87485 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -22,6 +22,7 @@
#include "file.h"
#include "migration.h"
#include "migration-stats.h"
+#include "savevm.h"
#include "socket.h"
#include "tls.h"
#include "qemu-file.h"
@@ -404,7 +405,7 @@ void multifd_send_fill_packet(MultiFDSendParams *p)
uint32_t zero_num = pages->num - pages->normal_num;
int i;
- packet->flags = cpu_to_be32(p->flags);
+ packet->hdr.flags = cpu_to_be32(p->flags);
packet->pages_alloc = cpu_to_be32(p->pages->allocated);
packet->normal_pages = cpu_to_be32(pages->normal_num);
packet->zero_pages = cpu_to_be32(zero_num);
@@ -432,28 +433,44 @@ void multifd_send_fill_packet(MultiFDSendParams *p)
p->flags, p->next_packet_size);
}
-static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
+static int multifd_recv_unfill_packet_header(MultiFDRecvParams *p, MultiFDPacketHdr_t *hdr,
+ Error **errp)
{
- MultiFDPacket_t *packet = p->packet;
- int i;
-
- packet->magic = be32_to_cpu(packet->magic);
- if (packet->magic != MULTIFD_MAGIC) {
+ hdr->magic = be32_to_cpu(hdr->magic);
+ if (hdr->magic != MULTIFD_MAGIC) {
error_setg(errp, "multifd: received packet "
"magic %x and expected magic %x",
- packet->magic, MULTIFD_MAGIC);
+ hdr->magic, MULTIFD_MAGIC);
return -1;
}
- packet->version = be32_to_cpu(packet->version);
- if (packet->version != MULTIFD_VERSION) {
+ hdr->version = be32_to_cpu(hdr->version);
+ if (hdr->version != MULTIFD_VERSION) {
error_setg(errp, "multifd: received packet "
"version %u and expected version %u",
- packet->version, MULTIFD_VERSION);
+ hdr->version, MULTIFD_VERSION);
return -1;
}
- p->flags = be32_to_cpu(packet->flags);
+ p->flags = be32_to_cpu(hdr->flags);
+
+ return 0;
+}
+
+static int multifd_recv_unfill_packet_device_state(MultiFDRecvParams *p, Error **errp)
+{
+    /* Convert the wire fields with be32_to_cpu(); no failure path, errp unused */
+    MultiFDPacketDeviceState_t *dev_pkt = p->packet_dev_state;
+    uint32_t payload_len = be32_to_cpu(dev_pkt->next_packet_size);
+
+    dev_pkt->instance_id = be32_to_cpu(dev_pkt->instance_id);
+    p->next_packet_size = payload_len;
+    return 0;
+}
+
+static int multifd_recv_unfill_packet_ram(MultiFDRecvParams *p, Error **errp)
+{
+ MultiFDPacket_t *packet = p->packet;
+ int i;
packet->pages_alloc = be32_to_cpu(packet->pages_alloc);
/*
@@ -485,7 +502,6 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
p->next_packet_size = be32_to_cpu(packet->next_packet_size);
p->packet_num = be64_to_cpu(packet->packet_num);
- p->packets_recved++;
p->total_normal_pages += p->normal_num;
p->total_zero_pages += p->zero_num;
@@ -533,6 +549,19 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
return 0;
}
+/* Count the packet and parse its body as the type selected by p->flags. */
+static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
+{
+    p->packets_recved++;
+
+    /* p->flags is set by multifd_recv_unfill_packet_header() */
+    if (p->flags & MULTIFD_FLAG_DEVICE_STATE) {
+        return multifd_recv_unfill_packet_device_state(p, errp);
+    }
+
+    return multifd_recv_unfill_packet_ram(p, errp);
+}
+
static bool multifd_send_should_exit(void)
{
return qatomic_read(&multifd_send_state->exiting);
@@ -1239,8 +1268,8 @@ bool multifd_send_setup(void)
p->packet_len = sizeof(MultiFDPacket_t)
+ sizeof(uint64_t) * page_count;
p->packet = g_malloc0(p->packet_len);
- p->packet->magic = cpu_to_be32(MULTIFD_MAGIC);
- p->packet->version = cpu_to_be32(MULTIFD_VERSION);
+ p->packet->hdr.magic = cpu_to_be32(MULTIFD_MAGIC);
+ p->packet->hdr.version = cpu_to_be32(MULTIFD_VERSION);
/* We need one extra place for the packet header */
p->iov = g_new0(struct iovec, page_count + 1);
@@ -1415,6 +1444,7 @@ static void multifd_recv_cleanup_channel(MultiFDRecvParams *p)
p->packet_len = 0;
g_free(p->packet);
p->packet = NULL;
+ g_clear_pointer(&p->packet_dev_state, g_free);
g_free(p->iov);
p->iov = NULL;
g_free(p->normal);
@@ -1474,6 +1504,8 @@ void multifd_recv_sync_main(void)
for (i = 0; i < thread_count; i++) {
MultiFDRecvParams *p = &multifd_recv_state->params[i];
+ assert(!p->is_device_state_dedicated);
+
trace_multifd_recv_sync_main_signal(p->id);
qemu_sem_post(&p->sem);
}
@@ -1489,6 +1521,12 @@ void multifd_recv_sync_main(void)
* the work (pending_job=false).
*/
for (i = 0; i < thread_count; i++) {
+ MultiFDRecvParams *p = &multifd_recv_state->params[i];
+
+ if (p->is_device_state_dedicated) {
+ continue;
+ }
+
trace_multifd_recv_sync_main_wait(i);
qemu_sem_wait(&multifd_recv_state->sem_sync);
}
@@ -1507,6 +1545,10 @@ void multifd_recv_sync_main(void)
for (i = 0; i < thread_count; i++) {
MultiFDRecvParams *p = &multifd_recv_state->params[i];
+ if (p->is_device_state_dedicated) {
+ continue;
+ }
+
WITH_QEMU_LOCK_GUARD(&p->mutex) {
if (multifd_recv_state->packet_num < p->packet_num) {
multifd_recv_state->packet_num = p->packet_num;
@@ -1529,8 +1571,13 @@ static void *multifd_recv_thread(void *opaque)
rcu_register_thread();
while (true) {
+ MultiFDPacketHdr_t hdr;
uint32_t flags = 0;
+ bool is_device_state = false;
bool has_data = false;
+ uint8_t *pkt_buf;
+ size_t pkt_len;
+
p->normal_num = 0;
if (use_packets) {
@@ -1538,8 +1585,27 @@ static void *multifd_recv_thread(void *opaque)
break;
}
- ret = qio_channel_read_all_eof(p->c, (void *)p->packet,
- p->packet_len, &local_err);
+ ret = qio_channel_read_all_eof(p->c, (void *)&hdr,
+ sizeof(hdr), &local_err);
+ if (ret == 0 || ret == -1) { /* 0: EOF -1: Error */
+ break;
+ }
+
+ ret = multifd_recv_unfill_packet_header(p, &hdr, &local_err);
+ if (ret) {
+ break;
+ }
+
+ is_device_state = p->flags & MULTIFD_FLAG_DEVICE_STATE;
+ if (is_device_state) {
+ pkt_buf = (uint8_t *)p->packet_dev_state + sizeof(hdr);
+ pkt_len = sizeof(*p->packet_dev_state) - sizeof(hdr);
+ } else {
+ pkt_buf = (uint8_t *)p->packet + sizeof(hdr);
+ pkt_len = p->packet_len - sizeof(hdr);
+ }
+
+ ret = qio_channel_read_all_eof(p->c, (char *)pkt_buf, pkt_len, &local_err);
if (ret == 0 || ret == -1) { /* 0: EOF -1: Error */
break;
}
@@ -1582,8 +1648,39 @@ static void *multifd_recv_thread(void *opaque)
has_data = !!p->data->size;
}
- if (has_data) {
- ret = multifd_recv_state->ops->recv(p, &local_err);
+ if (!is_device_state) {
+ if (p->is_device_state_dedicated) {
+ error_setg(&local_err,
+ "multifd: received non-device-state packet on device-state-dedicated thread");
+ break;
+ }
+
+ if (has_data) {
+ ret = multifd_recv_state->ops->recv(p, &local_err);
+ if (ret != 0) {
+ break;
+ }
+ }
+ } else {
+ g_autofree char *idstr = NULL;
+ g_autofree char *dev_state_buf = NULL;
+
+ assert(use_packets);
+
+ if (p->next_packet_size > 0) {
+ dev_state_buf = g_malloc(p->next_packet_size);
+
+ ret = qio_channel_read_all(p->c, dev_state_buf, p->next_packet_size, &local_err);
+ if (ret != 0) {
+ break;
+ }
+ }
+
+ idstr = g_strndup(p->packet_dev_state->idstr, sizeof(p->packet_dev_state->idstr));
+ ret = qemu_loadvm_load_state_buffer(idstr,
+ p->packet_dev_state->instance_id,
+ dev_state_buf, p->next_packet_size,
+ &local_err);
if (ret != 0) {
break;
}
@@ -1591,6 +1688,11 @@ static void *multifd_recv_thread(void *opaque)
if (use_packets) {
if (flags & MULTIFD_FLAG_SYNC) {
+ if (is_device_state) {
+ error_setg(&local_err, "multifd: received SYNC device state packet");
+ break;
+ }
+
qemu_sem_post(&multifd_recv_state->sem_sync);
qemu_sem_wait(&p->sem_sync);
}
@@ -1662,6 +1764,7 @@ int multifd_recv_setup(Error **errp)
p->packet_len = sizeof(MultiFDPacket_t)
+ sizeof(uint64_t) * page_count;
p->packet = g_malloc0(p->packet_len);
+ p->packet_dev_state = g_malloc0(sizeof(*p->packet_dev_state));
}
p->name = g_strdup_printf("multifdrecv_%d", i);
p->iov = g_new0(struct iovec, page_count);
@@ -1703,7 +1806,9 @@ bool multifd_recv_all_channels_created(void)
* Try to receive all multifd channels to get ready for the migration.
* Sets @errp when failing to receive the current channel.
*/
-void multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
+void multifd_recv_new_channel(QIOChannel *ioc,
+ bool is_device_state_dedicated,
+ Error **errp)
{
MultiFDRecvParams *p;
Error *local_err = NULL;
@@ -1733,6 +1838,7 @@ void multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
error_propagate(errp, local_err);
return;
}
+ p->is_device_state_dedicated = is_device_state_dedicated;
p->c = ioc;
object_ref(OBJECT(ioc));
diff --git a/migration/multifd.h b/migration/multifd.h
index fd0cd29104c1..b5fa56b791af 100644
--- a/migration/multifd.h
+++ b/migration/multifd.h
@@ -24,7 +24,7 @@ int multifd_recv_setup(Error **errp);
void multifd_recv_cleanup(void);
void multifd_recv_shutdown(void);
bool multifd_recv_all_channels_created(void);
-void multifd_recv_new_channel(QIOChannel *ioc, Error **errp);
+void multifd_recv_new_channel(QIOChannel *ioc, bool is_device_state_dedicated, Error **errp);
void multifd_recv_sync_main(void);
int multifd_send_sync_main(void);
bool multifd_queue_page(RAMBlock *block, ram_addr_t offset);
@@ -41,6 +41,12 @@ MultiFDRecvData *multifd_get_recv_data(void);
#define MULTIFD_FLAG_ZLIB (1 << 1)
#define MULTIFD_FLAG_ZSTD (2 << 1)
+/*
+ * If set it means that this packet contains device state
+ * (MultiFDPacketDeviceState_t), not RAM data (MultiFDPacket_t).
+ */
+#define MULTIFD_FLAG_DEVICE_STATE (1 << 4)
+
/* This value needs to be a multiple of qemu_target_page_size() */
#define MULTIFD_PACKET_SIZE (512 * 1024)
@@ -48,6 +54,11 @@ typedef struct {
uint32_t magic;
uint32_t version;
uint32_t flags;
+} __attribute__((packed)) MultiFDPacketHdr_t;
+
+typedef struct {
+ MultiFDPacketHdr_t hdr;
+
/* maximum number of allocated pages */
uint32_t pages_alloc;
/* non zero pages */
@@ -68,6 +79,16 @@ typedef struct {
uint64_t offset[];
} __attribute__((packed)) MultiFDPacket_t;
+typedef struct { /* read instead of MultiFDPacket_t when MULTIFD_FLAG_DEVICE_STATE is set */
+ MultiFDPacketHdr_t hdr; /* common header: magic, version, flags */
+
+ char idstr[256] QEMU_NONSTRING; /* not necessarily NUL-terminated; receiver copies it with g_strndup() */
+ uint32_t instance_id; /* converted with be32_to_cpu() on receive */
+
+ /* size of the next packet that contains the actual data */
+ uint32_t next_packet_size;
+} __attribute__((packed)) MultiFDPacketDeviceState_t;
+
typedef struct {
/* number of used pages */
uint32_t num;
@@ -87,6 +108,13 @@ struct MultiFDRecvData {
off_t file_offset;
};
+typedef struct { /* NOTE(review): not referenced by any code visible in this patch - confirm its user */
+ char *idstr; /* presumably the device id string - TODO confirm */
+ uint32_t instance_id; /* presumably the device instance id - TODO confirm */
+ char *buf; /* presumably the device state payload - TODO confirm */
+ size_t buf_len; /* presumably the length of buf in bytes - TODO confirm */
+} MultiFDDeviceState_t;
+
typedef struct {
/* Fields are only written at creating/deletion time */
/* No lock required for them, they are read only */
@@ -175,6 +203,7 @@ typedef struct {
uint32_t page_size;
/* number of pages in a full packet */
uint32_t page_count;
+ bool is_device_state_dedicated;
/* syncs main thread and channels */
QemuSemaphore sem_sync;
@@ -194,8 +223,9 @@ typedef struct {
/* thread local variables. No locking required */
- /* pointer to the packet */
+ /* pointers to the possible packet types */
MultiFDPacket_t *packet;
+ MultiFDPacketDeviceState_t *packet_dev_state;
/* size of the next packet that contains pages */
uint32_t next_packet_size;
/* packets received through this channel */
next prev parent reply other threads:[~2024-04-16 14:46 UTC|newest]
Thread overview: 54+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-04-16 14:42 [PATCH RFC 00/26] Multifd 🔀 device state transfer support with VFIO consumer Maciej S. Szmigiero
2024-04-16 14:42 ` [PATCH RFC 01/26] migration: Add x-channel-header pseudo-capability Maciej S. Szmigiero
2024-04-16 14:42 ` [PATCH RFC 02/26] migration: Add migration channel header send/receive Maciej S. Szmigiero
2024-04-16 14:42 ` [PATCH RFC 03/26] migration: Add send/receive header for main channel Maciej S. Szmigiero
2024-04-16 14:42 ` [PATCH RFC 04/26] multifd: change multifd_new_send_channel_create() param type Maciej S. Szmigiero
2024-04-16 14:42 ` [PATCH RFC 05/26] migration: Add a DestroyNotify parameter to socket_send_channel_create() Maciej S. Szmigiero
2024-04-16 14:42 ` [PATCH RFC 06/26] multifd: pass MFDSendChannelConnectData when connecting sending socket Maciej S. Szmigiero
2024-04-16 14:42 ` [PATCH RFC 07/26] migration/postcopy: pass PostcopyPChannelConnectData when connecting sending preempt socket Maciej S. Szmigiero
2024-04-16 14:42 ` [PATCH RFC 08/26] migration: Allow passing migration header in migration channel creation Maciej S. Szmigiero
2024-04-16 14:42 ` [PATCH RFC 09/26] migration: Add send/receive header for postcopy preempt channel Maciej S. Szmigiero
2024-04-16 14:42 ` [PATCH RFC 10/26] migration: Add send/receive header for multifd channel Maciej S. Szmigiero
2024-04-16 14:42 ` [PATCH RFC 11/26] migration/options: Mapped-ram is not channel header compatible Maciej S. Szmigiero
2024-04-16 14:42 ` [PATCH RFC 12/26] migration: Enable x-channel-header pseudo-capability Maciej S. Szmigiero
2024-04-16 14:42 ` [PATCH RFC 13/26] vfio/migration: Add save_{iterate, complete_precopy}_started trace events Maciej S. Szmigiero
2024-04-16 14:42 ` [PATCH RFC 14/26] migration/ram: Add load start trace event Maciej S. Szmigiero
2024-04-16 14:42 ` [PATCH RFC 15/26] migration/multifd: Zero p->flags before starting filling a packet Maciej S. Szmigiero
2024-04-16 14:42 ` [PATCH RFC 16/26] migration: Add save_live_complete_precopy_async{, wait} handlers Maciej S. Szmigiero
2024-04-16 14:42 ` [PATCH RFC 17/26] migration: Add qemu_loadvm_load_state_buffer() and its handler Maciej S. Szmigiero
2024-04-16 14:42 ` [PATCH RFC 18/26] migration: Add load_finish handler and associated functions Maciej S. Szmigiero
2024-04-16 14:42 ` [PATCH RFC 19/26] migration: Add x-multifd-channels-device-state parameter Maciej S. Szmigiero
2024-04-16 14:42 ` [PATCH RFC 20/26] migration: Add MULTIFD_DEVICE_STATE migration channel type Maciej S. Szmigiero
2024-04-16 14:43 ` Maciej S. Szmigiero [this message]
2024-04-16 14:43 ` [PATCH RFC 22/26] migration/multifd: Convert multifd_send_pages::next_channel to atomic Maciej S. Szmigiero
2024-04-16 14:43 ` [PATCH RFC 23/26] migration/multifd: Device state transfer support - send side Maciej S. Szmigiero
2024-04-29 20:04 ` Peter Xu
2024-05-06 16:25 ` Maciej S. Szmigiero
2024-04-16 14:43 ` [PATCH RFC 24/26] migration/multifd: Add migration_has_device_state_support() Maciej S. Szmigiero
2024-04-16 14:43 ` [PATCH RFC 25/26] vfio/migration: Multifd device state transfer support - receive side Maciej S. Szmigiero
2024-04-16 14:43 ` [PATCH RFC 26/26] vfio/migration: Multifd device state transfer support - send side Maciej S. Szmigiero
2024-04-17 8:36 ` [PATCH RFC 00/26] Multifd 🔀 device state transfer support with VFIO consumer Daniel P. Berrangé
2024-04-17 12:11 ` Maciej S. Szmigiero
2024-04-17 16:35 ` Daniel P. Berrangé
2024-04-18 9:50 ` Maciej S. Szmigiero
2024-04-18 10:39 ` Daniel P. Berrangé
2024-04-18 18:14 ` Maciej S. Szmigiero
2024-04-18 20:02 ` Peter Xu
2024-04-19 10:07 ` Daniel P. Berrangé
2024-04-19 15:31 ` Peter Xu
2024-04-23 16:15 ` Maciej S. Szmigiero
2024-04-23 22:20 ` Peter Xu
2024-04-23 22:25 ` Maciej S. Szmigiero
2024-04-23 22:35 ` Peter Xu
2024-04-26 17:34 ` Maciej S. Szmigiero
2024-04-29 15:09 ` Peter Xu
2024-05-06 16:26 ` Maciej S. Szmigiero
2024-05-06 17:56 ` Peter Xu
2024-05-07 8:41 ` Avihai Horon
2024-05-07 16:13 ` Peter Xu
2024-05-07 17:23 ` Avihai Horon
2024-04-23 16:14 ` Maciej S. Szmigiero
2024-04-23 22:27 ` Peter Xu
2024-04-26 17:35 ` Maciej S. Szmigiero
2024-04-29 20:34 ` Peter Xu
2024-04-19 10:20 ` Daniel P. Berrangé
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=a7a20b0de049b53d27b4ce8363fc68e601155104.1713269378.git.maciej.szmigiero@oracle.com \
--to=mail@maciej.szmigiero.name \
--cc=alex.williamson@redhat.com \
--cc=armbru@redhat.com \
--cc=avihaih@nvidia.com \
--cc=clg@redhat.com \
--cc=eblake@redhat.com \
--cc=farosas@suse.de \
--cc=joao.m.martins@oracle.com \
--cc=peterx@redhat.com \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).