Hi

On Wed, Jun 9, 2021 at 4:21 AM Longpeng (Mike, Cloud Infrastructure Service Product Dept.) <longpeng2@huawei.com> wrote:


On 2021/6/8 23:37, Daniel P. Berrangé wrote:
> On Tue, Jun 08, 2021 at 04:07:30PM +0200, Markus Armbruster wrote:
>> "Longpeng (Mike, Cloud Infrastructure Service Product Dept.)"
>> <longpeng2@huawei.com> writes:
>>
>>> We found a race during QEMU startup which can cause the QEMU process to coredump.
>>>
>>> <main loop>                             |    <MON iothread>
>>>                                         |
>>> [1] create MON chardev                  |
>>> qemu_create_early_backends              |
>>>   chardev_init_func                     |
>>>                                         |
>>> [2] create MON iothread                 |
>>> qemu_create_late_backends               |
>>>   mon_init_func                         |
>>>     aio_bh_schedule-----------------------> monitor_qmp_setup_handlers_bh
>>> [3] enter main loop                     |    tcp_chr_update_read_handler
>>> (* A client comes in, e.g. Libvirt *)   |      update_ioc_handlers
>>> tcp_chr_new_client                      |
>>>   update_ioc_handlers                   |
>>>                                         |
>>>     [4] create new hup_source           |
>>>         s->hup_source = *PTR1*          |
>>>           g_source_attach(s->hup_source)|
>>>                                         |        [5] remove_hup_source(*PTR1*)
>>>                                         |            (create new hup_source)
>>>                                         |             s->hup_source = *PTR2*
>>>         [6] g_source_attach_unlocked    |
>>>               *PTR1* is freed by [5]    |
>>>                     
>>> Do you have any suggestion to fix this bug ? Thanks!
>>
>> Do we?  We talked, but I'm not sure we reached a conclusion.
>
> Seems like we ended up with two options.
>
>   1. A workaround for the current specific problem by rearranging
>      the initialization code in the monitor a little.
>
>   2. A design fix splitting chardev setup into two parts: one for
>      creation and one for activation.
>
> The latter is significantly more work, but is a better long-term bet IMHO.
> But what we really need is someone motivated to actually implement one of
> the two options.
>
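For illustration, option 2 could look roughly like the toy model below. This is
a minimal, self-contained sketch of the create/activate split, not QEMU code:
chardev_create(), chardev_activate() and ToyChardev are made-up names, and the
only point is that no handler gets attached until the final context is known,
so nothing ever has to be migrated (and racily re-attached) afterwards.

/* Toy model of a two-phase chardev: phase 1 only builds the object,
 * phase 2 attaches handlers in the context they will run in. */
#include <stdio.h>
#include <stdlib.h>

typedef struct ToyChardev {
    const char *label;
    void *context;            /* stands in for the owner's GMainContext */
    int handlers_attached;
} ToyChardev;

/* Phase 1: allocate and configure; do not register any I/O sources yet. */
static ToyChardev *chardev_create(const char *label)
{
    ToyChardev *chr = calloc(1, sizeof(*chr));
    chr->label = label;
    return chr;
}

/* Phase 2: the frontend (e.g. the QMP monitor) has picked its context;
 * only now are read/hup handlers attached, and only in that context. */
static void chardev_activate(ToyChardev *chr, void *context)
{
    chr->context = context;
    chr->handlers_attached = 1;
    printf("%s: handlers attached in their final context\n", chr->label);
}

int main(void)
{
    ToyChardev *mon_chr = chardev_create("charmonitor");
    /* ... the monitor decides on its iothread context first ... */
    chardev_activate(mon_chr, (void *)"mon_iothread context");
    free(mon_chr);
    return 0;
}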

How about the following implementation of option 1? We've tested it for
several weeks and it works fine.

diff --git a/chardev/char-socket.c b/chardev/char-socket.c

If we go with this option, chr_disable_handler will need to be implemented for
all chardevs that register handlers (or we could use a
QEMU_CHAR_FEATURE_DISABLE_HANDLER flag, which would limit the chardevs that can
be associated with a monitor and knowingly break previously working setups).
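
As a rough sketch of the feature-flag variant (QEMU_CHAR_FEATURE_DISABLE_HANDLER
does not exist today; it would have to be added to ChardevFeature and set, via
qemu_chr_set_feature(), by every backend that implements chr_disable_handler),
the monitor setup path could guard on it before scheduling the bottom half,
something like:

    /* Hypothetical guard: refuse chardevs that cannot tear their handlers
     * down from the default context before the bottom half re-installs
     * them in the monitor iothread. */
    if (!qemu_chr_has_feature(chr, QEMU_CHAR_FEATURE_DISABLE_HANDLER)) {
        error_report("chardev '%s' cannot be used for a QMP monitor "
                     "running in an I/O thread", chr->label);
        return;  /* or propagate an Error, depending on the call site */
    }

That is exactly where the "knowingly break previously working setups" part
comes from: any backend that never sets the flag would be rejected here.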


index a484641..ecb3db9 100644
--- a/chardev/char-socket.c
+++ b/chardev/char-socket.c
@@ -722,6 +722,19 @@ static void tcp_chr_update_read_handler(Chardev *chr)
     update_ioc_handlers(s);
 }

+static void tcp_chr_disable_handler(Chardev *chr)
+{
+    SocketChardev *s = SOCKET_CHARDEV(chr);
+
+    if (s->listener && s->state == TCP_CHARDEV_STATE_DISCONNECTED) {
+        qio_net_listener_set_client_func_full(s->listener, NULL, NULL,
+                                              NULL, chr->gcontext);
+    }
+
+    remove_fd_in_watch(chr);
+    remove_hup_source(s);
+}
+
 static bool tcp_chr_is_connected(Chardev *chr)
 {
     SocketChardev *s = SOCKET_CHARDEV(chr);
@@ -1703,6 +1716,7 @@ static void char_socket_class_init(ObjectClass *oc, void *data)
     cc->chr_add_watch = tcp_chr_add_watch;
     cc->chr_set_reconnect_time = tcp_chr_set_reconnect_time;
     cc->chr_update_read_handler = tcp_chr_update_read_handler;
+    cc->chr_disable_handler = tcp_chr_disable_handler;
     cc->chr_is_connected = tcp_chr_is_connected;
     cc->chr_get_connect_id = tcp_chr_get_connect_id;

diff --git a/chardev/char.c b/chardev/char.c
index ff0a3cf..990fe4f 100644
--- a/chardev/char.c
+++ b/chardev/char.c
@@ -238,6 +238,15 @@ void qemu_chr_be_update_read_handlers(Chardev *s,
     }
 }

+void qemu_chr_be_disable_handlers(Chardev *s)
+{
+    ChardevClass *cc = CHARDEV_GET_CLASS(s);
+
+    if (cc->chr_disable_handler) {
+        cc->chr_disable_handler(s);
+    }
+}
+
 int qemu_chr_add_client(Chardev *s, int fd)
 {
     return CHARDEV_GET_CLASS(s)->chr_add_client ?
diff --git a/include/chardev/char.h b/include/chardev/char.h
index d1ec628..7a8c740 100644
--- a/include/chardev/char.h
+++ b/include/chardev/char.h
@@ -212,6 +212,8 @@ void qemu_chr_be_write_impl(Chardev *s, uint8_t *buf, int len);
 void qemu_chr_be_update_read_handlers(Chardev *s,
                                       GMainContext *context);

+void qemu_chr_be_disable_handlers(Chardev *s);
+
 /**
  * qemu_chr_be_event:
  * @event: the event to send
@@ -282,6 +284,7 @@ typedef struct ChardevClass {
     int (*chr_sync_read)(Chardev *s, const uint8_t *buf, int len);
     GSource *(*chr_add_watch)(Chardev *s, GIOCondition cond);
     void (*chr_update_read_handler)(Chardev *s);
+    void (*chr_disable_handler)(Chardev *s);
     int (*chr_ioctl)(Chardev *s, int cmd, void *arg);
     int (*get_msgfds)(Chardev *s, int* fds, int num);
     int (*set_msgfds)(Chardev *s, int *fds, int num);
diff --git a/monitor/qmp.c b/monitor/qmp.c
index 9a69ae4..2c2248c 100644
--- a/monitor/qmp.c
+++ b/monitor/qmp.c
@@ -413,11 +413,13 @@ void monitor_init_qmp(Chardev *chr, bool pretty)
          * e.g. the chardev is in client mode, with wait=on.
          */
         remove_fd_in_watch(chr);
+
         /*
-         * We can't call qemu_chr_fe_set_handlers() directly here
-         * since chardev might be running in the monitor I/O
-         * thread.  Schedule a bottom half.
+         * Before scheduling the bottom half, clean up the handlers in the
+         * default context to prevent a race with the monitor iothread.
          */
+        qemu_chr_be_disable_handlers(chr);
+
         aio_bh_schedule_oneshot(iothread_get_aio_context(mon_iothread),
                                 monitor_qmp_setup_handlers_bh, mon);
         /* The bottom half will add @mon to @mon_list */
--
1.8.3.1


> Regards,
> Daniel
>



--
Marc-André Lureau