All the mail mirrored from lore.kernel.org
 help / color / mirror / Atom feed
From: Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com>
To: jayalk@intworks.biz, Daniel Vetter <daniel@ffwll.ch>,
	deller@gmx.de, linux-fbdev@vger.kernel.org,
	dri-devel@lists.freedesktop.org
Cc: tiwai@suse.de, namcao@linutronix.de, bigeasy@linutronix.de,
	patrik.r.jakobsson@gmail.com, LKML <linux-kernel@vger.kernel.org>,
	Vegard Nossum <vegard.nossum@oracle.com>,
	George Kennedy <george.kennedy@oracle.com>,
	Darren Kenny <darren.kenny@oracle.com>,
	chuansheng.liu@intel.com
Subject: [bug-report] task info hung problem in fb_deferred_io_work()
Date: Thu, 18 Apr 2024 18:09:53 +0530	[thread overview]
Message-ID: <271372d6-e665-4e7f-b088-dee5f4ab341a@oracle.com> (raw)

Hi,

While fuzzing 5.15.y kernel with Syzkaller, we noticed a INFO: task hung 
bug in fb_deferred_io_work()


This is in 5.15.149 tag, and this is introduced by a set of commits:

2655757a3f10 fbdev: flush deferred IO before closing
15492bab7393 fbdev: Fix incorrect page mapping clearance at 
fb_deferred_io_release()
87b9802ca824 fbdev: Fix invalid page access after closing deferred I/O 
devices
9a95fc04261f fbdev: Rename pagelist to pagereflist for deferred I/O
186b89659c4c fbdev: Track deferred-I/O pages in pageref struct
e79b2b2aadef fbdev: defio: fix the pagelist corruption
0616b00a31d6 fbdev: Don't sort deferred-I/O pages by default
5d3aff76a316 fbdev/defio: Early-out if page is already enlisted

I then tried if it could be reproducible on upstream kernel as well, and 
it did. below is the call trace with latest 6.9-rc4 based kernel. I have 
enabled the LOCKDEP to get locking related information.

  INFO: task kworker/0:3:1077 blocked for more than 122 seconds.
        Not tainted 6.9.0-rc4+ #54
  "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
  task:kworker/0:3     state:D stack:0     pid:1077  tgid:1077  ppid:2 
    flags:0x00004000
  Workqueue: events fb_deferred_io_work
  Call Trace:
   <TASK>
  __schedule (kernel/sched/core.c:5409 kernel/sched/core.c:6746)
  ? __pfx___schedule (kernel/sched/core.c:6617)
  ? schedule (kernel/sched/core.c:6804 kernel/sched/core.c:6837)
  ? __pfx___lock_release (kernel/locking/lockdep.c:5406)
  ? schedule (kernel/sched/core.c:6804 kernel/sched/core.c:6837)
  schedule (./arch/x86/include/asm/preempt.h:84 kernel/sched/core.c:6824 
kernel/sched/core.c:6838)
  io_schedule (kernel/sched/core.c:9019 kernel/sched/core.c:9045)
  folio_wait_bit_common (mm/filemap.c:1272)
  ? __pfx_folio_wait_bit_common (mm/filemap.c:1206)
  ? __pfx___mutex_lock (kernel/locking/mutex.c:751)
  ? lock_acquire.part.0 (kernel/locking/lockdep.c:467 
kernel/locking/lockdep.c:5756)
  ? __pfx_wake_page_function (mm/filemap.c:1091)
  ? __pfx_lock_acquire.part.0 (kernel/locking/lockdep.c:5719)
  fb_deferred_io_work (./include/linux/pagemap.h:1069 
drivers/video/fbdev/core/fb_defio.c:248)
  ? lock_acquire (./include/trace/events/lock.h:24 
kernel/locking/lockdep.c:5725)
  process_one_work (kernel/workqueue.c:3254)
  ? __pfx_lock_acquire.part.0 (kernel/locking/lockdep.c:5719)
  ? __pfx_process_one_work (kernel/workqueue.c:3156)
  ? assign_work (kernel/workqueue.c:1210)
  worker_thread (kernel/workqueue.c:3329 kernel/workqueue.c:3416)
  ? __kthread_parkme (kernel/kthread.c:293)
  ? __pfx_worker_thread (kernel/workqueue.c:3362)
  kthread (kernel/kthread.c:388)
  ? __pfx_kthread (kernel/kthread.c:341)
  ret_from_fork (arch/x86/kernel/process.c:147)
  ? __pfx_kthread (kernel/kthread.c:341)
  ret_from_fork_asm (arch/x86/entry/entry_64.S:257)
   </TASK>
  INFO: task r:2664 blocked for more than 123 seconds.
        Not tainted 6.9.0-rc4+ #54
  "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
  task:r               state:D stack:0     pid:2664  tgid:2664 
ppid:2618   flags:0x00000002
  Call Trace:
   <TASK>
  __schedule (kernel/sched/core.c:5409 kernel/sched/core.c:6746)
  ? __pfx___schedule (kernel/sched/core.c:6617)
  ? schedule (kernel/sched/core.c:6804 kernel/sched/core.c:6837)
  ? __pfx___lock_release (kernel/locking/lockdep.c:5406)
  ? schedule (kernel/sched/core.c:6804 kernel/sched/core.c:6837)
  schedule (./arch/x86/include/asm/preempt.h:84 kernel/sched/core.c:6824 
kernel/sched/core.c:6838)
  schedule_timeout (kernel/time/timer.c:2559)
  ? __pfx_schedule_timeout (kernel/time/timer.c:2544)
  ? mark_lock.part.0 (./arch/x86/include/asm/bitops.h:227 
./arch/x86/include/asm/bitops.h:239 
./include/asm-generic/bitops/instrumented-non-atomic.h:142 
kernel/locking/lockdep.c:228 kernel/locking/lockdep.c:4656)
  ? mark_held_locks (kernel/locking/lockdep.c:4274)
  ? lockdep_hardirqs_on_prepare.part.0 (kernel/locking/lockdep.c:4300 
kernel/locking/lockdep.c:4359)
  __wait_for_common (kernel/sched/completion.c:95 
kernel/sched/completion.c:116)
  ? __pfx_schedule_timeout (kernel/time/timer.c:2544)
  ? __pfx___wait_for_common (kernel/sched/completion.c:110)
  ? start_flush_work (kernel/workqueue.c:4192)
  __flush_work (kernel/workqueue.c:4207)
  ? __pfx___flush_work (kernel/workqueue.c:4195)
  ? __pfx_wq_barrier_func (kernel/workqueue.c:3742)
  ? flush_delayed_work (kernel/workqueue.c:4249)
  ? __pfx_fb_release (drivers/video/fbdev/core/fb_chrdev.c:375)
  fb_deferred_io_lastclose (drivers/video/fbdev/core/fb_defio.c:321)
  ? __pfx_fb_release (drivers/video/fbdev/core/fb_chrdev.c:375)
  fb_deferred_io_release (drivers/video/fbdev/core/fb_defio.c:333)
  fb_release (drivers/video/fbdev/core/fb_chrdev.c:383)
  ? __pfx_fb_release (drivers/video/fbdev/core/fb_chrdev.c:375)
  __fput (fs/file_table.c:422)
  task_work_run (kernel/task_work.c:180 (discriminator 1))
  ? __pfx_task_work_run (kernel/task_work.c:148)
  ? do_raw_spin_unlock (./arch/x86/include/asm/paravirt.h:589 
./arch/x86/include/asm/qspinlock.h:57 kernel/locking/spinlock_debug.c:142)
  do_exit (kernel/exit.c:879)
  ? do_raw_spin_lock (./arch/x86/include/asm/atomic.h:115 
./include/linux/atomic/atomic-arch-fallback.h:2170 
./include/linux/atomic/atomic-instrumented.h:1302 
./include/asm-generic/qspinlock.h:111 kernel/locking/spinlock_debug.c:116)
  ? __pfx_do_exit (kernel/exit.c:819)
  ? __pfx_do_raw_spin_lock (kernel/locking/spinlock_debug.c:114)
  do_group_exit (kernel/exit.c:1008)
  __x64_sys_exit_group (kernel/exit.c:1036)
  do_syscall_64 (arch/x86/entry/common.c:52 arch/x86/entry/common.c:83)
  entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130)
  RIP: 0033:0x7f9cfe71e5f1
  RSP: 002b:00007ffd636aec08 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7
  RAX: ffffffffffffffda RBX: 00007f9cfe7fc510 RCX: 00007f9cfe71e5f1
  RDX: 000000000000003c RSI: 00000000000000e7 RDI: 0000000000000000
  RBP: 0000000000000000 R08: ffffffffffffff80 R09: 0000000000000000
  R10: 0000000000000020 R11: 0000000000000246 R12: 00007f9cfe7fc510
  R13: 0000000000000000 R14: 00007f9cfe7fc9e8 R15: 00007f9cfe7fca00
   </TASK>

  Showing all locks held in the system:
  1 lock held by khungtaskd/50:
  #0: ffffffff950219c0 (rcu_read_lock){....}-{1:2}, at: 
debug_show_all_locks (./include/linux/rcupdate.h:329 (discriminator 1) 
./include/linux/rcupdate.h:781 (discriminator 1) 
kernel/locking/lockdep.c:6614 (discriminator 1))
  3 locks held by kworker/0:3/1077:
  #0: ffff88810005ad48 ((wq_completion)events){+.+.}-{0:0}, at: 
process_one_work (kernel/workqueue.c:3229)
  #1: ffff8881080dfd98 
((work_completion)(&(&info->deferred_work)->work)){+.+.}-{0:0}, at: 
process_one_work (kernel/workqueue.c:3230)
  #2: ffff88811416db38 (&fbdefio->lock){+.+.}-{3:3}, at: 
fb_deferred_io_work (drivers/video/fbdev/core/fb_defio.c:246)
  1 lock held by r/2664:
  #0: ffff8881141fe078 (&fb_info->lock){+.+.}-{3:3}, at: fb_release 
(drivers/video/fbdev/core/fb_chrdev.c:380)



Syzkaller initially generated a long reproducer, Vegard simplified it to 
the one below

C repro:

#define _GNU_SOURCE
#include <sys/mman.h>
#include <fcntl.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
         int fd = open("/dev/fb0", O_RDWR, 0);

         char *ptr = (char *) 0x20000000ul;

         mmap(ptr, 4096, PROT_WRITE, MAP_FIXED | MAP_SHARED, fd, 0xff000);
         *ptr = 0;

         mmap(ptr, 4096, PROT_WRITE, MAP_FIXED | MAP_SHARED, fd, 0);
         *ptr = 0;

         return 0;
}

This needs to be run as root to see the hang in 122 seconds.

238 static void fb_deferred_io_work(struct work_struct *work)
239 {
240         struct fb_info *info = container_of(work, struct fb_info, 
deferred_work.work);
241         struct fb_deferred_io_pageref *pageref, *next;
242         struct fb_deferred_io *fbdefio = info->fbdefio;
243
244         /* here we mkclean the pages, then do all deferred IO */
245         mutex_lock(&fbdefio->lock);
246         list_for_each_entry(pageref, &fbdefio->pagereflist, list) {
247                 struct page *cur = pageref->page;
248                 lock_page(cur);
249                 page_mkclean(cur);
250                 unlock_page(cur);
251         }

It is stalling on lock_page() in fb_deferred_io_work()

I will be happy to try any patches and report back.


Thanks,
Harshit

             reply	other threads:[~2024-04-18 12:40 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-04-18 12:39 Harshit Mogalapalli [this message]
2024-04-18 13:14 ` [bug-report] task info hung problem in fb_deferred_io_work() Patrik Jakobsson
2024-04-18 14:05   ` Nam Cao
2024-04-18 14:34     ` Patrik Jakobsson
2024-04-19 15:18       ` Nam Cao
2024-04-19  7:27   ` Harshit Mogalapalli
2024-04-18 14:06 ` Nam Cao
2024-04-18 14:26   ` Takashi Iwai
2024-04-18 19:29     ` Helge Deller
2024-04-19  6:44       ` Takashi Iwai
2024-04-19  7:39         ` Harshit Mogalapalli
2024-04-19  7:45           ` Takashi Iwai
2024-04-19  7:50             ` Patrik Jakobsson
2024-04-19 15:34               ` Nam Cao
2024-04-30 11:45                 ` Patrik Jakobsson
2024-04-19 14:42             ` Harshit Mogalapalli
2024-04-19  7:35   ` Harshit Mogalapalli
2024-04-19 14:59 ` Nam Cao
2024-04-19 16:05   ` Harshit Mogalapalli
2024-04-19 16:23     ` Nam Cao
2024-04-19 16:39       ` Harshit Mogalapalli

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=271372d6-e665-4e7f-b088-dee5f4ab341a@oracle.com \
    --to=harshit.m.mogalapalli@oracle.com \
    --cc=bigeasy@linutronix.de \
    --cc=chuansheng.liu@intel.com \
    --cc=daniel@ffwll.ch \
    --cc=darren.kenny@oracle.com \
    --cc=deller@gmx.de \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=george.kennedy@oracle.com \
    --cc=jayalk@intworks.biz \
    --cc=linux-fbdev@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=namcao@linutronix.de \
    --cc=patrik.r.jakobsson@gmail.com \
    --cc=tiwai@suse.de \
    --cc=vegard.nossum@oracle.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.