Open kitakar5525 opened 4 years ago
Perhaps some uninitialised variables being used. It's probably worth reporting that backtrace to the linux-media mailing list stating your findings.
cio2_subdev_get_fmt certainly shouldn't be calling recursively like that ...
It's an odd code path, but I guess there is a potential recursive loop if cio2_subdev_get_fmt() somehow ends up calling itself. This needs more investigation, with some debug prints added in here:
static int cio2_subdev_get_fmt(struct v4l2_subdev *sd,
struct v4l2_subdev_pad_config *cfg,
struct v4l2_subdev_format *fmt)
{
struct cio2_queue *q = container_of(sd, struct cio2_queue, subdev);
struct v4l2_subdev_format format;
int ret;
if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
fmt->format = *v4l2_subdev_get_try_format(sd, cfg, fmt->pad);
return 0;
}
...<snip>...
}
Thanks for the comment! It seems that indeed it's called recursively.
Lines starting with "DEBUG:" are debug output that I added:
1234 /*
1235 * cio2_subdev_get_fmt - Handle get format by pads subdev method
1236 * @sd : pointer to v4l2 subdev structure
1237 * @cfg: V4L2 subdev pad config (only used for V4L2_SUBDEV_FORMAT_TRY requests)
1238 * @fmt: pointer to v4l2 subdev format structure; ->which and ->pad are read, ->format is filled in
1239 * return zero on success, or the error code of the nested get_fmt call made for the sink pad
1240 */
1241 static int cio2_subdev_get_fmt(struct v4l2_subdev *sd,
1242 struct v4l2_subdev_pad_config *cfg,
1243 struct v4l2_subdev_format *fmt)
1244 {
1245 struct cio2_queue *q = container_of(sd, struct cio2_queue, subdev);
1246 struct v4l2_subdev_format format; /* no initialiser; only .which is assigned (line 1263) */
1247 int ret;
1248
1249 pr_info("DEBUG: %s() called\n", __func__); /* pr_info()/msleep() lines are debug instrumentation added by the reporter */
1250 pr_info("DEBUG: msleep()\n");
1251 msleep(1000); /* one-second delay so that repeated calls are easy to tell apart in the log */
1252
1253 if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
1254 pr_info("DEBUG: Passed %s() %d\n", __func__, __LINE__);
1255 fmt->format = *v4l2_subdev_get_try_format(sd, cfg, fmt->pad); /* TRY requests are answered from the pad config */
1256 return 0;
1257 }
1258
1259 pr_info("DEBUG: Passed %s() %d\n", __func__, __LINE__);
1260
1261 if (fmt->pad == CIO2_PAD_SINK) {
1262 pr_info("DEBUG: Passed %s() %d\n", __func__, __LINE__);
1263 format.which = V4L2_SUBDEV_FORMAT_ACTIVE; /* NOTE(review): format.pad is never assigned before the call below */
1264 ret = v4l2_subdev_call(sd, pad, get_fmt, NULL, /* NOTE(review): sd is this function's own subdev, so this call appears to re-enter cio2_subdev_get_fmt() */
1265 &format);
1266
1267 if (ret) {
1268 pr_info("DEBUG: Passed %s() %d\n", __func__, __LINE__);
1269 return ret; /* propagate the error from the nested get_fmt call */
1270 }
1271 /* copy colorspace, ycbcr_enc, quantization and xfer_func from the nested result into the stored format */
1272 q->subdev_fmt.colorspace = format.format.colorspace;
1273 q->subdev_fmt.ycbcr_enc = format.format.ycbcr_enc;
1274 q->subdev_fmt.quantization = format.format.quantization;
1275 q->subdev_fmt.xfer_func = format.format.xfer_func;
1276 }
1277
1278 pr_info("DEBUG: Passed %s() %d\n", __func__, __LINE__);
1279
1280 fmt->format = q->subdev_fmt; /* hand back the format stored in the queue */
1281
1282 return 0;
1283 }
When the "Memory initialization" option is CONFIG_INIT_STACK_NONE (weakest) or CONFIG_GCC_PLUGIN_STRUCTLEAK_USER (weak), the dmesg output looks like the following:
$ media-ctl -p -d /dev/media0
Media controller API version 5.9.0
Media device information
------------------------
driver ipu3-cio2
model Intel IPU3 CIO2
serial
bus info PCI:0000:00:14.3
hw revision 0x0
driver version 5.9.0
Device topology
- entity 1: ipu3-csi2 0 (2 pads, 1 link)
type V4L2 subdev subtype Unknown flags 0
device node name /dev/v4l-subdev0
pad0: Sink
pad1: Source
[fmt:SGRBG10_1X10/1936x1096 field:none]
-> "ipu3-cio2 0":0 [ENABLED,IMMUTABLE]
[...]
$ dmesg -xw
kern :info : [ 721.598819] DEBUG: cio2_subdev_get_fmt() called
kern :info : [ 721.598820] DEBUG: msleep()
kern :info : [ 722.609168] DEBUG: Passed cio2_subdev_get_fmt() 1259
kern :info : [ 722.609172] DEBUG: Passed cio2_subdev_get_fmt() 1262
kern :info : [ 722.609175] DEBUG: Passed cio2_subdev_get_fmt() 1268
kern :info : [ 722.609236] DEBUG: cio2_subdev_get_fmt() called
kern :info : [ 722.609237] DEBUG: msleep()
kern :info : [ 723.625837] DEBUG: Passed cio2_subdev_get_fmt() 1259
kern :info : [ 723.625839] DEBUG: Passed cio2_subdev_get_fmt() 1278
kern :info : [ 723.625916] DEBUG: cio2_subdev_get_fmt() called
kern :info : [ 723.625917] DEBUG: msleep()
kern :info : [ 724.635846] DEBUG: Passed cio2_subdev_get_fmt() 1259
kern :info : [ 724.635851] DEBUG: Passed cio2_subdev_get_fmt() 1262
kern :info : [ 724.635854] DEBUG: Passed cio2_subdev_get_fmt() 1268
kern :info : [ 724.635915] DEBUG: cio2_subdev_get_fmt() called
kern :info : [ 724.635916] DEBUG: msleep()
kern :info : [ 725.649141] DEBUG: Passed cio2_subdev_get_fmt() 1259
kern :info : [ 725.649145] DEBUG: Passed cio2_subdev_get_fmt() 1278
kern :info : [ 725.649400] DEBUG: cio2_subdev_get_fmt() called
kern :info : [ 725.649402] DEBUG: msleep()
kern :info : [ 726.662389] DEBUG: Passed cio2_subdev_get_fmt() 1259
kern :info : [ 726.662393] DEBUG: Passed cio2_subdev_get_fmt() 1262
kern :info : [ 726.662397] DEBUG: Passed cio2_subdev_get_fmt() 1268
kern :info : [ 726.662454] DEBUG: cio2_subdev_get_fmt() called
kern :info : [ 726.662456] DEBUG: msleep()
kern :info : [ 727.675811] DEBUG: Passed cio2_subdev_get_fmt() 1259
kern :info : [ 727.675815] DEBUG: Passed cio2_subdev_get_fmt() 1278
kern :info : [ 727.676081] DEBUG: cio2_subdev_get_fmt() called
kern :info : [ 727.676083] DEBUG: msleep()
kern :info : [ 728.688995] DEBUG: Passed cio2_subdev_get_fmt() 1259
kern :info : [ 728.689000] DEBUG: Passed cio2_subdev_get_fmt() 1262
kern :info : [ 728.689003] DEBUG: Passed cio2_subdev_get_fmt() 1268
kern :info : [ 728.689063] DEBUG: cio2_subdev_get_fmt() called
kern :info : [ 728.689065] DEBUG: msleep()
kern :info : [ 729.702446] DEBUG: Passed cio2_subdev_get_fmt() 1259
kern :info : [ 729.702450] DEBUG: Passed cio2_subdev_get_fmt() 1278
When the "Memory initialization" option is CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF (strong) or CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF_ALL (very strong), on the other hand, the dmesg output looks like the following:
$ media-ctl -p -d /dev/media0
Media controller API version 5.9.0
Media device information
------------------------
driver ipu3-cio2
model Intel IPU3 CIO2
serial
bus info PCI:0000:00:14.3
hw revision 0x0
driver version 5.9.0
Device topology
- entity 1: ipu3-csi2 0 (2 pads, 1 link)
type V4L2 subdev subtype Unknown flags 0
device node name /dev/v4l-subdev0
pad0: Sink
(stuck here)
$ dmesg -xw
[ 871.807563] kernel: DEBUG: cio2_subdev_get_fmt() called
[ 871.807566] kernel: DEBUG: msleep()
[ 872.821254] kernel: DEBUG: Passed cio2_subdev_get_fmt() 1259
[ 872.821258] kernel: DEBUG: Passed cio2_subdev_get_fmt() 1262
# [...] (same output repeatedly)
[ 986.313536] kernel: DEBUG: cio2_subdev_get_fmt() called
[ 986.313538] kernel: DEBUG: msleep()
[ 987.326899] kernel: DEBUG: Passed cio2_subdev_get_fmt() 1259
[ 987.326904] kernel: DEBUG: Passed cio2_subdev_get_fmt() 1262
[ 987.326908] kernel: DEBUG: cio2_subdev_get_fmt() called
[ 987.326910] kernel: DEBUG: msleep()
(then the system hung)
I'll send this to mailing list next time I have time.
Is there anything interesting in the stack trace? What's the entry point into the recursion?
Also - have you chopped the media-graph? I was expecting more information there too.
For example - this is the output of media-ctl -p on my IPU3 device (not a surface)
localhost ~ # media-ctl -p
Media controller API version 5.4.39
Media device information
------------------------
driver ipu3-cio2
model Intel IPU3 CIO2
serial
bus info PCI:0000:00:14.3
hw revision 0x0
driver version 5.4.39
Device topology
- entity 1: ipu3-csi2 0 (2 pads, 2 links)
type V4L2 subdev subtype Unknown flags 0
device node name /dev/v4l-subdev0
pad0: Sink
<- "ov13858 8-0010":0 [ENABLED]
pad1: Source
[fmt:SGRBG10/1936x1096]
-> "ipu3-cio2 0":0 [ENABLED,IMMUTABLE]
- entity 4: ipu3-cio2 0 (1 pad, 1 link)
type Node subtype V4L flags 0
device node name /dev/video0
pad0: Sink
<- "ipu3-csi2 0":1 [ENABLED,IMMUTABLE]
- entity 10: ipu3-csi2 1 (2 pads, 2 links)
type V4L2 subdev subtype Unknown flags 0
device node name /dev/v4l-subdev1
pad0: Sink
<- "ov5670 10-0036":0 [ENABLED]
pad1: Source
[fmt:SGRBG10/1936x1096]
-> "ipu3-cio2 1":0 [ENABLED,IMMUTABLE]
- entity 13: ipu3-cio2 1 (1 pad, 1 link)
type Node subtype V4L flags 0
device node name /dev/video1
pad0: Sink
<- "ipu3-csi2 1":1 [ENABLED,IMMUTABLE]
- entity 19: ipu3-csi2 2 (2 pads, 1 link)
type V4L2 subdev subtype Unknown flags 0
device node name /dev/v4l-subdev2
pad0: Sink
pad1: Source
[fmt:SGRBG10/1936x1096]
-> "ipu3-cio2 2":0 [ENABLED,IMMUTABLE]
- entity 22: ipu3-cio2 2 (1 pad, 1 link)
type Node subtype V4L flags 0
device node name /dev/video2
pad0: Sink
<- "ipu3-csi2 2":1 [ENABLED,IMMUTABLE]
- entity 28: ipu3-csi2 3 (2 pads, 1 link)
type V4L2 subdev subtype Unknown flags 0
device node name /dev/v4l-subdev3
pad0: Sink
pad1: Source
[fmt:SGRBG10/1936x1096]
-> "ipu3-cio2 3":0 [ENABLED,IMMUTABLE]
- entity 31: ipu3-cio2 3 (1 pad, 1 link)
type Node subtype V4L flags 0
device node name /dev/video3
pad0: Sink
<- "ipu3-csi2 3":1 [ENABLED,IMMUTABLE]
- entity 37: ov13858 8-0010 (1 pad, 1 link)
type V4L2 subdev subtype Sensor flags 0
device node name /dev/v4l-subdev4
pad0: Source
[fmt:SGRBG10/4224x3136]
-> "ipu3-csi2 0":0 [ENABLED]
- entity 39: dw9714 8-000c (0 pad, 0 link)
type V4L2 subdev subtype Lens flags 0
device node name /dev/v4l-subdev5
- entity 40: ov5670 10-0036 (1 pad, 1 link)
type V4L2 subdev subtype Sensor flags 0
device node name /dev/v4l-subdev6
pad0: Source
[fmt:SGRBG10/2592x1944]
-> "ipu3-csi2 1":0 [ENABLED]
Is there anything interesting in the stack trace?
Do you mean the kernel log that can be obtained with dmesg or journalctl? If so, there is no log at all after the hang on the mainline kernel (v5.9-rc6). There was not even a kernel panic; the system just hung.
On the other hand, on v5.4 LTS, it causes a kernel stack overflow (double-fault)
(see the log in my first comment).
I'm not sure why there is such a difference.
What's the entry point into the recursion?
It repeats these lines:
[ 872.821254] kernel: DEBUG: Passed cio2_subdev_get_fmt() 1259
[ 872.821258] kernel: DEBUG: Passed cio2_subdev_get_fmt() 1262
1259 pr_info("DEBUG: Passed %s() %d\n", __func__, __LINE__);
1260
1261 if (fmt->pad == CIO2_PAD_SINK) {
1262 pr_info("DEBUG: Passed %s() %d\n", __func__, __LINE__);
1263 format.which = V4L2_SUBDEV_FORMAT_ACTIVE;
1264 ret = v4l2_subdev_call(sd, pad, get_fmt, NULL,
1265 &format);
1266
1267 if (ret) {
1268 pr_info("DEBUG: Passed %s() %d\n", __func__, __LINE__);
1269 return ret;
1270 }
So, it looks like the following loop is happening there:
cio2_subdev_get_fmt() calls v4l2_subdev_call(), and v4l2_subdev_call() internally calls cio2_subdev_get_fmt() again.
Also - have you chopped the media-graph? I was expecting more information there too.
For example - this is the output of media-ctl -p on my IPU3 device (not a surface)
[...]
Ah, yes. I omitted the output for the one that is working as expected. The full output is available here (the one I posted in https://github.com/linux-surface/linux-surface/issues/91 before)
For the one that caused system hang, it's the full output.
regarding the media graph - they're different. That's something for you to explore.
Look at the paste from above:
Media device information
------------------------
driver ipu3-cio2
model Intel IPU3 CIO2
serial
bus info PCI:0000:00:14.3
hw revision 0x0
driver version 5.9.0
Device topology
- entity 1: ipu3-csi2 0 (2 pads, 1 link)
type V4L2 subdev subtype Unknown flags 0
device node name /dev/v4l-subdev0
pad0: Sink
And compare that against the media graph you had 'before'
Media device information
------------------------
driver ipu3-cio2
model Intel IPU3 CIO2
serial
bus info PCI:0000:00:14.3
hw revision 0x0
driver version 5.8.4
Device topology
- entity 1: ipu3-csi2 0 (2 pads, 2 links)
type V4L2 subdev subtype Unknown flags 0
device node name /dev/v4l-subdev2
pad0: Sink
<- "ov8865 3-0010":0 [ENABLED]
pad1: Source
[fmt:SGRBG10_1X10/1936x1096 field:none]
-> "ipu3-cio2 0":0 [ENABLED,IMMUTABLE]
Those changes (like the lack of the ov8865 sink) are crucial pieces of information with regard to this bug.
Ah, sorry. For this issue, I ran the tests with all the sensor drivers and the bridge driver unloaded.
(Yes, this issue happens even without the sensor drivers / bridge driver. So it may be reproducible on any PC equipped with an IPU3 when the "Memory initialization" kernel config is set to CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF (strong) or CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF_ALL (very strong).)
For the record, here is the full output when sensor drivers and bridge driver aren't loaded:
# When "Memory initialization" option is
# CONFIG_INIT_STACK_NONE (weakest) or
# CONFIG_GCC_PLUGIN_STRUCTLEAK_USER (weak)
$ media-ctl -p -d /dev/media0
Media controller API version 5.9.0
Media device information
------------------------
driver ipu3-cio2
model Intel IPU3 CIO2
serial
bus info PCI:0000:00:14.3
hw revision 0x0
driver version 5.9.0
Device topology
- entity 1: ipu3-csi2 0 (2 pads, 1 link)
type V4L2 subdev subtype Unknown flags 0
device node name /dev/v4l-subdev0
pad0: Sink
pad1: Source
[fmt:SGRBG10_1X10/1936x1096 field:none]
-> "ipu3-cio2 0":0 [ENABLED,IMMUTABLE]
- entity 4: ipu3-cio2 0 (1 pad, 1 link)
type Node subtype V4L flags 0
device node name /dev/video0
pad0: Sink
<- "ipu3-csi2 0":1 [ENABLED,IMMUTABLE]
- entity 10: ipu3-csi2 1 (2 pads, 1 link)
type V4L2 subdev subtype Unknown flags 0
device node name /dev/v4l-subdev1
pad0: Sink
pad1: Source
[fmt:SGRBG10_1X10/1936x1096 field:none]
-> "ipu3-cio2 1":0 [ENABLED,IMMUTABLE]
- entity 13: ipu3-cio2 1 (1 pad, 1 link)
type Node subtype V4L flags 0
device node name /dev/video1
pad0: Sink
<- "ipu3-csi2 1":1 [ENABLED,IMMUTABLE]
- entity 19: ipu3-csi2 2 (2 pads, 1 link)
type V4L2 subdev subtype Unknown flags 0
device node name /dev/v4l-subdev2
pad0: Sink
pad1: Source
[fmt:SGRBG10_1X10/1936x1096 field:none]
-> "ipu3-cio2 2":0 [ENABLED,IMMUTABLE]
- entity 22: ipu3-cio2 2 (1 pad, 1 link)
type Node subtype V4L flags 0
device node name /dev/video2
pad0: Sink
<- "ipu3-csi2 2":1 [ENABLED,IMMUTABLE]
- entity 28: ipu3-csi2 3 (2 pads, 1 link)
type V4L2 subdev subtype Unknown flags 0
device node name /dev/v4l-subdev3
pad0: Sink
pad1: Source
[fmt:SGRBG10_1X10/1936x1096 field:none]
-> "ipu3-cio2 3":0 [ENABLED,IMMUTABLE]
- entity 31: ipu3-cio2 3 (1 pad, 1 link)
type Node subtype V4L flags 0
device node name /dev/video3
pad0: Sink
<- "ipu3-csi2 3":1 [ENABLED,IMMUTABLE]
Sorry for taking so long. I've just sent a mail to the linux-media mailing list with what I know so far. (This is actually my first time sending a mail to a mailing list :D)
For the record, the URL:
For the record, patches are available here:
On Arch Linux with the latest stable kernel (5.8.5-arch1-1), printing the device topology causes the system to hang. No journal log is available after the hang. This issue prevents libcamera from working when trying to capture images.
On Arch Linux with the latest LTS kernel (5.4.61-1-lts), it causes a kernel oops (but no hang). Below is a more detailed log.
log
```bash $ media-ctl -d /dev/media0 -p Media controller API version 5.4.61 Media device information ------------------------ driver ipu3-cio2 model Intel IPU3 CIO2 serial bus info PCI:0000:00:14.3 hw revision 0x0 driver version 5.4.61 Device topology - entity 1: ipu3-csi2 0 (2 pads, 2 links) type V4L2 subdev subtype Unknown flags 0 device node name /dev/v4l-subdev2 pad0: Sink zsh: segmentation fault media-ctl -d /dev/media0 -p ``` ```bash kern :emerg : [ 153.857610] BUG: stack guard page was hit at 00000000b8d58050 (stack is 000000005792daca..0000000099809e7f) kern :warn : [ 153.857615] kernel stack overflow (double-fault): 0000 [#1] SMP PTI kern :warn : [ 153.857618] CPU: 3 PID: 2850 Comm: media-ctl Tainted: G C OE 5.4.61-1-lts #1 kern :warn : [ 153.857620] Hardware name: Microsoft Corporation Surface Book/Surface Book, BIOS 92.3192.768 03.24.2020 kern :warn : [ 153.857624] RIP: 0010:cio2_subdev_get_fmt+0x2c/0x180 [ipu3_cio2] kern :warn : [ 153.857627] Code: 44 00 00 55 b9 0b 00 00 00 48 89 fd 53 48 89 d3 48 83 ec 60 65 48 8b 04 25 28 00 00 00 48 89 44 24 58 31 c0 48 89 e2 48 89 d7 48 ab 8b 0b 8b 43 04 85 c9 0f 84 d3 00 00 00 85 c0 75 74 c7 04
kern :warn : [ 153.857628] RSP: 0018:ffffb14a00febfc0 EFLAGS: 00010246
kern :warn : [ 153.857630] RAX: 0000000000000000 RBX: ffffb14a00fec038 RCX: 000000000000000b
kern :warn : [ 153.857631] RDX: ffffb14a00febfc0 RSI: 0000000000000000 RDI: ffffb14a00febfc0
kern :warn : [ 153.857632] RBP: ffff960180db01f8 R08: 0000000000000000 R09: ffff9601a9a730c0
kern :warn : [ 153.857633] R10: 0000000000000000 R11: 0000000000000000 R12: 00000000c0585604
kern :warn : [ 153.857634] R13: ffffb14a00fefdb8 R14: 0000000000000058 R15: ffffb14a00fefcc0
kern :warn : [ 153.857636] FS: 00007fafcea25740(0000) GS:ffff9601af580000(0000) knlGS:0000000000000000
kern :warn : [ 153.857637] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
kern :warn : [ 153.857638] CR2: ffffb14a00febfb8 CR3: 0000000406dc6004 CR4: 00000000003606e0
kern :warn : [ 153.857640] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
kern :warn : [ 153.857641] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
kern :warn : [ 153.857641] Call Trace:
kern :warn : [ 153.857647] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857649] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857652] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857654] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857657] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857659] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857662] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857664] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857667] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857669] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857671] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857674] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857677] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857680] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857683] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857686] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857689] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857692] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857695] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857698] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857701] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857704] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857707] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857710] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857713] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857716] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857719] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857722] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857725] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857728] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857731] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857734] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857737] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857740] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857743] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857746] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857749] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857752] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857755] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857758] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857762] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857765] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857768] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857771] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857774] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857777] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857780] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857783] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857786] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857789] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857792] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857795] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857798] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857801] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857804] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857808] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857811] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857814] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857817] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857820] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857823] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857826] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857829] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857832] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857835] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857838] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857841] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857844] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857848] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857851] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857854] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857857] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857860] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857863] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857865] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857868] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857870] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857873] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857876] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857879] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857882] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857885] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857888] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857891] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857894] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857897] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857900] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857903] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857906] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857909] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857912] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857916] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857919] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857922] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857925] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857928] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857931] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857934] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857937] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857939] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857942] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857944] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857946] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857949] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857951] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857953] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857956] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857958] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857960] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857963] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857966] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857968] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857971] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857974] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857976] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857978] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857981] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857983] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857985] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857988] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857990] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857992] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857995] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.857998] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.858000] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.858003] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.858005] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.858008] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.858011] cio2_subdev_get_fmt+0x98/0x180 [ipu3_cio2]
kern :warn : [ 153.858022] subdev_do_ioctl+0x468/0xee0 [videodev]
kern :warn : [ 153.858031] video_usercopy+0xcb/0x620 [videodev]
kern :warn : [ 153.858039] ? subdev_do_ioctl+0xee0/0xee0 [videodev]
kern :warn : [ 153.858047] v4l2_ioctl+0x48/0x50 [videodev]
kern :warn : [ 153.858052] do_vfs_ioctl+0x3ed/0x6c0
kern :warn : [ 153.858055] ? kmem_cache_free+0xa4/0x1b0
kern :warn : [ 153.858057] ksys_ioctl+0x5e/0x90
kern :warn : [ 153.858060] __x64_sys_ioctl+0x16/0x20
kern :warn : [ 153.858063] do_syscall_64+0x49/0x90
kern :warn : [ 153.858067] entry_SYSCALL_64_after_hwframe+0x44/0xa9
kern :warn : [ 153.858069] RIP: 0033:0x7fafceb40f6b
kern :warn : [ 153.858072] Code: 89 d8 49 8d 3c 1c 48 f7 d8 49 39 c4 72 b5 e8 1c ff ff ff 85 c0 78 ba 4c 89 e0 5b 5d 41 5c c3 f3 0f 1e fa b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d d5 ae 0c 00 f7 d8 64 89 01 48
kern :warn : [ 153.858073] RSP: 002b:00007ffd62848f38 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
kern :warn : [ 153.858075] RAX: ffffffffffffffda RBX: 000055d281871c10 RCX: 00007fafceb40f6b
kern :warn : [ 153.858076] RDX: 00007ffd62848f40 RSI: 00000000c0585604 RDI: 0000000000000003
kern :warn : [ 153.858077] RBP: 00007ffd62848ff0 R08: 00007fafcebd1040 R09: 00007fafcebd10c0
kern :warn : [ 153.858078] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
kern :warn : [ 153.858079] R13: 0000000000000001 R14: 0000000000000002 R15: 000055d281871c18
kern :warn : [ 153.858081] Modules linked in: ov8865(OE) ov7251(OE) ov5693(OE) iptable_mangle xt_CHECKSUM xt_tcpudp iptable_nat xt_comment xt_MASQUERADE nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 libcrc32c bridge stp llc iptable_filter usb_storage rfcomm cmac algif_hash algif_skcipher af_alg bnep btusb btrtl btbcm btintel bluetooth ecdh_generic ecc zram intel_rapl_msr intel_rapl_common mousedev joydev input_leds uinput x86_pkg_temp_thermal msr intel_powerclamp coretemp kvm_intel snd_hda_codec_hdmi kvm snd_hda_codec_realtek snd_hda_codec_generic ledtrig_audio hid_sensor_gyro_3d hid_sensor_als hid_sensor_rotation irqbypass hid_sensor_accel_3d hid_sensor_trigger snd_hda_intel crct10dif_pclmul industrialio_triggered_buffer kfifo_buf snd_intel_nhlt hid_sensor_iio_common crc32_pclmul ghash_clmulni_intel industrialio snd_hda_codec aesni_intel snd_hda_core crypto_simd snd_hwdep squashfs hid_sensor_hub cryptd hid_multitouch mwifiex_pcie glue_helper mei_hdcp nls_iso8859_1 hid_generic mwifiex snd_pcm rapl
kern :warn : [ 153.858121] nls_cp437 usbhid snd_timer intel_cstate vfat ipu3_imgu(C) ipu3_cio2 snd fat v4l2_fwnode fuse cfg80211 intel_uncore pcspkr videobuf2_dma_sg loop soundcore videobuf2_memops rfkill videobuf2_v4l2 mei_me mei videobuf2_common intel_lpss_pci intel_xhci_usb_role_switch intel_lpss videodev idma64 intel_pch_thermal roles mc i2c_hid hid battery surfacepro3_button soc_button_array ac tpm_crb evdev tpm_tis tpm_tis_core mac_hid tpm rng_core pkcs8_key_parser sg scsi_mod crypto_user acpi_call(OE) ip_tables x_tables ext4 crc32c_generic crc16 mbcache jbd2 xhci_pci crc32c_intel xhci_hcd i915 i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm intel_agp intel_gtt agpgart
kern :warn : [ 153.858157] ---[ end trace f0e5af07c0f0b484 ]---
kern :warn : [ 153.858160] RIP: 0010:cio2_subdev_get_fmt+0x2c/0x180 [ipu3_cio2]
kern :warn : [ 153.858162] Code: 44 00 00 55 b9 0b 00 00 00 48 89 fd 53 48 89 d3 48 83 ec 60 65 48 8b 04 25 28 00 00 00 48 89 44 24 58 31 c0 48 89 e2 48 89 d7 48 ab 8b 0b 8b 43 04 85 c9 0f 84 d3 00 00 00 85 c0 75 74 c7 04
kern :warn : [ 153.858163] RSP: 0018:ffffb14a00febfc0 EFLAGS: 00010246
kern :warn : [ 153.858165] RAX: 0000000000000000 RBX: ffffb14a00fec038 RCX: 000000000000000b
kern :warn : [ 153.858166] RDX: ffffb14a00febfc0 RSI: 0000000000000000 RDI: ffffb14a00febfc0
kern :warn : [ 153.858167] RBP: ffff960180db01f8 R08: 0000000000000000 R09: ffff9601a9a730c0
kern :warn : [ 153.858168] R10: 0000000000000000 R11: 0000000000000000 R12: 00000000c0585604
kern :warn : [ 153.858169] R13: ffffb14a00fefdb8 R14: 0000000000000058 R15: ffffb14a00fefcc0
kern :warn : [ 153.858170] FS: 00007fafcea25740(0000) GS:ffff9601af580000(0000) knlGS:0000000000000000
kern :warn : [ 153.858171] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
kern :warn : [ 153.858172] CR2: ffffb14a00febfb8 CR3: 0000000406dc6004 CR4: 00000000003606e0
kern :warn : [ 153.858173] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
kern :warn : [ 153.858174] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
kern :warn : [ 153.858351] ------------[ cut here ]------------
kern :warn : [ 153.858360] WARNING: CPU: 3 PID: 0 at kernel/rcu/tree.c:569 rcu_idle_enter+0x80/0x90
kern :warn : [ 153.858362] Modules linked in: ov8865(OE) ov7251(OE) ov5693(OE) iptable_mangle xt_CHECKSUM xt_tcpudp iptable_nat xt_comment xt_MASQUERADE nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 libcrc32c bridge stp llc iptable_filter usb_storage rfcomm cmac algif_hash algif_skcipher af_alg bnep btusb btrtl btbcm btintel bluetooth ecdh_generic ecc zram intel_rapl_msr intel_rapl_common mousedev joydev input_leds uinput x86_pkg_temp_thermal msr intel_powerclamp coretemp kvm_intel snd_hda_codec_hdmi kvm snd_hda_codec_realtek snd_hda_codec_generic ledtrig_audio hid_sensor_gyro_3d hid_sensor_als hid_sensor_rotation irqbypass hid_sensor_accel_3d hid_sensor_trigger snd_hda_intel crct10dif_pclmul industrialio_triggered_buffer kfifo_buf snd_intel_nhlt hid_sensor_iio_common crc32_pclmul ghash_clmulni_intel industrialio snd_hda_codec aesni_intel snd_hda_core crypto_simd snd_hwdep squashfs hid_sensor_hub cryptd hid_multitouch mwifiex_pcie glue_helper mei_hdcp nls_iso8859_1 hid_generic mwifiex snd_pcm rapl
kern :warn : [ 153.858417] nls_cp437 usbhid snd_timer intel_cstate vfat ipu3_imgu(C) ipu3_cio2 snd fat v4l2_fwnode fuse cfg80211 intel_uncore pcspkr videobuf2_dma_sg loop soundcore videobuf2_memops rfkill videobuf2_v4l2 mei_me mei videobuf2_common intel_lpss_pci intel_xhci_usb_role_switch intel_lpss videodev idma64 intel_pch_thermal roles mc i2c_hid hid battery surfacepro3_button soc_button_array ac tpm_crb evdev tpm_tis tpm_tis_core mac_hid tpm rng_core pkcs8_key_parser sg scsi_mod crypto_user acpi_call(OE) ip_tables x_tables ext4 crc32c_generic crc16 mbcache jbd2 xhci_pci crc32c_intel xhci_hcd i915 i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm intel_agp intel_gtt agpgart
kern :warn : [ 153.858457] CPU: 3 PID: 0 Comm: swapper/3 Tainted: G D C OE 5.4.61-1-lts #1
kern :warn : [ 153.858459] Hardware name: Microsoft Corporation Surface Book/Surface Book, BIOS 92.3192.768 03.24.2020
kern :warn : [ 153.858464] RIP: 0010:rcu_idle_enter+0x80/0x90
kern :warn : [ 153.858466] Code: d0 65 48 03 05 69 0a 70 66 48 c7 80 d0 00 00 00 00 00 00 00 65 48 03 15 56 0a 70 66 b8 02 00 00 00 f0 0f c1 82 e0 00 00 00 c3 <0f> 0b eb a6 66 66 2e 0f 1f 84 00 00 00 00 00 90 0f 1f 44 00 00 48
kern :warn : [ 153.858468] RSP: 0018:ffffb14a000d3ee0 EFLAGS: 00010002
kern :warn : [ 153.858470] RAX: ffff9601af5ab300 RBX: ffff9601ad1cbd00 RCX: 4000000000000000
kern :warn : [ 153.858471] RDX: 000000000002b300 RSI: 0000000000001388 RDI: 0000000000002532
kern :warn : [ 153.858472] RBP: 0000000000000003 R08: ffff9601af5a92e0 R09: 0000000000000018
kern :warn : [ 153.858473] R10: 00000000000000d1 R11: ffff9601af5a92c0 R12: ffffffff9aabbbe0
kern :warn : [ 153.858474] R13: ffff9601af5b4600 R14: 0000000000000006 R15: 0000000000000000
kern :warn : [ 153.858475] FS: 0000000000000000(0000) GS:ffff9601af580000(0000) knlGS:0000000000000000
kern :warn : [ 153.858476] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
kern :warn : [ 153.858477] CR2: ffffb14a00febfb8 CR3: 000000046580a002 CR4: 00000000003606e0
kern :warn : [ 153.858478] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
kern :warn : [ 153.858479] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
kern :warn : [ 153.858480] Call Trace:
kern :warn : [ 153.858488] do_idle+0x1b6/0x240
kern :warn : [ 153.858492] cpu_startup_entry+0x19/0x20
kern :warn : [ 153.858499] start_secondary+0x176/0x1d0
kern :warn : [ 153.858505] secondary_startup_64+0xb6/0xc0
kern :warn : [ 153.858508] ---[ end trace f0e5af07c0f0b485 ]---
```
No issue on Ubuntu with v5.8.4 (https://git.launchpad.net/~ubuntu-kernel-test/ubuntu/+source/linux/+git/mainline-crack/log/?h=cod/mainline/v5.8.4). This tree is almost the same as upstream, and thus also almost the same as Arch's kernel. So I suspected that the cause of the hang might be a difference in the kernel config.
And this is true. When I built the kernel with CONFIG_INIT_STACK_NONE=y, no hang occurred.
Arch sets the kernel config "Initialize kernel stack variables at function entry" to CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF_ALL=y ("zero-init anything passed by reference (very strong)"). On the other hand, Ubuntu sets it to CONFIG_INIT_STACK_NONE=y ("no automatic initialization (weakest)").
So, does this mean that the ipu3-cio2 driver hits areas that shouldn't be hit?