microsoft / WSL

Issues found on WSL
https://docs.microsoft.com/windows/wsl
MIT License
17.44k stars 822 forks source link

support legacy vsyscall - Segmentation Fault using a gcc cross compiler #1462

Closed nagua closed 4 years ago

nagua commented 7 years ago

Please use the following bug reporting template to help produce actionable and reproducible issues. Please try to ensure that the reproduction is minimal so that the team can go through more bugs!

[1]    8017 segmentation fault (core dumped)  ./ctc-linux64-atom-2.1.4.13/cross/bin/i686-aldebaran-linux-gnu-c++ main.c

And no output at all.

  1. Download the cross toolchain from https://community.ald.softbankrobotics.com/en/resources/software/language/en-gb . You need the Cross Toolchain 2.1.4 Linux 64 under the point 4 - C++ NAOqi SDK and you need to create an account for that.
  2. Unzip the toolchain
  3. Create a minimal C program: echo "int main() {return 0;}" > main.c
  4. Compile with: ./ctc-linux64-atom-2.1.4.13/cross/bin/i686-aldebaran-linux-gnu-c++ main.c
execve("./ctc-linux64-atom-2.1.4.13/cross/bin/i686-aldebaran-linux-gnu-c++", ["./ctc-linux64-atom-2.1.4.13/cros"..., "main.c"], [/* 19 vars */]) = 0
uname({sysname="Linux", nodename="Nicolas-PC", ...}) = 0
brk(NULL)                               = 0x6da000
brk(0x6db120)                           = 0x6db120
arch_prctl(ARCH_SET_FS, 0x6da800)       = 0
brk(0x6fc120)                           = 0x6fc120
brk(0x6fd000)                           = 0x6fd000
rt_sigaction(SIGINT, {SIG_IGN, [INT], SA_RESTORER|SA_RESTART, 0x41af90}, {SIG_DFL, [INT], SA_RESTORER|SA_RESTART, 0x7f742b3054b0}, 8) = 0
rt_sigaction(SIGINT, {0x407980, [INT], SA_RESTORER|SA_RESTART, 0x41af90}, {SIG_IGN, [INT], SA_RESTORER|SA_RESTART, 0x41af90}, 8) = 0
rt_sigaction(SIGHUP, {SIG_IGN, [HUP], SA_RESTORER|SA_RESTART, 0x41af90}, {SIG_DFL, [], SA_RESTORER|SA_INTERRUPT, 0x7f742b3054b0}, 8) = 0
rt_sigaction(SIGHUP, {0x407980, [HUP], SA_RESTORER|SA_RESTART, 0x41af90}, {SIG_IGN, [HUP], SA_RESTORER|SA_RESTART, 0x41af90}, 8) = 0
rt_sigaction(SIGTERM, {SIG_IGN, [TERM], SA_RESTORER|SA_RESTART, 0x41af90}, {SIG_DFL, [TERM], SA_RESTORER|SA_RESTART, 0x7f742b3054b0}, 8) = 0
rt_sigaction(SIGTERM, {0x407980, [TERM], SA_RESTORER|SA_RESTART, 0x41af90}, {SIG_IGN, [TERM], SA_RESTORER|SA_RESTART, 0x41af90}, 8) = 0
rt_sigaction(SIGPIPE, {SIG_IGN, [PIPE], SA_RESTORER|SA_RESTART, 0x41af90}, {SIG_DFL, [PIPE], SA_RESTORER|SA_RESTART, 0x7f742b3054b0}, 8) = 0
rt_sigaction(SIGPIPE, {0x407980, [PIPE], SA_RESTORER|SA_RESTART, 0x41af90}, {SIG_IGN, [PIPE], SA_RESTORER|SA_RESTART, 0x41af90}, 8) = 0
rt_sigaction(SIGCHLD, {SIG_DFL, [CHLD], SA_RESTORER|SA_RESTART, 0x41af90}, {SIG_DFL, [CHLD], SA_RESTORER|SA_RESTART, 0x7fffff0654b0}, 8) = 0
getcwd("/home/nicolas", 4096)           = 14
lstat("/home/nicolas/ctc-linux64-atom-2.1.4.13", {st_mode=S_IFDIR|0777, st_size=0, ...}) = 0
lstat("/home/nicolas/ctc-linux64-atom-2.1.4.13/cross", {st_mode=S_IFDIR|0777, st_size=0, ...}) = 0
lstat("/home/nicolas/ctc-linux64-atom-2.1.4.13/cross/bin", {st_mode=S_IFDIR|0777, st_size=0, ...}) = 0
lstat("/home/nicolas/ctc-linux64-atom-2.1.4.13/cross/bin/i686-aldebaran-linux-gnu-c++", {st_mode=S_IFREG|0755, st_size=878192, ...}) = 0
getcwd("/home/nicolas", 4096)           = 14
lstat("/home/nicolas/ctc-linux64-atom-2.1.4.13", {st_mode=S_IFDIR|0777, st_size=0, ...}) = 0
lstat("/home/nicolas/ctc-linux64-atom-2.1.4.13/cross", {st_mode=S_IFDIR|0777, st_size=0, ...}) = 0
lstat("/home/nicolas/ctc-linux64-atom-2.1.4.13/cross/bin", {st_mode=S_IFDIR|0777, st_size=0, ...}) = 0
lstat("/home/nicolas/ctc-linux64-atom-2.1.4.13/cross/bin/i686-aldebaran-linux-gnu-c++", {st_mode=S_IFREG|0755, st_size=878192, ...}) = 0
access("/home/nicolas/ctc-linux64-atom-2.1.4.13/cross/bin/", X_OK) = 0
access("/home/nicolas/ctc-linux64-atom-2.1.4.13/cross/bin/", X_OK) = 0
access("/home/nicolas/ctc-linux64-atom-2.1.4.13/cross/bin/", X_OK) = 0
access("/home/nicolas/ctc-linux64-atom-2.1.4.13/cross/bin/", X_OK) = 0
getcwd("/home/nicolas", 4096)           = 14
lstat("/home/nicolas/ctc-linux64-atom-2.1.4.13", {st_mode=S_IFDIR|0777, st_size=0, ...}) = 0
lstat("/home/nicolas/ctc-linux64-atom-2.1.4.13/cross", {st_mode=S_IFDIR|0777, st_size=0, ...}) = 0
lstat("/home/nicolas/ctc-linux64-atom-2.1.4.13/cross/bin", {st_mode=S_IFDIR|0777, st_size=0, ...}) = 0
lstat("/home/nicolas/ctc-linux64-atom-2.1.4.13/cross/bin/i686-aldebaran-linux-gnu-c++", {st_mode=S_IFREG|0755, st_size=878192, ...}) = 0
access("/home/nicolas/ctc-linux64-atom-2.1.4.13/cross/bin/../i686-aldebaran-linux-gnu/sysroot", F_OK) = 0
access("main.c", F_OK)                  = 0
access("/home/nicolas/ctc-linux64-atom-2.1.4.13/cross/bin/../lib/gcc/i686-aldebaran-linux-gnu/4.5.3/specs", R_OK) = -1 ENOENT (No such file or directory)
access("/home/nicolas/ctc-linux64-atom-2.1.4.13/cross/bin/../lib/gcc/specs", R_OK) = -1 ENOENT (No such file or directory)
access("/home/nicolas/ctc-linux64-atom-2.1.4.13/cross/bin/../lib/gcc/i686-aldebaran-linux-gnu/4.5.3/../../../../i686-aldebaran-linux-gnu/lib/i686-aldebaran-linux-gnu/4.5.3/specs", R_OK) = -1 ENOENT (No such file or directory)
access("/home/nicolas/ctc-linux64-atom-2.1.4.13/cross/bin/../lib/gcc/i686-aldebaran-linux-gnu/4.5.3/../../../../i686-aldebaran-linux-gnu/lib/specs", R_OK) = -1 ENOENT (No such file or directory)
access("/opt/x-tools/linux64-cross-i686-aldebaran-linux-gnu/lib/gcc/i686-aldebaran-linux-gnu/specs", R_OK) = -1 ENOENT (No such file or directory)
access("/home/nicolas/ctc-linux64-atom-2.1.4.13/cross/bin/", X_OK) = 0
access("/home/nicolas/ctc-linux64-atom-2.1.4.13/cross/bin/", X_OK) = 0
stat("/home/nicolas/ctc-linux64-atom-2.1.4.13/cross/bin/../libexec/gcc/i686-aldebaran-linux-gnu/4.5.3/lto-wrapper", {st_mode=S_IFREG|0755, st_size=636336, ...}) = 0
access("/home/nicolas/ctc-linux64-atom-2.1.4.13/cross/bin/../libexec/gcc/i686-aldebaran-linux-gnu/4.5.3/lto-wrapper", X_OK) = 0
access("/tmp", R_OK|W_OK|X_OK)          = 0
--- SIGSEGV {si_signo=SIGSEGV, si_code=SI_KERNEL, si_addr=0xffffffffff600000} ---
+++ killed by SIGSEGV (core dumped) +++
[1]    8008 segmentation fault (core dumped)  strace ./ctc-linux64-atom-2.1.4.13/cross/bin/i686-aldebaran-linux-gnu-c++

No additional required packages other than the toolchain mentioned above. The toolchain is completely statically linked.

See our contributing instructions for assistance.

misenesi commented 7 years ago

@nagua Thanks for reporting this. It looks very similar to [this one](https://github.com/Microsoft/BashOnWindows/issues/1466).

Since you provide good repro steps, I am opening a bug to track this internally.

misenesi commented 7 years ago

I have done some changes to how we handle SIGSEGV to match more closely Linux behavior and with these changes I can see this still reproing, but the SIGSEGV fails with SEGV_MAPERR, which signals accessing unmapped memory. Most probably a dangling pointer issue, or some sort of buffer overflow. I don't see this repro on Ubuntu, but without source code this is going to be hard to debug.

nagua commented 7 years ago

I think this is an unchanged gcc cross compiler. You can see that it is built with crosstool-ng by evaluating the version string. It is simply a cross compiler from amd64 to i686, but distributed by Aldebaran (SoftBank Robotics). So I think you can simply look in the gcc source code for version 4.5.3 to track the issue down.

JasonLinMS commented 7 years ago

This is the version string of the compiler from Softbank, which has the segfault: i686-aldebaran-linux-gnu-g++ (crosstool-NG hg+unknown-20130411.130503) 4.5.3

I built this compiler using crosstool-ng, trying to match the Softbank one as much as possible, and it does not segfault: i686-nptl-linux-gnu-g++ (crosstool-NG crosstool-ng-1.22.0) 4.5.4

Unfortunately, at this point there is not much more we can do to debug this issue, since it does not repro on the GCC cross-compiler built from source.

nagua commented 7 years ago

Thank you anyway. I will try to contact SoftBank about this and see what they can provide and do about it.

But thank you very much for your help so far.

JasonLinMS commented 7 years ago

No problem, please let us know if you have any more information to make further progress on this investigation.

nagua commented 7 years ago

I tried to rebuild the toolchain by myself and I got a toolchain where the compiler crashes like the original one. I used an Ubuntu 12.04 image and the mercurial crosstool-ng revision 3200. On top of that, the crosstool-ng toolchain has a program to get the used config (./cross/bin/i686-aldebaran-linux-gnu-ct-ng.config). With these things in place I could rebuild the toolchain. I disabled the stripping of the executables. I have uploaded the packed files to OneDrive (https://1drv.ms/u/s!AmRdBJnPKuRk1Uq8VWzTkd1eFDw2). Do you need more than unstripped binaries?

The error is slightly different, but it could be due to the fact that I'm now using the stable windows build version.

execve("./i686-aldebaran-linux-gnu-g++", ["./i686-aldebaran-linux-gnu-g++", "/home/nicolas/main.c"], [/* 16 vars */]) = 0
uname({sys="Linux", node="NICI-PC", ...}) = 0
brk(0)                                  = 0x1849000
brk(0x184a140)                          = 0x184a140
arch_prctl(ARCH_SET_FS, 0x1849800)      = 0
brk(0x186b140)                          = 0x186b140
brk(0x186c000)                          = 0x186c000
--- SIGSEGV {si_signo=SIGSEGV, si_code=SI_KERNEL, si_addr=0} ---
+++ killed by SIGSEGV (core dumped) +++

Edit: Sorry, I tried my own toolchain on the newest Insider build and no crash happens there... So I have to investigate further...

Greetings Nicolas

JasonLinMS commented 7 years ago

Ah, thanks for trying this out. Let us know if you see any crashes in the future.

therealkenc commented 7 years ago

Nicolas' last strace is a gift. What struck me about this issue is how few syscalls can be at fault here. His first strace from Softbank's binary makes only 47 syscalls before faceplanting. His last strace makes only 7, and it sure as heck isn't execve() or uname().

[edit:] _blah blah incorrect speculation about arch_prctl() and brk() since I thought that was the only surface that could be causing trouble_.

therealkenc commented 7 years ago

Okay, I think it was right in front of us:

SIGSEGV {si_signo=SIGSEGV, si_code=SI_KERNEL, si_addr=0xffffffffff600000}

That isn't a "dangling pointer issue, or some sort of buffer overflow". That's pointing squarely and deliberately at the vsyscall page, which the Softbank binary and the #1466 CCP4 binary are statically linked to reference. Native Ubuntu's generic kernel is built with CONFIG_LEGACY_VSYSCALL, but on WSL, address ffffffffff600000 - ffffffffff601000 isn't mapped.

Easy to see with cat /proc/self/maps:

WSL:

...
7ffffb845000-7ffffc045000 rw-- 00000000 00:00 0                  [stack]
7ffffc769000-7ffffc76a000 r-x- 00000000 00:00 0                  [vdso]

Native:

...
7ffdc1b57000-7ffdc1b78000 rw-p 00000000 00:00 0                          [stack]
7ffdc1b9e000-7ffdc1ba0000 r--p 00000000 00:00 0                          [vvar]
7ffdc1ba0000-7ffdc1ba2000 r-xp 00000000 00:00 0                          [vdso]
ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0                  [vsyscall]

Possibly the shortest test case ever:

int main() { 
  return *(int*)(0xffffffffff600000); 
}
JasonLinMS commented 7 years ago

@therealkenc Nice investigation, thanks!! Can't believe we never looked at the actual segfault address... We'll see if we can implement the vsyscall page, since we already have vdso.

therealkenc commented 7 years ago

For what it is worth, vsyscall seems to be shunned these days (I think as of Real Linux circa 3.2) because of security and whatnot. Newthink seems to be to take the trap and emulate. All the cool kids seem to be hardening their systems so it might not be worth implementing something that was obsolete before you even started. You also avoid reddit/theregister posts claiming that WSL is insecure.

#1466 can be marked as a dup, or vice versa. Workaround (as you already discovered) is to ask the vendor to recompile with a more recent glibc or avoid static binaries altogether.

JasonLinMS commented 7 years ago

Yep that's right, we'll take that into account when discussing the fix, thanks as always!

nagua commented 7 years ago

Hey guys, is there any news on this issue?

therealkenc commented 7 years ago

They're probably stuck in pencils down for Creators Update. The work-around for this (and #1466) is to compile from source or ask the vendor to make a non-statically linked version.

Oeffner commented 6 years ago

Will this problem with vsyscall emulation be addressed in WSL soon? Currently it stops the Linux tools we use at my work from working on WSL. Thanks to @therealkenc for investigating and diagnosing it.

therealkenc commented 6 years ago

Will this problem with vsyscall emulation be addressed in WSL soon?

We don't get ETAs on open issues. This is sad but understandable.

Can this be implemented in WSL

Yes because Turing completeness.

or has it been decided not to do this?

No because this issue was not closed and tagged by-design.

Oeffner commented 6 years ago

That sounds good. Many people will be looking forward to this being implemented. I realise that having to implement a feature to handle what I understand is the behaviour of dirty legacy programs from the past is probably not the highest priority on the list. But given that MS has managed to retain backwards compatibility on the Win32 platform with their PE format for well over two decades, declining to implement vsyscall emulation on WSL would seem rather odd.