msys2 / MINGW-packages

Package scripts for MinGW-w64 targets to build under MSYS2.
https://packages.msys2.org
BSD 3-Clause "New" or "Revised" License
2.29k stars 1.22k forks source link

strtod parses only 17 characters, C++ std::cin does not #4987

Open yumetodo opened 5 years ago

yumetodo commented 5 years ago

I wrote the two programs below.

C version

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
/* macros */
#define N 256 /* size of the input line buffer, including the terminating NUL */

/*
 * main: read one line from stdin, convert its leading part with strtod(),
 * and print the converted value, the unparsed remainder, and the errno
 * status left by the conversion.
 */
int main(void) {
    char s[N] = {'\0'}, *endptr;
    double x;
    int saved_errno;

    /* On EOF or a read error, fall back to an empty line so that the
     * buffer contents are well defined for the calls below. */
    if (fgets(s, N, stdin) == NULL) {
        s[0] = '\0';
    }
    printf("input: %s", s);

    errno = 0;
    x = strtod(s, &endptr);
    /* Capture errno immediately: the printf() calls below are allowed to
     * modify it, which would misreport the status of strtod(). */
    saved_errno = errno;
    printf("output: %.2f\n", x);
    printf("endptr: %s, errno: %s, rest len: %d\n", endptr, strerror(saved_errno), (int)strlen(endptr));

    return EXIT_SUCCESS;
}

C++ version

#include <string>
#include <iostream>
#include<iomanip>

// Reads one double from standard input and echoes it in fixed notation
// with two digits after the decimal point.
int main(){
    double d;
    std::cin >> d;

    // Same formatting state as std::fixed / std::setprecision(2),
    // set through the stream's member functions.
    std::cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
    std::cout.precision(2);
    std::cout << d << std::endl;
}

Look at std::cin >> d;. According to the C++17 standard (N4659), operator>> calls std::num_get<char>::do_get(). The standard also says:

§ 25.4.2.1.2

Stage 3: The sequence of chars accumulated in stage 2 (the field) is converted to a numeric value by the rules of one of the functions declared in the header <cstdlib>: (3.1) — For a signed integer value, the function strtoll. (3.2) — For an unsigned integer value, the function strtoull. (3.3) — For a float value, the function strtof. (3.4) — For a double value, the function strtod. (3.5) — For a long double value, the function strtold.

In other words, std::cin >> d calls strtod.

So these two programs (the C version and the C++ version) should behave the same, I think.

Now, I compile these programs on msys2 mingw64

$gcc -v
Using built-in specs.
COLLECT_GCC=C:\msys64\mingw64\bin\gcc.exe
COLLECT_LTO_WRAPPER=C:/msys64/mingw64/bin/../lib/gcc/x86_64-w64-mingw32/8.2.1/lto-wrapper.exe
Target: x86_64-w64-mingw32
Configured with: ../gcc-8-20181214/configure --prefix=/mingw64 --with-local-prefix=/mingw64/local --build=x86_64-w64-mingw32 --host=x86_64-w64-mingw32 --target=x86_64-w64-mingw32 --with-native-system-header-dir=/mingw64/x86_64-w64-mingw32/include --libexecdir=/mingw64/lib --enable-bootstrap --with-arch=x86-64 --with-tune=generic --enable-languages=ada,c,lto,c++,objc,obj-c++,fortran --enable-shared --enable-static --enable-libatomic --enable-threads=posix --enable-graphite --enable-fully-dynamic-string --enable-libstdcxx-filesystem-ts=yes --enable-libstdcxx-time=yes --disable-libstdcxx-pch --disable-libstdcxx-debug --disable-isl-version-check --enable-lto --enable-libgomp --disable-multilib --enable-checking=release --disable-rpath --disable-win32-registry --disable-nls --disable-werror --disable-symvers --with-libiconv --with-system-zlib --with-gmp=/mingw64 --with-mpfr=/mingw64 --with-mpc=/mingw64 --with-isl=/mingw64 --with-pkgversion='Rev1, Built by MSYS2 project' --with-bugurl=https://sourceforge.net/projects/msys2 --with-gnu-as --with-gnu-ld
Thread model: posix
gcc version 8.2.1 20181214 (Rev1, Built by MSYS2 project)

And I execute these and the output is shown below:

C version output

$./a.exe
123456789012345678901234567890
input: 123456789012345678901234567890
output: 123456789012345680000000000000.00
endptr:
, errno: No error, rest len: 1

C++ version output

$./b.exe
123456789012345678901234567890
123456789012345677877719597056.00

The outputs differ. The C version's output shows that strtod parsed only the first 17 characters; the rest were skipped until a non-digit character was found.


Now, I tried to compile these two programs on Visual Studio 2017 cl.exe (_MSC_FULL_VER == 191627026)

I wrote cmake config like below:

cmake_minimum_required(VERSION 3.1)

## Set our project name
# project() must directly follow cmake_minimum_required(): it is what sets up
# the toolchain, so no separate enable_language() call is needed (and calling
# enable_language() before project() is rejected by newer CMake, see CMP0165).
# Only C is requested so that no C++ compiler is required.
project(teratail_165987 LANGUAGES C)

set(CMAKE_C_STANDARD 99) # C99...
set(CMAKE_C_STANDARD_REQUIRED ON) #...is required...
set(CMAKE_C_EXTENSIONS OFF) #...without compiler extensions like gnu99

if(MSVC)
  # Force to always compile with W4.
  # An existing /W<n> is replaced rather than appended to, so the compiler does
  # not complain about one warning level overriding another.
  if(CMAKE_C_FLAGS MATCHES "/W[0-4]")
    string(REGEX REPLACE "/W[0-4]" "/W4" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
  else()
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /W4")
  endif()
elseif(CMAKE_C_COMPILER_ID STREQUAL "GNU")
  # Update if necessary
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -Wno-long-long -pedantic")
endif()

## Define the executable
add_executable(teratail_165987 main.c)

Then,

mkdir build_vs
cd build_vs
cmake ..
cmake --build . --config Release

And the output is

$./teratail_165987.exe
123456789012345678901234567890
input: 123456789012345678901234567890
output: 123456789012345677877719597056.00
endptr:
, errno: No error, rest len: 1

With MSVC, the two programs produce the same output.

I also tested this on Linux.

There, the two programs produce the same output.


Why is only the output of the binary compiled by mingw gcc (or mingw clang) strange? To investigate, I used ldd.

$ldd a.exe
        ntdll.dll => /c/WINDOWS/SYSTEM32/ntdll.dll (0x7ffe0b2f0000)
        KERNEL32.DLL => /c/WINDOWS/System32/KERNEL32.DLL (0x7ffe0a1b0000)
        KERNELBASE.dll => /c/WINDOWS/System32/KERNELBASE.dll (0x7ffe07440000)
        msvcrt.dll => /c/WINDOWS/System32/msvcrt.dll (0x7ffe0a760000)
$ldd teratail_165987.exe
        ntdll.dll => /c/WINDOWS/SYSTEM32/ntdll.dll (0x7ffe0b2f0000)
        KERNEL32.DLL => /c/WINDOWS/System32/KERNEL32.DLL (0x7ffe0a1b0000)
        KERNELBASE.dll => /c/WINDOWS/System32/KERNELBASE.dll (0x7ffe07440000)
        VCRUNTIME140D.dll => /c/WINDOWS/SYSTEM32/VCRUNTIME140D.dll (0x7ffdf1e40000)
        ucrtbased.dll => /c/WINDOWS/SYSTEM32/ucrtbased.dll (0x7ffdcdd40000)

The binary compiled by mingw gcc uses msvcrt. On the other hand, the binary compiled by MSVC uses ucrtbased.


About msvcrt and ucrtbased, I found document below:

Visual C++ change history 2003 - 2015 | Microsoft Docs

Refactored binaries

The CRT Library has been refactored into a two different binaries, a Universal CRT (ucrtbase), which contains most of the standard functionality, and a VC Runtime Library (vcruntime), which contains the compiler-related functionality, such as exception handling, and intrinsics.

Floating point formatting and parsing
New floating point formatting and parsing algorithms have been introduced to improve correctness. This change affects the printf and scanf families of functions, as well as functions like strtod.

The old formatting algorithms would generate only a limited number of digits, then would fill the remaining decimal places with zero. This is usually good enough to generate strings that will round-trip back to the original floating point value, but it's not great if you want the exact value (or the closest decimal representation thereof). The new formatting algorithms generate as many digits as are required to represent the value (or to fill the specified precision). As an example of the improvement; consider the results when printing a large power of two:

printf("%.0f\n", pow(2.0, 80))

Old output:

1208925819614629200000000

New output:

1208925819614629174706176

The old parsing algorithms would consider only up to 17 significant digits from the input string and would discard the rest of the digits. This is sufficient to generate a very close approximation of the value represented by the string, and the result is usually very close to the correctly rounded result. The new implementation considers all present digits and produces the correctly rounded result for all inputs (up to 768 digits in length). In addition, these functions now respect the rounding mode (controllable via fesetround). This is a potentially breaking behavior change because these functions might output different results. The new results are always more correct than the old results.

So, the strange behavior is caused by msvcrt.


However, look at the output of the C++ version built with mingw gcc again:

$./b.exe
123456789012345678901234567890
123456789012345677877719597056.00

The behavior of these two programs should be the same, but in fact it is not.

C++ version parse input correctly.

Why doesn't msvcrt affect the C++ version?

Is there any workaround to make strtod work correctly on mingw gcc?

mati865 commented 5 years ago

You should ask it on mingw mailing list.

sheeit commented 5 years ago

Here's what I get on my machine

$ cat strtod.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

/*
 * Convert a fixed 30-digit decimal string with strtod() and print the input,
 * the converted value, and the length of the unparsed remainder.
 */
int main(void)
{
    /* String literals must not be modified, so hold them through a
     * pointer-to-const. */
    const char *s = "123456789012345678901234567890";
    char *endp;
    double x;

    errno = 0;
    x = strtod(s, &endp);
    if (errno) {
        perror("strtod");
        exit(EXIT_FAILURE);
    }

    /* %zu is a C99 conversion and is not available in C90 (-ansi) or in
     * older Microsoft C runtimes; cast to unsigned long and use %lu, which
     * every hosted implementation supports. */
    printf("input:  %s\noutput: %.2f\nendptr: %lu\n",
            s, x, (unsigned long)strlen(endp));

    exit(EXIT_SUCCESS);
}
$ gcc -v
Using built-in specs.
COLLECT_GCC=C:\MinGW\bin\gcc.exe
COLLECT_LTO_WRAPPER=c:/mingw/bin/../libexec/gcc/mingw32/8.2.0/lto-wrapper.exe
Target: mingw32
Configured with: ../src/gcc-8.2.0/configure --build=x86_64-pc-linux-gnu --host=mingw32 --target=mingw32 --prefix=/mingw --disable-win32-registry --with-arch=i586 --with-tune=generic --enable-languages=c,c++,objc,obj-c++,fortran,ada --with-pkgversion='MinGW.org GCC-8.2.0-3' --with-gmp=/mingw --with-mpfr=/mingw --with-mpc=/mingw --enable-static --enable-shared --enable-threads --with-dwarf2 --disable-sjlj-exceptions --enable-version-specific-runtime-libs --with-libiconv-prefix=/mingw --with-libintl-prefix=/mingw --enable-libstdcxx-debug --with-isl=/mingw --enable-libgomp --disable-libvtv --enable-nls --disable-build-format-warnings
Thread model: win32
gcc version 8.2.0 (MinGW.org GCC-8.2.0-3)
$ gcc -Wall -Wextra -pedantic -ansi -Og strtod.c -o strtod.exe
$ ./strtod.exe
input:  123456789012345678901234567890
output: 123456789012345677877719597056.00
endptr: 0
$ echo $?
0
$ ldd strtod.exe
        ntdll.dll => /c/Windows/SYSTEM32/ntdll.dll (0x77660000)
        ??? => ??? (0x77820000)
        wow64.dll => /c/Windows/SYSTEM32/wow64.dll (0x73ac0000)
        wow64win.dll => /c/Windows/SYSTEM32/wow64win.dll (0x73a60000)
        wow64cpu.dll => /c/Windows/SYSTEM32/wow64cpu.dll (0x73a50000)
$

So the problem seems to be gone?

yumetodo commented 4 years ago
$cat main.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>

/*
 * Parse a hard-coded 30-digit decimal string with strtod() and print the
 * original text, the parsed double, and how many characters were left
 * unparsed.
 */
int main(void)
{
    char *text = "123456789012345678901234567890";
    char *tail = NULL;
    double value;

    /* Clear errno first so a non-zero value afterwards is attributable
     * to strtod(). */
    errno = 0;
    value = strtod(text, &tail);

    if (errno != 0) {
        perror("strtod");
        return EXIT_FAILURE;
    }

    printf("input:  %s\noutput: %.2f\nendptr: %d\n",
            text, value, (int)strlen(tail));
    return EXIT_SUCCESS;
}

$gcc -v
Using built-in specs.
COLLECT_GCC=C:\msys64\mingw64\bin\gcc.exe
COLLECT_LTO_WRAPPER=C:/msys64/mingw64/bin/../lib/gcc/x86_64-w64-mingw32/9.3.0/lto-wrapper.exe
Target: x86_64-w64-mingw32
Configured with: ../gcc-9.3.0/configure --prefix=/mingw64 --with-local-prefix=/mingw64/local --build=x86_64-w64-mingw32 --host=x86_64-w64-mingw32 --target=x86_64-w64-mingw32 --with-native-system-header-dir=/mingw64/x86_64-w64-mingw32/include --libexecdir=/mingw64/lib --enable-bootstrap --with-arch=x86-64 --with-tune=generic --enable-languages=c,lto,c++,fortran,ada,objc,obj-c++ --enable-shared --enable-static --enable-libatomic --enable-threads=posix --enable-graphite --enable-fully-dynamic-string --enable-libstdcxx-filesystem-ts=yes --enable-libstdcxx-time=yes --disable-libstdcxx-pch --disable-libstdcxx-debug --disable-isl-version-check --enable-lto --enable-libgomp --disable-multilib --enable-checking=release --disable-rpath --disable-win32-registry --disable-nls --disable-werror --disable-symvers --enable-plugin --with-libiconv --with-system-zlib --with-gmp=/mingw64 --with-mpfr=/mingw64 --with-mpc=/mingw64 --with-isl=/mingw64 --with-pkgversion='Rev2, Built by MSYS2 project' --with-bugurl=https://sourceforge.net/projects/msys2 --with-gnu-as --with-gnu-ld
Thread model: posix
gcc version 9.3.0 (Rev2, Built by MSYS2 project)

$gcc main.c -o main.exe -std=c11 -Wall -Wextra -pedantic

$./main.exe
input:  123456789012345678901234567890
output: 123456789012345680000000000000.00
endptr: 0

$ldd main.exe
        ntdll.dll => /c/WINDOWS/SYSTEM32/ntdll.dll (0x7ff88c840000)
        KERNEL32.DLL => /c/WINDOWS/System32/KERNEL32.DLL (0x7ff88ac60000)
        KERNELBASE.dll => /c/WINDOWS/System32/KERNELBASE.dll (0x7ff889ff0000)
        msvcrt.dll => /c/WINDOWS/System32/msvcrt.dll (0x7ff88c6b0000)

not fixed.