dbuenzli / tsdl

Thin bindings to SDL for OCaml
http://erratique.ch/software/tsdl/
ISC License
100 stars 29 forks source link

Possible race condition with MessageBox #49

Open sguillia opened 6 years ago

sguillia commented 6 years ago

While writing an application using OCaml and Tsdl, I encountered a bug which affects either Tsdl, SDL, Cocoa or Ocaml bindings.

I fully described the bug here: https://bugzilla.libsdl.org/show_bug.cgi?id=4069

In short, the app crashes (sigabort or segfault) when repeatedly opening dialogs for 30 seconds.

Could you please take a look at it?

dbuenzli commented 6 years ago

I trimmed down your example a bit further, see the end of this message. It's a bit difficult to reproduce here but something is indeed racy I don't have time to investigate this further at the moment. Could you please create an equivalent C program to make sure the bug is not on SDL's side.

The full bt is:

lldb) bt
* thread #1, queue = 'com.apple.main-thread', stop reason = signal SIGABRT
  * frame #0: 0x00007fffa8698d42 libsystem_kernel.dylib`__pthread_kill + 10
    frame #1: 0x00007fffa8786457 libsystem_pthread.dylib`pthread_kill + 90
    frame #2: 0x00007fffa85fe420 libsystem_c.dylib`abort + 129
    frame #3: 0x00007fffa715294a libc++abi.dylib`abort_message + 266
    frame #4: 0x00007fffa7177c2f libc++abi.dylib`default_terminate_handler() + 267
    frame #5: 0x00007fffa7c866fe libobjc.A.dylib`_objc_terminate() + 103
    frame #6: 0x00007fffa7174d49 libc++abi.dylib`std::__terminate(void (*)()) + 8
    frame #7: 0x00007fffa71747be libc++abi.dylib`__cxa_throw + 121
    frame #8: 0x00007fffa7c845b6 libobjc.A.dylib`objc_exception_throw + 345
    frame #9: 0x00007fff92eecc3d CoreFoundation`+[NSException raise:format:] + 205
    frame #10: 0x00007fff94820984 Foundation`-[NSConcreteAttributedString initWithString:] + 135
    frame #11: 0x00007fff90ba7d2d AppKit`-[NSAlert setMessageText:] + 56
    frame #12: 0x00000001002e3510 libSDL2-2.0.0.dylib`Cocoa_ShowMessageBox + 188
    frame #13: 0x00000001002e03c8 libSDL2-2.0.0.dylib`SDL_ShowMessageBox_REAL + 255
    frame #14: 0x0000000100339914 libffi.6.dylib`ffi_call_unix64 + 76
    frame #15: 0x000000010033929d libffi.6.dylib`ffi_call + 917
    frame #16: 0x000000010008a90d a.out`ctypes_call(fnname=<unavailable>, function=<unavailable>, callspec_=4296235136, argwriter=<unavailable>, rvreader=<unavailable>) at ffi_call_stubs.c:404 [opt]
    frame #17: 0x0000000100034e30 a.out`camlCtypes_ffi__fun_5017 + 128
    frame #18: 0x000000010000dd09 a.out`camlTsdl__show_message_box_7023 + 73
    frame #19: 0x0000000100003ddc a.out`camlMain__loop_6370 + 252
    frame #20: 0x00000001000040a8 a.out`camlMain__main_8374 + 152
    frame #21: 0x0000000100004122 a.out`camlMain__entry + 66
    frame #22: 0x0000000100001179 a.out`caml_program + 1081
    frame #23: 0x00000001000b6698 a.out`caml_start_program + 92
    frame #24: 0x000000010009b018 a.out`caml_main + 488
    frame #25: 0x000000010009b06c a.out`main + 12
    frame #26: 0x00007fffa856a235 libdyld.dylib`start + 1

(* compile with ocamlfind ocamlopt -package tsdl -linkpkg -thread main.ml *)

open Tsdl

let log_err fmt = Format.eprintf (fmt ^^ "@.")

let close_message_box () =
  let d =
    let open Sdl.Message_box in
    let action =
      { button_flags = button_returnkey_default;
        button_id = 1; button_text = "Save then quit" }
    in
    let cancel =
      { button_flags = button_escapekey_default;
        button_id = 3; button_text = "Cancel" }
    in
    let color_scheme =
      { color_background = (255, 255, 255);
        color_text = (255, 0, 0);
        color_button_border = (0, 255, 0);
        color_button_background = (0, 0, 255);
        color_button_selected = (0, 255, 255) }
    in
    { flags = warning; window = None; title = "Action";
      message = "Do you want to action ?"; buttons = [cancel; action];
      color_scheme = Some color_scheme }
  in
  Sdl.show_message_box d

let event_loop () =
  let keycode e = Sdl.Event.(get e keyboard_keycode) in
  let e = Sdl.Event.create () in
  let rec loop () = match Sdl.wait_event (Some e) with
  | Error (`Msg e) -> log_err " Could not wait event: %s" e; 1
  | Ok () ->
      match Sdl.Event.(enum (get e typ)) with
      | `Quit -> 0
      | `Key_down when keycode e = Sdl.K.escape ->
          begin match close_message_box () with
          | Error (`Msg e) -> log_err "Could not show message box: %s" e; 1
          | Ok 1 -> 0
          | _ -> loop ()
          end
      | _ -> loop ()
  in
  loop ()

let main () = match Sdl.init Sdl.Init.(video + events) with
| Error (`Msg e) -> log_err "Could not init SDL: %s"  e; exit 1
| Ok () ->
    let ret = event_loop () in
    Sdl.quit ();
    exit ret

let () = main ()
sguillia commented 6 years ago

I failed to reproduce the bug using the following C example. Holding the esc key for 2 minutes does work as expected.

// Compile with:
// cc main.c `sdl2-config --libs --cflags` -Wall -Wextra

#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <SDL.h>

int message()
{
    const SDL_MessageBoxButtonData buttons[] = {
        {                                       0, 0, "Nothing" },
        { SDL_MESSAGEBOX_BUTTON_RETURNKEY_DEFAULT, 1, "Return (quit)" },
        { SDL_MESSAGEBOX_BUTTON_ESCAPEKEY_DEFAULT, 2, "Escape" },
    };
    const SDL_MessageBoxColorScheme colorScheme = {
        {
            { 255,   0,   0 },
            {   0, 255,   0 },
            { 255, 255,   0 },
            {   0,   0, 255 },
            { 255,   0, 255 }
        }
    };
    const SDL_MessageBoxData messageboxdata = {
        SDL_MESSAGEBOX_INFORMATION,
        NULL,
        "example message box",
        "select a button",
        SDL_arraysize(buttons),
        buttons,
        &colorScheme
    };

    int buttonid;
    if (SDL_ShowMessageBox(&messageboxdata, &buttonid) < 0) {
        SDL_Log("error displaying message box");
        exit(1);
        return 1;
    }
    if (buttonid == -1)
        SDL_Log("no selection");
    else
        SDL_Log("selection was %s", buttons[buttonid].text);
    return (buttonid == 1);
}

int main(void)
{
    SDL_Window  *win;
    SDL_Surface *screen;
    SDL_Event   e;

    SDL_Init(SDL_INIT_VIDEO);
    win = SDL_CreateWindow("SDL",
            SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED,
            640, 480, 0);
    screen = SDL_GetWindowSurface(win);
    SDL_FillRect(screen, NULL, 0);
    SDL_UpdateWindowSurface(win);
    while (1)
    {
        if (SDL_WaitEvent(&e))
        {
            if (e.type == SDL_QUIT)
                break;
            if (e.type == SDL_KEYDOWN
                    && e.key.keysym.sym == SDLK_ESCAPE)
            {
                if (message())
                    break;
            }
        }
    }
    SDL_DestroyWindow(win);
    SDL_Quit();
}
dbuenzli commented 6 years ago

@sguillia thanks for the C program. I think you can close the report upstream as this seems to be on our side.

I need to recheck what we are doing here but I don't have ctypes in my head at the moment. Either tsdl is doing something wrong or there's something bad with ctypes.

According to the backtrace it seems that the pointer of the setMessageText: is nil, so this looks like a string that is being freed to early.

dbuenzli commented 6 years ago

So in 595d8a5a7716d7af8227b38e83 I tried to add a little bit of logging by attaching a finalizer on the struct that is given to the C function. It seems that structures are not freed for quite sometime and then a hundred of them is freed after around 265th allocation of the struct at which point the segfault occurs. The one given at that point (id 265) is not freed but I suspect that its title string field is at that point, which leads to the segfault in C here.

@yallop do you think this could be a problem in ctypes ?

If you want to have a look this can be tried with the OCaml program in this message and:

opam pin add tsdl git+https://github.com/dbuenzli/tsdl.git#analyse-49
ocamlfind ocamlopt -package tsdl -linkpkg -thread main.ml
./a.out # Then hold down the escape key until the segfault occurs
[...]
IN: 264
OUT: 264
IN: 265
FREE: 92
FREE: 91
FREE: 90
[...]
FREE: 1
2018-02-05 17:42:44.538 a.out[21751:313146] *** Terminating app due to uncaught exception 'NSInvalidArgumentException', reason: 'NSConcreteAttributedString initWithString:: nil value'
*** First throw call stack:
(
    0   CoreFoundation                      0x00007fff86a502cb __exceptionPreprocess + 171
    1   libobjc.A.dylib                     0x00007fff9b86748d objc_exception_throw + 48
    2   CoreFoundation                      0x00007fff86acec3d +[NSException raise:format:] + 205
    3   Foundation                          0x00007fff88402984 -[NSConcreteAttributedString initWithString:] + 135
    4   AppKit                              0x00007fff84789d2d -[NSAlert setMessageText:] + 56
    5   libSDL2-2.0.0.dylib                 0x0000000108371510 Cocoa_ShowMessageBox + 188
    6   libSDL2-2.0.0.dylib                 0x000000010836e3c8 SDL_ShowMessageBox_REAL + 255
    7   libffi.6.dylib                      0x00000001083cc914 ffi_call_unix64 + 76
    8   ???                                 0x00007fff57aee640 0x0 + 140734664468032
)
libc++abi.dylib: terminating with uncaught exception of type NSException
Abort trap: 6
yallop commented 6 years ago

@yallop do you think this could be a problem in ctypes ?

I'm not sure. I'll aim to dig into this later this week.

yallop commented 6 years ago

Things have become very busy here, and unfortunately it's going to be a little while longer before I can investigate this. But I think your suspicion about the lifetime of the title field is likely to be justified, @dbuenzli.