janestreet / base

Standard library for OCaml
MIT License
848 stars 124 forks source link

Segmentation fault with `String.filter_map` #162

Closed smondet closed 3 months ago

smondet commented 7 months ago

(I see a bunch of unsafe_get & co. in the implementation, so I am reporting this here first before bothering ocaml/ocaml.)

OCaml 5.0.0, Base v0.16.3

I noticed the [@nontail] in the implementation of String.filter_map, so I also tried String.filter_mapi, which does not segfault but still seems to add a \000 character (???).

Reproduction:

open Base

(* Reproduction helper for the [String.filter_mapi] variant: prepends "Hello"
   to [s] after keeping only ASCII letters, digits and the hyphen. The first
   argument passed to [~f] (presumably the character index -- confirm against
   the Base API) is ignored. *)
let sanitize_mapi s =
  "Hello"
  ^ String.filter_mapi s ~f:(fun _ -> function
      | ('a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '-') as c -> Some c | _ -> None)

(* Reproduction helper for the [String.filter_map] variant: prepends "Hello"
   to [s] after keeping only ASCII letters, digits and the hyphen. Same filter
   as [sanitize_mapi], but through the non-indexed function. *)
let sanitize_map s =
  "Hello"
  ^ String.filter_map s ~f:(function
      | ('a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '-') as c -> Some c
      | _ -> None)

(* Driver: runs both sanitizers on an input whose first character (a space) is
   rejected by the filter, and prints each result with [%S], flushing after
   every line with [%!]. The [filter_mapi] variant is printed first, then the
   [filter_map] variant. *)
let () =
  let open Stdlib.Printf in
  let s = " dlkjelkdje -d e E? dei !! " in
  printf "mapi: %S\n%!" (sanitize_mapi s);
  printf "map: %S\n%!" (sanitize_map s);
  ()
 $ dune exec repro/main.exe
mapi: "Hello\000lkjelkdje-deEdei"
Segmentation fault (core dumped)
 $ cat repro/dune
(executable
 (name main)
 (libraries base))

I haven't tried with OCaml 5.1 yet.

smondet commented 7 months ago

Confirming: same behavior with OCaml 5.1.0

smondet commented 7 months ago

The \000 only appears when the first character of the input is filtered out:

(* Driver comparing two inputs that differ only in their first character:
   [s] starts with a space (rejected by the filter) while [s2] starts directly
   with a kept character. Both go through [sanitize_mapi] first, then through
   [sanitize_map] with [s2] before [s]. *)
let () =
  let open Stdlib.Printf in
  let s = " dlkjelkdje -d e E? dei !! " in
  let s2 = "dlkjelkdje -d e E? dei !! " in
  printf "mapi: %S\n%!" (sanitize_mapi s);
  printf "mapi: %S\n%!" (sanitize_mapi s2);
  printf "map: %S\n%!" (sanitize_map s2);
  printf "map: %S\n%!" (sanitize_map s);
  ()
 $ dune exec repro/main.exe
mapi: "Hello\000lkjelkdje-deEdei"
mapi: "Hellodlkjelkdje-deEdei"
Segmentation fault (core dumped)
dkalinichenko-js commented 3 months ago

Hi, thanks for your report! The issue should be fixed in base.v0.17.0.