nim-lang / Nim

Nim is a statically typed compiled systems programming language. It combines successful concepts from mature languages like Python, Ada and Modula. Its design focuses on efficiency, expressiveness, and elegance (in that order of priority).
https://nim-lang.org
Other
16.54k stars 1.47k forks source link

Weird memory corruption with UncheckedArray and =destroy inside an object #19532

Open metagn opened 2 years ago

metagn commented 2 years ago

This happens with all GCs, and does not seem to be related to the handling of tailing UncheckedArray, as ptr UncheckedArray also fails. Adding a refcount field and only destroying when it is 0 also doesn't help.

Example

type
  ArrayInfo[T] = ptr object
    length: int
    data: UncheckedArray[T]

  Array*[T] = object
    info: ArrayInfo[T]

proc `=destroy`*[T](arr: var Array[T]) =
  echo repr(arr.info)
  if not arr.info.isNil:
    let len = arr.info.length
    for i in 0 ..< len:
      `=destroy`(arr.info.data[i])
    dealloc(arr.info)
    arr.info = nil

iterator items*[T](x: Array[T]): T =
  let L = x.info.length
  for i in 0 ..< L:
    yield x.info.data[i]

proc toArray*[T](arr: sink openarray[T]): Array[T] =
  result.info = cast[typeof(result.info)](alloc(sizeof(result.info.length) + arr.len * sizeof(T)))
  result.info.length = arr.len
  for i in 0 ..< arr.len:
    result.info.data[i] = arr[i]

proc `$`*[T](x: Array[T]): string =
  result = "Array("
  var firstElement = true
  for value in items(x):
    if firstElement:
      firstElement = false
    else:
      result.add(", ")

    when value isnot string and value isnot seq and compiles(value.isNil):
      if value.isNil:
        result.add "nil"
      else:
        result.addQuoted(value)
    else:
      result.addQuoted(value)
  result.add(")")

type Foo = object
  case atom: bool
  of false:
    node: Array[Foo]
  of true:
    leaf: int

proc tree(arr: varargs[Foo]): Foo =
  Foo(atom: false, node: toArray(@arr))
proc leaf(x: int): Foo = Foo(atom: true, leaf: x)
echo tree(leaf(1), tree(leaf(2), tree(leaf(3))))

It works correctly if Foo is a ref object.

Current Output

nil
nil
...
nil
ptr 0x7f09d70bc0b0 --> [length = 1,
data = [...]]
ptr 0x7f09d70bd090 --> [length = 2,
data = [...]]
ptr 0x7f09d70bc0b0 --> [length = 4,
data = [...]]
Traceback (most recent call last)
/usercode/in.nim(63)     in
/playground/nim/lib/system/dollars.nim(111) $
/playground/nim/lib/system.nim(2930) addQuoted
/playground/nim/lib/system.nim(1621) $
/usercode/in.nim(13)     =destroy
/usercode/in.nim(13)     =destroy
/usercode/in.nim(10)     =destroy
/playground/nim/lib/system/repr.nim(324) reprAny
/playground/nim/lib/system/repr.nim(263) reprAux
/playground/nim/lib/system/repr.nim(237) reprRef
/playground/nim/lib/system/repr.nim(256) reprAux
/playground/nim/lib/system/repr.nim(214) reprRecord
/playground/nim/lib/system/repr.nim(199) reprRecordAux
/playground/nim/lib/system/repr.nim(197) reprRecordAux
/playground/nim/lib/system/repr.nim(266) reprAux
SIGSEGV: Illegal storage access. (Attempt to read from nil?)
Segmentation fault (core dumped)

Replacing repr(arr.info) with cast[int](arr.info):

0
0
...
0
140366252953776
140366252957840
140366252953776
4
Traceback (most recent call last)
/usercode/in.nim(63)     in
/playground/nim/lib/system/dollars.nim(111) $
/playground/nim/lib/system.nim(2930) addQuoted
/playground/nim/lib/system.nim(1621) $
/usercode/in.nim(13)     =destroy
/usercode/in.nim(13)     =destroy
/usercode/in.nim(12)     =destroy
SIGSEGV: Illegal storage access. (Attempt to read from nil?)
Segmentation fault (core dumped)

Notice: The first pointer with length 1 is printed again except with length 4 this time, and at some point a pointer becomes the number 4. With ARC, it becomes 20 instead of 4.

Other variations such as echo (cast[int](arr.info), arr.info[].length) give stuff like (on 1.6):

(6488160, 1)
(6492272, 2)
(6488160, 6488288)

which is an invalid state as well. Removing the echo makes everything work, but this is just due to luck, and many things trigger this bug. If you just add echo len after let len = arr.info.length you get (with 1.6):

1
2
1
(atom: false, node: Array((atom: true, leaf: 1), (atom: false, node: Array((atom: true, leaf: 2), (atom: false, node: Array((atom: true, leaf: 3)))))))
8
2
2
2
1

The 8 here is invalid, but it's such a small divergence that it doesn't affect anything and no segfault happens. 1.4 and earlier segfaults after the first 3 echoes. On devel the 8 becomes 2, but repr still gives the same original invalid state. The problem here is that =destroy getting called more than once for each pointer creates an invalid state rather than a nil state.

Expected Output

(atom: false, node: Array((atom: true, leaf: 1), (atom: false, node: Array((atom: true, leaf: 2), (atom: false, node: Array((atom: true, leaf: 3)))))))
ptr 0xsomepointer1 --> [length = 2,
data = [...]]
ptr 0xsomepointer2 --> [length = 2,
data = [...]]
ptr 0xsomepointer3 --> [length = 1,
data = [...]]

or something slightly different, I'm not sure.

Additional Information

Nim Compiler Version 1.7.1 [Windows: amd64]
Compiled at 2022-02-16
Copyright (c) 2006-2022 by Andreas Rumpf

git hash: b2c5d7b4ff070abe2145e998d090fb15b4df522f
metagn commented 2 years ago

This also happens if you just use ptr UncheckedArray:

type
  ArrayInfo[T] = ptr object
    length: int
    data: ptr UncheckedArray[T]

  Array*[T] = object
    info: ArrayInfo[T]

proc `=destroy`*[T](arr: var Array[T]) =
  echo repr(arr.info)
  if not arr.info.isNil:
    if not arr.info.data.isNil:
      let len = arr.info.length
      for i in 0 ..< len:
        `=destroy`(arr.info.data[i])
      dealloc(arr.info.data)
      arr.info.data = nil
    dealloc(arr.info)
    arr.info = nil

iterator items*[T](x: Array[T]): T =
  let L = x.info.length
  for i in 0 ..< L:
    yield x.info.data[i]

proc toArray*[T](arr: sink openarray[T]): Array[T] =
  result.info = cast[typeof(result.info)](alloc(sizeof(result.info.data)))
  result.info.length = arr.len
  result.info.data = cast[typeof(result.info.data)](alloc(arr.len * sizeof(T)))
  for i in 0 ..< arr.len:
    result.info.data[i] = arr[i]

proc `$`*[T](x: Array[T]): string =
  result = "Array("
  var firstElement = true
  for value in items(x):
    if firstElement:
      firstElement = false
    else:
      result.add(", ")

    when value isnot string and value isnot seq and compiles(value.isNil):
      if value.isNil:
        result.add "nil"
      else:
        result.addQuoted(value)
    else:
      result.addQuoted(value)
  result.add(")")

type Foo = object
  case atom: bool
  of false:
    node: Array[Foo]
  of true:
    leaf: int

proc tree(arr: varargs[Foo]): Foo =
  Foo(atom: false, node: toArray(@arr))
proc leaf(x: int): Foo = Foo(atom: true, leaf: x)
echo tree(leaf(1), tree(leaf(2), tree(leaf(3))))
nil
nil
...
nil
ptr 0x7f0848e4c050 --> [length = 1,
data = ptr 0x7f0848e4c070 --> [...]]
ptr 0x7f0848e4c090 --> [length = 2,
data = ptr 0x7f0848e4b140 --> [...]]
ptr 0x7f0848e4c050 --> [length = 1,
data = nil]
(atom: false, node: Array((atom: true, leaf: 1), (atom: false, node: Array((atom: true, leaf: 2), (atom: false, node: Array((atom: true, leaf: 3)))))))
ptr 0x7f0848e4c0b0 --> [length = 7809635517442387828,
data = ptr 0x65646f6e202c6573 --> [...]]
Traceback (most recent call last)
/playground/nim/lib/system.nim(1621) in
/usercode/in.nim(14)     =destroy
SIGSEGV: Illegal storage access. (Attempt to read from nil?)
Segmentation fault (core dumped)

Notice the length 7809635517442387828. With arc/orc, the data pointer becomes the number 50 instead.

All variations work if you turn Foo into a ref object.

type
  ArrayInfo[T] = ptr object
    length: int
    data: UncheckedArray[T]

  Array*[T] = object
    info: ArrayInfo[T]

proc `=destroy`*[T](arr: var Array[T]) =
  echo "info: ", cast[uint](arr.info)
  if not arr.info.isNil:
    let len = arr.info.length
    echo "len: ", len
    for i in 0 ..< len:
      `=destroy`(arr.info.data[i])
    dealloc(arr.info)
    arr.info = nil

iterator items*[T](x: Array[T]): T =
  let L = x.info.length
  for i in 0 ..< L:
    yield x.info.data[i]

proc toArray*[T](arr: sink openarray[T]): Array[T] =
  result.info = cast[typeof(result.info)](alloc(sizeof(result.info.length) + arr.len * sizeof(T)))
  result.info.length = arr.len
  for i in 0 ..< arr.len:
    result.info.data[i] = arr[i]

proc `$`*[T](x: Array[T]): string =
  result = "Array("
  var firstElement = true
  for value in items(x):
    if firstElement:
      firstElement = false
    else:
      result.add(", ")

    when value isnot string and value isnot seq and compiles(value.isNil):
      if value.isNil:
        result.add "nil"
      else:
        result.addQuoted(value)
    else:
      result.addQuoted(value)
  result.add(")")

type Foo = ref object
  case atom: bool
  of false:
    node: Array[Foo]
  of true:
    leaf: int

proc `$`(f: Foo): string = $f[]
proc tree(arr: varargs[Foo]): Foo =
  Foo(atom: false, node: toArray(@arr))
proc leaf(x: int): Foo = Foo(atom: true, leaf: x)
echo tree(leaf(1), tree(leaf(2), tree(leaf(3))))
(atom: false, node: Array((atom: true, leaf: 1), (atom: false, node: Array((atom: true, leaf: 2), (atom: false, node: Array((atom: true, leaf: 3)))))))
info: 6488416
len: 2
info: 6488320
len: 2
info: 6492240
len: 1

Notice the lack of extra destructions. This is with arc though, refc doesn't do any destructions at the end, even with proc main.