runtimeverification / llvm-backend

KORE to llvm translation
BSD 3-Clause "New" or "Revised" License
36 stars 23 forks source link

Segfault in search mode #461

Closed vasil-sd closed 3 years ago

vasil-sd commented 3 years ago

llvm-backend commit: 72cf698be6baef5cdd01aa9239e32e9be89ec6a k-framework commit: 97358b03296b6f2f6575c432906d6c1fd8043160 clang --version:

clang version 13.0.0
Target: x86_64-pc-linux-gnu
Thread model: posix
InstalledDir: /usr/bin

simple-untyped.k (minimized as much as I could):

module SIMPLE-UNTYPED-SYNTAX
  imports DOMAINS-SYNTAX

  syntax Id ::= "main" [token]

  syntax Stmt ::= "var" Exp ";"
                | "var" Exps ";" [macro-rec, avoid]
                | "function" Id "(" Ids ")" Block

  syntax Exp ::= Int | Bool | String | Id
               | "(" Exp ")"             [bracket]
               | "++" Exp
               > Exp "[" Exps "]"        [strict]
               > Exp "(" Exps ")"        [strict]
               | "-" Exp                 [strict]
               | "sizeOf" "(" Exp ")"    [strict]
               | "read" "(" ")"
               > left:
                 Exp "*" Exp             [strict, left]
               | Exp "/" Exp             [strict, left]
               | Exp "%" Exp             [strict, left]
               > left:
                 Exp "+" Exp             [strict, left]
               | Exp "-" Exp             [strict, left]
               > non-assoc:
                 Exp "<" Exp             [strict, non-assoc]
               | Exp "<=" Exp            [strict, non-assoc]
               | Exp ">" Exp             [strict, non-assoc]
               | Exp ">=" Exp            [strict, non-assoc]
               | Exp "==" Exp            [strict, non-assoc]
               | Exp "!=" Exp            [strict, non-assoc]
               > "!" Exp                 [strict]
               > left:
                 Exp "&&" Exp            [strict(1), left]
               | Exp "||" Exp            [strict(1), left]
               > "spawn" Block
               > Exp "=" Exp             [strict(2), right]

  syntax Ids  ::= List{Id,","}           [klabel(exps), prefer]
  syntax Exps ::= List{Exp,","}          [klabel(exps), strict]  // automatically hybrid now
  syntax Exps ::= Ids                    [prefer]
  syntax Val
  syntax Vals ::= List{Val,","}          [klabel(exps)]
  syntax Bottom
  syntax Bottoms ::= List{Bottom,","}    [klabel(exps)]
  syntax Ids ::= Bottoms
  syntax Block ::= "{" "}"
                | "{" Stmt "}"

  syntax Stmt ::= Block
                | Exp ";"                               [strict]
                | "if" "(" Exp ")" Block "else" Block   [avoid, strict(1)]
                | "if" "(" Exp ")" Block                [macro]
                | "while" "(" Exp ")" Block
                | "for" "(" Stmt Exp ";" Exp ")" Block  [macro]
                | "return" Exp ";"                      [strict]
                | "return" ";"                          [macro]
                | "print" "(" Exps ")" ";"              [strict]
                | "try" Block "catch" "(" Id ")" Block
                | "throw" Exp ";"                       [strict]
                | "join" Exp ";"                        [strict]
                | "acquire" Exp ";"                     [strict]
                | "release" Exp ";"                     [strict]
                | "rendezvous" Exp ";"                  [strict]

  syntax Stmt ::= Stmt Stmt                          [right]

  rule var X:Id = E; => var X; X = E;  [macro]

endmodule

module SIMPLE-UNTYPED
  imports SIMPLE-UNTYPED-SYNTAX
  imports DOMAINS

  syntax Val ::= Int | Bool | String
               | array(Int,Int)
               | lambda(Ids,Stmt)
  syntax Exp ::= Val
  syntax Exps ::= Vals
  syntax Vals ::= Bottoms
  syntax KResult ::= Val
                   | Vals  // TODO: should not need this

  syntax ControlCell
  syntax ControlCellFragment

  configuration <T color="red">
                  <threads color="orange">
                    <thread multiplicity="*" color="yellow" type="Set">
                      <k color="green"> $PGM:Stmt ~> execute </k>
                      <control color="cyan">
                        <fstack color="blue"> .List </fstack>
                        <xstack color="purple"> .List </xstack>
                      </control>
                      <env color="violet"> .Map </env>
                      <holds color="black"> .Map </holds>
                      <id color="pink"> 0 </id>
                    </thread>
                  </threads>
                  <genv color="pink"> .Map </genv>
                  <store color="white"> .Map </store>
                  <busy color="cyan"> .Set </busy>
                  <terminated color="red"> .Set </terminated>
                  <input color="magenta"> ListItem(0) </input>
                  <output color="brown"> .List </output>
                  <nextLoc color="gray"> 0 </nextLoc>
                </T>

 syntax KItem ::= "undefined"  [latex(\bot)]

  rule <k> var X:Id; => . ...</k>
       <env> Env => Env[X <- L] </env>
       <store>... .Map => L |-> undefined ...</store>
       <nextLoc> L => L +Int 1 </nextLoc>

  syntax Id ::= "$1" [token] | "$2" [token]

  rule <k> function F(Xs) S => . ...</k>
       <env> Env => Env[F <- L] </env>
       <store>... .Map => L |-> lambda(Xs, S) ...</store>
       <nextLoc> L => L +Int 1 </nextLoc>

  syntax KItem ::= "execute"
  rule <k> execute => main(.Exps); </k>
       <env> Env </env>
       <genv> .Map => Env </genv>  

  rule <k> X:Id => V ...</k>
       <env>... X |-> L ...</env>
       <store>... L |-> V:Val ...</store>  [lookup]

  context ++(HOLE => lvalue(HOLE))
  rule <k> ++loc(L) => I +Int 1 ...</k>
       <store>... L |-> (I => I +Int 1) ...</store>  [increment]

  syntax KItem ::=  (Map,K,ControlCellFragment)

  rule <k> lambda(Xs,S)(Vs:Vals) ~> K => mkDecls(Xs,Vs) S return; </k>
       <control>
         <fstack> .List => ListItem((Env,K,C)) ...</fstack>
         C
       </control>
       <env> Env => GEnv </env>
       <genv> GEnv </genv>

  rule <k> return(V:Val); ~> _ => V ~> K </k>
       <control>
         <fstack> ListItem((Env,K,C)) => .List ...</fstack>
         (_ => C)
       </control>
       <env> _ => Env </env>

  syntax Val ::= "nothing"
  rule return; => return nothing;

  rule <k> read() => I ...</k>
       <input> ListItem(I:Int) => .List ...</input>
       [read]

  context (HOLE => lvalue(HOLE)) = _

  rule <k> loc(L) = V:Val => V ...</k> <store>... L |-> (_ => V) ...</store>

  rule {} => .  
  rule <k> { S } => S ~> setEnv(Env) ...</k>  <env> Env </env>  

  rule S1:Stmt S2:Stmt => S1 ~> S2  

  rule _:Val; => .

  rule <k> print(V:Val, Es => Es); ...</k>
       <output>... .List => ListItem(V) </output>

  rule print(.Vals); => .  

  syntax KItem ::= (Id,Stmt,K,Map,ControlCellFragment)

  syntax KItem ::= "popx"

  rule <thread>...
         <k> spawn S => !T:Int ...</k>
         <env> Env </env>
       ...</thread>
       (.Bag => <thread>...
               <k> S </k>
               <env> Env </env>
               <id> !T </id>
             ...</thread>)

  rule (<thread>... <k>.</k> <holds>H</holds> <id>T</id> ...</thread> => .Bag)
       <busy> Busy => Busy -Set keys(H) </busy>
       <terminated>... .Set => SetItem(T) ...</terminated>

  syntax Stmt ::= mkDecls(Ids,Vals)  [function]
  rule mkDecls((X:Id, Xs:Ids), (V:Val, Vs:Vals)) => var X=V; mkDecls(Xs,Vs)
  rule mkDecls(.Ids,.Vals) => {}

  syntax Exp ::= lookup(Int)
  rule <k> lookup(L) => V ...</k> <store>... L |-> V:Val ...</store>  [lookup]

  syntax KItem ::= setEnv(Map)
  rule <k> setEnv(Env) => . ...</k> <env> _ => Env </env>  

  rule (setEnv(_) => .) ~> setEnv(_)  

  syntax Exp ::= lvalue(K)
  syntax Val ::= loc(Int)

  rule <k> lvalue(X:Id => loc(L)) ...</k> <env>... X |-> L:Int ...</env>

  context lvalue(_::Exp[HOLE::Exps])
  context lvalue(HOLE::Exp[_::Exps])

  rule lvalue(lookup(L:Int) => loc(L))

  syntax Map ::= Int "..." Int "|->" K
     [function, latex({#1}\ldots{#2}\mapsto{#3})]
  rule N...M |-> _ => .Map  requires N >Int M
  rule N...M |-> K => N |-> K (N +Int 1)...M |-> K  requires N <=Int M
endmodule

threads_04.simple:

function main() {
  spawn {
    var x = 0;
    print(" ");
  };
  spawn {
    var y = 0;
    print(y);
  };
}

Reproduction:

$ kompile --enable-search --backend llvm simple-untyped.k
$ krun --search threads_04.simple 
/usr/bin/krun: line 70: 1035517 Segmentation fault      (core dumped) "$@"
[Error] Critical: Backend crashed during rewriting with exit code 139
Baltoli commented 3 years ago

@dwightguth I'm digging into this at the moment; the root cause seems to be a junk value (0x108) being generated as a block * pointer somewhere in the search steps. I haven't worked out why that value is being generated, but I'm narrowing down - it's a bit of a slow process sifting through the LLVM IR; if you have an insight into this let me know.

vasil-sd commented 3 years ago

With help of rr tool (https://rr-project.org/), I've found where 0x108 comes from:

(rr) reverse-cont                                                                                                       
Continuing.

Hardware watchpoint 1: -location rhs.children[0]

Old value = (uint64_t *) 0x108
New value = (uint64_t *) 0x7fc4b8c00540

0x00005608824f43b6 in kore_alloc_heap::allocate<>(unsigned long) (size=264) at /home/vasil/Build/K/k/llvm-backend/src/main/native/llvm-backend/include/runtime/header.h:135
135           set_len(result, size);       

May be this can help.

Baltoli commented 3 years ago

Ah that's really helpful Vasil, thank you!

Baltoli commented 3 years ago

Some more digging reveals that the allocation above is triggered during a call to hook_LIST_element when applying a search rule.

The K definition provided only uses ListItem in a few places, so I was able to narrow it down to an application of the following rule:

rule <k> print(V:Val, Es => Es); ...</k>
     <output>... .List => ListItem(V) </output>

If I change that rule to the following equivalent one:

rule <k> print(V:Val, Es => Es); ...</k>
     <output> L => L ListItem(V) </output>

I can no longer reproduce the segfault. @vasil-sd, if you make this change can you reproduce the crash?

Baltoli commented 3 years ago

The value being inserted into the list that ultimately causes the crash is inj{SortString{}, (null)}(\dv{SortString{}}(" ")).

vasil-sd commented 3 years ago

I confirm, that with

rule <k> print(V:Val, Es => Es); ...</k>
     <output> L => L ListItem(V) </output>

segfault is not reproduced.

Baltoli commented 3 years ago

@dwightguth I'm going to leave this for you to take a look at when you have some time. The value 264 (0x108) is the maximum size for some kind of allocation in immer, and it gets passed through to koreAllocToken under some internal circumstances (calls to make_leaf_e rather than make_leaf_n).

The bug seems to be manifested only when particular internal conditions are met - for example, if I print out each value being passed to hook_LIST_element before constructing the flex_vector return value, then the crash no longer occurs. My best guess is that we're somehow breaking a contract with immer's memory policies / allocator interface, and that's causing the crash.

Baltoli commented 3 years ago

Getting closer to what might actually be the issue. In search.cpp:take_search_steps(int64, block*), I made the following change to inspect search results and the contents of the states_set as the program executes:

+ static bool did = false;
for (block *result : stepResults) {
+  if ((uintptr_t)result->children[0] == 0x108) {
+    fprintf(stderr, "In result\n");
+  }

+  for (auto const &ss : states_set) {
+    if ((uintptr_t)ss->children[0] == 0x108 && !did) {
+      fprintf(stderr, "%p\n", &ss->children[0]);
+      did = true;
+    }
+  }

  auto dirty = states_set.insert(result);
  if (dirty.second) {
    states.push_back(result);
  }
}

The effect of this is to print In result if we see a new result with a garbage value as its first child[^1]. Then, if we see anything _currently_in the state set with the same condition, print the address of the garbage. On my machine, the output I see is:

0x7f9ee9699268
[1]    1259435 segmentation fault (core dumped)  ./simple-untyped-kompiled/search w3 -1 w4

From this, we can conclude that the garbage value is produced by something modifying the contents of the state set during execution: the garbage wasn't present when initially observed as a result, but is on a later iteration. Then, when a rewrite tries to use the accumulated states, it sees garbage and crashes.

Using rr to record an execution with this modification lets us validate this hypothesis by watching what happens to the memory address in question on a replayed execution. The sequence of events that write to the eventual garbage address are:

  1. Allocated during global variable initialisation
  2. Modified by apply_rule_5034_search
  3. Modified during garbage collection migration during stepAll
  4. Reallocated during hook_LIST_element
  5. Segfault as observed

My suspicion from this sequence of events is that what's happening is that the arena allocator is reusing an address that previously corresponded to an object in the state cell. The newly allocated object then gets garbage written to it by immer's internal memory management. Unfortunately, this object remains in the state set even though it was notionally migrated, leading to the crash observed.

To verify that this is the case, I removed the generated call to koreCollect in stepAll (in Decision.cpp:871-886):

- auto ptrTy = llvm::PointerType::getUnqual(
-     llvm::ArrayType::get(getTypeByName(module, LAYOUTITEM_STRUCT), 0));
- auto koreCollect = getOrInsertFunction(
-     module, "koreCollect",
-     llvm::FunctionType::get(
-         llvm::Type::getVoidTy(module->getContext()),
-         {arr->getType(), llvm::Type::getInt8Ty(module->getContext()), ptrTy},
-         false));
- auto call = llvm::CallInst::Create(
-     koreCollect,
-     {arr,
-      llvm::ConstantInt::get(
-          llvm::Type::getInt8Ty(module->getContext()), nroots),
-      llvm::ConstantExpr::getBitCast(layout, ptrTy)},
-     "", collect);
- setDebugLoc(call);

After doing so, I can run the test case and observe the expected results without a segfault.

[^1]: rr reveals that on Vasil's test case, the garbage value is consistently the first child of another block, so we just hard-code the assumption. This might not work on other test cases.

Baltoli commented 3 years ago

This does seem to be fixed on the new_gc branch, so possibly not worth any further investigation - once it became apparent this was in the GC I realised I should check out @dwightguth's recent work, which seems to resolve the issue.

The initial test case being fixed is a red herring (owing to the bug being rather subtle and prone to disappearing if the internal data structures are shuffled around differently). @vasil-sd offers a new test case, which can exhibit infinite behaviours, and so seems to more consistently exhibit the collection problems described above.

The semantics in question is simple_untyped.md from https://github.com/kframework/k/pull/2343:

$kompile --enable-search --md-selector "k&!io" --backend llvm  simple-untyped.md -d ./noio
$krun -v --save-temps -cINPUT="ListItem(0)" tests/threads/threads_05.simple --search --depth 1500 --no-exc-wrap  -d ./noio

The program is:

// Program which has infinitely many bahaviors.

var x, y;

function main() {
  x = 1;
  y = 0;
  spawn { x = 0; };
  while (1 <= x) {
    y = y+1;
  }
  print(y,"\n");
}

While running on new_gc does not fix the problem, removing collection calls does fix the issue on this test case, however.

Baltoli commented 3 years ago
(rr) frame
#2  0x000000000045ce37 in KEq::operator() (this=0x7ffd80fdc288, 
    lhs=@0x7ffd80fdc238: 0x7f7ddfe9cc20, rhs=@0x7f7de16710c8: 0x7f7ddfe99260)
    at /home/bruce/code/k/llvm-backend/src/main/native/llvm-backend/include/runtime/header.h:167
167     return hook_KEQUAL_eq(lhs, rhs);
(rr) p &rhs.children[0]
$7 = (uint64_t **) 0x7f7ddfe99268
(rr) p &rhs.children[0] > youngspace.block_start 
$8 = true
(rr) p &rhs.children[0] < youngspace.block_end
$9 = true
(rr) p rhs.children[0]
$10 = (uint64_t *) 0x108

It appears that the pointer in question is part of the current allocation semispace at the time it's accessed, causing the segfault. Similarly, it's in the same semispace when the garbage value gets written to it:

(rr) frame
#0  kore_alloc_heap::allocate<>(unsigned long) (size=264)
    at /home/bruce/code/k/llvm-backend/src/main/native/llvm-backend/include/runtime/header.h:150
150       return result->data;
(rr) p youngspace
$15 = {first_block = 0x7f7ddfd00000 "", block = 0x7f7ddfe99378 "`\223\351\337}\177", 
  block_start = 0x7f7ddfe00000 "", block_end = 0x7f7ddff00000 "", 
  first_collection_block = 0x7f7ddfa00000 "", num_blocks = 2, 
  num_collection_blocks = 2, allocation_semispace_id = -1 '\377'}
(rr) p result
$16 = (string *) 0x7f7ddfe99268
(rr) p result > youngspace.block_start
$17 = true
(rr) p result < youngspace.block_end
$18 = true
Baltoli commented 3 years ago

Using rr[^1] I've attempted to construct a more detailed history of what happens to the objects in question leading up to this crash.

First, we get the addresses of the objects involved in the segfault:

#2  0x000000000045ce37 in KEq::operator() (this=0x7ffe3b4f8808, lhs=
    @0x7ffe3b4f87b8: 0x7fab8de9cc20, rhs=@0x7fab8f6710c8: 0x7fab8de99260)
    at /home/bruce/code/k/llvm-backend/src/main/native/llvm-backend/include/runtime/header.h:167
167     return hook_KEQUAL_eq(lhs, rhs);
(rr) p rhs
$1 = (block * const&) @0x7fab8f6710c8: 0x7fab8de99260
(rr) p &rhs.children[0]
$2 = (uint64_t **) 0x7fab8de99268
(rr) p rhs.children[0]
$3 = (uint64_t *) 0x108

At the crash, it's clear that garbage has been written into the child pointer. Next, we set a breakpoint to see when the parent (rhs) is generated as a result in the search process:

(rr) break search.cpp:68 if result == 0x7fab8de99260
Breakpoint 2 at 0x45addf: file /home/bruce/code/k/llvm-backend/src/main/native/llvm-backend/runtime/util/search.cpp, line 68.

Additionally, set a watchpoint on the child pointer so that we can see when it gets written to.

(rr) watch -l rhs.children[0]
Hardware watchpoint 4: -location rhs.children[0]

Now, we restart the program with these observations in place, and step through a few times:

(rr) run
Starting program: /home/bruce/.local/share/rr/search-20/mmap_hardlink_3_search 

Program stopped.
0x00007fab905d60d0 in _start () from /lib64/ld-linux-x86-64.so.2
(rr) c
Continuing.

Hardware watchpoint 4: -location rhs.children[0]

Old value = <unreadable>
New value = (uint64_t *) 0x0
0x0000000070000002 in syscall_traced ()
(rr) 
Continuing.

Hardware watchpoint 4: -location rhs.children[0]

Old value = (uint64_t *) 0x0
New value = (uint64_t *) 0x7fab8de99218
0x00000000003b73aa in apply_rule_5034_search ()
(rr) 
Continuing.

Breakpoint 5, take_search_steps (depth=-1, subject=0x7fab8da00778) at /home/bruce/code/k/llvm-backend/src/main/native/llvm-backend/runtime/util/search.cpp:68
68          }

At this point, we can see that the object that will eventually become rhs at the segfault has been appended to the list of states. Its first child also appears to be a valid block value:

(rr) p states.back()
$15 = (block *) 0x7fab8de99260
(rr) p result
$16 = (block *) 0x7fab8de99260
(rr) p result->children[0]
$17 = (uint64_t *) 0x7fab8de99218

Next, we continue executing further:

(rr) c
Continuing.

Hardware watchpoint 4: -location rhs.children[0]

Old value = (uint64_t *) 0x7fab8de99218
New value = (uint64_t *) 0x7fab8da00540
migrate (blockPtr=0x7fab8f671070) at /home/bruce/code/k/llvm-backend/src/main/native/llvm-backend/runtime/collect/collect.cpp:68
68      currBlock->h.hdr |= FWD_PTR_BIT;
(rr) c
Continuing.

Hardware watchpoint 4: -location rhs.children[0]

Old value = (uint64_t *) 0x7fab8da00540
New value = (uint64_t *) 0x108
kore_alloc_heap::allocate<>(unsigned long) (size=264) at /home/bruce/code/k/llvm-backend/src/main/native/llvm-backend/include/runtime/header.h:150
150       return result->data;

We can see the child pointer being migrated and subsequently reused for a new allocation at these two watchpoint hits. At this point, the old parent block has been (correctly) removed from the set of states by the GC migration.[^2]

However, the local variable states_set has not been updated by the GC migration, and it still contains the old parent object, which references the garbage (reused) child:

... , [450] = 0x7fab8de99260, ...

Finally, the problem occurs at line 65 of utils/search.cpp. An attempt is made to add a new result to states_set, which entails calls to the KEq functor made by the internal unordered_set structure. On some unfortunate code path, this ends up calling hook_KEQUAL_eq with the old parent object (that should have been migrated), and some new valid result. This produces a crash if the old object had garbage written to it.

I think the solution here is to migrate the contents of states_set as well as the global states variable during GC with a similar enumerator pattern.

[^1]: Keep in mind that rr keeps addresses constant across replays, which is what lets us set watchpoints like this. [^2]: Too long to include inline, but you can easily grep for its value in states.

ehildenb commented 2 years ago

Wow, what a thorough analysis @Baltoli and @vasil-sd ! Amazing! :smile:

Baltoli commented 2 years ago

Thank you @ehildenb!

vasil-sd commented 2 years ago

@Baltoli, great job! Thanks!