GaloisInc / saw-script

The SAW scripting language.
BSD 3-Clause "New" or "Revised" License
441 stars 63 forks source link

Clearly specify the behavior of `llvm_extract`/`llvm_compositional_extract` with respect to global variables #2093

Open RyanGlScott opened 3 months ago

RyanGlScott commented 3 months ago

SAW's :help output for llvm_extract and llvm_compositional_extract do not clearly specify how either command should behave in the presence of code that reads or modifies global variables. I'd request that this be done, as the current implementation is somewhat hard to explain.

Here are a series of examples that attempt to demonstrate how each command currently works with respect to global variables, along with my recommendations for how the commands should work. First, let's look at llvm_extract. At the moment, calling llvm_extract on any function that references a global variable at all will fail with the following error:

// test.c
#include <stdint.h>

uint32_t g;

uint32_t f(void){
  return g;
}
// test.saw
m <- llvm_load_module "test.bc";
llvm_extract m "f";
$ clang test.c -g -emit-llvm -c
$ ~/Software/saw-1.1/bin/saw test.saw
[14:45:15.870] Loading file "/home/ryanscott/Documents/Hacking/SAW/test.saw"
[14:45:15.872] Stack trace:
"llvm_extract" (/home/ryanscott/Documents/Hacking/SAW/test.saw:3:1-3:13)
Symbolic execution failed.
Abort due to assertion failure:
  test.c:6:10: error: in f
  Global symbol not allocated
  Details:
    Global symbol "g" has no associated allocation
Stack frame f
  No writes or allocations
Base memory
  Allocations:
    GlobalAlloc 1 0x0:[64] Immutable 1-byte-aligned [defined function ] f

The error message is a bit much, but I think this behavior isn't entirely unreasonable. My impression is that llvm_extract is primarily meant for C functions that do not involve any side effects (as you'd likely want a full-blown SAW specification to reason about those), and reading from a global variable ought to be considered a side effect. I propose that llvm_extract explicitly document the fact that it does not support global variables, perhaps improving the error message as well.


Now let's move on to llvm_compositional_extract. Unlike llvm_extract, llvm_compositional_extract does require a full-blown SAW specification, so I would expect it to be able to reason about global variables. Some experimentation suggests that this is indeed the case, but it's less clear on when global variables will be part of the type of the extracted definition.

To explain what I mean, first consider this example:

// test.c
#include <stdint.h>

uint32_t g = 0;

uint32_t f(uint32_t x){
  g = 0;
  return x + g;
}

Should g be considered an input in the extracted definition? An output? As it turns out, that depends on how you define the specification for f. Here is a SAW script that contains four specifications for f, and although they are all very similar, they cause llvm_compositional_extract to produce very different definitions:

// test.saw
enable_experimental;

m <- llvm_load_module "test.bc";

print "";
print "f_spec_1";

// Allocate g, but do not specify what it initially points to.
let f_spec_1 = do {
  llvm_alloc_global "g";
  x <- llvm_fresh_var "x" (llvm_int 32);

  llvm_execute_func [llvm_term x];

  llvm_points_to (llvm_global "g") (llvm_term {{ 0 : [32] }});
  ret <- llvm_fresh_var "ret" (llvm_int 32);
  llvm_postcond {{ ret == x }};
  llvm_return (llvm_term ret);
};

llvm_compositional_extract m "f" "f1" [] false f_spec_1 z3;
print_term (unfold_term ["f1"] {{ f1 }});

print "";
print "f_spec_2";

// Allocate g, and make it point to its initial value in the C program (0).
let f_spec_2 = do {
  llvm_alloc_global "g";
  llvm_points_to (llvm_global "g") (llvm_global_initializer "g");
  x <- llvm_fresh_var "x" (llvm_int 32);

  llvm_execute_func [llvm_term x];

  llvm_points_to (llvm_global "g") (llvm_term {{ 0 : [32] }});
  ret <- llvm_fresh_var "ret" (llvm_int 32);
  llvm_postcond {{ ret == x }};
  llvm_return (llvm_term ret);
};

llvm_compositional_extract m "f" "f2" [] false f_spec_2 z3;
print_term (unfold_term ["f2"] {{ f2 }});

print "";
print "f_spec_3";

// Allocate g, and make it point to a fresh symbolic value.
let f_spec_3 = do {
  llvm_alloc_global "g";
  g_init <- llvm_fresh_var "g_init" (llvm_int 32);
  llvm_points_to (llvm_global "g") (llvm_term g_init);
  x <- llvm_fresh_var "x" (llvm_int 32);

  llvm_execute_func [llvm_term x];

  llvm_points_to (llvm_global "g") (llvm_term {{ 0 : [32] }});
  ret <- llvm_fresh_var "ret" (llvm_int 32);
  llvm_postcond {{ ret == x }};
  llvm_return (llvm_term ret);
};

llvm_compositional_extract m "f" "f3" [] false f_spec_3 z3;
print_term (unfold_term ["f3"] {{ f3 }});

print "";
print "f_spec_4";

// Allocate g, and make it point to a fresh symbolic value. Also relate it to
// a fresh variable in the postconditions.
let f_spec_4 = do {
  llvm_alloc_global "g";
  g_init <- llvm_fresh_var "g_init" (llvm_int 32);
  llvm_points_to (llvm_global "g") (llvm_term g_init);
  x <- llvm_fresh_var "x" (llvm_int 32);

  llvm_execute_func [llvm_term x];

  g_ret <- llvm_fresh_var "g_ret" (llvm_int 32);
  llvm_postcond {{ g_ret == 0 }};
  llvm_points_to (llvm_global "g") (llvm_term g_ret);
  ret <- llvm_fresh_var "ret" (llvm_int 32);
  llvm_postcond {{ ret == x }};
  llvm_return (llvm_term ret);
};

llvm_compositional_extract m "f" "f4" [] false f_spec_4 z3;
print_term (unfold_term ["f4"] {{ f4 }});
$ ~/Software/saw-1.1/bin/saw test.saw
[15:21:40.427] Loading file "/home/ryanscott/Documents/Hacking/SAW/test.saw"
[15:21:40.429] 
[15:21:40.429] f_spec_1
[15:21:40.485] Verifying f ...
[15:21:40.485] Simulating f ...
[15:21:40.486] Checking proof obligations f ...
[15:21:40.496] Proof succeeded! f
[15:21:40.548] \(x : Vec 32 Bool) -> x
[15:21:40.548] 
[15:21:40.548] f_spec_2
[15:21:40.618] Verifying f ...
[15:21:40.618] Simulating f ...
[15:21:40.620] Checking proof obligations f ...
[15:21:40.634] Proof succeeded! f
[15:21:40.690] \(x : Vec 32 Bool) -> x
[15:21:40.690] 
[15:21:40.690] f_spec_3
[15:21:40.765] Verifying f ...
[15:21:40.766] Simulating f ...
[15:21:40.767] Checking proof obligations f ...
[15:21:40.781] Proof succeeded! f
[15:21:40.841] let { x@1 = Vec 32 Bool
    }
 in \(x : x@1) -> \(g_init : x@1) -> x
[15:21:40.842] 
[15:21:40.842] f_spec_4
[15:21:40.917] Verifying f ...
[15:21:40.917] Simulating f ...
[15:21:40.918] Checking proof obligations f ...
[15:21:40.930] Proof succeeded! f
[15:21:40.990] let { x@1 = Vec 32 Bool
    }
 in \(x : x@1) -> \(g_init : x@1) -> (x,bvNat 32 0)

Here is a breakdown of each result:

I think that llvm_compositional_extract's behavior around extracting global variables as inputs is fairly reasonable, but I find its behavior around extracting global variables as outputs to be very surprising. (See also #2091, which is in similar territory.) I propose that if a function modifies a global variable, then it should always be treated as an output.