Open lzaoral opened 2 years ago
The failing tests show that the points-to sets computed with -pta svf
are most probably incomplete. Comparing the output of
llvm-pta-dump testX.bc
with and without -pta svf
would probably give a hint where is the problem.
Might be unrelated but llvm-pta-dump
without any flags prints the following warning:
$ build/tools/llvm-pta-dump funcptr12.ll
Pointer Subgraph is broken (right after building)!
Invalid number of edges:
PSNodeType::STORE with ID 7
- operands: [5 PSNodeType::FUNCTION, 6 PSNodeType::CONSTANT]
(Non-entry node has no predecessors)
Invalid number of edges:
PSNodeType::STORE with ID 10
- operands: [8 PSNodeType::FUNCTION, 9 PSNodeType::CONSTANT]
(Non-entry node has no predecessors)
Unreachable node:
PSNodeType::STORE with ID 7
- operands: [5 PSNodeType::FUNCTION, 6 PSNodeType::CONSTANT]
Unreachable node:
PSNodeType::STORE with ID 10
- operands: [8 PSNodeType::FUNCTION, 9 PSNodeType::CONSTANT]
INFO: Pointer analysis took 0 sec 7 ms
...
funcptr12
It looks like the SVF analysis might be somewhat incomplete, the last six instructions are not present in the output. However, they are still present in the final sliced code which has the both foo
functions completely sliced off even though SVF correctly computed that main
has a pointer to fun2
.
Default:
foo2:: %3 = alloca i32*, align 8
-> foo2:: %3 = alloca i32*, align 8
foo2:: %4 = alloca i32*, align 8
-> foo2:: %4 = alloca i32*, align 8
foo2:: %5 = load i32*, i32** %4, align 8
-> main:: %3 = alloca i32, align 4
main:: %1 = alloca i32, align 4
-> main:: %1 = alloca i32, align 4
main:: %2 = alloca i32, align 4
-> main:: %2 = alloca i32, align 4
main:: %3 = alloca i32, align 4
-> main:: %3 = alloca i32, align 4
main:: %4 = alloca i32*, align 8
-> main:: %4 = alloca i32*, align 8
main:: %5 = alloca i64, align 8
-> main:: %5 = alloca i64, align 8
main:: %6 = alloca i32* (i32*, i32*)*, align 8
-> main:: %6 = alloca i32* (i32*, i32*)*, align 8
main:: %7 = load i32* (i32*, i32*)*, i32* (i32*, i32*)** getelementptr inbounds (%struct.callbacks, %struct.callbacks* @cb, i32 0, i32 1), align 8
-> fun 'foo2'
main:: %8 = ptrtoint i32* (i32*, i32*)* %7 to i64
-> fun 'foo2'
main:: %9 = load i64, i64* %5, align 8
-> fun 'foo2'
main:: %10 = inttoptr i64 %9 to i32* (i32*, i32*)*
-> fun 'foo2'
main:: %11 = load i32* (i32*, i32*)*, i32* (i32*, i32*)** %6, align 8
-> fun 'foo2'
main:: %12 = call i32* %11(i32* %2, i32* %3)
-> main:: %3 = alloca i32, align 4
main:: %13 = load i32*, i32** %4, align 8
-> main:: %3 = alloca i32, align 4
SVF:
foo1:: %3 = alloca i32*, align 8
-> foo1:: %3 = alloca i32*, align 8
foo1:: %4 = alloca i32*, align 8
-> foo1:: %4 = alloca i32*, align 8
foo2:: %3 = alloca i32*, align 8
-> foo2:: %3 = alloca i32*, align 8
foo2:: %4 = alloca i32*, align 8
-> foo2:: %4 = alloca i32*, align 8
main:: %1 = alloca i32, align 4
-> main:: %1 = alloca i32, align 4
main:: %2 = alloca i32, align 4
-> main:: %2 = alloca i32, align 4
main:: %3 = alloca i32, align 4
-> main:: %3 = alloca i32, align 4
main:: %4 = alloca i32*, align 8
-> main:: %4 = alloca i32*, align 8
main:: %5 = alloca i64, align 8
-> main:: %5 = alloca i64, align 8
main:: %6 = alloca i32* (i32*, i32*)*, align 8
-> main:: %6 = alloca i32* (i32*, i32*)*, align 8
main:: %7 = getelementptr %struct.callbacks, %struct.callbacks* @cb, i32 0, i32 1
-> @cb = dso_local global %struct.callbacks { i32* (i32*, i32*)* @foo1, i32* (i32*, i32*)* @foo2 }, align 8
main:: %8 = load i32* (i32*, i32*)*, i32* (i32*, i32*)** %7, align 8
-> fun 'foo2'
EDIT:
It looks like SVF does not properly handle the ptrtoint
-> inttoptr
conversion. Consider the following reduced case
#include <stdint.h>
int main(void)
{
int a;
uintptr_t ptr = (uintptr_t) &a;
* (int *) ptr = 1;
}
and corresponding PAG:
So we don't actually know after the conversion where the pointer points to.
While this is something that can surely be improved on SVF's side, I think this is a bug in the slicer. It seems to be too aggressive to slice away both func1
and func2
in the functptr12
test when the analysis says that the pointer we get after inttoptr
can point to literally anything.
dynalloc5
The reduced case:
#include <stddef.h>
extern void* malloc(size_t);
extern int assert(_Bool);
struct foo {
int a;
};
void* (*malloc2)(size_t) = malloc;
int main(void)
{
struct foo *ptr = malloc2(sizeof(struct foo));
ptr->a = 1;
assert(ptr->a == 1);
}
and corresponding PAG:
llvm-slicer -c assert --anotate=pta,slicer -pta=svf
:
; ModuleID = 'test.ll'
source_filename = "test.c"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"
%struct.foo = type { i32 }
@malloc2 = dso_local global i8* (i64)* @malloc, align 8
; -- Generated by llvm-slicer --
; * slicing criteria: ''
; * legacy slicing criteria: 'assert'
; * legacy secondary slicing criteria: ''
; * forward slice: '0'
; * remove slicing criteria: '0'
; * undefined functions behavior: 'read args'
; * pointer analysis: ; * PTA field sensitivity: full
declare i8* @malloc(i64) #0
; Function Attrs: noinline nounwind optnone sspstrong uwtable
define dso_local i32 @main() #1 {
; PTR: %1 = alloca %struct.foo*, align 8 + ?
%1 = alloca %struct.foo*, align 8
; PTR: malloc + ?
%2 = load i8* (i64)*, i8* (i64)** @malloc2, align 8
%3 = call i8* %2(i64 4)
%4 = bitcast i8* %3 to %struct.foo*
store %struct.foo* %4, %struct.foo** %1, align 8
; x %5 = load %struct.foo*, %struct.foo** %1, align 8
; x %6 = getelementptr inbounds %struct.foo, %struct.foo* %5, i32 0, i32 0
; x store i32 1, i32* %6, align 4
%7 = load %struct.foo*, %struct.foo** %1, align 8
%8 = getelementptr inbounds %struct.foo, %struct.foo* %7, i32 0, i32 0
; unknown
%9 = load i32, i32* %8, align 4
; unknown
%10 = icmp eq i32 %9, 1
; unknown
; SLICING CRITERION
%11 = call i32 @assert(i1 zeroext %10)
; x ret i32 0
}
declare i32 @assert(i1 zeroext) #0
I think that this is once again an error in the slicer as the PAG clearly shows that both registers %5
and %7
point to the same object and apart from that the subsequent GEPs are identical. Thus, the %9
will contain the 1
stored by the store
that was sliced away.
However, If you use a plain malloc
instead of the alias, the result is correct.
vararg{3,4}
Call graph produced by SVF is wrong as it does not seem to support function pointers passed as arguments to variadic functions. In vararg3
the foo
function is not marked as being call from the variadic function setv
(the graph for vararg4
is almost the same):
Thus, the whole
setv
function gets sliced away.
It looks like SVF does not properly handle the
ptrtoint
->inttoptr
conversion. Consider the following reduced case#include <stdint.h> int main(void) { int a; uintptr_t ptr = (uintptr_t) &a; * (int *) ptr = 1; }
and corresponding PAG:
So we don't actually know after the conversion where the pointer points to.
While this is something that can surely be improved on SVF's side, I think this is a bug in the slicer. It seems to be too aggressive to slice away both
func1
andfunc2
in thefunctptr12
test when the analysis says that the pointer we get afterinttoptr
can point to literally anything.
Yes, the slicer should say that %5
actually points to unknown memory. The problem may be here: https://github.com/mchalupa/dg/blob/master/include/dg/llvm/PointerAnalysis/SVFPointerAnalysis.h#L69 . We test only the black hole object, but the inttoptr
is assigned from a dummy object.
dynalloc5
...
I think that this is once again an error in the slicer as the PAG clearly shows that both registers
%5
and%7
point to the same object and apart from that the subsequent GEPs are identical. Thus, the%9
will contain the1
stored by thestore
that was sliced away.However, If you use a plain
malloc
instead of the alias, the result is correct.
From the points-to sets provided by SVF, we can find out that the pointer points to some dummy object, but, SVF does not propagate the information about what instruction allocated this dummy memory object: https://github.com/SVF-tools/SVF/blob/master/lib/WPA/Andersen.cpp#L644 And that is why it is not reflected in the DG's points-to set. Maybe we could get this information from PAG, as you suggest. (But I am not sure if it can be done in general, I mean without any complicated analysis).
vararg{3,4}
Call graph produced by SVF is wrong as it does not seem to support function pointers passed as arguments to variadic functions. In
vararg3
thefoo
function is not marked as being call from the variadic functionsetv
(the graph forvararg4
is almost the same):
I'm not sure what we can do here. We can always use our lazy-callgraph which overapproximates the called functions, but for that it would be good to detect that the call graph (or just the points-to set of the function pointer) is incomplete. That is, for any variadic function, we would need to check if some of its actual arguments can point to a function and for such functions we use information from our call graph (possibly filtered with the information about actual arguments).
Thanks @mchalupa. The funcptr12
and vararg{3,4}
tests now pass thanks to your suggestion to use dg
's lazy call graph!
@mchalupa This is the current state of the few failing tests:
funcptr12
-- With SVF the definition ofstruct callbacks cb
ends up being undefined:Thus,
lli
segfaults when it tries to call the function whose address is undefined as well.bitcast5
-- This one doesn't fail in the CI but does on my machine as apparently Ubuntu doesn't build it's packages with-fstack-clash-protection -fcf-protection
as opposed to Arch.lli
crashes with*** stack smashing detected ***: terminated
.In fact, the original code's behaviour is really undefined so it's just a coincidence that it emerged when the code was sliced by SVF. By manual inspection, SVF's analysis seems to be correct as only instructions having no impact on the slicing criterion were removed.
The problematic code: https://github.com/mchalupa/dg/blob/a9c61012858a7449b98e64a6d0e3e744148eaac9/tests/slicing/sources/bitcast5.c#L4-L6 Assume that we are on a platform where
sizeof(int) == 4 * sizeof(char)
holds. Apart from the violation of strict aliasing rules [1] (same forbitcast4
), even if we compile with-fno-strict-aliasing
, we try to zero out the 12th to 16th element of the array of size 13 as type cast has higher precedence than addition [2].EDIT: Fixed by #413. EDIT2: There was actually even another problem because taking a pointer to an address that's not aligned is also UB.
OT: I suggest that we should check that every test input is UB-free. Otherwise, there is no guarantee what's going to happen when we interpret the sliced off code with
lli
assuming thatllvm-slicer
did nothing wrong.[1] https://gist.github.com/shafik/848ae25ee209f698763cffee272a58f8#what-does-the-c11-standard-say [2] https://en.cppreference.com/w/c/language/operator_precedence
dynalloc5
-- Following line is sliced off, thus, the assertioni->a == 13
fails: https://github.com/mchalupa/dg/blob/a9c61012858a7449b98e64a6d0e3e744148eaac9/tests/slicing/sources/dynalloc5.c#L12vararg3
-- After slicing, only emptymain
remains:vararg4
-- The variadic function call that sets value ofa
is completely sliced-off. https://github.com/mchalupa/dg/blob/a9c61012858a7449b98e64a6d0e3e744148eaac9/tests/slicing/sources/vararg4.c#L20-L25