llvm / llvm-project

The LLVM Project is a collection of modular and reusable compiler and toolchain technologies.
http://llvm.org
Other
29.04k stars 11.98k forks source link

Taint not being applied across class heirarchies #62663

Open tomrittervg opened 1 year ago

tomrittervg commented 1 year ago

I've found a few places where the Static Analysis Engine can't apply taint in different scenarios based on the class inheritance. I'm not sure quite how to describe it, other than it doesn't seem to be considering all the possible implementations behind a pointer, and this results in taint not being applied at the correct place.

I've minified this reproducer heavily, but this was encountered while trying to get taint tracking working on Firefox, so it originally came from a real use case, using CTU, across a few object files and a lot of source/header files. If this is considered 'not a bug' I would welcome recommendations on how we can annotate or restructure things so that the taint tracking can actually work.

In the example below Statements 1A and 2A show the behavior as I would expect/hope it to work. Statements 1B and 2B show scenarios where it doesn't do the expected thing; these represent how Firefox's code is actually structured; I needed to modify both of those places to get it to behave the way I expected.

#include <stdio.h>

void clang_analyzer_isTainted(int);

/*
  Class Hierarchy:

        Actor◄───┐
                 │---Parent
        PParent◄─┘

        Base ◄── Handler

  Control Flow:

    start: PParent::OnMessageReceived
           Parent::RecvCancel
           Handler::OnRecvCancel
*/

// ==========================================

class Base  {
 public:
  virtual void OnRecvCancel(int port) = 0;
};

class Handler final : public Base {
 public:
  void OnRecvCancel(int port) override;
};

// ------------------------------------------

class PParent 
{
public:
    bool OnMessageReceived() ;
};

class Actor {
 public:
  explicit Actor(Base* aRequest) : m(aRequest) {}

 protected:
  Base* m;
};

class Parent : public Actor, public PParent {
 public:
  explicit Parent(Base* aRequest);

  bool RecvCancel(int port);
};

// ==========================================

Parent::Parent(Base* aRequest)
    : Actor(aRequest) {
}

// ------------------------------------------

void Handler::OnRecvCancel(int port) {
  // SECOND CALL to check taint
  clang_analyzer_isTainted(port);
}

bool Parent::RecvCancel(int port) {
  // FIRST CALL to check taint
  clang_analyzer_isTainted(port);

  // Statement 2A: With this statement (and 1A) things behave as we expect.  We report that
  //               port is tainted on both the first and second calls.
  Handler* foo = (Handler*)m;

  // Statement 2B: With this statement (and 1A) we only report that port is tainted on the first call
  //auto foo = m;

  foo->OnRecvCancel(port);
  return true;
}

// ------------------------------------------

auto PParent::OnMessageReceived() -> bool
{
    int port;
    scanf("%i", &port);

    // Not needed, but yes it is Tainted.
    //clang_analyzer_isTainted(port);

    // Statement 1A: With this value (and statement 2A) we correctly report that port is tainted on
    //               the first and second calls
    Parent* foo = (Parent*)0x10;

    // Statement 1B: With this value (and statement 2A) we report that port is tainted only on the
    //               first call.  Parent inherits from PParent.
    //Parent* foo = static_cast<Parent*>(this);

    // Statement 1C: With this value (and statement 2A) we do not report that port is tainted at all
    //               I presume this is do to optimization as we're causing Undefined Behavior
    //Parent* foo = nullptr;

    return foo->RecvCancel(port);
}

Command:

clang-17 --analyze -Xclang -analyzer-checker=debug.TaintTest,debug.ExprInspection,alpha.security.taint.TaintPropagation PDNSRequestParent.cpp

I'm using a version of Clang trunk built a few days ago; it should be revision 4c457e81c4ed78e237b408fb480909a956432638

Statements 1A & 2A

../PDNSRequestParent.cpp:45:3: warning: YES [debug.ExprInspection]
  clang_analyzer_isTainted(port);
  ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
../PDNSRequestParent.cpp:45:28: warning: tainted [debug.TaintTest]
  clang_analyzer_isTainted(port);
                           ^~~~
../PDNSRequestParent.cpp:50:3: warning: YES [debug.ExprInspection]
  clang_analyzer_isTainted(port);
  ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
../PDNSRequestParent.cpp:50:28: warning: tainted [debug.TaintTest]
  clang_analyzer_isTainted(port);
                           ^~~~
../PDNSRequestParent.cpp:59:21: warning: tainted [debug.TaintTest]
  foo->OnRecvCancel(port);
                    ^~~~
../PDNSRequestParent.cpp:83:28: warning: tainted [debug.TaintTest]
    return foo->RecvCancel(port);
                           ^~~~
6 warnings generated.

Statements 1A & 2B

../PDNSRequestParent.cpp:45:3: warning: NO [debug.ExprInspection]
  clang_analyzer_isTainted(port);
  ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
../PDNSRequestParent.cpp:50:3: warning: YES [debug.ExprInspection]
  clang_analyzer_isTainted(port);
  ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
../PDNSRequestParent.cpp:50:28: warning: tainted [debug.TaintTest]
  clang_analyzer_isTainted(port);
                           ^~~~
../PDNSRequestParent.cpp:59:21: warning: tainted [debug.TaintTest]
  foo->OnRecvCancel(port);
                    ^~~~
../PDNSRequestParent.cpp:83:28: warning: tainted [debug.TaintTest]
    return foo->RecvCancel(port);
                           ^~~~
5 warnings generated.

Statements 1B and 2A

../PDNSRequestParent.cpp:45:3: warning: NO [debug.ExprInspection]
  clang_analyzer_isTainted(port);
  ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
../PDNSRequestParent.cpp:50:3: warning: YES [debug.ExprInspection]
  clang_analyzer_isTainted(port);
  ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
../PDNSRequestParent.cpp:50:28: warning: tainted [debug.TaintTest]
  clang_analyzer_isTainted(port);
                           ^~~~
../PDNSRequestParent.cpp:59:21: warning: tainted [debug.TaintTest]
  foo->OnRecvCancel(port);
                    ^~~~
../PDNSRequestParent.cpp:83:28: warning: tainted [debug.TaintTest]
    return foo->RecvCancel(port);
                           ^~~~
5 warnings generated.
llvmbot commented 1 year ago

@llvm/issue-subscribers-clang-static-analyzer

steakhal commented 1 year ago

If I'm not mistaken, the only question here is: Why is no taint reported within Handler::OnRecvCancel() if we have a foo->RecvCancel(port) call to it where foo is defined as Parent* foo = static_cast<Parent*>(this);?

Well, it turns out it's because we didn't inline it, so taint could not propagate to its body. IMO we should have inlined it - and if that would be the case, then taint would propagate as expected.

I'll level the details at the end of this comment for describing what actually happened in your specific case, but let's have a look at a simplified [example](https://godbolt.org/#g:!((g:!((g:!((h:codeEditor,i:(filename:'1',fontScale:14,fontUsePx:'0',j:1,lang:c%2B%2B,selection:(endColumn:2,endLineNumber:21,positionColumn:2,positionLineNumber:21,selectionStartColumn:1,selectionStartLineNumber:1,startColumn:1,startLineNumber:1),source:'void+clang_analyzer_warnIfReached()%3B%0A%0Aclass+HandlerBase+%7B%0A+++public:%0A++++virtual+void+onAction()+%7B%0A++++clang_analyzer_warnIfReached()%3B+//+lol,+we+inlined+this+instead!!%0A++++%7D%0A%7D%3B%0A%0Aclass+Handler+final+:+public+HandlerBase+%7B%0A+++public:%0A++++void+onAction()+override+%7B%0A++++++++clang_analyzer_warnIfReached()%3B+//+Hmm,+why+didn!'t+we+inline+this%3F%0A++++%7D%0A%7D%3B%0A%0Avoid+top(HandlerBase+*p)+%7B%0A++Handler+*q+%3D+static_cast%3CHandler*%3E(p)%3B%0A++q-%3EonAction()%3B%0A++clang_analyzer_warnIfReached()%3B%0A%7D'),l:'5',n:'0',o:'C%2B%2B+source+%231',t:'0')),k:56.74191283209192,l:'4',m:100,n:'0',o:'',s:0,t:'0'),(g:!((g:!((h:output,i:(compilerName:'x86-64+gcc+(trunk)',editorid:1,fontScale:14,fontUsePx:'0',j:2,wrap:'1'),l:'5',n:'0',o:'Output+of+x86-64+clang+(trunk)+(Compiler+%232)',t:'0')),header:(),k:43.25808716790808,l:'4',m:50,n:'0',o:'',s:0,t:'0'),(g:!((h:compiler,i:(compiler:clang_trunk,deviceViewOpen:'1',filters:(b:'0',binary:'1',binaryObject:'1',commentOnly:'0',debugCalls:'1',demangle:'0',directives:'0',execute:'1',intel:'0',libraryCode:'0',trim:'1'),flagsViewOpen:'1',fontScale:14,fontUsePx:'0',j:2,lang:c%2B%2B,libs:!(),options:'--analyze+-Xclang+-analyzer-checker%3Dcore,apiModeling,alpha.security,alpha.security.taint,alpha.security.ArrayBoundV2,debug.ExprInspection+-Xclang+-analyze-function%3D%22top(HandlerBase+*)%22',overrides:!(),selection:(endColumn:1,endLineNumber:1,positionColumn:1,positionLineNumber:1,selectionStartColumn:1,selectionStartLineNumber:1,startColumn:1,startLineNumber:1),source:1),l:'5',n:'0',o:'+x86-64+clang+(trunk)+(Editor+%231)',t:'0')),header:(),l:'4',m:50,n:'0',o:'',s:0,t:'0')),k:43.25808716790808,l:'3',n:'0',o:'',t:'0')),l:'2',m:100,n:'0',o:'',t:'0')),version:4):

void clang_analyzer_warnIfReached();

class HandlerBase {
   public:
    virtual void onAction() {
    clang_analyzer_warnIfReached(); // lol, we inlined this instead!
    }
};

class Handler final : public HandlerBase {
   public:
    void onAction() override {
        clang_analyzer_warnIfReached(); // Hmm, why didn't we inline this?
    }
};

void top(HandlerBase *p) {
  Handler *q = static_cast<Handler*>(p);
  q->onAction();
  clang_analyzer_warnIfReached();
}

We can see that the call to q->onAction() inlined the base implementation of the virtual function - even though it should have inlined the Handler::onAction() instead. And if it's a pure virtual function in the base class, we will simply conservatively evaluate the call.

Unfortunately, I don't have the time to fix this, but at least we now know about this. @tomrittervg I don't think you can do much to workaround this issue. @Xazax-hun Do you agree that we should have inlined Handler::onAction() in my example? If so, we should probably create a separate ticket with that example and close this issue.


Details of this bugreport:

ExprEngine::defaultEvalCall() wants to evaluate foo->OnRecvCancel(port) and inline that member function. However, it fails to resolve the runtime definition of that function, so Call->getRuntimeDefinition().getDecl() results in the wrong type: class Base *. Because of this, when we want to inline the call, we see that this is a pure function that we cannot inline and evaluate it conservatively.

Later, when we will eventually schedule the Handler::OnRecvCancel(int) in top-level context so that we cover it at least without any calling context. And by default, no parameters will be tainted in top-level context (except for main and alike functions of course - which is not the case here).

The problem is within CXXInstanceCall::getRuntimeDefinition(), more precisely inside the called getDynamicTypeInfo(getState(), R) when passing R: Derived{SymRegion{reg_$5<Base * Base{SymRegion{reg_$0<class PParent * this>},Actor}.m>}, Handler}. The Derived{} region itself is a TypedRegion of type class Handler. But within getDynamicTypeInfo() we have this call: MR = MR->StripCasts(); And it turns out it also strips the DerivedRegion, and leaves us only SymRegion{reg_$5...} - which is no longer a TypedRegion so we fall back to the wrapped symbol for type information, which is class Base * in our case. and returns that.

The StripCasts has a StripBaseAndDerivedCasts bool option, which will strip these by default, but when I turned it off, some tests failed, so it's probably not how we should fix this :D

Xazax-hun commented 1 year ago

Should the analyzer trust a static down-cast? I think the analyzer already has the tendency of trusting the source code, e.g., it trusts assertions. So, I believe @steakhal is right, the analyzer in this particular case should inline the method from the derived class.

haoNoQ commented 1 year ago

Yeah I think I agree with @Xazax-hun, this is a dynamic type info problem.

Just because the program performed static_cast<Handler>(), the vtable of the pointee doesn't suddenly start pointing to the Handler::onAction() override. So the function inliner is technically correct, and StripCasts() is also technically correct: the cast on its own doesn't affect runtime vtable lookup. Just because we're interpreting the pointer as a pointer to Handler, doesn't mean it actually points to a Handler. (Which goes back to my usual point: region types were a mistake because the actual runtime behavior never reacts to them: pointers are just numbers.)

However, for static analysis purposes, static_cast also carries the assumption the developer makes about the program: that the object actually is a Handler. The developer is actively trying to tell us that the object is of this subclass type, and in such cases we typically prefer to trust them. Unless we have solid reasons to believe the opposite (eg., we've literally tracked the object since allocation).

So in such cases developer's assumption has to be explicitly encoded in DynamicTypeMap - this is the type-information we've gathered about memory regions at runtime, that goes beyond the region's identity. This is our way of saying that in this region there actually is an object of type Handler. So regardless of StripCasts() (might have as well been getAsOffset(), which plays nicely with "region types were a mistake"), the next thing the static analyzer should do is consult the DynamicTypeMap and trust that info over everything else.


There's another way to look at this: I'm not a lawyer but it might be that according to the C++ Standard the behavior is undefined if the method is invoked through a wrong-type pointer. In this case the inliner machine should have trusted the pointer type, and we could handle this syntactically (region types might have been a mistake, but AST expression types certainly weren't). This also means we can develop a checker that emits a warning when the pointer actually points to the wrong type.

But regardless of how we're establishing that the region has an object of type Handler, once it's established it should go to DynamicTypeMap, so that later method invocations on that object could be resolved the same way regardless of syntactic hints (and possibly for the purposes of checking as well).

tomrittervg commented 1 year ago

The StripCasts has a StripBaseAndDerivedCasts bool option, which will strip these by default, but when I turned it off, some tests failed, so it's probably not how we should fix this

I applied this patch and it failed the following tests:

ctor.mm

error: 'warning' diagnostics expected but not seen: 
  File /media/tom/b032d6f1-a2e4-4c59-8c52-4cb0c18f69ce1/llvm-project/clang/test/Analysis/ctor.mm Line 98: TRUE
error: 'warning' diagnostics seen but not expected: 
  File /media/tom/b032d6f1-a2e4-4c59-8c52-4cb0c18f69ce1/llvm-project/clang/test/Analysis/ctor.mm Line 98: FALSE [debug.ExprInspection]
2 errors generated.

derived-to-base.cpp

error: 'warning' diagnostics expected but not seen: 
  File /media/tom/b032d6f1-a2e4-4c59-8c52-4cb0c18f69ce1/llvm-project/clang/test/Analysis/derived-to-base.cpp Line 383: TRUE
  File /media/tom/b032d6f1-a2e4-4c59-8c52-4cb0c18f69ce1/llvm-project/clang/test/Analysis/derived-to-base.cpp Line 439: TRUE
error: 'warning' diagnostics seen but not expected: 
  File /media/tom/b032d6f1-a2e4-4c59-8c52-4cb0c18f69ce1/llvm-project/clang/test/Analysis/derived-to-base.cpp Line 392: FALSE [debug.ExprInspection]
  File /media/tom/b032d6f1-a2e4-4c59-8c52-4cb0c18f69ce1/llvm-project/clang/test/Analysis/derived-to-base.cpp Line 448: FALSE [debug.ExprInspection]
4 errors generated.

inline.cpp

error: 'warning' diagnostics expected but not seen: 
  File /media/tom/b032d6f1-a2e4-4c59-8c52-4cb0c18f69ce1/llvm-project/clang/test/Analysis/inline.cpp Line 80: TRUE
  File /media/tom/b032d6f1-a2e4-4c59-8c52-4cb0c18f69ce1/llvm-project/clang/test/Analysis/inline.cpp Line 103: TRUE
  File /media/tom/b032d6f1-a2e4-4c59-8c52-4cb0c18f69ce1/llvm-project/clang/test/Analysis/inline.cpp Line 151: TRUE
  File /media/tom/b032d6f1-a2e4-4c59-8c52-4cb0c18f69ce1/llvm-project/clang/test/Analysis/inline.cpp Line 434: TRUE
error: 'warning' diagnostics seen but not expected: 
  File /media/tom/b032d6f1-a2e4-4c59-8c52-4cb0c18f69ce1/llvm-project/clang/test/Analysis/inline.cpp Line 55: FALSE [debug.ExprInspection]
  File /media/tom/b032d6f1-a2e4-4c59-8c52-4cb0c18f69ce1/llvm-project/clang/test/Analysis/inline.cpp Line 80: UNKNOWN [debug.ExprInspection]
  File /media/tom/b032d6f1-a2e4-4c59-8c52-4cb0c18f69ce1/llvm-project/clang/test/Analysis/inline.cpp Line 434: UNKNOWN [debug.ExprInspection]
7 errors generated.

dtor.cpp

error: 'warning' diagnostics expected but not seen: 
  File /media/tom/b032d6f1-a2e4-4c59-8c52-4cb0c18f69ce1/llvm-project/clang/test/Analysis/dtor.cpp Line 217: TRUE
error: 'warning' diagnostics seen but not expected: 
  File /media/tom/b032d6f1-a2e4-4c59-8c52-4cb0c18f69ce1/llvm-project/clang/test/Analysis/dtor.cpp Line 217: FALSE [debug.ExprInspection]
2 errors generated.

pointer-to-member.cpp

error: 'warning' diagnostics expected but not seen: 
  File /media/tom/b032d6f1-a2e4-4c59-8c52-4cb0c18f69ce1/llvm-project/clang/test/Analysis/pointer-to-member.cpp Line 113: TRUE
error: 'warning' diagnostics seen but not expected: 
  File /media/tom/b032d6f1-a2e4-4c59-8c52-4cb0c18f69ce1/llvm-project/clang/test/Analysis/pointer-to-member.cpp Line 113: FALSE [debug.ExprInspection]
2 errors generated.

temporaries.cpp

error: 'warning' diagnostics seen but not expected: 
  File /media/tom/b032d6f1-a2e4-4c59-8c52-4cb0c18f69ce1/llvm-project/clang/test/Analysis/temporaries.cpp Line 579: REACHABLE [debug.ExprInspection]
1 error generated.
tomrittervg commented 1 week ago

As an update, I tested ~tip (a version built from git on approx Oct 9th) just to confirm it is still an issue. (It is)

DNSRequestParent.cpp:

void clang_analyzer_isTainted(int);

class PParent {
public:
  bool SendCancelDNSRequest(const int &port);
  void OnMessageReceived();
};

class Base  {
public:
  // This causes the Bug  (A1)
  virtual int OnRecvCancelDNSRequest(const int &port) = 0;
  // This does not (A2)
  // int OnRecvCancelDNSRequest(const int &port);
};

class RequestHandler final 
// This Causes the Bug (B1)
  : public Base 
// Commenting it out does not (B2)
{
public:
  int OnRecvCancelDNSRequest(const int &port);
};

/*
  A1 + B1 = bug
  A1 + B2 = no bug
  A2 + B1 = no bug
  A2 + B2 = no bug

*/

class RequestParent : public PParent {
public:
  bool RecvCancelDNSRequest(const int &port);
};

int foo(int port) {
  clang_analyzer_isTainted(port);
}

int RequestHandler::OnRecvCancelDNSRequest(const int &port) {
  // The bug is that this is not detected as tainted:
  clang_analyzer_isTainted(port);
  return port;

}

bool RequestParent::RecvCancelDNSRequest(
    const int &port) {
  clang_analyzer_isTainted(port);

  RequestHandler* x = (RequestHandler*)0x10;
  x->OnRecvCancelDNSRequest(port);

  foo(port);

  return true;
}

PDNSRequestParent.cpp

void clang_analyzer_isTainted(int);
int ThisFunctionReturnsSomethingTainted();

class PParent {
public:
  void OnMessageReceived();

};

class RequestParent : public PParent {
public:
  bool RecvCancelDNSRequest(const int &port);
};

void PParent::OnMessageReceived() {

    int port = ThisFunctionReturnsSomethingTainted();
    clang_analyzer_isTainted(port);

    RequestParent *foo = (RequestParent *)0x10;
    bool __ok =
        foo->RecvCancelDNSRequest(port);
    return;
}

myconfig.yaml

Propagations:
  - Name: ReadPrivilegedParam
    DstArgs: [-1]

  - Name: privilegedextract
    DstArgs: [-1]

  - Name: ThisFunctionReturnsSomethingTainted
    DstArgs: [-1]  

commands:

#!/bin/bash

echo "Generating AST"
clang-20 \
-c \
-x c++ \
-emit-ast \
-D__clang_analyzer__ \
-w \
-o PDNSRequestParent.cpp.ast \
PDNSRequestParent.cpp

clang-20 \
-c \
-x c++ \
-emit-ast \
-D__clang_analyzer__ \
-w \
-o DNSRequestParent.cpp.ast \
DNSRequestParent.cpp

rm -f externalDefMap.txt
touch externalDefMap.txt

echo "extdef mapping"
clang-extdef-mapping \
PDNSRequestParent.cpp \
-- \
-c \
-x c++ \
>> externalDefMap.txt 

clang-extdef-mapping \
DNSRequestParent.cpp \
-- \
-c \
-x c++ \
>> externalDefMap.txt 2>/dev/null

sed -i -e 's/\.cpp$/\.cpp\.ast/g' externalDefMap.txt
sed -i -e "s|$(pwd)|.|g" externalDefMap.txt

echo "Analyzing PDNSRequestParent"
clang-20 \
--analyze \
-Qunused-arguments \
-Xclang -analyzer-opt-analyze-headers \
-Xclang -analyzer-config \
-Xclang expand-macros=true \
-Xclang -analyzer-config \
-Xclang optin.taint.TaintPropagation:Config=myconfig.yaml \
-Xclang -analyzer-checker=debug.TaintTest,debug.ExprInspection,optin.taint.TaintedAlloc,optin.taint.TaintedDiv,optin.taint.GenericTaint \
-Xclang -analyzer-disable-checker=core.NullDereference,deadcode.DeadStores \
-Xclang -analyzer-config \
-Xclang aggressive-binary-operation-simplification=true \
-Xclang -analyzer-config \
-Xclang experimental-enable-naive-ctu-analysis=true \
-Xclang -analyzer-config \
-Xclang ctu-dir=. \
-Xclang -analyzer-config \
-Xclang display-ctu-progress=true \
-x c++ \
PDNSRequestParent.cpp