github / codeql

CodeQL: the libraries and queries that power security researchers around the world, as well as code scanning in GitHub Advanced Security
https://codeql.github.com
MIT License
7.51k stars 1.49k forks source link

Java: CodeQL and chill missing result #17045

Open p1keman opened 1 month ago

p1keman commented 1 month ago

Description of the issue I'm practicing securitylab's Codeql-and-chill, https://securitylab.github.com/ctf/codeql-and-chill/
and I found four data flows using the following codeql rules, with one missing,However, in my test demo, the missing data stream can be completely followed up. I don't know what causes this. The following is the specific codeql rules and test demo

Code database download link https://github.com/github/securitylab/releases/download/ctf-codeql-and-chill-java-edition/titus-control-plane-db.zip

Here's what my codeql rules say

import java
import semmle.code.java.dataflow.DataFlow
import semmle.code.java.dataflow.TaintTracking
import semmle.code.java.dataflow.FlowSources
import DataFlow::PartialPathGraph

class BuildConstraintViolationWithTemplateMethod extends Method{
    BuildConstraintViolationWithTemplateMethod(){
        hasQualifiedName("javax.validation", ["ConstraintValidatorContext"], "buildConstraintViolationWithTemplate")
    }
}

class BuildConstraintViolationWithTemplateSink extends DataFlow::ExprNode{
    BuildConstraintViolationWithTemplateSink(){
        exists( MethodAccess ma | ma.getCallee() instanceof BuildConstraintViolationWithTemplateMethod | ma.getArgument(0)=asExpr())
    }
}

class CollectionType extends RefType{
    CollectionType(){
        hasQualifiedName("java.util", "Collection<?>")
    }
}

class BeanValidationConfig extends TaintTracking::Configuration {
    BeanValidationConfig() { this = "BeanValidationConfig" }

    override predicate isSource(DataFlow::Node source) { 
        source instanceof RemoteFlowSource
    }

    override predicate isSink(DataFlow::Node sink) { 
        sink instanceof BuildConstraintViolationWithTemplateSink
    }

    override predicate isAdditionalTaintStep(DataFlow::Node pred, DataFlow::Node succ) {
        pred.asExpr() = succ.asExpr().(AddExpr).getAnOperand()  
        or
        exists(MethodAccess ac | pred.asExpr() = ac.getAnArgument() and succ.asExpr() = ac)  
    }

    override predicate isSanitizer(DataFlow::Node n) {
        exists(Method m | m.getParameter(0).getType() instanceof CollectionType|n.asParameter()=m.getParameter(0))
    }

    override predicate allowImplicitRead(DataFlow::Node node, DataFlow::ContentSet c) {
        super.allowImplicitRead(node, c)
        or
        this.isAdditionalTaintStep(node, _) and
        (
            c instanceof DataFlow::ArrayContent
            or
            c instanceof DataFlow::CollectionContent
            or
            c instanceof DataFlow::MapValueContent
        )
    }
}

from BeanValidationConfig bvc, DataFlow::PathNode source, DataFlow::PathNode sink
where bvc.hasFlowPath(source, sink)
select source.getNode(), "-->", sink

The missing code location is in the /Users/xavier/src/github/titus-control-plane/titus-common/src/main/java/com/netflix/titus/common/model/sanitizer/internal/CollectionValidator.java file

Debugging through hasPartialFlow shows that the last position is in filter(e -> e.getValue() == null)

        if (!constraintAnnotation.allowNullValues()) {
            Set<String> badEntryKeys = value.entrySet().stream()
                    .filter(e -> e.getValue() == null)
                    .map(e -> e.getKey() instanceof String ? (String) e.getKey() : "<not_string>")
                    .collect(Collectors.toSet());
            if (!badEntryKeys.isEmpty()) {
                attachMessage(context, "null values found for keys: " + new TreeSet<>(badEntryKeys));
                return false;
            }
        }

When I tested it with the following code, I found that this rule follows through into the final system.out.println() function MapToSetTest.java

package com.example.demo;

import java.util.*;
import java.util.stream.Collectors;

public class MapToSetTest {
    public static void main(String[] args) {
        Map<Object, Object> value = new HashMap<>();
        value.put("key1", null);
        value.put(2, null);
        value.put("key3", "value3");
        value.put(4, "value4");

        Set<String> badEntryKeys = value.entrySet().stream()
                .filter(e -> e.getValue() == null)
                .map(e -> e.getKey() instanceof String ? (String) e.getKey() : "<not_string>")
                .collect(Collectors.toSet());
        x("null values found for keys: " +new TreeSet<>( badEntryKeys));
    }

    private static void x(String string){
        System.out.println(string);
    }
}

The test rules are as follows

import java
import semmle.code.java.dataflow.DataFlow
import semmle.code.java.dataflow.TaintTracking
import semmle.code.java.dataflow.FlowSources
import DataFlow::PartialPathGraph

class BeanValidationConfig extends TaintTracking::Configuration {
    BeanValidationConfig() { this = "BeanValidationConfig" }

    override predicate isSource(DataFlow::Node source) { 
        exists( Variable v |  v.getName()="value" | v.getInitializer()= source.asExpr())
    }

    override predicate isAdditionalTaintStep(DataFlow::Node pred, DataFlow::Node succ) {
        pred.asExpr() = succ.asExpr().(AddExpr).getAnOperand()  
        or
        exists(MethodAccess ac | pred.asExpr() = ac.getAnArgument() and succ.asExpr() = ac)  
    }

    override predicate allowImplicitRead(DataFlow::Node node, DataFlow::ContentSet c) {
        super.allowImplicitRead(node, c)
        or
        this.isAdditionalTaintStep(node, _) and
        (
            c instanceof DataFlow::ArrayContent
            or
            c instanceof DataFlow::CollectionContent
            or
            c instanceof DataFlow::MapValueContent
        )
    }
    override int explorationLimit() { result =  3}
}

predicate partial_flow(DataFlow::PartialPathNode n, DataFlow::Node src, int dist) {
    exists(BeanValidationConfig conf, DataFlow::PartialPathNode source |
    conf.hasPartialFlow(source, n, dist) and
    src = source.getNode() and source.getNode().getLocation().getFile().getBaseName()="MapToSetTest.java"
    )
}
smowton commented 1 month ago

I note that for your example that does work, exists(MethodAccess ac | pred.asExpr() = ac.getAnArgument() and succ.asExpr() = ac) as an additional taint step is not necessary. Given your test does work as intended even with this removed, this suggests the stream, filter, map pattern is not in general a problem.

I could not work out what actually is causing difficulty though because I wasn't sure what flow path you are expecting to see, but not seeing. Could you clarify what you expect to be the source, and what sink you expect it to flow to?

A couple of notes on the QL sample you have provided:

I would generally recommend do not make a blanket "any method argument taints that method's return value" step -- this will take shortcuts across many different methods and obscure what does and does not work by adding lots of surprising extra taint edges. Instead, restrict the additional step to just those functions you wish to model.

Could you also clarify what your sanitizer, which sanitizes the first parameter of any method taking a collection as an argument, is intended for?

smowton commented 1 month ago

OK, I found the problem-- it's pretty obscure!

Taint is reaching as far as the e.getKey call inside the map stream function, but then we end up with a content mis-match:

Our models of stream operations deal in terms of abstract MapKeyContent and MapValueContent -- so starting from a tainted Map, the entry-set's Map.Entry instances have MapKey and MapValue content and we expect to apply the model for Map.Entry.getKey/getValue, which expects MapKey or MapValue content and yield a tainted top-level value.

Unfortunately, the project being tested actually contains an implementation of Map.Entry -- specifically com.netflix.titus.common.util.collections.SimpleEntry -- and our virtual dispatch logic prefers that callee to our model of the interface Map.Entry. The real implementation SimpleEntry doesn't deal in terms of abstract map-keys and values, it reads real fields named key and value, and the mis-match causes our taint-tracking logic to fail to make progress.

The solution is to add a bridge across the getKey/getValue method: an additional taint step like:

    exists(MethodCall mc | mc.getCallee().getSourceDeclaration().hasQualifiedName("java.util", "Map<>$Entry", "getKey") |
      pred.asExpr() = mc.getQualifier() and succ.asExpr() = mc
    )

(Note use of getSourceDeclaration to discard generics)

That is sufficient to get the result. This is very niche edge case, of having source code for some implementation of Map.Entry in the project under analysis, while applying abstract models of other stream functions. This has almost certainly arisen due to changes in the taint-tracking library since the CodeQL and Chill task was written, and probably wasn't supposed to be part of the puzzle. I'll let the Java team know and see what can be done about it.

For completeness, here's the QL I wrote that worked for me -- I've also converted it to the new ConfigSig way of writing data-flow queries, and restricted the source to just the interesting missing case:

import java
import semmle.code.java.dataflow.DataFlow
import semmle.code.java.dataflow.TaintTracking
import semmle.code.java.dataflow.FlowSources

class BuildConstraintViolationWithTemplateMethod extends Method {
  BuildConstraintViolationWithTemplateMethod() {
    hasQualifiedName("javax.validation", ["ConstraintValidatorContext"],
      "buildConstraintViolationWithTemplate")
  }
}

class BuildConstraintViolationWithTemplateSink extends DataFlow::ExprNode {
  BuildConstraintViolationWithTemplateSink() {
    exists(MethodAccess ma | ma.getCallee() instanceof BuildConstraintViolationWithTemplateMethod |
      ma.getArgument(0) = asExpr()
    )
  }
}

module BeanValidationConfig implements DataFlow::ConfigSig {
  predicate isSource(DataFlow::Node source) {
    source.asParameter().getName() = "value" and
    source.getLocation().getFile().getBaseName() = "CollectionValidator.java"
  }

  predicate isSink(DataFlow::Node sink) { sink instanceof BuildConstraintViolationWithTemplateSink }

  predicate isAdditionalFlowStep(DataFlow::Node pred, DataFlow::Node succ) {
    pred.asExpr() = succ.asExpr().(AddExpr).getAnOperand()
    or
    exists(MethodCall mc | mc.getCallee().getSourceDeclaration().hasQualifiedName("java.util", "Map<>$Entry", "getKey") |
      pred.asExpr() = mc.getQualifier() and succ.asExpr() = mc
    )
  }

  predicate allowImplicitRead(DataFlow::Node node, DataFlow::ContentSet c) {
    isAdditionalFlowStep(node, _) and
    (
      c instanceof DataFlow::ArrayContent
      or
      c instanceof DataFlow::CollectionContent
      or
      c instanceof DataFlow::MapValueContent
    )
  }
}

module Flow = TaintTracking::Global<BeanValidationConfig>;

from Flow::PathNode source, Flow::PathNode sink
where Flow::flowPath(source, sink)
select source, sink