joernio / joern

Open-source code analysis platform for C/C++/Java/Binary/Javascript/Python/Kotlin based on code property graphs. Discord https://discord.gg/vv4MH284Hc
https://joern.io/
Apache License 2.0
2.09k stars 288 forks source link

[Bug] dataflow error #4794

Open hac425xxx opened 3 months ago

hac425xxx commented 3 months ago

test code

int smc_nl_remove_ueid(struct sk_buff *skb, struct genl_info *info)
{
    struct nlattr *nla_ueid = info->attrs[SMC_NLA_EID_TABLE_ENTRY];
    char *ueid;

    if (!nla_ueid || nla_len(nla_ueid) != SMC_MAX_EID_LEN + 1)
        return -EINVAL;
    ueid = (char *)nla_data(nla_ueid);

    return smc_clc_ueid_remove(ueid);
}

scala code

joern> def source = cpg.method.where(_.name("smc_nl_remove_ueid")).parameter
     | var x1 = cpg.method.where(_.name("smc_nl_remove_ueid")).repeat(_.astChildren)(_.until(_.isCall)).next
     | var sk = x1.asInstanceOf[Call].argument
     | sk.reachableByFlows(source).p
     | 
def source:
  Iterator[io.shiftleft.codepropertygraph.generated.nodes.MethodParameterIn]
var x1: io.shiftleft.codepropertygraph.generated.nodes.AstNode = Call(
  argumentIndex = -1,
  argumentName = None,
  code = "*nla_ueid = info->attrs[SMC_NLA_EID_TABLE_ENTRY]",
  columnNumber = Some(value = 16),
  dispatchType = "STATIC_DISPATCH",
  dynamicTypeHintFullName = IndexedSeq(),
  lineNumber = Some(value = 168),
  methodFullName = "<operator>.assignment",
  name = "<operator>.assignment",
  order = 2,
  possibleTypes = IndexedSeq(),
  signature = "",
  typeFullName = "ANY"
)
var sk: Iterator[io.shiftleft.codepropertygraph.generated.nodes.Expression] = empty iterator
val res21: List[String] = List()

sk is the node for nla_ueid or info->attrs[SMC_NLA_EID_TABLE_ENTRY]

joern> sk.l
val res20: List[io.shiftleft.codepropertygraph.generated.nodes.Expression] = List(
  Identifier(
    argumentIndex = 1,
    argumentName = None,
    code = "nla_ueid",
    columnNumber = Some(value = 17),
    dynamicTypeHintFullName = IndexedSeq(),
    lineNumber = Some(value = 168),
    name = "nla_ueid",
    order = 1,
    possibleTypes = IndexedSeq(),
    typeFullName = "nlattr*"
  ),
  Call(
    argumentIndex = 2,
    argumentName = None,
    code = "info->attrs[SMC_NLA_EID_TABLE_ENTRY]",
    columnNumber = Some(value = 28),
    dispatchType = "STATIC_DISPATCH",
    dynamicTypeHintFullName = IndexedSeq(),
    lineNumber = Some(value = 168),
    methodFullName = "<operator>.indirectIndexAccess",
    name = "<operator>.indirectIndexAccess",
    order = 2,
    possibleTypes = IndexedSeq(),
    signature = "",
    typeFullName = "ANY"
  )
)

But reachableByFlows returns nothing; it seems that the dataflow from info to info->attrs[SMC_NLA_EID_TABLE_ENTRY] is missing.

the dfg

digraph "smc_nl_remove_ueid" {  
"111669149875" [label = <(METHOD,smc_nl_remove_ueid)<SUB>166</SUB>> ]
"115964117355" [label = <(PARAM,struct sk_buff *skb)<SUB>166</SUB>> ]
"115964117356" [label = <(PARAM,struct genl_info *info)<SUB>166</SUB>> ]
"25769804764" [label = <(BLOCK,{
    struct nlattr *nla_ueid = info-&gt;attrs[SMC_NL...,{
    struct nlattr *nla_ueid = info-&gt;attrs[SMC_NL...)<SUB>167</SUB>> ]
"94489280938" [label = <(LOCAL,struct nlattr* nla_ueid: nlattr*)<SUB>168</SUB>> ]
"30064776299" [label = <(&lt;operator&gt;.assignment,*nla_ueid = info-&gt;attrs[SMC_NLA_EID_TABLE_ENTRY])<SUB>168</SUB>> ]
"68719480371" [label = <(IDENTIFIER,nla_ueid,*nla_ueid = info-&gt;attrs[SMC_NLA_EID_TABLE_ENTRY])<SUB>168</SUB>> ]
"30064776300" [label = <(&lt;operator&gt;.indirectIndexAccess,info-&gt;attrs[SMC_NLA_EID_TABLE_ENTRY])<SUB>168</SUB>> ]
"30064776301" [label = <(&lt;operator&gt;.indirectFieldAccess,info-&gt;attrs)<SUB>168</SUB>> ]
"68719480372" [label = <(IDENTIFIER,info,*nla_ueid = info-&gt;attrs[SMC_NLA_EID_TABLE_ENTRY])<SUB>168</SUB>> ]
"55834576695" [label = <(FIELD_IDENTIFIER,attrs,attrs)<SUB>168</SUB>> ]
"68719480373" [label = <(IDENTIFIER,SMC_NLA_EID_TABLE_ENTRY,*nla_ueid = info-&gt;attrs[SMC_NLA_EID_TABLE_ENTRY])<SUB>168</SUB>> ]
"94489280939" [label = <(LOCAL,char* ueid: char*)<SUB>169</SUB>> ]
"47244640940" [label = <(CONTROL_STRUCTURE,IF,if (!nla_ueid || nla_len(nla_ueid) != SMC_MAX_EID_LEN + 1))<SUB>171</SUB>> ]
"30064776302" [label = <(&lt;operator&gt;.logicalOr,!nla_ueid || nla_len(nla_ueid) != SMC_MAX_EID_L...)<SUB>171</SUB>> ]
"30064776303" [label = <(&lt;operator&gt;.logicalNot,!nla_ueid)<SUB>171</SUB>> ]
"68719480374" [label = <(IDENTIFIER,nla_ueid,!nla_ueid)<SUB>171</SUB>> ]
"30064776304" [label = <(&lt;operator&gt;.notEquals,nla_len(nla_ueid) != SMC_MAX_EID_LEN + 1)<SUB>171</SUB>> ]
"30064776305" [label = <(nla_len,nla_len(nla_ueid))<SUB>171</SUB>> ]
"68719480375" [label = <(IDENTIFIER,nla_ueid,nla_len(nla_ueid))<SUB>171</SUB>> ]
"30064776306" [label = <(&lt;operator&gt;.addition,SMC_MAX_EID_LEN + 1)<SUB>171</SUB>> ]
"68719480376" [label = <(IDENTIFIER,SMC_MAX_EID_LEN,SMC_MAX_EID_LEN + 1)<SUB>171</SUB>> ]
"90194313700" [label = <(LITERAL,1,SMC_MAX_EID_LEN + 1)<SUB>171</SUB>> ]
"25769804765" [label = <(BLOCK,&lt;empty&gt;,&lt;empty&gt;)<SUB>172</SUB>> ]
"146028888266" [label = <(RETURN,return -EINVAL;,return -EINVAL;)<SUB>172</SUB>> ]
"30064776307" [label = <(&lt;operator&gt;.minus,-EINVAL)<SUB>172</SUB>> ]
"68719480377" [label = <(IDENTIFIER,EINVAL,-EINVAL)<SUB>172</SUB>> ]
"30064776308" [label = <(&lt;operator&gt;.assignment,ueid = (char *)nla_data(nla_ueid))<SUB>173</SUB>> ]
"68719480378" [label = <(IDENTIFIER,ueid,ueid = (char *)nla_data(nla_ueid))<SUB>173</SUB>> ]
"30064776309" [label = <(&lt;operator&gt;.cast,(char *)nla_data(nla_ueid))<SUB>173</SUB>> ]
"184683593798" [label = <(UNKNOWN,char *,char *)<SUB>173</SUB>> ]
"30064776310" [label = <(nla_data,nla_data(nla_ueid))<SUB>173</SUB>> ]
"68719480379" [label = <(IDENTIFIER,nla_ueid,nla_data(nla_ueid))<SUB>173</SUB>> ]
"146028888267" [label = <(RETURN,return smc_clc_ueid_remove(ueid);,return smc_clc_ueid_remove(ueid);)<SUB>175</SUB>> ]
"30064776311" [label = <(smc_clc_ueid_remove,smc_clc_ueid_remove(ueid))<SUB>175</SUB>> ]
"68719480380" [label = <(IDENTIFIER,ueid,smc_clc_ueid_remove(ueid))<SUB>175</SUB>> ]
"128849019058" [label = <(METHOD_RETURN,int)<SUB>166</SUB>> ]
  "111669149875" -> "115964117355" 
  "111669149875" -> "115964117356" 
  "111669149875" -> "25769804764" 
  "111669149875" -> "128849019058" 
  "25769804764" -> "94489280938" 
  "25769804764" -> "30064776299" 
  "25769804764" -> "94489280939" 
  "25769804764" -> "47244640940" 
  "25769804764" -> "30064776308" 
  "25769804764" -> "146028888267" 
  "30064776299" -> "68719480371" 
  "30064776299" -> "30064776300" 
  "30064776300" -> "30064776301" 
  "30064776300" -> "68719480373" 
  "30064776301" -> "68719480372" 
  "30064776301" -> "55834576695" 
  "47244640940" -> "30064776302" 
  "47244640940" -> "25769804765" 
  "30064776302" -> "30064776303" 
  "30064776302" -> "30064776304" 
  "30064776303" -> "68719480374" 
  "30064776304" -> "30064776305" 
  "30064776304" -> "30064776306" 
  "30064776305" -> "68719480375" 
  "30064776306" -> "68719480376" 
  "30064776306" -> "90194313700" 
  "25769804765" -> "146028888266" 
  "146028888266" -> "30064776307" 
  "30064776307" -> "68719480377" 
  "30064776308" -> "68719480378" 
  "30064776308" -> "30064776309" 
  "30064776309" -> "184683593798" 
  "30064776309" -> "30064776310" 
  "30064776310" -> "68719480379" 
  "146028888267" -> "30064776311" 
  "30064776311" -> "68719480380" 
}
hac425xxx commented 3 months ago

The dataflow from info to info->attrs in <operator>.indirectFieldAccess exists, but the flow from info->attrs to info->attrs[SMC_NLA_EID_TABLE_ENTRY] is missing.

joern> def source = cpg.method.where(_.name("smc_nl_remove_ueid")).parameter
     | var x1 = cpg.method.where(_.name("smc_nl_remove_ueid")).repeat(_.astChildren)(_.until(_.isCall)).next
     | var sk = x1.asInstanceOf[Call].argument(2).asInstanceOf[Call].argument
     | sk.reachableByFlows(source).p
def source:
  Iterator[io.shiftleft.codepropertygraph.generated.nodes.MethodParameterIn]
var x1: io.shiftleft.codepropertygraph.generated.nodes.AstNode = Call(
  argumentIndex = -1,
  argumentName = None,
  code = "*nla_ueid = info->attrs[SMC_NLA_EID_TABLE_ENTRY]",
  columnNumber = Some(value = 16),
  dispatchType = "STATIC_DISPATCH",
  dynamicTypeHintFullName = IndexedSeq(),
  lineNumber = Some(value = 168),
  methodFullName = "<operator>.assignment",
  name = "<operator>.assignment",
  order = 2,
  possibleTypes = IndexedSeq(),
  signature = "",
  typeFullName = "ANY"
)
var sk: Iterator[io.shiftleft.codepropertygraph.generated.nodes.Expression] = empty iterator
val res44: List[String] = List(
  """
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────┬─────────┐
│nodeType         │tracked                                                        │line│method            │file     │
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────┼─────────┤
│MethodParameterIn│smc_nl_remove_ueid(struct sk_buff *skb, struct genl_info *info)│166 │smc_nl_remove_ueid│smc_clc.c│
│Call             │*nla_ueid = info->attrs[SMC_NLA_EID_TABLE_ENTRY]               │168 │smc_nl_remove_ueid│smc_clc.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────┴─────────┘"""
)

joern> x1.asInstanceOf[Call].argument(2).asInstanceOf[Call].argument
val res45: Iterator[io.shiftleft.codepropertygraph.generated.nodes.Expression] = non-empty iterator

joern> x1.asInstanceOf[Call].argument(2).asInstanceOf[Call].argument.l
val res46: List[io.shiftleft.codepropertygraph.generated.nodes.Expression] = List(
  Call(
    argumentIndex = 1,
    argumentName = None,
    code = "info->attrs",
    columnNumber = Some(value = 28),
    dispatchType = "STATIC_DISPATCH",
    dynamicTypeHintFullName = IndexedSeq(),
    lineNumber = Some(value = 168),
    methodFullName = "<operator>.indirectFieldAccess",
    name = "<operator>.indirectFieldAccess",
    order = 1,
    possibleTypes = IndexedSeq(),
    signature = "",
    typeFullName = "ANY"
  ),
  Identifier(
    argumentIndex = 2,
    argumentName = None,
    code = "SMC_NLA_EID_TABLE_ENTRY",
    columnNumber = Some(value = 40),
    dynamicTypeHintFullName = IndexedSeq(),
    lineNumber = Some(value = 168),
    name = "SMC_NLA_EID_TABLE_ENTRY",
    order = 2,
    possibleTypes = IndexedSeq(),
    typeFullName = "ANY"
  )
)
hac425xxx commented 3 months ago

I added some code for testing:

int smc_nl_remove_ueid(struct sk_buff *skb, struct genl_info *info)
{
    sink1(info->attrs[1]);
    sink1(info->attrs[SMC_NLA_EID_TABLE_ENTRY]);
    sink1(info->attrs);
    sink1(info[1]);
    struct nlattr *nla_ueid = info->attrs[SMC_NLA_EID_TABLE_ENTRY];
    char *ueid;

    if (!nla_ueid || nla_len(nla_ueid) != SMC_MAX_EID_LEN + 1)
        return -EINVAL;
    ueid = (char *)nla_data(nla_ueid);

    return smc_clc_ueid_remove(ueid);
}

the query


def source = cpg.method.where(_.name("smc_nl_remove_ueid")).parameter
def sink = cpg.call.name("sink1").argument
sink.reachableByFlows(source).p

the output

joern> 
     | def source = cpg.method.where(_.name("smc_nl_remove_ueid")).parameter
     | def sink = cpg.call.name("sink1").argument
     | sink.reachableByFlows(source).p
def source:
  Iterator[io.shiftleft.codepropertygraph.generated.nodes.MethodParameterIn]
def sink: Iterator[io.shiftleft.codepropertygraph.generated.nodes.Expression]
val res22: List[String] = List(
  """
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────┬─────────┐
│nodeType         │tracked                                                        │line│method            │file     │
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────┼─────────┤
│MethodParameterIn│smc_nl_remove_ueid(struct sk_buff *skb, struct genl_info *info)│170 │smc_nl_remove_ueid│smc_clc.c│
│Call             │sink1(info->attrs[1])                                          │172 │smc_nl_remove_ueid│smc_clc.c│
│Call             │sink1(info->attrs[1])                                          │172 │smc_nl_remove_ueid│smc_clc.c│
│Call             │sink1(info->attrs[SMC_NLA_EID_TABLE_ENTRY])                    │173 │smc_nl_remove_ueid│smc_clc.c│
│Call             │sink1(info->attrs[SMC_NLA_EID_TABLE_ENTRY])                    │173 │smc_nl_remove_ueid│smc_clc.c│
│Call             │sink1(info->attrs)                                             │174 │smc_nl_remove_ueid│smc_clc.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────┴─────────┘""",
  """
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────┬─────────┐
│nodeType         │tracked                                                        │line│method            │file     │
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────┼─────────┤
│MethodParameterIn│smc_nl_remove_ueid(struct sk_buff *skb, struct genl_info *info)│170 │smc_nl_remove_ueid│smc_clc.c│
│Call             │sink1(info[1])                                                 │175 │smc_nl_remove_ueid│smc_clc.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────┴─────────┘"""
)

The dataflow for info->attrs[SMC_NLA_EID_TABLE_ENTRY] and info->attrs[1] is missing.

max-leuthaeuser commented 3 months ago

I checked the AST and the CFG for your examples. Both look fine, so I guess the frontend itself is ok. Must be the creation of the DFG/DF here. @ml86 Who would be the right person to ask?

hac425xxx commented 3 months ago

The sample code is as follows:

smc.tar.gz

hac425xxx commented 3 months ago

A simpler example:


int smc_nl_remove_ueid(void *a1, struct xxx_t *info)
{
    sink1(info->attrs[1]);
    sink1(info->attrs[SMC_NLA_EID_TABLE_ENTRY]);
    sink1(info->attrs);
    sink1(info[1]);

    void *v2 = evil2(evil1(info), 122);
    void *v3 = evil3(v2, 333);

    sink1(v3);
}

the query


importCode(inputPath="/home/kali/driver_extractor/example/example", projectName="example")
def source = cpg.method.where(_.name("smc_nl_remove_ueid")).parameter
def sink = cpg.call.name("sink1").argument
sink.reachableByFlows(source).p

the output

val res17: List[String] = List(
  """
┌─────────────────┬────────────────────────────────────────────────┬────┬──────────────────┬────┐
│nodeType         │tracked                                         │line│method            │file│
├─────────────────┼────────────────────────────────────────────────┼────┼──────────────────┼────┤
│MethodParameterIn│smc_nl_remove_ueid(void *a1, struct xxx_t *info)│26  │smc_nl_remove_ueid│t1.c│
│Call             │sink1(info[1])                                  │31  │smc_nl_remove_ueid│t1.c│
└─────────────────┴────────────────────────────────────────────────┴────┴──────────────────┴────┘""",
  """
┌─────────────────┬────────────────────────────────────────────────┬────┬──────────────────┬────┐
│nodeType         │tracked                                         │line│method            │file│
├─────────────────┼────────────────────────────────────────────────┼────┼──────────────────┼────┤
│MethodParameterIn│smc_nl_remove_ueid(void *a1, struct xxx_t *info)│26  │smc_nl_remove_ueid│t1.c│
│Call             │sink1(info->attrs)                              │30  │smc_nl_remove_ueid│t1.c│
│Identifier       │evil1(info)                                     │33  │smc_nl_remove_ueid│t1.c│
│Call             │evil1(info)                                     │33  │smc_nl_remove_ueid│t1.c│
│Call             │evil2(evil1(info), 122)                         │33  │smc_nl_remove_ueid│t1.c│
│Identifier       │*v2 = evil2(evil1(info), 122)                   │33  │smc_nl_remove_ueid│t1.c│
│Identifier       │evil3(v2, 333)                                  │34  │smc_nl_remove_ueid│t1.c│
│Call             │evil3(v2, 333)                                  │34  │smc_nl_remove_ueid│t1.c│
│Identifier       │*v3 = evil3(v2, 333)                            │34  │smc_nl_remove_ueid│t1.c│
│Identifier       │sink1(v3)                                       │36  │smc_nl_remove_ueid│t1.c│
└─────────────────┴────────────────────────────────────────────────┴────┴──────────────────┴────┘""",
  """
┌─────────────────┬────────────────────────────────────────────────┬────┬──────────────────┬────┐
│nodeType         │tracked                                         │line│method            │file│
├─────────────────┼────────────────────────────────────────────────┼────┼──────────────────┼────┤
│MethodParameterIn│smc_nl_remove_ueid(void *a1, struct xxx_t *info)│26  │smc_nl_remove_ueid│t1.c│
│Call             │sink1(info->attrs)                              │30  │smc_nl_remove_ueid│t1.c│
└─────────────────┴────────────────────────────────────────────────┴────┴──────────────────┴────┘"""
)
ml86 commented 3 months ago

@DavidBakerEffendi will likely have to take a look.

hac425xxx commented 3 months ago

Hi, is there any progress?

max-leuthaeuser commented 3 months ago

@DavidBakerEffendi on vacation until 7 Aug

hac425xxx commented 3 months ago

Another example:

The dataflow from *int_ptr is lost.

void array_oob_from_buffer4(int a1, int *array, unsigned char* data) {
    int *int_ptr = (int *)(data + 8);
    int idx = *int_ptr;

    dbg_sink(idx);

    if(idx > 8) {
        return -1;
    }
    array[idx] = 0;
}

the query

    def f = cpg.method.name("array_oob_from_buffer.*").name("array_oob_from_buffer4").next
    def params = f.parameter.l
    def src = f.parameter.name(params.apply(2).name)
    def sink = f.call.name("<operator>.indirectIndexAccess").argument(2)

    f.call.argument.reachableByFlows(src).p

the result

joern>     def f = cpg.method.name("array_oob_from_buffer.*").name("array_oob_from_buffer4").next
     |     def params = f.parameter.l
     |     def src = f.parameter.name(params.apply(2).name)
     |     def sink = f.call.name("<operator>.indirectIndexAccess").argument(2)
     | 
     |     f.call.argument.reachableByFlows(src).p
def f: io.shiftleft.codepropertygraph.generated.nodes.Method
def params:
  List[io.shiftleft.codepropertygraph.generated.nodes.MethodParameterIn]
def src:
  Iterator[io.shiftleft.codepropertygraph.generated.nodes.MethodParameterIn]
def sink: Iterator[io.shiftleft.codepropertygraph.generated.nodes.Expression]
val res1: List[String] = List(
  """
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType         │tracked                                                        │line│method                │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│112 │array_oob_from_buffer4│t1.c│
│Call             │(int *)(data + 8)                                              │113 │array_oob_from_buffer4│t1.c│
│Identifier       │*int_ptr = (int *)(data + 8)                                   │113 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘""",
  """
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType         │tracked                                                        │line│method                │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│112 │array_oob_from_buffer4│t1.c│
│Call             │(int *)(data + 8)                                              │113 │array_oob_from_buffer4│t1.c│
│Identifier       │*int_ptr = (int *)(data + 8)                                   │113 │array_oob_from_buffer4│t1.c│
│Identifier       │idx = *int_ptr                                                 │114 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘""",
  """
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType         │tracked                                                        │line│method                │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│112 │array_oob_from_buffer4│t1.c│
│Call             │(int *)(data + 8)                                              │113 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘""",
  """
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType         │tracked                                                        │line│method                │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│112 │array_oob_from_buffer4│t1.c│
│Call             │data + 8                                                       │113 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘""",
  """
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType         │tracked                                                        │line│method                │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│112 │array_oob_from_buffer4│t1.c│
│Identifier       │data + 8                                                       │113 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘"""
)

After modifying the code from *int_ptr to int_ptr[0], it seems fine.

void array_oob_from_buffer4(int a1, int *array, unsigned char* data) {
    int *int_ptr = (int *)(data + 8);
    int idx = int_ptr[0];

    dbg_sink(idx);

    if(idx > 8) {
        return -1;
    }
    array[idx] = 0;
}

the result


joern>     def f = cpg.method.name("array_oob_from_buffer.*").name("array_oob_from_buffer4").next
     |     def params = f.parameter.l
     |     def src = f.parameter.name(params.apply(2).name)
     |     def sink = f.call.name("<operator>.indirectIndexAccess").argument(2)
     | 
     |     f.call.argument.reachableByFlows(src).p
def f: io.shiftleft.codepropertygraph.generated.nodes.Method
def params:
  List[io.shiftleft.codepropertygraph.generated.nodes.MethodParameterIn]
def src:
  Iterator[io.shiftleft.codepropertygraph.generated.nodes.MethodParameterIn]
def sink: Iterator[io.shiftleft.codepropertygraph.generated.nodes.Expression]
val res1: List[String] = List(
  """
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType         │tracked                                                        │line│method                │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│112 │array_oob_from_buffer4│t1.c│
│Call             │(int *)(data + 8)                                              │113 │array_oob_from_buffer4│t1.c│
│Identifier       │*int_ptr = (int *)(data + 8)                                   │113 │array_oob_from_buffer4│t1.c│
│Call             │idx = int_ptr[0]                                               │114 │array_oob_from_buffer4│t1.c│
│Identifier       │idx = int_ptr[0]                                               │114 │array_oob_from_buffer4│t1.c│
│Identifier       │dbg_sink(idx)                                                  │116 │array_oob_from_buffer4│t1.c│
│Identifier       │idx > 8                                                        │118 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘""",
  """
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType         │tracked                                                        │line│method                │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│112 │array_oob_from_buffer4│t1.c│
│Call             │(int *)(data + 8)                                              │113 │array_oob_from_buffer4│t1.c│
│Identifier       │*int_ptr = (int *)(data + 8)                                   │113 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘""",
  """
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType         │tracked                                                        │line│method                │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│112 │array_oob_from_buffer4│t1.c│
│Call             │(int *)(data + 8)                                              │113 │array_oob_from_buffer4│t1.c│
│Identifier       │*int_ptr = (int *)(data + 8)                                   │113 │array_oob_from_buffer4│t1.c│
│Call             │idx = int_ptr[0]                                               │114 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘""",
  """
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType         │tracked                                                        │line│method                │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│112 │array_oob_from_buffer4│t1.c│
│Call             │(int *)(data + 8)                                              │113 │array_oob_from_buffer4│t1.c│
│Identifier       │*int_ptr = (int *)(data + 8)                                   │113 │array_oob_from_buffer4│t1.c│
│Identifier       │idx = int_ptr[0]                                               │114 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘""",
  """
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType         │tracked                                                        │line│method                │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│112 │array_oob_from_buffer4│t1.c│
│Call             │data + 8                                                       │113 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘""",
  """
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType         │tracked                                                        │line│method                │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│112 │array_oob_from_buffer4│t1.c│
│Call             │(int *)(data + 8)                                              │113 │array_oob_from_buffer4│t1.c│
│Identifier       │*int_ptr = (int *)(data + 8)                                   │113 │array_oob_from_buffer4│t1.c│
│Call             │idx = int_ptr[0]                                               │114 │array_oob_from_buffer4│t1.c│
│Identifier       │idx = int_ptr[0]                                               │114 │array_oob_from_buffer4│t1.c│
│Identifier       │dbg_sink(idx)                                                  │116 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘""",
  """
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType         │tracked                                                        │line│method                │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│112 │array_oob_from_buffer4│t1.c│
│Identifier       │data + 8                                                       │113 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘""",
  """
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType         │tracked                                                        │line│method                │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│112 │array_oob_from_buffer4│t1.c│
│Call             │(int *)(data + 8)                                              │113 │array_oob_from_buffer4│t1.c│
│Identifier       │*int_ptr = (int *)(data + 8)                                   │113 │array_oob_from_buffer4│t1.c│
│Call             │idx = int_ptr[0]                                               │114 │array_oob_from_buffer4│t1.c│
│Identifier       │idx = int_ptr[0]                                               │114 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘""",
  """
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType         │tracked                                                        │line│method                │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│112 │array_oob_from_buffer4│t1.c│
│Call             │(int *)(data + 8)                                              │113 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘"""
)

However, the dataflow from idx to array[idx] is also missing:

f.call.name("<operator>.indirectIndexAccess").argument.code("idx").reachableByFlows(src).p
hac425xxx commented 3 months ago

Also, is there any documentation that can guide me in debugging the data-flow analysis code?

I want to debug and analyze it

hac425xxx commented 3 months ago

By checking the DDG for the following code (array_oob_from_buffer1_1):

// ok
void array_oob_from_buffer1(int a1, int *array, unsigned char* data) {
    struct xxxxx_t* xt = data;
    array[xt->x2] = 0;
}

void array_oob_from_buffer1_1(int a1, int *array, unsigned char* data) {
    struct xxxxx_t* xt = data;
    int idx = xt->x2;
    array[idx] = 0;
}

The flow from xt->x2 to idx is OK, but the assigned idx is not linked with array[idx].

image

fabsx00 commented 3 months ago

@hac425xxx there have recently been a lot of fixes for the C frontend, and I could imagine that a regression has crept in. Could you be so kind as to test a version from 6 months ago and see if it shows the same behavior?

hac425xxx commented 3 months ago

@fabsx00 I tried https://github.com/joernio/joern/releases/tag/v2.0.222

With that version, the dataflow to the array index access is fine.

void array_oob_from_buffer4_def(int a1, int *array, unsigned char* data) {
    int *int_ptr = (int *)(data + 8);
    int idx = int_ptr[0];

    int buffer[10] = {0};

    dbg_sink(idx);

    buffer[idx] = 1;

    if(idx > 8) {
        return -1;
    }
    array[idx] = 0;

    dbg_sink3(array[idx]);

    dbg_sink2(idx);

}

query

    def f = cpg.method.name("array_oob_from_buffer.*").name("array_oob_from_buffer4_def").next
    def params = f.parameter.l
    def src = f.parameter.name(params.apply(2).name)
    def sink = f.call.name("<operator>.indirectIndexAccess").argument(2)

    f.call.argument.reachableByFlows(src).p

output

joern>     def f = cpg.method.name("array_oob_from_buffer.*").name("array_oob_from_buffer4_def").next
     |     def params = f.parameter.l
     |     def src = f.parameter.name(params.apply(2).name)
     |     def sink = f.call.name("<operator>.indirectIndexAccess").argument(2)
     | 
     |     f.call.argument.reachableByFlows(src).p
def f: io.shiftleft.codepropertygraph.generated.nodes.Method
def params:
  List[io.shiftleft.codepropertygraph.generated.nodes.MethodParameterIn]
def src:
  Iterator[io.shiftleft.codepropertygraph.generated.nodes.MethodParameterIn]
def sink: Iterator[io.shiftleft.codepropertygraph.generated.nodes.Expression]
val res10: List[String] = List(
  """_____________________________________________________________________________________________________
| nodeType          | tracked                        | lineNumber| method                     | file |
|====================================================================================================|
| MethodParameterIn | array_oob_from_buffer4_def(... | 148       | array_oob_from_buffer4_def | t1.c |
| Call              | (int *)(data + 8)              | 149       | array_oob_from_buffer4_def | t1.c |
| Identifier        | *int_ptr = (int *)(data + 8)   | 149       | array_oob_from_buffer4_def | t1.c |
| Call              | idx = int_ptr[0]               | 150       | array_oob_from_buffer4_def | t1.c |
| Identifier        | idx = int_ptr[0]               | 150       | array_oob_from_buffer4_def | t1.c |
| Identifier        | dbg_sink(idx)                  | 154       | array_oob_from_buffer4_def | t1.c |
| Identifier        | idx > 8                        | 158       | array_oob_from_buffer4_def | t1.c |
| Identifier        | array[idx] = 0                 | 161       | array_oob_from_buffer4_def | t1.c |
""",
  """_____________________________________________________________________________________________________
| nodeType          | tracked                        | lineNumber| method                     | file |
|====================================================================================================|
| MethodParameterIn | array_oob_from_buffer4_def(... | 148       | array_oob_from_buffer4_def | t1.c |
| Call              | (int *)(data + 8)              | 149       | array_oob_from_buffer4_def | t1.c |
| Identifier        | *int_ptr = (int *)(data + 8)   | 149       | array_oob_from_buffer4_def | t1.c |
| Identifier        | idx = int_ptr[0]               | 150       | array_oob_from_buffer4_def | t1.c |
""",
  """_____________________________________________________________________________________________________
| nodeType          | tracked                        | lineNumber| method                     | file |
|====================================================================================================|
| MethodParameterIn | array_oob_from_buffer4_def(... | 148       | array_oob_from_buffer4_def | t1.c |
| Call              | data + 8                       | 149       | array_oob_from_buffer4_def | t1.c |
""",
  """_____________________________________________________________________________________________________
| nodeType          | tracked                        | lineNumber| method                     | file |
|====================================================================================================|
| MethodParameterIn | array_oob_from_buffer4_def(... | 148       | array_oob_from_buffer4_def | t1.c |
| Call              | (int *)(data + 8)              | 149       | array_oob_from_buffer4_def | t1.c |
| Identifier        | *int_ptr = (int *)(data + 8)   | 149       | array_oob_from_buffer4_def | t1.c |
| Call              | idx = int_ptr[0]               | 150       | array_oob_from_buffer4_def | t1.c |
| Identifier        | idx = int_ptr[0]               | 150       | array_oob_from_buffer4_def | t1.c |
| Identifier        | dbg_sink(idx)                  | 154       | array_oob_from_buffer4_def | t1.c |
""",
  """_____________________________________________________________________________________________________
| nodeType          | tracked                        | lineNumber| method                     | file |
|====================================================================================================|
| MethodParameterIn | array_oob_from_buffer4_def(... | 148       | array_oob_from_buffer4_def | t1.c |
| Call              | (int *)(data + 8)              | 149       | array_oob_from_buffer4_def | t1.c |
| Identifier        | *int_ptr = (int *)(data + 8)   | 149       | array_oob_from_buffer4_def | t1.c |
""",
  """_____________________________________________________________________________________________________
| nodeType          | tracked                        | lineNumber| method                     | file |
|====================================================================================================|
| MethodParameterIn | array_oob_from_buffer4_def(... | 148       | array_oob_from_buffer4_def | t1.c |
| Call              | (int *)(data + 8)              | 149       | array_oob_from_buffer4_def | t1.c |
| Identifier        | *int_ptr = (int *)(data + 8)   | 149       | array_oob_from_buffer4_def | t1.c |
| Call              | idx = int_ptr[0]               | 150       | array_oob_from_buffer4_def | t1.c |
| Identifier        | idx = int_ptr[0]               | 150       | array_oob_from_buffer4_def | t1.c |
| Identifier        | dbg_sink(idx)                  | 154       | array_oob_from_buffer4_def | t1.c |
| Identifier        | buffer[idx] = 1                | 156       | array_oob_from_buffer4_def | t1.c |
""",
  """_____________________________________________________________________________________________________
| nodeType          | tracked                        | lineNumber| method                     | file |
|====================================================================================================|
| MethodParameterIn | array_oob_from_buffer4_def(... | 148       | array_oob_from_buffer4_def | t1.c |
| Identifier        | data + 8                       | 149       | array_oob_from_buffer4_def | t1.c |
""",
  """_____________________________________________________________________________________________________
| nodeType          | tracked                        | lineNumber| method                     | file |
|====================================================================================================|
| MethodParameterIn | array_oob_from_buffer4_def(... | 148       | array_oob_from_buffer4_def | t1.c |
| Call              | (int *)(data + 8)              | 149       | array_oob_from_buffer4_def | t1.c |
| Identifier        | *int_ptr = (int *)(data + 8)   | 149       | array_oob_from_buffer4_def | t1.c |
| Call              | idx = int_ptr[0]               | 150       | array_oob_from_buffer4_def | t1.c |
""",
  """_____________________________________________________________________________________________________
| nodeType          | tracked                        | lineNumber| method                     | file |
|====================================================================================================|
| MethodParameterIn | array_oob_from_buffer4_def(... | 148       | array_oob_from_buffer4_def | t1.c |
| Call              | (int *)(data + 8)              | 149       | array_oob_from_buffer4_def | t1.c |
| Identifier        | *int_ptr = (int *)(data + 8)   | 149       | array_oob_from_buffer4_def | t1.c |
| Call              | idx = int_ptr[0]               | 150       | array_oob_from_buffer4_def | t1.c |
| Identifier        | idx = int_ptr[0]               | 150       | array_oob_from_buffer4_def | t1.c |
| Identifier        | dbg_sink(idx)                  | 154       | array_oob_from_buffer4_def | t1.c |
| Identifier        | idx > 8                        | 158       | array_oob_from_buffer4_def | t1.c |
""",
  """_____________________________________________________________________________________________________
| nodeType          | tracked                        | lineNumber| method                     | file |
|====================================================================================================|
| MethodParameterIn | array_oob_from_buffer4_def(... | 148       | array_oob_from_buffer4_def | t1.c |
| Call              | (int *)(data + 8)              | 149       | array_oob_from_buffer4_def | t1.c |
| Identifier        | *int_ptr = (int *)(data + 8)   | 149       | array_oob_from_buffer4_def | t1.c |
| Call              | idx = int_ptr[0]               | 150       | array_oob_from_buffer4_def | t1.c |
| Identifier        | idx = int_ptr[0]               | 150       | array_oob_from_buffer4_def | t1.c |
""",
  """_____________________________________________________________________________________________________
| nodeType          | tracked                        | lineNumber| method                     | file |
|====================================================================================================|
| MethodParameterIn | array_oob_from_buffer4_def(... | 148       | array_oob_from_buffer4_def | t1.c |
| Call              | (int *)(data + 8)              | 149       | array_oob_from_buffer4_def | t1.c |
| Identifier        | *int_ptr = (int *)(data + 8)   | 149       | array_oob_from_buffer4_def | t1.c |
| Call              | idx = int_ptr[0]               | 150       | array_oob_from_buffer4_def | t1.c |
| Identifier        | idx = int_ptr[0]               | 150       | array_oob_from_buffer4_def | t1.c |
| Identifier        | dbg_sink(idx)                  | 154       | array_oob_from_buffer4_def | t1.c |
| Identifier        | idx > 8                        | 158       | array_oob_from_buffer4_def | t1.c |
| Identifier        | dbg_sink2(idx)                 | 165       | array_oob_from_buffer4_def | t1.c |
""",
  """_____________________________________________________________________________________________________
| nodeType          | tracked                        | lineNumber| method                     | file |
|====================================================================================================|
| MethodParameterIn | array_oob_from_buffer4_def(... | 148       | array_oob_from_buffer4_def | t1.c |
| Call              | (int *)(data + 8)              | 149       | array_oob_from_buffer4_def | t1.c |
""",
  """_____________________________________________________________________________________________________
| nodeType          | tracked                        | lineNumber| method                     | file |
|====================================================================================================|
| MethodParameterIn | array_oob_from_buffer4_def(... | 148       | array_oob_from_buffer4_def | t1.c |
| Call              | (int *)(data + 8)              | 149       | array_oob_from_buffer4_def | t1.c |
| Identifier        | *int_ptr = (int *)(data + 8)   | 149       | array_oob_from_buffer4_def | t1.c |
| Call              | idx = int_ptr[0]               | 150       | array_oob_from_buffer4_def | t1.c |
| Identifier        | idx = int_ptr[0]               | 150       | array_oob_from_buffer4_def | t1.c |
| Identifier        | dbg_sink(idx)                  | 154       | array_oob_from_buffer4_def | t1.c |
| Identifier        | idx > 8                        | 158       | array_oob_from_buffer4_def | t1.c |
| Identifier        | dbg_sink3(array[idx])          | 163       | array_oob_from_buffer4_def | t1.c |
"""
)

But the following test case is still wrong: the dataflow from *int_ptr to idx is missing.

void array_oob_from_buffer4(int a1, int *array, unsigned char* data) {
    int *int_ptr = (int *)(data + 8);
    int idx = *int_ptr;

    dbg_sink(idx);

    if(idx > 8) {
        return -1;
    }
    array[idx] = 0;

    dbg_sink3(array[idx]);

    dbg_sink2(idx);

}

output

joern>     def f = cpg.method.name("array_oob_from_buffer.*").name("array_oob_from_buffer4").next
     |     def params = f.parameter.l
     |     def src = f.parameter.name(params.apply(2).name)
     |     def sink = f.call.name("<operator>.indirectIndexAccess").argument(2)
     | 
     |     f.call.argument.reachableByFlows(src).p
     | 
def f: io.shiftleft.codepropertygraph.generated.nodes.Method
def params:
  List[io.shiftleft.codepropertygraph.generated.nodes.MethodParameterIn]
def src:
  Iterator[io.shiftleft.codepropertygraph.generated.nodes.MethodParameterIn]
def sink: Iterator[io.shiftleft.codepropertygraph.generated.nodes.Expression]
val res13: List[String] = List(
  """_________________________________________________________________________________________________
| nodeType          | tracked                        | lineNumber| method                 | file |
|================================================================================================|
| MethodParameterIn | array_oob_from_buffer4(int ... | 131       | array_oob_from_buffer4 | t1.c |
| Identifier        | data + 8                       | 132       | array_oob_from_buffer4 | t1.c |
""",
  """_________________________________________________________________________________________________
| nodeType          | tracked                        | lineNumber| method                 | file |
|================================================================================================|
| MethodParameterIn | array_oob_from_buffer4(int ... | 131       | array_oob_from_buffer4 | t1.c |
| Call              | (int *)(data + 8)              | 132       | array_oob_from_buffer4 | t1.c |
""",
  """_________________________________________________________________________________________________
| nodeType          | tracked                        | lineNumber| method                 | file |
|================================================================================================|
| MethodParameterIn | array_oob_from_buffer4(int ... | 131       | array_oob_from_buffer4 | t1.c |
| Call              | (int *)(data + 8)              | 132       | array_oob_from_buffer4 | t1.c |
| Identifier        | *int_ptr = (int *)(data + 8)   | 132       | array_oob_from_buffer4 | t1.c |
""",
  """_________________________________________________________________________________________________
| nodeType          | tracked                        | lineNumber| method                 | file |
|================================================================================================|
| MethodParameterIn | array_oob_from_buffer4(int ... | 131       | array_oob_from_buffer4 | t1.c |
| Call              | (int *)(data + 8)              | 132       | array_oob_from_buffer4 | t1.c |
| Identifier        | *int_ptr = (int *)(data + 8)   | 132       | array_oob_from_buffer4 | t1.c |
| Identifier        | idx = *int_ptr                 | 133       | array_oob_from_buffer4 | t1.c |
""",
  """_________________________________________________________________________________________________
| nodeType          | tracked                        | lineNumber| method                 | file |
|================================================================================================|
| MethodParameterIn | array_oob_from_buffer4(int ... | 131       | array_oob_from_buffer4 | t1.c |
| Call              | data + 8                       | 132       | array_oob_from_buffer4 | t1.c |
"""
)
hac425xxx commented 3 months ago

I guess the handler for <operator>.indirection may be wrong. I also tried adding a custom dataflow config, but it did not work:


import io.joern.dataflowengineoss.semanticsloader.FlowSemantic
import io.shiftleft.semanticcpg.layers.LayerCreatorOptions
import io.joern.dataflowengineoss.layers.dataflows.OssDataFlowOptions
import io.joern.dataflowengineoss.layers.dataflows.OssDataFlow
import io.shiftleft.semanticcpg.layers.{LayerCreator, LayerCreatorContext}

val extraFlows = List(
    FlowSemantic.from("<operator>.indirection", List((1, -1))),
)

val context = new LayerCreatorContext(cpg)
val options = new OssDataFlowOptions(extraFlows = extraFlows)
new OssDataFlow(options).run(context)
hac425xxx commented 3 months ago

I modified the UsageAnalyzer class, and the dataflow from *int_ptr to idx now seems to work.

┌──(kali㉿kali)-[~/joern]
└─$ git diff dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/passes/reachingdef/DdgGenerator.scala
diff --git a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/passes/reachingdef/DdgGenerator.scala b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/passes/reachingdef/DdgGenerator.scala
index 7693f2875..bbe918249 100644
--- a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/passes/reachingdef/DdgGenerator.scala
+++ b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/passes/reachingdef/DdgGenerator.scala
@@ -267,8 +267,8 @@ private class UsageAnalyzer(

   private val allNodes = in.keys.toList
   private val containerSet =
-    Set(Operators.fieldAccess, Operators.indexAccess, Operators.indirectIndexAccess, Operators.indirectFieldAccess)
-  private val indirectionAccessSet = Set(Operators.addressOf, Operators.indirection)
+    Set(Operators.fieldAccess, Operators.indexAccess, Operators.indirectIndexAccess, Operators.indirectFieldAccess, Operators.indirection)
+  private val indirectionAccessSet = Set(Operators.addressOf)
   val usedIncomingDefs: Map[StoredNode, Map[StoredNode, Set[Definition]]] = initUsedIncomingDefs()

   def initUsedIncomingDefs(): Map[StoredNode, Map[StoredNode, Set[Definition]]] = {

query

joern>     def f = cpg.method.name("array_oob_from_buffer.*").name("array_oob_from_buffer4").next
     |     def params = f.parameter.l
     |     def src = f.parameter.name(params.apply(2).name)
     |     def sink = f.call.name("<operator>.indirectIndexAccess").argument(2)
     | 
     |     f.call.argument.reachableByFlows(src).p
     | 
def f: io.shiftleft.codepropertygraph.generated.nodes.Method
def params:
  List[io.shiftleft.codepropertygraph.generated.nodes.MethodParameterIn]
def src:
  Iterator[io.shiftleft.codepropertygraph.generated.nodes.MethodParameterIn]
def sink: Iterator[io.shiftleft.codepropertygraph.generated.nodes.Expression]
val res11: List[String] = List(
  """
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType         │tracked                                                        │line│method                │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│140 │array_oob_from_buffer4│t1.c│
│Call             │(int *)(data + 8)                                              │141 │array_oob_from_buffer4│t1.c│
│Identifier       │*int_ptr = (int *)(data + 8)                                   │141 │array_oob_from_buffer4│t1.c│
│Identifier       │idx = *int_ptr                                                 │142 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘""",
  """
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType         │tracked                                                        │line│method                │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│140 │array_oob_from_buffer4│t1.c│
│Call             │(int *)(data + 8)                                              │141 │array_oob_from_buffer4│t1.c│
│Identifier       │*int_ptr = (int *)(data + 8)                                   │141 │array_oob_from_buffer4│t1.c│
│Call             │idx = *int_ptr                                                 │142 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘""",
  """
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType         │tracked                                                        │line│method                │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│140 │array_oob_from_buffer4│t1.c│
│Call             │(int *)(data + 8)                                              │141 │array_oob_from_buffer4│t1.c│
│Identifier       │*int_ptr = (int *)(data + 8)                                   │141 │array_oob_from_buffer4│t1.c│
│Call             │idx = *int_ptr                                                 │142 │array_oob_from_buffer4│t1.c│
│Identifier       │idx = *int_ptr                                                 │142 │array_oob_from_buffer4│t1.c│
│Identifier       │dbg_sink(idx)                                                  │144 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘""",
  """
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType         │tracked                                                        │line│method                │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│140 │array_oob_from_buffer4│t1.c│
│Call             │(int *)(data + 8)                                              │141 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘""",
  """
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType         │tracked                                                        │line│method                │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│140 │array_oob_from_buffer4│t1.c│
│Identifier       │data + 8                                                       │141 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘""",
  """
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType         │tracked                                                        │line│method                │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│140 │array_oob_from_buffer4│t1.c│
│Call             │(int *)(data + 8)                                              │141 │array_oob_from_buffer4│t1.c│
│Identifier       │*int_ptr = (int *)(data + 8)                                   │141 │array_oob_from_buffer4│t1.c│
│Call             │idx = *int_ptr                                                 │142 │array_oob_from_buffer4│t1.c│
│Identifier       │idx = *int_ptr                                                 │142 │array_oob_from_buffer4│t1.c│
│Identifier       │dbg_sink(idx)                                                  │144 │array_oob_from_buffer4│t1.c│
│Identifier       │idx > 8                                                        │146 │array_oob_from_buffer4│t1.c│
│Identifier       │dbg_sink2(idx)                                                 │153 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘""",
  """
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType         │tracked                                                        │line│method                │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│140 │array_oob_from_buffer4│t1.c│
│Call             │(int *)(data + 8)                                              │141 │array_oob_from_buffer4│t1.c│
│Identifier       │*int_ptr = (int *)(data + 8)                                   │141 │array_oob_from_buffer4│t1.c│
│Call             │idx = *int_ptr                                                 │142 │array_oob_from_buffer4│t1.c│
│Identifier       │idx = *int_ptr                                                 │142 │array_oob_from_buffer4│t1.c│
│Identifier       │dbg_sink(idx)                                                  │144 │array_oob_from_buffer4│t1.c│
│Identifier       │idx > 8                                                        │146 │array_oob_from_buffer4│t1.c│
│Identifier       │dbg_sink3(array[idx])                                          │151 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘""",
  """
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType         │tracked                                                        │line│method                │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│140 │array_oob_from_buffer4│t1.c│
│Call             │(int *)(data + 8)                                              │141 │array_oob_from_buffer4│t1.c│
│Identifier       │*int_ptr = (int *)(data + 8)                                   │141 │array_oob_from_buffer4│t1.c│
│Call             │idx = *int_ptr                                                 │142 │array_oob_from_buffer4│t1.c│
│Identifier       │idx = *int_ptr                                                 │142 │array_oob_from_buffer4│t1.c│
│Identifier       │dbg_sink(idx)                                                  │144 │array_oob_from_buffer4│t1.c│
│Identifier       │idx > 8                                                        │146 │array_oob_from_buffer4│t1.c│
│Identifier       │array[idx] = 0                                                 │149 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘""",
  """
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType         │tracked                                                        │line│method                │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│140 │array_oob_from_buffer4│t1.c│
│Call             │(int *)(data + 8)                                              │141 │array_oob_from_buffer4│t1.c│
│Identifier       │*int_ptr = (int *)(data + 8)                                   │141 │array_oob_from_buffer4│t1.c│
│Call             │idx = *int_ptr                                                 │142 │array_oob_from_buffer4│t1.c│
│Identifier       │idx = *int_ptr                                                 │142 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘""",
  """
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType         │tracked                                                        │line│method                │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│140 │array_oob_from_buffer4│t1.c│
│Call             │(int *)(data + 8)                                              │141 │array_oob_from_buffer4│t1.c│
│Identifier       │*int_ptr = (int *)(data + 8)                                   │141 │array_oob_from_buffer4│t1.c│
│Call             │idx = *int_ptr                                                 │142 │array_oob_from_buffer4│t1.c│
│Identifier       │idx = *int_ptr                                                 │142 │array_oob_from_buffer4│t1.c│
│Identifier       │dbg_sink(idx)                                                  │144 │array_oob_from_buffer4│t1.c│
│Identifier       │idx > 8                                                        │146 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘""",
  """
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType         │tracked                                                        │line│method                │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│140 │array_oob_from_buffer4│t1.c│
│Call             │(int *)(data + 8)                                              │141 │array_oob_from_buffer4│t1.c│
│Identifier       │*int_ptr = (int *)(data + 8)                                   │141 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘""",
  """
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType         │tracked                                                        │line│method                │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│140 │array_oob_from_buffer4│t1.c│
│Call             │data + 8                                                       │141 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘"""
)

And now the array index access seems to work too.

└─$ git log
commit bd38a15ada2f09098265c5e4ca44ba9959f808af (HEAD -> master, tag: v4.0.9, origin/master, origin/HEAD)
Author: Michael Pollmeier <michael@michaelpollmeier.com>
Date:   Mon Jul 22 16:12:45 2024 +0200

    workaround for scala completion bug (#4791)

    on stage: remove module-info.class from dependency jars - a hacky workaround for a
    scala3 compiler bug: https://github.com/scala/scala3/issues/20421

    Fixes https://github.com/joernio/joern/issues/4625
hac425xxx commented 3 months ago

Currently, the initial test case still fails: https://github.com/joernio/joern/issues/4794#issue-2424775544

hac425xxx commented 3 months ago

The following test cases still fail:


/*
 * Data-flow reproducer: an int is read from the start of `data` through a
 * cast to int* and used, without any bounds check, as an index into `array`.
 * The parameter `a1` is not used.
 */
void array_oob_from_buffer2(int a1, int *array, unsigned char* data) {
    /* cast + dereference: this is the expression the flow from `data` must pass through */
    int idx = *(int*)data;
    /* index access with the value derived from `data` */
    array[idx] = 0;
}

/*
 * Data-flow reproducer: an int is read from `data + 8` through a cast to
 * int* and used, without any bounds check, as an index into `array`.
 * The parameter `a1` is not used.
 */
void array_oob_from_buffer3(int a1, int *array, unsigned char* data) {
    /* pointer arithmetic nested inside the cast, then dereferenced */
    int idx = *(int*)(data + 8);
    /* index access with the value derived from `data` */
    array[idx] = 0;
}
hac425xxx commented 3 months ago

Maybe the Unknown node used for the type affects the cast call. Why is `int *` represented as Unknown?


joern>     def f = cpg.method.name("array_oob_from_buffer2").next
     |     def src = f.parameter.name("data")
     |     f.call.name("<operator>.cast").argument.reachableByFlows(src).p
def f: io.shiftleft.codepropertygraph.generated.nodes.Method
def src:
  Iterator[io.shiftleft.codepropertygraph.generated.nodes.MethodParameterIn]
val res11: List[String] = List(
  """
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType         │tracked                                                        │line│method                │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer2(int a1, int *array, unsigned char* data)│124 │array_oob_from_buffer2│t1.c│
│Identifier       │(int*)data                                                     │125 │array_oob_from_buffer2│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘"""
)

joern> f.call.name("<operator>.cast").argument.l
val res12: List[io.shiftleft.codepropertygraph.generated.nodes.Expression] = List(
  Unknown(
    argumentIndex = 1,
    argumentName = None,
    code = "int*",
    columnNumber = Some(value = 17),
    containedRef = "<empty>",
    dynamicTypeHintFullName = IndexedSeq(),
    lineNumber = Some(value = 125),
    order = 1,
    parserTypeName = "CASTTypeId",
    possibleTypes = IndexedSeq(),
    typeFullName = "<empty>"
  ),
  Identifier(
    argumentIndex = 2,
    argumentName = None,
    code = "data",
    columnNumber = Some(value = 22),
    dynamicTypeHintFullName = IndexedSeq(),
    lineNumber = Some(value = 125),
    name = "data",
    order = 2,
    possibleTypes = IndexedSeq(),
    typeFullName = "unsigned char*"
  )
)

I checked cpg.typedecl, and it does contain an entry for int*:

  TypeDecl(
    aliasTypeFullName = None,
    astParentFullName = "<includes>:<global>",
    astParentType = "NAMESPACE_BLOCK",
    code = "int*",
    columnNumber = None,
    filename = "<includes>",
    fullName = "int*",
    inheritsFromTypeFullName = IndexedSeq(),
    isExternal = true,
    lineNumber = None,
    name = "int*",
    offset = None,
    offsetEnd = None,
    order = -1
  ),
hac425xxx commented 3 months ago

I checked the joern code: it always creates a new Unknown node for the cast's type. Could that be wrong? @max-leuthaeuser

Related code: https://github.com/joernio/joern/issues/4794#issuecomment-2252627980

  /** Builds the AST for a cast expression as a statically dispatched call node.
    *
    * The call receives two arguments, in this order: a node for the cast's type id, then the AST of the operand.
    * The type id is currently emitted through `unknownNode` rather than as a typed node.
    *
    * @param castExpression
    *   the cast expression to convert
    * @return
    *   the call AST produced by `callAst` for the cast node and its two arguments
    */
  private def astForCastExpression(castExpression: IASTCastExpression): Ast = {
    // `Operators.cast` is passed twice - presumably as name and method full name; confirm against `callNode`.
    val cpgCastExpression =
      callNode(castExpression, code(castExpression), Operators.cast, Operators.cast, DispatchTypes.STATIC_DISPATCH)

    val expr    = astForExpression(castExpression.getOperand)
    val argNode = castExpression.getTypeId
    // The target type is wrapped via `unknownNode`, carrying only the type id's code string.
    val arg     = unknownNode(argNode, code(argNode))

    // Type id first, operand second.
    callAst(cpgCastExpression, List(Ast(arg), expr))
  }
max-leuthaeuser commented 3 months ago

At the time of writing this I did not know how to represent a CASTTypeId. You may want to change that too (maybe to an identifier?). But what's the FlowSemantic of Operators.cast, and is it respected in the DdgGenerator?

hac425xxx commented 3 months ago

After applying this patch, all the data flows seem to work. It probably needs an expert to review it.

┌──(kali㉿kali)-[~/joern]
└─$ git diff dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/passes/reachingdef/DdgGenerator.scala
diff --git a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/passes/reachingdef/DdgGenerator.scala b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/passes/reachingdef/DdgGenerator.scala
index c0736c875..ebeb6c5ee 100644
--- a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/passes/reachingdef/DdgGenerator.scala
+++ b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/passes/reachingdef/DdgGenerator.scala
@@ -267,8 +267,8 @@ private class UsageAnalyzer(

   private val allNodes = in.keys.toList
   private val containerSet =
-    Set(Operators.fieldAccess, Operators.indexAccess, Operators.indirectIndexAccess, Operators.indirectFieldAccess)
-  private val indirectionAccessSet = Set(Operators.addressOf, Operators.indirection)
+    Set(Operators.fieldAccess, Operators.indexAccess, Operators.indirectIndexAccess, Operators.indirectFieldAccess, Operators.indirection, Operators.cast)
+  private val indirectionAccessSet = Set(Operators.addressOf, Operators.cast)
   val usedIncomingDefs: Map[StoredNode, Map[StoredNode, Set[Definition]]] = initUsedIncomingDefs()

   def initUsedIncomingDefs(): Map[StoredNode, Map[StoredNode, Set[Definition]]] = {
@@ -295,7 +295,7 @@ private class UsageAnalyzer(
   private def isContainer(use: StoredNode, inElement: StoredNode): Boolean = {
     inElement match {
       case call: Call if containerSet.contains(call.name) =>
-        call.argument.headOption.exists { base =>
+         call.ast.isCall.argument.exists { base =>
           nodeToString(use) == nodeToString(base)
         }
       case _ => false
@@ -309,11 +309,11 @@ private class UsageAnalyzer(
       case call: Call if containerSet.contains(call.name) =>
         inElement match {
           case param: MethodParameterIn =>
-            call.argument.headOption.exists { base =>
+            call.ast.isCall.argument.exists { base =>
               nodeToString(base).contains(param.name)
             }
           case identifier: Identifier =>
-            call.argument.headOption.exists { base =>
+            call.ast.isCall.argument.exists { base =>
               nodeToString(base).contains(identifier.name)
             }
           case _ => false
@@ -353,7 +353,7 @@ private class UsageAnalyzer(
       case param: MethodParameterIn =>
         nodeToString(use).contains(param.name)
       case call: Call if indirectionAccessSet.contains(call.name) =>
-        call.argumentOption(1).exists(x => nodeToString(use).contains(x.code))
+        call.ast.isCall.argument.exists(x => nodeToString(use).contains(x.code))
       case call: Call =>
         nodeToString(use).contains(call.code)
       case identifier: Identifier => nodeToString(use).contains(identifier.name)

output


joern>     def f = cpg.method.name("array_oob_from_buffer2").next
     |     def src = f.parameter.name("data")
     |     f.call.argument.reachableByFlows(src).p
def f: io.shiftleft.codepropertygraph.generated.nodes.Method
def src:
  Iterator[io.shiftleft.codepropertygraph.generated.nodes.MethodParameterIn]
val res1: List[String] = List(
  """_________________________________________________________________________________________________
| nodeType          | tracked                        | lineNumber| method                 | file |
|================================================================================================|
| MethodParameterIn | array_oob_from_buffer2(int ... | 132       | array_oob_from_buffer2 | t1.c |
| Call              | idx = *(int*)data              | 133       | array_oob_from_buffer2 | t1.c |
""",
  """_________________________________________________________________________________________________
| nodeType          | tracked                        | lineNumber| method                 | file |
|================================================================================================|
| MethodParameterIn | array_oob_from_buffer2(int ... | 132       | array_oob_from_buffer2 | t1.c |
| Identifier        | (int*)data                     | 133       | array_oob_from_buffer2 | t1.c |
""",
  """_________________________________________________________________________________________________
| nodeType          | tracked                        | lineNumber| method                 | file |
|================================================================================================|
| MethodParameterIn | array_oob_from_buffer2(int ... | 132       | array_oob_from_buffer2 | t1.c |
| Call              | idx = *(int*)data              | 133       | array_oob_from_buffer2 | t1.c |
| Identifier        | idx = *(int*)data              | 133       | array_oob_from_buffer2 | t1.c |
| Identifier        | array[idx] = 0                 | 134       | array_oob_from_buffer2 | t1.c |
""",
  """_________________________________________________________________________________________________
| nodeType          | tracked                        | lineNumber| method                 | file |
|================================================================================================|
| MethodParameterIn | array_oob_from_buffer2(int ... | 132       | array_oob_from_buffer2 | t1.c |
| Call              | (int*)data                     | 133       | array_oob_from_buffer2 | t1.c |
""",
  """_________________________________________________________________________________________________
| nodeType          | tracked                        | lineNumber| method                 | file |
|================================================================================================|
| MethodParameterIn | array_oob_from_buffer2(int ... | 132       | array_oob_from_buffer2 | t1.c |
| Call              | idx = *(int*)data              | 133       | array_oob_from_buffer2 | t1.c |
| Identifier        | idx = *(int*)data              | 133       | array_oob_from_buffer2 | t1.c |
"""
)

output2

joern>     def f = cpg.method.name("array_oob_from_buffer3")
     |     def src = f.parameter.name("data")
     |     f.call.argument.reachableByFlows(src).p
def f: Iterator[io.shiftleft.codepropertygraph.generated.nodes.Method]
def src:
  Iterator[io.shiftleft.codepropertygraph.generated.nodes.MethodParameterIn]
val res6: List[String] = List(
  """_________________________________________________________________________________________________
| nodeType          | tracked                        | lineNumber| method                 | file |
|================================================================================================|
| MethodParameterIn | array_oob_from_buffer3(int ... | 142       | array_oob_from_buffer3 | t1.c |
| Call              | idx = *(int *)(data + 8)       | 143       | array_oob_from_buffer3 | t1.c |
| Identifier        | idx = *(int *)(data + 8)       | 143       | array_oob_from_buffer3 | t1.c |
| Identifier        | array[idx] = 0                 | 144       | array_oob_from_buffer3 | t1.c |
""",
  """_________________________________________________________________
hac425xxx commented 3 months ago

Something is weird: the same patch works on Linux but does not work on Windows.

hac425xxx commented 3 months ago

a new small bug

/*
 * Data-flow reproducer: a value read from `data + 8` is propagated through
 * several helper calls (dbg_sink*, dbg_flow*) and finally passed to
 * dbg_sink5 inside a return statement. The helpers are not defined in this
 * snippet. The parameter `a1` is not used.
 *
 * NOTE(review): the function is declared `void` but all three return
 * statements return a value.
 */
void array_oob_from_buffer4(int a1, int *array, unsigned char* data) {
    /* cast result is stored in a pointer first, then dereferenced separately */
    int *int_ptr = (int *)(data + 8);
    int idx = *int_ptr;

    dbg_sink(idx);

    /* only an upper bound is checked here */
    if(idx > 8) {
        return -1;
    }
    array[idx] = 0;

    dbg_sink3(array[idx]);

    /* idx is cast before being passed on; the result feeds the rest of the chain */
    int y = dbg_sink2((unsigned long)idx);

    int z= dbg_flow2(y);
    int k = dbg_flow1(z);

    dbg_sink4(y);
    if(k > 20) {
        return y;
    }

    /* sink call nested directly in the return expression */
    return dbg_sink5((unsigned long)y);

}

The flow from data to dbg_sink5:

joern>     def f = cpg.method.name("array_oob_from_buffer4").next
     |     def src = f.parameter.index(3)
     |     def sink = f.call.name("dbg_sink5").argument
     |     sink.reachableByFlows(src).p
def f: io.shiftleft.codepropertygraph.generated.nodes.Method
def src:
  Iterator[io.shiftleft.codepropertygraph.generated.nodes.MethodParameterIn]
def sink: Iterator[io.shiftleft.codepropertygraph.generated.nodes.Expression]
val res6: List[String] = List(
  """__________________________________________________________________________________________________
| nodeType           | tracked                        | lineNumber| method                 | file |
|=================================================================================================|
| MethodParameterIn  | array_oob_from_buffer4(int ... | 169       | array_oob_from_buffer4 | t1.c |
| Call               | (int *)(data + 8)              | 170       | array_oob_from_buffer4 | t1.c |
| Identifier         | *int_ptr = (int *)(data + 8)   | 170       | array_oob_from_buffer4 | t1.c |
| Call               | idx = *int_ptr                 | 171       | array_oob_from_buffer4 | t1.c |
| Identifier         | idx = *int_ptr                 | 171       | array_oob_from_buffer4 | t1.c |
| Identifier         | dbg_sink(idx)                  | 173       | array_oob_from_buffer4 | t1.c |
| Identifier         | idx > 8                        | 175       | array_oob_from_buffer4 | t1.c |
| Call               | dbg_sink3(array[idx])          | 180       | array_oob_from_buffer4 | t1.c |
| Call               | (unsigned long)idx             | 182       | array_oob_from_buffer4 | t1.c |
| Call               | dbg_sink2((unsigned long)idx)  | 182       | array_oob_from_buffer4 | t1.c |
| Identifier         | y = dbg_sink2((unsigned lon... | 182       | array_oob_from_buffer4 | t1.c |
| Identifier         | dbg_flow2(y)                   | 184       | array_oob_from_buffer4 | t1.c |
| MethodParameterIn  | dbg_flow2(unsigned long x)     | 157       | dbg_flow2              | t1.c |
| Identifier         | x + 2                          | 158       | dbg_flow2              | t1.c |
| MethodParameterOut | RET                            | 157       | dbg_flow2              | t1.c |
| Identifier         | dbg_flow2(y)                   | 184       | array_oob_from_buffer4 | t1.c |
| Identifier         | dbg_sink4(y)                   | 187       | array_oob_from_buffer4 | t1.c |
| Call               | (unsigned long)y               | 192       | array_oob_from_buffer4 | t1.c |
"""
)

It doesn't print the call node dbg_sink5((unsigned long)y), although other flows do print their sink call.

This seems to be because dbg_sink5 is inside a return statement.

For example, the data flow to dbg_sink3:

joern>    def f = cpg.method.name("array_oob_from_buffer4").next
     |     def src = f.parameter.index(3)
     |     def sink = f.call.name("dbg_sink3").argument
     |     sink.reachableByFlows(src).p
def f: io.shiftleft.codepropertygraph.generated.nodes.Method
def src:
  Iterator[io.shiftleft.codepropertygraph.generated.nodes.MethodParameterIn]
def sink: Iterator[io.shiftleft.codepropertygraph.generated.nodes.Expression]
val res7: List[String] = List(
  """_________________________________________________________________________________________________
| nodeType          | tracked                        | lineNumber| method                 | file |
|================================================================================================|
| MethodParameterIn | array_oob_from_buffer4(int ... | 169       | array_oob_from_buffer4 | t1.c |
| Call              | (int *)(data + 8)              | 170       | array_oob_from_buffer4 | t1.c |
| Identifier        | *int_ptr = (int *)(data + 8)   | 170       | array_oob_from_buffer4 | t1.c |
| Call              | idx = *int_ptr                 | 171       | array_oob_from_buffer4 | t1.c |
| Identifier        | idx = *int_ptr                 | 171       | array_oob_from_buffer4 | t1.c |
| Identifier        | dbg_sink(idx)                  | 173       | array_oob_from_buffer4 | t1.c |
| Identifier        | idx > 8                        | 175       | array_oob_from_buffer4 | t1.c |
| Call              | dbg_sink3(array[idx])          | 180       | array_oob_from_buffer4 | t1.c |
"""
)