Open hac425xxx opened 3 months ago
The dataflow from `info` through `info->attrs` to `info->attrs[SMC_NLA_EID_TABLE_ENTRY]` is missed.
joern> def source = cpg.method.where(_.name("smc_nl_remove_ueid")).parameter
| var x1 = cpg.method.where(_.name("smc_nl_remove_ueid")).repeat(_.astChildren)(_.until(_.isCall)).next
| var sk = x1.asInstanceOf[Call].argument(2).asInstanceOf[Call].argument
| sk.reachableByFlows(source).p
def source:
Iterator[io.shiftleft.codepropertygraph.generated.nodes.MethodParameterIn]
var x1: io.shiftleft.codepropertygraph.generated.nodes.AstNode = Call(
argumentIndex = -1,
argumentName = None,
code = "*nla_ueid = info->attrs[SMC_NLA_EID_TABLE_ENTRY]",
columnNumber = Some(value = 16),
dispatchType = "STATIC_DISPATCH",
dynamicTypeHintFullName = IndexedSeq(),
lineNumber = Some(value = 168),
methodFullName = "<operator>.assignment",
name = "<operator>.assignment",
order = 2,
possibleTypes = IndexedSeq(),
signature = "",
typeFullName = "ANY"
)
var sk: Iterator[io.shiftleft.codepropertygraph.generated.nodes.Expression] = empty iterator
val res44: List[String] = List(
"""
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────┬─────────┐
│nodeType │tracked │line│method │file │
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────┼─────────┤
│MethodParameterIn│smc_nl_remove_ueid(struct sk_buff *skb, struct genl_info *info)│166 │smc_nl_remove_ueid│smc_clc.c│
│Call │*nla_ueid = info->attrs[SMC_NLA_EID_TABLE_ENTRY] │168 │smc_nl_remove_ueid│smc_clc.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────┴─────────┘"""
)
joern> x1.asInstanceOf[Call].argument(2).asInstanceOf[Call].argument
val res45: Iterator[io.shiftleft.codepropertygraph.generated.nodes.Expression] = non-empty iterator
joern> x1.asInstanceOf[Call].argument(2).asInstanceOf[Call].argument.l
val res46: List[io.shiftleft.codepropertygraph.generated.nodes.Expression] = List(
Call(
argumentIndex = 1,
argumentName = None,
code = "info->attrs",
columnNumber = Some(value = 28),
dispatchType = "STATIC_DISPATCH",
dynamicTypeHintFullName = IndexedSeq(),
lineNumber = Some(value = 168),
methodFullName = "<operator>.indirectFieldAccess",
name = "<operator>.indirectFieldAccess",
order = 1,
possibleTypes = IndexedSeq(),
signature = "",
typeFullName = "ANY"
),
Identifier(
argumentIndex = 2,
argumentName = None,
code = "SMC_NLA_EID_TABLE_ENTRY",
columnNumber = Some(value = 40),
dynamicTypeHintFullName = IndexedSeq(),
lineNumber = Some(value = 168),
name = "SMC_NLA_EID_TABLE_ENTRY",
order = 2,
possibleTypes = IndexedSeq(),
typeFullName = "ANY"
)
)
I added some code for testing:
int smc_nl_remove_ueid(struct sk_buff *skb, struct genl_info *info)
{
sink1(info->attrs[1]);
sink1(info->attrs[SMC_NLA_EID_TABLE_ENTRY]);
sink1(info->attrs);
sink1(info[1]);
struct nlattr *nla_ueid = info->attrs[SMC_NLA_EID_TABLE_ENTRY];
char *ueid;
if (!nla_ueid || nla_len(nla_ueid) != SMC_MAX_EID_LEN + 1)
return -EINVAL;
ueid = (char *)nla_data(nla_ueid);
return smc_clc_ueid_remove(ueid);
}
the query
def source = cpg.method.where(_.name("smc_nl_remove_ueid")).parameter
def sink = cpg.call.name("sink1").argument
sink.reachableByFlows(source).p
the output
joern>
| def source = cpg.method.where(_.name("smc_nl_remove_ueid")).parameter
| def sink = cpg.call.name("sink1").argument
| sink.reachableByFlows(source).p
def source:
Iterator[io.shiftleft.codepropertygraph.generated.nodes.MethodParameterIn]
def sink: Iterator[io.shiftleft.codepropertygraph.generated.nodes.Expression]
val res22: List[String] = List(
"""
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────┬─────────┐
│nodeType │tracked │line│method │file │
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────┼─────────┤
│MethodParameterIn│smc_nl_remove_ueid(struct sk_buff *skb, struct genl_info *info)│170 │smc_nl_remove_ueid│smc_clc.c│
│Call │sink1(info->attrs[1]) │172 │smc_nl_remove_ueid│smc_clc.c│
│Call │sink1(info->attrs[1]) │172 │smc_nl_remove_ueid│smc_clc.c│
│Call │sink1(info->attrs[SMC_NLA_EID_TABLE_ENTRY]) │173 │smc_nl_remove_ueid│smc_clc.c│
│Call │sink1(info->attrs[SMC_NLA_EID_TABLE_ENTRY]) │173 │smc_nl_remove_ueid│smc_clc.c│
│Call │sink1(info->attrs) │174 │smc_nl_remove_ueid│smc_clc.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────┴─────────┘""",
"""
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────┬─────────┐
│nodeType │tracked │line│method │file │
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────┼─────────┤
│MethodParameterIn│smc_nl_remove_ueid(struct sk_buff *skb, struct genl_info *info)│170 │smc_nl_remove_ueid│smc_clc.c│
│Call │sink1(info[1]) │175 │smc_nl_remove_ueid│smc_clc.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────┴─────────┘"""
)
The dataflow for `info->attrs[SMC_NLA_EID_TABLE_ENTRY]` and `info->attrs[1]` is missing.
I checked the AST and the CFG for your examples. Both look fine, so I guess the frontend itself is ok. Must be the creation of the DFG/DF here. @ml86 Who would be the right person to ask?
The sample code is as follows (a simpler example):
int smc_nl_remove_ueid(void *a1, struct xxx_t *info)
{
sink1(info->attrs[1]);
sink1(info->attrs[SMC_NLA_EID_TABLE_ENTRY]);
sink1(info->attrs);
sink1(info[1]);
void *v2 = evil2(evil1(info), 122);
void *v3 = evil3(v2, 333);
sink1(v3);
}
the query
importCode(inputPath="/home/kali/driver_extractor/example/example", projectName="example")
def source = cpg.method.where(_.name("smc_nl_remove_ueid")).parameter
def sink = cpg.call.name("sink1").argument
sink.reachableByFlows(source).p
the output
val res17: List[String] = List(
"""
┌─────────────────┬────────────────────────────────────────────────┬────┬──────────────────┬────┐
│nodeType │tracked │line│method │file│
├─────────────────┼────────────────────────────────────────────────┼────┼──────────────────┼────┤
│MethodParameterIn│smc_nl_remove_ueid(void *a1, struct xxx_t *info)│26 │smc_nl_remove_ueid│t1.c│
│Call │sink1(info[1]) │31 │smc_nl_remove_ueid│t1.c│
└─────────────────┴────────────────────────────────────────────────┴────┴──────────────────┴────┘""",
"""
┌─────────────────┬────────────────────────────────────────────────┬────┬──────────────────┬────┐
│nodeType │tracked │line│method │file│
├─────────────────┼────────────────────────────────────────────────┼────┼──────────────────┼────┤
│MethodParameterIn│smc_nl_remove_ueid(void *a1, struct xxx_t *info)│26 │smc_nl_remove_ueid│t1.c│
│Call │sink1(info->attrs) │30 │smc_nl_remove_ueid│t1.c│
│Identifier │evil1(info) │33 │smc_nl_remove_ueid│t1.c│
│Call │evil1(info) │33 │smc_nl_remove_ueid│t1.c│
│Call │evil2(evil1(info), 122) │33 │smc_nl_remove_ueid│t1.c│
│Identifier │*v2 = evil2(evil1(info), 122) │33 │smc_nl_remove_ueid│t1.c│
│Identifier │evil3(v2, 333) │34 │smc_nl_remove_ueid│t1.c│
│Call │evil3(v2, 333) │34 │smc_nl_remove_ueid│t1.c│
│Identifier │*v3 = evil3(v2, 333) │34 │smc_nl_remove_ueid│t1.c│
│Identifier │sink1(v3) │36 │smc_nl_remove_ueid│t1.c│
└─────────────────┴────────────────────────────────────────────────┴────┴──────────────────┴────┘""",
"""
┌─────────────────┬────────────────────────────────────────────────┬────┬──────────────────┬────┐
│nodeType │tracked │line│method │file│
├─────────────────┼────────────────────────────────────────────────┼────┼──────────────────┼────┤
│MethodParameterIn│smc_nl_remove_ueid(void *a1, struct xxx_t *info)│26 │smc_nl_remove_ueid│t1.c│
│Call │sink1(info->attrs) │30 │smc_nl_remove_ueid│t1.c│
└─────────────────┴────────────────────────────────────────────────┴────┴──────────────────┴────┘"""
)
@DavidBakerEffendi will likely have to take a look.
Hi, is there any progress?
@DavidBakerEffendi on vacation until 7 Aug
more example
The dataflow from `*int_ptr` is lost:
void array_oob_from_buffer4(int a1, int *array, unsigned char* data) {
int *int_ptr = (int *)(data + 8);
int idx = *int_ptr;
dbg_sink(idx);
if(idx > 8) {
return -1;
}
array[idx] = 0;
}
the query
def f = cpg.method.name("array_oob_from_buffer.*").name("array_oob_from_buffer4").next
def params = f.parameter.l
def src = f.parameter.name(params.apply(2).name)
def sink = f.call.name("<operator>.indirectIndexAccess").argument(2)
f.call.argument.reachableByFlows(src).p
the result
joern> def f = cpg.method.name("array_oob_from_buffer.*").name("array_oob_from_buffer4").next
| def params = f.parameter.l
| def src = f.parameter.name(params.apply(2).name)
| def sink = f.call.name("<operator>.indirectIndexAccess").argument(2)
|
| f.call.argument.reachableByFlows(src).p
def f: io.shiftleft.codepropertygraph.generated.nodes.Method
def params:
List[io.shiftleft.codepropertygraph.generated.nodes.MethodParameterIn]
def src:
Iterator[io.shiftleft.codepropertygraph.generated.nodes.MethodParameterIn]
def sink: Iterator[io.shiftleft.codepropertygraph.generated.nodes.Expression]
val res1: List[String] = List(
"""
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType │tracked │line│method │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│112 │array_oob_from_buffer4│t1.c│
│Call │(int *)(data + 8) │113 │array_oob_from_buffer4│t1.c│
│Identifier │*int_ptr = (int *)(data + 8) │113 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘""",
"""
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType │tracked │line│method │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│112 │array_oob_from_buffer4│t1.c│
│Call │(int *)(data + 8) │113 │array_oob_from_buffer4│t1.c│
│Identifier │*int_ptr = (int *)(data + 8) │113 │array_oob_from_buffer4│t1.c│
│Identifier │idx = *int_ptr │114 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘""",
"""
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType │tracked │line│method │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│112 │array_oob_from_buffer4│t1.c│
│Call │(int *)(data + 8) │113 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘""",
"""
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType │tracked │line│method │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│112 │array_oob_from_buffer4│t1.c│
│Call │data + 8 │113 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘""",
"""
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType │tracked │line│method │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│112 │array_oob_from_buffer4│t1.c│
│Identifier │data + 8 │113 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘"""
)
After changing the code from `*int_ptr` to `int_ptr[0]`, it seems fine.
void array_oob_from_buffer4(int a1, int *array, unsigned char* data) {
int *int_ptr = (int *)(data + 8);
int idx = int_ptr[0];
dbg_sink(idx);
if(idx > 8) {
return -1;
}
array[idx] = 0;
}
the result
joern> def f = cpg.method.name("array_oob_from_buffer.*").name("array_oob_from_buffer4").next
| def params = f.parameter.l
| def src = f.parameter.name(params.apply(2).name)
| def sink = f.call.name("<operator>.indirectIndexAccess").argument(2)
|
| f.call.argument.reachableByFlows(src).p
def f: io.shiftleft.codepropertygraph.generated.nodes.Method
def params:
List[io.shiftleft.codepropertygraph.generated.nodes.MethodParameterIn]
def src:
Iterator[io.shiftleft.codepropertygraph.generated.nodes.MethodParameterIn]
def sink: Iterator[io.shiftleft.codepropertygraph.generated.nodes.Expression]
val res1: List[String] = List(
"""
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType │tracked │line│method │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│112 │array_oob_from_buffer4│t1.c│
│Call │(int *)(data + 8) │113 │array_oob_from_buffer4│t1.c│
│Identifier │*int_ptr = (int *)(data + 8) │113 │array_oob_from_buffer4│t1.c│
│Call │idx = int_ptr[0] │114 │array_oob_from_buffer4│t1.c│
│Identifier │idx = int_ptr[0] │114 │array_oob_from_buffer4│t1.c│
│Identifier │dbg_sink(idx) │116 │array_oob_from_buffer4│t1.c│
│Identifier │idx > 8 │118 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘""",
"""
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType │tracked │line│method │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│112 │array_oob_from_buffer4│t1.c│
│Call │(int *)(data + 8) │113 │array_oob_from_buffer4│t1.c│
│Identifier │*int_ptr = (int *)(data + 8) │113 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘""",
"""
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType │tracked │line│method │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│112 │array_oob_from_buffer4│t1.c│
│Call │(int *)(data + 8) │113 │array_oob_from_buffer4│t1.c│
│Identifier │*int_ptr = (int *)(data + 8) │113 │array_oob_from_buffer4│t1.c│
│Call │idx = int_ptr[0] │114 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘""",
"""
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType │tracked │line│method │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│112 │array_oob_from_buffer4│t1.c│
│Call │(int *)(data + 8) │113 │array_oob_from_buffer4│t1.c│
│Identifier │*int_ptr = (int *)(data + 8) │113 │array_oob_from_buffer4│t1.c│
│Identifier │idx = int_ptr[0] │114 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘""",
"""
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType │tracked │line│method │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│112 │array_oob_from_buffer4│t1.c│
│Call │data + 8 │113 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘""",
"""
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType │tracked │line│method │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│112 │array_oob_from_buffer4│t1.c│
│Call │(int *)(data + 8) │113 │array_oob_from_buffer4│t1.c│
│Identifier │*int_ptr = (int *)(data + 8) │113 │array_oob_from_buffer4│t1.c│
│Call │idx = int_ptr[0] │114 │array_oob_from_buffer4│t1.c│
│Identifier │idx = int_ptr[0] │114 │array_oob_from_buffer4│t1.c│
│Identifier │dbg_sink(idx) │116 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘""",
"""
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType │tracked │line│method │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│112 │array_oob_from_buffer4│t1.c│
│Identifier │data + 8 │113 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘""",
"""
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType │tracked │line│method │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│112 │array_oob_from_buffer4│t1.c│
│Call │(int *)(data + 8) │113 │array_oob_from_buffer4│t1.c│
│Identifier │*int_ptr = (int *)(data + 8) │113 │array_oob_from_buffer4│t1.c│
│Call │idx = int_ptr[0] │114 │array_oob_from_buffer4│t1.c│
│Identifier │idx = int_ptr[0] │114 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘""",
"""
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType │tracked │line│method │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│112 │array_oob_from_buffer4│t1.c│
│Call │(int *)(data + 8) │113 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘"""
)
But the dataflow from `idx` to `array[idx]` is also missing:
f.call.name("<operator>.indirectIndexAccess").argument.code("idx").reachableByFlows(src).p
Also, is there any documentation that could guide me in debugging the data-flow analysis code?
I would like to debug and analyze it myself.
I checked the DDG for the following code (`array_oob_from_buffer1_1`):
// ok
void array_oob_from_buffer1(int a1, int *array, unsigned char* data) {
struct xxxxx_t* xt = data;
array[xt->x2] = 0;
}
void array_oob_from_buffer1_1(int a1, int *array, unsigned char* data) {
struct xxxxx_t* xt = data;
int idx = xt->x2;
array[idx] = 0;
}
The flow from `xt->x2` to `idx` is fine, but the assigned `idx` is not linked to `array[idx]`.
@hac425xxx there have recently been a lot of fixes for the C frontend, and I could imagine that a regression has crept in. Could you be so kind as to test a version from 6 months ago and see if it shows the same behavior?
@fabsx00 I tried https://github.com/joernio/joern/releases/tag/v2.0.222
With that version, the dataflow to the array index access is fine.
void array_oob_from_buffer4_def(int a1, int *array, unsigned char* data) {
int *int_ptr = (int *)(data + 8);
int idx = int_ptr[0];
int buffer[10] = {0};
dbg_sink(idx);
buffer[idx] = 1;
if(idx > 8) {
return -1;
}
array[idx] = 0;
dbg_sink3(array[idx]);
dbg_sink2(idx);
}
query
def f = cpg.method.name("array_oob_from_buffer.*").name("array_oob_from_buffer4_def").next
def params = f.parameter.l
def src = f.parameter.name(params.apply(2).name)
def sink = f.call.name("<operator>.indirectIndexAccess").argument(2)
f.call.argument.reachableByFlows(src).p
output
joern> def f = cpg.method.name("array_oob_from_buffer.*").name("array_oob_from_buffer4_def").next
| def params = f.parameter.l
| def src = f.parameter.name(params.apply(2).name)
| def sink = f.call.name("<operator>.indirectIndexAccess").argument(2)
|
| f.call.argument.reachableByFlows(src).p
def f: io.shiftleft.codepropertygraph.generated.nodes.Method
def params:
List[io.shiftleft.codepropertygraph.generated.nodes.MethodParameterIn]
def src:
Iterator[io.shiftleft.codepropertygraph.generated.nodes.MethodParameterIn]
def sink: Iterator[io.shiftleft.codepropertygraph.generated.nodes.Expression]
val res10: List[String] = List(
"""_____________________________________________________________________________________________________
| nodeType | tracked | lineNumber| method | file |
|====================================================================================================|
| MethodParameterIn | array_oob_from_buffer4_def(... | 148 | array_oob_from_buffer4_def | t1.c |
| Call | (int *)(data + 8) | 149 | array_oob_from_buffer4_def | t1.c |
| Identifier | *int_ptr = (int *)(data + 8) | 149 | array_oob_from_buffer4_def | t1.c |
| Call | idx = int_ptr[0] | 150 | array_oob_from_buffer4_def | t1.c |
| Identifier | idx = int_ptr[0] | 150 | array_oob_from_buffer4_def | t1.c |
| Identifier | dbg_sink(idx) | 154 | array_oob_from_buffer4_def | t1.c |
| Identifier | idx > 8 | 158 | array_oob_from_buffer4_def | t1.c |
| Identifier | array[idx] = 0 | 161 | array_oob_from_buffer4_def | t1.c |
""",
"""_____________________________________________________________________________________________________
| nodeType | tracked | lineNumber| method | file |
|====================================================================================================|
| MethodParameterIn | array_oob_from_buffer4_def(... | 148 | array_oob_from_buffer4_def | t1.c |
| Call | (int *)(data + 8) | 149 | array_oob_from_buffer4_def | t1.c |
| Identifier | *int_ptr = (int *)(data + 8) | 149 | array_oob_from_buffer4_def | t1.c |
| Identifier | idx = int_ptr[0] | 150 | array_oob_from_buffer4_def | t1.c |
""",
"""_____________________________________________________________________________________________________
| nodeType | tracked | lineNumber| method | file |
|====================================================================================================|
| MethodParameterIn | array_oob_from_buffer4_def(... | 148 | array_oob_from_buffer4_def | t1.c |
| Call | data + 8 | 149 | array_oob_from_buffer4_def | t1.c |
""",
"""_____________________________________________________________________________________________________
| nodeType | tracked | lineNumber| method | file |
|====================================================================================================|
| MethodParameterIn | array_oob_from_buffer4_def(... | 148 | array_oob_from_buffer4_def | t1.c |
| Call | (int *)(data + 8) | 149 | array_oob_from_buffer4_def | t1.c |
| Identifier | *int_ptr = (int *)(data + 8) | 149 | array_oob_from_buffer4_def | t1.c |
| Call | idx = int_ptr[0] | 150 | array_oob_from_buffer4_def | t1.c |
| Identifier | idx = int_ptr[0] | 150 | array_oob_from_buffer4_def | t1.c |
| Identifier | dbg_sink(idx) | 154 | array_oob_from_buffer4_def | t1.c |
""",
"""_____________________________________________________________________________________________________
| nodeType | tracked | lineNumber| method | file |
|====================================================================================================|
| MethodParameterIn | array_oob_from_buffer4_def(... | 148 | array_oob_from_buffer4_def | t1.c |
| Call | (int *)(data + 8) | 149 | array_oob_from_buffer4_def | t1.c |
| Identifier | *int_ptr = (int *)(data + 8) | 149 | array_oob_from_buffer4_def | t1.c |
""",
"""_____________________________________________________________________________________________________
| nodeType | tracked | lineNumber| method | file |
|====================================================================================================|
| MethodParameterIn | array_oob_from_buffer4_def(... | 148 | array_oob_from_buffer4_def | t1.c |
| Call | (int *)(data + 8) | 149 | array_oob_from_buffer4_def | t1.c |
| Identifier | *int_ptr = (int *)(data + 8) | 149 | array_oob_from_buffer4_def | t1.c |
| Call | idx = int_ptr[0] | 150 | array_oob_from_buffer4_def | t1.c |
| Identifier | idx = int_ptr[0] | 150 | array_oob_from_buffer4_def | t1.c |
| Identifier | dbg_sink(idx) | 154 | array_oob_from_buffer4_def | t1.c |
| Identifier | buffer[idx] = 1 | 156 | array_oob_from_buffer4_def | t1.c |
""",
"""_____________________________________________________________________________________________________
| nodeType | tracked | lineNumber| method | file |
|====================================================================================================|
| MethodParameterIn | array_oob_from_buffer4_def(... | 148 | array_oob_from_buffer4_def | t1.c |
| Identifier | data + 8 | 149 | array_oob_from_buffer4_def | t1.c |
""",
"""_____________________________________________________________________________________________________
| nodeType | tracked | lineNumber| method | file |
|====================================================================================================|
| MethodParameterIn | array_oob_from_buffer4_def(... | 148 | array_oob_from_buffer4_def | t1.c |
| Call | (int *)(data + 8) | 149 | array_oob_from_buffer4_def | t1.c |
| Identifier | *int_ptr = (int *)(data + 8) | 149 | array_oob_from_buffer4_def | t1.c |
| Call | idx = int_ptr[0] | 150 | array_oob_from_buffer4_def | t1.c |
""",
"""_____________________________________________________________________________________________________
| nodeType | tracked | lineNumber| method | file |
|====================================================================================================|
| MethodParameterIn | array_oob_from_buffer4_def(... | 148 | array_oob_from_buffer4_def | t1.c |
| Call | (int *)(data + 8) | 149 | array_oob_from_buffer4_def | t1.c |
| Identifier | *int_ptr = (int *)(data + 8) | 149 | array_oob_from_buffer4_def | t1.c |
| Call | idx = int_ptr[0] | 150 | array_oob_from_buffer4_def | t1.c |
| Identifier | idx = int_ptr[0] | 150 | array_oob_from_buffer4_def | t1.c |
| Identifier | dbg_sink(idx) | 154 | array_oob_from_buffer4_def | t1.c |
| Identifier | idx > 8 | 158 | array_oob_from_buffer4_def | t1.c |
""",
"""_____________________________________________________________________________________________________
| nodeType | tracked | lineNumber| method | file |
|====================================================================================================|
| MethodParameterIn | array_oob_from_buffer4_def(... | 148 | array_oob_from_buffer4_def | t1.c |
| Call | (int *)(data + 8) | 149 | array_oob_from_buffer4_def | t1.c |
| Identifier | *int_ptr = (int *)(data + 8) | 149 | array_oob_from_buffer4_def | t1.c |
| Call | idx = int_ptr[0] | 150 | array_oob_from_buffer4_def | t1.c |
| Identifier | idx = int_ptr[0] | 150 | array_oob_from_buffer4_def | t1.c |
""",
"""_____________________________________________________________________________________________________
| nodeType | tracked | lineNumber| method | file |
|====================================================================================================|
| MethodParameterIn | array_oob_from_buffer4_def(... | 148 | array_oob_from_buffer4_def | t1.c |
| Call | (int *)(data + 8) | 149 | array_oob_from_buffer4_def | t1.c |
| Identifier | *int_ptr = (int *)(data + 8) | 149 | array_oob_from_buffer4_def | t1.c |
| Call | idx = int_ptr[0] | 150 | array_oob_from_buffer4_def | t1.c |
| Identifier | idx = int_ptr[0] | 150 | array_oob_from_buffer4_def | t1.c |
| Identifier | dbg_sink(idx) | 154 | array_oob_from_buffer4_def | t1.c |
| Identifier | idx > 8 | 158 | array_oob_from_buffer4_def | t1.c |
| Identifier | dbg_sink2(idx) | 165 | array_oob_from_buffer4_def | t1.c |
""",
"""_____________________________________________________________________________________________________
| nodeType | tracked | lineNumber| method | file |
|====================================================================================================|
| MethodParameterIn | array_oob_from_buffer4_def(... | 148 | array_oob_from_buffer4_def | t1.c |
| Call | (int *)(data + 8) | 149 | array_oob_from_buffer4_def | t1.c |
""",
"""_____________________________________________________________________________________________________
| nodeType | tracked | lineNumber| method | file |
|====================================================================================================|
| MethodParameterIn | array_oob_from_buffer4_def(... | 148 | array_oob_from_buffer4_def | t1.c |
| Call | (int *)(data + 8) | 149 | array_oob_from_buffer4_def | t1.c |
| Identifier | *int_ptr = (int *)(data + 8) | 149 | array_oob_from_buffer4_def | t1.c |
| Call | idx = int_ptr[0] | 150 | array_oob_from_buffer4_def | t1.c |
| Identifier | idx = int_ptr[0] | 150 | array_oob_from_buffer4_def | t1.c |
| Identifier | dbg_sink(idx) | 154 | array_oob_from_buffer4_def | t1.c |
| Identifier | idx > 8 | 158 | array_oob_from_buffer4_def | t1.c |
| Identifier | dbg_sink3(array[idx]) | 163 | array_oob_from_buffer4_def | t1.c |
"""
)
But the following test case is still wrong: the dataflow from `*int_ptr` to `idx` is missing.
void array_oob_from_buffer4(int a1, int *array, unsigned char* data) {
int *int_ptr = (int *)(data + 8);
int idx = *int_ptr;
dbg_sink(idx);
if(idx > 8) {
return -1;
}
array[idx] = 0;
dbg_sink3(array[idx]);
dbg_sink2(idx);
}
output
joern> def f = cpg.method.name("array_oob_from_buffer.*").name("array_oob_from_buffer4").next
| def params = f.parameter.l
| def src = f.parameter.name(params.apply(2).name)
| def sink = f.call.name("<operator>.indirectIndexAccess").argument(2)
|
| f.call.argument.reachableByFlows(src).p
|
def f: io.shiftleft.codepropertygraph.generated.nodes.Method
def params:
List[io.shiftleft.codepropertygraph.generated.nodes.MethodParameterIn]
def src:
Iterator[io.shiftleft.codepropertygraph.generated.nodes.MethodParameterIn]
def sink: Iterator[io.shiftleft.codepropertygraph.generated.nodes.Expression]
val res13: List[String] = List(
"""_________________________________________________________________________________________________
| nodeType | tracked | lineNumber| method | file |
|================================================================================================|
| MethodParameterIn | array_oob_from_buffer4(int ... | 131 | array_oob_from_buffer4 | t1.c |
| Identifier | data + 8 | 132 | array_oob_from_buffer4 | t1.c |
""",
"""_________________________________________________________________________________________________
| nodeType | tracked | lineNumber| method | file |
|================================================================================================|
| MethodParameterIn | array_oob_from_buffer4(int ... | 131 | array_oob_from_buffer4 | t1.c |
| Call | (int *)(data + 8) | 132 | array_oob_from_buffer4 | t1.c |
""",
"""_________________________________________________________________________________________________
| nodeType | tracked | lineNumber| method | file |
|================================================================================================|
| MethodParameterIn | array_oob_from_buffer4(int ... | 131 | array_oob_from_buffer4 | t1.c |
| Call | (int *)(data + 8) | 132 | array_oob_from_buffer4 | t1.c |
| Identifier | *int_ptr = (int *)(data + 8) | 132 | array_oob_from_buffer4 | t1.c |
""",
"""_________________________________________________________________________________________________
| nodeType | tracked | lineNumber| method | file |
|================================================================================================|
| MethodParameterIn | array_oob_from_buffer4(int ... | 131 | array_oob_from_buffer4 | t1.c |
| Call | (int *)(data + 8) | 132 | array_oob_from_buffer4 | t1.c |
| Identifier | *int_ptr = (int *)(data + 8) | 132 | array_oob_from_buffer4 | t1.c |
| Identifier | idx = *int_ptr | 133 | array_oob_from_buffer4 | t1.c |
""",
"""_________________________________________________________________________________________________
| nodeType | tracked | lineNumber| method | file |
|================================================================================================|
| MethodParameterIn | array_oob_from_buffer4(int ... | 131 | array_oob_from_buffer4 | t1.c |
| Call | data + 8 | 132 | array_oob_from_buffer4 | t1.c |
"""
)
I guess the handler for `<operator>.indirection` is wrong. I also tried adding a custom dataflow config, but it did not work:
import io.joern.dataflowengineoss.semanticsloader.FlowSemantic
import io.shiftleft.semanticcpg.layers.LayerCreatorOptions
import io.joern.dataflowengineoss.layers.dataflows.OssDataFlowOptions
import io.joern.dataflowengineoss.layers.dataflows.OssDataFlow
import io.shiftleft.semanticcpg.layers.{LayerCreator, LayerCreatorContext}
val extraFlows = List(
FlowSemantic.from("<operator>.indirection", List((1, -1))),
)
val context = new LayerCreatorContext(cpg)
val options = new OssDataFlowOptions(extraFlows = extraFlows)
new OssDataFlow(options).run(context)
After I modified the `UsageAnalyzer` class, the dataflow from `*int_ptr` to `idx` seems to work.
┌──(kali㉿kali)-[~/joern]
└─$ git diff dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/passes/reachingdef/DdgGenerator.scala
diff --git a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/passes/reachingdef/DdgGenerator.scala b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/passes/reachingdef/DdgGenerator.scala
index 7693f2875..bbe918249 100644
--- a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/passes/reachingdef/DdgGenerator.scala
+++ b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/passes/reachingdef/DdgGenerator.scala
@@ -267,8 +267,8 @@ private class UsageAnalyzer(
private val allNodes = in.keys.toList
private val containerSet =
- Set(Operators.fieldAccess, Operators.indexAccess, Operators.indirectIndexAccess, Operators.indirectFieldAccess)
- private val indirectionAccessSet = Set(Operators.addressOf, Operators.indirection)
+ Set(Operators.fieldAccess, Operators.indexAccess, Operators.indirectIndexAccess, Operators.indirectFieldAccess, Operators.indirection)
+ private val indirectionAccessSet = Set(Operators.addressOf)
val usedIncomingDefs: Map[StoredNode, Map[StoredNode, Set[Definition]]] = initUsedIncomingDefs()
def initUsedIncomingDefs(): Map[StoredNode, Map[StoredNode, Set[Definition]]] = {
query
joern> def f = cpg.method.name("array_oob_from_buffer.*").name("array_oob_from_buffer4").next
| def params = f.parameter.l
| def src = f.parameter.name(params.apply(2).name)
| def sink = f.call.name("<operator>.indirectIndexAccess").argument(2)
|
| f.call.argument.reachableByFlows(src).p
|
def f: io.shiftleft.codepropertygraph.generated.nodes.Method
def params:
List[io.shiftleft.codepropertygraph.generated.nodes.MethodParameterIn]
def src:
Iterator[io.shiftleft.codepropertygraph.generated.nodes.MethodParameterIn]
def sink: Iterator[io.shiftleft.codepropertygraph.generated.nodes.Expression]
val res11: List[String] = List(
"""
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType │tracked │line│method │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│140 │array_oob_from_buffer4│t1.c│
│Call │(int *)(data + 8) │141 │array_oob_from_buffer4│t1.c│
│Identifier │*int_ptr = (int *)(data + 8) │141 │array_oob_from_buffer4│t1.c│
│Identifier │idx = *int_ptr │142 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘""",
"""
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType │tracked │line│method │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│140 │array_oob_from_buffer4│t1.c│
│Call │(int *)(data + 8) │141 │array_oob_from_buffer4│t1.c│
│Identifier │*int_ptr = (int *)(data + 8) │141 │array_oob_from_buffer4│t1.c│
│Call │idx = *int_ptr │142 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘""",
"""
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType │tracked │line│method │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│140 │array_oob_from_buffer4│t1.c│
│Call │(int *)(data + 8) │141 │array_oob_from_buffer4│t1.c│
│Identifier │*int_ptr = (int *)(data + 8) │141 │array_oob_from_buffer4│t1.c│
│Call │idx = *int_ptr │142 │array_oob_from_buffer4│t1.c│
│Identifier │idx = *int_ptr │142 │array_oob_from_buffer4│t1.c│
│Identifier │dbg_sink(idx) │144 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘""",
"""
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType │tracked │line│method │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│140 │array_oob_from_buffer4│t1.c│
│Call │(int *)(data + 8) │141 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘""",
"""
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType │tracked │line│method │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│140 │array_oob_from_buffer4│t1.c│
│Identifier │data + 8 │141 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘""",
"""
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType │tracked │line│method │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│140 │array_oob_from_buffer4│t1.c│
│Call │(int *)(data + 8) │141 │array_oob_from_buffer4│t1.c│
│Identifier │*int_ptr = (int *)(data + 8) │141 │array_oob_from_buffer4│t1.c│
│Call │idx = *int_ptr │142 │array_oob_from_buffer4│t1.c│
│Identifier │idx = *int_ptr │142 │array_oob_from_buffer4│t1.c│
│Identifier │dbg_sink(idx) │144 │array_oob_from_buffer4│t1.c│
│Identifier │idx > 8 │146 │array_oob_from_buffer4│t1.c│
│Identifier │dbg_sink2(idx) │153 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘""",
"""
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType │tracked │line│method │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│140 │array_oob_from_buffer4│t1.c│
│Call │(int *)(data + 8) │141 │array_oob_from_buffer4│t1.c│
│Identifier │*int_ptr = (int *)(data + 8) │141 │array_oob_from_buffer4│t1.c│
│Call │idx = *int_ptr │142 │array_oob_from_buffer4│t1.c│
│Identifier │idx = *int_ptr │142 │array_oob_from_buffer4│t1.c│
│Identifier │dbg_sink(idx) │144 │array_oob_from_buffer4│t1.c│
│Identifier │idx > 8 │146 │array_oob_from_buffer4│t1.c│
│Identifier │dbg_sink3(array[idx]) │151 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘""",
"""
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType │tracked │line│method │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│140 │array_oob_from_buffer4│t1.c│
│Call │(int *)(data + 8) │141 │array_oob_from_buffer4│t1.c│
│Identifier │*int_ptr = (int *)(data + 8) │141 │array_oob_from_buffer4│t1.c│
│Call │idx = *int_ptr │142 │array_oob_from_buffer4│t1.c│
│Identifier │idx = *int_ptr │142 │array_oob_from_buffer4│t1.c│
│Identifier │dbg_sink(idx) │144 │array_oob_from_buffer4│t1.c│
│Identifier │idx > 8 │146 │array_oob_from_buffer4│t1.c│
│Identifier │array[idx] = 0 │149 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘""",
"""
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType │tracked │line│method │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│140 │array_oob_from_buffer4│t1.c│
│Call │(int *)(data + 8) │141 │array_oob_from_buffer4│t1.c│
│Identifier │*int_ptr = (int *)(data + 8) │141 │array_oob_from_buffer4│t1.c│
│Call │idx = *int_ptr │142 │array_oob_from_buffer4│t1.c│
│Identifier │idx = *int_ptr │142 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘""",
"""
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType │tracked │line│method │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│140 │array_oob_from_buffer4│t1.c│
│Call │(int *)(data + 8) │141 │array_oob_from_buffer4│t1.c│
│Identifier │*int_ptr = (int *)(data + 8) │141 │array_oob_from_buffer4│t1.c│
│Call │idx = *int_ptr │142 │array_oob_from_buffer4│t1.c│
│Identifier │idx = *int_ptr │142 │array_oob_from_buffer4│t1.c│
│Identifier │dbg_sink(idx) │144 │array_oob_from_buffer4│t1.c│
│Identifier │idx > 8 │146 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘""",
"""
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType │tracked │line│method │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│140 │array_oob_from_buffer4│t1.c│
│Call │(int *)(data + 8) │141 │array_oob_from_buffer4│t1.c│
│Identifier │*int_ptr = (int *)(data + 8) │141 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘""",
"""
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType │tracked │line│method │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer4(int a1, int *array, unsigned char* data)│140 │array_oob_from_buffer4│t1.c│
│Call │data + 8 │141 │array_oob_from_buffer4│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘"""
)
And now array index access seems to work too.
└─$ git log
commit bd38a15ada2f09098265c5e4ca44ba9959f808af (HEAD -> master, tag: v4.0.9, origin/master, origin/HEAD)
Author: Michael Pollmeier <michael@michaelpollmeier.com>
Date: Mon Jul 22 16:12:45 2024 +0200
workaround for scala completion bug (#4791)
on stage: remove module-info.class from dependency jars - a hacky workaround for a
scala3 compiler bug: https://github.com/scala/scala3/issues/20421
Fixes https://github.com/joernio/joern/issues/4625
Currently the initial test case still fails: https://github.com/joernio/joern/issues/4794#issue-2424775544
The following test cases still fail as well:
void array_oob_from_buffer2(int a1, int *array, unsigned char* data) {
int idx = *(int*)data;
array[idx] = 0;
}
void array_oob_from_buffer3(int a1, int *array, unsigned char* data) {
int idx = *(int*)(data + 8);
array[idx] = 0;
}
Maybe the Unknown type node affects the cast call. Why is `int *` represented as Unknown?
joern> def f = cpg.method.name("array_oob_from_buffer2").next
| def src = f.parameter.name("data")
| f.call.name("<operator>.cast").argument.reachableByFlows(src).p
def f: io.shiftleft.codepropertygraph.generated.nodes.Method
def src:
Iterator[io.shiftleft.codepropertygraph.generated.nodes.MethodParameterIn]
val res11: List[String] = List(
"""
┌─────────────────┬───────────────────────────────────────────────────────────────┬────┬──────────────────────┬────┐
│nodeType │tracked │line│method │file│
├─────────────────┼───────────────────────────────────────────────────────────────┼────┼──────────────────────┼────┤
│MethodParameterIn│array_oob_from_buffer2(int a1, int *array, unsigned char* data)│124 │array_oob_from_buffer2│t1.c│
│Identifier │(int*)data │125 │array_oob_from_buffer2│t1.c│
└─────────────────┴───────────────────────────────────────────────────────────────┴────┴──────────────────────┴────┘"""
)
joern> f.call.name("<operator>.cast").argument.l
val res12: List[io.shiftleft.codepropertygraph.generated.nodes.Expression] = List(
Unknown(
argumentIndex = 1,
argumentName = None,
code = "int*",
columnNumber = Some(value = 17),
containedRef = "<empty>",
dynamicTypeHintFullName = IndexedSeq(),
lineNumber = Some(value = 125),
order = 1,
parserTypeName = "CASTTypeId",
possibleTypes = IndexedSeq(),
typeFullName = "<empty>"
),
Identifier(
argumentIndex = 2,
argumentName = None,
code = "data",
columnNumber = Some(value = 22),
dynamicTypeHintFullName = IndexedSeq(),
lineNumber = Some(value = 125),
name = "data",
order = 2,
possibleTypes = IndexedSeq(),
typeFullName = "unsigned char*"
)
)
I checked `cpg.typedecl`; it does contain a TypeDecl for `int*`:
TypeDecl(
aliasTypeFullName = None,
astParentFullName = "<includes>:<global>",
astParentType = "NAMESPACE_BLOCK",
code = "int*",
columnNumber = None,
filename = "<includes>",
fullName = "int*",
inheritsFromTypeFullName = IndexedSeq(),
isExternal = true,
lineNumber = None,
name = "int*",
offset = None,
offsetEnd = None,
order = -1
),
I checked the Joern code: it always creates a new Unknown node for the cast type. Could that be wrong? @max-leuthaeuser
Related code: https://github.com/joernio/joern/issues/4794#issuecomment-2252627980
private def astForCastExpression(castExpression: IASTCastExpression): Ast = {
val cpgCastExpression =
callNode(castExpression, code(castExpression), Operators.cast, Operators.cast, DispatchTypes.STATIC_DISPATCH)
val expr = astForExpression(castExpression.getOperand)
val argNode = castExpression.getTypeId
val arg = unknownNode(argNode, code(argNode))
callAst(cpgCastExpression, List(Ast(arg), expr))
}
At the time of writing this I did not know how to represent a `CASTTypeId`.
You may want to change that too (maybe an identifier?).
But what's the FlowSemantic of `Operators.cast`, and is it respected in the DdgGenerator?
With the following patch applied, all dataflows seem fine; an expert may need to review this.
┌──(kali㉿kali)-[~/joern]
└─$ git diff dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/passes/reachingdef/DdgGenerator.scala
diff --git a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/passes/reachingdef/DdgGenerator.scala b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/passes/reachingdef/DdgGenerator.scala
index c0736c875..ebeb6c5ee 100644
--- a/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/passes/reachingdef/DdgGenerator.scala
+++ b/dataflowengineoss/src/main/scala/io/joern/dataflowengineoss/passes/reachingdef/DdgGenerator.scala
@@ -267,8 +267,8 @@ private class UsageAnalyzer(
private val allNodes = in.keys.toList
private val containerSet =
- Set(Operators.fieldAccess, Operators.indexAccess, Operators.indirectIndexAccess, Operators.indirectFieldAccess)
- private val indirectionAccessSet = Set(Operators.addressOf, Operators.indirection)
+ Set(Operators.fieldAccess, Operators.indexAccess, Operators.indirectIndexAccess, Operators.indirectFieldAccess, Operators.indirection, Operators.cast)
+ private val indirectionAccessSet = Set(Operators.addressOf, Operators.cast)
val usedIncomingDefs: Map[StoredNode, Map[StoredNode, Set[Definition]]] = initUsedIncomingDefs()
def initUsedIncomingDefs(): Map[StoredNode, Map[StoredNode, Set[Definition]]] = {
@@ -295,7 +295,7 @@ private class UsageAnalyzer(
private def isContainer(use: StoredNode, inElement: StoredNode): Boolean = {
inElement match {
case call: Call if containerSet.contains(call.name) =>
- call.argument.headOption.exists { base =>
+ call.ast.isCall.argument.exists { base =>
nodeToString(use) == nodeToString(base)
}
case _ => false
@@ -309,11 +309,11 @@ private class UsageAnalyzer(
case call: Call if containerSet.contains(call.name) =>
inElement match {
case param: MethodParameterIn =>
- call.argument.headOption.exists { base =>
+ call.ast.isCall.argument.exists { base =>
nodeToString(base).contains(param.name)
}
case identifier: Identifier =>
- call.argument.headOption.exists { base =>
+ call.ast.isCall.argument.exists { base =>
nodeToString(base).contains(identifier.name)
}
case _ => false
@@ -353,7 +353,7 @@ private class UsageAnalyzer(
case param: MethodParameterIn =>
nodeToString(use).contains(param.name)
case call: Call if indirectionAccessSet.contains(call.name) =>
- call.argumentOption(1).exists(x => nodeToString(use).contains(x.code))
+ call.ast.isCall.argument.exists(x => nodeToString(use).contains(x.code))
case call: Call =>
nodeToString(use).contains(call.code)
case identifier: Identifier => nodeToString(use).contains(identifier.name)
output
joern> def f = cpg.method.name("array_oob_from_buffer2").next
| def src = f.parameter.name("data")
| f.call.argument.reachableByFlows(src).p
def f: io.shiftleft.codepropertygraph.generated.nodes.Method
def src:
Iterator[io.shiftleft.codepropertygraph.generated.nodes.MethodParameterIn]
val res1: List[String] = List(
"""_________________________________________________________________________________________________
| nodeType | tracked | lineNumber| method | file |
|================================================================================================|
| MethodParameterIn | array_oob_from_buffer2(int ... | 132 | array_oob_from_buffer2 | t1.c |
| Call | idx = *(int*)data | 133 | array_oob_from_buffer2 | t1.c |
""",
"""_________________________________________________________________________________________________
| nodeType | tracked | lineNumber| method | file |
|================================================================================================|
| MethodParameterIn | array_oob_from_buffer2(int ... | 132 | array_oob_from_buffer2 | t1.c |
| Identifier | (int*)data | 133 | array_oob_from_buffer2 | t1.c |
""",
"""_________________________________________________________________________________________________
| nodeType | tracked | lineNumber| method | file |
|================================================================================================|
| MethodParameterIn | array_oob_from_buffer2(int ... | 132 | array_oob_from_buffer2 | t1.c |
| Call | idx = *(int*)data | 133 | array_oob_from_buffer2 | t1.c |
| Identifier | idx = *(int*)data | 133 | array_oob_from_buffer2 | t1.c |
| Identifier | array[idx] = 0 | 134 | array_oob_from_buffer2 | t1.c |
""",
"""_________________________________________________________________________________________________
| nodeType | tracked | lineNumber| method | file |
|================================================================================================|
| MethodParameterIn | array_oob_from_buffer2(int ... | 132 | array_oob_from_buffer2 | t1.c |
| Call | (int*)data | 133 | array_oob_from_buffer2 | t1.c |
""",
"""_________________________________________________________________________________________________
| nodeType | tracked | lineNumber| method | file |
|================================================================================================|
| MethodParameterIn | array_oob_from_buffer2(int ... | 132 | array_oob_from_buffer2 | t1.c |
| Call | idx = *(int*)data | 133 | array_oob_from_buffer2 | t1.c |
| Identifier | idx = *(int*)data | 133 | array_oob_from_buffer2 | t1.c |
"""
)
output2
joern> def f = cpg.method.name("array_oob_from_buffer3")
| def src = f.parameter.name("data")
| f.call.argument.reachableByFlows(src).p
def f: Iterator[io.shiftleft.codepropertygraph.generated.nodes.Method]
def src:
Iterator[io.shiftleft.codepropertygraph.generated.nodes.MethodParameterIn]
val res6: List[String] = List(
"""_________________________________________________________________________________________________
| nodeType | tracked | lineNumber| method | file |
|================================================================================================|
| MethodParameterIn | array_oob_from_buffer3(int ... | 142 | array_oob_from_buffer3 | t1.c |
| Call | idx = *(int *)(data + 8) | 143 | array_oob_from_buffer3 | t1.c |
| Identifier | idx = *(int *)(data + 8) | 143 | array_oob_from_buffer3 | t1.c |
| Identifier | array[idx] = 0 | 144 | array_oob_from_buffer3 | t1.c |
""",
"""_________________________________________________________________
There is something weird: the same patch works under Linux, but does not work under Windows.
A new small bug:
void array_oob_from_buffer4(int a1, int *array, unsigned char* data) {
int *int_ptr = (int *)(data + 8);
int idx = *int_ptr;
dbg_sink(idx);
if(idx > 8) {
return -1;
}
array[idx] = 0;
dbg_sink3(array[idx]);
int y = dbg_sink2((unsigned long)idx);
int z= dbg_flow2(y);
int k = dbg_flow1(z);
dbg_sink4(y);
if(k > 20) {
return y;
}
return dbg_sink5((unsigned long)y);
}
The flow from `data` to `dbg_sink5`:
joern> def f = cpg.method.name("array_oob_from_buffer4").next
| def src = f.parameter.index(3)
| def sink = f.call.name("dbg_sink5").argument
| sink.reachableByFlows(src).p
def f: io.shiftleft.codepropertygraph.generated.nodes.Method
def src:
Iterator[io.shiftleft.codepropertygraph.generated.nodes.MethodParameterIn]
def sink: Iterator[io.shiftleft.codepropertygraph.generated.nodes.Expression]
val res6: List[String] = List(
"""__________________________________________________________________________________________________
| nodeType | tracked | lineNumber| method | file |
|=================================================================================================|
| MethodParameterIn | array_oob_from_buffer4(int ... | 169 | array_oob_from_buffer4 | t1.c |
| Call | (int *)(data + 8) | 170 | array_oob_from_buffer4 | t1.c |
| Identifier | *int_ptr = (int *)(data + 8) | 170 | array_oob_from_buffer4 | t1.c |
| Call | idx = *int_ptr | 171 | array_oob_from_buffer4 | t1.c |
| Identifier | idx = *int_ptr | 171 | array_oob_from_buffer4 | t1.c |
| Identifier | dbg_sink(idx) | 173 | array_oob_from_buffer4 | t1.c |
| Identifier | idx > 8 | 175 | array_oob_from_buffer4 | t1.c |
| Call | dbg_sink3(array[idx]) | 180 | array_oob_from_buffer4 | t1.c |
| Call | (unsigned long)idx | 182 | array_oob_from_buffer4 | t1.c |
| Call | dbg_sink2((unsigned long)idx) | 182 | array_oob_from_buffer4 | t1.c |
| Identifier | y = dbg_sink2((unsigned lon... | 182 | array_oob_from_buffer4 | t1.c |
| Identifier | dbg_flow2(y) | 184 | array_oob_from_buffer4 | t1.c |
| MethodParameterIn | dbg_flow2(unsigned long x) | 157 | dbg_flow2 | t1.c |
| Identifier | x + 2 | 158 | dbg_flow2 | t1.c |
| MethodParameterOut | RET | 157 | dbg_flow2 | t1.c |
| Identifier | dbg_flow2(y) | 184 | array_oob_from_buffer4 | t1.c |
| Identifier | dbg_sink4(y) | 187 | array_oob_from_buffer4 | t1.c |
| Call | (unsigned long)y | 192 | array_oob_from_buffer4 | t1.c |
"""
)
It doesn't print the call node `dbg_sink5((unsigned long)y)`, although other flows do print their sink call.
This seems to be because `dbg_sink5` is inside a `return` statement.
For example, the dataflow to `dbg_sink3`:
joern> def f = cpg.method.name("array_oob_from_buffer4").next
| def src = f.parameter.index(3)
| def sink = f.call.name("dbg_sink3").argument
| sink.reachableByFlows(src).p
def f: io.shiftleft.codepropertygraph.generated.nodes.Method
def src:
Iterator[io.shiftleft.codepropertygraph.generated.nodes.MethodParameterIn]
def sink: Iterator[io.shiftleft.codepropertygraph.generated.nodes.Expression]
val res7: List[String] = List(
"""_________________________________________________________________________________________________
| nodeType | tracked | lineNumber| method | file |
|================================================================================================|
| MethodParameterIn | array_oob_from_buffer4(int ... | 169 | array_oob_from_buffer4 | t1.c |
| Call | (int *)(data + 8) | 170 | array_oob_from_buffer4 | t1.c |
| Identifier | *int_ptr = (int *)(data + 8) | 170 | array_oob_from_buffer4 | t1.c |
| Call | idx = *int_ptr | 171 | array_oob_from_buffer4 | t1.c |
| Identifier | idx = *int_ptr | 171 | array_oob_from_buffer4 | t1.c |
| Identifier | dbg_sink(idx) | 173 | array_oob_from_buffer4 | t1.c |
| Identifier | idx > 8 | 175 | array_oob_from_buffer4 | t1.c |
| Call | dbg_sink3(array[idx]) | 180 | array_oob_from_buffer4 | t1.c |
"""
)
test code
scala code
`sk` is the node for `nla_ueid` or `info->attrs[SMC_NLA_EID_TABLE_ENTRY]`,
but `reachableByFlows` returns nothing; it seems that the dataflow from `info` to `info->attrs[SMC_NLA_EID_TABLE_ENTRY]` is missing.
The DFG: