Open ds5678 opened 1 year ago
This is some code I wrote. Hopefully, it sees some use in the future. In any case, it's safer here than in my git stash.
/// <summary>
/// Helpers for splitting the disassembled primary executable section of a PE binary into basic blocks.
/// </summary>
public static class BasicBlockUtils
{
    /// <summary>
    /// Disassembles the primary executable section of <paramref name="binary"/> and partitions the
    /// resulting instruction stream into basic blocks.
    /// </summary>
    /// <remarks>
    /// A block is closed when the next instruction is a known direct call/jump target, or when the
    /// current instruction is a return or a direct (conditional or unconditional) branch.
    /// Indirect branches, indirect calls, interrupts, XBEGIN/XABORT/XEND and exception flow are
    /// currently not recorded and therefore never end a block.
    /// NOTE(review): the <c>BasicBlock</c> constructor arguments appear to be
    /// (first instruction, last instruction, successor address, is-conditional); this is inferred
    /// from the call sites below and should be confirmed against <c>BasicBlock</c>.
    /// </remarks>
    /// <param name="binary">The PE binary whose primary executable section is analysed.</param>
    /// <returns>The blocks in ascending instruction order; empty if nothing was disassembled.</returns>
    public static List<BasicBlock> ParsePE(PE binary)
    {
        var baseAddress = binary.GetVirtualAddressOfPrimaryExecutableSection();
        var data = binary.GetEntirePrimaryExecutableSection();
        var instructions = X86Utils.Disassemble(data, baseAddress);

        // Nothing to partition; also protects the instructions[0] access further down.
        if (instructions.Count == 0)
        {
            return new List<BasicBlock>();
        }

        HashSet<ulong> callTargets = new();
        // Address of each direct branch instruction -> where it goes and whether it is conditional.
        Dictionary<ulong, (ulong Target, bool IsConditional)> jumpAddresses = new();
        HashSet<ulong> returnAddresses = new();
        HashSet<ulong> jumpTargets = new();

        // Pass 1: collect every address that starts or terminates a block.
        foreach (ref var instruction in instructions)
        {
            switch (instruction.FlowControl)
            {
                case FlowControl.UnconditionalBranch:
                {
                    var jumpTarget = GetTargetForJump(instruction);
                    jumpAddresses.Add(instruction.IP, (jumpTarget, false));
                    jumpTargets.Add(jumpTarget);
                    break;
                }
                case FlowControl.ConditionalBranch:
                {
                    var jumpTarget = GetTargetForJump(instruction);
                    jumpAddresses.Add(instruction.IP, (jumpTarget, true));
                    jumpTargets.Add(jumpTarget);
                    break;
                }
                case FlowControl.Return:
                    returnAddresses.Add(instruction.IP);
                    break;
                case FlowControl.Call:
                    callTargets.Add(GetTargetForCall(instruction));
                    break;
                case FlowControl.IndirectBranch:
                    //Not sure what to do with this
                    break;
                case FlowControl.IndirectCall:
                    //I think this can be ignored. Ideally, we would want to identify the call sites, but it's okay if we can't.
                    break;
                case FlowControl.Interrupt:
                    //I think this needs to end a block too.
                    break;
                case FlowControl.XbeginXabortXend:
                    //Not sure what this is
                    break;
                case FlowControl.Exception:
                    //throw new Exception($"Could not assess the flow control of the instruction at 0x{instruction.IP:X}");
                    break;
            }
        }

        // Pass 2: walk the instructions in order and cut blocks at the collected addresses.
        List<BasicBlock> blockList = new();
        var blockStart = instructions[0].IP;
        for (var i = 0; i < instructions.Count; i++)
        {
            var address = instructions[i].IP;

            // A call/jump target in the middle of a block starts a new block. The previous block
            // ends at the preceding instruction and falls through to this address.
            // i > 0 is guaranteed here: for i == 0 the address equals blockStart.
            if (address != blockStart && (callTargets.Contains(address) || jumpTargets.Contains(address)))
            {
                blockList.Add(new(blockStart, instructions[i - 1].IP, address, false));
                blockStart = address;
            }

            if (returnAddresses.Contains(address))
            {
                // The return instruction is the last instruction of its block, so the block ends
                // at `address`. Ending it at instructions[i - 1] left the return outside every
                // block, indexed out of range when i == 0, and produced an end before the start
                // when the return was itself a jump/call target.
                // NOTE(review): a return has no static successor; the return's own address is
                // still passed as the third argument, as before, because no "no target" value is
                // visible for BasicBlock here - confirm the intended sentinel.
                blockList.Add(new(blockStart, address, address, false));
                if (i < instructions.Count - 1)
                {
                    blockStart = instructions[i + 1].IP;
                }
            }
            else if (jumpAddresses.TryGetValue(address, out var jump))
            {
                blockList.Add(new(blockStart, address, jump.Target, jump.IsConditional));
                if (i < instructions.Count - 1)
                {
                    blockStart = instructions[i + 1].IP;
                }
            }
        }

        // NOTE(review): instructions after the last return/branch are never emitted as a block.
        return blockList;
    }

    /// <summary>Returns the near branch target of a direct jump instruction.</summary>
    private static ulong GetTargetForJump(Instruction instruction)
    {
        return instruction.NearBranchTarget;
    }

    /// <summary>Returns the near branch target of a direct call instruction.</summary>
    private static ulong GetTargetForCall(Instruction instruction)
    {
        return instruction.NearBranchTarget;
    }
}
I was testing this in the `CallAnalysisProcessingLayer` with a breakpoint.
// Diff excerpt: the '+' line is the only addition to the existing Process override.
public override void Process(ApplicationAnalysisContext appContext, Action<int, int>? progressCallback = null)
{
// Test-only hook: builds the basic-block list so it can be inspected at a breakpoint.
// The result is not used by the code shown here. The direct cast throws
// InvalidCastException if appContext.Binary is not a LibCpp2IL.PE.PE.
+ var blockList = BasicBlockUtils.ParsePE((LibCpp2IL.PE.PE)appContext.Binary);
InjectAttribute(appContext);
}
Motivation
Separating the binary into basic blocks could improve analysis.
MethodAnalysisContext.RawBytes
is assigned. The increased number of known functions would improve the accuracy when determining the end point of the method. `Jump`
and `CallNoReturn`.
Design Concept