SamboyCoding / Cpp2IL

Work-in-progress tool to reverse unity's IL2CPP toolchain.
MIT License
1.6k stars 187 forks source link

Basic Block Detection #187

Open ds5678 opened 1 year ago

ds5678 commented 1 year ago

Motivation

Separating the binary into basic blocks could improve analysis.

Design Concept

/// <summary>
/// A <see href="https://en.wikipedia.org/wiki/Basic_block">Basic Block</see> of assembly instructions.
/// </summary>
/// <param name="Start">The address of the entry instruction.</param>
/// <param name="End">The address of the exit instruction.</param>
/// <param name="JumpTarget">The address being jumped to after exiting the block. This is 0 iff the block returns.</param>
/// <param name="ConditionalJump">If true, the exit jump is conditional and control might flow to the next block instead of <paramref name="JumpTarget"/>.</param>
public readonly record struct BasicBlock(ulong Start, ulong End, ulong JumpTarget, bool ConditionalJump)
{
    /// <summary>
    /// The length of this block in the address space.
    /// </summary>
    public ulong Length => End - Start + 1;

    /// <summary>
    /// Does this block end with a return instruction?
    /// </summary>
    public bool IsReturnBlock => JumpTarget == 0;

    /// <summary>
    /// Divide due to a jump into the middle of the block.
    /// </summary>
    /// <param name="innerJump">The jump target inside this block.</param>
    /// <param name="lowerBlock">The resulting block that contains <see cref="Start"/>.</param>
    /// <param name="upperBlock">The resulting block that contains <see cref="End"/>.</param>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="innerJump"/> is not an inside address.</exception>
    public void Divide(ulong innerJump, out BasicBlock lowerBlock, out BasicBlock upperBlock)
    {
        if (!IsInside(innerJump))
            throw new ArgumentOutOfRangeException(nameof(innerJump), innerJump, null);

        lowerBlock = new(Start, innerJump - 1, innerJump, false);
        upperBlock = new(innerJump, End, JumpTarget, ConditionalJump);
    }

    /// <summary>
    /// Is the address contained in this block and not <see cref="Start"/>?
    /// </summary>
    /// <param name="address"></param>
    /// <returns>True if <paramref name="address"/> is any contained and not <see cref="Start"/>.</returns>
    public bool IsInside(ulong address) => address > Start && address <= End;

    /// <summary>
    /// Is the address contained in this block?
    /// </summary>
    /// <param name="address"></param>
    /// <returns>True if <paramref name="address"/> is within the bounds of this block.</returns>
    public bool Contains(ulong address) => address >= Start && address <= End;
}
ds5678 commented 7 months ago

This is some code I wrote. Hopefully, it sees some use in the future. In any case, it's safer here than in my git stash.

public static class BasicBlockUtils
{
    public static List<BasicBlock> ParsePE(PE binary)
    {
        var baseAddress = binary.GetVirtualAddressOfPrimaryExecutableSection();
        var data = binary.GetEntirePrimaryExecutableSection();
        var instructions = X86Utils.Disassemble(data, baseAddress);
        HashSet<ulong> callTargets = new();
        Dictionary<ulong, (ulong, bool)> jumpAddresses = new();
        HashSet<ulong> returnAddresses = new();
        HashSet<ulong> jumpTargets = new();
        foreach (ref var instruction in instructions)
        {
            switch (instruction.FlowControl)
            {
                case FlowControl.UnconditionalBranch:
                    {
                        var jumpTarget = GetTargetForJump(instruction);
                        jumpAddresses.Add(instruction.IP, (jumpTarget, false));
                        jumpTargets.Add(jumpTarget);
                    }
                    break;
                case FlowControl.IndirectBranch:
                    //Not sure what to do with this
                    break;
                case FlowControl.ConditionalBranch:
                    {
                        var jumpTarget = GetTargetForJump(instruction);
                        jumpAddresses.Add(instruction.IP, (jumpTarget, true));
                        jumpTargets.Add(jumpTarget);
                    }
                    break;
                case FlowControl.Return:
                    returnAddresses.Add(instruction.IP);
                    break;
                case FlowControl.Call:
                    callTargets.Add(GetTargetForCall(instruction));
                    break;
                case FlowControl.IndirectCall:
                    //I think this can be ignored. Ideally, we would want to identify the call sites, but it's okay if we can't.
                    break;
                case FlowControl.Interrupt:
                    //I think this needs to end a block too.
                    break;
                case FlowControl.XbeginXabortXend:
                    //Not sure what this is
                    break;
                case FlowControl.Exception:
                    //throw new Exception($"Could not assess the flow control of the instruction at 0x{instruction.IP:X}");
                    break;
            }
        }
        List<BasicBlock> blockList = new();
        var blockStart = instructions[0].IP;
        for (var i = 0; i < instructions.Count; i++)
        {
            var address = instructions[i].IP;
            if (address != blockStart && (callTargets.Contains(address) || jumpTargets.Contains(address)))
            {
                blockList.Add(new(blockStart, instructions[i-1].IP, address, false));
                blockStart = address;
            }
            if (returnAddresses.Contains(address))
            {
                blockList.Add(new(blockStart, instructions[i - 1].IP, address, false));
                if (i < instructions.Count - 1)
                    blockStart = instructions[i + 1].IP;
            }
            else if (jumpAddresses.TryGetValue(address, out var pair))
            {
                blockList.Add(new(blockStart, address, pair.Item1, pair.Item2));
                if (i < instructions.Count - 1)
                    blockStart = instructions[i + 1].IP;
            }
        }
        return blockList;
    }

    private static ulong GetTargetForJump(Instruction instruction)
    {
        return instruction.NearBranchTarget;
    }

    private static ulong GetTargetForCall(Instruction instruction)
    {
        return instruction.NearBranchTarget;
    }
}

I was testing this in the CallAnalysisProcessingLayer with a breakpoint.

    public override void Process(ApplicationAnalysisContext appContext, Action<int, int>? progressCallback = null)
    {
+        var blockList = BasicBlockUtils.ParsePE((LibCpp2IL.PE.PE)appContext.Binary);
        InjectAttribute(appContext);
    }