icsharpcode / ILSpy

.NET Decompiler with support for PDB generation, ReadyToRun, Metadata (&more) - cross-platform!
21.45k stars 3.35k forks source link

Elaborate control flow involving switch statements, expressions and loops fails to decompile dramatically #327

Closed kg closed 7 years ago

kg commented 12 years ago

Ran into this in someone else's code that I was trying to translate and discovered that ILSpy has significant problems decompiling it:

using System;
using System.Collections.Generic;

public class MalformedRenderstringException : Exception {
    public MalformedRenderstringException (string message)
        : base(message) {
    }
}

public static class Program {
    public static object[] MyMethod (object[] layers, string rstring, char delim) {
        int cur_pos, next_pos, len, layer_number;
        String str = rstring.Trim().ToUpper();            
        String cur_token;
        object cur_layer;
        List<object> layer_queue = new List<object>();

        cur_pos = 0;
        len = str.Length;
        while (cur_pos < len) {
            next_pos = str.IndexOf(delim, cur_pos);
            if (next_pos == -1) next_pos = len;
            cur_token = str.Substring(cur_pos, next_pos - cur_pos).Trim();
            switch (cur_token) {
                case "R":
                    cur_layer = new object();
                    layer_queue.Add(cur_layer);
                    break;
                case "E":
                    cur_layer = new object();
                    layer_queue.Add(cur_layer);
                    break;
                default: // tile layer
                    try {
                        layer_number = Int32.Parse(cur_token);
                        if (layer_number <= 0) throw new Exception();                            
                    }
                    catch (Exception) { throw new MalformedRenderstringException(rstring); } // not a positive integer                        
                    cur_layer = layers[layer_number - 1];
                    layer_queue.Add(cur_layer);
                    break;                        
            }
            cur_pos = next_pos + 1;
        }

        var list = layer_queue.ToArray();
        return list;
    }

    public static void Main (string[] args) {
        var layers = new object[6];
        var result = MyMethod(layers, "1,2,R,E,3,4,5", ',');
        Console.WriteLine(result.Length);
    }
}

(Sorry it's so complicated, that's about as much simplification as I could do while making it still fail to decompile)

ILSpy decompiles it to this:

// Program
public static object[] MyMethod(object[] layers, string rstring, char delim)
{
    string text = rstring.Trim().ToUpper();
    List<object> list = new List<object>();
    int i = 0;
    int length = text.Length;
    while (i < length)
    {
        int num = text.IndexOf(delim, i);
        if (num == -1)
        {
            num = length;
        }
        string text2 = text.Substring(i, num - i).Trim();
        string text3 = text2;
        if (text3 == null)
        {
            goto IL_9C;
        }
        object item;
        if (!(text3 == "R"))
        {
            if (!(text3 == "E"))
            {
                goto IL_9C;
            }
            item = new object();
            list.Add(item);
        }
        else
        {
            item = new object();
            list.Add(item);
        }
        IL_D5:
        i = num + 1;
        continue;
        int num2;
        try
        {
            IL_9C:
            num2 = int.Parse(text2);
            if (num2 <= 0)
            {
                throw new Exception();
            }
        }
        catch (Exception)
        {
            throw new MalformedRenderstringException(rstring);
        }
        item = layers[num2 - 1];
        list.Add(item);
        goto IL_D5;
    }
    return list.ToArray();
}

and the ILAst looks like:

var_4 : string
var_7 : class [mscorlib]System.Collections.Generic.List`1<object>
var_0_16 : int32
var_2 : int32
var_1 : int32
var_5_4E : string
var_11_52 : string
var_6_8E : object
var_3 : int32

stloc:string(var_4, callvirt:string(string::ToUpper, callvirt:string(string::Trim, ldloc:string(rstring))))
stloc:class [mscorlib]System.Collections.Generic.List`1<object>(var_7, newobj:class [mscorlib]System.Collections.Generic.List`1<object>(class [mscorlib]System.Collections.Generic.List`1<object>::.ctor))
stloc:int32(var_0_16, ldc.i4:int32(0))
stloc:int32(var_2, callvirtgetter:int32(string::get_Length, ldloc:string(var_4)))

loop (clt:bool(ldloc:int32(var_0_16), ldloc:int32(var_2))) {
    stloc:int32(var_1, callvirt:int32(string::IndexOf, ldloc:string(var_4), ldloc:char(delim), ldloc:int32(var_0_16)))
    if (logicnot:bool(cne:bool(ldloc:int32(var_1), ldc.i4:int32(-1)))) {
        stloc:int32(var_1, ldloc:int32(var_2))
    } else {
    }

    stloc:string(var_5_4E, callvirt:string(string::Trim, callvirt:string(string::Substring, ldloc:string(var_4), ldloc:int32(var_0_16), sub:int32(ldloc:int32(var_1), ldloc:int32(var_0_16)))))
    stloc:string(var_11_52, ldloc:string(var_5_4E))
    if (logicnot:bool(logicnot:bool(logicnot:bool(ldloc:string[exp:bool](var_11_52))))) {
        br(IL_9C)
    } else {
    }

    if (logicnot:bool(call:bool(string::op_Equality, ldloc:string(var_11_52), ldstr:string("R")))) {
        if (logicnot:bool(call:bool(string::op_Equality, ldloc:string(var_11_52), ldstr:string("E")))) {
            br(IL_9C)
        } else {
        }

        stloc:object(var_6_8E, newobj:object(object::.ctor))
        callvirt:void(class [mscorlib]System.Collections.Generic.List`1<object>::Add, ldloc:class [mscorlib]System.Collections.Generic.List`1<object>(var_7), ldloc:object(var_6_8E))
    } else {
        stloc:object(var_6_8E, newobj:object(object::.ctor))
        callvirt:void(class [mscorlib]System.Collections.Generic.List`1<object>::Add, ldloc:class [mscorlib]System.Collections.Generic.List`1<object>(var_7), ldloc:object(var_6_8E))
    }

    IL_D5:
    stloc:int32(var_0_16, add:int32(ldloc:int32(var_1), ldc.i4:int32(1)))
    loopcontinue()
    .try {
        IL_9C:
        stloc:int32(var_3, call:int32(int32::Parse, ldloc:string(var_5_4E)))
        if (logicnot:bool(cgt:bool(ldloc:int32(var_3), ldc.i4:int32(0)))) {
            throw(newobj:Exception(Exception::.ctor))
        } else {
        }

    }
    catch System.Exception {
        throw(newobj:MalformedRenderstringException(MalformedRenderstringException::.ctor, ldloc:string(rstring)))
    }

    stloc:object(var_6_8E, ldelem.ref:object(ldloc:object[](layers), sub:int32(ldloc:int32(var_3), ldc.i4:int32(1))))
    callvirt:void(class [mscorlib]System.Collections.Generic.List`1<object>::Add, ldloc:class [mscorlib]System.Collections.Generic.List`1<object>(var_7), ldloc:object(var_6_8E))
    br(IL_D5)
}

ret(callvirt:object[](class [mscorlib]System.Collections.Generic.List`1<object>::ToArray, ldloc:class [mscorlib]System.Collections.Generic.List`1<object>(var_7)))

I'm not certain if this broken decompilation actually preserves control flow, either; my translator certainly can't turn it into working code, but that might be my fault. The output can't be compiled by CSC since the gotos violate the language rules (IL_9C is out of scope).

dgrunwald commented 12 years ago

The only bug that I can see here is the position of the IL_9C label (it should be in front of the 'try {').} I don't see a better way to represent this control flow using if statements; we would have to detect this kind of construct as a switch statement to create more readable code. (the control flow looks correct, invert the condition and reverse the then and else blocks to see what it's doing)

The transform for switch over strings is currently quite fragile, runs way too late (it's a C# transform) and works only when the IL was using a switch opcode. We'll have to rewrite that to run as a ILAst transform and detect if-goto-sequences as well as switch opcodes.

kg commented 12 years ago

An ILAst level switch transform would be super useful for me as well. Let me know if I can do anything to help with that (it'd let me remove JSIL's version of the switch transform). I'd offer to implement it myself but dominators and SSA both make my head hurt because I don't understand academic CS :)

dgrunwald commented 7 years ago

This was fixed in the new decompiler engine (which just landed on the master branch) -- it no longer uses labels in the ILAst, and guarantees that branches never jump into nested blocks.