generated circuit produces side-effects and wrong/impossible return value

For the last few days, I tried to understand how a code in the form

result = 0;
foo(params);
result = foo(otherparams);
if (! result) { return 22; }
return 0;

could return neither 22 nor 0, but instead values like -23881866888. This seems to be some kind of side-effect from the two calls of foo, as the value changes depending on the number of calls to foo.

I have not been able to find a good explanation for the issue. It seems to only appear if, in some way, the input is accessed and/or used.

This is the smallest example that I was able to find which still produces the effect. The input of the code is one foo which contains four bars. The code checks that the first and last two bars, respectively, have the same value. It writes the result of the comparison in output->control. The value should be either 0 or 22, but nothing else. Instead, the value of output->control is -23881866888. This should be impossible.

example.c

#include <stdint.h>

#define STRING_MAX_LEN 2

typedef unsigned char string_t[STRING_MAX_LEN];

typedef uint32_t index_t;

typedef struct bar {
    string_t baz;
} bar_t;

typedef struct foo {
    bar_t bars[4];
    uint8_t size;
} foo_t;

struct In {
    foo_t foo;
};

struct Out {
    uint8_t out;
    uint8_t control;
};

bool compare_string_t(string_t a, string_t b) {
    uint32_t i;
    bool eq = 1;
    for (i = 0; i < STRING_MAX_LEN; i++) {
        unsigned char x = a[i];
        unsigned char y = b[i];
        eq = eq && (x == y);
    }
    return eq;
}

bool compare_bars(foo_t *foo, index_t a, index_t b) {
    string_t baz_a = foo->bars[a].baz;
    string_t baz_b = foo->bars[b].baz;
    return compare_string_t( baz_a, baz_b );
}

/* return 0 or 22, nothing else */
uint8_t compare_foo(foo_t *foo) {
    uint8_t returnvalue = 0;

    index_t s = foo->size;
    bool comparison = compare_bars(foo, 0, 1);
    comparison = compare_bars(foo, s, s+1);
    if(! comparison) {
        returnvalue = 22;
    }
    return returnvalue;
}

void compute(struct In *input, struct Out *output) {
    output->out = 0;              /* return value */

    foo_t foo = input->foo;
    uint8_t result = compare_foo(&foo);
    output->control = result;
    if (result != 0) {
        output->out = 1;
    }
}

example_v_inp_gen.h

uint32_t generated_input[9] = {
        66  , // uchar foo_t.bars[0].baz[0]
        252 , // uchar foo_t.bars[0].baz[1]
        66  , // uchar foo_t.bars[1].baz[0]
        252 , // uchar foo_t.bars[1].baz[1]
        66  , // uchar foo_t.bars[2].baz[0]
        252 , // uchar foo_t.bars[2].baz[1]
        66  , // uchar foo_t.bars[3].baz[0]
        252 , // uchar foo_t.bars[3].baz[1]
        2   , // uint8_t foo_t.size
};

void example_input_gen (mpq_t * input_q, int num_inputs, char *argv[]) {

    for (int i = 0; i < num_inputs; i++) {
        mpq_set_ui(input_q[i], generated_input[i], 1);
    }
}

example.outputs

0
1
-23881866888

I tested this example both with and without the switch to zk-SNARKs based on Groth's generic group construction; this does not have any influence on the result.

Further Analysis

Generated Constraints

These are the generated constraints in gen/example.ZAATAR.spec:

START_INPUT
I0 //__malloc0.foo.bars[0].baz[0] uint bits 8
I1 //__malloc0.foo.bars[0].baz[1] uint bits 8
I2 //__malloc0.foo.bars[1].baz[0] uint bits 8
I3 //__malloc0.foo.bars[1].baz[1] uint bits 8
I4 //__malloc0.foo.bars[2].baz[0] uint bits 8
I5 //__malloc0.foo.bars[2].baz[1] uint bits 8
I6 //__malloc0.foo.bars[3].baz[0] uint bits 8
I7 //__malloc0.foo.bars[3].baz[1] uint bits 8
I8 //__malloc0.foo.size uint bits 8
END_INPUT

START_OUTPUT
O149 //#compute$__compute__ uint bits 1
O150 //__malloc1.out uint bits 1
O151 //__malloc1.control uint bits 5
END_OUTPUT

START_VARIABLES
V77 //ramput uint bits 8
V79 //ramput uint bits 8
V81 //ramput uint bits 8
V83 //ramput uint bits 8
V85 //ramput uint bits 8
V87 //ramput uint bits 8
V89 //ramput uint bits 8
V91 //ramput uint bits 8
V96 //#compute$$#compare_foo$$#compare_bars$$baz_a:addr uint bits 31
V98 //#compute$$#compare_foo$$#compare_bars$$baz_a[0] uint bits 1
V99 //#compute$$#compare_foo$$#compare_bars$$baz_a[1]:addr uint bits 31
V100 //#compute$$#compare_foo$$#compare_bars$$baz_a[1] uint bits 1
V106 //#compute$$#compare_foo$$#compare_bars$$baz_b:addr uint bits 31
V108 //#compute$$#compare_foo$$#compare_bars$$baz_b[0] uint bits 1
V109 //#compute$$#compare_foo$$#compare_bars$$baz_b[1]:addr uint bits 31
V110 //#compute$$#compare_foo$$#compare_bars$$baz_b[1] uint bits 1
V121 //#compute$$#compare_foo$$#compare_bars$$#compare_string_t$$eq:M uint bits 1
V128 //#compute$$#compare_foo$$#compare_bars$$#compare_string_t$$eq:R:M uint bits 1
V132 //#compute$$#compare_foo$$comparison uint bits 1
END_VARIABLES

START_CONSTRAINTS
RAMPUT_FAST ADDR 1342177282 VALUE I2 CONDITION 1 true V77
RAMPUT_FAST ADDR 1342177283 VALUE I3 CONDITION 1 true V79
RAMPUT_FAST ADDR 1342177280 VALUE I0 CONDITION 1 true V81
RAMPUT_FAST ADDR 1342177281 VALUE I1 CONDITION 1 true V83
RAMPUT_FAST ADDR 1342177286 VALUE I6 CONDITION 1 true V85
RAMPUT_FAST ADDR 1342177287 VALUE I7 CONDITION 1 true V87
RAMPUT_FAST ADDR 1342177284 VALUE I4 CONDITION 1 true V89
RAMPUT_FAST ADDR 1342177285 VALUE I5 CONDITION 1 true V91
(  ) * (  ) + ( 2 * I8 + 1342177280 - V96 )
RAMGET_FAST ADDR V96 VALUE V98
(  ) * (  ) + ( V96 + 1 - V99 )
RAMGET_FAST ADDR V99 VALUE V100
(  ) * (  ) + ( 2 * I8 + 1342177282 - V106 )
RAMGET_FAST ADDR V106 VALUE V108
(  ) * (  ) + ( V106 + 1 - V109 )
RAMGET_FAST ADDR V109 VALUE V110
( V108 ) * ( -2 * V98 ) + ( V98 + V108 - V121 )
( V110 ) * ( -2 * V100 ) + ( V100 + V110 - V128 )
( - V121 + 1 ) * ( - V128 + 1 ) + (  - V132 )
(  ) * (  ) + ( -22 * V132 + 22 - O151 )
(  ) * (  ) + (  - O149 )
O151 != 0 - O150
END_CONSTRAINTS

analysing the constraints

By calculating the values by hand from the above description, I was able to reproduce the same output value as in the issue description. As far as I can see, the problem is that two variable V121 and V128 are treated as boolean (1 bit) values while they are actually _uint8t's (8 bits).

If they were boolean, the calculation would produce the correct result. V132 gets assigned not (V121 or V128), which requires both of them to be boolean. With non-boolean values given, V132 also becomes a non-boolean value, which is then passed to the output in O151 in a way that would only result in either 0 or 22, if V132 was a boolean. This not being boolean messes up the constraint, ultimately producing illegal values for that output.

conclusion

for some reason, the type of the characters (which should be unsigned char) is treated as a boolean value. This is also indicated in the spec in the corresponding lines, e.g.

V108 //#compute$$#compare_foo$$#compare_bars$$baz_b[0] uint bits 1

and results in illegal outputs.

Thanks for the report and detailed analysis! I was able to reproduce the bug, and it does indeed seem to be a problem with the type annotations by the compiler. I'll take a look more closely soon and get back to you.

Hi, any news? When do you expect to be able to have a closer look? I would also be interested to have a closer look by myself and to understand better how the compiler process works, but it the structure is complex and it is difficult to figure out where to look for the type annotations. Can you give me any directions?

Apologies for the delayed response. I looked into this a bit, but I haven't been able to find a root cause.

It looks like this is bug that happens when passing structs with input-dependent indices to functions. The incorrect type annotations might be related, but you can see exactly where things are going wrong by adding some printf statements before and after the comparison in compare_string_t:

printf("before: %Zd", eq);
eq = eq && (x == y);
printf("after: %Zd", eq);

which produces:

PRINTF in computation_p 1:
"before: 1" 
PRINTF in computation_p 1:
"after: 8581" 
PRINTF in computation_p 1:
"before: 8581" 
PRINTF in computation_p 1:
"after: 1085539405"

Clearly wrong, no matter what x and y are.

I found two potential workarounds that produce the correct output:

change comparison = compare_bars(foo, s, s+1); to comparison = compare_bars(foo, 2, 3);

Hardcoding the indices allows the compiler to do much more compile-time evaluation and makes the constraints much simpler (and correct, apparently). Maybe not so useful if you actually need a variable-length size, though.

manually inline compare_string_t, i.e. replace compare_bars with the following:

bool compare_bars(foo_t *foo, index_t a, index_t b) {
uint32_t i;
bool eq = 1;
for (i = 0; i < STRING_MAX_LEN; i++) {
eq = eq && (foo->bars[a].baz[i] == foo->bars[b].baz[i]);
}
return eq;
}

This produces the expected output, even when called with input-dependent indices. This should be logically equivalent to your original program, so it might be good enough for your purposes.

As for finding the root cause of the underlying bug, a good place to start is probably by comparing the spec files generated by your original code and the two workarounds. The spec files are generated by the frontend of the compiler (in compiler/frontend), so you'll need to figure out how and where the different statements in the spec file are generated, and what's going wrong.

The incorrect type annotations might be a clue; they are calculated here:

https://github.com/pepper-project/pequin/blob/26f30ede7dbbd40708090b42ec50d887181f3eff/compiler/frontend/src/SFE/Compiler/RestrictedUnsignedIntType.java#L74

and used in compiler/frontend/src/SFE/Compiler/ConstraintWriter.java, I think.

Thank you very much, @maxhowald for your reply and apologies for my late reaction. I spent the last few days trying to figure out the root cause of this issue. After figuring out how to reproduce the execution of ZCC.java on my example program inside an environment that allows for debugging and then stepping through the compilation progress, I believe the problem is the following:

when calling the line string_t baz_a = foo->bars[a].baz; with a being dependent of some input value, the compiler will first create an lvalue for #compute$$#compare_foo$$#compare_bars$$baz_a and initialise this with 0 in https://github.com/pepper-project/pequin/blob/26f30ede7dbbd40708090b42ec50d887181f3eff/compiler/frontend/src/ccomp/parser_hw/CCompiler.java#L622. During this, separate lvalues for the fields of the struct are created and initialised with 0, namely #compute$$#compare_foo$$#compare_bars$$baz_a[0] and #compute$$#compare_foo$$#compare_bars$$baz_a[0]. Because they get assigned a constant value of 0, the type of the values is set to uint bits 1. At this point, this is not an error. But we note that the lvalue #compute$$#compare_foo$$#compare_bars$$baz_a with the type uint bits 1 is stored in the variable table Function.vars.

The next step is then to do the actual initialization as specified in the code; assigning the value of foo->bars[a].baz; to the lvalue #compute$$#compare_foo$$#compare_bars$$baz_a. Because a is input-dependent in the second call, the values of the struct need to be put in ram and then accessed at runtime via an address that is calculated at runtime. Thus, the assignment of the value happens with a ramget operation (98 ramget_fast VALUE 98 inputs [ ADDR 97 ] //#compute$$#compare_foo$$#compare_bars$$baz_a[0] uint bits 1 in the generated example.c.ZAATAR.circuit file after first execution of the compiler and before optimization). This Statement is again an assignment that, intermediately, was an assignment to #compute$$#compare_foo$$#compare_bars$$baz_a and is split up to two assignments for the fields of the array in https://github.com/pepper-project/pequin/blob/master/compiler/frontend/src/SFE/Compiler/RamGetEnhancedStatement.java#L40-L46.

The root cause of the compilation error

In the aforementioned code, for each field in the array, the lvalue is retrieved via value.fieldEltAt(i);. This results in a lookup of the variable table and returns the lvalue of the initialisation with 0, which is known to need only 1 bit until now. However, when assigning a new value from RAM, we cannot treat the value as a constant and therefore cannot shrink the size of the type to match the (unknown) value. Thus, at this point, the lvalue should be assigned its declared size. But this does not happen. Instead, the incorrect cached value from the variable table is kept and later, when performing a comparison, this leads to the selection of a faster, but incorrect arithmetic comparison gate meant for boolean values, instead of a generic comparison gate.

Fix

Index: pequin/compiler/frontend/src/SFE/Compiler/RamGetEnhancedStatement.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- pequin/compiler/frontend/src/SFE/Compiler/RamGetEnhancedStatement.java  (revision e5976f911a4ddda79a41f27ae83ac68c2d34152f)
+++ pequin/compiler/frontend/src/SFE/Compiler/RamGetEnhancedStatement.java  (revision 0e0b243081699d806ce61723a493c22057e56d49)
@@ -39,6 +39,16 @@
        if (value.size() > 1) {
            for (int i = 0; i < value.size(); i++) {
                LvalExpression lval = value.fieldEltAt(i);
+               if (lval.getLvalue() instanceof VarLvalue) {
+                   Type declaredType = ((VarLvalue) lval.getLvalue()).getDeclaredType();
+                   lval.getLvalue().setType(declaredType);
+               } else {
+                   throw new RuntimeException("Failed to reset the type of the lvalue " +
+                           lval.getLvalue().toString() + " to its declared type after " +
+                           "ramget: I don't know how to determine the declared type " +
+                           "of a " + lval.getClass() + "."
+                   );
+               }
                Expression fieldAddr = new BinaryOpExpression(new PlusOperator(),
                    address, IntConstant.valueOf(i));
                Statement ramget = new RamGetEnhancedStatement(lval, fieldAddr);

As far as i understand the problem, the above patch fixes the issue. I do not know if maybe there is another location in the code where the same problem occurs, you should have a look at that. The fix changes the type of the lvalue that has been created during the first initialisation of the variable. That undoes the optimization of the type. In my code example, this has no effect, as the value is overridden immediately; but it might have an effect if:

the lvalue gets assigned some small (e.g. 1 bit) constants,
then some operations are performed (that cannot be optimized at compile time),
and only afterwards a value is assigned with _ramgetfast.

In that scenario (which i have not tested) the patch might also change the type hint in the variable table retroactively and undo type optimizations. Might cause some issues if the type has been set to be bigger than the declared type? (Wouldn't be legal C code, but seems to be the current handling of integer overflows in pequin) But other than for optimization, the type of an lvalue does not seem to have any effect on the computation anyway. I might be completely wrong here, though.

pepper-project / pequin