Error in Randomized generation of data-types with multiple levels of depth

prpr2770 commented 3 years ago

There seems to be a bug in the compiler that implements the __randomize() funciton for data types that contain >2 levels of depth.

I'm attempting to generate messages with the following levels of hierarchy:

#lang ivy1.7

include collections

type bit
type byte 
type bytes

type int

type levelD = {d1, d2, d3, d4}
type levelE = {e1, e2, e3}

type levelA = struct{
    idA : int,
    valueA : bit
}

type levelB = struct{
    idB : int,
    valueB: byte
}

type levelC = struct{
    idC : int,
    valueC : bytes
}

# -------------------------------
type level4_t

object level4A_t ={
    variant this of level4_t = struct{
        id4A : int, 
        value4A : levelD
    }
}

object level4B_t ={
    variant this of level4_t = struct{
        id4B : int, 
        value4B : levelE
    }
}

# --------------------------------
type level3_t

object level3_A = {
    variant this of level3_t = struct{
        id3 : int, 
        value3 :levelA 
    }
}

#instance level3_B : level3(levelB)
object level3_B = {
    variant this of level3_t = struct{
        id3 : int, 
        value3 :levelB 
    }
}

#instance level3_C : level3(levelC)
object level3_C = {
    variant this of level3_t = struct{
        id3 : int, 
        value3 :levelC 
    }
}

#instance level3_level4 : level3(level4_t)

object level3_level4 = {
    variant this of level3_t = struct{
        id3 : int, 
        value3 :level4_t 
    }
}

# --------------------------------
type level2_t

object level2_messageA_t  ={
    variant this of level2_t = struct{
        id2 : int,
        criticality: bit,
        value : level3_t
    }
}

object level2_messageB_t  ={
    variant this of level2_t = struct{
        id2 : int,
        criticality: bit,
        value : levelD
    }
}

object level2_messageC_t  ={
    variant this of level2_t = struct{
        id2 : int,
        criticality: bit,
        value : levelE
    }
}

# --------------------------------------------
type level1_t

object level1_message_t = {
    variant this of level1_t = struct{
        id1 : int,
        list1 : vector[level2_t]
    }
}

object level1_msgA_t = {
    variant this of level1_t = struct{
        id1 : int,
        list1 : vector[levelA]
    }
}

object level1_msgB_t = {
    variant this of level1_t = struct{
        id1 : int,
        list1 : vector[levelB]
    }
}

# --------------------------------------------
type level0_t

object level0_message_t = {
    variant this of level0_t= struct{
        id0 : int,
        value0: level1_message_t
    }
}

When I attempt to use the random generator to create random instances of a message of type level0_t, I obtain the following sample messages

> intf.send({level0_message_t:{id0:0,value0:{id1:0,list1:[{level2_messageC_t:{id2:0,criticality:0,value:e1}}]}}})
> intf.send({level0_message_t:{id0:0,value0:{id1:0,list1:[{level2_messageA_t:{id2:0,criticality:0,value:{}}},{level2_messageC_t:{id2:0,criticality:0,value:e1}},{level2_messageA_t:{id2:0,criticality:0,value:{}}}]}}})
> intf.send({level0_message_t:{id0:0,value0:{id1:0,list1:[{level2_messageB_t:{id2:0,criticality:0,value:d3}}]}}})
> intf.send({level0_message_t:{id0:0,value0:{id1:0,list1:[{level2_messageC_t:{id2:0,criticality:0,value:e3}},{level2_messageA_t:{id2:0,criticality:0,value:{}}}]}}})
> intf.send({level0_message_t:{id0:0,value0:{id1:0,list1:[{level2_messageC_t:{id2:0,criticality:0,value:e3}}]}}})
> intf.send({level0_message_t:{id0:0,value0:{id1:0,list1:[{level2_messageA_t:{id2:0,criticality:0,value:{}}},{level2_messageB_t:{id2:0,criticality:0,value:d3}}]}}})
> intf.send({level0_message_t:{id0:0,value0:{id1:0,list1:[{level2_messageB_t:{id2:0,criticality:0,value:d1}}]}}})
> intf.send({level0_message_t:{id0:0,value0:{id1:0,list1:[{level2_messageA_t:{id2:0,criticality:0,value:{}}},{level2_messageC_t:{id2:0,criticality:0,value:e3}},{level2_messageA_t:{id2:0,criticality:0,value:{}}}]}}})
> intf.send({level0_message_t:{id0:0,value0:{id1:0,list1:[{level2_messageA_t:{id2:0,criticality:0,value:{}}},{level2_messageA_t:{id2:0,criticality:0,value:{}}},{level2_messageB_t:{id2:0,criticality:0,value:d2}}]}}})
> intf.send({level0_message_t:{id0:0,value0:{id1:0,list1:[{level2_messageA_t:{id2:0,criticality:0,value:{}}},{level2_messageC_t:{id2:0,criticality:0,value:e3}}]}}})
> intf.send({level0_message_t:{id0:0,value0:{id1:0,list1:[]}}})
> intf.send({level0_message_t:{id0:0,value0:{id1:0,list1:[{level2_messageA_t:{id2:0,criticality:0,value:{}}}]}}})
> intf.send({level0_message_t:{id0:0,value0:{id1:0,list1:[{level2_messageC_t:{id2:0,criticality:0,value:e2}},{level2_messageC_t:{id2:0,criticality:0,value:e1}},{level2_messageA_t:{id2:0,criticality:0,value:{}}}]}}})
> intf.send({level0_message_t:{id0:0,value0:{id1:0,list1:[{level2_messageC_t:{id2:0,criticality:0,value:e1}},{level2_messageC_t:{id2:0,criticality:0,value:e3}}]}}})
> intf.send({level0_message_t:{id0:0,value0:{id1:0,list1:[{level2_messageC_t:{id2:0,criticality:0,value:e1}},{level2_messageB_t:{id2:0,criticality:0,value:d3}}]}}})
> intf.send({level0_message_t:{id0:0,value0:{id1:0,list1:[]}}})
> intf.send({level0_message_t:{id0:0,value0:{id1:0,list1:[{level2_messageB_t:{id2:0,criticality:0,value:d4}},{level2_messageC_t:{id2:0,criticality:0,value:e3}}]}}})
> intf.send({level0_message_t:{id0:0,value0:{id1:0,list1:[{level2_messageA_t:{id2:0,criticality:0,value:{}}}]}}})
> intf.send({level0_message_t:{id0:0,value0:{id1:0,list1:[{level2_messageC_t:{id2:0,criticality:0,value:e1}}]}}})
> intf.send({level0_message_t:{id0:0,value0:{id1:0,list1:[{level2_messageB_t:{id2:0,criticality:0,value:d3}},{level2_messageC_t:{id2:0,criticality:0,value:e1}}]}}})
> intf.send({level0_message_t:{id0:0,value0:{id1:0,list1:[{level2_messageA_t:{id2:0,criticality:0,value:{}}}]}}})
> intf.send({level0_message_t:{id0:0,value0:{id1:0,list1:[{level2_messageA_t:{id2:0,criticality:0,value:{}}}]}}})
> intf.send({level0_message_t:{id0:0,value0:{id1:0,list1:[{level2_messageA_t:{id2:0,criticality:0,value:{}}}]}}})

Note that the all instances of level2_messageA_t has its value: {}. The compiler generated cpp codebase has the following lines, which indicates that level2_message_t.value is not being randomly generated.

template <>
void  __randomize<message_app::level2_messageA_t>( gen &g, const  z3::expr &v){
    __randomize<int>(g,g.apply("level2_messageA_t.id2",v));
    __randomize<int>(g,g.apply("level2_messageA_t.criticality",v));
}

However, the corresponding __from_solver functionality has the following structure, where we see that all three components of the data-type are being processes.

template <>
void  __from_solver<message_app::level2_messageA_t>( gen &g, const  z3::expr &v,message_app::level2_messageA_t &res){
    __from_solver(g,g.apply("level2_messageA_t.id2",v),res.id2);
    __from_solver(g,g.apply("level2_messageA_t.criticality",v),res.criticality);
    __from_solver(g,g.apply("level2_messageA_t.value",v),res.value);
}

Is there a way in which the compiler is constrained to handle random-generation only upto 2 depth-levels?

prpr2770 commented 3 years ago

I reviewed the code in ivy_to_cpp.py and tried to determine the difference in the functions that's used to generate the above lines.

The lines corresponding to __from_solver() are as follows:

def emit_eval(header,symbol,obj=None,classname=None,lhs=None): 
    global indent_level
    name = symbol.name
    sname = slv.solver_name(symbol)
    cname = varname(name) if lhs is None else code_eval(header,lhs)
    sort = symbol.sort
    domain = sort_domain(sort)
    for idx,dsort in enumerate(domain):
        dcard = sort_card(dsort)
        indent(header)
        header.append("for (int X{} = 0; X{} < {}; X{}++)\n".format(idx,idx,dcard,idx))
        indent_level += 1
    indent(header)
    if sort.rng.name in im.module.sort_destructors or sort.rng.name in im.module.native_types or sort.rng in sort_to_cpptype:
        code_line(header,'__from_solver<'+classname+'::'+varname(sort.rng.name)+'>(*this,apply("'+sname+'"'+''.join(','+int_to_z3(s,'X{}'.format(idx)) for idx,s in enumerate(domain))+'),'+cname+''.join('[X{}]'.format(idx) for idx in range(len(domain)))+')')
    else:
        header.append((obj + '.' if obj else '')
                      + cname + ''.join("[X{}]".format(idx) for idx in range(len(domain)))
                      + ' = ({})eval_apply("{}"'.format(ctype(sort.rng,classname=classname),sname)
                      + ''.join(",X{}".format(idx) for idx in range(len(domain)))
                      + ");\n")
    for idx,dsort in enumerate(domain):
        indent_level -= 1

# --------------------------------------
# lines 2896::
                if target.get() in ["gen","test"]:
                    impl.append('template <>\n')
                    open_scope(impl,line='void  __from_solver<' + cfsname + '>( gen &g, const  z3::expr &v,' + cfsname + ' &res)')
                    for idx,sym in enumerate(destrs):
                        fname = memname(sym)
                        vs = variables(sym.sort.dom[1:])
                        for v in vs:
                            open_loop(impl,[v])
                        sname = slv.solver_name(sym)
                        code_line(impl,'__from_solver(g,g.apply("'+sname+'",v'+ ''.join(',g.int_to_z3(g.sort("'+v.sort.name+'"),'+varname(v)+')' for v in vs)+'),res.'+fname+''.join('[{}]'.format(varname(v)) for v in vs) + ')')
                        for v in vs:
                            close_loop(impl,[v])
                    close_scope(impl)

The lines related to __randomize() are as follows:

def emit_randomize(header,symbol,classname=None):

    global indent_level
    name = symbol.name
    sname = slv.solver_name(symbol)
    cname = varname(name)
    sort = symbol.sort
    domain = sort_domain(sort)
    for idx,dsort in enumerate(domain):
        dcard = sort_card(dsort)
        indent(header)
        header.append("for (int X{} = 0; X{} < {}; X{}++)\n".format(idx,idx,dcard,idx))
        indent_level += 1
    if sort.rng.name in im.module.sort_destructors or sort.rng.name in im.module.native_types or sort.rng in sort_to_cpptype:
        code_line(header,'__randomize<'+classname+'::'+varname(sort.rng.name)+'>(*this,apply("'+sname+'"'+''.join(','+int_to_z3(s,'X{}'.format(idx)) for idx,s in enumerate(domain))+'))')
    else:
        indent(header)
        if il.is_uninterpreted_sort(sort.rng):
            raise iu.IvyError(None,'cannot create test generator because type {} is uninterpreted'.format(sort.rng))
        header.append('randomize("{}"'.format(sname)
                      + ''.join(",X{}".format(idx) for idx in range(len(domain)))
                      + ");\n")
    for idx,dsort in enumerate(domain):
        indent_level -= 1    

#    indent(header)
#    header.append('randomize("{}");\n'.format(slv.solver_name(symbol)))

# ---------------------------------------------------------------------
# lines : 2924 onwards

                    open_scope(impl,line='void  __randomize<' + cfsname + '>( gen &g, const  z3::expr &v)')
                    for idx,sym in enumerate(destrs):
                        # we can't randomize a type that z3 is representing with an uninterpreted sort,
                        # because z3 has no numerals for these sorts. Rather than throwing an error, however,
                        # we just don't randomize, in case randomization for this type is not actually needed.
                        # In principle, we should check whether randomiation is needed but this is pretty tricky.
                        if is_really_uninterpreted_sort(sym.sort.rng):
                            continue
#                            raise iu.IvyError(None,'cannot create test generator because type {} is uninterpreted'.format(sym.sort.rng))
                        fname = memname(sym)
                        vs = variables(sym.sort.dom[1:])
                        for v in vs:
                            open_loop(impl,[v])
                        sname = slv.solver_name(sym)
                        code_line(impl,'__randomize<'+ctypefull(sym.sort.rng,classname=classname)+'>(g,g.apply("'+sname+'",v'+ ''.join(',g.int_to_z3(g.sort("'+v.sort.name+'"),'+varname(v)+')' for v in vs)+'))')
                        for v in vs:
                            close_loop(impl,[v])
                    close_scope(impl)

There seems to be a difference in the positioning of indent(header) command in both emit_randomize() and emit_eval(). I'm not sure if this is an issue, as I'm currently unaware of its functionality.

prpr2770 commented 3 years ago

I modified the location of indent(header) and that didn't work out. So, I believe the error in the difference between these two lines:

source: emit_eval(header,symbol,obj=None,classname=None,lhs=None):

        header.append((obj + '.' if obj else '')
                      + cname + ''.join("[X{}]".format(idx) for idx in range(len(domain)))
                      + ' = ({})eval_apply("{}"'.format(ctype(sort.rng,classname=classname),sname)
                      + ''.join(",X{}".format(idx) for idx in range(len(domain)))
                      + ");\n")

source : emit_randomize(header,symbol,classname=None):

        header.append('randomize("{}"'.format(sname)
                      + ''.join(",X{}".format(idx) for idx in range(len(domain)))
                      + ");\n")

Could you please advise on how the syntax for header needs to be modified?

prpr2770 commented 3 years ago

I could solve this issue by making the following changes:

Modification in emit_randomize(header,symbol,classname=None):

    header.append(cname + ''.join("[X{}]".format(idx) for idx in range(len(domain)))
                  + ' = ({})eval_apply("{}"'.format(ctype(sort.rng,classname=classname),sname)
                  + ''.join(",X{}".format(idx) for idx in range(len(domain)))
                  + ");\n")

Modifications at the place where the __randomize() is being generated.

# ---------------------------------------------------------------------
# lines : 2924 onwards

                open_scope(impl,line='void  __randomize<' + cfsname + '>( gen &g, const  z3::expr &v)')
                for idx,sym in enumerate(destrs):
                    # we can't randomize a type that z3 is representing with an uninterpreted sort,
                    # because z3 has no numerals for these sorts. Rather than throwing an error, however,
                    # we just don't randomize, in case randomization for this type is not actually needed.
                    # In principle, we should check whether randomiation is needed but this is pretty tricky.
                    if is_really_uninterpreted_sort(sym.sort.rng):
                        continue
#                            raise iu.IvyError(None,'cannot create test generator because type {} is uninterpreted'.format(sym.sort.rng))
                    fname = memname(sym)
                    vs = variables(sym.sort.dom[1:])
                    for v in vs:
                        open_loop(impl,[v])
                    sname = slv.solver_name(sym)
                    code_line(impl,'__randomize<'+ctypefull(sym.sort.rng,classname=classname)+'>(g,g.apply("'+sname+'",v'+ ''.join(',g.int_to_z3(g.sort("'+v.sort.name+'"),'+varname(v)+')' for v in vs)+'))')
                    for v in vs:
                        close_loop(impl,[v])
                close_scope(impl)

The main issue is the following line:

                        if is_really_uninterpreted_sort(sym.sort.rng):
                            continue

By removing this constraint, the messages are generated as expected.

                        if is_really_uninterpreted_sort(sym.sort.rng):
                            pass

However, I'm not certain how this might impact other parts of IvY's functionality. Kindly advise on the type of tests that need to be conducted, to ensure that this doesn't break any other functionality.

The following message was generated during one of the runs/iterations:

> intf.send({level0_message_t:{id0:0,value0:{id1:0,list1:[{level2_messageA_t:{id2:0,criticality:0,value:{level3_level4:{id3:0,value3:{level4A_t:{id4A:0,value4A:d2}}}}}},{level2_messageB_t:{id2:0,criticality:0,value:{level3_C:{id3:0,value3:{idC:0,valueC:0}}}}},{level2_messageC_t:{id2:0,criticality:0,value:e2}}]}}})

prpr2770 commented 3 years ago

On further reflection of the above issue and the hack that was identified to get around it, we believe it would be more important to understand the following:

Why was level3_t being identified as an uninterpreted sort when level2_t is not, even though both are similarly constructed. There's no specific definition for themselves, but there's structural description provided for their variants.

kenmcmil / ivy

Error in Randomized generation of data-types with multiple levels of depth #19