actgardner / gogen-avro

Generate Go code to serialize and deserialize Avro schemas
MIT License
365 stars 85 forks source link

Union usability improvements #172

Open ahawtho opened 2 years ago

ahawtho commented 2 years ago

Including a NewXxxUnionTttt and/or SetTttt for each union type that sets the appropriate type enum value would significantly improve usability. Currently, many of the straightforward approaches for setting a non-default value produce panic()'s.

See the end for a strawman for starting discussion.

Schema:

{
  "type": "record",
  "name": "rec",
  "fields": [
    {
      "name": "field1",
      "type": ["null", "string"],
      "default": null
    }
  ]
}

In v10, this produces the following code:


type Rec struct {
    Field1 *Field1Union `json:"field1"`
}
//...
func NewRec() Rec {
    r := Rec{}
    r.Field1 = NewField1Union()

    r.Field1 = nil
    return r
}
//...

type Field1Union struct {
    Null      *types.NullVal
    String    string
    UnionType Field1UnionTypeEnum
}

func NewField1Union() *Field1Union {
    return &Field1Union{}
}
//...
func (_ *Field1Union) SetString(v string)  { panic("Unsupported operation") }

Test code:

func TestCreateRec(t *testing.T) {
    tests := []struct {
        name   string
        create func() structs.Rec
        want   string
    }{
        {
            name: "default struct",
            create: func() structs.Rec {
                return structs.Rec{
                    Field1: &structs.Field1Union{
                        // Default value of Field1UnionType == 0, which is invalid
                        String: "test1",
                    },
                }
            },
            want: "test1",
        },
        {
            name: "NewRec()",
            create: func() structs.Rec {
                r := structs.NewRec()
                // the Field1 struct starts as nil, so any invocations on Field1 panic
                r.Field1.String = "test2"
                return r
            },
            want: "test2",
        },
        {
            name: "NewField1Union()",
            create: func() structs.Rec {
                r := structs.NewRec()
                r.Field1 = structs.NewField1Union()
                // a union returned by NewField1Union() is always invalid due to default enum type
                r.Field1.String = "test3"
                return r
            },
            want: "test3",
        },
        {
            name: "SetString()",
            create: func() structs.Rec {
                r := structs.NewRec()
                r.Field1 = structs.NewField1Union()
                // SetString() is unsupported, so this panics.
                r.Field1.SetString("test4")
                return r
            },
            want: "test4",
        },
    }
    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            defer func() {
                e := recover()
                if e != nil {
                    t.Errorf("test panic'd: %v", e)
                }
            }()
            sr := tt.create()
            b := &bytes.Buffer{}
            err := sr.Serialize(b)
            if err != nil {
                t.Errorf("error serializing Rec: %v", err)
            }
            dr, err := structs.DeserializeRec(b)
            if err != nil {
                t.Errorf("error deserializing Rec: %v", err)
            }
            if got := dr.Field1.String; got != tt.want {
                t.Errorf("String() = %v, want %v", got, tt.want)
            }
        })
    }
}

E.g., for the example above, something like the following would improve usability:

func NewField1UnionNull() *Field1Union {
    return nil
}
func NewField1UnionString(v string) *Field1Union {
    return &Field1Union{
        String: v,
        UnionType: Field1UnionTypeEnumString,
    }
}

func (s *Field1Union) SetString(v string)  {
    s.String = v
    s.UnionType = Field1UnionTypeEnumString
}
// SetNull() can't be implemented with current generated code, but if serialization also
// supported an empty union struct, the implementation could be as follows. However, the
// current generated code provides better protection against unintentional data loss (e.g.
// a developer sets the field but not the UnionType)
func (s *Field1Union) SetNull()  {
    s.String = ""
    s.UnionType = 0
}