golang / go

The Go programming language
https://go.dev
BSD 3-Clause "New" or "Revised" License
122.85k stars 17.52k forks source link

proposal: encoding/json: allow struct encoder to handle undefined fields #64515

Open KumanekoSakura opened 9 months ago

KumanekoSakura commented 9 months ago

Proposal Details

I'm aware of #63397, but here is a simple approach which I want to use.

I want to use "struct" for defining API responses. But the API is so complicated that some of fields in that "struct" are conditionally initialized (i.e. needs to hide fields when values are not assigned). But due to golang's zero values, I can't hide fields in that "struct".

As a result, I'm currently using "map[string]any" instead of "struct" in order to make sure that only fields that have values defined (they might be golang's zero values) are included in the API responses. But since "struct" is more easy to understand, I want to make it possible to hide fields in a "struct" when values are not assigned.

The diff for json package and an example usage of this proposal are shown below.

--- a/json/encode.go
+++ b/json/encode.go
@@ -727,6 +727,10 @@ type structFields struct {
    nameIndex map[string]int
 }

+type ValueDefinedTester interface {
+   IsDefined() bool
+}
+
 func (se structEncoder) encode(e *encodeState, v reflect.Value, opts encOpts) {
    next := byte('{')
 FieldLoop:
@@ -748,6 +752,9 @@ FieldLoop:
        if f.omitEmpty && isEmptyValue(fv) {
            continue
        }
+       if vdt, ok := fv.Interface().(ValueDefinedTester); ok && !vdt.IsDefined() {
+           continue
+       }
        e.WriteByte(next)
        next = ','
        if opts.escapeHTML {
package main

import (
    "encoding/json"
    "fmt"
)

type MyInt32 struct {
    intValue  int32
    isNull    bool
    isDefined bool
}

func (t MyInt32) IsDefined() bool {
    return t.isDefined
}

func (t MyInt32) MarshalJSON() ([]byte, error) {
    if t.isNull {
        return []byte("null"), nil
    } else {
        return []byte(fmt.Sprint(t.intValue)), nil
    }
}

func (t MyInt32) GetValue() int32 {
    return t.intValue
}

func (t *MyInt32) SetValue(v int32) {
    t.intValue = v
    t.isNull = false
    t.isDefined = true
}

func (t MyInt32) IsNull() bool {
    return t.isNull
}

func (t *MyInt32) SetNull() {
    t.intValue = 0
    t.isNull = true
    t.isDefined = true
}

func (t *MyInt32) Reset() {
    t.intValue = 0
    t.isNull = false
    t.isDefined = false
}

func main() {
    type MyStruct struct {
        I MyInt32
    }
    var s MyStruct
    s.I.SetValue(100) // => {"I":100}
    s.I.SetNull()     // => {"I":null}
    s.I.Reset()       // => {}
    if out, err := json.Marshal(s); err == nil {
        fmt.Println(string(out))
    }
}
seankhliao commented 9 months ago

what's wrong with using a pointer?

KumanekoSakura commented 9 months ago

what's wrong with using a pointer?

Firstly, declaring a variable as pointer is prone to null pointer dereference bugs. Declaring a variable as non-pointer and examining whether to include that variable into JSON output is the safer. Also, IsDefined() approach can give developers full control over whether to include that variable into JSON output, unlike "omitempty" flag which can give developers control of only zero value of limited data types.

Secondly, asking developers to use pointers only for controlling whether a field of a struct should be included into JSON output

one := func() int32 {
    return 1
}

type struct1 struct {
    I1 *int32
    I2 *int32
    S  *string
}
var s1 struct1

is boring, for golang does not allow directly assiging constant values / fuction return values into a pointer variable like

s1.I1 = &int32(100)
s1.S = &"abc"
s1.I2 = &one()

and therefore have to do like

tmp_I1 := int32(100)
s1.I1 = &tmp_I1
tmp_S := "abc"
s1.S = &tmp_S
tmp_I2 := one()
s1.I2 = &tmp_I2

. On the other hand,

type struct2 struct {
    I1 MyInt32
    I2 MyInt32
    IA MyInt32Array
    S  MyString
    SA MyStringArray
}
var s2 struct2
s2.I1.SetValue(100)
s2.I2.SetValue(one())
s2.IA.SetValues([]int32{100, 200, 300})
s2.S.SetValue("abc")
s2.SA.SetValues([]string{"a", "b", "c"})

is easier to write.

KumanekoSakura commented 9 months ago

Additional information:

Since the API client (JavaScript programs) treats var1 === 0 and var1 === null and var1 === undefined differently, I need to be able to handle all three cases listed below.

Since a field of a struct declared as "*int32" with "omitempty" cannot handle case 2, I need to implement MarshalJSON() using a struct.

That is,

type struct1 struct {
    I1 *int32
    I2 *int32
    S  *string
}

in the previous comment will actually be

type struct1 struct {
    I1 *MyInt32
    I2 *MyInt32
    S  *MyString
}

and reading/writing these fields safely will become more complicated.

Therefore, I want to declare struct without using pointer

type struct2 struct {
    I1 MyInt32
    I2 MyInt32
    S  MyString
}

and allow struct encoder to control whether to include each field.

KumanekoSakura commented 9 months ago

Hmm, it seems that we can avoid defining a "struct" if we use "**int32" and wrapper functions for hiding temporary variables.

I'm not sure which approach is more friendly to developers.

package main

import (
    "encoding/json"
    "fmt"
)

func valInt32(i int32) **int32 {
    pi := &i
    return &pi
}

func nullInt32() **int32 {
    var pi *int32
    return &pi
}

func undefInt32() **int32 {
    return nil
}

func main() {
    type struct3 struct {
        I **int32 `json:"i,omitempty"`
        J **int32 `json:"j,omitempty"`
        K **int32 `json:"k,omitempty"`
        L **int32 `json:"l,omitempty"`
    }
    var v struct3
    v.I = valInt32(0)
    v.J = valInt32(100)
    v.K = nullInt32()
    v.L = undefInt32()
    if out, err := json.Marshal(v); err == nil {
        fmt.Println(string(out)) // {"i":0,"j":100,"k":null}
    }
}
seankhliao commented 9 months ago

see previously #50360 #48702

apparentlymart commented 9 months ago

From the motivating example it seems like you want to be able to represent both "property totally absent" and "property present but set to null" as independent cases in your program.

One notable gap in the current design -- unless I've just been missing it all these years! -- is a way for a json.Marshaler to decide dynamically whether it constitutes an "empty" value for the purposes of omitempty. The definition of "empty" is currently restricted to a fixed set of rules based on type kind.

Personally I tend to think that encoding/json should focus primarily on the common case in its built-in behaviors, while providing extension points so that third-party code can extend the behavior for less common cases. With that idea in mind, it seems like the most minimal extension to the encoding/json API to meet this need would be an additional optional interface for deciding whether a json.Marshaler is "empty":

type MarshalerEmpty interface {
    Marshaler
    MarshalJSONEmpty() bool
}

The isEmptyValue function could then check whether the given value implements this interface, and if so call MarshalJSONEmpty and return its result.

You could then implement your "value that might be null or absent" concept in separate library code. For example:

type JSONValue[T any] struct {
    value   T
    defined bool
    null    bool
}

func (v JSONValue[T]) MarshalJSON() ([]byte, error) {
    if !(v.defined && v.null) {
        return []byte(`null`), nil
    }
    return json.Marshal(v.value)
}

func (v JSONValue[T]) MarshalJSONEmpty() bool {
    return !v.defined
}

Since the concept of "omitempty" is not applicable when a value appears somewhere other than directly in a struct field, MarshalJSON must still return something valid when the value is "not defined", and so I decided to just return null in that case for the sake of example. But my intent is that the MarshalJSONEmpty method would prevent that from being included if there is a field of type JSONValue[T] with omitempty set.

This is, of course, essentially an entirely different proposal for getting the same effect. After some searching in the history, I think I've essentially just written up a JSON-specific version of https://github.com/golang/go/issues/11939, and so if this seems promising then probably better to continue discussion over there rather than here, I suppose.

KumanekoSakura commented 9 months ago

From the motivating example it seems like you want to be able to represent both "property totally absent" and "property present but set to null" as independent cases in your program.

Yes.

The definition of "empty" is currently restricted to a fixed set of rules based on type kind.

Yes, that limitation is the problem.

I tried an example (shown below) that makes use of "map[bool]" instead of "struct" so that isEmptyValue() test is applied to pseudo-struct. Do you think that this approach make sense?

package main

import (
    "bytes"
    "encoding/json"
    "fmt"
    "strconv"
)

type MyInt32 map[bool]int32 // true is for value, false is for null, empty is for undefined.

func (t MyInt32) MarshalJSON() ([]byte, error) {
    if t.IsNull() {
        return []byte("null"), nil
    }
    return []byte(fmt.Sprint(t[true])), nil
}

func (t *MyInt32) UnmarshalJSON(data []byte) error {
    if bytes.Equal(data, []byte("null")) {
        t.SetNull()
        return nil
    }
    i, err := strconv.ParseInt(string(data), 10, 32)
    t.SetValue(int32(i))
    return err
}

func (t *MyInt32) SetValue(v int32) {
    *t = map[bool]int32{true: v}
}

func (t MyInt32) GetValue() int32 {
    return t[true]
}

func (t *MyInt32) SetNull() {
    *t = map[bool]int32{false: 0}
}

func (t MyInt32) IsNull() bool {
    _, found := t[false]
    return found
}

func (t *MyInt32) Reset() {
    *t = map[bool]int32{}
}

func (t MyInt32) IsDefined() bool {
    return len(t) != 0
}

func main() {
    type struct4 struct {
        I MyInt32 `json:"i,omitempty"`
        J MyInt32 `json:"j,omitempty"`
        K MyInt32 `json:"k,omitempty"`
        L MyInt32 `json:"l,omitempty"`
    }
    var v1, v2 struct4
    v1.I.SetValue(100)
    v1.J.SetNull()
    v1.K.Reset()
    if out, err := json.Marshal(v1); err == nil && json.Unmarshal(out, &v2) == nil {
        fmt.Println(string(out))                                                                                                                                                                                            // => {"i":100,"j":null}
        fmt.Println(v1.I.IsDefined(), v1.I.IsNull(), v1.I.GetValue(), v1.J.IsDefined(), v1.J.IsNull(), v1.J.GetValue(), v1.K.IsDefined(), v1.K.IsNull(), v1.K.GetValue(), v1.L.IsDefined(), v1.L.IsNull(), v1.L.GetValue()) // => true false 100 true true 0 false false 0 false false 0
        fmt.Println(v2.I.IsDefined(), v2.I.IsNull(), v2.I.GetValue(), v2.J.IsDefined(), v2.J.IsNull(), v2.J.GetValue(), v2.K.IsDefined(), v2.K.IsNull(), v2.K.GetValue(), v2.L.IsDefined(), v2.L.IsNull(), v2.L.GetValue()) // => true false 100 true true 0 false false 0 false false 0
    }
}
apparentlymart commented 9 months ago

Using a single-element map which can be nil to represent absence is a clever trick!

KumanekoSakura commented 9 months ago

Thank you. Based on your example, I tried below version and it seems working as expected.

I wish that json package provides a global flag for configuring encOpts{} for json.Marshal() because my API needs to use escapeHTML = false. Then, I will be able to use json.Marshal() instead of using json.NewEncoder() and calling SetEscapeHTML(false)...

package main

import (
    "bytes"
    "encoding/json"
    "fmt"
)

type jsonValueStruct[T any] struct {
    Value T
}

type JSONValue[T any] map[bool]jsonValueStruct[T] // true is for value, false is for null, empty is for undefined.

func (t JSONValue[T]) MarshalJSON() ([]byte, error) {
    if t.IsNull() {
        return []byte("null"), nil
    } else if !t.IsDefined() {
        return []byte{}, fmt.Errorf("value is not set, forgot to set omitempty flag?")
    }
    return json.Marshal(t[true].Value)
}

func (t *JSONValue[T]) UnmarshalJSON(data []byte) error {
    if bytes.Equal(data, []byte("null")) {
        t.SetNull()
        return nil
    }
    var v T
    err := json.Unmarshal(data, &v)
    t.SetValue(v)
    return err
}

func (t *JSONValue[T]) SetValue(v T) {
    *t = map[bool]jsonValueStruct[T]{true: {Value: v}}
}

func (t JSONValue[T]) GetValue() T {
    return t[true].Value
}

func (t *JSONValue[T]) SetNull() {
    *t = map[bool]jsonValueStruct[T]{false: {}}
}

func (t JSONValue[T]) IsNull() bool {
    _, found := t[false]
    return found
}

func (t *JSONValue[T]) Reset() {
    *t = map[bool]jsonValueStruct[T]{}
}

func (t JSONValue[T]) IsDefined() bool {
    return len(t) != 0
}

func main() {
    type struct5 struct {
        I0 JSONValue[int32]          `json:"int32_0,omitempty"`
        I1 JSONValue[int32]          `json:"int32_1,omitempty"`
        I2 JSONValue[int32]          `json:"int32_2,omitempty"`
        I3 JSONValue[int32]          `json:"int32_3,omitempty"`
        J0 JSONValue[string]         `json:"string_0,omitempty"`
        J1 JSONValue[string]         `json:"string_1,omitempty"`
        J2 JSONValue[string]         `json:"string_2,omitempty"`
        J3 JSONValue[string]         `json:"string_3,omitempty"`
        K0 JSONValue[map[string]any] `json:"map_0,omitempty"`
        K1 JSONValue[map[string]any] `json:"map_1,omitempty"`
        K2 JSONValue[map[string]any] `json:"map_2,omitempty"`
        K3 JSONValue[map[string]any] `json:"map_3,omitempty"`
        L0 JSONValue[[]int]          `json:"intArray_0,omitempty"`
        L1 JSONValue[[]int]          `json:"intAttay_1,omitempty"`
        L2 JSONValue[[]int]          `json:"intAttay_2,omitempty"`
        L3 JSONValue[[]int]          `json:"intAttay_3,omitempty"`
    }
    var v [2]struct5
    v[0].I0.SetValue(100)
    v[0].I1.SetNull()
    v[0].I2.Reset()
    v[0].J0.SetValue(`abc`)
    v[0].J1.SetNull()
    v[0].J2.Reset()
    v[0].K0.SetValue(map[string]any{`abc`: 100})
    v[0].K1.SetNull()
    v[0].K2.Reset()
    v[0].L0.SetValue([]int{100, 200})
    v[0].L1.SetNull()
    v[0].L2.Reset()
    if out, err := json.Marshal(v[0]); err == nil && json.Unmarshal(out, &v[1]) == nil {
        // {"int32_0":100,"int32_1":null,"string_0":"abc","string_1":null,"map_0":{"abc":100},"map_1":null,"intArray_0":[100,200],"intAttay_1":null}
        fmt.Println(string(out))
        for i := 0; i < 2; i++ {
            // int32: true false 100 true true 0 false false 0 false false 0
            fmt.Println(`int32:`, v[i].I0.IsDefined(), v[i].I0.IsNull(), v[i].I0.GetValue(), v[i].I1.IsDefined(), v[i].I1.IsNull(), v[i].I1.GetValue(), v[i].I2.IsDefined(), v[i].I2.IsNull(), v[i].I2.GetValue(), v[i].I3.IsDefined(), v[i].I3.IsNull(), v[i].I3.GetValue())
        }
        for i := 0; i < 2; i++ {
            // string: true false abc true true  false false  false false
            fmt.Println(`string:`, v[i].J0.IsDefined(), v[i].J0.IsNull(), v[i].J0.GetValue(), v[i].J1.IsDefined(), v[i].J1.IsNull(), v[i].J1.GetValue(), v[i].J2.IsDefined(), v[i].J2.IsNull(), v[i].J2.GetValue(), v[i].J3.IsDefined(), v[i].J3.IsNull(), v[i].J3.GetValue())
        }
        for i := 0; i < 2; i++ {
            // map[string]any: true false map[abc:100] true true map[] false false map[] false false map[]
            fmt.Println(`map[string]any:`, v[i].K0.IsDefined(), v[i].K0.IsNull(), v[i].K0.GetValue(), v[i].K1.IsDefined(), v[i].K1.IsNull(), v[i].K1.GetValue(), v[i].K2.IsDefined(), v[i].K2.IsNull(), v[i].K2.GetValue(), v[i].K3.IsDefined(), v[i].K3.IsNull(), v[i].K3.GetValue())
        }
        for i := 0; i < 2; i++ {
            // []int: true false [100 200] true true [] false false [] false false []
            fmt.Println(`[]int:`, v[i].L0.IsDefined(), v[i].L0.IsNull(), v[i].L0.GetValue(), v[i].L1.IsDefined(), v[i].L1.IsNull(), v[i].L1.GetValue(), v[i].L2.IsDefined(), v[i].L2.IsNull(), v[i].L2.GetValue(), v[i].L3.IsDefined(), v[i].L3.IsNull(), v[i].L3.GetValue())
        }
    }
}