cosmos / cosmos-sdk

:chains: A Framework for Building High Value Public Blockchains :sparkles:
https://cosmos.network/
Apache License 2.0
6.24k stars 3.61k forks source link

[fuzz] codec: ProtoCodec.MarshalJSON can serialize structs with fields that won't then be UnmarshalJSON'd due to invalid strings #6852

Closed odeke-em closed 4 years ago

odeke-em commented 4 years ago

Summary of Bug

Coming here from fuzzing ProtoCodec, I generated a serialized version of a struct that was successfully produced by ProtoCodec.MarshalJSON'd Given this JSON that was created from a fuzzed struct

{"x":"7","a":{"x":"6","a":{},"b":{},"c":[],"d":[{"x":"9","a":{},"b":{},"c":[],"d":[],"k":{"id":1,"name":"ÂÑ≠ ´ÂöõÁ¥ª∆æËĺ…ÉËú±√Ü≈æËâû","subscription_fee":0.6,"payment":"…†ËπêÈíûƒõ"}},{"x":"5","a":{},"b":{"x":"7","a":{},"b":{},"c":[],"d":[{"x":"9","a":{},"b":{},"c":[],"d":[],"k":{"id":4,"name":"Êáà»áÈô£»Üƒ¨Áóû\"","subscription_fee":0.9,"payment":"∆úÊÖÅ…âÂꥫ´Ëí¢Ê±ÅÁúó√Ƀ°\\"}},{"x":"7","a":{},"b":{},"c":[],"d":[],"k":{"id":2,"name":"«íÈ®ÜÊóö≈´«ÑË∑óÁé¨≈∂ƒÇ«µÁ¶ê»¥","subscription_fee":0.3,"payment":"≈Ö"}},{"x":"6","a":{},"b":{},"c":[],"d":[],"k":{"id":6,"name":"Èá≥»Ö»≤∆æ»∏ËãèÁ®¶","subscription_fee":0.7,"payment":"«ªƒ¶Â®á‰ΩçËÆâÂ∞¨∆ïÁØÉ«á≈©ƒ™…ú∆Æ"}},{"x":"8","a":{},"b":{},"c":[],"d":[],"k":{"id":1,"name":"Ë¥ò\"»∫»∑»ïËíä","subscription_fee":0.6,"payment":"∆Æ≈ß∆´«¨»ó\"Áîø"}},{"x":"8","a":{},"b":{},"c":[],"d":[],"k":{"id":3,"name":"È©î…õ≈ùƒ•Á∏à","subscription_fee":0.3,"payment":"…á√ë…•"}},{"x":"5","a":{},"b":{},"c":[],"d":[],"k":{"id":2,"name":"∆©È•∂∆¨Êõ†  áGËò¶Á≠°","subscription_fee":0.68763274,"payment":"vUƒ≤«éY}Ëπ¢-Èôç√ü"}}],"k":{"id":1812377766,"name":"1Ê©ö«é≈ÄmÁöéÂ∑ôȶ¨","subscription_fee":0.31622836,"payment":";ʧ∂ʱ≥.b…≥»≤»êLc!«ê¬æƒî…ñÁçØ1…å∆©"}},"c":[],"d":[],"k":{"id":-763980326,"name":"È∫í∆º#∆≤ ©Ëâ´ òw:q‰º±≈¶Âüã»ñÁ¶Ç…ÆËÇ°G∆Å","subscription_fee":0.9678479}},{"x":"2845994464978544704","a":{},"b":{},"c":[],"d":[],"k":{"id":1078157956,"name":"Wʵ∞»ñ-√ç∆ü«í6","subscription_fee":0.030120095,"payment":"ʵû¨ÆTc˙ܬº‰∫ø7ËõØk∆° ïy%?∆ï"}},{"x":"5630438296346819683","a":{},"b":{},"c":[],"d":[{"x":"-5730571188691520736","a":{},"b":{"x":"8016913757343637280","a":{},"b":{},"c":[],"d":[],"k":{"id":973561888,"name":"E¬µÈòÜÂóåȪÆ≈Ω","subscription_fee":0.23788759}},"c":[],"d":[],"k":{"id":-562405994,"name":"…öv≈ï*Á≥∂Èñ§√âƒã4b","subscription_fee":0.9817746}},{"x":"5474779743639073320","a":{},"b":{},"c":[],"d":[],"k":{"id":-2028678055,"name":",+Z≈ºÈìé-hgi√°","subscription_fee":0.2875248,"payment":"√Ĭ±ÂéÑ9…≥"}},{"x":"-2536469108246826247","a":{},"b":{},"c":[],"d":[],"k":{"id":-1944330170,"name":"tÁǵ£°o«¨»âÁ•∑Èøñ_µï3qÊáã‰∏µ√®ÁùØ$","subscription_fee":0.7816148,"payment":"TÂû°ÂÜØ"}},{"x":"2385955514112303053","a":{},"b":{},"c":[],"d":[],"k":{"id":573132258,"name":"»ç≈Ä öt,˨Æ1∆ãÊïÉËùª»ûÂá•`«çV","subscription_fee":0.9192206,"payment":"√Æ»é "}},{"x":"-1401566776901850476","a":{},"b":{"x":"-9152602459753651598","a":{},"b":{"x":"6673995028171086108","a":{},"b":{},"c":[],"d":[],"k":{"id":-468634452,"name":"∆µÊπ∏ÁûÆÁÇ¥/≈êT»Æ«õtOÁã냵ÂßíÂÄπ_EX","subscription_fee":0.5699556}},"c":[],"d":[],"k":{"id":623167476,"name":"¬¨Âîª*«∂c√ç…à","subscription_fee":0.6006749,"payment":"«íC∆ù≈≠Âííp\u0026«∏Á∂∏N√X8i£ùuq"}},"c":[],"d":[],"k":{"id":1995993426,"name":"Ê™¨≈©«á ","subscription_fee":0.2643761,"payment":"ËëêzÂØ¥"}},{"x":"3991574966562051405","a":{},"b":{},"c":[],"d":[],"k":{"id":193516610,"name":"‰∏Ø∆ÇÂíçÁ∂ógÂòÉÁ¢ó","subscription_fee":0.6785654,"payment":"Ê°åÁ°ónË™¨Âé®"}}],"k":{"id":-1804296957,"name":"Âóõ","subscription_fee":0.9683864,"payment":"≈óy√滵Á©ìÁÇÆq»øÈ¥µ]ƒµx`ÊÖÇ"}},{"x":"-1602153971024060694","a":{"x":"5231425597200557134","a":{},"b":{},"c":[],"d":[],"k":{"id":-1531282027,"name":"Áæ∫e_ËÅòË£ôƒ±Âô†Â∑àª∑NÈóúÁåπÁ†µÂ∂†…º","subscription_fee":0.9378989,"payment":"ƒè]ÊìôËöùF"}},"b":{},"c":[],"d":[],"k":{"id":1188667468,"name":"ZË≠†…±WÁ≠∞ƒ§∆ìËì±»ã","subscription_fee":0.30506772,"payment":"˶üCxu«í"}},{"x":"2087750618150640987","a":{},"b":{},"c":[],"d":[],"k":{"id":1756911179,"name":"JÁâè","subscription_fee":0.9606761,"payment":"uÂëπÂöÄ∆ßR‰∏≤\u0026'ÈÖæ*∆ºb|\u003e"}}],"k":{"id":2053646113,"name":"Â≤≤","subscription_fee":0.06579561,"payment":"ÁöÄÂçæ∆±!√≥"}},"b":{},"k":{},"inner1":{},"inner2":{}}

and the following program to unmarshal it and compare

package main

import (
    "bytes"
    "fmt"
    "io/ioutil"
    "os"

    "github.com/google/go-cmp/cmp"

    "github.com/cosmos/cosmos-sdk/codec"
    "github.com/cosmos/cosmos-sdk/codec/types"
    "github.com/cosmos/cosmos-sdk/testutil/testdata"
)

var cdc = codec.NewProtoCodec(types.NewInterfaceRegistry())

func main() {
    data, err := ioutil.ReadFile(os.Args[1])
    if err != nil {
        panic(err)
    }
    t3ln := new(testdata.TestVersion3LoneNesting)
    if err := cdc.UnmarshalJSON(data, t3ln); err != nil {
        panic(err)
    }

    rtBytes, err := cdc.MarshalJSON(t3ln)
    if err != nil {
        panic(fmt.Sprintf("roundtrip marshal failed: %v", err))
    }

    if bytes.Equal(data, rtBytes) {
        return
    }

    rt3ln := new(testdata.TestVersion3LoneNesting)
    if err := cdc.UnmarshalBinaryLengthPrefixed(rtBytes, rt3ln); err != nil {
        panic(err)
    }

    if diff := cmp.Diff(rt3ln, t3ln); diff != "" {
        panic(diff)
    }

    if g, w := len(rtBytes), len(data); g != w {
        panic(fmt.Sprintf("Length mismatch:: got %d % x\nwant %d % x\n", g, rtBytes, w, data))
    }

    if diff := cmp.Diff(data, rtBytes); diff != "" {
        panic(diff)
    }
}

where proto definitions in testutil/testdata/proto.proto are

message TestVersion3 {
    int64 x = 1;
    TestVersion3 a = 2;
    TestVersion3 b = 3; // [(gogoproto.nullable) = false];
    repeated TestVersion3 c = 4;
    repeated TestVersion3 d = 5; // [(gogoproto.nullable) = false];
    oneof sum {
        int32 e = 6;
        TestVersion3 f = 7;
    }
    google.protobuf.Any g = 8;
    repeated TestVersion1 h = 9; //[(gogoproto.castrepeated) = "TestVersion1"];
    // google.protobuf.Timestamp i = 10;
    // google.protobuf.Timestamp j = 11; // [(gogoproto.stdtime) = true];
    Customer1 k = 12 [(gogoproto.embed) = true];
    string non_critical_field = 1031;
}

message TestVersion3LoneNesting {
    int64 x = 1;
    TestVersion3 a = 2;
    TestVersion3 b = 3; // [(gogoproto.nullable) = false];
    repeated TestVersion3 c = 4;
    repeated TestVersion3 d = 5; // [(gogoproto.nullable) = false];
    oneof sum {
        TestVersion3LoneNesting f = 7;
    }
    google.protobuf.Any g = 8;
    repeated TestVersion1 h = 9; //[(gogoproto.castrepeated) = "TestVersion1"];
    // google.protobuf.Timestamp i = 10;
    // google.protobuf.Timestamp j = 11; // [(gogoproto.stdtime) = true];
    Customer1 k = 12 [(gogoproto.embed) = true];
    string non_critical_field = 1031;

    message Inner1 {
        int64 id = 1;
        string name = 2;
        message InnerInner {
            string id = 1;
            string city = 2;
        }
        InnerInner inner = 3;
    }

    Inner1 inner1 = 14;

    message Inner2 {
        string id = 1;
        string country = 2;
        message InnerInner {
            string id = 1;
            string city = 2;
        }
        InnerInner inner = 3;
    }

    Inner2 inner2 = 15;
}

Running this program produces

$ go run repr_json.go json_codec/crashers/d8e820406b02436ccf539beb90b5683dc3d9f81b
panic: invalid character '\x18' in string literal

goroutine 1 [running]:
main.main()
    /Users/emmanuelodeke/go/src/github.com/cosmos/cosmos-sdk/fuzz/codec/repr_json.go:25 +0x5bd
exit status 2

Version

928f7fccff1d574c5a23f3584e539aae74c8406a


For Admin Use

ethanfrey commented 4 years ago

Good find.

That is a big test struct. Can you remove parts of it until there is a minimal testcase? I assume it is from one of the name or payment strings, but which?

aaronc commented 4 years ago

Thanks odeke-em. Agreed that a minimal test case will help. Will you be taking this on or should we?

odeke-em commented 4 years ago

Sorry for the late replies @ethanfrey and @aaronc!

The offending payment string is "«íC∆ù≈≠Âííp\u0026«∏Á∂∏N√X8i�£ùuq". But in retrospect, the thing might be that what produced that string is a fuzz mutation of the actual byte content directly, so perhaps we might be chasing an issue that can't be reproduced.

odeke-em commented 4 years ago

The offending bytes have a control character ^X and here is something that can point towards the fault in it: a use of "%q" to serialize strings for JSON. This playground link https://play.golang.org/p/EuWm_nXDSDk or inlined below

package main

import (
    "encoding/json"
    "fmt"
)

func main() {
    // The original string: "«íC∆ù≈≠Âííp\u0026«∏Á∂∏N√X8i£ùuq"
    b := []byte("«íC∆ù≈≠Âííp\u0026«∏Á∂∏N√X8i£ùuq")
    type Customer struct {
        Payment string
    }
    c1 := &Customer{Payment: string(b)}
    fmt.Printf("%#v\n", c1)
    blob, err := json.Marshal(c1)
    if err != nil {
        panic(err)
    }
    cs := new(Customer)
    if err := json.Unmarshal(blob, &cs); err != nil {
        panic(err)
    }
    fmt.Printf("%#v\nOK: %t\n", cs, cs.Payment == string(b))

    var g string
    dx := fmt.Sprintf("%q", b)
    if err := json.Unmarshal([]byte(dx), &g); err != nil {
        panic(err)
    }
    fmt.Printf("G: %#v\n", g)
}

Shows where it errors, so we just need to audit JSON serializing code to ensure that we don't invoke fmt.Sprintf("%q", str) to JSON serialize strings, and if there are no instances, we can safely then close this bug. To examine the bytes unhindered by the poor rendering of Github's output, please visit the Gist or the raw contents of this message.

odeke-em commented 4 years ago

I took sometime today to investigate this code in every possible code path, and given that in this library we use jsonpb, which invokes encoding/json, I've examined all the code paths and this seems unlikely. We arrived to this path because:

The only way that this problem can ever happen is if someone uses "%q" to mistakenly JSON marshal a string, but there isn't ANY code path that uses "%q" so not a problem.

The code to generate the structs as corpus is here under "details"

```go // Generate diverse corpus samples for fuzzing the various codec paths. package main import ( "flag" "fmt" "os" "github.com/gogo/protobuf/proto" "github.com/google/gofuzz" "github.com/cosmos/cosmos-sdk/codec" "github.com/cosmos/cosmos-sdk/codec/types" "github.com/cosmos/cosmos-sdk/testutil/testdata" ) var cdc = codec.NewProtoCodec(types.NewInterfaceRegistry()) func mustAny(msg proto.Message, c fuzz.Continue) *types.Any { anyy, err := types.NewAnyWithValue(msg) if err != nil { panic(err) } return anyy } func main() { kind := flag.String("kind", "binary", "the designation for the marshal output: valid options are: -binary, -binary-length or -json") flag.Parse() var marshalFn = codec.ProtoMarshalJSON var unmarshalFn = codec.ProtoUnmarshalJSON extension := "json" switch *kind { case "binary": extension = "pb" marshalFn = func(msg proto.Message) ([]byte, error) { return cdc.MarshalBinaryBare(msg.(codec.ProtoMarshaler)) } case "binary-length": extension = "len.pb" marshalFn = func(msg proto.Message) ([]byte, error) { return cdc.MarshalBinaryLengthPrefixed(msg.(codec.ProtoMarshaler)) } } f := fuzz.New().NilChance(0).Funcs( func(e *testdata.TestVersion3, c fuzz.Continue) { c.Fuzz(&e.X) e.A = new(testdata.TestVersion3) if c.Intn(19) == 6 { c.Fuzz(e.A) } e.B = new(testdata.TestVersion3) if c.Intn(17) == 8 { c.Fuzz(e.B) } e.C = []*testdata.TestVersion3{} if c.Intn(37) == 17 { e.C = make([]*testdata.TestVersion3, c.Intn(50)) c.Fuzz(&e.C) } e.D = []*testdata.TestVersion3{} if c.Intn(41) == 17 { e.D = make([]*testdata.TestVersion3, 0, c.Intn(8)) c.Fuzz(&e.D) } e.Customer1 = new(testdata.Customer1) c.Fuzz(e.Customer1) if false { var anyPb proto.Message switch c.Intn(3) { case 0: a := &testdata.TestVersion3{X: 30} c.Fuzz(a) anyPb = a case 1: b := &testdata.TestVersion2{X: 20} if false { c.Fuzz(b) } anyPb = b case 2: d := &testdata.TestVersion1{X: 10} if false { c.Fuzz(d) } anyPb = d } e.G = mustAny(anyPb, c) } c.Fuzz(e.Customer1) }, func(e *testdata.TestVersion1, c fuzz.Continue) { c.Fuzz(e.X) }, func(e *types.Any, c fuzz.Continue) { var anyPb proto.Message switch c.Intn(3) { case 0: anyPb = new(testdata.TestVersion3) case 1: anyPb = new(testdata.TestVersion2) case 2: anyPb = new(testdata.TestVersion1) } anyy := mustAny(anyPb, c) e.TypeUrl = anyy.TypeUrl e.Value = anyy.Value }, ) for i := 0; i < 10000; i++ { t3ln := &testdata.TestVersion3LoneNesting{ A: &testdata.TestVersion3{}, B: &testdata.TestVersion3{}, Customer1: &testdata.Customer1{}, Inner1: new(testdata.TestVersion3LoneNesting_Inner1), Inner2: new(testdata.TestVersion3LoneNesting_Inner2), } f.Fuzz(t3ln.A) if false { f.Fuzz(t3ln.B) f.Fuzz(t3ln.Customer1) f.Fuzz(t3ln.Inner1) f.Fuzz(t3ln.Inner2) } f.Fuzz(&t3ln.X) blob, err := marshalFn(t3ln) if err != nil { panic(err) } func() { f, err := os.Create(fmt.Sprintf("gen_%d.%s", i, extension)) if err != nil { return } defer f.Close() f.Write(blob) }() } } ```

My apologies, we can close this as false positive.