brimdata / zed

A novel data lake based on super-structured data
https://zed.brimdata.io/
BSD 3-Clause "New" or "Revised" License
1.38k stars 67 forks source link

ZSON "decorator conflict" #5105

Open philrz opened 5 months ago

philrz commented 5 months ago

tl;dr

The attached conn.zson.gz that was previously output by zq produces the following error when read:

$ zq -i zson conn.zson.gz 
conn.zson.gz: decorator conflict enclosing context "conn_id={orig_h:ip,orig_p:port=uint16,resp_h:ip,resp_p:port}" and decorator cast "conn_id=conn_id={orig_h:ip,orig_p:port=uint16,resp_h:ip,resp_p:port}"

Details

Repro is with Zed commit d1a78b8.

The ZSON in question was produced from an updated version of the Zeek NDJSON reference shaper I've been working on. Here's a simplified version, shaper.zed, that reproduces the problem:

// Simplified Zeek "conn" shaper used to reproduce the ZSON "decorator conflict"
// error. The named types below are referenced by the conn record type.
type port=uint16
type zenum=string
type conn_id={orig_h:ip,orig_p:port,resp_h:ip,resp_p:port}

// Target record type for conn events. resp_pkts is intentionally not declared
// here, so the sample input carries one field that this type does not cover.
type conn={_path:string,ts:time,uid:string,id:conn_id,proto:zenum,service:string,duration:duration,orig_bytes:uint64,resp_bytes:uint64,conn_state:string,local_orig:bool,local_resp:bool,missed_bytes:uint64,history:string,orig_pkts:uint64,orig_ip_bytes:uint64,resp_ip_bytes:uint64,tunnel_parents:|[string]|,_write_ts:time}

// Pipeline:
//   1. nest_dotted turns the dotted "id.*" keys of the input into a nested id record.
//   2. The nested input is kept as _original next to its shape(conn) result, _shaped.
//   3. _cropped is _shaped passed through crop(..., conn).
//   4. When _cropped equals _shaped the shaped record is emitted; otherwise an
//      error value carrying _original, _shaped and _cropped is emitted.
yield nest_dotted(this)
| yield {_original: this, _shaped: shape(conn)}
| put _cropped := crop(_shaped, conn)
| switch (_cropped == _shaped) (
  case true => yield _shaped
  case false => yield error({msg: "shaper error: fields were cropped", _original, _shaped, _cropped})
)

The following conn.ndjson can be used as input data.

{"_path":"conn","_write_ts":"2015-04-13T09:30:05.590078Z","ts":"2015-04-13T09:28:50.335103Z","uid":"C2kFwI2E6bLvjmH7md","id.orig_h":"192.168.0.54","id.orig_p":53102,"id.resp_h":"213.155.151.181","id.resp_p":80,"proto":"tcp","service":"http","duration":70.07395005226135,"orig_bytes":286,"resp_bytes":1851,"conn_state":"SF","local_orig":true,"local_resp":false,"missed_bytes":0,"history":"ShADadtTfFr","orig_pkts":8,"orig_ip_bytes":631,"resp_pkts":7,"resp_ip_bytes":2155}

I intentionally omitted the resp_pkts field from the conn type definition, so when I run the shaper on this input I get the expected error value, which includes the embedded records.

$ zq -version
Version: v1.15.0-6-gd1a78b84

$ cat conn.ndjson | zq -Z -I shaper.zed - | tee conn.zson
error({
    msg: "shaper error: fields were cropped",
    _original: {
        _path: "conn",
        _write_ts: "2015-04-13T09:30:05.590078Z",
        ts: "2015-04-13T09:28:50.335103Z",
        uid: "C2kFwI2E6bLvjmH7md",
        id: {
            orig_h: "192.168.0.54",
            orig_p: 53102,
            resp_h: "213.155.151.181",
            resp_p: 80
        },
        proto: "tcp",
        service: "http",
        duration: 70.07395005226135,
        orig_bytes: 286,
        resp_bytes: 1851,
        conn_state: "SF",
        local_orig: true,
        local_resp: false,
        missed_bytes: 0,
        history: "ShADadtTfFr",
        orig_pkts: 8,
        orig_ip_bytes: 631,
        resp_pkts: 7,
        resp_ip_bytes: 2155
    },
    _shaped: {
        _path: "conn",
        ts: 2015-04-13T09:28:50.335103Z,
        uid: "C2kFwI2E6bLvjmH7md",
        id: {
            orig_h: 192.168.0.54,
            orig_p: 53102 (port=uint16),
            resp_h: 213.155.151.181,
            resp_p: 80 (port)
        } (=conn_id),
        proto: "tcp" (=zenum),
        service: "http",
        duration: 70ns,
        orig_bytes: 286 (uint64),
        resp_bytes: 1851 (uint64),
        conn_state: "SF",
        local_orig: true,
        local_resp: false,
        missed_bytes: 0 (uint64),
        history: "ShADadtTfFr",
        orig_pkts: 8 (uint64),
        orig_ip_bytes: 631 (uint64),
        resp_ip_bytes: 2155 (uint64),
        tunnel_parents: null (|[string]|),
        _write_ts: 2015-04-13T09:30:05.590078Z,
        resp_pkts: 7
    },
    _cropped: {
        _path: "conn",
        ts: 2015-04-13T09:28:50.335103Z,
        uid: "C2kFwI2E6bLvjmH7md",
        id: {
            orig_h: 192.168.0.54,
            orig_p: 53102,
            resp_h: 213.155.151.181,
            resp_p: 80
        } (conn_id),
        proto: "tcp" (zenum),
        service: "http",
        duration: 70ns,
        orig_bytes: 286 (uint64),
        resp_bytes: 1851 (uint64),
        conn_state: "SF",
        local_orig: true,
        local_resp: false,
        missed_bytes: 0 (uint64),
        history: "ShADadtTfFr",
        orig_pkts: 8 (uint64),
        orig_ip_bytes: 631 (uint64),
        resp_ip_bytes: 2155 (uint64),
        tunnel_parents: null (|[string]|),
        _write_ts: 2015-04-13T09:30:05.590078Z
    }
}) (error({msg:string,_original:{_path:string,_write_ts:string,ts:string,uid:string,id:{orig_h:string,orig_p:int64,resp_h:string,resp_p:int64},proto:string,service:string,duration:float64,orig_bytes:int64,resp_bytes:int64,conn_state:string,local_orig:bool,local_resp:bool,missed_bytes:int64,history:string,orig_pkts:int64,orig_ip_bytes:int64,resp_pkts:int64,resp_ip_bytes:int64},_shaped:{_path:string,ts:time,uid:string,id:conn_id={orig_h:ip,orig_p:port=uint16,resp_h:ip,resp_p:port},proto:zenum=string,service:string,duration:duration,orig_bytes:uint64,resp_bytes:uint64,conn_state:string,local_orig:bool,local_resp:bool,missed_bytes:uint64,history:string,orig_pkts:uint64,orig_ip_bytes:uint64,resp_ip_bytes:uint64,tunnel_parents:|[string]|,_write_ts:time,resp_pkts:int64},_cropped:{_path:string,ts:time,uid:string,id:conn_id,proto:zenum,service:string,duration:duration,orig_bytes:uint64,resp_bytes:uint64,conn_state:string,local_orig:bool,local_resp:bool,missed_bytes:uint64,history:string,orig_pkts:uint64,orig_ip_bytes:uint64,resp_ip_bytes:uint64,tunnel_parents:|[string]|,_write_ts:time}}))

However, zq refuses to read back that ZSON.

$ zq -i zson conn.zson
conn.zson: decorator conflict enclosing context "conn_id={orig_h:ip,orig_p:port=uint16,resp_h:ip,resp_p:port}" and decorator cast "conn_id=conn_id={orig_h:ip,orig_p:port=uint16,resp_h:ip,resp_p:port}"