piotr-oles / as-proto

Protobuf implementation in AssemblyScript
MIT License
36 stars 14 forks source link

Oneof/union types don't work very well #42

Open dsyer opened 1 year ago

dsyer commented 1 year ago

It's kind of a problem with AssemblyScript, not as-proto per se, that it doesn't support union types or undefined, but protobufs often contain "oneof" fields and as-proto currently doesn't do a great job of transpiling them.

Given this:

syntax = "proto3";

message Simple {
    oneof payload {
        bool flag = 1;
        int32 value = 2;
        bytes data = 3;
        string text = 4;
    }
}

as-proto will generate this:

// Code generated by protoc-gen-as. DO NOT EDIT.
// Versions:
//   protoc-gen-as v0.9.1
//   protoc        v3.19.6

import { Writer, Reader } from "as-proto/assembly";

export class Simple {
  static encode(message: Simple, writer: Writer): void {
    writer.uint32(8);
    writer.bool(message.flag);

    writer.uint32(16);
    writer.int32(message.value);

    writer.uint32(26);
    writer.bytes(message.data);

    writer.uint32(34);
    writer.string(message.text);
  }

  static decode(reader: Reader, length: i32): Simple {
    const end: usize = length < 0 ? reader.end : reader.ptr + length;
    const message = new Simple();

    while (reader.ptr < end) {
      const tag = reader.uint32();
      switch (tag >>> 3) {
        case 1:
          message.flag = reader.bool();
          break;

        case 2:
          message.value = reader.int32();
          break;

        case 3:
          message.data = reader.bytes();
          break;

        case 4:
          message.text = reader.string();
          break;

        default:
          reader.skipType(tag & 7);
          break;
      }
    }

    return message;
  }

  flag: bool;
  value: i32;
  data: Uint8Array;
  text: string;

  constructor(
    flag: bool = false,
    value: i32 = 0,
    data: Uint8Array = new Uint8Array(0),
    text: string = ""
  ) {
    this.flag = flag;
    this.value = value;
    this.data = data;
    this.text = text;
  }
}

The "payload" field isn't even referenced, and the constructor forces the Simple type to have all of the "oneof" properties (not one of them).

If you try it with ts-proto instead it can generate a union type, e.g.

export interface Simple {
  payload?:
    | { $case: "flag"; flag: boolean }
    | { $case: "value"; value: number }
    | { $case: "data"; data: Uint8Array }
    | { $case: "text"; text: string };
}

but that won't work in AssemblyScript. It would need to be something like this, which I think is what is meant by the very cursory comment on union types in the language guide (https://www.assemblyscript.org/status.html#language-features):

import { Reader, Writer, Protobuf } from "as-proto/assembly/index";

export const protobufPackage = "";

export class SimplePayload {
    $case: string = "text";
    flag: bool = false;
    value: i32 = 0;
    data: Uint8Array = new Uint8Array(0);
    text: string = '';
    static text(text: string) : SimplePayload {
        var instance = new SimplePayload();
        instance.$case = "text";
        instance.text = text;
        return instance;
    }
    static data(data: Uint8Array) : SimplePayload {
        var instance = new SimplePayload();
        instance.$case = "data";
        instance.data = data;
        return instance;
    }
    static value(value: i32) : SimplePayload {
        var instance = new SimplePayload();
        instance.$case = "value";
        instance.value = value;
        return instance;
    }
    static flag(flag: bool) : SimplePayload {
        var instance = new SimplePayload();
        instance.$case = "flag";
        instance.flag = flag;
        return instance;
    }
}

export class Simple {

    payload: SimplePayload = new SimplePayload();

    static encode(message: Simple, writer: Writer): void {
        if (message.payload) {
            if (message.payload.$case === "flag") {
                writer.uint32(8);
                writer.bool(message.payload.flag);
            }
            if (message.payload.$case === "value") {
                writer.uint32(16);
                writer.int32(message.payload.value);
            }
            if (message.payload.$case === "data") {
                writer.uint32(26);
                writer.bytes(message.payload.data);
            }
            if (message.payload.$case === "text") {
                writer.uint32(34);
                writer.string(message.payload.text);
            }
        }
    }

    static decode(reader: Reader, length: i32): Simple {
        const end: usize = length < 0 ? reader.end : reader.ptr + length;
        const message = new Simple();
        while (reader.ptr < end) {
            const tag = reader.uint32();
            switch (tag >>> 3) {
                case 1:
                    message.payload = SimplePayload.flag(reader.bool());
                    break;
                case 2:
                    message.payload = SimplePayload.value(reader.int32());
                    break;
                case 3:
                    message.payload = SimplePayload.data(reader.bytes());
                    break;
                case 4:
                    message.payload = SimplePayload.text(reader.string());
                    break;
                default:
                    reader.skipType(tag & 7);
                    break;
            }
        }
        return message;
    }
}
jcapogna commented 1 year ago

+1

I'm using oneof in my code. Decoding works great, but encoding is problematic as it creates a default value for each field sets them all.

I have a protobuf that looks like this:

message FlagValue {
  oneof value {
    bool booleanValue = 1;
    double doubleValue = 2;
    string stringValue = 3;
  }
}

and the generated constructor sets default values for each field (rather than nulls):

  constructor(
    booleanValue: bool = false,
    doubleValue: f64 = 0.0,
    stringValue: string = ""
  ) {
    this.booleanValue = booleanValue;
    this.doubleValue = doubleValue;
    this.stringValue = stringValue;
  }

And the encoder sets all three fields.

 static encode(message: FlagValue, writer: Writer): void {
    writer.uint32(8);
    writer.bool(message.booleanValue);

    writer.uint32(17);
    writer.double(message.doubleValue);

    writer.uint32(26);
    writer.string(message.stringValue);
  }

I think a good solution would be defaulting the fields to null and modifying the encode function to only set non-null fields.

piotr-oles commented 1 year ago

Unfortunately, there is no concept of null for primitive types, like int32, in AssemblyScript, so we have to default to values like 0. I agree that support for oneOf field is currently broken, it's not trivial to implement and I don't know when I will have time to do that. Feel free to contribute to the project :)

jcapogna commented 1 year ago

I realize this is a bigger issue than just oneOf. I can't tell if an optional field is set or not.

I think it's ok for the fields to be set to a default value, but there needs to be a way to tell if the field is set. This could also be used to not write the unset field to the wire protocol.

Since you can't null a primitive type, I'd suggest keeping a setting boolean value isSet that defaults to false.

You might need to get rid of the constructor to make that possible. I think it's worthwhile though as settings default values like 0 on unset fields is basically an incorrect implementation in my opinion. Not being able to differentiate being an unset optional field and a set 0 value is a problem.

Perhaps this could be done with get, has, and set functions. I believe the Java generated classes are similar to this. If a field is unset, calling get still returns the default value, but calling has returns false.

The generated class could look something like:


value: int32 = 0;
valueIsSet: boolean = false;

setValue(value: int32): void {
    this.value = value;
    this.valueIsSet = true;
}

hasValue(): boolean {
    return this.valueIsSet;
}

getValue(): int32 {
   return this.value;

}

static encode(message: Message, writer: Writer): void {
    if(message.valueIsSet) {
        writer.uint32(8);
        writer.bool(message.value);
    }
}

@piotr-oles What do you think about this optional problem and this solution?

Fixing oneOf seems like an additional and much more minor issue.

piotr-oles commented 1 year ago

Yes, I agree it's a bigger issue. I'm wondering if we should use boolean fields, or maybe something more compact, like bit masks (AFAIK boolean will be stored as i32). I think C++ implementation use masks for that. It would have to be a major release because of API changes.

piotr-oles commented 1 year ago
class Message {
  value: int32 = 0;
  private hasMask1: i32 = 0;

  setValue(value: int32): void {
      this.value = value;
      this.hasMask1 &= 0x1;
  }

  hasValue(): boolean {
     return this.hasMask1 & 0x1 !== 0;
  }

  getValue(): int32 {
     return this.value;
  }

  static encode(message: Message, writer: Writer): void {
      if (message.hasValue()) {
          writer.uint32(8);
          writer.i32(message.value);
      }
  }
}
jcapogna commented 1 year ago

That makes sense. value can be private too in order to prevent sets without setting the mask.

I'm actually surprised I haven't run into this being a blocker for me yet. I don't have time/motivation to take this on myself (at least for now), but we should probably spin out a separate issue for this.

jcapogna commented 1 year ago

I'm about to stop using oneOf because it breaks decoding in Java.

My proto created in AssemblyScript:

message FlagValue {
  oneof value {
    bool booleanValue = 1;
    double doubleValue = 2;
    string stringValue = 3;
  }
}

The decoding code in Java:

 while(!done) {
      int tag = input.readTag();
      switch (tag) {
          case 0:
              done = true;
              break;
          case 8:
              this.value_ = input.readBool();
              this.valueCase_ = 1;
              break;
          case 17:
              this.value_ = input.readDouble();
              this.valueCase_ = 2;
              break;
          case 26:
              String s = input.readStringRequireUtf8();
              this.valueCase_ = 3;
              this.value_ = s;
              break;
          default:
              if (!super.parseUnknownField(input, extensionRegistry, tag)) {
                  done = true;
              }
      }
  }

Looks like the Java decoder assumes only one of the fields in a oneOf is set. What happens is that value is set to booleanValue then overwritten with doubleValue and then overwritten with stringValue.

So if I have a boolean value set, then value ends up being "" from the empty string value.