mransan / ocaml-protoc

A Protobuf Compiler for OCaml
https://mransan.github.io/ocaml-protoc/
MIT License
179 stars 33 forks source link

Incorrect generated encoder/decoder for empty message in RPC endpoints #250

Open Merlin04 opened 3 months ago

Merlin04 commented 3 months ago

With a simple .proto file like this:

syntax = "proto3";

message Empty {}

service Test {
  rpc Ping (Empty) returns (Empty);
}

the generated .ml file contains these functions for the Empty message:

let rec encode_pb_empty (v:empty) encoder = 
()

let rec decode_pb_empty d =
  match Pbrt.Decoder.key d with
  | None -> ();
  | Some (_, pk) -> 
    Pbrt.Decoder.unexpected_payload "Unexpected fields in empty message(empty)" pk

however, the encoder/decoder functions for the Ping endpoint in the Server and Client modules are incorrectly(?) defined as

fun () enc -> Pbrt.Encoder.empty_nested enc
fun d -> Pbrt.Decoder.empty_nested d

which produce an error when given a value encoded/decoded with the functions for the empty message type:

# let e = Pbrt.Encoder.create ();;
val e : Pbrt.Encoder.t = <abstr>
# encode_pb_empty () e;;
- : empty = ()
# let b = Pbrt.Encoder.to_bytes e;;
val b : bytes = Bytes.of_string ""
# Pbrt.Decoder.of_bytes b |> Pbrt.Decoder.empty_nested;;
Exception: Pbrt.Decoder.Failure(Incomplete)
c-cube commented 3 months ago

Indeed, and I'm surprised now that we even have a special case for "empty" on the wire. The protobuf semantics is that unexpected fields should just be ignored, not that we should enforce the absence of any field. I think we could just parse unit as a normal message and ignore all the internal fields. I'd like @Lupus ' opinion too if possible.

Lupus commented 3 months ago

Since empty messages work just fine with our internal fork, I looked through the commits and found this one:

diff --git a/vendor/ocaml-protoc/src/compilerlib/pb_codegen_backend.ml b/vendor/ocaml-protoc/src/compilerlib/pb_codegen_backend.ml
index 84f12e2..d6dcf44 100644
--- a/vendor/ocaml-protoc/src/compilerlib/pb_codegen_backend.ml
+++ b/vendor/ocaml-protoc/src/compilerlib/pb_codegen_backend.ml
@@ -132,12 +132,12 @@ let wrapper_type_of_type_name = function
     with the module name. (This is essentially expecting (rightly) a sub module
     with the same name.
  *)
-let user_defined_type_of_id ~(all_types : _ list) ~file_name i : Ot.field_type =
+let user_defined_type_of_id ?(empty_as_unit=true) ~(all_types : _ list) ~file_name i : Ot.field_type =
   let module_prefix = module_prefix_of_file_name file_name in
   match Typing_util.type_of_id all_types i with
   | exception Not_found -> E.programmatic_error E.No_type_found_for_id
   | { Tt.file_name; spec; _ } as t ->
-    if Typing_util.is_empty_message t then
+    if (Typing_util.is_empty_message t) && empty_as_unit then
       Ot.Ft_unit
     else (
       let field_type_module_prefix = module_prefix_of_file_name file_name in
@@ -639,7 +639,7 @@ let compile_enum file_options file_name scope enum =
 let compile_rpc ~(file_name : string) ~all_types
     (rpc : Pb_field_type.resolved Tt.rpc) : Ot.rpc =
   let compile_ty ~stream (ty : int) : Ot.rpc_type =
-    let ty = user_defined_type_of_id ~all_types ~file_name ty in
+    let ty = user_defined_type_of_id ~empty_as_unit:false ~all_types ~file_name ty in
     if stream then
       Ot.Rpc_stream ty
     else

The idea if I recall it correctly is to use the generated function to encode/decode empty message instead of using special empty_nested. Empty message by itself is represented as empty byte array on the wire. In case of service - empty message as method argument/result is sent as "Content-length: 0" on the wire in case of Twirp. When empty message is nested in another message, you need to wrap that empty byte buffer as proper message field (and this is what empty_nested is doing), but when you send solely that empty message, that additional wrapping is not recognized as valid protobuf by other implementations. Patch presented above interoperates with Golang Twirp client/server just fine for both empty message arguments and return values.

Lupus commented 3 months ago

Actually that patch was already upstreamed by me some time ago to both master and br-4.0 branches.

I've checked master branch (e2cf9037), and for exactly the same input file:

syntax = "proto3";

message Empty {}

service Test {
  rpc Ping (Empty) returns (Empty);
}

I get the following output:

[@@@ocaml.warning "-27-30-39-44"]

type empty = unit

let rec default_empty = ()

[@@@ocaml.warning "-27-30-39"]

(** {2 Formatters} *)

let rec pp_empty fmt (v:empty) = 
  let pp_i fmt () =
    Pbrt.Pp.pp_unit fmt ()
  in
  Pbrt.Pp.pp_brk pp_i fmt ()

[@@@ocaml.warning "-27-30-39"]

(** {2 Protobuf Encoding} *)

let rec encode_pb_empty (v:empty) encoder = 
()

[@@@ocaml.warning "-27-30-39"]

(** {2 Protobuf Decoding} *)

let rec decode_pb_empty d =
  match Pbrt.Decoder.key d with
  | None -> ();
  | Some (_, pk) -> 
    Pbrt.Decoder.unexpected_payload "Unexpected fields in empty message(empty)" pk

[@@@ocaml.warning "-27-30-39"]

(** {2 Protobuf YoJson Encoding} *)

let rec encode_json_empty (v:empty) = 
Pbrt_yojson.make_unit v

[@@@ocaml.warning "-27-30-39"]

(** {2 JSON Decoding} *)

let rec decode_json_empty d =
Pbrt_yojson.unit d "empty" "empty record"

module Test = struct
  open Pbrt_services.Value_mode
  module Client = struct
    open Pbrt_services

    let ping : (empty, unary, empty, unary) Client.rpc =
      (Client.mk_rpc 
        ~package:[]
        ~service_name:"Test" ~rpc_name:"Ping"
        ~req_mode:Client.Unary
        ~res_mode:Client.Unary
        ~encode_json_req:encode_json_empty
        ~encode_pb_req:encode_pb_empty
        ~decode_json_res:decode_json_empty
        ~decode_pb_res:decode_pb_empty
        () : (empty, unary, empty, unary) Client.rpc)
  end

  module Server = struct
    open Pbrt_services

    let ping : (empty,unary,empty,unary) Server.rpc = 
      (Server.mk_rpc ~name:"Ping"
        ~req_mode:Server.Unary
        ~res_mode:Server.Unary
        ~encode_json_res:encode_json_empty
        ~encode_pb_res:encode_pb_empty
        ~decode_json_req:decode_json_empty
        ~decode_pb_req:decode_pb_empty
        () : _ Server.rpc)

    let make
      ~ping:__handler__ping
      () : _ Server.t =
      { Server.
        service_name="Test";
        package=[];
        handlers=[
           (__handler__ping ping);
        ];
      }
  end

end

It looks like correct encoder/decoder is used for services.