mirage / repr

ISC License
32 stars 11 forks source link

Example request: representations of abstract types that work with irmin #108

Closed kentookura closed 7 months ago

kentookura commented 7 months ago

The following code compiles but results in an error (not surprising since we are asserting false):

open Lwt.Syntax
open Repr

type foo = { v : string }

module Abstract : Irmin.Contents.S with type t = foo = struct
  open Repr
  type t = foo
  let t : foo ty =
    let a1 _ = assert false in
    let a2 _ _ = assert false in
    abstract ~pp:a2 ~of_string:a1 ~json:(a2, a1)
      ~bin:(a2, a2, Size.custom_dynamic ())
      ~equal:a2 ~compare:a2
      ~short_hash:(fun ?seed:_ -> a1)
      ~pre_hash:a2 ()

  let merge = Irmin.Merge.(option (idempotent t))
end

module Store = Irmin_git_unix.FS.KV (Abstract) 
module Info = Irmin_unix.Info(Store.Info)

let info message = Info.v ~author:"Example" "%s" message

let main_branch config =
  let* repo = Store.Repo.v config in
  Store.main repo

let main =
  let config = Irmin_git.config ~bare:true "/tmp/irmin" in
  let* t = main_branch config in
  let* () = Store.set_exn t ["a"] { v = "foo" } ~info:(info "first commit") in
  let+ s = Store.get t ["a"] in
  assert (s = { v = "foo" } ) 

let () = Lwt_main.run main
Fatal error: exception File "bin/forest-manager/main.ml", line 11, characters 17-23: Assertion failed
Raised at Dune__exe__Main.Abstract.t.a2 in file "bin/forest-manager/main.ml", line 11, characters 17-29
Called from Stdlib__Format.kasprintf.k in file "format.ml", line 1459, characters 4-22
Called from Irmin_git__Contents.Make.V.to_git in file "src/irmin-git/contents.ml", line 37, characters 16-42
Called from Irmin_git__Content_addressable.Make.add in file "src/irmin-git/content_addressable.ml", line 49, characters 12-22
Called from Irmin__Tree.Make.export.on_contents in file "src/irmin/tree.ml", line 2231, characters 21-48
Called from Irmin__Store.Make.Commit.v.(fun) in file "src/irmin/store.ml", line 180, characters 21-61
Called from Irmin__Store.Make.update.(fun) in file "src/irmin/store.ml", line 813, characters 19-63
Called from Irmin__Store.Make.retry.aux in file "src/irmin/store.ml", line 725, characters 8-13
Called from Irmin__Store.Make.set_tree in file "src/irmin/store.ml", line 835, characters 7-140
Called from Irmin__Store.Make.set_exn in file "src/irmin/store.ml", line 860, characters 4-57
Called from Dune__exe__Main.main in file "bin/forest-manager/main.ml", line 33, characters 12-75
Called from Lwt.Sequential_composition.bind.create_result_promise_and_callback_if_deferred.callback in file "src/core/lwt.ml", line 1844, characters 16-19
Re-raised at Lwt.Miscellaneous.poll in file "src/core/lwt.ml", line 3123, characters 20-29
Called from Lwt_main.run.run_loop in file "src/unix/lwt_main.ml", line 27, characters 10-20
Called from Lwt_main.run in file "src/unix/lwt_main.ml", line 106, characters 8-13
Re-raised at Lwt_main.run in file "src/unix/lwt_main.ml", line 112, characters 4-13
Called from Dune__exe__Main in file "bin/forest-manager/main.ml", line 37, characters 9-26

I encountered this when trying to track down the following issue:

I have some representations of types, but when adapting the above code it results in a stack overflow:

https://github.com/kentookura/ocaml-forester/blob/irmin/bin/forest-manager/main.ml

Could the reason for this be the unimplemented size functions for the representation of Range?

https://github.com/kentookura/ocaml-forester/blob/4fe2a99ad9869e155d8b8c124846e0ecd2380f73/lib/core/Rep.ml#L83

I would like to request a fully worked example of representing abstract types that works with irmin. It would help me track down where the error lies in my code. Thanks!

art-w commented 7 months ago

Very cool to see Forester experimenting with Irmin!

Here's a complete example of Repr.abstract and some tests of the different functions:

open Lwt.Syntax

type foo = { v : string } [@@deriving repr]

module Abstract : Irmin.Contents.S with type t = foo = struct
  type t = foo

  let t : foo Repr.ty =

    let pp h foo =
      Format.fprintf h "{ v = %S }" foo.v
    in
    let of_string str =
      (* Must be able to decode the output of [pp] *)
      try Scanf.sscanf str "{ v = %S }" (fun str -> Ok { v = str })
      with _ -> Error (`Msg "of_string")
    in

    let encode_json encoder foo =
      let (`Ok | _) = Jsonm.encode encoder (`Lexeme `Os) in
      let (`Ok | _) = Jsonm.encode encoder (`Lexeme (`Name "v")) in
      let (`Ok | _) = Jsonm.encode encoder (`Lexeme (`String foo.v)) in
      let (`Ok | _) = Jsonm.encode encoder (`Lexeme `Oe) in
      ()
    in
    let decode_json decoder =
      (* Must be able to decode the output of [encode_json] *)
      let (let*) = Result.bind in
      let* () = Repr.Json.decode decoder |> function `Lexeme `Os         -> Ok () | _ -> Error (`Msg "decode_json") in
      let* () = Repr.Json.decode decoder |> function `Lexeme (`Name "v") -> Ok () | _ -> Error (`Msg "decode_json") in
      let* v  = Repr.Json.decode decoder |> function `Lexeme (`String v) -> Ok v  | _ -> Error (`Msg "decode_json") in
      let* () = Repr.Json.decode decoder |> function `Lexeme `Oe         -> Ok () | _ -> Error (`Msg "decode_json") in
      Ok { v = v }
    in

    let encode_bin foo write =
      Repr.Binary.Varint.encode (String.length foo.v) write ;
      write foo.v
    in
    let decode_bin buffer at =
      (* Must be able to decode the output of [encode_bin] *)
      let len = Repr.Binary.Varint.decode buffer at in
      let v = String.sub buffer !at len in
      at := !at + len ;
      { v }
    in
    let size =
      Repr.Size.custom_dynamic
        ~of_value:(fun foo ->
          (* Optional, precompute the size that will be used by [encode_bin] *)
          let len = String.length foo.v in
          let varint_len_size = ref 0 in
          Repr.Binary.Varint.encode len
            (fun s -> varint_len_size := !varint_len_size + String.length s) ;
          !varint_len_size + len)
        ~of_encoding:(fun str offset ->
          (* You can skip this function, it's unused nowadays *)
          let at = ref offset in
          let len = Repr.Binary.Varint.decode str at in
          let varint_len_size = !at - offset in
          varint_len_size + len)
        ()
    in
    let pre_hash =
      (* Same as [encode_bin], unless the binary serialization has changed
         but we want to preserve the previous cryptographic hash for
         backward compatibility. *)
      encode_bin
    in

    let equal a b = String.equal a.v b.v in
    let compare a b = String.compare a.v b.v in
    let short_hash ?seed foo =
      match seed with
      | None -> Hashtbl.hash foo.v
      | Some seed -> Hashtbl.seeded_hash seed foo.v
    in
    Repr.abstract
      ~pp ~of_string
      ~json:(encode_json, decode_json)
      ~bin:(encode_bin, decode_bin, size)
      ~pre_hash
      ~equal ~compare ~short_hash
      ()

  let merge = Irmin.Merge.(option (idempotent t))
end

module Store = Irmin_git_unix.FS.KV (Abstract) 
module Info = Irmin_unix.Info (Store.Info)

let info message = Info.v ~author:"Example" "%s" message

let main_branch config =
  let* repo = Store.Repo.v config in
  Store.main repo

let test schema foo =

  let pretty_string = Format.asprintf "%a" (Repr.pp schema) foo in
  assert (pretty_string = Repr.to_string schema foo) ;
  Format.printf "pp: %s@." pretty_string;

  let r = Repr.of_string schema pretty_string in
  assert (r = Ok foo) ;

  let buf = Buffer.create 16 in
  let encoder = Jsonm.encoder ~minify:true (`Buffer buf) in
  Repr.encode_json schema encoder foo ;
  let (`Ok | _) = Jsonm.encode encoder `End in
  let json_encoded = Buffer.contents buf in
  Format.printf "json: %S@." json_encoded ;
  assert (json_encoded = Printf.sprintf {|{"v":%S}|} foo.v) ;

  let decoder = Jsonm.decoder (`String json_encoded) in
  let r = Repr.decode_json schema decoder in
  assert (r = Ok foo) ;

  let binstr = Repr.(unstage (to_bin_string schema)) foo in
  Format.printf "binstring = %S@." binstr ;
  let r = Repr.(unstage (of_bin_string schema)) binstr in
  assert (r = Ok foo) ;

  let expected_size = match Repr.(unstage (size_of schema)) foo with
    | None -> failwith "unable to precompute binstring length"
    | Some s -> s
  in
  Format.printf "expected_size = %i@." expected_size ;
  assert (expected_size = String.length binstr) ;

  let short_hash = Repr.(unstage (short_hash schema)) foo in
  Format.printf "short_hash = %#i@." short_hash ;

  ()

let main =
  let config = Irmin_git.config ~bare:true "/tmp/irmin" in
  let* t = main_branch config in
  let* () = Store.set_exn t ["a"] { v = "foo" } ~info:(info "first commit") in
  let+ s = Store.get t ["a"] in
  assert (s = { v = "foo" } ) ;
  Format.printf "# Abstract test:@." ;
  test Abstract.t s ;
  Format.printf "@." ;
  Format.printf "# PPX test:@." ;
  test foo_t s

let () = Lwt_main.run main

For the irmin-git backend, the most important functions to implement are pp, of_string and equal (and the rest is likely unused by this specific backend so you could skip it with assert false, although the other functions like the binary encoding are used by e.g. the irmin-pack backend).

Regarding the stack overflow, it's likely coming from https://github.com/kentookura/ocaml-forester/blob/irmin/lib/core/Rep.ml#L336 as recursive types must be explicitly created with Repr.mu (or the repr definition doesn't terminate!)

I haven't tested it thoroughly but for example:

(... other definitions from Rep.Tree ...) ```ocaml (* can probably remove the [rec]? *) let rec sem_node (t : Sem.t ty) (tree : Sem.tree ty) : Sem.node ty = let open Sem in variant "node" (fun text transclude subtree query xml_tag unresolved math link embed_tex img if_tex prim object_ ref -> function | Text s -> text s | Transclude (x, y) -> transclude (x, y) | Subtree (x, y) -> subtree (x, y) | Query (x, y) -> query (x, y) | Xml_tag (x, y, z) -> xml_tag (x, y, z) | Unresolved x -> unresolved x | Math (x, y) -> math (x, y) | Link x -> link x | Embed_tex x -> embed_tex x | Img x -> img x | If_tex (x, y) -> if_tex (x, y) | Prim (x, y) -> prim (x, y) | Object x -> object_ x | Ref x -> ref x) |~ case1 "Text" string (fun s -> Text s) |~ case1 "Transclude" (pair (tranclusion_opts t) string) (fun (x, y) -> Transclude (x, y)) |~ case1 "Subtree" (pair (tranclusion_opts t) tree) (fun (x, y) -> Subtree (x, y)) |~ case1 "Query" (pair (tranclusion_opts t) (query tree t t)) (fun (x, y) -> Query (x, y)) |~ case1 "Xml_tag" (triple string (list @@ pair string t) t) (fun (x, y, z) -> Xml_tag (x, y, z)) |~ case1 "Unresolved" string (fun s -> Unresolved s) |~ case1 "Math" (pair math_mode t) (fun (x, y) -> Math (x, y)) |~ case1 "Link" (link t ) (fun s -> Link s) |~ case1 "Embed_tex" (embed_tex t) (fun s -> Embed_tex s) |~ case1 "Img" img (fun s -> Img s) |~ case1 "If_tex" (pair t t) (fun (x, y) -> If_tex (x, y)) |~ case1 "Prim" (pair prim t) (fun (x, y) -> Prim (x, y)) |~ case1 "Object_" symbol (fun s -> Object s) |~ case1 "Ref" ref_cfg (fun s -> Ref s) |> sealv and embed_tex (t : Sem.t ty) : Sem.embed_tex ty = let open Sem in record "embed_tex" (fun preamble source -> { preamble; source }) |+ field "preamble" t (fun t -> t.preamble) |+ field "source" t (fun t -> t.source) |> sealr and modifier = let open Sem in enum "modifier" [ ("sentence_case", `Sentence_case)] and img : Sem.img ty = let open Sem in record "img" (fun path -> { path }) |+ field "path" string (fun t -> t.path) |> sealr and ref_cfg : Sem.ref_cfg ty = let open Sem in record "ref_cfg" (fun address -> { address }) |+ field "address" string (fun t -> t.address) |> sealr and symbol : Symbol.t ty = let open Symbol in pair (list string) int and link (t : Sem.t ty) : Sem.link ty = let open Sem in record "link" (fun dest label modifier -> { dest; label; modifier }) |+ field "dest" string (fun t -> t.dest) |+ field "label" (option t) (fun t -> t.label) |+ field "modifier" (option modifier) (fun t -> t.modifier) |> sealr and query (tree : Sem.tree ty) (t : Sem.t ty) a : 'a Query.t ty = let open Query in Repr.mu @@ fun query -> variant "query" (fun author tag taxon meta or_ and_ not_ true_ -> function | Author x -> author x | Tag x -> tag x | Taxon x -> taxon x | Meta (x, y) -> meta (x, y) | Or x -> or_ x | And x -> and_ x | Not x -> not_ x | True -> true_) |~ case1 "Author" a (fun x -> Author x) |~ case1 "Tag" a (fun x -> Tag x) |~ case1 "Taxon" a (fun x -> Taxon x) |~ case1 "Meta" (pair string a) (fun (x, y) -> Meta (x, y)) |~ case1 "Or" (list query) (fun x -> Or x) |~ case1 "And" (list query) (fun x -> And x) |~ case1 "Not" query (fun x -> Not x) |~ case0 "True" True |> sealv and located_sem_node (t : Sem.t ty) (tree : Sem.tree ty) : Sem.node Range.located ty = let open Asai in let open Range in record "located_sem_node" (fun loc value -> { loc; value }) |+ field "loc" (option range) (fun t -> None) |+ field "value" (sem_node t tree) (fun t -> t.value) |> sealr and tranclusion_opts (t : Sem.t ty) = let open Sem in record "tranclusion_opts" (fun toc show_heading show_metadata title_override taxon_override expanded numbered -> { toc; show_heading; show_metadata; title_override; taxon_override; expanded; numbered; }) |+ field "toc" bool (fun t -> t.toc) |+ field "show_heading" bool (fun t -> t.show_heading) |+ field "show_metadata" bool (fun t -> t.show_metadata) |+ field "title_override" (option t) (fun t -> t.title_override) |+ field "taxon_override" (option string) (fun t -> t.taxon_override) |+ field "expanded" bool (fun t -> t.expanded) |+ field "numbered" bool (fun t -> t.numbered) |> sealr and frontmatter (t : Sem.t ty) = let open Sem in record "frontmatter" (fun title taxon authors contributors dates addr metas tags physical_parent designated_parent source_path number -> { title; taxon; authors; contributors; dates; addr; metas; tags; physical_parent; designated_parent; source_path; number; }) |+ field "title" (option t) (fun t -> t.title) |+ field "taxon" (option string) (fun t -> t.taxon) |+ field "authors" (list string) (fun t -> t.authors) |+ field "contributors" (list string) (fun t -> t.contributors) |+ field "dates" (list date) (fun t -> t.dates) |+ field "addr" (option string) (fun t -> t.addr) |+ field "metas" (list (pair string t)) (fun t -> t.metas) |+ field "tags" (list string) (fun t -> t.tags) |+ field "physical_parent" (option string) (fun t -> t.physical_parent) |+ field "designated_parent" (option string) (fun t -> t.designated_parent) |+ field "source_path" (option string) (fun t -> t.source_path) |+ field "number" (option string) (fun t -> t.number) |> sealr ```
  let t : Sem.tree ty =
    let open Sem in
    Repr.mu (fun tree ->
      let t = Repr.mu (fun t -> list (located_sem_node t tree)) in
      record "tree" (fun fm body : Sem.tree -> { fm; body })
      |+ field "fm" (frontmatter t) (fun t -> t.fm)
      (* without annotation compiler thinks that t is obj_method due to `body` field *)
      |+ field "body" t (fun (t : Sem.tree) -> t.body)
      |> sealr)
kentookura commented 7 months ago

Thanks a lot! I've applied your suggestions and it works well.