OCamlPro / ocp-indent

Indentation tool for OCaml, to be used from editors like Emacs and Vim.
http://www.typerex.org/ocp-indent.html
Other
200 stars 63 forks source link

problems with indentation in files with cinaps comments #301

Closed ceastlund closed 4 years ago

ceastlund commented 4 years ago

This is a subset of the code from the implementation of bin_prot, formatted using ocp-indent built from 786f61b01674c3315e91ade9a1e2cf160b705a8d. It demonstrates a number of problems. After the initial cinaps comment, everything is indented five characters, but shouldn't be indented at all. The match clauses containing cinaps comments are indented very far to the right, but those cinaps comments shouldn't incur indentation either. And the final non-cinaps comment in the file -- the one that reads "No error possible either." -- causes everything after it to be indented as prose rather than as code, and everything gets aligned to the same indentation level.

(*$ open Bin_prot_cinaps $*)

     let bin_read_nat0 buf ~pos_ref =
       let pos = safe_get_pos buf pos_ref in
       assert_pos pos;
       match unsafe_get buf pos with
       | '\x00'..'\x7f' as ch ->
         pos_ref := pos + 1;
         Nat0.unsafe_of_int (Char.code ch)
       | (*$ Code.char INT16 *)'\xfe'(*$*) ->
   safe_bin_read_nat0_16 buf ~pos_ref ~pos:(pos + 1)
                                         | (*$ Code.char INT32 *)'\xfd'(*$*) ->
   safe_bin_read_nat0_32 buf ~pos_ref ~pos:(pos + 1)
                                                                           | (*$ Code.char INT64 *)'\xfc'(*$*) ->
   if arch_sixtyfour then
     safe_bin_read_nat0_64 buf ~pos_ref ~pos:(pos + 1)
   else
     raise_read_error ReadError.Nat0_overflow pos
                                                                                                             | _ ->
                                                                                                               raise_read_error ReadError.Nat0_code pos
     [@@ocamlformat "disable"]

     let bin_read_int buf ~pos_ref =
       let pos = safe_get_pos buf pos_ref in
       assert_pos pos;
       match unsafe_get buf pos with
       | '\x00'..'\x7f' as ch ->
         pos_ref := pos + 1;
         Char.code ch
       | (*$ Code.char NEG_INT8 *)'\xff'(*$*) ->
   safe_bin_read_neg_int8 buf ~pos_ref ~pos:(pos + 1)
                                            | (*$ Code.char INT16 *)'\xfe'(*$*) ->
   safe_bin_read_int16 buf ~pos_ref ~pos:(pos + 1)
                                                                              | (*$ Code.char INT32 *)'\xfd'(*$*) ->
   safe_bin_read_int32_as_int buf ~pos_ref ~pos:(pos + 1)
                                                                                                                | (*$ Code.char INT64 *)'\xfc'(*$*) ->
   if arch_sixtyfour then
     safe_bin_read_int64_as_int buf ~pos_ref ~pos:(pos + 1)
   else
     raise_read_error ReadError.Int_overflow pos
                                                                                                                                                  | _ ->
                                                                                                                                                    raise_read_error ReadError.Int_code pos
     [@@ocamlformat "disable"]

     let bin_read_float buf ~pos_ref =
       let pos = safe_get_pos buf pos_ref in
       assert_pos pos;
       let next = pos + 8 in
       check_next buf next;
       pos_ref := next;
       (* No error possible either. *)
          Int64.float_of_bits (unsafe_get64le buf pos)
          ;;

          let bin_read_int32 buf ~pos_ref =
          let pos = safe_get_pos buf pos_ref in
          assert_pos pos;
          match unsafe_get buf pos with
          | '\x00'..'\x7f' as ch ->
          pos_ref := pos + 1;
          Int32.of_int (Char.code ch)
          | (*$ Code.char NEG_INT8 *)'\xff'(*$*) ->
          Int32.of_int (safe_bin_read_neg_int8 buf ~pos_ref ~pos:(pos + 1))
          | (*$ Code.char INT16 *)'\xfe'(*$*) ->
          Int32.of_int (safe_bin_read_int16 buf ~pos_ref ~pos:(pos + 1))
          | (*$ Code.char INT32 *)'\xfd'(*$*) ->
          safe_bin_read_int32 buf ~pos_ref ~pos:(pos + 1)
          | _ ->
          raise_read_error ReadError.Int32_code pos
          [@@ocamlformat "disable"]

          let bin_read_int64 buf ~pos_ref =
          let pos = safe_get_pos buf pos_ref in
          assert_pos pos;
          match unsafe_get buf pos with
          | '\x00'..'\x7f' as ch ->
          pos_ref := pos + 1;
          Int64.of_int (Char.code ch)
          | (*$ Code.char NEG_INT8 *)'\xff'(*$*) ->
          Int64.of_int (safe_bin_read_neg_int8 buf ~pos_ref ~pos:(pos + 1))
          | (*$ Code.char INT16 *)'\xfe'(*$*) ->
          Int64.of_int (safe_bin_read_int16 buf ~pos_ref ~pos:(pos + 1))
          | (*$ Code.char INT32 *)'\xfd'(*$*) ->
          safe_bin_read_int32_as_int64 buf ~pos_ref ~pos:(pos + 1)
          | (*$ Code.char INT64 *)'\xfc'(*$*) ->
          safe_bin_read_int64 buf ~pos_ref ~pos:(pos + 1)
          | _ ->
          raise_read_error ReadError.Int64_code pos
          [@@ocamlformat "disable"]
AltGr commented 4 years ago

Ah! I see the problem: apparently, $*) in code is lexed as $* then ). So when it happens in code-in-comment, ocp-indent doesn't detect it, and believes everything afterwards is in the comment still, which causes all of this weird stuff.

Interestingly enough, the bug was already there if writing something like (* {[ foo $*): while ocp-indent was designed to detect premature ending of the code-in-comment block, parsing $*) as code bypassed the end of comment detection altogether.

I'll find a fix ASAP, thanks for the feedback.

AltGr commented 4 years ago

03bb53f1a9db9e61b66e6674f995ec150619a68b fixes the above, and indeed seems to resolve all the issues you pointed here (so it seems they were all due to the ill-parsed $*)). Thanks for the report!