toodom02 / ocamlregextkit

https://toodom02.github.io/ocamlregextkit/
GNU General Public License v3.0
1 stars 1 forks source link

Crash during intersection #6

Closed Kakadu closed 5 months ago

Kakadu commented 5 months ago

I wanted to check whether intersection of two automata is empty, but for simple example it crashes. @toodom02, Is it a bug in my understanding on in the library?

open Regextkit

let%expect_test _ =
  let t1 = Re.parse "(ab)*" in
  let t2 = Re.parse "(cd)*" in
  let dfa1 = Dfa.nfa_to_dfa @@ Nfa.re_to_nfa t1 in
  let dfa2 = Dfa.nfa_to_dfa @@ Nfa.re_to_nfa t2 in
  Printf.printf "%b" (Dfa.is_empty @@ Dfa.product_intersection dfa1 dfa2);
  [%expect.unreachable]
[@@expect.uncaught_exn {|
  (* CR expect_test_collector: This test expectation appears to contain a backtrace.
     This is strongly discouraged as backtraces are fragile.
     Please change this test to not include a backtrace. *)

  (Failure hd)
  Raised at Stdlib.failwith in file "stdlib.ml" (inlined), line 29, characters 17-33
  Called from Stdlib__List.hd in file "list.ml" (inlined), line 30, characters 10-23
  Called from Regextkit__Dfa.succ in file "lib/dfa.ml", line 15, characters 17-52
  Called from Regextkit__Dfa.product_construction.find_product_trans.(fun) in file "lib/dfa.ml", line 146, characters 27-38
  Called from Stdlib__List.fold_left in file "list.ml", line 121, characters 24-34
  Called from Stdlib__List.fold_left in file "list.ml", line 121, characters 24-34
  Called from Regextkit__Dfa.product_construction.find_product_trans in file "lib/dfa.ml", line 140, characters 4-357
  Called from Regextkit__Dfa.product_intersection in file "lib/dfa.ml" (inlined), line 173, characters 27-60
  Called from Test_regex.(fun) in file "tests/Test_regex.ml", line 12, characters 38-72
  Called from Expect_test_collector.Make.Instance_io.exec in file "collector/expect_test_collector.ml", line 262, characters 12-19 |}]

let%expect_test _ =
  let t1 = Re.parse "(ab)*e" in
  let t2 = Re.parse "e(cd)*" in
  let dfa1 = Dfa.nfa_to_dfa @@ Nfa.re_to_nfa t1 in
  let dfa2 = Dfa.nfa_to_dfa @@ Nfa.re_to_nfa t2 in
  assert (not (Dfa.is_empty dfa1));
  assert (not (Dfa.is_empty dfa2));
  Printf.printf "%b" (Dfa.is_empty @@ Dfa.product_intersection dfa1 dfa2);
  [%expect.unreachable]
[@@expect.uncaught_exn {|
  (* CR expect_test_collector: This test expectation appears to contain a backtrace.
     This is strongly discouraged as backtraces are fragile.
     Please change this test to not include a backtrace. *)

  (Failure hd)
  Raised at Stdlib.failwith in file "stdlib.ml" (inlined), line 29, characters 17-33
  Called from Stdlib__List.hd in file "list.ml" (inlined), line 30, characters 10-23
  Called from Regextkit__Dfa.succ in file "lib/dfa.ml", line 15, characters 17-52
  Called from Regextkit__Dfa.product_construction.find_product_trans.(fun) in file "lib/dfa.ml", line 146, characters 27-38
  Called from Stdlib__List.fold_left in file "list.ml", line 121, characters 24-34
  Called from Stdlib__List.fold_left in file "list.ml", line 121, characters 24-34
  Called from Regextkit__Dfa.product_construction.find_product_trans in file "lib/dfa.ml", line 140, characters 4-357
  Called from Regextkit__Dfa.product_intersection in file "lib/dfa.ml" (inlined), line 173, characters 27-60
  Called from Test_regex.(fun) in file "tests/Test_regex.ml", line 22, characters 38-72
  Called from Expect_test_collector.Make.Instance_io.exec in file "collector/expect_test_collector.ml", line 262, characters 12-19 |}]
Kakadu commented 5 months ago

Something like this should help

--- a/lib/dfa.ml
+++ b/lib/dfa.ml
@@ -147,6 +147,11 @@ let get_accepted m =
;;

let product_construction op m1 m2 =
+  let log fmt =
+    if false then
+    Format.kasprintf (print_endline) fmt
+  else
+    Format.ikfprintf (fun _  -> ()) Format.std_formatter fmt in
let cross_product a b =
List.concat
(List.rev_map (fun e1 -> List.rev_map (fun e2 -> ProductState (e1, e2)) b) a)
@@ -159,8 +164,9 @@ let product_construction op m1 m2 =
| ProductState (l, r) ->
List.fold_left
(fun acc' a ->
-              let lRes = succ m1 l a
-              and rRes = succ m2 r a in
+              match (Adt.get_next_states m1 l a, Adt.get_next_states m2 r a) with
+              | [],_ | _,[] ->  acc'
+              | lRes::_, rRes :: _ ->
(ProductState (l, r), a, ProductState (lRes, rRes)) :: acc')
acc
alphabet
toodom02 commented 5 months ago

This issue is caused by attempting a product operation on DFAs over different alphabets.

The following change to your function should fix this:

open Regextkit

let%expect_test _ =
  let t1 = Re.parse "(ab)*" in
  let t2 = Re.parse "(cd)*" in
  let nfa1 = Nfa.re_to_nfa t1 in
  let nfa2 = Nfa.re_to_nfa t2 in
  Nfa.merge_alphabets nfa1 nfa2;
  let dfa1 =  Dfa.nfa_to_dfa nfa1 in
  let dfa2 =  Dfa.nfa_to_dfa nfa2 in
  Printf.printf "%b" (Dfa.is_empty @@ Dfa.product_intersection dfa1 dfa2);

It is easier to merge the alphabets of the NFAs. Merging alphabets of DFAs adds unnecessary complexity (maintaining a total transition func).

Merged PR catches this behaviour in error.