OnionGrief / Chipollino

преобразования регулярных выражений и конечных автоматов
Other
17 stars 4 forks source link

MergeBisim быстрее, чем Minimize #339

Open TonitaN opened 2 months ago

TonitaN commented 2 months ago

Тест:

N1 = Determinize.Thompson {(a|b(aaa)*|a(bb)*|bbab*)*|a(b|ba)*|(baba*b)*((a|b(aaa)*|a(bb)*|bbab*)*|a(b|ba)*|(baba*b)*)((a|b(aaa)*|a(bb)*|bbab*)*|a(b|ba)*|(baba*b)*)}
N2 = Determinize.Thompson {(a|b(aaa)*|a(bb)*|bbab*)*|a(b|ba)*|(baba*b)*((a|bb(baaa)*|a(bb)*|bbab*)*|a(bbba|ba)*|(baba*b)*)((a|b(aaa)*|a(bb)*|bbab*)*|a(b|ba)*|(baba*b)*)}
N3 = Determinize.Thompson {(a|b(aaa)*|a(bb)*|bbab*)*|a(b|ba)*|(baba*b)*((a|baa(aaa)*|a(bbbaba)*|bbab*)*|a(b|ba)*|(baba*b)*)((a|b(aaa)*|a(bb)*|bbab*)*|a(b|ba)*|(baba*b)*)}
M1 = Minimize N1
M2 = Minimize N2
M3 = Minimize N3
MM1 = MergeBisim N1
MM2 = MergeBisim N2
MM3 = MergeBisim N3
B1 = Equal M1 MM1 
B2 = Equal M2 MM2 
B3 = Equal M3 MM3
Equal B1 B2 !!
Equal B2 B3 !!   

Логи на самые последние равенства, чтобы не рендерить огромные автоматы и сделать логирование константным по времени. Разница очень ощутима.

Вывод: @xendalm , нужно разобраться с асимптотикой алгоритма, не так уж он и плох. И заодно посмотреть, почему у нас такая медленная минимизация.

xendalm commented 2 months ago

Сохраню здесь замеры

    std::vector<Regex> r(
        {Regex("(a|b(aaa)*|a(bb)*|bbab*)*|a(b|ba)*|(baba*b)*((a|b(aaa)*|a(bb)*|bbab*)*|a(b|ba)*|("
               "baba*b)*)((a|b(aaa)*|a(bb)*|bbab*)*|a(b|ba)*|(baba*b)*)"),
         Regex("(a|b(aaa)*|a(bb)*|bbab*)*|a(b|ba)*|(baba*b)*((a|bb(baaa)*|a(bb)*|bbab*)*|a(bbba|ba)"
               "*|(baba*b)*)((a|b(aaa)*|a(bb)*|bbab*)*|a(b|ba)*|(baba*b)*)"),
         Regex("(a|b(aaa)*|a(bb)*|bbab*)*|a(b|ba)*|(baba*b)*((a|baa(aaa)*|a(bbbaba)*|bbab*)*|a(b|"
               "ba)*|(baba*b)*)((a|b(aaa)*|a(bb)*|bbab*)*|a(b|ba)*|(baba*b)*)")});

    for (const auto& i : r) {
        FiniteAutomaton t(i.to_thompson());

        auto start_minimize = std::chrono::high_resolution_clock::now();
        t.minimize();
        auto end_minimize = std::chrono::high_resolution_clock::now();
        std::chrono::duration<double> duration_minimize = end_minimize - start_minimize;

        auto start_mb = std::chrono::high_resolution_clock::now();
        t.determinize().merge_bisimilar();
        auto end_mb = std::chrono::high_resolution_clock::now();
        std::chrono::duration<double> duration_mb = end_mb - start_mb;

        cout << "Time taken for minimize(): " << duration_minimize.count() << " seconds"
             << std::endl;
        cout << "Time taken for determinize() and merge_bisimilar(): " << duration_mb.count()
             << " seconds" << std::endl;
    }
Time taken for minimize(): 0.0197977 seconds
Time taken for determinize() and merge_bisimilar(): 0.0150672 seconds
Time taken for minimize(): 0.284295 seconds
Time taken for determinize() and merge_bisimilar(): 0.0328585 seconds
Time taken for minimize(): 0.461334 seconds
Time taken for determinize() and merge_bisimilar(): 0.0459332 seconds