dmadisetti / AirLatex.vim

Overleaf / ShareLatex in Vim
MIT License
15 stars 0 forks source link

Update comment intervals on edit #3

Closed dmadisetti closed 1 year ago

dmadisetti commented 1 year ago

Using interval tree. Currently not updating interval, so highlights will get out of sync with buffer if too much editing occurs.

dmadisetti commented 1 year ago

Some interval code that is almost there

import unittest
from intervaltree import Interval, IntervalTree

# The update_intervals and compute_difference functions from the previous answer

from intervaltree import Interval, IntervalTree

def compute_difference(old_text, new_text):
    import difflib
    matcher = difflib.SequenceMatcher(None, old_text, new_text)
    return [diff for diff in matcher.get_opcodes()]

def update_intervals(tree, old_text, new_text):
    diffs = compute_difference(old_text, new_text)
    shift = 0
    print(diffs)

    for tag, i1, i2, j1, j2 in diffs:
        if tag == 'delete':
            overlap = set({})
            for interval in tree[i1 - shift:i2 - shift]:
                tree.remove(interval)
                start = interval.begin + min(i1 - interval.begin, 0)
                end = interval.end - (min(i2 - shift, interval.end) - max(i1 - shift, interval.begin)) + min(i1 - interval.begin, 0)
                interval = Interval(start, end, interval.data)
                overlap.add(interval)
            for interval in tree[i2 - shift:]:
                tree.remove(interval)
                tree.add(Interval(interval.begin - (i2 - i1), interval.end - (i2 - i1), interval.data))
            for o in overlap:
                tree.add(o)
            shift += i2 - i1

        if tag == 'insert':
            overlap = set({})
            delta = (j2 - j1)
            for interval in tree[i1 - shift]:
                tree.remove(interval)
                end = interval.end + delta - shift
                interval = Interval(interval.begin, end, interval.data)
                overlap.add(interval)
            for interval in tree[i1 - shift:]:
                tree.remove(interval)
                tree.add(Interval(interval.begin + delta - shift, interval.end + delta - shift, interval.data))

            for o in overlap:
                tree.add(o)
            shift -= delta

import unittest
from intervaltree import Interval, IntervalTree

# The compute_difference and update_intervals functions from the previous answer

class TestIntervalTreeUpdate(unittest.TestCase):

    def assert_intervals_equal(self, tree, new_text, expected_substrings):
        self.assertEqual(len(tree), len(expected_substrings))
        for interval, expected_substring in zip(sorted(tree), expected_substrings):
            actual_substring = new_text[interval.begin:interval.end]
            self.assertEqual(actual_substring, expected_substring)

    def test_assert_deletion(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight"))
        text = "hello my name is george"
        self.assert_intervals_equal(tree, text, ["o my name is g"])

    def test_double_interval_deletion(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight"))
        old_text = "hello my name is george"
        new_text = "hello name george"
        update_intervals(tree, old_text, new_text)
        self.assert_intervals_equal(tree, new_text, ["o name g"])

    def test_pre_interval_deletion(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight"))
        old_text = "hello my name is george"
        new_text = "o my name is george"
        update_intervals(tree, old_text, new_text)
        self.assert_intervals_equal(tree, new_text, ["o my name is g"])

    def test_post_interval_deletion(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight"))
        old_text = "hello my name is george"
        new_text = "hello my name is g"
        update_intervals(tree, old_text, new_text)
        self.assert_intervals_equal(tree, new_text, ["o my name is g"])

    def test_single_interval_deletion(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight"))
        old_text = "hello my name is george"
        new_text = "hello name is george"
        update_intervals(tree, old_text, new_text)
        self.assert_intervals_equal(tree, new_text, ["o name is g"])

    def test_pre_into_interval_deletion(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight"))
        old_text = "hello my name is george"
        new_text = "hename is george"
        update_intervals(tree, old_text, new_text)
        self.assert_intervals_equal(tree, new_text, ["name is g"])

    def test_post_into_interval_deletion(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight"))
        old_text = "hello my name is george"
        new_text = "hello my name e"
        update_intervals(tree, old_text, new_text)
        self.assert_intervals_equal(tree, new_text, ["o my name "])

    def test_single_interval_insertion(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight"))
        old_text = "hello my name is george"
        new_text = "hello my name is not really george"
        update_intervals(tree, old_text, new_text)
        self.assert_intervals_equal(tree, new_text, ["o my name is not really g"])

    def test_multiple_intervals_no_overlap(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight1"))
        tree.add(Interval(19, 21, "Highlight2")) #or
        old_text = "hello my name is george"
        new_text = "hello my name is not really george"
        update_intervals(tree, old_text, new_text)
        self.assert_intervals_equal(tree, new_text, ["o my name is not really g", "or"])

    def test_multiple_intervals_with_overlap(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight1"))
        tree.add(Interval(9, 23, "Highlight2")) # name is george
        old_text = "hello my name is george"
        new_text = "hello my name is not really george"
        update_intervals(tree, old_text, new_text)
        self.assert_intervals_equal(tree, new_text, ["o my name is not really g", "name is not really george"])

    def test_insertion_within_interval(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight"))
        old_text = "hello my name is george"
        new_text = "hello my name is not really george, but I am called george actually"
        update_intervals(tree, old_text, new_text)
        self.assert_intervals_equal(tree, new_text, ["o my name is not really g"])

    def test_single_char_insertion(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight"))
        old_text = "hello my name is george"
        new_text = "hello, my name is george"
        update_intervals(tree, old_text, new_text)
        self.assert_intervals_equal(tree, new_text, ["o, my name is g"])

    def test_single_char_deletion(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight"))
        old_text = "hello my name is george"
        new_text = "hell my name is george"
        update_intervals(tree, old_text, new_text)
        self.assert_intervals_equal(tree, new_text, [" my name is g"])

    def test_insertion_in_overlap(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight1"))
        tree.add(Interval(9, 23, "Highlight2"))
        old_text = "hello my name is george"
        new_text = "hello my full name is george"
        update_intervals(tree, old_text, new_text)
        self.assert_intervals_equal(tree, new_text, ["o my full name is g", "name is george"])

    def test_insertion_between_intervals(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight1"))
        tree.add(Interval(19, 21, "Highlight2"))
        old_text = "hello my name is george"
        new_text = "hello my name is really george"
        update_intervals(tree, old_text, new_text)
        self.assert_intervals_equal(tree, new_text, ["o my name is really g", "or"])

    def test_deletion_in_overlap(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight1"))
        tree.add(Interval(9, 23, "Highlight2"))
        old_text = "hello my name is george"
        new_text = "hello my ne is george"
        update_intervals(tree, old_text, new_text)
        self.assert_intervals_equal(tree, new_text, ["o my ne is g", "ne is george"])

    def test_deletion_between_intervals(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight1"))
        tree.add(Interval(19, 21, "Highlight2"))
        old_text = "hello my name is george"
        new_text = "hello my nameis george"
        update_intervals(tree, old_text, new_text)
        self.assert_intervals_equal(tree, new_text, ["o my nameis g", "or"])

    def test_complex_case_diff_based(self):
        old_text = "abcdefghijklmno"

        # Create the initial tree with 5 highlights of varying sizes
        tree = IntervalTree()
        tree.add(Interval(1, 3, "Highlight1"))
        tree.add(Interval(4, 7, "Highlight2"))
        tree.add(Interval(8, 11, "Highlight3"))
        tree.add(Interval(10, 12, "Highlight4"))
        tree.add(Interval(12, 15, "Highlight5"))

        # Modify the text with multiple insertions and deletions
        # Deletion:  remove "cd"
        # Insertion: add "x" between "e" and "f"
        # Deletion:  remove "ij"
        # Insertion: add "yz" between "m" and "n"
        new_text = "abefxghklyzmno"

        # Update the tree based on the modifications
        update_intervals(tree, old_text, new_text)

        # Define the expected substrings for each updated highlight
        expected_substrings = [
            "b",  # Highlight1
            "efxg",  # Highlight2
            "h",  # Highlight3
            "k",  # Highlight4
            "lyzmno"  # Highlight5
        ]

        self.assert_intervals_equal(tree, new_text, expected_substrings)

    def test_complex_case(self):
        # Generate the test string dynamically
        part1 = "This is an example "
        part2 = "where we will insert, delete, and modify "
        part3 = "text within multiple overlapping intervals. "
        part4 = "The goal is to cover a variety of cases. "
        part5 = "Let's see how well the algorithm handles this."

        old_text = part1 + part2 + part3 + part4 + part5

        # Create the initial tree with 5 highlights of varying sizes
        tree = IntervalTree()
        tree.add(Interval(len(part1), len(part1) + len("insert"), "Highlight1"))
        tree.add(Interval(len(part1 + part2), len(part1 + part2) + len("text"), "Highlight2"))
        tree.add(Interval(len(part1 + part2 + part3) - len("cases. "), len(part1 + part2 + part3), "Highlight3"))
        tree.add(Interval(len(part1 + part2 + part3 + part4) - len("the "), len(part1 + part2 + part3 + part4), "Highlight4"))
        tree.add(Interval(len(part1 + part2 + part3 + part4 + part5) - len("handles this."), len(part1 + part2 + part3 + part4 + part5), "Highlight5"))

        # Modify the text with multiple insertions and deletions
        part2_modified = "where we will insert and delete some "
        part3_modified = "text within multiple overlapping intervals and add more content. "
        part4_modified = "The goal is to cover a variety of cases and situations. "
        new_text = part1 + part2_modified + part3_modified + part4_modified + part5

        # Update the tree based on the modifications
        update_intervals(tree, old_text, new_text)

        # Define the expected substrings for each updated highlight
        expected_substrings = [
            "insert and delete",  # Highlight1
            "text within multiple overlapping",  # Highlight2
            "situations.",  # Highlight3
            "to cover a variety of cases and",  # Highlight4
            "handles this."  # Highlight5
        ]

        self.assert_intervals_equal(tree, new_text, expected_substrings)

suite = unittest.TestLoader().loadTestsFromTestCase(TestIntervalTreeUpdate)
unittest.TextTestRunner(verbosity=2).run(suite)
dmadisetti commented 1 year ago

Works?