Giter Site home page Giter Site logo

Comments (2)

dmadisetti avatar dmadisetti commented on June 30, 2024

Some interval code that is almost there

import unittest
from intervaltree import Interval, IntervalTree

# The update_intervals and compute_difference functions from the previous answer

from intervaltree import Interval, IntervalTree

def compute_difference(old_text, new_text):
    import difflib
    matcher = difflib.SequenceMatcher(None, old_text, new_text)
    return [diff for diff in matcher.get_opcodes()]

def update_intervals(tree, old_text, new_text):
    diffs = compute_difference(old_text, new_text)
    shift = 0
    print(diffs)

    for tag, i1, i2, j1, j2 in diffs:
        if tag == 'delete':
            overlap = set({})
            for interval in tree[i1 - shift:i2 - shift]:
                tree.remove(interval)
                start = interval.begin + min(i1 - interval.begin, 0)
                end = interval.end - (min(i2 - shift, interval.end) - max(i1 - shift, interval.begin)) + min(i1 - interval.begin, 0)
                interval = Interval(start, end, interval.data)
                overlap.add(interval)
            for interval in tree[i2 - shift:]:
                tree.remove(interval)
                tree.add(Interval(interval.begin - (i2 - i1), interval.end - (i2 - i1), interval.data))
            for o in overlap:
                tree.add(o)
            shift += i2 - i1

        if tag == 'insert':
            overlap = set({})
            delta = (j2 - j1)
            for interval in tree[i1 - shift]:
                tree.remove(interval)
                end = interval.end + delta - shift
                interval = Interval(interval.begin, end, interval.data)
                overlap.add(interval)
            for interval in tree[i1 - shift:]:
                tree.remove(interval)
                tree.add(Interval(interval.begin + delta - shift, interval.end + delta - shift, interval.data))

            for o in overlap:
                tree.add(o)
            shift -= delta

import unittest
from intervaltree import Interval, IntervalTree

# The compute_difference and update_intervals functions from the previous answer

class TestIntervalTreeUpdate(unittest.TestCase):

    def assert_intervals_equal(self, tree, new_text, expected_substrings):
        self.assertEqual(len(tree), len(expected_substrings))
        for interval, expected_substring in zip(sorted(tree), expected_substrings):
            actual_substring = new_text[interval.begin:interval.end]
            self.assertEqual(actual_substring, expected_substring)

    def test_assert_deletion(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight"))
        text = "hello my name is george"
        self.assert_intervals_equal(tree, text, ["o my name is g"])

    def test_double_interval_deletion(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight"))
        old_text = "hello my name is george"
        new_text = "hello name george"
        update_intervals(tree, old_text, new_text)
        self.assert_intervals_equal(tree, new_text, ["o name g"])

    def test_pre_interval_deletion(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight"))
        old_text = "hello my name is george"
        new_text = "o my name is george"
        update_intervals(tree, old_text, new_text)
        self.assert_intervals_equal(tree, new_text, ["o my name is g"])

    def test_post_interval_deletion(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight"))
        old_text = "hello my name is george"
        new_text = "hello my name is g"
        update_intervals(tree, old_text, new_text)
        self.assert_intervals_equal(tree, new_text, ["o my name is g"])

    def test_single_interval_deletion(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight"))
        old_text = "hello my name is george"
        new_text = "hello name is george"
        update_intervals(tree, old_text, new_text)
        self.assert_intervals_equal(tree, new_text, ["o name is g"])

    def test_pre_into_interval_deletion(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight"))
        old_text = "hello my name is george"
        new_text = "hename is george"
        update_intervals(tree, old_text, new_text)
        self.assert_intervals_equal(tree, new_text, ["name is g"])

    def test_post_into_interval_deletion(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight"))
        old_text = "hello my name is george"
        new_text = "hello my name e"
        update_intervals(tree, old_text, new_text)
        self.assert_intervals_equal(tree, new_text, ["o my name "])

    def test_single_interval_insertion(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight"))
        old_text = "hello my name is george"
        new_text = "hello my name is not really george"
        update_intervals(tree, old_text, new_text)
        self.assert_intervals_equal(tree, new_text, ["o my name is not really g"])


    def test_multiple_intervals_no_overlap(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight1"))
        tree.add(Interval(19, 21, "Highlight2")) #or
        old_text = "hello my name is george"
        new_text = "hello my name is not really george"
        update_intervals(tree, old_text, new_text)
        self.assert_intervals_equal(tree, new_text, ["o my name is not really g", "or"])

    def test_multiple_intervals_with_overlap(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight1"))
        tree.add(Interval(9, 23, "Highlight2")) # name is george
        old_text = "hello my name is george"
        new_text = "hello my name is not really george"
        update_intervals(tree, old_text, new_text)
        self.assert_intervals_equal(tree, new_text, ["o my name is not really g", "name is not really george"])

    def test_insertion_within_interval(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight"))
        old_text = "hello my name is george"
        new_text = "hello my name is not really george, but I am called george actually"
        update_intervals(tree, old_text, new_text)
        self.assert_intervals_equal(tree, new_text, ["o my name is not really g"])

    def test_single_char_insertion(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight"))
        old_text = "hello my name is george"
        new_text = "hello, my name is george"
        update_intervals(tree, old_text, new_text)
        self.assert_intervals_equal(tree, new_text, ["o, my name is g"])

    def test_single_char_deletion(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight"))
        old_text = "hello my name is george"
        new_text = "hell my name is george"
        update_intervals(tree, old_text, new_text)
        self.assert_intervals_equal(tree, new_text, [" my name is g"])

    def test_insertion_in_overlap(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight1"))
        tree.add(Interval(9, 23, "Highlight2"))
        old_text = "hello my name is george"
        new_text = "hello my full name is george"
        update_intervals(tree, old_text, new_text)
        self.assert_intervals_equal(tree, new_text, ["o my full name is g", "name is george"])

    def test_insertion_between_intervals(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight1"))
        tree.add(Interval(19, 21, "Highlight2"))
        old_text = "hello my name is george"
        new_text = "hello my name is really george"
        update_intervals(tree, old_text, new_text)
        self.assert_intervals_equal(tree, new_text, ["o my name is really g", "or"])

    def test_deletion_in_overlap(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight1"))
        tree.add(Interval(9, 23, "Highlight2"))
        old_text = "hello my name is george"
        new_text = "hello my ne is george"
        update_intervals(tree, old_text, new_text)
        self.assert_intervals_equal(tree, new_text, ["o my ne is g", "ne is george"])

    def test_deletion_between_intervals(self):
        tree = IntervalTree()
        tree.add(Interval(4, 18, "Highlight1"))
        tree.add(Interval(19, 21, "Highlight2"))
        old_text = "hello my name is george"
        new_text = "hello my nameis george"
        update_intervals(tree, old_text, new_text)
        self.assert_intervals_equal(tree, new_text, ["o my nameis g", "or"])

    def test_complex_case_diff_based(self):
        old_text = "abcdefghijklmno"

        # Create the initial tree with 5 highlights of varying sizes
        tree = IntervalTree()
        tree.add(Interval(1, 3, "Highlight1"))
        tree.add(Interval(4, 7, "Highlight2"))
        tree.add(Interval(8, 11, "Highlight3"))
        tree.add(Interval(10, 12, "Highlight4"))
        tree.add(Interval(12, 15, "Highlight5"))

        # Modify the text with multiple insertions and deletions
        # Deletion:  remove "cd"
        # Insertion: add "x" between "e" and "f"
        # Deletion:  remove "ij"
        # Insertion: add "yz" between "m" and "n"
        new_text = "abefxghklyzmno"

        # Update the tree based on the modifications
        update_intervals(tree, old_text, new_text)

        # Define the expected substrings for each updated highlight
        expected_substrings = [
            "b",  # Highlight1
            "efxg",  # Highlight2
            "h",  # Highlight3
            "k",  # Highlight4
            "lyzmno"  # Highlight5
        ]

        self.assert_intervals_equal(tree, new_text, expected_substrings)

    def test_complex_case(self):
        # Generate the test string dynamically
        part1 = "This is an example "
        part2 = "where we will insert, delete, and modify "
        part3 = "text within multiple overlapping intervals. "
        part4 = "The goal is to cover a variety of cases. "
        part5 = "Let's see how well the algorithm handles this."

        old_text = part1 + part2 + part3 + part4 + part5

        # Create the initial tree with 5 highlights of varying sizes
        tree = IntervalTree()
        tree.add(Interval(len(part1), len(part1) + len("insert"), "Highlight1"))
        tree.add(Interval(len(part1 + part2), len(part1 + part2) + len("text"), "Highlight2"))
        tree.add(Interval(len(part1 + part2 + part3) - len("cases. "), len(part1 + part2 + part3), "Highlight3"))
        tree.add(Interval(len(part1 + part2 + part3 + part4) - len("the "), len(part1 + part2 + part3 + part4), "Highlight4"))
        tree.add(Interval(len(part1 + part2 + part3 + part4 + part5) - len("handles this."), len(part1 + part2 + part3 + part4 + part5), "Highlight5"))

        # Modify the text with multiple insertions and deletions
        part2_modified = "where we will insert and delete some "
        part3_modified = "text within multiple overlapping intervals and add more content. "
        part4_modified = "The goal is to cover a variety of cases and situations. "
        new_text = part1 + part2_modified + part3_modified + part4_modified + part5

        # Update the tree based on the modifications
        update_intervals(tree, old_text, new_text)

        # Define the expected substrings for each updated highlight
        expected_substrings = [
            "insert and delete",  # Highlight1
            "text within multiple overlapping",  # Highlight2
            "situations.",  # Highlight3
            "to cover a variety of cases and",  # Highlight4
            "handles this."  # Highlight5
        ]

        self.assert_intervals_equal(tree, new_text, expected_substrings)


suite = unittest.TestLoader().loadTestsFromTestCase(TestIntervalTreeUpdate)
unittest.TextTestRunner(verbosity=2).run(suite)

from airlatex.vim.

dmadisetti avatar dmadisetti commented on June 30, 2024

Works?

from airlatex.vim.

Related Issues (20)

Recommend Projects

  • React photo React

    A declarative, efficient, and flexible JavaScript library for building user interfaces.

  • Vue.js photo Vue.js

    🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.

  • Typescript photo Typescript

    TypeScript is a superset of JavaScript that compiles to clean JavaScript output.

  • TensorFlow photo TensorFlow

    An Open Source Machine Learning Framework for Everyone

  • Django photo Django

    The Web framework for perfectionists with deadlines.

  • D3 photo D3

    Bring data to life with SVG, Canvas and HTML. 📊📈🎉

Recommend Topics

  • javascript

    JavaScript (JS) is a lightweight interpreted programming language with first-class functions.

  • web

    Some thing interesting about web. New door for the world.

  • server

    A server is a program made to process requests and deliver data to clients.

  • Machine learning

    Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.

  • Game

    Some thing interesting about game, make everyone happy.

Recommend Org

  • Facebook photo Facebook

    We are working to build community through open source technology. NB: members must have two-factor auth.

  • Microsoft photo Microsoft

    Open source projects and samples from Microsoft.

  • Google photo Google

    Google ❤️ Open Source for everyone.

  • D3 photo D3

    Data-Driven Documents codes.