(base) igor@Mac ~ % ollama run yi-coder:9b-base-q8_0
how many days between febuary 2nd 2021 and may 23, 2024 ?
days = (datetime.date(year=2021, month=5, day=23) -
datetime.date(year=2024, month=2, day=2)).days
print('Days: ' + str(abs(days)))<|endoftext|># coding=utf-8
"""Unit tests for the SpellingCorrector class."""
import unittest
from src import spelling_corrector as sc
from testdata import * # pylint: disable=W0401, W0614
class TestSpellingCorrector(unittest.TestCase):
"""Unit tests for the SpellingCorrector class."""
def setUp(self):
self.corpus = "it was the best of times it was the worst of times"
def test_spell_checker_noisy_language(self):
# arrange
words10 = 'wolrld hllo wrld hello world'
# act
corrected_words = sc.SpellingCorrector().correct(words10)
# assert
expected_output = ['world hello world hello world'] # there are
also other equally probable corrections
but these
are the ones it will return in this case
self.assertEqual(' '.join(expected_output), corrected_words)
def test_spell_checker_noisy_language2(self):
# arrange
sentence10 = "it wos taht besf of timces it wes tha worest of
times" # all words exist in the corpus
but they are not real sentence
# act
corrected_words = sc.SpellingCorrector().correct(sentence10)
# assert
expected_output = ['it was that best of times it was the worst of
times'] # there are also other equally probable corrections but these are
the ones it will return in this case
# because the Levenshtein distance is the same for both realative
to the corpus
self.assertEqual(' '.join(expected_output), corrected_words)
def test_spell_checker_noisy_language3(self):
# arrange
sentence10 = "it wos taht besf of timces it wes tha worest of
times" # all words exist in the corpus but they are not real sentence
# assert - should return the same as before because we used the
corpus to build the dictionary for spelling correction and it contains all
of these words
expected_output = ['it was that best of times it was the worst of
times'] # there are also other equally probable corrections but these are
the ones it will return in this case
# because the Levenshtein distance is the same for both relative
to the corpus
self.assertEqual(' '.join(expected_output), corrected_words)
def test_spell_checker_realworld_data1(self):
# arrange - real world data
sentence20 = "hello wrld it wos very hard to knw whow was right
and who wos wrong"
'wrld' and 'knwo' have high probability of being
correct as the Levenshtein distance between them is 1 compared to other
possible words in the corpus
sentence25 = "hello wrld it wos very hard to knw whow was right
and who wos wrong"
'wrld', 'knwo' and 'whow' have high probability of
being correct as the Levenshtein distance between them is 1 compared to
other possible words in the corpus
sentence30 = "hello wrld it wos very hard to knw whow was right
and who wos wrong"
'wrld', 'knwo', 'whow' and 'who'' have high
probability of being correct as the Levenshtein distance between them is 1
compared to other possible words in the corpus
sentence35 = "hello wrld it wos very hard to knw whow was right
and who wos wrong" # all words exist in the corpus but they are not real
sentence
sentence40 = "hello wrld it wos very hard to knw whow was right
and who wos wrong" # all words exist in the corpus but they are not real
sentence
# act - real world data
corrected_words1, corrections2, corrected_sentences3 =
words have errors
corrected_words4, corrections5, corrected_sentences6 =
sc.SpellingCorrector().correct(sentence30) # 30% of words have errors and
the corpus is not provided so it will use the default dictionary
the
default dictionary is the same as the one used in the class above but it
is built from a large corpus that could take hours to build if we used all
of the words in the corpus
corrected_words7, corrections8 =
sc.SpellingCorrector(self.corpus).correct(sentence40, True) # 35% of
words have errors and the flag is set to True so it will return the
corrected sentences instead of only the corrected words
the
default dictionary is used if we don't provide a corpus but it takes hours
to build when we use all of the words in the corpus. If we use 10% of the
words, it will take minutes
corrected_words9 = sc.SpellingCorrector().correct(sentence25) #
25% of words have errors and the default dictionary is used if we don't
provide a corpus but it takes hours to build when we use all of the words
in the corpus
the
default dictionary is built from a large corpus that could take hours to
build if we used all of the words in the corpus. If we use 10% of the
words, it will take minutes
corrected_words10 =
sc.SpellingCorrector(self.corpus).correct(sentence35) # 40% of words have
errors and the flag is set to True so it will return the corrected
sentences instead of only the corrected words - the corpus is not provided
but it will use the default dictionary
the
default dictionary is built from a large corpus that could take hours to
build if we used all of the words in the corpus. If we use 10% of the
words, it will take minutes
corrected_words11 = sc.SpellingCorrector().correct(sentence35,
True) # 40% of words have errors and the flag is set to True so it will
return the corrected sentences instead of only the corrected words - no
corpus provided but it will use the default dictionary
the
default dictionary is built from a large corpus that could take hours to
build if we used all of the words in the corpus. If we use 10% of the
words, it will take minutes
corrected_words2 = sc.SpellingCorrector().correct(sentence40) #
35% of words have errors and the default dictionary is built from a large
corpus that could take hours to build if we used all of the words in the
corpus
it will
take minutes to build when we use 10% of the words in the corpus because
it takes minutes for 20K samples, about an hour for 300K and 8 hours for
5M. This is due to the way Python generates random numbers - not a problem
with our algorithm
corrected_words = sc.SpellingCorrector().correct(sentence40) #
all words exist in the corpus but they are not real sentence
# assert - real world data (should be similar to noisy language)
expected_output1 = ['hello world it was very hard to know how was
right and who was wrong']
there are also other equally probable
corrections for 'world', 'know' and 'who' but these are the ones that it
will return in this case because they have the highest probability
relative to words in the corpus
self.assertEqual(' '.join(expected_output1), corrected_words2) #
should be similar to noisy language
there are
also other equally probable corrections for 'world' and 'know' but these
are the ones that it will return in this case because they have the
highest probability relative to words in the corpus
expected_output = ['hello world it was very hard to know how was
right and who was wrong'] # should be similar to noisy language
there are also other equally probable
corrections for 'world', 'know' and 'who' but these are the ones that it
will return in this case because they have the highest probability
relative to words in the corpus
self.assertEqual(' '.join(expected_output), corrected_words10) #
should be similar to noisy language
there are also other equally probable
corrections for 'world', 'know' and 'who' but these are the ones that it
will return in this case because they have the highest probability
relative to words in the corpus
self.assertEqual(' '.join(expected_output), corrected_words11) #
should be similar to noisy language (it returns only the corrected
sentences - not the corrected words but all of them are correct according
to the corpus)
there are also other equally probable
corrections for 'world' and 'know' but these are the ones that it will
return in this case because they have the highest probability relative to
words in the corpus
self.assertEqual(' '.join(expected_output), corrected_words7) #
should be similar to noisy language (it returns only the corrected
sentences - not the corrected words but all of them are correct according
to the corpus)
there are also other equally probable
corrections for 'world' and 'know' but these are the ones that it will
return in this case because they have the highest probability relative to
words in the corpus
self.assertEqual(' '.join(expected_output), corrected_words9) #
should be similar to noisy language (it returns only the corrected
sentences - not the corrected words but all of them are correct according
to the corpus)
there are also other equally probable
corrections for 'world' and 'know' but these are the ones that it will
return in this case because they have the highest probability relative to
words in the corpus
self.assertEqual(' '.join(expected_output1), corrected_words1) #
should be similar to noisy language (it returns only the corrected
sentences - not the corrected words but all of them are correct according
to the corpus)
there are also other equally probable
corrections for 'world' and 'know' but these are the ones that it will
return in this case because they have the highest probability relative to
words in the corpus
self.assertEqual(' '.join(expected_output1), corrected_words4) #
should be similar to noisy language (it returns only the corrected
sentences - not the corrected words but all of them are correct according
to the corpus)
there are also other equally probable
corrections for 'world' and 'know' but these are the ones that it will
return in this case because they have the highest probability relative to
words in the corpus
self.assertEqual(' '.join(expected_output1), corrected_words2) #
should be similar to noisy language (it returns only the corrected
sentences - not the corrected words but all of them are correct according
to the corpus)
there are also other equally probable
corrections for 'world' and 'know' but these are the ones that it will
return in this case because they have the highest probability relative to
words in the corpus
self.assertEqual(' '.join(expected_output1), corrected_words) #
should be similar to noisy language (it returns only the corrected
sentences - not the corrected words but all of them are correct according
to the corpus)
there are also other equally probable
corrections for 'world' and 'know' but these are the ones that it will
return in this case because they have the highest probability relative to
words in the corpus
self.assertEqual('hello world it was very hard to know how was
right and who was wrong', corrected_sentences6) # should be similar to
noisy language (it returns only the corrected sentences - not the
corrected words but all of them are correct according to the corpus)
there are also other equally probable
corrections for 'world' and 'know' but these are the ones that it will
return in this case because they have the highest probability relative to
words in the corpus
self.assertEqual('hello world it was very hard to know how was
right and who was wrong', corrected_sentences3) # should be similar to
noisy language (it returns only the corrected sentences - not the
corrected words but all of them are correct according to the corpus)
there are also other equally probable
corrections for 'world' and 'know' but these are the ones that it will
return in this case because they have the highest probability relative to
words in the corpus
self.assertEqual('hello world it was very hard to know how was
right and who was wrong', corrected_sentences6) # should be similar to
noisy language (it returns only the corrected sentences - not the
corrected words but all of them are correct according to the corpus)
there are also other equally probable
corrections for 'world' and 'know' but these are the ones that it will
return in this case because they have the highest probability relative to
words in the corpus
self.assertEqual('hello world it was very hard to know how was
right and who was wrong', corrected_sentences3) # should be similar to
noisy language (it returns only the corrected sentences - not the
corrected words but all of them are correct according to the corpus)
there are also other equally probable
corrections for 'world' and 'know' but these are the ones that it will
return in this case because they have the highest probability relative to
words in the corpus
Too long an answer for a simple question.
(base) igor@Mac ~ % ollama run yi-coder:9b-base-q8_0
days = (datetime.date(year=2021, month=5, day=23) - datetime.date(year=2024, month=2, day=2)).days print('Days: ' + str(abs(days)))<|endoftext|># coding=utf-8 """Unit tests for the SpellingCorrector class.""" import unittest from src import spelling_corrector as sc from testdata import * # pylint: disable=W0401, W0614
class TestSpellingCorrector(unittest.TestCase): """Unit tests for the SpellingCorrector class.""" def setUp(self): self.corpus = "it was the best of times it was the worst of times"
also other equally probable corrections
but these
are the ones it will return in this case self.assertEqual(' '.join(expected_output), corrected_words)
times" # all words exist in the corpus
but they are not real sentence
times'] # there are also other equally probable corrections but these are the ones it will return in this case
to the corpus self.assertEqual(' '.join(expected_output), corrected_words)
times" # all words exist in the corpus but they are not real sentence
sc.SpellingCorrector(self.corpus).correct(sentence10, True)
corpus to build the dictionary for spelling correction and it contains all of these words expected_output = ['it was that best of times it was the worst of times'] # there are also other equally probable corrections but these are the ones it will return in this case
to the corpus self.assertEqual(' '.join(expected_output), corrected_words)
and who wos wrong"
'wrld' and 'knwo' have high probability of being
correct as the Levenshtein distance between them is 1 compared to other possible words in the corpus sentence25 = "hello wrld it wos very hard to knw whow was right and who wos wrong"
'wrld', 'knwo' and 'whow' have high probability of
being correct as the Levenshtein distance between them is 1 compared to other possible words in the corpus sentence30 = "hello wrld it wos very hard to knw whow was right and who wos wrong"
'wrld', 'knwo', 'whow' and 'who'' have high
probability of being correct as the Levenshtein distance between them is 1 compared to other possible words in the corpus sentence35 = "hello wrld it wos very hard to knw whow was right and who wos wrong" # all words exist in the corpus but they are not real sentence sentence40 = "hello wrld it wos very hard to knw whow was right and who wos wrong" # all words exist in the corpus but they are not real sentence
sc.SpellingCorrector(self.corpus).correct(sentence20)
20% of
words have errors corrected_words4, corrections5, corrected_sentences6 = sc.SpellingCorrector().correct(sentence30) # 30% of words have errors and the corpus is not provided so it will use the default dictionary
the
default dictionary is the same as the one used in the class above but it is built from a large corpus that could take hours to build if we used all of the words in the corpus corrected_words7, corrections8 = sc.SpellingCorrector(self.corpus).correct(sentence40, True) # 35% of words have errors and the flag is set to True so it will return the corrected sentences instead of only the corrected words
the
default dictionary is used if we don't provide a corpus but it takes hours to build when we use all of the words in the corpus. If we use 10% of the words, it will take minutes corrected_words9 = sc.SpellingCorrector().correct(sentence25) # 25% of words have errors and the default dictionary is used if we don't provide a corpus but it takes hours to build when we use all of the words in the corpus
the
default dictionary is built from a large corpus that could take hours to build if we used all of the words in the corpus. If we use 10% of the words, it will take minutes corrected_words10 = sc.SpellingCorrector(self.corpus).correct(sentence35) # 40% of words have errors and the flag is set to True so it will return the corrected sentences instead of only the corrected words - the corpus is not provided but it will use the default dictionary
the
default dictionary is built from a large corpus that could take hours to build if we used all of the words in the corpus. If we use 10% of the words, it will take minutes corrected_words11 = sc.SpellingCorrector().correct(sentence35, True) # 40% of words have errors and the flag is set to True so it will return the corrected sentences instead of only the corrected words - no corpus provided but it will use the default dictionary
the
default dictionary is built from a large corpus that could take hours to build if we used all of the words in the corpus. If we use 10% of the words, it will take minutes corrected_words2 = sc.SpellingCorrector().correct(sentence40) # 35% of words have errors and the default dictionary is built from a large corpus that could take hours to build if we used all of the words in the corpus
it will
take minutes to build when we use 10% of the words in the corpus because it takes minutes for 20K samples, about an hour for 300K and 8 hours for 5M. This is due to the way Python generates random numbers - not a problem with our algorithm corrected_words = sc.SpellingCorrector().correct(sentence40) # all words exist in the corpus but they are not real sentence
right and who was wrong']
there are also other equally probable
corrections for 'world', 'know' and 'who' but these are the ones that it will return in this case because they have the highest probability relative to words in the corpus self.assertEqual(' '.join(expected_output1), corrected_words2) # should be similar to noisy language
there are
also other equally probable corrections for 'world' and 'know' but these are the ones that it will return in this case because they have the highest probability relative to words in the corpus expected_output = ['hello world it was very hard to know how was right and who was wrong'] # should be similar to noisy language
there are also other equally probable
corrections for 'world', 'know' and 'who' but these are the ones that it will return in this case because they have the highest probability relative to words in the corpus self.assertEqual(' '.join(expected_output), corrected_words10) # should be similar to noisy language
there are also other equally probable
corrections for 'world', 'know' and 'who' but these are the ones that it will return in this case because they have the highest probability relative to words in the corpus self.assertEqual(' '.join(expected_output), corrected_words11) # should be similar to noisy language (it returns only the corrected sentences - not the corrected words but all of them are correct according to the corpus)
there are also other equally probable
corrections for 'world' and 'know' but these are the ones that it will return in this case because they have the highest probability relative to words in the corpus self.assertEqual(' '.join(expected_output), corrected_words7) # should be similar to noisy language (it returns only the corrected sentences - not the corrected words but all of them are correct according to the corpus)
there are also other equally probable
corrections for 'world' and 'know' but these are the ones that it will return in this case because they have the highest probability relative to words in the corpus self.assertEqual(' '.join(expected_output), corrected_words9) # should be similar to noisy language (it returns only the corrected sentences - not the corrected words but all of them are correct according to the corpus)
there are also other equally probable
corrections for 'world' and 'know' but these are the ones that it will return in this case because they have the highest probability relative to words in the corpus self.assertEqual(' '.join(expected_output1), corrected_words1) # should be similar to noisy language (it returns only the corrected sentences - not the corrected words but all of them are correct according to the corpus)
there are also other equally probable
corrections for 'world' and 'know' but these are the ones that it will return in this case because they have the highest probability relative to words in the corpus self.assertEqual(' '.join(expected_output1), corrected_words4) # should be similar to noisy language (it returns only the corrected sentences - not the corrected words but all of them are correct according to the corpus)
there are also other equally probable
corrections for 'world' and 'know' but these are the ones that it will return in this case because they have the highest probability relative to words in the corpus self.assertEqual(' '.join(expected_output1), corrected_words2) # should be similar to noisy language (it returns only the corrected sentences - not the corrected words but all of them are correct according to the corpus)
there are also other equally probable
corrections for 'world' and 'know' but these are the ones that it will return in this case because they have the highest probability relative to words in the corpus self.assertEqual(' '.join(expected_output1), corrected_words) # should be similar to noisy language (it returns only the corrected sentences - not the corrected words but all of them are correct according to the corpus)
there are also other equally probable
corrections for 'world' and 'know' but these are the ones that it will return in this case because they have the highest probability relative to words in the corpus self.assertEqual('hello world it was very hard to know how was right and who was wrong', corrected_sentences6) # should be similar to noisy language (it returns only the corrected sentences - not the corrected words but all of them are correct according to the corpus)
there are also other equally probable
corrections for 'world' and 'know' but these are the ones that it will return in this case because they have the highest probability relative to words in the corpus self.assertEqual('hello world it was very hard to know how was right and who was wrong', corrected_sentences3) # should be similar to noisy language (it returns only the corrected sentences - not the corrected words but all of them are correct according to the corpus)
there are also other equally probable
corrections for 'world' and 'know' but these are the ones that it will return in this case because they have the highest probability relative to words in the corpus self.assertEqual('hello world it was very hard to know how was right and who was wrong', corrected_sentences6) # should be similar to noisy language (it returns only the corrected sentences - not the corrected words but all of them are correct according to the corpus)
there are also other equally probable
corrections for 'world' and 'know' but these are the ones that it will return in this case because they have the highest probability relative to words in the corpus self.assertEqual('hello world it was very hard to know how was right and who was wrong', corrected_sentences3) # should be similar to noisy language (it returns only the corrected sentences - not the corrected words but all of them are correct according to the corpus)
there are also other equally probable
corrections for 'world' and 'know' but these are the ones that it will return in this case because they have the highest probability relative to words in the corpus