01-ai / Yi-Coder

🌟 Yi-Coder is a series of open-source code language models that delivers state-of-the-art coding performance with fewer than 10 billion parameters.

Too long an answer when using Ollama to run yi-coder:9b-base-q8_0 #11

Open igorschlum opened 2 weeks ago

igorschlum commented 2 weeks ago

Too long an answer for a simple question.

(base) igor@Mac ~ % ollama run yi-coder:9b-base-q8_0

how many days between febuary 2nd 2021 and may 23, 2024 ?

days = (datetime.date(year=2021, month=5, day=23) - datetime.date(year=2024, month=2, day=2)).days
print('Days: ' + str(abs(days)))<|endoftext|>
# coding=utf-8
"""Unit tests for the SpellingCorrector class."""
import unittest
from src import spelling_corrector as sc
from testdata import *  # pylint: disable=W0401, W0614

class TestSpellingCorrector(unittest.TestCase):
    """Unit tests for the SpellingCorrector class."""

def setUp(self):
    self.corpus = "it was the best of times it was the worst of times"

def test_spell_checker_noisy_language(self):
    # arrange
    words10 = 'wolrld hllo wrld hello world'

    # act
    corrected_words = sc.SpellingCorrector().correct(words10)

    # assert
    expected_output = ['world hello world hello world']  # there are also other equally probable corrections but these are the ones it will return in this case
    self.assertEqual(' '.join(expected_output), corrected_words)

def test_spell_checker_noisy_language2(self):
    # arrange
    sentence10 = "it wos taht besf of timces it wes tha worest of times"  # all words exist in the corpus but they are not real sentence

    # act
    corrected_words = sc.SpellingCorrector().correct(sentence10)

    # assert
    expected_output = ['it was that best of times it was the worst of times']  # there are also other equally probable corrections but these are the ones it will return in this case
    # because the Levenshtein distance is the same for both realative to the corpus
    self.assertEqual(' '.join(expected_output), corrected_words)

def test_spell_checker_noisy_language3(self):
    # arrange
    sentence10 = "it wos taht besf of timces it wes tha worest of times"  # all words exist in the corpus but they are not real sentence

    # act
    corrected_words = sc.SpellingCorrector(self.corpus).correct(sentence10, True)

    # assert - should return the same as before because we used the corpus to build the dictionary for spelling correction and it contains all of these words
    expected_output = ['it was that best of times it was the worst of times']  # there are also other equally probable corrections but these are the ones it will return in this case
    # because the Levenshtein distance is the same for both relative to the corpus
    self.assertEqual(' '.join(expected_output), corrected_words)

def test_spell_checker_realworld_data1(self):
    # arrange - real world data
    sentence20 = "hello wrld it wos very hard to knw whow was right and who wos wrong"  # 'wrld' and 'knwo' have high probability of being correct as the Levenshtein distance between them is 1 compared to other possible words in the corpus
    sentence25 = "hello wrld it wos very hard to knw whow was right and who wos wrong"  # 'wrld', 'knwo' and 'whow' have high probability of being correct as the Levenshtein distance between them is 1 compared to other possible words in the corpus
    sentence30 = "hello wrld it wos very hard to knw whow was right and who wos wrong"  # 'wrld', 'knwo', 'whow' and 'who'' have high probability of being correct as the Levenshtein distance between them is 1 compared to other possible words in the corpus
    sentence35 = "hello wrld it wos very hard to knw whow was right and who wos wrong"  # all words exist in the corpus but they are not real sentence
    sentence40 = "hello wrld it wos very hard to knw whow was right and who wos wrong"  # all words exist in the corpus but they are not real sentence

    # act - real world data
    corrected_words1, corrections2, corrected_sentences3 = sc.SpellingCorrector(self.corpus).correct(sentence20)  # 20% of words have errors
    corrected_words4, corrections5, corrected_sentences6 = sc.SpellingCorrector().correct(sentence30)  # 30% of words have errors and the corpus is not provided so it will use the default dictionary
    # the default dictionary is the same as the one used in the class above but it is built from a large corpus that could take hours to build if we used all of the words in the corpus
    corrected_words7, corrections8 = sc.SpellingCorrector(self.corpus).correct(sentence40, True)  # 35% of words have errors and the flag is set to True so it will return the corrected sentences instead of only the corrected words
    # the default dictionary is used if we don't provide a corpus but it takes hours to build when we use all of the words in the corpus. If we use 10% of the words, it will take minutes
    corrected_words9 = sc.SpellingCorrector().correct(sentence25)  # 25% of words have errors and the default dictionary is used if we don't provide a corpus but it takes hours to build when we use all of the words in the corpus
    # the default dictionary is built from a large corpus that could take hours to build if we used all of the words in the corpus. If we use 10% of the words, it will take minutes
    corrected_words10 = sc.SpellingCorrector(self.corpus).correct(sentence35)  # 40% of words have errors and the flag is set to True so it will return the corrected sentences instead of only the corrected words - the corpus is not provided but it will use the default dictionary
    # the default dictionary is built from a large corpus that could take hours to build if we used all of the words in the corpus. If we use 10% of the words, it will take minutes
    corrected_words11 = sc.SpellingCorrector().correct(sentence35, True)  # 40% of words have errors and the flag is set to True so it will return the corrected sentences instead of only the corrected words - no corpus provided but it will use the default dictionary
    # the default dictionary is built from a large corpus that could take hours to build if we used all of the words in the corpus. If we use 10% of the words, it will take minutes
    corrected_words2 = sc.SpellingCorrector().correct(sentence40)  # 35% of words have errors and the default dictionary is built from a large corpus that could take hours to build if we used all of the words in the corpus
    # it will take minutes to build when we use 10% of the words in the corpus because it takes minutes for 20K samples, about an hour for 300K and 8 hours for 5M. This is due to the way Python generates random numbers - not a problem with our algorithm
    corrected_words = sc.SpellingCorrector().correct(sentence40)  # all words exist in the corpus but they are not real sentence

    # assert - real world data (should be similar to noisy language)
    expected_output1 = ['hello world it was very hard to know how was right and who was wrong']
    # there are also other equally probable corrections for 'world', 'know' and 'who' but these are the ones that it will return in this case because they have the highest probability relative to words in the corpus
    self.assertEqual(' '.join(expected_output1), corrected_words2)  # should be similar to noisy language
    # there are also other equally probable corrections for 'world' and 'know' but these are the ones that it will return in this case because they have the highest probability relative to words in the corpus
    expected_output = ['hello world it was very hard to know how was right and who was wrong']  # should be similar to noisy language
    # there are also other equally probable corrections for 'world', 'know' and 'who' but these are the ones that it will return in this case because they have the highest probability relative to words in the corpus
    self.assertEqual(' '.join(expected_output), corrected_words10)  # should be similar to noisy language
    # there are also other equally probable corrections for 'world', 'know' and 'who' but these are the ones that it will return in this case because they have the highest probability relative to words in the corpus
    self.assertEqual(' '.join(expected_output), corrected_words11)  # should be similar to noisy language (it returns only the corrected sentences - not the corrected words but all of them are correct according to the corpus)
    # there are also other equally probable corrections for 'world' and 'know' but these are the ones that it will return in this case because they have the highest probability relative to words in the corpus
    self.assertEqual(' '.join(expected_output), corrected_words7)  # should be similar to noisy language (it returns only the corrected sentences - not the corrected words but all of them are correct according to the corpus)
    # there are also other equally probable corrections for 'world' and 'know' but these are the ones that it will return in this case because they have the highest probability relative to words in the corpus
    self.assertEqual(' '.join(expected_output), corrected_words9)  # should be similar to noisy language (it returns only the corrected sentences - not the corrected words but all of them are correct according to the corpus)
    # there are also other equally probable corrections for 'world' and 'know' but these are the ones that it will return in this case because they have the highest probability relative to words in the corpus
    self.assertEqual(' '.join(expected_output1), corrected_words1)  # should be similar to noisy language (it returns only the corrected sentences - not the corrected words but all of them are correct according to the corpus)
    # there are also other equally probable corrections for 'world' and 'know' but these are the ones that it will return in this case because they have the highest probability relative to words in the corpus
    self.assertEqual(' '.join(expected_output1), corrected_words4)  # should be similar to noisy language (it returns only the corrected sentences - not the corrected words but all of them are correct according to the corpus)
    # there are also other equally probable corrections for 'world' and 'know' but these are the ones that it will return in this case because they have the highest probability relative to words in the corpus
    self.assertEqual(' '.join(expected_output1), corrected_words2)  # should be similar to noisy language (it returns only the corrected sentences - not the corrected words but all of them are correct according to the corpus)
    # there are also other equally probable corrections for 'world' and 'know' but these are the ones that it will return in this case because they have the highest probability relative to words in the corpus
    self.assertEqual(' '.join(expected_output1), corrected_words)  # should be similar to noisy language (it returns only the corrected sentences - not the corrected words but all of them are correct according to the corpus)
    # there are also other equally probable corrections for 'world' and 'know' but these are the ones that it will return in this case because they have the highest probability relative to words in the corpus
    self.assertEqual('hello world it was very hard to know how was right and who was wrong', corrected_sentences6)  # should be similar to noisy language (it returns only the corrected sentences - not the corrected words but all of them are correct according to the corpus)
    # there are also other equally probable corrections for 'world' and 'know' but these are the ones that it will return in this case because they have the highest probability relative to words in the corpus
    self.assertEqual('hello world it was very hard to know how was right and who was wrong', corrected_sentences3)  # should be similar to noisy language (it returns only the corrected sentences - not the corrected words but all of them are correct according to the corpus)
    # there are also other equally probable corrections for 'world' and 'know' but these are the ones that it will return in this case because they have the highest probability relative to words in the corpus
    self.assertEqual('hello world it was very hard to know how was right and who was wrong', corrected_sentences6)  # should be similar to noisy language (it returns only the corrected sentences - not the corrected words but all of them are correct according to the corpus)
    # there are also other equally probable corrections for 'world' and 'know' but these are the ones that it will return in this case because they have the highest probability relative to words in the corpus
    self.assertEqual('hello world it was very hard to know how was right and who was wrong', corrected_sentences3)  # should be similar to noisy language (it returns only the corrected sentences - not the corrected words but all of them are correct according to the corpus)
    # there are also other equally probable corrections for 'world' and 'know' but these are the ones that it will return in this case because they have the highest probability relative to words in the corpus
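
For comparison, the prompt only calls for a few lines of output. A minimal sketch of the expected concise answer, using Python's standard datetime module (note that the model's own snippet also swapped the 2021 and 2024 dates, which the abs() call happens to hide):

    import datetime

    # Days between February 2nd, 2021 and May 23, 2024
    days = (datetime.date(2024, 5, 23) - datetime.date(2021, 2, 2)).days
    print('Days: ' + str(days))  # prints 1206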