Closed rachhitgarg closed 6 years ago
I am new to GitHub and Python, and I am trying to build a wiki corpus from Wikipedia article dumps, but I am getting the error below.
Please help me as soon as possible.
output.write(b' '.join(text) + '\n') TypeError: sequence item 0: expected a bytes-like object, str found
from future import print_function
import logging import os.path import six import sys
from gensim.corpora import WikiCorpus
if name == 'main': program = os.path.basename(sys.argv[0]) logger = logging.getLogger(program)
logging.basicConfig(format='%(asctime)s: %(levelname)s: %(message)s') logging.root.setLevel(level=logging.INFO) logger.info("running %s" % ' '.join(sys.argv)) # check and process input arguments if len(sys.argv) != 3: print("Using: python process_wiki.py wiki.xml.bz2 wiki.en.text") sys.exit(1) inp, outp = sys.argv[1:3] space = " " i = 0 output = open(outp, 'w') wiki = WikiCorpus(inp, lemmatize=False, dictionary={}) for text in wiki.get_texts(): if six.PY3: output.write(b' '.join(text) + '\n') # ###another method### #output.write( # space.join(map(lambda x:x.decode("utf-8"), text)) + '\n') else: output.write(space.join(text) + "\n") i = i + 1 if (i % 10000 == 0): logger.info("Saved " + str(i) + " articles") output.close() logger.info("Finished Saved " + str(i) + " articles")
Yeah, I don't think this is very related to Pokemon Go.
I can recommend stackoverflow.com for your question.
I am new to GitHub and Python, and I am trying to build a wiki corpus from Wikipedia article dumps, but I am getting the error below.
Please help me as soon as possible.
output.write(b' '.join(text) + '\n') TypeError: sequence item 0: expected a bytes-like object, str found
from future import print_function
import logging import os.path import six import sys
from gensim.corpora import WikiCorpus
if name == 'main': program = os.path.basename(sys.argv[0]) logger = logging.getLogger(program)