sloria / TextBlob

Simple, Pythonic, text processing--Sentiment analysis, part-of-speech tagging, noun phrase extraction, translation, and more.
https://textblob.readthedocs.io/
MIT License
9.05k stars 1.13k forks source link

AttributeError: 'list' object has no attribute 'strip' #371

Open sneex-seo opened 3 years ago

sneex-seo commented 3 years ago

# Minimal reproduction of the reported error: translating a Russian phrase
# to Ukrainian with TextBlob.
from textblob import TextBlob

words = str("Кардиганы из норка")
blob = TextBlob(words)
# This call is where the reporter hits
# AttributeError: 'list' object has no attribute 'strip'.
perevod = blob.translate(from_lang="ru", to='uk')
print(perevod)

This is a new error I ran into today. It says: AttributeError: 'list' object has no attribute 'strip' (screenshot attached)

dhavinc commented 3 years ago

This happened to me as well. It should be fixed ASAP.

dacosta-github commented 3 years ago

This happened to me as well. It should be fixed ASAP, please.

I have this problem in production.

File "/venv/lib/python3.9/site-packages/textblob/translate.py", line 84, in _validate_translation: `if result.strip() == source.strip():` raises AttributeError: 'list' object has no attribute 'strip'

I have tried "30 per a line" and I can't get around this error.

Only if I use another library, which I don't want to, because I trust TextBlob too much.

Can you help, please?

igalma commented 3 years ago

In the meantime, I took the relevant functions and modified them to make them work. You can copy this code and it will work (just fix any minor issues that were introduced while copy-pasting my code):

import ctypes
import json

from textblob.compat import request, urlencode
from textblob.exceptions import NotTranslated

# HTTP headers passed to the translation request; the User-Agent imitates a
# desktop browser.
headers = {
    # '*/*' accepts any media type (a bare '/' is not a valid media range).
    'Accept': '*/*',
    'Connection': 'keep-alive',
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) '
        'AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.168 Safari/535.19')
}

# Base endpoint; the language and token query parameters are appended per call.
translate_url = "http://translate.google.com/translate_a/t?client=webapp&dt=bd&dt=ex&dt=ld&dt=md&dt=qca&dt=rw&dt=rm&dt=ss&dt=t&dt=at&ie=UTF-8&oe=UTF-8&otf=2&ssel=0&tsel=0&kc=1"

# Usage example. It has to run after the function definitions further down,
# otherwise ``translate`` is not defined yet:
#
#     english_sentence = translate(source="some text in other language")

def _first_text(payload):
    """Follow index 0 through nested lists and return the first non-list leaf."""
    node = payload
    while isinstance(node, list) and node:
        node = node[0]
    return node


def translate(source, from_lang='auto', tolang='en', host=None, type=None):
    """Translate the source text from one language to another.

    :param source: Text to translate; sent as the ``q`` form field.
    :param from_lang: Source language code placed in the ``sl`` query
        parameter (``'auto'`` by default).
    :param tolang: Target language code placed in the ``tl`` and ``hl``
        query parameters.
    :param host: Forwarded unchanged to ``request`` (proxy host).
    :param type: Forwarded unchanged to ``request`` (proxy type). The name
        shadows the builtin but is kept so existing keyword callers work.
    :returns: The translated string.
    :raises NotTranslated: If the response holds no text, or the text equals
        the input.
    """
    data = {"q": source}
    url = '{url}&sl={from_lang}&tl={to_lang}&hl={to_lang}&tk={tk}'.format(
        url=translate_url,
        from_lang=from_lang,
        to_lang=tolang,
        tk=calculate_tk(source),
    )
    response = request(url, host=host, type=type, data=data)
    # The reply nests the translation inside lists (next to extras such as
    # the detected language). Descend to the first leaf instead of indexing a
    # fixed depth, which either slices into the string or leaves a list.
    # NOTE(review): assumes the translated text is always the first leaf --
    # confirm against a live response.
    result = _first_text(json.loads(response))
    if not isinstance(result, str):
        raise NotTranslated(
            'Translation API returned an unexpected response: '
            '{0!r}'.format(result))
    _validate_translation(source, result)
    return result

def request(url, host=None, type=None, data=None):
    """Send ``data`` to ``url`` as a URL-encoded body and return the reply text.

    :param url: Full request URL.
    :param host: Optional proxy host; a proxy is set when ``host`` or
        ``type`` is truthy.
    :param type: Optional proxy type (scheme). The name shadows the builtin
        but is kept so existing keyword callers work.
    :param data: Mapping of form fields to send; ``None`` sends no body.
    :returns: The response body decoded as UTF-8.
    """
    # Imported locally under an alias: this function is itself called
    # ``request`` and therefore hides the module-level ``request`` import.
    from urllib import request as urllib_request

    encoded_data = None
    if data is not None:
        encoded_data = urlencode(data).encode('utf-8')
    req = urllib_request.Request(url=url, headers=headers, data=encoded_data)
    if host or type:
        req.set_proxy(host=host, type=type)
    # Close the response even if reading fails.
    with urllib_request.urlopen(req) as resp:
        content = resp.read()
    return content.decode('utf-8')

def calculate_tk(source):
    """Reverse engineered cross-site request protection.

    Builds the ``tk`` token from the UTF-8 bytes of ``source``.

    :param source: Text that is about to be translated.
    :returns: A string of the form ``"<a>.<a ^ seed>"`` where ``a`` is below
        one million and ``seed`` is the first ``tkk`` constant.
    """
    # Source: https://github.com/soimort/translate-shell/issues/94#issuecomment-165433715
    # Source: http://www.liuxiatool.com/t.php
    tkk = [406398, 561666268 + 1526272306]
    seed = tkk[0]

    payload = source.encode('utf-8')

    def RL(a, ops):
        # ``ops`` is read three characters at a time:
        # [combine with '+' or xor][shift right on '+', else left][amount].
        for c in range(0, len(ops) - 2, 3):
            d = ops[c + 2]
            # Letters encode amounts from 10 upwards ('a' -> 10); digits are literal.
            d = ord(d) - 87 if d >= 'a' else int(d)
            xa = ctypes.c_uint32(a).value
            d = xa >> d if ops[c + 1] == '+' else xa << d
            a = a + d & 4294967295 if ops[c] == '+' else a ^ d
        return ctypes.c_int32(a).value

    a = seed

    # Iterating over bytes yields integers.
    for byte in payload:
        a = RL(a + byte, "+-a^+6")

    a = RL(a, "+-3^+b+-f")
    a ^= tkk[1]
    # Reinterpret a negative value as its unsigned 32-bit equivalent.
    a = a if a >= 0 else ((a & 2147483647) + 2147483648)
    a %= pow(10, 6)

    tk = '{0:d}.{1:d}'.format(a, a ^ seed)
    return tk

def _validate_translation(source, result): """Validate API returned expected schema, and that the translated text is different than the original string. """ if not result: raise NotTranslated('Translation API returned and empty response.') if result.strip() == source.strip(): raise NotTranslated('Translation API returned the input string unchanged.')

skoenig commented 3 years ago

@igalma your suggested code would be much more readable if you fence it in a markdown code block, this eliminates the reformatting issues.

debjyoti003 commented 3 years ago

What are None and 'it'? Are they part of the translation, or something else?

Simulacrotron commented 2 years ago

Over a year later and this is still an issue. Can we get an update?

ShashidharDevraj commented 10 months ago

In the meantime, I took the relevant functions and modified them to make them work. You can copy this code and it will work (just fix any minor issues that were introduced while copy-pasting my code):

import ctypes
import json

from textblob.compat import request, urlencode
from textblob.exceptions import NotTranslated

# HTTP headers passed to the translation request; the User-Agent imitates a
# desktop browser.
headers = {
    # '*/*' accepts any media type (a bare '/' is not a valid media range).
    'Accept': '*/*',
    'Connection': 'keep-alive',
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) '
        'AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.168 Safari/535.19')
}

# Base endpoint; the language and token query parameters are appended per call.
translate_url = "http://translate.google.com/translate_a/t?client=webapp&dt=bd&dt=ex&dt=ld&dt=md&dt=qca&dt=rw&dt=rm&dt=ss&dt=t&dt=at&ie=UTF-8&oe=UTF-8&otf=2&ssel=0&tsel=0&kc=1"

# Usage example. It has to run after the function definitions further down,
# otherwise ``translate`` is not defined yet:
#
#     english_sentence = translate(source="some text in other language")

def _first_text(payload):
    """Follow index 0 through nested lists and return the first non-list leaf."""
    node = payload
    while isinstance(node, list) and node:
        node = node[0]
    return node


def translate(source, from_lang='auto', tolang='en', host=None, type=None):
    """Translate the source text from one language to another.

    :param source: Text to translate; sent as the ``q`` form field.
    :param from_lang: Source language code placed in the ``sl`` query
        parameter (``'auto'`` by default).
    :param tolang: Target language code placed in the ``tl`` and ``hl``
        query parameters.
    :param host: Forwarded unchanged to ``request`` (proxy host).
    :param type: Forwarded unchanged to ``request`` (proxy type). The name
        shadows the builtin but is kept so existing keyword callers work.
    :returns: The translated string.
    :raises NotTranslated: If the response holds no text, or the text equals
        the input.
    """
    data = {"q": source}
    url = '{url}&sl={from_lang}&tl={to_lang}&hl={to_lang}&tk={tk}'.format(
        url=translate_url,
        from_lang=from_lang,
        to_lang=tolang,
        tk=calculate_tk(source),
    )
    response = request(url, host=host, type=type, data=data)
    # The reply nests the translation inside lists (next to extras such as
    # the detected language). Descend to the first leaf instead of indexing a
    # fixed depth, which either slices into the string or leaves a list.
    # NOTE(review): assumes the translated text is always the first leaf --
    # confirm against a live response.
    result = _first_text(json.loads(response))
    if not isinstance(result, str):
        raise NotTranslated(
            'Translation API returned an unexpected response: '
            '{0!r}'.format(result))
    _validate_translation(source, result)
    return result

def request(url, host=None, type=None, data=None):
    """Send ``data`` to ``url`` as a URL-encoded body and return the reply text.

    :param url: Full request URL.
    :param host: Optional proxy host; a proxy is set when ``host`` or
        ``type`` is truthy.
    :param type: Optional proxy type (scheme). The name shadows the builtin
        but is kept so existing keyword callers work.
    :param data: Mapping of form fields to send; ``None`` sends no body.
    :returns: The response body decoded as UTF-8.
    """
    # Imported locally under an alias: this function is itself called
    # ``request`` and therefore hides the module-level ``request`` import.
    from urllib import request as urllib_request

    encoded_data = None
    if data is not None:
        encoded_data = urlencode(data).encode('utf-8')
    req = urllib_request.Request(url=url, headers=headers, data=encoded_data)
    if host or type:
        req.set_proxy(host=host, type=type)
    # Close the response even if reading fails.
    with urllib_request.urlopen(req) as resp:
        content = resp.read()
    return content.decode('utf-8')

def calculate_tk(source):
    """Reverse engineered cross-site request protection.

    Builds the ``tk`` token from the UTF-8 bytes of ``source``.

    :param source: Text that is about to be translated.
    :returns: A string of the form ``"<a>.<a ^ seed>"`` where ``a`` is below
        one million and ``seed`` is the first ``tkk`` constant.
    """
    # Source: soimort/translate-shell#94 (comment)
    # Source: http://www.liuxiatool.com/t.php
    tkk = [406398, 561666268 + 1526272306]
    seed = tkk[0]

    payload = source.encode('utf-8')

    def RL(a, ops):
        # ``ops`` is read three characters at a time:
        # [combine with '+' or xor][shift right on '+', else left][amount].
        for c in range(0, len(ops) - 2, 3):
            d = ops[c + 2]
            # Letters encode amounts from 10 upwards ('a' -> 10); digits are literal.
            d = ord(d) - 87 if d >= 'a' else int(d)
            xa = ctypes.c_uint32(a).value
            d = xa >> d if ops[c + 1] == '+' else xa << d
            a = a + d & 4294967295 if ops[c] == '+' else a ^ d
        return ctypes.c_int32(a).value

    a = seed

    # Iterating over bytes yields integers.
    for byte in payload:
        a = RL(a + byte, "+-a^+6")

    a = RL(a, "+-3^+b+-f")
    a ^= tkk[1]
    # Reinterpret a negative value as its unsigned 32-bit equivalent.
    a = a if a >= 0 else ((a & 2147483647) + 2147483648)
    a %= pow(10, 6)

    tk = '{0:d}.{1:d}'.format(a, a ^ seed)
    return tk

def _validate_translation(source, result): """Validate API returned expected schema, and that the translated text is different than the original string. """ if not result: raise NotTranslated('Translation API returned and empty response.') if result.strip() == source.strip(): raise NotTranslated('Translation API returned the input string unchanged.')

Thanks for the working code. I could translate .png files as of now. It fails to support .jpg and other formats.