monniert / docExtractor

(ICFHR 2020 oral) Code for "docExtractor: An off-the-shelf historical document element extraction" paper
https://www.tmonnier.com/docExtractor
MIT License
85 stars 10 forks source link

[bug] translation.exception.TranslateError: No translation get, you may retry #3

Closed seekingdeep closed 3 years ago

seekingdeep commented 3 years ago

@monniert The error:

(docExtractor) home@home-lnx:~/programs/docExtractor$ python src/syndoc_generator.py -d testing -n 100 --merged_labels
[2020-11-29 00:49:37] Creating train set...
[2020-11-29 00:49:37]   Generating random document with seed 0...
Traceback (most recent call last):
  File "src/syndoc_generator.py", line 62, in <module>
    gen.run(args.nb_train)
  File "src/syndoc_generator.py", line 46, in run
    d = SyntheticDocument(**kwargs)
  File "/home/home/programs/docExtractor/src/utils/__init__.py", line 74, in wrapper
    return f(*args, **kw)
  File "/home/home/programs/docExtractor/src/synthetic/document.py", line 126, in __init__
    self.elements, self.positions = self._generate_random_layout()
  File "/home/home/programs/docExtractor/src/utils/__init__.py", line 74, in wrapper
    return f(*args, **kw)
  File "/home/home/programs/docExtractor/src/synthetic/document.py", line 238, in _generate_random_layout
    element = choice(self.available_elements, p=weights)(width, height, **element_kwargs)
  File "/home/home/programs/docExtractor/src/synthetic/element.py", line 151, in __init__
    self.generate_content(seed=seed)
  File "/home/home/programs/docExtractor/src/utils/__init__.py", line 74, in wrapper
    return f(*args, **kw)
  File "/home/home/programs/docExtractor/src/synthetic/element.py", line 597, in generate_content
    self.text, content_width, content_height = self.format_text(text)
  File "/home/home/programs/docExtractor/src/synthetic/element.py", line 624, in format_text
    text = google(text, src='en', dst='ar')
  File "/home/home/anaconda3/envs/docExtractor/lib/python3.6/site-packages/translation/__init__.py", line 19, in google
    dst = dst, proxies = proxies)
  File "/home/home/anaconda3/envs/docExtractor/lib/python3.6/site-packages/translation/main.py", line 33, in get
    if r == '': raise TranslateError('No translation get, you may retry')
translation.exception.TranslateError: No translation get, you may retry

Listing the folder's tree:

(docExtractor) home@home-lnx:~/programs/docExtractor$ tree -d
.
├── configs
├── demo
├── models
│   └── default
├── raw_data
│   └── illuhisdoc
│       ├── msd
│       ├── msi
│       ├── mss
│       ├── p
│       └── via_json
├── scripts
├── src
│   ├── datasets
│   ├── loss
│   ├── models
│   ├── optimizers
│   ├── schedulers
│   ├── synthetic
│   │   └── __pycache__
│   └── utils
│       └── __pycache__
└── synthetic_resource
    ├── background
    │   ├── 0
    │   ├── 10
    │   ├── 100
    │   ├── 110
    │   ├── 120
    │   ├── 20
    │   ├── 30
    │   ├── 40
    │   ├── 50
    │   ├── 60
    │   ├── 70
    │   ├── 80
    │   └── 90
    ├── context_background
    ├── drawing
    ├── drawing_background
    ├── font
    │   ├── arabic
    │   │   ├── Amiri
    │   │   ├── Arial
    │   │   ├── Cairo
    │   │   ├── dejavu_dejavu-sans
    │   │   ├── El_Messiri
    │   │   ├── gnu-freefont_freeserif
    │   │   └── st-gigafont-typefaces_code2003
    │   ├── chinese
    │   │   ├── Liu_Jian_Mao_Cao
    │   │   ├── Long_Cang
    │   │   ├── Ma_Shan_Zheng
    │   │   ├── Noto_Sans_SC
    │   │   ├── Noto_Serif_SC
    │   │   ├── ZCOOL_KuaiLe
    │   │   ├── ZCOOL_QingKe_HuangYou
    │   │   ├── ZCOOL_XiaoWei
    │   │   └── Zhi_Mang_Xing
    │   ├── foreign_like
    │   │   ├── alhambra
    │   │   ├── barmee_afarat-ibn-blady
    │   │   ├── bizancia
    │   │   ├── catharsis_bedouin
    │   │   ├── catharsis_catharsis-bedouin
    │   │   ├── k22_timbuctu
    │   │   ├── kingthings_conundrum
    │   │   ├── meifen
    │   │   ├── ming_imperial
    │   │   ├── running_smobble
    │   │   ├── samarkan
    │   │   ├── selamet_lebaran
    │   │   ├── uddi-uddi_running-smobble
    │   │   ├── yozakura
    │   │   └── zilap_oriental
    │   ├── handwritten
    │   │   ├── Alako
    │   │   ├── Angelina
    │   │   ├── anke-print
    │   │   ├── atlandsketches-bb
    │   │   ├── bathilda
    │   │   ├── BlackJack_Regular
    │   │   ├── blzee
    │   │   ├── bromello
    │   │   ├── calligravity
    │   │   ├── Carefree
    │   │   ├── conformity
    │   │   ├── Cursive_standard
    │   │   ├── Damion
    │   │   ├── Elegant
    │   │   ├── emizfont
    │   │   ├── hoffmanhand
    │   │   ├── honey_script
    │   │   ├── hurryup
    │   │   ├── irezumi
    │   │   ├── james-tan-dinawanao
    │   │   ├── JaneAusten
    │   │   ├── Jellyka_-_Love_and_Passion
    │   │   ├── jr-hand
    │   │   ├── Juergen
    │   │   ├── khand
    │   │   ├── kosal-says-hy
    │   │   ├── Learning_Curve
    │   │   ├── Learning_Curve_Pro
    │   │   ├── maddison_signature
    │   │   ├── may-queen
    │   │   ├── mistis-fonts_october-twilight
    │   │   ├── mistis-fonts_stylish-calligraphy-demo
    │   │   ├── mistis-fonts_watermelon-script-demo
    │   │   ├── Monika
    │   │   ├── mumsies
    │   │   ├── nymphont_xiomara
    │   │   ├── otto
    │   │   │   └── Otto
    │   │   ├── Pacifico
    │   │   ├── paul_signature
    │   │   ├── pecita
    │   │   ├── popsies
    │   │   ├── quigleywiggly
    │   │   ├── rabiohead
    │   │   ├── roddy
    │   │   ├── Saginaw
    │   │   ├── Saginaw 2
    │   │   ├── santos-dumont
    │   │   ├── scribble
    │   │   ├── scriptina
    │   │   ├── sf-burlington-script
    │   │   │   └── TrueType
    │   │   ├── sf-foxboro-script
    │   │   │   └── TrueType
    │   │   ├── shadows-into-light
    │   │   ├── shartoll-light
    │   │   ├── shelter-me
    │   │   │   └── kimberly-geswein_shelter-me
    │   │   ├── shorelines_script
    │   │   ├── signerica
    │   │   ├── sild
    │   │   ├── silent-fighter
    │   │   │   └── Silent Fighter
    │   │   ├── sillii_willinn
    │   │   ├── silverline-script-demo
    │   │   ├── simple-signature
    │   │   ├── snake
    │   │   │   └── Snake
    │   │   ├── somes-style
    │   │   ├── sophia-bella-demo
    │   │   ├── spitter
    │   │   │   └── Spitter
    │   │   ├── stalemate
    │   │   ├── standard-pilot-demo
    │   │   │   └── standard pilot demo
    │   │   ├── stingray
    │   │   │   └── Stingray
    │   │   ├── stylish-marker
    │   │   ├── Sudestada
    │   │   ├── sunshine-in-my-soul
    │   │   │   └── kimberly-geswein_sunshine-in-my-soul
    │   │   ├── sweet-lady
    │   │   ├── Tabitha
    │   │   ├── the-girl-next-door
    │   │   ├── the-great-escape
    │   │   │   └── kimberly-geswein_the-great-escape
    │   │   ├── the-illusion-of-beauty
    │   │   ├── theodista-decally
    │   │   ├── the-only-exception
    │   │   │   └── kimberly-geswein_the-only-exception
    │   │   ├── the-queenthine
    │   │   │   └── The Queenthine demo
    │   │   ├── the_wave
    │   │   ├── think-dreams
    │   │   ├── toubibdemo
    │   │   ├── turkeyface
    │   │   ├── typhoon-type-suthi-srisopha_sweet-hipster
    │   │   ├── undercut
    │   │   ├── variane-script
    │   │   ├── velocity-demo
    │   │   ├── vengeance
    │   │   ├── victorisa
    │   │   ├── waiting-for-the-sunrise
    │   │   ├── watasyina
    │   │   ├── westbury-signature-demo-version
    │   │   │   └── Westbury-Signature-Demo-Version
    │   │   ├── white_angelica
    │   │   ├── wiegel-kurrent
    │   │   ├── wiegel-latein
    │   │   ├── Windsong
    │   │   ├── winkdeep
    │   │   ├── wolgast-two
    │   │   ├── wonder_bay
    │   │   ├── written-on-his-hands
    │   │   │   └── kimberly-geswein_written-on-his-hands
    │   │   ├── you-wont-bring-me-down
    │   │   └── zeyada
    │   └── normal
    │       ├── alexey-kryukov_theano
    │       ├── daniel-johnson_didact-gothic
    │       ├── david-perry_cardo
    │       ├── dejavu_dejavu-sans
    │       ├── dejavu_dejavu-serif
    │       ├── ek-type_ek-mukta
    │       ├── georg-duffner_eb-garamond
    │       ├── gnu-freefont_freemono
    │       ├── gnu-freefont_freesans
    │       ├── gnu-freefont_freeserif
    │       ├── google_noto-sans
    │       ├── google_noto-serif
    │       ├── google_roboto
    │       ├── gust-e-foundry_texgyreschola
    │       ├── gust-e-foundry_texgyretermes
    │       ├── james-kass_code2000
    │       ├── kineticplasma-fonts_din-kursivschrift
    │       ├── kineticplasma-fonts_falling-sky
    │       ├── kineticplasma-fonts_mechanical
    │       ├── kineticplasma-fonts_trueno
    │       ├── linux-libertine_linux-libertine
    │       ├── m-fonts_m-2p
    │       ├── nymphont_aver
    │       ├── red-hat-inc_liberation-sans
    │       ├── sil-international_charis-sil
    │       ├── sil-international_doulos-sil
    │       ├── sil-international_doulos-sil-compact
    │       ├── sil-international_gentium-book-basic
    │       ├── sil-international_gentium-plus
    │       └── st-gigafont-typefaces_code2003
    ├── glyph_font
    │   ├── ababil-script-demo
    │   │   └── MJ Ababil Demo
    │   ├── aldus_regal
    │   ├── aldus_romant
    │   ├── aldus_royal
    │   ├── anglo-text
    │   ├── art-designs-by-sue_fairies-gone-wild
    │   ├── art-designs-by-sue_fairies-gone-wild-plus
    │   ├── camelotcaps
    │   ├── cameoappearance
    │   ├── character_cherubic-initials
    │   ├── character_masselleam
    │   ├── character_romantique-initials
    │   ├── cheap-stealer
    │   │   └── cheap stealer
    │   ├── cheshire-initials
    │   ├── chung-deh-tien-chase-zen_chase-zen-jingletruck-karachi
    │   ├── cloutierfontes_british-museum-1490
    │   ├── colchester
    │   ├── dan-roseman_chaucher
    │   ├── decorated-roman-initials
    │   ├── digital-type-foundry_burton
    │   ├── dominatrix
    │   ├── ds-romantiques
    │   ├── egyptienne-zierinitialien
    │   ├── ehmcke-fraktur-initialen
    │   ├── ehmcke-schwabacher-initialen
    │   ├── elzevier-caps
    │   ├── eva-barabasne-olasz_kahirpersonaluse
    │   ├── extraornamentalno2
    │   ├── fleurcornercaps
    │   ├── flowers-initials
    │   ├── gate-and-lock-co_metalover
    │   ├── gemfonts_gothic-illuminate
    │   ├── genzsch-initials
    │   ├── george-williams_andrade
    │   ├── george-williams_floral-caps-nouveau
    │   ├── george-williams_morris
    │   ├── george-williams_square-caps
    │   ├── germanika-personal-use
    │   │   └── Germanika Personal Use
    │   ├── griffintwo
    │   ├── house-of-lime_fleurcornercaps
    │   ├── house-of-lime_german-caps
    │   ├── house-of-lime_gothic-flourish
    │   ├── house-of-lime_lime-blossom-caps
    │   ├── house-of-lime_limeglorycaps
    │   ├── intellecta-design_centennialscriptfancy-three
    │   ├── intellecta-design_hard-to-read-monograms
    │   ├── intellecta-design_holbeinchildrens
    │   ├── intellecta-design_intellecta-monograms-random-eight
    │   ├── intellecta-design_intellecta-monograms-random-sam
    │   ├── intellecta-design_intellecta-monograms-random-six
    │   ├── intellecta-design_intellecta-monograms-random-two
    │   ├── intellecta-design_jaggard-two
    │   ├── intellecta-design_nardis
    │   ├── jlh-fonts_apex-lake
    │   ├── kaiserzeitgotisch
    │   ├── kanzler
    │   ├── kr-keltic-one
    │   ├── lime-blossom-caps
    │   ├── lord-kyl-mackay_floral-majuscules-11th-c
    │   ├── lord-kyl-mackay_gothic-leaf
    │   ├── lorvad_spatz
    │   ├── manfred-klein_delitschinitialen
    │   ├── manfred-klein_lombardi-caps
    │   ├── manfred-klein_vespasiancaps
    │   ├── manfred-klein_vespasiansflorials
    │   ├── medici-text
    │   ├── medievalalphabet
    │   ├── morris-initialen
    │   ├── napoli-initialen
    │   ├── neugotische-initialen
    │   ├── nouveau-drop-caps
    │   ├── paisleycaps
    │   ├── pamela
    │   ├── panhead
    │   ├── paulus-franck-initialen
    │   ├── pau-the-1st
    │   ├── precious
    │   ├── rediviva
    │   ├── rothenburg-decorative
    │   ├── royal-initialen
    │   ├── rudelsberg
    │   ├── sentinel
    │   ├── sniper
    │   ├── spring
    │   ├── the-black-box_seven-waves-sighs-salome
    │   ├── tulips
    │   ├── typographerwoodcutinitialsone
    │   ├── unger-fraktur-zierbuchstaben
    │   ├── victorian-initials-one
    │   ├── vtks-deja-vu
    │   ├── vtks-focus
    │   ├── vtks-mercearia
    │   ├── vtks-simplex-beauty-2
    │   ├── vtks-sonho
    │   ├── vtks-velhos-tempos
    │   ├── waste-of-paint
    │   ├── west-wind-fonts_exotica
    │   ├── west-wind-fonts_leafy
    │   ├── zallman-caps
    │   └── zamolxis_zamolxisornament
    ├── noise_pattern
    │   ├── border_hole
    │   ├── center_hole
    │   ├── corner_hole
    │   └── phantom_character
    ├── text
    └── wikiart
        ├── Abstract_Expressionism
        ├── Action_painting
        ├── Analytical_Cubism
        ├── Art_Nouveau_Modern
        ├── Baroque
        ├── Color_Field_Painting
        ├── Contemporary_Realism
        ├── Cubism
        ├── Early_Renaissance
        ├── Expressionism
        ├── Fauvism
        ├── High_Renaissance
        ├── Impressionism
        ├── Mannerism_Late_Renaissance
        ├── Minimalism
        ├── Naive_Art_Primitivism
        ├── New_Realism
        ├── Northern_Renaissance
        ├── Pointillism
        ├── Pop_Art
        ├── Post_Impressionism
        ├── Realism
        ├── Rococo
        ├── Romanticism
        ├── Symbolism
        ├── Synthetic_Cubism
        └── Ukiyo_e

362 directories
monniert commented 3 years ago

Hi @seekingdeep, thanks for pointing that out. The translation package doesn't seem to work these days; I tried other alternatives (googletrans, google_trans_new), but they are not stable yet. I have removed the translations for now, so it should work fine. Don't hesitate to reopen the issue if it doesn't.

googletrans seems to be the most reliable, but it is currently facing a major issue. I will try adding translations again in a couple of weeks.

seekingdeep commented 3 years ago

But why did you even include translation in the first place?

monniert commented 3 years ago

Including translations into languages that use alphabets other than the Latin one (here, Arabic and Chinese) enables better generalization on these particular alphabets.

seekingdeep commented 3 years ago

Oh, I understand.