Official code and data repository for our EMNLP 2020 long paper "Reformulating Unsupervised Style Transfer as Paraphrase Generation" (https://arxiv.org/abs/2010.05700).
Issue #2 (datasets/bpe2text.py)
Running the file, I got the following error:
Downloading: "https://github.com/pytorch/fairseq/archive/master.zip" to
/home/anubhavjangra/.cache/torch/hub/master.zip
Traceback (most recent call last):
File "bpe2text.py", line 11, in <module>
roberta = torch.hub.load('pytorch/fairseq', 'roberta.base')
File
"/home/anubhavjangra/style-transfer-paraphrase/style-venv/lib/python3.7/site-packages/torch/
hub.py", line 345, in load
repo_dir = _get_cache_or_reload(github, force_reload, verbose)
File
"/home/anubhavjangra/style-transfer-paraphrase/style-venv/lib/python3.7/site-packages/torch/
hub.py", line 144, in _get_cache_or_reload
download_url_to_file(url, cached_file, progress=False)
File
"/home/anubhavjangra/style-transfer-paraphrase/style-venv/lib/python3.7/site-packages/torch/
hub.py", line 379, in download_url_to_file
u = urlopen(req)
File "/opt/conda/lib/python3.7/urllib/request.py", line 222, in urlopen
return opener.open(url, data, timeout)
File "/opt/conda/lib/python3.7/urllib/request.py", line 531, in open
response = meth(req, response)
File "/opt/conda/lib/python3.7/urllib/request.py", line 641, in http_response
'http', request, response, code, msg, hdrs)
File "/opt/conda/lib/python3.7/urllib/request.py", line 563, in error
result = self._call_chain(*args)
File "/opt/conda/lib/python3.7/urllib/request.py", line 503, in _call_chain
result = func(*args)
File "/opt/conda/lib/python3.7/urllib/request.py", line 755, in http_error_302
return self.parent.open(new, timeout=req.timeout)
File "/opt/conda/lib/python3.7/urllib/request.py", line 531, in open
response = meth(req, response)
File "/opt/conda/lib/python3.7/urllib/request.py", line 641, in http_response
'http', request, response, code, msg, hdrs)
File "/opt/conda/lib/python3.7/urllib/request.py", line 569, in error
return self._call_chain(*args)
File "/opt/conda/lib/python3.7/urllib/request.py", line 503, in _call_chain
result = func(*args)
File "/opt/conda/lib/python3.7/urllib/request.py", line 649, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 404: Not Found
This was resolved by following this GitHub discussion; to make it work, I had to change line
11 in datasets/bpe2text.py from:
roberta = torch.hub.load('pytorch/fairseq', 'roberta.base')
to:
roberta = torch.hub.load('pytorch/fairseq:main', 'roberta.base')
Issue #3 (datasets/bpe2text.py)
Running it again, I got these errors:
Downloading: "https://github.com/pytorch/fairseq/archive/main.zip" to
/home/anubhavjangra/.cache/torch/hub/main.zip
Traceback (most recent call last):
File "bpe2text.py", line 11, in <module>
roberta = torch.hub.load('pytorch/fairseq:main', 'roberta.base')
File
"/home/anubhavjangra/style-transfer-paraphrase/style-venv/lib/python3.7/site-packages/torch/
hub.py", line 349, in load
hub_module = import_module(MODULE_HUBCONF, repo_dir + '/' + MODULE_HUBCONF)
File
"/home/anubhavjangra/style-transfer-paraphrase/style-venv/lib/python3.7/site-packages/torch/
hub.py", line 71, in import_module
spec.loader.exec_module(module)
File "<frozen importlib._bootstrap_external>", line 728, in exec_module
File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
File "/home/anubhavjangra/.cache/torch/hub/pytorch_fairseq_main/hubconf.py", line 35, in
<module>
raise RuntimeError("Missing dependencies: {}".format(", ".join(missing_deps)))
RuntimeError: Missing dependencies: hydra-core, omegaconf
Apparently torch.hub does not install hydra-core and omegaconf on its own (GitHub discussion);
the error was resolved by this simple package installation:
pip install hydra-core omegaconf
Issue #4 (datasets/bpe2text.py)
There was another dependency issue running the file again:
Using cache found in /home/anubhavjangra/.cache/torch/hub/pytorch_fairseq_main
Traceback (most recent call last):
File "bpe2text.py", line 11, in <module>
roberta = torch.hub.load('pytorch/fairseq:main', 'roberta.base')
File
"/home/anubhavjangra/style-transfer-paraphrase/style-venv/lib/python3.7/site-packages/torch/
hub.py", line 349, in load
hub_module = import_module(MODULE_HUBCONF, repo_dir + '/' + MODULE_HUBCONF)
File
"/home/anubhavjangra/style-transfer-paraphrase/style-venv/lib/python3.7/site-packages/torch/
hub.py", line 71, in import_module
spec.loader.exec_module(module)
File "<frozen importlib._bootstrap_external>", line 728, in exec_module
File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
File "/home/anubhavjangra/.cache/torch/hub/pytorch_fairseq_main/hubconf.py", line 39, in
<module>
from fairseq.hub_utils import ( # noqa; noqa
File "/home/anubhavjangra/.cache/torch/hub/pytorch_fairseq_main/fairseq/__init__.py", line
32, in <module>
import fairseq.criterions # noqa
File
"/home/anubhavjangra/.cache/torch/hub/pytorch_fairseq_main/fairseq/criterions/__init__.py",
line 36, in <module>
importlib.import_module("fairseq.criterions." + file_name)
File "/opt/conda/lib/python3.7/importlib/__init__.py", line 127, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
File "/home/anubhavjangra/.cache/torch/hub/pytorch_fairseq_main/fairseq/criterions/ctc.py",
line 18, in <module>
from fairseq.data.data_utils import post_process
File "/home/anubhavjangra/.cache/torch/hub/pytorch_fairseq_main/fairseq/data/__init__.py",
line 24, in <module>
from .indexed_dataset import (
File "/home/anubhavjangra/.cache/torch/hub/pytorch_fairseq_main/fairseq/data/indexed_dataset.py", line 15, in <module>
from fairseq.data.huffman import HuffmanMMapIndexedDataset, HuffmanMMapIndex
File "/home/anubhavjangra/.cache/torch/hub/pytorch_fairseq_main/fairseq/data/huffman/__init__.py", line 6, in <module>
from .huffman_coder import HuffmanCodeBuilder, HuffmanCoder
File "/home/anubhavjangra/.cache/torch/hub/pytorch_fairseq_main/fairseq/data/huffman/huffman_coder.py", line 11, in <module>
from bitarray import bitarray, util
ModuleNotFoundError: No module named 'bitarray'
This was also resolved by installing the missing package:
pip install bitarray
Issue #9 (dependency issues)
nltk, scipy and sentencepiece are missing from the requirements.txt file.
Thanks to Anubhav Jangra for reporting these,