Code for the paper "Multi-Task Learning for Domain-General Spoken Disfluency Detection in Dialogue Systems" (Igor Shalyminov, Arash Eshghi, and Oliver Lemon) [SemDial 2018 paper] [Slides]
$ cd code-directory
$ git submodule update --init
$ conda create -n multitask_disfluency python=2.7
$ conda activate multitask_disfluency
$ pip install -r requirements.txt
$ python make_deep_disfluency_dataset.py swbd disfluency
$ python train.py swbd model
babi_tools folder
sh make_generalization_study_datasets.sh <RESULT_FOLDER>
sh tag_dataset.sh <RESULT_FOLDER> <config_file_name>
Repeat the command above for every config in 2018_generalization_study_configs.
<RESULT_FOLDER>/<BABI_DATASET_NAME>/*.tagged.json