mozilla / DeepSpeech

DeepSpeech is an open source embedded (offline, on-device) speech-to-text engine which can run in real time on devices ranging from a Raspberry Pi 4 to high power GPU servers.
Mozilla Public License 2.0
25.37k stars 3.97k forks source link

add build.sh file in native_client #2025

Closed JRMeyer closed 5 years ago

JRMeyer commented 5 years ago

@JRMeyer

Something like the following, adapted from Reuben's code in `.compute` (utf8 branch):

#!/bin/bash

# Build script for native_client: builds generate_trie against TensorFlow,
# builds KenLM, generates a German Wikipedia language model + trie, and
# installs the ds_ctcdecoder Python package.
# Requires SHARED_DIR and USER_DIR to be set in the environment.

# -x: trace commands; -e: abort on error; pipefail: make pipelines
# (e.g. the `curl | tar` below) fail when any stage fails, not just the last.
set -xe -o pipefail

data="${SHARED_DIR}/data"
fis="${data}/LDC/fisher"
swb="${data}/LDC/LDC97S62/swb"
lbs="${data}/OpenSLR/LibriSpeech/librivox"
cv2de="${data}/mozilla/CommonVoice/v2.0/de/clips"

wikide="${data}/wikipedia/de/wiki.txt"

# $(...) instead of deprecated backticks; quoted in case the path has spaces.
srcdir="$(pwd)/../src"

# Toolchain needed by the bazel/TF and KenLM builds below.
apt-get install -y build-essential cmake libboost-all-dev zlib1g-dev libbz2-dev liblzma-dev openjdk-8-jdk bash-completion unzip

mkdir -p /tmp/tf

pushd /tmp/tf
  # Download and install bazel, pinned to the version TF r1.13 expects.
  # -f makes curl fail on HTTP errors instead of saving an error page
  # that dpkg would then choke on.
  curl -fLO "https://github.com/bazelbuild/bazel/releases/download/0.19.2/bazel_0.19.2-linux-x86_64.deb"
  dpkg -i bazel_*.deb

  # Download tensorflow (https, not http: avoids a redirect and tampering risk)
  git clone --depth 1 --branch r1.13 https://github.com/mozilla/tensorflow

  pushd tensorflow
    # Expose our native_client sources inside the TF build tree.
    ln -s "${srcdir}/native_client" .

    # Configure and build generate_trie: CPU-only, all optional TF
    # features disabled, answered non-interactively via env vars.
    export TF_NEED_CUDA=0
    export TF_ENABLE_XLA=0
    export TF_NEED_JEMALLOC=1
    export TF_NEED_OPENCL_SYCL=0
    export TF_NEED_MKL=0
    export TF_NEED_VERBS=0
    export TF_NEED_MPI=0
    export TF_NEED_IGNITE=0
    export TF_NEED_GDR=0
    export TF_NEED_NGRAPH=0
    export TF_DOWNLOAD_CLANG=0
    export TF_SET_ANDROID_WORKSPACE=0
    export TF_NEED_TENSORRT=0
    export GCC_HOST_COMPILER_PATH=/usr/bin/gcc
    ./configure

    bazel build --config=monolithic -c opt --copt=-march=native --copt=-fvisibility=hidden //native_client:generate_trie
  popd # tensorflow

  # Save the resulting binary in the user folder.
  gen_trie=/tmp/tf/tensorflow/bazel-bin/native_client/generate_trie
  cp "${gen_trie}" "${USER_DIR}/generate_trie"
popd # /tmp/tf

# LM stuff: build KenLM, then a German Wikipedia ARPA LM, its binary form,
# and the decoder trie.
mkdir -p /tmp/lm
pushd /tmp/lm
  # Download and build kenlm. -f makes curl fail on HTTP errors instead of
  # piping an error page into tar.
  curl -fL https://kheafield.com/code/kenlm.tar.gz | tar xz
  # -p so a re-run of the script doesn't fail here.
  mkdir -p kenlm/build
  pushd kenlm/build
    cmake ..
    make -j16
  popd

  klmbin=/tmp/lm/kenlm/build/bin

  # Save the KenLM binaries in the user folder.
  mkdir -p "${USER_DIR}/kenlm"
  cp "${klmbin}/lmplz" "${USER_DIR}/kenlm/lmplz"
  cp "${klmbin}/build_binary" "${USER_DIR}/kenlm/build_binary"

  # Generate a 3-gram ARPA LM from the German Wikipedia dump, then the
  # quantized binary trie form.
  "${klmbin}/lmplz" --order 3 --skip_symbols --text "${wikide}" --arpa wikide.arpa
  "${klmbin}/build_binary" -a 255 -q 8 trie wikide.arpa wikide.binary

  # Save in user dir, with a README recording the exact commands used.
  mkdir -p "${USER_DIR}/wikide_lm"
  cp wikide.arpa "${USER_DIR}/wikide_lm/wikide.arpa"
  cp wikide.binary "${USER_DIR}/wikide_lm/wikide.binary"
  # Note: these lines must match the commands actually run above.
  echo "lmplz --order 3 --skip_symbols --text ${wikide} --arpa wikide.arpa" > "${USER_DIR}/wikide_lm/README"
  echo "build_binary -a 255 -q 8 trie wikide.arpa wikide.binary" >> "${USER_DIR}/wikide_lm/README"

  # Generate trie from LM and save in user dir. First positional argument
  # of generate_trie is unused by this version, hence the placeholder.
  "${gen_trie}" unused_param /tmp/lm/wikide.binary /tmp/lm/wikide.trie
  cp /tmp/lm/wikide.trie "${USER_DIR}/wikide_lm/wikide.trie"

  lm_binary=/tmp/lm/wikide.binary
  lm_trie=/tmp/lm/wikide.trie
popd # /tmp/lm

# Python environment: create an isolated venv and activate it so the
# following `pip` calls install into /tmp/venv, not the system Python.
apt-get install -y python3-venv swig
python3 -m venv /tmp/venv
. /tmp/venv/bin/activate

# Training dependencies: TensorFlow is filtered out of requirements.txt
# and pinned explicitly to the GPU build matching the r1.13 checkout above.
pip install wheel
pip install -r <(grep -v tensorflow requirements.txt)
pip install tensorflow-gpu==1.13.1

# Build the ds_ctcdecoder wheel from source and install it.
make -C ../src/native_client/ctcdecode clean
make -C ../src/native_client/ctcdecode NUM_PROCESSES=16
pip install ../src/native_client/ctcdecode/dist/*.whl
lock[bot] commented 5 years ago

This thread has been automatically locked since there has not been any recent activity after it was closed. Please open a new issue for related bugs.