Mozilla-Ocho / llamafile

Distribute and run LLMs with a single file.
https://llamafile.ai

Running with Nix and ROCm #272

Open CyberShadow opened 4 months ago

CyberShadow commented 4 months ago

This creates a script that allows running a .llamafile with ROCm acceleration under Nix:

let
  package = {
      writeShellScript
    , symlinkJoin
    , clang
    , coreutils
    , gzip
    , bash
    , rocmPackages
    }:

    let
      # Merge all the needed ROCm packages into one tree, so a single
      # ROCM_PATH / include / lib prefix covers everything.
      rocmtoolkit_joined = symlinkJoin {
        name = "rocm-merged";

        paths = with rocmPackages; [
          rocm-core clr rccl miopen miopengemm rocrand rocblas
          rocsparse hipsparse rocthrust rocprim hipcub roctracer
          rocfft rocsolver hipfft hipsolver hipblas
          rocminfo rocm-thunk rocm-comgr rocm-device-libs
          rocm-runtime clr.icd hipify
        ];
      };
    in
    writeShellScript "run-llamafile" ''
      if [ $# -eq 0 ] ; then
        echo "Usage: $0 wizardcoder.llamafile -ngl 9999"
        exit 2
      fi

      # llamafile builds its ROCm GPU module at runtime, so hipcc (from the
      # merged ROCm tree), clang, coreutils and gzip must be on PATH.
      export PATH=${rocmtoolkit_joined}/bin
      export PATH=$PATH:${clang}/bin
      export PATH=$PATH:${coreutils}/bin
      export PATH=$PATH:${gzip}/bin

      # Point hipcc and llamafile at the merged ROCm tree for headers and libraries.
      export HIPCC_COMPILE_FLAGS_APPEND="-I${rocmtoolkit_joined}/include -I${rocmtoolkit_joined}/include/rocblas"
      export HIPCC_LINK_FLAGS_APPEND="-L${rocmtoolkit_joined}/lib"

      export ROCM_PATH=${rocmtoolkit_joined}
      export ROCM_SOURCE_DIR=${rocmtoolkit_joined}

      # Run the .llamafile passed on the command line with the bash provided
      # to this derivation (the function argument, not pkgs from the outer let).
      ${bash}/bin/sh "$@"
    '';

  nixpkgs = fetchTarball "https://github.com/NixOS/nixpkgs/archive/0ef56bec7281e2372338f2dfe7c13327ce96f6bb.tar.gz";
  pkgs = import nixpkgs {};
in
pkgs.callPackage package {}

It can be used like this: nix-build && ./result ~/Downloads/wizardcoder-python-34b-v1.0.Q5_K_M.llamafile -ngl 9999
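
(That invocation assumes the expression above is saved as default.nix in the current directory. With another file name, e.g. a hypothetical run-llamafile.nix, you'd point nix-build at it explicitly and can name the output link:)

nix-build run-llamafile.nix -o run-llamafile
./run-llamafile ~/Downloads/wizardcoder-python-34b-v1.0.Q5_K_M.llamafile -ngl 9999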

Posting this here with the hope that this will help someone. Thank you for this project!

deftdawg commented 2 months ago

This is the way I've been running it on NixOS using podman:

nix-shell -p podman fuse-overlayfs --run \
  "podman run --rm -ti \
    --device=/dev/kfd --device=/dev/dri \
    -e DISPLAY=${DISPLAY} \
    -v /tmp/.X11-unix/X0:/tmp/.X11-unix/X0 \
    -v /home:/home \
    -p \"8080:8080\" \
    docker.io/rocm/pytorch \
    bash ~/Downloads/Meta-Llama-3-70B-Instruct.Q8_0.llamafile -ngl 10 --host \"0.0.0.0\""
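
One assumption about the host that the command itself doesn't show: --device=/dev/kfd and --device=/dev/dri only help if the user running podman can already open those device nodes, which on NixOS usually means being in the video and render groups. A minimal configuration.nix sketch (alice is a placeholder user name):

  # Let the user open /dev/kfd and /dev/dri/* for ROCm compute.
  users.users.alice.extraGroups = [ "video" "render" ];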

For Llama 3 70B, each layer takes about 1 GB of VRAM, so a 6900 XT can only offload ~14 layers.
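
As a rough sanity check on that estimate (assumptions: the Q8_0 70B weights are ~75 GB in total, Llama 3 70B has 80 layers, and a 6900 XT has 16 GB of VRAM):

  ~75 GB / 80 layers ≈ 0.94 GB per layer
  14 layers × ~1 GB ≈ 14 GB, leaving ~2 GB of the 16 GB card for the KV cache and other buffers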