immich-app / immich

High performance self-hosted photo and video management solution.
https://immich.app
GNU Affero General Public License v3.0
51.71k stars 2.74k forks source link

Issue when loading OpenVINO for machine learning #9007

Closed whxciotw closed 6 months ago

whxciotw commented 6 months ago

The bug

OpenVINO cannot be loaded properly for machine learning; the log is attached below.

The OS that Immich Server is running on

Unraid 6.12.10

Version of Immich Server

v1.102.3

Version of Immich Mobile App

none

Platform with the issue

Your docker-compose.yml content

version: "3.8"

#
# WARNING: Make sure to use the docker-compose.yml of the current release:
#
# https://github.com/immich-app/immich/releases/latest/download/docker-compose.yml
#
# The compose file on main may not be compatible with the latest release.
#

name: immich

services:
  # Runs `start.sh immich`; container port 3001 is published on host port 2283.
  immich-server:
    container_name: immich_server
    image: ghcr.io/immich-app/immich-server:${IMMICH_VERSION:-release}
    command: ["start.sh", "immich"]
    volumes:
      - ${UPLOAD_LOCATION}:/usr/src/app/upload
      - /etc/localtime:/etc/localtime:ro
    env_file:
      - .env
    ports:
      # Quoted so the HOST:CONTAINER pair is always parsed as a string.
      - "2283:3001"
    depends_on:
      - redis
      - database
    restart: always

  # Runs `start.sh microservices` from the same image as immich-server.
  immich-microservices:
    container_name: immich_microservices
    image: ghcr.io/immich-app/immich-server:${IMMICH_VERSION:-release}
    # Hardware-accelerated transcoding - see
    # https://immich.app/docs/features/hardware-transcoding
    extends:
      file: hwaccel.transcoding.yml
      # set to one of [nvenc, quicksync, rkmpp, vaapi, vaapi-wsl] for
      # accelerated transcoding
      service: quicksync
    command: ["start.sh", "microservices"]
    volumes:
      - ${UPLOAD_LOCATION}:/usr/src/app/upload
      - /etc/localtime:/etc/localtime:ro
    env_file:
      - .env
    depends_on:
      - redis
      - database
    restart: always

  # Uses the `-openvino` image variant together with the `openvino` service
  # definition from hwaccel.ml.yml.
  immich-machine-learning:
    container_name: immich_machine_learning
    image: ghcr.io/immich-app/immich-machine-learning:${IMMICH_VERSION:-release}-openvino
    # Hardware-accelerated inference - see
    # https://immich.app/docs/features/ml-hardware-acceleration
    extends:
      file: hwaccel.ml.yml
      # set to one of [armnn, cuda, openvino, openvino-wsl] for accelerated
      # inference - use the `-wsl` version for WSL2 where applicable
      service: openvino
    volumes:
      - model-cache:/cache
    env_file:
      - .env
    restart: always

  # Image is pinned by digest.
  redis:
    container_name: immich_redis
    image: redis:6.2-alpine@sha256:b6124ab2e45cc332e16398022a411d7e37181f21ff7874835e0180f56a09e82a
    restart: always
    labels:
      - com.centurylinklabs.watchtower.enable=true

  # Image is pinned by digest; credentials and database name come from the
  # DB_* variables.
  database:
    container_name: immich_postgres
    image: tensorchord/pgvecto-rs:pg14-v0.2.0@sha256:90724186f0a3517cf6914295b5ab410db9ce23190a2d9d0b9dd6463e3fa298f0
    env_file:
      - .env
    environment:
      POSTGRES_PASSWORD: ${DB_PASSWORD}
      POSTGRES_USER: ${DB_USERNAME}
      POSTGRES_DB: ${DB_DATABASE_NAME}
    volumes:
      - ${DB_DATA_LOCATION}:/var/lib/postgresql/data
    restart: always

# Named volumes declared for this Compose project.
volumes:
  # Mounted at /cache in the immich-machine-learning service; the empty value
  # declares the volume without any extra options.
  model-cache:

Your .env content

# You can find documentation for all the supported env variables at https://immich.app/docs/install/environment-variables

# The location where your uploaded files are stored
# (mounted at /usr/src/app/upload in the immich-server and immich-microservices containers)
UPLOAD_LOCATION=/mnt/user/pictures

# The Immich version to use. You can pin this to a specific version like "v1.71.0"
# (used as the image tag in docker-compose.yml)
IMMICH_VERSION=release

# Connection secret for postgres. You should change it to a random password
DB_PASSWORD=postgres

# The values below this line do not need to be changed
###################################################################################
# DB_HOSTNAME matches the container_name of the database service in docker-compose.yml
DB_HOSTNAME=immich_postgres
DB_USERNAME=postgres
DB_DATABASE_NAME=immich
# Host path mounted at /var/lib/postgresql/data in the database container
DB_DATA_LOCATION=/mnt/user/appdata/immich/postgres

# REDIS_HOSTNAME matches the container_name of the redis service in docker-compose.yml
REDIS_HOSTNAME=immich_redis

Reproduction steps

1. docker compose up -d
2. start face recognition

Relevant log output

immich_machine_learning  | [04/22/24 07:25:33] ERROR    Exception in ASGI application                      
immich_machine_learning  |                                                                                 
immich_machine_learning  |                              ╭─────── Traceback (most recent call last) ───────╮
immich_machine_learning  |                              │ /usr/src/app/main.py:118 in predict             │
immich_machine_learning  |                              │                                                 │
immich_machine_learning  |                              │   115 │                                         │
immich_machine_learning  |                              │   116 │   model = await load(await model_cache. │
immich_machine_learning  |                              │       ttl=settings.model_ttl, **kwargs))        │
immich_machine_learning  |                              │   117 │   model.configure(**kwargs)             │
immich_machine_learning  |                              │ ❱ 118 │   outputs = await run(model.predict, in │
immich_machine_learning  |                              │   119 │   return ORJSONResponse(outputs)        │
immich_machine_learning  |                              │   120                                           │
immich_machine_learning  |                              │   121                                           │
immich_machine_learning  |                              │                                                 │
immich_machine_learning  |                              │ /usr/src/app/main.py:125 in run                 │
immich_machine_learning  |                              │                                                 │
immich_machine_learning  |                              │   122 async def run(func: Callable[..., Any], i │
immich_machine_learning  |                              │   123 │   if thread_pool is None:               │
immich_machine_learning  |                              │   124 │   │   return func(inputs)               │
immich_machine_learning  |                              │ ❱ 125 │   return await asyncio.get_running_loop │
immich_machine_learning  |                              │   126                                           │
immich_machine_learning  |                              │   127                                           │
immich_machine_learning  |                              │   128 async def load(model: InferenceModel) ->  │
immich_machine_learning  |                              │                                                 │
immich_machine_learning  |                              │ /usr/lib/python3.10/concurrent/futures/thread.p │
immich_machine_learning  |                              │ y:58 in run                                     │
immich_machine_learning  |                              │                                                 │
immich_machine_learning  |                              │ /usr/src/app/models/base.py:59 in predict       │
immich_machine_learning  |                              │                                                 │
immich_machine_learning  |                              │    56 │   │   self.load()                       │
immich_machine_learning  |                              │    57 │   │   if model_kwargs:                  │
immich_machine_learning  |                              │    58 │   │   │   self.configure(**model_kwargs │
immich_machine_learning  |                              │ ❱  59 │   │   return self._predict(inputs)      │
immich_machine_learning  |                              │    60 │                                         │
immich_machine_learning  |                              │    61 │   @abstractmethod                       │
immich_machine_learning  |                              │    62 │   def _predict(self, inputs: Any) -> An │
immich_machine_learning  |                              │                                                 │
immich_machine_learning  |                              │ /usr/src/app/models/facial_recognition.py:49 in │
immich_machine_learning  |                              │ _predict                                        │
immich_machine_learning  |                              │                                                 │
immich_machine_learning  |                              │   46 │   │   else:                              │
immich_machine_learning  |                              │   47 │   │   │   decoded_image = image          │
immich_machine_learning  |                              │   48 │   │   assert is_ndarray(decoded_image, n │
immich_machine_learning  |                              │ ❱ 49 │   │   bboxes, kpss = self.det_model.dete │
immich_machine_learning  |                              │   50 │   │   if bboxes.size == 0:               │
immich_machine_learning  |                              │   51 │   │   │   return []                      │
immich_machine_learning  |                              │   52 │   │   assert is_ndarray(kpss, np.float32 │
immich_machine_learning  |                              │                                                 │
immich_machine_learning  |                              │ /opt/venv/lib/python3.10/site-packages/insightf │
immich_machine_learning  |                              │ ace/model_zoo/retinaface.py:224 in detect       │
immich_machine_learning  |                              │                                                 │
immich_machine_learning  |                              │   221 │   │   det_img = np.zeros( (input_size[1 │
immich_machine_learning  |                              │   222 │   │   det_img[:new_height, :new_width,  │
immich_machine_learning  |                              │   223 │   │                                     │
immich_machine_learning  |                              │ ❱ 224 │   │   scores_list, bboxes_list, kpss_li │
immich_machine_learning  |                              │   225 │   │                                     │
immich_machine_learning  |                              │   226 │   │   scores = np.vstack(scores_list)   │
immich_machine_learning  |                              │   227 │   │   scores_ravel = scores.ravel()     │
immich_machine_learning  |                              │                                                 │
immich_machine_learning  |                              │ /opt/venv/lib/python3.10/site-packages/insightf │
immich_machine_learning  |                              │ ace/model_zoo/retinaface.py:152 in forward      │
immich_machine_learning  |                              │                                                 │
immich_machine_learning  |                              │   149 │   │   kpss_list = []                    │
immich_machine_learning  |                              │   150 │   │   input_size = tuple(img.shape[0:2] │
immich_machine_learning  |                              │   151 │   │   blob = cv2.dnn.blobFromImage(img, │
immich_machine_learning  |                              │       (self.input_mean, self.input_mean, self.i │
immich_machine_learning  |                              │ ❱ 152 │   │   net_outs = self.session.run(self. │
immich_machine_learning  |                              │   153 │   │                                     │
immich_machine_learning  |                              │   154 │   │   input_height = blob.shape[2]      │
immich_machine_learning  |                              │   155 │   │   input_width = blob.shape[3]       │
immich_machine_learning  |                              │                                                 │
immich_machine_learning  |                              │ /opt/venv/lib/python3.10/site-packages/onnxrunt │
immich_machine_learning  |                              │ ime/capi/onnxruntime_inference_collection.py:22 │
immich_machine_learning  |                              │ 0 in run                                        │
immich_machine_learning  |                              │                                                 │
immich_machine_learning  |                              │    217 │   │   if not output_names:             │
immich_machine_learning  |                              │    218 │   │   │   output_names = [output.name  │
immich_machine_learning  |                              │    219 │   │   try:                             │
immich_machine_learning  |                              │ ❱  220 │   │   │   return self._sess.run(output │
immich_machine_learning  |                              │    221 │   │   except C.EPFail as err:          │
immich_machine_learning  |                              │    222 │   │   │   if self._enable_fallback:    │
immich_machine_learning  |                              │    223 │   │   │   │   print(f"EP Error: {err!s │
immich_machine_learning  |                              ╰─────────────────────────────────────────────────╯
immich_machine_learning  |                              RuntimeException: [ONNXRuntimeError] : 6 :         
immich_machine_learning  |                              RUNTIME_EXCEPTION : Encountered unknown exception  
immich_machine_learning  |                              in Run()                                           
immich_microservices     | [Nest] 7  - 04/22/2024, 7:25:33 AM   ERROR [JobService] Unable to run job handler (faceDetection/face-detection): Error: Machine learning request for facial recognition failed with status 500: Internal Server Error
immich_microservices     | [Nest] 7  - 04/22/2024, 7:25:33 AM   ERROR [JobService] Error: Machine learning request for facial recognition failed with status 500: Internal Server Error
immich_microservices     |     at MachineLearningRepository.predict (/usr/src/app/dist/repositories/machine-learning.repository.js:23:19)
immich_microservices     |     at process.processTicksAndRejections (node:internal/process/task_queues:95:5)
immich_microservices     |     at async PersonService.handleDetectFaces (/usr/src/app/dist/services/person.service.js:268:23)
immich_microservices     |     at async /usr/src/app/dist/services/job.service.js:149:36
immich_microservices     |     at async Worker.processJob (/usr/src/app/node_modules/bullmq/dist/cjs/classes/worker.js:394:28)
immich_microservices     |     at async Worker.retryIfFailed (/usr/src/app/node_modules/bullmq/dist/cjs/classes/worker.js:581:24)
immich_microservices     | [Nest] 7  - 04/22/2024, 7:25:33 AM   ERROR [JobService] Object:
immich_microservices     | {
immich_microservices     |   "id": "004706a0-7d9d-495e-bca8-c56bb8d9344f"
immich_microservices     | }
immich_microservices     | 
immich_redis             | 1:M 22 Apr 2024 07:25:45.063 * 10000 changes in 60 seconds. Saving...
immich_redis             | 1:M 22 Apr 2024 07:25:45.063 * Background saving started by pid 20
immich_redis             | 20:C 22 Apr 2024 07:25:45.083 * DB saved on disk
immich_redis             | 20:C 22 Apr 2024 07:25:45.083 * RDB: 0 MB of memory used by copy-on-write
immich_redis             | 1:M 22 Apr 2024 07:25:45.163 * Background saving terminated with success
immich_microservices     | [Nest] 7  - 04/22/2024, 7:30:33 AM   ERROR [JobService] Unable to run job handler (faceDetection/face-detection): Error: Machine learning request to "http://immich-machine-learning:3003" failed with HeadersTimeoutError: Headers Timeout Error
immich_microservices     | [Nest] 7  - 04/22/2024, 7:30:33 AM   ERROR [JobService] Error: Machine learning request to "http://immich-machine-learning:3003" failed with HeadersTimeoutError: Headers Timeout Error
immich_microservices     |     at /usr/src/app/dist/repositories/machine-learning.repository.js:19:19
immich_microservices     |     at async MachineLearningRepository.predict (/usr/src/app/dist/repositories/machine-learning.repository.js:18:21)
immich_microservices     |     at async PersonService.handleDetectFaces (/usr/src/app/dist/services/person.service.js:268:23)
immich_microservices     |     at async /usr/src/app/dist/services/job.service.js:149:36
immich_microservices     |     at async Worker.processJob (/usr/src/app/node_modules/bullmq/dist/cjs/classes/worker.js:394:28)
immich_microservices     |     at async Worker.retryIfFailed (/usr/src/app/node_modules/bullmq/dist/cjs/classes/worker.js:581:24)
immich_microservices     | [Nest] 7  - 04/22/2024, 7:30:33 AM   ERROR [JobService] Object:
immich_microservices     | {
immich_microservices     |   "id": "9d17b2f3-dcc5-45b1-9151-6765249a2956"
immich_microservices     | }
immich_microservices     | 
immich_microservices     | [Nest] 7  - 04/22/2024, 7:30:34 AM   ERROR [JobService] Unable to run job handler (faceDetection/face-detection): Error: Machine learning request to "http://immich-machine-learning:3003" failed with HeadersTimeoutError: Headers Timeout Error
immich_microservices     | [Nest] 7  - 04/22/2024, 7:30:34 AM   ERROR [JobService] Error: Machine learning request to "http://immich-machine-learning:3003" failed with HeadersTimeoutError: Headers Timeout Error
immich_microservices     |     at /usr/src/app/dist/repositories/machine-learning.repository.js:19:19
immich_microservices     |     at async MachineLearningRepository.predict (/usr/src/app/dist/repositories/machine-learning.repository.js:18:21)
immich_microservices     |     at async PersonService.handleDetectFaces (/usr/src/app/dist/services/person.service.js:268:23)
immich_microservices     |     at async /usr/src/app/dist/services/job.service.js:149:36
immich_microservices     |     at async Worker.processJob (/usr/src/app/node_modules/bullmq/dist/cjs/classes/worker.js:394:28)
immich_microservices     |     at async Worker.retryIfFailed (/usr/src/app/node_modules/bullmq/dist/cjs/classes/worker.js:581:24)
immich_microservices     | [Nest] 7  - 04/22/2024, 7:30:34 AM   ERROR [JobService] Object:
immich_microservices     | {
immich_microservices     |   "id": "bc642a4b-26ec-414f-a860-3c7083f80aed"
immich_microservices     | }
immich_microservices     | 

Additional information

CPU is i5 8600T

The hwaccel.ml.yml and hwaccel.transcoding.yml files are identical to the official ones.

hwaccel.ml.yml

version: "3.8"

# Configurations for hardware-accelerated machine learning

# If using Unraid or another platform that doesn't allow multiple Compose files,
# you can inline the config for a backend by copying its contents 
# into the immich-machine-learning service in the docker-compose.yml file.

# See https://immich.app/docs/features/ml-hardware-acceleration for info on usage.

services:
  # Passes through the Mali device node plus its firmware and driver files.
  armnn:
    devices:
      - /dev/mali0:/dev/mali0
    volumes:
      # Mali firmware for your chipset (not always required depending on the
      # driver)
      - /lib/firmware/mali_csffw.bin:/lib/firmware/mali_csffw.bin:ro
      # Mali driver for your chipset (always required)
      - /usr/lib/libmali.so:/usr/lib/libmali.so:ro

  # Adds no devices or mounts.
  cpu: {}

  # Reserves one device from the `nvidia` driver with the `gpu` capability.
  cuda:
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]

  # Exposes /dev/dri and /dev/bus/usb to the container.
  openvino:
    device_cgroup_rules:
      # NOTE(review): major 189 presumably covers the USB character devices
      # under /dev/bus/usb - confirm.
      - 'c 189:* rmw'
    devices:
      - /dev/dri:/dev/dri
    volumes:
      - /dev/bus/usb:/dev/bus/usb

  # Same as openvino without the cgroup rule, plus /dev/dxg and the
  # /usr/lib/wsl mount.
  openvino-wsl:
    devices:
      - /dev/dri:/dev/dri
      - /dev/dxg:/dev/dxg
    volumes:
      - /dev/bus/usb:/dev/bus/usb
      - /usr/lib/wsl:/usr/lib/wsl

hwaccel.transcoding.yml

version: "3.8"

# Configurations for hardware-accelerated transcoding

# If using Unraid or another platform that doesn't allow multiple Compose files,
# you can inline the config for a backend by copying its contents
# into the immich-microservices service in the docker-compose.yml file.

# See https://immich.app/docs/features/hardware-transcoding for more info on using hardware transcoding.

services:
  # Adds no devices or mounts.
  cpu: {}

  # Reserves one device from the `nvidia` driver with the gpu, compute and
  # video capabilities.
  nvenc:
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities:
                - gpu
                - compute
                - video

  quicksync:
    devices:
      - /dev/dri:/dev/dri

  rkmpp:
    # enables full access to /sys and /proc, still far better than
    # privileged: true
    security_opt:
      - systempaths=unconfined
      - apparmor=unconfined
    group_add:
      - video
    devices:
      - /dev/rga:/dev/rga
      - /dev/dri:/dev/dri
      - /dev/dma_heap:/dev/dma_heap
      - /dev/mpp_service:/dev/mpp_service
      # only required to enable OpenCL-accelerated HDR -> SDR tonemapping
      # - /dev/mali0:/dev/mali0
    # The `volumes` key is commented out together with its entries so it does
    # not parse as a bare null value. Uncomment the key and both entries to
    # enable OpenCL-accelerated HDR -> SDR tonemapping.
    # volumes:
    #   - /etc/OpenCL:/etc/OpenCL:ro
    #   - /usr/lib/aarch64-linux-gnu/libmali.so.1:/usr/lib/aarch64-linux-gnu/libmali.so.1:ro

  vaapi:
    devices:
      - /dev/dri:/dev/dri

  # use this for VAAPI if you're running Immich in WSL2
  vaapi-wsl:
    devices:
      - /dev/dri:/dev/dri
    volumes:
      - /usr/lib/wsl:/usr/lib/wsl
    environment:
      - LD_LIBRARY_PATH=/usr/lib/wsl/lib
      - LIBVA_DRIVER_NAME=d3d12
mertalev commented 6 months ago

This is being tracked in #8226.