Closed rrbanda closed 1 month ago
~ podman --version
podman version 5.2.0
{
"Id": "6682db1c0a967dab2c776059f9a3beaac2fa2604aba7800a779f0dbc4c073cf2",
"Created": "2024-08-06T03:19:03.594571785Z",
"Path": "sh",
"Args": [
"run.sh"
],
"State": {
"Status": "running",
"Running": true,
"Paused": false,
"Restarting": false,
"OOMKilled": false,
"Dead": false,
"Pid": 2298,
"ExitCode": 0,
"Error": "",
"StartedAt": "2024-08-06T03:19:04.019449016Z",
"FinishedAt": "0001-01-01T00:00:00Z",
"Health": {
"Status": "healthy",
"FailingStreak": 0,
"Log": [
{
"Start": "2024-08-05T23:33:19.947685157-04:00",
"End": "2024-08-05T23:33:20.179546093-04:00",
"ExitCode": 0,
"Output": ""
},
{
"Start": "2024-08-05T23:33:25.941785352-04:00",
"End": "2024-08-05T23:33:26.190816714-04:00",
"ExitCode": 0,
"Output": ""
},
{
"Start": "2024-08-05T23:33:31.950996882-04:00",
"End": "2024-08-05T23:33:32.197648029-04:00",
"ExitCode": 0,
"Output": ""
},
{
"Start": "2024-08-05T23:33:37.938927767-04:00",
"End": "2024-08-05T23:33:38.208537981-04:00",
"ExitCode": 0,
"Output": ""
},
{
"Start": "2024-08-05T23:33:43.958068827-04:00",
"End": "2024-08-05T23:33:44.2248957-04:00",
"ExitCode": 0,
"Output": ""
}
]
}
},
"Image": "sha256:496fcef1d8856ef2bf37cd0928ae4f233f5bdbdf44c61571d1689a085cf2c2e5",
"ResolvConfPath": "/run/containers/storage/overlay-containers/6682db1c0a967dab2c776059f9a3beaac2fa2604aba7800a779f0dbc4c073cf2/userdata/resolv.conf",
"HostnamePath": "/run/containers/storage/overlay-containers/6682db1c0a967dab2c776059f9a3beaac2fa2604aba7800a779f0dbc4c073cf2/userdata/hostname",
"HostsPath": "/run/containers/storage/overlay-containers/6682db1c0a967dab2c776059f9a3beaac2fa2604aba7800a779f0dbc4c073cf2/userdata/hosts",
"LogPath": "",
"Name": "/infallible_lamarr",
"RestartCount": 0,
"Driver": "overlay",
"Platform": "linux",
"MountLabel": "system_u:object_r:container_file_t:s0:c1022,c1023",
"ProcessLabel": "",
"AppArmorProfile": "",
"ExecIDs": [
"9ab8555aaad3f13835221d6bcd3a79c29cc4aed2fb73dbe68f3c8810d6d39df2"
],
"HostConfig": {
"Binds": [
"/Users/raghurambanda/.local/share/containers/podman-desktop/extensions-storage/redhat.ai-lab/models/hf.instructlab.granite-7b-lab-GGUF:/models:rw,rprivate,rbind"
],
"ContainerIDFile": "",
"LogConfig": {
"Type": "journald",
"Config": null
},
"NetworkMode": "bridge",
"PortBindings": {
"8000/tcp": [
{
"HostIp": "0.0.0.0",
"HostPort": "49686"
}
]
},
"RestartPolicy": {
"Name": "no",
"MaximumRetryCount": 0
},
"AutoRemove": false,
"VolumeDriver": "",
"VolumesFrom": null,
"ConsoleSize": [
0,
0
],
"Annotations": {
"io.container.manager": "libpod",
"io.podman.annotations.label": "disable",
"org.opencontainers.image.stopSignal": "15",
"org.systemd.property.KillSignal": "15",
"org.systemd.property.TimeoutStopUSec": "uint64 10000000"
},
"CapAdd": [],
"CapDrop": [],
"CgroupnsMode": "",
"Dns": [],
"DnsOptions": [],
"DnsSearch": [],
"ExtraHosts": [],
"GroupAdd": [],
"IpcMode": "shareable",
"Cgroup": "",
"Links": null,
"OomScoreAdj": 0,
"PidMode": "private",
"Privileged": false,
"PublishAllPorts": false,
"ReadonlyRootfs": false,
"SecurityOpt": [
"label=disable"
],
"UTSMode": "private",
"UsernsMode": "",
"ShmSize": 65536000,
"Runtime": "oci",
"Isolation": "",
"CpuShares": 0,
"Memory": 0,
"NanoCpus": 0,
"CgroupParent": "",
"BlkioWeight": 0,
"BlkioWeightDevice": null,
"BlkioDeviceReadBps": null,
"BlkioDeviceWriteBps": null,
"BlkioDeviceReadIOps": null,
"BlkioDeviceWriteIOps": null,
"CpuPeriod": 0,
"CpuQuota": 0,
"CpuRealtimePeriod": 0,
"CpuRealtimeRuntime": 0,
"CpusetCpus": "",
"CpusetMems": "",
"Devices": [
{
"PathOnHost": "/dev/dri/card0",
"PathInContainer": "/dev/dri/card0",
"CgroupPermissions": ""
},
{
"PathOnHost": "/dev/dri/renderD128",
"PathInContainer": "/dev/dri/renderD128",
"CgroupPermissions": ""
}
],
"DeviceCgroupRules": null,
"DeviceRequests": null,
"MemoryReservation": 0,
"MemorySwap": 0,
"MemorySwappiness": 0,
"OomKillDisable": false,
"PidsLimit": 2048,
"Ulimits": [
{
"Name": "RLIMIT_NPROC",
"Hard": 4194304,
"Soft": 4194304
}
],
"CpuCount": 0,
"CpuPercent": 0,
"IOMaximumIOps": 0,
"IOMaximumBandwidth": 0,
"MaskedPaths": null,
"ReadonlyPaths": null
},
"GraphDriver": {
"Data": {
"LowerDir": "/var/lib/containers/storage/overlay/8a7473e15940de22d1d235ad293c0dbd124b3f6dfba2e0a185d099dee598eebb/diff:/var/lib/containers/storage/overlay/e15fbf9ea9a3f3a7040f8010ea4414179ea9b6bf740f009a0281e9f5f8d86700/diff:/var/lib/containers/storage/overlay/46009ea56de0d875916fddd521245691268c77f9b8087ea48651c57884a3724b/diff:/var/lib/containers/storage/overlay/7962580760defbe2b334758e9429ec74bf32edfad5725faa9180e9f483445797/diff:/var/lib/containers/storage/overlay/0d95c43a69d6f6bfdda3d564003300517fe99fe67ae33388a4feb601d1f3cd16/diff",
"MergedDir": "/var/lib/containers/storage/overlay/779439d3223f809e6aae2a85a4ee4d75da466dbfebd5462380bca71db0570803/merged",
"UpperDir": "/var/lib/containers/storage/overlay/779439d3223f809e6aae2a85a4ee4d75da466dbfebd5462380bca71db0570803/diff",
"WorkDir": "/var/lib/containers/storage/overlay/779439d3223f809e6aae2a85a4ee4d75da466dbfebd5462380bca71db0570803/work"
},
"Name": "overlay"
},
"SizeRootFs": 0,
"Mounts": [
{
"Type": "bind",
"Source": "/Users/raghurambanda/.local/share/containers/podman-desktop/extensions-storage/redhat.ai-lab/models/hf.instructlab.granite-7b-lab-GGUF",
"Destination": "/models",
"Mode": "",
"RW": true,
"Propagation": "rprivate"
}
],
"Config": {
"Hostname": "6682db1c0a96",
"Domainname": "",
"User": "1001",
"AttachStdin": false,
"AttachStdout": false,
"AttachStderr": false,
"ExposedPorts": {
"49686/tcp": {},
"8000/tcp": {},
"8080/tcp": {}
},
"Tty": false,
"OpenStdin": false,
"StdinOnce": false,
"Env": [
"STI_SCRIPTS_PATH=/usr/libexec/s2i",
"MODEL_PATH=/models/granite-7b-lab-Q4_K_M.gguf",
"PROMPT_COMMAND=. /opt/app-root/bin/activate",
"HOME=/opt/app-root/src",
"HOST=0.0.0.0",
"FORCE_CMAKE=1",
"CNB_USER_ID=1001",
"SUMMARY=Platform for building and running Python 3.11 applications",
"PLATFORM=el9",
"MODEL_CHAT_FORMAT=openchat",
"ENV=/opt/app-root/bin/activate",
"LC_ALL=en_US.UTF-8",
"DESCRIPTION=Python 3.11 available as container is a base platform for building and running various Python 3.11 applications and frameworks. Python is an easy to learn, powerful programming language. It has efficient high-level data structures and a simple but effective approach to object-oriented programming. Python's elegant syntax and dynamic typing, together with its interpreted nature, make it an ideal language for scripting and rapid application development in many areas on most platforms.",
"CMAKE_ARGS=-DLLAMA_VULKAN=on",
"PIP_NO_CACHE_DIR=off",
"PYTHONIOENCODING=UTF-8",
"CNB_GROUP_ID=0",
"NODEJS_VER=20",
"GPU_LAYERS=-1",
"container=oci",
"PYTHONUNBUFFERED=1",
"BASH_ENV=/opt/app-root/bin/activate",
"PORT=8000",
"APP_ROOT=/opt/app-root",
"PATH=/opt/app-root/src/.local/bin/:/opt/app-root/src/bin:/opt/app-root/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
"CNB_STACK_ID=com.redhat.stacks.ubi9-python-311",
"STI_SCRIPTS_URL=image:///usr/libexec/s2i",
"LANG=en_US.UTF-8",
"PYTHON_VERSION=3.11",
"HOSTNAME=6682db1c0a96"
],
"Cmd": [],
"Healthcheck": {
"Test": [
"CMD-SHELL",
"curl -sSf localhost:8000/docs > /dev/null"
],
"Interval": 5000000000,
"Timeout": 30000000000,
"Retries": 20
},
"Image": "quay.io/ai-lab/llamacpp-python-vulkan:latest",
"Volumes": null,
"WorkingDir": "/locallm",
"Entrypoint": [
"sh",
"run.sh"
],
"OnBuild": null,
"Labels": {
"ai-lab-inference-server": "[\"hf.instructlab.granite-7b-lab-GGUF\"]",
"api": "http://localhost:49686/v1",
"architecture": "aarch64",
"build-date": "2024-02-29T16:28:59",
"com.redhat.component": "python-311-container",
"com.redhat.license_terms": "https://www.redhat.com/en/about/red-hat-end-user-license-agreements#UBI",
"description": "Python 3.11 available as container is a base platform for building and running various Python 3.11 applications and frameworks. Python is an easy to learn, powerful programming language. It has efficient high-level data structures and a simple but effective approach to object-oriented programming. Python's elegant syntax and dynamic typing, together with its interpreted nature, make it an ideal language for scripting and rapid application development in many areas on most platforms.",
"distribution-scope": "public",
"docs": "http://localhost:49686/docs",
"gpu": "Apple M1 Pro",
"io.buildah.version": "1.23.1",
"io.buildpacks.stack.id": "com.redhat.stacks.ubi9-python-311",
"io.k8s.description": "Python 3.11 available as container is a base platform for building and running various Python 3.11 applications and frameworks. Python is an easy to learn, powerful programming language. It has efficient high-level data structures and a simple but effective approach to object-oriented programming. Python's elegant syntax and dynamic typing, together with its interpreted nature, make it an ideal language for scripting and rapid application development in many areas on most platforms.",
"io.k8s.display-name": "Python 3.11",
"io.openshift.expose-services": "8080:http",
"io.openshift.s2i.scripts-url": "image:///usr/libexec/s2i",
"io.openshift.tags": "builder,python,python311,python-311,rh-python311",
"io.s2i.scripts-url": "image:///usr/libexec/s2i",
"maintainer": "SoftwareCollections.org <sclorg@redhat.com>",
"name": "ubi9/python-311",
"release": "52",
"summary": "Platform for building and running Python 3.11 applications",
"trackingId": "znu9cg",
"url": "https://access.redhat.com/containers/#/registry.access.redhat.com/ubi9/python-311/images/1-52",
"usage": "s2i build https://github.com/sclorg/s2i-python-container.git --context-dir=3.11/test/setup-test-app/ ubi9/python-311 python-sample-app",
"vcs-ref": "e62e3648c350ef90416ec6891e59758f1bdfe547",
"vcs-type": "git",
"vendor": "Red Hat, Inc.",
"version": "1"
},
"StopSignal": "15",
"StopTimeout": 10
},
"NetworkSettings": {
"Bridge": "",
"SandboxID": "",
"SandboxKey": "/run/netns/netns-1b68ff30-5959-b71b-32a6-74508e6411f8",
"Ports": {
"49686/tcp": null,
"8000/tcp": [
{
"HostIp": "0.0.0.0",
"HostPort": "49686"
}
],
"8080/tcp": null
},
"HairpinMode": false,
"LinkLocalIPv6Address": "",
"LinkLocalIPv6PrefixLen": 0,
"SecondaryIPAddresses": null,
"SecondaryIPv6Addresses": null,
"EndpointID": "",
"Gateway": "10.88.0.1",
"GlobalIPv6Address": "",
"GlobalIPv6PrefixLen": 0,
"IPAddress": "10.88.0.2",
"IPPrefixLen": 16,
"IPv6Gateway": "",
"MacAddress": "66:d0:eb:f6:d0:a6",
"Networks": {
"podman": {
"IPAMConfig": null,
"Links": null,
"Aliases": [
"6682db1c0a96"
],
"MacAddress": "66:d0:eb:f6:d0:a6",
"DriverOpts": null,
"NetworkID": "podman",
"EndpointID": "",
"Gateway": "10.88.0.1",
"IPAddress": "10.88.0.2",
"IPPrefixLen": 16,
"IPv6Gateway": "",
"GlobalIPv6Address": "",
"GlobalIPv6PrefixLen": 0,
"DNSNames": null
}
}
}
}
Hardware Overview:
Model Name: MacBook Pro
Model Identifier: MacBookPro18,3
Chip: Apple M1 Pro
Total Number of Cores: 10 (8 performance and 2 efficiency)
Memory: 32 GB
System Firmware Version: 10151.121.1
OS Loader Version: 10151.121.1
I also reproduced this on an M2 Pro, where I saw a huge increase in vCPU usage in the container:
I also observed that the container sometimes crashes after hanging for a while, with the error:
run.sh: line 10: 2 Killed python -m llama_cpp.server --model ${MODEL_PATH} --host ${HOST:=0.0.0.0} --port ${PORT:=8001} --n_gpu_layers ${GPU_LAYERS:=0} --clip_model_path ${CLIP_MODEL_PATH:=None} --chat_format ${MODEL_CHAT_FORMAT:="llama-2"}
I managed to get some kind of response from the system using the playground, but it took a very long time.
https://github.com/user-attachments/assets/dc142688-9ea3-4b57-810d-1f1f686262fd
Bug description
While using GPU-enabled Podman Desktop, inference calls to the model service do not respond. The browser keeps showing "loading" for a very long time, and the same happens with the CLI.
Operating system
Apple M1
Installation Method
from
ghcr.io/containers/podman-desktop-extension-ai-lab
container image
Version
next (development version)
Steps to reproduce
Relevant log output
Additional context
No response