artistlu opened this issue 2 months ago
Earlier, I also modified the exo/exo/topology/device_capabilities.py file:
def linux_device_capabilities() -> DeviceCapabilities:
  if DEBUG >= 2: print(f"tinygrad {Device.DEFAULT=}")

  context = pyudev.Context()

  # Check for a Mali GPU via udev
  mali_gpu = next((device for device in context.list_devices(subsystem='mali0') if 'mali' in device.sys_name.lower()), None)
  if mali_gpu:
    gpu_name = mali_gpu.get('DEVNAME', 'Unknown Mali GPU')
    return DeviceCapabilities(
      model="Linux Box (ARM Mali)",
      chip=gpu_name,
      memory=psutil.virtual_memory().total // 2**20,
      flops=DeviceFlops(fp32=100, fp16=200, int8=400)  # Placeholder values, adjust as needed
    )

  # Check for an NVIDIA GPU via NVML
  try:
    import pynvml
    pynvml.nvmlInit()
    handle = pynvml.nvmlDeviceGetHandleByIndex(0)
    gpu_name = pynvml.nvmlDeviceGetName(handle)
    gpu_memory_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
    if DEBUG >= 2: print(f"NVIDIA device {gpu_name=} {gpu_memory_info=}")
    return DeviceCapabilities(
      model=f"Linux Box ({gpu_name})",
      chip=gpu_name,
      memory=gpu_memory_info.total // 2**20,
      flops=CHIP_FLOPS.get(gpu_name, DeviceFlops(fp32=0, fp16=0, int8=0))
    )
  except ImportError:
    pass  # NVIDIA libraries not available
  except pynvml.NVMLError:
    pass  # NVIDIA GPU not found or error accessing it

  # Fallback for other devices
  return DeviceCapabilities(
    model=f"Linux Box (Device: {Device.DEFAULT})",
    chip=f"Unknown Chip (Device: {Device.DEFAULT})",
    memory=psutil.virtual_memory().total // 2**20,
    flops=DeviceFlops(fp32=0, fp16=0, int8=0)
  )
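Since the Mali branch above depends on udev exposing the GPU under the 'mali0' subsystem, a quick diagnostic like the following can confirm what pyudev actually sees on the board. This is just a sketch for checking the filter, not part of the patch:

# Diagnostic sketch: list every udev device whose sys_name or DEVNAME mentions
# "mali", to confirm which subsystem the filter above should match on this board.
import pyudev

context = pyudev.Context()
for device in context.list_devices():
  devname = (device.get('DEVNAME') or '').lower()
  if 'mali' in device.sys_name.lower() or 'mali' in devname:
    print(device.subsystem, device.sys_name, device.get('DEVNAME'), device.sys_path)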
I've been trying to load a local model on a device with a Mali GPU and have run into an issue. After modifying the code in three locations, I'm still hitting an error.
I've made modifications to the following files:
exo/exo/api/chatgpt_api.py: resolve_tinygrad_tokenizer function
exo/exo/api/chatgpt_api.py: resolve_tokenizer function
exo/inference/tinygrad/inference.py: ensure_shard function
The error message is as follows:
I'm not entirely sure if the way I'm loading the model is the root cause of this problem. Are there more elegant methods to load local models that I could explore?
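For reference, the general pattern I'm aiming for when pointing tokenizer resolution at a local directory is roughly the sketch below (using Hugging Face transformers); the helper name and the /models path are illustrative placeholders, not exo's actual API:

# Illustrative sketch only: try a local snapshot first, then fall back to the Hub.
# The local_root path and this helper are placeholders, not exo's real code.
from pathlib import Path
from transformers import AutoTokenizer

def resolve_tokenizer_locally(model_id: str, local_root: str = "/models"):
  local_dir = Path(local_root) / model_id
  if local_dir.is_dir():
    # Load from the local directory without touching the network.
    return AutoTokenizer.from_pretrained(str(local_dir), local_files_only=True)
  # Otherwise resolve/download from the Hugging Face Hub as usual.
  return AutoTokenizer.from_pretrained(model_id)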