Issue by MahmoudAshraf97 — closed 4 days ago.
Measure CPU memory for whisper.cpp
#!/bin/bash
# Measure peak CPU (resident-set) memory of a whisper.cpp run.
#
# Runs the command under GNU time (-v), extracts the
# "Maximum resident set size" line (reported in kilobytes),
# and converts it to megabytes with bc.

# Command to benchmark (intentionally unquoted below so it word-splits
# into program + arguments when handed to /usr/bin/time)
COMMAND="./main -m models/ggml-large-v2.bin -l auto -fa ../faster-whisper/benchmark/output.wav"

# Run the command and capture both stdout and stderr
# (GNU time writes its report to stderr)
OUTPUT=$(/usr/bin/time -v $COMMAND 2>&1)

# Extract the peak memory usage (KB) from the time report
PEAK_MEMORY=$(echo "$OUTPUT" | grep "Maximum resident set size" | awk '{print $6}')

# Guard: if the report line was missing (command failed, or /usr/bin/time
# is not GNU time), bc would otherwise fail with a cryptic syntax error.
if [ -z "$PEAK_MEMORY" ]; then
    echo "Error: could not extract peak memory usage (is GNU time installed at /usr/bin/time?)" >&2
    exit 1
fi

# Convert KB -> MB for readability
PEAK_MEMORY_MB=$(bc <<< "scale=2; $PEAK_MEMORY / 1024")

# Print the result
echo "Peak memory consumption: $PEAK_MEMORY_MB MB"
GPU memory measurement (Python, via pynvml):
import time
import pynvml
def measure_gpu_memory(command, poll_interval=0.5):
    """Run *command* in a shell subprocess and report extra GPU memory used.

    Samples NVML memory usage across *all* visible GPUs while the process
    runs, and returns (peak total usage - usage before launch) in MB.

    NOTE: this is sampling-based — short-lived allocation spikes between
    polls can be missed; lower *poll_interval* for finer resolution.

    Args:
        command: Shell command line to execute (passed with shell=True).
        poll_interval: Seconds to sleep between NVML samples.

    Returns:
        float: Additional GPU memory used at peak, in megabytes. Can be
        negative if other processes released memory during the run.
    """
    import subprocess

    # Initialize NVML for the duration of the measurement
    pynvml.nvmlInit()
    try:
        device_count = pynvml.nvmlDeviceGetCount()
        handles = [
            pynvml.nvmlDeviceGetHandleByIndex(i) for i in range(device_count)
        ]

        def total_used():
            # Sum currently-used memory (bytes) over every GPU.
            return sum(
                pynvml.nvmlDeviceGetMemoryInfo(h).used for h in handles
            )

        # Baseline: memory already in use before the process starts
        initial_memory = total_used()
        peak_memory = initial_memory

        # shell=True because `command` is a full command line string
        process = subprocess.Popen(command, shell=True)
        while process.poll() is None:  # while the process is running
            peak_memory = max(peak_memory, total_used())
            time.sleep(poll_interval)
    finally:
        pynvml.nvmlShutdown()

    # Difference between peak and baseline, converted bytes -> MB
    return (peak_memory - initial_memory) / 1024 / 1024
if __name__ == "__main__":
    # Benchmark command: whisper.cpp large-v2 on the shared test clip.
    bench_cmd = "./main -m models/ggml-large-v2.bin -l auto -fa ../faster-whisper/benchmark/output.wav"
    delta_mb = measure_gpu_memory(bench_cmd)
    print(f"Additional GPU memory used: {delta_mb:.2f} MB")
OpenAI Whisper Inference