SYSTRAN / faster-whisper

Faster Whisper transcription with CTranslate2
MIT License

Bump version to 1.1.0 and update benchmarks #1161

Closed MahmoudAshraf97 closed 4 days ago

MahmoudAshraf97 commented 4 days ago

OpenAI Whisper Inference

import torch

# torch.set_num_threads(8)
from whisper import load_model, transcribe, load_audio

audio = load_audio("benchmark/benchmark.m4a")
model = load_model("large-v2", device="cpu")

use_cuda = True

if use_cuda:
    # Cast all parameters except the LayerNorm weights to FP16 to ensure half-precision inference
    state_dict = model.state_dict()
    for parameter_name, _ in model.named_parameters():
        if "ln" not in parameter_name:
            state_dict[parameter_name] = state_dict[parameter_name].half()
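    # assign=True keeps the converted FP16 tensors rather than copying them back into the original FP32 parameters (requires PyTorch >= 2.1)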
    model.load_state_dict(state_dict, assign=True)
    model = model.cuda()

result = transcribe(model, audio, beam_size=5, best_of=5, verbose=False)
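For comparison, the faster-whisper side of the benchmark can be run along these lines (a minimal sketch rather than the exact benchmark script; the model name, device, and compute_type shown here are assumptions):

from faster_whisper import WhisperModel

model = WhisperModel("large-v2", device="cuda", compute_type="float16")

# transcribe() returns a lazy generator; iterating it performs the actual decoding
segments, info = model.transcribe("benchmark/benchmark.m4a", beam_size=5, best_of=5)
for segment in segments:
    pass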
MahmoudAshraf97 commented 4 days ago

Measure CPU memory for whisper.cpp

#!/bin/bash

# Command to run
COMMAND="./main -m models/ggml-large-v2.bin -l auto -fa ../faster-whisper/benchmark/output.wav"
# Run the command and measure memory consumption
OUTPUT=$(/usr/bin/time -v $COMMAND 2>&1)

# Extract the peak memory usage from the output
PEAK_MEMORY=$(echo "$OUTPUT" | grep "Maximum resident set size" | awk '{print $6}')

# Convert to MB for readability
PEAK_MEMORY_MB=$(bc <<< "scale=2; $PEAK_MEMORY / 1024")

# Print the result
echo "Peak memory consumption: $PEAK_MEMORY_MB MB"

GPU memory

import subprocess
import time

import pynvml

def measure_gpu_memory(command):
    # Initialize NVML
    pynvml.nvmlInit()
    peak_memory = 0

    # Record the baseline memory usage on GPU 0 before launching the process
    handle = pynvml.nvmlDeviceGetHandleByIndex(0)
    mem_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
    initial_memory = mem_info.used

    # Start the benchmarked process
    process = subprocess.Popen(command, shell=True)

    try:
        while process.poll() is None:  # Poll while the process is running
            mem_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
            peak_memory = max(peak_memory, mem_info.used)
            time.sleep(0.5)
    finally:
        pynvml.nvmlShutdown()

    # Calculate memory usage difference (peak - initial)
    memory_difference = (peak_memory - initial_memory) / 1024 / 1024  # Convert to MB
    return memory_difference

if __name__ == "__main__":
    command = "./main -m models/ggml-large-v2.bin -l auto -fa ../faster-whisper/benchmark/output.wav"
    additional_memory = measure_gpu_memory(command)
    print(f"Additional GPU memory used: {additional_memory:.2f} MB")