natrys / whisper.el

Speech-to-Text interface for Emacs using OpenAI's whisper model and whisper.cpp as inference engine.
140 stars 10 forks source link

Support/FR: Put transcription in kill ring #14

Open pataquets opened 10 months ago

pataquets commented 10 months ago

Is it possible to transcribe text and get it as if it was yanked?

natrys commented 10 months ago

Not by default, the range of things people want is too varied for me to accommodate.

But it's possible if you add custom logic to the post-process hook. Please load the latest commit and try this:

;; Set `whisper-insert-text-at-point' to nil.
;; NOTE(review): judging by the variable name, this presumably stops whisper.el
;; from inserting the transcription at point -- confirm against whisper.el.
(setq whisper-insert-text-at-point nil) ;; optional but sensible to disable live marker

;; Register a hook function that kills the region from `point-min' to
;; `point-max' of the current buffer, which saves that text in the kill ring
;; (so it can be yanked afterwards) and removes it from the buffer.
;; NOTE(review): presumably the hook runs with the transcription buffer
;; current -- confirm against whisper.el.
(add-hook 'whisper-post-process-hook
          (lambda ()
            (kill-region (point-min) (point-max))))
liar666 commented 9 months ago

For those interested, I've written the following (Bash) script to do exactly what OP wants, from anywhere in the OS (Linux) :) How it works:

Very handy to add "VoiceTyping" to Firefox, LibreOffice, Emacs, Vim, etc.

Unfortunately, it is very rough and might fail in some cases (best solution in this case is to remove all my temp files from /tmp).

Unfortunately, I don't have time to improve it. But feel free to use it, modify/rewrite it (in a better language/format), make a beautiful GUI (GNOME/Plasma applet?), or package/distribute it in a better format (GitHub, package, etc.). Just mention this issue if you do, so that people know where it comes from :) The actual work is done in Whisper itself, so let's say my work is worth a CC0 license ;), but if it could help/inspire others, you're welcome!

#!/usr/bin/env bash

# Helper functions "show_message" & "debug" to display feedback to the user in logs & OSD
# show_message MESSAGE
# Report MESSAGE to the user twice: once on stdout (for logs) and once as a
# desktop notification in category INFO with an 800 ms timeout.
# The return status is that of notify-send.
show_message() {
    local text=$1
    echo "${text}"
    notify-send -t 800 -c INFO "${text}"
}

# debug MESSAGE
# Print MESSAGE on stdout only. The desktop notification is kept below but
# left disabled (commented out).
debug() {
    local text=$1
    echo "${text}"
    # notify-send -t 1000 -c INFO "${text}"
}

# --- X11 environment ---------------------------------------------------------
# Take the third column of the first `w` line mentioning "i3" and use it as
# DISPLAY, so notify-send/xsel work even when the script is started without
# one.
# NOTE(review): presumably that column holds the X display of the i3 session
# (e.g. ":0") -- confirm on the target system.
# The inherited DISPLAY is kept when nothing is detected, instead of being
# overwritten with an empty string.
detected_display=$(w | awk '/i3/ { print $3; exit }')
if [[ -n ${detected_display} ]]; then
    export DISPLAY=${detected_display}
fi
export XAUTHORITY=$HOME/.Xauthority

# --- Model selection ---------------------------------------------------------
# Model name inserted into the "ggml-<MODEL>.bin" file name below.
# Candidates: tiny[-q5_0] / base[-q5_0] / small[-q5_0]
MODEL=base-q5_0

# --- State files shared between the "start" and "stop" invocations -----------
TMP_WAV_FILE=/tmp/whisper_wav_name   # holds the *path* of the current .wav recording
MARKER_FILE=/tmp/whisper_pid         # exists while recording; holds the recorder PID
CONTENT_FILE=/tmp/whisper_content    # raw output captured from whisper.cpp

# --- Language ----------------------------------------------------------------
# Optional overrides (e.g. RECORD_LANG) are sourced from /tmp/READER.
# NOTE(review): this executes a file at a predictable path under /tmp in the
# current shell; any local user able to create it can run code as the caller.
# Consider moving it under ${XDG_RUNTIME_DIR} or ${HOME}.
if [[ -e /tmp/READER ]]; then
    . /tmp/READER
fi
# Assign the default value if it was not defined by /tmp/READER
: "${RECORD_LANG:=fr}" # "en"

# --- Commands ----------------------------------------------------------------
# Examples of commands to run Whisper in continuous recognition mode
# DIR=~/.doom.d/packages/whisper.cpp/
# $DIR/stream -m $DIR//models/ggml-base-q5_0.bin -t 8 --step 500 --length 5000
# $DIR/stream -m $DIR//models/ggml-small-q5_0.bin -t 6 --step 0 --length 30000 -vth 0.6
WHISPER_DIR=~/.doom.d/packages/whisper.cpp
RECORD_COMMAND='rec --rate 16k'

# The recording path is only known once a recording has been started; read it
# only if the file exists, so the "start" invocation does not print a spurious
# "No such file or directory" error from cat.
recorded_wav=""
if [[ -r ${TMP_WAV_FILE} ]]; then
    recorded_wav=$(<"${TMP_WAV_FILE}")
fi
WHISPER_COMMAND="${WHISPER_DIR}/main -m ${WHISPER_DIR}/models/ggml-${MODEL}.bin -l ${RECORD_LANG} -f ${recorded_wav}"

# Toggle between the two states of the voice-typing workflow, keyed on the
# existence of ${MARKER_FILE}:
#   - marker present: stop the recorder, run whisper.cpp on the recording,
#     put the recognized text into the X selection and remove the state files;
#   - marker absent:  start a new recording and store the recorder's PID.

# _extract_transcript FILE
# Print the recognized text found in FILE as one line on stdout: keep only the
# lines starting with '[', strip everything up to and including the first ']'
# plus the whitespace after it, and replace each newline with a space (so the
# result ends with a space when at least one line matched).
# NOTE(review): presumably these are whisper.cpp's "[start --> end]  text"
# segment lines -- confirm against the whisper.cpp version in use.
_extract_transcript() {
    grep '^[[]' "$1" \
        | sed 's/^[^]]*][[:space:]]*//' \
        | tr '\n' ' '
}

if [[ -e ${MARKER_FILE} ]]; then
    show_message "STOP recording"
    # Kill the running 'sox::rec' process; fall back to killing every "rec"
    # process if the stored PID is missing or no longer valid.
    rec_pid=$(cat "${MARKER_FILE}")
    debug "kill ${rec_pid}"
    kill "${rec_pid}" || pkill rec
    # Wait to be sure the .wav file is closed before starting whisper,
    # otherwise it endlessly waits for the file/stream to be closed.
    sleep 1
    # Run whisper.cpp. The command string is split on whitespace into an argv
    # array and executed directly (no eval, so no further shell interpretation
    # of its contents).
    debug "${WHISPER_COMMAND}"
    read -r -a whisper_argv <<<"${WHISPER_COMMAND}"
    "${whisper_argv[@]}" > "${CONTENT_FILE}"
    # Extract the text recognized by Whisper
    content=$(_extract_transcript "${CONTENT_FILE}")
    # Keep a copy for debugging, then put it in the Xorg selection buffer
    printf '%s\n' "${content}" | tee /tmp/whisper_debug
    printf '%s\n' "${content}" | xsel -i
    debug "Extracted content: ${content}"
    show_message "FINISHED parsing"
    # Remove temp files.
    # NOTE(review): the recorded .wav itself is not removed here (only the file
    # holding its name) -- confirm whether recordings should be kept.
    rm -f -- "${MARKER_FILE}" "${CONTENT_FILE}" "${TMP_WAV_FILE}"
else
    show_message "START Recording - Using with model [${MODEL}] in language [${RECORD_LANG}]"
    # Create the filename for the recording and remember it for the "stop" run.
    # NOTE(review): `mktemp --dry-run` only generates a name and does not
    # reserve it.
    wav_path="$(mktemp --dry-run).wav"
    printf '%s\n' "${wav_path}" > "${TMP_WAV_FILE}"
    # Start the recorder directly in the background (no eval, no subshell) so
    # that $! is the PID of the recorder process itself, not the result of a
    # racy pgrep lookup.
    read -r -a record_argv <<<"${RECORD_COMMAND}"
    debug "${RECORD_COMMAND} ${wav_path}"
    "${record_argv[@]}" "${wav_path}" &
    # Mark the start of the recording & store the PID for the "stop" run
    printf '%s\n' "$!" > "${MARKER_FILE}"
fi