bnosac / audio.vadwebrtc

Voice Activity Detection in R using the "webrtc" toolkit
Mozilla Public License 2.0
1 stars 1 forks source link

vad by channel #6

Open jwijffels opened 8 months ago

jwijffels commented 8 months ago
#' Voice activity detection on a media file, overall and per audio channel
#'
#' Converts `x` to a temporary WAV file at `sample_rate`, runs
#' `audio.vadwebrtc::VAD()` on that file (reported as channel 0) and, when the
#' file has more than one audio channel, additionally on each channel
#' separately (reported as channels 1, 2, ...).
#'
#' @param x Path to a media file; passed to `av::av_media_info()` and
#'   `av::av_audio_convert()`.
#' @param sample_rate Sample rate handed to `av::av_audio_convert()` for the
#'   temporary WAV file. Defaults to 16000.
#' @param ... Further arguments forwarded to every `audio.vadwebrtc::VAD()` call.
#' @return A list of class `"VAD_WEBRTC"` with elements `file`, `duration_secs`,
#'   `channels`, `vad_segments` (columns `channel`, `vad_segment`, `start`,
#'   `end`, `has_voice`) and `vad_stats` (columns `channel`, `n_segments`,
#'   `n_segments_has_voice`, `n_segments_has_no_voice`, `seconds_has_voice`,
#'   `seconds_has_no_voice`, `pct_has_voice`). Per-channel rows come first,
#'   followed by the channel 0 rows.
VAD_channel <- function(x, sample_rate = 16000, ...){
  ## Fail early with a clear message instead of ignoring requireNamespace()'s result
  need_package <- function(pkg){
    if(!requireNamespace(pkg, quietly = TRUE)){
      stop(sprintf("Package '%s' is required by VAD_channel()", pkg), call. = FALSE)
    }
    invisible(TRUE)
  }
  need_package("av")
  need_package("audio.vadwebrtc")

  media_info <- av::av_media_info(x)
  duration   <- media_info$duration
  channels   <- media_info$audio$channels
  if(length(channels) < 1){
    stop("No audio stream found in ", x, call. = FALSE)
  }
  ## More than one audio stream yields one channel count per stream; a vector
  ## cannot be used in if(). Presumably the conversion below uses the first
  ## audio stream - TODO confirm against the av documentation.
  channels <- channels[[1]]

  tempfile_wav_allchannels <- tempfile(pattern = "allchannels_", fileext = ".wav")
  tempfile_wav_onechannel  <- tempfile(pattern = "onechannel_", fileext = ".wav")
  ## unlink() stays silent for files that were never written (e.g. mono input,
  ## or a conversion error); add = TRUE keeps any earlier exit handlers
  on.exit(unlink(c(tempfile_wav_allchannels, tempfile_wav_onechannel)), add = TRUE)
  av::av_audio_convert(audio = x, output = tempfile_wav_allchannels, format = "wav", sample_rate = sample_rate, verbose = FALSE)
  ## NOTE(review): an earlier revision also wrote a mono downmix to
  ## tempfile_wav_onechannel here, but that file was always overwritten or left
  ## unread, so the extra conversion was dropped. The channel 0 pass below still
  ## runs on the all-channels file, as before; if VAD() requires mono input,
  ## switch that pass to a dedicated downmix file - confirm with the VAD docs.

  ## Run VAD on one wav file and tag segments and stats with the channel id
  vad_on_file <- function(wav_path, channel_id, ...){
    vad <- audio.vadwebrtc::VAD(wav_path, ...)
    vad$vad_segments$channel <- rep(channel_id, nrow(vad$vad_segments))
    vad$vad_stats$channel    <- channel_id
    unclass(vad[c("vad_segments", "vad_stats")])
  }

  ## By channel - VAD
  out <- list()
  if(channels > 1){
    need_package("audio")
    audio_content <- audio::load.wave(tempfile_wav_allchannels)
    ## one row of the loaded wave per channel; each row is written to the
    ## (reused) single-channel temp file and analysed on its own
    out <- lapply(seq_len(nrow(audio_content)), FUN = function(channel_id, ...){
      audio::save.wave(audio_content[channel_id, , drop = FALSE], tempfile_wav_onechannel)
      vad_on_file(tempfile_wav_onechannel, channel_id, ...)
    }, ...)
  }
  ## Channel 0: VAD on the converted file containing all channels
  out[[length(out) + 1]] <- vad_on_file(tempfile_wav_allchannels, 0, ...)

  segments <- do.call(rbind, lapply(out, FUN = function(res) res$vad_segments))
  stats    <- do.call(rbind, lapply(out, FUN = function(res) as.data.frame(res$vad_stats)))
  results <- list(file = x,
       duration_secs = duration,
       channels = channels,
       vad_segments = segments[, c('channel', 'vad_segment', 'start', 'end', 'has_voice')],
       vad_stats = stats[, c('channel', 'n_segments', 'n_segments_has_voice', 'n_segments_has_no_voice', 'seconds_has_voice', 'seconds_has_no_voice', 'pct_has_voice')])
  class(results) <- "VAD_WEBRTC"
  results
}