LAAC-LSCP / ChildRecordsR

R package for the evaluation of annotations of daylong recordings
https://laac-lscp.github.io/ChildRecordsR
0 stars 1 forks source link

derive recording-level metrics #51

Closed alecristia closed 3 years ago

alecristia commented 3 years ago

For each recording, derive the metrics listed in the attached CSV file, child-recordings-variables.csv.

alecristia commented 3 years ago

Many of the variables are calculated in the R code below:

    # Load the "its" annotations. `FALSE` is spelled out because `F` is an
    # ordinary variable in R and can be reassigned.
    # NOTE(review): `extractDataCR()` and `tmp` are defined outside this
    # snippet; confirm their contract there.
    its <- extractDataCR("its", tmp, LENA.OL = FALSE)

    ### speech summary
    summa <- summary(its)

    # Keep the four speaker types of interest, then recompute each row's share
    # of vocalisations within its child_id / age_in_day group, so the shares
    # are relative to the retained speaker types only.
    # The result stays grouped by child_id and age_in_day.
    speech <- summa$data %>%
      dplyr::filter(speaker_type %in% c("CHI", "FEM", "MAL", "OCH")) %>%
      dplyr::mutate(child_id = as.character(child_id)) %>%
      dplyr::group_by(child_id, age_in_day) %>%
      dplyr::mutate(
        prop_voc = voc / sum(voc) # modification of proportion
      )

    # Per-hour speech summary: one row per child_id / age_in_day /
    # speaker_type / experiment / hour, where `hour` is the clock hour of the
    # segment onset. The result stays grouped by child_id, age_in_day and hour.
    speech.ph <- its$data %>%
      dplyr::mutate(
        duration = segment_offset - segment_onset,
        hour = lubridate::hour(POSIXct_time_onset)
      ) %>%
      dplyr::arrange(child_id, age_in_day) %>%
      dplyr::group_by(
        child_id, age_in_day, speaker_type, experiment, hour
      ) %>%
      dplyr::summarise(
        voc = dplyr::n(),
        # NOTE(review): the denominator is max(range_offset) within the hour
        # group divided by 3600000 (presumably ms -> hours); confirm that this
        # is the intended denominator for a per-hour breakdown.
        voc_ph = dplyr::n() / (max(range_offset) / 3600000),
        avg_voc_dur = mean(duration),
        voc_dur = sum(duration),
        voc_dur_ph = sum(duration) / (max(range_offset) / 3600000)
      ) %>%
      dplyr::ungroup() %>%
      # A proportion computed before this filter would be overwritten by the
      # one below, so it is computed only once, over the retained speakers.
      dplyr::filter(speaker_type %in% c("CHI", "FEM", "MAL", "OCH")) %>%
      dplyr::mutate(child_id = as.character(child_id)) %>%
      dplyr::group_by(child_id, age_in_day, hour) %>%
      dplyr::mutate(
        prop_voc = voc / sum(voc) # modification of proportion
      )

    ### canonical summary
    # Utterance and cry totals per child_id / age_in_day / speaker_type for the
    # four speaker types of interest. Each *_ph column divides the total by
    # (max(range_offset) / 1000) / 3600 of the group.
    # NOTE(review): presumably range_offset is in milliseconds, making the
    # *_ph columns per-hour rates; confirm against the data dictionary.
    cannonical <- its$data %>%
      dplyr::filter(speaker_type %in% c("CHI", "FEM", "MAL", "OCH")) %>%
      dplyr::mutate(
        child_id = as.character(child_id),
        # `duration` is not used by the summary below.
        duration = segment_offset - segment_onset,
        # 1 when the segment has a positive cry length, 0 otherwise.
        cry = ifelse(child_cry_vfx_len > 0, 1, 0)
      ) %>%
      dplyr::group_by(child_id, age_in_day, speaker_type) %>%
      dplyr::summarise(
        can_voc = sum(utterances_count),
        can_voc_ph = sum(utterances_count) /
          ((max(range_offset) / 1000) / 3600),
        can_voc_dur = sum(utterances_length),
        can_voc_dur_ph = sum(utterances_length) /
          ((max(range_offset) / 1000) / 3600),
        cry_voc = sum(cry),
        cry_voc_ph = sum(cry) /
          ((max(range_offset) / 1000) / 3600),
        cry_voc_dur = sum(child_cry_vfx_len),
        cry_voc_dur_ph = sum(child_cry_vfx_len) /
          ((max(range_offset) / 1000) / 3600)
      )

    # Per-hour variant of the canonical summary: utterance and cry totals per
    # child_id / age_in_day / speaker_type / hour, where `hour` is the clock
    # hour of the segment onset.
    # NOTE(review): the *_ph denominators use max(range_offset) within each
    # hour group; confirm this is the intended denominator for an hourly
    # breakdown.
    cannonical.ph <- its$data %>%
      dplyr::filter(speaker_type %in% c("CHI", "FEM", "MAL", "OCH")) %>%
      dplyr::mutate(
        child_id = as.character(child_id),
        # `duration` is not used by the summary below.
        duration = segment_offset - segment_onset,
        hour = lubridate::hour(POSIXct_time_onset),
        # 1 when the segment has a positive cry length, 0 otherwise.
        cry = ifelse(child_cry_vfx_len > 0, 1, 0)
      ) %>%
      dplyr::group_by(child_id, age_in_day, speaker_type, hour) %>%
      dplyr::summarise(
        can_voc = sum(utterances_count),
        can_voc_ph = sum(utterances_count) /
          ((max(range_offset) / 1000) / 3600),
        can_voc_dur = sum(utterances_length),
        can_voc_dur_ph = sum(utterances_length) /
          ((max(range_offset) / 1000) / 3600),
        cry_voc = sum(cry),
        cry_voc_ph = sum(cry) /
          ((max(range_offset) / 1000) / 3600),
        cry_voc_dur = sum(child_cry_vfx_len),
        cry_voc_dur_ph = sum(child_cry_vfx_len) /
          ((max(range_offset) / 1000) / 3600)
      )

    # Word summary per child_id / age_in_day / speaker_type, restricted to FEM
    # and MAL segments that contain at least one word (rows where `words` is
    # NA are dropped by the filter).
    # The result stays grouped by child_id and age_in_day.
    word <- its$data %>%
      dplyr::mutate(
        child_id = as.character(child_id),
        duration = segment_offset - segment_onset
      ) %>%
      dplyr::filter(speaker_type %in% c("FEM", "MAL") & words > 0) %>%
      dplyr::group_by(child_id, age_in_day, speaker_type) %>%
      dplyr::summarise(
        word_nbr = sum(words),
        word_mean_nbr = mean(words),
        word_length = sum(duration),
        # Mean segment duration; previously this repeated sum(duration) and so
        # duplicated word_length.
        word_mean_length = mean(duration)
      )

    # Per-hour word summary per child_id / age_in_day / speaker_type / hour,
    # where `hour` is the clock hour of the segment onset. Restricted to FEM
    # and MAL segments that contain at least one word (rows where `words` is
    # NA are dropped by the filter).
    # The result stays grouped by child_id, age_in_day and speaker_type.
    word.ph <- its$data %>%
      dplyr::mutate(
        child_id = as.character(child_id),
        duration = segment_offset - segment_onset,
        hour = lubridate::hour(POSIXct_time_onset)
      ) %>%
      dplyr::filter(speaker_type %in% c("FEM", "MAL") & words > 0) %>%
      dplyr::group_by(child_id, age_in_day, speaker_type, hour) %>%
      dplyr::summarise(
        word_nbr = sum(words),
        word_mean_nbr = mean(words),
        word_length = sum(duration),
        # Mean segment duration; previously this repeated sum(duration) and so
        # duplicated word_length.
        word_mean_length = mean(duration)
      )