HomeBankCode / rlena

R package for parsing LENA's .ITS files
GNU General Public License v2.0
6 stars 5 forks source link

alter conversational turn count parameter #8

Open ebergelson opened 5 years ago

ebergelson commented 5 years ago

Right now, get_conversations uses LENA's built-in guidelines for what counts as a conversational turn (which can let 5s lapse between segments from the relevant speaker categories, e.g. FAN at time 0s and CHN at time 3s would count as a turn). It would be great to be able to adjust this parameter to count conversations with lower lags. This is a little tricky and requires working over gather_segments and incorporating details from the LENA ADEX Reference Guide.

(the first pass attempt below needs refinement so it's FAN/MAN -> CHN or CXN -> FAN/MAN+CHN, and so cryvgfx is correctly accounted for)

Any interest in taking this on @tjmahr ? Here's a minimal example (the dput was generated from get_segments over an its file, filtered to a specific relevant blkId)

you can see in the longer relevant_lag values in the output [there are probably errors in that code, i'm just trying to show the flavor in the issue]

# dput() prints an R expression that recreates demo_block_segs; the printed
# structure(...) call follows.
demo_block_segs <- dput(demo_block_segs)
structure(list(itsId = c("20160511_125444_009456", "20160511_125444_009456", 
"20160511_125444_009456", "20160511_125444_009456", "20160511_125444_009456", 
"20160511_125444_009456", "20160511_125444_009456", "20160511_125444_009456", 
"20160511_125444_009456", "20160511_125444_009456", "20160511_125444_009456", 
"20160511_125444_009456", "20160511_125444_009456", "20160511_125444_009456", 
"20160511_125444_009456", "20160511_125444_009456", "20160511_125444_009456", 
"20160511_125444_009456", "20160511_125444_009456", "20160511_125444_009456"
), recId = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1), blkId = c(64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 
64, 64, 64, 64, 64, 64, 64, 64, 64, 64), blkTypeId = c(32, 32, 
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 
32, 32), segId = c(989, 990, 991, 992, 993, 994, 995, 996, 997, 
998, 999, 1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008
), blkType = c("Conversation", "Conversation", "Conversation", 
"Conversation", "Conversation", "Conversation", "Conversation", 
"Conversation", "Conversation", "Conversation", "Conversation", 
"Conversation", "Conversation", "Conversation", "Conversation", 
"Conversation", "Conversation", "Conversation", "Conversation", 
"Conversation"), spkr = c("CHN", "SIL", "FAN", "SIL", "SIL", 
"SIL", "FAN", "SIL", "SIL", "CHN", "SIL", "CHN", "SIL", "SIL", 
"SIL", "SIL", "CHN", "SIL", "FAN", "FAN"), startTime = c(1842.47, 
1843.5, 1845.32, 1846.92, 1847.72, 1850.76, 1851.65, 1853.67, 
1854.89, 1856.13, 1856.73, 1858.1, 1858.7, 1859.5, 1860.81, 1862.54, 
1863.54, 1864.19, 1865.47, 1866.71), endTime = c(1843.5, 1845.32, 
1846.92, 1847.72, 1850.76, 1851.65, 1853.67, 1854.89, 1856.13, 
1856.73, 1858.1, 1858.7, 1859.5, 1860.81, 1862.54, 1863.54, 1864.19, 
1865.47, 1866.71, 1867.37), startClockTime = structure(c(1462881502.47, 
1462881503.5, 1462881505.32, 1462881506.92, 1462881507.72, 1462881510.76, 
1462881511.65, 1462881513.67, 1462881514.89, 1462881516.13, 1462881516.73, 
1462881518.1, 1462881518.7, 1462881519.5, 1462881520.81, 1462881522.54, 
1462881523.54, 1462881524.19, 1462881525.47, 1462881526.71), class = c("POSIXct", 
"POSIXt"), tzone = "UTC"), endClockTime = structure(c(1462881503.5, 
1462881505.32, 1462881506.92, 1462881507.72, 1462881510.76, 1462881511.65, 
1462881513.67, 1462881514.89, 1462881516.13, 1462881516.73, 1462881518.1, 
1462881518.7, 1462881519.5, 1462881520.81, 1462881522.54, 1462881523.54, 
1462881524.19, 1462881525.47, 1462881526.71, 1462881527.37), class = c("POSIXct", 
"POSIXt"), tzone = "UTC"), startClockTimeLocal = structure(c(1462863502.47, 
1462863503.5, 1462863505.32, 1462863506.92, 1462863507.72, 1462863510.76, 
1462863511.65, 1462863513.67, 1462863514.89, 1462863516.13, 1462863516.73, 
1462863518.1, 1462863518.7, 1462863519.5, 1462863520.81, 1462863522.54, 
1462863523.54, 1462863524.19, 1462863525.47, 1462863526.71), class = c("POSIXct", 
"POSIXt"), tzone = "UTC"), endClockTimeLocal = structure(c(1462863503.5, 
1462863505.32, 1462863506.92, 1462863507.72, 1462863510.76, 1462863511.65, 
1462863513.67, 1462863514.89, 1462863516.13, 1462863516.73, 1462863518.1, 
1462863518.7, 1462863519.5, 1462863520.81, 1462863522.54, 1462863523.54, 
1462863524.19, 1462863525.47, 1462863526.71, 1462863527.37), class = c("POSIXct", 
"POSIXt"), tzone = "UTC"), average_dB = c(-25.96, -64.98, -40.94, 
-50.58, -75.97, -65.56, -31.76, -62.86, -70.25, -22.05, -53.64, 
-32.33, -47.33, -72.13, -61.01, -60.97, -31.2, -74.41, -37.73, 
-36.97), peak_dB = c(-20.45, -47.92, -30.99, -39.03, -61.14, 
-57.3, -13.71, -51.49, -56.03, -16.31, -39.93, -25.91, -38.41, 
-64.25, -48.82, -46.28, -25.73, -64.3, -31.68, -29.45), recordingInfo = c(NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_), 
    conversationInfo = c("BC|32|2|0|CIC|TIFI|FI", NA, "RC|32|3|1|CIC|TIFR|FI", 
    NA, NA, NA, "RC|32|3|1|CIC|NT|FH", NA, NA, "RC|32|3|1|CIC|TIFE|FI", 
    NA, "RC|32|3|1|CIC|NT|FH", NA, NA, NA, NA, "RC|32|3|1|CIC|TIFI|FH", 
    NA, "RC|32|4|2|CIC|TIFR|FI", "EC|32|4|2|CIC|NT|FH"), convStatus = c("BC", 
    NA, "RC", NA, NA, NA, "RC", NA, NA, "RC", NA, "RC", NA, NA, 
    NA, NA, "RC", NA, "RC", "EC"), convCount = c(32L, NA, 32L, 
    NA, NA, NA, 32L, NA, NA, 32L, NA, 32L, NA, NA, NA, NA, 32L, 
    NA, 32L, 32L), convTurnCount = c(2L, NA, 3L, NA, NA, NA, 
    3L, NA, NA, 3L, NA, 3L, NA, NA, NA, NA, 3L, NA, 4L, 4L), 
    convResponseCount = c(0L, NA, 1L, NA, NA, NA, 1L, NA, NA, 
    1L, NA, 1L, NA, NA, NA, NA, 1L, NA, 2L, 2L), convType = c("CIC", 
    NA, "CIC", NA, NA, NA, "CIC", NA, NA, "CIC", NA, "CIC", NA, 
    NA, NA, NA, "CIC", NA, "CIC", "CIC"), convTurnType = c("TIFI", 
    NA, "TIFR", NA, NA, NA, "NT", NA, NA, "TIFE", NA, "NT", NA, 
    NA, NA, NA, "TIFI", NA, "TIFR", "NT"), convFloorType = c("FI", 
    NA, "FI", NA, NA, NA, "FH", NA, NA, "FI", NA, "FH", NA, NA, 
    NA, NA, "FH", NA, "FI", "FH"), femaleAdultWordCnt = c(NA, 
    NA, 7.57, NA, NA, NA, 6.61, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, 5.61, 3.25), femaleAdultNonSpeechLen = c(NA, 
    NA, 0, NA, NA, NA, 0, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, 0, 0), femaleAdultUttCnt = c(NA, NA, 0, NA, NA, NA, 
    0, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0, 0), femaleAdultUttLen = c(NA, 
    NA, 0, NA, NA, NA, 0, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, 0, 0), childUttCnt = c(1, NA, NA, NA, NA, NA, NA, 
    NA, NA, 1, NA, 1, NA, NA, NA, NA, 1, NA, NA, NA), childUttLen = c(0.78, 
    NA, NA, NA, NA, NA, NA, NA, NA, 0.47, NA, 0.6, NA, NA, NA, 
    NA, 0.51, NA, NA, NA), startCry1 = c(NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_), endCry1 = c(NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_), childCryVfxLen = c(0, 
    NA, NA, NA, NA, NA, NA, NA, NA, 0, NA, 0, NA, NA, NA, NA, 
    0, NA, NA, NA), startUtt1 = c("PT1842.47S", NA, NA, NA, NA, 
    NA, NA, NA, NA, "PT1856.13S", NA, "PT1858.38S", NA, NA, NA, 
    NA, "PT1863.54S", NA, NA, NA), endUtt1 = c("PT1843.25S", 
    NA, NA, NA, NA, NA, NA, NA, NA, "PT1856.60S", NA, "PT1858.70S", 
    NA, NA, NA, NA, "PT1864.05S", NA, NA, NA), startVfx1 = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_), endVfx1 = c(NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_), maleAdultWordCnt = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_), maleAdultNonSpeechLen = c(NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
    ), maleAdultUttCnt = c(NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_), maleAdultUttLen = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_), startUtt2 = c(NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_), endUtt2 = c(NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_), startUtt3 = c(NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_), endUtt3 = c(NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_), startCry2 = c(NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_), endCry2 = c(NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_), startCry3 = c(NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_), endCry3 = c(NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_)), row.names = c(NA, 
-20L), class = c("tbl_df", "tbl", "data.frame"))

# Proof of concept: stack each segment's start and end time into one `time`
# column, keep only the speech rows of interest, and for every FAN -> CHN or
# CHN -> FAN pair of neighbouring rows record the gap to the next row's time.
demo_block_segs %>%
  gather(key = "start_end", value = "time", startTime, endTime) %>%
  arrange(segId) %>%
  filter(
    blkType == "Conversation",
    spkr %in% c("CHN", "CXN", "FAN", "MAN")
  ) %>%
  dplyr::select(spkr, start_end, time, segId, convTurnCount, blkId) %>%
  mutate(
    relevant_lag = ifelse(
      (spkr == "CHN" & lead(spkr, 1) %in% "FAN") |
        (spkr == "FAN" & lead(spkr, 1) %in% "CHN"),
      lead(time, 1) - time,
      NA
    )
  ) %>%
  # rows that are not part of a FAN/CHN alternation have no lag
  filter(!is.na(relevant_lag))

Output:

# A tibble: 3 x 7
  spkr  start_end  time segId convTurnCount blkId relevant_lag
  <chr> <chr>     <dbl> <dbl>         <int> <dbl>        <dbl>
1 CHN   endTime   1844.   989             2    64         1.82
2 FAN   endTime   1854.   995             3    64         2.46
3 CHN   endTime   1864.  1005             3    64         1.28
tjmahr commented 5 years ago

Just some notes from the manuals.

Statistical modeling is further used to detect Conversational Turns (CT), or back and forth alternation between the key child and an adult. For this purpose a conversation was defined as a contiguous region containing live human speech separated from the next conversation by a pause region of at least five seconds duration which contains only non-live-human speech audio signals. CTs cannot cross conversation boundaries. -- [The LENA TM Language Environment Analysis System: Audio Specifications of the DLP-0121]

The duration of a pause is by definition greater than or equal to five seconds when it occurs between two consecutive conversations. However, a pause inside a conversation may be less than five seconds in duration.
-- [The LENA™ language environment analysis system: The Interpreted Time Segments (ITS) File]

Note that there are two types of Vocalization Activity Block that include the key child, an adult, and another child (XIC & XIOCAC). The block types differ by whether or not Conversational Turns were produced in the block. When key child vocalizations and adult speech are contiguous, Conversational Turns may be produced, but when vocalizations from the other child intercede between key child and adult segments, Conversational Turns are not produced. -- [The LENA Advanced Data Extractor (ADEX) User Guide]

tjmahr commented 5 years ago

I won't be able to take this on, completely. I haven't worked on LENA data in a couple years at this point, nor have I ever worked on the conversation turn / contingency level of the data.

That said, I did clean up your proof-of-concept code, and I have added a prototype of the code to the package. The two functions are gather_speaker_transitions(its_xml) and gather_speaker_transitions_from_segments(data_segments).

Their code is in extract.R. It's basically your proof of concept code, but I broke up the logic to handle xml or segments in a dataframe, cleaned up the prefixed names so the code could work inside of a package, and changed the filtering. Now, the main idea is to enumerate all the speaker transitions in the segments dataframe (excluding transitions from/to SIL), take lags, and filter down to the legal transitions.

The functions are marked as internal so that you can try them using the package but they are separate from the other more complete functions.

The next steps would be to battle-test this code on real LENA data to make sure it works correctly. If there is other python/perl code that does the same thing, we could validate against that. I don't know what else there is. Once it works to your satisfaction, one would have to finish writing documentation for it in the package and in the README. This would be a great opportunity for a student going deep on this kind of data. :wink:

# for now treat as internal/experimental
#' Gather speaker transitions
#'
#' Combs through speech segments and returns a dataframe of speaker
#' transitions: pairs of consecutive non-silence segments together with the
#' time lag between them.
#'
#' Segments in `"Pause"` blocks and `"SIL"` segments are removed before
#' segments are paired, so a transition may span intervening silence.
#'
#' @param legal_transitions a character vector with transitions to keep, each
#'   written as `previous_current` speaker codes. If `NULL` (the default), only
#'   `c("MAN_CHN", "CHN_MAN", "FAN_CHN", "CHN_FAN")` are used.
#' @inheritParams extract
#' @export
#' @keywords internal
#' @return a dataframe with one row per retained transition. It contains the
#'   columns `transSpkr` (speaker transition as `previous_current`),
#'   `transSegId` (segment IDs as `previous_current`), and `transTime` (the
#'   time lag between the two segments).
gather_speaker_transitions <- function(its_xml, legal_transitions = NULL) {
  its_xml %>%
    gather_segments() %>%
    gather_speaker_transitions_from_segments(legal_transitions)
}

# for now treat as internal/experimental
#' @param data_segments a dataframe produced by `gather_segments()`
#' @export
#' @keywords internal
#' @rdname gather_speaker_transitions
gather_speaker_transitions_from_segments <- function(data_segments,
                                                     legal_transitions = NULL) {
  if (is.null(legal_transitions)) {
    # Both directions of key-child/adult alternation, for male and female
    # adults.
    legal_transitions <- c(
      "MAN_CHN", "CHN_MAN",
      "FAN_CHN", "CHN_FAN"
    )
  }

  # Long format: every segment contributes one startTime row and one endTime
  # row. Columns are named with strings because `.data` is not meant for
  # tidyselect arguments.
  segments <- data_segments %>%
    tidyr::gather(
      key = "startEnd",
      value = "time",
      "startTime",
      "endTime"
    ) %>%
    dplyr::arrange(.data$segId)

  no_pauses <- segments %>%
    dplyr::filter(.data$blkType != "Pause")

  # Label different kinds of transitions
  # NOTE(review): the lags below run over the whole dataframe; they are not
  # computed separately per block, conversation, or recording. Confirm whether
  # transitions across those boundaries should be kept.
  transitions <- no_pauses %>%
    dplyr::filter(.data$spkr != "SIL") %>%
    dplyr::mutate(
      transStartEnd =
        paste0(dplyr::lag(.data$startEnd, 1), "_", .data$startEnd),
      transSpkr =
        paste0(dplyr::lag(.data$spkr, 1), "_", .data$spkr),
      transTime =
        .data$time - dplyr::lag(.data$time),
      transSegId =
        paste0(dplyr::lag(.data$segId, 1), "_", .data$segId)
    )

  to_select <- c(
    "spkr", "startEnd", "time", "segId", "convTurnCount", "blkId",
    "transSpkr", "transSegId", "transTime"
  )

  transitions %>%
    # exclude within-segment rows (a segment's own start -> end)
    dplyr::filter(.data$transStartEnd != "startTime_endTime") %>%
    dplyr::filter(.data$transSpkr %in% legal_transitions) %>%
    dplyr::select(dplyr::one_of(to_select)) %>%
    tidyr::spread("startEnd", "time")
}

Your demo then yields the same results.

library(rlena)

demo_block_segs <- structure(list(
  itsId = c(
    "20160511_125444_009456", "20160511_125444_009456",
    "20160511_125444_009456", "20160511_125444_009456", "20160511_125444_009456",
    "20160511_125444_009456", "20160511_125444_009456", "20160511_125444_009456",
    "20160511_125444_009456", "20160511_125444_009456", "20160511_125444_009456",
    "20160511_125444_009456", "20160511_125444_009456", "20160511_125444_009456",
    "20160511_125444_009456", "20160511_125444_009456", "20160511_125444_009456",
    "20160511_125444_009456", "20160511_125444_009456", "20160511_125444_009456"
  ), recId = c(
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1
  ), blkId = c(
    64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
    64, 64, 64, 64, 64, 64, 64, 64, 64, 64
  ), blkTypeId = c(
    32, 32,
    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
    32, 32
  ), segId = c(
    989, 990, 991, 992, 993, 994, 995, 996, 997,
    998, 999, 1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008
  ), blkType = c(
    "Conversation", "Conversation", "Conversation",
    "Conversation", "Conversation", "Conversation", "Conversation",
    "Conversation", "Conversation", "Conversation", "Conversation",
    "Conversation", "Conversation", "Conversation", "Conversation",
    "Conversation", "Conversation", "Conversation", "Conversation",
    "Conversation"
  ), spkr = c(
    "CHN", "SIL", "FAN", "SIL", "SIL",
    "SIL", "FAN", "SIL", "SIL", "CHN", "SIL", "CHN", "SIL", "SIL",
    "SIL", "SIL", "CHN", "SIL", "FAN", "FAN"
  ), startTime = c(
    1842.47,
    1843.5, 1845.32, 1846.92, 1847.72, 1850.76, 1851.65, 1853.67,
    1854.89, 1856.13, 1856.73, 1858.1, 1858.7, 1859.5, 1860.81, 1862.54,
    1863.54, 1864.19, 1865.47, 1866.71
  ), endTime = c(
    1843.5, 1845.32,
    1846.92, 1847.72, 1850.76, 1851.65, 1853.67, 1854.89, 1856.13,
    1856.73, 1858.1, 1858.7, 1859.5, 1860.81, 1862.54, 1863.54, 1864.19,
    1865.47, 1866.71, 1867.37
  ), startClockTime = structure(c(
    1462881502.47,
    1462881503.5, 1462881505.32, 1462881506.92, 1462881507.72, 1462881510.76,
    1462881511.65, 1462881513.67, 1462881514.89, 1462881516.13, 1462881516.73,
    1462881518.1, 1462881518.7, 1462881519.5, 1462881520.81, 1462881522.54,
    1462881523.54, 1462881524.19, 1462881525.47, 1462881526.71
  ), class = c(
    "POSIXct",
    "POSIXt"
  ), tzone = "UTC"), endClockTime = structure(c(
    1462881503.5,
    1462881505.32, 1462881506.92, 1462881507.72, 1462881510.76, 1462881511.65,
    1462881513.67, 1462881514.89, 1462881516.13, 1462881516.73, 1462881518.1,
    1462881518.7, 1462881519.5, 1462881520.81, 1462881522.54, 1462881523.54,
    1462881524.19, 1462881525.47, 1462881526.71, 1462881527.37
  ), class = c(
    "POSIXct",
    "POSIXt"
  ), tzone = "UTC"), startClockTimeLocal = structure(c(
    1462863502.47,
    1462863503.5, 1462863505.32, 1462863506.92, 1462863507.72, 1462863510.76,
    1462863511.65, 1462863513.67, 1462863514.89, 1462863516.13, 1462863516.73,
    1462863518.1, 1462863518.7, 1462863519.5, 1462863520.81, 1462863522.54,
    1462863523.54, 1462863524.19, 1462863525.47, 1462863526.71
  ), class = c(
    "POSIXct",
    "POSIXt"
  ), tzone = "UTC"), endClockTimeLocal = structure(c(
    1462863503.5,
    1462863505.32, 1462863506.92, 1462863507.72, 1462863510.76, 1462863511.65,
    1462863513.67, 1462863514.89, 1462863516.13, 1462863516.73, 1462863518.1,
    1462863518.7, 1462863519.5, 1462863520.81, 1462863522.54, 1462863523.54,
    1462863524.19, 1462863525.47, 1462863526.71, 1462863527.37
  ), class = c(
    "POSIXct",
    "POSIXt"
  ), tzone = "UTC"), average_dB = c(
    -25.96, -64.98, -40.94,
    -50.58, -75.97, -65.56, -31.76, -62.86, -70.25, -22.05, -53.64,
    -32.33, -47.33, -72.13, -61.01, -60.97, -31.2, -74.41, -37.73,
    -36.97
  ), peak_dB = c(
    -20.45, -47.92, -30.99, -39.03, -61.14,
    -57.3, -13.71, -51.49, -56.03, -16.31, -39.93, -25.91, -38.41,
    -64.25, -48.82, -46.28, -25.73, -64.3, -31.68, -29.45
  ), recordingInfo = c(
    NA_character_,
    NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
    NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
    NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
    NA_character_, NA_character_, NA_character_, NA_character_
  ),
  conversationInfo = c(
    "BC|32|2|0|CIC|TIFI|FI", NA, "RC|32|3|1|CIC|TIFR|FI",
    NA, NA, NA, "RC|32|3|1|CIC|NT|FH", NA, NA, "RC|32|3|1|CIC|TIFE|FI",
    NA, "RC|32|3|1|CIC|NT|FH", NA, NA, NA, NA, "RC|32|3|1|CIC|TIFI|FH",
    NA, "RC|32|4|2|CIC|TIFR|FI", "EC|32|4|2|CIC|NT|FH"
  ), convStatus = c(
    "BC",
    NA, "RC", NA, NA, NA, "RC", NA, NA, "RC", NA, "RC", NA, NA,
    NA, NA, "RC", NA, "RC", "EC"
  ), convCount = c(
    32L, NA, 32L,
    NA, NA, NA, 32L, NA, NA, 32L, NA, 32L, NA, NA, NA, NA, 32L,
    NA, 32L, 32L
  ), convTurnCount = c(
    2L, NA, 3L, NA, NA, NA,
    3L, NA, NA, 3L, NA, 3L, NA, NA, NA, NA, 3L, NA, 4L, 4L
  ),
  convResponseCount = c(
    0L, NA, 1L, NA, NA, NA, 1L, NA, NA,
    1L, NA, 1L, NA, NA, NA, NA, 1L, NA, 2L, 2L
  ), convType = c(
    "CIC",
    NA, "CIC", NA, NA, NA, "CIC", NA, NA, "CIC", NA, "CIC", NA,
    NA, NA, NA, "CIC", NA, "CIC", "CIC"
  ), convTurnType = c(
    "TIFI",
    NA, "TIFR", NA, NA, NA, "NT", NA, NA, "TIFE", NA, "NT", NA,
    NA, NA, NA, "TIFI", NA, "TIFR", "NT"
  ), convFloorType = c(
    "FI",
    NA, "FI", NA, NA, NA, "FH", NA, NA, "FI", NA, "FH", NA, NA,
    NA, NA, "FH", NA, "FI", "FH"
  ), femaleAdultWordCnt = c(
    NA,
    NA, 7.57, NA, NA, NA, 6.61, NA, NA, NA, NA, NA, NA, NA, NA,
    NA, NA, NA, 5.61, 3.25
  ), femaleAdultNonSpeechLen = c(
    NA,
    NA, 0, NA, NA, NA, 0, NA, NA, NA, NA, NA, NA, NA, NA, NA,
    NA, NA, 0, 0
  ), femaleAdultUttCnt = c(
    NA, NA, 0, NA, NA, NA,
    0, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0, 0
  ), femaleAdultUttLen = c(
    NA,
    NA, 0, NA, NA, NA, 0, NA, NA, NA, NA, NA, NA, NA, NA, NA,
    NA, NA, 0, 0
  ), childUttCnt = c(
    1, NA, NA, NA, NA, NA, NA,
    NA, NA, 1, NA, 1, NA, NA, NA, NA, 1, NA, NA, NA
  ), childUttLen = c(
    0.78,
    NA, NA, NA, NA, NA, NA, NA, NA, 0.47, NA, 0.6, NA, NA, NA,
    NA, 0.51, NA, NA, NA
  ), startCry1 = c(
    NA_character_, NA_character_,
    NA_character_, NA_character_, NA_character_, NA_character_,
    NA_character_, NA_character_, NA_character_, NA_character_,
    NA_character_, NA_character_, NA_character_, NA_character_,
    NA_character_, NA_character_, NA_character_, NA_character_,
    NA_character_, NA_character_
  ), endCry1 = c(
    NA_character_,
    NA_character_, NA_character_, NA_character_, NA_character_,
    NA_character_, NA_character_, NA_character_, NA_character_,
    NA_character_, NA_character_, NA_character_, NA_character_,
    NA_character_, NA_character_, NA_character_, NA_character_,
    NA_character_, NA_character_, NA_character_
  ), childCryVfxLen = c(
    0,
    NA, NA, NA, NA, NA, NA, NA, NA, 0, NA, 0, NA, NA, NA, NA,
    0, NA, NA, NA
  ), startUtt1 = c(
    "PT1842.47S", NA, NA, NA, NA,
    NA, NA, NA, NA, "PT1856.13S", NA, "PT1858.38S", NA, NA, NA,
    NA, "PT1863.54S", NA, NA, NA
  ), endUtt1 = c(
    "PT1843.25S",
    NA, NA, NA, NA, NA, NA, NA, NA, "PT1856.60S", NA, "PT1858.70S",
    NA, NA, NA, NA, "PT1864.05S", NA, NA, NA
  ), startVfx1 = c(
    NA_real_,
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
    NA_real_
  ), endVfx1 = c(
    NA_real_, NA_real_, NA_real_, NA_real_,
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
    NA_real_, NA_real_, NA_real_, NA_real_
  ), maleAdultWordCnt = c(
    NA_real_,
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
    NA_real_
  ), maleAdultNonSpeechLen = c(
    NA_real_, NA_real_,
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
  ), maleAdultUttCnt = c(
    NA_real_, NA_real_, NA_real_, NA_real_,
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
    NA_real_, NA_real_, NA_real_, NA_real_
  ), maleAdultUttLen = c(
    NA_real_,
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
    NA_real_
  ), startUtt2 = c(
    NA_character_, NA_character_, NA_character_,
    NA_character_, NA_character_, NA_character_, NA_character_,
    NA_character_, NA_character_, NA_character_, NA_character_,
    NA_character_, NA_character_, NA_character_, NA_character_,
    NA_character_, NA_character_, NA_character_, NA_character_,
    NA_character_
  ), endUtt2 = c(
    NA_character_, NA_character_,
    NA_character_, NA_character_, NA_character_, NA_character_,
    NA_character_, NA_character_, NA_character_, NA_character_,
    NA_character_, NA_character_, NA_character_, NA_character_,
    NA_character_, NA_character_, NA_character_, NA_character_,
    NA_character_, NA_character_
  ), startUtt3 = c(
    NA_character_,
    NA_character_, NA_character_, NA_character_, NA_character_,
    NA_character_, NA_character_, NA_character_, NA_character_,
    NA_character_, NA_character_, NA_character_, NA_character_,
    NA_character_, NA_character_, NA_character_, NA_character_,
    NA_character_, NA_character_, NA_character_
  ), endUtt3 = c(
    NA_character_,
    NA_character_, NA_character_, NA_character_, NA_character_,
    NA_character_, NA_character_, NA_character_, NA_character_,
    NA_character_, NA_character_, NA_character_, NA_character_,
    NA_character_, NA_character_, NA_character_, NA_character_,
    NA_character_, NA_character_, NA_character_
  ), startCry2 = c(
    NA_character_,
    NA_character_, NA_character_, NA_character_, NA_character_,
    NA_character_, NA_character_, NA_character_, NA_character_,
    NA_character_, NA_character_, NA_character_, NA_character_,
    NA_character_, NA_character_, NA_character_, NA_character_,
    NA_character_, NA_character_, NA_character_
  ), endCry2 = c(
    NA_character_,
    NA_character_, NA_character_, NA_character_, NA_character_,
    NA_character_, NA_character_, NA_character_, NA_character_,
    NA_character_, NA_character_, NA_character_, NA_character_,
    NA_character_, NA_character_, NA_character_, NA_character_,
    NA_character_, NA_character_, NA_character_
  ), startCry3 = c(
    NA_character_,
    NA_character_, NA_character_, NA_character_, NA_character_,
    NA_character_, NA_character_, NA_character_, NA_character_,
    NA_character_, NA_character_, NA_character_, NA_character_,
    NA_character_, NA_character_, NA_character_, NA_character_,
    NA_character_, NA_character_, NA_character_
  ), endCry3 = c(
    NA_character_,
    NA_character_, NA_character_, NA_character_, NA_character_,
    NA_character_, NA_character_, NA_character_, NA_character_,
    NA_character_, NA_character_, NA_character_, NA_character_,
    NA_character_, NA_character_, NA_character_, NA_character_,
    NA_character_, NA_character_, NA_character_
  )
), row.names = c(
  NA,
  -20L
), class = c("tbl_df", "tbl", "data.frame"))

# Run the prototype on the demo block with the default legal transitions
demo_block_segs %>% 
  gather_speaker_transitions_from_segments()
#> # A tibble: 3 x 8
#>   spkr  segId convTurnCount blkId transSpkr transSegId transTime startTime
#>   <chr> <dbl>         <int> <dbl> <chr>     <chr>          <dbl>     <dbl>
#> 1 CHN     998             3    64 FAN_CHN   995_998         2.46     1856.
#> 2 FAN     991             3    64 CHN_FAN   989_991         1.82     1845.
#> 3 FAN    1007             4    64 CHN_FAN   1005_1007       1.28     1865.

Created on 2019-08-07 by the reprex package (v0.3.0)

ebergelson commented 5 years ago

awesome, thnx tristan, will give this a go for 'battletesting' as you say.


*Elika Bergelson, PhD*
Crandall Family Assistant Professor, Duke University
Psychology & Neuroscience Dept.
Center for Cognitive Neuroscience
Linguistics (secondary)

On Wed, Aug 7, 2019 at 10:40 AM TJ Mahr <notifications@github.com> wrote:

> I won't be able to take this on, completely. I haven't work on LENA data
> in a couple years at this point, nor have I ever work on the conversation
> turn / contingency level of the data.
>
> That said, I did clean up your proof-of-concept code, and I have added a
> prototype of the code to the package. The two functions are
> gather_speaker_transitions(its_xml) and
> gather_speaker_transitions_from_segments(data_segments).
>
> Their code is in extract.R. It's basically your proof of concept code, but
> I broke up the logic to handle xml or segments in a dataframe, cleaned up
> the prefixed names so the code to work inside of a package, and changed the
> filtering. Now, the main idea to enumerate all the speaker transitions in
> the segments dataframe (excluding transitions from/to SIL), take lags, and
> filter down to the legal transitions.
>
> The functions are marked as internal so that you can try them using the
> package but they are separate from the other more complete functions.
>
> The next steps would be to battle-test this code on real LENA data to make
> sure it works correctly. If there is other python/perl code that does the
> same thing, we could validate against that. I don't know what else there is.
> Once it works to your satisfaction, one would have to finish writing
> documentation for it in the package and in the README. This would be a
> great opportunity for a student going deep on this kind of data. 😉
>
> # for now treat as internal/experimental
> #' Gather speaker transitions
> #'
> #' Combs through speech segments and returns a dataframe of speaker
> #' transitions.
> #'
> #' @param legal_transitions a character vector with transitions to keep. If
> #'   `NULL` (the default), only `c("MAN_CHN", "MAN_CHN", "FAN_CHN", "CHN_FAN")`
> #'   are used.
> #' @inheritParams extract
> #' @export
> #' @keywords internal
> #' @return a dataframe with one row per segment. It contains the columns
> #'   `transSpkr` (speaker transition as `previous_current`), `transSegId`
> #'   (segment IDs), and `transTime` (the time lag between the two segments).
> gather_speaker_transitions <- function(its_xml, legal_transitions = NULL) {
>
>   its_xml %>%
>
>     gather_segments() %>%
>
>     gather_speaker_transitions_from_segments(legal_transitions)
>
> }
>
>
> # for now treat as internal/experimental
> #' @param data_segments a dataframe produced by `gather_segments()`
> #' @export
> #' @keywords internal
> #' @rdname gather_speaker_transitions
> gather_speaker_transitions_from_segments <- function(data_segments,
>
>                                                      legal_transitions = NULL) {
>
>   if (is.null(legal_transitions)) {
>
>     legal_transitions <- c(
>
>       "MAN_CHN", "MAN_CHN",
>
>       "FAN_CHN", "CHN_FAN"
>
>     )
>
>   }
>
>
>
>   segments <- data_segments %>%
>
>     tidyr::gather(
>
>       key = "startEnd",
>
>       value = "time",
>
>       .data$startTime,
>
>       .data$endTime
>
>     ) %>%
>
>     dplyr::arrange(.data$segId)
>
>
>
>   no_pauses <- segments %>%
>
>     dplyr::filter(.data$blkType != "Pause")
>
>
>
>   # Label different kinds of transitions
>
>   transitions <- no_pauses %>%
>
>     dplyr::filter(.data$spkr != "SIL") %>%
>
>     dplyr::mutate(
>
>       transStartEnd =
>
>         paste0(dplyr::lag(.data$startEnd, 1), "_", .data$startEnd),
>
>       transSpkr =
>
>         paste0(dplyr::lag(.data$spkr, 1), "_", .data$spkr),
>
>       transTime =
>
>         .data$time - dplyr::lag(.data$time),
>
>       transSegId =
>
>         paste0(dplyr::lag(.data$segId, 1), "_", .data$segId)
>
>     )
>
>
>
>   to_select <- c(
>
>     "spkr", "startEnd", "time", "segId", "convTurnCount", "blkId",
>
>     "transSpkr", "transSegId", "transTime"
>
>   )
>
>
>
>   transitions %>%
>
>     # exclude within-turn transitions
>
>     dplyr::filter(.data$transStartEnd != "startTime_endTime") %>%
>
>     dplyr::filter(transSpkr %in% c(legal_transitions)) %>%
>
>     dplyr::select(dplyr::one_of(to_select)) %>%
>
>     tidyr::spread(.data$startEnd, .data$time)
>
> }
>
> Your demo then yields the same results.
>
> library(rlena)
>
>
> demo_block_segs <- structure(list(
>
>   itsId = c(
>
>     "20160511_125444_009456", "20160511_125444_009456",
>
>     "20160511_125444_009456", "20160511_125444_009456", "20160511_125444_009456",
>
>     "20160511_125444_009456", "20160511_125444_009456", "20160511_125444_009456",
>
>     "20160511_125444_009456", "20160511_125444_009456", "20160511_125444_009456",
>
>     "20160511_125444_009456", "20160511_125444_009456", "20160511_125444_009456",
>
>     "20160511_125444_009456", "20160511_125444_009456", "20160511_125444_009456",
>
>     "20160511_125444_009456", "20160511_125444_009456", "20160511_125444_009456"
>
>   ), recId = c(
>
>     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
>
>     1, 1, 1, 1
>
>   ), blkId = c(
>
>     64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
>
>     64, 64, 64, 64, 64, 64, 64, 64, 64, 64
>
>   ), blkTypeId = c(
>
>     32, 32,
>
>     32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
>
>     32, 32
>
>   ), segId = c(
>
>     989, 990, 991, 992, 993, 994, 995, 996, 997,
>
>     998, 999, 1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008
>
>   ), blkType = c(
>
>     "Conversation", "Conversation", "Conversation",
>
>     "Conversation", "Conversation", "Conversation", "Conversation",
>
>     "Conversation", "Conversation", "Conversation", "Conversation",
>
>     "Conversation", "Conversation", "Conversation", "Conversation",
>
>     "Conversation", "Conversation", "Conversation", "Conversation",
>
>     "Conversation"
>
>   ), spkr = c(
>
>     "CHN", "SIL", "FAN", "SIL", "SIL",
>
>     "SIL", "FAN", "SIL", "SIL", "CHN", "SIL", "CHN", "SIL", "SIL",
>
>     "SIL", "SIL", "CHN", "SIL", "FAN", "FAN"
>
>   ), startTime = c(
>
>     1842.47,
>
>     1843.5, 1845.32, 1846.92, 1847.72, 1850.76, 1851.65, 1853.67,
>
>     1854.89, 1856.13, 1856.73, 1858.1, 1858.7, 1859.5, 1860.81, 1862.54,
>
>     1863.54, 1864.19, 1865.47, 1866.71
>
>   ), endTime = c(
>
>     1843.5, 1845.32,
>
>     1846.92, 1847.72, 1850.76, 1851.65, 1853.67, 1854.89, 1856.13,
>
>     1856.73, 1858.1, 1858.7, 1859.5, 1860.81, 1862.54, 1863.54, 1864.19,
>
>     1865.47, 1866.71, 1867.37
>
>   ), startClockTime = structure(c(
>
>     1462881502.47,
>
>     1462881503.5, 1462881505.32, 1462881506.92, 1462881507.72, 1462881510.76,
>
>     1462881511.65, 1462881513.67, 1462881514.89, 1462881516.13, 1462881516.73,
>
>     1462881518.1, 1462881518.7, 1462881519.5, 1462881520.81, 1462881522.54,
>
>     1462881523.54, 1462881524.19, 1462881525.47, 1462881526.71
>
>   ), class = c(
>
>     "POSIXct",
>
>     "POSIXt"
>
>   ), tzone = "UTC"), endClockTime = structure(c(
>
>     1462881503.5,
>
>     1462881505.32, 1462881506.92, 1462881507.72, 1462881510.76, 1462881511.65,
>
>     1462881513.67, 1462881514.89, 1462881516.13, 1462881516.73, 1462881518.1,
>
>     1462881518.7, 1462881519.5, 1462881520.81, 1462881522.54, 1462881523.54,
>
>     1462881524.19, 1462881525.47, 1462881526.71, 1462881527.37
>
>   ), class = c(
>
>     "POSIXct",
>
>     "POSIXt"
>
>   ), tzone = "UTC"), startClockTimeLocal = structure(c(
>
>     1462863502.47,
>
>     1462863503.5, 1462863505.32, 1462863506.92, 1462863507.72, 1462863510.76,
>
>     1462863511.65, 1462863513.67, 1462863514.89, 1462863516.13, 1462863516.73,
>
>     1462863518.1, 1462863518.7, 1462863519.5, 1462863520.81, 1462863522.54,
>
>     1462863523.54, 1462863524.19, 1462863525.47, 1462863526.71
>
>   ), class = c(
>
>     "POSIXct",
>
>     "POSIXt"
>
>   ), tzone = "UTC"), endClockTimeLocal = structure(c(
>
>     1462863503.5,
>
>     1462863505.32, 1462863506.92, 1462863507.72, 1462863510.76, 1462863511.65,
>
>     1462863513.67, 1462863514.89, 1462863516.13, 1462863516.73, 1462863518.1,
>
>     1462863518.7, 1462863519.5, 1462863520.81, 1462863522.54, 1462863523.54,
>
>     1462863524.19, 1462863525.47, 1462863526.71, 1462863527.37
>
>   ), class = c(
>
>     "POSIXct",
>
>     "POSIXt"
>
>   ), tzone = "UTC"), average_dB = c(
>
>     -25.96, -64.98, -40.94,
>
>     -50.58, -75.97, -65.56, -31.76, -62.86, -70.25, -22.05, -53.64,
>
>     -32.33, -47.33, -72.13, -61.01, -60.97, -31.2, -74.41, -37.73,
>
>     -36.97
>
>   ), peak_dB = c(
>
>     -20.45, -47.92, -30.99, -39.03, -61.14,
>
>     -57.3, -13.71, -51.49, -56.03, -16.31, -39.93, -25.91, -38.41,
>
>     -64.25, -48.82, -46.28, -25.73, -64.3, -31.68, -29.45
>
>   ), recordingInfo = c(
>
>     NA_character_,
>
>     NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
>
>     NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
>
>     NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
>
>     NA_character_, NA_character_, NA_character_, NA_character_
>
>   ),
>
>   conversationInfo = c(
>
>     "BC|32|2|0|CIC|TIFI|FI", NA, "RC|32|3|1|CIC|TIFR|FI",
>
>     NA, NA, NA, "RC|32|3|1|CIC|NT|FH", NA, NA, "RC|32|3|1|CIC|TIFE|FI",
>
>     NA, "RC|32|3|1|CIC|NT|FH", NA, NA, NA, NA, "RC|32|3|1|CIC|TIFI|FH",
>
>     NA, "RC|32|4|2|CIC|TIFR|FI", "EC|32|4|2|CIC|NT|FH"
>
>   ), convStatus = c(
>
>     "BC",
>
>     NA, "RC", NA, NA, NA, "RC", NA, NA, "RC", NA, "RC", NA, NA,
>
>     NA, NA, "RC", NA, "RC", "EC"
>
>   ), convCount = c(
>
>     32L, NA, 32L,
>
>     NA, NA, NA, 32L, NA, NA, 32L, NA, 32L, NA, NA, NA, NA, 32L,
>
>     NA, 32L, 32L
>
>   ), convTurnCount = c(
>
>     2L, NA, 3L, NA, NA, NA,
>
>     3L, NA, NA, 3L, NA, 3L, NA, NA, NA, NA, 3L, NA, 4L, 4L
>
>   ),
>
>   convResponseCount = c(
>
>     0L, NA, 1L, NA, NA, NA, 1L, NA, NA,
>
>     1L, NA, 1L, NA, NA, NA, NA, 1L, NA, 2L, 2L
>
>   ), convType = c(
>
>     "CIC",
>
>     NA, "CIC", NA, NA, NA, "CIC", NA, NA, "CIC", NA, "CIC", NA,
>
>     NA, NA, NA, "CIC", NA, "CIC", "CIC"
>
>   ), convTurnType = c(
>
>     "TIFI",
>
>     NA, "TIFR", NA, NA, NA, "NT", NA, NA, "TIFE", NA, "NT", NA,
>
>     NA, NA, NA, "TIFI", NA, "TIFR", "NT"
>
>   ), convFloorType = c(
>
>     "FI",
>
>     NA, "FI", NA, NA, NA, "FH", NA, NA, "FI", NA, "FH", NA, NA,
>
>     NA, NA, "FH", NA, "FI", "FH"
>
>   ), femaleAdultWordCnt = c(
>
>     NA,
>
>     NA, 7.57, NA, NA, NA, 6.61, NA, NA, NA, NA, NA, NA, NA, NA,
>
>     NA, NA, NA, 5.61, 3.25
>
>   ), femaleAdultNonSpeechLen = c(
>
>     NA,
>
>     NA, 0, NA, NA, NA, 0, NA, NA, NA, NA, NA, NA, NA, NA, NA,
>
>     NA, NA, 0, 0
>
>   ), femaleAdultUttCnt = c(
>
>     NA, NA, 0, NA, NA, NA,
>
>     0, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0, 0
>
>   ), femaleAdultUttLen = c(
>
>     NA,
>
>     NA, 0, NA, NA, NA, 0, NA, NA, NA, NA, NA, NA, NA, NA, NA,
>
>     NA, NA, 0, 0
>
>   ), childUttCnt = c(
>
>     1, NA, NA, NA, NA, NA, NA,
>
>     NA, NA, 1, NA, 1, NA, NA, NA, NA, 1, NA, NA, NA
>
>   ), childUttLen = c(
>
>     0.78,
>
>     NA, NA, NA, NA, NA, NA, NA, NA, 0.47, NA, 0.6, NA, NA, NA,
>
>     NA, 0.51, NA, NA, NA
>
>   ), startCry1 = c(
>
>     NA_character_, NA_character_,
>
>     NA_character_, NA_character_, NA_character_, NA_character_,
>
>     NA_character_, NA_character_, NA_character_, NA_character_,
>
>     NA_character_, NA_character_, NA_character_, NA_character_,
>
>     NA_character_, NA_character_, NA_character_, NA_character_,
>
>     NA_character_, NA_character_
>
>   ), endCry1 = c(
>
>     NA_character_,
>
>     NA_character_, NA_character_, NA_character_, NA_character_,
>
>     NA_character_, NA_character_, NA_character_, NA_character_,
>
>     NA_character_, NA_character_, NA_character_, NA_character_,
>
>     NA_character_, NA_character_, NA_character_, NA_character_,
>
>     NA_character_, NA_character_, NA_character_
>
>   ), childCryVfxLen = c(
>
>     0,
>
>     NA, NA, NA, NA, NA, NA, NA, NA, 0, NA, 0, NA, NA, NA, NA,
>
>     0, NA, NA, NA
>
>   ), startUtt1 = c(
>
>     "PT1842.47S", NA, NA, NA, NA,
>
>     NA, NA, NA, NA, "PT1856.13S", NA, "PT1858.38S", NA, NA, NA,
>
>     NA, "PT1863.54S", NA, NA, NA
>
>   ), endUtt1 = c(
>
>     "PT1843.25S",
>
>     NA, NA, NA, NA, NA, NA, NA, NA, "PT1856.60S", NA, "PT1858.70S",
>
>     NA, NA, NA, NA, "PT1864.05S", NA, NA, NA
>
>   ), startVfx1 = c(
>
>     NA_real_,
>
>     NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
>
>     NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
>
>     NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
>
>     NA_real_
>
>   ), endVfx1 = c(
>
>     NA_real_, NA_real_, NA_real_, NA_real_,
>
>     NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
>
>     NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
>
>     NA_real_, NA_real_, NA_real_, NA_real_
>
>   ), maleAdultWordCnt = c(
>
>     NA_real_,
>
>     NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
>
>     NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
>
>     NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
>
>     NA_real_
>
>   ), maleAdultNonSpeechLen = c(
>
>     NA_real_, NA_real_,
>
>     NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
>
>     NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
>
>     NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
>
>   ), maleAdultUttCnt = c(
>
>     NA_real_, NA_real_, NA_real_, NA_real_,
>
>     NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
>
>     NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
>
>     NA_real_, NA_real_, NA_real_, NA_real_
>
>   ), maleAdultUttLen = c(
>
>     NA_real_,
>
>     NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
>
>     NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
>
>     NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
>
>     NA_real_
>
>   ), startUtt2 = c(
>
>     NA_character_, NA_character_, NA_character_,
>
>     NA_character_, NA_character_, NA_character_, NA_character_,
>
>     NA_character_, NA_character_, NA_character_, NA_character_,
>
>     NA_character_, NA_character_, NA_character_, NA_character_,
>
>     NA_character_, NA_character_, NA_character_, NA_character_,
>
>     NA_character_
>
>   ), endUtt2 = c(
>
>     NA_character_, NA_character_,
>
>     NA_character_, NA_character_, NA_character_, NA_character_,
>
>     NA_character_, NA_character_, NA_character_, NA_character_,
>
>     NA_character_, NA_character_, NA_character_, NA_character_,
>
>     NA_character_, NA_character_, NA_character_, NA_character_,
>
>     NA_character_, NA_character_
>
>   ), startUtt3 = c(
>
>     NA_character_,
>
>     NA_character_, NA_character_, NA_character_, NA_character_,
>
>     NA_character_, NA_character_, NA_character_, NA_character_,
>
>     NA_character_, NA_character_, NA_character_, NA_character_,
>
>     NA_character_, NA_character_, NA_character_, NA_character_,
>
>     NA_character_, NA_character_, NA_character_
>
>   ), endUtt3 = c(
>
>     NA_character_,
>
>     NA_character_, NA_character_, NA_character_, NA_character_,
>
>     NA_character_, NA_character_, NA_character_, NA_character_,
>
>     NA_character_, NA_character_, NA_character_, NA_character_,
>
>     NA_character_, NA_character_, NA_character_, NA_character_,
>
>     NA_character_, NA_character_, NA_character_
>
>   ), startCry2 = c(
>
>     NA_character_,
>
>     NA_character_, NA_character_, NA_character_, NA_character_,
>
>     NA_character_, NA_character_, NA_character_, NA_character_,
>
>     NA_character_, NA_character_, NA_character_, NA_character_,
>
>     NA_character_, NA_character_, NA_character_, NA_character_,
>
>     NA_character_, NA_character_, NA_character_
>
>   ), endCry2 = c(
>
>     NA_character_,
>
>     NA_character_, NA_character_, NA_character_, NA_character_,
>
>     NA_character_, NA_character_, NA_character_, NA_character_,
>
>     NA_character_, NA_character_, NA_character_, NA_character_,
>
>     NA_character_, NA_character_, NA_character_, NA_character_,
>
>     NA_character_, NA_character_, NA_character_
>
>   ), startCry3 = c(
>
>     NA_character_,
>
>     NA_character_, NA_character_, NA_character_, NA_character_,
>
>     NA_character_, NA_character_, NA_character_, NA_character_,
>
>     NA_character_, NA_character_, NA_character_, NA_character_,
>
>     NA_character_, NA_character_, NA_character_, NA_character_,
>
>     NA_character_, NA_character_, NA_character_
>
>   ), endCry3 = c(
>
>     NA_character_,
>
>     NA_character_, NA_character_, NA_character_, NA_character_,
>
>     NA_character_, NA_character_, NA_character_, NA_character_,
>
>     NA_character_, NA_character_, NA_character_, NA_character_,
>
>     NA_character_, NA_character_, NA_character_, NA_character_,
>
>     NA_character_, NA_character_, NA_character_
>
>   )
>
> ), row.names = c(
>
>   NA,
>
>   -20L
>
> ), class = c("tbl_df", "tbl", "data.frame"))
>
>
>
>
> demo_block_segs %>%
>
>   gather_speaker_transitions_from_segments()
> #> # A tibble: 3 x 8
> #>   spkr  segId convTurnCount blkId transSpkr transSegId transTime startTime
> #>   <chr> <dbl>         <int> <dbl> <chr>     <chr>          <dbl>     <dbl>
> #> 1 CHN     998             3    64 FAN_CHN   995_998         2.46     1856.
> #> 2 FAN     991             3    64 CHN_FAN   989_991         1.82     1845.
> #> 3 FAN    1007             4    64 CHN_FAN   1005_1007       1.28     1865.
>
> Created on 2019-08-07 by the reprex package <https://reprex.tidyverse.org>
> (v0.3.0)
>
> —
> You are receiving this because you authored the thread.
> Reply to this email directly, view it on GitHub
> <https://github.com/HomeBankCode/rlena/issues/8?email_source=notifications&email_token=ACUSONDWQ4EPXOLPGWR5QODQDLNFVA5CNFSM4IJYBS5KYY3PNVWWK3TUL52HS4DFVREXG43VMVBW63LNMVXHJKTDN5WW2ZLOORPWSZGOD3YUCXA#issuecomment-519127388>,
> or mute the thread
> <https://github.com/notifications/unsubscribe-auth/ACUSONBCGQKOGQFUME6BEMTQDLNFVANCNFSM4IJYBS5A>
> .
>