r-hyperspec / hyperSpec

hyperSpec: Tools for Spectroscopy (R package)
https://r-hyperspec.github.io/hyperSpec/
GNU General Public License v3.0
12 stars 3 forks source link

The use of null characters in gsub are not allowed and breaks read.spc #118

Closed olaf-b closed 9 months ago

olaf-b commented 10 months ago

https://github.com/r-hyperspec/hyperSpec/blob/531d1943210d21bfb7c2ba56e4a3f507c70f42b1/R/DEPRECATED-read.spc.R#L555

The use of null-character in gsub breaks read.spc after upgrade of R. My guess is that this is due to the new gsub is not allowing use of \0 or ff in the input.

AlreadyTakenJonas commented 10 months ago

For anyone else finding their way here. Those functions are moved to another package: https://github.com/r-hyperspec/hySpc.read.spc

AlreadyTakenJonas commented 10 months ago

The new package does also not work. Different error, could not be bothered to have a look at it. I fixed the function in this package instead. @olaf-b you're right, only the one line is wrong. I added useBytes=TRUE and that fixed it.

Copy this code and your good to go. You'll still need hyperSpec though, there are two internal package calls in the function.

##    COPIED FROM https://github.com/r-hyperspec/hyperSpec/blob/develop/R/DEPRECATED-read.spc.R
##    Because hyperSpec package's buggy and the replacement/fixed package as well. Copied code and fixed it myself.
##
### read.spc - Import Thermo Galactic's .spc file format into an hyperSpec Object
###
### C. Beleites 2009/11/29
###
#####################################################################################################

## Define constants ---------------------------------------------------------------------------------

.nul <- as.raw(0)

## header sizes
.spc.size <- c(hdr = 512, subhdr = 32, subfiledir = 12, loghdr = 64)

.spc.default.keys.hdr2data <- c("fexper", "fres", "fsource")
.spc.default.keys.log2data <- FALSE

## axis labeling ------------------------------------------------------------------------------------

## x-axis units .....................................................................................
.spc.FXTYPE <- c(
  expression(`/`(x, "a. u.")), # 0
  expression(`/`(tilde(nu), cm^-1)),
  expression(`/`(lambda, (mu * m))),
  expression(`/`(lambda, nm)),
  expression(`/`(t, s)),
  expression(`/`(t, min)),
  expression(`/`(f, Hz)),
  expression(`/`(f, kHz)),
  expression(`/`(f, MHz)),
  expression(`/`(frac(m, z), frac(u, e))),
  expression(`/`(delta, ppm)), # 10
  expression(`/`(t, d)),
  expression(`/`(t, a)),
  expression(`/`(Delta * tilde(nu), cm^-1)),
  expression(`/`(E, eV)),
  NA, # old version file uses label in gcatxt
  "Diode No",
  "Channel",
  expression(`/`(x, degree)),
  expression(`/`(T, degree * F)),
  expression(`/`(T, degree * C)), # 20
  expression(`/`(T, K)),
  "Data Point",
  expression(`/`(t, ms)),
  expression(`/`(t, micro * s)),
  expression(`/`(t, ns)),
  expression(`/`(f, GHz)),
  expression(`/`(lambda, cm)),
  expression(`/`(lambda, m)),
  expression(`/`(lambda, mm)),
  expression(`/`(t, h)) # 30
)

.spc.xlab <- function(x) {
  if (is.character(x)) {
    x
  } else if (x <= length(.spc.FXTYPE) + 1) {
    .spc.FXTYPE[x + 1]
  } else {
    ## x = 255 is for double interferogram and supposed not to have a label.
    ## Thus, returning NA is appropriate
    NA
  }
}

## y-axis units .....................................................................................
.spc.FYTYPE <- c(
  expression(`/`(I[Ref], "a. u.")), # -1
  expression(`/`(I, "a. u.")),
  expression(`/`(I[IGRM], "a. u.")),
  "A",
  expression(frac((1 - R)^2, 2 * R)),
  "Counts",
  expression(`/`(U, V)),
  expression(`/`(y, degree)),
  expression(`/`(I, mA)),
  expression(`/`(l, mm)),
  expression(`/`(U, mV)),
  expression(-log(R)), # 10
  expression(`/`(y, "%")),
  expression(`/`(I, "a. u.")),
  expression(I / I[0]),
  expression(`/`(E, J)),
  NA, # old version file uses label in gcatxt
  expression(`/`(G, dB)),
  NA, # old version file uses label in gcatxt
  NA, # old version file uses label in gcatxt
  expression(`/`(T, degree * F)),
  expression(`/`(T, degree * C)), # 20
  expression(`/`(T, K)),
  "n",
  "K", # extinction coeaffictient
  expression(Re(y)),
  expression(Im(y)),
  "y (complex)", # complex
  "T",
  "R",
  expression(`/`(I, "a. u.")),
  expression(`/`(I[Emission], "a. u."))
)
.spc.ylab <- function(x) {
  if (is.character(x)) {
    x
  } else if (x <= 26) {
    .spc.FYTYPE[x + 2]
  } else if (x %in% 128:131) {
    .spc.FYTYPE[x - 99]
  } else {
    NA
  }
}

## helper functions ---------------------------------------------------------------------------------
### raw.split.nul - rawToChar conversion, splitting at \0
#' @importFrom utils tail
raw.split.nul <- function(raw, trunc = c(TRUE, TRUE), firstonly = FALSE, paste.collapse = NULL) {
  # todo make better truncation
  trunc <- rep(trunc, length.out = 2)

  if (trunc[1] && raw[1] == .nul) {
    raw <- raw[-1]
  }
  if (trunc[2]) {
    tmp <- which(raw > .nul)
    if (length(tmp) == 0) {
      return("")
    }
    raw <- raw[1:tail(tmp, 1)]
  }
  if (raw[length(raw)] != .nul) {
    raw <- c(raw, .nul)
  }

  tmp <- c(0, which(raw == .nul))

  out <- character(length(tmp) - 1)
  for (i in 1:(length(tmp) - 1)) {
    if (tmp[i] + 1 < tmp[i + 1] - 1) {
      out[i] <- rawToChar(raw[(tmp[i] + 1):(tmp[i + 1] - 1)])
    }
  }

  if (length(out) > 1L) {
    if (firstonly) {
      message("multiple strings encountered in spc file ", paste(out, collapse = ", "), ": using only the first one.")
      out <- out[1]
    } else if (!is.null(paste.collapse)) {
      if (hy_get_option("debuglevel") > 2L) {
        message("multiple strings encountered in spc file ", paste(out, collapse = ", "), " => pasting.")
      }

      out <- paste(out, collapse = paste.collapse)
    }
  }

  out
}

## file part reading functions ----------------------------------------------------------------------

## read file header .................................................................................
##
##

#' @importFrom utils maintainer
.spc.filehdr <- function(raw.data) {
  ## check file format

  ## Detect Shimadzu SPC (which is effectively a variant of OLE CF format)
  if (isTRUE(all.equal(
    raw.data[1:4],
    as.raw(c("0xD0", "0xCF", "0x11", "0xE0"))
  ))) {
    stop("Support for Shimadzu SPC file format (OLE CF) is not yet implemented")
  }

  ## NEW.LSB = 75 supported,
  ## NEW.MSB = 76 not supported (neither by many Grams software according to spc doc)
  ## OLD     = 77 not supported (replaced by new format in 1996)
  if (raw.data[2] != 75) {
    stop(
      "Wrong spc file format version (or no spc file at all).\n",
      "Only 'new' spc files (1996 file format) with LSB word order are supported."
    )
  }

  hdr <- list(
    ftflgs = readBin(raw.data[1], "integer", 1, 1, signed = FALSE),
    ## byte 2 is already interpreted
    fexper = readBin(raw.data[3], "integer", 1, 1, signed = TRUE),
    fexp = readBin(raw.data[4], "integer", 1, 1, signed = TRUE),
    fnpts = readBin(raw.data[5:8], "integer", 1, 4),
    ffirst = readBin(raw.data[9:16], "double", 1, 8),
    flast = readBin(raw.data[17:24], "double", 1, 8),
    fnsub = readBin(raw.data[25:28], "integer", 1, 4),
    fxtype = readBin(raw.data[29], "integer", 1, 1, signed = FALSE),
    fytype = readBin(raw.data[30], "integer", 1, 1, signed = FALSE),
    fztype = readBin(raw.data[31], "integer", 1, 1, signed = FALSE),
    fpost = readBin(raw.data[32], "integer", 1, 1, signed = TRUE),
    fdate = readBin(raw.data[33:36], "integer", 1, 4),
    fres = raw.split.nul(raw.data[37:45], paste.collapse = "\r\n"),
    fsource = raw.split.nul(raw.data[46:54], paste.collapse = "\r\n"),
    fpeakpt = readBin(raw.data[55:56], "integer", 1, 2, signed = FALSE),
    fspare = readBin(raw.data[57:88], "numeric", 8, 4),
    fcmnt = raw.split.nul(raw.data[89:218], paste.collapse = "\r\n"),
    fcatxt = raw.split.nul(raw.data[219:248], trunc = c(FALSE, TRUE)),
    flogoff = readBin(raw.data[249:252], "integer", 1, 4), # , signed = FALSE),
    fmods = readBin(raw.data[253:256], "integer", 1, 4), # , signed = FALSE),
    fprocs = readBin(raw.data[257], "integer", 1, 1, signed = TRUE),
    flevel = readBin(raw.data[258], "integer", 1, 1, signed = TRUE),
    fsampin = readBin(raw.data[259:260], "integer", 1, 2, signed = FALSE),
    ffactor = readBin(raw.data[261:264], "numeric", 1, 4),
    fmethod = raw.split.nul(raw.data[265:312]),
    fzinc = readBin(raw.data[313:316], "numeric", 1, 4), # , signed = FALSE),
    fwplanes = readBin(raw.data[317:320], "integer", 1, 4), # , signed = FALSE),
    fwinc = readBin(raw.data[321:324], "numeric", 1, 4),
    fwtype = readBin(raw.data[325], "integer", 1, 1, signed = TRUE),
    ## 187 bytes reserved
    .last.read = .spc.size["hdr"]
  )

  ## R doesn't have unsigned long int .................................
  if (any(unlist(hdr[c("flogoff", "fmods", "fwplanes")]) < 0)) {
    stop(
      "error reading header: R does not support unsigned long integers.",
      "Please contact the maintainer of the package."
    )
  }

  ## do some post processing ..........................................

  experiments <- c(
    "General", "Gas Chromatogram", "General Chromatogram", "HPLC Chromatogram",
    "NIR Spectrum", "UV-VIS Spectrum", "* reserved *", "X-ray diffraction spectrum",
    "Mass Spectrum", "NMR Spectrum", "Raman Spectrum", "Fluorescence Spectrum",
    "Atomic Spectrum", "Chroatography Diode Array Data"
  )
  hdr$fexper <- factor(hdr$fexper + 1, levels = seq_along(experiments))
  levels(hdr$fexper) <- experiments

  hdr$ftflgs <- .spc.ftflags(hdr$ftflgs)

  hdr$fdate <- ISOdate(
    year = hdr$fdate %/% 1048560,
    month = hdr$fdate %/% 65536 %% 16,
    day = hdr$fdate %/% 2048 %% 32,
    hour = hdr$fdate %/% 64 %% 32,
    min = hdr$fdate %% 64
  )

  ## interferogram ?
  ## if not, hdr$fpeakpt is set to NULL
  if (hdr$fytype == 1) {
    if (hdr$fpeakpt != 0) {
      hdr$fpeakpt <- hdr$fpeakpt + 1
    }
  } else {
    hdr$fpeakpt <- NULL
  }

  ## set the axis labels
  if (hdr$ftflgs["TALABS"]) {
    # TODO: find test data
    tmp <- rep(0, 4)
    tmp[seq_along(hdr$fcatxt)] <- nchar(hdr$fcatxt)

    if (tmp[1] > 0) hdr$fxtype <- hdr$fcatxt[1]
    if (tmp[2] > 0) hdr$fytype <- hdr$fcatxt[2]
    if (tmp[3] > 0) hdr$fztype <- hdr$fcatxt[3]
    if (tmp[4] > 0) hdr$fwtype <- hdr$fcatxt[4]
  }
  hdr$fxtype <- .spc.xlab(hdr$fxtype)
  hdr$fytype <- .spc.ylab(hdr$fytype)
  hdr$fztype <- .spc.xlab(hdr$fztype)
  hdr$fwtype <- .spc.xlab(hdr$fwtype)

  ## File with subfiles with individual x axes?
  ## Then there should be a subfile directory:
  if (hdr$ftflgs["TXYXYS"] && hdr$ftflgs["TMULTI"]) {
    ## try to reject impossible values for the subfiledir offset
    if (hdr$fnpts > length(raw.data) ||
      (hdr$fnpts > hdr$flogoff && hdr$flogoff > 0) ||
      hdr$fnpts < 512) {
      .spc.error(
        ".spc.read.hdr", list(hdr = hdr),
        "file header flags specify TXYXYS and TMULTI, ",
        "but fnpts does not give a valid offset for the subfile directory.\n hdr$ftflgs = ",
        paste(names(hdr$ftflgs)[hdr$ftflgs], collapse = " | "),
        " (", sum(2^(0:7)[hdr$ftflgs]), ")\n",
        "You can try to read the file using hdr$ftflgs & ! TXYXYS (",
        sum(2^(0:7)[hdr$ftflgs & c(TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE)]),
        "). This assumes that all subfiles do have the same x axis.\n\n"
      )
    }

    hdr$subfiledir <- hdr$fnpts
    hdr$fnpts <- 0
  } else {
    hdr$subfiledir <- 0
  }

  ## some checks ......................................................

  if (hdr$ftflgs["TMULTI"]) {
    ## multiple spectra in file
    if (hdr$fnsub <= 1) {
      if (hy_get_option("debuglevel") >= 2L) {
        message("spc file header specifies multiple spectra but only zero or one subfile.")
      }
    }
  } else {
    ## single spectrum file
    if (hdr$fnsub == 0) {
      hdr$fnsub <- 1
    }

    if (hdr$fnsub > 1) {
      warning(
        "spc file header specifies single spectrum file  but ", hdr$fnsub,
        " subfiles (spectra).\nOnly first subfile will be read."
      )
      hdr$fnsub <- 1
    }

    if (hdr$ftflgs["TRANDM"]) {
      message("spc file header: file type flag TRANDM encountered => Enforcing TMULTI.")
    }

    if (hdr$ftflgs["TORDRD"]) {
      message("spc file header: file type flag TORDRD encountered => Enforcing TMULTI.")
    }

    if ((hdr$ftflgs["TRANDM"] || hdr$ftflgs["TORDRD"]) && hdr$fnsub > 1) {
      hdr$ftflgs["TMULTI"] <- TRUE
    }
  }

  if (hdr$ftflgs["TXYXYS"] && !hdr$ftflgs["TXVALS"]) {
    warning("spc file header: file type flag TXYXYS encountered => Enforcing TXVALS.")
    hdr$ftflgs["TXVALS"] <- TRUE
  }

  if (hdr$fwplanes > 0) {
    warning(
      "w planes found! This is not yet tested as the developer didn't have access to such files.\n",
      "Please contact the package maintainer ", maintainer("hyperSpec"),
      " stating whether the file was imported successfully or not."
    )
  }

  hdr
}

## read sub file header .............................................................................
##
## needs header for consistency checks
##

.spc.subhdr <- function(raw.data, pos, hdr) {
  subhdr <- list(
    subflgs = raw.data[pos + (1)],
    subexp = readBin(raw.data[pos + (2)], "integer", 1, 1, signed = TRUE),
    subindx = readBin(raw.data[pos + (3:4)], "integer", 1, 2, signed = FALSE),
    subtime = readBin(raw.data[pos + (5:8)], "numeric", 1, 4),
    subnext = readBin(raw.data[pos + (9:12)], "numeric", 1, 4),
    subnois = readBin(raw.data[pos + (13:16)], "numeric", 1, 4),
    subnpts = readBin(raw.data[pos + (17:20)], "integer", 1, 4), # , signed = FALSE),
    subscan = readBin(raw.data[pos + (21:24)], "integer", 1, 4), # , signed = FALSE),
    subwlevel = readBin(raw.data[pos + (25:28)], "numeric", 1, 4)
  )
  ## 4 bytes reserved

  ## R doesn't have unsigned long int .................................
  if (any(unlist(subhdr[c("subnpts", "subscan")]) < 0)) {
    stop(
      "error reading subheader: R does not support unsigned long integers.",
      "Please contact the maintainer of the package."
    )
  }

  hdr$.last.read <- pos + .spc.size["subhdr"]

  ## checking
  if (subhdr$subexp == -128 && hdr$fexp != -128) {
    message(
      "subfile ", subhdr$subindx, " specifies data type float, but file header doesn't.",
      "\n=> Data will be interpreted as float unless TMULTI is set."
    )
  }

  if (subhdr$subnpts > 0 && subhdr$subnpts != hdr$fnpts && !hdr$ftflgs["TXYXYS"]) {
    message(
      "subfile ", subhdr$subindx, ": number of points in file header and subfile header ",
      "inconsistent. => Going to use subheader."
    )
  }

  if (subhdr$subnpts == 0) {
    if (hdr$ftflgs["TXYXYS"]) {
      message(
        "subfile ", subhdr$subindx, ": number of data points per spectrum not specified. ",
        "=> Using file header information (", hdr$fnpts, ")."
      )
    }
    subhdr$subnpts <- hdr$fnpts
  }

  if (!hdr$ftflgs["TXYXYS"]) {
    if (hdr$fnpts != subhdr$subnpts) {
      .spc.error(
        ".spc.read.subhdr", list(hdr = hdr, subhdr = subhdr),
        "hdr and subhdr differ in number of points per spectrum, ",
        "but TXYXYS is not specified.\n hdr$ftflgs = ",
        paste(names(hdr$ftflgs)[hdr$ftflgs], collapse = " | "),
        " (", sum(2^(0:7)[hdr$ftflgs]), ")\n",
        "You can try to read the file using hdr$ftflgs | TMULTI | TXYXYS (",
        sum(2^(0:7)[hdr$ftflgs |
          c(FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE)]),
        ").\n\n"
      )
    }
  }

  #  str (subhdr)
  ## according to .spc file documentation:
  if (!hdr$ftflgs["TMULTI"]) {
    subhdr$subexp <- hdr$fexp
  } else if (hdr$fexp == -128 && subhdr$subexp != -128) {
    message(
      "Header file specifies float data format, but subfile uses integer exponent. ",
      "=> Using file header settings."
    )
    subhdr$subexp <- -128
  }

  ## the z values
  if (hdr$fzinc == 0) { # should only happen for the first subfile...
    hdr$fzinc <- subhdr$subnext - subhdr$subtime
  }

  if (subhdr$subindx == 0) {
    hdr$firstz <- subhdr$subtime
  }

  if (subhdr$subtime == 0) {
    subhdr$subtime <- subhdr$subindx * hdr$fzinc + hdr$firstz
  }

  ## the w values
  if (hdr$fwplanes > 0) {
    if (hdr$fwinc == 0) { ## unevenly spaced w planes
    }

    # if (subhdr$subwlevel != 0) {
    #   subhdr$w <- subhdr$subwlevel
    #
    # } else if (subhdr$subindx %% hdr$fwplanes == 1)
    #   subhdr$w <- hdr$subhdr$w +  hdr$fwinc
    # else
    #   subhdr$w <- hdr$subhdr$w
  }

  hdr$subhdr <- subhdr

  hdr
}

## read subfile directory ...........................................................................
##

.spc.subfiledir <- function(raw.data, pos, nsub) {
  dir <- data.frame(
    ssfposn = rep(NA, nsub),
    ssfsize = rep(NA, nsub),
    ssftime = rep(NA, nsub)
  )

  for (s in seq_len(nsub)) {
    dir[s, ] <- c(
      readBin(raw.data[pos + (1:4)], "integer", 1, 4), # , signed = FALSE),
      readBin(raw.data[pos + (5:8)], "integer", 1, 4), # , signed = FALSE),
      readBin(raw.data[pos + (9:12)], "numeric", 1, 4)
    )
    pos <- pos + .spc.size["subfiledir"]
  }

  ## R doesn't have unsigned long int .................................
  if (any(dir[, 1:2] < 0)) {
    stop(
      "error reading subfiledir: R does not support unsigned long integers.",
      "Please contact the maintainer of the package."
    )
  }

  #     dir$ssfposn <- dir$ssfposn
  dir
}

## read log block header ............................................................................
##
#' @importFrom utils head tail
.spc.log <- function(raw.data, pos, log.bin, log.disk, log.txt, keys.log2data,
                     replace.nul = as.raw(255), iconv.from = "latin1", iconv.to = "utf8") {
  if (pos == 0) { # no log block exists
    return(list(
      data = list(),
      log = list()
    ))
  }

  loghdr <- list(
    logsizd = readBin(raw.data[pos + (1:4)], "integer", 1, 4), #  , signed = FALSE),
    logsizm = readBin(raw.data[pos + (5:8)], "integer", 1, 4), # , signed = FALSE),
    logtxto = readBin(raw.data[pos + (9:12)], "integer", 1, 4), # , signed = FALSE),
    logbins = readBin(raw.data[pos + (13:16)], "integer", 1, 4), # , signed = FALSE),
    logdsks = readBin(raw.data[pos + (17:20)], "integer", 1, 4), # , signed = FALSE),
    ## 44 bytes reserved
    .last.read = pos + .spc.size["loghdr"]
  )

  ## R doesn't have unsigned long int .................................
  if (any(unlist(loghdr) < 0)) {
    stop(
      "error reading log: R does not support unsigned long integers.",
      "Please contact the maintainer of the package."
    )
  }

  log <- list()
  data <- list()

  ## read binary part of log
  if (log.bin) {
    log$.log.bin <- raw.data[loghdr$.last.read + seq_len(loghdr$logbins)]
  }

  ## read binary on-disk-only part of log
  if (log.disk) {
    log$.log.disk <- raw.data[loghdr$.last.read + loghdr$logbins + seq_len(loghdr$logdsks)]
  }

  ## read text part of log
  if (log.txt & loghdr$logsizd > loghdr$logtxto) {
    log.txt <- raw.data[pos + loghdr$logtxto + seq_len(loghdr$logsizd - loghdr$logtxto)]
    if (tail(log.txt, 1) == .nul) { # throw away nul at the end
      log.txt <- head(log.txt, -1)
    }
    log.txt[log.txt == .nul] <- replace.nul
    log.txt <- readChar(log.txt, length(log.txt), useBytes = T)
    log.txt <- gsub(rawToChar(replace.nul), "\r\n", log.txt, useBytes=TRUE)
    log.txt <- iconv(log.txt, iconv.from, iconv.to)
    log.txt <- split.string(log.txt, "\r\n") ## spc file spec says \r\n regardless of OS
    log.txt <- hyperSpec:::split.line(log.txt, "=")
    data <- getbynames(log.txt, keys.log2data)
  }

  list(log.long = log, extra.data = data)
}

## read y data ......................................................................................
##

.spc.read.y <- function(raw.data, pos, npts, exponent, word) {
  if (exponent == -128) { # 4 byte float

    list(
      y = readBin(raw.data[pos + seq_len(npts * 4)], "numeric", npts, 4),
      .last.read = pos + npts * 4
    )
  } else if (word) { # 2 byte fixed point integer = word

    list(
      y = readBin(raw.data[pos + seq_len(npts * 2)], "integer", npts, 2, signed = TRUE) *
        2^(exponent - 16),
      .last.read = pos + npts * 2
    )
  } else { # 4 byte fixed point integer = dword
    list(
      y = readBin(raw.data[pos + seq_len(npts * 4)], "integer", npts, 4) *
        2^(exponent - 32),
      .last.read = pos + npts * 4
    )
  }
}

## read x data ......................................................................................
##

.spc.read.x <- function(raw.data, pos, npts) {
  list(
    x = readBin(raw.data[pos + seq_len(npts * 4)], "numeric", npts, 4),
    .last.read = pos + 4 * npts
  )
}

## error .............................................................................................
#' @importFrom utils str
.spc.error <- function(fname, objects, ...) {
  cat("ERROR in read.spc function ", fname, "\n\n")
  for (i in seq_along(objects)) {
    cat(names(objects)[i], ":\n")
    str(objects[[i]], vec.len = 20)
  }
  stop(...)
}

.spc.ftflags <- function(x) {
  ftflgs <- as.logical(x %/% 2^(0:7) %% 2)
  names(ftflgs) <- c(
    "TSPREC", "TCGRAM", "TMULTI", "TRANDM",
    "TORDRD", "TALABS", "TXYXYS", "TXVALS"
  )
  ftflgs
}

#####################################################################################################

#' @name DEPRECATED-read.spc
#' @concept moved to hySpc.read.spc
#'
#' @title (DEPRECATED)
#'        Import for Thermo Galactic's `spc` file format
#'
#' @description
#'
#' These data input functions are **deprecated** and they will be removed in
#' the next release of \pkg{hyperspcc} package.
#' Now functions in package \pkg{hySpc.read.spc}
#' ([link](https://r-hyperspcc.github.io/hySpc.read.spc/reference/index.html))
#' should be used as the alternatives.
#'
#'
#' **Old description:**
#'
#' These functions allow to import Thermo Galactic/Grams `.spc` files.
#'
#' @param filename The complete file name of the `.spc` file.
#' @param keys.hdr2data,keys.log2data character vectors with the names of parameters in the `.spc`
#' file's log block (log2xxx) or header (hdr2xxx) that should go into the extra data (yyy2data) of
#' the returned hyperSpec object.
#'
#' All header fields specified in the `.spc` file format specification (see
#'   below) are imported and can be referred to by their de-capitalized names.
#' @param log.txt Should the text part of the `.spc` file's log block be read?
#' @param log.bin,log.disk Should the normal and on-disk binary parts of the
#'   `.spc` file's log block be read?  If so, they will be put as raw vectors
#'   into the hyperSpec object's log.
#' @param hdr A list with fileheader fields that overwrite the settings of
#'   actual file's header.
#'
#' Use with care, and look into the source code for detailed insight on the
#'   elements of this list.
#' @param no.object If `TRUE`, a list with wavelengths, spectra, labels,
#'   log and data are returned instead of a hyperSpec object.
#'
#' This parameter will likely be subject to change in future - use with care.
#' @return If the file contains multiple spectra with individual wavelength
#'   axes, `read.spc` returns a list of hyperSpec objects.  Otherwise the
#'   result is a hyperSpec object.
#'
#' `read.spc.KaiserMap` returns a hyperSpec object with data columns x,
#'   y, and z containing the stage position as recorded in the `.spc` files'
#'   log.
#' @note Only a restricted set of test files was available for development.
#'   Particularly, the w-planes feature could not be tested.
#'
#' If you have `.spc` files that cannot be read with these function, don't
#'   hesitate to contact the package maintainer with your code patch or asking
#'   advice.
#' @author C. Beleites
#' @references Source development kit and file format specification of `.spc`
#'   files.
#' @export
#'
#' @keywords IO file
#'
#' @examples
#'
#' ## get the sample .spc files from ftirsearch.com (see above)
#' \dontrun{
#' # single spectrum
#' spc <- read.spc("BENZENE.SPC")
#' plot(spc)
#'
#' # multi-spectra .spc file with common wavelength axis
#' spc <- read.spc("IG_MULTI.SPC")
#' spc
#'
#' # multi-spectra .spc file with individual wavelength axes
#' spc <- read.spc("BARBITUATES.SPC")
#' plot(spc[[1]], lines.args = list(type = "h"))
#' }
#'
#' @importFrom utils modifyList
read.spc <- function(filename,
                     keys.hdr2data = FALSE, keys.log2data = FALSE,
                     log.txt = TRUE, log.bin = FALSE, log.disk = FALSE,
                     hdr = list(),
                     no.object = FALSE) {
  # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  hyperSpec:::deprecated_read_spc()
  # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

  ## f contains the raw bytes of the file

  ## fpos marks the position of the last read byte
  ## this is the same as the offset from beginning of the file (count 0) in the .spc definition

  f <- readBin(filename, "raw", file.info(filename)$size, 1)

  hdr <- modifyList(.spc.filehdr(f), hdr)
  fpos <- hdr$.last.read

  if (!hdr$ftflgs["TXYXYS"]) {
    if (!hdr$ftflgs["TXVALS"]) {
      ## spectra with common evenly spaced wavelength axis
      wavelength <- seq(hdr$ffirst, hdr$flast, length.out = hdr$fnpts)
    } else {
      ## spectra with common unevenly spaced wavelength axis
      #     if (! hdr$ftflgs ['TMULTI']) { # also for multifile with common wavelength axis
      tmp <- .spc.read.x(f, fpos, hdr$fnpts)
      wavelength <- tmp$x
      fpos <- tmp$.last.read
    }
    # }
  }

  ## otherwise (TXYXYS set) hdr$fnpts gives offset to subfile directory if that exists

  ## obtain labels from file hdr or from parameter
  label <- list(
    .wavelength = hdr$fxtype, spc = hdr$fytype,
    z = hdr$fztype, z.end = hdr$fztype
  )

  if (hdr$fwplanes > 0) {
    label$w <- hdr$fwtype
  }

  ## prepare list for hyperSpec log and data.frame for extra data

  data <- list(z = NA, z.end = NA)
  if (hdr$fwplanes > 0) {
    data <- c(data, w = NA)
  }

  ## process the log block
  tmp <- .spc.log(
    f, hdr$flogoff,
    log.bin, log.disk, log.txt,
    keys.log2data
  )
  ## TODO: remove data2log

  data <- c(data, tmp$extra.data, getbynames(hdr, keys.hdr2data))

  ## preallocate spectra matrix or list for multispectra file with separate wavelength axes
  ## populate extra data
  if (hdr$ftflgs["TXYXYS"] && hdr$ftflgs["TMULTI"]) {
    spc <- list()
    data <- .prepare.hdr.df(data, nsubfiles = 1L)
  } else {
    spc <- matrix(NA, nrow = hdr$fnsub, ncol = hdr$fnpts)
    data <- .prepare.hdr.df(data, nsubfiles = hdr$fnsub)
  }

  ## read subfiles
  if (hdr$subfiledir) { ## TXYXYS
    hdr$subfiledir <- .spc.subfiledir(f, hdr$subfiledir, hdr$fnsub)

    for (s in seq_len(hdr$fnsub)) {
      hdr <- .spc.subhdr(f, hdr$subfiledir$ssfposn[s], hdr)
      fpos <- hdr$.last.read
      wavelength <- .spc.read.x(f, fpos, hdr$subhdr$subnpts)
      fpos <- wavelength$.last.read

      y <- .spc.read.y(f, fpos,
        npts = hdr$subhdr$subnpts, exponent = hdr$subhdr$subexp,
        word = hdr$ftflgs["TSPREC"]
      )
      fpos <- y$.last.read

      data$z <- hdr$subhdr$subtime
      data$z.end <- hdr$subhdr$subnext

      if (hdr$fwplanes > 0) {
        data$w <- hdr$subhdr$w
      }

      if (!exists("wavelength")) {
        .spc.error(
          "read.spc", list(hdr = hdr),
          "wavelength not read. This may be caused by wrong header information."
        )
      }

      spc[[s]] <- new("hyperSpec",
        spc = y$y,
        wavelength = wavelength$x,
        data = data,
        labels = label
      )
    }
  } else { ## multiple y data blocks behind each other
    for (s in seq_len(hdr$fnsub)) {
      hdr <- .spc.subhdr(f, fpos, hdr)
      fpos <- hdr$.last.read
      tmp <- .spc.read.y(f, fpos,
        npts = hdr$subhdr$subnpts, exponent = hdr$subhdr$subexp,
        word = hdr$ftflgs["TSPREC"]
      )
      fpos <- tmp$.last.read

      spc[s, ] <- tmp$y

      data[s, c("z", "z.end")] <- unlist(hdr$subhdr[c("subtime", "subnext")])

      if (hdr$fwplanes > 0) {
        data[s, "w"] <- hdr$subhdr$w
      }
    }
  }

  if (hdr$ftflgs["TXYXYS"] && hdr$ftflgs["TMULTI"]) {
    ## list of hyperSpec objects
    ## consistent file import behaviour across import functions
    lapply(spc, .spc_io_postprocess_optional, filename = filename)
  } else if (no.object) {
    list(spc = spc, wavelength = wavelength, data = data, labels = label)
  } else {
    if (hdr$fnsub > 1L && nrow(data) == 1L) {
      data <- data[rep(1L, hdr$fnsub), ]
    }

    spc <- new("hyperSpec",
      spc = spc, wavelength = wavelength,
      data = data, labels = label
    )

    ## consistent file import behaviour across import functions
    .spc_io_postprocess_optional(spc, filename)
  }
}

hySpc.testthat::test(read.spc) <- function() {

  test_that("deprecated", {
    local_edition(3)

    expect_warning(expect_warning(
      expect_error(read.spc(file = ""), "can only read"),
      "deprecated"
    ))
  })
}

.prepare.hdr.df <- function(data, nsubfiles) {
  ## the *type header elements are expressions. They need to be converted to character.
  data <- lapply(data, function(x) {
    if (mode(x) == "expression") {
      as.character(x)
    } else {
      x
    }
  })

  ## convert vectors to matrix, otherwise the data.frame will contain one  row per element.
  ## matrices need to be protected during as.data.frame

  vector.entries <- which(sapply(data, length) > 1L)
  for (v in vector.entries) {
    data[[v]] <- I(t(as.matrix(data[[v]])))
  }

  data <- as.data.frame(data, stringsAsFactors = FALSE)
  data <- data[rep(1L, nsubfiles), ]

  for (v in vector.entries) {
    data[[v]] <- unclass(data[[v]])
  } # remove AsIs protection

  data
}

# Helper functions -----------------------------------------------------------
### -----------------------------------------------------------------------------
###
### split.string - split string at pattern
###
###

split.string <- function(x, separator, trim.blank = TRUE, remove.empty = TRUE) {

  stopifnot(length(x) == 1) # we want a single character string

  pos <- gregexpr(separator, x)
  pos <- pos[[1]]

  if (length(pos) == 1 & pos[1] == -1) { # -1 means no match
    return(x)
  }

  pos <- matrix(c(
    1, pos + attr(pos, "match.length"),
    pos - 1, nchar(x)
  ),
  ncol = 2
  )

  if (pos[nrow(pos), 1] > nchar(x)) {
    pos <- pos[-nrow(pos), ]
  }

  x <- apply(pos, 1, function(p, x) substr(x, p[1], p[2]), x)

  if (trim.blank) {
    blank.pattern <- "^[[:blank:]]*([^[:blank:]]+.*[^[:blank:]]+)[[:blank:]]*$"
    x <- sub(blank.pattern, "\\1", x)
  }

  if (remove.empty) {
    x <- x[sapply(x, nchar) > 0]
  }

  x
}

# Unit tests -----------------------------------------------------------------
hySpc.testthat::test(split.string) <- function() {
  context("split.string")

  # Perform tests
  test_that("split.string() returnts output silently", {
    expect_error(split.string())
    expect_error(split.string(letters))

    expect_silent(split.string("letters", "r"))
  })

  # FIXME (tests): add tests to check the correctness of the output!!!
}

### -----------------------------------------------------------------------------
###
### getbynames - get list elements by name and if no such element exists, NA
###
###

getbynames <- function(x, e) {
  x <- x[e]
  if (length(x) > 0) {
    if (is.character(e)) {
      names(x) <- e
    }
    x[sapply(x, is.null)] <- NA
    x
  } else {
    list()
  }
}

# Unit tests -----------------------------------------------------------------
hySpc.testthat::test(getbynames) <- function() {
  context("getbynames")

  # Perform tests
  test_that("getbynames() works", {
    lst <- list(a = 1, b = "b", c = 2i)

    expect_equal(getbynames(lst, "a"), list(a = 1))
    expect_equal(getbynames(lst, 1), list(a = 1))
    expect_equal(getbynames(lst, 2), list(b = "b"))
    expect_equal(getbynames(lst, 6)[[1]], NA)
  })
}
GegznaV commented 10 months ago

@AlreadyTakenJonas, could you, please, create a pull request?

@olaf-b and @AlreadyTakenJonas, could you, please, report, which R version you use now and which you used when the function worked?

AlreadyTakenJonas commented 10 months ago

Sure, I'll create a pull request; give me a second. It works on Windows R 4.1.2 and fails for Linux R 4.3.

AlreadyTakenJonas commented 10 months ago

I don't fork other repos that often. Did I do it right?

GegznaV commented 10 months ago

I don't fork other repos that often. Did I do it right?

The pull request is created correctly. Thank you.